diff --git a/.ackrc b/.ackrc
index 0e0073a801..44d645a5d2 100644
--- a/.ackrc
+++ b/.ackrc
@@ -3,4 +3,5 @@
 --ignore-directory=is:.mypy_cache
 --ignore-directory=is:.pytest_cache
 --ignore-directory=is:.ruff_cache
+--ignore-directory=is:.venv
 --ignore-directory=is:site
diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml
index 3e1c9acd8f..ab87914ff3 100644
--- a/.github/workflows/code-quality.yml
+++ b/.github/workflows/code-quality.yml
@@ -76,6 +76,6 @@ jobs:
 
       # Coverage report
       - name: Upload coverage reports to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/bin/create_extra_bib.py b/bin/create_extra_bib.py
index 92d2924641..b3bc3ec5a1 100755
--- a/bin/create_extra_bib.py
+++ b/bin/create_extra_bib.py
@@ -36,6 +36,7 @@
 import msgspec
 from pathlib import Path
 import re
+from rich.console import Console
 from rich.progress import track
 import shutil
 import subprocess
@@ -48,6 +49,7 @@
 
 BIB2XML = None
 XML2END = None
+CONSOLE = Console(stderr=True)
 
 # Max shard size in MiB
 MAX_SHARD_MB = 49
@@ -89,6 +91,7 @@ def create_bibtex(builddir, clean=False) -> None:
                 reverse=True,
             ),
             description="Create anthology.bib.gz...  ",
+            console=CONSOLE,
         ):
             with open(volume_file, "r") as f:
                 bibtex = f.read()
@@ -124,6 +127,7 @@ def create_bibtex(builddir, clean=False) -> None:
                 reverse=True,
             ),
             description="       +abstracts.bib.gz... ",
+            console=CONSOLE,
         ):
             with open(collection_file, "rb") as f:
                 data = msgspec.json.decode(f.read())
@@ -351,7 +355,7 @@ def batch_convert_to_mods_and_endf(bibtex, context):
         )
 
     log_level = log.DEBUG if args["--debug"] else log.INFO
-    tracker = setup_rich_logging(level=log_level)
+    tracker = setup_rich_logging(console=CONSOLE, level=log_level)
 
     max_workers = int(args["--max-workers"]) if args["--max-workers"] else None
     if (BIB2XML := shutil.which("bib2xml")) is None:
diff --git a/bin/create_hugo_data.py b/bin/create_hugo_data.py
index b2c5bf0f5b..1ed8bc656e 100755
--- a/bin/create_hugo_data.py
+++ b/bin/create_hugo_data.py
@@ -39,6 +39,7 @@
 import msgspec
 from omegaconf import OmegaConf
 import os
+from rich.console import Console
 from rich.progress import (
     Progress,
     TextColumn,
@@ -60,6 +61,7 @@
 
 
 BIBLIMIT = None
+CONSOLE = Console(stderr=True)
 ENCODER = msgspec.json.Encoder()
 SCRIPTDIR = os.path.dirname(os.path.realpath(__file__))
 
@@ -93,7 +95,7 @@ def make_progress():
         TaskProgressColumn(show_speed=True),
         TimeRemainingColumn(elapsed_when_finished=True),
     ]
-    return Progress(*columns)
+    return Progress(*columns, console=CONSOLE)
 
 
 @cache
@@ -396,6 +398,8 @@ def export_people(anthology, builddir, dryrun):
                     data["full"] = f"{data['full']} ({', '.join(diff_script_variants)})"
             if person.comment is not None:
                 data["comment"] = person.comment
+            if person.orcid is not None:
+                data["orcid"] = person.orcid
             similar = anthology.people.similar.subset(person_id)
             if len(similar) > 1:
                 data["similar"] = list(similar - {person_id})
@@ -567,7 +571,7 @@ def export_anthology(anthology, builddir, clean=False, dryrun=False):
         )
 
     log_level = log.DEBUG if args["--debug"] else log.INFO
-    tracker = setup_rich_logging(level=log_level)
+    tracker = setup_rich_logging(console=CONSOLE, level=log_level)
 
     if limit := args["--bib-limit"]:
         BIBLIMIT = int(limit)
diff --git a/bin/oneoff/transition_to_people_yaml.py b/bin/oneoff/transition_to_people_yaml.py
new file mode 100644
index 0000000000..544c900600
--- /dev/null
+++ b/bin/oneoff/transition_to_people_yaml.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2025 Marcel Bollmann <marcel@bollmann.me>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Usage: transition_to_people_yaml.py [options]
+
+Creates people.yaml and rewrites author IDs in the XML according to <https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan#transitioning-the-metadata>.
+
+Options:
+  --debug                  Output debug-level log messages.
+  -d, --datadir=DIR        Directory with data files. [default: {scriptdir}/../../data]
+  -x, --write-xml          Write changes to the XML files.
+  -y, --write-yaml         Write the new people.yaml.
+  -h, --help               Display this helpful text.
+"""
+
+from collections import defaultdict
+from docopt import docopt
+from importlib.metadata import version as get_version
+import itertools as it
+import logging as log
+import os
+from pathlib import Path
+import yaml
+
+try:
+    from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:  # pragma: no cover
+    from yaml import Loader, Dumper  # type: ignore
+
+from acl_anthology import Anthology
+from acl_anthology.people import Name
+from acl_anthology.utils.logging import setup_rich_logging
+
+
+def parse_variant_list(anthology):
+    # We create a dictionary mapping person IDs to their original entry in
+    # name_variants.yaml; this is because there are fields in name_variants.yaml
+    # that the Python library does not store (such as 'orcid' or 'degree'), and
+    # we might want to transfer them to the new people.yaml
+    name_variants = {}
+    with open(
+        anthology.datadir / "yaml" / "name_variants.yaml", "r", encoding="utf-8"
+    ) as f:
+        variant_list = yaml.load(f, Loader=Loader)
+    for entry in variant_list:
+        if "id" in entry:
+            name_variants[entry["id"]] = entry
+        else:
+            people = anthology.people.get_by_name(Name.from_dict(entry["canonical"]))
+            assert (
+                len(people) == 1
+            ), "Canonical name in name_variants.yaml shouldn't be ambiguous"
+            name_variants[people[0].id] = entry
+    return name_variants
+
+
+# This exists to serialize names in "flow" style (i.e. one-liner {first: ...,
+# last: ...}), without having to force flow style on the entire YAML document
+class YAMLName(yaml.YAMLObject):
+    yaml_dumper = Dumper
+    yaml_tag = "tag:yaml.org,2002:map"  # serialize like a dictionary
+    yaml_flow_style = True  # force flow style
+
+    def __init__(self, first, last, script):
+        if first is not None:
+            self.first = first
+        self.last = last
+        if script is not None:
+            self.script = script
+
+
+def name_to_yaml(name):
+    return YAMLName(name.first, name.last, name.script)
+
+
+def refactor(anthology, name_variants):
+    new_people_dict = {}
+    c_removed, c_added = 0, 0
+
+    # These two are to infer if we need to set disable_name_matching: true somewhere
+    names_to_ids = defaultdict(list)
+    names_with_catchall_id = []
+    c_disable_name_matching = 0
+
+    for pid, person in anthology.people.items():
+        # We only consider people who are currently defined in name_variants.yaml
+        if not person.is_explicit:
+            continue
+
+        orig_entry = name_variants[pid]
+
+        # name_variants.yaml may define IDs that are actually never used
+        if not person.item_ids:
+            log.warning(
+                f"Person '{pid}' derived from name_variants.yaml has no papers; discarding"
+            )
+            continue
+
+        # If person has a comment like "May refer to multiple people" or "May
+        # refer to several people", their identity is "unverified", so we:
+        #   - Don't write them to people.yaml
+        #   - Remove their ID from the XML
+        if person.comment is not None and person.comment.startswith("May refer"):
+            log.debug(f"Removing ID '{pid}' ('{person.comment}')")
+            for paper in person.papers():
+                # Remove their ID from the XML
+                for namespec in it.chain(paper.authors, paper.get_editors()):
+                    if namespec.id == pid:
+                        namespec.id = None
+                        c_removed += 1
+
+            # Record the name(s) of this person so we can check later if this ID
+            # was important for disambiguation
+            names_with_catchall_id.extend(person.names)
+
+            # Don't process this person further
+            continue
+
+        # If we reach this point, this person should be considered "verified"
+        # under the new system.  However, maybe not all of their *names* should
+        # go into people.yaml---a name can have been added to `person.names` in
+        # different ways:
+        #
+        #   1. It was listed explicitly in `name_variants.yaml` -- keep
+        #   2. It was in the XML with this person's explicit ID -- keep
+        #   3. It was added to this person via the name matching mechanism that
+        #      compares slugified names -- don't keep, as it was inferred heuristically
+        #
+        # (This happens in <https://github.com/acl-org/acl-anthology/blob/170ff9706aba87de0e353da690e6b0bb33ea6a98/python/acl_anthology/people/index.py#L252-L299>)
+        c = 0
+        names_to_keep = {Name.from_dict(orig_entry["canonical"])} | {
+            Name.from_dict(name) for name in orig_entry.get("variants", [])
+        }  # Case 1
+
+        for paper in person.papers():
+            for namespec in it.chain(paper.authors, paper.get_editors()):
+                if namespec.id == pid:
+                    names_to_keep.add(namespec.name)  # Case 2
+                    break
+            else:
+                # Does *not* already have an explicit ID in the XML; add it.
+                # ---
+                # NOTE: Doing this in a separate loop to avoid the edge case where
+                # a paper might have two authors with identical names,
+                # disambiguated by their ID---not sure if that ever happens, but
+                # better be safe than sorry.
+                for namespec in it.chain(paper.authors, paper.get_editors()):
+                    if person.has_name(namespec.name):
+                        if namespec.name in names_to_keep:  # Avoid case 3
+                            namespec.id = pid
+                            c += 1
+                            c_added += 1
+                        break
+                else:
+                    # Should never happen
+                    log.error(
+                        f"Did not find '{pid}' on paper '{paper.full_id}' connected to them",
+                    )
+
+        if c > 0:
+            log.debug(f"Added explicit ID '{pid}' to {c} papers")
+
+        for name in person.names:
+            names_to_ids[name].append(pid)
+
+        # Construct entry for new people.yaml
+        entry = {
+            # First name is always the canonical one
+            "names": [
+                name_to_yaml(name) for name in person.names if name in names_to_keep
+            ],
+        }
+        if person.comment is not None:
+            entry["comment"] = person.comment
+        # These are keys we copy over from the old name_variants.yaml
+        for key in ("degree", "similar", "orcid"):
+            if key in orig_entry:
+                entry[key] = orig_entry[key]
+
+        new_people_dict[pid] = entry
+
+    for name in names_with_catchall_id:
+        pids = names_to_ids.get(name, [])
+        if len(pids) == 1:
+            # There is only one "verified" person with this name, but there was
+            # a catch-all ID ("May refer to several people") with this name too,
+            # so we need to disable name matching under the new system
+            new_people_dict[pids[0]]["disable_name_matching"] = True
+            c_disable_name_matching += 1
+
+    log.info(
+        f"Removed {c_removed:>5d} explicit IDs from the XML ('May refer to several people' etc.)"
+    )
+    log.info(f"  Added {c_added:>5d} explicit IDs to the XML")
+    log.info(f"Created {len(new_people_dict):>5d} entries for people.yaml")
+    log.info(
+        f"        {c_disable_name_matching:>5d} of those have `disable_name_matching: true`"
+    )
+
+    return new_people_dict
+
+
+if __name__ == "__main__":
+    args = docopt(__doc__)
+
+    log_level = log.DEBUG if args["--debug"] else log.INFO
+    tracker = setup_rich_logging(level=log_level)
+
+    if (version := get_version("acl_anthology")) != "0.5.3":
+        log.error(
+            f"This script needs to run with version 0.5.3 of the acl-anthology library; got {version}"
+        )
+        exit(1)
+
+    if "{scriptdir}" in args["--datadir"]:
+        args["--datadir"] = os.path.abspath(
+            args["--datadir"].format(scriptdir=os.path.dirname(os.path.abspath(__file__)))
+        )
+    datadir = Path(args["--datadir"])
+    log.info(f"Using data directory {datadir}")
+
+    anthology = Anthology(datadir=datadir)
+    anthology.load_all()
+
+    name_variants = parse_variant_list(anthology)
+    log.info(f"  Found {len(name_variants):>5d} entries in name_variants.yaml")
+
+    new_people_dict = refactor(anthology, name_variants)
+
+    if tracker.highest >= log.ERROR:
+        log.warning("There were errors; aborting without saving")
+        exit(1)
+
+    if args["--write-yaml"]:
+        log.info("Writing new people.yaml...")
+        with open(datadir / "yaml" / "people.yaml", "w", encoding="utf-8") as f:
+            yaml.dump(new_people_dict, f, allow_unicode=True, Dumper=Dumper)
+    else:
+        log.warning("Not writing people.yaml; use -y/--write-yaml flag")
+
+    if args["--write-xml"]:
+        log.info("Saving XML files...")
+        for collection in anthology.collections.values():
+            collection.save()
+    else:
+        log.warning("Not modifying XML files; use -x/--write-xml flag")
diff --git a/data/xml/1952.earlymt.xml b/data/xml/1952.earlymt.xml
index 0e95c06119..d8be388ffe 100644
--- a/data/xml/1952.earlymt.xml
+++ b/data/xml/1952.earlymt.xml
@@ -40,7 +40,7 @@
     </paper>
     <paper id="6">
       <title>Human translation versus machine translation</title>
-      <author><first>Leon</first><last>Dostert</last></author>
+      <author id="leon-dostert"><first>Leon</first><last>Dostert</last></author>
       <bibkey>dostert-1952-human</bibkey>
     </paper>
     <paper id="7">
diff --git a/data/xml/1956.earlymt.xml b/data/xml/1956.earlymt.xml
index 97c72adc49..025d7e5b2a 100644
--- a/data/xml/1956.earlymt.xml
+++ b/data/xml/1956.earlymt.xml
@@ -28,7 +28,7 @@
     </paper>
     <paper id="3">
       <title>Organisation and Method in Mechanical Translation Work</title>
-      <author><first>L. E.</first><last>Dostert</last></author>
+      <author id="leon-dostert"><first>L. E.</first><last>Dostert</last></author>
       <url hash="ec10d336">1956.earlymt-1.3</url>
       <bibkey>dostert-1956-organisation</bibkey>
     </paper>
diff --git a/data/xml/1957.earlymt.xml b/data/xml/1957.earlymt.xml
index 990e19843d..359560052c 100644
--- a/data/xml/1957.earlymt.xml
+++ b/data/xml/1957.earlymt.xml
@@ -6,7 +6,7 @@
       <address>Georgetown University</address>
       <month>12-13 April</month>
       <year>1957</year>
-      <editor><first>Léon</first><last>Dostert</last></editor>
+      <editor id="leon-dostert"><first>Léon</first><last>Dostert</last></editor>
       <venue>earlymt</venue>
     </meta>
     <frontmatter>
diff --git a/data/xml/1960.earlymt.xml b/data/xml/1960.earlymt.xml
index 405aace488..b258b7e20b 100644
--- a/data/xml/1960.earlymt.xml
+++ b/data/xml/1960.earlymt.xml
@@ -56,7 +56,7 @@
     </paper>
     <paper id="8">
       <title>Summation by Chairman</title>
-      <author><first>Leon</first><last>Dostert</last></author>
+      <author id="leon-dostert"><first>Leon</first><last>Dostert</last></author>
       <url hash="86d780f0">1960.earlymt-nsmt.8</url>
       <bibkey>dostert-1960-summation</bibkey>
     </paper>
diff --git a/data/xml/1961.earlymt.xml b/data/xml/1961.earlymt.xml
index c2e2aaca59..8eadd7b769 100644
--- a/data/xml/1961.earlymt.xml
+++ b/data/xml/1961.earlymt.xml
@@ -269,7 +269,7 @@
     </paper>
     <paper id="37">
       <title>An approach to the segmentation problem in speech analysis and language translation</title>
-      <author><first>Gerard</first><last>Salton</last></author>
+      <author id="gerard-salton"><first>Gerard</first><last>Salton</last></author>
       <author><first>R. W.</first><last>Thorpe</last></author>
       <url hash="8eabc1b8">1961.earlymt-1.37</url>
       <bibkey>salton-thorpe-1961-approach</bibkey>
diff --git a/data/xml/1962.earlymt.xml b/data/xml/1962.earlymt.xml
index 5cad5b0ff0..38dbcb4d96 100644
--- a/data/xml/1962.earlymt.xml
+++ b/data/xml/1962.earlymt.xml
@@ -73,7 +73,7 @@
     </paper>
     <paper id="9">
       <title>Langages Artificiels, Systèmes formels et Traduction automatique</title>
-      <author><first>Bernard</first><last>Vauquois</last></author>
+      <author id="bernard-vauquois"><first>Bernard</first><last>Vauquois</last></author>
       <pages>211-236</pages>
       <url hash="071d943a">1962.earlymt-1.9</url>
       <bibkey>vauquois-1962-langages</bibkey>
diff --git a/data/xml/1976.earlymt.xml b/data/xml/1976.earlymt.xml
index 37f342a45a..39dfa7769b 100644
--- a/data/xml/1976.earlymt.xml
+++ b/data/xml/1976.earlymt.xml
@@ -40,13 +40,13 @@
     </paper>
     <paper id="5">
       <title>Automatic language processing project, Brigham Young University</title>
-      <author><first>Eldon G.</first><last>Lytel</last></author>
+      <author id="eldon-g-lytle"><first>Eldon G.</first><last>Lytel</last></author>
       <pages>14-23</pages>
       <bibkey>lytel-1976-automatic</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish machine translation project on linguistic analysis, <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>alifornia, <fixed-case>B</fixed-case>erkeley</title>
-      <author><first>William S-Y.</first><last>Wang</last></author>
+      <author id="william-s-y-wang"><first>William S-Y.</first><last>Wang</last></author>
       <pages>24</pages>
       <url hash="65fd83eb">1976.earlymt-1.6</url>
       <bibkey>wang-1976-chinese</bibkey>
@@ -135,13 +135,13 @@
     </paper>
     <paper id="19">
       <title>Programs to understand stories</title>
-      <author><first>Roger C.</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger C.</first><last>Schank</last></author>
       <pages>65</pages>
       <bibkey>schank-1976-programs</bibkey>
     </paper>
     <paper id="20">
       <title>Semantics and world knowledge in <fixed-case>MT</fixed-case></title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>67-69</pages>
       <url hash="7f022082">1976.earlymt-1.20</url>
       <bibkey>wilks-1976-semantics</bibkey>
diff --git a/data/xml/1978.tc.xml b/data/xml/1978.tc.xml
index 4012c1a5fa..a16f8b92ee 100644
--- a/data/xml/1978.tc.xml
+++ b/data/xml/1978.tc.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>Machine translation and artificial intelligence Implementing machine aids to translation</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="f96fd14a">1978.tc-1.2</url>
       <bibkey>wilks-1978-machine</bibkey>
     </paper>
diff --git a/data/xml/1980.tc.xml b/data/xml/1980.tc.xml
index 8449895ee3..fce225a735 100644
--- a/data/xml/1980.tc.xml
+++ b/data/xml/1980.tc.xml
@@ -35,7 +35,7 @@
     </paper>
     <paper id="5">
       <title>Terminological Data Banks: a model for a <fixed-case>B</fixed-case>ritish Linguistic Data Bank (<fixed-case>LDB</fixed-case>)</title>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <url hash="d6b6b281">1980.tc-1.5</url>
       <bibkey>mcnaught-1980-terminological</bibkey>
     </paper>
diff --git a/data/xml/1981.tc.xml b/data/xml/1981.tc.xml
index 4d7cb470c8..43bc1b5c04 100644
--- a/data/xml/1981.tc.xml
+++ b/data/xml/1981.tc.xml
@@ -28,7 +28,7 @@
     </paper>
     <paper id="3">
       <title>The evolution of machine translation systems</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <url hash="cddaf41f">1981.tc-1.3</url>
       <bibkey>hutchins-1981-evolution</bibkey>
     </paper>
@@ -143,13 +143,13 @@
     </paper>
     <paper id="22">
       <title>Summary of discussion: Speculation; The Limits of Innovation</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <url hash="f6870f98">1981.tc-1.22</url>
       <bibkey>hutchins-1981-summary</bibkey>
     </paper>
     <paper id="23">
       <title>Concluding remarks</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="16de61b0">1981.tc-1.23</url>
       <bibkey>wilks-1981-concluding</bibkey>
     </paper>
diff --git a/data/xml/1984.bcs.xml b/data/xml/1984.bcs.xml
index 7a9dcf454c..fe86be894f 100644
--- a/data/xml/1984.bcs.xml
+++ b/data/xml/1984.bcs.xml
@@ -25,7 +25,7 @@
     </paper>
     <paper id="3">
       <title>Methods of linguistic analysis in machine translation</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="39f1870b">1984.bcs-1.3</url>
       <bibkey>hutchins-1984-methods</bibkey>
     </paper>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="9">
       <title>Searching single-word and multi-word dictionaries</title>
-      <author><first>Francis J.</first><last>Smith</last></author>
+      <author id="francis-j-smith"><first>Francis J.</first><last>Smith</last></author>
       <author><first>K.</first><last>Devine</last></author>
       <author><first>P.</first><last>Craig</last></author>
       <url hash="85783960">1984.bcs-1.9</url>
@@ -92,8 +92,8 @@
     </paper>
     <paper id="13">
       <title>A software system for describing a grammar of machine translation: <fixed-case>GRADE</fixed-case></title>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <abstract>A new software system for describing a grammar of a machine translation system has been developed. This software system is called GRADE (GRAmmar DEscriber). GRADE has the following features: 1. GRADE allows a grammar writer to divide a whole grammar into several parts. Each part of the grammar is called a subgrammar. A subgrammar describes a step of the translation process. A whole grammar is then described by a network of sub-grammars. This network is called a subgrammar network. A subgrammar network allows a grammar writer to control the process of the translation precisely. When a subgrammar network in the analysis phase consists of a subgrammar for a noun-phrase (SG1) and a subgrammar for a verb-phase (SG2) in this sequence, the subgrammar network first applies SG1 to an input sentence, then applies SG2 to the result of an application of SG1, thus getting a syntactic structure for the input sentence. 2. A subgrammar consists of a set of rewriting rules. Rewriting rules in a subgrammar are applied for an input sentence in an appropriate order, which is specified in the description of the subgrammar. A rewriting rule transforms a tree structure into another tree structure. Rewriting rules use a powerful pattern matching algorithm to test their applicability to a tree structure. For example, a grammar writer can write a pattern that recognizes and parses an arbitrary numbers of sub-trees. Each node of a tree-structure has a list of pairs of a property name and a property value. A node can express a category name, a semantic marker, flags to control the translation process, and various other information. This tree-to-tree transformation operation by GRADE allows a grammar writer to describe all the processes of analysis, transfer and generation of a machine translation system with this uniform description capability of GRADE. 3. A subgrammar network or a subgrammar can be written in an entry of the dictionaries for a machine translation system. A subgrammar network or a subgrammar written in a dictionary entry is called a dictionary rule, which is specific for a word. When an input sentence contains a word which has a dictionary rule, it is applied to an input sentence at an appropriate point of a translation process. It can express more precise processing appropriate for that specific word that a general Subgrammar Network or Subgrammar. it also allows grammar writers to adjust a machine translation system to a specific domain easily. 4. GRADE is written in LISP. GRADE is implemented on FACOM M-382 and Symbolics 3600. GRADE is used in the machine translation system between Japanese and English. The project was started by the Japanese government in 1982. The effectiveness of GRADE has been demonstrated in the project.</abstract>
       <url hash="c9b1916c">1984.bcs-1.13</url>
       <bibkey>nakamura-nagao-1984-software</bibkey>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="16">
       <title>Production of sentences: a general algorithm and a case study</title>
-      <author><first>Giovanni</first><last>Adorni</last></author>
+      <author id="giovanni-adorni"><first>Giovanni</first><last>Adorni</last></author>
       <author><first>Lina</first><last>Massone</last></author>
       <url hash="663cf1a3">1984.bcs-1.16</url>
       <abstract>In this paper a procedure for the production of sentences is described, producing written sentences in a particular language starting from formal representations of their meaning. After a brief description of the internal representation used, the algorithm is presented, and some results and future trends are discussed.</abstract>
@@ -134,7 +134,7 @@
     </paper>
     <paper id="19">
       <title>Machine translation with post editing versus a three-level integrated translator aid system</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <url hash="f593e45b">1984.bcs-1.19</url>
       <abstract>The standard design for a computer-assisted translation system consists of data entry of source text, machine translation, and post editing (i.e. revision) of raw machine translation. This paper discusses this standard design and presents an alternative three-level design consisting of word processing integrated with terminology aids, simple source text processing, and a link to an off-line machine translation system. Advantages of the new design are discussed.</abstract>
       <bibkey>melby-1984-machine</bibkey>
@@ -143,7 +143,7 @@
       <title>The grammatical tagging of unrestricted <fixed-case>E</fixed-case>nglish text</title>
       <author><first>Roger</first><last>Garside</last></author>
       <author><first>Geoffrey</first><last>Leach</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <url hash="e9a37088">1984.bcs-1.20</url>
       <bibkey>garside-etal-1984-grammatical</bibkey>
     </paper>
diff --git a/data/xml/1984.tc.xml b/data/xml/1984.tc.xml
index f7625a5e65..4f6c74b035 100644
--- a/data/xml/1984.tc.xml
+++ b/data/xml/1984.tc.xml
@@ -68,7 +68,7 @@
     </paper>
     <paper id="9">
       <title>Parallel Session <fixed-case>III</fixed-case>: Machine translation. Summary of discussion</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <author><first>Monique</first><last>L’Huillier</last></author>
       <author><first>Brian</first><last>McCluskey</last></author>
       <url hash="84cf0a81">1984.tc-1.9</url>
diff --git a/data/xml/1985.tc.xml b/data/xml/1985.tc.xml
index f51d060ce1..e10bd29525 100644
--- a/data/xml/1985.tc.xml
+++ b/data/xml/1985.tc.xml
@@ -114,7 +114,7 @@
     </paper>
     <paper id="17">
       <title>Machine translation: <fixed-case>J</fixed-case>apanese perspectives</title>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <url hash="7a6f23c5">1985.tc-1.17</url>
       <bibkey>nishida-doshita-1985-machine</bibkey>
diff --git a/data/xml/1985.tmi.xml b/data/xml/1985.tmi.xml
index a1c791d788..686061e870 100644
--- a/data/xml/1985.tmi.xml
+++ b/data/xml/1985.tmi.xml
@@ -13,11 +13,11 @@
       <author><first>Doug</first><last>Arnold</last></author>
       <author><first>Lieven</first><last>Jaspaert</last></author>
       <author><first>Rod</first><last>Johnson</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <author><first>Nino</first><last>Varile</last></author>
-      <author><first>Susan</first><last>Warwick</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick</last></author>
       <bibkey>arnold-etal-1985-mul</bibkey>
     </paper>
     <paper id="2">
@@ -27,15 +27,15 @@
     </paper>
     <paper id="3">
       <title>A Case Study in Software Evolution: from Ariane-78.4 to Ariane-85</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>P.</first><last>Guillaume</last></author>
       <author><first>M.</first><last>Quezel-Ambrunaz</last></author>
       <bibkey>boitet-etal-1985-case</bibkey>
     </paper>
     <paper id="4">
       <title>New Approaches to Machine Translation</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <bibkey>carbonell-tomita-1985-new</bibkey>
     </paper>
     <paper id="5">
@@ -47,7 +47,7 @@
     <paper id="6">
       <title>On the Design of Expert Systems Grafted on <fixed-case>MT</fixed-case> Systems</title>
       <author><first>R.</first><last>Gerber</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <bibkey>gerber-boitet-1985-design</bibkey>
     </paper>
     <paper id="7">
@@ -57,13 +57,13 @@
     </paper>
     <paper id="8">
       <title>Machine Translation as an Expert Task</title>
-      <author><first>Roderick L.</first><last>Johnson</last></author>
+      <author id="roderick-l-johnson"><first>Roderick L.</first><last>Johnson</last></author>
       <author><first>Peter</first><last>Whitelock</last></author>
       <bibkey>johnson-whitelock-1985-machine</bibkey>
     </paper>
     <paper id="9">
       <title>Structural Correspondences and <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <bibkey>kaplan-1985-structural</bibkey>
     </paper>
     <paper id="10">
@@ -73,34 +73,34 @@
     </paper>
     <paper id="11">
       <title>Integrating Syntax and Semantics</title>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <bibkey>lytinen-1985-integrating</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>LMT</fixed-case>: a <fixed-case>P</fixed-case>rolog-Based Machine Translation System</title>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <bibkey>mccord-1985-lmt</bibkey>
     </paper>
     <paper id="13">
       <title>Recovering the Speaker’s Decisions during Mechanical Translation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <bibkey>mcdonald-1985-recovering</bibkey>
     </paper>
     <paper id="14">
       <title>Structural Transformation in the Generation Stage of the <fixed-case>MU</fixed-case> <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish Machine Translation System</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <bibkey>nagao-1985-structural</bibkey>
     </paper>
     <paper id="15">
       <title>Interlingua Design for translator</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Allen B.</first><last>Tucker</last></author>
+      <author id="allen-b-tucker"><first>Allen B.</first><last>Tucker</last></author>
       <bibkey>nirenburg-etal-1985-interlingua</bibkey>
     </paper>
     <paper id="16">
       <title>The Level Hypothesis in Discourse Analysis</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <bibkey>pustejovsky-1985-level</bibkey>
     </paper>
     <paper id="17">
@@ -110,24 +110,24 @@
     </paper>
     <paper id="18">
       <title>A Preliminary Linguistic Framework for <fixed-case>EUROTRA</fixed-case>, <fixed-case>J</fixed-case>une 1985</title>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <author><first>Doug</first><last>Arnold</last></author>
       <author><first>Lieven</first><last>Jaspaert</last></author>
       <author><first>Rod</first><last>Johnson</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <author><first>Nino</first><last>Varile</last></author>
-      <author><first>Susan</first><last>Warwick</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick</last></author>
       <bibkey>des-tombe-etal-1985-preliminary</bibkey>
     </paper>
     <paper id="19">
       <title>Feasibility Study of Personal/Interactive Machine Translation Systems</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <bibkey>tomita-1985-feasibility</bibkey>
     </paper>
     <paper id="20">
       <title>Static Grammars: A Formalism for the Description of Linguistic Models</title>
-      <author><first>Bernard</first><last>Vauquois</last></author>
+      <author id="bernard-vauquois"><first>Bernard</first><last>Vauquois</last></author>
       <author><first>Sylviane</first><last>Chappuy</last></author>
       <bibkey>vauquois-chappuy-1985-static</bibkey>
     </paper>
@@ -143,17 +143,17 @@
     </paper>
     <paper id="23">
       <title>Reflections on the Knowledge Needed to Process Ill-Formed Language</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <bibkey>weischedel-1985-reflections</bibkey>
     </paper>
     <paper id="24">
       <title>Characteristics of the metal Machine Translation System at Production Stage</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <bibkey>white-1985-characteristics</bibkey>
     </paper>
     <paper id="25">
       <title>Relevance, Points of View and Dialogue Modelling</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <bibkey>wilks-1985-relevance</bibkey>
     </paper>
   </volume>
diff --git a/data/xml/1986.tc.xml b/data/xml/1986.tc.xml
index b357259005..a6a44cda35 100644
--- a/data/xml/1986.tc.xml
+++ b/data/xml/1986.tc.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>Continuing training for the language professions: a survey of needs</title>
-      <author><first>Anthony F.</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony F.</first><last>Hartley</last></author>
       <url hash="4cfea278">1986.tc-1.2</url>
       <bibkey>hartley-1986-continuing</bibkey>
     </paper>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="11">
       <title>Translation practice in <fixed-case>E</fixed-case>urope</title>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <url hash="7dc25e98">1986.tc-1.11</url>
       <bibkey>smith-1986-translation</bibkey>
     </paper>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="16">
       <title>Current machine translation systems developed with <fixed-case>GETA</fixed-case>’s methodology and software tools</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="a1b3ed1a">1986.tc-1.16</url>
       <bibkey>boitet-1986-current</bibkey>
     </paper>
diff --git a/data/xml/1987.mtsummit.xml b/data/xml/1987.mtsummit.xml
index b6c6e79637..e1d14b5566 100644
--- a/data/xml/1987.mtsummit.xml
+++ b/data/xml/1987.mtsummit.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>Present and future of machine translation systems — an introduction to the <fixed-case>MT</fixed-case> Summit —</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="5949195e">1987.mtsummit-1.1</url>
       <bibkey>nagao-1987-present</bibkey>
     </paper>
@@ -29,7 +29,7 @@
     </paper>
     <paper id="4">
       <title>Prospects in Machine Translation</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <url hash="e7c03d4f">1987.mtsummit-1.4</url>
       <bibkey>hutchins-1987-prospects</bibkey>
     </paper>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>TAURAS</fixed-case>: The Toshiba Machine Translation System</title>
-      <author><first>Shin-ya</first><last>Amano</last></author>
+      <author id="shin-ya-amano"><first>Shin-ya</first><last>Amano</last></author>
       <author><first>Yoshinao</first><last>Tsutsumi</last></author>
       <url hash="28a3149f">1987.mtsummit-1.14</url>
       <bibkey>amano-tsutsumi-1987-tauras</bibkey>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="19">
       <title>Interlingua - Technical Prospect of Interlingua -</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="efed2548">1987.mtsummit-1.19</url>
       <bibkey>carbonell-1987-interlingua</bibkey>
     </paper>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="23">
       <title>What is ‘<fixed-case>PIVOT</fixed-case>’?</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="453d0483">1987.mtsummit-1.23</url>
       <bibkey>tsujii-1987-pivot</bibkey>
     </paper>
@@ -194,14 +194,14 @@
     </paper>
     <paper id="31">
       <title>The Current Stage of the Mu-Project</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="a1d9a744">1987.mtsummit-1.31</url>
       <bibkey>tsujii-1987-current</bibkey>
     </paper>
     <paper id="32">
       <title><fixed-case>CMU</fixed-case> Project</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="0cad60ba">1987.mtsummit-1.32</url>
       <bibkey>tomita-carbonell-1987-cmu</bibkey>
     </paper>
@@ -262,7 +262,7 @@
     </paper>
     <paper id="42">
       <title>Governmental Views of <fixed-case>MT</fixed-case> for <fixed-case>I</fixed-case>taly</title>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="12ebb685">1987.mtsummit-1.42</url>
       <bibkey>zampolli-1987-governmental</bibkey>
     </paper>
@@ -280,7 +280,7 @@
     </paper>
     <paper id="45">
       <title>Concluding Remarks</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="0b39d771">1987.mtsummit-1.45</url>
       <bibkey>nagao-1987-concluding</bibkey>
     </paper>
diff --git a/data/xml/1987.tc.xml b/data/xml/1987.tc.xml
index a295bf243a..92bfc8651b 100644
--- a/data/xml/1987.tc.xml
+++ b/data/xml/1987.tc.xml
@@ -77,7 +77,7 @@
     </paper>
     <paper id="11">
       <title>A survey of termbanks worldwide</title>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <url hash="c982d848">1987.tc-1.11</url>
       <bibkey>mcnaught-1987-survey</bibkey>
     </paper>
diff --git a/data/xml/1988.tc.xml b/data/xml/1988.tc.xml
index cf5fa76b08..8dc0e6809f 100644
--- a/data/xml/1988.tc.xml
+++ b/data/xml/1988.tc.xml
@@ -112,7 +112,7 @@
     </paper>
     <paper id="17">
       <title>Themes in the work of Margaret Masterman</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="2eed6096">1988.tc-1.17</url>
       <bibkey>wilks-1988-themes</bibkey>
     </paper>
diff --git a/data/xml/1988.tmi.xml b/data/xml/1988.tmi.xml
index 33c6de7d45..d5da22c02e 100644
--- a/data/xml/1988.tmi.xml
+++ b/data/xml/1988.tmi.xml
@@ -24,37 +24,37 @@
       <title>A method of analyzing <fixed-case>J</fixed-case>apanese speech act types</title>
       <author><first>Kiyoshi</first><last>Kogure</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Hiroyuki</first><last>Maeda</last></author>
-      <author><first>Masako</first><last>Kume</last></author>
+      <author id="masako-kume"><first>Masako</first><last>Kume</last></author>
       <author><first>Susumu</first><last>Kato</last></author>
       <url hash="876ef814">1988.tmi-1.3</url>
       <bibkey>kogure-etal-1988-method</bibkey>
     </paper>
     <paper id="4">
       <title>Lexical realization in natural language generation</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Rita</first><last>McCardell</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Scott</first><last>Huffman</last></author>
       <author><first>Edward</first><last>Kernschaft</last></author>
-      <author><first>Irene</first><last>Nirenburg</last></author>
+      <author id="irene-nirenburg"><first>Irene</first><last>Nirenburg</last></author>
       <url hash="80e618ef">1988.tmi-1.4</url>
       <bibkey>nirenburg-etal-1988-lexical</bibkey>
     </paper>
     <paper id="5">
       <title>Multi-lingual text generation and the Meaning-Text Theory</title>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <author><first>Lidija</first><last>Iordanskaja</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <url hash="8a3945c3">1988.tmi-1.5</url>
       <bibkey>kittredge-etal-1988-multi</bibkey>
     </paper>
     <paper id="6">
       <title>‘Relaxed’ compositionality in machine translation</title>
       <author><first>Doug</first><last>Arnold</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <author><first>Louisa</first><last>Sadler</last></author>
       <url hash="88bf4498">1988.tmi-1.6</url>
       <bibkey>arnold-etal-1988-relaxed</bibkey>
@@ -67,13 +67,13 @@
     </paper>
     <paper id="8">
       <title>Towards speech translation systems</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="46ff4839">1988.tmi-1.8</url>
       <bibkey>tomita-1988-towards</bibkey>
     </paper>
     <paper id="9">
       <title>The Universal Parser Compiler and its application to a speech translation system</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <author><first>Marion</first><last>Kee</last></author>
       <author><first>Hiroaki</first><last>Saito</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
@@ -84,13 +84,13 @@
     <paper id="10">
       <title>Functional descriptions as a formalism for linguistic knowledge representation in a generation oriented approach</title>
       <author><first>Miyo</first><last>Otani</last></author>
-      <author><first>Nathalie</first><last>Simonin</last></author>
+      <author id="nathalie-simonin"><first>Nathalie</first><last>Simonin</last></author>
       <url hash="0f317048">1988.tmi-1.10</url>
       <bibkey>otani-simonin-1988-functional</bibkey>
     </paper>
     <paper id="11">
       <title>Computational complexity of left-associative grammar</title>
-      <author><first>Roland</first><last>Hausser</last></author>
+      <author id="roland-r-hausser"><first>Roland</first><last>Hausser</last></author>
       <url hash="3103eb64">1988.tmi-1.11</url>
       <bibkey>hausser-1988-computational</bibkey>
     </paper>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="13">
       <title>A translation aid system using flexible text retrieval based on syntax-matching</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Yutaka</first><last>Tsutsumi</last></author>
       <url hash="f08433c1">1988.tmi-1.13</url>
       <bibkey>sumita-tsutsumi-1988-translation</bibkey>
@@ -124,7 +124,7 @@
     </paper>
     <paper id="16">
       <title>A principle-based <fixed-case>K</fixed-case>orean/<fixed-case>J</fixed-case>apanese machine translation system: <fixed-case>NARA</fixed-case></title>
-      <author><first>Hee Sung</first><last>Chung</last></author>
+      <author id="hee-sung-chung"><first>Hee Sung</first><last>Chung</last></author>
       <url hash="0de22d4e">1988.tmi-1.16</url>
       <bibkey>chung-1988-principle</bibkey>
     </paper>
@@ -132,7 +132,7 @@
       <title>A comparative study of <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish sublanguage patterns</title>
       <author><first>Virginia</first><last>Teller</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="b10a1365">1988.tmi-1.17</url>
       <bibkey>teller-etal-1988-comparative</bibkey>
     </paper>
@@ -146,11 +146,11 @@
     <paper id="19">
       <title>A statistical approach to <fixed-case>F</fixed-case>rench/<fixed-case>E</fixed-case>nglish translation</title>
       <author><first>P.</first><last>Brown</last></author>
-      <author><first>John</first><last>Cocke</last></author>
-      <author><first>Stephen</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="john-cocke"><first>John</first><last>Cocke</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <author><first>P.</first><last>Roossin</last></author>
       <url hash="2c10f597">1988.tmi-1.19</url>
       <bibkey>brown-etal-1988-statistical-approach</bibkey>
@@ -165,7 +165,7 @@
       <title>Application of natural language interface to a machine translation problem</title>
       <author><first>Heidi M.</first><last>Johnson</last></author>
       <author><first>Yukiko</first><last>Sekine</last></author>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Gil C.</first><last>Kim</last></author>
       <url hash="7e8c4a27">1988.tmi-1.21</url>
       <bibkey>johnson-etal-1988-application</bibkey>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="23">
       <title>Bernard Vauqois’ contribution to the theory and practice of building <fixed-case>MT</fixed-case> systems: a historical perspective</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="49bc0855">1988.tmi-1.23</url>
       <bibkey>boitet-1988-bernard</bibkey>
     </paper>
diff --git a/data/xml/1989.mtsummit.xml b/data/xml/1989.mtsummit.xml
index 2a2b2dd2d8..d796c30d55 100644
--- a/data/xml/1989.mtsummit.xml
+++ b/data/xml/1989.mtsummit.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Two years after the <fixed-case>MT</fixed-case> Summit</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="734c9660">1989.mtsummit-1.1</url>
       <bibkey>nagao-1989-two</bibkey>
     </paper>
@@ -91,13 +91,13 @@
     </paper>
     <paper id="14">
       <title><fixed-case>LMT</fixed-case></title>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <url hash="0476a0f4">1989.mtsummit-1.14</url>
       <bibkey>mccord-1989-lmt</bibkey>
     </paper>
     <paper id="15">
       <title>The Rosetta project</title>
-      <author><first>Jan</first><last>Landsbergen</last></author>
+      <author id="jan-landsbergen"><first>Jan</first><last>Landsbergen</last></author>
       <url hash="546c2548">1989.mtsummit-1.15</url>
       <bibkey>landsbergen-1989-rosetta</bibkey>
     </paper>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>J</fixed-case>apanese view of the future of machine translation</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="40b4eadf">1989.mtsummit-1.18</url>
       <bibkey>nagao-1989-japanese</bibkey>
     </paper>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="22">
       <title>Computational Linguistics and <fixed-case>MT</fixed-case> in <fixed-case>I</fixed-case>taly</title>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="e0014765">1989.mtsummit-1.22</url>
       <bibkey>zampolli-1989-computational</bibkey>
     </paper>
@@ -173,7 +173,7 @@
     </paper>
     <paper id="27">
       <title><fixed-case>KBMT</fixed-case>-89 - A knowledge-based <fixed-case>MT</fixed-case> project at <fixed-case>C</fixed-case>arnegie <fixed-case>M</fixed-case>ellon <fixed-case>U</fixed-case>niversity</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="c540a2aa">1989.mtsummit-1.27</url>
       <bibkey>nirenburg-1989-kbmt</bibkey>
     </paper>
diff --git a/data/xml/1989.tc.xml b/data/xml/1989.tc.xml
index bf09f36128..c9ed0d7448 100644
--- a/data/xml/1989.tc.xml
+++ b/data/xml/1989.tc.xml
@@ -111,7 +111,7 @@
     </paper>
     <paper id="17">
       <title>Speech recognition, artificial intelligence and translation: how rosy a future?</title>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <url hash="020d2ceb">1989.tc-1.17</url>
       <bibkey>thompson-1989-speech</bibkey>
     </paper>
diff --git a/data/xml/1990.tc.xml b/data/xml/1990.tc.xml
index 6addff3e4f..2da27eae66 100644
--- a/data/xml/1990.tc.xml
+++ b/data/xml/1990.tc.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Machine Translation and Machine-Aided Translation - What’s going on</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="168852c6">1990.tc-1.1</url>
       <bibkey>tsujii-1990-machine</bibkey>
     </paper>
diff --git a/data/xml/1991.iwpt.xml b/data/xml/1991.iwpt.xml
index de1bfb050b..63fa622b21 100644
--- a/data/xml/1991.iwpt.xml
+++ b/data/xml/1991.iwpt.xml
@@ -20,14 +20,14 @@
     </meta>
     <paper id="1">
       <title>Proceedings of the Second International Workshop on Parsing Technologies (<fixed-case>IWPT</fixed-case> ’91)</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <author><first>Martin</first><last>Kay</last></author>
-      <author><first>Robert</first><last>Berwick</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Ronald</first><last>Kaplan</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-c-berwick"><first>Robert</first><last>Berwick</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald</first><last>Kaplan</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>i-viii</pages>
       <url hash="cefceb0a">1991.iwpt-1.1</url>
       <abstract>February 13-25, 1991</abstract>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="5">
       <title>Preprocessing and lexicon design for parsing technical text</title>
-      <author><first>Robert P.</first><last>Futrelle</last></author>
+      <author id="robert-p-futrelle"><first>Robert P.</first><last>Futrelle</last></author>
       <author><first>Christopher E.</first><last>Dunn</last></author>
       <author><first>Debra S.</first><last>Ellis</last></author>
       <author><first>Maurice J.</first><last>Pescitelli, Jr.</last></author>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="10">
       <title>Using Inheritance in <fixed-case>O</fixed-case>bject-<fixed-case>O</fixed-case>riented <fixed-case>P</fixed-case>rogramming to Combine Syntactic Rules and Lexical Idiosyncrasies</title>
-      <author><first>Benoît</first><last>Habert</last></author>
+      <author id="benoit-habert"><first>Benoît</first><last>Habert</last></author>
       <pages>79-88</pages>
       <url hash="1f3beb7c">1991.iwpt-1.10</url>
       <abstract>In parsing idioms and frozen expressions in French, one needs to combine general syntactic rules and idiosyncratic constraints. The inheritance structure provided by Object-Oriented Programming languages, and more specifically the combination of methods present in CLOS, Common Lisp Object System, appears as an elegant and efficient approach to deal with such a complex interaction.</abstract>
@@ -161,7 +161,7 @@
       <title>Processing Unknown Words in Continuous Speech Recognition</title>
       <author><first>Kenji</first><last>Kita</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
-      <author><first>Tsuyoshi</first><last>Morimoto</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyoshi</first><last>Morimoto</last></author>
       <pages>136-142</pages>
       <url hash="c80ff2fc">1991.iwpt-1.16</url>
       <abstract>Current continuous speech recognition systems essentially ignore unknown words. Systems are designed to recognize words in the lexicon. However, for using speech recognition systems in real applications of spoken-language processing, it is very important to process unknown words. This paper proposes a continuous speech recognition method which accepts any utterance that might include unknown words. In this method, words not in the lexicon are transcribed as phone sequences, while words in the lexicon are recognized correctly. The HMM-LR speech recognition system, which is an integration of Hidden Markov Models and generalized LR parsing, is used as the baseline system, and enhanced with the trigram model of syllables to take into account the stochastic characteristics of a language. Preliminary results indicate that our approach is very promising.</abstract>
@@ -170,7 +170,7 @@
     <paper id="17">
       <title>The Specification and Implementation of Constraint-Based Unification Grammars</title>
       <author><first>Robert</first><last>Carpenter</last></author>
-      <author><first>Carl</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl</first><last>Pollard</last></author>
       <author><first>Alex</first><last>Franz</last></author>
       <pages>143-153</pages>
       <url hash="944d48c7">1991.iwpt-1.17</url>
@@ -179,8 +179,8 @@
     </paper>
     <paper id="18">
       <title>Probabilistic <fixed-case>LR</fixed-case> Parsing for General Context-Free Grammars</title>
-      <author><first>See-Kiong</first><last>Ng</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <pages>154-163</pages>
       <url hash="5670fa72">1991.iwpt-1.18</url>
       <abstract>To combine the advantages of probabilistic grammars and generalized LR parsing, an algorithm for constructing a probabilistic LR parser given a probabilistic context-free grammar is needed. In this paper, implementation issues in adapting Tomita’s generalized LR parser with graph-structured stack to perform probabilistic parsing are discussed. Wright and Wrigley (1989) has proposed a probabilistic LR-table construction algorithm for non-left-recursive context-free grammars. To account for left recursions, a method for computing item probabilities using the generation of systems of linear equations is presented. The notion of deferred probabilities is proposed as a means for dealing with similar item sets with differing probability assignments.</abstract>
@@ -205,7 +205,7 @@
     <paper id="21">
       <title>Unification-Based Dependency Parsing of Governor-Final Languages</title>
       <author><first>Hyuk-Chul</first><last>Kwon</last></author>
-      <author><first>Aesun</first><last>Yoon</last></author>
+      <author id="aesun-yoon"><first>Aesun</first><last>Yoon</last></author>
       <pages>182-192</pages>
       <url hash="0770231a">1991.iwpt-1.21</url>
       <abstract>This paper describes a unification-based dependency parsing method for governor-final languages. Our method can parse not only projective sentences but also non-projective sentences. The feature structures in the tradition of the unification-based formalism are used for writing dependency relations. We use a structure sharing and a local ambiguity packing to save storage.</abstract>
@@ -213,8 +213,8 @@
     </paper>
     <paper id="22">
       <title><fixed-case>P</fixed-case>earl: A Probabilistic Chart Parser</title>
-      <author><first>David M.</first><last>Magerman</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <pages>193-199</pages>
       <url hash="540fbb96">1991.iwpt-1.22</url>
       <abstract>This paper describes a natural language parsing algorithm for unrestricted text which uses a probability-based scoring function to select the “best” parse of a sentence. The parser, Pearl, is a time-asynchronous bottom-up chart parser with Earley-type top-down prediction which pursues the highest-scoring theory in the chart, where the score of a theory represents the extent to which the context of the sentence predicts that interpretation. This parser differs from previous attempts at stochastic parsers in that it uses a richer form of conditional probabilities based on context to predict likelihood. Pearl also provides a framework for incorporating the results of previous work in part-of-speech assignment, unknown word models, and other probabilistic models of linguistic features into one parsing tool, interleaving these techniques instead of using the traditional pipeline architecture. In preliminary tests, Pearl has been successful at resolving part-of-speech and word (in speech processing) ambiguity, determining categories for unknown words, and selecting correct parses first using a very loosely fitting covering grammar.</abstract>
@@ -232,7 +232,7 @@
     <paper id="24">
       <title>Stochastic Context-Free Grammars for Island-Driven Probabilistic Parsing</title>
       <author><first>Anna</first><last>Corazza</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <author><first>Roberto</first><last>Gretter</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>210-217</pages>
diff --git a/data/xml/1991.mtsummit.xml b/data/xml/1991.mtsummit.xml
index babf1f7c0a..9e84fb2f1a 100644
--- a/data/xml/1991.mtsummit.xml
+++ b/data/xml/1991.mtsummit.xml
@@ -22,7 +22,7 @@
       <title>Advances in Machine Translation Research in <fixed-case>IBM</fixed-case></title>
       <author><first>Mori</first><last>Rimon</last></author>
       <author><first>Pilar</first><last>Martinez</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <author><first>Ulrike</first><last>Schwall</last></author>
       <pages>11-18</pages>
       <url hash="10af4fff">1991.mtsummit-papers.2</url>
@@ -31,8 +31,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>ULTRA</fixed-case>: A Multi-lingual Machine Translator</title>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>19-24</pages>
       <url hash="94149b23">1991.mtsummit-papers.3</url>
       <abstract>ULTRA (Universal Language TRAnslator) is a multilingual, interlingual machine translation system currently under development at the Computing Research Laboratory at New Mexico State University. It translates between five languages (Chinese, English, German, Japanese, Spanish) with vocabularies in each language based on approximately 10,000 word senses. The major design criteria are that the system be robust and general purpose with simple to use utilities for customization to suit the needs of particular users. This paper describes the central characteristics of the system: the intermediate representation, the language components, semantic and pragmatic processes, and supporting lexical entry tools.</abstract>
@@ -42,7 +42,7 @@
       <title>Capturing Language-Specific Semantic Distinctions in Interlingua-based <fixed-case>MT</fixed-case></title>
       <author><first>James</first><last>Barnett</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Elaine</first><last>Rich</last></author>
+      <author id="elaine-rich"><first>Elaine</first><last>Rich</last></author>
       <author><first>Chinatsu</first><last>Aone</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Juan C.</first><last>Martinez</last></author>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="7">
       <title>Applying an Experimental <fixed-case>MT</fixed-case> System to a Realistic Problem</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Katharina</first><last>Boeseleldt</last></author>
       <pages>45-49</pages>
       <url hash="4f65f5f7">1991.mtsummit-papers.7</url>
@@ -91,8 +91,8 @@
     <paper id="9">
       <title>An Efficient Interlingua Translation System for Multi-lingual Document Production</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric H.</first><last>Nyberg</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <pages>55-61</pages>
       <url hash="6626cc37">1991.mtsummit-papers.9</url>
       <abstract>Knowledge-based interlingual machine translation systems produce semantically accurate translations, but typically require massive knowledge acquisition. This paper describes KANT, a system that reduces this requirement to produce practical, scalable, and accurate KBMT applications. First, the set of requirements is discussed, then the full KANT architecture is illustrated, and finally results from a fully implemented prototype are presented.</abstract>
@@ -119,9 +119,9 @@
     </paper>
     <paper id="12">
       <title><fixed-case>EJ</fixed-case>/<fixed-case>JE</fixed-case> Machine Translation System <fixed-case>ASTRANSAC</fixed-case> — Extensions toward Personalization</title>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Hiroyasu</first><last>Nogami</last></author>
-      <author><first>Shin-ya</first><last>Amano</last></author>
+      <author id="shin-ya-amano"><first>Shin-ya</first><last>Amano</last></author>
       <pages>73-80</pages>
       <url hash="98fefe73">1991.mtsummit-papers.12</url>
       <abstract>The demand for personal use of a translation system seems to be increasing in accordance with the improvement in MT quality. A recent portable and powerful engineering workstation, such as AS1000 (SPARC LT), enables us to develop a personal-use oriented MT system This paper describes the outline of ASTRANSAC (an English-Japanese/Japanese- English bi-directional MT system) and the extensions related to the personalization of ASTRANSAC, which have been newly made since the MT Summit II.</abstract>
@@ -150,8 +150,8 @@
     <paper id="15">
       <title>Toward High Performance Machine Translation: Preliminary Results from Massively Parallel Memory-Based Translation on <fixed-case>SNAP</fixed-case></title>
       <author><first>Hiroaki</first><last>Kitano</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Seungho</first><last>Cha</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="seungho-cha"><first>Seungho</first><last>Cha</last></author>
       <pages>93-100</pages>
       <url hash="7fa5d9cf">1991.mtsummit-papers.15</url>
       <abstract>This paper describes a memory-based machine translation system developed for the Semantic Net- work Array Processor (SNAP). The goal of our work is to develop a scalable and high-performance memory-based machine translation system which utilizes the high degree of parallelism provided by the SNAP machine. We have implemented an experimental machine translation system DMSNAP as a central part of a real-time speech-to-speech dia- logue translation system. It is a SNAP version of the ΦDMDIALOG speech-to-speech translation system. Memory-based natural language processing and syntactic constraint network model has been incorporated using parallel marker-passing which is directly supported from hardware level. Experimental results demonstrate that the parsing of a sentence is done in the order of milliseconds.</abstract>
@@ -160,7 +160,7 @@
     <paper id="16">
       <title>Toward an <fixed-case>MT</fixed-case> System without Pre-Editing: Effects of a New Method in <fixed-case>ALT</fixed-case>-<fixed-case>J</fixed-case>/<fixed-case>E</fixed-case></title>
       <author><first>Satoru</first><last>Ikehara</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Akio</first><last>Yokoo</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
       <pages>101-106</pages>
@@ -170,7 +170,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>KIELIKONE</fixed-case> Machine Translation Workstation</title>
-      <author><first>Harri</first><last>Jäppinen</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
       <author><first>L.</first><last>Kulikov</last></author>
       <author><first>A.</first><last>Ylä-Rotiala</last></author>
       <pages>107-111</pages>
@@ -243,7 +243,7 @@
       <author><first>Harald</first><last>Hille</last></author>
       <author><first>Martin</first><last>Kay</last></author>
       <author><first>Frederick</first><last>Klein</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>131-140</pages>
       <url hash="2f0559c5">1991.mtsummit-panels.4</url>
       <bibkey>gross-etal-1991-translators</bibkey>
@@ -251,7 +251,7 @@
     <paper id="5">
       <title>Evaluation of <fixed-case>MT</fixed-case> Systems</title>
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Sture</first><last>Allen</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Doris</first><last>Albisser</last></author>
diff --git a/data/xml/1991.tc.xml b/data/xml/1991.tc.xml
index 54d5550a45..43bd733ca0 100644
--- a/data/xml/1991.tc.xml
+++ b/data/xml/1991.tc.xml
@@ -11,14 +11,14 @@
     </meta>
     <paper id="1">
       <title>Why Computers Do Not Translate Better</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <url hash="314f09a5">1991.tc-1.1</url>
       <bibkey>hutchins-1991-computers</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>TEI</fixed-case>-<fixed-case>TERM</fixed-case>: an <fixed-case>SGML</fixed-case>-based interchange format for terminology files The <fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank</title>
-      <author><first>Alan</first><last>Melby</last></author>
-      <author><first>Sue Ellen</first><last>Wright</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
+      <author id="sue-ellen-wright"><first>Sue Ellen</first><last>Wright</last></author>
       <url hash="b6edb439">1991.tc-1.2</url>
       <bibkey>melby-wright-1991-tei</bibkey>
     </paper>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="12">
       <title>Current Practical Machine Translation Systems in <fixed-case>J</fixed-case>apan and Future Directions <fixed-case>EUROTRA</fixed-case>: an assessment of the current state of the <fixed-case>EC</fixed-case>’s <fixed-case>MT</fixed-case> programme</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="3bbb8c6f">1991.tc-1.12</url>
       <bibkey>nagao-1991-current</bibkey>
     </paper>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="14">
       <title>Machine Translation Seen as Interactive Multilingual Text Generation</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <author><first>Danny</first><last>Jones</last></author>
       <url hash="3f34069e">1991.tc-1.14</url>
       <bibkey>somers-jones-1991-machine</bibkey>
diff --git a/data/xml/1992.tc.xml b/data/xml/1992.tc.xml
index 44f6dc0ef6..a9424ddd16 100644
--- a/data/xml/1992.tc.xml
+++ b/data/xml/1992.tc.xml
@@ -93,11 +93,11 @@
     </paper>
     <paper id="14">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>wedish translation of dialogue software</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <author><first>Steve</first><last>Pulman</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <url hash="8a9c9b66">1992.tc-1.14</url>
       <bibkey>alshawi-etal-1992-english</bibkey>
     </paper>
diff --git a/data/xml/1992.tmi.xml b/data/xml/1992.tmi.xml
index 9c6046b137..0752f28239 100644
--- a/data/xml/1992.tmi.xml
+++ b/data/xml/1992.tmi.xml
@@ -11,10 +11,10 @@
     <paper id="1">
       <title>Translation equivalence and lexicalization in the <fixed-case>ACQUILEX</fixed-case> <fixed-case>LKB</fixed-case></title>
       <author><first>Antonio</first><last>Sanfilippo</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Maria Antònia</first><last>Martí</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>Maria Antònia</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <author><first>Antonietta</first><last>Alonge</last></author>
       <url hash="30a6b6e0">1992.tmi-1.1</url>
       <bibkey>sanfilippo-etal-1992-translation</bibkey>
@@ -63,18 +63,18 @@
     </paper>
     <paper id="8">
       <title>Analysis, statistical transfer, and synthesis in machine translation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <url hash="07bc87d4">1992.tmi-1.8</url>
       <bibkey>brown-etal-1992-analysis</bibkey>
     </paper>
     <paper id="9">
       <title>Using bilingual materials to develop word sense disambiguation methods</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="1f1b9ce6">1992.tmi-1.9</url>
       <bibkey>gale-etal-1992-using</bibkey>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="11">
       <title>Are the grammars so far developed appropriate to recognize the real structure of a sentence?</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="6e01d4f6">1992.tmi-1.11</url>
       <bibkey>nagao-1992-grammars</bibkey>
     </paper>
@@ -100,7 +100,7 @@
     </paper>
     <paper id="13">
       <title>Interactive multilingual text generation for a monolingual user</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="a5d12d3a">1992.tmi-1.13</url>
       <bibkey>somers-1992-interactive</bibkey>
     </paper>
@@ -139,14 +139,14 @@
     <paper id="19">
       <title>Contextual constraints for <fixed-case>MT</fixed-case></title>
       <author><first>Kurt</first><last>Eberle</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
       <author><first>Christian</first><last>Rohrer</last></author>
       <url hash="702f5e41">1992.tmi-1.19</url>
       <bibkey>eberle-etal-1992-contextual</bibkey>
     </paper>
     <paper id="20">
       <title>The <fixed-case>KANT</fixed-case> perspective: a critique of pure transfer (and pure interlingua, pure statistics, .. )</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>Eric H.</first><last>Nyberg 3rd</last></author>
       <url hash="c8c3d67b">1992.tmi-1.20</url>
@@ -168,7 +168,7 @@
     </paper>
     <paper id="23">
       <title>Combining rationalist and empiricist approaches to machine translation</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
       <url hash="e8758df0">1992.tmi-1.23</url>
       <bibkey>grishman-kosaka-1992-combining</bibkey>
diff --git a/data/xml/1993.eamt.xml b/data/xml/1993.eamt.xml
index a5ca89f159..c1985171ec 100644
--- a/data/xml/1993.eamt.xml
+++ b/data/xml/1993.eamt.xml
@@ -21,8 +21,8 @@
     </paper>
     <paper id="2">
       <title>Knowledge extraction from machine-readable dictionaries: an evaluation</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>19-34</pages>
       <abstract>Machine-readable versions of everyday dictionaries have been seen as a likely source of information for use in natural language processing because they contain an enormous amount of lexical and semantic knowledge. However, after 15 years of research, the results appear to be disappointing. No comprehensive evaluation of machine-readable dictionaries (MRDs) as a knowledge source has been made to date, although this is necessary to determine what, if anything, can be gained from MRD research. To this end, this paper will first consider the postulates upon which MRD research has been based over the past fifteen years, discuss the validity of these postulates, and evaluate the results of this work. We will then propose possible future directions and applications that may exploit these years of effort, in the light of current directions in not only NLP research, but also fields such as lexicography and electronic publishing.</abstract>
       <bibkey>ide-veronis-1993-knowledge</bibkey>
@@ -51,21 +51,21 @@
     </paper>
     <paper id="6">
       <title>Memory-based lexical acquisition and processing</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>85-98</pages>
       <abstract>Current approaches to computational lexicology in language technology are knowledge-based (competence-oriented) and try to abstract away from specific formalisms, domains, and applications. This results in severe complexity, acquisition and reusability bottlenecks. As an alternative, we propose a particular performance-oriented approach to Natural Language Processing based on automatic memory-based learning of linguistic (lexical) tasks. The consequences of the approach for computational lexicology are discussed, and the application of the approach on a number of lexical acquisition and disambiguation tasks in phonology, morphology and syntax is described.</abstract>
       <bibkey>daelemans-1993-memory</bibkey>
     </paper>
     <paper id="7">
       <title>Typed feature formalisms as a common basis for linguistic specification</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>101-119</pages>
       <abstract>Typed feature formalisms (TFF) play an increasingly important role in NLP and, in particular, in MT. Many of these systems are inspired by Pollard and Sag’s work on Head-Driven Phrase Structure Grammar (HPSG), which has shown that a great deal of syntax and semantics can be neatly encoded within TFF. However, syntax and semantics are not the only areas in which TFF can be beneficially employed. In this paper, I will show that TFF can also be used as a means to model finite automata (FA) and to perform certain types of logical inferencing. In particular, I will (i) describe how FA can be defined and processed within TFF and (ii) propose a conservative extension to HPSG, which allows for a restricted form of semantic processing within TFF, so that the construction of syntax and semantics can be intertwined with the simplification of the logical form of an utterance. The approach which I propose provides a uniform, HPSG-oriented framework for different levels of linguistic processing, including allomorphy and morphotactics, syntax, semantics, and logical form simplification.</abstract>
       <bibkey>krieger-1993-typed</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>E</fixed-case>uropean efforts towards standardizing language resources</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>121-130</pages>
       <abstract>This paper aims at providing a broad overview of the situation in Europe during the past few years, regarding efforts and concerted actions towards the standardization of large language resources, with particular emphasis on what is taking place in the fields of Computational Lexicons and Text Corpora. Attention will be focused on the plans, work in progress, and a few preliminary results of the LRE project EAGLES (Expert Advisory Group on Language Engineering Standards).</abstract>
       <bibkey>calzolari-1993-european</bibkey>
@@ -80,18 +80,18 @@
     </paper>
     <paper id="10">
       <title>A generic lexical model</title>
-      <author><first>Daniel</first><last>Bachut</last></author>
+      <author id="daniel-bachut"><first>Daniel</first><last>Bachut</last></author>
       <author><first>Isabelle</first><last>Duquennoy</last></author>
       <author><first>Lee</first><last>Humphreys</last></author>
       <author><first>Tita</first><last>Kyriakopoulou</last></author>
       <author><first>Anne</first><last>Monceaux</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Jean-Michel</first><last>Ombrouck</last></author>
       <author><first>Claire</first><last>Perrey</last></author>
       <author><first>Anne</first><last>Poncet-Montange</last></author>
       <author><first>Maria-Claudia</first><last>Puerta</last></author>
       <author><first>Caroline</first><last>Raffy</last></author>
-      <author><first>Brigitte</first><last>Roudaud</last></author>
+      <author id="brigitte-roudaud"><first>Brigitte</first><last>Roudaud</last></author>
       <author><first>Simon</first><last>Sabbagh</last></author>
       <pages>141-158</pages>
       <abstract>Linguistic engineering presupposes lexical resources. For translation, it is highly desirable that a Machine Translation engine and human translators should have access to the same dictionary information. The present paper describes a multilingual dictionary model, which integrates information for use by both humans and a variety of NLP systems. The model is used as a reference in the design of commercial translation products.</abstract>
@@ -107,7 +107,7 @@
     <paper id="12">
       <title>The use of terminological knowledge bases in software localisation</title>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <author><first>George A.</first><last>Vouros</last></author>
       <pages>174-188</pages>
       <abstract>This paper describes the work that was undertaken in the Glossasoft project in the area of terminology management. Some of the draw-backs of existing terminology management systems are outlined and an alternative approach to maintaining terminological data is proposed. The approach which we advocate relies on knowledge-based representation techniques. These are used to model conceptual knowledge about the terms included in the database, general knowledge about the subject domain, application-specific knowledge, and - of course - language-specific terminological knowledge. We consider the multifunctionality of the proposed architecture to be one of its major advantages. To illustrate this, we outline how the knowledge representation scheme, which we suggest, could be drawn upon in message generation and machine-assisted translation.</abstract>
diff --git a/data/xml/1993.iwpt.xml b/data/xml/1993.iwpt.xml
index d7f19ff470..6b1ed1e9e8 100644
--- a/data/xml/1993.iwpt.xml
+++ b/data/xml/1993.iwpt.xml
@@ -27,10 +27,10 @@
     </meta>
     <paper id="1">
       <title>Proceedings of the Third International Workshop on Parsing Technologies (<fixed-case>IWPT</fixed-case> ’93)</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>i-vii</pages>
       <url hash="257716fe">1993.iwpt-1.1</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>bunt-1993-proceedings</bibkey>
     </paper>
     <paper id="2">
@@ -51,7 +51,7 @@
     </paper>
     <paper id="4">
       <title>Parsing as Dynamic Interpretation</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Ko</first><last>van der Sloat</last></author>
       <pages>27-38</pages>
       <url hash="7d5d9484">1993.iwpt-1.4</url>
@@ -77,8 +77,8 @@
     <paper id="7">
       <title>A New Transformation into Deterministically Parsable Form for Natural Language Grammars</title>
       <author><first>Nigel R.</first><last>Ellis</last></author>
-      <author><first>Roberto</first><last>Garigliano</last></author>
-      <author><first>Richard G.</first><last>Morgan</last></author>
+      <author id="roberto-garigliano"><first>Roberto</first><last>Garigliano</last></author>
+      <author id="richard-g-morgan"><first>Richard G.</first><last>Morgan</last></author>
       <pages>61-72</pages>
       <url hash="94fb29eb">1993.iwpt-1.7</url>
       <abstract>Marcus demonstrated that it was possible to construct a deterministic grammar/interpreter for a subset of natural language [Marcus, 1980]. Although his work with PARSIFAL pioneered the field of deterministic natural language parsing, his method has several drawbacks: • The rules and actions in the grammar / interpreter are so embedded that it is difficult to distinguish between them. • The grammar / interpreter is very difficult to construct (the small grammar shown in [Marcus, 1980] took about four months to construct). • The grammar is very difficult to maintain, as a small change may have several side effects. This paper outlines a set of structure transformations for converting a non-deterministic grammar into deterministic form. The original grammar is written in a context free form; this is then transformed to resolve ambiguities.</abstract>
@@ -89,7 +89,7 @@
       <author><first>Joe</first><last>Garman</last></author>
       <author><first>Jeffery</first><last>Martin</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>73-88</pages>
       <url hash="49efe7c1">1993.iwpt-1.8</url>
       <abstract>In this paper we discuss the design and implementation of a parser for German and Arabic, which is currently being used in a tutoring system for foreign language training. Computer-aided language tutoring is a good application for testing the robustness and flexibility of a parsing system, since the input is usually ungrammatical in some way. Efficiency is also a concern, as tutoring applications typically run on personal computers, with the parser sharing memory with other components of the system. Our system is principle-based, which ensures a compact representation, and improves portability, needed in order to extend the initial design from German to Arabic and (eventually) Spanish. Currently, the parser diagnoses agreement errors, case errors, selection errors, and some word order errors. The parser can handle simple and complex declaratives and questions, topicalisations, verb movement, relative clauses — broad enough coverage to be useful in the design of real exercises and dialogues.</abstract>
@@ -116,7 +116,7 @@
     <paper id="11">
       <title>Structural Disambiguation in <fixed-case>J</fixed-case>apanese by Evaluating Case Structures based on Examples in a Case Frame Dictionary</title>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <pages>111-122</pages>
       <url hash="031e44d6">1993.iwpt-1.11</url>
       <abstract>A case structure expression is one of the most important forms to represent the <i>meaning</i> of a sentence. Case structure analysis is usually performed by consulting <i>case frame information</i> in verb dictionaries and by selecting a <i>proper case frame</i> for an input sentence. However, this analysis is very difficult because of <i>word sense ambiguity</i> and <i>structural ambiguity</i>. A conventional method for solving these problems is to use the method of <i>selectional restriction</i>, but this method has a drawback in the semantic marker (SM) system – the trade-off between descriptive power and construction cost. This paper describes a method of case structure analysis of Japanese sentences which overcomes the drawback in the SM system, concentrating on the structural disambiguation. This method selects a proper case frame for an input by the similarity measure between the input and typical example sentences of each case frame. When there are two or more possible readings for an input because of structural ambiguity, the best reading will be selected by evaluating case structures in each possible reading by the similarity measure with typical example sentences of case frames. </abstract>
@@ -124,8 +124,8 @@
     </paper>
     <paper id="12">
       <title><fixed-case>GLR</fixed-case>* – An Efficient Noise-skipping Parsing Algorithm For Context Free Grammars</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <pages>123-134</pages>
       <url hash="4dcdd9d6">1993.iwpt-1.12</url>
       <abstract>This paper describes GLR*, a parser that can parse any input sentence by ignoring unrecognizable parts of the sentence. In case the standard parsing procedure fails to parse an input sentence, the parser nondeterministically skips some word(s) in the sentence, and returns the parse with fewest skipped words. Therefore, the parser will return some parse(s) with any input sentence, unless no part of the sentence can be recognized at all. The problem can be defined in the following way: Given a context-free grammar <tex-math>G</tex-math> and a sentence <tex-math>S</tex-math>, find and parse <tex-math>S'</tex-math> – the largest subset of words of <tex-math>S</tex-math>, such that <tex-math>S' \in L(G)</tex-math>. The algorithm described in this paper is a modification of the Generalized LR (Tomita) parsing algorithm [Tomita, 1986] . The parser accommodates the skipping of words by allowing shift operations to be performed from inactive state nodes of the Graph Structured Stack. A heuristic similar to beam search makes the algorithm computationally tractable. There have been several other approaches to the problem of robust parsing, most of which are special purpose algorithms [Carbonell and Hayes, 1984] , [Ward, 1991] and others. Because our approach is a modification to a standard context-free parsing algorithm, all the techniques and grammars developed for the standard parser can be applied as they are. Also, in case the input sentence is by itself grammatical, our parser behaves exactly as the standard GLR parser. The modified parser, GLR*, has been implemented and integrated with the latest version of the Generalized LR Parser/Compiler [Tomita et al , 1988], [Tomita, 1990]. We discuss an application of the GLR* parser to spontaneous speech understanding and present some preliminary tests on the utility of the GLR* parser in such settings. </abstract>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="15">
       <title>The Interplay of Syntactic and Semantic Node Labels in Partial Parsing</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <pages>171-186</pages>
       <url hash="686ddb02">1993.iwpt-1.15</url>
       <abstract>Our natural language comprehension system, “Sparser” , uses a semantic grammar in conjunction with a domain model that defines the categories and already-known individuals that can be expected in the sublanguages we are studying, the most significant of which to date has been articles from the Wall Street Journal’s “Who’s News” column. In this paper we describe the systematic use of default syntactic rules in this grammar: an alternative set of labels on consitutents that are used to capture generalities in the semantic interpretation of constructions like the verbal auxiliaries or many adverbials. Syntactic rules form the basis of a set of schemas in a Tree Adjoining Grammar that are used as templates from which to create the primary, semantically labeled rules of the grammar as part of defining the categories in the domain models. This design permits the semantic grammar to be developed on a linguistically principled basis since all the rules must conform to syntactically sound patterns.</abstract>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="23">
       <title>Evaluation of <fixed-case>TTP</fixed-case> Parser: A Preliminary Report</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Peter G. N.</first><last>Scheyen</last></author>
       <pages>293-308</pages>
       <url hash="26fd3cf9">1993.iwpt-1.23</url>
@@ -228,9 +228,9 @@
     <paper id="24">
       <title>Frequency Estimation of Verb Subcategorization Frames Based on Syntactic and Multidimensional Statistical Analysis</title>
       <author><first>Akira</first><last>Ushioda</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <author><first>Ted</first><last>Gibson</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>309-318</pages>
       <url hash="aebf342e">1993.iwpt-1.24</url>
       <abstract>We describe a mechanism for automatically estimating frequencies of verb subcategorization frames in a large corpus. A tagged corpus is first partially parsed to identify noun phrases and then a regular grammar is used to estimate the appropriate subcategorization frame for each verb token in the corpus. In an experiment involving the identification of six fixed subcategorization frames, our current system showed more than 80% accuracy. In addition, a new statistical method enables the system to learn patterns of errors based on a set of training samples and substantially improves the accuracy of the frequency estimation.</abstract>
diff --git a/data/xml/1993.mtsummit.xml b/data/xml/1993.mtsummit.xml
index 12465135e8..ece60cdea1 100644
--- a/data/xml/1993.mtsummit.xml
+++ b/data/xml/1993.mtsummit.xml
@@ -10,14 +10,14 @@
     </meta>
     <paper id="1">
       <title>Machine Translation: What have we to do?</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <pages>3-10</pages>
       <url hash="a060b509">1993.mtsummit-1.1</url>
       <bibkey>nagao-1993-machine</bibkey>
     </paper>
     <paper id="2">
       <title>Latest Developments in Machine Translation Technology: Beginning a New Era in <fixed-case>MT</fixed-case> Research</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <pages>11-34</pages>
       <url hash="8a7c6e03">1993.mtsummit-1.2</url>
       <bibkey>hutchins-1993-latest</bibkey>
@@ -80,14 +80,14 @@
     </paper>
     <paper id="11">
       <title>Verbmobil: Translation of Face-To-Face Dialogs</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <pages>127-136</pages>
       <url hash="23a7138b">1993.mtsummit-1.11</url>
       <bibkey>wahlster-1993-verbmobil</bibkey>
     </paper>
     <paper id="12">
       <title>Corpora and Machine Translation</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>137-146</pages>
       <url hash="82cc3dde">1993.mtsummit-1.12</url>
       <bibkey>wilks-1993-corpora</bibkey>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="15">
       <title>Practical Speech Translation Systems will Integrate Human Expertise, Multimodal Communication, and Interactive Disambiguation</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>173-176</pages>
       <url hash="101c410d">1993.mtsummit-1.15</url>
       <bibkey>boitet-1993-practical</bibkey>
@@ -130,7 +130,7 @@
     </paper>
     <paper id="18">
       <title>A Direction of <fixed-case>MT</fixed-case> Development</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>189-194</pages>
       <url hash="1df19327">1993.mtsummit-1.18</url>
       <bibkey>nirenburg-1993-direction</bibkey>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="20">
       <title>After Linguistics-based <fixed-case>MT</fixed-case></title>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>197-198</pages>
       <url hash="9d6720e6">1993.mtsummit-1.20</url>
       <bibkey>tsujii-1993-linguistics</bibkey>
@@ -179,7 +179,7 @@
     </paper>
     <paper id="25">
       <title>Evaluation Method of Machine Translation: from the Viewpoint of Natural Language Processing</title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <pages>215-220</pages>
       <url hash="3c342299">1993.mtsummit-1.25</url>
       <bibkey>yokoyama-1993-evaluation</bibkey>
diff --git a/data/xml/1993.tc.xml b/data/xml/1993.tc.xml
index 5b7ffb914c..773c199206 100644
--- a/data/xml/1993.tc.xml
+++ b/data/xml/1993.tc.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>Developments in machine translation research in the <fixed-case>US</fixed-case></title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="60eb2811">1993.tc-1.1</url>
       <bibkey>wilks-1993-developments</bibkey>
     </paper>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="15">
       <title>Multilingual drafting of instructional texts</title>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="fd774c4d">1993.tc-1.15</url>
       <bibkey>scott-1993-multilingual</bibkey>
     </paper>
diff --git a/data/xml/1993.tmi.xml b/data/xml/1993.tmi.xml
index 22425af60e..25651f1248 100644
--- a/data/xml/1993.tmi.xml
+++ b/data/xml/1993.tmi.xml
@@ -33,7 +33,7 @@
     </paper>
     <paper id="4">
       <title>Two Approaches to Matching in Example-Based Machine Translation</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Constantine</first><last>Domashnev</last></author>
       <author><first>Dean J.</first><last>Grannes</last></author>
       <url hash="6c527be2">1993.tmi-1.4</url>
@@ -41,7 +41,7 @@
     </paper>
     <paper id="5">
       <title>Example-Based Translation of Technical Terms</title>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <url hash="c3ac1c6e">1993.tmi-1.5</url>
       <bibkey>sato-1993-example</bibkey>
     </paper>
@@ -49,13 +49,13 @@
       <title>Combining Dictionary-Based and Example-Based Methods for Natural Language Analysis</title>
       <author><first>Stephen D.</first><last>Richardson</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
-      <author><first>William</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William</first><last>Dolan</last></author>
       <url hash="6137505e">1993.tmi-1.6</url>
       <bibkey>richardson-etal-1993-combining</bibkey>
     </paper>
     <paper id="7">
       <title>An Example-Based Disambiguation of Prepositional Phrase Attachment</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Osamu</first><last>Furuse</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <url hash="295bfd49">1993.tmi-1.7</url>
@@ -85,14 +85,14 @@
     <paper id="11">
       <title>Treatment of Tense and Aspect in Translation from <fixed-case>I</fixed-case>talian to <fixed-case>G</fixed-case>reek — An Example of Treatment of Implicit Information in Knowledge-based Transfer <fixed-case>MT</fixed-case> —</title>
       <author><first>Margherita</first><last>Antona</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="8a767c84">1993.tmi-1.11</url>
       <bibkey>antona-tsujii-1993-treatment</bibkey>
     </paper>
     <paper id="12">
       <title>— An Example of Treatment of Implicit Information in Knowledge-based Transfer <fixed-case>MT</fixed-case> —</title>
       <author><first>Margherita</first><last>Antona</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="8a767c84">1993.tmi-1.12</url>
       <bibkey>antona-tsujii-1993-example</bibkey>
     </paper>
@@ -105,7 +105,7 @@
     <paper id="14">
       <title>An Idiom-based Approach to Machine Translation</title>
       <author><first>Hagyu</first><last>Lee</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <url hash="1495771a">1993.tmi-1.14</url>
       <bibkey>lee-kim-1993-idiom</bibkey>
     </paper>
@@ -130,7 +130,7 @@
       <author><first>J.</first><last>Tsutsumi</last></author>
       <author><first>N.</first><last>Aoki-Waibel</last></author>
       <author><first>A.</first><last>Waibel</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="b61bb06c">1993.tmi-1.16</url>
       <bibkey>woszczyna-etal-1993-recent-advances</bibkey>
     </paper>
@@ -147,20 +147,20 @@
     <paper id="18">
       <title>Determination of Referential Property and Number of Nouns in <fixed-case>J</fixed-case>apanese Sentences for Machine Translation into <fixed-case>E</fixed-case>nglish</title>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="d75fe9f8">1993.tmi-1.18</url>
       <bibkey>murata-nagao-1993-determination</bibkey>
     </paper>
     <paper id="19">
       <title>Translation into <fixed-case>E</fixed-case>nglish</title>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="d75fe9f8">1993.tmi-1.19</url>
       <bibkey>murata-nagao-1993-translation</bibkey>
     </paper>
     <paper id="20">
       <title>Effects of Automatic Rewriting of Source Language within a <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> System</title>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Satoru</first><last>Ikehara</last></author>
       <author><first>Tsukasa</first><last>Kawaoka</last></author>
       <url hash="15414c4b">1993.tmi-1.20</url>
@@ -170,15 +170,15 @@
       <title>Better Translation with Knowledge Extracted from Source Text</title>
       <author><first>Satoshi</first><last>Kinoshita</last></author>
       <author><first>Miwako</first><last>Shimazu</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <url hash="df273a86">1993.tmi-1.21</url>
       <bibkey>kinoshita-etal-1993-better</bibkey>
     </paper>
     <paper id="22">
       <title>Evaluation of <fixed-case>MT</fixed-case> Systems by <fixed-case>TOEFL</fixed-case></title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <author><first>Masako</first><last>Shirai</last></author>
-      <author><first>Junya</first><last>Tsutsumi</last></author>
+      <author id="junya-tsutsumi"><first>Junya</first><last>Tsutsumi</last></author>
       <author><first>Miki</first><last>Matsumura</last></author>
       <author><first/><last>Yuki</last></author>
       <url hash="53364c05">1993.tmi-1.22</url>
@@ -193,7 +193,7 @@
     <paper id="24">
       <title>Towards a Machine Translation System with Self-Critiquing Capability</title>
       <author><first>Kwangseob</first><last>Shim</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <url hash="994c4003">1993.tmi-1.24</url>
       <bibkey>shim-kim-1993-towards</bibkey>
     </paper>
@@ -205,14 +205,14 @@
     </paper>
     <paper id="26">
       <title>Evaluation of <fixed-case>DMAX</fixed-case> Criteria for Selecting Equivalent Translation based on Dual Corpora Statistics</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <url hash="2f1b332a">1993.tmi-1.26</url>
       <bibkey>doi-muraki-1993-evaluation</bibkey>
     </paper>
     <paper id="27">
       <title>Corpora Statistics</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <url hash="2f1b332a">1993.tmi-1.27</url>
       <bibkey>doi-muraki-1993-corpora</bibkey>
@@ -221,7 +221,7 @@
       <title>Automated Corpus Analysis and the Acquisition of Large, Multi-Lingual Knowledge Bases for <fixed-case>MT</fixed-case></title>
       <author><first>Teruko</first><last>Mitamara</last></author>
       <author><first>Eric H.</first><last>Nyberg 3rd</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="531b24d6">1993.tmi-1.28</url>
       <bibkey>mitamara-etal-1993-automated</bibkey>
     </paper>
diff --git a/data/xml/1994.amta.xml b/data/xml/1994.amta.xml
index e8e8234e16..05d1c19bcf 100644
--- a/data/xml/1994.amta.xml
+++ b/data/xml/1994.amta.xml
@@ -12,9 +12,9 @@
     <paper id="1">
       <title>A Hybrid Approach to Multilingual Text Processing: Information Extraction and Machine Translation</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Hatte</first><last>Blejer</last></author>
+      <author id="hatte-blejer"><first>Hatte</first><last>Blejer</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <url hash="cf47d134">1994.amta-1.1</url>
       <bibkey>aone-etal-1994-hybrid</bibkey>
     </paper>
@@ -23,14 +23,14 @@
       <author><first>Lynn</first><last>Carlson</last></author>
       <author><first>Elizabeth</first><last>Cooper</last></author>
       <author><first>Ronald</first><last>Dolan</last></author>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <url hash="1afb733c">1994.amta-1.2</url>
       <bibkey>carlson-etal-1994-representing</bibkey>
     </paper>
     <paper id="3">
       <title>Using Partially Aligned Parallel Text and Part-of-speech Information in Word Alignment</title>
-      <author><first>Jyun-Sheng</first><last>Chang</last></author>
-      <author><first>Huey-Chyun</first><last>Chen</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-Sheng</first><last>Chang</last></author>
+      <author id="huey-chyun-chen"><first>Huey-Chyun</first><last>Chen</last></author>
       <url hash="511405b9">1994.amta-1.3</url>
       <bibkey>chang-chen-1994-using</bibkey>
     </paper>
@@ -45,14 +45,14 @@
     </paper>
     <paper id="5">
       <title>Stylistic Choice in Machine Translation</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <url hash="a07ef706">1994.amta-1.5</url>
       <bibkey>dimarco-1994-stylistic</bibkey>
     </paper>
     <paper id="6">
       <title>The Case for a <fixed-case>MT</fixed-case> Developers’ Tool with a Two-Component View of the Interlingua</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <url hash="ae94fce7">1994.amta-1.6</url>
       <bibkey>dorr-voss-1994-case</bibkey>
     </paper>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>PANGLYZER</fixed-case>: <fixed-case>S</fixed-case>panish Language Analysis System</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Steven</first><last>Helmreich</last></author>
       <author><first>Wanying</first><last>Jin</last></author>
       <author><first>Mark</first><last>Casper</last></author>
@@ -85,46 +85,46 @@
     </paper>
     <paper id="10">
       <title>Integrating Translations from Multiple Sources within the <fixed-case>PANGLOSS</fixed-case> Mark <fixed-case>III</fixed-case> Machine Translation System</title>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Steven</first><last>Helmreich</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Constantino</first><last>Domashnev</last></author>
       <author><first>Donalee</first><last>Attardo</last></author>
       <author><first>Dean</first><last>Grannes</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <url hash="c218ccbc">1994.amta-1.10</url>
       <bibkey>frederking-etal-1994-integrating</bibkey>
     </paper>
     <paper id="11">
       <title>Aligning Noisy Parallel Corpora Across Language Groups: Word Pair Feature Matching by Dynamic Time Warping</title>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="e451f919">1994.amta-1.11</url>
       <bibkey>fung-mckeown-1994-aligning</bibkey>
     </paper>
     <paper id="12">
       <title>Complex Verb Transfer Phenomena in the <fixed-case>SLT</fixed-case> System</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Ivan</first><last>Bretan</last></author>
       <url hash="6aee0edc">1994.amta-1.12</url>
       <bibkey>gamback-bretan-1994-complex</bibkey>
     </paper>
     <paper id="13">
       <title>The Logos Translatability Index</title>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <url hash="1065d038">1994.amta-1.13</url>
       <bibkey>gdaniec-1994-logos</bibkey>
     </paper>
     <paper id="14">
       <title>An Adaptation of Lexical Conceptual Structures to Multilingual Processing in an Existing Text Understanding System</title>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
-      <author><first>Robert</first><last>Belvin</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="robert-s-belvin"><first>Robert</first><last>Belvin</last></author>
       <author><first>Alfredo</first><last>Arnaiz</last></author>
-      <author><first>Christine</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine</first><last>Montgomery</last></author>
       <author><first>Robert</first><last>Stumberger</last></author>
       <url hash="0261d6f2">1994.amta-1.14</url>
       <bibkey>stalls-etal-1994-adaptation</bibkey>
@@ -164,7 +164,7 @@
       <author><first>Ishwar</first><last>Chander</last></author>
       <author><first>Matthew</first><last>Haines</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Masayo</first><last>Iida</last></author>
       <author><first>Steve K.</first><last>Luk</last></author>
       <author><first>Akitoshi</first><last>Okumura</last></author>
@@ -184,7 +184,7 @@
     <paper id="20">
       <title>A Parameter-Based Message-Passing Parser for <fixed-case>MT</fixed-case> of <fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Dekang</first><last>Lin</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Jye-hoon</first><last>Lee</last></author>
       <author><first>Sungki</first><last>Suh</last></author>
       <url hash="64094fa7">1994.amta-1.20</url>
@@ -205,7 +205,7 @@
     <paper id="23">
       <title>Lexicon-to-Ontology Concept Association Using a Bilingual Dictionary</title>
       <author><first>Akitoshi</first><last>Okumura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="0de59939">1994.amta-1.23</url>
       <bibkey>okumura-hovy-1994-lexicon</bibkey>
     </paper>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="25">
       <title>The <fixed-case>ARPA</fixed-case> <fixed-case>MT</fixed-case> Evaluation Methodologies: Evolution, Lessons, and Future Approaches</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Theresa A.</first><last>O’Connell</last></author>
       <author><first>Francis E.</first><last>O’Mara</last></author>
       <url hash="244bd68f">1994.amta-1.25</url>
@@ -233,10 +233,10 @@
     </paper>
     <paper id="27">
       <title>Is <fixed-case>MT</fixed-case> Research Doing Any Good?</title>
-      <author><first>Kenneth</first><last>Church</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Bernard</first><last>Scott</last></author>
       <author><first>Virginia</first><last>Teller</last></author>
       <url hash="2066716e">1994.amta-1.27</url>
@@ -245,12 +245,12 @@
     <paper id="28">
       <title>The Role of <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Scott</first><last>Bennett</last></author>
-      <author><first>George</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George</first><last>Doddington</last></author>
       <author><first>Mary</first><last>Flanagan</last></author>
       <author><first>Laurie</first><last>Gerber</last></author>
       <author><first>Maghi</first><last>King</last></author>
       <author><first>Marjorie</first><last>León</last></author>
-      <author><first>John</first><last>White</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <url hash="8ea866e5">1994.amta-1.28</url>
       <bibkey>bennett-etal-1994-role</bibkey>
     </paper>
@@ -267,8 +267,8 @@
     </paper>
     <paper id="30">
       <title>Voices of Experience: <fixed-case>MT</fixed-case> in Operational Settings</title>
-      <author><first>Susan</first><last>Armstrong</last></author>
-      <author><first>Roberta</first><last>Merchant</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
+      <author id="roberta-h-merchant"><first>Roberta</first><last>Merchant</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <author><first>Karin</first><last>Spalink</last></author>
       <author><first>Mike</first><last>Tacelosky</last></author>
@@ -280,11 +280,11 @@
     <paper id="31">
       <title>Future Directions</title>
       <author><first>Joseph</first><last>Pentheroudakis</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Lutz</first><last>Graunitz</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <author><first>Chris</first><last>Montgomery</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="e94e90b0">1994.amta-1.31</url>
       <bibkey>pentheroudakis-etal-1994-future</bibkey>
     </paper>
@@ -316,8 +316,8 @@
     <paper id="36">
       <title><fixed-case>KANT</fixed-case>: Knowledge-Based, Accurate Natural Language Translation</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="0548c441">1994.amta-1.36</url>
       <bibkey>mitamura-etal-1994-kant</bibkey>
     </paper>
@@ -329,11 +329,11 @@
     </paper>
     <paper id="38">
       <title>Machine-Aided Voice Translation (<fixed-case>MAVT</fixed-case>): Advanced Development Model</title>
-      <author><first>Christine</first><last>Montgomery</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="christine-a-montgomery"><first>Christine</first><last>Montgomery</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
       <author><first>Robert</first><last>Stumberger</last></author>
       <author><first>Naicong</first><last>Li</last></author>
-      <author><first>Robert</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert</first><last>Belvin</last></author>
       <author><first>Alfredo</first><last>Arnaiz</last></author>
       <author><first>Susan Hirsh</first><last>Litenatsky</last></author>
       <url hash="5979ef00">1994.amta-1.38</url>
@@ -353,14 +353,14 @@
     </paper>
     <paper id="41">
       <title><fixed-case>PANGLOSS</fixed-case></title>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Steven</first><last>Helmreich</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="bb055340">1994.amta-1.41</url>
       <bibkey>carbonell-etal-1994-pangloss</bibkey>
     </paper>
diff --git a/data/xml/1994.bcs.xml b/data/xml/1994.bcs.xml
index 6d465116f6..27c09165dc 100644
--- a/data/xml/1994.bcs.xml
+++ b/data/xml/1994.bcs.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title>Some notes on the state of the art: Where are we now in <fixed-case>MT</fixed-case>: what works and what doesn’t?</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="0c6b9771">1994.bcs-1.2</url>
       <abstract>The paper examines briefly the impact of the “statistical turn” in machine translation (MT) R&amp;D in the last decade, and particularly the way in which it has made large scale language resources (lexicons, text corpora etc.) more important than ever before and reinforced the role of evaluation in the development of the field. But resources mean, almost by definition, co-operation between groups and, in the case of MT, specifically co-operation between language groups and states. The paper then considers what alternatives there are now for MT R&amp;D. One is to continue with interlingual methods of translation, even though those are not normally thought of as close to statistical methods. The reason is that statistical methods, taken alone, have almost certainly reached a ceiling in terms of the proportion of sentences and linguistic phenomena they can translate successfully. Interlingual methods remain popular within large electronics companies in Japan, and in a large US Government funded project (PANGLOSS). The question then discussed is what role there can be for interlinguas and interlingual methods in co-operation in MT across linguistic and national boundaries. The paper then turns to evaluation and asks whether, across national and continental boundaries, it can become a co-operative or a “hegemonic” enterprise. Finally the paper turns to resources themselves and asks why co-operation on resources is proving so hard, even though there are bright spots of real co-operation.</abstract>
       <bibkey>wilks-1994-notes</bibkey>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="4">
       <title>Research methods and system designs in machine translation: a ten-year review, 1984-1994</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="36bcdf5c">1994.bcs-1.4</url>
       <bibkey>hutchins-1994-research</bibkey>
     </paper>
@@ -61,7 +61,7 @@
     </paper>
     <paper id="8">
       <title>Machine translation, ten years on: Discourse has yet to make a breakthrough</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Johann</first><last>Haller</last></author>
       <url hash="27deb8b1">1994.bcs-1.8</url>
       <abstract>Progress in Machine Translation (MT) during the last ten years has been observed at different levels, but discourse has yet to make a breakthrough. MT research and development has concentrated so far mostly on sentence translation (discourse analysis being a very complicated task) and the successful operation of most of the working MT systems does not usually go beyond the sentence level. To start with, the paper will refer to the MT research and development in the last ten years at the IAI in Saarbrücken. Next, the MT discourse issues will be discussed both from the point of view of source language analysis and target text generation, and on the basis of the preliminary results of an ongoing "discourse-oriented MT" project . Probably the most important aspect in successfully analysing multisentential source texts is the capacity to establish the anaphoric references to preceding discourse entities. The paper will discuss the problem of anaphora resolution from the perspective of MT. A new integrated model for anaphora resolution, developed for the needs of MT, will be also outlined. As already mentioned, most machine translation systems perform translation sentence by sentence. But even in the case of paragraph translation, the discourse structure of the target text tends to be identical to that of the source text. However, the sublanguage discourse structures may differ across the different languages, and thus a translated text which assumes the same discourse structure as the source text may sound unnatural and perhaps disguise the true intent of the writer. Finally, the paper will outline a new approach for generating discourse structures, appropriate to the target sublanguage and will discuss some of the complicated problems encountered.</abstract>
@@ -107,15 +107,15 @@
     </paper>
     <paper id="15">
       <title>Translation by meaning and style in <fixed-case>LOLITA</fixed-case></title>
-      <author><first>Richard</first><last>Morgan</last></author>
-      <author><first>Mark</first><last>Smith</last></author>
+      <author id="richard-g-morgan"><first>Richard</first><last>Morgan</last></author>
+      <author id="mark-h-smith"><first>Mark</first><last>Smith</last></author>
       <author><first>Sengan</first><last>Short</last></author>
       <url hash="355d6718">1994.bcs-1.15</url>
       <bibkey>morgan-etal-1994-translation</bibkey>
     </paper>
     <paper id="16">
       <title>Providing factual information in <fixed-case>MAT</fixed-case></title>
-      <author><first>Walther</first><last>v. Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></author>
       <author><first>Galja</first><last>Angelova</last></author>
       <url hash="eae3e547">1994.bcs-1.16</url>
       <abstract>Most translations are needed for technical documents in specific domains and often the domain knowledge available to the translator is crucial for the efficiency and quality of the translation task. Our project1 aims at the investigation of a MAT-paradigm where the human user is supported by linguistic as well as by subject information ([vHa90], [vHAn92]). The basic hypotheses of the approach are: - domain knowledge is not encoded in the lexicon entries, i.e. we clearly distinguish between the language layer and the conceptual layer; - the representation of domain knowledge is language independent and replaces most of the semantic entries in a traditional semantic lexicon of MT/MAT-systems; - the user accesses domain information by highlighting a sequence in the source text and specifying the type of query; - factual explanations to the user should be simple and transparent although the underlying formalisms for knowledge representation and processing might be very complex; - as a language for knowledge representation, conceptual graphs (CGs) of Sowa [Sow84] were chosen. In providing connections between the terms (lexical entries) and the knowledge base our approach will be compared to terminological knowledge bases (TKBs) which are hybrid systems between concept-oriented term banks and knowledge bases. This paper presents: - a contrastive view to knowledge based techniques in MAT, - mechanisms for mapping the "ordinary" linguistic lexicon and the terminological lexicon of two languages onto one knowledge base, - methods to access the domain knowledge in a flexible way without allowing completely free linguistic dialogues, - techniques to present the result of queries to the translator in restricted natural language, and - use of domain knowledge to solve specific translation difficulties.</abstract>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="22">
       <title>Dialogue-Based <fixed-case>MT</fixed-case> and self-explaining documents as an alternative to <fixed-case>MAHT</fixed-case> and <fixed-case>MT</fixed-case> of controlled languages</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="bd08ee46">1994.bcs-1.22</url>
       <abstract>We argue that, in many situations, Dialogue-Based MT is likely to offer better solutions to translation needs than machine aids to translators or batch MT, even if controlled languages are used. Objections to DBMT have led us to introduce the new concept of “self-explaining document”, which might be used in monolingual as well as in multilingual contexts, and deeply change our way of understanding important or difficult written material.</abstract>
       <bibkey>boitet-1994-dialogue</bibkey>
@@ -182,7 +182,7 @@
     </paper>
     <paper id="26">
       <title>Machine translation and philosophy of language</title>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <url hash="c5e50c98">1994.bcs-1.26</url>
       <bibkey>melby-1994-machine</bibkey>
     </paper>
diff --git a/data/xml/1994.eamt.xml b/data/xml/1994.eamt.xml
index e3bc348560..9aceff8b37 100644
--- a/data/xml/1994.eamt.xml
+++ b/data/xml/1994.eamt.xml
@@ -56,7 +56,7 @@
     </paper>
     <paper id="9">
       <title>Compensation</title>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <bibkey>des-tombe-1994-compensation</bibkey>
     </paper>
     <paper id="10">
@@ -76,7 +76,7 @@
     </paper>
     <paper id="13">
       <title>Discourse processing for voice-to-voice machine translation</title>
-      <author><first>Susann</first><last>LuperFoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>LuperFoy</last></author>
       <bibkey>luperfoy-1994-discourse</bibkey>
     </paper>
     <paper id="14">
diff --git a/data/xml/1994.tc.xml b/data/xml/1994.tc.xml
index 053c7a57a9..ae792bfee7 100644
--- a/data/xml/1994.tc.xml
+++ b/data/xml/1994.tc.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>A New Era in Machine Translation Research</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="c8dbec9e">1994.tc-1.1</url>
       <bibkey>hutchins-1994-new</bibkey>
     </paper>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="4">
       <title>Interactive Corpus-based Translation Learning Tool (Translearn)</title>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <url hash="63b5bb5c">1994.tc-1.4</url>
       <bibkey>piperidis-1994-interactive</bibkey>
     </paper>
@@ -56,7 +56,7 @@
     </paper>
     <paper id="8">
       <title>Simplified <fixed-case>E</fixed-case>nglish grammar and style correction in an <fixed-case>MT</fixed-case> framework: The <fixed-case>LRE</fixed-case> <fixed-case>SECC</fixed-case> Project</title>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <url hash="4473d369">1994.tc-1.8</url>
       <bibkey>adriaens-1994-simplified</bibkey>
     </paper>
diff --git a/data/xml/1994.vlc.xml b/data/xml/1994.vlc.xml
index 049e881e42..2522795882 100644
--- a/data/xml/1994.vlc.xml
+++ b/data/xml/1994.vlc.xml
@@ -15,7 +15,7 @@
     <paper id="1">
       <title><fixed-case>TEI</fixed-case>-Conformant Structural Markup of a Trilingual Parallel Corpus in the <fixed-case>ECI</fixed-case> Multilingual Corpus 1</title>
       <author><first>David</first><last>McKelvie</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Henry S.</first><last>Thompson</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>7-18</pages>
       <abstract>In this paper we provide an overview of the ACL European Corpus Initiative (ECI) Multilingual Corpus 1 (ECI/MC1). In particular, we look at one particular subcorpus in the ECI/MC1, the trilingual corpus of International Labour Organisation reports, and discuss the problems involved in TEI-compliant structural markup and preliminary alignment of this large corpus. We discuss gross structural alignment down to the level of text paragraphs. We see this as a necessary first step in corpus preparation before detailed (possibly automatic) alignment of text is possible. We try and generalise our experience with this corpus to illustrate the process of preliminary markup of large corpora which in their raw state can be in an arbitrary format (eg printers tapes, proprietary word-processor format); noisy (not fully parallel, with structure obscured by spelling mistakes); full of poorly documented formatting instructions; and whose structure is present but anything but explicit. We illustrate these points by reference to other parallel subcorpora of ECI/MC1. We attempt to define some guidelines for the development of corpus annotation toolkits which would aid this kind of structural preparation of large corpora.</abstract>
       <url hash="ff5e8fc3">1994.vlc-1.1</url>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="5">
       <title>Iterative Alignment of Syntactic Structures for a Bilingual Corpus</title>
-      <author><first>Ralph</first><last>Grishman</last><affiliation>New York University</affiliation></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last><affiliation>New York University</affiliation></author>
       <pages>57-68</pages>
       <abstract>Alignment of parallel bilingual corpora at the level of syntactic structure holds the promise of being able to discover detailed bilingual structural correspondences automatically. This paper describes a procedure for the alignment of regularized syntactic structures, proceeding bottom-up through the trees. It makes use of information about possible lexical correspondences, from a bilingual dictionary, to generate initial candidate alignments. We consider in particular how much dictionary coverage is needed for the alignment process, and how the alignment can be iteratively improved by having an initial alignment generate additional lexical correspondences for the dictionary, and then using this augmented dictionary for subsequent alignment passes.</abstract>
       <url hash="7880c317">1994.vlc-1.5</url>
diff --git a/data/xml/1995.iwpt.xml b/data/xml/1995.iwpt.xml
index ccb54fc484..2516882396 100644
--- a/data/xml/1995.iwpt.xml
+++ b/data/xml/1995.iwpt.xml
@@ -73,7 +73,7 @@
     <paper id="6">
       <title>Parsing Non-Immediate Dominance Relations</title>
       <author><first>Tilman</first><last>Becker</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>26-33</pages>
       <url hash="0de5e2b6">1995.iwpt-1.6</url>
       <abstract>We present a new technique for parsing grammar formalisms that express non-immediate dominance relations by ‘dominance-links’. Dominance links have been introduced in various formalisms such as extensions to CFG and TAG in order to capture long-distance dependencies in free-word order languages (Becker et al., 1991; Rambow, 1994). We show how the addition of ‘link counters’ to standard parsing algorithms such as CKY- and Earley-based methods for TAG results in a polynomial time complexity algorithm for parsing lexicalized V-TAG, a multi-component version of TAGs defined in (Rambow, 1994). A variant of this method has previously been applied to context-free grammar based formalisms such as UVG-DL.</abstract>
@@ -89,8 +89,8 @@
     </paper>
     <paper id="8">
       <title>Developing and Evaluating a Probabilistic <fixed-case>LR</fixed-case> Parser of Part-of-Speech and Punctuation Labels</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>48-58</pages>
       <url hash="62cfa85d">1995.iwpt-1.8</url>
       <abstract>We describe an approach to robust domain-independent syntactic parsing of unrestricted naturally-occurring (English) input. The technique involves parsing sequences of part-of-speech and punctuation labels using a unification-based grammar coupled with a probabilistic LR parser. We describe the coverage of several corpora using this grammar and report the results of a parsing experiment using probabilities derived from bracketed training data. We report the first substantial experiments to assess the contribution of punctuation to deriving an accurate syntactic analysis, by parsing identical texts both with and without naturally-occurring punctuation marks.</abstract>
@@ -117,11 +117,11 @@
     <paper id="11">
       <title>Distributed Parsing With <fixed-case>HPSG</fixed-case> Grammars</title>
       <author><first>Abdel Kader</first><last>Diagne</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>79-86</pages>
       <url hash="02ce38a9">1995.iwpt-1.11</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>diagne-etal-1995-distributed</bibkey>
     </paper>
     <paper id="12">
@@ -154,7 +154,7 @@
     <paper id="15">
       <title>A Robust Parsing Algorithm for Link Grammars</title>
       <author><first>Dennis</first><last>Grinberg</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
       <author><first>Daniel</first><last>Sleator</last></author>
       <pages>111-125</pages>
       <url hash="24a145e3">1995.iwpt-1.15</url>
@@ -163,9 +163,9 @@
     </paper>
     <paper id="16">
       <title>An Implementation of Syntactic Analysis of <fixed-case>C</fixed-case>zech</title>
-      <author><first>Tomáš</first><last>Holan</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
-      <author><first>Martin</first><last>Plátek</last></author>
+      <author id="tomas-holan"><first>Tomáš</first><last>Holan</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Plátek</last></author>
       <pages>126-135</pages>
       <url hash="1c63a694">1995.iwpt-1.16</url>
       <abstract>This paper describes current results achieved during the work on parsing of a free-word-order natural language (Czech) . It contains theoretical base for a new class of grammars - CFG extended for dependecies and non-projectivities – and also the description of the implementation of a parser and grammar-checker. The paper also describes some typical problems of parsing of free-word-order languages and their solutions (or discusssion of those problems), which are still subject of investigation. The implementation described here serves currently as a testing tool for the development of a large scale grammar of Czech. Some of the quantitative data from a processing of test sentences are also included.</abstract>
@@ -181,17 +181,17 @@
     </paper>
     <paper id="18">
       <title>On Parsing Control for Efficient Text Analysis</title>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>148-149</pages>
       <url hash="fe590662">1995.iwpt-1.18</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>ciravegna-lavelli-1995-parsing</bibkey>
     </paper>
     <paper id="19">
       <title>A Practical Dependency Parser</title>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <pages>150-151</pages>
       <url hash="8ebf73f0">1995.iwpt-1.19</url>
       <abstract>The working assumption is that cognitive modeling of NLP and engineering solutions to free text parsing can converge to optimal parsing. The claim of the paper is that the methodology to achieve such a result is to develop a concrete environment with a flexible parser, that allows the testing of various psycholinguistic strategies on real texts. In this paper we outline a flexible parser based on a dependency grammar.</abstract>
@@ -217,7 +217,7 @@
     <paper id="22">
       <title>Parsing Without Grammar</title>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <pages>174-185</pages>
       <url hash="c3e1f605">1995.iwpt-1.22</url>
       <abstract>We describe and evaluate experimentally a method to parse a tagged corpus without grammar modeling a natural language on context-free language. This method is based on the following three hypotheses. 1) Part-of-speech sequences on the right-hand side of a rewriting rule are less constrained as to what part-of-speech precedes and follows them than non-constituent sequences. 2) Part-of-speech sequences directly derived from the same non-terminal symbol have similar environments. 3) The most suitable set of rewriting rules makes the greatest reduction of the corpus size. Based on these hypotheses, the system finds a set of constituent-like part-of-speech sequences and replaces them with a new symbol. The repetition of these processes brings us a set of rewriting rules, a grammar, and the bracketed corpus.</abstract>
@@ -250,7 +250,7 @@
     <paper id="26">
       <title>A Corpus-based Probabilistic Grammar with Only Two Non-terminals</title>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>216-223</pages>
       <url hash="4aa6e318">1995.iwpt-1.26</url>
       <abstract>The availability of large, syntactically-bracketed corpora such as the Penn Tree Bank affords us the opportunity to automatically build or train broad-coverage grammars, and in particular to train probabilistic grammars. A number of recent parsing experiments have also indicated that grammars whose production probabilities are dependent on the context can be more effective than context-free grammars in selecting a correct parse. To make maximal use of context, we have automatically constructed, from the Penn Tree Bank version 2, a grammar in which the symbols S and NP are the only real nonterminals, and the other non-terminals or grammatical nodes are in effect embedded into the right-hand-sides of the S and NP rules. For example, one of the rules extracted from the tree bank would be S -&gt; NP VBX JJ CC VBX NP [1] ( where NP is a non-terminal and the other symbols are terminals – part-of-speech tags of the Tree Bank). The most common structure in the Tree Bank associated with this expansion is (S NP (VP (VP VBX (ADJ JJ) CC (VP VBX NP)))) [2]. So if our parser uses rule [1] in parsing a sentence, it will generate structure [2] for the corresponding part of the sentence. Using 94% of the Penn Tree Bank for training, we extracted 32,296 distinct rules ( 23,386 for S, and 8,910 for NP). We also built a smaller version of the grammar based on higher frequency patterns for use as a back-up when the larger grammar is unable to produce a parse due to memory limitation. We applied this parser to 1,989 Wall Street Journal sentences (separate from the training set and with no limit on sentence length). Of the parsed sentences (1,899), the percentage of no-crossing sentences is 33.9%, and Parseval recall and precision are 73.43% and 72 .61%.</abstract>
@@ -258,8 +258,8 @@
     </paper>
     <paper id="27">
       <title>Heuristics and Parse Ranking</title>
-      <author><first>B.</first><last>Srinivas</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <pages>224-233</pages>
       <url hash="473b0921">1995.iwpt-1.27</url>
@@ -277,20 +277,20 @@
     <paper id="29">
       <title>An <fixed-case>HPSG</fixed-case>-based Parser for Automatic Knowledge Acquisition</title>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>250-251</pages>
       <url hash="566016d1">1995.iwpt-1.29</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>torisawa-tsujii-1995-hpsg</bibkey>
     </paper>
     <paper id="30">
       <title>Parsing <fixed-case>D</fixed-case>-Tree Grammars</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>252-259</pages>
       <url hash="3ef0f98c">1995.iwpt-1.30</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>vijay-shanker-etal-1995-parsing</bibkey>
     </paper>
     <paper id="31">
@@ -304,10 +304,10 @@
     <paper id="32">
       <title>Partitioning Grammars and Composing Parsers</title>
       <author><first>Fuliang</first><last>Weng</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>271-272</pages>
       <url hash="537a8354">1995.iwpt-1.32</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>weng-stolcke-1995-partitioning</bibkey>
     </paper>
     <paper id="33">
diff --git a/data/xml/1995.mtsummit.xml b/data/xml/1995.mtsummit.xml
index 592b5e7de5..db8784e09f 100644
--- a/data/xml/1995.mtsummit.xml
+++ b/data/xml/1995.mtsummit.xml
@@ -27,25 +27,25 @@
     </paper>
     <paper id="4">
       <title>Problems with the second generation architecture and new trends in <fixed-case>MT</fixed-case></title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="2261e892">1995.mtsummit-1.4</url>
       <bibkey>somers-1995-problems</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>E</fixed-case>urotra, history and results</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="3b9aec89">1995.mtsummit-1.5</url>
       <bibkey>maegaard-1995-eurotra</bibkey>
     </paper>
     <paper id="6">
       <title>Reflections on the history and present state of <fixed-case>MT</fixed-case></title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="40db269b">1995.mtsummit-1.6</url>
       <bibkey>hutchins-1995-reflections</bibkey>
     </paper>
     <paper id="7">
       <title>Factors for success and failure in <fixed-case>MT</fixed-case></title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="ada585b7">1995.mtsummit-1.7</url>
       <bibkey>boitet-1995-factors</bibkey>
     </paper>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="12">
       <title>A bidirectional <fixed-case>R</fixed-case>ussian-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> system (<fixed-case>ETAP</fixed-case>-3)</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <url hash="a0dc2f11">1995.mtsummit-1.12</url>
       <bibkey>boguslavsky-1995-bidirectional</bibkey>
     </paper>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="23">
       <title>Machine Translation in the <fixed-case>C</fixed-case>zech <fixed-case>R</fixed-case>epublic: history, methods, systems</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="afbb8165">1995.mtsummit-1.23</url>
       <bibkey>hajic-1995-machine</bibkey>
     </paper>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="33">
       <title>What have we to do for the future of <fixed-case>MT</fixed-case> systems?</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="e2c1992a">1995.mtsummit-1.33</url>
       <bibkey>nagao-1995-future</bibkey>
     </paper>
@@ -227,7 +227,7 @@
     </paper>
     <paper id="37">
       <title>Approaches to black box <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="538ff607">1995.mtsummit-1.37</url>
       <bibkey>white-1995-approaches</bibkey>
     </paper>
@@ -239,7 +239,7 @@
     </paper>
     <paper id="39">
       <title>Verbmobil: Towards a <fixed-case>DRT</fixed-case>-based translation of spontaneous negotiation dialogues</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <url hash="9d8ad899">1995.mtsummit-1.39</url>
       <bibkey>wahlster-1995-verbmobil</bibkey>
     </paper>
@@ -251,7 +251,7 @@
     </paper>
     <paper id="41">
       <title>Translation and interpretation of spontaneous speech</title>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <bibkey>waibel-1995-translation</bibkey>
     </paper>
     <paper id="42">
@@ -262,7 +262,7 @@
     </paper>
     <paper id="43">
       <title>Issues in multimodal telecommunications</title>
-      <author><first>Tsuyoshi</first><last>Morimoto</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyoshi</first><last>Morimoto</last></author>
       <url hash="488c67dc">1995.mtsummit-1.43</url>
       <bibkey>morimoto-1995-issues</bibkey>
     </paper>
@@ -274,7 +274,7 @@
     </paper>
     <paper id="45">
       <title>Machine Translation for the office automation</title>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="20670f70">1995.mtsummit-1.45</url>
       <bibkey>choi-1995-machine</bibkey>
     </paper>
diff --git a/data/xml/1995.tc.xml b/data/xml/1995.tc.xml
index 972e59a255..ad61de74f3 100644
--- a/data/xml/1995.tc.xml
+++ b/data/xml/1995.tc.xml
@@ -74,9 +74,9 @@
     </paper>
     <paper id="11">
       <title>Using corpora to develop limited-domain speech translation systems</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <url hash="7f582adb">1995.tc-1.11</url>
       <bibkey>rayner-etal-1995-using</bibkey>
     </paper>
diff --git a/data/xml/1995.tmi.xml b/data/xml/1995.tmi.xml
index e433fe8b59..c6ae7228ee 100644
--- a/data/xml/1995.tmi.xml
+++ b/data/xml/1995.tmi.xml
@@ -19,8 +19,8 @@
     <paper id="2">
       <title>Translation using <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics</title>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Rob</first><last>Malouf</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="robert-malouf"><first>Rob</first><last>Malouf</last></author>
       <author><first>Susanne</first><last>Riehemann</last></author>
       <author><first>Ivan</first><last>Sag</last></author>
       <url hash="cd8a57c6">1995.tmi-1.2</url>
@@ -41,13 +41,13 @@
     </paper>
     <paper id="5">
       <title>A Sign-Based Approach to the Translation of Temporal Expressions</title>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <url hash="42300515">1995.tmi-1.5</url>
       <bibkey>van-eynde-1995-sign</bibkey>
     </paper>
     <paper id="6">
       <title>Anaphora Resolution in Machine Translation</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Randall</first><last>Sharp</last></author>
       <url hash="4d15bdd9">1995.tmi-1.6</url>
@@ -62,9 +62,9 @@
     </paper>
     <paper id="8">
       <title>Apologiae Ontologiae</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="19ca8218">1995.tmi-1.8</url>
       <bibkey>nirenburg-etal-1995-apologiae</bibkey>
     </paper>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="10">
       <title>Technological evaluation of a controlled language application: precision, recall and convergence tests for <fixed-case>SECC</fixed-case></title>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
       <url hash="e9e68ca8">1995.tmi-1.10</url>
       <bibkey>adriaens-macken-1995-technological</bibkey>
@@ -99,14 +99,14 @@
     </paper>
     <paper id="13">
       <title>Using Context in Machine Translation of Spoken Language</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Oren</first><last>Glickman</last></author>
       <author><first>Yan</first><last>Qu</last></author>
-      <author><first>Carolyn P.</first><last>Rose</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rose</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <url hash="c9110e10">1995.tmi-1.13</url>
       <bibkey>levin-etal-1995-using</bibkey>
     </paper>
@@ -122,7 +122,7 @@
       <author><first>M.</first><last>Gavalda</last></author>
       <author><first>Y-H.</first><last>Seo</last></author>
       <author><first>B.</first><last>Suhm</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <author><first>A.</first><last>Waibel</last></author>
       <url hash="626789fe">1995.tmi-1.15</url>
       <bibkey>mayfield-etal-1995-concept</bibkey>
@@ -138,8 +138,8 @@
     </paper>
     <paper id="17">
       <title>Applying Statistical <fixed-case>E</fixed-case>nglish Language Modelling to Symbolic Machine Translation</title>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <url hash="f1d36f2d">1995.tmi-1.17</url>
       <bibkey>brown-frederking-1995-applying</bibkey>
     </paper>
@@ -159,14 +159,14 @@
     </paper>
     <paper id="20">
       <title>Heterogeneous Computing for Example-Based Translation of Spoken Language</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <url hash="33638d7f">1995.tmi-1.20</url>
       <bibkey>sumita-iida-1995-heterogeneous</bibkey>
     </paper>
     <paper id="21">
       <title>Machine Translation: an Integration Approach</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <url hash="cc994fc7">1995.tmi-1.21</url>
       <bibkey>chen-chen-1995-machine</bibkey>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="24">
       <title>Constituent Shifts in the Logos <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman System</title>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <author><first>Patricia</first><last>Schmid</last></author>
       <url hash="9132e45c">1995.tmi-1.24</url>
       <bibkey>gdaniec-schmid-1995-constituent</bibkey>
@@ -201,8 +201,8 @@
     </paper>
     <paper id="26">
       <title>Spoken-Language Machine Translation in Limited Domains: Can it be Achieved by Finite-State Models?</title>
-      <author><first>Juan Miguel</first><last>Vilar</last></author>
-      <author><first>Antonio</first><last>Castellanos</last></author>
+      <author id="juan-miguel-vilar"><first>Juan Miguel</first><last>Vilar</last></author>
+      <author id="antonio-castellanos"><first>Antonio</first><last>Castellanos</last></author>
       <author><first>Juan Miguel</first><last>Jimenez</last></author>
       <author><first>J. A.</first><last>Sanchez</last></author>
       <author><first>E.</first><last>Vidal</last></author>
diff --git a/data/xml/1996.amta.xml b/data/xml/1996.amta.xml
index 7eee34e973..56cccbfdcc 100644
--- a/data/xml/1996.amta.xml
+++ b/data/xml/1996.amta.xml
@@ -38,14 +38,14 @@
     <paper id="5">
       <title>Translation differences and pragmatics-based <fixed-case>MT</fixed-case></title>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <url hash="a5298d4e">1996.amta-1.5</url>
       <bibkey>helmreich-farwell-1996-translation</bibkey>
     </paper>
     <paper id="6">
       <title>Abstraction and underspecification in semantic transfer</title>
       <author><first>Bernd</first><last>Abb</last></author>
-      <author><first>Bianka</first><last>Buschbeck-Wolf</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck-Wolf</last></author>
       <author><first>Christel</first><last>Tschernitschek</last></author>
       <url hash="8f31ea72">1996.amta-1.6</url>
       <bibkey>abb-etal-1996-abstraction</bibkey>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="8">
       <title>Capturing motion verb generalizations in synchronous tree adjoining grammars</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <url hash="ebfa4814">1996.amta-1.8</url>
       <bibkey>palmer-rosenzweig-1996-capturing</bibkey>
@@ -68,34 +68,34 @@
     <paper id="9">
       <title>Evolution of the <fixed-case>L</fixed-case>ogos grammar: system design and development methodology</title>
       <author><first>Patricia</first><last>Schmid</last></author>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <url hash="dd631c86">1996.amta-1.9</url>
       <bibkey>schmid-gdaniec-1996-evolution</bibkey>
     </paper>
     <paper id="10">
       <title>Two principles and six techniques for rapid <fixed-case>MT</fixed-case> development</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>Kavi</first><last>Mahesh</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
       <author><first>Evelyne</first><last>Viegas</last></author>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <url hash="8a704cc8">1996.amta-1.10</url>
       <bibkey>nirenburg-etal-1996-two</bibkey>
     </paper>
     <paper id="11">
       <title>Adaptation of the <fixed-case>DARPA</fixed-case> machine translation evlauation paradigm to end-to-end systems</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Theresa A.</first><last>O’Connell</last></author>
       <url hash="5a6b075c">1996.amta-1.11</url>
       <bibkey>white-oconnell-1996-adaptation</bibkey>
     </paper>
     <paper id="12">
       <title>Combining machine readable lexical resources and bilingual corpora for broad word sense disambiguation</title>
-      <author><first>Jason J. S.</first><last>Chang</last></author>
-      <author><first>Jen-Nan</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason J. S.</first><last>Chang</last></author>
+      <author id="jen-nan-chen"><first>Jen-Nan</first><last>Chen</last></author>
       <author><first>Huei-Hong</first><last>Sheng</last></author>
-      <author><first>Sur-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sur-Jin</first><last>Ker</last></author>
       <url hash="40dcf1dc">1996.amta-1.12</url>
       <bibkey>chang-etal-1996-combining</bibkey>
     </paper>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="20">
       <title>The state of machine translation in <fixed-case>E</fixed-case>urope</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="e38caeeb">1996.amta-1.20</url>
       <bibkey>hutchins-1996-state</bibkey>
     </paper>
@@ -159,7 +159,7 @@
     </paper>
     <paper id="22">
       <title>The primacy of core technology <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="d6464950">1996.amta-1.22</url>
       <bibkey>white-1996-primacy</bibkey>
     </paper>
@@ -176,13 +176,13 @@
     </paper>
     <paper id="24">
       <title>Panel: The limits of automation: optimists vs skeptics.</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <author><first>Denis</first><last>Gachot</last></author>
       <author><first>Marge</first><last>Leon</last></author>
-      <author><first>Alan</first><last>Melby</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="299be1e8">1996.amta-1.24</url>
       <bibkey>hovy-etal-1996-panel</bibkey>
     </paper>
@@ -199,8 +199,8 @@
     <paper id="26">
       <title>Panel: Next steps in <fixed-case>MT</fixed-case> research</title>
       <author><first>Lynn</first><last>Carlson</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <author><first>Jackie</first><last>Murgida</last></author>
       <author><first>John</first><last>O’Hara</last></author>
@@ -226,28 +226,28 @@
     </paper>
     <paper id="29">
       <title><fixed-case>ITSVOX</fixed-case></title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="f4b2d4eb">1996.amta-1.29</url>
       <bibkey>wehrli-1996-itsvox</bibkey>
     </paper>
     <paper id="30">
       <title><fixed-case>JANUS</fixed-case>: multi-lingual translation of spontaneous speech in limited domain</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Marsal</first><last>Gavalda</last></author>
-      <author><first>Laura</first><last>Mayfield</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavalda</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura</first><last>Mayfield</last></author>
       <url hash="cfbd372b">1996.amta-1.30</url>
       <bibkey>lavie-etal-1996-janus</bibkey>
     </paper>
     <paper id="31">
       <title><fixed-case>JAPANGLOSS</fixed-case>: using statistics to fill knowledge gaps</title>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Ishwar</first><last>Chander</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <author><first>Richard</first><last>Whitney</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
       <url hash="6973f966">1996.amta-1.31</url>
@@ -274,8 +274,8 @@
     </paper>
     <paper id="35">
       <title>The <fixed-case>P</fixed-case>angloss-<fixed-case>L</fixed-case>ite machine translation system</title>
-      <author><first>Robert E.</first><last>Frederking</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="60fdcece">1996.amta-1.35</url>
       <bibkey>frederking-brown-1996-pangloss</bibkey>
     </paper>
@@ -293,7 +293,7 @@
     </paper>
     <paper id="38">
       <title>Towards a multilingual analyst’s workstation: Temple</title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <url hash="3af45b2b">1996.amta-1.38</url>
       <bibkey>zajac-1996-towards</bibkey>
     </paper>
diff --git a/data/xml/1996.eamt.xml b/data/xml/1996.eamt.xml
index 6bdca03ba0..a3f4eefdab 100644
--- a/data/xml/1996.eamt.xml
+++ b/data/xml/1996.eamt.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="f29b58ec">1996.eamt-1.1</url>
       <bibkey>hutchins-1996-introduction</bibkey>
     </paper>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="3">
       <title>Machine Translation, Translation Memories and the Phrasal Lexicon: The Localisation Perspective</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="4321f1b7">1996.eamt-1.3</url>
       <bibkey>schaler-1996-machine</bibkey>
     </paper>
diff --git a/data/xml/1996.tc.xml b/data/xml/1996.tc.xml
index 5b9626f186..a3959286fd 100644
--- a/data/xml/1996.tc.xml
+++ b/data/xml/1996.tc.xml
@@ -41,7 +41,7 @@
     </paper>
     <paper id="6">
       <title>Towards a more efficient use of <fixed-case>PC</fixed-case>-based <fixed-case>MT</fixed-case> in education</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="aaeeb486">1996.tc-1.6</url>
       <bibkey>mitkov-1996-towards</bibkey>
     </paper>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="12">
       <title>Computer Support for Authoring Multilingual Software Documentation</title>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="922b1faa">1996.tc-1.12</url>
       <bibkey>scott-1996-computer</bibkey>
     </paper>
diff --git a/data/xml/1997.eamt.xml b/data/xml/1997.eamt.xml
index 86311a3a72..43ebb0e219 100644
--- a/data/xml/1997.eamt.xml
+++ b/data/xml/1997.eamt.xml
@@ -12,7 +12,7 @@
     </meta>
     <paper id="1">
       <title>Introduction</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="96001008">1997.eamt-1.1</url>
       <bibkey>hutchins-1997-introduction</bibkey>
     </paper>
@@ -42,7 +42,7 @@
     </paper>
     <paper id="6">
       <title>Why don’t they use translation tools?</title>
-      <author><first>Hanne</first><last>Fersøe</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
       <url hash="9007f5d5">1997.eamt-1.6</url>
       <bibkey>fersoe-1997-dont</bibkey>
     </paper>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="8">
       <title>The workflow in a document production environment using translation tools</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="89f15c74">1997.eamt-1.8</url>
       <bibkey>maegaard-1997-workflow</bibkey>
     </paper>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="12">
       <title>Providing multilingual term explanations in machine aided translation</title>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <url hash="f44b04de">1997.eamt-1.12</url>
       <bibkey>von-hahn-1997-providing</bibkey>
     </paper>
@@ -91,7 +91,7 @@
     <paper id="14">
       <title>Summary and conclusions</title>
       <author><first>Dimitri</first><last>Theologitis</last></author>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="f7b7b991">1997.eamt-1.14</url>
       <bibkey>theologitis-maegaard-1997-summary</bibkey>
     </paper>
diff --git a/data/xml/1997.iwpt.xml b/data/xml/1997.iwpt.xml
index 3a6dc39d29..02ca41ea41 100644
--- a/data/xml/1997.iwpt.xml
+++ b/data/xml/1997.iwpt.xml
@@ -46,7 +46,7 @@
     </paper>
     <paper id="3">
       <title>Intelligent Multimedia Information Access</title>
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <pages>xvii-xviii</pages>
       <url hash="f974e87f">1997.iwpt-1.3</url>
       <abstract>The expansion of the information highway has generated requirements for more effective access to global and corporate information repositories. These repositories are increasingly multimedia, including text, audio (e.g., spoken language, music), graphics, imagery, and video. The advent of large, multimedia digital libraries has turned attention toward the problem of processing and managing multiple and heterogeneous media in a principled manner, including their creation, storage, indexing, browsing, search, visualization, and summarization. Intelligent multimedia information access is a multidisciplinary area that lies at the intersection of artificial intelligence, information retrieval, human computer interaction, and multimedia computing. Intelligent multimedia information access includes those systems which go beyond traditional hypermedia or hypertext environments and analyze media, generate media, or support intelligent interaction with or via multiple media using knowledge of the user, discourse, domain, world, or the media itself. Providing machines with the ability to interpret, generate, and support interaction with multimedia artifacts (e.g., documents, broadcasts, hypermedia) will be a valuable facility for a number of key applications such as videoteleconference archiving, custom on-line news, and briefing assistants. These media facilities, in turn, may support a variety of tasks ranging from training to information analysis to decision support. In this talk I will describe our group’s efforts to provide content based access to broadcast news sources, including our use of corpus-based processing techniques to the problems of video indexing, segmentation, and summarization. In addition to better access to content, we also need to concern ourselves with enabling more effective, efficient and natural human computer or computer mediated human-human interaction. This will require automated understanding and generation of multimedia and demand explicit representation of and reasoning about the user, discourse, task and context (Maybury 1993). To this end, I will describe our work in progress that aims to fully instrument the interface and build ( automatically and semi-automatically) annotated corpora of human-machine interaction. We believe this will yield deeper and more comprehensive models of interaction which should ultimately enable more principled interface design.</abstract>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="4">
       <title>Making Use of Intonation in Interactive Dialogue Translation</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>xix</pages>
       <url hash="7621c8b8">1997.iwpt-1.4</url>
       <abstract>Intonational information is frequently discarded in speech recognition, and assigned by default heuristics in text-to-speech generation. However, in many applications involving dialogue and interactive discourse, intonation conveys significant information, and we ignore it at our peril. Translating telephones and personal assistants are an interesting test case, in which the salience of rapidly shifting discourse topics and the fact that sentences are machine-generated, rather than written by humans, combine to make the application particularly vulnerable to our poor theoretical grasp of intonation and its functions. I will discuss a number of approaches to the problem for such applications, ranging from cheap tricks to a combinatory grammar-based theory of the semantics involved and a syntax-phonology interface for building and generating from interpretations.</abstract>
@@ -70,8 +70,8 @@
     </paper>
     <paper id="6">
       <title>Encoding Frequency Information in Lexicalized Grammars</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>8-17</pages>
       <url hash="52027d29">1997.iwpt-1.6</url>
       <abstract>We address the issue of how to associate frequency information with lexicalized grammar formalisms, using Lexicalized Tree Adjoining Grammar as a representative framework. We consider systematically a number of alternative probabilistic frameworks, evaluating their adequacy from both a theoretical and empirical perspective using data from existing large treebanks. We also propose three orthogonal approaches fo r backing off probability estimates to cope with the large number of parameters involved.</abstract>
@@ -80,7 +80,7 @@
     <paper id="7">
       <title>Towards a Reduced Commitment, <fixed-case>D</fixed-case>-Theory Style <fixed-case>TAG</fixed-case> Parser</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>K.</first><last>Vijay-Shankar</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shankar</last></author>
       <pages>18-29</pages>
       <url hash="ea4da73a">1997.iwpt-1.7</url>
       <abstract>Many traditional TAG parsers handle ambiguity by considering all of the possible choices as they unfold during parsing. In contrast , D-theory parsers cope with ambiguity by using underspecified descriptions of trees. This paper introduces a novel approach to parsing TAG, namely one that explores how D-theoretic notions may be applied to TAG parsing. Combining the D-theoretic approach to TAG parsing as we do here raises new issues and problems. D-theoretic underspecification is used as a novel approach in the context of TAG parsing for delaying attachment decisions. Conversely, the use of TAG reveals the need for additional types of underspecification that have not been considered so far in the D-theoretic framework. These include combining sets of trees into their underspecified equivalents as well as underspecifying combinations of trees. In this paper, we examine various issues that arise in this new approach to TAG parsing and present solutions to some of the problems. We also describe other issues which need to be resolved for this method of parsing to be implemented.</abstract>
@@ -88,8 +88,8 @@
     </paper>
     <paper id="8">
       <title>Controlling Bottom-Up Chart Parsers through Text Chunking</title>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>30-41</pages>
       <url hash="35521a85">1997.iwpt-1.8</url>
       <abstract>In this paper we propose to use text chunking for controlling a bottom-up parser. As it is well known, during analysis such parsers produce many constituents not contributing to the final solution(s). Most of these constituents are introduced due to t he parser inability of checking the input context around them. Preliminary text chunking allows to focus directly on the constituents that seem more likely and to prune the search space in the case some satisfactory solutions are found. Preliminary experiments show that a CYK-like parser controlled through chunking is definitely more efficient than a traditional parser without significantly losing in correctness. Moreover the quality of possible partial results produced by the controlled parser is high. The strategy is particularly suited for tasks like Information Extraction from text (IE) where sentences are often long and complex and it is very difficult to have a complete coverage. Hence, there is a strong necessity of focusing on the most likely solutions; furthermore, in IE the quality of partial results is important .</abstract>
@@ -105,16 +105,16 @@
     </paper>
     <paper id="10">
       <title>Bilexical Grammars and a Cubic-time Probabilistic Parser</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>54-65</pages>
       <url hash="3eff81a6">1997.iwpt-1.10</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>eisner-1997-bilexical</bibkey>
     </paper>
     <paper id="11">
       <title>Automaton-based Parsing for Lexicalised Grammars</title>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>66-76</pages>
       <url hash="f5fd3b8b">1997.iwpt-1.11</url>
       <abstract>In wide-coverage lexicalized grammars many of the elementary structures have substructures in common. This means that during parsing some of the computation associated with different structures is duplicated. This paper explores ways in which the grammar can be precompiled into finite state automata so that some of this shared structure results in shared computation at run-time.</abstract>
@@ -131,7 +131,7 @@
     </paper>
     <paper id="13">
       <title>Probabilistic Feature Grammars</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <pages>89-100</pages>
       <url hash="de8d8d24">1997.iwpt-1.13</url>
       <abstract>We present a new formalism, probabilistic feature grammar (PFG). PFGs combine most of the best properties of several other formalisms, including those of Collins, Magerman, and Charniak, and in experiments have comparable or better performance. PFGs generate features one at a time, probabilistically, conditioning the probabilities of each feature on other features in a local context. Because the conditioning is local, efficient polynomial time parsing algorithms exist for computing inside, outside, and Viterbi parses. PFGs can produce probabilities of strings, making them potentially useful for language modeling. Precision and recall results are comparable to the state of the art with words, and the best reported without words.</abstract>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="18">
       <title>Probabilistic Parsing using Left Corner Language Models</title>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <pages>147-158</pages>
       <url hash="041a2d17">1997.iwpt-1.18</url>
@@ -209,7 +209,7 @@
     </paper>
     <paper id="22">
       <title>Performance Evaluation of Supertagging for Partial Parsing</title>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <pages>187-198</pages>
       <url hash="df31d49f">1997.iwpt-1.22</url>
       <abstract>In previous work we introduced the idea of supertagging as a means of improving the efficiency of a lexicalized grammar parser. In this paper, we present supertagging in conjunction with a lightweight dependency analyzer as a robust and efficient partial parser. The present work is significant for two reasons. First, we have vastly improved our results; 92% accurate for supertag disambiguation using lexical information, larger training corpus and smoothing techniques. Second, we show how supertagging can be used for partial parsing and provide detailed evaluation results for detecting noun chunks, verb chunks, preposition phrase attachment and a variety of other linguistic constructions. Using supertag representation, we achieve a recall rate of 93.0% and a precision rate of 91.8% for noun chunking, improving on the best known result for noun chunking.</abstract>
@@ -281,9 +281,9 @@
     </paper>
     <paper id="30">
       <title>Formal Tools for Separating Syntactically Correct and Incorrect Structures</title>
-      <author><first>Martin</first><last>Plátek</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
-      <author><first>Tomáš</first><last>Holan</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Plátek</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="tomas-holan"><first>Tomáš</first><last>Holan</last></author>
       <pages>247-248</pages>
       <url hash="0c2aee8e">1997.iwpt-1.30</url>
       <abstract>In this paper we introduce a class of formal grammars with special measures capable to describe typical syntactic inconsistencies in free word order languages. By means of these measures it is possible to characterize more precisely the problems connected with the task of building a robust parser or a grammar checker of Czech.</abstract>
@@ -293,7 +293,7 @@
       <title>Parsers Optimization for Wide-coverage Unification-based Grammars using the Restriction Technique</title>
       <author><first>Nora</first><last>La Serna</last></author>
       <author><first>Arantxa</first><last>Díaz</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <pages>249-250</pages>
       <url hash="a0ff81c7">1997.iwpt-1.31</url>
       <abstract>This article describes the methodology we have followed in order to improve the efficiency of a parsing algorithm for wide coverage unification-based grammars. The technique used is the restriction technique (Shieber 85), which has been recognized as an important operation to obtain efficient parsers for unification-based grammars. The main objective of the research is how to choose appropriate restrictors for using the restriction technique. We have developed a statistical model for selecting restrictors. Several experiments have been done in order to characterise those restrictors.</abstract>
diff --git a/data/xml/1997.mtsummit.xml b/data/xml/1997.mtsummit.xml
index 605aaf7fab..ba0353e21a 100644
--- a/data/xml/1997.mtsummit.xml
+++ b/data/xml/1997.mtsummit.xml
@@ -12,7 +12,7 @@
     </meta>
     <paper id="1">
       <title>A gentle introduction to <fixed-case>MT</fixed-case>: theory and current practice</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <abstract>This tutorial provides a nontechnical introduction to machine translation. It reviews the whole scope of MT, outlining briefly its history and the major application areas today, and describing the various kinds of MT techniques that have been invented—from direct replacement through transfer to the holy grail of interlinguas. It briefly outlines the newest statistics-based techniques and provides an introduction to the difficult questions of MT evaluation. Topics include: History and development of MT; Theoretical foundations of MT; Traditional and modern MT techniques; Newest MT research; Thorny questions of evaluating MT systems</abstract>
       <bibkey>hovy-1997-gentle</bibkey>
     </paper>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>MT</fixed-case> evaluation: old, new, and recycled</title>
-      <author><first>John</first><last>White</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <abstract>The tutorial addresses the issues peculiar to machine translation evaluation, namely the difficulty in determining what constitutes correct translation, and which types of evaluation are the most meaningful for evaluation "consumers." The tutorial is structured around evaluation methods designed for particular purposes: types of MT design, stages in the development lifecycle, and intended end-use of a system that includes MT. It will provide an overview of the issues and classic approaches to MT evaluation. The traditional processes, such as those outlined in the ALPAC report, will be examined for their value historically and in terms of today's environments. The tutorial also provides an insight into the latest evaluation techniques, designed to capture the value of MT systems in the context of current and future automated text handling processes.</abstract>
       <bibkey>white-1997-mt</bibkey>
     </paper>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="2">
       <title>First steps in Mechanical Translation</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <pages>14-23</pages>
       <url hash="41bb0eed">1997.mtsummit-plenaries.2</url>
       <abstract>Although the first ideas for mechanical translation were made in the seventeenth century, it was not until this century that means became available for realization with the appearance of the electronic computer in the mid 1940s. Fifty years ago, in March 1947 Warren Weaver wrote to Norbert Wiener and met Andrew Booth, mentioning to both the use of computers for translation. The possibilities were investigated during the next seven years, until in January 1954 the first prototype program was demonstrated. This article is a brief chronicle of these early years of mechanizing translation processes.</abstract>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="5">
       <title>The Fulcrum Approach to Machine Translation</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
       <pages>29-30</pages>
       <url hash="f83da0e7">1997.mtsummit-plenaries.5</url>
       <bibkey>montgomery-1997-fulcrum</bibkey>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="11">
       <title>Machine Translation Through Language Understanding</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <pages>41-49</pages>
       <url hash="6ead8fde">1997.mtsummit-plenaries.11</url>
       <abstract>In this paper is described a general framework of a next generation machine translation system which translates a text not sentence by sentence but by considering inter-sentential discourse. The method is a step closer to human translation than the present-day machine translation systems. Particularly important are a detailed discourse analysis and a flexible text generation by using information obtained from the discourse analysis.</abstract>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="12">
       <title>The Current State of Machine Translation</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <pages>115-124</pages>
       <url hash="fc245b8e">1997.mtsummit-plenaries.12</url>
       <abstract>This paper aims to survey the current state of research, development and use of Machine Translation (MT). Under ‘research’ the role of linguistics is discussed, and contrasted with research in ‘analogy- based’ MT. The range of languages covered by MT systems is discussed, and the lack of development for minority languages noted. The new research area of spoken language translation (SLT) is reviewed, with some major differences between SLT and text MT described. Under ‘use and users’ we discuss tools for users: Translation Memory, bilingual concordances and software to help checking for mistranslations. The use of MT on the World Wide Web is also discussed, regarding pre- and post-editing, the impact of ‘controlled language’ is reviewed, and finally a proposal is made that MT users can revise the input text in the light of errors that the system makes, thus ‘post-editing the source text’.</abstract>
@@ -136,7 +136,7 @@
     </paper>
     <paper id="13">
       <title>Whither <fixed-case>MT</fixed-case>?</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <pages>191-199</pages>
       <url hash="5b2902ef">1997.mtsummit-plenaries.13</url>
       <abstract>MT started out as a ‘technology push’: more than 50 years ago, researchers had the bright idea of doing translation with the use of the newly developed computers. MT remained in the technology push area for many years. However, in the nineties we are seeing the ‘market pull’ beginning to play a role and there are good reasons to believe that this trend will continue. MT is going where the market and the users wants it to go, and MT will be prospering in the future. MT will be available electronically over the network, and MT will be available in environments which also offer a variety of other tools for translation, as well as tools for other types of information management. Also in research and in development of new technologies, MT will further develop, e.g. along the lines of knowledge-based MT, advanced integration of different analysis techniques (rule-based, statistics-based, etc.), integration with speech etc.</abstract>
@@ -162,7 +162,7 @@
     </paper>
     <paper id="2">
       <title>A Real-Time <fixed-case>MT</fixed-case> System for Translating Broadcast Captions</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>51-57</pages>
       <url hash="89c36dfc">1997.mtsummit-papers.2</url>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="9">
       <title>User-Friendly Machine Translation: Alternate Translations Based on Differing Beliefs</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
       <pages>125-131</pages>
       <url hash="bcd829de">1997.mtsummit-papers.9</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="10">
       <title>Sharable Formats and Their Supporting Environments for Exchanging User Dictionaries among Different <fixed-case>MT</fixed-case> Systems as a Part of <fixed-case>AAMT</fixed-case> Activities</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Etsuo</first><last>Itoh</last></author>
       <author><first>Mikiko</first><last>Fujii</last></author>
       <author><first>Tokuyuki</first><last>Hirai</last></author>
@@ -429,8 +429,8 @@
     </paper>
     <paper id="9">
       <title>The <fixed-case>DIPLOMAT</fixed-case> Rapid Development Speech <fixed-case>MT</fixed-case> System</title>
-      <author><first>Robert E.</first><last>Frederking</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <author><first>Christopher</first><last>Hogan</last></author>
       <pages>261-262</pages>
       <url hash="26689a8f">1997.mtsummit-systems.9</url>
@@ -576,18 +576,18 @@
     </paper>
     <paper id="2">
       <title>Associating semantic components with intersective Levin classes</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>11-18</pages>
       <url hash="7e0cf466">1997.mtsummit-workshop.2</url>
       <bibkey>dang-etal-1997-associating</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>LCS</fixed-case>-based interlingual <fixed-case>MT</fixed-case></title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>M. Antonia</first><last>Martí</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="m-antonia-marti"><first>M. Antonia</first><last>Martí</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <pages>19-31</pages>
       <url hash="a0eb83e7">1997.mtsummit-workshop.3</url>
       <abstract>We present a machine translation framework in which the interlingua— Lexical Conceptual Structure (LCS)—is coupled with a definitional component that includes bilingual (EuroWordNet) links between words in the source and target languages. While the links between individual words are language-specific, the LCS is designed to be a language-independent, compositional representation. We take the view that the two types of information—shallower, transfer-like knowledge as well as deeper, compositional knowledge—can be reconciled in interlingual machine translation, the former for overcoming the intractability of LCS-based lexical selec- tion, and the latter for relating the underlying semantics of two words cross-linguistically. We describe the acquisition process for these two information types and present results of hand-verification of the acquired lexicon. Finally, we demonstrate the utility of the two information types in interlingual MT.</abstract>
@@ -595,8 +595,8 @@
     </paper>
     <paper id="4">
       <title>Toward compact monotonically compositional interlingua using lexical aspect</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
       <author><first>Scott C.</first><last>Thomas</last></author>
       <pages>33-43</pages>
       <url hash="33797218">1997.mtsummit-workshop.4</url>
@@ -605,7 +605,7 @@
     </paper>
     <paper id="5">
       <title>On representing language-specific information in interlingua</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <pages>45-50</pages>
       <url hash="0dc68d48">1997.mtsummit-workshop.5</url>
       <bibkey>farwell-1997-representing</bibkey>
@@ -620,14 +620,14 @@
     <paper id="7">
       <title>Improving the precision of lexicon-to-ontology alignment algorithms</title>
       <author><first>Latifur R.</first><last>Khan</last></author>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <pages>53-58</pages>
       <url hash="be2490e3">1997.mtsummit-workshop.7</url>
       <bibkey>khan-hovy-1997-improving</bibkey>
     </paper>
     <paper id="8">
       <title>Interlingua developed and utilized in real multilingual <fixed-case>MT</fixed-case> product systems</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <pages>59-69</pages>
       <url hash="9cfdb8b4">1997.mtsummit-workshop.8</url>
@@ -637,18 +637,18 @@
     <paper id="9">
       <title>Simplification of nomenclature leads to an ideal <fixed-case>IL</fixed-case> for human language communication</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Clifford</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
       <author><first>Dinesh</first><last>Tummala</last></author>
       <author><first>Linda</first><last>Kukolich</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>71-72</pages>
       <url hash="afa5547b">1997.mtsummit-workshop.9</url>
       <bibkey>lee-etal-1997-simplification</bibkey>
     </paper>
     <paper id="10">
       <title>The use of pegs computational discourse framework as an interlingua representation</title>
-      <author><first>Susann</first><last>Luperfoy</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>Luperfoy</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <pages>73-80</pages>
       <url hash="5ffd180f">1997.mtsummit-workshop.10</url>
       <bibkey>luperfoy-miller-1997-use</bibkey>
@@ -666,8 +666,8 @@
     <paper id="12">
       <title>Enriching lexical transfer with cross-linguistic semantic features or how to do interlingua without interlingua</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <pages>91-98</pages>
       <url hash="a9150b3b">1997.mtsummit-workshop.12</url>
@@ -675,8 +675,8 @@
     </paper>
     <paper id="13">
       <title>Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to posit hierarchical structure in Levin’s verb classes</title>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>David J.</first><last>Clark</last></author>
       <pages>99-110</pages>
       <url hash="83be5931">1997.mtsummit-workshop.13</url>
diff --git a/data/xml/1997.tc.xml b/data/xml/1997.tc.xml
index 77da022720..760f44ebd5 100644
--- a/data/xml/1997.tc.xml
+++ b/data/xml/1997.tc.xml
@@ -35,7 +35,7 @@
     </paper>
     <paper id="5">
       <title>Evaluation of Language Tools</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="28d0037d">1997.tc-1.5</url>
       <bibkey>maegaard-1997-evaluation</bibkey>
     </paper>
@@ -77,13 +77,13 @@
     </paper>
     <paper id="12">
       <title>Scania <fixed-case>S</fixed-case>wedish - A Basis for Multilingual Translation</title>
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
       <url hash="6cb45507">1997.tc-1.12</url>
       <bibkey>hein-1997-scania</bibkey>
     </paper>
     <paper id="13">
       <title>Machine Translation and Minority Languages</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="e2d194c0">1997.tc-1.13</url>
       <bibkey>somers-1997-machine</bibkey>
     </paper>
diff --git a/data/xml/1997.tmi.xml b/data/xml/1997.tmi.xml
index dcacda7dbc..fbccef7e75 100644
--- a/data/xml/1997.tmi.xml
+++ b/data/xml/1997.tmi.xml
@@ -11,8 +11,8 @@
     </meta>
     <paper id="1">
       <title>If you have it, flaunt it: using full ontological knowledge for word sense disambiguation</title>
-      <author><first>Kavi</first><last>Mahesh</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <url hash="7413d18d">1997.tmi-1.1</url>
       <bibkey>mahesh-etal-1997-flaunt</bibkey>
@@ -27,7 +27,7 @@
     </paper>
     <paper id="3">
       <title>Looking back to 1952: the first <fixed-case>MT</fixed-case> conference</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="9014ffde">1997.tmi-1.3</url>
       <bibkey>hutchins-1997-looking</bibkey>
     </paper>
@@ -47,7 +47,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>MT</fixed-case> at the paragraph level: improving <fixed-case>E</fixed-case>nglish synthesis in <fixed-case>SYSTRAN</fixed-case></title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Laurie</first><last>Gerber</last></author>
       <url hash="b9236d24">1997.tmi-1.6</url>
       <bibkey>hovy-gerber-1997-mt</bibkey>
@@ -90,20 +90,20 @@
     <paper id="11">
       <title><fixed-case>E</fixed-case>nglish adverb processing in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish machine translation</title>
       <author><first>Kentaro</first><last>Ogura</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <url hash="7f780eac">1997.tmi-1.11</url>
       <bibkey>ogura-etal-1997-english</bibkey>
     </paper>
     <paper id="12">
       <title>Language control and machine translation</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <url hash="2621fd4e">1997.tmi-1.12</url>
       <bibkey>sagvall-hein-1997-language</bibkey>
     </paper>
     <paper id="13">
       <title>Automated dictionary extraction for “knowledge-free” example-based translation</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="5c7e5cf4">1997.tmi-1.13</url>
       <bibkey>brown-1997-automated</bibkey>
     </paper>
@@ -117,9 +117,9 @@
     <paper id="15">
       <title><fixed-case>T</fixed-case>op<fixed-case>A</fixed-case>lign: word alignment for bilingual corpora based on topical clusters of dictionary entries and translations</title>
       <author><first>Mathis H.</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Sue J.</first><last>Ker</last></author>
-      <author><first>Jen-Nan</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
+      <author id="jen-nan-chen"><first>Jen-Nan</first><last>Chen</last></author>
       <url hash="98732ab2">1997.tmi-1.15</url>
       <bibkey>chen-etal-1997-topalign</bibkey>
     </paper>
@@ -127,14 +127,14 @@
       <title>Error correcting parsing for text-to-text machine translation using finite state models</title>
       <author><first>Juan C.</first><last>Amengual</last></author>
       <author><first>José M.</first><last>Benedí</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Asunción</first><last>Castaño</last></author>
-      <author><first>Antonio</first><last>Castellanos</last></author>
-      <author><first>David</first><last>Llorens</last></author>
-      <author><first>Andrés</first><last>Marzal</last></author>
-      <author><first>Federico</first><last>Prat</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
-      <author><first>Juan M.</first><last>Vilar</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="asuncion-castano"><first>Asunción</first><last>Castaño</last></author>
+      <author id="antonio-castellanos"><first>Antonio</first><last>Castellanos</last></author>
+      <author id="david-llorens"><first>David</first><last>Llorens</last></author>
+      <author id="andres-marzal"><first>Andrés</first><last>Marzal</last></author>
+      <author id="federico-prat"><first>Federico</first><last>Prat</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
+      <author id="juan-miguel-vilar"><first>Juan M.</first><last>Vilar</last></author>
       <url hash="61d1e55d">1997.tmi-1.16</url>
       <bibkey>amengual-etal-1997-error</bibkey>
     </paper>
@@ -146,20 +146,20 @@
     </paper>
     <paper id="18">
       <title>Word sense disambiguation: why statistics when we have these numbers?</title>
-      <author><first>Kavi</first><last>Mahesh</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Evelyne</first><last>Viegas</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="471a959f">1997.tmi-1.18</url>
       <bibkey>mahesh-etal-1997-word</bibkey>
     </paper>
     <paper id="19">
       <title>Machine translation using neural networks and finite-state models</title>
-      <author><first>Asunción</first><last>Castaño</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="asuncion-castano"><first>Asunción</first><last>Castaño</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="66f7862a">1997.tmi-1.19</url>
       <bibkey>castano-etal-1997-machine</bibkey>
     </paper>
@@ -167,7 +167,7 @@
       <title>A left-to-right breadth-first algorithm for subcategorization frame selection of <fixed-case>J</fixed-case>apanese verbs</title>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <author><first>Shin’ichiro</first><last>Kamei</last></author>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <url hash="101ea194">1997.tmi-1.20</url>
       <bibkey>muraki-etal-1997-left</bibkey>
     </paper>
diff --git a/data/xml/1998.amta.xml b/data/xml/1998.amta.xml
index 06ac963897..9aa3365847 100644
--- a/data/xml/1998.amta.xml
+++ b/data/xml/1998.amta.xml
@@ -14,12 +14,12 @@
     </meta>
     <paper id="1">
       <title><fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <bibkey>white-1998-mt</bibkey>
     </paper>
     <paper id="2">
       <title>Survey of methodological approaches to <fixed-case>MT</fixed-case></title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <bibkey>somers-1998-survey</bibkey>
     </paper>
     <paper id="3">
@@ -29,7 +29,7 @@
     </paper>
     <paper id="4">
       <title>Ontological semantics for knowledge-based <fixed-case>MT</fixed-case></title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <bibkey>nirenburg-1998-ontological</bibkey>
     </paper>
     <paper id="5">
@@ -39,12 +39,12 @@
     </paper>
     <paper id="6">
       <title>Speech to speech machine translation</title>
-      <author><first>Monika</first><last>Woszczyna</last></author>
+      <author id="monika-woszczyna"><first>Monika</first><last>Woszczyna</last></author>
       <bibkey>woszczyna-1998-speech</bibkey>
     </paper>
     <paper id="7">
       <title>Multilingual text summarization</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Danel</first><last>Marcu</last></author>
       <bibkey>hovy-marcu-1998-multilingual</bibkey>
     </paper>
@@ -63,7 +63,7 @@
     </meta>
     <paper id="1">
       <title>A seal of approval for <fixed-case>MT</fixed-case> systems</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <bibkey>hovy-1998-seal</bibkey>
     </paper>
     <paper id="2">
@@ -73,7 +73,7 @@
     </paper>
     <paper id="3">
       <title>Breaking the quality ceiling</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <bibkey>farwell-1998-breaking</bibkey>
     </paper>
   </volume>
@@ -109,11 +109,11 @@
       <title>A modular approach to spoken language translation for large domains</title>
       <author><first>Monika</first><last>Woszczcyna</last></author>
       <author><first>Matthew</first><last>Broadhead</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Marsal</first><last>Gavaldá</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>31-49</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_3</url>
       <abstract>The MT engine of the JANUS speech-to-speech translation system is designed around four main principles: 1) an interlingua approach that allows the efficient addition of new languages, 2) the use of semantic grammars that yield low cost high quality translations for limited domains, 3) modular grammars that support easy expansion into new domains, and 4) efficient integration of multiple grammars using multi-domain parse lattices and domain re-scoring. Within the framework of the C-STAR-II speech-to-speech translation effort, these principles are tested against the challenge of providing translation for a number of domains and language pairs with the additional restriction of a common interchange format.</abstract>
@@ -121,8 +121,8 @@
     </paper>
     <paper id="4">
       <title>Enhancing automatic acquisition of the thematic structure in a large-scale lexicon for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Scott C.</first><last>Thomas</last></author>
       <pages>41-50</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_4</url>
@@ -132,7 +132,7 @@
     <paper id="5">
       <title>Ordering translation templates by assigning confidence factors</title>
       <author><first>Zeynep</first><last>Öz</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <pages>51-61</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_5</url>
       <abstract>TTL (Translation Template Learner) algorithm learns lexical level correspondences between two translation examples by using analogical reasoning. The sentences used as translation examples have similar and different parts in the source language which must correspond to the similar and different parts in the target language. Therefore these correspondences are learned as translation templates. The learned translation templates are used in the translation of other sentences. However, we need to assign confidence factors to these translation templates to order translation results with respect to previously assigned confidence factors. This paper proposes a method for assigning confidence factors to translation templates learned by the TTL algorithm. Training data is used for collecting statistical information that will be used in confidence factor assignment process. In this process, each template is assigned a confidence factor according to the statistical information obtained from training data. Furthermore, some template combinations are also assigned confidence factors in order to eliminate certain combinations resulting bad translation.</abstract>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="6">
       <title>Quality and robustness in <fixed-case>MT</fixed-case>—<fixed-case>A</fixed-case> balancing act</title>
-      <author><first>Bianka</first><last>Buschbeck-Wolf</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck-Wolf</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <pages>62-71</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_6</url>
@@ -161,7 +161,7 @@
       <author><first>Göklan</first><last>Tür</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric H.</first><last>Nyberg, 3rd</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg, 3rd</last></author>
       <pages>83-94</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_8</url>
       <abstract>This paper describes the integration of a Turkish generation system with the KANT knowledge-based machine translation system to produce a prototype English-Turkish interlingua-based machine translation system. These two independently constructed systems were successfully integrated within a period of two months, through development of a module which maps KANT interlingua expressions to Turkish syntactic structures. The combined system is able to translate completely and correctly 44 of 52 benchmark sentences in the domain of broadcast news captions. This study is the first known application of knowledge-based machine translation from English to Turkish, and our initial results show promise for future development.</abstract>
@@ -169,8 +169,8 @@
     </paper>
     <paper id="9">
       <title>Rapid prototyping of domain-apecific machine translation systems</title>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>95-102</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_9</url>
@@ -180,7 +180,7 @@
     <paper id="10">
       <title>An evaluation of the multi-engine <fixed-case>MT</fixed-case> architecture</title>
       <author><first>Christopher</first><last>Hogan</last></author>
-      <author><first>Robert E.</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
       <pages>113-123</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_11</url>
       <abstract>The Multi-Engine MT (MEMT) architecture combines the outputs of multiple MT engines using a statistical language model of the target language. It has been used successfully in a number of MT research systems, for both text and speech translation. Despite its perceived benefits, there has never been a rigorous, published, double-blind evaluation of the claim that the combined output of a MEMT system is in fact better than that of any one of the component MT engines. We report here the results of such an evaluation. The combined MEMT output is shown to indeed be better overall than the output of the component engines in a Croatian ↔ English MT system. This result is consistent in both translation directions, and between different raters.</abstract>
@@ -207,7 +207,7 @@
     <paper id="13">
       <title>Fast document translation for cross-language information retrieval</title>
       <author><first>J.Scott</first><last>McCarley</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>150-157</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_14</url>
       <abstract>We describe a statistical algorithm for machine translation intended to provide translations of large document collections at speeds far in excess of traditional machine translation systems, and of sufficiently high quality to perform information retrieval on the translated document collections. The model is trained from a parallel corpus and is capable of disambiguating senses of words. Information retrieval (IR) experiments on a French language dataset from a recent cross-language information retrieval evaluation yields results superior to those obtained by participants in the evaluation, and confirm the importance of word sense disambiugation in cross-language information retrieval.</abstract>
@@ -239,9 +239,9 @@
     </paper>
     <paper id="17">
       <title>A multilingual procedure for dictionary-based sentence alignment</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>187-198</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_18</url>
       <abstract>This paper describes a sentence alignment technique based on a machine readable dictionary. Alignment takes place in a single pass through the text, based on the scores of matches between pairs of source and target sentences. Pairings consisting of sets of matches are evaluated using a version of the Gale-Shapely solution to the stable marriage problem. An algorithm is described which can handle N-to-1 (or 1-to-N) matches, for n ≥ 0, i.e., deletions, 1-to-1 (including scrambling), and 1-to-many matches. A simple frequency based method for acquiring supplemental dictionary entries is also discussed. We achieve high quality alignments using available bilingual dictionaries, both for closely related language pairs (Spanish/English) and more distantly related pairs (Japanese/English).</abstract>
@@ -249,8 +249,8 @@
     </paper>
     <paper id="18">
       <title>Taxonomy and lexical semantics—from the perspective of machine readable dictionary</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Sue J.</first><last>Ker</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
       <author><first>Mathis H.</first><last>Chen</last></author>
       <pages>199-212</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_19</url>
@@ -295,7 +295,7 @@
     <paper id="23">
       <title>When Stålhandske becomes Steelglove</title>
       <author><first>Pernilla</first><last>Danielsson</last></author>
-      <author><first>Katarina</first><last>Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Mühlenbock</last></author>
       <pages>266-274</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_24</url>
       <abstract>Names can serve several purposes in the field of Machine Translation. The problems range from identifying to processing the various types of names. The paper begins with a short description of the search strategy and then continues with the classification of types into a typology. We present our findings according to degrees of translation from which we highlight clues. These clues indicate a first step towards formalization.</abstract>
@@ -337,7 +337,7 @@
     </paper>
     <paper id="28">
       <title>Twisted pair grammar: support for rapid development of machine translation for low density languages</title>
-      <author><first>Douglas</first><last>Jones</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
       <author><first>Rick</first><last>Havrilla</last></author>
       <pages>318-332</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_29</url>
@@ -346,9 +346,9 @@
     </paper>
     <paper id="29">
       <title>A thematic hierarchy for efficient generation from lexical-conceptual structure</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>333-343</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_30</url>
       <abstract>This paper describes an implemented algorithm for syntactic realization of a target-language sentence from an interlingual representation called Lexical Conceptual Structure (LCS). We provide a mapping between LCS thematic roles and Abstract Meaning Representation (AMR) relations; these relations serve as input to an off-the-shelf generator (Nitrogen). There are two contributions of this work: (1) the development of a thematic hierarchy that provides ordering information for realization of arguments in their surface positions; (2) the provision of a diagnostic tool for detecting inconsistencies in an existing online LCS-based lexicon that allows us to enhance principles for thematic-role assignment.</abstract>
@@ -356,7 +356,7 @@
     </paper>
     <paper id="30">
       <title>The <fixed-case>LMT</fixed-case> Transformational System</title>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <author><first>Arendse</first><last>Bernth</last></author>
       <pages>344-355</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_31</url>
@@ -375,7 +375,7 @@
     <paper id="32">
       <title>Predicting what <fixed-case>MT</fixed-case> is good for: user judgments and task performance</title>
       <author><first>Kathryn</first><last>Taylor</last></author>
-      <author><first>John</first><last>White</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <pages>364-373</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_33</url>
       <abstract>As part of the Machine Translation (MT) Proficiency Scale project at the US Federal Intelligent Document Understanding Laboratory (FIDUL), Litton PRC is developing a method to measure MT systems in terms of the tasks for which their output may be successfully used. This paper describes the development of a task inventory, i.e., a comprehensive list of the tasks analysts perform with translated material and details the capture of subjective user judgments and insights about MT samples. Also described are the user exercises conducted using machine and human translation samples and the assessment of task performance. By analyzing translation errors, user judgments about errors that interfere with task performance, and user task performance results, we isolate source language patterns which produce output problems. These patterns can then be captured in a single diagnostic test set, to be easily applied to any new Japanese-English system to predict the utility of its output.</abstract>
@@ -408,7 +408,7 @@
     </paper>
     <paper id="36">
       <title>Lexical choice and syntactic generation in a transfer system: transformations in the new <fixed-case>LMT</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman system</title>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <pages>408-420</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_37</url>
       <abstract>This paper argues that, contrary to received wisdom in the MT research community, a transfer system such as LMT is well suited to deal with most of the problems that MT faces. It may in fact be superior to other approaches in that it can handle target surface-structure constraints, variation of syntactic patterns, discourse-structure constraints, and stylistic preference. The paper describes the linguistic issues involved in LMT’s English⇒German transformational component, its interaction with the lexical transfer component, and types of transformations. It identifies context-dependent and context-independent transformations and among the context-dependent ones, it differentiates between those that are triggered by instructions in the lexicon, by semantic category, by syntactic context, and by setting of stylistic preference. The paper concludes with some examples of divergence between English and German and shows how LMT handles them.</abstract>
@@ -417,7 +417,7 @@
     <paper id="37">
       <title>Translation with finite-state devices</title>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>421-437</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_38</url>
       <abstract>Statistical models have recently been applied to machine translation with interesting results. Algorithms for processing these models have not received wide circulation, however. By contrast, general finite-state transduction algorithms have been applied in a variety of tasks. This paper gives a finite-state reconstruction of statistical translation and demonstrates the use of standard tools to compute statistically likely translations. Ours is the first translation algorithm for “fertility/permutation” statistical models to be described in replicable detail.</abstract>
@@ -425,7 +425,7 @@
     </paper>
     <paper id="38">
       <title>Lexical selection for cross-language applications: combining <fixed-case>LCS</fixed-case> with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Maria</first><last>Katsova</last></author>
       <pages>438-447</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_39</url>
@@ -435,7 +435,7 @@
     <paper id="39">
       <title>Improving translation quality by manipulating sentence length</title>
       <author><first>Laurie</first><last>Gerber</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>448-460</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_40</url>
       <abstract>Translation systems tend to have more trouble with long sentences than with short ones for a variety of reasons. When the source and target languages differ rather markedly, as do Japanese and English, this problem is reflected in lower quality output. To improve readability, we experimented with automatically splitting long sentences into shorter ones. This paper outlines the problem, describes the sentence splitting procedure and rules, and provides an evaluation of the results.</abstract>
@@ -451,7 +451,7 @@
     </paper>
     <paper id="41">
       <title>A comparative study of query and document translation for cross-language information retrieval</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <pages>472-483</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_42</url>
       <abstract>Cross-language retrieval systems use queries in one natural language to guide retrieval of documents that might be written in another. Acquisition and representation of translation knowledge plays a central role in this process. This paper explores the utility of two sources of translation knowledge for cross-language retrieval. We have implemented six query translation techniques that use bilingual term lists and one based on direct use of the translation output from an existing machine translation system; these are compared with a document translation technique that uses output from the same machine translation system. Average precision measures on a TREC collection suggest that arbitrarily selecting a single dictionary translation is typically no less effective than using every translation in the dictionary, that query translation using a machine translation system can achieve somewhat better effectiveness than simpler techniques, and that document translation may result in further improvements in retrieval effectiveness under some conditions.</abstract>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="42">
       <title>Lexicons as gold: mining, embellishment and reuse</title>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>David M.</first><last>Zajic</last></author>
       <pages>484-493</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-49478-2_43</url>
diff --git a/data/xml/1998.eamt.xml b/data/xml/1998.eamt.xml
index 0fbe063ddb..b325531b1f 100644
--- a/data/xml/1998.eamt.xml
+++ b/data/xml/1998.eamt.xml
@@ -8,7 +8,7 @@
       <month>April 2–3</month>
       <year>1998</year>
       <url hash="c4182515">1998.eamt-1</url>
-      <editor><first>John</first><last>Hutchins</last></editor>
+      <editor id="w-john-hutchins"><first>John</first><last>Hutchins</last></editor>
       <venue>eamt</venue>
     </meta>
     <frontmatter>
@@ -91,7 +91,7 @@
     <paper id="13">
       <title>Summary of the concluding session</title>
       <author><first>Dimitrios</first><last>Theologitis</last></author>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="40435532">1998.eamt-1.13</url>
       <bibkey>theologitis-maegaard-1998-summary</bibkey>
     </paper>
diff --git a/data/xml/1998.tc.xml b/data/xml/1998.tc.xml
index 03384bacce..7d8d2e7308 100644
--- a/data/xml/1998.tc.xml
+++ b/data/xml/1998.tc.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>Twenty Years of Translating and the Computer</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="ded30347">1998.tc-1.1</url>
       <bibkey>hutchins-1998-twenty</bibkey>
     </paper>
diff --git a/data/xml/1999.eamt.xml b/data/xml/1999.eamt.xml
index 1db62cbcde..fdf86cc503 100644
--- a/data/xml/1999.eamt.xml
+++ b/data/xml/1999.eamt.xml
@@ -26,13 +26,13 @@
     </paper>
     <paper id="3">
       <title>Experience from translation of <fixed-case>EU</fixed-case> documents</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <url hash="ed382a93">1999.eamt-1.3</url>
       <bibkey>proszeky-1999-experience</bibkey>
     </paper>
     <paper id="4">
       <title>Aligning and extracting translation equivalents from <fixed-case>EU</fixed-case> documents - a possible look on <fixed-case>EU</fixed-case> Integration</title>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <bibkey>paskaleva-1999-aligning</bibkey>
     </paper>
     <paper id="5">
@@ -64,21 +64,21 @@
     </paper>
     <paper id="10">
       <title>Translation to and from <fixed-case>R</fixed-case>ussian: the <fixed-case>ETAP</fixed-case> system</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <url hash="5fb82f12">1999.eamt-1.10</url>
       <bibkey>boguslavsky-1999-translation</bibkey>
     </paper>
     <paper id="11">
       <title>On intermediate structures and tectogrammatics</title>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="1778c041">1999.eamt-1.11</url>
       <bibkey>sgall-1999-intermediate</bibkey>
     </paper>
     <paper id="12">
       <title>Machine translation of very closely related languages</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jan</first><last>Hric</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <bibkey>hajic-etal-1999-machine</bibkey>
     </paper>
     <paper id="13">
@@ -89,13 +89,13 @@
     </paper>
     <paper id="14">
       <title>Automatic translation lexicon extraction from <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech parallel texts</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <author><first>Jan</first><last>Cuřin</last></author>
       <bibkey>cmejrek-curin-1999-automatic</bibkey>
     </paper>
     <paper id="15">
       <title>Human Language Technologies - possibilities in the <fixed-case>EU</fixed-case> 5th Framework Programme for Research and Technological Development</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <bibkey>maegaard-1999-human</bibkey>
     </paper>
     <paper id="16">
@@ -105,7 +105,7 @@
     </paper>
     <paper id="17">
       <title>Post-Workshop commentary: New languages are not virgin languages: <fixed-case>EAMT</fixed-case> ‘99 Workshop from an “eastern” point of view</title>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <bibkey>kubon-1999-post</bibkey>
     </paper>
     <paper id="18">
diff --git a/data/xml/1999.mtsummit.xml b/data/xml/1999.mtsummit.xml
index 6677540dc1..2f1b4ffa33 100644
--- a/data/xml/1999.mtsummit.xml
+++ b/data/xml/1999.mtsummit.xml
@@ -44,7 +44,7 @@
     </paper>
     <paper id="5">
       <title>Retrospect and prospect in computer-based translation</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <pages>30-36</pages>
       <url hash="34969ada">1999.mtsummit-1.5</url>
       <abstract>At the last MT Summit conference this century, this paper looks back briefly at what has happened in the 50 years since MT began, reviews the present situation, and speculates on what the future may bring. Progress in the basic processes of computerized translation has not been as dramatic as developments in computer technology and software. There is still much scope for the improvement of the linguistic quality of MT output, which hopefully developments in both rule-based and corpus-based methods can bring. Greater impact on the future MT scenario will probably come from the expected huge increase in demand for on-line real-time communication in many languages, where quality may be less important than accessibility and usability.</abstract>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="15">
       <title>Machine translation in <fixed-case>K</fixed-case>orea</title>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <author><first>Gil-Rok</first><last>Oh</last></author>
       <pages>100-106</pages>
       <url hash="f7936af6">1999.mtsummit-1.15</url>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="18">
       <title>Translation systems under the <fixed-case>C</fixed-case>-<fixed-case>STAR</fixed-case> framework</title>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>121-124</pages>
       <url hash="ec2475af">1999.mtsummit-1.18</url>
       <abstract>This talk will review our work on Speech Translation under the recent worldwide C-STAR demonstration. C-STAR is the Consortium for Speech Translation Advanced Research and now includes 6 partners and 20 partner/affiliate laboratories around the world. The work demonstrated concludes the second phase of the consortium, which has focused on translating conversational spontaneous speech as opposed to well formed, well structured text. As such, much of the work has focused on exploiting semantic and pragmatic constraints derived from the task domain and dialog situation to produce an understandable translation. Six partners have connected their respective systems with each other and allowed travel related spoken dialogs to provide communication between each of them. A common Interlingua representation was developed and used between the partners to make this multilingual deployment possible. The systems were also complemented by the introduction of Web based shared workspaces that allow one user in one country to communicate pictures, documents, sounds, tables, etc. to the other over the Web while referring to these documents in the dialog. Some of the partners' systems were also deployed in wearable situations, such as a traveler exploring a foreign city. In this case speech and language technology was installed on a wearable computer with a small hand-held display. It was used to provide language translation as well as human-machine information access for the purpose of navigation (using GPS localization) and tour guidance. This combination of human-machine and human-machine-human dialogs could allow a user explore a foreign environment more effectively by resorting to human-machine and human-human dialogs wherever most appropriate.</abstract>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="19">
       <title>A research perspective on how to democratize machine translation and translation aids aiming at high quality final output</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>125-133</pages>
       <url hash="097497e7">1999.mtsummit-1.19</url>
       <abstract>Machine Translation (MT) systems and Translation Aids (TA) aiming at cost-effective high quality final translation are not yet usable by small firms, departments and individuals, and handle only a few languages and language pairs. This is due to a variety of reasons, some of them not frequently mentioned. But commercial, technical and cultural reasons make it mandatory to find ways to democratize MT and TA. This goal could be attained by: (1) giving users, free of charge, TA client tools and server resources in exchange for the permission to store and refine on the server linguistic resources produced while using TA; (2) establishing a synergy between MT and TA, in particular by using them jointly in translation projects where translators codevelop the lexical resources specific to MT; (3) renouncing the illusion of fully automatic general purpose high quality MT (FAHQMT) and go for semi-automaticity (SAHQMT), where user participation, made possible by recent technical network-oriented advances, is used to solve ambiguities otherwise computationnally unsolvable due to the impossibility, intractability or cost of accessing the necessary knowledge; (4) adopting a hybrid (symbolic &amp; numerical) and "pivot" approach for MT, where pivot lexemes arc UNL or UNL inspired English-oriented denotations of (sets of) interlingual acceptions or word/term senses, and the rest of the representation of utterances is either fully abstract and interlingual as in UNL, or, less ambitiously but more realistically, obtained by adding to an abstract English multilevel structure features underspecified in English but essential for other languages, including minority languages.</abstract>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="24">
       <title>A scalable cross-language metasearch architecture for multilingual information access on the Web</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
       <author><first>Toshiaki</first><last>Iwadera</last></author>
       <pages>157-164</pages>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="25">
       <title>Complementing dictionary-based query translations with corpus statistics for cross-language <fixed-case>IR</fixed-case></title>
-      <author><first>Sung Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung Hyon</first><last>Myaeng</last></author>
       <author><first>Mung-Gil</first><last>Jang</last></author>
       <pages>165-174</pages>
       <url hash="268c1fee">1999.mtsummit-1.25</url>
@@ -213,7 +213,7 @@
     </paper>
     <paper id="26">
       <title>Machine translation for the next century</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <pages>175-176</pages>
       <url hash="6b685444">1999.mtsummit-1.26</url>
       <abstract>The panel intends to pick up some of the issues discussed in the Summit and discuss them further in the final session from broader perspectives. Since the Summit has not even started yet, I will just enumerate in this paper a list of possible perspectives on MT that I hope are relevant to our discussion.</abstract>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="28">
       <title>Sharing dictionaries among <fixed-case>MT</fixed-case> users by common formats and social filtering framework</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <pages>180-181</pages>
       <url hash="63260faa">1999.mtsummit-1.28</url>
       <abstract>MT users have to build "user dictionaries" in order to obtain high-quality translation results. However, building dictionaries needs time and labor. In order to meet the speed of the information flow in the global network society, we need to have common formats for sharing dictionaries among different MT systems, and a new way of dictionary authorization, that is "social filtering".</abstract>
@@ -259,9 +259,9 @@
     <paper id="31">
       <title><fixed-case>MT</fixed-case> evaluation</title>
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>John</first><last>White</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <author><first>Yusoff</first><last>Zaharin</last></author>
       <pages>197-207</pages>
       <url hash="c7f93d9a">1999.mtsummit-1.31</url>
@@ -270,7 +270,7 @@
     </paper>
     <paper id="32">
       <title>Applying <fixed-case>TDMT</fixed-case> to abstracts on science and technology</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Hiroko</first><last>Ohta</last></author>
       <author><first>Yoshiko</first><last>Shirokizawa</last></author>
       <author><first>Kazutaka</first><last>Takao</last></author>
@@ -281,8 +281,8 @@
     </paper>
     <paper id="33">
       <title><fixed-case>UNL</fixed-case>-<fixed-case>F</fixed-case>rench deconversion as transfer &amp; generation from an interlingua with possible quality enhancement through offline human interaction</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>220-228</pages>
       <url hash="c052baaf">1999.mtsummit-1.33</url>
       <abstract>We present the architecture of the UNL-French deconverter, which "generates" from the UNL interlingua by first "localizing" the UNL form for French, within UNL, and then applying slightly adapted but classical transfer and generation techniques, implemented in GETA's Ariane-G5 environment, supplemented by some UNL-specific tools. Online interaction can be used during deconversion to enhance output quality and is now used for development purposes. We show how interaction could be delayed and embedded in the postedition phase, which would then interact not directly with the output text, but indirectly with several components of the deconverter. Interacting online or offline can improve the quality not only of the utterance at hand, but also of the utterances processed later, as various preferences may be automatically changed to let the deconverter "learn".</abstract>
@@ -290,13 +290,13 @@
     </paper>
     <paper id="34">
       <title>Solutions to problems inherent in spoken-language translation: the <fixed-case>ATR</fixed-case>-<fixed-case>MATRIX</fixed-case> approach</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Setsuo</first><last>Yamada</last></author>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Kai</first><last>Ishikawa</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <pages>229-235</pages>
       <url hash="9329a1d9">1999.mtsummit-1.34</url>
       <abstract>ATR has built a multi-language speech translation system called ATR-MATRIX. It consists of a spoken-language translation subsystem, which is the focus of this paper, together with a highly accurate speech recognition subsystem and a high-definition speech synthesis subsystem. This paper gives a road map of solutions to the problems inherent in spoken-language translation. Spoken-language translation systems need to tackle difficult problems such as ungrammaticality. contextual phenomena, speech recognition errors, and the high-speeds required for real-time use. We have made great strides towards solving these problems in recent years. Our approach mainly uses an example-based translation model called TDMT. We have added the use of extra-linguistic information, a decision tree learning mechanism, and methods dealing with recognition errors.</abstract>
@@ -314,7 +314,7 @@
     </paper>
     <paper id="36">
       <title>Example-based machine translation based on the synchronous <fixed-case>SSTC</fixed-case> annotation schema</title>
-      <author><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
+      <author id="mosleh-hmoud-al-adhaileh"><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
       <author><first>Tang Enya</first><last>Kong</last></author>
       <pages>244-249</pages>
       <url hash="49c055c0">1999.mtsummit-1.36</url>
@@ -372,11 +372,11 @@
     <paper id="43">
       <title>Study on evaluation of <fixed-case>WWW</fixed-case> <fixed-case>MT</fixed-case> systems</title>
       <author><first>Shinichiro</first><last>Miyazawa</last></author>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <author><first>Masaki</first><last>Matsudaira</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
       <author><first>Shuji</first><last>Kodama</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Yoshiko</first><last>Shirokizawa</last></author>
       <author><first>Yasuo</first><last>Nakajima</last></author>
       <pages>290-298</pages>
@@ -397,7 +397,7 @@
     </paper>
     <paper id="45">
       <title>Machine translation for information access across the language barrier: the <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case> system</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>308-316</pages>
       <url hash="9ea4ce7b">1999.mtsummit-1.45</url>
       <abstract>In this paper we describe the design and implementation of MuST, a multilingual information retrieval, summarization, and translation system. MuST integrates machine translation and other text processing services to enable users to perform cross-language information retrieval using available search services such as commercial Internet search engines. To handle non-standard languages, a new Internet indexing agent can be deployed, specialized local search services can be built, and shallow MT can be added to provide useful functionality. A case study of augmenting MuST with Indonesian is included. MuST adopts ubiquitous web browsers as its primary user interface, and provides tightly integrated automated shallow translation and user biased summarization to help users quickly judge the relevance of documents.</abstract>
@@ -415,7 +415,7 @@
     <paper id="47">
       <title>Interactive <fixed-case>MT</fixed-case> as support for non-native language authoring</title>
       <author><first>Svetlana</first><last>Sheremetyeva</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>324-330</pages>
       <url hash="162bde19">1999.mtsummit-1.47</url>
       <abstract>The paper describes an approach to developing an interactive MT system for translating technical texts on the example of translating patent claims between Russian and English. The approach conforms to the human-aided machine translation paradigm. The system is meant for a source language (SL) speaker who does not know the target language (TL). It consists of i) an analysis module which includes a submodule of interactive syntactic analysis of SL text and a submodule of fully automated morphological analysis, ii) an automatic module for transferring the lexical and partially syntactic content of SL text into a similar content of the TL text and iii) a fully automated TL text generation module which relies on knowledge about the legal format of TL patent claims. An interactive analysis module guides the user through a sequence of SL analysis procedures, as a result of which the system produces a set of internal knowledge structures which serve as input to the TL text generation. Both analysis and generation rely heavily on the analysis of the sublanguage of patent claims. The model has been developed for English and Russian as both SLs and TLs but is readily extensible to other languages.</abstract>
@@ -451,7 +451,7 @@
     </paper>
     <paper id="51">
       <title>The <fixed-case>ELAN</fixed-case> <fixed-case>S</fixed-case>lovene-<fixed-case>E</fixed-case>nglish aligned corpus</title>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
       <pages>349-357</pages>
       <url hash="7b1dbba8">1999.mtsummit-1.51</url>
       <abstract>Multilingual parallel corpora are a basic resource for research and development of MT. Such corpora are still scarce, especially for lower-diffusion languages. The paper presents a sentence-aligned tokenised Slovene-English corpus, developed in the scope of the EU ELAN project. The corpus contains 1 million words from fifteen recent terminology-rich texts and is encoded according to the Guidelines for Text Encoding and Interchange (TEI). Our document type definition is a parametrisation of the TEI which directly encodes translation units of the bi-texts. in a manner similar to that of translation memories. The corpus is aimed as a widely-distributable dataset for language engineering and for translation and terminology studies. The paper describes the compilation of the corpus, its composition, encoding and availability. We highlight the corpus acquisition and distribution bottlenecks and present our solutions. These have to do with the workflow in the project, and. not unrelatedly, with the encoding scheme for the corpus.</abstract>
@@ -459,8 +459,8 @@
     </paper>
     <paper id="52">
       <title>Harmonised large-scale syntactic/semantic lexicons: a <fixed-case>E</fixed-case>uropean multilingual infrastructure</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <pages>358-365</pages>
       <url hash="b6f9e30e">1999.mtsummit-1.52</url>
       <abstract>The paper aims at providing an overview of the situation of Language Resources (LR) in Europe, in particular as emerging from a few European projects regarding the construction of large-scale harmonised resources to be used for many applicative purpose, also of multilingual nature. An important research aspect of the projects is given by the very fact that the large enterprise described is, at our knowledge, the first attempt at developing wide-coverage lexicons for so many languages (12 European languages), with a harmonised common model, and with encoding of structured "semantic types" and semantic (subcategorisation) frames on a large scale. Reaching a common agreed model grounded on sound theoretical approaches within a very large consortium is in itself a challenging task. The actual lexicons will then provide a framework for testing and evaluating the maturity of the current state-of-the-art in lexical semantics grounded on, and connected to. a syntactic foundation. Another research aspect is provided by the recognition of the necessity of accompanying these "static" lexicons with dynamic means of acquiring lexical information from large corpora. This is one of the challenging research aspects of a global strategy for building a large and useful multilingual LR infrastructure.</abstract>
@@ -477,7 +477,7 @@
     <paper id="54">
       <title>A pipelined multi-engine approach to <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean machine translation: <fixed-case>MATES</fixed-case>/<fixed-case>CK</fixed-case></title>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>375-379</pages>
       <url hash="be379ea3">1999.mtsummit-1.54</url>
       <abstract>This paper presents MATES/CK, a Chinese-to-Korean machine translation system. We introduce the design philosophy, component modules, implementation and some other aspects of MATES/CK system in this paper.</abstract>
@@ -499,7 +499,7 @@
       <title>Rapid development of translation tools</title>
       <author><first>Jan</first><last>Amtrup</last></author>
       <author><first>Karine</first><last>Megerdoomian</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <pages>385-389</pages>
       <url hash="0c9f3260">1999.mtsummit-1.56</url>
       <abstract>The Computing Research Laboratory is currently developing technologies that allow rapid deployment of automatic translation capabilities. These technologies are designed to handle low-density languages for which resources, be that human informants or data in electronically readable form, are scarce. All tools are built in an incremental fashion, such that some simple tools (a bilingual dictionary or a glosser) can be delivered early in the development to support initial analysis tasks. More complex applications can be fielded in successive functional versions. The technology we demonstrate has first been applied to Persian-English machine translation within the Shiraz project and is currently extended to cover languages such as Arabic, Japanese, Korean and others.</abstract>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="57">
       <title>The use of abstracted knowledge from an automatically sense-tagged corpus for lexical transfer ambiguity resolution</title>
-      <author><first>Hui-Feng</first><last>Li</last></author>
+      <author id="huifeng-li"><first>Hui-Feng</first><last>Li</last></author>
       <author><first>Namwon Heo. Kyounghi</first><last>Moon</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <pages>390-396</pages>
@@ -545,9 +545,9 @@
     </paper>
     <paper id="61">
       <title>Using a target language model for domain independent lexical disambiguation</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <author><first>Yevgeny</first><last>Ludovik</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>417-420</pages>
       <url hash="2bee322d">1999.mtsummit-1.61</url>
       <abstract>In this paper we describe a lexical disambiguation algorithm based on a statistical language model we call maximum likelihood disambiguation. The maximum likelihood method depends solely on the target language. The model was trained on a corpus of American English newspaper texts. Its performance was tested using output from a transfer based translation system between Turkish and English. The method is source language independent, and can be used for systems translating from any language into English.</abstract>
@@ -565,7 +565,7 @@
       <title>Compound noun decomposition using a <fixed-case>M</fixed-case>arkov model</title>
       <author><first>Jongwoo</first><last>Lee</last></author>
       <author><first>Byoung-Tak</first><last>Zhang</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <pages>427-431</pages>
       <url hash="9c495276">1999.mtsummit-1.63</url>
       <abstract>A statistical method for compound noun decomposition is presented. Previous studies on this problem showed some statistical information are helpful. But applying statistical information was not so systemic that performance depends heavily on the algorithm and some algorithms usually have many separated steps. In our work statistical information is collected from manually decomposed compound noun corpus to build a Markov model for composition. Two Markov chains representing statistical information are assumed independent: one for the sequence of participants' lengths and another for the sequence of participants ' features. Besides Markov assumptions, least participants preference assumption also is used. These two assumptions enable the decomposition algorithm to be a kind of conditional dynamic programming so that efficient and systemic computation can be performed. When applied to test data of size 5027, we obtained a precision of 98.4%.</abstract>
@@ -576,9 +576,9 @@
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Taewan</first><last>Kim</last></author>
       <author><first>Sanghwa</first><last>Yuh</last></author>
-      <author><first>Han-Min</first><last>Jung</last></author>
+      <author id="han-min-jung"><first>Han-Min</first><last>Jung</last></author>
       <author><first>Chul-Min</first><last>Sim</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
       <pages>432-437</pages>
       <url hash="edf1c3f5">1999.mtsummit-1.64</url>
       <abstract>The previous English-Korean MT system that have been developed in Korea have dealt with only written text as translation object. Most of them enumerated a following list of the problems that had not seemed to be easy to solve in the near future : 1) processing of non-continuous idiomatic expressions 2) reduction of too many POS or structural ambiguities 3) robust processing for long sentence and parsing failure 4) selecting correct word correspondence between several alternatives. The problems can be considered as important factors that have influence on the translation quality of machine translation system. This paper describes not only the solutions of problems of the previous English-to-Korean machine translation systems but also the HTML tags management between two structurally different languages, English and Korean. Through the solutions we translate successfully English web documents into Korean one in the English-to-Korean web translator "FromTo/Web-EK" which has been developed from 1997.</abstract>
@@ -624,7 +624,7 @@
       <author><first>Byong-Rae</first><last>Ryu</last></author>
       <author><first>Youngkil</first><last>Kim</last></author>
       <author><first>Sanghwa</first><last>Yuh</last></author>
-      <author><first>Sangkyu</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sangkyu</first><last>Park</last></author>
       <pages>469-475</pages>
       <url hash="63fab4a5">1999.mtsummit-1.69</url>
       <abstract>In this paper we describe and experimentally evaluate FromTo K/E, a rule-based Korean-English machine translation system adapting transfer methodology. In accordance with the view that a successful Korean-English machine translation system presumes a highly efficient robust Korean parser, we develop a parser reinforced with "Fail Softening", i.e. the long sentence segmentation and the recovery of failed parse trees. To overcome the language-typological differences between Korean and English, we adopt a powerful module for processing Korean multi-word lexemes and Korean idiomatic expressions. Prior to parsing Korean sentences, furthermore, we try to resolve the ambiguity of words with unknown grammatical functions on the basis of the collocation and subcategorization information. The results of the experimental evaluation show that the degree of understandability for sample 2000 sentences amounts to 2.67, indicating that the meaning of the translated English sentences is almost clear to users, but the sentences still include minor grammatical or stylistic errors up to max. 30% of the whole words.</abstract>
@@ -648,7 +648,7 @@
     </paper>
     <paper id="72">
       <title><fixed-case>WEBTRAN</fixed-case>: a controlled language machine translation system for building multilingual services on <fixed-case>I</fixed-case>nternet</title>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <author><first>Jarno</first><last>Tenni</last></author>
       <author><first>Catherine</first><last>Bounsaythip</last></author>
       <author><first>Kristiina</first><last>Jaaranen</last></author>
@@ -658,7 +658,7 @@
     </paper>
     <paper id="73">
       <title>Improvement of translation quality of <fixed-case>E</fixed-case>nglish newspaper headlines by automatic preediting</title>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Ichiko</first><last>Sata</last></author>
       <pages>496-500</pages>
       <url hash="2c6ce8c8">1999.mtsummit-1.73</url>
@@ -668,7 +668,7 @@
     <paper id="74">
       <title>Transfer in experience-guided machine translation</title>
       <author><first>Gang</first><last>Zhao</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>501-508</pages>
       <url hash="d32f14aa">1999.mtsummit-1.74</url>
       <abstract>Experience-Guided Machine Translation (EGMT) seeks to represent the translators' knowledge of translation as experiences and translates by analogy. The transfer in EGMT finds the experiences most similar to a new text and its parts, segments it into units of translation and translates them by analogy to the experiences and then assembles them into a whole. A research prototype of analogical transfer from Chinese to English is built to prove the viability of the approach in the exploration of new architecture of machine translation. The paper discusses how the experiences are represented and selected with respect to a new text. It describes how units of translation are defined, partial translation is derived and composed into a whole.</abstract>
@@ -677,7 +677,7 @@
     <paper id="75">
       <title>Example-based machine translation of part-of-speech tagged sentences by recursive division</title>
       <author><first>Tantely</first><last>Andriamanankasina</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <pages>509-517</pages>
       <url hash="c1c7eca0">1999.mtsummit-1.75</url>
@@ -703,7 +703,7 @@
     </paper>
     <paper id="78">
       <title>Sources of linguistic knowledge for minority languages</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <pages>531-537</pages>
       <url hash="2c2457b0">1999.mtsummit-1.78</url>
       <abstract>Language Engineering (LE) products and resources for the world’s “major” languages are steadily increasing, but there remains a major gap as regards less widely-used languages. This paper considers the current situation regarding LE resources for some of the languages in question, and some proposals for rectifying this situation are made, including techniques based on adapting existing resources and “knowledge extraction” techniques from machine-readable corpora.</abstract>
@@ -712,7 +712,7 @@
     <paper id="79">
       <title><fixed-case>BITS</fixed-case>: a method for bilingual text search over the Web</title>
       <author><first>Xiaoyi</first><last>Ma</last></author>
-      <author><first>Mark Y.</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark Y.</first><last>Liberman</last></author>
       <pages>538-542</pages>
       <url hash="e8c5613a">1999.mtsummit-1.79</url>
       <abstract>Parallel corpus are valuable resource for machine translation, multi-lingual text retrieval, language education and other applications, but for various reasons, its availability is very limited at present. Noticed that the World Word Web is a potential source to mine parallel text, researchers are making their efforts to explore the Web in order to get a big collection of bitext. This paper presents BITS (Bilingual Internet Text Search), a system which harvests multilingual texts over the World Wide Web with virtually no human intervention. The technique is simple, easy to port to any language pairs, and with high accuracy. The results of the experiments on German-English pair proved that the method is very successful.</abstract>
@@ -738,9 +738,9 @@
     </paper>
     <paper id="82">
       <title>A new approach to the translating telephone</title>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Christopher</first><last>Hogan</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>556-563</pages>
       <url hash="63ba9b66">1999.mtsummit-1.82</url>
       <abstract>The Translating Telephone has been a major goal of speech translation for many years. Previous approaches have attempted to work from limited-domain, fully-automatic translation towards broad-coverage, fully-automatic translation. We are approaching the problem from a different direction: starting with a broad-coverage but not fully-automatic system, and working towards full automation. We believe that working in this direction will provide us with better feedback, by observing users and collecting language data under realistic conditions, and thus may allow more rapid progress towards the same ultimate goal. Our initial approach relies on the wide-spread availability of Internet connections and web browsers to provide a user interface. We describe our initial work, which is an extension of the Diplomat wearable speech translator.</abstract>
@@ -756,13 +756,13 @@
     </paper>
     <paper id="84">
       <title>Quantitative evaluation of machine translation using two-way <fixed-case>MT</fixed-case></title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
       <author><first>Masaki</first><last>Matsudaira</last></author>
       <author><first>Yoshiko</first><last>Shirokizawa</last></author>
       <author><first>Mutsumi</first><last>Kawagoe</last></author>
       <author><first>Shuji</first><last>Kodama</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Shinichiro</first><last>Miyazawa</last></author>
       <author><first>Yasuo</first><last>Nakajima</last></author>
@@ -773,9 +773,9 @@
     </paper>
     <paper id="85">
       <title>Task-based evaluation for machine translation</title>
-      <author><first>Jennifer B.</first><last>Doyon</last></author>
+      <author id="jennifer-doyon"><first>Jennifer B.</first><last>Doyon</last></author>
       <author><first>Kathryn B.</first><last>Taylor</last></author>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <pages>574-578</pages>
       <url hash="1d4b0552">1999.mtsummit-1.85</url>
       <abstract>In an effort to reduce the subjectivity, cost, and complexity of evaluation methods for machine translation (MT) and other language technologies, task-based assessment is examined as an alternative to metrics-based in human judgments about MT, i.e., the previously applied adequacy, fluency, and informativeness measures. For task-based evaluation strategies to be employed effectively to evaluate languageprocessing technologies in general, certain key elements must be known. Most importantly, the objectives the technology’s use is expected to accomplish must be known, the objectives must be expressed as tasks that accomplish the objectives, and then successful outcomes defined for the tasks. For MT, task-based evaluation is correlated to a scale of tasks, and has as its premise that certain tasks are more forgiving of errors than others. In other words, a poor translation may suffice to determine the general topic of a text, but may not permit accurate identification of participants or the specific event. The ordering of tasks according to their tolerance for errors, as determined by actual task outcomes provided in this paper, is the basis of a scale and repeatable process by which to measure MT systems that has advantages over previous methods.</abstract>
@@ -843,7 +843,7 @@
     <paper id="92">
       <title>Linking translation memories with example-based machine translation</title>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Silvia</first><last>Hansen</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen</last></author>
       <pages>617-624</pages>
       <url hash="cabc89e6">1999.mtsummit-1.92</url>
       <abstract>The paper reports on experiments which compare the translation outcome of three corpus-based MT systems, a string-based translation memory (STM), a lexeme-based translation memory (LTM) and the example-based machine translation (EBMT) system EDGAR. We use a fully automatic evaluation method to compare the outcome of each MT system and discuss the results. We investigate the benefits for the linkage of different MT strategies such as TMsystems and EBMT systems.</abstract>
@@ -859,8 +859,8 @@
     </paper>
     <paper id="94">
       <title>Resolving category ambiguity of non-text symbols in <fixed-case>M</fixed-case>andarin text</title>
-      <author><first>Feng-Long</first><last>Hwang</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Hwang</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <pages>633-640</pages>
       <url hash="5946beb0">1999.mtsummit-1.94</url>
       <bibkey>hwang-yu-1999-resolving</bibkey>
@@ -876,7 +876,7 @@
     </paper>
     <paper id="96">
       <title>A multilevel framework for incremental development of <fixed-case>MT</fixed-case> systems</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <pages>646-653</pages>
       <url hash="cbf10ad5">1999.mtsummit-1.96</url>
       <abstract>We describe a Machine Translation framework aimed at the rapid development of large scale robust machine translation systems for assimilation purposes, where the MT system is incorporated as one of the tools in an analyst’s workstation. The multilevel architecture of the system is designed to enable early delivery of functional translation capabilities and incremental improvement of quality. A crucial aspect of the framework is a careful articulation of a software architecture, a linguistic architecture and an incremental development process of linguistic knowledge.</abstract>
diff --git a/data/xml/1999.tc.xml b/data/xml/1999.tc.xml
index b2fb7d6f64..40f4b183f5 100644
--- a/data/xml/1999.tc.xml
+++ b/data/xml/1999.tc.xml
@@ -43,7 +43,7 @@
     </paper>
     <paper id="6">
       <title>A Multi-level Framework for Memory-Based Translation Aid Tools</title>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Christos</first><last>Malavazos</last></author>
       <author><first>Ioannis</first><last>Triantafyllou</last></author>
       <url hash="b05e55db">1999.tc-1.6</url>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="11">
       <title>A Building Blocks Approach to Translation Memory</title>
-      <author><first>Kevin</first><last>McTait</last></author>
+      <author id="kevin-mctait"><first>Kevin</first><last>McTait</last></author>
       <author><first>Maeve</first><last>Olohan</last></author>
       <author><first>Arturo</first><last>Trujillo</last></author>
       <url hash="d03d18d7">1999.tc-1.11</url>
@@ -83,7 +83,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>MABL</fixed-case>e: A Multi-lingual Authoring Tool for Business Letters</title>
-      <author><first>John</first><last>Tait</last></author>
+      <author id="john-tait"><first>John</first><last>Tait</last></author>
       <author><first>Jeremy</first><last>Ellman</last></author>
       <author><first>Diomidis</first><last>Spinelis</last></author>
       <url hash="88c76e32">1999.tc-1.12</url>
@@ -104,8 +104,8 @@
     <paper id="15">
       <title>Integrating Translation Technologies Using <fixed-case>SALT</fixed-case></title>
       <author><first>Gerhard</first><last>Budin</last></author>
-      <author><first>Alan K.</first><last>Melby</last></author>
-      <author><first>Sue Ellen</first><last>Wright</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
+      <author id="sue-ellen-wright"><first>Sue Ellen</first><last>Wright</last></author>
       <author><first>Deryle</first><last>Lonsdale</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
       <url hash="4185d1f1">1999.tc-1.15</url>
diff --git a/data/xml/1999.tmi.xml b/data/xml/1999.tmi.xml
index 6c27ad9795..af2632b3f9 100644
--- a/data/xml/1999.tmi.xml
+++ b/data/xml/1999.tmi.xml
@@ -10,7 +10,7 @@
     </meta>
     <paper id="1">
       <title>Mental spaces, space builders and bilingual summarization of news reports</title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <author><first>Jaana</first><last>Anttila</last></author>
       <author><first>Dan-Ivar</first><last>Jacobsson</last></author>
       <url hash="e5b4b5bb">1999.tmi-1.1</url>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="3">
       <title>Adding linguistic knowledge to a lexical example-based translation system</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="09b1d004">1999.tmi-1.3</url>
       <bibkey>brown-1999-adding</bibkey>
     </paper>
@@ -42,7 +42,7 @@
     <paper id="5">
       <title>Learning, forgetting and remembering: statistical support for rule-based <fixed-case>MT</fixed-case></title>
       <author><first>Oliver</first><last>Streiter</last></author>
-      <author><first>Leonid L.</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid L.</first><last>Iomdin</last></author>
       <author><first>Munpyo</first><last>Hong</last></author>
       <author><first>Ute</first><last>Hauck</last></author>
       <url hash="7a13cb75">1999.tmi-1.5</url>
@@ -67,8 +67,8 @@
     </paper>
     <paper id="8">
       <title>Bilingual clustering using monolingual algorithms</title>
-      <author><first>Sergio</first><last>Barrachina</last></author>
-      <author><first>Juan Miguel</first><last>Vilar</last></author>
+      <author id="sergio-barrachina"><first>Sergio</first><last>Barrachina</last></author>
+      <author id="juan-miguel-vilar"><first>Juan Miguel</first><last>Vilar</last></author>
       <url hash="babb8846">1999.tmi-1.8</url>
       <bibkey>barrachina-vilar-1999-bilingual</bibkey>
     </paper>
@@ -81,7 +81,7 @@
     </paper>
     <paper id="10">
       <title>A language-neutral sparse-data algorithm for extracting translation patterns</title>
-      <author><first>Kevin</first><last>McTait</last></author>
+      <author id="kevin-mctait"><first>Kevin</first><last>McTait</last></author>
       <author><first>Arturo</first><last>Trujillo</last></author>
       <url hash="f1b3b5af">1999.tmi-1.10</url>
       <bibkey>mctait-trujillo-1999-language</bibkey>
@@ -101,13 +101,13 @@
     </paper>
     <paper id="13">
       <title>Errors of omission in translation</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <url hash="64aeedeb">1999.tmi-1.13</url>
       <bibkey>russell-1999-errors</bibkey>
     </paper>
     <paper id="14">
       <title>Profiling translation projects: an essential part of routing translations</title>
-      <author><first>Nancy L.</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy L.</first><last>Underwood</last></author>
       <author><first>Bart</first><last>Jongejan</last></author>
       <url hash="911992c7">1999.tmi-1.14</url>
       <bibkey>underwood-jongejan-1999-profiling</bibkey>
@@ -115,8 +115,8 @@
     <paper id="15">
       <title>Lexical selection with a target language monolingual corpus and an <fixed-case>MRD</fixed-case></title>
       <author><first>Hyun Ah</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
-      <author><first>Gil Chang</first><last>Kim</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
+      <author id="gil-chang-kim"><first>Gil Chang</first><last>Kim</last></author>
       <url hash="2336e687">1999.tmi-1.15</url>
       <bibkey>lee-etal-1999-lexical</bibkey>
     </paper>
@@ -152,14 +152,14 @@
     </paper>
     <paper id="20">
       <title>Argument status in <fixed-case>J</fixed-case>apanese verb sense disambiguation</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <url hash="2b17e751">1999.tmi-1.20</url>
       <bibkey>baldwin-tanaka-1999-argument</bibkey>
     </paper>
     <paper id="21">
       <title>A valency dictionary architecture for Machine Translation</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Ben</first><last>Hutchinson</last></author>
       <url hash="811df81f">1999.tmi-1.21</url>
@@ -168,7 +168,7 @@
     <paper id="22">
       <title>Multiple strategies for automatic disambiguation in technical translation</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Enrique</first><last>Torrejon</last></author>
       <author><first>Robert</first><last>Igo</last></author>
       <url hash="da52789f">1999.tmi-1.22</url>
@@ -177,7 +177,7 @@
     <paper id="23">
       <title>Pipelined multi-engine Machine Translation: accomplishment of <fixed-case>MATES</fixed-case>/<fixed-case>CK</fixed-case> system</title>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="df0e6e54">1999.tmi-1.23</url>
       <bibkey>zhang-choi-1999-pipelined-multi</bibkey>
     </paper>
diff --git a/data/xml/2000.amta.xml b/data/xml/2000.amta.xml
index d2496498c0..fb7db418dc 100644
--- a/data/xml/2000.amta.xml
+++ b/data/xml/2000.amta.xml
@@ -12,18 +12,18 @@
     </meta>
     <paper id="1">
       <title>Ontological semantics</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <bibkey>nirenburg-2000-ontological</bibkey>
     </paper>
     <paper id="2">
       <title>A gentle introduction to <fixed-case>MT</fixed-case>: theory and current practice</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <bibkey>hovy-2000-gentle</bibkey>
     </paper>
     <paper id="3">
       <title>Controlled languages</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <bibkey>mitamura-nyberg-2000-controlled</bibkey>
     </paper>
     <paper id="4">
@@ -39,7 +39,7 @@
     <paper id="6">
       <title><fixed-case>MT</fixed-case>ranslatability</title>
       <author><first>Arendse</first><last>Bernth</last></author>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <bibkey>bernth-gdaniec-2000-mtranslatability</bibkey>
     </paper>
   </volume>
@@ -55,8 +55,8 @@
     </meta>
     <paper id="1">
       <title>Building a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish mapping between verb concepts for multilingual applications</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <author><first>Dekang</first><last>Lin</last></author>
       <pages>1-12</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_1</url>
@@ -84,7 +84,7 @@
     <paper id="4">
       <title>A self-learning method of parallel texts alignment</title>
       <author><first>António</first><last>Ribeiro</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <author><first>João</first><last>Mexia</last></author>
       <pages>30-39</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_4</url>
@@ -93,11 +93,11 @@
     </paper>
     <paper id="5">
       <title>Handling structural divergences and recovering dropped arguments in a <fixed-case>K</fixed-case>orean/<fixed-case>E</fixed-case>nglish machine translation system</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
       <author><first>Nari</first><last>Kim</last></author>
       <author><first>Myunghee</first><last>Kim</last></author>
@@ -109,11 +109,11 @@
     <paper id="6">
       <title>A machine translation system from <fixed-case>E</fixed-case>nglish to <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Liwei</first><last>Zhao</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <author><first>Christian</first><last>Vogler</last></author>
       <author><first>Norman</first><last>Badler</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>54-67</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_6</url>
       <abstract>Research in computational linguistics, computer graphics and autonomous agents has led to the development of increasingly sophisticated communicative agents over the past few years, bringing new perspective to machine translation research. The engineering of language- based smooth, expressive, natural-looking human gestures can give us useful insights into the design principles that have evolved in natural communication between people. In this paper we prototype a machine translation system from English to American Sign Language (ASL), taking into account not only linguistic but also visual and spatial information associated with ASL signs.</abstract>
@@ -138,7 +138,7 @@
     <paper id="9">
       <title>The effect of source analysis on translation confidence</title>
       <author><first>Arendse</first><last>Bernth</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <pages>89-99</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_9</url>
       <abstract>Translations produced by an MT system can automatically be assigned a number that reflects the MT system’s confidence in their quality. We describe the design of such a confidence index, with focus on the contribution of source analysis, which plays a crucial role in many MT systems, including ours. Various problematic areas of source analysis are identified, and their impact on the overall confidence index is given. We will describe two methods of training the confidence index, one by hand-tuning of the heuristics, the other by linear regression analysis.</abstract>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="10">
       <title>Contemplating automatic <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <pages>100-108</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_10</url>
       <abstract>Researchers, developers, translators and information consumers all share the problem that there is no accepted standard for machine translation. The problem is much further confounded by the fact that MT evaluations properly done require a considerable commitment of time and resources, an anachronism in this day of cross-lingual information processing when new MT systems may developed in weeks instead of years. This paper surveys the needs addressed by several of the classic “types” of MT, and speculates on ways that each of these types might be automated to create relevant, near-instantaneous evaluation of approaches and systems.</abstract>
@@ -155,7 +155,7 @@
     <paper id="11">
       <title>How are you doing? A look at <fixed-case>MT</fixed-case> evaluation</title>
       <author><first>Michelle</first><last>Vanni</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <pages>109-116</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_11</url>
       <abstract>Machine Translation evaluation has been more magic and opinion than science. The history of MT evaluation is long and checkered - the search for objective, measurable, resource-reduced methods of evaluation continues. A recent trend towards task-based evaluation inspires the question - can we use methods of evaluation of language competence in language learners and apply them reasonably to MT evaluation? This paper is the first in a series of steps to look at this question. In this paper, we will present the theoretical framework for our ideas, the notions we ultimately aim towards and some very preliminary results of a small experiment along these lines.</abstract>
@@ -163,9 +163,9 @@
     </paper>
     <paper id="12">
       <title>Recycling annotated parallel corpora for bilingual document composition</title>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Raquel</first><last>Martínez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martínez</last></author>
       <pages>117-126</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_12</url>
       <abstract>Parallel corpora enriched with descriptive annotations facilitate multilingual authoring development. Departing from an annotated bitext we show how SGML markup can be recycled to produce complementary language resources. On the one hand, several translation memory databases together with glossaries of proper nouns have been produced. On the other, DTDs for source and target documents have been derived and put into correspondence. This paper discusses how these resources have been automatically generated and applied to an interactive bilingual authoring system. This tool is capable of handling a substantial proportion of text both in the composition and translation of structured documents.</abstract>
@@ -182,7 +182,7 @@
     <paper id="14">
       <title>What’s been forgotten in translation memory</title>
       <author><first>Elliott</first><last>Macklovitch</last></author>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <pages>137-146</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_14</url>
       <abstract>Although undeniably useful for the translation of certain types of repetitive document, current translation memory technology is limited by the rudimentary techniques employed for approximate matching. Such systems, moreover, incorporate no real notion of a document, since the databases that underlie them are essentially composed of isolated sentence strings. As a result, current TM products can only exploit a small portion of the knowledge residing in translators’ past production. This paper examines some of the changes that will have to be implemented if the technology is to be made more widely applicable.</abstract>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="15">
       <title>Understanding politics by studying weather: a cognitive approach to representation of <fixed-case>P</fixed-case>olish verbs of motion, appearance, and existence</title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <author><first>Hannah</first><last>Duczak</last></author>
       <pages>147-157</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_15</url>
@@ -200,7 +200,7 @@
     <paper id="16">
       <title>Small but efficient: the misconception of high-frequency words in <fixed-case>S</fixed-case>candinavian translation</title>
       <author><first>Pernilla</first><last>Danielsson</last></author>
-      <author><first>Katarina</first><last>Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Mühlenbock</last></author>
       <pages>158-168</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_16</url>
       <abstract>Machine translation has proved itself to be easier between languages that are closely related, such as German and English, while far apart languages, such as Chinese and English, encounter much more problems. The present study focuses upon Swedish and Norwegian; two languages so closely related that they would be referred to as dialects if it were not for the fact that they had a Royal house and an army connected to each of them. Despite their similarity though, some differences make the translation phase much less straight-forward than what could be expected. Taking the outset in sentence aligned parallel texts, this study aims at highlighting some of the differences, and to formalise the results. In order to do so, the texts have been aligned on smaller units, by a simple cognate alignment method. Not at all surprising, the longer words were easier to align, while shorter and often high-frequent words became a problem. Also when trying to align to a specific word sense in a dictionary, content words rendered better results. Therefore, we abandoned the use of single-word units, and searched for multi-word units whenever possible. This study reinforces the view that Machine Translation should rest upon methods based on multiword unit searches.</abstract>
@@ -211,7 +211,7 @@
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
       <author><first>Krzysztof</first><last>Czuba</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>169-178</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_17</url>
       <abstract>We describe our experience in adapting an existing high- quality, interlingual, unidirectional machine translation system to a new domain and bidirectional translation for a new language pair (English and Italian). We focus on the interlingua design changes which were necessary to achieve high quality output in view of the language mismatches between English and Italian. The representation we propose contains features that are interpreted differently, depending on the translation direction. This decision simplified the process of creating the interlingua for individual sentences, and allows the system to defer mapping of language-specific features (such as tense and aspect), which are realized when the target syntactic feature structure is created. We also describe a set of problems we encountered in translating modal verbs, and discuss the representation of modality in our interlingua.</abstract>
@@ -220,7 +220,7 @@
     <paper id="18">
       <title>Text meaning representation as a basis for representation of text interpretation</title>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <pages>179-188</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_18</url>
       <abstract>In this paper we propose a representation for what we have called an interpretation of a text. We base this representation on TMR (Text Meaning Representation), an interlingual representation developed for Machine Translation purposes. A TMR consists of a complex feature-value structure, with the feature names and filler values drawn from an ontology, in this case, ONTOS, developed concurrently with TMR. We suggest on the basis of previous work, that a representation of an interpretation of a text must build on a TMR structure for the text in several ways: (1) by the inclusion of additional required features and feature values (which may themselves be complex feature structures); (2) by pragmatically filling in empty slots in the TMR structure itself; and (3) by supporting the connections between feature values by including, as part of the TMR itself, the chains of inferencing that link various parts of the structure.</abstract>
@@ -247,7 +247,7 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>KANTOO</fixed-case> machine translation environment</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>192-195</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_20</url>
@@ -313,7 +313,7 @@
     <paper id="1">
       <title>Is <fixed-case>MT</fixed-case> software documentation appropriate for <fixed-case>MT</fixed-case> users?</title>
       <author><first>David</first><last>Mowatt</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>223-238</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_26</url>
       <abstract>This paper discusses an informal methodology for evaluating Machine Translation software documentation with reference to a case study, in which a number of currently available MT packages are evaluated. Different types of documentation style are discussed, as well as different user profiles. It is found that documentation is often inadequate in identifying the level of linguistic background and knowledge necessary to use translation software, and in explaining technical (linguistic) terms needed to use the software effectively. In particular, the level of knowledge and training needed to use the software is often incompatible with the user profile implied by the documentation. Also, guidance on how to perform more complex tasks, which may be especially idiosyncratic, is often inadequate or missing altogether.</abstract>
@@ -332,13 +332,13 @@
     <paper id="3">
       <title>Machine translation systems: <fixed-case>E</fixed-case>-K, K-<fixed-case>E</fixed-case>, <fixed-case>J</fixed-case>-K, K-<fixed-case>J</fixed-case></title>
       <author><first>Yu Seop</first><last>Kim</last></author>
-      <author><first>Sung Dong</first><last>Kim</last></author>
+      <author id="sung-dong-kim"><first>Sung Dong</first><last>Kim</last></author>
       <author><first>Seong Bae</first><last>Park</last></author>
       <author><first>Jong Woo</first><last>Lee</last></author>
       <author><first>Jeong Ho</first><last>Chang</last></author>
       <author><first>Kyu Baek</first><last>Hwang</last></author>
       <author><first>Min O</first><last>Jang</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <pages>248-251</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-39965-8_28</url>
       <abstract>We present four kinds of machine translation system in this description: E-K (English to Korean), K-E (Korean to English), J-K (Japanese to Korean), K-J (Korean to Japanese). Among these, E-K and K-J translation systems are published commercially, and the other systems have finished their development. This paper describes the structure and function of each system with figures and translation results.</abstract>
diff --git a/data/xml/2000.bcs.xml b/data/xml/2000.bcs.xml
index 2ef5c37f8a..ae9452150f 100644
--- a/data/xml/2000.bcs.xml
+++ b/data/xml/2000.bcs.xml
@@ -11,16 +11,16 @@
     <paper id="1">
       <title>Towards memory and template-based translation synthesis</title>
       <author><first>Christos</first><last>Malavazos</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>George</first><last>Carayannis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="george-carayannis"><first>George</first><last>Carayannis</last></author>
       <url hash="c58eea30">2000.bcs-1.1</url>
       <bibkey>malavazos-etal-2000-towards</bibkey>
     </paper>
     <paper id="2">
       <title>Building a lexicon for an <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>asque <fixed-case>MT</fixed-case> system from heterogeneous wide-coverage dictionaries</title>
       <author><first>Arantxa</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="ba49e0d3">2000.bcs-1.2</url>
       <bibkey>diaz-de-ilarraza-etal-2000-building</bibkey>
     </paper>
@@ -29,7 +29,7 @@
       <author><first>Ioannis</first><last>Triantafyllou</last></author>
       <author><first>Iason</first><last>Demiros</last></author>
       <author><first>Christos</first><last>Malavazos</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <url hash="c35c83f7">2000.bcs-1.3</url>
       <bibkey>triantafyllou-etal-2000-alignment</bibkey>
     </paper>
@@ -41,8 +41,8 @@
     </paper>
     <paper id="5">
       <title>Effectiveness of layering translation rules based on transition networks in machine translation using inductive learning with genetic algorithms</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <url hash="48aa49e2">2000.bcs-1.5</url>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="7">
       <title>Learning machine translation strategies using commercial systems: discovering word reordering rules</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="02387945">2000.bcs-1.7</url>
       <bibkey>forcada-2000-learning</bibkey>
     </paper>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="10">
       <title>An example-based <fixed-case>MT</fixed-case> system in news items domain from <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndian languages</title>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="43f29f39">2000.bcs-1.10</url>
       <bibkey>bandyopadhyay-2000-example</bibkey>
     </paper>
@@ -85,16 +85,16 @@
       <title><fixed-case>EMILLE</fixed-case>: building a corpus of <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian languages</title>
       <author><first>Anthony</first><last>McEnery</last></author>
       <author><first>Paul</first><last>Baker</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <url hash="b76add3f">2000.bcs-1.11</url>
       <bibkey>mcenery-etal-2000-emille</bibkey>
     </paper>
     <paper id="12">
       <title>Reusability of wide-coverage linguistic resources in the construction of multilingual technical documentation</title>
       <author><first>Arantxa</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="d8fd63e6">2000.bcs-1.12</url>
       <bibkey>diaz-de-ilarraza-etal-2000-reusability</bibkey>
     </paper>
@@ -113,65 +113,65 @@
     </paper>
     <paper id="15">
       <title>Semi-automatic construction of multilingual lexicons</title>
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <url hash="02a6acc4">2000.bcs-1.15</url>
       <bibkey>cahill-2000-semi</bibkey>
     </paper>
     <paper id="16">
       <title>Evaluation of statistical tools for automatic extraction of lexical correspondences between parallel texts</title>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <url hash="0c710b19">2000.bcs-1.16</url>
       <bibkey>kraif-2000-evaluation</bibkey>
     </paper>
     <paper id="17">
       <title>Semantic approach to bridging reference resolution</title>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Maximiliano</first><last>Saiz-Noeda</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="maximiliano-saiz-noeda"><first>Maximiliano</first><last>Saiz-Noeda</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
       <author><first>Manual</first><last>Palomar</last></author>
       <url hash="bd53cf32">2000.bcs-1.17</url>
       <bibkey>munoz-etal-2000-semantic</bibkey>
     </paper>
     <paper id="18">
       <title>Evaluation environment for anaphora resolution</title>
-      <author><first>Catalina</first><last>Barbu</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="catalina-barbu"><first>Catalina</first><last>Barbu</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="a980cb90">2000.bcs-1.18</url>
       <bibkey>barbu-mitkov-2000-evaluation</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>NLP</fixed-case> system oriented to anaphora resolution</title>
-      <author><first>Maximiliano</first><last>Saiz-Noeda</last></author>
+      <author id="maximiliano-saiz-noeda"><first>Maximiliano</first><last>Saiz-Noeda</last></author>
       <author><first>Manual</first><last>Palomar</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <url hash="0bfa0847">2000.bcs-1.19</url>
       <bibkey>saiz-noeda-etal-2000-nlp</bibkey>
     </paper>
     <paper id="20">
       <title><fixed-case>LINGUA</fixed-case>: a robust architecture for text processing and anaphora resolution in <fixed-case>B</fixed-case>ulgarian</title>
-      <author><first>Hristo</first><last>Tanev</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="3abf61db">2000.bcs-1.20</url>
       <bibkey>tanev-mitkov-2000-lingua</bibkey>
     </paper>
     <paper id="21">
       <title>Grammar specification for the recognition of temporal expressions</title>
-      <author><first>Estela</first><last>Saquete</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <url hash="09304a39">2000.bcs-1.21</url>
       <bibkey>saquete-martinez-barco-2000-grammar</bibkey>
     </paper>
     <paper id="22">
       <title><fixed-case>VASISTH</fixed-case>: an ellipsis resolution algorithm for <fixed-case>I</fixed-case>ndian languages</title>
-      <author><first>L.</first><last>Sobha</last></author>
+      <author id="sobha-l"><first>L.</first><last>Sobha</last></author>
       <author><first>B. N.</first><last>Patnaik</last></author>
       <url hash="82eebd58">2000.bcs-1.22</url>
       <bibkey>sobha-patnaik-2000-vasisth</bibkey>
     </paper>
     <paper id="23">
       <title>Generating personal profiles</title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Hugo</first><last>Molina-Salgado</last></author>
       <url hash="27ce94fe">2000.bcs-1.23</url>
       <bibkey>cowie-etal-2000-generating</bibkey>
@@ -184,9 +184,9 @@
     </paper>
     <paper id="25">
       <title>Generating from a discourse model</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Dario</first><last>Bianchi</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <url hash="927cef67">2000.bcs-1.25</url>
       <bibkey>delmonte-etal-2000-generating</bibkey>
     </paper>
diff --git a/data/xml/2000.eamt.xml b/data/xml/2000.eamt.xml
index de7c28355c..623d6d2226 100644
--- a/data/xml/2000.eamt.xml
+++ b/data/xml/2000.eamt.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>Introduction</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="319574a7">2000.eamt-1.1</url>
       <bibkey>hutchins-2000-introduction</bibkey>
     </paper>
@@ -23,13 +23,13 @@
     </paper>
     <paper id="3">
       <title>Extracting Terms and Terminological Collocations from the <fixed-case>ELAN</fixed-case> <fixed-case>S</fixed-case>lovene–<fixed-case>E</fixed-case>nglish Parallel Corpus</title>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <url hash="0054d4b2">2000.eamt-1.3</url>
       <bibkey>vintar-2000-extracting</bibkey>
     </paper>
     <paper id="4">
       <title>Extracting Textual Associations in Part-of-Speech Tagged Corpora</title>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Sylvie</first><last>Guilloré</last></author>
       <author><first>José Gabriel</first><last>Pereira Lopes</last></author>
       <url hash="b5abbd3f">2000.eamt-1.4</url>
@@ -37,28 +37,28 @@
     </paper>
     <paper id="5">
       <title>Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="67331dd8">2000.eamt-1.5</url>
       <bibkey>och-ney-2000-statistical</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>POLENG</fixed-case>–Adjusting a Rule-Based <fixed-case>P</fixed-case>olish–<fixed-case>E</fixed-case>nglish Machine Translation System by Means of Corpus Analysis</title>
       <author><first>Krzysztof</first><last>Jassem</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Grzegorz</first><last>Krynicki</last></author>
       <url hash="c0e8499c">2000.eamt-1.6</url>
       <bibkey>jassem-etal-2000-poleng</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>lovene–<fixed-case>E</fixed-case>nglish Datasets for <fixed-case>MT</fixed-case></title>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <url hash="f1d29c51">2000.eamt-1.7</url>
       <bibkey>erjavec-2000-slovene</bibkey>
     </paper>
     <paper id="8">
       <title>The <fixed-case>IAMT</fixed-case> Certification Initiative and Defining Translation System Categories</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="00217b7c">2000.eamt-1.8</url>
       <bibkey>hutchins-2000-iamt</bibkey>
     </paper>
diff --git a/data/xml/2000.iwpt.xml b/data/xml/2000.iwpt.xml
index 4e4b31f501..f4705560d5 100644
--- a/data/xml/2000.iwpt.xml
+++ b/data/xml/2000.iwpt.xml
@@ -43,7 +43,7 @@
     <paper id="2">
       <title>Automatic Grammar Induction: Combining, Reducing and Doing Nothing</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <author><first>Grace</first><last>Ngai</last></author>
       <pages>1-5</pages>
       <url hash="7023653a">2000.iwpt-1.2</url>
@@ -68,9 +68,9 @@
     </paper>
     <paper id="5">
       <title>A Bootstrapping Approach to Parser Development</title>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>17-28</pages>
       <url hash="ba9b39aa">2000.iwpt-1.5</url>
       <abstract>This paper presents a robust parsing system for unrestricted Basque texts. It analyzes a sentence in two stages: a unification-based parser builds basic syntactic units such as NPs, PPs, and sentential complements, while a finite-state parser performs syntactic disambiguation and filtering of the results. The system has been applied to the acquisition of verbal subcategorization information, obtaining 66% recall and 87% precision in the determination of verb subcategorization instances. This information will be later incorporated to the parser, in order to improve its performance.</abstract>
@@ -78,10 +78,10 @@
     </paper>
     <paper id="6">
       <title>New Tabular Algorithms for Parsing</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Jorge</first><last>Graña</last></author>
       <author><first>Manuel</first><last>Vilares</last></author>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
       <pages>29-40</pages>
       <url hash="83d18b19">2000.iwpt-1.6</url>
       <abstract>We develop a set of new tabular parsing algorithms for Linear Indexed Grammars, including bottom-up algorithms and Earley-like algorithms with and without the valid prefix property, creating a continuum in which one algorithm can in turn be derived from another. The output of these algorithms is a shared forest in the form of a context-free grammar that encodes all possible derivations for a given input string.</abstract>
@@ -108,7 +108,7 @@
     <paper id="9">
       <title>Automated Extraction of <fixed-case>TAG</fixed-case>s from the <fixed-case>Penn</fixed-case> <fixed-case>Treebank</fixed-case></title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>65-76</pages>
       <url hash="6373ce40">2000.iwpt-1.9</url>
       <abstract>The accuracy of statistical parsing models can be improved with the use of lexical information. Statistical parsing using Lexicalized tree adjoining grammar (LTAG), a kind of lexicalized grammar, has remained relatively unexplored. We believe that is largely in part due to the absence of large corpora accurately bracketed in terms of a perspicuous yet broad coverage LTAG. Our work attempts to alleviate this difficulty. We extract different LTAGs from the Penn Treebank. We show that certain strategies yield an improved extracted LTAG in terms of compactness, broad coverage, and supertagging accuracy. Furthermore, we perform a preliminary investigation in smoothing these grammars by means of an external linguistic resource, namely, the tree families of an XTAG grammar, a hand built grammar of English.</abstract>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="10">
       <title>From Cases to Rules and Vice Versa: Robust Practical Parsing With Analogy</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>77-88</pages>
       <url hash="c2fa7522">2000.iwpt-1.10</url>
       <abstract>This article describes the architecture of the Survey Parser and discusses two major components related to the analogy-based parsing of unrestricted English. Firstly, it discusses the automatic generation of a large declarative formal grammar from a corpus that has been syntactically analysed. Secondly, it describes analogy-based parsing that employs both the automatically learned rules and the database of cases to determine the syntactic structure of the input string. Statistics are presented to characterise the performance of the parsing system.</abstract>
@@ -124,7 +124,7 @@
     </paper>
     <paper id="11">
       <title>A Transformation-based Parsing Technique With Anytime Properties</title>
-      <author><first>Kilian</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian</first><last>Foth</last></author>
       <author><first>Ingo</first><last>Schröder</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>89-100</pages>
@@ -134,7 +134,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>SOUP</fixed-case>: A Parser for Real-world Spontaneous Speech</title>
-      <author><first>Marsal</first><last>Gavaldà</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavaldà</last></author>
       <pages>101-110</pages>
       <url hash="d37adc0b">2000.iwpt-1.12</url>
       <abstract>This paper describes the key features of SOUP, a stochastic, chart-based, top-down parser, especially engineered for real-time analysis of spoken language with very large, multi-domain semantic grammars. SOUP achieves flexibility by encoding context-free grammars, specified for example in the Java Speech Grammar Format, as probabilistic recursive transition networks, and robustness by allowing skipping of input words at any position and producing ranked interpretations that may consist of multiple parse trees. Moreover, SOUP is very efficient, which allows for practically instantaneous backend response.</abstract>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="14">
       <title>A Neural Network Parser that Handles Sparse Data</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>123-134</pages>
       <url hash="3ed32612">2000.iwpt-1.14</url>
       <abstract>Previous work has demonstrated the viability of a particular neural network architecture, Simple Synchrony Networks, for syntactic parsing. Here we present additional results on the performance of this type of parser, including direct comparisons on the same dataset with a standard statistical parsing method, Probabilistic Context Free Grammars. We focus these experiments on demonstrating one of the main advantages of the SSN parser over the PCFG, handling sparse data. We use smaller datasets than are typically used with statistical methods, resulting in the PCFG finding parses for under half of the test sentences, while the SSN finds parses for all sentences. Even on the PCFG ‘s parsed half, the SSN performs better than the PCFG, as measure by recall and precision on both constituents and a dependency-like measure.</abstract>
@@ -158,8 +158,8 @@
     </paper>
     <paper id="15">
       <title>A Context-free Approximation of <fixed-case>H</fixed-case>ead-driven <fixed-case>P</fixed-case>hrase <fixed-case>S</fixed-case>tructure <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>135-146</pages>
       <url hash="1ede73be">2000.iwpt-1.15</url>
       <abstract>We present a context-free approximation of unification-based grammars, such as HPSG or PATR-II. The theoretical underpinning is established through a least fixpoint construction over a certain monotonic function. In order to reach a finite fixpoint, the concrete implementation can be parameterized in several ways , either by specifying a finite iteration depth, by using different restrictors, or by making the symbols of the CFG more complex adding annotations a la GPSG. We also present several methods that speed up the approximation process and help to limit the size of the resulting CF grammar.</abstract>
@@ -167,8 +167,8 @@
     </paper>
     <paper id="16">
       <title>Optimal Ambiguity Packing in Context-free Parsers with Interleaved Unification</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Carolyn Penstein</first><last>Rosé</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></author>
       <pages>147-158</pages>
       <url hash="7cc8b864">2000.iwpt-1.16</url>
       <abstract>Ambiguity packing is a well known technique for enhancing the efficiency of context-free parsers. However, in the case of unification-augmented context-free parsers where parsing is interleaved with feature unification, the propagation of feature structures imposes difficulties on the ability of the parser to effectively perform ambiguity packing. We demonstrate that a clever heuristic for prioritizing the execution order of grammar rules and parsing actions can achieve a high level of ambiguity packing that is provably optimal. We present empirical evaluations of the proposed technique, performed with both a Generalized LR parser and a chart parser, that demonstrate its effectiveness.</abstract>
@@ -184,7 +184,7 @@
     </paper>
     <paper id="18">
       <title>Improved Left-corner Chart Parsing for Large Context-free Grammars</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>171-182</pages>
       <url hash="d523663b">2000.iwpt-1.18</url>
       <abstract>We develop an improved form of left-corner chart parsing for large context-free grammars, introducing improvements that result in significant speed-ups more compared to previously-known variants of left corner parsing. We also compare our method to several other major parsing approaches, and find that our improved left-corner parsing method outperforms each of these across a range of grammars. Finally, we also describe a new technique for minimizing the extra information needed to efficiently recover parses from the data structures built in the course of parsing.</abstract>
@@ -209,7 +209,7 @@
     </paper>
     <paper id="21">
       <title>An Efficient <fixed-case>LR</fixed-case> Parser Generator for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <pages>207-218</pages>
       <url hash="73000bac">2000.iwpt-1.21</url>
       <abstract>The first published LR algorithm for Tree Adjoining Grammars (TAGs [Joshi and Schabes, 1996]) was due to Schabes and Vijay-Shanker [1990] . Nederhof [1998] showed that it was incorrect (after [Kinyon, 1997]), and proposed a new one. Experimenting with his new algorithm over the XTAG English Grammar [XTAG Research Group, 1998] he concluded that LR parsing was inadequate for use with reasonably sized grammars because the size of the generated table was unmanageable. Also the degree of conflicts is too high. In this paper we discuss issues involved with LR parsing for TAGs and propose a new version of the algorithm that, by maintaining the degree of prediction while deferring the “subtree reduction”, dramatically reduces both the average number of conflicts per state and the size of the parser.</abstract>
@@ -227,7 +227,7 @@
       <title>On the Use of Grammar Based Language Models for Statistical Machine Translation</title>
       <author><first>Hassan</first><last>Sawaf</last></author>
       <author><first>Kai</first><last>Schütz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>231-241</pages>
       <url hash="99f459e7">2000.iwpt-1.23</url>
       <abstract>In this paper, we describe some concepts of language models beyond the usually used standard trigram and use such language models for statistical machine translation. In statistical machine translation the language model is the a-priori knowledge source of the system about the target language. One important requirement for the language model is the correct word order, given a certain choice of words, and to score the translations generated by the translation model <tex-math>\textrm{Pr}(f_1^J/e^I_1)</tex-math>, in view of the syntactic context. In addition to standard <tex-math>m</tex-math>-grams with long histories, we examine the use of Part-of-Speech based models as well as linguistically motivated grammars with stochastic parsing as a special type of language model. Translation results are given on the VERBMOBIL task, where translation is performed from German to English, with vocabulary sizes of 6500 and 4000 words, respectively.</abstract>
@@ -253,7 +253,7 @@
     <paper id="26">
       <title>Parsing a Lattice with Multiple Grammars</title>
       <author><first>Fuliang</first><last>Weng</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <author><first>Po Chui</first><last>Luk</last></author>
       <pages>266-277</pages>
       <url hash="06c40834">2000.iwpt-1.26</url>
@@ -262,7 +262,7 @@
     </paper>
     <paper id="27">
       <title>Modular Unification-based Parsers</title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <author><first>Jan</first><last>Amtrup</last></author>
       <pages>278-290</pages>
       <url hash="7cc99606">2000.iwpt-1.27</url>
@@ -296,11 +296,11 @@
     </paper>
     <paper id="31">
       <title>Grammar Organization for Cascade-based Parsing in Information Extraction</title>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>297-298</pages>
       <url hash="eb265dbc">2000.iwpt-1.31</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>ciravegna-lavelli-2000-grammar</bibkey>
     </paper>
     <paper id="32">
@@ -349,7 +349,7 @@
     </paper>
     <paper id="37">
       <title>Exploiting Parallelism in Unification-based Parsing</title>
-      <author><first>Marcel P.</first><last>van Lohuizen</last></author>
+      <author id="marcel-p-van-lohuizen"><first>Marcel P.</first><last>van Lohuizen</last></author>
       <pages>309-310</pages>
       <url hash="a2a364a4">2000.iwpt-1.37</url>
       <abstract>Because of the nature of the parsing problem, unification-based parsers are hard to parallelize. We present a parallelization technique designed to cope with these difficulties.</abstract>
@@ -367,11 +367,11 @@
     </paper>
     <paper id="39">
       <title>Uniquely Parsable Accepting Grammar Systems</title>
-      <author><first>Carlos</first><last>Martín-Vide</last></author>
+      <author id="carlos-martin-vide"><first>Carlos</first><last>Martín-Vide</last></author>
       <author><first>Victor</first><last>Mitrana</last></author>
       <pages>313-314</pages>
       <url hash="23ccb7b0">2000.iwpt-1.39</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>martin-vide-mitrana-2000-uniquely</bibkey>
     </paper>
     <paper id="40">
@@ -416,7 +416,7 @@
     <paper id="44">
       <title>The Editing Distance in Shared Forest</title>
       <author><first>Manuel</first><last>Vilares</last></author>
-      <author><first>David</first><last>Cabrero</last></author>
+      <author id="david-cabrero-souto"><first>David</first><last>Cabrero</last></author>
       <author><first>Francisco J.</first><last>Ribadas</last></author>
       <pages>323-324</pages>
       <url hash="4c3bc888">2000.iwpt-1.44</url>
diff --git a/data/xml/2000.tc.xml b/data/xml/2000.tc.xml
index 1da7c950b2..ae3149bf8b 100644
--- a/data/xml/2000.tc.xml
+++ b/data/xml/2000.tc.xml
@@ -31,13 +31,13 @@
     <paper id="4">
       <title>A Language Checker of Controlled Language and its Integration in a Documentation and Translation Workflow</title>
       <author><first>Ingrid</first><last>Almqvist</last></author>
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
       <url hash="b6ff9920">2000.tc-1.4</url>
       <bibkey>almqvist-hein-2000-language</bibkey>
     </paper>
     <paper id="5">
       <title>Evaluating Machine Translation: the Cloze Procedure Revisited</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Elizabeth</first><last>Wild</last></author>
       <url hash="3ac488dc">2000.tc-1.5</url>
       <bibkey>somers-wild-2000-evaluating</bibkey>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>EU</fixed-case> <fixed-case>LE</fixed-case>4 <fixed-case>T</fixed-case>rans<fixed-case>R</fixed-case>outer Project</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="6a1007ee">2000.tc-1.13</url>
       <bibkey>schaler-2000-eu</bibkey>
     </paper>
diff --git a/data/xml/2001.jeptalnrecital.xml b/data/xml/2001.jeptalnrecital.xml
index e3198ae44e..51db08dd25 100644
--- a/data/xml/2001.jeptalnrecital.xml
+++ b/data/xml/2001.jeptalnrecital.xml
@@ -48,7 +48,7 @@
     </frontmatter>
     <paper id="1">
       <title>Un corpus français arboré : quelques interrogations</title>
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
       <author><first>Alexandra</first><last>Kinyon</last></author>
       <author><first>François</first><last>Toussenel</last></author>
@@ -81,7 +81,7 @@
     </paper>
     <paper id="4">
       <title>Atelier <fixed-case>ATOLL</fixed-case> pour les grammaires d’arbres adjoints</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <author><first>Philippe</first><last>Deschamp</last></author>
       <author><first>Linda</first><last>Kaouane</last></author>
@@ -95,7 +95,7 @@
     <paper id="5">
       <title>Modèle d’exploration contextuelle pour l’analyse sémantique de textes</title>
       <author><first>Slim</first><last>Ben Hazez</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <author><first>Jean-Luc</first><last>Minel</last></author>
       <pages>73–82</pages>
       <abstract>Nous présentons dans cet article un modèle d’exploration contextuelle et une plate-forme logicielle qui permet d’accéder au contenu sémantique des textes et d’en extraire des séquences particulièrement pertinentes. L’objectif est de développer et d’exploiter des ressources linguistiques pour identifier dans les textes, indépendamment des domaines traités, certaines des relations organisatrices des connaissances ainsi que les organisations discursives mises en places par l’auteur. L’analyse sémantique du texte est guidée par le repérage d’indices linguistiques déclencheurs dont l’emploi est représentatif des notions étudiées.</abstract>
@@ -108,7 +108,7 @@
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <pages>83–91</pages>
       <abstract>Le sujet du présent article est l’intégration des sens portés par les mots en contexte dans une représentation vectorielle de textes, au moyen d’un modèle probabiliste. La représentation vectorielle considérée est le modèle DSIR, qui étend le modèle vectoriel (VS) standard en tenant compte à la fois des occurrences et des co-occurrences de mots dans les documents. L’intégration des sens dans cette représentation se fait à l’aide d’un modèle de Champ de Markov avec variables cachées, en utilisant une information sémantique dérivée de relations de synonymie extraites d’un dictionnaire de synonymes.</abstract>
       <url hash="2196669a">2001.jeptalnrecital-long.6</url>
@@ -148,7 +148,7 @@
     <paper id="10">
       <title>Etiquetage prosodique semi-automatique des corpus oraux</title>
       <author><first>Estelle</first><last>Campione</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>122–131</pages>
       <abstract>La transcription manuelle de la prosodie est une tâche extrêmement coûteuse en temps, qui requiert des annotateurs très spécialisés, et qui est sujette à de multiples erreurs et une grande part de subjectivité. Une automatisation complète n’est pas envisageable dans l’état actuel de la technologie, mais nous présentons dans cette communication des outils et une méthodologie qui permettent une réduction substantielle du temps d’intervention manuelle, et améliorent l’objectivité et la cohérence du résultat. De plus, les étapes manuelles nécessaires ne demandent pas une expertise phonétique poussée et peuvent être menées à bien par des étudiants et des “linguistes de corpus”.</abstract>
       <url hash="c1072247">2001.jeptalnrecital-long.10</url>
@@ -158,7 +158,7 @@
     <paper id="11">
       <title>Grammaire à substitution d’arbre de complexité polynomiale : un cadre efficace pour <fixed-case>DOP</fixed-case></title>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <pages>132–141</pages>
       <abstract>Trouver l’arbre d’analyse le plus probable dans le cadre du modèle DOP (Data-Oriented Parsing) — une version probabiliste de grammaire à substitution d’arbres développée par R. Bod (1992) — est connu pour être un problème NP-difficile dans le cas le plus général (Sima’an, 1996a). Cependant, si l’on introduit des restrictions a priori sur le choix des arbres élémentaires, on peut obtenir des instances particulières de DOP pour lesquelles la recherche de l’arbre d’analyse le plus probable peut être effectuée en un temps polynomial (par rapport à la taille de la phrase à analyser). La présente contribution se propose d’étudier une telle instance polynomiale de DOP, fondée sur le principe de sélection miminale-maximale et d’en évaluer les performances sur deux corpus différents.</abstract>
       <url hash="13266fb5">2001.jeptalnrecital-long.11</url>
@@ -232,7 +232,7 @@
     </paper>
     <paper id="18">
       <title>Compréhension Automatique de la Parole combinant syntaxe locale et sémantique globale pour une <fixed-case>CHM</fixed-case> portant sur des tâches relativement complexes</title>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <pages>202–211</pages>
       <abstract>Nous présentons dans cet article un système de Compréhension Automatique de la Parole (CAP) tentant de concilier les contraintes antinomiques de robustesse et d’analyse détaillée de la parole spontanée. Dans une première partie, nous montrons l’importance de la mise en oeuvre d’une CAP fine dans l’optique d’une Communication Homme-Machine (CHM) sur des tâches moyennement complexes. Nous présentons ensuite l’architecture de notre système qui repose sur une analyse en deux étapes : une première étape d’analyse syntaxique de surface (Shallow Parsing) générique suivie d’une seconde étape d’analyse sémantico-pragmatique – dépendante du domaine d’application – de la structure profonde de l’ ́enoncé complet.</abstract>
@@ -242,8 +242,8 @@
     </paper>
     <paper id="19">
       <title>Exploitation de l’expertise humaine dans un processus de constitution de terminologie</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>212–221</pages>
       <abstract>Le processus de construction de terminologie ne peut être entièrement automatisé. Les méthodes et des outils de la terminologie computationnelle permettent de prendre en charge une partie de la tâche, mais l’expertise humaine garde une place prépondérant. Le défi pour les outils terminologiques est de dégrossir les tâches qui sont soit trop longues soit trop complexes pour l’utilisateur tout en permettant à ce dernier d’intégrer ses propres connaissances spécialisées et en lui laissant le contrôle sur la terminologie à construire. Nous montrons ici comment le rôle de cette expertise est pris en compte dans SynoTerm, l’outil d’acquisition de relation de synonymie entre termes que nous avons d ́eveloppé.</abstract>
       <url hash="cf18a247">2001.jeptalnrecital-long.19</url>
@@ -271,7 +271,7 @@
     </paper>
     <paper id="22">
       <title>Récupération de segments sous-phrastiques dans une mémoire de traduction</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>242–251</pages>
       <abstract>L’utilité des outils d’aide à la traduction reposant sur les mémoires de traduction est souvent limitée par la nature des segments que celles-ci mettent en correspondance, le plus souvent des phrases entières. Cet article examine le potentiel d’un type de système qui serait en mesure de récupérer la traduction de séquences de mots de longueur arbitraire.</abstract>
@@ -308,9 +308,9 @@
     </paper>
     <paper id="26">
       <title><fixed-case>DEFI</fixed-case>, un outil d’aide à la compréhension</title>
-      <author><first>Archibald</first><last>Michiels</last></author>
+      <author id="archibald-michiels"><first>Archibald</first><last>Michiels</last></author>
       <pages>282–292</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="1e4605e0">2001.jeptalnrecital-long.26</url>
       <language>fra</language>
       <bibkey>michiels-2001-defi</bibkey>
@@ -326,8 +326,8 @@
     </paper>
     <paper id="28">
       <title>Ontologies for Information Retrieval</title>
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
-      <author><first>François</first><last>Rousselot</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="francois-rousselot"><first>François</first><last>Rousselot</last></author>
       <pages>303–312</pages>
       <abstract>The paper presents a system for querying (in natural language) a set of text documents from a limited domain. The domain knowledge, represented in description logics (DL), is used for filtering the documents returned as answer and it is extended dynamically (when new concepts are identified in the texts), as result of DL inference mechanisms. The conceptual hierarchy is built semi-automatically from the texts. Concept instances are identified using shallow natural language parsing techniques.</abstract>
       <url hash="a0b03b51">2001.jeptalnrecital-long.28</url>
@@ -358,9 +358,9 @@
     </frontmatter>
     <paper id="1">
       <title>Modèles de langage hiérarchiques pour les applications de dialogue en parole spontanée</title>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>325–330</pages>
       <abstract>Le cadre de cette étude concerne les systèmes de dialogue via le téléphone entre un serveur de données et un utilisateur. Nous nous intéresserons au cas de dialogues non contraints où l’utilisateur à toute liberté pour formuler ses requêtes. Généralement, le module de Reconnaissance Automatique de la Parole (RAP) de tels serveurs utilise un seul Modèle de Langage (ML) de type bigramme ou trigramme pour modéliser l’ensemble des interventions possibles de l’utilisateur. Ces ML sont appris sur des corpus de phrases retranscrites à partir de sessions entre le serveur et plusieurs utilisateurs. Nous proposons dans cette étude une méthode de segmentation de corpus d’apprentissage de dialogue utilisant une stratégie mixte basée à la fois sur des connaissances explicites mais aussi sur l’optimisation d’un critère statistique. Nous montrons qu’un gain en terme de perplexité et de taux d’erreurs/mot peut être constaté en utilisant un ensemble de sous modèles de langage issus de la segmentation plutôt qu’un modèle unique appris sur l’ensemble du corpus.</abstract>
       <url hash="39836326">2001.jeptalnrecital-poster.1</url>
@@ -450,8 +450,8 @@
     </paper>
     <paper id="10">
       <title>Gestionnaire de dialogue pour un système d’informations à reconnaissance vocale</title>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <pages>381–386</pages>
       <abstract>Dans cet article, nous présentons un gestionnaire de dialogue pour un système de demande d’informations à reconnaissance vocale. Le gestionnaire de dialogue dispose de différentes sources de connaissance, des connaissances statiques et des connaissances dynamiques. Ces connaissances sont gérées et utilisées par le gestionnaire de dialogue via des stratégies. Elles sont mises en oeuvre et organisées en fonction des objectifs concernant le système de dialogue et en fonction des choix ergonomiques que nous avons retenus. Le gestionnaire de dialogue utilise un modèle de dialogue fondé sur la détermination de phases et un modèle de la tâche dynamique. Il augmente les possibilités d’adaptation de la stratégie en fonction des historiques et de l’état du dialogue. Ce gestionnaire de dialogue, implémenté et évalué lors de la dernière campagne d’évaluation du projet LE-3 ARISE, a permi une amélioration du taux de succès de dialogue (de 53% à 85%).</abstract>
       <url hash="168e9022">2001.jeptalnrecital-poster.10</url>
@@ -471,7 +471,7 @@
     <paper id="12">
       <title>Word Sense Disambiguation in a <fixed-case>S</fixed-case>panish Explanatory Dictionary</title>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>393–398</pages>
       <abstract>We apply word sense disambiguation to the definitions in a Spanish explanatory dictionary. To calculate the scores of word senses basing on the context (which in our case is the dictionary definition), we use a modification of Lesk’s algorithm. The algorithm relies on a comparison between two words. In the original Lesk’s algorithm, the comparison is trivial: two words are either the same lexeme or not; our modification consists in fuzzy (weighted) comparison using a large synonym dictionary and a simple derivational morphology system. Application of disambiguation to dictionary definitions (in contrast to usual texts) allows for some simplifications of the algorithm, e.g., we do not have to care of context window size.</abstract>
       <url hash="b3ced2ab">2001.jeptalnrecital-poster.12</url>
@@ -479,9 +479,9 @@
     </paper>
     <paper id="13">
       <title>L’apport de connaissances morphologiques pour la projection de requêtes sur une terminologie normalisée</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Stefan</first><last>Darmoni</last></author>
+      <author id="stefan-darmoni"><first>Stefan</first><last>Darmoni</last></author>
       <pages>399–404</pages>
       <abstract>L’apport de connaissances linguistiques à la recherche d’information reste un sujet de débat. Nous examinons ici l’influence de connaissances morphologiques (flexion, dérivation) sur les résultats d’une tâche spécifique de recherche d’information dans un domaine spécialisé. Cette influence est étudiée à l’aide d’une liste de requêtes réelles recueillies sur un serveur opérationnel ne disposant pas de connaissances linguistiques. Nous observons que pour cette tâche, flexion et dérivation apportent un gain modéré mais réel.</abstract>
       <url hash="8e2630af">2001.jeptalnrecital-poster.13</url>
@@ -505,7 +505,7 @@
     </frontmatter>
     <paper id="1">
       <title>Extraction de collocations à partir de textes</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>3–8</pages>
       <abstract>Les collocations sont intéressantes dans de nombreuses applications du TALN comme la l’analyse ou la génération de textes ou encore la lexicographie monolingue ou bilingue. Les premières tentatives d’extraction automatique de collocations à partir de textes ou de dictionnaires ont vu le jour dans les années 1970. Il s’agissait principalement de méthodes à base de statistiques lexicales. Aujourd’hui, les méthodes d’identification automatique font toujours appel à des statistiques mais qu’elles combinent avec des analyses linguistiques. Nous examinons quelques méthodes d’identification des collocations en corpus en soulignant pour chaque méthode les propriétés linguistiques des collocations qui ont été prises en compte.</abstract>
       <url hash="98c8901d">2001.jeptalnrecital-tutoriel.1</url>
@@ -532,16 +532,16 @@
     </paper>
     <paper id="4">
       <title>Formal Languages for Linguists: Classical and Nonclassical Models</title>
-      <author><first>Carlos</first><last>Martín-Vide</last></author>
+      <author id="carlos-martin-vide"><first>Carlos</first><last>Martín-Vide</last></author>
       <pages>77–127</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="1c7a8b1d">2001.jeptalnrecital-tutoriel.4</url>
       <language>fra</language>
       <bibkey>martin-vide-2001-formal</bibkey>
     </paper>
     <paper id="5">
       <title>L’apport de connaissances linguistiques en recherche documentaire</title>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <pages>128–142</pages>
       <abstract>L’utilisation de connaissances et de traitements linguistiques évolués en recherche documentaire ne fait pas l’unanimité dans le milieu scientifique. En effet, de nombreuses expériences semblent montrer que les résultats obtenus ne sont pas améliorés, voire sont parfois dégradés, lorsque de telles connaissances sont utilisées dans un système de RD. Dans ce tutoriel, nous montrons que les environnements d’évaluation ne sont pas adaptés aux besoins réels d’un utilisateur car celui-ci recherche presque toujours une information. Il veut donc retrouver des documents pertinents le plus rapidement possible car ce n’est pas là le but de sa recherche. Le temps global de la recherche est donc fondamentalement important. Néanmoins, le cadre d’évaluation TREC nous permet de montrer que l’utilisation de connaissances linguistiques permet d’augmenter la précision des premiers documents renvoyés, ce qui est très important pour diminuer le temps de recherche.</abstract>
       <url hash="a9e0e1a7">2001.jeptalnrecital-tutoriel.5</url>
@@ -550,7 +550,7 @@
     </paper>
     <paper id="6">
       <title>Intex et ses applications informatiques</title>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Antonio</first><last>Balvet</last></author>
       <pages>143–172</pages>
@@ -594,7 +594,7 @@
     </paper>
     <paper id="3">
       <title>Bibliothèques d’automates finis et grammaires context-free : de nouveaux traitements informatiques</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>424–433</pages>
       <abstract>La quantité de documents disponibles via Internet explose. Cette situation nous incite à rechercher de nouveaux outils de localisation d’information dans des documents et, en particulier, à nous pencher sur l’algorithmique des grammaires context-free appliquée à des familles de graphes d’automates finis (strictement finis ou à cycles). Nous envisageons une nouvelle représentation et de nouveaux traitements informatiques sur ces grammaires, afin d’assurer un accès rapide aux données et un stockage peu coûteux en mémoire.</abstract>
       <url hash="3081d62e">2001.jeptalnrecital-recital.3</url>
diff --git a/data/xml/2001.mtsummit.xml b/data/xml/2001.mtsummit.xml
index b335bd3eb4..0cf754776a 100644
--- a/data/xml/2001.mtsummit.xml
+++ b/data/xml/2001.mtsummit.xml
@@ -6,7 +6,7 @@
       <address>Santiago de Compostela, Spain</address>
       <month>September 18-22</month>
       <year>2001</year>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <venue>mtsummit</venue>
     </meta>
     <frontmatter>
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Towards a new vision for <fixed-case>MT</fixed-case></title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="fb6d1c66">2001.mtsummit-papers.1</url>
       <bibkey>hutchins-2001-towards</bibkey>
     </paper>
@@ -33,7 +33,7 @@
       <title>Using multiple edit distances to automatically rank machine translation output</title>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="1e78aee7">2001.mtsummit-papers.3</url>
       <abstract>This paper addresses the challenging problem of automatically evaluating output from machine translation (MT) systems in order to support the developers of these systems. Conventional approaches to the problem include methods that automatically assign a rank such as A, B, C, or D to MT output according to a single edit distance between this output and a correct translation example. The single edit distance can be differently designed, but changing its design makes assigning a certain rank more accurate, but another rank less accurate. This inhibits improving accuracy of rank assignment. To overcome this obstacle, this paper proposes an automatic ranking method that, by using multiple edit distances, encodes machine-translated sentences with a rank assigned by humans into multi-dimensional vectors from which a classifier of ranks is learned in the form of a decision tree (DT). The proposed method assigns a rank to MT output through the learned DT. The proposed method is evaluated using transcribed texts of real conversations in the travel arrangement domain. Experimental results show that the proposed method is more accurate than the single-edit-distance-based ranking methods, in both closed and open tests. Moreover, the proposed method could estimate MT quality within 3% error in some cases.</abstract>
       <bibkey>akiba-etal-2001-using</bibkey>
@@ -51,7 +51,7 @@
       <title>Finding translation correspondences from parallel parsed corpus for example-based translation</title>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Hideo</first><last>Watanabe</last></author>
       <url hash="1b357d91">2001.mtsummit-papers.5</url>
       <abstract>This paper describes a system for finding phrasal translation correspondences from parallel parsed corpus that are collections paired English and Japanese sentences. First, the system finds phrasal correspondences by Japanese-English translation dictionary consultation. Then, the system finds correspondences in remaining phrases by using sentences dependency structures and the balance of all correspondences. The method is based on an assumption that in parallel corpus most fragments in a source sentence have corresponding fragments in a target sentence.</abstract>
@@ -117,10 +117,10 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>ISLE</fixed-case> in the ocean. Transatlantic standards for multilingual lexicons (with an eye to machine translation)</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <author><first>Gregor</first><last>Thurmair</last></author>
       <url hash="f3768c02">2001.mtsummit-papers.13</url>
@@ -198,7 +198,7 @@
       <title>Using machine learning for system-internal evaluation of transferred linguistic representations</title>
       <author><first>Michael</first><last>Gamon</last></author>
       <author><first>Hisami</first><last>Suzuki</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <url hash="878073e0">2001.mtsummit-papers.21</url>
       <attachment type="presentation" hash="878073e0">2001.mtsummit-papers.21.Presentation.pdf</attachment>
       <abstract>We present an automated, system-internal evaluation technique for linguistic representations in a large-scale, multilingual MT system. We use machine-learned classifiers to recognize the differences between linguistic representations generated from transfer in an MT context from representations that are produced by "native" analysis of the target language. In the MT scenario, convergence of the two is the desired result. Holding the feature set and the learning algorithm constant, the accuracy of the classifiers provides a measure of the overall difference between the two sets of linguistic representations: classifiers with higher accuracy correspond to more pronounced differences between representations. More importantly, the classifiers yield the basis for error-analysis by providing a ranking of the importance of linguistic features. The more salient a linguistic criterion is in discriminating transferred representations from "native" representations, the more work will be needed in order to get closer to the goal of producing native-like MT. We present results from using this approach on the Microsoft MT system and discuss its advantages and possible extensions.</abstract>
@@ -206,24 +206,24 @@
     </paper>
     <paper id="22">
       <title>Search algorithms for statistical machine translation based on dynamic programming and pruning techniques</title>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="446d229f">2001.mtsummit-papers.22</url>
       <abstract>The increasing interest in the statistical approach to Machine Translation is due to the development of effective algorithms for training the probabilistic models proposed so far. However, one of the open problems with statistical machine translation is the design of efficient algorithms for translating a given input string. For some interesting models, only (good) approximate solutions can be found. Recently, a dynamic programming-like algorithm for the IBM-Model 2 has been proposed which is based on an iterative process of refinement solutions. A new dynamic programming-like algorithm is proposed here to deal with more complex IBM models (models 3 to 5). The computational cost of the algorithm is reduced by using an alignment-based pruning technique. Experimental results with the so-called “Tourist Task” are also presented.</abstract>
       <bibkey>garcia-varea-casacuberta-2001-search</bibkey>
     </paper>
     <paper id="23">
       <title><fixed-case>P</fixed-case>ol<fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et: an experimental database for <fixed-case>P</fixed-case>olish verbs</title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <url hash="4f507e72">2001.mtsummit-papers.23</url>
       <abstract>The semantics of verbs implies, as is known, a great number of difficulties, when it is to be represented in a computational lexicon. The Slavic languages are especially challenging in respect of this task because of the huge complexity of verbs, where the stems are combined with prefixes indicating aspect and Aktionsart. The current paper describes an approach to build PolVerbNet, a database for Polish verbs, considering the internal structure of the aspect-Aktionsart system. PolVerbNet is thus implemented in a larger English-Polish MT-system, which incorporates WordNet. We report our translation procedure and the system’s performance is evaluated and discussed.</abstract>
       <bibkey>gawronska-2001-polverbnet</bibkey>
     </paper>
     <paper id="24">
       <title>Derivational morphology to the rescue: how it can help resolve unfound words in <fixed-case>MT</fixed-case></title>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <author><first>Esmé</first><last>Manandise</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <url hash="1aff723d">2001.mtsummit-papers.24</url>
       <abstract>Machine Translation (MT) systems that process unrestricted text should be able to deal with words that are not found in the MT lexicon. Without some kind of recognition, the parse may be incomplete, there is no transfer for the unfound word, and tests for transfers for surrounding words will often fail, resulting in poor translation. Interestingly, not much has been published on unfound- word guessing in the context of MT although such work has been going on for other applications. In our work on the IBM MT system, we implemented a far-reaching strategy for recognizing unfound words based on rules of word formation and for generating transfers. What distinguishes our approach from others is the use of semantic and syntactic features for both analysis and transfer, a scoring system to assign levels of confidence to possible word structures, and the creation of transfers in the transformation component. We also successfully applied rules of derivational morphological analysis to non-derived unfound words.</abstract>
       <bibkey>gdaniec-etal-2001-derivational</bibkey>
@@ -239,15 +239,15 @@
     <paper id="26">
       <title>Large scale language independent generation using thematic hierarchies</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <url hash="04d58904">2001.mtsummit-papers.26</url>
       <bibkey>habash-dorr-2001-large</bibkey>
     </paper>
     <paper id="27">
       <title><fixed-case>AGILE</fixed-case> - a system for multilingual generation of technical instructions</title>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Danail</first><last>Dochev</last></author>
       <url hash="1d658534">2001.mtsummit-papers.27</url>
       <abstract>This paper presents a multilingual Natural Language Generation system that produces technical instruction texts in Bulgarian, Czech and Russian. It generates several types of texts, common for software manuals, in two styles. We illustrate the system’s functionality with examples of its input and output behaviour. We discuss the criteria and procedures adopted for evaluating the system and summarise their results. The system embodies novel approaches to providing multilingual documentation, ranging from the re-use of a large-scale, broad coverage grammar of English in order to develop the lexico-grammatical resources necessary for the generation in the three target languages, through to the adoption of a ‘knowledge editing’ approach to specifying the desired content of the texts to be generated independently of the target languages in which those texts finally appear.</abstract>
@@ -319,14 +319,14 @@
       <author><first>Jinee</first><last>Maeng</last></author>
       <author><first>Ji-Young</first><last>Lee</last></author>
       <author><first>Young-Sook</first><last>Chae</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="ab931912">2001.mtsummit-papers.35</url>
       <abstract>This paper describes KORTERM’s test suite and their practicability. The test-sets have been being constructed on the basis of fine-grained classification of linguistic phenomena to evaluate the technical status of English-to-Korean MT systems systematically. They consist of about 5000 test-sets and are growing. Each test-set contains an English sentence, a model Korean translation, a linguistic phenomenon category, and a yes/no question about the linguistic phenomenon. Two commercial systems were evaluated with a yes/no test of prepared questions. Total accuracy rates of the two systems were different (50% vs. 66%). In addition, a comprehension test was carried out. We found that one system was more comprehensible than the other system. These results seem to show that our test suite is practicable.</abstract>
       <bibkey>koh-etal-2001-test</bibkey>
     </paper>
     <paper id="36">
       <title>Integrating bilingual lexicons in a probabilistic translation assistant</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <url hash="b1cc2547">2001.mtsummit-papers.36</url>
@@ -368,7 +368,7 @@
     </paper>
     <paper id="41">
       <title>Evaluation of machine translation systems at <fixed-case>CLS</fixed-case> Corporate Language Services <fixed-case>AG</fixed-case></title>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Anthony</first><last>Clarke</last></author>
       <author><first>Hans-Udo</first><last>Stadler</last></author>
       <url hash="aa522c23">2001.mtsummit-papers.41</url>
@@ -377,7 +377,7 @@
     </paper>
     <paper id="42">
       <title>Scaling the <fixed-case>ISLE</fixed-case> taxonomy: development of metrics for the multi-dimensional characterization of machine translation quality</title>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Michelle</first><last>Vanni</last></author>
       <url hash="dbcfe567">2001.mtsummit-papers.42</url>
       <abstract>The DARPA MT evaluations of the early 1990s, along with subsequent work on the MT Scale, and the International Standards for Language Engineering (ISLE) MT Evaluation framework represent two of the principal efforts in Machine Translation Evaluation (MTE) over the past decade. We describe a research program that builds on both of these efforts. This paper focuses on the selection of MT output features suggested in the ISLE framework, as well as the development of metrics for the features to be used in the study. We define each metric and describe the rationale for its development. We also discuss several of the finer points of the evaluation measures that arose as a result of verification of the measures against sample output texts from three machine translation systems.</abstract>
@@ -386,10 +386,10 @@
     <paper id="43">
       <title>Pronominal anaphora resolution in <fixed-case>KANTOO</fixed-case> <fixed-case>E</fixed-case>nglish-to-<fixed-case>S</fixed-case>panish machine translation system</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Enrique</first><last>Torrejon</last></author>
       <author><first>David</first><last>Svoboda</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <url hash="512b7176">2001.mtsummit-papers.43</url>
       <abstract>We describe the automatic resolution of pronominal anaphora using KANT Controlled English (KCE) and the KANTOO English-to-Spanish MT system. Our algorithm is based on a robust, syntax-based approach that applies a set of restrictions and preferences to select the correct antecedent. We report a success rate of 89.6% on a training corpus with 289 anaphors, and 87.5% on held-out data containing 145 anaphors. Resolution of anaphors is important in translation, due to gender mismatches among languages; our approach translates anaphors to Spanish with 97.2% accuracy.</abstract>
       <bibkey>mitamura-etal-2001-pronominal</bibkey>
@@ -403,32 +403,32 @@
     </paper>
     <paper id="45">
       <title>Morpho-syntactic analysis for reordering in statistical machine translation</title>
-      <author><first>Sonja</first><last>Niessen</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sonja-niessen"><first>Sonja</first><last>Niessen</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="aea6cc84">2001.mtsummit-papers.45</url>
       <abstract>In the framework of statistical machine translation (SMT), correspondences between the words in the source and the target language are learned from bilingual corpora on the basis of so-called alignment models. Among other things these are meant to capture the differences in word order in different languages. In this paper we show that SMT can take advantage of the explicit introduction of some linguistic knowledge about the sentence structure in the languages under consideration. In contrast to previous publications dealing with the incorporation of morphological and syntactic information into SMT, we focus on two aspects of reordering for the language pair German and English, namely question inversion and detachable German verb prefixes. The results of systematic experiments are reported and demonstrate the applicability of the approach to both translation directions on a German-English corpus.</abstract>
       <bibkey>niessen-ney-2001-morpho</bibkey>
     </paper>
     <paper id="46">
       <title>Statistical multi-source translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="c033006e">2001.mtsummit-papers.46</url>
       <abstract>We describe methods for translating a text given in multiple source languages into a single target language. The goal is to improve translation quality in applications where the ultimate goal is to translate the same document into many languages. We describe a statistical approach and two specific statistical models to deal with this problem. Our method is generally applicable as it is independent of specific models, languages or application domains. We evaluate the approach on a multilingual corpus covering all eleven official European Union languages that was collected automatically from the Internet. In various tests we show that these methods can significantly improve translation quality. As a side effect, we also compare the quality of statistical machine translation systems for many European languages in the same domain.</abstract>
       <bibkey>och-ney-2001-statistical</bibkey>
     </paper>
     <paper id="47">
       <title>Implicit cues for explicit generation: using telicity as a cue for tense structure in a <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> system</title>
-      <author><first>Mari</first><last>Olsen</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="mari-broman-olsen"><first>Mari</first><last>Olsen</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Carol</first><last>van Ess-Dykema</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <url hash="531c4608">2001.mtsummit-papers.47</url>
       <bibkey>olsen-etal-2001-implicit</bibkey>
     </paper>
     <paper id="48">
       <title>Translation knowledge recycling for related languages</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <url hash="995b55ab">2001.mtsummit-papers.48</url>
       <abstract>An increasing interest in multi-lingual translation systems demands a reconsideration of the development costs of machine translation engines for language pairs. This paper proposes an approach that reuses the existing translation knowledge resources of high-quality translation engines for translation into different, but related languages. The lexical information of the target representation is utilized to generate the corresponding translation in the related language by using a transfer dictionary for the mapping of words and a set of heuristic rules for the mapping of structural information. Experiments using a Japanese-English translation engine for the generation of German translations show a minor decrease of up to 5% in the acceptability of the German output compared with the English translation of unseen Japanese input.</abstract>
       <bibkey>paul-2001-translation</bibkey>
@@ -467,8 +467,8 @@
     <paper id="52">
       <title>Cognates alignment</title>
       <author><first>António</first><last>Ribeiro</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <author><first>João</first><last>Mexia</last></author>
       <url hash="ea5b048a">2001.mtsummit-papers.52</url>
       <abstract>Some authors (Simard et al.; Melamed; Danielsson &amp; Mühlenbock) have suggested measures of similarity of words in different languages so as to find extra clues for alignment of parallel texts. Cognate words, like ‘Parliament’ and ‘Parlement’, in English and French respectively, provide extra anchors that help to improve the quality of the alignment. In this paper, we will extend an alignment algorithm proposed by Ribeiro et al. using typical contiguous and non-contiguous sequences of characters extracted using a statistically sound method (Dias et al.). With these typical sequences, we are able to find more reliable correspondence points and improve the alignment quality without recurring to heuristics to identify cognates.</abstract>
@@ -477,7 +477,7 @@
     <paper id="53">
       <title>Achieving commercial-quality translation with example-based methods</title>
       <author><first>Stephen</first><last>Richardson</last></author>
-      <author><first>William</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William</first><last>Dolan</last></author>
       <author><first>Arul</first><last>Menezes</last></author>
       <author><first>Jessie</first><last>Pinkham</last></author>
       <url hash="ebc22b74">2001.mtsummit-papers.53</url>
@@ -494,13 +494,13 @@
     <paper id="55">
       <title>A morphological analyser for machine translation based on finite-state transducers</title>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>David</first><last>Picó</last></author>
+      <author id="david-pico"><first>David</first><last>Picó</last></author>
       <author><first>Joan</first><last>Miquel del Val</last></author>
       <author><first>Ferran</first><last>Fabregat</last></author>
       <author><first>Jesús</first><last>Tomás</last></author>
       <author><first>Moisés</first><last>Pastor</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="5337cd5b">2001.mtsummit-papers.55</url>
       <abstract>A finite-state, rule-based morphological analyser is presented here, within the framework of machine translation system TAVAL. This morphological analyser introduces specific features which are particularly useful for translation, such as the detection and morphological tagging of word groups that act as a single lexical unit for translation purposes. The case where words in one such group are not strictly contiguous is also covered. A brief description of the Spanish-to-Catalan and Catalan-to-Spanish translation system TAVAL is given in the paper.</abstract>
       <bibkey>sanchis-etal-2001-morphological</bibkey>
@@ -509,7 +509,7 @@
       <title>New generation Systran translation system</title>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Péter</first><last>Dienes</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <url hash="68e5724f">2001.mtsummit-papers.56</url>
       <abstract>In this paper, we present the design of the new generation Systran translation systems, currently utilized in the development of English-Hungarian, English-Polish, English-Arabic, French-Arabic, Hungarian-French and Polish-French language pairs. The new design, based on the traditional Systran machine translation expertise and the existing linguistic resources, addresses the following aspects: efficiency, modularity, declarativity, reusability, and maintainability. Technically, the new systems rely on intensive use of state-of-the-art finite automaton and formal grammar implementation. The finite automata provide the essential lookup facilities and the natural capacity of factorizing intuitive linguistic sets. Linguistically, we have introduced a full monolingual description of linguistic information and the concept of implicit transfer. Finally, we present some by-products that are directly derived from the new architecture: intuitive coding tools, spell checker and syntactic tagger.</abstract>
       <bibkey>senellart-etal-2001-new</bibkey>
@@ -529,7 +529,7 @@
       <author><first>Young-Ae</first><last>Seo</last></author>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
       <url hash="505c1dc1">2001.mtsummit-papers.58</url>
       <bibkey>seo-etal-2001-captioneye</bibkey>
     </paper>
@@ -546,7 +546,7 @@
     <paper id="60">
       <title>Sub-sentential exploitation of translation memories</title>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="e99a6852">2001.mtsummit-papers.60</url>
       <abstract>Translation memory systems (TMS) are a family of computer tools whose purpose is to facilitate and encourage the re-use of existing translations. By searching a database of past translations, these systems can retrieve the translation of whole segments of text and propose them to the translator for re-use. However, the usefulness of existing TMS’s is limited by the nature of the text segments that that they are able to put in correspondence, generally whole sentences. This article examines the potential of a type of system that is able to recuperate the translation of sub-sentential sequences of words.</abstract>
       <bibkey>simard-langlais-2001-sub</bibkey>
@@ -571,7 +571,7 @@
     <paper id="63">
       <title>Converting a bilingual dictionary into a bilingual knowledge bank based on the synchronous <fixed-case>SSTC</fixed-case></title>
       <author><first>Enya Kong</first><last>Tang</last></author>
-      <author><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
+      <author id="mosleh-hmoud-al-adhaileh"><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
       <url hash="f1bd2e18">2001.mtsummit-papers.63</url>
       <abstract>In this paper, we would like to present an approach to construct a huge Bilingual Knowledge Bank (BKB) from an English Malay bilingual dictionary based on the idea of synchronous Structured String-Tree Correspondence (SSTC). The SSTC is a general structure that can associate an arbitrary tree structure to string in a language as desired by the annotator to be the interpretation structure of the string, and more importantly is the facility to specify the correspondence between the string and the associated tree which can be non-projective. With this structure, we are able to match linguistic units at different inter levels of the structure (i.e. define the correspondence between substrings in the sentence, nodes in the tree, subtrees in the tree and sub-correspondences in the SSTC). This flexibility makes synchronous SSTC very well suited for the construction of a Bilingual Knowledge Bank we need for the English-Malay MT application.</abstract>
       <bibkey>tang-al-adhaileh-2001-converting</bibkey>
@@ -579,14 +579,14 @@
     <paper id="64">
       <title>Monotone statistical translation using word groups</title>
       <author><first>Jesús</first><last>Tomás</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="4deee6d0">2001.mtsummit-papers.64</url>
       <abstract>A new system for statistical natural language translation for languages with similar grammar is introduced. Specifically, it can be used with Romanic Languages, such as French, Spanish or Catalan. The statistical translation uses two sources of information: a language model and a translation model. The language model used is a standard trigram model. A new approach is defined in the translation model. The two main properties of the translation model are: the translation probabilities are computed between groups of words and the alignment between those groups is monotone. That is, the order between the word groups in the source sentence is conserved in the target sentence. Once, the translation model has been defined, we present an algorithm to infer its parameters from training samples. The translation process is carried out with an efficient algorithm based on stack-decoding. Finally, we present some translation results from Catalan to Spanish and compare our model with other conventional models.</abstract>
       <bibkey>tomas-casacuberta-2001-monotone</bibkey>
     </paper>
     <paper id="65">
       <title>Translatability checker: a tool to help decide whether to use <fixed-case>MT</fixed-case></title>
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <author><first>Bart</first><last>Jongejan</last></author>
       <url hash="9d2a852a">2001.mtsummit-papers.65</url>
       <abstract>This paper describes a tool designed to assess the machine translatability of English source texts by assigning a translatability index to both individual sentences and the text as a whole. The tool is designed to be both stand-alone and integratable into a suite of other tools which together help to improve the quality of professional translation in the preparatory phase of the translation workflow. Assessing translatability is an important element in ensuring the most efficient and cost effective use of current translation technology, and the tool must be able to quickly determine the translatability of a text without itself using too many resources. It is therefore based on rather simple tagging and pattern matching technologies which bring with them a certain level of indeterminacy. This potential disadvantage can, however, be offset by the fact that an annotated version of the text is simultaneously produced to allow the user to interpret the results of the checker.</abstract>
@@ -615,8 +615,8 @@
     </paper>
     <paper id="68">
       <title>An automatic evaluation method for machine translation using two-way <fixed-case>MT</fixed-case></title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
       <author><first>Masaki</first><last>Matsudaira</last></author>
       <author><first>Yoshiko</first><last>Shirokizawa</last></author>
@@ -630,10 +630,10 @@
     </paper>
     <paper id="69">
       <title>Pre-processing of bilingual corpora for <fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish <fixed-case>EBMT</fixed-case></title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="edbd08bc">2001.mtsummit-papers.69</url>
       <abstract>Pre-processing of bilingual corpora plays an important role in Example-Based Machine Translation (EBMT) and Statistical-Based Machine Translation (SBMT). For our Mandarin-English EBMT system, pre-processing includes segmentation for Mandarin, bracketing for English and building a statistical dictionary from the corpora. We used the Mandarin segmenter from the Linguistic Data Consortium (LDC). It uses dynamic programming with a frequency dictionary to segment the text. Although the frequency dictionary is large, it does not completely cover the corpora. In this paper, we describe the work we have done to improve the segmentation for Mandarin and the bracketing process for English to increase the length of English phrases. A statistical dictionary is built from the aligned bilingual corpus. It is used as feedback to segmentation and bracketing to re-segment / re-bracket the corpus. The process iterates several times to achieve better results. The final results of the corpus pre-processing are a segmented/bracketed aligned bilingual corpus and a statistical dictionary. We achieved positive results by increasing the average length of Chinese terms about 60% and 10% for English. The statistical dictionary gained about a 30% increase in coverage.</abstract>
       <bibkey>zhang-etal-2001-pre</bibkey>
@@ -669,9 +669,9 @@
     </paper>
     <paper id="3">
       <title>Evaluating machine translation output for an unknown source language: report of an <fixed-case>ISLE</fixed-case>-based investigation</title>
-      <author><first>Keith J.</first><last>Miller</last></author>
-      <author><first>Donna M.</first><last>Gates</last></author>
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
+      <author id="donna-gates"><first>Donna M.</first><last>Gates</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <author><first>Josemina</first><last>Magdalen</last></author>
       <url hash="6deec640">2001.mtsummit-eval.3</url>
       <abstract>It is often assumed that knowledge of both the source and target languages is necessary in order to evaluate the output of a machine translation (MT) system. This paper reports on an experimental evaluation of Chinese-English MT and Spanish-English MT from output specifically designed for evaluators who do not read or speak Chinese or Spanish. An outline of the characteristics measured and evaluation follows.</abstract>
@@ -679,16 +679,16 @@
     </paper>
     <paper id="4">
       <title>Setting a methodology for machine translation evaluation</title>
-      <author><first>Widad</first><last>Mustafa El Hadi</last></author>
-      <author><first>Ismail</first><last>Timimi</last></author>
-      <author><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad</first><last>Mustafa El Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismail</first><last>Timimi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
       <url hash="de7e5f38">2001.mtsummit-eval.4</url>
       <abstract>In this paper some of the problems encountered in designing an evaluation for an MT system will be examined. The source text, in French, provided by INRA (Institut National pour la Recherche Agronomique i.e. National Institute for Agronomic Research) deals with biotechnology and animal reproduction. It has been translated into English. The output of the system (i.e. the result of the assembling of several components), as opposed to its individual modules or specific components (i.e. analysis, generation, grammar, lexicon, core, etc.), will be evaluated. Moreover, the evaluation will concentrate on translation quality and its fidelity to the source text. The evaluation is not comparative, which means that we tested a specific MT system, not necessarily representative of other MT systems that can be found on the market.</abstract>
       <bibkey>mustafa-el-hadi-etal-2001-setting</bibkey>
     </paper>
     <paper id="5">
       <title>Towards a two-stage taxonomy for machine translation evaluation</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Sandra</first><last>Manzi</last></author>
       <author><first>Maghi</first><last>King</last></author>
       <url hash="9646c1b1">2001.mtsummit-eval.5</url>
@@ -696,7 +696,7 @@
     </paper>
     <paper id="6">
       <title>Automatically predicting <fixed-case>MT</fixed-case> systems rankings compatible with fluency, adequacy and informativeness scores</title>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Tony</first><last>Hartley</last></author>
       <url hash="66166f1e">2001.mtsummit-eval.6</url>
       <abstract>The main goal of the work presented in this paper is to find an inexpensive and automatable way of predicting rankings of MT systems compatible with human evaluations of these systems expressed in the form of Fluency, Adequacy or Informativeness scores. Our approach is to establish whether there is a correlation between rankings derived from such scores and the ones that can be built on the basis of automatically computable attributes of syntactic or semantic nature. We present promising results obtained on the DARPA94 MT evaluation corpus.</abstract>
@@ -704,17 +704,17 @@
     </paper>
     <paper id="7">
       <title>In one hundred words or less</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <url hash="16fee18f">2001.mtsummit-eval.7</url>
       <abstract>This paper reports on research which aims to test the efficacy of applying automated evaluation techniques, originally designed for human second language learners, to machine translation (MT) system evaluation. We believe that such evaluation techniques will provide insight into MT evaluation, MT development, the human translation process and the human language learning process. The experiment described here looks only at the intelligibility of MT output. The evaluation technique is derived from a second language acquisition experiment that showed that assessors can differentiate native from non-native language essays in less than 100 words. Particularly illuminating for our purposes is the set of factor on which the assessors made their decisions. We duplicated this experiment to see if similar criteria could be elicited from duplicating the test using both human and machine translation outputs in the decision set. The encouraging results of this experiment, along with an analysis of language factors contributing to the successful outcomes, is presented here.</abstract>
       <bibkey>reeder-2001-one</bibkey>
     </paper>
     <paper id="8">
       <title>The naming of things and the confusion of tongues: an <fixed-case>MT</fixed-case> metric</title>
-      <author><first>Florence</first><last>Reeder</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Jennifer</first><last>Doyon</last></author>
-      <author><first>John</first><last>White</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <url hash="331e7908">2001.mtsummit-eval.8</url>
       <abstract>This paper reports the results of an experiment in machine translation (MT) evaluation, designed to determine whether easily/rapidly collected metrics can predict the human generated quality parameters of MT output. In this experiment we evaluated a system’s ability to translate named entities, and compared this measure with previous evaluation scores of fidelity and intelligibility. There are two significant benefits potentially associated with a correlation between traditional MT measures and named entity scores: the ability to automate named entity scoring and thus MT scoring; and insights into the linguistic aspects of task-based uses of MT, as captured in previous studies.</abstract>
       <bibkey>reeder-etal-2001-naming</bibkey>
@@ -722,21 +722,21 @@
     <paper id="9">
       <title>Scaling the <fixed-case>ISLE</fixed-case> framework: validating tests of machine translation quality for multi-dimensional measurement</title>
       <author><first>Michelle</first><last>Vanni</last></author>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <url hash="d524e151">2001.mtsummit-eval.9</url>
       <abstract>Work on comparing a set of linguistic test scores for MT output to a set of the same tests’ scores for naturally-occurring target language text (Jones and Rusk 2000) broke new ground in automating MT Evaluation. However, the tests used were selected on an ad hoc basis. In this paper, we report on work to extend our understanding, through refinement and validation, of suitable linguistic tests in the context of our novel approach to MTE. This approach was introduced in Miller and Vanni (2001a) and employs standard, rather than randomly-chosen, tests of MT output quality selected from the ISLE framework as well as a scoring system for predicting the type of information processing task performable with the output. Since the intent is to automate the scoring system, this work can also be viewed as the preliminary steps of algorithm design.</abstract>
       <bibkey>vanni-miller-2001-scaling</bibkey>
     </paper>
     <paper id="10">
       <title>Predicting intelligibility from fidelity in <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>John</first><last>White</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <url hash="ca0c8458">2001.mtsummit-eval.10</url>
       <abstract>Attempts to formulate methods of automatically evaluating machine translation (MT) have generally looked at some attrinbute of translation and then tried, explicitly or implicitly, to extrapolate the measurement to cover a broader class of attributes. In particular, some studies have focused on measuring fidelity of translation, and inferring intelligibility from that, and others have taken the opposite approach. In this paper we examine the more fundamental question of whether, and to what extent, the one attribute can be predicted by the other. As a starting point we use the 1994 DARPA MT corpus, which has measures for both attributes, and perform a simple comparison of the behavior of each. Two hypotheses about a predictable inference between fidelity and intelligibility are compared with the comparative behavior across all language pairs and all documents in the corpus.</abstract>
       <bibkey>white-2001-predicting</bibkey>
     </paper>
     <paper id="11">
       <title>Predicting <fixed-case>MT</fixed-case> fidelity from noun-compound handling</title>
-      <author><first>John</first><last>White</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <author><first>Monika</first><last>Forner</last></author>
       <url hash="3ab1b692">2001.mtsummit-eval.11</url>
       <abstract>Approaches to the automation of machine translation (MT) evaluation have attempted, or presumed, to connect some rapidly measurable phenomenon with general attributes of the MT output and/or system. In particular, measurements of the fluency of output are often asserted to be predictive of the usefulness of MT output in information-intensive, downstream tasks. The connections between the fluency (“intelligibility”) of translation and its informational adequacy (“fidelity”) are not actually straightforward. This paper discussed a small experiment in isolating a particular contrastive linguistic phenomena common to both French-English and Spanish-English pairs, and attempts to associate that behavior in machine and human translations with known fidelity properties of those translations. Our results show a definite correlative trend.</abstract>
@@ -767,7 +767,7 @@
     </frontmatter>
     <paper id="1">
       <title>Transfer-rule induction for example-based translation</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="b1f569d8">2001.mtsummit-ebmt.1</url>
       <bibkey>brown-2001-transfer</bibkey>
     </paper>
@@ -779,7 +779,7 @@
     </paper>
     <paper id="3">
       <title>Linguistic knowledge and complexity in an <fixed-case>EBMT</fixed-case> system based on translation patterns</title>
-      <author><first>Kevin</first><last>McTait</last></author>
+      <author id="kevin-mctait"><first>Kevin</first><last>McTait</last></author>
       <url hash="1b5481f5">2001.mtsummit-ebmt.3</url>
       <abstract>An approach to Example-Based Machine Translation is presented which operates by extracting translation patterns from a bilingual corpus aligned at the level of the sentence. This is carried out using a language-neutral recursive machine-learning algorithm based on the principle of similar distributions of strings. The translation patterns extracted represent generalisations of sentences that are translations of each other and, to some extent, resemble transfer rules but with fewer constraints. The strings and variables, of which translations patterns are composed, are aligned in order to provide a more refined bilingual knowledge source, necessary for the recombination phase. A non-structural approach based on surface forms is error prone and liable to produce translation patterns that are false translations. Such errors are highlighted and solutions are proposed by the addition of external linguistic resources, namely morphological analysis and part-of-speech tagging. The amount of linguistic resources added has consequences for computational complexity and portability.</abstract>
       <bibkey>mctait-2001-linguistic</bibkey>
@@ -794,14 +794,14 @@
     </paper>
     <paper id="5">
       <title>Beyond translation memories</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="a0d65725">2001.mtsummit-ebmt.5</url>
       <abstract>One key to the success of EBMT is the removal of the boundaries limiting the potential of translation memories. To bring EBMT to fruition, researchers and developers have to go beyond the self-imposed limitations of what is now traditional, in computing terms almost old fashioned, TM technology. Experiments have shown that the probability of finding exact matches at phrase level is higher than the probability of finding exact matches at the current TM segment level. We outline our implementation of a linguistically enhanced translation memory system (or Phrasal Lexicon) implementing phrasal matching. This system takes advantage of the huge and underused resources available in existing translation memories and develops a traditional TM into a sophisticated example-based machine translation engine which when integrated into a hybrid MT solution can yield significant improvements in translation quality.</abstract>
       <bibkey>schaler-2001-beyond</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>EBMT</fixed-case> seen as case-based reasoning</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="3977cc4e">2001.mtsummit-ebmt.6</url>
       <abstract>This paper looks at EBMT from the perspective of the Case-based Reasoning (CBR) paradigm. We attempt to describe the task of machine translation (MT) seen as a potential application of CBR, and attempt to describe MT in standard CBR terms. The aim is to see if other applications of CBR can suggest better ways to approach EBMT.</abstract>
       <bibkey>somers-2001-ebmt</bibkey>
@@ -878,14 +878,14 @@
     <paper id="7">
       <title>Discovering machine translation strategies beyond word-for-word translation: a laboratory assignment</title>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="f5c2fa23">2001.mtsummit-teach.7</url>
       <abstract>It is a common mispreconception to say that machine translation programs translate word-for-word, but real systems follow strategies which are much more complex. This paper proposes a laboratory assignment to study the way in which some commercial machine translation programs translate whole sentences and how the translation differs from a word-for-word translation. Students are expected to infer some of these extra strategies by observing the outcome of real systems when translating a set of sentences designed on purpose. The assignment also makes students aware of the difficulty of constructing such programs while bringing some technological light into the apparent “magic” of machine translation.</abstract>
       <bibkey>perez-ortiz-forcada-2001-discovering</bibkey>
     </paper>
     <paper id="8">
       <title>Three perspectives on <fixed-case>MT</fixed-case> in the classroom</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="1f1efe84">2001.mtsummit-teach.8</url>
       <abstract>This paper considers the role of translation software, especially Machine Translation (MT), in curricula for students of computational linguistics, for trainee translators and for language learners. These three sets of students have differing needs and interests, although there is some overlap between them. A brief historical view of MT in the classroom is given, including comments on the author’s 25 years of experience in the field. This is followed by discussion and examples of strategies for teaching about MT and related aspects of Language Engineering and Information Technology for the three types of student.</abstract>
       <bibkey>somers-2001-three</bibkey>
@@ -904,7 +904,7 @@
       <address>Santiago de Compostela, Spain</address>
       <month>September 18-22</month>
       <year>2001</year>
-      <editor><first>Steven</first><last>Krauwer</last></editor>
+      <editor id="steven-krauwer"><first>Steven</first><last>Krauwer</last></editor>
       <venue>mtsummit</venue>
     </meta>
     <frontmatter>
@@ -913,14 +913,14 @@
     </frontmatter>
     <paper id="1">
       <title>Four technical and organizational keys to handle more languages and improve quality (on demand) in <fixed-case>MT</fixed-case></title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="4e178eb9">2001.mtsummit-road.1</url>
       <abstract>Despite considerable investment over the past 50 years, only a small number of language pairs is covered by MT systems designed for information access, and even fewer are capable of quality translation or speech translation. To open the door toward MT of adequate quality for all languages (at least in principle), we propose four keys. On the technical side, we should (1) dramatically increase the use of learning techniques which have demonstrated their potential at the research level, and (2) use pivot architectures, the most universally usable pivot being UNL. On the organizational side, the keys are (3) the cooperative development of open source linguistic resources on the Web, and (4) the construction of systems where quality can be improved "on demand" by users, either a priori through interactive disambiguation, or a posteriori by correcting the pivot representation through any language, thereby unifying MT, computer-aided authoring, and multilingual generation.</abstract>
       <bibkey>boitet-2001-four</bibkey>
     </paper>
     <paper id="2">
       <title>Towards pragmatics-based machine translation</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
       <url hash="30c6b505">2001.mtsummit-road.2</url>
       <abstract>We propose a program of research which has as its goal establishing a framework and methodology for investigating the pragmatic aspects of the translation process and implementing a computational platform for carrying out systematic experiments on the pragmatics of translation. The program has four components. First, on the basis of a comparative study of multiple translations of the same document into a single target language, a pragmatics-based computational model is to be developed in which reasoning about the beliefs of the participants in the translation task and about the content of a text are central. Second, existing Natural Language Processing technologies are to be appraised as potential components of a computational platform that supports investigations into the effects of pragmatics on translation. Third, the platform is to be assembled and prototype translation systems implemented which conform to the pragmatics-based computational model of translation. Finally, a novel evaluation methodology is to be developed and evaluations of the systems carried out.</abstract>
@@ -944,15 +944,15 @@
     <paper id="5">
       <title>Rethinking interaction: the solution for high-quality <fixed-case>MT</fixed-case>?</title>
       <author><first>Elliott</first><last>Macklovitch</last></author>
-      <author><first>Antonio S.</first><last>Valderrábanos</last></author>
+      <author id="antonio-s-valderrabanos"><first>Antonio S.</first><last>Valderrábanos</last></author>
       <url hash="3e935ed2">2001.mtsummit-road.5</url>
       <abstract>Our focus is on high-quality (HQ) translation, the worldwide demand for which continues to increase exponentially and now far exceeds the capacity of the translation profession to satisfy it. To what extent is MT currently being used to satisfy this growing demand for HQ translation? Quite obviously, very little. Although MT is being used today by more people than ever before, very few of these users are professional translators. This represents a major change, for a mere ten years ago, translators were still the principal target market for most MT vendors. What happened to bring about this change? For that matter, what happened to most of those MT vendors? The view we present is that the most promising strategy for HQ MT is to embed MT systems in translation environments where the translator retains full control over their output. In our opinion, this new type of interactive MT will achieve better acceptance levels among translators and significantly improve the prospects of MT’s commercial success in the translation industry.</abstract>
       <bibkey>macklovitch-valderrabanos-2001-rethinking</bibkey>
     </paper>
     <paper id="6">
       <title>What can machine translation learn from speech recognition?</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="5324ac40">2001.mtsummit-road.6</url>
       <abstract>The performance of machine translation technology after 50 years of development leaves much to be desired. There is a high demand for well performing and cheap MT systems for many language pairs and domains, which automatically adapt to rapidly changing terminology. We argue that for successful MT systems it will be crucial to apply data-driven methods, especially statistical machine translation. In addition, it will be very important to establish common test environments. This includes the availability of large parallel training corpora, well defined test corpora and standardized evaluation criteria. Thereby research results can be compared and this will open the possibility for more competition in MT research.</abstract>
       <bibkey>och-ney-2001-machine</bibkey>
@@ -960,10 +960,10 @@
     <paper id="7">
       <title>Design and implementation of controlled elicitation for machine translation of low-density languages</title>
       <author><first>Katharina</first><last>Probst</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
       <url hash="22ce9f9f">2001.mtsummit-road.7</url>
       <abstract>NICE is a machine translation project for low-density languages. We are building a tool that will elicit a controlled corpus from a bilingual speaker who is not an expert in linguistics. The corpus is intended to cover major typological phenomena, as it is designed to work for any language. Using implicational universals, we strive to minimize the number of sentences that each informant has to translate. From the elicited sentences, we learn transfer rules with a version space algorithm. Our vision for MT in the future is one in which systems can be quickly trained for new languages by native speakers, so that speakers of minor languages can participate in education, health care, government, and internet without having to give up their languages.</abstract>
@@ -978,8 +978,8 @@
     </paper>
     <paper id="9">
       <title>Evaluating <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish translation systems for personal name coverage</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="e0e39719">2001.mtsummit-road.9</url>
       <abstract>This paper discusses the challenges which Chinese-English machine translation (MT) systems face in translating personal names. We show that the translation of names between Chinese and English is complicated by different factors, including orthographic, phonetic, geographic and social ones. Four existing systems were tested for their capability in translating personal names from Chinese to English. Test data embodying geographic and sociolinguistic differences were obtained from a synchronous Chinese corpus of news media texts. It is obvious that systems vary considerably in their ability to identify personal names in the source language and render them properly in the target language. Given the criticality of personal name translation to the overall intelligibility of a translated text, the coverage of personal names should be one of the important criteria in the evaluation of MT performance. Moreover, name translation, which calls for a hybrid approach, would remain a central issue to the future development of MT systems, especially for online and real-time applications.</abstract>
       <bibkey>tsou-kwong-2001-evaluating</bibkey>
diff --git a/data/xml/2002.amta.xml b/data/xml/2002.amta.xml
index f947756733..d112d92252 100644
--- a/data/xml/2002.amta.xml
+++ b/data/xml/2002.amta.xml
@@ -12,13 +12,13 @@
     </meta>
     <paper id="1">
       <title>Example-based machine translation</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <url hash="f75a9393">2002.amta-tutorials.1</url>
       <bibkey>brown-2002-example</bibkey>
     </paper>
     <paper id="2">
       <title>The state of the art in language modeling</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <url hash="178d123d">2002.amta-tutorials.2</url>
       <bibkey>goodman-2002-state</bibkey>
     </paper>
@@ -35,13 +35,13 @@
     </meta>
     <paper id="1">
       <title>Automatic rule learning for resource-limited <fixed-case>MT</fixed-case></title>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Katharina</first><last>Probst</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>1-10</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_1</url>
       <abstract>Machine Translation of minority languages presents unique challenges, including the paucity of bilingual training data and the unavailability of linguistically-trained speakers. This paper focuses on a machine learning approach to transfer-based MT, where data in the form of translations and lexical alignments are elicited from bilingual speakers, and a seeded version-space learning algorithm formulates and refines transfer rules. A rule-generalization lattice is defined based on LFG-style f-structures, permitting generalization operators in the search for the most general rules consistent with the elicited data. The paper presents these methods and illustrates examples.</abstract>
@@ -51,7 +51,7 @@
       <title>Toward a hybrid integrated translation environment</title>
       <author><first>Michael</first><last>Carl</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <pages>11-20</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_2</url>
       <abstract>In this paper we present a model for the future use of Machine Translation (MT) and Computer Assisted Translation. In order to accommodate the future needs in middle value translations, we discuss a number of MT techniques and architectures. We anticipate a hybrid environment that integrates data- and rule-driven approaches where translations will be routed through the available translation options and consumers will receive accurate information on the quality, pricing and time implications of their translation choice.</abstract>
@@ -61,7 +61,7 @@
       <title>Adaptive bilingual sentence alignment</title>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <author><first>G.N.</first><last>You</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>21-30</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_3</url>
       <abstract>We present a new approach to the problem of aligning English and Chinese sentences in a bilingual corpus based on adaptive learning. While using length information alone produces surprisingly good results for aligning bilingual French and English sentences with success rates well over 95%, it does not fair as well for the alignment of English and Chinese sentences. The crux of the problem lies in greater variability of lengths and match types of the matched sentences. We propose to cope with such variability via a two-pass scheme under which model parameters can be learned from the data at hand. Experiments show that under the approach bilingual English-Chinese texts can be aligned effectively across diverse domains, genres and translation directions with accuracy rates approaching 99%.</abstract>
@@ -69,7 +69,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>DUST</fixed-case>er: a method for unraveling cross-language divergences for statistical word-level alignment</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Lisa</first><last>Pearl</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
@@ -81,7 +81,7 @@
     <paper id="5">
       <title>Text prediction with fuzzy alignment</title>
       <author><first>George</first><last>Foster</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <pages>44-53</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_5</url>
@@ -90,10 +90,10 @@
     </paper>
     <paper id="6">
       <title>Efficient integration of maximum entropy lexicon models within the training of statistical alignment models</title>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>54-63</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_6</url>
       <abstract>Maximum entropy (ME) models have been successfully applied to many natural language problems. In this paper, we show how to integrate ME models efficiently within a maximum likelihood training scheme of statistical machine translation models. Specifically, we define a set of context-dependent ME lexicon models and we present how to perform an efficient training of these ME models within the conventional expectation-maximization (EM) training of statistical translation models. Experimental results are also given in order to demonstrate how these ME models improve the results obtained with the traditional translation models. The results are presented by means of alignment quality comparing the resulting alignments with manually annotated reference alignments.</abstract>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="7">
       <title>Using word formation rules to extend <fixed-case>MT</fixed-case> lexicons</title>
-      <author><first>Claudia</first><last>Gdaniec</last></author>
+      <author id="claudia-gdaniec"><first>Claudia</first><last>Gdaniec</last></author>
       <author><first>Esmé</first><last>Manandise</last></author>
       <pages>64-73</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_7</url>
@@ -121,7 +121,7 @@
     <paper id="9">
       <title>Handling translation divergences: combining statistical and symbolic techniques in generation-heavy machine translation</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>84-93</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_9</url>
       <abstract>This paper describes a novel approach to handling translation divergences in a Generation-Heavy Hybrid Machine Translation (GHMT) system. The translation divergence problem is usually reserved for Transfer and Interlingual MT because it requires a large combination of complex lexical and structural mappings. A major requirement of these approaches is the accessibility of large amounts of explicit symmetric knowledge for both source and target languages. This limitation renders Transfer and Interlingual approaches ineffective in the face of structurally-divergent language pairs with asymmetric resources. GHMT addresses the more common form of this problem, source-poor/targetrich, by fully exploiting symbolic and statistical target-language resources. This non-interlingual non-transfer approach is accomplished by using target-language lexical semantics, categorial variations and subcategorization frames to overgenerate multiple lexico-structural variations from a target-glossed syntactic dependency of the source-language sentence. The symbolic overgeneration, which accounts for different possible translation divergences, is constrained by a statistical target-language model.</abstract>
@@ -129,10 +129,10 @@
     </paper>
     <paper id="10">
       <title><fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese machine translation based on verb patterns</title>
-      <author><first>Changhyun</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Changhyun</first><last>Kim</last></author>
       <author><first>Munpyo</first><last>Hong</last></author>
       <author><first>Yinxia</first><last>Huang</last></author>
-      <author><first>Young Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young Kil</first><last>Kim</last></author>
       <author><first>Sung Il</first><last>Yang</last></author>
       <author><first>Young Ae</first><last>Seo</last></author>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
@@ -143,7 +143,7 @@
     </paper>
     <paper id="11">
       <title>Merging example-based and statistical machine translation: an experiment</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>104-113</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_11</url>
@@ -168,7 +168,7 @@
     </paper>
     <paper id="14">
       <title>Fast and accurate sentence alignment of bilingual corpora</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>135-144</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_14</url>
       <abstract>We present a new method for aligning sentences with their translations in a parallel bilingual corpus. Previous approaches have generally been based either on sentence length or word correspondences. Sentence-length-based methods are relatively fast and fairly accurate. Word-correspondence-based methods are generally more accurate but much slower, and usually depend on cognates or a bilingual lexicon. Our method adapts and combines these approaches, achieving high accuracy at a modest computational cost, and requiring no knowledge of the languages or the corpus beyond division into words and sentences.</abstract>
@@ -176,9 +176,9 @@
     </paper>
     <paper id="15">
       <title>Deriving semantic knowledge from descriptive texts using an <fixed-case>MT</fixed-case> system</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>David</first><last>Svoboda</last></author>
       <author><first>Brian</first><last>Peterson</last></author>
       <author><first>Jennifer</first><last>Williams</last></author>
@@ -230,7 +230,7 @@
     <paper id="1">
       <title>A report on the experiences of implementing an <fixed-case>MT</fixed-case> system for use in a commercial environment</title>
       <author><first>Anthony</first><last>Clarke</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Hans-Udo</first><last>Stadler</last></author>
       <pages>187-194</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_19</url>
@@ -323,10 +323,10 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>NESPOLE</fixed-case>! speech-to-speech translation system</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <pages>240-243</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_28</url>
       <abstract>NESPOLE! is a speech-to-speech machine translation research system designed to provide fully functional speech-to-speech capabilities within real-world settings of common users involved in e-commerce applications. The project is funded jointly by the European Commission and the US NSF. The NESPOLE! system uses a client-server architecture to allow a common user, who is browsing web-pages on the internet, to connect seamlessly in real-time to an agent of the service provider, using a video-conferencing channel and with speech-to-speech translation services mediating the conversation. Shared web pages and annotated images supported via a Whiteboard application are available to enhance the communication.</abstract>
@@ -335,7 +335,7 @@
     <paper id="8">
       <title>The <fixed-case>KANTOO</fixed-case> <fixed-case>MT</fixed-case> sytem: controlled language checker and lexical maintenance tool</title>
       <author><first>Teriuko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Kathy</first><last>Baker</last></author>
       <author><first>Peter</first><last>Cramer</last></author>
       <author><first>Jeongwoo</first><last>Ko</last></author>
@@ -348,7 +348,7 @@
     </paper>
     <paper id="9">
       <title>Approaches to spoken translation</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
       <author><first>Naicong</first><last>Li</last></author>
       <pages>248-252</pages>
       <url>https://link.springer.com/chapter/10.1007/3-540-45820-4_30</url>
diff --git a/data/xml/2002.eamt.xml b/data/xml/2002.eamt.xml
index c277306641..5dd420575e 100644
--- a/data/xml/2002.eamt.xml
+++ b/data/xml/2002.eamt.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title>Teaching <fixed-case>MT</fixed-case> - an <fixed-case>I</fixed-case>ndian pespective</title>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="010bb7e1">2002.eamt-1.2</url>
       <bibkey>bandyopadhyay-2002-teaching</bibkey>
     </paper>
@@ -35,7 +35,7 @@
     </paper>
     <paper id="5">
       <title>Teaching contrastive linguistics for <fixed-case>MT</fixed-case></title>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <url hash="2b90d91f">2002.eamt-1.5</url>
       <bibkey>bennett-2002-teaching</bibkey>
     </paper>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="8">
       <title>Architectures of “toy” systems for teaching machine translation</title>
-      <author><first>Walther</first><last>v. Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></author>
       <author><first>Cristina</first><last>Vertan</last></author>
       <url hash="720ea5e0">2002.eamt-1.8</url>
       <bibkey>v-hahn-vertan-2002-architectures</bibkey>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="16">
       <title>Explaining real <fixed-case>MT</fixed-case> to translators: between compositional semantics and word-for-word</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="239891e4">2002.eamt-1.16</url>
       <bibkey>forcada-2002-explaining</bibkey>
     </paper>
@@ -115,7 +115,7 @@
     </paper>
     <paper id="18">
       <title>Teaching commercial <fixed-case>MT</fixed-case> to translators: bridging the gap between human and machine</title>
-      <author><first>Natalie</first><last>Kübler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kübler</last></author>
       <url hash="7faede75">2002.eamt-1.18</url>
       <bibkey>kubler-2002-teaching</bibkey>
     </paper>
diff --git a/data/xml/2002.jeptalnrecital.xml b/data/xml/2002.jeptalnrecital.xml
index 4771f9a49e..ce9b73dff7 100644
--- a/data/xml/2002.jeptalnrecital.xml
+++ b/data/xml/2002.jeptalnrecital.xml
@@ -50,7 +50,7 @@
     </paper>
     <paper id="2">
       <title>Ressources terminologiques et traduction probabiliste: premiers pas positifs vers un système adaptatif</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>43–52</pages>
       <abstract>Cette dernière décennie a été le témoin d’importantes avancées dans le domaine de la traduction statistique (TS). Aucune évaluation fine n’a cependant été proposée pour mesurer l’adéquation de l’approche statistique dans un contexte applicatif réel. Dans cette étude, nous étudions le comportement d’un engin de traduction probabiliste lorsqu’il traduit un texte de nature très éloignée de celle du corpus utilisé lors de l’entraînement. Nous quantifions en particulier la baisse de performance du système et développons l’idée que l’intégration de ressources terminologiques dans le processus est une solution naturelle et salutaire à la traduction. Nous décrivons cette intégration et évaluons son potentiel.</abstract>
       <url hash="c39b9656">2002.jeptalnrecital-long.2</url>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="3">
       <title>Accentuation de mots inconnus : application au thesaurus biomédical <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case></title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>53–62</pages>
       <abstract>Certaines ressources textuelles ou terminologiques sont écrites sans signes diacritiques, ce qui freine leur utilisation pour le traitement automatique des langues. Dans un domaine spécialisé comme la médecine, il est fréquent que les mots rencontrés ne se trouvent pas dans les lexiques électroniques disponibles. Se pose alors la question de l’accentuation de mots inconnus : c’est le sujet de ce travail. Nous proposons deux méthodes d’accentuation de mots inconnus fondées sur un apprentissage par observation des contextes d’occurrence des lettres à accentuer dans un ensemble de mots d’entraînement, l’une adaptée de l’étiquetage morphosyntaxique, l’autre adaptée d’une méthode d’apprentissage de règles morphologiques. Nous présentons des résultats expérimentaux pour la lettre e sur un thesaurus biomédical en français : le MeSH. Ces méthodes obtiennent une précision de 86 à 96 % (+-4 %) pour un rappel allant de 72 à 86 %.</abstract>
@@ -106,7 +106,7 @@
     <paper id="8">
       <title>Extraction d’informations à partir de corpus dégradés</title>
       <author><first>Fabrice</first><last>Even</last></author>
-      <author><first>Chantal</first><last>Enguehard</last></author>
+      <author id="chantal-enguehard"><first>Chantal</first><last>Enguehard</last></author>
       <pages>105–115</pages>
       <abstract>Nous présentons une méthode automatique d’extraction d’information à partir d’un corpus mono-domaine de mauvaise qualité, sur lequel il est impossible d’appliquer les méthodes classiques de traitement de la langue naturelle. Cette approche se fonde sur la construction d’une ontologie semi-formelle (modélisant les informations contenues dans le corpus et les relations entre elles). Notre méthode se déroule en trois phases : 1) la normalisation du corpus, 2) la construction de l’ontologie, et 3) sa formalisation sous la forme d’une grammaire. L’extraction d’information à proprement parler exploite un étiquetage utilisant les règles définies par la grammaire. Nous illustrons notre démarche d’une application sur un corpus bancaire.</abstract>
       <url hash="ae2db300">2002.jeptalnrecital-long.8</url>
@@ -116,7 +116,7 @@
     <paper id="9">
       <title>Identification thématique hiérarchique : Application aux forums de discussions</title>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>116–125</pages>
       <abstract>Les modèles statistiques du langage ont pour but de donner une représentation statistique de la langue mais souffrent de nombreuses imperfections. Des travaux récents ont montré que ces modèles peuvent être améliorés s’ils peuvent bénéficier de la connaissance du thème traité, afin de s’y adapter. Le thème du document est alors obtenu par un mécanisme d’identification thématique, mais les thèmes ainsi traités sont souvent de granularité différente, c’est pourquoi il nous semble opportun qu’ils soient organisés dans une hiérarchie. Cette structuration des thèmes implique la mise en place de techniques spécifiques d’identification thématique. Cet article propose un modèle statistique à base d’unigrammes pour identifier automatiquement le thème d’un document parmi une arborescence prédéfinie de thèmes possibles. Nous présentons également un critère qui permet au modèle de donner un degré de fiabilité à la décision prise. L’ensemble des expérimentations a été réalisé sur des données extraites du groupe ’fr’ des forums de discussion.</abstract>
       <url hash="2fd5e9f2">2002.jeptalnrecital-long.9</url>
@@ -137,7 +137,7 @@
     <paper id="11">
       <title>Filtrages syntaxiques de co-occurrences pour la représentation vectorielle de documents</title>
       <author><first>Romaric</first><last>Besançon</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <pages>136–145</pages>
       <abstract>L’intégration de co-occurrences dans les modèles de représentation vectorielle de documents s’est avérée une source d’amélioration de la pertinence des mesures de similarités textuelles calculées dans le cadre de ces modèles (Rajman et al., 2000; Besançon, 2001). Dans cette optique, la définition des contextes pris en compte pour les co-occurrences est cruciale, par son influence sur les performances des modèles à base de co-occurrences. Dans cet article, nous proposons d’étudier deux méthodes de filtrage des co-occurrences fondées sur l’utilisation d’informations syntaxiques supplémentaires. Nous présentons également une évaluation de ces méthodes dans le cadre de la tâche de la recherche documentaire.</abstract>
       <url hash="1bbecc34">2002.jeptalnrecital-long.11</url>
@@ -147,7 +147,7 @@
     <paper id="12">
       <title><fixed-case>WSIM</fixed-case> : une méthode de détection de thème fondée sur la similarité entre mots</title>
       <author><first>Armelle</first><last>Brun</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>Jean-Paul</first><last>Haton</last></author>
       <pages>146–155</pages>
       <abstract>L’adaptation des modèles de langage dans les systèmes de reconnaissance de la parole est un des enjeux importants de ces dernières années. Elle permet de poursuivre la reconnaissance en utilisant le modèle de langage adéquat : celui correspondant au thème identifié. Dans cet article nous proposons une méthode originale de détection de thème fondée sur des vocabulaires caractéristiques de thèmes et sur la similarité entre mots et thèmes. Cette méthode dépasse la méthode classique (TFIDF) de 14%, ce qui représente un gain important en terme d’identification. Nous montrons également l’intérêt de choisir un vocabulaire adéquat. Notre méthode de détermination des vocabulaires atteint des performances 3 fois supérieures à celles obtenues avec des vocabulaires construits sur la fréquence des mots.</abstract>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>LOGUS</fixed-case> : un système formel de compréhension du français parlé spontané-présentation et évaluation</title>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Olivier</first><last>Ridoux</last></author>
       <pages>167–176</pages>
@@ -178,7 +178,7 @@
     <paper id="15">
       <title>Etude des relations entre pauses et ponctuations pour la synthèse de la parole à partir de texte</title>
       <author><first>Estelle</first><last>Campione</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>177–186</pages>
       <abstract>Nous présentons dans cette communication la première étude à grande échelle de la relation entre pauses et ponctuations, à l’aide de l’analyse de plusieurs milliers de pauses dans un corpus comportant près de 5 heures de parole lue en cinq langues, faisant intervenir 50 locuteurs des deux sexes. Nos résultats remettent en cause l’idée reçue de rapports bi-univoques entre pauses et ponctuations. Nous mettons en évidence une proportion importante de pauses hors ponctuation, qui délimitent des constituants, mais aussi un pourcentage élevé de ponctuations faibles réalisées sans pauses. Nous notons également une très grande variabilité inter-locuteur, ainsi que des différences importantes entre langues. Enfin, nous montrons que la durée des pauses est liée au sexe des locuteurs.</abstract>
       <url hash="540eea23">2002.jeptalnrecital-long.15</url>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="19">
       <title>Groupes prépositionnels arguments ou circonstants : vers un repérage automatique en corpus</title>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Cécile</first><last>Frérot</last></author>
       <pages>217–226</pages>
       <abstract>Dans cette étude, menée dans le cadre de la réalisation d’un analyseur syntaxique de corpus spécialisés, nous nous intéressons à la question des arguments et circonstants et à leur repérage automatique en corpus. Nous proposons une mesure simple pour distinguer automatiquement, au sein des groupes prépositionnels rattachés au verbe, des types de compléments différents. Nous réalisons cette distinction sur corpus, en mettant en oeuvre une stratégie endogène, et en utilisant deux mesures de productivité : la productivité du recteur verbal vis à vis de la préposition évalue le degré de cohésion entre le verbe et son groupe prépositionnel (GP), tandis que la productivité du régi vis à vis de la préposition permet d’évaluer le degré de cohésion interne du GP. Cet article présente ces deux mesures, commente les données obtenues, et détermine dans quelle mesure cette partition recouvre la distinction traditionnelle entre arguments et circonstants.</abstract>
@@ -226,7 +226,7 @@
     </paper>
     <paper id="20">
       <title>Évaluation des taux de synonymie et de polysémie dans un texte</title>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <pages>227–236</pages>
       <abstract>La polysémie et la synonymie sont deux aspects fondamentaux de la langue. Nous présentons ici une évaluation de l’importance de ces deux phénomènes à l’aide de statistiques basées sur le lexique WordNet et sur le SemCor. Ainsi, on a un taux de polysémie théorique de 5 sens par mot dans le SemCor. Mais si on regarde les occurrences réelles, moins de 50 % des sens possibles sont utilisés. De même, s’il y a, en moyenne, 2,7 mots possibles pour désigner un concept qui apparaît dans le corpus, plus de la moitié d’entre eux ne sont jamais utilisés. Ces résultats relativisent l’utilité de telles ressources sémantiques pour le traitement de la langue.</abstract>
       <url hash="570f3eec">2002.jeptalnrecital-long.20</url>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="21">
       <title>Acquisition automatique de sens à partir d’opérations morphologiques en français : études de cas</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>237–246</pages>
       <abstract>Cet article propose une méthode de codage automatique de traits lexicaux sémantiques en français. Cette approche exploite les relations fixées par l’instruction sémantique d’un opérateur de construction morphologique entre la base et le mot construit. En cela, la réflexion s’inspire des travaux de Marc Light (Light 1996) tout en exploitant le fonctionnement d’un système d’analyse morphologique existant : l’analyseur DériF. A ce jour, l’analyse de 12 types morphologiques conduit à l’étiquetage d’environ 10 % d’un lexique composé de 99000 lemmes. L’article s’achève par la description de deux techniques utilisées pour valider les traits sémantiques.</abstract>
       <url hash="fd3e3584">2002.jeptalnrecital-long.21</url>
@@ -274,7 +274,7 @@
     </paper>
     <paper id="25">
       <title>La coédition langue↔<fixed-case>UNL</fixed-case> pour partager la révision entre les langues d’un document multilingue : un concept unificateur</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Wang-Ju</first><last>Tsai</last></author>
       <pages>277–288</pages>
       <abstract>La coédition d’un texte en langue naturelle et de sa représentation dans une forme interlingue semble le moyen le meilleur et le plus simple de partager la révision du texte vers plusieurs langues. Pour diverses raisons, les graphes UNL sont les meilleurs candidats dans ce contexte. Nous développons un prototype où, dans le scénario avec partage le plus simple, des utilisateurs “naïfs” interagissent directement avec le texte dans leur langue (L0), et indirectement avec le graphe associé pour corriger les erreurs. Le graphe modifié est ensuite envoyé au déconvertisseur UNL-L0 et le résultat est affiché. S’il est satisfaisant, les erreurs étaient probablement dues au graphe et non au déconvertisseur, et le graphe est envoyé aux déconvertisseurs vers d’autres langues. Les versions dans certaines autres langues connues de l’utilisateur peuvent être affichées, de sorte que le partage de l’amélioration soit visible et encourageant. Comme les nouvelles versions sont ajoutées dans le document multilingue original avec des balises et des attributs appropriés, rien n’est jamais perdu, et le travail coopératif sur un même document est rendu possible. Du côté interne, des liaisons sont établies entre des éléments du texte et du graphe en utilisant des ressources largement disponibles comme un dictionnaire L0-anglais, ou mieux L0-UNL, un analyseur morphosyntaxique de L0, et une transformation canonique de graphe UNL à arbre. On peut établir une “meilleure” correspondance entre “l’arbre-UNL+L0” et la “structure MS-L0”, une treille, en utilisant le dictionnaire et en cherchant à aligner l’arbre et une trajectoire avec aussi peu que possible de croisements de liaisons. Un but central de cette recherche est de fusionner les approches de la TA par pivot, de la TA interactive, et de la génération multilingue de texte.</abstract>
@@ -396,7 +396,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux
     <paper id="7">
       <title>Polynomial Tree Substitution Grammars: Characterization and New Examples</title>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <pages>357–362</pages>
       <abstract>Polynomial Tree Substitution Grammars, a subclass of STSGs for which finding the most probable parse is no longer NP-hard but polynomial, are defined and characterized in terms of general properties on the elementary trees in the grammar. Various sufficient and easy to compute properties for a STSG to be polynomial are presented. The min-max selection principle is shown to be one such sufficient property. In addition, another, new, instance of a sufficient property, based on lexical heads, is presented. The performances of both models are evaluated on several corpora.</abstract>
@@ -444,7 +444,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux
     </paper>
     <paper id="12">
       <title>Compréhension Automatique de la Parole et <fixed-case>TAL</fixed-case> : une approche syntaxico-sémantique pour le traitement des inattendus structuraux du français parlé</title>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Franck</first><last>Poirier</last></author>
       <pages>388–393</pages>
@@ -455,7 +455,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux
     </paper>
     <paper id="13">
       <title>Automatic Item Text Generation in Educational Assessment</title>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <author><first>David M.</first><last>Williamson</last></author>
       <pages>394–400</pages>
       <abstract>We present an automatic text generation system (ATG) developed for the generation of natural language text for automatically produced test items. This ATG has been developed to work with an automatic item generation system for analytical reasoning items for use in tests with high-stakes outcomes (such as college admissions decisions). As such, the development and implementation of this ATG is couched in the context and goals of automated item generation for educational assessment.</abstract>
@@ -491,7 +491,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux
     </paper>
     <paper id="2">
       <title>Modélisation des liens lexicaux au moyen des fonctions lexicales</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>37–60</pages>
       <abstract>Ce tutoriel est une introduction à la modélisation lexicographique des liens lexicaux au moyen des fonctions lexicales de la théorie Sens-Texte. Il s’agit donc d’examiner un sous-ensemble des tâches effectuées en lexicographie formelle basée sur la lexicologie explicative et combinatoire. Plutôt que de viser l’introduction de toutes les fonctions lexicales identifiées par la théorie Sens- Texte, je vais m’attacher à introduire la notion de fonction lexicale de façon méthodique, en présentant d’abord les notions linguistiques plus générales sur lesquelles elle s’appuie (lexie, prédicat, actant, dérivation sémantique, collocation, etc.). Ce document vise essentiellement à récapituler les définitions des notions linguistiques qui vont être vues dans le tutoriel de façon pratique, par le biais d’exercices à caractère lexicographique.</abstract>
       <url hash="5b7f7439">2002.jeptalnrecital-tutoriel.2</url>
@@ -561,7 +561,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux
     </paper>
     <paper id="4">
       <title>Conceptualisation d’un système d’informations lexicales, une interface paramétrable pour le <fixed-case>T</fixed-case>.<fixed-case>A</fixed-case>.<fixed-case>L</fixed-case></title>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Evelyne</first><last>Jacquey</last></author>
       <pages>435–444</pages>
       <abstract>La nécessité de ressources lexicales normalisées et publiques est avérée dans le domaine du TAL. Cet article vise à montrer comment, sur la base d’une partie du lexique MULTEXT disponible sur le serveur ABU, il serait possible de construire une architecture permettant tout à la fois l’accès aux ressources avec des attentes différentes (lemmatiseur, parseur, extraction d’informations, prédiction, etc.) et la mise à jour par un groupe restreint de ces ressources. Cette mise à jour consistant en l’intégration et la modification, automatique ou manuelle, de données existantes. Pour ce faire, nous cherchons à prendre en compte à la fois les besoins et les données accessibles. Ce modèle est évalué conceptuellement dans un premier temps en fonction des systèmes utilisés dans notre équipe : un analyseur TAG, un constructeur de grammaires TAGs, un extracteur d’information.</abstract>
diff --git a/data/xml/2002.tc.xml b/data/xml/2002.tc.xml
index efefb8a0fd..7037505af3 100644
--- a/data/xml/2002.tc.xml
+++ b/data/xml/2002.tc.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho: A Pattern-Based Machine Translation System</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <url hash="48909466">2002.tc-1.2</url>
       <bibkey>proszeky-2002-metamorpho</bibkey>
     </paper>
@@ -29,7 +29,7 @@
     </paper>
     <paper id="4">
       <title>Can Translation Companies Survive the Current Economic Climate?</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="8e72bea8">2002.tc-1.4</url>
       <bibkey>schaler-2002-translation</bibkey>
     </paper>
diff --git a/data/xml/2002.tmi.xml b/data/xml/2002.tmi.xml
index 14a97326f0..b885c38b13 100644
--- a/data/xml/2002.tmi.xml
+++ b/data/xml/2002.tmi.xml
@@ -18,21 +18,21 @@
     </paper>
     <paper id="2">
       <title>Alternation-based lexicon reconstruction</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <url hash="dee55b31">2002.tmi-papers.2</url>
       <bibkey>baldwin-bond-2002-alternation</bibkey>
     </paper>
     <paper id="3">
       <title>Corpus-driven splitting of compound words</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <url hash="f16f7685">2002.tmi-papers.3</url>
       <bibkey>brown-2002-corpus</bibkey>
     </paper>
     <paper id="4">
       <title>Two experiments in situated <fixed-case>MT</fixed-case></title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="325bf2dc">2002.tmi-papers.4</url>
       <bibkey>cowie-nirenburg-2002-two</bibkey>
     </paper>
@@ -53,14 +53,14 @@
     <paper id="7">
       <title>Incremental construction and maintenance of morphological analysers based on augmented letter transducers</title>
       <author><first>Alicia</first><last>Garrido-Alenda</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Rafael C.</first><last>Carrasco</last></author>
       <url hash="2d056186">2002.tmi-papers.7</url>
       <bibkey>garrido-alenda-etal-2002-incremental</bibkey>
     </paper>
     <paper id="8">
       <title>Extracting semantic classes and morphosyntactic features for <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>olish machine translation</title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <author><first>Björn</first><last>Erlendsson</last></author>
       <author><first>Hanna</first><last>Duczak</last></author>
       <url hash="ef6fe81c">2002.tmi-papers.8</url>
@@ -99,11 +99,11 @@
     <paper id="13">
       <title>Pronominal anaphora resolution in the <fixed-case>KANTOO</fixed-case> multilingual machine translation system</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Enrique</first><last>Torrejon</last></author>
       <author><first>Dave</first><last>Svoboda</last></author>
       <author><first>Annelen</first><last>Brunner</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <url hash="e1894136">2002.tmi-papers.13</url>
       <bibkey>mitamura-etal-2002-pronominal</bibkey>
     </paper>
@@ -133,7 +133,7 @@
     <paper id="17">
       <title>Challenges in automated elicitation of a controlled bilingual corpus</title>
       <author><first>Katharina</first><last>Probst</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="9624d9dd">2002.tmi-papers.17</url>
       <bibkey>probst-levin-2002-challenges</bibkey>
     </paper>
@@ -146,8 +146,8 @@
     <paper id="19">
       <title>Rapid adaptive development of semantic analysis grammars</title>
       <author><first>Alicia</first><last>Tribble</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="8d4ca586">2002.tmi-papers.19</url>
       <bibkey>tribble-etal-2002-rapid</bibkey>
     </paper>
@@ -155,7 +155,7 @@
       <title>Statistical machine translation based on hierarchical phrase alignment</title>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="efda73c3">2002.tmi-papers.20</url>
       <bibkey>watanabe-etal-2002-statistical-machine</bibkey>
     </paper>
@@ -178,7 +178,7 @@
     </meta>
     <paper id="1">
       <title>Example-based machine translation</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
       <attachment type="presentation" hash="2a487f0b">2002.tmi-tutorials.1.Presentation.pdf</attachment>
       <bibkey>sumita-imamura-2002-example</bibkey>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="3">
       <title>Translation memories</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <attachment type="presentation" hash="437b88fc">2002.tmi-tutorials.3.Presentation.pdf</attachment>
       <bibkey>baldwin-2002-translation</bibkey>
     </paper>
@@ -216,13 +216,13 @@
     </paper>
     <paper id="3">
       <title>Using multilingual content on the web to build fast finite-state direct translation systems</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="004e5f4c">2002.tmi-tmiw.3</url>
       <bibkey>forcada-2002-using</bibkey>
     </paper>
     <paper id="4">
       <title>Machine translation in the mobile and wearable age</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="be97f8b4">2002.tmi-tmiw.4</url>
       <bibkey>ward-2002-machine</bibkey>
     </paper>
@@ -234,18 +234,18 @@
     </paper>
     <paper id="6">
       <title>What are we celebrating today?</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <bibkey>somers-2002-celebrating</bibkey>
     </paper>
     <paper id="7">
       <title>Speech related technologies: Where will the field go in 10 years?</title>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <url hash="38f864fa">2002.tmi-tmiw.7</url>
       <bibkey>bernsen-2002-speech-related</bibkey>
     </paper>
     <paper id="8">
       <title>Towards a road map on human language technology: Natural language processing</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Dorothea</first><last>Ziegler-Eisele</last></author>
       <url hash="64ddc62f">2002.tmi-tmiw.8</url>
       <bibkey>eisele-ziegler-eisele-2002-towards-road</bibkey>
diff --git a/data/xml/2003.eamt.xml b/data/xml/2003.eamt.xml
index 14723ed2e8..414c371f8a 100644
--- a/data/xml/2003.eamt.xml
+++ b/data/xml/2003.eamt.xml
@@ -30,7 +30,7 @@
     <paper id="4">
       <title>Tuning general translation knowledge to a sublanguage</title>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="9041b68d">2003.eamt-1.4</url>
       <bibkey>carl-langlais-2003-tuning</bibkey>
     </paper>
@@ -46,17 +46,17 @@
       <title>Adapting finite-state translation to the <fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 project</title>
       <author><first>Elsa</first><last>Cubel</last></author>
       <author><first>Jorge</first><last>González</last></author>
-      <author><first>Antonio</first><last>Lagarda</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio</first><last>Lagarda</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="577a7b55">2003.eamt-1.6</url>
       <bibkey>cubel-etal-2003-adapting</bibkey>
     </paper>
     <paper id="7">
       <title>Using monolingual corpora for statistical machine translation: the <fixed-case>METIS</fixed-case> system</title>
       <author><first>Yannis</first><last>Dologlou</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>George</first><last>Tambouratzis</last></author>
       <author><first>Olga</first><last>Yannoutsou</last></author>
       <author><first>Athanassia</first><last>Fourla</last></author>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="9">
       <title>Multilingual cataloguing of product information of specific domains: case Mkbeem system</title>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <author><first>Jarno</first><last>Tenni</last></author>
       <author><first>Tuula</first><last>Käpylä</last></author>
       <url hash="254dbfec">2003.eamt-1.9</url>
@@ -81,8 +81,8 @@
     <paper id="10">
       <title>Diagnostics for interactive controlled language checking</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>David</first><last>Svoboda</last></author>
       <url hash="377d9e8c">2003.eamt-1.10</url>
       <bibkey>mitamura-etal-2003-diagnostics</bibkey>
@@ -102,8 +102,8 @@
     <paper id="13">
       <title>Multilingual generation of controlled languages</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="36f4ae1a">2003.eamt-1.13</url>
       <bibkey>power-etal-2003-multilingual</bibkey>
     </paper>
@@ -116,7 +116,7 @@
     <paper id="15">
       <title>A specification and validating parser for simplified technical <fixed-case>S</fixed-case>panish</title>
       <author><first>Remedios</first><last>Ruiz Cascales</last></author>
-      <author><first>Richard F. E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F. E.</first><last>Sutcliffe</last></author>
       <url hash="6c2deb00">2003.eamt-1.15</url>
       <bibkey>ruiz-cascales-sutcliffe-2003-specification</bibkey>
     </paper>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="19">
       <title>Mind your language! Controlled language for inference purposes</title>
-      <author><first>Jana</first><last>Sukkarieh</last></author>
+      <author id="jana-sukkarieh"><first>Jana</first><last>Sukkarieh</last></author>
       <url hash="005d0a59">2003.eamt-1.19</url>
       <bibkey>sukkarieh-2003-mind</bibkey>
     </paper>
@@ -163,7 +163,7 @@
     <paper id="22">
       <title>Evaluating specifications for controlled <fixed-case>G</fixed-case>reek</title>
       <author><first>Marina</first><last>Vassiliou</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Yanis</first><last>Maistros</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
       <url hash="15cac357">2003.eamt-1.22</url>
@@ -172,7 +172,7 @@
     <paper id="23">
       <title>Menu choice translation: a flexible menu-based controlled natural language system</title>
       <author><first>Cristina</first><last>Vertan</last></author>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <url hash="1ef00b4c">2003.eamt-1.23</url>
       <bibkey>vertan-von-hahn-2003-menu</bibkey>
     </paper>
diff --git a/data/xml/2003.jeptalnrecital.xml b/data/xml/2003.jeptalnrecital.xml
index f854dd29e0..99cfccbd98 100644
--- a/data/xml/2003.jeptalnrecital.xml
+++ b/data/xml/2003.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
       <publisher>ATALA</publisher>
       <address>Batz-sur-Mer, France</address>
@@ -18,8 +18,8 @@
     <paper id="1">
       <title>Quand le <fixed-case>TAL</fixed-case> robuste s’attaque au langage parlé : analyse incrémentale pour la compréhension de la parole spontanée</title>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <pages>25–34</pages>
       <abstract>Dans cet article, nous discutons de l’application au langage parlé des techniques d’analyse syntaxique robuste développées pour l’écrit. Nous présentons deux systèmes de compréhension de parole spontané en situation de dialogue homme-machine finalisé, dont les performances montrent la pertinence de ces méthodes pour atteindre une compréhension fine et robuste des énoncés oraux.</abstract>
       <url hash="c7e964ef">2003.jeptalnrecital-long.1</url>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Nouvelle approche de la sélection de vocabulaire pour la détection de thème</title>
       <author><first>Armelle</first><last>Brun</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>Jean-Paul</first><last>Haton</last></author>
       <pages>45–54</pages>
       <abstract>En reconnaissance de la parole, un des moyens d’améliorer les performances des systèmes est de passer par l’adaptation des modèles de langage. Une étape cruciale de ce processus consiste à détecter le thème du document traité et à adapter ensuite le modèle de langage. Dans cet article, nous proposons une nouvelle approche de création des vocabulaires utilisés pour la détection de thème. Cette dernière est fondée sur le développement de vocabulaires spécifiques et caractéristiques des différents thèmes. Nous montrons que cette approche permet non seulement d’améliorer les performances des méthodes, mais exploite également des vocabulaires de taille réduite. De plus, elle permet d’améliorer de façon très significative les performances de méthodes de détection lorsqu’elles sont combinées.</abstract>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="4">
       <title>Classification automatique de textes à partir de leur analyse syntaxico-sémantique</title>
-      <author><first>Jacques</first><last>Chauché</last></author>
+      <author id="jacques-chauche"><first>Jacques</first><last>Chauché</last></author>
       <author><first>Violaine</first><last>Prince</last></author>
       <author><first>Simon</first><last>Jaillet</last></author>
       <author><first>Maguelonne</first><last>Teisseire</last></author>
@@ -69,7 +69,7 @@
     </paper>
     <paper id="6">
       <title>Une plate-forme de conception et d’exploitation d’une grammaire d’arbres adjoints lexicalisés</title>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Bertrand</first><last>Gaiffe</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <pages>75–84</pages>
@@ -81,8 +81,8 @@
     <paper id="7">
       <title>Peut-on trouver la taille de contexte optimale en désambiguïsation sémantique?</title>
       <author><first>Éric</first><last>Crestan</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <pages>85–94</pages>
       <abstract>Dans la tâche de désambiguïsation sémantique, la détermination de la taille optimale de fenêtre de contexte à utiliser, a fait l’objet de plusieurs études. Dans cet article, nous proposons une approche à deux niveaux pour répondre à cette problématique de manière automatique. Trois systèmes concurrents à base d’arbres de classification sémantique sont, dans un premier temps, utilisés pour déterminer les trois sens les plus vraisemblables d’un mot. Ensuite, un système décisionnel tranche entre ces sens au regard d’un contexte plus étendu. Les améliorations constatées lors d’expériences menées sur les données de SENSEVAL-1 et vérifiées sur les données SENSEVAL-2 sont significatives.</abstract>
       <url hash="ffafb85e">2003.jeptalnrecital-long.7</url>
@@ -138,7 +138,7 @@
     <paper id="12">
       <title>Contextual Grammars and Dependency Trees</title>
       <author><first>Radu</first><last>Gramatovici</last></author>
-      <author><first>Carlos</first><last>Martín-Vide</last></author>
+      <author id="carlos-martin-vide"><first>Carlos</first><last>Martín-Vide</last></author>
       <pages>135–144</pages>
       <abstract>A new variant of structured contextual grammar, which generates dependency trees, is introduced. The new generative model, called dependency contextual grammar, improves both the strong and weak generative power of contextual grammars, while being a potential candidate for the mathematical description of dependency-based syntactic models.</abstract>
       <url hash="4cb56ea8">2003.jeptalnrecital-long.12</url>
@@ -167,7 +167,7 @@
     <paper id="15">
       <title>Vers la compréhension automatique de la parole : extraction de concepts par réseaux bayésiens</title>
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>Jean-Paul</first><last>Haton</last></author>
       <pages>165–174</pages>
       <abstract>La compréhension automatique de la parole peut être considérée comme un problème d’association entre deux langages différents. En entrée, la requête exprimée en langage naturel et en sortie, juste avant l’étape d’interprétation, la même requête exprimée en terme de concepts. Un concept représente un sens bien déterminé. Il est défini par un ensemble de mots partageant les mêmes propriétés sémantiques. Dans cet article, nous proposons une méthode à base de réseau bayésien pour l’extraction automatique des concepts ainsi que trois approches différentes pour la représentation vectorielle des mots. Ces représentations aident un réseau bayésien à regrouper les mots, construisant ainsi la liste adéquate des concepts à partir d’un corpus d’apprentissage. Nous conclurons cet article par la description d’une étape de post-traitement au cours de laquelle, nous étiquetons nos requêtes et nous générons les commandes SQL appropriées validant ainsi, notre approche de compréhension.</abstract>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="18">
       <title>De la traduction probabiliste aux mémoires de traduction (ou l’inverse)</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>195–204</pages>
       <abstract>En dépit des travaux réalisés cette dernière décennie dans le cadre général de la traduction probabiliste, nous sommes toujours bien loin du jour où un engin de traduction automatique (probabiliste ou pas) sera capable de répondre pleinement aux besoins d’un traducteur professionnel. Dans une étude récente (Langlais, 2002), nous avons montré comment un engin de traduction probabiliste pouvait bénéficier de ressources terminologiques extérieures. Dans cette étude, nous montrons que les techniques de traduction probabiliste peuvent être utilisées pour extraire des informations sous-phrastiques d’une mémoire de traduction. Ces informations peuvent à leur tour s’avérer utiles à un engin de traduction probabiliste. Nous rapportons des résultats sur un corpus de test de taille importante en utilisant la mémoire de traduction d’un concordancier bilingue commercial.</abstract>
@@ -226,7 +226,7 @@
       <title>Apprentissage discriminant pour les Grammaires à Substitution d’Arbres</title>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <pages>225–234</pages>
       <abstract>Les grammaires stochastiques standards utilisent des modèles probabilistes de nature générative, fondés sur des probabilités de récriture conditionnées par le symbole récrit. Les expériences montrent qu’elles tendent ainsi par nature à pénaliser les dérivations les plus longues pour une meme entrée, ce qui n’est pas forcément un comportement souhaitable, ni en analyse syntaxique, ni en reconnaissance de la parole. Dans cet article, nous proposons une approche probabiliste non-générative du modèle STSG (grammaire stochastique à substitution d’arbres), selon laquelle les probabilités sont conditionnées par les feuilles des arbres syntaxiques plutot que par leur racine, et qui par nature fait appel à un apprentissage discriminant. Plusieurs expériences sur ce modèle sont présentées.</abstract>
       <url hash="47642d52">2003.jeptalnrecital-long.21</url>
@@ -248,8 +248,8 @@
       <title><fixed-case>F</fixed-case>rench Amalgam: A machine-learned sentence realization system</title>
       <author><first>Martine</first><last>Smets</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <pages>245–254</pages>
       <abstract>This paper presents the French implementation of Amalgam, a machine-learned sentence realization system. It presents in some detail two of the machine-learned models employed in Amalgam and shows how linguistic intuition and knowledge can be combined with statistical techniques to improve the performance of the models.</abstract>
       <url hash="48a6f388">2003.jeptalnrecital-long.23</url>
@@ -266,7 +266,7 @@
     </paper>
     <paper id="25">
       <title>Cartographie lexicale pour la recherche d”information</title>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>265–274</pages>
       <abstract>Nous décrivons un algorithme, HyperLex, de détermination automatique des différents usages d’un mot dans une base textuelle sans utilisation d’un dictionnaire. Cet algorithme basé sur la détection des composantes de forte densité du graphe des cooccurrences de mots permet, contrairement aux méthodes précédemment proposées (vecteurs de mots), d’isoler des usages très peu fréquents. Il est associé à une technique de représentation graphique permettant à l’utilisateur de naviguer de façon visuelle à travers le lexique et d’explorer les différentes thématiques correspondant aux usages discriminés.</abstract>
       <url hash="f5e496b4">2003.jeptalnrecital-long.25</url>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="27">
       <title>Apprentissage de relations morphologiques en corpus</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Fadila</first><last>Hadouche</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>285–294</pages>
@@ -299,7 +299,7 @@
   <volume id="poster" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Posters</booktitle>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
       <publisher>ATALA</publisher>
       <address>Batz-sur-Mer, France</address>
@@ -343,14 +343,14 @@
     <paper id="4">
       <title>Indexation discursive pour la navigation intradocumentaire : cadres temporels et spatiaux dans l’information géographique</title>
       <author><first>Frédérik</first><last>Bilhaut</last></author>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
       <author><first>Andrée</first><last>Borillo</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <author><first>Patrice</first><last>Enjalbert</last></author>
       <author><first>Anne</first><last>Le Draoulec</last></author>
       <author><first>Yann</first><last>Mathet</last></author>
       <author><first>Hélène</first><last>Miguet</last></author>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
       <author><first>Laure</first><last>Sarda</last></author>
       <pages>315–320</pages>
       <abstract>Cet article concerne la structuration automatique de documents par des méthodes linguistiques. De telles procédures sont rendues nécessaires par les nouvelles tâches de recherche d’information intradocumentaires (systèmes de questions-réponses, navigation sélective dans des documents...). Nous développons une méthode exploitant la théorie de l’encadrement du discours de Charolles, avec une application visée en recherche d’information dans les documents géographiques - d’où l’intérêt tout particulier porté aux cadres spatiaux et temporels. Nous décrivons une implémentation de la méthode de délimitation de ces cadres et son exploitation pour une tâche d’indexation intratextuelle croisant les critères spatiaux et temporels avec des critères thématiques.</abstract>
@@ -371,7 +371,7 @@
       <title>Identification automatique des valeurs temporelles dans les textes</title>
       <author><first>Marie</first><last>Chagnoux</last></author>
       <author><first>Slim</first><last>Ben Hazez</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <pages>327–332</pages>
       <abstract>Cet article présente une application qui associe un certain nombre de valeurs sémantiques à des segments textuels en vue de proposer un traitement automatique de la temporalité dans les textes. Il s’agit d’automatiser une analyse sémantique de surface à l’aide de règles heuristiques d’exploration contextuelle et d’une base organisée de marqueurs linguistiques.</abstract>
       <url hash="9febbb56">2003.jeptalnrecital-poster.6</url>
@@ -380,7 +380,7 @@
     </paper>
     <paper id="7">
       <title>Structuration automatique de preuves mathématiques : de la logique à la rhétorique</title>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <author><first>Laurent</first><last>Roussarie</last></author>
       <pages>333–338</pages>
       <abstract>Nous présentons dans ses grandes lignes un modèle de structuration de documents pour la génération automatique de preuves mathématiques. Le modèle prend en entrée des sorties d’un prouveur automatique et vise à produire des textes dont le style s’approche le plus possible des démonstrations rédigées par des humains. Cela implique la mise au point d’une stratégie de planification de document capable de s’écarter de la structure purement logique de la preuve. La solution que nous proposons consiste à intégrer de manière simple des informations de type intentionnel afin d’enrichir la structure rhétorique finale du texte.</abstract>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>C</fixed-case>o<fixed-case>RR</fixed-case>ec<fixed-case>T</fixed-case> : Démarche coopérative pour l’évaluation de systèmes de reconnaissance de termes</title>
-      <author><first>Chantal</first><last>Enguehard</last></author>
+      <author id="chantal-enguehard"><first>Chantal</first><last>Enguehard</last></author>
       <pages>339–346</pages>
       <abstract>La reconnaissance de termes dans les textes intervient dans de nombreux domaines du Traitement Automatique des Langues Naturelles, qu’il s’agisse d’indexation automatique, de traduction, ou d’extraction de connaissances. Nous présentons une méthodologie d’évaluation de Systèmes de Reconnaissance de Termes (SRT) qui vise à minimiser le temps d’expertise des spécialistes en faisant coopérer des SRT. La méthodologie est mise en oeuvre sur des textes en anglais dans le domaine de la chimie des métaux et à l’aide de deux SRT : FASTR et SYRETE. Le banc de test construit selon cette méthodologie a permis de valider les SRT et d’évaluer leurs performances en termes de rappel et de précision.</abstract>
       <url hash="91402c3a">2003.jeptalnrecital-poster.8</url>
@@ -417,9 +417,9 @@
     </paper>
     <paper id="11">
       <title>Bases de connaissances pour asseoir la crédibilité des réponses d’un système de <fixed-case>Q</fixed-case>/<fixed-case>R</fixed-case></title>
-      <author><first>Laurent</first><last>Gillard</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>359–364</pages>
       <abstract>Cet article présente un prototype de Question/Réponse (Q/R) impliquant un ensemble de bases de connaissances (BC) dont l’objectif est d’apporter un crédit supplémentaire aux réponses candidates trouvées. Ces BC et leur influence sur la stratégie d’ordonnancement mise en uvre sont décrites dans le cadre de la participation du système à la campagne Q/R de TREC-2002.</abstract>
       <url hash="4f4fb4a3">2003.jeptalnrecital-poster.11</url>
@@ -472,7 +472,7 @@
     </paper>
     <paper id="16">
       <title>Prototypage rapide et évaluation de modèles de dialogue finalisés</title>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Andréa</first><last>Rajman</last></author>
       <author><first>Florian</first><last>Seydoux</last></author>
       <author><first>Alex</first><last>Trutnev</last></author>
@@ -485,8 +485,8 @@
     <paper id="17">
       <title>Text Tokenization for Knowledge-free Automatic Extraction of Lexical Similarities</title>
       <author><first>Aristomenis</first><last>Thanopoulos</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <pages>397–402</pages>
       <abstract>Previous studies on automatic extraction of lexical similarities have considered as semantic unit of text the word. However, the theory of contextual lexical semantics implies that larger segments of text, namely non-compositional multiwords, are more appropriate for this role. We experimentally tested the applicability of this notion applying automatic collocation extraction to identify and merge such multiwords prior to the similarity estimation process. Employing an automatic WordNet-based comparative evaluation scheme along with a manual evaluation procedure, we ascertain improvement of the extracted similarity relations.</abstract>
       <url hash="619f37fc">2003.jeptalnrecital-poster.17</url>
@@ -515,7 +515,7 @@
   <volume id="tutoriel" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Tutoriels</booktitle>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
       <publisher>ATALA</publisher>
       <address>Batz-sur-Mer, France</address>
diff --git a/data/xml/2003.mtsummit.xml b/data/xml/2003.mtsummit.xml
index 9fe08a35de..f89acb3833 100644
--- a/data/xml/2003.mtsummit.xml
+++ b/data/xml/2003.mtsummit.xml
@@ -37,7 +37,7 @@
     </paper>
     <paper id="5">
       <title>Have we found the Holy Grail?</title>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <attachment type="presentation" hash="9dd877bc">2003.mtsummit-plenaries.5.Presentation.pdf</attachment>
       <bibkey>ney-2003-found</bibkey>
     </paper>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="8">
       <title>Holy and unholy grails</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <attachment type="presentation" hash="7a5a5abf">2003.mtsummit-plenaries.8.Presentation.pdf</attachment>
       <bibkey>hovy-ravichandran-2003-holy</bibkey>
@@ -72,10 +72,10 @@
     <paper id="1">
       <title>Experimental comparison of <fixed-case>MT</fixed-case> evaluation methods: <fixed-case>RED</fixed-case> vs.<fixed-case>BLEU</fixed-case></title>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <url hash="8e2b4525">2003.mtsummit-papers.1</url>
       <abstract>This paper experimentally compares two automatic evaluators, RED and BLEU, to determine how close the evaluation results of each automatic evaluator are to average evaluation results by human evaluators, following the ATR standard of MT evaluation. This paper gives several cautionary remarks intended to prevent MT developers from drawing misleading conclusions when using the automatic evaluators. In addition, this paper reports a way of using the automatic evaluators so that their results agree with those of human evaluators.</abstract>
       <bibkey>akiba-etal-2003-experimental</bibkey>
@@ -83,7 +83,7 @@
     <paper id="2">
       <title>A hybrid approach to deriving selectional preferences</title>
       <author><first>Arendse</first><last>Bernth</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <url hash="062df972">2003.mtsummit-papers.2</url>
       <abstract>A hybrid approach to automatic derivation of class-based selectional preferences is proposed. A lexicon of selectional preferences can assist in handling several forms of ambiguity, a major problem for MT. The approach combines knowledge-rich parsing and lexicons, with statistics and corpus data. We illustrate the use of a selectional preference lexicon for anaphora resolution.</abstract>
       <bibkey>bernth-mccord-2003-hybrid</bibkey>
@@ -98,11 +98,11 @@
     </paper>
     <paper id="4">
       <title>Reducing boundary friction using translation-fragment overlap</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <author><first>Rebecca</first><last>Hutchinson</last></author>
-      <author><first>Paul N.</first><last>Bennett</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="paul-bennett"><first>Paul N.</first><last>Bennett</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <url hash="c88ee7cc">2003.mtsummit-papers.4</url>
       <abstract>Many corpus-based Machine Translation (MT) systems generate a number of partial translations which are then pieced together rather than immediately producing one overall translation. While this makes them more robust to ill-formed input, they are subject to disfluencies at phrasal translation boundaries even for well-formed input. We address this “boundary friction” problem by introducing a method that exploits overlapping phrasal translations and the increased confidence in translation accuracy they imply. We specify an efficient algorithm for producing translations using overlap. Finally, our empirical analysis indicates that this approach produces higher quality translations than the standard method of combining non-overlapping fragments generated by our Example-Based MT (EBMT) system in a peak-to-peak comparison.</abstract>
       <bibkey>brown-etal-2003-reducing</bibkey>
@@ -111,7 +111,7 @@
       <title>Communicative strategies and patterns of multimodal integration in a speech-to-speech translation system</title>
       <author><first>Susanne</first><last>Burger</last></author>
       <author><first>Erica</first><last>Costantini</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <url hash="4dddd84c">2003.mtsummit-papers.5</url>
       <abstract>When multilingual communication through a speech-to-speech translation system is supported by multimodal features, e.g. pen-based gestures, the following issues arise concerning the nature of the supported communication: a) to what extend does multilingual communication differ from ‘ordinary’ monolingual communication with respect to the dialogue structure and the communicative strategies used by participants; b) the patterns of integration between speech and gestures. Building on the outcomes of a previous work, we present results from a study aimed at addressing those issues. The initial findings confirm that multilingual communication, and the way in which it is realized by actual systems (e.g., with or without the push-to-talk mode) affects the form and structure of the conversation.</abstract>
       <bibkey>burger-etal-2003-communicative</bibkey>
@@ -134,7 +134,7 @@
     </paper>
     <paper id="8">
       <title>Combining decision trees and transformation-based learning to correct transferred linguistic representations</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <url hash="ff8739c1">2003.mtsummit-papers.8</url>
       <abstract>We approach to correcting features in transferred linguistic representations in machine translation. The hybrid approach combines decision trees and transformation-based learning. Decision trees serve as a filter on the intractably large search space of possible interrelations among features. Transformation-based learning results in a simple set of ordered rules that can be compiled and executed after transfer and before sentence realization in the target language. We measure the reduction in noise in the linguistic representations and the results of human evaluations of end-to-end English-German machine translation.</abstract>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="10">
       <title>The limits of n-gram translation evaluation metrics</title>
-      <author><first>Christopher</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Christopher</first><last>Culy</last></author>
       <author><first>Susanne Z.</first><last>Riehemann</last></author>
       <url hash="9ce534f7">2003.mtsummit-papers.10</url>
       <abstract>N-gram measures of translation quality, such as BLEU and the related NIST metric, are becoming increasingly important in machine translation, yet their behaviors are not fully understood. In this paper we examine the performance of these metrics on professional human translations into German of two literary genres, the Bible and Tom Sawyer. The most surprising result is that some machine translations outscore some professional human translations. In addition, it can be difficult to distinguish some other human translations from machine translations with only two reference translations; with four reference translations it is much easier. Our results lead us to conclude that much care must be taken in using n-gram measures in formal evaluations of machine translation quality, though they are still valuable as part of the iterative development cycle.</abstract>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="11">
       <title>A hybrid approach to word order transfer in the <fixed-case>E</fixed-case>nglish-to-<fixed-case>V</fixed-case>ietnamese machine translation</title>
-      <author><first>Dinh</first><last>Dien</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
       <author><first>Nguyen Luu Thuy</first><last>Ngan</last></author>
       <author><first>Do Xuan</first><last>Quang</last></author>
       <author><first>Van Chi</first><last>Nam</last></author>
@@ -167,9 +167,9 @@
     </paper>
     <paper id="12">
       <title><fixed-case>BTL</fixed-case>: a hybrid model for <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese machine translation</title>
-      <author><first>Dinh</first><last>Dien</last></author>
-      <author><first>Kiem</first><last>Hoang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
+      <author id="hoang-kiem"><first>Kiem</first><last>Hoang</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="b79fdd03">2003.mtsummit-papers.12</url>
       <abstract>Machine Translation (MT) is the most interesting and difficult task which has been posed since the beginning of computer history. The highest difficulty which computers had to face with, is the built-in ambiguity of Natural Languages. Formerly, a lot of human-devised rules have been used to disambiguate those ambiguities. Building such a complete rule-set is time-consuming and labor-intensive task whilst it doesn’t cover all the cases. Besides, when the scale of system increases, it is very difficult to control that rule-set. In this paper, we present a new model of learning-based MT (entitled BTL: Bitext-Transfer Learning) that learns from bilingual corpus to extract disambiguating rules. This model has been experimented in English-to-Vietnamese MT system (EVT) and it gave encouraging results.</abstract>
       <bibkey>dien-etal-2003-btl</bibkey>
@@ -178,15 +178,15 @@
       <title>An algorithm for word-level alignment of parallel dependency trees</title>
       <author><first>Yuan</first><last>Ding</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="cbaf3463">2003.mtsummit-papers.13</url>
       <abstract>Structural divergence presents a challenge to the use of syntax in statistical machine translation. We address this problem with a new algorithm for alignment of loosely matched non-isomorphic dependency trees. The algorithm selectively relaxes the constraints of the two tree structures while keeping computational complexity polynomial in the length of the sentences. Experimentation with a large Chinese-English corpus shows an improvement in alignment results over the unstructured models of (Brown et al., 1993).</abstract>
       <bibkey>ding-etal-2003-algorithm</bibkey>
     </paper>
     <paper id="14">
       <title>Effectiveness of automatic extraction of bilingual collocations using recursive chain-link-type learning</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <url hash="cd4328e1">2003.mtsummit-papers.14</url>
@@ -196,9 +196,9 @@
       <title>Statistical machine translation: rapid development with limited resources</title>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Simona</first><last>Gandrabur</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Pierre</first><last>Plamondon</last></author>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <url hash="43d87d09">2003.mtsummit-papers.15</url>
       <abstract>We describe an experiment in rapid development of a statistical machine translation (SMT) system from scratch, using limited resources: under this heading we include not only training data, but also computing power, linguistic knowledge, programming effort, and absolute time.</abstract>
@@ -215,7 +215,7 @@
     <paper id="17">
       <title>Transliteration considering context information based on the maximum entropy method</title>
       <author><first>Isao</first><last>Goto</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Noriyoshi</first><last>Uratani</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <url hash="991d3769">2003.mtsummit-papers.17</url>
@@ -232,8 +232,8 @@
     </paper>
     <paper id="19">
       <title>Identification of divergence for <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi <fixed-case>EBMT</fixed-case></title>
-      <author><first>Deepa</first><last>Gupta</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="deepak-gupta"><first>Deepa</first><last>Gupta</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <url hash="ead7184f">2003.mtsummit-papers.19</url>
       <abstract>Divergence is a key aspect of translation between two languages. Divergence occurs when structurally similar sentences of the source language do not translate into sentences that are similar in structures in the target language. Divergence assumes special significance in the domain of Example-Based Machine Translation (EBMT). An EBMT system generates translation of a given sentence by retrieving similar past translation examples from its example base and then adapting them suitably to meet the current translation requirements. Divergence imposes a great challenge to the success of EBMT. The present work provides a technique for identification of divergence without going into the semantic details of the underlying sentences. This identification helps in partitioning the example database into divergence / non-divergence categories, which in turn should facilitate efficient retrieval and adaptation in an EBMT system.</abstract>
       <bibkey>gupta-chatterjee-2003-identification</bibkey>
@@ -247,9 +247,9 @@
     </paper>
     <paper id="21">
       <title>A simple multilingual machine translation system</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <url hash="14979c06">2003.mtsummit-papers.21</url>
       <abstract>The multilingual machine translation system described in the first part of this paper demonstrates that the translation memory (TM) can be used in a creative way for making the translation process more automatic (in a way which in fact does not depend on the languages used). The MT system is based upon exploitation of syntactic similarities between more or less related natural languages. It currently covers the translation from Czech to Slovak, Polish and Lithuanian. The second part of the paper also shows that one of the most popular TM based commercial systems, TRADOS, can be used not only for the translation itself, but also for a relatively fast and natural method of evaluation of the translation quality of MT systems.</abstract>
       <bibkey>hajic-etal-2003-simple</bibkey>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="24">
       <title>Has machine translation improved? some historical comparisons</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="d9038c10">2003.mtsummit-papers.24</url>
       <abstract>The common assertion that MT systems have improved over the last decades is examined by informal comparisons of translations produced by operational systems in the 1960s, 1970s and 1980s and of translations of the same source texts produced by some currently available commercial and online systems. The scarcity of source and target texts for earlier systems means that the conclusions are consequently tentative and preliminary.</abstract>
       <bibkey>hutchins-2003-machine</bibkey>
@@ -289,7 +289,7 @@
       <title>Lexical knowledge representation with contextonyms</title>
       <author><first>Hyungsuk</first><last>Ji</last></author>
       <author><first>Sabine</first><last>Ploux</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="ac5399dd">2003.mtsummit-papers.26</url>
       <abstract>Inter-word associations like stagger - drunken, or intra-word sense divisions (e.g. write a diary vs. write an article) are difficult to compile using a traditional lexicographic approach. As an alternative, we present a model that reflects this kind of subtle lexical knowledge. Based on the minimal sense of a word (clique), the model (1) selects contextually related words (contexonyms) and (2) classifies them in a multi-dimensional semantic space. Trained on very large corpora, the model provides relevant, organized contexonyms that reflect the fine-grained connotations and contextual usage of the target word, as well as the distinct senses of homonyms and polysemous words. Further study on the neighbor effect showed that the model can handle the data sparseness problem.</abstract>
       <bibkey>ji-etal-2003-lexical</bibkey>
@@ -306,14 +306,14 @@
       <title>Acquisition of bilingual <fixed-case>MT</fixed-case> lexicons from <fixed-case>OCR</fixed-case>ed dictionaries</title>
       <author><first>Burcu</first><last>Karagol-Ayan</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <url hash="a4712b58">2003.mtsummit-papers.28</url>
       <abstract>This paper describes an approach to analyzing the lexical structure of OCRed bilingual dictionaries to construct resources suited for machine translation of low-density languages, where online resources are limited. A rule-based, an HMM-based, and a post-processed HMM-based method are used for rapid construction of MT lexicons based on systematic structural clues provided in the original dictionary. We evaluate the effectiveness of our techniques, concluding that: (1) the rule-based method performs better with dictionaries where the font is not an important distinguishing feature for determining information types; (2) the post-processed stochastic method improves the results of the stochastic method for phrasal entries; and (3) Our resulting bilingual lexicons are comprehensive enough to provide the basis for reasonable translation results when compared to human translations.</abstract>
       <bibkey>karagol-ayan-etal-2003-acquisition</bibkey>
     </paper>
     <paper id="29">
       <title>Building a parallel corpus for monologues with clause alignment</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Takehiko</first><last>Maruyama</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <url hash="69036496">2003.mtsummit-papers.29</url>
@@ -323,8 +323,8 @@
     <paper id="30">
       <title><fixed-case>FEMTI</fixed-case>: creating and using a framework for <fixed-case>MT</fixed-case> evaluation</title>
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="0bcc1d59">2003.mtsummit-papers.30</url>
       <abstract>This paper presents FEMTI, a web-based Framework for the Evaluation of Machine Translation in ISLE. FEMTI offers structured descriptions of potential user needs, linked to an overview of technical characteristics of MT systems. The description of possible systems is mainly articulated around the quality characteristics for software product set out in ISO/IEC standard 9126. Following the philosophy set out there and in the related 14598 series of standards, each quality characteristic bottoms out in metrics which may be applied to a particular instance of a system in order to judge how satisfactory the system is with respect to that characteristic. An evaluator can use the description of user needs to help identify the specific needs of his evaluation and the relations between them. He can then follow the pointers to system description to determine what metrics should be applied and how. In the current state of the framework, emphasis is on being exhaustive, including as much as possible of the information available in the literature on machine translation evaluation. Future work will aim at being more analytic, looking at characteristics and metrics to see how they relate to one another, validating metrics and investigating the correlation between particular metrics and human judgement.</abstract>
       <bibkey>king-etal-2003-femti</bibkey>
@@ -341,14 +341,14 @@
       <title>A novel string-to-string distance measure with applications to machine translation evaluation</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="286e36f7">2003.mtsummit-papers.32</url>
       <abstract>We introduce a string-to-string distance measure which extends the edit distance by block transpositions as constant cost edit operation. An algorithm for the calculation of this distance measure in polynomial time is presented. We then demonstrate how this distance measure can be used as an evaluation criterion in machine translation. The correlation between this evaluation criterion and human judgment is systematically compared with that of other automatic evaluation measures on two translation tasks. In general, like other automatic evaluation measures, the criterion shows low correlation at sentence level, but good correlation at system level.</abstract>
       <bibkey>leusch-etal-2003-novel</bibkey>
     </paper>
     <paper id="33">
       <title>Scalability in <fixed-case>MT</fixed-case> systems</title>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Anthony</first><last>Clarke</last></author>
       <url hash="57ec4f65">2003.mtsummit-papers.33</url>
       <abstract>In this paper we show why scalability is one of the most important aspects for the evaluation of Machine Translation (MT) systems and what scalability entails in the framework of MT. We illustrate the issue of scalability by reporting about an MT solution, which has been chosen in the course of a thorough hands-on evaluation and which in the meantime has been developed from a pilot system to a MT turnkey solution for mid-to large-scale enterprises.</abstract>
@@ -357,9 +357,9 @@
     <paper id="34">
       <title>Source language diagnostics for <fixed-case>MT</fixed-case></title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>David</first><last>Svoboda</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <url hash="52af86df">2003.mtsummit-papers.34</url>
       <abstract>This paper presents a source language diagnostic system for controlled translation. Diagnostics were designed and implemented to address the most difficult rewrites for authors, based on an empirical analysis of log files containing over 180,000 sentences. The design and implementation of the diagnostic system are presented, along with experimental results from an empirical evaluation of the completed system. We found that the diagnostic system can correctly identify the problem in 90.2% of the cases. In addition, depending on the type of grammar problem, the diagnostic system may offer a rewritten sentence. We found that 89.4% of the rewritten sentences were correctly rewritten. The results suggest that these methods could be used as the basis for an automatic rewriting system in the future.</abstract>
       <bibkey>mitamura-etal-2003-source</bibkey>
@@ -380,8 +380,8 @@
     </paper>
     <paper id="37">
       <title>Rapid-response machine translation for unexpected languages</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <url hash="5e9a30eb">2003.mtsummit-papers.37</url>
       <abstract>Statistical techniques for machine translation offer promise for rapid development in response to unexpected requirements, but realizing that potential requires rapid acquisition of required resources as well. This paper reports the results of experiments with resources collected in ten days; about 1.3 million words of parallel text from five types of sources and a bilingual term list with about 20,000 term pairs. Systems were trained with resources individually and in combination, using an approach based on alignment templates. The use of all available resources was found to yield the best results in an automatic evaluation using the BLEU measure, but a single resource (the Bible) coupled with a small amount of in-domain manual translation (less than 6,000 words) achieved more than 85% of that upper baseline. With a concerted effort, such a system could be built in a single day.</abstract>
       <bibkey>oard-och-2003-rapid</bibkey>
@@ -404,9 +404,9 @@
     <paper id="40">
       <title>On the use of statistical machine-translation techniques within a memory-based translation system (<fixed-case>AMETRA</fixed-case>)</title>
       <author><first>Daniel</first><last>Ortíz</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Antonio</first><last>Lagarda</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio</first><last>Lagarda</last></author>
       <author><first>Jorge</first><last>González</last></author>
       <url hash="84d35ac1">2003.mtsummit-papers.40</url>
       <abstract>The goal of the AMETRA project is to make a computer-assisted translation tool from the Spanish language to the Basque language under the memory-based translation framework. The system is based on a large collection of bilingual word-segments. These segments are obtained using linguistic or statistical techniques from a Spanish-Basque bilingual corpus consisting of sentences extracted from the Basque Country’s of£cial government record. One of the tasks within the global information document of the AMETRA project is to study the combination of well-known statistical techniques for the translation of short sequences and techniques for memory-based translation. In this paper, we address the problem of constructing a statistical module to deal with the task of translating segments. The task undertaken in the AMETRA project is compared with other existing translation tasks, This study includes the results of some preliminary experiments we have carried out using well-known statistical machine translation tools and techniques.</abstract>
@@ -414,7 +414,7 @@
     </paper>
     <paper id="41">
       <title>An experiment in comparative evaluation: humans vs. computers</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <url hash="c676cd30">2003.mtsummit-papers.41</url>
       <abstract>This paper reports results from an experiment that was aimed at comparing evaluation metrics for machine translation. Implemented as a workshop at a major conference in 2002, the experiment defined an evaluation task, description of the metrics, as well as test data consisting of human and machine translations of two texts. Several metrics, either applicable by human judges or automated, were used, and the overall results were analyzed. It appeared that most human metrics and automated metrics provided in general consistent rankings of the various candidate translations; the ranking of the human translations matched the one provided by translation professionals; and human translations were distinguished from machine translations.</abstract>
       <bibkey>popescu-belis-2003-experiment</bibkey>
@@ -432,7 +432,7 @@
       <author><first>Munpyo</first><last>Hong</last></author>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
       <url hash="933fae30">2003.mtsummit-papers.43</url>
       <abstract>This paper describes a sentence pattern-based English-Korean machine translation system backed up by a rule-based module as a solution to the translation of long sentences. A rule-based English-Korean MT system typically suffers from low translation accuracy for long sentences due to poor parsing performance. In the proposed method we only use chunking information on the phrase-level of the parse result (i.e. NP, PP, and AP). By applying a sentence pattern directly to a chunking result, the high performance of analysis and a good quality of translation are expected. The parsing efficiency problem in the traditional RBMT approach is resolved by sentence partitioning, which is generally assumed to have many problems. However, we will show that the sentence partitioning has little side effect, if any, in our approach, because we use only the chunking results for the transfer. The coverage problem of a pattern-based method is overcome by applying sentence pattern matching recursively to the sub-sentences of the input sentence, in case there is no exact matching pattern to the input sentence.</abstract>
       <bibkey>roh-etal-2003-proper</bibkey>
@@ -449,8 +449,8 @@
     <paper id="45">
       <title><fixed-case>SYSTRAN</fixed-case> new generation: the <fixed-case>XML</fixed-case> translation workflow</title>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <url hash="0bc02184">2003.mtsummit-papers.45</url>
       <abstract>Customization of Machine Translation (MT) is a prerequisite for corporations to adopt the technology. It is therefore important but nonetheless challenging. Ongoing implementation proves that XML is an excellent exchange device between MT modules that efficiently enables interaction between the user and the processes to reach highly granulated structure-based customization. Accomplished through an innovative approach called the SYSTRAN Translation Stylesheet, this method is coherent with the current evolution of the “authoring process”. As a natural progression, the next stage in the customization process is the integration of MT in a multilingual tool kit designed for the “authoring process”.</abstract>
       <bibkey>senellart-etal-2003-systran</bibkey>
@@ -466,9 +466,9 @@
     </paper>
     <paper id="47">
       <title>Example-based rough translation for speech-to-speech translation</title>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="94a9cd60">2003.mtsummit-papers.47</url>
       <abstract>Example-based machine translation (EBMT) is a promising translation method for speech-to-speech translation (S2ST) because of its robustness. However, it has two problems in that the performance degrades when input sentences are long and when the style of the input sentences and that of the example corpus are different. This paper proposes example-based rough translation to overcome these two problems. The rough translation method relies on “meaning-equivalent sentences,” which share the main meaning with an input sentence despite missing some unimportant information. This method facilitates retrieval of meaning-equivalent sentences for long input sentences. The retrieval of meaning-equivalent sentences is based on content words, modality, and tense. This method also provides robustness against the style differences between the input sentence and the example corpus.</abstract>
       <bibkey>shimohata-etal-2003-example</bibkey>
@@ -486,7 +486,7 @@
     </paper>
     <paper id="49">
       <title>Evaluating commercial spoken language translation software</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Yuri</first><last>Sugita</last></author>
       <url hash="6b88840c">2003.mtsummit-papers.49</url>
       <abstract>While spoken language translation remains a research goal, a crude form of it is widely available commercially for Japanese–English as a pipeline concatenation of speech-to-text recognition (SR), text-to-text translation (MT) and text-to-speech synthesis (SS). This paper proposes and illustrates an evaluation methodology for this noisy channel which tries to quantify the relative amount of degradation in translation quality due to each of the contributing modules. A small pilot experiment involving word-accuracy rate for the SR, and a fidelity evaluation for the MT and SS modules is proposed in which subjects are asked to paraphrase translated and/or synthesised sentences from a tourist’s phrasebook. Results show (as expected) that MT is the “noisiest” channel, with SS contributing least noise. The concatenation of the three channels is worse than could be predicted from the performance of each as individual tasks.</abstract>
@@ -495,14 +495,14 @@
     <paper id="50">
       <title>Translation selection for <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish noun-noun compounds</title>
       <author><first>Takaaki</first><last>Tanaka</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <url hash="0333f0c7">2003.mtsummit-papers.50</url>
       <abstract>We present a method for compositionally translating Japanese NN compounds into English, using a word-level transfer dictionary and target language monolingual corpus. The method interpolates over fully-specified and partial translation data, based on corpus evidence. In evaluation, we demonstrate that interpolation over the two data types is superior to using either one, and show that our method performs at an F-score of 0.68 over translation-aligned inputs and 0.66 over a random sample of 500 NN compounds.</abstract>
       <bibkey>tanaka-baldwin-2003-translation</bibkey>
     </paper>
     <paper id="51">
       <title>Evaluation of machine translation and its evaluation</title>
-      <author><first>Joseph P.</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph P.</first><last>Turian</last></author>
       <author><first>Luke</first><last>Shen</last></author>
       <author><first>I. Dan</first><last>Melamed</last></author>
       <url hash="c77173e1">2003.mtsummit-papers.51</url>
@@ -513,20 +513,20 @@
       <title>Confidence measures for statistical machine translation</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
       <author><first>Klaus</first><last>Macherey</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="a590c438">2003.mtsummit-papers.52</url>
       <abstract>In this paper, we present several confidence measures for (statistical) machine translation. We introduce word posterior probabilities for words in the target sentence that can be determined either on a word graph or on an N best list. Two alternative confidence measures that can be calculated on N best lists are proposed. The performance of the measures is evaluated on two different translation tasks: on spontaneously spoken dialogues from the domain of appointment scheduling, and on a collection of technical manuals.</abstract>
       <bibkey>ueffing-etal-2003-confidence</bibkey>
     </paper>
     <paper id="53">
       <title>The <fixed-case>CMU</fixed-case> statistical machine translation system</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Fei</first><last>Huang</last></author>
       <author><first>Alicia</first><last>Tribble</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="c6865c22">2003.mtsummit-papers.53</url>
       <abstract>In this paper we describe the components of our statistical machine translation system. This system combines phrase-to-phrase translations extracted from a bilingual corpus using different alignment approaches. Special methods to extract and align named entities are used. We show how a manual lexicon can be incorporated into the statistical system in an optimized way. Experiments on Chinese-to-English and Arabic-to-English translation tasks are presented.</abstract>
       <bibkey>vogel-etal-2003-cmu</bibkey>
@@ -534,7 +534,7 @@
     <paper id="54">
       <title>Example-based decoding for statistical machine translation</title>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="416513a3">2003.mtsummit-papers.54</url>
       <abstract>This paper presents a decoder for statistical machine translation that can take advantage of the example-based machine translation framework. The decoder presented here is based on the greedy approach to the decoding problem, but the search is initiated from a similar translation extracted from a bilingual corpus. The experiments on multilingual translations showed that the proposed method was far superior to a word-by-word generation beam search algorithm.</abstract>
       <bibkey>watanabe-sumita-2003-example</bibkey>
@@ -558,7 +558,7 @@
     </paper>
     <paper id="57">
       <title>Customizing complex lexical entries for high-quality <fixed-case>MT</fixed-case></title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <author><first>Elke</first><last>Lange</last></author>
       <author><first>Jin</first><last>Yang</last></author>
       <url hash="d98bcc45">2003.mtsummit-papers.57</url>
@@ -628,7 +628,7 @@
     <paper id="7">
       <title>A multi-language translation example browser</title>
       <author><first>Isao</first><last>Goto</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Noriyoshi</first><last>Uratani</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Tadashi</first><last>Kumano</last></author>
@@ -647,7 +647,7 @@
     <paper id="9">
       <title><fixed-case>C</fixed-case>at<fixed-case>V</fixed-case>ar: a database of categorial variations for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <url hash="5f2365b1">2003.mtsummit-systems.9</url>
       <abstract>We present a new large-scale database called “CatVar” (Habash and Dorr, 2003) which contains categorial variations of English lexemes. Due to the prevalence of cross-language categorial variation in multilingual applications, our categorial-variation resource may serve as an integral part of a diverse range of natural language applications. Thus, the research reported herein overlaps heavily with that of the machine-translation, lexicon-construction, and information-retrieval communities. We demonstrate this database, embedded in a graphical interface; we also show a GUI for user input of corrections to the database.</abstract>
       <bibkey>habash-dorr-2003-catvar</bibkey>
@@ -682,11 +682,11 @@
     </paper>
     <paper id="13">
       <title>An integrated system for source language checking, analysis and term management</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>David</first><last>Svoboda</last></author>
       <author><first>Jeongwoo</first><last>Ko</last></author>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
       <url hash="dd9661e8">2003.mtsummit-systems.13</url>
       <abstract>This paper presents an overview of the tools provided by KANTOO MT system for controlled source language checking, source text analysis, and terminology management. The steps in each process are described, and screen images are provided to illustrate the system architecture and example tool interfaces.</abstract>
@@ -694,11 +694,11 @@
     </paper>
     <paper id="14">
       <title><fixed-case>MATS</fixed-case> – a glass box machine translation system</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <author><first>Eva</first><last>Forsbom</last></author>
       <author><first>Per</first><last>Weijnitz</last></author>
       <author><first>Ebba</first><last>Gustavii</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="20321556">2003.mtsummit-systems.14</url>
       <bibkey>sagvall-hein-etal-2003-mats</bibkey>
     </paper>
@@ -712,7 +712,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 - a new paradigm for translation automation</title>
-      <author><first>Antonio S.</first><last>Valderrábanos</last></author>
+      <author id="antonio-s-valderrabanos"><first>Antonio S.</first><last>Valderrábanos</last></author>
       <author><first>José</first><last>Esteban</last></author>
       <author><first>Luis</first><last>Iraola</last></author>
       <url hash="817552a9">2003.mtsummit-systems.16</url>
@@ -721,7 +721,7 @@
     </paper>
     <paper id="17">
       <title>Translation of words in context</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="aaa5ee35">2003.mtsummit-systems.17</url>
       <abstract>TWiC is an on-line word and expression translation syste m which uses a powerful parser to (i) properly identify the relevant lexical units, (ii) retrieve the base form of the selected word and (iii) recognize the presence of a multiword expression (compound, idiom, collocation) the selected word may be part of. The conjunction of state-of-the-art natural language parsing, multiword expression identification and large bilingual databases provides a powerful and effective tool for people who want to read on-line material in a foreign language which they are not completely fluent in. A full prototype version of TWiC has been completed for the English-French pair of languages.</abstract>
       <bibkey>wehrli-2003-translation</bibkey>
@@ -737,7 +737,7 @@
     </meta>
     <paper id="1">
       <title><fixed-case>MT</fixed-case> customization</title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <url hash="395dc87b">2003.mtsummit-tutorials.1</url>
       <bibkey>zajac-2003-mt</bibkey>
     </paper>
@@ -764,20 +764,20 @@
     </paper>
     <paper id="2">
       <title><fixed-case>SMT</fixed-case> – <fixed-case>TIDES</fixed-case> – and all that</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <attachment type="presentation" hash="45a3ba8b">2003.mtsummit-semit.2.Presentation.pdf</attachment>
       <bibkey>vogel-2003-smt</bibkey>
     </paper>
     <paper id="3">
       <title>The <fixed-case>CMU</fixed-case> <fixed-case>A</fixed-case>rabic-to-<fixed-case>E</fixed-case>nglish statistical <fixed-case>MT</fixed-case> system</title>
       <author><first>Alicia</first><last>Tribble</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <attachment type="presentation" hash="1642cbb5">2003.mtsummit-semit.3.Presentation.pdf</attachment>
       <bibkey>tribble-vogel-2003-cmu</bibkey>
     </paper>
     <paper id="4">
       <title>Issues in <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case></title>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <attachment type="presentation" hash="74c59622">2003.mtsummit-semit.4.Presentation.pdf</attachment>
       <bibkey>fraser-2003-issues</bibkey>
     </paper>
@@ -799,7 +799,7 @@
     </paper>
     <paper id="7">
       <title>Application of corpus-based techniques to <fixed-case>A</fixed-case>mharic texts</title>
-      <author><first>Sisay</first><last>Fissaha</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay</first><last>Fissaha</last></author>
       <author><first>Johann</first><last>Haller</last></author>
       <url hash="febe4abd">2003.mtsummit-semit.7</url>
       <abstract>A number of corpus-based techniques have been used in the development of natural language processing application. One area in which these techniques have extensively been applied is lexical development. The current work is being undertaken in the context of a machine translation project in which lexical development activities constitute a significant portion of the overall task. In the first part, we applied corpus-based techniques to the extraction of collocations from Amharic text corpus. Analysis of the output reveals important collocations that can usefully be incorporated in the lexicon. This is especially true for the extraction of idiomatic expressions. The patterns of idiom formation which are observed in a small manually collected data enabled extraction of large set of idioms which otherwise may be difficult or impossible to recognize. Furthermore, preliminary results of other corpus-based techniques, that is, clustering and classification, that are currently being under investigation are presented. The results show that clustering performed no better than the frequency base line whereas classification showed a clear performance improvement over the frequency base line. This in turn suggests the need to carry out further experiments using large sets of data and more contextual information.</abstract>
@@ -807,8 +807,8 @@
     </paper>
     <paper id="8">
       <title>Towards semantic composition of <fixed-case>A</fixed-case>rabic: a λ-<fixed-case>DRT</fixed-case> based approach</title>
-      <author><first>Bassam</first><last>Haddad</last></author>
-      <author><first>Mustafa</first><last>Yaseen</last></author>
+      <author id="bassam-haddad"><first>Bassam</first><last>Haddad</last></author>
+      <author id="mustafa-yaseen"><first>Mustafa</first><last>Yaseen</last></author>
       <url hash="84632680">2003.mtsummit-semit.8</url>
       <abstract>This paper addresses issues related to employing logic-based semantic composition as a meaning representation for Arabic within a unification-based syntax-semantics interface. Since semantic representation has to be compositional on the level of semantic processing λ-calculus based on Discourse Representation Theory can be utilized as a helpful and practical technique for the semantic construction of ARABIC in Arabic understanding systems. As ARABIC computational linguistics is also short of feature-based compositional syntax-semantics interfaces we hope that this approach might be a further motivation to redirect research to modern semantic construction techniques for developing an adequate model of semantic processing for Arabic and even no existing formal theory is capable to provide a complete and consistent account of all phenomena involved in Arabic semantic processing.</abstract>
       <bibkey>haddad-yaseen-2003-towards</bibkey>
@@ -864,7 +864,7 @@
     </paper>
     <paper id="2">
       <title>A 45-hour computers in translation course</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="d02532b7">2003.mtsummit-tttt.2</url>
       <abstract>This paper describes how a 45-hour Computers in Translation course is actually taught to 3rd-year translation students at the University of Alacant; the course described started in year 1995–1996 and has undergone substantial redesign until its present form. It is hoped that this description may be of use to instructors who are forced to teach a similar subject in such as small slot of time and need some design guidelines.</abstract>
       <bibkey>forcada-2003-45</bibkey>
@@ -879,8 +879,8 @@
     <paper id="4">
       <title>Teaching machine translation in a graduate language technologies program</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <url hash="e25ca3d1">2003.mtsummit-tttt.4</url>
       <abstract>This paper describes a graduate-level machine translation (MT) course taught at the Language Technologies Institute at Carnegie Mellon University. Most of the students in the course have a background in computer science. We discuss what we teach (the course syllabus), and how we teach it (lectures, homeworks, and projects). The course has evolved steadily over the past several years to incorporate refinements in the set of course topics, how they are taught, and how students “learn by doing”. The course syllabus has also evolved in response to changes in the field of MT and the role that MT plays in various social contexts.</abstract>
       <bibkey>mitamura-etal-2003-teaching</bibkey>
@@ -888,14 +888,14 @@
     <paper id="5">
       <title>Teaching the automation of the translation process to future translators</title>
       <author><first>Benoît</first><last>Robichaud</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <url hash="6417be40">2003.mtsummit-tttt.5</url>
       <abstract>This paper describes the approach used for introducing CAT tools and MT systems into a course offered in translation curricula at the Université de Montréal (Canada). It focuses on the automation of the translation process and presents various strategies that have been developed to help students progressively acquire the knowledge necessary to understand and undertake the tasks involved in the automation of translation. We begin with very basic principles and techniques, and move towards complex processes of advanced CAT and revision tools, including ultimately MT systems. As we will see, teaching concepts related to MT serves both as a wrap-up for the subjects dealt with during the semester and a way to highlight the tasks involved in the transfer phase of translation.</abstract>
       <bibkey>robichaud-lhomme-2003-teaching</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>P</fixed-case>rolog models of classical approaches to <fixed-case>MT</fixed-case></title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="e68ca3fc">2003.mtsummit-tttt.6</url>
       <abstract>This paper describes a number of “toy” MT systems written in Prolog, designed as programming exercises and illustrations of various approaches to MT. The systems include a dumb word-for-word system, DCG-based “transfer” system, an interlingua-based system with an LFG-like interface structure, a first-generation-like Russian-English system, an interactive system, and an implementation based on early example-based MT.</abstract>
       <bibkey>somers-2003-prolog</bibkey>
@@ -903,7 +903,7 @@
     <paper id="7">
       <title>Specification and evaluation of machine translation toy systems - criteria for laboratory assignments</title>
       <author><first>Cristina</first><last>Vertan</last></author>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <url hash="96809bcf">2003.mtsummit-tttt.7</url>
       <abstract>Implementation of machine translation “toy” systems is a good practical exercise especially for computer science students. Our aim in a series of courses on MT in 2002 was to make students familiar both with typical problems of Machine Translation in particular and natural language processing in general, as well as with software implementation. In order to simulate a software implementation proc- ess as realistic as possible, we introduced more than 20 evaluation criteria to be filled by the students when they evaluated their own products. The criteria go far beyond such “toy” systems, but they should demonstrate the students, what a real software evaluation means, and which are the particularities of Machine Translation Evaluation.</abstract>
       <bibkey>vertan-hahn-2003-specification</bibkey>
@@ -927,8 +927,8 @@
     </meta>
     <paper id="1">
       <title>Evaluation techniques applied to domain tuning of <fixed-case>MT</fixed-case> lexicons</title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Okan</first><last>Kolak</last></author>
       <url hash="17b2a9c5">2003.mtsummit-eval.1</url>
       <bibkey>ayan-etal-2003-evaluation</bibkey>
@@ -941,7 +941,7 @@
     </paper>
     <paper id="3">
       <title>Pragmatics-based translation and <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
       <url hash="0220f302">2003.mtsummit-eval.3</url>
       <abstract>In this paper the authors wish to present a view of translation equivalence related to a pragmatics-based approach to machine translation. We will argue that current evaluation methods which assume that there is a predictable correspondence between language forms cannot adequately account for this view. We will then describe a method for objectively determining the relative equivalence of two texts. However, given the need for both an open world assumption and non-monotonic inferencing, such a method cannot be realistically implemented and therefore certain "classic" evaluation strategies will continue to be preferable as practical methods of evaluation.</abstract>
@@ -956,8 +956,8 @@
     </paper>
     <paper id="5">
       <title>Granularity in <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>Florence</first><last>Reeder</last></author>
-      <author><first>John</first><last>White</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
       <url hash="1df7bb62">2003.mtsummit-eval.5</url>
       <abstract>This paper looks at granularity issues in machine translation evaluation. We start with work by (White, 2001) who examined the correlation between intelligibility and fidelity at the document level. His work showed that intelligibility and fidelity do not correlate well at the document level. These dissimilarities lead to our investigation of evaluation granularity. In particular, we revisit the intelligibility and fidelity relationship at the corpus level. We expect these to support certain assumptions in both evaluations as well as indicate issues germane to future evaluations.</abstract>
       <bibkey>reeder-white-2003-granularity</bibkey>
@@ -966,7 +966,7 @@
       <title>Task-based <fixed-case>MT</fixed-case> evaluation: tackling software, experimental design, &amp; statistical models.</title>
       <author><first>Calandra</first><last>Tate</last></author>
       <author><first>Sooyon</first><last>Lee</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <url hash="ffbc7b04">2003.mtsummit-eval.6</url>
       <abstract>Even with recent, renewed attention to MT evaluation—due in part to n-gram-based metrics (Papineni et al., 2001; Doddington, 2002) and the extensive, online catalogue of MT metrics on the ISLE project (Hovy et al., 2001, 2003), few reports involving task-based metrics have surfaced. This paper presents our work on three parts of task-based MT evaluation: (i) software to track and record users' task performance via a browser, run from a desktop computer or remotely over the web, (ii) factorial experimental design with replicate observations to compare the MT engines, based on the accuracy of users' task responses, and (iii) the use of chi-squared and generalized linear models (GLMs) to permit finer-grained data analyses. We report on the experimental results of a six-way document categorization task, used for the evaluation of three Korean-English MT engines. The statistical models of the probabilities of correct responses yield an ordering of the MT engines, with one engine having a statistically significant lead over the other two. Future research will involve testing user performance on linguistically more complex tasks, as well as extending our initial GLMs with the documents' Bleu scores as variables, to test the scores as independent predictors of task results.</abstract>
       <bibkey>tate-etal-2003-task</bibkey>
diff --git a/data/xml/2003.tc.xml b/data/xml/2003.tc.xml
index 279bb3de75..ebcfc6a5d2 100644
--- a/data/xml/2003.tc.xml
+++ b/data/xml/2003.tc.xml
@@ -53,7 +53,7 @@
     </paper>
     <paper id="8">
       <title>Making a Business Case for Localisation</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="47eb3162">2003.tc-1.8</url>
       <bibkey>schaler-2003-making</bibkey>
     </paper>
diff --git a/data/xml/2004.amta.xml b/data/xml/2004.amta.xml
index 962dd06f58..d9670b5d82 100644
--- a/data/xml/2004.amta.xml
+++ b/data/xml/2004.amta.xml
@@ -53,8 +53,8 @@
     <paper id="2">
       <title>A speech-to-speech translation system for <fixed-case>C</fixed-case>atalan, <fixed-case>S</fixed-case>panish, and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Climent</first><last>Nadeu</last></author>
       <author><first>Jaume</first><last>Padrell</last></author>
       <author><first>Albert</first><last>Febrer</last></author>
@@ -67,8 +67,8 @@
     </paper>
     <paper id="3">
       <title>Multi-Align: combining linguistic and statistical techniques to improve alignments for adaptable <fixed-case>MT</fixed-case></title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>17-26</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_3</url>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="4">
       <title>A modified Burrows-Wheeler transform for highly scalable example-based translation</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <pages>27-36</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_4</url>
       <abstract>The Burrows-Wheeler Transform (BWT) was originally developed for data compression, but can also be applied to indexing text. In this paper, an adaptation of the BWT to word-based indexing of the training corpus for an example-based machine translation (EBMT) system is presented. The adapted BWT embeds the necessary information to retrieve matched training instances without requiring any additional space and can be instantiated in a compressed form which reduces disk space and memory requirements by about 40% while still remaining searchable without decompression. Both the speed advantage from O(log N) lookups compared to the O(N) lookups in the inverted-file index which had previously been used and the structure of the index itself act as enablers for additional capabilities and run-time speed. Because the BWT groups all instances of any n-gram together, it can be used to quickly enumerate the most-frequent n-grams, for which translations can be precomputed and stored, resulting in an order-of-magnitude speedup at run time.</abstract>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="6">
       <title>Normalizing <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish inflectional morphology to improve statistical word alignment</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <pages>48-57</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_6</url>
@@ -114,8 +114,8 @@
     <paper id="8">
       <title>A fluency error categorization scheme to guide automated machine translation evaluation</title>
       <author><first>Debbie</first><last>Elliott</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>64-73</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_8</url>
       <abstract>Existing automated MT evaluation methods often require expert human translations. These are produced for every language pair evaluated and, due to this expense, subsequent evaluations tend to rely on the same texts, which do not necessarily reflect real MT use. In contrast, we are designing an automated MT evaluation system, intended for use by post-editors, purchasers and developers, that requires nothing but the raw MT output. Furthermore, our research is based on texts that reflect corporate use of MT. This paper describes our first step in system design: a hierarchical classification scheme of fluency errors in English MT output, to enable us to identify error types and frequencies, and guide the selection of errors for automated detection. We present results from the statistical analysis of 20,000 words of MT output, manually annotated using our classification scheme, and describe correlations between error frequencies and human scores for fluency and adequacy.</abstract>
@@ -132,7 +132,7 @@
     <paper id="10">
       <title>Counting, measuring, ordering: translation problems and solutions</title>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <pages>86-93</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_10</url>
       <abstract>This paper describes some difficulties associated with the translation of numbers (scalars) used for counting, measuring, or selecting items or properties. A set of problematic issues is described, and the presence of these difficulties is quantified by examining a set of texts and translations. An approach to a solution is suggested.</abstract>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="12">
       <title>The <fixed-case>G</fixed-case>eorgetown-<fixed-case>IBM</fixed-case> experiment demonstrated in <fixed-case>J</fixed-case>anuary 1954</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <pages>102-114</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_12</url>
       <abstract>The public demonstration of a Russian-English machine translation system in New York in January 1954 – a collaboration of IBM and Georgetown University – caused a great deal of public interest and much controversy. Although a small-scale experiment of just 250 words and six ‘grammar’ rules it raised expectations of automatic systems capable of high quality translation in the near future. This paper describes the system, its background, its impact and its implications.</abstract>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="16">
       <title>The significance of recall in automatic metrics for <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Shyamsundar</first><last>Jayaraman</last></author>
       <pages>134-143</pages>
@@ -195,8 +195,8 @@
     </paper>
     <paper id="17">
       <title>Alignment of bilingual named entities in parallel corpora using statistical model</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <pages>144-153</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_17</url>
@@ -206,7 +206,7 @@
     <paper id="18">
       <title>Weather report translation using a translation memory</title>
       <author><first>Thomas</first><last>Leplus</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <pages>154-163</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_18</url>
@@ -225,8 +225,8 @@
     <paper id="20">
       <title>Extraction of name and transliteration in monolingual and parallel corpora</title>
       <author><first>Tracy</first><last>Lin</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>177-186</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_20</url>
       <abstract>Named-entities in free text represent a challenge to text analysis in Machine Translation and Cross Language Information Retrieval. These phrases are often transliterated into another language with a different sound inventory and writing system. Named-entities found in free text are often not listed in bilingual dictionaries. Although it is possible to identify and translate named-entities on the fly without a list of proper names and transliterations, an extensive list of existing transliterations certainly will ensure high precision rate. We use a seed list of proper names and transliterations to train a Machine Transliteration Model. With the model it is possible to extract proper names and their transliterations in monolingual or parallel corpora with high precision and recall rates.</abstract>
@@ -234,9 +234,9 @@
     </paper>
     <paper id="21">
       <title>Error analysis of two types of grammar for the purpose of automatic rule refinement</title>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
       <author><first>Katharina</first><last>Probst</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>187-196</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_21</url>
       <abstract>This paper compares a manually written MT grammar and a grammar learned automatically from an English-Spanish elicitation corpus with the ultimate purpose of automatically refining the translation rules. The experiment described here shows that the kind of automatic refinement operations required to correct a translation not only varies depending on the type of error, but also on the type of grammar. This paper describes the two types of grammars and gives a detailed error analysis of their output, indicating what kinds of refinements are required in each case.</abstract>
@@ -265,7 +265,7 @@
     <paper id="24">
       <title>A structurally diverse minimal corpus for eliciting structural mappings between languages</title>
       <author><first>Katharina</first><last>Probst</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>217-226</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_24</url>
       <abstract>We describe an approach to creating a small but diverse corpus in English that can be used to elicit information about any target language. The focus of the corpus is on structural information. The resulting bilingual corpus can then be used for natural language processing tasks such as inferring transfer mappings for Machine Translation. The corpus is sufficiently small that a bilingual user can translate and word-align it within a matter of hours. We describe how the corpus is created and how its structural diversity is ensured. We then argue that it is not necessary to introduce a large amount of redundancy into the corpus. This is shown by creating an increasingly redundant corpus and observing that the information gained converges as redundancy increases.</abstract>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="25">
       <title>Investigation of intelligibility judgments</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <pages>227-235</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_25</url>
       <abstract>This paper describes an intelligibility snap-judgment test. In this exercise, participants are shown a series of human translations and machine translations and are asked to determine whether the author was human or machine. The experiment shows that snap judgments on intelligibility are made successfully and that system rankings on snap judgments are consistent with more detailed intelligibility measures. In addition to demonstrating a quick intelligibility judgment, representing on a few minutes time of each participant, it details the types of errors which led to the snap judgments.</abstract>
@@ -281,16 +281,16 @@
     </paper>
     <paper id="26">
       <title>Interlingual annotation for <fixed-case>MT</fixed-case> development</title>
-      <author><first>Florence</first><last>Reeder</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <pages>236-245</pages>
       <url>https://link.springer.com/chapter/10.1007/978-3-540-30194-3_26</url>
@@ -324,7 +324,7 @@
     </paper>
     <paper id="30">
       <title>A super-function based <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese machine translation system for business users</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Fuji</first><last>Ren</last></author>
       <author><first>Stefan</first><last>Voß</last></author>
       <pages>272-281</pages>
diff --git a/data/xml/2004.eamt.xml b/data/xml/2004.eamt.xml
index 65362d07c2..46ff715e7a 100644
--- a/data/xml/2004.eamt.xml
+++ b/data/xml/2004.eamt.xml
@@ -27,7 +27,7 @@
     <paper id="3">
       <title>Disambiguating translation strategies in <fixed-case>MT</fixed-case> using automatic named entity recognition</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="3417ed8c">2004.eamt-1.3</url>
       <bibkey>babych-hartley-2004-disambiguating</bibkey>
     </paper>
@@ -42,8 +42,8 @@
     <paper id="5">
       <title>Challenges in using an example-based <fixed-case>MT</fixed-case> system for a transnational digital government project</title>
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <author><first>Peter G.</first><last>Jansen</last></author>
       <author><first>Jae Dong</first><last>Kim</last></author>
       <url hash="2a202e81">2004.eamt-1.5</url>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="6">
       <title>Formal analysis of some aspects of <fixed-case>A</fixed-case>mharic noun phrases</title>
-      <author><first>Sisay Fissaha</first><last>Adafre</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay Fissaha</first><last>Adafre</last></author>
       <url hash="64aecc5b">2004.eamt-1.6</url>
       <bibkey>adafre-2004-formal</bibkey>
     </paper>
@@ -79,14 +79,14 @@
       <title>Translation memory as a robust example-based translation system</title>
       <author><first>Gábor</first><last>Hodász</last></author>
       <author><first>Tamás</first><last>Grőbler</last></author>
-      <author><first>Balázs</first><last>Kis</last></author>
+      <author id="balazs-kis"><first>Balázs</first><last>Kis</last></author>
       <url hash="acb5620b">2004.eamt-1.10</url>
       <bibkey>hodasz-etal-2004-translation</bibkey>
     </paper>
     <paper id="11">
       <title>A translation model for languages of accessing countries</title>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kubon</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
       <url hash="15fb3ebc">2004.eamt-1.11</url>
       <bibkey>homola-kubon-2004-translation</bibkey>
     </paper>
@@ -100,25 +100,25 @@
       <title>Towards an automated evaluation of an embedded <fixed-case>MT</fixed-case> system</title>
       <author><first>J.</first><last>Laoudi</last></author>
       <author><first>C.</first><last>Tate</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <url hash="1bf42df9">2004.eamt-1.13</url>
       <bibkey>laoudi-etal-2004-towards</bibkey>
     </paper>
     <paper id="14">
       <title>A trainable transfer-based <fixed-case>MT</fixed-case> approach for languages with limited resources</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Katharina</first><last>Probst</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Ariadna</first><last>Font-Llitjos</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font-Llitjos</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="56915139">2004.eamt-1.14</url>
       <bibkey>lavie-etal-2004-trainable</bibkey>
     </paper>
     <paper id="15">
       <title>The <fixed-case>NEMLAR</fixed-case> project on <fixed-case>A</fixed-case>rabic language resources</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="eccba315">2004.eamt-1.15</url>
       <bibkey>maegaard-2004-nemlar</bibkey>
     </paper>
@@ -131,8 +131,8 @@
     </paper>
     <paper id="17">
       <title>Moose: a robust high-performance parser and generator</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
-      <author><first>László</first><last>Tihanyi</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
+      <author id="laszlo-tihanyi"><first>László</first><last>Tihanyi</last></author>
       <author><first>Gábor</first><last>Ugray</last></author>
       <url hash="e022814e">2004.eamt-1.17</url>
       <bibkey>proszeky-etal-2004-moose</bibkey>
diff --git a/data/xml/2004.iwslt.xml b/data/xml/2004.iwslt.xml
index 3168437374..56eba1a6eb 100644
--- a/data/xml/2004.iwslt.xml
+++ b/data/xml/2004.iwslt.xml
@@ -14,29 +14,29 @@
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="55796369">2004.iwslt-evaluation.1</url>
       <bibkey>akiba-etal-2004-overview</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>EBMT</fixed-case>, <fixed-case>SMT</fixed-case>, hybrid and more: <fixed-case>ATR</fixed-case> spoken language translation system</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
       <author><first>Takao</first><last>Doi</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <url hash="9b08120a">2004.iwslt-evaluation.2</url>
       <bibkey>sumita-etal-2004-ebmt</bibkey>
     </paper>
     <paper id="3">
       <title>Towards fairer evaluations of commercial <fixed-case>MT</fixed-case> systems on basic travel expressions corpora</title>
-      <author><first>Herve</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Herve</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
       <author><first>Agnes</first><last>Hamon</last></author>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="5">
       <title>Experimenting with phrase-based statistical translation within the <fixed-case>IWSLT</fixed-case> <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish shared translation task</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <author><first>Oliver</first><last>Streiter</last></author>
       <url hash="bb87648f">2004.iwslt-evaluation.5</url>
@@ -66,7 +66,7 @@
     <paper id="6">
       <title><fixed-case>IBM</fixed-case> spoken language translation system evaluation</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="d5022e00">2004.iwslt-evaluation.6</url>
       <bibkey>lee-roukos-2004-ibm</bibkey>
     </paper>
@@ -96,8 +96,8 @@
       <author><first>Emil</first><last>Ettelaie</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
       <author><first>Ignacio</first><last>Thayer</last></author>
       <author><first>Quamrul</first><last>Tipu</last></author>
       <url hash="892fce43">2004.iwslt-evaluation.9</url>
@@ -106,16 +106,16 @@
     <paper id="10">
       <title>The <fixed-case>ISL</fixed-case> <fixed-case>EDTRL</fixed-case> system</title>
       <author><first>Juergen</first><last>Reichert</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="f2034fa4">2004.iwslt-evaluation.10</url>
       <bibkey>reichert-waibel-2004-isl</bibkey>
     </paper>
     <paper id="11">
       <title>The <fixed-case>ISL</fixed-case> statistical translation system for spoken language translation</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
       <author><first>Muntsin</first><last>Kolss</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="b0b2f349">2004.iwslt-evaluation.11</url>
       <bibkey>vogel-etal-2004-isl</bibkey>
     </paper>
@@ -123,7 +123,7 @@
       <title>Multi-engine based <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish translation system</title>
       <author><first>Yuncun</first><last>Zuo</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="9a929f09">2004.iwslt-evaluation.12</url>
       <bibkey>zuo-etal-2004-multi</bibkey>
     </paper>
@@ -132,7 +132,7 @@
       <author><first>Oliver</first><last>Bender</last></author>
       <author><first>Richard</first><last>Zens</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="6ff08450">2004.iwslt-evaluation.13</url>
       <bibkey>bender-etal-2004-alignment</bibkey>
     </paper>
@@ -161,9 +161,9 @@
     </meta>
     <paper id="1">
       <title>Spoken dialogue translation systems evaluation: results, new trends, problems and proposals</title>
-      <author><first>Herve</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Herve</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <url hash="e3d773df">2004.iwslt-papers.1</url>
       <bibkey>blanchon-etal-2004-spoken</bibkey>
     </paper>
@@ -178,38 +178,38 @@
       <title>Phrase-based alignment combining corpus cooccurrences and linguistic knowledge</title>
       <author><first>Adria</first><last>de Gispert</last></author>
       <author><first>Jose B.</first><last>Marino</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <url hash="5aeb2bcb">2004.iwslt-papers.3</url>
       <bibkey>de-gispert-etal-2004-phrase</bibkey>
     </paper>
     <paper id="4">
       <title>On feature selection in maximum entropy approach to statistical concept-based speech-to-speech translation</title>
       <author><first>Liang</first><last>Gu</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <url hash="c2958bfb">2004.iwslt-papers.4</url>
       <bibkey>gu-gao-2004-feature</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>P</fixed-case>olyphra<fixed-case>Z</fixed-case>: a tool for the quantitative and subjective evaluation of parallel corpora</title>
       <author><first>Najeh</first><last>Hajlaoui</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="b0b0853f">2004.iwslt-papers.5</url>
       <bibkey>hajlaoui-boitet-2004-polyphraz-tool</bibkey>
     </paper>
     <paper id="6">
       <title>Toward named entity extraction and translation in spoken language translation</title>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="86c5713b">2004.iwslt-papers.6</url>
       <bibkey>huang-etal-2004-toward</bibkey>
     </paper>
     <paper id="7">
       <title>Statistical machine translation of spontaneous speech with scarce resources</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="b05a052c">2004.iwslt-papers.7</url>
       <bibkey>matusov-etal-2004-statistical</bibkey>
     </paper>
@@ -219,7 +219,7 @@
       <author><first>Konstantin</first><last>Markov</last></author>
       <author><first>Takatoshi</first><last>Jitsuhiro</last></author>
       <author><first>Jin-Song</first><last>Zhang</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
       <url hash="12bbffa2">2004.iwslt-papers.8</url>
       <bibkey>nakamura-etal-2004-multi</bibkey>
diff --git a/data/xml/2004.jeptalnrecital.xml b/data/xml/2004.jeptalnrecital.xml
index af9cbadabd..1e3d8feeb5 100644
--- a/data/xml/2004.jeptalnrecital.xml
+++ b/data/xml/2004.jeptalnrecital.xml
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>Evaluation de méthodes de segmentation thématique linéaire non supervisées après adaptation au français</title>
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>1–10</pages>
       <abstract>Nous proposons une évaluation de différentes méthodes et outils de segmentation thématique de textes. Nous présentons les outils de segmentation linéaire et non supervisée DotPlotting, Segmenter, C99, TextTiling, ainsi qu’une manière de les adapter et de les tester sur des documents français. Les résultats des tests montrent des différences en performance notables selon les sujets abordés dans les documents, et selon que le nombre de segments à trouver est fixé au préalable par l’utilisateur. Ces travaux font partie du projet Technolangue AGILE-OURAL.</abstract>
       <url hash="32945c5b">2004.jeptalnrecital-long.1</url>
@@ -117,8 +117,8 @@
     </paper>
     <paper id="11">
       <title>Deux premières étapes vers les documents auto-explicatifs</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>100–109</pages>
       <abstract>Dans le cadre du projet LIDIA, nous avons montré que dans de nombreuses situations, la TA Fondée sur le Dialogue (TAFD) pour auteur monolingue peut offrir une meilleure solution en traduction multicible que les aides aux traducteurs, ou la traduction avec révision, même si des langages contrôlés sont utilisés. Nos premières expériences ont mis en évidence le besoin de conserver les « intentions de l’auteur » au moyen « d’annotations de désambiguïsation ». Ces annotations permettent de transformer le document source en un Document Auto-Explicatif (DAE). Nous présentons ici une solution pour intégrer ces annotations dans un document XML et les rendre visibles et utilisables par un lecteur pour une meilleure compréhension du « vrai contenu » du document. Le concept de Document Auto-Explicatif pourrait changer profondément notre façon de comprendre des documents importants ou écrits dans un style complexe. Nous montrerons aussi qu’un DAE, traduit dans une langue cible L, pourrait aussi être transformé, sans interaction humaine, en un DAE en langue L si un analyseur et un désambiguïseur sont disponibles pour cette langue L. Ainsi, un DAE pourrait être utilisé dans un contexte monolingue, mais aussi dans un contexte multilingue sans travail humain additionnel.</abstract>
       <url hash="d2e2e600">2004.jeptalnrecital-long.11</url>
@@ -138,7 +138,7 @@
       <title>Extraction de terminologies bilingues à partir de corpus comparables</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Samuel</first><last>Dufour-Kowalski</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>120–129</pages>
       <abstract>Cet article présente une méthode pour extraire, à partir de corpus comparables d’un domaine de spécialité, un lexique bilingue comportant des termes simples et complexes. Cette méthode extrait d’abord les termes complexes dans chaque langue, puis les aligne à l’aide de méthodes statistiques exploitant le contexte des termes. Après avoir rappelé les difficultés que pose l’alignement des termes complexes et précisé notre approche, nous présentons le processus d’extraction de terminologies bilingues adopté et les ressources utilisées pour nos expérimentations. Enfin, nous évaluons notre approche et démontrons son intérêt en particulier pour l’alignement de termes complexes non compositionnels.</abstract>
       <url hash="38e5d4b9">2004.jeptalnrecital-long.13</url>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="14">
       <title>Traduction, traduction de mots, traduction de phrases</title>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>130–138</pages>
       <abstract>Une des conséquences du développement d’Internet et de la globalisation des échanges est le nombre considérable d’individus amenés à consulter des documents en ligne dans une langue autre que la leur. Après avoir montré que ni la traduction automatique, ni les aides terminologiques en ligne ne constituent une réponse pleinement adéquate à ce nouveau besoin, cet article présente un système d’aide à la lecture en langue étrangère basé sur un analyseur syntaxique puissant. Pour un mot sélectionné par l’usager, ce système analyse la phrase entière, de manière (i) à choisir la lecture du mot sélectionné la mieux adaptée au contexte morphosyntaxique et (ii) à identifier une éventuelle expression idiomatique ou une collocation dont le mot serait un élément. Une démonstration de ce système, baptisé TWiC (Translation of words in context “Traduction de mots en contexte”), pourra être présentée.</abstract>
       <url hash="c47fcd19">2004.jeptalnrecital-long.14</url>
@@ -167,7 +167,7 @@
     <paper id="16">
       <title>Repérage et exploitation d’énoncés définitoires en corpus pour l’aide à la construction d’ontologie</title>
       <author><first>Véronique</first><last>Malaisé</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Bruno</first><last>Bachimont</last></author>
       <pages>149–158</pages>
       <abstract>Pour construire une ontologie, un modéliseur a besoin d’objecter des informations sémantiques sur les termes principaux de son domaine d’étude. Les outils d’exploration de corpus peuvent aider à repérer ces types d’information, et l’identification de couples d’hyperonymes a fait l’objet de plusieurs travaux. Nous proposons d’exploiter des énoncés définitoires pour extraire d’un corpus des informations concernant les trois axes de l’ossature ontologique : l’axe vertical, lié à l’hyperonymie, l’axe horizontal, lié à la co-hyponymie et l’axe transversal, lié aux relations du domaine. Après un rappel des travaux existants en repérage d’énoncés définitoires en TAL, nous développons la méthode que nous avons mise en place, puis nous présentons son évaluation et les premiers résultats obtenus. Leur repérage atteint de 10% à 69% de précision suivant les patrons, celui des unités lexicales varie de 31% à 56%, suivant le référentiel adopté.</abstract>
@@ -209,7 +209,7 @@
     <paper id="20">
       <title>Désambiguïsation de corpus monolingues par des approches de type <fixed-case>L</fixed-case>esk</title>
       <author><first>Florentina</first><last>Vasilescu</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>189–198</pages>
       <abstract>Cet article présente une analyse détaillée des facteurs qui déterminent les performances des approches de désambiguïsation dérivées de la méthode de Lesk (1986). Notre étude porte sur une série d’expériences concernant la méthode originelle de Lesk et des variantes que nous avons adaptées aux caractéristiques de WORDNET. Les variantes implémentées ont été évaluées sur le corpus de test de SENSEVAL2, English All Words, ainsi que sur des extraits du corpus SEMCOR. Notre évaluation se base d’un côté, sur le calcul de la précision et du rappel, selon le modèle de SENSEVAL, et d’un autre côté, sur une taxonomie des réponses qui permet de mesurer la prise de risque d’un décideur par rapport à un système de référence.</abstract>
       <url hash="344cc098">2004.jeptalnrecital-long.20</url>
@@ -256,7 +256,7 @@
     <paper id="25">
       <title>Une mesure de pertinence pour le tri de l’information dans un index de “fin de livre”</title>
       <author><first>Touria</first><last>Ait El Mekki</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>239–248</pages>
       <abstract>Nous nous intéressons à la construction des index de fin de livres. Nous avons développé le système IndDoc qui aide la construction de tels index. L’un des enjeux de la construction d’index est la sélection des informations : sélection des entrées les plus pertinentes et des renvois au texte les plus intéressants. Cette sélection est évidemment utile pour le lecteur qui doit trouver suffisamment d’information mais sans en être submergé. Elle est également précieuse pour l’auteur de l’index qui doit valider et corriger une ébauche d’index produite automatiquement par IndDoc. Nous montrons comment cette sélection de l’information est réalisée par IndDoc. Nous proposons une mesure qui permet de trier les entrées par ordre de pertinence décroissante et une méthode pour calculer les renvois au texte à associer à chaque entrée de l’index.</abstract>
       <url hash="fe5aedd0">2004.jeptalnrecital-long.25</url>
@@ -277,7 +277,7 @@
     <paper id="27">
       <title>Fiabilité de la référence humaine dans la détection de thème</title>
       <author><first>Armelle</first><last>Brun</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>259–268</pages>
       <abstract>Dans cet article, nous nous intéressons à la tâche de détection de thème dans le cadre de la reconnaissance automatique de la parole. La combinaison de plusieurs méthodes de détection montre ses limites, avec des performances de 93.1 %. Ces performances nous mènent à remetttre en cause le thème de référence des paragraphes de notre corpus. Nous avons ainsi effectué une étude sur la fiabilité de ces références, en utilisant notamment les mesures Kappa et erreur de Bayes. Nous avons ainsi pu montrer que les étiquettes thématiques des paragraphes du corpus de test comportaient vraisemblablement des erreurs, les performances de détection de thème obtenues doivent donc êtres exploitées prudemment.</abstract>
       <url hash="95534ccb">2004.jeptalnrecital-long.27</url>
@@ -315,8 +315,8 @@
     <paper id="31">
       <title>La <fixed-case>FREEBANK</fixed-case> : vers une base libre de corpus annotés</title>
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
-      <author><first>Eckhard</first><last>Bick</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Jean-Marie</first><last>Pierrel</last></author>
       <pages>299–308</pages>
       <abstract>Les corpus français librement accessibles annotés à d’autres niveaux linguistiques que morpho-syntaxique sont insuffisants à la fois quantitativement et qualitativement. Partant de ce constat, la FREEBANK – construite sur la base d’outils d’analyse automatique dont la sortie est révisée manuellement – se veut une base de corpus du français annotés à plusieurs niveaux (structurel, morphologique, syntaxique, coréférentiel) et à différents degrés de finesse linguistique qui soit libre d’accès, codée selon des schémas normalisés, intégrant des ressources existantes et ouverte à l’enrichissement progressif.</abstract>
@@ -328,11 +328,11 @@
       <title>Annoter en constituants pour évaluer des analyseurs syntaxiques</title>
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Véronique</first><last>Gendner</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Michèle</first><last>Jardino</last></author>
+      <author id="michele-jardino"><first>Michèle</first><last>Jardino</last></author>
       <pages>309–318</pages>
       <abstract>Cet article présente l’annotation en constituants menée dans le cadre d’un protocole d’évaluation des analyseurs syntaxiques (mis au point dans le pré-projet PEAS, puis dans le projet EASY). Le choix des constituants est décrit en détail et une première évaluation effectuée à partir des résultats de deux analyseurs est donnée.</abstract>
       <url hash="1a1943c6">2004.jeptalnrecital-long.32</url>
@@ -341,7 +341,7 @@
     </paper>
     <paper id="33">
       <title>Détermination de contenu dans <fixed-case>GEPHOX</fixed-case></title>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <pages>319–328</pages>
       <abstract>Le générateur GEPHOX que nous réalisons a pour ambition de produire des textes pour des définition ou preuves mathématiques écrites à l’aide de l’assistant de preuve PHOX. Dans cet article nous nous concentrons sur le module de détermination de contenu ContDet de GEPHOX. Après un aperçu sur l’entrée du générateur, i.e. la preuve formelle et l’ensemble des règles ayant permis de l’obtenir, nous décrivons les base de connaissances du générateur et le fonctionnement de l’algorithme de détermination de contenu.</abstract>
       <url hash="f5da7d36">2004.jeptalnrecital-long.33</url>
@@ -368,7 +368,7 @@
     </paper>
     <paper id="36">
       <title>Les Grammaires à Concaténation d’Intervalles (<fixed-case>RCG</fixed-case>) comme formalisme grammatical pour la linguistique</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <pages>349–358</pages>
       <abstract>Le but de cet article est de montrer pourquoi les Grammaires à Concaténation d’Intervalles (Range Concatenation Grammars, ou RCG) sont un formalisme particulièrement bien adapté à la description du langage naturel. Nous expliquons d’abord que la puissance nécessaire pour décrire le langage naturel est celle de PTIME. Ensuite, parmi les formalismes grammaticaux ayant cette puissance d’expression, nous justifions le choix des RCG. Enfin, après un aperçu de leur définition et de leurs propriétés, nous montrons comment leur utilisation comme grammaires linguistiques permet de traiter des phénomènes syntagmatiques complexes, de réaliser simultanément l’analyse syntaxique et la vérification des diverses contraintes (morphosyntaxiques, sémantique lexicale), et de construire dynamiquement des grammaires linguistiques modulaires.</abstract>
@@ -397,8 +397,8 @@
     <paper id="1">
       <title>Mots composés dans les modèles de langue pour la recherche d’information</title>
       <author><first>Carmen</first><last>Alvarez</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>1–6</pages>
       <abstract>Une approche classique en recherche d’information (RI) consiste à bâtir une représentation des documents et des requêtes basée sur les mots simples les constituant. L’utilisation de modèles bigrammes a été étudiée, mais les contraintes sur l’ordre et l’adjacence des mots dans ces travaux ne sont pas toujours justifiées pour la recherche d’information. Nous proposons une nouvelle approche basée sur les modèles de langue qui incorporent des affinités lexicales (ALs), c’est à dire des paires non ordonnées de mots qui se trouvent proches dans un texte. Nous décrivons ce modèle et le comparons aux plus traditionnels modèles unigrammes et bigrammes ainsi qu’au modèle vectoriel.</abstract>
       <url hash="6793bdc5">2004.jeptalnrecital-poster.1</url>
@@ -407,7 +407,7 @@
     </paper>
     <paper id="2">
       <title>Le Regroupement de Types de Mots et l’Unification d’Occurrences de Mots dans des Catégories grammaticales de mots (Clustering of Word Types and Unification of Word Tokens into Grammatical Word-Classes)</title>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>7–12</pages>
       <abstract>Ce papier discute la Néoposie: l’inférence auto-adaptive de catégories grammaticales de mots de la langue naturelle. L’inférence grammaticale peut être divisée en deux parties : l’inférence de catégories grammaticales de mots et l’inférence de la structure. Nous examinons les éléments de base de l’apprentissage auto-adaptif du marquage des catégories grammaticales, et discutons l’adaptation des trois types principaux de marqueurs des catégories grammaticales à l’inférence auto-adaptive de catégories grammaticales de mots. Des marqueurs statistiques de n-grammes suggèrent une approche de regroupement statistique, mais le regroupement n’aide ni avec les types de mots peu fréquents, ni avec les types de mots nombreux qui peuvent se présenter dans plus d’une catégorie grammaticale. Le marqueur alternatif d’apprentissage basé sur la transformation suggère une approche basée sur la contrainte de l’unification de contextes d’occurrences de mots. Celle-ci présente un moyen de regrouper des mots peu fréquents, et permet aux occurrences différentes d’un seul type de mot d’appartenir à des catégories différentes selon les contextes grammaticaux où ils se présentent. Cependant, la simple unification de contextes d’occurrences de mots produit un nombre incroyablement grand de catégories grammaticales de mots. Nous avons essayé d’unifier plus de catégories en modérant le contexte de la correspondance pour permettre l’unification des catégories de mots aussi bien que des occurrences de mots, mais cela entraîne des unifications fausses. Nous concluons que l’avenir peut être un hybride qui comprend le regroupement de types de mots peu fréquents, l’unification de contextes d’occurrences de mots, et le ‘seeding’ avec une connaissance linguistique limitée. Nous demandons un programme de nouvelles recherches pour développer une valise pour la découverte de la langue naturelle.</abstract>
       <url hash="3f167e83">2004.jeptalnrecital-poster.2</url>
@@ -438,8 +438,8 @@
     </paper>
     <paper id="5">
       <title>Traduction de dialogue: résultats du projet <fixed-case>NESPOLE</fixed-case>! et pistes pour le domaine</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>25–30</pages>
       <abstract>Dans cet article, nous détaillons les résultats de la seconde évaluation du projet européen NESPOLE! auquel nous avons pris part pour le français. Dans ce projet, ainsi que dans ceux qui l’ont précédé, des techniques d’évaluation subjectives — réalisées par des évaluateurs humains — ont été mises en oeuvre. Nous présentons aussi les nouvelles techniques objectives — automatiques — proposées en traduction de l’écrit et mises en oeuvre dans le projet C-STAR III. Nous conclurons en proposant quelques idées et perspectives pour le domaine.</abstract>
       <url hash="3a9ce0c2">2004.jeptalnrecital-poster.5</url>
@@ -521,7 +521,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>NLP</fixed-case> Applications Based on<fixed-case>W</fixed-case>eighted<fixed-case>M</fixed-case>ulti-Tape Automata</title>
-      <author><first>André</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>André</first><last>Kempe</last></author>
       <pages>73–78</pages>
       <abstract>This article describes two practical applications of weighted multi-tape automata (WMTAs) in Natural Language Processing, that demonstrate the augmented descriptive power of WMTAs compared to weighted 1-tape and 2-tape automata. The two examples concern the preservation of intermediate results in transduction cascades and the search for similar words in two languages. As a basis for these applications, the article proposes a number of operations on WMTAs. Among others, it (re-)defines multi-tape intersection, where a number of tapes of one WMTA are intersected with the same number of tapes of another WMTA. In the proposed approach, multi-tape intersection is not an atomic operation but rather a sequence of more elementary ones, which facilitates its implementation.</abstract>
       <url hash="3321dd4c">2004.jeptalnrecital-poster.13</url>
@@ -559,7 +559,7 @@
     <paper id="17">
       <title>Apprentissage collectif et lexique</title>
       <author><first>Julien</first><last>Poudade</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>97–102</pages>
       <abstract>Cet article présente l’influence de la zone de travail que possède une entité logicielle pour lui permettre de prédire l’état futur de son environnement, sur la constitution d’un lexique partagé par les différents membres d’une population, dans le cadre d’une variante “du jeu de désignation” (naming game).</abstract>
       <url hash="096241e9">2004.jeptalnrecital-poster.17</url>
@@ -568,7 +568,7 @@
     </paper>
     <paper id="18">
       <title>L’outil de traitement de corpus <fixed-case>LIKES</fixed-case></title>
-      <author><first>François</first><last>Rousselot</last></author>
+      <author id="francois-rousselot"><first>François</first><last>Rousselot</last></author>
       <pages>103–112</pages>
       <abstract>LIKES (LInguistic and Knowledge Engineering Station) est une station d’ingénierie linguistique destinée à traiter des corpus, elle fonctionne pour l’instant sur la plupart des langues européennes et slaves en utilisant des ressources minimales pour chaque langue. Les corpus sont constitués d’un ou plusieurs textes en ASCII ou en HTML, l’interface donne la possibilité de constituer son corpus et d’y exécuter un certain nombre de tâches allant de simples tâches de découpage en mot, de tri ou de recherche de motifs à des tâches plus complexes d’aide à la synthèse de grammaire, d’aide au repérage de relations, d’aide à la construction d’une terminologie. Nous décrivons ici les principales fonctionnalités de LIKES en rapport avec le traitement des corpus et ce qui fait sa spécificité par rapport à d’autres environnements comparables : l’utilisation minimale de ressources linguistiques.</abstract>
       <url hash="dbdfd3c2">2004.jeptalnrecital-poster.18</url>
@@ -601,7 +601,7 @@
       <author><first>Joaquim</first><last>Silva</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
       <author><first>Veska</first><last>Noncheva</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <pages>125–130</pages>
       <abstract>Named entities and more generally Multiword Lexical Units (MWUs) are important for various applications. However, language independent methods for automatically extracting MWUs do not provide us with clean data. So, in this paper we propose a method for selecting possible named entities from automatically extracted MWUs, and later, a statistics-based language independent unsupervised approach is applied to possible named entities in order to cluster them according to their type. Statistical features used by our clustering process are described and motivated. The Model-Based Clustering Analysis (MBCA) software enabled us to obtain different clusters for proposed named entities. The method was applied to Bulgarian and English. For some clusters, precision is very high; other clusters still need further refinement. Based on the obtained clusters, it is also possible to classify new possible named entities.</abstract>
       <url hash="714e67eb">2004.jeptalnrecital-poster.21</url>
@@ -630,8 +630,8 @@
     <paper id="24">
       <title>Modèle de langage sémantique pour la reconnaissance automatique de parole dans un contexte de traduction</title>
       <author><first>Quang</first><last>Vu-minh</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
       <pages>147–152</pages>
       <abstract>Le travail présenté dans cet article a été réalisé dans le cadre d’un projet global de traduction automatique de la parole. L’approche de traduction est fondée sur un langage pivot ou Interchange Format (IF), qui représente le sens de la phrase indépendamment de la langue. Nous proposons une méthode qui intègre des informations sémantiques dans le modèle statistique de langage du système de Reconnaissance Automatique de Parole. Le principe consiste a utiliser certaines classes définies dans l’IF comme des classes sémantiques dans le modèle de langage. Ceci permet au système de reconnaissance de la parole d’analyser partiellement en IF les tours de parole. Les expérimentations realisées montrent qu’avec cette approche, le système de reconnaissance peut analyser directement en IF une partie des données de dialogues de notre application, sans faire appel au système de traduction (35% des mots ; 58% des tours de parole), tout en maintenant le même niveau de performance du système global.</abstract>
@@ -643,8 +643,8 @@
   <volume id="recital" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 11ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
-      <editor><first>Tristan</first><last>Vanrullen</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="tristan-vanrullen"><first>Tristan</first><last>Vanrullen</last></editor>
       <publisher>ATALA</publisher>
       <address>Fès, Maroc</address>
       <month>April</month>
@@ -675,7 +675,7 @@
     </paper>
     <paper id="3">
       <title>Indexation automatique de ressources de santé à l’aide d’un vocabulaire contrôlé</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>21–30</pages>
       <abstract>Nous présentons ici le système d’indexation automatique actuellement en cours de développement dans l’équipe CISMeF afin d’aider les documentalistes lors de l’indexation de ressources de santé. Nous détaillons l’architecture du système pour l’extraction de mots clés MeSH, et présentons les résultats d’une première évaluation. La stratégie d’indexation choisie atteint une précision comparable à celle des systèmes existants. De plus, elle permet d’extraire des paires mot clé/qualificatif, et non des termes isolés, ce qui constitue une indexation beaucoup plus fine. Les travaux en cours s’attachent à étendre la couverture des dictionnaires, et des tests à plus grande échelle sont envisagés afin de valider le système et d’évaluer sa valeur ajoutée dans le travail quotidien des documentalistes.</abstract>
       <url hash="dfc427f2">2004.jeptalnrecital-recital.3</url>
@@ -733,8 +733,8 @@
   <volume id="recitalposter" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 11ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters)</booktitle>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
-      <editor><first>Tristan</first><last>Vanrullen</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="tristan-vanrullen"><first>Tristan</first><last>Vanrullen</last></editor>
       <publisher>ATALA</publisher>
       <address>Fès, Maroc</address>
       <month>April</month>
@@ -892,7 +892,7 @@
     </paper>
     <paper id="17">
       <title>La relation de synonymie en génomique</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <pages>97–102</pages>
       <abstract>L’accès au contenu des textes de génomique est aujourd’hui un enjeu important. Cela suppose au départ d’identifier les noms d’entités biologiques comme les gènes ou les protéines. Se pose alors la question de la variation de ces noms. Cette question revêt une importance particulière en génomique où les noms de gènes sont soumis à de nombreuses variations, notamment la synonymie. A partir d’une étude de corpus montrant que la synonymie est une relation stable et linguistiquement marquée, cet article propose une modélisation de la synonymie et une méthode d’extraction spécifiquement adaptée à cette relation. Au vu de nos premières expériences, cette méthode semble plus prometteuse que les approches génériques utilisées pour l’extraction de cette relation.</abstract>
       <url hash="6f541e3f">2004.jeptalnrecital-recitalposter.17</url>
diff --git a/data/xml/2004.tc.xml b/data/xml/2004.tc.xml
index 9b5eef6bda..68fdf67e83 100644
--- a/data/xml/2004.tc.xml
+++ b/data/xml/2004.tc.xml
@@ -101,7 +101,7 @@
     </paper>
     <paper id="15">
       <title>The Certified Localisation Professional (<fixed-case>CLP</fixed-case>)</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="8b3240bc">2004.tc-1.15</url>
       <bibkey>schaler-2004-certified</bibkey>
     </paper>
diff --git a/data/xml/2004.tmi.xml b/data/xml/2004.tmi.xml
index 861dc18da4..cc0c740b1e 100644
--- a/data/xml/2004.tmi.xml
+++ b/data/xml/2004.tmi.xml
@@ -10,7 +10,7 @@
     </meta>
     <paper id="1">
       <title>Rapid prototyping of a transfer-based <fixed-case>H</fixed-case>ebrew-to-<fixed-case>E</fixed-case>nglish machine translation system</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
       <author><first>Katharina</first><last>Probst</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
@@ -25,10 +25,10 @@
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <author><first>Dorothee</first><last>Beerman</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Lars</first><last>Hellan</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Torbjørn</first><last>Nordgård</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
@@ -37,9 +37,9 @@
     </paper>
     <paper id="3">
       <title>Comparing rule-based and statistical approaches to speech understanding in a limited domain speech translation system</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <url hash="bd5742bc">2004.tmi-1.3</url>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Non-contiguous tree parsing</title>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <url hash="401331a3">2004.tmi-1.4</url>
       <bibkey>dras-han-2004-non</bibkey>
     </paper>
@@ -74,20 +74,20 @@
     <paper id="8">
       <title>A learning approach to improving sentence-level <fixed-case>MT</fixed-case> evaluation</title>
       <author><first>Alex</first><last>Kulesza</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <url hash="63ceb11c">2004.tmi-1.8</url>
       <bibkey>kulesza-shieber-2004-learning</bibkey>
     </paper>
     <paper id="9">
       <title>Measuring confidence intervals for the machine translation evaluation metrics</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="205ae10b">2004.tmi-1.9</url>
       <bibkey>zhang-vogel-2004-measuring</bibkey>
     </paper>
     <paper id="10">
       <title>Cross-language algorithms: the progressive conflation of the <fixed-case>MT</fixed-case> and <fixed-case>IR</fixed-case> paradigms</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <bibkey>wilks-2004-cross</bibkey>
     </paper>
     <paper id="11">
@@ -99,9 +99,9 @@
     </paper>
     <paper id="12">
       <title>Method for retrieving a similar sentence and its application to machine translation</title>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="2452ca68">2004.tmi-1.12</url>
       <bibkey>shimohata-etal-2004-method</bibkey>
     </paper>
@@ -117,7 +117,7 @@
       <author><first>Arul</first><last>Menezes</last></author>
       <author><first>Bob</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <url hash="4a50ad65">2004.tmi-1.14</url>
       <bibkey>aue-etal-2004-statistical</bibkey>
     </paper>
@@ -125,13 +125,13 @@
       <title>Cooperative unsupervised training of the part-of-speech taggers in a bidirectional machine translation system</title>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="c4d92770">2004.tmi-1.15</url>
       <bibkey>sanchez-martinez-etal-2004-cooperative</bibkey>
     </paper>
     <paper id="16">
       <title>Latest challenges to <fixed-case>MT</fixed-case> <fixed-case>R</fixed-case>&amp;<fixed-case>D</fixed-case></title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <bibkey>somers-2004-latest</bibkey>
     </paper>
   </volume>
diff --git a/data/xml/2005.eamt.xml b/data/xml/2005.eamt.xml
index f4f3470dcc..ab90338199 100644
--- a/data/xml/2005.eamt.xml
+++ b/data/xml/2005.eamt.xml
@@ -11,13 +11,13 @@
     </meta>
     <paper id="1">
       <title>Frontmatter</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="75a76f3e">2005.eamt-1.1</url>
       <bibkey>maegaard-2005-frontamtter</bibkey>
     </paper>
     <paper id="2">
       <title>The Language Translation Interface</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <url hash="41e10126">2005.eamt-1.2</url>
       <bibkey>estival-2005-language</bibkey>
     </paper>
@@ -45,10 +45,10 @@
     <paper id="6">
       <title>Comparison of generation strategies for interactive machine translation</title>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="67ff15ad">2005.eamt-1.6</url>
       <bibkey>bender-etal-2005-comparison</bibkey>
     </paper>
@@ -62,10 +62,10 @@
     </paper>
     <paper id="8">
       <title>A generic multi-lingual open source platform for limited-domain medical speech translation</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
@@ -92,32 +92,32 @@
     </paper>
     <paper id="11">
       <title><fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish dependency treebank: resource for structure-based <fixed-case>MT</fixed-case></title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
-      <author><first>Jan</first><last>Cuřín</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
+      <author id="jan-curin"><first>Jan</first><last>Cuřín</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
       <url hash="31f46aa6">2005.eamt-1.11</url>
       <bibkey>cmejrek-etal-2005-prague</bibkey>
     </paper>
     <paper id="12">
       <title>An open-source shallow-transfer machine translation engine for the <fixed-case>R</fixed-case>omance languages of <fixed-case>S</fixed-case>pain</title>
       <author><first>Antonio M.</first><last>Corbi-Bellot</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Sergio</first><last>Ortíz-Rojas</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="b6adc970">2005.eamt-1.12</url>
       <bibkey>corbi-bellot-etal-2005-open</bibkey>
     </paper>
     <paper id="13">
       <title>A framework for interactive and automatic refinement of transfer-based machine translation</title>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="0c249ef5">2005.eamt-1.13</url>
       <bibkey>font-llitjos-etal-2005-framework</bibkey>
     </paper>
@@ -143,41 +143,41 @@
     </paper>
     <paper id="17">
       <title>Clustered language models based on regular expressions for <fixed-case>SMT</fixed-case></title>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="a08537fc">2005.eamt-1.17</url>
       <bibkey>hasan-ney-2005-clustered</bibkey>
     </paper>
     <paper id="18">
       <title>Augmenting a statistical translation system with a translation memory</title>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="190841c1">2005.eamt-1.18</url>
       <bibkey>hewavitharana-etal-2005-augmenting</bibkey>
     </paper>
     <paper id="19">
       <title>Adaptation of the translation model for statistical machine translation based on information retrieval</title>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="fa5ca8fc">2005.eamt-1.19</url>
       <bibkey>hildebrand-etal-2005-adaptation</bibkey>
     </paper>
     <paper id="20">
       <title>Multi-engine machine translation guided by explicit word matching</title>
       <author><first>Shyamsundar</first><last>Jayaraman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="6c0e02de">2005.eamt-1.20</url>
       <bibkey>jayaraman-lavie-2005-multi</bibkey>
     </paper>
     <paper id="21">
       <title>Symmetric probabilistic alignment for example-based translation</title>
       <author><first>Jae Dong</first><last>Kim</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Peter J.</first><last>Jansen</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="peter-jansen"><first>Peter J.</first><last>Jansen</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="8faf4d6c">2005.eamt-1.21</url>
       <bibkey>kim-etal-2005-symmetric</bibkey>
     </paper>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="23">
       <title>From the real world to real words: the <fixed-case>METEO</fixed-case> case</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Thomas</first><last>Leplus</last></author>
       <author><first>Simona</first><last>Gandrabur</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
@@ -207,7 +207,7 @@
       <title>Efficient statistical machine translation with constrained reordering</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Stephan</first><last>Kanthak</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="028c5ad4">2005.eamt-1.25</url>
       <bibkey>matusov-etal-2005-efficient</bibkey>
     </paper>
@@ -215,7 +215,7 @@
       <title><fixed-case>T</fixed-case>rans<fixed-case>B</fixed-case>ooster: boosting the performance of wide-coverage machine translation systems</title>
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Anna</first><last>Khasin</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="d1de1a93">2005.eamt-1.26</url>
       <bibkey>mellebeek-etal-2005-transbooster</bibkey>
@@ -224,7 +224,7 @@
       <title>Holistic regression testing for high-quality <fixed-case>MT</fixed-case>: some methodological and technological reflections</title>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
@@ -233,24 +233,24 @@
     </paper>
     <paper id="28">
       <title>Building a <fixed-case>WSD</fixed-case> module within an <fixed-case>MT</fixed-case> system to enable interactive resolution in the user’s source language</title>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Ted</first><last>Marshall</last></author>
       <author><first>Robert</first><last>Clark</last></author>
       <author><first>Le An</first><last>Ha</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="6157789a">2005.eamt-1.28</url>
       <bibkey>orasan-etal-2005-building</bibkey>
     </paper>
     <paper id="29">
       <title>Exploiting phrasal lexica and additional morpho-syntactic language resources for statistical machine translation with scarce training data</title>
-      <author><first>Maja</first><last>Popovic</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="0eb2c42a">2005.eamt-1.29</url>
       <bibkey>popovic-ney-2005-exploiting</bibkey>
     </paper>
     <paper id="30">
       <title>An approach to machine translation via the rule-to-rule hypothesis</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <url hash="b6687b8a">2005.eamt-1.30</url>
       <bibkey>proszeky-2005-approach</bibkey>
     </paper>
@@ -284,14 +284,14 @@
     <paper id="35">
       <title>Application of word-level confidence measures in interactive statistical machine translation</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="c11e2658">2005.eamt-1.35</url>
       <bibkey>ueffing-ney-2005-application</bibkey>
     </paper>
     <paper id="36">
       <title>Considerations in maximum mutual information and minimum classification error training for statistical machine translation</title>
       <author><first>Ashish</first><last>Vengupol</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="9b541fd9">2005.eamt-1.36</url>
       <bibkey>vengupol-vogel-2005-considerations</bibkey>
     </paper>
@@ -299,7 +299,7 @@
       <title>Sentence segmentation using <fixed-case>IBM</fixed-case> word alignment model 1</title>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="2951fc50">2005.eamt-1.37</url>
       <bibkey>xu-etal-2005-sentence</bibkey>
     </paper>
@@ -320,8 +320,8 @@
     </paper>
     <paper id="39">
       <title>An efficient phrase-to-phrase alignment model for arbitrarily long phrase and large corpora</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="2d5bb12d">2005.eamt-1.39</url>
       <bibkey>zhang-vogel-2005-efficient</bibkey>
     </paper>
diff --git a/data/xml/2005.iwslt.xml b/data/xml/2005.iwslt.xml
index c2647bcbae..e766326c33 100644
--- a/data/xml/2005.iwslt.xml
+++ b/data/xml/2005.iwslt.xml
@@ -19,8 +19,8 @@
       <title>A decoding algorithm for word lattice translation in speech translation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Wai-Kit</first><last>Lo</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="wai-kit-lo"><first>Wai-Kit</first><last>Lo</last></author>
       <url hash="44403e65">2005.iwslt-1.2</url>
       <bibkey>zhang-etal-2005-decoding</bibkey>
     </paper>
@@ -28,7 +28,7 @@
       <title>Using multiple recognition hypotheses to improve speech translation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <url hash="a18f0b95">2005.iwslt-1.3</url>
       <bibkey>zhang-etal-2005-using</bibkey>
     </paper>
@@ -41,12 +41,12 @@
     </paper>
     <paper id="5">
       <title>Nobody is perfect: <fixed-case>ATR</fixed-case>’s hybrid approach to spoken language translation</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Takao</first><last>Doi</last></author>
       <author><first>Youngsook</first><last>Hwang</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="3b1b0efa">2005.iwslt-1.5</url>
       <bibkey>paul-etal-2005-nobody</bibkey>
     </paper>
@@ -58,16 +58,16 @@
       <author><first>Almut</first><last>Silja</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="379120d4">2005.iwslt-1.6</url>
       <bibkey>hewavitharana-etal-2005-cmu</bibkey>
     </paper>
     <paper id="7">
       <title>Low Cost Portability for Statistical Machine Translation based on N-gram Frequency and <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case></title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="f04bb857">2005.iwslt-1.7</url>
       <bibkey>eck-etal-2005-low</bibkey>
     </paper>
@@ -116,7 +116,7 @@
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>MT</fixed-case> System</title>
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <url hash="e75d7cb9">2005.iwslt-1.13</url>
       <bibkey>shen-etal-2005-mit</bibkey>
     </paper>
@@ -127,7 +127,7 @@
       <author><first>Zhenbiao</first><last>Chen</last></author>
       <author><first>Wei</first><last>Wei</last></author>
       <author><first>Bo</first><last>Xu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="f6144263">2005.iwslt-1.14</url>
       <bibkey>pang-etal-2005-casia</bibkey>
     </paper>
@@ -162,7 +162,7 @@
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="1935b7b5">2005.iwslt-1.18</url>
       <bibkey>xu-etal-2005-integrated</bibkey>
     </paper>
@@ -171,7 +171,7 @@
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="ca7dab58">2005.iwslt-1.19</url>
       <bibkey>matusov-etal-2005-evaluating</bibkey>
     </paper>
@@ -179,12 +179,12 @@
       <title>The <fixed-case>RWTH</fixed-case> Phrase-based Statistical Machine Translation System</title>
       <author><first>Richard</first><last>Zens</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Sasa</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Sasa</first><last>Hasan</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="2563ab21">2005.iwslt-1.20</url>
       <bibkey>zens-etal-2005-rwth</bibkey>
     </paper>
@@ -193,7 +193,7 @@
       <author><first>Yookyung</first><last>Kim</last></author>
       <author><first>Jun</first><last>Huang</last></author>
       <author><first>Youssef</first><last>Billawala</last></author>
-      <author><first>Demitrios</first><last>Master</last></author>
+      <author id="demetrios-master"><first>Demitrios</first><last>Master</last></author>
       <author><first>Farzad</first><last>Ehsani</last></author>
       <url hash="e3260e90">2005.iwslt-1.21</url>
       <bibkey>kim-etal-2005-sehda</bibkey>
@@ -207,23 +207,23 @@
     </paper>
     <paper id="23">
       <title>Ngram-based versus Phrase-based Statistical Machine Translation</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Marta R.</first><last>Costa-Jussa</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-Jussa</last></author>
       <author><first>Jose B.</first><last>Marino</last></author>
-      <author><first>Jose A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>Jose A. R.</first><last>Fonollosa</last></author>
       <url hash="7dbcdbda">2005.iwslt-1.23</url>
       <bibkey>crego-etal-2005-ngram</bibkey>
     </paper>
     <paper id="24">
       <title>Tuning a phrase-based statistical translation system for the <fixed-case>IWSLT</fixed-case> 2005 <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish tasks</title>
-      <author><first>Marta R.</first><last>Costa-Jussa</last></author>
-      <author><first>Jose A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-Jussa</last></author>
+      <author id="jose-a-r-fonollosa"><first>Jose A. R.</first><last>Fonollosa</last></author>
       <url hash="780b3124">2005.iwslt-1.24</url>
       <bibkey>costa-jussa-fonollosa-2005-tuning</bibkey>
     </paper>
     <paper id="25">
       <title>The <fixed-case>TALP</fixed-case> Ngram-based <fixed-case>SMT</fixed-case> System for <fixed-case>IWSLT</fixed-case>’05</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>Adria</first><last>de Gispert</last></author>
       <author><first>Jose B.</first><last>Marino</last></author>
       <url hash="6c9d30c5">2005.iwslt-1.25</url>
@@ -232,7 +232,7 @@
     <paper id="26">
       <title>Machine Translation Evaluation Inside <fixed-case>QARLA</fixed-case></title>
       <author><first>Enrike</first><last>Amigo</last></author>
-      <author><first>Jesus</first><last>Gimenez</last></author>
+      <author id="jesus-gimenez"><first>Jesus</first><last>Gimenez</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
       <url hash="2b5d06b3">2005.iwslt-1.26</url>
       <bibkey>amigo-etal-2005-machine</bibkey>
diff --git a/data/xml/2005.jeptalnrecital.xml b/data/xml/2005.jeptalnrecital.xml
index ae12e18afb..cd2fae3ebc 100644
--- a/data/xml/2005.jeptalnrecital.xml
+++ b/data/xml/2005.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 12ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
-      <editor><first>Michèle</first><last>Jardino</last></editor>
+      <editor id="michele-jardino"><first>Michèle</first><last>Jardino</last></editor>
       <publisher>ATALA</publisher>
       <address>Dourdan, France</address>
       <month>June</month>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title><fixed-case>XMG</fixed-case> : un Compilateur de Méta-Grammaires Extensible</title>
       <author><first>Denys</first><last>Duchier</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <pages>11–20</pages>
       <abstract>Dans cet article, nous présentons un outil permettant de produire automatiquement des ressources linguistiques, en l’occurence des grammaires. Cet outil se caractérise par son extensibilité, tant du point de vue des formalismes grammaticaux supportés (grammaires d’arbres adjoints et grammaires d’interaction à l’heure actuelle), que de son architecture modulaire, qui facilite l’intégration de nouveaux modules ayant pour but de vérifier la validité des structures produites. En outre, cet outil offre un support adapté au développement de grammaires à portée sémantique.</abstract>
@@ -59,7 +59,7 @@
       <title>Recherche en corpus de réponses à des questions définitoires</title>
       <author><first>Véronique</first><last>Malaisé</last></author>
       <author><first>Thierry</first><last>Delbecque</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>41–50</pages>
       <abstract>Les systèmes de questions-réponses, essentiellement focalisés sur des questions factuelles en domaine ouvert, testent également d’autres tâches, comme le travail en domaine contraint ou la recherche de définitions. Nous nous intéressons ici à la recherche de réponses à des questions « définitoires » portant sur le domaine médical. La recherche de réponses de type définitoire se fait généralement en utilisant deux types de méthodes : celles s’appuyant essentiellement sur le contenu du corpus cible, et celles faisant appel à des connaissances externes. Nous avons choisi de nous limiter au premier de ces deux types de méthodes. Nous présentons une expérience dans laquelle nous réutilisons des patrons de repérage d’énoncés définitoires, conçus pour une autre tâche, pour localiser les réponses potentielles aux questions posées. Nous avons intégré ces patrons dans une chaîne de traitement que nous évaluons sur les questions définitoires et le corpus médical du projet EQueR sur l’évaluation de systèmes de questions-réponses. Cette évaluation montre que, si le rappel reste à améliorer, la « précision » des réponses obtenue (mesurée par la moyenne des inverses de rangs) est honorable. Nous discutons ces résultats et proposons des pistes d’amélioration.</abstract>
       <url hash="764036c3">2005.jeptalnrecital-long.5</url>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="7">
       <title>Morphosémantique pour l’appariement de termes dans le vocabulaire médical : approche multilingue</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>61–70</pages>
       <abstract>Cet article s’intéresse à la manière dont la morphosémantique peut contribuer à l’appariement multilingue de variantes terminologiques entre termes. L’approche décrite permet de relier automatiquement entre eux les noms et adjectifs composés savants d’un corpus spécialisé en médecine (synonymie, hyponymie, approximation). L’acquisition de relations lexicales est une question particulièrement cruciale lors de l’élaboration de bases de données et de systèmes de recherche d’information multilingues. La méthode est applicable à au moins cinq langues européennes dont elle exploite les caractéristiques morphologiques similaires des mots composés dans les langues de spécialité. Elle consiste en l’intéraction de trois dispositifs : (1) un analyseur morphosémantique monolingue, (2) une table multilingue qui définit des relations de base entre les racines gréco-latines des lexèmes savants, (3) quatre règles indépendantes de la langue qui infèrent, à partir de ces relations de base, les relations lexicales entre les lexèmes contenant ces racines. L’approche décrite est implémentée en français, où l’on dispose d’un analyseur morphologique capable de calculer la définition de mots construits inconnus à partir du sens de ses composants. Le corpus de travail est un lexique spécialisé médical d’environ 29000 lexèmes, que le calcul des relations de synonymie, hyponymie et approximation a permis de regrouper en plus de 3000 familles lexicales.</abstract>
       <url hash="c4d0f40a">2005.jeptalnrecital-long.7</url>
@@ -99,7 +99,7 @@
     <paper id="9">
       <title>Utilisation de corpus de spécialité pour le filtrage de synonymes de la langue générale</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>81–90</pages>
       <abstract>Les ressources linguistiques les plus facilement disponibles en TAL ressortissent généralement au registre général d’une langue. Lorsqu’elles doivent être utilisées sur des textes de spécialité il peut être utile de les adapter à ces textes. Cet article est consacré à l’adaptation de ressources synonymiques générales à la langue médicale. L’adaptation est obtenue suite à une série de filtrages sur un corpus du domaine. Les synonymes originaux et les synonymes filtrés sont ensuite utilisés comme une des ressources pour la normalisation de variantes de termes dans une tâche de structuration de terminologie. Leurs apports respectifs sont évalués par rapport à la structure terminologique de référence. Cette évaluation montre que les résultats sont globalement encourageants après les filtrages, pour une tâche comme la structuration de terminologies : une amélioration de la précision contre une légère diminution du rappel.</abstract>
       <url hash="61eaadbc">2005.jeptalnrecital-long.9</url>
@@ -119,7 +119,7 @@
       <title>Chaînes de traitement syntaxique</title>
       <author><first>Pierre</first><last>Boullier</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Éric</first><last>Villemonte De La Clergerie</last></author>
       <pages>101–110</pages>
       <abstract>Cet article expose l’ensemble des outils que nous avons mis en oeuvre pour la campagne EASy d’évaluation d’analyse syntaxique. Nous commençons par un aperçu du lexique morphologique et syntaxique utilisé. Puis nous décrivons brièvement les propriétés de notre chaîne de traitement pré-syntaxique qui permet de gérer des corpus tout-venant. Nous présentons alors les deux systèmes d’analyse que nous avons utilisés, un analyseur TAG issu d’une méta-grammaire et un analyseur LFG. Nous comparons ces deux systèmes en indiquant leurs points communs, comme l’utilisation intensive du partage de calcul et des représentations compactes de l’information, mais également leurs différences, au niveau des formalismes, des grammaires et des analyseurs. Nous décrivons ensuite le processus de post-traitement, qui nous a permis d’extraire de nos analyses les informations demandées par la campagne EASy. Nous terminons par une évaluation quantitative de nos architectures.</abstract>
@@ -182,8 +182,8 @@
     </paper>
     <paper id="17">
       <title>Representational and architectural issues in a limited-domain medical speech translator</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <pages>161–170</pages>
@@ -242,7 +242,7 @@
     <paper id="23">
       <title>Paradocs: un système d’identification automatique de documents parallèles</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>221–230</pages>
       <abstract>Les corpus parallèles sont d’une importance capitale pour les applications multilingues de traitement automatique des langues. Malheureusement, leur rareté est le maillon faible de plusieurs applications d’intérêt. Extraire de tels corpus duWeb est une solution viable, mais elle introduit une nouvelle problématique : il n’est pas toujours trivial d’identifier les documents parallèles parmi tous ceux qui ont été extraits. Dans cet article, nous nous intéressons à l’identification automatique des paires de documents parallèles contenues dans un corpus bilingue. Nous montrons que cette tâche peut être accomplie avec précision en utilisant un ensemble restreint d’invariants lexicaux. Nous évaluons également notre approche sur une tâche de traduction automatique et montrons qu’elle obtient des résultats supérieurs à un système de référence faisant usage d’un lexique bilingue.</abstract>
       <url hash="793ba4dc">2005.jeptalnrecital-long.23</url>
@@ -255,9 +255,9 @@
       <author><first>Nicola</first><last>Cancedda</last></author>
       <author><first>Bruno</first><last>Cavestro</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
       <pages>231–240</pages>
@@ -279,7 +279,7 @@
     <paper id="26">
       <title>Traduction de termes biomédicaux par inférence de transducteurs</title>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>251–260</pages>
       <abstract>Cet article propose et évalue une méthode de traduction automatique de termes biomédicaux simples du français vers l’anglais et de l’anglais vers le français. Elle repose sur une technique d’apprentissage artificiel supervisée permettant d’inférer des transducteurs à partir d’exemples de couples de termes bilingues ; aucune autre ressource ou connaissance n’est requise. Ces transducteurs, capturant les grandes régularités de traduction existant dans le domaine biomédical, sont ensuite utilisés pour traduire de nouveaux termes français en anglais et vice versa. Les évaluations menées montrent que le taux de bonnes traductions de notre technique se situe entre 52 et 67%. À travers un examen des erreurs les plus courantes, nous identifions quelques limites inhérentes à notre approche et proposons quelques pistes pour les dépasser. Nous envisageons enfin plusieurs extensions à ce travail.</abstract>
       <url hash="4906c9c4">2005.jeptalnrecital-long.26</url>
@@ -307,7 +307,7 @@
     </paper>
     <paper id="29">
       <title>Détection automatique d’actes de dialogue par l’utilisation d’indices multiniveaux</title>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Delphine</first><last>Tribout</last></author>
       <pages>281–290</pages>
       <abstract>Ces dernières années, il y a eu de nombreux travaux portant sur l’utilisation d’actes de dialogue pour caractériser les dialogues homme-homme ou homme-machine. Cet article fait état de nos travaux sur la détection automatique d’actes de dialogue dans des corpus réels de dialogue homme-homme. Notre travail est fondé essentiellement sur deux hypothèses . (i) la position des mots et la classe sémantique du mot sont plus importants que les mots eux-mêmes pour identifier l’acte de dialogue et (ii) il y a une forte prédictivité dans la succession des actes de dialogues portés sur un même segment dialogique. Une approche de type Memory Based Learning a été utilisée pour la détection automatique des actes de dialogue. Le premier modèle n’utilise pas d’autres informations que celles contenus dans le tour de parole. Dans lex expériences suivantes, des historiques dialogiques de taille variables sont utilisés. Le taux d’erreur de détection d’actes de dialogue est d’environ 16% avec le premier modèle est descend avec une utilisation plus large de l’historique du dialogue à environ 14%.</abstract>
@@ -318,8 +318,8 @@
     <paper id="30">
       <title>Comment mesurer la couverture d’une ressource terminologique pour un corpus ?</title>
       <author><first>Goritsa</first><last>Ninova</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Sylvie</first><last>Szulman</last></author>
       <pages>291–300</pages>
       <abstract>Cet article propose une définition formelle de la notion de couverture lexicale. Celleci repose sur un ensemble de quatre métriques qui donnent une vue globale de l’adéquation d’une ressource lexicale à un corpus et permettent ainsi de guider le choix d’une ressource en fonction d’un corpus donné. Les métriques proposées sont testées dans le contexte de l’analyse de corpus spécialisés en génomique : 5 terminologies différentes sont confrontées à 4 corpus. La combinaison des valeurs obtenues permet de discerner différents types de relations entre ressources et corpus.</abstract>
@@ -367,7 +367,7 @@
     </paper>
     <paper id="35">
       <title>Des arbres de dérivation aux forêts de dépendance : un chemin via les forêts partagées</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Bertrand</first><last>Gaiffe</last></author>
       <pages>341–350</pages>
       <abstract>L’objectif de cet article est de montrer comment bâtir une structure de répresentation proche d’un graphe de dépendance à l’aide des deux structures de représentation canoniques fournies par les Grammaires d’Arbres Adjoints Lexicalisées . Pour illustrer cette approche, nous décrivons comment utiliser ces deux structures à partir d’une forêt partagée.</abstract>
@@ -378,7 +378,7 @@
     <paper id="36">
       <title>Evaluation des Modèles de Langage n-gram et n/m-multigram</title>
       <author><first>Pierre</first><last>Alain</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <pages>351–360</pages>
       <abstract>Cet article présente une évaluation de modèles statistiques du langage menée sur la langue Française. Nous avons cherché à comparer la performance de modèles de langage exotiques par rapport aux modèles plus classiques de n-gramme à horizon fixe. Les expériences réalisées montrent que des modèles de n-gramme à horizon variable peuvent faire baisser de plus de 10% en moyenne la perplexité d’un modèle de n-gramme à horizon fixe. Les modèles de n/m-multigramme demandent une adaptation pour pouvoir être concurrentiels.</abstract>
       <url hash="38d3b7fb">2005.jeptalnrecital-long.36</url>
@@ -409,7 +409,7 @@
   <volume id="court" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 12ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts</booktitle>
-      <editor><first>Michèle</first><last>Jardino</last></editor>
+      <editor id="michele-jardino"><first>Michèle</first><last>Jardino</last></editor>
       <publisher>ATALA</publisher>
       <address>Dourdan, France</address>
       <month>June</month>
@@ -434,7 +434,7 @@
     <paper id="2">
       <title>Application du métalangage de la <fixed-case>BD</fixed-case>éf au traitement formel de la polysémie</title>
       <author><first>Lucie</first><last>Barque</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>391–396</pages>
       <abstract>Cet article a pour objet le métalangage définitionnel de la base de données lexicale BDéf, plus précisément l’utilisation de ce métalangage dans la modélisation des structures polysémiques du français. La Bdéf encode sous forme de définitions lexicographiques les sens lexicaux d’un sous-ensemble représentatif du lexique du français parmi lequel on compte environ 500 unités polysémiques appartenant aux principales parties du discours. L’article comprend deux sections. La première présente le métalangage de la BDéf et le situe par rapport aux différents types de définitions lexicales, qu’elles soient ou non formelles, qu’elles visent ou non l’informatisation. La seconde section présente une application de la BDéf qui vise à terme à rendre compte de la polysémie régulière du français. On y présente, à partir d’un cas spécifique, la notion de patron de polysémie.</abstract>
       <url hash="566e3917">2005.jeptalnrecital-court.2</url>
@@ -454,7 +454,7 @@
     <paper id="4">
       <title>Un analyseur <fixed-case>LFG</fixed-case> efficace pour le français : <fixed-case>SXLFG</fixed-case></title>
       <author><first>Pierre</first><last>Boullier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
       <pages>403–408</pages>
       <abstract>Dans cet article, nous proposons un nouvel analyseur syntaxique, qui repose sur une variante du modèle Lexical-Functional Grammars (Grammaires Lexicales Fonctionnelles) ou LFG. Cet analyseur LFG accepte en entrée un treillis de mots et calcule ses structures fonctionnelles sur une forêt partagée. Nous présentons également les différentes techniques de rattrapage d’erreurs que nous avons mises en oeuvre. Puis nous évaluons cet analyseur sur une grammaire à large couverture du français dans le cadre d’une utilisation à grande échelle sur corpus variés. Nous montrons que cet analyseur est à la fois efficace et robuste.</abstract>
@@ -476,9 +476,9 @@
       <title>Contextes multilingues alignés pour la désambiguïsation sémantique : une étude expérimentale</title>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Meriam</first><last>Haddara</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
-      <author><first>Grégoire</first><last>Moreau de Montcheuil</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
+      <author id="gregoire-moreau-de-montcheuil"><first>Grégoire</first><last>Moreau de Montcheuil</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>415–420</pages>
       <abstract>Cet article s’intéresse a la désambiguïsation sémantique d’unités lexicales alignées a travers un corpus multilingue. Nous appliquons une méthode automatique non supervisée basée sur la comparaison de réseaux sémantiques, et nous dégageons un critère permettant de déterminer a priori si 2 unités alignées ont une chance de se désambiguïser mutuellement. Enfin, nous développons une méthode fondée sur un apprentissage a partir de contextes bilingues. En appliquant ce critère afin de déterminer pour quelles unités l’information traductionnelle doit être prise en compte, nous obtenons une amélioration des résultats.</abstract>
       <url hash="c852a266">2005.jeptalnrecital-court.6</url>
@@ -498,7 +498,7 @@
     </paper>
     <paper id="8">
       <title>Projection et monotonie dans un langage de représentation lexico-grammatical</title>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>427–432</pages>
       <abstract>Cet article apporte une méthode de développement grammatical pour la réalisation de grammaires d’arbres adjoints (TAG) de taille importante augmentées d’une dimension sémantique. La méthode que nous présentons s’exprime dans un langage informatique de représentation grammatical qui est déclaratif et monotone. Pour arriver au résultat, nous montrons comment tirer parti de la théorie de la projection dans le langage de représentation que nous utilisons. Par conséquent cet article justifie l’utilisation d’un langage monotone pour la représentation lexico-grammaticale.</abstract>
       <url hash="6d0c75e7">2005.jeptalnrecital-court.8</url>
@@ -519,7 +519,7 @@
       <title>Ritel : un système de dialogue homme-machine à domaine ouvert</title>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>439–444</pages>
       <abstract>L’objectif du projet RITEL est de réaliser un système de dialogue homme-machine permettant à un utilisateur de poser oralement des questions, et de dialoguer avec un système de recherche d’information généraliste (par exemple, chercher sur l’Internet “Qui est le Président du Sénat ?”) et d’en étudier les potentialités. Actuellement, la plateforme RITEL permet de collecter des corpus de dialogue homme-machine. Les utilisateurs peuvent parfois obtenir une réponse, de type factuel (Q : qui est le président de la France ; R : Jacques Chirac.). Cet article présente brièvement la plateforme développée, le corpus collecté ainsi que les questions que soulèvent un tel système et quelques unes des premières solutions envisagées.</abstract>
       <url hash="530e905b">2005.jeptalnrecital-court.10</url>
@@ -539,7 +539,7 @@
     </paper>
     <paper id="12">
       <title>Segmentation de textes arabes basée sur l’analyse contextuelle des signes de ponctuations et de certaines particules</title>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <author><first>Leila</first><last>Baccour</last></author>
       <author><first>Mourad</first><last>Ghassan</last></author>
       <pages>451–456</pages>
@@ -558,7 +558,7 @@
     </paper>
     <paper id="14">
       <title>Approches en corpus pour la traduction : le cas <fixed-case>MÉTÉO</fixed-case></title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Thomas</first><last>Leplus</last></author>
       <author><first>Simona</first><last>Gandrabur</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
@@ -579,9 +579,9 @@
     </paper>
     <paper id="16">
       <title>Indexation automatique de ressources de santé à l’aide de paires de descripteurs <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case></title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Alexandrina</first><last>Rogozan</last></author>
-      <author><first>Stéfan</first><last>Darmoni</last></author>
+      <author id="stefan-darmoni"><first>Stéfan</first><last>Darmoni</last></author>
       <pages>475–480</pages>
       <abstract>Depuis quelques années, médecins et documentalistes doivent faire face à une demande croissante dans le domaine du codage médico-économique et de l’indexation des diverses sources d’information disponibles dans le domaine de la santé. Il est donc nécessaire de développer des outils d’indexation automatique qui réduisent les délais d’indexation et facilitent l’accès aux ressources médicales. Nous proposons deux méthodes d’indexation automatique de ressources de santé à l’aide de paires de descripteurs MeSH. La combinaison de ces deux méthodes permet d’optimiser les résulats en exploitant la complémentarité des approches. Les performances obtenues sont équivalentes à celles des outils de la littérature pour une indexation à l’aide de descripteurs seuls.</abstract>
       <url hash="49b6159c">2005.jeptalnrecital-court.16</url>
@@ -608,7 +608,7 @@
     </paper>
     <paper id="19">
       <title>Les Méta-<fixed-case>RCG</fixed-case>: description et mise en oeuvre</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>493–498</pages>
       <abstract>Nous présentons dans cet article un nouveau formalisme linguistique qui repose sur les Grammaires à Concaténation d’Intervalles (RCG), appelé Méta-RCG. Nous exposons tout d’abord pourquoi la non-linéarité permet une représentation adéquate des phénomènes linguistiques, et en particulier de l’interaction entre les différents niveaux de description. Puis nous présentons les Méta-RCG et les concepts linguistiques supplémentaires qu’elles mettent en oeuvre, tout en restant convertibles en RCG classiques. Nous montrons que les analyses classiques (constituants, dépendances, topologie, sémantique prédicat-arguments) peuvent être obtenues par projection partielle d’une analyse Méta-RCG complète. Enfin, nous décrivons la grammaire du français que nous développons dans ce nouveau formalisme et l’analyseur efficace qui en découle. Nous illustrons alors la notion de projection partielle sur un exemple.</abstract>
       <url hash="ba4f6f2d">2005.jeptalnrecital-court.19</url>
@@ -631,7 +631,7 @@
     <paper id="21">
       <title>Segmentation thématique par chaînes lexicales pondérées</title>
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>505–510</pages>
       <abstract>Cet article propose une méthode innovante et efficace pour segmenter un texte en parties thématiquement cohérentes, en utilisant des chaînes lexicales pondérées. Les chaînes lexicales sont construites en fonction de hiatus variables, ou bien sans hiatus, ou encore pondérées en fonction de la densité des occurrences du terme dans la chaîne. D’autre part, nous avons constaté que la prise en compte du repérage d’entités nommées dans la chaîne de traitement, du moins sans résolution des anaphores, n’améliore pas significativement les performances. Enfin, la qualité de la segmentation proposée est stable sur différentes thématiques, ce qui montre une indépendance par rapport au type de document.</abstract>
       <url hash="121e060e">2005.jeptalnrecital-court.21</url>
@@ -640,7 +640,7 @@
     </paper>
     <paper id="22">
       <title>Une plateforme pour l’acquisition, la maintenance et la validation de ressources lexicales</title>
-      <author><first>Tristan</first><last>Vanrullen</last></author>
+      <author id="tristan-vanrullen"><first>Tristan</first><last>Vanrullen</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <author><first>Cristel</first><last>Portes</last></author>
       <author><first>Stéphane</first><last>Rauzy</last></author>
diff --git a/data/xml/2005.mtsummit.xml b/data/xml/2005.mtsummit.xml
index 58016384da..57b6bc822f 100644
--- a/data/xml/2005.mtsummit.xml
+++ b/data/xml/2005.mtsummit.xml
@@ -10,7 +10,7 @@
     </meta>
     <paper id="1">
       <title>Reviewing Back the Past <fixed-case>MT</fixed-case> Summits</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <bibkey>nagao-2005-reviewing</bibkey>
     </paper>
     <paper id="2">
@@ -35,7 +35,7 @@
     </paper>
     <paper id="5">
       <title>One Decade of Statistical Machine Translation: 1996-2005</title>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="26242421">2005.mtsummit-invited.5</url>
       <abstract>In the last decade, the statistical approach has found widespread use in machine translation both for written and spoken language and has had a major impact on the translation accuracy. This paper will cover the principles of statistical machine translation and summarize the progress made so far.</abstract>
       <bibkey>ney-2005-one</bibkey>
@@ -111,7 +111,7 @@
     <paper id="2">
       <title>Selection of Entries for a Bilingual Dictionary from Aligned Translation Equivalents using Support Vector Machines</title>
       <author><first>Takeshi</first><last>Kutsumi</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Katsunori</first><last>Kotani</last></author>
       <author><first>Ichiko</first><last>Sata</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
@@ -124,7 +124,7 @@
       <title>Subword Clusters as Light-Weight Interlingua for Multilingual Document Retrieval</title>
       <author><first>Udo</first><last>Hahn</last></author>
       <author><first>Kornel</first><last>Marko</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <pages>17-24</pages>
       <url hash="5d1a1ef5">2005.mtsummit-papers.3</url>
       <abstract>We introduce a light-weight interlingua for a cross-language document retrieval system in the medical domain. It is composed of equivalence classes of semantically primitive, language-specific subwords which are clustered by interlingual and intralingual synonymy. Each subword cluster represents a basic conceptual entity of the language-independent interlingua. Documents, as well as queries, are mapped to this interlingua level on which retrieval operations are performed. Evaluation experiments reveal that this interlingua-based retrieval model outperforms a direct translation approach.</abstract>
@@ -187,7 +187,7 @@
     <paper id="9">
       <title>Document Authoring the <fixed-case>B</fixed-case>ible for Minority Language Translation</title>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Tod</first><last>Allman</last></author>
       <pages>63-70</pages>
@@ -236,7 +236,7 @@
       <title>Semantically Relatable Sets: Building Blocks for Representing Semantics</title>
       <author><first>Rajat</first><last>Kumar Mohanty</last></author>
       <author><first>Anupama</first><last>Dutta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>101-108</pages>
       <url hash="40497fad">2005.mtsummit-papers.14</url>
       <bibkey>kumar-mohanty-etal-2005-semantically</bibkey>
@@ -253,14 +253,14 @@
     <paper id="16">
       <title>Evaluation of Machine Translation with Predictive Metrics beyond <fixed-case>BLEU</fixed-case>/<fixed-case>NIST</fixed-case>: <fixed-case>CESTA</fixed-case> Evaluation Campaign # 1</title>
       <author><first>Sylvain</first><last>Surcin</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Antony</first><last>Hartley</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Widad Mustafa El</first><last>Hadi</last></author>
-      <author><first>Ismaïl</first><last>Timimi</last></author>
-      <author><first>Marianne</first><last>Dabbadie</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad Mustafa El</first><last>Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismaïl</first><last>Timimi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <pages>117-124</pages>
       <url hash="966fb900">2005.mtsummit-papers.16</url>
       <abstract>In this paper, we report on the results of a full-size evaluation campaign of various MT systems. This campaign is novel compared to the classical DARPA/NIST MT evaluation campaigns in the sense that French is the target language, and that it includes an experiment of meta-evaluation of various metrics claiming to better predict different attributes of translation quality. We first describe the campaign, its context, its protocol and the data we used. Then we summarise the results obtained by the participating systems and discuss the meta-evaluation of the metrics used.</abstract>
@@ -268,7 +268,7 @@
     </paper>
     <paper id="17">
       <title>Inter-rater Agreement Measures, and the Refinement of Metrics in the <fixed-case>PLATO</fixed-case> <fixed-case>MT</fixed-case> Evaluation Paradigm</title>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Michelle</first><last>Vanni</last></author>
       <pages>125-132</pages>
       <url hash="9d3a53a2">2005.mtsummit-papers.17</url>
@@ -287,9 +287,9 @@
     </paper>
     <paper id="19">
       <title>Thot: a Toolkit To Train Phrase-based Statistical Translation Models</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>141-148</pages>
       <url hash="501cb960">2005.mtsummit-papers.19</url>
       <abstract>In this paper, we present the Thot toolkit, a set of tools to train phrase-based models for statistical machine translation, which is publicly available as open source software. The toolkit obtains phrase-based models from word-based alignment models; to our knowledge, this functionality has not been offered by any publicly available toolkit. The Thot toolkit also implements a new way for estimating phrase models, this allows to obtain more complete phrase models than the methods described in the literature, including a segmentation length submodel. The toolkit output can be given in different formats in order to be used by other statistical machine translation tools like Pharaoh, which is a beam search decoder for phrase-based alignment models which was used in order to perform translation experiments with the generated models. Additionally, the Thot toolkit can be used to obtain the best alignment between a sentence pair at phrase level.</abstract>
@@ -297,7 +297,7 @@
     </paper>
     <paper id="20">
       <title>Machine Translation of Bi-lingual <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish (<fixed-case>H</fixed-case>inglish) Text</title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <author><first>Anil</first><last>Thakur</last></author>
       <pages>149-156</pages>
       <url hash="f3e06b98">2005.mtsummit-papers.20</url>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="22">
       <title><fixed-case>SEM</fixed-case>-<fixed-case>I</fixed-case> Rational <fixed-case>MT</fixed-case>: Enriching Deep Grammars with a Semantic Interface for Scalable Machine Translation</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
@@ -328,8 +328,8 @@
     </paper>
     <paper id="23">
       <title><fixed-case>DEMOCRAT</fixed-case>: Deciding between Multiple Outputs Created by Automatic Translation</title>
-      <author><first>Menno</first><last>van Zaanen</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>173-180</pages>
       <url hash="c4a08f8f">2005.mtsummit-papers.23</url>
       <bibkey>van-zaanen-somers-2005-democrat</bibkey>
@@ -337,12 +337,12 @@
     <paper id="24">
       <title>Customizing a <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> System for Patent Translation</title>
       <author><first>Munpyo</first><last>Hong</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
-      <author><first>Chang-Hyun</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang-Hyun</first><last>Kim</last></author>
       <author><first>Seong-Il</first><last>Yang</last></author>
       <author><first>Young-Ae</first><last>Seo</last></author>
       <author><first>Cheol</first><last>Ryu</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
       <pages>181-187</pages>
       <url hash="5e2de28a">2005.mtsummit-papers.24</url>
       <abstract>This paper addresses a customization process of a Korean-English MT system for patent translation. The major customization steps include terminology construction, linguistic study, and the modification of the existing analysis and generation-module. T o our knowledge, this is the first worth-mentioning large-scale customization effort of an MT system for Korean and English. This research was performed under the auspices of the MIC (Ministry of Information and Communication) of Korean government. A prototype patent MT system for electronics domain was installed and is being tested in the Korean Intellectual Property Office.</abstract>
@@ -351,11 +351,11 @@
     <paper id="25">
       <title>Practicing Controlled Language through a Help System integrated into the Medical Speech Translation System (<fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case>)</title>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
@@ -367,8 +367,8 @@
     <paper id="26">
       <title>The <fixed-case>FAME</fixed-case> Speech-to-Speech Translation System for <fixed-case>C</fixed-case>atalan, <fixed-case>E</fixed-case>nglish, and <fixed-case>S</fixed-case>panish</title>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <pages>195-202</pages>
       <url hash="1ae97584">2005.mtsummit-papers.26</url>
       <abstract>This paper describes the evaluation of the FAME interlingua-based speech-to-speech translation system for Catalan, English and Spanish. This system is an extension of the already existing NESPOLE! that translates between English, French, German and Italian. This article begins with a brief introduction followed by a description of the system architecture and the components of the translation module including the Speech Recognizer, the analysis chain, the generation chain and the Speech Synthesizer. Then we explain the interlingua formalism used, called Interchange Format (IF). We show the results obtained from the evaluation of the system and we describe the three types of evaluation done. We also compare the results of our system with those obtained by a stochastic translator which has been independently developed over the course of the FAME project. Finally, we conclude with future work.</abstract>
@@ -397,8 +397,8 @@
       <title>Probabilistic Model for Example-based Machine Translation</title>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <pages>219-226</pages>
       <url hash="f0d1e65d">2005.mtsummit-papers.29</url>
       <abstract>Example-based machine translation (EBMT) systems, so far, rely on heuristic measures in retrieving translation examples. Such a heuristic measure costs time to adjust, and might make its algorithm unclear. This paper presents a probabilistic model for EBMT. Under the proposed model, the system searches the translation example combination which has the highest probability. The proposed model clearly formalizes EBMT process. In addition, the model can naturally incorporate the context similarity of translation examples. The experimental results demonstrate that the proposed model has a slightly better translation quality than state-of-the-art EBMT systems.</abstract>
@@ -407,8 +407,8 @@
     <paper id="30">
       <title>Low Cost Portability for Statistical Machine Translation based on N-gram Coverage</title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>227-234</pages>
       <url hash="404d72c1">2005.mtsummit-papers.30</url>
       <abstract>Statistical machine translation relies heavily on the available training data. However, in some cases, it is necessary to limit the amount of training data that can be created for or actually used by the systems. To solve that problem, we introduce a weighting scheme that tries to select more informative sentences first. This selection is based on the previously unseen n-grams the sentences contain, and it allows us to sort the sentences according to their estimated importance. After sorting, we can construct smaller training corpora, and we are able to demonstrate that systems trained on much less training data show a very competitive performance compared to baseline systems using all available training data.</abstract>
@@ -437,7 +437,7 @@
     </paper>
     <paper id="33">
       <title><fixed-case>PESA</fixed-case>: Phrase Pair Extraction as Sentence Splitting</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>251-258</pages>
       <url hash="6114d9b5">2005.mtsummit-papers.33</url>
       <abstract>Most statistical machine translation systems use phrase-to-phrase translations to capture local context information, leading to better lexical choice and more reliable local reordering. The quality of the phrase alignment is crucial to the quality of the resulting translations. Here, we propose a new phrase alignment method, not based on the Viterbi path of word alignment models. Phrase alignment is viewed as a sentence splitting task. For a given spitting of the source sentence (source phrase, left segment, right segment) find a splitting for the target sentence, which optimizes the overall sentence alignment probability. Experiments on different translation tasks show that this phrase alignment method leads to highly competitive translation results.</abstract>
@@ -447,9 +447,9 @@
       <title>Statistical Machine Translation of <fixed-case>E</fixed-case>uropean Parliamentary Speeches</title>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Sasa</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Sasa</first><last>Hasan</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>259-266</pages>
       <url hash="623a44d9">2005.mtsummit-papers.34</url>
       <abstract>In this paper we present the ongoing work at RWTH Aachen University for building a speech-to-speech translation system within the TC-Star project. The corpus we work on consists of parliamentary speeches held in the European Plenary Sessions. To our knowledge, this is the first project that focuses on speech-to-speech translation applied to a real-life task. We describe the statistical approach used in the development of our system and analyze its performance under different conditions: dealing with syntactically correct input, dealing with the exact transcription of speech and dealing with the (noisy) output of an automatic speech recognition system. Experimental results show that our system is able to perform adequately in each of these conditions.</abstract>
@@ -459,7 +459,7 @@
       <title>Practical Approach to Syntax-based Statistical Machine Translation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>267-274</pages>
       <url hash="777f3704">2005.mtsummit-papers.35</url>
       <abstract>This paper presents a practical approach to statistical machine translation (SMT) based on syntactic transfer. Conventionally, phrase-based SMT generates an output sentence by combining phrase (multiword sequence) translation and phrase reordering without syntax. On the other hand, SMT based on tree-to-tree mapping, which involves syntactic information, is theoretical, so its features remain unclear from the viewpoint of a practical system. The SMT proposed in this paper translates phrases with hierarchical reordering based on the bilingual parse tree. In our experiments, the best translation was obtained when both phrases and syntactic information were used for the translation process.</abstract>
@@ -467,12 +467,12 @@
     </paper>
     <paper id="36">
       <title>Bilingual N-gram Statistical Machine Translation</title>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Marta</first><last>Ruiz</last></author>
       <pages>275-282</pages>
       <url hash="9fe7604c">2005.mtsummit-papers.36</url>
@@ -481,9 +481,9 @@
     </paper>
     <paper id="37">
       <title>Reordered Search, and Tuple Unfolding for Ngram-based <fixed-case>SMT</fixed-case></title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>283-289</pages>
       <url hash="6a959de0">2005.mtsummit-papers.37</url>
       <abstract>In Statistical Machine Translation, the use of reordering for certain language pairs can produce a significant improvement on translation accuracy. However, the search problem is shown to be NP-hard when arbitrary reorderings are allowed. This paper addresses the question of reordering for an Ngram-based SMT approach following two complementary strategies, namely reordered search and tuple unfolding. These strategies interact to improve translation quality in a Chinese to English task. On the one hand, we allow for an Ngram-based decoder (MARIE) to perform a reordered search over the source sentence, while combining a translation tuples Ngram model, a target language model, a word penalty and a word distance model. Interestingly, even though the translation units are learnt sequentially, its reordered search produces an improved translation. On the other hand, we allow for a modification of the translation units that unfolds the tuples, so that shorter units are learnt from a new parallel corpus, where the source sentences are reordered according to the target language. This tuple unfolding technique reduces data sparseness and, when combined with the reordered search, further boosts translation performance. Translation accuracy and efficency results are reported for the IWSLT 2004 Chinese to English task.</abstract>
@@ -494,7 +494,7 @@
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Anna</first><last>Khasin</last></author>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>290-297</pages>
       <url hash="e7270500">2005.mtsummit-papers.38</url>
@@ -568,7 +568,7 @@
     </paper>
     <paper id="4">
       <title>Divergence Patterns in Machine Translation between <fixed-case>H</fixed-case>indi and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <author><first>Anil</first><last>Thakur</last></author>
       <pages>346-353</pages>
       <url hash="aa58d437">2005.mtsummit-posters.4</url>
@@ -587,7 +587,7 @@
     </paper>
     <paper id="6">
       <title>Handling ki in <fixed-case>H</fixed-case>indi for <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case></title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <author><first>Anil</first><last>Thakur</last></author>
       <pages>356-353</pages>
       <url hash="ed4d34ca">2005.mtsummit-posters.6</url>
@@ -607,8 +607,8 @@
     </paper>
     <paper id="8">
       <title>A Phrasal <fixed-case>EBMT</fixed-case> System for Translating <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>engali</title>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>372-379</pages>
       <url hash="4d705bf6">2005.mtsummit-posters.8</url>
       <abstract>The present work describes a Phrasal Example Based Machine Translation system from English to Bengali that identifies the phrases in the input through a shallow analysis, retrieves the target phrases using a Phrasal Example base and finally combines the target language phrases employing some heuristics based on the phrase ordering rules for Bengali. The paper focuses on the structure of the noun, verb and prepositional phrases in English and how these phrases are realized in Bengali. This study has an effect on the design of the phrasal Example Base and recombination rules for the target language phrases.</abstract>
@@ -616,9 +616,9 @@
     </paper>
     <paper id="9">
       <title>An <fixed-case>MT</fixed-case> System Recycled</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <pages>380-387</pages>
       <url hash="b12e9e8e">2005.mtsummit-posters.9</url>
       <abstract>This paper describes an attempt to recycle parts of the Czech-to-Russian machine translation system (MT) in the new Czech-to-English MT system. The paper describes the overall architecture of the new system and the details of the modules which have been added. A special attention is paid to the problem of named entity recognition and to the method of automatic acquisition of lexico-syntactic information for the bilingual dictionary of the system.</abstract>
@@ -627,8 +627,8 @@
     <paper id="10">
       <title>Semi-Automated Elicitation Corpus Generation</title>
       <author><first>Alison</first><last>Alvarez</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
       <author><first>Jeff</first><last>Good</last></author>
       <pages>388-395</pages>
@@ -639,7 +639,7 @@
     <paper id="11">
       <title>Data Inferred Multi-word Expressions for Statistical Machine Translation</title>
       <author><first>Patrick</first><last>Lambert</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <pages>396-403</pages>
       <url hash="00e409d6">2005.mtsummit-posters.11</url>
       <abstract>This paper presents a strategy for detecting and using multi-word expressions in Statistical Machine Translation. Performance of the proposed strategy is evaluated in terms of alignment quality as well as translation accuracy. Evaluations are performed by using the Verbmobil corpus. Results from translation tasks from English-to-Spanish and from Spanish-to-English are presented and discussed.</abstract>
@@ -660,7 +660,7 @@
     <paper id="13">
       <title>Estimating the predictive Power of N-gram <fixed-case>MT</fixed-case> Evaluation Metrics across Language and Text Types</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Debbie</first><last>Elliott</last></author>
       <pages>412-418</pages>
       <url hash="b6fd4f87">2005.mtsummit-posters.13</url>
@@ -670,7 +670,7 @@
     <paper id="14">
       <title>A Useful-based Evaluation of Reading Support Systems: Comprehension, Reading Speed and Effective Speed</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Takeshi</first><last>Kutsumi</last></author>
       <author><first>Ichiko</first><last>Sata</last></author>
       <author><first>Hiroshi</first><last>Isahara</last></author>
@@ -681,7 +681,7 @@
     </paper>
     <paper id="15">
       <title>Word Alignment Viewer for Long Sentences</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>427-431</pages>
       <url hash="19164ebf">2005.mtsummit-posters.15</url>
       <abstract>An aligned corpus is an important resource for developing machine translation systems. We consider suitable units for constructing the translation model through observing an aligned parallel corpus. We examine the characteristics of the aligned corpus. Long sentences are especially difficult for word alignment because the sentences can become very complicated. Also, each (source/target) word has a higher possibility to correspond to the (target/source) word. This paper introduces an alignment viewer a developer can use to correct alignment information. We discuss using the viewer on a patent parallel corpus because sentences in patents are often long and complicated.</abstract>
@@ -701,7 +701,7 @@
       <title>Rapid Ramp-up for Statistical Machine Translation: Minimal Training for Maximal Coverage</title>
       <author><first>Hemali</first><last>Majithia</last></author>
       <author><first>Philip</first><last>Rennart</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <pages>438-444</pages>
       <url hash="cc2de70e">2005.mtsummit-posters.17</url>
       <abstract>This paper investigates optimal ways to get maximal coverage from minimal input training corpus. In effect, it seems antagonistic to think of minimal input training with a statistical machine translation system. Since statistics work well with repetition and thus capture well highly occurring words, one challenge has been to figure out the optimal number of “new” words that the system needs to be appropriately trained. Additionally, the goal is to minimize the human translation time for training a new language. In order to account for rapid ramp-up translation, we ran several experiments to figure out the minimal amount of data to obtain optimal translation results.</abstract>
@@ -741,8 +741,8 @@
     </paper>
     <paper id="21">
       <title>Use of Machine Translation in <fixed-case>I</fixed-case>ndia: Current Status</title>
-      <author><first>Sudip</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>465-470</pages>
       <url hash="9af99f3e">2005.mtsummit-posters.21</url>
       <abstract>A survey of the machine translation systems that have been developed in India for translation from English to Indian languages and among Indian languages reveals that the MT softwares are used in field testing or are available as web translation service. These systems are also used for teaching machine translation to the students and researchers. Most of these systems are in the English-Hindi or Indian language-Indian language domain. The translation domains are mostly government documents/reports and news stories. There are a number of other MT systems that are at their various phases of development and have been demonstrated at various forums. Many of these systems cover other Indian languages beside Hindi.</abstract>
@@ -768,7 +768,7 @@
     </meta>
     <paper id="1">
       <title>Statistical Machine Translation: Foundations and Recent Advances</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <attachment type="presentation" hash="ed557664">2005.mtsummit-tutorials.1.Presentation.pdf</attachment>
       <bibkey>och-2005-statistical</bibkey>
     </paper>
@@ -797,15 +797,15 @@
     </paper>
     <paper id="2">
       <title>An Open Architecture for Transfer-based Machine Translation between <fixed-case>S</fixed-case>panish and <fixed-case>B</fixed-case>asque</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>7-14</pages>
       <url hash="a9bcfc3a">2005.mtsummit-osmtw.2</url>
       <abstract>We present the current status of development of an open architecture for the translation from Spanish into Basque. The machine translation architecture uses an open source analyser for Spanish and new modules mainly based on finite-state transducers. The project is integrated in the OpenTrad initiative, a larger government funded project shared among different universities and small companies, which will also include MT engines for translation among the main languages in Spain. The main objective is the construction of an open, reusable and interoperable framework. This paper describes the design of the engine, the formats it uses for the communication among the modules, the modules reused from other project named Matxin and the new modules we are building.</abstract>
@@ -817,7 +817,7 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <pages>15-22</pages>
       <url hash="9379856f">2005.mtsummit-osmtw.3</url>
       <bibkey>bond-etal-2005-open</bibkey>
@@ -826,12 +826,12 @@
       <title>An Open-Source Shallow-Transfer Machine Translation Toolbox: Consequences of Its Release and Availability</title>
       <author><first>Carme</first><last>Armentano-Oller</last></author>
       <author><first>Antonio M.</first><last>Corbí-Bellot</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Mireia</first><last>Ginestí-Rosell</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mireia-ginesti-rosell"><first>Mireia</first><last>Ginestí-Rosell</last></author>
       <author><first>Boyan</first><last>Bonev</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>23-30</pages>
       <url hash="6cfb807d">2005.mtsummit-osmtw.4</url>
@@ -850,7 +850,7 @@
     <paper id="1">
       <title>An n-gram Approach to Exploiting a Monolingual Corpus for Machine Translation</title>
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Antoni</first><last>Oliver</last></author>
       <pages>1-7</pages>
@@ -859,7 +859,7 @@
     </paper>
     <paper id="2">
       <title>Context-sensitive Retrieval for Example-based Translation</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <pages>9-15</pages>
       <url hash="77468815">2005.mtsummit-ebmt.2</url>
       <abstract>Example-Based Machine Translation (EBMT) systems have typically operated on individual sentences without taking into account prior context. By adding a simple reweighting of retrieved fragments of training examples on the basis of whether the previous translation retrieved any fragments from examples within a small window of the current instance, translation performance is improved. A further improvement is seen by performing a similar reweighting when another fragment of the current input sentence was retrieved from the same training example. Together, a simple, straightforward implementation of these two factors results in an improvement on the order of 1.0–1.6% in the BLEU metric across multiple data sets in multiple languages.</abstract>
@@ -877,7 +877,7 @@
     </paper>
     <paper id="4">
       <title>Learning Translation Templates with Type Constraints</title>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <pages>27-33</pages>
       <url hash="ad535128">2005.mtsummit-ebmt.4</url>
       <abstract>This paper presents a generalization technique that induces translation templates from given translation examples by replacing differing parts in these examples with typed variables. Since the type of each variable is also inferred during the learning process, each induced template is associated with a set of type constraints. The type constraints that are associated with a translation template restrict the usage of that translation template in certain contexts in order to avoid some of wrong translations. The types of variables are induced using the type lattices designed for both source language and target language. The proposed generalization technique has been implemented as a part of an EBMT system.</abstract>
@@ -903,8 +903,8 @@
     <paper id="7">
       <title>Graph-based Retrieval for Example-based Machine Translation Using Edit-distance</title>
       <author><first>Takao</first><last>Doi</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>51-58</pages>
       <url hash="81efb957">2005.mtsummit-ebmt.7</url>
       <bibkey>doi-etal-2005-graph</bibkey>
@@ -919,7 +919,7 @@
     </paper>
     <paper id="9">
       <title>Towards a Definition of Example-based Machine Translation</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <pages>63-70</pages>
       <url hash="ba0dc04a">2005.mtsummit-ebmt.9</url>
       <abstract>The example-based approach to MT is becoming increasingly popular. However, such is the variety of techniques and methods used that it is difficult to discern the overall conception of what example-based machine translation (EBMT) is and/or what its practitioners conceive it to be. Although definitions of MT systems are notoriously complex, an attempt is made to define EBMT in contrast to other MT architectures (RBMT and SMT).</abstract>
@@ -927,7 +927,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>EBMT</fixed-case> by Tree-Phrasing: a Pilot Study</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <author><first>Didier</first><last>Bourigault</last></author>
       <author><first>Claude</first><last>Coulombe</last></author>
@@ -947,7 +947,7 @@
     </paper>
     <paper id="12">
       <title>Monolingual Corpus-based <fixed-case>MT</fixed-case> Using Chunks</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Sokratis</first><last>Sofianopoulos</last></author>
       <author><first>Vassiliki</first><last>Spilioti</last></author>
       <author><first>Yiorgos</first><last>Tambouratzis</last></author>
@@ -979,8 +979,8 @@
     </paper>
     <paper id="15">
       <title>A Machine Learning Approach to Hypotheses Selection of Greedy Decoding for <fixed-case>SMT</fixed-case></title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <pages>117-124</pages>
       <url hash="e3e0f701">2005.mtsummit-ebmt.15</url>
@@ -990,7 +990,7 @@
     <paper id="16">
       <title>A Semantics-based <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>engali <fixed-case>EBMT</fixed-case> System for Translating News Headlines</title>
       <author><first>Diganta</first><last>Saha</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>125-133</pages>
       <url hash="5aaaa26f">2005.mtsummit-ebmt.16</url>
       <abstract>The paper reports an Example based Machine Translation System for translating News Headlines from English to Bengali. The input headline is initially searched in the Direct Example Base. If it cannot be found, the input headline is tagged and the tagged headline is searched in the Generalized Tagged Example Base. If a match is obtained, the tagged headline in Bengali is retrieved from the example base, the output Bengali headline is generated after retrieving the Bengali equivalents of the English words from appropriate dictionaries and then applying relevant synthesis rules for generating the Bengali surface level words. If some named entities and acronyms are not present in the dictionary, transliteration scheme is applied for obtaining the Bengali equivalent. If a match is not found, the tagged input headline is analysed to identify the constituent phrase(s). The target translation is generated using English-Bengali phrasal example base, appropriate dictionaries and a set of heuristics for Bengali phrase reordering. If the headline still cannot be translated using example base strategy, a heuristic translation strategy will be applied. Any new input tagged headline along with its translation by the user will be inserted in the tagged Example base after generalization.</abstract>
@@ -1033,7 +1033,7 @@
     </paper>
     <paper id="3">
       <title>Classification of Modified Relationships in <fixed-case>J</fixed-case>apanese Patent Sentences</title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <author><first>Yuya</first><last>Kaneda</last></author>
       <pages>16-20</pages>
       <url hash="d006fcc6">2005.mtsummit-wpt.3</url>
@@ -1087,14 +1087,14 @@
     </paper>
     <paper id="9">
       <title>Terminology Construction Workflow for <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish Patent <fixed-case>MT</fixed-case></title>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <author><first>Seong-Il</first><last>Yang</last></author>
       <author><first>Munpyo</first><last>Hong</last></author>
-      <author><first>Chang-Hyun</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang-Hyun</first><last>Kim</last></author>
       <author><first>Young-Ae</first><last>Seo</last></author>
       <author><first>Cheol</first><last>Ryu</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>55-59</pages>
       <url hash="c6b632d5">2005.mtsummit-wpt.9</url>
       <abstract>This paper addresses the workflow for terminology construction for Korean-English patent MT system. The workflow consists of the stage for setting lexical goals and the semi- automatic terminology construction stage. As there is no comparable system, it is difficult to determine how many terms are needed. To estimate the number of the needed terms, we analyzed 45,000 patent documents. Given the limited time and budget, we resorted to the semi-automatic methods to create the bilingual term dictionary in electronics domain. We will show that parenthesis information in Korean patent documents and bilingual title corpus can be successfully used to build a bilingual term dictionary.</abstract>
@@ -1126,7 +1126,7 @@
     </paper>
     <paper id="3">
       <title>Challenges for the Multilingual Semantic Web</title>
-      <author><first>Walther</first><last>v. Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></author>
       <author><first>Cristina</first><last>Vertan</last></author>
       <pages>5-9</pages>
       <url hash="7b3c0a70">2005.mtsummit-swtmt.3</url>
diff --git a/data/xml/2005.sigdial.xml b/data/xml/2005.sigdial.xml
index a27f02ad53..ca7699f8a9 100644
--- a/data/xml/2005.sigdial.xml
+++ b/data/xml/2005.sigdial.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th SIGdial Workshop on Discourse and Dialogue</booktitle>
-      <editor><first>Laila</first><last>Dybkjær</last></editor>
+      <editor id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></editor>
       <editor><first>Wolfgang</first><last>Minker</last></editor>
       <publisher>Special Interest Group on Discourse and Dialogue (SIGdial)</publisher>
       <address>Lisbon, Portugal</address>
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Where do we go from here? Research and Commercial Spoken Dialog Systems</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <author><first>Juan</first><last>Huerta</last></author>
       <pages>1–10</pages>
       <url hash="d68a25f9">2005.sigdial-1.1</url>
@@ -46,9 +46,9 @@
     </paper>
     <paper id="4">
       <title>Partially Observable <fixed-case>M</fixed-case>arkov Decision Processes with Continuous Observations for Dialogue Management</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <author><first>Pascal</first><last>Poupart</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>25–34</pages>
       <url hash="c650c535">2005.sigdial-1.4</url>
       <bibkey>williams-etal-2005-partially</bibkey>
@@ -67,7 +67,7 @@
       <title>Quantitative Evaluation of User Simulation Techniques for Spoken Dialogue Systems</title>
       <author><first>Jost</first><last>Schatzmann</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>45–54</pages>
       <url hash="cfe4135f">2005.sigdial-1.6</url>
       <bibkey>schatzmann-etal-2005-quantitative</bibkey>
@@ -75,8 +75,8 @@
     </paper>
     <paper id="7">
       <title>Automatic Induction of Language Model Data for A Spoken Dialogue System</title>
-      <author><first>Grace</first><last>Chung</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="grace-chung"><first>Grace</first><last>Chung</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <pages>55–64</pages>
       <url hash="3299e579">2005.sigdial-1.7</url>
@@ -86,7 +86,7 @@
     <paper id="8">
       <title>Does this Answer your Question? Towards Dialogue Management for Restricted Domain Question Answering Systems</title>
       <author><first>Matthias</first><last>Denecke</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <pages>65–76</pages>
       <url hash="60aaa557">2005.sigdial-1.8</url>
       <bibkey>denecke-yasuda-2005-answer</bibkey>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="9">
       <title>Using Machine Learning for Non-Sentential Utterance Classification</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <author><first>Shalom</first><last>Lappin</last></author>
       <pages>77–86</pages>
@@ -104,8 +104,8 @@
     </paper>
     <paper id="10">
       <title>Using Bigrams to Identify Relationships Between Student Certainness States and Tutor Responses in a Spoken Dialogue Corpus</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>87–96</pages>
       <url hash="14abaa8a">2005.sigdial-1.10</url>
       <bibkey>forbes-riley-litman-2005-using</bibkey>
@@ -114,7 +114,7 @@
     <paper id="11">
       <title>A Corpus Collection and Annotation Framework for Learning Multimodal Clarification Strategies</title>
       <author><first>Verena</first><last>Rieser</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>97–106</pages>
       <url hash="c9c1f372">2005.sigdial-1.11</url>
@@ -143,8 +143,8 @@
     </paper>
     <paper id="14">
       <title>Sorry and <fixed-case>I</fixed-case> Didn’t Catch That! - An Investigation of Non-understanding Errors and Recovery Strategies</title>
-      <author><first>Dan</first><last>Bohus</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <pages>128–143</pages>
       <url hash="fbb72a76">2005.sigdial-1.14</url>
       <bibkey>bohus-rudnicky-2005-sorry</bibkey>
@@ -153,7 +153,7 @@
     <paper id="15">
       <title>Developing City Name Acquisition Strategies in Spoken Dialogue Systems Via User Simulation</title>
       <author><first>Ed</first><last>Filisko</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>144–155</pages>
       <url hash="b57303a5">2005.sigdial-1.15</url>
       <bibkey>filisko-seneff-2005-developing</bibkey>
@@ -195,8 +195,8 @@
     </paper>
     <paper id="20">
       <title>A Collaborative Problem-Solving Model of Dialogue</title>
-      <author><first>Nate</first><last>Blaylock</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>200–211</pages>
       <url hash="84837b6b">2005.sigdial-1.20</url>
       <bibkey>blaylock-allen-2005-collaborative</bibkey>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>D</fixed-case>ialog<fixed-case>D</fixed-case>esigner - A Tool for Rapid System Design and Evaluation</title>
-      <author><first>Hans</first><last>Dybkjær</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjær</last></author>
       <author><first>Laila</first><last>Dybkjær</last></author>
       <pages>227–231</pages>
       <url hash="15bdae7a">2005.sigdial-1.24</url>
@@ -243,13 +243,13 @@
     </paper>
     <paper id="25">
       <title>Dealing with Doctors: A Virtual Human for Non-team Interaction</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>William</first><last>Swartout</last></author>
       <author><first>Jonathan</first><last>Gratch</last></author>
       <author><first>Stacy</first><last>Marsella</last></author>
       <author><first>Patrick</first><last>Kenny</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Shri</first><last>Narayanan</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="shrikanth-narayanan"><first>Shri</first><last>Narayanan</last></author>
       <author><first>Ed</first><last>Fast</last></author>
       <author><first>Bilyana</first><last>Martinovski</last></author>
       <author><first>Rahul</first><last>Baghat</last></author>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="26">
       <title>Meet Hans Christian Andersen</title>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <author><first>Laila</first><last>Dybkjær</last></author>
       <pages>237–241</pages>
       <url hash="a4ad1d39">2005.sigdial-1.26</url>
diff --git a/data/xml/2005.tc.xml b/data/xml/2005.tc.xml
index 767b0f5de1..bf4c40d095 100644
--- a/data/xml/2005.tc.xml
+++ b/data/xml/2005.tc.xml
@@ -25,14 +25,14 @@
     <paper id="3">
       <title>Finding the System that Suits You Best: Towards the Normalization of <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <url hash="6e9329c9">2005.tc-1.3</url>
       <bibkey>estrella-etal-2005-finding</bibkey>
     </paper>
     <paper id="4">
       <title>Reverse Localisation</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="299d0db8">2005.tc-1.4</url>
       <bibkey>schaler-2005-reverse</bibkey>
     </paper>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="7">
       <title>Automatic Detection of Translation Errors: The <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>heck System</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <url hash="866d38c2">2005.tc-1.7</url>
       <bibkey>russell-2005-automatic</bibkey>
     </paper>
diff --git a/data/xml/2006.amta.xml b/data/xml/2006.amta.xml
index a8e506369d..006ce38707 100644
--- a/data/xml/2006.amta.xml
+++ b/data/xml/2006.amta.xml
@@ -40,7 +40,7 @@
     </paper>
     <paper id="3">
       <title>Context-Based Machine Translation</title>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Steve</first><last>Klein</last></author>
       <author><first>David</first><last>Miller</last></author>
       <author><first>Mike</first><last>Steinbaum</last></author>
@@ -53,8 +53,8 @@
     </paper>
     <paper id="4">
       <title>Integration of <fixed-case>POS</fixed-case>tag-based Source Reordering into <fixed-case>SMT</fixed-case> Decoding by an Extended Search Graph</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <pages>29-36</pages>
       <url hash="00c8fc4b">2006.amta-papers.4</url>
       <abstract>This paper presents a reordering framework for statistical machine translation (SMT) where source-side reorderings are integrated into SMT decoding, allowing for a highly constrained reordered search graph. The monotone search is extended by means of a set of reordering patterns (linguistically motivated rewrite patterns). Patterns are automatically learnt in training from word-to-word alignments and source-side Part-Of-Speech (POS) tags. Traversing the extended search graph, the decoder evaluates every hypothesis making use of a group of widely used SMT models and helped by an additional Ngram language model of source-side POS tags. Experiments are reported on the Euparl task (Spanish-to-English and English-to- Spanish). Results are presented regarding translation accuracy (using human and automatic evaluations) and computational efficiency, showing significant improvements in translation quality for both translation directions at a very low computational cost.</abstract>
@@ -63,7 +63,7 @@
     <paper id="5">
       <title>Better Learning and Decoding for Syntax Based <fixed-case>SMT</fixed-case> Using <fixed-case>PSDIG</fixed-case></title>
       <author><first>Yuan</first><last>Ding</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>37-45</pages>
       <url hash="41523813">2006.amta-papers.5</url>
       <abstract>As an approach to syntax based statistical machine translation (SMT), Probabilistic Synchronous Dependency Insertion Grammars (PSDIG), introduced in (Ding and Palmer, 2005), are a version of synchronous grammars defined on dependency trees. In this paper we discuss better learning and decoding algorithms for a PSDIG MT system. We introduce two new grammar learners: (1) an exhaustive learner combining different heuristics, (2) an n-gram based grammar learner. Combining the grammar rules learned from the two learners improved the performance. We introduce a better decoding algorithm which incorporates a tri-gram language model. According to the Bleu metric, the PSDIG MT system performance is significantly better than IBM Model 4, while on par with the state-of-the-art phrase based system Pharaoh (Koehn, 2004). The improved integration of syntax on both source and target languages opens door to more sophisticated SMT processes.</abstract>
@@ -80,7 +80,7 @@
     <paper id="7">
       <title>Challenges in Building an <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish <fixed-case>GHMT</fixed-case> System with <fixed-case>SMT</fixed-case> Components</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>56-65</pages>
       <url hash="fa22e870">2006.amta-papers.7</url>
@@ -91,7 +91,7 @@
       <title>Statistical Syntax-Directed Translation with Extended Domain of Locality</title>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>66-73</pages>
       <url hash="cd36b518">2006.amta-papers.8</url>
       <abstract>In syntax-directed translation, the source-language input is first parsed into a parse-tree, which is then recursively converted into a string in the target-language. We model this conversion by an extended tree-to-string transducer that has multi-level trees on the source-side, which gives our system more expressive power and flexibility. We also define a direct probability model and use a linear-time dynamic programming algorithm to search for the best derivation. The model is then extended to the general log-linear frame-work in order to incorporate other features like n-gram language models. We devise a simple-yet-effective algorithm to generate non-duplicate k-best translations for n-gram rescoring. Preliminary experiments on English-to-Chinese translation show a significant improvement in terms of translation quality compared to a state-of-the- art phrase-based system.</abstract>
@@ -101,7 +101,7 @@
       <title>Corpus Variations for Translation Lexicon Induction</title>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <author><first>Carol</first><last>Nichols</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>74-81</pages>
       <url hash="181f55d1">2006.amta-papers.9</url>
       <abstract>Lexical mappings (word translations) between languages are an invaluable resource for multilingual processing. While the problem of extracting lexical mappings from parallel corpora is well-studied, the task is more challenging when the language samples are from non-parallel corpora. The goal of this work is to investigate one such scenario: finding lexical mappings between dialects of a diglossic language, in which people conduct their written communications in a prestigious formal dialect, but they communicate verbally in a colloquial dialect. Because the two dialects serve different socio-linguistic functions, parallel corpora do not naturally exist between them. An example of a diglossic dialect pair is Modern Standard Arabic (MSA) and Levantine Arabic. In this paper, we evaluate the applicability of a standard algorithm for inducing lexical mappings between comparable corpora (Rapp, 1999) to such diglossic corpora pairs. The focus of the paper is an in-depth error analysis, exploring the notion of relatedness in diglossic corpora and scrutinizing the effects of various dimensions of relatedness (such as mode, topic, style, and statistics) on the quality of the resulting translation lexicon.</abstract>
@@ -109,8 +109,8 @@
     </paper>
     <paper id="10">
       <title>Toward an Interagency Language Roundtable Based Assessment of Speech-to-Speech Translation Capabilities</title>
-      <author><first>Douglas</first><last>Jones</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Sabine</first><last>Atwell</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
       <author><first>James</first><last>Dirgin</last></author>
@@ -148,7 +148,7 @@
       <title>Multi-Engine Machine Translation by Recursive Sentence Decomposition</title>
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>110-118</pages>
       <url hash="9e203226">2006.amta-papers.13</url>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="14">
       <title>Toward Communicating Simple Sentences Using Pictorial Representations</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ben</first><last>Leong</last></author>
       <pages>119-127</pages>
       <url hash="8675a66d">2006.amta-papers.14</url>
@@ -167,8 +167,8 @@
     <paper id="15">
       <title>Induction of Probabilistic Synchronous Tree-Insertion Grammars for Machine Translation</title>
       <author><first>Rebecca</first><last>Nesson</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>128-137</pages>
       <url hash="c35d6d6a">2006.amta-papers.15</url>
       <abstract>The more expressive and flexible a base formalism for machine translation is, the less efficient parsing of it will be. However, even among formalisms with the same parse complexity, some formalisms better realize the desired characteristics for machine translation formalisms than others. We introduce a particular formalism, probabilistic synchronous tree-insertion grammar (PSTIG) that we argue satisfies the desiderata optimally within the class of formalisms that can be parsed no less efficiently than context-free grammars and demonstrate that it outperforms state-of-the-art word-based and phrase-based finite-state translation models on training and test data taken from the EuroParl corpus (Koehn, 2005). We then argue that a higher level of translation quality can be achieved by hybridizing our in- duced model with elementary structures produced using supervised techniques such as those of Groves et al. (2004).</abstract>
@@ -188,7 +188,7 @@
       <author><first>Karolina</first><last>Owczarzak</last></author>
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Declan</first><last>Groves</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>148-155</pages>
       <url hash="41c46b8f">2006.amta-papers.17</url>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="19">
       <title>Direct Application of a Language Learner Test to <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <pages>166-175</pages>
       <url hash="86b1b06b">2006.amta-papers.19</url>
       <abstract>This paper shows the applicability of language testing techniques to machine translation (MT) evaluation through one of a set of related experiments. One straightforward experiment is to use language testing exams and scoring on MT output with little or no adaptation. This paper describes one such experiment, the first in a set. After an initial test (Vanni and Reeder, 2000), we expanded the experiment to include multiple raters and a more detailed analysis of the surprising results. Namely that unlike with humans, MT systems perform more poorly at both level zero and one than at level two and three. This paper presents these results as an illustration of both the applicability of language testing techniques and also the caution that needs to be applied.</abstract>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="20">
       <title>Measuring <fixed-case>MT</fixed-case> Adequacy Using Latent Semantic Analysis</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <pages>176-184</pages>
       <url hash="9740e613">2006.amta-papers.20</url>
       <abstract>Translation adequacy is defined as the amount of semantic content from the source language document that is conveyed in the target language document. As such, it is more difficult to measure than intelligibility since semantic content must be measured in two documents and then compared. Latent Semantic Analysis is a content measurement technique used in language learner evaluation that exhibits characteristics attractive for re-use in machine translation evaluation (MTE). This experiment, which is a series of applications of the LSA algorithm in various configurations, demonstrates its usefulness as an MTE metric for adequacy. In addition, this experiment lays the groundwork for using LSA as a method to measure the accuracy of a translation without reliance on reference translations.</abstract>
@@ -233,7 +233,7 @@
       <title>Ambiguity Reduction for Machine Translation: Human-Computer Collaboration</title>
       <author><first>Marcus</first><last>Sammer</last></author>
       <author><first>Kobi</first><last>Reiter</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>193-202</pages>
@@ -251,9 +251,9 @@
     </paper>
     <paper id="24">
       <title>Combining Linguistic and Statistical Methods for Bi-directional <fixed-case>E</fixed-case>nglish <fixed-case>C</fixed-case>hinese Translation in the Flight Domain</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Chao</first><last>Wang</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>213-222</pages>
       <url hash="bb87980e">2006.amta-papers.24</url>
       <abstract>In this paper, we discuss techniques to combine an interlingua translation framework with phrase-based statistical methods, for translation from Chinese into English. Our goal is to achieve high-quality translation, suitable for use in language tutoring applications. We explore these ideas in the context of a flight domain, for which we have a large corpus of English queries, obtained from users interacting with a dialogue system. Our techniques exploit a pre-existing English-to-Chinese translation system to automatically produce a synthetic bilingual corpus. Several experiments were conducted combining linguistic and statistical methods, and manual evaluation was conducted for a set of 460 Chinese sentences. The best performance achieved an “adequate” or better analysis (3 or above rating) on nearly 94% of the 409 parsable subset. Using a Rover scheme to combine four systems resulted in an “adequate or better” rating for 88% of all the utterances.</abstract>
@@ -261,11 +261,11 @@
     </paper>
     <paper id="25">
       <title>A Study of Translation Edit Rate with Targeted Human Annotation</title>
-      <author><first>Matthew</first><last>Snover</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Rich</first><last>Schwartz</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Rich</first><last>Schwartz</last></author>
       <author><first>Linnea</first><last>Micciulla</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>223-231</pages>
       <url hash="b7fcb71d">2006.amta-papers.25</url>
       <abstract>We examine a new, intuitive measure for evaluating machine-translation output that avoids the knowledge intensiveness of more meaning-based approaches, and the labor-intensiveness of human judgments. Translation Edit Rate (TER) measures the amount of editing that a human would have to perform to change a system output so it exactly matches a reference translation. We show that the single-reference variant of TER correlates as well with human judgments of MT quality as the four-reference variant of BLEU. We also define a human-targeted TER (or HTER) and show that it yields higher correlations with human judgments than BLEU—even when BLEU is given human-targeted references. Our results indicate that HTER correlates with human judgments better than HMETEOR and that the four-reference variants of TER and HTER correlate with human judgments as well as—or better than—a second human judgment does.</abstract>
@@ -276,7 +276,7 @@
       <author><first>Nicolas</first><last>Stroppa</last></author>
       <author><first>Declan</first><last>Groves</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>232-241</pages>
       <url hash="03f399e2">2006.amta-papers.26</url>
       <abstract>Basque is both a minority and a highly inflected language with free order of sentence constituents. Machine Translation of Basque is thus both a real need and a test bed for MT techniques. In this paper, we present a modular Data-Driven MT system which includes different chunkers as well as chunk aligners which can deal with the free order of sentence constituents of Basque. We conducted Basque to English translation experiments, evaluated on a large corpus (270,000 sentence pairs). The experimental results show that our system significantly outperforms state-of-the-art approaches according to several common automatic evaluation metrics.</abstract>
@@ -285,7 +285,7 @@
     <paper id="27">
       <title>Combining Evaluation Metrics via Loss Functions</title>
       <author><first>Calandra</first><last>Tate</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>242-250</pages>
       <url hash="9c00b981">2006.amta-papers.27</url>
       <abstract>When response metrics for evaluating the utility of machine translation (MT) output on a given task do not yield a single ranking of MT engines, how are MT users to decide which engine best supports their task? When the cost of different types of response errors vary, how are MT users to factor that information into their rankings? What impact do different costs have on response-based rankings? Starting with data from an extraction experiment detailed in Voss and Tate (2006), this paper describes three response-rate metrics developed to quantify different aspects of MT users’ performance identifying who/when/where-items in MT output, and then presents a loss function analysis over these rates to derive a single customizable metric, applying a range of values to correct responses and costs to different error types. For the given experimental dataset, loss function analyses provided a clearer characterization of the engines’ relative strength than did comparing the response rates to each other. For one MT engine, varying the costs had no impact: the engine consistently ranked best. By contrast, cost variations did impact the ranking of the other two engines: a rank reversal occurred on who-item extractions when incorrect responses were penalized more than non-responses. Future work with loss analysis, developing operational cost ratios of error rates to correct response rates, will require user studies and expert document-screening personnel to establish baseline values for effective MT engine support on wh-item extraction.</abstract>
@@ -294,7 +294,7 @@
     <paper id="28">
       <title>Scalable Purely-Discriminative Training for Word and Tree Transducers</title>
       <author><first>Benjamin</first><last>Wellington</last></author>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <author><first>Chris</first><last>Pike</last></author>
       <author><first>Dan</first><last>Melamed</last></author>
       <pages>251-260</pages>
@@ -315,7 +315,7 @@
     <paper id="1">
       <title>The Potential and Limitations of <fixed-case>MT</fixed-case> Paradigm</title>
       <author><first>Daniel</first><last>Marcu</last></author>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <url hash="fe7a6c64">2006.amta-talks.1</url>
       <bibkey>marcu-melby-2006-potential</bibkey>
     </paper>
@@ -343,7 +343,7 @@
     </paper>
     <paper id="2">
       <title>Expecting the Unexpected: Using <fixed-case>MT</fixed-case> Operationally</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <bibkey>reeder-2006-expecting</bibkey>
     </paper>
     <paper id="3">
@@ -419,7 +419,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>MT</fixed-case> for social impact</title>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <url hash="5522b165">2006.amta-panel1.4</url>
       <bibkey>mccord-2006-mt</bibkey>
     </paper>
@@ -445,7 +445,7 @@
     </frontmatter>
     <paper id="1">
       <title>Presentation</title>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="e1ccdfad">2006.amta-panel2.1</url>
       <bibkey>carbonell-2006-presentation</bibkey>
     </paper>
@@ -463,14 +463,14 @@
     </paper>
     <paper id="4">
       <title>Combining interlingua with <fixed-case>SMT</fixed-case></title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="4e7596e7">2006.amta-panel2.4</url>
       <bibkey>seneff-2006-combining</bibkey>
     </paper>
     <paper id="5">
       <title>First strategies for integrating hybrid approaches into established systems</title>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="03c4e9f2">2006.amta-panel2.5</url>
       <bibkey>senellart-white-2006-first</bibkey>
     </paper>
@@ -486,7 +486,7 @@
     </meta>
     <paper id="1">
       <title>A Gentle Introduction to Ontologies</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="0efb08d7">2006.amta-tutorials.1</url>
       <bibkey>hovy-2006-gentle</bibkey>
     </paper>
@@ -498,22 +498,22 @@
     </paper>
     <paper id="3">
       <title><fixed-case>A</fixed-case>rabic Dialect Processing</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <url hash="23be85e9">2006.amta-tutorials.3</url>
       <bibkey>diab-habash-2006-arabic</bibkey>
     </paper>
     <paper id="4">
       <title>An Overview of Statistical Machine Translation</title>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Charles</first><last>Schafer</last></author>
       <url hash="90c6cd2e">2006.amta-tutorials.4</url>
       <bibkey>smith-schafer-2006-overview</bibkey>
     </paper>
     <paper id="5">
       <title>Name Translation</title>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <url hash="f5d2f989">2006.amta-tutorials.5</url>
       <bibkey>miller-condon-2006-name</bibkey>
     </paper>
diff --git a/data/xml/2006.bcs.xml b/data/xml/2006.bcs.xml
index 2a1472440c..20a3892224 100644
--- a/data/xml/2006.bcs.xml
+++ b/data/xml/2006.bcs.xml
@@ -11,7 +11,7 @@
     <paper id="1">
       <title>Challenges in Processing Colloquial <fixed-case>A</fixed-case>rabic</title>
       <author><first>Alla</first><last>Rozovskaya</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Elabbas</first><last>Benmamoun</last></author>
       <pages>4-14</pages>
       <url hash="81ed56a1">2006.bcs-1.1</url>
@@ -21,10 +21,10 @@
     <paper id="2">
       <title><fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and the Challenges of <fixed-case>A</fixed-case>rabic</title>
       <author><first>Sabri</first><last>Elkateb</last></author>
-      <author><first>William</first><last>Black</last></author>
+      <author id="william-j-black"><first>William</first><last>Black</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <author><first>Adam</first><last>Pease</last></author>
       <author><first>Musa</first><last>Alkhalifa</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
@@ -35,7 +35,7 @@
     </paper>
     <paper id="3">
       <title>Tips and Tricks of the <fixed-case>P</fixed-case>rague <fixed-case>A</fixed-case>rabic Dependency Treebank</title>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <pages>25-34</pages>
       <url hash="9f652413">2006.bcs-1.3</url>
       <abstract>In this paper, we report on several software implementations that we have developed within Prague Arabic Dependency Treebank or some other projects concerned with Arabic Natural Language Processing. We try to guide the reader through some essential tasks and note the solutions that we have designed and used. We as well point to third-party computational systems that the research community might exploit in the future work in this field.</abstract>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="4">
       <title>Diacritization: A Challenge to <fixed-case>A</fixed-case>rabic Treebank Annotation and Parsing</title>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <pages>35-47</pages>
@@ -62,7 +62,7 @@
     <paper id="6">
       <title>Effective Stemming for <fixed-case>A</fixed-case>rabic Information Retrieval</title>
       <author><first>Youssef</first><last>Kadri</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>68-75</pages>
       <url hash="a13f8831">2006.bcs-1.6</url>
       <abstract>Arabic has a very rich and complex morphology. Its appropriate morphological processing is very important for Information Retrieval (IR). In this paper, we propose a new stemming technique that tries to determine the stem of a word representing the semantic core of this word according to Arabic morphology. This method is compared to a commonly used light stemming technique which truncates a word by simple rules. Our tests on TREC collections show that the new stemming technique is more effective than the light stemming.</abstract>
@@ -89,8 +89,8 @@
     <paper id="9">
       <title>Using Cross-language Information Retrieval for Sentence Alignment</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>95-104</pages>
       <url hash="66e53f17">2006.bcs-1.9</url>
       <abstract>Cross-language information retrieval consists in providing a query in one language and searching documents in different languages. Retrieved documents are ordered by the probability of being relevant to the user's request with the highest ranked being considered the most relevant document. The LIC2M cross-language information retrieval system is a weighted Boolean search engine based on a deep linguistic analysis of the query and the documents to be indexed. This system, designed to work on Arabic, Chinese, English, French, German and Spanish, is composed of a multilingual linguistic analyzer, a statistical analyzer, a reformulator, a comparator and a search engine. The multilingual linguistic analyzer includes a morphological analyzer, a part-of-speech tagger and a syntactic analyzer. In the case of Arabic, a clitic stemmer is added to the morphological analyzer to segment the input words into proclitics, simple forms and enclitics. The linguistic analyzer processes both documents to be indexed and queries to produce a set of normalized lemmas, a set of named entities and a set of nominal compounds with their morpho-syntactic tags. The statistical analyzer computes for documents to be indexed concept weights based on concept database frequencies. The comparator computes intersections between queries and documents and provides a relevance weight for each intersection. Before this comparison, the reformulator expands queries during the search. The expansion is used to infer from the original query words other words expressing the same concepts. The expansion can be in the same language or in different languages. The search engine retrieves the ranked, relevant documents from the indexes according to the corresponding reformulated query and then merges the results obtained for each language, taking into account the original words of the query and their weights in order to score the documents. Sentence alignment consists in estimating which sentence or sentences in the source language correspond with which sentence or sentences in a target language. We present in this paper a new approach to aligning sentences from a parallel corpora based on the LIC2M cross-language information retrieval system. This approach consists in building a database of sentences of the target text and considering each sentence of the source text as a "query" to that database. The aligned bilingual parallel corpora can be used as a translation memory in a computer-aided translation tool.</abstract>
diff --git a/data/xml/2006.eamt.xml b/data/xml/2006.eamt.xml
index 8f40dab3e7..94cc8dc858 100644
--- a/data/xml/2006.eamt.xml
+++ b/data/xml/2006.eamt.xml
@@ -8,7 +8,7 @@
       <month>June 19–20</month>
       <year>2006</year>
       <editor><first>Viggo</first><last>Hansen</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <venue>eamt</venue>
     </meta>
     <frontmatter>
@@ -45,18 +45,18 @@
     <paper id="5">
       <title>A Computer-Assisted Translation Tool based on Finite-State Technology</title>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Antonio L.</first><last>Lagarda</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio L.</first><last>Lagarda</last></author>
       <author><first>Elsa</first><last>Cubel</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
-      <author><first>Juan M.</first><last>Vilar</last></author>
-      <author><first>Sergio</first><last>Barrachina</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
+      <author id="juan-miguel-vilar"><first>Juan M.</first><last>Vilar</last></author>
+      <author id="sergio-barrachina"><first>Sergio</first><last>Barrachina</last></author>
       <url hash="217b3604">2006.eamt-1.5</url>
       <bibkey>civera-etal-2006-computer</bibkey>
     </paper>
     <paper id="6">
       <title>Detecting Inappropriate Use of Free Online Machine Translation by Language Students. A Special Case of Plagiarism Detection</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Ana</first><last>Niño</last></author>
       <url hash="9602a1ed">2006.eamt-1.6</url>
@@ -84,25 +84,25 @@
     </paper>
     <paper id="10">
       <title>Exploiting Word Transformation in Statistical Machine Translation from <fixed-case>S</fixed-case>panish to <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Deepa</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepa</first><last>Gupta</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <url hash="40bb05e0">2006.eamt-1.10</url>
       <bibkey>gupta-federico-2006-exploiting</bibkey>
     </paper>
     <paper id="11">
       <title>A Flexible Architecture for <fixed-case>CAT</fixed-case> Applications</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="8cf96dd7">2006.eamt-1.11</url>
       <bibkey>hasan-etal-2006-flexible</bibkey>
     </paper>
     <paper id="12">
       <title>A Flexible Online Server for Machine Translation Evaluation</title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="8d7d6302">2006.eamt-1.12</url>
       <bibkey>eck-etal-2006-flexible</bibkey>
     </paper>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="16">
       <title>Identifying Complex Phenomena in a Corpus via a Treebank Lens</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <url hash="28e2bb80">2006.eamt-1.16</url>
       <bibkey>flickinger-2006-identifying</bibkey>
     </paper>
@@ -143,9 +143,9 @@
     <paper id="18">
       <title>Leveraging Recurrent Phrase Structure in Large-scale Ontology Translation</title>
       <author><first>G. Craig</first><last>Murray</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <url hash="30d20a15">2006.eamt-1.18</url>
       <bibkey>murray-etal-2006-leveraging</bibkey>
@@ -159,7 +159,7 @@
     <paper id="20">
       <title>Mixtures of <fixed-case>IBM</fixed-case> Model 2</title>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <url hash="238b0464">2006.eamt-1.20</url>
       <bibkey>civera-juan-2006-mixtures</bibkey>
     </paper>
@@ -167,7 +167,7 @@
       <title>Morpho-Syntax Based Statistical Methods for Automatic Sign Language Translation</title>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Jan</first><last>Bungeroth</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="f338d56e">2006.eamt-1.21</url>
       <bibkey>stein-etal-2006-morpho</bibkey>
     </paper>
@@ -180,7 +180,7 @@
     </paper>
     <paper id="23">
       <title>Pragmatics-based <fixed-case>MT</fixed-case> and the Translation of Puns</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
       <url hash="8e0e5a56">2006.eamt-1.23</url>
       <bibkey>farwell-helmreich-2006-pragmatics</bibkey>
@@ -190,14 +190,14 @@
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Karolina</first><last>Owczarzak</last></author>
       <author><first>Declan</first><last>Groves</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="bb8e9240">2006.eamt-1.24</url>
       <bibkey>mellebeek-etal-2006-syntactic</bibkey>
     </paper>
     <paper id="25">
       <title>Task-based Evaluation of Machine Translation (<fixed-case>MT</fixed-case>) Engines. Measuring How Well People Extract Who, When, Where-Type Elements in <fixed-case>MT</fixed-case> Output</title>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <author><first>Calandra R.</first><last>Tate</last></author>
       <url hash="3da77b9b">2006.eamt-1.25</url>
       <bibkey>voss-tate-2006-task</bibkey>
@@ -217,7 +217,7 @@
     <paper id="28">
       <title>Translation Context Sensitive <fixed-case>WSD</fixed-case></title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <url hash="8a095391">2006.eamt-1.28</url>
       <bibkey>specia-etal-2006-translation</bibkey>
@@ -244,10 +244,10 @@
     </paper>
     <paper id="31">
       <title><fixed-case>W</fixed-case>eb<fixed-case>B</fixed-case>oot<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>. Instant Domain-Specific Corpora to Support Human Translators</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Jan</first><last>Pomikalek</last></author>
-      <author><first>Pavel</first><last>Rychly</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychly</last></author>
       <url hash="d60e0bd7">2006.eamt-1.31</url>
       <bibkey>baroni-etal-2006-webbootcat</bibkey>
     </paper>
diff --git a/data/xml/2006.iwslt.xml b/data/xml/2006.iwslt.xml
index c084a13686..e70919471c 100644
--- a/data/xml/2006.iwslt.xml
+++ b/data/xml/2006.iwslt.xml
@@ -16,7 +16,7 @@
     </paper>
     <paper id="2">
       <title>Rosetta: an analyst’s co-pilot</title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <attachment type="presentation" hash="84c10baf">2006.iwslt-plenaries.2.Presentation.pdf</attachment>
       <bibkey>roukos-2006-rosetta</bibkey>
     </paper>
@@ -31,13 +31,13 @@
     </meta>
     <paper id="1">
       <title>Overview of the <fixed-case>IWSLT</fixed-case>06 evaluation campaign</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <url hash="a03676cc">2006.iwslt-evaluation.1</url>
       <bibkey>paul-2006-overview</bibkey>
     </paper>
     <paper id="2">
       <title>Finite-state transducer-based statistical machine translation using joint probabilities</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Stephan</first><last>Kanthak</last></author>
       <author><first>Patrick</first><last>Haffner</last></author>
       <url hash="5dcd6612">2006.iwslt-evaluation.2</url>
@@ -45,11 +45,11 @@
     </paper>
     <paper id="3">
       <title><fixed-case>IWSLT</fixed-case>-06: experiments with commercial <fixed-case>MT</fixed-case> systems and lessons from subjective evaluations</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Youcef</first><last>Bey</last></author>
       <author><first>Mutsuko</first><last>Tomokio</last></author>
       <author><first>Wenjie</first><last>Cao</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <url hash="3740fc9f">2006.iwslt-evaluation.3</url>
       <bibkey>boitet-etal-2006-iwslt</bibkey>
     </paper>
@@ -107,7 +107,7 @@
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2006 <fixed-case>MT</fixed-case> system</title>
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <url hash="17ac6ee1">2006.iwslt-evaluation.10</url>
       <bibkey>shen-etal-2006-mit</bibkey>
     </paper>
@@ -115,22 +115,22 @@
       <title>Phrase reordering for statistical machine translation based on predicate-argument structure</title>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="88c6ae78">2006.iwslt-evaluation.11</url>
       <bibkey>komachi-etal-2006-phrase</bibkey>
     </paper>
     <paper id="12">
       <title>The <fixed-case>N</fixed-case>i<fixed-case>CT</fixed-case>-<fixed-case>ATR</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2006</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Yves</first><last>Lepage</last></author>
       <author><first>Etienne</first><last>Denoual</last></author>
       <author><first>Daichi</first><last>Mochihashi</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="0c241827">2006.iwslt-evaluation.12</url>
       <bibkey>zhang-etal-2006-nict</bibkey>
     </paper>
@@ -142,7 +142,7 @@
       <author><first>Peng</first><last>Liu</last></author>
       <author><first>Keyan</first><last>Zhou</last></author>
       <author><first>Yanqing</first><last>He</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="37d71a20">2006.iwslt-evaluation.13</url>
       <bibkey>chai-etal-2006-nlpr</bibkey>
     </paper>
@@ -160,41 +160,41 @@
       <author><first>Arne</first><last>Mauser</last></author>
       <author><first>Richard</first><last>Zens</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Sasa</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Sasa</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="b7e6e6df">2006.iwslt-evaluation.15</url>
       <bibkey>mauser-etal-2006-rwth</bibkey>
     </paper>
     <paper id="16">
       <title>The <fixed-case>SLE</fixed-case> example-based translation system</title>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <author><first>Victor</first><last>Poznanski</last></author>
       <url hash="76bed28f">2006.iwslt-evaluation.16</url>
       <bibkey>whitelock-poznanski-2006-sle</bibkey>
     </paper>
     <paper id="17">
       <title>The <fixed-case>TALP</fixed-case> Ngram-based <fixed-case>SMT</fixed-case> systems for <fixed-case>IWSLT</fixed-case> 2006</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrick</first><last>Lambert</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <url hash="4184de80">2006.iwslt-evaluation.17</url>
       <bibkey>crego-etal-2006-talp</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>TALP</fixed-case> phrase-based system and <fixed-case>TALP</fixed-case> system combination for <fixed-case>IWSLT</fixed-case> 2006</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <url hash="bcc0c11f">2006.iwslt-evaluation.18</url>
       <bibkey>costa-jussa-etal-2006-talp-phrase</bibkey>
     </paper>
@@ -206,9 +206,9 @@
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
       <author><first>Muntsin</first><last>Kolss</last></author>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="dae257be">2006.iwslt-evaluation.19</url>
       <bibkey>eck-etal-2006-uka</bibkey>
     </paper>
@@ -216,8 +216,8 @@
       <title>The <fixed-case>CMU</fixed-case>-<fixed-case>UKA</fixed-case> syntax augmented machine translation system for <fixed-case>IWSLT</fixed-case>-06</title>
       <author><first>Andreas</first><last>Zollmann</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="940e3622">2006.iwslt-evaluation.20</url>
       <bibkey>zollmann-etal-2006-cmu</bibkey>
     </paper>
@@ -250,15 +250,15 @@
       <title>Automatic sentence segmentation and punctuation prediction for spoken language translation</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="bb1a8713">2006.iwslt-papers.1</url>
       <bibkey>matusov-etal-2006-automatic</bibkey>
     </paper>
     <paper id="2">
       <title>Continuous space language models for the <fixed-case>IWSLT</fixed-case> 2006 task</title>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <url hash="1e486c9f">2006.iwslt-papers.2</url>
       <bibkey>schwenk-etal-2006-continuous-space</bibkey>
     </paper>
@@ -279,7 +279,7 @@
     <paper id="5">
       <title>Tuning machine translation parameters with <fixed-case>SPSA</fixed-case></title>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <url hash="7ff320a6">2006.iwslt-papers.5</url>
       <bibkey>lambert-banchs-2006-tuning</bibkey>
     </paper>
@@ -287,24 +287,24 @@
       <title>An efficient graph search decoder for phrase-based statistical machine translation</title>
       <author><first>Brian</first><last>Delaney</last></author>
       <author><first>Wade</first><last>Shen</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <url hash="088dea01">2006.iwslt-papers.6</url>
       <bibkey>delaney-etal-2006-efficient</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>AER</fixed-case>: do we need to “improve” our alignments?</title>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="9d80d1fd">2006.iwslt-papers.7</url>
       <bibkey>vilar-etal-2006-aer</bibkey>
     </paper>
     <paper id="8">
       <title>Development of client-server speech translation system on a multi-lingual speech communication platform</title>
-      <author><first>Tohru</first><last>Shimizu</last></author>
+      <author id="tohru-shimizu"><first>Tohru</first><last>Shimizu</last></author>
       <author><first>Yutaka</first><last>Ashikari</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="f858315a">2006.iwslt-papers.8</url>
       <bibkey>shimizu-etal-2006-development</bibkey>
diff --git a/data/xml/2006.jeptalnrecital.xml b/data/xml/2006.jeptalnrecital.xml
index 9068420aaf..84f733f847 100644
--- a/data/xml/2006.jeptalnrecital.xml
+++ b/data/xml/2006.jeptalnrecital.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Conférences invitées</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title>At Last Parsing Is Now Operational</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>20–42</pages>
       <abstract>Natural language analysis systems which combine knowledge-based and corpus-based methods are now becoming accurate enough to be used in various applications. We describe one such parsing system for Dutch, known as Alpino, and we show how corpus-based methods are essential to obtain accurate knowledge-based parsers. In particular we show a variety of cases where large amounts of parser output are used to improve the parser.</abstract>
       <url hash="7be0eae2">2006.jeptalnrecital-invite.2</url>
@@ -39,7 +39,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="3">
       <title>Un analyseur morphologique multi-niveaux utilisant la jointure</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>63–72</pages>
       <abstract>Dans cet article nous présentons un analyseur morphologique pour le verbe akkadien. Cette langue est de la famille des langues sémitiques. Les flexions du verbe font intervenir des changements internes à la racine. L’analyseur présenté ici illustre l’utilisation d’un formalisme multi-niveaux et d’opérateurs relationnels puissants, notamment la jointure. La multiplicité de niveaux intermédiaires entre les formes profondes et de surface, ainsi que les opérateurs de compositions permettent de diviser la description en contraintes relativement simples qui sont ensuite rassemblées pour s’exercer soit simultanément, soit en cascade, soit encore d’une façon mixte, c’est-à-dire simultanément pour certains des niveaux et en cascade pour d’autres. Ce mécanisme nous permet de décrire la vocalisation du radical comme un processus d’insertions successives de voyelles. Cela présente l’intérêt d’être plus simple que l’utilisation d’un schéma vocalique figé soumis à interdigitation. De plus, cela semble expliquer de façon plus économique les formes des verbes faibles.</abstract>
       <url hash="5f12fe54">2006.jeptalnrecital-long.3</url>
@@ -93,8 +93,8 @@
     <paper id="5">
       <title>Outilex, plate-forme logicielle de traitement de textes écrits</title>
       <author><first>Olivier</first><last>Blanc</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Éric</first><last>Laporte</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
       <pages>83–92</pages>
       <abstract>La plate-forme logicielle Outilex, qui sera mise à la disposition de la recherche, du développement et de l’industrie, comporte des composants logiciels qui effectuent toutes les opérations fondamentales du traitement automatique du texte écrit : traitements sans lexiques, exploitation de lexiques et de grammaires, gestion de ressources linguistiques. Les données manipulées sont structurées dans des formats XML, et également dans d’autres formats plus compacts, soit lisibles soit binaires, lorsque cela est nécessaire ; les convertisseurs de formats nécessaires sont inclus dans la plate-forme ; les formats de grammaires permettent de combiner des méthodes statistiques avec des méthodes fondées sur des ressources linguistiques. Enfin, des lexiques du français et de l’anglais issus du LADL, construits manuellement et d’une couverture substantielle seront distribués avec la plate-forme sous licence LGPL-LR.</abstract>
       <url hash="098d7718">2006.jeptalnrecital-long.5</url>
@@ -103,8 +103,8 @@
     </paper>
     <paper id="6">
       <title>Une grammaire multilingue partagée pour la traduction automatique de la parole</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Bruna</first><last>Novellas</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
@@ -120,7 +120,7 @@
       <title>Prise en compte des disfluences dans un système d’analyse syntaxique automatique de l’oral</title>
       <author><first>Rémi</first><last>Bove</last></author>
       <author><first>Christine</first><last>Chardenon</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>103–111</pages>
       <abstract>Nous présentons dans cette étude un essai de prise en compte des disfluences dans un système d’analyse linguistique initialement prévu pour l’écrit, en vue de la réalisation d’un prototype de traduction parole-parole. À partir d’une étude approfondie sur corpus, nous montrons comment des modifications du lexique et de la grammaire ont permis de traiter les cas les plus simples (pauses remplies, répétitions de mots isolés, etc.). D’autres cas plus complexes comme répétitions et auto-corrections de syntagmes ont nécessité la mise au point d’un mécanisme de contrôle sémantique permettant de limiter la combinatoire. Cette étude a mis également en évidence la difficulté de traitement de phénomènes tels que les amorces (mots interrompus) et les constructions inachevées, qui pour l’instant restent sans solution satisfaisante.</abstract>
       <url hash="e2c3368a">2006.jeptalnrecital-long.7</url>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="9">
       <title>Extraction de relations sémantiques entre noms et verbes au-delà des liens morphologiques</title>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Didier</first><last>Bourigault</last></author>
       <pages>121–129</pages>
       <abstract>Nous étudions les relations de proximité sémantique entre les noms et les verbes à partir de données calculées sur un corpus de 200 millions de mots par un programme d’analyse distributionnelle automatique. Nous exposons les résultats d’une méthode d’extraction de couples Nom/Verbe, qui combine un indice de proximité distributionnelle et un indice de cooccurrence : un couple est extrait si le nom et le verbe apparaissent avec les mêmes arguments sur l’ensemble du corpus, d’une part, et s’ils apparaissent au moins une fois dans un même paragraphe munis du même argument, d’autre part. L’article élabore une typologie des 1441 couples extraits et démontre l’intérêt de prendre en compte les couples non liés morphologiquement, qui constituent 70 % des données.</abstract>
@@ -180,9 +180,9 @@
     </paper>
     <paper id="13">
       <title>Questions Booléennes : Oui ou Non, des Questions et des Réponses</title>
-      <author><first>Laurent</first><last>Gillard</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>159–166</pages>
       <abstract>Dans cet article, nous présentons une approche afin de traiter les questions booléennes, c’est-à-dire des questions dont la réponse peut être un Oui ou un Non, cela, dans le cadre d’un système de Questions-Réponses. En effet, la campagne Technolangue-EQueR, première campagne francophone de Questions-Réponses (QR) utilisant des questions et un corpus en français, a également été la première campagne QR à introduire une évaluation pour ce type de questions. Nous détaillons, parallèlement à notre approche, des pistes de réflexion sur les aspects sous-jacents à ces questions booléennes, notamment au travers d’une analyse des résultats obtenus par notre système dans un contexte similaire à celui de notre participation à la campagne officielle.</abstract>
       <url hash="25d136f5">2006.jeptalnrecital-long.13</url>
@@ -197,10 +197,10 @@
       <author><first>Bernard</first><last>Fradin</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <author><first>Stéphanie</first><last>Lignon</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Clément</first><last>Plancq</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>167–177</pages>
       <abstract>Dans ce travail, nous étudions en corpus la productivité quantitative des suffixations par -Able et par -ité du français, d’abord indépendamment l’une de l’autre, puis lorsqu’elles s’enchaînent dérivationnellement (la suffixation en -ité s’applique à des bases en -Able dans environ 15 % des cas). Nous estimons la productivité de ces suffixations au moyen de mesures statistiques dont nous suivons l’évolution par rapport à la taille du corpus. Ces deux suffixations sont productives en français moderne : elles forment de nouveaux lexèmes tout au long des corpus étudiés sans qu’on n’observe de saturation, leurs indices de productivité montrent une évolution stable bien qu’étant dépendante des calculs qui leur sont appliqués. On note cependant que, de façon générale, de ces deux suffixations, c’est la suffixation par -ité qui est la plus fréquente en corpus journalistique, sauf précisément quand -ité s’applique à un adjectif en -Able. Étant entendu qu’un adjectif en -Able et le nom en -ité correspondant expriment la même propriété, ce résultat indique que la complexité de la base est un paramètre à prendre en considération dans la formation du lexique possible.</abstract>
       <url hash="acff7c74">2006.jeptalnrecital-long.14</url>
@@ -249,7 +249,7 @@
     </paper>
     <paper id="19">
       <title>De la Chambre des communes à la chambre d’isolement : adaptabilité d’un système de traduction basé sur les segments de phrases</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <pages>217–226</pages>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="23">
       <title>Résolution des références aux documents dans un corpus de dialogues humains</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>257–266</pages>
       <abstract>Cet article étudie la résolution des références à des entités lorsqu’une représentation informatique de ces entités est disponible. Nous nous intéressons à un corpus de dialogues entre humains, portant sur les grands titres de la presse francophone du jour, et proposons une méthode pour détecter et résoudre les références faites par les locuteurs aux articles des journaux. La détection des expressions nominales qui réfèrent à ces documents est réalisée grâce à une grammaire, alors que le problème de la détection des pronoms qui réfèrent aux documents est abordé par des moyens statistiques. La résolution de ces expressions, à savoir l’attribution des référents, fait quant à elle l’objet d’un algorithme inspiré de la résolution des coréférences. Ces propositions sont évaluées par le biais de mesures quantitatives spécifiques.</abstract>
       <url hash="cdd31ba4">2006.jeptalnrecital-long.23</url>
@@ -322,7 +322,7 @@
     </paper>
     <paper id="26">
       <title>Trouver le coupable : Fouille d’erreurs sur des sorties d’analyseurs syntaxiques</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Éric</first><last>Villemonte De La Clergerie</last></author>
       <pages>288–297</pages>
       <abstract>Nous présentons une méthode de fouille d’erreurs pour détecter automatiquement des erreurs dans les ressources utilisées par les systèmes d’analyse syntaxique. Nous avons mis en oeuvre cette méthode sur le résultat de l’analyse de plusieurs millions de mots par deux systèmes d’analyse différents qui ont toutefois en commun le lexique syntaxique et la chaîne de traitement pré-syntaxique. Nous avons pu identifier ainsi des inexactitudes et des incomplétudes dans les ressources utilisées. En particulier, la comparaison des résultats obtenus sur les sorties des deux analyseurs sur un même corpus nous a permis d’isoler les problèmes issus des ressources partagées de ceux issus des grammaires.</abstract>
@@ -350,8 +350,8 @@
     <paper id="29">
       <title>Using Stemming in Morphological Analysis to Improve <fixed-case>A</fixed-case>rabic Information Retrieval</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>318–327</pages>
       <abstract>Information retrieval (IR) consists in finding all relevant documents for a user query in a collection of documents. These documents are ordered by the probability of being relevant to the user’s query. The highest ranked document is considered to be the most likely relevant document. Natural Language Processing (NLP) for IR aims to transform the potentially ambiguous words of queries and documents into unambiguous internal representations on which matching and retrieval can take place. This transformation is generally achieved by several levels of linguistic analysis, morphological, syntactic and so forth. In this paper, we present the Arabic linguistic analyzer used in the LIC2M cross-lingual search engine. We focus on the morphological analyzer and particularly the clitic stemmer which segments the input words into proclitics, simple forms and enclitics. We demonstrate that stemming improves search engine recall and precision.</abstract>
       <url hash="3c85c3eb">2006.jeptalnrecital-long.29</url>
@@ -359,8 +359,8 @@
     </paper>
     <paper id="30">
       <title>Décodage conceptuel et apprentissage automatique : application au corpus de dialogue Homme-Machine <fixed-case>MEDIA</fixed-case></title>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>328–337</pages>
       <abstract>Cette étude présente les travaux du LIA effectués sur le corpus de dialogue homme-machine MEDIA et visant à proposer des méthodes d’analyse robuste permettant d’extraire d’un message audio une séquence de concepts élémentaires. Le modèle de décodage conceptuel présenté est basé sur une approche stochastique qui intègre directement le processus de compréhension au processus de Reconnaissance Automatique de la Parole (RAP). Cette approche permet de garder l’espace probabiliste des phrases produit en sortie du module de RAP et de le projeter vers un espace probabiliste de séquences de concepts. Les expériences menées sur le corpus MEDIA montrent que les performances atteintes par notre modèle sont au niveau des meilleurs systèmes ayant participé à l’évaluation sur des transcriptions manuelles de dialogues. En détaillant les performances du système en fonction de la taille du corpus d’apprentissage on peut mesurer le nombre minimal ainsi que le nombre optimal de dialogues nécessaires à l’apprentissage des modèles. Enfin nous montrons comment des connaissances a priori peuvent être intégrées dans nos modèles afin d’augmenter significativement leur couverture en diminuant, à performance égale, l’effort de constitution et d’annotation du corpus d’apprentissage.</abstract>
       <url hash="8fd2ecab">2006.jeptalnrecital-long.30</url>
@@ -371,8 +371,8 @@
       <title>Vers une prédiction automatique de la difficulté d’une question en langue naturelle</title>
       <author><first>Laurianne</first><last>Sitbon</last></author>
       <author><first>Jens</first><last>Grivolla</last></author>
-      <author><first>Laurent</first><last>Gillard</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <pages>338–347</pages>
       <abstract>Nous proposons et testons deux méthodes de prédiction de la capacité d’un système à répondre à une question factuelle. Une telle prédiciton permet de déterminer si l’on doit initier un dialogue afin de préciser ou de reformuler la question posée par l’utilisateur. La première approche que nous proposons est une adaptation d’une méthode de prédiction dans le domaine de la recherche documentaire, basée soit sur des machines à vecteurs supports (SVM) soit sur des arbres de décision, avec des critères tels que le contenu des questions ou des documents, et des mesures de cohésion entre les documents ou passages de documents d’où sont extraits les réponses. L’autre approche vise à utiliser le type de réponse attendue pour décider de la capacité du système à répondre. Les deux approches ont été testées sur les données de la campagne Technolangue EQUER des systèmes de questions-réponses en français. L’approche à base de SVM est celle qui obtient les meilleurs résultats. Elle permet de distinguer au mieux les questions faciles, celles auxquelles notre système apporte une bonne réponse, des questions difficiles, celles restées sans réponses ou auxquelles le système a répondu de manière incorrecte. A l’opposé on montre que pour notre système, le type de réponse attendue (personnes, quantités, lieux...) n’est pas un facteur déterminant pour la difficulté d’une question.</abstract>
@@ -415,7 +415,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Posters</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -432,7 +432,7 @@
       <title>Étude et analyse de la phrase nominale arabe en <fixed-case>HPSG</fixed-case></title>
       <author><first>Abdelkarim</first><last>Abdelkader</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>379–388</pages>
       <abstract>Dans cet article, nous proposons une démarche d’analyse syntaxique pour les phrases nominales arabes à l’aide du formalisme des grammaires syntagmatiques guidées par les têtes HPSG. Pour ce faire, nous commençons par étudier la typologie de la phrase nominale arabe en précisant ses différentes formes. Puis, nous élaborons une grammaire HPSG traitant ce type de phrase et qui respecte la spécificité de la langue arabe. Ensuite, nous présentons une démarche d’analyse syntaxique se basant sur une approche ascendante et sur le mécanisme d’unification. Enfin, nous donnons une idée sur l’implémentation et l’expérimentation du prototype réalisé.</abstract>
       <url hash="b9210013">2006.jeptalnrecital-poster.1</url>
@@ -445,7 +445,7 @@
       <author><first>Maria</first><last>Fernanda Bacelar do Nascimento</last></author>
       <author><first>João</first><last>Miguel Casteleiro</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Pereira</last></author>
+      <author id="luisa-pereira"><first>Luísa</first><last>Pereira</last></author>
       <author><first>Tiago</first><last>Sá</last></author>
       <pages>389–397</pages>
       <abstract>This presentation reports on an on-going project aimed at building a large lexical database of corpus-extracted multiword (MW) expressions for the Portuguese language. MW expressions were automatically extracted from a balanced 50 million word corpus compiled for this project, furthermore these were statistically interpreted using lexical association measures, followed by a manual validation process. The lexical database covers different types of MW expressions, from named entities to lexical associations with different degrees of cohesion, ranging from totally frozen idioms to favoured co-occurring forms, such as collocations. We aim to achieve two main objectives with this resource. Firstly to build on the large set of data of different types of MW expressions, thus revising existing typologies of collocations and integrating them in a larger theory of MW units. Secondly, to use the extensive hand-checked data as training data to evaluate existing statistical lexical association measures.</abstract>
@@ -454,8 +454,8 @@
     </paper>
     <paper id="3">
       <title>Ambiguous Turn-Taking Games in Conversations</title>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
-      <author><first>Maria Dolores</first><last>Jiménez-López</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="m-dolores-jimenez-lopez"><first>Maria Dolores</first><last>Jiménez-López</last></author>
       <pages>398–406</pages>
       <abstract>Human-computer interfaces require models of dialogue structure that capture the variability and unpredictability within dialogue. Semantic and pragmatic context are continuously evolving during conversation, especially by the distribution of turns that have a direct effect in dialogue exchanges. In this paper we use a formal language paradigm for modelling multi-agent system conversations. Our computational model combines pragmatic minimal units –speech acts– for constructing dialogues. In this framework, we show how turn-taking distribution can be ambiguous and propose an algorithm for solving it, considering turn coherence, trajectories and turn pairing. Finally, we suggest overlapping as one of the possible phenomena emerging from an unresolved turn-taking.</abstract>
       <url hash="1a23586a">2006.jeptalnrecital-poster.3</url>
@@ -485,7 +485,7 @@
       <title>Exploration et utilisation d’informations distantes dans les modèles de langage statistiques</title>
       <author><first>Armelle</first><last>Brun</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>425–434</pages>
       <abstract>Dans le cadre de la modélisation statistique du langage, nous montrons qu’il est possible d’utiliser un modèle n-grammes avec un historique qui n’est pas nécessairement celui avec lequel il a été appris. Par exemple, un adverbe présent dans l’historique peut ne pas avoir d’importance pour la prédiction, et devrait donc être ignoré en décalant l’historique utilisé pour la prédiction. Notre étude porte sur les modèles n-grammes classiques et les modèles n-grammes distants et est appliquée au cas des bigrammes. Nous présentons quatre cas d’utilisation pour deux modèles bigrammes : distants et non distants. Nous montrons que la combinaison linéaire dépendante de l’historique de ces quatre cas permet d’améliorer de 14 % la perplexité du modèle bigrammes classique. Par ailleurs, nous nous intéressons à quelques cas de combinaison qui permettent de mettre en valeur les historiques pour lesquels les modèles que nous proposons sont performants.</abstract>
       <url hash="453d8aae">2006.jeptalnrecital-poster.6</url>
@@ -495,7 +495,7 @@
     <paper id="7">
       <title>Création d’une base terminologique juridique multilingue à l’aide de la plateforme générique Jibiki : le projet <fixed-case>L</fixed-case>ex<fixed-case>ALP</fixed-case></title>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>435–444</pages>
       <abstract>Cet article présente l’utilisation de « Jibiki » (la plateforme de développement du serveur Web Papillon) dans le cadre du projet LexALP1. Le but de ce projet est d’harmoniser la terminologie des quatre langues (français, allemand, italien et slovène) de la Convention Alpine2 de sorte que les états membres puissent coopérer efficacement. Pour cela, le projet utilise la plateforme Jibiki afin de construire une banque terminologique permettant de comparer la terminologie spécialisée de sept systèmes légaux dans quatre langues, et de l’harmoniser, optimisant ainsi la compréhension entre les états alpins sur des questions environnementales au niveau supranational. Dans cet article, nous présentons comment peut être employée la plateforme générique Jibiki afin de gérer un dictionnaire particulier.</abstract>
       <url hash="f709c0ec">2006.jeptalnrecital-poster.7</url>
@@ -536,7 +536,7 @@
       <title>Annotation automatique de relations de contrôle dans des spécifications des besoins informatiques</title>
       <author><first>Jorge</first><last>García-Flores</last></author>
       <author><first>Elena</first><last>Ivanova</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <author><first>Brahim</first><last>Djioua</last></author>
       <pages>473–482</pages>
       <abstract>La conception de logiciels est un processus technologique complexe, qui nécessite d’être assisté par des outils de traitement automatique des langues. Cet article présente une méthode pour l’annotation de relations discursives de contrôle dans des textes de spécification de besoins informatiques (SBI). La méthode vise à distinguer les actions contrôlées par le système de celles contrôlées par son environnement, ce qui permet d’établir de façon claire les limites et les responsabilités d’un système informatique. Notre méthode fait appel à la sémantique discursive pour analyser les moyens d’expression du contrôle dans un corpus de SBI industrielles ; l’expression du contrôle est identifiable par la présence, dans un certain contexte, de marqueurs linguistiques exprimés par des règles dites d’Exploration Contextuelle. La dernière partie montre le processus d’annotation automatique de la notion de contrôle par le système EXCOM et termine par la présentation d’un début d’évaluation de cette méthodologie.</abstract>
@@ -547,7 +547,7 @@
     <paper id="12">
       <title>Vers l’intégration du contexte dans une mémoire de traduction sous-phrastique : détection du domaine de traduction</title>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Claude</first><last>Coulombe</last></author>
       <pages>483–492</pages>
       <abstract>Nous présentons dans cet article une mémoire de traduction sous-phrastique sensible au domaine de traduction, une première étape vers l’intégration du contexte. Ce système est en mesure de recycler les traductions déjà « vues » par la mémoire, non seulement pour des phrases complètes, mais également pour des sous-séquences contiguës de ces phrases, via un aligneur de mots. Les séquences jugées intéressantes sont proposées au traducteur. Nous expliquons également la création d’un utilisateur artificiel, indispensable pour tester les performances du système en l’absence d’intervention humaine. Nous le testons lors de la traduction d’un ensemble disparate de corpus. Ces performances sont exprimées par un ensemble de métriques que nous définissons. Enfin, nous démontrons que la détection automatique du contexte de traduction peut s’avérer bénéfique et prometteuse pour améliorer le fonctionnement d’une telle mémoire, en agissant comme un filtre sur le matériel cible suggéré.</abstract>
@@ -557,7 +557,7 @@
     </paper>
     <paper id="13">
       <title>Analyse et désambiguïsation morphologiques de textes arabes non voyellés</title>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <author><first>Nouha</first><last>Chaâben</last></author>
       <pages>493–501</pages>
       <abstract>Dans ce papier nous proposons d’abord une méthode d’analyse et de désambiguïsation morphologiques de textes arabes non voyellés permettant de lever l’ambiguïté morphologique due à l’absence des marques de voyelles et aussi à l’irrégularité des formes dérivées de certains mots arabes (e.g. formes irrégulières du pluriel des noms et des adjectifs). Ensuite, nous présentons le système MORPH2, un analyseur morphologique de textes arabes non voyellés basé sur la méthode proposée. Ce système est évalué sur un livre scolaire et des articles de journaux. Les résultats obtenus son et très encourageants. En effet, les mesures de rappel et de précision globales sont respectivement de 69,77 % et 68,51 %.</abstract>
@@ -599,7 +599,7 @@
     </paper>
     <paper id="17">
       <title>Graphes paramétrés et outils de lexicalisation</title>
-      <author><first>Éric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
       <author><first>Sébastien</first><last>Paumier</last></author>
       <pages>532–540</pages>
       <abstract>La lexicalisation des grammaires réduit le nombre des erreurs d’analyse syntaxique et améliore les résultats des applications. Cependant, cette modification affecte un système d’analyse syntaxique dans tous ses aspects. Un de nos objectifs de recherche est de mettre au point un modèle réaliste pour la lexicalisation des grammaires. Nous avons réalisé des expériences en ce sens avec une grammaire très simple par son contenu et son formalisme, et un lexique syntaxique très informatif, le lexique-grammaire du français élaboré au LADL. La méthode de lexicalisation est celle des graphes paramétrés. Nos résultats tendent à montrer que la plupart des informations contenues dans le lexique-grammaire peuvent être transférées dans une grammaire et exploitées avec succès dans l’analyse syntaxique de phrases.</abstract>
@@ -647,7 +647,7 @@
     <paper id="22">
       <title>Étude de métaphores conceptuelles à l’aide de vues globales et temporelles sur un corpus</title>
       <author><first>Thibault</first><last>Roy</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <author><first>Pierre</first><last>Beust</last></author>
       <pages>580–589</pages>
       <abstract>Cet article présente des expériences récentes menées dans le cadre d’un projet de recherche consacré à l’étude de métaphores conceptuelles. Ces expériences consistent à appréhender visuellement la répartition de trois domaines pouvant être à l’origine de métaphores conceptuelles dans un corpus d’articles boursiers. Les trois domaines étudiés sont la météorologie, la guerre et la santé, un grand nombre d’emplois métaphoriques du lexique de ces trois domaines ayant été observés dans le corpus d’étude. Afin de visualiser la répartition de ces domaines en corpus, nous exploitons la plate-forme ProxiDocs dédiée à la cartographie et à la catégorisation de corpus. Les cartes construites à partir du corpus et des domaines d’étude nous ont ainsi permis de localiser certaines métaphores conceptuelles dans des articles et des groupes d’articles du corpus. Des articles contenant des emplois non métaphoriques des domaines étudiés ont également été distingués sur les cartes. Des représentations cartographiques du corpus mettant dynamiquement en évidence l’évolution des trois domaines d’étude au fil du temps nous ont permis d’amorcer une étude sur le lien entre la présence de certaines métaphores conceptuelles et des faits d’actualité.</abstract>
@@ -676,8 +676,8 @@
     </paper>
     <paper id="25">
       <title>Modélisation et analyse des coordinations elliptiques par l’exploitation dynamique des forêts de dérivation</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>609–618</pages>
       <abstract>Nous présentons dans cet article une approche générale pour la modélisation et l’analyse syntaxique des coordinations elliptiques. Nous montrons que les lexèmes élidés peuvent être remplacés, au cours de l’analyse, par des informations qui proviennent de l’autre membre de la coordination, utilisé comme guide au niveau des dérivations. De plus, nous montrons comment cette approche peut être effectivement mise en oeuvre par une légère extension des Grammaires d’Arbres Adjoints Lexicalisées (LTAG) à travers une opération dite de fusion. Nous décrivons les algorithmes de dérivation nécessaires pour l’analyse de constructions coordonnées pouvant comporter un nombre quelconque d’ellipses.</abstract>
       <url hash="c264411d">2006.jeptalnrecital-poster.25</url>
@@ -721,7 +721,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Tutoriels</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -748,7 +748,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -837,7 +837,7 @@
     <meta>
       <booktitle>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters)</booktitle>
       <editor><first>Piet</first><last>Mertens</last></editor>
-      <editor><first>Cédrick</first><last>Fairon</last></editor>
+      <editor id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></editor>
       <editor><first>Anne</first><last>Dister</last></editor>
       <editor><first>Patrick</first><last>Watrin</last></editor>
       <publisher>ATALA</publisher>
@@ -906,7 +906,7 @@
     </paper>
     <paper id="7">
       <title>The Application of Singular Value Decomposition to <fixed-case>D</fixed-case>utch Noun-Adjective Matrices</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>767–772</pages>
       <abstract>Automatic acquisition of semantics from text has received quite some attention in natural language processing. A lot of research has been done by looking at syntactically similar contexts. For example, semantically related nouns can be clustered by looking at the collocating adjectives. There are, however, two major problems with this approach : computational complexity and data sparseness. This paper describes the application of a mathematical technique called singular value decomposition, which has been succesfully applied in Information Retrieval to counter these problems. It is investigated whether this technique is also able to cluster nouns according to latent semantic dimensions in a reduced adjective space.</abstract>
       <url hash="5645832a">2006.jeptalnrecital-recitalposter.7</url>
diff --git a/data/xml/2006.tal.xml b/data/xml/2006.tal.xml
index 0ce11f5b84..98f1d9cdb3 100644
--- a/data/xml/2006.tal.xml
+++ b/data/xml/2006.tal.xml
@@ -48,7 +48,7 @@
     <paper id="5">
       <title>Comparabilité de corpus et fouille terminologique multilingue [Corpus comparability and multilingual terminology Mining]</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>113–136</pages>
       <url hash="ed81b786">2006.tal-1.5</url>
       <language>fra</language>
@@ -84,8 +84,8 @@
     </meta>
     <paper id="1">
       <title>Computational Approaches to Discourse and Document Processing</title>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <pages>7–19</pages>
       <url hash="b7c08ba6">2006.tal-2.1</url>
       <bibkey>pery-woodley-scott-2006-computational</bibkey>
@@ -93,8 +93,8 @@
     <paper id="2">
       <title>Discourse-based answering of why-questions</title>
       <author><first>Suzan</first><last>Verberne</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
-      <author><first>Peter-Arno</first><last>Coppen</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
+      <author id="peter-arno-coppen"><first>Peter-Arno</first><last>Coppen</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <pages>21–41</pages>
       <url hash="6e71583a">2006.tal-2.2</url>
@@ -105,8 +105,8 @@
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Nikhil</first><last>Dinesh</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>43–64</pages>
       <url hash="7729a8fe">2006.tal-2.3</url>
       <bibkey>prasad-etal-2006-attribution</bibkey>
@@ -139,7 +139,7 @@
     <paper id="7">
       <title>Visualising discourse coherence in nonlinear documents</title>
       <author><first>Clara</first><last>Mancini</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Simon Buckingham</first><last>Shum</last></author>
       <pages>137–168</pages>
       <url hash="a45ae319">2006.tal-2.7</url>
@@ -195,7 +195,7 @@
     <paper id="2">
       <title>Une architecture de services pour mieux spécialiser les processus d’acquisition terminologique [A service architecture for better specialization of terminology acquisition processes]</title>
       <author><first>Farid</first><last>Cerbah</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>39–61</pages>
       <url hash="27596113">2006.tal-3.2</url>
       <language>fra</language>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="4">
       <title>Modélisation de la coordination dans les grammaires d’interaction [Modeling coordination in interaction grammars]</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
       <pages>89–113</pages>
       <url hash="4778aae6">2006.tal-3.4</url>
@@ -239,8 +239,8 @@
     </paper>
     <paper id="7">
       <title>Une grammaire partagée multitâche pour le traitement de la parole : application aux langues romanes [A multitask shared grammar for speech processing: application to romance languages]</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Bruna</first><last>Novellas</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
@@ -253,8 +253,8 @@
     </paper>
     <paper id="8">
       <title>Traduction automatisée fondée sur le dialogue et documents auto-explicatifs : bilan du projet <fixed-case>LIDIA</fixed-case> [Machine translation based on dialogues and self-explanatory documents: an assessment of the <fixed-case>LIDIA</fixed-case> project]</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Ali</first><last>Choumane</last></author>
       <pages>175–204</pages>
       <url hash="007b2563">2006.tal-3.8</url>
diff --git a/data/xml/2007.iwslt.xml b/data/xml/2007.iwslt.xml
index edb2badd30..f90170240f 100644
--- a/data/xml/2007.iwslt.xml
+++ b/data/xml/2007.iwslt.xml
@@ -10,7 +10,7 @@
     </meta>
     <paper id="1">
       <title>Overview of the <fixed-case>IWSLT</fixed-case> 2007 evaluation campaign</title>
-      <author><first>Cameron Shaw</first><last>Fordyce</last></author>
+      <author id="cameron-shaw-fordyce"><first>Cameron Shaw</first><last>Fordyce</last></author>
       <url hash="855c090b">2007.iwslt-1.1</url>
       <abstract>In this paper we give an overview of the 2007 evaluation campaign for the International Workshop on Spoken Language Translation (IWSLT)1. As with previous evaluation campaigns, the primary focus of the workshop was the translation of spoken language in the travel domain. This year there were four language pairs; the translation of Chinese, Italian, Arabic, and Japanese into English. The input data consisted of the output of ASR systems for read speech and clean text. The exceptions were the challenge task of the Italian English language pair which used spontaneous speech ASR outputs and transcriptions and the Chinese English task which used only clean text. A new characteristic of this year’s evaluation campaign was an increased focus on the sharing of resources. Participants were requested to submit the data and supplementary resources used in building their systems so that the other participants might be able to take advantage of the same resources. A second new characteristic this year was the focus on the human evaluation of systems. Each primary run was judged in the human evaluation for every task using a straightforward ranking of systems. This year's workshop saw an increased participation over last year's workshop. This year 24 groups submitted runs to one or more of the tasks, compared to the 19 groups that submitted runs last year [1]. Automatic and human evaluation were carried out to measure MT performance under each condition, ASR system outputs for read speech, spontaneous travel dialogues, and clean text.</abstract>
       <bibkey>fordyce-2007-overview</bibkey>
@@ -20,8 +20,8 @@
       <author><first>Alicia</first><last>Pérez</last></author>
       <author><first>Víctor</first><last>Guijarrubia</last></author>
       <author><first>Raquel</first><last>Justo</last></author>
-      <author><first>M. Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="48622ccd">2007.iwslt-1.2</url>
       <abstract>The goal of this work is to improve current translation models by taking into account additional knowledge sources such as semantically motivated segmentation or statistical categorization. Specifically, two different approaches are discussed. On the one hand, phrase-based approach, and on the other hand, categorization. For both approaches, both statistical and linguistic alternatives are explored. As for translation framework, finite-state transducers are considered. These are versatile models that can be easily integrated on-the-fly with acoustic models for speech translation purposes. In what the experimental framework concerns, all the models presented were evaluated and compared taking confidence intervals into account.</abstract>
       <bibkey>perez-etal-2007-comparison</bibkey>
@@ -30,7 +30,7 @@
       <title>Improved chunk-level reordering for statistical machine translation</title>
       <author><first>Yuqi</first><last>Zhang</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="6c4dba26">2007.iwslt-1.3</url>
       <abstract>Inspired by previous chunk-level reordering approaches to statistical machine translation, this paper presents two methods to improve the reordering at the chunk level. By introducing a new lattice weighting factor and by reordering the training source data, an improvement is reported on TER and BLEU. Compared to the previous chunklevel reordering approach, the BLEU score improves 1.4% absolutely. The translation results are reported on IWSLT Chinese-English task.</abstract>
       <bibkey>zhang-etal-2007-improved</bibkey>
@@ -41,13 +41,13 @@
       <author><first>Matthais</first><last>Eck</last></author>
       <author><first>Paisarn</first><last>Charoenpornsawat</last></author>
       <author><first>Thilo</first><last>Köhler</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
       <author><first>Roger</first><last>Hsiao</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <url hash="4a39ee37">2007.iwslt-1.4</url>
       <abstract>The paper describes our portable two-way speech-to-speech translation system using a completely eyes-free/hands-free user interface. This system translates between the language pair English and Iraqi Arabic as well as between English and Farsi, and was built within the framework of the DARPA TransTac program. The Farsi language support was developed within a 90-day period, testing our ability to rapidly support new languages. The paper gives an overview of the system’s components along with the individual component objective measures and a discussion of issues relevant for the overall usage of the system. We found that usability, flexibility, and robustness serve as severe constraints on system architecture and design.</abstract>
       <bibkey>bach-etal-2007-cmu</bibkey>
@@ -56,7 +56,7 @@
       <title><fixed-case>CASIA</fixed-case> phrase-based <fixed-case>SMT</fixed-case> system for <fixed-case>IWSLT</fixed-case>’07</title>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Yanqing</first><last>He</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="65cf4c25">2007.iwslt-1.5</url>
       <bibkey>zhou-etal-2007-casia</bibkey>
     </paper>
@@ -82,7 +82,7 @@
       <author><first>Jun</first><last>Sun</last></author>
       <author><first>Hongfei</first><last>Jiang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <url hash="0b19016b">2007.iwslt-1.8</url>
       <abstract>In this paper, we describe the system and approach used by Institute for Infocomm Research (I2R) for the IWSLT 2007 spoken language evaluation campaign. A multi-pass approach is exploited to generate and select best translation. First, we use two decoders namely the open source Moses and an in-home syntax-based decoder to generate N-best lists. Next we spawn new translation entries through a word-based n-gram language model estimated on the former N-best entries. Finally, we join the N-best lists from the previous two passes, and select the best translation by rescoring them with additional feature functions. In particular, this paper reports our effort on new translation entry generation and system combination. The performance on development and test sets are reported. The system was ranked first with respect to the BLEU measure in Chinese-to-English open data track.</abstract>
       <bibkey>chen-etal-2007-i2r</bibkey>
@@ -91,20 +91,20 @@
       <title>The <fixed-case>CMU</fixed-case>-<fixed-case>UKA</fixed-case> statistical machine translation systems for <fixed-case>IWSLT</fixed-case> 2007</title>
       <author><first>Ian</first><last>Lane</last></author>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Thuy Linh</first><last>Nguyen</last></author>
+      <author id="thuylinh-nguyen"><first>Thuy Linh</first><last>Nguyen</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Kay</first><last>Rottmann</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="cbb0f523">2007.iwslt-1.9</url>
       <abstract>This paper describes the CMU-UKA statistical machine translation systems submitted to the IWSLT 2007 evaluation campaign. Systems were submitted for three language-pairs: Japanese→English, Chinese→English and Arabic→English. All systems were based on a common phrase-based SMT (statistical machine translation) framework but for each language-pair a specific research problem was tackled. For Japanese→English we focused on two problems: first, punctuation recovery, and second, how to incorporate topic-knowledge into the translation framework. Our Chinese→English submission focused on syntax-augmented SMT and for the Arabic→English task we focused on incorporating morphological-decomposition into the SMT framework. This research strategy enabled us to evaluate a wide variety of approaches which proved effective for the language pairs they were evaluated on.</abstract>
       <bibkey>lane-etal-2007-cmu</bibkey>
     </paper>
     <paper id="10">
       <title><fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>r<fixed-case>E</fixed-case>x: the <fixed-case>DCU</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2007</title>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="aa2d947a">2007.iwslt-1.10</url>
@@ -143,7 +143,7 @@
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2007 <fixed-case>MT</fixed-case> system</title>
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Ray</first><last>Slyh</last></author>
       <url hash="263c336a">2007.iwslt-1.14</url>
       <abstract>The MIT-LL/AFRL MT system implements a standard phrase-based, statistical translation model. It incorporates a number of extensions that improve performance for speech-based translation. During this evaluation our efforts focused on the rapid porting of our SMT system to a new language (Arabic) and novel approaches to translation from speech input. This paper discusses the architecture of the MIT-LL/AFRL MT system, improvements over our 2006 system, and experiments we ran during the IWSLT-2007 evaluation. Specifically, we focus on 1) experiments comparing the performance of confusion network decoding and direct lattice decoding techniques for machine translation of speech, 2) the application of lightweight morphology for Arabic MT preprocessing and 3) improved confusion network decoding.</abstract>
@@ -154,11 +154,11 @@
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Etienne</first><last>Denoual</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="15f6db74">2007.iwslt-1.15</url>
       <abstract>This paper describes the NiCT-ATR statistical machine translation (SMT) system used for the IWSLT 2007 evaluation campaign. We participated in three of the four language pair translation tasks (CE, JE, and IE). We used a phrase-based SMT system using log-linear feature models for all tracks. This year we decoded from the ASR n-best lists in the JE track and found a gain in performance. We also applied some new techniques to facilitate the use of out-of-domain external resources by model combination and also by utilizing a huge corpus of n-grams provided by Google Inc.. Using these resources gave mixed results that depended on the technique also the language pair however, in some cases we achieved consistently positive results. The results from model-interpolation in particular were very promising.</abstract>
       <bibkey>finch-etal-2007-nict</bibkey>
@@ -179,11 +179,11 @@
       <author><first>Zhongjun</first><last>He</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
       <author><first>Yun</first><last>Huang</last></author>
       <author><first>Zhixiang</first><last>Ren</last></author>
-      <author><first>Yajuan</first><last>Lu</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <url hash="3b013c2d">2007.iwslt-1.17</url>
       <abstract>In this paper, we give an overview of the ICT statistical machine translation systems for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2007. In this year’s evaluation, we participated in the Chinese-English transcript translation task, and developed three systems based on different techniques: a formally syntax-based system Bruin, an extended phrase-based system Confucius and a linguistically syntax-based system Lynx. We will describe the models of these three systems, and compare their performance in detail. We set Bruin as our primary system, which ranks 2 among the 15 primary results according to the official evaluation results.</abstract>
@@ -191,25 +191,25 @@
     </paper>
     <paper id="18">
       <title>The <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> <fixed-case>IWSLT</fixed-case>07 <fixed-case>SMT</fixed-case> system</title>
-      <author><first>João V.</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João V.</first><last>Graça</last></author>
       <author><first>Diamantino</first><last>Caseiro</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <url hash="2b88182d">2007.iwslt-1.18</url>
       <abstract>We present the machine translation system used by L2F from INESC-ID in the evaluation campaign of the International Workshop on Spoken Language Translation (2007), in the task of translating spontaneous conversations in the travel domain from Italian to English.</abstract>
       <bibkey>graca-etal-2007-inesc</bibkey>
     </paper>
     <paper id="19">
       <title>Using word posterior probabilities in lattice translation</title>
-      <author><first>Vicente</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicente</first><last>Alabau</last></author>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="6bbb2e06">2007.iwslt-1.19</url>
       <abstract>In this paper we describe the statistical machine translation system developed at ITI/UPV, which aims especially at speech recognition and statistical machine translation integration, for the evaluation campaign of the International Workshop on Spoken Language Translation (2007). The system we have developed takes advantage of an improved word lattice representation that uses word posterior probabilities. These word posterior probabilities are then added as a feature to a log-linear model. This model includes a stochastic finite-state transducer which allows an easy lattice integration. Furthermore, it provides a statistical phrase-based reordering model that is able to perform local reorderings of the output. We have tested this model on the Italian-English corpus, for clean text, 1-best ASR and lattice ASR inputs. The results and conclusions of such experiments are reported at the end of this paper.</abstract>
       <bibkey>alabau-etal-2007-using</bibkey>
     </paper>
     <paper id="20">
       <title>The <fixed-case>LIG</fixed-case> <fixed-case>A</fixed-case>rabic/<fixed-case>E</fixed-case>nglish speech translation system at <fixed-case>IWSLT</fixed-case>07</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Amar</first><last>Mahdhaoui</last></author>
       <author><first>Viet-Bac</first><last>Le</last></author>
       <url hash="5b5cbc8c">2007.iwslt-1.20</url>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>NUDT</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case>2007</title>
-      <author><first>Wen-Han</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wen-Han</first><last>Chao</last></author>
       <author><first>Zhou-Jun</first><last>Li</last></author>
       <url hash="6ef5aed3">2007.iwslt-1.21</url>
       <abstract>In this paper, we describe our machine translation system which was used for the Chinese-to-English task in the IWSLT2007 evaluation campaign. The system is a statistical machine translation (SMT) system, while containing an example-based decoder. In this way, it will help to solve the re-ordering problem and other problems for spoken language MT, such as lots of omissions, idioms etc. We report the results of the system for the provided evaluation sets.</abstract>
@@ -227,8 +227,8 @@
     <paper id="22">
       <title><fixed-case>MISTRAL</fixed-case>: a lattice translation system for <fixed-case>IWSLT</fixed-case> 2007</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <url hash="22e059fe">2007.iwslt-1.22</url>
       <abstract>This paper describes MISTRAL, the lattice translation system that we developed for the Italian-English track of the International Workshop on Spoken Language Translation 2007. MISTRAL is a discriminative phrase-based system that translates a source word lattice in two passes. The first pass extracts a list of top ranked sentence pairs from the lattice and the second pass rescores this list with more complex features. Our experiments show that our system, when translating pruned lattices, is at least as good as a fair baseline that translates the first ranked sentences returned by a speech recognition system.</abstract>
       <bibkey>patry-etal-2007-mistral</bibkey>
@@ -257,7 +257,7 @@
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="9a591e8b">2007.iwslt-1.25</url>
       <abstract>The RWTH system for the IWSLT 2007 evaluation is a combination of several statistical machine translation systems. The combination includes Phrase-Based models, a n-gram translation model and a hierarchical phrase model. We describe the individual systems and the method that was used for combining the system outputs. Compared to our 2006 system, we newly introduce a hierarchical phrase-based translation model and show improvements in system combination for Machine Translation. RWTH participated in the Italian-to-English and Chinese-to-English translation directions.</abstract>
       <bibkey>mauser-etal-2007-rwth</bibkey>
@@ -265,12 +265,12 @@
     <paper id="26">
       <title>The <fixed-case>TALP</fixed-case> ngram-based <fixed-case>SMT</fixed-case> system for <fixed-case>IWSLT</fixed-case> 2007</title>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <url hash="e95a9ab7">2007.iwslt-1.26</url>
       <abstract>This paper describes TALPtuples, the 2007 N-gram-based statistical machine translation system developed at the TALP Research Center of the UPC (Universitat Polite`cnica de Catalunya) in Barcelona. Emphasis is put on improvements and extensions of the system of previous years. Mainly, these include optimizing alignment parameters in function of translation metric scores and rescoring with a neural network language model. Results on two translation directions are reported, namely from Arabic and Chinese into English, thoroughly explaining all language-related preprocessing and translation schemes.</abstract>
@@ -287,7 +287,7 @@
     </paper>
     <paper id="28">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland translation system for <fixed-case>IWSLT</fixed-case> 2007</title>
-      <author><first>Christopher J.</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Christopher J.</first><last>Dyer</last></author>
       <url hash="a3231d19">2007.iwslt-1.28</url>
       <abstract>This paper describes the University of Maryland statistical machine translation system used in the IWSLT 2007 evaluation. Our focus was threefold: using hierarchical phrase-based models in spoken language translation, the incorporation of sub-lexical information in model estimation via morphological analysis (Arabic) and word and character segmentation (Chinese), and the use of n-gram sequence models for source-side punctuation prediction. Our efforts yield significant improvements in Chinese-English and Arabic-English translation tasks for both spoken language and human transcription conditions.</abstract>
       <bibkey>dyer-2007-university</bibkey>
diff --git a/data/xml/2007.jeptalnrecital.xml b/data/xml/2007.jeptalnrecital.xml
index 0bdafb6908..4398b3e49a 100644
--- a/data/xml/2007.jeptalnrecital.xml
+++ b/data/xml/2007.jeptalnrecital.xml
@@ -19,7 +19,7 @@
       <title>Exploiting structural meeting-specific features for topic segmentation</title>
       <author><first>Maria</first><last>Georgescul</last></author>
       <author><first>Alexander</first><last>Clarck</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <pages>15–24</pages>
       <abstract>In this article we address the task of automatic text structuring into linear and non-overlapping thematic episodes. Our investigation reports on the use of various lexical, acoustic and syntactic features, and makes a comparison of how these features influence performance of automatic topic segmentation. Using datasets containing multi-party meeting transcriptions, we base our experiments on a proven state-of-the-art approach using support vector classification.</abstract>
       <url hash="d465a7c6">2007.jeptalnrecital-long.1</url>
@@ -29,7 +29,7 @@
       <title>Énergie textuelle de mémoires associatives</title>
       <author><first>Silvia</first><last>Fernández</last></author>
       <author><first>Eric</first><last>Sanjuan</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>25–34</pages>
       <abstract>Dans cet article, nous présentons une approche de réseaux de neurones inspirée de la physique statistique de systèmes magnétiques pour étudier des problèmes fondamentaux du Traitement Automatique de la Langue Naturelle. L’algorithme modélise un document comme un système de neurones où l’on déduit l’énergie textuelle. Nous avons appliqué cette approche aux problèmes de résumé automatique et de détection de frontières thématiques. Les résultats sont très encourageants.</abstract>
       <url hash="a7a3676e">2007.jeptalnrecital-long.2</url>
@@ -48,8 +48,8 @@
     </paper>
     <paper id="4">
       <title>Identifier les pronoms anaphoriques et trouver leurs antécédents : l’intérêt de la classification bayésienne</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>45–54</pages>
       <abstract>On oppose souvent en TAL les systèmes à base de connaissances linguistiques et ceux qui reposent sur des indices de surface. Chaque approche a ses limites et ses avantages. Nous proposons dans cet article une nouvelle approche qui repose sur les réseaux bayésiens et qui permet de combiner au sein d’une même représentation ces deux types d’informations hétérogènes et complémentaires. Nous justifions l’intérêt de notre approche en comparant les performances du réseau bayésien à celles des systèmes de l’état de l’art, sur un problème difficile du TAL, celui de la résolution d’anaphore.</abstract>
       <url hash="c9e0890d">2007.jeptalnrecital-long.4</url>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="6">
       <title>Structures de traits typées et morphologie à partitions</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>65–74</pages>
       <abstract>Les structures de traits typées sont une façon abstraite et agréable de représenter une information partielle. Dans cet article, nous montrons comment la combinaison de deux techniques relativement classiques permet de définir une variante de morphologie à deux niveaux intégrant harmonieusement des structures de traits et se compilant en une machine finie. La première de ces techniques est la compilation de structure de traits en expressions régulières, la seconde est la morphologie à partition. Nous illustrons au moyen de deux exemples l’expressivité d’un formalisme qui rapproche les grammaires à deux niveaux des grammaires d’unification.</abstract>
       <url hash="63da3cdd">2007.jeptalnrecital-long.6</url>
@@ -77,8 +77,8 @@
     <paper id="7">
       <title>Analyse morphosémantique des composés savants : transposition du français à l’anglais</title>
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>75–84</pages>
       <abstract>La plupart des vocabulaires spécialisés comprennent une part importante de lexèmes morphologiquement complexes, construits à partir de racines grecques et latines, qu’on appelle « composés savants ». Une analyse morphosémantique permet de décomposer et de donner des définitions à ces lexèmes, et semble pouvoir être appliquée de façon similaire aux composés de plusieurs langues. Cet article présente l’adaptation d’un analyseur morphosémantique, initialement dédié au français (DériF), à l’analyse de composés savants médicaux anglais, illustrant ainsi la similarité de structure de ces composés dans des langues européennes proches. Nous exposons les principes de cette transposition et ses performances. L’analyseur a été testé sur un ensemble de 1299 lexèmes extraits de la terminologie médicale WHO-ART : 859 ont pu être décomposés et définis, dont 675 avec succès. Outre une simple transposition d’une langue à l’autre, la méthode montre la potentialité d’un système multilingue.</abstract>
       <url hash="174c6a93">2007.jeptalnrecital-long.7</url>
@@ -88,7 +88,7 @@
     <paper id="8">
       <title>A tool for detecting <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish cognates and false friends</title>
       <author><first>Oana</first><last>Frunza</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>85–94</pages>
       <abstract>Cognates are pairs of words in different languages similar in spelling and meaning. They can help a second-language learner on the tasks of vocabulary expansion and reading comprehension. False friends are pairs of words that have similar spelling but different meanings. Partial cognates are pairs of words in two languages that have the same meaning in some, but not all contexts. In this article we present a method to automatically classify a pair of words as cognates or false friends, by using several measures of orthographic similarity as features for classification. We use this method to create complete lists of cognates and false friends between two languages. We also disambiguate partial cognates in context. We applied all our methods to French and English, but they can be applied to other pairs of languages as well. We built a tool that takes the produced lists and annotates a French text with equivalent English cognates or false friends, in order to help second-language learners improve their reading comprehension skills and retention rate.</abstract>
       <url hash="5ecea481">2007.jeptalnrecital-long.8</url>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="9">
       <title>Enrichissement d’un lexique bilingue par analogie</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <pages>95–104</pages>
       <abstract>La présence de mots inconnus dans les applications langagières représente un défi de taille bien connu auquel n’échappe pas la traduction automatique. Les systèmes professionnels de traduction offrent à cet effet à leurs utilisateurs la possibilité d’enrichir un lexique de base avec de nouvelles entrées. Récemment, Stroppa et Yvon (2005) démontraient l’intérêt du raisonnement par analogie pour l’analyse morphologique d’une langue. Dans cette étude, nous montrons que le raisonnement par analogie offre également une réponse adaptée au problème de la traduction d’entrées lexicales inconnues.</abstract>
@@ -127,7 +127,7 @@
     <paper id="12">
       <title>Vers un méta-<fixed-case>EDL</fixed-case> complet, puis un <fixed-case>EDL</fixed-case> universel pour la <fixed-case>TAO</fixed-case></title>
       <author><first>Hong-Thai</first><last>Nguyen</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>125–134</pages>
       <abstract>Un “méta-EDL” (méta-Environnement de Développement Linguiciel) pour la TAO permet de piloter à distance un ou plusieurs EDL pour construire des systèmes de TAO hétérogènes. Partant de CASH, un méta-EDL dédié à Ariane-G5, et de WICALE 1.0, un premier méta-EDL générique mais aux fonctionnalités minimales, nous dégageons les problèmes liés à l’ajout de fonctionnalités riches comme l’édition et la navigation en local, et donnons une solution implémentée dans WICALE 2.0. Nous y intégrons maintenant une base lexicale pour les systèmes à « pivot lexical », comme UNL/U++. Un but à plus long terme est de passer d’un tel méta-EDL générique multifonctionnel à un EDL « universel », ce qui suppose la réingénierie des compilateurs et des moteurs des langages spécialisés pour la programmation linguistique (LSPL) supportés par les divers EDL.</abstract>
       <url hash="ed024e73">2007.jeptalnrecital-long.12</url>
@@ -137,7 +137,7 @@
     <paper id="13">
       <title>Aides à la navigation dans un corpus de transcriptions d’oral</title>
       <author><first>Frederik</first><last>Cailliau</last></author>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <pages>135–144</pages>
       <abstract>Dans cet article, nous évaluons les performances de fonctionnalités d’aide à la navigation dans un contexte de recherche dans un corpus audio. Nous montrons que les particularités de la transcription et, en particulier les erreurs, conduisent à une dégradation parfois importante des performances des outils d’analyse. Si la navigation par concepts reste dans des niveaux d’erreur acceptables, la reconnaissance des entités nommées, utilisée pour l’aide à la lecture, voit ses performances fortement baisser. Notre remise en doute de la portabilité de ces fonctions à un corpus oral est néanmoins atténuée par la nature même du corpus qui incite à considérer que toute méthodes permettant de réduire le temps d’accès à l’information est pertinente, même si les outils utilisés sont imparfaits.</abstract>
       <url hash="e676e061">2007.jeptalnrecital-long.13</url>
@@ -185,7 +185,7 @@
       <title>Disambiguating automatic semantic annotation based on a thesaurus structure</title>
       <author><first>Véronique</first><last>Malaisé</last></author>
       <author><first>Luit</first><last>Gazendam</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <pages>185–194</pages>
       <abstract>The use/use for relationship a thesaurus is usually more complex than the (para-) synonymy recommended in the ISO-2788 standard describing the content of these controlled vocabularies. The fact that a non preferred term can refer to multiple preferred terms (only the latter are relevant in controlled indexing) makes this relationship difficult to use in automatic annotation applications : it generates ambiguity cases. In this paper, we present the CARROT algorithm, meant to rank the output of our Information Extraction pipeline, and how this algorithm can be used to select the relevant preferred term out of different possibilities. This selection is meant to provide suggestions of keywords to human annotators, in order to ease and speed up their daily process and is based on the structure of their thesaurus. We achieve a 95 % success, and discuss these results along with perspectives for this experiment.</abstract>
       <url hash="76cac206">2007.jeptalnrecital-long.18</url>
@@ -213,7 +213,7 @@
     <paper id="21">
       <title>Comparaison du Lexique-Grammaire des verbes pleins et de <fixed-case>DICOVALENCE</fixed-case> : vers une intégration dans le Lefff</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>215–224</pages>
       <abstract>Cet article compare le Lexique-Grammaire des verbes pleins et DICOVALENCE, deux ressources lexicales syntaxiques pour le français développées par des linguistes depuis de nombreuses années. Nous étudions en particulier les divergences et les empiètements des modèles lexicaux sous-jacents. Puis nous présentons le Lefff , lexique syntaxique à grande échelle pour le TAL, et son propre modèle lexical. Nous montrons que ce modèle est à même d’intégrer les informations lexicales présentes dans le Lexique-Grammaire et dans DICOVALENCE. Nous présentons les résultats des premiers travaux effectués en ce sens, avec pour objectif à terme la constitution d’un lexique syntaxique de référence pour le TAL.</abstract>
       <url hash="adf20b11">2007.jeptalnrecital-long.21</url>
@@ -245,9 +245,9 @@
     </paper>
     <paper id="24">
       <title>Résolution de la référence dans des dialogues homme-machine : évaluation sur corpus de deux approches symbolique et probabiliste</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <pages>245–254</pages>
       <abstract>Cet article décrit deux approches, l’une numérique, l’autre symbolique, traitant le problème de la résolution de la référence dans un cadre de dialogue homme-machine. L’analyse des résultats obtenus sur le corpus MEDIA montre la complémentarité des deux systèmes développés : robustesse aux erreurs et hypothèses multiples pour l’approche numérique ; modélisation de phénomènes complexes et interprétation complète pour l’approche symbolique.</abstract>
       <url hash="8d5184d1">2007.jeptalnrecital-long.24</url>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="25">
       <title>Annotation précise du français en sémantique de rôles par projection cross-linguistique</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Guillaume</first><last>Pitel</last></author>
       <pages>255–264</pages>
       <abstract>Dans le paradigme FrameNet, cet article aborde le problème de l’annotation précise et automatique de rôles sémantiques dans une langue sans lexique FrameNet existant. Nous évaluons la méthode proposée par Padó et Lapata (2005, 2006), fondée sur la projection de rôles et appliquée initialement à la paire anglais-allemand. Nous testons sa généralisabilité du point de vue (a) des langues, en l’appliquant à la paire (anglais-français) et (b) de la qualité de la source, en utilisant une annotation automatique du côté anglais. Les expériences montrent des résultats à la hauteur de ceux obtenus pour l’allemand, nous permettant de conclure que cette approche présente un grand potentiel pour réduire la quantité de travail nécessaire à la création de telles ressources dans de nombreuses langues.</abstract>
@@ -380,7 +380,7 @@
     <paper id="37">
       <title>Collocation translation based on sentence alignment and parsing</title>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>375–384</pages>
       <abstract>Bien que de nombreux efforts aient été déployés pour extraire des collocations à partir de corpus de textes, seule une minorité de travaux se préoccupent aussi de rendre le résultat de l’extraction prêt à être utilisé dans les applications TAL qui pourraient en bénéficier, telles que la traduction automatique. Cet article décrit une méthode précise d’identification de la traduction des collocations dans un corpus parallèle, qui présente les avantages suivants : elle peut traiter des collocation flexibles (et pas seulement figées) ; elle a besoin de ressources limitées et d’un pouvoir de calcul raisonnable (pas d’alignement complet, pas d’entraînement) ; elle peut être appliquée à plusieurs paires des langues et fonctionne même en l’absence de dictionnaires bilingues. La méthode est basée sur l’information syntaxique provenant du parseur multilingue Fips. L’évaluation effectuée sur 4000 collocations de type verbe-objet correspondant à plusieurs paires de langues a montré une précision moyenne de 89.8% et une couverture satisfaisante (70.9%). Ces résultats sont supérieurs à ceux enregistrés dans l’évaluation d’autres méthodes de traduction de collocations.</abstract>
       <url hash="ae226aab">2007.jeptalnrecital-long.37</url>
@@ -390,7 +390,7 @@
     <paper id="38">
       <title>Utilisation d’une approche basée sur la recherche cross-lingue d’information pour l’alignement de phrases à partir de textes bilingues Arabe-Français</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>385–394</pages>
       <abstract>L’alignement de phrases à partir de textes bilingues consiste à reconnaître les phrases qui sont traductions les unes des autres. Cet article présente une nouvelle approche pour aligner les phrases d’un corpus parallèle. Cette approche est basée sur la recherche crosslingue d’information et consiste à construire une base de données des phrases du texte cible et considérer chaque phrase du texte source comme une requête à cette base. La recherche crosslingue utilise un analyseur linguistique et un moteur de recherche. L’analyseur linguistique traite aussi bien les documents à indexer que les requêtes et produit un ensemble de lemmes normalisés, un ensemble d’entités nommées et un ensemble de mots composés avec leurs étiquettes morpho-syntaxiques. Le moteur de recherche construit les fichiers inversés des documents en se basant sur leur analyse linguistique et retrouve les documents pertinents à partir de leur indexes. L’aligneur de phrases a été évalué sur un corpus parallèle Arabe-Français et les résultats obtenus montrent que 97% des phrases ont été correctement alignées.</abstract>
       <url hash="aa1d3efe">2007.jeptalnrecital-long.38</url>
@@ -435,7 +435,7 @@
     <paper id="3">
       <title>Segmentation en super-chunks</title>
       <author><first>Olivier</first><last>Blanc</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <pages>33–42</pages>
       <abstract>Depuis l’analyseur développé par Harris à la fin des années 50, les unités polylexicales ont peu à peu été intégrées aux analyseurs syntaxiques. Cependant, pour la plupart, elles sont encore restreintes aux mots composés qui sont plus stables et moins nombreux. Toutefois, la langue est remplie d’expressions semi-figées qui forment également des unités sémantiques : les expressions adverbiales et les collocations. De même que pour les mots composés traditionnels, l’identification de ces structures limite la complexité combinatoire induite par l’ambiguïté lexicale. Dans cet article, nous détaillons une expérience qui intègre ces notions dans un processus de segmentation en super-chunks, préalable à l’analyse syntaxique. Nous montrons que notre chunker, développé pour le français, atteint une précision et un rappel de 92,9 % et 98,7 %, respectivement. Par ailleurs, les unités polylexicales réalisent 36,6 % des attachements internes aux constituants nominaux et prépositionnels.</abstract>
@@ -447,7 +447,7 @@
       <title>Détection et prédiction de la satisfaction des usagers dans les dialogues Personne-Machine</title>
       <author><first>Narjès</first><last>Boufaden</last></author>
       <author><first>Truong</first><last>Le Hoang</last></author>
-      <author><first>Pierre</first><last>Dumouchel</last></author>
+      <author id="pierre-dumouchel"><first>Pierre</first><last>Dumouchel</last></author>
       <pages>43–52</pages>
       <abstract>Nous étudions le rôle des entités nommées et marques discursives de rétroaction pour la tâche de classification et prédiction de la satisfaction usager à partir de dialogues. Les expériences menées sur 1027 dialogues Personne-Machine dans le domaine des agences de voyage montrent que les entités nommées et les marques discursives n’améliorent pas de manière significative le taux de classification des dialogues. Par contre, elles permettent une meilleure prédiction de la satisfaction usager à partir des premiers tours de parole usager.</abstract>
       <url hash="dde4d292">2007.jeptalnrecital-poster.4</url>
@@ -456,8 +456,8 @@
     </paper>
     <paper id="5">
       <title>Les ellipses dans un système de traduction automatique de la parole</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <pages>53–62</pages>
@@ -469,9 +469,9 @@
     <paper id="6">
       <title>Analyse automatique de sondages téléphoniques d’opinion</title>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>63–72</pages>
       <abstract>Cette étude présente la problématique de l’analyse automatique de sondages téléphoniques d’opinion. Cette analyse se fait en deux étapes : tout d’abord extraire des messages oraux les expressions subjectives relatives aux opinions de utilisateurs sur une dimension particulière (efficacité, accueil, etc.) ; puis sélectionner les messages fiables, selon un ensemble de mesures de confiance, et estimer la distribution des diverses opinions sur le corpus de test. Le but est d’estimer une distribution aussi proche que possible de la distribution de référence. Cette étude est menée sur un corpus de messages provenant de vrais utilisateurs fournis par France Télécom R&amp;D.</abstract>
       <url hash="840377f8">2007.jeptalnrecital-poster.6</url>
@@ -490,9 +490,9 @@
     </paper>
     <paper id="8">
       <title>Analyse des échecs d’une approche pour traiter les questions définitoires soumises à un système de questions/réponses</title>
-      <author><first>Laurent</first><last>Gillard</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>83–92</pages>
       <abstract>Cet article revient sur le type particulier des questions définitoires étudiées dans le cadre des campagnes d’évaluation des systèmes de Questions/Réponses. Nous présentons l’approche développée suite à notre participation à la campagne EQueR et son évaluation lors de QA@CLEF 2006. La réponse proposée est la plus représentative des expressions présentes en apposition avec l’objet à définir, sa sélection est faite depuis des indices dérivés de ces appositions. Environ 80% de bonnes réponses sont trouvées sur les questions définitoires des volets francophones de CLEF. Les cas d’erreurs rencontrés sont analysés et discutés en détail.</abstract>
       <url hash="b444541d">2007.jeptalnrecital-poster.8</url>
@@ -503,7 +503,7 @@
       <title>Caractérisation des discours scientifiques et vulgarisés en français, japonais et russe</title>
       <author><first>Lorraine</first><last>Goeuriot</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>93–102</pages>
       <abstract>L’objectif principal de notre travail consiste à étudier la notion de comparabilité des corpus, et nous abordons cette question dans un contexte monolingue en cherchant à distinguer les documents scientifiques et vulgarisés. Nous travaillons séparément sur des corpus composés de documents du domaine médical dans trois langues à forte distance linguistique (le français, le japonais et le russe). Dans notre approche, les documents sont caractérisés dans chaque langue selon leur thématique et une typologie discursive qui se situe à trois niveaux de l’analyse des documents : structurel, modal et lexical. Le typage des documents est implémenté avec deux algorithmes d’apprentissage (SVMlight et C4.5). L’évaluation des résultats montre que la typologie discursive proposée est portable d’une langue à l’autre car elle permet en effet de distinguer les deux discours. Nous constatons néanmoins des performances très variées selon les langues, les algorithmes et les types de caractéristiques discursives.</abstract>
       <url hash="06c5527b">2007.jeptalnrecital-poster.9</url>
@@ -512,9 +512,9 @@
     </paper>
     <paper id="10">
       <title><fixed-case>OGMIOS</fixed-case> : une plate-forme d’annotation linguistique de collection de documents issus du Web</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Julien</first><last>Derivière</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="julien-deriviere"><first>Julien</first><last>Derivière</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>103–112</pages>
       <abstract>L’un des objectifs du projet ALVIS est d’intégrer des informations linguistiques dans des moteurs de recherche spécialisés. Dans ce contexte, nous avons conçu une plate-forme d’enrichissement linguistique de documents issus du Web, OGMIOS, exploitant des outils de TAL existants. Les documents peuvent être en français ou en anglais. Cette architecture est distribuée, afin de répondre aux contraintes liées aux traitements de gros volumes de textes, et adaptable, pour permettre l’analyse de sous-langages. La plate-forme est développée en Perl et disponible sous forme de modules CPAN. C’est une structure modulaire dans lequel il est possible d’intégrer de nouvelles ressources ou de nouveaux outils de TAL. On peut ainsi définir des configuration différentes pour différents domaines et types de collections. Cette plateforme robuste permet d’analyser en masse des données issus du web qui sont par essence très hétérogènes. Nous avons évalué les performances de la plateforme sur plusieurs collections de documents. En distribuant les traitements sur vingt machines, une collection de 55 329 documents du domaine de la biologie (106 millions de mots) a été annotée en 35 heures tandis qu’une collection de 48 422 dépêches relatives aux moteurs de recherche (14 millions de mots) a été annotée en 3 heures et 15 minutes.</abstract>
       <url hash="968a84ad">2007.jeptalnrecital-poster.10</url>
@@ -554,7 +554,7 @@
     </paper>
     <paper id="14">
       <title>Du bruit, du silence et des ambiguïtés : que faire du <fixed-case>TAL</fixed-case> pour l’apprentissage des langues ?</title>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <author><first>Claude</first><last>Ponton</last></author>
       <pages>143–152</pages>
       <abstract>Nous proposons une nouvelle approche pour l’intégration du TAL dans les systèmes d’apprentissage des langues assisté par ordinateur (ALAO), la stratégie « moinsdisante ». Cette approche tire profit des technologies élémentaires mais fiables du TAL et insiste sur la nécessité de traitements modulaires et déclaratifs afin de faciliter la portabilité et la prise en main didactique des systèmes. Basé sur cette approche, ExoGen est un premier prototype pour la génération automatique d’activités lacunaires ou de lecture d’exemples. Il intègre un module de repérage et de description des réponses des apprenants fondé sur la comparaison entre réponse attendue et réponse donnée. L’analyse des différences graphiques, orthographiques et morphosyntaxiques permet un diagnostic des erreurs de type fautes d’orthographe, confusions, problèmes d’accord, de conjugaison, etc. La première évaluation d’ExoGen sur un extrait du corpus d’apprenants FRIDA produit des résultats prometteurs pour le développement de cette approche « moins-disante », et permet d’envisager un modèle d’analyse performant et généralisable à une grande variété d’activités.</abstract>
@@ -564,7 +564,7 @@
     </paper>
     <paper id="15">
       <title>Extraction automatique de cadres de sous-catégorisation verbale pour le français à partir d’un corpus arboré</title>
-      <author><first>Anna</first><last>Kupsc</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupsc</last></author>
       <pages>153–162</pages>
       <abstract>Nous présentons une expérience d’extraction automatique des cadres de souscatégorisation pour 1362 verbes français. Nous exploitons un corpus journalistique richement annoté de 15 000 phrases dont nous extrayons 12 510 occurrences verbales. Nous évaluons dans un premier temps l’extraction des cadres basée sur la fonction des arguments, ce qui nous fournit 39 cadres différents avec une moyenne de 1.54 cadres par lemme. Ensuite, nous adoptons une approche mixte (fonction et catégorie syntaxique) qui nous fournit dans un premier temps 925 cadres différents, avec une moyenne de 3.44 cadres par lemme. Plusieurs méthodes de factorisation, neutralisant en particulier les variantes de réalisation avec le passif ou les pronoms clitiques, sont ensuite appliquées et nous permettent d’aboutir à 235 cadres différents avec une moyenne de 1.94 cadres par verbe. Nous comparons brièvement nos résultats avec les travaux existants pour le français et pour l’anglais.</abstract>
       <url hash="4d689376">2007.jeptalnrecital-poster.15</url>
@@ -595,7 +595,7 @@
     <paper id="18">
       <title>Ressources lexicales chinoises pour le <fixed-case>TALN</fixed-case></title>
       <author><first>Huei-Chi</first><last>Lin</last></author>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <pages>183–192</pages>
       <abstract>Nous voulons traiter des textes chinois automatiquement ; pour ce faire, nous formalisons le vocabulaire chinois, en utilisant principalement des dictionnaires et des grammaires morphologiques et syntaxiques formalisés avec le logiciel NooJ. Nous présentons ici les critères linguistiques qui nous ont permis de construire dictionnaires et grammaires, sachant que l’application envisagée (linguistique de corpus) nous impose certaines contraintes dans la formalisation des unités de la langue, en particulier des composés.</abstract>
       <url hash="151c2f8e">2007.jeptalnrecital-poster.18</url>
@@ -640,8 +640,8 @@
     </paper>
     <paper id="23">
       <title>Un Lexique Génératif de référence pour le français</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Évelyne</first><last>Jacquey</last></author>
       <pages>233–242</pages>
       <abstract>Cet article propose une approche originale visant la construction d’un lexique sémantique de référence sur le français. Sa principale caractéristique est de pouvoir s’appuyer sur les propriétés morphologiques des lexèmes. La méthode combine en effet des résultats d’analyse morphologique (Namer, 2002;2003), à partir de ressources lexicales de grande taille (nomenclatures du TLF) et des méthodologies d’acquisition d’information lexicale déjà éprouvées (Namer 2005; Sébillot 2002). Le format de représentation choisi, dans le cadre du Lexique Génératif, se distingue par ses propriétés d’expressivité et d’économie. Cette approche permet donc d’envisager la construction d’un lexique de référence sur le français caractérisé par une forte homogénéité tout en garantissant une couverture large, tant du point de vue de la nomenclature que du point de vue des contenus sémantiques. Une première validation de la méthode fournit une projection quantitative et qualitative des résultats attendus.</abstract>
@@ -651,10 +651,10 @@
     </paper>
     <paper id="24">
       <title>Les résultats de la campagne <fixed-case>EASY</fixed-case> d’évaluation des analyseurs syntaxiques du français</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <pages>243–252</pages>
       <abstract>Dans cet article, nous présentons les résultats de la campagne d’évaluation EASY des analyseurs syntaxiques du français. EASY a été la toute première campagne d’évaluation comparative des analyseurs syntaxiques du français en mode boîte noire utilisant des mesures objectives quantitatives. EASY fait partie du programme TECHNOLANGUE du Ministère délégué à la Recherche et à l’Éducation, avec le soutien du ministère de délégué à l’industrie et du ministère de la culture et de la communication. Nous exposons tout d’abord la position de la campagne par rapport aux autres projets d’évaluation en analyse syntaxique, puis nous présentos son déroulement, et donnons les résultats des 15 analyseurs participants en fonction des différents types de corpus et des différentes annotations (constituants et relations). Nous proposons ensuite un ensemble de leçons à tirer de cette campagne, en particulier à propos du protocole d’évaluation, de la définition de la segmentation en unités linguistiques, du formalisme et des activités d’annotation, des critères de qualité des données, des annotations et des résultats, et finalement de la notion de référence en analyse syntaxique. Nous concluons en présentant comment les résultats d’EASY se prolongent dans le projet PASSAGE (ANR-06-MDCA-013) qui vient de débuter et dont l’objectif est d’étiqueter un grand corpus par plusieurs analyseurs en les combinant selon des paramètres issus de l’évaluation.</abstract>
       <url hash="aeff13fb">2007.jeptalnrecital-poster.24</url>
@@ -664,8 +664,8 @@
     <paper id="25">
       <title>Modèles statistiques enrichis par la syntaxe pour la traduction automatique</title>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Daniel</first><last>Déchelotte</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="daniel-dechelotte"><first>Daniel</first><last>Déchelotte</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <pages>253–262</pages>
       <abstract>La traduction automatique statistique par séquences de mots est une voie prometteuse. Nous présentons dans cet article deux évolutions complémentaires. La première permet une modélisation de la langue cible dans un espace continu. La seconde intègre des catégories morpho-syntaxiques aux unités manipulées par le modèle de traduction. Ces deux approches sont évaluées sur la tâche Tc-Star. Les résultats les plus intéressants sont obtenus par la combinaison de ces deux méthodes.</abstract>
@@ -676,7 +676,7 @@
     <paper id="26">
       <title>Traitements phrastiques phonétiques pour la réécriture de phrases dysorthographiées</title>
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <pages>263–272</pages>
       <abstract>Cet article décrit une méthode qui combine des hypothèses graphémiques et phonétiques au niveau de la phrase, à l’aide d’une réprésentation en automates à états finis et d’un modèle de langage, pour la réécriture de phrases tapées au clavier par des dysorthographiques. La particularité des écrits dysorthographiés qui empêche les correcteurs orthographiques d’être efficaces pour cette tâche est une segmentation en mots parfois incorrecte. La réécriture diffère de la correction en ce sens que les phrases réécrites ne sont pas à destination de l’utilisateur mais d’un système automatique, tel qu’un moteur de recherche. De ce fait l’évaluation est conduite sur des versions filtrées et lemmatisées des phrases. Le taux d’erreurs mots moyen passe de 51 % à 20 % avec notre méthode, et est de 0 % sur 43 % des phrases testées.</abstract>
@@ -696,7 +696,7 @@
     </paper>
     <paper id="28">
       <title>Traitement sémantique par analyse distributionnelle des noms transdisciplinaires des écrits scientifiques</title>
-      <author><first>Agnès</first><last>Tutin</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
       <pages>283–292</pages>
       <abstract>Dans cette étude sur le lexique transdisciplinaire des écrits scientifiques, nous souhaitons évaluer dans quelle mesure les méthodes distributionnelles de TAL peuvent faciliter la tâche du linguiste dans le traitement sémantique de ce lexique. Après avoir défini le champ lexical et les corpus exploités, nous testons plusieurs méthodes basées sur des dépendances syntaxiques et observons les proximités sémantiques et les classes établies. L’hypothèse que certaines relations syntaxiques - en particulier les relations de sous-catégorisation – sont plus appropriées pour établir des classements sémantiques n’apparaît qu’en partie vérifiée. Si les relations de sous-catégorisation génèrent des proximités sémantiques entre les mots de meilleure qualité, cela ne semble pas le cas pour la classification par voisinage.</abstract>
       <url hash="19482ad6">2007.jeptalnrecital-poster.28</url>
@@ -705,7 +705,7 @@
     </paper>
     <paper id="29">
       <title>Une expérience de compréhension en contexte de dialogue avec le système <fixed-case>LOGUS</fixed-case>, approche logique de la compréhension de la langue orale</title>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <pages>293–302</pages>
       <abstract>LOGUS est un système de compréhension de la langue orale dans le cadre d’un dialogue homme-machine finalisé. Il est la mise en oeuvre d’une approche logique qui utilise différents formalismes afin d’obtenir un système robuste mais néanmoins relativement extensible. Cet article décrit essentiellement l’étape de compréhension en contexte de dialogue implémentée sur LOGUS, développée et testée à partir d’un corpus de réservation hôtelière enregistré et annoté lors des travaux du groupe MEDIA du projet technolangue. Il décrit également les différentes interrogations et conclusions que peut susciter une telle expérience et les résultats obtenus par le système dans la résolution des références. Concernant l’approche elle-même, cette expérience semble montrer que le formalisme adopté pour la représentation sémantique des énoncés est bien adapté à la compréhension en contexte.</abstract>
       <url hash="efe42df0">2007.jeptalnrecital-poster.29</url>
@@ -786,7 +786,7 @@ _grammes de transitions.</abstract>
   <volume id="recital" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 14ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
-      <editor><first>Farah</first><last>Benamara</last></editor>
+      <editor id="farah-benamara"><first>Farah</first><last>Benamara</last></editor>
       <editor><first>Sylwia</first><last>Ozdowska</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
@@ -865,7 +865,7 @@ _grammes de transitions.</abstract>
   <volume id="recitalposter" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 14ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters)</booktitle>
-      <editor><first>Farah</first><last>Benamara</last></editor>
+      <editor id="farah-benamara"><first>Farah</first><last>Benamara</last></editor>
       <editor><first>Sylwia</first><last>Ozdowska</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
diff --git a/data/xml/2007.mtsummit.xml b/data/xml/2007.mtsummit.xml
index 200f10aa0f..9173116d33 100644
--- a/data/xml/2007.mtsummit.xml
+++ b/data/xml/2007.mtsummit.xml
@@ -40,7 +40,7 @@
       <address>Copenhagen, Denmark</address>
       <month>September 10-14</month>
       <year>2007</year>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <venue>mtsummit</venue>
     </meta>
     <frontmatter>
@@ -59,9 +59,9 @@
     </paper>
     <paper id="2">
       <title>Improving speech-to-speech translation using word posterior probabilities</title>
-      <author><first>Vicente</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicente</first><last>Alabau</last></author>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="5f959120">2007.mtsummit-papers.2</url>
       <bibkey>alabau-etal-2007-improving</bibkey>
     </paper>
@@ -82,7 +82,7 @@
     <paper id="5">
       <title>Translating from under-resourced languages: comparing direct transfer against pivot translation</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <url hash="02cb687a">2007.mtsummit-papers.5</url>
       <bibkey>babych-etal-2007-translating</bibkey>
@@ -109,7 +109,7 @@
       <title>A system to mine large-scale bilingual dictionaries from monolingual web pages</title>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <url hash="76e45b57">2007.mtsummit-papers.9</url>
       <bibkey>cao-etal-2007-system</bibkey>
     </paper>
@@ -129,8 +129,8 @@
     <paper id="12">
       <title>Enhancing image-based <fixed-case>A</fixed-case>rabic document translation using noisy channel correction model</title>
       <author><first>Yi</first><last>Chang</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Jie</first><last>Yang</last></author>
       <url hash="433d36a7">2007.mtsummit-papers.12</url>
       <bibkey>chang-etal-2007-enhancing</bibkey>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="14">
       <title>Incorporating constituent structure constraint into discriminative word alignment</title>
-      <author><first>Wen-Han</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wen-Han</first><last>Chao</last></author>
       <author><first>Zhou-Jun</first><last>Li</last></author>
       <url hash="51df5212">2007.mtsummit-papers.14</url>
       <bibkey>chao-li-2007-incorporating</bibkey>
@@ -159,8 +159,8 @@
     </paper>
     <paper id="16">
       <title>Syntax-enhanced n-gram-based <fixed-case>SMT</fixed-case></title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <url hash="cd9440f9">2007.mtsummit-papers.16</url>
       <bibkey>crego-marino-2007-syntax</bibkey>
     </paper>
@@ -174,11 +174,11 @@
     </paper>
     <paper id="18">
       <title>A state-of-the-art statistical machine translation system based on <fixed-case>M</fixed-case>oses</title>
-      <author><first>Daniel</first><last>Déchelotte</last></author>
+      <author id="daniel-dechelotte"><first>Daniel</first><last>Déchelotte</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <url hash="6dec8c69">2007.mtsummit-papers.18</url>
       <bibkey>dechelotte-etal-2007-state</bibkey>
     </paper>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>A</fixed-case>rabic diacritization in the context of statistical machine translation</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <url hash="9cf6d1a2">2007.mtsummit-papers.20</url>
@@ -198,24 +198,24 @@
     </paper>
     <paper id="21">
       <title>Automatic evaluation of machine translation based on recursive acquisition of an intuitive common parts continuum</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <url hash="8990bfb7">2007.mtsummit-papers.21</url>
       <bibkey>echizen-ya-araki-2007-automatic</bibkey>
     </paper>
     <paper id="22">
       <title>Estimating phrase pair relevance for translation model pruning</title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="ec2d48d3">2007.mtsummit-papers.22</url>
       <bibkey>eck-etal-2007-estimating</bibkey>
     </paper>
     <paper id="23">
       <title>How much data is needed for reliable <fixed-case>MT</fixed-case> evaluation? Using bootstrapping to study human and automatic metrics</title>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <url hash="3e9959cd">2007.mtsummit-papers.23</url>
       <bibkey>estrella-etal-2007-much</bibkey>
     </paper>
@@ -223,16 +223,16 @@
       <title>Lexical translation with application to image searching on the web</title>
       <author><first>Oren</first><last>Etzioni</last></author>
       <author><first>Kobi</first><last>Reiter</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Marcus</first><last>Sammer</last></author>
       <url hash="e6d7ac31">2007.mtsummit-papers.24</url>
       <bibkey>etzioni-etal-2007-lexical</bibkey>
     </paper>
     <paper id="25">
       <title>Improving transfer-based <fixed-case>MT</fixed-case> systems with automatic refinements</title>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="8a0bf483">2007.mtsummit-papers.25</url>
       <bibkey>font-llitjos-etal-2007-improving</bibkey>
     </paper>
@@ -245,13 +245,13 @@
     <paper id="27">
       <title>Online and free! Ten years of online machine translation: origins, developments, current use and future prospects</title>
       <author><first>Federico</first><last>Gaspari</last></author>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="1e64cab0">2007.mtsummit-papers.27</url>
       <bibkey>gaspari-hutchins-2007-online</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>POS</fixed-case>-based reordering models for statistical machine translation</title>
-      <author><first>Deepa</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepa</first><last>Gupta</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <url hash="baa414a3">2007.mtsummit-papers.28</url>
@@ -265,57 +265,57 @@
     </paper>
     <paper id="30">
       <title>End-to-end evaluation of a speech-to-speech translation system in <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case></title>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <url hash="4c5009b2">2007.mtsummit-papers.30</url>
       <bibkey>hamon-etal-2007-end</bibkey>
     </paper>
     <paper id="31">
       <title>Assessing human and automated quality judgments in the <fixed-case>F</fixed-case>rench <fixed-case>MT</fixed-case> evaluation campaign <fixed-case>CESTA</fixed-case></title>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <url hash="bf729bb8">2007.mtsummit-papers.31</url>
       <bibkey>hamon-etal-2007-assessing</bibkey>
     </paper>
     <paper id="32">
       <title>Report on the <fixed-case>NSF</fixed-case>-sponsored Human Language Technology Workshop on Industrial Centers</title>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Alex</first><last>Acero</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Jordan</first><last>Cohen</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jordan-cohen"><first>Jordan</first><last>Cohen</last></author>
       <author><first>Barbara</first><last>Cuthill</last></author>
       <author><first>Carol</first><last>Espy-Wilson</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>John</first><last>Garofolo</last></author>
+      <author id="john-s-garofolo"><first>John</first><last>Garofolo</last></author>
       <author><first>Chin-Hui</first><last>Lee</last></author>
       <author><first>Jim</first><last>Lester</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Nelson</first><last>Morgan</last></author>
       <author><first>Michael</first><last>Picheney</last></author>
       <author><first>Joe</first><last>Picone</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Jeff</first><last>Reynar</last></author>
       <author><first>Hadar</first><last>Shemtov</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <url hash="583853df">2007.mtsummit-papers.32</url>
       <bibkey>harper-etal-2007-report</bibkey>
     </paper>
     <paper id="33">
       <title>Experiments with a noun-phrase driven statistical machine translation system</title>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="315c029a">2007.mtsummit-papers.33</url>
       <bibkey>hewavitharana-etal-2007-experiments</bibkey>
     </paper>
     <paper id="34">
       <title>Domain adaptation of <fixed-case>MT</fixed-case> systems through automatic post-editing</title>
       <author><first>Pierre</first><last>Isabelle</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <url hash="cca758af">2007.mtsummit-papers.34</url>
       <bibkey>isabelle-etal-2007-domain</bibkey>
@@ -324,7 +324,7 @@
       <title>Development of a <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese machine translation system</title>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Hiroshi</first><last>Nakagawa</last></author>
       <author><first>Hiroyuki</first><last>Kaji</last></author>
@@ -342,7 +342,7 @@
     </paper>
     <paper id="37">
       <title>Comparing parallel corpora and evaluating their quality</title>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <author><first>Kaarel</first><last>Veskis</last></author>
       <url hash="19abb2af">2007.mtsummit-papers.37</url>
       <bibkey>kaalep-veskis-2007-comparing</bibkey>
@@ -350,25 +350,25 @@
     <paper id="38">
       <title>Iterative refinement of lexicon and phrasal alignment</title>
       <author><first>Jae Dong</first><last>Kim</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="0969aea5">2007.mtsummit-papers.38</url>
       <bibkey>kim-vogel-2007-iterative</bibkey>
     </paper>
     <paper id="39">
       <title>Semi-automatic error analysis for large-scale statistical machine translation</title>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <url hash="084506df">2007.mtsummit-papers.39</url>
       <bibkey>kirchhoff-etal-2007-semi</bibkey>
     </paper>
     <paper id="40">
       <title>Comparing rule-based and data-driven approaches to <fixed-case>S</fixed-case>panish-to-<fixed-case>B</fixed-case>asque machine translation</title>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Nicolas</first><last>Stroppa</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="2d39c36f">2007.mtsummit-papers.40</url>
       <bibkey>labaka-etal-2007-comparing</bibkey>
     </paper>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="43">
       <title>Faster beam-search decoding for phrasal statistical machine translation</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <url hash="1130376c">2007.mtsummit-papers.43</url>
       <bibkey>moore-quirk-2007-faster</bibkey>
@@ -401,7 +401,7 @@
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Jan</first><last>Bungeroth</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="a8d60dfb">2007.mtsummit-papers.44</url>
       <bibkey>morrissey-etal-2007-combining</bibkey>
     </paper>
@@ -422,7 +422,7 @@
     </paper>
     <paper id="47">
       <title>Machine transliteration using multiple transliteration engines and hypothesis re-ranking</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <url hash="73e47b99">2007.mtsummit-papers.47</url>
       <bibkey>oh-isahara-2007-machine</bibkey>
@@ -430,8 +430,8 @@
     <paper id="48">
       <title>Introducing translation dictionary into phrase-based <fixed-case>SMT</fixed-case></title>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="16c77829">2007.mtsummit-papers.48</url>
       <bibkey>okuma-etal-2007-introducing</bibkey>
     </paper>
@@ -439,7 +439,7 @@
       <title>Improving example-based machine translation through morphological generalization and adaptation</title>
       <author><first>Aaron B.</first><last>Phillips</last></author>
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="06d74e76">2007.mtsummit-papers.49</url>
       <bibkey>phillips-etal-2007-improving</bibkey>
     </paper>
@@ -470,22 +470,22 @@
     <paper id="53">
       <title>Building a sense-distinguished multilingual lexicon from monolingual corpora and bilingual lexicons</title>
       <author><first>Marcus</first><last>Sammer</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <url hash="eb049335">2007.mtsummit-papers.53</url>
       <bibkey>sammer-soderland-2007-building</bibkey>
     </paper>
     <paper id="54">
       <title>Estimation of confidence measures for machine translation</title>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="4174d98c">2007.mtsummit-papers.54</url>
       <bibkey>sanchis-etal-2007-estimation</bibkey>
     </paper>
     <paper id="55">
       <title>Getting professional translation through user interaction</title>
       <author><first>Young-Ae</first><last>Seo</last></author>
-      <author><first>Chang-Hyun</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang-Hyun</first><last>Kim</last></author>
       <author><first>Seong-Il</first><last>Yang</last></author>
       <author><first>Young-gil</first><last>Kim</last></author>
       <url hash="d64f3267">2007.mtsummit-papers.55</url>
@@ -496,14 +496,14 @@
       <author><first>Smriti</first><last>Singh</last></author>
       <author><first>Mrugank</first><last>Dalal</last></author>
       <author><first>Vishal</first><last>Vachhani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
       <url hash="15433621">2007.mtsummit-papers.56</url>
       <bibkey>singh-etal-2007-hindi</bibkey>
     </paper>
     <paper id="57">
       <title>Using rich morphology in resolving certain <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish machine translation divergence</title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <url hash="b234d1ad">2007.mtsummit-papers.57</url>
       <bibkey>sinha-2007-using</bibkey>
     </paper>
@@ -534,8 +534,8 @@
     </paper>
     <paper id="61">
       <title>A <fixed-case>MT</fixed-case> system from <fixed-case>T</fixed-case>urkmen to <fixed-case>T</fixed-case>urkish employing finite state and statistical methods</title>
-      <author><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
-      <author><first>Eşref</first><last>Adali</last></author>
+      <author id="ahmet-cuneyd-tantug"><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
+      <author id="esref-adali"><first>Eşref</first><last>Adali</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <url hash="005a6cfe">2007.mtsummit-papers.61</url>
       <bibkey>tantug-etal-2007-mt</bibkey>
@@ -558,8 +558,8 @@
     </paper>
     <paper id="64">
       <title><fixed-case>J</fixed-case>apanese-<fixed-case>H</fixed-case>ungarian dictionary generation using ontology resources</title>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <url hash="f9a93084">2007.mtsummit-papers.64</url>
       <bibkey>varga-yokoyama-2007-japanese</bibkey>
     </paper>
@@ -590,8 +590,8 @@
       <title>Domain dependent statistical machine translation</title>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Yonggang</first><last>Deng</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="94e2396e">2007.mtsummit-papers.68</url>
       <bibkey>xu-etal-2007-domain</bibkey>
     </paper>
@@ -599,7 +599,7 @@
       <title>Aspect marker generation in <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese machine translation</title>
       <author><first>Yang</first><last>Ye</last></author>
       <author><first>Karl-Michael</first><last>Schneider</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <url hash="f284defc">2007.mtsummit-papers.69</url>
       <bibkey>ye-etal-2007-aspect</bibkey>
     </paper>
@@ -614,17 +614,17 @@
       <title>A tree-to-tree alignment-based model for statistical machine translation</title>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Hongfei</first><last>Jiang</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Jun</first><last>Sun</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <url hash="dcb673c6">2007.mtsummit-papers.71</url>
       <bibkey>zhang-etal-2007-tree</bibkey>
     </paper>
     <paper id="72">
       <title><fixed-case>P</fixed-case>an<fixed-case>D</fixed-case>o<fixed-case>RA</fixed-case>: a large-scale two-way statistical machine translation system for hand-held devices</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="317bcdc4">2007.mtsummit-papers.72</url>
       <bibkey>zhang-vogel-2007-pandora</bibkey>
     </paper>
@@ -671,7 +671,7 @@
     <paper id="3">
       <title>Context-based evaluation of <fixed-case>MT</fixed-case> systems: principles and tools</title>
       <author><first>Maghi</first><last>King</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <url hash="b296df7b">2007.mtsummit-tutorials.3</url>
       <attachment type="presentation" hash="91054497">2007.mtsummit-tutorials.3.Presentation.pdf</attachment>
@@ -680,7 +680,7 @@
     <paper id="4">
       <title>Using free online <fixed-case>MT</fixed-case> in multilingual websites</title>
       <author><first>Federico</first><last>Gaspari</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="51899ab5">2007.mtsummit-tutorials.4</url>
       <bibkey>gaspari-somers-2007-using</bibkey>
     </paper>
@@ -703,8 +703,8 @@
       <address>Copenhagen, Denmark</address>
       <month>September 11</month>
       <year>2007</year>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>Shoichi</first><last>Yokoyama</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></editor>
       <venue>mtsummit</venue>
     </meta>
     <frontmatter>
@@ -723,7 +723,7 @@
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <author><first>Munpyo</first><last>Hong</last></author>
       <url hash="b698852c">2007.mtsummit-wpt.2</url>
       <bibkey>kwon-etal-2007-english</bibkey>
@@ -771,8 +771,8 @@
     </meta>
     <paper id="1">
       <title>The <fixed-case>C</fixed-case>hinese Room Experiment: The Self-Organizing Feng Shui of <fixed-case>MT</fixed-case></title>
-      <author><first>John S.</first><last>White</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <url hash="643528b6">2007.mtsummit-cre.1</url>
       <bibkey>white-reeder-2007-chinese</bibkey>
     </paper>
@@ -783,7 +783,7 @@
       <address>Copenhagen, Denmark</address>
       <month>September 11</month>
       <year>2007</year>
-      <editor><first>Anja</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
       <editor><first>Sebastian</first><last>Varges</last></editor>
       <venue>mtsummit</venue>
     </meta>
@@ -801,7 +801,7 @@
       <title>Automatic evaluation of generation and parsing for machine translation with automatically acquired transfer rules</title>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="f6819d7e">2007.mtsummit-ucnlg.2</url>
       <bibkey>graham-etal-2007-automatic</bibkey>
     </paper>
@@ -813,8 +813,8 @@
     </paper>
     <paper id="4">
       <title>Towards broad coverage surface realization with <fixed-case>CCG</fixed-case></title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Scott</first><last>Martin</last></author>
       <url hash="a2ef9924">2007.mtsummit-ucnlg.4</url>
       <bibkey>white-etal-2007-towards</bibkey>
@@ -822,8 +822,8 @@
     <paper id="5">
       <title>Method of selecting training sets to build compact and efficient language model</title>
       <author><first>Keiji</first><last>Yasuda</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="3c7d8076">2007.mtsummit-ucnlg.5</url>
       <bibkey>yasuda-etal-2007-method</bibkey>
     </paper>
@@ -841,7 +841,7 @@
     </paper>
     <paper id="8">
       <title>Declarative syntactic processing of natural language using concurrent constraint programming and probabilistic dependency modeling</title>
-      <author><first>Irene</first><last>Langkilde-Geary</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde-Geary</last></author>
       <url hash="afe830b9">2007.mtsummit-ucnlg.8</url>
       <bibkey>langkilde-geary-2007-declarative</bibkey>
     </paper>
@@ -853,7 +853,7 @@
     </paper>
     <paper id="10">
       <title>Evaluation of <fixed-case>NLG</fixed-case>: some analogies and differences with machine translation and reference resolution</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <url hash="24f53c11">2007.mtsummit-ucnlg.10</url>
       <bibkey>popescu-belis-2007-evaluation</bibkey>
     </paper>
@@ -894,7 +894,7 @@
     <paper id="16">
       <title><fixed-case>NIL</fixed-case>: attribute selection for matching the task corpus using relative attribute groupings obtained from the test data</title>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <url hash="0edcb30c">2007.mtsummit-ucnlg.16</url>
       <bibkey>hervas-gervas-2007-nil</bibkey>
     </paper>
@@ -913,10 +913,10 @@
     </paper>
     <paper id="19">
       <title>Cost-based attribute selection for <fixed-case>GRE</fixed-case> (<fixed-case>GRAPH</fixed-case>-<fixed-case>SC</fixed-case>/<fixed-case>GRAPH</fixed-case>-<fixed-case>FP</fixed-case>)</title>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Pascal</first><last>Touset</last></author>
       <author><first>Jette</first><last>Viethen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <url hash="3756290d">2007.mtsummit-ucnlg.19</url>
       <bibkey>theune-etal-2007-cost</bibkey>
     </paper>
@@ -930,7 +930,7 @@
     </paper>
     <paper id="21">
       <title>Content determination in <fixed-case>GRE</fixed-case>: evaluating the evaluator</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <url hash="fe8cc183">2007.mtsummit-ucnlg.21</url>
       <bibkey>van-deemter-gatt-2007-content</bibkey>
@@ -943,8 +943,8 @@
       <month>September 11</month>
       <year>2007</year>
       <editor><first>Gregor</first><last>Thurmair</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <venue>mtsummit</venue>
     </meta>
     <frontmatter>
@@ -953,7 +953,7 @@
     </frontmatter>
     <paper id="1">
       <title>The place of automatic evaluation metrics in external quality models for machine translation</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <attachment type="presentation" hash="1b519163">2007.mtsummit-aptme.1.Presentation.pdf</attachment>
       <bibkey>popescu-belis-2007-place</bibkey>
     </paper>
@@ -966,22 +966,22 @@
     </paper>
     <paper id="3">
       <title>Investigating why <fixed-case>BLEU</fixed-case> penalizes non-statistical systems</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <attachment type="presentation" hash="69fedb51">2007.mtsummit-aptme.3.Presentation.pdf</attachment>
       <bibkey>hovy-2007-investigating</bibkey>
     </paper>
     <paper id="4">
       <title>Linguistic resources in support of various evaluation metrics</title>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Meghan Lammie</first><last>Glenn</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="meghan-glenn"><first>Meghan Lammie</first><last>Glenn</last></author>
       <author><first>Lauren</first><last>Friedman</last></author>
       <attachment type="presentation" hash="a09c26e9">2007.mtsummit-aptme.4.Presentation.pdf</attachment>
       <bibkey>cieri-etal-2007-linguistic</bibkey>
     </paper>
     <paper id="5">
       <title>Experiences and conclusions from the <fixed-case>CESTA</fixed-case> evaluation project</title>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <attachment type="presentation" hash="cc61d089">2007.mtsummit-aptme.5.Presentation.pdf</attachment>
       <bibkey>hamon-2007-experiences</bibkey>
     </paper>
@@ -994,15 +994,15 @@
     <paper id="7">
       <title>Sensitivity of automated models for <fixed-case>MT</fixed-case> evaluation: proximity-based vs. performance-based methods</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <attachment type="presentation" hash="2f95f7ab">2007.mtsummit-aptme.7.Presentation.pdf</attachment>
       <bibkey>babych-hartley-2007-sensitivity</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>MT</fixed-case> evaluation &amp; <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case></title>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <attachment type="presentation" hash="0d8488a9">2007.mtsummit-aptme.8.Presentation.pdf</attachment>
       <bibkey>choukri-etal-2007-mt</bibkey>
     </paper>
diff --git a/data/xml/2007.sigdial.xml b/data/xml/2007.sigdial.xml
index 721741b212..3022243df9 100644
--- a/data/xml/2007.sigdial.xml
+++ b/data/xml/2007.sigdial.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 8th SIGdial Workshop on Discourse and Dialogue</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <editor><first>Simon</first><last>Keizer</last></editor>
       <editor><first>Tim</first><last>Paek</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>Collective States of Understanding</title>
       <author><first>Arash</first><last>Eshghi</last></author>
-      <author><first>Patrick G.T.</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick G.T.</first><last>Healey</last></author>
       <pages>2–9</pages>
       <url hash="bd96db25">2007.sigdial-1.2</url>
       <bibkey>eshghi-healey-2007-collective</bibkey>
@@ -38,7 +38,7 @@
     </paper>
     <paper id="3">
       <title>Contrasting the Automatic Identification of Two Discourse Markers in Multiparty Dialogues</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Sandrine</first><last>Zufferey</last></author>
       <pages>10–17</pages>
       <url hash="b59dd33d">2007.sigdial-1.3</url>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Detecting and Summarizing Action Items in Multi-Party Dialogue</title>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <author><first>John</first><last>Niekrasz</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>Sharareh</first><last>Noorbaloochi</last></author>
@@ -62,7 +62,7 @@
       <title>Detecting Arguing and Sentiment in Meetings</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>26–34</pages>
       <url hash="305c0e67">2007.sigdial-1.5</url>
       <bibkey>somasundaran-etal-2007-detecting</bibkey>
@@ -71,7 +71,7 @@
     <paper id="6">
       <title>A Model of Compliance and Emotion for Potentially Adversarial Dialogue Agents</title>
       <author><first>Antonio</first><last>Roque</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>35–38</pages>
       <url hash="bb631e91">2007.sigdial-1.6</url>
       <bibkey>roque-traum-2007-model</bibkey>
@@ -81,7 +81,7 @@
       <title>Acquiring and Evaluating a Dialog Corpus through a Dialog Simulation Technique</title>
       <author><first>David</first><last>Griol</last></author>
       <author><first>Lluis F.</first><last>Hurtado</last></author>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
       <pages>39–42</pages>
       <url hash="2b334f71">2007.sigdial-1.7</url>
@@ -91,7 +91,7 @@
     <paper id="8">
       <title>An Empirical View on <fixed-case>IQA</fixed-case> Follow-up Questions</title>
       <author><first>Manuel</first><last>Kirschner</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>43–46</pages>
       <url hash="0ea699de">2007.sigdial-1.8</url>
       <bibkey>kirschner-bernardi-2007-empirical</bibkey>
@@ -101,7 +101,7 @@
       <title>An Implemented Method for Distributed Collection and Assessment of Speech Data</title>
       <author><first>Alexander</first><last>Siebert</last></author>
       <author><first>David</first><last>Schlangen</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>47–50</pages>
       <url hash="d17f55e2">2007.sigdial-1.9</url>
       <bibkey>siebert-etal-2007-implemented</bibkey>
@@ -110,7 +110,7 @@
     <paper id="10">
       <title>Beyond Repair – Testing the Limits of the Conversational Repair System</title>
       <author><first>David</first><last>Schlangen</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>51–54</pages>
       <url hash="96d357c0">2007.sigdial-1.10</url>
       <bibkey>schlangen-fernandez-2007-beyond</bibkey>
@@ -140,7 +140,7 @@
       <title>Emergent Conversational Recommendations: A Dialogue Behavior Approach</title>
       <author><first>Pontus</first><last>Wärnestal</last></author>
       <author><first>Lars</first><last>Degerstedt</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>63–66</pages>
       <url hash="419fe00d">2007.sigdial-1.13</url>
       <bibkey>warnestal-etal-2007-emergent</bibkey>
@@ -156,13 +156,13 @@
     </paper>
     <paper id="15">
       <title><fixed-case>H</fixed-case>assan: A Virtual Human for Tactical Questioning</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Antonio</first><last>Roque</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>Panayiotis</first><last>Georgiou</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis</first><last>Georgiou</last></author>
       <author><first>Jillian</first><last>Gerten</last></author>
       <author><first>Bilyana</first><last>Martinovski</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Susan</first><last>Robinson</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <pages>71–74</pages>
@@ -193,7 +193,7 @@
       <author><first>Rohit</first><last>Mishra</last></author>
       <author><first>Feng</first><last>Lin</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Harry</first><last>Bratt</last></author>
+      <author id="harry-bratt"><first>Harry</first><last>Bratt</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <author><first>Tobias</first><last>Scheideck</last></author>
@@ -208,7 +208,7 @@
       <title>Commute <fixed-case>UX</fixed-case>: Telephone Dialog System for Location-based Services</title>
       <author><first>Ivan</first><last>Tashev</last></author>
       <author><first>Michael</first><last>Seltzer</last></author>
-      <author><first>Yun-Cheng</first><last>Ju</last></author>
+      <author id="yun-cheng-ju"><first>Yun-Cheng</first><last>Ju</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <author><first>Alex</first><last>Acero</last></author>
       <pages>87–94</pages>
@@ -238,7 +238,7 @@
     <paper id="21">
       <title>Releasing a Multimodal Dialogue System into the Wild: User Support Mechanisms</title>
       <author><first>Alexander</first><last>Gruenstein</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>111–119</pages>
       <url hash="336d53b0">2007.sigdial-1.21</url>
       <bibkey>gruenstein-seneff-2007-releasing</bibkey>
@@ -250,7 +250,7 @@
       <author><first>Yuka</first><last>Nagano</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Toshihiko</first><last>Ito</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yuji</first><last>Hasegawa</last></author>
       <author><first>Hiroshi</first><last>Tsujino</last></author>
       <pages>120–123</pages>
@@ -262,9 +262,9 @@
       <title>Comparing Spoken Dialog Corpora Collected with Recruited Subjects versus Real Users</title>
       <author><first>Hua</first><last>Ai</last></author>
       <author><first>Antoine</first><last>Raux</last></author>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>124–131</pages>
       <url hash="935c4391">2007.sigdial-1.23</url>
       <bibkey>ai-etal-2007-comparing</bibkey>
@@ -282,7 +282,7 @@
     </paper>
     <paper id="25">
       <title>Referring under Restricted Interactivity Conditions</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Tatjana</first><last>Lucht</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>136–139</pages>
@@ -320,7 +320,7 @@
     <paper id="29">
       <title>Measuring Adaptation Between Dialogs</title>
       <author><first>Svetlana</first><last>Stenchikova</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>166–173</pages>
       <url hash="50c4009b">2007.sigdial-1.29</url>
       <bibkey>stenchikova-stent-2007-measuring</bibkey>
@@ -347,8 +347,8 @@
     <paper id="32">
       <title>Experimental Modeling of Human-human Multi-threaded Dialogues in the Presence of a Manual-visual Task</title>
       <author><first>Alexander</first><last>Shyrokov</last></author>
-      <author><first>Andrew</first><last>Kun</last></author>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="andrew-l-kun"><first>Andrew</first><last>Kun</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <pages>190–193</pages>
       <url hash="d08a010d">2007.sigdial-1.32</url>
       <bibkey>shyrokov-etal-2007-experimental</bibkey>
@@ -357,7 +357,7 @@
     <paper id="33">
       <title>Modeling Vocal Interaction for Text-Independent Classification of Conversation Type</title>
       <author><first>Kornel</first><last>Laskowski</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <pages>194–201</pages>
       <url hash="608f3f35">2007.sigdial-1.33</url>
@@ -369,7 +369,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Yuichiro</first><last>Fukubayashi</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>202–205</pages>
       <url hash="f7ceaaf1">2007.sigdial-1.34</url>
       <bibkey>komatani-etal-2007-introducing</bibkey>
@@ -385,7 +385,7 @@
     </paper>
     <paper id="36">
       <title>On the Training Data Requirements for an Automatic Dialogue Annotation Technique</title>
-      <author><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
       <pages>211–214</pages>
       <url hash="5a5d7dde">2007.sigdial-1.36</url>
       <bibkey>martinez-hinarejos-2007-training</bibkey>
@@ -393,7 +393,7 @@
     </paper>
     <paper id="37">
       <title>Practical Dialogue Manager Development using <fixed-case>POMDP</fixed-case>s</title>
-      <author><first>Trung H.</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung H.</first><last>Bui</last></author>
       <author><first>Boris</first><last>van Schooten</last></author>
       <author><first>Dennis</first><last>Hofs</last></author>
       <pages>215–218</pages>
@@ -404,7 +404,7 @@
     <paper id="38">
       <title>Problem-Sensitive Response Generation in Human-Robot Dialogs</title>
       <author><first>Petra</first><last>Gieselmann</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>219–222</pages>
       <url hash="11c6dd61">2007.sigdial-1.38</url>
       <bibkey>gieselmann-ostendorf-2007-problem</bibkey>
@@ -423,7 +423,7 @@
       <author><first>Surabhi</first><last>Gupta</last></author>
       <author><first>John</first><last>Niekrasz</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>227–230</pages>
       <url hash="8518753d">2007.sigdial-1.40</url>
       <bibkey>gupta-etal-2007-resolving</bibkey>
@@ -431,7 +431,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>SIDGRID</fixed-case>: A Framework for Distributed and Integrated Multimodal Annotation and Archiving and and Analysis</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <author><first>Bennett</first><last>Bertenthal</last></author>
       <author><first>Mark</first><last>Hereld</last></author>
       <author><first>Sarah</first><last>Kenny</last></author>
@@ -481,8 +481,8 @@
     </paper>
     <paper id="46">
       <title>Implicitly-supervised Learning in Spoken Language Interfaces: an Application to the Confidence Annotation Problem</title>
-      <author><first>Dan</first><last>Bohus</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>256–264</pages>
       <url hash="2014a1dc">2007.sigdial-1.46</url>
       <bibkey>bohus-rudnicky-2007-implicitly</bibkey>
@@ -490,7 +490,7 @@
     </paper>
     <paper id="47">
       <title>Planning Dialog Actions</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Ronald</first><last>Petrick</last></author>
       <pages>265–272</pages>
       <url hash="0db7fbef">2007.sigdial-1.47</url>
@@ -501,7 +501,7 @@
       <title>Statistical User Simulation with a Hidden Agenda</title>
       <author><first>Jost</first><last>Schatzmann</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>273–282</pages>
       <url hash="8c39d12b">2007.sigdial-1.48</url>
       <bibkey>schatzmann-etal-2007-statistical</bibkey>
diff --git a/data/xml/2007.tal.xml b/data/xml/2007.tal.xml
index f50f91b076..95a5958d8c 100644
--- a/data/xml/2007.tal.xml
+++ b/data/xml/2007.tal.xml
@@ -13,17 +13,17 @@
     </meta>
     <paper id="1">
       <title>Principles of Evaluation in Natural Language Processing</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Stéphane</first><last>Chaudiron</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <pages>7–31</pages>
       <url hash="1a34edd8">2007.tal-1.1</url>
       <bibkey>paroubek-etal-2007-principles</bibkey>
     </paper>
     <paper id="2">
       <title>Pour l’évaluation externe des systèmes de <fixed-case>TA</fixed-case> par des méthodes fondées sur la tâche [For an external evaluation of <fixed-case>MT</fixed-case> systems by task-based methods]</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>33–65</pages>
       <url hash="ac7090e3">2007.tal-1.2</url>
       <language>fra</language>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Le rôle des métriques d’évaluation dans le processus de recherche en <fixed-case>TAL</fixed-case> [The role of evaluation metrics in the <fixed-case>NLP</fixed-case> research process]</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>67–91</pages>
       <url hash="9de90f58">2007.tal-1.3</url>
       <language>fra</language>
@@ -49,9 +49,9 @@
       <title><fixed-case>SIMDIAL</fixed-case> - Un paradigme pour évaluer automatiquement des systèmes de dialogue homme-machine en simulant un utilisateur de façon déterministe [<fixed-case>SIMDIAL</fixed-case> - A paradigm for the automatic evaluation of human-machine dialogue systems by deterministic simulation of a user]</title>
       <author><first>Joseph</first><last>Allemandou</last></author>
       <author><first>Laurent</first><last>Charnay</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <author><first>Muriel</first><last>Lauvergne</last></author>
-      <author><first>Joseph</first><last>Mariani</last></author>
+      <author id="joseph-mariani"><first>Joseph</first><last>Mariani</last></author>
       <pages>115–139</pages>
       <url hash="d602bbd5">2007.tal-1.5</url>
       <language>fra</language>
@@ -76,7 +76,7 @@
     <paper id="8">
       <title>Prosodic Phrase Break Prediction: Problems in the Evaluation of Models against a Gold Standard</title>
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>187–206</pages>
       <url hash="10450a2a">2007.tal-1.8</url>
       <bibkey>brierley-atwell-2007-prediction</bibkey>
@@ -139,7 +139,7 @@
     <paper id="6">
       <title>Éléments pour adapter les systèmes de recherche d’information aux dyslexiques [Towards adapting information retrieval systems to dyslexic people]</title>
       <author><first>Lauriane</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <pages>123–147</pages>
       <url hash="bf097379">2007.tal-2.6</url>
@@ -160,7 +160,7 @@
     <paper id="1">
       <title>Préface [Foreword]</title>
       <author><first>Christian</first><last>Cuxac</last></author>
-      <author><first>Patrice</first><last>Dalle</last></author>
+      <author id="patrice-dalle"><first>Patrice</first><last>Dalle</last></author>
       <pages>7–10</pages>
       <url hash="a3099234">2007.tal-3.1</url>
       <language>fra</language>
@@ -169,7 +169,7 @@
     <paper id="2">
       <title>Research Directions in Sign Language Processing</title>
       <author><first>Christian</first><last>Cuxac</last></author>
-      <author><first>Patrice</first><last>Dalle</last></author>
+      <author id="patrice-dalle"><first>Patrice</first><last>Dalle</last></author>
       <pages>15–30</pages>
       <url hash="f66d383a">2007.tal-3.2</url>
       <bibkey>cuxac-dalle-2007-research</bibkey>
@@ -212,7 +212,7 @@
     <paper id="7">
       <title>Description lexicale des signes — Intérêts linguistiques d’un modèle géométrique à dépendances [Lexical Description of Signs — Linguistic Benefits of a Geometric Dependency Model]</title>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <pages>151–177</pages>
       <url hash="6d49f1c5">2007.tal-3.7</url>
       <language>fra</language>
@@ -221,7 +221,7 @@
     <paper id="8">
       <title>Modèles et méthodes de traitement d’images pour l’analyse de la langue des signes [Image processing models and methods for sign language analysis]</title>
       <author><first>Frédérick</first><last>Gianni</last></author>
-      <author><first>Christophe</first><last>Collet</last></author>
+      <author id="christophe-collet"><first>Christophe</first><last>Collet</last></author>
       <author><first>François</first><last>Lefebvre</last></author>
       <pages>175–200</pages>
       <url hash="4876b144">2007.tal-3.8</url>
diff --git a/data/xml/2007.tc.xml b/data/xml/2007.tc.xml
index 26f457ac3a..1a752495e5 100644
--- a/data/xml/2007.tc.xml
+++ b/data/xml/2007.tc.xml
@@ -12,7 +12,7 @@
     <paper id="1">
       <title>Making a sow’s ear out of a silk purse: (mis)using online <fixed-case>MT</fixed-case> services as bilingual dictionaries</title>
       <author><first>Federico</first><last>Gaspari</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="56f296d1">2007.tc-1.1</url>
       <bibkey>gaspari-somers-2007-making</bibkey>
     </paper>
@@ -25,7 +25,7 @@
     <paper id="3">
       <title>A dynamic dictionary for discovering indirect translation equivalents</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <url hash="6a9154f4">2007.tc-1.3</url>
       <bibkey>babych-etal-2007-dynamic</bibkey>
@@ -45,13 +45,13 @@
     </paper>
     <paper id="6">
       <title>Lost in specialised translation: the corpus as an inexpensive and under-exploited aid for language service providers</title>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <url hash="b5c2ae2c">2007.tc-1.6</url>
       <bibkey>corpas-pastor-2007-lost</bibkey>
     </paper>
     <paper id="7">
       <title>Medical spoken language translation: What do the users really need?</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="fd077b60">2007.tc-1.7</url>
       <bibkey>somers-2007-medical</bibkey>
     </paper>
@@ -70,7 +70,7 @@
     <paper id="10">
       <title>Building a bilingual dictionary from movie subtitles based on inter-lingual triggers</title>
       <author><first>Caroline</first><last>Lavecchia</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <author><first>David</first><last>Langlois</last></author>
       <url hash="1d721727">2007.tc-1.10</url>
       <bibkey>lavecchia-etal-2007-building</bibkey>
diff --git a/data/xml/2007.tmi.xml b/data/xml/2007.tmi.xml
index 90ec9a27a2..5dc36df288 100644
--- a/data/xml/2007.tmi.xml
+++ b/data/xml/2007.tmi.xml
@@ -12,20 +12,20 @@
     </meta>
     <paper id="1">
       <title>Rule-based and statistical machine translation with a focus on <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
       <url hash="fc55a446">2007.tmi-plenaries.1</url>
       <bibkey>hein-2007-rule</bibkey>
     </paper>
     <paper id="2">
       <title>Statistical <fixed-case>MT</fixed-case> from <fixed-case>TMI</fixed-case>-1988 to <fixed-case>TMI</fixed-case>-2007: what has happened?</title>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="761f11cb">2007.tmi-plenaries.2</url>
       <attachment type="presentation" hash="eac62dd8">2007.tmi-plenaries.2.Presentation.pdf</attachment>
       <bibkey>ney-2007-statistical</bibkey>
     </paper>
     <paper id="3">
       <title>Is <fixed-case>MT</fixed-case> in crisis?</title>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <url hash="3adde6e9">2007.tmi-plenaries.3</url>
       <attachment type="presentation" hash="4e4c02f3">2007.tmi-plenaries.3.Presentation.pdf</attachment>
       <bibkey>krauwer-2007-mt</bibkey>
@@ -38,7 +38,7 @@
       <month>September 7-9</month>
       <year>2007</year>
       <editor><first>Andy</first><last>Way</last></editor>
-      <editor><first>Barbara</first><last>Gawronska</last></editor>
+      <editor id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></editor>
       <venue>tmi</venue>
     </meta>
     <frontmatter>
@@ -48,8 +48,8 @@
     <paper id="1">
       <title>An assessment of language elicitation without the supervision of a linguist</title>
       <author><first>Alison</first><last>Alvarez</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Jill</first><last>Lehman</last></author>
       <url hash="eb555b70">2007.tmi-papers.1</url>
       <attachment type="presentation" hash="222b28c9">2007.tmi-papers.1.Presentation.pdf</attachment>
@@ -59,7 +59,7 @@
       <title>Combining translation models in statistical machine translation</title>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
       <author><first>Ismael</first><last>Garcia-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="38fcca7b">2007.tmi-papers.2</url>
       <attachment type="presentation" hash="ab3fafed">2007.tmi-papers.2.Presentation.pdf</attachment>
       <bibkey>andres-ferrer-etal-2007-combining</bibkey>
@@ -109,7 +109,7 @@
     <paper id="8">
       <title>A new method for the study of correlations between <fixed-case>MT</fixed-case> evaluation metrics</title>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Maghi</first><last>King</last></author>
       <url hash="e8f5d42c">2007.tmi-papers.8</url>
       <attachment type="presentation" hash="9b01f7c6">2007.tmi-papers.8.Presentation.pdf</attachment>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="13">
       <title>A greedy decoder for phrase-based statistical machine translation</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <url hash="3317aa0b">2007.tmi-papers.13</url>
@@ -190,7 +190,7 @@
       <author><first>Eric</first><last>Nichols</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Darren Scott</first><last>Appling</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="372a82b4">2007.tmi-papers.17</url>
       <attachment type="presentation" hash="95a2443d">2007.tmi-papers.17.Presentation.pdf</attachment>
       <bibkey>nichols-etal-2007-combining</bibkey>
@@ -202,15 +202,15 @@
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <url hash="b461782a">2007.tmi-papers.18</url>
       <bibkey>oepen-etal-2007-towards</bibkey>
     </paper>
     <paper id="19">
       <title>Reducing human assessment of machine translation quality to binary classifiers</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="ae48b74c">2007.tmi-papers.19</url>
       <attachment type="presentation" hash="015fb103">2007.tmi-papers.19.Presentation.pdf</attachment>
       <bibkey>paul-etal-2007-reducing</bibkey>
@@ -224,7 +224,7 @@
     <paper id="21">
       <title>Word reordering in statistical machine translation with a <fixed-case>POS</fixed-case>-based distortion model</title>
       <author><first>Kay</first><last>Rottmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="0fe2dfcd">2007.tmi-papers.21</url>
       <attachment type="presentation" hash="81ece79c">2007.tmi-papers.21.Presentation.pdf</attachment>
       <bibkey>rottmann-vogel-2007-word</bibkey>
@@ -232,15 +232,15 @@
     <paper id="22">
       <title>Automatic induction of shallow-transfer rules for open-source machine translation</title>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="bf7efe6d">2007.tmi-papers.22</url>
       <attachment type="presentation" hash="ed2393f6">2007.tmi-papers.22.Presentation.pdf</attachment>
       <bibkey>sanchez-martinez-forcada-2007-automatic</bibkey>
     </paper>
     <paper id="23">
       <title>Reordering via n-best lists for <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque translation</title>
-      <author><first>Germán</first><last>Sanchis</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="3562bd28">2007.tmi-papers.23</url>
       <attachment type="presentation" hash="2d6aad2a">2007.tmi-papers.23.Presentation.pdf</attachment>
       <bibkey>sanchis-casacuberta-2007-reordering</bibkey>
@@ -251,13 +251,13 @@
       <author><first>Vassiliki</first><last>Spilioti</last></author>
       <author><first>Marina</first><last>Vassiliou</last></author>
       <author><first>Olga</first><last>Yannoutsou</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <url hash="6ba4cedc">2007.tmi-papers.24</url>
       <bibkey>sofianopoulos-etal-2007-demonstration</bibkey>
     </paper>
     <paper id="25">
       <title>Theoretical and methodological issues regarding the use of language technologies for patients with limited <fixed-case>E</fixed-case>nglish proficiency</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="b4db0c90">2007.tmi-papers.25</url>
       <attachment type="presentation" hash="5b58dc41">2007.tmi-papers.25.Presentation.pdf</attachment>
       <bibkey>somers-2007-theoretical</bibkey>
@@ -266,7 +266,7 @@
       <title>Hand in hand: automatic sign language to <fixed-case>E</fixed-case>nglish translation</title>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Philippe</first><last>Dreuw</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Sara</first><last>Morrissey</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="b32ad39a">2007.tmi-papers.26</url>
@@ -284,7 +284,7 @@
     <paper id="28">
       <title>Exploiting source similarity for <fixed-case>SMT</fixed-case> using context-informed features</title>
       <author><first>Nicolas</first><last>Stroppa</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="4789491e">2007.tmi-papers.28</url>
       <attachment type="presentation" hash="c614d7f3">2007.tmi-papers.28.Presentation.pdf</attachment>
diff --git a/data/xml/2008.amta.xml b/data/xml/2008.amta.xml
index 45191f1b26..e484453e15 100644
--- a/data/xml/2008.amta.xml
+++ b/data/xml/2008.amta.xml
@@ -47,14 +47,14 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>S</fixed-case>panish-to-<fixed-case>B</fixed-case>asque <fixed-case>M</fixed-case>ulti<fixed-case>E</fixed-case>ngine Machine Translation for a Restricted Domain</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
       <author><first>Jon</first><last>Igartua</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="0cffa517">2008.amta-papers.1</url>
       <pages>37-45</pages>
       <abstract>We present our initial strategy for Spanish-to-Basque MultiEngine Machine Translation, a language pair with very different structure and word order and with no huge parallel corpus available. This hybrid proposal is based on the combination of three different MT paradigms: Example-Based MT, Statistical MT and Rule- Based MT. We have evaluated the system, reporting automatic evaluation metrics for a corpus in a test domain. The first results obtained are encouraging.</abstract>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="2">
       <title>Exploiting Document-Level Context for Data-Driven Machine Translation</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <url hash="bb46a737">2008.amta-papers.2</url>
       <pages>46-55</pages>
       <abstract>This paper presents a method for exploiting document-level similarity between the documents in the training corpus for a corpus-driven (statistical or example-based) machine translation system and the input documents it must translate. The method is simple to implement, efficient (increases the translation time of an example-based system by only a few percent), and robust (still works even when the actual document boundaries in the input text are not known). Experiments on French-English and Arabic-English showed relative gains over the same system without using document-level similarity of up to 7.4% and 5.4%, respectively, on the BLEU metric.</abstract>
@@ -90,8 +90,8 @@
     </paper>
     <paper id="5">
       <title>Translation universals: do they exist? A corpus-based <fixed-case>NLP</fixed-case> study of convergence and simplification</title>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Naveed</first><last>Afzal</last></author>
       <author><first>Viktor</first><last>Pekar</last></author>
       <url hash="d971f9b5">2008.amta-papers.5</url>
@@ -101,8 +101,8 @@
     </paper>
     <paper id="6">
       <title>Computing multiple weighted reordering hypotheses for a phrase-based statistical machine translation system</title>
-      <author><first>Marta R.</first><last>Costa-Jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-Jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <url hash="dbe2d619">2008.amta-papers.6</url>
       <pages>82-88</pages>
       <abstract>Reordering is one source of error in statistical machine translation (SMT). This paper extends the study of the statistical machine reordering (SMR) approach, which uses the powerful techniques of the SMT systems to solve reordering problems. Here, the novelties yield in: (1) using the SMR approach in a SMT phrase-based system, (2) adding a feature function in the SMR step, and (3) analyzing the reordering hypotheses at several stages. Coherent improvements are reported in the TC-STAR task (Es/En) at a relatively low computational cost.</abstract>
@@ -141,7 +141,7 @@
     <paper id="10">
       <title>A Generalized Reordering Model for Phrase-Based Statistical Machine Translation</title>
       <author><first>Yanqing</first><last>He</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="3f391a12">2008.amta-papers.10</url>
       <pages>117-124</pages>
       <abstract>Phrase-based translation models are widely studied in statistical machine translation (SMT). However, the existing phrase-based translation models either can not deal with non-contiguous phrases or reorder phrases only by the rules without an effective reordering model. In this paper, we propose a generalized reordering model (GREM) for phrase-based statistical machine translation, which is not only able to capture the knowledge on the local and global reordering of phrases, but also is able to obtain some capabilities of phrasal generalization by using non-contiguous phrases. The experimental results have indicated that our model out- performs MEBTG (enhanced BTG with a maximum entropy-based reordering model) and HPTM (hierarchical phrase-based translation model) by improvement of 1.54% and 0.66% in BLEU.</abstract>
@@ -159,7 +159,7 @@
     <paper id="12">
       <title>Large-scale Discriminative n-gram Language Models for Statistical Machine Translation</title>
       <author><first>Zhifei</first><last>Li</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <url hash="25c77f42">2008.amta-papers.12</url>
       <pages>133-142</pages>
       <abstract>We extend discriminative n-gram language modeling techniques originally proposed for automatic speech recognition to a statistical machine translation task. In this context, we propose a novel data selection method that leads to good models using a fraction of the training data. We carry out systematic experiments on several benchmark tests for Chinese to English translation using a hierarchical phrase-based machine translation system, and show that a discriminative language model significantly improves upon a state-of-the-art baseline. The experiments also highlight the benefits of our data selection method.</abstract>
@@ -169,8 +169,8 @@
       <title>Are Multiple Reference Translations Necessary? Investigating the Value of Paraphrased Reference Translations in Parameter Optimization</title>
       <author><first>Nitin</first><last>Madnani</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="fdf54fd8">2008.amta-papers.13</url>
       <pages>143-152</pages>
       <abstract>Most state-of-the-art statistical machine translation systems use log-linear models, which are defined in terms of hypothesis features and weights for those features. It is standard to tune the feature weights in order to maximize a translation quality metric, using held-out test sentences and their corresponding reference translations. However, obtaining reference translations is expensive. In our earlier work (Madnani et al., 2007), we introduced a new full-sentence paraphrase technique, based on English-to-English decoding with an MT system, and demonstrated that the resulting paraphrases can be used to cut the number of human reference translations needed in half. In this paper, we take the idea a step further, asking how far it is possible to get with just a single good reference translation for each item in the development set. Our analysis suggests that it is necessary to invest in four or more human translations in order to significantly improve on a single translation augmented by monolingual paraphrases.</abstract>
@@ -216,8 +216,8 @@
       <title>Wider Pipelines: N-Best Alignments and Parses in <fixed-case>MT</fixed-case> Training</title>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="dd223139">2008.amta-papers.18</url>
       <pages>192-201</pages>
       <abstract>State-of-the-art statistical machine translation systems use hypotheses from several maximum a posteriori inference steps, including word alignments and parse trees, to identify translational structure and estimate the parameters of translation models. While this approach leads to a modular pipeline of independently developed components, errors made in these “single-best” hypotheses can propagate to downstream estimation steps that treat these inputs as clean, trustworthy training data. In this work we integrate N-best alignments and parses by using a probability distribution over these alternatives to generate posterior fractional counts for use in downstream estimation. Using these fractional counts in a DOP-inspired syntax-based translation system, we show significant improvements in translation quality over a single-best trained baseline.</abstract>
@@ -226,9 +226,9 @@
     <paper id="19">
       <title>Improving <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Translation for Technical Terms using Morphological Information</title>
       <author><first>Xianchao</first><last>Wu</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Takashi</first><last>Tsunakawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="8c613916">2008.amta-papers.19</url>
       <pages>202-211</pages>
       <abstract>The continuous emergence of new technical terms and the difficulty of keeping up with neologism in parallel corpora deteriorate the performance of statistical machine translation (SMT) systems. This paper explores the use of morphological information to improve English-to-Chinese translation for technical terms. To reduce the morpheme-level translation ambiguity, we group the morphemes into morpheme phrases and propose the use of domain information for translation candidate selection. In order to find correspondences of morpheme phrases between the source and target languages, we propose an algorithm to mine morpheme phrase translation pairs from a bilingual lexicon. We also build a cascaded translation model that dynamically shifts translation units from phrase level to word and morpheme phrase levels. The experimental results show the significant improvements over the current phrase-based SMT systems.</abstract>
@@ -236,10 +236,10 @@
     </paper>
     <paper id="20">
       <title>Mining the Web for Domain-Specific Translations</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Peter</first><last>Wei-Huai Hsu</last></author>
-      <author><first>Chiung-Hui</first><last>Tseng</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chiung-hui-tseng"><first>Chiung-Hui</first><last>Tseng</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="57028b0a">2008.amta-papers.20</url>
       <pages>212-221</pages>
       <abstract>We introduce a method for learning to find domain-specific translations for a given term on the Web. In our approach, the source term is transformed into an expanded query aimed at maximizing the probability of retrieving translations from a very large collection of mixed-code documents. The method involves automatically generating sets of target-language words from training data in specific domains, automatically selecting target words for effectiveness in retrieving documents containing the sought-after translations. At run time, the given term is transformed into an expanded query and submitted to a search engine, and ranked translations are extracted from the document snippets returned by the search engine. We present a prototype, TermMine, which applies the method to a Web search engine. Evaluations over a set of domains and terms show that TermMine outperforms state-of-the-art machine translation systems.</abstract>
@@ -248,7 +248,7 @@
     <paper id="21">
       <title>Two-Stage Translation: A Combined Linguistic and Statistical Machine Translation Framework</title>
       <author><first>Yushi</first><last>Xu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="24f8a916">2008.amta-papers.21</url>
       <pages>222-231</pages>
       <abstract>We propose a two-stage system for spoken language machine translation. In the first stage, the source sentence is parsed and paraphrased into an intermediate language which retains the words in the source language but follows the word order of the target language as much as feasible. This stage is mostly linguistic. In the second stage, a statistical MT is performed to translate the intermediate language into the target language. For the task of English-to-Mandarin translation, we achieved a 2.5 increase in BLEU score and a 45% decrease in GIZA-Alignment Crossover, on IWSLT-06 data. In a human evaluation of the sentences that differed, the two-stage system was preferred three times as often as the baseline.</abstract>
@@ -267,7 +267,7 @@
     <paper id="1">
       <title>Improving Syntax-Driven Translation Models by Re-structuring Divergent and Nonisomorphic Parse Tree Structures</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="6988a07d">2008.amta-srw.1</url>
       <pages>235-244</pages>
       <abstract>Syntax-based approaches to statistical MT require syntax-aware methods for acquiring their underlying translation models from parallel data. This acquisition process can be driven by syntactic trees for either the source or target language, or by trees on both sides. Work to date has demonstrated that using trees for both sides suffers from severe coverage problems. This is primarily due to the highly restrictive space of constituent segmentations that the trees on two sides introduce, which adversely affects the recall of the resulting translation models. Approaches that project from trees on one side, on the other hand, have higher levels of recall, but suffer from lower precision, due to the lack of syntactically-aware word alignments. In this paper we explore the issue of lexical coverage of the translation models learned in both of these scenarios. We specifically look at how the non-isomorphic nature of the parse trees for the two languages affects recall and coverage. We then propose a novel technique for restructuring target parse trees, that generates highly isomorphic target trees that preserve the syntactic boundaries of constituents that were aligned in the original parse trees. We evaluate the translation models learned from these restructured trees and show that they are significantly better than those learned using trees on both sides and trees on one side.</abstract>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="2">
       <title>Using Bilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Word Alignments to Resolve <fixed-case>PP</fixed-case>-attachment Ambiguity in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Victoria</first><last>Fossum</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <url hash="55c8d228">2008.amta-srw.2</url>
       <pages>245-253</pages>
@@ -284,8 +284,8 @@
     </paper>
     <paper id="3">
       <title>Combination of Machine Translation Systems via Hypothesis Selection from Combined N-Best Lists</title>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="fcefbb7e">2008.amta-srw.3</url>
       <pages>254-261</pages>
       <abstract>Different approaches in machine translation achieve similar translation quality with a variety of translations in the output. Recently it has been shown, that it is possible to leverage the individual strengths of various systems and improve the overall translation quality by combining translation outputs. In this paper we present a method of hypothesis selection which is relatively simple compared to system combination methods which construct a synthesis of the input hypotheses. Our method uses information from n-best lists from several MT systems and features on the sentence level which are independent from the MT systems involved to improve the translation quality.</abstract>
@@ -302,8 +302,8 @@
     <paper id="5">
       <title>Diacritization as a Machine Translation and as a Sequence Labeling Problem</title>
       <author><first>Tim</first><last>Schlippe</last></author>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="e3e9cdf4">2008.amta-srw.5</url>
       <pages>270-278</pages>
       <abstract>In this paper we describe and compare two techniques for the automatic diacritization of Arabic text: First, we treat diacritization as a monotone machine translation problem, proposing and evaluating several translation and language models, including word and character-based models separately and combined as well as a model which uses statistical machine translation (SMT) to post-edit a rule-based diacritization system. Then we explore a more traditional view of diacritization as a sequence labeling problem, and propose a solution using conditional random fields (Lafferty et al., 2001). All these techniques are compared through word error rate and diacritization error rate both in terms of full diacritization and ignoring vowel endings. The empirical experiments showed that the machine translation approaches perform better than the sequence labeling approaches concerning the error rates.</abstract>
@@ -363,12 +363,11 @@
       <title>Many-to-Many Multilingual Medical Speech Translation on a <fixed-case>PDA</fixed-case></title>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <url hash="a8019ed7">2008.amta-govandcom.4</url>
-      <pages/>
       <abstract>Particularly considering the requirement of high reliability, we argue that the most appropriate architecture for a medical speech translator that can be realised using today’s technology combines unidirectional (doctor to patient) translation, medium-vocabulary controlled language coverage, interlingua-based translation, an embedded help component, and deployability on a hand-held hardware platform. We present an overview of the Open Source MedSLT prototype, which has been developed in accordance with these design principles. The system is implemented on top of the Regulus and Nuance 8.5 platforms, translates patient examination questions for all language pairs in the set {English, French, Japanese, Arabic, Catalan}, using vocabularies of about 400 to 1 100 words, and can be run in a distributed client/server environment, where the client application is hosted on a Nokia Internet Tablet device.</abstract>
       <bibkey>kanzaki-etal-2008-many</bibkey>
     </paper>
@@ -400,8 +399,8 @@
     </paper>
     <paper id="8">
       <title>Automated Machine Translation Improvement Through Post-Editing Techniques: Analyst and Translator Experiments</title>
-      <author><first>Jennifer</first><last>Doyon</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>C. Donald</first><last>Means</last></author>
       <author><first>Domenique</first><last>Parr</last></author>
       <url hash="6e178776">2008.amta-govandcom.8</url>
@@ -412,7 +411,7 @@
     <paper id="9">
       <title>User-centered <fixed-case>MT</fixed-case> Development and Implementation</title>
       <author><first>Kathleen</first><last>Egan</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <author><first>Allen</first><last>Sears</last></author>
       <url hash="a37af409">2008.amta-govandcom.9</url>
       <pages>354-363</pages>
@@ -421,7 +420,7 @@
     <paper id="10">
       <title>Identifying Common Challenges for Human and Machine Translation: A Case Study from the <fixed-case>GALE</fixed-case> Program</title>
       <author><first>Lauren</first><last>Friedman</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <url hash="61e51219">2008.amta-govandcom.10</url>
       <pages>364-369</pages>
       <abstract>The dramatic improvements shown by statistical machine translation systems in recent years clearly demonstrate the benefits of having large quantities of manually translated parallel text for system training and development. And while many competing evaluation metrics exist to evaluate MT technology, most of those methods also crucially rely on the existence of one or more high quality human translations to benchmark system performance. Given the importance of human translations in this framework, understanding the particular challenges of human translation-for-MT is key, as is comprehending the relative strengths and weaknesses of human versus machine translators in the context of an MT evaluation. Vanni (2000) argued that the metric used for evaluation of competence in human language learners may be applicable to MT evaluation; we apply similar thinking to improve the prediction of MT performance, which is currently unreliable. In the current paper we explore an alternate model based upon a set of genre-defining features that prove to be consistently challenging for both humans and MT systems.</abstract>
@@ -441,7 +440,7 @@
     <paper id="12">
       <title>Designing and executing <fixed-case>MT</fixed-case> workflows through the Kepler Framework</title>
       <author><first>Reginald</first><last>Hobbs</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <url hash="6ff09da7">2008.amta-govandcom.12</url>
       <pages>380-389</pages>
       <bibkey>hobbs-voss-2008-designing</bibkey>
@@ -565,8 +564,8 @@
     </paper>
     <paper id="26">
       <title>Applications of <fixed-case>MT</fixed-case> during Olympic Games 2008</title>
-      <author><first>Chengqing</first><last>Zong</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
       <url hash="17ccaae6">2008.amta-govandcom.26</url>
       <pages>470-479</pages>
diff --git a/data/xml/2008.eamt.xml b/data/xml/2008.eamt.xml
index 9f3cc797c1..71302b37f8 100644
--- a/data/xml/2008.eamt.xml
+++ b/data/xml/2008.eamt.xml
@@ -7,9 +7,9 @@
       <address>Hamburg, Germany</address>
       <month>September 22-23</month>
       <year>2008</year>
-      <editor><first>John</first><last>Hutchins</last></editor>
-      <editor><first>Walther</first><last>v. Hahn</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="w-john-hutchins"><first>John</first><last>Hutchins</last></editor>
+      <editor id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <editor><first>John</first><last>Hutchins</last></editor>
       <venue>eamt</venue>
     </meta>
@@ -58,10 +58,10 @@
     </paper>
     <paper id="6">
       <title>Hybrid machine translation architectures within and beyond the <fixed-case>E</fixed-case>uro<fixed-case>M</fixed-case>atrix project</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
-      <author><first>Hervé</first><last>Saint-Amand</last></author>
+      <author id="herve-saint-amand"><first>Hervé</first><last>Saint-Amand</last></author>
       <author><first>Martin</first><last>Kay</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
@@ -83,17 +83,17 @@
     <paper id="8">
       <title>A finite-state framework for log-linear models in machine translation</title>
       <author><first>Jorge</first><last>González</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>41-46</pages>
       <url hash="e4e67fcb">2008.eamt-1.8</url>
       <bibkey>gonzalez-casacuberta-2008-finite</bibkey>
     </paper>
     <paper id="9">
       <title>A novel alignment model inspired on <fixed-case>IBM</fixed-case> Model 1</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>47-56</pages>
       <url hash="ebf0e447">2008.eamt-1.9</url>
       <bibkey>gonzalez-rubio-etal-2008-novel</bibkey>
@@ -101,7 +101,7 @@
     <paper id="10">
       <title>Packed rules for automatic transfer-rule induction</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>57-65</pages>
       <url hash="27f1bb01">2008.eamt-1.10</url>
       <bibkey>graham-van-genabith-2008-packed</bibkey>
@@ -117,7 +117,7 @@
     <paper id="12">
       <title>Improving machine translation between closely related <fixed-case>R</fixed-case>omance languages</title>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <pages>72-77</pages>
       <url hash="bed0ec56">2008.eamt-1.12</url>
       <bibkey>homola-kubon-2008-improving</bibkey>
@@ -132,8 +132,8 @@
     </paper>
     <paper id="14">
       <title>Applying boosting to statistical machine translation</title>
-      <author><first>Antonio L.</first><last>Lagarda</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio L.</first><last>Lagarda</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>88-96</pages>
       <url hash="053d93c0">2008.eamt-1.14</url>
       <bibkey>lagarda-casacuberta-2008-applying</bibkey>
@@ -141,7 +141,7 @@
     <paper id="15">
       <title>Word association models and search strategies for discriminative word alignment</title>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>97-103</pages>
       <url hash="17762cc0">2008.eamt-1.15</url>
       <bibkey>lambert-banchs-2008-word</bibkey>
@@ -149,7 +149,7 @@
     <paper id="16">
       <title>Automatic alignment of <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish deep syntactic dependency trees</title>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Václav</first><last>Novák</last></author>
       <pages>103-113</pages>
       <url hash="36aa3949">2008.eamt-1.16</url>
@@ -159,7 +159,7 @@
       <title>Explorations in using grammatical dependencies for contextual phrase translation disambiguation</title>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>Rafik</first><last>Makhloufi</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>114-119</pages>
       <url hash="21b16ada">2008.eamt-1.17</url>
       <bibkey>max-etal-2008-explorations</bibkey>
@@ -201,16 +201,16 @@
     </paper>
     <paper id="22">
       <title>Phrase-level alignment generation using a smoothed loglinear phrase-based statistical alignment model</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>160-169</pages>
       <url hash="76819b36">2008.eamt-1.22</url>
       <bibkey>ortiz-martinez-etal-2008-phrase</bibkey>
     </paper>
     <paper id="23">
       <title>Learning context-sensitive synchronous rules</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>170-175</pages>
       <url hash="e968ae7f">2008.eamt-1.23</url>
       <bibkey>sogaard-2008-learning</bibkey>
@@ -218,9 +218,9 @@
     <paper id="24">
       <title>Comparing two different bidirectional versions of the limited-domain medical spoken language translator <fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case></title>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Glenn</first><last>Flores</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <pages>176-181</pages>
       <url hash="a927b5bc">2008.eamt-1.24</url>
@@ -236,7 +236,7 @@
     </paper>
     <paper id="26">
       <title>Boosting performance of weak <fixed-case>MT</fixed-case> engines automatically: using <fixed-case>MT</fixed-case> output to align segments &amp; build statistical post-editors</title>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <author><first>Matthew</first><last>Aguirre</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
       <author><first>Richard</first><last>Chang</last></author>
diff --git a/data/xml/2008.iwslt.xml b/data/xml/2008.iwslt.xml
index 5d4b34fd99..70ab389ad1 100644
--- a/data/xml/2008.iwslt.xml
+++ b/data/xml/2008.iwslt.xml
@@ -25,7 +25,7 @@
     </meta>
     <paper id="1">
       <title>Overview of the <fixed-case>IWSLT</fixed-case> 2008 evaluation campaign.</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <pages>1-7</pages>
       <url hash="e940dec9">2008.iwslt-evaluation.1</url>
       <abstract>This paper gives an overview of the evaluation campaign results of the International1Workshop on Spoken Language Translation (IWSLT) 2008 . In this workshop, we focused on the translation of spontaneous speech recorded in a real situation and the feasability of pivot-language-based translation approaches. The translation directions were English into Chinese and vice versa for the Challenge Task, Chinese into English and English into Spanish for the Pivot Task, and Arabic, Chinese, Spanish into English for the standard BTEC Task. In total, 19 research groups building 58 MT engines participated in this year’s event. Automatic and subjective evaluations were carried out in order to investigate the impact of spontaneity aspects of field data experiments on automatic speech recognition (ASR) and machine translation (MT) system performance as well as the robustness of state-of-the-art MT systems towards speech-to-speech translation in real environments.</abstract>
@@ -35,7 +35,7 @@
       <title>The <fixed-case>CMU</fixed-case> syntax-augmented machine translation system: <fixed-case>SAMT</fixed-case> on Hadoop with n-best alignments.</title>
       <author><first>Andreas</first><last>Zollmann</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>18-25</pages>
       <url hash="c78aab4c">2008.iwslt-evaluation.2</url>
       <abstract>We present the CMU Syntax Augmented Machine Translation System that was used in the IWSLT-08 evaluation campaign. We participated in the Full-BTEC data track for Chinese-English translation, focusing on transcript translation. For this year’s evaluation, we ported the Syntax Augmented MT toolkit [1] to the Hadoop MapReduce [2] parallel processing architecture, allowing us to efficiently run experiments evaluating a novel “wider pipelines” approach to integrate evidence from N -best alignments into our translation models. We describe each step of the MapReduce pipeline as it is implemented in the open-source SAMT toolkit, and show improvements in translation quality by using N-best alignments in both hierarchical and syntax augmented translation systems.</abstract>
@@ -45,7 +45,7 @@
       <title>Exploiting alignment techniques in <fixed-case>MATREX</fixed-case>: the <fixed-case>DCU</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2008.</title>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>John</first><last>Tinsley</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>26-33</pages>
@@ -78,9 +78,9 @@
     <paper id="6">
       <title><fixed-case>I</fixed-case>2<fixed-case>R</fixed-case> multi-pass machine translation system for <fixed-case>IWSLT</fixed-case> 2008.</title>
       <author><first>Boxing</first><last>Chen</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>46-51</pages>
       <url hash="88c34853">2008.iwslt-evaluation.6</url>
@@ -95,7 +95,7 @@
       <author><first>Yun</first><last>Huang</last></author>
       <author><first>Yang</first><last>Feng</last></author>
       <author><first>Wenbin</first><last>Jiang</last></author>
-      <author><first>Yajuan</first><last>Lu</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>52-57</pages>
       <url hash="5b5f0252">2008.iwslt-evaluation.7</url>
@@ -126,7 +126,7 @@
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2008 <fixed-case>MT</fixed-case> system.</title>
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Ray</first><last>Slyh</last></author>
       <pages>69-76</pages>
       <url hash="aea121a4">2008.iwslt-evaluation.10</url>
@@ -138,11 +138,11 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Keiji</first><last>Yasuda</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>77-84</pages>
       <url hash="f4a98099">2008.iwslt-evaluation.11</url>
       <abstract>This paper describes the National Institute of Information and Communications Technology/Advanced Telecommunications Research Institute International (NICT/ATR) statistical machine translation (SMT) system used for the IWSLT 2008 evaluation campaign. We participated in the Chinese–English (Challenge Task), English–Chinese (Challenge Task), Chinese–English (BTEC Task), Chinese–Spanish (BTEC Task), and Chinese–English–Spanish (PIVOT Task) translation tasks. In the English–Chinese translation Challenge Task, we focused on exploring various factors for the English–Chinese translation because the research on the translation of English–Chinese is scarce compared to the opposite direction. In the Chinese–English translation Challenge Task, we employed a novel clustering method, where training sentences similar to the development data in terms of the word error rate formed a cluster. In the pivot translation task, we integrated two strategies for pivot translation by linear interpolation.</abstract>
@@ -156,7 +156,7 @@
       <author><first>Licheng</first><last>Fang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>85-91</pages>
       <url hash="bffcea9d">2008.iwslt-evaluation.12</url>
       <abstract>This paper describes our statistical machine translation system (CASIA) used in the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2008. In this year's evaluation, we participated in challenge task for Chinese-English and English-Chinese, BTEC task for Chinese-English. Here, we mainly introduce the overview of our system, the primary modules, the key techniques, and the evaluation results.</abstract>
@@ -177,7 +177,7 @@
     <paper id="14">
       <title><fixed-case>POSTECH</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2008 evaluation campaign.</title>
       <author><first>Jonghoon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>98-103</pages>
       <url hash="25b088fd">2008.iwslt-evaluation.14</url>
       <abstract>In this paper, we describe POSTECH system for IWSLT 2008 evaluation campaign. The system is based on phrase based statistical machine translation. We set up a baseline system using well known freely available software. A preprocessing method and a language modeling method have been applied to the baseline system in order to improve machine translation quality. The preprocessing method is to identify and remove useless tokens in source texts. And the language modeling method models phrase level n-gram. We have participated in the BTEC tasks to see the effects of our methods.</abstract>
@@ -202,7 +202,7 @@
       <author><first>Arne</first><last>Mauser</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>108-115</pages>
       <url hash="b456c89b">2008.iwslt-evaluation.16</url>
       <abstract>RWTH’s system for the 2008 IWSLT evaluation consists of a combination of different phrase-based and hierarchical statistical machine translation systems. We participated in the translation tasks for the Chinese-to-English and Arabic-to-English language pairs. We investigated different preprocessing techniques, reordering methods for the phrase-based system, including reordering of speech lattices, and syntax-based enhancements for the hierarchical systems. We also tried the combination of the Arabic-to-English and Chinese-to-English outputs as an additional submission.</abstract>
@@ -213,13 +213,13 @@
       <author><first>Maxim</first><last>Khalilov</last></author>
       <author><first>Maria R.</first><last>Costa-jussà</last></author>
       <author><first>Carlos A. Henríquez</first><last>Q.</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Adolfo Hernández</first><last>H.</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Chen</first><last>Boxing</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>116-123</pages>
       <url hash="0ca95440">2008.iwslt-evaluation.17</url>
@@ -234,7 +234,7 @@
       <author><first>Zhanyi</first><last>Liu</last></author>
       <author><first>Jianfeng</first><last>Li</last></author>
       <author><first>Dengjun</first><last>Ren</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>124-131</pages>
       <url hash="6c2e0c84">2008.iwslt-evaluation.18</url>
       <abstract>This paper reports on the first participation of TCH (Toshiba (China) Research and Development Center) at the IWSLT evaluation campaign. We participated in all the 5 translation tasks with Chinese as source language or target language. For Chinese-English and English-Chinese translation, we used hybrid systems that combine rule-based machine translation (RBMT) method and statistical machine translation (SMT) method. For Chinese-Spanish translation, phrase-based SMT models were used. For the pivot task, we combined the translations generated by a pivot based statistical translation model and a statistical transfer translation model (firstly, translating from Chinese to English, and then from English to Spanish). Moreover, for better performance of MT, we improved each module in the MT systems as follows: adapting Chinese word segmentation to spoken language translation, selecting out-of-domain corpus to build language models, using bilingual dictionaries to correct word alignment results, handling NE translation and selecting translations from the outputs of multiple systems. According to the automatic evaluation results on the full test sets, we top in all the 5 tasks.</abstract>
@@ -286,7 +286,7 @@
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Eric</first><last>Nichols</last></author>
       <author><first>Darren</first><last>Scott Appling</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <pages>150-157</pages>
       <url hash="43fb1ff3">2008.iwslt-papers.2</url>
       <abstract>Large amounts of training data are essential for training statistical machine translations systems. In this paper we show how training data can be expanded by paraphrasing one side. The new data is made by parsing then generating using a precise HPSG based grammar, which gives sentences with the same meaning, but minor variations in lexical choice and word order. In experiments with Japanese and English, we showed consistent gains on the Tanaka Corpus with less consistent improvement on the IWSLT 2005 evaluation data.</abstract>
@@ -311,7 +311,7 @@
       <author><first>R.</first><last>Prasad</last></author>
       <author><first>F.</first><last>Choi</last></author>
       <author><first>P.</first><last>Natarajan</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <author><first>K.</first><last>Krstovski</last></author>
       <author><first>M.</first><last>Kamali</last></author>
       <pages>166-173</pages>
@@ -326,7 +326,7 @@
       <author><first>Florian</first><last>Kraft</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Matthias</first><last>Paulik</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>174-181</pages>
       <url hash="f422edcd">2008.iwslt-papers.5</url>
       <abstract>In an increasingly globalized world, situations in which people of different native tongues have to communicate with each other become more and more frequent. In many such situations, human interpreters are prohibitively expensive or simply not available. Automatic spoken language translation (SLT), as a cost-effective solution to this dilemma, has received increased attention in recent years. For a broad number of applications, including live SLT of lectures and oral presentations, these automatic systems should ideally operate in real time and with low latency. Large and highly specialized vocabularies as well as strong variations in speaking style – ranging from read speech to free presentations suffering from spontaneous events – make simultaneous SLT of lectures a challenging task. This paper presents our progress in building a simultaneous German-English lecture translation system. We emphasize some of the challenges which are particular to this language pair and propose solutions to tackle some of the problems encountered.</abstract>
@@ -344,7 +344,7 @@
       <title>Analysing soft syntax features and heuristics for hierarchical phrase based machine translation.</title>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>190-197</pages>
       <url hash="4fc5518b">2008.iwslt-papers.7</url>
       <abstract>Similar to phrase-based machine translation, hierarchical systems produce a large proportion of phrases, most of which are supposedly junk and useless for the actual translation. For the hierarchical case, however, the amount of extracted rules is an order of magnitude bigger. In this paper, we investigate several soft constraints in the extraction of hierarchical phrases and whether these help as additional scores in the decoding to prune unneeded phrases. We show the methods that help best.</abstract>
@@ -353,7 +353,7 @@
     <paper id="8">
       <title>Improvements in dynamic programming beam search for phrase-based statistical machine translation.</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>195-205</pages>
       <url hash="6bd85ef3">2008.iwslt-papers.8</url>
       <abstract>Search is a central component of any statistical machine translation system. We describe the search for phrase-based SMT in detail and show its importance for achieving good translation quality. We introduce an explicit distinction between reordering and lexical hypotheses and organize the pruning accordingly. We show that for the large Chinese-English NIST task already a small number of lexical alternatives is sufficient, whereas a large number of reordering hypotheses is required to achieve good translation quality. The resulting system compares favorably with the current stateof-the-art, in particular we perform a comparison with cube pruning as well as with Moses.</abstract>
diff --git a/data/xml/2008.jeptalnrecital.xml b/data/xml/2008.jeptalnrecital.xml
index afe8f90df3..29c41181e4 100644
--- a/data/xml/2008.jeptalnrecital.xml
+++ b/data/xml/2008.jeptalnrecital.xml
@@ -3,8 +3,8 @@
   <volume id="long" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
-      <editor><first>Jean-Francois</first><last>Bonastre</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></editor>
       <publisher>ATALA</publisher>
       <address>Avignon, France</address>
       <month>June</month>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>Réécriture et Détection d’Implication Textuelle</title>
-      <author><first>Paul</first><last>Bédaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bédaride</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>11–20</pages>
       <abstract>Nous présentons un système de normalisation de la variation syntaxique qui permet de mieux reconnaître la relation d’implication textuelle entre deux phrases. Le système est évalué sur une suite de tests comportant 2 520 paires test et les résultats montrent un gain en précision par rapport à un système de base variant entre 29.8 et 78.5 points la complexité des cas considérés.</abstract>
@@ -60,7 +60,7 @@
     <paper id="5">
       <title>Un modèle multi-sources pour la segmentation en sujets de journaux radiophoniques</title>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>41–50</pages>
       <abstract>Nous présentons une méthode de segmentation de journaux radiophoniques en sujets, basée sur la prise en compte d’indices lexicaux, syntaxiques et acoustiques. Partant d’un modèle statistique existant de segmentation thématique, exploitant la notion de cohésion lexicale, nous étendons le formalisme pour y inclure des informations d’ordre syntaxique et acoustique. Les résultats expérimentaux montrent que le seul modèle de cohésion lexicale ne suffit pas pour le type de documents étudié en raison de la taille variable des segments et de l’absence d’un lien direct entre segment et thème. L’utilisation d’informations syntaxiques et acoustiques permet une amélioration substantielle de la segmentation obtenue.</abstract>
@@ -71,9 +71,9 @@
     <paper id="6">
       <title>Extraction automatique d’informations à partir de micro-textes non structurés</title>
       <author><first>Cédric</first><last>Vidrequin</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <author><first>Jean-Jacques</first><last>Schneider</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>51–60</pages>
       <abstract>Nous présentons dans cet article une méthode d’extraction automatique d’informations sur des textes de très petite taille, faiblement structurés. Nous travaillons sur des textes dont la rédaction n’est pas normalisée, avec très peu de mots pour caractériser chaque information. Les textes ne contiennent pas ou très peu de phrases. Il s’agit le plus souvent de morceaux de phrases ou d’expressions composées de quelques mots. Nous comparons plusieurs méthodes d’extraction, dont certaines sont entièrement automatiques. D’autres utilisent en partie une connaissance du domaine que nous voulons réduite au minimum, de façon à minimiser le travail manuel en amont. Enfin, nous présentons nos résultats qui dépassent ce dont il est fait état dans la littérature, avec une précision équivalente et un rappel supérieur.</abstract>
       <url hash="4d5fe223">2008.jeptalnrecital-long.6</url>
@@ -82,9 +82,9 @@
     </paper>
     <paper id="7">
       <title>Quelles combinaisons de scores et de critères numériques pour un système de Questions/Réponses ?</title>
-      <author><first>Laurent</first><last>Gillard</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>61–70</pages>
       <abstract>Dans cet article, nous présentons une discussion sur la combinaison de différents scores et critères numériques pour la sélection finale d’une réponse dans la partie en charge des questions factuelles du système de Questions/Réponses développé au LIA. Ces scores et critères numériques sont dérivés de ceux obtenus en sortie de deux composants cruciaux pour notre système : celui de sélection des passages susceptibles de contenir une réponse et celui d’extraction et de sélection d’une réponse. Ils sont étudiés au regard de leur expressivité. Des comparaisons sont faites avec des approches de sélection de passages mettant en oeuvre des scores conventionnels en recherche d’information. Parallèlement, l’influence de la taille des contextes (en nombre de phrases) est évaluée. Cela permet de mettre en évidence que le choix de passages constitués de trois phrases autour d’une réponse candidate, avec une sélection des réponses basée sur une combinaison entre un score de passage de type Lucene ou Cosine et d’un score de compacité apparaît comme un compromis intéressant.</abstract>
       <url hash="c14d9d8c">2008.jeptalnrecital-long.7</url>
@@ -103,8 +103,8 @@
     </paper>
     <paper id="9">
       <title>Modélisation du principe d’ancrage pour la robustesse des systèmes de dialogue homme-machine finalisés</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <pages>81–90</pages>
       <abstract>Cet article présente une modélisation du principe d’ancrage (grounding) pour la robustesse des systèmes de dialogue finalisés. Ce principe, décrit dans (Clark &amp; Schaefer, 1989), suggère que les participants à un dialogue fournissent des preuves de compréhension afin d’atteindre la compréhension mutuelle. Nous explicitons une définition computationnelle du principe d’ancrage fondée sur des jugements de compréhension qui, contrairement à d’autres modèles, conserve une motivation pour l’expression de la compréhension. Nous déroulons enfin le processus d’ancrage sur un exemple tiré de l’implémentation du modèle.</abstract>
       <url hash="5d2ad37a">2008.jeptalnrecital-long.9</url>
@@ -115,7 +115,7 @@
       <title>Enertex : un système basé sur l’énergie textuelle</title>
       <author><first>Silvia</first><last>Fernández</last></author>
       <author><first>Eric</first><last>Sanjuan</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>91–100</pages>
       <abstract>Dans cet article, nous présentons des applications du système Enertex au Traitement Automatique de la Langue Naturelle. Enertex est basé sur l’énergie textuelle, une approche par réseaux de neurones inspirée de la physique statistique des systèmes magnétiques. Nous avons appliqué cette approche aux problèmes du résumé automatique multi-documents et de la détection de frontières thématiques. Les résultats, en trois langues : anglais, espagnol et français, sont très encourageants.</abstract>
       <url hash="207c4949">2008.jeptalnrecital-long.10</url>
@@ -125,9 +125,9 @@
     <paper id="11">
       <title>Intégration d’une étape de pré-filtrage et d’une fonction multiobjectif en vue d’améliorer le système <fixed-case>E</fixed-case>xtra<fixed-case>N</fixed-case>ews de résumé de documents multiples</title>
       <author><first>Fatma</first><last>Kallel Jaoua</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <author><first>Maher</first><last>Jaoua</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>101–110</pages>
       <abstract>Dans cet article, nous présentons les améliorations que nous avons apportées au système ExtraNews de résumé automatique de documents multiples. Ce système se base sur l’utilisation d’un algorithme génétique qui permet de combiner les phrases des documents sources pour former les extraits, qui seront croisés et mutés pour générer de nouveaux extraits. La multiplicité des critères de sélection d’extraits nous a inspiré une première amélioration qui consiste à utiliser une technique d’optimisation multi-objectif en vue d’évaluer ces extraits. La deuxième amélioration consiste à intégrer une étape de pré-filtrage de phrases qui a pour objectif la réduction du nombre des phrases des textes sources en entrée. Une évaluation des améliorations apportées à notre système est réalisée sur les corpus de DUC’04 et DUC’07.</abstract>
       <url hash="3929fef0">2008.jeptalnrecital-long.11</url>
@@ -136,7 +136,7 @@
     </paper>
     <paper id="12">
       <title>Recherche locale pour la traduction statistique à base de segments</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <pages>111–120</pages>
@@ -149,7 +149,7 @@
       <title>Transcrire les <fixed-case>SMS</fixed-case> comme on reconnaît la parole</title>
       <author><first>Catherine</first><last>Kobus</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>121–130</pages>
       <abstract>Cet article présente une architecture inspirée des systèmes de reconnaissance vocale pour effectuer une normalisation orthographique de messages en « langage SMS ». Nous décrivons notre système de base, ainsi que diverses évolutions de ce système, qui permettent d’améliorer sensiblement la qualité des normalisations produites.</abstract>
       <url hash="1edfbede">2008.jeptalnrecital-long.13</url>
@@ -186,8 +186,8 @@
     </paper>
     <paper id="17">
       <title>Expériences d’analyse syntaxique statistique du français</title>
-      <author><first>Benoît</first><last>Crabbé</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>161–170</pages>
       <abstract>Nous montrons qu’il est possible d’obtenir une analyse syntaxique statistique satisfaisante pour le français sur du corpus journalistique, à partir des données issues du French Treebank du laboratoire LLF, à l’aide d’un algorithme d’analyse non lexicalisé.</abstract>
       <url hash="4a830b3b">2008.jeptalnrecital-long.17</url>
@@ -196,7 +196,7 @@
     </paper>
     <paper id="18">
       <title>Construction d’un wordnet libre du français à partir de ressources multilingues</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <pages>171–180</pages>
       <abstract>Cet article décrit la construction d’un Wordnet Libre du Français (WOLF) à partir du Princeton WordNet et de diverses ressources multilingues. Les lexèmes polysémiques ont été traités au moyen d’une approche reposant sur l’alignement en mots d’un corpus parallèle en cinq langues. Le lexique multilingue extrait a été désambiguïsé sémantiquement à l’aide des wordnets des langues concernées. Par ailleurs, une approche bilingue a été suffisante pour construire de nouvelles entrées à partir des lexèmes monosémiques. Nous avons pour cela extrait des lexiques bilingues à partir deWikipédia et de thésaurus. Le wordnet obtenu a été évalué par rapport au wordnet français issu du projet EuroWordNet. Les résultats sont encourageants, et des applications sont d’ores et déjà envisagées.</abstract>
@@ -220,7 +220,7 @@
       <author><first>Aïda</first><last>Khemakhem</last></author>
       <author><first>Bilel</first><last>Gargouri</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>192–201</pages>
       <abstract>Le présent papier s’intéresse à l’élaboration des dictionnaires électroniques arabes à usage éditorial. Il propose un modèle unifié et normalisé de ces dictionnaires en se référant à la future norme LMF (Lexical Markup Framework) ISO 24613. Ce modèle permet de construire des dictionnaires extensibles, sur lesquels on peut réaliser, grâce à une structuration fine et standard, des fonctions de consultation génériques adaptées aux besoins des utilisateurs. La mise en oeuvre du modèle proposé est testée sur des dictionnaires existants de la langue arabe en utilisant, pour la consultation, le système ADIQTO (Arabic DIctionary Query TOols) que nous avons développé pour l’interrogation générique des dictionnaires normalisés de l’arabe.</abstract>
       <url hash="4d42c86f">2008.jeptalnrecital-long.20</url>
@@ -240,7 +240,7 @@
     <paper id="22">
       <title>Une alternative aux modèles de traduction statistique d’<fixed-case>IBM</fixed-case>: Les triggers inter-langues</title>
       <author><first>Caroline</first><last>Lavecchia</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>David</first><last>Langlois</last></author>
       <pages>212–221</pages>
       <abstract>Dans cet article, nous présentons une nouvelle approche pour la traduction automatique fondée sur les triggers inter-langues. Dans un premier temps, nous expliquons le concept de triggers inter-langues ainsi que la façon dont ils sont déterminés. Nous présentons ensuite les différentes expérimentations qui ont été menées à partir de ces triggers afin de les intégrer au mieux dans un processus complet de traduction automatique. Pour cela, nous construisons à partir des triggers inter-langues des tables de traduction suivant différentes méthodes. Nous comparons par la suite notre système de traduction fondé sur les triggers interlangues à un système état de l’art reposant sur le modèle 3 d’IBM (Brown &amp; al., 1993). Les tests menés ont montré que les traductions automatiques générées par notre système améliorent le score BLEU (Papineni &amp; al., 2001) de 2, 4% comparé à celles produites par le système état de l’art.</abstract>
@@ -259,7 +259,7 @@
     </paper>
     <paper id="24">
       <title>Les architectures linguistiques et computationnelles en traduction automatique sont indépendantes</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>232–241</pages>
       <abstract>Contrairement à une idée répandue, les architectures linguistiques et computationnelles des systèmes de traduction automatique sont indépendantes. Les premières concernent le choix des représentations intermédiaires, les secondes le type d’algorithme, de programmation et de ressources utilisés. Il est ainsi possible d’utiliser des méthodes de calcul « expertes » ou « empiriques » pour construire diverses phases ou modules de systèmes d’architectures linguistiques variées. Nous terminons en donnant quelques éléments pour le choix de ces architectures en fonction des situations traductionnelles et des ressources disponibles, en termes de dictionnaires, de corpus, et de compétences humaines.</abstract>
       <url hash="ac8db0cb">2008.jeptalnrecital-long.24</url>
@@ -317,7 +317,7 @@
     </paper>
     <paper id="30">
       <title>Apprentissage artificiel de règles d’indexation pour <fixed-case>MEDLINE</fixed-case></title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
       <pages>292–301</pages>
       <abstract>L’indexation est une composante importante de tout système de recherche d’information. Dans MEDLINE, la base documentaire de référence pour la littérature du domaine biomédical, le contenu des articles référencés est indexé à l’aide de descripteurs issus du thésaurus MeSH. Avec l’augmentation constante de publications à indexer pour maintenir la base à jour, le besoin d’outils automatiques se fait pressant pour les indexeurs. Dans cet article, nous décrivons l’utilisation et l’adaptation de la Programmation Logique Inductive (PLI) pour découvrir des règles d’indexation permettant de générer automatiquement des recommandations d’indexation pour MEDLINE. Les résultats obtenus par cette approche originale sont très satisfaisants comparés à ceux obtenus à l’aide de règles manuelles lorsque celles-ci existent. Ainsi, les jeux de règles obtenus par PLI devraient être prochainement intégrés au système produisant les recommandations d’indexation automatique pour MEDLINE.</abstract>
@@ -329,8 +329,8 @@
   <volume id="court" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts</booktitle>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
-      <editor><first>Jean-Francois</first><last>Bonastre</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></editor>
       <publisher>ATALA</publisher>
       <address>Avignon, France</address>
       <month>June</month>
@@ -353,7 +353,7 @@
     <paper id="2">
       <title>Calculs d’unification sur les arbres de dérivation <fixed-case>TAG</fixed-case></title>
       <author><first>Sylvain</first><last>Schmitz</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>11–20</pages>
       <abstract>Nous définissons un formalisme, les grammaires rationnelles d’arbres avec traits, et une traduction des grammaires d’arbres adjoints avec traits vers ce nouveau formalisme. Cette traduction préserve les structures de dérivation de la grammaire d’origine en tenant compte de l’unification de traits. La construction peut être appliquée aux réalisateurs de surface qui se fondent sur les arbres de dérivation.</abstract>
       <url hash="8b16167d">2008.jeptalnrecital-court.2</url>
@@ -403,8 +403,8 @@
     <paper id="7">
       <title><fixed-case>E</fixed-case>-Gen : Profilage automatique de candidatures</title>
       <author><first>Rémy</first><last>Kessler</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>61–70</pages>
       <abstract>La croissance exponentielle de l’Internet a permis le développement de sites d’offres d’emploi en ligne. Le système E-Gen (Traitement automatique d’offres d’emploi) a pour but de permettre l’analyse et la catégorisation d’offres d’emploi ainsi qu’une analyse et classification des réponses des candidats (Lettre de motivation et CV). Nous présentons les travaux réalisés afin de résoudre la seconde partie : on utilise une représentation vectorielle de texte pour effectuer une classification des pièces jointes contenus dans le mail à l’aide de SVM. Par la suite, une évaluation de la candidature est effectuée à l’aide de différents classifieurs (SVM et n-grammes de mots).</abstract>
       <url hash="04a1403d">2008.jeptalnrecital-court.7</url>
@@ -413,7 +413,7 @@
     </paper>
     <paper id="8">
       <title>Typage, produit cartésien et unités d’analyse pour les modèles à états finis</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>71–80</pages>
       <abstract>Dans cet article, nous présentons un nouveau langage permettant d’écrire des relations rationnelles compilées en automates finis. Les deux caractéristiques innovantes de ce langage sont de pourvoir décrire des relations à plusieurs niveaux, pas nécessairement deux et d’utiliser diverses unités d’analyse pour exprimer les liens entre niveaux. Cela permet d’aligner de façon fine des représentations multiples.</abstract>
       <url hash="0f16f675">2008.jeptalnrecital-court.8</url>
@@ -431,7 +431,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>POLYMOTS</fixed-case> : une base de données de constructions dérivationnelles en français à partir de radicaux phonologiques</title>
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <author><first>Véronique</first><last>Rey</last></author>
       <pages>91–100</pages>
       <abstract>Cet article présente POLYMOTS, une base de données lexicale contenant huit mille mots communs en français. L’originalité de l’approche proposée tient à l’analyse des mots. En effet, à la différence d’autres bases lexicales représentant la morphologie dérivationnelle des mots à partir d’affixes, ici l’idée a été d’isoler un radical commun à un ensemble de mots d’une même famille. Nous avons donc analysé les formes des mots et, par comparaison phonologique (forme phonique comparable) et morphologique (continuité de sens), nous avons regroupé les mots par familles, selon le type de radical phonologique. L’article présente les fonctionnalités de la base et inclut une discussion sur les applications et les perspectives d’une telle ressource.</abstract>
@@ -484,7 +484,7 @@
       <author><first>Fabien</first><last>Poulard</last></author>
       <author><first>Thierry</first><last>Waszak</last></author>
       <author><first>Nicolas</first><last>Hernandez</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>141–150</pages>
       <abstract>Dans le contexte de la recherche de plagiat, le repérage de citations et de ses constituants est primordial puisqu’il peut amener à évaluer le caractère licite ou illicite d’une reprise (source citée ou non). Nous proposons ici une comparaison de méthodes automatiques pour le repérage de ces informations et rapportons une évaluation quantitative de celles-ci. Un corpus d’écrits journalistiques français a été manuellement annoté pour nous servir de base d’apprentissage et de test.</abstract>
       <url hash="f925e9ab">2008.jeptalnrecital-court.15</url>
@@ -513,7 +513,7 @@
     </paper>
     <paper id="18">
       <title>Traduction multilingue : le projet <fixed-case>M</fixed-case>ul<fixed-case>T</fixed-case>ra</title>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <pages>171–178</pages>
       <abstract>L’augmentation rapide des échanges et des communications pluriculturels, en particulier sur internet, intensifie les besoins d’outils multilingues y compris de traduction. Cet article décrit un projet en cours au LATL pour le développement d’un système de traduction multilingue basé sur un modèle linguistique abstrait et largement générique, ainsi que sur un modèle logiciel basé sur la notion d’objet. Les langues envisagées dans la première phase de ce projet sont l’allemand, le français, l’italien, l’espagnol et l’anglais.</abstract>
@@ -546,8 +546,8 @@
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <author><first>Frédéric</first><last>Duvert</last></author>
       <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>199–208</pages>
       <abstract>Cet article présente un formalisme de représentation des connaissances qui a été utilisé pour fournir des annotations sémantiques de haut niveau pour le corpus de dialogue oral MEDIA. Ces annotations en structures sémantiques, basées sur le paradigme FrameNet, sont obtenues de manière incrémentale et partiellement automatisée. Nous décrivons le processus d’interprétation automatique qui permet d’obtenir des compositions sémantiques et de générer des hypothèses de frames par inférence. Le corpus MEDIA est un corpus de dialogues en langue française dont les tours de parole de l’utilisateur ont été manuellement transcrits et annotés (niveaux mots et constituants sémantiques de base). Le processus proposé utilise ces niveaux pour produire une annotation de haut niveau en frames sémantiques. La base de connaissances développée (définitions des frames et règles de composition) est présentée, ainsi que les résultats de l’annotation automatique.</abstract>
       <url hash="9f353f4a">2008.jeptalnrecital-court.21</url>
@@ -556,7 +556,7 @@
     </paper>
     <paper id="22">
       <title>Dissymétrie entre l’indexation des documents et le traitement des requêtes pour la recherche d’information en langue arabe</title>
-      <author><first>Ramzi</first><last>Abbès</last></author>
+      <author id="ramzi-abbes"><first>Ramzi</first><last>Abbès</last></author>
       <author><first>Malek</first><last>Boualem</last></author>
       <pages>209–218</pages>
       <abstract>Les moteurs de recherches sur le web produisent des résultats comparables et assez satisfaisants pour la recherche de documents écrits en caractères latins. Cependant, ils présentent de sérieuses lacunes dès que l’ont s’intéresse à des langues peu dotées ou des langues sémitiques comme l’arabe. Dans cet article nous présentons une étude analytique et qualitative de la recherche d’information en langue arabe en mettant l’accent sur l’insuffisance des outils de recherche actuels, souvent mal adaptés aux spécificités de la langue arabe. Pour argumenter notre analyse, nous présentons des résultats issus d’observations et de tests autour de certains phénomènes linguistiques de l’arabe écrit. Pour la validation des ces observations, nous avons testé essentiellement le moteur de recherche Google.</abstract>
@@ -568,7 +568,7 @@
   <volume id="recital" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
-      <editor><first>Patrice</first><last>Bellot</last></editor>
+      <editor id="patrice-bellot"><first>Patrice</first><last>Bellot</last></editor>
       <editor><first>Marie-Laure</first><last>Guénot</last></editor>
       <publisher>ATALA</publisher>
       <address>Avignon, France</address>
diff --git a/data/xml/2008.tal.xml b/data/xml/2008.tal.xml
index c2f3e01f14..5ca2a16975 100644
--- a/data/xml/2008.tal.xml
+++ b/data/xml/2008.tal.xml
@@ -13,7 +13,7 @@
     </meta>
     <paper id="1">
       <title>Enrichissement d’un lexique bilingue par apprentissage analogique [Enrichment of a Bilingual Lexicon by Analogical Learning]</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <pages>13–40</pages>
       <url hash="725cbc99">2008.tal-1.1</url>
@@ -22,8 +22,8 @@
     </paper>
     <paper id="2">
       <title>Fouille d’erreurs sur des sorties d’analyseurs syntaxiques [Error Mining on Syntactic Parser Output]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>41–60</pages>
       <url hash="5bdf7620">2008.tal-1.2</url>
       <language>fra</language>
@@ -60,7 +60,7 @@
       <title>Compréhension automatique de la parole arabe spontanée — Une modélisation numérique [Automatic Understanding of Spontaneous <fixed-case>A</fixed-case>rabic Speech — A Numerical Model]</title>
       <author><first>Anis</first><last>Zouaghi</last></author>
       <author><first>Mounir</first><last>Zrigui</last></author>
-      <author><first>Georges</first><last>Antoniadis</last></author>
+      <author id="georges-antoniadis"><first>Georges</first><last>Antoniadis</last></author>
       <pages>141–166</pages>
       <url hash="f43465a1">2008.tal-1.6</url>
       <language>fra</language>
@@ -135,8 +135,8 @@
     </paper>
     <paper id="6">
       <title>Le développement d’une plate-forme pour l’annotation spécialisée de documents Web : retour d’expérience [Developping a platform dedicated to the annotation of web documents: a case study]</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>127–154</pages>
       <url hash="4263be24">2008.tal-2.6</url>
       <language>fra</language>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>x<fixed-case>P</fixed-case>ipe 2 : architecture pour le traitement présyntaxique de corpus bruts [<fixed-case>S</fixed-case>x<fixed-case>P</fixed-case>ipe 2 : an architecture for surface preprocessing of raw corpora]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <pages>155–188</pages>
       <url hash="cda99074">2008.tal-2.7</url>
@@ -174,8 +174,8 @@
     </paper>
     <paper id="10">
       <title><fixed-case>SEWS</fixed-case> : un serveur d’évaluation orienté Web pour la syntaxe [<fixed-case>SEWS</fixed-case> : an web-based server for evaluating syntactic annotation tools]</title>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Djamel</first><last>Mostef</last></author>
       <pages>247–270</pages>
       <url hash="b929681d">2008.tal-2.10</url>
@@ -186,7 +186,7 @@
       <title>Cocytus: parallel <fixed-case>NLP</fixed-case> over disparate data</title>
       <author><first>Noah</first><last>Evans</last></author>
       <author><first>Masayuki</first><last>Asahar</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>271–293</pages>
       <url hash="7e61b573">2008.tal-2.11</url>
       <bibkey>evans-etal-2008-cocytus</bibkey>
@@ -213,8 +213,8 @@
     </paper>
     <paper id="2">
       <title>Contributions du traitement automatique de la parole à l’étude des voyelles orales du français [Using automatic speech processing to study <fixed-case>F</fixed-case>rench oral vowels]</title>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <author><first>Noël</first><last>Nguyen</last></author>
       <pages>13–46</pages>
       <url hash="488e1059">2008.tal-3.2</url>
@@ -225,9 +225,9 @@
       <title>La parole spontanée : transcription et traitement [Processing and transcribing spontaneous speech]</title>
       <author><first>Thierry</first><last>Bazillon</last></author>
       <author><first>Vincent</first><last>Jousse</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <author><first>Daniel</first><last>Luzzati</last></author>
       <pages>47–76</pages>
       <url hash="8bd5118d">2008.tal-3.3</url>
@@ -252,7 +252,7 @@
       <author><first>Roxane</first><last>Bertrand</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <author><first>Robert</first><last>Espesser</last></author>
-      <author><first>Gaëlle</first><last>Ferré</last></author>
+      <author id="gaelle-ferre"><first>Gaëlle</first><last>Ferré</last></author>
       <author><first>Christine</first><last>Meunier</last></author>
       <author><first>Béatrice</first><last>Priego-Valverde</last></author>
       <author><first>Stéphane</first><last>Rauzy</last></author>
@@ -263,10 +263,10 @@
     </paper>
     <paper id="6">
       <title>Accents étrangers et régionaux en français. Caractérisation et identification [Foreign and regional accents in <fixed-case>F</fixed-case>rench. Characterisation and identification]</title>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Bianca</first><last>Vieru-Dimulescu</last></author>
       <author><first>Cécile</first><last>Woehrling</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>135–163</pages>
       <url hash="5b182685">2008.tal-3.6</url>
       <language>fra</language>
@@ -275,8 +275,8 @@
     <paper id="7">
       <title>Alignement automatique et analyse phonétique : comparaison de différents systèmes pour l’analyse du schwa [Automatic alignment and phonetic studies: Comparing alignment systems for the analysis of the schwa]</title>
       <author><first>Audrey</first><last>Bürki</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>George</first><last>Linarès</last></author>
       <author><first>Cécile</first><last>Fougeron</last></author>
       <pages>165–197</pages>
@@ -286,8 +286,8 @@
     </paper>
     <paper id="8">
       <title>Caractéristiques acoustiques et prosodiques des hésitations vocaliques dans trois langues [Acoustic and prosodic characteristics of vocalic hesitations in three languages]</title>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Rena</first><last>Nemoto</last></author>
       <pages>199–228</pages>
       <url hash="5158150e">2008.tal-3.8</url>
diff --git a/data/xml/2008.tc.xml b/data/xml/2008.tc.xml
index e671a0f517..99a0278940 100644
--- a/data/xml/2008.tc.xml
+++ b/data/xml/2008.tc.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title>Hybrid Architectures for Multi-Engine Machine Translation</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <url hash="b0d9cb70">2008.tc-1.2</url>
       <bibkey>eisele-2008-hybrid</bibkey>
     </paper>
diff --git a/data/xml/2008.wac.xml b/data/xml/2008.wac.xml
index c593e2ab2f..b2742245ee 100644
--- a/data/xml/2008.wac.xml
+++ b/data/xml/2008.wac.xml
@@ -21,8 +21,8 @@
     </frontmatter>
     <paper id="1">
       <title>Reranking <fixed-case>G</fixed-case>oogle with <fixed-case>GR</fixed-case>e<fixed-case>G</fixed-case></title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Marco Aldo Piccolino</first><last>Boniforti</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="marco-aldo-piccolino-boniforti"><first>Marco Aldo Piccolino</first><last>Boniforti</last></author>
       <pages>1-7</pages>
       <url hash="8793c0db">2008.wac-1.1</url>
       <abstract>We present an experiment evaluating the contribution of a system called GReG for reranking the snippets returned by Google’s search engine in the 10 best links presented to the user, captured by the use of Google’s API. The evaluation aims at establishing whether or not the introduction of deep linguistic information may improve the accuracy of Google or rather it is the opposite case as maintained by the majority of people working in Information Retrieval, using a Bag Of Words approach. We used 900 questions, answers taken from TREC 8, 9 competitions, execute three different types of evaluation: one without any linguistic aid; a second one with tagging, syntactic constituency contribution; another run with what we call Partial Logical Form. Even though GReG is still work in progress, it is possible to draw clearcut conclusions: adding linguistic information to the evaluation process of the best snippet that can answer a question improves enormously the performance. In another experiment we used the actual associated to the Q/A pairs distributed by one of TREC’s participant, got even higher accuracy.</abstract>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title><fixed-case>G</fixed-case>oogle for the Linguist on a Budget</title>
       <author><first>András</first><last>Kornai</last></author>
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <pages>8-11</pages>
       <url hash="55c98f7b">2008.wac-1.2</url>
       <abstract>In this paper, we present GLB, yet another open source, free system to create, exploit linguistic corpora gathered from the web. A simple, robust web crawl algorithm, a multi-dimensional information retrieval tool„ a crude parallelization mechanism are proposed, especially for researchers working in resource-limited environments.</abstract>
@@ -53,7 +53,7 @@
       <author><first>Pavlina</first><last>Fragkou</last></author>
       <author><first>Aris</first><last>Theodorakos</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <pages>18-25</pages>
       <url hash="a6e2a811">2008.wac-1.4</url>
       <abstract>The information explosion of the Web aggravates the problem of effective information retrieval. Even though linguistic approaches found in the literature perform linguistic annotation by creating metadata in the form of tokens, lemmas or part of speech tags, however, this process is insufficient. This is due to the fact that these linguistic metadata do not exploit the actual content of the page, leading to the need of performing semantic annotation based on a predefined semantic model. This paper proposes a new learning approach for performing automatic semantic annotation. This is the result of a two step procedure: the first step partitions a web page into blocks based on its visual layout, while the second, performs subsequent partitioning based on the examination of appearance of specific types of entities denoting the semantic category as well as the application of a number of simple heuristics. Preliminary experiments performed on a manually annotated corpus regarding athletics proved to be very promising.</abstract>
@@ -62,7 +62,7 @@
     <paper id="5">
       <title>Identification of Duplicate News Stories in Web Pages</title>
       <author><first>John</first><last>Gibson</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <author><first>Susan</first><last>Lubar</last></author>
       <pages>26-33</pages>
       <url hash="deeb5878">2008.wac-1.5</url>
@@ -71,7 +71,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>G</fixed-case>lossa<fixed-case>N</fixed-case>et 2: a linguistic search engine for <fixed-case>RSS</fixed-case>-based corpora</title>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <author><first>Kévin</first><last>Macé</last></author>
       <author><first>Hubert</first><last>Naets</last></author>
       <pages>34-39</pages>
@@ -94,7 +94,7 @@
       <title>Introducing, evaluating uk<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>, a very large web-derived corpus of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Adriano</first><last>Ferraresi</last></author>
       <author><first>Eros</first><last>Zanchetta</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Silvia</first><last>Bernardini</last></author>
       <pages>47-54</pages>
       <url hash="195eb191">2008.wac-1.8</url>
diff --git a/data/xml/2009.eamt.xml b/data/xml/2009.eamt.xml
index b72a040516..3f423713f6 100644
--- a/data/xml/2009.eamt.xml
+++ b/data/xml/2009.eamt.xml
@@ -7,8 +7,8 @@
       <address>Barcelona, Spain</address>
       <month>May 14–15</month>
       <year>2009</year>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
-      <editor><first>Harold</first><last>Somers</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="harold-somers"><first>Harold</first><last>Somers</last></editor>
       <venue>eamt</venue>
     </meta>
     <frontmatter>
@@ -23,13 +23,13 @@
     </paper>
     <paper id="2">
       <title>Adaptable, Community-Controlled, Language Technologies for Language Maintenance</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="578d8512">2009.eamt-1.2</url>
       <bibkey>levin-2009-adaptable</bibkey>
     </paper>
     <paper id="3">
       <title>Character-Based <fixed-case>PSMT</fixed-case> for Closely Related Languages</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="fdfa04ef">2009.eamt-1.3</url>
       <bibkey>tiedemann-2009-character</bibkey>
     </paper>
@@ -37,7 +37,7 @@
       <title><fixed-case>TS</fixed-case>3: an Improved Version of the Bilingual Concordancer <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch</title>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Julien</first><last>Bourdaillet</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="2e5a53a6">2009.eamt-1.4</url>
       <bibkey>huet-etal-2009-ts3</bibkey>
     </paper>
@@ -54,7 +54,7 @@
     <paper id="6">
       <title>Evaluation-Guided Pre-Editing of Source Text: Improving <fixed-case>MT</fixed-case>-Tractability of Light Verb Constructions</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <url hash="83a3c26e">2009.eamt-1.6</url>
       <bibkey>babych-etal-2009-evaluation</bibkey>
@@ -69,49 +69,49 @@
     <paper id="8">
       <title>Improving a <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Statistical Translation System using Morphosyntactic Knowledge</title>
       <author><first>Mireia</first><last>Farrús</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Marc</first><last>Poch</last></author>
-      <author><first>Adolfo</first><last>Hernández</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <url hash="9a308ea7">2009.eamt-1.8</url>
       <bibkey>farrus-etal-2009-improving</bibkey>
     </paper>
     <paper id="9">
       <title>Use of Rich Linguistic Information to Translate Prepositions and Grammar Cases to <fixed-case>B</fixed-case>asque</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Aitziber</first><last>Atutxa</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="72f832a2">2009.eamt-1.9</url>
       <bibkey>agirre-etal-2009-use</bibkey>
     </paper>
     <paper id="10">
       <title>Gappy Translation Units under Left-to-Right <fixed-case>SMT</fixed-case> Decoding</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <url hash="1cbdc3a2">2009.eamt-1.10</url>
       <bibkey>crego-yvon-2009-gappy</bibkey>
     </paper>
     <paper id="11">
       <title>Relevance of Different Segmentation Options on <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>SMT</fixed-case></title>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="3c47e277">2009.eamt-1.11</url>
       <bibkey>diaz-de-ilarraza-etal-2009-relevance</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>E</fixed-case>nglish–<fixed-case>L</fixed-case>atvian Toponym Processing: Translation Strategies and Linguistic Patterns</title>
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <url hash="bdc4dcc7">2009.eamt-1.12</url>
       <bibkey>gornostay-skadina-2009-english</bibkey>
     </paper>
     <paper id="13">
       <title>An Environment for Named Entity Recognition and Translation</title>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Krzysztof</first><last>Jassem</last></author>
       <author><first>Michał</first><last>Marcińczuk</last></author>
       <url hash="945e872f">2009.eamt-1.13</url>
@@ -129,19 +129,19 @@
       <author><first>Sylvain</first><last>Raybaud</last></author>
       <author><first>Caroline</first><last>Lavecchia</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <url hash="64e647c7">2009.eamt-1.15</url>
       <bibkey>raybaud-etal-2009-word</bibkey>
     </paper>
     <paper id="16">
       <title>Translating Questions for Cross-Lingual <fixed-case>QA</fixed-case></title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="8626ee4f">2009.eamt-1.16</url>
       <bibkey>tiedemann-2009-translating</bibkey>
     </paper>
     <paper id="17">
       <title>Developing Prototypes for Machine Translation between Two <fixed-case>S</fixed-case>ami Languages</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Linda</first><last>Wiechetek</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <url hash="32653bf9">2009.eamt-1.17</url>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="18">
       <title>Collocations in a Rule-Based <fixed-case>MT</fixed-case> System: A Case Study Evaluation of their Translation Adequacy</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Lorenza</first><last>Russo</last></author>
@@ -187,7 +187,7 @@
     <paper id="23">
       <title>A Phrase-Based Hidden Semi-<fixed-case>M</fixed-case>arkov Approach to Machine Translation</title>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <url hash="ef574387">2009.eamt-1.23</url>
       <bibkey>andres-ferrer-juan-2009-phrase</bibkey>
     </paper>
@@ -200,13 +200,13 @@
     <paper id="25">
       <title>A Constraint Satisfaction Approach to Machine Translation</title>
       <author><first>Sander</first><last>Canisius</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <url hash="e6d32525">2009.eamt-1.25</url>
       <bibkey>canisius-van-den-bosch-2009-constraint</bibkey>
     </paper>
     <paper id="26">
       <title>Introducing the Autshumato Integrated Translation Environment</title>
-      <author><first>Hendrik J.</first><last>Groenewald</last></author>
+      <author id="hendrik-johannes-groenewald"><first>Hendrik J.</first><last>Groenewald</last></author>
       <author><first>Wildrich</first><last>Fourie</last></author>
       <url hash="4f54b176">2009.eamt-1.26</url>
       <bibkey>groenewald-fourie-2009-introducing</bibkey>
@@ -214,7 +214,7 @@
     <paper id="27">
       <title>A New Subtree-Transfer Approach to Syntax-Based Reordering for Statistical Machine Translation</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <url hash="1c4d8467">2009.eamt-1.27</url>
       <bibkey>khalilov-etal-2009-new</bibkey>
@@ -227,7 +227,7 @@
     </paper>
     <paper id="29">
       <title>Rule-Based Augmentation of Training Data in <fixed-case>B</fixed-case>reton-<fixed-case>F</fixed-case>rench Statistical Machine Translation</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <url hash="251d3fa3">2009.eamt-1.29</url>
       <bibkey>tyers-2009-rule</bibkey>
     </paper>
@@ -242,14 +242,14 @@
       <title>Are Unaligned Words Important for Machine Translation?</title>
       <author><first>Yuqi</first><last>Zhang</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="a8b8823c">2009.eamt-1.31</url>
       <bibkey>zhang-etal-2009-unaligned</bibkey>
     </paper>
     <paper id="32">
       <title>Using Supertags as Source Language Context in <fixed-case>SMT</fixed-case></title>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="f4502ac0">2009.eamt-1.32</url>
@@ -258,7 +258,7 @@
     <paper id="33">
       <title>On <fixed-case>LM</fixed-case> Heuristics for the Cube Growing Algorithm</title>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="792f9a88">2009.eamt-1.33</url>
       <bibkey>vilar-ney-2009-lm</bibkey>
     </paper>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="2">
       <title>Empirical machine translation and its evaluation</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <url hash="42784686">2009.eamt-smart.2</url>
       <bibkey>gimenez-2009-empirical</bibkey>
     </paper>
@@ -326,7 +326,7 @@
     </paper>
     <paper id="7">
       <title>Improving <fixed-case>SMT</fixed-case> by learning translation direction</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>David</first><last>Kurokawa</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <url hash="a56bb7ef">2009.eamt-smart.7</url>
diff --git a/data/xml/2009.freeopmt.xml b/data/xml/2009.freeopmt.xml
index b51b3898e0..f2c105c2b5 100644
--- a/data/xml/2009.freeopmt.xml
+++ b/data/xml/2009.freeopmt.xml
@@ -8,7 +8,7 @@
       <year>2009</year>
       <editor><first>Juan Antonio</first><last>Pérez-Ortiz</last></editor>
       <editor><first>Felipe</first><last>Sánchez-Martinez</last></editor>
-      <editor><first>Francis M.</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis M.</first><last>Tyers</last></editor>
       <venue>freeopmt</venue>
     </meta>
     <frontmatter>
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Matxin: developing sustainable machine translation for a less-resourced language</title>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="9bdef98c">2009.freeopmt-1.1</url>
       <bibkey>sarasola-2009-matxin</bibkey>
     </paper>
@@ -29,9 +29,9 @@
     </paper>
     <paper id="3">
       <title>The Apertium machine translation platform: Five years on</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Francis M.</first><last>Tyers</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <pages>3-10</pages>
       <url hash="b18bf4b3">2009.freeopmt-1.3</url>
       <abstract>This paper describes Apertium: a free/open-source machine translation platform (engine, toolbox and data), its history, its philosophy of design, its technology, the community of developers, the research and business based on it, and its prospects and challenges, now that it is five years old.</abstract>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Matxin: Moving towards language independence</title>
-      <author><first>Aingeru</first><last>Mayor</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
       <author><first>Francis M.</first><last>Tyers</last></author>
       <pages>11-18</pages>
       <url hash="54df5a95">2009.freeopmt-1.4</url>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="9">
       <title>An open-source highly scalable web service architecture for the Apertium machine translation engine</title>
-      <author><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>51-58</pages>
       <url hash="b18aaf32">2009.freeopmt-1.9</url>
@@ -111,8 +111,8 @@
     <paper id="12">
       <title>Joint efforts to further develop and incorporate Apertium into the document management flow at <fixed-case>U</fixed-case>niversitat Oberta de <fixed-case>C</fixed-case>atalunya</title>
       <author><first>Luis</first><last>Villarejo Muñoz</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
-      <author><first>Mireia</first><last>Ginestí Rosell</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="mireia-ginesti-rosell"><first>Mireia</first><last>Ginestí Rosell</last></author>
       <pages>75-82</pages>
       <url hash="c1b99dc7">2009.freeopmt-1.12</url>
       <abstract>This article describes the needs of UOC regarding translation and how these needs are satisfied by Prompsit further developing a free rule-based machine translation system: Apertium. We initially describe the general framework regarding linguistic needs inside UOC. Then, section 2 introduces Apertium and outlines the development scenario that Prompsit executed. After that, section 3 outlines the specific needs of UOC and why Apertium was chosen as the machine translation engine. Then, section 4 describes some of the features specially developed in this project. Section 5 explains how the linguistic data was improved to increase the quality of the output in Catalan and Spanish. And, finally, we draw conclusions and outline further work originating from the project.</abstract>
diff --git a/data/xml/2009.iwslt.xml b/data/xml/2009.iwslt.xml
index 0ccdb6e7a4..821dd305a1 100644
--- a/data/xml/2009.iwslt.xml
+++ b/data/xml/2009.iwslt.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title>Two-way speech-to-speech translation for communicating across language barriers</title>
-      <author><first>Premkumar</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Premkumar</first><last>Natarajan</last></author>
       <url hash="51c1f944">2009.iwslt-keynotes.2</url>
       <bibkey>natarajan-2009-two</bibkey>
     </paper>
@@ -39,7 +39,7 @@
     </meta>
     <paper id="1">
       <title>Overview of the <fixed-case>IWSLT</fixed-case> 2009 evaluation campaign</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <pages>1-18</pages>
       <url hash="f88280e0">2009.iwslt-evaluation.1</url>
       <attachment type="presentation" hash="44fecc2b">2009.iwslt-evaluation.1.Presentation.pdf</attachment>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish machine translation system description for <fixed-case>IWSLT</fixed-case> 2009</title>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <pages>19-23</pages>
       <url hash="51b9e69b">2009.iwslt-evaluation.2</url>
       <attachment type="presentation" hash="44fecc2b">2009.iwslt-evaluation.2.Presentation.pdf</attachment>
@@ -57,8 +57,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>B</fixed-case>arcelona Media <fixed-case>SMT</fixed-case> system description for the <fixed-case>IWSLT</fixed-case> 2009</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>24-28</pages>
       <url hash="14b7fbdd">2009.iwslt-evaluation.3</url>
       <attachment type="presentation" hash="44fecc2b">2009.iwslt-evaluation.3.Presentation.pdf</attachment>
@@ -69,7 +69,7 @@
       <title>Low-resource machine translation using <fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>r<fixed-case>E</fixed-case>x</title>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Tsuyoshi</first><last>Okita</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>29-36</pages>
@@ -83,7 +83,7 @@
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>37-44</pages>
       <url hash="63205525">2009.iwslt-evaluation.5</url>
@@ -105,7 +105,7 @@
     <paper id="7">
       <title><fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>’s machine translation system for <fixed-case>IWSLT</fixed-case> 2009</title>
       <author><first>Xiangyu</first><last>Duan</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Hui</first><last>Zhang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
@@ -137,8 +137,8 @@
     <paper id="9">
       <title><fixed-case>LIG</fixed-case> approach for <fixed-case>IWSLT</fixed-case>09</title>
       <author><first>Fethi</first><last>Bougares</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>60-64</pages>
       <url hash="cc944afc">2009.iwslt-evaluation.9</url>
       <attachment type="presentation" hash="4288a6bc">2009.iwslt-evaluation.9.Presentation.pdf</attachment>
@@ -162,7 +162,7 @@
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Brian</first><last>Delaney</last></author>
       <author><first>A. Ryan</first><last>Aminzadeh</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Ray</first><last>Slyh</last></author>
       <pages>71-78</pages>
       <url hash="76754a7b">2009.iwslt-evaluation.11</url>
@@ -173,8 +173,8 @@
     <paper id="12">
       <title>Two methods for stabilizing <fixed-case>MERT</fixed-case></title>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>79-82</pages>
       <url hash="526b6622">2009.iwslt-evaluation.12</url>
       <attachment type="presentation" hash="ab638292">2009.iwslt-evaluation.12.Presentation.pdf</attachment>
@@ -186,7 +186,7 @@
       <author><first>Maoxi</first><last>Li</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>83-90</pages>
       <url hash="e5ed750b">2009.iwslt-evaluation.13</url>
       <attachment type="presentation" hash="14377541">2009.iwslt-evaluation.13.Presentation.pdf</attachment>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="14">
       <title>The <fixed-case>NUS</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2009</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Chang</first><last>Liu</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
@@ -209,9 +209,9 @@
       <title>The <fixed-case>UOT</fixed-case> system</title>
       <author><first>Xianchao</first><last>Wu</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>99-106</pages>
       <url hash="9e6006c1">2009.iwslt-evaluation.15</url>
       <attachment type="presentation" hash="50ea66dc">2009.iwslt-evaluation.15.Presentation.pdf</attachment>
@@ -243,7 +243,7 @@
     <paper id="18">
       <title><fixed-case>UPV</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2009</title>
       <author><first>Guillem</first><last>Gascó</last></author>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
       <pages>118-123</pages>
       <url hash="89bac74b">2009.iwslt-evaluation.18</url>
       <attachment type="presentation" hash="10c2bc2e">2009.iwslt-evaluation.18.Presentation.pdf</attachment>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="2">
       <title>Enriching <fixed-case>SCFG</fixed-case> rules directly from efficient bilingual chart parsing</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <pages>136-143</pages>
@@ -317,7 +317,7 @@
     </paper>
     <paper id="5">
       <title>Online language model adaptation for spoken dialog translation</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -331,11 +331,11 @@
       <title>Network-based speech-to-speech translation</title>
       <author><first>Chiori</first><last>Hori</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Noriyuki</first><last>Kimura</last></author>
       <author><first>Yutaka</first><last>Ashikari</last></author>
       <author><first>Ryosuke</first><last>Isotani</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="60e4c24d">2009.iwslt-papers.6</url>
       <attachment type="presentation" hash="64e04973">2009.iwslt-papers.6.Presentation.pdf</attachment>
diff --git a/data/xml/2009.jeptalnrecital.xml b/data/xml/2009.jeptalnrecital.xml
index 5df07c4d96..84cc40899b 100644
--- a/data/xml/2009.jeptalnrecital.xml
+++ b/data/xml/2009.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <publisher>ATALA</publisher>
       <address>Senlis, France</address>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>Analyse déductive pour les grammaires d’interaction</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>11–20</pages>
       <abstract>Nous proposons un algorithme d’analyse pour les grammaires d’interaction qui utilise le cadre formel de l’analyse déductive. Cette approche donne un point de vue nouveau sur ce problème puisque les méthodes précédentes réduisaient ce dernier à la réécriture de graphes et utilisaient des techniques de résolution de contraintes. D’autre part, cette présentation permet de décrire le processus de manière standard et d’exhiber les sources d’indéterminisme qui rendent ce problème difficile.</abstract>
       <url hash="fdea8922">2009.jeptalnrecital-long.2</url>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Analyse syntaxique en dépendances de l’oral spontané</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>21–30</pages>
       <abstract>Cet article décrit un modèle d’analyse syntaxique de l’oral spontané axé sur la reconnaissance de cadres valenciels verbaux. Le modèle d’analyse se décompose en deux étapes : une étape générique, basée sur des ressources génériques du français et une étape de réordonnancement des solutions de l’analyseur réalisé par un modèle spécifique à une application. Le modèle est évalué sur le corpus MEDIA.</abstract>
       <url hash="0bf1512c">2009.jeptalnrecital-long.3</url>
@@ -45,8 +45,8 @@
     </paper>
     <paper id="4">
       <title>Analyse syntaxique du français : des constituants aux dépendances</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
       <author><first>François</first><last>Guérin</last></author>
       <pages>31–40</pages>
@@ -80,8 +80,8 @@
     </paper>
     <paper id="7">
       <title>Classification d’un contenu encyclopédique en vue d’un étiquetage par entités nommées</title>
-      <author><first>Eric</first><last>Charton</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>61–70</pages>
       <abstract>On utilise souvent des ressources lexicales externes pour améliorer les performances des systèmes d’étiquetage d’entités nommées. Les contenus de ces ressources lexicales peuvent être variés : liste de noms propres, de lieux, de marques. On note cependant que la disponibilité de corpus encyclopédiques exhaustifs et ouverts de grande taille tels que Worldnet ou Wikipedia, a fait émerger de nombreuses propositions spécifiques d’exploitation de ces contenus par des systèmes d’étiquetage. Un problème demeure néanmoins ouvert avec ces ressources : celui de l’adaptation de leur taxonomie interne, complexe et composée de dizaines de milliers catégories, aux exigences particulières de l’étiquetage des entités nommées. Pour ces dernières, au plus de quelques centaines de classes sémantiques sont requises. Dans cet article nous explorons cette difficulté et proposons un système complet de transformation d’un arbre taxonomique encyclopédique en une système à classe sémantiques adapté à l’étiquetage d’entités nommées.</abstract>
       <url hash="3b9e9aa2">2009.jeptalnrecital-long.7</url>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="8">
       <title>Étude quantitative de liens entre l’analogie formelle et la morphologie constructionnelle</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>71–80</pages>
       <abstract>Plusieurs travaux ont récemment étudié l’apport de l’apprentissage analogique dans des applications du traitement automatique des langues comme la traduction automatique, ou la recherche d’information. Il est souvent admis que les relations analogiques de forme entre les mots capturent des informations de nature morphologique. Le but de cette étude est de présenter une analyse des points de rencontre entre l’analyse morphologique et les analogies de forme. C’est à notre connaissance la première étude de ce type portant sur des corpus de grande taille et sur plusieurs langues. Bien que notre étude ne soit pas dédiée à une tâche particulière du traitement des langues, nous montrons cependant que le principe d’analogie permet de segmenter des mots en morphèmes avec une bonne précision.</abstract>
       <url hash="c5809ce2">2009.jeptalnrecital-long.8</url>
@@ -102,8 +102,8 @@
       <author><first>Thi-Ngoc-Diep</first><last>Do</last></author>
       <author><first>Viet-Bac</first><last>Le</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <pages>81–90</pages>
       <abstract>Cet article présente nos premiers travaux en vue de la construction d’un système de traduction probabiliste pour le couple de langue vietnamien-français. La langue vietnamienne étant considérée comme une langue peu dotée, une des difficultés réside dans la constitution des corpus parallèles, indispensable à l’apprentissage des modèles. Nous nous concentrons sur la constitution d’un grand corpus parallèle vietnamien-français. La méthode d’identification automatique des paires de documents parallèles fondée sur la date de publication, les mots spéciaux et les scores d’alignements des phrases est appliquée. Cet article présente également la construction d’un premier système de traduction automatique probabiliste vietnamienfrançais et français-vietnamien à partir de ce corpus et discute l’opportunité d’utiliser des unités lexicales ou sous-lexicales pour le vietnamien (syllabes, mots, ou leurs combinaisons). Les performances du système sont encourageantes et se comparent avantageusement à celles du système de Google.</abstract>
       <url hash="7b7b1c8e">2009.jeptalnrecital-long.9</url>
@@ -124,7 +124,7 @@
       <title>Intégration de l’alignement de mots dans le concordancier bilingue <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch</title>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Julien</first><last>Bourdaillet</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>101–110</pages>
       <abstract>Malgré les nombreuses études visant à améliorer la traduction automatique, la traduction assistée par ordinateur reste la solution préférée des traducteurs lorsqu’une sortie de qualité est recherchée. Dans cet article, nous présentons nos travaux menés dans le but d’améliorer le concordancier bilingue TransSearch. Ce service, accessible sur le Web, repose principalement sur un alignement au niveau des phrases. Dans cette étude, nous discutons et évaluons l’intégration d’un alignement statistique au niveau des mots. Nous présentons deux nouvelles problématiques essentielles au succès de notre nouveau prototype : la détection des traductions erronées et le regroupement des variantes de traduction similaires.</abstract>
       <url hash="20861895">2009.jeptalnrecital-long.11</url>
@@ -135,7 +135,7 @@
       <title>Jugements d’évaluation et constituants périphériques</title>
       <author><first>Agata</first><last>Jackiewicz</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <pages>111–120</pages>
       <abstract>L’article présente une étude portant sur des constituants détachés à valeur axiologique. Dans un premier temps, une analyse linguistique sur corpus met en évidence un ensemble de patrons caractéristiques du phénomène. Ensuite, une expérimentation informatique est proposée sur un corpus de plus grande taille afin de permettre l’observation des patrons en vue d’un retour sur le modèle linguistique. Ce travail s’inscrit dans un projet mené à l’interface de la linguistique et du TAL, qui se donne pour but d’enrichir, d’adapter au français et de formaliser le modèle général Appraisal de l’évaluation dans la langue.</abstract>
       <url hash="45289161">2009.jeptalnrecital-long.12</url>
@@ -148,7 +148,7 @@
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Jim</first><last>Hunter</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>121–130</pages>
       <abstract>Notre société génère une masse d’information toujours croissante, que ce soit en médecine, en météorologie, etc. La méthode la plus employée pour analyser ces données est de les résumer sous forme graphique. Cependant, il a été démontré qu’un résumé textuel est aussi un mode de présentation efficace. L’objectif du prototype BT-45, développé dans le cadre du projet Babytalk, est de générer des résumés de 45 minutes de signaux physiologiques continus et d’événements temporels discrets en unité néonatale de soins intensifs (NICU). L’article présente l’aspect génération de texte de ce prototype. Une expérimentation clinique a montré que les résumés humains améliorent la prise de décision par rapport à l’approche graphique, tandis que les textes de BT-45 donnent des résultats similaires à l’approche graphique. Une analyse a identifié certaines des limitations de BT-45 mais en dépit de cellesci, notre travail montre qu’il est possible de produire automatiquement des résumés textuels efficaces de données complexes.</abstract>
       <url hash="88671fd4">2009.jeptalnrecital-long.13</url>
@@ -178,7 +178,7 @@
       <title>Prise en compte de dépendances syntaxiques pour la traduction contextuelle de segments</title>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>Rafik</first><last>Maklhoufi</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>151–160</pages>
       <abstract>Dans un système standard de traduction statistique basé sur les segments, le score attribué aux différentes traductions d’un segment ne dépend pas du contexte dans lequel il apparaît. Plusieurs travaux récents tendent à montrer l’intérêt de prendre en compte le contexte source lors de la traduction, mais ces études portent sur des systèmes traduisant vers l’anglais, une langue faiblement fléchie. Dans cet article, nous décrivons nos expériences sur la prise en compte du contexte source dans un système statistique traduisant de l’anglais vers le français, basé sur l’approche proposée par Stroppa et al. (2007). Nous étudions l’impact de différents types d’indices capturant l’information contextuelle, dont des dépendances syntaxiques typées. Si les mesures automatiques d’évaluation de la qualité d’une traduction ne révèlent pas de gains significatifs de notre système par rapport à un système à l’état de l’art ne faisant pas usage du contexte, une évaluation manuelle conduite sur 100 phrases choisies aléatoirement est en faveur de notre système. Cette évaluation fait également ressortir que la prise en compte de certaines dépendances syntaxiques est bénéfique à notre système.</abstract>
       <url hash="eb1df976">2009.jeptalnrecital-long.16</url>
@@ -249,7 +249,7 @@
     <paper id="23">
       <title>Trouver et confondre les coupables : un processus sophistiqué de correction de lexique</title>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Miguel</first><last>A. Molinero</last></author>
       <author><first>Jacques</first><last>Farré</last></author>
       <author><first>Éric</first><last>Villemonte De La Clergerie</last></author>
@@ -330,7 +330,7 @@
   <volume id="position" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Prise de position</booktitle>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <publisher>ATALA</publisher>
       <address>Senlis, France</address>
@@ -376,7 +376,7 @@
   <volume id="court" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts</booktitle>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <publisher>ATALA</publisher>
       <address>Senlis, France</address>
@@ -390,9 +390,9 @@
     </frontmatter>
     <paper id="1">
       <title>Adaptation de parsers statistiques lexicalisés pour le français : Une évaluation complète sur corpus arborés</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>1–10</pages>
       <abstract>Cet article présente les résultats d’une évaluation exhaustive des principaux analyseurs syntaxiques probabilistes dit “lexicalisés” initialement conçus pour l’anglais, adaptés pour le français et évalués sur le CORPUS ARBORÉ DU FRANÇAIS (Abeillé et al., 2003) et le MODIFIED FRENCH TREEBANK (Schluter &amp; van Genabith, 2007). Confirmant les résultats de (Crabbé &amp; Candito, 2008), nous montrons que les modèles lexicalisés, à travers les modèles de Charniak (Charniak, 2000), ceux de Collins (Collins, 1999) et le modèle des TIG Stochastiques (Chiang, 2000), présentent des performances moindres face à un analyseur PCFG à Annotation Latente (Petrov et al., 2006). De plus, nous montrons que le choix d’un jeu d’annotations issus de tel ou tel treebank oriente fortement les résultats d’évaluations tant en constituance qu’en dépendance non typée. Comparés à (Schluter &amp; van Genabith, 2008; Arun &amp; Keller, 2005), tous nos résultats sont state-of-the-art et infirment l’hypothèse d’une difficulté particulière qu’aurait le français en terme d’analyse syntaxique probabiliste et de sources de données.</abstract>
       <url hash="d900dfd5">2009.jeptalnrecital-court.1</url>
@@ -401,7 +401,7 @@
     </paper>
     <paper id="2">
       <title>Analyse automatique des noms déverbaux composés : pourquoi et comment faire interagir analogie et système de règles</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>11–20</pages>
       <abstract>Cet article aborde deux problèmes d’analyse morpho-sémantique du lexique : (1) attribuer automatiquement une définition à des noms et verbes morphologiquement construits inconnus des dictionnaires mais présents dans les textes ; (2) proposer une analyse combinant règles et analogie, deux techniques généralement contradictoires. Les noms analysés sont apparemment suffixés et composés (HYDROMASSAGE). La plupart d’entre eux, massivement attestés dans les documents (journaux, Internet) sont absents des dictionnaires. Ils sont souvent reliés à des verbes (HYDROMASSER) également néologiques. Le nombre de ces noms et verbes est estimé à 5.400. L’analyse proposée leur attribue une définition par rapport à leur base, et enrichit un lexique de référence pour le TALN au moyen de cette base, si elle est néologique. L’implémentation des contraintes linguistiques qui régissent ces formations est reproductible dans d’autres langues européennes où sont rencontrés les mêmes types de données dont l’analyse reflète le même raisonnement que pour le français.</abstract>
       <url hash="524a187f">2009.jeptalnrecital-court.2</url>
@@ -430,18 +430,18 @@
     </paper>
     <paper id="5">
       <title><fixed-case>ANNODIS</fixed-case>: une approche outillée de l’annotation de structures discursives</title>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Patrice</first><last>Enjalbert</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Myriam</first><last>Bras</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
       <author><first>Anne</first><last>Le Draoulec</last></author>
       <author><first>Yann</first><last>Mathet</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Josette</first><last>Rebeyrolle</last></author>
       <author><first>Ludovic</first><last>Tanguy</last></author>
       <author><first>Marianne</first><last>Vergez-Couret</last></author>
@@ -455,7 +455,7 @@
     </paper>
     <paper id="6">
       <title>Apport de la syntaxe dans un système de question-réponse : étude du système <fixed-case>FIDJI</fixed-case>.</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>47–56</pages>
       <abstract>Cet article présente une série d’évaluations visant à étudier l’apport d’une analyse syntaxique robuste des questions et des documents dans un système de questions-réponses. Ces évaluations ont été effectuées sur le système FIDJI, qui utilise à la fois des informations syntaxiques et des techniques plus “traditionnelles”. La sélection des documents, l’extraction de la réponse ainsi que le comportement selon les différents types de questions ont été étudiés.</abstract>
@@ -485,7 +485,7 @@
     </paper>
     <paper id="9">
       <title>Association automatique de lemmes et de paradigmes de flexion à un mot inconnu</title>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <author><first>Michaël</first><last>Bagur</last></author>
       <author><first>Helena</first><last>Blancafort</last></author>
       <pages>77–86</pages>
@@ -498,7 +498,7 @@
       <title>Catégorisation sémantico-discursive des évaluations exprimées dans la blogosphère</title>
       <author><first>Matthieu</first><last>Vernier</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Estelle</first><last>Dubreil</last></author>
       <pages>87–96</pages>
       <abstract>Les blogs constituent un support d’observations idéal pour des applications liées à la fouille d’opinion. Toutefois, ils imposent de nouvelles problématiques et de nouveaux défis au regard des méthodes traditionnelles du domaine. De ce fait, nous proposons une méthode automatique pour la détection et la catégorisation des évaluations localement exprimées dans un corpus de blogs multi-domaine. Celle-ci rend compte des spécificités du langage évaluatif décrites dans deux théories linguistiques. L’outil développé au sein de la plateforme UIMA vise d’une part à construire automatiquement une grammaire du langage évaluatif, et d’autre part à utiliser cette grammaire pour la détection et la catégorisation des passages évaluatifs d’un texte. La catégorisation traite en particulier l’aspect axiologique de l’évaluation, sa configuration d’énonciation et sa modalité dans le discours.</abstract>
@@ -530,7 +530,7 @@
     <paper id="13">
       <title>Détection des émotions à partir du contenu linguistique d’énoncés oraux : application à un robot compagnon pour enfants fragilisés</title>
       <author><first>Marc</first><last>Le Tallec</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Arielle</first><last>Syssau-Vaccarella</last></author>
@@ -542,7 +542,7 @@
     </paper>
     <paper id="14">
       <title>Dispersion sémantique dans des familles morpho-phonologiques : éléments théoriques et empiriques</title>
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <author><first>Véronique</first><last>Rey</last></author>
       <author><first>Laurent</first><last>Tichit</last></author>
       <pages>120–127</pages>
@@ -562,8 +562,8 @@
     </paper>
     <paper id="16">
       <title>Exploitation du terrain commun pour la production d’expressions référentielles dans les systèmes de dialogue</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <pages>138–147</pages>
       <abstract>Cet article présente un moyen de contraindre la production d’expressions référentielles par un système de dialogue en fonction du terrain commun. Cette capacité, fondamentale pour atteindre la compréhension mutuelle, est trop souvent oubliée dans les systèmes de dialogue. Le modèle que nous proposons s’appuie sur une modélisation du processus d’ancrage (grounding process) en proposant un raffinement du statut d’ancrage appliqué à la description des référents. Il décrit quand et comment ce statut doit être révisé en fonction des jugements de compréhension des deux participants ainsi que son influence dans le choix d’une description partagée destinée à la génération d’une expression référentielle.</abstract>
       <url hash="474cc819">2009.jeptalnrecital-court.16</url>
@@ -574,7 +574,7 @@
       <title>Gestion de dialogue oral Homme-machine en arabe</title>
       <author><first>Younès</first><last>Bahou</last></author>
       <author><first>Amine</first><last>Bayoudhi</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>148–157</pages>
       <abstract>Dans le présent papier, nous présentons nos travaux sur la gestion du dialogue oral arabe Homme-machine. Ces travaux entrent dans le cadre de la réalisation du serveur vocal interactif SARF (Bahou et al., 2008) offrant des renseignements sur le transport ferroviaire tunisien en langue arabe standard moderne. Le gestionnaire de dialogue que nous proposons est basé sur une approche structurelle et est composé de deux modèles à savoir, le modèle de tâche et le modèle de dialogue. Le premier modèle permet de i) compléter et vérifier l’incohérence des structures sémantiques représentant les sens utiles des énoncés, ii) générer une requête vers l’application et iii) récupérer le résultat et de formuler une réponse à l’utilisateur en langage naturel. Quant au modèle de dialogue, il assure l’avancement du dialogue avec l’utilisateur et l’identification de ses intentions. L’interaction entre ces deux modèles est assurée grâce à un contexte du dialogue permettant le suivi et la mise à jour de l’historique du dialogue.</abstract>
       <url hash="299608cd">2009.jeptalnrecital-court.17</url>
@@ -604,7 +604,7 @@
     </paper>
     <paper id="20">
       <title>Intégrer les tables du Lexique-Grammaire à un analyseur syntaxique robuste à grande échelle</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Elsa</first><last>Tolone</last></author>
       <pages>177–186</pages>
       <abstract>Dans cet article, nous montrons comment nous avons converti les tables du Lexique-Grammaire en un format TAL, celui du lexique Lefff, permettant ainsi son intégration dans l’analyseur syntaxique FRMG. Nous présentons les fondements linguistiques de ce processus de conversion et le lexique obtenu. Nous validons le lexique obtenu en évaluant l’analyseur syntaxique FRMG sur le corpus de référence de la campagne EASy selon qu’il utilise les entrées verbales du Lefff ou celles des tables des verbes du Lexique-Grammaire ainsi converties.</abstract>
@@ -672,7 +672,7 @@
     <paper id="26">
       <title>Collecte et analyses de réponses naturelles pour les systèmes de questions-réponses</title>
       <author><first>Anne</first><last>Garcia-Fernandez</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <pages>237–246</pages>
       <abstract>Notre travail se situe dans le cadre des systèmes de réponse a une question et à pour but de fournir une réponse en langue naturelle aux questions posées en langue naturelle. Cet article présente une expérience permettant d’analyser les réponses de locuteurs du français à des questions que nous leur posons. L’expérience se déroule à l’écrit comme à l’oral et propose à des locuteurs français des questions relevant de différents types sémantiques et syntaxiques. Nous mettons en valeur une large variabilité dans les formes de réponses possibles en langue française. D’autre part nous établissons un certain nombre de liens entre formulation de question et formulation de réponse. Nous proposons d’autre part une comparaison des réponses selon la modalité oral / écrit. Ces résultats peuvent être intégrés à des systèmes existants pour produire une réponse en langue naturelle de façon dynamique.</abstract>
@@ -691,7 +691,7 @@
     </paper>
     <paper id="28">
       <title>Plusieurs langues (bien choisies) valent mieux qu’une : traduction statistique multi-source par renforcement lexical</title>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>253–262</pages>
@@ -719,9 +719,9 @@
       <title>Profilage de candidatures assisté par Relevance Feedback</title>
       <author><first>Rémy</first><last>Kessler</last></author>
       <author><first>Nicolas</first><last>Béchet</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <author><first>Mathieu</first><last>Roche</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>273–282</pages>
       <abstract>Le marché d’offres d’emploi et des candidatures sur Internet connaît une croissance exponentielle. Ceci implique des volumes d’information (majoritairement sous la forme de texte libre) qu’il n’est plus possible de traiter manuellement. Une analyse et catégorisation assistées nous semble pertinente en réponse à cette problématique. Nous proposons E-Gen, système qui a pour but l’analyse et catégorisation assistés d’offres d’emploi et des réponses des candidats. Dans cet article nous présentons plusieurs stratégies, reposant sur les modèles vectoriel et probabiliste, afin de résoudre la problématique du profilage des candidatures en fonction d’une offre précise. Nous avons évalué une palette de mesures de similarité afin d’effectuer un classement pertinent des candidatures au moyen des courbes ROC. L’utilisation d’une forme de relevance feedback a permis de surpasser nos résultats sur ce problème difficile et sujet à une grande subjectivité.</abstract>
       <url hash="f345da57">2009.jeptalnrecital-court.30</url>
@@ -730,7 +730,7 @@
     </paper>
     <paper id="31">
       <title>Profilage sémantique endogène des relations de synonymie au sein de Gene Ontology</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>283–292</pages>
       <abstract>Le calcul de la similarité sémantique entre les termes repose sur l’existence et l’utilisation de ressources sémantiques. Cependant de telles ressources, qui proposent des équivalences entre entités, souvent des relations de synonymie, doivent elles-mêmes être d’abord analysées afin de définir des zones de fiabilité où la similarité sémantique est plus forte. Nous proposons une méthode d’acquisition de synonymes élémentaires grâce à l’exploitation des terminologies structurées au travers l’analyse de la structure syntaxique des termes complexes et de leur compositionnalité. Les synonymes acquis sont ensuite profilés grâce aux indicateurs endogènes inférés automatiquement à partir de ces mêmes terminologies (d’autres types de relations, inclusions lexicales, productivité, forme des composantes connexes). Dans le domaine biomédical, il existe de nombreuses terminologies structurées qui peuvent être exploitées pour la constitution de ressources sémantiques. Le travail présenté ici exploite une de ces terminologies, Gene Ontology.</abstract>
@@ -755,7 +755,7 @@
       <author><first>Anne</first><last>Dister</last></author>
       <author><first>Hubert</first><last>Naets</last></author>
       <author><first>Kévin</first><last>Macé</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>301–310</pages>
       <abstract>Cet article présente Recto /Verso, un système de traitement automatique du langage dédié à l’application des rectifications orthographiques de 1990. Ce système a été développé dans le cadre de la campagne de sensibilisation réalisée en mars dernier par le Service et le Conseil de la langue française et de la politique linguistique de la Communauté française de Belgique. Nous commençons par rappeler les motivations et le contenu de la réforme proposée, et faisons le point sur les principes didactiques retenus dans le cadre de la campagne. La plus grande partie de l’article est ensuite consacrée à l’implémentation du système. Nous terminons enfin par une première analyse de l’impact de la campagne sur les utilisateurs.</abstract>
       <url hash="15d3ebe3">2009.jeptalnrecital-court.33</url>
@@ -768,7 +768,7 @@
       <author><first>Luit</first><last>Gazendam</last></author>
       <author><first>Willemijn</first><last>Heeren</last></author>
       <author><first>Roeland</first><last>Ordelman</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <pages>311–320</pages>
       <abstract>Semantic access to multimedia content in audiovisual archives is to a large extent dependent on quantity and quality of the metadata, and particularly the content descriptions that are attached to the individual items. However, the manual annotation of collections puts heavy demands on resources. A large number of archives are introducing (semi) automatic annotation techniques for generating and/or enhancing metadata. The NWO funded CATCH-CHOICE project has investigated the extraction of keywords from textual resources related to TV programs to be archived (context documents), in collaboration with the Dutch audiovisual archives, Sound and Vision. This paper investigates the suitability of Automatic Speech Recognition transcripts produced in the CATCH-CHoral project for generating such keywords, which we evaluate against manual annotations of the documents, and against keywords automatically generated from context documents describing the TV programs’ content.</abstract>
       <url hash="8dc4b14f">2009.jeptalnrecital-court.34</url>
@@ -777,7 +777,7 @@
     <paper id="35">
       <title>Résumé automatique multi-document et indépendance de la langue : une première évaluation en français</title>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>321–330</pages>
       <abstract>Le résumé automatique de texte est une problématique difficile, fortement dépendante de la langue et qui peut nécessiter un ensemble de données d’apprentissage conséquent. L’approche par extraction peut aider à surmonter ces difficultés. (Mihalcea, 2004) a démontré l’intérêt des approches à base de graphes pour l’extraction de segments de texte importants. Dans cette étude, nous décrivons une approche indépendante de la langue pour la problématique du résumé automatique multi-documents. L’originalité de notre méthode repose sur l’utilisation d’une mesure de similarité permettant le rapprochement de segments morphologiquement proches. De plus, c’est à notre connaissance la première fois que l’évaluation d’une approche de résumé automatique multi-document est conduite sur des textes en français.</abstract>
       <url hash="b5e613e2">2009.jeptalnrecital-court.35</url>
@@ -798,9 +798,9 @@
     <paper id="37">
       <title>Segmentation multiple d’un flux de données textuelles pour la modélisation statistique du langage</title>
       <author><first>Sopheap</first><last>Seng</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <pages>337–346</pages>
       <abstract>Dans cet article, nous traitons du problème de la modélisation statistique du langage pour les langues peu dotées et sans segmentation entre les mots. Tandis que le manque de données textuelles a un impact sur la performance des modèles, les erreurs introduites par la segmentation automatique peuvent rendre ces données encore moins exploitables. Pour exploiter au mieux les données textuelles, nous proposons une méthode qui effectue des segmentations multiples sur le corpus d’apprentissage au lieu d’une segmentation unique. Cette méthode basée sur les automates d’état finis permet de retrouver les n-grammes non trouvés par la segmentation unique et de générer des nouveaux n-grammes pour l’apprentissage de modèle du langage. L’application de cette approche pour l’apprentissage des modèles de langage pour les systèmes de reconnaissance automatique de la parole en langue khmère et vietnamienne s’est montrée plus performante que la méthode par segmentation unique, à base de règles.</abstract>
       <url hash="ff770488">2009.jeptalnrecital-court.37</url>
@@ -852,7 +852,7 @@
     <paper id="42">
       <title>Une approche exploratoire de compression automatique de phrases basée sur des critères thermodynamiques</title>
       <author><first>Silvia</first><last>Fernández Sabido</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>387–393</pages>
       <abstract>Nous présentons une approche exploratoire basée sur des notions thermodynamiques de la Physique statistique pour la compression de phrases. Nous décrivons le modèle magnétique des verres de spins, adapté à notre conception de la problématique. Des simulations Métropolis Monte-Carlo permettent d’introduire des fluctuations thermiques pour piloter la compression. Des comparaisons intéressantes de notre méthode ont été réalisées sur un corpus en français.</abstract>
       <url hash="41d8dbc6">2009.jeptalnrecital-court.42</url>
@@ -902,7 +902,7 @@
   <volume id="demonstration" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Démonstrations</booktitle>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <publisher>ATALA</publisher>
       <address>Senlis, France</address>
@@ -917,7 +917,7 @@
     <paper id="1">
       <title><fixed-case>ACOLAD</fixed-case> un environnement pour l’édition de corpus de dépendances</title>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <pages>1–3</pages>
       <abstract>Dans cette démonstration, nous présentons le prototype d’un environnement open-source pour l’édition de corpus de dépendances. Cet environnement, nommé ACOLAD (Annotation de COrpus Linguistique pour l’Analyse de dépendances), propose des services manuels de segmentation et d’annotation multi-niveaux (segmentation en mots et en syntagmes minimaux (chunks), annotation morphosyntaxique des mots, annotation syntaxique des chunks et annotation syntaxique des dépendances entre mots ou entre chunks).</abstract>
       <url hash="b23dfa69">2009.jeptalnrecital-demonstration.1</url>
@@ -964,7 +964,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>ASSIST</fixed-case> : un moteur de recherche spécialisé pour l’analyse des cadres d’expériences</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Elisa</first><last>Pieri</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <author><first>Brian</first><last>Rea</last></author>
@@ -991,7 +991,7 @@
       <title><fixed-case>CIFLI</fixed-case>-<fixed-case>S</fixed-case>urvi<fixed-case>T</fixed-case>ra, deux facettes : démonstrateur de composants de <fixed-case>TA</fixed-case> fondée sur <fixed-case>UNL</fixed-case>, et phrasebook multilingue</title>
       <author><first>Georges</first><last>Fafiotte</last></author>
       <author><first>Achille</first><last>Falaise</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <pages>19–21</pages>
       <abstract>CIFLI-SurviTra (“Survival Translation” assistant) est une plate-forme destinée à favoriser l’ingénierie et la mise au point de composants UNL de TA, à partir d’une mémoire de traduction formée de livres de phrases multilingues avec variables lexicales. SurviTra est aussi un phrasebook digital multilingue, assistant linguistique pour voyageurs monolingues (français, hindi, tamoul, anglais) en situation de “survie linguistique”. Le corpus d’un domaine-pilote (“Restaurant”) a été structuré et construit : sous-domaines de phrases alignées et classes lexicales de locutions quadrilingues, graphes UNL, dictionnaires UW++/français et UW++/hindi par domaines. L’approche, générique, est applicable à d’autres langues. Le prototype d’assistant linguistique (application Web, à interface textuelle) peut évoluer vers une application UNL embarquée sur SmartPhone, avec Traitement de Parole et multimodalité.</abstract>
       <url hash="eb3c34eb">2009.jeptalnrecital-demonstration.7</url>
@@ -1013,7 +1013,7 @@
     <paper id="9">
       <title><fixed-case>EXCOM</fixed-case> : Plate-forme d’annotation sémantique de textes multilingues</title>
       <author><first>Motasem</first><last>Alrahabi</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <pages>25–27</pages>
       <abstract>Nous proposons une plateforme d‟annotation sémantique, appelée « EXCOM ». Basée sur la méthode de l‟ « Exploration Contextuelle », elle permet, à travers une diversité de langues, de procéder à des annotations automatiques de segments textuels par l’analyse des formes de surface dans leur contexte. Les textes sont traités selon des « points de vue » discursifs dont les valeurs sont organisées dans une « carte sémantique ». L‟annotation se base sur un ensemble de règles linguistiques, écrites par un analyste, qui permettent d‟identifier les représentations textuelles sous-jacentes aux différentes catégories de la carte. Le système offre, à travers deux types d‟interfaces (développeur ou utilisateur), une chaîne de traitements automatiques de textes qui comprend la segmentation, l‟annotation et d‟autres fonctionnalités de post-traitement. Les documents annotés peuvent être utilisés, par exemple, pour des systèmes de recherche d‟information, de veille, de classification ou de résumé automatique.</abstract>
       <url hash="707ff646">2009.jeptalnrecital-demonstration.9</url>
@@ -1077,7 +1077,7 @@
       <booktitle>Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
       <editor><first>Thibault</first><last>Mondary</last></editor>
       <editor><first>Aurélien</first><last>Bossard</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>ATALA</publisher>
       <address>Senlis, France</address>
       <month>June</month>
@@ -1171,7 +1171,7 @@
     </paper>
     <paper id="10">
       <title>Combinaison de contenus encyclopédiques multilingues pour une reconnaissance d’entités nommées en contexte</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <pages>91–100</pages>
       <abstract>Dans cet article, nous présentons une méthode de transformation de Wikipédia en ressource d’information externe pour détecter et désambiguïser des entités nommées, en milieu ouvert et sans apprentissage spécifique. Nous expliquons comment nous construisons notre système, puis nous utilisons cinq éditions linguistiques de Wikipédia afin d’enrichir son lexique. Pour finir nous réalisons une évaluation et comparons les performances du système avec et sans compléments lexicaux issus des informations inter-linguistiques, sur une tâche d’extraction d’entités nommées appliquée à un corpus d’articles journalistiques.</abstract>
       <url hash="4213f155">2009.jeptalnrecital-recital.10</url>
diff --git a/data/xml/2009.mtsummit.xml b/data/xml/2009.mtsummit.xml
index 77d71083d5..c78a1ef464 100644
--- a/data/xml/2009.mtsummit.xml
+++ b/data/xml/2009.mtsummit.xml
@@ -34,7 +34,7 @@
     <paper id="4">
       <title>Panel Summary: Educating and Assessing the Human Translator in an Age of Technology</title>
       <author><first>Patricia</first><last>Phillips-Batoma</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Elizabeth</first><last>Lowe</last></author>
       <author><first>Patricia</first><last>Minacori</last></author>
       <url hash="d7013654">2009.mtsummit-plenaries.4</url>
@@ -49,7 +49,7 @@
     </paper>
     <paper id="6">
       <title>Technology in Translator Training and tools for translators</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <url hash="b6cc6342">2009.mtsummit-plenaries.6</url>
       <bibkey>bouillon-starlander-2009-technology</bibkey>
@@ -126,7 +126,7 @@
       <title>Source-side Dependency Tree Reordering Models with Subtree Movements and Constraints</title>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="463afe6a">2009.mtsummit-papers.1</url>
       <bibkey>bach-etal-2009-source</bibkey>
     </paper>
@@ -140,13 +140,13 @@
     </paper>
     <paper id="3">
       <title>Normalization for Automated Metrics: <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Speech Translation</title>
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Gregory A.</first><last>Sanders</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="gregory-sanders"><first>Gregory A.</first><last>Sanders</last></author>
       <author><first>Dan</first><last>Parvaz</last></author>
       <author><first>Alan</first><last>Rubenstein</last></author>
       <author><first>Christy</first><last>Doran</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
-      <author><first>Beatrice</first><last>Oshika</last></author>
+      <author id="beatrice-oshika"><first>Beatrice</first><last>Oshika</last></author>
       <url hash="60b31fef">2009.mtsummit-papers.3</url>
       <bibkey>condon-etal-2009-normalization</bibkey>
     </paper>
@@ -160,9 +160,9 @@
     </paper>
     <paper id="5">
       <title>Reassessment of the Role of Phrase Extraction in <fixed-case>PBSMT</fixed-case></title>
-      <author><first>Francisco</first><last>Guzman</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="68a82dfc">2009.mtsummit-papers.5</url>
       <bibkey>guzman-etal-2009-reassessment</bibkey>
     </paper>
@@ -192,23 +192,23 @@
     <paper id="9">
       <title>Automatic Detection of Translated Text and its Impact on Machine Translation</title>
       <author><first>David</first><last>Kurokawa</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <url hash="c7226b0a">2009.mtsummit-papers.9</url>
       <bibkey>kurokawa-etal-2009-automatic</bibkey>
     </paper>
     <paper id="10">
       <title>Improving a Lexicalized Hierarchical Reordering Model Using Maximum Entropy</title>
-      <author><first>Vinh Van</first><last>Nguyen</last></author>
+      <author id="vinh-van-nguyen"><first>Vinh Van</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Thai Phuong</first><last>Nguyen</last></author>
       <url hash="1d1d7c99">2009.mtsummit-papers.10</url>
       <bibkey>nguyen-etal-2009-improving</bibkey>
     </paper>
     <paper id="11">
       <title>User choice as an evaluation metric for web translation in cross language instant messaging applications</title>
-      <author><first>William</first><last>Ogden</last></author>
+      <author id="william-c-ogden"><first>William</first><last>Ogden</last></author>
       <author><first>Ron</first><last>Zacharski</last></author>
       <author><first>Sieun</first><last>An</last></author>
       <author><first>Yuki</first><last>Ishikawa</last></author>
@@ -218,7 +218,7 @@
     <paper id="12">
       <title>Prediction of Words in Statistical Machine Translation using a Multilayer Perceptron</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="3c11085a">2009.mtsummit-papers.12</url>
       <bibkey>patry-langlais-2009-prediction</bibkey>
     </paper>
@@ -228,7 +228,7 @@
       <author><first>Richard</first><last>Rose</last></author>
       <author><first>Hani</first><last>Safadi</last></author>
       <author><first>Samuel</first><last>Larkin</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <url hash="cba7f2db">2009.mtsummit-papers.13</url>
       <bibkey>reddy-etal-2009-incorporating</bibkey>
     </paper>
@@ -241,7 +241,7 @@
     </paper>
     <paper id="15">
       <title>Lemmatic Machine Translation</title>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Christopher</first><last>Lim</last></author>
       <author><first/><last>Mausam</last></author>
       <author><first>Bo</first><last>Qin</last></author>
@@ -273,7 +273,7 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Keiji</first><last>Yasuda</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="e06b0fe3">2009.mtsummit-papers.18</url>
       <bibkey>utiyama-etal-2009-mining</bibkey>
     </paper>
@@ -289,7 +289,7 @@
       <author><first>Hongmei</first><last>Zhao</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <url hash="e5f2fe6b">2009.mtsummit-papers.20</url>
@@ -298,7 +298,7 @@
     <paper id="21">
       <title>Inducing translations from officially published materials in <fixed-case>C</fixed-case>anadian government websites</title>
       <author><first>Qibo</first><last>Zhu</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Ash</first><last>Asudeh</last></author>
       <url hash="bc9b7537">2009.mtsummit-papers.21</url>
       <bibkey>zhu-etal-2009-inducing</bibkey>
@@ -322,8 +322,8 @@
     <paper id="2">
       <title>Extraction of Syntactic Translation Models from Parallel Data using Syntax from Source and Target Languages</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="74dfe0ac">2009.mtsummit-posters.2</url>
       <bibkey>ambati-etal-2009-extraction</bibkey>
     </paper>
@@ -337,8 +337,8 @@
     <paper id="4">
       <title>Reordering on <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>SMT</fixed-case></title>
       <author><first>Arantza</first><last>Díaz de Ilaraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="3dcd147c">2009.mtsummit-posters.4</url>
       <bibkey>diaz-de-ilaraza-etal-2009-reordering</bibkey>
     </paper>
@@ -351,7 +351,7 @@
     </paper>
     <paper id="6">
       <title>Selective addition of corpus-extracted phrasal lexical rules to a rule-based machine translation system</title>
-      <author><first>Loic</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loic</first><last>Dugast</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <url hash="20316ea9">2009.mtsummit-posters.6</url>
@@ -376,7 +376,7 @@
       <title>Harnessing the Redundant Results of Translation Spotting</title>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Julien</first><last>Bourdaillet</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <url hash="5255d84c">2009.mtsummit-posters.9</url>
       <bibkey>huet-etal-2009-harnessing</bibkey>
@@ -385,7 +385,7 @@
       <title>Development of a <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Software Manual Parallel Corpus</title>
       <author><first>Tatsuya</first><last>Ishisaka</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
       <url hash="3b44806c">2009.mtsummit-posters.10</url>
       <bibkey>ishisaka-etal-2009-development</bibkey>
@@ -393,8 +393,8 @@
     <paper id="11">
       <title>Word Alignment by Thresholded Two-Dimensional Normalization</title>
       <author><first>Hamidreza</first><last>Kobdani</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <url hash="236aa115">2009.mtsummit-posters.11</url>
       <bibkey>kobdani-etal-2009-word</bibkey>
     </paper>
@@ -433,9 +433,9 @@
     </paper>
     <paper id="16">
       <title>Using Artificial Data to Compare the Difficulty of Using Statistical Machine Translation in Different Language-Pairs</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <url hash="4b9db2bd">2009.mtsummit-posters.16</url>
       <bibkey>rayner-etal-2009-using-artificial</bibkey>
@@ -449,7 +449,7 @@
     </paper>
     <paper id="18">
       <title>Using Percolated Dependencies for Phrase Extraction in <fixed-case>SMT</fixed-case></title>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="dd32c5c3">2009.mtsummit-posters.18</url>
       <bibkey>srivastava-way-2009-using</bibkey>
@@ -477,32 +477,32 @@
       <title>Hosting Volunteer Translators</title>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <url hash="902b9ced">2009.mtsummit-posters.22</url>
       <bibkey>utiyama-etal-2009-hosting</bibkey>
     </paper>
     <paper id="23">
       <title>Transfer rule generation for a <fixed-case>J</fixed-case>apanese-<fixed-case>H</fixed-case>ungarian machine translation system</title>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <url hash="ee4b9087">2009.mtsummit-posters.23</url>
       <bibkey>varga-yokoyama-2009-transfer</bibkey>
     </paper>
     <paper id="24">
       <title>Efficient Beam Thresholding for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <url hash="f98f4efd">2009.mtsummit-posters.24</url>
       <bibkey>xiong-etal-2009-efficient</bibkey>
     </paper>
     <paper id="25">
       <title>A Source Dependency Model for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <url hash="9e441317">2009.mtsummit-posters.25</url>
       <bibkey>xiong-etal-2009-source</bibkey>
@@ -510,13 +510,13 @@
     <paper id="26">
       <title>Bilingual Dictionary Extraction from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Kun</first><last>Yu</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <url hash="ae495533">2009.mtsummit-posters.26</url>
       <bibkey>yu-tsujii-2009-bilingual</bibkey>
     </paper>
     <paper id="27">
       <title>Virtual <fixed-case>B</fixed-case>abel: Towards Context-Aware Machine Translation in Virtual Worlds</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
       <url hash="77f3a1b9">2009.mtsummit-posters.27</url>
       <bibkey>zhang-bach-2009-virtual</bibkey>
@@ -574,7 +574,7 @@
     <paper id="7">
       <title>Real Time Translation Services at <fixed-case>IBM</fixed-case></title>
       <author><first>David</first><last>Lubensky</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="6020800f">2009.mtsummit-commercial.7</url>
       <bibkey>lubensky-roukos-2009-real</bibkey>
     </paper>
@@ -615,7 +615,7 @@
     <paper id="3">
       <title>On beyond <fixed-case>TM</fixed-case>: When the Translator Leads the Design of a Translation Support Framework</title>
       <author><first>Reginald</first><last>Hobbs</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
       <url hash="438215d2">2009.mtsummit-government.3</url>
       <bibkey>hobbs-etal-2009-beyond</bibkey>
@@ -650,11 +650,11 @@
     </paper>
     <paper id="8">
       <title>Translation Memory Technology Assessment</title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <author><first>Dennis</first><last>Perzanowsky</last></author>
-      <author><first>Susan</first><last>Converse</last></author>
+      <author id="susan-p-converse"><first>Susan</first><last>Converse</last></author>
       <author><first>Rachel</first><last>Richardson</last></author>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Tucker</first><last>Maney</last></author>
       <url hash="7f05bd43">2009.mtsummit-government.8</url>
       <bibkey>van-ess-dykema-etal-2009-translation</bibkey>
@@ -676,7 +676,7 @@
     </paper>
     <paper id="2">
       <title>Machine Learning Approaches for Dealing with Bilingual Data in Statistical Machine Translation</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <bibkey>haffari-2009-machine</bibkey>
     </paper>
     <paper id="3">
@@ -734,8 +734,8 @@
     <paper id="3">
       <title>Disfluency and Out-of-vocabulary Word Processing in <fixed-case>A</fixed-case>rabic Speech Understanding</title>
       <author><first>Younès</first><last>Bahou</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <url hash="54c23b46">2009.mtsummit-caasl.3</url>
       <bibkey>bahou-etal-2009-disfluency</bibkey>
     </paper>
@@ -743,7 +743,7 @@
       <title><fixed-case>NP</fixed-case> Subject Detection in Verb-initial <fixed-case>A</fixed-case>rabic Clauses</title>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Conal</first><last>Sathi</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <url hash="d7afbed4">2009.mtsummit-caasl.4</url>
       <bibkey>green-etal-2009-np</bibkey>
     </paper>
@@ -762,7 +762,7 @@
     </paper>
     <paper id="7">
       <title>A Unification based Approach to the Morphological Analysis and Generation of <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <author><first>Jude</first><last>Miller</last></author>
       <url hash="045f0dd3">2009.mtsummit-caasl.7</url>
       <bibkey>kopru-miller-2009-unification</bibkey>
@@ -825,7 +825,7 @@
     </paper>
     <paper id="2">
       <title>Meta-evaluation of Automatic Evaluation Methods for Machine using Patent Translation Data in <fixed-case>NTCIR</fixed-case>-7</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Sayori</first><last>Shimohata</last></author>
       <author><first>Atsushi</first><last>Fujii</last></author>
@@ -839,10 +839,10 @@
     <paper id="3">
       <title>The Construction of a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Patent Parallel Corpus</title>
       <author><first>Bin</first><last>Lu</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <author><first>Tao</first><last>Jiang</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="edfb47ca">2009.mtsummit-wpt.3</url>
       <bibkey>lu-etal-2009-construction</bibkey>
     </paper>
@@ -854,7 +854,7 @@
     </paper>
     <paper id="5">
       <title>Translation Disambiguation of Patent Sentences using Case Frames</title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <author><first>Masumi</first><last>Okuyama</last></author>
       <url hash="670b8bb2">2009.mtsummit-wpt.5</url>
       <bibkey>yokoyama-okuyama-2009-translation</bibkey>
@@ -937,14 +937,14 @@
     </paper>
     <paper id="2">
       <title>The Web as a Source of Informative Background Knowledge</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <url hash="b9c8a55a">2009.mtsummit-btm.2</url>
       <bibkey>barriere-2009-web</bibkey>
     </paper>
     <paper id="3">
       <title>A Web Service Enabling Gradable Post-edition of Pre-translations Produced by Existing Translation Tools: Practical Use to Provide High-quality Translation of an Online Encyclopedia</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Cong-Phap</first><last>Huynh</last></author>
       <url hash="165de751">2009.mtsummit-btm.3</url>
       <bibkey>blanchon-etal-2009-web</bibkey>
@@ -966,7 +966,7 @@
     </paper>
     <paper id="6">
       <title>Bitextor: a Free/Open-source Software to Harvest Translation Memories from Multilingual Websites</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <url hash="88abeb98">2009.mtsummit-btm.6</url>
       <bibkey>espla-gomis-2009-bitextor</bibkey>
     </paper>
diff --git a/data/xml/2009.tal.xml b/data/xml/2009.tal.xml
index d25eb8661d..e987149796 100644
--- a/data/xml/2009.tal.xml
+++ b/data/xml/2009.tal.xml
@@ -81,7 +81,7 @@
       <author><first>Christine</first><last>Jacquin</last></author>
       <author><first>Simon</first><last>Petitrenaud</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>201–225</pages>
       <url hash="8ddf1862">2009.tal-1.8</url>
       <language>fra</language>
@@ -98,9 +98,9 @@
     </paper>
     <paper id="10">
       <title>Évaluation des outils terminologiques : enjeux, difficultés et propositions [Evaluation of terminological tools : challenges, problems and propositions]</title>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Haïfa</first><last>Zargayouna</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Jonathan</first><last>van Puymbrouck</last></author>
       <pages>257–281</pages>
       <url hash="94431be3">2009.tal-1.10</url>
@@ -170,7 +170,7 @@
     <paper id="4">
       <title>Building a Corpus-based Historical <fixed-case>P</fixed-case>ortuguese Dictionary : Challenges and Opportunities</title>
       <author><first>Arnaldo Junior</first><last>Candido</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>73–102</pages>
       <url hash="c4518c0d">2009.tal-2.4</url>
       <bibkey>candido-aluisio-2009-building</bibkey>
@@ -272,7 +272,7 @@
     </paper>
     <paper id="5">
       <title>Cross-framework parser stacking for data-driven dependency parsing</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Kathrin</first><last>Spreyer</last></author>
       <pages>109–138</pages>
diff --git a/data/xml/2009.tc.xml b/data/xml/2009.tc.xml
index cb2226c23f..7b9249b025 100644
--- a/data/xml/2009.tc.xml
+++ b/data/xml/2009.tc.xml
@@ -20,14 +20,14 @@
     </paper>
     <paper id="2">
       <title>Towards an effective toolkit for translators</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <url hash="75e5b783">2009.tc-1.2</url>
       <bibkey>eisele-2009-towards</bibkey>
     </paper>
     <paper id="3">
       <title>Computer-aided translation backed by machine translation</title>
       <author><first>Ondřej</first><last>Odcházal</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <url hash="46563da8">2009.tc-1.3</url>
       <bibkey>odchazal-bojar-2009-computer</bibkey>
     </paper>
@@ -35,7 +35,7 @@
       <title>Minna no Hon’yaku: a website for hosting, archiving, and promoting translations</title>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <url hash="2d603ca1">2009.tc-1.4</url>
       <bibkey>utiyama-etal-2009-minna</bibkey>
diff --git a/data/xml/2010.amta.xml b/data/xml/2010.amta.xml
index bae0184cf8..991bbab168 100644
--- a/data/xml/2010.amta.xml
+++ b/data/xml/2010.amta.xml
@@ -54,7 +54,7 @@
     <paper id="3">
       <title>Combining Confidence Estimation and Reference-based Metrics for Segment-level <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <url hash="a3905a70">2010.amta-papers.3</url>
       <abstract>We describe an effort to improve standard reference-based metrics for Machine Translation (MT) evaluation by enriching them with Confidence Estimation (CE) features and using a learning mechanism trained on human annotations. Reference-based MT evaluation metrics compare the system output against reference translations looking for overlaps at different levels (lexical, syntactic, and semantic). These metrics aim at comparing MT systems or analyzing the progress of a given system and are known to have reasonably good correlation with human judgments at the corpus level, but not at the segment level. CE metrics, on the other hand, target the system in use, providing a quality score to the end-user for each translated segment. They cannot rely on reference translations, and use instead information extracted from the input text, system output and possibly external corpora to train machine learning algorithms. These metrics correlate better with human judgments at the segment level. However, they are usually highly biased by difficulty level of the input segment, and therefore are less appropriate for comparing multiple systems translating the same input segments. We show that these two classes of metrics are complementary and can be combined to provide MT evaluation metrics that achieve higher correlation with human judgments at the segment level.</abstract>
       <bibkey>specia-gimenez-2010-combining</bibkey>
@@ -62,7 +62,7 @@
     <paper id="4">
       <title>The Impact of <fixed-case>A</fixed-case>rabic Morphological Segmentation on Broad-coverage <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation</title>
       <author><first>Hassan</first><last>Al-Haj</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="73c15a7c">2010.amta-papers.4</url>
       <abstract>Morphologically rich languages pose a challenge for statistical machine translation (SMT). This challenge is magnified when translating into a morphologically rich language. In this work we address this challenge in the framework of a broad-coverage English-to-Arabic phrase based statistical machine translation (PBSMT). We explore the full spectrum of Arabic segmentation schemes ranging from full word form to fully segmented forms and examine the effects on system performance. Our results show a difference of 2.61 BLEU points between the best and worst segmentation schemes indicating that the choice of the segmentation scheme has a significant effect on the performance of a PBSMT system in a large data scenario. We also show that a simple segmentation scheme can perform as good as the best and more complicated segmentation scheme. We also report results on a wide set of techniques for recombining the segmented Arabic output.</abstract>
       <bibkey>al-haj-lavie-2010-impact</bibkey>
@@ -79,7 +79,7 @@
       <author><first>Arafat</first><last>Ahsan</last></author>
       <author><first>Prasanth</first><last>Kolachina</last></author>
       <author><first>Sudheer</first><last>Kolachina</last></author>
-      <author><first>Dipti</first><last>Misra</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <url hash="79b97c53">2010.amta-papers.6</url>
       <abstract>In this paper, we present the insights gained from a detailed study of coupling a highly modular English-Hindi RBMT system with a standard phrase-based SMT system. Coupling the RBMT and SMT systems at various stages in the RBMT pipeline, we observe the effects of the source transformations at each stage on the performance of the coupled MT system. We propose an architecture that systematically exploits the structural transfer and robust generation capabilities of the RBMT system. Working with the English-Hindi language pair, we show that the coupling configurations explored in our experiments help address different aspects of the typological divergence between these languages. In spite of working with very small datasets, we report significant improvements both in terms of BLEU (7.14 and 0.87 over the RBMT and the SMT baselines respectively) and subjective evaluation (relative decrease of 17% in SSER).</abstract>
@@ -87,14 +87,14 @@
     </paper>
     <paper id="7">
       <title>Semantically-Informed Syntactic Machine Translation: A Tree-Grafting Approach</title>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>Michael</first><last>Bloodgood</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Nathaniel</first><last>Filardo</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <url hash="f4ccb184">2010.amta-papers.7</url>
       <abstract>We describe a unified and coherent syntactic framework for supporting a semantically-informed syntactic approach to statistical machine translation. Semantically enriched syntactic tags assigned to the target-language training texts improved translation quality. The resulting system significantly outperformed a linguistically naive baseline model (Hiero), and reached the highest scores yet reported on the NIST 2009 Urdu-English translation task. This finding supports the hypothesis (posed by many researchers in the MT community, e.g., in DARPA GALE) that both syntactic and semantic information are critical for improving translation quality—and further demonstrates that large gains can be achieved for low-resource languages with different word order than English.</abstract>
       <bibkey>baker-etal-2010-semantically</bibkey>
@@ -104,7 +104,7 @@
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="54860d97">2010.amta-papers.8</url>
       <abstract>In this work we review and compare three additional syntactic enhancements for the hierarchical phrase-based translation model, which have been presented in the last few years. We compare their performance when applied separately and study whether the combination may yield additional improvements. Our findings show that the models are complementary, and their combination achieve an increase of 1% in BLEU and a reduction of nearly 2% in TER. The models presented in this work are made available as part of the Jane open source machine translation toolkit.</abstract>
       <bibkey>stein-etal-2010-cocktail</bibkey>
@@ -120,7 +120,7 @@
     <paper id="10">
       <title>f-align: An Open-Source Alignment Tool for <fixed-case>LFG</fixed-case> f-Structures</title>
       <author><first>Anton</first><last>Bryl</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="aaffa7a1">2010.amta-papers.10</url>
       <abstract>Lexical-Functional Grammar (LFG) f-structures (Kaplan and Bresnan, 1982) have attracted some attention in recent years as an intermediate data representation for statistical machine translation. So far, however, there are no alignment tools capable of aligning f-structures directly, and plain word alignment is used for this purpose. In this way no use is made of the structural information contained in f-structures. We present the first version of a specialized f-structure alignment open-source software.</abstract>
       <bibkey>bryl-van-genabith-2010-f</bibkey>
@@ -145,10 +145,10 @@
     </paper>
     <paper id="13">
       <title>Using Sublexical Translations to Handle the <fixed-case>OOV</fixed-case> Problem in <fixed-case>MT</fixed-case></title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
       <author><first>Ho-ching</first><last>Yen</last></author>
-      <author><first>Shih-ting</first><last>Huang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="shih-ting-huang"><first>Shih-ting</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <url hash="63e51c17">2010.amta-papers.13</url>
       <abstract>We introduce a method for learning to translate out-of-vocabulary (OOV) words. The method focuses on combining sublexical/constituent translations of an OOV to generate its translation candidates. In our approach, wild-card searches are formulated based on our OOV analysis, aimed at maximizing the probability of retrieving OOVs’ sublexical translations from existing resource of machine translation (MT) systems. At run-time, translation candidates of the unknown words are generated from their suitable sublexical translations and ranked based on monolingual and bilingual information. We have incorporated the OOV model into a state-of-the-art MT system and experimental results show that our model indeed helps to ease the negative impact of OOVs on translation quality, especially for sentences containing more OOVs (significant improvement).</abstract>
       <bibkey>huang-etal-2010-using</bibkey>
@@ -164,8 +164,8 @@
     <paper id="15">
       <title>Detecting Cross-lingual Semantic Similarity Using Parallel <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>anks</title>
       <author><first>Shumin</first><last>Wu</last></author>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="0ab82127">2010.amta-papers.15</url>
       <abstract>This paper suggests a method for detecting cross-lingual semantic similarity using parallel PropBanks. We begin by improving word alignments for verb predicates generated by GIZA++ by using information available in parallel PropBanks. We applied the Kuhn-Munkres method to measure predicate-argument matching and improved verb predicate alignments by an F-score of 12.6%. Using the enhanced word alignments we checked the set of target verbs aligned to a specific source verb for semantic consistency. For a set of English verbs aligned to a Chinese verb, we checked if the English verbs belong to the same semantic class using an existing lexical database, WordNet. For a set of Chinese verbs aligned to an English verb we manually checked semantic similarity between the Chinese verbs within a set. Our results show that the verb sets we generated have a high correlation with semantic classes. This could potentially lead to an automatic technique for generating semantic classes for verbs.</abstract>
       <bibkey>wu-etal-2010-detecting</bibkey>
@@ -175,9 +175,9 @@
       <author><first>Pratyush</first><last>Banerjee</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Baoli</first><last>Li</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="b1856629">2010.amta-papers.16</url>
       <abstract>This paper presents a set of experiments on Domain Adaptation of Statistical Machine Translation systems. The experiments focus on Chinese-English and two domain-specific corpora. The paper presents a novel approach for combining multiple domain-trained translation models to achieve improved translation quality for both domain-specific as well as combined sets of sentences. We train a statistical classifier to classify sentences according to the appropriate domain and utilize the corresponding domain-specific MT models to translate them. Experimental results show that the method achieves a statistically significant absolute improvement of 1.58 BLEU (2.86% relative improvement) score over a translation model trained on combined data, and considerable improvements over a model using multiple decoding paths of the Moses decoder, for the combined domain test set. Furthermore, even for domain-specific test sets, our approach works almost as well as dedicated domain-specific models and perfect classification.</abstract>
       <bibkey>banerjee-etal-2010-combining</bibkey>
@@ -186,7 +186,7 @@
       <title>Using Variable Decoding Weight for Language Model in Statistical Machine Translation</title>
       <author><first>Behrang</first><last>Mohit</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="d3400968">2010.amta-papers.17</url>
       <abstract>This paper investigates varying the decoder weight of the language model (LM) when translating different parts of a sentence. We determine the condition under which the LM weight should be adapted. We find that a better translation can be achieved by varying the LM weight when decoding the most problematic spot in a sentence, which we refer to as a difficult segment. Two adaptation strategies are proposed and compared through experiments. We find that adapting a different LM weight for every difficult segment resulted in the largest improvement in translation quality.</abstract>
       <bibkey>mohit-etal-2010-using</bibkey>
@@ -204,7 +204,7 @@
     <paper id="19">
       <title>Maximizing <fixed-case>TM</fixed-case> Performance through Sub-Tree Alignment and <fixed-case>SMT</fixed-case></title>
       <author><first>Ventsislav</first><last>Zhechev</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="860cec88">2010.amta-papers.19</url>
       <abstract>With the steadily increasing demand for high-quality translation, the localisation industry is constantly searching for technologies that would increase translator throughput, in particular focusing on the use of high-quality Statistical Machine Translation (SMT) supplementing the established Translation Memory (TM) technology. In this paper, we present a novel modular approach that utilises state-of-the-art sub-tree alignment and SMT techniques to turn the fuzzy matches from a TM into near-perfect translations. Rather than relegate SMT to a last-resort status where it is only used should the TM system fail to produce the desired output, for us SMT is an integral part of the translation process that we rely on to obtain high-quality results. We show that the presented system consistently produces better-quality output than the TM and performs on par or better than the standalone SMT system.</abstract>
       <bibkey>zhechev-van-genabith-2010-maximizing</bibkey>
@@ -212,7 +212,7 @@
     <paper id="20">
       <title>Choosing the Right Evaluation for Machine Translation: an Examination of Annotator and Automatic Metric Performance on Human Judgment Tasks</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="a95d8e35">2010.amta-papers.20</url>
       <abstract>This paper examines the motivation, design, and practical results of several types of human evaluation tasks for machine translation. In addition to considering annotator performance and task informativeness over multiple evaluations, we explore the practicality of tuning automatic evaluation metrics to each judgment type in a comprehensive experiment using the METEOR-NEXT metric. We present results showing clear advantages of tuning to certain types of judgments and discuss causes of inconsistency when tuning to various judgment data, as well as sources of difficulty in the human evaluation tasks themselves.</abstract>
       <bibkey>denkowski-lavie-2010-choosing</bibkey>
@@ -229,7 +229,7 @@
       <title>A Source-side Decoding Sequence Model for Statistical Machine Translation</title>
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="eb76877f">2010.amta-papers.22</url>
       <abstract>We propose a source-side decoding sequence language model for phrase-based statistical machine translation. This model is a reordering model in the sense that it helps the decoder find the correct decoding sequence. The model uses word-aligned bilingual training data. We show improved translation quality of up to 1.34% BLEU and 0.54% TER using this model compared to three other widely used reordering models.</abstract>
       <bibkey>feng-etal-2010-source</bibkey>
@@ -237,8 +237,8 @@
     <paper id="23">
       <title>Supertags as Source Language Context in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="e422241f">2010.amta-papers.23</url>
       <abstract>Statistical machine translation (SMT) models have recently begun to include source context modeling, under the assumption that the proper lexical choice of the translation for an ambiguous word can be determined from the context in which it appears. Various types of lexical and syntactic features have been explored as effective source context to improve phrase selection in SMT. In the present work, we introduce lexico-syntactic descriptions in the form of supertags as source-side context features in the state-of-the-art hierarchical phrase-based SMT (HPB) model. These features enable us to exploit source similarity in addition to target similarity, as modelled by the language model. In our experiments two kinds of supertags are employed: those from lexicalized tree-adjoining grammar (LTAG) and combinatory categorial grammar (CCG). We use a memory-based classification framework that enables the efficient estimation of these features. Despite the differences between the two supertagging approaches, they give similar improvements. We evaluate the performance of our approach on an English-to-Dutch translation task, and report statistically significant improvements of 4.48% and 6.3% BLEU scores in translation quality when adding CCG and LTAG supertags, respectively, as context-informed features.</abstract>
@@ -277,7 +277,7 @@
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="b081c3ca">2010.amta-papers.27</url>
       <abstract>We report findings from a user study with professional post-editors using a translation recommendation framework (He et al., 2010) to integrate Statistical Machine Translation (SMT) output with Translation Memory (TM) systems. The framework recommends SMT outputs to a TM user when it predicts that SMT outputs are more suitable for post-editing than the hits provided by the TM. We analyze the effectiveness of the model as well as the reaction of potential users. Based on the performance statistics and the users’ comments, we find that translation recommendation can reduce the workload of professional post-editors and improve the acceptance of MT in the localization industry.</abstract>
       <bibkey>he-etal-2010-improving</bibkey>
@@ -295,7 +295,7 @@
     <paper id="29">
       <title>Improving Reordering in Statistical Machine Translation from <fixed-case>F</fixed-case>arsi</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <url hash="a1cd6ae4">2010.amta-papers.29</url>
       <abstract>In this paper, we propose a novel model for scoring reordering in phrase-based statistical machine translation (SMT) and successfully use it for translation from Farsi into English and Arabic. The model replaces the distance-based distortion model that is widely used in most SMT systems. The main idea of the model is to penalize each new deviation from the monotonic translation path. We also propose a way for combining this model with manually created reordering rules for Farsi which try to alleviate the difference in sentence structure between Farsi and English/Arabic by changing the position of the verb. The rules are used in the SMT search as soft constraints. In the experiments on two general-domain translation tasks, the proposed penalty-based model improves the BLEU score by up to 1.5% absolute as compared to the baseline of monotonic translation, and up to 1.2% as compared to using the distance-based distortion model.</abstract>
       <bibkey>matusov-kopru-2010-improving</bibkey>
@@ -324,7 +324,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Martin</first><last>Ratajczak</last></author>
       <author><first>Patrick</first><last>Lehnen</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="7bbc03db">2010.amta-papers.32</url>
       <abstract>In this work we give a detailed comparison of the impact of the integration of discriminative and trigger-based lexicon models in state-of-the-art hierarchical and conventional phrase-based statistical machine translation systems. As both types of extended lexicon models can grow very large, we apply certain restrictions to discard some of the less useful information. We show how these restrictions facilitate the training of the extended lexicon models. We finally evaluate systems that incorporate both types of models with different restrictions on a large-scale translation task for the Arabic-English language pair. Our results suggest that extended lexicon models can be substantially reduced in size while still giving clear improvements in translation performance.</abstract>
       <bibkey>huck-etal-2010-comparison</bibkey>
@@ -342,7 +342,7 @@
     <paper id="34">
       <title>Voting on N-grams for Machine Translation System Combination</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="900f719a">2010.amta-papers.34</url>
       <abstract>System combination exploits differences between machine translation systems to form a combined translation from several system outputs. Core to this process are features that reward n-gram matches between a candidate combination and each system output. Systems differ in performance at the n-gram level despite similar overall scores. We therefore advocate a new feature formulation: for each system and each small n, a feature counts n-gram matches between the system and candidate. We show post-evaluation improvement of 6.67 BLEU over the best system on NIST MT09 Arabic-English test data. Compared to a baseline system combination scheme from WMT 2009, we show improvement in the range of 1 BLEU point.</abstract>
       <bibkey>heafield-lavie-2010-voting</bibkey>
@@ -366,7 +366,7 @@
     </meta>
     <paper id="1">
       <title>Statistical Machine Translation of <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>anipuri using Morpho-syntactic and Semantic Information</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <author><first>Savaji</first><last>Bandyopadhyay</last></author>
       <url hash="6093d80b">2010.amta-srw.1</url>
       <abstract>English-Manipuri language pair is one of the rarely investigated with restricted bilingual resources. The development of a factored Statistical Machine Translation (SMT) system between English as source and Manipuri, a morphologically rich language as target is reported. The role of the suffixes and dependency relations on the source side and case markers on the target side are identified as important translation factors. The morphology and dependency relations play important roles to improve the translation quality. A parallel corpus of 10350 sentences from news domain is used for training and the system is tested with 500 sentences. Using the proposed translation factors, the output of the translation quality is improved as indicated by the BLEU score and subjective evaluation.</abstract>
@@ -394,7 +394,7 @@
       <title>Machine Translation between <fixed-case>H</fixed-case>ebrew and <fixed-case>A</fixed-case>rabic: Needs, Challenges and Preliminary Solutions</title>
       <author><first>Reshef</first><last>Shilon</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <url hash="a7e1e900">2010.amta-srw.4</url>
       <abstract>Hebrew and Arabic are related but mutually incomprehensible languages with complex morphology and scarce parallel corpora. Machine translation between the two languages is therefore interesting and challenging. We discuss similarities and differences between Hebrew and Arabic, the benefits and challenges that they induce, respectively, and their implications for machine translation. We highlight the shortcomings of using English as a pivot language and advocate a direct, transfer-based and linguistically-informed (but still statistical, and hence scalable) approach. We report preliminary results of such a system that we are currently developing.</abstract>
@@ -556,9 +556,9 @@
     </meta>
     <paper id="1">
       <title>Paralinguist Assessment Decision Factors For Machine Translation Output: A Case Study</title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <author><first>Jocelyn</first><last>Phillips</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <author><first>Laurie</first><last>Gerber</last></author>
       <url hash="95101ed1">2010.amta-government.1</url>
       <abstract>We describe a case study that presents a framework for examining whether Machine Translation (MT) output enables translation professionals to translate faster while at the same time producing better quality translations than without MT output. We seek to find decision factors that enable a translation professional, known as a Paralinguist, to determine whether MT output is of sufficient quality to serve as a “seed translation” for post-editors. The decision factors, unlike MT developers’ automatic metrics, must function without a reference translation. We also examine the correlation of MT developers’ automatic metrics with error annotators’ assessments of post-edited translations.</abstract>
@@ -612,7 +612,7 @@
     </paper>
     <paper id="9">
       <title>Task-based evaluation methods for machine translation, in practice and theory</title>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <abstract>A panel of industry and government experts will discuss ways in which they have applied task-based evaluation for Machine Translation and other language technologies in their organizations and share ideas for new methods that could be tried in the future. As part of the discussion, the panelists will address some of the following points: What task-based evaluation means within their organization, i.e., how task-based evaluation is defined; How task-based evaluation impacts the use of MT technologies in their work environment; Whether task-based evaluation correlates with MT developers' automated metrics and if not, how do we arrive at automated metrics that do correlate with the more expensive task-based evaluation; What "lessons-learned" resulted from the course of performing task-based evaluation; How task-based evaluations can be generalized to multiple workflow environments.</abstract>
       <bibkey>klavans-2010-task</bibkey>
     </paper>
@@ -666,7 +666,7 @@
     </paper>
     <paper id="18">
       <title>Parallel Corpus Development at <fixed-case>NVTC</fixed-case></title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <author><first>Laurie</first><last>Gerber</last></author>
       <url hash="7a24dd97">2010.amta-government.19</url>
       <abstract>In this paper, we describe the methods used to develop an exchangeable translation memory bank of sentence-aligned Mandarin Chinese - English sentences. This effort is part of a larger effort, initiated by the National Virtual Translation Center (NVTC), to foster collaboration and sharing of translation memory banks across the Intelligence Community and the Department of Defense. In this paper, we describe our corpus creation process - a largely automated process - highlighting the human interventions that are still deemed necessary. We conclude with a brief discussion of how this work will affect plans for NVTC's new translation management workflow and future research to increase the performance of the automated components of the corpus creation process.</abstract>
@@ -703,7 +703,7 @@
     </paper>
     <paper id="4">
       <title>Evaluating the Output of Machine Translation Systems</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="ddfdfacb">2010.amta-tutorials.4</url>
       <bibkey>lavie-2010-evaluating</bibkey>
     </paper>
@@ -772,10 +772,10 @@
     </paper>
     <paper id="4">
       <title><fixed-case>W</fixed-case>iki<fixed-case>BABEL</fixed-case>: A System for Multilingual <fixed-case>W</fixed-case>ikipedia Content</title>
-      <author><first>A.</first><last>Kumaran</last></author>
+      <author id="a-kumaran"><first>A.</first><last>Kumaran</last></author>
       <author><first>Naren</first><last>Datha</last></author>
       <author><first>B.</first><last>Ashok</last></author>
-      <author><first>K.</first><last>Saravanan</last></author>
+      <author id="k-saravanan"><first>K.</first><last>Saravanan</last></author>
       <author><first>Anil</first><last>Ande</last></author>
       <author><first>Ashwani</first><last>Sharma</last></author>
       <author><first>Sridhar</first><last>Vedantham</last></author>
diff --git a/data/xml/2010.eamt.xml b/data/xml/2010.eamt.xml
index febbad754e..8aceef81fb 100644
--- a/data/xml/2010.eamt.xml
+++ b/data/xml/2010.eamt.xml
@@ -44,7 +44,7 @@
     </paper>
     <paper id="5">
       <title>Can inversion transduction grammars generate hand alignments</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <url hash="543ecbc8">2010.eamt-1.5</url>
       <attachment type="presentation" hash="c8575de1">2010.eamt-1.5.Presentation.pdf</attachment>
       <bibkey>sogaard-2010-inversion</bibkey>
@@ -52,8 +52,8 @@
     <paper id="6">
       <title>A fully unsupervised approach for mining parallel data from comparable corpora</title>
       <author><first>Thi Ngoc Diep</first><last>Do</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <url hash="f374ded2">2010.eamt-1.6</url>
       <attachment type="presentation" hash="98d15226">2010.eamt-1.6.Presentation.pdf</attachment>
       <bibkey>do-etal-2010-fully</bibkey>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="10">
       <title>Query translation using <fixed-case>W</fixed-case>ikipedia-based resources for analysis and disambiguation</title>
-      <author><first>Benoit</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoit</first><last>Gaillard</last></author>
       <author><first>Malek</first><last>Boualem</last></author>
       <author><first>Olivier</first><last>Collin</last></author>
       <url hash="b12329f1">2010.eamt-1.10</url>
@@ -100,15 +100,15 @@
     <paper id="12">
       <title>Linguistic-based Evaluation Criteria to identify Statistical Machine Translation Errors</title>
       <author><first>Mireia</first><last>Farrús</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <url hash="8afb8fea">2010.eamt-1.12</url>
       <bibkey>farrus-etal-2010-linguistic</bibkey>
     </paper>
     <paper id="13">
       <title>Rule-based <fixed-case>B</fixed-case>reton to <fixed-case>F</fixed-case>rench machine translation</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <url hash="228ef8df">2010.eamt-1.13</url>
       <bibkey>tyers-2010-rule</bibkey>
     </paper>
@@ -122,31 +122,31 @@
     <paper id="15">
       <title>Robust Estimation of Feature Weights in Statistical Machine Translation</title>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="ed450244">2010.eamt-1.15</url>
       <bibkey>espana-bonet-marquez-2010-robust</bibkey>
     </paper>
     <paper id="16">
       <title>Potential scope of a fully-integrated architecture for speech translation</title>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>María Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>María Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="226e5c6a">2010.eamt-1.16</url>
       <bibkey>perez-etal-2010-potential</bibkey>
     </paper>
     <paper id="17">
       <title>Integration of statistical collocation segmentations in a phrase-based statistical machine translation system</title>
       <author><first>Marta R.</first><last>Costa-jussa</last></author>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <url hash="14e27cae">2010.eamt-1.17</url>
       <bibkey>costa-jussa-etal-2010-integration</bibkey>
     </paper>
     <paper id="18">
       <title>On the Use of Confidence Measures within an Interactive-predictive Machine Translation System</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortíz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortíz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="80416772">2010.eamt-1.18</url>
       <bibkey>gonzalez-rubio-etal-2010-use</bibkey>
     </paper>
@@ -163,15 +163,15 @@
       <author><first>Kei</first><last>Hashimoto</last></author>
       <author><first>Yoshihiko</first><last>Nankaku</last></author>
       <author><first>Keiichi</first><last>Tokuda</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <url hash="8b3237fb">2010.eamt-1.20</url>
       <bibkey>gomez-etal-2010-deterministic</bibkey>
     </paper>
     <paper id="21">
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>angla Phrase-Based Machine Translation</title>
-      <author><first>Zahurul</first><last>Islam</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <url hash="384bd22d">2010.eamt-1.21</url>
       <bibkey>islam-etal-2010-english</bibkey>
     </paper>
@@ -201,7 +201,7 @@
     <paper id="25">
       <title>Bridging the Gap – <fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank Terminology Delivered to Users’ Environment</title>
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <author><first>Signe</first><last>Rirdance</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <url hash="34ef8d79">2010.eamt-1.25</url>
@@ -210,7 +210,7 @@
     <paper id="26">
       <title>Lattice Score Based Data Cleaning for Phrase-Based Statistical Machine Translation</title>
       <author><first>Jie</first><last>Jiang</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="5ef5f84b">2010.eamt-1.26</url>
       <bibkey>jiang-etal-2010-lattice</bibkey>
@@ -218,29 +218,29 @@
     <paper id="27">
       <title>Chunk-Based <fixed-case>EBMT</fixed-case></title>
       <author><first>Jae Dong</first><last>Kim</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="2dd49f03">2010.eamt-1.27</url>
       <bibkey>kim-etal-2010-chunk</bibkey>
     </paper>
     <paper id="28">
       <title>Source reordering using <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt classifiers and supertags</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <url hash="91f4986b">2010.eamt-1.28</url>
       <bibkey>khalilov-simaan-2010-source</bibkey>
     </paper>
     <paper id="29">
       <title>Domain Adaptation in Statistical Machine Translation using Factored Translation Models</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="42ca6958">2010.eamt-1.29</url>
       <attachment type="presentation" hash="b58d4d95">2010.eamt-1.29.Presentation.pdf</attachment>
       <bibkey>niehues-waibel-2010-domain</bibkey>
     </paper>
     <paper id="30">
       <title>Online Language Model adaptation via N-gram Mixtures for Statistical Machine Translation</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <url hash="a2ae9a6a">2010.eamt-1.30</url>
       <attachment type="presentation" hash="e65362b3">2010.eamt-1.30.Presentation.pdf</attachment>
@@ -267,7 +267,7 @@
     <paper id="33">
       <title>Hierarchical Hybrid Translation between <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman</title>
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <url hash="c9268975">2010.eamt-1.33</url>
       <attachment type="presentation" hash="f53d5a8a">2010.eamt-1.33.Presentation.pdf</attachment>
       <bibkey>chen-eisele-2010-hierarchical</bibkey>
@@ -286,7 +286,7 @@
       <title>Using the Apertium <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese machine translation system for localization</title>
       <author><first>François</first><last>Masselot</last></author>
       <author><first>Petra</first><last>Ribiczey</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <url hash="ce522268">2010.eamt-1.35</url>
       <attachment type="presentation" hash="36ec5edf">2010.eamt-1.35.Presentation.pdf</attachment>
       <bibkey>masselot-etal-2010-using</bibkey>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="37">
       <title><fixed-case>H</fixed-case>aitian <fixed-case>C</fixed-case>reole: How to Build and Ship an <fixed-case>MT</fixed-case> Engine from Scratch in 4 days, 17 hours, &amp; 30 minutes</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <url hash="018f5a20">2010.eamt-1.37</url>
       <attachment type="presentation" hash="efb0d68c">2010.eamt-1.37.Presentation.pdf</attachment>
       <bibkey>lewis-2010-haitian</bibkey>
@@ -316,9 +316,9 @@
     </paper>
     <paper id="39">
       <title>A Bootstrapped Interlingua-Based <fixed-case>SMT</fixed-case> Architecture</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="18365f00">2010.eamt-1.39</url>
       <attachment type="presentation" hash="04b961b1">2010.eamt-1.39.Presentation.pdf</attachment>
       <bibkey>rayner-etal-2010-bootstrapped</bibkey>
@@ -326,8 +326,8 @@
     <paper id="40">
       <title>Automatic Determination of Number of clusters for creating Templates in Example-Based Machine Translation</title>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="c654f6f7">2010.eamt-1.40</url>
       <attachment type="presentation" hash="5d322cd4">2010.eamt-1.40.Presentation.pdf</attachment>
       <bibkey>gangadharaiah-etal-2010-automatic</bibkey>
diff --git a/data/xml/2010.iwslt.xml b/data/xml/2010.iwslt.xml
index fbaf7e204b..fedc7c9bb9 100644
--- a/data/xml/2010.iwslt.xml
+++ b/data/xml/2010.iwslt.xml
@@ -24,7 +24,7 @@
     </paper>
     <paper id="3">
       <title>Resources for adding semantics to machine translation</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="8d7ba5dd">2010.iwslt-keynotes.3</url>
       <bibkey>hajic-2010-resources</bibkey>
     </paper>
@@ -49,9 +49,9 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the <fixed-case>IWSLT</fixed-case> 2010 evaluation campaign</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <pages>3-27</pages>
       <url hash="299877d6">2010.iwslt-evaluation.1</url>
       <abstract>This paper gives an overview of the evaluation campaign results of the 7th International Workshop on Spoken Language Translation (IWSLT 2010)1. This year, we focused on three spoken language tasks: (1) public speeches on a variety of topics (TALK) from English to French, (2) spoken dialog in travel situations (DIALOG) between Chinese and English, and (3) traveling expressions (BTEC) from Arabic, Turkish, and French to English. In total, 28 teams (including 7 firsttime participants) took part in the shared tasks, submitting 60 primary and 112 contrastive runs. Automatic and subjective evaluations of the primary runs were carried out in order to investigate the impact of different communication modalities, spoken language styles and semantic context on automatic speech recognition (ASR) and machine translation (MT) system performances.</abstract>
@@ -60,7 +60,7 @@
     <paper id="2">
       <title><fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek’s <fixed-case>APT</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <pages>29-36</pages>
       <url hash="e86282a4">2010.iwslt-evaluation.2</url>
       <abstract>In this paper, we describe AppTek’s new APT machine translation system that we employed in the IWSLT 2010 evaluation campaign. This year, we participated in the Arabic-to-English and Turkish-to-English BTEC tasks. We discuss the architecture of the system, the preprocessing steps and the experiments carried out during the campaign. We show that competitive translation quality can be obtained with a system that can be turned into a real-life product without much effort.</abstract>
@@ -78,7 +78,7 @@
     <paper id="4">
       <title>N-gram-based machine translation enhanced with neural networks</title>
       <author><first>Francisco</first><last>Zamora-Martinez</last></author>
-      <author><first>Maria Jose</first><last>Castro-Bleda</last></author>
+      <author id="maria-jose-castro-bleda"><first>Maria Jose</first><last>Castro-Bleda</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <pages>45-52</pages>
       <url hash="f969b581">2010.iwslt-evaluation.4</url>
@@ -110,10 +110,10 @@
     <paper id="7">
       <title><fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>’s machine translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Xiangyu</first><last>Duan</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <author><first>Jun</first><last>Lang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>67-72</pages>
@@ -129,7 +129,7 @@
       <author><first>Wei</first><last>Luo</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>73-79</pages>
       <url hash="843f1889">2010.iwslt-evaluation.8</url>
@@ -139,8 +139,8 @@
       <title>The <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> machine translation system for the <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
-      <author><first>João</first><last>Graça</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>81-84</pages>
       <url hash="d33f26a9">2010.iwslt-evaluation.9</url>
@@ -150,14 +150,14 @@
     <paper id="10">
       <title><fixed-case>ITI</fixed-case>-<fixed-case>UPV</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Guillem</first><last>Gascó</last></author>
-      <author><first>Vicent</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
       <author><first>Jesús-Andrés</first><last>Ferrer</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Martha-Alicia</first><last>Rocha</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="martha-alicia-rocha"><first>Martha-Alicia</first><last>Rocha</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Jorge</first><last>González</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
       <pages>85-92</pages>
       <url hash="bc17b356">2010.iwslt-evaluation.10</url>
       <abstract>This paper presents the submissions of the PRHLT group for the evaluation campaign of the International Workshop on Spoken Language Translation. We focus on the development of reliable translation systems between syntactically different languages (DIALOG task) and on the efficient training of SMT models in resource-rich scenarios (TALK task).</abstract>
@@ -170,17 +170,17 @@
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Christian</first><last>Herff</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>93-98</pages>
       <url hash="514af2fa">2010.iwslt-evaluation.11</url>
       <bibkey>niehues-etal-2010-kit</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>LIG</fixed-case> statistical machine translation systems for <fixed-case>IWSLT</fixed-case> 2010</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Haitem</first><last>Afli</last></author>
       <author><first>Thi Ngoc Diep</first><last>Do</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Marion</first><last>Potet</last></author>
       <pages>99-104</pages>
       <url hash="c9f3cf1f">2010.iwslt-evaluation.12</url>
@@ -189,8 +189,8 @@
     <paper id="13">
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>İlknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur Durgar</first><last>El-Kahlout</last></author>
       <author><first>Le</first><last>Hai-Son</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -203,7 +203,7 @@
       <title><fixed-case>LIUM</fixed-case>’s statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Anthony</first><last>Rousseau</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <pages>113-117</pages>
       <url hash="27b899a4">2010.iwslt-evaluation.14</url>
@@ -214,7 +214,7 @@
       <title>The <fixed-case>MIRACL</fixed-case> <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
       <author><first>Ines</first><last>Turki Khemakhem</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>119-125</pages>
       <url hash="3ca1acf2">2010.iwslt-evaluation.15</url>
       <abstract>This paper describes the MIRACL statistical Machine Translation system and the improvements that were developed during the IWSLT 2010 evaluation campaign. We participated to the Arabic to English BTEC tasks using a phrase-based statistical machine translation approach. In this paper, we first discuss some challenges in translating from Arabic to English and we explore various techniques to improve performances on a such task. Next, we present our solution for disambiguating the output of an Arabic morphological analyzer. In fact, The Arabic morphological analyzer used produces all possible morphological structures for each word, with an unique correct proposition. In this work we exploit the Arabic-English alignment to choose the correct segmented form and the correct morpho-syntactic features produced by our morphological analyzer.</abstract>
@@ -223,7 +223,7 @@
     <paper id="16">
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2010 <fixed-case>MT</fixed-case> system</title>
       <author><first>Wade</first><last>Shen</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Raymond</first><last>Slyh</last></author>
       <author><first>A. Ryan</first><last>Aminzadeh</last></author>
       <pages>127-134</pages>
@@ -249,11 +249,11 @@
     </paper>
     <paper id="18">
       <title>The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2010</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>139-146</pages>
       <url hash="b809ec4b">2010.iwslt-evaluation.18</url>
       <abstract>This paper describes NICT’s participation in the IWSLT 2010 evaluation campaign for the DIALOG translation (Chinese-English) and the BTEC (French-English) translation shared-tasks. For the DIALOG translation, the main challenge to this task is applying context information during translation. Context information can be used to decide on word choice and also to replace missing information during translation. We applied discriminative reranking using contextual information as additional features. In order to provide more choices for re-ranking, we generated n-best lists from multiple phrase-based statistical machine translation systems that varied in the type of Chinese word segmentation schemes used. We also built a model that merged the phrase tables generated by the different segmentation schemes. Furthermore, we used a lattice-based system combination model to combine the output from different systems. A combination of all of these systems was used to produce the n-best lists for re-ranking. For the BTEC task, a general approach that used latticebased system combination of two systems, a standard phrasebased system and a hierarchical phrase-based system, was taken. We also tried to process some unknown words by replacing them with the same words but different inflections that are known to the system.</abstract>
@@ -290,7 +290,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>163-168</pages>
       <url hash="c5581e69">2010.iwslt-evaluation.22</url>
       <abstract>In this paper we describe the statistical machine translation system of the RWTH Aachen University developed for the translation task of the IWSLT 2010. This year, we participated in the BTEC translation task for the Arabic to English language direction. We experimented with two state-of-theart decoders: phrase-based and hierarchical-based decoders. Extensions to the decoders included phrase training (as opposed to heuristic phrase extraction) for the phrase-based decoder, and soft syntactic features for the hierarchical decoder. Additionally, we experimented with various rule-based and statistical-based segmenters for Arabic. Due to the different decoders and the different methodologies that we apply for segmentation, we expect that there will be complimentary variation in the results achieved by each system. The next step would be to exploit these variations and achieve better results by combining the systems. We try different strategies for system combination and report significant improvements over the best single system.</abstract>
@@ -325,11 +325,11 @@
     </paper>
     <paper id="26">
       <title><fixed-case>UPC</fixed-case>-<fixed-case>BMIC</fixed-case>-<fixed-case>VDU</fixed-case> system description for the <fixed-case>IWSLT</fixed-case> 2010: testing several collocation segmentations in a phrase-based <fixed-case>SMT</fixed-case> system</title>
-      <author><first>Carlos</first><last>Henríquez</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="carlos-henriquez"><first>Carlos</first><last>Henríquez</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <pages>189-195</pages>
       <url hash="030eee39">2010.iwslt-evaluation.26</url>
       <abstract>This paper describes the UPC-BMIC-VMU participation in the IWSLT 2010 evaluation campaign. The SMT system is a standard phrase-based enriched with novel segmentations. These novel segmentations are computed using statistical measures such as Log-likelihood, T-score, Chi-squared, Dice, Mutual Information or Gravity-Counts. The analysis of translation results allows to divide measures into three groups. First, Log-likelihood, Chi-squared and T-score tend to combine high frequency words and collocation segments are very short. They improve the SMT system by adding new translation units. Second, Mutual Information and Dice tend to combine low frequency words and collocation segments are short. They improve the SMT system by smoothing the translation units. And third, GravityCounts tends to combine high and low frequency words and collocation segments are long. However, in this case, the SMT system is not improved. Thus, the road-map for translation system improvement is to introduce new phrases with either low frequency or high frequency words. It is hard to introduce new phrases with low and high frequency words in order to improve translation quality. Experimental results are reported in the French-to-English IWSLT 2010 evaluation where our system was ranked 3rd out of nine systems.</abstract>
@@ -338,7 +338,7 @@
     <paper id="27">
       <title><fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> machine translation system for the <fixed-case>IWSLT</fixed-case> 2010 evaluation</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>197-203</pages>
       <url hash="d76499a8">2010.iwslt-evaluation.27</url>
       <bibkey>khalilov-simaan-2010-illc</bibkey>
@@ -390,8 +390,8 @@
     <paper id="4">
       <title>Improved <fixed-case>V</fixed-case>ietnamese-<fixed-case>F</fixed-case>rench parallel corpus mining using <fixed-case>E</fixed-case>nglish language</title>
       <author><first>Thi Ngoc Diep</first><last>Do</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <pages>235-242</pages>
       <url hash="3065d41d">2010.iwslt-papers.4</url>
       <bibkey>do-etal-2010-improved</bibkey>
@@ -407,7 +407,7 @@
     </paper>
     <paper id="6">
       <title>The pay-offs of preprocessing for <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish statistical machine translation</title>
-      <author><first>Ilknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>Ilknur Durgar</first><last>El-Kahlout</last></author>
       <author><first>Francois</first><last>Yvon</last></author>
       <pages>251-258</pages>
       <url hash="fdb6dfa3">2010.iwslt-papers.6</url>
@@ -417,7 +417,7 @@
     <paper id="7">
       <title>A <fixed-case>B</fixed-case>ayesian model of bilingual segmentation for transliteration</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>259-266</pages>
       <url hash="73238ba2">2010.iwslt-papers.7</url>
       <bibkey>finch-sumita-2010-bayesian</bibkey>
@@ -425,7 +425,7 @@
     <paper id="8">
       <title>Faster cube pruning</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>267-274</pages>
       <url hash="a4595fe2">2010.iwslt-papers.8</url>
       <bibkey>gesmundo-henderson-2010-faster</bibkey>
@@ -433,7 +433,7 @@
     <paper id="9">
       <title>Factor templates for factored machine translation models</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>275-282</pages>
       <url hash="bb2f16b1">2010.iwslt-papers.9</url>
       <bibkey>graham-van-genabith-2010-factor</bibkey>
@@ -452,7 +452,7 @@
       <author><first>Carmen</first><last>Heger</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>291-297</pages>
       <url hash="81a1908e">2010.iwslt-papers.11</url>
       <abstract>Currently most state-of-the-art statistical machine translation systems present a mismatch between training and generation conditions. Word alignments are computed using the well known IBM models for single-word based translation. Afterwards phrases are extracted using extraction heuristics, unrelated to the stochastic models applied for finding the word alignment. In the last years, several research groups have tried to overcome this mismatch, but only with limited success. Recently, the technique of forced alignments has shown to improve translation quality for a phrase-based system, applying a more statistically sound approach to phrase extraction. In this work we investigate the first steps to combine forced alignment with a hierarchical model. Experimental results on IWSLT and WMT data show improvements in translation quality of up to 0.7% BLEU and 1.0% TER.</abstract>
@@ -462,8 +462,8 @@
       <title>Multi-pivot translation by system combination</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>299-306</pages>
       <url hash="57811726">2010.iwslt-papers.12</url>
       <abstract>This paper describes a technique to exploit multiple pivot languages when using machine translation (MT) on language pairs with scarce bilingual resources, or where no translation system for a language pair is available. The principal idea is to generate intermediate translations in several pivot languages, translate them separately into the target language, and generate a consensus translation out of these using MT system combination techniques. Our technique can also be applied when a translation system for a language pair is available, but is limited in its translation accuracy because of scarce resources. Using statistical MT systems for the 11 different languages of Europarl, we show experimentally that a direct translation system can be replaced by this pivot approach without a loss in translation quality if about six pivot languages are available. Furthermore, we can already improve an existing MT system by adding two pivot systems to it. The maximum improvement was found to be 1.4% abs. in BLEU in our experiments for 8 or more pivot languages.</abstract>
@@ -471,9 +471,9 @@
     </paper>
     <paper id="13">
       <title>Real-time spoken language identification and recognition for speech-to-speech translation</title>
-      <author><first>Daniel Chung Yong</first><last>Lim</last></author>
+      <author id="chung-yong-lim"><first>Daniel Chung Yong</first><last>Lim</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>307-312</pages>
       <url hash="6ae69291">2010.iwslt-papers.13</url>
       <bibkey>lim-etal-2010-real</bibkey>
@@ -482,8 +482,8 @@
       <title>Towards a general and extensible phrase-extraction algorithm</title>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
-      <author><first>João</first><last>Graça</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>313-320</pages>
       <url hash="3cddb8d2">2010.iwslt-papers.14</url>
@@ -511,7 +511,7 @@
       <title>Sign language machine translation overkill</title>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Christoph</first><last>Schmidt</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>337-344</pages>
       <url hash="890a6a3c">2010.iwslt-papers.17</url>
       <abstract>Sign languages represent an interesting niche for statistical machine translation that is typically hampered by the scarceness of suitable data, and most papers in this area apply only a few, well-known techniques and do not adapt them to small-sized corpora. In this paper, we will propose new methods for common approaches like scaling factor optimization and alignment merging strategies which helped improve our baseline. We also conduct experiments with different decoders and employ state-of-the-art techniques like soft syntactic labels as well as trigger-based and discriminative word lexica and system combination. All methods are evaluated on one of the largest sign language corpora available.</abstract>
@@ -522,7 +522,7 @@
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>345-352</pages>
       <url hash="df29e607">2010.iwslt-papers.18</url>
       <bibkey>vilar-etal-2010-parser</bibkey>
diff --git a/data/xml/2010.jeptalnrecital.xml b/data/xml/2010.jeptalnrecital.xml
index b8f4e74971..19c016dbb3 100644
--- a/data/xml/2010.jeptalnrecital.xml
+++ b/data/xml/2010.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="invite" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Conférences invitées</booktitle>
-      <editor><first>Philippe</first><last>Langlais</last></editor>
+      <editor id="philippe-langlais"><first>Philippe</first><last>Langlais</last></editor>
       <editor><first>Michel</first><last>Gagnon</last></editor>
       <publisher>ATALA</publisher>
       <address>Montréal, Canada</address>
@@ -17,9 +17,9 @@
     </frontmatter>
     <paper id="1">
       <title>La phraséologie en langue, en dictionnaire et en <fixed-case>TALN</fixed-case></title>
-      <author><first>Igor</first><last>Mel’čuk</last></author>
+      <author id="igor-melcuk"><first>Igor</first><last>Mel’čuk</last></author>
       <pages>1–14</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="bccbbb5f">2010.jeptalnrecital-invite.1</url>
       <language>fra</language>
       <bibkey>melcuk-2010-la</bibkey>
@@ -45,7 +45,7 @@
   <volume id="long" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</booktitle>
-      <editor><first>Philippe</first><last>Langlais</last></editor>
+      <editor id="philippe-langlais"><first>Philippe</first><last>Langlais</last></editor>
       <editor><first>Michel</first><last>Gagnon</last></editor>
       <publisher>ATALA</publisher>
       <address>Montréal, Canada</address>
@@ -80,7 +80,7 @@
     <paper id="3">
       <title>Exploitation d’une ressource lexicale pour la construction d’un étiqueteur morpho-syntaxique état-de-l’art du français</title>
       <author><first>Pascal</first><last>Denis</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>21–30</pages>
       <abstract>Cet article présente MEltfr, un étiqueteur morpho-syntaxique automatique du français. Il repose sur un modèle probabiliste séquentiel qui bénéficie d’informations issues d’un lexique exogène, à savoir le Lefff. Evalué sur le FTB, MEltfr atteint un taux de précision de 97.75% (91.36% sur les mots inconnus) sur un jeu de 29 étiquettes. Ceci correspond à une diminution du taux d’erreur de 18% (36.1% sur les mots inconnus) par rapport au même modèle sans couplage avec le Lefff. Nous étudions plus en détail la contribution de cette ressource, au travers de deux séries d’expériences. Celles-ci font apparaître en particulier que la contribution des traits issus du Lefff est de permettre une meilleure couverture, ainsi qu’une modélisation plus fine du contexte droit des mots.</abstract>
       <url hash="8a133eb4">2010.jeptalnrecital-long.3</url>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="6">
       <title>Une approche cognitive de la fouille de grandes collections de documents</title>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <author><first>Yann</first><last>Vigile Hoareau</last></author>
       <pages>51–60</pages>
       <abstract>La récente éclosion du Web2.0 engendre un accroissement considérable de volumes textuels et intensifie ainsi l’importance d’une réflexion sur l’exploitation des connaissances à partir de grandes collections de documents. Dans cet article, nous présentons une approche de rechercher d’information qui s’inspire des certaines recherches issues de la psychologie cognitive pour la fouille de larges collections de documents. Nous utilisons un document comme requête permettant de récupérer des informations à partir d’une collection représentée dans un espace sémantique. Nous définissons les notions d’identité sémantique et de pollution sémantique dans un espace de documents. Nous illustrons notre approche par la description d’un système appelé BRAT (Blogosphere Random Analysis using Texts) basé sur les notions préalablement introduites d’identité et de pollution sématique appliquées à une tâche d’identification des actualités dans la blogosphère mondiale lors du concours TREC’09. Les premiers résultats produits sont tout à fait encourageant et indiquent les pistes des recherches à mettre en oeuvre afin d’améliorer les performances de BRAT.</abstract>
@@ -132,7 +132,7 @@
       <title>Approche quantitative en syntaxe : l’exemple de l’alternance de position de l’adjectif épithète en français</title>
       <author><first>Juliette</first><last>Thuilier</last></author>
       <author><first>Gwendoline</first><last>Fox</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>71–80</pages>
       <abstract>Cet article présente une analyse statistique sur des données de syntaxe qui a pour but d’aider à mieux cerner le phénomène d’alternance de position de l’adjectif épithète par rapport au nom en français. Nous montrons comment nous avons utilisé les corpus dont nous disposons (French Treebank et le corpus de l’Est-Républicain) ainsi que les ressources issues du traitement automatique des langues, pour mener à bien notre étude. La modélisation à partir de 13 variables relevant principalement des propriétés du syntagme adjectival, de celles de l’item adjectival, ainsi que de contraintes basées sur la fréquence, permet de prédire à plus de 93% la position de l’adjectif. Nous insistons sur l’importance de contraintes relevant de l’usage pour le choix de la position de l’adjectif, notamment à travers la fréquence d’occurrence de l’adjectif, et la fréquence de contextes dans lesquels il apparaît.</abstract>
       <url hash="395d2967">2010.jeptalnrecital-long.8</url>
@@ -170,8 +170,8 @@
       <title>Une approche hybride traduction/correction pour la normalisation des <fixed-case>SMS</fixed-case></title>
       <author><first>Richard</first><last>Beaufort</last></author>
       <author><first>Sophie</first><last>Roekhaut</last></author>
-      <author><first>Louise-Amélie</first><last>Cougnon</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="louise-amelie-cougnon"><first>Louise-Amélie</first><last>Cougnon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>111–120</pages>
       <abstract>Cet article présente une méthode hybride de normalisation des SMS, à mi-chemin entre correction orthographique et traduction automatique. La partie du système qui assure la normalisation utilise exclusivement des modèles entraînés sur corpus. Evalué en français par validation croisée, le système obtient un taux d’erreur au mot de 9.3% et un score BLEU de 0.83.</abstract>
       <url hash="68895ccf">2010.jeptalnrecital-long.12</url>
@@ -191,9 +191,9 @@
     </paper>
     <paper id="14">
       <title>Extension d’un système d’étiquetage d’entités nommées en étiqueteur sémantique</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
-      <author><first>Benoit</first><last>Ozell</last></author>
+      <author id="benoit-ozell"><first>Benoit</first><last>Ozell</last></author>
       <pages>131–140</pages>
       <abstract>L’étiquetage sémantique consiste à associer un ensemble de propriétés à une séquence de mots contenue dans un texte. Bien que proche de la tâche d’étiquetage par entités nommées, qui revient à attribuer une classe de sens à un mot, la tâche d’étiquetage ou d’annotation sémantique cherche à établir la relation entre l’entité dans son texte et sa représentation ontologique. Nous présentons un étiqueteur sémantique qui s’appuie sur un étiqueteur d’entités nommées pour mettre en relation un mot ou un groupe de mots avec sa représentation ontologique. Son originalité est d’utiliser une ontologie intermédiaire de nature statistique pour établir ce lien.</abstract>
       <url hash="4d0f415f">2010.jeptalnrecital-long.14</url>
@@ -211,8 +211,8 @@
     </paper>
     <paper id="16">
       <title>Anatomie des structures énumératives</title>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
       <author><first>Ludovic</first><last>Tanguy</last></author>
       <pages>151–160</pages>
       <abstract>Cet article présente les premiers résultats d’une campagne d’annotation de corpus à grande échelle réalisée dans le cadre du projet ANNODIS. Ces résultats concernent la partie descendante du dispositif d’annotation, et plus spécifiquement les structures énumératives. Nous nous intéressons à la structuration énumérative en tant que stratégie de base de mise en texte, apparaissant à différents niveaux de granularité, associée à différentes fonctions discursives, et signalée par des indices divers. Avant l’annotation manuelle, une étape de pré-traitement a permis d’obtenir le marquage systématique de traits associés à la signalisation de l’organisation du discours. Nous décrivons cette étape de marquage automatique, ainsi que la procédure d’annotation. Nous proposons ensuite une première typologie des structures énumératives basée sur la description quantitative des données annotées manuellement, prenant en compte la couverture textuelle, la composition et les types d’indices.</abstract>
@@ -224,7 +224,7 @@
       <title>Identification des actants et circonstants par apprentissage machine</title>
       <author><first>Fadila</first><last>Hadouche</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <pages>161–170</pages>
       <abstract>Dans cet article, nous traitons de l’identification automatique des participants actants et circonstants de lexies prédicatives verbales tirées d’un corpus spécialisé en langue française. Les actants contribuent à la réalisation du sens de la lexie alors que les circonstants sont optionnels : ils ajoutent une information supplémentaire qui ne fait pas partie intégrante du sémantisme de la lexie. Nous proposons une classification de ces participants par apprentissage machine basée sur un corpus de lexies verbales du domaine de l’informatique, lexies qui ont été annotées manuellement avec des rôles sémantiques. Nous présentons des features qui nous permettent d’identifier les participants et de distinguer les actants des circonstants.</abstract>
       <url hash="c7ccbe73">2010.jeptalnrecital-long.17</url>
@@ -247,7 +247,7 @@
       <title>Classification du genre vidéo reposant sur des transcriptions automatiques</title>
       <author><first>Stanislas</first><last>Oger</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>181–190</pages>
       <abstract>Dans cet article nous proposons une nouvelle méthode pour l’identification du genre vidéo qui repose sur une analyse de leur contenu linguistique. Cette approche consiste en l’analyse des mots apparaissant dans les transcriptions des pistes audio des vidéos, obtenues à l’aide d’un système de reconnaissance automatique de la parole. Les expériences sont réalisées sur un corpus composé de dessins animés, de films, de journaux télévisés, de publicités, de documentaires, d’émissions de sport et de clips de musique. L’approche proposée permet d’obtenir un taux de bonne classification de 74% sur cette tâche. En combinant cette approche avec des méthodes reposant sur des paramètres acoustiques bas-niveau, nous obtenons un taux de bonne classification de 95%.</abstract>
       <url hash="03c00c53">2010.jeptalnrecital-long.19</url>
@@ -268,7 +268,7 @@
       <title>Traitement des disfluences dans le cadre de la compréhension automatique de l’oral arabe spontané</title>
       <author><first>Younès</first><last>Bahou</last></author>
       <author><first>Abir</first><last>Masmoudi</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>201–210</pages>
       <abstract>Les disfluences inhérents de toute parole spontanée sont un vrai défi pour les systèmes de compréhension de la parole. Ainsi, nous proposons dans cet article, une méthode originale pour le traitement des disfluences (plus précisément, les autocorrections, les répétitions, les hésitations et les amorces) dans le cadre de la compréhension automatique de l’oral arabe spontané. Notre méthode est basée sur une analyse à la fois robuste et partielle, des énoncés oraux arabes. L’idée consiste à combiner une technique de reconnaissance de patrons avec une analyse sémantique superficielle par segments conceptuels. Cette méthode a été testée à travers le module de compréhension du système SARF, un serveur vocal interactif offrant des renseignements sur le transport ferroviaire tunisien (Bahou et al., 2008). Les résultats d’évaluation de ce module montrent que la méthode proposée est très prometteuse. En effet, les mesures de rappel, de précision et de F-Measure sont respectivement de 79.23%, 74.09% et 76.57%.</abstract>
       <url hash="4764d8bf">2010.jeptalnrecital-long.21</url>
@@ -278,7 +278,7 @@
     <paper id="22">
       <title>Utilisation de relations sémantiques pour améliorer la segmentation thématique de documents télévisuels</title>
       <author><first>Camille</first><last>Guinaudeau</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>211–220</pages>
       <abstract>Les méthodes de segmentation thématique exploitant une mesure de la cohésion lexicale peuvent être appliquées telles quelles à des transcriptions automatiques de programmes télévisuels. Cependant, elles sont moins efficaces dans ce contexte, ne prenant en compte ni les particularités des émissions TV, ni celles des transcriptions. Nous étudions ici l’apport de relations sémantiques pour rendre les techniques de segmentation thématique plus robustes. Nous proposons une méthode pour exploiter ces relations dans une mesure de la cohésion lexicale et montrons qu’elles permettent d’augmenter la F1-mesure de +1.97 et +11.83 sur deux corpus composés respectivement de 40h de journaux télévisés et de 40h d’émissions de reportage. Ces améliorations démontrent que les relations sémantiques peuvent rendre les méthodes de segmentation moins sensibles aux erreurs de transcription et au manque de répétitions constaté dans certaines émissions télévisées.</abstract>
@@ -290,7 +290,7 @@
       <title>Une évaluation de l’impact des types de textes sur la tâche de segmentation thématique</title>
       <author><first>Clémentine</first><last>Adam</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <pages>221–230</pages>
       <abstract>Cette étude a pour but de contribuer à la définition des objectifs de la segmentation thématique (ST), en incitant à prendre en considération le paramètre du type de textes dans cette tâche. Notre hypothèse est que, si la ST est certes pertinente pour traiter certains textes dont l’organisation est bien thématique, elle n’est pas adaptée à la prise en compte d’autres modes d’organisation (temporelle, rhétorique), et ne peut pas être appliquée sans précaution à des textes tout-venants. En comparant les performances d’un système de ST sur deux corpus, à organisation thématique “forte” et “faible”, nous montrons que cette tâche est effectivement sensible à la nature des textes.</abstract>
       <url hash="474b37e6">2010.jeptalnrecital-long.23</url>
@@ -310,10 +310,10 @@
     </paper>
     <paper id="25">
       <title>Évaluation automatique de résumés avec et sans référence</title>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Patricia</first><last>Velázquez-Morales</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="patricia-velazquez-morales"><first>Patricia</first><last>Velázquez-Morales</last></author>
       <author><first>Eric</first><last>Sanjuan</last></author>
       <pages>241–251</pages>
       <abstract>Nous étudions différentes méthodes d’évaluation de résumé de documents basées sur le contenu. Nous nous intéressons en particulier à la corrélation entre les mesures d’évaluation avec et sans référence humaine. Nous avons développé FRESA, un nouveau système d’évaluation fondé sur le contenu qui calcule les divergences entre les distributions de probabilité. Nous appliquons notre système de comparaison aux diverses mesures d’évaluation bien connues en résumé de texte telles que la Couverture, Responsiveness, Pyramids et Rouge en étudiant leurs associations dans les tâches du résumé multi-document générique (francais/anglais), focalisé (anglais) et résumé mono-document générique (français/espagnol).</abstract>
@@ -335,7 +335,7 @@
     <paper id="27">
       <title>Comment formule-t-on une réponse en langue naturelle ?</title>
       <author><first>Anne</first><last>Garcia-Fernandez</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <pages>262–271</pages>
       <abstract>Cet article présente l’étude d’un corpus de réponses formulées par des humains à des questions factuelles. Des observations qualitatives et quantitatives sur la reprise d’éléments de la question dans les réponses sont exposées. La notion d’information-réponse est introduite et une étude de la présence de cet élément dans le corpus est proposée. Enfin, les formulations des réponses sont étudiées.</abstract>
@@ -346,8 +346,8 @@
     <paper id="28">
       <title>Apprentissage non supervisé pour la traduction automatique : application à un couple de langues peu doté</title>
       <author><first>Thi</first><last>Ngoc Diep</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <pages>272–281</pages>
       <abstract>Cet article présente une méthode non-supervisée pour extraire des paires de phrases parallèles à partir d’un corpus comparable. Un système de traduction automatique est utilisé pour exploiter le corpus comparable et détecter les paires de phrases parallèles. Un processus itératif est exécuté non seulement pour augmenter le nombre de paires de phrases parallèles extraites, mais aussi pour améliorer la qualité globale du système de traduction. Une comparaison avec une méthode semi-supervisée est présentée également. Les expériences montrent que la méthode non-supervisée peut être réellement appliquée dans le cas où on manque de données parallèles. Bien que les expériences préliminaires soient menées sur la traduction français-anglais, cette méthode non-supervisée est également appliquée avec succès à un couple de langues peu doté : vietnamien-français.</abstract>
       <url hash="03ab1394">2010.jeptalnrecital-long.28</url>
@@ -394,7 +394,7 @@
     </paper>
     <paper id="33">
       <title>Recherche contextuelle d’équivalents en banque de terminologie</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>321–330</pages>
       <abstract>Notre recherche démontre que l’utilisation du contenu d’un texte à traduire permet de mieux cibler dans une banque de terminologie les équivalents terminologiques pertinents à ce texte. Une banque de terminologie a comme particularité qu’elle catégorise ses entrées (fiches) en leur assignant un ou des domaines provenant d’une liste de domaines préétablie. La stratégie ici présentée repose sur l’utilisation de cette information sur les domaines. Un algorithme a été développé pour l’assignation automatique d’un profil de domaines à un texte. Celui-ci est combiné à un algorithme d’appariement entre les domaines d’un terme présent dans la banque de terminologie et le profil de domaines du texte. Pour notre expérimentation, des résumés bilingues (français et anglais) provenant de huit revues scientifiques nous fournissent un ensemble de 1130 paires d’équivalents terminologiques et le Grand Dictionnaire Terminologique (Office Québécois de la Langue Française) nous sert de ressource terminologique. Sur notre ensemble, nous démontrons une réduction de 75% du rang moyen de l’équivalent correct en comparaison avec un choix au hasard.</abstract>
       <url hash="c37ca367">2010.jeptalnrecital-long.33</url>
@@ -403,7 +403,7 @@
     </paper>
     <paper id="34">
       <title>Réécriture de graphes de dépendances pour l’interface syntaxe-sémantique</title>
-      <author><first>Guillaume</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillaume</first><last>Bonfante</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
@@ -416,7 +416,7 @@
     <paper id="35">
       <title>Évaluer des annotations manuelles dispersées : les coefficients sont-ils suffisants pour estimer l’accord inter-annotateurs ?</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Claire</first><last>François</last></author>
+      <author id="claire-francois"><first>Claire</first><last>François</last></author>
       <author><first>Maha</first><last>Ghribi</last></author>
       <pages>341–350</pages>
       <abstract>L’objectif des travaux présentés dans cet article est l’évaluation de la qualité d’annotations manuelles de relations de renommage de gènes dans des résumés scientifiques, annotations qui présentent la caractéristique d’être très dispersées. Pour cela, nous avons calculé et comparé les coefficients les plus communément utilisés, entre autres kappa (Cohen, 1960) et pi (Scott, 1955), et avons analysé dans quelle mesure ils sont adaptés à nos données. Nous avons également étudié les différentes pondérations applicables à ces coefficients permettant de calculer le kappa pondéré (Cohen, 1968) et l’alpha (Krippendorff, 1980, 2004). Nous avons ainsi étudié le biais induit par la grande prévalence d’une catégorie et défini un mode de calcul des distances entre catégories reposant sur les annotations réalisées.</abstract>
@@ -426,7 +426,7 @@
     </paper>
     <paper id="36">
       <title>An empirical study of maximum entropy approach for part-of-speech tagging of <fixed-case>V</fixed-case>ietnamese texts</title>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <author><first>Thi</first><last>Minh Huyen Nguyen</last></author>
       <author><first>Mathias</first><last>Rossignol</last></author>
@@ -466,7 +466,7 @@
     </paper>
     <paper id="40">
       <title>Développement de ressources pour le persan: lexique morphologique et chaîne de traitements de surface</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Géraldine</first><last>Walther</last></author>
       <pages>393–402</pages>
       <abstract>Nous présentons PerLex, un lexique morphologique du persan à large couverture et librement disponible, accompagné d’une chaîne de traitements de surface pour cette langue. Nous décrivons quelques caractéristiques de la morphologie du persan, et la façon dont nous l’avons représentée dans le formalisme lexical Alexina, sur lequel repose PerLex. Nous insistons sur la méthodologie que nous avons employée pour construire les entrées lexicales à partir de diverses sources, ainsi que sur les problèmes liés à la normalisation typographique. Le lexique obtenu a une couverture satisfaisante sur un corpus de référence, et devrait donc constituer un bon point de départ pour le développement d’un lexique syntaxique du persan.</abstract>
@@ -478,7 +478,7 @@
   <volume id="court" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Articles courts</booktitle>
-      <editor><first>Philippe</first><last>Langlais</last></editor>
+      <editor id="philippe-langlais"><first>Philippe</first><last>Langlais</last></editor>
       <editor><first>Michel</first><last>Gagnon</last></editor>
       <publisher>ATALA</publisher>
       <address>Montréal, Canada</address>
@@ -525,7 +525,7 @@
     <paper id="4">
       <title>Constitution d’une ressource sémantique issue du treillis des catégories de <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Olivier</first><last>Collin</last></author>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <author><first>Jean-Léon</first><last>Bouraoui</last></author>
       <pages>20–25</pages>
       <abstract>Le travail présenté dans cet article s’inscrit dans le thème de l’acquisition automatique de ressources sémantiques s’appuyant sur les données de Wikipedia. Nous exploitons le graphe des catégories associées aux pages de Wikipedia à partir duquel nous extrayons une hiérarchie de catégories parentes, sémantiquement et thématiquement liées. Cette extraction est le résultat d’une stratégie de plus court chemin appliquée au treillis global des catégories. Chaque page peut ainsi être représentée dans l’espace de ses catégories propres, ainsi que des catégories parentes. Nous montrons la possibilité d’utiliser cette ressource pour deux applications. La première concerne l’indexation et la classification des pages de Wikipedia. La seconde concerne la désambiguïsation dans le cadre d’un traducteur de requêtes français/anglais. Ce dernier travail a été réalisé en exploitant les catégories des pages anglaises.</abstract>
@@ -536,7 +536,7 @@
     <paper id="5">
       <title>Ponctuations fortes abusives</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>26–31</pages>
       <abstract>Certaines ponctuations fortes sont « abusivement » utilisées à la place de ponctuations faibles, débouchant sur des phrases graphiques qui ne sont pas des phrases grammaticales. Cet article présente une étude sur corpus de ce phénomène et une ébauche d’outil pour repérer automatiquement les ponctuations fortes abusives.</abstract>
       <url hash="542ad90a">2010.jeptalnrecital-court.5</url>
@@ -545,7 +545,7 @@
     </paper>
     <paper id="6">
       <title>Une étude des questions “complexes” en question-réponse</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Mathieu</first><last>Falco</last></author>
       <pages>32–37</pages>
@@ -557,9 +557,9 @@
     <paper id="7">
       <title>Weak Translation Problems – a case study of Scriptural Translation</title>
       <author><first>Muhammad</first><last>Ghulam Abbas Malik</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>38–43</pages>
       <abstract>General purpose, high quality and fully automatic MT is believed to be impossible. We are interested in scriptural translation problems, which are weak sub-problems of the general problem of translation. We introduce the characteristics of the weak problems of translation and of the scriptural translation problems, describe different computational approaches (finite-state, statistical and hybrid) to solve these problems, and report our results on several combinations of Indo-Pak languages and writing systems.</abstract>
       <url hash="687cfc0d">2010.jeptalnrecital-court.7</url>
@@ -597,7 +597,7 @@
     <paper id="11">
       <title>L’antonymie observée avec des méthodes de <fixed-case>TAL</fixed-case> : une relation à la fois syntagmatique et paradigmatique ?</title>
       <author><first>François</first><last>Morlane-Hondère</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <pages>62–67</pages>
       <abstract>Cette étude utilise des outils de TAL pour tester l’hypothèse avancée par plusieurs études linguistiques récentes selon laquelle la relation antonymique, classiquement décrite comme une relation paradigmatique, a la particularité de fonctionner également sur le plan syntagmatique, c’est-à-dire de réunir des mots qui sont non seulement substituables mais qui apparaissent également régulièrement dans des relations contextuelles. Nous utilisons deux méthodes – l’analyse distributionnelle pour le plan paradigmatique, la recherche par patrons antonymiques pour le plan syntagmatique. Les résultats montrent que le diagnostic d’antonymie n’est pas significativement meilleur lorsqu’on croise les deux méthodes, puisqu’une partie des antonymes identifiés ne répondent pas au test de substituabilité, ce qui semble confirmer la prépondérance du plan syntagmatique pour l’étude et l’acquisition de cette relation.</abstract>
       <url hash="890809c9">2010.jeptalnrecital-court.11</url>
@@ -646,7 +646,7 @@
       <author><first>Helena</first><last>Blancafort</last></author>
       <author><first>Gaëlle</first><last>Recourcé</last></author>
       <author><first>Javier</first><last>Couto</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rosa</first><last>Stern</last></author>
       <author><first>Denis</first><last>Teyssou</last></author>
       <pages>86–91</pages>
@@ -673,7 +673,7 @@
     <paper id="17">
       <title>Détection hors contexte des émotions à partir du contenu linguistique d’énoncés oraux : le système <fixed-case>E</fixed-case>mo<fixed-case>L</fixed-case>ogus</title>
       <author><first>Marc</first><last>Le Tallec</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Arielle</first><last>Syssau-Vaccarella</last></author>
@@ -727,7 +727,7 @@
     <paper id="22">
       <title><fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en : un module d’identification des chaînes de référence dépendant du genre textuel</title>
       <author><first>Laurence</first><last>Longo</last></author>
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
       <pages>129–134</pages>
       <abstract>Dans cet article, nous présentons RefGen, un module d’identification des chaînes de référence pour le français. RefGen effectue une annotation automatique des expressions référentielles puis identifie les relations de coréférence établies entre ces expressions pour former des chaînes de référence. Le calcul de la référence utilise des propriétés des chaînes de référence dépendantes du genre textuel, l’échelle d’accessibilité d’(Ariel, 1990) et une série de filtres lexicaux, morphosyntaxiques et sémantiques. Nous évaluons les premiers résultats de RefGen sur un corpus issu de rapports publics.</abstract>
       <url hash="2ee50b0e">2010.jeptalnrecital-court.22</url>
@@ -737,7 +737,7 @@
     <paper id="23">
       <title>Détection et résolution d’entités nommées dans des dépêches d’agence</title>
       <author><first>Rosa</first><last>Stern</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>135–140</pages>
       <abstract>Nous présentons NP, un système de reconnaissance d’entités nommées. Comprenant un module de résolution, il permet d’associer à chaque occurrence d’entité le référent qu’elle désigne parmi les entrées d’un référentiel dédié. NP apporte ainsi des informations pertinentes pour l’exploitation de l’extraction d’entités nommées en contexte applicatif. Ce système fait l’objet d’une évaluation grâce au développement d’un corpus annoté manuellement et adapté aux tâches de détection et de résolution.</abstract>
       <url hash="719fc28d">2010.jeptalnrecital-court.23</url>
@@ -747,7 +747,7 @@
     <paper id="24">
       <title>Processus de décision à base de <fixed-case>SVM</fixed-case> pour la composition d’arbres de frames sémantiques</title>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>141–146</pages>
       <abstract>Cet article présente un processus de décision basé sur des classifieurs à vaste marge (SVMDP) pour extraire l’information sémantique dans un système de dialogue oral. Dans notre composant de compréhension, l’information est représentée par des arbres de frames sémantiques définies selon le paradigme FrameNet. Le processus d’interprétation est réalisé en deux étapes. D’abord, des réseaux bayésiens dynamiques (DBN) sont utilisés comme modèles de génération pour inférer des fragments d’arbres de la requête utilisateur. Ensuite, notre SVMDP dépendant du contexte compose ces fragments afin d’obtenir la représentation sémantique globale du message. Les expériences sont menées sur le corpus de dialogue MEDIA. Une procédure semi-automatique fournit une annotation de référence en frames sur laquelle les paramètres des DBN et SVMDP sont appris. Les résultats montrent que la méthode permet d’améliorer les performances d’identification de frames pour les exemples de test les plus complexes par rapport à un processus de décision déterministe ad hoc.</abstract>
       <url hash="c25a31ec">2010.jeptalnrecital-court.24</url>
@@ -768,7 +768,7 @@
     <paper id="26">
       <title>Construction d’un lexique affectif pour le français à partir de <fixed-case>T</fixed-case>witter</title>
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>153–158</pages>
       <abstract>Un lexique affectif est un outil utile pour l’étude des émotions ainsi que pour la fouille d’opinion et l’analyse des sentiments. Un tel lexique contient des listes de mots annotés avec leurs évaluations émotionnelles. Il existe un certain nombre de lexiques affectifs pour la langue anglaise, espagnole, allemande, mais très peu pour le français. Un travail de longue haleine est nécessaire pour construire et enrichir un lexique affectif. Nous proposons d’utiliser Twitter, la plateforme la plus populaire de microblogging de nos jours, pour recueillir un corpus de textes émotionnels en français. En utilisant l’ensemble des données recueillies, nous avons estimé les normes affectives de chaque mot. Nous utilisons les données de la Norme Affective desMots Anglais (ANEW, Affective Norms of EnglishWords) que nous avons traduite en français afin de valider nos résultats. Les valeurs du coefficient tau de Kendall et du coefficient de corrélation de rang de Spearman montrent que nos scores estimés sont en accord avec les scores ANEW.</abstract>
       <url hash="9813f11b">2010.jeptalnrecital-court.26</url>
@@ -778,7 +778,7 @@
     <paper id="27">
       <title>Analyse d’opinion : annotation sémantique de textes chinois</title>
       <author><first>Lei</first><last>Zhang</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <pages>159–164</pages>
       <abstract>Notre travail concerne l’analyse automatique des énoncés d’opinion en chinois. En nous inspirant de la théorie linguistique de l’Appraisal, nous proposons une méthode fondée sur l’usage de lexiques et de règles locales pour déterminer les caractéristiques telles que la Force (intensité), le Focus (prototypicalité) et la polarité de tels énoncés. Nous présentons le modèle et sa mise en oeuvre sur un corpus journalistique. Si pour la détection d’énoncés d’opinion, la précision est bonne (94 %), le taux de rappel (67 %) pose cependant des questions sur l’enrichissement des ressources actuelles.</abstract>
       <url hash="75ef5c83">2010.jeptalnrecital-court.27</url>
@@ -818,7 +818,7 @@
       <title>L’apport d’une approche hybride pour la reconnaissance des entités nommées en langue arabe</title>
       <author><first>Inès</first><last>Zribi</last></author>
       <author><first>Souha</first><last>Mezghani Hammami</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>183–188</pages>
       <abstract>Dans cet article, nous proposons une méthode hybride pour la reconnaissance des entités nommées pour la langue arabe. Cette méthode profite, d’une part, des avantages de l’utilisation d’une méthode d’apprentissage pour extraire des règles permettant l’identification et la classification des entités nommées. D’autre part, elle repose sur un ensemble de règles extraites manuellement pour corriger et améliorer le résultat de la méthode d’apprentissage. Les résultats de l’évaluation de la méthode proposée sont encourageants. Nous avons obtenu un taux global de F-mesure égal à 79.24%.</abstract>
       <url hash="9974595c">2010.jeptalnrecital-court.31</url>
@@ -858,7 +858,7 @@
     </paper>
     <paper id="35">
       <title>Traduction de requêtes basée sur Wikipédia</title>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <author><first>Olivier</first><last>Collin</last></author>
       <author><first>Malek</first><last>Boualem</last></author>
       <pages>207–212</pages>
@@ -881,7 +881,7 @@
   <volume id="demonstration" ingest-date="2021-02-05" type="proceedings">
     <meta>
       <booktitle>Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Démonstrations</booktitle>
-      <editor><first>Philippe</first><last>Langlais</last></editor>
+      <editor id="philippe-langlais"><first>Philippe</first><last>Langlais</last></editor>
       <editor><first>Michel</first><last>Gagnon</last></editor>
       <publisher>ATALA</publisher>
       <address>Montréal, Canada</address>
@@ -905,7 +905,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>T</fixed-case>ermino<fixed-case>W</fixed-case>eb : recherche et analyse d’information thématique</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>4–7</pages>
       <abstract>Notre démonstration porte sur le prototype TerminoWeb, une plateforme Web qui permet (1) la construction automatique d’un corpus thématique à partir d’une recherche de documents sur le Web, (2) l’extraction de termes du corpus, et (3) la recherche d’information définitionnelle sur ces termes en corpus. La plateforme intégrant les trois modules, elle aidera un langagier (terminologue, traducteur, rédacteur) à découvrir un nouveau domaine (thème) en facilitant la recherche et l’analyse de documents informatifs pertinents à ce domaine.</abstract>
       <url hash="8c2e9232">2010.jeptalnrecital-demonstration.2</url>
@@ -914,10 +914,10 @@
     </paper>
     <paper id="3">
       <title>The i<fixed-case>MAG</fixed-case> concept: multilingual access gateway to an elected Web sites with incremental quality increase through collaborative post-edition of <fixed-case>MT</fixed-case> pretranslations</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Cong</first><last>Phap Huynh</last></author>
       <author><first>Hong</first><last>Thai Nguyen</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
       <pages>8–15</pages>
       <abstract>We will demonstrate iMAGs (interactive Multilingual Access Gateways), in particular on a scientific laboratory web site and on the Greater Grenoble (La Métro) web site.</abstract>
       <url hash="2515b875">2010.jeptalnrecital-demonstration.3</url>
@@ -939,7 +939,7 @@
       <author><first>Jean-Philippe</first><last>Goldman</last></author>
       <author><first>Sophie</first><last>Roekhaut</last></author>
       <author><first>Anne</first><last>Catherine Simon</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <author><first>Richard</first><last>Beaufort</last></author>
       <pages>20–23</pages>
       <abstract>Nous présentons Expressive, un système de génération de parole expressive à partir de données non linguistiques. Ce système est composé de deux outils distincts : Taittingen, un générateur automatique de textes d’une grande variété lexico-syntaxique produits à partir d’une représentation conceptuelle du discours, et StyloPhone, un système de synthèse vocale multi-styles qui s’attache à rendre le discours produit attractif et naturel en proposant différents styles vocaux.</abstract>
@@ -959,9 +959,9 @@
     </paper>
     <paper id="7">
       <title>Traitement automatique des langues des signes : le projet <fixed-case>D</fixed-case>icta-<fixed-case>S</fixed-case>ign, des corpus aux applications</title>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Jérémie</first><last>Segouat</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
       <pages>28–31</pages>
       <abstract>Cet article présente Dicta-Sign, un projet de recherche sur le traitement automatique des langues des signes (LS), qui aborde un grand nombre de questions de recherche : linguistique de corpus, modélisation linguistique, reconnaissance et génération automatique. L’objectif de ce projet est de réaliser trois applications prototypes destinées aux usagers sourds : un traducteur de termes de LS à LS, un outil de recherche par l’exemple et un Wiki en LS. Pour cela, quatre corpus comparables de cinq heures de dialogue seront produits et analysés. De plus, des avancées significatives sont attendues dans le domaine des outils d’annotation. Dans ce projet, le LIMSI est en charge de l’élaboration des modèles linguistiques et participe aux aspects corpus et génération automatique. Nous nous proposons d’illustrer l’état d’avancement de Dicta-Sign au travers de vidéos extraites du corpus et de démonstrations des outils de traitement et de génération d’animations de signeur virtuel.</abstract>
       <url hash="1d23400e">2010.jeptalnrecital-demonstration.7</url>
@@ -992,7 +992,7 @@
       <title>Text-it /Voice-it Une application mobile de normalisation des <fixed-case>SMS</fixed-case></title>
       <author><first>Richard</first><last>Beaufort</last></author>
       <author><first>Kévin</first><last>Macé</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>40–43</pages>
       <abstract>Cet article présente Text-it / Voice-it, une application de normalisation des SMS pour téléphone mobile. L’application permet d’envoyer et de recevoir des SMS normalisés, et offre le choix entre un résultat textuel (Text-it) et vocal (Voice-it).</abstract>
       <url hash="72bc59e4">2010.jeptalnrecital-demonstration.10</url>
@@ -1010,7 +1010,7 @@
     <paper id="12">
       <title><fixed-case>M</fixed-case>e<fixed-case>TAE</fixed-case> : Plate-forme d’annotation automatique et d’exploration sémantiques pour le domaine médical</title>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>48–51</pages>
       <abstract>Nous présentons une plate-forme d’annotation sémantique et d’exploration de textes médicaux, appelée « MeTAE ». Le processus d’annotation automatique comporte une première étape de reconnaissance des entités médicales présentes dans les textes suivie d’une étape d’identification des relations sémantiques qui les relient. Cette identification se fonde sur des patrons linguistiques construits manuellement pour chaque type de relation. MeTAE génère des annotations RDF à partir des informations extraites et offre une interface d’exploration des textes annotés avec des requêtes sous forme de formulaire. La plate-forme peut être utilisée pour analyser sémantiquement les textes médicaux ou interroger la base d’annotation disponible pour avoir une/des réponses à une requête donnée (e.g. « ?X prévient maladie d’Alzheimer », équivalent à la question « comment prévenir la maladie d’Alzheimer ? »). Cette application peut être la base d’un système de questions-réponses pour le domaine médical.</abstract>
       <url hash="377a963b">2010.jeptalnrecital-demonstration.12</url>
@@ -1042,7 +1042,7 @@
     </paper>
     <paper id="15">
       <title>Moz: Translation of Structured Terminology-Rich Text</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <pages>60–63</pages>
       <abstract>Description of Moz, a translation support system designed for texts exhibiting a high proportion of structured and semi-structured terminological content. The system comprises a web-based collaborative translation memory, with high recall via subsentential linguistic analysis and facilities for messaging and quality assurance. It is in production use, translating some 140,000 words per week.</abstract>
       <url hash="2024c154">2010.jeptalnrecital-demonstration.15</url>
@@ -1051,10 +1051,10 @@
     <paper id="16">
       <title><fixed-case>MACAON</fixed-case> Une chaîne linguistique pour le traitement de graphes de mots</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Jean-François</first><last>Rey</last></author>
       <pages>64–67</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="e3678c0b">2010.jeptalnrecital-demonstration.16</url>
       <language>fra</language>
       <bibkey>nasr-etal-2010-macaon</bibkey>
@@ -1064,7 +1064,7 @@
     <meta>
       <booktitle>Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues</booktitle>
       <editor><first>Alexandre</first><last>Patry</last></editor>
-      <editor><first>Philippe</first><last>Langlais</last></editor>
+      <editor id="philippe-langlais"><first>Philippe</first><last>Langlais</last></editor>
       <editor><first>Aurélien</first><last>Max</last></editor>
       <publisher>ATALA</publisher>
       <address>Montréal, Canada</address>
diff --git a/data/xml/2010.tal.xml b/data/xml/2010.tal.xml
index 28021d076e..03e6588924 100644
--- a/data/xml/2010.tal.xml
+++ b/data/xml/2010.tal.xml
@@ -86,7 +86,7 @@
     <paper id="3">
       <title><fixed-case>PARADOCS</fixed-case> : l’entremetteur de documents parallèles indépendant de la langue [<fixed-case>PARADOCS</fixed-case>: A Language Independant Go-Between for Mating Parallel Documents]</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>41–63</pages>
       <url hash="4b87b207">2010.tal-2.3</url>
       <language>fra</language>
@@ -94,10 +94,10 @@
     </paper>
     <paper id="4">
       <title>Micro-adaptation lexicale en traduction automatique statistique [Lexical Micro-adaptation in Statistical Machine Translation]</title>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>65–93</pages>
       <url hash="93d85320">2010.tal-2.4</url>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="5">
       <title>Transliteration as Alignment vs. Transliteration as Generation for Crosslingual Information Retrieval</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Sethuramalingam</first><last>Subramaniam</last></author>
       <author><first>Taraka</first><last>Rama</last></author>
       <pages>95–117</pages>
@@ -130,7 +130,7 @@
       <title>Préface [Introduction]</title>
       <author><first>Agata</first><last>Jackiewicz</last></author>
       <author><first>Susan</first><last>Hunston</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>7–17</pages>
       <url hash="6b3e6481">2010.tal-3.1</url>
       <language>fra</language>
@@ -159,7 +159,7 @@
     <paper id="4">
       <title>Le microblogging pour la micro analyse des sentiments et des opinons [Microblogging for Micro Sentiment Analysis and Opinion Mining]</title>
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>75–100</pages>
       <url hash="2cd1bb08">2010.tal-3.4</url>
       <language>fra</language>
@@ -169,7 +169,7 @@
       <title>Extraction probabiliste de chaînes de mots relatives à une opinion [A probabilistic approach for extracting opinion-related word chains from texts]</title>
       <author><first>Remi</first><last>Lavalley</last></author>
       <author><first>Chloe</first><last>Clavel</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>101–130</pages>
       <url hash="014271cf">2010.tal-3.5</url>
       <language>fra</language>
diff --git a/data/xml/2010.tc.xml b/data/xml/2010.tc.xml
index 353e3769fa..4411468c14 100644
--- a/data/xml/2010.tc.xml
+++ b/data/xml/2010.tc.xml
@@ -61,7 +61,7 @@
     <paper id="8">
       <title>Next generation translation and localization: users are taking charge</title>
       <author><first>Sharon</first><last>O’Brien</last></author>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <url hash="e875eecf">2010.tc-1.8</url>
       <bibkey>obrien-schaler-2010-next</bibkey>
     </paper>
@@ -102,7 +102,7 @@
     <paper id="14">
       <title>A hybrid word alignment approach to improve translation lexicons with compound words and idiomatic expressions</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
       <author><first>Benoît</first><last>Le Ny</last></author>
       <author><first>Jean-Jacques</first><last>Bouzaglou</last></author>
diff --git a/data/xml/2011.eamt.xml b/data/xml/2011.eamt.xml
index bf1f7cab8e..1294016140 100644
--- a/data/xml/2011.eamt.xml
+++ b/data/xml/2011.eamt.xml
@@ -7,7 +7,7 @@
       <address>Leuven, Belgium</address>
       <month>May 30–31</month>
       <year>2011</year>
-      <editor><first>Mikel L.</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></editor>
       <editor><first>Heidi</first><last>Depraetere</last></editor>
       <editor><first>Vincent</first><last>Vandeghinste</last></editor>
       <venue>eamt</venue>
@@ -44,7 +44,7 @@
       <title>A Comparative Evaluation of Research vs. Online <fixed-case>MT</fixed-case> Systems</title>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="05a6e01e">2011.eamt-1.4</url>
       <attachment type="presentation" hash="42f101fc">2011.eamt-1.4.Presentation.pdf</attachment>
@@ -52,7 +52,7 @@
     </paper>
     <paper id="5">
       <title>Experiments on Domain Adaptation for Patent Machine Translation in the <fixed-case>PL</fixed-case>u<fixed-case>TO</fixed-case> project</title>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
       <author><first>John</first><last>Tinsley</last></author>
       <author><first>Jian</first><last>Zhang</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -72,8 +72,8 @@
       <title>Evaluation of <fixed-case>SMT</fixed-case> in localization to under-resourced inflected language</title>
       <author><first>Raivis</first><last>Skadiņš</last></author>
       <author><first>Maris</first><last>Puriņš</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <url hash="99f041fc">2011.eamt-1.7</url>
       <attachment type="presentation" hash="de98b8e7">2011.eamt-1.7.Presentation.pdf</attachment>
       <bibkey>skadins-etal-2011-evaluation</bibkey>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="13">
       <title>Using word alignments to assist computer-aided translation users by marking which target-side words to change or keep unedited</title>
-      <author><first>Miquel</first><last>Esplà</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="21894f0c">2011.eamt-1.13</url>
@@ -140,62 +140,62 @@
     </paper>
     <paper id="16">
       <title>Searching Parallel Corpora for Contextually Equivalent Terms</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <url hash="8ac927f2">2011.eamt-1.16</url>
       <bibkey>barriere-isabelle-2011-searching</bibkey>
     </paper>
     <paper id="17">
       <title>Rule-based Reordering Constraints for Phrase-based <fixed-case>SMT</fixed-case></title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="47cb5598">2011.eamt-1.17</url>
       <bibkey>goh-etal-2011-rule</bibkey>
     </paper>
     <paper id="18">
       <title>Deriving translation units using small additional corpora</title>
       <author><first>Carlos A. Henríquez</first><last>Q.</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <url hash="185bd8da">2011.eamt-1.18</url>
       <bibkey>q-etal-2011-deriving</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>BP</fixed-case>2<fixed-case>EP</fixed-case> - Adaptation of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese texts to <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Luis</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luis</first><last>Marujo</last></author>
       <author><first>Nuno</first><last>Grazina</last></author>
       <author><first>Tiago</first><last>Luis</last></author>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <url hash="583b1f8a">2011.eamt-1.19</url>
       <bibkey>marujo-etal-2011-bp2ep</bibkey>
     </paper>
     <paper id="20">
       <title>Deriving translation units using small additional corpora</title>
-      <author><first>Carlos A.</first><last>Henríquez Q.</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="carlos-henriquez"><first>Carlos A.</first><last>Henríquez Q.</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <bibkey>henriquez-q-etal-2011-deriving</bibkey>
     </paper>
     <paper id="21">
       <title>Cognate Identification for a <fixed-case>F</fixed-case>rench - <fixed-case>R</fixed-case>omanian Lexical Alignment System: Empirical Study</title>
       <author><first>Mirabela</first><last>Navlea</last></author>
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
       <url hash="c84e5881">2011.eamt-1.21</url>
       <bibkey>navlea-todirascu-2011-cognate</bibkey>
     </paper>
     <paper id="22">
       <title>Rapid rule-based machine translation between <fixed-case>D</fixed-case>utch and <fixed-case>A</fixed-case>frikaans</title>
       <author><first>Pim</first><last>Otte</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <url hash="f684e936">2011.eamt-1.22</url>
       <bibkey>otte-tyers-2011-rapid</bibkey>
     </paper>
     <paper id="23">
       <title>Preliminary Experiments on Using Users’ Post-Editions to Enhance a <fixed-case>SMT</fixed-case> System Oracle-based Training for Phrase-based Statistical Machine Translation</title>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="43b8e90b">2011.eamt-1.23</url>
@@ -205,8 +205,8 @@
       <title>Oracle-based Training for Phrase-based Statistical Machine Translation</title>
       <author><first>Marion</first><last>Potet</last></author>
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <url hash="4833e7a5">2011.eamt-1.24</url>
       <bibkey>potet-etal-2011-oracle</bibkey>
     </paper>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="28">
       <title>Using Example-Based <fixed-case>MT</fixed-case> to Support Statistical <fixed-case>MT</fixed-case> when Translating Homogeneous Data in a Resource-Poor Setting</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sara</first><last>Morrissey</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
@@ -248,7 +248,7 @@
       <author><first>Sarah</first><last>Ebling</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Martin</first><last>Volk</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <url hash="d898e7c0">2011.eamt-1.29</url>
       <attachment type="presentation" hash="f57b6d3f">2011.eamt-1.29.Presentation.pdf</attachment>
       <bibkey>ebling-etal-2011-combining</bibkey>
@@ -258,7 +258,7 @@
       <author><first>Martha Dís</first><last>Brandt</last></author>
       <author><first>Hrafh</first><last>Loftsson</last></author>
       <author><first>Hlynur</first><last>Sigurþórsson</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <url hash="09f39293">2011.eamt-1.30</url>
       <attachment type="presentation" hash="ce6991f3">2011.eamt-1.30.Presentation.pdf</attachment>
       <bibkey>brandt-etal-2011-apertium</bibkey>
@@ -282,7 +282,7 @@
     </paper>
     <paper id="33">
       <title>Minimum Error Rate Training Semiring</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <url hash="f5fb89e6">2011.eamt-1.33</url>
       <attachment type="presentation" hash="c196f1eb">2011.eamt-1.33.Presentation.pdf</attachment>
@@ -299,16 +299,16 @@
     </paper>
     <paper id="35">
       <title>Bilingual segmentation for phrasetable pruning in Statistical Machine Translation</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <author><first>Jorge</first><last>González</last></author>
       <url hash="ec1daa3c">2011.eamt-1.35</url>
       <bibkey>sanchis-trilles-etal-2011-bilingual</bibkey>
     </paper>
     <paper id="36">
       <title>From Human to Automatic Error Classification for Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <url hash="55f8f980">2011.eamt-1.36</url>
       <attachment type="presentation" hash="a804f898">2011.eamt-1.36.Presentation.pdf</attachment>
@@ -319,7 +319,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="eb7fc7fd">2011.eamt-1.37</url>
       <attachment type="presentation" hash="6ec5acc6">2011.eamt-1.37.Presentation.pdf</attachment>
       <bibkey>huck-etal-2011-advancements</bibkey>
diff --git a/data/xml/2011.freeopmt.xml b/data/xml/2011.freeopmt.xml
index b0b581a7cb..24c8cbd53a 100644
--- a/data/xml/2011.freeopmt.xml
+++ b/data/xml/2011.freeopmt.xml
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing: open-source natural language processing for research and development</title>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>2</pages>
       <url hash="cebfd7d3">2011.freeopmt-1.2</url>
       <bibkey>padro-2011-freeling</bibkey>
@@ -50,9 +50,9 @@
     </paper>
     <paper id="5">
       <title>Bootstrapping a statistical speech translator from a rule-based one</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>21-28</pages>
       <url hash="f77c6cad">2011.freeopmt-1.5</url>
       <bibkey>rayner-etal-2011-bootstrapping</bibkey>
@@ -97,7 +97,7 @@
     <paper id="10">
       <title>A widely used machine translation service and its migration to a free/open-source solution: the case of Softcatalà</title>
       <author><first>Xavier</first><last>Ivars-Ribes</last></author>
-      <author><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
       <pages>61-68</pages>
       <url hash="2c29bb42">2011.freeopmt-1.10</url>
       <abstract>Softcatala` is a non-profit association created more than 10 years ago to fight the marginalisation of the Catalan language in information and communication technologies. It has led the localisation of many applications and the creation of a website which allows its users to translate texts between Spanish and Catalan using an external closedsource translation engine. Recently, the closed-source translation back-end has been replaced by a free/open-source solution completely managed by Softcatala`: the Apertium machine translation platform and the ScaleMT web service framework. Thanks to the openness of the new solution, it is possible to take advantage of the huge amount of users of the Softcatala` translation service to improve it, using a series of methods presented in this paper. In addition, a study of the translations requested by the users has been carried out, and it shows that the translation back-end change has not affected the usage patterns.</abstract>
@@ -115,8 +115,8 @@
     <paper id="12">
       <title>An <fixed-case>I</fixed-case>talian to <fixed-case>C</fixed-case>atalan <fixed-case>RBMT</fixed-case> system reusing data from existing language pairs</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Mireia</first><last>Ginestí-Rosell</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="mireia-ginesti-rosell"><first>Mireia</first><last>Ginestí-Rosell</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>77-81</pages>
       <url hash="1da94f01">2011.freeopmt-1.12</url>
       <abstract>This paper presents an Italian→Catalan RBMT system automatically built by combining the linguistic data of the existing pairs Spanish–Catalan and Spanish–Italian. A lightweight manual postprocessing is carried out in order to fix inconsistencies in the automatically derived dictionaries and to add very frequent words that are missing according to a corpus analysis. The system is evaluated on the KDE4 corpus and outperforms Google Translate by approximately ten absolute points in terms of both TER and GTM.</abstract>
diff --git a/data/xml/2011.iwslt.xml b/data/xml/2011.iwslt.xml
index 60f9f783c0..0dc5e4e79b 100644
--- a/data/xml/2011.iwslt.xml
+++ b/data/xml/2011.iwslt.xml
@@ -14,7 +14,7 @@
     </meta>
     <paper id="1">
       <title>Data-intensive approaches for <fixed-case>ASR</fixed-case></title>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <url hash="0489e794">2011.iwslt-keynotes.1</url>
       <bibkey>furui-2011-data</bibkey>
     </paper>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="3">
       <title>Resource-rich research on natural language processing and understanding</title>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <url hash="0489e794">2011.iwslt-keynotes.3</url>
       <bibkey>tsujii-2011-resource</bibkey>
     </paper>
@@ -47,8 +47,8 @@
       <title>Overview of the <fixed-case>IWSLT</fixed-case> 2011 evaluation campaign</title>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <pages>11-27</pages>
       <url hash="113d55e2">2011.iwslt-evaluation.1</url>
       <abstract>We report here on the eighth Evaluation Campaign organized by the IWSLT workshop. This year, the IWSLT evaluation focused on the automatic translation of public talks and included tracks for speech recognition, speech translation, text translation, and system combination. Unlike previous years, all data supplied for the evaluation has been publicly released on the workshop website, and is at the disposal of researchers interested in working on our benchmarks and in comparing their results with those published at the workshop. This paper provides an overview of the IWSLT 2011 Evaluation Campaign, which includes: descriptions of the supplied data and evaluation specifications of each track, the list of participants specifying their submitted runs, a detailed description of the subjective evaluation carried out, the main findings of each exercise drawn from the results and the system descriptions prepared by the participants, and, finally, several detailed tables reporting all the evaluation results.</abstract>
@@ -59,10 +59,10 @@
       <author><first>Kazuhiko</first><last>Abe</last></author>
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Chien-lin</first><last>Huang</last></author>
-      <author><first>Paul R.</first><last>Dixon</last></author>
+      <author id="paul-dixon"><first>Paul R.</first><last>Dixon</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>28-33</pages>
       <url hash="9148faa7">2011.iwslt-evaluation.2</url>
       <abstract>In this paper, we describe NICT’s participation in the IWSLT 2011 evaluation campaign for the ASR Track. To recognize spontaneous speech, we prepared an acoustic model trained by more spontaneous speech corpora and a language model constructed with text corpora distributed by the organizer. We built the multi-pass ASR system by adapting the acoustic and language models with previous ASR results. The target speech was selected from talks on the TED (Technology, Entertainment, Design) program. Here, a large reduction in word error rate was obtained by the speaker adaptation of the acoustic model with MLLR. Additional improvement was achieved not only by adaptation of the language model but also by parallel usage of the baseline and speaker-dependent acoustic models. Accordingly, the final WER was reduced by 30% from the baseline ASR for the distributed test set.</abstract>
@@ -71,7 +71,7 @@
     <paper id="3">
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2011 <fixed-case>MT</fixed-case> system</title>
       <author><first>A. Ryan</first><last>Aminzadeh</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Ray</first><last>Slyh</last></author>
       <author><first>Brian</first><last>Ore</last></author>
       <author><first>Eric</first><last>Hansen</last></author>
@@ -87,11 +87,11 @@
       <title>The <fixed-case>DCU</fixed-case> machine translation systems for <fixed-case>IWSLT</fixed-case> 2011</title>
       <author><first>Pratyush</first><last>Banerjee</last></author>
       <author><first>Hala</first><last>Almaghout</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Jie</first><last>Jiang</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>41-48</pages>
       <url hash="d878f84c">2011.iwslt-evaluation.4</url>
       <abstract>In this paper, we provide a description of the Dublin City University’s (DCU) submissions in the IWSLT 2011 evaluationcampaign.1 WeparticipatedintheArabic-Englishand Chinese-English Machine Translation(MT) track translation tasks. We use phrase-based statistical machine translation (PBSMT) models to create the baseline system. Due to the open-domain nature of the data to be translated, we use domain adaptation techniques to improve the quality of translation. Furthermore, we explore target-side syntactic augmentation for an Hierarchical Phrase-Based (HPB) SMT model. Combinatory Categorial Grammar (CCG) is used to extract labels for target-side phrases and non-terminals in the HPB system. Combining the domain adapted language models with the CCG-augmented HPB system gave us the best translations for both language pairs providing statistically significant improvements of 6.09 absolute BLEU points (25.94% relative) and 1.69 absolute BLEU points (15.89% relative) over the unadapted PBSMT baselines for the Arabic-English and Chinese-English language pairs, respectively.</abstract>
@@ -100,9 +100,9 @@
     <paper id="5">
       <title>The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2011</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>49-56</pages>
       <url hash="938d69c8">2011.iwslt-evaluation.5</url>
       <abstract>This paper describes NICT’s participation in the IWSLT 2011 evaluation campaign for the TED speech translation ChineseEnglish shared-task. Our approach was based on a phrasebased statistical machine translation system that was augmented in two ways. Firstly we introduced rule-based re-ordering constraints on the decoding. This consisted of a set of rules that were used to segment the input utterances into segments that could be decoded almost independently. This idea here being that constraining the decoding process in this manner would greatly reduce the search space of the decoder, and cut out many possibilities for error while at the same time allowing for a correct output to be generated. The rules we used exploit punctuation and spacing in the input utterances, and we use these positions to delimit our segments. Not all punctuation/spacing positions were used as segment boundaries, and the set of used positions were determined by a set of linguistically-based heuristics. Secondly we used two heterogeneous methods to build the translation model, and lexical reordering model for our systems. The first method employed the popular method of using GIZA++ for alignment in combination with phraseextraction heuristics. The second method used a recentlydeveloped Bayesian alignment technique that is able to perform both phrase-to-phrase alignment and phrase pair extraction within a single unsupervised process. The models produced by this type of alignment technique are typically very compact whilst at the same time maintaining a high level of translation quality. We evaluated both of these methods of translation model construction in isolation, and our results show their performance is comparable. We also integrated both models by linear interpolation to obtain a model that outperforms either component. Finally, we added an indicator feature into the log-linear model to indicate those phrases that were in the intersection of the two translation models. The addition of this feature was also able to provide a small improvement in performance.</abstract>
@@ -114,7 +114,7 @@
       <author><first>Amittai</first><last>Axelrod</last></author>
       <author><first>Li</first><last>Deng</last></author>
       <author><first>Alex</first><last>Acero</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <author><first>Alisa</first><last>Nguyen</last></author>
       <author><first>Andrew</first><last>Wang</last></author>
       <author><first>Xiahui</first><last>Huang</last></author>
@@ -127,7 +127,7 @@
       <title><fixed-case>LIMSI</fixed-case>’s experiments in domain adaptation for <fixed-case>IWSLT</fixed-case>11</title>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Hai-Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai-Son</first><last>Le</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>62-67</pages>
       <url hash="061a09f3">2011.iwslt-evaluation.7</url>
@@ -137,8 +137,8 @@
     <paper id="8">
       <title><fixed-case>LIG</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench spoken language translation system for <fixed-case>IWSLT</fixed-case> 2011</title>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>68-72</pages>
       <url hash="b04f7a68">2011.iwslt-evaluation.8</url>
       <abstract>This paper describes the system developed by the LIG laboratory for the 2011 IWSLT evaluation. We participated to the English-French MT and SLT tasks. The development of a reference translation system (MT task), as well as an ASR output translation system (SLT task) are presented. We focus this year on the SLT task and on the use of multiple 1-best ASR outputs to improve overall translation quality. The main experiment presented here compares the performance of a SLT system where multiple ASR 1-best are combined before translation (source combination), with a SLT system where multiple ASR 1-best are translated, the system combination being conducted afterwards on the target side (target combination). The experimental results show that the second approach (target combination) overpasses the first one, when the performance is measured with BLEU.</abstract>
@@ -150,7 +150,7 @@
       <author><first>Eunach</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>73-78</pages>
       <url hash="27a8d529">2011.iwslt-evaluation.9</url>
       <abstract>This paper presents the KIT system participating in the English→French TALK Translation tasks in the framework of the IWSLT 2011 machine translation evaluation. Our system is a phrase-based translation system using POS-based reordering extended with many additional features. First of all, a special preprocessing is devoted to the Giga corpus in order to minimize the effect of the great amount of noise it contains. In addition, the system gives more importance to the in-domain data by adapting the translation and the language models as well as by using a wordcluster language model. Furthermore, the system is extended by a bilingual language model and a discriminative word lexicon. The automatic speech transcription input usually has no or wrong punctuation marks, therefore these marks were especially removed from the source training data for the SLT system training.</abstract>
@@ -160,7 +160,7 @@
       <title><fixed-case>LIUM</fixed-case>’s systems for the <fixed-case>IWSLT</fixed-case> 2011 speech translation tasks</title>
       <author><first>Anthony</first><last>Rousseau</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <pages>79-85</pages>
@@ -185,10 +185,10 @@
     </paper>
     <paper id="12">
       <title>The 2011 <fixed-case>KIT</fixed-case> <fixed-case>E</fixed-case>nglish <fixed-case>ASR</fixed-case> system for the <fixed-case>IWSLT</fixed-case> evaluation</title>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Christian</first><last>Saam</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>94-97</pages>
       <url hash="b6da6b7b">2011.iwslt-evaluation.12</url>
       <abstract>This paper describes our English Speech-to-Text (STT) system for the 2011 IWSLT ASR track. The system consists of 2 subsystems with different front-ends—one MVDR based, one MFCC based—which are combined using confusion network combination to provide a base for a second pass speaker adapted MVDR system. We demonstrate that this set-up produces competitive results on the IWSLT 2010 dev and test sets.</abstract>
@@ -198,7 +198,7 @@
       <title><fixed-case>DFKI</fixed-case>’s <fixed-case>SC</fixed-case> and <fixed-case>MT</fixed-case> submissions to <fixed-case>IWSLT</fixed-case> 2011</title>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <pages>98-105</pages>
       <url hash="d667bfbc">2011.iwslt-evaluation.13</url>
@@ -214,7 +214,7 @@
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Christoph</first><last>Schmidt</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>106-113</pages>
       <url hash="05e5a9d7">2011.iwslt-evaluation.14</url>
       <abstract>In this paper the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2011 is presented. We participated in the MT (English-French, Arabic-English, ChineseEnglish) and SLT (English-French) tracks. Both hierarchical and phrase-based SMT decoders are applied. A number of different techniques are evaluated, including domain adaptation via monolingual and bilingual data selection, phrase training, different lexical smoothing methods, additional reordering models for the hierarchical system, various Arabic and Chinese segmentation methods, punctuation prediction for speech recognition output, and system combination. By application of these methods we can show considerable improvements over the respective baseline systems.</abstract>
@@ -223,17 +223,17 @@
     <paper id="15">
       <title>Advances on spoken language translation in the Quaero program</title>
       <author><first>Karim</first><last>Boudahmane</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Tonio</first><last>Wandmacher</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -244,7 +244,7 @@
     </paper>
     <paper id="16">
       <title>Speech recognition for machine translation in Quaero</title>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>Sandrine</first><last>Courcinous</last></author>
       <author><first>Julien</first><last>Despres</last></author>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
@@ -252,18 +252,18 @@
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Florian</first><last>Kraft</last></author>
       <author><first>Viet-Bac</first><last>Le</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Markus</first><last>Nußbaum-Thom</last></author>
       <author><first>Ilya</first><last>Oparin</last></author>
       <author><first>Tim</first><last>Schlippe</last></author>
-      <author><first>Ralf</first><last>Schlüter</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Thiago</first><last>Fraga da Silva</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Martin</first><last>Sundermeyer</last></author>
       <author><first>Bianca</first><last>Vieru</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <author><first>Cécile</first><last>Woehrling</last></author>
       <pages>121-128</pages>
       <url hash="2cbe7229">2011.iwslt-evaluation.16</url>
@@ -273,7 +273,7 @@
     <paper id="17">
       <title>Protocol and lessons learnt from the production of parallel corpora for the evaluation of speech translation systems</title>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Karim</first><last>Boudahmane</last></author>
       <author><first>Martine</first><last>Garnier-Rizet</last></author>
       <pages>129-135</pages>
@@ -323,10 +323,10 @@
     </paper>
     <paper id="22">
       <title>Investigation of the effects of <fixed-case>ASR</fixed-case> tuning on speech translation performance</title>
-      <author><first>Paul R.</first><last>Dixon</last></author>
+      <author id="paul-dixon"><first>Paul R.</first><last>Dixon</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>167-174</pages>
       <url hash="b94f7a70">2011.iwslt-evaluation.22</url>
       <abstract>In this paper we describe some of our recent investigations into ASR and SMT coupling issues from an ASR perspective. Our study was motivated by several areas: Firstly, to understand how standard ASR tuning procedures effect the SMT performance and whether it is safe to perform this tuning in isolation. Secondly, to investigate how vocabulary and segmentation mismatches between the ASR and SMT system effect the performance. Thirdly, to uncover any practical issues that arise when using a WFST based speech decoder for tight coupling as opposed to a more traditional tree-search decoding architecture. On the IWSLT07 Japanese-English task we found that larger language model weights only helped the SMT performance when the ASR decoder was tuned in a sub-optimal manner. When we considered the performance with suitable wide beams that ensured the ASR accuracy had converged we observed the language model weight had little influence on the SMT BLEU scores. After the construction of the phrase table the actual SMT vocabulary can be less than the training data vocabulary. By reducing the ASR lexicon to only cover the words the SMT system could accept, we found this lead to an increase in the ASR error rates, however the SMT BLEU scores were nearly unchanged. From a practical point of view this is a useful result as it means we can significantly reduce the memory footprint of the ASR system. We also investigated coupling WFST based ASR to a simple WFST based translation decoder and found it was crucial to perform phrase table expansion to avoid OOV problems. For the WFST translation decoder we describe a semiring based approach for optimizing the log-linear weights.</abstract>
@@ -336,7 +336,7 @@
       <title>Extending a probabilistic phrase alignment approach for <fixed-case>SMT</fixed-case></title>
       <author><first>Mridul</first><last>Gupta</last></author>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>175-182</pages>
       <url hash="fba3de80">2011.iwslt-evaluation.23</url>
       <abstract>Phrase alignment is a crucial step in phrase-based statistical machine translation. We explore a way of improving phrase alignment by adding syntactic information in the form of chunks as soft constraints guided by an in-depth and detailed analysis on a hand-aligned data set. We extend a probabilistic phrase alignment model that extracts phrase pairs by optimizing phrase pair boundaries over the sentence pair [1]. The boundaries of the target phrase are chosen such that the overall sentence alignment probability is optimal. Viterbi alignment information is also added in the extended model with a view of improving phrase alignment. We extract phrase pairs using a relatively larger number of features which are discriminatively trained using a large-margin online learning algorithm, i.e., Margin Infused Relaxed Algorithm (MIRA) and integrate it in our approach. Initial experiments show improvements in both phrase alignment and translation quality for Arabic-English on a moderate-size translation task.</abstract>
@@ -372,7 +372,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
       <author><first>Simon</first><last>Wiesler</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>191-198</pages>
       <url hash="32e16503">2011.iwslt-papers.1</url>
       <abstract>In this paper, we investigate lexicon models for hierarchical phrase-based statistical machine translation. We study five types of lexicon models: a model which is extracted from word-aligned training data and—given the word alignment matrix—relies on pure relative frequencies [1]; the IBM model 1 lexicon [2]; a regularized version of IBM model 1; a triplet lexicon model variant [3]; and a discriminatively trained word lexicon model [4]. We explore sourceto-target models with phrase-level as well as sentence-level scoring and target-to-source models with scoring on phrase level only. For the first two types of lexicon models, we compare several scoring variants. All models are used during search, i.e. they are incorporated directly into the log-linear model combination of the decoder. Phrase table smoothing with triplet lexicon models and with discriminative word lexicons are novel contributions. We also propose a new regularization technique for IBM model 1 by means of the Kullback-Leibler divergence with the empirical unigram distribution as regularization term. Experiments are carried out on the large-scale NIST Chinese→English translation task and on the English→French and Arabic→English IWSLT TED tasks. For Chinese→English and English→French, we obtain the best results by using the discriminative word lexicon to smooth our phrase tables.</abstract>
@@ -383,8 +383,8 @@
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Christian</first><last>Saam</last></author>
       <author><first>Christian</first><last>Mohr</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>199-205</pages>
       <url hash="feecd0c9">2011.iwslt-papers.2</url>
       <abstract>This paper describes our current Spanish speech-to-text (STT) system with which we participated in the 2011 Quaero STT evaluation that is being developed within the Quaero program. The system consists of 4 separate subsystems, as well as the standard MFCC and MVDR phoneme based subsystems we included a both a phoneme and grapheme based bottleneck subsystem. We carefully evaluate the performance of each subsystem. After including several new techniques we were able to reduce the WER by over 30% from 20.79% to 14.53%.</abstract>
@@ -396,8 +396,8 @@
       <author><first>Pável</first><last>Calado</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Alan</first><last>Black</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>206-213</pages>
       <url hash="5c15b5a0">2011.iwslt-papers.3</url>
       <abstract>This work describes a process to extract Named Entity (NE) translations from the text available in web links (anchor texts). It translates a NE by retrieving a list of web documents in the target language, extracting the anchor texts from the links to those documents and finding the best translation from the anchor texts, using a combination of features, some of which, are specific to anchor texts. Experiments performed on a manually built corpora, suggest that over 70% of the NEs, ranging from unpopular to popular entities, can be translated correctly using sorely anchor texts. Tests on a Machine Translation task indicate that the system can be used to improve the quality of the translations of state-of-the-art statistical machine translation systems.</abstract>
@@ -408,7 +408,7 @@
       <author><first>Paul</first><last>Maergner</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>214-221</pages>
       <url hash="dcbe1e2e">2011.iwslt-papers.4</url>
       <abstract>In this work, we propose a novel method for vocabulary selection which enables simultaneous speech recognition systems for lectures to automatically adapt to the diverse topics that occur in educational and scientific lectures. Utilizing materials that are available before the lecture begins, such as lecture slides, our proposed framework iteratively searches for related documents on the World Wide Web and generates a lecture-specific vocabulary and language model based on the resulting documents. In this paper, we introduce a novel method for vocabulary selection where we rank vocabulary that occurs in the collected documents based on a relevance score which is calculated using a combination of word features. Vocabulary selection is a critical component for topic adaptation that has typically been overlooked in prior works. On the interACT German-English simultaneous lecture translation system our proposed approach significantly improved vocabulary coverage, reducing the out-of-vocabulary rate on average by 57.0% and up to 84.9%, compared to a lecture-independent baseline. Furthermore, our approach reduced the word error rate by up to 25.3% (on average 13.2% across all lectures), compared to a lectureindependent baseline.</abstract>
@@ -418,7 +418,7 @@
       <title>Combining translation and language model scoring for domain-specific data filtering</title>
       <author><first>Saab</first><last>Mansour</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>222-229</pages>
       <url hash="cb3df999">2011.iwslt-papers.5</url>
       <abstract>The increasing popularity of statistical machine translation (SMT) systems is introducing new domains of translation that need to be tackled. As many resources are already available, domain adaptation methods can be applied to utilize these recourses in the most beneficial way for the new domain. We explore adaptation via filtering, using the crossentropy scores to discard irrelevant sentences. We focus on filtering for two important components of an SMT system, namely the language model (LM) and the translation model (TM). Previous work has already applied LM cross-entropy based scoring for filtering. We argue that LM cross-entropy might be appropriate for LM filtering, but not as much for TM filtering. We develop a novel filtering approach based on a combined TM and LM cross-entropy scores. We experiment with two large-scale translation tasks, the Arabic-to-English and English-to-French IWSLT 2011 TED Talks MT tasks. For LM filtering, we achieve strong perplexity improvements which carry over to the translation quality with improvements up to +0.4% BLEU. For TM filtering, the combined method achieves small but consistent improvements over the standalone methods. As a side effect of adaptation via filtering, the fully fledged SMT system vocabulary size and phrase table size are reduced by a factor of at least 2 while up to +0.6% BLEU improvement is observed.</abstract>
@@ -427,7 +427,7 @@
     <paper id="6">
       <title>Using <fixed-case>W</fixed-case>ikipedia to translate domain-specific terms in <fixed-case>SMT</fixed-case></title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>230-237</pages>
       <url hash="70e6ac10">2011.iwslt-papers.6</url>
       <abstract>When building a university lecture translation system, one important step is to adapt it to the target domain. One problem in this adaptation task is to acquire translations for domain specific terms. In this approach we tried to get these translations from Wikipedia, which provides articles on very specific topics in many different languages. To extract translations for the domain specific terms, we used the interlanguage links of Wikipedia . We analyzed different methods to integrate this corpus into our system and explored methods to disambiguate between different translations by using the text of the articles. In addition, we developed methods to handle different morphological forms of the specific terms in morphologically rich input languages like German. The results show that the number of out-of-vocabulary (OOV) words could be reduced by 50% on computer science lectures and the translation quality could be improved by more than 1 BLEU point.</abstract>
@@ -438,7 +438,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>238-245</pages>
       <url hash="d8249e50">2011.iwslt-papers.7</url>
       <abstract>Punctuation prediction is an important task in Spoken Language Translation. The output of speech recognition systems does not typically contain punctuation marks. In this paper we analyze different methods for punctuation prediction and show improvements in the quality of the final translation output. In our experiments we compare the different approaches and show improvements of up to 0.8 BLEU points on the IWSLT 2011 English French Speech Translation of Talks task using a translation system to translate from unpunctuated to punctuated text instead of a language model based punctuation prediction method. Furthermore, we do a system combination of the hypotheses of all our different approaches and get an additional improvement of 0.4 points in BLEU.</abstract>
@@ -448,7 +448,7 @@
       <title>Soft string-to-dependency hierarchical machine translation</title>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <pages>246-253</pages>
       <url hash="5cfa3382">2011.iwslt-papers.8</url>
@@ -481,7 +481,7 @@
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>269-274</pages>
       <url hash="7c003b73">2011.iwslt-papers.11</url>
       <abstract>In order to efficiently improve machine translation systems, we propose a method which selects data to be annotated (manually translated) from speech-to-speech translation field data. For the selection experiments, we used data from field experiments conducted during the 2009 fiscal year in five areas of Japan. For the selection experiments, we used data sets from two areas: one data set giving the lowest baseline speech translation performance for its test set, and another data set giving the highest. In the experiments, we compare two methods for selecting data to be manually translated from the field data. Both of them use source side language models for data selection, but in different manners. According to the experimental results, either or both of the methods show larger improvements compared to a random data selection.</abstract>
diff --git a/data/xml/2011.jeptalnrecital.xml b/data/xml/2011.jeptalnrecital.xml
index f9c2e98cdf..3d468fbbb7 100644
--- a/data/xml/2011.jeptalnrecital.xml
+++ b/data/xml/2011.jeptalnrecital.xml
@@ -26,9 +26,9 @@
     </paper>
     <paper id="2">
       <title>Theorie et Praxis Une optique sur les travaux en <fixed-case>TAL</fixed-case> sur le discours et le dialogue (Theory and Praxis A view on the <fixed-case>NLP</fixed-case> works in discourse and dialogue)</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>17–17</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="1625c504">2011.jeptalnrecital-invite.2</url>
       <language>fra</language>
       <bibkey>asher-2011-theorie</bibkey>
@@ -70,9 +70,9 @@
     </paper>
     <paper id="2">
       <title>Génération automatique de motifs de détection d’entités nommées en utilisant des contenus encyclopédiques (Automatic generation of named entity detection patterns using encyclopedic contents)</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
-      <author><first>Benoit</first><last>Ozell</last></author>
+      <author id="benoit-ozell"><first>Benoit</first><last>Ozell</last></author>
       <pages>13–24</pages>
       <abstract>Les encyclopédies numériques contiennent aujourd’hui de vastes inventaires de formes d’écritures pour des noms de personnes, de lieux, de produits ou d’organisation. Nous présentons un système hybride de détection d’entités nommées qui combine un classifieur à base de Champs Conditionnel Aléatoires avec un ensemble de motifs de détection extraits automatiquement d’un contenu encyclopédique. Nous proposons d’extraire depuis des éditions en plusieurs langues de l’encyclopédie Wikipédia de grandes quantités de formes d’écriture que nous utilisons en tant que motifs de détection des entités nommées. Nous décrivons une méthode qui nous assure de ne conserver dans cette ressources que des formes non ambiguës susceptibles de venir renforcer un système de détection d’entités nommées automatique. Nous procédons à un ensemble d’expériences qui nous permettent de comparer un système d’étiquetage à base de CRF avec un système utilisant exclusivement des motifs de détection. Puis nous fusionnons les résultats des deux systèmes et montrons qu’un gain de performances est obtenu grâce à cette proposition.</abstract>
       <url hash="b98c91fc">2011.jeptalnrecital-long.2</url>
@@ -105,7 +105,7 @@
       <title>Utilisation d’un score de qualité de traduction pour le résumé multi-document cross-lingue (Using translation quality scores for cross-language multi-document summarization)</title>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>49–58</pages>
       <abstract>Le résumé automatique cross-lingue consiste à générer un résumé rédigé dans une langue différente de celle utilisée dans les documents sources. Dans cet article, nous proposons une approche de résumé automatique multi-document, basée sur une représentation par graphe, qui prend en compte des scores de qualité de traduction lors du processus de sélection des phrases. Nous évaluons notre méthode sur un sous-ensemble manuellement traduit des données utilisées lors de la campagne d’évaluation internationale DUC 2004. Les résultats expérimentaux indiquent que notre approche permet d’améliorer la lisibilité des résumés générés, sans pour autant dégrader leur informativité.</abstract>
       <url hash="7cb63a24">2011.jeptalnrecital-long.5</url>
@@ -117,8 +117,8 @@
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Louise</first><last>Deléger</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>59–70</pages>
       <abstract>Pourtant essentiel pour appréhender rapidement et globalement l’état de santé des patients, l’accès aux informations médicales liées aux prescriptions médicamenteuses et aux concepts médicaux par les outils informatiques se révèle particulièrement difficile. Ces informations sont en effet généralement rédigées en texte libre dans les comptes rendus hospitaliers et nécessitent le développement de techniques dédiées. Cet article présente les stratégies mises en oeuvre pour extraire les prescriptions médicales et les concepts médicaux dans des comptes rendus hospitaliers rédigés en anglais. Nos systèmes, fondés sur des approches à base de règles et d’apprentissage automatique, obtiennent une F1-mesure globale de 0,773 dans l’extraction des prescriptions médicales et dans le repérage et le typage des concepts médicaux.</abstract>
       <url hash="6204f977">2011.jeptalnrecital-long.6</url>
@@ -128,8 +128,8 @@
     <paper id="7">
       <title>Comparaison et combinaison d’approches pour la portabilité vers une nouvelle langue d’un système de compréhension de l’oral (Comparison and combination of approaches for the portability to a new language of an oral comprehension system)</title>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>71–82</pages>
       <abstract>Dans cet article, nous proposons plusieurs approches pour la portabilité du module de compréhension de la parole (SLU) d’un système de dialogue d’une langue vers une autre. On montre que l’utilisation des traductions automatiques statistiques (SMT) aide à réduire le temps et le cout de la portabilité d’un tel système d’une langue source vers une langue cible. Pour la tache d’étiquetage sémantique on propose d’utiliser soit les champs aléatoires conditionnels (CRF), soit l’approche à base de séquences (PH-SMT). Les résultats expérimentaux montrent l’efficacité des méthodes proposées pour une portabilité rapide du SLU vers une nouvelle langue. On propose aussi deux méthodes pour accroître la robustesse du SLU aux erreurs de traduction. Enfin on montre que la combinaison de ces approches réduit les erreurs du système. Ces travaux sont motivés par la disponibilité du corpus MEDIA français et de la traduction manuelle vers l’italien d’une sous partie de ce corpus.</abstract>
       <url hash="0eebc883">2011.jeptalnrecital-long.7</url>
@@ -141,7 +141,7 @@
       <author><first>Thierry</first><last>Bazillon</last></author>
       <author><first>Benjamin</first><last>Maza</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>83–93</pages>
       <abstract>La fouille de données orales est un domaine de recherche visant à caractériser un flux audio contenant de la parole d’un ou plusieurs locuteurs, à l’aide de descripteurs liés à la forme et au contenu du signal. Outre la transcription automatique en mots des paroles prononcées, des informations sur le type de flux audio traité ainsi que sur le rôle et l’identité des locuteurs sont également cruciales pour permettre des requêtes complexes telles que : « chercher des débats sur le thème X », « trouver toutes les interviews de Y », etc. Dans ce cadre, et en traitant des conversations enregistrées lors d’émissions de radio ou de télévision, nous étudions la manière dont les locuteurs expriment des questions dans les conversations, en partant de l’intuition initiale que la forme des questions posées est une signature du rôle du locuteur dans la conversation (présentateur, invité, auditeur, etc.). En proposant une classification du type des questions et en utilisant ces informations en complément des descripteurs généralement utilisés dans la littérature pour classer les locuteurs par rôle, nous espérons améliorer l’étape de classification, et valider par la même occasion notre intuition initiale.</abstract>
@@ -162,9 +162,9 @@
     </paper>
     <paper id="10">
       <title>Extraction de patrons sémantiques appliquée à la classification d’Entités Nommées (Extraction of semantic patterns applied to the classification of named entities)</title>
-      <author><first>Ismaïl</first><last>El Maarouf</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="ismail-el-maarouf"><first>Ismaïl</first><last>El Maarouf</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>106–116</pages>
       <abstract>La variabilité des corpus constitue un problème majeur pour les systèmes de reconnaissance d’entités nommées. L’une des pistes possibles pour y remédier est l’utilisation d’approches linguistiques pour les adapter à de nouveaux contextes : la construction de patrons sémantiques peut permettre de désambiguïser les entités nommées en structurant leur environnement syntaxico-sémantique. Cet article présente une première réalisation sur un corpus de presse d’un système de correction. Après une étape de segmentation sur des critères discursifs de surface, le système extrait et pondère les patrons liés à une classe d’entité nommée fournie par un analyseur. Malgré des modèles encore relativement élémentaires, les résultats obtenus sont encourageants et montrent la nécessité d’un traitement plus approfondi de la classe Organisation.</abstract>
       <url hash="cdceab96">2011.jeptalnrecital-long.10</url>
@@ -174,7 +174,7 @@
     <paper id="11">
       <title>Désambiguïsation lexicale par propagation de mesures sémantiques locales par algorithmes à colonies de fourmis (Lexical disambiguation by propagation of local semantic measures using ant colony algorithms)</title>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Nathan</first><last>Guillaume</last></author>
       <pages>117–128</pages>
       <abstract>Effectuer une tâche de désambiguïsation lexicale peut permettre d’améliorer de nombreuses applications du traitement automatique des langues comme l’extraction d’informations multilingues, ou la traduction automatique. Schématiquement, il s’agit de choisir quel est le sens le plus approprié pour chaque mot d’un texte. Une des approches classiques consiste à estimer la proximité sémantique qui existe entre deux sens de mots puis de l’étendre à l’ensemble du texte. La méthode la plus directe donne un score à toutes les paires de sens de mots puis choisit la chaîne de sens qui a le meilleur score. La complexité de cet algorithme est exponentielle et le contexte qu’il est calculatoirement possible d’utiliser s’en trouve réduit. Il ne s’agit donc pas d’une solution viable. Dans cet article, nous nous intéressons à une autre méthode, l’adaptation d’un algorithme à colonies de fourmis. Nous présentons ses caractéristiques et montrons qu’il permet de propager à un niveau global les résultats des algorithmes locaux et de tenir compte d’un contexte plus long et plus approprié en un temps raisonnable.</abstract>
@@ -184,10 +184,10 @@
     </paper>
     <paper id="12">
       <title>Un turc mécanique pour les ressources linguistiques : critique de la myriadisation du travail parcellisé (<fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for linguistic resources: review of the crowdsourcing of parceled work)</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Joseph</first><last>Mariani</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="joseph-mariani"><first>Joseph</first><last>Mariani</last></author>
       <author><first>Bernard</first><last>Lang</last></author>
       <pages>129–140</pages>
       <abstract>Cet article est une prise de position concernant les plate-formes de type Amazon Mechanical Turk, dont l’utilisation est en plein essor depuis quelques années dans le traitement automatique des langues. Ces plateformes de travail en ligne permettent, selon le discours qui prévaut dans les articles du domaine, de faire développer toutes sortes de ressources linguistiques de qualité, pour un prix imbattable et en un temps très réduit, par des gens pour qui il s’agit d’un passe-temps. Nous allons ici démontrer que la situation est loin d’être aussi idéale, que ce soit sur le plan de la qualité, du prix, du statut des travailleurs ou de l’éthique. Nous rappellerons ensuite les solutions alternatives déjà existantes ou proposées. Notre but est ici double : informer les chercheurs, afin qu’ils fassent leur choix en toute connaissance de cause, et proposer des solutions pratiques et organisationnelles pour améliorer le développement de nouvelles ressources linguistiques en limitant les risques de dérives éthiques et légales, sans que cela se fasse au prix de leur coût ou de leur qualité.</abstract>
@@ -197,8 +197,8 @@
     </paper>
     <paper id="13">
       <title>Degré de comparabilité, extraction lexicale bilingue et recherche d’information interlingue (Degree of comparability, bilingual lexical extraction and cross-language information retrieval)</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author><first>Bo</first><last>Li</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <pages>141–152</pages>
@@ -220,7 +220,7 @@
     <paper id="15">
       <title>Comparaison d’une approche miroir et d’une approche distributionnelle pour l’extraction de mots sémantiquement reliés (Comparing a mirror approach and a distributional approach for extracting semantically related words)</title>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>165–176</pages>
       <abstract>Dans (Muller &amp; Langlais, 2010), nous avons comparé une approche distributionnelle et une variante de l’approche miroir proposée par Dyvik (2002) sur une tâche d’extraction de synonymes à partir d’un corpus en français. Nous présentons ici une analyse plus fine des relations extraites automatiquement en nous intéressant cette fois-ci à la langue anglaise pour laquelle de plus amples ressources sont disponibles. Différentes façons d’évaluer notre approche corroborent le fait que l’approche miroir se comporte globalement mieux que l’approche distributionnelle décrite dans (Lin, 1998), une approche de référence dans le domaine.</abstract>
       <url hash="8da1c62f">2011.jeptalnrecital-long.15</url>
@@ -284,7 +284,7 @@
       <title>Identifier la cible d’un passage d’opinion dans un corpus multithématique (Identifying the target of an opinion transition in a thematic corpus)</title>
       <author><first>Matthieu</first><last>Vernier</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>234–245</pages>
       <abstract>L’identification de la cible d’une d’opinion fait l’objet d’une attention récente en fouille d’opinion. Les méthodes existantes ont été testées sur des corpus monothématiques en anglais. Elles permettent principalement de traiter les cas où la cible se situe dans la même phrase que l’opinion. Dans cet article, nous abordons cette problématique pour le français dans un corpus multithématique et nous présentons une nouvelle méthode pour identifier la cible d’une opinion apparaissant hors du contexte phrastique. L’évaluation de la méthode montre une amélioration des résultats par rapport à l’existant.</abstract>
       <url hash="896ea7c8">2011.jeptalnrecital-long.21</url>
@@ -293,7 +293,7 @@
     </paper>
     <paper id="22">
       <title>Intégrer des connaissances linguistiques dans un <fixed-case>CRF</fixed-case> : application à l’apprentissage d’un segmenteur-étiqueteur du français (Integrating linguistic knowledge in a <fixed-case>CRF</fixed-case>: application to learning a segmenter-tagger of <fixed-case>F</fixed-case>rench)</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
@@ -308,7 +308,7 @@
     <paper id="23">
       <title>Segmentation et induction de lexique non-supervisées du mandarin (Unsupervised segmentation and induction of mandarin lexicon)</title>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>258–269</pages>
       <abstract>Pour la plupart des langues utilisant l’alphabet latin, le découpage d’un texte selon les espaces et les symboles de ponctuation est une bonne approximation d’un découpage en unités lexicales. Bien que cette approximation cache de nombreuses difficultés, elles sont sans comparaison avec celles que l’on rencontre lorsque l’on veut traiter des langues qui, comme le chinois mandarin, n’utilisent pas l’espace. Un grand nombre de systèmes de segmentation ont été proposés parmi lesquels certains adoptent une approche non-supervisée motivée linguistiquement. Cependant les méthodes d’évaluation communément utilisées ne rendent pas compte de toutes les propriétés de tels systèmes. Dans cet article, nous montrons qu’un modèle simple qui repose sur une reformulation en termes d’entropie d’une hypothèse indépendante de la langue énoncée par Harris (1955), permet de segmenter un corpus et d’en extraire un lexique. Testé sur le corpus de l’Academia Sinica, notre système permet l’induction d’une segmentation et d’un lexique qui ont de bonnes propriétés intrinsèques et dont les caractéristiques sont similaires à celles du lexique sous-jacent au corpus segmenté manuellement. De plus, on constate une certaine corrélation entre les résultats du modèle de segmentation et les structures syntaxiques fournies par une sous-partie arborée corpus.</abstract>
       <url hash="a3aa7611">2011.jeptalnrecital-long.23</url>
@@ -338,8 +338,8 @@
     </paper>
     <paper id="26">
       <title>Modèles génératif et discriminant en analyse syntaxique : expériences sur le corpus arboré de <fixed-case>P</fixed-case>aris 7 (Generative and discriminative models in parsing: experiments on the <fixed-case>P</fixed-case>aris 7 Treebank)</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <author><first>Seyed</first><last>Abolghasem Mirroshandel</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>294–305</pages>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="28">
       <title>Enrichissement de structures en dépendances par réécriture de graphes (Dependency structure enrichment using graph rewriting)</title>
-      <author><first>Guillaume</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillaume</first><last>Bonfante</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
@@ -374,7 +374,7 @@
     <paper id="29">
       <title>Classification en polarité de sentiments avec une représentation textuelle à base de sous-graphes d’arbres de dépendances (Sentiment polarity classification using a textual representation based on subgraphs of dependency trees)</title>
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>329–339</pages>
       <abstract>Les approches classiques à base de n-grammes en analyse supervisée de sentiments ne peuvent pas correctement identifier les expressions complexes de sentiments à cause de la perte d’information induite par l’approche « sac de mots » utilisée pour représenter les textes. Dans notre approche, nous avons recours à des sous-graphes extraits des graphes de dépendances syntaxiques comme traits pour la classification de sentiments. Nous représentons un texte par un vecteur composé de ces sous-graphes syntaxiques et nous employons un classifieurs SVM état-de-l’art pour identifier la polarité d’un texte. Nos évaluations expérimentales sur des critiques de jeux vidéo montrent que notre approche à base de sous-graphes est meilleure que les approches standard à modèles « sac de mots » et n-grammes. Dans cet article nous avons travaillé sur le français, mais notre approche peut facilement être adaptée à d’autres langues.</abstract>
       <url hash="3a421710">2011.jeptalnrecital-long.29</url>
@@ -423,7 +423,7 @@
     </paper>
     <paper id="34">
       <title>&lt;<fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop&gt;: un analyseur de discours basé sur les grammaires logiques (&lt;<fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop&gt;: a discourse analyzer based on logical grammars)</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>388–399</pages>
       <abstract>Dans ce document, nous présentons les principales caractéristiques de &lt;TextCoop&gt;, un environnement basé sur les grammaires logiques dédié à l’analyse de structures discursives. Nous étudions en particulier le langage DisLog qui fixe la structure des règles et des spécifications qui les accompagnent. Nous présentons la structure du moteur de &lt;TextCoop&gt; en indiquant au fur et à mesure du texte l’état du travail, les performances et les orientations en particulier en matière d’environnement, d’aide à l’écriture de règles et de développement applicatif.</abstract>
       <url hash="4790f4fe">2011.jeptalnrecital-long.34</url>
@@ -493,7 +493,7 @@
       <title>Evaluation de la détection des émotions, des opinions ou des sentiments : dictature de la majorité ou respect de la diversité d’opinions ? (Evaluation of the detection of emotions, opinions or sentiments: majority dictatorship or respect for opinion diversity?)</title>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Marc</first><last>Le Tallec</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <pages>1–6</pages>
       <abstract>Détection d’émotion, fouille d’opinion et analyse des sentiments sont généralement évalués par comparaison des réponses du système concerné par rapport à celles contenues dans un corpus de référence. Les questions posées dans cet article concernent à la fois la définition de la référence et la fiabilité des métriques les plus fréquemment utilisées pour cette comparaison. Les expérimentations menées pour évaluer le système de détection d’émotions EmoLogus servent de base de réflexion pour ces deux problèmes. L’analyse des résultats d’EmoLogus et la comparaison entre les différentes métriques remettent en cause le choix du vote majoritaire comme référence. Par ailleurs elles montrent également la nécessité de recourir à des outils statistiques plus évolués que ceux généralement utilisés pour obtenir des évaluations fiables de systèmes qui travaillent sur des données intrinsèquement subjectives et incertaines.</abstract>
       <url hash="2007ee3a">2011.jeptalnrecital-court.1</url>
@@ -521,8 +521,8 @@
     </paper>
     <paper id="4">
       <title>Coopération de méthodes statistiques et symboliques pour l’adaptation non-supervisée d’un système d’étiquetage en entités nommées (Statistical and symbolic methods cooperation for the unsupervised adaptation of a named entity recognition system)</title>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rosa</first><last>Stern</last></author>
       <pages>19–24</pages>
       <abstract>La détection et le typage des entités nommées sont des tâches pour lesquelles ont été développés à la fois des systèmes symboliques et probabilistes. Nous présentons les résultats d’une expérience visant à faire interagir le système à base de règles NP, développé sur des corpus provenant de l’AFP, intégrant la base d’entités Aleda et qui a une bonne précision, et le système LIANE, entraîné sur des transcriptions de l’oral provenant du corpus ESTER et qui a un bon rappel. Nous montrons qu’on peut adapter à un nouveau type de corpus, de manière non supervisée, un système probabiliste tel que LIANE grâce à des corpus volumineux annotés automatiquement par NP. Cette adaptation ne nécessite aucune annotation manuelle supplémentaire et illustre la complémentarité des méthodes numériques et symboliques pour la résolution de tâches linguistiques.</abstract>
@@ -532,7 +532,7 @@
     </paper>
     <paper id="5">
       <title>Création de clusters sémantiques dans des familles morphologiques à partir du <fixed-case>TLF</fixed-case>i (Creating semantic clusters in morphological families from the <fixed-case>TLF</fixed-case>i)</title>
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Véronique</first><last>Rey</last></author>
@@ -547,7 +547,7 @@
       <title>Génération automatique de questions à partir de textes en français (Automatic generation of questions from texts in <fixed-case>F</fixed-case>rench)</title>
       <author><first>Louis</first><last>de Viron</last></author>
       <author><first>Delphine</first><last>Bernhard</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>31–36</pages>
       <abstract>Nous présentons dans cet article un générateur automatique de questions pour le français. Le système de génération procède par transformation de phrases déclaratives en interrogatives et se base sur une analyse syntaxique préalable de la phrase de base. Nous détaillons les différents types de questions générées. Nous présentons également une évaluation de l’outil, qui démontre que 41 % des questions générées par le système sont parfaitement bien formées.</abstract>
@@ -595,7 +595,7 @@
     <paper id="10">
       <title>Alignement automatique pour la compréhension littérale de l’oral par approche segmentale (Automatic alignment for the literal oral understanding using a segmental approach)</title>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>55–60</pages>
       <abstract>Les approches statistiques les plus performantes actuellement pour la compréhension automatique du langage naturel nécessitent une annotation segmentale des données d’entraînement. Nous étudions dans cet article une alternative permettant d’obtenir de façon non-supervisée un alignement segmental d’unités conceptuelles sur les mots. L’impact de l’alignement automatique sur les performances du système de compréhension est évalué sur une tâche de dialogue oral.</abstract>
       <url hash="64ad0533">2011.jeptalnrecital-court.10</url>
@@ -606,7 +606,7 @@
       <title>Ajout d’informations contextuelles pour la recherche de passages au sein de Wikipédia (Integrating contextual information for passage retrieval in <fixed-case>W</fixed-case>ikipedia)</title>
       <author><first>Romain</first><last>Deveaud</last></author>
       <author><first>Eric</first><last>Sanjuan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>61–66</pages>
       <abstract>La recherche de passages consiste à extraire uniquement des passages pertinents par rapport à une requête utilisateur plutôt qu’un ensemble de documents entiers. Cette récupération de passages est souvent handicapée par le manque d’informations complémentaires concernant le contexte de la recherche initiée par l’utilisateur. Des études montrent que l’ajout d’informations contextuelles par l’utilisateur peut améliorer les performances des systèmes de recherche de passages. Nous confirmons ces observations dans cet article, et nous introduisons également une méthode d’enrichissement de la requête à partir d’informations contextuelles issues de documents encyclopédiques. Nous menons des expérimentations en utilisant la collection et les méthodes d’évaluation proposées par la campagne INEX. Les résultats obtenus montrent que l’ajout d’informations contextuelles permet d’améliorer significativement les performances de notre système de recherche de passages. Nous observons également que notre approche automatique obtient les meilleurs résultats parmi les différentes approches que nous évaluons.</abstract>
       <url hash="c4281bba">2011.jeptalnrecital-court.11</url>
@@ -616,7 +616,7 @@
     <paper id="12">
       <title>Construction d’un lexique des adjectifs dénominaux (Construction of a lexicon of denominal adjectives)</title>
       <author><first>Jana</first><last>Strnadová</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>67–72</pages>
       <abstract>Après une brève analyse linguistique des adjectifs dénominaux en français, nous décrivons le processus automatique que nous avons mis en place à partir de lexiques et de corpus volumineux pour construire un lexique d’adjectifs dénominaux dérivés de manière régulière. Nous estimons à la fois la précision et la couverture du lexique dérivationnel obtenu. À terme, ce lexique librement disponible aura été validé manuellement et contiendra également les adjectifs dénominaux à base supplétive.</abstract>
       <url hash="5dc7de69">2011.jeptalnrecital-court.12</url>
@@ -625,7 +625,7 @@
     </paper>
     <paper id="13">
       <title>Développement de ressources pour le persan : <fixed-case>P</fixed-case>er<fixed-case>L</fixed-case>ex 2, nouveau lexique morphologique et <fixed-case>ME</fixed-case>ltfa, étiqueteur morphosyntaxique (Development of resources for <fixed-case>P</fixed-case>ersian: <fixed-case>P</fixed-case>er<fixed-case>L</fixed-case>ex 2, a new morphological lexicon and <fixed-case>ME</fixed-case>ltfa, a morphosyntactic tagger)</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Géraldine</first><last>Walther</last></author>
       <author><first>Pegah</first><last>Faghiri</last></author>
       <author><first>Pollet</first><last>Samvelian</last></author>
@@ -638,7 +638,7 @@
     <paper id="14">
       <title>Identification de cognats à partir de corpus parallèles français-roumain (Identification of cognates from <fixed-case>F</fixed-case>rench-<fixed-case>R</fixed-case>omanian parallel corpora)</title>
       <author><first>Mirabela</first><last>Navlea</last></author>
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
       <pages>79–84</pages>
       <abstract>Cet article présente une méthode hybride d’identification de cognats français - roumain. Cette méthode exploite des corpus parallèles alignés au niveau propositionnel, lemmatisés et étiquetés (avec des propriétés morphosyntaxiques). Notre méthode combine des techniques statistiques et des informations linguistiques pour améliorer les résultats obtenus. Nous évaluons le module d’identification de cognats et nous faisons une comparaison avec des méthodes statistiques pures, afin d’étudier l’impact des informations linguistiques utilisées sur la qualité des résultats obtenus. Nous montrons que l’utilisation des informations linguistiques augmente significativement la performance de la méthode.</abstract>
       <url hash="28bcaeeb">2011.jeptalnrecital-court.14</url>
@@ -668,7 +668,7 @@
     </paper>
     <paper id="17">
       <title>Le corpus <fixed-case>T</fixed-case>ext+<fixed-case>B</fixed-case>erg Une ressource parallèle alpin français-allemand (The <fixed-case>T</fixed-case>ext+<fixed-case>B</fixed-case>erg Corpus An Alpine <fixed-case>F</fixed-case>rench-<fixed-case>G</fixed-case>erman Parallel Resource)</title>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Martin</first><last>Volk</last></author>
       <pages>97–102</pages>
       <abstract>Cet article présente un corpus parallèle français-allemand de plus de 4 millions de mots issu de la numérisation d’un corpus alpin multilingue. Ce corpus est une précieuse ressource pour de nombreuses études de linguistique comparée et du patrimoine culturel ainsi que pour le développement d’un système statistique de traduction automatique dans un domaine spécifique. Nous avons annoté un échantillon de ce corpus parallèle et aligné les structures arborées au niveau des mots, des constituants et des phrases. Cet “alpine treebank” est le premier corpus arboré parallèle français-allemand de haute qualité (manuellement contrôlé), de libre accès et dans un domaine et un genre nouveau : le récit d’alpinisme.</abstract>
@@ -697,7 +697,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>F</fixed-case>re<fixed-case>D</fixed-case>ist : Construction automatique d’un thésaurus distributionnel pour le Français (<fixed-case>F</fixed-case>re<fixed-case>D</fixed-case>ist : Automatic construction of distributional thesauri for <fixed-case>F</fixed-case>rench)</title>
-      <author><first>Enrique</first><last>Henestroza Anguiano</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique</first><last>Henestroza Anguiano</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
       <pages>116–121</pages>
       <abstract>Dans cet article, nous présentons FreDist, un logiciel libre pour la construction automatique de thésaurus distributionnels à partir de corpus de texte, ainsi qu’une évaluation des différents ressources ainsi produites. Suivant les travaux de (Lin, 1998) et (Curran, 2004), nous utilisons un corpus journalistique de grande taille et implémentons différentes options pour : le type de relation contexte lexical, la fonction de poids, et la fonction de mesure de similarité. Prenant l’EuroWordNet français et le WOLF comme références, notre évaluation révèle, de manière originale, que c’est l’approche qui combine contextes linéaires (ici, de type bigrammes) et contextes syntaxiques qui semble fournir le meilleur thésaurus. Enfin, nous espérons que notre logiciel, distribué avec nos meilleurs thésaurus pour le français, seront utiles à la communauté TAL.</abstract>
@@ -750,7 +750,7 @@
       <title>Attribution de rôles sémantiques aux actants des lexies verbales (Assigning semantic roles to actants of verbal lexical units)</title>
       <author><first>Fadila</first><last>Hadouche</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <pages>146–151</pages>
       <abstract>Dans cet article, nous traitons de l’attribution des rôles sémantiques aux actants de lexies verbales en corpus spécialisé en français. Nous proposons une classification de rôles sémantiques par apprentissage machine basée sur un corpus de lexies verbales annotées manuellement du domaine de l’informatique et d’Internet. Nous proposons également une méthode de partitionnement semi-supervisé pour prendre en compte l’annotation de nouvelles lexies ou de nouveaux rôles sémantiques et de les intégrés dans le système. Cette méthode de partitionnement permet de regrouper les instances d’actants selon les valeurs communes correspondantes aux traits de description des actants dans des groupes d’instances d’actants similaires. La classification de rôles sémantique a obtenu une F-mesure de 93% pour Patient, de 90% pour Agent, de 85% pour Destination et de 76% pour les autres rôles pris ensemble. Quand au partitionnement en regroupant les instances selon leur similarité donne une F-mesure de 88% pour Patient, de 81% pour Agent, de 58% pour Destination et de 46% pour les autres rôles.</abstract>
       <url hash="7081007e">2011.jeptalnrecital-court.25</url>
@@ -769,7 +769,7 @@
     <paper id="27">
       <title>Un calcul de termes typés pour la pragmatique lexicale: chemins et voyageurs fictifs dans un corpus de récits de voyage (A calculation of typed terms for lexical pragmatics: paths and fictional travellers in a travel stories corpus)</title>
       <author><first>Richard</first><last>Moot</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Christian</first><last>Retoré</last></author>
       <pages>158–163</pages>
       <abstract>Ce travail s’inscrit dans l’analyse automatique d’un corpus de récits de voyage. À cette fin, nous raffinons la sémantique de Montague pour rendre compte des phénomènes d’adaptation du sens des mots au contexte dans lequel ils apparaissent. Ici, nous modélisons les constructions de type ‘le chemin descend pendant une demi-heure’ où ledit chemin introduit un voyageur fictif qui le parcourt, en étendant des idées que le dernier auteur a développé avec Bassac et Mery. Cette introduction du voyageur utilise la montée de type afin que le quantificateur introduisant le voyageur porte sur toute la phrase et que les propriétés du chemin ne deviennent pas des propriétés du voyageur, fût-il fictif. Cette analyse sémantique (ou plutôt sa traduction en lambda-DRT) est d’ores et déjà implantée pour une partie du lexique de Grail.</abstract>
@@ -792,7 +792,7 @@
     <paper id="29">
       <title>Mesure non-supervisée du degré d’appartenance d’une entité à un type (An unsupervised measure of the degree of belonging of an entity to a type)</title>
       <author><first>Ludovic</first><last>Bonnefoy</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Michel</first><last>Benoit</last></author>
       <pages>170–175</pages>
       <abstract>La recherche d’entités nommées a été le sujet de nombreux travaux. Cependant, la construction des ressources nécessaires à de tels systèmes reste un problème majeur. Dans ce papier, nous proposons une méthode complémentaire aux outils capables de reconnaître des entités de types larges, dont l’objectif est de déterminer si une entité est d’un type donné, et ce de manière non-supervisée et quel que soit le type. Nous proposons pour cela une approche basée sur la comparaison de modèles de langage estimés à partir du Web. L’intérêt de notre approche est validé par une évaluation sur 100 entités et 273 types différents.</abstract>
@@ -852,7 +852,7 @@
     <paper id="35">
       <title>Règles et paradigmes en morphologie informatique lexématique (Rules and paradigms in lexematic computer morphology)</title>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>206–211</pages>
       <abstract>Les familles de mots produites par deux analyseurs morphologiques, DériF (basé sur des règles) et Morphonette (basé sur l’analogie), appliqués à un même corpus lexical, sont comparées. Cette comparaison conduit à l’examen de trois sous-ensembles : - un sous-ensemble commun aux deux systèmes dont la taille montre que, malgré leurs différences, les approches expérimentées par chaque système sont valides et décrivent en partie la même réalité morphologique. - un sous-ensemble propre à DériF et un autre à Morphonette. Ces ensembles (a) nous renseignent sur les caractéristiques propres à chaque système, et notamment sur ce que l’autre ne peut pas produire, (b) ils mettent en évidence les erreurs d’un système, en ce qu’elles n’apparaissent pas dans l’autre, (c) ils font apparaître certaines limites de la description, notamment celles qui sont liées aux objets et aux notions théoriques comme les familles morphologiques, les bases, l’existence de RCL « transversales » entre les lexèmes qui n’ont pas de relation d’ascendance ou de descendance.</abstract>
       <url hash="78597df8">2011.jeptalnrecital-court.35</url>
@@ -882,8 +882,8 @@
     <paper id="38">
       <title>Exploitation d’un corpus arboré pour non spécialistes par des requêtes guidées et des requêtes sémantiques (Exploiting a Treebank for non-specialists by guided queries and semantic queries)</title>
       <author><first>Achille</first><last>Falaise</last></author>
-      <author><first>Agnès</first><last>Tutin</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>224–229</pages>
       <abstract>L’exploitation de corpus analysés syntaxiquement (ou corpus arborés) pour le public non spécialiste n’est pas un problème trivial. Si la communauté du TAL souhaite mettre à la disposition des chercheurs non-informaticiens des corpus comportant des annotations linguistiques complexes, elle doit impérativement développer des interfaces simples à manipuler mais permettant des recherches fines. Dans cette communication, nous présentons les modes de recherche « grand public » développé(e)s dans le cadre du projet Scientext, qui met à disposition un corpus d’écrits scientifiques interrogeable par partie textuelle, par partie du discours et par fonction syntaxique. Les modes simples sont décrits : un mode libre et guidé, où l’utilisateur sélectionne lui-même les éléments de la requête, et un mode sémantique, qui comporte des grammaires locales préétablies à l’aide des fonctions syntaxiques.</abstract>
       <url hash="60e855f8">2011.jeptalnrecital-court.38</url>
@@ -893,7 +893,7 @@
     <paper id="39">
       <title>Communautés <fixed-case>I</fixed-case>nternet comme sources de préterminologie (<fixed-case>I</fixed-case>nternet communities as sources of preterminology)</title>
       <author><first>Mohammad</first><last>Daoud</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>230–235</pages>
       <abstract>Cet article décrit deux expériences sur la construction de ressources terminologiques multilingues (preterminologies) préliminaires, mais grandes, grâce à des communautés Internet, et s’appuie sur ces expériences pour cibler des données terminologiques plus raffinées venant de communautés Internet et d’applications Web 2.0. La première expérience est une passerelle de contribution pour le site Web de la Route de la Soie numérique (DSR). Les visiteurs contribuent en effet à un référentiel lexical multilingue dédié, pendant qu’ils visitent et lisent les livres archivés, parce qu’ils sont intéressés par le domaine et ont tendance à être polygottes. Nous avons recueilli 1400 contributions lexicales en 4 mois. La seconde expérience est basée sur le JeuxDeMots arabe, où les joueurs en ligne contribuent à un réseau lexical arabe. L’expérience a entraîné une croissance régulière du nombre de joueurs et de contributions, ces dernières contenant des termes absents et des mots de dialectes oraux.</abstract>
       <url hash="e6688b30">2011.jeptalnrecital-court.39</url>
@@ -927,7 +927,7 @@
     <paper id="42">
       <title>La traduction automatique des séquences clitiques dans un traducteur à base de règles (Automatic translation clitic sequences in a rule-based <fixed-case>MT</fixed-case> system)</title>
       <author><first>Lorenza</first><last>Russo</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>248–253</pages>
       <abstract>Dans cet article, nous discutons la méthodologie utilisée par Its-2, un système de traduction à base de règles, pour la traduction des pronoms clitiques. En particulier, nous nous focalisons sur les séquences clitiques, pour la traduction automatique entre le français et l’anglais. Une évaluation basée sur un corpus de phrases construites montre le potentiel de notre approche pour des traductions de bonne qualité.</abstract>
       <url hash="f5b81e2d">2011.jeptalnrecital-court.42</url>
@@ -941,7 +941,7 @@
       <author><first>Jean-Philippe</first><last>Goldman</last></author>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>254–259</pages>
       <abstract>Ce travail décrit la distribution des pronoms selon le style de texte (littéraire ou journalistique) et selon la langue (français, anglais, allemand et italien). Sur la base d’un étiquetage morpho-syntaxique effectué automatiquement puis vérifié manuellement, nous pouvons constater que la proportion des différents types de pronoms varie selon le type de texte et selon la langue. Nous discutons les catégories les plus ambiguës de manière détaillée. Comme nous avons utilisé l’analyseur syntaxique Fips pour l’étiquetage des pronoms, nous l’avons également évalué et obtenu une précision moyenne de plus de 95%.</abstract>
       <url hash="beef3092">2011.jeptalnrecital-court.43</url>
@@ -955,7 +955,7 @@
       <author><first>Jean-Philippe</first><last>Goldman</last></author>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>260–265</pages>
       <abstract>Dans cette étude, notre système de traduction automatique, Its-2, a fait l’objet d’une évaluation manuelle de la traduction des pronoms pour cinq paires de langues et sur deux corpus : un corpus littéraire et un corpus de communiqués de presse. Les résultats montrent que les pourcentages d’erreurs peuvent atteindre 60% selon la paire de langues et le corpus. Nous discutons ainsi deux pistes de recherche pour l’amélioration des performances de Its-2 : la résolution des ambiguïtés d’analyse et la résolution des anaphores pronominales.</abstract>
       <url hash="13c56c27">2011.jeptalnrecital-court.44</url>
@@ -985,9 +985,9 @@
       <author><first>Caroline</first><last>Hagège</last></author>
       <author><first>Denys</first><last>Proux</last></author>
       <author><first>Quentin</first><last>Gicquel</last></author>
-      <author><first>Stéfan</first><last>Darmoni</last></author>
+      <author id="stefan-darmoni"><first>Stéfan</first><last>Darmoni</last></author>
       <author><first>Suzanne</first><last>Pereira</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Marie-Helène</first><last>Metzger</last></author>
       <pages>278–283</pages>
       <abstract>Cet article décrit la première version et les résultats de l’évaluation d’un système de détection des épisodes d’infections associées aux soins. Cette détection est basée sur l’analyse automatique de comptes-rendus d’hospitalisation provenant de différents hôpitaux et différents services. Ces comptes-rendus sont sous forme de texte libre. Le système de détection a été développé à partir d’un analyseur linguistique que nous avons adapté au domaine médical et extrait à partir des documents des indices pouvant conduire à une suspicion d’infection. Un traitement de la négation et un traitement temporel des textes sont effectués permettant de restreindre et de raffiner l’extraction d’indices. Nous décrivons dans cet article le système que nous avons développé et donnons les résultats d’une évaluation préliminaire.</abstract>
@@ -1023,10 +1023,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>pati<fixed-case>A</fixed-case>nn, un outil pour annoter l’utilisation de l’espace dans les corpus vidéo (<fixed-case>S</fixed-case>pati<fixed-case>A</fixed-case>nn, a tool for annotating the use of space in video corpora)</title>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Laurence</first><last>Bolot</last></author>
       <pages>2–2</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="9430a7a3">2011.jeptalnrecital-demonstration.2</url>
       <language>fra</language>
       <bibkey>braffort-bolot-2011-spatiann</bibkey>
@@ -1044,7 +1044,7 @@
     </paper>
     <paper id="4">
       <title>Une application de la grammaire structurelle: L’analyseur syntaxique du français <fixed-case>SYGFRAN</fixed-case> (An application of structural grammar: the <fixed-case>SYGFRAN</fixed-case> syntactic analyser)</title>
-      <author><first>Jacques</first><last>Chauché</last></author>
+      <author id="jacques-chauche"><first>Jacques</first><last>Chauché</last></author>
       <pages>4–4</pages>
       <abstract>La démonstration présentée produit une analyse syntaxique du français. Elle est écrite en SYGMART, fournie avec les actes, exécutable à l’adresse : http ://www.lirmm.fr/ chauche/ExempleAnl.html et téléchargeable à l’adresse : http ://www.sygtext.fr.</abstract>
       <url hash="aa7ebbf7">2011.jeptalnrecital-demonstration.4</url>
@@ -1062,26 +1062,26 @@
     </paper>
     <paper id="6">
       <title><fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite : une chaîne de traitement pour la fouille terminologique multilingue (<fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite: a processing chain for multilingual terminology mining)</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Christine</first><last>Jacquin</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Jérome</first><last>Rocheteau</last></author>
       <pages>6–6</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="aa31fcbc">2011.jeptalnrecital-demonstration.6</url>
       <language>fra</language>
       <bibkey>daille-etal-2011-ttc</bibkey>
     </paper>
     <paper id="7">
       <title>Une Suite d’interaction de fouille basée sur la compréhension du langage naturel (An Interaction Mining Suite Based On Natural Language Understanding)</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
       <author><first>Lammert</first><last>Vrieling</last></author>
       <author><first>David</first><last>Walker</last></author>
       <pages>7–7</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="1c9aaede">2011.jeptalnrecital-demonstration.7</url>
       <language>fra</language>
       <bibkey>delmonte-etal-2011-une</bibkey>
@@ -1089,9 +1089,9 @@
     <paper id="8">
       <title>Démonstration de l’<fixed-case>API</fixed-case> de <fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se (Demonstration of the <fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se <fixed-case>API</fixed-case>)</title>
       <author><first>François-Xavier</first><last>Desmarais</last></author>
-      <author><first>Éric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Éric</first><last>Charton</last></author>
       <pages>8–8</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="690955ad">2011.jeptalnrecital-demonstration.8</url>
       <language>fra</language>
       <bibkey>desmarais-charton-2011-demonstration</bibkey>
@@ -1108,21 +1108,21 @@
     <paper id="10">
       <title><fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en, outil d’identification automatique des chaînes de référence en français (<fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en, an automatic identification tool of reference chains in <fixed-case>F</fixed-case>rench)</title>
       <author><first>Laurence</first><last>Longo</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>10–10</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="c9daf65b">2011.jeptalnrecital-demonstration.10</url>
       <language>fra</language>
       <bibkey>longo-todirascu-2011-refgen</bibkey>
     </paper>
     <paper id="11">
       <title>Babouk – exploration orientée du web pour la constitution de corpus et de terminologies (Babouk – oriented exploration of the web for the construction of corpora and terminologies)</title>
-      <author><first>Clément</first><last>de Groc</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>de Groc</last></author>
       <author><first>Javier</first><last>Couto</last></author>
       <author><first>Helena</first><last>Blancafort</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>11–11</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="a5d0ece2">2011.jeptalnrecital-demonstration.11</url>
       <language>fra</language>
       <bibkey>de-groc-etal-2011-babouk</bibkey>
@@ -1137,10 +1137,10 @@
       <author><first>Delphine</first><last>Bernhard</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>12–12</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="3959ab46">2011.jeptalnrecital-demonstration.12</url>
       <language>fra</language>
       <bibkey>grouin-etal-2011-extraction</bibkey>
@@ -1148,9 +1148,9 @@
     <paper id="13">
       <title>Système d’analyse catégorielle <fixed-case>ACCG</fixed-case> : adéquation au traitement de problèmes syntaxiques complexes (<fixed-case>ACCG</fixed-case> categorical analysis system: adequacy to the treatment of complex syntactic problems)</title>
       <author><first>Juyeon</first><last>Kang</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <pages>13–13</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="93dc0939">2011.jeptalnrecital-demonstration.13</url>
       <language>fra</language>
       <bibkey>kang-descles-2011-systeme</bibkey>
@@ -1161,9 +1161,9 @@
       <author><first>Mickaël</first><last>Mounier</last></author>
       <author><first>Helena</first><last>Blancafort</last></author>
       <author><first>Javier</first><last>Couto</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>14–14</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="9a09b0c6">2011.jeptalnrecital-demonstration.14</url>
       <language>fra</language>
       <bibkey>ma-etal-2011-lol</bibkey>
@@ -1190,11 +1190,11 @@
     </paper>
     <paper id="17">
       <title><fixed-case>EASYTEXT</fixed-case> : un système opérationnel de génération de textes (<fixed-case>EASYTEXT</fixed-case>: an operational system for text generation)</title>
-      <author><first>Frédéric</first><last>Meunier</last></author>
+      <author id="frederic-meunier"><first>Frédéric</first><last>Meunier</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <author><first>Vanessa</first><last>Combet</last></author>
       <pages>17–17</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="6ef6be8d">2011.jeptalnrecital-demonstration.17</url>
       <language>fra</language>
       <bibkey>meunier-etal-2011-easytext</bibkey>
@@ -1203,9 +1203,9 @@
       <title>Restad : un logiciel d’indexation et de stockage relationnel de contenus <fixed-case>XML</fixed-case> (Restad: an indexing and relational storing software for <fixed-case>XML</fixed-case> content)</title>
       <author><first>Yoann</first><last>Moreau</last></author>
       <author><first>Eric</first><last>SanJuan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>18–18</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="22d9fa6b">2011.jeptalnrecital-demonstration.18</url>
       <language>fra</language>
       <bibkey>moreau-etal-2011-restad</bibkey>
@@ -1256,7 +1256,7 @@
       <title>Extraction Automatique d’Informations Pédagogiques Pertinentes à partir de Documents Textuels</title>
       <author><first>Boutheina</first><last>Smine</last></author>
       <author><first>Rim</first><last>Faiz</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <pages>12–23</pages>
       <abstract>Plusieurs utilisateurs ont souvent besoin d’informations pédagogiques pour les intégrer dans leurs ressources pédagogiques, ou pour les utiliser dans un processus d’apprentissage. Une indexation de ces informations s’avère donc utile en vue d’une extraction des informations pédagogiques pertinentes en réponse à une requête utilisateur. La plupart des systèmes d’extraction d’informations pédagogiques existants proposent une indexation basée sur une annotation manuelle ou semi-automatique des informations pédagogiques, tâche qui n’est pas préférée par les utilisateurs. Dans cet article, nous proposons une approche d’indexation d’objets pédagogiques (Définition, Exemple, Exercice, etc.) basée sur une annotation sémantique par Exploration Contextuelle des documents. L’index généré servira à une extraction des objets pertinents répondant à une requête utilisateur sémantique. Nous procédons, ensuite, à un classement des objets extraits selon leur pertinence en utilisant l’algorithme Rocchio. Notre objectif est de mettre en valeur une indexation à partir de contextes sémantiques et non pas à partir de seuls termes linguistiques.</abstract>
       <url hash="c3a17197">2011.jeptalnrecital-recital.2</url>
@@ -1283,7 +1283,7 @@
     </paper>
     <paper id="5">
       <title>Alignment of Monolingual Corpus by Reduction of the Search Space</title>
-      <author><first>Prajol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajol</first><last>Shrestha</last></author>
       <pages>48–56</pages>
       <abstract>Monolingual comparable corpora annotated with alignments between text segments (paragraphs, sentences, etc.) based on similarity are used in a wide range of natural language processing applications like plagiarism detection, information retrieval, summarization and so on. The drawback wanting to use them is that there aren’t many standard corpora which are aligned. Due to this drawback, the corpus is manually created, which is a time consuming and costly task. In this paper, we propose a method to significantly reduce the search space for manual alignment of the monolingual comparable corpus which in turn makes the alignment process faster and easier. This method can be used in making alignments on different levels of text segments. Using this method we create our own gold corpus aligned on the level of paragraph, which will be used for testing and building our algorithms for automatic alignment. We also present some experiments for the reduction of search space on the basis of stem overlap, word overlap, and cosine similarity measure which help us automatize the process to some extent and reduce human effort for alignment.</abstract>
       <url hash="d4f5ffc9">2011.jeptalnrecital-recital.5</url>
@@ -1306,7 +1306,7 @@
     </frontmatter>
     <paper id="1">
       <title>Corpus-Based methods for Short Text Similarity</title>
-      <author><first>Prajol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajol</first><last>Shrestha</last></author>
       <pages>1–6</pages>
       <abstract>This paper presents corpus-based methods to find similarity between short text (sentences, paragraphs, ...) which has many applications in the field of NLP. Previous works on this problem have been based on supervised methods or have used external resources such as WordNet, British National Corpus etc. Our methods are focused on unsupervised corpus-based methods. We present a new method, based on Vector Space Model, to capture the contextual behavior, senses and correlation, of terms and show that this method performs better than the baseline method that uses vector based cosine similarity measure. The performance of existing document similarity measures, Dice and Resemblance, are also evaluated which in our knowledge have not been used for short text similarity. We also show that the performance of the vector-based baseline method is improved when using stems instead of words and using the candidate sentences for computing the parameters rather than some external resource.</abstract>
       <url hash="b0594767">2011.jeptalnrecital-recitalcourt.1</url>
diff --git a/data/xml/2011.mtsummit.xml b/data/xml/2011.mtsummit.xml
index 4542a9f32d..dce8f78ccf 100644
--- a/data/xml/2011.mtsummit.xml
+++ b/data/xml/2011.mtsummit.xml
@@ -25,7 +25,7 @@
     </paper>
     <paper id="3">
       <title>Challenges of Patent <fixed-case>MT</fixed-case> – Term and Structure Translation</title>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <bibkey>tsujii-2011-challenges</bibkey>
     </paper>
     <paper id="4">
@@ -59,7 +59,7 @@
     <paper id="2">
       <title>Training Machine Translation with a Second-Order <fixed-case>T</fixed-case>aylor Approximation of Weighted Translation Instances</title>
       <author><first>Aaron</first><last>Phillips</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <url hash="b4c28fb4">2011.mtsummit-papers.2</url>
       <bibkey>phillips-brown-2011-training</bibkey>
     </paper>
@@ -75,7 +75,7 @@
     <paper id="4">
       <title><fixed-case>POS</fixed-case> Tagging of <fixed-case>E</fixed-case>nglish Particles for Machine Translation</title>
       <author><first>Jianjun</first><last>Ma</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Haixia</first><last>Liu</last></author>
       <author><first>Wenfeng</first><last>Sheng</last></author>
       <url hash="e81665e9">2011.mtsummit-papers.4</url>
@@ -85,8 +85,8 @@
       <title>Multi-stage <fixed-case>C</fixed-case>hinese Dependency Parsing Based on Dependency Direction</title>
       <author><first>Wenjing</first><last>Lang</last></author>
       <author><first>Qiaoli</first><last>Zhou</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="3a6d5f50">2011.mtsummit-papers.5</url>
       <bibkey>lang-etal-2011-multi</bibkey>
     </paper>
@@ -102,7 +102,7 @@
       <title>Phonetic Representation-Based Speech Translation</title>
       <author><first>Jie</first><last>Jiang</last></author>
       <author><first>Zeeshan</first><last>Ahmed</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Peter</first><last>Cahill</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="979d259e">2011.mtsummit-papers.7</url>
@@ -112,7 +112,7 @@
       <title>Unsupervised Vocabulary Selection for Domain-Independent Simultaneous Lecture Translation</title>
       <author><first>Paul</first><last>Maergner</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="faf63ae1">2011.mtsummit-papers.8</url>
       <bibkey>maergner-etal-2011-unsupervised-domain</bibkey>
     </paper>
@@ -120,7 +120,7 @@
       <title>Context-aware Language Modeling for Conversational Speech Translation</title>
       <author><first>Avneesh</first><last>Saluja</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <url hash="a204770b">2011.mtsummit-papers.9</url>
       <bibkey>saluja-etal-2011-context</bibkey>
     </paper>
@@ -129,7 +129,7 @@
       <author><first>Qin</first><last>Gao</last></author>
       <author><first>Will</first><last>Lewis</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <url hash="36f74a1a">2011.mtsummit-papers.10</url>
       <bibkey>gao-etal-2011-incremental</bibkey>
     </paper>
@@ -145,8 +145,8 @@
     <paper id="12">
       <title>Multi-Strategy Approaches to Active Learning for Statistical Machine Translation</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="f3743dc8">2011.mtsummit-papers.12</url>
       <bibkey>ambati-etal-2011-multi</bibkey>
     </paper>
@@ -170,8 +170,8 @@
     </paper>
     <paper id="15">
       <title>Multimodal Building of Monolingual Dictionaries for Machine Translation by Non-Expert Users</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <url hash="a9301ac9">2011.mtsummit-papers.15</url>
       <bibkey>espla-gomis-etal-2011-multimodal</bibkey>
@@ -184,7 +184,7 @@
     </paper>
     <paper id="17">
       <title>Qualitative Analysis of Post-Editing for High Quality Machine Translation</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Mirko</first><last>Plitt</last></author>
@@ -194,9 +194,9 @@
     </paper>
     <paper id="18">
       <title>Using machine translation in computer-aided translation to suggest the target-side words to change</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="cd3f7dc9">2011.mtsummit-papers.18</url>
       <bibkey>espla-gomis-etal-2011-using</bibkey>
     </paper>
@@ -220,9 +220,9 @@
     <paper id="21">
       <title>Phrase Segmentation Model using Collocation and Translational Entropy</title>
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
-      <author><first>Joo-Young</first><last>Lee</last></author>
+      <author id="joo-young-lee"><first>Joo-Young</first><last>Lee</last></author>
       <author><first>Min-Jeong</first><last>Kim</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <author><first>Joong-Hwi</first><last>Shin</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
       <url hash="80a61aca">2011.mtsummit-papers.21</url>
@@ -239,7 +239,7 @@
       <title>Handling Multiword Expressions in Phrase-Based Statistical Machine Translation</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="039343da">2011.mtsummit-papers.23</url>
       <bibkey>pal-etal-2011-handling</bibkey>
     </paper>
@@ -274,7 +274,7 @@
     <paper id="28">
       <title>A Unified and Discriminative Soft Syntactic Constraint Model for Hierarchical Phrase-based Translation</title>
       <author><first>Lemao</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <url hash="30e6ce9d">2011.mtsummit-papers.28</url>
@@ -285,7 +285,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="1d61e4db">2011.mtsummit-papers.29</url>
       <bibkey>zhai-etal-2011-simple</bibkey>
     </paper>
@@ -311,10 +311,10 @@
     <paper id="32">
       <title>Domain Adaptation in Statistical Machine Translation of User-Forum Data using Component Level Mixture Modelling</title>
       <author><first>Pratyush</first><last>Banerjee</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="93cecc6b">2011.mtsummit-papers.32</url>
       <bibkey>banerjee-etal-2011-domain</bibkey>
     </paper>
@@ -322,7 +322,7 @@
       <title>Bagging-based System Combination for Domain Adaption</title>
       <author><first>Linfeng</first><last>Song</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <url hash="5585cabc">2011.mtsummit-papers.33</url>
       <bibkey>song-etal-2011-bagging</bibkey>
@@ -339,9 +339,9 @@
     </paper>
     <paper id="35">
       <title>Statistical Post-Editing for a Statistical <fixed-case>MT</fixed-case> System</title>
-      <author><first>Hanna</first><last>Bechara</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Bechara</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="db4542a2">2011.mtsummit-papers.35</url>
       <bibkey>bechara-etal-2011-statistical</bibkey>
     </paper>
@@ -359,8 +359,8 @@
       <title>Searching Translation Memories for Paraphrases</title>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="a61932e8">2011.mtsummit-papers.37</url>
       <bibkey>utiyama-etal-2011-searching</bibkey>
     </paper>
@@ -383,7 +383,7 @@
       <author><first>Masamichi</first><last>Ideue</last></author>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="a75513d6">2011.mtsummit-papers.40</url>
       <bibkey>ideue-etal-2011-comparison</bibkey>
     </paper>
@@ -391,14 +391,14 @@
       <title>Improving Low-Resource Statistical Machine Translation with a Novel Semantic Word Clustering Algorithm</title>
       <author><first>Jeff</first><last>Ma</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="94223a40">2011.mtsummit-papers.41</url>
       <bibkey>ma-etal-2011-improving</bibkey>
     </paper>
     <paper id="42">
       <title>Multi-granularity Word Alignment and Decoding for Agglutinative Language Translation</title>
       <author><first>Zhiyang</first><last>Wang</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <url hash="e661ba61">2011.mtsummit-papers.42</url>
       <bibkey>wang-etal-2011-multi</bibkey>
@@ -429,7 +429,7 @@
     <paper id="46">
       <title>Generating Virtual Parallel Corpus: A Compatibility Centric Method</title>
       <author><first>Jia</first><last>Xu</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <url hash="09f1e154">2011.mtsummit-papers.46</url>
       <bibkey>xu-sun-2011-generating</bibkey>
     </paper>
@@ -470,8 +470,8 @@
       <title>A Comparison Study of Parsers for Patent Machine Translation</title>
       <author><first>Isao</first><last>Goto</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="0e32b57f">2011.mtsummit-papers.51</url>
       <bibkey>goto-etal-2011-comparison</bibkey>
     </paper>
@@ -480,7 +480,7 @@
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="69ee3cb2">2011.mtsummit-papers.52</url>
       <bibkey>he-etal-2011-rich</bibkey>
     </paper>
@@ -496,20 +496,20 @@
       <title>The Cultivation of a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Trilingual Parallel Corpus from Comparable Patents</title>
       <author><first>Bin</first><last>Lu</last></author>
       <author><first>Ka Po</first><last>Chow</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <url hash="5eccb63d">2011.mtsummit-papers.54</url>
       <bibkey>lu-etal-2011-cultivation</bibkey>
     </paper>
     <paper id="55">
       <title>Evaluation Methodology and Results for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case></title>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <url hash="04c9bdb6">2011.mtsummit-papers.55</url>
       <bibkey>hamon-choukri-2011-evaluation</bibkey>
     </paper>
     <paper id="56">
       <title>Example-Based Machine Translation for Low-Resource Language Using Chunk-String Templates</title>
-      <author><first>Md. Anwarus Salam</first><last>Khan</last></author>
+      <author id="md-anwarus-salam-khan"><first>Md. Anwarus Salam</first><last>Khan</last></author>
       <author><first>Setsuo</first><last>Yamada</last></author>
       <author><first>Tetsuro</first><last>Nishino</last></author>
       <url hash="bd1a1c87">2011.mtsummit-papers.56</url>
@@ -517,8 +517,8 @@
     </paper>
     <paper id="57">
       <title>Improve <fixed-case>SMT</fixed-case> with Source-Side “Topic-Document” Distributions</title>
-      <author><first>Zhengxian</first><last>Gong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Liangyou</first><last>Li</last></author>
       <url hash="98d194ec">2011.mtsummit-papers.57</url>
       <bibkey>gong-etal-2011-improve</bibkey>
@@ -537,13 +537,13 @@
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <url hash="b23c2982">2011.mtsummit-papers.59</url>
       <bibkey>bentivogli-etal-2011-getting</bibkey>
     </paper>
     <paper id="60">
       <title>A Framework for Diagnostic Evaluation of <fixed-case>MT</fixed-case> Based on Linguistic Checkpoints</title>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -553,13 +553,13 @@
     <paper id="61">
       <title>Comparative Evaluation of Term Informativeness Measures in Machine Translation Evaluation Metrics</title>
       <author><first>Billy</first><last>Wong</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="d5222b07">2011.mtsummit-papers.61</url>
       <bibkey>wong-kit-2011-comparative</bibkey>
     </paper>
     <paper id="62">
       <title>System Combination for Machine Translation Based on Text-to-Text Generation</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Kathleen</first><last>Mckeown</last></author>
       <url hash="166b5632">2011.mtsummit-papers.62</url>
       <bibkey>ma-mckeown-2011-system</bibkey>
@@ -567,15 +567,15 @@
     <paper id="63">
       <title>Hybrid Machine Translation Guided by a Rule–Based System</title>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="454bfac8">2011.mtsummit-papers.63</url>
       <bibkey>espana-bonet-etal-2011-hybrid</bibkey>
     </paper>
     <paper id="64">
       <title>Integrating shallow-transfer rules into phrase-based statistical machine translation</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <url hash="f701043e">2011.mtsummit-papers.64</url>
@@ -584,7 +584,7 @@
     <paper id="65">
       <title>Hypergraph Training and Decoding of System Combination in <fixed-case>SMT</fixed-case></title>
       <author><first>Yupeng</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <url hash="92c0f029">2011.mtsummit-papers.65</url>
       <bibkey>liu-etal-2011-hypergraph</bibkey>
@@ -592,7 +592,7 @@
     <paper id="66">
       <title>Study on the Impact Factors of the Translators’ Post-editing Efficiency in a Collaborative Translation Environment</title>
       <author><first>Na</first><last>Ye</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <url hash="11d1ee53">2011.mtsummit-papers.66</url>
       <bibkey>ye-zhang-2011-study</bibkey>
     </paper>
@@ -643,7 +643,7 @@
       <title>Broadcast news speech-to-text translation experiments</title>
       <author><first>Sylvain</first><last>Raybaud</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <url hash="f81a59f3">2011.mtsummit-systems.3</url>
       <bibkey>raybaud-etal-2011-broadcast</bibkey>
     </paper>
@@ -658,9 +658,9 @@
     </paper>
     <paper id="5">
       <title><fixed-case>L</fixed-case>ets<fixed-case>MT</fixed-case>!: Cloud-Based Platform for Building User Tailored Machine Translation Engines</title>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <author><first>Raivis</first><last>Skadinš</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="cb68b11c">2011.mtsummit-systems.5</url>
       <bibkey>vasiljevs-etal-2011-letsmt</bibkey>
     </paper>
@@ -684,14 +684,14 @@
       <title>From the Confidence Estimation of Machine Translation to the Integration of <fixed-case>MT</fixed-case> and Translation Memory</title>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="2318c8be">2011.mtsummit-tutorials.2</url>
       <abstract>In this tutorial, we cover techniques that facilitate the integration of Machine Translation (MT) and Translation Memory (TM), which can help the adoption of MT technology in localisation industry. The tutorial covers four parts: i) brief introduction of MT and TM systems, ii) MT confidence estimation measures tailored for the TM environment, iii) segment-level MT and MT integration, iv) sub-segment level MT and TM integration, and v) human evaluation of MT and TM integration. We will first briefly describe and compare how translations are generated in MT and TM systems, and suggest possible avenues to combines these two systems. We will also cover current quality / cost estimation measures applied in MT and TM systems, such as the fuzzy-match score in the TM, and the evaluation/confidence metrics used to judge MT outputs. We then move on to introduce the recent developments in the field of MT confidence estimation tailored towards predicting post-editing efforts. We will especially focus on the confidence metrics proposed by Specia et al., which is shown to have high correlation with human preference, as well as post-editing time. For segment-level MT and TM integration, we present translation recommendation and translation re-ranking models, where the integration happens at the 1-best or the N-best level, respectively. Given an input to be translated, MT-TM recommendation compares the output from the MT and the TM systems, and presents the better one to the post-editor. MT-TM re-ranking, on the other hand, combines k-best lists from both systems, and generates a new list according to estimated post-editing effort. We observe high precision of these models in automatic and human evaluations, indicating that they can be integrated into TM environments without the risk of deteriorating the quality of the post-editing candidate. For sub-segment level MT and TM integration, we try to reuse high quality TM chunks to improve the quality of MT systems. We can also predict whether phrase pairs derived from fuzzy matches should be used to constrain the translation of an input segment. Using a series of linguistically- motivated features, our constraints lead both to more consistent translation output, and to improved translation quality, as is measured by automatic evaluation scores. Finally, we present several methodologies that can be used to track post-editing effort, perform human evaluation of MT-TM integration, or help translators to access MT outputs in a TM environment.</abstract>
       <bibkey>ma-etal-2011-confidence</bibkey>
     </paper>
     <paper id="3">
       <title>Evaluating the Output of Machine Translation Systems</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="2318c8be">2011.mtsummit-tutorials.3</url>
       <abstract>This half-day tutorial provides a broad overview of how to evaluate translations that are produced by machine translation systems. The range of issues covered includes a broad survey of both human evaluation measures and commonly-used automated metrics, and a review of how these are used for various types of evaluation tasks, such as assessing the translation quality of MT-translated sentences, comparing the performance of alternative MT systems, or measuring the productivity gains of incorporating MT into translation workflows.</abstract>
       <bibkey>lavie-2011-evaluating</bibkey>
@@ -737,7 +737,7 @@
     <paper id="4">
       <title>Feedback Selecting of Manually Acquired Rules Using Automatic Evaluation</title>
       <author><first>Xianhua</first><last>Li</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Hao</first><last>Yu</last></author>
@@ -746,7 +746,7 @@
     </paper>
     <paper id="5">
       <title>Investigation for Translation Disambiguation of Verbs in Patent Sentences using Word Grouping</title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <author><first>Yuichi</first><last>Takano</last></author>
       <url hash="6f7d91ea">2011.mtsummit-wpt.5</url>
       <bibkey>yokoyama-takano-2011-investigation</bibkey>
@@ -765,8 +765,8 @@
       <author><first>Ramona</first><last>Enache</last></author>
       <author><first>Adam</first><last>Slaski</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
       <url hash="261aabde">2011.mtsummit-wpt.7</url>
       <bibkey>espana-bonet-etal-2011-patent</bibkey>
     </paper>
diff --git a/data/xml/2011.tal.xml b/data/xml/2011.tal.xml
index fa34c913a8..3fc02ebcc8 100644
--- a/data/xml/2011.tal.xml
+++ b/data/xml/2011.tal.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2023-02-23" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 52, Numéro 1 : Varia [Varia]</booktitle>
-      <editor><first>Éric</first><last>Villemonte de La Clergerie</last></editor>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de La Clergerie</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Yves</first><last>Lepage</last></editor>
       <editor><first>François</first><last>Yvon</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
@@ -41,7 +41,7 @@
       <author><first>Denis</first><last>Maurel</last></author>
       <author><first>Nathalie</first><last>Friburger</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
       <pages>69–96</pages>
       <url hash="bb00dbdd">2011.tal-1.3</url>
@@ -52,7 +52,7 @@
       <title>Identification des assertions dans les textes médicaux : application à la relation patient, problème médical [Identification of assertions in the medical texts: application to the relation patient, medical problem]</title>
       <author><first>Amandine</first><last>Périnet</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>97–132</pages>
       <url hash="dec4ae1f">2011.tal-1.4</url>
       <language>fra</language>
@@ -60,8 +60,8 @@
     </paper>
     <paper id="5">
       <title>Pour une interlangue utile en traduction automatique de la parole dans des domaines limités [Towards an interlingua for speech translation in limited domains]</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Paula</first><last>Estella</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Maria</first><last>Georgescul</last></author>
@@ -72,8 +72,8 @@
     </paper>
     <paper id="6">
       <title>Comprendre les effets des erreurs d’annotations des plateformes de <fixed-case>TAL</fixed-case>, une étude sur la résolution des anaphores pronominales [Understand the effects of erroneous annotations produced by <fixed-case>NLP</fixed-case> pipelines, a case study on the pronominal anaphora resolution]</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>161–185</pages>
       <url hash="51ab3c2e">2011.tal-1.6</url>
       <language>fra</language>
@@ -84,7 +84,7 @@
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 52, Numéro 2 : Vers la morphologie et au-delà [Toward Morphology and beyond]</booktitle>
       <editor><first>Nabil</first><last>Hathout</last></editor>
-      <editor><first>Fiammetta</first><last>Namer</last></editor>
+      <editor id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2011</year>
@@ -107,7 +107,7 @@
     <paper id="2">
       <title>Moranapho: un système multilingue d’analyse morphologique basé sur l’analogie formelle [Moranapho: a multilingual system for morphological analysis based on formal analogy]</title>
       <author><first>Jean-François</first><last>Lavallée</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>17–44</pages>
       <url hash="c0840e87">2011.tal-2.2</url>
       <language>fra</language>
@@ -127,7 +127,7 @@
     <paper id="4">
       <title>Modélisation et implémentation de phénomènes flexionnels non canoniques [Modeling and implementing non canonical morphological phenomena]</title>
       <author><first>Géraldine</first><last>Walther</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>91–122</pages>
       <url hash="d039fad9">2011.tal-2.4</url>
       <language>fra</language>
@@ -155,8 +155,8 @@
   <volume id="3" ingest-date="2023-02-26" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 52, Numéro 3 : Ressources linguistiques libres [Free Language Resources]</booktitle>
-      <editor><first>Nuria</first><last>Bel</last></editor>
-      <editor><first>Benoît</first><last>Sagot</last></editor>
+      <editor id="nuria-bel"><first>Nuria</first><last>Bel</last></editor>
+      <editor id="benoit-sagot"><first>Benoît</first><last>Sagot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2011</year>
@@ -200,10 +200,10 @@
     </paper>
     <paper id="4">
       <title>Le corpus <fixed-case>ANNODIS</fixed-case>, un corpus enrichi d’annotations discursives [The <fixed-case>ANNODIS</fixed-case> corpus, a corpus enriched with discourse annotations]</title>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
       <author><first>Stergos D.</first><last>Afantenos</last></author>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>71–101</pages>
       <url hash="c8823e2b">2011.tal-3.4</url>
       <language>fra</language>
@@ -212,8 +212,8 @@
     <paper id="5">
       <title>Définition et conception d’une interface pour l’exploitation de corpus arborés pour non-informaticiens : la plateforme <fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest du projet Scientext [Definition and design of an interface for treebanks exploitation by non-computer scientists: the <fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest platform from Scientext project]</title>
       <author><first>Achille</first><last>Falaise</last></author>
-      <author><first>Agnès</first><last>Tutin</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>103–128</pages>
       <url hash="0cc654ea">2011.tal-3.5</url>
       <language>fra</language>
@@ -257,7 +257,7 @@
       <author><first>Isabella</first><last>Chiari</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
       <author><first>Laure</first><last>Vieu</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>217–243</pages>
       <url hash="263cece1">2011.tal-3.9</url>
       <bibkey>vetere-etal-2011-senso</bibkey>
diff --git a/data/xml/2011.tc.xml b/data/xml/2011.tc.xml
index 35dda78019..d3d24f9ae2 100644
--- a/data/xml/2011.tc.xml
+++ b/data/xml/2011.tc.xml
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>An effective model for insertion of translation technologies into <fixed-case>US</fixed-case> government translation environments</title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <url hash="9626273f">2011.tc-1.3</url>
       <bibkey>van-ess-dykema-2011-effective</bibkey>
     </paper>
@@ -63,22 +63,22 @@
     <paper id="8">
       <title>Towards on-line knowledge sharing dictionaries for <fixed-case>E</fixed-case>uropean law: the Legal Taxonomy Syllabus 3.0</title>
       <author><first>Elena</first><last>Grasso</last></author>
-      <author><first>Piercarlo</first><last>Rossi</last></author>
+      <author id="piercarlo-rossi"><first>Piercarlo</first><last>Rossi</last></author>
       <author><first>Andrea</first><last>Violato</last></author>
       <url hash="b57fb111">2011.tc-1.8</url>
       <bibkey>grasso-etal-2011-towards</bibkey>
     </paper>
     <paper id="9">
       <title>Machine translation between uncommon language pairs via a third common language: the case of patents</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Bin</first><last>Lu</last></author>
       <url hash="8fcbe1dc">2011.tc-1.9</url>
       <bibkey>tsou-lu-2011-machine</bibkey>
     </paper>
     <paper id="10">
       <title>Operationalization of interactive multilingual gateways (i<fixed-case>MAG</fixed-case>s) in the Traouiero project</title>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
       <author><first>Achille</first><last>Falaise</last></author>
       <author><first>Nguyen</first><last>Hong-Thai</last></author>
       <url hash="340e67c0">2011.tc-1.10</url>
diff --git a/data/xml/2012.amta.xml b/data/xml/2012.amta.xml
index 5bdc0fac54..54a8f9a760 100644
--- a/data/xml/2012.amta.xml
+++ b/data/xml/2012.amta.xml
@@ -17,9 +17,9 @@
     </meta>
     <paper id="1">
       <title>Domain Adaptation in Machine Translation: Findings from the 2012 <fixed-case>J</fixed-case>ohns <fixed-case>H</fixed-case>opkins Summer Workshop</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <url hash="3c03f46d">2012.amta-keynotes.1</url>
       <bibkey>daume-iii-etal-2012-domain</bibkey>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>Language Research at <fixed-case>DARPA</fixed-case>-Machine Translation and Beyond</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <url hash="5cf2f073">2012.amta-keynotes.3</url>
       <bibkey>dorr-2012-language</bibkey>
     </paper>
@@ -62,7 +62,7 @@
       <title>Hierarchical Phrase-Based <fixed-case>MT</fixed-case> for Phonetic Representation-Based Speech Translation</title>
       <author><first>Zeeshan</first><last>Ahmed</last></author>
       <author><first>Jie</first><last>Jiang</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Peter</first><last>Cahill</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="c5d37dd5">2012.amta-papers.1</url>
@@ -72,7 +72,7 @@
     <paper id="2">
       <title>Identifying Infrequent Translations by Aligning Non Parallel Sentences</title>
       <author><first>Julien</first><last>Bourdaillet</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="b4e0c572">2012.amta-papers.2</url>
       <abstract>Aligning a sequence of words to one of its infrequent translations is a difficult task. We propose a simple and original solution to this problem that yields to significant gains over a state-of-the-art transpotting task. Our approach consists in aligning non parallel sentences from the training data in order to reinforce online the alignment models. We show that using only a few pairs of non parallel sentences allows to improve significantly the alignment of infrequent translations.</abstract>
       <bibkey>bourdaillet-langlais-2012-identifying</bibkey>
@@ -80,16 +80,16 @@
     <paper id="3">
       <title>Sample Selection for Large-scale <fixed-case>MT</fixed-case> Discriminative Training</title>
       <author><first>Yuan</first><last>Cao</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <url hash="5ef6c705">2012.amta-papers.3</url>
       <abstract>Discriminative training for MT usually involves numerous features and requires large-scale training set to reach reliable parameter estimation. Other than using the expensive human-labeled parallel corpora for training, semi-supervised methods have been proposed to generate huge amount of “hallucinated” data which relieves the data sparsity problem. However the large training set contains both good samples which are suitable for training and bad ones harmful to the training. How to select training samples from vast amount of data can greatly affect the training performance. In this paper we propose a method for selecting samples that are most suitable for discriminative training according to a criterion measuring the dataset quality. Our experimental results show that by adding samples to the training set selectively, we are able to exceed the performance of system trained with the same amount of samples selected randomly.</abstract>
       <bibkey>cao-khudanpur-2012-sample</bibkey>
     </paper>
     <paper id="4">
       <title>One System, Many Domains: Open-Domain Statistical Machine Translation via Feature Augmentation</title>
-      <author><first>Jonathan</first><last>Clark</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <url hash="a6fd4692">2012.amta-papers.4</url>
       <abstract>In this paper, we introduce a simple technique for incorporating domain information into a statistical machine translation system that significantly improves translation quality when test data comes from multiple domains. Our approach augments (conjoins) standard translation model and language model features with domain indicator features and requires only minimal modifications to the optimization and decoding procedures. We evaluate our method on two language pairs with varying numbers of domains, and observe significant improvements of up to 1.0 BLEU.</abstract>
       <bibkey>clark-etal-2012-one</bibkey>
@@ -97,7 +97,7 @@
     <paper id="5">
       <title>Identification of Fertile Translations in Comparable Corpora: A Morpho-Compositional Approach</title>
       <author><first>Estelle</first><last>Delpech</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Claire</first><last>Lemaire</last></author>
       <url hash="74de3cb7">2012.amta-papers.5</url>
@@ -107,14 +107,14 @@
     <paper id="6">
       <title>Challenges in Predicting Machine Translation Utility for Human Post-Editors</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="6079e7aa">2012.amta-papers.6</url>
       <abstract>As machine translation quality continues to improve, the idea of using MT to assist human translators becomes increasingly attractive. In this work, we discuss and provide empirical evidence of the challenges faced when adapting traditional MT systems to provide automatic translations for human post-editors to correct. We discuss the differences between this task and traditional adequacy-based tasks and the challenges that arise when using automatic metrics to predict the amount of effort required to post-edit translations. A series of experiments simulating a real-world localization scenario shows that current metrics under-perform on this task, even when tuned to maximize correlation with expert translator judgments, illustrating the need to rethink traditional MT pipelines when addressing the challenges of this translation task.</abstract>
       <bibkey>denkowski-lavie-2012-challenges</bibkey>
     </paper>
     <paper id="7">
       <title>The Impact of Sentence Alignment Errors on Phrase-Based Machine Translation Performance</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <author><first>George</first><last>Foster</last></author>
       <url hash="a2c5e0d2">2012.amta-papers.7</url>
@@ -124,7 +124,7 @@
     <paper id="8">
       <title>Pivot Lightly-Supervised Training for Statistical Machine Translation</title>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="7e397309">2012.amta-papers.8</url>
       <abstract>In this paper, we investigate large-scale lightly-supervised training with a pivot language: We augment a baseline statistical machine translation (SMT) system that has been trained on human-generated parallel training corpora with large amounts of additional unsupervised parallel data; but instead of creating this synthetic data from monolingual source language data with the baseline system itself, or from target language data with a reverse system, we employ a parallel corpus of target language data and data in a pivot language. The pivot language data is automatically translated into the source language, resulting in a trilingual corpus with unsupervised source language side. We augment our baseline system with the unsupervised source-target parallel data. Experiments are conducted for the German-French language pair using the standard WMT newstest sets for development and testing. We obtain the unsupervised data by translating the English side of the English-French 109 corpus to German. With careful system design, we are able to achieve improvements of up to +0.4 points BLEU / -0.7 points TER over the baseline.</abstract>
       <bibkey>huck-ney-2012-pivot</bibkey>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="10">
       <title>Building <fixed-case>MT</fixed-case> for a Severely Under-Resourced Language: White <fixed-case>H</fixed-case>mong</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Phong</first><last>Yang</last></author>
       <url hash="1c3951eb">2012.amta-papers.10</url>
       <abstract>In this paper, we discuss the development of statistical machine translation for English to/from White Hmong (Language code: mww). White Hmong is a Hmong-Mien language, originally spoken mostly in Southeast Asia, but now predominantly spoken by a large diaspora throughout the world, with populations in the United States, Australia, France, Thailand and elsewhere. Building statistical translation systems for Hmong proved to be incredibly challenging since there are no known parallel or monolingual corpora for the language; in fact, finding data for Hmong proved to be one of the biggest challenges to getting the project off the ground. It was only through a close collaboration with the Hmong community, and active and tireless participation of Hmong speakers, that it became possible to build up a critical mass of data to make the translation project a reality. We see this effort as potentially replicable for other severely resource poor languages of the world, which is likely the case for the majority of the languages still spoken on the planet. Further, the work here suggests that research and work on other severely under-resourced languages can have significant positive impacts for the affected communities, both for accessibility and language preservation.</abstract>
@@ -147,8 +147,8 @@
     </paper>
     <paper id="11">
       <title>Phrase-level System Combination for Machine Translation Based on Target-to-Target Decoding</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="ee7948c4">2012.amta-papers.11</url>
       <abstract>In this paper, we propose a novel lattice-based MT combination methodology that we call Target-to-Target Decoding (TTD). The combination process is carried out as a “translation” from backbone to the combination result. This perspective suggests the use of existing phrase-based MT techniques in the combination framework. We show how phrase extraction rules and confidence estimations inspired from machine translation improve results. We also propose system-specific LMs for estimating N-gram consensus. Our results show that our approach yields a strong improvement over the best single MT system and competes with other state-of-the-art combination systems.</abstract>
       <bibkey>ma-mckeown-2012-phrase</bibkey>
@@ -157,7 +157,7 @@
       <title>Lost &amp; Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval</title>
       <author><first>Kristen</first><last>Parton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="eb465152">2012.amta-papers.12</url>
       <abstract>In an ideal cross-lingual information retrieval (CLIR) system, a user query would generate a search over documents in a different language and the relevant results would be presented in the user’s language. In practice, CLIR systems are typically evaluated by judging result relevance in the document language, to factor out the effects of translating the results using machine translation (MT). In this paper, we investigate the influence of four different approaches for integrating MT and CLIR on both retrieval accuracy and user judgment of relevancy. We create a corpus with relevance judgments for both human and machine translated results, and use it to quantify the effect that MT quality has on end-to-end relevance. We find that MT errors result in a 16-39% decrease in mean average precision over the ground truth system that uses human translations. MT errors also caused relevant sentences to appear irrelevant – 5-19% of sentences were relevant in human translation, but were judged irrelevant in MT. To counter this degradation, we present two hybrid retrieval models and two automatic MT post-editing techniques and show that these approaches substantially mitigate the errors and improve the end-to-end relevance.</abstract>
       <bibkey>parton-etal-2012-lost</bibkey>
@@ -165,8 +165,8 @@
     <paper id="13">
       <title>A Graph-based Strategy to Streamline Translation Quality Assessments</title>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Lluís</first><last>Formiga</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="4086a65c">2012.amta-papers.13</url>
       <abstract>We present a detailed analysis of a graph-based annotation strategy that we employed to annotate a corpus of 11,292 real-world English to Spanish automatic translations with relative (ranking) and absolute (adequate/non-adequate) quality assessments. The proposed approach, inspired by previous work in Interactive Evolutionary Computation and Interactive Genetic Algorithms, results in a simpler and faster annotation process. We empirically compare the method against a traditional, explicit ranking approach, and show that the graph-based strategy: 1) is considerably faster, and 2) produces consistently more reliable annotations.</abstract>
       <bibkey>pighin-etal-2012-graph</bibkey>
@@ -175,7 +175,7 @@
       <title>Machine Translation with Binary Feedback: a Large-Margin Approach</title>
       <author><first>Avneesh</first><last>Saluja</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <url hash="b0ae725a">2012.amta-papers.14</url>
       <abstract>Viewing machine translation as a structured classification problem has provided a gateway for a host of structured prediction techniques to enter the field. In particular, large-margin structured prediction methods for discriminative training of feature weights, such as the structured perceptron or MIRA, have started to match or exceed the performance of existing methods such as MERT. One issue with structured problems in general is the difficulty in obtaining fully structured labels, e.g., in machine translation, obtaining reference translations or parallel sentence corpora for arbitrary language pairs. Another issue, more specific to the translation domain, is the difficulty in online training of machine translation systems, since existing methods often require bilingual knowledge to correct translation output online. We propose a solution to these two problems, by demonstrating a way to incorporate binary-labeled feedback (i.e., feedback on whether a translation hypothesis is a “good” or understandable one or not), a form of supervision that can be easily integrated in an online manner, into a machine translation framework. Experimental results show marked improvement by incorporating binary feedback on unseen test data, with gains exceeding 5.5 BLEU points.</abstract>
       <bibkey>saluja-etal-2012-machine</bibkey>
@@ -189,8 +189,8 @@
     </paper>
     <paper id="16">
       <title>Compact Rule Extraction for Hierarchical Phrase-based Translation</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <url hash="e004c092">2012.amta-papers.16</url>
       <abstract>This paper introduces two novel approaches for extracting compact grammars for hierarchical phrase-based translation. The first is a combinatorial optimization approach and the second is a Bayesian model over Hiero grammars using Variational Bayes for inference. In contrast to the conventional Hiero (Chiang, 2007) rule extraction algorithm , our methods extract compact models reducing model size by 17.8% to 57.6% without impacting translation quality across several language pairs. The Bayesian model is particularly effective for resource-poor languages with evidence from Korean-English translation. To our knowledge, this is the first alternative to Hiero-style rule extraction that finds a more compact synchronous grammar without hurting translation performance.</abstract>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="17">
       <title>Non-linear n-best List Reranking with Few Features</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <url hash="6b885351">2012.amta-papers.17</url>
@@ -210,7 +210,7 @@
       <author><first>Wei</first><last>Wang</last></author>
       <author><first>Klaus</first><last>Macherey</last></author>
       <author><first>Wolfgang</first><last>Macherey</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <url hash="cd4c7f9b">2012.amta-papers.18</url>
       <abstract>We present a simple and effective infrastructure for domain adaptation for statistical machine translation (MT). To build MT systems for different domains, it trains, tunes and deploys a single translation system that is capable of producing adapted domain translations and preserving the original generic accuracy at the same time. The approach unifies automatic domain detection and domain model parameterization into one system. Experiment results on 20 language pairs demonstrate its viability.</abstract>
@@ -219,7 +219,7 @@
     <paper id="19">
       <title>Detailed Analysis of Different Strategies for Phrase Table Adaptation in <fixed-case>SMT</fixed-case></title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="90cc2a7b">2012.amta-papers.19</url>
       <abstract>This paper gives a detailed analysis of different approaches to adapt a statistical machine translation system towards a target domain using small amounts of parallel in-domain data. Therefore, we investigate the differences between the approaches addressing adaptation on the two main steps of building a translation model: The candidate selection and the phrase scoring. For the latter step we characterized the differences by four key aspects. We performed experiments on two different tasks of speech translation and analyzed the influence of the different aspects on the overall translation quality. On both tasks we could show significant improvements by using the presented adaptation techniques.</abstract>
       <bibkey>niehues-waibel-2012-detailed</bibkey>
@@ -227,7 +227,7 @@
     <paper id="20">
       <title>Machine Translation of Labeled Discourse Connectives</title>
       <author><first>Thomas</first><last>Meyer</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Najeh</first><last>Hajlaoui</last></author>
       <author><first>Andrea</first><last>Gesmundo</last></author>
       <url hash="48aad260">2012.amta-papers.20</url>
@@ -271,8 +271,8 @@
     </paper>
     <paper id="25">
       <title>Using Source-Language Transformations to Address Register Mismatches in <fixed-case>SMT</fixed-case></title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <url hash="775cfdb6">2012.amta-papers.25</url>
       <abstract>Mismatches between training and test data are a ubiquitous problem for real SMT applications. In this paper, we examine a type of mismatch that commonly arises when translating from French and similar languages: available training data is mostly formal register, but test data may well be informal register. We consider methods for defining surface transformations that map common informal language constructions into their formal language counterparts, or vice versa; we then describe two ways to use these mappings, either to create artificial training data or to pre-process source text at run-time. An initial evaluation performed using crowd-sourced comparisons of alternate translations produced by a French-to-English SMT system suggests that both methods can improve performance, with run-time pre-processing being the more effective of the two.</abstract>
@@ -288,8 +288,8 @@
     </paper>
     <paper id="27">
       <title>A Detailed Analysis of Phrase-based and Syntax-based <fixed-case>MT</fixed-case>: The Search for Systematic Differences</title>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <url hash="f0081b07">2012.amta-papers.27</url>
@@ -328,7 +328,7 @@
       <author><first>Wenqian</first><last>Zhao</last></author>
       <author><first>Cheng Chieh</first><last>Lien</last></author>
       <author><first>Ryan</first><last>Knudson</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <url hash="f086cb4b">2012.amta-commercial.1</url>
       <abstract>This paper describes the role of machine translation (MT) for multilingual information access, a service that is desired by digital libraries that wish to provide cross-cultural access to their collections. To understand the performance of MT, we have developed HeMT: an integrated multilingual evaluation platform (<url>http://txcdk-v10.unt.edu/HeMT/</url>) to facilitate human evaluation of machine translation. The results of human evaluation using HeMT on three online MT services are reported. Challenges and benefits of crowdsourcing and collaboration based on our experience are discussed. Additionally, we present the analysis of the translation errors and propose Multi-engine MT strategies to improve translation performance.</abstract>
       <bibkey>chen-etal-2012-integrating</bibkey>
@@ -457,7 +457,7 @@
     <paper id="17">
       <title><fixed-case>IPT</fixed-case>ranslator: Facilitating Patent Search with Machine Translation</title>
       <author><first>John</first><last>Tinsley</last></author>
-      <author><first>Alexandru</first><last>Ceausu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceausu</last></author>
       <author><first>Jian</first><last>Zhang</last></author>
       <author><first>Heidi</first><last>Depraetere</last></author>
       <author><first>Joeri</first><last>Van de Walle</last></author>
@@ -513,7 +513,7 @@
     </paper>
     <paper id="4">
       <title>Producing Data for Under-Resourced Languages: A <fixed-case>D</fixed-case>ari-<fixed-case>E</fixed-case>nglish Parallel Corpus of Multi-Genre Text</title>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <url hash="83d5322c">2012.amta-government.4</url>
       <abstract>In Developers producing language technology for under-resourced languages often find relatively little machine readable text for data required to train machine translation systems. Typically, the kinds of text that are most accessible for production of parallel data are news and news-related genres, yet the language that requires translation for analysts and decision-makers reflects a broad range of forms and contents. The proposed paper will describe an effort funded by the ODNI FLPO in which the Army Research Laboratory, assisted by MITRE language technology researchers, produced a Dari-English parallel corpus containing text in a variety of styles and genres that more closely resemble the kinds of documents needed by government users than do traditional news genres. The data production effort began with a survey of Dari documents catalogued in a government repository of material obtained from the field in Afghanistan. Because the documents in the repository are not available for creation of parallel corpora, the goal was to quantify the types of documents in the collection and identify their linguistic features in order to find documents that are similar. Document images were obtained from two sources: (1) the Preserving and Creating Access to Unique Afghan Records collection, an online resource produced by the University of Arizona Libraries and the Afghanistan Centre at Kabul University and (2) The University of Nebraska Arthur Paul Afghanistan Collection. For the latter, document images were obtained by camera capture of books and by selecting pdf images of microfiche records. A set of 1395 document page images was selected to provide 250,000 translated English words in 10 content domains. The images were transcribed and translated according to specifications designed to maximize the quality and usefulness of the data. The corpus will be used to create a Dari-English glossary, and an experiment will quantify improvements to Dari-English translation of multi-genre text when a generic Dari-English machine translation system is customized using the corpus. The proposed paper will present highlights from these efforts.</abstract>
       <bibkey>condon-2012-producing</bibkey>
@@ -540,7 +540,7 @@
     </paper>
     <paper id="8">
       <title>Government Catalog of Language Resources (<fixed-case>GCLR</fixed-case>)</title>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <abstract>The purpose of this presentation is to discuss recent efforts within the government to address issues of evaluation and return on investment. Pressure to demonstrate value has increased with the growing amount of foreign language information available, with the variety of languages needing to be exploited, and with the increasing gaps between numbers of language-enabled people and the amount of work to be done. This pressure is only growing as budgets shrink, and as global development grows. Over the past year, the ODNI has led an effort to pull together different government stakeholders to determine some baseline standards for determining Return on Investment via task-based evaluation. Stakeholder consensus on major HLT tasks has involved examination of the different approaches to determining return on investment and how it relates use of HLT in the workflow. In addition to reporting on the goals and progress of this group, we will present future directions and invite community input.</abstract>
       <bibkey>klavans-2012-government</bibkey>
     </paper>
@@ -678,7 +678,7 @@
     </paper>
     <paper id="4">
       <title>Reliably Assessing the Quality of Post-edited Translation Based on Formalized Structured Translation Specifications</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <author><first>Jason</first><last>Housley</last></author>
       <author><first>Paul J.</first><last>Fields</last></author>
       <author><first>Emily</first><last>Tuioti</last></author>
@@ -690,7 +690,7 @@
       <title>Learning to Automatically Post-Edit Dropped Words in <fixed-case>MT</fixed-case></title>
       <author><first>Jacob</first><last>Mundt</last></author>
       <author><first>Kristen</first><last>Parton</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="07213e78">2012.amta-wptp.5</url>
       <abstract>Automatic post-editors (APEs) can improve adequacy of MT output by detecting and reinserting dropped content words, but the location where these words are inserted is critical. In this paper, we describe a probabilistic approach for learning reinsertion rules for specific languages and MT systems, as well as a method for synthesizing training data from reference translations. We test the insertion logic on MT systems for Chinese to English and Arabic to English. Our adaptive APE is able to insert within 3 words of the best location 73% of the time (32% in the exact location) in Arabic-English MT output, and 67% of the time in Chinese-English output (30% in the exact location), and delivers improved performance on automated adequacy metrics over a previous rule-based approach to insertion. We consider how particular aspects of the insertion problem make it particularly amenable to machine learning solutions.</abstract>
       <bibkey>mundt-etal-2012-learning</bibkey>
@@ -706,7 +706,7 @@
     <paper id="7">
       <title>To post-edit or not to post-edit? Estimating the benefits of <fixed-case>MT</fixed-case> post-editing for a <fixed-case>E</fixed-case>uropean organization</title>
       <author><first>Alexandros</first><last>Poulis</last></author>
-      <author><first>David</first><last>Kolovratnik</last></author>
+      <author id="david-kolovratnik"><first>David</first><last>Kolovratnik</last></author>
       <url hash="c5d1477f">2012.amta-wptp.7</url>
       <abstract>In the last few years the European Parliament has witnessed a significant increase in translation demand. Although Translation Memory (TM) tools, terminology databases and bilingual concordancers have provided significant leverage in terms of quality and productivity the European Parliament is in need for advanced language technology to keep facing successfully the challenge of multilingualism. This paper describes an ongoing large-scale machine translation post-editing evaluation campaign the purpose of which is to estimate the business benefits from the use of machine translation for the European Parliament. This paper focuses mainly on the design, the methodology and the tools used by the evaluators but it also presents some preliminary results for the following language pairs: Polish-English, Danish-English, Lithuanian-English, English-German and English-French.</abstract>
       <bibkey>poulis-kolovratnik-2012-post</bibkey>
@@ -755,7 +755,7 @@
     <paper id="1">
       <title>Translating <fixed-case>E</fixed-case>nglish Discourse Connectives into <fixed-case>A</fixed-case>rabic: a Corpus-based Analysis and an Evaluation Metric</title>
       <author><first>Najeh</first><last>Hajlaoui</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>1-8</pages>
       <url hash="19453862">2012.amta-caas14.1</url>
       <abstract>Discourse connectives can often signal multiple discourse relations, depending on their context. The automatic identification of the Arabic translations of seven English discourse connectives shows how these connectives are differently translated depending on their actual senses. Automatic labelling of English source connectives can help a machine translation system to translate them more correctly. The corpus-based analysis of Arabic translations also enables the definition of a connective-specific evaluation metric for machine translation, which is here validated by human judges on sample English/Arabic translation data.</abstract>
@@ -784,7 +784,7 @@
     <paper id="4">
       <title><fixed-case>ARNE</fixed-case> - A tool for Namend Entity Recognition from <fixed-case>A</fixed-case>rabic Text</title>
       <author><first>Carolin</first><last>Shihadeh</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>24-31</pages>
       <url hash="80e1127c">2012.amta-caas14.4</url>
       <abstract>In this paper, we study the problem of finding named entities in the Arabic text. For this task we present the development of our pipeline software for Arabic named entity recognition (ARNE), which includes tokenization, morphological analysis, Buckwalter transliteration, part of speech tagging and named entity recognition of person, location and organisation named entities. In our first attempt to recognize named entites, we have used a simple, fast and language independent gazetteer lookup approach. In our second attempt, we have used the morphological analysis provided by our pipeline to remove affixes and observed hence an improvement in our performance. The pipeline presented in this paper, can be used in future as a basis for a named entity recognition system that recognized named entites not only using gazetteers, but also making use of morphological information and part of speech tagging.</abstract>
@@ -801,10 +801,10 @@
     </paper>
     <paper id="6">
       <title>Using <fixed-case>A</fixed-case>rabic Transliteration to Improve Word Alignment from <fixed-case>F</fixed-case>rench- <fixed-case>A</fixed-case>rabic Parallel Corpora</title>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <author><first>Ouafa</first><last>Benterki</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>38-46</pages>
       <url hash="fdb0d0c6">2012.amta-caas14.6</url>
       <abstract>In this paper, we focus on the use of Arabic transliteration to improve the results of a linguistics-based word alignment approach from parallel text corpora. This approach uses, on the one hand, a bilingual lexicon, named entities, cognates and grammatical tags to align single words, and on the other hand, syntactic dependency relations to align compound words. We have evaluated the word aligner integrating Arabic transliteration using two methods: A manual evaluation of the alignment quality and an evaluation of the impact of this alignment on the translation quality by using the Moses statistical machine translation system. The obtained results show that Arabic transliteration improves the quality of both alignment and translation.</abstract>
@@ -842,7 +842,7 @@
       <title>Exploiting <fixed-case>W</fixed-case>ikipedia as a Knowledge Base for the Extraction of Linguistic Resources: Application on <fixed-case>A</fixed-case>rabic-<fixed-case>F</fixed-case>rench Comparable Corpora and Bilingual Lexicons</title>
       <author><first>Rahma</first><last>Sellami</last></author>
       <author><first>Fatiha</first><last>Sadat</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>72-79</pages>
       <url hash="3f48e68d">2012.amta-caas14.10</url>
       <abstract>We present simple and effective methods for extracting comparable corpora and bilingual lexicons from Wikipedia. We shall exploit the large scale and the structure of Wikipedia articles to extract two resources that will be very useful for natural language applications. We build a comparable corpus from Wikipedia using categories as topic restrictions and we extract bilingual lexicons from inter-language links aligned with statistical method or a combined statistical and linguistic method.</abstract>
@@ -853,7 +853,7 @@
     <meta>
       <booktitle>Workshop on Monolingual Machine Translation</booktitle>
       <editor><first>Tsuyoshi</first><last>Okita</last></editor>
-      <editor><first>Artem</first><last>Sokolov</last></editor>
+      <editor id="artem-sokolov"><first>Artem</first><last>Sokolov</last></editor>
       <editor><first>Taro</first><last>Watanabe</last></editor>
       <publisher>Association for Machine Translation in the Americas</publisher>
       <address>San Diego, California, USA</address>
@@ -867,9 +867,9 @@
     </frontmatter>
     <paper id="1">
       <title>Improving <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish Out-of-Domain Translations by Morphology Generalization and Generation</title>
-      <author><first>Lluís</first><last>Formiga</last></author>
-      <author><first>Adolfo</first><last>Hernández</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <author><first>Enric</first><last>Monte</last></author>
       <url hash="b0febbbd">2012.amta-monomt.1</url>
       <abstract>This paper presents a detailed study of a method for morphology generalization and generation to address out-of-domain translations in English-to-Spanish phrase-based MT. The paper studies whether the morphological richness of the target language causes poor quality translation when translating out-of-domain. In detail, this approach first translates into Spanish simplified forms and then predicts the final inflected forms through a morphology generation step based on shallow and deep-projected linguistic information available from both the source and target-language sentences. Obtained results highlight the importance of generalization, and therefore generation, for dealing with out-of-domain data.</abstract>
@@ -890,7 +890,7 @@
     <paper id="3">
       <title>Shallow and Deep Paraphrasing for Improved Machine Translation Parameter Optimization</title>
       <author><first>Dennis N.</first><last>Mehay</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <url hash="a70302f1">2012.amta-monomt.3</url>
       <abstract>String comparison methods such as BLEU (Papineni et al., 2002) are the de facto standard in MT evaluation (MTE) and in MT system parameter tuning (Och, 2003). It is difficult for these metrics to recognize legitimate lexical and grammatical paraphrases, which is important for MT system tuning (Madnani, 2010). We present two methods to address this: a shallow lexical substitution technique and a grammar-driven paraphrasing technique. Grammatically precise paraphrasing is novel in the context of MTE, and demonstrating its usefulness is a key contribution of this paper. We use these techniques to paraphrase a single reference, which, when used for parameter tuning, leads to superior translation performance over baselines that use only human-authored references.</abstract>
       <bibkey>mehay-white-2012-shallow</bibkey>
@@ -906,14 +906,14 @@
     </paper>
     <paper id="5">
       <title>Improving Word Alignment by Exploiting Adapted Word Similarity</title>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
       <url hash="5616a199">2012.amta-monomt.5</url>
       <abstract>This paper presents a method to improve a word alignment model in a phrase-based Statistical Machine Translation system for a low-resourced language using a string similarity approach. Our method captures similar words that can be seen as semi-monolingual across languages, such as numbers, named entities, and adapted/loan words. We use several string similarity metrics to measure the monolinguality of the words, such as Longest Common Subsequence Ratio (LCSR), Minimum Edit Distance Ratio (MEDR), and we also use a modified BLEU Score (modBLEU). Our approach is to add intersecting alignment points for word pairs that are orthographically similar, before applying a word alignment heuristic, to generate a better word alignment. We demonstrate this approach on Indonesian-to-English translation task, where the languages share many similar words that are poorly aligned given a limited training data. This approach gives a statistically significant improvement by up to 0.66 in terms of BLEU score.</abstract>
       <bibkey>larasati-2012-improving</bibkey>
     </paper>
     <paper id="6">
       <title>Addressing some Issues of Data Sparsity towards Improving <fixed-case>E</fixed-case>nglish- <fixed-case>M</fixed-case>anipuri <fixed-case>SMT</fixed-case> using Morphological Information</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <url hash="11df6f2e">2012.amta-monomt.6</url>
       <abstract>The performance of an SMT system heavily depends on the availability of large parallel corpora. Unavailability of these resources in the required amount for many language pair is a challenging issue. The required size of the resource involving morphologically rich and highly agglutinative language is essentially much more for the SMT systems. This paper investigates on some of the issues on enriching the resource for this kind of languages. Handling of inflectional and derivational morphemes of the morphologically rich target language plays important role in the enrichment process. Mapping from the source to the target side is carried out for the English-Manipuri SMT task using factored model. The SMT system developed shows improvement in the performance both in terms of the automatic scoring and subjective evaluation over the baseline system.</abstract>
       <bibkey>singh-2012-addressing</bibkey>
diff --git a/data/xml/2012.eamt.xml b/data/xml/2012.eamt.xml
index 52798f8590..1136682f33 100644
--- a/data/xml/2012.eamt.xml
+++ b/data/xml/2012.eamt.xml
@@ -58,9 +58,9 @@
     <paper id="5">
       <title>User Evaluation of Interactive Machine Translation Systems</title>
       <author><first>Vincent</first><last>Alabau</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>20-23</pages>
       <url hash="04110fcf">2012.eamt-1.5</url>
       <bibkey>alabau-etal-2012-user</bibkey>
@@ -94,11 +94,11 @@
     <paper id="9">
       <title>Building Translation Awareness in Occasional Authors: A User Case from <fixed-case>J</fixed-case>apan</title>
       <author><first>Midori</first><last>Tatsumi</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <author><first>Toshio</first><last>Okamoto</last></author>
-      <author><first>Katsumasa</first><last>Shimizu</last></author>
+      <author id="katsumasa-shimizu"><first>Katsumasa</first><last>Shimizu</last></author>
       <pages>53-56</pages>
       <url hash="440032cb">2012.eamt-1.9</url>
       <bibkey>tatsumi-etal-2012-building</bibkey>
@@ -153,7 +153,7 @@
       <author><first>Victor</first><last>Muntés-Mulero</last></author>
       <author><first>Patricia</first><last>Paladini Adell</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>77-80</pages>
       <url hash="9e25bcf4">2012.eamt-1.15</url>
       <bibkey>muntes-mulero-etal-2012-context</bibkey>
@@ -273,9 +273,9 @@
       <title>Can Automatic Post-Editing Make <fixed-case>MT</fixed-case> More Meaningful</title>
       <author><first>Kristen</first><last>Parton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>111-118</pages>
       <url hash="cd48a7f6">2012.eamt-1.34</url>
       <bibkey>parton-etal-2012-automatic</bibkey>
@@ -291,15 +291,15 @@
     </paper>
     <paper id="36">
       <title>Cascaded Phrase-Based Statistical Machine Translation Systems</title>
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
       <pages>129-136</pages>
       <url hash="c7a2c0d6">2012.eamt-1.36</url>
       <bibkey>tufis-dumitrescu-2012-cascaded</bibkey>
     </paper>
     <paper id="37">
       <title>Hybrid Parallel Sentence Mining from Comparable Corpora</title>
-      <author><first>Dan</first><last>Ștefănescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ștefănescu</last></author>
       <author><first>Radu</first><last>Ion</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <pages>137-144</pages>
@@ -312,7 +312,7 @@
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>145-152</pages>
       <url hash="56b118c4">2012.eamt-1.38</url>
       <bibkey>pecina-etal-2012-domain</bibkey>
@@ -332,7 +332,7 @@
       <author><first>Jacob</first><last>Devlin</last></author>
       <author><first>Huaigu</first><last>Cao</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>Premkumar</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Premkumar</first><last>Natarajan</last></author>
       <pages>161-168</pages>
       <url hash="e749bcf4">2012.eamt-1.40</url>
       <bibkey>chen-etal-2012-automatic</bibkey>
@@ -340,10 +340,10 @@
     <paper id="41">
       <title>Domain Adaptation in <fixed-case>SMT</fixed-case> of User-Generated Forum Content Guided by <fixed-case>OOV</fixed-case> Word Reduction: Normalization and/or Supplementary Data</title>
       <author><first>Pratyush</first><last>Banerjee</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>169-176</pages>
       <url hash="87460d52">2012.eamt-1.41</url>
       <bibkey>banerjee-etal-2012-domain</bibkey>
@@ -352,7 +352,7 @@
       <title>Long-distance reordering during search for hierarchical phrase-based <fixed-case>SMT</fixed-case></title>
       <author><first>Fabienne</first><last>Braune</last></author>
       <author><first>Anita</first><last>Gojun</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>177-184</pages>
       <url hash="a188fa45">2012.eamt-1.42</url>
       <bibkey>braune-etal-2012-long</bibkey>
@@ -429,27 +429,27 @@
     </paper>
     <paper id="54">
       <title>Flexible finite-state lexical selection for rule-based machine translation</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>213-220</pages>
       <url hash="f75d5d4b">2012.eamt-1.54</url>
       <bibkey>tyers-etal-2012-flexible</bibkey>
     </paper>
     <paper id="55">
       <title>Statistical Post-Editing of Machine Translation for Domain Adaptation</title>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>221-228</pages>
       <url hash="c0267d48">2012.eamt-1.55</url>
       <bibkey>rubino-etal-2012-statistical</bibkey>
     </paper>
     <paper id="56">
       <title>Crowd-based <fixed-case>MT</fixed-case> Evaluation for non-<fixed-case>E</fixed-case>nglish Target Languages</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>229-237</pages>
@@ -458,7 +458,7 @@
     </paper>
     <paper id="57">
       <title>Readability and Translatability Judgments for “Controlled <fixed-case>J</fixed-case>apanese”</title>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Midori</first><last>Tatsumi</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
@@ -485,7 +485,7 @@
     <paper id="60">
       <title><fixed-case>WIT</fixed-case>3: Web Inventory of Transcribed and Translated Talks</title>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>261-268</pages>
       <url hash="7b1b9741">2012.eamt-1.60</url>
@@ -496,7 +496,7 @@
       <author><first>Ramona</first><last>Enache</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>269-276</pages>
       <url hash="1641030f">2012.eamt-1.61</url>
       <bibkey>enache-etal-2012-hybrid</bibkey>
@@ -513,7 +513,7 @@
     <paper id="63">
       <title>Adjunct Alignment in Translation Data with an Application to Phrase Based Statistical Machine Translation</title>
       <author><first>Sophie</first><last>Arnoult</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>287-294</pages>
       <url hash="efc1b754">2012.eamt-1.63</url>
       <bibkey>arnoult-simaan-2012-adjunct</bibkey>
@@ -531,7 +531,7 @@
     <paper id="65">
       <title>Learning Machine Translation from In-domain and Out-of-domain Data</title>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Nello</first><last>Cristianini</last></author>
       <pages>305-312</pages>
       <url hash="94c2d278">2012.eamt-1.65</url>
@@ -542,7 +542,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>313-320</pages>
       <url hash="1eb10d3a">2012.eamt-1.66</url>
       <bibkey>huck-etal-2012-discriminative</bibkey>
diff --git a/data/xml/2012.freeopmt.xml b/data/xml/2012.freeopmt.xml
index 167ccddd1e..c4d9864eb9 100644
--- a/data/xml/2012.freeopmt.xml
+++ b/data/xml/2012.freeopmt.xml
@@ -60,7 +60,7 @@
     <paper id="6">
       <title>A rule-based machine translation system from <fixed-case>S</fixed-case>erbo-<fixed-case>C</fixed-case>roatian to <fixed-case>M</fixed-case>acedonian</title>
       <author><first>Hrvoje</first><last>Peradin</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>55-64</pages>
       <url hash="87cd4059">2012.freeopmt-1.6</url>
       <abstract>This paper describes the development of a one-way machine translation system from SerboCroatian to Macedonian on the Apertium platform. Details of resources and development methods are given, as well as an evaluation, and general directives for future work.</abstract>
@@ -69,10 +69,10 @@
     <paper id="7">
       <title>Deep evaluation of hybrid architectures: use of different metrics in <fixed-case>MERT</fixed-case> weight optimization</title>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Arantza</first><last>Díaz de Ilarranza</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>65-76</pages>
       <url hash="6f32e987">2012.freeopmt-1.7</url>
       <bibkey>espana-bonet-etal-2012-deep</bibkey>
diff --git a/data/xml/2012.iwslt.xml b/data/xml/2012.iwslt.xml
index 6e47166987..1fdc2d930d 100644
--- a/data/xml/2012.iwslt.xml
+++ b/data/xml/2012.iwslt.xml
@@ -53,10 +53,10 @@
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Chien-Lin</first><last>Huang</last></author>
       <author><first>Xugang</first><last>Lu</last></author>
-      <author><first>Paul R.</first><last>Dixon</last></author>
+      <author id="paul-dixon"><first>Paul R.</first><last>Dixon</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>34-37</pages>
       <url hash="739e468d">2012.iwslt-evaluation.2</url>
       <abstract>This paper describes our automatic speech recognition (ASR) system for the IWSLT 2012 evaluation campaign. The target data of the campaign is selected from the TED talks, a collection of public speeches on a variety of topics spoken in English. Our ASR system is based on weighted finite-state transducers and exploits an combination of acoustic models for spontaneous speech, language models based on n-gram and factored recurrent neural network trained with effectively selected corpora, and unsupervised topic adaptation framework utilizing ASR results. Accordingly, the system achieved 10.6% and 12.0% word error rate for the tst2011 and tst2012 evaluation set, respectively.</abstract>
@@ -71,7 +71,7 @@
       <author><first>Eunach</first><last>Cho</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Rainer</first><last>Kärgel</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>38-45</pages>
       <url hash="977eb85c">2012.iwslt-evaluation.3</url>
       <abstract>In this paper, we present the KIT systems participating in the English-French TED Translation tasks in the framework of the IWSLT 2012 machine translation evaluation. We also present several additional experiments on the English-German, English-Chinese and English-Arabic translation pairs. Our system is a phrase-based statistical machine translation system, extended with many additional models which were proven to enhance the translation quality. For instance, it uses the part-of-speech (POS)-based reordering, translation and language model adaptation, bilingual language model, word-cluster language model, discriminative word lexica (DWL), and continuous space language model. In addition to this, the system incorporates special steps in the preprocessing and in the post-processing step. In the preprocessing the noisy corpora are filtered by removing the noisy sentence pairs, whereas in the postprocessing the agreement between a noun and its surrounding words in the French translation is corrected based on POS tags with morphological information. Our system deals with speech transcription input by removing case information and punctuation except periods from the text translation model.</abstract>
@@ -100,7 +100,7 @@
       <author><first>Takamoto</first><last>Kano</last></author>
       <author><first>Tetsuo</first><last>Kiso</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>54-60</pages>
       <url hash="3cfe6156">2012.iwslt-evaluation.5</url>
@@ -128,7 +128,7 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Malte</first><last>Nuhn</last></author>
       <author><first>Markus</first><last>Nußbaum-Thom</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>69-76</pages>
       <url hash="770dbacb">2012.iwslt-evaluation.7</url>
       <abstract>In this paper, the automatic speech recognition (ASR) and statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2012 are presented. We participated in the ASR (English), MT (English-French, Arabic-English, Chinese-English, German-English) and SLT (English-French) tracks. For the MT track both hierarchical and phrase-based SMT decoders are applied. A number of different techniques are evaluated in the MT and SLT tracks, including domain adaptation via data selection, translation model interpolation, phrase training for hierarchical and phrase-based systems, additional reordering model, word class language model, various Arabic and Chinese segmentation methods, postprocessing of speech recognition output with an SMT system, and system combination. By application of these methods we can show considerable improvements over the respective baseline systems.</abstract>
@@ -139,7 +139,7 @@
       <author><first>Xiaoning</first><last>Zhu</last></author>
       <author><first>Yiming</first><last>Cui</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <pages>77-80</pages>
       <url hash="e611638d">2012.iwslt-evaluation.8</url>
@@ -168,9 +168,9 @@
       <author><first>Sebatian</first><last>Stüker</last></author>
       <author><first>Sakriani</first><last>Sakri</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>87-90</pages>
       <url hash="60d1c0e3">2012.iwslt-evaluation.10</url>
       <abstract>This paper describes our English Speech-to-Text (STT) systems for the 2012 IWSLT TED ASR track evaluation. The systems consist of 10 subsystems that are combinations of different front-ends, e.g. MVDR based and MFCC based ones, and two different phone sets. The outputs of the subsystems are combined via confusion network combination. Decoding is done in two stages, where the systems of the second stage are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cM-LLR.</abstract>
@@ -182,14 +182,14 @@
       <author><first>Keigo</first><last>Kubo</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Christian</first><last>Saam</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Christian</first><last>Mohr</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>91-95</pages>
       <url hash="df90c010">2012.iwslt-evaluation.11</url>
       <abstract>This paper describes the KIT-NAIST (Contrastive) English speech recognition system for the IWSLT 2012 Evaluation Campaign. In particular, we participated in the ASR track of the IWSLT TED task. The system was developed by Karlsruhe Institute of Technology (KIT) and Nara Institute of Science and Technology (NAIST) teams in collaboration within the interACT project. We employ single system decoding with fully continuous and semi-continuous models, as well as a three-stage, multipass system combination framework built with the Janus Recognition Toolkit. On the IWSLT 2010 test set our single system introduced in this work achieves a WER of 17.6%, and our final combination achieves a WER of 14.4%.</abstract>
@@ -207,10 +207,10 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>LIG</fixed-case> <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>rench machine translation system for <fixed-case>IWSLT</fixed-case> 2012</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Marwen</first><last>Azouzi</last></author>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
       <pages>102-108</pages>
       <url hash="868fbe52">2012.iwslt-evaluation.13</url>
       <abstract>This paper presents the LIG participation to the E-F MT task of IWSLT 2012. The primary system proposed made a large improvement (more than 3 point of BLEU on tst2010 set) compared to our last year participation. Part of this improvment was due to the use of an extraction from the Gigaword corpus. We also propose a preliminary adaptation of the driven decoding concept for machine translation. This method allows an efficient combination of machine translation systems, by rescoring the log-linear model at the N-best list level according to auxiliary systems: the basis technique is essentially guiding the search using one or previous system outputs. The results show that the approach allows a significant improvement in BLEU score using Google translate to guide our own SMT system. We also try to use a confidence measure as an additional log-linear feature but we could not get any improvment with this technique.</abstract>
@@ -220,7 +220,7 @@
       <title>The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case> 2012 <fixed-case>MT</fixed-case> system</title>
       <author><first>Jennifer</first><last>Drexler</last></author>
       <author><first>Wade</first><last>Shen</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Raymond</first><last>Slyh</last></author>
       <author><first>Brian</first><last>Ore</last></author>
       <author><first>Eric</first><last>Hansen</last></author>
@@ -234,7 +234,7 @@
       <title>Minimum <fixed-case>B</fixed-case>ayes-risk decoding extended with similar examples: <fixed-case>NAIST</fixed-case>-<fixed-case>NCT</fixed-case> at <fixed-case>IWSLT</fixed-case> 2012</title>
       <author><first>Hiroaki</first><last>Shimizu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>117-120</pages>
       <url hash="dc1bad73">2012.iwslt-evaluation.15</url>
@@ -245,7 +245,7 @@
       <title>The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2012</title>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Ohnmar</first><last>Htun</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>121-125</pages>
       <url hash="d3a9fcad">2012.iwslt-evaluation.16</url>
       <bibkey>finch-etal-2012-nict</bibkey>
@@ -271,9 +271,9 @@
       <title><fixed-case>R</fixed-case>omanian to <fixed-case>E</fixed-case>nglish automatic <fixed-case>MT</fixed-case> experiments at <fixed-case>IWSLT</fixed-case>12 – system description paper</title>
       <author><first>Ştefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
-      <author><first>Tiberiu</first><last>Boroş</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boroş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <pages>136-143</pages>
       <url hash="9eb05fc9">2012.iwslt-evaluation.19</url>
       <abstract>The paper presents the system developed by RACAI for the ISWLT 2012 competition, TED task, MT track, Romanian to English translation. We describe the starting baseline phrase-based SMT system, the experiments conducted to adapt the language and translation models and our post-translation cascading system designed to improve the translation without external resources. We further present our attempts at creating a better controlled decoder than the open-source Moses system offers.</abstract>
@@ -283,7 +283,7 @@
       <title>The <fixed-case>TÜBİTAK</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2012</title>
       <author><first>Coşkun</first><last>Mermer</last></author>
       <author><first>Hamza</first><last>Kaya</last></author>
-      <author><first>İlknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur Durgar</first><last>El-Kahlout</last></author>
       <author><first>Mehmet Uğur</first><last>Doğan</last></author>
       <pages>144-148</pages>
       <url hash="4a7f50a0">2012.iwslt-evaluation.20</url>
@@ -311,7 +311,7 @@
       <author><first>Aaron</first><last>Challenner</last></author>
       <author><first>Enoch</first><last>Kan</last></author>
       <author><first>Arvid</first><last>Neelakantan</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>150-157</pages>
       <url hash="78bb542f">2012.iwslt-papers.1</url>
       <abstract>We describe a novel two-way speech-to-speech (S2S) translation system that actively detects a wide variety of common error types and resolves them through user-friendly dialog with the user(s). We present algorithms for detecting out-of-vocabulary (OOV) named entities and terms, sense ambiguities, homophones, idioms, ill-formed input, etc. and discuss novel, interactive strategies for recovering from such errors. We also describe our approach for prioritizing different error types and an extensible architecture for implementing these decisions. We demonstrate the efficacy of our system by presenting analysis on live interactions in the English-to-Iraqi Arabic direction that are designed to invoke different error types for spoken language translation. Our analysis shows that the system can successfully resolve 47% of the errors, resulting in a dramatic improvement in the transfer of problematic concepts.</abstract>
@@ -323,7 +323,7 @@
       <author><first>Sakriani</first><last>Sakti</last></author>
       <author><first>Shinnosuke</first><last>Takamichi</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>158-163</pages>
       <url hash="31fa55ee">2012.iwslt-papers.2</url>
@@ -333,7 +333,7 @@
     <paper id="3">
       <title>Continuous space language models using restricted Boltzmann machines</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>164-170</pages>
       <url hash="b5f845c8">2012.iwslt-papers.3</url>
       <abstract>We present a novel approach for continuous space language models in statistical machine translation by using Restricted Boltzmann Machines (RBMs). The probability of an n-gram is calculated by the free energy of the RBM instead of a feedforward neural net. Therefore, the calculation is much faster and can be integrated into the translation process instead of using the language model only in a re-ranking step. Furthermore, it is straightforward to introduce additional word factors into the language model. We observed a faster convergence in training if we include automatically generated word classes as an additional word factor. We evaluated the RBM-based language model on the German to English and English to French translation task of TED lectures. Instead of replacing the conventional n-gram-based language model, we trained the RBM-based language model on the more important but smaller in-domain data and combined them in a log-linear way. With this approach we could show improvements of about half a BLEU point on the translation task.</abstract>
@@ -368,7 +368,7 @@
     <paper id="7">
       <title>A simple and effective weighted phrase extraction for machine translation adaptation</title>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>193-200</pages>
       <url hash="a9b84a0c">2012.iwslt-papers.7</url>
       <abstract>The task of domain-adaptation attempts to exploit data mainly drawn from one domain (e.g. news) to maximize the performance on the test domain (e.g. weblogs). In previous work, weighting the training instances was used for filtering dissimilar data. We extend this by incorporating the weights directly into the standard phrase training procedure of statistical machine translation (SMT). This allows the SMT system to make the decision whether to use a phrase translation pair or not, a more methodological way than discarding phrase pairs completely when using filtering. Furthermore, we suggest a combined filtering and weighting procedure to achieve better results while reducing the phrase table size. The proposed methods are evaluated in the context of Arabicto-English translation on various conditions, where significant improvements are reported when using the suggested weighted phrase training. The weighting method also improves over filtering, and the combined filtering and weighting is better than a standalone filtering method. Finally, we experiment with mixture modeling, where additional improvements are reported when using weighted phrase extraction over a variety of baselines.</abstract>
@@ -378,7 +378,7 @@
       <title>Applications of data selection via cross-entropy difference for real-world statistical machine translation</title>
       <author><first>Amittai</first><last>Axelrod</last></author>
       <author><first>QingJun</first><last>Li</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <pages>201-208</pages>
       <url hash="d91156c7">2012.iwslt-papers.8</url>
       <abstract>We broaden the application of data selection methods for domain adaptation to a larger number of languages, data, and decoders than shown in previous work, and explore comparable applications for both monolingual and bilingual cross-entropy difference methods. We compare domain adapted systems against very large general-purpose systems for the same languages, and do so without a bias to a particular direction. We present results against real-world generalpurpose systems tuned on domain-specific data, which are substantially harder to beat than standard research baseline systems. We show better performance for nearly all domain adapted systems, despite the fact that the domainadapted systems are trained on a fraction of the content of their general domain counterparts. The high performance of these methods suggest applicability to a wide variety of contexts, particularly in scenarios where only small supplies of unambiguously domain-specific data are available, yet it is believed that additional similar data is included in larger heterogenous-content general-domain corpora.</abstract>
@@ -388,7 +388,7 @@
       <title>A universal approach to translating numerical and time expressions</title>
       <author><first>Mei</first><last>Tu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>209-216</pages>
       <url hash="ba3e98dc">2012.iwslt-papers.9</url>
       <abstract>Although statistical machine translation (SMT) has made great progress since it came into being, the translation of numerical and time expressions is still far from satisfactory. Generally speaking, numbers are likely to be out-of-vocabulary (OOV) words due to their non-exhaustive characteristics even when the size of training data is very large, so it is difficult to obtain accurate translation results for the infinite set of numbers only depending on traditional statistical methods. We propose a language-independent framework to recognize and translate numbers more precisely by using a rule-based method. Through designing operators, we succeed to make rules educible and totally separate from codes, thus, we can extend rules to various language-pairs without re-coding, which contributes a lot to the efficient development of an SMT system with good portability. We classify numbers and time expressions into seven types, which are Arabic number, cardinal numbers, ordinal numbers, date, time of day, day of week and figures. A greedy algorithm is developed to deal with rule conflicts. Experiments have shown that our approach can significantly improve the translation performance.</abstract>
@@ -398,8 +398,8 @@
       <title>Evaluation of interactive user corrections for lecture transcription</title>
       <author><first>Heinrich</first><last>Kolkhorst</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>217-221</pages>
       <url hash="2e2922c8">2012.iwslt-papers.10</url>
       <abstract>In this work, we present and evaluate the usage of an interactive web interface for browsing and correcting lecture transcripts. An experiment performed with potential users without transcription experience provides us with a set of example corrections. On German lecture data, user corrections greatly improve the comprehensibility of the transcripts, yet only reduce the WER to 22%. The precision of user edits is relatively low at 77% and errors in inflection, case and compounds were rarely corrected. Nevertheless, characteristic lecture data errors, such as highly specific terms, were typically corrected, providing valuable additional information.</abstract>
@@ -412,7 +412,7 @@
       <author><first>Xugang</first><last>Lu</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>222-228</pages>
       <url hash="c76f7ae0">2012.iwslt-papers.11</url>
       <abstract>In this study, we extend recurrent neural network-based language models (RNNLMs) by explicitly integrating morphological and syntactic factors (or features). Our proposed RNNLM is called a factored RNNLM that is expected to enhance RNNLMs. A number of experiments are carried out on top of state-of-the-art LVCSR system that show the factored RNNLM improves the performance measured by perplexity and word error rate. In the IWSLT TED test data sets, absolute word error rate reductions over RNNLM and n-gram LM are 0.4∼0.8 points.</abstract>
@@ -420,7 +420,7 @@
     </paper>
     <paper id="12">
       <title>Incremental adaptation using translation information and post-editing analysis</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>229-236</pages>
@@ -450,7 +450,7 @@
       <title>Segmentation and punctuation prediction in speech language translation using a monolingual translation system</title>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>252-259</pages>
       <url hash="a4100e43">2012.iwslt-papers.15</url>
       <abstract>In spoken language translation (SLT), finding proper segmentation and reconstructing punctuation marks are not only significant but also challenging tasks. In this paper we present our recent work on speech translation quality analysis for German-English by improving sentence segmentation and punctuation. From oracle experiments, we show an upper bound of translation quality if we had human-generated segmentation and punctuation on the output stream of speech recognition systems. In our oracle experiments we gain 1.78 BLEU points of improvements on the lecture test set. We build a monolingual translation system from German to German implementing segmentation and punctuation prediction as a machine translation task. Using the monolingual translation system we get an improvement of 1.53 BLEU points on the lecture test set, which is a comparable performance against the upper bound drawn by the oracle experiments.</abstract>
@@ -460,7 +460,7 @@
       <title>Sequence labeling-based reordering model for phrase-based <fixed-case>SMT</fixed-case></title>
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>260-267</pages>
       <url hash="06d414b6">2012.iwslt-papers.16</url>
       <abstract>For current statistical machine translation system, reordering is still a major problem for language pairs like Chinese-English, where the source and target language have significant word order differences. In this paper, we propose a novel reordering model based on sequence labeling techniques. Our model converts the reordering problem into a sequence labeling problem, i.e. a tagging task. For the given source sentence, we assign each source token a label which contains the reordering information for that token. We also design an unaligned word tag so that the unaligned word phenomenon is automatically implanted in the proposed model. Our reordering model is conditioned on the whole source sentence. Hence it is able to catch the long dependency in the source sentence. Although the learning on large scale task requests notably amounts of computational resources, the decoder makes use of the tagging information as soft constraints. Therefore, the training procedure of our model is computationally expensive for large task while in the test phase (during translation) our model is very efficient. We carried out experiments on five Chinese-English NIST tasks trained with BOLT data. Results show that our model improves the baseline system by 1.32 BLEU 1.53 TER on average.</abstract>
@@ -481,7 +481,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Simon</first><last>Wiesler</last></author>
       <author><first>Markus</first><last>Nußbaum-Thom</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>276-283</pages>
       <url hash="4773fd8d">2012.iwslt-papers.18</url>
       <abstract>In spoken language translation a machine translation system takes speech as input and translates it into another language. A standard machine translation system is trained on written language data and expects written language as input. In this paper we propose an approach to close the gap between the output of automatic speech recognition and the input of machine translation by training the translation system on automatically transcribed speech. In our experiments we show improvements of up to 0.9 BLEU points on the IWSLT 2012 English-to-French speech translation task.</abstract>
@@ -490,8 +490,8 @@
     <paper id="19">
       <title>Towards a better understanding of statistical post-editing</title>
       <author><first>Marion</first><last>Potet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Marwen</first><last>Azouzi</last></author>
       <pages>284-291</pages>
       <url hash="c38e64e3">2012.iwslt-papers.19</url>
diff --git a/data/xml/2012.tal.xml b/data/xml/2012.tal.xml
index 2d9fbbb680..789eed803f 100644
--- a/data/xml/2012.tal.xml
+++ b/data/xml/2012.tal.xml
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Une étude comparative empirique sur la reconnaissance des entités médicales [An empirical comparative study of medical entity recognition]</title>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>39–68</pages>
       <url hash="7807da75">2012.tal-1.2</url>
       <language>fra</language>
@@ -76,7 +76,7 @@
       <author><first>Rémy</first><last>Kessler</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Caroline</first><last>Hagège</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>André</first><last>Bittar</last></author>
       <pages>57–86</pages>
       <url hash="77692da8">2012.tal-2.3</url>
@@ -85,8 +85,8 @@
     </paper>
     <paper id="4">
       <title>A Linguistically Grounded Annotation Language for Spatial Information</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Jessica</first><last>Moszkowicz</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jessica-moszkowicz"><first>Jessica</first><last>Moszkowicz</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>87–113</pages>
       <url hash="b7de6e6e">2012.tal-2.4</url>
@@ -112,9 +112,9 @@
     </paper>
     <paper id="7">
       <title>Street-level Geolocation from Natural Language Descriptions</title>
-      <author><first>Nate</first><last>Blaylock</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>William</first><last>de Beaumont</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="william-de-beaumont"><first>William</first><last>de Beaumont</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <author><first>Hyuckchul</first><last>Jung</last></author>
       <pages>177–205</pages>
@@ -153,7 +153,7 @@
     <paper id="3">
       <title>Atténuation des surdétections d’un correcteur grammatical de qualité commerciale [Reducing overdetections in a commercial grade grammar checker]</title>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <author><first>Simon</first><last>Charest</last></author>
       <author><first>Eric</first><last>Brunelle</last></author>
diff --git a/data/xml/2012.tc.xml b/data/xml/2012.tc.xml
index a266c8575c..9ec5254d81 100644
--- a/data/xml/2012.tc.xml
+++ b/data/xml/2012.tc.xml
@@ -12,7 +12,7 @@
     <paper id="1">
       <title><fixed-case>MNH</fixed-case>-<fixed-case>TT</fixed-case>: a collaborative platform for translator training</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <author><first>Martin</first><last>Thomas</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="10">
       <title>Linport as a standard for interoperability between translation systems</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <author><first>Tyler A.</first><last>Snow</last></author>
       <url hash="0031dcfd">2012.tc-1.10</url>
       <bibkey>melby-snow-2012-linport</bibkey>
@@ -120,7 +120,7 @@
     <paper id="17">
       <title><fixed-case>P</fixed-case>ro<fixed-case>T</fixed-case>ermino: a comprehensive web-based terminological management tool based on knowledge representation</title>
       <author><first>Isabel</first><last>Durán Muñoz</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <author><first>Le An</first><last>Ha</last></author>
       <url hash="b6eb3374">2012.tc-1.17</url>
       <bibkey>munoz-etal-2012-protermino</bibkey>
diff --git a/data/xml/2013.bitext.xml b/data/xml/2013.bitext.xml
index fcdacc8712..4f3bd3bee8 100644
--- a/data/xml/2013.bitext.xml
+++ b/data/xml/2013.bitext.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2024-08-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Twenty Years of Bitext</booktitle>
-      <editor><first>Chris</first><last>Dyer</last></editor>
-      <editor><first>Noah A.</first><last>Smith</last></editor>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="chris-dyer"><first>Chris</first><last>Dyer</last></editor>
+      <editor id="noah-a-smith"><first>Noah A.</first><last>Smith</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington, USA</address>
       <month>October</month>
@@ -26,8 +26,8 @@
     </paper>
     <paper id="2">
       <title>Twenty Flavors of One Text</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <bibkey>zeman-bojar-2013-twenty</bibkey>
     </paper>
     <paper id="3">
@@ -44,10 +44,10 @@
     </paper>
     <paper id="5">
       <title>Aligning Words in Bitexts using the Bilingual Web</title>
-      <author><first>Jim</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
       <author><first>Joseph Chee</first><last>Chang</last></author>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <bibkey>chang-etal-2013-aligning</bibkey>
     </paper>
     <paper id="6">
@@ -57,9 +57,9 @@
     </paper>
     <paper id="7">
       <title>Bitexts as Semantic Mirrors</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Begoña</first><last>Villada Moirón</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="begona-villada-moiron"><first>Begoña</first><last>Villada Moirón</last></author>
       <bibkey>tiedemann-etal-2013-bitexts</bibkey>
     </paper>
     <paper id="8">
@@ -86,7 +86,7 @@
     </paper>
     <paper id="12">
       <title>Lexicalized Reordering Model in Chart-based Machine Translation</title>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
       <bibkey>nguyen-2013-lexicalized</bibkey>
     </paper>
     <paper id="13">
diff --git a/data/xml/2013.iwslt.xml b/data/xml/2013.iwslt.xml
index 0b41d31956..8473088122 100644
--- a/data/xml/2013.iwslt.xml
+++ b/data/xml/2013.iwslt.xml
@@ -29,7 +29,7 @@
       <title>Report on the 10th <fixed-case>IWSLT</fixed-case> evaluation campaign</title>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <url hash="0253c927">2013.iwslt-evaluation.1</url>
@@ -57,7 +57,7 @@
       <title><fixed-case>MSR</fixed-case>-<fixed-case>FBK</fixed-case> <fixed-case>IWSLT</fixed-case> 2013 <fixed-case>SLT</fixed-case> system description</title>
       <author><first>Anthony</first><last>Aue</last></author>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Gang</first><last>Li</last></author>
       <author><first>Nicholas</first><last>Ruiz</last></author>
@@ -78,7 +78,7 @@
     <paper id="6">
       <title>The <fixed-case>NICT</fixed-case> <fixed-case>ASR</fixed-case> system for <fixed-case>IWSLT</fixed-case> 2013</title>
       <author><first>Chien-Lin</first><last>Huang</last></author>
-      <author><first>Paul R.</first><last>Dixon</last></author>
+      <author id="paul-dixon"><first>Paul R.</first><last>Dixon</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Xugang</first><last>Lu</last></author>
@@ -101,12 +101,12 @@
     <paper id="8">
       <title><fixed-case>QCRI</fixed-case> at <fixed-case>IWSLT</fixed-case> 2013: experiments in <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish and <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic spoken language translation</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>Fahad</first><last>Al Obaidli</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="de35bb70">2013.iwslt-evaluation.8</url>
       <abstract>We describe the Arabic-English and English-Arabic statistical machine translation systems developed by the Qatar Computing Research Institute for the IWSLT’2013 evaluation campaign on spoken language translation. We used one phrase-based and two hierarchical decoders, exploring various settings thereof. We further experimented with three domain adaptation methods, and with various Arabic word segmentation schemes. Combining the output of several systems yielded a gain of up to 3.4 BLEU points over the baseline. Here we also describe a specialized normalization scheme for evaluating Arabic output, which was adopted for the IWSLT’2013 evaluation campaign.</abstract>
       <bibkey>sajjad-etal-2013-qcri-iwslt</bibkey>
@@ -127,7 +127,7 @@
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="a15389eb">2013.iwslt-evaluation.10</url>
       <abstract>This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign International Workshop on Spoken Language Translation (IWSLT) 2013. We participated in the English→French, English↔German, Arabic→English, Chinese→English and Slovenian↔English MT tracks and the English→French and English→German SLT tracks. We apply phrase-based and hierarchical SMT decoders, which are augmented by state-of-the-art extensions. The novel techniques we experimentally evaluate include discriminative phrase training, a continuous space language model, a hierarchical reordering model, a word class language model, domain adaptation via data selection and system combination of standard and reverse order models. By application of these methods we can show considerable improvements over the respective baseline systems.</abstract>
       <bibkey>wuebker-etal-2013-rwth</bibkey>
@@ -164,8 +164,8 @@
       <author><first>Jonas</first><last>Gehring</last></author>
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="e309b235">2013.iwslt-evaluation.13</url>
       <abstract>This paper describes our English Speech-to-Text (STT) systems for the 2013 IWSLT TED ASR track. The systems consist of multiple subsystems that are combinations of different front-ends, e.g. MVDR-MFCC based and lMel based ones, GMM and NN acoustic models and different phone sets. The outputs of the subsystems are combined via confusion network combination. Decoding is done in two stages, where the systems of the second stage are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR.</abstract>
       <bibkey>kilgour-etal-2013-2013</bibkey>
@@ -185,8 +185,8 @@
       <author><first>Simon</first><last>Wiesler</last></author>
       <author><first>Markus</first><last>Nußbaum-Thom</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Ralf</first><last>Schlüter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="b8bf8ec9">2013.iwslt-evaluation.15</url>
       <abstract>In this paper, German and English large vocabulary continuous speech recognition (LVCSR) systems developed by the RWTH Aachen University for the IWSLT-2013 evaluation campaign are presented. Good improvements are obtained with state-of-the-art monolingual and multilingual bottleneck features. In addition, an open vocabulary approach using morphemic sub-lexical units is investigated along with the language model adaptation for the German LVCSR. For both the languages, competitive WERs are achieved using system combination.</abstract>
       <bibkey>shaik-etal-2013-rwth</bibkey>
@@ -196,7 +196,7 @@
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
@@ -204,7 +204,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -220,7 +220,7 @@
       <author><first>Jessica</first><last>Ray</last></author>
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Terry</first><last>Gleason</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>Brian</first><last>Ore</last></author>
@@ -234,8 +234,8 @@
     </paper>
     <paper id="18">
       <title>The speech recognition and machine translation system of <fixed-case>IOIT</fixed-case> for <fixed-case>IWSLT</fixed-case> 2013</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
-      <author><first>Hai-Son</first><last>Le</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="hai-son-le"><first>Hai-Son</first><last>Le</last></author>
       <author><first>Tat-Thang</first><last>Vu</last></author>
       <author><first>Chi-Mai</first><last>Luong</last></author>
       <url hash="12a10e68">2013.iwslt-evaluation.18</url>
@@ -245,7 +245,7 @@
     <paper id="19">
       <title><fixed-case>TÜBİTAK</fixed-case> <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish submissions for <fixed-case>IWSLT</fixed-case> 2013</title>
       <author><first>Ertuğrul</first><last>Yılmaz</last></author>
-      <author><first>İlknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur Durgar</first><last>El-Kahlout</last></author>
       <author><first>Burak</first><last>Aydın</last></author>
       <author><first>Zişan Sıla</first><last>Özil</last></author>
       <author><first>Coşkun</first><last>Mermer</last></author>
@@ -256,7 +256,7 @@
     <paper id="20">
       <title><fixed-case>FBK</fixed-case>’s machine translation systems for the <fixed-case>IWSLT</fixed-case> 2013 evaluation campaign</title>
       <author><first>Nicola</first><last>Bertoldi</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Nicholas</first><last>Ruiz</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -290,7 +290,7 @@
       <author><first>Sakriani</first><last>Sakti</last></author>
       <author><first>Keigo</first><last>Kubo</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="e15313ed">2013.iwslt-evaluation.23</url>
       <abstract>This paper describes the NAIST English speech recognition system for the IWSLT 2013 Evaluation Campaign. In particular, we participated in the ASR track of the IWSLT TED task. Last year, we participated in collaboration with Karlsruhe Institute of Technology (KIT). This year is our first time to build a full-fledged ASR system for IWSLT solely developed by NAIST. Our final system utilizes weighted finitestate transducers with four-gram language models. The hypothesis selection is based on the principle of system combination. On the IWSLT official test set our system introduced in this work achieves a WER of 9.1% for tst2011, 10.0% for tst2012, and 16.2% for the new tst2013.</abstract>
@@ -305,7 +305,7 @@
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
       <author><first>Isabel</first><last>Slawik</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="103642d9">2013.iwslt-evaluation.24</url>
       <abstract>In this paper, we present the KIT systems participating in all three official directions, namely English→German, German→English, and English→French, in translation tasks of the IWSLT 2013 machine translation evaluation. Additionally, we present the results for our submissions to the optional directions English→Chinese and English→Arabic. We used phrase-based translation systems to generate the translations. This year, we focused on adapting the systems towards ASR input. Furthermore, we investigated different reordering models as well as an extended discriminative word lexicon. Finally, we added a data selection approach for domain adaptation.</abstract>
       <bibkey>ha-etal-2013-kit</bibkey>
@@ -336,7 +336,7 @@
       <title>Using viseme recognition to improve a sign language translation system</title>
       <author><first>Christoph</first><last>Schmidt</last></author>
       <author><first>Oscar</first><last>Koller</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Thomas</first><last>Hoyoux</last></author>
       <author><first>Justus</first><last>Piater</last></author>
       <url hash="003010b5">2013.iwslt-papers.1</url>
@@ -345,9 +345,9 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>AMARA</fixed-case> corpus: building resources for translating the web’s educational content</title>
-      <author><first>Francisco</first><last>Guzman</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <url hash="6ea1644e">2013.iwslt-papers.2</url>
       <abstract>In this paper, we introduce a new parallel corpus of subtitles of educational videos: the AMARA corpus for online educational content. We crawl a multilingual collection community generated subtitles, and present the results of processing the Arabic–English portion of the data, which yields a parallel corpus of about 2.6M Arabic and 3.9M English words. We explore different approaches to align the segments, and extrinsically evaluate the resulting parallel corpus on the standard TED-talks tst-2010. We observe that the data can be successfully used for this task, and also observe an absolute improvement of 1.6 BLEU when it is used in combination with TED data. Finally, we analyze some of the specific challenges when translating the educational content.</abstract>
@@ -358,7 +358,7 @@
       <author><first>Hiroaki</first><last>Shimizu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="1b85df3a">2013.iwslt-papers.3</url>
       <abstract>There has been a fair amount of work on automatic speech translation systems that translate in real-time, serving as a computerized version of a simultaneous interpreter. It has been noticed in the field of translation studies that simultaneous interpreters perform a number of tricks to make the content easier to understand in real-time, including dividing their translations into small chunks, or summarizing less important content. However, the majority of previous work has not specifically considered this fact, simply using translation data (made by translators) for learning of the machine translation system. In this paper, we examine the possibilities of additionally incorporating simultaneous interpretation data (made by simultaneous interpreters) in the learning process. First we collect simultaneous interpretation data from professional simultaneous interpreters of three levels, and perform an analysis of the data. Next, we incorporate the simultaneous interpretation data in the learning of the machine translation system. As a result, the translation style of the system becomes more similar to that of a highly experienced simultaneous interpreter. We also find that according to automatic evaluation metrics, our system achieves performance similar to that of a simultaneous interpreter that has 1 year of experience.</abstract>
@@ -366,17 +366,17 @@
     </paper>
     <paper id="4">
       <title>Improving the minimum <fixed-case>B</fixed-case>ayes’ risk combination of machine translation systems</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="e0d3a795">2013.iwslt-papers.4</url>
       <abstract>We investigate the problem of combining the outputs of different translation systems into a minimum Bayes’ risk consensus translation. We explore different risk formulations based on the BLEU score, and provide a dynamic programming decoding algorithm for each of them. In our experiments, these algorithms generated consensus translations with better risk, and more efficiently, than previous proposals.</abstract>
       <bibkey>gonzalez-rubio-casacuberta-2013-improving</bibkey>
     </paper>
     <paper id="5">
       <title>Emprical study of a two-step approach to estimate translation quality</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <author><first>J. Ramón</first><last>Navarro-Cerdán</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="5a8d9848">2013.iwslt-papers.5</url>
       <abstract>We present a method to estimate the quality of automatic translations when reference translations are not available. Quality estimation is addressed as a two-step regression problem where multiple features are combined to predict a quality score. Given a set of features, we aim at automatically extracting the variables that better explain translation quality, and use them to predict the quality score. The soundness of our approach is assessed by the encouraging results obtained in an exhaustive experimentation with several feature sets. Moreover, the studied approach is highly-scalable allowing us to employ hundreds of features to predict translation quality.</abstract>
       <bibkey>gonzalez-rubio-etal-2013-emprical</bibkey>
@@ -386,8 +386,8 @@
       <author><first>Joshua</first><last>Winebarger</last></author>
       <author><first>Bao</first><last>Nguyen</last></author>
       <author><first>Jonas</first><last>Gehring</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="7eda2291">2013.iwslt-papers.6</url>
       <abstract>This paper describes our Speech-to-Text (STT) system for French, which was developed as part of our efforts in the Quaero program for the 2013 evaluation. Our STT system consists of six subsystems which were created by combining multiple complementary sources of pronunciation modeling including graphemes with various feature front-ends based on deep neural networks and tonal features. Both speaker-independent and speaker adaptively trained versions of the systems were built. The resulting systems were then combined via confusion network combination and crossadaptation. Through progressive advances and system combination we reach a word error rate (WER) of 16.5% on the 2012 Quaero evaluation data.</abstract>
       <bibkey>winebarger-etal-2013-2013</bibkey>
@@ -404,9 +404,9 @@
     <paper id="8">
       <title>Incremental unsupervised training for university lecture recognition</title>
       <author><first>Michael</first><last>Heck</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="33eac22c">2013.iwslt-papers.8</url>
       <abstract>In this paper we describe our work on unsupervised adaptation of the acoustic model of our simultaneous lecture translation system. We trained a speaker independent acoustic model, with which we produce automatic transcriptions of new lectures in order to improve the system for a specific lecturer. We compare our results against a model that was trained in a supervised way on an exact manual transcription. We examine four different ways of processing the decoder outputs of the automatic transcription with respect to the treatment of pronunciation variants and noise words. We will show that, instead of fixating the latter informations in the transcriptions, it is of advantage to let the Viterbi algorithm during training decide which pronunciations to use and where to insert which noise words. Further, we utilize word level posterior probabilities obtained during decoding by weighting and thresholding the words of a transcription.</abstract>
@@ -433,7 +433,7 @@
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jochen</first><last>Weiner</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="b4c15714">2013.iwslt-papers.11</url>
       <abstract>We analyze the performance of source sentence reordering, a common reordering approach, using oracle experiments on German-English and English-German translation. First, we show that the potential of this approach is very promising. Compared to a monotone translation, the optimally reordered source sentence leads to improvements of up to 4.6 and 6.2 BLEU points, depending on the language. Furthermore, we perform a detailed evaluation of the different aspects of the approach. We analyze the impact of the restriction of the search space by reordering lattices and we can show that using more complex rule types for reordering results in better approximation of the optimally reordered source. However, a gap of about 3 to 3.8 BLEU points remains, presenting a promising perspective for research on extending the search space through better reordering rules. When evaluating the ranking of different reordering variants, the results reveal that the search for the best path in the lattice performs very well for German-English translation. For English-German translation there is potential for an improvement of up to 1.4 BLEU points through a better ranking of the different reordering possibilities in the reordering lattice.</abstract>
       <bibkey>herrmann-etal-2013-analyzing</bibkey>
@@ -442,7 +442,7 @@
       <title><fixed-case>CRF</fixed-case>-based disfluency detection using semantic features for <fixed-case>G</fixed-case>erman to <fixed-case>E</fixed-case>nglish spoken language translation</title>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Than-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="1225e6da">2013.iwslt-papers.12</url>
       <abstract>Disfluencies in speech pose severe difficulties in machine translation of spontaneous speech. This paper presents our conditional random field (CRF)-based speech disfluency detection system developed on German to improve spoken language translation performance. In order to detect speech disfluencies considering syntactics and semantics of speech utterances, we carried out a CRF-based approach using information learned from the word representation and the phrase table used for machine translation. The word representation is gained using recurrent neural networks and projected words are clustered using the k-means algorithm. Using the output from the model trained with the word representations and phrase table information, we achieve an improvement of 1.96 BLEU points on the lecture test set. By keeping or removing humanannotated disfluencies, we show an upper bound and lower bound of translation quality. In an oracle experiment we gain 3.16 BLEU points of improvement on the lecture test set, compared to the same set with all disfluencies.</abstract>
       <bibkey>cho-etal-2013-crf</bibkey>
@@ -450,10 +450,10 @@
     <paper id="13">
       <title>Maximum entropy language modeling for <fixed-case>R</fixed-case>ussian <fixed-case>ASR</fixed-case></title>
       <author><first>Evgeniy</first><last>Shin</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Christian</first><last>Fügen</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="256b13b1">2013.iwslt-papers.13</url>
       <abstract>Russian is a challenging language for automatic speech recognition systems due to its rich morphology. This rich morphology stems from Russian’s highly inflectional nature and the frequent use of preand suffixes. Also, Russian has a very free word order, changes in which are used to reflect connotations of the sentences. Dealing with these phenomena is rather difficult for traditional n-gram models. We therefore investigate in this paper the use of a maximum entropy language model for Russian whose features are specifically designed to deal with the inflections in Russian, as well as the loose word order. We combine this with a subword based language model in order to alleviate the problem of large vocabulary sizes necessary for dealing with highly inflecting languages. Applying the maximum entropy language model during re-scoring improves the word error rate of our recognition system by 1.2% absolute, while the use of the sub-word based language model reduces the vocabulary size from 120k to 40k and the OOV rate from 4.8% to 2.1%.</abstract>
       <bibkey>shin-etal-2013-maximum</bibkey>
@@ -465,7 +465,7 @@
       <author><first>Adam</first><last>Lopez</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <url hash="431f26fc">2013.iwslt-papers.14</url>
       <abstract>Research into the translation of the output of automatic speech recognition (ASR) systems is hindered by the dearth of datasets developed for that explicit purpose. For SpanishEnglish translation, in particular, most parallel data available exists only in vastly different domains and registers. In order to support research on cross-lingual speech applications, we introduce the Fisher and Callhome Spanish-English Speech Translation Corpus, supplementing existing LDC audio and transcripts with (a) ASR 1-best, lattice, and oracle output produced by the Kaldi recognition system and (b) English translations obtained on Amazon’s Mechanical Turk. The result is a four-way parallel dataset of Spanish audio, transcriptions, ASR lattices, and English translations of approximately 38 hours of speech, with defined training, development, and held-out test sets. We conduct baseline machine translation experiments using models trained on the provided training data, and validate the dataset by corroborating a number of known results in the field, including the utility of in-domain (information, conversational) training data, increased performance translating lattices (instead of recognizer 1-best output), and the relationship between word error rate and BLEU score.</abstract>
       <bibkey>post-etal-2013-improved</bibkey>
@@ -491,7 +491,7 @@
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Wei</first><last>Chen</last></author>
       <author><first>Rohit</first><last>Kumar</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <url hash="10700b82">2013.iwslt-papers.17</url>
       <abstract>Spoken language translation (SLT) systems typically follow a pipeline architecture, in which the best automatic speech recognition (ASR) hypothesis of an input utterance is fed into a statistical machine translation (SMT) system. Conversational speech often generates unrecoverable ASR errors owing to its rich vocabulary (e.g. out-of-vocabulary (OOV) named entities). In this paper, we study the possibility of alleviating the impact of unrecoverable ASR errors on translation performance by minimizing the contextual effects of incorrect source words in target hypotheses. Our approach is driven by locally-derived penalties applied to bilingual phrase pairs as well as target language model (LM) likelihoods in the vicinity of source errors. With oracle word error labels on an OOV word-rich English-to-Iraqi Arabic translation task, we show statistically significant relative improvements of 3.2% BLEU and 2.0% METEOR over an error-agnostic baseline SMT system. We then investigate the impact of imperfect source error labels on error-aware translation performance. Simulation experiments reveal that modest translation improvements are to be gained with this approach even when the source error labels are noisy.</abstract>
       <bibkey>ananthakrishnan-etal-2013-source</bibkey>
diff --git a/data/xml/2013.mtsummit.xml b/data/xml/2013.mtsummit.xml
index f6e11a199c..0bd5ddadf0 100644
--- a/data/xml/2013.mtsummit.xml
+++ b/data/xml/2013.mtsummit.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title>The Operation Sequence Model: Integrating Translation and Reordering Operations in a Single Left-to-Right Model</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <url hash="cfd9f23d">2013.mtsummit-plenaries.1</url>
       <bibkey>schutze-2013-operation</bibkey>
     </paper>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="2">
       <title>Generative and Discriminative Methods for Online Adaptation in <fixed-case>SMT</fixed-case></title>
-      <author><first>Katharina</first><last>Wäschle</last></author>
+      <author id="katharina-waschle"><first>Katharina</first><last>Wäschle</last></author>
       <author><first>Patrick</first><last>Simianer</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
@@ -56,7 +56,7 @@
       <author><first>Keiko</first><last>Taguchi</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="84470eb8">2013.mtsummit-papers.3</url>
       <bibkey>taguchi-etal-2013-inducing</bibkey>
     </paper>
@@ -83,7 +83,7 @@
       <author><first>Rohit</first><last>Kumar</last></author>
       <author><first>Enoch</first><last>Kan</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <url hash="b49fd932">2013.mtsummit-papers.6</url>
       <bibkey>ananthakrishnan-etal-2013-semi</bibkey>
     </paper>
@@ -91,22 +91,22 @@
       <title>Listwise Approach to Learning to Rank for Automatic Evaluation of Machine Translation</title>
       <author><first>Maoxi</first><last>Li</last></author>
       <author><first>Aiwen</first><last>Jiang</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <url hash="7c2e11dc">2013.mtsummit-papers.7</url>
       <bibkey>li-etal-2013-listwise</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>MWE</fixed-case> Alignment in Phrase Based Statistical Machine Translation</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="4d55f151">2013.mtsummit-papers.8</url>
       <bibkey>pal-etal-2013-mwe</bibkey>
     </paper>
     <paper id="9">
       <title>Real-life Translation Quality Estimation for <fixed-case>MT</fixed-case> System Selection</title>
-      <author><first>Lluis</first><last>Formiga</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-formiga"><first>Lluis</first><last>Formiga</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <author><first>Jaume</first><last>Pujantell</last></author>
       <url hash="a8153f5d">2013.mtsummit-papers.9</url>
       <bibkey>formiga-etal-2013-real</bibkey>
@@ -114,7 +114,7 @@
     <paper id="10">
       <title>Yet Another Fast, Robust and Open Source Sentence Aligner. Time to<fixed-case>R</fixed-case>econsider Sentence Alignment?</title>
       <author><first>Fethi</first><last>Lamraoui</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="b27b06e7">2013.mtsummit-papers.10</url>
       <bibkey>lamraoui-langlais-2013-yet</bibkey>
     </paper>
@@ -137,9 +137,9 @@
     <paper id="13">
       <title>Quality Estimation-guided Data Selection for Domain Adaptation of <fixed-case>SMT</fixed-case></title>
       <author><first>Pratyush</first><last>Banerjee</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="c7a747f1">2013.mtsummit-papers.13</url>
       <bibkey>banerjee-etal-2013-quality</bibkey>
     </paper>
@@ -154,7 +154,7 @@
     <paper id="15">
       <title>Design and Analysis of a Large Corpus of Post-Edited Translations: Quality Estimation, Failure Analysis and the Variability of Post-Edition</title>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Natalia</first><last>Segal</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <url hash="8ac47aee">2013.mtsummit-papers.15</url>
@@ -171,7 +171,7 @@
     </paper>
     <paper id="17">
       <title>Meta-Evaluation of a Diagnostic Quality Metric for Machine Translation</title>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Declan</first><last>Groves</last></author>
@@ -182,7 +182,7 @@
       <title>Towards a Generic Approach for Bilingual Lexicon Extraction from Comparable Corpora</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <url hash="bcbd68d8">2013.mtsummit-papers.18</url>
       <bibkey>bouamor-etal-2013-towards</bibkey>
     </paper>
@@ -191,7 +191,7 @@
       <author><first>Patrick</first><last>Lehnen</last></author>
       <author><first>Jorn Wiibker</first><last>Jan-Thorsten Peter</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="11cc4370">2013.mtsummit-papers.19</url>
       <bibkey>lehnen-etal-2013-hidden</bibkey>
     </paper>
@@ -201,7 +201,7 @@
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="3f5a080f">2013.mtsummit-papers.20</url>
       <bibkey>freitag-etal-2013-reverse</bibkey>
     </paper>
@@ -216,8 +216,8 @@
     <paper id="22">
       <title>A Free/Open-source <fixed-case>K</fixed-case>azakh-<fixed-case>T</fixed-case>atar Machine Translation System</title>
       <author><first>Ilnar</first><last>Salimzyanov</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <url hash="b497d01f">2013.mtsummit-papers.22</url>
       <bibkey>salimzyanov-etal-2013-free</bibkey>
     </paper>
@@ -250,7 +250,7 @@
     </meta>
     <paper id="1">
       <title>Translating the <fixed-case>FINREP</fixed-case> Taxonomy using a Domain-specific Corpus</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Susan Marie</first><last>Thomas</last></author>
       <author><first>Derek</first><last>De Brandt</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -277,7 +277,7 @@
     </paper>
     <paper id="4">
       <title>A <fixed-case>CCG</fixed-case>-based Quality Estimation Metric for Statistical Machine Translation Learning from Human Judgments of Machine Translation Output</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
@@ -289,7 +289,7 @@
     </paper>
     <paper id="5">
       <title>Learning from Human Judgments of Machine Translation Output</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
@@ -301,8 +301,8 @@
     </paper>
     <paper id="6">
       <title>Towards the Supervised Machine Translation: Real Word Alignments and Translations in a Multi-task Active Learning process</title>
-      <author><first>Martha-Alicia</first><last>Rocha</last></author>
-      <author><first>Joan-Andreu</first><last>Sanchez</last></author>
+      <author id="martha-alicia-rocha"><first>Martha-Alicia</first><last>Rocha</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sanchez</last></author>
       <url hash="7f5f2315">2013.mtsummit-posters.6</url>
       <bibkey>rocha-sanchez-2013-towards</bibkey>
     </paper>
@@ -310,7 +310,7 @@
       <title>Comparing Forum Data Post-Editing Performance Using Translation Memory and Machine Translation Output: A Pilot Study</title>
       <author><first>Lucia</first><last>Morado Vazquez</last></author>
       <author><first>Silvia</first><last>Rodriguez Vazquez</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="8d12867d">2013.mtsummit-posters.7</url>
       <bibkey>morado-vazquez-etal-2013-comparing</bibkey>
     </paper>
@@ -337,7 +337,7 @@
       <author><first>Maria</first><last>Mateva</last></author>
       <author><first>Ramona</first><last>Enache</last></author>
       <author><first>Cristina</first><last>Espana-Bonet</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <author><first>Borislav</first><last>Popov</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
       <url hash="35890b4b">2013.mtsummit-posters.10</url>
@@ -346,15 +346,15 @@
     <paper id="11">
       <title>Application of Online Terminology Services in Statistical Machine Translation</title>
       <author><first>Raivis</first><last>Skadins</last></author>
-      <author><first>Marcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <url hash="1aa02a0e">2013.mtsummit-posters.11</url>
       <bibkey>skadins-etal-2013-application</bibkey>
     </paper>
     <paper id="12">
       <title>Key Problems in Conversion from Simplified to Traditional <fixed-case>C</fixed-case>hinese Characters Topic Models for Translation Quality Estimation for Gisting Purposes</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Jose Guilherme</first><last>Camargo de Souza</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
@@ -363,7 +363,7 @@
     </paper>
     <paper id="13">
       <title>Topic Models for Translation Quality Estimation for Gisting Purposes</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Jose Guilherme</first><last>Camargo de Souza</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
@@ -384,7 +384,7 @@
     </meta>
     <paper id="1">
       <title>Analyzing and Predicting <fixed-case>MT</fixed-case> Utility and Post-Editing Productivity in Enterprise-scale Translation Projects</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Olga</first><last>Beregovaya</last></author>
       <author><first>Michael</first><last>Denkowski</last></author>
       <author><first>David</first><last>Clarke</last></author>
@@ -438,8 +438,8 @@
     </paper>
     <paper id="8">
       <title>Let’s<fixed-case>MT</fixed-case>! as a Learning Platform for <fixed-case>SMT</fixed-case></title>
-      <author><first>Hanne</first><last>Fersøe</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Lene</first><last>Offersgaard</last></author>
       <author><first>Susi</first><last>Olsen</last></author>
       <author><first>Claus</first><last>Povlsen</last></author>
@@ -448,12 +448,12 @@
     </paper>
     <paper id="9">
       <title>User Evaluation of Advanced Interaction Features for a Computer-Assisted Translation Workbench</title>
-      <author><first>Vicente</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicente</first><last>Alabau</last></author>
       <author><first>Jesus</first><last>Gonzalez-Rubio</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
       <author><first>German</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
       <author><first>Ragnar</first><last>Bonk</last></author>
       <author><first>Michael</first><last>Carl</last></author>
@@ -466,7 +466,7 @@
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
       <author><first>Jie</first><last>Jiang</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maucec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maucec</last></author>
       <url hash="fc2ea7dd">2013.mtsummit-user.10</url>
       <bibkey>etchegoyhen-etal-2013-smt</bibkey>
     </paper>
@@ -523,7 +523,7 @@
     </meta>
     <paper id="1">
       <title>Automated Community Content Editing <fixed-case>P</fixed-case>or<fixed-case>T</fixed-case>al (<fixed-case>ACCEPT</fixed-case>)</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="2f2777c0">2013.mtsummit-european.1</url>
       <bibkey>bouillon-2013-automated</bibkey>
     </paper>
@@ -539,7 +539,7 @@
       <title><fixed-case>CASMACAT</fixed-case>: Cognitive Analysis and Statistical Methods for Advanced Computer Aided Translation</title>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Eva</first><last>Marcos</last></author>
       <url hash="7a8f2113">2013.mtsummit-european.3</url>
       <bibkey>koehn-etal-2013-casmacat</bibkey>
@@ -553,18 +553,18 @@
     <paper id="5">
       <title>Bridges Across the Language Divide — <fixed-case>EU</fixed-case>-<fixed-case>BRIDGE</fixed-case> Excitement: Exploring Customer Interactions through Textual <fixed-case>E</fixed-case>ntail<fixed-case>MENT</fixed-case></title>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <url hash="684a9545">2013.mtsummit-european.5</url>
       <bibkey>dagan-etal-2013-bridges</bibkey>
     </paper>
     <paper id="6">
       <title>Excitement: Exploring Customer Interactions through Textual <fixed-case>E</fixed-case>ntail<fixed-case>MENT</fixed-case></title>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <url hash="18d91d85">2013.mtsummit-european.6</url>
       <bibkey>dagan-etal-2013-excitement</bibkey>
     </paper>
@@ -573,7 +573,7 @@
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Alex</first><last>Helle</last></author>
       <author><first>Elia</first><last>Yuste</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="750e21ce">2013.mtsummit-european.7</url>
       <bibkey>herranz-etal-2013-pangeanic</bibkey>
@@ -581,7 +581,7 @@
     <paper id="8">
       <title><fixed-case>FAUST</fixed-case>: Feedback Analysis for User Adaptive Statistical Translation</title>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <url hash="5207c5bf">2013.mtsummit-european.8</url>
       <bibkey>byrne-marquez-2013-faust</bibkey>
     </paper>
@@ -615,14 +615,14 @@
     </paper>
     <paper id="13">
       <title><fixed-case>MONNET</fixed-case>: Multilingual Ontologies for Networked Knowledge</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="f3b96153">2013.mtsummit-european.13</url>
       <bibkey>arcan-buitelaar-2013-monnet</bibkey>
     </paper>
     <paper id="14">
       <title><fixed-case>M</fixed-case>oses<fixed-case>C</fixed-case>ore: <fixed-case>M</fixed-case>oses Open Source Evaluation and Support Co-ordination for <fixed-case>O</fixed-case>ut<fixed-case>R</fixed-case>each and Exploitation <fixed-case>PANACEA</fixed-case>: Platform for Automatic, Normalised Annotation and Cost-Effective Acquisition of Language Resources for Human Language Technologies</title>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Marc</first><last>Poch</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <url hash="be924007">2013.mtsummit-european.14</url>
@@ -630,7 +630,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>PANACEA</fixed-case>: Platform for Automatic, Normalised Annotation and Cost-Effective Acquisition of Language Resources for Human Language Technologies</title>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Marc</first><last>Poch</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <url hash="342df3e9">2013.mtsummit-european.15</url>
@@ -648,12 +648,12 @@
       <title><fixed-case>QTL</fixed-case>aunchpad</title>
       <author><first>Stephen</first><last>Doherty</last></author>
       <author><first>Declan</first><last>Groves</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <url hash="bb37569c">2013.mtsummit-european.17</url>
       <bibkey>doherty-etal-2013-qtlaunchpad</bibkey>
     </paper>
@@ -661,7 +661,7 @@
       <title><fixed-case>SIGNSPEAK</fixed-case>: Scientific Understanding and Vision-based Technological Development for Continuous Sign Language Recognition and Translation</title>
       <author><first>Jens</first><last>Forster</last></author>
       <author><first>Christoph</first><last>Schmidt</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="19ddcc50">2013.mtsummit-european.18</url>
       <bibkey>forster-etal-2013-signspeak</bibkey>
     </paper>
@@ -675,14 +675,14 @@
       <author><first>G.</first><last>van Loenhout</last></author>
       <author><first>A.</first><last>del Pozo</last></author>
       <author><first>D.</first><last>Spiliotopoulos</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maucec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maucec</last></author>
       <author><first>A.</first><last>Turner</last></author>
       <url hash="37a38f9d">2013.mtsummit-european.19</url>
       <bibkey>georgakopoulou-etal-2013-sumat</bibkey>
     </paper>
     <paper id="20">
       <title><fixed-case>T</fixed-case>aa<fixed-case>S</fixed-case>: Terminology as a Service</title>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <author><first>Tatiana</first><last>Gornostay</last></author>
       <url hash="4a9d8fa4">2013.mtsummit-european.20</url>
       <bibkey>vasiljevs-gornostay-2013-taas</bibkey>
@@ -712,7 +712,7 @@
       <address>Nice, France</address>
       <month>September 2</month>
       <year>2013</year>
-      <editor><first>Shoichi</first><last>Yokoyama</last></editor>
+      <editor id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></editor>
       <venue>mtsummit</venue>
       <venue>pslt</venue>
     </meta>
@@ -747,7 +747,7 @@
       <author><first>Yun</first><last>Jin</last></author>
       <author><first>Oh-Woog</first><last>Kwon</last></author>
       <author><first>Seung-Hoon</first><last>Na</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <url hash="aeb54628">2013.mtsummit-wpt.4</url>
       <bibkey>jin-etal-2013-patent</bibkey>
     </paper>
@@ -755,7 +755,7 @@
       <title>Exploiting multiple resources for <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish patent translation</title>
       <author><first>Rahma</first><last>Sellami</last></author>
       <author><first>Fatiha</first><last>Sadat</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <url hash="79a4e359">2013.mtsummit-wpt.5</url>
       <bibkey>sellami-etal-2013-exploiting</bibkey>
     </paper>
@@ -779,7 +779,7 @@
     </paper>
     <paper id="2">
       <title>What can we learn about the selection mechanism for post-editing?</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>David</first><last>Vilar</last></author>
@@ -815,26 +815,26 @@
       <title>Combining pre-editing and post-editing to improve <fixed-case>SMT</fixed-case> of user-generated content</title>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Victoria</first><last>Porro</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sabine</first><last>Lehmann</last></author>
       <url hash="fb3cecb5">2013.mtsummit-wptp.6</url>
       <bibkey>gerlach-etal-2013-combining</bibkey>
     </paper>
     <paper id="7">
       <title>Advanced computer aided translation with a web-based workbench</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
       <author><first>Ragnar</first><last>Bonk</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Jesús</first><last>González</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Luis</first><last>Leiva</last></author>
+      <author id="luis-a-leiva"><first>Luis</first><last>Leiva</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
       <author><first>Daniel</first><last>Oriz</last></author>
-      <author><first>Hervé</first><last>Saint-Amand</last></author>
-      <author><first>Germán</first><last>Sanchis</last></author>
+      <author id="herve-saint-amand"><first>Hervé</first><last>Saint-Amand</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis</last></author>
       <author><first>Chara</first><last>Tsiukala</last></author>
       <url hash="29658f2b">2013.mtsummit-wptp.7</url>
       <bibkey>alabau-etal-2013-advanced</bibkey>
@@ -874,15 +874,15 @@
     </paper>
     <paper id="12">
       <title>Online production of <fixed-case>HQ</fixed-case> parallel corpora and permanent task-based evaluation of multiple <fixed-case>MT</fixed-case> systems: both can be obtained through i<fixed-case>MAG</fixed-case>s with no added cost</title>
-      <author><first>Lingxiao</first><last>Wang</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="ling-xiao-wang"><first>Lingxiao</first><last>Wang</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="05bd9683">2013.mtsummit-wptp.12</url>
       <bibkey>wang-boitet-2013-online</bibkey>
     </paper>
     <paper id="13">
       <title>Issues in incremental adaptation of statistical <fixed-case>MT</fixed-case> from human post-edits</title>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
@@ -913,7 +913,7 @@
     </meta>
     <paper id="1">
       <title>All that glitters is not gold when translating phraseological units</title>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <attachment type="presentation" hash="7c8837b7">2013.mtsummit-wmwumttt.1.Presentation.pdf</attachment>
       <bibkey>corpas-pastor-2013-glitters</bibkey>
     </paper>
@@ -925,7 +925,7 @@
     </paper>
     <paper id="3">
       <title>Anaphora resolution, collocations and translation</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <url hash="d68410a0">2013.mtsummit-wmwumttt.3</url>
       <bibkey>wehrli-nerima-2013-anaphora</bibkey>
@@ -933,8 +933,8 @@
     <paper id="4">
       <title>A flexible framework for collocation retrieval and translation from parallel and comparable corpora</title>
       <author><first>Oscar Mendoza</first><last>Rivera</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <url hash="89736989">2013.mtsummit-wmwumttt.4</url>
       <bibkey>rivera-etal-2013-flexible</bibkey>
     </paper>
@@ -967,7 +967,7 @@
     <paper id="8">
       <title>How hard is it to automatically translate phrasal verbs from <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>rench?</title>
       <author><first>Carlos</first><last>Ramish</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Alexander</first><last>Kobzar</last></author>
       <url hash="cba9c228">2013.mtsummit-wmwumttt.8</url>
       <bibkey>ramish-etal-2013-hard</bibkey>
diff --git a/data/xml/2013.tal.xml b/data/xml/2013.tal.xml
index 9c91fea39d..75f0730ddd 100644
--- a/data/xml/2013.tal.xml
+++ b/data/xml/2013.tal.xml
@@ -25,7 +25,7 @@
     </paper>
     <paper id="2">
       <title>Stratégies discriminantes pour intégrer la reconnaissance des mots composés dans un analyseur syntaxique en constituants [Discriminative strategies for integrating multiword expression recognition in a constituent parser]</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Anthony</first><last>Sigogne</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <pages>47–70</pages>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Evaluer et améliorer une ressource distributionnelle: protocole d’annotation de liens sémantiques en contexte [Evaluating and improving a distributional resource: protocol for in-context annotation of semantic links]</title>
       <author><first>Clémentine</first><last>Adam</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>71–97</pages>
       <url hash="8a47283a">2013.tal-1.3</url>
@@ -46,7 +46,7 @@
     <paper id="4">
       <title>Désambiguïsation lexicale de textes : efficacité qualitative et temporelle d’un algorithme à colonies de fourmis [Lexical disambiguation of texts: qualitative and temporal efficiency of an ant colony algorithm]</title>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
       <pages>99–138</pages>
       <url hash="8b28688f">2013.tal-1.4</url>
@@ -66,7 +66,7 @@
     <paper id="6">
       <title>Les apports du <fixed-case>TAL</fixed-case> à la lisibilité du français langue étrangère [Contributions of <fixed-case>NLP</fixed-case> to the readability of <fixed-case>F</fixed-case>rench as a foreign language]</title>
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>171–202</pages>
       <url hash="56332a3e">2013.tal-1.6</url>
       <language>fra</language>
@@ -98,7 +98,7 @@
       <title>Préface [Foreword]</title>
       <author><first>Sophia</first><last>Anamiadou</last></author>
       <author><first>Nathalie</first><last>Friburger</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>7–11</pages>
       <url hash="80e66cda">2013.tal-2.1</url>
       <language>fra</language>
@@ -141,7 +141,7 @@
     <paper id="5">
       <title>Traitement automatique des entités nommées en arabe: détection et traduction [Automatic processing of <fixed-case>A</fixed-case>rabic named entities: detection and translation]</title>
       <author><first>Souhir</first><last>Gahbiche-Braham</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>101–132</pages>
       <url hash="f2a0cb7f">2013.tal-2.5</url>
@@ -184,7 +184,7 @@
     <paper id="3">
       <title>Code-Mixing in Social Media Text</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>41–64</pages>
       <url hash="26a18c83">2013.tal-3.3</url>
       <bibkey>das-gamback-2013-code</bibkey>
diff --git a/data/xml/2014.amta.xml b/data/xml/2014.amta.xml
index 748d2e31a2..6965b18d7f 100644
--- a/data/xml/2014.amta.xml
+++ b/data/xml/2014.amta.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>B</fixed-case>ayesian iterative-cascade framework for hierarchical phrase-based translation</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>15-27</pages>
       <url hash="9479fcc5">2014.amta-researchers.2</url>
@@ -44,7 +44,7 @@
       <title>Using any machine translation source for fuzzy-match repair in a computer-aided translation setting</title>
       <author><first>John E.</first><last>Ortega</last></author>
       <author><first>Felipe</first><last>Sánchez-Martinez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>42-53</pages>
       <url hash="079ad257">2014.amta-researchers.4</url>
       <abstract>When a computer-assisted translation (CAT) tool does not find an exact match for the source segment to translate in its translation memory (TM), translators must use fuzzy matches that come from translation units in the translation memory that do not completely match the source segment. We explore the use of a fuzzy-match repair technique called patching to repair translation proposals from a TM in a CAT environment using any available machine translation system, or any external bilingual source, regardless of its internals. Patching attempts to aid CAT tool users by repairing fuzzy matches and proposing improved translations. Our results show that patching improves the quality of translation proposals and reduces the amount of edit operations to perform, especially when a specific set of restrictions is applied.</abstract>
@@ -52,7 +52,7 @@
     </paper>
     <paper id="5">
       <title>Enhancing statistical machine translation with bilingual terminology in a <fixed-case>CAT</fixed-case> environment</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Sara</first><last>Topelli</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -92,7 +92,7 @@
       <title>Document-level re-ranking with soft lexical and semantic features for statistical machine translation</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>110-123</pages>
       <url hash="0976332d">2014.amta-researchers.9</url>
       <abstract>We introduce two document-level features to polish baseline sentence-level translations generated by a state-of-the-art statistical machine translation (SMT) system. One feature uses the word-embedding technique to model the relation between a sentence and its context on the target side; the other feature is a crisp document-level token-type ratio of target-side translations for source-side words to model the lexical consistency in translation. The weights of introduced features are tuned to optimize the sentence- and document-level metrics simultaneously on the basis of Pareto optimality. Experimental results on two different schemes with different corpora illustrate that the proposed approach can efficiently and stably integrate document-level information into a sentence-level SMT system. The best improvements were approximately 0.5 BLEU on test sets with statistical significance.</abstract>
@@ -123,7 +123,7 @@
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>José G.C.</first><last>de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.C.</first><last>de Souza</last></author>
       <pages>152-165</pages>
       <url hash="ebe0dc73">2014.amta-researchers.12</url>
       <abstract>In this paper we investigate the problem of adapting a machine translation system to the feedback provided by multiple post-editors. It is well know that translators might have very different post-editing styles and that this variability hinders the application of online learning methods, which indeed assume a homogeneous source of adaptation data. We hence propose multi-task learning to leverage bias information from each single post-editors in order to constrain the evolution of the SMT system. A new framework for significance testing with sentence level metrics is described which shows that Multi-Task learning approaches outperforms existing online learning approaches, with significant gains of 1.24 and 1.88 TER score over a strong online adaptive baseline, on a test set of post-edits produced by four translators texts and on a popular benchmark with multiple references, respectively.</abstract>
@@ -142,7 +142,7 @@
     <paper id="14">
       <title>Expanding machine translation training data with an out-of-domain corpus using language modeling based vocabulary saturation</title>
       <author><first>Burak</first><last>Aydın</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>180-192</pages>
       <url hash="1ee3cee3">2014.amta-researchers.14</url>
       <abstract>The training data size is of utmost importance for statistical machine translation (SMT), since it affects the training time, model size, decoding speed, as well as the system’s overall success. One of the challenges for developing SMT systems for languages with less resources is the limited sizes of the available training data. In this paper, we propose an approach for expanding the training data by including parallel texts from an out-of-domain corpus. Selecting the best out-of-domain sentences for inclusion in the training set is important for the overall performance of the system. Our method is based on first ranking the out-of-domain sentences using a language modeling approach, and then, including the sentences to the training set by using the vocabulary saturation filter technique. We evaluated our approach for the English-Turkish language pair and obtained promising results. Performance improvements of up to +0.8 BLEU points for the English-Turkish translation system are achieved. We compared our results with the translation model combination approaches as well and reported the improvements. Moreover, we implemented our system with dependency parse tree based language modeling in addition to the n-gram based language modeling and reported comparable results.</abstract>
@@ -151,11 +151,11 @@
     <paper id="15">
       <title>Comparison of data selection techniques for the translation of video lectures</title>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Adrià</first><last>Martínez-Villaronga</last></author>
       <author><first>Adrià</first><last>Giménez</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
       <author><first>Shachar</first><last>Mirkin</last></author>
       <pages>193-207</pages>
@@ -169,7 +169,7 @@
       <author><first>Heng</first><last>Yu</last></author>
       <author><first>Hongmei</first><last>Zhao</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <pages>208-221</pages>
       <url hash="71d868bd">2014.amta-researchers.16</url>
       <abstract>This paper gives a general review and detailed analysis of China Workshop on Machine Translation (CWMT) Evaluation. Compared with the past CWMT evaluation campaigns, CWMT2013 evaluation is characterized as follows: first, adopting gray-box evaluation which makes the results more replicable and controllable; second, adding one rule-based system as a counterpart; third, carrying out manual evaluations on some specific tasks to give a more comprehensive analysis of the translation errors. Boosted by those new features, our analysis and case study on the evaluation results shows the pros and cons of both rule-based and statistical systems, and reveals some interesting correlations bewteen automatic and manual evaluation metrics on different translation systems.</abstract>
@@ -180,7 +180,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Alexander</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>222-233</pages>
       <url hash="2c3a386d">2014.amta-researchers.17</url>
       <abstract>This paper presents two improvements of language models based on Restricted Boltzmann Machine (RBM) for large machine translation tasks. In contrast to other continuous space approach, RBM based models can easily be integrated into the decoder and are able to directly learn a hidden representation of the n-gram. Previous work on RBM-based language models do not use a shared word representation and therefore, they might suffer of a lack of generalization for larger contexts. Moreover, since the training step is very time consuming, they are only used for quite small copora. In this work we add a shared word representation for the RBM-based language model by factorizing the weight matrix. In addition, we propose an efficient and tailored sampling algorithm that allows us to drastically speed up the training process. Experiments are carried out on two German to English translation tasks and the results show that the training time could be reduced by a factor of 10 without any drop in performance. Furthermore, the RBM-based model can also be trained on large size corpora.</abstract>
@@ -219,8 +219,8 @@
     <paper id="21">
       <title>Using noun class information to model selectional preferences for translating prepositions in <fixed-case>SMT</fixed-case></title>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>275-287</pages>
       <url hash="91701cda">2014.amta-researchers.21</url>
       <abstract>Translating prepositions is a difficult and under-studied problem in SMT. We present a novel method to improve the translation of prepositions by using noun classes to model their selectional preferences. We compare three variants of noun class information: (i) classes induced from the lexical resource GermaNet or obtained from clusterings based on either (ii) window information or (iii) syntactic features. Furthermore, we experiment with PP rule generalization. While we do not significantly improve over the baseline, our results demonstrate that (i) integrating selectional preferences as rigid class annotation in the parse tree is sub-optimal, and that (ii) clusterings based on window co-occurrence are more robust than syntax-based clusters or GermaNet classes for the task of modeling selectional preferences.</abstract>
@@ -238,7 +238,7 @@
     <paper id="23">
       <title>Data selection for compact adapted <fixed-case>SMT</fixed-case> models</title>
       <author><first>Shachar</first><last>Mirkin</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>301-314</pages>
       <url hash="bdf685c3">2014.amta-researchers.23</url>
       <abstract>Data selection is a common technique for adapting statistical translation models for a specific domain, which has been shown to both improve translation quality and to reduce model size. Selection relies on some in-domain data, of the same domain of the texts expected to be translated. Selecting the sentence-pairs that are most similar to the in-domain data from a pool of parallel texts has been shown to be effective; yet, this approach holds the risk of resulting in a limited coverage, when necessary n-grams that do appear in the pool are less similar to in-domain data that is available in advance. Some methods select additional data based on the actual text that needs to be translated. While useful, this is not always a practical scenario. In this work we describe an extensive exploration of data selection techniques over Arabic to French datasets, and propose methods to address both similarity and coverage considerations while maintaining a limited model size.</abstract>
@@ -266,9 +266,9 @@
     <paper id="26">
       <title>Automatic dialect classification for statistical machine translation</title>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>342-355</pages>
       <url hash="080a6f0f">2014.amta-researchers.26</url>
       <abstract>The training data for statistical machine translation are gathered from various sources representing a mixture of domains. In this work, we argue that when translating dialects representing varieties of the same language, a manually assigned data source is not a reliable indicator of the dialect. We resort to automatic dialect classification to refine the training corpora according to the different dialects and build improved dialect specific systems. A fairly standard classifier for Arabic developed within this work achieves state-of-the-art performance, with classification precision above 90%, making it usefully accurate for our application. The classification of the data is then used to distinguish between the different dialects, split the data accordingly, and utilize the new splits for several adaptation techniques. Performing translation experiments on a large scale dialectal Arabic to English translation task, our results show that the classifier generates better contrast between the dialects and achieves superior translation quality than using the original manual corpora splits.</abstract>
@@ -347,9 +347,9 @@
     </paper>
     <paper id="7">
       <title>Real-world challenges in application of <fixed-case>MT</fixed-case> for localization: the <fixed-case>B</fixed-case>altic case</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <pages>66-79</pages>
       <attachment type="presentation" hash="a7c81efa">2014.amta-users.7.Presentation.pdf</attachment>
       <bibkey>pinnis-etal-2014-real</bibkey>
@@ -391,9 +391,9 @@
     </paper>
     <paper id="13">
       <title>Machine translation for e-government – the <fixed-case>B</fixed-case>altic case</title>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Rihards</first><last>Kalniņš</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="rihards-kalnins"><first>Rihards</first><last>Kalniņš</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
       <pages>181-193</pages>
       <attachment type="presentation" hash="07d1f5c6">2014.amta-users.13.Presentation.pdf</attachment>
@@ -404,16 +404,16 @@
       <author><first>Tanya</first><last>Helmen</last></author>
       <author><first>Vanesa</first><last>Jurica</last></author>
       <author><first>Danielle</first><last>Silverman</last></author>
-      <author><first>Elizabeth</first><last>Richerson</last></author>
+      <author id="elizabeth-schroeder"><first>Elizabeth</first><last>Richerson</last></author>
       <pages>194-202</pages>
       <attachment type="presentation" hash="0b3770b0">2014.amta-users.14.Presentation.pdf</attachment>
       <bibkey>helmen-etal-2014-panel</bibkey>
     </paper>
     <paper id="15">
       <title>A novel use of <fixed-case>MT</fixed-case> in the development of a text level analytic for language learning</title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>203-212</pages>
       <attachment type="presentation" hash="92af34aa">2014.amta-users.15.Presentation.pdf</attachment>
       <bibkey>van-ess-dykema-etal-2014-novel</bibkey>
@@ -436,9 +436,9 @@
     </meta>
     <paper id="1">
       <title>Handling entities in <fixed-case>MT</fixed-case>/<fixed-case>CAT</fixed-case>/<fixed-case>HLT</fixed-case></title>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <author><first>Linda</first><last>Moreau</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <attachment type="presentation" hash="26e6287d">2014.amta-tutorials.1.Presentation.pdf</attachment>
       <bibkey>miller-etal-2014-handling</bibkey>
     </paper>
@@ -489,12 +489,12 @@
     </meta>
     <paper id="1">
       <title>Integrating online and active learning in a computer-assisted translation workbench</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
       <author><first>Dan Cheung</first><last>Petersen</last></author>
       <author><first>Barbara</first><last>Dragsted</last></author>
@@ -506,9 +506,9 @@
     </paper>
     <paper id="2">
       <title>Towards a combination of online and multitask learning for <fixed-case>MT</fixed-case> quality estimation: a preliminary study</title>
-      <author><first>José G.C.</first><last>de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.C.</first><last>de Souza</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>9-19</pages>
       <url hash="d8c4748e">2014.amta-workshop.2</url>
       <abstract>Quality estimation (QE) for machine translation has emerged as a promising way to provide real-world applications with methods to estimate at run-time the reliability of automatic translations. Real-world applications, however, pose challenges that go beyond those of current QE evaluation settings. For instance, the heterogeneity and the scarce availability of training data might contribute to significantly raise the bar. To address these issues we compare two alternative machine learning paradigms, namely online and multi-task learning, measuring their capability to overcome the limitations of current batch methods. The results of our experiments, which are carried out in the same experimental setting, demonstrate the effectiveness of the two methods and suggest their complementarity. This indicates, as a promising research avenue, the possibility to combine their strengths into an online multi-task approach to the problem.</abstract>
@@ -546,7 +546,7 @@
       <author><first>David</first><last>Orrego-Carmona</last></author>
       <author><first>Ashleigh Rhea</first><last>Gonzales</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>51-60</pages>
       <url hash="a766f32c">2014.amta-workshop.6</url>
       <abstract>The purpose of the current investigation is to predict post-editor profiles based on user behaviour and demographics using machine learning techniques to gain a better understanding of post-editor styles. Our study extracts process unit features from the CasMaCat LS14 database from the CRITT Translation Process Research Database (TPR-DB). The analysis has two main research goals: We create n-gram models based on user activity and part-of-speech sequences to automatically cluster post-editors, and we use discriminative classifier models to characterize post-editors based on a diverse range of translation process features. The classification and clustering of participants resulting from our study suggest this type of exploration could be used as a tool to develop new translation tool features or customization possibilities.</abstract>
@@ -577,9 +577,9 @@
     <paper id="2">
       <title>Comparison of post-editing productivity between professional translators and lay users</title>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>20-33</pages>
       <url hash="3eb8da83">2014.amta-wptp.2</url>
       <abstract>This work compares the post-editing productivity of professional translators and lay users. We integrate an English to Basque MT system within Bologna Translation Service, an end-to-end translation management platform, and perform a producitivity experiment in a real working environment. Six translators and six lay users translate or post-edit two texts from English into Basque. Results suggest that overall, post-editing increases translation throughput for both translators and users, although the latter seem to benefit more from the MT output. We observe that translators and users perceive MT differently. Additionally, a preliminary analysis seems to suggest that familiarity with the domain, source text complexity and MT quality might affect potential productivity gain.</abstract>
@@ -605,7 +605,7 @@
       <title>Perception vs. reality: measuring machine translation post-editing productivity</title>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Declan</first><last>Groves</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>60-72</pages>
@@ -617,7 +617,7 @@
       <title>Cognitive demand and cognitive effort in post-editing</title>
       <author><first>Isabel</first><last>Lacruz</last></author>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>73-84</pages>
       <url hash="b70dcca2">2014.amta-wptp.6</url>
       <abstract>The pause to word ratio, the number of pauses per word in a post-edited MT segment, is an indicator of cognitive effort in post-editing (Lacruz and Shreve, 2014). We investigate how low the pause threshold can reasonably be taken, and we propose that 300 ms is a good choice, as pioneered by Schilperoord (1996). We then seek to identify a good measure of the cognitive demand imposed by MT output on the post-editor, as opposed to the cognitive effort actually exerted by the post-editor during post-editing. Measuring cognitive demand is closely related to measuring MT utility, the MT quality as perceived by the post-editor. HTER, an extrinsic edit to word ratio that does not necessarily correspond to actual edits per word performed by the post-editor, is a well-established measure of MT quality, but it does not comprehensively capture cognitive demand (Koponen, 2012). We investigate intrinsic measures of MT quality, and so of cognitive demand, through edited-error to word metrics. We find that the transfer-error to word ratio predicts cognitive effort better than mechanical-error to word ratio (Koby and Champe, 2013). We identify specific categories of cognitively challenging MT errors whose error to word ratios correlate well with cognitive effort.</abstract>
@@ -685,9 +685,9 @@
     <paper id="14">
       <title>Real time adaptive machine translation: cdec and <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>enter</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Isabel</first><last>Lacruz</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>123</pages>
       <url hash="49448eea">2014.amta-wptp.14</url>
       <abstract>cdec Realtime and TransCenter provide an end-to-end experimental setup for machine translation post-editing research. Realtime provides a framework for building adaptive MT systems that learn from post-editor feedback while TransCenter incorporates a web-based translation interface that connects users to these systems and logs post-editing activity. This combination allows the straightforward deployment of MT systems specifically for post-editing and analysis of translator productivity when working with adaptive systems. Both toolkits are freely available under open source licenses.</abstract>
diff --git a/data/xml/2014.clib.xml b/data/xml/2014.clib.xml
index db2f77a610..81fff76494 100644
--- a/data/xml/2014.clib.xml
+++ b/data/xml/2014.clib.xml
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>Harnessing Language Technologies in Multilingual Information Channelling Services</title>
-      <author><first>Diman</first><last>Karagiozov</last></author>
+      <author id="diman-karagyozov"><first>Diman</first><last>Karagiozov</last></author>
       <pages>6–13</pages>
       <abstract>Scientists and industry have put significant efforts in creating suitable tools to analyze information flows. However, up to now there are no successful solutions for 1) dynamic modeling of the user-defined interests and further personalization of the results, 2) effective cross-language information retrieval, and 3) processing of multilingual content. As a consequence, much of the potentially relevant and otherwise accessible data from the media stream may elude users’ grasp. We present a multilingual information channeling system, MediaTalk, which offers broad integration between language technologies and advanced data processing algorithms for annotation, analysis and classification of multilingual content. As a result, the system not only provides an all-in-one monitoring service that covers both traditional and social media, but also offers dynamic modeling of user profiles, personalization of obtained data and cross-language information retrieval. Bulgarian and English press clipping services relying on this system implement advanced functionalities such as identification of emerging topics, forecasting and trend prediction, all of which allow the users to monitor their standing reputation, events and relations. The architecture of the system is robust, extensible and adheres to the Big Data paradigm.</abstract>
       <url hash="09d9e2ea">2014.clib-1.2</url>
@@ -51,7 +51,7 @@
       <author><first>Maria</first><last>Todorova</last></author>
       <author><first>Tsvetana</first><last>Dimitrova</last></author>
       <author><first>Borislav</first><last>Rizov</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <pages>23–31</pages>
       <abstract>Romanian and Bulgarian are Balkan languages with rich derivational morphology that, if introduced into their respective wordnets, can aid broadening of the wordnet content and the possible NLP applications. In this paper we present a joint work on introducing derivation into the Bulgarian and the Romanian WordNets, BulNet and RoWordNet, respectively, by identifying and subsequently labelling the derivationally and semantically related noun-verb pairs. Our research aims at providing a framework for a comparative study on derivation in the two languages and offering training material for the automatic identification and assignment of derivational and morphosemantic relations needed in various applications.</abstract>
@@ -62,9 +62,9 @@
       <title>Semi-Automatic Detection of Multiword Expressions in the <fixed-case>S</fixed-case>lovak Dependency Treebank</title>
       <author><first>Daniela</first><last>Majchrakova</last></author>
       <author><first>Ondrej</first><last>Dusek</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Agata</first><last>Karcova</last></author>
-      <author><first>Radovan</first><last>Garabik</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabik</last></author>
       <pages>32–39</pages>
       <abstract>We describe a method for semi-automatic extraction of Slovak multiword expressions (MWEs) from a dependency treebank. The process uses an automatic conversion from dependency syntactic trees to deep syntax and automatic tagging of verbal argument nodes based on a valency dictionary. Both the valency dictionary and the treebank conversion were adapted from the corresponding Czech versions; the automatically translated valency dictionary has been manually proofread and corrected. There are two main achievements – a valency dictionary of Slovak MWEs with direct links to corresponding expressions in the Czech dictionary, PDT-Vallex, and a method of extraction of MWEs from the Slovak Dependency Treebank. The extraction reached very high precision but lower recall in a manual evaluation. This is a work in progress, the overall goal of which is twofold: to create a Slovak language valency dictionary paralleling the Czech one, with bilingual links; and to use the extracted verbal frames in a collocation dictionary of Slovak verbs.</abstract>
       <url hash="17f10a1b">2014.clib-1.5</url>
@@ -107,7 +107,7 @@
     <paper id="10">
       <title>Recognize the Generality Relation between Sentences Using Asymmetric Association Measures</title>
       <author><first>Sebastiao</first><last>Pais</last></author>
-      <author><first>Gael</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gael</first><last>Dias</last></author>
       <author><first>Rumen</first><last>Moraliyski</last></author>
       <pages>73–81</pages>
       <abstract>In this paper we focus on a particular case of entailment, namely entailment by generality. We argue that there exist various types of implication, a range of different levels of entailment reasoning, based on lexical, syntactic, logical and common sense clues, at different levels of difficulty. We introduce the paradigm of Textual Entailment (TE) by Generality, which can be defined as the entailment from a specific statement towards a relatively more general statement. In this context, the Text T entails the Hypothesis H, and at the same time H is more general than T . We propose an unsupervised and language-independent method to recognize TE by Generality given a case of Text − Hypothesis or T − H where entailment relation holds.</abstract>
@@ -117,7 +117,7 @@
     <paper id="11">
       <title>Unsupervised and Language Independent Method to Recognize Textual Entailment by Generality</title>
       <author><first>Sebastiao</first><last>Pais</last></author>
-      <author><first>Gael</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gael</first><last>Dias</last></author>
       <author><first>Joao</first><last>Cordeiro</last></author>
       <author><first>Rumen</first><last>Moraliyski</last></author>
       <pages>82–90</pages>
diff --git a/data/xml/2014.eamt.xml b/data/xml/2014.eamt.xml
index 52cce634b8..7aaa962921 100644
--- a/data/xml/2014.eamt.xml
+++ b/data/xml/2014.eamt.xml
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Combining bilingual terminology mining and morphological modeling for domain adaptation in <fixed-case>SMT</fixed-case></title>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <pages>11–18</pages>
       <url hash="7d922986">2014.eamt-1.3</url>
@@ -44,11 +44,11 @@
     </paper>
     <paper id="4">
       <title>An efficient method to assist non-expert users in extending dictionaries by assigning stems and inflectional paradigms to unknknown words</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Víctor M.</first><last>Sánchez-Cartegna</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Rafael C.</first><last>Carrasco</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>19–26</pages>
       <url hash="109a298b">2014.eamt-1.4</url>
@@ -56,16 +56,16 @@
     </paper>
     <paper id="5">
       <title>Efficient wordgraph for interactive translation prediction</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>27–34</pages>
       <bibkey>sanchis-trilles-etal-2014-efficient</bibkey>
     </paper>
     <paper id="6">
       <title>Translation model based weighting for phrase extraction</title>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>35–43</pages>
       <url hash="3956ad0c">2014.eamt-1.6</url>
       <bibkey>mansour-ney-2014-translation</bibkey>
@@ -74,7 +74,7 @@
       <title>Data selection for discriminative training in statistical machine translation</title>
       <author><first>Xingyi</first><last>Song</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>45–52</pages>
       <url hash="426251a4">2014.eamt-1.7</url>
       <bibkey>song-etal-2014-data</bibkey>
@@ -110,7 +110,7 @@
       <title><fixed-case>CASMACAT</fixed-case>: cognitive analysis and statistical methods for advanced computer aided translation</title>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Eva</first><last>Marcos</last></author>
       <pages>57</pages>
       <url hash="4937d58d">2014.eamt-1.12</url>
@@ -154,13 +154,13 @@
     </paper>
     <paper id="18">
       <title><fixed-case>SEECAT</fixed-case>: <fixed-case>ASR</fixed-case> &amp; Eye-tracking enabled computer-assisted translation</title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Karan</first><last>Singla</last></author>
       <author><first>Aniruddha</first><last>Tammewar</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
       <author><first>Ankita</first><last>Thakur</last></author>
       <author><first>Anusuya</first><last>M.A.</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <pages>81-88</pages>
       <url hash="ca5d9ffe">2014.eamt-1.18</url>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="21">
       <title>Document-level translation quality estimation: exploring discourse and pseudo-references</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>101–108</pages>
       <url hash="2425cf80">2014.eamt-1.21</url>
@@ -201,8 +201,8 @@
     </paper>
     <paper id="23">
       <title>An efficient two-pass decoder for <fixed-case>SMT</fixed-case> using word confidence estimation</title>
-      <author><first>Ngoc-Quang</first><last>Luong</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc-Quang</first><last>Luong</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>117–124</pages>
       <url hash="c4cbf716">2014.eamt-1.23</url>
@@ -282,8 +282,8 @@
     </paper>
     <paper id="35">
       <title>Collaborative web <fixed-case>UI</fixed-case> localization, or how to build feature-rich multilingual datasets</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
       <pages>151–154</pages>
       <url hash="9e9af54e">2014.eamt-1.35</url>
       <bibkey>alabau-leiva-2014-collaborative</bibkey>
@@ -309,7 +309,7 @@
       <title>Using a new analytic measure for the annotation and analysis of <fixed-case>MT</fixed-case> errors on real data</title>
       <author><first>Arle</first><last>Lommel</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Kim</first><last>Harris</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
@@ -337,7 +337,7 @@
     </paper>
     <paper id="41">
       <title>Relations between different types of post-editing operations, cognitive effort and temporal effort</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
@@ -356,9 +356,9 @@
     <paper id="43">
       <title>Application of machine translation in localization into low-resourced languages</title>
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Tomas</first><last>Hudik</last></author>
       <pages>209–216</pages>
       <url hash="641083b8">2014.eamt-1.43</url>
@@ -375,11 +375,11 @@
     <paper id="45">
       <title>Extrinsic evaluation of web-crawlers in machine translation: a study on <fixed-case>C</fixed-case>roatian-<fixed-case>E</fixed-case>nglish for the tourism domain</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <pages>221–224</pages>
       <url hash="c8b3d19a">2014.eamt-1.45</url>
       <bibkey>toral-etal-2014-extrinsic</bibkey>
diff --git a/data/xml/2014.iwslt.xml b/data/xml/2014.iwslt.xml
index 2594663f4b..68ae1269d2 100644
--- a/data/xml/2014.iwslt.xml
+++ b/data/xml/2014.iwslt.xml
@@ -25,7 +25,7 @@
       <month>December 4-5</month>
       <year>2014</year>
       <editor><first>Marcello</first><last>Federico</last></editor>
-      <editor><first>Sebastian</first><last>Stüker</last></editor>
+      <editor id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></editor>
       <editor><first>François</first><last>Yvon</last></editor>
       <venue>iwslt</venue>
     </meta>
@@ -108,7 +108,7 @@
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
@@ -117,7 +117,7 @@
       <author><first>Isabel</first><last>Slawik</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Eunach</first><last>Cho</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -133,7 +133,7 @@
       <author><first>Brian</first><last>Thompson</last></author>
       <author><first>Jessica</first><last>Ray</last></author>
       <author><first>Michael</first><last>Coury</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <author><first>Katherine</first><last>Young</last></author>
@@ -151,7 +151,7 @@
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>73-79</pages>
       <url hash="1d71b80b">2014.iwslt-evaluation.9</url>
       <abstract>This paper describes our German, Italian and English Speech-to-Text (STT) systems for the 2014 IWSLT TED ASR track. Our setup uses ROVER and confusion network combination from various subsystems to achieve a good overall performance. The individual subsystems are built by using different front-ends, (e.g., MVDR-MFCC or lMel), acoustic models (GMM or modular DNN) and phone sets and by training on various subsets of the training data. Decoding is performed in two stages, where the GMM systems are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems.</abstract>
@@ -199,7 +199,7 @@
       <author><first>Achraf Ben</first><last>Romdhane</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
       <author><first>Abdelmajid Ben</first><last>Hamadou</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>96-99</pages>
       <url hash="26576ce5">2014.iwslt-evaluation.13</url>
       <abstract>In this paper, we present our submitted MT system for the IWSLT2014 Evaluation Campaign. We participated in the English-French translation task. In this article we focus on one of the most important component of SMT: the language model. The idea is to use a phrase-based language model. For that, sequences from the source and the target language models are retrieved and used to calculate a phrase n-gram language model. These phrases are used to rewrite the parallel corpus which is then used to calculate a new translation model.</abstract>
@@ -209,7 +209,7 @@
       <title><fixed-case>LIUM</fixed-case> <fixed-case>E</fixed-case>nglish-to-<fixed-case>F</fixed-case>rench spoken language translation system and the Vecsys/<fixed-case>LIUM</fixed-case> automatic speech recognition system for <fixed-case>I</fixed-case>talian language for <fixed-case>IWSLT</fixed-case> 2014</title>
       <author><first>Anthony</first><last>Rousseau</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Samir</first><last>Bennacef</last></author>
@@ -223,11 +223,11 @@
     <paper id="15">
       <title><fixed-case>LIMSI</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench speech translation system</title>
       <author><first>Natalia</first><last>Segal</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>106-112</pages>
       <url hash="74b78226">2014.iwslt-evaluation.15</url>
@@ -256,7 +256,7 @@
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>119-126</pages>
       <url hash="fd7fed03">2014.iwslt-evaluation.17</url>
       <abstract>In this paper, we present the KIT systems participating in the TED translation tasks of the IWSLT 2014 machine translation evaluation. We submitted phrase-based translation systems for all three official directions, namely English→German, German→English, and English→French, as well as for the optional directions English→Chinese and English→Arabic. For the official directions we built systems both for the machine translation as well as the spoken language translation track. This year we improved our systems’ performance over last year through n-best list rescoring using neural network-based translation and language models and novel preordering rules based on tree information of multiple syntactic levels. Furthermore, we could successfully apply a novel phrase extraction algorithm and transliteration of unknown words for Arabic. We also submitted a contrastive system for German→English built with stemmed German adjectives. For the SLT tracks, we used a monolingual translation system to translate the lowercased ASR hypotheses with all punctuation stripped to truecased, punctuated output as a preprocessing step to our usual translation system.</abstract>
@@ -290,7 +290,7 @@
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>139-142</pages>
       <url hash="3dad3114">2014.iwslt-evaluation.20</url>
       <abstract>This paper describes NICT’s participation in the IWSLT 2014 evaluation campaign for the TED Chinese-English translation shared-task. Our approach used a combination of phrase-based and hierarchical statistical machine translation (SMT) systems. Our focus was in several areas, specifically system combination, word alignment, and various language modeling techniques including the use of neural network joint models. Our experiments on the test set from the 2013 shared task, showed that an improvement in BLEU score can be gained in translation performance through all of these techniques, with the largest improvements coming from using large data sizes to train the language model.</abstract>
@@ -310,7 +310,7 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>150-154</pages>
       <url hash="1bce5725">2014.iwslt-evaluation.22</url>
       <abstract>This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign International Workshop on Spoken Language Translation (IWSLT) 2014. We participated in both the MT and SLT tracks for the English→French and German→English language pairs and applied the identical training pipeline and models on both language pairs. Our state-of-the-art phrase-based baseline systems are augmented with maximum expected BLEU training for phrasal, lexical and reordering models. Further, we apply rescoring with novel recurrent neural language and translation models. The same systems are used for the SLT track, where we additionally perform punctuation prediction on the automatic transcriptions employing hierarchical phrase-based translation. We are able to improve RWTH’s 2013 evaluation systems by 1.7-1.8% BLEU absolute.</abstract>
@@ -332,7 +332,7 @@
       <title>Advances in dialectal <fixed-case>A</fixed-case>rabic speech recognition: a study using <fixed-case>T</fixed-case>witter to improve <fixed-case>E</fixed-case>gyptian <fixed-case>ASR</fixed-case></title>
       <author><first>Ahmed</first><last>Ali</last></author>
       <author><first>Hamdy</first><last>Mubarak</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>156-162</pages>
       <url hash="32f17e1e">2014.iwslt-papers.1</url>
       <abstract>This paper reports results in building an Egyptian Arabic speech recognition system as an example for under-resourced languages. We investigated different approaches to build the system using 10 hours for training the acoustic model, and results for both grapheme system and phoneme system using MADA. The phoneme-based system shows better results than the grapheme-based system. In this paper, we explore the use of tweets written in dialectal Arabic. Using 880K Egyptian tweets reduced the Out Of Vocabulary (OOV) rate from 15.1% to 3.2% and the WER from 59.6% to 44.7%, a relative gain 25% in WER.</abstract>
@@ -341,8 +341,8 @@
     <paper id="2">
       <title>Towards simultaneous interpreting: the timing of incremental machine translation and speech synthesis</title>
       <author><first>Timo</first><last>Baumann</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>163-168</pages>
       <url hash="71d2a2d5">2014.iwslt-papers.2</url>
       <abstract>In simultaneous interpreting, human experts incrementally construct and extend partial hypotheses about the source speaker’s message, and start to verbalize a corresponding message in the target language, based on a partial translation – which may have to be corrected occasionally. They commence the target utterance in the hope that they will be able to finish understanding the source speaker’s message and determine its translation in time for the unfolding delivery. Of course, both incremental understanding and translation by humans can be garden-pathed, although experts are able to optimize their delivery so as to balance the goals of minimal latency, translation quality and high speech fluency with few corrections. We investigate the temporal properties of both translation input and output to evaluate the tradeoff between low latency and translation quality. In addition, we estimate the improvements that can be gained with a tempo-elastic speech synthesizer.</abstract>
@@ -364,7 +364,7 @@
       <title>Machine translation of multi-party meetings: segmentation and disfluency removal strategies</title>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>176-183</pages>
       <url hash="01c2b606">2014.iwslt-papers.4</url>
       <abstract>Translating meetings presents a challenge since multi-speaker speech shows a variety of disfluencies. In this paper we investigate the importance of transforming speech into well-written input prior to translating multi-party meetings. We first analyze the characteristics of this data and establish oracle scores. Sentence segmentation and punctuation are performed using a language model, turn information, or a monolingual translation system. Disfluencies are removed by a CRF model trained on in-domain and out-of-domain data. For comparison, we build a combined CRF model for punctuation insertion and disfluency removal. By applying these models, multi-party meetings are transformed into fluent input for machine translation. We evaluate the models with regard to translation performance and are able to achieve an improvement of 2.1 to 4.9 BLEU points depending on the availability of turn information.</abstract>
@@ -376,7 +376,7 @@
       <author><first>Ye Kyaw</first><last>Thu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>184-191</pages>
       <url hash="3c560afd">2014.iwslt-papers.5</url>
       <abstract>We conduct dependency-based head finalization for statistical machine translation (SMT) for Myanmar (Burmese). Although Myanmar is an understudied language, linguistically it is a head-final language with similar syntax to Japanese and Korean. So, applying the efficient techniques of Japanese and Korean processing to Myanmar is a natural idea. Our approach is a combination of two approaches. The first is a head-driven phrase structure grammar (HPSG) based head finalization for English-to-Japanese translation, the second is dependency-based pre-ordering originally designed for English-to-Korean translation. We experiment on Chinese-, English-, and French-to-Myanmar translation, using a statistical pre-ordering approach as a comparison method. Experimental results show the dependency-based head finalization was able to consistently improve a baseline SMT system, for different source languages and different segmentation schemes for the Myanmar language.</abstract>
@@ -384,7 +384,7 @@
     </paper>
     <paper id="6">
       <title>Discriminative adaptation of continuous space translation models</title>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>192-199</pages>
@@ -397,7 +397,7 @@
       <author><first>Matthias</first><last>Eck</last></author>
       <author><first>Yuri</first><last>Zemlyanskiy</last></author>
       <author><first>Joy</first><last>Zhang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>200-205</pages>
       <url hash="1186a64a">2014.iwslt-papers.7</url>
       <abstract>We introduce two methods to collect additional training data for statistical machine translation systems from public social network content. The first method identifies multilingual content where the author self-translated their own post to reach additional friends, fans or customers. Once identified, we can split the post in the language segments and extract translation pairs from this content. The second methods considers web links (URLs) that users add as part of their post to point the reader to a video, article or website. If the same URL is shared from different language users, there is a chance they might give the same comment in their respective language. We use a support vector machine (SVM) as a classifier to identify true translations from all candidate pairs. We collected additional translation pairs using both methods for the language pairs Spanish-English and Portuguese-English. Testing the collected data as additional training data for statistical machine translations on in-domain test sets resulted in very significant improvements of up to 5 BLEU.</abstract>
@@ -407,7 +407,7 @@
       <title>An exploration of segmentation strategies in stream decoding</title>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="a5f473d2">2014.iwslt-papers.8</url>
       <abstract>In this paper we explore segmentation strategies for the stream decoder a method for decoding from a continuous stream of input tokens, rather than the traditional method of decoding from sentence segmented text. The behavior of the decoder is analyzed and modifications to the decoding algorithm are proposed to improve its performance. The experimental results show our proposed decoding strategies to be effective, and add support to the original findings that this approach is capable of approaching the performance of the underlying phrase-based machine translation decoder, at useful levels of latency. Our experiments evaluated the stream decoder on a broader set of language pairs than in previous work. We found most European language pairs were similar in character, and report results on English-Chinese and English-German pairs which are of interest due to the reordering required.</abstract>
       <pages>206-213</pages>
@@ -427,7 +427,7 @@
       <title>Lexical translation model using a deep neural network architecture</title>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>223-229</pages>
       <url hash="a8c842dd">2014.iwslt-papers.10</url>
       <abstract>In this paper we combine the advantages of a model using global source sentence contexts, the Discriminative Word Lexicon, and neural networks. By using deep neural networks instead of the linear maximum entropy model in the Discriminative Word Lexicon models, we are able to leverage dependencies between different source words due to the non-linearity. Furthermore, the models for different target words can share parameters and therefore data sparsity problems are effectively reduced. By using this approach in a state-of-the-art translation system, we can improve the performance by up to 0.5 BLEU points for three different language pairs on the TED translation task.</abstract>
@@ -436,10 +436,10 @@
     <paper id="11">
       <title>Anticipatory translation model adaptation for bilingual conversations</title>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Rohit</first><last>Kumar</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>230-235</pages>
       <url hash="301a3fd8">2014.iwslt-papers.11</url>
       <abstract>Conversational spoken language translation (CSLT) systems facilitate bilingual conversations in which the two participants speak different languages. Bilingual conversations provide additional contextual information that can be used to improve the underlying machine translation system. In this paper, we describe a novel translation model adaptation method that anticipates a participant’s response in the target language, based on his counterpart’s prior turn in the source language. Our proposed strategy uses the source language utterance to perform cross-language retrieval on a large corpus of bilingual conversations in order to obtain a set of potentially relevant target responses. The responses retrieved are used to bias translation choices towards anticipated responses. On an Iraqi-to-English CSLT task, our method achieves a significant improvement over the baseline system in terms of BLEU, TER and METEOR metrics.</abstract>
@@ -461,8 +461,8 @@
       <author><first>Yuan</first><last>Cao</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Daniel</first><last>Povey</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="daniel-povey"><first>Daniel</first><last>Povey</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>244-248</pages>
       <url hash="8de79459">2014.iwslt-papers.13</url>
       <abstract>Translation of the output of automatic speech recognition (ASR) systems, also known as speech translation, has received a lot of research interest recently. This is especially true for programs such as DARPA BOLT which focus on improving spontaneous human-human conversation across languages. However, this research is hindered by the dearth of datasets developed for this explicit purpose. For Egyptian Arabic-English, in particular, no parallel speechtranscription-translation dataset exists in the same domain. In order to support research in speech translation, we introduce the Callhome Egyptian Arabic-English Speech Translation Corpus. This supplements the existing LDC corpus with four reference translations for each utterance in the transcripts. The result is a three-way parallel dataset of Egyptian Arabic Speech, transcriptions and English translations.</abstract>
@@ -472,7 +472,7 @@
       <title>Improving in-domain data selection for small in-domain sets</title>
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Joshua</first><last>Winebarger</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>249-256</pages>
       <url hash="901c57a4">2014.iwslt-papers.14</url>
       <abstract>Finding sufficient in-domain text data for language modeling is a recurrent challenge. Some methods have already been proposed for selecting parts of out-of-domain text data most closely resembling the in-domain data using a small amount of the latter. Including this new “near-domain” data in training can potentially lead to better language model performance, while reducing training resources relative to incorporating all data. One popular, state-of-the-art selection process based on cross-entropy scores makes use of in-domain and out-ofdomain language models. In order to compensate for the limited availability of the in-domain data required for this method, we introduce enhancements to two of its steps. Firstly, we improve the procedure for drawing the outof-domain sample data used for selection. Secondly, we use word-associations in order to extend the underlying vocabulary of the sample language models used for scoring. These enhancements are applied to selecting text for language modeling of talks given in a technical subject area. Besides comparing perplexity, we judge the resulting language models by their performance in automatic speech recognition and machine translation tasks. We evaluate our method in different contexts. We show that it yields consistent improvements, up to 2% absolute reduction in word error rate and 0.3 Bleu points. We achieve these improvements even given a much smaller in-domain set.</abstract>
@@ -481,10 +481,10 @@
     <paper id="15">
       <title>Multilingual deep bottle neck features: a study on language selection and training techniques</title>
       <author><first>Markus</first><last>Müller</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Zaid</first><last>Sheikh</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>257-264</pages>
       <url hash="863f0c97">2014.iwslt-papers.15</url>
       <abstract>Previous work has shown that training the neural networks for bottle neck feature extraction in a multilingual way can lead to improvements in word error rate and average term weighted value in a telephone key word search task. In this work we conduct a systematic study on a) which multilingual training strategy to employ, b) the effect of language selection and amount of multilingual training data used and c) how to find a suitable combination for languages. We conducted our experiment on the key word search task and the languages of the IARPA BABEL program. In a first step, we assessed the performance of a single language out of all available languages in combination with the target language. Based on these results, we then combined a multitude of languages. We also examined the influence of the amount of training data per language, as well as different techniques for combining the languages during network training. Our experiments show that data from arbitrary additional languages does not necessarily increase the performance of a system. But when combining a suitable set of languages, a significant gain in performance can be achieved.</abstract>
@@ -507,7 +507,7 @@
       <title>Better punctuation prediction with hierarchical phrase-based translation</title>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>271-278</pages>
       <url hash="489a6e88">2014.iwslt-papers.17</url>
       <abstract>Punctuation prediction is an important task in spoken language translation and can be performed by using a monolingual phrase-based translation system to translate from unpunctuated to text with punctuation. However, a punctuation prediction system based on phrase-based translation is not able to capture long-range dependencies between words and punctuation marks. In this paper, we propose to employ hierarchical translation in place of phrase-based translation and show that this approach is more robust for unseen word sequences. Furthermore, we analyze different optimization criteria for tuning the scaling factors of a monolingual statistical machine translation system. In our experiments, we compare the new approach with other punctuation prediction methods and show improvements in terms of F1-Score and BLEU on the IWSLT 2014 German→English and English→French translation tasks.</abstract>
@@ -517,7 +517,7 @@
       <title>Rule-based preordering on multiple syntactic levels in statistical machine translation</title>
       <author><first>Ge</first><last>Wu</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>279-286</pages>
       <url hash="f2153747">2014.iwslt-papers.18</url>
       <abstract>We propose a novel data-driven rule-based preordering approach, which uses the tree information of multiple syntactic levels. This approach extend the tree-based reordering from one level into multiple levels, which has the capability to process more complicated reordering cases. We have conducted experiments in English-to-Chinese and Chinese-to-English translation directions. Our results show that the approach has led to improved translation quality both when it was applied separately or when it was combined with some other reordering approaches. As our reordering approach was used alone, it showed an improvement of 1.61 in BLEU score in the English-to-Chinese translation direction and an improvement of 2.16 in BLEU score in the Chinese-to-English translation direction, in comparison with the baseline, which used no word reordering. As our preordering approach were combined with the short rule [1], long rule [2] and tree rule [3] based preordering approaches, it showed further improvements of up to 0.43 in BLEU score in the English-to-Chinese translation direction and further improvements of up to 0.3 in BLEU score in the Chinese-to-English translation direction. Through the translations that used our preordering approach, we have also found many translation examples with improved syntactic structures.</abstract>
diff --git a/data/xml/2014.lilt.xml b/data/xml/2014.lilt.xml
index c8f9ec8396..49761bcced 100644
--- a/data/xml/2014.lilt.xml
+++ b/data/xml/2014.lilt.xml
@@ -12,7 +12,7 @@
       <title>Introduction</title>
       <author><first>Annie</first><last>Zaenen</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <url hash="d6aae7a0">2014.lilt-9.1</url>
       <bibkey>zaenen-etal-2014-introduction</bibkey>
     </paper>
@@ -34,15 +34,15 @@
     <paper id="4">
       <title>Decomposing Semantic Inference</title>
       <author><first>Elana</first><last>Cabria</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <abstract>Beside formal approaches to semantic inference that rely on logical representation of meaning, the notion of Textual Entailment (TE) has been proposed as an applied framework to capture major semantic inference needs across applications in Computational Linguistics. Although several approaches have been tried and evaluation campaigns have shown improvements in TE, a renewed interest is rising in the research community towards a deeper and better understanding of the core phenomena involved in textual inference. Pursuing this direction, we are convinced that crucial progress will derive from a focus on decomposing the complexity of the TE task into basic phenomena and on their combination. In this paper, we carry out a deep analysis on TE data sets, investigating the relations among two relevant aspects of semantic inferences: the logical dimension, i.e. the capacity of the inference to prove the conclusion from its premises, and the linguistic dimension, i.e. the linguistic devices used to accomplish the goal of the inference. We propose a decomposition approach over TE pairs, where single linguistic phenomena are isolated in what we have called atomic inference pairs, and we show that at this granularity level the actual correlation between the linguistic and the logical dimensions of semantic inferences emerges and can be empirically observed.</abstract>
       <url hash="bee7e91e">2014.lilt-9.4</url>
       <bibkey>cabria-magnini-2014-decomposing</bibkey>
     </paper>
     <paper id="5">
       <title>Frege in Space: A Program for Composition Distributional Semantics</title>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
       <abstract>The lexicon of any natural language encodes a huge number of distinct word meanings. Just to understand this article, you will need to know what thousands of words mean. The space of possible sentential meanings is infinite: In this article alone, you will encounter many sentences that express ideas you have never heard before, we hope. Statistical semantics has addressed the issue of the vastness of word meaning by proposing methods to harvest meaning automatically from large collections of text (corpora). Formal semantics in the Fregean tradition has developed methods to account for the infinity of sentential meaning based on the crucial insight of compositionality, the idea that meaning of sentences is built incrementally by combining the meanings of their constituents. This article sketches a new approach to semantics that brings together ideas from statistical and formal semantics to account, in parallel, for the richness of lexical meaning and the combinatorial power of sentential semantics. We adopt, in particular, the idea that word meaning can be approximated by the patterns of co-occurrence of words in corpora from statistical semantics, and the idea that compositionality can be captured in terms of a syntax-driven calculus of function application from formal semantics.</abstract>
       <url hash="026fd1f2">2014.lilt-9.5</url>
@@ -58,7 +58,7 @@
     <paper id="7">
       <title>Recent Progress on Monotonicity</title>
       <author><first>Thomas F.</first><last>Icard III</last></author>
-      <author><first>Lawrence S.</first><last>Moss</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></author>
       <abstract>This paper serves two purposes. It is a summary of much work concerning formal treatments of monotonicity and polarity in natural language, and it also discusses connections to related work on exclusion relations, and connections to psycholinguistics and computational linguistics. The second part of the paper presents a summary of some new work on a formal Monotonicity Calculus.</abstract>
       <url hash="d49819c5">2014.lilt-9.7</url>
       <bibkey>icard-iii-moss-2014-recent</bibkey>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>NL</fixed-case>og-like Inference and Commonsense Reasoning</title>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <abstract>Recent implementations of Natural Logic (NLog) have shown that NLog provides a quite direct means of going from sentences in ordinary language to many of the obvious entailments of those sentences. We show here that Episodic Logic (EL) and its Epilog implementation are well-adapted to capturing NLog-like inferences, but beyond that, also support inferences that require a combination of lexical knowledge and world knowledge. However, broad language understanding and commonsense reasoning are still thwarted by the “knowledge acquisition bottleneck”, and we summarize some of our ongoing and contemplated attacks on that persistent difficulty.</abstract>
       <url hash="60e04017">2014.lilt-9.9</url>
       <bibkey>schubert-2014-nlog</bibkey>
@@ -119,10 +119,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>CALL</fixed-case>-<fixed-case>SLT</fixed-case>: A Spoken <fixed-case>CALL</fixed-case> System Based on Grammar and Speech Recognition</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Isourakis</last></author>
       <author><first>Claudia</first><last>Baur</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johannna</first><last>Gerlach</last></author>
       <abstract>We describe CALL-SLT, a speech-enabled Computer-Assisted Language Learning application where the central idea is to prompt the student with an abstract representation of what they are supposed to say, and then use a combination of grammar-based speech recognition and rule-based translation to rate their response. The system has been developed to the level of a mature prototype, freely deployed on the web, with versions for several languages. We present an overview of the core system architecture and the various types of content we have developed. Finally, we describe several evaluations, the last of which is a study carried out over about a week using 130 subjects recruited through the Amazon Mechanical Turk, in which CALL-SLT was contrasted against a control version where the speech recognition component was disabled. The improvement in student learning performance between the two groups was significant at p &lt; 0.02.</abstract>
       <issue>2</issue>
@@ -183,7 +183,7 @@
     <paper id="6">
       <title>Démonette, a <fixed-case>F</fixed-case>rench derivational morpho-semantic network</title>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <abstract>Démonette is a derivational morphological network created from information provided by two existing lexical resources, DériF and Morphonette. It features a formal architecture in which words are associated with semantic types and where morphological relations, labelled with concrete and abstract bi-oriented definitions, connect derived words with their base and indirectly related words with each other.</abstract>
       <issue>5</issue>
       <url hash="d8ebc1eb">2014.lilt-11.6</url>
diff --git a/data/xml/2014.tal.xml b/data/xml/2014.tal.xml
index bdabf866fd..713f4f315e 100644
--- a/data/xml/2014.tal.xml
+++ b/data/xml/2014.tal.xml
@@ -36,7 +36,7 @@
     <paper id="3">
       <title><fixed-case>MEANS</fixed-case> : une approche sémantique pour la recherche de réponses aux questions médicales [<fixed-case>MEANS</fixed-case>: a semantic approach to medical question answering]</title>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>71–104</pages>
       <url hash="476dc5e0">2014.tal-1.3</url>
       <language>fra</language>
@@ -74,7 +74,7 @@
     </meta>
     <paper id="1">
       <title>Préface [Foreword]</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <pages>7–11</pages>
       <url hash="911f1741">2014.tal-2.1</url>
@@ -84,7 +84,7 @@
     <paper id="2">
       <title>Traduire la parole: le cas des <fixed-case>TED</fixed-case> Talks [Speech translation: the <fixed-case>TED</fixed-case> Talks case study]</title>
       <author><first>Natalia</first><last>Segal</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>13–45</pages>
       <url hash="0eb34c53">2014.tal-2.2</url>
@@ -95,7 +95,7 @@
       <title>Ajout de nouveaux noms propres au vocabulaire d’un système de transcription en utilisant un corpus diachronique [Adding proper names to the vocabulary of a speech transcription system using a contemporary diachronic corpus]</title>
       <author><first>Irina</first><last>Illina</last></author>
       <author><first>Dominique</first><last>Fohr</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>47–72</pages>
       <url hash="9d79ec66">2014.tal-2.3</url>
       <language>fra</language>
@@ -104,9 +104,9 @@
     <paper id="4">
       <title>De l’arabe standard vers l’arabe dialectal : projection de corpus et ressources linguistiques en vue du traitement automatique de l’oral dans les médias tunisiens [From <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic to <fixed-case>T</fixed-case>unisian dialect: corpus projection and linguistic resources towards the automatic processing of speech in the <fixed-case>T</fixed-case>unisian media]</title>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Lamia</first><last>Belguith</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Belguith</last></author>
       <pages>73–96</pages>
       <url hash="cca6cad8">2014.tal-2.4</url>
       <language>fra</language>
@@ -117,7 +117,7 @@
       <author><first>Adèle</first><last>Désoyer</last></author>
       <author><first>Frédéric</first><last>Landragin</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <pages>97–121</pages>
       <url hash="af5b202d">2014.tal-2.5</url>
@@ -126,7 +126,7 @@
     </paper>
     <paper id="6">
       <title>Détection des états affectifs lors d’interactions parlées : robustesse des indices non verbaux [Automatic in-voice affective state detection in spontaneous speech: robustness of non-verbal cues]</title>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <author><first>Marie</first><last>Tahon</last></author>
       <author><first>Mohamed A.</first><last>Sehili</last></author>
       <author><first>Agnès</first><last>Delaborde</last></author>
@@ -184,7 +184,7 @@
       <title>Learning word meanings from images of natural scenes</title>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>73–95</pages>
       <url hash="7a51bf3c">2014.tal-3.3</url>
       <bibkey>kadar-etal-2014-learning</bibkey>
@@ -208,9 +208,9 @@
       <author><first>Emmanuel</first><last>Navarro</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
       <author><first>Hintat</first><last>Cheung</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>97–121</pages>
       <url hash="666cdb35">2014.tal-3.5</url>
       <bibkey>gaume-etal-2014-skillex</bibkey>
diff --git a/data/xml/2014.tc.xml b/data/xml/2014.tc.xml
index 7a09367aaf..0ff9b79424 100644
--- a/data/xml/2014.tc.xml
+++ b/data/xml/2014.tc.xml
@@ -17,7 +17,7 @@
     </paper>
     <paper id="2">
       <title>Almost fifty years after the (first?) <fixed-case>ALPAC</fixed-case> report</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <url hash="6e71af66">2014.tc-1.2</url>
       <bibkey>proszeky-2014-almost</bibkey>
     </paper>
@@ -31,7 +31,7 @@
       <title>Using cross-language information retrieval and statistical language modelling in example-based machine translation</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <author><first>Othman</first><last>Zennaki</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
       <url hash="77b3ebe4">2014.tc-1.4</url>
       <bibkey>semmar-etal-2014-using</bibkey>
     </paper>
@@ -44,7 +44,7 @@
     <paper id="6">
       <title>i<fixed-case>C</fixed-case>ompile<fixed-case>C</fixed-case>orpora: a web-based application to semi-automatically compile multilingual comparable corpora</title>
       <author><first>Hernani</first><last>Costa</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <author><first>Miriam</first><last>Seghiri</last></author>
       <url hash="18c726f1">2014.tc-1.6</url>
       <bibkey>costa-etal-2014-icompilecorpora</bibkey>
@@ -61,7 +61,7 @@
       <title>Rule-based automatic post-processing of <fixed-case>SMT</fixed-case> output to reduce human post-editing effort</title>
       <author><first>Victoria</first><last>Porro</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
       <url hash="8808dce9">2014.tc-1.8</url>
       <bibkey>porro-etal-2014-rule</bibkey>
@@ -75,8 +75,8 @@
     <paper id="10">
       <title>Intelligent translation memory matching and retrieval metric exploiting linguistic technology</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Hanna</first><last>Bechara</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Bechara</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <url hash="96cebedb">2014.tc-1.10</url>
       <bibkey>gupta-etal-2014-intelligent</bibkey>
     </paper>
@@ -124,7 +124,7 @@
     <paper id="18">
       <title>Machine translation quality estimation adapted to the translation workflow</title>
       <author><first>Sabine</first><last>Hunsicker</last></author>
-      <author><first>Alexandru</first><last>Ceausu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceausu</last></author>
       <url hash="ae1b6ca5">2014.tc-1.18</url>
       <bibkey>hunsicker-ceausu-2014-machine</bibkey>
     </paper>
@@ -162,8 +162,8 @@
     <paper id="24">
       <title>A tool for building multilingual voice questionnaires</title>
       <author><first>Alejandro</first><last>Armando</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <url hash="95f2f79d">2014.tc-1.24</url>
       <bibkey>armando-etal-2014-tool</bibkey>
@@ -196,7 +196,7 @@
     <paper id="29">
       <title><fixed-case>T</fixed-case>witter Crowd Translation – design and objectives</title>
       <author><first>Eduard</first><last>Šubert</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <url hash="435073e8">2014.tc-1.29</url>
       <bibkey>subert-bojar-2014-twitter</bibkey>
     </paper>
diff --git a/data/xml/2015.eamt.xml b/data/xml/2015.eamt.xml
index 7c40c90fef..6a744730e8 100644
--- a/data/xml/2015.eamt.xml
+++ b/data/xml/2015.eamt.xml
@@ -29,31 +29,31 @@
     <paper id="2">
       <title>Exploiting portability to build an <fixed-case>RBMT</fixed-case> prototype for a new source language</title>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="4ade5c59">2015.eamt-1.2</url>
       <bibkey>aranberri-etal-2015-exploiting</bibkey>
     </paper>
     <paper id="3">
       <title>Building hybrid machine translation systems by using an <fixed-case>EBMT</fixed-case> preprocessor to create partialtranslations</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="c5d71cfc">2015.eamt-1.3</url>
       <bibkey>artetxe-etal-2015-building</bibkey>
     </paper>
     <paper id="4">
       <title>Using on-line available sources of bilingual information for word-level machine translation quality estimation</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="e2d90f94">2015.eamt-1.4</url>
       <bibkey>espla-gomis-etal-2015-using</bibkey>
     </paper>
     <paper id="5">
       <title>A general framework for minimizing translation effort: towards a principled combination of translation technologies in computer-aided translation</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <url hash="97cb37b3">2015.eamt-1.5</url>
       <bibkey>forcada-sanchez-martinez-2015-general</bibkey>
@@ -61,10 +61,10 @@
     <paper id="6">
       <title>Can Translation Memories afford not to use paraphrasing ?</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="20568967">2015.eamt-1.6</url>
       <bibkey>gupta-etal-2015-translation</bibkey>
     </paper>
@@ -88,7 +88,7 @@
       <title>Document-Level Machine Translation with Word Vector Models</title>
       <author><first>Eva Martinez</first><last>Garcia</last></author>
       <author><first>Cristina</first><last>Espana-Bonet</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <url hash="c461490f">2015.eamt-1.9</url>
       <bibkey>garcia-etal-2015-document</bibkey>
     </paper>
@@ -115,21 +115,21 @@
     </paper>
     <paper id="13">
       <title>Dynamic Terminology Integration Methods in Statistical Machine Translation</title>
-      <author><first>Marcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
       <url hash="de60219d">2015.eamt-1.13</url>
       <bibkey>pinnis-2015-dynamic</bibkey>
     </paper>
     <paper id="14">
       <title>Identifying main obstacles for statistical machine translation of morphologically rich <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages</title>
-      <author><first>Maja</first><last>Popovic</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <url hash="dfb3f19f">2015.eamt-1.14</url>
       <bibkey>popovic-arcan-2015-identifying</bibkey>
     </paper>
     <paper id="15">
       <title>Poor man’s lemmatisation for automatic error classification</title>
-      <author><first>Maja</first><last>Popovic</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <url hash="dfb3f19f">2015.eamt-1.15</url>
@@ -144,10 +144,10 @@
     </paper>
     <paper id="17">
       <title>Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="243bac7c">2015.eamt-1.17</url>
       <bibkey>scarton-etal-2015-searching</bibkey>
@@ -156,24 +156,24 @@
       <title>Stripping Adjectives: Integration Techniques for Selective Stemming in <fixed-case>SMT</fixed-case> Systems</title>
       <author><first>Isabel</first><last>Slawik</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="02944ae9">2015.eamt-1.18</url>
       <bibkey>slawik-etal-2015-stripping</bibkey>
     </paper>
     <paper id="19">
       <title>Evaluating machine translation for assimilation via a gap-filling task</title>
       <author><first>Ekaterina</first><last>Ageeva</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <url hash="c461490f">2015.eamt-1.19</url>
       <bibkey>ageeva-etal-2015-evaluating</bibkey>
     </paper>
     <paper id="20">
       <title>Unsupervised training of maximum-entropy models for lexical selection i in rule-based machine translation</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Felipe</first><last>Sánchez-Martinez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="ea6c922b">2015.eamt-1.20</url>
       <bibkey>tyers-etal-2015-unsupervised</bibkey>
     </paper>
@@ -187,13 +187,13 @@
     <paper id="22">
       <title>Re-assessing the <fixed-case>WMT</fixed-case>2013 Human Evaluation with Professional Translators Trainees</title>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="7610e41a">2015.eamt-1.22</url>
       <bibkey>vela-van-genabith-2015-assessing</bibkey>
     </paper>
     <paper id="23">
       <title>Integrating a Large, Monolingual Corpus as Translation Memory into Statistical Machine translation</title>
-      <author><first>Katharina</first><last>Wäschle</last></author>
+      <author id="katharina-waschle"><first>Katharina</first><last>Wäschle</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <url hash="289fd6df">2015.eamt-1.23</url>
       <bibkey>waschle-riezler-2015-integrating</bibkey>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="27">
       <title>Pre-reordering for Statistical Machine Translation of Non-fictional Subtitles</title>
-      <author><first>Magdalena</first><last>Plamada</last></author>
+      <author id="magdalena-plamada"><first>Magdalena</first><last>Plamada</last></author>
       <author><first>Gion</first><last>Linder</last></author>
       <author><first>Phillip</first><last>Ströbel</last></author>
       <author><first>Martin</first><last>Volk</last></author>
@@ -246,14 +246,14 @@
     </paper>
     <paper id="30">
       <title><fixed-case>M</fixed-case>ixed<fixed-case>E</fixed-case>motions: Social Semantic Emotion Analysis for Innovative Multilingual Big Data Analytics Markets</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="9f4b067f">2015.eamt-1.30</url>
       <bibkey>arcan-buitelaar-2015-mixedemotions</bibkey>
     </paper>
     <paper id="31">
       <title>The <fixed-case>ACCEPT</fixed-case> Academic Portal: Bringing Together Pre-editing, <fixed-case>MT</fixed-case> and Post-editing into a Learning Environment</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Asheesh</first><last>Gulati</last></author>
       <author><first>Victoria</first><last>Porro</last></author>
@@ -284,7 +284,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>H</fixed-case>andy<fixed-case>CAT</fixed-case> - An Open-Source Platform for <fixed-case>CAT</fixed-case> Tool Research</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <url hash="9987d2c2">2015.eamt-1.35</url>
       <bibkey>hokamp-liu-2015-handycat</bibkey>
@@ -296,15 +296,15 @@
       <author><first>Markus</first><last>Egg</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Lexi</first><last>Birch</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Dimitrios</first><last>Tsoumakos</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Michael</first><last>Papadopoulos</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Maria</first><last>Gialama</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Ioana</first><last>Buliga</last></author>
       <author><first>Mitja</first><last>Jermol</last></author>
       <author><first>Davor</first><last>Orlic</last></author>
@@ -326,7 +326,7 @@
     </paper>
     <paper id="39">
       <title><fixed-case>FALCON</fixed-case>: Federated Active Linguistic data <fixed-case>C</fixed-case>urati<fixed-case>ON</fixed-case></title>
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <url hash="f6f862ee">2015.eamt-1.39</url>
       <bibkey>lewis-2015-falcon</bibkey>
     </paper>
@@ -340,7 +340,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>O</fixed-case>kapi+<fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st: Translation Quality Estimation within Okapi</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Yves</first><last>Savourel</last></author>
       <url hash="38221f88">2015.eamt-1.41</url>
@@ -368,13 +368,13 @@
     <paper id="45">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: Automatic building of Machine Translation</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
-      <author><first>Sergio Ortiz</first><last>Rojas</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Miquel</first><last>Esplà</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio Ortiz</first><last>Rojas</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
@@ -386,7 +386,7 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <author><first>Martin</first><last>Thomas</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="da547e7a">2015.eamt-1.46</url>
       <bibkey>utiyama-etal-2015-mnh</bibkey>
     </paper>
@@ -394,7 +394,7 @@
       <title>Smart Computer Aided Translation Environment</title>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Tom</first><last>Vanallemeersch</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <author><first>Geert</first><last>Heyman</last></author>
       <author><first>Sien</first><last>Moens</last></author>
       <author><first>Joris</first><last>Pelemans</last></author>
@@ -402,7 +402,7 @@
       <author><first>Iulianna</first><last>Van der Lek - Ciudin</last></author>
       <author><first>Arda</first><last>Tezcan</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Eva</first><last>Geurts</last></author>
       <author><first>Mieke</first><last>Haesen</last></author>
       <url hash="fce94d23">2015.eamt-1.47</url>
diff --git a/data/xml/2015.iwslt.xml b/data/xml/2015.iwslt.xml
index b2ea2cd245..7a4667e706 100644
--- a/data/xml/2015.iwslt.xml
+++ b/data/xml/2015.iwslt.xml
@@ -13,7 +13,7 @@
     </meta>
     <paper id="1">
       <title>Improving <fixed-case>SMT</fixed-case> by model filtering and phrase embedding</title>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="acd05f01">2015.iwslt-keynotes.1</url>
       <bibkey>zong-2015-improving</bibkey>
     </paper>
@@ -33,7 +33,7 @@
       <title>The <fixed-case>IWSLT</fixed-case> 2015 Evaluation Campaign</title>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -48,7 +48,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>15-22</pages>
       <url hash="74126976">2015.iwslt-evaluation.2</url>
       <bibkey>peter-etal-2015-rwth-aachen</bibkey>
@@ -58,7 +58,7 @@
       <author><first>Michaeel</first><last>Kazi</last></author>
       <author><first>Brian</first><last>Thompson</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Eric</first><last>Hansen</last></author>
       <author><first>Brian</first><last>Ore</last></author>
@@ -86,7 +86,7 @@
       <author><first>Adrià</first><last>Giménez Pastor</last></author>
       <author><first>José Alberto</first><last>Sanchis Navarro</last></author>
       <author><first>Jorge</first><last>Civera Saiz</last></author>
-      <author><first>Alfons</first><last>Juan-Císcar</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan-Císcar</last></author>
       <pages>39-44</pages>
       <url hash="c51de038">2015.iwslt-evaluation.5</url>
       <bibkey>del-agua-teba-etal-2015-mllp</bibkey>
@@ -106,7 +106,7 @@
       <author><first>Mercedes</first><last>Garcia Martínez</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Anthony</first><last>Rousseau</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <pages>50-54</pages>
       <url hash="52697556">2015.iwslt-evaluation.7</url>
@@ -126,7 +126,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>62-69</pages>
       <url hash="a2235f14">2015.iwslt-evaluation.9</url>
       <bibkey>ha-etal-2015-kit-translation</bibkey>
@@ -138,22 +138,22 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Sebastian</first><last>Stuker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>70-75</pages>
       <url hash="5a793e9c">2015.iwslt-evaluation.10</url>
       <bibkey>mueller-etal-2015-2015</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>S</fixed-case>tanford neural machine translation systems for spoken language domains</title>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>76-79</pages>
       <url hash="73d44736">2015.iwslt-evaluation.11</url>
       <bibkey>luong-manning-2015-stanford</bibkey>
     </paper>
     <paper id="12">
       <title>The <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese machine translation system for <fixed-case>IWSLT</fixed-case> 2015</title>
-      <author><first>Viet Hong</first><last>Tran</last></author>
+      <author id="viet-hong-tran"><first>Viet Hong</first><last>Tran</last></author>
       <author><first>Huyen Vu</first><last>Thong</last></author>
       <author><first>Nguyen</first><last>Van-Vinh</last></author>
       <author><first>Trung Le</first><last>Tien</last></author>
@@ -184,7 +184,7 @@
       <title>The <fixed-case>JAIST</fixed-case>-<fixed-case>UET</fixed-case>-<fixed-case>MITI</fixed-case> machine translation systems for <fixed-case>IWSLT</fixed-case> 2015</title>
       <author><first>Hai-Long</first><last>Trieu</last></author>
       <author><first>Thanh-Quyen</first><last>Dang</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
       <author><first>Le-Minh</first><last>Nuyen</last></author>
       <pages>93-100</pages>
       <url hash="9fee8e9a">2015.iwslt-evaluation.15</url>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="2">
       <title>Applying cross-entropy difference for selecting parallel training data from publicly available sources for conversational machine translation</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Ying</first><last>Xin</last></author>
       <pages>126-134</pages>
@@ -251,7 +251,7 @@
       <title>Source discriminative word lexicon for translation disambiguation</title>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>135-142</pages>
       <url hash="ed6e035f">2015.iwslt-papers.3</url>
       <bibkey>herrmann-etal-2015-source</bibkey>
@@ -276,7 +276,7 @@
     <paper id="6">
       <title>Multifeature modular deep neural network acoustic models</title>
       <author><first>Kevin</first><last>Kilgour</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>159-166</pages>
       <url hash="99eca63b">2015.iwslt-papers.6</url>
       <bibkey>kilgour-waibel-2015-multifeature</bibkey>
@@ -284,7 +284,7 @@
     <paper id="7">
       <title>Using language adaptive deep neural networks for improved multilingual speech recognition</title>
       <author><first>Markus</first><last>Mueller</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>167-172</pages>
       <url hash="efda1d73">2015.iwslt-papers.7</url>
       <bibkey>mueller-waibel-2015-using</bibkey>
@@ -294,7 +294,7 @@
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>173-179</pages>
       <url hash="08dd2417">2015.iwslt-papers.8</url>
       <bibkey>cho-etal-2015-punctuation</bibkey>
@@ -303,7 +303,7 @@
       <title>Class-based N-gram language difference models for data selection</title>
       <author><first>Amittai</first><last>Axelrod</last></author>
       <author><first>Yogarshi</first><last>Vyas</last></author>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>180-187</pages>
       <url hash="de0acde1">2015.iwslt-papers.9</url>
@@ -319,11 +319,11 @@
     </paper>
     <paper id="11">
       <title>An open-source toolkit for word-level confidence estimation in machine translation</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Ngoc Tien</first><last>Le</last></author>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>196-203</pages>
       <url hash="3bcabde3">2015.iwslt-papers.11</url>
       <bibkey>servan-etal-2015-open</bibkey>
@@ -333,7 +333,7 @@
       <author><first>Quoc Truong</first><last>Do</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>204-208</pages>
       <url hash="fcdc9d69">2015.iwslt-papers.12</url>
@@ -349,7 +349,7 @@
     </paper>
     <paper id="14">
       <title>Learning segmentations that balance latency versus quality in spoken language translation</title>
-      <author><first>Hassan</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan</first><last>Shavarani</last></author>
       <author><first>Maryam</first><last>Siahbani</last></author>
       <author><first>Ramtin Mehdizadeh</first><last>Seraj</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
@@ -384,7 +384,7 @@
       <title>Risk-aware distribution of <fixed-case>SMT</fixed-case> outputs for translation of documents targeting many anonymous readers</title>
       <author><first>Yo</first><last>Ehara</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>240-247</pages>
       <url hash="abe9909b">2015.iwslt-papers.17</url>
       <bibkey>ehara-etal-2015-risk</bibkey>
@@ -393,7 +393,7 @@
       <title>Inducing bilingual lexicons from small quantities of sentence-aligned phonemic transcriptions</title>
       <author><first>Oliver</first><last>Adams</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <pages>248-255</pages>
       <url hash="9963c786">2015.iwslt-papers.18</url>
diff --git a/data/xml/2015.jeptalnrecital.xml b/data/xml/2015.jeptalnrecital.xml
index c7b7447739..a2c6fe32f1 100644
--- a/data/xml/2015.jeptalnrecital.xml
+++ b/data/xml/2015.jeptalnrecital.xml
@@ -37,9 +37,9 @@
     <paper id="3">
       <title>Identification de facteurs de risque pour des patients diabétiques à partir de comptes-rendus cliniques par des approches hybrides</title>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>25–36</pages>
       <abstract>Dans cet article, nous présentons les méthodes que nous avons développées pour analyser des comptes- rendus hospitaliers rédigés en anglais. L’objectif de cette étude consiste à identifier les facteurs de risque de décès pour des patients diabétiques et à positionner les événements médicaux décrits par rapport à la date de création de chaque document. Notre approche repose sur (i) HeidelTime pour identifier les expressions temporelles, (ii) des CRF complétés par des règles de post-traitement pour identifier les traitements, les maladies et facteurs de risque, et (iii) des règles pour positionner temporellement chaque événement médical. Sur un corpus de 514 documents, nous obtenons une F-mesure globale de 0,8451. Nous observons que l’identification des informations directement mentionnées dans les documents se révèle plus performante que l’inférence d’informations à partir de résultats de laboratoire.</abstract>
       <url hash="e577d861">2015.jeptalnrecital-long.3</url>
@@ -60,7 +60,7 @@
     <paper id="5">
       <title>Analyse d’expressions temporelles dans les dossiers électroniques patients</title>
       <author><first>Mike Donald Tapi</first><last>Nzali</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>49–58</pages>
       <abstract>Les références à des phénomènes du monde réel et à leur caractérisation temporelle se retrouvent dans beaucoup de types de discours en langue naturelle. Ainsi, l’analyse temporelle apparaît comme un élément important en traitement automatique de la langue. Cet article présente une analyse de textes en domaine de spécialité du point de vue temporel. En s’appuyant sur un corpus de documents issus de plusieurs dossiers électroniques patient désidentifiés, nous décrivons la construction d’une ressource annotée en expressions temporelles selon la norme TimeML. Par suite, nous utilisons cette ressource pour évaluer plusieurs méthodes d’extraction automatique d’expressions temporelles adaptées au domaine médical. Notre meilleur système statistique offre une performance de 0,91 de F-mesure, surpassant pour l’identification le système état de l’art HeidelTime. La comparaison de notre corpus de travail avec le corpus journalistique FR-Timebank permet également de caractériser les différences d’utilisation des expressions temporelles dans deux domaines de spécialité.</abstract>
@@ -71,7 +71,7 @@
       <title>Compréhension automatique de la parole sans données de référence</title>
       <author><first>Emmanuel</first><last>Ferreira</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>59–70</pages>
       <abstract>La majorité des méthodes état de l’art en compréhension automatique de la parole ont en commun de devoir être apprises sur une grande quantité de données annotées. Cette dépendance aux données constitue un réel obstacle lors du développement d’un système pour une nouvelle tâche/langue. Aussi, dans cette étude, nous présentons une méthode visant à limiter ce besoin par un mécanisme d’apprentissage sans données de référence (zero-shot learning). Cette méthode combine une description ontologique minimale de la tâche visée avec l’utilisation d’un espace sémantique continu appris par des approches à base de réseaux de neurones à partir de données génériques non-annotées. Nous montrons que le modèle simple et peu coûteux obtenu peut atteindre, dès le démarrage, des performances comparables à celles des systèmes état de l’art reposant sur des règles expertes ou sur des approches probabilistes sur des tâches de compréhension de la parole de référence (tests des Dialog State Tracking Challenges, DSTC2 et DSTC3). Nous proposons ensuite une stratégie d’adaptation en ligne permettant d’améliorer encore les performances de notre approche à l’aide d’une supervision faible et ajustable par l’utilisateur.</abstract>
       <url hash="c78462a4">2015.jeptalnrecital-long.6</url>
@@ -92,7 +92,7 @@
       <title>Création rapide et efficace d’un système de désambiguïsation lexicale pour une langue peu dotée</title>
       <author><first>Mohammad</first><last>Nasiruddin</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>83–94</pages>
       <abstract>Nous présentons une méthode pour créer rapidement un système de désambiguïsation lexicale (DL) pour une langue L peu dotée pourvu que l’on dispose d’un système de traduction automatique statistique (TAS) d’une langue riche en corpus annotés en sens (ici l’anglais) vers L. Il est, en effet, plus facile de disposer des ressources nécessaires à la création d’un système de TAS que des ressources dédiées nécessaires à la création d’un système de DL pour la langue L. Notre méthode consiste à traduire automatiquement un corpus annoté en sens vers la langue L, puis de créer le système de désambiguïsation pour L par des méthodes supervisées classiques. Nous montrons la faisabilité de la méthode et sa généricité en traduisant le SemCor, un corpus en anglais annoté grâce au Princeton WordNet, de l’anglais vers le bangla et de l’anglais vers le français. Nous montrons la validité de l’approche en évaluant les résultats sur la tâche de désambiguïsation lexicale multilingue de Semeval 2013.</abstract>
@@ -166,7 +166,7 @@
     <paper id="16">
       <title>Extraction automatique de paraphrases grand public pour les termes médicaux</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>182–195</pages>
       <abstract>Nous sommes tous concernés par notre état de santé et restons sensibles aux informations de santé disponibles dans la société moderne à travers par exemple les résultats des recherches scientifiques, les médias sociaux de santé, les documents cliniques, les émissions de télé et de radio ou les nouvelles. Cependant, il est commun de rencontrer dans le domaine médical des termes très spécifiques (e.g., blépharospasme, alexitymie, appendicectomie), qui restent difficiles à comprendre par les non spécialistes. Nous proposons une méthode automatique qui vise l’acquisition de paraphrases pour les termes médicaux, qui soient plus faciles à comprendre que les termes originaux. La méthode est basée sur l’analyse morphologique des termes, l’analyse syntaxique et la fouille de textes non spécialisés. L’analyse et l’évaluation des résultats indiquent que de telles paraphrases peuvent être trouvées dans les documents non spécialisés et présentent une compréhension plus facile. En fonction des paramètres de la méthode, la précision varie entre 86 et 55
 
@@ -179,7 +179,7 @@
       <author><first>Gaël</first><last>Guibon</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
       <author><first>Sophie</first><last>Prévost</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
       <pages>196–207</pages>
       <abstract>L’article présente des résultats d’expériences d’apprentissage automatique pour l’étiquetage morpho-syntaxique et l’analyse syntaxique en dépendance de l’ancien français. Ces expériences ont pour objectif de servir une exploration de corpus pour laquelle le corpus arboré SRCMF sert de données de référence. La nature peu standardisée de la langue qui y est utilisée implique des données d’entraînement hétérogènes et quantitativement limitées. Nous explorons donc diverses stratégies, fondées sur différents critères (variabilité du lexique, forme Vers/Prose des textes, dates des textes), pour constituer des corpus d’entrainement menant aux meilleurs résultats possibles.</abstract>
@@ -199,9 +199,9 @@
     <paper id="19">
       <title>Mesurer la similarité entre phrases grâce à Wikipédia en utilisant une indexation aléatoire</title>
       <author><first>Hai Hieu</first><last>Vu</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Farida</first><last>Saïd</last></author>
-      <author><first>Pierre-François</first><last>Marteau</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-François</first><last>Marteau</last></author>
       <pages>220–231</pages>
       <abstract>Cet article présente une méthode pour mesurer la similarité sémantique entre phrases qui utilise Wikipédia comme unique ressource linguistique et qui est, de ce fait, utilisable pour un grand nombre de langues. Basée sur une représentation vectorielle, elle utilise une indexation aléatoire pour réduire la dimension des espaces manipulés. En outre, elle inclut une technique de calcul des vecteurs de termes qui corrige les défauts engendrés par l’utilisation d’un corpus aussi général que Wikipédia. Le système a été évalué sur les données de SemEval 2014 en anglais avec des résultats très encourageants, au-dessus du niveau moyen des systèmes en compétition. Il a également été testé sur un ensemble de paires de phrases en français, à partir de ressources que nous avons construites et qui seront mises à la libre disposition de la communauté scientifique.</abstract>
       <url hash="6e664a27">2015.jeptalnrecital-long.19</url>
@@ -219,7 +219,7 @@
     </paper>
     <paper id="21">
       <title>Utilisation de mesures de confiance pour améliorer le décodage en traduction de parole</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Luong Ngoc</first><last>Quang</last></author>
       <pages>244–254</pages>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="22">
       <title>Multialignement vs bialignement : à plusieurs, c’est mieux !</title>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>255–266</pages>
       <abstract>Dans cet article, nous proposons une méthode originale destinée à effectuer l’alignement d’un corpus multiparallèle, i.e. comportant plus de deux langues, en prenant en compte toutes les langues simultanément (et non en composant une série de bialignements indépendants). Pour ce faire, nous nous appuyons sur les réseaux de correspondances lexicales constitués par les transfuges (chaînes identiques) et cognats (mots apparentés), et nous montrons comment divers tuilages des couples de langues permettent d’exploiter au mieux les ressemblances superficielles liées aux relations génétiques interlinguistiques. Nous évaluons notre méthode par rapport à une méthode de bialignement classique, et montrons en quoi le multialignement permet d’obtenir des résultats à la fois plus précis et plus robustes.</abstract>
       <url hash="fd379c17">2015.jeptalnrecital-long.22</url>
@@ -237,7 +237,7 @@
     </paper>
     <paper id="23">
       <title>Apprentissage discriminant des modèles continus de traduction</title>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>267–278</pages>
@@ -248,7 +248,7 @@
     <paper id="24">
       <title>Utiliser les interjections pour détecter les émotions</title>
       <author><first>Amel</first><last>Fraisse</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>279–290</pages>
       <abstract>Bien que les interjections soient un phénomène linguistique connu, elles ont été peu étudiées et cela continue d’être le cas pour les travaux sur les microblogs. Des travaux en analyse de sentiments ont montré l’intérêt des émoticônes et récemment des mots-dièses, qui s’avèrent être très utiles pour la classification en polarité. Mais malgré leur statut grammatical et leur richesse sémantique, les interjections sont restées marginalisées par les systèmes d’analyse de sentiments. Nous montrons dans cet article l’apport majeur des interjections pour la détection des émotions. Nous détaillons la production automatique, basée sur les interjections, d’un corpus étiqueté avec les émotions. Nous expliquons ensuite comment nous avons utilisé ce corpus pour en déduire, automatiquement, un lexique affectif pour le français. Ce lexique a été évalué sur une tâche de détection des émotions, qui a montré un gain en mesure F1 allant, selon les émotions, de +0,04 à +0,21.</abstract>
       <url hash="69d9974d">2015.jeptalnrecital-long.24</url>
@@ -257,7 +257,7 @@
     <paper id="25">
       <title>Comparaison d’architectures neuronales pour l’analyse syntaxique en constituants</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>291–302</pages>
       <abstract>L’article traite de l’analyse syntaxique lexicalisée pour les grammaires de constituants. On se place dans le cadre de l’analyse par transitions. Les modèles statistiques généralement utilisés pour cette tâche s’appuient sur une représentation non structurée du lexique. Les mots du vocabulaire sont représentés par des symboles discrets sans liens entre eux. À la place, nous proposons d’utiliser des représentations denses du type plongements (embeddings) qui permettent de modéliser la similarité entre symboles, c’est-à-dire entre mots, entre parties du discours et entre catégories syntagmatiques. Nous proposons d’adapter le modèle statistique sous-jacent à ces nouvelles représentations. L’article propose une étude de 3 architectures neuronales de complexité croissante et montre que l’utilisation d’une couche cachée non-linéaire permet de tirer parti des informations données par les plongements.</abstract>
       <url hash="4ba08c76">2015.jeptalnrecital-long.25</url>
@@ -266,7 +266,7 @@
     <paper id="26">
       <title>...des conférences enfin disons des causeries... Détection automatique de segments en relation de paraphrase dans les reformulations de corpus oraux</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <pages>303–316</pages>
       <abstract>Notre travail porte sur la détection automatique des segments en relation de reformulation paraphrastique dans les corpus oraux. L’approche proposée est une approche syntagmatique qui tient compte des marqueurs de reformulation paraphrastique et des spécificités de l’oral. Les données de référence sont consensuelles. Une méthode automatique fondée sur l’apprentissage avec les CRF est proposée afin de détecter les segments paraphrasés. Différents descripteurs sont exploités dans une fenêtre de taille variable. Les tests effectués montrent que les segments en relation de paraphrase sont assez difficiles à détecter, surtout avec leurs frontières correctes. Les meilleures moyennes atteignent 0,65 de F-mesure, 0,75 de précision et 0,63 de rappel. Nous avons plusieurs perspectives à ce travail pour améliorer la détection des segments en relation de paraphrase et pour étudier les données depuis d’autres points de vue.</abstract>
       <url hash="e7bf6847">2015.jeptalnrecital-long.26</url>
@@ -312,7 +312,7 @@
     <paper id="3">
       <title>Vous aimez ?...ou pas ? <fixed-case>L</fixed-case>ike<fixed-case>I</fixed-case>t, un jeu pour construire une ressource lexicale de polarité</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie Le</first><last>Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie Le</first><last>Brun</last></author>
       <author><first>Alain</first><last>Joubert</last></author>
       <pages>14–20</pages>
       <abstract>En analyse de discours ou d’opinion, savoir caractériser la connotation générale d’un texte, les sentiments qu’il véhicule, est une aptitude recherchée, qui suppose la constitution préalable d’une ressource lexicale de polarité. Au sein du réseau lexical JeuxDeMots, nous avons mis au point LikeIt, un jeu qui permet d’affecter une valeur positive, négative, ou neutre à un terme, et de constituer ainsi pour chaque terme, à partir des votes, une polarité résultante. Nous présentons ici l’analyse quantitative des données de polarité obtenues, ainsi que la méthode pour les valider qualitativement.</abstract>
@@ -323,7 +323,7 @@
       <title>Étude des verbes introducteurs de noms de médicaments dans les forums de santé</title>
       <author><first>François</first><last>Morlane-Hondère</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>21–27</pages>
       <abstract>Dans cet article, nous combinons annotations manuelle et automatique pour identifier les verbes utilisés pour introduire un médicament dans les messages sur les forums de santé. Cette information est notamment utile pour identifier la relation entre un médicament et un effet secondaire. La mention d’un médicament dans un message ne garantit pas que l’utilisateur a pris ce traitement mais qu’il effectue un retour. Nous montrons ensuite que ces verbes peuvent servir pour extraire automatiquement des variantes de noms de médicaments. Nous estimons que l’analyse de ces variantes pourrait permettre de modéliser les erreurs faites par les usagers des forums lorsqu’ils écrivent les noms de médicaments, et améliorer en conséquence les systèmes de recherche d’information.</abstract>
       <url hash="a4505c61">2015.jeptalnrecital-court.4</url>
@@ -333,7 +333,7 @@
       <title>Initialisation de Réseaux de Neurones à l’aide d’un Espace Thématique</title>
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>28–33</pages>
       <abstract>Ce papier présente une méthode de traitement de documents parlés intégrant une représentation fondée sur un espace thématique dans un réseau de neurones artificiels (ANN) employé comme classifieur de document. La méthode proposée consiste à configurer la topologie d’un ANN ainsi que d’initialiser les connexions de celui-ci à l’aide des espaces thématiques appris précédemment. Il est attendu que l’initialisation fondée sur les probabilités thématiques permette d’optimiser le processus d’optimisation des poids du réseau ainsi qu’à accélérer la phase d’apprentissage tout en amélioration la précision de la classification d’un document de test. Cette méthode est évaluée lors d’une tâche de catégorisation de dialogues parlés entre des utilisateurs et des agents du service d’appels de la Régie Autonome Des Transports Parisiens (RATP). Les résultats montrent l’intérêt de la méthode proposée d’initialisation d’un réseau, avec un gain observé de plus de 4 points en termes de bonne classification comparativement à l’initialisation aléatoire. De plus, les expérimentations soulignent que les performances sont faiblement dépendantes de la topologie du ANN lorsque les poids de la couche cachée sont initialisés au moyen des espaces de thèmes issus d’une allocation latente de Dirichlet ou latent Dirichlet Allocation (LDA) en comparaison à une initialisation empirique.</abstract>
       <url hash="046cc7b0">2015.jeptalnrecital-court.5</url>
@@ -387,7 +387,7 @@
     </paper>
     <paper id="11">
       <title>Adaptation par enrichissement terminologique en traduction automatique statistique fondée sur la génération et le filtrage de bi-segments virtuels</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
       <pages>68–74</pages>
       <abstract>Nous présentons des travaux préliminaires sur une approche permettant d’ajouter des termes bilingues à un système de Traduction Automatique Statistique (TAS) à base de segments. Les termes sont non seulement inclus individuellement, mais aussi avec des contextes les englobant. Tout d’abord nous générons ces contextes en généralisant des motifs (ou patrons) observés pour des mots de même nature syntaxique dans un corpus bilingue. Enfin, nous filtrons les contextes qui n’atteignent pas un certain seuil de confiance, à l’aide d’une méthode de sélection de bi-segments inspirée d’une approche de sélection de données, précédemment appliquée à des textes bilingues alignés.</abstract>
@@ -442,7 +442,7 @@
       <title><fixed-case>CANÉPHORE</fixed-case> : un corpus français pour la fouille d’opinion ciblée</title>
       <author><first>Joseph</first><last>Lark</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Sebastián Peña</first><last>Saldarriaga</last></author>
+      <author id="sebastian-pena-saldarriaga"><first>Sebastián Peña</first><last>Saldarriaga</last></author>
       <pages>102–108</pages>
       <abstract>La fouille d’opinion ciblée (aspect-based sentiment analysis) fait l’objet ces dernières années d’un intérêt particulier, visible dans les sujets des récentes campagnes d’évaluation comme SemEval 2014 et 2015 ou bien DEFT 2015. Cependant les corpus annotés et publiquement disponibles permettant l’évaluation de cette tâche sont rares. Dans ce travail nous présentons en premier lieu un corpus français librement accessible de 10 000 tweets manuellement annotés. Nous accompagnons ce corpus de résultats de référence pour l’extraction de marqueurs d’opinion non supervisée. Nous présentons ensuite une méthode améliorant les résultats de cette extraction, en suivant une approche semi-supervisée.</abstract>
       <url hash="9c828bd6">2015.jeptalnrecital-court.16</url>
@@ -452,7 +452,7 @@
       <title>Extraction de Contextes Riches en Connaissances en corpus spécialisés</title>
       <author><first>Firas</first><last>Hmida</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>109–115</pages>
       <abstract>Les banques terminologiques et les dictionnaires sont des ressources précieuses qui facilitent l’accès aux connaissances des domaines spécialisés. Ces ressources sont souvent assez pauvres et ne proposent pas toujours pour un terme à illustrer des exemples permettant d’appréhender le sens et l’usage de ce terme. Dans ce contexte, nous proposons de mettre en œuvre la notion de Contextes Riches en Connaissances (CRC) pour extraire directement de corpus spécialisés des exemples de contextes illustrant son usage. Nous définissons un cadre unifié pour exploiter tout à la fois des patrons de connaissances et des collocations avec une qualité acceptable pour une révision humaine.</abstract>
       <url hash="acbfe0b1">2015.jeptalnrecital-court.17</url>
@@ -480,7 +480,7 @@
     <paper id="20">
       <title>Vers un diagnostic d’ambiguïté des termes candidats d’un texte</title>
       <author><first>Gaël</first><last>Lejeune</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>130–136</pages>
       <abstract>Les recherches autour de la désambiguïsation sémantique traitent de la question du sens à accorder à différentes occurrences d’un mot ou plus largement d’une unité lexicale. Dans cet article, nous nous intéressons à l’ambiguïté d’un terme en domaine de spécialité. Nous posons les premiers jalons de nos recherches sur une question connexe que nous nommons le diagnostic d’ambiguïté. Cette tâche consiste à décider si une occurrence d’un terme est ou n’est pas ambiguë. Nous mettons en œuvre une approche d’apprentissage supervisée qui exploite un corpus d’articles de sciences humaines rédigés en français dans lequel les termes ambigus ont été détectés par des experts. Le diagnostic s’appuie sur deux types de traits : syntaxiques et positionnels. Nous montrons l’intérêt de la structuration du texte pour établir le diagnostic d’ambiguïté.</abstract>
       <url hash="958607b7">2015.jeptalnrecital-court.20</url>
@@ -499,9 +499,9 @@
       <title>Détection automatique de l’ironie dans les tweets en français</title>
       <author><first>Jihen</first><last>Karoui</last></author>
       <author><first>Farah Benamara</first><last>Zitoune</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>144–149</pages>
       <abstract>Cet article présente une méthode par apprentissage supervisé pour la détection de l’ironie dans les tweets en français. Un classifieur binaire utilise des traits de l’état de l’art dont les performances sont reconnues, ainsi que de nouveaux traits issus de notre étude de corpus. En particulier, nous nous sommes intéressés à la négation et aux oppositions explicites/implicites entre des expressions d’opinion ayant des polarités différentes. Les résultats obtenus sont encourageants.</abstract>
       <url hash="9c36f9bc">2015.jeptalnrecital-court.22</url>
@@ -540,7 +540,7 @@
     </paper>
     <paper id="26">
       <title>Entre écrit et oral ? Analyse comparée de conversations de type tchat et de conversations téléphoniques dans un centre de contact client</title>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Aleksandra</first><last>Guerraz</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>171–177</pages>
@@ -560,7 +560,7 @@
       <title>Utilisation d’annotations sémantiques pour la validation automatique d’hypothèses dans des conversations téléphoniques</title>
       <author><first>Carole</first><last>Lailler</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <author><first>Mohamed</first><last>Bouallègue</last></author>
       <author><first>Mohamed</first><last>Morchid</last></author>
       <pages>185–191</pages>
@@ -572,7 +572,7 @@
       <title>Etiquetage morpho-syntaxique en domaine de spécialité: le domaine médical</title>
       <author><first>Christelle</first><last>Rabary</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>192–198</pages>
       <abstract>L’étiquetage morpho-syntaxique est une tâche fondamentale du Traitement Automatique de la Langue, sur laquelle reposent souvent des traitements plus complexes tels que l’extraction d’information ou la traduction automatique. L’étiquetage en domaine de spécialité est limité par la disponibilité d’outils et de corpus annotés spécifiques au domaine. Dans cet article, nous présentons le développement d’un corpus clinique du français annoté morpho-syntaxiquement à l’aide d’un jeu d’étiquettes issus des guides d’annotation French Treebank et Multitag. L’analyse de ce corpus nous permet de caractériser le domaine clinique et de dégager les points clés pour l’adaptation d’outils d’analyse morpho-syntaxique à ce domaine. Nous montrons également les limites d’un outil entraîné sur un corpus journalistique appliqué au domaine clinique. En perspective de ce travail, nous envisageons une application du corpus clinique annoté pour améliorer l’étiquetage morpho-syntaxique des documents cliniques en français.</abstract>
       <url hash="a1f18ddc">2015.jeptalnrecital-court.29</url>
@@ -581,7 +581,7 @@
     <paper id="30">
       <title>Vers une typologie de liens entre contenus journalistiques</title>
       <author><first>Remi</first><last>Bois</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>199–205</pages>
@@ -602,7 +602,7 @@
       <title>Utilisation des réseaux de neurones récurrents pour la projection interlingue d’étiquettes morpho-syntaxiques à partir d’un corpus parallèle</title>
       <author><first>Othman</first><last>Zennaki</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>213–220</pages>
       <abstract>La construction d’outils d’analyse linguistique pour les langues faiblement dotées est limitée, entre autres, par le manque de corpus annotés. Dans cet article, nous proposons une méthode pour construire automatiquement des outils d’analyse via une projection interlingue d’annotations linguistiques en utilisant des corpus parallèles. Notre approche n’utilise pas d’autres sources d’information, ce qui la rend applicable à un large éventail de langues peu dotées. Nous proposons d’utiliser les réseaux de neurones récurrents pour projeter les annotations d’une langue à une autre (sans utiliser d’information d’alignement des mots). Dans un premier temps, nous explorons la tâche d’annotation morpho-syntaxique. Notre méthode combinée avec une méthode de projection d’annotation basique (utilisant l’alignement mot à mot), donne des résultats comparables à ceux de l’état de l’art sur une tâche similaire.</abstract>
       <url hash="dd136c9f">2015.jeptalnrecital-court.32</url>
@@ -610,8 +610,8 @@
     </paper>
     <paper id="33">
       <title>Segmentation et Titrage Automatique de Journaux Télévisés</title>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
@@ -623,7 +623,7 @@
     <paper id="34">
       <title>Un système hybride pour l’analyse de sentiments associés aux aspects</title>
       <author><first>Caroline</first><last>Brun</last></author>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>Claude</first><last>Roux</last></author>
       <pages>228–234</pages>
       <abstract>Cet article présente en détails notre participation à la tâche 4 de SemEval2014 (Analyse de Sentiments associés aux Aspects). Nous présentons la tâche et décrivons précisément notre système qui consiste en une combinaison de composants linguistiques et de modules de classification. Nous exposons ensuite les résultats de son évaluation, ainsi que les résultats des meilleurs systèmes. Nous concluons par la présentation de quelques nouvelles expériences réalisées en vue de l’amélioration de ce système.</abstract>
@@ -682,8 +682,8 @@
       <title>Médicaments qui soignent, médicaments qui rendent malades : étude des relations causales pour identifier les effets secondaires</title>
       <author><first>François</first><last>Morlane-Hondère</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>270–276</pages>
       <abstract>Dans cet article, nous nous intéressons à la manière dont sont exprimés les liens qui existent entre un traitement médical et un effet secondaire. Parce que les patients se tournent en priorité vers internet, nous fondons cette étude sur un corpus annoté de messages issus de forums de santé en français. L’objectif de ce travail consiste à mettre en évidence des éléments linguistiques (connecteurs logiques et expressions temporelles) qui pourraient être utiles pour des systèmes automatiques de repérage des effets secondaires. Nous observons que les modalités d’écriture sur les forums ne permettent pas de se fonder sur les expressions temporelles. En revanche, les connecteurs logiques semblent utiles pour identifier les effets secondaires.</abstract>
       <url hash="ffdf2202">2015.jeptalnrecital-court.40</url>
@@ -702,7 +702,7 @@
       <author><first>Killian</first><last>Janod</last></author>
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linares</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linares</last></author>
       <pages>284–290</pages>
       <abstract>Les représentations vectorielles continues des mots sont en plein essor et ont déjà été appliquées avec succès à de nombreuses tâches en traitement automatique de la langue (TAL). Dans cet article, nous proposons d’intégrer l’information temporelle issue du contexte des mots au sein des architectures fondées sur les sacs-de-mots continus (continuous bag-of-words ou CBOW) ou sur les Skip-Grams. Ces approches sont manipulées au travers d’un réseau de neurones, l’architecture CBOW cherchant alors à prédire un mot sachant son contexte, alors que l’architecture Skip-Gram prédit un contexte sachant un mot. Cependant, ces modèles, au travers du réseau de neurones, s’appuient sur des représentations en sac-de-mots et ne tiennent pas compte, explicitement, de l’ordre des mots. En conséquence, chaque mot a potentiellement la même influence dans le réseau de neurones. Nous proposons alors une méthode originale qui intègre l’information temporelle des contextes des mots en utilisant leur position relative. Cette méthode s’inspire des modèles contextuels continus. L’information temporelle est traitée comme coefficient de pondération, en entrée du réseau de neurones par le CBOW et dans la couche de sortie par le Skip-Gram. Les premières expériences ont été réalisées en utilisant un corpus de test mesurant la qualité de la relation sémantique-syntactique des mots. Les résultats préliminaires obtenus montrent l’apport du contexte des mots, avec des gains de 7 et 7,7 points respectivement avec l’architecture Skip-Gram et l’architecture CBOW.</abstract>
       <url hash="4cd158c2">2015.jeptalnrecital-court.42</url>
@@ -721,7 +721,7 @@
     </paper>
     <paper id="44">
       <title>Caractériser les discours académiques et de vulgarisation : quelles propriétés ?</title>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Beatriz Sanchez</first><last>Cardenas</last></author>
       <pages>298–304</pages>
       <abstract>L’article présente une étude des propriétés linguistiques (lexicales, morpho-syntaxiques, syntaxiques) permettant la classification automatique de documents selon leur genre (articles scientifiques et articles de vulgarisation), dans deux domaines différentes (médecine et informatique). Notre analyse, effectuée sur des corpus comparables en genre et en thèmes disponibles en français, permet de valider certaines propriétés identifiées dans la littérature comme caractéristiques des discours académiques ou de vulgarisation scientifique. Les premières expériences de classification évaluent l’influence de ces propriétés pour l’identification automatique du genre pour le cas spécifique des textes scientifiques ou de vulgarisation.</abstract>
@@ -842,10 +842,10 @@
       <title>Un patient virtuel dialogant</title>
       <author><first>Leonardo</first><last>Campillos</last></author>
       <author><first>Dhouha</first><last>Bouamor</last></author>
-      <author><first>Éric</first><last>Bilinski</last></author>
+      <author id="eric-bilinski"><first>Éric</first><last>Bilinski</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>16–17</pages>
       <abstract>Le démonstrateur que nous décrivons ici est un prototype de système de dialogue dont l’objectif est de simuler un patient. Nous décrivons son fonctionnement général en insistant sur les aspects concernant la langue et surtout le rapport entre langue médicale de spécialité et langue générale.</abstract>
       <url hash="e4017619">2015.jeptalnrecital-demonstration.8</url>
@@ -874,8 +874,8 @@
       <author><first>Cécile</first><last>Robin</last></author>
       <author><first>André</first><last>Bittar</last></author>
       <author><first>Xabier</first><last>Larrucea</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
-      <author><first>Marie-Hélène</first><last>Metzger</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
+      <author id="marie-helene-metzger"><first>Marie-Hélène</first><last>Metzger</last></author>
       <pages>23–24</pages>
       <abstract>Le projet européen TIER (Integrated strategy for CBRN – Chemical, Biological, Radiological and Nuclear – Threat Identification and Emergency Response) vise à intégrer une stratégie complète et intégrée pour la réponse d’urgence dans un contexte de dangers biologiques, chimiques, radiologiques, nucléaires, ou liés aux explosifs, basée sur l’identification des menaces et d’évaluation des risques. Dans cet article, nous nous focalisons sur les risques biologiques. Nous présentons notre système expert fondé sur une analyse sémantique, permettant l’extraction de données structurées à partir de données non structurées dans le but de raisonner.</abstract>
       <url hash="629b4f63">2015.jeptalnrecital-demonstration.11</url>
@@ -917,7 +917,7 @@
     </paper>
     <paper id="2">
       <title>Pourquoi construire des ressources terminologiques et pourquoi le faire différemment ?</title>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <pages>2–2</pages>
       <abstract>Dans cette présentation, je défendrai l’idée selon laquelle des ressources terminologiques décrivant les propriétés lexico-sémantiques des termes constituent un complément nécessaire, voire indispensable, à d’autres types de ressources, À partir d’exemples anglais et français empruntés au domaine de l’environnement, je montrerai, d’une part, que les ressources lexicales générales (y compris celles qui ont une large couverture) n’offrent pas un portait complet du sens des termes ou de la structure lexicale observée du point de vue d’un domaine de spécialité. Je montrerai, d’autre part, que les ressources terminologiques (thésaurus, ontologies, banques de terminologie) souvent d’obédience conceptuelle, se concentrent sur le lien entre les termes et les connaissances dénotées par eux et s’attardent peu sur leur fonctionnement linguistique. Je présenterai un type de ressource décrivant les propriétés lexico-sémantiques des termes d’un domaine (structure actantielle, liens lexicaux, annotations contextuelles, etc.) et des éléments méthodologiques présidant à son élaboration.</abstract>
       <url hash="c50bf130">2015.jeptalnrecital-invite.2</url>
diff --git a/data/xml/2015.lilt.xml b/data/xml/2015.lilt.xml
index a9e633bf71..7ca8e36d60 100644
--- a/data/xml/2015.lilt.xml
+++ b/data/xml/2015.lilt.xml
@@ -39,10 +39,10 @@
     </meta>
     <paper id="1">
       <title>Literature Lifts Up Computational Linguistics</title>
-      <author><first>David K.</first><last>Elson</last></author>
+      <author id="david-elson"><first>David K.</first><last>Elson</last></author>
       <author><first>Anna</first><last>Feldman</last></author>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <issue>1</issue>
       <url hash="7adfad43">2015.lilt-12.1</url>
       <bibkey>elson-etal-2015-literature</bibkey>
@@ -60,7 +60,7 @@
     <paper id="3">
       <title>A computational analysis of poetic style: Imagism and its influence on modern professional and amateur poetry</title>
       <author><first>Justine T.</first><last>Kao</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <abstract>How do standards of poetic beauty change as a function of time and expertise? Here we use computational methods to compare the stylistic features of 359 English poems written by 19th century professional poets, Imagist poets, contemporary professional poets, and contemporary amateur poets. Building upon techniques designed to analyze style and sentiment in texts, we examine elements of poetic craft such as imagery, sound devices, emotive language, and diction. We find that contemporary professional poets use significantly more concrete words than 19th century poets, fewer emotional words, and more complex sound devices. These changes are consistent with the tenets of Imagism, an early 20thcentury literary movement. Further analyses show that contemporary amateur poems resemble 19th century professional poems more than contemporary professional poems on several dimensions. The stylistic similarities between contemporary amateur poems and 19th century professional poems suggest that elite standards of poetic beauty in the past “trickled down” to influence amateur works in the present. Our results highlight the influence of Imagism on the modern aesthetic and reveal the dynamics between “high” and “low” art. We suggest that computational linguistics may shed light on the forces and trends that shape poetic style.</abstract>
       <issue>3</issue>
       <url hash="d21f712e">2015.lilt-12.3</url>
diff --git a/data/xml/2015.mtsummit.xml b/data/xml/2015.mtsummit.xml
index 62fba7330a..485a633f52 100644
--- a/data/xml/2015.mtsummit.xml
+++ b/data/xml/2015.mtsummit.xml
@@ -13,8 +13,8 @@
       <author><first>Masaru</first><last>Fuji</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="b17a0de3">2015.mtsummit-papers.1</url>
       <bibkey>fuji-etal-2015-patent</bibkey>
     </paper>
@@ -29,9 +29,9 @@
     <paper id="3">
       <title>Learning bilingual distributed phrase represenations for statistical machine translation</title>
       <author><first>Chaochao</first><last>Wang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="2684c18e">2015.mtsummit-papers.3</url>
       <bibkey>wang-etal-2015-learning-bilingual</bibkey>
     </paper>
@@ -39,7 +39,7 @@
       <title>Learning bilingual phrase representations with recurrent neural networks</title>
       <author><first>Hideya</first><last>Mino</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="e6659e54">2015.mtsummit-papers.4</url>
       <bibkey>mino-etal-2015-learning</bibkey>
     </paper>
@@ -60,17 +60,17 @@
     <paper id="7">
       <title><fixed-case>METEOR</fixed-case> for multiple target languages using <fixed-case>DB</fixed-case>nary</title>
       <author><first>Zied</first><last>Elloumi</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Gilles</first><last>Serasset</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Serasset</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <url hash="2edbb228">2015.mtsummit-papers.7</url>
       <bibkey>elloumi-etal-2015-meteor</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>J</fixed-case>apanese controlled language rules to improve machine translatability of municipal documents</title>
       <author><first>Rei</first><last>Miyata</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Midori</first><last>Tatsumi</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <url hash="30861eeb">2015.mtsummit-papers.8</url>
@@ -87,22 +87,22 @@
       <title>Using joint models or domain adaptation in statistical machine translation</title>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="ff01afa8">2015.mtsummit-papers.10</url>
       <bibkey>durrani-etal-2015-using</bibkey>
     </paper>
     <paper id="11">
       <title>Machine translation evaluation made fuzzier: a study on post-editing productivity and evaluation metrics in commercial settings</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <author><first>Manuel</first><last>Arcedillo</last></author>
       <url hash="dd919825">2015.mtsummit-papers.11</url>
       <bibkey>parra-escartin-arcedillo-2015-machine</bibkey>
     </paper>
     <paper id="12">
       <title>A distributed inflection model for translating into morphologically rich languages</title>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <url hash="d2fa464b">2015.mtsummit-papers.12</url>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="13">
       <title>Bandit structured prediction for learning from partial feedback in statistical machine translation</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <author><first>Tanguy</first><last>Urvoy</last></author>
       <url hash="ca223f23">2015.mtsummit-papers.13</url>
@@ -119,10 +119,10 @@
     <paper id="14">
       <title>An empirical study of segment prioritization for incrementally retrained post-editing-based <fixed-case>SMT</fixed-case></title>
       <author><first>Jinhua</first><last>Du</last></author>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Alfredo</first><last>Maldonado-Guerra</last></author>
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado-Guerra</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <url hash="a1b61b77">2015.mtsummit-papers.14</url>
       <bibkey>du-etal-2015-empirical</bibkey>
     </paper>
@@ -179,7 +179,7 @@
       <title>Topic adaptation for machine translation of e-commerce content</title>
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <author><first>Sharam</first><last>Khadivi</last></author>
       <author><first>Hassan</first><last>Sawaf</last></author>
       <url hash="3692deda">2015.mtsummit-papers.21</url>
@@ -188,7 +188,7 @@
     <paper id="22">
       <title>Machine translation with source-predicted target morphology</title>
       <author><first>Joachim</first><last>Daiber</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <url hash="f97ca750">2015.mtsummit-papers.22</url>
       <bibkey>daiber-simaan-2015-machine</bibkey>
     </paper>
@@ -205,7 +205,7 @@
     <paper id="24">
       <title>Bilingual distributed phrase representations for statistical machin translation</title>
       <author><first>Peyman</first><last>Passban</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Qun</first><last>Li</last></author>
       <url hash="d5f0a240">2015.mtsummit-papers.24</url>
       <bibkey>passban-etal-2015-bilingual</bibkey>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="8">
       <title>Quality evaluation of four translations of a kidney document: focus on reliability</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <url hash="a9cbed60">2015.mtsummit-users.8</url>
       <bibkey>melby-2015-quality</bibkey>
     </paper>
@@ -369,13 +369,13 @@
     </paper>
     <paper id="19">
       <title>Productivity promotion strategies for collaborative translation on huge volume technical documents</title>
-      <author><first>Guiping</first><last>Zhang</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <author><first>Na</first><last>Ye</last></author>
       <author><first>Fang</first><last>Cai</last></author>
       <author><first>Chuang</first><last>Wu</last></author>
       <author><first>Xiangkui</first><last>Sun</last></author>
       <author><first>Jinfu</first><last>Yuan</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="e75bb828">2015.mtsummit-users.19</url>
       <bibkey>zhang-etal-2015-productivity</bibkey>
     </paper>
@@ -433,7 +433,7 @@
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Zhengwei</first><last>Qiu</last></author>
       <author><first>Asanka</first><last>Wasala</last></author>
-      <author><first>Reinhard</first><last>Schaler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schaler</last></author>
       <url hash="75ec8078">2015.mtsummit-wptp.5</url>
       <bibkey>du-etal-2015-domain</bibkey>
     </paper>
diff --git a/data/xml/2015.tal.xml b/data/xml/2015.tal.xml
index 2edfb8af66..6e827c9382 100644
--- a/data/xml/2015.tal.xml
+++ b/data/xml/2015.tal.xml
@@ -61,7 +61,7 @@
     </meta>
     <paper id="1">
       <title>Distributional Semantics Today - Introduction to the special issue</title>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>7–20</pages>
       <url hash="ec39e272">2015.tal-2.1</url>
@@ -78,7 +78,7 @@
     <paper id="3">
       <title>Méthode semi-compositionnelle pour l’extraction de synonymes des termes complexes [Semi-compositional method for synonym extraction of complex terms]</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>51–76</pages>
       <url hash="9ed6de51">2015.tal-2.3</url>
       <language>fra</language>
@@ -87,7 +87,7 @@
     <paper id="4">
       <title>Analyse distributionnelle appliquée aux textes de spécialité - Réduction de la dispersion des données par abstraction des contextes [Distributional analysis applied to domain-specific texts - Data dispersion reduction by context abstraction]</title>
       <author><first>Amandine</first><last>Périnet</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>77–102</pages>
       <url hash="b939cfc2">2015.tal-2.4</url>
       <language>fra</language>
@@ -119,7 +119,7 @@
     <paper id="1">
       <title>Préface [Foreword]</title>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>7–22</pages>
       <url hash="f3832982">2015.tal-3.1</url>
       <language>fra</language>
@@ -128,8 +128,8 @@
     <paper id="2">
       <title>Analyse en dépendance et classification de requêtes en langue naturelle, application à la recommandation de livres [Dependency parsing and classification of natural language queries: application to book recommendation]</title>
       <author><first>Anaïs</first><last>Ollagnier</last></author>
-      <author><first>Sébastien</first><last>Fournier</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="sebastien-fournier"><first>Sébastien</first><last>Fournier</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>23–47</pages>
       <url hash="43e97344">2015.tal-3.2</url>
       <language>fra</language>
diff --git a/data/xml/2015.tc.xml b/data/xml/2015.tc.xml
index 5cf902daa5..8c22e02008 100644
--- a/data/xml/2015.tc.xml
+++ b/data/xml/2015.tc.xml
@@ -11,7 +11,7 @@
     </meta>
     <paper id="1">
       <title><fixed-case>QT</fixed-case>21: A new era for translators and the computer</title>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <url hash="8add1573">2015.tc-1.1</url>
       <bibkey>melby-2015-qt21</bibkey>
     </paper>
@@ -19,21 +19,21 @@
       <title>The reception of intralingual and interlingual automatic subtitling: An exploratory study within the <fixed-case>HBB</fixed-case>4<fixed-case>ALL</fixed-case> project</title>
       <author><first>Anna</first><last>Matamala</last></author>
       <author><first>Andreu</first><last>Oliver</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
       <url hash="94e92032">2015.tc-1.2</url>
       <bibkey>matamala-etal-2015-reception</bibkey>
     </paper>
     <paper id="3">
       <title>The <fixed-case>EXPERT</fixed-case> project: Advancing the state of the art in hybrid translation technologies</title>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Alessandro</first><last>Cattelan</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Juan</first><last>José Arevalillo</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="355ef64e">2015.tc-1.3</url>
       <bibkey>orasan-etal-2015-expert</bibkey>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="9">
       <title>Skype Translator: Breaking down language and hearing barriers. A behind the scenes look at near real-time speech translation</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <url hash="17a6f440">2015.tc-1.9</url>
       <bibkey>lewis-2015-skype</bibkey>
     </paper>
diff --git a/data/xml/2016.amta.xml b/data/xml/2016.amta.xml
index 4b30389ecb..d7dfb31374 100644
--- a/data/xml/2016.amta.xml
+++ b/data/xml/2016.amta.xml
@@ -18,9 +18,9 @@
     </frontmatter>
     <paper id="1">
       <title>Instance Selection for Online Automatic Post-Editing in a multi-domain scenario</title>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1-15</pages>
       <url hash="40615543">2016.amta-researchers.1</url>
@@ -40,7 +40,7 @@
       <title>Fuzzy-match repair using black-box machine translation systems: what can be expected?</title>
       <author><first>John</first><last>Ortega</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <pages>27-39</pages>
       <url hash="86456ca6">2016.amta-researchers.3</url>
       <abstract>Computer-aided translation (CAT) tools often use a translation memory (TM) as the key resource to assist translators. A TM contains translation units (TU) which are made up of source and target language segments; translators use the target segments in the TU suggested by the CAT tool by converting them into the desired translation. Proposals from TMs could be made more useful by using techniques such as fuzzy-match repair (FMR) which modify words in the target segment corresponding to mismatches identified in the source segment. Modifications in the target segment are done by translating the mismatched source sub-segments using an external source of bilingual information (SBI) and applying the translations to the corresponding positions in the target segment. Several combinations of translated sub-segments can be applied to the target segment which can produce multiple repair candidates. We provide a formal algorithmic description of a method that is capable of using any SBI to generate all possible fuzzy-match repairs and perform an oracle evaluation on three different language pairs to ascertain the potential of the method to improve translation productivity. Using DGT-TM translation memories and the machine system Apertium as the single source to build repair operators in three different language pairs, we show that the best repaired fuzzy matches are consistently closer to reference translations than either machine-translated segments or unrepaired fuzzy matches.</abstract>
@@ -72,7 +72,7 @@
       <title>Ranking suggestions for black-box interactive translation prediction systems with multilayer perceptrons</title>
       <author><first>Daniel</first><last>Torregrosa</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <pages>65-78</pages>
       <url hash="bd23a4e6">2016.amta-researchers.6</url>
       <abstract>The objective of interactive translation prediction (ITP), a paradigm of computer-aided translation, is to assist professional translators by offering context-based computer-generated suggestions as they type. While most state-of-the-art ITP systems are tightly coupled to a machine translation (MT) system (often created ad-hoc for this purpose), our proposal follows a resourceagnostic approach, one that does not need access to the inner workings of the bilingual resources (MT systems or any other bilingual resources) used to generate the suggestions, thus allowing to include new resources almost seamlessly. As we do not expect the user to tolerate more than a few proposals each time, the set of potential suggestions need to be filtered and ranked; the resource-agnostic approach has been evaluated before using a set of intuitive length-based and position-based heuristics designed to determine which suggestions to show, achieving promising results. In this paper, we propose a more principled suggestion ranking approach using a regressor (a multilayer perceptron) that achieves significantly better results.</abstract>
@@ -81,7 +81,7 @@
     <paper id="7">
       <title>Multi-domain Adaptation for Statistical Machine Translation Based on Feature Augmentation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>79-92</pages>
       <url hash="ef459463">2016.amta-researchers.7</url>
       <abstract>Domain adaptation is a major challenge when applying machine translation to practical tasks. In this paper, we present domain adaptation methods for machine translation that assume multiple domains. The proposed methods combine two model types: a corpus-concatenated model covering multiple domains and single-domain models that are accurate but sparse in specific domains. We combine the advantages of both models using feature augmentation for domain adaptation in machine learning. Our experimental results show that the BLEU scores of the proposed method clearly surpass those of single-domain models for low-resource domains. For high-resource domains, the scores of the proposed method were superior to those of both single-domain and corpusconcatenated models. Even in domains having a million bilingual sentences, the translation quality was at least preserved and even improved in some domains. These results demonstrate that state-of-the-art domain adaptation can be realized with appropriate settings, even when using standard log-linear models.</abstract>
@@ -122,7 +122,7 @@
     <paper id="11">
       <title>Improving Neural Machine Translation on resource-limited pairs using auxiliary data of a third language</title>
       <author><first>Ander</first><last>Martinez</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>135-148</pages>
       <url hash="378d85cd">2016.amta-researchers.11</url>
       <abstract>In the recent years interest in Deep Neural Networks (DNN) has grown in the field of Natural Language Processing, as new training methods have been proposed. The usage of DNN has achieved state-of-the-art performance in various areas. Neural Machine Translation (NMT) described by Bahdanau et al. (2014) and its successive variations have shown promising results. DNN, however, tend to over-fit on small data-sets, which makes this method impracticable for resource-limited language pairs. This article combines three different ideas (splitting words into smaller units, using an extra dataset of a related language pair and using monolingual data) for improving the performance of NMT models on language pairs with limited data. Our experiments show that, in some cases, our proposed approach to subword-units performs better than BPE (Byte pair encoding) and that auxiliary language-pairs and monolingual data can help improve the performance of languages with limited resources.</abstract>
@@ -160,7 +160,7 @@
       <title>Investigating the Impact of Various Partial Diacritization Schemes on <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Statistical Machine Translation</title>
       <author><first>Sawsan</first><last>Alqahtani</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>191-204</pages>
       <url hash="07c74871">2016.amta-researchers.15</url>
       <abstract>Most diacritics in Arabic represent short vowels. In Arabic orthography, such diacritics are considered optional. The absence of these diacritics naturally leads to significant word ambiguity to top the inherent ambiguity present in fully diacritized words. Word ambiguity is a significant impediment for machine translation. Despite the ambiguity presented by lack of diacritization, context helps ameliorate the situation. Identifying the appropriate amount of diacritic restoration to reduce word sense ambiguity in the context of machine translation is the object of this paper. Diacritic marks help reduce the number of possible lexical word choices assigned to a source word which leads to better quality translated sentences. We investigate a variety of (linguistically motivated) partial diacritization schemes that preserve some of the semantics that in essence complement the implicit contextual information present in the sentences. We also study the effect of training data size and report results on three standard test sets that represent a combination of different genres. The results show statistically significant improvements for some schemes compared to two baselines: text with no diacritics (the typical writing system adopted for Arabic) and text that is fully diacritized.</abstract>
@@ -236,10 +236,10 @@
     </paper>
     <paper id="8">
       <title>What Can We Really Learn from Post-editing?</title>
-      <author><first>Marcis</first><last>Pinnis</last></author>
-      <author><first>Rihards</first><last>Kalnins</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
+      <author id="rihards-kalnins"><first>Rihards</first><last>Kalnins</last></author>
       <author><first>Raivis</first><last>Skadins</last></author>
-      <author><first>Inguna</first><last>Skadina</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadina</last></author>
       <pages>86-91</pages>
       <url hash="64207e07">2016.amta-users.8</url>
       <bibkey>pinnis-etal-2016-really</bibkey>
@@ -247,8 +247,8 @@
     <paper id="9">
       <title>An Empirical Study: Post-editing Effort for <fixed-case>E</fixed-case>nglish to <fixed-case>A</fixed-case>rabic Hybrid Machine Translation</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Francisco</first><last>Guzman</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>92-113</pages>
       <attachment type="presentation" hash="011d1517">2016.amta-users.9.Presentation.pdf</attachment>
       <bibkey>sajjad-etal-2016-empirical</bibkey>
@@ -300,7 +300,7 @@
     </paper>
     <paper id="16">
       <title>Improving Machine Translation for Post-Editing via Real Time Adaptation</title>
-      <author><first>Dragos</first><last>Munteanu</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos</first><last>Munteanu</last></author>
       <pages>193-221</pages>
       <attachment type="presentation" hash="04bc9ed3">2016.amta-users.16.Presentation.pdf</attachment>
       <bibkey>munteanu-2016-improving</bibkey>
@@ -374,7 +374,7 @@
       <title>Toward Temporally-aware <fixed-case>MT</fixed-case>: Can Information Extraction Help Preserve Temporal Interpretation?</title>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>371-384</pages>
       <url hash="91e112c9">2016.amta-users.25</url>
       <bibkey>cassidy-etal-2016-toward</bibkey>
@@ -415,7 +415,7 @@
     </paper>
     <paper id="30">
       <title>Proto-<fixed-case>MT</fixed-case> Evaluation for Humanitarian Assistance Disaster Response Scenarios</title>
-      <author><first>Douglas</first><last>Jones</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
       <pages>551-574</pages>
       <attachment type="presentation" hash="f53c51b1">2016.amta-users.30.Presentation.pdf</attachment>
       <bibkey>jones-2016-proto</bibkey>
@@ -444,7 +444,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>M</fixed-case>o<fixed-case>J</fixed-case>o: Bringing Hybrid <fixed-case>MT</fixed-case> to the Center for Applied Machine Translation</title>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <pages>654-714</pages>
       <attachment type="presentation" hash="4a56ba64">2016.amta-users.34.Presentation.pdf</attachment>
       <bibkey>martindale-2016-mojo</bibkey>
diff --git a/data/xml/2016.clib.xml b/data/xml/2016.clib.xml
index f72ba3fa9c..ae6fbd5cb0 100644
--- a/data/xml/2016.clib.xml
+++ b/data/xml/2016.clib.xml
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Linguistic Data Retrievable from a Treebank</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <pages>19–27</pages>
       <abstract>This paper describes the Romanian treebank annotated according to the Universal Dependency principles. We present the types of texts included in the treebank, their processing phases and the tools used for doing it, as well as the levels of annotation, with a focus on the syntactic level. We briefly present the syntactic formalism used, the principles followed and the set of relations. The perspective we adopted is the linguist’s who searches the treebank for information with relevance for the study of Romanian. (S)He can interpret the statistics based on the corpus and can also query the treebank for finding examples to support a theory, for testing hypothesis or for discovering new tendencies. We use here the passive constructions in Romanian as a case study for showing how statistical data help understanding this linguistic phenomenon. We also discuss the kinds of linguistic information retrievable and non-retrievable form the treebank, based on the annotation principles.</abstract>
@@ -76,7 +76,7 @@
       <title>Finding Good Answers in Online Forums: Community Question Answering for <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Tsvetomila</first><last>Mihaylova</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ivelina</first><last>Nikolova</last></author>
       <pages>54–63</pages>
       <abstract>Community Question Answering (CQA) is a form of question answering that is getting increasingly popular as a research direction recently. Given a question posted in an online community forum and the thread of answers to it, a common formulation of the task is to rank automatically the answers, so that the good ones are ranked higher than the bad ones. Despite the vast research in CQA for English, very little attention has been paid to other languages. To bridge this gap, here we present our method for Community Question Answering in Bulgarian. We create annotated training and testing datasets for Bulgarian, and we further explore the applicability of machine translation for reusing English CQA data for building a Bulgarian system. The evaluation results show improvement over the baseline and can serve as a basis for further research.</abstract>
diff --git a/data/xml/2016.eamt.xml b/data/xml/2016.eamt.xml
index 575555bc3d..778a67cba2 100644
--- a/data/xml/2016.eamt.xml
+++ b/data/xml/2016.eamt.xml
@@ -14,9 +14,9 @@
       <title><fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> – a deep linguistic core of the combined Cimera <fixed-case>MT</fixed-case> system</title>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Roman</first><last>Sudarikov</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="5673521f">2016.eamt-2.1</url>
       <bibkey>popel-etal-2016-tectomt</bibkey>
     </paper>
@@ -41,14 +41,14 @@
     </paper>
     <paper id="4">
       <title>Apertium: a free/open source platform for machine translation and basic language technology</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <url hash="19cc70f1">2016.eamt-2.4</url>
       <bibkey>forcada-tyers-2016-apertium</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>r: a web platform for rapid construction of phrasebook-style medical speech translation applications</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Hervé</first><last>Spechbach</last></author>
       <url hash="6d487ead">2016.eamt-2.5</url>
       <bibkey>bouillon-spechbach-2016-babeldr</bibkey>
@@ -77,19 +77,19 @@
     <paper id="7">
       <title><fixed-case>H</fixed-case>im<fixed-case>L</fixed-case>: Health in my language</title>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <url hash="0de65c90">2016.eamt-2.7</url>
       <bibkey>haddow-fraser-2016-himl</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>OPUS</fixed-case> – parallel corpora for everyone</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="fa1965f7">2016.eamt-2.8</url>
       <bibkey>tiedemann-2016-opus</bibkey>
     </paper>
     <paper id="9">
       <title>Integration of machine translation paradigms</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <url hash="4a0dfe5c">2016.eamt-2.9</url>
       <bibkey>costa-jussa-2016-integration</bibkey>
     </paper>
@@ -151,7 +151,7 @@
     </paper>
     <paper id="18">
       <title>Amplexor <fixed-case>MTE</fixed-case>xpert – machine translation adapted to the translation workflow</title>
-      <author><first>Alexandru</first><last>Ceausu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceausu</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <author><first>Tudy</first><last>Droumaguet</last></author>
       <url hash="3898b28c">2016.eamt-2.18</url>
@@ -160,8 +160,8 @@
     <paper id="19">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: automatic building of machine translation</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <author><first>Nikola</first><last>Lubesic</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <url hash="0d217e5d">2016.eamt-2.19</url>
@@ -179,16 +179,16 @@
       <author><first>Maria</first><last>Gialama</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Mitja</first><last>Jermol</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Joss</first><last>Moorkens</last></author>
       <author><first>Davor</first><last>Orlic</last></author>
       <author><first>Michael</first><last>Papadopoulos</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Dimitrios</first><last>Tsoumakos</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <url hash="b23abbba">2016.eamt-2.20</url>
       <bibkey>kordoni-etal-2016-tramooc</bibkey>
diff --git a/data/xml/2016.gwc.xml b/data/xml/2016.gwc.xml
index d3f9d165c1..629832051b 100644
--- a/data/xml/2016.gwc.xml
+++ b/data/xml/2016.gwc.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 8th Global WordNet Conference (GWC)</booktitle>
       <editor><first>Christiane</first><last>Fellbaum</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Verginica Barbu</first><last>Mititelu</last></editor>
-      <editor><first>Corina</first><last>Forascu</last></editor>
+      <editor id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></editor>
+      <editor id="corina-forascu"><first>Corina</first><last>Forascu</last></editor>
       <publisher>Global Wordnet Association</publisher>
       <address>Bucharest, Romania</address>
       <month>27--30 January</month>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Adverbs in <fixed-case>S</fixed-case>anskrit <fixed-case>W</fixed-case>ordnet</title>
       <author><first>Tanuja</first><last>Ajotikar</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>1–8</pages>
       <abstract>The wordnet contains part-of-speech categories such as noun, verb, adjective and adverb. In Sanskrit, there is no formal distinction among nouns, adjectives and adverbs. This poses the question, is an adverb a separate category in Sanskrit? If not, then how do we accommodate it in a lexical resource? To investigate the issue, we attempt to study the complex nature of adverbs in Sanskrit and the policies adopted by Sanskrit lexicographers that would guide us in storing them in the Sanskrit wordnet.</abstract>
       <url hash="d9ab2004">2016.gwc-1.1</url>
@@ -51,7 +51,7 @@
       <title>Detecting Most Frequent Sense using Word Embeddings and <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et</title>
       <author><first>Harpreet Singh</first><last>Arora</last></author>
       <author><first>Sudha</first><last>Bhingardive</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>21–25</pages>
       <abstract>Since the inception of the SENSEVAL evaluation exercises there has been a great deal of recent research into Word Sense Disambiguation (WSD). Over the years, various supervised, unsupervised and knowledge based WSD systems have been proposed. Beating the first sense heuristics is a challenging task for these systems. In this paper, we present our work on Most Frequent Sense (MFS) detection using Word Embeddings and BabelNet features. The semantic features from BabelNet viz., synsets, gloss, relations, etc. are used for generating sense embeddings. We compare word embedding of a word with its sense embeddings to obtain the MFS with the highest similarity. The MFS is detected for six languages viz., English, Spanish, Russian, German, French and Italian. However, this approach can be applied to any language provided that word embeddings are available for that language.</abstract>
       <url hash="4a555d44">2016.gwc-1.4</url>
@@ -70,7 +70,7 @@
       <author><first>Monica</first><last>Berti</last></author>
       <author><first>Yuri</first><last>Bizzoni</last></author>
       <author><first>Federico</first><last>Boschetti</last></author>
-      <author><first>Gregory R.</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory R.</first><last>Crane</last></author>
       <author><first>Riccardo Del</first><last>Gratta</last></author>
       <author><first>Tariq</first><last>Yousef</last></author>
       <pages>34–38</pages>
@@ -84,7 +84,7 @@
       <author><first>Hanumant</first><last>Redkar</last></author>
       <author><first>Prateek</first><last>Sappadla</last></author>
       <author><first>Dhirendra</first><last>Singh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>39–43</pages>
       <abstract>Semantic similarity and relatedness measures play an important role in natural language processing applications. In this paper, we present the IndoWordNet::Similarity tool and interface, designed for computing the semantic similarity and relatedness between two words in IndoWordNet. A java based tool and a web interface have been developed to compute this semantic similarity and relatedness. Also, Java API has been developed for this purpose. This tool, web interface and the API are made available for the research purpose.</abstract>
       <url hash="8f3effdc">2016.gwc-1.7</url>
@@ -103,7 +103,7 @@
       <title><fixed-case>CILI</fixed-case>: the Collaborative Interlingual Index</title>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <pages>50–57</pages>
       <abstract>This paper introduces the motivation for and design of the Collaborative InterLingual Index (CILI). It is designed to make possible coordination between multiple loosely coupled wordnet projects. The structure of the CILI is based on the Interlingual index first proposed in the EuroWordNet project with several pragmatic extensions: an explicit open license, definitions in English and links to wordnets in the Global Wordnet Grid.</abstract>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="11">
       <title>Word Substitution in Short Answer Extraction: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Approach</title>
-      <author><first>Qingqing</first><last>Cai</last></author>
+      <author id="qingqing-cai"><first>Qingqing</first><last>Cai</last></author>
       <author><first>James</first><last>Gung</last></author>
       <author><first>Maochen</first><last>Guan</last></author>
       <author><first>Gerald</first><last>Kurlandski</last></author>
@@ -135,11 +135,11 @@
     </paper>
     <paper id="12">
       <title>An overview of <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets</title>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Livy</first><last>Real</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <author><first>Alberto</first><last>Simões</last></author>
       <pages>74–82</pages>
       <abstract>Semantic relations between words are key to building systems that aim to understand and manipulate language. For English, the “de facto” standard for representing this kind of knowledge is Princeton’s WordNet. Here, we describe the wordnet-like resources currently available for Portuguese: their origins, methods of creation, sizes, and usage restrictions. We start tackling the problem of comparing them, but only in quantitative terms. Finally, we sketch ideas for potential collaboration between some of the projects that produce Portuguese wordnets.</abstract>
@@ -179,7 +179,7 @@
       <author><first>Anna</first><last>Feltracco</last></author>
       <author><first>Lorenzo</first><last>Gatti</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Simone</first><last>Magnolini</last></author>
       <pages>101–105</pages>
       <abstract>We present a methodology for building lexical sets for argument slots of Italian verbs. We start from an inventory of semantically typed Italian verb frames and through a mapping to WordNet we automatically annotate the sets of fillers for the argument positions in a corpus of sentences. We evaluate both a baseline algorithm and a syntax driven algorithm and show that the latter performs significantly better in terms of precision.</abstract>
@@ -241,7 +241,7 @@
       <title>Sophisticated Lexical Databases - Simplified Usage: Mobile Applications and Browser Plugins For Wordnets</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>144–149</pages>
       <abstract>India is a country with 22 officially recognized languages and 17 of these have WordNets, a crucial resource. Web browser based interfaces are available for these WordNets, but are not suited for mobile devices which deters people from effectively using this resource. We present our initial work on developing mobile applications and browser extensions to access WordNets for Indian Languages. Our contribution is two fold: (1) We develop mobile applications for the Android, iOS and Windows Phone OS platforms for Hindi, Marathi and Sanskrit WordNets which allow users to search for words and obtain more information along with their translations in English and other Indian languages. (2) We also develop browser extensions for English, Hindi, Marathi, and Sanskrit WordNets, for both Mozilla Firefox, and Google Chrome. We believe that such applications can be quite helpful in a classroom scenario, where students would be able to access the WordNets as dictionaries as well as lexical knowledge bases. This can help in overcoming the language barrier along with furthering language understanding.</abstract>
       <url hash="77edca65">2016.gwc-1.22</url>
@@ -251,7 +251,7 @@
       <title>A picture is worth a thousand words: Using <fixed-case>O</fixed-case>pen<fixed-case>C</fixed-case>lip<fixed-case>A</fixed-case>rt library for enriching <fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Shehzaad</first><last>Dhuliawala</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>150–154</pages>
       <abstract>WordNet has proved to be immensely useful for Word Sense Disambiguation, and thence Machine translation, Information Retrieval and Question Answering. It can also be used as a dictionary for educational purposes. The semantic nature of concepts in a WordNet motivates one to try to express this meaning in a more visual way. In this paper, we describe our work of enriching IndoWordNet with image acquisitions from the OpenClipArt library. We describe an approach used to enrich WordNets for eighteen Indian languages. Our contribution is three fold: (1) We develop a system, which, given a synset in English, finds an appropriate image for the synset. The system uses the OpenclipArt library (OCAL) to retrieve images and ranks them. (2) After retrieving the images, we map the results along with the linkages between Princeton WordNet and Hindi WordNet, to link several synsets to corresponding images. We choose and sort top three images based on our ranking heuristic per synset. (3) We develop a tool that allows a lexicographer to manually evaluate these images. The top images are shown to a lexicographer by the evaluation tool for the task of choosing the best image representation. The lexicographer also selects the number of relevant images. Using our system, we obtain an Average Precision (P @ 3) score of 0.30.</abstract>
       <url hash="6888cd4f">2016.gwc-1.23</url>
@@ -324,10 +324,10 @@
     <paper id="30">
       <title>An empirically grounded expansion of the supersense inventory</title>
       <author><first>Hector Martinez</first><last>Alonso</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <pages>199–208</pages>
       <abstract>In this article we present an expansion of the supersense inventory. All new super-senses are extensions of members of the current inventory, which we postulate by identifying semantically coherent groups of synsets. We cover the expansion of the already-established supernsense inventory for nouns and verbs, the addition of coarse supersenses for adjectives in absence of a canonical supersense inventory, and super-senses for verbal satellites. We evaluate the viability of the new senses examining the annotation agreement, frequency and co-ocurrence patterns.</abstract>
       <url hash="6838f1b5">2016.gwc-1.30</url>
@@ -336,7 +336,7 @@
     <paper id="31">
       <title>Adverbs in pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Theory and Implementation</title>
       <author><first>Marek</first><last>Maziarz</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Michal</first><last>Kalinski</last></author>
       <pages>209–217</pages>
       <abstract>Adverbs are seldom well represented in wordnets. Princeton WordNet, for example, derives from adjectives practically all its adverbs and whatever involvement they have. GermaNet stays away from this part of speech. Adverbs in plWordNet will be emphatically present in all their semantic and syntactic distinctness. We briefly discuss the linguistic background of the lexical system of Polish adverbs. We describe an automated generator of accurate candidate adverbs, and introduce the lexicographic procedures which will ensure high consistency of wordnet editors’ decisions about adverbs.</abstract>
@@ -375,7 +375,7 @@
       <author><first>Anupam</first><last>Mondal</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>243–248</pages>
       <abstract>In order to overcome the lack of medical corpora, we have developed a WordNet for Medical Events (WME) for identifying medical terms and their sense related information using a seed list. The initial WME resource contains 1654 medical terms or concepts. In the present research, we have reported the enhancement of WME with 6415 number of medical concepts along with their conceptual features viz. Parts-of-Speech (POS), gloss, semantics, polarity, sense and affinity. Several polarity lexicons viz. SentiWordNet, SenticNet, Bing Liu’s subjectivity list and Taboda’s adjective list were introduced with WordNet synonyms and hyponyms for expansion. The semantics feature guided us to build a semantic co-reference relation based network between the related medical concepts. These features help to prepare a medical concept network for better sense relation based visualization. Finally, we evaluated with respect to Adaptive Lesk Algorithm and conducted an agreement analysis for validating the expanded WME resource.</abstract>
       <url hash="8e14d542">2016.gwc-1.35</url>
@@ -385,7 +385,7 @@
       <title>Mapping and Generating Classifiers using an Open <fixed-case>C</fixed-case>hinese Ontology</title>
       <author><first>Luis Morgado Da</first><last>Costa</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Helena</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena</first><last>Gao</last></author>
       <pages>249–256</pages>
       <abstract>In languages such as Chinese, classifiers (CLs) play a central role in the quantification of noun-phrases. This can be a problem when generating text from input that does not specify the classifier, as in machine translation (MT) from English to Chinese. Many solutions to this problem rely on dictionaries of noun-CL pairs. However, there is no open large-scale machine-tractable dictionary of noun-CL associations. Many published resources exist, but they tend to focus on how a CL is used (e.g. what kinds of nouns can be used with it, or what features seem to be selected by each CL). In fact, since nouns are open class words, producing an exhaustive definite list of noun-CL associations is not possible, since it would quickly get out of date. Our work tries to address this problem by providing an algorithm for automatic building of a frequency based dictionary of noun-CL pairs, mapped to concepts in the Chinese Open Wordnet (Wang and Bond, 2013), an open machine-tractable dictionary for Chinese. All results will released under an open license.</abstract>
       <url hash="76745559">2016.gwc-1.36</url>
@@ -394,8 +394,8 @@
     <paper id="37">
       <title><fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Conversion to Web Ontology Language (<fixed-case>OWL</fixed-case>)</title>
       <author><first>Apurva</first><last>Nagvenkar</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>257–260</pages>
       <abstract>WordNet plays a significant role in Linked Open Data (LOD) cloud. It has numerous application ranging from ontology annotation to ontology mapping. IndoWordNet is a linked WordNet connecting 18 Indian language WordNets with Hindi as a source WordNet. The Hindi WordNet was initially developed by linking it to English WordNet. In this paper, we present a data representation of IndoWordNet in Web Ontology Language (OWL). The schema of Princeton WordNet has been enhanced to support the representation of IndoWordNet. This IndoWordNet representation in OWL format is now available to link other web resources. This representation is implemented for eight Indian languages.</abstract>
       <url hash="7c55320a">2016.gwc-1.37</url>
@@ -443,7 +443,7 @@
     <paper id="42">
       <title>pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0 – Almost There</title>
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
       <pages>292–301</pages>
@@ -455,7 +455,7 @@
       <title>Open <fixed-case>D</fixed-case>utch <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Marten</first><last>Postma</last></author>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Anneleen</first><last>Schoen</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>302–310</pages>
@@ -487,8 +487,8 @@
       <author><first>Nilesh</first><last>Joshi</last></author>
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Irawati</first><last>Kulkarni</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>325–332</pages>
       <abstract>Samāsa or compounds are a regular feature of Indian Languages. They are also found in other languages like German, Italian, French, Russian, Spanish, etc. Compound word is constructed from two or more words to form a single word. The meaning of this word is derived from each of the individual words of the compound. To develop a system to generate, identify and interpret compounds, is an important task in Natural Language Processing. This paper introduces a web based tool - Samāsa-Kartā for producing compound words. Here, the focus is on Sanskrit language due to its richness in usage of compounds; however, this approach can be applied to any Indian language as well as other languages. IndoWordNet is used as a resource for words to be compounded. The motivation behind creating compound words is to create, to improve the vocabulary, to reduce sense ambiguity, etc. in order to enrich the WordNet. The Samāsa-Kartā can be used for various applications viz., compound categorization, sandhi creation, morphological analysis, paraphrasing, synset creation, etc.</abstract>
       <url hash="0bf30f66">2016.gwc-1.46</url>
@@ -535,11 +535,11 @@
     </paper>
     <paper id="51">
       <title>The Predicate Matrix and the Event and Implied Situation Ontology: Making More of Events</title>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Marco</first><last>Rospocher</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Filip</first><last>Ilievski</last></author>
       <pages>364–372</pages>
       <abstract>This paper presents the Event and Implied Situation Ontology (ESO), a resource which formalizes the pre and post situations of events and the roles of the entities affected by an event. The ontology reuses and maps across existing resources such as WordNet, SUMO, VerbNet, PropBank and FrameNet. We describe how ESO is injected into a new version of the Predicate Matrix and illustrate how these resources are used to detect information in large document collections that otherwise would have remained implicit. The model targets interpretations of situations rather than the semantics of verbs per se. The event is interpreted as a situation using RDF taking all event components into account. Hence, the ontology and the linked resources need to be considered from the perspective of this interpretation model.</abstract>
@@ -568,7 +568,7 @@
     <paper id="54">
       <title>High, Medium or Low? Detecting Intensity Variation Among polar synonyms in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Raksha</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>389–395</pages>
       <abstract>For fine-grained sentiment analysis, we need to go beyond zero-one polarity and find a way to compare adjectives (synonyms) that share the same sense. Choice of a word from a set of synonyms, provides a way to select the exact polarity-intensity. For example, choosing to describe a person as benevolent rather than kind1 changes the intensity of the expression. In this paper, we present a sense based lexical resource, where synonyms are assigned intensity levels, viz., high, medium and low. We show that the measure P (s|w) (probability of a sense s given the word w) can derive the intensity of a word within the sense. We observe a statistically significant positive correlation between P(s|w) and intensity of synonyms for three languages, viz., English, Marathi and Hindi. The average correlation scores are 0.47 for English, 0.56 for Marathi and 0.58 for Hindi.</abstract>
       <url hash="bc6f00e4">2016.gwc-1.54</url>
@@ -576,7 +576,7 @@
     </paper>
     <paper id="55">
       <title>The Role of the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Relations in the Knowledge-based Word Sense Disambiguation Task</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>396–403</pages>
@@ -601,7 +601,7 @@
       <author><first>Jaya</first><last>Saraswati</last></author>
       <author><first>Laxmi</first><last>Kashyap</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>411–418</pages>
       <abstract>This paper reports the work of creating bilingual mappings in English for certain synsets of Hindi wordnet, the need for doing this, the methods adopted and the tools created for the task. Hindi wordnet, which forms the foundation for other Indian language wordnets, has been linked to the English WordNet. To maximize linkages, an important strategy of using direct and hypernymy linkages has been followed. However, the hypernymy linkages were found to be inadequate in certain cases and posed a challenge due to sense granularity of language. Thus, the idea of creating bilingual mappings was adopted as a solution. A bilingual mapping means a linkage between a concept in two different languages, with the help of translation and/or transliteration. Such mappings retain meaningful representations, while capturing semantic similarity at the same time. This has also proven to be a great enhancement of Hindi wordnet and can be a crucial resource for multilingual applications in natural language processing, including machine translation and cross language information retrieval.</abstract>
       <url hash="ea2e8428">2016.gwc-1.57</url>
@@ -619,7 +619,7 @@
       <title>Toward a truly multilingual <fixed-case>G</fixed-case>lobal<fixed-case>W</fixed-case>ordnet Grid</title>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <pages>424–431</pages>
       <abstract>In this paper, we describe a new and improved Global Wordnet Grid that takes advantage of the Collaborative InterLingual Index (CILI). Currently, the Open Multilingal Wordnet has made many wordnets accessible as a single linked wordnet, but as it used the Princeton Wordnet of English (PWN) as a pivot, it loses concepts that are not part of PWN. The technical solution to this, a central registry of concepts, as proposed in the EuroWordnet project through the InterLingual Index, has been known for many years. However, the practical issues of how to host this index and who decides what goes in remained unsolved. Inspired by current practice in the Semantic Web and the Linked Open Data community, we propose a way to solve this issue. In this paper we define the principles and protocols for contributing to the Grid. We tested them on two use cases, adding version 3.1 of the Princeton WordNet to a CILI based on 3.0 and adding the Open Dutch Wordnet, to validate the current set up. This paper aims to be a call for action that we hope will be further discussed and ultimately taken up by the whole wordnet community.</abstract>
       <url hash="a32bf755">2016.gwc-1.59</url>
@@ -628,8 +628,8 @@
     <paper id="60">
       <title>This Table is Different: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based Approach to Identifying References to Document Entities</title>
       <author><first>Shomir</first><last>Wilson</last></author>
-      <author><first>Alan</first><last>Black</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>432–440</pages>
       <abstract>Writing intended to inform frequently contains references to document entities (DEs), a mixed class that includes orthographically structured items (e.g., illustrations, sections, lists) and discourse entities (arguments, suggestions, points). Such references are vital to the interpretation of documents, but they often eschew identifiers such as “Figure 1” for inexplicit phrases like “in this figure” or “from these premises”. We examine inexplicit references to DEs, termed DE references, and recast the problem of their automatic detection into the determination of relevant word senses. We then show the feasibility of machine learning for the detection of DE-relevant word senses, using a corpus of human-labeled synsets from WordNet. We test cross-domain performance by gathering lemmas and synsets from three corpora: website privacy policies, Wikipedia articles, and Wikibooks textbooks. Identifying DE references will enable language technologies to use the information encoded by them, permitting the automatic generation of finely-tuned descriptions of DEs and the presentation of richly-structured information to readers.</abstract>
       <url hash="27025e65">2016.gwc-1.60</url>
diff --git a/data/xml/2016.iwslt.xml b/data/xml/2016.iwslt.xml
index 4b26100104..167d404046 100644
--- a/data/xml/2016.iwslt.xml
+++ b/data/xml/2016.iwslt.xml
@@ -19,7 +19,7 @@
       <title>The <fixed-case>IWSLT</fixed-case> 2016 Evaluation Campaign</title>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Rolando</first><last>Cattoni</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -29,16 +29,16 @@
     </paper>
     <paper id="2">
       <title>Integrating Encyclopedic Knowledge into Neural Language Models</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <abstract>Neural models have recently shown big improvements in the performance of phrase-based machine translation. Recurrent language models, in particular, have been a great success due to their ability to model arbitrary long context. In this work, we integrate global semantic information extracted from large encyclopedic sources into neural network language models. We integrate semantic word classes extracted from Wikipedia and sentence level topic information into a recurrent neural network-based language model. The new resulting models exhibit great potential in alleviating data sparsity problems with the additional knowledge provided. This approach of integrating global information is not restricted to language modeling but can also be easily applied to any model that profits from context or further data resources, e.g. neural machine translation. Using this model has improved rescoring quality of a state-of-the-art phrase-based translation system by 0.84 BLEU points. We performed experiments on two language pairs.</abstract>
       <url hash="6e990eb8">2016.iwslt-1.2</url>
       <bibkey>zhang-etal-2016-integrating</bibkey>
     </paper>
     <paper id="3">
       <title>Factored Neural Machine Translation Architectures</title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <abstract>In this paper we investigate the potential of the neural machine translation (NMT) when taking into consideration the linguistic aspect of target language. From this standpoint, the NMT approach with attention mechanism [1] is extended in order to produce several linguistically derived outputs. We train our model to simultaneously output the lemma and its corresponding factors (e.g. part-of-speech, gender, number). The word level translation is built with a mapping function using a priori linguistic information. Compared to the standard NMT system, factored architecture increases significantly the vocabulary coverage while decreasing the number of unknown words. With its richer architecture, the Factored NMT approach allows us to implement several training setup that will be discussed in detail along this paper. On the IWSLT’15 English-to-French task, FNMT model outperforms NMT model in terms of BLEU score. A qualitative analysis of the output on a set of test sentences shows the effectiveness of the FNMT model.</abstract>
@@ -49,7 +49,7 @@
       <title>Audio Segmentation for Robust Real-Time Speech Recognition Based on Neural Networks</title>
       <author><first>Micha</first><last>Wetzel</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <abstract>Speech that contains multimedia content can pose a serious challenge for real-time automatic speech recognition (ASR) for two reasons: (1) The ASR produces meaningless output, hurting the readability of the transcript. (2) The search space of the ASR is blown up when multimedia content is encountered, resulting in large delays that compromise real-time requirements. This paper introduces a segmenter that aims to remove these problems by detecting music and noise segments in real-time and replacing them with silence. We propose a two step approach, consisting of frame classification and smoothing. First, a classifier detects speech and multimedia on the frame level. In the second step the smoothing algorithm considers the temporal context to prevent rapid class fluctuations. We investigate in frame classification and smoothing settings to obtain an appealing accuracy-latency-tradeoff. The proposed segmenter yields increases the transcript quality of an ASR system by removing on average 39 % of the errors caused by non-speech in the audio stream, while maintaining a real-time applicable delay of 270 milliseconds.</abstract>
       <url hash="506aa086">2016.iwslt-1.4</url>
       <bibkey>wetzel-etal-2016-audio</bibkey>
@@ -67,7 +67,7 @@
       <title>Toward Multilingual Neural Machine Translation with Universal Encoder and Decoder</title>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>In this paper, we present our first attempts in building a multilingual Neural Machine Translation framework under a unified approach in which the information shared among languages can be helpful in the translation of individual language pairs. We are then able to employ attention-based Neural Machine Translation for many-to-many multilingual translation tasks. Our approach does not require any special treatment on the network architecture and it allows us to learn minimal number of free parameters in a standard way of training. Our approach has shown its effectiveness in an under-resourced translation scenario with considerable improvements up to 2.6 BLEU points. In addition, we point out a novel way to make use of monolingual data with Neural Machine Translation using the same approach with a 3.15-BLEU-score gain in IWSLT’16 English→German translation task.</abstract>
       <url hash="01ea81e9">2016.iwslt-1.6</url>
       <bibkey>ha-etal-2016-toward</bibkey>
@@ -96,8 +96,8 @@
     <paper id="9">
       <title>Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features</title>
       <author><first>Markus</first><last>Müller</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>In an increasingly globalized world, there is a rising demand for speech recognition systems. Systems for languages like English, German or French do achieve a decent performance, but there exists a long tail of languages for which such systems do not yet exist. State-of-the-art speech recognition systems feature Deep Neural Networks (DNNs). Being a data driven method and therefore highly dependent on sufficient training data, the lack of resources directly affects the recognition performance. There exist multiple techniques to deal with such resource constraint conditions, one approach is the use of additional data from other languages. In the past, is was demonstrated that multilingually trained systems benefit from adding language feature vectors (LFVs) to the input features, similar to i-Vectors. In this work, we extend this approach by the addition of articulatory features (AFs). We show that AFs also benefit from LFVs and that multilingual system setups benefit from adding both AFs and LFVs. Pretending English to be a low-resource language, we restricted ourselves to use only 10h of English acoustic training data. For system training, we use additional data from French, German and Turkish. By using a combination of AFs and LFVs, we were able to decrease the WER from 18.1% to 17.3% after system combination in our setup using a multilingual phone set.</abstract>
       <url hash="32be4ac6">2016.iwslt-1.9</url>
       <bibkey>muller-etal-2016-towards</bibkey>
@@ -107,14 +107,14 @@
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>In this paper, we investigate a multilingual approach for speech disfluency removal. A major challenge of this task comes from the costly nature of disfluency annotation. Motivated by the fact that speech disfluencies are commonly observed throughout different languages, we investigate the potential of multilingual disfluency modeling. We suggest that learning a joint representation of the disfluencies in multiple languages can be a promising solution to the data sparsity issue. In this work, we utilize a multilingual neural machine translation system, where a disfluent speech transcript is directly transformed into a cleaned up text. Disfluency removal experiments on English and German speech transcripts show that multilingual disfluency modeling outperforms the single language systems. In a following experiment, we show that the improvements are also observed in a downstream application using the disfluency-removed transcripts as input.</abstract>
       <url hash="79d08aed">2016.iwslt-1.10</url>
       <bibkey>cho-etal-2016-multilingual</bibkey>
     </paper>
     <paper id="11">
       <title>A Neural Verb Lexicon Model with Source-side Syntactic Context for String-to-Tree Machine Translation</title>
-      <author><first>Maria</first><last>Nădejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <abstract>String-to-tree MT systems translate verbs without lexical or syntactic context on the source side and with limited target-side context. The lack of context is one reason why verb translation recall is as low as 45.5%. We propose a verb lexicon model trained with a feed-forward neural network that predicts the target verb conditioned on a wide source-side context. We show that a syntactic context extracted from the dependency parse of the source sentence improves the model’s accuracy by 1.5% over a baseline trained on a window context. When used as an extra feature for re-ranking the n-best list produced by the string-to-tree MT system, the verb lexicon model improves verb translation recall by more than 7%.</abstract>
@@ -124,7 +124,7 @@
     <paper id="12">
       <title><fixed-case>M</fixed-case>icrosoft Speech Language Translation (<fixed-case>MSLT</fixed-case>) Corpus: The <fixed-case>IWSLT</fixed-case> 2016 release for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman</title>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <abstract>We describe the Microsoft Speech Language Translation (MSLT) corpus, which was created in order to evaluate end-to-end conversational speech translation quality. The corpus was created from actual conversations over Skype, and we provide details on the recording setup and the different layers of associated text data. The corpus release includes Test and Dev sets with reference transcripts for speech recognition. Additionally, cleaned up transcripts and reference translations are available for evaluation of machine translation quality. The IWSLT 2016 release described here includes the source audio, raw transcripts, cleaned up transcripts, and translations to or from English for both French and German.</abstract>
       <url hash="66917ce2">2016.iwslt-1.12</url>
       <bibkey>federmann-lewis-2016-microsoft</bibkey>
@@ -133,7 +133,7 @@
       <title>Joint <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case> Features for Quality Estimation in Spoken Language Translation</title>
       <author><first>Ngoc-Tien</first><last>Le</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <abstract>This paper aims to unravel the automatic quality assessment for spoken language translation (SLT). More precisely, we propose several effective estimators based on our estimation of transcription (ASR) quality, translation (MT) quality, or both (combined and joint features using ASR and MT information). Our experiments provide an important opportunity to advance the understanding of the prediction quality of words in a SLT output that were revealed by MT and ASR features. These results could be applied to interactive speech translation or computer-assisted translation of speeches and lectures. For reproducible experiments, the code allowing to call our WCE-LIG application and the corpora used are made available to the research community.</abstract>
       <url hash="098bcb93">2016.iwslt-1.13</url>
       <bibkey>le-etal-2016-joint</bibkey>
@@ -150,15 +150,15 @@
     </paper>
     <paper id="15">
       <title><fixed-case>FBK</fixed-case>’s Neural Machine Translation Systems for <fixed-case>IWSLT</fixed-case> 2016</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Costanza</first><last>Conforti</last></author>
       <author><first>Shahab</first><last>Jalalvand</last></author>
       <author><first>Vevake</first><last>Balaraman</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <abstract>In this paper, we describe FBK’s neural machine translation (NMT) systems submitted at the International Workshop on Spoken Language Translation (IWSLT) 2016. The systems are based on the state-of-the-art NMT architecture that is equipped with a bi-directional encoder and an attention mechanism in the decoder. They leverage linguistic information such as lemmas and part-of-speech tags of the source words in the form of additional factors along with the words. We compare performances of word and subword NMT systems along with different optimizers. Further, we explore different ensemble techniques to leverage multiple models within the same and across different networks. Several reranking methods are also explored. Our submissions cover all directions of the MSLT task, as well as en-{de, fr} and {de, fr}-en directions of TED. Compared to previously published best results on the TED 2014 test set, our models achieve comparable results on en-de and surpass them on en-fr (+2 BLEU) and fr-en (+7.7 BLEU) language pairs.</abstract>
       <url hash="b1c7f09d">2016.iwslt-1.15</url>
@@ -171,7 +171,7 @@
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>In this paper, we present the KIT systems of the IWSLT 2016 machine translation evaluation. We participated in the machine translation (MT) task as well as the spoken language language translation (SLT) track for English→German and German→English translation. We use attentional neural machine translation (NMT) for all our submissions. We investigated different methods to adapt the system using small in-domain data as well as methods to train the system on these small corpora. In addition, we investigated methods to combine NMT systems that encode the input as well as the output differently. We combine systems using different vocabularies, reverse translation systems, multi-source translation system. In addition, we used pre-translation systems that facilitate phrase-based machine translation systems. Results show that applying domain adaptation and ensemble technique brings a crucial improvement of 3-4 BLEU points over the baseline system. In addition, system combination using n-best lists yields further 1-2 BLEU points.</abstract>
       <url hash="9bca9a75">2016.iwslt-1.16</url>
       <bibkey>cho-etal-2016-adaptation</bibkey>
@@ -181,8 +181,8 @@
       <author><first>Wilfried</first><last>Michel</last></author>
       <author><first>Zoltán</first><last>Tüske</last></author>
       <author><first>M. Ali Basha</first><last>Shaik</last></author>
-      <author><first>Ralf</first><last>Schlüter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <abstract>In this paper the RWTH large vocabulary continuous speech recognition (LVCSR) systems developed for the IWSLT-2016 evaluation campaign are described. This evaluation campaign focuses on transcribing spontaneous speech from Skype recordings. State-of-the-art bidirectional long short-term memory (LSTM) and deep, multilingually boosted feed-forward neural network (FFNN) acoustic models are trained an narrow and broadband features. An open vocabulary approach using subword units is also considered. LSTM and count-based full word and hybrid backoff language modeling methods are used to model the morphological richness of the German language. All these approaches are combined using confusion network combination (CNC) to yield a competitive WER.</abstract>
       <url hash="4f48eb02">2016.iwslt-1.17</url>
       <bibkey>michel-etal-2016-rwth</bibkey>
@@ -192,7 +192,7 @@
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <abstract>This paper describes QCRI’s machine translation systems for the IWSLT 2016 evaluation campaign. We participated in the Arabic→English and English→Arabic tracks. We built both Phrase-based and Neural machine translation models, in an effort to probe whether the newly emerged NMT framework surpasses the traditional phrase-based systems in Arabic-English language pairs. We trained a very strong phrase-based system including, a big language model, the Operation Sequence Model, Neural Network Joint Model and Class-based models along with different domain adaptation techniques such as MML filtering, mixture modeling and using fine tuning over NNJM model. However, a Neural MT system, trained by stacking data from different genres through fine-tuning, and applying ensemble over 8 models, beat our very strong phrase-based system by a significant 2 BLEU points margin in Arabic→English direction. We did not obtain similar gains in the other direction but were still able to outperform the phrase-based system. We also applied system combination on phrase-based and NMT outputs.</abstract>
       <url hash="513281b9">2016.iwslt-1.18</url>
       <bibkey>durrani-etal-2016-qcris</bibkey>
@@ -214,8 +214,8 @@
       <author><first>Sonia</first><last>Pipa</last></author>
       <author><first>Alin Florentin</first><last>Vasile</last></author>
       <author><first>Ioana</first><last>Ionașcu</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <abstract>Spoken Language Translation is currently a hot topic in the research community. This task is very complex, involving automatic speech recognition, text-normalization and machine translation. We present our speech translation system, which was compared against the other systems participating in the IWSLT 2016 Shared Task. We introduce our ASR system for English and our MT system for English to French (En-Fr) and English to German (En-De) language pairs. Additionally, for the English to French Challenge we introduce a methodology that enables the enhancement of statistical phrase-based translation with translation equivalents deduced from monolingual corpora using neural word embedding.</abstract>
       <url hash="99460abc">2016.iwslt-1.20</url>
       <bibkey>pipa-etal-2016-racai</bibkey>
@@ -227,7 +227,7 @@
       <author><first>Brian</first><last>Thompson</last></author>
       <author><first>Jonathan</first><last>Taylor</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Eric</first><last>Hansen</last></author>
       <author><first>Brian</first><last>Ore</last></author>
@@ -243,7 +243,7 @@
       <author><first>Andreas</first><last>Guta</last></author>
       <author><first>Nick</first><last>Rossenbach</last></author>
       <author><first>Miguel</first><last>Graça</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <abstract>This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of International Workshop on Spoken Language Translation (IWSLT) 2016. We have participated in the MT track for the German→English language pair employing our state-of-the-art phrase-based system, neural machine translation implementation and our joint translation and reordering decoder. Furthermore, we have applied feed-forward and recurrent neural language and translation models for reranking. The attention-based approach has been used for reranking the n-best lists for both phrasebased and hierarchical setups. On top of these systems, we make use of system combination to enhance the translation quality by combining individually trained systems.</abstract>
       <url hash="9a620a87">2016.iwslt-1.22</url>
       <bibkey>peter-etal-2016-rwth-aachen</bibkey>
@@ -255,17 +255,17 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Thomas</first><last>Zenkel</last></author>
       <author><first>Kevin</first><last>Kilgour</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>This paper describes our German and English Speech-to-Text (STT) systems for the 2016 IWSLT evaluation campaign. The campaign focuses on the transcription of unsegmented TED talks. Our setup includes systems using both the Janus and Kaldi frameworks. We combined the outputs using both ROVER [1] and confusion network combination (CNC) [2] to archieve a good overall performance. The individual subsystems are built by using different speaker-adaptive feature combination (e.g., lMEL with i-vector or bottleneck speaker vector), acoustic models (GMM or DNN) and speaker adaption (MLLR or fMLLR). Decoding is performed in two stages, where the GMM and DNN systems are adapted on the combination of the first stage outputs using MLLR, and fMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems. For the English TED task, our best combination system has a WER of 7.8% on the development set while our other combinations gained 21.8% and 28.7% WERs for the English and German MSLT tasks.</abstract>
       <url hash="ef03fffe">2016.iwslt-1.23</url>
       <bibkey>nguyen-etal-2016-2016</bibkey>
     </paper>
     <paper id="24">
       <title><fixed-case>UFAL</fixed-case> Submissions to the <fixed-case>IWSLT</fixed-case> 2016 <fixed-case>MT</fixed-case> Track</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Ondřej</first><last>Cífka</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
       <author><first>Roman</first><last>Sudarikov</last></author>
       <abstract>We present our submissions to the IWSLT 2016 machine translation task, as our first attempt to translate subtitles and one of our early experiments with neural machine translation (NMT). We focus primarily on English→Czech translation direction but perform also basic adaptation experiments for NMT with German and also the reverse direction. Three MT systems are tested: (1) our Chimera, a tight combination of phrase-based MT and deep linguistic processing, (2) Neural Monkey, our implementation of a NMT system in TensorFlow and (3) Nematus, an established NMT system.</abstract>
diff --git a/data/xml/2016.jeptalnrecital.xml b/data/xml/2016.jeptalnrecital.xml
index cb92b74be8..a66e4af297 100644
--- a/data/xml/2016.jeptalnrecital.xml
+++ b/data/xml/2016.jeptalnrecital.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 1 : JEP</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -75,7 +75,7 @@
       <title>Alignement de séquences phonétiques pour une analyse phonologique des erreurs de transcription automatique (Phonetic sequences alignment for a phonemic analysis of automatic speech transcription errors )</title>
       <language>fra</language>
       <author><first>Camille</first><last>Dutrey</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Naomi</first><last>Yamaguchi</last></author>
       <pages>46–54</pages>
       <abstract>La transcription automatique de la parole obtient aujourd’hui des performances élevées avec des taux d’erreur qui tombent facilement en dessous de 10% pour une parole journalistique. Cependant, pour des conversations plus libres, ils stagnent souvent autour de 20–30%. En français, une grande partie des erreurs sont dues à des confusions entre homophones n’impliquant pas les niveaux acousticophonétique et phonologique. Cependant, de nombreuses erreurs peuvent s’expliquer par des variantes de productions non prévues par le système. Afin de mieux comprendre quels processus phonologiques pourraient expliquer ces variantes spécifiques de la parole spontanée, nous proposons une analyse des erreurs en comparant prononciations attendue (référence) et reconnue (hypothèse) via un alignement phonétique par programmation dynamique. Les distances locales entre paires de phonèmes appariés correspondent au nombre de traits phonétiques disjoints. Nos analyses permettent d’identifier les traits phonétiques les plus fréquemment impliqués dans les erreurs et donnent des pistes pour des interprétations phonologiques.</abstract>
@@ -107,8 +107,8 @@
       <author><first>Killian</first><last>Janod</last></author>
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>73–81</pages>
       <abstract>Les représentations de documents au moyen d’approches à base de réseaux de neurones ont montré des améliorations significatives dans de nombreuses tâches du traitement du langage naturel. Dans le cadre d’applications réelles, où des conditions d’enregistrement difficiles peuvent être rencontrées, la transcription automatique de documents parlés peut générer un nombre de mots mal transcrits important. Cet article propose une représentation des documents parlés très bruités utilisant des caractéristiques apprises par un auto-encodeur profond supervisé. La méthode proposée s’appuie à la fois sur les documents bruités et leur équivalent propre annoté manuellement pour estimer une représentation plus robuste des documents bruités. Cette représentation est évaluée sur le corpus DECODA sur une tâche de classification thématique de conversations téléphoniques atteignant une précision de 83% avec un gain d’environ 6%.</abstract>
       <url hash="8d2dde63">2016.jeptalnrecital-jep.9</url>
@@ -166,7 +166,7 @@
     <paper id="14">
       <title>Cartopho : un site web de cartographie de variantes de prononciation en français (Cartopho: a website for mapping pronunciation variants in <fixed-case>F</fixed-case>rench)</title>
       <language>fra</language>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Jean-Philippe</first><last>Goldman</last></author>
       <author><first>Albert</first><last>Rilliard</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
@@ -181,7 +181,7 @@
       <language>fra</language>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>128–136</pages>
       <abstract>Le travail que nous présentons ici s’inscrit dans le domaine de l’évaluation des systèmes de reconnaissance automatique de la parole en vue de leur utilisation dans une tâche aval, ici la reconnaissance des entités nommées. Plus largement, la question que nous nous posons est “que peut apporter une métrique d’évaluation en dehors d’un score ?". Nous nous intéressons particulièrement aux erreurs des systèmes et à leur analyse et éventuellement à l’utilisation de ce que nous connaissons de ces erreurs. Nous étudions dans ce travail les listes ordonnées d’erreurs générées à partir de différentes métriques et analysons ce qui en ressort. Nous avons appliqué la même méthode sur les sorties de différents systèmes de reconnaissance de la parole. Nos expériences mettent en évidence que certaines métriques apportent une information plus pertinente étant donné une tâche et transverse à différents systèmes.</abstract>
       <url hash="8c9ced7b">2016.jeptalnrecital-jep.15</url>
@@ -192,7 +192,7 @@
       <language>fra</language>
       <author><first>Jonathan</first><last>Chevelu</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
-      <author><first>Sébastien Le</first><last>Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien Le</first><last>Maguer</last></author>
       <author><first>David</first><last>Guennec</last></author>
       <pages>137–145</pages>
       <abstract>En proposant une nouvelle approche de synthèse de la parole, les études comportent généralement une évaluation subjective d’échantillons acoustiques produits par un système de référence et un nouveau système. Ces échantillons sont produits à partir d’un petit ensemble de phrases choisies aléatoirement dans un unique domaine. Ainsi, statistiquement, des échantillons pratiquement identiques sont présentés et réduisent les écarts de mesure entre les systèmes, au risque de les considérer comme non significatifs. Pour éviter cette problématique méthodologique, nous comparons deux systèmes sur des milliers d’échantillons de différents domaines. L’évaluation est réalisée uniquement sur les paires d’échantillons les plus pertinentes, c’est-à-dire les plus différentes acoustiquement. Cette méthode est appliquée sur un système de synthèse de type HTS et un second par sélection d’unités. La comparaison avec l’approche classique montre que cette méthode révèle des écarts qui jusqu’alors n’étaient pas significatifs.</abstract>
@@ -241,7 +241,7 @@
       <author><first>Mohamed</first><last>Bouaziz</last></author>
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <author><first>Prosper</first><last>Correa</last></author>
       <pages>173–181</pages>
       <abstract>Cet article présente une méthode de prédiction de genres d’émissions télévisées couvrant 2 jours de diffusion de 4 chaînes TV françaises structurés en émissions annotées en genres. Ce travail traite des médias de masse de flux de chaînes télévisées et rejoint l’effort global d’extraction de connaissance à partir de cette grande quantité de données produites continuellement. Le corpus employé est fourni par l’entreprise EDD, anciennement appelée “L’Européenne de Données”, une entreprise spécialisée dans la gestion des flux multimédias. Les expériences détaillées dans cet article montrent qu’une approche simple fondée sur un modèle de n-grammes permet de prédire le genre d’une émission selon un historique avec une précision avoisinant les 50 %.</abstract>
@@ -254,7 +254,7 @@
       <author><first>Diane</first><last>Caussade</last></author>
       <author><first>Nathalie</first><last>Vallée</last></author>
       <author><first>Nathalie</first><last>Henrich Bernardoni</last></author>
-      <author><first>Jean-Marc</first><last>Colletta</last></author>
+      <author id="jean-marc-colletta"><first>Jean-Marc</first><last>Colletta</last></author>
       <author><first>Silvain</first><last>Gerber</last></author>
       <author><first>Frédérique</first><last>Letué</last></author>
       <author><first>Marie-José</first><last>Martinez</last></author>
@@ -373,8 +373,8 @@
       <language>fra</language>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>274–282</pages>
       <abstract>Nous nous intéressons à l’évaluation de la qualité des systèmes de reconnaissance de la parole étant donné une tâche de compréhension. L’objectif de ce travail est de fournir un outil permettant la sélection d’un système de reconnaissance automatique de la parole le plus adapté pour un système de dialogue donné. Nous comparons ici différentes métriques, notamment le WER, NE-WER et ATENE métrique proposée récemment pour l’évaluation des systèmes de reconnaissance de la parole étant donné une tâche de reconnaissance d’entités nommées. Cette dernière métrique montrait une meilleure corrélation avec les résultats de la tâche globale que toutes les autres métriques testées. Nos mesures indiquent une très forte corrélation avec la mesure ATENE et une moins forte avec le WER.</abstract>
       <url hash="a8d1c498">2016.jeptalnrecital-jep.31</url>
@@ -426,7 +426,7 @@
       <title>Etude par <fixed-case>EMA</fixed-case> des mouvements de la mâchoire inférieure durant les consonnes de l’arabe marocain (<fixed-case>EMA</fixed-case> study of jaw movements during <fixed-case>M</fixed-case>oroccan <fixed-case>A</fixed-case>rabic consonants)</title>
       <language>fra</language>
       <author><first>Chakir</first><last>Zeroual</last></author>
-      <author><first>Philip</first><last>Hoole</last></author>
+      <author id="philip-hoole"><first>Philip</first><last>Hoole</last></author>
       <author><first>Adamantios</first><last>Gafos</last></author>
       <pages>319–327</pages>
       <abstract>Cette étude est basée sur des données obtenues à l’aide d’EMA (AG500) enregistrant les mouvements de la mâchoire inférieure (Minf) durant les consonnes labiales, coronales, vélaires, uvulaires, pharyngales et laryngales de l’arabe marocain dans les contextes aCa et iCi. Nous avons montré que l’implication de la Minf est cruciale durant /s S t T/ (S T : consonnes emphatiques). Le recul de la racine de la langue n’est pas nécessairement corrélé à la baisse de la Minf. Les consonnes apicales ne sont pas toujours associées à l’abaissement de la Minf. La Minf ne semble pas impliquée durant les laryngales et les pharyngales, ce qui est en accord avec les déductions de Goldstein (1995). Les mouvements verticaux et horizontaux de la Minf sont relativement indépendants.</abstract>
@@ -481,8 +481,8 @@
       <title>Fusion d’espaces de représentations multimodaux pour la reconnaissance du rôle du locuteur dans des documents télévisuels (Multimodal embedding fusion for robust speaker role recognition in video broadcast )</title>
       <language>fra</language>
       <author><first>Sebastien</first><last>Delecraz</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
       <pages>364–372</pages>
       <abstract>L’identification du rôle d’un locuteur dans des émissions de télévision est un problème de classification de personne selon une liste de rôles comme présentateur, journaliste, invité, etc. À cause de la nonsynchronie entre les modalités, ainsi que par le manque de corpus de vidéos annotées dans toutes les modalités, seulement une des modalités est souvent utilisée. Nous présentons dans cet article une fusion multimodale des espaces de représentations de l’audio, du texte et de l’image pour la reconnaissance du rôle du locuteur pour des données asynchrones. Les espaces de représentations monomodaux sont entraînés sur des corpus de données exogènes puis ajustés en utilisant des réseaux de neurones profonds sur un corpus d’émissions françaises pour notre tâche de classification. Les expériences réalisées sur le corpus de données REPERE ont mis en évidence les gains d’une fusion au niveau des espaces de représentations par rapport aux méthodes de fusion tardive standard.</abstract>
@@ -574,7 +574,7 @@
       <author><first>Emmanuel</first><last>Ferreira</last></author>
       <author><first>Alexandre</first><last>Reiffers-Masson</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>437–445</pages>
       <abstract>De nombreux modules de compréhension de la parole ont en commun d’être probabilistes et basés sur des algorithmes d’apprentissage automatique. Deux difficultés majeures, rencontrées par toutes les méthodes existantes sont : le coût de la collecte des données et l’adaptation d’un module existant à un nouveau domaine. Dans cet article, nous proposons un processus d’adaptation en ligne avec une politique apprise en utilisant un algorithme de type bandit contre un adversaire. Nous montrons que cette proposition peut permettre d’optimiser un équilibre entre le coût de la collecte des retours demandés aux utilisateurs et la performance globale de la compréhension du langage parlé après sa mise à jour.</abstract>
       <url hash="a808f821">2016.jeptalnrecital-jep.49</url>
@@ -583,7 +583,7 @@
     <paper id="50">
       <title>Patrons Rythmiques et Genres Littéraires en Synthèse de la Parole (How to improve rhythmic patterns according to literary genre in synthesized speech ⇤ )</title>
       <language>fra</language>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
       <author><first>Hiyon</first><last>Yoo</last></author>
       <author><first>David</first><last>Guennec</last></author>
@@ -680,7 +680,7 @@
       <title>Pics mélodiques prétoniques en portugais brésilien : une étude quantitative (Pre-stress pitch peaks in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: a quantitative study)</title>
       <language>fra</language>
       <author><first>Plínio</first><last>Barbosa</last></author>
-      <author><first>Philippe Boula</first><last>de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe Boula</first><last>de Mareüil</last></author>
       <pages>527–535</pages>
       <abstract>Le présent travail porte sur un trait prosodique assez typique du portugais brésilien : un pic mélodique en position prétonique en fin d’énoncé déclaratif. Il vise à quantifier le phénomène, à partir d’enregistrements de cinq hommes et cinq femmes de l’état de São Paulo, en lecture et en narration. Il en résulte que des montées sur les prétoniques de 4 demi-tons suivies de descentes de 8 demi-tons, en moyenne, s’observent dans les deux styles de parole, chez les femmes. Chez les hommes, ces valeurs sont respectivement de 3 et 7 demi-tons. Ces montées-descentes d’une tierce et d’une quinte, respectivement, peuvent donner au portugais brésilien cette musicalité particulière et, puisque les descentes sont plus rapides chez les femmes, elles ouvrent des perspectives sociolinguistiques intéressantes.</abstract>
       <url hash="9467eace">2016.jeptalnrecital-jep.59</url>
@@ -704,7 +704,7 @@
       <author><first>Angélique</first><last>Amelot</last></author>
       <author><first>Grégoire</first><last>Bachman</last></author>
       <author><first>Catherine</first><last>Herrgott</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Lise</first><last>Crevier-Buchman</last></author>
       <pages>545–553</pages>
       <abstract>Quelles sont les caractéristiques acoustiques et articulatoires des voyelles parlées et chantées du Cantu in Paghjella (polyphonie corse à trois voix), en fonction du chanteur, de la voyelle et de la fréquence fondamentale ? L’analyse acoustique des quatre premiers formants de la parole au chant et celle des mouvements articulatoires lingual et labial, montrent généralement (i) une significative augmentation de F1 avec abaissement lingual mais fermeture labiale, en lien avec une corrélation entre F0 et F1 ; (ii) une baisse de F2 pour les voyelles antérieures, une postériorisation linguale et un recul de l’ombre hyoïdienne uniquement pour le bassu ; (iii) une nette augmentation de F3 et F4 surtout chez le bassu ; (iv) une augmentation du Singing Power Ratio surtout chez les bassu et secunda. Ses valeurs sont toutefois inférieures à celles de chanteurs lyriques, et ne correspondant pas comme ces derniers à un rapprochement de F3 et F4.</abstract>
@@ -773,10 +773,10 @@
     <paper id="67">
       <title>Réalisation phonétique et contraste phonologique marginal : une étude automatique des voyelles du roumain (Phonetic realization and marginal phonemic contrast : an automatic study of the <fixed-case>R</fixed-case>omanian vowels)</title>
       <language>fra</language>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <author><first>Margaret</first><last>Renwick</last></author>
       <author><first>Camille</first><last>Dutrey</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>Biana</first><last>Vieru</last></author>
       <pages>597–606</pages>
       <abstract>Cet article est dédié à l’analyse acoustique des voyelles du roumain : des productions en parole continue sont comparées à des prononciations “de laboratoire”. Les objectifs sont : (1) décrire les traits acoustiques des voyelles en fonction du style de parole ; (2) estimer la relation entre traits acoustiques et contrastes phonémiques de la langue ; (3) estimer dans quelle mesure l’étude de l’oral apporte des éclairages au sujet des attributs phonémiques des voyelles centrales [2] et [1], dont le statut (phonèmes vs allophones) est controversé. Nous montrons que les traits acoustiques sont comparables pour la parole journalistique vs contrôlée pour l’ensemble de l’inventaire sauf [2] et [1]. Dans la parole contrôlée [2] et [1] sont distinctes, mais confondues en faveur du timbre [2] à l’oral. La confusion de timbres n’est pas source d’inintelligibilité car [2] et [1] sont en distribution quasicomplémentaire. Ce résultat apporte des éclairages sur la question du contraste phonémique graduel et marginal (Goldsmith, 1995; Scobbie &amp; Stuart-Smith, 2008; Hall, 2013).</abstract>
@@ -821,7 +821,7 @@
       <title>Rôle des contextes lexical et post-lexical dans la réalisation du schwa : apports du traitement automatique de grands corpus (Role of lexical and post-lexical contexts in <fixed-case>F</fixed-case>rench schwa realisations : benefits of automatic processing of large corpora )</title>
       <language>fra</language>
       <author><first>Yaru</first><last>Wu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Cécile</first><last>Fougeron</last></author>
       <pages>633–641</pages>
       <abstract>Le rôle du contexte est connu dans la réalisation ou non du schwa en français. Deux grands corpus oraux de parole journalistique (ETAPE) et de parole familière (NCCFr), dans lesquels la realisation de schwa est déterminée à partir d’un alignement automatique, ont été utilisés pour examiner la contribution du contexte au sein du mot contenant schwa (lexical) vs. au travers de la frontière avec le mot précédent (post-lexical). Nos résultats montrent l’importance du contexte pré-frontière dans l’explication de la chute du schwa dans la première syllabe d’un mot polysyllabique en parole spontanée. Si le mot précédant se termine par une consonne, nous pouvons faire appel à la loi des trois consonnes et au principe de sonorité pour expliquer des différences de comportement en fonction de la nature des consonnes en contact.</abstract>
@@ -832,7 +832,7 @@
       <title>Des Réseaux de Neurones avec Mécanisme d’Attention pour la Compréhension de la Parole (Exploring the use of Attention-Based Recurrent Neural Networks For Spoken Language Understanding )</title>
       <language>fra</language>
       <author><first>Edwin</first><last>Simonnet</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <pages>642–650</pages>
@@ -849,7 +849,7 @@
       <author><first>Richard</first><last>Dufour</last></author>
       <author><first>Killian</first><last>Janod</last></author>
       <author><first>Waad Ben</first><last>Kheder</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>651–659</pages>
       <abstract>Les applications de compréhension du langage parlé sont moins performantes si les documents transcrits automatiquement contiennent un taux d’erreur-mot élevé. Des solutions récentes proposent de projeter ces transcriptions dans un espace de thèmes, comme par exemple l’allocation latente de Dirichlet (LDA), la LDA supervisée ainsi que le modèle author-topic (AT). Une représentation compacte originale, appelée c-vector, a été récemment introduite afin de surmonter la difficulté liée au choix de la taille de ces espaces thématiques. Cette représentation améliore la robustesse aux erreurs de transcription, en compactant les différentes représentations LDA d’un document parlé dans un espace réduit. Le défaut majeur de cette méthode est le nombre élevé de sous-tâches nécessaires à la construction de l’espace c-vector. Cet article propose de corriger ce défaut en utilisant un cadre original fondé sur un espace de caractéristiques robustes de faible dimension provenant d’un ensemble de modèles AT considérant à la fois le contenu du dialogue parlé (les mots) et la classe du document. Les expérimentations, conduites sur le corpus DECODA, montrent que la représentation proposée permet un gain de plus de 2.5 points en termes de conversations correctement classifiées.</abstract>
       <url hash="76aae68d">2016.jeptalnrecital-jep.73</url>
@@ -907,7 +907,7 @@
       <title>Sur les traces acoustiques de /ʃ/ et /ç/ en allemand <fixed-case>L</fixed-case>2 (Acoustic tracing of /<fixed-case>S</fixed-case>/ and /ç/ in <fixed-case>G</fixed-case>erman <fixed-case>L</fixed-case>2)</title>
       <language>fra</language>
       <author><first>Jane</first><last>Wottawa</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>696–704</pages>
       <abstract>Les apprenants français de l’allemand ont des difficultés à produire la fricative palatale sourde allemande /ç/ (Ich-Laut) et ont tendance à la remplacer par la fricative post-alvéolaire /S/. Nous nous demandons si avec des mesures acoustiques ces imprécisions de production peuvent être quantifiées d’une manière plus objective. Deux mesures acoustiques ont été examinées afin de distinguer au mieux /S/ et /ç/ dans un contexte VC en position finale de mot dans des productions de locuteurs germanophones natifs. Elles servent ensuite à quantifier les difficultés de production des apprenants français. 285 tokens de 20 locuteurs natifs et 20 locuteurs L2 ont été analysés. Les mesures appliquées sont le centre de gravité spectral et des rapports d’intensité par bande de fréquence. Sur les productions de locuteurs natifs, les résultats montrent que la mesure la plus fiable pour distinguer acoustiquement /S/ et /ç/ est le ratio d’intensité entre fréquences hautes (4-7 kHz) et basses (1-4 kHz). Les mesures confirment également les difficultés de production des locuteurs natifs français.</abstract>
       <url hash="977a3ba6">2016.jeptalnrecital-jep.78</url>
@@ -927,7 +927,7 @@
     <paper id="80">
       <title>De l’utilisation de descripteurs issus de la linguistique computationnelle dans le cadre de la synthèse par <fixed-case>HMM</fixed-case> (Toward the use of information density based descriptive features in <fixed-case>HMM</fixed-case> based speech synthesis)</title>
       <language>fra</language>
-      <author><first>Sébastien Le</first><last>Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien Le</first><last>Maguer</last></author>
       <author><first>Bernd</first><last>Moebius</last></author>
       <author><first>Ingmar</first><last>Steiner</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
@@ -944,7 +944,7 @@
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Camille</first><last>Dutrey</last></author>
       <author><first>Fabian</first><last>Santiago</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>723–731</pages>
       <abstract>Récemment, l’utilisation des représentations continues de mots a connu beaucoup de succès dans plusieurs tâches de traitement du langage naturel. Dans cet article, nous proposons d’étudier leur utilisation dans une architecture neuronale pour la tâche de détection des erreurs au sein de transcriptions automatiques de la parole. Nous avons également expérimenté et évalué l’utilisation de paramètres prosodiques en suppléments des paramètres classiques (lexicaux, syntaxiques, . . .). La principale contribution de cet article porte sur la combinaison de différentes représentations continues de mots : plusieurs approches de combinaison sont proposées et évaluées afin de tirer profit de leurs complémentarités. Les expériences sont effectuées sur des transcriptions automatiques du corpus ETAPE générées par le système de reconnaissance automatique du LIUM. Les résultats obtenus sont meilleurs que ceux d’un système état de l’art basé sur les champs aléatoires conditionnels. Pour terminer, nous montrons que la mesure de confiance produite est particulièrement bien calibrée selon une évaluation en terme d’Entropie Croisée Normalisée (NCE).</abstract>
       <url hash="95afa30a">2016.jeptalnrecital-jep.81</url>
@@ -1009,7 +1009,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 2 : TALN (Articles longs)</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -1036,8 +1036,8 @@
       <title><fixed-case>B</fixed-case>leu, contusion, ecchymose : tri automatique de synonymes en fonction de leur difficulté de lecture et compréhension (Automatic ranking of synonyms according to their reading and comprehension difficulty)</title>
       <language>fra</language>
       <author><first>Thomas</first><last>Francois</last></author>
-      <author><first>Mokhtar B.</first><last>Billami</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="mokhtar-b-billami"><first>Mokhtar B.</first><last>Billami</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Delphine</first><last>Bernhard</last></author>
       <pages>15–28</pages>
       <abstract>La lisibilité d’un texte dépend fortement de la difficulté des unités lexicales qui le composent. La simplification lexicale vise ainsi à remplacer les termes complexes par des équivalents sémantiques plus simples à comprendre : par exemple, BLEU (‘résultat d’un choc’) est plus simple que CONTUSION ou ECCHYMOSE. Il est pour cela nécessaire de disposer de ressources qui listent des synonymes pour des sens donnés et les trient par ordre de difficulté. Cet article décrit une méthode pour constituer une ressource de ce type pour le français. Les listes de synonymes sont extraites de BabelNet et de JeuxDeMots, puis triées grâce à un algorithme statistique d’ordonnancement. Les résultats du tri sont évalués par rapport à 36 listes de synonymes ordonnées manuellement par quarante annotateurs.</abstract>
@@ -1059,7 +1059,7 @@
       <title>Construire un lexique de sentiments par crowdsourcing et propagation (Building a sentiment lexicon through crowdsourcing and spreading)</title>
       <language>fra</language>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie Le</first><last>Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie Le</first><last>Brun</last></author>
       <author><first>Alain</first><last>Joubert</last></author>
       <pages>43–56</pages>
       <abstract>Cet article présente une méthode de construction d’une ressource lexicale de sentiments/émotions. Son originalité est d’associer le crowdsourcing via un GWAP (Game With A Purpose) à un algorithme de propagation, les deux ayant pour support et source de données le réseau lexical JeuxDeMots. Nous décrivons le jeu permettant de collecter des informations de sentiments, ainsi que les principes et hypothèses qui sous-tendent le fonctionnement de l’algorithme qui les propage au sein du réseau. Enfin, nous donnons les résultats quantitatifs et expliquons les méthodes d’évaluation qualitative des données obtenues, à la fois par le jeu et par la propagation par l’algorithme. Ces méthodes incluent une comparaison avec Emolex, une autre ressource de sentiments/émotions.</abstract>
@@ -1069,9 +1069,9 @@
     <paper id="5">
       <title>Détection de concepts pertinents pour le résumé automatique de conversations par recombinaison de patrons (Relevant concepts detection for the automatic summary of conversations using patterns recombination )</title>
       <language>fra</language>
-      <author><first>Jérémy</first><last>Trione</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="jeremy-trione"><first>Jérémy</first><last>Trione</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>57–69</pages>
       <abstract>automatique de conversations par recombinaison de patrons Jérémy Trione Benoit Favre Frédéric Béchet Aix-Marseille Université, CNRS, LIF UMR 7279, 13000, Marseille, France prénom.nom@lif.univ-mrs.fr R ÉSUMÉ Ce papier décrit une approche pour créer des résumés de conversations parlées par remplissage de patrons. Les patrons sont générés automatiquement à partir de fragments généralisés depuis un corpus de résumés d’apprentissage. Les informations nécessaires pour remplir les patrons sont détectées dans les transcriptions des conversations et utilisées pour sélectionner les fragments candidats. L’approche obtient un score ROUGE-2 de 0.116 sur le corpus RATP-DECODA. Les résultats obtenus montrent que cette approche abstractive est plus performante que les approches extractives utilisées habituellement dans le domaine du résumé automatique.</abstract>
       <url hash="b2ec870c">2016.jeptalnrecital-long.5</url>
@@ -1095,7 +1095,7 @@
       <language>fra</language>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <author><first>Othman</first><last>Zennaki</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
       <pages>84–97</pages>
       <abstract>La traduction automatique statistique bien que performante est aujourd’hui limitée parce qu’elle nécessite de gros volumes de corpus parallèles qui n’existent pas pour tous les couples de langues et toutes les spécialités et que leur production est lente et coûteuse. Nous présentons, dans cet article, un prototype d’un moteur de traduction à base d’exemples utilisant la recherche d’information interlingue et ne nécessitant qu’un corpus de textes en langue cible. Plus particulièrement, nous proposons d’étudier l’impact d’un lexique bilingue de spécialité sur la performance de ce prototype. Nous évaluons ce prototype de traduction et comparons ses résultats à ceux du système de traduction statistique Moses en utilisant les corpus parallèles anglais-français Europarl (European Parliament Proceedings) et Emea (European Medicines Agency Documents). Les résultats obtenus montrent que le score BLEU du prototype du moteur de traduction à base d’exemples est proche de celui du système Moses sur des documents issus du corpus Europarl et meilleur sur des documents extraits du corpus Emea.</abstract>
       <url hash="4b9a74ba">2016.jeptalnrecital-long.7</url>
@@ -1116,7 +1116,7 @@
       <language>fra</language>
       <author><first>Vincent</first><last>Letard</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>112–124</pages>
       <abstract>Cet article examine l’utilisation du raisonnement analogique dans le contexte de l’apprentissage incrémental. Le problème d’apprentissage sous-jacent développé est le transfert de requêtes formulées en langue naturelle vers des commandes dans un langage de programmation. Nous y explorons deux questions principales : Comment se comporte le raisonnement par analogie dans le contexte de l’apprentissage incrémental ? De quelle manière la séquence d’apprentissage influence-t-elle la performance globale ? Pour y répondre, nous proposons un protocole expérimental simulant deux utilisateurs et différentes séquences d’apprentissage. Nous montrons que l’ordre dans la séquence d’apprentissage incrémental n’a d’influence notable que sous des conditions spécifiques. Nous constatons également la complémentarité de l’apprentissage incrémental avec l’analogie pour un nombre d’exemples d’apprentissage minimal.</abstract>
       <url hash="1bd02ab5">2016.jeptalnrecital-long.9</url>
@@ -1135,8 +1135,8 @@
     <paper id="11">
       <title>Évaluation dune nouvelle structuration thématique hiérarchique des textes dans un cadre de résumé automatique et de détection d’ancres au sein de vidéos (Evaluation of a novel hierarchical thematic structuring of texts in the framework of text summarization and anchor detection for video hyperlinking )</title>
       <language>fra</language>
-      <author><first>Anca</first><last>Simon</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="anca-roxana-simon"><first>Anca</first><last>Simon</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>139–152</pages>
       <abstract>automatique et de détection d’ancres au sein de vidéos Anca Simon1 Guillaume Gravier2 Pascale Sébillot3 (1) Université de Rennes 1, IRISA &amp; INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France (2) CNRS, IRISA &amp; INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France (3) INSA, IRISA &amp; INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France anca.simon@irisa.fr, guillaume.gravier@irisa.fr, pascale.sebillot@irisa.fr R ÉSUMÉ Dans cet article, nous évaluons, à travers son intérêt pour le résumé automatique et la détection d’ancres dans des vidéos, le potentiel d’une nouvelle structure thématique extraite de données textuelles, composée d’une hiérarchie de fragments thématiquement focalisés. Cette structure est produite par un algorithme exploitant les distributions temporelles d’apparition des mots dans les textes en se fondant sur une analyse de salves lexicales. La hiérarchie obtenue a pour objet de filtrer le contenu non crucial et de ne conserver que l’information saillante des textes, à différents niveaux de détail. Nous montrons qu’elle permet d’améliorer la production de résumés ou au moins de maintenir les résultats de l’état de l’art, tandis que pour la détection d’ancres, elle nous conduit à la meilleure précision dans le contexte de la tâche Search and Anchoring in Video Archives à MediaEval. Les expériences sont réalisées sur du texte écrit et sur un corpus de transcriptions automatiques d’émissions de télévision.</abstract>
@@ -1169,7 +1169,7 @@
       <language>fra</language>
       <author><first>Alexis</first><last>Linard</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>180–193</pages>
       <abstract>L’extraction de lexiques bilingues à partir de corpus comparables se réalise traditionnellement en s’appuyant sur deux langues. Des travaux précédents en extraction de lexiques bilingues à partir de corpus parallèles ont démontré que l’utilisation de plus de deux langues peut être utile pour améliorer la qualité des alignements extraits. Nos travaux montrent qu’il est possible d’utiliser la même stratégie pour des corpus comparables. Nous avons défini deux méthodes originales impliquant des langues pivots et nous les avons évaluées sur quatre langues et deux langues pivots en particulier. Nos expérimentations ont montré que lorsque l’alignement entre la langue source et la langue pivot est de bonne qualité, l’extraction du lexique en langue cible s’en trouve améliorée.</abstract>
       <url hash="514cde1c">2016.jeptalnrecital-long.14</url>
@@ -1205,7 +1205,7 @@
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>221–234</pages>
       <abstract>Cette étude examine l’utilisation de méthodes d’apprentissage incrémental supervisé afin de prédire la compétence lexicale d’apprenants de français langue étrangère (FLE). Les apprenants ciblés sont des néerlandophones ayant un niveau A2/B1 selon le Cadre européen commun de référence pour les langues (CECR). À l’instar des travaux récents portant sur la prédiction de la maîtrise lexicale à l’aide d’indices de complexité, nous élaborons deux types de modèles qui s’adaptent en fonction d’un retour d’expérience, révélant les connaissances de l’apprenant. En particulier, nous définissons (i) un modèle qui prédit la compétence lexicale de tous les apprenants du même niveau de maîtrise et (ii) un modèle qui prédit la compétence lexicale d’un apprenant individuel. Les modèles obtenus sont ensuite évalués par rapport à un modèle de référence déterminant la compétence lexicale à partir d’un lexique spécialisé pour le FLE et s’avèrent gagner significativement en exactitude (9%-17%).</abstract>
       <url hash="6dcf088f">2016.jeptalnrecital-long.17</url>
@@ -1216,7 +1216,7 @@
       <language>fra</language>
       <author><first>Adrien</first><last>Bougouin</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <pages>235–247</pages>
       <abstract>Dans cet article, nous nous intéressons à l’indexation de documents de domaines de spécialité par l’intermédiaire de leurs termes-clés. Plus particulièrement, nous nous intéressons à l’indexation telle qu’elle est réalisée par les documentalistes de bibliothèques numériques. Après analyse de la méthodologie de ces indexeurs professionnels, nous proposons une méthode à base de graphe combinant les informations présentes dans le document et la connaissance du domaine pour réaliser une indexation (hybride) libre et contrôlée. Notre méthode permet de proposer des termes-clés ne se trouvant pas nécessairement dans le document. Nos expériences montrent aussi que notre méthode surpasse significativement l’approche à base de graphe état de l’art.</abstract>
       <url hash="a530d2e9">2016.jeptalnrecital-long.18</url>
@@ -1237,7 +1237,7 @@
       <title>Prédiction automatique de fonctions pragmatiques dans les reformulations (Automatic prediction of pragmatic functions in reformulations)</title>
       <language>fra</language>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>262–275</pages>
       <abstract>La reformulation participe à la structuration du discours, notamment dans le cas des dialogues, et contribue également à la dynamique du discours. Reformuler est un acte significatif qui poursuit des objectifs précis. L’objectif de notre travail est de prédire automatiquement la raison pour laquelle un locuteur effectue une reformulation. Nous utilisons une classification de onze fonctions pragmatiques inspirées des travaux existants et des données analysées. Les données de référence sont issues d’annotations manuelles et consensuelles des reformulations spontanées formées autour de trois marqueurs (c’est-à-dire, je veux dire, disons). Les données proviennent d’un corpus oral et d’un corpus de discussions sur les forums de santé. Nous exploitons des algorithmes de catégorisation supervisée et un ensemble de plusieurs descripteurs (syntaxiques, formels, sémantiques et discursifs) pour prédire les catégories de reformulation. La distribution des énoncés et phrases selon les catégories n’est pas homogène. Les expériences sont positionnées à deux niveaux : générique et spécifique. Nos résultats indiquent qu’il est plus facile de prédire les types de fonctions au niveau générique (la moyenne des F-mesures est autour de 0,80), qu’au niveau des catégories individuelles (la moyenne des F-mesures est autour de 0,40). L’influence de différents paramètres est étudiée.</abstract>
       <url hash="8d07a065">2016.jeptalnrecital-long.20</url>
@@ -1248,7 +1248,7 @@
       <language>fra</language>
       <author><first>Othman</first><last>Zennaki</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>276–289</pages>
       <abstract>Nos travaux portent sur la construction rapide d’outils d’analyse linguistique pour des langues peu dotées en ressources. Dans une précédente contribution, nous avons proposé une méthode pour la construction automatique d’un analyseur morpho-syntaxique via une projection interlingue d’annotations linguistiques à partir de corpus parallèles (méthode fondée sur les réseaux de neurones récurrents). Nous présentons, dans cet article, une amélioration de notre modèle neuronal, avec la prise en compte d’informations linguistiques externes pour un annotateur plus complexe. En particulier, nous proposons d’intégrer des annotations morpho-syntaxiques dans notre architecture neuronale pour l’apprentissage non supervisé d’annotateurs sémantiques multilingues à gros grain (annotation en SuperSenses). Nous montrons la validité de notre méthode et sa généricité sur l’italien et le français et étudions aussi l’impact de la qualité du corpus parallèle sur notre approche (généré par traduction manuelle ou automatique). Nos expériences portent sur la projection d’annotations de l’anglais vers le français et l’italien.</abstract>
       <url hash="5fe2ddac">2016.jeptalnrecital-long.21</url>
@@ -1269,10 +1269,10 @@
     <paper id="23">
       <title><fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary ou comment (ré)concilier représentations distribuées et réseaux lexico-sémantiques ? Le cas de l’évaluation en traduction automatique (<fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary or how to bring back together vector representations and lexical resources ? A case study for machine translation evaluation)</title>
       <language>fra</language>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Zied</first><last>Elloumi</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>304–317</pages>
       <abstract>Cet article présente une approche associant réseaux lexico-sémantiques et représentations distribuées de mots appliquée à l’évaluation de la traduction automatique. Cette étude est faite à travers l’enrichissement d’une métrique bien connue pour évaluer la traduction automatique (TA) : METEOR. METEOR permet un appariement approché (similarité morphologique ou synonymie) entre une sortie de système automatique et une traduction de référence. Nos expérimentations s’appuient sur la tâche Metrics de la campagne d’évaluation WMT 2014 et montrent que les représentations distribuées restent moins performantes que les ressources lexico-sémantiques pour l’évaluation en TA mais peuvent néammoins apporter un complément d’information intéressant à ces dernières.</abstract>
       <url hash="15d533fc">2016.jeptalnrecital-long.23</url>
@@ -1283,7 +1283,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 2 : TALN (Posters)</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -1298,7 +1298,7 @@
       <title>Amélioration de la traduction automatique d’un corpus annoté (Improvement of the automatic translation of an annotated corpus)</title>
       <language>fra</language>
       <author><first>Marwa Hadj</first><last>Salah</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Mounir</first><last>Zrigui</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>318–324</pages>
@@ -1310,7 +1310,7 @@
       <title>Analyse d’une tâche de substitution lexicale : quelles sont les sources de difficulté ? (Difficulty analysis for a lexical substitution task)</title>
       <language>fra</language>
       <author><first>Ludovic</first><last>Tanguy</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Camille</first><last>Mercier</last></author>
       <pages>325–332</pages>
       <abstract>Nous proposons dans cet article une analyse des résultats de la campagne SemDis 2014 qui proposait une tâche de substitution lexicale en français. Pour les 300 phrases du jeu de test, des annotateurs ont proposé des substituts à un mot cible, permettant ainsi d’établir un gold standard sur lequel les systèmes participants ont été évalués. Nous cherchons à identifier les principales caractéristiques des items du jeu de test qui peuvent expliquer les variations de performance pour les humains comme pour les systèmes, en nous basant sur l’accord inter-annotateurs des premiers et les scores de rappel des seconds. Nous montrons que si plusieurs caractéristiques communes sont associées aux deux types de difficulté (rareté du sens dans lequel le mot-cible est employé, fréquence d’emploi du mot-cible), d’autres sont spécifiques aux systèmes (degré de polysémie du mot-cible, complexité syntaxique).</abstract>
@@ -1320,7 +1320,7 @@
     <paper id="3">
       <title>L’anti-correcteur : outil d’évaluation positive de l’orthographe et de la grammaire (The ”anticorrecteur”: a positive evaluation module for spell and grammar checking)</title>
       <language>fra</language>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
       <author><first>Sophie</first><last>Muller</last></author>
       <author><first>Valentine</first><last>Delbar</last></author>
       <pages>333–341</pages>
@@ -1342,7 +1342,7 @@
     <paper id="5">
       <title>Approximate unsupervised summary optimisation for selections of <fixed-case>ROUGE</fixed-case></title>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <pages>349–354</pages>
       <abstract>Approximate summary optimisation for selections of ROUGE It is standard to measure automatic summariser performance using the ROUGE metric. Unfortunately, ROUGE is not appropriate for unsupervised summarisation approaches. On the other hand, we show that it is possible to optimise approximately for ROUGE-n by using a document-weighted ROUGE objective. Doing so results in state-of-the-art summariser performance for single and multiple document summaries for both English and French. This is despite a non-correlation of the documentweighted ROUGE metric with human judgments, unlike the original ROUGE metric. These findings suggest a theoretical approximation link between the two metrics.</abstract>
       <url hash="21c32400">2016.jeptalnrecital-poster.5</url>
@@ -1352,7 +1352,7 @@
       <title>L’architecture d’un modèle hybride pour la normalisation de <fixed-case>SMS</fixed-case> (A hybrid model architecture for <fixed-case>SMS</fixed-case> normalization)</title>
       <language>fra</language>
       <author><first>Eleni</first><last>Kogkitsidou</last></author>
-      <author><first>Georges</first><last>Antoniadis</last></author>
+      <author id="georges-antoniadis"><first>Georges</first><last>Antoniadis</last></author>
       <pages>355–363</pages>
       <abstract>La communication par SMS (Short Message Service), aussi bien que tout autre type de communication virtuelle sous forme de textes courts (mails, microblogs, tweets, etc.), présente certaines particularités spécifiques (syntaxe irrégulière, fusionnement et phonétisation de mots, formes abrégées, etc.). A cause de ces caractéristiques, l’application d’outils en Traitement Automatique du Langage (TAL) rend difficile l’exploitation d’informations utiles contenues dans des messages bruités. Nous proposons un modèle de normalisation en deux étapes fondé sur une approche symbolique et statistique. La première partie vise à produire une représentation intermédiaire du message SMS par l’application des grammaires locales, tandis que la deuxième utilise un système de traduction automatique à base de règles pour convertir la représentation intermédiaire vers une forme standard.</abstract>
       <url hash="8b533f14">2016.jeptalnrecital-poster.6</url>
@@ -1361,7 +1361,7 @@
     <paper id="7">
       <title>Une catégorisation de fins de lignes non-supervisée (End-of-line classification with no supervision)</title>
       <language>fra</language>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <pages>364–371</pages>
@@ -1375,7 +1375,7 @@
       <author><first>Adeline</first><last>Müller</last></author>
       <author><first>Thomas</first><last>Francois</last></author>
       <author><first>Sophie</first><last>Roekhaut</last></author>
-      <author><first>Cedrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cedrick</first><last>Fairon</last></author>
       <pages>372–380</pages>
       <abstract>Cet article présente une approche visant à évaluer automatiquement la difficulté de dictées en vue de les intégrer dans une plateforme d’apprentissage de l’orthographe. La particularité de l’exercice de la dictée est de devoir percevoir du code oral et de le retranscrire via le code écrit. Nous envisageons ce double niveau de difficulté à l’aide de 375 variables mesurant la difficulté de compréhension d’un texte ainsi que les phénomènes orthographiques et grammaticaux complexes qu’il contient. Un sous-ensemble optimal de ces variables est combiné à l’aide d’un modèle par machines à vecteurs de support (SVM) qui classe correctement 56% des textes. Les variables lexicales basées sur la liste orthographique de Catach (1984) se révèlent les plus informatives pour le modèle.</abstract>
       <url hash="1065e5ce">2016.jeptalnrecital-poster.8</url>
@@ -1395,7 +1395,7 @@
       <title>Comparing Named-Entity Recognizers in a Targeted Domain: Handcrafted Rules vs Machine Learning</title>
       <author><first>Ioannis</first><last>Partalas</last></author>
       <author><first>Cédric</first><last>Lopez</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <pages>389–395</pages>
       <abstract>Comparing Named-Entity Recognizers in a Targeted Domain : Handcrafted Rules vs. Machine Learning Named-Entity Recognition concerns the classification of textual objects in a predefined set of categories such as persons, organizations, and localizations. While Named-Entity Recognition is well studied since 20 years, the application to specialized domains still poses challenges for current systems. We developed a rule-based system and two machine learning approaches to tackle the same task : recognition of product names, brand names, etc., in the domain of Cosmetics, for French. Our systems can thus be compared under ideal conditions. In this paper, we introduce both systems and we compare them.</abstract>
       <url hash="a5c636c2">2016.jeptalnrecital-poster.10</url>
@@ -1427,7 +1427,7 @@
     <paper id="13">
       <title>Description de la juxtaposition en Langue des Signes Française à partir d’une grammaire récursive (The present communication tackles formal grammar developpement of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage (<fixed-case>LSF</fixed-case>))</title>
       <language>fra</language>
-      <author><first>Mohamed Nassime</first><last>Hadjadj</last></author>
+      <author id="mohamed-nassime-hadjadj"><first>Mohamed Nassime</first><last>Hadjadj</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
       <pages>411–418</pages>
       <abstract>La présente communication s’inscrit dans le cadre du développement d’une grammaire formelle pour la langue des signes française (LSF). Générer automatiquement des énoncés en LSF implique la définition de certaines règles de production pour synchroniser les différents articulateurs du corps, signes, mouvements, etc. Cet article présente dans sa première partie notre méthodologie pour définir des règles de production à partir d’une étude de corpus. Dans la deuxième partie nous présenterons notre étude qui portera sur deux règles de production pour juxtaposer quelques types de structures en LSF. Nous finissons par une discussion sur la nature et l’apport de notre démarche par rapport aux approches existantes.</abstract>
@@ -1457,7 +1457,7 @@
     <paper id="16">
       <title>Étiquetage multilingue en parties du discours avec <fixed-case>ME</fixed-case>lt (Multilingual part-of-speech tagging with <fixed-case>ME</fixed-case>lt)</title>
       <language>fra</language>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>435–442</pages>
       <abstract>Nous présentons des travaux récents réalisés autour de MElt, système discriminant d’étiquetage en parties du discours. MElt met l’accent sur l’exploitation optimale d’informations lexicales externes pour améliorer les performances des étiqueteurs par rapport aux modèles entraînés seulement sur des corpus annotés. Nous avons entraîné MElt sur plus d’une quarantaine de jeux de données couvrant plus d’une trentaine de langues. Comparé au système état-de-l’art MarMoT, MElt obtient en moyenne des résultats légèrement moins bons en l’absence de lexique externe, mais meilleurs lorsque de telles ressources sont disponibles, produisant ainsi des étiqueteurs état-de-l’art pour plusieurs langues.</abstract>
       <url hash="3fa1b564">2016.jeptalnrecital-poster.16</url>
@@ -1469,7 +1469,7 @@
       <author><first>Grégoire</first><last>Jadi</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>443–450</pages>
       <abstract>Dans cet article, nous présentons le développement d’un système d’extraction d’expressions-cibles pour l’anglais et sa transposition au français. En complément, nous avons réalisé une étude de l’efficacité des traits en anglais et en français qui tend à montrer qu’il est possible de réaliser un système d’extraction d’expressions-cibles indépendant du domaine. Pour finir, nous proposons une analyse comparative des erreurs commises par nos systèmes en anglais et français et envisageons différentes solutions à ces problèmes.</abstract>
       <url hash="fdda5422">2016.jeptalnrecital-poster.17</url>
@@ -1480,7 +1480,7 @@
       <language>fra</language>
       <author><first>Joseph</first><last>Lark</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Sebastián Peña</first><last>Saldarriaga</last></author>
+      <author id="sebastian-pena-saldarriaga"><first>Sebastián Peña</first><last>Saldarriaga</last></author>
       <pages>451–458</pages>
       <abstract>Nous détectons dans des corpus d’avis clients en français des expressions d’opinion ne contenant pas de marqueur d’opinion explicitement positif ou négatif. Nous procédons pour cela en deux étapes en nous appuyant sur des méthodes existantes : nous identifions ces expressions à l’aide de fenêtres de mots puis nous les classifions en polarité. Le processus global présente des résultats satisfaisants pour notre cadre applicatif demandant une haute précision.</abstract>
       <url hash="92a30731">2016.jeptalnrecital-poster.18</url>
@@ -1491,7 +1491,7 @@
       <language>fra</language>
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>459–466</pages>
       <abstract>L’analyse temporelle des documents cliniques permet d’obtenir des représentations riches des informations contenues dans les dossiers électroniques patient. Cette analyse repose sur l’extraction d’événements, d’expressions temporelles et des relations entre eux. Dans ce travail, nous considérons que nous disposons des événements et des expressions temporelles pertinents et nous nous intéressons aux relations temporelles entre deux événements ou entre un événement et une expression temporelle. Nous présentons des modèles de classification supervisée pour l’extraction de des relations en français et en anglais. Les performances obtenues sont comparables dans les deux langues, suggérant ainsi que différents domaines cliniques et différentes langues pourraient être abordés de manière similaire.</abstract>
@@ -1503,9 +1503,9 @@
       <language>fra</language>
       <author><first>Wafa</first><last>Neifar</last></author>
       <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>467–474</pages>
       <abstract>Nous présentons, dans cet article, une adaptation à l’arabe standard moderne d’un extracteur de termes pour le français et l’anglais. L’adaptation a d’abord consisté à décrire le processus d’extraction des termes de manière similaire à celui défini pour l’anglais et le français en prenant en compte certains particularités morpho-syntaxiques de la langue arabe. Puis, nous avons considéré le phénomène de l’agglutination de la langue arabe. L’évaluation a été réalisée sur un corpus de textes médicaux. Les résultats montrent que parmi 400 termes candidats maximaux analysés, 288 sont jugés corrects par rapport au domaine (72,1%). Les erreurs d’extraction sont dues à l’étiquetage morpho-syntaxique et à la non-voyellation des textes mais aussi à des phénomènes d’agglutination.</abstract>
       <url hash="7b1db6e5">2016.jeptalnrecital-poster.20</url>
@@ -1536,7 +1536,7 @@
       <title>Investigating gender adaptation for speech translation</title>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Hélène</first><last>Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Maynard</last></author>
       <pages>490–497</pages>
       <abstract>In this paper we investigate the impact of the integration of context into dialogue translation. We present a new contextual parallel corpus of television subtitles and show how taking into account speaker gender can significantly improve machine translation quality in terms of B LEU and M ETEOR scores. We perform a manual analysis, which suggests that these improvements are not necessary related to the morphological consequences of speaker gender, but to more general linguistic divergences.</abstract>
       <url hash="a524792e">2016.jeptalnrecital-poster.23</url>
@@ -1557,7 +1557,7 @@
       <title>Mise au point d’une méthode d’annotation morphosyntaxique fine du serbe (Developping a method for detailed morphosyntactic tagging of <fixed-case>S</fixed-case>erbian)</title>
       <language>fra</language>
       <author><first>Aleksandra</first><last>Miletic</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Dejan</first><last>Stosic</last></author>
       <pages>506–513</pages>
       <abstract>Cet article présente une expérience d’annotation morphosyntaxique fine du volet serbe du corpus parallèle ParCoLab (corpus serbe-français-anglais). Elle a consisté à enrichir une annotation existante en parties du discours avec des traits morphosyntaxiques fins, afin de préparer une étape ultérieure de parsing. Nous avons comparé trois approches : 1) annotation manuelle ; 2) préannotation avec un étiqueteur entraîné sur le croate suivie d’une correction manuelle ; 3) réentraînement de l’outil sur un petit échantillon validé du corpus, suivi de l’annotation automatique et de la correction manuelle. Le modèle croate maintient une stabilité globale en passant au serbe, mais les différences entre les deux jeux d’étiquettes exigent des interventions manuelles importantes. Le modèle ré-entraîné sur un échantillon de taille limité (20K tokens) atteint la même exactitude que le modèle existant et le gain de temps observé montre que cette méthode optimise la phase de correction.</abstract>
@@ -1595,7 +1595,7 @@
       <author><first>Victor</first><last>Pineau</last></author>
       <author><first>Constance</first><last>Nin</last></author>
       <author><first>Solen</first><last>Quiniou</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>531–538</pages>
       <abstract>La segmentation d’un texte en rhèses, unités-membres signifiantes de la phrase, permet de fournir des adaptations de celui-ci pour faciliter la lecture aux personnes dyslexiques. Dans cet article, nous proposons une méthode d’identification automatique des rhèses basée sur un apprentissage supervisé à partir d’un corpus que nous avons annoté. Nous comparons celle-ci à l’identification manuelle ainsi qu’à l’utilisation d’outils et de concepts proches, tels que la segmentation d’un texte en chunks.</abstract>
       <url hash="6bf345ba">2016.jeptalnrecital-poster.28</url>
@@ -1638,7 +1638,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 3 : RECITAL</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -1727,7 +1727,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. Volume 4 : Conférences invitées</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -1748,7 +1748,7 @@
     </paper>
     <paper id="2">
       <title>From Human Language Technology to Human Language Science</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>3–3</pages>
       <abstract>Thirty years ago, in order to get past roadblocks in Machine Translation and Automatic Speech Recognition, DARPA invented a new way to organize and manage technological R&amp;D : a “common task” is defined by a formal quantitative evaluation metric and a body of shared training data, and researchers join an open competition to compare approaches. Over the past three decades, this method has produced steadily improving technologies, with many practical applications now possible. And Moore’s law has created a sort of digital shadow universe, which increasingly mirrors the real world in flows and stores of bits, while the same improvements in digital hardware and software make it increasingly easy to pull content out of the these rivers and oceans of information. It’s natural to be excited about these technologies, where we can see an open road to rapid improvements beyond the current state of the art, and an explosion of near-term commercial applications. But there are some important opportunities in a less obvious direction. Several areas of scientific and humanistic research are being revolutionized by the application of Human Language Technology. At a minimum, orders of magnitude more data can be addressed with orders of magnitude less effort - but this change also transforms old theoretical questions, and poses new ones. And eventually, new modes of research organization and funding are likely to emerge..</abstract>
       <url hash="aa6cc87f">2016.jeptalnrecital-invite.2</url>
@@ -1759,7 +1759,7 @@
     <meta>
       <booktitle>Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 5 : Démonstrations</booktitle>
       <editor><first>Laurence</first><last>Danlos</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>AFCP - ATALA</publisher>
       <address>Paris, France</address>
       <month>7</month>
@@ -1795,7 +1795,7 @@
       <language>fra</language>
       <author><first>Paul</first><last>Bui-Quang</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>6–8</pages>
       <abstract>AppFM 1 est un outil à mi-chemin entre un environnement de création de chaînes modulaires de TAL et un gestionnaire de services systèmes. Il permet l’intégration d’applications ayant des dépendances complexes en des chaînes de traitements réutilisables facilement par le biais de multiples interfaces.</abstract>
       <url hash="cd675178">2016.jeptalnrecital-demo.3</url>
@@ -1834,7 +1834,7 @@
     <paper id="7">
       <title>Exploration de collections d’archives multimédia dans le contexte des Humanités Numériques : revisiter <fixed-case>TALN</fixed-case>’2015 ? (Exploring multimedia archives in the context of Digital Humanities: browsing <fixed-case>TALN</fixed-case>’2015?)</title>
       <language>fra</language>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Marc</first><last>Denjean</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>18–20</pages>
@@ -1855,7 +1855,7 @@
       <title>Héloïse, une plate-forme pour développer des systèmes de <fixed-case>TA</fixed-case> compatibles Ariane en réseau (Heloise, a platform for collaborative development of Ariane-compatible <fixed-case>MT</fixed-case> systems)</title>
       <language>fra</language>
       <author><first>Vincent</first><last>Berment</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Guillaume</first><last>de Malézieux</last></author>
       <pages>24–26</pages>
       <abstract>Dans cette démo, nous montrons comment utiliser Héloïse pour développer des systèmes de TA.</abstract>
@@ -1914,7 +1914,7 @@
       <language>fra</language>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>37–39</pages>
       <abstract>Cette démonstration présente la version web d’un outil multilingue d’extraction de collocations. Elle est destinée aux lexicographes, aux traducteurs, aux enseignants et apprenants L2 et, plus généralement, aux linguistes désireux d’analyser et d’exploiter leurs propres corpus.</abstract>
       <url hash="a0fcfcab">2016.jeptalnrecital-demo.14</url>
@@ -1959,7 +1959,7 @@
       <author><first>Guillaume</first><last>Dubuisson Duplessis</last></author>
       <author><first>Vincent</first><last>Letard</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>49–51</pages>
       <abstract>Cette démonstration présente un système de dialogue en domaine ouvert qui utilise une base d’exemples de dialogue automatiquement constituée depuis un corpus de sous-titres afin de gérer un dialogue social de type « chatbot ».</abstract>
       <url hash="ba6d43c9">2016.jeptalnrecital-demo.18</url>
@@ -1972,7 +1972,7 @@
       <author><first>Elena</first><last>Manishina</last></author>
       <author><first>Maxence</first><last>Busson</last></author>
       <author><first>Fabrice</first><last>Maurel</last></author>
-      <author><first>Stephane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stephane</first><last>Ferrari</last></author>
       <pages>52–54</pages>
       <abstract>Dans cette démonstration, nous proposons un système qui permettrait aux utilisateurs non-voyants d’obtenir le first glance d’une page web. L’objectif est de réduire le temps d’accès à la structure logico-thématique de la page et de favoriser le développement de stratégies de lecture de haut niveau. Notre concept, appelé Tag Thunder, s’appuie sur une phase de segmentation de la page en zones, suivie d’une étape de représentation des zones par un mot ou groupe de mots, puis une vocalisation simultanée de ces représentants.</abstract>
       <url hash="9110f9bf">2016.jeptalnrecital-demo.19</url>
diff --git a/data/xml/2016.lilt.xml b/data/xml/2016.lilt.xml
index fba0e49c06..833b2fa670 100644
--- a/data/xml/2016.lilt.xml
+++ b/data/xml/2016.lilt.xml
@@ -19,8 +19,8 @@
     </paper>
     <paper id="2">
       <title>Many speakers, many worlds: Interannotator variations in the quantification of feature norms</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <abstract>Quantification (see e.g. Peters and Westerst ̊ahl, 2006) is probably one of the most extensively studied phenomena in formal semantics. But because of the specific representation of meaning assumed by modeltheoretic semantics (one where a true model of the world is a priori available), research in the area has primarily focused on one question: what is the relation of a quantifier to the truth value of a sentence? In contrast, relatively little has been said about the way the underlying model comes about, and its relation to individual speakers’ conceptual knowledge. In this paper, we make a first step in investigating how native speakers of English model relations between non-grounded sets, by observing how they quantify simple statements. We first give some motivation for our task, from both a theoretical linguistic and computational semantic point of view (§2). We then describe our annotation setup (§3) and follow on with an analysis of the produced dataset, conducting a quantitative evaluation which includes inter-annotator agreement for different classes of predicates (§4). We observe that there is significant agreement between speakers but also noticeable variations. We posit that in settheoretic terms, there are as many worlds as there are speakers (§5), but the overwhelming use of underspecified quantification in ordinary language covers up the individual differences that might otherwise be observed.</abstract>
       <issue>2</issue>
       <url hash="8f5a0c24">2016.lilt-13.2</url>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="4">
       <title>A linguistically-motivated annotation model of modality in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish: Insights from <fixed-case>MULTINOT</fixed-case></title>
-      <author><first>Julia</first><last>Lavid</last></author>
+      <author id="julia-lavid-lopez"><first>Julia</first><last>Lavid</last></author>
       <author><first>Marta</first><last>Carretrero</last></author>
       <author><first>Juan Rafael</first><last>Zamorano-Mansilla</last></author>
       <abstract>In this paper we present current work on the design and validation of a linguistically-motivated annotation model of modality in English and Spanish in the context of the MULTINOT project. Our annotation model captures four basic modal meanings and their subtypes, on the one hand, and provides a fine-grained characterisation of the syntactic realisations of those meanings in English and Spanish, on the other. We validate the modal tagset proposed through an agreement study performed on a bilingual sample of four hundred sentences extracted from original texts of the MULTINOT corpus, and discuss the difficult cases encountered in the annotation experiment. We also describe current steps in the implementation of the proposed scheme for the large-scale annotation of the bilingual corpus using both automatic and manual procedures.</abstract>
@@ -121,7 +121,7 @@
       <author><first>Koen</first><last>Hallmann</last></author>
       <author><first>Florian</first><last>Kunneman</last></author>
       <author><first>Christine</first><last>Liebrecht</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Margot</first><last>van Mulken</last></author>
       <abstract>Verbal irony, or sarcasm, presents a significant technical and conceptual challenge when it comes to automatic detection. Moreover, it can be a disruptive factor in sentiment analysis and opinion mining, because it changes the polarity of a message implicitly. Extant methods for automatic detection are mostly based on overt clues to ironic intent such as hashtags, also known as irony markers. In this paper, we investigate whether people who know each other make use of irony markers less often than people who do not know each other. We trained a machinelearning classifier to detect sarcasm in Twitter messages (tweets) that were addressed to specific users, and in tweets that were not addressed to a particular user. Human coders analyzed the top-1000 features found to be most discriminative into ten categories of irony markers. The classifier was also tested within and across the two categories. We find that tweets with a user mention contain fewer irony markers than tweets not addressed to a particular user. Classification experiments confirm that the irony in the two types of tweets is signaled differently. The within-category performance of the classifier is about 91% for both categories, while cross-category experiments yield substantially lower generalization performance scores of 75% and 71%. We conclude that irony markers are used more often when there is less mutual knowledge between sender and receiver. Senders addressing other Twitter users less often use irony markers, relying on mutual knowledge which should lead the receiver to infer ironic intent from more implicit clues. With regard to automatic detection, we conclude that our classifier is able to detect ironic tweets addressed at another user as reliably as tweets that are not addressed at at a particular person.</abstract>
       <issue>7</issue>
diff --git a/data/xml/2016.tal.xml b/data/xml/2016.tal.xml
index 6156b92bad..f847372131 100644
--- a/data/xml/2016.tal.xml
+++ b/data/xml/2016.tal.xml
@@ -17,7 +17,7 @@
     </meta>
     <paper id="1">
       <title>Predicting Liaison: an Example-Based Approach</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Alexander</first><last>Greefhorst</last></author>
       <pages>13–32</pages>
       <url hash="8da9c3f2">2016.tal-1.1</url>
@@ -29,7 +29,7 @@
       <author><first>Quentin</first><last>Pradet</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
       <author><first>Takuya</first><last>Nakamura</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>33–58</pages>
       <url hash="592cfea2">2016.tal-1.2</url>
       <language>fra</language>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Prédiction structurée pour l’analyse syntaxique en constituants par transitions : modèles denses et modèles creux [Structured Prediction for Transition-based Constituent Parsing: Dense and Sparse Models]</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>59–83</pages>
       <url hash="5b774950">2016.tal-1.3</url>
       <language>fra</language>
@@ -47,7 +47,7 @@
     <paper id="4">
       <title>Exploiting morphology for the automatic extraction of general public paraphrases of medical terms</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>85–109</pages>
       <url hash="8cc54e65">2016.tal-1.4</url>
       <language>fra</language>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="5">
       <title>Apprentissage discriminant de modèles neuronaux pour la traduction automatique [Discriminative training of continuous space translation models]</title>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>111–135</pages>
@@ -80,7 +80,7 @@
     <paper id="1">
       <title>Éthique et traitement automatique des langues et de la parole : entre truismes et tabous [Ethics and natural language and speech processing: between truisms and taboos]</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Kevin</first><last>Bretonnel Cohen</last></author>
       <pages>7–19</pages>
       <url hash="9210b918">2016.tal-2.1</url>
@@ -134,7 +134,7 @@
     </meta>
     <paper id="1">
       <title><fixed-case>NLP</fixed-case> for learning and teaching: challenges and opportunities</title>
-      <author><first>Georges</first><last>Antoniadis</last></author>
+      <author id="georges-antoniadis"><first>Georges</first><last>Antoniadis</last></author>
       <author><first>Piet</first><last>Desmet</last></author>
       <pages>7–13</pages>
       <url hash="f83de4a4">2016.tal-3.1</url>
@@ -181,7 +181,7 @@
       <title><fixed-case>M</fixed-case>y<fixed-case>A</fixed-case>nnotator: A Tool for Technology-Mediated Written Corrective Feedback</title>
       <author><first>Marie-Josée</first><last>Hamel</last></author>
       <author><first>Nikolay</first><last>Slavkov</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Dingwen</first><last>Xiao</last></author>
       <pages>119–142</pages>
       <url hash="0740e2e1">2016.tal-3.6</url>
diff --git a/data/xml/2016.tc.xml b/data/xml/2016.tc.xml
index d4530bbc31..0d062aa306 100644
--- a/data/xml/2016.tc.xml
+++ b/data/xml/2016.tc.xml
@@ -91,7 +91,7 @@
     </paper>
     <paper id="14">
       <title>How to configure statistical machine translation with linked open data resources</title>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Felix</first><last>Sasaki</last></author>
       <author><first>Peter Bourgonje. Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Jan</first><last>Nehring</last></author>
diff --git a/data/xml/2017.iwslt.xml b/data/xml/2017.iwslt.xml
index 67fe4a98e2..f122c64e50 100644
--- a/data/xml/2017.iwslt.xml
+++ b/data/xml/2017.iwslt.xml
@@ -22,7 +22,7 @@
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Katsuhito</first><last>Sudoh</last></author>
       <author><first>Koichiro</first><last>Yoshino</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title>Going beyond zero-shot <fixed-case>MT</fixed-case>: combining phonological, morphological and semantic factors. The <fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-<fixed-case>DFKI</fixed-case> System at <fixed-case>IWSLT</fixed-case> 2017</title>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>15-22</pages>
       <abstract>This paper describes the UdS-DFKI participation to the multilingual task of the IWSLT Evaluation 2017. Our approach is based on factored multilingual neural translation systems following the small data and zero-shot training conditions. Our systems are designed to fully exploit multilinguality by including factors that increase the number of common elements among languages such as phonetic coarse encodings and synsets, besides shallow part-of-speech tags, stems and lemmas. Document level information is also considered by including the topic of every document. This approach improves a baseline without any additional factor for all the language pairs and even allows beyond-zero-shot translation. That is, the translation from unseen languages is possible thanks to the common elements —especially synsets in our models— among languages.</abstract>
       <url hash="15b6343f">2017.iwslt-1.2</url>
@@ -57,7 +57,7 @@
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Nick</first><last>Rossenbach</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>29-34</pages>
       <abstract>This work describes the Neural Machine Translation (NMT) system of the RWTH Aachen University developed for the English$German tracks of the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2017. We use NMT systems which are augmented by state-of-the-art extensions. Furthermore, we experiment with techniques that include data filtering, a larger vocabulary, two extensions to the attention mechanism and domain adaptation. Using these methods, we can show considerable improvements over the respective baseline systems and our IWSLT 2016 submission.</abstract>
       <url hash="b5a701b8">2017.iwslt-1.4</url>
@@ -68,7 +68,7 @@
       <author><first>Surafel M.</first><last>Lakew</last></author>
       <author><first>Quintino F.</first><last>Lotito</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>35-41</pages>
       <abstract>Neural Machine Translation has been shown to enable inference and cross-lingual knowledge transfer across multiple language directions using a single multilingual model. Focusing on this multilingual translation scenario, this work summarizes FBK’s participation in the IWSLT 2017 shared task. Our submissions rely on two multilingual systems trained on five languages (English, Dutch, German, Italian, and Romanian). The first one is a 20 language direction model, which handles all possible combinations of the five languages. The second multilingual system is trained only on 16 directions, leaving the others as zero-shot translation directions (i.e representing a more complex inference task on language pairs not seen at training time). More specifically, our zero-shot directions are Dutch$German and Italian$Romanian (resulting in four language combinations). Despite the small amount of parallel data used for training these systems, the resulting multilingual models are effective, even in comparison with models trained separately for every language pair (i.e. in more favorable conditions). We compare and show the results of the two multilingual models against a baseline single language pair systems. Particularly, we focus on the four zero-shot directions and show how a multilingual model trained with small data can provide reasonable results. Furthermore, we investigate how pivoting (i.e using a bridge/pivot language for inference in a source!pivot!target translations) using a multilingual model can be an alternative to enable zero-shot translation in a low resource setting.</abstract>
@@ -77,12 +77,12 @@
     </paper>
     <paper id="6">
       <title><fixed-case>KIT</fixed-case>’s Multilingual Neural Machine Translation systems for <fixed-case>IWSLT</fixed-case> 2017</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>42-47</pages>
       <abstract>In this paper, we present KIT’s multilingual neural machine translation (NMT) systems for the IWSLT 2017 evaluation campaign machine translation (MT) and spoken language translation (SLT) tasks. For our MT task submissions, we used our multi-task system, modified from a standard attentional neural machine translation framework, instead of building 20 individual NMT systems. We investigated different architectures as well as different data corpora in training such a multilingual system. We also suggested an effective adaptation scheme for multilingual systems which brings great improvements compared to monolingual systems. For the SLT track, in addition to a monolingual neural translation system used to generate correct punctuations and true cases of the data prior to training our multilingual system, we introduced a noise model in order to make our system more robust. Results show that our novel modifications improved our systems considerably on all tasks.</abstract>
       <url hash="039845c0">2017.iwslt-1.6</url>
@@ -100,7 +100,7 @@
     <paper id="8">
       <title><fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity <fixed-case>MT</fixed-case> System Description for <fixed-case>IWSLT</fixed-case> 2017</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <abstract>We describe here our Machine Translation (MT) model and the results we obtained for the IWSLT 2017 Multilingual Shared Task. Motivated by Zero Shot NMT [1] we trained a Multilingual Neural Machine Translation by combining all the training data into one single collection by appending the tokens to the source sentences in order to indicate the target language they should be translated to. We observed that even in a low resource situation we were able to get translations whose quality surpass the quality of those obtained by Phrase Based Statistical Machine Translation by several BLEU points. The most surprising result we obtained was in the zero shot setting for Dutch-German and Italian-Romanian where we observed that despite using no parallel corpora between these language pairs, the NMT model was able to translate between these languages and the translations were either as good as or better (in terms of BLEU) than the non zero resource setting. We also verify that the NMT models that use feed forward layers and self attention instead of recurrent layers are extremely fast in terms of training which is useful in a NMT experimental setting.</abstract>
       <pages>55-59</pages>
@@ -113,8 +113,8 @@
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Thomas</first><last>Zenkel</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>60-64</pages>
       <abstract>This paper describes our German and English Speech-to-Text (STT) systems for the 2017 IWSLT evaluation campaign. The campaign focuses on the transcription of unsegmented lecture talks. Our setup includes systems using both the Janus and Kaldi frameworks. We combined the outputs using both ROVER [1] and confusion network combination (CNC) [2] to achieve a good overall performance. The individual subsystems are built by using different speaker-adaptive feature combination (e.g., lMEL with i-vector or bottleneck speaker vector), acoustic models (GMM or DNN) and speaker adaptation (MLLR or fMLLR). Decoding is performed in two stages, where the GMM and DNN systems are adapted on the combination of the first stage outputs using MLLR, and fMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual sub-systems. For the English lecture task, our best combination system has a WER of 8.3% on the tst2015 development set while our other combinations gained 25.7% WER for German lecture tasks.</abstract>
       <url hash="7eecb830">2017.iwslt-1.9</url>
@@ -126,7 +126,7 @@
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>66-73</pages>
       <abstract>In this paper, we explore alternative ways to train a neural machine translation system in a multi-domain scenario. We investigate data concatenation (with fine tuning), model stacking (multi-level fine tuning), data selection and multi-model ensemble. Our findings show that the best translation quality can be achieved by building an initial system on a concatenation of available out-of-domain data and then fine-tuning it on in-domain data. Model stacking works best when training begins with the furthest out-of-domain data and the model is incrementally fine-tuned with the next furthest domain and so on. Data selection did not give the best results, but can be considered as a decent compromise between training time and translation quality. A weighted ensemble of different individual models performed better than data selection. It is beneficial in a scenario when there is no time for fine-tuning an already trained model.</abstract>
       <url hash="8e19d4d2">2017.iwslt-1.10</url>
@@ -136,7 +136,7 @@
       <title>Domain-independent Punctuation and Segmentation Insertion</title>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>74-81</pages>
       <abstract>Punctuation and segmentation is crucial in spoken language translation, as it has a strong impact to translation performance. However, the impact of rare or unknown words in the performance of punctuation and segmentation insertion has not been thoroughly studied. In this work, we simulate various degrees of domain-match in testing scenario and investigate their impact to the punctuation insertion task. We explore three rare word generalizing schemes using part-of-speech (POS) tokens. Experiments show that generalizing rare and unknown words greatly improves the punctuation insertion performance, reaching up to 8.8 points of improvement in F-score when applied to the out-of-domain test scenario. We show that this improvement in punctuation quality has a positive impact on a following machine translation (MT) performance, improving it by 2 BLEU points.</abstract>
       <url hash="6adc9900">2017.iwslt-1.11</url>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="12">
       <title>Synthetic Data for Neural Machine Translation of Spoken-Dialects</title>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Mostafa</first><last>Elaraby</last></author>
       <author><first>Ahmed Y.</first><last>Tawfik</last></author>
       <pages>82-89</pages>
@@ -156,7 +156,7 @@
       <title>Toward Robust Neural Machine Translation for Noisy Input Sequences</title>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>90-96</pages>
       <abstract>Translating noisy inputs, such as the output of a speech recognizer, is a difficult but important challenge for neural machine translation. One way to increase robustness of neural models is by introducing artificial noise to the training data. In this paper, we experiment with appropriate forms of such noise, exploring a middle ground between general-purpose regularizers and highly task-specific forms of noise induction. We show that with a simple generative noise model, moderate gains can be achieved in translating erroneous speech transcripts, provided that type and amount of noise are properly calibrated. The optimal amount of noise at training time is much smaller than the amount of noise in our test data, indicating limitations due to trainability issues. We note that unlike our baseline model, models trained on noisy data are able to generate outputs of proper length even for noisy inputs, while gradually reducing output length for higher amount of noise, as might also be expected from a human translator. We discuss these findings in details and give suggestions for future work.</abstract>
       <url hash="a49b29e5">2017.iwslt-1.13</url>
@@ -164,7 +164,7 @@
     </paper>
     <paper id="14">
       <title>Monolingual Embeddings for Low Resourced Neural Machine Translation</title>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>97-104</pages>
       <abstract>Neural machine translation (NMT) is the state of the art for machine translation, and it shows the best performance when there is a considerable amount of data available. When only little data exist for a language pair, the model cannot produce good representations for words, particularly for rare words. One common solution consists in reducing data sparsity by segmenting words into sub-words, in order to allow rare words to have shared representations with other words. Taking a different approach, in this paper we present a method to feed an NMT network with word embeddings trained on monolingual data, which are combined with the task-specific embeddings learned at training time. This method can leverage an embedding matrix with a huge number of words, which can therefore extend the word-level vocabulary. Our experiments on two language pairs show good results for the typical low-resourced data scenario (IWSLT in-domain dataset). Our consistent improvements over the baselines represent a positive proof about the possibility to leverage models pre-trained on monolingual data in NMT.</abstract>
@@ -175,7 +175,7 @@
       <title>Effective Strategies in Zero-Shot Neural Machine Translation</title>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>105-112</pages>
       <abstract>In this paper, we proposed two strategies which can be applied to a multilingual neural machine translation system in order to better tackle zero-shot scenarios despite not having any parallel corpus. The experiments show that they are effective in terms of both performance and computing resources, especially in multilingual translation of unbalanced data in real zero-resourced condition when they alleviate the language bias problem.</abstract>
       <url hash="c3a1c766">2017.iwslt-1.15</url>
@@ -185,7 +185,7 @@
       <title>Improving Zero-Shot Translation of Low-Resource Languages</title>
       <author><first>Surafel M.</first><last>Lakew</last></author>
       <author><first>Quintino F.</first><last>Lotito</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>113-119</pages>
diff --git a/data/xml/2017.jeptalnrecital.xml b/data/xml/2017.jeptalnrecital.xml
index 76748ec19a..329f8eb9b2 100644
--- a/data/xml/2017.jeptalnrecital.xml
+++ b/data/xml/2017.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2020-07-21" type="proceedings">
     <meta>
       <booktitle>Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 1 - Articles longs</booktitle>
-      <editor><first>Iris</first><last>Eshkol-Taravella</last></editor>
+      <editor id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></editor>
       <editor><first>Jean-Yves</first><last>Antoine</last></editor>
       <publisher>ATALA</publisher>
       <address>Orléans, France</address>
@@ -61,7 +61,7 @@
       <language>fra</language>
       <author><first>Kamel</first><last>Bouzidi</last></author>
       <author><first>Zied</first><last>Elloumi</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Mohamed-Faouzi</first><last>Benzeghiba</last></author>
       <pages>63–76</pages>
@@ -73,7 +73,7 @@
       <title>Représentation et analyse automatique des discontinuités syntaxiques dans les corpus arborés en constituants du français (Representation and parsing of syntactic discontinuities in <fixed-case>F</fixed-case>rench constituent treebanks)</title>
       <language>fra</language>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>77–92</pages>
       <abstract>Nous présentons de nouvelles instanciations de trois corpus arborés en constituants du français, où certains phénomènes syntaxiques à l’origine de dépendances à longue distance sont représentés directement à l’aide de constituants discontinus. Les arbres obtenus relèvent de formalismes grammaticaux légèrement sensibles au contexte (LCFRS). Nous montrons ensuite qu’il est possible d’analyser automatiquement de telles structures de manière efficace à condition de s’appuyer sur une méthode d’inférence approximative. Pour cela, nous présentons un analyseur syntaxique par transitions, qui réalise également l’analyse morphologique et l’étiquetage fonctionnel des mots de la phrase. Enfin, nos expériences montrent que la rareté des phénomènes concernés dans les données françaises pose des difficultés pour l’apprentissage et l’évaluation des structures discontinues.</abstract>
       <url hash="2a2be6d3">2017.jeptalnrecital-long.6</url>
@@ -90,9 +90,9 @@
     </paper>
     <paper id="8">
       <title>Projection Aléatoire Non-Négative pour le Calcul de Word Embedding / Non-Negative Randomized Word Embedding</title>
-      <author><first>Behrang</first><last>Qasemizadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>Qasemizadeh</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <pages>109–122</pages>
       <abstract>Non-Negative Randomized Word Embedding We propose a word embedding method which is based on a novel random projection technique. We show that weighting methods such as positive pointwise mutual information (PPMI) can be applied to our models after their construction and at a reduced dimensionality. Hence, the proposed technique can efficiently transfer words onto semantically discriminative spaces while demonstrating high computational performance, besides benefits such as ease of update and a simple mechanism for interoperability. We report the performance of our method on several tasks and show that it yields competitive results compared to neural embedding methods in monolingual corpus-based setups.</abstract>
       <url hash="41643705">2017.jeptalnrecital-long.8</url>
@@ -101,8 +101,8 @@
     <paper id="9">
       <title>Création et validation de signatures sémantiques : application à la mesure de similarité sémantique et à la substitution lexicale (Creating and validating semantic signatures : application for measuring semantic similarity and lexical substitution)</title>
       <language>fra</language>
-      <author><first>Mokhtar-Boumedyen</first><last>Billami</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="mokhtar-b-billami"><first>Mokhtar-Boumedyen</first><last>Billami</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>123–138</pages>
       <abstract>L’intégration de la notion de similarité sémantique entre les unités lexicales est essentielle dans différentes applications de Traitement Automatique des Langues (TAL). De ce fait, elle a reçu un intérêt considérable qui a eu comme conséquence le développement d’une vaste gamme d’approches pour en déterminer une mesure. Ainsi, plusieurs types de mesures de similarité existent, elles utilisent différentes représentations obtenues à partir d’informations soit dans des ressources lexicales, soit dans de gros corpus de données ou bien dans les deux. Dans cet article, nous nous intéressons à la création de signatures sémantiques décrivant des représentations vectorielles de mots à partir du réseau lexical JeuxDeMots (JDM). L’évaluation de ces signatures est réalisée sur deux tâches différentes : mesures de similarité sémantique et substitution lexicale. Les résultats obtenus sont très satisfaisants et surpassent, dans certains cas, les performances des systèmes de l’état de l’art.</abstract>
       <url hash="e8afda94">2017.jeptalnrecital-long.9</url>
@@ -133,7 +133,7 @@
     <paper id="12">
       <title>Construction automatique d’une base de données étymologiques à partir du wiktionary (Automatic construction of an etymological database using <fixed-case>W</fixed-case>iktionary)</title>
       <language>fra</language>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>169–181</pages>
       <abstract>Les ressources lexicales électroniques ne contiennent quasiment jamais d’informations étymologiques. De telles informations, convenablement formalisées, permettraient pourtant de développer des outils automatiques au service de la linguistique historique et comparative, ainsi que d’améliorer significativement le traitement automatique de langues anciennes. Nous décrivons ici le processus que nous avons mis en œuvre pour extraire des données étymologiques à partir des notices étymologiques du wiktionary, rédigées en anglais. Nous avons ainsi produit une base multilingue de près d’un million de lexèmes et une base de plus d’un demi-million de relations étymologiques entre lexèmes.</abstract>
       <url hash="19c35d74">2017.jeptalnrecital-long.12</url>
@@ -142,12 +142,12 @@
     <paper id="13">
       <title>Apprendre des représentations jointes de mots et d’entités pour la désambiguïsation d’entités (Combining Word and Entity Embeddings for Entity Linking)</title>
       <language>fra</language>
-      <author><first>José G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>José G.</first><last>Moreno</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Romain</first><last>Beaumont</last></author>
       <author><first>Eva</first><last>D’Hondt</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
       <pages>182–195</pages>
@@ -159,7 +159,7 @@
       <title>Analyse et évolution de la compréhension de termes techniques (Analysis and Evolution of Understanding of Technical Terms)</title>
       <language>fra</language>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>196–211</pages>
       <abstract>Nous faisons l’hypothèse que les mots techniques inconnus dotés d’une structure interne (mots affixés ou composés) peuvent fournir des indices linguistiques à un locuteur, ce qui peut l’aider à analyser et à comprendre ces mots. Afin de tester notre hypothèse, nous proposons de travailler sur un ensemble de mots techniques provenant du domaine médical. Un grand ensemble de mots techniques est annoté par cinq annotateurs. Nous effectuons deux types d’analyses : l’analyse de l’évolution des mots compréhensibles et incompréhensibles (de manière générale et en fonction de certains suffixes) et l’analyse des clusters avec ces mots créés par apprentissage non-supervisé, sur la base des descripteurs linguistiques et extra-linguistiques. Nos résultats indiquent que, selon la sensibilité linguistique des annotateurs, les mots techniques peuvent devenir décodables et compréhensibles. Quant aux clusters, le contenu de certains reflète la difficulté des mots qui les composent et montre également la progression des annotateurs dans leur compréhension. La ressource construite est disponible pour la recherche : <url>http://natalia.grabar.free.fr/rated-lexicon.html</url>.</abstract>
       <url hash="5eb830b7">2017.jeptalnrecital-long.14</url>
@@ -169,7 +169,7 @@
   <volume id="court" ingest-date="2020-07-21" type="proceedings">
     <meta>
       <booktitle>Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 2 - Articles courts</booktitle>
-      <editor><first>Iris</first><last>Eshkol-Taravella</last></editor>
+      <editor id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></editor>
       <editor><first>Jean-Yves</first><last>Antoine</last></editor>
       <publisher>ATALA</publisher>
       <address>Orléans, France</address>
@@ -184,8 +184,8 @@
     <paper id="1">
       <title>Annotation d’expressions polylexicales verbales en français (Annotation of verbal multiword expressions in <fixed-case>F</fixed-case>rench)</title>
       <language>fra</language>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
@@ -240,8 +240,8 @@
       <title>Analyse automatique <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et : une étude sur un corpus français de textes encyclopédiques (<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et automatic analysis : a study on a <fixed-case>F</fixed-case>rench corpus of encyclopedic texts)</title>
       <language>fra</language>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>44–51</pages>
       <abstract>Cet article présente un système d’analyse automatique en cadres sémantiques évalué sur un corpus de textes encyclopédiques d’histoire annotés selon le formalisme FrameNet. L’approche choisie repose sur un modèle intégré d’étiquetage de séquence qui optimise conjointement l’identification des cadres, la segmentation et l’identification des rôles sémantiques associés. Nous cherchons dans cette étude à analyser la complexité de la tâche selon plusieurs dimensions. Une analyse détaillée des performances du système est ainsi proposée, à la fois selon l’angle des paramètres du modèle et de la nature des données.</abstract>
       <url hash="8975cd4d">2017.jeptalnrecital-court.6</url>
@@ -251,7 +251,7 @@
       <title>Détection de coréférences de bout en bout en français (End-to-end coreference resolution for <fixed-case>F</fixed-case>rench)</title>
       <language>fra</language>
       <author><first>Elisabeth</first><last>Godbert</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>52–59</pages>
       <abstract>Notre objectif est l’élaboration d’un système de détection automatique de relations de coréférence le plus général possible, pour le traitement des anaphores pronominales et les coréférences directes. Nous décrivons dans cet article les différentes étapes de traitement des textes dans le système que nous avons développé : (i) l’annotation en traits lexicaux et syntaxiques par le système Macaon ; (ii) le repérage des mentions par un modèle obtenu par apprentissage sur le corpus ANCOR ; (iii) l’annotation sémantique des mentions à partir de deux ressources : le DEM et le LVF ; (iv) l’annotation en coréférences par un système à base de règles. Le système est évalué sur le corpus ANCOR.</abstract>
       <url hash="d2ea8a54">2017.jeptalnrecital-court.7</url>
@@ -315,7 +315,7 @@
       <author><first>Damien</first><last>Sileo</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>102–109</pages>
       <abstract>Plusieurs tâches en traitement du langage naturel impliquent de modifier des phrases en conservant au mieux leur sens, comme la reformulation, la compression, la simplification, chacune avec leurs propres données et modèles. Nous introduisons ici une méthode générale s’adressant à tous ces problèmes, utilisant des données plus simples à obtenir : un ensemble de phrases munies d’indicateurs sur leur style, comme des phrases et le type de sentiment qu’elles expriment. Cette méthode repose sur un modèle d’apprentissage de représentations non supervisé (un auto-encodeur variationnel), puis sur le changement des représentations apprises pour correspondre à un style donné. Le résultat est évalué qualitativement, puis quantitativement sur le jeu de données de compression de phrases Microsoft, avec des résultats encourageants.</abstract>
       <url hash="f2afd341">2017.jeptalnrecital-court.13</url>
@@ -348,7 +348,7 @@
       <title>Simbow : une mesure de similarité sémantique entre textes (Simbow : a semantic similarity metric between texts)</title>
       <language>fra</language>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>126–133</pages>
       <abstract>Cet article décrit une mesure de similarité sémantique non-supervisée qui repose sur l’introduction d’une matrice de relations entre mots, dans un paradigme de mesure cosinus entre sacs de mots. La métrique obtenue, apparentée à soft-cosinus, tient compte des relations entre mots qui peuvent être d’ordre lexical ou sémantique selon la matrice considérée. La mise en œuvre de cette métrique sur la tâche qui consiste à mesurer des similarités sémantiques entre questions posées sur un forum, a remporté la campagne d’évaluation SemEval2017. Si l’approche soumise à la campagne est une combinaison supervisée de différentes mesures non-supervisées, nous présentons dans cet article en détail les métriques non-supervisées, qui présentent l’avantage de produire de bons résultats sans nécessiter de ressources spécifiques autres que des données non annotées du domaine considéré.</abstract>
       <url hash="60d3c3a5">2017.jeptalnrecital-court.16</url>
@@ -383,7 +383,7 @@
       <title>Parcourir, reconnaître et réfléchir. Combinaison de méthodes légères pour l’extraction de relations sémantiques (Browse, recognize and think)</title>
       <language>fra</language>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <pages>150–157</pages>
       <abstract>La capture de relations sémantiques entre termes à partir de textes est un moyen privilégié de constituer/alimenter une base de connaissances, ressource indispensable pour l’analyse de textes. Nous proposons et évaluons la combinaison de trois méthodes de production de relations lexicosémantiques.</abstract>
       <url hash="1781ba3c">2017.jeptalnrecital-court.19</url>
@@ -394,7 +394,7 @@
       <language>fra</language>
       <author><first>Alain</first><last>Joubert</last></author>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <pages>158–164</pages>
       <abstract>La correction des erreurs dans une collection de données est un problème délicat. Elle peut être réalisée manuellement par un expert, ou en utilisant des méthodes de crowdsourcing, ou encore automatiquement au moyen d’algorithmes. Nous présentons ici des méthodes automatiques permettant de détecter les erreurs potentielles « secondaires » induites par les mécanismes automatiques d’inférences de relations, lorsqu’ils s’appuient sur des relations erronées « initiales » détectées manuellement. Des résultats encourageants, mesurés sur le réseau JeuxDeMots, nous invitent à envisager également des stratégies qui permettraient de détecter automatiquement les relations erronées « initiales », ce qui pourrait conduire à une détection automatique de la majorité des erreurs présentes dans le réseau.</abstract>
       <url hash="3cef853a">2017.jeptalnrecital-court.20</url>
@@ -405,7 +405,7 @@
       <language>fra</language>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Nicolas</first><last>Lefebvre</last></author>
+      <author id="nicolas-lefebvre"><first>Nicolas</first><last>Lefebvre</last></author>
       <author><first>Laura</first><last>Ramírez</last></author>
       <author><first>Mathilde</first><last>Regnault</last></author>
       <author><first>Mary</first><last>Collins</last></author>
@@ -444,7 +444,7 @@
       <author><first>Matthieu</first><last>Riou</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>192–199</pages>
       <abstract>Récemment, de nouveaux modèles à base de réseaux de neurones récurrents ont été proposés pour traiter la génération en langage naturel dans des systèmes de dialogue (Wen et al., 2016a). Ces modèles demandent une grande quantité de données d’apprentissage ; or la collecte et l’annotation de ces données peuvent être laborieuses. Pour répondre à cette problématique, nous nous intéressons ici à la mise en place d’un protocole d’apprentissage en ligne basé sur un apprentissage par renforcement, permettant d’améliorer l’utilisation d’un modèle initial appris sur un corpus plus restreint généré par patrons. Dans cette étude exploratoire, nous proposons une approche basée sur un algorithme de bandit contre un adversaire, afin d’en étudier l’intérêt et les limites.</abstract>
       <url hash="2bd857d8">2017.jeptalnrecital-court.24</url>
@@ -455,7 +455,7 @@
       <language>fra</language>
       <author><first>Loïc</first><last>Grobol</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
-      <author><first>Éric</first><last>de La Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de La Clergerie</last></author>
       <author><first>Marco</first><last>Dinarelli</last></author>
       <author><first>Frédéric</first><last>Landragin</last></author>
       <pages>200–208</pages>
@@ -475,8 +475,8 @@
     <paper id="27">
       <title>Adaptation incrémentale de modèles de traduction neuronaux (Incremental adaptation of neural machine translation models)</title>
       <language>fra</language>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>218–225</pages>
       <abstract>L’adaptation au domaine est un verrou scientifique en traduction automatique. Il englobe généralement l’adaptation de la terminologie et du style, en particulier pour la post-édition humaine dans le cadre d’une traduction assistée par ordinateur. Avec la traduction automatique neuronale, nous étudions une nouvelle approche d’adaptation au domaine que nous appelons “spécialisation” et qui présente des résultats prometteurs tant dans la vitesse d’apprentissage que dans les scores de traduction. Dans cet article, nous proposons d’explorer cette approche.</abstract>
@@ -486,7 +486,7 @@
     <paper id="28">
       <title>Détection de concepts et granularité de l’annotation (Concept detection and annotation granularity )</title>
       <language>fra</language>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <pages>226–233</pages>
       <abstract>Nous nous intéressons ici à une tâche de détection de concepts dans des textes sans exigence particulière de passage par une phase de détection d’entités avec leurs frontières. Il s’agit donc d’une tâche de catégorisation de textes multiétiquette, avec des jeux de données annotés au niveau des textes entiers. Nous faisons l’hypothèse qu’une annotation à un niveau de granularité plus fin, typiquement au niveau de l’énoncé, devrait améliorer la performance d’un détecteur automatique entraîné sur ces données. Nous examinons cette hypothèse dans le cas de textes courts particuliers : des certificats de décès où l’on cherche à reconnaître des diagnostics, avec des jeux de données initialement annotés au niveau du certificat entier. Nous constatons qu’une annotation au niveau de la « ligne » améliore effectivement les résultats, mais aussi que le simple fait d’appliquer au niveau de la ligne un classifieur entraîné au niveau du texte est déjà une source d’amélioration.</abstract>
@@ -498,8 +498,8 @@
       <language>fra</language>
       <author><first>Christopher</first><last>Norman</last></author>
       <author><first>Mariska</first><last>Leeflang</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>234–241</pages>
       <abstract>Les revues systématiques de la littérature dans le domaine biomédical reposent essentiellement sur le travail bibliographique manuel d’experts. Nous évaluons les performances de la classification supervisée pour la découverte automatique d’articles à l’aide de plusieurs définitions des critères d’inclusion. Nous appliquons un modèle de regression logistique sur deux corpus issus de revues systématiques conduites dans le domaine du traitement automatique de la langue et de l’efficacité des médicaments. La classification offre une aire sous la courbe moyenne (AUC) de 0.769 si le classifieur est contruit à partir des jugements experts portés sur les titres et résumés des articles, et de 0.835 si on utilise les jugements portés sur le texte intégral. Ces résultats indiquent l’importance des jugements portés dès le début du processus de sélection pour développer un classifieur efficace pour accélérer l’élaboration des revues systématiques à l’aide d’un algorithme de classification standard.</abstract>
       <url hash="e660e625">2017.jeptalnrecital-court.29</url>
@@ -508,10 +508,10 @@
     <paper id="30">
       <title>Une approche linguistique pour la détection des dialectes arabes (A linguistic approach for the detection of <fixed-case>A</fixed-case>rabic dialects)</title>
       <language>fra</language>
-      <author><first>Houda</first><last>Saâdane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saâdane</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
       <author><first>Hosni</first><last>Seffih</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>242–250</pages>
       <abstract>Dans cet article, nous présentons un processus d’identification automatique de l’origine dialectale pour la langue arabe de textes écrits en caractères arabes ou en écriture latine (arabizi). Nous décrivons le processus d’annotation des ressources construites et du système de translittération adopté. Deux approches d’identification de la langue sont comparées : la première est linguistique et exploite des dictionnaires, la seconde est statistique et repose sur des méthodes traditionnelles d’apprentissage automatique (n-grammes). L’évaluation de ces approches montre que la méthode linguistique donne des résultats satisfaisants, sans être dépendante des corpus d’apprentissage.</abstract>
       <url hash="9dfe4d07">2017.jeptalnrecital-court.30</url>
@@ -521,7 +521,7 @@
   <volume id="recital" ingest-date="2020-07-21" type="proceedings">
     <meta>
       <booktitle>Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. 19es REncontres jeunes Chercheurs en Informatique pour le TAL (RECITAL 2017)</booktitle>
-      <editor><first>Iris</first><last>Eshkol-Taravella</last></editor>
+      <editor id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></editor>
       <editor><first>Jean-Yves</first><last>Antoine</last></editor>
       <publisher>ATALA</publisher>
       <address>Orléans, France</address>
@@ -652,7 +652,7 @@
   <volume id="demo" ingest-date="2020-07-21" type="proceedings">
     <meta>
       <booktitle>Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 3 - Démonstrations</booktitle>
-      <editor><first>Iris</first><last>Eshkol-Taravella</last></editor>
+      <editor id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></editor>
       <editor><first>Jean-Yves</first><last>Antoine</last></editor>
       <publisher>ATALA</publisher>
       <address>Orléans, France</address>
@@ -704,9 +704,9 @@
     <paper id="5">
       <title>Apprentissage d’agents conversationnels pour la gestion de relations clients (Training chatbots for customer relation management)</title>
       <language>fra</language>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>17–18</pages>
       <abstract>Ce travail démontre la faisabilité d’entraîner des chatbots sur des traces de conversations dans le domaine de la relation client. Des systèmes à base de modèles de langage, de recherche d’information et de traduction sont comparés pour la tâche.</abstract>
@@ -716,14 +716,14 @@
     <paper id="6">
       <title>Conception d’une solution de détection d’événements basée sur <fixed-case>T</fixed-case>witter (Design of a solution for event detection from Tweeter)</title>
       <language>fra</language>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Catherine</first><last>Kobus</last></author>
       <author><first>Yongchao</first><last>Deng</last></author>
       <author><first>Cyril</first><last>Touffet</last></author>
       <author><first>Jungi</first><last>Kim</last></author>
       <author><first>Inès</first><last>Kapp</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Aurélien</first><last>Coquard</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>19–20</pages>
@@ -736,7 +736,7 @@
       <language>fra</language>
       <author><first>Gaël</first><last>Guibon</last></author>
       <author><first>Magalie</first><last>Ochs</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>21–23</pages>
       <abstract>Nous présentons une interface de recommandation d’emojis porteurs de sentiments qui utilise un modèle de prédiction appris sur des messages informels privés. Chacun étant associé à deux scores de polarité prédits. Cette interface permet permet également d’enregistrer les choix de l’utilisateur pour confirmer ou infirmer la recommandation.</abstract>
       <url hash="ddd7dffe">2017.jeptalnrecital-demo.7</url>
@@ -780,8 +780,8 @@
       <author><first>Christopher</first><last>Norman</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>33–34</pages>
       <abstract>Nous proposons des démonstrations de trois outils développés par le LIMSI en traitement automatique des langues appliqué au domaine biomédical : la détection de concepts médicaux dans des textes courts, la catégorisation d’articles scientifiques pour l’assistance à l’écriture de revues systématiques, et l’anonymisation de textes cliniques.</abstract>
       <url hash="dde1ceef">2017.jeptalnrecital-demo.11</url>
diff --git a/data/xml/2017.lilt.xml b/data/xml/2017.lilt.xml
index 219be79c15..1afea2dfae 100644
--- a/data/xml/2017.lilt.xml
+++ b/data/xml/2017.lilt.xml
@@ -10,8 +10,8 @@
     </meta>
     <paper id="1">
       <title>Lexical Factorization and Syntactic Behavior</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <abstract>In this paper, we examine the correlation between lexical semantics and the syntactic realization of the different components of a word’s meaning in natural language. More specifically, we will explore the effect that lexical factorization in verb semantics has on the suppression or expression of semantic features within the sentence. Factorization was a common analytic tool employed in early generative linguistic approaches to lexical decomposition, and continues to play a role in contemporary semantics, in various guises and modified forms. Building on the unpublished analysis of verbs of seeing in Joshi (1972), we argue here that the significance of lexical factorization is twofold: first, current models of verb meaning owe much of their insight to factor-based theories of meaning; secondly, the factorization properties of a lexical item appear to influence, both directly and indirectly, the possible syntactic expressibility of arguments and adjuncts in sentence composition. We argue that this information can be used to compute what we call the factor expression likelihood (FEL) associated with a verb in a sentence. This is the likelihood that the overt syntactic expression of a factor will cooccur with the verb. This has consequences for the compositional mechanisms responsible for computing the meaning of the sentence, as well as significance in the creation of computational models attempting to capture linguistic behavior over large corpora.</abstract>
       <issue>1</issue>
       <url hash="1bb480bf">2017.lilt-15.1</url>
@@ -19,7 +19,7 @@
     </paper>
     <paper id="2">
       <title>Factorization of Verbs: An Analysis of Verbs of Seeing</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <issue>1</issue>
       <url hash="5d79d50a">2017.lilt-15.2</url>
       <bibkey>joshi-2017-factorization</bibkey>
diff --git a/data/xml/2017.mtsummit.xml b/data/xml/2017.mtsummit.xml
index fac736bd1b..d64efca60d 100644
--- a/data/xml/2017.mtsummit.xml
+++ b/data/xml/2017.mtsummit.xml
@@ -19,7 +19,7 @@
       <title>Empirical Study of Dropout Scheme for Neural Machine Translation</title>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1-14</pages>
       <url hash="00708fe4">2017.mtsummit-papers.1</url>
       <bibkey>wang-etal-2017-empirical</bibkey>
@@ -28,7 +28,7 @@
       <title>A Target Attention Model for Neural Machine Translation</title>
       <author><first>Hideya</first><last>Mino</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>15-26</pages>
       <url hash="1b1fe0c7">2017.mtsummit-papers.2</url>
       <bibkey>mino-etal-2017-target</bibkey>
@@ -55,11 +55,11 @@
       <title>Translation Quality and Productivity: A Study on Rich Morphology Languages</title>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Kim</first><last>Harris</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Viviven</first><last>Macketanz</last></author>
       <author><first>Inguna</first><last>Skadin</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>55-71</pages>
       <url hash="d3ad3e03">2017.mtsummit-papers.5</url>
@@ -68,7 +68,7 @@
     <paper id="6">
       <title>The <fixed-case>M</fixed-case>icrosoft Speech Language Translation (<fixed-case>MSLT</fixed-case>) Corpus for <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese: Conversational Test data for Machine Translation and Speech Recognition</title>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <pages>72-85</pages>
       <url hash="8cfca0bc">2017.mtsummit-papers.6</url>
       <bibkey>federmann-lewis-2017-microsoft</bibkey>
@@ -76,7 +76,7 @@
     <paper id="7">
       <title>Paying Attention to Multi-Word Expressions in Neural Machine Translation</title>
       <author><first>Matīss</first><last>Rikters</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>86-95</pages>
       <url hash="d6d7b87c">2017.mtsummit-papers.7</url>
       <bibkey>rikters-bojar-2017-paying</bibkey>
@@ -84,7 +84,7 @@
     <paper id="8">
       <title>Enabling Multi-Source Neural Machine Translation By Concatenating Source Sentences In Multiple Languages</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>96-107</pages>
       <url hash="a4eaedd4">2017.mtsummit-papers.8</url>
@@ -109,7 +109,7 @@
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Pintu</first><last>Lohar</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <author><first>Maria</first><last>Gialama</last></author>
       <pages>116-131</pages>
       <url hash="c3472fb3">2017.mtsummit-papers.10</url>
@@ -117,8 +117,8 @@
     </paper>
     <paper id="11">
       <title>One-parameter models for sentence-level post-editing effort estimation</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>132-143</pages>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="12">
       <title>A Minimal Cognitive Model for Translating and Post-editing</title>
-      <author><first>Moritz</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz</first><last>Schaeffer</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <pages>144-155</pages>
       <url hash="a803d275">2017.mtsummit-papers.12</url>
@@ -163,7 +163,7 @@
     <paper id="16">
       <title>Elastic-substitution decoding for Hierarchical <fixed-case>SMT</fixed-case>: efficiency, richer search and double labels</title>
       <author><first>Gideon</first><last>Maillette de Buy Wenniger</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>201-215</pages>
       <url hash="2f4a2a39">2017.mtsummit-papers.16</url>
@@ -172,8 +172,8 @@
     <paper id="17">
       <title>Development of a classifiers/quantifiers dictionary towards <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case></title>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>216-226</pages>
       <url hash="952acad3">2017.mtsummit-papers.17</url>
       <bibkey>tomokiyo-etal-2017-development</bibkey>
@@ -192,7 +192,7 @@
     <paper id="19">
       <title>Usefulness of <fixed-case>MT</fixed-case> output for comprehension — an analysis from the point of view of linguistic intercomprehension</title>
       <author><first>Kenneth</first><last>Jordan Núñez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Esteve</first><last>Clua</last></author>
       <pages>241-253</pages>
       <url hash="fa09c597">2017.mtsummit-papers.19</url>
@@ -200,7 +200,7 @@
     </paper>
     <paper id="20">
       <title>Machine Translation as an Academic Writing Aid for Medical Practitioners</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <author><first>Sharon</first><last>O’Brien</last></author>
       <author><first>Marie-Josée</first><last>Goulet</last></author>
       <author><first>Michel</first><last>Simard</last></author>
@@ -211,16 +211,16 @@
     <paper id="21">
       <title>A Multilingual Parallel Corpus for Improving Machine Translation on <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Languages</title>
       <author><first>Hai-Long</first><last>Trieu</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>268-281</pages>
       <url hash="75848541">2017.mtsummit-papers.21</url>
       <bibkey>trieu-nguyen-2017-multilingual</bibkey>
     </paper>
     <paper id="22">
       <title>Exploring Hypotheses Spaces in Neural Machine Translation</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <pages>282-298</pages>
       <url hash="1ad31cb7">2017.mtsummit-papers.22</url>
       <bibkey>blain-etal-2017-exploring</bibkey>
@@ -237,7 +237,7 @@
       <title>Disentangling <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case> Errors in Speech Translation</title>
       <author><first>Ngoc-Tien</first><last>Le</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>312-323</pages>
       <url hash="8ae7b0f0">2017.mtsummit-papers.24</url>
       <bibkey>le-etal-2017-disentangling</bibkey>
@@ -249,7 +249,7 @@
       <author><first>Mohammad</first><last>Hasanuzzaman</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>324-336</pages>
       <url hash="b2a67947">2017.mtsummit-papers.25</url>
       <bibkey>kamila-etal-2017-temporality</bibkey>
@@ -329,8 +329,8 @@
       <author><first>Sharon</first><last>O’Brien</last></author>
       <author><first>Chao-Hong</first><last>Liu</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>João</first><last>Graça</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Ellie</first><last>Kemp</last></author>
       <author><first>Rebecca</first><last>Petras</last></author>
@@ -340,7 +340,7 @@
     </paper>
     <paper id="7">
       <title>A Case Study of Machine Translation in Financial Sentiment Analysis</title>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Matteo</first><last>Capelletti</last></author>
       <author><first>Alexandros</first><last>Poulis</last></author>
       <author><first>Thorben</first><last>Stemann</last></author>
diff --git a/data/xml/2017.tal.xml b/data/xml/2017.tal.xml
index 0f41bfddf7..c7144c5d97 100644
--- a/data/xml/2017.tal.xml
+++ b/data/xml/2017.tal.xml
@@ -40,10 +40,10 @@
       <title>Noise or music? Investigating the usefulness of normalisation for robust sentiment analysis on social media data</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Marjan</first><last>Van de Kauter</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>63–87</pages>
       <url hash="0fdf600c">2017.tal-1.3</url>
       <bibkey>van-hee-etal-2017-noise</bibkey>
@@ -52,7 +52,7 @@
   <volume id="2" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 58, Numéro 2 : Traitement automatique de la langue juridique [Legal Natural Language Processing]</booktitle>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <editor><first>Adam</first><last>Wyner</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -76,9 +76,9 @@
     <paper id="2">
       <title>Sentence Boundary Detection in Adjudicatory Decisions in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates</title>
       <author><first>Jaromir</first><last>Savelka</last></author>
-      <author><first>Vern R.</first><last>Walker</last></author>
+      <author id="vern-walker"><first>Vern R.</first><last>Walker</last></author>
       <author><first>Matthias</first><last>Grabmair</last></author>
-      <author><first>Kevin D.</first><last>Ashley</last></author>
+      <author id="kevin-d-ashley"><first>Kevin D.</first><last>Ashley</last></author>
       <pages>21–45</pages>
       <url hash="7b87ed52">2017.tal-2.2</url>
       <bibkey>savelka-etal-2017-sentence</bibkey>
@@ -96,7 +96,7 @@
   <volume id="3" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 58, Numéro 3 : Traitement automatique de l'arabe et des langues apparentées [NLP for Arabic and Related Languages]</booktitle>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Nizar</first><last>Habash</last></editor>
       <editor><first>Imed</first><last>Zitouni</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
@@ -113,7 +113,7 @@
     <paper id="1">
       <title>Préambule [Preamble]</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>8–8</pages>
       <url hash="44bf46fc">2017.tal-3.1</url>
@@ -133,7 +133,7 @@
       <title>Modern Trends in <fixed-case>A</fixed-case>rabic Sentiment Analysis: A Survey</title>
       <author><first>Hala</first><last>Mulki</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
-      <author><first>Ismail</first><last>Babaoğlu</last></author>
+      <author id="ismail-babaoglu"><first>Ismail</first><last>Babaoğlu</last></author>
       <pages>15–39</pages>
       <url hash="e9c70979">2017.tal-3.3</url>
       <bibkey>mulki-etal-2017-modern</bibkey>
@@ -142,7 +142,7 @@
       <title>Une approche fondée sur les lexiques d’analyse de sentiments du dialecte algérien [A lexicon-based approach for sentiment analysis in the <fixed-case>A</fixed-case>lgerian dialect]</title>
       <author><first>Imane</first><last>Guellil</last></author>
       <author><first>Faical</first><last>Azouaou</last></author>
-      <author><first>Houda</first><last>Saâdane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saâdane</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <pages>41–65</pages>
       <url hash="aab199de">2017.tal-3.4</url>
diff --git a/data/xml/2018.clib.xml b/data/xml/2018.clib.xml
index d7eeaf62ea..da56265ab4 100644
--- a/data/xml/2018.clib.xml
+++ b/data/xml/2018.clib.xml
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>With a little help from <fixed-case>NLP</fixed-case>: My Language Technology applications with impact on society</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>1–4</pages>
       <abstract>The keynote speech presents the speaker’s vision that research should lead to the development of applications which benefit society. To support this, the speaker will present three original methodologies proposed by him which underpin applications jointly implemented with colleagues from across his research group. These Language Technology tools already have a substantial societal impact in the following areas: learning and assessment, translation and care for people with language disabilities.</abstract>
       <url hash="a7f2b993">2018.clib-1.1</url>
@@ -44,7 +44,7 @@
       <title>Abstractive Text Summarization with Application to <fixed-case>B</fixed-case>ulgarian News Articles</title>
       <author><first>Nikola</first><last>Taushanov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>15–22</pages>
       <abstract>With the development of the Internet, a huge amount of information is available every day. Therefore, text summarization has become critical part of our first access to the information. There are two major approaches for automatic text summarization: abstractive and extractive. In this work, we apply abstractive summarization algorithms on a corpus of Bulgarian news articles. In particular, we compare selected algorithms of both techniques and we show results which provide evidence that the selected state-of-the-art algorithms for abstractive text summarization perform better than the extractive ones for articles in Bulgarian. For the purpose of our experiments we collected a new dataset consisting of around 70,000 news articles and their topics. For research purposes we are also sharing the tools to easily collect and process such datasets.</abstract>
       <url hash="54860c95">2018.clib-1.4</url>
@@ -69,7 +69,7 @@
     <paper id="7">
       <title>Knowledge and Rule-Based Diacritic Restoration in <fixed-case>S</fixed-case>erbian</title>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
       <pages>41–51</pages>
       <abstract>In this paper we present a procedure for the restoration of diacritics in Serbian texts written using the degraded Latin alphabet. The procedure relies on the comprehensive lexical resources for Serbian: the morphological electronic dictionaries, the Corpus of Contemporary Serbian and local grammars. Dictionaries are used to identify possible candidates for the restoration, while the data obtained from SrpKor and local grammars assists in making a decision between several candidates in cases of ambiguity. The evaluation results reveal that, depending on the text, accuracy ranges from 95.03% to 99.36%, while the precision (average 98.93%) is always higher than the recall (average 94.94%).</abstract>
@@ -120,8 +120,8 @@
     </paper>
     <paper id="13">
       <title>Ontologies for Natural Language Processing: Case of <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
-      <author><first>Boris</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris</first><last>Dobrov</last></author>
       <pages>93–103</pages>
       <abstract>The paper describes the RuThes family of Russian thesauri intended for natural language processing and information retrieval applications. RuThes-like thesauri include, besides RuThes, Sociopolitical thesaurus, Security Thesaurus, and Ontology on Natural Sciences and Technologies. The RuThes format is based on three approaches for developing computer resources: Princeton WordNet, information-retrieval thesauri, and formal ontologies. The published version of RuThes thesaurus (RuThes-lite 2.0) became a basis for semi-automatic generation of RuWordNet, WordNet-like thesaurus for Russian. Currently researchers can use both RuThes-lite or RuWordNet and compare them in applications. Other RuThes-like resources are being prepared to publication.</abstract>
       <url hash="752ecb07">2018.clib-1.13</url>
@@ -129,9 +129,9 @@
     </paper>
     <paper id="14">
       <title>Resource-based <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Augmentation and Enrichment</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Miljana</first><last>Mladenović</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <author><first>Marko</first><last>Vitas</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
       <pages>104–114</pages>
@@ -152,7 +152,7 @@
     <paper id="16">
       <title>A Pilot Study for Enriching the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Medical Terms</title>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Grigorina</first><last>Mitrofan</last></author>
       <pages>126–134</pages>
       <abstract>This paper presents the preliminary investigations in the process of integrating a specialized vocabulary, namely medical terminology, into the Romanian wordnet. We focus here on four classes from this vocabulary: anatomy (or body parts), disorders, medical procedures and chemicals. In this pilot study we selected two large concepts from each class and created the Romanian terminological (sub)trees for each of them, starting from a medical thesaurus (SNOMED CT) and translating the terms, process which raised various challenges, all of them asking for the expertise of a specialist in the health care domain. The integration of these (sub)trees in the Romanian wordnet also required careful decision making, given the structural differences between a wordnet and a terminological thesaurus. They are presented and discussed herein.</abstract>
diff --git a/data/xml/2018.eamt.xml b/data/xml/2018.eamt.xml
index ab25132105..baac2f4752 100644
--- a/data/xml/2018.eamt.xml
+++ b/data/xml/2018.eamt.xml
@@ -8,12 +8,12 @@
       <year>2018</year>
       <editor><first>Juan Antonio</first><last>Pérez-Ortiz</last></editor>
       <editor><first>Felipe</first><last>Sánchez-Martínez</last></editor>
-      <editor><first>Miquel</first><last>Esplà-Gomis</last></editor>
-      <editor><first>Maja</first><last>Popović</last></editor>
+      <editor id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popović</last></editor>
       <editor><first>Celia</first><last>Rico</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Joachim</first><last>Van den Bogaert</last></editor>
-      <editor><first>Mikel L.</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></editor>
       <url hash="4d0559c5">2018.eamt-main</url>
       <venue>eamt</venue>
     </meta>
@@ -26,7 +26,7 @@
       <title>Contextual Handling in Neural Machine Translation: Look behind, ahead and on both sides</title>
       <author><first>Ruchit</first><last>Agrawal</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>31-40</pages>
       <url hash="f15b5650">2018.eamt-main.1</url>
       <abstract>A salient feature of Neural Machine Translation (NMT) is the end-to-end nature of training employed, eschewing the need of separate components to model different linguistic phenomena. Rather, an NMT model learns to translate individual sentences from the labeled data itself. However, traditional NMT methods trained on large parallel corpora with a one-to-one sentence mapping make an implicit assumption of sentence independence. This makes it challenging for current NMT systems to model inter-sentential discourse phenomena. While recent research in this direction mainly leverages a single previous source sentence to model discourse, this paper proposes the incorporation of a context window spanning previous as well as next sentences as source-side context and previously generated output as target-side context, using an effective non-recurrent architecture based on self-attention. Experiments show improvement over non-contextual models as well as contextual methods using only previous context.</abstract>
@@ -44,7 +44,7 @@
     <paper id="3">
       <title>Compositional Source Word Representations for Neural Machine Translation</title>
       <author><first>Duygu</first><last>Ataman</last></author>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>51-60</pages>
       <url hash="87e3a8e3">2018.eamt-main.3</url>
@@ -64,8 +64,8 @@
       <author><first>Sevilay</first><last>Bayatli</last></author>
       <author><first>Sefer</first><last>Kurnaz</last></author>
       <author><first>Ilnar</first><last>Salimzyanov</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>69-79</pages>
       <url hash="79ae615c">2018.eamt-main.5</url>
       <abstract>This paper presents a shallow-transfer machine translation (MT) system for translating from Kazakh to Turkish. Background on the differences between the languages is presented, followed by how the system was designed to handle some of these differences. The system is based on the Apertium free/open-source machine translation platform. The structure of the system and how it works is described, along with an evaluation against two competing systems. Linguistic components were developed, including a Kazakh-Turkish bilingual dictionary, Constraint Grammar disambiguation rules, lexical selection rules, and structural transfer rules. With many known issues yet to be addressed, our RBMT system has reached performance comparable to publicly-available corpus-based MT systems between the languages.</abstract>
@@ -102,8 +102,8 @@
     <paper id="9">
       <title>Are Automatic Metrics Robust and Reliable in Specific Machine Translation Tasks?</title>
       <author><first>Mara</first><last>Chinea-Rios</last></author>
-      <author><first>Alvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Alvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>109-118</pages>
       <url hash="09bb9a5b">2018.eamt-main.9</url>
       <abstract>We present a comparison of automatic metrics against human evaluations of translation quality in several scenarios which were unexplored up to now. Our experimentation was conducted on translation hypotheses that were problematic for the automatic metrics, as the results greatly diverged from one metric to another. We also compared three different translation technologies. Our evaluation shows that in most cases, the metrics capture the human criteria. However, we face failures of the automatic metrics when applied to some domains and systems. Interestingly, we find that automatic metrics applied to the neural machine translation hypotheses provide the most reliable results. Finally, we provide some advice when dealing with these problematic domains.</abstract>
@@ -112,8 +112,8 @@
     <paper id="10">
       <title>Creating the best development corpus for Statistical Machine Translation systems</title>
       <author><first>Mara</first><last>Chinea-Rios</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>119-128</pages>
       <url hash="e2aebeb9">2018.eamt-main.10</url>
       <abstract>We propose and study three different novel approaches for tackling the problem of development set selection in Statistical Machine Translation. We focus on a scenario where a machine translation system is leveraged for translating a specific test set, without further data from the domain at hand. Such test set stems from a real application of machine translation, where the texts of a specific e-commerce were to be translated. For developing our development-set selection techniques, we first conducted experiments in a controlled scenario, where labelled data from different domains was available, and evaluated the techniques both with classification and translation quality metrics. Then, the bestperforming techniques were evaluated on the e-commerce data at hand, yielding consistent improvements across two language directions.</abstract>
@@ -121,8 +121,8 @@
     </paper>
     <paper id="11">
       <title>Training Deployable General Domain <fixed-case>MT</fixed-case> for a Low Resource Language Pair: <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>angla</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>129-138</pages>
       <url hash="d5c84b03">2018.eamt-main.11</url>
       <abstract>A large percentage of the world’s population speaks a language of the Indian subcontinent, what we will call here Indic languages, comprising languages from both Indo-European (e.g., Hindi, Bangla, Gujarati, etc.) and Dravidian (e.g., Tamil, Telugu, Malayalam, etc.) families, upwards of 1.5 Billion people. A universal characteristic of Indic languages is their complex morphology, which, when combined with the general lack of sufficient quantities of high quality parallel data, can make developing machine translation (MT) for these languages difficult. In this paper, we describe our efforts towards developing general domain English–Bangla MT systems which are deployable to the Web. We initially developed and deployed SMT-based systems, but over time migrated to NMT-based systems. Our initial SMT-based systems had reasonably good BLEU scores, however, using NMT systems, we have gained significant improvement over SMT baselines. This is achieved using a number of ideas to boost the data store and counter data sparsity: crowd translation of intelligently selected monolingual data (throughput enhanced by an IME (Input Method Editor) designed specifically for QWERTY keyboard entry for Devanagari scripted languages), back-translation, different regularization techniques, dataset augmentation and early stopping.</abstract>
@@ -130,7 +130,7 @@
     </paper>
     <paper id="12">
       <title>Deep Neural Machine Translation with Weakly-Recurrent Units</title>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>139-148</pages>
       <url hash="4d8a6cd7">2018.eamt-main.12</url>
@@ -140,7 +140,7 @@
     <paper id="13">
       <title>Spelling Normalization of Historical Documents by Using a Machine Translation Approach</title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>149-158</pages>
       <url hash="ca688a5b">2018.eamt-main.13</url>
       <abstract>The lack of a spelling convention in historical documents makes their orthography to change depending on the author and the time period in which each document was written. This represents a problem for the preservation of the cultural heritage, which strives to create a digital text version of a historical document. With the aim of solving this problem, we propose three approaches—based on statistical, neural and character-based machine translation—to adapt the document’s spelling to modern standards. We tested these approaches in different scenarios, obtaining very encouraging results.</abstract>
@@ -149,14 +149,14 @@
     <paper id="14">
       <title>Neural Machine Translation of <fixed-case>B</fixed-case>asque</title>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Itziar</first><last>Cortes Etxabe</last></author>
       <author><first>Amaia</first><last>Jauregi Carrera</last></author>
       <author><first>Igor</first><last>Ellakuria Santos</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <author><first>Eusebi</first><last>Calonge</last></author>
       <pages>159-168</pages>
       <url hash="28dba588">2018.eamt-main.14</url>
@@ -165,9 +165,9 @@
     </paper>
     <paper id="15">
       <title>Evaluation of Terminology Translation in Instance-Based Neural <fixed-case>MT</fixed-case> Adaptation</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>169-178</pages>
@@ -227,8 +227,8 @@
     <paper id="21">
       <title>Letting a Neural Network Decide Which Machine Translation System to Use for Black-Box Fuzzy-Match Repair</title>
       <author><first>John E.</first><last>Ortega</last></author>
-      <author><first>Weiyi</first><last>Lu</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Lu</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>229-238</pages>
       <url hash="fa54b443">2018.eamt-main.21</url>
@@ -238,8 +238,8 @@
     <paper id="22">
       <title>Data selection for <fixed-case>NMT</fixed-case> using Infrequent n-gram Recovery</title>
       <author><first>Zuzanna</first><last>Parcheta</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>239-248</pages>
       <url hash="914a26b0">2018.eamt-main.22</url>
       <abstract>Neural Machine Translation (NMT) has achieved promising results comparable with Phrase-Based Statistical Machine Translation (PBSMT). However, to train a neural translation engine, much more powerful machines are required than those required to develop translation engines based on PBSMT. One solution to reduce the training cost of NMT systems is the reduction of the training corpus through data selection (DS) techniques. There are many DS techniques applied in PBSMT which bring good results. In this work, we show that the data selection technique based on infrequent n-gram occurrence described in (Gasco ́ et al., 2012) commonly used for PBSMT systems also works well for NMT systems. We focus our work on selecting data according to specific corpora using the previously mentioned technique. The specific-domain corpora used for our experiments are IT domain and medical domain. The DS technique significantly reduces the execution time required to train the model between 87% and 93%. Also, it improves translation quality by up to 2.8 BLEU points. The improvements are obtained with just a small fraction of the data that accounts for between 6% and 20% of the total data.</abstract>
@@ -248,7 +248,7 @@
     <paper id="23">
       <title>Translating Short Segments with <fixed-case>NMT</fixed-case>: A Case Study in <fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi</title>
       <author><first>Shantipriya</first><last>Parida</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>249-258</pages>
       <url hash="c89fe8a0">2018.eamt-main.23</url>
       <abstract>This paper presents a case study in translating short image captions of the Visual Genome dataset from English into Hindi using out-of-domain data sets of varying size. We experiment with three NMT models: the shallow and deep sequence-tosequence and the Transformer model as implemented in Marian toolkit. Phrase-based Moses serves as the baseline. The results indicate that the Transformer model outperforms others in the large data setting in a number of automatic metrics and manual evaluation, and it also produces the fewest truncated sentences. Transformer training is however very sensitive to the hyperparameters, so it requires more experimenting. The deep sequence-to-sequence model produced more flawless outputs in the small data setting and it was generally more stable, at the cost of more training iterations.</abstract>
@@ -298,7 +298,7 @@
     </paper>
     <paper id="28">
       <title>Integrating <fixed-case>MT</fixed-case> at <fixed-case>S</fixed-case>wiss Post’s Language Service: preliminary results</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sabrina</first><last>Girletti</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
@@ -311,7 +311,7 @@
     </paper>
     <paper id="29">
       <title>Iterative Data Augmentation for Neural Machine Translation: a Low Resource Case Study for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>elugu</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <pages>307-312</pages>
       <url hash="92283fd6">2018.eamt-main.29</url>
@@ -329,7 +329,7 @@
     <paper id="31">
       <title>Implementing a neural machine translation engine for mobile devices: the Lingvanex use case</title>
       <author><first>Zuzanna</first><last>Parcheta</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Aliaksei</first><last>Rudak</last></author>
       <author><first>Siarhei</first><last>Bratchenia</last></author>
       <pages>317-322</pages>
@@ -340,7 +340,7 @@
     <paper id="32">
       <title>Bootstrapping Multilingual Intent Models via Machine Translation for Dialog Automation</title>
       <author><first>Nicholas</first><last>Ruiz</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>John</first><last>Chen</last></author>
       <pages>323-328</pages>
       <url hash="c61780a2">2018.eamt-main.32</url>
@@ -472,7 +472,7 @@
     </paper>
     <paper id="47">
       <title>Developing a New <fixed-case>S</fixed-case>wiss Research Centre for Barrier-Free Communication</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Silvia</first><last>Rodríguez Vázquez</last></author>
       <author><first>Irene</first><last>Strasly</last></author>
       <pages>367</pages>
@@ -483,10 +483,10 @@
     <paper id="48">
       <title>Massively multilingual accessible audioguides via cell phones</title>
       <author><first>Itziar</first><last>Cortes</last></author>
-      <author><first>Igor</first><last>Leturia</last></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last></author>
       <author><first>Ińaki</first><last>Alegria</last></author>
       <author><first>Aitzol</first><last>Astigarraga</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <author><first>Manex</first><last>Garaio</last></author>
       <pages>369</pages>
       <url hash="a13f82b4">2018.eamt-main.48</url>
@@ -498,7 +498,7 @@
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>Borja</first><last>Anza Porras</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Paulo</first><last>Vale</last></author>
       <author><first>José Luis</first><last>Fonseca</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
@@ -506,13 +506,13 @@
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <author><first>Pedro</first><last>Neiva</last></author>
       <author><first>Rui</first><last>Neto</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>David</first><last>Perez Fernandez</last></author>
-      <author><first>Antonio</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
       <author><first>Luis</first><last>Gomes</last></author>
       <pages>371</pages>
@@ -524,7 +524,7 @@
       <title>The <fixed-case>SUMMA</fixed-case> Platform: Scalable Understanding of Multilingual Media</title>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Peggy</first><last>van der Kreeft</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>373</pages>
       <url hash="50f7442a">2018.eamt-main.50</url>
@@ -597,12 +597,12 @@
       <author><first>Tom</first><last>Vanallemeersch</last></author>
       <author><first>Bram</first><last>Bulté</last></author>
       <author><first>Liesbeth</first><last>Augustinus</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <author><first>Joris</first><last>Pelemans</last></author>
       <author><first>Lyan</first><last>Verwimp</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <author><first>Geert</first><last>Heyman</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Iulianna</first><last>van der Lek-Ciudin</last></author>
       <author><first>Frieda</first><last>Steurs</last></author>
       <author><first>Ayla</first><last>Rigouts Terryn</last></author>
diff --git a/data/xml/2018.gwc.xml b/data/xml/2018.gwc.xml
index cc992eb283..14ab7c4a81 100644
--- a/data/xml/2018.gwc.xml
+++ b/data/xml/2018.gwc.xml
@@ -33,7 +33,7 @@
       <author><first>Anupam</first><last>Mondal</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>10–16</pages>
       <abstract>Information extraction in the medical domain is laborious and time-consuming due to the insufficient number of domain-specific lexicons and lack of involvement of domain experts such as doctors and medical practitioners. Thus, in the present work, we are motivated to design a new lexicon, WME 3.0 (WordNet of Medical Events), which contains over 10,000 medical concepts along with their part of speech, gloss (descriptive explanations), polarity score, sentiment, similar sentiment words, category, affinity score and gravity score features. In addition, the manual annotators help to validate the overall as well as individual category level of medical concepts of WME 3.0 using Cohen’s Kappa agreement metric. The agreement score indicates almost correct identification of medical concepts and their assigned features in WME 3.0.</abstract>
       <url hash="63b8721e">2018.gwc-1.2</url>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>Towards Cross-checking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case> Using Meronymy</title>
       <author><first>Javier</first><last>Álvez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>25–33</pages>
       <abstract>We describe the practical application of a black-box testing methodology for the validation of the knowledge encoded in WordNet, SUMO and their mapping by using automated theorem provers. In this paper,weconcentrateonthepart-whole information provided by WordNet and create a large set of tests on the basis of few question patterns. From our preliminary evaluation results, we report on some of the detected inconsistencies.</abstract>
       <url hash="a32aa073">2018.gwc-1.4</url>
@@ -62,9 +62,9 @@
     </paper>
     <paper id="5">
       <title>Comparing Two Thesaurus Representations for <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>German</first><last>Lashevich</last></author>
-      <author><first>Boris</first><last>Dobrov</last></author>
+      <author id="boris-v-dobrov"><first>Boris</first><last>Dobrov</last></author>
       <pages>34–43</pages>
       <abstract>In the paper we presented a new Russian wordnet, RuWordNet, which was semi-automatically obtained by transformation of the existing Russian thesaurus RuThes. At the first step, the basic structure of wordnets was reproduced: synsets’ hierarchy for each part of speech and the basic set of relations between synsets (hyponym-hypernym, part-whole, antonyms). At the second stage, we added causation, entailment and domain relations between synsets. Also derivation relations were established for single words and the component structure for phrases included in RuWordNet. The described procedure of transformation highlights the specific features of each type of thesaurus representations.</abstract>
       <url hash="f7f2ae2b">2018.gwc-1.5</url>
@@ -89,7 +89,7 @@
     </paper>
     <paper id="8">
       <title>Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Instances to <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>61–68</pages>
       <abstract>Lexical resource differ from encyclopaedic resources and represent two distinct types of resource covering general language and named entities respectively. However, many lexical resources, including Princeton WordNet, contain many proper nouns, referring to named entities in the world yet it is not possible or desirable for a lexical resource to cover all named entities that may reasonably occur in a text. In this paper, we propose that instead of including synsets for instance concepts PWN should instead provide links to Wikipedia articles describing the concept. In order to enable this we have created a gold-quality mapping between all of the 7,742 instances in PWN and Wikipedia (where such a mapping is possible). As such, this resource aims to provide a gold standard for link discovery, while also allowing PWN to distinguish itself from other resources such as DBpedia or BabelNet. Moreover, this linking connects PWN to the Linguistic Linked Open Data cloud, thus creating a richer, more usable resource for natural language processing.</abstract>
       <url hash="f49cf29b">2018.gwc-1.8</url>
@@ -109,8 +109,8 @@
     <paper id="10">
       <title>Improving Wordnets for Under-Resourced Languages Using Machine Translation</title>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>77–86</pages>
       <abstract>Wordnets are extensively used in natural language processing, but the current approaches for manually building a wordnet from scratch involves large research groups for a long period of time, which are typically not available for under-resourced languages. Even if wordnet-like resources are available for under-resourced languages, they are often not easily accessible, which can alter the results of applications using these resources. Our proposed method presents an expand approach for improving and generating wordnets with the help of machine translation. We apply our methods to improve and extend wordnets for the Dravidian languages, i.e., Tamil, Telugu, Kannada, which are severly under-resourced languages. We report evaluation results of the generated wordnet senses in term of precision for these languages. In addition to that, we carried out a manual evaluation of the translations for the Tamil language, where we demonstrate that our approach can aid in improving wordnet resources for under-resourced Dravidian languages.</abstract>
       <url hash="57927f05">2018.gwc-1.10</url>
@@ -196,7 +196,7 @@
     <paper id="19">
       <title>The Company They Keep: Extracting <fixed-case>J</fixed-case>apanese Neologisms Using Language Patterns</title>
       <author><first>James</first><last>Breen</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>163–171</pages>
       <abstract>We describe an investigation into the identification and extraction of unrecorded potential lexical items in Japanese text by detecting text passages containing selected language patterns typically associated with such items. We identified a set of suitable patterns, then tested them with two large collections of text drawn from the WWW and Twitter. Samples of the extracted items were evaluated, and it was demonstrated that the approach has considerable potential for identifying terms for later lexicographic analysis.</abstract>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="21">
       <title>Towards a principled approach to sense clustering – a case study of wordnet and dictionary senses in <fixed-case>D</fixed-case>anish</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Manex</first><last>Agirrezabal</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Ida</first><last>Olsen</last></author>
@@ -232,7 +232,7 @@
       <author><first>Agnieszka</first><last>Dziob</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>190–199</pages>
       <abstract>The paper presents a new re-built and expanded, version 2.0 of WordnetLoom – an open wordnet editor. It facilitates work on a multilingual system of wordnets, is based on efficient software architecture of thin client, and offers more flexibility in enriching wordnet representation. This new version is built on the experience collected during the use of the previous one for more than 10 years of plWordNet development. We discuss its extensions motivated by the collected experience. A special focus is given to the development of a variant for the needs of MultiWordnet of Portuguese, which is based on a very different wordnet development model.</abstract>
       <url hash="1ac23b8c">2018.gwc-1.22</url>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="23">
       <title>Translation Equivalence and Synonymy: Preserving the Synsets in Cross-lingual Wordnets</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>200–208</pages>
       <abstract>The Princeton WordNet for English was founded on the synonymy relation, and multilingual wordnets are primarily developed by creating equivalent synsets in the respective languages. The process would often rely on translation equivalents obtained from existing bilingual dictionaries. This paper discusses some observations from the Chinese Open Wordnet, especially from the adjective subnet, to illuminate potential blind spots of the approach which may lead to the formation of non-synsets in the new wordnet. With cross-linguistic differences duly taken into account, alternative representations of cross-lingual lexical relations are proposed to better capture the language-specific properties. It is also suggested that such cross-lingual representation encompassing the cognitive as well as linguistic aspects of meaning is beneficial for a lexical resource to be used by both humans and computers.</abstract>
       <url hash="0304c006">2018.gwc-1.23</url>
@@ -284,8 +284,8 @@
       <title>Distant Supervision for Relation Extraction with Multi-sense Word Embedding</title>
       <author><first>Sangha</first><last>Nam</last></author>
       <author><first>Kijong</first><last>Han</last></author>
-      <author><first>Eun-Kyung</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="eun-kyung-kim"><first>Eun-Kyung</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>239–244</pages>
       <abstract>Distant supervision can automatically generate labeled data between a large-scale corpus and a knowledge base without utilizing human efforts. Therefore, many studies have used the distant supervision approach in relation extraction tasks. However, existing studies have a disadvantage in that they do not reflect the homograph in the word embedding used as an input of the relation extraction model. Thus, it can be seen that the relation extraction model learns without grasping the meaning of the word accurately. In this paper, we propose a relation extraction model with multi-sense word embedding. We learn multi-sense word embedding using a word sense disambiguation module. In addition, we use convolutional neural network and piecewise max pooling convolutional neural network relation extraction models that efficiently grasp key features in sentences. To evaluate the performance of the proposed model, two additional methods of word embedding were learned and compared. Accordingly, our method showed the highest performance among them.</abstract>
       <url hash="461fc3cb">2018.gwc-1.27</url>
@@ -296,7 +296,7 @@
       <author><first>Rahmad</first><last>Mahendra</last></author>
       <author><first>Heninggar</first><last>Septiantri</last></author>
       <author><first>Haryo Akbarianto</first><last>Wibowo</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <author><first>Mirna</first><last>Adriani</last></author>
       <pages>245–250</pages>
       <abstract>Ambiguity is a problem we frequently face in Natural Language Processing. Word Sense Disambiguation (WSD) is a task to determine the correct sense of an ambiguous word. However, research in WSD for Indonesian is still rare to find. The availability of English-Indonesian parallel corpora and WordNet for both languages can be used as training data for WSD by applying Cross-Lingual WSD method. This training data is used as an input to build a model using supervised machine learning algorithms. Our research also examines the use of Word Embedding features to build the WSD model.</abstract>
@@ -316,7 +316,7 @@
     <paper id="30">
       <title>Simple Embedding-Based Word Sense Disambiguation</title>
       <author><first>Dieke</first><last>Oele</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>259–265</pages>
       <abstract>We present a simple knowledge-based WSD method that uses word and sense embeddings to compute the similarity between the gloss of a sense and the context of the word. Our method is inspired by the Lesk algorithm as it exploits both the context of the words and the definitions of the senses. It only requires large unlabeled corpora and a sense inventory such as WordNet, and therefore does not rely on annotated data. We explore whether additional extensions to Lesk are compatible with our method. The results of our experiments show that by lexically extending the amount of words in the gloss and context, although it works well for other implementations of Lesk, harms our method. Using a lexical selection method on the context words, on the other hand, improves it. The combination of our method with lexical selection enables our method to outperform state-of the art knowledge-based systems.</abstract>
       <url hash="c19f0f0c">2018.gwc-1.30</url>
@@ -326,7 +326,7 @@
       <title>Semi-automatic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Linking using Word Embeddings</title>
       <author><first>Kevin</first><last>Patel</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>266–271</pages>
       <abstract>Wordnets are rich lexico-semantic resources. Linked wordnets are extensions of wordnets, which link similar concepts in wordnets of different languages. Such resources are extremely useful in many Natural Language Processing (NLP) applications, primarily those based on knowledge-based approaches. In such approaches, these resources are considered as gold standard/oracle. Thus, it is crucial that these resources hold correct information. Thereby, they are created by human experts. However, manual maintenance of such resources is a tedious and costly affair. Thus techniques that can aid the experts are desirable. In this paper, we propose an approach to link wordnets. Given a synset of the source language, the approach returns a ranked list of potential candidate synsets in the target language from which the human expert can choose the correct one(s). Our technique is able to retrieve a winner synset in the top 10 ranked list for 60% of all synsets and 70% of noun synsets.</abstract>
       <url hash="ba85ab63">2018.gwc-1.31</url>
@@ -343,7 +343,7 @@
     </paper>
     <paper id="33">
       <title>Grammatical Role Embeddings for Enhancements of Relation Density in the <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
       <author><first>Iliana</first><last>Simova</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
@@ -355,7 +355,7 @@
     <paper id="34">
       <title>An Iterative Approach for Unsupervised Most Frequent Sense Detection using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Word Embeddings</title>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>293–297</pages>
       <abstract>Given a word, what is the most frequent sense in which it occurs in a given corpus? Most Frequent Sense (MFS) is a strong baseline for unsupervised word sense disambiguation. If we have large amounts of sense-annotated corpora, MFS can be trivially created. However, sense-annotated corpora are a rarity. In this paper, we propose a method which can compute MFS from raw corpora. Our approach iteratively exploits the semantic congruity among related words in corpus. Our method performs better compared to another similar work.</abstract>
       <url hash="82a1e150">2018.gwc-1.34</url>
@@ -365,7 +365,7 @@
       <title>Automatic Identification of Basic-Level Categories</title>
       <author><first>Chad</first><last>Mills</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>298–305</pages>
       <abstract>Basic-level categories have been shown to be both psychologically significant and useful in a wide range of practical applications. We build a rule-based system to identify basic-level categories in WordNet, achieving 77% accuracy on a test set derived from prior psychological experiments. With additional annotations we found our system also has low precision, in part due to the existence of many categories that do not fit into the three classes (superordinate, basic-level, and subordinate) relied on in basic-level category research.</abstract>
       <url hash="8056cd88">2018.gwc-1.35</url>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="36">
       <title><fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet: facilitating language learning in <fixed-case>A</fixed-case>frican languages</title>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <author><first>Marissa</first><last>Griesel</last></author>
       <pages>306–313</pages>
       <abstract>The development of the African Wordnet (AWN) has reached a stage of maturity where the first steps towards an application can be attempted. The AWN is based on the expand method, and to compensate for the general resource scarceness of the African languages, various development strategies were used. The aim of this paper is to investigate the usefulness of the current isiZulu Wordnet in an application such as language learning. The advantage of incorporating the wordnet of a language into a language learning system is that it provides learners with an integrated application to enhance their learning experience by means of the unique sense identification features of wordnets. In this paper it will be demonstrated by means of a variety of examples within the context of a basic free online course how the isiZulu Wordnet can offer the language learner improved decision support.</abstract>
@@ -389,8 +389,8 @@
       <author><first>Laxmi</first><last>Kashyap</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>314–323</pages>
       <abstract>This paper reports the work related to making Hindi Wordnet1 available as a digital resource for language learning and teaching, and the experiences and lessons that were learnt during the process. The language data of the Hindi Wordnet has been suitably modified and enhanced to make it into a language learning aid. This aid is based on modern pedagogical axioms and is aligned to the learning objectives of the syllabi of the school education in India. To make it into a comprehensive language tool, grammatical information has also been encoded, as far as these can be marked on the lexical items. The delivery of information is multi-layered, multi-sensory and is available across multiple digital platforms. The front end has been designed to offer an eye-catching user-friendly interface which is suitable for learners starting from age six onward. Preliminary testing of the tool has been done and it has been modified as per the feedbacks that were received. Above all, the entire exercise has offered gainful insights into learning based on associative networks and how knowledge based on such networks can be made available to modern learners.</abstract>
       <url hash="add72dd2">2018.gwc-1.37</url>
@@ -418,8 +418,8 @@
     </paper>
     <paper id="40">
       <title><fixed-case>ELEXIS</fixed-case> - a <fixed-case>E</fixed-case>uropean infrastructure fostering cooperation and information exchange among lexicographical research communities</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <pages>335–340</pages>
@@ -494,7 +494,7 @@
       <title>pyiwn: A Python based <fixed-case>API</fixed-case> to access <fixed-case>I</fixed-case>ndian Language <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets</title>
       <author><first>Ritesh</first><last>Panjwani</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>378–383</pages>
       <abstract>Indian language WordNets have their individual web-based browsing interfaces along with a common interface for IndoWordNet. These interfaces prove to be useful for language learners and in an educational domain, however, they do not provide the functionality of connecting to them and browsing their data through a lucid application programming interface or an API. In this paper, we present our work on creating such an easy-to-use framework which is bundled with the data for Indian language WordNets and provides NLTK WordNet interface like core functionalities in Python. Additionally, we use a pre-built speech synthesis system for Hindi language and augment Hindi data with audios for words, glosses, and example sentences. We provide a detailed usage of our API and explain the functions for ease of the user. Also, we package the IndoWordNet data along with the source code and provide it openly for the purpose of research. We aim to provide all our work as an open source framework for further development.</abstract>
       <url hash="5138be68">2018.gwc-1.47</url>
@@ -502,8 +502,8 @@
     </paper>
     <paper id="48">
       <title>Sinitic <fixed-case>W</fixed-case>ordnet: Laying the Groundwork with <fixed-case>C</fixed-case>hinese Varieties Written in Traditional Characters</title>
-      <author><first>Chih-Yao</first><last>Lee</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="chi-yao-lee"><first>Chih-Yao</first><last>Lee</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>384–387</pages>
       <abstract>The present work seeks to make the logographic nature of Chinese script a relevant research ground in wordnet studies. While wordnets are not so much about words as about the concepts represented in words, synset formation inevitably involves the use of orthographic and/or phonetic representations to serve as headword for a given concept. For wordnets of Chinese languages, if their synsets are mapped with each other, the connection from logographic forms to lexicalized concepts can be explored backwards to, for instance, help trace the development of cognates in different varieties of Chinese. The Sinitic Wordnet project is an attempt to construct such an integrated wordnet that aggregates three Chinese varieties that are widely spoken in Taiwan and all written in traditional Chinese characters.</abstract>
       <url hash="533fd25f">2018.gwc-1.48</url>
@@ -513,7 +513,7 @@
       <title>Synthesizing Audio for <fixed-case>H</fixed-case>indi <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>388–393</pages>
       <abstract>In this paper, we describe our work on the creation of a voice model using a speech synthesis system for the Hindi Language. We use pre-existing “voices”, use publicly available speech corpora to create a “voice” using the Festival Speech Synthesis System (Black, 1997). Our contribution is two-fold: (1) We scrutinize multiple speech synthesis systems and provide an extensive report on the currently available state-of-the-art systems. We also develop voices using the existing implementations of the aforementioned systems, and (2) We use these voices to generate sample audios for randomly chosen words; manually evaluate the audio generated, and produce audio for all WordNet words using the winner voice model. We also produce audios for the Hindi WordNet Glosses and Example sentences. We describe our efforts to use pre-existing implementations for WaveNet - a model to generate raw audio using neural nets (Oord et al., 2016) and generate speech for Hindi. Our lexicographers perform a manual evaluation of the audio generated using multiple voices. A qualitative and quantitative analysis reveals that the voice model generated by us performs the best with an accuracy of 0.44.</abstract>
       <url hash="f84b5ab6">2018.gwc-1.49</url>
@@ -531,7 +531,7 @@
     </paper>
     <paper id="51">
       <title>Towards a Crowd-Sourced <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Colloquial <fixed-case>E</fixed-case>nglish</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Ian</first><last>Wood</last></author>
       <author><first>Amanda</first><last>Hicks</last></author>
       <pages>401–406</pages>
diff --git a/data/xml/2018.icon.xml b/data/xml/2018.icon.xml
index a5d9d1f9ad..1a96919381 100644
--- a/data/xml/2018.icon.xml
+++ b/data/xml/2018.icon.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2023-10-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 15th International Conference on Natural Language Processing</booktitle>
-      <editor><first>Gurpreet Singh</first><last>Lehal</last></editor>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
       <publisher>NLP Association of India</publisher>
       <address>International Institute of Information Technology, Hyderabad, India</address>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Abstractive Summarization Using Attentive Neural Techniques</title>
       <author><first>Jacob</first><last>Krantz</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>1–9</pages>
       <url hash="fd9d0427">2018.icon-1.1</url>
       <bibkey>krantz-kalita-2018-abstractive</bibkey>
@@ -47,7 +47,7 @@
       <author><first>Pranaw</first><last>Kumar</last></author>
       <author><first>BiraChandra</first><last>Singh</last></author>
       <author><first>Prakash B.</first><last>Pimpale</last></author>
-      <author><first>Sasikumar</first><last>M.</last></author>
+      <author id="sasikumar-m"><first>Sasikumar</first><last>M.</last></author>
       <pages>28–34</pages>
       <url hash="b90cbb57">2018.icon-1.4</url>
       <bibkey>ghone-etal-2018-automatic</bibkey>
@@ -74,8 +74,8 @@
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
-      <author><first>Girish K.</first><last>Palshikar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish K.</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Vasudeva</first><last>Verma</last></author>
       <pages>50–58</pages>
       <url hash="5778b1dd">2018.icon-1.7</url>
@@ -85,7 +85,7 @@
       <title>Deep Learning methods for Semantic Role Labeling in <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Aishwary</first><last>Gupta</last></author>
       <author><first>Akshay</first><last>Pawale</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>59–68</pages>
       <url hash="b43225f2">2018.icon-1.8</url>
       <bibkey>gupta-etal-2018-deep-learning</bibkey>
@@ -126,7 +126,7 @@
       <title>Does Curriculum Learning help Deep Learning for Natural Language Generation?</title>
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last></author>
       <author><first>Krishnanjan</first><last>Bhattacharjee</last></author>
       <author><first>Hemant</first><last>Darbari</last></author>
       <author><first>Seema</first><last>Verma</last></author>
@@ -138,7 +138,7 @@
       <title><fixed-case>W</fixed-case>up<fixed-case>L</fixed-case>e<fixed-case>B</fixed-case>leu: The Word-net Based Evaluation Metric for Machine Translation</title>
       <author><first>Debajyoty</first><last>Banik</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>99–103</pages>
       <url hash="8bd64877">2018.icon-1.14</url>
       <bibkey>banik-etal-2018-wuplebleu</bibkey>
@@ -146,7 +146,7 @@
     <paper id="15">
       <title>“Is This A Joke?”: A Large Humor Classification Dataset</title>
       <author><first>Faraz</first><last>Faruqi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>104–109</pages>
       <url hash="c8a4b532">2018.icon-1.15</url>
       <bibkey>faruqi-shrivastava-2018-joke</bibkey>
@@ -163,7 +163,7 @@
       <title>A Content-based Recommendation System for Medical Concepts: Disease and Symptom</title>
       <author><first>Anupam</first><last>Mondal</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>115–121</pages>
       <url hash="d628d0ab">2018.icon-1.17</url>
       <bibkey>mondal-etal-2018-content</bibkey>
@@ -173,7 +173,7 @@
       <author><first>Zishan</first><last>Ahmad</last></author>
       <author><first>Sahoo</first><last>Sovan Kumar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>122–131</pages>
       <url hash="fcd28aea">2018.icon-1.18</url>
       <bibkey>ahmad-etal-2018-deep</bibkey>
@@ -191,7 +191,7 @@
       <title>Improving Computer Generated Dialog with Auxiliary Loss Functions and Custom Evaluation Metrics</title>
       <author><first>Thomas</first><last>Conley</last></author>
       <author><first>Jack</first><last>St. Clair</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>138–144</pages>
       <url hash="db18d5dc">2018.icon-1.20</url>
       <bibkey>conley-etal-2018-improving</bibkey>
@@ -230,10 +230,10 @@
     </paper>
     <paper id="25">
       <title><fixed-case>SMT</fixed-case> vs <fixed-case>NMT</fixed-case>: A Comparison over <fixed-case>H</fixed-case>indi and <fixed-case>B</fixed-case>engali Simple Sentences</title>
-      <author><first>Sainik Kumar</first><last>Mahata</last></author>
-      <author><first>Soumil</first><last>Mandal</last></author>
+      <author id="sainik-mahata"><first>Sainik Kumar</first><last>Mahata</last></author>
+      <author id="soumil-mandal"><first>Soumil</first><last>Mandal</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>175–182</pages>
       <url hash="a1b6ce4a">2018.icon-1.25</url>
       <bibkey>mahata-etal-2018-smt</bibkey>
@@ -241,9 +241,9 @@
     <paper id="26">
       <title>Helping each Other: A Framework for Customer-to-Customer Suggestion Mining using a Semi-supervised Deep Neural Network</title>
       <author><first>Hitesh</first><last>Golchha</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>183–192</pages>
       <url hash="e6500dba">2018.icon-1.26</url>
       <bibkey>golchha-etal-2018-helping</bibkey>
diff --git a/data/xml/2018.ijclclp.xml b/data/xml/2018.ijclclp.xml
index e7d567e349..1eed3a5a41 100644
--- a/data/xml/2018.ijclclp.xml
+++ b/data/xml/2018.ijclclp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-07-21" type="journal">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 23, Number 1, June 2018</booktitle>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <editor><first>Chia-Hui</first><last>Chang</last></editor>
       <publisher>Association for Computational Linguistics and Chinese Language Processing</publisher>
       <address>Taipei, Taiwan</address>
@@ -30,7 +30,7 @@
       <author><first>Yu-Shuo</first><last>Liu</last></author>
       <author><first>Chin-Po</first><last>Chen</last></author>
       <author><first>Susan Shur-Fen</first><last>Gau</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <url hash="f938e4a8">2018.ijclclp-1.2</url>
       <language>zho</language>
       <bibkey>liu-etal-2018-chang</bibkey>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>整合個人化磁振造影深度神經網路之演算法技術 (Joint Modeling of Individual Neural Responses using a Deep Voting Fusion Network for Automatic Emotion Perception Decoding)</title>
       <author><first>Wan-Ting</first><last>Hsieh</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <url hash="160c6c85">2018.ijclclp-1.3</url>
       <language>zho</language>
       <bibkey>hsieh-lee-2018-zheng</bibkey>
@@ -64,7 +64,7 @@
     <paper id="1">
       <title>使用長短期記憶類神經網路建構中文語音辨識器之研究 (A Study on <fixed-case>M</fixed-case>andarin Speech Recognition using Long Short- Term Memory Neural Network)</title>
       <author><first>Chien-hung</first><last>Lai</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <url hash="9d04b56b">2018.ijclclp-2.1</url>
       <language>zho</language>
       <bibkey>lai-wang-2018-shi-yong</bibkey>
@@ -108,7 +108,7 @@
     <paper id="6">
       <title>以深層類神經網路標記中文階層式多標籤語意概念 (Hierarchical Multi-Label <fixed-case>C</fixed-case>hinese Word Semantic Labeling using Deep Neural Network)</title>
       <author><first>Wei-Chieh</first><last>Chou</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <url hash="e5fa478e">2018.ijclclp-2.6</url>
       <language>zho</language>
       <bibkey>chou-wang-2018-yi-shen</bibkey>
diff --git a/data/xml/2018.iwslt.xml b/data/xml/2018.iwslt.xml
index 68168ed2d5..e58feae793 100644
--- a/data/xml/2018.iwslt.xml
+++ b/data/xml/2018.iwslt.xml
@@ -21,7 +21,7 @@
       <title>The <fixed-case>IWSLT</fixed-case> 2018 Evaluation Campaign</title>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Rolando</first><last>Cattoni</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
@@ -35,7 +35,7 @@
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Fabienne</first><last>Braune</last></author>
       <author><first>Yuliya</first><last>Kalasouskaya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>7-13</pages>
       <abstract>Mining parallel sentences from comparable corpora is of great interest for many downstream tasks. In the BUCC 2017 shared task, systems performed well by training on gold standard parallel sentences. However, we often want to mine parallel sentences without bilingual supervision. We present a simple approach relying on bilingual word embeddings trained in an unsupervised fashion. We incorporate orthographic similarity in order to handle words with similar surface forms. In addition, we propose a dynamic threshold method to decide if a candidate sentence-pair is parallel which eliminates the need to fine tune a static value for different datasets. Since we do not employ any language specific engineering our approach is highly generic. We show that our approach is effective, on three language-pairs, without the use of any bilingual signal which is important because parallel sentence mining is most useful in low resource scenarios.</abstract>
       <url hash="cb9f14e0">2018.iwslt-1.2</url>
@@ -55,7 +55,7 @@
     <paper id="4">
       <title>Analyzing Knowledge Distillation in Neural Machine Translation</title>
       <author><first>Dakun</first><last>Zhang</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>23-30</pages>
       <abstract>Knowledge distillation has recently been successfully applied to neural machine translation. It allows for building shrunk networks while the resulting systems retain most of the quality of the original model. Despite the fact that many authors report on the benefits of knowledge distillation, few have discussed the actual reasons why it works, especially in the context of neural MT. In this paper, we conduct several experiments aimed at understanding why and how distillation impacts accuracy on an English-German translation task. We show that translation complexity is actually reduced when building a distilled/synthesised bi-text when compared to the reference bi-text. We further remove noisy data from synthesised translations and merge filtered synthesised data together with original reference, thus achieving additional gains in terms of accuracy.</abstract>
@@ -77,7 +77,7 @@
     <paper id="6">
       <title>A Machine Translation Approach for Modernizing Historical Documents Using Backtranslation</title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>39-47</pages>
       <abstract>Human language evolves with the passage of time. This makes historical documents to be hard to comprehend by contemporary people and, thus, limits their accessibility to scholars specialized in the time period in which a certain document was written. Modernization aims at breaking this language barrier and increase the accessibility of historical documents to a broader audience. To do so, it generates a new version of a historical document, written in the modern version of the document’s original language. In this work, we propose several machine translation approaches for modernizing historical documents. We tested these approaches in different scenarios, obtaining very encouraging results.</abstract>
       <url hash="586a90eb">2018.iwslt-1.6</url>
@@ -99,7 +99,7 @@
       <title>Transfer Learning in Multilingual Neural Machine Translation with Dynamic Vocabulary</title>
       <author><first>Surafel M.</first><last>Lakew</last></author>
       <author><first>Aliia</first><last>Erofeeva</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>54-61</pages>
@@ -137,7 +137,7 @@
       <title>The <fixed-case>ADAPT</fixed-case> System Description for the <fixed-case>IWSLT</fixed-case> 2018 <fixed-case>B</fixed-case>asque to <fixed-case>E</fixed-case>nglish Translation Task</title>
       <author><first>Alberto</first><last>Poncelas</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>76-82</pages>
       <abstract>In this paper we present the ADAPT system built for the Basque to English Low Resource MT Evaluation Campaign. Basque is a low-resourced, morphologically-rich language. This poses a challenge for Neural Machine Translation models which usually achieve better performance when trained with large sets of data. Accordingly, we used synthetic data to improve the translation quality produced by a model built using only authentic data. Our proposal uses back-translated data to: (a) create new sentences, so the system can be trained with more data; and (b) translate sentences that are close to the test set, so the model can be fine-tuned to the document to be translated.</abstract>
       <url hash="42692a7e">2018.iwslt-1.11</url>
@@ -154,7 +154,7 @@
     <paper id="13">
       <title>The <fixed-case>M</fixed-case>e<fixed-case>MAD</fixed-case> Submission to the <fixed-case>IWSLT</fixed-case> 2018 Speech Translation Task</title>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Aku</first><last>Rouhe</last></author>
       <author><first/><last>Stig-ArneGrönroos</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
@@ -165,7 +165,7 @@
     </paper>
     <paper id="14">
       <title>Prompsit’s Submission to the <fixed-case>IWSLT</fixed-case> 2018 Low Resource Machine Translation Task</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <pages>95-103</pages>
       <abstract>This paper presents Prompsit Language Engineering’s submission to the IWSLT 2018 Low Resource Machine Translation task. Our submission is based on cross-lingual learning: a multilingual neural machine translation system was created with the sole purpose of improving translation quality on the Basque-to-English language pair. The multilingual system was trained on a combination of in-domain data, pseudo in-domain data obtained via cross-entropy data selection and backtranslated data. We morphologically segmented Basque text with a novel approach that only requires a dictionary such as those used by spell checkers and proved that this segmentation approach outperforms the widespread byte pair encoding strategy for this task.</abstract>
       <url hash="6a2f1ec9">2018.iwslt-1.14</url>
@@ -233,13 +233,13 @@
     <paper id="19">
       <title><fixed-case>KIT</fixed-case>’s <fixed-case>IWSLT</fixed-case> 2018 <fixed-case>SLT</fixed-case> Translation System</title>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>131-135</pages>
       <abstract>This paper describes KIT’s submission to the IWSLT 2018 Translation task. We describe a system participating in the baseline condition and a system participating in the end-to-end condition. The baseline system is a cascade of an ASR system, a system to segment the ASR output and a neural machine translation system. We investigate the combination of different ASR systems. For the segmentation and machine translation components, we focused on transformer-based architectures.</abstract>
       <url hash="c0fa2a6d">2018.iwslt-1.19</url>
@@ -251,7 +251,7 @@
       <author><first>Hongjie</first><last>Chen</last></author>
       <author><first>Kai</first><last>Fan</last></author>
       <author><first>Cheung-Chi</first><last>Leung</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Chongjia</first><last>Ni</last></author>
       <author><first>Rong</first><last>Tong</last></author>
       <author><first>Pei</first><last>Zhang</last></author>
@@ -266,8 +266,8 @@
     <paper id="21">
       <title><fixed-case>CUNI</fixed-case> <fixed-case>B</fixed-case>asque-to-<fixed-case>E</fixed-case>nglish Submission in <fixed-case>IWSLT</fixed-case>18</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>142-146</pages>
       <abstract>We present our submission to the IWSLT18 Low Resource task focused on the translation from Basque-to-English. Our submission is based on the current state-of-the-art self-attentive neural network architecture, Transformer. We further improve this strong baseline by exploiting available monolingual data using the back-translation technique. We also present further improvements gained by a transfer learning, a technique that trains a model using a high-resource language pair (Czech-English) and then fine-tunes the model using the target low-resource language pair (Basque-English).</abstract>
       <url hash="742d6232">2018.iwslt-1.21</url>
@@ -275,10 +275,10 @@
     </paper>
     <paper id="22">
       <title>Fine-tuning on Clean Data for End-to-End Speech Translation: <fixed-case>FBK</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2018</title>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
       <author><first>Roberto</first><last>Dessì</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>147-152</pages>
       <abstract>This paper describes FBK’s submission to the end-to-end English-German speech translation task at IWSLT 2018. Our system relies on a state-of-the-art model based on LSTMs and CNNs, where the CNNs are used to reduce the temporal dimension of the audio input, which is in general much higher than machine translation input. Our model was trained only on the audio-to-text parallel data released for the task, and fine-tuned on cleaned subsets of the original training corpus. The addition of weight normalization and label smoothing improved the baseline system by 1.0 BLEU point on our validation set. The final submission also featured checkpoint averaging within a training run and ensemble decoding of models trained during multiple runs. On test data, our best single model obtained a BLEU score of 9.7, while the ensemble obtained a BLEU score of 10.24.</abstract>
@@ -310,7 +310,7 @@
     <paper id="25">
       <title>Learning to Segment Inputs for <fixed-case>NMT</fixed-case> Favors Character-Level Processing</title>
       <author><first>Julia</first><last>Kreutzer</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <pages>166-172</pages>
       <abstract>Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choices, we present an algorithm for dynamic segmentation, that is trainable end-to-end and driven by the NMT objective. In an evaluation on four translation tasks we found that, given the freedom to navigate between different segmentation levels, the model prefers to operate on (almost) character level, providing support for purely character-level NMT models from a novel angle.</abstract>
       <url hash="0aa4c724">2018.iwslt-1.25</url>
diff --git a/data/xml/2018.jeptalnrecital.xml b/data/xml/2018.jeptalnrecital.xml
index e4a89eb8d5..a44d6d415f 100644
--- a/data/xml/2018.jeptalnrecital.xml
+++ b/data/xml/2018.jeptalnrecital.xml
@@ -73,7 +73,7 @@
       <language>fra</language>
       <author><first>Pierre</first><last>Magistry</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>75–86</pages>
       <abstract>Cet article présente une nouvelle méthode d’étiquetage en parties du discours adaptée aux langues peu dotées : la définition du contexte utilisé pour construire les plongements lexicaux est adaptée à la tâche, et de nouveaux vecteurs sont créés pour les mots inconnus. Les expériences menées sur le picard, le malgache et l’alsacien montrent que cette méthode améliore l’état de l’art pour ces trois langues peu dotées.</abstract>
       <url hash="e63973ed">2018.jeptalnrecital-long.6</url>
@@ -154,7 +154,7 @@
       <language>fra</language>
       <author><first>Sébastien</first><last>Delecraz</last></author>
       <author><first>Leonor</first><last>Becerra-Bonache</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Frédéric</first><last>Bechet</last></author>
       <pages>171–182</pages>
@@ -204,7 +204,7 @@
       <title>Détection automatique de phrases en domaine de spécialité en français (Sentence boundary detection for specialized domains in <fixed-case>F</fixed-case>rench )</title>
       <language>fra</language>
       <author><first>Arthur</first><last>Boyer</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>205–214</pages>
       <abstract>La détection de frontières de phrase est généralement considéré comme un problème résolu. Cependant, les outils performant sur des textes en domaine général, ne le sont pas forcement sur des domaines spécialisés, ce qui peut engendrer des dégradations de performance des outils intervenant en aval dans une chaîne de traitement automatique s’appuyant sur des textes découpés en phrases. Dans cet article, nous évaluons 5 outils de segmentation en phrase sur 3 corpus issus de différent domaines. Nous ré-entrainerons l’un de ces outils sur un corpus de spécialité pour étudier l’adaptation en domaine. Notamment, nous utilisons un nouveau corpus biomédical annoté spécifiquement pour cette tâche. La detection de frontières de phrase à l’aide d’un modèle OpenNLP entraîné sur un corpus clinique offre une F-mesure de .73, contre .66 pour la version standard de l’outil.</abstract>
       <url hash="19006d35">2018.jeptalnrecital-court.2</url>
@@ -226,9 +226,9 @@
       <language>fra</language>
       <author><first>Jeremy</first><last>Auguste</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>225–232</pages>
       <abstract>Cet article présente des méthodes permettant l’évaluation de la satisfaction client à partir de très vastes corpus de conversation de type “chat” entre des clients et des opérateurs. Extraire des connaissances dans ce contexte demeure un défi pour les méthodes de traitement automatique des langues de par la dimension interactive et les propriétés de ce nouveau type de langage à l’intersection du langage écrit et parlé. Nous présentons une étude utilisant des réponses à des sondages utilisateurs comme supervision faible permettant de prédire la satisfaction des usagers d’un service en ligne d’assistance technique et commerciale.</abstract>
       <url hash="db916812">2018.jeptalnrecital-court.4</url>
@@ -238,8 +238,8 @@
       <title>Détection d’erreurs dans des transcriptions <fixed-case>OCR</fixed-case> de documents historiques par réseaux de neurones récurrents multi-niveau (Combining character level and word level <fixed-case>RNN</fixed-case>s for post-<fixed-case>OCR</fixed-case> error detection)</title>
       <language>fra</language>
       <author><first>Thibault</first><last>Magallon</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>233–240</pages>
       <abstract>Le traitement à posteriori de transcriptions OCR cherche à détecter les erreurs dans les sorties d’OCR pour tenter de les corriger, deux tâches évaluées par la compétition ICDAR-2017 Post-OCR Text Correction. Nous présenterons dans ce papier un système de détection d’erreurs basé sur un modèle à réseaux récurrents combinant une analyse du texte au niveau des mots et des caractères en deux temps. Ce système a été classé second dans trois catégories évaluées parmi 11 candidats lors de la compétition.</abstract>
       <url hash="fd0aca58">2018.jeptalnrecital-court.5</url>
@@ -319,7 +319,7 @@
       <author><first>Elvys</first><last>Linhares Pontes</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Andréa Carneiro</first><last>Linhares</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>311–320</pages>
       <abstract>Semantic Textual Similarity (STS) is the basis of many applications in Natural Language Processing (NLP). Our system combines convolution and recurrent neural networks to measure the semantic similarity of sentences. It uses a convolution network to take account of the local context of words and an LSTM to consider the global context of sentences. This combination of networks helps to preserve the relevant information of sentences and improves the calculation of the similarity between sentences. Our model has achieved good results and is competitive with the best state-of-the-art systems.</abstract>
       <url hash="4b1738f9">2018.jeptalnrecital-court.13</url>
@@ -341,7 +341,7 @@
       <language>fra</language>
       <author><first>Marwa</first><last>Hadj Salah</last></author>
       <author><first>Loïc</first><last>Vial</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Mounir</first><last>Zrigui</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>329–336</pages>
@@ -393,7 +393,7 @@
       <title>Annotation automatique des types de discours dans des livres audio en vue d’une oralisation par un système de synthèse (Automatic annotation of discourse types in audio-books)</title>
       <language>fra</language>
       <author><first>Aghilas</first><last>Sini</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
       <pages>375–382</pages>
       <abstract>Pour synthétiser automatiquement et de manière expressive des livres audio, il est nécessaire de connaître le type des discours à oraliser. Ceci étant, dans un roman ou une nouvelle, les perspectives narratives et les types de discours évoluent souvent entre de la narration, du récitatif, du discours direct, du discours rapporté, voire des dialogues. Dans ce travail, nous allons présenter un outil qui a été développé à partir de l’analyse d’un corpus de livres audio (extraits de Madame Bovary et des Mystères de Paris) et qui prend comme unité de base pour l’analyse le paragraphe. Cet outil permet donc non seulement de déterminer automatiquement les types de discours (narration, discours direct, dialogue), et donc de savoir qui parle, mais également d’annoter l’extension des modifications discursives. Ce dernier point est important, notamment dans le cas d’incises de citation où le narrateur reprend la parole dans une séquence au discours direct. Dans sa forme actuelle, l’outil atteint un taux de 89 % de bonne détection.</abstract>
@@ -403,11 +403,11 @@
     <paper id="21">
       <title>Impact du Prétraitement Linguistique sur l’Analyse de Sentiment du Dialecte Tunisien ()</title>
       <language>fra</language>
-      <author><first>Chedi</first><last>Bechikh Ali</last></author>
+      <author id="chedi-bechikh-ali"><first>Chedi</first><last>Bechikh Ali</last></author>
       <author><first>Hala</first><last>Mulki</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
       <pages>383–392</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="3390d347">2018.jeptalnrecital-court.21</url>
       <bibkey>bechikh-ali-etal-2018-impact</bibkey>
     </paper>
@@ -415,7 +415,7 @@
       <title>Detecting context-dependent sentences in parallel corpora</title>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>393–400</pages>
       <abstract>In this article, we provide several approaches to the automatic identification of parallel sentences that require sentence-external linguistic context to be correctly translated. Our long-term goal is to automatically construct a test set of context-dependent sentences in order to evaluate machine translation models designed to improve the translation of contextual, discursive phenomena. We provide a discussion and critique that show that current approaches do not allow us to achieve our goal, and suggest that for now evaluating individual phenomena is likely the best solution.</abstract>
       <url hash="79c511fc">2018.jeptalnrecital-court.22</url>
@@ -425,7 +425,7 @@
       <title>Predicting failure of a mediated conversation in the context of asymetric role dialogues</title>
       <author><first>Romain</first><last>Carbou</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Frédéric</first><last>Landragin</last></author>
       <author><first>Jean</first><last>Léon Bouraoui</last></author>
       <pages>401–408</pages>
@@ -473,12 +473,12 @@
       <title>Détection des couples de termes translittérés à partir d’un corpus parallèle anglais-arabe ()</title>
       <language>fra</language>
       <author><first>Wafa</first><last>Neifar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
       <author><first>Lamia-Hadrich</first><last>Belguith</last></author>
       <pages>437–446</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="977e3b7c">2018.jeptalnrecital-court.27</url>
       <bibkey>neifar-etal-2018-detection</bibkey>
     </paper>
@@ -548,7 +548,7 @@
     </paper>
     <paper id="34">
       <title>A comparative study of word embeddings and other features for lexical complexity detection in <fixed-case>F</fixed-case>rench</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <pages>499–508</pages>
@@ -563,7 +563,7 @@
       <author><first>Azouaou</first><last>Faical</last></author>
       <author><first>Fodil</first><last>Benali</last></author>
       <author><first>Ala</first><last>Eddine Hachani</last></author>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <pages>509–518</pages>
       <abstract>Dans cet article, nous présentons une approche hybride pour la translitération de l’arabizi algérien. Nous avons élaboré un ensemble de règles permettant le passage de l’arabizi vers l’arabe. Á partir de ces règles nous générons un ensemble de candidats pour la translitération de chaque mot en arabizi vers l’arabe, et un parmi ces candidats sera ensuite identifié et extrait comme le meilleur candidat. Cette approche a été expérimentée en utilisant trois corpus de tests. Les résultats obtenus montrent une amélioration du score de précision qui était pour le meilleur des cas de l’ordre de 75,11%. Ces résultats ont aussi permis de vérifier que notre approche est très compétitive par rapport aux travaux traitant de la translitération de l’arabizi en général.</abstract>
       <url hash="ab34aace">2018.jeptalnrecital-court.35</url>
@@ -583,7 +583,7 @@
       <author><first>Julien</first><last>Plu</last></author>
       <author><first>Kevin</first><last>Cousot</last></author>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <author><first>Giuseppe</first><last>Rizzo</last></author>
       <pages>529–538</pages>
       <abstract>Entity linking systems typically rely on encyclopedic knowledge bases such as DBpedia or Freebase. In this paper, we use, instead, a French lexical-semantic network named JeuxDeMots to jointly type and link entities. Our approach combines word embeddings and a path-based similarity resulting in encouraging results over a set of documents from the French Le Monde newspaper.</abstract>
@@ -595,7 +595,7 @@
       <language>fra</language>
       <author><first>Gaël</first><last>Guibon</last></author>
       <author><first>Magalie</first><last>Ochs</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>539–546</pages>
       <abstract>L’utilisation des emojis dans les messageries sociales n’a eu de cesse d’augmenter ces dernières années. Plusieurs travaux récents ont porté sur la prédiction d’emojis afin d’épargner à l’utillisateur le parcours de librairies d’emojis de plus en plus conséquentes. Nous proposons une méthode permettant de récupérer automatiquement les catégories d’emojis à partir de leur contexte d’utilisation afin d’améliorer la prédiction finale. Pour ce faire nous utilisons des plongements lexicaux en considérant les emojis comme des mots présents dans des tweets. Nous appliquons ensuite un regroupement automatique restreint aux emojis visages afin de vérifier l’adéquation des résultats avec la théorie d’Ekman. L’approche est reproductible et applicable sur tous types d’emojis, ou lorsqu’il est nécessaire de prédire de nombreuses classes.</abstract>
       <url hash="b4c92255">2018.jeptalnrecital-court.38</url>
@@ -665,7 +665,7 @@
       <title>Construction de patrons lexico-syntaxiques d’extraction pour l’acquisition de connaissances à partir du web (Relation pattern extraction and information extraction from the web)</title>
       <language>fra</language>
       <author><first>Chloé</first><last>Monnin</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <pages>3–16</pages>
       <abstract>Cet article présente une méthode permettant de collecter sur le web des informations complémentaires à une information prédéfinie, afin de remplir une base de connaissances. Notre méthode utilise des patrons lexico-syntaxiques, servant à la fois de requêtes de recherche et de patrons d’extraction permettant l’analyse de documents non structurés. Pour ce faire, il nous a fallu définir au préalable les critères pertinents issus des analyses dans l’objectif de faciliter la découverte de nouvelles valeurs.</abstract>
       <url hash="5f7e3193">2018.jeptalnrecital-recital.1</url>
@@ -701,7 +701,7 @@
       <title>Résumé automatique guidé de textes: État de l’art et perspectives (Guided Summarization : State-of-the-art and perspectives )</title>
       <language>fra</language>
       <author><first>Salima</first><last>Lamsiyah</last></author>
-      <author><first>Said</first><last>Ouatik El Alaoui</last></author>
+      <author id="said-ouatik-el-alaoui"><first>Said</first><last>Ouatik El Alaoui</last></author>
       <author><first>Bernard</first><last>Espinasse</last></author>
       <pages>55–72</pages>
       <abstract>Les systèmes de résumé automatique de textes (SRAT) consistent à produire une représentation condensée et pertinente à partir d’un ou de plusieurs documents textuels. La majorité des SRAT sont basés sur des approches extractives. La tendance actuelle consiste à s’orienter vers les approches abstractives. Dans ce contexte, le résumé guidé défini par la campagne d’évaluation internationale TAC (Text Analysis Conference) en 2010, vise à encourager la recherche sur ce type d’approche, en se basant sur des techniques d’analyse en profondeur de textes. Dans ce papier, nous nous penchons sur le résumé automatique guidé de textes. Dans un premier temps, nous définissons les différentes caractéristiques et contraintes liées à cette tâche. Ensuite, nous dressons un état de l’art des principaux systèmes existants en mettant l’accent sur les travaux les plus récents, et en les classifiant selon les approches adoptées, les techniques utilisées, et leurs évaluations sur des corpus de références. Enfin, nous proposons les grandes étapes d’une méthode spécifique devant permettre le développement d’un nouveau type de systèmes de résumé guidé.</abstract>
@@ -867,7 +867,7 @@
       <title>Un corpus en arabe annoté manuellement avec des sens <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et (<fixed-case>A</fixed-case>rabic Manually Sense Annotated Corpus with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses)</title>
       <language>fra</language>
       <author><first>Marwa</first><last>Hadj Salah</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Mounir</first><last>Zrigui</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>213–216</pages>
@@ -894,16 +894,16 @@
     <paper id="1">
       <title><fixed-case>DEFT</fixed-case>2018 : recherche d’information et analyse de sentiments dans des tweets concernant les transports en <fixed-case>Î</fixed-case>le de <fixed-case>F</fixed-case>rance (<fixed-case>DEFT</fixed-case>2018 : Information Retrieval and Sentiment Analysis in Tweets about Public Transportation in <fixed-case>Î</fixed-case>le de <fixed-case>F</fixed-case>rance Region )</title>
       <language>fra</language>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Amel</first><last>Fraisse</last></author>
       <author><first>Agata</first><last>Jackiewicz</last></author>
       <author><first>Jihen</first><last>Karoui</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>219–230</pages>
       <abstract>Cet article présente l’édition 2018 de la campagne d’évaluation DEFT (Défi Fouille de Textes). A partir d’un corpus de tweets, quatre tâches ont été proposées : identifier les tweets sur la thématique des transports, puis parmi ces derniers, identifier la polarité (négatif, neutre, positif, mixte), identifier les marqueurs de sentiment et la cible, et enfin, annoter complètement chaque tweet en source et cible des sentiments exprimés. Douze équipes ont participé, majoritairement sur les deux premières tâches. Sur l’identification de la thématique des transports, la micro F-mesure varie de 0,827 à 0,908. Sur l’identification de la polarité globale, la micro F-mesure varie de 0,381 à 0,823.</abstract>
       <url hash="c646f47b">2018.jeptalnrecital-deft.1</url>
@@ -936,7 +936,7 @@
       <title>Modèles en Caractères pour la Détection de Polarité dans les Tweets (Character-level Models for Polarity Detection in Tweets )</title>
       <language>fra</language>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
       <pages>249–258</pages>
       <abstract>Dans cet article, nous présentons notre contribution au Défi Fouille de Textes 2018 au travers de trois méthodes originales pour la classification thématique et la détection de polarité dans des tweets en français. Nous y avons ajouté un système de vote. Notre première méthode est fondée sur des lexiques (mots et emojis), les n-grammes de caractères et un classificateur à vaste marge (ou SVM). tandis que les deux autres sont des méthodes endogènes fondées sur l’extraction de caractéristiques au grain caractères : un modèle à mémoire à court-terme persistante (ou BiLSTM pour Bidirectionnal Long Short-Term Memory) et perceptron multi-couche d’une part et un modèle de séquences de caractères fermées fréquentes et classificateur SVM d’autre part. Le BiLSTM a produit de loin les meilleurs résultats puisqu’il a obtenu la première place sur la tâche 1, classification binaire de tweets selon qu’ils traitent ou non des transports, et la troisième place sur la tâche 2, classification de la polarité en 4 classes. Ce résultat est d’autant plus intéressant que la méthode proposée est faiblement paramétrique, totalement endogène et qu’elle n’implique aucun pré-traitement.</abstract>
@@ -947,7 +947,7 @@
       <title>Concaténation de réseaux de neurones pour la classification de tweets, <fixed-case>DEFT</fixed-case>2018 (Concatenation of neural networks for tweets classification, <fixed-case>DEFT</fixed-case>2018 )</title>
       <language>fra</language>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <pages>259–264</pages>
@@ -986,7 +986,7 @@
       <author><first>Emmanuelle</first><last>Dusserre</last></author>
       <author><first>Ruslan</first><last>Kalitvianski</last></author>
       <author><first>Mathieu</first><last>Ruhlmann</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <pages>287–298</pages>
       <abstract>Cet article décrit les systèmes de l’équipe Eloquant pour la catégorisation de tweets en français dans les tâches 1 (détection de la thématique transports en commun) et 2 (détection de la polarité globale) du DEFT 2018. Nos systèmes reposent sur un enrichissement sémantique, l’apprentissage automatique et, pour la tâche 1 une approche symbolique. Nous avons effectué deux runs pour chacune des tâches. Nos meilleures F-mesures (0.897 pour la tâche 1 et 0.800 pour la tâche 2) sont au-dessus de la moyenne globale pour chaque tâche, et nous placent dans les 30% supérieurs de tous les runs pour la tâche 2.</abstract>
       <url hash="48bfcb7e">2018.jeptalnrecital-deft.8</url>
@@ -999,7 +999,7 @@
       <author><first>Hugo</first><last>Linsenmaier</last></author>
       <author><first>Alexandre</first><last>Majed</last></author>
       <author><first>Xavier</first><last>Cadet</last></author>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
       <pages>299–310</pages>
       <abstract>Dans ce papier, nous décrivons les systèmes développés au LSE pour le DEFT 2018 sur les tâches 1 et 2 qui consistent à classifier des tweets. La première tâche consiste à déterminer si un message concerne les transports ou non. La deuxième, consiste à classifier les tweets selon leur polarité globale. Pour les deux tâches nous avons développé des systèmes basés sur des réseaux de neurones convolutifs (CNN) et récurrents (LSTM, BLSTM et GRU). Chaque mot d’un tweet donné est représenté par un vecteur dense appris à partir des données relativement proches de celles de la compétition. Le score final officiel est de 0.891 pour la tâche 1 et de 0.781 pour la tâche 2.</abstract>
       <url hash="cd7f086d">2018.jeptalnrecital-deft.9</url>
@@ -1010,7 +1010,7 @@
       <language>fra</language>
       <author><first>Chloé</first><last>Monnin</last></author>
       <author><first>Olivier</first><last>Querné</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <pages>311–318</pages>
       <abstract>Nous présentons la participation de Syllabs à la tâche de classification de tweets dans le domaine du transport lors de DEFT 2018. Pour cette première participation à une campagne DEFT, nous avons choisi de tester plusieurs algorithmes de classification état de l’art. Après une étape de prétraitement commune à l’ensemble des algorithmes, nous effectuons un apprentissage sur le seul contenu des tweets. Les résultats étant somme toute assez proches, nous effectuons un vote majoritaire sur les trois algorithmes ayant obtenus les meilleurs résultats.</abstract>
       <url hash="f3b61ba1">2018.jeptalnrecital-deft.10</url>
diff --git a/data/xml/2018.tal.xml b/data/xml/2018.tal.xml
index 378611fa34..0351c08b25 100644
--- a/data/xml/2018.tal.xml
+++ b/data/xml/2018.tal.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 59, Numéro 1 : Varia [Varia]</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -50,7 +50,7 @@
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 59, Numéro 2 : Apprentissage profond pour le traitement automatique des langues [Deep Learning for natural language processing]</booktitle>
       <editor><first>Alexandre</first><last>Allauzen</last></editor>
-      <editor><first>Hinrich</first><last>Schütze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2018</year>
@@ -75,7 +75,7 @@
       <title>Classifying Semantic Clause Types With Recurrent Neural Networks: Analysis of Attention, Context &amp; Genre Characteristics</title>
       <author><first>Maria</first><last>Becker</last></author>
       <author><first>Michael</first><last>Staniek</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>15–48</pages>
@@ -87,7 +87,7 @@
       <author><first>Zied</first><last>Elloumi</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>49–76</pages>
       <url hash="5a39ff0a">2018.tal-2.3</url>
       <language>fra</language>
@@ -131,7 +131,7 @@
     <paper id="2">
       <title>De la constitution d’un corpus arboré à l’analyse syntaxique du serbe [From the constitution of a treebank to the syntactic analysis of the <fixed-case>S</fixed-case>erbian language]</title>
       <author><first>Aleksandra</first><last>Miletic</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Dejan</first><last>Stosic</last></author>
       <pages>15–39</pages>
       <url hash="57f15eaa">2018.tal-3.2</url>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="4">
       <title>Analyse syntaxique de langues faiblement dotées à partir de plongements de mots multilingues [Syntactic analysis of under-resourced languages from multilingual word embeddings]</title>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>67–91</pages>
diff --git a/data/xml/2019.ccnlg.xml b/data/xml/2019.ccnlg.xml
index bdb22ca1be..1446429e59 100644
--- a/data/xml/2019.ccnlg.xml
+++ b/data/xml/2019.ccnlg.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 4th Workshop on Computational Creativity in Language Generation</booktitle>
       <editor><first>Benjamin</first><last>Burtenshaw</last></editor>
-      <editor><first>Enrique</first><last>Manjavacas</last></editor>
+      <editor id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tokyo, Japan</address>
       <month>29 October--3 November</month>
@@ -64,7 +64,7 @@
     <paper id="6">
       <title>Noun Generation for Nominalization in Academic Writing</title>
       <author><first>Dariush</first><last>Saberi</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>47–51</pages>
       <url hash="0453288d">2019.ccnlg-1.6</url>
       <bibkey>saberi-lee-2019-noun</bibkey>
diff --git a/data/xml/2019.gwc.xml b/data/xml/2019.gwc.xml
index 0e3052d410..566f48092f 100644
--- a/data/xml/2019.gwc.xml
+++ b/data/xml/2019.gwc.xml
@@ -35,7 +35,7 @@
     </paper>
     <paper id="3">
       <title>Thesaurus Verification Based on Distributional Similarities</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Ekaterina</first><last>Parkhomenko</last></author>
       <pages>16–23</pages>
       <abstract>In this paper we consider an approach to verification of large lexical-semantic resources as WordNet. The method of verification procedure is based on the analysis of discrepancies of corpus-based and thesaurus-based word similarities. We calculated such word similarities on the basis of a Russian news collection and Russian wordnet (RuWordNet). We applied the procedure to more than 30 thousand words and found some serious errors in word sense description, including incorrect or absent relations or missed main senses of ambiguous words.</abstract>
@@ -45,7 +45,7 @@
     <paper id="4">
       <title>Including <fixed-case>S</fixed-case>wiss <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et</title>
       <author><first>Eva</first><last>Huber</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>24–32</pages>
       <abstract>GermaNet (Henrich and Hinrichs, 2010; Hamp and Feldweg, 1997) is a comprehensive wordnet of Standard German spoken in the Federal Republic of Germany. The GermaNet team aims at modelling the basic vocabulary of the language. German is an official language or a minority language in many countries. It is an official language in Austria, Germany and Switzerland, each with its own codified standard variety (Auer, 2014, p. 21), and also in Belgium, Liechtenstein, and Luxemburg. German is recognized as a minority language in thirteen additional countries, including Brasil, Italy, Poland, and Russia. However, the different standard varieties of German are currently not represented in GermaNet. With this project, we make a start on changing this by including one variety, namely Swiss Standard German, into GermaNet. This shall give a more inclusive perspective on the German language. We will argue that Swiss Standard German words, Helvetisms, are best included into the already existing wordnet GermaNet, rather than creating them as a separate wordnet.</abstract>
       <url hash="3bce8e57">2019.gwc-1.4</url>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="9">
       <title>Linking <fixed-case>R</fixed-case>ussian <fixed-case>W</fixed-case>ordnet <fixed-case>R</fixed-case>u<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Anastasia</first><last>Gerasimova</last></author>
       <pages>64–71</pages>
       <abstract>In this paper we consider the linking procedure of Russian wordnet (RuWordNet) to Wordnet. The specificity of the procedure in our case is based on the fact that a lot of bilingual (Russian and English) lexical data have been gathered in another Russian thesaurus RuThes, which has a different structure than WordNet. Previously, RuThes has been semi-automatically transformed into RuWordNet, having the WordNet-like structure. Now, the RuThes English data are utilized to establish matching from the RuWordNet synsets to the WordNet synsets.</abstract>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="10">
       <title>Fast developing of a Natural Language Interface for a <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Leveraging on Sentence Embeddings</title>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <pages>72–78</pages>
       <abstract>We describe how a natural language interface can be developed for a wordnet with a small set of handcrafted templates, leveraging on sentence embeddings. The proposed approach does not use rules for parsing natural language queries but experiments showed that the embeddings model is tolerant enough for correctly predicting relation types that do not match known patterns exactly. It was tested with OpenWordNet-PT, for which this method may provide an alternative interface, with benefits also on the curation process.</abstract>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="16">
       <title>Merging <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et with <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Ida Rørmann</first><last>Olsen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
@@ -175,9 +175,9 @@
     <paper id="18">
       <title>Synthetic, yet natural: Properties of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et random walk corpora and the impact of rare words on embedding performance</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Abhijit</first><last>Mahalunkar</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>140–150</pages>
       <abstract>Creating word embeddings that reflect semantic relationships encoded in lexical knowledge resources is an open challenge. One approach is to use a random walk over a knowledge graph to generate a pseudo-corpus and use this corpus to train embeddings. However, the effect of the shape of the knowledge graph on the generated pseudo-corpora, and on the resulting word embeddings, has not been studied. To explore this, we use English WordNet, constrained to the taxonomic (tree-like) portion of the graph, as a case study. We investigate the properties of the generated pseudo-corpora, and their impact on the resulting embeddings. We find that the distributions in the psuedo-corpora exhibit properties found in natural corpora, such as Zipf’s and Heaps’ law, and also observe that the proportion of rare words in a pseudo-corpus affects the performance of its embeddings on word similarity.</abstract>
       <url hash="664ac0d4">2019.gwc-1.18</url>
@@ -186,7 +186,7 @@
     <paper id="19">
       <title>Augmenting <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et semantic relations with contextualized embeddings</title>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>151–159</pages>
       <abstract>Constructing semantic relations in WordNet has been a labour-intensive task, especially in a dynamic and fast-changing language environment. Combined with recent advancements of contextualized embeddings, this paper proposes the concept of morphology-guided sense vectors, which can be used to semi-automatically augment semantic relations in Chinese Wordnet (CWN). This paper (1) built sense vectors with pre-trained contextualized embedding models; (2) demonstrated the sense vectors computed were consistent with the sense distinctions made in CWN; and (3) predicted the potential semantically-related sense pairs with high accuracy by sense vectors model.</abstract>
       <url hash="14cfc2a8">2019.gwc-1.19</url>
@@ -235,7 +235,7 @@
     <paper id="24">
       <title>Thinking globally, acting locally – Progress in the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet Project</title>
       <author><first>Marissa</first><last>Griesel</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <author><first>Mampaka Lydia</first><last>Mojapelo</last></author>
       <pages>191–196</pages>
       <abstract>The African Wordnet Project (AWN) includes all nine indigenous South African languages, namely isiZulu, isiXhosa, Setswana, Sesotho sa Leboa, Tshivenda, Siswati, Sesotho, isiNdebele and Xitsonga. The AWN currently includes 61 000 synsets as well as definitions and usage examples for a large part of the synsets. The project recently received extended funding from the South African Centre for Digital Language Resources (SADiLaR) and aims to update all aspects of the current resource, including the seed list used for new development, software tools used and mapping the AWN to the latest version of PWN 3.1. As with any resource development project, it is essential to also include phases of focused quality assurance and updating of the basis on which the resource is built. The African languages remain under-resourced. This paper describes progress made in the development of the AWN as well as recent technical improvements.</abstract>
@@ -246,7 +246,7 @@
       <title>Commonsense Reasoning Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case>: a Detailed Analysis</title>
       <author><first>Javier</first><last>Álvez</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>197–205</pages>
       <abstract>We describe a detailed analysis of a sample of large benchmark of commonsense reasoning problems that has been automatically obtained from WordNet, SUMO and their mapping. The objective is to provide a better assessment of the quality of both the benchmark and the involved knowledge resources for advanced commonsense reasoning tasks. By means of this analysis, we are able to detect some knowledge misalignments, mapping errors and lack of knowledge and resources. Our final objective is the extraction of some guidelines towards a better exploitation of this commonsense knowledge framework by the improvement of the included resources.</abstract>
       <url hash="2665f7f4">2019.gwc-1.25</url>
@@ -299,7 +299,7 @@
     </paper>
     <paper id="31">
       <title><fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2019 – An Open-Source <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>E</fixed-case>nglish</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
@@ -312,9 +312,9 @@
     <paper id="32">
       <title>Assessing Wordnets with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings</title>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>253–259</pages>
       <abstract>An effective conversion method was proposed in the literature to obtain a lexical semantic space from a lexical semantic graph, thus permitting to obtain WordNet embeddings from WordNets. In this paper, we propose the exploitation of this conversion methodology as the basis for the comparative assessment of WordNets: given two WordNets, their relative quality in terms of capturing the lexical semantics of a given language, can be assessed by (i) converting each WordNet into the corresponding semantic space (i.e. into WordNet embeddings), (ii) evaluating the resulting WordNet embeddings under the typical semantic similarity prediction task used to evaluate word embeddings in general; and (iii) comparing the performance in that task of the two word embeddings, extracted from the two WordNets. A better performance in that evaluation task results from the word embeddings that are better at capturing the semantic similarity of words, which, in turn, result from the WordNet that is of higher quality at capturing the semantics of words.</abstract>
       <url hash="3b3c91a1">2019.gwc-1.32</url>
@@ -358,7 +358,7 @@
     </paper>
     <paper id="37">
       <title>Aligning the <fixed-case>B</fixed-case>ulgarian <fixed-case>BTB</fixed-case> <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with the <fixed-case>B</fixed-case>ulgarian <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Laska</first><last>Laskova</last></author>
       <author><first>Ivajlo</first><last>Radev</last></author>
@@ -460,7 +460,7 @@
     </paper>
     <paper id="47">
       <title><fixed-case>P</fixed-case>ortuguese Manners of Speaking</title>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <pages>373–377</pages>
       <abstract>Lexical resources need to be as complete as possible. Very little work seems to have been done on adverbs, the smallest part of speech class in Princeton WordNet counting the number of synsets. Amongst adverbs, manner adverbs ending in ‘-ly’ seem the easiest to work with, as their meaning is almost the same as the one of the associated adjective. This phenomenon seems to be parallel in English and Portuguese, where these manner adverbs finish in the suffix ‘-mente’. We use this correspondence to improve the coverage of adverbs in the lexical resource OpenWordNet-PT, a wordnet for Portuguese.</abstract>
@@ -504,8 +504,8 @@
       <title>Utilizing Wordnets for Cognate Detection among <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Gholemreza</first><last>Haffari</last></author>
       <pages>404–412</pages>
       <abstract>Automatic Cognate Detection (ACD) is a challenging task which has been utilized to help NLP applications like Machine Translation, Information Retrieval and Computational Phylogenetics. Unidentified cognate pairs can pose a challenge to these applications and result in a degradation of performance. In this paper, we detect cognate word pairs among ten Indian languages with Hindi and use deep learning methodologies to predict whether a word pair is cognate or not. We identify IndoWordnet as a potential resource to detect cognate word pairs based on orthographic similarity-based methods and train neural network models using the data obtained from it. We identify parallel corpora as another potential resource and perform the same experiments for them. We also validate the contribution of Wordnets through further experimentation and report improved performance of up to 26%. We discuss the nuances of cognate detection among closely related Indian languages and release the lists of detected cognates as a dataset. We also observe the behaviour of, to an extent, unrelated Indian language pairs and release the lists of detected cognates among them as well.</abstract>
diff --git a/data/xml/2019.icon.xml b/data/xml/2019.icon.xml
index 511a146f4f..354e6e6317 100644
--- a/data/xml/2019.icon.xml
+++ b/data/xml/2019.icon.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2021-05-10" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th International Conference on Natural Language Processing</booktitle>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharya</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last></editor>
       <publisher>NLP Association of India</publisher>
       <address>International Institute of Information Technology, Hyderabad, India</address>
       <month>December</month>
@@ -50,7 +50,7 @@
     <paper id="4">
       <title>Introducing Aspects of Creativity in Automatic Poetry Generation</title>
       <author><first>Brendan</first><last>Bena</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>26–35</pages>
       <abstract>Poetry Generation involves teaching systems to automatically generate text that resembles poetic work. A deep learning system can learn to generate poetry on its own by training on a corpus of poems and modeling the particular style of language. In this paper, we propose taking an approach that fine-tunes GPT-2, a pre-trained language model, to our downstream task of poetry generation. We extend prior work on poetry generation by introducing creative elements. Specifically, we generate poems that express emotion and elicit the same in readers, and poems that use the language of dreams—called dream poetry. We are able to produce poems that correctly elicit the emotions of sadness and joy 87.5 and 85 percent, respectively, of the time. We produce dreamlike poetry by training on a corpus of texts that describe dreams. Poems from this model are shown to capture elements of dream poetry with scores of no less than 3.2 on the Likert scale. We perform crowdsourced human-evaluation for all our poems. We also make use of the Coh-Metrix tool, outlining metrics we use to gauge the quality of text generated.</abstract>
       <url hash="2200f374">2019.icon-1.4</url>
@@ -59,9 +59,9 @@
     <paper id="5">
       <title>Incorporating Sub-Word Level Information in Language Invariant Neural Event Detection</title>
       <author><first>Suhan</first><last>Prabhu</last></author>
-      <author id="pranav-goel"><first>Pranav</first><last>Goel</last></author>
+      <author><first>Pranav</first><last>Goel</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>36–44</pages>
       <abstract>Detection of TimeML events in text have traditionally been done on corpora such as TimeBanks. However, deep learning methods have not been applied to these corpora, because these datasets seldom contain more than 10,000 event mentions. Traditional architectures revolve around highly feature engineered, language specific statistical models. In this paper, we present a Language Invariant Neural Event Detection (ALINED) architecture. ALINED uses an aggregation of both sub-word level features as well as lexical and structural information. This is achieved by combining convolution over character embeddings, with recurrent layers over contextual word embeddings. We find that our model extracts relevant features for event span identification without relying on language specific features. We compare the performance of our language invariant model to the current state-of-the-art in English, Spanish, Italian and French. We outperform the F1-score of the state of the art in English by 1.65 points. We achieve F1-scores of 84.96, 80.87 and 74.81 on Spanish, Italian and French respectively which is comparable to the current states of the art for these languages. We also introduce the automatic annotation of events in Hindi, a low resource language, with an F1-Score of 77.13.</abstract>
       <url hash="dea3fc5f">2019.icon-1.5</url>
@@ -69,10 +69,10 @@
     </paper>
     <paper id="6">
       <title>Event Centric Entity Linking for <fixed-case>H</fixed-case>indi News Articles: A Knowledge Graph Based Approach</title>
-      <author id="pranav-goel"><first>Pranav</first><last>Goel</last></author>
+      <author><first>Pranav</first><last>Goel</last></author>
       <author><first>Suhan</first><last>Prabhu</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>45–55</pages>
       <abstract>We describe the development of a knowledge graph from an event annotated corpus by presenting a pipeline that identifies and extracts the relations between entities and events from Hindi news articles. Due to the semantic implications of argument identification for events in Hindi, we use a combined syntactic argument and semantic role identification methodology. To the best of our knowledge, no other architecture exists for this purpose. The extracted combined role information is incorporated in a knowledge graph that can be queried via subgraph extraction for basic questions. The architectures presented in this paper can be used for participant extraction and event-entity linking in most Indo-Aryan languages, due to similar syntactic and semantic properties of event arguments.</abstract>
       <url hash="fad8c55e">2019.icon-1.6</url>
@@ -184,9 +184,9 @@
     <paper id="17">
       <title>Development of <fixed-case>POS</fixed-case> tagger for <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>engali Code-Mixed data</title>
       <author><first>Tathagata</first><last>Raha</last></author>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>143–149</pages>
       <abstract>Code-mixed texts are widespread nowadays due to the advent of social media. Since these texts combine two languages to formulate a sentence, it gives rise to various research problems related to Natural Language Processing. In this paper, we try to excavate one such problem, namely, Parts of Speech tagging of code-mixed texts. We have built a system that can POS tag English-Bengali code-mixed data where the Bengali words were written in Roman script. Our approach initially involves the collection and cleaning of English-Bengali code-mixed tweets. These tweets were used as a development dataset for building our system. The proposed system is a modular approach that starts by tagging individual tokens with their respective languages and then passes them to different POS taggers, designed for different languages (English and Bengali, in our case). Tags given by the two systems are later joined together and the final result is then mapped to a universal POS tag set. Our system was checked using 100 manually POS tagged code-mixed sentences and it returned an accuracy of 75.29%.</abstract>
       <url hash="1091f59b">2019.icon-1.17</url>
@@ -234,7 +234,7 @@
     <paper id="22">
       <title>Kunji : A Resource Management System for Higher Productivity in Computer Aided Translation Tools</title>
       <author><first>Priyank</first><last>Gupta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Rashid</first><last>Ahmad</last></author>
       <pages>184–192</pages>
diff --git a/data/xml/2019.ijclclp.xml b/data/xml/2019.ijclclp.xml
index 857f5fce13..2d688cbbd6 100644
--- a/data/xml/2019.ijclclp.xml
+++ b/data/xml/2019.ijclclp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-07-21" type="journal">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 24, Number 1, June 2019</booktitle>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <editor><first>Chia-Hui</first><last>Chang</last></editor>
       <publisher>Association for Computational Linguistics and Chinese Language Processing</publisher>
       <address>Taipei, Taiwan</address>
@@ -86,7 +86,7 @@
       <author><first>Bai-Hong</first><last>Huang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <author><first>Guang-Feng</first><last>Deng</last></author>
-      <author><first>Matúš</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
       <author><first>Daniel</first><last>Hládek</last></author>
       <url hash="26c8ec66">2019.ijclclp-2.3</url>
       <language>zho</language>
diff --git a/data/xml/2019.iwslt.xml b/data/xml/2019.iwslt.xml
index 85e4c645df..42a657af9d 100644
--- a/data/xml/2019.iwslt.xml
+++ b/data/xml/2019.iwslt.xml
@@ -24,8 +24,8 @@
       <title>The <fixed-case>IWSLT</fixed-case> 2019 Evaluation Campaign</title>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Rolando</first><last>Cattoni</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
@@ -48,14 +48,14 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>IWSLT</fixed-case> 2019 <fixed-case>KIT</fixed-case> Speech Translation System</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Juan</first><last>Hussain</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <abstract>This paper describes KIT’s submission to the IWSLT 2019 Speech Translation task on two sub-tasks corresponding to two different datasets. We investigate different end-to-end architectures for the speech recognition module, including our new transformer-based architectures. Overall, our modules in the pipe-line are based on the transformer architecture which has recently achieved great results in various fields. In our systems, using transformer is also advantageous compared to traditional hybrid systems in term of simplicity while still having competent results.</abstract>
       <url hash="208726cc">2019.iwslt-1.3</url>
       <bibkey>pham-etal-2019-iwslt</bibkey>
@@ -93,7 +93,7 @@
     <paper id="7">
       <title>End-to-end Speech Translation System Description of <fixed-case>LIT</fixed-case> for <fixed-case>IWSLT</fixed-case> 2019</title>
       <author><first>Mei</first><last>Tu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Lijie</first><last>Wang</last></author>
       <author><first>Xiao</first><last>Chen</last></author>
       <author><first>Xue</first><last>Wen</last></author>
@@ -140,7 +140,7 @@
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Hang</first><last>Le</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <abstract>In this paper, we present our submission for the English to Czech Text Translation Task of IWSLT 2019. Our system aims to study how pre-trained language models, used as input embeddings, can improve a specialized machine translation system trained on few data. Therefore, we implemented a Transformer-based encoder-decoder neural system which is able to use the output of a pre-trained language model as input embeddings, and we compared its performance under three configurations: 1) without any pre-trained language model (constrained), 2) using a language model trained on the monolingual parts of the allowed English-Czech data (constrained), and 3) using a language model trained on a large quantity of external monolingual data (unconstrained). We used BERT as external pre-trained language model (configuration 3), and BERT architecture for training our own language model (configuration 2). Regarding the training data, we trained our MT system on a small quantity of parallel text: one set only consists of the provided MuST-C corpus, and the other set consists of the MuST-C corpus and the News Commentary corpus from WMT. We observed that using the external pre-trained BERT improves the scores of our system by +0.8 to +1.5 of BLEU on our development set, and +0.97 to +1.94 of BLEU on the test set. However, using our own language model trained only on the allowed parallel data seems to improve the machine translation performances only when the system is trained on the smallest dataset.</abstract>
       <url hash="61d6c2fb">2019.iwslt-1.11</url>
       <bibkey>vial-etal-2019-lig</bibkey>
@@ -158,15 +158,15 @@
     <paper id="13">
       <title><fixed-case>KIT</fixed-case>’s Submission to the <fixed-case>IWSLT</fixed-case> 2019 Shared Task on Text Translation</title>
       <author><first>Felix</first><last>Schneider</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <abstract>In this paper, we describe KIT’s submission for the IWSLT 2019 shared task on text translation. Our system is based on the transformer model [1] using our in-house implementation. We augment the available training data using back-translation and employ fine-tuning for the final model. For our best results, we used a 12-layer transformer-big config- uration, achieving state-of-the-art results on the WMT2018 test set. We also experiment with student-teacher models to improve performance of smaller models.</abstract>
       <url hash="a61bab92">2019.iwslt-1.13</url>
       <bibkey>schneider-waibel-2019-kits</bibkey>
     </paper>
     <paper id="14">
       <title>Data Augmentation for End-to-End Speech Translation: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case> ‘19</title>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Viet Nhat</first><last>Nguyen</last></author>
       <author><first>Amirhossein</first><last>Tebbifakhr</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
@@ -178,8 +178,8 @@
       <title>How Transformer Revitalizes Character-based Neural Machine Translation: An Investigation on <fixed-case>J</fixed-case>apanese-<fixed-case>V</fixed-case>ietnamese Translation Systems</title>
       <author><first>Thi-Vinh</first><last>Ngo</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <abstract>While translating between East Asian languages, many works have discovered clear advantages of using characters as the translation unit. Unfortunately, traditional recurrent neural machine translation systems hinder the practical usage of those character-based systems due to their architectural limitations. They are unfavorable in handling extremely long sequences as well as highly restricted in parallelizing the computations. In this paper, we demonstrate that the new transformer architecture can perform character-based trans- lation better than the recurrent one. We conduct experiments on a low-resource language pair: Japanese-Vietnamese. Our models considerably outperform the state-of-the-art systems which employ word-based recurrent architectures.</abstract>
       <url hash="b077720d">2019.iwslt-1.15</url>
       <bibkey>ngo-etal-2019-transformer</bibkey>
@@ -189,7 +189,7 @@
       <author><first>Surafel M.</first><last>Lakew</last></author>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <abstract>Multilingual Neural Machine Translation (MNMT) for low- resource languages (LRL) can be enhanced by the presence of related high-resource languages (HRL), but the relatedness of HRL usually relies on predefined linguistic assumptions about language similarity. Recently, adapting MNMT to a LRL has shown to greatly improve performance. In this work, we explore the problem of adapting an MNMT model to an unseen LRL using data selection and model adapta- tion. In order to improve NMT for LRL, we employ perplexity to select HRL data that are most similar to the LRL on the basis of language distance. We extensively explore data selection in popular multilingual NMT settings, namely in (zero-shot) translation, and in adaptation from a multilingual pre-trained model, for both directions (LRL↔en). We further show that dynamic adaptation of the model’s vocabulary results in a more favourable segmentation for the LRL in comparison with direct adaptation. Experiments show re- ductions in training time and significant performance gains over LRL baselines, even with zero LRL data (+13.0 BLEU), up to +17.0 BLEU for pre-trained multilingual model dynamic adaptation with related data selection. Our method outperforms current approaches, such as massively multilingual models and data augmentation, on four LRL.</abstract>
       <url hash="1b241b83">2019.iwslt-1.16</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="17">
       <title>Transformers without Tears: Improving the Normalization of Self-Attention</title>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>Julian</first><last>Salazar</last></author>
       <abstract>We evaluate three simple, normalization-centric changes to improve Transformer training. First, we show that pre-norm residual connections (PRENORM) and smaller initializations enable warmup-free, validation-based training with large learning rates. Second, we propose l2 normalization with a single scale parameter (SCALENORM) for faster training and better performance. Finally, we reaffirm the effectiveness of normalizing word embeddings to a fixed length (FIXNORM). On five low-resource translation pairs from TED Talks-based corpora, these changes always converge, giving an average +1.1 BLEU over state-of-the-art bilingual baselines and a new 32.8 BLEU on IWSLT '15 English-Vietnamese. We ob- serve sharper performance curves, more consistent gradient norms, and a linear relationship between activation scaling and decoder depth. Surprisingly, in the high-resource setting (WMT '14 English-German), SCALENORM and FIXNORM remain competitive but PRENORM degrades performance.</abstract>
       <url hash="7cc09ad6">2019.iwslt-1.17</url>
@@ -209,7 +209,7 @@
       <author><first>Liezl</first><last>Puzon</last></author>
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Xutai</first><last>Ma</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Deepak</first><last>Gopinath</last></author>
       <abstract>For automatic speech translation (AST), end-to-end approaches are outperformed by cascaded models that transcribe with automatic speech recognition (ASR), then trans- late with machine translation (MT). A major cause of the performance gap is that, while existing AST corpora are small, massive datasets exist for both the ASR and MT subsystems. In this work, we evaluate several data augmentation and pretraining approaches for AST, by comparing all on the same datasets. Simple data augmentation by translating ASR transcripts proves most effective on the English–French augmented LibriSpeech dataset, closing the performance gap from 8.2 to 1.4 BLEU, compared to a very strong cascade that could directly utilize copious ASR and MT data. The same end-to-end approach plus fine-tuning closes the gap on the English–Romanian MuST-C dataset from 6.7 to 3.7 BLEU. In addition to these results, we present practical rec- ommendations for augmentation and pretraining approaches. Finally, we decrease the performance gap to 0.01 BLEU us- ing a Transformer-based architecture.</abstract>
       <url hash="2c55855c">2019.iwslt-1.18</url>
@@ -228,7 +228,7 @@
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Viet Anh Khoa</first><last>Tran</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <abstract>In this work we analyze and compare the behavior of the Transformer architecture when using different positional encoding methods. While absolute and relative positional encoding perform equally strong overall, we show that relative positional encoding is vastly superior (4.4% to 11.9% BLEU) when translating a sentence that is longer than any observed training sentence. We further propose and analyze variations of relative positional encoding and observe that the number of trainable parameters can be reduced without a performance loss, by using fixed encoding vectors or by removing some of the positional encoding vectors.</abstract>
       <url hash="533f0046">2019.iwslt-1.20</url>
       <bibkey>rosendahl-etal-2019-analysis</bibkey>
@@ -236,7 +236,7 @@
     <paper id="21">
       <title>Using Whole Document Context in Neural Machine Translation</title>
       <author><first>Valentin</first><last>Macé</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <abstract>In Machine Translation, considering the document as a whole can help to resolve ambiguities and inconsistencies. In this paper, we propose a simple yet promising approach to add contextual information in Neural Machine Translation. We present a method to add source context that capture the whole document with accurate boundaries, taking every word into account. We provide this additional information to a Transformer model and study the impact of our method on three language pairs. The proposed approach obtains promising results in the English-German, English-French and French-English document-level translation tasks. We observe interesting cross-sentential behaviors where the model learns to use document-level information to improve translation coherence.</abstract>
       <url hash="8fabd46c">2019.iwslt-1.21</url>
       <bibkey>mace-servan-2019-using</bibkey>
@@ -245,8 +245,8 @@
       <title>On Using <fixed-case>S</fixed-case>pec<fixed-case>A</fixed-case>ugment for End-to-End Speech Translation</title>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Albert</first><last>Zeyer</last></author>
-      <author><first>Ralf</first><last>Schlüter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <abstract>This work investigates a simple data augmentation technique, SpecAugment, for end-to-end speech translation. SpecAugment is a low-cost implementation method applied directly to the audio input features and it consists of masking blocks of frequency channels, and/or time steps. We apply SpecAugment on end-to-end speech translation tasks and achieve up to +2.2% BLEU on LibriSpeech Audiobooks En→Fr and +1.2% on IWSLT TED-talks En→De by alleviating overfitting to some extent. We also examine the effectiveness of the method in a variety of data scenarios and show that the method also leads to significant improvements in various data conditions irrespective of the amount of training data.</abstract>
       <url hash="e3ecaec4">2019.iwslt-1.22</url>
       <bibkey>bahar-etal-2019-using</bibkey>
@@ -254,8 +254,8 @@
     <paper id="23">
       <title>Estimating post-editing effort: a study on human judgements, task-based and reference-based metrics of <fixed-case>MT</fixed-case> quality</title>
       <author><first>Scarton</first><last>Scarton</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <abstract>Devising metrics to assess translation quality has always been at the core of machine translation (MT) research. Traditional automatic reference-based metrics, such as BLEU, have shown correlations with human judgements of adequacy and fluency and have been paramount for the advancement of MT system development. Crowd-sourcing has popularised and enabled the scalability of metrics based on human judgments, such as subjective direct assessments (DA) of adequacy, that are believed to be more reliable than reference-based automatic metrics. Finally, task-based measurements, such as post-editing time, are expected to provide a more de- tailed evaluation of the usefulness of translations for a specific task. Therefore, while DA averages adequacy judgements to obtain an appraisal of (perceived) quality independently of the task, and reference-based automatic metrics try to objectively estimate quality also in a task-independent way, task-based metrics are measurements obtained either during or after performing a specific task. In this paper we argue that, although expensive, task-based measurements are the most reliable when estimating MT quality in a specific task; in our case, this task is post-editing. To that end, we report experiments on a dataset with newly-collected post-editing indicators and show their usefulness when estimating post-editing effort. Our results show that task-based metrics comparing machine-translated and post-edited versions are the best at tracking post-editing effort, as expected. These metrics are followed by DA, and then by metrics comparing the machine-translated version and independent references. We suggest that MT practitioners should be aware of these differences and acknowledge their implications when decid- ing how to evaluate MT for post-editing purposes.</abstract>
       <url hash="8a61b1b6">2019.iwslt-1.23</url>
@@ -266,7 +266,7 @@
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <abstract>Prominently used in support vector machines and logistic re-gressions, kernel functions (kernels) can implicitly map data points into high dimensional spaces and make it easier to learn complex decision boundaries. In this work, by replacing the inner product function in the softmax layer, we explore the use of kernels for contextual word classification. In order to compare the individual kernels, experiments are conducted on standard language modeling and machine translation tasks. We observe a wide range of performances across different kernel settings. Extending the results, we look at the gradient properties, investigate various mixture strategies and examine the disambiguation abilities.</abstract>
       <url hash="f68150f6">2019.iwslt-1.24</url>
       <bibkey>gao-etal-2019-exploring</bibkey>
@@ -282,8 +282,8 @@
     </paper>
     <paper id="26">
       <title>Generic and Specialized Word Embeddings for Multi-Domain Machine Translation</title>
-      <author><first>MinhQuang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>MinhQuang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <abstract>Supervised machine translation works well when the train and test data are sampled from the same distribution. When this is not the case, adaptation techniques help ensure that the knowledge learned from out-of-domain texts generalises to in-domain sentences. We study here a related setting, multi-domain adaptation, where the number of domains is potentially large and adapting separately to each domain would waste training resources. Our proposal transposes to neural machine translation the feature expansion technique of (Daumé III, 2007): it isolates domain-agnostic from domain-specific lexical representations, while sharing the most of the network across domains. Our experiments use two architectures and two language pairs: they show that our approach, while simple and computationally inexpensive, outperforms several strong baselines and delivers a multi-domain system that successfully translates texts from diverse sources.</abstract>
@@ -293,7 +293,7 @@
     <paper id="27">
       <title>Lexical Micro-adaptation for Neural Machine Translation</title>
       <author><first>Jitao</first><last>Xu</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <abstract>This work is inspired by a typical machine translation industry scenario in which translators make use of in-domain data for facilitating translation of similar or repeating sentences. We introduce a generic framework applied at inference in which a subset of segment pairs are first extracted from training data according to their similarity to the input sentences. These segments are then used to dynamically update the parameters of a generic NMT network, thus performing a lexical micro-adaptation. Our approach demonstrates strong adaptation performance to new and existing datasets including pseudo in-domain data. We evaluate our approach on a heterogeneous English-French training dataset showing accuracy gains on all evaluated domains when compared to strong adaptation baselines.</abstract>
       <url hash="74fc87c1">2019.iwslt-1.27</url>
@@ -322,7 +322,7 @@
     <paper id="30">
       <title>Controlling Utterance Length in <fixed-case>NMT</fixed-case>-based Word Segmentation with Attention</title>
       <author><first>Pierre</first><last>Godard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <abstract>One of the basic tasks of computational language documentation (CLD) is to identify word boundaries in an unsegmented phonemic stream. While several unsupervised monolingual word segmentation algorithms exist in the literature, they are challenged in real-world CLD settings by the small amount of available data. A possible remedy is to take advantage of glosses or translation in a foreign, well- resourced, language, which often exist for such data. In this paper, we explore and compare ways to exploit neural machine translation models to perform unsupervised boundary detection with bilingual information, notably introducing a new loss function for jointly learning alignment and segmentation. We experiment with an actual under-resourced language, Mboshi, and show that these techniques can effectively control the output segmentation length.</abstract>
       <url hash="3042bb6d">2019.iwslt-1.30</url>
@@ -331,7 +331,7 @@
     <paper id="31">
       <title>Controlling the Output Length of Neural Machine Translation</title>
       <author><first>Surafel Melaku</first><last>Lakew</last></author>
-      <author><first>Mattia</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia</first><last>Di Gangi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <abstract>The recent advances introduced by neural machine translation (NMT) are rapidly expanding the application fields of machine translation, as well as reshaping the quality level to be targeted. In particular, if translations have to fit some given layout, quality should not only be measured in terms of adequacy and fluency, but also length. Exemplary cases are the translation of document files, subtitles, and scripts for dubbing, where the output length should ideally be as close as possible to the length of the input text. This pa-per addresses for the first time, to the best of our knowledge, the problem of controlling the output length in NMT. We investigate two methods for biasing the output length with a transformer architecture: i) conditioning the output to a given target-source length-ratio class and ii) enriching the transformer positional embedding with length information. Our experiments show that both methods can induce the network to generate shorter translations, as well as acquiring inter- pretable linguistic skills.</abstract>
       <url hash="e032ec61">2019.iwslt-1.31</url>
diff --git a/data/xml/2019.jeptalnrecital.xml b/data/xml/2019.jeptalnrecital.xml
index e99e8dc5fc..2766cd53aa 100644
--- a/data/xml/2019.jeptalnrecital.xml
+++ b/data/xml/2019.jeptalnrecital.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume I : Articles longs</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -62,7 +62,7 @@
       <title>Corpus annoté de cas cliniques en français (Annotated corpus with clinical cases in <fixed-case>F</fixed-case>rench)</title>
       <author><first>Natalia</first><last>Grabar</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
       <pages>71–84</pages>
       <abstract>Les corpus textuels sont utiles pour diverses applications de traitement automatique des langues (TAL) en fournissant les données nécessaires pour leur création, adaptation ou évaluation. Cependant, dans certains domaines comme le domaine médical, l’accès aux données est rendu compliqué, voire impossible, pour des raisons de confidentialité et d’éthique. Il existe néanmoins de réels besoins en corpus cliniques pour l’enseignement et la recherche. Pour répondre à ce défi, nous présentons dans cet article le corpus CAS contenant des cas cliniques de patients, réels ou fictifs, que nous avons compilés. Ces cas cliniques en français couvrent plusieurs spécialités médicales et focalisent donc sur différentes situations cliniques. Actuellement, le corpus contient 4 300 cas (environ 1,5M d’occurrences de mots). Il est accompagné d’informations (discussions des cas cliniques, mots-clés, etc.) et d’annotations que nous avons effectuées au regard des besoins de la recherche en TAL dans ce domaine. Nous présentons également les résultats de premières expériences de recherche et d’extraction d’information qui ont été effectuées avec ce corpus annoté. Ces expériences peuvent fournir une baseline à d’autres chercheurs souhaitant travailler avec les données.</abstract>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="8">
       <title>La génération automatique de poésie en français (Automatic Poetry Generation in <fixed-case>F</fixed-case>rench)</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>113–126</pages>
       <abstract>La génération automatique de poésie est une tâche ardue pour un système informatique. Pour qu’un poème ait du sens, il est important de prendre en compte à la fois des aspects linguistiques et littéraires. Ces dernières années, un certain nombre d’approches fructueuses sont apparues, capables de modéliser de manière adéquate divers aspects du langage naturel. En particulier, les modèles de langue basés sur les réseaux de neurones ont amélioré l’état de l’art par rapport à la modélisation prédictive de langage, tandis que les topic models sont capables de capturer une certaine cohérence thématique. Dans cet article, on explorera comment ces approches peuvent être adaptées et combinées afin de modéliser les aspects linguistiques et littéraires nécessaires pour la génération de poésie. Le système est exclusivement entraîné sur des textes génériques, et sa sortie est contrainte afin de conférer un caractère poétique au vers généré. Le cadre présenté est appliqué à la génération de poèmes en français, et évalué à l’aide d’une évaluation humaine.</abstract>
       <url hash="a4242b45">2019.jeptalnrecital-long.8</url>
@@ -114,7 +114,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>P</fixed-case>olylex<fixed-case>FLE</fixed-case> : une base de données d’expressions polylexicales pour le <fixed-case>FLE</fixed-case> (<fixed-case>P</fixed-case>olylex<fixed-case>FLE</fixed-case> : a database of multiword expressions for <fixed-case>F</fixed-case>rench <fixed-case>L</fixed-case>2 language learning)</title>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Marion</first><last>Cargill</last></author>
       <author><first>Thomas</first><last>Francois</last></author>
       <pages>143–156</pages>
@@ -128,8 +128,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume II : Articles courts</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -143,7 +143,7 @@
     <paper id="1">
       <title>Analyse faiblement supervisée de conversation en actes de dialogue (Weakly supervised dialog act analysis)</title>
       <author><first>Catherine</first><last>Thompson</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Jérémy</first><last>Auguste</last></author>
       <pages>159–166</pages>
@@ -169,7 +169,7 @@
       <title>Apprentissage faiblement supervisé de la structure discursive (Learning discourse structure using weak supervision )</title>
       <author><first>Sonia</first><last>Badene</last></author>
       <author><first>Catherine</first><last>Thompson</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Jean-Pierre</first><last>Lorré</last></author>
       <pages>175–184</pages>
       <abstract>L’avènement des techniques d’apprentissage automatique profond a fait naître un besoin énorme de données d’entraînement. De telles données d’entraînement sont extrêmement coûteuses à créer, surtout lorsqu’une expertise dans le domaine est requise. L’une de ces tâches est l’apprentissage de la structure sémantique du discours, tâche très complexe avec des structures récursives avec des données éparses, mais qui est essentielle pour extraire des informations sémantiques profondes du texte. Nous décrivons nos expérimentations sur l’attachement des unités discursives pour former une structure, en utilisant le paradigme du data programming dans lequel peu ou pas d’annotations sont utilisées pour construire un ensemble de données d’entraînement “bruité”. Le corpus de dialogues utilisé illustre des contraintes à la fois linguistiques et non-linguistiques intéressantes qui doivent être apprises. Nous nous concentrons sur la structure des règles utilisées pour construire un modèle génératif et montrons la compétitivité de notre approche par rapport à l’apprentissage supervisé classique.</abstract>
@@ -179,10 +179,10 @@
     </paper>
     <paper id="4">
       <title><fixed-case>CALOR</fixed-case>-<fixed-case>QUEST</fixed-case> : un corpus d’entraînement et d’évaluation pour la compréhension automatique de textes (Machine reading comprehension is a task related to Question-Answering where questions are not generic in scope but are related to a particular document)</title>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Cindy</first><last>Aloui</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Frederic</first><last>Herledan</last></author>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="5">
       <title>Chunker différents types de discours oraux : défis pour l’apprentissage automatique (Chunking different spoken speech types : challenges for machine learning)</title>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Mariame</first><last>Maarouf</last></author>
       <author><first>Marie</first><last>Skrovec</last></author>
       <author><first>Flora</first><last>Badin</last></author>
@@ -227,7 +227,7 @@
     <paper id="8">
       <title>De l’extraction des interactions médicament-médicament vers les interactions aliment-médicament à partir de textes biomédicaux: Adaptation de domaine (From the extraction of drug-drug interactions to the food-drug interactions in biomedical texts : domain adaptation)</title>
       <author><first>Tsanta</first><last>Randriatsitohaina</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>223–232</pages>
       <abstract>Les interactions aliments-médicaments (FDI) se produisent lorsque des aliments et des médicaments sont pris simultanément et provoquent un effet inattendu. Nous considérons l’extraction de ces interactions dans les textes comme une tâche d’extraction de relation pouvant être résolue par des méthodes de classification. Toutefois, étant donné que ces interactions sont décrites de manière très fine, nous sommes confrontés au manque de données et au manque d’exemples par type de relation. Pour résoudre ce problème, nous proposons d’appliquer une adaptation de domaine à partir des interactions médicament-médicament (DDI) qui est une tâche similaire, afin d’établir une correspondance entre les types de relations et d’étiqueter les instances FDI selon les types DDI. Notre approche confirme une cohérence entre les 2 domaines et fournit une base pour la spécification des relations et la pré-annotation de nouvelles données. Les performances des modèles de classification appuie également l’efficacité de l’adaptation de domaine sur notre tâche.</abstract>
       <url hash="cab3772b">2019.jeptalnrecital-court.8</url>
@@ -236,7 +236,7 @@
     </paper>
     <paper id="9">
       <title>Demonette2 - Une base de données dérivationnelle du français à grande échelle : premiers résultats (Demonette2 – A large scale derivational database for <fixed-case>F</fixed-case>rench: first results)</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
       <author><first>Olivier</first><last>Bonami</last></author>
       <author><first>Pauline</first><last>Haas</last></author>
@@ -270,7 +270,7 @@
     </paper>
     <paper id="12">
       <title>Développement d’un lexique morphologique et syntaxique de l’ancien français (Development of a morphological and syntactic lexicon of <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench)</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>265–274</pages>
       <abstract>Nous décrivons dans cet article notre travail de développement d’un lexique morphologique et syntaxique à grande échelle de l’ancien français pour le traitement automatique des langues. Nous nous sommes appuyés sur des ressources dictionnairiques et lexicales dans lesquelles l’extraction d’informations structurées et exploitables a nécessité des développements spécifiques. De plus, la mise en correspondance d’informations provenant de ces différentes sources a soulevé des difficultés. Nous donnons quelques indications quantitatives sur le lexique obtenu, et discutons de sa fiabilité dans sa version actuelle et des perspectives d’amélioration permises par l’existence d’une première version, notamment au travers de l’analyse automatique de données textuelles.</abstract>
       <url hash="2a17ab2d">2019.jeptalnrecital-court.12</url>
@@ -316,7 +316,7 @@
     <paper id="16">
       <title>Exploring sentence informativeness</title>
       <author><first>Syrielle</first><last>Montariol</last></author>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <pages>303–312</pages>
       <abstract>This study is a preliminary exploration of the concept of informativeness –how much information a sentence gives about a word it contains– and its potential benefits to building quality word representations from scarce data. We propose several sentence-level classifiers to predict informativeness, and we perform a manual annotation on a set of sentences. We conclude that these two measures correspond to different notions of informativeness. However, our experiments show that using the classifiers’ predictions to train word embeddings has an impact on embedding quality.</abstract>
@@ -373,8 +373,8 @@
     <paper id="21">
       <title>Multilingual and Multitarget Hate Speech Detection in Tweets</title>
       <author><first>Patricia</first><last>Chiril</last></author>
-      <author><first>Farah</first><last>Benamara Zitoune</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara Zitoune</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Marlène</first><last>Coulomb-Gully</last></author>
       <author><first>Abhishek</first><last>Kumar</last></author>
       <pages>351–360</pages>
@@ -384,7 +384,7 @@
     </paper>
     <paper id="22">
       <title>Observation de l’expérience client dans les restaurants (Mapping Reviewers’ Experience in Restaurants)</title>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Hyun</first><last>Jung Kang</last></author>
       <pages>361–370</pages>
       <abstract>Ces dernières années, les recherches sur la fouille d’opinions ou l’analyse des sentiments sont menées activement dans le domaine du Traitement Automatique des Langues (TAL). De nombreuses études scientifiques portent sur l’extraction automatique des opinions positives ou négatives et de leurs cibles. Ce travail propose d’identifier automatiquement une évaluation, exprimée explicitement ou implicitement par des internautes dans le corpus d’avis tiré du Web. Six catégories d’évaluation sont proposées : opinion positive, opinion négative, opinion mixte, intention, suggestion et description. La méthode utilisée est fondée sur l’apprentissage supervisé qui tient compte des caractéristiques linguistiques de chaque catégorie retenue. L’une des difficultés que nous avons rencontrée concerne le déséquilibre entre les classes d’évaluation créées, cependant, cet obstacle a pu être surmonté dans l’apprentissage grâce aux stratégies de sur-échantillonnage et aux stratégies algorithmiques.</abstract>
@@ -410,7 +410,7 @@
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Chafik</first><last>Aloulou</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>381–390</pages>
       <abstract>Nous nous intéressons, dans cet article, à la tâche d’analyse d’opinions en arabe. Nous étudions la spécificité de la langue arabe pour la détection de polarité. Nous nous focalisons ici sur les caractéristiques d’agglutination et de richesse morphologique de cette langue. Nous avons particulièrement étudié différentes représentations d’unité lexicale : token, lemme et light stemme. Nous avons construit et testé des espaces continus de ces différentes représentations lexicales. Nous avons mesuré l’apport de tels types de representations vectorielles dans notre cadre spécifique. Les performances du réseau CNN montrent un gain significatif de 2% par rapport à l’état de l’art.</abstract>
       <url hash="3748189b">2019.jeptalnrecital-court.24</url>
@@ -450,7 +450,7 @@
     <paper id="28">
       <title>Réutilisation de Textes dans les Manuscrits Anciens (Text Reuse in Ancient Manuscripts)</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Dominique</first><last>Stutzmann</last></author>
       <author><first>Jacob</first><last>Currie</last></author>
       <author><first>Christine</first><last>Jacquin</last></author>
@@ -502,8 +502,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume III : RECITAL</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -618,8 +618,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume IV : Démonstrations</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -659,7 +659,7 @@
       <title>Démonstrateur en-ligne du projet <fixed-case>ANR</fixed-case> <fixed-case>PARSEME</fixed-case>-<fixed-case>FR</fixed-case> sur les expressions polylexicales (On-line demonstrator of the <fixed-case>PARSEME</fixed-case>-<fixed-case>FR</fixed-case> project on multiword expressions)</title>
       <author><first>Marine</first><last>Schmitt</last></author>
       <author><first>Elise</first><last>Moreau</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <pages>627–630</pages>
       <abstract>Nous présentons le démonstrateur en-ligne du projet ANR PARSEME-FR dédié aux expressions polylexicales. Il inclut différents outils d’identification de telles expressions et un outil d’exploration des ressources linguistiques de ce projet.</abstract>
@@ -669,7 +669,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>S</fixed-case>yl<fixed-case>N</fixed-case>ews, un agréfilter multilingue (<fixed-case>S</fixed-case>yl<fixed-case>N</fixed-case>ews, a multilingual aggrefilter)</title>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Kévin</first><last>Espasa</last></author>
       <author><first>Sara</first><last>Quispe</last></author>
       <pages>631–634</pages>
@@ -683,9 +683,9 @@
       <author><first>Ioan</first><last>Calapodescu</last></author>
       <author><first>Caroline</first><last>Brun</last></author>
       <author><first>Vassilina</first><last>Nikoulina</last></author>
-      <author><first>Salah</first><last>Aït-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Aït-Mokhtar</last></author>
       <pages>635–638</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="e956efd3">2019.jeptalnrecital-demo.5</url>
       <language>fra</language>
       <bibkey>calapodescu-etal-2019-sentiment</bibkey>
@@ -708,8 +708,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Défi Fouille de Textes (atelier TALN-RECITAL)</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -724,7 +724,7 @@
       <title>Recherche et extraction d’information dans des cas cliniques. Présentation de la campagne d’évaluation <fixed-case>DEFT</fixed-case> 2019 (Information Retrieval and Information Extraction from Clinical Cases)</title>
       <author><first>Natalia</first><last>Grabar</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
       <pages>7–16</pages>
       <abstract>Cet article présente la campagne d’évaluation DEFT 2019 sur l’analyse de textes cliniques rédigés en français. Le corpus se compose de cas cliniques publiés et discutés dans des articles scientifiques, et indexés par des mots-clés. Nous proposons trois tâches indépendantes : l’indexation des cas cliniques et discussions, évaluée prioritairement par la MAP (mean average precision), l’appariement entre cas cliniques et discussions, évalué au moyen d’une précision, et l’extraction d’information parmi quatre catégories (âge, genre, origine de la consultation, issue), évaluée en termes de rappel, précision et F-mesure. Nous présentons les résultats obtenus par les participants sur chaque tâche.</abstract>
@@ -770,7 +770,7 @@
       <title>Indexation et appariements de documents cliniques pour le Deft 2019 (Indexing and pairing texts of the medical domain )</title>
       <author><first>Davide</first><last>Buscaldi</last></author>
       <author><first>Dhaou</first><last>Ghoul</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
       <pages>49–56</pages>
       <abstract>Dans cet article, nous présentons nos méthodes pour les tâches d’indexation et d’appariements du Défi Fouile de Textes (Deft) 2019. Pour la taĉhe d’indexation nous avons testé deux méthodes, une fondée sur l’appariemetn préalable des documents du jeu de tset avec les documents du jeu d’entraînement et une autre méthode fondée sur l’annotation terminologique. Ces méthodes ont malheureusement offert des résultats assez faible. Pour la tâche d’appariement, nous avons dévellopé une méthode sans apprentissage fondée sur des similarités de chaînes de caractères ainsi qu’une méthode exploitant des réseaux siamois. Là encore les résultats ont été plutôt décevant même si la méthode non supervisée atteint un score plutôt honorable pour une méthode non-supervisée : 62% .</abstract>
@@ -794,7 +794,7 @@
       <author><first>Estelle</first><last>Maudet</last></author>
       <author><first>Oralie</first><last>Cattan</last></author>
       <author><first>Maureen</first><last>de Seyssel</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <pages>67–80</pages>
       <abstract>Dans ce papier, nous présentons la participation de Qwant Research aux tâches 2 et 3 de l’édition 2019 du défi fouille de textes (DEFT) portant sur l’analyse de documents cliniques rédigés en français. La tâche 2 est une tâche de similarité sémantique qui demande d’apparier cas cliniques et discussions médicales. Pour résoudre cette tâche, nous proposons une approche reposant sur des modèles de langue et évaluons l’impact de différents pré-traitements et de différentes techniques d’appariement sur les résultats. Pour la tâche 3, nous avons développé un système d’extraction d’information qui produit des résultats encourageants en termes de précision. Nous avons expérimenté deux approches différentes, l’une se fondant exclusivement sur l’utilisation de réseaux de neurones pour traiter la tâche, l’autre reposant sur l’exploitation des informations linguistiques issues d’une analyse syntaxique.</abstract>
       <url hash="c6b28e4a">2019.jeptalnrecital-deft.7</url>
@@ -804,7 +804,7 @@
     <paper id="8">
       <title>Aprentissage non-supervisé pour l’appariement et l’étiquetage de cas cliniques en français - <fixed-case>DEFT</fixed-case>2019 (Unsupervised learning for matching and labelling of <fixed-case>F</fixed-case>rench clinical cases - <fixed-case>DEFT</fixed-case>2019 )</title>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <pages>81–90</pages>
@@ -830,8 +830,8 @@
     <meta>
       <booktitle>Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Terminologie et Intelligence Artificielle (atelier TALN-RECITAL \&amp; IC)</booktitle>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>ATALA</publisher>
       <address>Toulouse, France</address>
       <month>7</month>
@@ -845,7 +845,7 @@
     <paper id="1">
       <title>Terminology systematization for Cybersecurity domain in <fixed-case>I</fixed-case>talian Language</title>
       <author><first>Claudia</first><last>Lanza</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>7–18</pages>
       <abstract>This paper aims at presenting the first steps to improve the quality of the first draft of an Italian thesaurus for Cybersecurity terminology that has been realized for a specific project activity in collaboration with CybersecurityLab at Informatics and Telematics Institute (IIT) of the National Council of Research (CNR) in Italy. In particular, the paper will focus, first, on the terminological knowledge base built to retrieve the most representative candidate terms of Cybersecurity domain in Italian language, giving examples of the main gold standard repositories that have been used to build this semantic tool. Attention will be then given to the methodology and software employed to configure a system of NLP rules to get the desired semantic results and to proceed with the enhancement of the candidate terms selection which are meant to be inserted in the controlled vocabulary.</abstract>
       <url hash="de1e5938">2019.jeptalnrecital-tia.1</url>
@@ -854,7 +854,7 @@
     <paper id="2">
       <title>Identification des catégories de relations aliment-médicament (Identification of categories of food-drug relations)</title>
       <author><first>Tsanta</first><last>Randriatsitohaina</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>19–30</pages>
       <abstract>Les interactions aliment-médicament se produisent lorsque des aliments et des médicaments pris ensembles provoquent un effet inattendu. Leur reconnaissance automatique dans les textes peut être considérée comme une tâche d’extraction de relation à l’aide de méthodes de classification. Toutefois, étant donné que ces interactions sont décrites de manière très fine, nous sommes confrontés au manque de données et au manque d’exemples par type de relation. Pour résoudre ce problème, nous proposons une approche efficace pour regrouper des relations partageant une représentation similaire en groupes et réduire le manque d’exemples. Notre approche améliore les performances de la classification des FDI. Enfin, nous contrastons une méthode de regroupement intuitive basée sur la définition des types de relation et un apprentissage non supervisé basé sur les instances de chaque type de relation.</abstract>
       <url hash="90edd329">2019.jeptalnrecital-tia.2</url>
@@ -864,7 +864,7 @@
     <paper id="3">
       <title>Terminology-based Text Embedding for Computing Document Similarities on Technical Content</title>
       <author><first>Hamid</first><last>Mirisaee</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Cedric</first><last>Lagnier</last></author>
       <author><first>Agnes</first><last>Guerraz</last></author>
       <pages>31–42</pages>
diff --git a/data/xml/2019.lilt.xml b/data/xml/2019.lilt.xml
index f4da0fad44..48809d66ab 100644
--- a/data/xml/2019.lilt.xml
+++ b/data/xml/2019.lilt.xml
@@ -11,7 +11,7 @@
     <paper id="1">
       <title>Syntactic composition and selectional preferences in <fixed-case>H</fixed-case>indi Light Verb Constructions</title>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <abstract>Previous work on light verb constructions (e.g. chorii kar ‘theft do; steal’) in Hindi describes their syntactic formation via co-predication (Ahmed et al., 2012, Butt, 2014). This implies that both noun and light verb contribute their arguments, and these overlapping argument structures must be composed in the syntax. In this paper, we present a co-predication analysis using Tree-Adjoining Grammar, which models syntactic composition and semantic selectional preferences without transformations (deletion or argument identification). The analysis has two key components (i) an underspecified category for the nominal and (ii) combinatorial constraints on the noun and light verb to specify selectional preferences. The former has the advantage of syntactic composition without argument identification and the latter prevents over-generalization, while recognizing the semantic contribution of both predicates. This work additionally accounts for the agreement facts for the Hindi LVC.</abstract>
       <issue>1</issue>
       <url hash="4165b47d">2019.lilt-17.1</url>
@@ -27,7 +27,7 @@
     </paper>
     <paper id="3">
       <title>Complex predicates: Structure, potential structure and underspecification</title>
-      <author><first>Stefan</first><last>Müller</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
       <abstract>This paper compares a recent TAG-based analysis of complex predicates in Hindi/Urdu with its HPSG analog. It points out that TAG combines actual structure while HPSG (and Categorial Grammar and other valence-based frameworks) specify valence of lexical items and hence potential structure. This makes it possible to have light verbs decide which arguments of embedded heads get realized, somthing that is not possible in TAG. TAG has to retreat to disjunctions instead. While this allows straight-forward analyses of active/passive alternations based on the light verb in valence-based frameworks, such an option does not exist for TAG and it has to be assumed that preverbs come with different sets of arguments.</abstract>
       <issue>3</issue>
       <url hash="c763c515">2019.lilt-17.3</url>
@@ -56,7 +56,7 @@
       <author><first>Prashant</first><last>Pardeshi</last></author>
       <author><first>Alistair</first><last>Butler</last></author>
       <author><first>Stephen</first><last>Horn</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Iku</first><last>Nagasaki</last></author>
       <issue>0</issue>
       <url hash="f58f1f11">2019.lilt-18.1</url>
diff --git a/data/xml/2019.nsurl.xml b/data/xml/2019.nsurl.xml
index 64a2ead1b5..aae0b37ae5 100644
--- a/data/xml/2019.nsurl.xml
+++ b/data/xml/2019.nsurl.xml
@@ -97,14 +97,14 @@
     <paper id="9">
       <title>Motivations, challenges, and perspectives for the development of an Automatic Speech Recognition System for the under-resourced <fixed-case>N</fixed-case>giemboon Language</title>
       <author><first>Patrice</first><last>Yemmene</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>59–67</pages>
       <url hash="9eb12584">2019.nsurl-1.9</url>
       <bibkey>yemmene-besacier-2019-motivations</bibkey>
     </paper>
     <paper id="10">
       <title><fixed-case>NITK</fixed-case>-<fixed-case>IT</fixed-case>_<fixed-case>NLP</fixed-case>@<fixed-case>NSURL</fixed-case>2019: Transfer Learning based <fixed-case>POS</fixed-case> Tagger for Under Resourced <fixed-case>B</fixed-case>hojpuri and <fixed-case>M</fixed-case>agahi Language</title>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <pages>68–72</pages>
       <url hash="086d6a06">2019.nsurl-1.10</url>
       <bibkey>m-2019-nitk</bibkey>
diff --git a/data/xml/2019.rocling.xml b/data/xml/2019.rocling.xml
index 14351c27a5..82d9be5f28 100644
--- a/data/xml/2019.rocling.xml
+++ b/data/xml/2019.rocling.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 31st Conference on Computational Linguistics and Speech Processing (ROCLING 2019)</booktitle>
       <editor><first>Chen-Yu</first><last>Chiag</last></editor>
       <editor><first>Min-Yuh</first><last>Day</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>New Taipei City, Taiwan</address>
       <month>October</month>
@@ -133,7 +133,7 @@
     <paper id="12">
       <title>Sequence to Sequence Convolutional Neural Network for Automatic Spelling Correction</title>
       <author><first>Daniel</first><last>Hládek</last></author>
-      <author><first>Matúš</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
       <author><first>Ján</first><last>Staš</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>102–111</pages>
@@ -144,7 +144,7 @@
       <title>基於深度學習之簡答題問答系統初步探討(A Preliminary Study on Deep Learning-based Short Answer Question Answering System)</title>
       <author><first>Yu-Chen</first><last>Lin</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Matúš</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
       <author><first>Daniel</first><last>Hládek</last></author>
       <pages>112–121</pages>
       <url hash="4d909ca2">2019.rocling-1.13</url>
@@ -168,7 +168,7 @@
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <author><first>Chen-Ming</first><last>Pan</last></author>
       <author><first>Tzu-Hsiu</first><last>Kuo</last></author>
-      <author><first>Matúš</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
       <author><first>Daniel</first><last>Hládek</last></author>
       <pages>137–151</pages>
       <url hash="eeb7140b">2019.rocling-1.15</url>
@@ -179,7 +179,7 @@
       <title>適合漸凍人使用之語音轉換系統初步研究(Deep Neural-Network Bandwidth Extension and Denoising Voice Conversion System for <fixed-case>ALS</fixed-case> Patients)</title>
       <author><first>Bai-Hong</first><last>Huang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Matúš</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
       <author><first>Daniel</first><last>Hládek</last></author>
       <pages>152–166</pages>
       <url hash="b8da6baf">2019.rocling-1.16</url>
@@ -198,7 +198,7 @@
     <paper id="18">
       <title>基於卷積神經網路之台語關鍵詞辨識(<fixed-case>T</fixed-case>aiwanese keyword recognition using Convolutional Neural Networks)</title>
       <author><first>Chi-Hung</first><last>Liu</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <author><first>Wei-Zhong</first><last>Zhan</last></author>
       <author><first>Jie-Shu</first><last>Wu</last></author>
       <author><first>Da-Dao</first><last>Zhu</last></author>
@@ -211,7 +211,7 @@
     <paper id="19">
       <title>Extracting Semantic Representations of Sexual Biases from Word Vectors</title>
       <author><first>Ying-Yu</first><last>Chen</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>192–201</pages>
       <url hash="6dfd2bb6">2019.rocling-1.19</url>
       <bibkey>chen-hsieh-2019-extracting</bibkey>
@@ -242,7 +242,7 @@
       <author><first>Kuan-Yi</first><last>Liu</last></author>
       <author><first>Syu-Siang</first><last>Wang</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>226–240</pages>
       <url hash="b4138af7">2019.rocling-1.22</url>
       <bibkey>liu-etal-2019-speech</bibkey>
@@ -252,7 +252,7 @@
       <author><first>Wen-Chao</first><last>Yeh</last></author>
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>241–245</pages>
       <url hash="f11c6781">2019.rocling-1.23</url>
       <language>zho</language>
@@ -301,7 +301,7 @@
       <title>Influences of Prosodic Feature Replacement on the Perceived Singing Voice Identity</title>
       <author><first>Kuan-Yi</first><last>Kang</last></author>
       <author><first>Yi-Wen</first><last>Liu</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>296–309</pages>
       <url hash="c76727dc">2019.rocling-1.28</url>
       <bibkey>kang-etal-2019-influences</bibkey>
@@ -322,10 +322,10 @@
     </paper>
     <paper id="30">
       <title>Building of children speech corpus for improving automatic subtitling services</title>
-      <author><first>Matus</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matus</first><last>Pleva</last></author>
       <author><first>Stanislav</first><last>Ondas</last></author>
       <author><first>Daniel</first><last>Hládek</last></author>
-      <author><first>Jozef</first><last>Juhar</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhar</last></author>
       <author><first>Ján</first><last>Staš</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>325–333</pages>
@@ -336,7 +336,7 @@
       <title>基於階層式編碼架構之文本可讀性預測(A Hierarchical Encoding Framework for Text Readability Prediction)</title>
       <author><first>Shi-Yan</first><last>Weng</last></author>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>334–342</pages>
       <url hash="a5f2dc18">2019.rocling-1.31</url>
@@ -346,7 +346,7 @@
     <paper id="32">
       <title>國語語音辨識系統中之人名語言模型(The Personal Name Modeling in <fixed-case>M</fixed-case>andarin <fixed-case>ASR</fixed-case> System)</title>
       <author><first>Hong-Bin</first><last>Liang</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>343–357</pages>
       <url hash="4d158d3c">2019.rocling-1.32</url>
       <language>zho</language>
@@ -393,7 +393,7 @@
     </paper>
     <paper id="37">
       <title>Four-word Idioms Containing Opposites in <fixed-case>M</fixed-case>andarin</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>398–407</pages>
       <url hash="1e252d89">2019.rocling-1.37</url>
       <bibkey>chung-2019-four</bibkey>
@@ -404,7 +404,7 @@
       <author><first>Ching-Yu Helen</first><last>Yang</last></author>
       <author><first>Ying-Zhu</first><last>Chen</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>408–422</pages>
       <url hash="cad4cded">2019.rocling-1.38</url>
       <language>zho</language>
@@ -423,7 +423,7 @@
       <title>標註英中同步樣式文法之研究(Annotating Synchronous Grammar Patterns across <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese)</title>
       <author><first>Ching-Yu Helen</first><last>Yang</last></author>
       <author><first>Ying-Zhu</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Yi-Chien</first><last>Lin</last></author>
       <author><first>Wei-Tien Dylan</first><last>Tsai</last></author>
       <pages>424–433</pages>
diff --git a/data/xml/2019.tal.xml b/data/xml/2019.tal.xml
index aa3b53069b..0da6c8b182 100644
--- a/data/xml/2019.tal.xml
+++ b/data/xml/2019.tal.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 60, Numéro 1 : Varia [Varia]</booktitle>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -32,8 +32,8 @@
   <volume id="2" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 60, Numéro 2 : Corpus annotés [Annotated corpora]</booktitle>
-      <editor><first>Marie</first><last>Candito</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="marie-candito"><first>Marie</first><last>Candito</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2019</year>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="2">
       <title>Un corpus arboré pour le français : le <fixed-case>F</fixed-case>rench Treebank [A parsed corpus for <fixed-case>F</fixed-case>rench: the <fixed-case>F</fixed-case>rench treebank]</title>
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
       <author><first>Loïc</first><last>Liégeois</last></author>
       <pages>19–43</pages>
@@ -67,7 +67,7 @@
       <title>Redonner du sens à l’accord interannotateur : vers une interprétation des mesures d’accord en termes de reproductibilité de l’annotation [Interpreting inter-annotator agreement measures : towards an interpretation in terms of annotation reproducibility]</title>
       <author><first>Dany</first><last>Bregeon</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Anaïs</first><last>Halftermeyer</last></author>
       <pages>45–69</pages>
       <url hash="4d2d0a16">2019.tal-2.3</url>
@@ -77,7 +77,7 @@
     <paper id="4">
       <title>Conversion et améliorations de corpus du français annotés en <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies [Conversion and Improvement of <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>F</fixed-case>rench corpora]</title>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
       <pages>71–95</pages>
       <url hash="a86587fa">2019.tal-2.4</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="2">
       <title>Transcription automatique et segmentation thématique de livres d’heures manuscrits [Automatic transcription and thematic segmentation of Books of Hours]</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Christopher</first><last>Kermorvant</last></author>
       <author><first>Martin</first><last>Maarand</last></author>
diff --git a/data/xml/2020.aacl.xml b/data/xml/2020.aacl.xml
index 38ffb8dd2e..b772726ce7 100644
--- a/data/xml/2020.aacl.xml
+++ b/data/xml/2020.aacl.xml
@@ -3,7 +3,7 @@
   <volume id="main" ingest-date="2020-12-02" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing</booktitle>
-      <editor><first>Kam-Fai</first><last>Wong</last></editor>
+      <editor id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></editor>
       <editor><first>Kevin</first><last>Knight</last></editor>
       <editor><first>Hua</first><last>Wu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -25,7 +25,7 @@
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Guoping</first><last>Huang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1–11</pages>
       <abstract>We propose a touch-based editing method for translation, which is more flexible than traditional keyboard-mouse-based translation postediting. This approach relies on touch actions that users perform to indicate translation errors. We present a dual-encoder model to handle the actions and generate refined translations. To mimic the user feedback, we adopt the TER algorithm comparing between draft translations and references to automatically extract the simulated actions for training data construction. Experiments on translation datasets with simulated editing actions show that our method significantly improves original translation of Transformer (up to 25.31 BLEU) and outperforms existing interactive translation methods (up to 16.64 BLEU). We also conduct experiments on post-editing dataset to further prove the robustness and effectiveness of our method.</abstract>
       <url hash="69f9ea60">2020.aacl-main.1</url>
@@ -38,7 +38,7 @@
       <author><first>Li</first><last>Dong</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Xianling</first><last>Mao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>12–17</pages>
       <abstract>Multilingual pretrained language models (such as multilingual BERT) have achieved impressive results for cross-lingual transfer. However, due to the constant model capacity, multilingual pre-training usually lags behind the monolingual competitors. In this work, we present two approaches to improve zero-shot cross-lingual classification, by transferring the knowledge from monolingual pretrained models to multilingual ones. Experimental results on two cross-lingual classification benchmarks show that our methods outperform vanilla multilingual fine-tuning.</abstract>
       <url hash="8441b124">2020.aacl-main.2</url>
@@ -60,7 +60,7 @@
       <title>Graph Attention Network with Memory Fusion for Aspect-level Sentiment Analysis</title>
       <author><first>Li</first><last>Yuan</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>27–36</pages>
       <abstract>Aspect-level sentiment analysis(ASC) predicts each specific aspect term’s sentiment polarity in a given text or review. Recent studies used attention-based methods that can effectively improve the performance of aspect-level sentiment analysis. These methods ignored the syntactic relationship between the aspect and its corresponding context words, leading the model to focus on syntactically unrelated words mistakenly. One proposed solution, the graph convolutional network (GCN), cannot completely avoid the problem. While it does incorporate useful information about syntax, it assigns equal weight to all the edges between connected words. It may still incorrectly associate unrelated words to the target aspect through the iterations of graph convolutional propagation. In this study, a graph attention network with memory fusion is proposed to extend GCN’s idea by assigning different weights to edges. Syntactic constraints can be imposed to block the graph convolutional propagation of unrelated words. A convolutional layer and a memory fusion were applied to learn and exploit multiword relations and draw different weights of words to improve performance further. Experimental results on five datasets show that the proposed method yields better performance than existing methods.</abstract>
@@ -134,7 +134,7 @@
       <author><first>Chenlong</first><last>Hu</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>80–86</pages>
       <abstract>We propose a simple and effective method for incorporating word clusters into the Continuous Bag-of-Words (CBOW) model. Specifically, we propose to replace infrequent input and output words in CBOW model with their clusters. The resulting cluster-incorporated CBOW model produces embeddings of frequent words and a small amount of cluster embeddings, which will be fine-tuned in downstream tasks. We empirically show our replacing method works well on several downstream tasks. Through our analysis, we show that our method might be also useful for other similar models which produce word embeddings.</abstract>
       <url hash="ee96515b">2020.aacl-main.10</url>
@@ -167,7 +167,7 @@
       <title>High-order Refining for End-to-end <fixed-case>C</fixed-case>hinese Semantic Role Labeling</title>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>100–105</pages>
       <abstract>Current end-to-end semantic role labeling is mostly accomplished via graph-based neural models. However, these all are first-order models, where each decision for detecting any predicate-argument pair is made in isolation with local features. In this paper, we present a high-order refining mechanism to perform interaction between all predicate-argument pairs. Based on the baseline graph model, our high-order refining module learns higher-order features between all candidate pairs via attention calculation, which are later used to update the original token representations. After several iterations of refinement, the underlying token representations can be enriched with globally interacted features. Our high-order model achieves state-of-the-art results on Chinese SRL data, including CoNLL09 and Universal Proposition Bank, meanwhile relieving the long-range dependency issues.</abstract>
       <url hash="a2d1a976">2020.aacl-main.13</url>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="14">
       <title>Exploiting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Synset and Hypernym Representations for Answer Selection</title>
-      <author><first>Weikang</first><last>Li</last></author>
+      <author id="weigang-li"><first>Weikang</first><last>Li</last></author>
       <author><first>Yunfang</first><last>Wu</last></author>
       <pages>106–115</pages>
       <abstract>Answer selection (AS) is an important subtask of document-based question answering (DQA). In this task, the candidate answers come from the same document, and each answer sentence is semantically related to the given question, which makes it more challenging to select the true answer. WordNet provides powerful knowledge about concepts and their semantic relations so we employ WordNet to enrich the abilities of paraphrasing and reasoning of the network-based question answering model. Specifically, we exploit the synset and hypernym concepts to enrich the word representation and incorporate the similarity scores of two concepts that share the synset or hypernym relations into the attention mechanism. The proposed WordNet-enhanced hierarchical model (WEHM) consists of four modules, including WordNet-enhanced word representation, sentence encoding, WordNet-enhanced attention mechanism, and hierarchical document encoding. Extensive experiments on the public WikiQA and SelQA datasets demonstrate that our proposed model significantly improves the baseline system and outperforms all existing state-of-the-art methods by a large margin.</abstract>
@@ -200,7 +200,7 @@
       <author><first>Zheng</first><last>Zhang</last></author>
       <author><first>Lizi</first><last>Liao</last></author>
       <author><first>Xiaoyan</first><last>Zhu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Zitao</first><last>Liu</last></author>
       <author><first>Yan</first><last>Huang</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
@@ -316,7 +316,7 @@
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Zijian</first><last>Yang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>212–223</pages>
       <abstract>In order to combat overfitting and in pursuit of better generalization, label smoothing is widely applied in modern neural machine translation systems. The core idea is to penalize over-confident outputs and regularize the model so that its outputs do not diverge too much from some prior distribution. While training perplexity generally gets worse, label smoothing is found to consistently improve test performance. In this work, we aim to better understand label smoothing in the context of neural machine translation. Theoretically, we derive and explain exactly what label smoothing is optimizing for. Practically, we conduct extensive experiments by varying which tokens to smooth, tuning the probability mass to be deducted from the true targets and considering different prior distributions. We show that label smoothing is theoretically well-motivated, and by carefully choosing hyperparameters, the practical performance of strong neural machine translation systems can be further improved.</abstract>
       <url hash="480914d7">2020.aacl-main.25</url>
@@ -387,7 +387,7 @@
       <author><first>Dushyant Singh</first><last>Chauhan</last></author>
       <author><first>Dhanush</first><last>S R</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>281–290</pages>
       <abstract>In this paper, we aim at learning the relationships and similarities of a variety of tasks, such as humour detection, sarcasm detection, offensive content detection, motivational content detection and sentiment analysis on a somewhat complicated form of information, i.e., memes. We propose a multi-task, multi-modal deep learning framework to solve multiple tasks simultaneously. For multi-tasking, we propose two attention-like mechanisms viz., Inter-task Relationship Module (iTRM) and Inter-class Relationship Module (iCRM). The main motivation of iTRM is to learn the relationship between the tasks to realize how they help each other. In contrast, iCRM develops relations between the different classes of tasks. Finally, representations from both the attentions are concatenated and shared across the five tasks (i.e., humour, sarcasm, offensive, motivational, and sentiment) for multi-tasking. We use the recently released dataset in the Memotion Analysis task @ SemEval 2020, which consists of memes annotated for the classes as mentioned above. Empirical results on Memotion dataset show the efficacy of our proposed approach over the existing state-of-the-art systems (Baseline and SemEval 2020 winner). The evaluation also indicates that the proposed multi-task framework yields better performance over the single-task learning.</abstract>
       <url hash="63d636f7">2020.aacl-main.31</url>
@@ -411,7 +411,7 @@
       <author><first>Mukuntha</first><last>Narayanan Sundararaman</last></author>
       <author><first>Zishan</first><last>Ahmad</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>303–312</pages>
       <abstract>Unsupervised style transfer in text has previously been explored through the sentiment transfer task. The task entails inverting the overall sentiment polarity in a given input sentence, while preserving its content. From the Aspect-Based Sentiment Analysis (ABSA) task, we know that multiple sentiment polarities can often be present together in a sentence with multiple aspects. In this paper, the task of aspect-level sentiment controllable style transfer is introduced, where each of the aspect-level sentiments can individually be controlled at the output. To achieve this goal, a BERT-based encoder-decoder architecture with saliency weighted polarity injection is proposed, with unsupervised training strategies, such as ABSA masked-language-modelling. Through both automatic and manual evaluation, we show that the system is successful in controlling aspect-level sentiments.</abstract>
       <url hash="ddbcd8b1">2020.aacl-main.33</url>
@@ -448,7 +448,7 @@
       <author><first>Moin</first><last>Nadeem</last></author>
       <author><first>Tianxing</first><last>He</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>334–346</pages>
       <abstract>This work studies the widely adopted ancestral sampling algorithms for auto-regressive language models. We use the quality-diversity (Q-D) trade-off to investigate three popular sampling methods (top-k, nucleus and tempered sampling). We focus on the task of open-ended language generation, and first show that the existing sampling algorithms have similar performance. By carefully inspecting the transformations defined by different sampling algorithms, we identify three key properties that are shared among them: entropy reduction, order preservation, and slope preservation. To validate the importance of the identified properties, we design two sets of new sampling methods: one set in which each algorithm satisfies all three properties, and one set in which each algorithm violates at least one of the properties. We compare their performance with existing algorithms, and find that violating the identified properties could lead to drastic performance degradation, as measured by the Q-D trade-off. On the other hand, we find that the set of sampling algorithms that satisfy these properties performs on par with the existing sampling algorithms.</abstract>
       <url hash="7fea9f82">2020.aacl-main.36</url>
@@ -474,7 +474,7 @@
       <author><first>Renxuan Albert</first><last>Li</last></author>
       <author><first>Ihab</first><last>Hajjar</last></author>
       <author><first>Felicia</first><last>Goldstein</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>358–365</pages>
       <abstract>This paper presents a new dataset, B-SHARP, that can be used to develop NLP models for the detection of Mild Cognitive Impairment (MCI) known as an early sign of Alzheimer’s disease. Our dataset contains 1-2 min speech segments from 326 human subjects for 3 topics, (1) daily activity, (2) room environment, and (3) picture description, and their transcripts so that a total of 650 speech segments are collected. Given the B-SHARP dataset, several hierarchical text classification models are developed that jointly learn combinatory features across all 3 topics. The best performance of 74.1% is achieved by an ensemble model that adapts 3 types of transformer encoders. To the best of our knowledge, this is the first work that builds deep learning-based text classification models on multiple contents for the detection of MCI.</abstract>
       <url hash="573e8e8c">2020.aacl-main.38</url>
@@ -485,11 +485,11 @@
       <title>An Exploratory Study on Multilingual Quality Estimation</title>
       <author><first>Shuo</first><last>Sun</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Adithya</first><last>Renduchintala</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>366–377</pages>
       <abstract>Predicting the quality of machine translation has traditionally been addressed with language-specific models, under the assumption that the quality label distribution or linguistic features exhibit traits that are not shared across languages. An obvious disadvantage of this approach is the need for labelled data for each given language pair. We challenge this assumption by exploring different approaches to multilingual Quality Estimation (QE), including using scores from translation models. We show that these outperform single-language models, particularly in less balanced quality label distributions and low-resource settings. In the extreme case of zero-shot QE, we show that it is possible to accurately predict quality for any given new language from models trained on other languages. Our findings indicate that state-of-the-art neural QE models based on powerful pre-trained representations generalise well across languages, making them more applicable in real-world settings.</abstract>
@@ -500,7 +500,7 @@
     <paper id="40">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Transliteration with Phonetic Auxiliary Task</title>
       <author><first>Yuan</first><last>He</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>378–388</pages>
       <abstract>Approaching named entities transliteration as a Neural Machine Translation (NMT) problem is common practice. While many have applied various NMT techniques to enhance machine transliteration models, few focus on the linguistic features particular to the relevant languages. In this paper, we investigate the effect of incorporating phonetic features for English-to-Chinese transliteration under the multi-task learning (MTL) setting—where we define a phonetic auxiliary task aimed to improve the generalization performance of the main transliteration task. In addition to our system, we also release a new English-to-Chinese dataset and propose a novel evaluation metric which considers multiple possible transliterations given a source name. Our results show that the multi-task model achieves similar performance as the previous state of the art with a model of a much smaller size.</abstract>
       <url hash="2bfc4e0f">2020.aacl-main.40</url>
@@ -512,7 +512,7 @@
       <author><first>Zijian</first><last>Yang</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>389–395</pages>
       <abstract>Attention-based encoder-decoder models have achieved great success in neural machine translation tasks. However, the lengths of the target sequences are not explicitly predicted in these models. This work proposes length prediction as an auxiliary task and set up a sub-network to obtain the length information from the encoder. Experimental results show that the length prediction sub-network brings improvements over the strong baseline system and that the predicted length can be used as an alternative to length normalization during decoding.</abstract>
       <url hash="f6b395e0">2020.aacl-main.41</url>
@@ -533,7 +533,7 @@
       <title>Heads-up! Unsupervised Constituency Parsing via Self-Attention Heads</title>
       <author><first>Bowen</first><last>Li</last></author>
       <author><first>Taeuk</first><last>Kim</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Frank</first><last>Keller</last></author>
       <pages>409–424</pages>
       <abstract>Transformer-based pre-trained language models (PLMs) have dramatically improved the state of the art in NLP across many tasks. This has led to substantial interest in analyzing the syntactic knowledge PLMs learn. Previous approaches to this question have been limited, mostly using test suites or probes. Here, we propose a novel fully unsupervised parsing approach that extracts constituency trees from PLM attention heads. We rank transformer attention heads based on their inherent properties, and create an ensemble of high-ranking heads to produce the final tree. Our method is adaptable to low-resource languages, as it does not rely on development sets, which can be expensive to annotate. Our experiments show that the proposed method often outperform existing approaches if there is no development set present. Our unsupervised parser can also be used as a tool to analyze the grammars PLMs learn implicitly. For this, we use the parse trees induced by our method to train a neural PCFG and compare it to a grammar derived from a human-annotated treebank.</abstract>
@@ -545,7 +545,7 @@
       <title>Building Location Embeddings from Physical Trajectories and Textual Representations</title>
       <author><first>Laura</first><last>Biester</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>425–434</pages>
       <abstract>Word embedding methods have become the de-facto way to represent words, having been successfully applied to a wide array of natural language processing tasks. In this paper, we explore the hypothesis that embedding methods can also be effectively used to represent spatial locations. Using a new dataset consisting of the location trajectories of 729 students over a seven month period and text data related to those locations, we implement several strategies to create location embeddings, which we then use to create embeddings of the sequences of locations a student has visited. To identify the surface level properties captured in the representations, we propose a number of probing tasks such as the presence of a specific location in a sequence or the type of activities that take place at a location. We then leverage the representations we generated and employ them in more complex downstream tasks ranging from predicting a student’s area of study to a student’s depression level, showing the effectiveness of these location embeddings.</abstract>
       <url hash="59322e4b">2020.aacl-main.44</url>
@@ -609,7 +609,7 @@
       <title>Systematic Generalization on g<fixed-case>SCAN</fixed-case> with Language Conditioned Embedding</title>
       <author><first>Tong</first><last>Gao</last></author>
       <author><first>Qi</first><last>Huang</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>491–503</pages>
       <abstract>Systematic Generalization refers to a learning algorithm’s ability to extrapolate learned behavior to unseen situations that are distinct but semantically similar to its training data. As shown in recent work, state-of-the-art deep learning models fail dramatically even on tasks for which they are designed when the test set is systematically different from the training data. We hypothesize that explicitly modeling the relations between objects in their contexts while learning their representations will help achieve systematic generalization. Therefore, we propose a novel method that learns objects’ contextualized embeddings with dynamic message passing conditioned on the input natural language and end-to-end trainable with other downstream deep learning modules. To our knowledge, this model is the first one that significantly outperforms the provided baseline and reaches state-of-the-art performance on grounded SCAN (gSCAN), a grounded natural language navigation dataset designed to require systematic generalization in its test splits.</abstract>
       <url hash="545b660a">2020.aacl-main.49</url>
@@ -619,7 +619,7 @@
     <paper id="50">
       <title>Are Scene Graphs Good Enough to Improve Image Captioning?</title>
       <author><first>Victor</first><last>Milewski</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <pages>504–515</pages>
       <abstract>Many top-performing image captioning models rely solely on object features computed with an object detection model to generate image descriptions. However, recent studies propose to directly use scene graphs to introduce information about object relations into captioning, hoping to better describe interactions between objects. In this work, we thoroughly investigate the use of scene graphs in image captioning. We empirically study whether using additional scene graph encoders can lead to better image descriptions and propose a conditional graph attention network (C-GAT), where the image captioning decoder state is used to condition the graph updates. Finally, we determine to what extent noise in the predicted scene graphs influence caption quality. Overall, we find no significant difference between models that use scene graph features and models that only use object detection features across different captioning metrics, which suggests that existing scene graph generation models are still too noisy to be useful in image captioning. Moreover, although the quality of predicted scene graphs is very low in general, when using high quality scene graphs we obtain gains of up to 3.3 CIDEr compared to a strong Bottom-Up Top-Down baseline.</abstract>
@@ -656,7 +656,7 @@
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Yu</first><last>Shi</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>536–541</pages>
       <abstract>Cross-lingual Summarization (CLS) aims at producing a summary in the target language for an article in the source language. Traditional solutions employ a two-step approach, i.e. translate -&gt; summarize or summarize -&gt; translate. Recently, end-to-end models have achieved better results, but these approaches are mostly limited by their dependence on large-scale labeled data. We propose a solution based on mixed-lingual pre-training that leverages both cross-lingual tasks such as translation and monolingual tasks like masked language models. Thus, our model can leverage the massive monolingual data to enhance its modeling of language. Moreover, the architecture has no task-specific components, which saves memory and increases optimization efficiency. We show in experiments that this pre-training scheme can effectively boost the performance of cross-lingual summarization. In NCLS dataset, our model achieves an improvement of 2.82 (English to Chinese) and 1.15 (Chinese to English) ROUGE-1 scores over state-of-the-art results.</abstract>
       <url hash="9204b96a">2020.aacl-main.53</url>
@@ -677,11 +677,11 @@
     <paper id="55">
       <title>Leveraging Structured Metadata for Improving Question Answering on the Web</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <author><first>Adam</first><last>Fourney</last></author>
       <author><first>Robert</first><last>Sim</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>551–556</pages>
       <abstract>We show that leveraging metadata information from web pages can improve the performance of models for answer passage selection/reranking. We propose a neural passage selection model that leverages metadata information with a fine-grained encoding strategy, which learns the representation for metadata predicates in a hierarchical way. The models are evaluated on the MS MARCO (Nguyen et al., 2016) and Recipe-MARCO datasets. Results show that our models significantly outperform baseline models, which do not incorporate metadata. We also show that the fine-grained encoding’s advantage over other strategies for encoding the metadata.</abstract>
       <url hash="f361ccd8">2020.aacl-main.55</url>
@@ -696,8 +696,8 @@
       <author><first>Yada</first><last>Pruksachatkun</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
       <author><first>Clara</first><last>Vania</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>557–575</pages>
       <abstract>Intermediate-task training—fine-tuning a pretrained model on an intermediate task before fine-tuning again on the target task—often improves model performance substantially on language understanding tasks in monolingual English settings. We investigate whether English intermediate-task training is still helpful on non-English target tasks. Using nine intermediate language-understanding tasks, we evaluate intermediate-task transfer in a zero-shot cross-lingual setting on the XTREME benchmark. We see large improvements from intermediate training on the BUCC and Tatoeba sentence retrieval tasks and moderate improvements on question-answering target tasks. MNLI, SQuAD and HellaSwag achieve the best overall results as intermediate tasks, while multi-task intermediate offers small additional improvements. Using our best intermediate-task models for each target task, we obtain a 5.4 point improvement over XLM-R Large on the XTREME benchmark, setting the state of the art as of June 2020. We also investigate continuing multilingual MLM during intermediate-task training and using machine-translated intermediate-task data, but neither consistently outperforms simply performing English intermediate-task training.</abstract>
       <url hash="52e751af">2020.aacl-main.56</url>
@@ -739,7 +739,7 @@
       <title>Liputan6: A Large-scale <fixed-case>I</fixed-case>ndonesian Dataset for Text Summarization</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>598–608</pages>
       <abstract>In this paper, we introduce a large-scale Indonesian summarization dataset. We harvest articles from Liputan6.com, an online news portal, and obtain 215,827 document–summary pairs. We leverage pre-trained language models to develop benchmark extractive and abstractive summarization methods over the dataset with multilingual and monolingual BERT-based models. We include a thorough error analysis by examining machine-generated summaries that have low ROUGE scores, and expose both issues with ROUGE itself, as well as with extractive and abstractive summarization models.</abstract>
       <url hash="46725b36">2020.aacl-main.60</url>
@@ -760,7 +760,7 @@
     <paper id="62">
       <title>Massively Multilingual Document Alignment with Cross-lingual Sentence-Mover’s Distance</title>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>616–625</pages>
       <abstract>Document alignment aims to identify pairs of documents in two distinct languages that are of comparable content or translations of each other. Such aligned data can be used for a variety of NLP tasks from training cross-lingual representations to mining parallel data for machine translation. In this paper we develop an unsupervised scoring function that leverages cross-lingual sentence embeddings to compute the semantic distance between documents in different languages. These semantic distances are then used to guide a document alignment algorithm to properly pair cross-lingual web documents across a variety of low, mid, and high-resource language pairs. Recognizing that our proposed scoring function and other state of the art methods are computationally intractable for long web documents, we utilize a more tractable greedy algorithm that performs comparably. We experimentally demonstrate that our distance metric performs better alignment than current baselines outperforming them by 7% on high-resource language pairs, 15% on mid-resource language pairs, and 22% on low-resource language pairs.</abstract>
       <url hash="fa121ac9">2020.aacl-main.62</url>
@@ -795,7 +795,7 @@
       <title><fixed-case>DAPPER</fixed-case>: Learning Domain-Adapted Persona Representation Using Pretrained <fixed-case>BERT</fixed-case> and External Memory</title>
       <author><first>Prashanth</first><last>Vijayaraghavan</last></author>
       <author><first>Eric</first><last>Chu</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>643–652</pages>
       <abstract>Research in building intelligent agents have emphasized the need for understanding characteristic behavior of people. In order to reflect human-like behavior, agents require the capability to comprehend the context, infer individualized persona patterns and incrementally learn from experience. In this paper, we present a model called DAPPER that can learn to embed persona from natural language and alleviate task or domain-specific data sparsity issues related to personas. To this end, we implement a text encoding strategy that leverages a pretrained language model and an external memory to produce domain-adapted persona representations. Further, we evaluate the transferability of these embeddings by simulating low-resource scenarios. Our comparative study demonstrates the capability of our method over other approaches towards learning rich transferable persona embeddings. Empirical evidence suggests that the learnt persona embeddings can be effective in downstream tasks like hate speech detection.</abstract>
       <url hash="74c10035">2020.aacl-main.65</url>
@@ -828,7 +828,7 @@
       <title><fixed-case>A</fixed-case>sking <fixed-case>C</fixed-case>rowdworkers to <fixed-case>W</fixed-case>rite <fixed-case>E</fixed-case>ntailment <fixed-case>E</fixed-case>xamples: <fixed-case>T</fixed-case>he <fixed-case>B</fixed-case>est of <fixed-case>B</fixed-case>ad Options</title>
       <author><first>Clara</first><last>Vania</last></author>
       <author><first>Ruijie</first><last>Chen</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>672–686</pages>
       <abstract>Large-scale natural language inference (NLI) datasets such as SNLI or MNLI have been created by asking crowdworkers to read a premise and write three new hypotheses, one for each possible semantic relationships (entailment, contradiction, and neutral). While this protocol has been used to create useful benchmark data, it remains unclear whether the writing-based annotation protocol is optimal for any purpose, since it has not been evaluated directly. Furthermore, there is ample evidence that crowdworker writing can introduce artifacts in the data. We investigate two alternative protocols which automatically create candidate (premise, hypothesis) pairs for annotators to label. Using these protocols and a writing-based baseline, we collect several new English NLI datasets of over 3k examples each, each using a fixed amount of annotator time, but a varying number of examples to fit that time budget. Our experiments on NLI and transfer learning show negative results: None of the alternative protocols outperforms the baseline in evaluations of generalization within NLI or on transfer to outside target tasks. We conclude that crowdworker writing still the best known option for entailment data, highlighting the need for further data collection work to focus on improving writing-based annotation processes.</abstract>
       <url hash="310210a4">2020.aacl-main.68</url>
@@ -867,8 +867,8 @@
       <author><first>Haimin</first><last>Zhang</last></author>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>Rakesh</first><last>Gosangi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>706–719</pages>
       <abstract>An NLP model’s ability to reason should be independent of language. Previous works utilize Natural Language Inference (NLI) to understand the reasoning ability of models, mostly focusing on high resource languages like English. To address scarcity of data in low-resource languages such as Hindi, we use data recasting to create NLI datasets for four existing text classification datasets. Through experiments, we show that our recasted dataset is devoid of statistical irregularities and spurious patterns. We further study the consistency in predictions of the textual entailment models and propose a consistency regulariser to remove pairwise-inconsistencies in predictions. We propose a novel two-step classification method which uses textual-entailment predictions for classification task. We further improve the performance by using a joint-objective for classification and textual entailment. We therefore highlight the benefits of data recasting and improvements on classification performance using our approach with supporting experimental results.</abstract>
       <url hash="a91456ff">2020.aacl-main.71</url>
@@ -880,7 +880,7 @@
       <author><first>Keng-Te</first><last>Liao</last></author>
       <author><first>Cheng-Syuan</first><last>Lee</last></author>
       <author><first>Zhong-Yu</first><last>Huang</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>720–725</pages>
       <abstract>Disentangled representations have attracted increasing attention recently. However, how to transfer the desired properties of disentanglement to word representations is unclear. In this work, we propose to transform typical dense word vectors into disentangled embeddings featuring improved interpretability via encoding polysemous semantics separately. We also found the modular structure of our disentangled word embeddings helps generate more efficient and effective features for natural language processing tasks.</abstract>
       <url hash="18e4371f">2020.aacl-main.72</url>
@@ -893,7 +893,7 @@
       <author><first>Mengfei</first><last>Guo</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Ying</first><last>Li</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yajuan</first><last>Lyu</last></author>
       <author><first>Yong</first><last>Zhu</last></author>
       <pages>726–734</pages>
@@ -985,7 +985,7 @@
     <paper id="80">
       <title>Point-of-Interest Type Inference from Social Media Text</title>
       <author><first>Danae</first><last>Sánchez Villegas</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>804–810</pages>
       <abstract>Physical places help shape how we perceive the experiences we have there. We study the relationship between social media text and the type of the place from where it was posted, whether a park, restaurant, or someplace else. To facilitate this, we introduce a novel data set of ~200,000 English tweets published from 2,761 different points-of-interest in the U.S., enriched with place type information. We train classifiers to predict the type of the location a tweet was sent from that reach a macro F1 of 43.67 across eight classes and uncover the linguistic markers associated with each type of place. The ability to predict semantic place information from a tweet has applications in recommendation systems, personalization services and cultural geography.</abstract>
@@ -1039,7 +1039,7 @@
       <author><first>Mingyu</first><last>Wan</last></author>
       <author><first>Qi</first><last>Su</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>833–842</pages>
       <abstract>Mandarin Alphabetical Word (MAW) is one indispensable component of Modern Chinese that demonstrates unique code-mixing idiosyncrasies influenced by language exchanges. Yet, this interesting phenomenon has not been properly addressed and is mostly excluded from the Chinese language system. This paper addresses the core problem of MAW identification and proposes to construct a large collection of MAWs from Sina Weibo (SMAW) using an automatic web-based technique which includes rule-based identification, informatics-based extraction, as well as Baidu search engine validation. A collection of 16,207 qualified SMAWs are obtained using this technique along with an annotated corpus of more than 200,000 sentences for linguistic research and applicable inquiries.</abstract>
       <url hash="1e6f05d4">2020.aacl-main.84</url>
@@ -1051,7 +1051,7 @@
       <title><fixed-case>I</fixed-case>ndo<fixed-case>NLU</fixed-case>: Benchmark and Resources for Evaluating <fixed-case>I</fixed-case>ndonesian Natural Language Understanding</title>
       <author><first>Bryan</first><last>Wilie</last></author>
       <author><first>Karissa</first><last>Vincentio</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Xiaohong</first><last>Li</last></author>
       <author><first>Zhi Yuan</first><last>Lim</last></author>
@@ -1072,7 +1072,7 @@
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>858–872</pages>
       <abstract>The gaze behaviour of a reader is helpful in solving several NLP tasks such as automatic essay grading. However, collecting gaze behaviour from readers is costly in terms of time and money. In this paper, we propose a way to improve automatic essay grading using gaze behaviour, which is learnt at run time using a multi-task learning framework. To demonstrate the efficacy of this multi-task learning based approach to automatic essay grading, we collect gaze behaviour for 48 essays across 4 essay sets, and learn gaze behaviour for the rest of the essays, numbering over 7000 essays. Using the learnt gaze behaviour, we can achieve a statistically significant improvement in performance over the state-of-the-art system for the essay sets where we have gaze data. We also achieve a statistically significant improvement for 4 other essay sets, numbering about 6000 essays, where we have no gaze behaviour data available. Our approach establishes that learning gaze behaviour improves automatic essay grading.</abstract>
       <url hash="e95c666d">2020.aacl-main.86</url>
@@ -1105,7 +1105,7 @@
       <author><first>Masato</first><last>Neishi</last></author>
       <author><first>Yuta</first><last>Hayashibe</last></author>
       <author><first>Hiroki</first><last>Ouchi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>890–899</pages>
       <abstract>Explainable recommendation is a good way to improve user satisfaction. However, explainable recommendation in dialogue is challenging since it has to handle natural language as both input and output. To tackle the challenge, this paper proposes a novel and practical task to explain evidences in recommending hotels given vague requests expressed freely in natural language. We decompose the process into two subtasks on hotel reviews: Evidence Identification and Evidence Explanation. The former predicts whether or not a sentence contains evidence that expresses why a given request is satisfied. The latter generates a recommendation sentence given a request and an evidence sentence. In order to address these subtasks, we build an Evidence-based Explanation dataset, which is the largest dataset for explaining evidences in recommending hotels for vague requests. The experimental results demonstrate that the BERT model can find evidence sentences with respect to various vague requests and that the LSTM-based model can generate recommendation sentences.</abstract>
       <url hash="da69c7ae">2020.aacl-main.89</url>
@@ -1114,10 +1114,10 @@
     </paper>
     <paper id="90">
       <title>A Unified Framework for Multilingual and Code-Mixed Visual Question Answering</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Pabitra</first><last>Lenka</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>900–913</pages>
       <abstract>In this paper, we propose an effective deep learning framework for multilingual and code- mixed visual question answering. The pro- posed model is capable of predicting answers from the questions in Hindi, English or Code- mixed (Hinglish: Hindi-English) languages. The majority of the existing techniques on Vi- sual Question Answering (VQA) focus on En- glish questions only. However, many applica- tions such as medical imaging, tourism, visual assistants require a multilinguality-enabled module for their widespread usages. As there is no available dataset in English-Hindi VQA, we firstly create Hindi and Code-mixed VQA datasets by exploiting the linguistic properties of these languages. We propose a robust tech- nique capable of handling the multilingual and code-mixed question to provide the answer against the visual information (image). To better encode the multilingual and code-mixed questions, we introduce a hierarchy of shared layers. We control the behaviour of these shared layers by an attention-based soft layer sharing mechanism, which learns how shared layers are applied in different ways for the dif- ferent languages of the question. Further, our model uses bi-linear attention with a residual connection to fuse the language and image fea- tures. We perform extensive evaluation and ablation studies for English, Hindi and Code- mixed VQA. The evaluation shows that the proposed multilingual model achieves state-of- the-art performance in all these settings.</abstract>
       <url hash="55f2bbad">2020.aacl-main.90</url>
@@ -1128,8 +1128,8 @@
       <title>Toxic Language Detection in Social Media for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: New Dataset and Multilingual Analysis</title>
       <author><first>João Augusto</first><last>Leite</last></author>
       <author><first>Diego</first><last>Silva</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>914–924</pages>
       <abstract>Hate speech and toxic comments are a common concern of social media platform users. Although these comments are, fortunately, the minority in these platforms, they are still capable of causing harm. Therefore, identifying these comments is an important task for studying and preventing the proliferation of toxicity in social media. Previous work in automatically detecting toxic comments focus mainly in English, with very few work in languages like Brazilian Portuguese. In this paper, we propose a new large-scale dataset for Brazilian Portuguese with tweets annotated as either toxic or non-toxic or in different types of toxicity. We present our dataset collection and annotation process, where we aimed to select candidates covering multiple demographic groups. State-of-the-art BERT models were able to achieve 76% macro-F1 score using monolingual data in the binary case. We also show that large-scale monolingual data is still needed to create more accurate models, despite recent advances in multilingual approaches. An error analysis and experiments with multi-label classification show the difficulty of classifying certain types of toxic comments that appear less frequently in our data and highlights the need to develop models that are aware of different categories of toxicity.</abstract>
       <url hash="2c1bc0e5">2020.aacl-main.91</url>
@@ -1138,9 +1138,9 @@
     </paper>
     <paper id="92">
       <title>Measuring What Counts: The Case of Rumour Stance Classification</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Diego</first><last>Silva</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>925–932</pages>
       <abstract>Stance classification can be a powerful tool for understanding whether and which users believe in online rumours. The task aims to automatically predict the stance of replies towards a given rumour, namely support, deny, question, or comment. Numerous methods have been proposed and their performance compared in the RumourEval shared tasks in 2017 and 2019. Results demonstrated that this is a challenging problem since naturally occurring rumour stance data is highly imbalanced. This paper specifically questions the evaluation metrics used in these shared tasks. We re-evaluate the systems submitted to the two RumourEval tasks and show that the two widely adopted metrics – accuracy and macro-F1 – are not robust for the four-class imbalanced task of rumour stance classification, as they wrongly favour systems with highly skewed accuracy towards the majority class. To overcome this problem, we propose new evaluation metrics for rumour stance detection. These are not only robust to imbalanced data but also score higher systems that are capable of recognising the two most informative minority classes (support and deny).</abstract>
       <url hash="551d0d20">2020.aacl-main.92</url>
@@ -1278,7 +1278,7 @@
     <paper id="11">
       <title>Formal <fixed-case>S</fixed-case>anskrit Syntax: A Specification for Programming Language</title>
       <author><first>K. Kabi</first><last>Khanganba</last></author>
-      <author><first>Girish</first><last>Jha</last></author>
+      <author id="girish-nath-jha"><first>Girish</first><last>Jha</last></author>
       <pages>72–78</pages>
       <abstract>The paper discusses the syntax of the primary statements of the Sanskritam, a programming language specification based on natural Sanskrit under a doctoral thesis. By a statement, we mean a syntactic unit regardless of its computational operations of variable declarations, program executions or evaluations of Boolean expressions etc. We have selected six common primary statements of declaration, assignment, inline initialization, if-then-else, for loop and while loop. The specification partly overlaps the ideas of natural language programming, Controlled Natural Language (Kunh, 2013), and Natural Language subset. The practice and application of structured natural language set in a discourse are deeply rooted in the theoretical text tradition of Sanskrit, like the sūtra-based disciplines and Navya-Nyāya (NN) formal language, etc. The effort is a kind of continuation and application of such traditions and their techniques in the modern field of Sanskrit NLP.</abstract>
       <url hash="caef521b">2020.aacl-srw.11</url>
@@ -1288,7 +1288,7 @@
     <paper id="12">
       <title>Resource Creation and Evaluation of Aspect Based Sentiment Analysis in <fixed-case>U</fixed-case>rdu</title>
       <author><first>Sadaf</first><last>Rani</last></author>
-      <author><first>Muhammad Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Muhammad Waqas</first><last>Anwar</last></author>
       <pages>79–84</pages>
       <abstract>Along with the rise of people generated content on social sites, sentiment analysis has gained more importance. Aspect Based Sentiment Analysis (ABSA) is a task of identifying the sentiment at aspect level. It has more importance than sentiment analysis from commercial point of view. To the best of our knowledge, there is very few work on ABSA in Urdu language. Recent work on ABSA has limitations. Only predefined aspects are identified in a specific domain. So our focus is on the creation and evaluation of dataset for ABSA in Urdu language which will support multiple aspects. This dataset will provide a baseline evaluation for ABSA systems.</abstract>
       <url hash="ceb3fefd">2020.aacl-srw.12</url>
@@ -1298,7 +1298,7 @@
     <paper id="13">
       <title>Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies</title>
       <author><first>Nikhil</first><last>Prabhu</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>85–92</pages>
       <abstract>Explicit mechanisms for copying have improved the performance of neural models for sequence-to-sequence tasks in the low-resource setting. However, they rely on an overlap between source and target vocabularies. Here, we propose a model that does not: a pointer-generator transformer for disjoint vocabularies. We apply our model to a low-resource version of the grapheme-to-phoneme conversion (G2P) task, and show that it outperforms a standard transformer by an average of 5.1 WER over 15 languages. While our model does not beat the the best performing baseline, we demonstrate that it provides complementary information to it: an oracle that combines the best outputs of the two models improves over the strongest baseline by 7.7 WER on average in the low-resource setting. In the high-resource setting, our model performs comparably to a standard transformer.</abstract>
       <url hash="3f5bdd2a">2020.aacl-srw.13</url>
@@ -1319,7 +1319,7 @@
     <paper id="15">
       <title>Document-Level Neural Machine Translation Using <fixed-case>BERT</fixed-case> as Context Encoder</title>
       <author><first>Zhiyu</first><last>Guo</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>101–107</pages>
       <abstract>Large-scale pre-trained representations such as BERT have been widely used in many natural language understanding tasks. The methods of incorporating BERT into document-level machine translation are still being explored. BERT is able to understand sentence relationships since BERT is pre-trained using the next sentence prediction task. In our work, we leverage this property to improve document-level machine translation. In our proposed model, BERT performs as a context encoder to achieve document-level contextual information, which is then integrated into both the encoder and decoder. Experiment results show that our proposed method can significantly outperform strong document-level machine translation baselines on BLEU score. Moreover, the ablation study shows our method can capture document-level context information to boost translation performance.</abstract>
       <url hash="c006c668">2020.aacl-srw.15</url>
@@ -1418,7 +1418,7 @@
       <author><first>Ankur</first><last>Sonawane</last></author>
       <author><first>Sujeet Kumar</first><last>Vishwakarma</last></author>
       <author><first>Bhavana</first><last>Srivastava</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>165–171</pages>
       <abstract>Automated grammatical error correction has been explored as an important research problem within NLP, with the majority of the work being done on English and similar resource-rich languages. Grammar correction using neural networks is a data-heavy task, with the recent state of the art models requiring datasets with millions of annotated sentences for proper training. It is difficult to find such resources for Indic languages due to their relative lack of digitized content and complex morphology, compared to English. We address this problem by generating a large corpus of artificial inflectional errors for training GEC models. Moreover, to evaluate the performance of models trained on this dataset, we create a corpus of real Hindi errors extracted from Wikipedia edits. Analyzing this dataset with a modified version of the ERRANT error annotation toolkit, we find that inflectional errors are very common in this language. Finally, we produce the initial baseline results using state of the art methods developed for English.</abstract>
       <url hash="28d700f6">2020.aacl-srw.24</url>
@@ -1461,8 +1461,8 @@
     <paper id="2">
       <title><fixed-case>A</fixed-case>uto<fixed-case>NLU</fixed-case>: An On-demand Cloud-based Natural Language Understanding System for Enterprises</title>
       <author><first>Nham</first><last>Le</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <pages>8–13</pages>
       <abstract>With the renaissance of deep learning, neural networks have achieved promising results on many natural language understanding (NLU) tasks. Even though the source codes of many neural network models are publicly available, there is still a large gap from open-sourced models to solving real-world problems in enterprises. Therefore, to fill this gap, we introduce AutoNLU, an on-demand cloud-based system with an easy-to-use interface that covers all common use-cases and steps in developing an NLU model. AutoNLU has supported many product teams within Adobe with different use-cases and datasets, quickly delivering them working models. To demonstrate the effectiveness of AutoNLU, we present two case studies. i) We build a practical NLU model for handling various image-editing requests in Photoshop. ii) We build powerful keyphrase extraction models that achieve state-of-the-art results on two public benchmarks. In both cases, end users only need to write a small amount of code to convert their datasets into a common format used by AutoNLU.</abstract>
@@ -1472,8 +1472,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>ISA</fixed-case>: An Intelligent Shopping Assistant</title>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Nedim</first><last>Lipka</last></author>
       <pages>14–19</pages>
       <abstract>Despite the growth of e-commerce, brick-and-mortar stores are still the preferred destinations for many people. In this paper, we present ISA, a mobile-based intelligent shopping assistant that is designed to improve shopping experience in physical stores. ISA assists users by leveraging advanced techniques in computer vision, speech processing, and natural language processing. An in-store user only needs to take a picture or scan the barcode of the product of interest, and then the user can talk to the assistant about the product. The assistant can also guide the user through the purchase process or recommend other similar products to the user. We take a data-driven approach in building the engines of ISA’s natural language processing component, and the engines achieve good performance.</abstract>
diff --git a/data/xml/2020.acl.xml b/data/xml/2020.acl.xml
index 1a6c45f663..02e79339f1 100644
--- a/data/xml/2020.acl.xml
+++ b/data/xml/2020.acl.xml
@@ -3,10 +3,10 @@
   <volume id="main" ingest-date="2020-06-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</booktitle>
-      <editor><first>Dan</first><last>Jurafsky</last></editor>
-      <editor><first>Joyce</first><last>Chai</last></editor>
+      <editor id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></editor>
+      <editor id="joyce-chai"><first>Joyce</first><last>Chai</last></editor>
       <editor><first>Natalie</first><last>Schluter</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Learning to Understand Child-directed and Adult-directed Speech</title>
       <author><first>Lieke</first><last>Gelderloos</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>1–6</pages>
       <abstract>Speech directed to children differs from adult-directed speech in linguistic aspects such as repetition, word choice, and sentence length, as well as in aspects of the speech signal itself, such as prosodic and phonemic variation. Human language acquisition research indicates that child-directed speech helps language learners. This study explores the effect of child-directed speech when learning to extract semantic information from speech directly. We compare the task performance of models trained on adult-directed speech (ADS) and child-directed speech (CDS). We find indications that CDS helps in the initial stages of learning, but eventually, models trained on ADS reach comparable task performance, and generalize better. The results suggest that this is at least partially due to linguistic rather than acoustic properties of the two registers, as we see the same pattern when looking at models trained on acoustically comparable synthetic speech.</abstract>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title>Predicting Depression in Screening Interviews from Latent Categorization of Interview Prompts</title>
       <author><first>Alex</first><last>Rinaldi</last></author>
-      <author><first>Jean</first><last>Fox Tree</last></author>
+      <author id="jean-e-fox-tree"><first>Jean</first><last>Fox Tree</last></author>
       <author><first>Snigdha</first><last>Chaturvedi</last></author>
       <pages>7–18</pages>
       <abstract>Accurately diagnosing depression is difficult– requiring time-intensive interviews, assessments, and analysis. Hence, automated methods that can assess linguistic patterns in these interviews could help psychiatric professionals make faster, more informed decisions about diagnosis. We propose JLPC, a model that analyzes interview transcripts to identify depression while jointly categorizing interview prompts into latent categories. This latent categorization allows the model to define high-level conversational contexts that influence patterns of language in depressed individuals. We show that the proposed model not only outperforms competitive baselines, but that its latent prompt categories provide psycholinguistic insights about depression.</abstract>
@@ -46,7 +46,7 @@
     <paper id="3">
       <title><fixed-case>C</fixed-case>oach: A Coarse-to-Fine Approach for Cross-domain Slot Filling</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>19–25</pages>
@@ -72,10 +72,10 @@
       <title>Dialogue State Tracking with Explicit Slot Connection Modeling</title>
       <author><first>Yawen</first><last>Ouyang</last></author>
       <author><first>Moxin</first><last>Chen</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Yinggong</first><last>Zhao</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>34–40</pages>
       <abstract>Recent proposed approaches have made promising progress in dialogue state tracking (DST). However, in multi-domain scenarios, ellipsis and reference are frequently adopted by users to express values that have been mentioned by slots from other domains. To handle these phenomena, we propose a Dialogue State Tracking with Slot Connections (DST-SC) model to explicitly consider slot correlations across different domains. Given a target slot, the slot connecting mechanism in DST-SC can infer its source slot and copy the source slot value directly, thus significantly reducing the difficulty of learning and reasoning. Experimental results verify the benefits of explicit slot connection modeling, and our model achieves state-of-the-art performance on MultiWOZ 2.0 and MultiWOZ 2.1 datasets.</abstract>
       <url hash="745c331d">2020.acl-main.5</url>
@@ -199,7 +199,7 @@
     <paper id="14">
       <title><fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>-Driven Joint Learning Architecture for Implicit Discourse Relation Recognition</title>
       <author><first>Ruifang</first><last>He</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Fengyu</first><last>Guo</last></author>
       <author><first>Yugui</first><last>Han</last></author>
       <pages>139–148</pages>
@@ -250,7 +250,7 @@
     </paper>
     <paper id="18">
       <title>Few-Shot <fixed-case>NLG</fixed-case> with Pre-Trained Language Model</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Harini</first><last>Eavani</last></author>
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Yinyin</first><last>Liu</last></author>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="23">
       <title>Pre-train and Plug-in: Flexible Conditional Text Generation with Variational Auto-Encoders</title>
-      <author><first>Yu</first><last>Duan</last></author>
+      <author id="yuguang-duan"><first>Yu</first><last>Duan</last></author>
       <author><first>Canwen</first><last>Xu</last></author>
       <author><first>Jiaxin</first><last>Pei</last></author>
       <author><first>Jialong</first><last>Han</last></author>
@@ -400,8 +400,8 @@
       <author><first>Joe</first><last>Barrow</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
       <author><first>Vlad</first><last>Morariu</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>313–322</pages>
       <abstract>Text segmentation aims to uncover latent structure by dividing text from a document into coherent sections. Where previous work on text segmentation considers the tasks of document segmentation and segment labeling separately, we show that the tasks contain complementary information and are best addressed jointly. We introduce Segment Pooling LSTM (S-LSTM), which is capable of jointly segmenting a document and labeling segments. In support of joint training, we develop a method for teaching the model to recover from errors by aligning the predicted and ground truth segments. We show that S-LSTM reduces segmentation error by 30% on average, while also improving segment labeling.</abstract>
@@ -471,7 +471,7 @@
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>358–364</pages>
       <abstract>Neural machine translation (NMT) encodes the source sentence in a universal way to generate the target sentence word-by-word. However, NMT does not consider the importance of word in the sentence meaning, for example, some words (i.e., content words) express more important meaning than others (i.e., function words). To address this limitation, we first utilize word frequency information to distinguish between content and function words in a sentence, and then design a content word-aware NMT to improve translation performance. Empirical results on the WMT14 English-to-German, WMT14 English-to-French, and WMT17 Chinese-to-English translation tasks show that the proposed methods can significantly improve the performance of Transformer-based NMT.</abstract>
       <url hash="1a69dc8f">2020.acl-main.34</url>
@@ -509,8 +509,8 @@
     <paper id="37">
       <title>Learning Source Phrase Representations for Neural Machine Translation</title>
       <author><first>Hongfei</first><last>Xu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <pages>386–396</pages>
@@ -524,8 +524,8 @@
       <title>Lipschitz Constrained Parameter Initialization for Deep Transformers</title>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <pages>397–402</pages>
       <abstract>The Transformer translation model employs residual connection and layer normalization to ease the optimization difficulties caused by its multi-layer encoder/decoder structure. Previous research shows that even with residual connection and layer normalization, deep Transformers still have difficulty in training, and particularly Transformer models with more than 12 encoder/decoder layers fail to converge. In this paper, we first empirically demonstrate that a simple modification made in the official implementation, which changes the computation order of residual connection and layer normalization, can significantly ease the optimization of deep Transformers. We then compare the subtle differences in computation order in considerable detail, and present a parameter initialization method that leverages the Lipschitz constraint on the initialization of Transformer parameters that effectively ensures training convergence. In contrast to findings in previous research we further demonstrate that with Lipschitz parameter initialization, deep Transformers with the original computation order can converge, and obtain significant BLEU improvements with up to 24 layers. In contrast to previous research which focuses on deep encoders, our approach additionally enables Transformers to also benefit from deep decoders.</abstract>
@@ -595,7 +595,7 @@
       <author><first>Gail</first><last>Weiss</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Eran</first><last>Yahav</last></author>
       <pages>443–459</pages>
       <abstract>We develop a formal hierarchy of the expressive capacity of RNN architectures. The hierarchy is based on two formal properties: space complexity, which measures the RNN’s memory, and rational recurrence, defined as whether the recurrent update can be described by a weighted finite-state machine. We place several RNN variants within this hierarchy. For example, we prove the LSTM is not rational, which formally separates it from the related QRNN (Bradbury et al., 2016). We also show how these models’ expressive capacity is expanded by stacking multiple layers or composing them with different pooling functions. Our results build on the theory of “saturated” RNNs (Merrill, 2019). While formally extending these findings to unsaturated RNNs is left to future work, we hypothesize that the practical learnable capacity of unsaturated RNNs obeys a similar hierarchy. We provide empirical results to support this conjecture. Experimental findings from training unsaturated networks on formal languages support this conjecture.</abstract>
@@ -608,7 +608,7 @@
       <title>A Three-Parameter Rank-Frequency Relation in Natural Languages</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>460–464</pages>
       <abstract>We present that, the rank-frequency relation in textual data follows <tex-math>f \propto r^{-\alpha}(r+\gamma)^{-\beta}</tex-math>, where <tex-math>f</tex-math> is the token frequency and <tex-math>r</tex-math> is the rank by frequency, with (<tex-math>\alpha</tex-math>, <tex-math>\beta</tex-math>, <tex-math>\gamma</tex-math>) as parameters. The formulation is derived based on the empirical observation that <tex-math>d^2 (x+y)/dx^2</tex-math> is a typical impulse function, where <tex-math>(x,y)=(\log r, \log f)</tex-math>. The formulation is the power law when <tex-math>\beta=0</tex-math> and the Zipf–Mandelbrot law when <tex-math>\alpha=0</tex-math>. We illustrate that <tex-math>\alpha</tex-math> is related to the analytic features of syntax and <tex-math>\beta+\gamma</tex-math> to those of morphology in natural languages from an investigation of multilingual corpora.</abstract>
       <url hash="8535a004">2020.acl-main.44</url>
@@ -687,7 +687,7 @@
       <author><first>Peter</first><last>Stefanov</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
       <author><first>Atanas</first><last>Atanasov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>527–537</pages>
       <abstract>Discovering the stances of media outlets and influential people on current, debatable topics is important for social statisticians and policy makers. Many supervised solutions exist for determining viewpoints, but manually annotating training data is costly. In this paper, we propose a cascaded method that uses unsupervised learning to ascertain the stance of Twitter users with respect to a polarizing topic by leveraging their retweet behavior; then, it uses supervised learning based on user labels to characterize both the general political leaning of online media and of popular Twitter users, as well as their stance with respect to the target polarizing topic. We evaluate the model by comparing its predictions to gold labels from the Media Bias/Fact Check website, achieving 82.6% accuracy.</abstract>
       <url hash="63db959b">2020.acl-main.50</url>
@@ -699,7 +699,7 @@
       <title>Simple, Interpretable and Stable Method for Detecting Words with Usage Change across Corpora</title>
       <author><first>Hila</first><last>Gonen</last></author>
       <author><first>Ganesh</first><last>Jawahar</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <pages>538–555</pages>
       <abstract>The problem of comparing two bodies of text and searching for words that differ in their usage between them arises often in digital humanities and computational social science. This is commonly approached by training word embeddings on each corpus, aligning the vector spaces, and looking for words whose cosine distance in the aligned space is large. However, these methods often require extensive filtering of the vocabulary to perform well, and - as we show in this work - result in unstable, and hence less reliable, results. We propose an alternative approach that does not use vector space alignment, and instead considers the neighbors of each word. The method is simple, interpretable and stable. We demonstrate its effectiveness in 9 different setups, considering different corpus splitting criteria (age, gender and profession of tweet authors, time of tweet) and different languages (English, French and Hebrew).</abstract>
@@ -881,8 +881,8 @@
       <author><first>Jiahuan</first><last>Li</last></author>
       <author><first>Yu</first><last>Bao</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>708–717</pages>
       <abstract>Definition generation, which aims to automatically generate dictionary definitions for words, has recently been proposed to assist the construction of dictionaries and help people understand unfamiliar texts. However, previous works hardly consider explicitly modeling the “components” of definitions, leading to under-specific generation results. In this paper, we propose ESD, namely Explicit Semantic Decomposition for definition Generation, which explicitly decomposes the meaning of words into semantic components, and models them with discrete latent variables for definition generation. Experimental results show that achieves top results on WordNet and Oxford benchmarks, outperforming strong previous baselines.</abstract>
       <url hash="0a961d6b">2020.acl-main.65</url>
@@ -893,7 +893,7 @@
     <paper id="66">
       <title>Improved Natural Language Generation via Loss Truncation</title>
       <author><first>Daniel</first><last>Kang</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <pages>718–731</pages>
       <abstract>Neural language models are usually trained to match the distributional properties of large-scale corpora by minimizing the log loss. While straightforward to optimize, this approach forces the model to reproduce all variations in the dataset, including noisy and invalid references (e.g., misannotations and hallucinated facts). Even a small fraction of noisy data can degrade the performance of log loss. As an alternative, prior work has shown that minimizing the distinguishability of generated samples is a principled and robust loss that can handle invalid references. However, distinguishability has not been used in practice due to challenges in optimization and estimation. We propose loss truncation: a simple and scalable procedure which adaptively removes high log loss examples as a way to optimize for distinguishability. Empirically, we demonstrate that loss truncation outperforms existing baselines on distinguishability on a summarization task. Furthermore, we show that samples generated by the loss truncation model have factual accuracy ratings that exceed those of baselines and match human references.</abstract>
       <url hash="898301fc">2020.acl-main.66</url>
@@ -932,13 +932,13 @@
     <paper id="69">
       <title>Syn-<fixed-case>QG</fixed-case>: Syntactic and Shallow Semantic Rules for Question Generation</title>
       <author><first>Kaustubh</first><last>Dhole</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>752–765</pages>
       <abstract>Question Generation (QG) is fundamentally a simple syntactic transformation; however, many aspects of semantics influence what questions are good to form. We implement this observation by developing Syn-QG, a set of transparent syntactic rules leveraging universal dependencies, shallow semantic parsing, lexical resources, and custom rules which transform declarative sentences into question-answer pairs. We utilize PropBank argument descriptions and VerbNet state predicates to incorporate shallow semantic content, which helps generate questions of a descriptive nature and produce inferential and semantically richer questions than existing systems. In order to improve syntactic fluency and eliminate grammatically incorrect questions, we employ back-translation over the output of these syntactic rules. A set of crowd-sourced evaluations shows that our system can generate a larger number of highly grammatical and relevant questions than previous QG systems and that back-translation drastically improves grammaticality at a slight cost of generating irrelevant questions.</abstract>
       <url hash="375fa494">2020.acl-main.69</url>
       <attachment type="Source" hash="7a4686f8">2020.acl-main.69.Source.zip</attachment>
-      <doi>10.18653/v1/2020.acl-main.69</doi>
       <attachment type="Dataset" hash="89f92503">2020.acl-main.69.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.acl-main.69</doi>
       <video href="http://slideslive.com/38929019"/>
       <bibkey>dhole-manning-2020-syn</bibkey>
       <revision id="1" href="2020.acl-main.69v1" hash="39009e68"/>
@@ -1136,14 +1136,14 @@
     </paper>
     <paper id="84">
       <title>A Methodology for Creating Question Answering Corpora Using Inverse Data Annotation</title>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Katsiaryna</first><last>Mlynchyk</last></author>
       <author><first>Philippe</first><last>Schläpfer</last></author>
-      <author><first>Alvaro</first><last>Rodrigo</last></author>
-      <author><first>Dirk</first><last>von Grünigen</last></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last></author>
+      <author id="dirk-von-gruenigen"><first>Dirk</first><last>von Grünigen</last></author>
       <author><first>Nicolas</first><last>Kaiser</last></author>
       <author><first>Kurt</first><last>Stockinger</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>897–911</pages>
       <abstract>In this paper, we introduce a novel methodology to efficiently construct a corpus for question answering over structured data. For this, we introduce an intermediate representation that is based on the logical query plan in a database, called Operation Trees (OT). This representation allows us to invert the annotation process without loosing flexibility in the types of queries that we generate. Furthermore, it allows for fine-grained alignment of the tokens to the operations. Thus, we randomly generate OTs from a context free grammar and annotators just have to write the appropriate question and assign the tokens. We compare our corpus OTTA (Operation Trees and Token Assignment), a large semantic parsing corpus for evaluating natural language interfaces to databases, to Spider and LC-QuaD 2.0 and show that our methodology more than triples the annotation speed while maintaining the complexity of the queries. Finally, we train a state-of-the-art semantic parsing model on our data and show that our dataset is a challenging dataset and that the token alignment can be leveraged to significantly increase the performance.</abstract>
@@ -1200,7 +1200,7 @@
       <title>Explicit Memory Tracker with Coarse-to-Fine Reasoning for Conversational Machine Reading</title>
       <author><first>Yifan</first><last>Gao</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Irwin</first><last>King</last></author>
@@ -1286,7 +1286,7 @@
     <paper id="95">
       <title>Moving Down the Long Tail of Word Sense Disambiguation with Gloss Informed Bi-encoders</title>
       <author><first>Terra</first><last>Blevins</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1006–1017</pages>
       <abstract>A major obstacle in Word Sense Disambiguation (WSD) is that word senses are not uniformly distributed, causing existing models to generally perform poorly on senses that are either rare or unseen during training. We propose a bi-encoder model that independently embeds (1) the target word with its surrounding context and (2) the dictionary definition, or gloss, of each sense. The encoders are jointly optimized in the same representation space, so that sense disambiguation can be performed by finding the nearest sense embedding for each target word embedding. Our system outperforms previous state-of-the-art models on English all-words WSD; these gains predominantly come from improved performance on rare senses, leading to a 31.1% error reduction on less frequent senses over prior work. This demonstrates that rare senses can be more effectively disambiguated by modeling their definitions.</abstract>
       <url hash="e8ec82e0">2020.acl-main.95</url>
@@ -1326,7 +1326,7 @@
       <title>Towards Conversational Recommendation over Multi-Type Dialogs</title>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Ting</first><last>Liu</last></author>
@@ -1363,7 +1363,7 @@
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1061–1071</pages>
       <abstract>The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. The dataset is publicly available at <url>https://srhthu.github.io/expertise-style-transfer/</url>.</abstract>
       <url hash="e451e7ef">2020.acl-main.100</url>
@@ -1433,7 +1433,7 @@
       <title>Keyphrase Generation for Scientific Document Retrieval</title>
       <author><first>Florian</first><last>Boudin</last></author>
       <author><first>Ygor</first><last>Gallina</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1118–1126</pages>
       <abstract>Sequence-to-sequence models have lead to significant progress in keyphrase generation, but it remains unknown whether they are reliable enough to be beneficial for document retrieval. This study provides empirical evidence that such models can significantly improve retrieval performance, and introduces a new extrinsic evaluation framework that allows for a better understanding of the limitations of keyphrase generation models. Using this framework, we point out and discuss the difficulties encountered with supplementing documents with -not present in text- keyphrases, and generalizing models across domains. Our code is available at <url>https://github.com/boudinfl/ir-using-kg</url></abstract>
       <url hash="08752c97">2020.acl-main.105</url>
@@ -1444,8 +1444,8 @@
     <paper id="106">
       <title>A Graph Auto-encoder Model of Derivational Morphology</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>1127–1138</pages>
       <abstract>There has been little work on modeling the morphological well-formedness (MWF) of derivatives, a problem judged to be complex and difficult in linguistics. We present a graph auto-encoder that learns embeddings capturing information about the compatibility of affixes and stems in derivation. The auto-encoder models MWF in English surprisingly well by combining syntactic and semantic information with associative information from the mental lexicon.</abstract>
       <url hash="937c23be">2020.acl-main.106</url>
@@ -1455,13 +1455,13 @@
     </paper>
     <paper id="107">
       <title>Building a User-Generated Content <fixed-case>N</fixed-case>orth-<fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>rabizi Treebank: Tackling Hell</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Farah</first><last>Essaidi</last></author>
       <author><first>Amal</first><last>Fethi</last></author>
       <author><first>Matthieu</first><last>Futeral</last></author>
       <author><first>Benjamin</first><last>Muller</last></author>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Abhishek</first><last>Srivastava</last></author>
       <pages>1139–1150</pages>
       <abstract>We introduce the first treebank for a romanized user-generated content variety of Algerian, a North-African Arabic dialect known for its frequent usage of code-switching. Made of 1500 sentences, fully annotated in morpho-syntax and Universal Dependency syntax, with full translation at both the word and the sentence levels, this treebank is made freely available. It is supplemented with 50k unlabeled sentences collected from Common Crawl and web-crawled data using intensive data-mining techniques. Preliminary experiments demonstrate its usefulness for POS tagging and dependency parsing. We believe that what we present in this paper is useful beyond the low-resource language community. This is the first time that enough unlabeled and annotated data is provided for an emerging user-generated content dialectal language with rich morphology and code switching, making it an challenging test-bed for most recent NLP approaches.</abstract>
@@ -1473,7 +1473,7 @@
     <paper id="108">
       <title>Crawling and Preprocessing Mailing Lists At Scale for Dialog Analysis</title>
       <author><first>Janek</first><last>Bevendorff</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>1151–1158</pages>
@@ -1527,7 +1527,7 @@
     </paper>
     <paper id="112">
       <title>Learning and Evaluating Emotion Lexicons for 91 Languages</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Susanna</first><last>Rücker</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>1202–1217</pages>
@@ -1541,7 +1541,7 @@
       <title>Multi-Hypothesis Machine Translation Evaluation</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>1218–1232</pages>
       <abstract>Reliably evaluating Machine Translation (MT) through automated metrics is a long-standing problem. One of the main challenges is the fact that multiple outputs can be equally valid. Attempts to minimise this issue include metrics that relax the matching of MT output and reference strings, and the use of multiple references. The latter has been shown to significantly improve the performance of evaluation metrics. However, collecting multiple references is expensive and in practice a single reference is generally used. In this paper, we propose an alternative approach: instead of modelling linguistic variation in human reference we exploit the MT model uncertainty to generate multiple diverse translations and use these: (i) as surrogates to reference translations; (ii) to obtain a quantification of translation variability to either complement existing metric scores or (iii) replace references altogether. We show that for a number of popular evaluation metrics our variability estimates lead to substantial improvements in correlation with human judgements of quality by up 15%.</abstract>
       <url hash="2705849d">2020.acl-main.113</url>
@@ -1552,7 +1552,7 @@
     <paper id="114">
       <title>Multimodal Quality Estimation for Machine Translation</title>
       <author><first>Shu</first><last>Okabe</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1233–1240</pages>
       <abstract>We propose approaches to Quality Estimation (QE) for Machine Translation that explore both text and visual modalities for Multimodal QE. We compare various multimodality integration and fusion strategies. For both sentence-level and document-level predictions, we show that state-of-the-art neural and feature-based QE frameworks obtain better results when using the additional modality.</abstract>
@@ -1563,7 +1563,7 @@
     </paper>
     <paper id="115">
       <title><fixed-case>P</fixed-case>uzz<fixed-case>L</fixed-case>ing <fixed-case>M</fixed-case>achines: <fixed-case>A</fixed-case> <fixed-case>C</fixed-case>hallenge on <fixed-case>L</fixed-case>earning <fixed-case>F</fixed-case>rom <fixed-case>S</fixed-case>mall <fixed-case>D</fixed-case>ata</title>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Phillip</first><last>Rust</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -1596,20 +1596,20 @@
       <author><first>Rishav</first><last>Chakravarti</last></author>
       <author><first>Saswati</first><last>Dana</last></author>
       <author><first>Anthony</first><last>Ferritto</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Martin</first><last>Franz</last></author>
       <author><first>Dinesh</first><last>Garg</last></author>
       <author><first>Dinesh</first><last>Khandelwal</last></author>
-      <author><first>Scott</first><last>McCarley</last></author>
+      <author id="j-scott-mccarley"><first>Scott</first><last>McCarley</last></author>
       <author><first>Michael</first><last>McCawley</last></author>
       <author><first>Mohamed</first><last>Nasr</last></author>
       <author><first>Lin</first><last>Pan</last></author>
       <author><first>Cezar</first><last>Pendus</last></author>
-      <author><first>John</first><last>Pitrelli</last></author>
+      <author id="john-f-pitrelli"><first>John</first><last>Pitrelli</last></author>
       <author><first>Saurabh</first><last>Pujar</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Andrzej</first><last>Sakrajda</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <author><first>Rosario</first><last>Uceda-Sosa</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <author><first>Rong</first><last>Zhang</last></author>
@@ -1646,8 +1646,8 @@
     <paper id="120">
       <title>A Large-Scale Multi-Document Summarization Dataset from the <fixed-case>W</fixed-case>ikipedia Current Events Portal</title>
       <author><first>Demian</first><last>Gholipour Ghalandari</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
       <author><first>John</first><last>Glover</last></author>
       <author><first>Georgiana</first><last>Ifrim</last></author>
       <pages>1302–1308</pages>
@@ -1662,7 +1662,7 @@
       <author><first>Junnan</first><last>Zhu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1309–1321</pages>
       <abstract>Cross-lingual summarization aims at summarizing a document in one language (e.g., Chinese) into another language (e.g., English). In this paper, we propose a novel method inspired by the translation pattern in the process of obtaining a cross-lingual summary. We first attend to some words in the source text, then translate them into the target language, and summarize to get the final summary. Specifically, we first employ the encoder-decoder attention distribution to attend to the source words. Second, we present three strategies to acquire the translation probability, which helps obtain the translation candidates for each source word. Finally, each summary word is generated either from the neural distribution or from the translation candidates of source words. Experimental results on Chinese-to-English and English-to-Chinese summarization tasks have shown that our proposed method can significantly outperform the baselines, achieving comparable performance with the state-of-the-art.</abstract>
       <url hash="47236ebf">2020.acl-main.121</url>
@@ -1685,7 +1685,7 @@
       <title>Improving Truthfulness of Headline Generation</title>
       <author><first>Kazuki</first><last>Matsumaru</last></author>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1335–1346</pages>
       <abstract>Most studies on abstractive summarization report ROUGE scores between system and reference summaries. However, we have a concern about the truthfulness of generated summaries: whether all facts of a generated summary are mentioned in the source text. This paper explores improving the truthfulness in headline generation on two popular datasets. Analyzing headlines generated by the state-of-the-art encoder-decoder model, we show that the model sometimes generates untruthful headlines. We conjecture that one of the reasons lies in untruthful supervision data used for training the model. In order to quantify the truthfulness of article-headline pairs, we consider the textual entailment of whether an article entails its headline. After confirming quite a few untruthful instances in the datasets, this study hypothesizes that removing untruthful instances from the supervision data may remedy the problem of the untruthful behaviors of the model. Building a binary classifier that predicts an entailment relation between an article and its headline, we filter out untruthful instances from the supervision data. Experimental results demonstrate that the headline generation model trained on filtered supervision data shows no clear difference in ROUGE scores but remarkable improvements in automatic and manual evaluations of the generated headlines.</abstract>
       <url hash="0edbbbf8">2020.acl-main.123</url>
@@ -1766,8 +1766,8 @@
     <paper id="129">
       <title><fixed-case>L</fixed-case>earning <fixed-case>D</fixed-case>ialog <fixed-case>P</fixed-case>olicies from <fixed-case>W</fixed-case>eak <fixed-case>D</fixed-case>emonstrations</title>
       <author><first>Gabriel</first><last>Gordon-Hall</last></author>
-      <author><first>Philip John</first><last>Gorinski</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="philip-gorinski"><first>Philip John</first><last>Gorinski</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1394–1405</pages>
       <abstract>Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user’s requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, and get high success rates even when trained on out-of-domain data.</abstract>
       <url hash="bcea3b2e">2020.acl-main.129</url>
@@ -1848,7 +1848,7 @@
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Yuxi</first><last>Xie</last></author>
       <author><first>Yansong</first><last>Feng</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>1463–1475</pages>
       <abstract>This paper proposes the problem of Deep Question Generation (DQG), which aims to generate complex questions that require reasoning over multiple pieces of information about the input passage. In order to capture the global structure of the document and facilitate reasoning, we propose a novel framework that first constructs a semantic-level graph for the input document and then encodes the semantic graph by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level and graph-level representations to perform joint training of content selection and question decoding. On the HotpotQA deep-question centric dataset, our model greatly improves performance over questions requiring reasoning over multiple facts, leading to state-of-the-art performance. The code is publicly available at <url>https://github.com/WING-NUS/SG-Deep-Question-Generation</url>.</abstract>
@@ -1889,7 +1889,7 @@
       <title><fixed-case>NAT</fixed-case>: Noise-Aware Training for Robust Neural Sequence Labeling</title>
       <author><first>Marcin</first><last>Namysl</last></author>
       <author><first>Sven</first><last>Behnke</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <pages>1501–1517</pages>
       <abstract>Sequence labeling systems should perform reliably not only under ideal conditions but also with corrupted inputs—as these systems often process user-generated text or follow an error-prone upstream component. To this end, we formulate the noisy sequence labeling problem, where the input may undergo an unknown noising process and propose two Noise-Aware Training (NAT) objectives that improve robustness of sequence labeling performed on perturbed input: Our data augmentation method trains a neural model using a mixture of clean and noisy samples, whereas our stability training algorithm encourages the model to create a noise-invariant latent representation. We employ a vanilla noise model at training time. For evaluation, we use both the original data and its variants perturbed with real OCR errors and misspellings. Extensive experiments on English and German named entity recognition benchmarks confirmed that NAT consistently improved robustness of popular sequence labeling models, preserving accuracy on the original input. We make our code and data publicly available for the research community.</abstract>
       <url hash="c35fcf97">2020.acl-main.138</url>
@@ -1968,7 +1968,7 @@
     <paper id="144">
       <title>Boosting Neural Machine Translation with Similar Translations</title>
       <author><first>Jitao</first><last>Xu</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>1580–1590</pages>
       <abstract>This paper explores data augmentation methods for training Neural Machine Translation to make use of similar translations, in a comparable way a human translator employs fuzzy matches. In particular, we show how we can simply present the neural model with information of both source and target sides of the fuzzy matches, we also extend the similarity to include semantically related translations retrieved using sentence distributed representations. We show that translations based on fuzzy matching provide the model with “copy” information while translations based on embedding similarities tend to extend the translation “context”. Results indicate that the effect from both similar sentences are adding up to further boost accuracy, combine naturally with model fine-tuning and are providing dynamic adaptation for unseen translation pairs. Tests on multiple data sets and domains show consistent accuracy improvements. To foster research around these techniques, we also release an Open-Source toolkit with efficient and flexible fuzzy-match implementation.</abstract>
@@ -1980,7 +1980,7 @@
     <paper id="145">
       <title>Character-Level Translation with Self-attention</title>
       <author><first>Yingqiang</first><last>Gao</last></author>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Yuhuang</first><last>Hu</last></author>
       <author><first>Richard H.R.</first><last>Hahnloser</last></author>
       <pages>1591–1604</pages>
@@ -2005,7 +2005,7 @@
     <paper id="147">
       <title>Enhancing Machine Translation with Dependency-Aware Self-Attention</title>
       <author><first>Emanuele</first><last>Bugliarello</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1618–1627</pages>
       <abstract>Most neural machine translation models only rely on pairs of parallel sentences, assuming syntactic information is automatically learned by an attention mechanism. In this work, we investigate different approaches to incorporate syntactic knowledge in the Transformer model and also propose a novel, parameter-free, dependency-aware self-attention mechanism that improves its translation quality, especially for long sentences and in low-resource scenarios. We show the efficacy of each approach on WMT English-German and English-Turkish, and WAT English-Japanese translation tasks.</abstract>
       <url hash="70a6334e">2020.acl-main.147</url>
@@ -2029,10 +2029,10 @@
     <paper id="149">
       <title>It’s Easier to Translate out of <fixed-case>E</fixed-case>nglish than into it: <fixed-case>M</fixed-case>easuring Neural Translation Difficulty by Cross-Mutual Information</title>
       <author><first>Emanuele</first><last>Bugliarello</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1640–1649</pages>
       <abstract>The performance of neural machine translation systems is commonly evaluated in terms of BLEU. However, due to its reliance on target language properties and generation, the BLEU metric does not allow an assessment of which translation directions are more difficult to model. In this paper, we propose cross-mutual information (XMI): an asymmetric information-theoretic metric of machine translation difficulty that exploits the probabilistic nature of most neural machine translation models. XMI allows us to better evaluate the difficulty of translating text into the target language while controlling for the difficulty of the target-side generation component independent of the translation task. We then present the first systematic and controlled study of cross-lingual translation difficulties using modern neural translation systems. Code for replicating our experiments is available online at <url>https://github.com/e-bug/nmt-difficulty</url>.</abstract>
       <url hash="c8edffbb">2020.acl-main.149</url>
@@ -2114,7 +2114,7 @@
       <author><first>Kalliopi</first><last>Meladaki</last></author>
       <author><first>Mahsa</first><last>Monshizadeh</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1691–1702</pages>
       <abstract>Current advances in machine translation (MT) increase the need for translators to switch from traditional translation to post-editing (PE) of machine-translated text, a process that saves time and reduces errors. This affects the design of translation interfaces, as the task changes from mainly generating text to correcting errors within otherwise helpful translation proposals. Since this paradigm shift offers potential for modalities other than mouse and keyboard, we present MMPE, the first prototype to combine traditional input modes with pen, touch, and speech modalities for PE of MT. The results of an evaluation with professional translators suggest that pen and touch interaction are suitable for deletion and reordering tasks, while they are of limited use for longer insertions. On the other hand, speech and multi-modal combinations of select &amp; speech are considered suitable for replacements and insertions but offer less potential for deletion and reordering. Overall, participants were enthusiastic about the new modalities and saw them as good extensions to mouse &amp; keyboard, but not as a complete substitute.</abstract>
       <url hash="f4fcc467">2020.acl-main.155</url>
@@ -2124,9 +2124,9 @@
     </paper>
     <paper id="156">
       <title>A Monolingual Approach to Contextualized Word Embeddings for Mid-Resource Languages</title>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>1703–1714</pages>
       <abstract>We use the multilingual OSCAR corpus, extracted from Common Crawl via language classification, filtering and cleaning, to train monolingual contextualized word embeddings (ELMo) for five mid-resource languages. We then compare the performance of OSCAR-based and Wikipedia-based ELMo embeddings for these languages on the part-of-speech tagging and parsing tasks. We show that, despite the noise in the Common-Crawl-based OSCAR data, embeddings trained on OSCAR perform much better than monolingual embeddings trained on Wikipedia. They actually equal or improve the current state of the art in tagging and parsing for all five languages. In particular, they also improve over multilingual Wikipedia-based contextual embeddings (multilingual BERT), which almost always constitutes the previous state of the art, thereby showing that the benefit of a larger, more diverse corpus surpasses the cross-lingual benefit of multilingual embedding architectures.</abstract>
       <url hash="cadfe1d2">2020.acl-main.156</url>
@@ -2155,7 +2155,7 @@
       <author><first>Jon</first><last>Gauthier</last></author>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Ethan</first><last>Wilcox</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1725–1744</pages>
       <abstract>While state-of-the-art neural network models continue to achieve lower perplexity scores on language modeling benchmarks, it remains unknown whether optimizing for broad-coverage predictive performance leads to human-like syntactic knowledge. Furthermore, existing work has not provided a clear picture about the model properties required to produce proper syntactic generalizations. We present a systematic evaluation of the syntactic knowledge of neural language models, testing 20 combinations of model types and data sizes on a set of 34 English-language syntactic test suites. We find substantial differences in syntactic generalization performance by model architecture, with sequential models underperforming other architectures. Factorially manipulating model architecture and training dataset size (1M-40M words), we find that variability in syntactic generalization performance is substantially greater by architecture than by dataset size for the corpora tested in our experiments. Our results also reveal a dissociation between perplexity and syntactic generalization performance.</abstract>
       <url hash="8ccfd93d">2020.acl-main.158</url>
@@ -2166,7 +2166,7 @@
     <paper id="159">
       <title>Inflecting When There’s No Majority: Limitations of Encoder-Decoder Neural Networks as Cognitive Models for <fixed-case>G</fixed-case>erman Plurals</title>
       <author><first>Kate</first><last>McCurdy</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <pages>1745–1756</pages>
       <abstract>Can artificial neural networks learn to represent inflectional morphology and generalize to new words as human speakers do? Kirov and Cotterell (2018) argue that the answer is yes: modern Encoder-Decoder (ED) architectures learn human-like behavior when inflecting English verbs, such as extending the regular past tense form /-(e)d/ to novel words. However, their work does not address the criticism raised by Marcus et al. (1995): that neural models may learn to extend not the regular, but the most frequent class — and thus fail on tasks like German number inflection, where infrequent suffixes like /-s/ can still be productively generalized. To investigate this question, we first collect a new dataset from German speakers (production and ratings of plural forms for novel nouns) that is designed to avoid sources of information unavailable to the ED model. The speaker data show high variability, and two suffixes evince ‘regular’ behavior, appearing more often with phonologically atypical inputs. Encoder-decoder models do generalize the most frequently produced plural class, but do not show human-like variability or ‘regular’ extension of these other plural markers. We conclude that modern neural models may still struggle with minority-class generalization.</abstract>
@@ -2257,7 +2257,7 @@
       <title>Conversational Graph Grounded Policy Learning for Open-Domain Conversation Generation</title>
       <author><first>Jun</first><last>Xu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Ting</first><last>Liu</last></author>
@@ -2271,12 +2271,12 @@
     <paper id="167">
       <title><fixed-case>GPT</fixed-case>-too: A Language-Model-First Approach for <fixed-case>AMR</fixed-case>-to-Text Generation</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>1846–1852</pages>
       <abstract>Abstract Meaning Representations (AMRs) are broad-coverage sentence-level semantic graphs. Existing approaches to generating text from AMR have focused on training sequence-to-sequence or graph-to-sequence models on AMR annotated data only. In this paper, we propose an alternative approach that combines a strong pre-trained language model with cycle consistency-based re-scoring. Despite the simplicity of the approach, our experimental results show these models outperform all previous techniques on the English LDC2017T10 dataset, including the recent use of transformer architectures. In addition to the standard evaluation metrics, we provide human evaluation experiments that further substantiate the strength of our approach.</abstract>
       <url hash="cbea87bb">2020.acl-main.167</url>
@@ -2290,7 +2290,7 @@
       <author><first>Pengyu</first><last>Nie</last></author>
       <author><first>Milos</first><last>Gligoric</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1853–1868</pages>
       <abstract>We formulate the novel task of automatically updating an existing natural language comment based on changes in the body of code it accompanies. We propose an approach that learns to correlate changes across two distinct language representations, to generate a sequence of edits that are applied to the existing comment to reflect the source code modifications. We train and evaluate our model using a dataset that we collected from commit histories of open-source software projects, with each example consisting of a concurrent update to a method and its corresponding comment. We compare our approach against multiple baselines using both automatic metrics and human evaluation. Results reflect the challenge of this task and that our model outperforms baselines with respect to making edits.</abstract>
       <url hash="faaedb7f">2020.acl-main.168</url>
@@ -2307,7 +2307,7 @@
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <pages>1869–1881</pages>
       <abstract>This paper introduces a new task of politeness transfer which involves converting non-polite sentences to polite sentences while preserving the meaning. We also provide a dataset of more than 1.39 instances automatically labeled for politeness to encourage benchmark evaluations on this new task. We design a tag and generate pipeline that identifies stylistic attributes and subsequently generates a sentence in the target style while preserving most of the source content. For politeness as well as five other transfer tasks, our model outperforms the state-of-the-art methods on automatic metrics for content preservation, with a comparable or better performance on style transfer accuracy. Additionally, our model surpasses existing methods on human evaluations for grammaticality, meaning preservation and transfer accuracy across all the six style transfer tasks. The data and code is located at <url>https://github.com/tag-and-generate</url>.</abstract>
@@ -2379,7 +2379,7 @@
     </paper>
     <paper id="175">
       <title>Unsupervised Opinion Summarization with Noising and Denoising</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1934–1945</pages>
       <abstract>The supervised training of high-capacity models on large datasets containing hundreds of thousands of document-summary pairs is critical to the recent success of deep learning techniques for abstractive summarization. Unfortunately, in most domains (other than news) such training data is not available and cannot be easily sourced. In this paper we enable the use of supervised learning for the setting where there are only documents available (e.g., product or business reviews) without ground truth summaries. We create a synthetic dataset from a corpus of user reviews by sampling a review, pretending it is a summary, and generating noisy versions thereof which we treat as pseudo-review input. We introduce several linguistically motivated noise generation functions and a summarization model which learns to denoise the input and generate the original review. At test time, the model accepts genuine reviews and generates a summary containing salient opinions, treating those that do not reach consensus as noise. Extensive automatic and human evaluation shows that our model brings substantial improvements over both abstractive and extractive baselines.</abstract>
@@ -2390,8 +2390,8 @@
     </paper>
     <paper id="176">
       <title>A Tale of Two Perplexities: Sensitivity of Neural Language Models to Lexical Retrieval Deficits in Dementia of the <fixed-case>A</fixed-case>lzheimer’s Type</title>
-      <author><first>Trevor</first><last>Cohen</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>1946–1957</pages>
       <abstract>In recent years there has been a burgeoning interest in the use of computational methods to distinguish between elicited speech samples produced by patients with dementia, and those from healthy controls. The difference between perplexity estimates from two neural language models (LMs) - one trained on transcripts of speech produced by healthy participants and one trained on those with dementia - as a single feature for diagnostic classification of unseen transcripts has been shown to produce state-of-the-art performance. However, little is known about why this approach is effective, and on account of the lack of case/control matching in the most widely-used evaluation set of transcripts (DementiaBank), it is unclear if these approaches are truly diagnostic, or are sensitive to other variables. In this paper, we interrogate neural LMs trained on participants with and without dementia by using synthetic narratives previously developed to simulate progressive semantic dementia by manipulating lexical frequency. We find that perplexity of neural LMs is strongly and differentially associated with lexical frequency, and that using a mixture model resulting from interpolating control and dementia LMs improves upon the current state-of-the-art for models trained on transcript text exclusively.</abstract>
       <url hash="2764f941">2020.acl-main.176</url>
@@ -2405,7 +2405,7 @@
       <title>Probing Linguistic Systematicity</title>
       <author><first>Emily</first><last>Goodwin</last></author>
       <author><first>Koustuv</first><last>Sinha</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>1958–1969</pages>
       <abstract>Recently, there has been much interest in the question of whether deep natural language understanding (NLU) models exhibit systematicity, generalizing such that units like words make consistent contributions to the meaning of the sentences in which they appear. There is accumulating evidence that neural models do not learn systematically. We examine the notion of systematicity from a linguistic perspective, defining a set of probing tasks and a set of metrics to measure systematic behaviour. We also identify ways in which network architectures can generalize non-systematically, and discuss why such forms of generalization may be unsatisfying. As a case study, we perform a series of experiments in the setting of natural language inference (NLI). We provide evidence that current state-of-the-art NLU systems do not generalize systematically, despite overall high performance.</abstract>
       <url hash="4d479c75">2020.acl-main.177</url>
@@ -2418,7 +2418,7 @@
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>James</first><last>Pennebaker</last></author>
       <pages>1970–1978</pages>
       <abstract>We investigate the use of NLP as a measure of the cognitive processes involved in storytelling, contrasting imagination and recollection of events. To facilitate this, we collect and release Hippocorpus, a dataset of 7,000 stories about imagined and recalled events. We introduce a measure of narrative flow and use this to examine the narratives for imagined and recalled events. Additionally, we measure the differential recruitment of knowledge attributed to semantic memory versus episodic memory (Tulving, 1972) for imagined and recalled storytelling by comparing the frequency of descriptions of general commonsense events with more specific realis events. Our analyses show that imagined stories have a substantially more linear narrative flow, compared to recalled stories in which adjacent sentences are more disconnected. In addition, while recalled stories rely more on autobiographical events based on episodic memory, imagined stories express more commonsense knowledge based on semantic memory. Finally, our measures reveal the effect of narrativization of memories in stories (e.g., stories about frequently recalled memories flow more linearly; Bartlett, 1932). Our findings highlight the potential of using NLP tools to study the traces of human cognition in language.</abstract>
@@ -2430,7 +2430,7 @@
     <paper id="179">
       <title>Recurrent Neural Network Language Models Always Learn <fixed-case>E</fixed-case>nglish-Like Relative Clause Attachment</title>
       <author><first>Forrest</first><last>Davis</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>1979–1990</pages>
       <abstract>A standard approach to evaluating language models analyzes how models assign probabilities to valid versus invalid syntactic constructions (i.e. is a grammatical sentence more probable than an ungrammatical sentence). Our work uses ambiguous relative clause attachment to extend such evaluations to cases of multiple simultaneous valid interpretations, where stark grammaticality differences are absent. We compare model performance in English and Spanish to show that non-linguistic biases in RNN LMs advantageously overlap with syntactic structure in English but not Spanish. Thus, English models may appear to acquire human-like syntactic preferences, while models trained on Spanish fail to acquire comparable human-like preferences. We conclude by relating these results to broader concerns about the relationship between comprehension (i.e. typical language model use cases) and production (which generates the training data for language models), suggesting that necessary linguistic biases are not present in the training signal at all.</abstract>
       <url hash="8eae95ed">2020.acl-main.179</url>
@@ -2505,7 +2505,7 @@
     <paper id="185">
       <title>Negative Training for Neural Dialogue Response Generation</title>
       <author><first>Tianxing</first><last>He</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>2044–2058</pages>
       <abstract>Although deep learning models have brought tremendous advancements to the field of open-domain dialogue response generation, recent research results have revealed that the trained models have undesirable generation behaviors, such as malicious responses and generic (boring) responses. In this work, we propose a framework named “Negative Training” to minimize such behaviors. Given a trained model, the framework will first find generated samples that exhibit the undesirable behavior, and then use them to feed negative training signals for fine-tuning the model. Our experiments show that negative training can significantly reduce the hit rate of malicious responses, or discourage frequent responses and improve response diversity.</abstract>
       <url hash="715c6401">2020.acl-main.185</url>
@@ -2539,7 +2539,7 @@
     </paper>
     <paper id="188">
       <title>Calibrating Structured Output Predictors for Natural Language Processing</title>
-      <author><first>Abhyuday</first><last>Jagannatha</last></author>
+      <author id="abhyuday-jagannatha"><first>Abhyuday</first><last>Jagannatha</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>2078–2092</pages>
       <abstract>We address the problem of calibrating prediction confidence for output entities of interest in natural language processing (NLP) applications. It is important that NLP applications such as named entity recognition and question answering produce calibrated confidence scores for their predictions, especially if the applications are to be deployed in a safety-critical domain such as healthcare. However the output space of such structured prediction models are often too large to directly adapt binary or multi-class calibration methods. In this study, we propose a general calibration scheme for output entities of interest in neural network based structured prediction models. Our proposed method can be used with any binary class calibration scheme and a neural network model. Additionally, we show that our calibration method can also be used as an uncertainty-aware, entity-specific decoding step to improve the performance of the underlying model at no additional training cost or data requirements. We show that our method outperforms current calibration techniques for Named Entity Recognition, Part-of-speech tagging and Question Answering systems. We also observe an improvement in model performance from our decoding step across several tasks and benchmark datasets. Our method improves the calibration and model performance on out-of-domain test scenarios as well.</abstract>
@@ -2552,7 +2552,7 @@
       <title>Active Imitation Learning with Noisy Guidance</title>
       <author><first>Kianté</first><last>Brantley</last></author>
       <author><first>Amr</first><last>Sharaf</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>2093–2105</pages>
       <abstract>Imitation learning algorithms provide state-of-the-art results on many structured prediction tasks by learning near-optimal search policies. Such algorithms assume training-time access to an expert that can provide the optimal action at any queried state; unfortunately, the number of such queries is often prohibitive, frequently rendering these approaches impractical. To combat this query complexity, we consider an active learning setting in which the learning algorithm has additional access to a much cheaper noisy heuristic that provides noisy guidance. Our algorithm, LEAQI, learns a difference classifier that predicts when the expert is likely to disagree with the heuristic, and queries the expert only when necessary. We apply LEAQI to three sequence labelling tasks, demonstrating significantly fewer queries to the expert and comparable (or better) accuracies over a passive approach.</abstract>
       <url hash="badfe3f8">2020.acl-main.189</url>
@@ -2577,7 +2577,7 @@
       <title><fixed-case>GAN</fixed-case>-<fixed-case>BERT</fixed-case>: Generative Adversarial Learning for Robust Text Classification with a Bunch of Labeled Examples</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>2114–2119</pages>
       <abstract>Recent Transformer-based architectures, e.g., BERT, provide impressive results in many Natural Language Processing tasks. However, most of the adopted benchmarks are made of (sometimes hundreds of) thousands of examples. In many real scenarios, obtaining high- quality annotated data is expensive and time consuming; in contrast, unlabeled examples characterizing the target task can be, in general, easily collected. One promising method to enable semi-supervised learning has been proposed in image processing, based on Semi- Supervised Generative Adversarial Networks. In this paper, we propose GAN-BERT that ex- tends the fine-tuning of BERT-like architectures with unlabeled data in a generative adversarial setting. Experimental results show that the requirement for annotated examples can be drastically reduced (up to only 50-100 annotated examples), still obtaining good performances in several sentence classification tasks.</abstract>
       <url hash="140699c1">2020.acl-main.191</url>
@@ -2602,7 +2602,7 @@
       <title>Learning to Contextually Aggregate Multi-Source Supervision for Sequence Labeling</title>
       <author><first>Ouyu</first><last>Lan</last></author>
       <author><first>Xiao</first><last>Huang</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>He</first><last>Jiang</last></author>
       <author><first>Liyuan</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -2685,7 +2685,7 @@
       <author><first>Sarthak</first><last>Dash</last></author>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>2198–2208</pages>
       <abstract>Extracting lexico-semantic relations as graph-structured taxonomies, also known as taxonomy construction, has been beneficial in a variety of NLP applications. Recently Graph Neural Network (GNN) has shown to be powerful in successfully tackling many tasks. However, there has been no attempt to exploit GNN to create taxonomies. In this paper, we propose Graph2Taxo, a GNN-based cross-domain transfer framework for the taxonomy construction task. Our main contribution is to learn the latent features of taxonomy construction from existing domains to guide the structure learning of an unseen domain. We also propose a novel method of directed acyclic graph (DAG) generation for taxonomy construction. Specifically, our proposed Graph2Taxo uses a noisy graph constructed from automatically extracted noisy hyponym hypernym candidate pairs, and a set of taxonomies for some known domains for training. The learned model is then used to generate taxonomy for a new unknown domain given a set of terms for that domain. Experiments on benchmark datasets from science and environment domains show that our approach attains significant improvements correspondingly over the state of the art.</abstract>
       <url hash="8f244ae7">2020.acl-main.199</url>
@@ -2709,7 +2709,7 @@
       <title>Why Overfitting Isn’t Always Bad: Retrofitting Cross-Lingual Word Embeddings to Dictionaries</title>
       <author><first>Mozhi</first><last>Zhang</last></author>
       <author><first>Yoshinari</first><last>Fujinuma</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <pages>2214–2220</pages>
       <abstract>Cross-lingual word embeddings (CLWE) are often evaluated on bilingual lexicon induction (BLI). Recent CLWE methods use linear projections, which underfit the training dictionary, to generalize on BLI. However, underfitting can hinder generalization to other downstream tasks that rely on words from the training dictionary. We address this limitation by retrofitting CLWE to the training dictionary, which pulls training translation pairs closer in the embedding space and overfits the training dictionary. This simple post-processing step often improves accuracy on two downstream tasks, despite lowering BLI test accuracy. We also retrofit to both the training dictionary and a synthetic dictionary induced from CLWE, which sometimes generalizes even better on downstream tasks. Our results confirm the importance of fully exploiting training dictionary in downstream tasks and explains why BLI is a flawed CLWE evaluation.</abstract>
@@ -2759,8 +2759,8 @@
       <title>Efficient Strategies for Hierarchical Text Classification: External Knowledge and Auxiliary Tasks</title>
       <author><first>Kervy</first><last>Rivas Rojas</last></author>
       <author><first>Gina</first><last>Bustamante</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <pages>2252–2257</pages>
       <abstract>In hierarchical text classification, we perform a sequence of inference steps to predict the category of a document from top to bottom of a given class taxonomy. Most of the studies have focused on developing novels neural network architectures to deal with the hierarchical structure, but we prefer to look for efficient ways to strengthen a baseline model. We first define the task as a sequence-to-sequence problem. Afterwards, we propose an auxiliary synthetic task of bottom-up-classification. Then, from external dictionaries, we retrieve textual definitions for the classes of all the hierarchy’s layers, and map them into the word vector space. We use the class-definition embeddings as an additional input to condition the prediction of the next layer and in an adapted beam search. Whereas the modified search did not provide large gains, the combination of the auxiliary task and the additional input of class-definitions significantly enhance the classification accuracy. With our efficient approaches, we outperform previous studies, using a drastically reduced number of parameters, in two well-known English datasets.</abstract>
       <url hash="745d97e0">2020.acl-main.205</url>
@@ -2787,7 +2787,7 @@
       <author><first>Sergey</first><last>Feldman</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Doug</first><last>Downey</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>2270–2282</pages>
       <abstract>Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. We show that Specter outperforms a variety of competitive baselines on the benchmark.</abstract>
       <url hash="35f7b17e">2020.acl-main.207</url>
@@ -2842,7 +2842,7 @@
       <author><first>Jie</first><last>Fu</last></author>
       <author><first>Marc-Alexandre</first><last>Côté</last></author>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <pages>2325–2338</pages>
       <abstract>Existing machine reading comprehension (MRC) models do not scale effectively to real-world applications like web-level information retrieval and question answering (QA). We argue that this stems from the nature of MRC datasets: most of these are static environments wherein the supporting documents and all necessary information are fully observed. In this paper, we propose a simple method that reframes existing MRC datasets as interactive, partially observable environments. Specifically, we “occlude” the majority of a document’s text and add context-sensitive commands that reveal “glimpses” of the hidden text to a model. We repurpose SQuAD and NewsQA as an initial case study, and then show how the interactive corpora can be used to train a model that seeks relevant information through sequential decision making. We believe that this setting can contribute in scaling models to web-level QA scenarios.</abstract>
@@ -2854,7 +2854,7 @@
     <paper id="212">
       <title>Syntactic Data Augmentation Increases Robustness to Inference Heuristics</title>
       <author><first>Junghyun</first><last>Min</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
@@ -2868,7 +2868,7 @@
     <paper id="213">
       <title>Improved Speech Representations with Multi-Target Autoregressive Predictive Coding</title>
       <author><first>Yu-An</first><last>Chung</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>2353–2358</pages>
       <abstract>Training objectives based on predictive coding have recently been shown to be very effective at learning meaningful representations from unlabeled speech. One example is Autoregressive Predictive Coding (Chung et al., 2019), which trains an autoregressive RNN to generate an unseen future frame given a context such as recent past frames. The basic hypothesis of these approaches is that hidden states that can accurately predict future frames are a useful representation for many downstream tasks. In this paper we extend this hypothesis and aim to enrich the information encoded in the hidden states by training the model to make more accurate future predictions. We propose an auxiliary objective that serves as a regularization to improve generalization of the future frame prediction task. Experimental results on phonetic classification, speech recognition, and speech translation not only support the hypothesis, but also demonstrate the effectiveness of our approach in learning representations that contain richer phonetic content.</abstract>
       <url hash="20b33a5f">2020.acl-main.213</url>
@@ -2902,7 +2902,7 @@
       <author><first>Lorenzo</first><last>Belgrano</last></author>
       <author><first>Nicolai</first><last>Jacobsen</last></author>
       <author><first>Regitze</first><last>Sdun</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>2370–2380</pages>
       <abstract>We address a challenging and practical task of labeling questions in speech in real time during telephone calls to emergency medical services in English, which embeds within a broader decision support system for emergency call-takers. We propose a novel multimodal approach to real-time sequence labeling in speech. Our model treats speech and its own textual representation as two separate modalities or views, as it jointly learns from streamed audio and its noisy transcription into text via automatic speech recognition. Our results show significant gains of jointly learning from the two modalities when compared to text or audio only, under adverse noise and limited volume of training data. The results generalize to medical symptoms detection where we observe a similar pattern of improvements with multimodal learning.</abstract>
       <url hash="a77ccef2">2020.acl-main.215</url>
@@ -2926,7 +2926,7 @@
     <paper id="217">
       <title>Phone Features Improve Speech Translation</title>
       <author><first>Elizabeth</first><last>Salesky</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>2388–2397</pages>
       <abstract>End-to-end models for speech translation (ST) more tightly couple speech recognition (ASR) and machine translation (MT) than a traditional cascade of separate ASR and MT models, with simpler model architectures and the potential for reduced error propagation. Their performance is often assumed to be superior, though in many conditions this is not yet the case. We compare cascaded and end-to-end models across high, medium, and low-resource conditions, and show that cascades remain stronger baselines. Further, we introduce two methods to incorporate phone features into ST models. We show that these features improve both architectures, closing the gap between end-to-end models and cascades, and outperforming previous academic work – by up to 9 BLEU on our low-resource setting.</abstract>
       <url hash="dc79ec0d">2020.acl-main.217</url>
@@ -3002,7 +3002,7 @@
     </paper>
     <paper id="223">
       <title>Automatic Poetry Generation from Prosaic Text</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>2471–2480</pages>
       <abstract>In the last few years, a number of successful approaches have emerged that are able to adequately model various aspects of natural language. In particular, language models based on neural networks have improved the state of the art with regard to predictive language modeling, while topic models are successful at capturing clear-cut, semantic dimensions. In this paper, we will explore how these approaches can be adapted and combined to model the linguistic and literary aspects needed for poetry generation. The system is exclusively trained on standard, non-poetic text, and its output is constrained in order to confer a poetic character to the generated verse. The framework is applied to the generation of poems in both English and French, and is equally evaluated for both languages. Even though it only uses standard, non-poetic text as input, the system yields state of the art results for poetry generation.</abstract>
       <url hash="47945088">2020.acl-main.223</url>
@@ -3013,7 +3013,7 @@
     <paper id="224">
       <title>Bridging the Structural Gap Between Encoding and Decoding for Data-To-Text Generation</title>
       <author><first>Chao</first><last>Zhao</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Snigdha</first><last>Chaturvedi</last></author>
       <pages>2481–2491</pages>
       <abstract>Generating sequential natural language descriptions from graph-structured data (e.g., knowledge graph) is challenging, partly because of the structural differences between the input graph and the output text. Hence, popular sequence-to-sequence models, which require serialized input, are not a natural fit for this task. Graph neural networks, on the other hand, can better encode the input graph but broaden the structural gap between the encoder and decoder, making faithful generation difficult. To narrow this gap, we propose DualEnc, a dual encoding model that can not only incorporate the graph structure, but can also cater to the linear structure of the output text. Empirical comparisons with strong single-encoder baselines demonstrate that dual encoding can significantly improve the quality of the generated text.</abstract>
@@ -3068,7 +3068,7 @@
       <title>Simple and Effective Retrieve-Edit-Rerank Text Generation</title>
       <author><first>Nabil</first><last>Hossain</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2532–2538</pages>
       <abstract>Retrieve-and-edit seq2seq methods typically retrieve an output from the training set and learn a model to edit it to produce the final output. We propose to extend this framework with a simple and effective post-generation ranking approach. Our framework (i) retrieves several potentially relevant outputs for each input, (ii) edits each candidate independently, and (iii) re-ranks the edited candidates to select the final output. We use a standard editing model with simple task-specific re-ranking approaches, and we show empirically that this approach outperforms existing, significantly more complex methodologies. Experiments on two machine translation (MT) datasets show new state-of-art results. We also achieve near state-of-art performance on the Gigaword summarization dataset, where our analyses show that there is significant room for performance improvement with better candidate output selection in future work.</abstract>
       <url hash="192aca94">2020.acl-main.228</url>
@@ -3100,7 +3100,7 @@
       <author><first>Spencer</first><last>Whitehead</last></author>
       <author><first>Di</first><last>Lu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>2557–2568</pages>
       <abstract>We introduce a new task, MultiMedia Event Extraction, which aims to extract events and their arguments from multimedia documents. We develop the first benchmark and collect a dataset of 245 multimedia news articles with extensively annotated events and arguments. We propose a novel method, Weakly Aligned Structured Embedding (WASE), that encodes structured representations of semantic information from textual and visual data into a common embedding space. The structures are aligned across modalities by employing a weakly supervised training strategy, which enables exploiting available resources without explicit cross-media annotation. Compared to uni-modal state-of-the-art methods, our approach achieves 4.0% and 9.8% absolute F-score gains on text event argument role labeling and visual event extraction. Compared to state-of-the-art multimedia unstructured representations, we achieve 8.3% and 5.0% absolute F-score gains on multimedia event extraction and argument role labeling, respectively. By utilizing images, we extract 21.4% more event mentions than traditional text-only methods.</abstract>
       <url hash="5dd1b872">2020.acl-main.230</url>
@@ -3112,8 +3112,8 @@
       <title>Learning to Segment Actions from Observation and Narration</title>
       <author><first>Daniel</first><last>Fried</last></author>
       <author><first>Jean-Baptiste</first><last>Alayrac</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Aida</first><last>Nematzadeh</last></author>
       <pages>2569–2588</pages>
@@ -3141,7 +3141,7 @@
       <author><first>Liwei</first><last>Wang</last></author>
       <author><first>Yelong</first><last>Shen</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>2603–2614</pages>
       <abstract>Generating multi-sentence descriptions for videos is one of the most challenging captioning tasks due to its high requirements for not only visual relevance but also discourse-based coherence across the sentences in the paragraph. Towards this goal, we propose a new approach called Memory-Augmented Recurrent Transformer (MART), which uses a memory module to augment the transformer architecture. The memory module generates a highly summarized memory state from the video segments and the sentence history so as to help better prediction of the next sentence (w.r.t. coreference and repetition aspects), thus encouraging coherent paragraph generation. Extensive experiments, human evaluations, and qualitative analyses on two popular datasets ActivityNet Captions and YouCookII show that MART generates more coherent and less repetitive paragraph captions than baseline methods, while maintaining relevance to the input video events.</abstract>
@@ -3154,7 +3154,7 @@
       <title>What is Learned in Visually Grounded Neural Syntax Acquisition</title>
       <author><first>Noriyuki</first><last>Kojima</last></author>
       <author><first>Hadar</first><last>Averbuch-Elor</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>2615–2635</pages>
       <abstract>Visual features are a promising signal for learning bootstrap textual models. However, blackbox learning models make it difficult to isolate the specific contribution of visual components. In this analysis, we consider the case study of the Visually Grounded Neural Syntax Learner (Shi et al., 2019), a recent approach for learning syntax from a visual training signal. By constructing simplified versions of the model, we isolate the core factors that yield the model’s strong performance. Contrary to what the model might be capable of learning, we find significantly less expressive versions produce similar predictions and perform just as well, or even better. We also find that a simple lexical signal of noun concreteness plays the main role in the model’s predictions as opposed to more complex syntactic reasoning.</abstract>
@@ -3195,7 +3195,7 @@
       <title>Interactive Classification by Asking Informative Questions</title>
       <author><first>Lili</first><last>Yu</last></author>
       <author><first>Howard</first><last>Chen</last></author>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>2664–2680</pages>
@@ -3232,7 +3232,7 @@
       <title>Masked Language Model Scoring</title>
       <author><first>Julian</first><last>Salazar</last></author>
       <author><first>Davis</first><last>Liang</last></author>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
       <pages>2699–2712</pages>
       <abstract>Pretrained masked language models (MLMs) require finetuning for most NLP tasks. Instead, we evaluate MLMs out of the box via their pseudo-log-likelihood scores (PLLs), which are computed by masking tokens one by one. We show that PLLs outperform scores from autoregressive language models like GPT-2 in a variety of tasks. By rescoring ASR and NMT hypotheses, RoBERTa reduces an end-to-end LibriSpeech model’s WER by 30% relative and adds up to +1.7 BLEU on state-of-the-art baselines for low-resource translation pairs, with further gains from domain adaptation. We attribute this success to PLL’s unsupervised expression of linguistic acceptability without a left-to-right bias, greatly improving on scores from GPT-2 (+10 points on island effects, NPI licensing in BLiMP). One can finetune MLMs to give scores without masking, enabling computation in a single inference pass. In all, PLLs and their associated pseudo-perplexities (PPPLs) enable plug-and-play use of the growing number of pretrained MLMs; e.g., we use a single cross-lingual model to rescore translations in multiple languages. We release our library for language model scoring at <url>https://github.com/awslabs/mlm-scoring</url>.</abstract>
@@ -3245,7 +3245,7 @@
     <paper id="241">
       <title>Orthogonal Relation Transforms with Graph Context Modeling for Knowledge Graph Embedding</title>
       <author><first>Yun</first><last>Tang</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
@@ -3273,7 +3273,7 @@
     <paper id="243">
       <title>Posterior Control of Blackbox Generation</title>
       <author><first>Xiang Lisa</first><last>Li</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>2731–2743</pages>
       <abstract>Text generation often requires high-precision output that obeys task-specific rules. This fine-grained control is difficult to enforce with off-the-shelf deep learning models. In this work, we consider augmenting neural generation models with discrete control states learned through a structured latent-variable approach. Under this formulation, task-specific knowledge can be encoded through a range of rich, posterior constraints that are effectively trained into the model. This approach allows users to ground internal model decisions based on prior knowledge, without sacrificing the representational power of neural generative models. Experiments consider applications of this approach for text generation. We find that this method improves over standard benchmarks, while also providing fine-grained control.</abstract>
       <url hash="b1550543">2020.acl-main.243</url>
@@ -3326,14 +3326,14 @@
     </paper>
     <paper id="247">
       <title>Span Selection Pre-training for Question Answering</title>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Rishav</first><last>Chakravarti</last></author>
       <author><first>Anthony</first><last>Ferritto</last></author>
       <author><first>Lin</first><last>Pan</last></author>
       <author><first>G P Shrivatsa</first><last>Bhargav</last></author>
       <author><first>Dinesh</first><last>Garg</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>2773–2782</pages>
       <abstract>BERT (Bidirectional Encoder Representations from Transformers) and related pre-trained Transformers have provided large gains across many language understanding tasks, achieving a new state-of-the-art (SOTA). BERT is pretrained on two auxiliary tasks: Masked Language Model and Next Sentence Prediction. In this paper we introduce a new pre-training task inspired by reading comprehension to better align the pre-training from memorization to understanding. Span Selection PreTraining (SSPT) poses cloze-like training instances, but rather than draw the answer from the model’s parameters, it is selected from a relevant passage. We find significant and consistent improvements over both BERT-BASE and BERT-LARGE on multiple Machine Reading Comprehension (MRC) datasets. Specifically, our proposed model has strong empirical evidence as it obtains SOTA results on Natural Questions, a new benchmark MRC dataset, outperforming BERT-LARGE by 3 F1 points on short answer prediction. We also show significant impact in HotpotQA, improving answer prediction F1 by 4 points and supporting fact prediction F1 by 1 point and outperforming the previous best system. Moreover, we show that our pre-training approach is particularly effective when training data is limited, improving the learning curve by a large amount.</abstract>
       <url hash="fadd11e8">2020.acl-main.247</url>
@@ -3345,7 +3345,7 @@
       <title>Topological Sort for Sentence Ordering</title>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>2783–2792</pages>
       <abstract>Sentence ordering is the task of arranging the sentences of a given text in the correct order. Recent work using deep neural networks for this task has framed it as a sequence prediction problem. In this paper, we propose a new framing of this task as a constraint solving problem and introduce a new technique to solve it. Additionally, we propose a human evaluation for this task. The results on both automatic and human metrics across four different datasets show that this new technique is better at capturing coherence in documents.</abstract>
       <url hash="a2fceefc">2020.acl-main.248</url>
@@ -3395,7 +3395,7 @@
       <author><first>Ankur</first><last>Bapna</last></author>
       <author><first>Yuan</first><last>Cao</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
-      <author><first>Mia</first><last>Chen</last></author>
+      <author id="mia-xu-chen"><first>Mia</first><last>Chen</last></author>
       <author><first>Sneha</first><last>Kudugunta</last></author>
       <author><first>Naveen</first><last>Arivazhagan</last></author>
       <author><first>Yonghui</first><last>Wu</last></author>
@@ -3450,7 +3450,7 @@
       <title><fixed-case>G</fixed-case>lyph2<fixed-case>V</fixed-case>ec: Learning <fixed-case>C</fixed-case>hinese Out-of-Vocabulary Word Embedding from Glyphs</title>
       <author><first>Hong-You</first><last>Chen</last></author>
       <author><first>Sz-Han</first><last>Yu</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>2865–2871</pages>
       <abstract>Chinese NLP applications that rely on large text often contain huge amounts of vocabulary which are sparse in corpus. We show that characters’ written form, <i>Glyphs</i>, in ideographic languages could carry rich semantics. We present a multi-modal model, <i>Glyph2Vec</i>, to tackle Chinese out-of-vocabulary word embedding problem. Glyph2Vec extracts visual features from word glyphs to expand current word embedding space for out-of-vocabulary word embedding, without the need of accessing any corpus, which is useful for improving Chinese NLP systems, especially for low-resource scenarios. Experiments across different applications show the significant effectiveness of our model.</abstract>
       <url hash="c6491f1c">2020.acl-main.256</url>
@@ -3477,7 +3477,7 @@
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>2883–2889</pages>
       <abstract>While automatic term extraction is a well-researched area, computational approaches to distinguish between degrees of technicality are still understudied. We semi-automatically create a German gold standard of technicality across four domains, and illustrate the impact of a web-crawled general-language corpus on technicality prediction. When defining a classification approach that combines general-language and domain-specific word embeddings, we go beyond previous work and align vector spaces to gain comparative embeddings. We suggest two novel models to exploit general- vs. domain-specific comparisons: a simple neural network model with pre-computed comparative-embedding information as input, and a multi-channel model computing the comparison internally. Both models outperform previous approaches, with the multi-channel model performing best.</abstract>
       <url hash="c431e41f">2020.acl-main.258</url>
@@ -3516,7 +3516,7 @@
       <title>Give Me Convenience and Give Her Death: Who Should Decide What Uses of <fixed-case>NLP</fixed-case> are Appropriate, and on What Basis?</title>
       <author><first>Kobi</first><last>Leins</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2908–2913</pages>
       <abstract>As part of growing NLP capabilities, coupled with an awareness of the ethical dimensions of research, questions have been raised about whether particular datasets and tasks should be deemed off-limits for NLP research. We examine this question with respect to a paper on automatic legal sentencing from EMNLP 2019 which was a source of some debate, in asking whether the paper should have been allowed to be published, who should have been charged with making such a decision, and on what basis. We focus in particular on the role of data statements in ethically assessing research, but also discuss the topic of dual use, and examine the outcomes of similar debates in other scientific disciplines.</abstract>
       <url hash="6fcc1a05">2020.acl-main.261</url>
@@ -3537,7 +3537,7 @@
     <paper id="263">
       <title>It’s Morphin’ Time! <fixed-case>C</fixed-case>ombating Linguistic Discrimination with Inflectional Perturbations</title>
       <author><first>Samson</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <pages>2920–2935</pages>
@@ -3584,7 +3584,7 @@
     <paper id="266">
       <title>A Probabilistic Generative Model for Typographical Analysis of Early Modern Printing</title>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Christopher</first><last>Warren</last></author>
       <author><first>Maxwell</first><last>G’Sell</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
@@ -3612,7 +3612,7 @@
     <paper id="268">
       <title>Estimating the influence of auxiliary tasks for multi-task learning of sequence tagging tasks</title>
       <author><first>Fynn</first><last>Schröder</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>2971–2985</pages>
       <abstract>Multi-task learning (MTL) and transfer learning (TL) are techniques to overcome the issue of data scarcity when training state-of-the-art neural networks. However, finding beneficial auxiliary datasets for MTL or TL is a time- and resource-consuming trial-and-error approach. We propose new methods to automatically assess the similarity of sequence tagging datasets to identify beneficial auxiliary data for MTL or TL setups. Our methods can compute the similarity between any two sequence tagging datasets, they do not need to be annotated with the same tagset or multiple labels in parallel. Additionally, our methods take tokens and their labels into account, which is more robust than only using either of them as an information source, as conducted in prior work. We empirically show that our similarity measures correlate with the change in test score of neural networks that use the auxiliary dataset for MTL to increase the main task performance. We provide an efficient, open-source implementation.</abstract>
       <url hash="a9333644">2020.acl-main.268</url>
@@ -3638,7 +3638,7 @@
     <paper id="270">
       <title>Improving Transformer Models by Reordering their Sublayers</title>
       <author><first>Ofir</first><last>Press</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <pages>2996–3005</pages>
       <abstract>Multilayer transformer networks consist of interleaved self-attention and feedforward sublayers. Could ordering the sublayers in a different pattern lead to better performance? We generate randomly ordered transformers and train them with the language modeling objective. We observe that some of these models are able to achieve better performance than the interleaved baseline, and that those successful variants tend to have more self-attention at the bottom and more feedforward sublayers at the top. We propose a new transformer pattern that adheres to this property, the sandwich transformer, and show that it improves perplexity on multiple word-level and character-level language modeling benchmarks, at no cost in parameters, memory, or training time. However, the sandwich reordering pattern does not guarantee performance gains across every task, as we demonstrate on machine translation models. Instead, we suggest that further exploration of task-specific sublayer reorderings is needed in order to unlock additional gains.</abstract>
@@ -3709,7 +3709,7 @@
     <paper id="275">
       <title>Dynamic Programming Encoding for Subword Segmentation in Neural Machine Translation</title>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Mohammad</first><last>Norouzi</last></author>
       <pages>3042–3051</pages>
       <abstract>This paper introduces Dynamic Programming Encoding (DPE), a new segmentation algorithm for tokenizing sentences into subword units. We view the subword segmentation of output sentences as a latent variable that should be marginalized out for learning and inference. A mixed character-subword transformer is proposed, which enables exact log marginal likelihood estimation and exact MAP inference to find target segmentations with maximum posterior probability. DPE uses a lightweight mixed character-subword transformer as a means of pre-processing parallel data to segment output sentences using dynamic programming. Empirical results on machine translation suggest that DPE is effective for segmenting output sentences and can be combined with BPE dropout for stochastic segmentation of source sentences. DPE achieves an average improvement of 0.9 BLEU over BPE (Sennrich et al., 2016) and an average improvement of 0.55 BLEU over BPE dropout (Provilkov et al., 2019) on several WMT datasets including English &lt;=&gt; (German, Romanian, Estonian, Finnish, Hungarian).</abstract>
@@ -3749,7 +3749,7 @@
       <author><first>Shuo</first><last>Wang</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>3070–3079</pages>
       <abstract>Confidence calibration, which aims to make model predictions equal to the true correctness measures, is important for neural machine translation (NMT) because it is able to offer useful indicators of translation errors in the generated output. While prior studies have shown that NMT models trained with label smoothing are well-calibrated on the ground-truth training data, we find that miscalibration still remains a severe challenge for NMT during inference due to the discrepancy between training and inference. By carefully designing experiments on three language pairs, our work provides in-depth analyses of the correlation between calibration and translation performance as well as linguistic properties of miscalibration and reports a number of interesting findings that might help humans better analyze, understand and improve NMT models. Based on these observations, we further propose a new graduated label smoothing method that can improve both inference calibration and translation performance.</abstract>
       <url hash="237d85e9">2020.acl-main.278</url>
@@ -3761,7 +3761,7 @@
       <title>Camouflaged <fixed-case>C</fixed-case>hinese Spam Content Detection with Semi-supervised Generative Active Learning</title>
       <author><first>Zhuoren</first><last>Jiang</last></author>
       <author><first>Zhe</first><last>Gao</last></author>
-      <author><first>Yu</first><last>Duan</last></author>
+      <author id="yuguang-duan"><first>Yu</first><last>Duan</last></author>
       <author><first>Yangyang</first><last>Kang</last></author>
       <author><first>Changlong</first><last>Sun</last></author>
       <author><first>Qiong</first><last>Zhang</last></author>
@@ -3792,7 +3792,7 @@
       <title>Hiring Now: A Skill-Aware Multi-Attention Model for Job Posting Generation</title>
       <author><first>Liting</first><last>Liu</last></author>
       <author><first>Jie</first><last>Liu</last></author>
-      <author id="wenzheng-zhang"><first>Wenzheng</first><last>Zhang</last></author>
+      <author><first>Wenzheng</first><last>Zhang</last></author>
       <author><first>Ziming</first><last>Chi</last></author>
       <author><first>Wenxuan</first><last>Shi</last></author>
       <author><first>Yalou</first><last>Huang</last></author>
@@ -3881,14 +3881,14 @@
       <title><fixed-case>A</fixed-case>nalyzing the <fixed-case>P</fixed-case>ersuasive <fixed-case>E</fixed-case>ffect of <fixed-case>S</fixed-case>tyle in <fixed-case>N</fixed-case>ews <fixed-case>E</fixed-case>ditorial <fixed-case>A</fixed-case>rgumentation</title>
       <author><first>Roxanne</first><last>El Baff</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>3154–3160</pages>
       <abstract>News editorials argue about political issues in order to challenge or reinforce the stance of readers with different ideologies. Previous research has investigated such persuasive effects for argumentative content. In contrast, this paper studies how important the style of news editorials is to achieve persuasion. To this end, we first compare content- and style-oriented classifiers on editorials from the liberal NYTimes with ideology-specific effect annotations. We find that conservative readers are resistant to NYTimes style, but on liberals, style even has more impact than content. Focusing on liberals, we then cluster the leads, bodies, and endings of editorials, in order to learn about writing style patterns of effective argumentation.</abstract>
       <url hash="40e371f2">2020.acl-main.287</url>
       <attachment type="Software" hash="bced880b">2020.acl-main.287.Software.zip</attachment>
-      <doi>10.18653/v1/2020.acl-main.287</doi>
       <attachment type="Dataset" hash="11af06d9">2020.acl-main.287.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.acl-main.287</doi>
       <video href="http://slideslive.com/38929171"/>
       <bibkey>el-baff-etal-2020-analyzing</bibkey>
     </paper>
@@ -3948,7 +3948,7 @@
       <author><first>Devamanyu</first><last>Hazarika</last></author>
       <author><first>Abhinaba</first><last>Roy</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>3198–3210</pages>
       <abstract>Cross-domain sentiment analysis has received significant attention in recent years, prompted by the need to combat the domain gap between different applications that make use of sentiment analysis. In this paper, we take a novel perspective on this task by exploring the role of external commonsense knowledge. We introduce a new framework, KinGDOM, which utilizes the ConceptNet knowledge graph to enrich the semantics of a document by providing both domain-specific and domain-general background concepts. These concepts are learned by training a graph convolutional autoencoder that leverages inter-domain concepts in a domain-invariant manner. Conditioning a popular domain-adversarial baseline method with these learned concepts helps improve its performance over state-of-the-art approaches, demonstrating the efficacy of our proposed framework.</abstract>
@@ -4042,7 +4042,7 @@
       <title>A Span-based Linearization for Constituent Trees</title>
       <author><first>Yang</first><last>Wei</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>3267–3277</pages>
       <abstract>We propose a novel linearization of a constituent tree, together with a new locally normalized model. For each split point in a sentence, our model computes the normalizer on all spans ending with that split point, and then predicts a tree span from them. Compared with global models, our model is fast and parallelizable. Different from previous local models, our linearization method is tied on the spans directly and considers more local features when performing span prediction, which is more interpretable and effective. Experiments on PTB (95.8 F1) and CTB (92.4 F1) show that our model significantly outperforms existing local models and efficiently achieves competitive results with global models.</abstract>
       <url hash="b903b3e3">2020.acl-main.299</url>
@@ -4068,7 +4068,7 @@
       <title>Efficient Constituency Parsing by Pointing</title>
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
       <author><first>Xuan-Phi</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>3284–3294</pages>
       <abstract>We propose a novel constituency parsing model that casts the parsing problem into a series of pointing tasks. Specifically, our model estimates the likelihood of a span being a legitimate tree constituent via the pointing score corresponding to the boundary words of the span. Our parsing model supports efficient top-down decoding and our learning objective is able to enforce structural consistency without resorting to the expensive CKY inference. The experiments on the standard English Penn Treebank parsing task show that our method achieves 92.78 F1 without using pre-trained models, which is higher than all the existing methods with similar time complexity. Using pre-trained BERT, our model achieves 95.48 F1, which is competitive with the state-of-the-art while being faster. Our approach also establishes new state-of-the-art in Basque and Swedish in the SPMRL shared tasks on multilingual constituency parsing.</abstract>
@@ -4093,7 +4093,7 @@
       <title>Representations of Syntax <fixed-case>[MASK]</fixed-case> Useful: <fixed-case>E</fixed-case>ffects of Constituency and Dependency Structure in Recursive <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Michael</first><last>Lepori</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <pages>3306–3316</pages>
       <abstract>Sequence-based neural networks show significant sensitivity to syntactic structure, but they still perform less well on syntactic tasks than tree-based networks. Such tree-based networks can be provided with a constituency parse, a dependency parse, or both. We evaluate which of these two representational schemes more effectively introduces biases for syntactic structure that increase performance on the subject-verb agreement prediction task. We find that a constituency-based network generalizes more robustly than a dependency-based one, and that combining the two types of structure does not yield further improvement. Finally, we show that the syntactic robustness of sequential models can be substantially improved by fine-tuning on a small amount of constructed data, suggesting that data augmentation is a viable alternative to explicit constituency structure for imparting the syntactic biases that sequential models are lacking.</abstract>
       <url hash="0a1b5e66">2020.acl-main.303</url>
@@ -4122,7 +4122,7 @@
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Lu</first><last>Wang</last></author>
       <author><first>Zhiming</first><last>Mao</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>3331–3341</pages>
       <abstract>Trending topics in social media content evolve over time, and it is therefore crucial to understand social media users and their interpersonal communications in a dynamic manner. Here we study dynamic online conversation recommendation, to help users engage in conversations that satisfy their evolving interests. While most prior work assumes static user interests, our model is able to capture the temporal aspects of user interests, and further handle future conversations that are unseen during training time. Concretely, we propose a neural architecture to exploit changes of user interactions and interests over time, to predict which discussions they are likely to enter. We conduct experiments on large-scale collections of Reddit conversations, and results on three subreddits show that our model significantly outperforms state-of-the-art models that make a static assumption of user interests. We further evaluate on handling “cold start”, and observe consistently better performance by our model when considering various degrees of sparsity of user’s chatting history and conversation contexts. Lastly, analyses on our model outputs indicate user interest change, explaining the advantage and efficacy of our approach.</abstract>
       <url hash="100b6924">2020.acl-main.305</url>
@@ -4146,7 +4146,7 @@
     <paper id="307">
       <title>Stock Embeddings Acquired from News Articles and Price History, and an Application to Portfolio Optimization</title>
       <author><first>Xin</first><last>Du</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>3353–3363</pages>
       <abstract>Previous works that integrated news articles to better process stock prices used a variety of neural networks to predict price movements. The textual and price information were both encoded in the neural network, and it is therefore difficult to apply this approach in situations other than the original framework of the notoriously hard problem of price prediction. In contrast, this paper presents a method to encode the influence of news articles through a vector representation of stocks called a <i>stock embedding</i>. The stock embedding is acquired with a deep learning framework using both news articles and price history. Because the embedding takes the operational form of a vector, it is applicable to other financial problems besides price prediction. As one example application, we show the results of portfolio optimization using Reuters &amp; Bloomberg headlines, producing a capital gain 2.8 times larger than that obtained with a baseline method using only stock price data. This suggests that the proposed stock embedding can leverage textual financial semantics to solve financial prediction problems.</abstract>
       <url hash="2a45ced6">2020.acl-main.307</url>
@@ -4162,8 +4162,8 @@
       <author><first>Haewoon</first><last>Kwak</last></author>
       <author><first>Yoan</first><last>Dinkov</last></author>
       <author><first>Ahmed</first><last>Ali</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>3364–3374</pages>
       <abstract>Predicting the political bias and the factuality of reporting of entire news outlets are critical elements of media profiling, which is an understudied but an increasingly important research direction. The present level of proliferation of fake, biased, and propagandistic content online has made it impossible to fact-check every single suspicious claim, either manually or automatically. Thus, it has been proposed to profile entire news outlets and to look for those that are likely to publish fake or biased content. This makes it possible to detect likely “fake news” the moment they are published, by simply checking the reliability of their source. From a practical perspective, political bias and factuality of reporting have a linguistic aspect but also a social context. Here, we study the impact of both, namely (i) what was written (i.e., what was published by the target medium, and how it describes itself in Twitter) vs. (ii) who reads it (i.e., analyzing the target medium’s audience on social media). We further study (iii) what was written about the target medium (in Wikipedia). The evaluation results show that what was written matters most, and we further show that putting all information sources together yields huge improvements over the current state-of-the-art.</abstract>
       <url hash="d42ce860">2020.acl-main.308</url>
@@ -4198,7 +4198,7 @@
     <paper id="311">
       <title>Roles and Utilization of Attention Heads in Transformer-based Neural Language Models</title>
       <author><first>Jae-young</first><last>Jo</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>3404–3417</pages>
       <abstract>Sentence encoders based on the transformer architecture have shown promising results on various natural language tasks. The main impetus lies in the pre-trained neural language models that capture long-range dependencies among words, owing to multi-head attention that is unique in the architecture. However, little is known for how linguistic properties are processed, represented, and utilized for downstream tasks among hundreds of attention heads inside the pre-trained transformer-based model. For the initial goal of examining the roles of attention heads in handling a set of linguistic features, we conducted a set of experiments with ten probing tasks and three downstream tasks on four pre-trained transformer families (GPT, GPT2, BERT, and ELECTRA). Meaningful insights are shown through the lens of heat map visualization and utilized to propose a relatively simple sentence representation method that takes advantage of most influential attention heads, resulting in additional performance improvements on the downstream tasks.</abstract>
       <url hash="ca6955c7">2020.acl-main.311</url>
@@ -4297,14 +4297,14 @@
       <author><first>Wei</first><last>Zou</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Jun</first><last>Xie</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>3486–3497</pages>
       <abstract>Neural machine translation systems tend to fail on less decent inputs despite its significant efficacy, which may significantly harm the credibility of these systems—fathoming how and when neural-based systems fail in such cases is critical for industrial maintenance. Instead of collecting and analyzing bad cases using limited handcrafted error features, here we investigate this issue by generating adversarial examples via a new paradigm based on reinforcement learning. Our paradigm could expose pitfalls for a given performance metric, e.g., BLEU, and could target any given neural machine translation architecture. We conduct experiments of adversarial attacks on two mainstream neural machine translation architectures, RNN-search, and Transformer. The results show that our method efficiently produces stable attacks with meaning-preserving adversarial examples. We also present a qualitative and quantitative analysis for the preference pattern of the attack, demonstrating its capability of pitfall exposure.</abstract>
       <url hash="61c3189a">2020.acl-main.319</url>
       <attachment type="Software" hash="ce483f73">2020.acl-main.319.Software.zip</attachment>
-      <doi>10.18653/v1/2020.acl-main.319</doi>
       <attachment type="Dataset" hash="79345d52">2020.acl-main.319.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.acl-main.319</doi>
       <video href="http://slideslive.com/38929000"/>
       <bibkey>zou-etal-2020-reinforced</bibkey>
     </paper>
@@ -4354,8 +4354,8 @@
     <paper id="323">
       <title>Dynamically Adjusting Transformer Batch Size by Monitoring Gradient Direction Change</title>
       <author><first>Hongfei</first><last>Xu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
       <pages>3519–3524</pages>
       <abstract>The choice of hyper-parameters affects the performance of neural models. While much previous research (Sutskever et al., 2013; Duchi et al., 2011; Kingma and Ba, 2015) focuses on accelerating convergence and reducing the effects of the learning rate, comparatively few papers concentrate on the effect of batch size. In this paper, we analyze how increasing batch size affects gradient direction, and propose to evaluate the stability of gradients with their angle change. Based on our observations, the angle change of gradient direction first tends to stabilize (i.e. gradually decrease) while accumulating mini-batches, and then starts to fluctuate. We propose to automatically and dynamically determine batch sizes by accumulating gradients of mini-batches and performing an optimization step at just the time when the direction of gradients starts to fluctuate. To improve the efficiency of our approach for large models, we propose a sampling approach to select gradients of parameters sensitive to the batch size. Our approach dynamically determines proper and efficient batch sizes during training. In our experiments on the WMT 14 English to German and English to French tasks, our approach improves the Transformer with a fixed 25k batch size by +0.73 and +0.82 BLEU respectively.</abstract>
@@ -4370,8 +4370,8 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>3525–3535</pages>
       <abstract>Unsupervised neural machine translation (UNMT) has recently achieved remarkable results for several language pairs. However, it can only translate between a single language pair and cannot produce translation results for multiple language pairs at the same time. That is, research on multilingual UNMT has been limited. In this paper, we empirically introduce a simple method to translate between thirteen languages using a single encoder and a single decoder, making use of multilingual data to improve UNMT for all language pairs. On the basis of the empirical findings, we propose two knowledge distillation methods to further enhance multilingual UNMT performance. Our experiments on a dataset with English translated to and from twelve other languages (including three language families and six language branches) show remarkable results, surpassing strong unsupervised individual baselines while achieving promising performance between non-English language pairs in zero-shot translation scenarios and alleviating poor performance in low-resource language pairs.</abstract>
       <url hash="251ce400">2020.acl-main.324</url>
@@ -4417,7 +4417,7 @@
     <paper id="328">
       <title><fixed-case>C</fixed-case>hart<fixed-case>D</fixed-case>ialogs: <fixed-case>P</fixed-case>lotting from <fixed-case>N</fixed-case>atural <fixed-case>L</fixed-case>anguage <fixed-case>I</fixed-case>nstructions</title>
       <author><first>Yutong</first><last>Shao</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>3559–3574</pages>
       <abstract>This paper presents the problem of conversational plotting agents that carry out plotting actions from natural language instructions. To facilitate the development of such agents, we introduce ChartDialogs, a new multi-turn dialog dataset, covering a popular plotting library, matplotlib. The dataset contains over 15,000 dialog turns from 3,200 dialogs covering the majority of matplotlib plot types. Extensive experiments show the best-performing method achieving 61% plotting accuracy, demonstrating that the dataset presents a non-trivial challenge for future research on this task.</abstract>
       <url hash="86c8d671">2020.acl-main.328</url>
@@ -4428,7 +4428,7 @@
     <paper id="329">
       <title><fixed-case>GLUEC</fixed-case>o<fixed-case>S</fixed-case>: An Evaluation Benchmark for Code-Switched <fixed-case>NLP</fixed-case></title>
       <author><first>Simran</first><last>Khanuja</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Anirudh</first><last>Srinivasan</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
@@ -4478,7 +4478,7 @@
       <author><first>Shaden</first><last>Shaar</last></author>
       <author><first>Nikolay</first><last>Babulkov</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>3607–3618</pages>
       <abstract>The recent proliferation of ”fake news” has triggered a number of responses, most notably the emergence of several manual fact-checking initiatives. As a result and over time, a large number of fact-checked claims have been accumulated, which increases the likelihood that a new claim in social media or a new statement by a politician might have already been fact-checked by some trusted fact-checking organization, as viral claims often come back after a while in social media, and politicians like to repeat their favorite statements, true or false, over and over again. As manual fact-checking is very time-consuming (and fully automatic fact-checking has credibility issues), it is important to try to save this effort and to avoid wasting time on claims that have already been fact-checked. Interestingly, despite the importance of the task, it has been largely ignored by the research community so far. Here, we aim to bridge this gap. In particular, we formulate the task and we discuss how it relates to, but also differs from, previous work. We further create a specialized dataset, which we release to the research community. Finally, we present learning-to-rank experiments that demonstrate sizable improvements over state-of-the-art retrieval and textual similarity approaches.</abstract>
       <url hash="5b0957da">2020.acl-main.332</url>
@@ -4557,7 +4557,7 @@
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3667–3677</pages>
       <abstract>In the literature, existing studies always consider Aspect Sentiment Classification (ASC) as an independent sentence-level classification problem aspect by aspect, which largely ignore the document-level sentiment preference information, though obviously such information is crucial for alleviating the information deficiency problem in ASC. In this paper, we explore two kinds of sentiment preference information inside a document, i.e., contextual sentiment consistency w.r.t. the same aspect (namely intra-aspect sentiment consistency) and contextual sentiment tendency w.r.t. all the related aspects (namely inter-aspect sentiment tendency). On the basis, we propose a Cooperative Graph Attention Networks (CoGAN) approach for cooperatively learning the aspect-related sentence representation. Specifically, two graph attention networks are leveraged to model above two kinds of document-level sentiment preference information respectively, followed by an interactive mechanism to integrate the two-fold preference. Detailed evaluation demonstrates the great advantage of the proposed approach to ASC over the state-of-the-art baselines. This justifies the importance of the document-level sentiment preference information to ASC and the effectiveness of our approach capturing such information.</abstract>
       <url hash="201d384d">2020.acl-main.338</url>
@@ -4571,7 +4571,7 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Meng</first><last>Cheng</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>3678–3684</pages>
       <abstract>The current aspect extraction methods suffer from boundary errors. In general, these errors lead to a relatively minor difference between the extracted aspects and the ground-truth. However, they hurt the performance severely. In this paper, we propose to utilize a pointer network for repositioning the boundaries. Recycling mechanism is used, which enables the training data to be collected without manual intervention. We conduct the experiments on the benchmark datasets SE14 of laptop and SE14-16 of restaurant. Experimental results show that our method achieves substantial improvements over the baseline, and outperforms state-of-the-art methods.</abstract>
       <url hash="7ea92669">2020.acl-main.339</url>
@@ -4684,7 +4684,7 @@
     </paper>
     <paper id="348">
       <title>Meta-Transfer Learning for Code-Switched Speech Recognition</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
@@ -4714,7 +4714,7 @@
       <author><first>Yi</first><last>Ren</last></author>
       <author><first>Jinglin</first><last>Liu</last></author>
       <author><first>Xu</first><last>Tan</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Qin</last></author>
       <author><first>Zhou</first><last>Zhao</last></author>
       <author><first>Tie-Yan</first><last>Liu</last></author>
@@ -4730,7 +4730,7 @@
       <author><first>Karan</first><last>Singla</last></author>
       <author><first>Zhuohao</first><last>Chen</last></author>
       <author><first>David</first><last>Atkins</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>3797–3803</pages>
       <abstract>Spoken language understanding tasks usually rely on pipelines involving complex processing blocks such as voice activity detection, speaker diarization and Automatic speech recognition (ASR). We propose a novel framework for predicting utterance level labels directly from speech features, thus removing the dependency on first generating transcripts, and transcription free behavioral coding. Our classifier uses a pretrained Speech-2-Vector encoder as bottleneck to generate word-level representations from speech features. This pretrained encoder learns to encode speech features for a word using an objective similar to Word2Vec. Our proposed approach just uses speech features and word segmentation information for predicting spoken utterance-level target labels. We show that our model achieves competitive results to other state-of-the-art approaches which use transcribed text for the task of predicting psychotherapy-relevant behavior codes.</abstract>
       <url hash="ce5c6dd4">2020.acl-main.351</url>
@@ -4770,7 +4770,7 @@
       <author><first>Inkit</first><last>Padhi</last></author>
       <author><first>Pierre</first><last>Dognin</last></author>
       <author><first>Ke</first><last>Bai</last></author>
-      <author><first>Cícero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>Nogueira dos Santos</last></author>
       <author><first>Vijil</first><last>Chenthamarakshan</last></author>
       <author><first>Youssef</first><last>Mroueh</last></author>
       <author><first>Payel</first><last>Das</last></author>
@@ -4852,7 +4852,7 @@
       <title>Successfully Applying the Stabilized Lottery Ticket Hypothesis to the Transformer Architecture</title>
       <author><first>Christopher</first><last>Brix</last></author>
       <author><first>Parnia</first><last>Bahar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>3909–3915</pages>
       <abstract>Sparse models require less memory for storage and enable a faster inference by reducing the necessary number of FLOPs. This is relevant both for time-critical and on-device computations using neural networks. The stabilized lottery ticket hypothesis states that networks can be pruned after none or few training iterations, using a mask computed based on the unpruned converged model. On the transformer architecture and the WMT 2014 English-to-German and English-to-French tasks, we show that stabilized lottery ticket pruning performs similar to magnitude pruning for sparsity levels of up to 85%, and propose a new combination of pruning techniques that outperforms all other techniques for even higher levels of sparsity. Furthermore, we confirm that the parameter’s initial sign and not its specific value is the primary factor for successful training, and show that magnitude pruning cannot be used to find winning lottery tickets.</abstract>
       <url hash="8b0b4b55">2020.acl-main.360</url>
@@ -4893,7 +4893,7 @@
     </paper>
     <paper id="363">
       <title>An Effectiveness Metric for Ordinal Classification: Formal Properties and Experimental Results</title>
-      <author><first>Enrique</first><last>Amigo</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigo</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Stefano</first><last>Mizzaro</last></author>
       <author><first>Jorge</first><last>Carrillo-de-Albornoz</last></author>
@@ -4920,7 +4920,7 @@
       <title>Analysing Lexical Semantic Change with Contextualised Word Representations</title>
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>3960–3973</pages>
       <abstract>This paper presents the first unsupervised approach to lexical semantic change that makes use of contextualised word representations. We propose a novel method that exploits the BERT neural language model to obtain representations of word usages, clusters these representations into usage types, and measures change along time with three proposed metrics. We create a new evaluation dataset and show that the model representations and the detected semantic shifts are positively correlated with human judgements. Our extensive qualitative analysis demonstrates that our method captures a variety of synchronic and diachronic linguistic phenomena. We expect our work to inspire further research in this direction.</abstract>
       <url hash="8bec37f3">2020.acl-main.365</url>
@@ -4956,7 +4956,7 @@
     <paper id="368">
       <title><fixed-case>BERTRAM</fixed-case>: Improved Word Embeddings Have Big Impact on Contextualized Model Performance</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3996–4007</pages>
       <abstract>Pretraining deep language models has led to large performance gains in NLP. Despite this success, Schick and Schütze (2020) recently showed that these models struggle to understand rare words. For static word embeddings, this problem has been addressed by separately learning representations for rare words. In this work, we transfer this idea to pretrained language models: We introduce BERTRAM, a powerful architecture based on BERT that is capable of inferring high-quality embeddings for rare words that are suitable as input representations for deep language models. This is achieved by enabling the surface form and contexts of a word to interact with each other in a deep architecture. Integrating BERTRAM into BERT leads to large performance increases due to improved representations of rare and medium frequency words on both a rare word probing task and three downstream tasks.</abstract>
       <url hash="2cdbd1a5">2020.acl-main.368</url>
@@ -5023,8 +5023,8 @@
     <paper id="373">
       <title>He said “who’s gonna take care of your children when you are at <fixed-case>ACL</fixed-case>?”: Reported Sexist Acts are Not Sexist</title>
       <author><first>Patricia</first><last>Chiril</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Alda</first><last>Mari</last></author>
       <author><first>Gloria</first><last>Origgi</last></author>
       <author><first>Marlène</first><last>Coulomb-Gully</last></author>
@@ -5081,7 +5081,7 @@
     <paper id="377">
       <title>Exact yet Efficient Graph Parsing, Bi-directional Locality and the Constructivist Hypothesis</title>
       <author><first>Yajie</first><last>Ye</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>4100–4110</pages>
       <abstract>A key problem in processing graph-based meaning representations is graph parsing, i.e. computing all possible derivations of a given graph according to a (competence) grammar. We demonstrate, for the first time, that exact graph parsing can be efficient for large graphs and with large Hyperedge Replacement Grammars (HRGs). The advance is achieved by exploiting locality as terminal edge-adjacency in HRG rules. In particular, we highlight the importance of 1) a terminal edge-first parsing strategy, 2) a categorization of a subclass of HRG, i.e. what we call Weakly Regular Graph Grammar, and 3) distributing argument-structures to both lexical and phrasal rules.</abstract>
       <url hash="231b0280">2020.acl-main.377</url>
@@ -5093,7 +5093,7 @@
     <paper id="378">
       <title>Max-Margin Incremental <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>4111–4122</pages>
       <abstract>Incremental syntactic parsing has been an active research area both for cognitive scientists trying to model human sentence processing and for NLP researchers attempting to combine incremental parsing with language modelling for ASR and MT. Most effort has been directed at designing the right transition mechanism, but less has been done to answer the question of what a probabilistic model for those transition parsers should look like. A very incremental transition mechanism of a recently proposed CCG parser when trained in straightforward locally normalised discriminative fashion produces very bad results on English CCGbank. We identify three biases as the causes of this problem: label bias, exposure bias and imbalanced probabilities bias. While known techniques for tackling these biases improve results, they still do not make the parser state of the art. Instead, we tackle all of these three biases at the same time using an improved version of beam search optimisation that minimises all beam search violations instead of minimising only the biggest violation. The new incremental parser gives better results than all previously published incremental CCG parsers, and outperforms even some widely used non-incremental CCG parsers.</abstract>
       <url hash="f64a3c2e">2020.acl-main.378</url>
@@ -5121,7 +5121,7 @@
       <author><first>Junqi</first><last>Zhang</last></author>
       <author><first>Kun</first><last>Bai</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>4134–4145</pages>
       <abstract>With the recent proliferation of the use of text classifications, researchers have found that there are certain unintended biases in text classification datasets. For example, texts containing some demographic identity-terms (e.g., “gay”, “black”) are more likely to be abusive in existing abusive language detection datasets. As a result, models trained with these datasets may consider sentences like “She makes me happy to be gay” as abusive simply because of the word “gay.” In this paper, we formalize the unintended biases in text classification datasets as a kind of selection bias from the non-discrimination distribution to the discrimination distribution. Based on this formalization, we further propose a model-agnostic debiasing training framework by recovering the non-discrimination distribution using instance weighting, which does not require any extra resources or annotations apart from a pre-defined set of demographic identity-terms. Experiments demonstrate that our method can effectively alleviate the impacts of the unintended biases without significantly hurting models’ generalization ability.</abstract>
       <url hash="461a1aa1">2020.acl-main.380</url>
@@ -5131,7 +5131,7 @@
     </paper>
     <paper id="381">
       <title>Analyzing analytical methods: The case of phonology in neural models of spoken language</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Bertrand</first><last>Higy</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>4146–4156</pages>
@@ -5147,7 +5147,7 @@
       <author><first>Brendan</first><last>Shillingford</last></author>
       <author><first>Pasquale</first><last>Minervini</last></author>
       <author><first>Thomas</first><last>Lukasiewicz</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>4157–4165</pages>
       <abstract>To increase trust in artificial intelligence systems, a promising research direction consists of designing neural models capable of generating natural language explanations for their predictions. In this work, we show that such models are nonetheless prone to generating mutually inconsistent explanations, such as ”Because there is a dog in the image.” and ”Because there is no dog in the [same] image.”, exposing flaws in either the decision-making process of the model or in the generation of the explanations. We introduce a simple yet effective adversarial framework for sanity checking models against the generation of inconsistent natural language explanations. Moreover, as part of the framework, we address the problem of adversarial attacks with full target sequences, a scenario that was not previously addressed in sequence-to-sequence attacks. Finally, we apply our framework on a state-of-the-art neural natural language inference model that provides natural language explanations for its predictions. Our framework shows that this model is capable of generating a significant number of inconsistent explanations.</abstract>
       <url hash="ff118b3f">2020.acl-main.382</url>
@@ -5170,9 +5170,9 @@
     </paper>
     <paper id="384">
       <title>Probing for Referential Information in Language Models</title>
-      <author><first>Ionut-Teodor</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></author>
       <author><first>Kristina</first><last>Gulordava</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>4177–4189</pages>
       <abstract>Language models keep track of complex information about the preceding context – including, e.g., syntactic relations in a sentence. We investigate whether they also capture information beneficial for resolving pronominal anaphora in English. We analyze two state of the art models with LSTM and Transformer architectures, via probe tasks and analysis on a coreference annotated corpus. The Transformer outperforms the LSTM in all analyses. Our results suggest that language models are more successful at learning grammatical constraints than they are at learning truly referential information, in the sense of capturing the fact that we use language to refer to entities in the world. However, we find traces of the latter aspect, too.</abstract>
       <url hash="ea156a8e">2020.acl-main.384</url>
@@ -5207,9 +5207,9 @@
       <author><first>Akash Kumar</first><last>Mohankumar</last></author>
       <author><first>Preksha</first><last>Nema</last></author>
       <author><first>Sharan</first><last>Narasimhan</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Balaji Vasan</first><last>Srinivasan</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
       <pages>4206–4216</pages>
       <abstract>Recent studies on interpretability of attention distributions have led to notions of faithful and plausible explanations for a model’s predictions. Attention distributions can be considered a faithful explanation if a higher attention weight implies a greater impact on the model’s prediction. They can be considered a plausible explanation if they provide a human-understandable justification for the model’s predictions. In this work, we first explain why current attention mechanisms in LSTM based encoders can neither provide a faithful nor a plausible explanation of the model’s predictions. We observe that in LSTM based encoders the hidden representations at different time-steps are very similar to each other (high conicity) and attention weights in these situations do not carry much meaning because even a random permutation of the attention weights does not affect the model’s predictions. Based on experiments on a wide variety of tasks and datasets, we observe attention distributions often attribute the model’s predictions to unimportant words such as punctuation and fail to offer a plausible explanation for the predictions. To make attention mechanisms more faithful and plausible, we propose a modified LSTM cell with a diversity-driven training objective that ensures that the hidden representations learned at different time steps are diverse. We show that the resulting attention distributions offer more transparency as they (i) provide a more precise importance ranking of the hidden states (ii) are better indicative of words important for the model’s predictions (iii) correlate better with gradient-based attribution methods. Human evaluations indicate that the attention distributions learned by our model offer a plausible explanation of the model’s predictions. Our code has been made publicly available at <url>https://github.com/akashkm99/Interpretable-Attention</url></abstract>
       <url hash="e5d5d192">2020.acl-main.387</url>
@@ -5233,8 +5233,8 @@
     </paper>
     <paper id="389">
       <title>Modeling Word Formation in <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman Neural Machine Translation</title>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>4227–4232</pages>
       <abstract>This paper studies strategies to model word formation in NMT using rich linguistic information, namely a word segmentation approach that goes beyond splitting into substrings by considering fusional morphology. Our linguistically sound segmentation is combined with a method for target-side inflection to accommodate modeling word formation. The best system variants employ source-side morphological analysis and model complex target-side words, improving over a standard system.</abstract>
       <url hash="d26a2931">2020.acl-main.389</url>
@@ -5290,7 +5290,7 @@
       <title>Identifying Principals and Accessories in a Complex Case based on the Comprehension of Fact Description</title>
       <author><first>Yakun</first><last>Hu</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <pages>4265–4269</pages>
       <abstract>In this paper, we study the problem of identifying the principals and accessories from the fact description with multiple defendants in a criminal case. We treat the fact descriptions as narrative texts and the defendants as roles over the narrative story. We propose to model the defendants with <i>behavioral semantic information</i> and <i>statistical characteristics</i>, then learning the importances of defendants within a learning-to-rank framework. Experimental results on a real-world dataset demonstrate the behavior analysis can effectively model the defendants’ impacts in a complex case.</abstract>
       <url hash="7a25f1ef">2020.acl-main.393</url>
@@ -5327,7 +5327,7 @@
     <paper id="396">
       <title>Toxicity Detection: Does Context Really Matter?</title>
       <author><first>John</first><last>Pavlopoulos</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Lucas</first><last>Dixon</last></author>
       <author><first>Nithum</first><last>Thain</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
@@ -5342,7 +5342,7 @@
       <title><fixed-case>AMR</fixed-case> Parsing with Latent Structural Information</title>
       <author><first>Qiji</first><last>Zhou</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Hao</first><last>Tang</last></author>
       <pages>4306–4319</pages>
       <abstract>Abstract Meaning Representations (AMRs) capture sentence-level semantics structural representations to broad-coverage natural sentences. We investigate parsing AMR with explicit dependency structures and interpretable latent structures. We generate the latent soft structure without additional annotations, and fuse both dependency and latent structure via an extended graph neural networks. The fused structural information helps our experiments results to achieve the best reported results on both AMR 2.0 (77.5% Smatch F1 on LDC2017T10) and AMR 1.0 ((71.8% Smatch F1 on LDC2014T12).</abstract>
@@ -5355,7 +5355,7 @@
       <title><fixed-case>T</fixed-case>a<fixed-case>P</fixed-case>as: Weakly Supervised Table Parsing via Pre-training</title>
       <author><first>Jonathan</first><last>Herzig</last></author>
       <author><first>Pawel Krzysztof</first><last>Nowak</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Francesco</first><last>Piccinno</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <pages>4320–4333</pages>
@@ -5394,7 +5394,7 @@
       <author><first>Dushyant Singh</first><last>Chauhan</last></author>
       <author><first>Dhanush</first><last>S R</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4351–4360</pages>
       <abstract>In this paper, we hypothesize that sarcasm is closely related to sentiment and emotion, and thereby propose a multi-task deep learning framework to solve all these three problems simultaneously in a multi-modal conversational scenario. We, at first, manually annotate the recently released multi-modal MUStARD sarcasm dataset with sentiment and emotion classes, both implicit and explicit. For multi-tasking, we propose two attention mechanisms, viz. Inter-segment Inter-modal Attention (Ie-Attention) and Intra-segment Inter-modal Attention (Ia-Attention). The main motivation of Ie-Attention is to learn the relationship between the different segments of the sentence across the modalities. In contrast, Ia-Attention focuses within the same segment of the sentence across the modalities. Finally, representations from both the attentions are concatenated and shared across the five classes (i.e., sarcasm, implicit sentiment, explicit sentiment, implicit emotion, explicit emotion) for multi-tasking. Experimental results on the extended version of the MUStARD dataset show the efficacy of our proposed approach for sarcasm detection over the existing state-of-the-art systems. The evaluation also shows that the proposed multi-task framework yields better performance for the primary task, i.e., sarcasm detection, with the help of two secondary tasks, emotion and sentiment analysis.</abstract>
       <url hash="b6fcf2e7">2020.acl-main.401</url>
@@ -5407,7 +5407,7 @@
       <author><first>Tulika</first><last>Saha</last></author>
       <author><first>Aditya</first><last>Patra</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4361–4372</pages>
       <abstract>The task of Dialogue Act Classification (DAC) that purports to capture communicative intent has been studied extensively. But these studies limit themselves to text. Non-verbal features (change of tone, facial expressions etc.) can provide cues to identify DAs, thus stressing the benefit of incorporating multi-modal inputs in the task. Also, the emotional state of the speaker has a substantial effect on the choice of the dialogue act, since conversations are often influenced by emotions. Hence, the effect of emotion too on automatic identification of DAs needs to be studied. In this work, we address the role of <i>both</i> multi-modality and emotion recognition (ER) in DAC. DAC and ER help each other by way of multi-task learning. One of the major contributions of this work is a new dataset- multimodal Emotion aware Dialogue Act dataset called EMOTyDA, collected from open-sourced dialogue datasets. To demonstrate the utility of EMOTyDA, we build an attention based (self, inter-modal, inter-task) multi-modal, multi-task Deep Neural Network (DNN) for joint learning of DAs and emotions. We show empirically that multi-modality and multi-tasking achieve better performance of DAC compared to uni-modal and single task DAC variants.</abstract>
       <url hash="f07854af">2020.acl-main.402</url>
@@ -5419,7 +5419,7 @@
       <title>Analyzing Political Parody in Social Media</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Danae</first><last>Sánchez Villegas</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>4373–4384</pages>
       <abstract>Parody is a figurative device used to imitate an entity for comedic or critical purposes and represents a widespread phenomenon in social media through many popular parody accounts. In this paper, we present the first computational study of parody. We introduce a new publicly available data set of tweets from real politicians and their corresponding parody accounts. We run a battery of supervised machine learning models for automatically detecting parody tweets with an emphasis on robustness by testing on tweets from accounts unseen in training, across different genders and across countries. Our results show that political parody tweets can be predicted with an accuracy up to 90%. Finally, we identify the markers of parody through a linguistic analysis. Beyond research in linguistics and political communication, accurately and automatically detecting parody is important to improving fact checking for journalists and analytics such as sentiment analysis through filtering out parodical utterances.</abstract>
@@ -5431,7 +5431,7 @@
     <paper id="404">
       <title>Masking Actor Information Leads to Fairer Political Claims Detection</title>
       <author><first>Erenay</first><last>Dayanik</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>4385–4391</pages>
       <abstract>A central concern in Computational Social Sciences (CSS) is fairness: where the role of NLP is to scale up text analysis to large corpora, the quality of automatic analyses should be as independent as possible of textual properties. We analyze the performance of a state-of-the-art neural model on the task of political claims detection (i.e., the identification of forward-looking statements made by political actors) and identify a strong frequency bias: claims made by frequent actors are recognized better. We propose two simple debiasing methods which mask proper names and pronouns during training of the model, thus removing personal information bias. We find that (a) these methods significantly decrease frequency bias while keeping the overall performance stable; and (b) the resulting models improve when evaluated in an out-of-domain setting.</abstract>
       <url hash="0a3dc858">2020.acl-main.404</url>
@@ -5470,7 +5470,7 @@
       <author><first>Eugene</first><last>Kharitonov</last></author>
       <author><first>Diane</first><last>Bouchacourt</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>4427–4442</pages>
       <abstract>Natural language allows us to refer to novel composite concepts by combining expressions denoting their parts according to systematic rules, a property known as compositionality. In this paper, we study whether the language emerging in deep multi-agent simulations possesses a similar ability to refer to novel primitive combinations, and whether it accomplishes this feat by strategies akin to human-language compositionality. Equipped with new ways to measure compositionality in emergent languages inspired by disentanglement in representation learning, we establish three main results: First, given sufficiently large input spaces, the emergent language will naturally develop the ability to refer to novel composite concepts. Second, there is no correlation between the degree of compositionality of an emergent language and its ability to generalize. Third, while compositionality is not necessary for generalization, it provides an advantage in terms of language transmission: The more compositional a language is, the more easily it will be picked up by new learners, even when the latter differ in architecture from the original agents. We conclude that compositionality does not arise from simple generalization pressure, but if an emergent language does chance upon it, it will be more likely to survive and thrive.</abstract>
       <url hash="0bfe5d04">2020.acl-main.407</url>
@@ -5486,7 +5486,7 @@
       <author><first>Eric</first><last>Lehman</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>4443–4458</pages>
       <abstract>State-of-the-art models in NLP are now predominantly based on deep neural networks that are opaque in terms of how they come to make predictions. This limitation has increased interest in designing more interpretable deep models for NLP that reveal the ‘reasoning’ behind model outputs. But work in this direction has been conducted on different datasets and tasks with correspondingly unique aims and metrics; this makes it difficult to track progress. We propose the <b>E</b>valuating <b>R</b>ationales <b>A</b>nd <b>S</b>imple <b>E</b>nglish <b>R</b>easoning (<b>ERASER</b> a benchmark to advance research on interpretable models in NLP. This benchmark comprises multiple datasets and tasks for which human annotations of “rationales” (supporting evidence) have been collected. We propose several metrics that aim to capture how well the rationales provided by models align with human rationales, and also how <i>faithful</i> these rationales are (i.e., the degree to which provided rationales influenced the corresponding predictions). Our hope is that releasing this benchmark facilitates progress on designing more interpretable NLP systems. The benchmark, code, and documentation are available at <url>https://www.eraserbenchmark.com/</url></abstract>
       <url hash="296cde2d">2020.acl-main.408</url>
@@ -5500,7 +5500,7 @@
       <author><first>Sarthak</first><last>Jain</last></author>
       <author><first>Sarah</first><last>Wiegreffe</last></author>
       <author><first>Yuval</first><last>Pinter</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>4459–4473</pages>
       <abstract>In many settings it is important for one to be able to understand why a model made a particular prediction. In NLP this often entails extracting snippets of an input text ‘responsible for’ corresponding model output; when such a snippet comprises tokens that indeed informed the model’s prediction, it is a faithful explanation. In some settings, faithfulness may be critical to ensure transparency. Lei et al. (2016) proposed a model to produce faithful rationales for neural text classification by defining independent snippet extraction and prediction modules. However, the discrete selection over input tokens performed by this method complicates training, leading to high variance and requiring careful hyperparameter tuning. We propose a simpler variant of this approach that provides faithful explanations by construction. In our scheme, named FRESH, arbitrary feature importance scores (e.g., gradients from a trained model) are used to induce binary labels over token inputs, which an extractor can be trained to predict. An independent classifier module is then trained exclusively on snippets provided by the extractor; these snippets thus constitute faithful explanations, even if the classifier is arbitrarily complex. In both automatic and manual evaluations we find that variants of this simple framework yield predictive performance superior to ‘end-to-end’ approaches, while being more general and easier to train. Code is available at <url>https://github.com/successar/FRESH</url>.</abstract>
       <url hash="11a5d5f9">2020.acl-main.409</url>
@@ -5537,7 +5537,7 @@
       <title>Improving Multi-hop Question Answering over Knowledge Graphs using Knowledge Base Embeddings</title>
       <author><first>Apoorv</first><last>Saxena</last></author>
       <author><first>Aditay</first><last>Tripathi</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>4498–4507</pages>
       <abstract>Knowledge Graphs (KG) are multi-relational graphs consisting of entities as nodes and relations among them as typed edges. Goal of the Question Answering over KG (KGQA) task is to answer natural language queries posed over the KG. Multi-hop KGQA requires reasoning over multiple edges of the KG to arrive at the right answer. KGs are often incomplete with many missing links, posing additional challenges for KGQA, especially for multi-hop KGQA. Recent research on multi-hop KGQA has attempted to handle KG sparsity using relevant external text, which isn’t always readily available. In a separate line of research, KG embedding methods have been proposed to reduce KG sparsity by performing missing link prediction. Such KG embedding methods, even though highly relevant, have not been explored for multi-hop KGQA so far. We fill this gap in this paper and propose EmbedKGQA. EmbedKGQA is particularly effective in performing multi-hop KGQA over sparse KGs. EmbedKGQA also relaxes the requirement of answer selection from a pre-specified neighborhood, a sub-optimal constraint enforced by previous multi-hop KGQA methods. Through extensive experiments on multiple benchmark datasets, we demonstrate EmbedKGQA’s effectiveness over other state-of-the-art baselines.</abstract>
       <url hash="d73af03f">2020.acl-main.412</url>
@@ -5547,7 +5547,7 @@
     </paper>
     <paper id="413">
       <title>Template-Based Question Generation from Retrieved Sentences for Improved Unsupervised Question Answering</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Patrick</first><last>Ng</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
@@ -5578,8 +5578,8 @@
       <author><first>Tiago</first><last>Pimentel</last></author>
       <author><first>Matthew</first><last>Wiesner</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>4526–4546</pages>
       <abstract>A major hurdle in data-driven research on typology is having sufficient data in many languages to draw meaningful conclusions. We present VoxClamantis v1.0, the first large-scale corpus for phonetic typology, with aligned segments and estimated phoneme-level labels in 690 readings spanning 635 languages, along with acoustic-phonetic measures of vowels and sibilants. Access to such data can greatly facilitate investigation of phonetic typology at a large scale and across many languages. However, it is non-trivial and computationally intensive to obtain such alignments for hundreds of languages, many of which have few to no resources presently available. We describe the methodology to create our corpus, discuss caveats with current methods and their impact on the utility of this data, and illustrate possible research directions through a series of case studies on the 48 highest-quality readings. Our corpus and scripts are publicly available for non-commercial use at <url>https://voxclamantisproject.github.io</url>.</abstract>
       <url hash="db1df61e">2020.acl-main.415</url>
@@ -5590,7 +5590,7 @@
     <paper id="416">
       <title><fixed-case>D</fixed-case>scorer: A Fast Evaluation Metric for Discourse Representation Structure Parsing</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>4547–4554</pages>
       <abstract>Discourse representation structures (DRSs) are scoped semantic representations for texts of arbitrary length. Evaluating the accuracy of predicted DRSs plays a key role in developing semantic parsers and improving their performance. DRSs are typically visualized as boxes which are not straightforward to process automatically. Counter transforms DRSs to clauses and measures clause overlap by searching for variable mappings between two DRSs. However, this metric is computationally costly (with respect to memory and CPU time) and does not scale with longer texts. We introduce Dscorer, an efficient new metric which converts box-style DRSs to graphs and then measures the overlap of n-grams. Experiments show that Dscorer computes accuracy scores that are correlated with Counter at a fraction of the time.</abstract>
@@ -5606,14 +5606,14 @@
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Hieu</first><last>Hoang</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
       <author><first>Faheem</first><last>Kirefu</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
       <author><first>Leopoldo</first><last>Pla Sempere</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Elsa</first><last>Sarrías</last></author>
       <author><first>Marek</first><last>Strelec</last></author>
       <author><first>Brian</first><last>Thompson</last></author>
@@ -5630,7 +5630,7 @@
     <paper id="418">
       <title>Toward Gender-Inclusive Coreference Resolution</title>
       <author><first>Yang Trista</first><last>Cao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>4568–4595</pages>
       <abstract>Correctly resolving textual mentions of people fundamentally entails making inferences about those people. Such inferences raise the risk of systemic biases in coreference resolution systems, including biases that can harm binary and non-binary trans and cis stakeholders. To better understand such biases, we foreground nuanced conceptualizations of gender from sociology and sociolinguistics, and develop two new datasets for interrogating bias in crowd annotations and in existing coreference resolution systems. Through these studies, conducted on English text, we confirm that without acknowledging and building systems that recognize the complexity of gender, we build systems that lead to many potential harms.</abstract>
       <url hash="322544ba">2020.acl-main.418</url>
@@ -5686,7 +5686,7 @@
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>4638–4655</pages>
       <abstract>This paper investigates contextual word representation models from the lens of similarity analysis. Given a collection of trained models, we measure the similarity of their internal representations and attention. Critically, these models come from vastly different architectures. We use existing and novel similarity measures that aim to gauge the level of localization of information in the deep models, and facilitate the investigation of which design factors affect model similarity, without requiring any external linguistic annotation. The analysis reveals that models within the same family are more similar to one another, as may be expected. Surprisingly, different architectures have rather similar representations, but different individual neurons. We also observed differences in information localization in lower and higher layers and found that higher layers are more affected by fine-tuning on downstream tasks.</abstract>
       <url hash="496ea163">2020.acl-main.422</url>
@@ -5717,8 +5717,8 @@
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <author><first>Louis</first><last>Martin</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>4668–4679</pages>
       <abstract>In order to simplify a sentence, human editors perform multiple rewriting transformations: they split it into several shorter sentences, paraphrase words (i.e. replacing complex words or phrases by simpler synonyms), reorder components, and/or delete information deemed unnecessary. Despite these varied range of possible text alterations, current models for automatic sentence simplification are evaluated using datasets that are focused on a single transformation, such as lexical paraphrasing or splitting. This makes it impossible to understand the ability of simplification models in more realistic settings. To alleviate this limitation, this paper introduces ASSET, a new dataset for assessing sentence simplification in English. ASSET is a crowdsourced multi-reference corpus where each simplification was produced by executing several rewriting transformations. Through quantitative and qualitative experiments, we show that simplifications in ASSET are better at capturing characteristics of simplicity when compared to other standard evaluation datasets for the task. Furthermore, we motivate the need for developing better methods for automatic evaluation using ASSET, since we show that current popular metrics may not be suitable when multiple simplification transformations are performed.</abstract>
@@ -5746,7 +5746,7 @@
       <author><first>Heeyoung</first><last>Kwon</last></author>
       <author><first>Mohaddeseh</first><last>Bastan</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>4687–4692</pages>
       <abstract>Predicting how events induce emotions in the characters of a story is typically seen as a standard multi-label classification task, which usually treats labels as anonymous classes to predict. They ignore information that may be conveyed by the emotion labels themselves. We propose that the semantics of emotion labels can guide a model’s attention when representing the input story. Further, we observe that the emotions evoked by an event are often related: an event that evokes joy is unlikely to also evoke sadness. In this work, we explicitly model label classes via label embeddings, and add mechanisms that track label-label correlations both during training and inference. We also introduce a new semi-supervision strategy that regularizes for the correlations on unlabeled data. Our empirical evaluations show that modeling label semantics yields consistent benefits, and we advance the state-of-the-art on an emotion inference task.</abstract>
       <url hash="cf87a0ad">2020.acl-main.426</url>
@@ -5813,7 +5813,7 @@
       <title><fixed-case>I</fixed-case>nterpreting <fixed-case>P</fixed-case>retrained <fixed-case>C</fixed-case>ontextualized <fixed-case>R</fixed-case>epresentations via <fixed-case>R</fixed-case>eductions to <fixed-case>S</fixed-case>tatic <fixed-case>E</fixed-case>mbeddings</title>
       <author><first>Rishi</first><last>Bommasani</last></author>
       <author><first>Kelly</first><last>Davis</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>4758–4781</pages>
       <abstract>Contextualized representations (e.g. ELMo, BERT) have become the default pretrained representations for downstream NLP applications. In some settings, this transition has rendered their static embedding predecessors (e.g. Word2Vec, GloVe) obsolete. As a side-effect, we observe that older interpretability methods for static embeddings — while more diverse and mature than those available for their dynamic counterparts — are underutilized in studying newer contextualized representations. Consequently, we introduce simple and fully general methods for converting from contextualized representations to static lookup-table embeddings which we apply to 5 popular pretrained models and 9 sets of pretrained weights. Our analysis of the resulting static embeddings notably reveals that pooling over many contexts significantly improves representational quality under intrinsic evaluation. Complementary to analyzing representational quality, we consider social biases encoded in pretrained representations with respect to gender, race/ethnicity, and religion and find that bias is encoded disparately across pretrained models and internal layers even for models with the same training data. Concerningly, we find dramatic inconsistencies between social bias estimators for word embeddings.</abstract>
       <url hash="a7d2a441">2020.acl-main.431</url>
@@ -5828,7 +5828,7 @@
       <author><first>Mansi</first><last>Gupta</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>4782–4793</pages>
       <abstract>Attention mechanisms are ubiquitous components in neural architectures applied to natural language processing. In addition to yielding gains in predictive accuracy, attention weights are often claimed to confer interpretability, purportedly useful both for providing insights to practitioners and for explaining why a model makes its decisions to stakeholders. We call the latter use of attention mechanisms into question by demonstrating a simple method for training models to produce deceptive attention masks. Our method diminishes the total weight assigned to designated impermissible tokens, even when the models can be shown to nevertheless rely on these features to drive predictions. Across multiple models and tasks, our approach manipulates attention weights while paying surprisingly little cost in accuracy. Through a human study, we show that our manipulated attention-based explanations deceive people into thinking that predictions from a model biased against gender minorities do not rely on the gender. Consequently, our results cast doubt on attention’s reliability as a tool for auditing algorithms in the context of fairness and accountability.</abstract>
       <url hash="3ac0d71b">2020.acl-main.432</url>
@@ -5875,7 +5875,7 @@
       <title>Shaping Visual Representations with Language for Few-Shot Classification</title>
       <author><first>Jesse</first><last>Mu</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>4823–4830</pages>
       <abstract>By describing the features and abstractions of our world, language is a crucial tool for human learning and a promising source of supervision for machine learning models. We use language to improve few-shot visual classification in the underexplored scenario where natural language task descriptions are available during training, but unavailable for novel tasks at test time. Existing models for this setting sample new descriptions at test time and use those to classify images. Instead, we propose language-shaped learning (LSL), an end-to-end model that regularizes visual representations to predict language. LSL is conceptually simpler, more data efficient, and outperforms baselines in two challenging few-shot domains.</abstract>
       <url hash="7fb555ff">2020.acl-main.436</url>
@@ -5929,7 +5929,7 @@
       <author><first>Elnaz</first><last>Nouri</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Debadeepta</first><last>Dey</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>4871–4884</pages>
       <abstract>Many high-level procedural tasks can be decomposed into sequences of instructions that vary in their order and choice of tools. In the cooking domain, the web offers many, partially-overlapping, text and video recipes (i.e. procedures) that describe how to make the same dish (i.e. high-level task). Aligning instructions for the same dish across different sources can yield descriptive visual explanations that are far richer semantically than conventional textual instructions, providing commonsense insight into how real-world procedures are structured. Learning to align these different instruction sets is challenging because: a) different recipes vary in their order of instructions and use of ingredients; and b) video instructions can be noisy and tend to contain far more information than text instructions. To address these challenges, we use an unsupervised alignment algorithm that learns pairwise alignments between instructions of different recipes for the same dish. We then use a graph algorithm to derive a joint alignment between multiple text and multiple video recipes for the same dish. We release the Microsoft Research Multimodal Aligned Recipe Corpus containing ~150K pairwise alignments between recipes across 4262 dishes with rich commonsense information.</abstract>
       <url hash="e6710ebd">2020.acl-main.440</url>
@@ -5954,7 +5954,7 @@
     </paper>
     <paper id="442">
       <title>Beyond Accuracy: Behavioral Testing of <fixed-case>NLP</fixed-case> Models with <fixed-case>C</fixed-case>heck<fixed-case>L</fixed-case>ist</title>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Tongshuang</first><last>Wu</last></author>
       <author><first>Carlos</first><last>Guestrin</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
@@ -5984,7 +5984,7 @@
       <title>Dialogue-Based Relation Extraction</title>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Kai</first><last>Sun</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>4927–4940</pages>
       <abstract>We present the first human-annotated dialogue-based relation extraction (RE) dataset DialogRE, aiming to support the prediction of relation(s) between two arguments that appear in a dialogue. We further offer DialogRE as a platform for studying cross-sentence RE as most facts span multiple sentences. We argue that speaker-related information plays a critical role in the proposed task, based on an analysis of similarities and differences between dialogue-based and traditional RE tasks. Considering the timeliness of communication in a dialogue, we design a new metric to evaluate the performance of RE methods in a conversational setting and investigate the performance of several representative RE methods on DialogRE. Experimental results demonstrate that a speaker-aware extension on the best-performing model leads to gains in both the standard and conversational evaluation settings. DialogRE is available at <url>https://dataset.org/dialogre/</url>.</abstract>
@@ -6011,7 +6011,7 @@
       <title>More Diverse Dialogue Datasets via Diversity-Informed Data Collection</title>
       <author><first>Katherine</first><last>Stasaski</last></author>
       <author><first>Grace Hui</first><last>Yang</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>4958–4968</pages>
       <abstract>Automated generation of conversational dialogue using modern neural architectures has made notable advances. However, these models are known to have a drawback of often producing uninteresting, predictable responses; this is known as the diversity problem. We introduce a new strategy to address this problem, called Diversity-Informed Data Collection. Unlike prior approaches, which modify model architectures to solve the problem, this method uses dynamically computed corpus-level statistics to determine which conversational participants to collect data from. Diversity-Informed Data Collection produces significantly more diverse data than baseline data collection methods, and better results on two downstream tasks: emotion classification and dialogue generation. This method is generalizable and can be used with other corpus-level metrics.</abstract>
       <url hash="ead117cc">2020.acl-main.446</url>
@@ -6022,10 +6022,10 @@
     <paper id="447">
       <title><fixed-case>S</fixed-case>2<fixed-case>ORC</fixed-case>: The Semantic Scholar Open Research Corpus</title>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Mark</first><last>Neumann</last></author>
       <author><first>Rodney</first><last>Kinney</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>4969–4983</pages>
       <abstract>We introduce S2ORC, a large corpus of 81.1M English-language academic papers spanning many academic disciplines. The corpus consists of rich metadata, paper abstracts, resolved bibliographic references, as well as structured full text for 8.1M open access papers. Full text is annotated with automatically-detected inline mentions of citations, figures, and tables, each linked to their corresponding paper objects. In S2ORC, we aggregate papers from hundreds of academic publishers and digital archives into a unified source, and create the largest publicly-available collection of machine-readable academic text to date. We hope this resource will facilitate research and development of tools and tasks for text mining over academic text.</abstract>
       <url hash="531e4605">2020.acl-main.447</url>
@@ -6036,8 +6036,8 @@
     <paper id="448">
       <title>Tangled up in <fixed-case>BLEU</fixed-case>: Reevaluating the Evaluation of Automatic Machine Translation Evaluation Metrics</title>
       <author><first>Nitika</first><last>Mathur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>4984–4997</pages>
       <abstract>Automatic metrics are fundamental for the development and evaluation of machine translation systems. Judging whether, and to what extent, automatic metrics concur with the gold standard of human evaluation is not a straightforward problem. We show that current methods for judging metrics are highly sensitive to the translations used for assessment, particularly the presence of outliers, which often leads to falsely confident conclusions about a metric’s efficacy. Finally, we turn to pairwise system ranking, developing a method for thresholding performance improvement under an automatic metric against human judgements, which allows quantification of type I versus type II errors incurred, i.e., insignificant human differences in system quality that are accepted, and significant human differences that are rejected. Together, these findings suggest improvements to the protocols for metric evaluation and system performance evaluation in machine translation.</abstract>
       <award>Honorable Mention for Best Overall Paper</award>
@@ -6048,7 +6048,7 @@
     </paper>
     <paper id="449">
       <title>A Transformer-based Approach for Source Code Summarization</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Saikat</first><last>Chakraborty</last></author>
       <author><first>Baishakhi</first><last>Ray</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -6103,8 +6103,8 @@
       <title>Exploring Content Selection in Summarization of Novel Chapters</title>
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Bryan</first><last>Li</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>5043–5054</pages>
       <abstract>We present a new summarization task, generating summaries of novel chapters using summary/chapter pairs from online study guides. This is a harder task than the news summarization task, given the chapter length as well as the extreme paraphrasing and generalization found in the summaries. We focus on extractive summarization, which requires the creation of a gold-standard set of extractive summaries. We present a new metric for aligning reference summary sentences with chapter sentences to create gold extracts and also experiment with different alignment methods. Our experiments demonstrate significant improvement over prior alignment approaches for our task as shown through automatic metrics and a crowd-sourced pyramid analysis.</abstract>
       <url hash="62f48428">2020.acl-main.453</url>
@@ -6116,7 +6116,7 @@
       <title><fixed-case>FEQA</fixed-case>: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization</title>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>He</first><last>He</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>5055–5070</pages>
       <abstract>Neural abstractive summarization models are prone to generate content inconsistent with the source document, i.e. unfaithful. Existing automatic metrics do not capture such mistakes effectively. We tackle the problem of evaluating faithfulness of a generated summary given its source document. We first collected human annotations of faithfulness for outputs from numerous models on two datasets. We find that current models exhibit a trade-off between abstractiveness and faithfulness: outputs with less word overlap with the source document are more likely to be unfaithful. Next, we propose an automatic question answering (QA) based metric for faithfulness, FEQA, which leverages recent advances in reading comprehension. Given question-answer pairs generated from the summary, a QA model extracts answers from the document; non-matched answers indicate unfaithful information in the summary. Among metrics based on word overlap, embedding similarity, and learned language understanding models, our QA-based metric has significantly higher correlation with human faithfulness scores, especially on highly abstractive summaries.</abstract>
       <url hash="bc64ce33">2020.acl-main.454</url>
@@ -6169,7 +6169,7 @@
       <author><first>Yuhao</first><last>Zhang</last></author>
       <author><first>Derek</first><last>Merck</last></author>
       <author><first>Emily</first><last>Tsai</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Curtis</first><last>Langlotz</last></author>
       <pages>5108–5120</pages>
       <abstract>Neural abstractive summarization models are able to generate summaries which have high overlap with human references. However, existing models are not optimized for factual correctness, a critical metric in real-world applications. In this work, we develop a general framework where we evaluate the factual correctness of a generated summary by fact-checking it automatically against its reference using an information extraction module. We further propose a training strategy which optimizes a neural summarization model with a factual correctness reward via reinforcement learning. We apply the proposed method to the summarization of radiology reports, where factual correctness is a key requirement. On two separate datasets collected from hospitals, we show via both automatic and human evaluation that the proposed approach substantially improves the factual correctness and overall quality of outputs over a competitive neural summarization system, producing radiology summaries that approach the quality of human-authored ones.</abstract>
@@ -6195,7 +6195,7 @@
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Andrew</first><last>Hsi</last></author>
       <author><first>John</first><last>Canny</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>5135–5150</pages>
       <abstract>This work presents a new approach to unsupervised abstractive summarization based on maximizing a combination of coverage and fluency for a given length constraint. It introduces a novel method that encourages the inclusion of key terms from the original document into the summary: key terms are masked out of the original document and must be filled in by a coverage model using the current generated summary. A novel unsupervised training procedure leverages this coverage model along with a fluency model to generate and score summaries. When tested on popular news summarization datasets, the method outperforms previous unsupervised methods by more than 2 R-1 points, and approaches results of competitive supervised methods. Our model attains higher levels of abstraction with copied passages roughly two times shorter than prior work, and learns to compress and merge sentences without supervision.</abstract>
       <url hash="783cbbd6">2020.acl-main.460</url>
@@ -6218,7 +6218,7 @@
     <paper id="462">
       <title>(<fixed-case>R</fixed-case>e)construing Meaning in <fixed-case>NLP</fixed-case></title>
       <author><first>Sean</first><last>Trott</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <author><first>Nancy</first><last>Chang</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <pages>5170–5184</pages>
@@ -6230,7 +6230,7 @@
     </paper>
     <paper id="463">
       <title>Climbing towards <fixed-case>NLU</fixed-case>: <fixed-case>On</fixed-case> Meaning, Form, and Understanding in the Age of Data</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <pages>5185–5198</pages>
       <abstract>The success of the large neural language models on many NLP tasks is exciting. However, we find that these successes sometimes lead to hype in which these models are being described as “understanding” language or capturing “meaning”. In this position paper, we argue that a system trained only on form has a priori no way to learn meaning. In keeping with the ACL 2020 theme of “Taking Stock of Where We’ve Been and Where We’re Going”, we argue that a clear understanding of the distinction between form and meaning will help guide the field towards better science around natural language understanding.</abstract>
@@ -6242,7 +6242,7 @@
     </paper>
     <paper id="464">
       <title>Examining Citations of Natural Language Processing Literature</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>5199–5209</pages>
       <abstract>We extracted information from the ACL Anthology (AA) and Google Scholar (GS) to examine trends in citations of NLP papers. We explore questions such as: how well cited are papers of different types (journal articles, conference papers, demo papers, etc.)? how well cited are papers from different areas of within NLP? etc. Notably, we show that only about 56% of the papers in AA are cited ten or more times. CL Journal has the most cited papers, but its citation dominance has lessened in recent years. On average, long papers get almost three times as many citations as short papers; and papers on sentiment classification, anaphora resolution, and entity recognition have the highest median citations. The analyses presented here, and the associated dataset of NLP papers mapped to citations, have a number of uses including: understanding how the field is growing and quantifying the impact of different types of papers.</abstract>
       <url hash="0c740bbd">2020.acl-main.464</url>
@@ -6285,8 +6285,8 @@
       <author><first>Xiaoyi</first><last>Zhang</last></author>
       <author><first>Richard Yuanzhe</first><last>Pang</last></author>
       <author><first>Clara</first><last>Vania</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>5231–5247</pages>
       <abstract>While pretrained models such as BERT have shown large gains across natural language understanding tasks, their performance can be improved by further training the model on a data-rich intermediate task, before fine-tuning it on a target task. However, it is still poorly understood when and why intermediate-task training is beneficial for a given target task. To investigate this, we perform a large-scale study on the pretrained RoBERTa model with 110 intermediate-target task combinations. We further evaluate all trained models with 25 probing tasks meant to reveal the specific skills that drive transfer. We observe that intermediate tasks requiring high-level inference and reasoning abilities tend to work best. We also observe that target task performance is strongly correlated with higher-level abilities such as coreference resolution. However, we fail to observe more granular correlations between probing and target task performance, highlighting the need for further work on broad-coverage probing benchmarks. We also observe evidence that the forgetting of knowledge learned during pretraining may limit our analysis, highlighting the need for further work on transfer learning methods in these settings.</abstract>
       <url hash="4bdbbd8d">2020.acl-main.467</url>
@@ -6297,7 +6297,7 @@
     <paper id="468">
       <title>Predictive Biases in Natural Language Processing Models: A Conceptual Framework and Overview</title>
       <author><first>Deven Santosh</first><last>Shah</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>5248–5264</pages>
       <abstract>An increasing number of natural language processing papers address the effect of bias on predictions, introducing mitigation techniques at different parts of the standard NLP pipeline (data and models). However, these works have been conducted individually, without a unifying framework to organize efforts within the field. This situation leads to repetitive approaches, and focuses overly on bias symptoms/effects, rather than on their origins, which could limit the development of effective countermeasures. In this paper, we propose a unifying predictive bias framework for NLP. We summarize the NLP literature and suggest general mathematical definitions of predictive bias. We differentiate two consequences of bias: outcome disparities and error disparities, as well as four potential origins of biases: label bias, selection bias, model overamplification, and semantic bias. Our framework serves as an overview of predictive bias in NLP, integrating existing work into a single structure, and providing a conceptual baseline for improved frameworks.</abstract>
@@ -6349,7 +6349,7 @@
       <title>Hierarchical Modeling for User Personality Prediction: The Role of Message-Level Attention</title>
       <author><first>Veronica</first><last>Lynn</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>5306–5316</pages>
       <abstract>Not all documents are equally important. Language processing is increasingly finding use as a supplement for questionnaires to assess psychological attributes of consenting individuals, but most approaches neglect to consider whether all documents of an individual are equally informative. In this paper, we present a novel model that uses message-level attention to learn the relative weight of users’ social media posts for assessing their five factor personality traits. We demonstrate that models with message-level attention outperform those with word-level attention, and ultimately yield state-of-the-art accuracies for all five traits by using both word and message attention in combination with past approaches (an average increase in Pearson r of 2.5%). In addition, examination of the high-signal posts identified by our model provides insight into the relationship between language and personality, helping to inform future work.</abstract>
       <url hash="2284aac2">2020.acl-main.472</url>
@@ -6362,7 +6362,7 @@
       <title>Measuring Forecasting Skill from Text</title>
       <author><first>Shi</first><last>Zong</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5317–5331</pages>
       <abstract>People vary in their ability to make accurate predictions about the future. Prior studies have shown that some individuals can predict the outcome of future events with consistently better accuracy. This leads to a natural question: what makes some forecasters better than others? In this paper we explore connections between the language people use to describe their predictions and their forecasting skill. Datasets from two different forecasting domains are explored: (1) geopolitical forecasts from Good Judgment Open, an online prediction forum and (2) a corpus of company earnings forecasts made by financial analysts. We present a number of linguistic metrics which are computed over text associated with people’s predictions about the future including: uncertainty, readability, and emotion. By studying linguistic factors associated with predictions, we are able to shed some light on the approach taken by skilled forecasters. Furthermore, we demonstrate that it is possible to accurately predict forecasting skill using a model that is based solely on language. This could potentially be useful for identifying accurate predictions or potentially skilled forecasters earlier.</abstract>
       <url hash="3930f9fd">2020.acl-main.473</url>
@@ -6386,7 +6386,7 @@
       <title>Text-Based Ideal Points</title>
       <author><first>Keyon</first><last>Vafa</last></author>
       <author><first>Suresh</first><last>Naidu</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <pages>5345–5357</pages>
       <abstract>Ideal point models analyze lawmakers’ votes to quantify their political positions, or ideal points. But votes are not the only way to express a political position. Lawmakers also give speeches, release press statements, and post tweets. In this paper, we introduce the text-based ideal point model (TBIP), an unsupervised probabilistic topic model that analyzes texts to quantify the political positions of its authors. We demonstrate the TBIP with two types of politicized text data: U.S. Senate speeches and senator tweets. Though the model does not analyze their votes or political affiliations, the TBIP separates lawmakers by party, learns interpretable politicized topics, and infers ideal points close to the classical vote-based ideal points. One benefit of analyzing texts, as opposed to votes, is that the TBIP can estimate ideal points of anyone who authors political texts, including non-voting actors. To this end, we use it to study tweets from the 2020 Democratic presidential candidates. Using only the texts of their tweets, it identifies them along an interpretable progressive-to-moderate spectrum.</abstract>
       <url hash="66ab563e">2020.acl-main.475</url>
@@ -6412,9 +6412,9 @@
       <author><first>Donovan</first><last>Ong</last></author>
       <author><first>Jie</first><last>Fu</last></author>
       <author><first>Alvin</first><last>Chan</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <pages>5369–5373</pages>
       <abstract>Understanding human preferences, along with cultural and social nuances, lives at the heart of natural language understanding. Concretely, we present a new task and corpus for learning alignments between machine and human preferences. Our newly introduced problem is concerned with predicting the preferable options from two sentences describing scenarios that may involve social and cultural situations. Our problem is framed as a natural language inference task with crowd-sourced preference votes by human players, obtained from a gamified voting platform. We benchmark several state-of-the-art neural models, along with BERT and friends on this task. Our experimental results show that current state-of-the-art NLP models still leave much room for improvement.</abstract>
       <url hash="0a4f3d02">2020.acl-main.477</url>
@@ -6426,7 +6426,7 @@
     </paper>
     <paper id="478">
       <title>Discourse as a Function of Event: Profiling Discourse Structure in News Articles around the Main Event</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Aaron</first><last>Lee</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <author><first>Lu</first><last>Wang</last></author>
@@ -6508,7 +6508,7 @@
     <paper id="484">
       <title>Double-Hard Debias: Tailoring Word Embeddings for Gender Bias Mitigation</title>
       <author><first>Tianlu</first><last>Wang</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
       <author><first>Bryan</first><last>McCann</last></author>
       <author><first>Vicente</first><last>Ordonez</last></author>
@@ -6524,8 +6524,8 @@
       <title>Language (Technology) is Power: A Critical Survey of “Bias” in <fixed-case>NLP</fixed-case></title>
       <author><first>Su Lin</first><last>Blodgett</last></author>
       <author><first>Solon</first><last>Barocas</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <pages>5454–5476</pages>
       <abstract>We survey 146 papers analyzing “bias” in NLP systems, finding that their motivations are often vague, inconsistent, and lacking in normative reasoning, despite the fact that analyzing “bias” is an inherently normative process. We further find that these papers’ proposed quantitative techniques for measuring or mitigating “bias” are poorly matched to their motivations and do not engage with the relevant literature outside of NLP. Based on these findings, we describe the beginnings of a path forward by proposing three recommendations that should guide work analyzing “bias” in NLP systems. These recommendations rest on a greater recognition of the relationships between language and social hierarchies, encouraging researchers and practitioners to articulate their conceptualizations of “bias”—i.e., what kinds of system behaviors are harmful, in what ways, to whom, and why, as well as the normative reasoning underlying these statements—and to center work around the lived experiences of members of communities affected by NLP systems, while interrogating and reimagining the power relations between technologists and such communities.</abstract>
       <url hash="6c685422">2020.acl-main.485</url>
@@ -6539,7 +6539,7 @@
       <author><first>Saadia</first><last>Gabriel</last></author>
       <author><first>Lianhui</first><last>Qin</last></author>
       <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>5477–5490</pages>
       <abstract>Warning: this paper contains content that may be offensive or upsetting. Language has the power to reinforce stereotypes and project social biases onto others. At the core of the challenge is that it is rarely what is stated explicitly, but rather the implied meanings, that frame people’s judgments about others. For example, given a statement that “we shouldn’t lower our standards to hire more women,” most listeners will infer the implicature intended by the speaker - that “women (candidates) are less qualified.” Most semantic formalisms, to date, do not capture such pragmatic implications in which people express social biases and power differentials in language. We introduce Social Bias Frames, a new conceptual formalism that aims to model the pragmatic frames in which people project social biases and stereotypes onto others. In addition, we introduce the Social Bias Inference Corpus to support large-scale modelling and evaluation with 150k structured annotations of social media posts, covering over 34k implications about a thousand demographic groups. We then establish baseline approaches that learn to recover Social Bias Frames from unstructured text. We find that while state-of-the-art neural models are effective at high-level categorization of whether a given statement projects unwanted social bias (80% F1), they are not effective at spelling out more detailed explanations in terms of Social Bias Frames. Our study motivates future work that combines structured pragmatic inference with commonsense reasoning on social implications.</abstract>
@@ -6584,7 +6584,7 @@
       <author><first>Zhiqing</first><last>Sun</last></author>
       <author><first>Shikhar</first><last>Vashishth</last></author>
       <author><first>Soumya</first><last>Sanyal</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
       <pages>5516–5522</pages>
       <abstract>Knowledge Graph Completion (KGC) aims at automatically predicting missing links for large-scale knowledge graphs. A vast number of state-of-the-art KGC techniques have got published at top conferences in several research fields, including data mining, machine learning, and natural language processing. However, we notice that several recent papers report very high performance, which largely outperforms previous state-of-the-art methods. In this paper, we find that this can be attributed to the inappropriate evaluation protocol used by them and propose a simple evaluation protocol to address this problem. The proposed protocol is robust to handle bias in the model, which can substantially affect the final results. We conduct extensive experiments and report performance of several existing methods using our protocol. The reproducible code has been made publicly available.</abstract>
@@ -6621,7 +6621,7 @@
     <paper id="492">
       <title>Explaining Black Box Predictions and Unveiling Data Artifacts through Influence Functions</title>
       <author><first>Xiaochuang</first><last>Han</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>5553–5563</pages>
       <abstract>Modern deep learning models for NLP are notoriously opaque. This has motivated the development of methods for interpreting such models, e.g., via gradient-based saliency maps or the visualization of attention weights. Such approaches aim to provide explanations for a particular model prediction by highlighting important words in the corresponding input text. While this might be useful for tasks where decisions are explicitly influenced by individual tokens in the input, we suspect that such highlighting is not suitable for tasks where model decisions should be driven by more complex reasoning. In this work, we investigate the use of influence functions for NLP, providing an alternative approach to interpreting neural text classifiers. Influence functions explain the decisions of a model by identifying influential training examples. Despite the promise of this approach, influence functions have not yet been extensively evaluated in the context of NLP, a gap addressed by this work. We conduct a comparison between influence functions and common word-saliency methods on representative tasks. As suspected, we find that influence functions are particularly useful for natural language inference, a task in which ‘saliency maps’ may not have clear interpretation. Furthermore, we develop a new quantitative measure based on influence functions that can reveal artifacts in training data.</abstract>
@@ -6634,7 +6634,7 @@
       <title>Finding Universal Grammatical Relations in Multilingual <fixed-case>BERT</fixed-case></title>
       <author><first>Ethan A.</first><last>Chi</last></author>
       <author><first>John</first><last>Hewitt</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>5564–5577</pages>
       <abstract>Recent work has found evidence that Multilingual BERT (mBERT), a transformer-based multilingual masked language model, is capable of zero-shot cross-lingual transfer, suggesting that some aspects of its representations are shared cross-lingually. To better understand this overlap, we extend recent work on finding syntactic trees in neural networks’ internal representations to the multilingual setting. We show that subspaces of mBERT representations recover syntactic tree distances in languages other than English, and that these subspaces are approximately shared across languages. Motivated by these results, we present an unsupervised analysis method that provides evidence mBERT learns representations of syntactic dependency labels, in the form of clusters which largely agree with the Universal Dependencies taxonomy. This evidence suggests that even without explicit supervision, multilingual masked language models learn certain linguistic universals.</abstract>
       <url hash="6b65a4aa">2020.acl-main.493</url>
@@ -6723,9 +6723,9 @@
     </paper>
     <paper id="500">
       <title>On the Importance of Diversity in Question Generation for <fixed-case>QA</fixed-case></title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Shubham</first><last>Chandel</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <pages>5651–5656</pages>
       <abstract>Automatic question generation (QG) has shown promise as a source of synthetic training data for question answering (QA). In this paper we ask: Is textual diversity in QG beneficial for downstream QA? Using top-p nucleus sampling to derive samples from a transformer-based question generator, we show that diversity-promoting QG indeed provides better QA training than likelihood maximization approaches such as beam search. We also show that standard QG evaluation metrics such as BLEU, ROUGE and METEOR are inversely correlated with diversity, and propose a diversity-aware intrinsic measure of overall QG quality that correlates well with extrinsic evaluation on QA.</abstract>
@@ -6751,7 +6751,7 @@
       <title><fixed-case>SCDE</fixed-case>: Sentence Cloze Dataset with High Quality Distractors From Examinations</title>
       <author><first>Xiang</first><last>Kong</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5668–5683</pages>
       <abstract>We introduce SCDE, a dataset to evaluate the performance of computational models through sentence prediction. SCDE is a human created sentence cloze dataset, collected from public school English examinations. Our task requires a model to fill up multiple blanks in a passage from a shared candidate set with distractors designed by English teachers. Experimental results demonstrate that this task requires the use of non-local, discourse-level context beyond the immediate sentence neighborhood. The blanks require joint solving and significantly impair each other’s context. Furthermore, through ablations, we show that the distractors are of high quality and make the task more challenging. Our experiments show that there is a significant performance gap between advanced models (72%) and humans (87%), encouraging future models to bridge this gap.</abstract>
       <url hash="98d79945">2020.acl-main.502</url>
@@ -6785,7 +6785,7 @@
     <paper id="505">
       <title>Transformers to Learn Hierarchical Contexts in Multiparty Dialogue for Span-based Question Answering</title>
       <author><first>Changmao</first><last>Li</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>5709–5714</pages>
       <abstract>We introduce a novel approach to transformers that learns hierarchical representations in multiparty dialogue. First, three language modeling tasks are used to pre-train the transformers, token- and utterance-level language modeling and utterance order prediction, that learn both token and utterance embeddings for better understanding in dialogue contexts. Then, multi-task learning between the utterance prediction and the token span prediction is applied to fine-tune for span-based question answering (QA). Our approach is evaluated on the FriendsQA dataset and shows improvements of 3.8% and 1.4% over the two state-of-the-art transformer models, BERT and RoBERTa, respectively.</abstract>
       <url hash="2e08f0bb">2020.acl-main.505</url>
@@ -6811,7 +6811,7 @@
       <title><fixed-case>STARC</fixed-case>: Structured Annotations for Reading Comprehension</title>
       <author><first>Yevgeni</first><last>Berzak</last></author>
       <author><first>Jonathan</first><last>Malmaud</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>5726–5735</pages>
       <abstract>We present STARC (Structured Annotations for Reading Comprehension), a new annotation framework for assessing reading comprehension with multiple choice questions. Our framework introduces a principled structure for the answer choices and ties them to textual span annotations. The framework is implemented in OneStopQA, a new high-quality dataset for evaluation and analysis of reading comprehension in English. We use this dataset to demonstrate that STARC can be leveraged for a key new application for the development of SAT-like reading comprehension materials: automatic annotation quality probing via span ablation experiments. We further show that it enables in-depth analyses and comparisons between machine and human reading comprehension behavior, including error distributions and guessing ability. Our experiments also reveal that the standard multiple choice dataset in NLP, RACE, is limited in its ability to measure reading comprehension. 47% of its questions can be guessed by machines without accessing the passage, and 18% are unanimously judged by humans as not having a unique correct answer. OneStopQA provides an alternative test set for reading comprehension which alleviates these shortcomings and has a substantially higher human ceiling performance.</abstract>
       <url hash="77d846d7">2020.acl-main.507</url>
@@ -6882,7 +6882,7 @@
     </paper>
     <paper id="513">
       <title><fixed-case>O</fixed-case>pinion<fixed-case>D</fixed-case>igest: A Simple Framework for Opinion Summarization</title>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <author><first>Xiaolan</first><last>Wang</last></author>
       <author><first>Stefanos</first><last>Angelidis</last></author>
       <author><first>Wang-Chiew</first><last>Tan</last></author>
@@ -6924,7 +6924,7 @@
       <title>Generate, Delete and Rewrite: A Three-Stage Framework for Improving Persona Consistency of Dialogue Generation</title>
       <author><first>Haoyu</first><last>Song</last></author>
       <author><first>Yan</first><last>Wang</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Xiaojiang</first><last>Liu</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>5821–5831</pages>
@@ -6976,10 +6976,10 @@
     </paper>
     <paper id="520">
       <title>An Effective Transition-based Model for Discontinuous <fixed-case>NER</fixed-case></title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>5860–5870</pages>
       <abstract>Unlike widely used Named Entity Recognition (NER) data sets in generic domains, biomedical NER data sets often contain mentions consisting of discontinuous spans. Conventional sequence tagging techniques encode Markov assumptions that are efficient but preclude recovery of these mentions. We propose a simple, effective transition-based model with generic neural encoding for discontinuous NER. Through extensive experiments on three biomedical data sets, we show that our model can effectively recognize discontinuous mentions without sacrificing the accuracy on continuous mentions.</abstract>
       <url hash="22fd1874">2020.acl-main.520</url>
@@ -7059,7 +7059,7 @@
       <author><first>Zhiwen</first><last>Xie</last></author>
       <author><first>Guangyou</first><last>Zhou</last></author>
       <author><first>Jin</first><last>Liu</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <pages>5929–5939</pages>
       <abstract>The goal of Knowledge graph embedding (KGE) is to learn how to represent the low dimensional vectors for entities and relations based on the observed triples. The conventional shallow models are limited to their expressiveness. ConvE (Dettmers et al., 2018) takes advantage of CNN and improves the expressive power with parameter efficient operators by increasing the interactions between head and relation embeddings. However, there is no structural information in the embedding space of ConvE, and the performance is still limited by the number of interactions. The recent KBGAT (Nathani et al., 2019) provides another way to learn embeddings by adaptively utilizing structural information. In this paper, we take the benefits of ConvE and KBGAT together and propose a Relation-aware Inception network with joint local-global structural information for knowledge graph Embedding (ReInceptionE). Specifically, we first explore the Inception network to learn query embedding, which aims to further increase the interactions between head and relation embeddings. Then, we propose to use a relation-aware attention mechanism to enrich the query embedding with the local neighborhood and global entity information. Experimental results on both WN18RR and FB15k-237 datasets demonstrate that ReInceptionE achieves competitive performance compared with state-of-the-art methods.</abstract>
       <url hash="4e7a9332">2020.acl-main.526</url>
@@ -7086,7 +7086,7 @@
       <author><first>Minlong</first><last>Peng</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5951–5960</pages>
       <abstract>Recently, many works have tried to augment the performance of Chinese named entity recognition (NER) using word lexicons. As a representative, Lattice-LSTM has achieved new benchmark results on several public Chinese NER datasets. However, Lattice-LSTM has a complex model architecture. This limits its application in many industrial areas where real-time NER responses are needed. In this work, we propose a simple but effective method for incorporating the word lexicon into the character representations. This method avoids designing a complicated sequence modeling architecture, and for any neural NER model, it requires only subtle adjustment of the character representation layer to introduce the lexicon information. Experimental studies on four benchmark Chinese NER datasets show that our method achieves an inference speed up to 6.15 times faster than those of state-of-the-art methods, along with a better performance. The experimental results also show that the proposed method can be easily incorporated with pre-trained models like BERT.</abstract>
       <url hash="104bcce0">2020.acl-main.528</url>
@@ -7113,7 +7113,7 @@
       <title>Contextual Neural Machine Translation Improves Translation of Cataphoric Pronouns</title>
       <author><first>KayYen</first><last>Wong</last></author>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>5971–5978</pages>
       <abstract>The advent of context-aware NMT has resulted in promising improvements in the overall translation quality and specifically in the translation of discourse phenomena such as pronouns. Previous works have mainly focused on the use of past sentences as context with a focus on anaphora translation. In this work, we investigate the effect of future sentences as context by comparing the performance of a contextual NMT model trained with the future context to the one trained with the past context. Our experiments and evaluation, using generic and pronoun-focused automatic metrics, show that the use of future context not only achieves significant improvements over the context-agnostic Transformer, but also demonstrates comparable and in some cases improved performance over its counterpart trained on past context. We also perform an evaluation on a targeted cataphora test suite and report significant gains over the context-agnostic Transformer in terms of BLEU.</abstract>
       <url hash="0ee0fa71">2020.acl-main.530</url>
@@ -7138,7 +7138,7 @@
     <paper id="532">
       <title>Tagged Back-translation Revisited: Why Does It Really Work?</title>
       <author><first>Benjamin</first><last>Marie</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <pages>5990–5997</pages>
       <abstract>In this paper, we show that neural machine translation (NMT) systems trained on large back-translated data overfit some of the characteristics of machine-translated texts. Such NMT systems better translate human-produced translations, i.e., translationese, but may largely worsen the translation quality of original texts. Our analysis reveals that adding a simple tag to back-translations prevents this quality degradation and improves on average the overall translation quality by helping the NMT system to distinguish back-translated data from original parallel data during training. We also show that, in contrast to high-resource configurations, NMT systems trained in low-resource settings are much less vulnerable to overfit back-translations. We conclude that the back-translations in the training data should always be tagged especially when the origin of the text to be translated is unknown.</abstract>
@@ -7192,7 +7192,7 @@
       <author><first>Alexis</first><last>Conneau</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Haoran</first><last>Li</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <pages>6022–6034</pages>
       <abstract>We study the problem of multilingual masked language modeling, i.e. the training of a single model on concatenated text from multiple languages, and present a detailed study of several factors that influence why these models are so effective for cross-lingual transfer. We show, contrary to what was previously hypothesized, that transfer is possible even when there is no shared vocabulary across the monolingual corpora and also when the text comes from very different domains. The only requirement is that there are some shared parameters in the top layers of the multi-lingual encoder. To better understand this result, we also show that representations from monolingual BERT models in different languages can be aligned post-hoc quite effectively, strongly suggesting that, much like for non-contextual word embeddings, there are universal latent symmetries in the learned embedding spaces. For multilingual masked language modeling, these symmetries are automatically discovered and aligned during the joint training process.</abstract>
@@ -7218,7 +7218,7 @@
     </paper>
     <paper id="538">
       <title>Incorporating External Knowledge through Pre-training for Natural Language to Code Generation</title>
-      <author><first>Frank F.</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
       <author><first>Zhengbao</first><last>Jiang</last></author>
       <author><first>Pengcheng</first><last>Yin</last></author>
       <author><first>Bogdan</first><last>Vasilescu</last></author>
@@ -7341,7 +7341,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>6141–6151</pages>
       <abstract>Knowledge inference on knowledge graph has attracted extensive attention, which aims to find out connotative valid facts in knowledge graph and is very helpful for improving the performance of many downstream applications. However, researchers have mainly poured attention to knowledge inference on binary facts. The studies on n-ary facts are relatively scarcer, although they are also ubiquitous in the real world. Therefore, this paper addresses knowledge inference on n-ary facts. We represent each n-ary fact as a primary triple coupled with a set of its auxiliary descriptive attribute-value pair(s). We further propose a neural network model, NeuInfer, for knowledge inference on n-ary facts. Besides handling the common task to infer an unknown element in a whole fact, NeuInfer can cope with a new type of task, flexible knowledge inference. It aims to infer an unknown element in a partial fact consisting of the primary triple coupled with any number of its auxiliary description(s). Experimental results demonstrate the remarkable superiority of NeuInfer.</abstract>
       <url hash="b99c5006">2020.acl-main.546</url>
@@ -7429,7 +7429,7 @@
       <author><first>Yiran</first><last>Chen</last></author>
       <author><first>Danqing</first><last>Wang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6197–6208</pages>
       <abstract>This paper creates a paradigm shift with regard to the way we build neural extractive summarization systems. Instead of following the commonly used framework of extracting sentences individually and modeling the relationship between sentences, we formulate the extractive summarization task as a semantic text matching problem, in which a source document and candidate summaries will be (extracted from the original text) matched in a semantic space. Notably, this paradigm shift to semantic matching framework is well-grounded in our comprehensive analysis of the inherent gap between sentence-level and summary-level extractors based on the property of the dataset. Besides, even instantiating the framework with a simple form of a matching model, we have driven the state-of-the-art extractive result on CNN/DailyMail to a new level (44.41 in ROUGE-1). Experiments on the other five datasets also show the effectiveness of the matching framework. We believe the power of this matching-based summarization framework has not been fully exploited. To encourage more instantiations in the future, we have released our codes, processed dataset, as well as generated summaries in <url>https://github.com/maszhongming/MatchSum</url>.</abstract>
       <url hash="4a701f4d">2020.acl-main.552</url>
@@ -7443,7 +7443,7 @@
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Yining</first><last>Zheng</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6209–6219</pages>
       <abstract>As a crucial step in extractive document summarization, learning cross-sentence relations has been explored by a plethora of approaches. An intuitive way is to put them in the graph-based neural network, which has a more complex structure for capturing inter-sentence relationships. In this paper, we present a heterogeneous graph-based neural network for extractive summarization (HETERSUMGRAPH), which contains semantic nodes of different granularity levels apart from sentences. These additional nodes act as the intermediary between sentences and enrich the cross-sentence relations. Besides, our graph structure is flexible in natural extension from a single-document setting to multi-document via introducing document nodes. To our knowledge, we are the first one to introduce different types of nodes into graph-based neural networks for extractive document summarization and perform a comprehensive qualitative analysis to investigate their benefits. The code will be released on Github.</abstract>
       <url hash="59cc20c5">2020.acl-main.553</url>
@@ -7504,7 +7504,7 @@
     <paper id="558">
       <title>Are we Estimating or Guesstimating Translation Quality?</title>
       <author><first>Shuo</first><last>Sun</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>6262–6267</pages>
       <abstract>Recent advances in pre-trained multilingual language models lead to state-of-the-art results on the task of quality estimation (QE) for machine translation. A carefully engineered ensemble of such models won the QE shared task at WMT19. Our in-depth analysis, however, shows that the success of using pre-trained language models for QE is over-estimated due to three issues we observed in current QE datasets: (i) The distributions of quality scores are imbalanced and skewed towards good quality scores; (iii) QE models can perform well on these datasets while looking at only source or translated sentences; (iii) They contain statistical artifacts that correlate well with human-annotated QE labels. Our findings suggest that although QE models might capture fluency of translated sentences and complexity of source sentences, they cannot model adequacy of translations effectively.</abstract>
@@ -7519,7 +7519,7 @@
       <author><first>Ronen</first><last>Tamari</last></author>
       <author><first>Chen</first><last>Shani</last></author>
       <author><first>Tom</first><last>Hope</last></author>
-      <author><first>Miriam R L</first><last>Petruck</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last></author>
       <author><first>Omri</first><last>Abend</last></author>
       <author><first>Dafna</first><last>Shahaf</last></author>
       <pages>6268–6281</pages>
@@ -7545,7 +7545,7 @@
     </paper>
     <paper id="561">
       <title>The Unstoppable Rise of Computational Linguistics in Deep Learning</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>6294–6306</pages>
       <abstract>In this paper, we trace the history of neural networks applied to natural language understanding tasks, and identify key contributions which the nature of language has made to the development of neural network architectures. We focus on the importance of variable binding and its instantiation in attention-based models, and argue that Transformer is not a sequence model but an induced-structure model. This perspective leads to predictions of the challenges facing research in deep learning architectures for natural language understanding.</abstract>
       <url hash="59cee806">2020.acl-main.561</url>
@@ -7616,7 +7616,7 @@
       <title>Learning Efficient Dialogue Policy from Demonstrations through Shaping</title>
       <author><first>Huimin</first><last>Wang</last></author>
       <author><first>Baolin</first><last>Peng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>6355–6365</pages>
       <abstract>Training a task-oriented dialogue agent with reinforcement learning is prohibitively expensive since it requires a large volume of interactions with users. Human demonstrations can be used to accelerate learning progress. However, how to effectively leverage demonstrations to learn dialogue policy remains less explored. In this paper, we present Sˆ2Agent that efficiently learns dialogue policy from demonstrations through policy shaping and reward shaping. We use an imitation model to distill knowledge from demonstrations, based on which policy shaping estimates feedback on how the agent should act in policy space. Reward shaping is then incorporated to bonus state-actions similar to demonstrations explicitly in value space encouraging better exploration. The effectiveness of the proposed Sˆ2Agentt is demonstrated in three dialogue domains and a challenging domain adaptation task with both user simulator evaluation and human evaluation.</abstract>
       <url hash="1e967c17">2020.acl-main.566</url>
@@ -7641,7 +7641,7 @@
     <paper id="568">
       <title>Speaker Sensitive Response Evaluation Model</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>6376–6385</pages>
       <abstract>Automatic evaluation of open-domain dialogue response generation is very challenging because there are many appropriate responses for a given context. Existing evaluation models merely compare the generated response with the ground truth response and rate many of the appropriate responses as inappropriate if they deviate from the ground truth. One approach to resolve this problem is to consider the similarity of the generated response with the conversational context. In this paper, we propose an automatic evaluation model based on that idea and learn the model parameters from an unlabeled conversation corpus. Our approach considers the speakers in defining the different levels of similar context. We use a Twitter conversation corpus that contains many speakers and conversations to test our evaluation model. Experiments show that our model outperforms the other existing evaluation metrics in terms of high correlation with human annotation scores. We also show that our model trained on Twitter can be applied to movie dialogues without any additional training. We provide our code and the learned parameters so that they can be used for automatic evaluation of dialogue response generation models.</abstract>
       <url hash="2773a980">2020.acl-main.568</url>
@@ -7655,13 +7655,13 @@
       <author><first>Yuqing</first><last>Xing</last></author>
       <author><first>Fang</first><last>Kong</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>6386–6395</pages>
       <abstract>Due to its great importance in deep natural language understanding and various down-stream applications, text-level parsing of discourse rhetorical structure (DRS) has been drawing more and more attention in recent years. However, all the previous studies on text-level discourse parsing adopt bottom-up approaches, which much limit the DRS determination on local information and fail to well benefit from global information of the overall discourse. In this paper, we justify from both computational and perceptive points-of-view that the top-down architecture is more suitable for text-level DRS parsing. On the basis, we propose a top-down neural architecture toward text-level DRS parsing. In particular, we cast discourse parsing as a recursive split point ranking task, where a split point is classified to different levels according to its rank and the elementary discourse units (EDUs) associated with it are arranged accordingly. In this way, we can determine the complete DRS as a hierarchical tree structure via an encoder-decoder with an internal stack. Experimentation on both the English RST-DT corpus and the Chinese CDTB corpus shows the great effectiveness of our proposed top-down approach towards text-level DRS parsing.</abstract>
       <url hash="1098f9fc">2020.acl-main.569</url>
       <attachment type="Software" hash="00fcb529">2020.acl-main.569.Software.zip</attachment>
-      <doi>10.18653/v1/2020.acl-main.569</doi>
       <attachment type="Dataset" hash="65803fc5">2020.acl-main.569.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.acl-main.569</doi>
       <video href="http://slideslive.com/38928746"/>
       <bibkey>zhang-etal-2020-top</bibkey>
     </paper>
@@ -7768,7 +7768,7 @@
       <title>Named Entity Recognition as Dependency Parsing</title>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>6470–6476</pages>
       <abstract>Named Entity Recognition (NER) is a fundamental task in Natural Language Processing, concerned with identifying spans of text expressing references to entities. NER research is often focused on flat entities only (flat NER), ignoring the fact that entity references can be nested, as in [Bank of [China]] (Finkel and Manning, 2009). In this paper, we use ideas from graph-based dependency parsing to provide our model a global view on the input via a biaffine model (Dozat and Manning, 2017). The biaffine model scores pairs of start and end tokens in a sentence which we use to explore all spans, so that the model is able to predict named entities accurately. We show that the model works well for both nested and flat NER through evaluation on 8 corpora and achieving SoTA performance on all of them, with accuracy gains of up to 2.2 percentage points.</abstract>
       <url hash="ece53cd4">2020.acl-main.577</url>
@@ -7822,7 +7822,7 @@
       <title>Single-/Multi-Source Cross-Lingual <fixed-case>NER</fixed-case> via Teacher-Student Learning on Unlabeled Data in Target Language</title>
       <author><first>Qianhui</first><last>Wu</last></author>
       <author><first>Zijia</first><last>Lin</last></author>
-      <author><first>Börje F.</first><last>Karlsson</last></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last></author>
       <author><first>Jian-Guang</first><last>Lou</last></author>
       <author><first>Biqing</first><last>Huang</last></author>
       <pages>6505–6514</pages>
@@ -7837,7 +7837,7 @@
       <author><first>Shaowei</first><last>Chen</last></author>
       <author><first>Jie</first><last>Liu</last></author>
       <author><first>Yu</first><last>Wang</last></author>
-      <author id="wenzheng-zhang"><first>Wenzheng</first><last>Zhang</last></author>
+      <author><first>Wenzheng</first><last>Zhang</last></author>
       <author><first>Ziming</first><last>Chi</last></author>
       <pages>6515–6524</pages>
       <abstract>Opinion entity extraction is a fundamental task in fine-grained opinion mining. Related studies generally extract aspects and/or opinion expressions without recognizing the relations between them. However, the relations are crucial for downstream tasks, including sentiment classification, opinion summarization, etc. In this paper, we explore Aspect-Opinion Pair Extraction (AOPE) task, which aims at extracting aspects and opinion expressions in pairs. To deal with this task, we propose Synchronous Double-channel Recurrent Network (SDRN) mainly consisting of an opinion entity extraction unit, a relation detection unit, and a synchronization unit. The opinion entity extraction unit and the relation detection unit are developed as two channels to extract opinion entities and relations simultaneously. Furthermore, within the synchronization unit, we design Entity Synchronization Mechanism (ESM) and Relation Synchronization Mechanism (RSM) to enhance the mutual benefit on the above two channels. To verify the performance of SDRN, we manually build three datasets based on SemEval 2014 and 2015 benchmarks. Extensive experiments demonstrate that SDRN achieves state-of-the-art performances.</abstract>
@@ -7864,8 +7864,8 @@
     </paper>
     <paper id="584">
       <title><fixed-case>K</fixed-case>nowledge Supports Visual Language Grounding: <fixed-case>A</fixed-case> Case Study on Colour Terms</title>
-      <author><first>Simeon</first><last>Schüz</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>6536–6542</pages>
       <abstract>In human cognition, world knowledge supports the perception of object colours: knowing that trees are typically green helps to perceive their colour in certain contexts. We go beyond previous studies on colour terms using isolated colour swatches and study visual grounding of colour terms in realistic objects. Our models integrate processing of visual information and object-specific knowledge via hard-coded (late) or learned (early) fusion. We find that both models consistently outperform a bottom-up baseline that predicts colour terms solely from visual inputs, but show interesting differences when predicting atypical colours of so-called colour diagnostic objects. Our models also achieve promising results when tested on new object categories not seen during training.</abstract>
       <url hash="7764c72d">2020.acl-main.584</url>
@@ -7890,8 +7890,8 @@
       <title>Words Aren’t Enough, Their Order Matters: On the Robustness of Grounding Visual Referring Expressions</title>
       <author><first>Arjun</first><last>Akula</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>6555–6565</pages>
       <abstract>Visual referring expression recognition is a challenging task that requires natural language understanding in the context of an image. We critically examine RefCOCOg, a standard benchmark for this task, using a human study and show that 83.7% of test instances do not require reasoning on linguistic structure, i.e., words are enough to identify the target object, the word order doesn’t matter. To measure the true progress of existing models, we split the test set into two sets, one which requires reasoning on linguistic structure and the other which doesn’t. Additionally, we create an out-of-distribution dataset Ref-Adv by asking crowdworkers to perturb in-domain examples such that the target object changes. Using these datasets, we empirically show that existing methods fail to exploit linguistic structure and are 12% to 23% lower in performance than the established progress for this task. We also propose two methods, one based on contrastive learning and the other based on multi-task learning, to increase the robustness of ViLBERT, the current state-of-the-art model for this task. Our datasets are publicly available at <url>https://github.com/aws/aws-refcocog-adv</url>.</abstract>
@@ -7905,7 +7905,7 @@
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Dianqi</first><last>Li</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>6566–6577</pages>
       <abstract>Multi-head attentive neural architectures have achieved state-of-the-art results on a variety of natural language processing tasks. Evidence has shown that they are overparameterized; attention heads can be pruned without significant performance loss. In this work, we instead “reallocate” them—the model learns to activate different heads on different inputs. Drawing connections between multi-head attention and mixture of experts, we propose the mixture of attentive experts model (MAE). MAE is trained using a block coordinate descent algorithm that alternates between updating (1) the responsibilities of the experts and (2) their parameters. Experiments on machine translation and language modeling show that MAE outperforms strong baselines on both tasks. Particularly, on the WMT14 English to German translation dataset, MAE improves over “transformer-base” by 0.8 BLEU, with a comparable number of parameters. Our analysis shows that our model learns to specialize different experts to different inputs.</abstract>
       <url hash="d131658d">2020.acl-main.587</url>
@@ -7916,7 +7916,7 @@
     <paper id="588">
       <title>Dependency Graph Enhanced Dual-transformer Structure for Aspect-based Sentiment Classification</title>
       <author><first>Hao</first><last>Tang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Chenliang</first><last>Li</last></author>
       <author><first>Qiji</first><last>Zhou</last></author>
       <pages>6578–6588</pages>
@@ -7931,7 +7931,7 @@
       <title>Differentiable Window for Dynamic Local Attention</title>
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
       <author><first>Xuan-Phi</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>6589–6599</pages>
       <abstract>We propose Differentiable Window, a new neural module and general purpose component for dynamic window selection. While universally applicable, we demonstrate a compelling use case of utilizing Differentiable Window to improve standard attention modules by enabling more focused attentions over the input regions. We propose two variants of Differentiable Window, and integrate them within the Transformer architecture in two novel ways. We evaluate our proposed approach on a myriad of NLP tasks, including machine translation, sentiment analysis, subject-verb agreement and language modeling. Our experimental results demonstrate consistent and sizable improvements across all tasks.</abstract>
@@ -7947,7 +7947,7 @@
       <author><first>Yi</first><last>Zhou</last></author>
       <author><first>Cho-Jui</first><last>Hsieh</last></author>
       <author><first>Minhao</first><last>Cheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6600–6610</pages>
       <abstract>Despite achieving prominent performance on many important tasks, it has been reported that neural networks are vulnerable to adversarial examples. Previously studies along this line mainly focused on semantic tasks such as sentiment analysis, question answering and reading comprehension. In this study, we show that adversarial examples also exist in dependency parsing: we propose two approaches to study where and how parsers make mistakes by searching over perturbations to existing texts at sentence and phrase levels, and design algorithms to construct such examples in both of the black-box and white-box settings. Our experiments with one of state-of-the-art parsers on the English Penn Treebank (PTB) show that up to 77% of input examples admit adversarial perturbations, and we also show that the robustness of parsing models can be improved by crafting high-quality adversaries and including them in the training stage, while suffering little to no performance drop on the clean input data.</abstract>
       <url hash="b2270fa7">2020.acl-main.590</url>
@@ -7960,7 +7960,7 @@
       <author><first>Wenyu</first><last>Du</last></author>
       <author><first>Zhouhan</first><last>Lin</last></author>
       <author><first>Yikang</first><last>Shen</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <pages>6611–6628</pages>
@@ -7994,7 +7994,7 @@
       <author><first>Gabriel</first><last>Stanovsky</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Jesse</first><last>Dodge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>6640–6651</pages>
       <abstract>As NLP models become larger, executing a trained model requires significant computational resources incurring monetary and environmental costs. To better respect a given inference budget, we propose a modification to contextual representation fine-tuning which, during inference, allows for an early (and fast) “exit” from neural network calculations for simple instances, and late (and accurate) exit for hard instances. To achieve this, we add classifiers to different layers of BERT and use their calibrated confidence scores to make early exit decisions. We test our proposed modification on five different datasets in two tasks: three text classification datasets and two natural language inference benchmarks. Our method presents a favorable speed/accuracy tradeoff in almost all cases, producing models which are up to five times faster than the state of the art, while preserving their accuracy. Our method also requires almost no additional training resources (in either time or parameters) compared to the baseline BERT model. Finally, our method alleviates the need for costly retraining of multiple models at different levels of efficiency; we allow users to control the inference speed/accuracy tradeoff using a single trained model, by setting a single variable at inference time. We publicly release our code.</abstract>
       <url hash="e591a272">2020.acl-main.593</url>
@@ -8033,8 +8033,8 @@
       <title>Modeling Morphological Typology for Unsupervised Learning of Language Morphology</title>
       <author><first>Hongzhi</first><last>Xu</last></author>
       <author><first>Jordan</first><last>Kodner</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>6672–6681</pages>
       <abstract>This paper describes a language-independent model for fully unsupervised morphological analysis that exploits a universal framework leveraging morphological typology. By modeling morphological processes including suffixation, prefixation, infixation, and full and partial reduplication with constrained stem change rules, our system effectively constrains the search space and offers a wide coverage in terms of morphological typology. The system is tested on nine typologically and genetically diverse languages, and shows superior performance over leading systems. We also investigate the effect of an oracle that provides only a handful of bits per language to signal morphological type.</abstract>
       <url hash="d8ca60eb">2020.acl-main.596</url>
@@ -8047,15 +8047,15 @@
       <author><first>Adina</first><last>Williams</last></author>
       <author><first>Tiago</first><last>Pimentel</last></author>
       <author><first>Hagen</first><last>Blix</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <pages>6682–6695</pages>
       <abstract>The noun lexica of many natural languages are divided into several declension classes with characteristic morphological properties. Class membership is far from deterministic, but the phonological form of a noun and/or its meaning can often provide imperfect clues. Here, we investigate the strength of those clues. More specifically, we operationalize this by measuring how much information, in bits, we can glean about declension class from knowing the form and/or meaning of nouns. We know that form and meaning are often also indicative of grammatical gender—which, as we quantitatively verify, can itself share information with declension class—so we also control for gender. We find for two Indo-European languages (Czech and German) that form and meaning respectively share significant amounts of information with class (and contribute additional information above and beyond gender). The three-way interaction between class, form, and meaning (given gender) is also significant. Our study is important for two reasons: First, we introduce a new method that provides additional quantitative support for a classic linguistic finding that form and meaning are relevant for the classification of nouns into declensions. Secondly, we show not only that individual declensions classes vary in the strength of their clues within a language, but also that these variations themselves vary across languages.</abstract>
       <url hash="17bb637a">2020.acl-main.597</url>
       <attachment type="Software" hash="d9ab711a">2020.acl-main.597.Software.zip</attachment>
-      <doi>10.18653/v1/2020.acl-main.597</doi>
       <attachment type="Dataset" hash="4d03ad74">2020.acl-main.597.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.acl-main.597</doi>
       <video href="http://slideslive.com/38929092"/>
       <bibkey>williams-etal-2020-predicting</bibkey>
     </paper>
@@ -8065,8 +8065,8 @@
       <author><first>Liwei</first><last>Cai</last></author>
       <author><first>Yihui</first><last>Peng</last></author>
       <author><first>Chen</first><last>Xia</last></author>
-      <author><first>Arya</first><last>McCarthy</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>6696–6707</pages>
       <abstract>We propose the task of unsupervised morphological paradigm completion. Given only raw text and a lemma list, the task consists of generating the morphological paradigms, i.e., all inflected forms, of the lemmas. From a natural language processing (NLP) perspective, this is a challenging unsupervised task, and high-performing systems have the potential to improve tools for low-resource languages or to assist linguistic annotators. From a cognitive science perspective, this can shed light on how children acquire morphological knowledge. We further introduce a system for the task, which generates morphological paradigms via the following steps: (i) EDIT TREE retrieval, (ii) additional lemma retrieval, (iii) paradigm size discovery, and (iv) inflection generation. We perform an evaluation on 14 typologically diverse languages. Our system outperforms trivial baselines with ease and, for some languages, even obtains a higher accuracy than minimally supervised systems.</abstract>
       <url hash="98f4974a">2020.acl-main.598</url>
@@ -8108,7 +8108,7 @@
     <paper id="601">
       <title>Low-Resource Generation of Multi-hop Reasoning Questions</title>
       <author><first>Jianxing</first><last>Yu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Shuang</first><last>Qiu</last></author>
       <author><first>Qinliang</first><last>Su</last></author>
       <author><first>Kai</first><last>Wang</last></author>
@@ -8167,7 +8167,7 @@
     <paper id="605">
       <title>Parsing into Variable-in-situ Logico-Semantic Graphs</title>
       <author><first>Yufei</first><last>Chen</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>6772–6782</pages>
       <abstract>We propose variable-in-situ logico-semantic graphs to bridge the gap between semantic graph and logical form parsing. The new type of graph-based meaning representation allows us to include analysis for scope-related phenomena, such as quantification, negation and modality, in a way that is consistent with the state-of-the-art underspecification approach. Moreover, the well-formedness of such a graph is clear, since model-theoretic interpretation is available. We demonstrate the effectiveness of this new perspective by developing a new state-of-the-art semantic parser for English Resource Semantics. At the core of this parser is a novel neural graph rewriting system which combines the strengths of Hyperedge Replacement Grammar, a knowledge-intensive model, and Graph Neural Networks, a data-intensive model. Our parser achieves an accuracy of 92.39% in terms of elementary dependency match, which is a 2.88 point improvement over the best data-driven model in the literature. The output of our parser is highly coherent: at least 91% graphs are valid, in that they allow at least one sound scope-resolved logical form.</abstract>
       <url hash="bd78729c">2020.acl-main.605</url>
@@ -8179,7 +8179,7 @@
     <paper id="606">
       <title>Semantic Parsing for <fixed-case>E</fixed-case>nglish as a Second Language</title>
       <author><first>Yuanyuan</first><last>Zhao</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Junjie</first><last>Cao</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>6783–6794</pages>
@@ -8207,7 +8207,7 @@
       <author><first>Ruisheng</first><last>Cao</last></author>
       <author><first>Su</first><last>Zhu</last></author>
       <author><first>Chenyu</first><last>Yang</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <author><first>Rao</first><last>Ma</last></author>
       <author><first>Yanbin</first><last>Zhao</last></author>
       <author><first>Lu</first><last>Chen</last></author>
@@ -8249,7 +8249,7 @@
       <author><first>Xiaonan</first><last>Li</last></author>
       <author><first>Hang</first><last>Yan</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6836–6842</pages>
       <abstract>Recently, the character-word lattice structure has been proved to be effective for Chinese named entity recognition (NER) by incorporating the word information. However, since the lattice structure is complex and dynamic, the lattice-based models are hard to fully utilize the parallel computation of GPUs and usually have a low inference speed. In this paper, we propose FLAT: Flat-LAttice Transformer for Chinese NER, which converts the lattice structure into a flat structure consisting of spans. Each span corresponds to a character or latent word and its position in the original lattice. With the power of Transformer and well-designed position encoding, FLAT can fully leverage the lattice information and has an excellent parallel ability. Experiments on four datasets show FLAT outperforms other lexicon-based models in performance and efficiency.</abstract>
       <url hash="5d72eec5">2020.acl-main.611</url>
@@ -8335,7 +8335,7 @@
     </paper>
     <paper id="618">
       <title>Classification-Based Self-Learning for Weakly Supervised Bilingual Lexicon Induction</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
@@ -8350,8 +8350,8 @@
       <title>Gender in Danger? Evaluating Speech Translation Technology on the <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>SHE</fixed-case> Corpus</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>6923–6933</pages>
@@ -8446,7 +8446,7 @@
       <author><first>Jonathan</first><last>Mamou</last></author>
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Gabriel</first><last>Stanovsky</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>7008–7013</pages>
       <abstract>Question-answer driven Semantic Role Labeling (QA-SRL) was proposed as an attractive open and natural flavour of SRL, potentially attainable from laymen. Recently, a large-scale crowdsourced QA-SRL corpus and a trained parser were released. Trying to replicate the QA-SRL annotation for new texts, we found that the resulting annotations were lacking in quality, particularly in coverage, making them insufficient for further research and evaluation. In this paper, we present an improved crowdsourcing protocol for complex semantic annotation, involving worker selection and training, and a data consolidation phase. Applying this protocol to QA-SRL yielded high-quality annotation with drastically higher coverage, producing a new gold evaluation dataset. We believe that our annotation protocol and gold standard will facilitate future replicable research of natural semantic annotations.</abstract>
@@ -8459,7 +8459,7 @@
       <title>Cross-Lingual Semantic Role Labeling with High-Quality Translated Training Corpus</title>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>7014–7026</pages>
       <abstract>Many efforts of research are devoted to semantic role labeling (SRL) which is crucial for natural language understanding. Supervised approaches have achieved impressing performances when large-scale corpora are available for resource-rich languages such as English. While for the low-resource languages with no annotated SRL dataset, it is still challenging to obtain competitive performances. Cross-lingual SRL is one promising way to address the problem, which has achieved great advances with the help of model transferring and annotation projection. In this paper, we propose a novel alternative based on corpus translation, constructing high-quality training datasets for the target languages from the source gold-standard SRL annotations. Experimental results on Universal Proposition Bank show that the translation-based method is highly effective, and the automatic pseudo datasets can improve the target-language SRL performances significantly.</abstract>
       <url hash="e8dd86d1">2020.acl-main.627</url>
@@ -8471,7 +8471,7 @@
       <title>Sentence Meta-Embeddings for Unsupervised Semantic Textual Similarity</title>
       <author><first>Nina</first><last>Poerner</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>7027–7034</pages>
       <abstract>We address the task of unsupervised Semantic Textual Similarity (STS) by ensembling diverse pre-trained sentence encoders into sentence meta-embeddings. We apply, extend and evaluate different meta-embedding methods from the word embedding literature at the sentence level, including dimensionality reduction (Yin and Schütze, 2016), generalized Canonical Correlation Analysis (Rastogi et al., 2015) and cross-view auto-encoders (Bollegala and Bao, 2018). Our sentence meta-embeddings set a new unsupervised State of The Art (SoTA) on the STS Benchmark and on the STS12-STS16 datasets, with gains of between 3.7% and 6.4% Pearson’s r over single-source systems.</abstract>
       <url hash="2e652b80">2020.acl-main.628</url>
@@ -8518,7 +8518,7 @@
     </paper>
     <paper id="632">
       <title>Exploiting Personal Characteristics of Debaters for Predicting Persuasiveness</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Michael</first><last>Völske</last></author>
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Nikolay</first><last>Kolyada</last></author>
@@ -8595,7 +8595,7 @@
     <paper id="637">
       <title>Modeling Long Context for Task-Oriented Dialogue State Generation</title>
       <author><first>Jun</first><last>Quan</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>7119–7124</pages>
       <abstract>Based on the recently proposed transferable dialogue state generator (TRADE) that predicts dialogue states from utterance-concatenated dialogue context, we propose a multi-task learning model with a simple yet effective utterance tagging technique and a bidirectional language model as an auxiliary task for task-oriented dialogue state generation. By enabling the model to learn a better representation of the long dialogue context, our approaches attempt to solve the problem that the performance of the baseline significantly drops when the input dialogue context sequence is long. In our experiments, our proposed model achieves a 7.03% relative improvement over the baseline, establishing a new state-of-the-art joint goal accuracy of 52.04% on the MultiWOZ 2.0 dataset.</abstract>
       <url hash="416c3f2b">2020.acl-main.637</url>
@@ -8606,7 +8606,7 @@
     <paper id="638">
       <title>Multi-Domain Dialogue Acts and Response Co-Generation</title>
       <author><first>Kai</first><last>Wang</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Xiaojun</first><last>Quan</last></author>
       <author><first>Jianxing</first><last>Yu</last></author>
@@ -8620,7 +8620,7 @@
     <paper id="639">
       <title>Exploring Contextual Word-level Style Relevance for Unsupervised Style Transfer</title>
       <author><first>Chulun</first><last>Zhou</last></author>
-      <author><first>Liangyu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last></author>
       <author><first>Jiachen</first><last>Liu</last></author>
       <author><first>Xinyan</first><last>Xiao</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
@@ -8701,12 +8701,12 @@
       <title><fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>: a Tasty <fixed-case>F</fixed-case>rench Language Model</title>
       <author><first>Louis</first><last>Martin</last></author>
       <author><first>Benjamin</first><last>Muller</last></author>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>7203–7219</pages>
       <abstract>Pretrained language models are now ubiquitous in Natural Language Processing. Despite their success, most available models have either been trained on English data or on the concatenation of data in multiple languages. This makes practical use of such models –in all languages except English– very limited. In this paper, we investigate the feasibility of training monolingual Transformer-based language models for other languages, taking French as an example and evaluating our language models on part-of-speech tagging, dependency parsing, named entity recognition and natural language inference tasks. We show that the use of web crawled data is preferable to the use of Wikipedia data. More surprisingly, we show that a relatively small web crawled dataset (4GB) leads to results that are as good as those obtained using larger datasets (130+GB). Our best performing model CamemBERT reaches or improves the state of the art in all four downstream tasks.</abstract>
       <url hash="1ba3f6f0">2020.acl-main.645</url>
@@ -8741,7 +8741,7 @@
     </paper>
     <paper id="648">
       <title>2kenize: Tying Subword Sequences for <fixed-case>C</fixed-case>hinese Script Conversion</title>
-      <author><first>Pranav</first><last>A</last></author>
+      <author id="pranav-a"><first>Pranav</first><last>A</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>7257–7272</pages>
       <abstract>Simplified Chinese to Traditional Chinese character conversion is a common preprocessing step in Chinese NLP. Despite this, current approaches have insufficient performance because they do not take into account that a simplified Chinese character can correspond to multiple traditional characters. Here, we propose a model that can disambiguate between mappings and convert between the two scripts. The model is based on subword segmentation, two language models, as well as a method for mapping between subword sequences. We further construct benchmark datasets for topic classification and script conversion. Our proposed method outperforms previous Chinese Character conversion approaches by 6 points in accuracy. These results are further confirmed in a downstream application, where 2kenize is used to convert pretraining dataset for topic classification. An error analysis reveals that our method’s particular strengths are in dealing with code mixing and named entities.</abstract>
@@ -8753,8 +8753,8 @@
     <paper id="649">
       <title>Predicting the Growth of Morphological Families from Social and Linguistic Factors</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>7273–7283</pages>
       <abstract>We present the first study that examines the evolution of morphological families, i.e., sets of morphologically related words such as “trump”, “antitrumpism”, and “detrumpify”, in social media. We introduce the novel task of Morphological Family Expansion Prediction (MFEP) as predicting the increase in the size of a morphological family. We create a ten-year Reddit corpus as a benchmark for MFEP and evaluate a number of baselines on this benchmark. Our experiments demonstrate very good performance on MFEP.</abstract>
       <url hash="0977a251">2020.acl-main.649</url>
@@ -8776,7 +8776,7 @@
     <paper id="651">
       <title><fixed-case>C</fixed-case>lar<fixed-case>Q</fixed-case>: A large-scale and diverse dataset for Clarification Question Generation</title>
       <author><first>Vaibhav</first><last>Kumar</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>7296–7301</pages>
       <abstract>Question answering and conversational systems are often baffled and need help clarifying certain ambiguities. However, limitations of existing datasets hinder the development of large-scale models capable of generating and utilising clarification questions. In order to overcome these limitations, we devise a novel bootstrapping framework (based on self-supervision) that assists in the creation of a diverse, large-scale dataset of clarification questions based on post-comment tuples extracted from stackexchange. The framework utilises a neural network based architecture for classifying clarification questions. It is a two-step method where the first aims to increase the precision of the classifier and second aims to increase its recall. We quantitatively demonstrate the utility of the newly created dataset by applying it to the downstream task of question-answering. The final dataset, ClarQ, consists of ~2M examples distributed across 173 domains of stackexchange. We release this dataset in order to foster research into the field of clarification question generation with the larger goal of enhancing dialog and question answering systems.</abstract>
       <url hash="1e80bf0c">2020.acl-main.651</url>
@@ -8788,10 +8788,10 @@
       <title><fixed-case>D</fixed-case>o<fixed-case>QA</fixed-case> - Accessing Domain-Specific <fixed-case>FAQ</fixed-case>s via Conversational <fixed-case>QA</fixed-case></title>
       <author><first>Jon Ander</first><last>Campos</last></author>
       <author><first>Arantxa</first><last>Otegi</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>7302–7314</pages>
       <abstract>The goal of this work is to build conversational Question Answering (QA) interfaces for the large body of domain-specific information available in FAQ sites. We present DoQA, a dataset with 2,437 dialogues and 10,917 QA pairs. The dialogues are collected from three Stack Exchange sites using the Wizard of Oz method with crowdsourcing. Compared to previous work, DoQA comprises well-defined information needs, leading to more coherent and natural conversations with less factoid questions and is multi-domain. In addition, we introduce a more realistic information retrieval (IR) scenario where the system needs to find the answer in any of the FAQ documents. The results of an existing, strong, system show that, thanks to transfer learning from a Wikipedia QA dataset and fine tuning on a single FAQ domain, it is possible to build high quality conversational QA systems for FAQs without in-domain training data. The good results carry over into the more challenging IR scenario. In both cases, there is still ample room for improvement, as indicated by the higher human upperbound.</abstract>
       <url hash="4c934249">2020.acl-main.652</url>
@@ -8856,7 +8856,7 @@
     <paper id="657">
       <title>Premise Selection in Natural Language Mathematical Texts</title>
       <author><first>Deborah</first><last>Ferreira</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>7365–7374</pages>
       <abstract>The discovery of supporting evidence for addressing complex mathematical problems is a semantically challenging task, which is still unexplored in the field of natural language processing for mathematical text. The natural language premise selection task consists in using conjectures written in both natural language and mathematical formulae to recommend premises that most likely will be useful to prove a particular statement. We propose an approach to solve this task as a link prediction problem, using Deep Convolutional Graph Neural Networks. This paper also analyses how different baselines perform in this task and shows that a graph structure can provide higher F1-score, especially when considering multi-hop premise selection.</abstract>
       <url hash="72c89168">2020.acl-main.657</url>
@@ -8869,8 +8869,8 @@
       <author><first>Mikel</first><last>Artetxe</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>7375–7388</pages>
       <abstract>We review motivations, definition, approaches, and methodology for unsupervised cross-lingual learning and call for a more rigorous position in each of them. An existing rationale for such research is based on the lack of parallel data for many of the world’s languages. However, we argue that a scenario without any parallel data and abundant monolingual data is unrealistic in practice. We also discuss different training signals that have been used in previous work, which depart from the pure unsupervised setting. We then describe common methodological issues in tuning and evaluation of unsupervised cross-lingual models and present best practices. Finally, we provide a unified outlook for different types of research in this area (i.e., cross-lingual word embeddings, deep multilingual pretraining, and unsupervised machine translation) and argue for comparable evaluation of these models.</abstract>
       <url hash="43d6e6db">2020.acl-main.658</url>
@@ -8919,7 +8919,7 @@
     <paper id="662">
       <title>What Question Answering can Learn from Trivia Nerds</title>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <pages>7422–7435</pages>
       <abstract>In addition to the traditional task of machines answering questions, question answering (QA) research creates interesting, challenging questions that help systems how to answer questions and reveal the best systems. We argue that creating a QA dataset—and the ubiquitous leaderboard that goes with it—closely resembles running a trivia tournament: you write questions, have agents (either humans or machines) answer the questions, and declare a winner. However, the research community has ignored the hard-learned lessons from decades of the trivia community creating vibrant, fair, and effective question answering competitions. After detailing problems with existing QA datasets, we outline the key lessons—removing ambiguity, discriminating skill, and adjudicating disputes—that can transfer to QA research and how they might be implemented.</abstract>
       <url hash="86ef92a8">2020.acl-main.662</url>
@@ -8982,7 +8982,7 @@
       <author><first>Xiang</first><last>Kong</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>7479–7485</pages>
       <abstract>In this work, we explore the implicit event argument detection task, which studies event arguments beyond sentence boundaries. The addition of cross-sentence argument candidates imposes great challenges for modeling. To reduce the number of candidates, we adopt a two-step approach, decomposing the problem into two sub-problems: argument head-word detection and head-to-span expansion. Evaluated on the recent RAMS dataset (Ebner et al., 2020), our model achieves overall better performance than a strong sequence labeling baseline. We further provide detailed error analysis, presenting where the model mainly makes errors and indicating directions for future improvements. It remains a challenge to detect implicit arguments, calling for more future work of document-level modeling for this task.</abstract>
       <url hash="706f8000">2020.acl-main.667</url>
@@ -8995,7 +8995,7 @@
       <author><first>Or</first><last>Honovich</last></author>
       <author><first>Lucas</first><last>Torroba Hennigen</last></author>
       <author><first>Omri</first><last>Abend</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>7486–7497</pages>
       <abstract>Machine reading is an ambitious goal in NLP that subsumes a wide range of text understanding capabilities. Within this broad framework, we address the task of machine reading the time of historical events, compile datasets for the task, and develop a model for tackling it. Given a brief textual description of an event, we show that good performance can be achieved by extracting relevant sentences from Wikipedia, and applying a combination of task-specific and general-purpose feature embeddings for the classification. Furthermore, we establish a link between the historical event ordering task and the event focus time task from the information retrieval literature, showing they also provide a challenging test case for machine reading algorithms.</abstract>
       <url hash="a914a6e9">2020.acl-main.668</url>
@@ -9136,7 +9136,7 @@
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>7590–7604</pages>
       <abstract>Large-scale pretrained language models are the major driving force behind recent improvements in perfromance on the Winograd Schema Challenge, a widely employed test of commonsense reasoning ability. We show, however, with a new diagnostic dataset, that these models are sensitive to linguistic perturbations of the Winograd examples that minimally affect human understanding. Our results highlight interesting differences between humans and language models: language models are more sensitive to number or gender alternations and synonym replacements than humans, and humans are more stable and consistent in their predictions, maintain a much higher absolute performance, and perform better on non-associative instances than associative ones.</abstract>
       <url hash="e05fe25e">2020.acl-main.679</url>
@@ -9148,7 +9148,7 @@
     <paper id="680">
       <title>Temporally-Informed Analysis of Named Entity Recognition</title>
       <author><first>Shruti</first><last>Rijhwani</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>7605–7617</pages>
       <abstract>Natural language processing models often have to make predictions on text data that evolves over time as a result of changes in language use or the information described in the text. However, evaluation results on existing data sets are seldom reported by taking the timestamp of the document into account. We analyze and propose methods that make better use of temporally-diverse training data, with a focus on the task of named entity recognition. To support these experiments, we introduce a novel data set of English tweets annotated with named entities. We empirically demonstrate the effect of temporal drift on performance, and how the temporal information of documents can be used to obtain better models compared to those that disregard temporal information. Our analysis gives insights into why this information is useful, in the hope of informing potential avenues of improvement for named entity recognition as well as other NLP tasks under similar experimental setups.</abstract>
       <url hash="6567098c">2020.acl-main.680</url>
@@ -9159,7 +9159,7 @@
     <paper id="681">
       <title>Towards Open Domain Event Trigger Identification using Adversarial Domain Adaptation</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>7618–7624</pages>
       <abstract>We tackle the task of building supervised event trigger identification models which can generalize better across domains. Our work leverages the adversarial domain adaptation (ADA) framework to introduce domain-invariance. ADA uses adversarial training to construct representations that are predictive for trigger identification, but not predictive of the example’s domain. It requires no labeled data from the target domain, making it completely unsupervised. Experiments with two domains (English literature and news) show that ADA leads to an average F1 score improvement of 3.9 on out-of-domain data. Our best performing model (BERT-A) reaches 44-49 F1 across both domains, using no labeled target data. Preliminary experiments reveal that finetuning on 1% labeled data, followed by self-training leads to substantial improvement, reaching 51.5 and 67.2 F1 on literature and news respectively.</abstract>
       <url hash="a904bece">2020.acl-main.681</url>
@@ -9200,7 +9200,7 @@
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Oleksandr</first><last>Polozov</last></author>
       <author><first>Nebojsa</first><last>Jojic</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>7652–7662</pages>
       <abstract>We explore learning web-based tasks from a human teacher through natural language explanations and a single demonstration. Our approach investigates a new direction for semantic parsing that models explaining a demonstration in a context, rather than mapping explanations to demonstrations. By leveraging the idea of inverse semantics from program synthesis to reason backwards from observed demonstrations, we ensure that all considered interpretations are consistent with executable actions in any context, thus simplifying the problem of search over logical forms. We present a dataset of explanations paired with demonstrations for web-based tasks. Our methods show better task completion rates than a supervised semantic parsing baseline (40% relative improvement on average), and are competitive with simple exploration-and-demonstration based methods, while requiring no exploration of the environment. In learning to align explanations with demonstrations, basic properties of natural language syntax emerge as learned behavior. This is an interesting example of pragmatic language acquisition without any linguistic annotation.</abstract>
       <url hash="ce78dc1f">2020.acl-main.684</url>
@@ -9378,7 +9378,7 @@
     <paper id="698">
       <title>Negated and Misprimed Probes for Pretrained Language Models: Birds Can Talk, But Cannot Fly</title>
       <author><first>Nora</first><last>Kassner</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>7811–7818</pages>
       <abstract>Building on Petroni et al. 2019, we propose two new probing tasks analyzing factual knowledge stored in Pretrained Language Models (PLMs). (1) Negation. We find that PLMs do not distinguish between negated (‘‘Birds cannot [MASK]”) and non-negated (‘‘Birds can [MASK]”) cloze questions. (2) Mispriming. Inspired by priming methods in human psychology, we add “misprimes” to cloze questions (‘‘Talk? Birds can [MASK]”). We find that PLMs are easily distracted by misprimes. These results suggest that PLMs still have a long way to go to adequately learn human-like factual knowledge.</abstract>
       <url hash="f45551e5">2020.acl-main.698</url>
@@ -9414,8 +9414,8 @@
       <author><first>Jesse</first><last>Dunietz</last></author>
       <author><first>Greg</first><last>Burnham</last></author>
       <author><first>Akash</first><last>Bharadwaj</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Dave</first><last>Ferrucci</last></author>
       <pages>7839–7859</pages>
       <abstract>Many tasks aim to measure machine reading comprehension (MRC), often focusing on question types presumed to be difficult. Rarely, however, do task designers start by considering what systems should in fact comprehend. In this paper we make two key contributions. First, we argue that existing approaches do not adequately define comprehension; they are too unsystematic about what content is tested. Second, we present a detailed definition of comprehension—a “Template of Understanding”—for a widely useful class of texts, namely short narratives. We then conduct an experiment that strongly suggests existing systems are not up to the task of narrative understanding as we define it.</abstract>
@@ -9427,7 +9427,7 @@
     </paper>
     <paper id="702">
       <title>Gender Gap in Natural Language Processing Research: Disparities in Authorship and Citations</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>7860–7870</pages>
       <abstract>Disparities in authorship and citations across gender can have substantial adverse consequences not just on the disadvantaged genders, but also on the field of study as a whole. Measuring gender gaps is a crucial step towards addressing them. In this work, we examine female first author percentages and the citations to their papers in Natural Language Processing (1965 to 2019). We determine aggregate-level statistics using an existing manually curated author–gender list as well as first names strongly associated with a gender. We find that only about 29% of first authors are female and only about 25% of last authors are female. Notably, this percentage has not improved since the mid 2000s. We also show that, on average, female first authors are cited less than male first authors, even when controlling for experience and area of research. Finally, we discuss the ethical considerations involved in automatic demographic analysis.</abstract>
       <url hash="3bbf1e45">2020.acl-main.702</url>
@@ -9446,7 +9446,7 @@
       <author><first>Abdelrahman</first><last>Mohamed</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>7871–7880</pages>
       <abstract>We present BART, a denoising autoencoder for pretraining sequence-to-sequence models. BART is trained by (1) corrupting text with an arbitrary noising function, and (2) learning a model to reconstruct the original text. It uses a standard Tranformer-based neural machine translation architecture which, despite its simplicity, can be seen as generalizing BERT (due to the bidirectional encoder), GPT (with the left-to-right decoder), and other recent pretraining schemes. We evaluate a number of noising approaches, finding the best performance by both randomly shuffling the order of sentences and using a novel in-filling scheme, where spans of text are replaced with a single mask token. BART is particularly effective when fine tuned for text generation but also works well for comprehension tasks. It matches the performance of RoBERTa on GLUE and SQuAD, and achieves new state-of-the-art results on a range of abstractive dialogue, question answering, and summarization tasks, with gains of up to 3.5 ROUGE. BART also provides a 1.1 BLEU increase over a back-translation system for machine translation, with only target language pretraining. We also replicate other pretraining schemes within the BART framework, to understand their effect on end-task performance.</abstract>
       <url hash="c4fc7657">2020.acl-main.703</url>
@@ -9458,7 +9458,7 @@
       <title><fixed-case>BLEURT</fixed-case>: Learning Robust Metrics for Text Generation</title>
       <author><first>Thibault</first><last>Sellam</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>7881–7892</pages>
       <abstract>Text generation has made significant advances in the last few years. Yet, evaluation metrics have lagged behind, as the most popular choices (e.g., BLEU and ROUGE) may correlate poorly with human judgment. We propose BLEURT, a learned evaluation metric for English based on BERT. BLEURT can model human judgment with a few thousand possibly biased training examples. A key aspect of our approach is a novel pre-training scheme that uses millions of synthetic examples to help the model generalize. BLEURT provides state-of-the-art results on the last three years of the WMT Metrics shared task and the WebNLG data set. In contrast to a vanilla BERT-based approach, it yields superior results even when the training data is scarce and out-of-distribution.</abstract>
       <url hash="197d05ab">2020.acl-main.704</url>
@@ -9491,7 +9491,7 @@
       <author><first>Abhijit</first><last>Gupta</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>7906–7917</pages>
       <abstract>Neural networks lack the ability to reason about qualitative physics and so cannot generalize to scenarios and tasks unseen during training. We propose ESPRIT, a framework for commonsense reasoning about qualitative physics in natural language that generates interpretable descriptions of physical events. We use a two-step approach of first identifying the pivotal physical events in an environment and then generating natural language descriptions of those events using a data-to-text approach. Our framework learns to generate explanations of how the physical simulation will causally evolve so that an agent or a human can easily reason about a solution using those interpretable descriptions. Human evaluations indicate that ESPRIT produces crucial fine-grained details and has high coverage of physical concepts compared to even human annotations. Dataset, code and documentation are available at <url>https://github.com/salesforce/esprit</url>.</abstract>
       <url hash="133796f3">2020.acl-main.706</url>
@@ -9517,7 +9517,7 @@
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Jianshu</first><last>Chen</last></author>
       <author><first>Yu</first><last>Su</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>7929–7942</pages>
       <abstract>Neural natural language generation (NLG) models have recently shown remarkable progress in fluency and coherence. However, existing studies on neural NLG are primarily focused on surface-level realizations with limited emphasis on logical inference, an important aspect of human thinking and language. In this paper, we suggest a new NLG task where a model is tasked with generating natural language statements that can be <i>logically entailed</i> by the facts in an open-domain semi-structured table. To facilitate the study of the proposed logical NLG problem, we use the existing TabFact dataset~(CITATION) featured with a wide range of logical/symbolic inferences as our testbed, and propose new automatic metrics to evaluate the fidelity of generation models w.r.t. logical inference. The new task poses challenges to the existing monotonic generation frameworks due to the mismatch between sequence order and logical order. In our experiments, we comprehensively survey different generation architectures (LSTM, Transformer, Pre-Trained LM) trained with different algorithms (RL, Adversarial Training, Coarse-to-Fine) on the dataset and made following observations: 1) Pre-Trained LM can significantly boost both the fluency and logical fidelity metrics, 2) RL and Adversarial Training are trading fluency for fidelity, 3) Coarse-to-Fine generation can help partially alleviate the fidelity issue while maintaining high language fluency. The code and data are available at <url>https://github.com/wenhuchen/LogicNLG</url>.</abstract>
@@ -9604,7 +9604,7 @@
     <paper id="714">
       <title>Document-Level Event Role Filler Extraction using Multi-Granularity Contextualized Encoding</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>8010–8020</pages>
       <abstract>Few works in the literature of event extraction have gone beyond individual sentences to make extraction decisions. This is problematic when the information needed to recognize an event argument is spread across multiple sentences. We argue that document-level event extraction is a difficult task since it requires a view of a larger context to determine which spans of text correspond to event role fillers. We first investigate how end-to-end neural sequence models (with pre-trained language model representations) perform on document-level role filler extraction, as well as how the length of context captured affects the models’ performance. To dynamically aggregate information captured by neural representations learned at different levels of granularity (e.g., the sentence- and paragraph-level), we propose a novel multi-granularity reader. We evaluate our models on the MUC-4 event extraction dataset, and show that our best system performs substantially better than prior work. We also report findings on the relationship between context length and neural model performance on the task.</abstract>
       <url hash="2c3090d1">2020.acl-main.714</url>
@@ -9694,7 +9694,7 @@
       <title><fixed-case>Z</fixed-case>ero<fixed-case>S</fixed-case>hot<fixed-case>C</fixed-case>eres: Zero-Shot Relation Extraction from Semi-Structured Webpages</title>
       <author><first>Colin</first><last>Lockard</last></author>
       <author><first>Prashant</first><last>Shiralkar</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>8105–8117</pages>
       <abstract>In many documents, such as semi-structured webpages, textual semantics are augmented with additional information conveyed using visual elements including layout, font size, and color. Prior work on information extraction from semi-structured websites has required learning an extraction model specific to a given template via either manually labeled or distantly supervised data from that template. In this work, we propose a solution for “zero-shot” open-domain relation extraction from webpages with a previously unseen template, including from websites with little overlap with existing sources of knowledge for distant supervision and websites in entirely new subject verticals. Our model uses a graph neural network-based approach to build a rich representation of text fields on a webpage and the relationships between them, enabling generalization to new templates. Experiments show this approach provides a 31% F1 gain over a baseline for zero-shot extraction in a new subject vertical.</abstract>
@@ -9708,7 +9708,7 @@
       <author><first>Shruti</first><last>Rijhwani</last></author>
       <author><first>Shuyan</first><last>Zhou</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>8118–8123</pages>
       <abstract>Traditional named entity recognition models use gazetteers (lists of entities) as features to improve performance. Although modern neural network models do not require such hand-crafted features for strong performance, recent work has demonstrated their utility for named entity recognition on English data. However, designing such features for low-resource languages is challenging, because exhaustive entity gazetteers do not exist in these languages. To address this problem, we propose a method of “soft gazetteers” that incorporates ubiquitously available information from English knowledge bases, such as Wikipedia, into neural named entity recognition models through cross-lingual entity linking. Our experiments on four low-resource languages show an average improvement of 4 points in F1 score.</abstract>
       <url hash="250c833f">2020.acl-main.722</url>
@@ -9720,7 +9720,7 @@
       <title>A Prioritization Model for Suicidality Risk Assessment</title>
       <author><first>Han-Chin</first><last>Shing</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>8124–8137</pages>
       <abstract>We reframe suicide risk assessment from social media as a ranking problem whose goal is maximizing detection of severely at-risk individuals given the time available. Building on measures developed for resource-bounded document retrieval, we introduce a well founded evaluation paradigm, and demonstrate using an expert-annotated test collection that meaningful improvements over plausible cascade model baselines can be achieved using an approach that jointly ranks individuals and their social media posts.</abstract>
       <url hash="eb08ff8c">2020.acl-main.723</url>
@@ -9783,7 +9783,7 @@
     <paper id="728">
       <title>History for Visual Dialog: Do we really need it?</title>
       <author><first>Shubham</first><last>Agarwal</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Joon-Young</first><last>Lee</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
@@ -9812,7 +9812,7 @@
       <title><fixed-case>TVQA</fixed-case>+: Spatio-Temporal Grounding for Video Question Answering</title>
       <author><first>Jie</first><last>Lei</last></author>
       <author><first>Licheng</first><last>Yu</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>8211–8225</pages>
       <abstract>We present the task of Spatio-Temporal Video Question Answering, which requires intelligent systems to simultaneously retrieve relevant moments and detect referenced visual concepts (people and objects) to answer natural language questions about videos. We first augment the TVQA dataset with 310.8K bounding boxes, linking depicted objects to visual concepts in questions and answers. We name this augmented version as TVQA+. We then propose Spatio-Temporal Answerer with Grounded Evidence (STAGE), a unified framework that grounds evidence in both spatial and temporal domains to answer questions about videos. Comprehensive experiments and analyses demonstrate the effectiveness of our framework and how the rich annotations in our TVQA+ dataset can contribute to the question answering task. Moreover, by performing this joint task, our model is able to produce insightful and interpretable spatio-temporal attention visualizations.</abstract>
@@ -9826,7 +9826,7 @@
       <author><first>Po-Yao</first><last>Huang</last></author>
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Xiaojun</first><last>Chang</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>8226–8237</pages>
       <abstract>Unsupervised machine translation (MT) has recently achieved impressive results with monolingual corpora only. However, it is still challenging to associate source-target sentences in the latent space. As people speak different languages biologically share similar visual systems, the potential of achieving better alignment through visual content is promising yet under-explored in unsupervised multimodal MT (MMT). In this paper, we investigate how to utilize visual content for disambiguation and promoting latent space alignment in unsupervised MMT. Our model employs multimodal back-translation and features pseudo visual pivoting in which we learn a shared multilingual visual-semantic embedding space and incorporate visually-pivoted captioning as additional weak supervision. The experimental results on the widely used Multi30K dataset show that the proposed model significantly improves over the state-of-the-art methods and generalizes well when images are not available at the testing time.</abstract>
       <url hash="4062a687">2020.acl-main.731</url>
@@ -9838,7 +9838,7 @@
       <title>A Multitask Learning Approach for Diacritic Restoration</title>
       <author><first>Sawsan</first><last>Alqahtani</last></author>
       <author><first>Ajay</first><last>Mishra</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>8238–8247</pages>
       <abstract>In many languages like Arabic, diacritics are used to specify pronunciations as well as meanings. Such diacritics are often omitted in written text, increasing the number of possible pronunciations and meanings for a word. This results in a more ambiguous text making computational processing on such text more difficult. Diacritic restoration is the task of restoring missing diacritics in the written text. Most state-of-the-art diacritic restoration models are built on character level information which helps generalize the model to unseen data, but presumably lose useful information at the word level. Thus, to compensate for this loss, we investigate the use of multi-task learning to jointly optimize diacritic restoration with related NLP problems namely word segmentation, part-of-speech tagging, and syntactic diacritization. We use Arabic as a case study since it has sufficient data resources for tasks that we consider in our joint modeling. Our joint models significantly outperform the baselines and are comparable to the state-of-the-art models that are more complex relying on morphological analyzers and/or a lot more data (e.g. dialectal data).</abstract>
       <url hash="782cf3f3">2020.acl-main.732</url>
@@ -9905,7 +9905,7 @@
     <paper id="737">
       <title>Phonetic and Visual Priors for Decipherment of Informal <fixed-case>R</fixed-case>omanization</title>
       <author><first>Maria</first><last>Ryskina</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>8308–8319</pages>
       <abstract>Informal romanization is an idiosyncratic process used by humans in informal digital communication to encode non-Latin script languages into Latin character sets found on common keyboards. Character substitution choices differ between users but have been shown to be governed by the same main principles observed across a variety of languages—namely, character pairs are often associated through phonetic or visual similarity. We propose a noisy-channel WFST cascade model for deciphering the original non-Latin script from observed romanized text in an unsupervised fashion. We train our model directly on romanized data from two languages: Egyptian Arabic and Russian. We demonstrate that adding inductive bias through phonetic and visual priors on character mappings substantially improves the model’s performance on both languages, yielding results much closer to the supervised skyline. Finally, we introduce a new dataset of romanized Russian, collected from a Russian social network website and partially annotated for our experiments.</abstract>
@@ -9916,9 +9916,9 @@
     </paper>
     <paper id="738">
       <title>Active Learning for Coreference Resolution using Discrete Annotation</title>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Gabriel</first><last>Stanovsky</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>8320–8331</pages>
       <abstract>We improve upon pairwise annotation for active learning in coreference resolution, by asking annotators to identify mention antecedents if a presented mention pair is deemed not coreferent. This simple modification, when combined with a novel mention clustering algorithm for selecting which examples to label, is much more efficient in terms of the performance obtained per annotation budget. In experiments with existing benchmark coreference datasets, we show that the signal from this additional question leads to significant performance gains per human-annotation hour. Future work can use our annotation protocol to effectively develop coreference models for new domains. Our code is publicly available.</abstract>
       <url hash="611bd632">2020.acl-main.738</url>
@@ -9946,7 +9946,7 @@
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Doug</first><last>Downey</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>8342–8360</pages>
       <abstract>Language models pretrained on text from a wide variety of sources form the foundation of today’s NLP. In light of the success of these broad-coverage models, we investigate whether it is still helpful to tailor a pretrained model to the domain of a target task. We present a study across four domains (biomedical and computer science publications, news, and reviews) and eight classification tasks, showing that a second phase of pretraining in-domain (domain-adaptive pretraining) leads to performance gains, under both high- and low-resource settings. Moreover, adapting to the task’s unlabeled data (task-adaptive pretraining) improves performance even after domain-adaptive pretraining. Finally, we show that adapting to a task corpus augmented using simple data selection strategies is an effective alternative, especially when resources for domain-adaptive pretraining might be unavailable. Overall, we consistently find that multi-phase adaptive pretraining offers large gains in task performance.</abstract>
       <award>Honorable Mention for Best Overall Paper</award>
@@ -9997,7 +9997,7 @@
       <title>Structured Tuning for Semantic Role Labeling</title>
       <author><first>Tao</first><last>Li</last></author>
       <author><first>Parth Anand</first><last>Jawale</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
       <pages>8402–8412</pages>
       <abstract>Recent neural network-driven semantic role labeling (SRL) systems have shown impressive improvements in F1 scores. These improvements are due to expressive input representations, which, at least at the surface, are orthogonal to knowledge-rich constrained decoding mechanisms that helped linear SRL models. Introducing the benefits of structure to inform neural models presents a methodological challenge. In this paper, we present a structured tuning framework to improve models using softened constraints only at training time. Our framework leverages the expressiveness of neural networks and provides supervision with structured loss components. We start with a strong baseline (RoBERTa) to validate the impact of our approach, and show that our framework outperforms the baseline by learning to comply with declarative constraints. Additionally, our experiments with smaller training sizes show that we can achieve consistent improvements under low-resource scenarios.</abstract>
@@ -10010,7 +10010,7 @@
       <title><fixed-case>T</fixed-case>a<fixed-case>BERT</fixed-case>: Pretraining for Joint Understanding of Textual and Tabular Data</title>
       <author><first>Pengcheng</first><last>Yin</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>8413–8426</pages>
       <abstract>Recent years have witnessed the burgeoning of pretrained language models (LMs) for text-based natural language (NL) understanding tasks. Such models are typically trained on free-form NL text, hence may not be suitable for tasks like semantic parsing over structured data, which require reasoning over both free-form NL questions and structured tabular data (e.g., database tables). In this paper we present TaBERT, a pretrained LM that jointly learns representations for NL sentences and (semi-)structured tables. TaBERT is trained on a large corpus of 26 million tables and their English contexts. In experiments, neural semantic parsers using TaBERT as feature representation layers achieve new best results on the challenging weakly-supervised semantic parsing benchmark WikiTableQuestions, while performing competitively on the text-to-SQL dataset Spider.</abstract>
@@ -10039,10 +10039,10 @@
       <author><first>Naman</first><last>Goyal</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Guillaume</first><last>Wenzek</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <pages>8440–8451</pages>
       <abstract>This paper shows that pretraining multilingual language models at scale leads to significant performance gains for a wide range of cross-lingual transfer tasks. We train a Transformer-based masked language model on one hundred languages, using more than two terabytes of filtered CommonCrawl data. Our model, dubbed XLM-R, significantly outperforms multilingual BERT (mBERT) on a variety of cross-lingual benchmarks, including +14.6% average accuracy on XNLI, +13% average F1 score on MLQA, and +2.4% F1 score on NER. XLM-R performs particularly well on low-resource languages, improving 15.7% in XNLI accuracy for Swahili and 11.4% for Urdu over previous XLM models. We also present a detailed empirical analysis of the key factors that are required to achieve these gains, including the trade-offs between (1) positive transfer and capacity dilution and (2) the performance of high and low resource languages at scale. Finally, we show, for the first time, the possibility of multilingual modeling without sacrificing per-language performance; XLM-R is very competitive with strong monolingual models on the GLUE and XNLI benchmarks. We will make our code and models publicly available.</abstract>
@@ -10079,7 +10079,7 @@
       <title>Multi-Domain Named Entity Recognition with Genre-Aware and Agnostic Inference</title>
       <author><first>Jing</first><last>Wang</last></author>
       <author><first>Mayank</first><last>Kulkarni</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>8476–8488</pages>
       <abstract>Named entity recognition is a key component of many text processing pipelines and it is thus essential for this component to be robust to different types of input. However, domain transfer of NER models with data from multiple genres has not been widely studied. To this end, we conduct NER experiments in three predictive setups on data from: a) multiple domains; b) multiple domains where the genre label is unknown at inference time; c) domains not encountered in training. We introduce a new architecture tailored to this task by using shared and private domain parameters and multi-task learning. This consistently outperforms all other baseline and competitive methods on all three experimental setups, with differences ranging between +1.95 to +3.11 average F1 across multiple genres when compared to standard approaches. These results illustrate the challenges that need to be taken into account when building real-world NLP applications that are robust to various types of text and the methods that can help, at least partially, alleviate these issues.</abstract>
       <url hash="ab277b00">2020.acl-main.750</url>
@@ -10091,7 +10091,7 @@
       <title><fixed-case>TX</fixed-case>tract: Taxonomy-Aware Knowledge Extraction for Thousands of Product Categories</title>
       <author><first>Giannis</first><last>Karamanolakis</last></author>
       <author><first>Jun</first><last>Ma</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <pages>8489–8502</pages>
       <abstract>Extracting structured knowledge from product profiles is crucial for various applications in e-Commerce. State-of-the-art approaches for knowledge extraction were each designed for a single category of product, and thus do not apply to real-life e-Commerce scenarios, which often contain thousands of diverse categories. This paper proposes TXtract, a taxonomy-aware knowledge extraction model that applies to thousands of product categories organized in a hierarchical taxonomy. Through category conditional self-attention and multi-task learning, our approach is both scalable, as it trains a single model for thousands of categories, and effective, as it extracts category-specific attribute values. Experiments on products from a taxonomy with 4,000 categories show that TXtract outperforms state-of-the-art approaches by up to 10% in F1 and 15% in coverage across all categories.</abstract>
       <url hash="4eab8b76">2020.acl-main.751</url>
@@ -10101,7 +10101,7 @@
     </paper>
     <paper id="752">
       <title><fixed-case>T</fixed-case>rigger<fixed-case>NER</fixed-case>: Learning with Entity Triggers as Explanations for Named Entity Recognition</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Dong-Ho</first><last>Lee</last></author>
       <author><first>Ming</first><last>Shen</last></author>
       <author><first>Ryan</first><last>Moreno</last></author>
@@ -10117,7 +10117,7 @@
     </paper>
     <paper id="753">
       <title>Addressing Posterior Collapse with Mutual Information for Improved Variational Neural Machine Translation</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Xian</first><last>Li</last></author>
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Ning</first><last>Dong</last></author>
@@ -10144,8 +10144,8 @@
       <title>Evaluating Robustness to Input Perturbations for Neural Machine Translation</title>
       <author><first>Xing</first><last>Niu</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>8538–8544</pages>
       <abstract>Neural Machine Translation (NMT) models are sensitive to small perturbations in the input. Robustness to such perturbations is typically measured using translation quality metrics such as BLEU on the noisy input. This paper proposes additional metrics which measure the relative degradation and changes in translation when small perturbations are added to the input. We focus on a class of models employing subword regularization to address robustness and perform extensive evaluations of these models using the robustness measures proposed. Results show that our proposed metrics reveal a clear trend of improved robustness to perturbations when subword regularization methods are used.</abstract>
       <url hash="559abcaa">2020.acl-main.755</url>
@@ -10195,7 +10195,7 @@
     <paper id="759">
       <title>Automated Topical Component Extraction Using Neural Network Attention Scores from Source-based Essay Scoring</title>
       <author><first>Haoran</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>8569–8584</pages>
       <abstract>While automated essay scoring (AES) can reliably grade essays at scale, automated writing evaluation (AWE) additionally provides formative feedback to guide essay revision. However, a neural AES typically does not provide useful feature representations for supporting AWE. This paper presents a method for linking AWE and neural AES, by extracting Topical Components (TCs) representing evidence from a source text using the intermediate output of attention layers. We evaluate performance using a feature-based AES requiring TCs. Results show that performance is comparable whether using automatically or manually constructed TCs for 1) representing essays as rubric-based features, 2) grading essays.</abstract>
       <url hash="25885a0e">2020.acl-main.759</url>
@@ -10222,7 +10222,7 @@
       <author><first>Tariq</first><last>Alhindi</last></author>
       <author><first>Siddharth</first><last>Varia</last></author>
       <author><first>Kriste</first><last>Krstovski</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>8593–8606</pages>
       <abstract>The increased focus on misinformation has spurred development of data and systems for detecting the veracity of a claim as well as retrieving authoritative evidence. The Fact Extraction and VERification (FEVER) dataset provides such a resource for evaluating endto- end fact-checking, requiring retrieval of evidence from Wikipedia to validate a veracity prediction. We show that current systems for FEVER are vulnerable to three categories of realistic challenges for fact-checking – multiple propositions, temporal reasoning, and ambiguity and lexical variation – and introduce a resource with these types of claims. Then we present a system designed to be resilient to these “attacks” using multiple pointer networks for document selection and jointly modeling a sequence of evidence sentences and veracity relation predictions. We find that in handling these attacks we obtain state-of-the-art results on FEVER, largely due to improved evidence retrieval.</abstract>
@@ -10254,7 +10254,7 @@
       <author><first>Randa</first><last>Elanwar</last></author>
       <author><first>Prakash</first><last>Ishwar</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>8614–8624</pages>
       <abstract>News framing refers to the practice in which aspects of specific issues are highlighted in the news to promote a particular interpretation. In NLP, although recent works have studied framing in English news, few have studied how the analysis can be extended to other languages and in a multi-label setting. In this work, we explore multilingual transfer learning to detect multiple frames from just the news headline in a genuinely low-resource context where there are few/no frame annotations in the target language. We propose a novel method that can leverage elementary resources consisting of a dictionary and few annotations to detect frames in the target language. Our method performs comparably or better than translating the entire target language headline to the source language for which we have annotated data. This work opens up an exciting new capability of scaling up frame analysis to many languages, even those without existing translation technologies. Lastly, we apply our method to detect frames on the issue of U.S. gun violence in multiple languages and obtain exciting insights on the relationship between different frames of the same problem across different countries with different languages.</abstract>
       <url hash="db3462f0">2020.acl-main.763</url>
@@ -10281,7 +10281,7 @@
       <author><first>Yutao</first><last>Zhu</last></author>
       <author><first>Ruihua</first><last>Song</last></author>
       <author><first>Zhicheng</first><last>Dou</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Jin</first><last>Zhou</last></author>
       <pages>8647–8657</pages>
       <abstract>It is appealing to have a system that generates a story or scripts automatically from a storyline, even though this is still out of our reach. In dialogue systems, it would also be useful to drive dialogues by a dialogue plan. In this paper, we address a key problem involved in these applications - guiding a dialogue by a narrative. The proposed model ScriptWriter selects the best response among the candidates that fit the context as well as the given narrative. It keeps track of what in the narrative has been said and what is to be said. A narrative plays a different role than the context (i.e., previous utterances), which is generally used in current dialogue systems. Due to the unavailability of data for this new application, we construct a new large-scale data collection GraphMovie from a movie website where end- users can upload their narratives freely when watching a movie. Experimental results on the dataset show that our proposed approach based on narratives significantly outperforms the baselines that simply use the narrative as a kind of context.</abstract>
@@ -10307,7 +10307,7 @@
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
       <author><first>Marcello</first><last>Hasegawa</last></author>
       <author><first>Ahmed</first><last>Hassan Awadallah</last></author>
-      <author><first>Ryen</first><last>White</last></author>
+      <author id="ryen-white"><first>Ryen</first><last>White</last></author>
       <pages>8680–8689</pages>
       <abstract>Intelligent features in email service applications aim to increase productivity by helping people organize their folders, compose their emails and respond to pending tasks. In this work, we explore a new application, Smart-To-Do, that helps users with task management over emails. We introduce a new task and dataset for automatically generating To-Do items from emails where the sender has promised to perform an action. We design a two-stage process leveraging recent advances in neural text generation and sequence-to-sequence learning, obtaining BLEU and ROUGE scores of 0.23 and 0.63 for this task. To the best of our knowledge, this is the first work to address the problem of composing To-Do items from emails.</abstract>
       <url hash="1cc47ee6">2020.acl-main.767</url>
@@ -10333,7 +10333,7 @@
       <title>End-to-End Bias Mitigation by Modelling Biases in Corpora</title>
       <author><first>Rabeeh</first><last>Karimi Mahabadi</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>8706–8716</pages>
       <abstract>Several recent studies have shown that strong natural language understanding (NLU) models are prone to relying on unwanted dataset biases without learning the underlying task, resulting in models that fail to generalize to out-of-domain datasets and are likely to perform poorly in real-world scenarios. We propose two learning strategies to train neural models, which are more robust to such biases and transfer better to out-of-domain datasets. The biases are specified in terms of one or more bias-only models, which learn to leverage the dataset biases. During training, the bias-only models’ predictions are used to adjust the loss of the base model to reduce its reliance on biases by down-weighting the biased examples and focusing the training on the hard examples. We experiment on large-scale natural language inference and fact verification benchmarks, evaluating on out-of-domain datasets that are specifically designed to assess the robustness of models against known biases in the training data. Results show that our debiasing methods greatly improve robustness in all settings and better transfer to other textual entailment datasets. Our code and data are publicly available in <url>https://github.com/rabeehk/robust-nli</url>.</abstract>
       <url hash="351d68e2">2020.acl-main.769</url>
@@ -10346,7 +10346,7 @@
     <paper id="770">
       <title>Mind the Trade-off: Debiasing <fixed-case>NLU</fixed-case> Models without Degrading the In-distribution Performance</title>
       <author><first>Prasetya Ajie</first><last>Utama</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>8717–8729</pages>
       <abstract>Models for natural language understanding (NLU) tasks often rely on the idiosyncratic biases of the dataset, which make them brittle against test cases outside the training distribution. Recently, several proposed debiasing methods are shown to be very effective in improving out-of-distribution performance. However, their improvements come at the expense of performance drop when models are evaluated on the in-distribution data, which contain examples with higher diversity. This seemingly inevitable trade-off may not tell us much about the changes in the reasoning and understanding capabilities of the resulting models on broader types of examples beyond the small subset represented in the out-of-distribution data. In this paper, we address this trade-off by introducing a novel debiasing method, called confidence regularization, which discourage models from exploiting biases while enabling them to receive enough incentive to learn from all the training examples. We evaluate our method on three NLU tasks and show that, in contrast to its predecessors, it improves the performance on out-of-distribution datasets (e.g., 7pp gain on HANS dataset) while maintaining the original in-distribution accuracy.</abstract>
@@ -10358,7 +10358,7 @@
     <paper id="771">
       <title><fixed-case>NILE</fixed-case> : Natural Language Inference with Faithful Natural Language Explanations</title>
       <author><first>Sawan</first><last>Kumar</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>8730–8742</pages>
       <abstract>The recent growth in the popularity and success of deep learning models on NLP classification tasks has accompanied the need for generating some form of natural language explanation of the predicted labels. Such generated natural language (NL) explanations are expected to be faithful, i.e., they should correlate well with the model’s internal decision making. In this work, we focus on the task of natural language inference (NLI) and address the following question: can we build NLI systems which produce labels with high accuracy, while also generating faithful explanations of its decisions? We propose Natural-language Inference over Label-specific Explanations (NILE), a novel NLI method which utilizes auto-generated label-specific NL explanations to produce labels along with its faithful explanation. We demonstrate NILE’s effectiveness over previously reported methods through automated and human evaluation of the produced labels and explanations. Our evaluation of NILE also supports the claim that accurate systems capable of providing testable explanations of their decisions can be designed. We discuss the faithfulness of NILE’s explanations in terms of sensitivity of the decisions to the corresponding explanations. We argue that explicit evaluation of faithfulness, in addition to label and explanation accuracy, is an important step in evaluating model’s explanations. Further, we demonstrate that task-specific probes are necessary to establish such sensitivity.</abstract>
       <url hash="671cd7c7">2020.acl-main.771</url>
@@ -10392,7 +10392,7 @@
     <paper id="774">
       <title>Uncertain Natural Language Inference</title>
       <author><first>Tongfei</first><last>Chen</last></author>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
@@ -10416,8 +10416,8 @@
     </paper>
     <paper id="776">
       <title>Revisiting Higher-Order Dependency Parsers</title>
-      <author><first>Erick</first><last>Fonseca</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>8795–8800</pages>
       <abstract>Neural encoders have allowed dependency parsers to shift from higher-order structured models to simpler first-order ones, making decoding faster and still achieving better accuracy than non-neural parsers. This has led to a belief that neural encoders can implicitly encode structural constraints, such as siblings and grandparents in a tree. We tested this hypothesis and found that neural parsers may benefit from higher-order features, even when employing a powerful pre-trained encoder, such as BERT. While the gains of higher-order features are small in the presence of a powerful encoder, they are consistent for long-range dependencies and long sentences. In particular, higher-order models are more accurate on full sentence parses and on the exact match of modifier lists, indicating that they deal better with larger, more complex structures.</abstract>
       <url hash="02e32a41">2020.acl-main.776</url>
@@ -10589,8 +10589,8 @@
       <title><fixed-case>T</fixed-case>rialstreamer: Mapping and Browsing Medical Evidence in Real-Time</title>
       <author><first>Benjamin</first><last>Nye</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>63–69</pages>
       <abstract>We introduce Trialstreamer, a living database of clinical trial reports. Here we mainly describe the evidence extraction component; this extracts from biomedical abstracts key pieces of information that clinicians need when appraising the literature, and also the relations between these. Specifically, the system extracts descriptions of trial participants, the treatments compared in each arm (the interventions), and which outcomes were measured. The system then attempts to infer which interventions were reported to work best by determining their relationship with identified trial outcome measures. In addition to summarizing individual trials, these extracted data elements allow automatic synthesis of results across many trials on the same topic. We apply the system at scale to all reports of randomized controlled trials indexed in MEDLINE, powering the automatic generation of evidence maps, which provide a global view of the efficacy of different interventions combining data from all relevant clinical trials on a topic. We make all code and models freely available alongside a demonstration of the web interface.</abstract>
       <url hash="56979cac">2020.acl-demos.9</url>
@@ -10604,7 +10604,7 @@
       <author><first>Jennifer</first><last>Hu</last></author>
       <author><first>Ethan</first><last>Wilcox</last></author>
       <author><first>Peng</first><last>Qian</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>70–76</pages>
       <abstract>Targeted syntactic evaluations have yielded insights into the generalizations learned by neural network language models. However, this line of research requires an uncommon confluence of skills: both the theoretical knowledge needed to design controlled psycholinguistic experiments, and the technical proficiency needed to train and deploy large-scale language models. We present SyntaxGym, an online platform designed to make targeted evaluations accessible to both experts in NLP and linguistics, reproducible across computing environments, and standardized following the norms of psycholinguistic experimental design. This paper releases two tools of independent value for the computational linguistics community: 1. A website, syntaxgym.org, which centralizes the process of targeted syntactic evaluation and provides easy tools for analysis and visualization; 2. Two command-line tools, ‘syntaxgym‘ and ‘lm-zoo‘, which allow any user to reproduce targeted syntactic evaluations and general language model inference on their own machine.</abstract>
       <url hash="f7fba435">2020.acl-demos.10</url>
@@ -10622,8 +10622,8 @@
       <author><first>Brian</first><last>Chen</last></author>
       <author><first>Bo</first><last>Wu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Daniel</first><last>Napierski</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
       <pages>77–86</pages>
@@ -10676,7 +10676,7 @@
       <author><first>Yuhao</first><last>Zhang</last></author>
       <author><first>Yuhui</first><last>Zhang</last></author>
       <author><first>Jason</first><last>Bolton</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>101–108</pages>
       <abstract>We introduce Stanza, an open-source Python natural language processing toolkit supporting 66 human languages. Compared to existing widely used toolkits, Stanza features a language-agnostic fully neural pipeline for text analysis, including tokenization, multi-word token expansion, lemmatization, part-of-speech and morphological feature tagging, dependency parsing, and named entity recognition. We have trained Stanza on a total of 112 datasets, including the Universal Dependencies treebanks and other multilingual corpora, and show that the same neural architecture generalizes well and achieves competitive performance on all languages tested. Additionally, Stanza includes a native Python interface to the widely used Java Stanford CoreNLP software, which further extends its functionality to cover other tasks such as coreference resolution and relation extraction. Source code, documentation, and pretrained models for 66 languages are available at <url>https://stanfordnlp.github.io/stanza/</url>.</abstract>
       <url hash="f70edbdf">2020.acl-demos.14</url>
@@ -10693,7 +10693,7 @@
       <author><first>Phu Mon</first><last>Htut</last></author>
       <author><first>Alex</first><last>Wang</last></author>
       <author><first>Ian</first><last>Tenney</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>109–117</pages>
       <abstract>We introduce jiant, an open source toolkit for conducting multitask and transfer learning experiments on English NLU tasks. jiant enables modular and configuration driven experimentation with state-of-the-art models and a broad set of tasks for probing, transfer learning, and multitask training experiments. jiant implements over 50 NLU tasks, including all GLUE and SuperGLUE benchmark tasks. We demonstrate that jiant reproduces published performance on a variety of tasks and models, e.g., RoBERTa and BERT.</abstract>
       <url hash="873a80eb">2020.acl-demos.15</url>
@@ -10726,7 +10726,7 @@
       <author><first>Chung-Ting</first><last>Tsai</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
       <author><first>Ching-Yu</first><last>Yang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>127–133</pages>
       <abstract>This paper presents LinggleWrite, a writing coach that provides writing suggestions, assesses writing proficiency levels, detects grammatical errors, and offers corrective feedback in response to user’s essay. The method involves extracting grammar patterns, training models for automated essay scoring (AES) and grammatical error detection (GED), and finally retrieving plausible corrections from a n-gram search engine. Experiments on public test sets indicate that both AES and GED models achieve state-of-the-art performance. These results show that LinggleWrite is potentially useful in helping learners improve their writing skills.</abstract>
       <url hash="c68de9e6">2020.acl-demos.17</url>
@@ -10769,7 +10769,7 @@
       <title><fixed-case>O</fixed-case>pus<fixed-case>F</fixed-case>ilter: A Configurable Parallel Corpus Filtering Toolbox</title>
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>150–156</pages>
       <abstract>This paper introduces OpusFilter, a flexible and modular toolbox for filtering parallel corpora. It implements a number of components based on heuristic filters, language identification libraries, character-based language models, and word alignment tools, and it can easily be extended with custom filters. Bitext segments can be ranked according to their quality or domain match using single features or a logistic regression model that can be trained without manually labeled training data. We demonstrate the effectiveness of OpusFilter on the example of a Finnish-English news translation task based on noisy web-crawled training data. Applying our tool leads to improved translation quality while significantly reducing the size of the training data, also clearly outperforming an alternative ranking given in the crawled data set. Furthermore, we show the ability of OpusFilter to perform data selection for domain adaptation.</abstract>
       <url hash="8297dd2b">2020.acl-demos.20</url>
@@ -10820,7 +10820,7 @@
     <paper id="24">
       <title><fixed-case>P</fixed-case>hoton: A Robust Cross-Domain Text-to-<fixed-case>SQL</fixed-case> System</title>
       <author><first>Jichuan</first><last>Zeng</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
@@ -10836,7 +10836,7 @@
     <paper id="25">
       <title>Interactive Task Learning from <fixed-case>GUI</fixed-case>-Grounded Natural Language Instructions and Demonstrations</title>
       <author><first>Toby Jia-Jun</first><last>Li</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <author><first>Brad A.</first><last>Myers</last></author>
       <pages>215–223</pages>
       <abstract>We show SUGILITE, an intelligent task automation agent that can learn new tasks and relevant associated concepts interactively from the user’s natural language instructions and demonstrations, using the graphical user interfaces (GUIs) of third-party mobile apps. This system provides several interesting features: (1) it allows users to teach new task procedures and concepts through verbal instructions together with demonstration of the steps of a script using GUIs; (2) it supports users in clarifying their intents for demonstrated actions using GUI-grounded verbal instructions; (3) it infers parameters of tasks and their possible values in utterances using the hierarchical structures of the underlying app GUIs; and (4) it generalizes taught concepts to different contexts and task domains. We describe the architecture of the SUGILITE system, explain the design and implementation of its key features, and show a prototype in the form of a conversational assistant on Android.</abstract>
@@ -10849,7 +10849,7 @@
       <title><fixed-case>M</fixed-case>ixing<fixed-case>B</fixed-case>oard: a Knowledgeable Stylized Integrated Text Generation Platform</title>
       <author><first>Xiang</first><last>Gao</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>224–231</pages>
       <abstract>We present MixingBoard, a platform for quickly building demos with a focus on knowledge grounded stylized text generation. We unify existing text generation algorithms in a shared codebase and further adapt earlier algorithms for constrained generation. To borrow advantages from different models, we implement strategies for cross-model integration, from the token probability level to the latent space level. An interface to external knowledge is provided via a module that retrieves, on-the-fly, relevant knowledge from passages on the web or a document collection. A user interface for local development, remote webpage access, and a RESTful API are provided to make it simple for users to build their own demos.</abstract>
       <url hash="99bb0b4e">2020.acl-demos.26</url>
@@ -10859,7 +10859,7 @@
     </paper>
     <paper id="27">
       <title><fixed-case>NLP</fixed-case> Scholar: An Interactive Visual Explorer for Natural Language Processing Literature</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>232–255</pages>
       <abstract>As part of the NLP Scholar project, we created a single unified dataset of NLP papers and their meta-information (including citation numbers), by extracting and aligning information from the ACL Anthology and Google Scholar. In this paper, we describe several interconnected interactive visualizations (dashboards) that present various aspects of the data. Clicking on an item within a visualization or entering query terms in the search boxes filters the data in all visualizations in the dashboard. This allows users to search for papers in the area of their interest, published within specific time periods, published by specified authors, etc. The interactive visualizations presented here, and the associated dataset of papers mapped to citations, have additional uses as well including understanding how the field is growing (both overall and across sub-areas), as well as quantifying the impact of different types of papers on subsequent publications.</abstract>
       <url hash="b657b9f5">2020.acl-demos.27</url>
@@ -10882,7 +10882,7 @@
     </paper>
     <paper id="29">
       <title><fixed-case>U</fixed-case>snea: An Authorship Tool for Interactive Fiction using Retrieval Based Semantic Parsing</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Boris</first><last>Smus</last></author>
       <pages>263–269</pages>
       <abstract>The reader of a choose your own adventure novel and the user of a modern virtual assistant have a subtle similarity; both may, through the right lens, be viewed as engaging with a work of Interactive Fiction. This literary form emerged in the 1970s and has grown like a vine along the branch of modern technology, one guided by the advances of the other. In this work we weave together threads from the Interactive Fiction community and neural semantic parsing for dialog systems, defining the data model and necessary algorithms for a novel type of Interactive Fiction and open sourcing its accompanying authoring tool. Specifically, our work integrates retrieval based semantic parsing predicates into the branching story structures well known to the Interactive Fiction community, relaxing the relatively strict lexical options of preexisting systems.</abstract>
@@ -10901,7 +10901,7 @@
       <author><first>Xiang</first><last>Gao</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Jingjing</first><last>Liu</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>270–278</pages>
       <abstract>We present a large, tunable neural conversational response generation model, DIALOGPT (dialogue generative pre-trained transformer). Trained on 147M conversation-like exchanges extracted from Reddit comment chains over a period spanning from 2005 through 2017, DialoGPT extends the Hugging Face PyTorch transformer to attain a performance close to human both in terms of automatic and human evaluation in single-turn dialogue settings. We show that conversational systems that leverage DialoGPT generate more relevant, contentful and context-consistent responses than strong baseline systems. The pre-trained model and training pipeline are publicly released to facilitate research into neural response generation and the development of more intelligent open-domain dialogue systems.</abstract>
       <url hash="91290228">2020.acl-demos.30</url>
@@ -10937,7 +10937,7 @@
       <author><first>Yifan</first><last>Zhang</last></author>
       <author><first>Seunghak</first><last>Yu</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>287–293</pages>
       <abstract>Recent events, such as the 2016 US Presidential Campaign, Brexit and the COVID-19 “infodemic”, have brought into the spotlight the dangers of online disinformation. There has been a lot of research focusing on fact-checking and disinformation detection. However, little attention has been paid to the specific rhetorical and psychological techniques used to convey propaganda messages. Revealing the use of such techniques can help promote media literacy and critical thinking, and eventually contribute to limiting the impact of “fake news” and disinformation campaigns. Prta (Propaganda Persuasion Techniques Analyzer) allows users to explore the articles crawled on a regular basis by highlighting the spans in which propaganda techniques occur and to compare them on the basis of their use of propaganda techniques. The system further reports statistics about the use of such techniques, overall and over time, or according to filtering criteria specified by the user based on time interval, keywords, and/or political orientation of the media. Moreover, it allows users to analyze any text or URL through a dedicated interface or via an API. The system is available online: <url>https://www.tanbih.org/prta</url>.</abstract>
       <award>Honorable Mention for Best Demonstration Paper</award>
@@ -10983,7 +10983,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>P</fixed-case>enman: An Open-Source Library and Tool for <fixed-case>AMR</fixed-case> Graphs</title>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <pages>312–319</pages>
       <abstract>Abstract Meaning Representation (AMR) (Banarescu et al., 2013) is a framework for semantic dependencies that encodes its rooted and directed acyclic graphs in a format called PENMAN notation. The format is simple enough that users of AMR data often write small scripts or libraries for parsing it into an internal graph representation, but there is enough complexity that these users could benefit from a more sophisticated and well-tested solution. The open-source Python library Penman provides a robust parser, functions for graph inspection and manipulation, and functions for formatting graphs into PENMAN notation. Many functions are also available in a command-line tool, thus extending its utility to non-Python setups.</abstract>
       <url hash="6cedabb1">2020.acl-demos.35</url>
@@ -10995,7 +10995,7 @@
       <title>Embedding-based Scientific Literature Discovery in a Text Editor Application</title>
       <author><first>Onur</first><last>Gökçe</last></author>
       <author><first>Jonathan</first><last>Prada</last></author>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Nianlong</first><last>Gu</last></author>
       <author><first>Richard H.R.</first><last>Hahnloser</last></author>
       <pages>320–326</pages>
@@ -11014,7 +11014,7 @@
       <author><first>Mahsa</first><last>Monshizadeh</last></author>
       <author><first>Vladislav</first><last>Hnatovskiy</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>327–334</pages>
       <abstract>The shift from traditional translation to post-editing (PE) of machine-translated (MT) text can save time and reduce errors, but it also affects the design of translation interfaces, as the task changes from mainly generating text to correcting errors within otherwise helpful translation proposals. Since this paradigm shift offers potential for modalities other than mouse and keyboard, we present MMPE, the first prototype to combine traditional input modes with pen, touch, and speech modalities for PE of MT. Users can directly cross out or hand-write new text, drag and drop words for reordering, or use spoken commands to update the text in place. All text manipulations are logged in an easily interpretable format to simplify subsequent translation process research. The results of an evaluation with professional translators suggest that pen and touch interaction are suitable for deletion and reordering tasks, while speech and multi-modal combinations of select &amp; speech are considered suitable for replacements and insertions. Overall, experiment participants were enthusiastic about the new modalities and saw them as useful extensions to mouse &amp; keyboard, but not as a complete substitute.</abstract>
       <url hash="e594eee8">2020.acl-demos.37</url>
@@ -11024,7 +11024,7 @@
     </paper>
     <paper id="38">
       <title>Torch-Struct: Deep Structured Prediction Library</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>335–342</pages>
       <abstract>The literature on structured prediction for NLP describes a rich collection of distributions and algorithms over sequences, segmentations, alignments, and trees; however, these algorithms are difficult to utilize in deep learning frameworks. We introduce Torch-Struct, a library for structured prediction designed to take advantage of and integrate with vectorized, auto-differentiation based frameworks. Torch-Struct includes a broad collection of probabilistic structures accessed through a simple and flexible distribution-based API that connects to any deep learning model. The library utilizes batched, vectorized operations and exploits auto-differentiation to produce readable, fast, and testable code. Internally, we also include a number of general-purpose optimizations to provide cross-algorithm efficiency. Experiments show significant performance gains over fast baselines and case-studies demonstrate the benefits of the library. Torch-Struct is available at <url>https://github.com/harvardnlp/pytorch-struct</url>.</abstract>
       <url hash="88f26f6c">2020.acl-demos.38</url>
@@ -11070,7 +11070,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>SUPP</fixed-case>.<fixed-case>AI</fixed-case>: finding evidence for supplement-drug interactions</title>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Oyvind</first><last>Tafjord</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Sarthak</first><last>Jain</last></author>
@@ -11089,7 +11089,7 @@
       <title><fixed-case>LEAN</fixed-case>-<fixed-case>LIFE</fixed-case>: A Label-Efficient Annotation Framework Towards Learning from Explanation</title>
       <author><first>Dong-Ho</first><last>Lee</last></author>
       <author><first>Rahul</first><last>Khanna</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Seyeon</first><last>Lee</last></author>
       <author><first>Qinyuan</first><last>Ye</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
@@ -11106,7 +11106,7 @@
       <title>What’s The Latest? A Question-driven News Chatbot</title>
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>John</first><last>Canny</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>380–387</pages>
       <abstract>This work describes an automatic news chatbot that draws content from a diverse set of news articles and creates conversations with a user about the news. Key components of the system include the automatic organization of news articles into topical chatrooms, integration of automatically generated questions into the conversation, and a novel method for choosing which questions to present which avoids repetitive suggestions. We describe the algorithmic framework and present the results of a usability study that shows that news readers using the system successfully engage in multi-turn conversations about specific news stories.</abstract>
       <url hash="4cb89814">2020.acl-demos.43</url>
@@ -11158,7 +11158,7 @@
       <author><first>Sharan</first><last>Pai</last></author>
       <author><first>Nikhil</first><last>Sachdeva</last></author>
       <author><first>Prince</first><last>Sachdeva</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>13–19</pages>
       <abstract>Aphasia is a speech and language disorder which results from brain damage, often characterized by word retrieval deficit (anomia) resulting in naming errors (paraphasia). Automatic paraphasia detection has many benefits for both treatment and diagnosis of Aphasia and its type. But supervised learning methods cant be properly utilized as there is a lack of aphasic speech data. In this paper, we describe our novel unsupervised method which can be implemented without the need for labeled paraphasia data. Our evaluations show that our method outperforms previous work based on supervised learning and transfer learning approaches for English. We demonstrate the utility of our method as an essential first step in developing augmentative and alternative communication (AAC) devices for patients suffering from aphasia in any language.</abstract>
       <url hash="015cad9e">2020.acl-srw.3</url>
@@ -11239,8 +11239,8 @@
     <paper id="10">
       <title>Combining Subword Representations into Word-level Representations in the Transformer Architecture</title>
       <author><first>Noe</first><last>Casas</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>66–71</pages>
       <abstract>In Neural Machine Translation, using word-level tokens leads to degradation in translation quality. The dominant approaches use subword-level tokens, but this increases the length of the sequences and makes it difficult to profit from word-level information such as POS tags or semantic dependencies. We propose a modification to the Transformer model to combine subword-level representations into word-level ones in the first layers of the encoder, reducing the effective length of the sequences in the following layers and providing a natural point to incorporate extra word-level information. Our experiments show that this approach maintains the translation quality with respect to the normal Transformer model when no extra word-level information is injected and that it is superior to the currently dominant method for incorporating word-level source language information to models based on subword-level vocabularies.</abstract>
       <url hash="7e1dec8b">2020.acl-srw.10</url>
@@ -11274,7 +11274,7 @@
       <title><fixed-case>SCAR</fixed-case>: Sentence Compression using Autoencoders for Reconstruction</title>
       <author><first>Chanakya</first><last>Malireddy</last></author>
       <author><first>Tirth</first><last>Maniar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>88–94</pages>
       <abstract>Sentence compression is the task of shortening a sentence while retaining its meaning. Most methods proposed for this task rely on labeled or paired corpora (containing pairs of verbose and compressed sentences), which is often expensive to collect. To overcome this limitation, we present a novel unsupervised deep learning framework (SCAR) for deletion-based sentence compression. SCAR is primarily composed of two encoder-decoder pairs: a compressor and a reconstructor. The compressor masks the input, and the reconstructor tries to regenerate it. The model is entirely trained on unlabeled data and does not require additional inputs such as explicit syntactic information or optimal compression length. SCAR’s merit lies in the novel Linkage Loss function, which correlates the compressor and its effect on reconstruction, guiding it to drop inferable tokens. SCAR achieves higher ROUGE scores on benchmark datasets than the existing state-of-the-art methods and baselines. We also conduct a user study to demonstrate the application of our model as a text highlighting system. Using our model to underscore salient information facilitates speed-reading and reduces the time required to skim a document.</abstract>
       <url hash="a1b11262">2020.acl-srw.13</url>
@@ -11344,8 +11344,8 @@
     <paper id="19">
       <title>A Simple and Effective Dependency Parser for <fixed-case>T</fixed-case>elugu</title>
       <author><first>Sneha</first><last>Nallani</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>143–149</pages>
       <abstract>We present a simple and effective dependency parser for Telugu, a morphologically rich, free word order language. We propose to replace the rich linguistic feature templates used in the past approaches with a minimal feature function using contextual vector representations. We train a BERT model on the Telugu Wikipedia data and use vector representations from this model to train the parser. Each sentence token is associated with a vector representing the token in the context of that sentence and the feature vectors are constructed by concatenating two token representations from the stack and one from the buffer. We put the feature representations through a feedforward network and train with a greedy transition based approach. The resulting parser has a very simple architecture with minimal feature engineering and achieves state-of-the-art results for Telugu.</abstract>
       <url hash="249d98d6">2020.acl-srw.19</url>
@@ -11356,9 +11356,9 @@
     </paper>
     <paper id="20">
       <title>Pointwise Paraphrase Appraisal is Potentially Problematic</title>
-      <author><first>Hannah</first><last>Chen</last></author>
+      <author id="hannah-cyberey"><first>Hannah</first><last>Chen</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
-      <author><first>David</first><last>Evans</last></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last></author>
       <pages>150–155</pages>
       <abstract>The prevailing approach for training and evaluating paraphrase identification models is constructed as a binary classification problem: the model is given a pair of sentences, and is judged by how accurately it classifies pairs as either paraphrases or non-paraphrases. This pointwise-based evaluation method does not match well the objective of most real world applications, so the goal of our work is to understand how models which perform well under pointwise evaluation may fail in practice and find better methods for evaluating paraphrase identification models. As a first step towards that goal, we show that although the standard way of fine-tuning BERT for paraphrase identification by pairing two sentences as one sequence results in a model with state-of-the-art performance, that model may perform poorly on simple tasks like identifying pairs with two identical sentences. Moreover, we show that these models may even predict a pair of randomly-selected sentences with higher paraphrase score than a pair of identical ones.</abstract>
       <url hash="a7826a9e">2020.acl-srw.20</url>
@@ -11370,7 +11370,7 @@
       <title>Efficient Neural Machine Translation for Low-Resource Languages via Exploiting Related Languages</title>
       <author><first>Vikrant</first><last>Goyal</last></author>
       <author><first>Sourav</first><last>Kumar</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>162–168</pages>
       <abstract>A large percentage of the world’s population speaks a language of the Indian subcontinent, comprising languages from both Indo-Aryan (e.g. Hindi, Punjabi, Gujarati, etc.) and Dravidian (e.g. Tamil, Telugu, Malayalam, etc.) families. A universal characteristic of Indian languages is their complex morphology, which, when combined with the general lack of sufficient quantities of high-quality parallel data, can make developing machine translation (MT) systems for these languages difficult. Neural Machine Translation (NMT) is a rapidly advancing MT paradigm and has shown promising results for many language pairs, especially in large training data scenarios. Since the condition of large parallel corpora is not met for Indian-English language pairs, we present our efforts towards building efficient NMT systems between Indian languages (specifically Indo-Aryan languages) and English via efficiently exploiting parallel data from the related languages. We propose a technique called Unified Transliteration and Subword Segmentation to leverage language similarity while exploiting parallel data from related language pairs. We also propose a Multilingual Transfer Learning technique to leverage parallel data from multiple related languages to assist translation for low resource language pair of interest. Our experiments demonstrate an overall average improvement of 5 BLEU points over the standard Transformer-based NMT baselines.</abstract>
       <url hash="de3039b6">2020.acl-srw.22</url>
@@ -11383,7 +11383,7 @@
     <paper id="23">
       <title>Exploring Interpretability in Event Extraction: Multitask Learning of a Neural Event Classifier and an Explanation Decoder</title>
       <author><first>Zheng</first><last>Tang</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>169–175</pages>
       <abstract>We propose an interpretable approach for event extraction that mitigates the tension between generalization and interpretability by jointly training for the two goals. Our approach uses an encoder-decoder architecture, which jointly trains a classifier for event extraction, and a rule decoder that generates syntactico-semantic rules that explain the decisions of the event classifier. We evaluate the proposed approach on three biomedical events and show that the decoder generates interpretable rules that serve as accurate explanations for the event classifier’s decisions, and, importantly, that the joint training generally improves the performance of the event classifier. Lastly, we show that our approach can be used for semi-supervised learning, and that its performance improves when trained on automatically-labeled data generated by a rule-based system.</abstract>
@@ -11528,9 +11528,9 @@
       <title>Unsupervised Multilingual Sentence Embeddings for Parallel Corpus Mining</title>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>255–262</pages>
       <abstract>Existing models of multilingual sentence embeddings require large parallel data resources which are not available for low-resource languages. We propose a novel unsupervised method to derive multilingual sentence embeddings relying only on monolingual data. We first produce a synthetic parallel corpus using unsupervised machine translation, and use it to fine-tune a pretrained cross-lingual masked language model (XLM) to derive the multilingual sentence representations. The quality of the representations is evaluated on two parallel corpus mining tasks with improvements of up to 22 F1 points over vanilla XLM. In addition, we observe that a single synthetic bilingual corpus is able to improve results for other language pairs.</abstract>
       <url hash="15c011b6">2020.acl-srw.34</url>
@@ -11554,7 +11554,7 @@
       <title>Enhancing Word Embeddings with Knowledge Extracted from Lexical Resources</title>
       <author><first>Magdalena</first><last>Biesialska</last></author>
       <author><first>Bardia</first><last>Rafieian</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>271–278</pages>
       <abstract>In this work, we present an effective method for semantic specialization of word vector representations. To this end, we use traditional word embeddings and apply specialization methods to better capture semantic relations between words. In our approach, we leverage external knowledge from rich lexical resources such as BabelNet. We also show that our proposed post-specialization method based on an adversarial neural network with the Wasserstein distance allows to gain improvements over state-of-the-art methods on two tasks: word similarity and dialog state tracking.</abstract>
       <url hash="a57bf190">2020.acl-srw.36</url>
@@ -11569,7 +11569,7 @@
       <author><first>Zhuoyuan</first><last>Mao</last></author>
       <author><first>Fei</first><last>Cheng</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>279–285</pages>
       <abstract>Sequence-to-sequence (S2S) pre-training using large monolingual data is known to improve performance for various S2S NLP tasks. However, large monolingual corpora might not always be available for the languages of interest (LOI). Thus, we propose to exploit monolingual corpora of other languages to complement the scarcity of monolingual corpora for the LOI. We utilize script mapping (Chinese to Japanese) to increase the similarity (number of cognates) between the monolingual corpora of helping languages and LOI. An empirical case study of low-resource Japanese-English neural machine translation (NMT) reveals that leveraging large Chinese and French monolingual corpora can help overcome the shortage of Japanese and English monolingual corpora, respectively, for S2S pre-training. Using only Chinese and French monolingual corpora, we were able to improve Japanese-English translation quality by up to 8.5 BLEU in low-resource scenarios.</abstract>
       <url hash="7d1db2d0">2020.acl-srw.37</url>
@@ -11581,7 +11581,7 @@
     <paper id="38">
       <title>Checkpoint Reranking: An Approach to Select Better Hypothesis for Neural Machine Translation Systems</title>
       <author><first>Vinay</first><last>Pandramish</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>286–291</pages>
       <abstract>In this paper, we propose a method of re-ranking the outputs of Neural Machine Translation (NMT) systems. After the decoding process, we select a few last iteration outputs in the training process as the <tex-math>N</tex-math>-best list. After training a Neural Machine Translation (NMT) baseline system, it has been observed that these iteration outputs have an oracle score higher than baseline up to 1.01 BLEU points compared to the last iteration of the trained system.We come up with a ranking mechanism by solely focusing on the decoder’s ability to generate distinct tokens and without the usage of any language model or data. With this method, we achieved a translation improvement up to +0.16 BLEU points over baseline.We also evaluate our approach by applying the coverage penalty to the training process.In cases of moderate coverage penalty, the oracle scores are higher than the final iteration up to +0.99 BLEU points, and our algorithm gives an improvement up to +0.17 BLEU points.With excessive penalty, there is a decrease in translation quality compared to the baseline system. Still, an increase in oracle scores up to +1.30 is observed with the re-ranking algorithm giving an improvement up to +0.15 BLEU points is found in case of excessive penalty.The proposed re-ranking method is a generic one and can be extended to other language pairs as well.</abstract>
       <url hash="25d03b8e">2020.acl-srw.38</url>
@@ -11616,7 +11616,7 @@
     <paper id="41">
       <title>Exploring the Role of Context to Distinguish Rhetorical and Information-Seeking Questions</title>
       <author><first>Yuan</first><last>Zhuang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>306–312</pages>
       <abstract>Social media posts often contain questions, but many of the questions are rhetorical and do not seek information. Our work studies the problem of distinguishing rhetorical and information-seeking questions on Twitter. Most work has focused on features of the question itself, but we hypothesize that the prior context plays a role too. This paper introduces a new dataset containing questions in tweets paired with their prior tweets to provide context. We create classification models to assess the difficulty of distinguishing rhetorical and information-seeking questions, and experiment with different properties of the prior context. Our results show that the prior tweet and topic features can improve performance on this task.</abstract>
       <url hash="398add04">2020.acl-srw.41</url>
@@ -11679,7 +11679,7 @@
     </paper>
     <paper id="2">
       <title>Integrating Ethics into the <fixed-case>NLP</fixed-case> Curriculum</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Alexandra</first><last>Schofield</last></author>
       <pages>6–9</pages>
@@ -11700,10 +11700,10 @@
     </paper>
     <paper id="4">
       <title>Reviewing Natural Language Processing Research</title>
-      <author><first>Kevin</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>16–18</pages>
       <abstract>This tutorial will cover the theory and practice of reviewing research in natural language processing. Heavy reviewing burdens on natural language processing researchers have made it clear that our community needs to increase the size of our pool of potential reviewers. Simultaneously, notable “false negatives”—rejection by our conferences of work that was later shown to be tremendously important after acceptance by other conferences—have raised awareness of the fact that our reviewing practices leave something to be desired. We do not often talk about “false positives” with respect to conference papers, but leaders in the field have noted that we seem to have a publication bias towards papers that report high performance, with perhaps not much else of interest in them. It need not be this way. Reviewing is a learnable skill, and you will learn it here via lectures and a considerable amount of hands-on practice.</abstract>
       <url hash="ea7bb915">2020.acl-tutorials.4</url>
@@ -11722,7 +11722,7 @@
     </paper>
     <paper id="6">
       <title>Multi-modal Information Extraction from Text, Semi-structured, and Tabular Data on the Web</title>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <author><first>Colin</first><last>Lockard</last></author>
       <author><first>Prashant</first><last>Shiralkar</last></author>
@@ -11748,7 +11748,7 @@
     <paper id="8">
       <title>Open-Domain Question Answering</title>
       <author><first>Danqi</first><last>Chen</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>34–37</pages>
       <abstract>This tutorial provides a comprehensive and coherent overview of cutting-edge research in open-domain question answering (QA), the task of answering questions using a large collection of documents of diversified topics. We will start by first giving a brief historical background, discussing the basic setup and core technical challenges of the research problem, and then describe modern datasets with the common evaluation metrics and benchmarks. The focus will then shift to cutting-edge models proposed for open-domain QA, including two-stage retriever-reader approaches, dense retriever and end-to-end training, and retriever-free methods. Finally, we will cover some hybrid approaches using both text and large knowledge bases and conclude the tutorial with important open questions. We hope that the tutorial will not only help the audience to acquire up-to-date knowledge but also provide new perspectives to stimulate the advances of open-domain QA research in the next phase.</abstract>
       <url hash="3df726d6">2020.acl-tutorials.8</url>
diff --git a/data/xml/2020.aespen.xml b/data/xml/2020.aespen.xml
index 37b7b4bc59..72d0fbd342 100644
--- a/data/xml/2020.aespen.xml
+++ b/data/xml/2020.aespen.xml
@@ -3,10 +3,10 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020</booktitle>
-      <editor><first>Ali</first><last>Hürriyetoğlu</last></editor>
+      <editor id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></editor>
       <editor><first>Erdem</first><last>Yörük</last></editor>
       <editor><first>Vanni</first><last>Zavarella</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -54,7 +54,7 @@
       <title>Analyzing <fixed-case>ELM</fixed-case>o and <fixed-case>D</fixed-case>istil<fixed-case>BERT</fixed-case> on Socio-political News Classification</title>
       <author><first>Berfu</first><last>Büyüköz</last></author>
       <author><first>Ali</first><last>Hürriyetoğlu</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>9–18</pages>
       <abstract>This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A ”cross-context” setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30% smaller and 83% faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations.</abstract>
       <url hash="d1a94180">2020.aespen-1.4</url>
@@ -121,7 +121,7 @@
     <paper id="10">
       <title>Protest Event Analysis: A Longitudinal Analysis for <fixed-case>G</fixed-case>reece</title>
       <author><first>Konstantina</first><last>Papanikolaou</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <pages>57–62</pages>
       <abstract>The advent of Big Data has shifted social science research towards computational methods. The volume of data that is nowadays available has brought a radical change in traditional approaches due to the cost and effort needed for processing. Knowledge extraction from heterogeneous and ample data is not an easy task to tackle. Thus, interdisciplinary approaches are necessary, combining experts of both social and computer science. This paper aims to present a work in the context of protest analysis, which falls into the scope of Computational Social Science. More specifically, the contribution of this work is to describe a Computational Social Science methodology for Event Analysis. The presented methodology is generic in the sense that it can be applied in every event typology and moreover, it is innovative and suitable for interdisciplinary tasks as it incorporates the human-in-the-loop. Additionally, a case study is presented concerning Protest Analysis in Greece over the last two decades. The conceptual foundation lies mainly upon claims analysis, and newspaper data were used in order to map, document and discuss protests in Greece in a longitudinal perspective.</abstract>
       <url hash="0673559d">2020.aespen-1.10</url>
diff --git a/data/xml/2020.alta.xml b/data/xml/2020.alta.xml
index 20faf76de7..da733c09f5 100644
--- a/data/xml/2020.alta.xml
+++ b/data/xml/2020.alta.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 18th Annual Workshop of the Australasian Language Technology Association</booktitle>
       <editor><first>Maria</first><last>Kim</last></editor>
-      <editor><first>Daniel</first><last>Beck</last></editor>
+      <editor id="daniel-beck"><first>Daniel</first><last>Beck</last></editor>
       <editor><first>Meladel</first><last>Mistica</last></editor>
       <publisher>Australasian Language Technology Association</publisher>
       <address>Virtual Workshop</address>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>Domain Adaptative Causality Encoder</title>
       <author><first>Farhad</first><last>Moghimifar</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Mahsa</first><last>Baktashmotlagh</last></author>
       <pages>1–10</pages>
       <abstract>Automated discovery of causal relationships from text is a challenging task. Current approaches which are mainly based on the extraction of low-level relations among individual events are limited by the shortage of publicly available labelled data. Therefore, the resulting models perform poorly when applied to a distributionally different domain for which labelled data did not exist at the time of training. To overcome this limitation, in this paper, we leverage the characteristics of dependency trees and adversarial learning to address the tasks of adaptive causality identification and localisation. The term adaptive is used since the training and test data come from two distributionally different datasets, which to the best of our knowledge, this work is the first to address. Moreover, we present a new causality dataset, namely MedCaus, which integrates all types of causality in the text. Our experiments on four different benchmark causality datasets demonstrate the superiority of our approach over the existing baselines, by up to 7% improvement, on the tasks of identification and localisation of the causal relations from the text.</abstract>
@@ -113,7 +113,7 @@
       <author><first>Xiangjue</first><last>Dong</last></author>
       <author><first>Mohammed Ali</first><last>Al-Garadi</last></author>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Diego Mollá</first><last>Aliod</last></author>
       <pages>86–91</pages>
       <abstract>Free text data from social media is now widely used in natural language processing research, and one of the most common machine learning tasks performed on this data is classification. Generally speaking, performances of supervised classification algorithms on social media datasets are lower than those on texts from other sources, but recently-proposed transformer-based models have considerably improved upon legacy state-of-the-art systems. Currently, there is no study that compares the performances of different variants of transformer-based models on a wide range of social media text classification datasets. In this paper, we benchmark the performances of transformer-based pre-trained models on 25 social media text classification datasets, 6 of which are health-related. We compare three pre-trained language models, RoBERTa-base, BERTweet and ClinicalBioBERT in terms of classification accuracy. Our experiments show that RoBERTa-base and BERTweet perform comparably on most datasets, and considerably better than ClinicalBioBERT, even on health-related datasets.</abstract>
@@ -140,7 +140,7 @@
       <author><first>Rohit Kumar</first><last>Gupta</last></author>
       <author><first>Saket</first><last>Khandelwal</last></author>
       <author><first>Jeannie</first><last>Paterson</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Daniel</first><last>Beck</last></author>
       <pages>98–103</pages>
       <abstract>‘Common Law’ judicial systems follow the doctrine of precedent, which means the legal principles articulated in court judgements are binding in subsequent cases in lower courts. For this reason, lawyers must search prior judgements for the legal principles that are relevant to their case. The difficulty for those within the legal profession is that the information that they are looking for may be contained within a few paragraphs or sentences, but those few paragraphs may be buried within a hundred-page document. In this study, we create a schema based on the relevant information that legal professionals seek within judgements and perform text classification based on it, with the aim of not only assisting lawyers in researching cases, but eventually enabling large-scale analysis of legal judgements to find trends in court outcomes over time.</abstract>
@@ -151,7 +151,7 @@
       <title>Convolutional and Recurrent Neural Networks for Spoken Emotion Recognition</title>
       <author><first>Aaron</first><last>Keesing</last></author>
       <author><first>Ian</first><last>Watson</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>104–109</pages>
       <abstract>We test four models proposed in the speech emotion recognition (SER) literature on 15 public and academic licensed datasets in speaker-independent cross-validation. Results indicate differences in the performance of the models which is partly dependent on the dataset and features used. We also show that a standard utterance-level feature set still performs competitively with neural models on some datasets. This work serves as a starting point for future model comparisons, in addition to open-sourcing the testing code.</abstract>
       <url hash="2dbe4fb8">2020.alta-1.13</url>
@@ -160,8 +160,8 @@
     <paper id="14">
       <title>Popularity Prediction of Online Petitions using a Multimodal <fixed-case>D</fixed-case>eep<fixed-case>R</fixed-case>egression Model</title>
       <author><first>Kotaro</first><last>Kitayama</last></author>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>110–114</pages>
       <abstract>Online petitions offer a mechanism for peopleto initiate a request for change and gather sup-port from others to demonstrate support for thecause. In this work, we model the task of peti-tion popularity using both text and image rep-resentations across four different languages,and including petition metadata. We evaluateour proposed approach using a dataset of 75kpetitions from Avaaz.org, and find strong com-plementarity between text and images.</abstract>
       <url hash="94049e44">2020.alta-1.14</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="17">
       <title>Overview of the 2020 <fixed-case>ALTA</fixed-case> Shared Task: Assess Human Behaviour</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>127–130</pages>
       <abstract>The 2020 ALTA shared task is the 11th in stance of a series of shared tasks organised by ALTA since 2010. The task is to classify texts posted in social media according to human judgements expressed in them. The data used for this task is a subset of SemEval 2018 AIT DISC, which has been annotated by domain experts for this task. In this paper we introduce the task, describe the data and present the results of participating systems.</abstract>
       <url hash="5ea36a9e">2020.alta-1.17</url>
@@ -197,7 +197,7 @@
     <paper id="18">
       <title>Automatically Predicting Judgement Dimensions of Human Behaviour</title>
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>131–134</pages>
       <abstract>This paper describes our submission to the ALTA-2020 shared task on assessing behaviour from short text, We evaluate the effectiveness of traditional machine learning and recent transformers pre-trained models. Our submission with the Roberta-large model and prediction threshold achieved first place on the private leaderboard.</abstract>
       <url hash="7d171dde">2020.alta-1.18</url>
diff --git a/data/xml/2020.alvr.xml b/data/xml/2020.alvr.xml
index 66f07829ad..e29eeb722d 100644
--- a/data/xml/2020.alvr.xml
+++ b/data/xml/2020.alvr.xml
@@ -62,7 +62,7 @@
       <title>On the role of effective and referring questions in <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat?!</title>
       <author><first>Mauricio</first><last>Mazuecos</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
       <pages>19–25</pages>
       <abstract>Task success is the standard metric used to evaluate referential visual dialogue systems. In this paper we propose two new metrics that evaluate how each question contributes to the goal. First, we measure how effective each question is by evaluating whether the question discards objects that are not the referent. Second, we define referring questions as those that univocally identify one object in the image. We report the new metrics for human dialogues and for state of the art publicly available models on GuessWhat?!. Regarding our first metric, we find that successful dialogues do not have a higher percentage of effective questions for most models. With respect to the second metric, humans make questions at the end of the dialogue that are referring, confirming their guess before guessing. Human dialogues that use this strategy have a higher task success but models do not seem to learn it.</abstract>
diff --git a/data/xml/2020.alw.xml b/data/xml/2020.alw.xml
index 9e215f1f8b..1b902c9e4f 100644
--- a/data/xml/2020.alw.xml
+++ b/data/xml/2020.alw.xml
@@ -6,7 +6,7 @@
       <editor><first>Seyi</first><last>Akiwowo</last></editor>
       <editor><first>Bertie</first><last>Vidgen</last></editor>
       <editor><first>Vinodkumar</first><last>Prabhakaran</last></editor>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>November</month>
@@ -35,7 +35,7 @@
       <author><first>Julia</first><last>Guo</last></author>
       <author><first>Sarah Ita</first><last>Levitan</last></author>
       <author><first>Susan</first><last>McGregor</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>7–15</pages>
       <abstract>Most efforts at identifying abusive speech online rely on public corpora that have been scraped from websites using keyword-based queries or released by site or platform owners for research purposes. These are typically labeled by crowd-sourced annotators – not the targets of the abuse themselves. While this method of data collection supports fast development of machine learning classifiers, the models built on them often fail in the context of real-world harassment and abuse, which contain nuances less easily identified by non-targets. Here, we present a mixed-methods approach to create classifiers for abuse and harassment which leverages direct engagement with the target group in order to achieve high quality and ecological validity of data sets and labels, and to generate deeper insights into the key tactics of bad actors. We use women journalists’ experience on Twitter as an initial community of focus. We identify several structural mechanisms of abuse that we believe will generalize to other target communities.</abstract>
       <url hash="bda1d9f5">2020.alw-1.2</url>
@@ -46,7 +46,7 @@
     <paper id="3">
       <title>Using Transfer-based Language Models to Detect Hateful and Offensive Language Online</title>
       <author><first>Vebjørn</first><last>Isaksen</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>16–27</pages>
       <abstract>Distinguishing hate speech from non-hate offensive language is challenging, as hate speech not always includes offensive slurs and offensive language not always express hate. Here, four deep learners based on the Bidirectional Encoder Representations from Transformers (BERT), with either general or domain-specific language models, were tested against two datasets containing tweets labelled as either ‘Hateful’, ‘Normal’ or ‘Offensive’. The results indicate that the attention-based models profoundly confuse hate speech with offensive and normal language. However, the pre-trained models outperform state-of-the-art results in terms of accurately predicting the hateful instances.</abstract>
       <url hash="b6670fac">2020.alw-1.3</url>
@@ -120,7 +120,7 @@
     <paper id="9">
       <title>Investigating Sampling Bias in Abusive Language Detection</title>
       <author><first>Dante</first><last>Razo</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>70–78</pages>
       <abstract>Abusive language detection is becoming increasingly important, but we still understand little about the biases in our datasets for abusive language detection, and how these biases affect the quality of abusive language detection. In the work reported here, we reproduce the investigation of Wiegand et al. (2019) to determine differences between different sampling strategies. They compared boosted random sampling, where abusive posts are upsampled, and biased topic sampling, which focuses on topics that are known to cause abusive language. Instead of comparing individual datasets created using these sampling strategies, we use the sampling strategies on a single, large dataset, thus eliminating the textual source of the dataset as a potential confounding factor. We show that differences in the textual source can have more effect than the chosen sampling strategy.</abstract>
       <url hash="77fb934f">2020.alw-1.9</url>
@@ -204,7 +204,7 @@
       <author><first>Guillaume</first><last>Sylvain</last></author>
       <author><first>Nithum</first><last>Thain</last></author>
       <author><first>Lucas</first><last>Dixon</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <pages>114–124</pages>
       <abstract>We present a new dataset of approximately 44000 comments labeled by crowdworkers. Each comment is labelled as either ‘healthy’ or ‘unhealthy’, in addition to binary labels for the presence of six potentially ‘unhealthy’ sub-attributes: (1) hostile; (2) antagonistic, insulting, provocative or trolling; (3) dismissive; (4) condescending or patronising; (5) sarcastic; and/or (6) an unfair generalisation. Each label also has an associated confidence score. We argue that there is a need for datasets which enable research based on a broad notion of ‘unhealthy online conversation’. We build this typology to encompass a substantial proportion of the individual comments which contribute to unhealthy online conversation. For some of these attributes, this is the first publicly available dataset of this scale. We explore the quality of the dataset, present some summary statistics and initial models to illustrate the utility of this data, and highlight limitations and directions for further research.</abstract>
       <url hash="f542eb80">2020.alw-1.15</url>
@@ -253,7 +253,7 @@
     <paper id="19">
       <title>Detecting <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Prejudice on Social Media</title>
       <author><first>Bertie</first><last>Vidgen</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>Ella</first><last>Guest</last></author>
       <author><first>Helen</first><last>Margetts</last></author>
       <author><first>David</first><last>Broniatowski</last></author>
diff --git a/data/xml/2020.amta.xml b/data/xml/2020.amta.xml
index c606e9384d..a5d3c8434e 100644
--- a/data/xml/2020.amta.xml
+++ b/data/xml/2020.amta.xml
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>A New Approach to Parameter-Sharing in Multilingual Neural Machine Translation</title>
       <author><first>Benyamin</first><last>Ahmadnia</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>1-6</pages>
       <bibkey>ahmadnia-dorr-2020-new</bibkey>
       <removed date="2021-10-05">This paper has been removed by the Association for Machine Translation in the Americas (AMTA) due to duplication of previous scholarly work, known to the first author, without attribution.</removed>
@@ -28,7 +28,7 @@
       <title>Investigation of Transformer-based Latent Attention Models for Neural Machine Translation</title>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Nikita</first><last>Makarov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>7-20</pages>
       <url hash="ab377789">2020.amta-research.2</url>
       <bibkey>bahar-etal-2020-investigation</bibkey>
@@ -78,7 +78,7 @@
     <paper id="8">
       <title>Towards Handling Compositionality in Low-Resource Bilingual Word Induction</title>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>89-101</pages>
       <url hash="06ef2839">2020.amta-research.8</url>
       <bibkey>hangya-fraser-2020-towards</bibkey>
@@ -129,9 +129,9 @@
     </paper>
     <paper id="13">
       <title>On Target Segmentation for Direct Speech Translation</title>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>137-150</pages>
       <url hash="351d5d8d">2020.amta-research.13</url>
@@ -140,7 +140,7 @@
     <paper id="14">
       <title>Domain Robustness in Neural Machine Translation</title>
       <author><first>Mathias</first><last>Müller</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>151-164</pages>
       <url hash="4c44c3d5">2020.amta-research.14</url>
@@ -204,7 +204,7 @@
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Catarina</first><last>Farinha</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>78-109</pages>
       <attachment type="presentation" hash="3fc0edf6">2020.amta-user.4.Presentation.pdf</attachment>
       <bibkey>stewart-etal-2020-comet</bibkey>
@@ -261,7 +261,7 @@
       <author><first>Rubén</first><last>Martínez-Domínguez</last></author>
       <author><first>Matīss</first><last>Rikters</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Paula</first><last>Reichenberg</last></author>
       <pages>217-223</pages>
       <url hash="c50305b2">2020.amta-user.11</url>
@@ -370,12 +370,12 @@
     </paper>
     <paper id="25">
       <title>A Tale of Eight Countries or the <fixed-case>EU</fixed-case> Council Presidency Translator in Retrospect</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Toms</first><last>Bergmanis</last></author>
       <author><first>Kristīne</first><last>Metuzāle</last></author>
       <author><first>Valters</first><last>Šics</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <pages>525-546</pages>
       <url hash="e759a91a">2020.amta-user.25</url>
       <attachment type="presentation" hash="5f0d71c0">2020.amta-user.25.Presentation.pdf</attachment>
@@ -412,8 +412,8 @@
       <year>2020</year>
       <editor><first>John E.</first><last>Ortega</last></editor>
       <editor><first>Marcello</first><last>Federico</last></editor>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
-      <editor><first>Maja</first><last>Popovic</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popovic</last></editor>
       <venue>amta</venue>
     </meta>
     <frontmatter>
@@ -452,7 +452,7 @@
     <paper id="5">
       <title><fixed-case>COPECO</fixed-case>: a Collaborative Post-Editing Corpus in Pedagogical Context</title>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Perrine</first><last>Schumacher</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <pages>61-78</pages>
@@ -464,7 +464,7 @@
       <author><first>Maarit</first><last>Koponen</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
       <author><first>Kaisa</first><last>Vitikainen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>79-92</pages>
       <url hash="2156b579">2020.amta-pemdt.6</url>
       <bibkey>koponen-etal-2020-mt-subtitling</bibkey>
@@ -476,7 +476,7 @@
       <author><first>Tim</first><last>Düwel</last></author>
       <author><first>Raksha</first><last>Shenoy</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>93-108</pages>
       <url hash="aeece50b">2020.amta-pemdt.7</url>
       <bibkey>herbig-etal-2020-improving</bibkey>
@@ -515,7 +515,7 @@
     </paper>
     <paper id="2">
       <title>Responsible ‘Gist’ <fixed-case>MT</fixed-case> Use in the Age of Neural <fixed-case>MT</fixed-case></title>
-      <author><first>Marianna J.</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna J.</first><last>Martindale</last></author>
       <pages>18-45</pages>
       <attachment type="presentation" hash="a4973739">2020.amta-impact.2.Presentation.pdf</attachment>
       <bibkey>martindale-2020-responsible</bibkey>
diff --git a/data/xml/2020.argmining.xml b/data/xml/2020.argmining.xml
index ab76ade288..c26db7cc31 100644
--- a/data/xml/2020.argmining.xml
+++ b/data/xml/2020.argmining.xml
@@ -87,8 +87,8 @@
       <author><first>Prakash</first><last>Poudyal</last></author>
       <author><first>Jaromir</first><last>Savelka</last></author>
       <author><first>Aagje</first><last>Ieven</last></author>
-      <author><first>Marie Francine</first><last>Moens</last></author>
-      <author><first>Teresa</first><last>Goncalves</last></author>
+      <author id="marie-francine-moens"><first>Marie Francine</first><last>Moens</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Goncalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <pages>67–75</pages>
       <abstract>In this paper, we publicly release an annotated corpus of 42 decisions of the European Court of Human Rights (ECHR). The corpus is annotated in terms of three types of clauses useful in argument mining: premise, conclusion, and non-argument parts of the text. Furthermore, relationships among the premises and conclusions are mapped. We present baselines for three tasks that lead from unstructured texts to structured arguments. The tasks are argument clause recognition, clause relation prediction, and premise/conclusion recognition. Despite a straightforward application of the bidirectional encoders from Transformers (BERT), we obtained very promising results F1 0.765 on argument recognition, 0.511 on relation prediction, and 0.859/0.628 on premise/conclusion recognition). The results suggest the usefulness of pre-trained language models based on deep neural network architectures in argument mining. Because of the simplicity of the baselines, there is ample space for improvement in future work based on the released corpus.</abstract>
@@ -107,7 +107,7 @@
     <paper id="10">
       <title>Use of Claim Graphing and Argumentation Schemes in Biomedical Literature: A Manual Approach to Analysis</title>
       <author><first>Eli</first><last>Moser</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>88–99</pages>
       <abstract>Argumentation in an experimental life science paper consists of a main claim being supported with reasoned argumentative steps based on the data garnered from the experiments that were carried out. In this paper we report on an investigation of the large scale argumentation structure found when examining five biochemistry journal publications. One outcome of this investigation of biochemistry articles suggests that argumentation schemes originally designed for genetic research articles may transfer to experimental biomedical literature in general. Our use of these argumentation schemes shows that claims depend not only on experimental data but also on other claims. The tendency for claims to use other claims as their supporting evidence in addition to the experimental data led to two novel models that have provided a better understanding of the large scale argumentation structure of a complete biochemistry paper. First, the claim graph displays the claims within a paper, their interactions, and their evidence. Second, another aspect of this argumentation network is further illustrated by the Model of Informational Hierarchy (MIH) which visualizes at a meta-level the flow of reasoning provided by the authors of the paper and also connects the main claim to the paper’s title. Together, these models, which have been produced by a manual examination of the biochemistry articles, would be likely candidates for a computational method that analyzes the large scale argumentation structure.</abstract>
       <url hash="bcee83fe">2020.argmining-1.10</url>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="12">
       <title>Style Analysis of Argumentative Texts by Mining Rhetorical Devices</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Viorel</first><last>Morari</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>106–116</pages>
@@ -135,7 +135,7 @@
       <title>Creating a Domain-diverse Corpus for Theory-based Argument Quality Assessment</title>
       <author><first>Lily</first><last>Ng</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Courtney</first><last>Napoles</last></author>
       <pages>117–126</pages>
       <abstract>Computational models of argument quality (AQ) have focused primarily on assessing the overall quality or just one specific characteristic of an argument, such as its convincingness or its clarity. However, previous work has claimed that assessment based on theoretical dimensions of argumentation could benefit writers, but developing such models has been limited by the lack of annotated data. In this work, we describe GAQCorpus, the first large, domain-diverse annotated corpus of theory-based AQ. We discuss how we designed the annotation task to reliably collect a large number of judgments with crowdsourcing, formulating theory-based guidelines that helped make subjective judgments of AQ more objective. We demonstrate how to identify arguments and adapt the annotation task for three diverse domains. Our work will inform research on theory-based argumentation annotation and enable the creation of more diverse corpora to support computational AQ assessment.</abstract>
diff --git a/data/xml/2020.autosimtrans.xml b/data/xml/2020.autosimtrans.xml
index 5f155eae7d..0a889da08c 100644
--- a/data/xml/2020.autosimtrans.xml
+++ b/data/xml/2020.autosimtrans.xml
@@ -7,7 +7,7 @@
       <editor><first>Colin</first><last>Cherry</last></editor>
       <editor><first>Liang</first><last>Huang</last></editor>
       <editor><first>Zhongjun</first><last>He</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <editor><first>James</first><last>Cross</last></editor>
       <editor id="yang-liu-ict"><first>Yang</first><last>Liu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -36,8 +36,8 @@
       <title>End-to-End Speech Translation with Adversarial Training</title>
       <author><first>Xuancai</first><last>Li</last></author>
       <author><first>Chen</first><last>Kehai</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <pages>10–14</pages>
       <abstract>End-to-End speech translation usually leverages audio-to-text parallel data to train an available speech translation model which has shown impressive results on various speech translation tasks. Due to the artificial cost of collecting audio-to-text parallel data, the speech translation is a natural low-resource translation scenario, which greatly hinders its improvement. In this paper, we proposed a new adversarial training method to leverage target monolingual data to relieve the low-resource shortcoming of speech translation. In our method, the existing speech translation model is considered as a Generator to gain a target language output, and another neural Discriminator is used to guide the distinction between outputs of speech translation model and true target monolingual sentences. Experimental results on the CCMT 2019-BSTC dataset speech translation task demonstrate that the proposed methods can significantly improve the performance of the End-to-End speech translation system.</abstract>
       <url hash="5403eb0f">2020.autosimtrans-1.2</url>
@@ -62,7 +62,7 @@
       <title>Improving Autoregressive <fixed-case>NMT</fixed-case> with Non-Autoregressive Model</title>
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>24–29</pages>
       <abstract>Autoregressive neural machine translation (NMT) models are often used to teach non-autoregressive models via knowledge distillation. However, there are few studies on improving the quality of autoregressive translation (AT) using non-autoregressive translation (NAT). In this work, we propose a novel Encoder-NAD-AD framework for NMT, aiming at boosting AT with global information produced by NAT model. Specifically, under the semantic guidance of source-side context captured by the encoder, the non-autoregressive decoder (NAD) first learns to generate target-side hidden state sequence in parallel. Then the autoregressive decoder (AD) performs translation from left to right, conditioned on source-side and target-side hidden states. Since AD has global information generated by low-latency NAD, it is more likely to produce a better translation with less time delay. Experiments on WMT14 En-De, WMT16 En-Ro, and IWSLT14 De-En translation tasks demonstrate that our framework achieves significant improvements with only 8% speed degeneration over the autoregressive NMT.</abstract>
       <url hash="57a3c992">2020.autosimtrans-1.4</url>
diff --git a/data/xml/2020.bea.xml b/data/xml/2020.bea.xml
index 2beb4f2dce..2056b9f138 100644
--- a/data/xml/2020.bea.xml
+++ b/data/xml/2020.bea.xml
@@ -25,7 +25,7 @@
       <title>Linguistic Features for Readability Assessment</title>
       <author><first>Tovly</first><last>Deutsch</last></author>
       <author><first>Masoud</first><last>Jasbi</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>1–17</pages>
       <abstract>Readability assessment aims to automatically classify text by the level appropriate for learning readers. Traditional approaches to this task utilize a variety of linguistically motivated features paired with simple machine learning models. More recent methods have improved performance by discarding these features and utilizing deep learning models. However, it is unknown whether augmenting deep learning models with linguistically motivated features would improve performance further. This paper combines these two approaches with the goal of improving overall model performance and addressing this question. Evaluating on two large readability corpora, we find that, given sufficient training data, augmenting deep learning models with linguistically motivated features does not improve state-of-the-art performance. Our results provide preliminary evidence for the hypothesis that the state-of-the-art deep learning models represent linguistic features of the text related to readability. Future research on the nature of representations formed in these models can shed light on the learned features and their relations to linguistically motivated ones hypothesized in traditional approaches.</abstract>
       <url hash="a5ef0d94">2020.bea-1.1</url>
@@ -51,11 +51,11 @@
     <paper id="3">
       <title>Multiple Instance Learning for Content Feedback Localization without Annotation</title>
       <author><first>Scott</first><last>Hellman</last></author>
-      <author><first>William</first><last>Murray</last></author>
+      <author id="william-r-murray"><first>William</first><last>Murray</last></author>
       <author><first>Adam</first><last>Wiemerslage</last></author>
       <author><first>Mark</first><last>Rosenstein</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
-      <author><first>Lee</first><last>Becker</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
       <author><first>Marcia</first><last>Derr</last></author>
       <pages>30–40</pages>
       <abstract>Automated Essay Scoring (AES) can be used to automatically generate holistic scores with reliability comparable to human scoring. In addition, AES systems can provide formative feedback to learners, typically at the essay level. In contrast, we are interested in providing feedback specialized to the content of the essay, and specifically for the content areas required by the rubric. A key objective is that the feedback should be localized alongside the relevant essay text. An important step in this process is determining where in the essay the rubric designated points and topics are discussed. A natural approach to this task is to train a classifier using manually annotated data; however, collecting such data is extremely resource intensive. Instead, we propose a method to predict these annotation spans without requiring any labeled annotation data. Our approach is to consider AES as a Multiple Instance Learning (MIL) task. We show that such models can both predict content scores and localize content by leveraging their sentence-level score predictions. This capability arises despite never having access to annotation training data. Implications are discussed for improving formative feedback and explainable AES models.</abstract>
@@ -78,7 +78,7 @@
       <title><fixed-case>CIMA</fixed-case>: A Large Open Access Dialogue Dataset for Tutoring</title>
       <author><first>Katherine</first><last>Stasaski</last></author>
       <author><first>Kimberly</first><last>Kao</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>52–64</pages>
       <abstract>One-to-one tutoring is often an effective means to help students learn, and recent experiments with neural conversation systems are promising. However, large open datasets of tutoring conversations are lacking. To remedy this, we propose a novel asynchronous method for collecting tutoring dialogue via crowdworkers that is both amenable to the needs of deep learning algorithms and reflective of pedagogical concerns. In this approach, extended conversations are obtained between crowdworkers role-playing as both students and tutors. The CIMA collection, which we make publicly available, is novel in that students are exposed to overlapping grounded concepts between exercises and multiple relevant tutoring responses are collected for the same input. CIMA contains several compelling properties from an educational perspective: student role-players complete exercises in fewer turns during the course of the conversation and tutor players adopt strategies that conform with some educational conversational norms, such as providing hints versus asking questions in appropriate contexts. The dataset enables a model to be trained to generate the next tutoring utterance in a conversation, conditioned on a provided action strategy.</abstract>
       <url hash="01bc5caa">2020.bea-1.5</url>
@@ -102,7 +102,7 @@
       <title>Annotation and Classification of Evidence and Reasoning Revisions in Argumentative Writing</title>
       <author><first>Tazin</first><last>Afrin</last></author>
       <author><first>Elaine Lin</first><last>Wang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Lindsay Clare</first><last>Matsumura</last></author>
       <author><first>Richard</first><last>Correnti</last></author>
       <pages>75–84</pages>
@@ -115,7 +115,7 @@
     <paper id="8">
       <title>Can Neural Networks Automatically Score Essay Traits?</title>
       <author><first>Sandeep</first><last>Mathias</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>85–91</pages>
       <abstract>Essay traits are attributes of an essay that can help explain how well written (or badly written) the essay is. Examples of traits include Content, Organization, Language, Sentence Fluency, Word Choice, etc. A lot of research in the last decade has dealt with automatic holistic essay scoring - where a machine rates an essay and gives a score for the essay. However, writers need feedback, especially if they want to improve their writing - which is why trait-scoring is important. In this paper, we show how a deep-learning based system can outperform feature-based machine learning systems, as well as a string kernel system in scoring essay traits.</abstract>
       <url hash="7665989d">2020.bea-1.8</url>
@@ -201,7 +201,7 @@
     <paper id="15">
       <title>Should You Fine-Tune <fixed-case>BERT</fixed-case> for Automated Essay Scoring?</title>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>151–162</pages>
       <abstract>Most natural language processing research now recommends large Transformer-based models with fine-tuning for supervised classification tasks; older strategies like bag-of-words features and linear models have fallen out of favor. Here we investigate whether, in automated essay scoring (AES) research, deep neural models are an appropriate technological choice. We find that fine-tuning BERT produces similar performance to classical models at significant additional cost. We argue that while state-of-the-art strategies do match existing best results, they come with opportunity costs in computational resources. We conclude with a review of promising areas for research on student essays where the unique characteristics of Transformers may provide benefits over classical methods to justify the costs.</abstract>
       <url hash="e55e6ec6">2020.bea-1.15</url>
@@ -233,7 +233,7 @@
     <paper id="18">
       <title>Automated Scoring of Clinical Expressive Language Evaluation Tasks</title>
       <author><first>Yiyi</first><last>Wang</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Meysam</first><last>Asgari</last></author>
       <author><first>Jill</first><last>Dolata</last></author>
       <pages>177–185</pages>
diff --git a/data/xml/2020.bionlp.xml b/data/xml/2020.bionlp.xml
index 65b7a4d449..c36ef56d4d 100644
--- a/data/xml/2020.bionlp.xml
+++ b/data/xml/2020.bionlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 19th SIGBioMed Workshop on Biomedical Language Processing</booktitle>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -33,7 +33,7 @@
       <title>Sequence-to-Set Semantic Tagging for Complex Query Reformulation and Automated Text Categorization in Biomedical <fixed-case>IR</fixed-case> using Self-Attention</title>
       <author><first>Manirupa</first><last>Das</last></author>
       <author><first>Juanxi</first><last>Li</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Simon</first><last>Lin</last></author>
       <author><first>Steve</first><last>Rust</last></author>
       <author><first>Yungui</first><last>Huang</last></author>
@@ -64,7 +64,7 @@
       <title>Improving Biomedical Analogical Retrieval with Embedding of Structural Dependencies</title>
       <author><first>Amandalynne</first><last>Paullada</last></author>
       <author><first>Bethany</first><last>Percha</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <pages>38–48</pages>
       <abstract>Inferring the nature of the relationships between biomedical entities from text is an important problem due to the difficulty of maintaining human-curated knowledge bases in rapidly evolving fields. Neural word embeddings have earned attention for an apparent ability to encode relational information. However, word embedding models that disregard syntax during training are limited in their ability to encode the structural relationships fundamental to cognitive theories of analogy. In this paper, we demonstrate the utility of encoding dependency structure in word embeddings in a model we call Embedding of Structural Dependencies (ESD) as a way to represent biomedical relationships in two analogical retrieval tasks: a relationship retrieval (RR) task, and a literature-based discovery (LBD) task meant to hypothesize plausible relationships between pairs of entities unseen in training. We compare our model to skip-gram with negative sampling (SGNS), using 19 databases of biomedical relationships as our evaluation data, with improvements in performance on 17 (LBD) and 18 (RR) of these sets. These results suggest embeddings encoding dependency path information are of value for biomedical analogy retrieval.</abstract>
       <url hash="fa2e9527">2020.bionlp-1.4</url>
@@ -76,7 +76,7 @@
       <author><first>Anna</first><last>Koroleva</last></author>
       <author><first>Sanjay</first><last>Kamath</last></author>
       <author><first>Patrick</first><last>Bossuyt</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>49–59</pages>
       <abstract>Improving the quality of medical research reporting is crucial to reduce avoidable waste in research and to improve the quality of health care. Despite various initiatives aiming at improving research reporting – guidelines, checklists, authoring aids, peer review procedures, etc. – overinterpretation of research results, also known as spin, is still a serious issue in research reporting. In this paper, we propose a Natural Language Processing (NLP) system for detecting several types of spin in biomedical articles reporting randomized controlled trials (RCTs). We use a combination of rule-based and machine learning approaches to extract important information on trial design and to detect potential spin. The proposed spin detection system includes algorithms for text structure analysis, sentence classification, entity and relation extraction, semantic similarity assessment. Our algorithms achieved operational performance for the these tasks, F-measure ranging from 79,42 to 97.86% for different tasks. The most difficult task is extracting reported outcomes. Our tool is intended to be used as a semi-automated aid tool for assisting both authors and peer reviewers to detect potential spin. The tool incorporates a simple interface that allows to run the algorithms and visualize their output. It can also be used for manual annotation and correction of the errors in the outputs. The proposed tool is the first tool for spin detection. The tool and the annotated dataset are freely available.</abstract>
       <url hash="d606a19f">2020.bionlp-1.5</url>
@@ -106,7 +106,7 @@
     <paper id="7">
       <title>A <fixed-case>BERT</fixed-case>-based One-Pass Multi-Task Model for Clinical Temporal Relation Extraction</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Farig</first><last>Sadeque</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
@@ -135,7 +135,7 @@
       <author><first>Zeljko</first><last>Kraljevic</last></author>
       <author><first>Daniel</first><last>Bean</last></author>
       <author><first>Richard</first><last>Dobson</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <author><first>Rebecca</first><last>Bendayan</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <pages>86–94</pages>
@@ -151,7 +151,7 @@
       <author><first>Liyan</first><last>Xu</last></author>
       <author><first>Julien</first><last>Hogan</last></author>
       <author><first>Rachel E.</first><last>Patzer</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>95–104</pages>
       <abstract>This paper presents a reinforcement learning approach to extract noise in long clinical documents for the task of readmission prediction after kidney transplant. We face the challenges of developing robust models on a small dataset where each document may consist of over 10K tokens with full of noise including tabular text and task-irrelevant sentences. We first experiment four types of encoders to empirically decide the best document representation, and then apply reinforcement learning to remove noisy text from the long documents, which models the noise extraction process as a sequential decision problem. Our results show that the old bag-of-words encoder outperforms deep learning-based encoders on this task, and reinforcement learning is able to improve upon baseline while pruning out 25% text segments. Our analysis depicts that reinforcement learning is able to identify both typical noisy tokens and task-specific noisy text.</abstract>
       <url hash="de916f91">2020.bionlp-1.10</url>
@@ -162,8 +162,8 @@
       <title>Evaluating the Utility of Model Configurations and Data Augmentation on Clinical Semantic Textual Similarity</title>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>105–111</pages>
       <abstract>In this paper, we apply pre-trained language models to the Semantic Textual Similarity (STS) task, with a specific focus on the clinical domain. In low-resource setting of clinical STS, these large models tend to be impractical and prone to overfitting. Building on BERT, we study the impact of a number of model design choices, namely different fine-tuning and pooling strategies. We observe that the impact of domain-specific fine-tuning on clinical STS is much less than that in the general domain, likely due to the concept richness of the domain. Based on this, we propose two data augmentation techniques. Experimental results on N2C2-STS 1 demonstrate substantial improvements, validating the utility of the proposed methods.</abstract>
       <url hash="83d7c2e8">2020.bionlp-1.11</url>
@@ -190,8 +190,8 @@
       <author><first>Jay</first><last>DeYoung</last></author>
       <author><first>Eric</first><last>Lehman</last></author>
       <author><first>Benjamin</first><last>Nye</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>123–132</pages>
       <abstract>How do we most effectively treat a disease or condition? Ideally, we could consult a database of evidence gleaned from clinical trials to answer such questions. Unfortunately, no such database exists; clinical trial results are instead disseminated primarily via lengthy natural language articles. Perusing all such articles would be prohibitively time-consuming for healthcare practitioners; they instead tend to depend on manually compiled <i>systematic reviews</i> of medical literature to inform care. NLP may speed this process up, and eventually facilitate immediate consult of published evidence. The <i>Evidence Inference</i> dataset was recently released to facilitate research toward this end. This task entails inferring the comparative performance of two treatments, with respect to a given outcome, from a particular article (describing a clinical trial) and identifying supporting evidence. For instance: Does this article report that <i>chemotherapy</i> performed better than <i>surgery</i> for <i>five-year survival rates</i> of operable cancers? In this paper, we collect additional annotations to expand the Evidence Inference dataset by 25%, provide stronger baseline models, systematically inspect the errors that these make, and probe dataset quality. We also release an <i>abstract only</i> (as opposed to full-texts) version of the task for rapid model prototyping. The updated corpus, documentation, and code for new baselines and evaluations are available at <url>http://evidence-inference.ebm-nlp.com/</url>.</abstract>
       <url hash="672114b7">2020.bionlp-1.13</url>
@@ -236,8 +236,8 @@
     <paper id="17">
       <title>Domain Adaptation and Instance Selection for Disease Syndrome Classification over Veterinary Clinical Notes</title>
       <author><first>Brian</first><last>Hur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Laura</first><last>Hardefeldt</last></author>
       <author><first>James</first><last>Gilkerson</last></author>
       <pages>156–166</pages>
@@ -265,7 +265,7 @@
     <paper id="19">
       <title>Extensive Error Analysis and a Learning-Based Evaluation of Medical Entity Recognition Systems to Approximate User Experience</title>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Berry</first><last>de Bruijn</last></author>
       <pages>177–186</pages>
       <abstract>When comparing entities extracted by a medical entity recognition system with gold standard annotations over a test set, two types of mismatches might occur, label mismatch or span mismatch. Here we focus on span mismatch and show that its severity can vary from a serious error to a fully acceptable entity extraction due to the subjectivity of span annotations. For a domain-specific BERT-based NER system, we showed that 25% of the errors have the same labels and overlapping span with gold standard entities. We collected expert judgement which shows more than 90% of these mismatches are accepted or partially accepted by the user. Using the training set of the NER system, we built a fast and lightweight entity classifier to approximate the user experience of such mismatches through accepting or rejecting them. The decisions made by this classifier are used to calculate a learning-based F-score which is shown to be a better approximation of a forgiving user’s experience than the relaxed F-score. We demonstrated the results of applying the proposed evaluation metric for a variety of deep learning medical entity recognition models trained with two datasets.</abstract>
@@ -278,7 +278,7 @@
       <author><first>Saadullah</first><last>Amin</last></author>
       <author><first>Katherine Ann</first><last>Dunfield</last></author>
       <author><first>Anna</first><last>Vechkaeva</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>187–194</pages>
       <abstract>Fact triples are a common form of structured knowledge used within the biomedical domain. As the amount of unstructured scientific texts continues to grow, manual annotation of these texts for the task of relation extraction becomes increasingly expensive. Distant supervision offers a viable approach to combat this by quickly producing large amounts of labeled, but considerably noisy, data. We aim to reduce such noise by extending an entity-enriched relation classification BERT model to the problem of multiple instance learning, and defining a simple data encoding scheme that significantly reduces noise, reaching state-of-the-art performance for distantly-supervised biomedical relation extraction. Our approach further encodes knowledge about the direction of relation triples, allowing for increased focus on relation learning by reducing noise and alleviating the need for joint learning with knowledge graph completion.</abstract>
       <url hash="51024790">2020.bionlp-1.20</url>
@@ -290,7 +290,7 @@
     <paper id="21">
       <title>Global Locality in Biomedical Relation and Event Extraction</title>
       <author><first>Elaheh</first><last>ShafieiBavani</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Xu</first><last>Zhong</last></author>
       <author><first>David</first><last>Martinez Iraola</last></author>
       <pages>195–204</pages>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="22">
       <title>An Empirical Study of Multi-Task Learning on <fixed-case>BERT</fixed-case> for Biomedical Text Mining</title>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Qingyu</first><last>Chen</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>205–214</pages>
diff --git a/data/xml/2020.blackboxnlp.xml b/data/xml/2020.blackboxnlp.xml
index 22629e4ade..c8157e05f7 100644
--- a/data/xml/2020.blackboxnlp.xml
+++ b/data/xml/2020.blackboxnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</booktitle>
       <editor><first>Afra</first><last>Alishahi</last></editor>
       <editor><first>Yonatan</first><last>Belinkov</last></editor>
-      <editor><first>Grzegorz</first><last>Chrupała</last></editor>
+      <editor id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></editor>
       <editor><first>Dieuwke</first><last>Hupkes</last></editor>
       <editor><first>Yuval</first><last>Pinter</last></editor>
       <editor><first>Hassan</first><last>Sajjad</last></editor>
@@ -22,7 +22,7 @@
     <paper id="1">
       <title><fixed-case>BERT</fixed-case>ering <fixed-case>RAMS</fixed-case>: What and How Much does <fixed-case>BERT</fixed-case> Already Know About Event Arguments? - A Study on the <fixed-case>RAMS</fixed-case> Dataset</title>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1–10</pages>
       <abstract>Using the attention map based probing framework from (Clark et al., 2019), we observe that, on the RAMS dataset (Ebner et al., 2020), BERT’s attention heads have modest but well above-chance ability to spot event arguments sans any training or domain finetuning, varying from a low of 17.77% for Place to a high of 51.61% for Artifact. Next, we find that linear combinations of these heads, estimated with approx. 11% of available total event argument detection supervision, can push performance well higher for some roles — highest two being Victim (68.29% Accuracy) and Artifact (58.82% Accuracy). Furthermore, we investigate how well our methods do for cross-sentence event arguments. We propose a procedure to isolate “best heads” for cross-sentence argument detection separately of those for intra-sentence arguments. The heads thus estimated have superior cross-sentence performance compared to their jointly estimated equivalents, albeit only under the unrealistic assumption that we already know the argument is present in another sentence. Lastly, we seek to isolate to what extent our numbers stem from lexical frequency based associations between gold arguments and roles. We propose NONCE, a scheme to create adversarial test examples by replacing gold arguments with randomly generated “nonce” words. We find that learnt linear combinations are robust to NONCE, though individual best heads can be more sensitive.</abstract>
       <url hash="22c98ad3">2020.blackboxnlp-1.1</url>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Emergent Language Generalization and Acquisition Speed are not tied to Compositionality</title>
       <author><first>Eugene</first><last>Kharitonov</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>11–15</pages>
       <abstract>Studies of discrete languages emerging when neural agents communicate to solve a joint task often look for evidence of compositional structure. This stems for the expectation that such a structure would allow languages to be acquired faster by the agents and enable them to generalize better. We argue that these beneficial properties are only loosely connected to compositionality. In two experiments, we demonstrate that, depending on the task, non-compositional languages might show equal, or better, generalization performance and acquisition speed than compositional ones. Further research in the area should be clearer about what benefits are expected from compositionality, and how the latter would lead to them.</abstract>
       <url hash="30a2f114">2020.blackboxnlp-1.2</url>
@@ -126,7 +126,7 @@
     <paper id="10">
       <title>The Explanation Game: Towards Prediction Explainability through Sparse Communication</title>
       <author><first>Marcos</first><last>Treviso</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>107–118</pages>
       <abstract>Explainability is a topic of growing importance in NLP. In this work, we provide a unified perspective of explainability as a communication problem between an explainer and a layperson about a classifier’s decision. We use this framework to compare several explainers, including gradient methods, erasure, and attention mechanisms, in terms of their communication success. In addition, we reinterpret these methods in the light of classical feature selection, and use this as inspiration for new embedded explainers, through the use of selective, sparse attention. Experiments in text classification and natural language inference, using different configurations of explainers and laypeople (including both machines and humans), reveal an advantage of attention-based explainers over gradient and erasure methods, and show that selective attention is a simpler alternative to stochastic rationalizers. Human experiments show strong results on text classification with post-hoc explainers trained to optimize communication success.</abstract>
       <url hash="b97a2a97">2020.blackboxnlp-1.10</url>
@@ -162,7 +162,7 @@
       <title>Controlling the Imprint of Passivization and Negation in Contextualized Representations</title>
       <author><first>Hande</first><last>Celikkanat</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>136–148</pages>
       <abstract>Contextualized word representations encode rich information about syntax and semantics, alongside specificities of each context of use. While contextual variation does not always reflect actual meaning shifts, it can still reduce the similarity of embeddings for word instances having the same meaning. We explore the imprint of two specific linguistic alternations, namely passivization and negation, on the representations generated by neural models trained with two different objectives: masked language modeling and translation. Our exploration methodology is inspired by an approach previously proposed for removing societal biases from word vectors. We show that passivization and negation leave their traces on the representations, and that neutralizing this information leads to more similar embeddings for words that should preserve their meaning in the transformation. We also find clear differences in how the respective features generalize across datasets.</abstract>
@@ -244,7 +244,7 @@
     <paper id="20">
       <title>Exploring Neural Entity Representations for Semantic Information</title>
       <author><first>Andrew</first><last>Runge</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>204–216</pages>
       <abstract>Neural methods for embedding entities are typically extrinsically evaluated on downstream tasks and, more recently, intrinsically using probing tasks. Downstream task-based comparisons are often difficult to interpret due to differences in task structure, while probing task evaluations often look at only a few attributes and models. We address both of these issues by evaluating a diverse set of eight neural entity embedding methods on a set of simple probing tasks, demonstrating which methods are able to remember words used to describe entities, learn type, relationship and factual information, and identify how frequently an entity is mentioned. We also compare these methods in a unified framework on two entity linking tasks and discuss how they generalize to different model architectures and datasets.</abstract>
       <url hash="3b1958b1">2020.blackboxnlp-1.20</url>
@@ -253,7 +253,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>BERT</fixed-case>s of a feather do not generalize together: Large variability in generalization across models with similar test set performance</title>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Junghyun</first><last>Min</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>217–227</pages>
@@ -276,7 +276,7 @@
     <paper id="23">
       <title>Discovering the Compositional Structure of Vector Representations with Role Learning Networks</title>
       <author><first>Paul</first><last>Soulos</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <author><first>Paul</first><last>Smolensky</last></author>
       <pages>238–254</pages>
@@ -301,7 +301,7 @@
       <title>Investigating Novel Verb Learning in <fixed-case>BERT</fixed-case>: Selectional Preference Classes and Alternation-Based Syntactic Generalization</title>
       <author><first>Tristan</first><last>Thrush</last></author>
       <author><first>Ethan</first><last>Wilcox</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>265–275</pages>
       <abstract>Previous studies investigating the syntactic abilities of deep learning models have not targeted the relationship between the strength of the grammatical generalization and the amount of evidence to which the model is exposed during training. We address this issue by deploying a novel word-learning paradigm to test BERT’s few-shot learning capabilities for two aspects of English verbs: alternations and classes of selectional preferences. For the former, we fine-tune BERT on a single frame in a verbal-alternation pair and ask whether the model expects the novel verb to occur in its sister frame. For the latter, we fine-tune BERT on an incomplete selectional network of verbal objects and ask whether it expects unattested but plausible verb/object pairs. We find that BERT makes robust grammatical generalizations after just one or two instances of a novel word in fine-tuning. For the verbal alternation tests, we find that the model displays behavior that is consistent with a transitivity bias: verbs seen few times are expected to take direct objects, but verbs seen with direct objects are not expected to occur intransitively.</abstract>
       <url hash="82f892db">2020.blackboxnlp-1.25</url>
@@ -313,7 +313,7 @@
       <author><first>Benjamin</first><last>Newman</last></author>
       <author><first>John</first><last>Hewitt</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>276–291</pages>
       <abstract>Extrapolation to unseen sequence lengths is a challenge for neural generative models of language. In this work, we characterize the effect on length extrapolation of a modeling decision often overlooked: predicting the end of the generative process through the use of a special end-of-sequence (EOS) vocabulary item. We study an oracle setting - forcing models to generate to the correct sequence length at test time - to compare the length-extrapolative behavior of networks trained to predict EOS (+EOS) with networks not trained to (-EOS). We find that -EOS substantially outperforms +EOS, for example extrapolating well to lengths 10 times longer than those seen at training time in a bracket closing task, as well as achieving a 40% improvement over +EOS in the difficult SCAN dataset length generalization task. By comparing the hidden states and dynamics of -EOS and +EOS models, we observe that +EOS models fail to generalize because they (1) unnecessarily stratify their hidden states by their linear position is a sequence (structures we call length manifolds) or (2) get stuck in clusters (which we refer to as length attractors) once the EOS token is the highest-probability prediction.</abstract>
       <url hash="9400b4dc">2020.blackboxnlp-1.26</url>
diff --git a/data/xml/2020.bucc.xml b/data/xml/2020.bucc.xml
index a14ceb1aa1..8a5e26a1b6 100644
--- a/data/xml/2020.bucc.xml
+++ b/data/xml/2020.bucc.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 13th Workshop on Building and Using Comparable Corpora</booktitle>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="5">
       <title>Mining Semantic Relations from Comparable Corpora through Intersections of Word Embeddings</title>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Larisa</first><last>Grčić Simeunović</last></author>
       <author><first>Matej</first><last>Martinc</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
@@ -101,8 +101,8 @@
       <title><fixed-case>LMU</fixed-case> Bilingual Dictionary Induction System with Word Surface Similarity Scores for <fixed-case>BUCC</fixed-case> 2020</title>
       <author><first>Silvia</first><last>Severini</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>49–55</pages>
       <abstract>The task of Bilingual Dictionary Induction (BDI) consists of generating translations for source language words which is important in the framework of machine translation (MT). The aim of the BUCC 2020 shared task is to perform BDI on various language pairs using comparable corpora. In this paper, we present our approach to the task of English-German and English-Russian language pairs. Our system relies on Bilingual Word Embeddings (BWEs) which are often used for BDI when only a small seed lexicon is available making them particularly effective in a low-resource setting. On the other hand, they perform well on high frequency words only. In order to improve the performance on rare words as well, we combine BWE based word similarity with word surface similarity methods, such as orthography In addition to the often used top-n translation method, we experiment with a margin based approach aiming for dynamic number of translations for each source word. We participate in both the open and closed tracks of the shared task and we show improved results of our method compared to simple vector similarity based approaches. Our system was ranked in the top-3 teams and achieved the best results for English-Russian.</abstract>
       <url hash="b20c67c8">2020.bucc-1.8</url>
diff --git a/data/xml/2020.calcs.xml b/data/xml/2020.calcs.xml
index f984828f07..dc1a542bfe 100644
--- a/data/xml/2020.calcs.xml
+++ b/data/xml/2020.calcs.xml
@@ -8,7 +8,7 @@
       <editor><first>Kalika</first><last>Bali</last></editor>
       <editor><first>Sunayana</first><last>Sitaram</last></editor>
       <editor><first>Amitava</first><last>Das</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -32,7 +32,7 @@
     <paper id="2">
       <title>A New Dataset for Natural Language Inference from Code-mixed Conversations</title>
       <author><first>Simran</first><last>Khanuja</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>9–16</pages>
@@ -97,7 +97,7 @@
     <paper id="8">
       <title>Code-mixed parse trees and how to find them</title>
       <author><first>Anirudh</first><last>Srinivasan</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>57–64</pages>
       <abstract>In this paper, we explore the methods of obtaining parse trees of code-mixed sentences and analyse the obtained trees. Existing work has shown that linguistic theories can be used to generate code-mixed sentences from a set of parallel sentences. We build upon this work, using one of these theories, the Equivalence-Constraint theory to obtain the parse trees of synthetically generated code-mixed sentences and evaluate them with a neural constituency parser. We highlight the lack of a dataset non-synthetic code-mixed constituency parse trees and how it makes our evaluation difficult. To complete our evaluation, we convert a code-mixed dependency parse tree set into “pseudo constituency trees” and find that a parser trained on synthetically generated trees is able to decently parse these as well.</abstract>
diff --git a/data/xml/2020.ccl.xml b/data/xml/2020.ccl.xml
index fe19e40900..b97e06c7a5 100644
--- a/data/xml/2020.ccl.xml
+++ b/data/xml/2020.ccl.xml
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>基于语料库的武侠与仙侠网络小说文体、词汇及主题对比分析(A Corpus-based Contrastive Analysis of Style, Vocabulary and Theme of Wuxia and Xianxia <fixed-case>I</fixed-case>nternet Novels)</title>
       <author><first>Sanle</first><last>Zhang</last><variant script="hani"><first>三乐</first><last>张</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Hu</first><last>Zhang</last><variant script="hani"><first>虎</first><last>张</last></variant></author>
       <pages>10–19</pages>
       <abstract>网络文学在我国发展迅猛,其数量和影响力呈现逐年上升的趋势,但目前尚无公开的较大规模网络文学作品语料库,鲜见基于语料库对网络文学具体类别作品的定量研究。本文初步建立了一个网络文学语料库,其中包括武侠和仙侠网络小说,使用文本计量、词频统计以及主题挖掘的方法对两类小说的文体风格、具体词汇使用和小说主题进行对比分析。通过比较,我们发现两类小说的文体风格大致相同,它们在词汇的使用和主题上既有共性又各具特色。从微观到宏观,从表面到内容,将定量统计和定性分析相结合,多角度、多层次的对武侠和仙侠网络小说进行比较。</abstract>
@@ -41,7 +41,7 @@
     <paper id="3">
       <title>基于计量的百年中国人名用字性别特征研究(A Quantified Research on Gender Characteristics of <fixed-case>C</fixed-case>hinese Names in A Century)</title>
       <author><first>Bingjie</first><last>Du</last><variant script="hani"><first>冰洁</first><last>杜</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Yongsheng</first><last>Tian</last><variant script="hani"><first>永胜</first><last>田</last></variant></author>
       <pages>20–30</pages>
       <abstract>本文构建了一个包含11万以上条目规模的中国名人人名数据库,每条数据含有人名、性别、出生地等社会文化标签,同时含有拼音、笔画、偏旁等文字信息标签,这是目前已知最大的可用于研究的汉语真人人名数据库。基于该数据库,本文从中选择1919年至今的人名,用定性与定量结合的方法探究人名中汉字的特征和其性别差异以及历时变化。从人名长度来看,男性人名比女性人名长;从人名用字的难易度来看,女性用字比男性更复杂;从用字丰富度来看,人名用字越来越单一和集中化,男性人名的用字丰富度大于女性人名。计算人名用字的性别偏度后发现女性人名的专用自更多。两性用字意象有明显的不同,用字的意象随着时间发生改变,但改变最明显的时间节点是改革开放前后,其中女性的变化比男性显著。除此之外,我们还得出人名中的性别极性字表、各个阶段的高频字表、用字变化趋势表等。</abstract>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>伟大的男人和倔强的女人:基于语料库的形容词性别偏度历时研究(Great Males and Stubborn Females: A Diachronic Study of Corpus-Based Gendered Skewness in <fixed-case>C</fixed-case>hinese Adjectives)</title>
       <author><first>Shucheng</first><last>Zhu</last><variant script="hani"><first>述承</first><last>朱</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>31–42</pages>
       <abstract>性别偏见现象是社会语言学和计算语学学者均关注的研究热点,但目前大多数研究都是基于英语的,鲜有对汉语中性别偏见现象,特别是基于形容词的研究缺乏。而形容词是衡量社会对男性和女性角色规约的有力抓手。本文首先利用调查问卷的方法,构建了一个含有466个形容词的数据集,定义性别偏度为特定形容词词义和男性或女性群体相匹配的程度,并计算了数据集中每个形容词的性别偏度。然后基于DCC语料库,研究了《人民日报》的形容词性别偏度的历时总体变化,并考察了和姓名搭配的形容词的历时变化。发现《人民日报》所使用的形容词随时间的推移整体呈现中性化趋势,但在文化大革命期间呈现非常男性化的特征,和男性姓名搭配的形容词整体呈现中性化趋势。</abstract>
       <url hash="a5a64215">2020.ccl-1.4</url>
@@ -184,7 +184,7 @@
     <paper id="16">
       <title>融合目标端句法的<fixed-case>AMR</fixed-case>-to-Text生成(<fixed-case>AMR</fixed-case>-to-Text Generation with Target Syntax)</title>
       <author><first>Jie</first><last>Zhu</last><variant script="hani"><first>杰</first><last>朱</last></variant></author>
-      <author><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
       <pages>162–171</pages>
       <abstract>抽象语义表示到文本(AMR-to-Text)生成的任务是给定AMR图,生成相同语义表示的文本。可以把此任务当作一个从源端AMR图到目标端句子的机器翻译任务。目前存在的一些方法都在探索如何更好的对图结构进行建模。然而,它们都存在一个未限定的问题,因为在生成阶段许多句法的决策并不受语义图的约束,从而忽略了句子内部潜藏的句法信息。为了明确考虑这一不足,该文提出一种直接而有效的方法,显示的在AMR-to-Text生成的任务中融入句法信息,并在Transformer和目前该任务最优性能的模型上进行了实验。实验结果表明,在现存的两份标准英文数据集LDC2018E86和LDC2017T10上,都取得了显著的提升,达到了新的最高性能。</abstract>
       <url hash="b8ad32a5">2020.ccl-1.16</url>
@@ -211,7 +211,7 @@
       <author><first>Feng</first><last>Jiang</last><variant script="hani"><first>峰</first><last>蒋</last></variant></author>
       <author><first>Xiaomin</first><last>Chu</last><variant script="hani"><first>晓敏</first><last>褚</last></variant></author>
       <author><first>Peifeng</first><last>Li</last><variant script="hani"><first>培峰</first><last>李</last></variant></author>
-      <author><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
       <pages>183–194</pages>
       <abstract>作为宏观篇章分析中的基础任务,篇章结构识别任务的目的是识别相邻篇章单元之间的结构,并层次化构建篇章结构树。已有的工作只考虑局部的结构和语义信息或只考虑全局信息。因此,本文提出了一种融合全局和局部信息的指针网络模型,该模型在考虑全局的语义信息同时,又考虑局部段落间的语义关系密切程度,从而有效地提高宏观篇章结构识别的能力。在汉语宏观篇章树库(MCDTB)的实验结果表明,本文所提出的模型性能优于目前性能最好的模型。</abstract>
       <url hash="bc3aad3e">2020.ccl-1.18</url>
@@ -224,7 +224,7 @@
       <author><first>Mingtong</first><last>Liu</last><variant script="hani"><first>明童</first><last>刘</last></variant></author>
       <author><first>Yuanmeng</first><last>Chen</last><variant script="hani"><first>圆梦</first><last>陈</last></variant></author>
       <author><first>Yujie</first><last>Zhang</last><variant script="hani"><first>玉洁</first><last>张</last></variant></author>
-      <author><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
       <author><first>Yufeng</first><last>Chen</last><variant script="hani"><first>钰枫</first><last>陈</last></variant></author>
       <pages>195–206</pages>
       <abstract>组合原则表明句子的语义由其构成成分的语义按照一定规则组合而成, 由此基于句法结构的语义组合计算一直是一个重要的探索方向,其中采用树结构的组合计算方法最具有代表性。但是该方法难以应用于大规模数据处理,主要问题是其语义组合的顺序依赖于具体树的结构,无法实现并行处理。本文提出一种基于图的依存句法分析和语义组合计算的联合框架,并借助复述识别任务训练语义组合模型和句法分析模型。一方面图模型可以在训练和预测阶段采用并行处理,极大缩短计算时间;另一方面联合句法分析的语义组合框架不必依赖外部句法分析器,同时两个任务的联合学习可使语义表示同时学习句法结构和语义的上下文信息。我们在公开汉语复述识别数据集LCQMC上进行评测,实验结果显示准确率接近树结构组合方法,达到79.54%,而预测速度提升高达30倍。</abstract>
@@ -247,7 +247,7 @@
       <title>联合依存分析的汉语语义组合模型(<fixed-case>C</fixed-case>hinese Semantic Composition Model with Dependency Parsing)</title>
       <author><first>Yuanmeng</first><last>Chen</last><variant script="hani"><first>圆梦</first><last>陈</last></variant></author>
       <author><first>Yujie</first><last>Zhang</last><variant script="hani"><first>玉洁</first><last>张</last></variant></author>
-      <author><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
       <author><first>Yufeng</first><last>Chen</last><variant script="hani"><first>钰枫</first><last>陈</last></variant></author>
       <pages>215–224</pages>
       <abstract>在语义组合方法中,结构化方法强调以结构信息指导词义表示的组合方式。现有结构化语义组合方法使用外部分析器获取句法结构信息,导致句法分析与语义组合相互割裂,句法分析的精度严重制约语义组合模型的性能,且训练数据领域不一致等问题会进一步加剧性能的下降。对此,本文提出联合依存分析的语义组合模型,将依存分析与语义组合进行联合,一方面在训练语义组合模型时对依存分析模型进行微调,使其能够更适应语义组合模型使用的训练数据的领域特点;另一方面,在语义组合部分加入依存分析的中间信息表示,获取更丰富的结构信息和语义信息,以此来降低语义组合模型对依存分析错误结果的敏感度,提升模型的鲁棒性。我们以汉语为具体研究对象,将语义组合模型用于复述识别任务,并在CTB5汉语依存分析数据和LCQMC汉语复述识别数据上验证本文提出的模型。实验结果显示,本文所提方法在复述识别任务上的预测正确率和F1值上分别达到76.81%和78.03%;我们进一步设计实验对联合学习和中间信息利用的有效性进行验证,并与相关代表性工作进行了对比分析。</abstract>
@@ -260,7 +260,7 @@
       <author><first>Mengyu</first><last>Guan</last><variant script="hani"><first>梦雨</first><last>管</last></variant></author>
       <author><first>Zhongqing</first><last>Wang</last><variant script="hani"><first>中卿</first><last>王</last></variant></author>
       <author><first>Shoushan</first><last>Li</last><variant script="hani"><first>寿山</first><last>李</last></variant></author>
-      <author><first>Guodong</first><last>Zhou</last><variant script="hani"><first>国栋</first><last>周</last></variant></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><variant script="hani"><first>国栋</first><last>周</last></variant></author>
       <pages>225–235</pages>
       <abstract>现有的对话系统中存在着生成“好的”、“我不知道”等无意义的安全回复问题。日常对话中,对话者通常围绕特定的主题进行讨论且每句话都有明显的情感和意图。因此该文提出了基于对话约束的回复生成模型,即在Seq2Seq模型的基础上,结合对对话的主题、情感、意图的识别。该方法对生成回复的主题、情感和意图进行约束,从而生成具有合理的情感和意图且与对话主题相关的回复。实验证明,该文提出的方法能有效地提高生成回复的质量。</abstract>
       <url hash="6b251be6">2020.ccl-1.22</url>
@@ -400,7 +400,7 @@
     <paper id="34">
       <title>小样本关系分类研究综述(Few-Shot Relation Classification: A Survey)</title>
       <author><first>Han</first><last>Hu</last><variant script="hani"><first>晗</first><last>胡</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>363–375</pages>
       <abstract>关系分类作为构建结构化知识的重要一环,在自然语言处理领域备受关注。但在很多应用领域中(医疗、金融领域),收集充足的用于训练关系分类模型的数据是十分困难的。近年来,仅需要少量训练样本的小样本学习研究逐渐新兴于各大领域。本文对近期小样本关系分类模型与方法进行了系统的综述。根据度量方法的不同,将现有方法分为原型式和分布式两大类。根据是否利用额外信息,将模型分为预训练和非预训练两大类。此外,除了常规设定下的小样本学习,本文还梳理了跨领域和稀缺资源场景下的小样本学习,并探讨了目前小样本关系分类方法的局限性,分析了跨领域小样本 学习面临的技术挑战。最后,展望了小样本关系分类未来的发展方向。</abstract>
       <url hash="c426fb06">2020.ccl-1.34</url>
@@ -413,7 +413,7 @@
       <author><first>Fan</first><last>Wu</last><variant script="hani"><first>凡</first><last>吴</last></variant></author>
       <author><first>Zhongqing</first><last>Wang</last><variant script="hani"><first>中卿</first><last>王</last></variant></author>
       <author><first>Peifeng</first><last>Li</last><variant script="hani"><first>培峰</first><last>李</last></variant></author>
-      <author><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
       <pages>376–389</pages>
       <abstract>传统的事件论元抽取方法把该任务当作句子中实体提及的多分类或序列标注任务,论元角色的类别在这些方法中只能作为向量表示,而忽略了论元角色的先验信息。实际上,论元角色的语义和论元本身有很大关系。对此,本文提议将其当作机器阅读理解任务,把论元角色表述为自然语言描述的问题,通过在上下文中回答这些问题来抽取论元。该方法更好地利用了论元角色类别的先验信息,在ACE2005中文语料上的实验证明了该方法的有效性。</abstract>
       <url hash="1ee01eeb">2020.ccl-1.35</url>
@@ -456,7 +456,7 @@
     <paper id="39">
       <title>“细粒度英汉机器翻译错误分析语料库”的构建与思考(Construction of Fine-Grained Error Analysis Corpus of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Translation and Its Implications)</title>
       <author><first>Bailian</first><last>Qiu</last><variant script="hani"><first>白莲</first><last>裘</last></variant></author>
-      <author><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
       <author><first>Maoxi</first><last>Li</last><variant script="hani"><first>茂西</first><last>李</last></variant></author>
       <author><first>Cong</first><last>Chen</last><variant script="hani"><first>聪</first><last>陈</last></variant></author>
       <author><first>Fan</first><last>Xu</last><variant script="hani"><first>凡</first><last>徐</last></variant></author>
@@ -469,8 +469,8 @@
     <paper id="40">
       <title>层次化结构全局上下文增强的篇章级神经机器翻译(Hierarchical Global Context Augmented Document-level Neural Machine Translation)</title>
       <author><first>Linqing</first><last>Chen</last><variant script="hani"><first>林卿</first><last>陈</last></variant></author>
-      <author><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
-      <author><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
       <pages>434–445</pages>
       <abstract>如何有效利用篇章上下文信息一直是篇章级神经机器翻译研究领域的一大挑战。本文提出利用来源于整个篇章的层次化全局上下文提高篇章级神经机器翻译性能。为了实现该目标,本文模型分别获取当前句内单词与篇章内所有句子及单词之间的依赖关系,结合不同层次的依赖关系以获取含有层次化篇章信息的全局上下文。最终源语言当前句子中的每个单词都能获取其独有的综合词和句级别依赖关系的上下文。为了充分利用平行句对语料在训练中的优势本文使用两步训练法,在句子级语料训练模型的基础上使用含有篇章信息的语料进行二次训练以获得捕获全局上下文的能力。在若干基准语料数据集上的实验表明本文提出的模型与若干强基准模型相比取得了有意义的翻译质量提升。实验进一步表明,结合层次化篇章信息的上下文比仅使用词级别上下文更具优势。除此之外,本文尝试通过不同方式将全局上下文与翻译模型结合并观察其对模型性能的影响,并初步探究篇章翻译中全局上下文在篇章中的分布情况。</abstract>
       <url hash="3360adfb">2020.ccl-1.40</url>
@@ -614,9 +614,9 @@
       <title>面向医学文本处理的医学实体标注规范(Medical Entity Annotation Standard for Medical Text Processing)</title>
       <author><first>Huan</first><last>Zhang</last><variant script="hani"><first>欢</first><last>张</last></variant></author>
       <author><first>Yuan</first><last>Zong</last><variant script="hani"><first>源</first><last>宗</last></variant></author>
-      <author><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
       <author><first>Zhifang</first><last>Sui</last><variant script="hani"><first>志方</first><last>穗</last></variant></author>
-      <author><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
       <author><first>Kunli</first><last>Zhang</last><variant script="hani"><first>坤丽</first><last>张</last></variant></author>
       <pages>561–571</pages>
       <abstract>随着智慧医疗的普及,利用自然语言处理技术识别医学信息的需求日益增长。目前,针对医学实体而言,医学共享语料库仍处于空白状态,这对医学文本信息处理各项任务的进展造成了巨大阻力。如何判断不同的医学实体类别?如何界定不同实体间的涵盖范围?这些问题导致缺乏类似通用场景的大规模规范标注的医学文本数据。针对上述问题,该文参考了UMLS中定义的语义类型,提出面向医学文本信息处理的医学实体标注规范,涵盖了疾病、临床表现、医疗程序、医疗设备等9种医学实体,以及基于规范构建医学实体标注语料库。该文综述了标注规范的描述体系、分类原则、混淆处理、语料标注过程以及医学实体自动标注基线实验等相关问题,希望能为医学实体语料库的构建提供可参考的标注规范,以及为医学实体识别提供语料支持。</abstract>
@@ -653,7 +653,7 @@
     </paper>
     <paper id="55">
       <title><fixed-case>CDCPP</fixed-case>:跨领域中文标点符号预测(<fixed-case>CDCPP</fixed-case>: Cross-Domain <fixed-case>C</fixed-case>hinese Punctuation Prediction)</title>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Weikang</first><last>Wang</last><variant script="hani"><first>伟康</first><last>王</last></variant></author>
       <author><first>Likun</first><last>Qiu</last><variant script="hani"><first>立坤</first><last>邱</last></variant></author>
       <author><first>Bingjie</first><last>Du</last><variant script="hani"><first>冰洁</first><last>杜</last></variant></author>
@@ -665,7 +665,7 @@
     </paper>
     <paper id="56">
       <title>多目标情感分类中文数据集构建及分析研究(Construction and Analysis of <fixed-case>C</fixed-case>hinese Multi-Target Sentiment Classification Dataset)</title>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Yongsheng</first><last>Tian</last><variant script="hani"><first>永胜</first><last>田</last></variant></author>
       <author><first>Chengyu</first><last>Du</last><variant script="hani"><first>成玉</first><last>杜</last></variant></author>
       <author><first>Likun</first><last>Qiu</last><variant script="hani"><first>立坤</first><last>邱</last></variant></author>
@@ -704,7 +704,7 @@
       <author><first>Hengrui</first><last>Guo</last><variant script="hani"><first>恒睿</first><last>郭</last></variant></author>
       <author><first>Zhongqing</first><last>Wang</last><variant script="hani"><first>中卿</first><last>王</last></variant></author>
       <author><first>Peifeng</first><last>Li</last><variant script="hani"><first>培峰</first><last>李</last></variant></author>
-      <author><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><variant script="hani"><first>巧明</first><last>朱</last></variant></author>
       <pages>634–644</pages>
       <abstract>面向社交媒体的事件聚类旨在根据事件特征对短文本聚类。目前,事件聚类模型主要分为无监督模型和有监督模型。无监督模型聚类效果较差,有监督模型依赖大量标注数据。基于此,本文提出了一种半监督事件聚类模型(SemiEC),该模型在小规模标注数据的基础上,利用LSTM表征事件,利用线性模型计算文本相似度,进行增量聚类,利用增量聚类产生的标注数据对模型再训练,结束后对不确定样本再聚类。实验表明,SemiEC的性能相比其他模型均有所提高。</abstract>
       <url hash="c13368f7">2020.ccl-1.59</url>
@@ -910,7 +910,7 @@
       <author><first>Xingchen</first><last>Li</last></author>
       <author><first>Mingtong</first><last>Liu</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <pages>820–830</pages>
       <abstract>In Chinese dependency parsing, the joint model of word segmentation, POS tagging and dependency parsing has become the mainstream framework because it can eliminate error propagation and share knowledge, where the transition-based model with feature templates maintains the best performance. Recently, the graph-based joint model (Yan et al., 2019) on word segmentation and dependency parsing has achieved better performance, demonstrating the advantages of the graph-based models. However, this work can not provide POS information for downstream tasks, and the POS tagging task was proved to be helpful to the dependency parsing according to the research of the transition-based model. Therefore, we propose a graph-based joint model for Chinese word segmentation, POS tagging and dependency parsing. We designed a charater-level POS tagging task, and then train it jointly with the model of Yan et al. (2019). We adopt two methods of joint POS tagging task, one is by sharing parameters, the other is by using tag attention mechanism, which enables the three tasks to better share intermediate information and improve each other’s performance. The experimental results on the Penn Chinese treebank (CTB5) show that our proposed joint model improved by 0.38% on dependency parsing than the model of Yan et al. (2019). Compared with the best transition-based joint model, our model improved by 0.18%, 0.35% and 5.99% respectively in terms of word segmentation, POS tagging and dependency parsing.</abstract>
@@ -968,7 +968,7 @@
     <paper id="81">
       <title><fixed-case>C</fixed-case>hinese Long and Short Form Choice Exploiting Neural Network Language Modeling Approaches</title>
       <author><first>Lin</first><last>Li</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
       <pages>874–880</pages>
       <abstract>This paper presents our work in long and short form choice, a significant question of lexical choice, which plays an important role in many Natural Language Understanding tasks. Long and short form sharing at least one identical word meaning but with different number of syllables is a highly frequent linguistic phenomenon in Chinese like <i>老虎-虎(laohu-hu, tiger)</i></abstract>
@@ -1218,7 +1218,7 @@
       <title><fixed-case>CAN</fixed-case>-<fixed-case>GRU</fixed-case>: a Hierarchical Model for Emotion Recognition in Dialogue</title>
       <author><first>Ting</first><last>Jiang</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>1101–1111</pages>
       <abstract>Emotion recognition in dialogue systems has gained attention in the field of natural language processing recent years, because it can be applied in opinion mining from public conversational data on social media. In this paper, we propose a hierarchical model to recognize emotions in the dialogue. In the first layer, in order to extract textual features of utterances, we propose a convolutional self-attention network(CAN). Convolution is used to capture n-gram information and attention mechanism is used to obtain the relevant semantic information among words in the utterance. In the second layer, a GRU-based network helps to capture contextual information in the conversation. Furthermore, we discuss the effects of unidirectional and bidirectional networks. We conduct experiments on Friends dataset and EmotionPush dataset. The results show that our proposed model(CAN-GRU) and its variants achieve better performance than baselines.</abstract>
@@ -1286,7 +1286,7 @@
       <author><first>Xu</first><last>Zhao</last></author>
       <author><first>Lei</first><last>Zhuang</last></author>
       <author><first>Qi</first><last>Xie</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>1155–1165</pages>
       <abstract>The obstetric Electronic Medical Record (EMR) contains a large amount of medical data and health information. It plays a vital role in improving the quality of the diagnosis assistant service. In this paper, we treat the diagnosis assistant as a multi-label classification task and propose a Knowledge-Enabled Diagnosis Assistant (KEDA) model for the obstetric diagnosis assistant. We utilize the numerical information in EMRs and the external knowledge from Chinese Obstetric Knowledge Graph (COKG) to enhance the text representation of EMRs. Specifically, the bidirectional maximum matching method and similarity-based approach are used to obtain the entities set contained in EMRs and linked to the COKG. The final knowledge representation is obtained by a weight-based disease prediction algorithm, and it is fused with the text representation through a linear weighting method. Experiment results show that our approach can bring about +3.53 F1 score improvements upon the strong BERT baseline in the diagnosis assistant task.</abstract>
       <url hash="2a66410f">2020.ccl-1.107</url>
diff --git a/data/xml/2020.cl.xml b/data/xml/2020.cl.xml
index df25199de4..b684195154 100644
--- a/data/xml/2020.cl.xml
+++ b/data/xml/2020.cl.xml
@@ -16,7 +16,7 @@
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <doi>10.1162/coli_a_00367</doi>
       <abstract>Despite the recent success of deep neural networks in natural language processing and other spheres of artificial intelligence, their interpretability remains a challenge. We analyze the representations learned by neural machine translation (NMT) models at various levels of granularity and evaluate their quality through relevant extrinsic properties. In particular, we seek answers to the following questions: (i) How accurately is word structure captured within the learned representations, which is an important aspect in translating morphologically rich languages? (ii) Do the representations capture long-range dependencies, and effectively handle syntactically divergent languages? (iii) Do the representations capture lexical semantics? We conduct a thorough investigation along several parameters: (i) Which layers in the architecture capture each of these linguistic phenomena; (ii) How does the choice of translation unit (word, character, or subword unit) impact the linguistic properties captured by the underlying representations? (iii) Do the encoder and decoder learn differently and independently? (iv) Do the representations learned by multilingual NMT models capture the same amount of linguistic information as their bilingual counterparts? Our data-driven, quantitative evaluation illuminates important aspects in NMT models and their ability to capture various linguistic phenomena. We show that deep NMT models trained in an end-to-end fashion, without being provided any direct supervision during the training process, learn a non-trivial amount of linguistic information. Notable findings include the following observations: (i) Word morphology and part-of-speech information are captured at the lower layers of the model; (ii) In contrast, lexical semantics or non-local syntactic and semantic dependencies are better represented at the higher layers of the model; (iii) Representations learned using characters are more informed about word-morphology compared to those learned using subword units; and (iv) Representations learned by multilingual models are richer compared to bilingual models.</abstract>
       <pages>1–52</pages>
@@ -28,7 +28,7 @@
       <author><first>Li</first><last>Zhou</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Di</first><last>Li</last></author>
-      <author><first>Heung-Yeung</first><last>Shum</last></author>
+      <author id="heung-yeung-shum"><first>Heung-Yeung</first><last>Shum</last></author>
       <doi>10.1162/coli_a_00368</doi>
       <abstract>This article describes the development of Microsoft XiaoIce, the most popular social chatbot in the world. XiaoIce is uniquely designed as an artifical intelligence companion with an emotional connection to satisfy the human need for communication, affection, and social belonging. We take into account both intelligent quotient and emotional quotient in system design, cast human–machine social chat as decision-making over Markov Decision Processes, and optimize XiaoIce for long-term user engagement, measured in expected Conversation-turns Per Session (CPS). We detail the system architecture and key components, including dialogue manager, core chat, skills, and an empathetic computing module. We show how XiaoIce dynamically recognizes human feelings and states, understands user intent, and responds to user needs throughout long conversations. Since the release in 2014, XiaoIce has communicated with over 660 million active users and succeeded in establishing long-term relationships with many of them. Analysis of large-scale online logs shows that XiaoIce has achieved an average CPS of 23, which is significantly higher than that of other chatbots and even human conversations.</abstract>
       <pages>53–93</pages>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>An Empirical Study on Crosslingual Transfer in Probabilistic Topic Models</title>
       <author><first>Shudong</first><last>Hao</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <doi>10.1162/coli_a_00369</doi>
       <abstract>Probabilistic topic modeling is a common first step in crosslingual tasks to enable knowledge transfer and extract multilingual features. Although many multilingual topic models have been developed, their assumptions about the training corpus are quite varied, and it is not clear how well the different models can be utilized under various training conditions. In this article, the knowledge transfer mechanisms behind different multilingual topic models are systematically studied, and through a broad set of experiments with four models on ten languages, we provide empirical insights that can inform the selection and future development of multilingual topic models.</abstract>
       <pages>95–134</pages>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Data-Driven Sentence Simplification: Survey and Benchmark</title>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <doi>10.1162/coli_a_00370</doi>
       <abstract>Sentence Simplification (SS) aims to modify a sentence in order to make it easier to read and understand. In order to do so, several rewriting transformations can be performed such as replacement, reordering, and splitting. Executing these transformations while keeping sentences grammatical, preserving their main idea, and generating simpler output, is a challenging and still far from solved problem. In this article, we survey research on SS, focusing on approaches that attempt to learn how to simplify using corpora of aligned original-simplified sentence pairs in English, which is the dominant paradigm nowadays. We also include a benchmark of different approaches on common data sets so as to compare them and highlight their strengths and limitations. We expect that this survey will serve as a starting point for researchers interested in the task and help spark new ideas for future developments.</abstract>
@@ -58,10 +58,10 @@
     </paper>
     <paper id="5">
       <title>Corpora Annotated with Negation: An Overview</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>María Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>María Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <doi>10.1162/coli_a_00371</doi>
       <abstract>Negation is a universal linguistic phenomenon with a great qualitative impact on natural language processing applications. The availability of corpora annotated with negation is essential to training negation processing systems. Currently, most corpora have been annotated for English, but the presence of languages other than English on the Internet, such as Chinese or Spanish, is greater every day. In this study, we present a review of the corpora annotated with negation information in several languages with the goal of evaluating what aspects of negation have been annotated and how compatible the corpora are. We conclude that it is very difficult to merge the existing corpora because we found differences in the annotation schemes used, and most importantly, in the annotation guidelines: the way in which each corpus was tokenized and the negation elements that have been annotated. Differently than for other well established tasks like semantic role labeling or parsing, for negation there is no standard annotation scheme nor guidelines, which hampers progress in its treatment.</abstract>
       <pages>1–52</pages>
@@ -80,10 +80,10 @@
     </meta>
     <paper id="1">
       <title>Multilingual and Interlingual Semantic Representations for Natural Language Processing: A Brief Introduction</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/coli_a_00373</doi>
       <abstract>We introduce the Computational Linguistics special issue on Multilingual and Interlingual Semantic Representations for Natural Language Processing. We situate the special issue’s five articles in the context of our fast-changing field, explaining our motivation for this project. We offer a brief summary of the work in the issue, which includes developments on lexical and sentential semantic representations, from symbolic and neural perspectives.</abstract>
       <pages>249–255</pages>
@@ -92,8 +92,8 @@
     </paper>
     <paper id="2">
       <title>Unsupervised Word Translation with Adversarial Autoencoder</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <doi>10.1162/coli_a_00374</doi>
       <abstract>Crosslingual word embeddings learned from monolingual embeddings have a crucial role in many downstream tasks, ranging from machine translation to transfer learning. Adversarial training has shown impressive success in learning crosslingual embeddings and the associated word translation task without any parallel data by mapping monolingual embeddings to a shared space. However, recent work has shown superior performance for non-adversarial methods in more challenging language pairs. In this article, we investigate adversarial autoencoder for unsupervised word translation and propose two novel extensions to it that yield more stable training and improved results. Our method includes regularization terms to enforce cycle consistency and input reconstruction, and puts the target encoders as an adversary against the corresponding discriminator. We use two types of refinement procedures sequentially after obtaining the trained encoders and mappings from the adversarial training, namely, refinement with Procrustes solution and refinement with symmetric re-weighting. Extensive experimentations with high- and low-resource languages from two different data sets show that our method achieves better performance than existing adversarial and non-adversarial approaches and is also competitive with the supervised system. Along with performing comprehensive ablation studies to understand the contribution of different components of our adversarial model, we also conduct a thorough analysis of the refinement procedures to understand their effects.</abstract>
       <pages>257–288</pages>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>LINSPECTOR</fixed-case>: Multilingual Probing Tasks for Word Representations</title>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <author><first>Clara</first><last>Vania</last></author>
       <author><first>Ilia</first><last>Kuznetsov</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -128,7 +128,7 @@
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <doi>10.1162/coli_a_00377</doi>
       <abstract>Neural machine translation has considerably improved the quality of automatic translations by learning good representations of input sentences. In this article, we explore a multilingual translation model capable of producing fixed-size sentence representations by incorporating an intermediate crosslingual shared layer, which we refer to as attention bridge. This layer exploits the semantics from each language and develops into a language-agnostic meaning representation that can be efficiently used for transfer learning. We systematically study the impact of the size of the attention bridge and the effect of including additional languages in the model. In contrast to related previous work, we demonstrate that there is no conflict between translation performance and the use of sentence representations in downstream tasks. In particular, we show that larger intermediate layers not only improve translation quality, especially for long sentences, but also push the accuracy of trainable classification tasks. Nevertheless, shorter representations lead to increased compression that is beneficial in non-trainable similarity tasks. Similarly, we show that trainable downstream tasks benefit from multilingual models, whereas additional language signals do not improve performance in non-trainable benchmarks. This is an important insight that helps to properly design models for specific applications. Finally, we also include an in-depth analysis of the proposed attention bridge and its ability to encode linguistic properties. We carefully analyze the information that is captured by individual attention heads and identify interesting patterns that explain the performance of specific settings in linguistic probing tasks.</abstract>
       <pages>387–424</pages>
@@ -139,7 +139,7 @@
       <title>Abstract Syntax as Interlingua: Scaling Up the Grammatical Framework from Controlled Languages to Robust Pipelines</title>
       <author><first>Aarne</first><last>Ranta</last></author>
       <author><first>Krasimir</first><last>Angelov</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Prasanth</first><last>Kolachina</last></author>
       <doi>10.1162/coli_a_00378</doi>
       <abstract>Abstract syntax is an interlingual representation used in compilers. Grammatical Framework (GF) applies the abstract syntax idea to natural languages. The development of GF started in 1998, first as a tool for controlled language implementations, where it has gained an established position in both academic and commercial projects. GF provides grammar resources for over 40 languages, enabling accurate generation and translation, as well as grammar engineering tools and components for mobile and Web applications. On the research side, the focus in the last ten years has been on scaling up GF to wide-coverage language processing. The concept of abstract syntax offers a unified view on many other approaches: Universal Dependencies, WordNets, FrameNets, Construction Grammars, and Abstract Meaning Representations. This makes it possible for GF to utilize data from the other approaches and to build robust pipelines. In return, GF can contribute to data-driven approaches by methods to transfer resources from one language to others, to augment data by rule-based generation, to check the consistency of hand-annotated corpora, and to pipe analyses into high-precision semantic back ends. This article gives an overview of the use of abstract syntax as interlingua through both established and emerging NLP applications involving GF.</abstract>
@@ -182,8 +182,8 @@
     </meta>
     <paper id="1">
       <title>Tractable <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <doi>10.1162/coli_a_00384</doi>
       <abstract>The formalism for Lexical-Functional Grammar (LFG) was introduced in the 1980s as one of the first constraint-based grammatical formalisms for natural language. It has led to substantial contributions to the linguistic literature and to the construction of large-scale descriptions of particular languages. Investigations of its mathematical properties have shown that, without further restrictions, the recognition, emptiness, and generation problems are undecidable, and that they are intractable in the worst case even with commonly applied restrictions. However, grammars of real languages appear not to invoke the full expressive power of the formalism, as indicated by the fact that algorithms and implementations for recognition and generation have been developed that run—even for broad-coverage grammars—in typically polynomial time. This article formalizes some restrictions on the notation and its interpretation that are compatible with conventions and principles that have been implicit or informally stated in linguistic theory. We show that LFG grammars that respect these restrictions, while still suitable for the description of natural languages, are equivalent to linear context-free rewriting systems and allow for tractable computation.</abstract>
       <pages>515–569</pages>
@@ -202,8 +202,8 @@
     </paper>
     <paper id="3">
       <title>Sentence Meaning Representations Across Languages: What Can We Learn from Existing Frameworks?</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
       <doi>10.1162/coli_a_00385</doi>
       <abstract>This article gives an overview of how sentence meaning is represented in eleven deep-syntactic frameworks, ranging from those based on linguistic theories elaborated for decades to rather lightweight NLP-motivated approaches. We outline the most important characteristics of each framework and then discuss how particular language phenomena are treated across those frameworks, while trying to shed light on commonalities as well as differences.</abstract>
@@ -267,7 +267,7 @@
       <author><first>Amrith</first><last>Krishna</last></author>
       <author><first>Bishal</first><last>Santra</last></author>
       <author><first>Ashim</first><last>Gupta</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <doi>10.1162/coli_a_00390</doi>
       <abstract>We propose a framework using energy-based models for multiple structured prediction tasks in Sanskrit. Ours is an arc-factored model, similar to the graph-based parsing approaches, and we consider the tasks of word segmentation, morphological parsing, dependency parsing, syntactic linearization, and prosodification, a “prosody-level” task we introduce in this work. Ours is a search-based structured prediction framework, which expects a graph as input, where relevant linguistic information is encoded in the nodes, and the edges are then used to indicate the association between these nodes. Typically, the state-of-the-art models for morphosyntactic tasks in morphologically rich languages still rely on hand-crafted features for their performance. But here, we automate the learning of the feature function. The feature function so learned, along with the search space we construct, encode relevant linguistic information for the tasks we consider. This enables us to substantially reduce the training data requirements to as low as 10%, as compared to the data requirements for the neural state-of-the-art models. Our experiments in Czech and Sanskrit show the language-agnostic nature of the framework, where we train highly competitive models for both the languages. Moreover, our framework enables us to incorporate language-specific constraints to prune the search space and to filter the candidates during inference. We obtain significant improvements in morphosyntactic tasks for Sanskrit by incorporating language-specific constraints into the model. In all the tasks we discuss for Sanskrit, we either achieve state-of-the-art results or ours is the only data-driven solution for those tasks.</abstract>
diff --git a/data/xml/2020.clib.xml b/data/xml/2020.clib.xml
index 41ab77f7fd..66349c93bc 100644
--- a/data/xml/2020.clib.xml
+++ b/data/xml/2020.clib.xml
@@ -106,7 +106,7 @@
     <paper id="11">
       <title>It Takes Two to Tango – Towards a Multilingual <fixed-case>MWE</fixed-case> Resource</title>
       <author><first>Svetlozara</first><last>Leseva</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <author><first>Ivelina</first><last>Stoyanova</last></author>
       <pages>101–111</pages>
       <abstract>Mature wordnets offer the opportunity of digging out interesting linguistic information otherwise not explicitly marked in the network. The focus in this paper is on the ways the results already obtained at two levels, derivation and multiword expressions, may be further employed. The parallel recent development of the two resources under discussion, the Bulgarian and the Romanian wordnets, has enabled interlingual analyses that reveal similarities and differences between the linguistic knowledge encoded in the two wordnets. In this paper we show how the resources developed and the knowledge gained are put together towards devising a linked MWE resource that is informed by layered dictionary representation and corpus annotation and analysis. This work is a proof of concept for the adopted method of compiling a multilingual MWE resource on the basis of information extracted from the Bulgarian, the Romanian and the Princeton wordnet, as well as additional language resources and automatic procedures.</abstract>
@@ -150,7 +150,7 @@
     <paper id="16">
       <title>A Customizable <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Editor</title>
       <author><first>Andrei-Marius</first><last>Avram</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <pages>147–154</pages>
       <abstract>This paper presents an open-source wordnet editor that has been developed to ensure further expansion of the Romanian wordnet. It comes with a web interface that offers capabilities in selecting new synsets to be implemented, editing the list of literals and their sense numbers and adding these new synsets to the existing network, by importing from Princeton WordNet (and adjusting, when necessary) all the relations in which the newly created synsets and their literals are involved. The application also comes with an authorization mechanism that ensures control of the new synsets added in novice or lexicographer accounts. Although created to serve the current (more or less specific) needs in the development of the Romanian wordnet, it can be customized to fulfill new requirements from developers, either of the same wordnet or of a different one for which a similar approach is adopted.</abstract>
       <url hash="0c9305f2">2020.clib-1.16</url>
@@ -159,7 +159,7 @@
     <paper id="17">
       <title>Comparison of Genres in Word Sense Disambiguation using Automatically Generated Text Collections</title>
       <author><first>Angelina</first><last>Bolshina</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>155–164</pages>
       <abstract>The best approaches in Word Sense Disambiguation (WSD) are supervised and rely on large amounts of hand-labelled data, which is not always available and costly to create. In our work we describe an approach that is used to create an automatically labelled collection based on the monosemous relatives (related unambiguous entries) for Russian. The main contribution of our work is that we extracted monosemous relatives that can be located at relatively long distances from a target ambiguous word and ranked them according to the similarity measure to the target sense. We evaluated word sense disambiguation models based on a nearest neighbour classification on BERT and ELMo embeddings and two text collections. Our work relies on the Russian wordnet RuWordNet.</abstract>
       <url hash="fa238967">2020.clib-1.17</url>
diff --git a/data/xml/2020.clinicalnlp.xml b/data/xml/2020.clinicalnlp.xml
index 3dbe93e382..98aa4fd357 100644
--- a/data/xml/2020.clinicalnlp.xml
+++ b/data/xml/2020.clinicalnlp.xml
@@ -64,7 +64,7 @@
       <title>Incorporating Risk Factor Embeddings in Pre-trained Transformers Improves Sentiment Prediction in Psychiatric Discharge Summaries</title>
       <author><first>Xiyu</first><last>Ding</last></author>
       <author><first>Mei-Hua</first><last>Hall</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>35–40</pages>
       <abstract>Reducing rates of early hospital readmission has been recognized and identified as a key to improve quality of care and reduce costs. There are a number of risk factors that have been hypothesized to be important for understanding re-admission risk, including such factors as problems with substance abuse, ability to maintain work, relations with family. In this work, we develop Roberta-based models to predict the sentiment of sentences describing readmission risk factors in discharge summaries of patients with psychosis. We improve substantially on previous results by a scheme that shares information across risk factors while also allowing the model to learn risk factor-specific information.</abstract>
       <url hash="4a86ef4f">2020.clinicalnlp-1.4</url>
@@ -108,7 +108,7 @@
       <author><first>Jenny</first><last>Copara</last></author>
       <author><first>Yohan Bonescki</first><last>Gumiel</last></author>
       <author><first>Lucas Ferro Antunes de</first><last>Oliveira</last></author>
-      <author><first>Emerson Cabrera</first><last>Paraiso</last></author>
+      <author id="emerson-cabrera-paraiso"><first>Emerson Cabrera</first><last>Paraiso</last></author>
       <author><first>Douglas</first><last>Teodoro</last></author>
       <author><first>Cláudia Maria Cabral Moro</first><last>Barra</last></author>
       <pages>65–72</pages>
@@ -138,7 +138,7 @@
       <author><first>Tore</first><last>Gundersen</last></author>
       <author><first>Haldor</first><last>Husby</last></author>
       <author><first>Øystein</first><last>Nytrø</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>79–84</pages>
       <abstract>Loss of consciousness, so-called syncope, is a commonly occurring symptom associated with worse prognosis for a number of heart-related diseases. We present a comparison of methods for a diagnosis classification task in Norwegian clinical notes, targeting syncope, i.e. fainting cases. We find that an often neglected baseline with keyword matching constitutes a rather strong basis, but more advanced methods do offer some improvement in classification performance, especially a convolutional neural network model. The developed pipeline is planned to be used for quantifying unregistered syncope cases in Norway.</abstract>
       <url hash="24c0f953">2020.clinicalnlp-1.9</url>
@@ -177,7 +177,7 @@
     </paper>
     <paper id="12">
       <title>Automatic recognition of abdominal lymph nodes from clinical text</title>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Sungwon</first><last>Lee</last></author>
       <author><first>Daniel C.</first><last>Elton</last></author>
       <author><first>Thomas</first><last>Shen</last></author>
@@ -235,7 +235,7 @@
       <title>Knowledge Grounded Conversational Symptom Detection with Graph Memory Networks</title>
       <author><first>Hongyin</first><last>Luo</last></author>
       <author><first>Shang-Wen</first><last>Li</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>136–145</pages>
       <abstract>In this work, we propose a novel goal-oriented dialog task, automatic symptom detection. We build a system that can interact with patients through dialog to detect and collect clinical symptoms automatically, which can save a doctor’s time interviewing the patient. Given a set of explicit symptoms provided by the patient to initiate a dialog for diagnosing, the system is trained to collect implicit symptoms by asking questions, in order to collect more information for making an accurate diagnosis. After getting the reply from the patient for each question, the system also decides whether current information is enough for a human doctor to make a diagnosis. To achieve this goal, we propose two neural models and a training pipeline for the multi-step reasoning task. We also build a knowledge graph as additional inputs to further improve model performance. Experiments show that our model significantly outperforms the baseline by 4%, discovering 67% of implicit symptoms on average with a limited number of questions.</abstract>
       <url hash="61beec04">2020.clinicalnlp-1.16</url>
@@ -272,11 +272,11 @@
       <author><first>Zixu</first><last>Wang</last></author>
       <author><first>Julia</first><last>Ive</last></author>
       <author><first>Sinead</first><last>Moylett</last></author>
-      <author><first>Christoph</first><last>Mueller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Mueller</last></author>
       <author><first>Rudolf</first><last>Cardinal</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
       <author><first>John</first><last>O’Brien</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <pages>168–177</pages>
       <abstract>While Dementia with Lewy Bodies (DLB) is the second most common type of neurodegenerative dementia following Alzheimer’s Disease (AD), it is difficult to distinguish from AD. We propose a method for DLB detection by using mental health record (MHR) documents from a (3-month) period before a patient has been diagnosed with DLB or AD. Our objective is to develop a model that could be clinically useful to differentiate between DLB and AD across datasets from different healthcare institutions. We cast this as a classification task using Convolutional Neural Network (CNN), an efficient neural model for text classification. We experiment with different representation models, and explore the features that contribute to model performances. In addition, we apply temperature scaling, a simple but efficient model calibration method, to produce more reliable predictions. We believe the proposed method has important potential for clinical applications using routine healthcare records, and for generalising to other relevant clinical record datasets. To the best of our knowledge, this is the first attempt to distinguish DLB from AD using mental health records, and to improve the reliability of DLB predictions.</abstract>
       <url hash="40631b32">2020.clinicalnlp-1.19</url>
@@ -299,7 +299,7 @@
     <paper id="21">
       <title>Extracting Relations between Radiotherapy Treatment Details</title>
       <author><first>Danielle</first><last>Bitterman</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>David</first><last>Harris</last></author>
       <author><first>Chen</first><last>Lin</last></author>
       <author><first>Sean</first><last>Finan</last></author>
@@ -365,8 +365,8 @@
     <paper id="25">
       <title>Learning from Unlabelled Data for Clinical Semantic Textual Similarity</title>
       <author><first>Yuxia</first><last>Wang</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>227–233</pages>
       <abstract>Domain pretraining followed by task fine-tuning has become the standard paradigm for NLP tasks, but requires in-domain labelled data for task fine-tuning. To overcome this, we propose to utilise domain unlabelled data by assigning pseudo labels from a general model. We evaluate the approach on two clinical STS datasets, and achieve r= 0.80 on N2C2-STS. Further investigation reveals that if the data distribution of unlabelled sentence pairs is closer to the test data, we can obtain better performance. By leveraging a large general-purpose STS dataset and small-scale in-domain training data, we obtain further improvements to r= 0.90, a new SOTA.</abstract>
       <url hash="3d5250b1">2020.clinicalnlp-1.25</url>
diff --git a/data/xml/2020.cllrd.xml b/data/xml/2020.cllrd.xml
index 2a7aebc441..46550a7561 100644
--- a/data/xml/2020.cllrd.xml
+++ b/data/xml/2020.cllrd.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the LREC 2020 Workshop on "Citizen Linguistics in Language Resource Development"</booktitle>
       <editor><first>James</first><last>Fiumara</last></editor>
-      <editor><first>Christopher</first><last>Cieri</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="christopher-cieri"><first>Christopher</first><last>Cieri</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
@@ -54,8 +54,8 @@
     <paper id="4">
       <title>Speaking Outside the Box: Exploring the Benefits of Unconstrained Input in Crowdsourcing and Citizen Science Platforms</title>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>26–34</pages>
       <abstract>Crowdsourcing approaches provide a difficult design challenge for developers. There is a trade-off between the efficiency of the task to be done and the reward given to the user for participating, whether it be altruism, social enhancement, entertainment or money. This paper explores how crowdsourcing and citizen science systems collect data and complete tasks, illustrated by a case study from the online language game-with-a-purpose Phrase Detectives. The game was originally developed to be a constrained interface to prevent player collusion, but subsequently benefited from posthoc analysis of over 76k unconstrained inputs from users. Understanding the interface design and task deconstruction are critical for enabling users to participate in such systems and the paper concludes with a discussion of the idea that social networks can be viewed as form of citizen science platform with both constrained and unconstrained inputs making for a highly complex dataset.</abstract>
       <url hash="576c40f4">2020.cllrd-1.4</url>
@@ -66,7 +66,7 @@
       <title>Leveraging Non-Specialists for Accurate and Time Efficient <fixed-case>AMR</fixed-case> Annotation</title>
       <author><first>Mary</first><last>Martin</last></author>
       <author><first>Cecilia</first><last>Mauceri</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Christoffer</first><last>Heckman</last></author>
       <pages>35–39</pages>
       <abstract>Abstract Meaning Representations (AMRs), a syntax-free representation of phrase semantics are useful for capturing the meaning of a phrase and reflecting the relationship between concepts that are referred to. However, annotating AMRs are time consuming and expensive. The existing annotation process requires expertly trained workers who have knowledge of an extensive set of guidelines for parsing phrases. In this paper, we propose a cost-saving two-step process for the creation of a corpus of AMR-phrase pairs for spatial referring expressions. The first step uses non-specialists to perform simple annotations that can be leveraged in the second step to accelerate the annotation performed by the experts. We hypothesize that our process will decrease the cost per annotation and improve consistency across annotators. Few corpora of spatial referring expressions exist and the resulting language resource will be valuable for referring expression comprehension and generation modeling.</abstract>
diff --git a/data/xml/2020.clssts.xml b/data/xml/2020.clssts.xml
index fe033dc714..9254060057 100644
--- a/data/xml/2020.clssts.xml
+++ b/data/xml/2020.clssts.xml
@@ -3,10 +3,10 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)</booktitle>
-      <editor><first>Kathy</first><last>McKeown</last></editor>
-      <editor><first>Douglas W.</first><last>Oard</last></editor>
+      <editor id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></editor>
+      <editor id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></editor>
       <editor><first/><last>Elizabeth</last></editor>
-      <editor><first>Richard</first><last>Schwartz</last></editor>
+      <editor id="richard-schwartz"><first>Richard</first><last>Schwartz</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -99,7 +99,7 @@
       <author><first>Rabih</first><last>Zbib</last></author>
       <author><first>William</first><last>Hartmann</last></author>
       <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>38–43</pages>
       <abstract>In the IARPA MATERIAL program, information retrieval (IR) is treated as a hard detection problem; the system has to output a single global ranking over all queries, and apply a hard threshold on this global list to come up with all the hypothesized relevant documents. This means that how queries are ranked relative to each other can have a dramatic impact on performance. In this paper, we study such a performance measure, the Average Query Weighted Value (AQWV), which is a combination of miss and false alarm rates. AQWV requires that the same detection threshold is applied to all queries. Hence, detection scores of different queries should be comparable, and, to do that, a score normalization technique (commonly used in keyword spotting from speech) should be used. We describe unsupervised methods for score normalization, which are borrowed from the speech field and adapted accordingly for IR, and demonstrate that they greatly improve AQWV on the task of cross-language information retrieval (CLIR), on three low-resource languages used in MATERIAL. We also present a novel supervised score normalization approach which gives additional gains.</abstract>
       <url hash="e7854247">2020.clssts-1.7</url>
@@ -119,7 +119,7 @@
       <author><first>Lingjun</first><last>Zhao</last></author>
       <author><first>Zhuolin</first><last>Jiang</last></author>
       <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>44–51</pages>
       <abstract>In this paper, we describe a cross-lingual information retrieval (CLIR) system that, given a query in English, and a set of audio and text documents in a foreign language, can return a scored list of relevant documents, and present findings in a summary form in English. Foreign audio documents are first transcribed by a state-of-the-art pretrained multilingual speech recognition model that is finetuned to the target language. For text documents, we use multiple multilingual neural machine translation (MT) models to achieve good translation results, especially for low/medium resource languages. The processed documents and queries are then scored using a probabilistic CLIR model that makes use of the probability of translation from GIZA translation tables and scores from a Neural Network Lexical Translation Model (NNLTM). Additionally, advanced score normalization, combination, and thresholding schemes are employed to maximize the Average Query Weighted Value (AQWV) scores. The CLIR output, together with multiple translation renderings, are selected and translated into English snippets via a summarization model. Our turnkey system is language agnostic and can be quickly trained for a new low-resource language in few days.</abstract>
       <url hash="605f40f0">2020.clssts-1.8</url>
@@ -129,7 +129,7 @@
     <paper id="9">
       <title>What Set of Documents to Present to an Analyst?</title>
       <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <author><first>Lee</first><last>Tarlin</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
       <pages>52–57</pages>
@@ -155,7 +155,7 @@
     <paper id="11">
       <title>Subtitles to Segmentation: Improving Low-Resource Speech-to-<fixed-case>T</fixed-case>ext<fixed-case>T</fixed-case>ranslation Pipelines</title>
       <author><first>David</first><last>Wan</last></author>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Chris</first><last>Kedzie</last></author>
       <author><first>Elsbeth</first><last>Turcan</last></author>
       <author><first>Peter</first><last>Bell</last></author>
diff --git a/data/xml/2020.cmcl.xml b/data/xml/2020.cmcl.xml
index 3c76c9a2be..fcc322da9e 100644
--- a/data/xml/2020.cmcl.xml
+++ b/data/xml/2020.cmcl.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
-      <editor><first>Cassandra</first><last>Jacobs</last></editor>
+      <editor id="cassandra-l-jacobs"><first>Cassandra</first><last>Jacobs</last></editor>
       <editor><first>Yohei</first><last>Oseki</last></editor>
-      <editor><first>Laurent</first><last>Prévot</last></editor>
+      <editor id="laurent-prevot"><first>Laurent</first><last>Prévot</last></editor>
       <editor><first>Enrico</first><last>Santus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -44,7 +44,7 @@
     </paper>
     <paper id="3">
       <title>Production-based Cognitive Models as a Test Suite for Reinforcement Learning Algorithms</title>
-      <author><first>Adrian</first><last>Brasoveanu</last></author>
+      <author id="adrian-brasoveanu"><first>Adrian</first><last>Brasoveanu</last></author>
       <author><first>Jakub</first><last>Dotlacil</last></author>
       <pages>28–37</pages>
       <abstract>We introduce a framework in which production-rule based computational cognitive modeling and Reinforcement Learning can systematically interact and inform each other. We focus on linguistic applications because the sophisticated rule-based cognitive models needed to capture linguistic behavioral data promise to provide a stringent test suite for RL algorithms, connecting RL algorithms to both accuracy and reaction-time experimental data. Thus, we open a path towards assembling an experimentally rigorous and cognitively realistic benchmark for RL algorithms. We extend our previous work on lexical decision tasks and tabular RL algorithms (Brasoveanu and Dotlačil, 2020b) with a discussion of neural-network based approaches, and a discussion of how parsing can be formalized as an RL problem.</abstract>
@@ -84,7 +84,7 @@
     <paper id="7">
       <title>Development of Multi-level Linguistic Alignment in Child-adult Conversations</title>
       <author><first>Thomas</first><last>Misiek</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
       <pages>54–58</pages>
       <abstract>Interactive alignment is a major mechanism of linguistic coordination. Here we study the way this mechanism emerges in development across the lexical, syntactic, and conceptual levels. We leverage NLP tools to analyze a large-scale corpus of child-adult conversations between 2 and 5 years old. We found that, across development, children align consistently to adults above chance and that adults align consistently more to children than vice versa (even controlling for language production abilities). Besides these consistencies, we found a diversity of developmental trajectories across linguistic levels. These corpus-based findings provide strong support for an early onset of multi-level linguistic alignment in children and invites new experimental work.</abstract>
@@ -96,7 +96,7 @@
       <title>Conditioning, but on Which Distribution? Grammatical Gender in <fixed-case>G</fixed-case>erman Plural Inflection</title>
       <author><first>Kate</first><last>McCurdy</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>59–65</pages>
       <abstract>Grammatical gender is a consistent and informative cue to the plural class of German nouns. We find that neural encoder-decoder models learn to rely on this cue to predict plural class, but adult speakers are relatively insensitive to it. This suggests that the neural models are not an effective cognitive model of German plural formation.</abstract>
       <url hash="3173e9f7">2020.cmcl-1.8</url>
@@ -106,7 +106,7 @@
     <paper id="9">
       <title>Learning Pronoun Case from Distributional Cues: Flexible Frames for Case Acquisition</title>
       <author><first>Xiaomeng</first><last>Ma</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Virginia</first><last>Valian</last></author>
       <pages>66–74</pages>
       <abstract>Case is an abstract grammatical feature that indicates argument relationship in a sentence. In English, cases are expressed on pronouns, as nominative case (e.g. I, he), accusative case (e.g. me, him) and genitive case (e.g. my, his). Children correctly use cased pronouns at a very young age. How do they acquire abstract case in the first place, when different cases are not associated with different meanings? This paper proposes that the distributional patterns in parents’ input could be used to distinguish grammatical cases in English.</abstract>
diff --git a/data/xml/2020.cmlc.xml b/data/xml/2020.cmlc.xml
index 309ed892d9..f62ca6b762 100644
--- a/data/xml/2020.cmlc.xml
+++ b/data/xml/2020.cmlc.xml
@@ -7,7 +7,7 @@
       <editor><first>Adrien</first><last>Barbaresi</last></editor>
       <editor><first>Simon</first><last>Clematide</last></editor>
       <editor><first>Marc</first><last>Kupietz</last></editor>
-      <editor><first>Harald</first><last>Lüngen</last></editor>
+      <editor id="harald-lungen"><first>Harald</first><last>Lüngen</last></editor>
       <editor><first>Ines</first><last>Pisetta</last></editor>
       <publisher>European Language Ressources Association</publisher>
       <address>Marseille, France</address>
@@ -48,9 +48,9 @@
     <paper id="3">
       <title><fixed-case>F</fixed-case>rench Contextualized Word-Embeddings with a sip of <fixed-case>C</fixed-case>a<fixed-case>B</fixed-case>e<fixed-case>R</fixed-case>net: a New <fixed-case>F</fixed-case>rench Balanced Reference Corpus</title>
       <author><first>Murielle</first><last>Popa-Fabre</last></author>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>15–23</pages>
       <abstract>This paper investigates the impact of different types and size of training corpora on language models. By asking the fundamental question of quality versus quantity, we compare four French corpora by pre-training four different ELMos and evaluating them on dependency parsing, POS-tagging and Named Entities Recognition downstream tasks. We present and asses the relevance of a new balanced French corpus, CaBeRnet, that features a representative range of language usage, including a balanced variety of genres (oral transcriptions, newspapers, popular magazines, technical reports, fiction, academic texts), in oral and written styles. We hypothesize that a linguistically representative corpus will allow the language models to be more efficient, and therefore yield better evaluation scores on different evaluation sets and tasks. This paper offers three main contributions: (1) two newly built corpora: (a) CaBeRnet, a French Balanced Reference Corpus and (b) CBT-fr a domain-specific corpus having both oral and written style in youth literature, (2) five versions of ELMo pre-trained on differently built corpora, and (3) a whole array of computational results on downstream tasks that deepen our understanding of the effects of corpus balance and register in NLP evaluation.</abstract>
       <url hash="e1d4beb6">2020.cmlc-1.3</url>
@@ -62,7 +62,7 @@
       <author><first>Rosa</first><last>Filgueira</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Melissa</first><last>Terras</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>24–30</pages>
       <abstract>This paper describes work in progress on devising automatic and parallel methods for geoparsing large digital historical textual data by combining the strengths of three natural language processing (NLP) tools, the Edinburgh Geoparser, spaCy and defoe, and employing different tokenisation and named entity recognition (NER) techniques. We apply these tools to a large collection of nineteenth century Scottish geographical dictionaries, and describe preliminary results obtained when processing this data.</abstract>
       <url hash="a3c3df6b">2020.cmlc-1.4</url>
diff --git a/data/xml/2020.codi.xml b/data/xml/2020.codi.xml
index 30748b04ff..1dffefe32c 100644
--- a/data/xml/2020.codi.xml
+++ b/data/xml/2020.codi.xml
@@ -40,7 +40,7 @@
     <paper id="3">
       <title>Using Type Information to Improve Entity Coreference Resolution</title>
       <author><first>Sopan</first><last>Khosla</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>20–31</pages>
       <abstract>Coreference resolution (CR) is an essential part of discourse analysis. Most recently, neural approaches have been proposed to improve over SOTA models from earlier paradigms. So far none of the published neural models leverage external semantic knowledge such as type information. This paper offers the first such model and evaluation, demonstrating modest gains in accuracy by introducing either gold standard or predicted types. In the proposed approach, type information serves both to (1) improve mention representation and (2) create a soft type consistency check between coreference candidate mentions. Our evaluation covers two different grain sizes of types over four different benchmark corpora.</abstract>
       <url hash="4688eb6d">2020.codi-1.3</url>
@@ -73,7 +73,7 @@
     <paper id="6">
       <title>Exploring Coreference Features in Heterogeneous Data</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Kerstin</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin</first><last>Kunz</last></author>
       <pages>53–64</pages>
       <abstract>The present paper focuses on variation phenomena in coreference chains. We address the hypothesis that the degree of structural variation between chain elements depends on language-specific constraints and preferences and, even more, on the communicative situation of language production. We define coreference features that also include reference to abstract entities and events. These features are inspired through several sources – cognitive parameters, pragmatic factors and typological status. We pay attention to the distributions of these features in a dataset containing English and German texts of spoken and written discourse mode, which can be classified into seven different registers. We apply text classification and feature selection to find out how these variational dimensions (language, mode and register) impact on coreference features. Knowledge on the variation under analysis is valuable for contrastive linguistics, translation studies and multilingual natural language processing (NLP), e.g. machine translation or cross-lingual coreference resolution.</abstract>
       <url hash="8d1943cb">2020.codi-1.6</url>
@@ -133,7 +133,7 @@
       <author><first>Youmna</first><last>Farag</last></author>
       <author><first>Josef</first><last>Valvoda</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>102–112</pages>
       <abstract>In this work, we systematically investigate how well current models of coherence can capture aspects of text implicated in discourse organisation. We devise two datasets of various linguistic alterations that undermine coherence and test model sensitivity to changes in syntax and semantics. We furthermore probe discourse embedding space and examine the knowledge that is encoded in representations of coherence. We hope this study shall provide further insight into how to frame the task and improve models of coherence assessment further. Finally, we make our datasets publicly available as a resource for researchers to use to test discourse coherence models.</abstract>
       <url hash="475e34d1">2020.codi-1.11</url>
@@ -144,7 +144,7 @@
     <paper id="12">
       <title>Computational Interpretations of Recency for the Choice of Referring Expressions in Discourse</title>
       <author><first>Fahime</first><last>Same</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>113–123</pages>
       <abstract>First, we discuss the most common linguistic perspectives on the concept of recency and propose a taxonomy of recency metrics employed in Machine Learning studies for choosing the form of referring expressions in discourse context. We then report on a Multi-Layer Perceptron study and a Sequential Forward Search experiment, followed by Bayes Factor analysis of the outcomes. The results suggest that recency metrics counting paragraphs and sentences contribute to referential choice prediction more than other recency-related metrics. Based on the results of our analysis, we argue that, sensitivity to discourse structure is important for recency metrics used in determining referring expression forms.</abstract>
       <url hash="48d2673d">2020.codi-1.12</url>
@@ -168,7 +168,7 @@
       <title>Extending Implicit Discourse Relation Recognition to the <fixed-case>PDTB</fixed-case>-3</title>
       <author><first>Li</first><last>Liang</last></author>
       <author><first>Zheng</first><last>Zhao</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>135–147</pages>
       <abstract>The PDTB-3 contains many more Implicit discourse relations than the previous PDTB-2. This is in part because implicit relations have now been annotated within sentences as well as between them. In addition, some now co-occur with explicit discourse relations, instead of standing on their own. Here we show that while this can complicate the problem of identifying the location of implicit discourse relations, it can in turn simplify the problem of identifying their senses. We present data to support this claim, as well as methods that can serve as a non-trivial baseline for future state-of-the-art recognizers for implicit discourse relations.</abstract>
       <url hash="d2d7553c">2020.codi-1.14</url>
diff --git a/data/xml/2020.cogalex.xml b/data/xml/2020.cogalex.xml
index 4bf0859010..5830acc371 100644
--- a/data/xml/2020.cogalex.xml
+++ b/data/xml/2020.cogalex.xml
@@ -23,7 +23,7 @@
       <author><first>Lara</first><last>Müller</last></author>
       <author><first>Andre</first><last>Rölke</last></author>
       <author><first>Ralph</first><last>Radach</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1–11</pages>
       <abstract>The corpus, from which a predictive language model is trained, can be considered the experience of a semantic system. We recorded everyday reading of two participants for two months on a tablet, generating individual corpus samples of 300/500K tokens. Then we trained word2vec models from individual corpora and a 70 million-sentence newspaper corpus to obtain individual and norm-based long-term memory structure. To test whether individual corpora can make better predictions for a cognitive task of long-term memory retrieval, we generated stimulus materials consisting of 134 sentences with uncorrelated individual and norm-based word probabilities. For the subsequent eye tracking study 1-2 months later, our regression analyses revealed that individual, but not norm-corpus-based word probabilities can account for first-fixation duration and first-pass gaze duration. Word length additionally affected gaze duration and total viewing duration. The results suggest that corpora representative for an individual’s long-term memory structure can better explain reading performance than a norm corpus, and that recently acquired information is lexically accessed rapidly.</abstract>
       <url hash="fc19dd91">2020.cogalex-1.1</url>
@@ -51,8 +51,8 @@
     <paper id="4">
       <title>Less is Better: A cognitively inspired unsupervised model for language segmentation</title>
       <author><first>Jinbiao</first><last>Yang</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>33–45</pages>
       <abstract>Language users process utterances by segmenting them into many cognitive units, which vary in their sizes and linguistic levels. Although we can do such unitization/segmentation easily, its cognitive mechanism is still not clear. This paper proposes an unsupervised model, Less-is-Better (LiB), to simulate the human cognitive process with respect to language unitization/segmentation. LiB follows the principle of least effort and aims to build a lexicon which minimizes the number of unit tokens (alleviating the effort of analysis) and number of unit types (alleviating the effort of storage) at the same time on any given corpus. LiB’s workflow is inspired by empirical cognitive phenomena. The design makes the mechanism of LiB cognitively plausible and the computational requirement light-weight. The lexicon generated by LiB performs the best among different types of lexicons (e.g. ground-truth words) both from an information-theoretical view and a cognitive view, which suggests that the LiB lexicon may be a plausible proxy of the mental lexicon.</abstract>
       <url hash="57dc6bea">2020.cogalex-1.4</url>
@@ -91,7 +91,7 @@
     <paper id="8">
       <title><fixed-case>C</fixed-case>og<fixed-case>AL</fixed-case>ex-<fixed-case>VI</fixed-case> Shared Task: Bidirectional Transformer based Identification of Semantic Relations</title>
       <author><first>Saurav</first><last>Karmakar</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>65–71</pages>
       <abstract>This paper presents a bidirectional transformer based approach for recognising semantic relationships between a pair of words as proposed by CogALex VI shared task in 2020. The system presented here works by employing BERT embeddings of the words and passing the same over tuned neural network to produce a learning model for the pair of words and their relationships. Afterwards the very same model is used for the relationship between unknown words from the test set. CogALex VI provided Subtask 1 as the identification of relationship of three specific categories amongst English pair of words and the presented system opts to work on that. The resulted relationships of the unknown words are analysed here which shows a balanced performance in overall characteristics with some scope for improvement.</abstract>
       <url hash="560a438b">2020.cogalex-1.8</url>
@@ -109,8 +109,8 @@
     <paper id="10">
       <title>Definition Extraction Feature Analysis: From Canonical to Naturally-Occurring Definitions</title>
       <author><first>Mireia</first><last>Roig Mirapeix</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>81–91</pages>
       <abstract>Textual definitions constitute a fundamental source of knowledge when seeking the meaning of words, and they are the cornerstone of lexical resources like glossaries, dictionaries, encyclopedia or thesauri. In this paper, we present an in-depth analytical study on the main features relevant to the task of definition extraction. Our main goal is to study whether linguistic structures from canonical (the Aristotelian or genus et differentia model) can be leveraged to retrieve definitions from corpora in different domains of knowledge and textual genres alike. To this end, we develop a simple linear classifier and analyze the contribution of several (sets of) linguistic features. Finally, as a result of our experiments, we also shed light on the particularities of existing benchmarks as well as the most challenging aspects of the task.</abstract>
       <url hash="38348f5b">2020.cogalex-1.10</url>
@@ -143,7 +143,7 @@
     </paper>
     <paper id="14">
       <title>Translating Collocations: The Need for Task-driven Word Associations</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>112–116</pages>
       <abstract>Existing dictionaries may help collocation translation by suggesting associated words in the form of collocations, thesaurus, and example sentences. We propose to enhance them with task-driven word associations, illustrating the need by a few scenarios and outlining a possible approach based on word embedding. An example is given, using pre-trained word embedding, while more extensive investigation with more refined methods and resources is underway.</abstract>
       <url hash="ac742e0a">2020.cogalex-1.14</url>
@@ -172,7 +172,7 @@
       <title>Automatic Word Association Norms (<fixed-case>AWAN</fixed-case>)</title>
       <author><first>Jorge</first><last>Reyes-Magaña</last></author>
       <author><first>Gerardo</first><last>Sierra Martínez</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Helena</first><last>Gomez-Adorno</last></author>
       <pages>142–153</pages>
       <abstract>Word Association Norms (WAN) are collections that present stimuli words and the set of their associated responses. The corpus is widely used in diverse areas of expertise. In order to reduce the effort to have a good quality resource that can be reproduced in many languages with minimum sources, a methodology to build Automatic Word Association Norms is proposed (AWAN). The methodology has an input of two simple elements: a) dictionary, and b) pre-processed Word Embeddings. This new kind of WAN is evaluated in two ways: i) learning word embeddings based on the node2vec algorithm and comparing them with human annotated benchmarks, and ii) performing a lexical search for a reverse dictionary. Both evaluations are done in a weighted graph with the AWAN lexical elements. The results showed that the methodology produces good quality AWANs.</abstract>
diff --git a/data/xml/2020.coling.xml b/data/xml/2020.coling.xml
index d5b3b1cda9..c50fa06035 100644
--- a/data/xml/2020.coling.xml
+++ b/data/xml/2020.coling.xml
@@ -3,9 +3,9 @@
   <volume id="main" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 28th International Conference on Computational Linguistics</booktitle>
-      <editor><first>Donia</first><last>Scott</last></editor>
-      <editor><first>Nuria</first><last>Bel</last></editor>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
+      <editor id="nuria-bel"><first>Nuria</first><last>Bel</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
       <address>Barcelona, Spain (Online)</address>
       <month>December</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Exploring Controllable Text Generation Techniques</title>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>1–14</pages>
       <abstract>Neural controllable text generation is an important area gaining attention due to its plethora of applications. Although there is a large body of prior work in controllable text generation, there is no unifying theme. In this work, we provide a new schema of the pipeline of the generation process by classifying it into five modules. The control of attributes in the generation process requires modification of these modules. We present an overview of different techniques used to perform the modulation of these modules. We also provide an analysis on the advantages and disadvantages of these techniques. We further pave ways to develop new architectures based on the combination of the modules described in this paper.</abstract>
@@ -95,7 +95,7 @@
       <author><first>Tao</first><last>Zhang</last></author>
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Chun-Ta</first><last>Lu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>77–87</pages>
       <abstract>Named entity typing (NET) is a classification task of assigning an entity mention in the context with given semantic types. However, with the growing size and granularity of the entity types, few previous researches concern with newly emerged entity types. In this paper, we propose MZET, a novel memory augmented FNET (Fine-grained NET) model, to tackle the unseen types in a zero-shot manner. MZET incorporates character-level, word-level, and contextural-level information to learn the entity mention representation. Besides, MZET considers the semantic meaning and the hierarchical structure into the entity type representation. Finally, through the memory component which models the relationship between the entity mention and the entity type, MZET transfers the knowledge from seen entity types to the zero-shot ones. Extensive experiments on three public datasets show the superior performance obtained by MZET, which surpasses the state-of-the-art FNET neural network models with up to 8% gain in Micro-F1 and Macro-F1 score.</abstract>
       <url hash="19703ff3">2020.coling-main.7</url>
@@ -267,7 +267,7 @@
       <title>Understanding Pre-trained <fixed-case>BERT</fixed-case> for Aspect-based Sentiment Analysis</title>
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Lei</first><last>Shu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <pages>244–250</pages>
       <abstract>This paper analyzes the pre-trained hidden representations learned from reviews on BERT for tasks in aspect-based sentiment analysis (ABSA). Our work is motivated by the recent progress in BERT-based language models for ABSA. However, it is not clear how the general proxy task of (masked) language model trained on unlabeled corpus without annotations of aspects or opinions can provide important features for downstream tasks in ABSA. By leveraging the annotated datasets in ABSA, we investigate both the attentions and the learned representations of BERT pre-trained on reviews. We found that BERT uses very few self-attention heads to encode context words (such as prepositions or pronouns that indicating an aspect) and opinion words for an aspect. Most features in the representation of an aspect are dedicated to the fine-grained semantics of the domain (or product category) and the aspect itself, instead of carrying summarized opinions from its context. We hope this investigation can help future research in improving self-supervised learning, unsupervised learning and fine-tuning for ABSA. The pre-trained model and code can be found at <url>https://github.com/howardhsu/BERT-for-RRC-ABSA</url>.</abstract>
@@ -288,7 +288,7 @@
     <paper id="23">
       <title>Improving Sentiment Analysis over non-<fixed-case>E</fixed-case>nglish Tweets using Multilingual Transformers and Automatic Translation for Data-Augmentation</title>
       <author><first>Valentin</first><last>Barriere</last></author>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <pages>266–271</pages>
       <abstract>Tweets are specific text data when compared to general text. Although sentiment analysis over tweets has become very popular in the last decade for English, it is still difficult to find huge annotated corpora for non-English languages. The recent rise of the transformer models in Natural Language Processing allows to achieve unparalleled performances in many tasks, but these models need a consequent quantity of text to adapt to the tweet domain. We propose the use of a multilingual transformer model, that we pre-train over English tweets on which we apply data-augmentation using automatic translation to adapt the model to non-English languages. Our experiments in French, Spanish, German and Italian suggest that the proposed technique is an efficient way to improve the results of the transformers over small corpora of tweets in a non-English language.</abstract>
       <url hash="afe779aa">2020.coling-main.23</url>
@@ -350,7 +350,7 @@
       <author><first>Shogo</first><last>Fujita</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>316–327</pages>
       <abstract>We tackle the task of automatically generating a function name from source code. Existing generators face difficulties in generating low-frequency or out-of-vocabulary subwords. In this paper, we propose two strategies for copying low-frequency or out-of-vocabulary subwords in inputs. Our best performing model showed an improvement over the conventional method in terms of our modified F1 and accuracy on the Java-small and Java-large datasets.</abstract>
       <url hash="0ab31438">2020.coling-main.28</url>
@@ -372,7 +372,7 @@
     <paper id="30">
       <title><fixed-case>CEREC</fixed-case>: A Corpus for Entity Resolution in Email Conversations</title>
       <author><first>Parag Pravin</first><last>Dakle</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>339–349</pages>
       <abstract>We present the first large scale corpus for entity resolution in email conversations (CEREC). The corpus consists of 6001 email threads from the Enron Email Corpus containing 36,448 email messages and 38,996 entity coreference chains. The annotation is carried out as a two-step process with minimal manual effort. Experiments are carried out for evaluating different features and performance of four baselines on the created corpus. For the task of mention identification and coreference resolution, a best performance of 54.1 F1 is reported, highlighting the room for improvement. An in-depth qualitative and quantitative error analysis is presented to understand the limitations of the baselines considered.</abstract>
       <url hash="5fb06018">2020.coling-main.30</url>
@@ -383,7 +383,7 @@
       <title><fixed-case>SQL</fixed-case> Generation via Machine Reading Comprehension</title>
       <author><first>Zeyu</first><last>Yan</last></author>
       <author><first>Jianqiang</first><last>Ma</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Jianping</first><last>Shen</last></author>
       <pages>350–356</pages>
       <abstract>Text-to-SQL systems offers natural language interfaces to databases, which can automatically generates SQL queries given natural language questions. On the WikiSQL benchmark, state-of- the-art text-to-SQL systems typically take a slot-filling approach by building several specialized models for each type of slot. Despite being effective, such modularized systems are complex and also fall short in jointly learning for different slots. To solve these problems, this paper proposes a novel approach that formulates the task as a question answering problem, where different slots are predicted by a unified machine reading comprehension (MRC) model. For this purpose, we use a BERT-based MRC model, which can also benefit from intermediate training on other MRC datasets. The proposed method can achieve competitive results on WikiSQL, suggesting it being a promising direction for text-to-SQL.</abstract>
@@ -397,7 +397,7 @@
       <author><first>Yousuf</first><last>Ali Mohammed</last></author>
       <author><first>Sandra</first><last>Derbring</last></author>
       <author><first>Arild</first><last>Matsson</last></author>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <pages>357–369</pages>
       <abstract>This article reports on an ongoing project aiming at automatization of pseudonymization of learner essays. The process includes three steps: identification of personal information in an unstructured text, labeling for a category, and pseudonymization. We experiment with rule-based methods for detection of 15 categories out of the suggested 19 (Megyesi et al., 2018) that we deem important and/or doable with automatic approaches. For the detection and labeling steps,we use resources covering personal names, geographic names, company and university names and others. For the pseudonymization step, we replace the item using another item of the same type from the above-mentioned resources. Evaluation of the detection and labeling steps are made on a set of manually anonymized essays. The results are promising and show that 89% of the personal information can be successfully identified in learner data, and annotated correctly with an inter-annotator agreement of 86% measured as Fleiss kappa and Krippendorff’s alpha.</abstract>
       <url hash="0d4ab50e">2020.coling-main.32</url>
@@ -442,7 +442,7 @@
     <paper id="36">
       <title>Leveraging <fixed-case>HTML</fixed-case> in Free Text Web Named Entity Recognition</title>
       <author><first>Colin</first><last>Ashby</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>407–413</pages>
       <abstract>HTML tags are typically discarded in free text Named Entity Recognition from Web pages. We investigate whether these discarded tags might be used to improve NER performance. We compare Text+Tags sentences with their Text-Only equivalents, over five datasets, two free text segmentation granularities and two NER models. We find an increased F1 performance for Text+Tags of between 0.9% and 13.2% over all datasets, variants and models. This performance increase, over datasets of varying entity types, HTML density and construction quality, indicates our method is flexible and adaptable. These findings imply that a similar technique might be of use in other Web-aware NLP tasks, including the enrichment of deep language models.</abstract>
       <url hash="3fabc411">2020.coling-main.36</url>
@@ -504,7 +504,7 @@
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Marco</first><last>Moresi</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>465–479</pages>
       <abstract>Reinforcement learning (RL) can enable task-oriented dialogue systems to steer the conversation towards successful task completion. In an end-to-end setting, a response can be constructed in a word-level sequential decision making process with the entire system vocabulary as action space. Policies trained in such a fashion do not require expert-defined action spaces, but they have to deal with large action spaces and long trajectories, making RL impractical. Using the latent space of a variational model as action space alleviates this problem. However, current approaches use an uninformed prior for training and optimize the latent distribution solely on the context. It is therefore unclear whether the latent representation truly encodes the characteristics of different actions. In this paper, we explore three ways of leveraging an auxiliary task to shape the latent variable distribution: via pre-training, to obtain an informed prior, and via multitask learning. We choose response auto-encoding as the auxiliary task, as this captures the generative factors of dialogue responses while requiring low computational cost and neither additional data nor labels. Our approach yields a more action-characterized latent representations which support end-to-end dialogue policy optimization and achieves state-of-the-art success rates. These results warrant a more wide-spread use of RL in end-to-end dialogue models.</abstract>
       <url hash="5bfb899f">2020.coling-main.41</url>
@@ -514,7 +514,7 @@
     <paper id="42">
       <title>Recent Neural Methods on Slot Filling and Intent Classification for Task-Oriented Dialogue Systems: A Survey</title>
       <author><first>Samuel</first><last>Louvan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>480–496</pages>
       <abstract>In recent years, fostered by deep learning technologies and by the high demand for conversational AI, various approaches have been proposed that address the capacity to elicit and understand user’s needs in task-oriented dialogue systems. We focus on two core tasks, slot filling (SF) and intent classification (IC), and survey how neural based models have rapidly evolved to address natural language understanding in dialogue systems. We introduce three neural architectures: independent models, which model SF and IC separately, joint models, which exploit the mutual benefit of the two tasks simultaneously, and transfer learning models, that scale the model to new domains. We discuss the current state of the research in SF and IC, and highlight challenges that still require attention.</abstract>
       <url hash="ba25b7f7">2020.coling-main.42</url>
@@ -599,7 +599,7 @@
     <paper id="49">
       <title><fixed-case>S</fixed-case>enti<fixed-case>X</fixed-case>: A Sentiment-Aware Pre-Trained Model for Cross-Domain Sentiment Analysis</title>
       <author><first>Jie</first><last>Zhou</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Wenming</first><last>Xiao</last></author>
@@ -653,7 +653,7 @@
       <title>Modeling Local Contexts for Joint Dialogue Act Recognition and Sentiment Classification with Bi-channel Dynamic Convolutions</title>
       <author><first>Jingye</first><last>Li</last></author>
       <author><first>Hao</first><last>Fei</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>616–626</pages>
       <abstract>In this paper, we target improving the joint dialogue act recognition (DAR) and sentiment classification (SC) tasks by fully modeling the local contexts of utterances. First, we employ the dynamic convolution network (DCN) as the utterance encoder to capture the dialogue contexts. Further, we propose a novel context-aware dynamic convolution network (CDCN) to better leverage the local contexts when dynamically generating kernels. We extended our frameworks into bi-channel version (i.e., BDCN and BCDCN) under multi-task learning to achieve the joint DAR and SC. Two channels can learn their own feature representations for DAR and SC, respectively, but with latent interaction. Besides, we suggest enhancing the tasks by employing the DiaBERT language model. Our frameworks obtain state-of-the-art performances against all baselines on two benchmark datasets, demonstrating the importance of modeling the local contexts.</abstract>
       <url hash="a59a854b">2020.coling-main.53</url>
@@ -684,10 +684,10 @@
     </paper>
     <paper id="56">
       <title>A Joint Learning Approach based on Self-Distillation for Keyphrase Extraction from Scientific Documents</title>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <pages>649–656</pages>
       <abstract>Keyphrase extraction is the task of extracting a small set of phrases that best describe a document. Most existing benchmark datasets for the task typically have limited numbers of annotated documents, making it challenging to train increasingly complex neural networks. In contrast, digital libraries store millions of scientific articles online, covering a wide range of topics. While a significant portion of these articles contain keyphrases provided by their authors, most other articles lack such kind of annotations. Therefore, to effectively utilize these large amounts of unlabeled articles, we propose a simple and efficient joint learning approach based on the idea of self-distillation. Experimental results show that our approach consistently improves the performance of baseline models for keyphrase extraction. Furthermore, our best models outperform previous methods for the task, achieving new state-of-the-art results on two public benchmarks: Inspec and SemEval-2017.</abstract>
       <url hash="6dbfdb10">2020.coling-main.56</url>
@@ -811,7 +811,7 @@
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Afshin</first><last>Rahimi</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>757–770</pages>
       <abstract>Although the Indonesian language is spoken by almost 200 million people and the 10th most spoken language in the world, it is under-represented in NLP research. Previous work on Indonesian has been hampered by a lack of annotated datasets, a sparsity of language resources, and a lack of resource standardization. In this work, we release the IndoLEM dataset comprising seven tasks for the Indonesian language, spanning morpho-syntax, semantics, and discourse. We additionally release IndoBERT, a new pre-trained language model for Indonesian, and evaluate it over IndoLEM, in addition to benchmarking it against existing resources. Our experiments show that IndoBERT achieves state-of-the-art performance over most of the tasks in IndoLEM.</abstract>
       <url hash="ba234f32">2020.coling-main.66</url>
@@ -858,7 +858,7 @@
       <title>Attention Transfer Network for Aspect-level Sentiment Classification</title>
       <author><first>Fei</first><last>Zhao</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>811–821</pages>
       <abstract>Aspect-level sentiment classification (ASC) aims to detect the sentiment polarity of a given opinion target in a sentence. In neural network-based methods for ASC, most works employ the attention mechanism to capture the corresponding sentiment words of the opinion target, then aggregate them as evidence to infer the sentiment of the target. However, aspect-level datasets are all relatively small-scale due to the complexity of annotation. Data scarcity causes the attention mechanism sometimes to fail to focus on the corresponding sentiment words of the target, which finally weakens the performance of neural models. To address the issue, we propose a novel Attention Transfer Network (ATN) in this paper, which can successfully exploit attention knowledge from resource-rich document-level sentiment classification datasets to improve the attention capability of the aspect-level sentiment classification task. In the ATN model, we design two different methods to transfer attention knowledge and conduct experiments on two ASC benchmark datasets. Extensive experimental results show that our methods consistently outperform state-of-the-art works. Further analysis also validates the effectiveness of ATN.</abstract>
       <url hash="a011723c">2020.coling-main.70</url>
@@ -1024,7 +1024,7 @@
       <title>A High Precision Pipeline for Financial Knowledge Graph Construction</title>
       <author><first>Sarah</first><last>Elhammadi</last></author>
       <author><first>Laks</first><last>V.S. Lakshmanan</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Michael</first><last>Simpson</last></author>
       <author><first>Baoxing</first><last>Huai</last></author>
       <author><first>Zhefeng</first><last>Wang</last></author>
@@ -1051,9 +1051,9 @@
       <title>Answering Legal Questions by Learning Neural Attentive Text Representation</title>
       <author><first>Phi Manh</first><last>Kien</last></author>
       <author><first>Ha-Thanh</first><last>Nguyen</last></author>
-      <author><first>Ngo Xuan</first><last>Bach</last></author>
+      <author id="ngo-xuan-bach"><first>Ngo Xuan</first><last>Bach</last></author>
       <author><first>Vu</first><last>Tran</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Tu Minh</first><last>Phuong</last></author>
       <pages>988–998</pages>
       <abstract>Text representation plays a vital role in retrieval-based question answering, especially in the legal domain where documents are usually long and complicated. The better the question and the legal documents are represented, the more accurate they are matched. In this paper, we focus on the task of answering legal questions at the article level. Given a legal question, the goal is to retrieve all the correct and valid legal articles, that can be used as the basic to answer the question. We present a retrieval-based model for the task by learning neural attentive text representation. Our text representation method first leverages convolutional neural networks to extract important information in a question and legal articles. Attention mechanisms are then used to represent the question and articles and select appropriate information to align them in a matching process. Experimental results on an annotated corpus consisting of 5,922 Vietnamese legal questions show that our model outperforms state-of-the-art retrieval-based methods for question answering by large margins in terms of both recall and NDCG.</abstract>
@@ -1065,7 +1065,7 @@
       <title>Joint Transformer/<fixed-case>RNN</fixed-case> Architecture for Gesture Typing in Indic Languages</title>
       <author><first>Emil</first><last>Biju</last></author>
       <author><first>Anirudh</first><last>Sriram</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
       <pages>999–1010</pages>
       <abstract>Gesture typing is a method of typing words on a touch-based keyboard by creating a continuous trace passing through the relevant keys. This work is aimed at developing a keyboard that supports gesture typing in Indic languages. We begin by noting that when dealing with Indic languages, one needs to cater to two different sets of users: (i) users who prefer to type in the native Indic script (Devanagari, Bengali, etc.) and (ii) users who prefer to type in the English script but want the transliterated output in the native script. In both cases, we need a model that takes a trace as input and maps it to the intended word. To enable the development of these models, we create and release two datasets. First, we create a dataset containing keyboard traces for 193,658 words from 7 Indic languages. Second, we curate 104,412 English-Indic transliteration pairs from Wikidata across these languages. Using these datasets we build a model that performs path decoding, transliteration and transliteration correction. Unlike prior approaches, our proposed model does not make co-character independence assumptions during decoding. The overall accuracy of our model across the 7 languages varies from 70-95%.</abstract>
@@ -1077,7 +1077,7 @@
       <title>Automatic Charge Identification from Facts: A Few Sentence-Level Charge Annotations is All You Need</title>
       <author><first>Shounak</first><last>Paul</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last></author>
+      <author><first>Saptarshi</first><last>Ghosh</last></author>
       <pages>1011–1022</pages>
       <abstract>Automatic Charge Identification (ACI) is the task of identifying the relevant charges given the facts of a situation and the statutory laws that define these charges, and is a crucial aspect of the judicial process. Existing works focus on learning charge-side representations by modeling relationships between the charges, but not much effort has been made in improving fact-side representations. We observe that only a small fraction of sentences in the facts actually indicates the charges. We show that by using a very small subset (&lt; 3%) of fact descriptions annotated with sentence-level charges, we can achieve an improvement across a range of different ACI models, as compared to modeling just the main document-level task on a much larger dataset. Additionally, we propose a novel model that utilizes sentence-level charge labels as an auxiliary task, coupled with the main task of document-level charge identification in a multi-task learning framework. The proposed model comprehensively outperforms a large number of recent baselines for ACI. The improvement in performance is particularly noticeable for the rare charges which are known to be especially challenging to identify.</abstract>
       <url hash="cadbda1c">2020.coling-main.88</url>
@@ -1108,7 +1108,7 @@
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Hizkiel Mitiku</first><last>Alemayehu</last></author>
       <author><first>Abinew</first><last>Ayele</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1048–1060</pages>
       <abstract>This paper presents the study of sentiment analysis for Amharic social media texts. As the number of social media users is ever-increasing, social media platforms would like to understand the latent meaning and sentiments of a text to enhance decision-making procedures. However, low-resource languages such as Amharic have received less attention due to several reasons such as lack of well-annotated datasets, unavailability of computing resources, and fewer or no expert researchers in the area. This research addresses three main research questions. We first explore the suitability of existing tools for the sentiment analysis task. Annotation tools are scarce to support large-scale annotation tasks in Amharic. Also, the existing crowdsourcing platforms do not support Amharic text annotation. Hence, we build a social-network-friendly annotation tool called ‘ASAB’ using the Telegram bot. We collect 9.4k tweets, where each tweet is annotated by three Telegram users. Moreover, we explore the suitability of machine learning approaches for Amharic sentiment analysis. The FLAIR deep learning text classifier, based on network embeddings that are computed from a distributional thesaurus, outperforms other supervised classifiers. We further investigate the challenges in building a sentiment analysis system for Amharic and we found that the widespread usage of sarcasm and figurative speech are the main issues in dealing with the problem. To advance the sentiment analysis research in Amharic and other related low-resource languages, we release the dataset, the annotation tool, source code, and models publicly under a permissive.</abstract>
       <url hash="07578b97">2020.coling-main.91</url>
@@ -1141,7 +1141,7 @@
       <author><first>Minghui</first><last>An</last></author>
       <author><first>Jingjing</first><last>Wang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1078–1089</pages>
       <abstract>From the perspective of health psychology, human beings with long-term and sustained negativity are highly possible to be diagnosed with depression. Inspired by this, we argue that the global topic information derived from user-generated contents (e.g., texts and images) is crucial to boost the performance of the depression detection task, though this information has been neglected by almost all previous studies on depression detection. To this end, we propose a new Multimodal Topic-enriched Auxiliary Learning (MTAL) approach, aiming at capturing the topic information inside different modalities (i.e., texts and images) for depression detection. Especially, in our approach, a modality-agnostic topic model is proposed to be capable of mining the topical clues from either the discrete textual signals or the continuous visual signals. On this basis, the topic modeling w.r.t. the two modalities are cast as two auxiliary tasks for improving the performance of the primary task (i.e., depression detection). Finally, the detailed evaluation demonstrates the great advantage of our MTAL approach to depression detection over the state-of-the-art baselines. This justifies the importance of the multimodal topic information to depression detection and the effectiveness of our approach in capturing such information.</abstract>
       <url hash="b444abdc">2020.coling-main.94</url>
@@ -1167,7 +1167,7 @@
       <title>Situated and Interactive Multimodal Conversations</title>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Satwik</first><last>Kottur</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Ankita</first><last>De</last></author>
       <author><first>Shivani</first><last>Poddar</last></author>
       <author><first>Theodore</first><last>Levin</last></author>
@@ -1214,9 +1214,9 @@
     <paper id="99">
       <title><fixed-case>R</fixed-case>-<fixed-case>VGAE</fixed-case>: Relational-variational Graph Autoencoder for Unsupervised Prerequisite Chain Learning</title>
       <author><first>Irene</first><last>Li</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1147–1157</pages>
       <abstract>The task of concept prerequisite chain learning is to automatically determine the existence of prerequisite relationships among concept pairs. In this paper, we frame learning prerequisite relationships among concepts as an unsupervised task with no access to labeled concept pairs during training. We propose a model called the Relational-Variational Graph AutoEncoder (R-VGAE) to predict concept relations within a graph consisting of concept and resource nodes. Results show that our unsupervised approach outperforms graph-based semi-supervised methods and other baseline methods by up to 9.77% and 10.47% in terms of prerequisite relation prediction accuracy and F1 score. Our method is notably the first graph-based model that attempts to make use of deep learning representations for the task of unsupervised prerequisite learning. We also expand an existing corpus which totals 1,717 English Natural Language Processing (NLP)-related lecture slide files and manual concept pair annotations over 322 topics.</abstract>
       <url hash="28cb1dd7">2020.coling-main.99</url>
@@ -1300,7 +1300,7 @@
       <author><first>Jingyi</first><last>He</last></author>
       <author><first>Kc</first><last>Tsiolis</last></author>
       <author><first>Kian</first><last>Kenyon-Dean</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>1229–1241</pages>
       <abstract>Word embeddings are trained to predict word cooccurrence statistics, which leads them to possess different lexical properties (syntactic, semantic, etc.) depending on the notion of context defined at training time. These properties manifest when querying the embedding space for the most similar vectors, and when used at the input layer of deep neural networks trained to solve downstream NLP problems. Meta-embeddings combine multiple sets of differently trained word embeddings, and have been shown to successfully improve intrinsic and extrinsic performance over equivalent models which use just one set of source embeddings. We introduce word prisms: a simple and efficient meta-embedding method that learns to combine source embeddings according to the task at hand. Word prisms learn orthogonal transformations to linearly combine the input source embeddings, which allows them to be very efficient at inference time. We evaluate word prisms in comparison to other meta-embedding methods on six extrinsic evaluations and observe that word prisms offer improvements in performance on all tasks.</abstract>
       <url hash="f14c9711">2020.coling-main.106</url>
@@ -1309,7 +1309,7 @@
     </paper>
     <paper id="107">
       <title>Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Boris</first><last>Sheludko</last></author>
       <author><first>Alexander</first><last>Podolskiy</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
@@ -1334,7 +1334,7 @@
     <paper id="109">
       <title>How Relevant Are Selectional Preferences for Transformer-based Language Models?</title>
       <author><first>Eleni</first><last>Metheniti</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <pages>1266–1278</pages>
       <abstract>Selectional preference is defined as the tendency of a predicate to favor particular arguments within a certain linguistic context, and likewise, reject others that result in conflicting or implausible meanings. The stellar success of contextual word embedding models such as BERT in NLP tasks has led many to question whether these models have learned linguistic information, but up till now, most research has focused on syntactic information. We investigate whether Bert contains information on the selectional preferences of words, by examining the probability it assigns to the dependent word given the presence of a head word in a sentence. We are using word pairs of head-dependent words in five different syntactic relations from the SP-10K corpus of selectional preference (Zhang et al., 2019b), in sentences from the ukWaC corpus, and we are calculating the correlation of the plausibility score (from SP-10K) and the model probabilities. Our results show that overall, there is no strong positive or negative correlation in any syntactic relation, but we do find that certain head words have a strong correlation and that masking all words but the head word yields the most positive correlations in most scenarios –which indicates that the semantics of the predicate is indeed an integral and influential factor for the selection of the argument.</abstract>
@@ -1345,7 +1345,7 @@
     <paper id="110">
       <title>Embedding Semantic Taxonomies</title>
       <author><first>Alyssa</first><last>Lees</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <author><first>Shubin</first><last>Zhao</last></author>
       <author><first>Jacek</first><last>Korycki</last></author>
       <author><first>Sara</first><last>Mc Carthy</last></author>
@@ -1359,7 +1359,7 @@
       <title>A Retrofitting Model for Incorporating Semantic Relations into Word Embeddings</title>
       <author><first>Sapan</first><last>Shah</last></author>
       <author><first>Sreedhar</first><last>Reddy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1292–1298</pages>
       <abstract>We present a novel retrofitting model that can leverage relational knowledge available in a knowledge resource to improve word embeddings. The knowledge is captured in terms of relation inequality constraints that compare similarity of related and unrelated entities in the context of an anchor entity. These constraints are used as training data to learn a non-linear transformation function that maps original word vectors to a vector space respecting these constraints. The transformation function is learned in a similarity metric learning setting using Triplet network architecture. We applied our model to synonymy, antonymy and hypernymy relations in WordNet and observed large gains in performance over original distributional models as well as other retrofitting approaches on word similarity task and significant overall improvement on lexical entailment detection task.</abstract>
       <url hash="1fe4134f">2020.coling-main.111</url>
@@ -1419,7 +1419,7 @@
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>1346–1358</pages>
       <abstract>This paper presents an in-depth investigation of the effectiveness of dependency-based syntactic features on the irony detection task in a multilingual perspective (English, Spanish, French and Italian). It focuses on the contribution from syntactic knowledge, exploiting linguistic resources where syntax is annotated according to the Universal Dependencies scheme. Three distinct experimental settings are provided. In the first, a variety of syntactic dependency-based features combined with classical machine learning classifiers are explored. In the second scenario, two well-known types of word embeddings are trained on parsed data and tested against gold standard datasets. In the third setting, dependency-based syntactic features are combined into the Multilingual BERT architecture. The results suggest that fine-grained dependency-based syntactic information is informative for the detection of irony.</abstract>
       <url hash="e17507df">2020.coling-main.116</url>
@@ -1454,9 +1454,9 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Shubham</first><last>Dewangan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>1384–1395</pages>
       <abstract>Cognates are variants of the same lexical form across different languages; for example “fonema” in Spanish and “phoneme” in English are cognates, both of which mean “a unit of sound”. The task of automatic detection of cognates among any two languages can help downstream NLP tasks such as Cross-lingual Information Retrieval, Computational Phylogenetics, and Machine Translation. In this paper, we demonstrate the use of cross-lingual word embeddings for detecting cognates among fourteen Indian Languages. Our approach introduces the use of context from a knowledge graph to generate improved feature representations for cognate detection. We, then, evaluate the impact of our cognate detection mechanism on neural machine translation (NMT), as a downstream task. We evaluate our methods to detect cognates on a challenging dataset of twelve Indian languages, namely, Sanskrit, Hindi, Assamese, Oriya, Kannada, Gujarati, Tamil, Telugu, Punjabi, Bengali, Marathi, and Malayalam. Additionally, we create evaluation datasets for two more Indian languages, Konkani and Nepali. We observe an improvement of up to 18% points, in terms of F-score, for cognate detection. Furthermore, we observe that cognates extracted using our method help improve NMT quality by up to 2.76 BLEU. We also release our code, newly constructed datasets and cross-lingual models publicly.</abstract>
       <url hash="6b577a99">2020.coling-main.119</url>
@@ -1555,7 +1555,7 @@
     <paper id="128">
       <title>Contextual Argument Component Classification for Class Discussions</title>
       <author><first>Luca</first><last>Lugini</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1475–1480</pages>
       <abstract>Argument mining systems often consider contextual information, i.e. information outside of an argumentative discourse unit, when trained to accomplish tasks such as argument component identification, classification, and relation extraction. However, prior work has not carefully analyzed the utility of different contextual properties in context-aware models. In this work, we show how two different types of contextual information, local discourse context and speaker context, can be incorporated into a computational model for classifying argument components in multi-party classroom discussions. We find that both context types can improve performance, although the improvements are dependent on context size and position.</abstract>
       <url hash="3c83c686">2020.coling-main.128</url>
@@ -1590,7 +1590,7 @@
       <title>Event-Guided Denoising for Multilingual Relation Learning</title>
       <author><first>Amith</first><last>Ananthram</last></author>
       <author><first>Emily</first><last>Allaway</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1505–1512</pages>
       <abstract>General purpose relation extraction has recently seen considerable gains in part due to a massively data-intensive distant supervision technique from Soares et al. (2019) that produces state-of-the-art results across many benchmarks. In this work, we present a methodology for collecting high quality training data for relation extraction from unlabeled text that achieves a near-recreation of their zero-shot and few-shot results at a fraction of the training cost. Our approach exploits the predictable distributional structure of date-marked news articles to build a denoised corpus – the extraction process filters out low quality examples. We show that a smaller multilingual encoder trained on this corpus performs comparably to the current state-of-the-art (when both receive little to no fine-tuning) on few-shot and standard relation benchmarks in English and Spanish despite using many fewer examples (50k vs. 300mil+).</abstract>
       <url hash="47cc2b77">2020.coling-main.131</url>
@@ -1650,7 +1650,7 @@
     </paper>
     <paper id="136">
       <title>Graph Enhanced Dual Attention Network for Document-Level Relation Extraction</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Wei</first><last>Ye</last></author>
       <author><first>Zhonghao</first><last>Sheng</last></author>
       <author><first>Rui</first><last>Xie</last></author>
@@ -1724,7 +1724,7 @@
       <author><first>Rajdeep</first><last>Sarkar</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>1606–1617</pages>
       <abstract>Automatic Language Identification (LI) or Dialect Identification (DI) of short texts of closely related languages or dialects, is one of the primary steps in many natural language processing pipelines. Language identification is considered a solved task in many cases; however, in the case of very closely related languages, or in an unsupervised scenario (where the languages are not known in advance), performance is still poor. In this paper, we propose the Unsupervised Deep Language and Dialect Identification (UDLDI) method, which can simultaneously learn sentence embeddings and cluster assignments from short texts. The UDLDI model understands the sentence constructions of languages by applying attention to character relations which helps to optimize the clustering of languages. We have performed our experiments on three short-text datasets for different language families, each consisting of closely related languages or dialects, with very minimal training sets. Our experimental evaluations on these datasets have shown significant improvement over state-of-the-art unsupervised methods and our model has outperformed state-of-the-art LI and DI systems in supervised settings.</abstract>
       <url hash="5a84e94b">2020.coling-main.141</url>
@@ -1763,7 +1763,7 @@
       <title>Biased <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank: Unsupervised Graph-Based Content Extraction</title>
       <author><first>Ashkan</first><last>Kazemi</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1642–1652</pages>
       <abstract>We introduce Biased TextRank, a graph-based content extraction method inspired by the popular TextRank algorithm that ranks text spans according to their importance for language processing tasks and according to their relevance to an input “focus.” Biased TextRank enables focused content extraction for text by modifying the random restarts in the execution of TextRank. The random restart probabilities are assigned based on the relevance of the graph nodes to the focus of the task. We present two applications of Biased TextRank: focused summarization and explanation extraction, and show that our algorithm leads to improved performance on two different datasets by significant ROUGE-N score margins. Much like its predecessor, Biased TextRank is unsupervised, easy to implement and orders of magnitude faster and lighter than current state-of-the-art Natural Language Processing methods for similar tasks.</abstract>
       <url hash="a9048070">2020.coling-main.144</url>
@@ -1799,7 +1799,7 @@
     <paper id="147">
       <title>Unsupervised Fact Checking by Counter-Weighted Positive and Negative Evidential Paths in A Knowledge Graph</title>
       <author><first>Jiseong</first><last>Kim</last></author>
-      <author><first>Key-sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-sun</first><last>Choi</last></author>
       <pages>1677–1686</pages>
       <abstract>Misinformation spreads across media, community, and knowledge graphs in the Web by not only human agents but also information extraction algorithms that extract factual statements from unstructured textual data to populate the existing knowledge graphs. Traditional fact checking by experts or crowds is increasingly difficult to keep pace with the volume of newly created misinformation in the Web. Therefore, it is important and necessary to enhance the computational ability to determine whether a given factual statement is truthful or not. We view this problem as a truth scoring task in a knowledge graph. We present a novel rule-based approach that finds positive and negative evidential paths in a knowledge graph for a given factual statement and calculates a truth score for the given statement by unsupervised ensemble of the found positive and negative evidential paths. For example, we can determine the factual statement “United States is the birth place of Barack Obama” as truthful if there is the positive evidential path (Barack Obama, birthPlace, Hawaii) ∧ (Hawaii, country, United States) in a knowledge graph. For another example, we can determine the factual statement “Canada is the nationality of Barack Obama” as untruthful if there is the negative evidential path (Barack Obama, nationality, United States) ∧ (United States, ≠, Canada) in a knowledge graph. For evaluating on a real-world situation, we constructed an evaluation dataset by labeling truth or untruth label on factual statements that were extracted from Wikipedia texts by using the state-of-the-art BERT-based information extraction system. Our evaluation results show that our approach outperforms the state-of-the-art unsupervised approaches significantly by up to 0.12 AUC-ROC and even outperforms the supervised approach by up to 0.05 AUC-ROC not only in our dataset but also in the two different standard datasets.</abstract>
       <url hash="c238149d">2020.coling-main.147</url>
@@ -1864,7 +1864,7 @@
       <author><first>Bosung</first><last>Kim</last></author>
       <author><first>Taesuk</first><last>Hong</last></author>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>1737–1743</pages>
       <abstract>As research on utilizing human knowledge in natural language processing has attracted considerable attention in recent years, knowledge graph (KG) completion has come into the spotlight. Recently, a new knowledge graph completion method using a pre-trained language model, such as KG-BERT, is presented and showed high performance. However, its scores in ranking metrics such as Hits@k are still behind state-of-the-art models. We claim that there are two main reasons: 1) failure in sufficiently learning relational information in knowledge graphs, and 2) difficulty in picking out the correct answer from lexically similar candidates. In this paper, we propose an effective multi-task learning method to overcome the limitations of previous works. By combining relation prediction and relevance ranking tasks with our target link prediction, the proposed model can learn more relational properties in KGs and properly perform even when lexical similarity occurs. Experimental results show that we not only largely improve the ranking performances compared to KG-BERT but also achieve the state-of-the-art performances in Mean Rank and Hits@10 on the WN18RR dataset.</abstract>
       <url hash="9aaed09d">2020.coling-main.153</url>
@@ -1936,7 +1936,7 @@
       <title>A Deep Generative Approach to Native Language Identification</title>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1778–1783</pages>
       <abstract>Native language identification (NLI) – identifying the native language (L1) of a person based on his/her writing in the second language (L2) – is useful for a variety of purposes, including marketing, security, and educational applications. From a traditional machine learning perspective,NLI is usually framed as a multi-class classification task, where numerous designed features are combined in order to achieve the state-of-the-art results. We introduce a deep generative language modelling (LM) approach to NLI, which consists in fine-tuning a GPT-2 model separately on texts written by the authors with the same L1, and assigning a label to an unseen text based on the minimum LM loss with respect to one of these fine-tuned GPT-2 models. Our method outperforms traditional machine learning approaches and currently achieves the best results on the benchmark NLI datasets.</abstract>
       <url hash="0607949b">2020.coling-main.159</url>
@@ -1982,7 +1982,7 @@
     <paper id="163">
       <title>Detecting de minimis Code-Switching in Historical <fixed-case>G</fixed-case>erman Books</title>
       <author><first>Shijia</first><last>Liu</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>1808–1814</pages>
       <abstract>Code-switching has long interested linguists, with computational work in particular focusing on speech and social media data (Sitaram et al., 2019). This paper contrasts these informal instances of code-switching to its appearance in more formal registers, by examining the mixture of languages in the Deutsches Textarchiv (DTA), a corpus of 1406 primarily German books from the 17th to 19th centuries. We automatically annotate and manually inspect spans of six embedded languages (Latin, French, English, Italian, Spanish, and Greek) in the corpus. We quantitatively analyze the differences between code-switching patterns in these books and those in more typically studied speech and social media corpora. Furthermore, we address the practical task of predicting code-switching from features of the matrix language alone in the DTA corpus. Such classifiers can help reduce errors when optical character recognition or speech transcription is applied to a large corpus with rare embedded languages.</abstract>
       <url hash="a484b1d6">2020.coling-main.163</url>
@@ -1993,7 +1993,7 @@
       <title>Lin: Unsupervised Extraction of Tasks from Textual Communication</title>
       <author><first>Parth</first><last>Diwanji</last></author>
       <author><first>Hui</first><last>Guo</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <author><first>Anup</first><last>Kalia</last></author>
       <pages>1815–1819</pages>
       <abstract>Commitments and requests are a hallmark of collaborative communication, especially in team settings. Identifying specific tasks being committed to or request from emails and chat messages can enable important downstream tasks, such as producing todo lists, reminders, and calendar entries. State-of-the-art approaches for task identification rely on large annotated datasets, which are not always available, especially for domain-specific tasks. Accordingly, we propose Lin, an unsupervised approach of identifying tasks that leverages dependency parsing and VerbNet. Our evaluations show that Lin yields comparable or more accurate results than supervised models on domains with large training sets, and maintains its excellent performance on unseen domains.</abstract>
@@ -2089,9 +2089,9 @@
     <paper id="172">
       <title>Humans Meet Models on Object Naming: A New Dataset and Analysis</title>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Matthijs</first><last>Westera</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>1893–1905</pages>
       <abstract>We release ManyNames v2 (MN v2), a verified version of an object naming dataset that contains dozens of valid names per object for 25K images. We analyze issues in the data collection method originally employed, standard in Language &amp; Vision (L&amp;V), and find that the main source of noise in the data comes from simulating a naming context solely from an image with a target object marked with a bounding box, which causes subjects to sometimes disagree regarding which object is the target. We also find that both the degree of this uncertainty in the original data and the amount of true naming variation in MN v2 differs substantially across object domains. We use MN v2 to analyze a popular L&amp;V model and demonstrate its effectiveness on the task of object naming. However, our fine-grained analysis reveals that what appears to be human-like model behavior is not stable across domains, e.g., the model confuses people and clothing objects much more frequently than humans do. We also find that standard evaluations underestimate the actual effectiveness of the naming model: on the single-label names of the original dataset (Visual Genome), it obtains −27% accuracy points than on MN v2, that includes all valid object names.</abstract>
       <url hash="e53ac11a">2020.coling-main.172</url>
@@ -2113,7 +2113,7 @@
       <title>Language-Driven Region Pointer Advancement for Controllable Image Captioning</title>
       <author><first>Annika</first><last>Lindh</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>1922–1935</pages>
       <abstract>Controllable Image Captioning is a recent sub-field in the multi-modal task of Image Captioning wherein constraints are placed on which regions in an image should be described in the generated natural language caption. This puts a stronger focus on producing more detailed descriptions, and opens the door for more end-user control over results. A vital component of the Controllable Image Captioning architecture is the mechanism that decides the timing of attending to each region through the advancement of a region pointer. In this paper, we propose a novel method for predicting the timing of region pointer advancement by treating the advancement step as a natural part of the language structure via a NEXT-token, motivated by a strong correlation to the sentence structure in the training data. We find that our timing agrees with the ground-truth timing in the Flickr30k Entities test data with a precision of 86.55% and a recall of 97.92%. Our model implementing this technique improves the state-of-the-art on standard captioning metrics while additionally demonstrating a considerably larger effective vocabulary size.</abstract>
       <url hash="0bbea776">2020.coling-main.174</url>
@@ -2135,7 +2135,7 @@
     <paper id="176">
       <title>Image Caption Generation for News Articles</title>
       <author><first>Zhishen</first><last>Yang</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1941–1951</pages>
       <abstract>In this paper, we address the task of news-image captioning, which generates a description of an image given the image and its article body as input. This task is more challenging than the conventional image captioning, because it requires a joint understanding of image and text. We present a Transformer model that integrates text and image modalities and attends to textual features from visual features in generating a caption. Experiments based on automatic evaluation metrics and human evaluation show that an article text provides primary information to reproduce news-image captions written by journalists. The results also demonstrate that the proposed model outperforms the state-of-the-art model. In addition, we also confirm that visual features contribute to improving the quality of news-image captions.</abstract>
       <url hash="048fa540">2020.coling-main.176</url>
@@ -2182,7 +2182,7 @@
       <title>The <fixed-case>A</fixed-case>ppos<fixed-case>C</fixed-case>orpus: a new multilingual, multi-domain dataset for factual appositive generation</title>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Di</first><last>Lu</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>1989–2003</pages>
       <abstract>News articles, image captions, product reviews and many other texts mention people and organizations whose name recognition could vary for different audiences. In such cases, background information about the named entities could be provided in the form of an appositive noun phrase, either written by a human or generated automatically. We expand on the previous work in appositive generation with a new, more realistic, end-to-end definition of the task, instantiated by a dataset that spans four languages (English, Spanish, German and Polish), two entity types (person and organization) and two domains (Wikipedia and News). We carry out an extensive analysis of the data and the task, pointing to the various modeling challenges it poses. The results we obtain with standard language generation methods show that the task is indeed non-trivial, and leaves plenty of room for improvement.</abstract>
       <url hash="a97c130d">2020.coling-main.180</url>
@@ -2207,7 +2207,7 @@
       <author><first>Siyuan</first><last>Wang</last></author>
       <author><first>Yameng</first><last>Huang</last></author>
       <author><first>Jian</first><last>Jiao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Ruofei</first><last>Zhang</last></author>
       <pages>2014–2025</pages>
@@ -2275,7 +2275,7 @@
     </paper>
     <paper id="188">
       <title>Taking the Correction Difficulty into Account in Grammatical Error Correction Evaluation</title>
-      <author><first>Takumi</first><last>Gotou</last></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Gotou</last></author>
       <author><first>Ryo</first><last>Nagata</last></author>
       <author><first>Masato</first><last>Mita</last></author>
       <author><first>Kazuaki</first><last>Hanawa</last></author>
@@ -2289,7 +2289,7 @@
       <title>Automatic Distractor Generation for Multiple Choice Questions in Standard Tests</title>
       <author><first>Zhaopeng</first><last>Qiu</last></author>
       <author><first>Xian</first><last>Wu</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
       <pages>2096–2106</pages>
       <abstract>To assess knowledge proficiency of a learner, multiple choice question is an efficient and widespread form in standard tests. However, the composition of the multiple choice question, especially the construction of distractors is quite challenging. The distractors are required to both incorrect and plausible enough to confuse the learners who did not master the knowledge. Currently, the distractors are generated by domain experts which are both expensive and time-consuming. This urges the emergence of automatic distractor generation, which can benefit various standard tests in a wide range of domains. In this paper, we propose a question and answer guided distractor generation (EDGE) framework to automate distractor generation. EDGE consists of three major modules: (1) the Reforming Question Module and the Reforming Passage Module apply gate layers to guarantee the inherent incorrectness of the generated distractors; (2) the Distractor Generator Module applies attention mechanism to control the level of plausibility. Experimental results on a large-scale public dataset demonstrate that our model significantly outperforms existing models and achieves a new state-of-the-art.</abstract>
       <url hash="b6f44f82">2020.coling-main.189</url>
@@ -2323,7 +2323,7 @@
       <author><first>Tatsuya</first><last>Aoki</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2126–2131</pages>
       <abstract>We propose neural models that can normalize text by considering the similarities of word strings and sounds. We experimentally compared a model that considers the similarities of both word strings and sounds, a model that considers only the similarity of word strings or of sounds, and a model without the similarities as a baseline. Results showed that leveraging the word string similarity succeeded in dealing with misspellings and abbreviations, and taking into account the sound similarity succeeded in dealing with phonetic substitutions and emphasized characters. So that the proposed models achieved higher F1 scores than the baseline.</abstract>
       <url hash="1a16cb92">2020.coling-main.192</url>
@@ -2370,7 +2370,7 @@
     <paper id="196">
       <title>Automatic Assistance for Academic Word Usage</title>
       <author><first>Dariush</first><last>Saberi</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Jonathan</first><last>James Webster</last></author>
       <pages>2163–2168</pages>
       <abstract>This paper describes a writing assistance system that helps students improve their academic writing. Given an input text, the system suggests lexical substitutions that aim to incorporate more academic vocabulary. The substitution candidates are drawn from an academic word list and ranked by a masked language model. Experimental results show that lexical formality analysis can improve the quality of the suggestions, in comparison to a baseline that relies on the masked language model only.</abstract>
@@ -2380,10 +2380,10 @@
     </paper>
     <paper id="197">
       <title>Style versus Content: A distinction without a (learnable) difference?</title>
-      <author><first>Somayeh</first><last>Jafaritazehjani</last></author>
+      <author id="somayeh-jafaritazehjani"><first>Somayeh</first><last>Jafaritazehjani</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>2169–2180</pages>
       <abstract>Textual style transfer involves modifying the style of a text while preserving its content. This assumes that it is possible to separate style from content. This paper investigates whether this separation is possible. We use sentiment transfer as our case study for style transfer analysis. Our experimental methodology frames style transfer as a multi-objective problem, balancing style shift with content preservation and fluency. Due to the lack of parallel data for style transfer we employ a variety of adversarial encoder-decoder networks in our experiments. Also, we use of a probing methodology to analyse how these models encode style-related features in their latent spaces. The results of our experiments which are further confirmed by a human evaluation reveal the inherent trade-off between the multiple style transfer objectives which indicates that style cannot be usefully separated from content within these style-transfer systems.</abstract>
       <url hash="91707b51">2020.coling-main.197</url>
@@ -2393,8 +2393,8 @@
     <paper id="198">
       <title>Contextualized Embeddings for Enriching Linguistic Analyses on Politeness</title>
       <author><first>Ahmad</first><last>Aljanaideh</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>2181–2190</pages>
       <abstract>Linguistic analyses in natural language processing (NLP) have often been performed around the static notion of words where the context (surrounding words) is not considered. For example, previous analyses on politeness have focused on comparing the use of static words such as personal pronouns across (im)polite requests without taking the context of those words into account. Current word embeddings in NLP do capture context and thus can be leveraged to enrich linguistic analyses. In this work, we introduce a model which leverages the pre-trained BERT model to cluster contextualized representations of a word based on (1) the context in which the word appears and (2) the labels of items the word occurs in. Using politeness as case study, this model is able to automatically discover interpretable, fine-grained context patterns of words, some of which align with existing theories on politeness. Our model further discovers novel finer-grained patterns associated with (im)polite language. For example, the word please can occur in impolite contexts that are predictable from BERT clustering. The approach proposed here is validated by showing that features based on fine-grained patterns inferred from the clustering improve over politeness-word baselines.</abstract>
       <url hash="3db35047">2020.coling-main.198</url>
@@ -2427,7 +2427,7 @@
       <title>Cycle-Consistent Adversarial Autoencoders for Unsupervised Text Style Transfer</title>
       <author><first>Yufang</first><last>Huang</last></author>
       <author><first>Wentao</first><last>Zhu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yiye</first><last>Zhang</last></author>
       <author><first>Changjian</first><last>Hu</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
@@ -2453,7 +2453,7 @@
       <author><first>Yu</first><last>Wu</last></author>
       <author><first>Lili</first><last>Mou</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
-      <author><first>WenHan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>WenHan</first><last>Chao</last></author>
       <pages>2236–2249</pages>
       <abstract>Conventional approaches for formality style transfer borrow models from neural machine translation, which typically requires massive parallel data for training. However, the dataset for formality style transfer is considerably smaller than translation corpora. Moreover, we observe that informal and formal sentences closely resemble each other, which is different from the translation task where two languages have different vocabularies and grammars. In this paper, we present a new approach, Sequence-to-Sequence with Shared Latent Space (S2S-SLS), for formality style transfer, where we propose two auxiliary losses and adopt joint training of bi-directional transfer and auto-encoding. Experimental results show that S2S-SLS (with either RNN or Transformer architectures) consistently outperforms baselines in various settings, especially when we have limited data.</abstract>
       <url hash="be20c7c6">2020.coling-main.203</url>
@@ -2469,7 +2469,7 @@
       <author><first>Haijun</first><last>Shan</last></author>
       <author><first>Ji</first><last>Zhang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2250–2260</pages>
       <abstract>Visual storytelling aims to generate a narrative paragraph from a sequence of images automatically. Existing approaches construct text description independently for each image and roughly concatenate them as a story, which leads to the problem of generating semantically incoherent content. In this paper, we propose a new way for visual storytelling by introducing a topic description task to detect the global semantic context of an image stream. A story is then constructed with the guidance of the topic description. In order to combine the two generation tasks, we propose a multi-agent communication framework that regards the topic description generator and the story generator as two agents and learn them simultaneously via iterative updating mechanism. We validate our approach on VIST dataset, where quantitative results, ablations, and human evaluation demonstrate our method’s good ability in generating stories with higher quality compared to state-of-the-art methods.</abstract>
       <url hash="b354b0e0">2020.coling-main.204</url>
@@ -2479,7 +2479,7 @@
     <paper id="205">
       <title>Referring to what you know and do not know: Making Referring Expression Generation Models Generalize To Unseen Entities</title>
       <author><first>Rossana</first><last>Cunha</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
       <author><first>Fabio</first><last>Alves</last></author>
       <pages>2261–2272</pages>
@@ -2505,7 +2505,7 @@
       <title>Retrieval-Augmented Controllable Review Generation</title>
       <author><first>Jihyeok</first><last>Kim</last></author>
       <author><first>Seungtaek</first><last>Choi</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <pages>2284–2295</pages>
       <abstract>In this paper, we study review generation given a set of attribute identifiers which are user ID, product ID and rating. This is a difficult subtask of natural language generation since models are limited to the given identifiers, without any specific descriptive information regarding the inputs, when generating the text. The capacity of these models is thus confined and dependent to how well the models can capture vector representations of attributes. We thus propose to additionally leverage references, which are selected from a large pool of texts labeled with one of the attributes, as textual information that enriches inductive biases of given attributes. With these references, we can now pose the problem as an instance of text-to-text generation, which makes the task easier since texts that are syntactically, semantically similar with the output text are provided as input. Using this framework, we address issues such as selecting references from a large candidate set without textual context and improving the model complexity for generation. Our experiments show that our models improve over previous approaches on both automatic and human evaluation metrics.</abstract>
@@ -2528,11 +2528,11 @@
       <title>A Learning-Exploring Method to Generate Diverse Paraphrases with Multi-Objective Deep Reinforcement Learning</title>
       <author><first>Mingtong</first><last>Liu</last></author>
       <author><first>Erguang</first><last>Yang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Changjian</first><last>Hu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <pages>2310–2321</pages>
       <abstract>Paraphrase generation (PG) is of great importance to many downstream tasks in natural language processing. Diversity is an essential nature to PG for enhancing generalization capability and robustness of downstream applications. Recently, neural sequence-to-sequence (Seq2Seq) models have shown promising results in PG. However, traditional model training for PG focuses on optimizing model prediction against single reference and employs cross-entropy loss, which objective is unable to encourage model to generate diverse paraphrases. In this work, we present a novel approach with multi-objective learning to PG. We propose a learning-exploring method to generate sentences as learning objectives from the learned data distribution, and employ reinforcement learning to combine these new learning objectives for model training. We first design a sample-based algorithm to explore diverse sentences. Then we introduce several reward functions to evaluate the sampled sentences as learning signals in terms of expressive diversity and semantic fidelity, aiming to generate diverse and high-quality paraphrases. To effectively optimize model performance satisfying different evaluating aspects, we use a GradNorm-based algorithm that automatically balances these training objectives. Experiments and analyses on Quora and Twitter datasets demonstrate that our proposed method not only gains a significant increase in diversity but also improves generation quality over several state-of-the-art baselines.</abstract>
@@ -2543,7 +2543,7 @@
     <paper id="210">
       <title>Curious Case of Language Generation Evaluation Metrics: A Cautionary Tale</title>
       <author><first>Ozan</first><last>Caglayan</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>2322–2328</pages>
       <abstract>Automatic evaluation of language generation systems is a well-studied problem in Natural Language Processing. While novel metrics are proposed every year, a few popular metrics remain as the de facto metrics to evaluate tasks such as image captioning and machine translation, despite their known limitations. This is partly due to ease of use, and partly because researchers expect to see them and know how to interpret them. In this paper, we urge the community for more careful consideration of how they automatically evaluate their models by demonstrating important failure cases on multiple datasets, language pairs and tasks. Our experiments show that metrics (i) usually prefer system outputs to human-authored texts, (ii) can be insensitive to correct translations of rare words, (iii) can yield surprisingly high scores when given a single sentence as system output for the entire test set.</abstract>
@@ -2680,7 +2680,7 @@
       <author><first>Xiujun</first><last>Zhu</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2448–2458</pages>
       <abstract>Sentiment forecasting in dialog aims to predict the polarity of next utterance to come, and can help speakers revise their utterances in sentimental utterances generation. However, the polarity of next utterance is normally hard to predict, due to the lack of content of next utterance (yet to come). In this study, we propose a Neural Sentiment Forecasting (NSF) model to address inherent challenges. In particular, we employ a neural simulation model to simulate the next utterance based on the context (previous utterances encountered). Moreover, we employ a sequence influence model to learn both pair-wise and seq-wise influence. Empirical studies illustrate the importance of proposed sentiment forecasting task, and justify the effectiveness of our NSF model over several strong baselines.</abstract>
       <url hash="e07ed27f">2020.coling-main.221</url>
@@ -2693,7 +2693,7 @@
       <author><first>Dongsuk</first><last>Oh</last></author>
       <author><first>Yoonna</first><last>Jang</last></author>
       <author><first>Kisu</first><last>Yang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>2459–2471</pages>
       <abstract>CommonsenseQA is a task in which a correct answer is predicted through commonsense reasoning with pre-defined knowledge. Most previous works have aimed to improve the performance with distributed representation without considering the process of predicting the answer from the semantic representation of the question. To shed light upon the semantic interpretation of the question, we propose an AMR-ConceptNet-Pruned (ACP) graph. The ACP graph is pruned from a full integrated graph encompassing Abstract Meaning Representation (AMR) graph generated from input questions and an external commonsense knowledge graph, ConceptNet (CN). Then the ACP graph is exploited to interpret the reasoning path as well as to predict the correct answer on the CommonsenseQA task. This paper presents the manner in which the commonsense reasoning process can be interpreted with the relations and concepts provided by the ACP graph. Moreover, ACP-based models are shown to outperform the baselines.</abstract>
       <url hash="b3a6a8b1">2020.coling-main.222</url>
@@ -2724,7 +2724,7 @@
     <paper id="225">
       <title>Multitask Easy-First Dependency Parsing: Exploiting Complementarities of Different Dependency Representations</title>
       <author><first>Yash</first><last>Kankanampati</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <author><first>Dima</first><last>Taji</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
@@ -2738,7 +2738,7 @@
       <title>Context Dependent Semantic Parsing: A Survey</title>
       <author><first>Zhuang</first><last>Li</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2509–2521</pages>
       <abstract>Semantic parsing is the task of translating natural language utterances into machine-readable meaning representations. Currently, most semantic parsing methods are not able to utilize the contextual information (e.g. dialogue and comments history), which has a great potential to boost the semantic parsing systems. To address this issue, context dependent semantic parsing has recently drawn a lot of attention. In this survey, we investigate progress on the methods for the context dependent semantic parsing, together with the current datasets and tasks. We then point out open problems and challenges for future research in this area.</abstract>
       <url hash="3acaa8c9">2020.coling-main.226</url>
@@ -2788,8 +2788,8 @@
       <author><first>Jon Ander</first><last>Campos</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Arantxa</first><last>Otegi</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Gorka</first><last>Azkune</last></author>
       <pages>2561–2571</pages>
       <abstract>The interaction of conversational systems with users poses an exciting opportunity for improving them after deployment, but little evidence has been provided of its feasibility. In most applications, users are not able to provide the correct answer to the system, but they are able to provide binary (correct, incorrect) feedback. In this paper we propose feedback-weighted learning based on importance sampling to improve upon an initial supervised system using binary user feedback. We perform simulated experiments on document classification (for development) and Conversational Question Answering datasets like QuAC and DoQA, where binary user feedback is derived from gold annotations. The results show that our method is able to improve over the initial supervised system, getting close to a fully-supervised system that has access to the same labeled examples in in-domain experiments (QuAC), and even matching in out-of-domain experiments (DoQA). Our work opens the prospect to exploit interactions with real users and improve conversational systems after deployment.</abstract>
@@ -2945,8 +2945,8 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Wei</first><last>Zhang</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2687–2698</pages>
       <abstract>Reading comprehension (RC) on social media such as Twitter is a critical and challenging task due to its noisy, informal, but informative nature. Most existing RC models are developed on formal datasets such as news articles and Wikipedia documents, which severely limit their performances when directly applied to the noisy and informal texts in social media. Moreover, these models only focus on a certain type of RC, extractive or generative, but ignore the integration of them. To well address these challenges, we come up with a noisy user-generated text-oriented RC model. In particular, we first introduce a set of text normalizers to transform the noisy and informal texts to the formal ones. Then, we integrate the extractive and the generative RC model by a multi-task learning mechanism and an answer selection module. Experimental results on TweetQA demonstrate that our NUT-RC model significantly outperforms the state-of-the-art social media-oriented RC models.</abstract>
       <url hash="1e28a441">2020.coling-main.242</url>
@@ -2982,8 +2982,8 @@
     <paper id="245">
       <title>Neural Networks approaches focused on <fixed-case>F</fixed-case>rench Spoken Language Understanding: application to the <fixed-case>MEDIA</fixed-case> Evaluation Task</title>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>2722–2727</pages>
       <abstract>In this paper, we present a study on a French Spoken Language Understanding (SLU) task: the MEDIA task. Many works and studies have been proposed for many tasks, but most of them are focused on English language and tasks. The exploration of a richer language like French within the framework of a SLU task implies to recent approaches to handle this difficulty. Since the MEDIA task seems to be one of the most difficult, according several previous studies, we propose to explore Neural Networks approaches focusing of three aspects: firstly, the Neural Network inputs and more specifically the word embeddings; secondly, we compared French version of BERT against the best setup through different ways; Finally, the comparison against State-of-the-Art approaches. Results show that the word embeddings trained on a small corpus need to be updated during SLU model training. Furthermore, the French BERT fine-tuned approaches outperform the classical Neural Network Architectures and achieves state of the art results. However, the contextual embeddings extracted from one of the French BERT approaches achieve comparable results in comparison to word embedding, when integrated into the proposed neural architecture.</abstract>
       <url hash="053b48b2">2020.coling-main.245</url>
@@ -3018,9 +3018,9 @@
       <title>Robust Machine Reading Comprehension by Learning Soft labels</title>
       <author><first>Zhenyu</first><last>Zhao</last></author>
       <author><first>Shuangzhi</first><last>Wu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2754–2759</pages>
       <abstract>Neural models have achieved great success on the task of machine reading comprehension (MRC), which are typically trained on hard labels. We argue that hard labels limit the model capability on generalization due to the label sparseness problem. In this paper, we propose a robust training method for MRC models to address this problem. Our method consists of three strategies, 1) label smoothing, 2) word overlapping, 3) distribution prediction. All of them help to train models on soft labels. We validate our approach on the representative architecture - ALBERT. Experimental results show that our method can greatly boost the baseline with 1% improvement in average, and achieve state-of-the-art performance on NewsQA and QUOREF.</abstract>
       <url hash="193d47b6">2020.coling-main.248</url>
@@ -3029,11 +3029,11 @@
     </paper>
     <paper id="249">
       <title>Reinforced Multi-task Approach for Multi-hop Question Generation</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Hardik</first><last>Chauhan</last></author>
       <author><first>Ravi Tej</first><last>Akella</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2760–2775</pages>
       <abstract>Question generation (QG) attempts to solve the inverse of question answering (QA) problem by generating a natural language question given a document and an answer. While sequence to sequence neural models surpass rule-based systems for QG, they are limited in their capacity to focus on more than one supporting fact. For QG, we often require multiple supporting facts to generate high-quality questions. Inspired by recent works on multi-hop reasoning in QA, we take up Multi-hop question generation, which aims at generating relevant questions based on supporting facts in the context. We employ multitask learning with the auxiliary task of answer-aware supporting fact prediction to guide the question generator. In addition, we also proposed a question-aware reward function in a Reinforcement Learning (RL) framework to maximize the utilization of the supporting facts. We demonstrate the effectiveness of our approach through experiments on the multi-hop question answering dataset, HotPotQA. Empirical evaluation shows our model to outperform the single-hop neural question generation models on both automatic evaluation metrics such as BLEU, METEOR, and ROUGE and human evaluation metrics for quality and coverage of the generated questions.</abstract>
       <url hash="d1b4f202">2020.coling-main.249</url>
@@ -3084,7 +3084,7 @@
       <author><first>Aparna</first><last>Garimella</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Nabil</first><last>Hossain</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2814–2825</pages>
       <abstract>The subjective nature of humor makes computerized humor generation a challenging task. We propose an automatic humor generation framework for filling the blanks in Mad Libs® stories, while accounting for the demographic backgrounds of the desired audience. We collect a dataset consisting of such stories, which are filled in and judged by carefully selected workers on Amazon Mechanical Turk. We build upon the BERT platform to predict location-biased word fillings in incomplete sentences, and we fine-tune BERT to classify location-specific humor in a sentence. We leverage these components to produce YodaLib, a fully-automated Mad Libs style humor generation framework, which selects and ranks appropriate candidate words and sentences in order to generate a coherent and funny story tailored to certain demographics. Our experimental results indicate that YodaLib outperforms a previous semi-automated approach proposed for this task, while also surpassing human annotators in both qualitative and quantitative analyses.</abstract>
       <url hash="5fdb2dea">2020.coling-main.253</url>
@@ -3105,7 +3105,7 @@
     <paper id="255">
       <title>Noise Isn’t Always Negative: Countering Exposure Bias in Sequence-to-Sequence Inflection Models</title>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>2837–2846</pages>
       <abstract>Morphological inflection, like many sequence-to-sequence tasks, sees great performance from recurrent neural architectures when data is plentiful, but performance falls off sharply in lower-data settings. We investigate one aspect of neural seq2seq models that we hypothesize contributes to overfitting - teacher forcing. By creating different training and test conditions, exposure bias increases the likelihood that a system too closely models its training data. Experiments show that teacher-forced models struggle to recover when they enter unknown territory. However, a simple modification to the training algorithm to more closely mimic test conditions creates models that are better able to generalize to unseen environments.</abstract>
       <url hash="57201e29">2020.coling-main.255</url>
@@ -3137,7 +3137,7 @@
     <paper id="258">
       <title>Computational Modeling of Affixoid Behavior in <fixed-case>C</fixed-case>hinese Morphology</title>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Pei-Yi</first><last>Chen</last></author>
       <author><first>Sara</first><last>Court</last></author>
       <pages>2879–2888</pages>
@@ -3173,7 +3173,7 @@
     <paper id="261">
       <title>Autoregressive Affective Language Forecasting: A Self-Supervised Task</title>
       <author><first>Matthew</first><last>Matero</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>2913–2923</pages>
       <abstract>Human natural language is mentioned at a specific point in time while human emotions change over time. While much work has established a strong link between language use and emotional states, few have attempted to model emotional language in time. Here, we introduce the task of <i>affective language forecasting</i> – predicting future change in language based on past changes of language, a task with real-world applications such as treating mental health or forecasting trends in consumer confidence. We establish some of the fundamental autoregressive characteristics of the task (necessary history size, static versus dynamic length, varying time-step resolutions) and then build on popular sequence models for <i>words</i> to instead model sequences of <i>language-based emotion in time</i>. Over a novel Twitter dataset of 1,900 users and weekly + daily scores for 6 emotions and 2 additional linguistic attributes, we find a novel dual-sequence GRU model with decayed hidden states achieves best results (<tex-math>r = .66</tex-math>) significantly out-predicting, e.g., a moving averaging based on the past time-steps (<tex-math>r = .49</tex-math>). We make our anonymized dataset as well as task setup and evaluation code available for others to build on.</abstract>
       <url hash="12f5f064">2020.coling-main.261</url>
@@ -3194,7 +3194,7 @@
       <title>End to End <fixed-case>C</fixed-case>hinese Lexical Fusion Recognition with Sememe Knowledge</title>
       <author><first>Yijiang</first><last>Liu</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>2935–2946</pages>
       <abstract>In this paper, we present Chinese lexical fusion recognition, a new task which could be regarded as one kind of coreference recognition. First, we introduce the task in detail, showing the relationship with coreference recognition and differences from the existing tasks. Second, we propose an end-to-end model for the task, handling mentions as well as coreference relationship jointly. The model exploits the state-of-the-art contextualized BERT representations as an encoder, and is further enhanced with the sememe knowledge from HowNet by graph attention networks. We manually annotate a benchmark dataset for the task and then conduct experiments on it. Results demonstrate that our final model is effective and competitive for the task. Detailed analysis is offered for comprehensively understanding the new task and our proposed model.</abstract>
       <url hash="71ce220e">2020.coling-main.263</url>
@@ -3271,7 +3271,7 @@
       <title>When Beards Start Shaving Men: A Subject-object Resolution Test Suite for Morpho-syntactic and Semantic Model Introspection</title>
       <author><first>Patricia</first><last>Fischer</last></author>
       <author><first>Daniël</first><last>de Kok</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>3019–3035</pages>
       <abstract>In this paper, we introduce the SORTS Subject-Object Resolution Test Suite of German minimal sentence pairs for model introspection. The full test suite consists of 18,502 transitive clauses with manual annotations of 8 word order patterns, 5 morphological and syntactic and 11 semantic property classes. The test suite has been constructed such that sentences are minimal pairs with respect to a property class. Each property has been selected with a particular focus on its effect on subject-object resolution, the second-most error-prone task within syntactic parsing of German after prepositional phrase attachment (Fischer et al., 2019). The size and detail of annotations make the test suite a valuable resource for natural language processing applications with syntactic and semantic tasks. We use dependency parsing to demonstrate how the test suite allows insights into the process of subject-object resolution. Based on the test suite annotations, word order and case syncretism can be identified as most important factors that affect subject-object resolution.</abstract>
       <url hash="df5d77d1">2020.coling-main.269</url>
@@ -3297,7 +3297,7 @@
       <author><first>Shuhei</first><last>Kondo</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>3043–3049</pages>
       <abstract>We propose a simple method for nominal coordination boundary identification. As the main strength of our method, it can identify the coordination boundaries without training on labeled data, and can be applied even if coordination structure annotations are not available. Our system employs pre-trained word embeddings to measure the similarities of words and detects the span of coordination, assuming that conjuncts share syntactic and semantic similarities. We demonstrate that our method yields good results in identifying coordinated noun phrases in the GENIA corpus and is comparable to a recent supervised method for the case when the coordinator conjoins simple noun phrases.</abstract>
       <url hash="90c56d9f">2020.coling-main.271</url>
@@ -3308,7 +3308,7 @@
       <title>Learning Semantic Correspondences from Noisy Data-text Pairs by Local-to-Global Alignments</title>
       <author><first>Feng</first><last>Nie</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3050–3059</pages>
       <abstract>Learning semantic correspondences between structured input data (e.g., slot-value pairs) and associated texts is a core problem for many downstream NLP applications, e.g., data-to-text generation. Large-scale datasets recently proposed for generation contain loosely corresponding data text pairs, where part of spans in text cannot be aligned to its incomplete paired input. To learn semantic correspondences from such datasets, we propose a two-stage local-to-global alignment (L2GA) framework. First, a local model based on multi-instance learning is applied to build alignments for texts spans that can be directly grounded to its paired structured input. Then, a novel global model built upon a memory-guided conditional random field (CRF) layer aims to infer missing alignments for text spans which not supported by paired incomplete inputs, where the memory is designed to leverage alignment clues provided by the local model to strengthen the global model. In this way, the local model and global model can work jointly to learn semantic correspondences in the same framework. Experimental results show that our proposed method can be generalized to both restaurant and computer domains and improve the alignment accuracy.</abstract>
       <url hash="cab5d260">2020.coling-main.272</url>
@@ -3319,7 +3319,7 @@
       <title>Definition Frames: Using Definitions for Hybrid Concept Representations</title>
       <author><first>Evangelia</first><last>Spiliopoulou</last></author>
       <author><first>Artidoro</first><last>Pagnoni</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3060–3068</pages>
       <abstract>Advances in word representations have shown tremendous improvements in downstream NLP tasks, but lack semantic interpretability. In this paper, we introduce Definition Frames (DF), a matrix distributed representation extracted from definitions, where each dimension is semantically interpretable. DF dimensions correspond to the Qualia structure relations: a set of relations that uniquely define a term. Our results show that DFs have competitive performance with other distributional semantic approaches on word similarity tasks.</abstract>
       <url hash="b9b53905">2020.coling-main.273</url>
@@ -3334,7 +3334,7 @@
       <author><first>Daniela</first><last>Stepanov</last></author>
       <author><first>Hangfeng</first><last>He</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>3069–3083</pages>
       <abstract>We propose a new semantic scheme for capturing predicate-argument relations for nominalizations, termed QANom. This scheme extends the QA-SRL formalism (He et al., 2015), modeling the relations between nominalizations and their arguments via natural language question-answer pairs. We construct the first QANom dataset using controlled crowdsourcing, analyze its quality and compare it to expertly annotated nominal-SRL annotations, as well as to other QA-driven annotations. In addition, we train a baseline QANom parser for identifying nominalizations and labeling their arguments with question-answer pairs. Finally, we demonstrate the extrinsic utility of our annotations for downstream tasks using both indirect supervision and zero-shot settings.</abstract>
@@ -3348,7 +3348,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>3084–3094</pages>
       <abstract>Event coreference resolution aims to classify all event mentions that refer to the same real-world event into the same group, which is necessary to information aggregation and many downstream applications. To resolve event coreference, existing methods usually calculate the similarities between event mentions and between specific kinds of event arguments. However, they fail to accurately identify paraphrase relations between events and may suffer from error propagation while extracting event components (i.e., event mentions and their arguments). Therefore, we propose a new model based on Event-specific Paraphrases and Argument-aware Semantic Embeddings, thus called EPASE, for event coreference resolution. EPASE recognizes deep paraphrase relations in an event-specific context of sentences and can cover event paraphrases of more situations, bringing about a better generalization. Additionally, the embeddings of argument roles are encoded into event embedding without relying on a fixed number and type of arguments, which results in the better scalability of EPASE. Experiments on both within- and cross-document event coreference demonstrate its consistent and significant superiority compared to existing methods.</abstract>
       <url hash="5dfeefea">2020.coling-main.275</url>
@@ -3360,7 +3360,7 @@
       <author><first>Irina</first><last>Nikishina</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>3095–3106</pages>
       <abstract>Ontologies, taxonomies, and thesauri have always been in high demand in a large number of NLP tasks. However, most studies are focused on the creation of lexical resources rather than the maintenance of the existing ones and keeping them up-to-date. In this paper, we address the problem of taxonomy enrichment. Namely, we explore the possibilities of taxonomy extension in a resource-poor setting and present several methods which are applicable to a large number of languages. We also create novel English and Russian datasets for training and evaluating taxonomy enrichment systems and describe a technique of creating such datasets for other languages.</abstract>
       <url hash="2c9537f6">2020.coling-main.276</url>
@@ -3434,7 +3434,7 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Yang</first><last>Xu</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>3168–3178</pages>
       <abstract>We tackle implicit discourse relation recognition. Both self-attention and interactive-attention mechanisms have been applied for attention-aware representation learning, which improves the current discourse analysis models. To take advantages of the two attention mechanisms simultaneously, we develop a propagative attention learning model using a cross-coupled two-channel network. We experiment on Penn Discourse Treebank. The test results demonstrate that our model yields substantial improvements over the baselines (BiLSTM and BERT).</abstract>
@@ -3444,15 +3444,15 @@
     </paper>
     <paper id="283">
       <title>Dual Attention Model for Citation Recommendation</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Qiang</first><last>Ma</last></author>
       <pages>3179–3189</pages>
       <abstract>Based on an exponentially increasing number of academic articles, discovering and citing comprehensive and appropriate resources has become a non-trivial task. Conventional citation recommender methods suffer from severe information loss. For example, they do not consider the section of the paper that the user is writing and for which they need to find a citation, the relatedness between the words in the local context (the text span that describes a citation), or the importance on each word from the local context. These shortcomings make such methods insufficient for recommending adequate citations to academic manuscripts. In this study, we propose a novel embedding-based neural network called “dual attention model for citation recommendation (DACR)” to recommend citations during manuscript preparation. Our method adapts embedding of three semantic information: words in the local context, structural contexts, and the section on which a user is working. A neural network model is designed to maximize the similarity between the embedding of the three input (local context words, section and structural contexts) and the target citation appearing in the context. The core of the neural network model is composed of self-attention and additive attention, where the former aims to capture the relatedness between the contextual words and structural context, and the latter aims to learn the importance of them. The experiments on real-world datasets demonstrate the effectiveness of the proposed approach.</abstract>
       <url hash="6ae8323c">2020.coling-main.283</url>
       <revision id="1" href="2020.coling-main.283v1" hash="dd80be1b"/>
       <revision id="2" href="2020.coling-main.283v2" hash="67f79a24" date="2021-01-01">This revision corrects a mistake in Section 5.3, correcting "200 iterations" to "300 iterations".</revision>
-      <doi>10.18653/v1/2020.coling-main.283</doi>
       <revision id="3" href="2020.coling-main.283v3" hash="6ae8323c" date="2021-04-16">Minor correction to Section 5.3</revision>
+      <doi>10.18653/v1/2020.coling-main.283</doi>
       <bibkey>zhang-ma-2020-dual</bibkey>
     </paper>
     <paper id="284">
@@ -3494,7 +3494,7 @@
       <author><first>Yihuan</first><last>Mao</last></author>
       <author><first>Yujing</first><last>Wang</last></author>
       <author><first>Chufan</first><last>Wu</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Wang</last></author>
       <author><first>Quanlu</first><last>Zhang</last></author>
       <author><first>Yaming</first><last>Yang</last></author>
@@ -3561,7 +3561,7 @@
       <title>What Does This Acronym Mean? Introducing a New Dataset for Acronym Identification and Disambiguation</title>
       <author><first>Amir</first><last>Pouran Ben Veyseh</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <pages>3285–3301</pages>
       <abstract>Acronyms are the short forms of phrases that facilitate conveying lengthy sentences in documents and serve as one of the mainstays of writing. Due to their importance, identifying acronyms and corresponding phrases (i.e., acronym identification (AI)) and finding the correct meaning of each acronym (i.e., acronym disambiguation (AD)) are crucial for text understanding. Despite the recent progress on this task, there are some limitations in the existing datasets which hinder further improvement. More specifically, limited size of manually annotated AI datasets or noises in the automatically created acronym identification datasets obstruct designing advanced high-performing acronym identification models. Moreover, the existing datasets are mostly limited to the medical domain and ignore other domains. In order to address these two limitations, we first create a manually annotated large AI dataset for scientific domain. This dataset contains 17,506 sentences which is substantially larger than previous scientific AI datasets. Next, we prepare an AD dataset for scientific domain with 62,441 samples which is significantly larger than previous scientific AD dataset. Our experiments show that the existing state-of-the-art models fall far behind human-level performance on both datasets proposed by this work. In addition, we propose a new deep learning model which utilizes the syntactical structure of the sentence to expand an ambiguous acronym in a sentence. The proposed model outperforms the state-of-the-art models on the new AD dataset, providing a strong baseline for future research on this dataset.</abstract>
@@ -3585,9 +3585,9 @@
     <paper id="294">
       <title>Temporal Relations Annotation and Extrapolation Based on Semi-intervals and Boundig Relations</title>
       <author><first>Alejandro</first><last>Pimentel</last></author>
-      <author><first>Gemma</first><last>Bel Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel Enguix</last></author>
       <author><first>Gerardo</first><last>Sierra Martínez</last></author>
-      <author><first>Azucena</first><last>Montes</last></author>
+      <author id="azucena-montes-rendon"><first>Azucena</first><last>Montes</last></author>
       <pages>3313–3323</pages>
       <abstract>The computational treatment of temporal relations is based on the work of Allen, who establishes 13 different types, and Freksa, who designs a cognitive procedure to manage them. Freksa’s notation is not widely used because, although it has cognitive and expressive advantages, it is too complex from the computational perspective. This paper proposes a system for the annotation and management of temporal relations that combines the richness and expressiveness of Freksa’s approach with the simplicity of Allen’s notation. Our method is summarized in the application of bounding relations, thanks to which it is possible to obtain the temporary representation of complete neighborhoods capable of representing vague temporal relations such as those that can be frequently found in a text. Such advantages are obtained without the need to greatly increase the complexity of the labeling process since the markup language is almost the same as TimeML, to which only a second temporary “relType”’ type label relationship is added. Our experiments show that the temporal relationships that present vagueness are in fact much more common than those in which a single relationship can be established precisely. For these reasons, our new labeling system achieves a more agreeable representation of temporal relations.</abstract>
       <url hash="19ca0684">2020.coling-main.294</url>
@@ -3619,7 +3619,7 @@
     <paper id="297">
       <title>An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification</title>
       <author><first>Robert</first><last>Vacareanu</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>3346–3356</pages>
@@ -3676,7 +3676,7 @@
       <title>Collective Wisdom: Improving Low-resource Neural Machine Translation using Adaptive Knowledge Distillation</title>
       <author><first>Fahimeh</first><last>Saleh</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3413–3421</pages>
       <abstract>Scarcity of parallel sentence-pairs poses a significant hurdle for training high-quality Neural Machine Translation (NMT) models in bilingually low-resource scenarios. A standard approach is transfer learning, which involves taking a model trained on a high-resource language-pair and fine-tuning it on the data of the low-resource MT condition of interest. However, it is not clear generally which high-resource language-pair offers the best transfer learning for the target MT setting. Furthermore, different transferred models may have complementary semantic and/or syntactic strengths, hence using only one model may be sub-optimal. In this paper, we tackle this problem using knowledge distillation, where we propose to distill the knowledge of ensemble of teacher models to a single student model. As the quality of these teacher models varies, we propose an effective adaptive knowledge distillation approach to dynamically adjust the contribution of the teacher models during the distillation process. Experiments on transferring from a collection of six language pairs from IWSLT to five low-resource language-pairs from TED Talks demonstrate the effectiveness of our approach, achieving up to +0.9 BLEU score improvements compared to strong baselines.</abstract>
       <url hash="230256c3">2020.coling-main.302</url>
@@ -3687,7 +3687,7 @@
       <title>Enabling Interactive Transcription in an Indigenous Community</title>
       <author><first>Eric</first><last>Le Ferrand</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>3422–3428</pages>
       <abstract>We propose a novel transcription workflow which combines spoken term detection and human-in-the-loop, together with a pilot experiment. This work is grounded in an almost zero-resource scenario where only a few terms have so far been identified, involving two endangered languages. We show that in the early stages of transcription, when the available data is insufficient to train a robust ASR system, it is possible to take advantage of the transcription of a small number of isolated words in order to bootstrap the transcription of a speech collection.</abstract>
       <url hash="1dc7d671">2020.coling-main.303</url>
@@ -3710,7 +3710,7 @@
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Tingting</first><last>Liang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Lifang</first><last>He</last></author>
       <pages>3436–3440</pages>
       <abstract>Mixup is a latest data augmentation technique that linearly interpolates input examples and the corresponding labels. It has shown strong effectiveness in image classification by interpolating images at the pixel level. Inspired by this line of research, in this paper, we explore i) how to apply mixup to natural language processing tasks since text data can hardly be mixed in the raw format; ii) if mixup is still effective in transformer-based learning models,e.g., BERT.To achieve the goal, we incorporate mixup to transformer-based pre-trained architecture, named“mixup-transformer”, for a wide range of NLP tasks while keeping the whole end-to-end training system. We evaluate the proposed framework by running extensive experiments on the GLUEbenchmark. Furthermore, we also examine the performance of mixup-transformer in low-resource scenarios by reducing the training data with a certain ratio. Our studies show that mixup is a domain-independent data augmentation technique to pre-trained language models, resulting in significant performance improvement for transformer-based models.</abstract>
@@ -3723,7 +3723,7 @@
       <author><first>Giwon</first><last>Hong</last></author>
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Doyeon</first><last>Lim</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>3441–3448</pages>
       <abstract>Advances in Question Answering (QA) research require additional datasets for new domains, languages, and types of questions, as well as for performance increases. Human creation of a QA dataset like SQuAD, however, is expensive. As an alternative, an unsupervised QA approach has been proposed so that QA training data can be generated automatically. However, the performance of unsupervised QA is much lower than that of supervised QA models. We identify two anomalies in the automatically generated questions and propose how they can be mitigated. We show our approach helps improve unsupervised QA significantly across a number of QA tasks.</abstract>
       <url hash="4d0a06d6">2020.coling-main.306</url>
@@ -3735,7 +3735,7 @@
       <author><first>Jheng-Hong</first><last>Yang</last></author>
       <author><first>Sheng-Chieh</first><last>Lin</last></author>
       <author><first>Rodrigo</first><last>Nogueira</last></author>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>3449–3453</pages>
@@ -3758,8 +3758,8 @@
     </paper>
     <paper id="309">
       <title>Using Bilingual Patents for Translation Training</title>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Benjamin</first><last>Tsou</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin</first><last>Tsou</last></author>
       <author><first>Tianyuan</first><last>Cai</last></author>
       <pages>3461–3466</pages>
       <abstract>While bilingual corpora have been instrumental for machine translation, their utility for training translators has been less explored. We investigate the use of bilingual corpora as pedagogical tools for translation in the technical domain. In a user study, novice translators revised Chinese translations of English patents through bilingual concordancing. Results show that concordancing with an in-domain bilingual corpus can yield greater improvement in translation quality of technical terms than a general-domain bilingual corpus.</abstract>
@@ -3816,7 +3816,7 @@
       <author><first>Changhan</first><last>Wang</last></author>
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>3520–3533</pages>
       <abstract>We introduce dual-decoder Transformer, a new model architecture that jointly performs automatic speech recognition (ASR) and multilingual speech translation (ST). Our models are based on the original Transformer architecture (Vaswani et al., 2017) but consist of two decoders, each responsible for one task (ASR or ST). Our major contribution lies in how these decoders interact with each other: one decoder can attend to different information sources from the other via a dual-attention mechanism. We propose two variants of these architectures corresponding to two different levels of dependencies between the decoders, called the parallel and cross dual-decoder Transformers, respectively. Extensive experiments on the MuST-C dataset show that our models outperform the previously-reported highest translation performance in the multilingual settings, and outperform as well bilingual one-to-one results. Furthermore, our parallel models demonstrate no trade-off between ASR and ST compared to the vanilla multi-task architecture. Our code and pre-trained models are available at <url>https://github.com/formiel/speech-translation</url>.</abstract>
       <url hash="66494499">2020.coling-main.314</url>
@@ -3826,7 +3826,7 @@
     <paper id="315">
       <title>Multitask Learning-Based Neural Bridging Reference Resolution</title>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>3534–3546</pages>
       <abstract>We propose a multi task learning-based neural model for resolving bridging references tackling two key challenges. The first challenge is the lack of large corpora annotated with bridging references. To address this, we use multi-task learning to help bridging reference resolution with coreference resolution. We show that substantial improvements of up to 8 p.p. can be achieved on full bridging resolution with this architecture. The second challenge is the different definitions of bridging used in different corpora, meaning that hand-coded systems or systems using special features designed for one corpus do not work well with other corpora. Our neural model only uses a small number of corpus independent features, thus can be applied to different corpora. Evaluations with very different bridging corpora (ARRAU, ISNOTES, BASHI and SCICORP) suggest that our architecture works equally well on all corpora, and achieves the SoTA results on full bridging resolution for all corpora, outperforming the best reported results by up to 36.3 p.p..</abstract>
       <url hash="6f1b6a65">2020.coling-main.315</url>
@@ -3840,7 +3840,7 @@
       <author><first>Christopher</first><last>Klein</last></author>
       <author><first>Mayank</first><last>Gupta</last></author>
       <author><first>William</first><last>Li</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <pages>3547–3557</pages>
       <abstract>This paper develops and implements a scalable methodology for (a) estimating the noisiness of labels produced by a typical crowdsourcing semantic annotation task, and (b) reducing the resulting error of the labeling process by as much as 20-30% in comparison to other common labeling strategies. Importantly, this new approach to the labeling process, which we name Dynamic Automatic Conflict Resolution (DACR), does not require a ground truth dataset and is instead based on inter-project annotation inconsistencies. This makes DACR not only more accurate but also available to a broad range of labeling tasks. In what follows we present results from a text classification task performed at scale for a commercial personal assistant, and evaluate the inherent ambiguity uncovered by this annotation strategy as compared to other common labeling strategies.</abstract>
       <url hash="267e2b91">2020.coling-main.316</url>
@@ -3850,8 +3850,8 @@
     <paper id="317">
       <title>Automatic Discovery of Heterogeneous Machine Learning Pipelines: An Application to Natural Language Processing</title>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andres</first><last>Montoyo</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andres</first><last>Montoyo</last></author>
       <author><first>Yudivián</first><last>Almeida Cruz</last></author>
       <pages>3558–3568</pages>
       <abstract>This paper presents AutoGOAL, a system for automatic machine learning (AutoML) that uses heterogeneous techniques. In contrast with existing AutoML approaches, our contribution can automatically build machine learning pipelines that combine techniques and algorithms from different frameworks, including shallow classifiers, natural language processing tools, and neural networks. We define the heterogeneous AutoML optimization problem as the search for the best sequence of algorithms that transforms specific input data into the desired output. This provides a novel theoretical and practical approach to AutoML. Our proposal is experimentally evaluated in diverse machine learning problems and compared with alternative approaches, showing that it is competitive with other AutoML alternatives in standard benchmarks. Furthermore, it can be applied to novel scenarios, such as several NLP tasks, where existing alternatives cannot be directly deployed. The system is freely available and includes in-built compatibility with a large number of popular machine learning frameworks, which makes our approach useful for solving practical problems with relative ease and effort.</abstract>
@@ -3933,7 +3933,7 @@
       <title>Increasing Learning Efficiency of Self-Attention Networks through Direct Position Interactions, Learnable Temperature, and Convoluted Attention</title>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3630–3636</pages>
       <abstract>Self-Attention Networks (SANs) are an integral part of successful neural architectures such as Transformer (Vaswani et al., 2017), and thus of pretrained language models such as BERT (Devlin et al., 2019) or GPT-3 (Brown et al., 2020). Training SANs on a task or pretraining them on language modeling requires large amounts of data and compute resources. We are searching for modifications to SANs that enable faster learning, i.e., higher accuracies after fewer update steps. We investigate three modifications to SANs: direct position interactions, learnable temperature, and convoluted attention. When evaluating them on part-of-speech tagging, we find that direct position interactions are an alternative to position embeddings, and convoluted attention has the potential to speed up the learning process.</abstract>
       <url hash="1a4a1834">2020.coling-main.324</url>
@@ -3943,7 +3943,7 @@
     <paper id="325">
       <title>Picking <fixed-case>BERT</fixed-case>’s Brain: Probing for Linguistic Dependencies in Contextualized Embeddings Using Representational Similarity Analysis</title>
       <author><first>Michael</first><last>Lepori</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <pages>3637–3651</pages>
       <abstract>As the name implies, contextualized representations of language are typically motivated by their ability to encode context. Which aspects of context are captured by such representations? We introduce an approach to address this question using Representational Similarity Analysis (RSA). As case studies, we investigate the degree to which a verb embedding encodes the verb’s subject, a pronoun embedding encodes the pronoun’s antecedent, and a full-sentence representation encodes the sentence’s head word (as determined by a dependency parse). In all cases, we show that BERT’s contextualized embeddings reflect the linguistic dependency being studied, and that BERT encodes these dependencies to a greater degree than it encodes less linguistically-salient controls. These results demonstrate the ability of our approach to adjudicate between hypotheses about which aspects of context are encoded in representations of language.</abstract>
       <url hash="be244c3d">2020.coling-main.325</url>
@@ -3969,7 +3969,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Qipeng</first><last>Guo</last></author>
       <author><first>Yaru</first><last>Hu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zheng</first><last>Zhang</last></author>
       <pages>3660–3670</pages>
       <abstract>With the emerging branch of incorporating factual knowledge into pre-trained language models such as BERT, most existing models consider shallow, static, and separately pre-trained entity embeddings, which limits the performance gains of these models. Few works explore the potential of deep contextualized knowledge representation when injecting knowledge. In this paper, we propose the Contextualized Language and Knowledge Embedding (CoLAKE), which jointly learns contextualized representation for both language and knowledge with the extended MLM objective. Instead of injecting only entity embeddings, CoLAKE extracts the knowledge context of an entity from large-scale knowledge bases. To handle the heterogeneity of knowledge context and language context, we integrate them in a unified data structure, word-knowledge graph (WK graph). CoLAKE is pre-trained on large-scale WK graphs with the modified Transformer encoder. We conduct experiments on knowledge-driven tasks, knowledge probing tasks, and language understanding tasks. Experimental results show that CoLAKE outperforms previous counterparts on most of the tasks. Besides, CoLAKE achieves surprisingly high performance on our synthetic task called word-knowledge graph completion, which shows the superiority of simultaneously contextualizing language and knowledge representation.</abstract>
@@ -3993,7 +3993,7 @@
       <author><first>Ran</first><last>Wang</last></author>
       <author><first>Kun</first><last>Tao</last></author>
       <author><first>Jiali</first><last>Zeng</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>3684–3695</pages>
       <abstract>Machine reading comprehension (MRC) is the task that asks a machine to answer questions based on a given context. For Chinese MRC, due to the non-literal and non-compositional semantic characteristics, Chinese idioms pose unique challenges for machines to understand. Previous studies tend to treat idioms separately without fully exploiting the relationship among them. In this paper, we first define the concept of literal meaning coverage to measure the consistency between semantics and literal meanings for Chinese idioms. With the definition, we prove that the literal meanings of many idioms are far from their semantics, and we also verify that the synonymic relationship can mitigate this inconsistency, which would be beneficial for idiom comprehension. Furthermore, to fully utilize the synonymic relationship, we propose the synonym knowledge enhanced reader. Specifically, for each idiom, we first construct a synonym graph according to the annotations from the high-quality synonym dictionary or the cosine similarity between the pre-trained idiom embeddings and then incorporate the graph attention network and gate mechanism to encode the graph. Experimental results on ChID, a large-scale Chinese idiom reading comprehension dataset, show that our model achieves state-of-the-art performance.</abstract>
       <url hash="26a2b26f">2020.coling-main.329</url>
@@ -4005,7 +4005,7 @@
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Maria</first><last>Vasardani</last></author>
       <author><first>Martin</first><last>Tomko</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>3696–3707</pages>
       <abstract>Existing metonymy resolution approaches rely on features extracted from external resources like dictionaries and hand-crafted lexical resources. In this paper, we propose an end-to-end word-level classification approach based only on BERT, without dependencies on taggers, parsers, curated dictionaries of place names, or other external resources. We show that our approach achieves the state-of-the-art on 5 datasets, surpassing conventional BERT models and benchmarks by a large margin. We also show that our approach generalises well to unseen data.</abstract>
       <url hash="e7833d8a">2020.coling-main.330</url>
@@ -4153,7 +4153,7 @@
     </paper>
     <paper id="343">
       <title>An Analysis of Simple Data Augmentation for Named Entity Recognition</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Heike</first><last>Adel</last></author>
       <pages>3861–3867</pages>
       <abstract>Simple yet effective data augmentation techniques have been proposed for sentence-level and sentence-pair natural language processing tasks. Inspired by these efforts, we design and compare data augmentation for named entity recognition, which is usually modeled as a token-level sequence labeling problem. Through experiments on two data sets from the biomedical and materials science domains (i2b2-2010 and MaSciP), we show that simple augmentation can boost performance for both recurrent and transformer-based models, especially for small training sets.</abstract>
@@ -4175,7 +4175,7 @@
       <title>Towards Instance-Level Parser Selection for Cross-Lingual Transfer of Dependency Parsers</title>
       <author><first>Robert</first><last>Litschko</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>3886–3898</pages>
       <abstract>Current methods of cross-lingual parser transfer focus on predicting the best parser for a low-resource target language globally, that is, “at treebank level”. In this work, we propose and argue for a novel cross-lingual transfer paradigm: instance-level parser selection (ILPS), and present a proof-of-concept study focused on instance-level selection in the framework of delexicalized parser transfer. Our work is motivated by an empirical observation that different source parsers are the best choice for different Universal POS-sequences (i.e., UPOS sentences) in the target language. We then propose to predict the best parser at the instance level. To this end, we train a supervised regression model, based on the Transformer architecture, to predict parser accuracies for individual POS-sequences. We compare ILPS against two strong single-best parser selection baselines (SBPS): (1) a model that compares POS n-gram distributions between the source and target languages (KL) and (2) a model that selects the source based on the similarity between manually created language vectors encoding syntactic properties of languages (L2V). The results from our extensive evaluation, coupling 42 source parsers and 20 diverse low-resource test languages, show that ILPS outperforms KL and L2V on 13/20 and 14/20 test languages, respectively. Further, we show that by predicting the best parser “at treebank level” (SBPS), using the aggregation of predictions from our instance-level model, we outperform the same baselines on 17/20 and 16/20 test languages.</abstract>
@@ -4208,7 +4208,7 @@
     <paper id="348">
       <title>Integrating Domain Terminology into Neural Machine Translation</title>
       <author><first>Elise</first><last>Michon</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>3925–3937</pages>
       <abstract>This paper extends existing work on terminology integration into Neural Machine Translation, a common industrial practice to dynamically adapt translation to a specific domain. Our method, based on the use of placeholders complemented with morphosyntactic annotation, efficiently taps into the ability of the neural network to deal with symbolic knowledge to surpass the surface generalization shown by alternative techniques. We compare our approach to state-of-the-art systems and benchmark them through a well-defined evaluation framework, focusing on actual application of terminology and not just on the overall performance. Results indicate the suitability of our method in the use-case where terminology is used in a system trained on generic data only.</abstract>
@@ -4218,7 +4218,7 @@
     </paper>
     <paper id="349">
       <title>Understanding the effects of word-level linguistic annotations in under-resourced neural machine translation</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>3938–3950</pages>
@@ -4232,7 +4232,7 @@
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>3951–3964</pages>
       <abstract>In automatic speech translation (ST), traditional cascade approaches involving separate transcription and translation steps are giving ground to increasingly competitive and more robust direct solutions. In particular, by translating speech audio data without intermediate transcription, direct ST models are able to leverage and preserve essential information present in the input (e.g.speaker’s vocal characteristics) that is otherwise lost in the cascade framework. Although such ability proved to be useful for gender translation, direct ST is nonetheless affected by gender bias just like its cascade counterpart, as well as machine translation and numerous other natural language processing applications. Moreover, direct ST systems that exclusively rely on vocal biometric features as a gender cue can be unsuitable or even potentially problematic for certain users. Going beyond speech signals, in this paper we compare different approaches to inform direct ST models about the speaker’s gender and test their ability to handle gender translation from English into Italian and French. To this aim, we manually annotated large datasets with speak-ers’ gender information and used them for experiments reflecting different possible real-world scenarios. Our results show that gender-aware direct ST solutions can significantly outperform strong – but gender-unaware – direct ST models. In particular, the translation of gender-marked words can increase up to 30 points in accuracy while preserving overall translation quality.</abstract>
@@ -4280,8 +4280,8 @@
     </paper>
     <paper id="354">
       <title>Comparative Probing of Lexical Semantics Theories for Cognitive Plausibility and Technological Usefulness</title>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Malgorzata</first><last>Salawa</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
@@ -4419,7 +4419,7 @@
       <title>Towards Topic-Guided Conversational Recommender System</title>
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Yuanhang</first><last>Zhou</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Xiaoke</first><last>Wang</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>4128–4139</pages>
@@ -4455,7 +4455,7 @@
       <title>Deconstruct to Reconstruct a Configurable Evaluation Metric for Open-Domain Dialogue Systems</title>
       <author><first>Vitou</first><last>Phy</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>4164–4178</pages>
       <abstract>Many automatic evaluation metrics have been proposed to score the overall quality of a response in open-domain dialogue. Generally, the overall quality is comprised of various aspects, such as relevancy, specificity, and empathy, and the importance of each aspect differs according to the task. For instance, specificity is mandatory in a food-ordering dialogue task, whereas fluency is preferred in a language-teaching dialogue system. However, existing metrics are not designed to cope with such flexibility. For example, BLEU score fundamentally relies only on word overlapping, whereas BERTScore relies on semantic similarity between reference and candidate response. Thus, they are not guaranteed to capture the required aspects, i.e., specificity. To design a metric that is flexible to a task, we first propose making these qualities manageable by grouping them into three groups: understandability, sensibleness, and likability, where likability is a combination of qualities that are essential for a task. We also propose a simple method to composite metrics of each aspect to obtain a single metric called USL-H, which stands for Understandability, Sensibleness, and Likability in Hierarchy. We demonstrated that USL-H score achieves good correlations with human judgment and maintains its configurability towards different aspects and metrics.</abstract>
       <url hash="92ee61f2">2020.coling-main.368</url>
@@ -4466,8 +4466,8 @@
       <title>Suggest me a movie for tonight: Leveraging Knowledge Graphs for Conversational Recommendation</title>
       <author><first>Rajdeep</first><last>Sarkar</last></author>
       <author><first>Koustava</first><last>Goswami</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>4179–4189</pages>
       <abstract>Conversational recommender systems focus on the task of suggesting products to users based on the conversation flow. Recently, the use of external knowledge in the form of knowledge graphs has shown to improve the performance in recommendation and dialogue systems. Information from knowledge graphs aids in enriching those systems by providing additional information such as closely related products and textual descriptions of the items. However, knowledge graphs are incomplete since they do not contain all factual information present on the web. Furthermore, when working on a specific domain, knowledge graphs in its entirety contribute towards extraneous information and noise. In this work, we study several subgraph construction methods and compare their performance across the recommendation task. We incorporate pre-trained embeddings from the subgraphs along with positional embeddings in our models. Extensive experiments show that our method has a relative improvement of at least 5.62% compared to the state-of-the-art on multiple metrics on the recommendation task.</abstract>
       <url hash="1d52f878">2020.coling-main.369</url>
@@ -4477,7 +4477,7 @@
     <paper id="370">
       <title><fixed-case>H</fixed-case>i<fixed-case>T</fixed-case>rans: A Transformer-Based Context- and Speaker-Sensitive Model for Emotion Detection in Conversations</title>
       <author><first>Jingye</first><last>Li</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
       <author><first>Yijiang</first><last>Liu</last></author>
@@ -4514,7 +4514,7 @@
       <title>A Two-Level Interpretation of Modality in Human-Robot Dialogue</title>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Kenneth</first><last>Lai</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>4222–4238</pages>
       <abstract>We analyze the use and interpretation of modal expressions in a corpus of situated human-robot dialogue and ask how to effectively represent these expressions for automatic learning. We present a two-level annotation scheme for modality that captures both content and intent, integrating a logic-based, semantic representation and a task-oriented, pragmatic representation that maps to our robot’s capabilities. Data from our annotation task reveals that the interpretation of modal expressions in human-robot dialogue is quite diverse, yet highly constrained by the physical environment and asymmetrical speaker/addressee relationship. We sketch a formal model of human-robot common ground in which modality can be grounded and dynamically interpreted.</abstract>
       <url hash="7187803a">2020.coling-main.373</url>
@@ -4528,8 +4528,8 @@
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Xugang</first><last>Lu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>4239–4250</pages>
       <abstract>Unsupervised neural machine translation (UNMT) has recently attracted great interest in the machine translation community. The main advantage of the UNMT lies in its easy collection of required large training text sentences while with only a slightly worse performance than supervised neural machine translation which requires expensive annotated translation pairs on some translation tasks. In most studies, the UMNT is trained with clean data without considering its robustness to the noisy data. However, in real-world scenarios, there usually exists noise in the collected input sentences which degrades the performance of the translation system since the UNMT is sensitive to the small perturbations of the input sentences. In this paper, we first time explicitly take the noisy data into consideration to improve the robustness of the UNMT based systems. First of all, we clearly defined two types of noises in training sentences, i.e., word noise and word order noise, and empirically investigate its effect in the UNMT, then we propose adversarial training methods with denoising process in the UNMT. Experimental results on several language pairs show that our proposed methods substantially improved the robustness of the conventional UNMT systems in noisy scenarios.</abstract>
       <url hash="9ef2abfe">2020.coling-main.374</url>
@@ -4553,7 +4553,7 @@
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>4263–4274</pages>
       <abstract>In this study, linguistic knowledge at different levels are incorporated into the neural machine translation (NMT) framework to improve translation quality for language pairs with extremely limited data. Integrating manually designed or automatically extracted features into the NMT framework is known to be beneficial. However, this study emphasizes that the relevance of the features is crucial to the performance. Specifically, we propose two methods, 1) self relevance and 2) word-based relevance, to improve the representation of features for NMT. Experiments are conducted on translation tasks from English to eight Asian languages, with no more than twenty thousand sentences for training. The proposed methods improve translation quality for all tasks by up to 3.09 BLEU points. Discussions with visualization provide the explainability of the proposed methods where we show that the relevance methods provide weights to features thereby enhancing their impact on low-resource machine translation.</abstract>
       <url hash="ef68f3a1">2020.coling-main.376</url>
@@ -4579,7 +4579,7 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Takashi</first><last>Ninomiya</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>4287–4297</pages>
       <abstract>This paper proposed a new subword segmentation method for neural machine translation, “Bilingual Subword Segmentation,” which tokenizes sentences to minimize the difference between the number of subword units in a sentence and that of its translation. While existing subword segmentation methods tokenize a sentence without considering its translation, the proposed method tokenizes a sentence by using subword units induced from bilingual sentences; this method could be more favorable to machine translation. Evaluations on WAT Asian Scientific Paper Excerpt Corpus (ASPEC) English-to-Japanese and Japanese-to-English translation tasks and WMT14 English-to-German and German-to-English translation tasks show that our bilingual subword segmentation improves the performance of Transformer neural machine translation (up to +0.81 BLEU).</abstract>
       <url hash="d97bd9a6">2020.coling-main.378</url>
@@ -4627,7 +4627,7 @@
       <author><first>Supratik</first><last>Bhattacharya</last></author>
       <author><first>Shravan</first><last>Nayak</last></author>
       <author><first>Timo</first><last>Baumann</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>4327–4333</pages>
       <abstract>Dubbing has two shades; synchronisation constraints are applied only when the actor’s mouth is visible on screen, while the translation is unconstrained for off-screen dubbing. Consequently, different synchronisation requirements, and therefore translation strategies, are applied depending on the type of dubbing. In this work, we manually annotate an existing dubbing corpus (Heroes) for this dichotomy. We show that, even though we did not observe distinctive features between on- and off-screen dubbing at the textual level, on-screen dubbing is more difficult for MT (-4 BLEU points). Moreover, synchronisation constraints dramatically decrease translation quality for off-screen dubbing. We conclude that, distinguishing between on-screen and off-screen dubbing is necessary for determining successful strategies for dubbing-customised Machine Translation.</abstract>
@@ -4638,7 +4638,7 @@
     <paper id="383">
       <title>Filtering Back-Translated Data in Unsupervised Neural Machine Translation</title>
       <author><first>Jyotsana</first><last>Khatri</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4334–4339</pages>
       <abstract>Unsupervised neural machine translation (NMT) utilizes only monolingual data for training. The quality of back-translated data plays an important role in the performance of NMT systems. In back-translation, all generated pseudo parallel sentence pairs are not of the same quality. Taking inspiration from domain adaptation where in-domain sentences are given more weight in training, in this paper we propose an approach to filter back-translated data as part of the training process of unsupervised NMT. Our approach gives more weight to good pseudo parallel sentence pairs in the back-translation phase. We calculate the weight of each pseudo parallel sentence pair using sentence-wise round-trip BLEU score which is normalized batch-wise. We compare our approach with the current state of the art approaches for unsupervised NMT.</abstract>
       <url hash="3ff08612">2020.coling-main.383</url>
@@ -4649,7 +4649,7 @@
       <title>Lost in Back-Translation: Emotion Preservation in Neural Machine Translation</title>
       <author><first>Enrica</first><last>Troiano</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>4340–4354</pages>
       <abstract>Machine translation provides powerful methods to convert text between languages, and is therefore a technology enabling a multilingual world. An important part of communication, however, takes place at the non-propositional level (e.g., politeness, formality, emotions), and it is far from clear whether current MT methods properly translate this information. This paper investigates the specific hypothesis that the non-propositional level of emotions is at least partially lost in MT. We carry out a number of experiments in a back-translation setup and establish that (1) emotions are indeed partially lost during translation; (2) this tendency can be reversed almost completely with a simple re-ranking approach informed by an emotion classifier, taking advantage of diversity in the n-best list; (3) the re-ranking approach can also be applied to change emotions, obtaining a model for emotion style transfer. An in-depth qualitative analysis reveals that there are recurring linguistic changes through which emotions are toned down or amplified, such as change of modality.</abstract>
       <url hash="6cba3e1e">2020.coling-main.384</url>
@@ -4658,8 +4658,8 @@
     </paper>
     <paper id="385">
       <title>Intermediate Self-supervised Learning for Machine Translation Quality Estimation</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>4355–4360</pages>
       <abstract>Pre-training sentence encoders is effective in many natural language processing tasks including machine translation (MT) quality estimation (QE), due partly to the scarcity of annotated QE data required for supervised learning. In this paper, we investigate the use of an intermediate self-supervised learning task for sentence encoder aiming at improving QE performances at the sentence and word levels. Our approach is motivated by a problem inherent to QE: mistakes in translation caused by wrongly inserted and deleted tokens. We modify the translation language model (TLM) training objective of the cross-lingual language model (XLM) to orientate the pre-trained model towards the target task. The proposed method does not rely on annotated data and is complementary to QE methods involving pre-trained sentence encoders and domain adaptation. Experiments on English-to-German and English-to-Russian translation directions show that intermediate learning improves over domain adaptated models. Additionally, our method reaches results in par with state-of-the-art QE models without requiring the combination of several approaches and outperforms similar methods based on pre-trained sentence encoders.</abstract>
       <url hash="62ac7415">2020.coling-main.385</url>
@@ -4670,7 +4670,7 @@
       <title>Unifying Input and Output Smoothing in Neural Machine Translation</title>
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Baohao</first><last>Liao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>4361–4372</pages>
       <abstract>Soft contextualized data augmentation is a recent method that replaces one-hot representation of words with soft posterior distributions of an external language model, smoothing the input of neural machine translation systems. Label smoothing is another effective method that penalizes over-confident model outputs by discounting some probability mass from the true target word, smoothing the output of neural machine translation systems. Having the benefit of updating all word vectors in each optimization step and better regularizing the models, the two smoothing methods are shown to bring significant improvements in translation performance. In this work, we study how to best combine the methods and stack the improvements. Specifically, we vary the prior distributions to smooth with, the hyperparameters that control the smoothing strength, and the token selection procedures. We conduct extensive experiments on small datasets, evaluate the recipes on larger datasets, and examine the implications when back-translation is further used. Our results confirm cumulative improvements when input and output smoothing are used in combination, giving up to +1.9 BLEU scores on standard machine translation tasks and reveal reasons why these smoothing methods should be preferred.</abstract>
       <url hash="ab25c3e9">2020.coling-main.386</url>
@@ -4681,7 +4681,7 @@
       <title>Neural Transduction for Multilingual Lexical Translation</title>
       <author><first>Dylan</first><last>Lewis</last></author>
       <author><first>Winston</first><last>Wu</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>4373–4384</pages>
       <abstract>We present a method for completing multilingual translation dictionaries. Our probabilistic approach can synthesize new word forms, allowing it to operate in settings where correct translations have not been observed in text (cf. cross-lingual embeddings). In addition, we propose an approximate Maximum Mutual Information (MMI) decoding objective to further improve performance in both many-to-one and one-to-one word level translation tasks where we use either multiple input languages for a single target language or more typical single language pair translation. The model is trained in a many-to-many setting, where it can leverage information from related languages to predict words in each of its many target languages. We focus on 6 languages: French, Spanish, Italian, Portuguese, Romanian, and Turkish. When indirect multilingual information is available, ensembling with mixture-of-experts as well as incorporating related languages leads to a 27% relative improvement in whole-word accuracy of predictions over a single-source baseline. To seed the completion when multilingual data is unavailable, it is better to decode with an MMI objective.</abstract>
@@ -4757,7 +4757,7 @@
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Hardik</first><last>Chauhan</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4441–4453</pages>
       <abstract>Emotion and sentiment classification in dialogues is a challenging task that has gained popularity in recent times. Humans tend to have multiple emotions with varying intensities while expressing their thoughts and feelings. Emotions in an utterance of dialogue can either be independent or dependent on the previous utterances, thus making the task complex and interesting. Multi-label emotion detection in conversations is a significant task that provides the ability to the system to understand the various emotions of the users interacting. Sentiment analysis in dialogue/conversation, on the other hand, helps in understanding the perspective of the user with respect to the ongoing conversation. Along with text, additional information in the form of audio and video assist in identifying the correct emotions with the appropriate intensity and sentiments in an utterance of a dialogue. Lately, quite a few datasets have been made available for dialogue emotion and sentiment classification, but these datasets are imbalanced in representing different emotions and consist of an only single emotion. Hence, we present at first a large-scale balanced Multimodal Multi-label Emotion, Intensity, and Sentiment Dialogue dataset (MEISD), collected from different TV series that has textual, audio and visual features, and then establish a baseline setup for further research.</abstract>
       <url hash="373068df">2020.coling-main.393</url>
@@ -4782,7 +4782,7 @@
       <title>Leveraging Discourse Rewards for Document-Level Neural Machine Translation</title>
       <author><first>Inigo</first><last>Jauregi Unanue</last></author>
       <author><first>Nazanin</first><last>Esmaili</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Massimo</first><last>Piccardi</last></author>
       <pages>4467–4482</pages>
       <abstract>Document-level machine translation focuses on the translation of entire documents from a source to a target language. It is widely regarded as a challenging task since the translation of the individual sentences in the document needs to retain aspects of the discourse at document level. However, document-level translation models are usually not trained to explicitly ensure discourse quality. Therefore, in this paper we propose a training approach that explicitly optimizes two established discourse metrics, lexical cohesion and coherence, by using a reinforcement learning objective. Experiments over four different language pairs and three translation domains have shown that our training approach has been able to achieve more cohesive and coherent document translations than other competitive approaches, yet without compromising the faithfulness to the reference translation. In the case of the Zh-En language pair, our method has achieved an improvement of 2.46 percentage points (pp) in LC and 1.17 pp in COH over the runner-up, while at the same time improving 0.63 pp in BLEU score and 0.47 pp in F-BERT.</abstract>
@@ -4842,7 +4842,7 @@
     </paper>
     <paper id="400">
       <title>Living Machines: A study of atypical animacy</title>
-      <author><first>Mariona</first><last>Coll Ardanuy</last></author>
+      <author id="mariona-coll-ardanuy"><first>Mariona</first><last>Coll Ardanuy</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
       <author><first>Kaspar</first><last>Beelen</last></author>
       <author><first>Kasra</first><last>Hosseini</last></author>
@@ -4863,7 +4863,7 @@
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Malihe</first><last>Alikhani</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>4546–4562</pages>
       <abstract>The interpretation of the lexical aspect of verbs in English plays a crucial role in tasks such as recognizing textual entailment and learning discourse-level inferences. We show that two elementary dimensions of aspectual class, states vs. events, and telic vs. atelic events, can be modelled effectively with distributional semantics. We find that a verb’s local context is most indicative of its aspectual class, and we demonstrate that closed class words tend to be stronger discriminating contexts than content words. Our approach outperforms previous work on three datasets. Further, we present a new dataset of human-human conversations annotated with lexical aspects and present experiments that show the correlation of telicity with genre and discourse goals.</abstract>
       <url hash="33cc1b13">2020.coling-main.401</url>
@@ -4875,7 +4875,7 @@
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Lily</first><last>Ng</last></author>
       <author><first>Courtney</first><last>Napoles</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>4563–4574</pages>
       <abstract>Though preceding work in computational argument quality (AQ) mostly focuses on assessing overall AQ, researchers agree that writers would benefit from feedback targeting individual dimensions of argumentation theory. However, a large-scale theory-based corpus and corresponding computational models are missing. We fill this gap by conducting an extensive analysis covering three diverse domains of online argumentative writing and presenting GAQCorpus: the first large-scale English multi-domain (community Q&amp;A forums, debate forums, review forums) corpus annotated with theory-based AQ scores. We then propose the first computational approaches to theory-based assessment, which can serve as strong baselines for future work. We demonstrate the feasibility of large-scale AQ annotation, show that exploiting relations between dimensions yields performance improvements, and explore the synergies between theory-based prediction and practical AQ assessment.</abstract>
       <url hash="89c3c023">2020.coling-main.402</url>
@@ -4885,7 +4885,7 @@
     <paper id="403">
       <title>A Linguistic Perspective on Reference: Choosing a Feature Set for Generating Referring Expressions in Context</title>
       <author><first>Fahime</first><last>Same</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>4575–4586</pages>
       <abstract>This paper reports on a structured evaluation of feature-based Machine Learning algorithms for selecting the form of a referring expression in discourse context. Based on this evaluation, we selected seven feature sets from the literature, amounting to 65 distinct linguistic features. The features were then grouped into 9 broad classes. After building Random Forest models, we used Feature Importance Ranking and Sequential Forward Search methods to assess the “importance” of the features. Combining the results of the two methods, we propose a consensus feature set. The 6 features in our consensus set come from 4 different classes, namely grammatical role, inherent features of the referent, antecedent form and recency.</abstract>
       <url hash="e83273a0">2020.coling-main.403</url>
@@ -4972,7 +4972,7 @@
       <title>Semi-supervised <fixed-case>URL</fixed-case> Segmentation with Recurrent Neural Networks Pre-trained on Knowledge Graph Entities</title>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Jae</first><last>Ro</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>4667–4675</pages>
       <abstract>Breaking domain names such as openresearch into component words open and research is important for applications like Text-to-Speech synthesis and web search. We link this problem to the classic problem of Chinese word segmentation and show the effectiveness of a tagging model based on Recurrent Neural Networks (RNNs) using characters as input. To compensate for the lack of training data, we propose a pre-training method on concatenated entity names in a large knowledge database. Pre-training improves the model by 33% and brings the sequence accuracy to 85%.</abstract>
       <url hash="12db9bed">2020.coling-main.411</url>
@@ -5001,11 +5001,11 @@
     </paper>
     <paper id="414">
       <title>Detecting Urgency Status of Crisis Tweets: A Transfer Learning Approach for Low Resource Languages</title>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
       <author><first>Linyong</first><last>Nan</last></author>
       <author><first>Bohan</first><last>Qu</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>4693–4703</pages>
       <abstract>We release an urgency dataset that consists of English tweets relating to natural crises, along with annotations of their corresponding urgency status. Additionally, we release evaluation datasets for two low-resource languages, i.e. Sinhala and Odia, and demonstrate an effective zero-shot transfer from English to these two languages by training cross-lingual classifiers. We adopt cross-lingual embeddings constructed using different methods to extract features of the tweets, including a few state-of-the-art contextual embeddings such as BERT, RoBERTa and XLM-R. We train classifiers of different architectures on the extracted features. We also explore semi-supervised approaches by utilizing unlabeled tweets and experiment with ensembling different classifiers. With very limited amounts of labeled data in English and zero data in the low resource languages, we show a successful framework of training monolingual and cross-lingual classifiers using deep learning methods which are known to be data hungry. Specifically, we show that the recent deep contextual embeddings are also helpful when dealing with very small-scale datasets. Classifiers that incorporate RoBERTa yield the best performance for English urgency detection task, with F1 scores that are more than 25 points over our baseline classifier. For the zero-shot transfer to low resource languages, classifiers that use LASER features perform the best for Sinhala transfer while XLM-R features benefit the Odia transfer the most.</abstract>
       <url hash="6aec89a0">2020.coling-main.414</url>
@@ -5042,7 +5042,7 @@
       <author><first>Dario</first><last>Stojanovski</last></author>
       <author><first>Benno</first><last>Krojer</last></author>
       <author><first>Denis</first><last>Peskov</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>4732–4749</pages>
       <abstract>Recent high scores on pronoun translation using context-aware neural machine translation have suggested that current approaches work well. ContraPro is a notable example of a contrastive challenge set for English→German pronoun translation. The high scores achieved by transformer models may suggest that they are able to effectively model the complicated set of inferences required to carry out pronoun translation. This entails the ability to determine which entities could be referred to, identify which entity a source-language pronoun refers to (if any), and access the target-language grammatical gender for that entity. We first show through a series of targeted adversarial attacks that in fact current approaches are not able to model all of this information well. Inserting small amounts of distracting information is enough to strongly reduce scores, which should not be the case. We then create a new template test set ContraCAT, designed to individually assess the ability to handle the specific steps necessary for successful pronoun translation. Our analyses show that current approaches to context-aware NMT rely on a set of surface heuristics, which break down when translations require real reasoning. We also propose an approach for augmenting the training data, with some improvements.</abstract>
       <url hash="c310e6b8">2020.coling-main.417</url>
@@ -5137,7 +5137,7 @@
       <title>Manual Clustering and Spatial Arrangement of Verbs for Multilingual Evaluation and Typology Analysis</title>
       <author><first>Olga</first><last>Majewska</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>4810–4824</pages>
       <abstract>We present the first evaluation of the applicability of a spatial arrangement method (SpAM) to a typologically diverse language sample, and its potential to produce semantic evaluation resources to support multilingual NLP, with a focus on verb semantics. We demonstrate SpAM’s utility in allowing for quick bottom-up creation of large-scale evaluation datasets that balance cross-lingual alignment with language specificity. Starting from a shared sample of 825 English verbs, translated into Chinese, Japanese, Finnish, Polish, and Italian, we apply a two-phase annotation process which produces (i) semantic verb classes and (ii) fine-grained similarity scores for nearly 130 thousand verb pairs. We use the two types of verb data to (a) examine cross-lingual similarities and variation, and (b) evaluate the capacity of static and contextualised representation models to accurately reflect verb semantics, contrasting the performance of large language specific pretraining models with their multilingual equivalent on semantic clustering and lexical similarity, across different domains of verb meaning. We release the data from both phases as a large-scale multilingual resource, comprising 85 verb classes and nearly 130k pairwise similarity scores, offering a wealth of possibilities for further evaluation and research on multilingual verb semantics.</abstract>
@@ -5150,7 +5150,7 @@
       <author><first>Jingun</first><last>Kwon</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Young-In</first><last>Song</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>4825–4834</pages>
       <abstract>Recently, automatic trivia fact extraction has attracted much research interest. Modern search engines have begun to provide trivia facts as the information for entities because they can motivate more user engagement. In this paper, we propose a new unsupervised algorithm that automatically mines trivia facts for a given entity. Unlike previous studies, the proposed algorithm targets at a single Wikipedia article and leverages its hierarchical structure via top-down processing. Thus, the proposed algorithm offers two distinctive advantages: it does not incur high computation time, and it provides a domain-independent approach for extracting trivia facts. Experimental results demonstrate that the proposed algorithm is over 100 times faster than the existing method which considers Wikipedia categories. Human evaluation demonstrates that the proposed algorithm can mine better trivia facts regardless of the target entity domain and outperforms the existing methods.</abstract>
       <url hash="bc3b01da">2020.coling-main.424</url>
@@ -5174,7 +5174,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Arnav</first><last>Wadhwa</last></author>
       <author><first>Shivam</first><last>Agarwal</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>4847–4859</pages>
       <abstract>Parliamentary debates present a valuable language resource for analyzing comprehensive options in electing representatives under a functional, free society. However, the esoteric nature of political speech coupled with non-linguistic aspects such as political cohesion between party members presents a complex and underexplored task of contextual parliamentary debate analysis. We introduce GPolS, a neural model for political speech sentiment analysis jointly exploiting both semantic language representations and relations between debate transcripts, motions, and political party members. Through experiments on real-world English data and by visualizing attention, we provide a use case of GPolS as a tool for political speech analysis and polarity prediction.</abstract>
       <url hash="c4b32a92">2020.coling-main.426</url>
@@ -5243,7 +5243,7 @@
     <paper id="432">
       <title>Balanced Joint Adversarial Training for Robust Intent Detection and Slot Filling</title>
       <author><first>Xu</first><last>Cao</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Chongyang</first><last>Shi</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <author><first>Yao</first><last>Meng</last></author>
@@ -5270,7 +5270,7 @@
       <title>Understanding Unnatural Questions Improves Reasoning over Text</title>
       <author><first>Xiaoyu</first><last>Guo</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>4949–4955</pages>
       <abstract>Complex question answering (CQA) over raw text is a challenging task. A prominent approach to this task is based on the programmer-interpreter framework, where the programmer maps the question into a sequence of reasoning actions and the interpreter then executes these actions on the raw text. Learning an effective CQA model requires large amounts of human-annotated data, consisting of the ground-truth sequence of reasoning actions, which is time-consuming and expensive to collect at scale. In this paper, we address the challenge of learning a high-quality programmer (parser) by projecting natural human-generated questions into unnatural machine-generated questions which are more convenient to parse. We firstly generate synthetic (question, action sequence) pairs by a data generator, and train a semantic parser that associates synthetic questions with their corresponding action sequences. To capture the diversity when applied to natural questions, we learn a projection model to map natural questions into their most similar unnatural questions for which the parser can work well. Without any natural training data, our projection model provides high-quality action sequences for the CQA task. Experimental results show that the QA model trained exclusively with synthetic data outperforms its state-of-the-art counterpart trained on human-labeled data.</abstract>
       <url hash="d70b1c45">2020.coling-main.434</url>
@@ -5294,7 +5294,7 @@
     <paper id="436">
       <title>A Large-Scale Corpus of <fixed-case>E</fixed-case>-mail Conversations with Standard and Two-Level Dialogue Act Annotations</title>
       <author><first>Motoki</first><last>Taniguchi</last></author>
-      <author><first>Yoshihiro</first><last>Ueda</last></author>
+      <author id="yoshihiro-ueda"><first>Yoshihiro</first><last>Ueda</last></author>
       <author><first>Tomoki</first><last>Taniguchi</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
       <pages>4969–4980</pages>
@@ -5382,7 +5382,7 @@
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
       <author><first>Jakob</first><last>Verbeek</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>5047–5058</pages>
       <abstract>We conduct in this work an evaluation study comparing offline and online neural machine translation architectures. Two sequence-to-sequence models: convolutional Pervasive Attention (Elbayad et al. 2018) and attention-based Transformer (Vaswani et al. 2017) are considered. We investigate, for both architectures, the impact of online decoding constraints on the translation quality through a carefully designed human evaluation on English-German and German-English language pairs, the latter being particularly sensitive to latency constraints. The evaluation results allow us to identify the strengths and shortcomings of each model when we shift to the online setup.</abstract>
       <url hash="3de0dc31">2020.coling-main.443</url>
@@ -5391,7 +5391,7 @@
     </paper>
     <paper id="444">
       <title>Informative Manual Evaluation of Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>5059–5069</pages>
       <abstract>This work proposes a new method for manual evaluation of Machine Translation (MT) output based on marking actual issues in the translated text. The novelty is that the evaluators are not assigning any scores, nor classifying errors, but marking all problematic parts (words, phrases, sentences) of the translation. The main advantage of this method is that the resulting annotations do not only provide overall scores by counting words with assigned tags, but can be further used for analysis of errors and challenging linguistic phenomena, as well as inter-annotator disagreements. Detailed analysis and understanding of actual problems are not enabled by typical manual evaluations where the annotators are asked to assign overall scores or to rank two or more translations. The proposed method is very general: it can be applied on any genre/domain and language pair, and it can be guided by various types of quality criteria. Also, it is not restricted to MT output, but can be used for other types of generated text.</abstract>
       <url hash="3d68c0ac">2020.coling-main.444</url>
@@ -5401,8 +5401,8 @@
     <paper id="445">
       <title><fixed-case>T</fixed-case>rans<fixed-case>Q</fixed-case>uest: Translation Quality Estimation with Cross-lingual Transformers</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>5070–5081</pages>
       <abstract>Recent years have seen big advances in the field of sentence-level quality estimation (QE), largely as a result of using neural-based architectures. However, the majority of these methods work only on the language pair they are trained on and need retraining for new language pairs. This process can prove difficult from a technical point of view and is usually computationally expensive. In this paper we propose a simple QE framework based on cross-lingual transformers, and we use it to implement and evaluate two different neural architectures. Our evaluation shows that the proposed methods achieve state-of-the-art results outperforming current open-source quality estimation frameworks when trained on datasets from WMT. In addition, the framework proves very useful in transfer learning settings, especially when dealing with low-resourced languages, allowing us to obtain very competitive results.</abstract>
       <url hash="b5d16bd9">2020.coling-main.445</url>
@@ -5413,7 +5413,7 @@
       <title>Monolingual and Multilingual Reduction of Gender Bias in Contextualized Representations</title>
       <author><first>Sheng</first><last>Liang</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>5082–5093</pages>
       <abstract>Pretrained language models (PLMs) learn stereotypes held by humans and reflected in text from their training corpora, including gender bias. When PLMs are used for downstream tasks such as picking candidates for a job, people’s lives can be negatively affected by these learned stereotypes. Prior work usually identifies a linear gender subspace and removes gender information by eliminating the subspace. Following this line of work, we propose to use DensRay, an analytical method for obtaining interpretable dense subspaces. We show that DensRay performs on-par with prior approaches, but provide arguments that it is more robust and provide indications that it preserves language model performance better. By applying DensRay to attention heads and layers of BERT we show that gender information is spread across all attention heads and most of the layers. Also we show that DensRay can obtain gender bias scores on both token and sentence levels. Finally, we demonstrate that we can remove bias multilingually, e.g., from Chinese, using only English training data.</abstract>
       <url hash="e7874a34">2020.coling-main.446</url>
@@ -5493,7 +5493,7 @@
       <author><first>Mohammed</first><last>Aldawsari</last></author>
       <author><first>Adrian</first><last>Perez</last></author>
       <author><first>Deya</first><last>Banisakher</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>5171–5180</pages>
       <abstract>Determining whether an event in a news article is a foreground or background event would be useful in many natural language processing tasks, for example, temporal relation extraction, summarization, or storyline generation. We introduce the task of distinguishing between foreground and background events in news articles as well as identifying the general temporal position of background events relative to the foreground period (past, present, future, and their combinations). We achieve good performance (0.73 F1 for background vs. foreground and temporal position, and 0.79 F1 for background vs. foreground only) on a dataset of news articles by leveraging discourse information in a featurized model. We release our implementation and annotated data for other researchers</abstract>
       <url hash="ed029fa7">2020.coling-main.453</url>
@@ -5526,9 +5526,9 @@
     </paper>
     <paper id="456">
       <title>Explain by Evidence: An Explainable Memory-based Neural Network for Question Answering</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Nhan</first><last>Dam</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Trung</first><last>Le</last></author>
       <author><first>Nham</first><last>Le</last></author>
@@ -5563,7 +5563,7 @@
       <title>Hy-<fixed-case>NLI</fixed-case>: a Hybrid system for Natural Language Inference</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <pages>5235–5249</pages>
       <abstract>Despite the advances in Natural Language Inference through the training of massive deep models, recent work has revealed the generalization difficulties of such models, which fail to perform on adversarial datasets with challenging linguistic phenomena. Such phenomena, however, can be handled well by symbolic systems. Thus, we propose Hy-NLI, a hybrid system that learns to identify an NLI pair as linguistically challenging or not. Based on that, it uses its symbolic or deep learning component, respectively, to make the final inference decision. We show how linguistically less complex cases are best solved by robust state-of-the-art models, like BERT and XLNet, while hard linguistic phenomena are best handled by our implemented symbolic engine. Our thorough evaluation shows that our hybrid system achieves state-of-the-art performance across mainstream and adversarial datasets and opens the way for further research into the hybrid direction.</abstract>
       <url hash="7004313a">2020.coling-main.459</url>
@@ -5584,7 +5584,7 @@
     </paper>
     <paper id="461">
       <title>Global Context-enhanced Graph Convolutional Networks for Document-level Relation Extraction</title>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Yibin</first><last>Xu</last></author>
       <author><first>Weihong</first><last>Yao</last></author>
       <author><first>Zhe</first><last>Liu</last></author>
@@ -5614,7 +5614,7 @@
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Pararth</first><last>Shah</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>5288–5308</pages>
       <abstract>We study an end-to-end approach for conversational recommendation that dynamically manages and reasons over users’ past (offline) preferences and current (online) requests through a structured and cumulative user memory knowledge graph. This formulation extends existing state tracking beyond the boundary of a single dialog to user state tracking (UST). For this study, we create a new Memory Graph (MG) &lt;-&gt; Conversational Recommendation parallel corpus called MGConvRex with 7K+ human-to-human role-playing dialogs, grounded on a large-scale user memory bootstrapped from real-world user scenarios. MGConvRex captures human-level reasoning over user memory and has disjoint training/testing sets of users for zero-shot (cold-start) reasoning for recommendation. We propose a simple yet expandable formulation for constructing and updating the MG, and an end-to-end graph-based reasoning model that updates MG from unstructured utterances and predicts optimal dialog policies (eg recommendation) based on updated MG. The prediction of our proposed model inherits the graph structure, providing a natural way to explain policies. Experiments are conducted for both offline metrics and online simulation, showing competitive results.</abstract>
       <url hash="5d0bdbff">2020.coling-main.463</url>
@@ -5625,7 +5625,7 @@
       <title>Diverse and Non-redundant Answer Set Extraction on Community <fixed-case>QA</fixed-case> based on <fixed-case>DPP</fixed-case>s</title>
       <author><first>Shogo</first><last>Fujita</last></author>
       <author><first>Tomohide</first><last>Shibata</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>5309–5320</pages>
       <abstract>In community-based question answering (CQA) platforms, it takes time for a user to get useful information from among many answers. Although one solution is an answer ranking method, the user still needs to read through the top-ranked answers carefully. This paper proposes a new task of selecting a diverse and non-redundant answer set rather than ranking the answers. Our method is based on determinantal point processes (DPPs), and it calculates the answer importance and similarity between answers by using BERT. We built a dataset focusing on a Japanese CQA site, and the experiments on this dataset demonstrated that the proposed method outperformed several baseline methods.</abstract>
       <url hash="d40601f3">2020.coling-main.464</url>
@@ -5663,7 +5663,7 @@
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Yue</first><last>Zhuo</last></author>
       <author><first>Mahsa</first><last>Baktashmotlagh</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>5347–5359</pages>
       <abstract>Commonsense reasoning refers to the ability of evaluating a social situation and acting accordingly. Identification of the implicit causes and effects of a social context is the driving capability which can enable machines to perform commonsense reasoning. The dynamic world of social interactions requires context-dependent on-demand systems to infer such underlying information. However, current approaches in this realm lack the ability to perform commonsense reasoning upon facing an unseen situation, mostly due to incapability of identifying a diverse range of implicit social relations. Hence they fail to estimate the correct reasoning path. In this paper, we present Conditional Seq2Seq-based Mixture model (CosMo), which provides us with the capabilities of dynamic and diverse content generation. We use CosMo to generate context-dependent clauses, which form a dynamic Knowledge Graph (KG) on-the-fly for commonsense reasoning. To show the adaptability of our model to context-dependant knowledge generation, we address the task of zero-shot commonsense question answering. The empirical results indicate an improvement of up to +5.2% over the state-of-the-art models.</abstract>
       <url hash="a0a29d7f">2020.coling-main.467</url>
@@ -5698,7 +5698,7 @@
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Roxanne</first><last>El Baff</last></author>
       <author><first>Johannes</first><last>Kiesel</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <pages>5384–5396</pages>
@@ -5713,7 +5713,7 @@
       <author><first>Satoru</first><last>Ozaki</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>5397–5408</pages>
       <abstract>Interlinear Glossed Text (IGT) is a widely used format for encoding linguistic information in language documentation projects and scholarly papers. Manual production of IGT takes time and requires linguistic expertise. We attempt to address this issue by creating automatic glossing models, using modern multi-source neural models that additionally leverage easy-to-collect translations. We further explore cross-lingual transfer and a simple output length control mechanism, further refining our models. Evaluated on three challenging low-resource scenarios, our approach significantly outperforms a recent, state-of-the-art baseline, particularly improving on overall accuracy as well as lemma and tag recall.</abstract>
       <url hash="1784e4a4">2020.coling-main.471</url>
@@ -5777,7 +5777,7 @@
     <paper id="477">
       <title>Words are the Window to the Soul: Language-based User Representations for Fake News Detection</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>5467–5479</pages>
       <abstract>Cognitive and social traits of individuals are reflected in language use. Moreover, individuals who are prone to spread fake news online often share common traits. Building on these ideas, we introduce a model that creates representations of individuals on social media based only on the language they produce, and use them to detect fake news. We show that language-based user representations are beneficial for this task. We also present an extended analysis of the language of fake news spreaders, showing that its main features are mostly domain independent and consistent across two English datasets. Finally, we exploit the relation between language use and connections in the social graph to assess the presence of the Echo Chamber effect in our data.</abstract>
       <url hash="d1089209">2020.coling-main.477</url>
@@ -5825,7 +5825,7 @@
     <paper id="481">
       <title>Go Simple and Pre-Train on Domain-Specific Corpora: On the Role of Training Data for Text Classification</title>
       <author><first>Aleksandra</first><last>Edwards</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Hélène</first><last>De Ribaupierre</last></author>
       <author><first>Alun</first><last>Preece</last></author>
       <pages>5522–5529</pages>
@@ -5852,7 +5852,7 @@
       <author><first>Hikari</first><last>Tanabe</last></author>
       <author><first>Tetsuji</first><last>Ogawa</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>5535–5540</pages>
       <abstract>Recognition of the mental state of a human character in text is a major challenge in natural language processing. In this study, we investigate the efficacy of the narrative context in recognizing the emotional states of human characters in text and discuss an approach to make use of a priori knowledge regarding the employed emotion category system. Specifically, we experimentally show that the accuracy of emotion classification is substantially increased by encoding the preceding context of the target sentence using a BERT-based text encoder. We also compare ways to incorporate a priori knowledge of emotion categories by altering the loss function used in training, in which our proposal of multi-task learning that jointly learns to classify positive/negative polarity of emotions is included. The experimental results suggest that, when using Plutchik’s Wheel of Emotions, it is better to jointly classify the basic emotion categories with positive/negative polarity rather than directly exploiting its characteristic structure in which eight basic categories are arranged in a wheel.</abstract>
       <url hash="b912df67">2020.coling-main.483</url>
@@ -5907,7 +5907,7 @@
       <title>Automatically Identifying Words That Can Serve as Labels for Few-Shot Text Classification</title>
       <author><first>Timo</first><last>Schick</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>5569–5578</pages>
       <abstract>A recent approach for few-shot text classification is to convert textual inputs to cloze questions that contain some form of task description, process them with a pretrained language model and map the predicted words to labels. Manually defining this mapping between words and labels requires both domain expertise and an understanding of the language model’s abilities. To mitigate this issue, we devise an approach that automatically finds such a mapping given small amounts of training data. For a number of tasks, the mapping found by our approach performs almost as well as hand-crafted label-to-word mappings.</abstract>
       <url hash="d5041c46">2020.coling-main.488</url>
@@ -5917,7 +5917,7 @@
     <paper id="489">
       <title>Knowledge Base Embedding By Cooperative Knowledge Distillation</title>
       <author><first>Raphaël</first><last>Sourty</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>François-Paul</first><last>Servant</last></author>
       <author><first>Lynda</first><last>Tamine-Lechani</last></author>
       <pages>5579–5590</pages>
@@ -5991,7 +5991,7 @@
       <title><fixed-case>WSL</fixed-case>-<fixed-case>DS</fixed-case>: Weakly Supervised Learning with Distant Supervision for Query Focused Multi-Document Abstractive Summarization</title>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <pages>5647–5654</pages>
       <abstract>In the Query Focused Multi-Document Summarization (QF-MDS) task, a set of documents and a query are given where the goal is to generate a summary from these documents based on the given query. However, one major challenge for this task is the lack of availability of labeled training datasets. To overcome this issue, in this paper, we propose a novel weakly supervised learning approach via utilizing distant supervision. In particular, we use datasets similar to the target dataset as the training data where we leverage pre-trained sentence similarity models to generate the weak reference summary of each individual document in a document set from the multi-document gold reference summaries. Then, we iteratively train our summarization model on each single-document to alleviate the computational complexity issue that occurs while training neural summarization models in multiple documents (i.e., long sequences) at once. Experimental results on the Document Understanding Conferences (DUC) datasets show that our proposed approach sets a new state-of-the-art result in terms of various evaluation metrics.</abstract>
       <url hash="c0c5f86c">2020.coling-main.495</url>
@@ -6051,7 +6051,7 @@
       <title>An Anchor-Based Automatic Evaluation Metric for Document Summarization</title>
       <author><first>Kexiang</first><last>Wang</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>5696–5701</pages>
       <abstract>The widespread adoption of reference-based automatic evaluation metrics such as ROUGE has promoted the development of document summarization. In this paper, we consider a new protocol for designing reference-based metrics that require the endorsement of source document(s). Following protocol, we propose an anchored ROUGE metric fixing each summary particle on source document, which bases the computation on more solid ground. Empirical results on benchmark datasets validate that source document helps to induce a higher correlation with human judgments for ROUGE metric. Being self-explanatory and easy-to-implement, the protocol can naturally foster various effective designs of reference-based metrics besides the anchored ROUGE introduced here.</abstract>
@@ -6124,7 +6124,7 @@
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>5749–5759</pages>
       <abstract>Discourse structure tree construction is the fundamental task of discourse parsing and most previous work focused on English. Due to the cultural and linguistic differences, existing successful methods on English discourse parsing cannot be transformed into Chinese directly, especially in paragraph level suffering from longer discourse units and fewer explicit connectives. To alleviate the above issues, we propose two reading modes, i.e., the global backward reading and the local reverse reading, to construct Chinese paragraph level discourse trees. The former processes discourse units from the end to the beginning in a document to utilize the left-branching bias of discourse structure in Chinese, while the latter reverses the position of paragraphs in a discourse unit to enhance the differentiation of coherence between adjacent discourse units. The experimental results on Chinese MCDTB demonstrate that our model outperforms all strong baselines.</abstract>
       <url hash="49f9e74c">2020.coling-main.506</url>
@@ -6144,7 +6144,7 @@
     </paper>
     <paper id="508">
       <title>Variation in Coreference Strategies across Genres and Production Media</title>
-      <author><first>Berfin</first><last>Aktaş</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <pages>5774–5785</pages>
       <abstract>In response to (i) inconclusive results in the literature as to the properties of coreference chains in written versus spoken language, and (ii) a general lack of work on automatic coreference resolution on both spoken language and social media, we undertake a corpus study involving the various genre sections of Ontonotes, the Switchboard corpus, and a corpus of Twitter conversations. Using a set of measures that previously have been applied individually to different data sets, we find fairly clear patterns of “behavior” for the different genres/media. Besides their role for psycholinguistic investigation (why do we employ different coreference strategies when we write or speak) and for the placement of Twitter in the spoken–written continuum, we see our results as a contribution to approaching genre-/media-specific coreference resolution.</abstract>
@@ -6157,7 +6157,7 @@
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Madeeswaran</first><last>Kannan</last></author>
       <author><first>Haemanth</first><last>Santhi Ponnusamy</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>5786–5798</pages>
       <abstract>Questions under Discussion (QUD; Roberts, 2012) are emerging as a conceptually fruitful approach to spelling out the connection between the information structure of a sentence and the nature of the discourse in which the sentence can function. To make this approach useful for analyzing authentic data, Riester, Brunetti &amp; De Kuthy (2018) presented a discourse annotation framework based on explicit pragmatic principles for determining a QUD for every assertion in a text. De Kuthy et al. (2018) demonstrate that this supports more reliable discourse structure annotation, and Ziai and Meurers (2018) show that based on explicit questions, automatic focus annotation becomes feasible. But both approaches are based on manually specified questions. In this paper, we present an automatic question generation approach to partially automate QUD annotation by generating all potentially relevant questions for a given sentence. While transformation rules can concisely capture the typical question formation process, a rule-based approach is not sufficiently robust for authentic data. We therefore employ the transformation rules to generate a large set of sentence-question-answer triples and train a neural question generation model on them to obtain both systematic question type coverage and robustness.</abstract>
       <url hash="8b09e90c">2020.coling-main.509</url>
@@ -6172,7 +6172,7 @@
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>5799–5809</pages>
       <abstract>This paper aims to enhance the few-shot relation classification especially for sentences that jointly describe multiple relations. Due to the fact that some relations usually keep high co-occurrence in the same context, previous few-shot relation classifiers struggle to distinguish them with few annotated instances. To alleviate the above relation confusion problem, we propose CTEG, a model equipped with two novel mechanisms to learn to decouple these easily-confused relations. On the one hand, an Entity -Guided Attention (EGA) mechanism, which leverages the syntactic relations and relative positions between each word and the specified entity pair, is introduced to guide the attention to filter out information causing confusion. On the other hand, a Confusion-Aware Training (CAT) method is proposed to explicitly learn to distinguish relations by playing a pushing-away game between classifying a sentence into a true relation and its confusing relation. Extensive experiments are conducted on the FewRel dataset, and the results show that our proposed model achieves comparable and even much better results to strong baselines in terms of accuracy. Furthermore, the ablation test and case study verify the effectiveness of our proposed EGA and CAT, especially in addressing the relation confusion problem.</abstract>
       <url hash="55e58987">2020.coling-main.510</url>
@@ -6197,7 +6197,7 @@
       <author><first>João Marcos</first><last>Munguba Vieira</last></author>
       <author><first>Erica</first><last>dos Santos Rodrigues</last></author>
       <author><first>Elisângela</first><last>Nogueira Teixeira</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>5821–5831</pages>
       <abstract>Sentence complexity assessment is a relatively new task in Natural Language Processing. One of its aims is to highlight in a text which sentences are more complex to support the simplification of contents for a target audience (e.g., children, cognitively impaired users, non-native speakers and low-literacy readers (Scarton and Specia, 2018)). This task is evaluated using datasets of pairs of aligned sentences including the complex and simple version of the same sentence. For Brazilian Portuguese, the task was addressed by (Leal et al., 2018), who set up the first dataset to evaluate the task in this language, reaching 87.8% of accuracy with linguistic features. The present work advances these results, using models inspired by (Gonzalez-Garduño and Søgaard, 2018), which hold the state-of-the-art for the English language, with multi-task learning and eye-tracking measures. First-Pass Duration, Total Regression Duration and Total Fixation Duration were used in two moments; first to select a subset of linguistic features and then as an auxiliary task in the multi-task and sequential learning models. The best model proposed here reaches the new state-of-the-art for Portuguese with 97.5% accuracy 1 , an increase of almost 10 points compared to the best previous results, in addition to proposing improvements in the public dataset after analysing the errors of our best model.</abstract>
       <url hash="7976d13b">2020.coling-main.512</url>
@@ -6232,7 +6232,7 @@
       <author><first>Ali</first><last>Emami</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>5855–5865</pages>
       <abstract>The Winograd Schema Challenge (WSC) and variants inspired by it have become important benchmarks for common-sense reasoning (CSR). Model performance on the WSC has quickly progressed from chance-level to near-human using neural language models trained on massive corpora. In this paper, we analyze the effects of varying degrees of overlaps that occur between these corpora and the test instances in WSC-style tasks. We find that a large number of test instances overlap considerably with the pretraining corpora on which state-of-the-art models are trained, and that a significant drop in classification accuracy occurs when models are evaluated on instances with minimal overlap. Based on these results, we provide the WSC-Web dataset, consisting of over 60k pronoun disambiguation problems scraped from web data, being both the largest corpus to date, and having a significantly lower proportion of overlaps with current pretraining corpora.</abstract>
       <url hash="362f2902">2020.coling-main.515</url>
@@ -6251,9 +6251,9 @@
       <author><first>Delaney</first><last>Lothian</last></author>
       <author><first>Aidan</first><last>Pine</last></author>
       <author><first>Caroline</first><last>Running Wolf</last></author>
-      <author><first>Eddie</first><last>Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie</first><last>Santos</last></author>
       <author><first>Darlene</first><last>Stewart</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <author><first>Vishwa</first><last>Gupta</last></author>
       <author><first>Brian</first><last>Maracle Owennatékha</last></author>
       <author><first>Akwiratékha’</first><last>Martin</last></author>
@@ -6286,7 +6286,7 @@
     <paper id="518">
       <title>Don’t Patronize Me! An Annotated Dataset with Patronizing and Condescending Language towards Vulnerable Communities</title>
       <author><first>Carla</first><last>Perez Almendros</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>5891–5902</pages>
       <abstract>In this paper, we introduce a new annotated dataset which is aimed at supporting the development of NLP models to identify and categorize language that is patronizing or condescending towards vulnerable communities (e.g. refugees, homeless people, poor families). While the prevalence of such language in the general media has long been shown to have harmful effects, it differs from other types of harmful language, in that it is generally used unconsciously and with good intentions. We furthermore believe that the often subtle nature of patronizing and condescending language (PCL) presents an interesting technical challenge for the NLP community. Our analysis of the proposed dataset shows that identifying PCL is hard for standard NLP models, with language models such as BERT achieving the best results.</abstract>
@@ -6320,7 +6320,7 @@
       <author><first>Kunsong</first><last>Zhao</last></author>
       <author><first>Jin</first><last>Liu</last></author>
       <author><first>Guangyou</first><last>Zhou</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <pages>5918–5928</pages>
       <abstract>Cross-lingual entity alignment, which aims to match equivalent entities in KGs with different languages, has attracted considerable focus in recent years. Recently, many graph neural network (GNN) based methods are proposed for entity alignment and obtain promising results. However, existing GNN-based methods consider the two KGs independently and learn embeddings for different KGs separately, which ignore the useful pre-aligned links between two KGs. In this paper, we propose a novel Contextual Alignment Enhanced Cross Graph Attention Network (CAECGAT) for the task of cross-lingual entity alignment, which is able to jointly learn the embeddings in different KGs by propagating cross-KG information through pre-aligned seed alignments. We conduct extensive experiments on three benchmark cross-lingual entity alignment datasets. The experimental results demonstrate that our proposed method obtains remarkable performance gains compared to state-of-the-art methods.</abstract>
       <url hash="8879014d">2020.coling-main.520</url>
@@ -6356,8 +6356,8 @@
     <paper id="523">
       <title><fixed-case>W</fixed-case>iki<fixed-case>UMLS</fixed-case>: Aligning <fixed-case>UMLS</fixed-case> to <fixed-case>W</fixed-case>ikipedia via Cross-lingual Neural Ranking</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>5957–5962</pages>
       <abstract>We present our work on aligning the Unified Medical Language System (UMLS) to Wikipedia, to facilitate manual alignment of the two resources. We propose a cross-lingual neural reranking model to match a UMLS concept with a Wikipedia page, which achieves a recall@1of 72%, a substantial improvement of 20% over word- and char-level BM25, enabling manual alignment with minimal effort. We release our resources, including ranked Wikipedia pages for 700k UMLSconcepts, and WikiUMLS, a dataset for training and evaluation of alignment models between UMLS and Wikipedia collected from Wikidata. This will provide easier access to Wikipedia for health professionals, patients, and NLP systems, including in multilingual settings.</abstract>
       <url hash="4044fcdc">2020.coling-main.523</url>
@@ -6369,9 +6369,9 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Nico</first><last>Herbig</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>5963–5974</pages>
       <abstract>In automatic post-editing (APE) it makes sense to condition post-editing (pe) decisions on both the source (src) and the machine translated text (mt) as input. This has led to multi-encoder based neural APE approaches. A research challenge now is the search for architectures that best support the capture, preparation and provision of src and mt information and its integration with pe decisions. In this paper we present an efficient multi-encoder based APE model, called transference. Unlike previous approaches, it (i) uses a transformer encoder block for src, (ii) followed by a decoder block, but without masking for self-attention on mt, which effectively acts as second encoder combining src –&gt; mt, and (iii) feeds this representation into a final decoder block generating pe. Our model outperforms the best performing systems by 1 BLEU point on the WMT 2016, 2017, and 2018 English–German APE shared tasks (PBSMT and NMT). Furthermore, the results of our model on the WMT 2019 APE task using NMT data shows a comparable performance to the state-of-the-art system. The inference time of our model is similar to the vanilla transformer-based NMT system although our model deals with two separate encoders. We further investigate the importance of our newly introduced second encoder and find that a too small amount of layers does hurt the performance, while reducing the number of layers of the decoder does not matter much.</abstract>
       <url hash="66fed489">2020.coling-main.524</url>
@@ -6409,7 +6409,7 @@
       <author><first>Martin</first><last>Laville</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>6002–6012</pages>
       <abstract>Narrow specialized comparable corpora are often small in size. This particularity makes it difficult to build efficient models to acquire translation equivalents, especially for less frequent and rare words. One way to overcome this issue is to enrich the specialized corpora with out-of-domain resources. Although some recent studies have shown improvements using data augmentation, the enrichment method was roughly conducted by adding out-of-domain data with no particular attention given to how to enrich words and how to do it optimally. In this paper, we contrast several data selection techniques to improve bilingual lexicon induction from specialized comparable corpora. We first apply two well-established data selection techniques often used in machine translation that is: Tf-Idf and cross entropy. Then, we propose to exploit BERT for data selection. Overall, all the proposed techniques improve the quality of the extracted bilingual lexicons by a large margin. The best performing model is the cross entropy, obtaining a gain of about 4 points in MAP while decreasing computation time by a factor of 10.</abstract>
       <url hash="8944ad0c">2020.coling-main.527</url>
@@ -6450,8 +6450,8 @@
       <title>Combining Word Embeddings with Bilingual Orthography Embeddings for Bilingual Dictionary Induction</title>
       <author><first>Silvia</first><last>Severini</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>6044–6055</pages>
       <abstract>Bilingual dictionary induction (BDI) is the task of accurately translating words to the target language. It is of great importance in many low-resource scenarios where cross-lingual training data is not available. To perform BDI, bilingual word embeddings (BWEs) are often used due to their low bilingual training signal requirements. They achieve high performance, but problematic cases still remain, such as the translation of rare words or named entities, which often need to be transliterated. In this paper, we enrich BWE-based BDI with transliteration information by using Bilingual Orthography Embeddings (BOEs). BOEs represent source and target language transliteration word pairs with similar vectors. A key problem in our BDI setup is to decide which information source – BWEs (or semantics) vs. BOEs (or orthography) – is more reliable for a particular word pair. We propose a novel classification-based BDI system that uses BWEs, BOEs and a number of other features to make this decision. We test our system on English-Russian BDI and show improved performance. In addition, we show the effectiveness of our BOEs by successfully using them for transliteration mining based on cosine similarity.</abstract>
       <url hash="776a4be3">2020.coling-main.531</url>
@@ -6460,9 +6460,9 @@
     </paper>
     <paper id="532">
       <title>Understanding Translationese in Multi-view Embedding Spaces</title>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>6056–6062</pages>
       <abstract>Recent studies use a combination of lexical and syntactic features to show that footprints of the source language remain visible in translations, to the extent that it is possible to predict the original source language from the translation. In this paper, we focus on embedding-based semantic spaces, exploiting departures from isomorphism between spaces built from original target language and translations into this target language to predict relations between languages in an unsupervised way. We use different views of the data — words, parts of speech, semantic tags and synsets — to track translationese. Our analysis shows that (i) semantic distances between original target language and translations into this target language can be detected using the notion of isomorphism, (ii) language family ties with characteristics similar to linguistically motivated phylogenetic trees can be inferred from the distances and (iii) with delexicalised embeddings exhibiting source-language interference most significantly, other levels of abstraction display the same tendency, indicating the lexicalised results to be not “just” due to possible topic differences between original and translated texts. To the best of our knowledge, this is the first time departures from isomorphism between embedding spaces are used to track translationese.</abstract>
       <url hash="6c65d093">2020.coling-main.532</url>
@@ -6472,7 +6472,7 @@
     <paper id="533">
       <title>Building The First <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Corpus for Automatic Post-Editing</title>
       <author><first>Felipe</first><last>Almeida Costa</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
       <author><first>Wagner</first><last>Meira</last></author>
       <pages>6063–6069</pages>
@@ -6485,7 +6485,7 @@
       <title>Analysing cross-lingual transfer in lemmatisation for <fixed-case>I</fixed-case>ndian languages</title>
       <author><first>Kumar</first><last>Saurav</last></author>
       <author><first>Kumar</first><last>Saunack</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6070–6076</pages>
       <abstract>Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. However, most of the prior work on this topic has focused on high resource languages. In this paper, we evaluate cross-lingual approaches for low resource languages, especially in the context of morphologically rich Indian languages. We test our model on six languages from two different families and develop linguistic insights into each model’s performance.</abstract>
       <url hash="3b55cd40">2020.coling-main.534</url>
@@ -6508,7 +6508,7 @@
       <author><first>Labiba</first><last>Jahan</last></author>
       <author><first>Rahul</first><last>Mittal</last></author>
       <author><first>W. Victor</first><last>Yarlott</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>6089–6100</pages>
       <abstract>One of the most fundamental elements of narrative is character: if we are to understand a narrative, we must be able to identify the characters of that narrative. Therefore, character identification is a critical task in narrative natural language understanding. Most prior work has lacked a narratologically grounded definition of character, instead relying on simplified or implicit definitions that do not capture essential distinctions between characters and other referents in narratives. In prior work we proposed a preliminary definition of character that was based in clear narratological principles: a character is an animate entity that is important to the plot. Here we flesh out this concept, demonstrate that it can be reliably annotated (0.78 Cohen’s κ), and provide annotations of 170 narrative texts, drawn from 3 different corpora, containing 1,347 character co-reference chains and 21,999 non-character chains that include 3,937 animate chains. Furthermore, we have shown that a supervised classifier using a simple set of easily computable features can effectively identify these characters (overall F1 of 0.90). A detailed error analysis shows that character identification is first and foremost affected by co-reference quality, and further, that the shorter a chain is the harder it is to effectively identify as a character. We release our code and data for the benefit of other researchers</abstract>
       <url hash="d7c76355">2020.coling-main.536</url>
@@ -6527,9 +6527,9 @@
     <paper id="538">
       <title>Free the Plural: Unrestricted Split-Antecedent Anaphora Resolution</title>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>6113–6125</pages>
       <abstract>Now that the performance of coreference resolvers on the simpler forms of anaphoric reference has greatly improved, more attention is devoted to more complex aspects of anaphora. One limitation of virtually all coreference resolution models is the focus on single-antecedent anaphors. Plural anaphors with multiple antecedents-so-called split-antecedent anaphors (as in John met Mary. They went to the movies) have not been widely studied, because they are not annotated in ONTONOTES and are relatively infrequent in other corpora. In this paper, we introduce the first model for unrestricted resolution of split-antecedent anaphors. We start with a strong baseline enhanced by BERT embeddings, and show that we can substantially improve its performance by addressing the sparsity issue. To do this, we experiment with auxiliary corpora where split-antecedent anaphors were annotated by the crowd, and with transfer learning models using element-of bridging references and single-antecedent coreference as auxiliary tasks. Evaluation on the gold annotated ARRAU corpus shows that the out best model uses a combination of three auxiliary corpora achieved F1 scores of 70% and 43.6% when evaluated in a lenient and strict setting, respectively, i.e., 11 and 21 percentage points gain when compared with our baseline.</abstract>
       <url hash="608fd338">2020.coling-main.538</url>
@@ -6553,7 +6553,7 @@
       <title>Fact vs. Opinion: the Role of Argumentation Features in News Classification</title>
       <author><first>Tariq</first><last>Alhindi</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>6139–6149</pages>
       <abstract>A 2018 study led by the Media Insight Project showed that most journalists think that a clearmarking of what is news reporting and what is commentary or opinion (e.g., editorial, op-ed)is essential for gaining public trust. We present an approach to classify news articles into newsstories (i.e., reporting of factual information) and opinion pieces using models that aim to sup-plement the article content representation with argumentation features. Our hypothesis is thatthe nature of argumentative discourse is important in distinguishing between news stories andopinion articles. We show that argumentation features outperform linguistic features used previ-ously and improve on fine-tuned transformer-based models when tested on data from publishersunseen in training. Automatically flagging opinion pieces vs. news stories can aid applicationssuch as fact-checking or event extraction.</abstract>
       <url hash="18f3f623">2020.coling-main.540</url>
@@ -6591,7 +6591,7 @@
     <paper id="543">
       <title>Multilingual Epidemiological Text Classification: A Comparative Study</title>
       <author><first>Stephen</first><last>Mutuvi</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Adam</first><last>Jatowt</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
@@ -6634,7 +6634,7 @@
       <author><first>Chieh-Han</first><last>Wu</last></author>
       <author><first>PoChun</first><last>Chen</last></author>
       <author><first>Kuansan</first><last>Wang</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>6207–6216</pages>
       <abstract>We focus on a recently deployed system built for summarizing academic articles by concept tagging. The system has shown great coverage and high accuracy of concept identification which could be contributed by the knowledge acquired from millions of publications. Provided with the interpretable concepts and knowledge encoded in a pre-trained neural model, we investigate whether the tagged concepts can be applied to a broader class of applications. We propose transforming the tagged concepts into sparse vectors as representations of academic documents. The effectiveness of the representations is analyzed theoretically by a proposed framework. We also empirically show that the representations can have advantages on academic topic discovery and paper recommendation. On these applications, we reveal that the knowledge encoded in the tagging system can be effectively utilized and can help infer additional features from data with limited information.</abstract>
       <url hash="5bbf9662">2020.coling-main.546</url>
@@ -6645,7 +6645,7 @@
       <title>“What is on your mind?” Automated Scoring of Mindreading in Childhood and Early Adolescence</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <author><first>Imogen</first><last>Grumley Traynor</last></author>
       <author><first>Irene</first><last>Luque Aguilera</last></author>
       <author><first>Rory</first><last>Devine</last></author>
@@ -6658,8 +6658,8 @@
     <paper id="548">
       <title>A Deep Metric Learning Method for Biomedical Passage Retrieval</title>
       <author><first>Andrés</first><last>Rosso-Mateus</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>6229–6239</pages>
       <abstract>Passage retrieval is the task of identifying text snippets that are valid answers for a natural language posed question. One way to address this problem is to look at it as a metric learning problem, where we want to induce a metric between questions and passages that assign smaller distances to more relevant passages. In this work, we present a novel method for passage retrieval that learns a metric for questions and passages based on their internal semantic interactions. The method uses a similar approach to that of triplet networks, where the training samples are composed of one anchor (the question) and two positive and negative samples (passages). However,and in contrast with triplet networks, the proposed method uses a novel deep architecture that better exploits the particularities of text and takes into consideration complementary relatedness measures. Besides, the paper presents a sampling strategy that selects both easy and hard negative samples which improves the accuracy of the trained model. The method is particularly well suited for domain-specific passage retrieval where it is very important to take into account different sources of information. The proposed approach was evaluated in a biomedical passage retrieval task, the BioASQ challenge, outperforming standard triplet loss substantially by 10%,and state-of-the-art performance by 26%.</abstract>
       <url hash="f9994fc4">2020.coling-main.548</url>
@@ -6669,7 +6669,7 @@
     <paper id="549">
       <title>Hierarchical Text Segmentation for Medieval Manuscripts</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <author><first>Dominique</first><last>Stutzmann</last></author>
       <author><first>Christopher</first><last>Kermorvant</last></author>
       <author><first>Louis</first><last>Chevalier</last></author>
@@ -6709,7 +6709,7 @@
       <author><first>Eesha</first><last>Dutta</last></author>
       <author><first>Paryul</first><last>Jain</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>6277–6283</pages>
       <abstract>While extensive popularity of online social media platforms has made information dissemination faster, it has also resulted in widespread online abuse of different types like hate speech, offensive language, sexist and racist opinions, etc. Detection and curtailment of such abusive content is critical for avoiding its psychological impact on victim communities, and thereby preventing hate crimes. Previous works have focused on classifying user posts into various forms of abusive behavior. But there has hardly been any focus on estimating the severity of abuse and the target. In this paper, we present a first of the kind dataset with 7,601 posts from Gab which looks at online abuse from the perspective of presence of abuse, severity and target of abusive behavior. We also propose a system to address these tasks, obtaining an accuracy of ∼80% for abuse presence, ∼82% for abuse target prediction, and ∼65% for abuse severity prediction.</abstract>
@@ -6719,7 +6719,7 @@
     </paper>
     <paper id="553">
       <title>A Survey of Automatic Personality Detection from Texts</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Seren</first><last>Yenikent</last></author>
       <pages>6284–6295</pages>
       <abstract>Personality profiling has long been used in psychology to predict life outcomes. Recently, automatic detection of personality traits from written messages has gained significant attention in computational linguistics and natural language processing communities, due to its applicability in various fields. In this survey, we show the trajectory of research towards automatic personality detection from purely psychology approaches, through psycholinguistics, to the recent purely natural language processing approaches on large datasets automatically extracted from social media. We point out what has been gained and what lost during that trajectory, and show what can be realistic expectations in the field.</abstract>
@@ -6748,7 +6748,7 @@
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Jianxin</first><last>Li</last></author>
       <author><first>Lifang</first><last>He</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>6302–6314</pages>
       <abstract>Review rating prediction of text reviews is a rapidly growing technology with a wide range of applications in natural language processing. However, most existing methods either use hand-crafted features or learn features using deep learning with simple text corpus as input for review rating prediction, ignoring the hierarchies among data. In this paper, we propose a Hierarchical bi-directional self-attention Network framework (HabNet) for paper review rating prediction and recommendation, which can serve as an effective decision-making tool for the academic paper review process. Specifically, we leverage the hierarchical structure of the paper reviews with three levels of encoders: sentence encoder (level one), intra-review encoder (level two) and inter-review encoder (level three). Each encoder first derives contextual representation of each level, then generates a higher-level representation, and after the learning process, we are able to identify useful predictors to make the final acceptance decision, as well as to help discover the inconsistency between numerical review ratings and text sentiment conveyed by reviewers. Furthermore, we introduce two new metrics to evaluate models in data imbalance situations. Extensive experiments on a publicly available dataset (PeerRead) and our own collected dataset (OpenReview) demonstrate the superiority of the proposed approach compared with state-of-the-art methods.</abstract>
       <url hash="27a0e813">2020.coling-main.555</url>
@@ -6788,7 +6788,7 @@
     <paper id="559">
       <title><fixed-case>XH</fixed-case>ate-999: Analyzing and Detecting Abusive Language Across Domains and Languages</title>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <pages>6350–6365</pages>
       <abstract>We present XHate-999, a multi-domain and multilingual evaluation data set for abusive language detection. By aligning test instances across six typologically diverse languages, XHate-999 for the first time allows for disentanglement of the domain transfer and language transfer effects in abusive language detection. We conduct a series of domain- and language-transfer experiments with state-of-the-art monolingual and multilingual transformer models, setting strong baseline results and profiling XHate-999 as a comprehensive evaluation resource for abusive language detection. Finally, we show that domain- and language-adaption, via intermediate masked language modeling on abusive corpora in the target language, can lead to substantially improved abusive language detection in the target language in the zero-shot transfer setups.</abstract>
@@ -6816,7 +6816,7 @@
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Baohua</first><last>Zhou</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6377–6387</pages>
       <abstract>Previous work for rumor resolution concentrates on exploiting time-series characteristics or modeling topology structure separately. However, how local interactive pattern affects global information assemblage has not been explored. In this paper, we attempt to address the problem by learning evolution of message interaction. We model confrontation and reciprocity between message pairs via discrete variational autoencoders which effectively reflects the diversified opinion interactivity. Moreover, we capture the variation of message interaction using a hierarchical framework to better integrate information flow of a rumor cascade. Experiments on PHEME dataset demonstrate our proposed model achieves higher accuracy than existing methods.</abstract>
       <url hash="56d059e8">2020.coling-main.561</url>
@@ -6862,7 +6862,7 @@
     </paper>
     <paper id="565">
       <title>Graph Convolution over Multiple Dependency Sub-graphs for Relation Extraction</title>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
       <author><first>Frans</first><last>Coenen</last></author>
       <pages>6424–6435</pages>
@@ -6890,7 +6890,7 @@
       <title>Multi-choice Relational Reasoning for Machine Reading Comprehension</title>
       <author><first>Wuya</first><last>Chen</last></author>
       <author><first>Xiaojun</first><last>Quan</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Zhengcheng</first><last>Min</last></author>
       <author><first>Jiahai</first><last>Wang</last></author>
       <pages>6448–6458</pages>
@@ -6904,7 +6904,7 @@
       <author><first>Shuai</first><last>Pang</last></author>
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Zeyu</first><last>Yan</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Jianping</first><last>Shen</last></author>
       <pages>6459–6469</pages>
       <abstract>Recently, pre-trained language models such as BERT have shown state-of-the-art accuracies in text matching. When being applied to IR (or QA), the BERT-based matching models need to online calculate the representations and interactions for all query-candidate pairs. The high inference cost has prohibited the deployments of BERT-based matching models in many practical applications. To address this issue, we propose a novel BERT-based text matching model, in which the representations and the interactions are decoupled. Then, the representations of the candidates can be calculated and stored offline, and directly retrieved during the online matching phase. To conduct the interactions and generate final matching scores, a lightweight attention network is designed. Experiments based on several large scale text matching datasets show that the proposed model, called FASTMATCH, can achieve up to 100X speed-up to BERT and RoBERTa at the online matching phase, while keeping more up to 98.7% of the performance.</abstract>
@@ -6950,7 +6950,7 @@
     <paper id="572">
       <title><fixed-case>NYTWIT</fixed-case>: A Dataset of Novel Words in the <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>T</fixed-case>imes</title>
       <author><first>Yuval</first><last>Pinter</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Max</first><last>Bittker</last></author>
       <pages>6509–6515</pages>
       <abstract>We present the New York Times Word Innovation Types dataset, or NYTWIT, a collection of over 2,500 novel English words published in the New York Times between November 2017 and March 2019, manually annotated for their class of novelty (such as lexical derivation, dialectal variation, blending, or compounding). We present baseline results for both uncontextual and contextual prediction of novelty class, showing that there is room for improvement even for state-of-the-art NLP systems. We hope this resource will prove useful for linguists and NLP practitioners by providing a real-world environment of novel word appearance.</abstract>
@@ -6974,7 +6974,7 @@
       <title>Continual Lifelong Learning in Natural Language Processing: A Survey</title>
       <author><first>Magdalena</first><last>Biesialska</last></author>
       <author><first>Katarzyna</first><last>Biesialska</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>6523–6541</pages>
       <abstract>Continual learning (CL) aims to enable information systems to learn from a continuous data stream across time. However, it is difficult for existing deep learning architectures to learn a new task without largely forgetting previously acquired knowledge. Furthermore, CL is particularly challenging for language learning, as natural language is ambiguous: it is discrete, compositional, and its meaning is context-dependent. In this work, we look at the problem of CL through the lens of various NLP tasks. Our survey discusses major challenges in CL and current methods applied in neural network models. We also provide a critical review of the existing CL evaluation methods and datasets in NLP. Finally, we present our outlook on future research directions.</abstract>
       <url hash="80ca90cd">2020.coling-main.574</url>
@@ -6986,7 +6986,7 @@
       <author><first>Emily</first><last>Öhman</last></author>
       <author><first>Marc</first><last>Pàmies</last></author>
       <author><first>Kaisla</first><last>Kajava</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>6542–6552</pages>
       <abstract>We introduce XED, a multilingual fine-grained emotion dataset. The dataset consists of human-annotated Finnish (25k) and English sentences (30k), as well as projected annotations for 30 additional languages, providing new resources for many low-resource languages. We use Plutchik’s core emotions to annotate the dataset with the addition of neutral to create a multilabel multiclass dataset. The dataset is carefully evaluated using language-specific BERT models and SVMs to show that XED performs on par with other similar datasets and is therefore a useful tool for sentiment analysis and emotion detection.</abstract>
       <url hash="7530b805">2020.coling-main.575</url>
@@ -6997,9 +6997,9 @@
       <title>Human or Neural Translation?</title>
       <author><first>Shivendra</first><last>Bhardwaj</last></author>
       <author><first>David</first><last>Alfonso Hermelo</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>6553–6564</pages>
       <abstract>Deep neural models tremendously improved machine translation. In this context, we investigate whether distinguishing machine from human translations is still feasible. We trained and applied 18 classifiers under two settings: a monolingual task, in which the classifier only looks at the translation; and a bilingual task, in which the source text is also taken into consideration. We report on extensive experiments involving 4 neural MT systems (Google Translate, DeepL, as well as two systems we trained) and varying the domain of texts. We show that the bilingual task is the easiest one and that transfer-based deep-learning classifiers perform best, with mean accuracies around 85% in-domain and 75% out-of-domain .</abstract>
@@ -7023,7 +7023,7 @@
     <paper id="578">
       <title>Domain-Specific Sentiment Lexicons Induced from Labeled Documents</title>
       <author><first>SM Mazharul</first><last>Islam</last></author>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <pages>6576–6587</pages>
       <abstract>Sentiment analysis is an area of substantial relevance both in industry and in academia, including for instance in social studies. Although supervised learning algorithms have advanced considerably in recent years, in many settings it remains more practical to apply an unsupervised technique. The latter are oftentimes based on sentiment lexicons. However, existing sentiment lexicons reflect an abstract notion of polarity and do not do justice to the substantial differences of word polarities between different domains. In this work, we draw on a collection of domain-specific data to induce a set of 24 domain-specific sentiment lexicons. We rely on initial linear models to induce initial word intensity scores, and then train new deep models based on word vector representations to overcome the scarcity of the original seed data. Our analysis shows substantial differences between domains, which make domain-specific sentiment lexicons a promising form of lexical resource in downstream tasks, and the predicted lexicons indeed perform effectively on tasks such as review classification and cross-lingual word sentiment prediction.</abstract>
@@ -7048,7 +7048,7 @@
       <author><first>Xanh</first><last>Ho</last></author>
       <author><first>Anh-Khoa</first><last>Duong Nguyen</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>6609–6625</pages>
       <abstract>A multi-hop question answering (QA) dataset aims to test reasoning and inference skills by requiring a model to read multiple paragraphs to answer a given question. However, current datasets do not provide a complete explanation for the reasoning process from the question to the answer. Further, previous studies revealed that many examples in existing multi-hop datasets do not require multi-hop reasoning to answer a question. In this study, we present a new multi-hop QA dataset, called 2WikiMultiHopQA, which uses structured and unstructured data. In our dataset, we introduce the evidence information containing a reasoning path for multi-hop questions. The evidence information has two benefits: (i) providing a comprehensive explanation for predictions and (ii) evaluating the reasoning skills of a model. We carefully design a pipeline and a set of templates when generating a question-answer pair that guarantees the multi-hop steps and the quality of the questions. We also exploit the structured format in Wikidata and use logical rules to create questions that are natural but still require multi-hop reasoning. Through experiments, we demonstrate that our dataset is challenging for multi-hop models and it ensures that multi-hop reasoning is required.</abstract>
       <url hash="9d4f8121">2020.coling-main.580</url>
@@ -7079,7 +7079,7 @@
     </paper>
     <paper id="583">
       <title><fixed-case>D</fixed-case>a<fixed-case>N</fixed-case>+: <fixed-case>D</fixed-case>anish Nested Named Entities and Lexical Normalization</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Kristian Nørgaard</first><last>Jensen</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <pages>6649–6662</pages>
@@ -7128,7 +7128,7 @@
       <author><first>Abhinav Reddy</first><last>Appidi</last></author>
       <author><first>Vamshi Krishna</first><last>Srirangam</last></author>
       <author><first>Darsi</first><last>Suhas</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>6703–6709</pages>
       <abstract>Emotion prediction is a critical task in the field of Natural Language Processing (NLP). There has been a significant amount of work done in emotion prediction for resource-rich languages. There has been work done on code-mixed social media corpus but not on emotion prediction of Kannada-English code-mixed Twitter data. In this paper, we analyze the problem of emotion prediction on corpus obtained from code-mixed Kannada-English extracted from Twitter annotated with their respective ‘Emotion’ for each tweet. We experimented with machine learning prediction models using features like Character N-Grams, Word N-Grams, Repetitive characters, and others on SVM and LSTM on our corpus, which resulted in an accuracy of 30% and 32% respectively.</abstract>
       <url hash="8aeb7965">2020.coling-main.587</url>
@@ -7177,7 +7177,7 @@
       <title>Multilingual Neural <fixed-case>RST</fixed-case> Discourse Parsing</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Ke</first><last>Shi</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>6730–6738</pages>
       <abstract>Text discourse parsing plays an important role in understanding information flow and argumentative structure in natural language. Previous research under the Rhetorical Structure Theory (RST) has mostly focused on inducing and evaluating models from the English treebank. However, the parsing tasks for other languages such as German, Dutch, and Portuguese are still challenging due to the shortage of annotated data. In this work, we investigate two approaches to establish a neural, cross-lingual discourse parser via: (1) utilizing multilingual vector representations; and (2) adopting segment-level translation of the source content. Experiment results show that both methods are effective even with limited training data, and achieve state-of-the-art performance on cross-lingual, document-level discourse parsing on all sub-tasks.</abstract>
       <url hash="a1cc5e02">2020.coling-main.591</url>
@@ -7217,7 +7217,7 @@
     <paper id="595">
       <title>Statistical Parsing of Tree Wrapping Grammars</title>
       <author><first>Tatiana</first><last>Bladier</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <pages>6759–6766</pages>
       <abstract>We describe an approach to statistical parsing with Tree-Wrapping Grammars (TWG). TWG is a tree-rewriting formalism which includes the tree-combination operations of substitution, sister-adjunction and tree-wrapping substitution. TWGs can be extracted from constituency treebanks and aim at representing long distance dependencies (LDDs) in a linguistically adequate way. We present a parsing algorithm for TWGs based on neural supertagging and A* parsing. We extract a TWG for English from the treebanks for Role and Reference Grammar and discuss first parsing results with this grammar.</abstract>
@@ -7233,7 +7233,7 @@
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Marco</first><last>Moresi</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>6767–6774</pages>
       <abstract>Dialog state tracking (DST) suffers from severe data sparsity. While many natural language processing (NLP) tasks benefit from transfer learning and multi-task learning, in dialog these methods are limited by the amount of available data and by the specificity of dialog applications. In this work, we successfully utilize non-dialog data from unrelated NLP tasks to train dialog state trackers. This opens the door to the abundance of unrelated NLP corpora to mitigate the data sparsity issue inherent to DST.</abstract>
       <url hash="b3ad69a7">2020.coling-main.596</url>
@@ -7244,7 +7244,7 @@
       <title>Resource Constrained Dialog Policy Learning Via Differentiable Inductive Logic Programming</title>
       <author><first>Zhenpeng</first><last>Zhou</last></author>
       <author><first>Ahmad</first><last>Beirami</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Pararth</first><last>Shah</last></author>
       <author><first>Rajen</first><last>Subba</last></author>
       <author><first>Alborz</first><last>Geramifard</last></author>
@@ -7314,7 +7314,7 @@
     <paper id="603">
       <title>Neural Unsupervised Domain Adaptation in <fixed-case>NLP</fixed-case>—<fixed-case>A</fixed-case> Survey</title>
       <author><first>Alan</first><last>Ramponi</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>6838–6855</pages>
       <abstract>Deep neural networks excel at learning from labeled data and achieve state-of-the-art results on a wide array of Natural Language Processing tasks. In contrast, learning from unlabeled data, especially under domain shift, remains a challenge. Motivated by the latest advances, in this survey we review neural unsupervised domain adaptation techniques which do not require labeled target domain data. This is a more challenging yet a more widely applicable setup. We outline methods, from early traditional non-neural methods to pre-trained model transfer. We also revisit the notion of domain, and we uncover a bias in the type of Natural Language Processing tasks which received most attention. Lastly, we outline future directions, particularly the broader need for out-of-distribution generalization of future NLP.</abstract>
       <url hash="e69dceef">2020.coling-main.603</url>
@@ -7326,7 +7326,7 @@
       <author><first>Charles</first><last>Welch</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>6856–6862</pages>
       <abstract>In this paper, we introduce personalized word embeddings, and examine their value for language modeling. We compare the performance of our proposed prediction model when using personalized versus generic word representations, and study how these representations can be leveraged for improved performance. We provide insight into what types of words can be more accurately predicted when building personalized models. Our results show that a subset of words belonging to specific psycholinguistic categories tend to vary more in their representations across users and that combining generic and personalized word embeddings yields the best performance, with a 4.7% relative reduction in perplexity. Additionally, we show that a language model using personalized word embeddings can be effectively used for authorship attribution.</abstract>
       <url hash="5b24a38e">2020.coling-main.604</url>
@@ -7386,8 +7386,8 @@
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>6903–6915</pages>
       <abstract>Due to the compelling improvements brought by BERT, many recent representation models adopted the Transformer architecture as their main building block, consequently inheriting the wordpiece tokenization system despite it not being intrinsically linked to the notion of Transformers. While this system is thought to achieve a good balance between the flexibility of characters and the efficiency of full words, using predefined wordpiece vocabularies from the general domain is not always suitable, especially when building models for specialized domains (e.g., the medical domain). Moreover, adopting a wordpiece tokenization shifts the focus from the word level to the subword level, making the models conceptually more complex and arguably less convenient in practice. For these reasons, we propose CharacterBERT, a new variant of BERT that drops the wordpiece system altogether and uses a Character-CNN module instead to represent entire words by consulting their characters. We show that this new model improves the performance of BERT on a variety of medical domain tasks while at the same time producing robust, word-level, and open-vocabulary representations.</abstract>
       <url hash="9b77793f">2020.coling-main.609</url>
@@ -7398,7 +7398,7 @@
       <title>Autoregressive Reasoning over Chains of Facts with Transformers</title>
       <author><first>Ruben</first><last>Cartuyvels</last></author>
       <author><first>Graham</first><last>Spinks</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>6916–6930</pages>
       <abstract>This paper proposes an iterative inference algorithm for multi-hop explanation regeneration, that retrieves relevant factual evidence in the form of text snippets, given a natural language question and its answer. Combining multiple sources of evidence or facts for multi-hop reasoning becomes increasingly hard when the number of sources needed to make an inference grows. Our algorithm copes with this by decomposing the selection of facts from a corpus autoregressively, conditioning the next iteration on previously selected facts. This allows us to use a pairwise learning-to-rank loss. We validate our method on datasets of the TextGraphs 2019 and 2020 Shared Tasks for explanation regeneration. Existing work on this task either evaluates facts in isolation or artificially limits the possible chains of facts, thus limiting multi-hop inference. We demonstrate that our algorithm, when used with a pre-trained transformer model, outperforms the previous state-of-the-art in terms of precision, training time and inference efficiency.</abstract>
       <url hash="c97d241b">2020.coling-main.610</url>
@@ -7412,7 +7412,7 @@
       <author><first>Aniket</first><last>Didolkar</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>6931–6936</pages>
       <abstract>Models with a large number of parameters are prone to over-fitting and often fail to capture the underlying input distribution. We introduce Emix, a data augmentation method that uses interpolations of word embeddings and hidden layer representations to construct virtual examples. We show that Emix shows significant improvements over previously used interpolation based regularizers and data augmentation techniques. We also demonstrate how our proposed method is more robust to sparsification. We highlight the merits of our proposed methodology by performing thorough quantitative and qualitative assessments.</abstract>
       <url hash="5f0a0218">2020.coling-main.611</url>
@@ -7424,7 +7424,7 @@
       <author><first>Zhihong</first><last>Lei</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Christian</first><last>Dugast</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>6937–6941</pages>
       <abstract>Named entity recognition is a key component in various natural language processing systems, and neural architectures provide significant improvements over conventional approaches. Regardless of different word embedding and hidden layer structures of the networks, a conditional random field layer is commonly used for the output. This work proposes to use a neural language model as an alternative to the conditional random field layer, which is more flexible for the size of the corpus. Experimental results show that the proposed system has a significant advantage in terms of training speed, with a marginal performance degradation.</abstract>
       <url hash="5746d362">2020.coling-main.612</url>
@@ -7487,9 +7487,9 @@
       <title>Demo Application for the <fixed-case>A</fixed-case>uto<fixed-case>GOAL</fixed-case> Framework</title>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andres</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz-Guillena</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andres</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz-Guillena</last></author>
       <author><first>Yudivián</first><last>Almeida Cruz</last></author>
       <pages>18–22</pages>
       <abstract>This paper introduces a web demo that showcases the main characteristics of the AutoGOAL framework. AutoGOAL is a framework in Python for automatically finding the best way to solve a given task. It has been designed mainly for automatic machine learning(AutoML) but it can be used in any scenario where several possible strategies are available to solve a given computational task. In contrast with alternative frameworks, AutoGOAL can be applied seamlessly to Natural Language Processing as well as structured classification problems. This paper presents an overview of the framework’s design and experimental evaluation in several machine learning problems, including two recent NLP challenges. The accompanying software demo is available online (<url>https://autogoal.github.io/demo</url>) and full source code is provided under the MIT open-source license (<url>https://autogoal.github.io</url>).</abstract>
@@ -7499,7 +7499,7 @@
     </paper>
     <paper id="5">
       <title>Fast Word Predictor for On-Device Application</title>
-      <author><first>Huy Tien</first><last>Nguyen</last></author>
+      <author id="huy-tien-nguyen"><first>Huy Tien</first><last>Nguyen</last></author>
       <author><first>Khoi Tuan</first><last>Nguyen</last></author>
       <author><first>Anh Tuan</first><last>Nguyen</last></author>
       <author><first>Thanh Lac Thi</first><last>Tran</last></author>
@@ -7538,9 +7538,9 @@
       <author><first>Mihaela</first><last>Bornea</last></author>
       <author><first>Kazi</first><last>Hasan</last></author>
       <author><first>Rishav</first><last>Chakravarti</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>41–47</pages>
       <abstract>This paper presents M-GAAMA, a Multilingual Question Answering architecture and demo system. This is the first multilingual machine reading comprehension (MRC) demo which is able to answer questions in over 100 languages. M-GAAMA answers questions from a given passage in the same or different language. It incorporates several existing multilingual models that can be used interchangeably in the demo such as M-BERT and XLM-R. The M-GAAMA demo also improves language accessibility by incorporating the IBM Watson machine translation widget to provide additional capabilities to the user to see an answer in their desired language. We also show how M-GAAMA can be used in downstream tasks by incorporating it into an END-TO-END-QA system using CFO (Chakravarti et al., 2019). We experiment with our system architecture on the Multi-Lingual Question Answering (MLQA) and the COVID-19 CORD (Wang et al., 2020; Tang et al., 2020) datasets to provide insights into the performance of the system.</abstract>
       <url hash="154239fb">2020.coling-demos.8</url>
@@ -7551,7 +7551,7 @@
       <title><fixed-case>X</fixed-case>plai<fixed-case>NLI</fixed-case>: Explainable Natural Language Inference through Visual Analytics</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Rita</first><last>Sevastjanova</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <author><first>Mennatallah</first><last>El-Assady</last></author>
       <pages>48–52</pages>
@@ -7565,7 +7565,7 @@
       <author><first>Luca</first><last>Lugini</last></author>
       <author><first>Christopher</first><last>Olshefski</last></author>
       <author><first>Ravneet</first><last>Singh</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Amanda</first><last>Godley</last></author>
       <pages>53–58</pages>
       <abstract>Teaching collaborative argumentation is an advanced skill that many K-12 teachers struggle to develop. To address this, we have developed Discussion Tracker, a classroom discussion analytics system based on novel algorithms for classifying argument moves, specificity, and collaboration. Results from a classroom deployment indicate that teachers found the analytics useful, and that the underlying classifiers perform with moderate to substantial agreement with humans.</abstract>
@@ -7648,7 +7648,7 @@
     <meta>
       <booktitle>Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts</booktitle>
       <editor><first>Lucia</first><last>Specia</last></editor>
-      <editor><first>Daniel</first><last>Beck</last></editor>
+      <editor id="daniel-beck"><first>Daniel</first><last>Beck</last></editor>
       <publisher>International Committee for Computational Linguistics</publisher>
       <address>Barcelona, Spain (Online)</address>
       <month>December</month>
@@ -7674,7 +7674,7 @@
     </paper>
     <paper id="2">
       <title>Embeddings in Natural Language Processing</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>10–15</pages>
       <abstract>Embeddings have been one of the most important topics of interest in NLP for the past decade. Representing knowledge through a low-dimensional vector which is easily integrable in modern machine learning models has played a central role in the development of the field. Embedding techniques initially focused on words but the attention soon started to shift to other forms. This tutorial will provide a high-level synthesis of the main embedding techniques in NLP, in the broad sense. We will start by conventional word embeddings (e.g., Word2Vec and GloVe) and then move to other types of embeddings, such as sense-specific and graph alternatives. We will finalize with an overview of the trending contextualized representations (e.g., ELMo and BERT) and explain their potential and impact in NLP.</abstract>
@@ -7695,7 +7695,7 @@
     </paper>
     <paper id="4">
       <title>Detection and Resolution of Rumors and Misinformation with <fixed-case>NLP</fixed-case></title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
       <pages>22–26</pages>
       <abstract>Detecting and grounding false and misleading claims on the web has grown to form a substantial sub-field of NLP. The sub-field addresses problems at multiple different levels of misinformation detection: identifying check-worthy claims; tracking claims and rumors; rumor collection and annotation; grounding claims against knowledge bases; using stance to verify claims; and applying style analysis to detect deception. This half-day tutorial presents the theory behind each of these steps as well as the state-of-the-art solutions.</abstract>
@@ -7804,7 +7804,7 @@
       <title>Semantic Diversity for Natural Language Understanding Evaluation in Dialog Systems</title>
       <author><first>Enrico</first><last>Palumbo</last></author>
       <author><first>Andrea</first><last>Mezzalira</last></author>
-      <author><first>Cristina</first><last>Marco</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Marco</last></author>
       <author><first>Alessandro</first><last>Manzotti</last></author>
       <author><first>Daniele</first><last>Amberti</last></author>
       <pages>44–49</pages>
@@ -7841,7 +7841,7 @@
       <author><first>Anuj</first><last>Kumar</last></author>
       <author><first>Shawn</first><last>Mei</last></author>
       <author><first>Karthik</first><last>Mohan</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>64–77</pages>
       <abstract>Natural language generation (NLG) is a critical component in conversational systems, owing to its role of formulating a correct and natural text response. Traditionally, NLG components have been deployed using template-based solutions. Although neural network solutions recently developed in the research community have been shown to provide several benefits, deployment of such model-based solutions has been challenging due to high latency, correctness issues, and high data needs. In this paper, we present approaches that have helped us deploy data-efficient neural solutions for NLG in conversational systems to production. We describe a family of sampling and modeling techniques to attain production quality with light-weight neural network models using only a fraction of the data that would be necessary otherwise, and show a thorough comparison between each. Our results show that domain complexity dictates the appropriate approach to achieve high data efficiency. Finally, we distill the lessons from our experimental findings into a list of best practices for production-level NLG model development, and present them in a brief runbook. Importantly, the end products of all of the techniques are small sequence-to-sequence models (~2Mb) that we can reliably deploy in production. These models achieve the same quality as large pretrained models (~1Gb) as judged by human raters.</abstract>
       <url hash="79e6b2fa">2020.coling-industry.7</url>
@@ -7867,9 +7867,9 @@
       <author><first>Anthony</first><last>Ferritto</last></author>
       <author><first>Bhavani</first><last>Iyer</last></author>
       <author><first>Lin</first><last>Pan</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>90–101</pages>
       <abstract>Industry-scale NLP systems necessitate two features. 1. Robustness: “zero-shot transfer learning” (ZSTL) performance has to be commendable and 2. Efficiency: systems have to train efficiently and respond instantaneously. In this paper, we introduce the development of a production model called GAAMA (Go Ahead Ask Me Anything) which possess the above two characteristics. For robustness, it trains on the recently introduced Natural Questions (NQ) dataset. NQ poses additional challenges over older datasets like SQuAD: (a) QA systems need to read and comprehend an entire Wikipedia article rather than a small passage, and (b) NQ does not suffer from observation bias during construction, resulting in less lexical overlap between the question and the article. GAAMA consists of Attention-over-Attention, diversity among attention heads, hierarchical transfer learning, and synthetic data augmentation while being computationally inexpensive. Building on top of the powerful BERTQA model, GAAMA provides a ∼2.0% absolute boost in F1 over the industry-scale state-of-the-art (SOTA) system on NQ. Further, we show that GAAMA transfers zero-shot to unseen real life and important domains as it yields respectable performance on two benchmarks: the BioASQ and the newly introduced CovidQA datasets.</abstract>
       <url hash="53c12c0e">2020.coling-industry.9</url>
@@ -7914,7 +7914,7 @@
     <paper id="13">
       <title>hinglish<fixed-case>N</fixed-case>orm - A Corpus of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Sentences for Text Normalization</title>
       <author><first>Piyush</first><last>Makhija</last></author>
-      <author><first>Ankit</first><last>Kumar</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Kumar</last></author>
       <author><first>Anuj</first><last>Gupta</last></author>
       <pages>136–145</pages>
       <abstract>We present hinglishNorm - a human annotated corpus of Hindi-English code-mixed sentences for text normalization task. Each sentence in the corpus is aligned to its corresponding human annotated normalized form. To the best of our knowledge, there is no corpus of Hindi-English code-mixed sentences for text normalization task that is publicly available. Our work is the first attempt in this direction. The corpus contains 13494 segments annotated for text normalization. Further, we present baseline normalization results on this corpus. We obtain a Word Error Rate (WER) of 15.55, BiLingual Evaluation Understudy (BLEU) score of 71.2, and Metric for Evaluation of Translation with Explicit ORdering (METEOR) score of 0.50.</abstract>
@@ -7925,7 +7925,7 @@
     <paper id="14">
       <title>Assessing Social License to Operate from the Public Discourse on Social Media</title>
       <author><first>Chang</first><last>Xu</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
       <author><first>Surya</first><last>Nepal</last></author>
       <author><first>Keith</first><last>VanderLinden</last></author>
@@ -7937,7 +7937,7 @@
     </paper>
     <paper id="15">
       <title>Extreme Model Compression for On-device Natural Language Understanding</title>
-      <author><first>Kanthashree</first><last>Mysore Sathyendra</last></author>
+      <author id="kanthashree-mysore-sathyendra"><first>Kanthashree</first><last>Mysore Sathyendra</last></author>
       <author><first>Samridhi</first><last>Choudhary</last></author>
       <author><first>Leah</first><last>Nicolich-Henkin</last></author>
       <pages>160–171</pages>
@@ -7950,10 +7950,10 @@
       <title>Scalable Cross-lingual Treebank Synthesis for Improved Production Dependency Parsers</title>
       <author><first>Yousef</first><last>El-Kurdi</last></author>
       <author><first>Hiroshi</first><last>Kanayama</last></author>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <author><first>Todd</first><last>Ward</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>172–178</pages>
       <abstract>We present scalable Universal Dependency (UD) treebank synthesis techniques that exploit advances in language representation modeling which leverage vast amounts of unlabeled general-purpose multilingual text. We introduce a data augmentation technique that uses synthetic treebanks to improve production-grade parsers. The synthetic treebanks are generated using a state-of-the-art biaffine parser adapted with pretrained Transformer models, such as Multilingual BERT (M-BERT). The new parser improves LAS by up to two points on seven languages. The production models’ LAS performance improves as the augmented treebanks scale in size, surpassing performance of production models trained on originally annotated UD treebanks.</abstract>
       <url hash="bb42e85f">2020.coling-industry.16</url>
@@ -8004,7 +8004,7 @@
       <author><first>Vishwas</first><last>Suryanarayanan</last></author>
       <author><first>Chala</first><last>Fufa</last></author>
       <author><first>Pamela</first><last>Bhattacharya</last></author>
-      <author><first>Charles</first><last>Lee</last></author>
+      <author id="charles-c-lee"><first>Charles</first><last>Lee</last></author>
       <pages>214–227</pages>
       <abstract>A prominent problem faced by conversational agents working with large documents (Eg: email-based assistants) is the frequent presence of information in the document that is irrelevant to the assistant. This in turn makes it harder for the agent to accurately detect intents, extract entities relevant to those intents and perform the desired action. To address this issue we present a neural model for scoping relevant information for the agent from a large document. We show that when used as the first step in a popularly used email-based assistant for helping users schedule meetings, our proposed model helps improve the performance of the intent detection and entity extraction tasks required by the agent for correctly scheduling meetings: across a suite of 6 downstream tasks, by using our proposed method, we observe an average gain of 35% in precision without any drop in recall. Additionally, we demonstrate that the same approach can be used for component level analysis in large documents, such as signature block identification.</abstract>
       <url hash="d262d9a1">2020.coling-industry.20</url>
@@ -8015,8 +8015,8 @@
       <title>Uncertainty Modeling for Machine Comprehension Systems using Efficient <fixed-case>B</fixed-case>ayesian Neural Networks</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>228–235</pages>
       <abstract>While neural approaches have achieved significant improvement in machine comprehension tasks, models often work as a black-box, resulting in lower interpretability, which requires special attention in domains such as healthcare or education. Quantifying uncertainty helps pave the way towards more interpretable neural networks. In classification and regression tasks, Bayesian neural networks have been effective in estimating model uncertainty. However, inference time increases linearly due to the required sampling process in Bayesian neural networks. Thus speed becomes a bottleneck in tasks with high system complexity such as question-answering or dialogue generation. In this work, we propose a hybrid neural architecture to quantify model uncertainty using Bayesian weight approximation but boosts up the inference speed by 80% relative at test time, and apply it for a clinical dialogue comprehension task. The proposed approach is also used to enable active learning so that an updated model can be trained more optimally with new incoming data by selecting samples that are not well-represented in the current training scheme.</abstract>
       <url hash="3bd8bc98">2020.coling-industry.21</url>
diff --git a/data/xml/2020.computerm.xml b/data/xml/2020.computerm.xml
index 26db91e0b0..fed6bde2c2 100644
--- a/data/xml/2020.computerm.xml
+++ b/data/xml/2020.computerm.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th International Workshop on Computational Terminology</booktitle>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Kyo</first><last>Kageura</last></editor>
       <editor><first>Ayla Rigouts</first><last>Terryn</last></editor>
       <publisher>European Language Resources Association</publisher>
@@ -21,7 +21,7 @@
       <title>Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features</title>
       <author><first>Patrick</first><last>Drouin</last></author>
       <author><first>Jean-Benoît</first><last>Morel</last></author>
-      <author><first>Marie-Claude</first><last>L’ Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’ Homme</last></author>
       <pages>1–7</pages>
       <abstract>The first step of any terminological work is to setup a reliable, specialized corpus composed of documents written by specialists and then to apply automatic term extraction (ATE) methods to this corpus in order to retrieve a first list of potential terms. In this paper, the experiment we describe differs quite drastically from this usual process since we are applying ATE to unspecialized corpora. The corpus used for this study was built from newspaper articles retrieved from the Web using a short list of keywords. The general intuition on which this research is based is that ATE based corpus comparison techniques can be used to capture both similarities and dissimilarities between corpora. The former are exploited through a termhood measure and the latter through word embeddings. Our initial results were validated manually and show that combining a traditional ATE method that focuses on dissimilarities between corpora to newer methods that exploit similarities (more specifically distributional features of candidates) leads to promising results.</abstract>
       <url hash="810d1840">2020.computerm-1.1</url>
@@ -101,7 +101,7 @@
       <author><first>Vid</first><last>Podpečan</last></author>
       <author><first>Dragana</first><last>Miljkovic</last></author>
       <author><first>Uroš</first><last>Stepišnik</last></author>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <pages>55–61</pages>
       <abstract>We present the NetViz terminology visualization tool and apply it to the domain modeling of karstology, a subfield of geography studying karst phenomena. The developed tool allows for high-performance online network visualization where the user can upload the terminological data in a simple CSV format, define the nodes (terms, categories), edges (relations) and their properties (by assigning different node colors), and then edit and interactively explore domain knowledge in the form of a network. We showcase the usefulness of the tool on examples from the karstology domain, where in the first use case we visualize the domain knowledge as represented in a manually annotated corpus of domain definitions, while in the second use case we show the power of visualization for domain understanding by visualizing automatically extracted knowledge in the form of triplets extracted from the karstology domain corpus. The application is entirely web-based without any need for downloading or special configuration. The source code of the web application is also available under the permissive MIT license, allowing future extensions for developing new terminological applications.</abstract>
       <url hash="4ac24597">2020.computerm-1.8</url>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="10">
       <title>Supporting terminology extraction with dependency parses</title>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Piotr</first><last>Rychlik</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <pages>72–79</pages>
@@ -143,7 +143,7 @@
     <paper id="12">
       <title><fixed-case>T</fixed-case>erm<fixed-case>E</fixed-case>val 2020: Shared Task on Automatic Term Extraction Using the Annotated Corpora for Term Extraction Research (<fixed-case>ACTER</fixed-case>) Dataset</title>
       <author><first>Ayla</first><last>Rigouts Terryn</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Patrick</first><last>Drouin</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>85–94</pages>
diff --git a/data/xml/2020.conll.xml b/data/xml/2020.conll.xml
index e49a592d3b..50b9507f2f 100644
--- a/data/xml/2020.conll.xml
+++ b/data/xml/2020.conll.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-05" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 24th Conference on Computational Natural Language Learning</booktitle>
-      <editor><first>Raquel</first><last>Fernández</last></editor>
+      <editor id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></editor>
       <editor><first>Tal</first><last>Linzen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Neural Proof Nets</title>
       <author><first>Konstantinos</first><last>Kogkalidis</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <author><first>Richard</first><last>Moot</last></author>
       <pages>26–40</pages>
       <abstract>Linear logic and the linear λ-calculus have a long standing tradition in the study of natural language form and meaning. Among the proof calculi of linear logic, proof nets are of particular interest, offering an attractive geometric representation of derivations that is unburdened by the bureaucratic complications of conventional prooftheoretic formats. Building on recent advances in set-theoretic learning, we propose a neural variant of proof nets based on Sinkhorn networks, which allows us to translate parsing as the problem of extracting syntactic primitives and permuting them into alignment. Our methodology induces a batch-efficient, end-to-end differentiable architecture that actualizes a formally grounded yet highly efficient neuro-symbolic parser. We test our approach on ÆThel, a dataset of type-logical derivations for written Dutch, where it manages to correctly transcribe raw text sentences into proofs and terms of the linear λ-calculus with an accuracy of as high as 70%.</abstract>
@@ -113,7 +113,7 @@
     <paper id="9">
       <title>Understanding the Source of Semantic Regularities in Word Embeddings</title>
       <author><first>Hsiao-Yu</first><last>Chiang</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Zachary</first><last>Pardos</last></author>
       <pages>119–131</pages>
       <abstract>Semantic relations are core to how humans understand and express concepts in the real world using language. Recently, there has been a thread of research aimed at modeling these relations by learning vector representations from text corpora. Most of these approaches focus strictly on leveraging the co-occurrences of relationship word pairs within sentences. In this paper, we investigate the hypothesis that examples of a lexical relation in a corpus are fundamental to a neural word embedding’s ability to complete analogies involving the relation. Our experiments, in which we remove all known examples of a relation from training corpora, show only marginal degradation in analogy completion performance involving the removed relation. This finding enhances our understanding of neural word embeddings, showing that co-occurrence information of a particular semantic relation is not the main source of their structural regularity.</abstract>
@@ -125,7 +125,7 @@
       <title>Finding The Right One and Resolving it</title>
       <author><first>Payal</first><last>Khullar</last></author>
       <author><first>Arghya</first><last>Bhattacharya</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>132–141</pages>
       <abstract>One-anaphora has figured prominently in theoretical linguistic literature, but computational linguistics research on the phenomenon is sparse. Not only that, the long standing linguistic controversy between the determinative and the nominal anaphoric element one has propagated in the limited body of computational work on one-anaphora resolution, making this task harder than it is. In the present paper, we resolve this by drawing from an adequate linguistic analysis of the word one in different syntactic environments - once again highlighting the significance of linguistic theory in Natural Language Processing (NLP) tasks. We prepare an annotated corpus marking actual instances of one-anaphora with their textual antecedents, and use the annotations to experiment with state-of-the art neural models for one-anaphora resolution. Apart from presenting a strong neural baseline for this task, we contribute a gold-standard corpus, which is, to the best of our knowledge, the biggest resource on one-anaphora till date.</abstract>
       <url hash="51985920">2020.conll-1.10</url>
@@ -135,7 +135,7 @@
     <paper id="11">
       <title>Bridging Information-Seeking Human Gaze and Machine Reading Comprehension</title>
       <author><first>Jonathan</first><last>Malmaud</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Yevgeni</first><last>Berzak</last></author>
       <pages>142–152</pages>
       <abstract>In this work, we analyze how human gaze during reading comprehension is conditioned on the given reading comprehension question, and whether this signal can be beneficial for machine reading comprehension. To this end, we collect a new eye-tracking dataset with a large number of participants engaging in a multiple choice reading comprehension task. Our analysis of this data reveals increased fixation times over parts of the text that are most relevant for answering the question. Motivated by this finding, we propose making automated reading comprehension more human-like by mimicking human information-seeking reading behavior during reading comprehension. We demonstrate that this approach leads to performance gains on multiple choice question answering in English for a state-of-the-art reading comprehension model.</abstract>
@@ -147,7 +147,7 @@
       <title>A Corpus of Very Short Scientific Summaries</title>
       <author><first>Yifan</first><last>Chen</last></author>
       <author><first>Tamara</first><last>Polajnar</last></author>
-      <author><first>Colin</first><last>Batchelor</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <pages>153–164</pages>
       <abstract>We present a new summarisation task, taking scientific articles and producing journal table-of-contents entries in the chemistry domain. These are one- or two-sentence author-written summaries that present the key findings of a paper. This is a first look at this summarisation task with an open access publication corpus consisting of titles and abstracts, as input texts, and short author-written advertising blurbs, as the ground truth. We introduce the dataset and evaluate it with state-of-the-art summarisation methods.</abstract>
@@ -158,7 +158,7 @@
     <paper id="13">
       <title>Recurrent babbling: evaluating the acquisition of grammar from limited input data</title>
       <author><first>Ludovica</first><last>Pannitto</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>165–176</pages>
       <abstract>Recurrent Neural Networks (RNNs) have been shown to capture various aspects of syntax from raw linguistic input. In most previous experiments, however, learning happens over unrealistic corpora, which do not reflect the type and amount of data a child would be exposed to. This paper remedies this state of affairs by training an LSTM over a realistically sized subset of child-directed input. The behaviour of the network is analysed over time using a novel methodology which consists in quantifying the level of grammatical abstraction in the model’s generated output (its ‘babbling’), compared to the language it has been exposed to. We show that the LSTM indeed abstracts new structures as learning proceeds.</abstract>
       <url hash="c12d1427">2020.conll-1.13</url>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="16">
       <title>Identifying Incorrect Labels in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2003 Corpus</title>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Hong</first><last>Xu</last></author>
       <author><first>Bryan</first><last>Cutler</last></author>
       <author><first>Karthik</first><last>Muthuraman</last></author>
@@ -226,7 +226,7 @@
     </paper>
     <paper id="19">
       <title>Relations between comprehensibility and adequacy errors in machine translation output</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>256–264</pages>
       <abstract>This work presents a detailed analysis of translation errors perceived by readers as comprehensibility and/or adequacy issues. The main finding is that good comprehensibility, similarly to good fluency, can mask a number of adequacy errors. Of all major adequacy errors, 30% were fully comprehensible, thus fully misleading the reader to accept the incorrect information. Another 25% of major adequacy errors were perceived as almost comprehensible, thus being potentially misleading. Also, a vast majority of omissions (about 70%) is hidden by comprehensibility. Further analysis of misleading translations revealed that the most frequent error types are ambiguity, mistranslation, noun phrase error, word-by-word translation, untranslated word, subject-verb agreement, and spelling error in the source text. However, none of these error types appears exclusively in misleading translations, but are also frequent in fully incorrect (incomprehensible inadequate) and discarded correct (incomprehensible adequate) translations. Deeper analysis is needed to potentially detect underlying phenomena specifically related to misleading translations.</abstract>
       <url hash="8e87c2e8">2020.conll-1.19</url>
@@ -259,7 +259,7 @@
     <paper id="22">
       <title>Catplayinginthesnow: Impact of Prior Segmentation on a Model of Visually Grounded Speech</title>
       <author><first>William</first><last>Havard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Jean-Pierre</first><last>Chevrot</last></author>
       <pages>291–301</pages>
       <abstract>The language acquisition literature shows that children do not build their lexicon by segmenting the spoken input into phonemes and then building up words from them, but rather adopt a top-down approach and start by segmenting word-like units and then break them down into smaller units. This suggests that the ideal way of learning a language is by starting from full semantic units. In this paper, we investigate if this is also the case for a neural model of Visually Grounded Speech trained on a speech-image retrieval task. We evaluated how well such a network is able to learn a reliable speech-to-image mapping when provided with phone, syllable, or word boundary information. We present a simple way to introduce such information into an RNN-based model and investigate which type of boundary is the most efficient. We also explore at which level of the network’s architecture such information should be introduced so as to maximise its performances. Finally, we show that using multiple boundary types at once in a hierarchical structure, by which low-level segments are used to recompose high-level segments, is beneficial and yields better results than using low-level or high-level segments in isolation.</abstract>
@@ -284,7 +284,7 @@
     <paper id="24">
       <title>Representation Learning for Type-Driven Composition</title>
       <author><first>Gijs</first><last>Wijnholds</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>313–324</pages>
       <abstract>This paper is about learning word representations using grammatical type information. We use the syntactic types of Combinatory Categorial Grammar to develop multilinear representations, i.e. maps with n arguments, for words with different functional types. The multilinear maps of words compose with each other to form sentence representations. We extend the skipgram algorithm from vectors to multi- linear maps to learn these representations and instantiate it on unary and binary maps for transitive verbs. These are evaluated on verb and sentence similarity and disambiguation tasks and a subset of the SICK relatedness dataset. Our model performs better than previous type- driven models and is competitive with state of the art representation learning methods such as BERT and neural sentence encoders.</abstract>
@@ -296,7 +296,7 @@
       <title>Word Representations Concentrate and This is Good News!</title>
       <author><first>Romain</first><last>Couillet</last></author>
       <author><first>Yagmur Gizem</first><last>Cinar</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Muhammad</first><last>Imran</last></author>
       <pages>325–334</pages>
       <abstract>This article establishes that, unlike the legacy tf*idf representation, recent natural language representations (word embedding vectors) tend to exhibit a so-called <i>concentration of measure phenomenon</i>, in the sense that, as the representation size <tex-math>p</tex-math> and database size <tex-math>n</tex-math> are both large, their behavior is similar to that of large dimensional Gaussian random vectors. This phenomenon may have important consequences as machine learning algorithms for natural language data could be amenable to improvement, thereby providing new theoretical insights into the field of natural language processing.</abstract>
@@ -318,7 +318,7 @@
     </paper>
     <paper id="27">
       <title>Re-solve it: simulating the acquisition of core semantic competences from small data</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>344–354</pages>
       <abstract>Many tasks are considered to be ‘solved’ in the computational linguistics literature, but the corresponding algorithms operate in ways which are radically different from human cognition. I illustrate this by coming back to the notion of semantic competence, which includes basic linguistic skills encompassing both referential phenomena and generic knowledge, in particular a) the ability to denote, b) the mastery of the lexicon, or c) the ability to model one’s language use on others. Even though each of those faculties has been extensively tested individually, there is still no computational model that would account for their joint acquisition under the conditions experienced by a human. In this paper, I focus on one particular aspect of this problem: the amount of linguistic data available to the child or machine. I show that given the first competence mentioned above (a denotation function), the other two can in fact be learned from very limited data (2.8M token), reaching state-of-the-art performance. I argue that both the nature of the data and the way it is presented to the system matter to acquisition.</abstract>
       <url hash="0ad4f894">2020.conll-1.27</url>
@@ -371,7 +371,7 @@
     <paper id="32">
       <title>Discourse structure interacts with reference but not syntax in neural language models</title>
       <author><first>Forrest</first><last>Davis</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>396–407</pages>
       <abstract>Language models (LMs) trained on large quantities of text have been claimed to acquire abstract linguistic representations. Our work tests the robustness of these abstractions by focusing on the ability of LMs to learn interactions between different linguistic representations. In particular, we utilized stimuli from psycholinguistic studies showing that humans can condition reference (i.e. coreference resolution) and syntactic processing on the same discourse structure (implicit causality). We compared both transformer and long short-term memory LMs to find that, contrary to humans, implicit causality only influences LM behavior for reference, not syntax, despite model representations that encode the necessary discourse information. Our results further suggest that LM behavior can contradict not only learned representations of discourse but also syntactic agreement, pointing to shortcomings of standard language modeling.</abstract>
       <url hash="c9e34400">2020.conll-1.32</url>
@@ -384,7 +384,7 @@
       <author><first>Robert</first><last>Hawkins</last></author>
       <author><first>Minae</first><last>Kwon</last></author>
       <author><first>Dorsa</first><last>Sadigh</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>408–419</pages>
       <abstract>To communicate with new partners in new contexts, humans rapidly form new linguistic conventions. Recent neural language models are able to comprehend and produce the existing conventions present in their training data, but are not able to flexibly and interactively adapt those conventions on the fly as humans do. We introduce an interactive repeated reference task as a benchmark for models of adaptation in communication and propose a regularized continual learning framework that allows an artificial agent initialized with a generic language model to more accurately and efficiently communicate with a partner over time. We evaluate this framework through simulations on COCO and in real-time reference game experiments with human partners.</abstract>
       <url hash="949e6710">2020.conll-1.33</url>
@@ -396,7 +396,7 @@
       <title>Diverse and Relevant Visual Storytelling with Scene Graph Embeddings</title>
       <author><first>Xudong</first><last>Hong</last></author>
       <author><first>Rakshith</first><last>Shetty</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Khushboo</first><last>Mehra</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <author><first>Bernt</first><last>Schiele</last></author>
@@ -408,11 +408,11 @@
     </paper>
     <paper id="35">
       <title>Alleviating Digitization Errors in Named Entity Recognition for Historical Documents</title>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Ahmed</first><last>Hamdi</last></author>
       <author><first>Elvys</first><last>Linhares Pontes</last></author>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Nicolas</first><last>Sidere</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>431–441</pages>
@@ -458,7 +458,7 @@
     <paper id="39">
       <title>Filler-gaps that neural networks fail to generalize</title>
       <author><first>Debasmita</first><last>Bhattacharya</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>486–495</pages>
       <abstract>It can be difficult to separate abstract linguistic knowledge in recurrent neural networks (RNNs) from surface heuristics. In this work, we probe for highly abstract syntactic constraints that have been claimed to govern the behavior of filler-gap dependencies across different surface constructions. For models to generalize abstract patterns in expected ways to unseen data, they must share representational features in predictable ways. We use cumulative priming to test for representational overlap between disparate filler-gap constructions in English and find evidence that the models learn a general representation for the existence of filler-gap dependencies. However, we find no evidence that the models learn any of the shared underlying grammatical constraints we tested. Our work raises questions about the degree to which RNN language models learn abstract linguistic representations.</abstract>
       <url hash="b734a741">2020.conll-1.39</url>
@@ -495,7 +495,7 @@
       <author><first>Leena</first><last>Shekhar</last></author>
       <author><first>Heeyoung</first><last>Kwon</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>520–530</pages>
       <abstract>Early work on narrative modeling used explicit plans and goals to generate stories, but the language generation itself was restricted and inflexible. Modern methods use language models for more robust generation, but often lack an explicit representation of the scaffolding and dynamics that guide a coherent narrative. This paper introduces a new model that integrates explicit narrative structure with neural language models, formalizing narrative modeling as a Switching Linear Dynamical System (SLDS). A SLDS is a dynamical system in which the latent dynamics of the system (i.e. how the state vector transforms over time) is controlled by top-level discrete switching variables. The switching variables represent narrative structure (e.g., sentiment or discourse states), while the latent state vector encodes information on the current state of the narrative. This probabilistic formulation allows us to control generation, and can be learned in a semi-supervised fashion using both labeled and unlabeled data. Additionally, we derive a Gibbs sampler for our model that can “fill in” arbitrary parts of the narrative, guided by the switching variables. Our filled-in (English language) narratives outperform several baselines on both automatic and human evaluations</abstract>
       <url hash="96ab0e53">2020.conll-1.42</url>
@@ -529,7 +529,7 @@
       <title>Are Pretrained Language Models Symbolic Reasoners over Knowledge?</title>
       <author><first>Nora</first><last>Kassner</last></author>
       <author><first>Benno</first><last>Krojer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>552–564</pages>
       <abstract>How can pretrained language models (PLMs) learn factual knowledge from the training set? We investigate the two most important mechanisms: reasoning and memorization. Prior work has attempted to quantify the number of facts PLMs learn, but we present, using synthetic data, the first study that investigates the causal relation between facts present in training and facts learned by the PLM. For reasoning, we show that PLMs seem to learn to apply some symbolic reasoning rules correctly but struggle with others, including two-hop reasoning. Further analysis suggests that even the application of learned reasoning rules is flawed. For memorization, we identify schema conformity (facts systematically supported by other facts) and frequency as key factors for its success.</abstract>
       <url hash="af9c22c2">2020.conll-1.45</url>
@@ -541,7 +541,7 @@
       <author><first>Tanmay</first><last>Parekh</last></author>
       <author><first>Emily</first><last>Ahn</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>565–577</pages>
       <abstract>Code-switching is a ubiquitous phenomenon in multilingual communities. Natural language technologies that wish to communicate like humans must therefore adaptively incorporate code-switching techniques when they are deployed in multilingual settings. To this end, we propose a Hindi-English human-machine dialogue system that elicits code-switching conversations in a controlled setting. It uses different code-switching agent strategies to understand how users respond and accommodate to the agent’s language choice. Through this system, we collect and release a new dataset CommonDost, comprising of 439 human-machine multilingual conversations. We adapt pre-defined metrics to discover linguistic accommodation from users to agents. Finally, we compare these dialogues with Spanish-English dialogues collected in a similar setting, and analyze the impact of linguistic and socio-cultural factors on code-switching patterns across the two language pairs.</abstract>
       <url hash="2731f8b2">2020.conll-1.46</url>
@@ -566,7 +566,7 @@
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zheng</first><last>Xin</last></author>
       <author><first>Xiaoan</first><last>Ding</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>596–608</pages>
       <abstract>The prior work on natural language inference (NLI) debiasing mainly targets at one or few known biases while not necessarily making the models more robust. In this paper, we focus on the model-agnostic debiasing strategies and explore how to (or is it possible to) make the NLI models robust to multiple distinct adversarial attacks while keeping or even strengthening the models’ generalization power. We firstly benchmark prevailing neural NLI models including pretrained ones on various adversarial datasets. We then try to combat distinct known biases by modifying a mixture of experts (MoE) ensemble method and show that it’s nontrivial to mitigate multiple NLI biases at the same time, and that model-level ensemble method outperforms MoE ensemble method. We also perform data augmentation including text swap, word substitution and paraphrase and prove its efficiency in combating various (though not all) adversarial attacks at the same time. Finally, we investigate several methods to merge heterogeneous training data (1.35M) and perform model ensembling, which are straightforward but effective to strengthen NLI models.</abstract>
@@ -578,7 +578,7 @@
       <title>Cloze Distillation: Improving Neural Language Models with Human Next-Word Prediction</title>
       <author><first>Tiwalayo</first><last>Eisape</last></author>
       <author><first>Noga</first><last>Zaslavsky</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>609–619</pages>
       <abstract>Contemporary autoregressive language models (LMs) trained purely on corpus data have been shown to capture numerous features of human incremental processing. However, past work has also suggested dissociations between corpus probabilities and human next-word predictions. Here we evaluate several state-of-the-art language models for their match to human next-word predictions and to reading time behavior from eye movements. We then propose a novel method for distilling the linguistic information implicit in human linguistic predictions into pre-trained LMs: Cloze Distillation. We apply this method to a baseline neural LM and show potential improvement in reading time prediction and generalization to held-out human cloze data.</abstract>
       <url hash="3828b793">2020.conll-1.49</url>
@@ -613,7 +613,7 @@
       <author><first>Michał</first><last>Pietruszka</last></author>
       <author><first>Łukasz</first><last>Borchmann</last></author>
       <author><first>Jakub</first><last>Chłędowski</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <pages>641–651</pages>
       <abstract>This paper investigates various Transformer architectures on the WikiReading Information Extraction and Machine Reading Comprehension dataset. The proposed dual-source model outperforms the current state-of-the-art by a large margin. Next, we introduce WikiReading Recycled - a newly developed public dataset, and the task of multiple-property extraction. It uses the same data as WikiReading but does not inherit its predecessor’s identified disadvantages. In addition, we provide a human-annotated test set with diagnostic subsets for a detailed analysis of model performance.</abstract>
       <url hash="ed5e0dcc">2020.conll-1.52</url>
@@ -638,12 +638,12 @@
       <editor><first>Omri</first><last>Abend</last></editor>
       <editor><first>Lasha</first><last>Abzianidze</last></editor>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <editor><first>Daniel</first><last>Hershcovich</last></editor>
       <editor><first>Bin</first><last>Li</last></editor>
       <editor><first>Tim</first><last>O'Gorman</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
-      <editor><first>Daniel</first><last>Zeman</last></editor>
+      <editor id="daniel-zeman"><first>Daniel</first><last>Zeman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>November</month>
diff --git a/data/xml/2020.crac.xml b/data/xml/2020.crac.xml
index 2f5058251b..cba85b24d9 100644
--- a/data/xml/2020.crac.xml
+++ b/data/xml/2020.crac.xml
@@ -6,7 +6,7 @@
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <editor><first>Yulia</first><last>Grishina</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Barcelona, Spain (online)</address>
       <month>December</month>
@@ -28,8 +28,8 @@
     </paper>
     <paper id="2">
       <title>It’s absolutely divine! Can fine-grained sentiment analysis benefit from coreference resolution?</title>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>11–21</pages>
       <abstract>While it has been claimed that anaphora or coreference resolution plays an important role in opinion mining, it is not clear to what extent coreference resolution actually boosts performance, if at all. In this paper, we investigate the potential added value of coreference resolution for the aspect-based sentiment analysis of restaurant reviews in two languages, English and Dutch. We focus on the task of aspect category classification and investigate whether including coreference information prior to classification to resolve implicit aspect mentions is beneficial. Because coreference resolution is not a solved task in NLP, we rely on both automatically-derived and gold-standard coreference relations, allowing us to investigate the true upper bound. By training a classifier on a combination of lexical and semantic features, we show that resolving the coreferential relations prior to classification is beneficial in a joint optimization setup. However, this is only the case when relying on gold-standard relations and the result is more outspoken for English than for Dutch. When validating the optimal models, however, we found that only the Dutch pipeline is able to achieve a satisfying performance on a held-out test set and does so regardless of whether coreference information was included.</abstract>
       <url hash="01a14f14">2020.crac-1.2</url>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Anaphoric Zero Pronoun Identification: A Multilingual Approach</title>
       <author><first>Abdulrahman</first><last>Aloraini</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>22–32</pages>
       <abstract>Pro-drop languages such as Arabic, Chinese, Italian or Japanese allow morphologically null but referential arguments in certain syntactic positions, called anaphoric zero-pronouns. Much NLP work on anaphoric zero-pronouns (AZP) is based on gold mentions, but models for their identification are a fundamental prerequisite for their resolution in real-life applications. Such identification requires complex language understanding and knowledge of real-world entities. Transfer learning models, such as BERT, have recently shown to learn surface, syntactic, and semantic information,which can be very useful in recognizing AZPs. We propose a BERT-based multilingual model for AZP identification from predicted zero pronoun positions, and evaluate it on the Arabic and Chinese portions of OntoNotes 5.0. As far as we know, this is the first neural network model of AZP identification for Arabic; and our approach outperforms the stateof-the-art for Chinese. Experiment results suggest that BERT implicitly encode information about AZPs through their surrounding context.</abstract>
       <url hash="3b9e3990">2020.crac-1.3</url>
@@ -66,7 +66,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>T</fixed-case>wi<fixed-case>C</fixed-case>onv: A Coreference-annotated Corpus of <fixed-case>T</fixed-case>witter Conversations</title>
-      <author><first>Berfin</first><last>Aktaş</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
       <author><first>Annalena</first><last>Kohnert</last></author>
       <pages>47–54</pages>
       <abstract>This article introduces TwiConv, an English coreference-annotated corpus of microblog conversations from Twitter. We describe the corpus compilation process and the annotation scheme, and release the corpus publicly, along with this paper. We manually annotated nominal coreference in 1756 tweets arranged in 185 conversation threads. The annotation achieves satisfactory annotation agreement results. We also present a new method for mapping the tweet contents with distributed stand-off annotations, which can easily be adapted to different annotation tasks.</abstract>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="7">
       <title>Integrating knowledge graph embeddings to improve mention representation for bridging anaphora resolution</title>
-      <author><first>Onkar</first><last>Pandit</last></author>
+      <author id="onkar-arun-pandit"><first>Onkar</first><last>Pandit</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
       <author><first>Liva</first><last>Ralaivola</last></author>
       <pages>55–67</pages>
@@ -102,8 +102,8 @@
     </paper>
     <paper id="10">
       <title>Partially-supervised Mention Detection</title>
-      <author><first>Lesly</first><last>Miculicich</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>91–98</pages>
       <abstract>Learning to detect entity mentions without using syntactic information can be useful for integration and joint optimization with other tasks. However, it is common to have partially annotated data for this problem. Here, we investigate two approaches to deal with partial annotation of mentions: weighted loss and soft-target classification. We also propose two neural mention detection approaches: a sequence tagging, and an exhaustive search. We evaluate our methods with coreference resolution as a downstream task, using multitask learning. The results show that the recall and F1 score improve for all methods.</abstract>
       <url hash="bd071624">2020.crac-1.10</url>
@@ -113,7 +113,7 @@
       <title>Neural Coreference Resolution for <fixed-case>A</fixed-case>rabic</title>
       <author><first>Abdulrahman</first><last>Aloraini</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>99–110</pages>
       <abstract>No neural coreference resolver for Arabic exists, in fact we are not aware of any learning-based coreference resolver for Arabic since (Björkelund and Kuhn, 2014). In this paper, we introduce a coreference resolution system for Arabic based on Lee et al’s end-to-end architecture combined with the Arabic version of bert and an external mention detector. As far as we know, this is the first neural coreference resolution system aimed specifically to Arabic, and it substantially outperforms the existing state-of-the-art on OntoNotes 5.0 with a gain of 15.2 points conll F1. We also discuss the current limitations of the task for Arabic and possible approaches that can tackle these challenges.</abstract>
       <url hash="0b21fc05">2020.crac-1.11</url>
@@ -122,7 +122,7 @@
     <paper id="12">
       <title>Enhanced Labelling in Active Learning for Coreference Resolution</title>
       <author><first>Vebjørn</first><last>Espeland</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Benjamin</first><last>Bach</last></author>
       <pages>111–121</pages>
       <abstract>In this paper we describe our attempt to increase the amount of information that can be retrieved through active learning sessions compared to previous approaches. We optimise the annotator’s labelling process using active learning in the context of coreference resolution. Using simulated active learning experiments, we suggest three adjustments to ensure the labelling time is spent as efficiently as possible. All three adjustments provide more information to the machine learner than the baseline, though a large impact on the F1 score over time is not observed. Compared to previous models, we report a marginal F1 improvement on the final coreference models trained using for two out of the three approaches tested when applied to the English OntoNotes 2012 Coreference Resolution data. Our best-performing model achieves 58.01 F1, an increase of 0.93 F1 over the baseline model.</abstract>
@@ -132,7 +132,7 @@
     <paper id="13">
       <title>Reference in Team Communication for Robot-Assisted Disaster Response: An Initial Analysis</title>
       <author><first>Natalia</first><last>Skachkova</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <pages>122–132</pages>
       <abstract>We analyze reference phenomena in a corpus of robot-assisted disaster response team communication. The annotation scheme we designed for this purpose distinguishes different types of entities, roles, reference units and relations. We focus particularly on mission-relevant objects, locations and actors and also annotate a rich set of reference links, including co-reference and various other kinds of relations. We explain the categories used in our annotation, present their distribution in the corpus and discuss challenging cases.</abstract>
       <url hash="de0424db">2020.crac-1.13</url>
@@ -142,7 +142,7 @@
       <title>Resolving Pronouns in <fixed-case>T</fixed-case>witter Streams: Context can Help!</title>
       <author><first>Anietie</first><last>Andy</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>133–138</pages>
       <abstract>Many people live-tweet televised events like Presidential debates and popular TV-shows and discuss people or characters in the event. Naturally, many tweets make pronominal reference to these people/characters. We propose an algorithm for resolving personal pronouns that make reference to people involved in an event, in tweet streams collected during the event.</abstract>
       <url hash="099e87b5">2020.crac-1.14</url>
diff --git a/data/xml/2020.deelio.xml b/data/xml/2020.deelio.xml
index 72935fd9e1..41f196e55f 100644
--- a/data/xml/2020.deelio.xml
+++ b/data/xml/2020.deelio.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</booktitle>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Generalization to Mitigate Synonym Substitution Attacks</title>
       <author><first>Basemah</first><last>Alshemali</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>20–28</pages>
       <abstract>Studies have shown that deep neural networks (DNNs) are vulnerable to adversarial examples – perturbed inputs that cause DNN-based models to produce incorrect results. One robust adversarial attack in the NLP domain is the synonym substitution. In attacks of this variety, the adversary substitutes words with synonyms. Since synonym substitution perturbations aim to satisfy all lexical, grammatical, and semantic constraints, they are difficult to detect with automatic syntax check as well as by humans. In this paper, we propose a structure-free defensive method that is capable of improving the performance of DNN-based models with both clean and adversarial data. Our findings show that replacing the embeddings of the important words in the input samples with the average of their synonyms’ embeddings can significantly improve the generalization of DNN-based classifiers. By doing so, we reduce model sensitivity to particular words in the input samples. Our results indicate that the proposed defense is not only capable of defending against adversarial attacks, but is also capable of improving the performance of DNN-based models when tested on benign data. On average, the proposed defense improved the classification accuracy of the CNN and Bi-LSTM models by 41.30% and 55.66%, respectively, when tested under adversarial attacks. Extended investigation shows that our defensive method can improve the robustness of nonneural models, achieving an average of 17.62% and 22.93% classification accuracy increase on the SVM and XGBoost models, respectively. The proposed defensive method has also shown an average of 26.60% classification accuracy improvement when tested with the infamous BERT model. Our algorithm is generic enough to be applied in any NLP domain and to any model trained on any natural language.</abstract>
       <url hash="d831f583">2020.deelio-1.3</url>
@@ -57,7 +57,7 @@
       <author><first>Varun</first><last>Gangal</last></author>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>29–42</pages>
       <abstract>In this paper, we investigate data augmentation for text generation, which we call GenAug. Text generation and language modeling are important tasks within natural language processing, and are especially challenging for low-data regimes. We propose and evaluate various augmentation methods, including some that incorporate external knowledge, for finetuning GPT-2 on a subset of Yelp Reviews. We also examine the relationship between the amount of augmentation and the quality of the generated text. We utilize several metrics that evaluate important aspects of the generated text including its diversity and fluency. Our experiments demonstrate that insertion of character-level synthetic noise and keyword replacement with hypernyms are effective augmentation methods, and that the quality of generations improves to a peak at approximately three times the amount of original data.</abstract>
       <url hash="70d2167e">2020.deelio-1.4</url>
@@ -121,7 +121,7 @@
       <author><first>Karthik</first><last>Gopalakrishnan</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Pei</first><last>Zhou</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>74–79</pages>
       <abstract>Pretrained language models have excelled at many NLP tasks recently; however, their social intelligence is still unsatisfactory. To enable this, machines need to have a more general understanding of our complicated world and develop the ability to perform commonsense reasoning besides fitting the specific downstream tasks. External commonsense knowledge graphs (KGs), such as ConceptNet, provide rich information about words and their relationships. Thus, towards general commonsense learning, we propose two approaches to implicitly and explicitly infuse such KGs into pretrained language models. We demonstrate our proposed methods perform well on SocialIQA, a social commonsense reasoning task, in both limited and full training data regimes.</abstract>
       <url hash="9b6e1723">2020.deelio-1.9</url>
diff --git a/data/xml/2020.dmr.xml b/data/xml/2020.dmr.xml
index 762d3dcdc3..334b555cce 100644
--- a/data/xml/2020.dmr.xml
+++ b/data/xml/2020.dmr.xml
@@ -6,11 +6,11 @@
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <editor><first>Johan</first><last>Bos</last></editor>
       <editor><first>William</first><last>Croft</last></editor>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Barcelona Spain (online)</address>
       <month>December</month>
@@ -88,11 +88,11 @@
     </paper>
     <paper id="7">
       <title><fixed-case>I</fixed-case>nfo<fixed-case>F</fixed-case>orager: Leveraging Semantic Search with <fixed-case>AMR</fixed-case> for <fixed-case>COVID</fixed-case>-19 Research</title>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>David</first><last>Doughty</last></author>
       <author><first>Steven</first><last>Hill</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>67–77</pages>
       <abstract>This paper examines how Abstract Meaning Representation (AMR) can be utilized for finding answers to research questions in medical scientific documents, in particular, to advance the study of UV (ultraviolet) inactivation of the novel coronavirus that causes the disease COVID-19. We describe the development of a proof-of-concept prototype tool, InfoForager, which uses AMR to conduct a semantic search, targeting the meaning of the user question, and matching this to sentences in medical documents that may contain information to answer that question. This work was conducted as a sprint over a period of six weeks, and reveals both promising results and challenges in reducing the user search time relating to COVID-19 research, and in general, domain adaption of AMR for this task.</abstract>
       <url hash="cc1319d9">2020.dmr-1.7</url>
diff --git a/data/xml/2020.eamt.xml b/data/xml/2020.eamt.xml
index 3f11d15626..07f1376ddb 100644
--- a/data/xml/2020.eamt.xml
+++ b/data/xml/2020.eamt.xml
@@ -3,13 +3,13 @@
   <volume id="1" ingest-date="2020-08-11" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 22nd Annual Conference of the European Association for Machine Translation</booktitle>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Helena</first><last>Moniz</last></editor>
       <editor><first>Sara</first><last>Fumega</last></editor>
       <editor><first>Bruno</first><last>Martins</last></editor>
       <editor><first>Fernando</first><last>Batista</last></editor>
-      <editor><first>Luisa</first><last>Coheur</last></editor>
-      <editor><first>Carla</first><last>Parra</last></editor>
+      <editor id="luisa-coheur"><first>Luisa</first><last>Coheur</last></editor>
+      <editor id="carla-parra-escartin"><first>Carla</first><last>Parra</last></editor>
       <editor><first>Isabel</first><last>Trancoso</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
       <editor><first>Arianna</first><last>Bisazza</last></editor>
@@ -17,7 +17,7 @@
       <editor><first>Ana</first><last>Guerberof</last></editor>
       <editor><first>Mary</first><last>Nurminen</last></editor>
       <editor><first>Lena</first><last>Marg</last></editor>
-      <editor><first>Mikel L.</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></editor>
       <publisher>European Association for Machine Translation</publisher>
       <address>Lisboa, Portugal</address>
       <month>November</month>
@@ -49,7 +49,7 @@
     <paper id="3">
       <title>Efficiently Reusing Old Models Across Languages via Transfer Learning</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>19–28</pages>
       <abstract>Recent progress in neural machine translation (NMT) is directed towards larger neural networks trained on an increasing amount of hardware resources. As a result, NMT models are costly to train, both financially, due to the electricity and hardware cost, and environmentally, due to the carbon footprint. It is especially true in transfer learning for its additional cost of training the “parent” model before transferring knowledge and training the desired “child” model. In this paper, we propose a simple method of re-using an already trained model for different language pairs where there is no need for modifications in model architecture. Our approach does not need a separate parent model for each investigated language pair, as it is typical in NMT transfer learning. To show the applicability of our method, we recycle a Transformer model trained by different researchers and use it to seed models for different language pairs. We achieve better translation quality and shorter convergence times than when training from random initialization.</abstract>
       <url hash="dc6b5b55">2020.eamt-1.3</url>
@@ -71,7 +71,7 @@
       <title>When and Why is Unsupervised Neural Machine Translation Useless?</title>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Miguel</first><last>Graça</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>35–44</pages>
       <abstract>This paper studies the practicality of the current state-of-the-art unsupervised methods in neural machine translation (NMT). In ten translation tasks with various data settings, we analyze the conditions under which the unsupervised methods fail to produce reasonable translations. We show that their performance is severely affected by linguistic dissimilarity and domain mismatch between source and target monolingual data. Such conditions are common for low-resource language pairs, where unsupervised learning works poorly. In all of our experiments, supervised and semi-supervised baselines with 50k-sentence bilingual data outperform the best unsupervised results. Our analyses pinpoint the limits of the current unsupervised NMT and also suggest immediate research directions.</abstract>
       <url hash="ca4d93ad">2020.eamt-1.5</url>
@@ -81,9 +81,9 @@
       <title>Incorporating External Annotation to improve Named Entity Translation in <fixed-case>NMT</fixed-case></title>
       <author><first>Maciej</first><last>Modrzejewski</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>45–51</pages>
       <abstract>The correct translation of named entities (NEs) still poses a challenge for conventional neural machine translation (NMT) systems. This study explores methods incorporating named entity recognition (NER) into NMT with the aim to improve named entity translation. It proposes an annotation method that integrates named entities and inside–outside–beginning (IOB) tagging into the neural network input with the use of source factors. Our experiments on English→German and English→ Chinese show that just by including different NE classes and IOB tagging, we can increase the BLEU score by around 1 point using the standard test set from WMT2019 and achieve up to 12% increase in NE translation rates over a strong baseline.</abstract>
       <url hash="a639a645">2020.eamt-1.6</url>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="8">
       <title>A multi-source approach for <fixed-case>B</fixed-case>reton–<fixed-case>F</fixed-case>rench hybrid machine translation</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>61–70</pages>
@@ -126,7 +126,7 @@
       <title>Low-Resource Unsupervised <fixed-case>NMT</fixed-case>: Diagnosing the Problem and Providing a Linguistically Motivated Solution</title>
       <author><first>Lukas</first><last>Edman</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>81–90</pages>
       <abstract>Unsupervised Machine Translation has been advancing our ability to translate without parallel data, but state-of-the-art methods assume an abundance of monolingual data. This paper investigates the scenario where monolingual data is limited as well, finding that current unsupervised methods suffer in performance under this stricter setting. We find that the performance loss originates from the poor quality of the pretrained monolingual embeddings, and we offer a potential solution: dependency-based word embeddings. These embeddings result in a complementary word representation which offers a boost in performance of around 1.5 BLEU points compared to standard word2vec when monolingual data is limited to 1 million sentences per language. We also find that the inclusion of sub-word information is crucial to improving the quality of the embeddings.</abstract>
       <url hash="77aa3364">2020.eamt-1.10</url>
@@ -158,7 +158,7 @@
       <author><first>Maarit</first><last>Koponen</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
       <author><first>Kaisa</first><last>Vitikainen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>115–124</pages>
       <abstract>This paper presents a user evaluation of machine translation and post-editing for TV subtitles. Based on a process study where 12 professional subtitlers translated and post-edited subtitles, we compare effort in terms of task time and number of keystrokes. We also discuss examples of specific subtitling features like condensation, and how these features may have affected the post-editing results. In addition to overall MT quality, segmentation and timing of the subtitles are found to be important issues to be addressed in future work.</abstract>
       <url hash="3c9c86d2">2020.eamt-1.13</url>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="16">
       <title>Quality In, Quality Out: Learning from Actual Mistakes</title>
-      <author><first>Frederic</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>145–153</pages>
@@ -213,8 +213,8 @@
     <paper id="19">
       <title>Intelligent Translation Memory Matching and Retrieval with Sentence Encoders</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>175–184</pages>
       <abstract>Matching and retrieving previously translated segments from the Translation Memory is a key functionality in Translation Memories systems. However this matching and retrieving process is still limited to algorithms based on edit distance which we have identified as a major drawback in Translation Memories systems. In this paper, we introduce sentence encoders to improve matching and retrieving process in Translation Memories systems - an effective and efficient solution to replace edit distance-based algorithms.</abstract>
       <url hash="01a2c19d">2020.eamt-1.19</url>
@@ -233,7 +233,7 @@
       <author><first>Kamal Kumar</first><last>Gupta</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>195–204</pages>
       <abstract>In interactive machine translation (MT), human translators correct errors in automatic translations in collaboration with the MT systems, which is seen as an effective way to improve the productivity gain in translation. In this study, we model source-language syntactic constituency parse and target-language syntactic descriptions in the form of supertags as conditional context for interactive prediction in neural MT (NMT). We found that the supertags significantly improve productivity gain in translation in interactive-predictive NMT (INMT), while syntactic parsing somewhat found to be effective in reducing human effort in translation. Furthermore, when we model this source- and target-language syntactic information together as the conditional context, both types complement each other and our fully syntax-informed INMT model statistically significantly reduces human efforts in a French–to–English translation task, achieving 4.30 points absolute (corresponding to 9.18% relative) improvement in terms of word prediction accuracy (WPA) and 4.84 points absolute (corresponding to 9.01% relative) reduction in terms of word stroke ratio (WSR) over the baseline.</abstract>
@@ -262,7 +262,7 @@
     <paper id="24">
       <title>Document-level Neural <fixed-case>MT</fixed-case>: A Systematic Comparison</title>
       <author><first>António</first><last>Lopes</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Michael</first><last>Zhang</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
@@ -274,7 +274,7 @@
     <paper id="25">
       <title>Automatic Translation for Multiple <fixed-case>NLP</fixed-case> tasks: a Multi-task Approach to Machine-oriented <fixed-case>NMT</fixed-case> Adaptation</title>
       <author><first>Amirhossein</first><last>Tebbifakhr</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>235–244</pages>
       <abstract>Although machine translation (MT) traditionally pursues “human-oriented” objectives, humans are not the only possible consumers of MT output. For instance, when automatic translations are used to feed downstream Natural Language Processing (NLP) components in cross-lingual settings, they should ideally pursue “machine-oriented” objectives that maximize the performance of these components. Tebbifakhr et al. (2019) recently proposed a reinforcement learning approach to adapt a generic neural MT(NMT) system by exploiting the reward from a downstream sentiment classifier. But what if the downstream NLP tasks to serve are more than one? How to avoid the costs of adapting and maintaining one dedicated NMT system for each task? We address this problem by proposing a multi-task approach to machine-oriented NMT adaptation, which is capable to serve multiple downstream tasks with a single system. Through experiments with Spanish and Italian data covering three different tasks, we show that our approach can outperform a generic NMT system, and compete with single-task models in most of the settings.</abstract>
@@ -311,7 +311,7 @@
     <paper id="29">
       <title>Terminology-Constrained Neural Machine Translation at <fixed-case>SAP</fixed-case></title>
       <author><first>Miriam</first><last>Exel</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Lauritz</first><last>Brandt</last></author>
       <author><first>Simona</first><last>Doneva</last></author>
       <pages>271–280</pages>
@@ -323,7 +323,7 @@
       <title>Ellipsis Translation for a Medical Speech to Speech Translation System</title>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Hervé</first><last>Spechbach</last></author>
       <pages>281–290</pages>
       <abstract>In diagnostic interviews, elliptical utterances allow doctors to question patients in a more efficient and economical way. However, literal translation of such incomplete utterances is rarely possible without affecting communication. Previous studies have focused on automatic ellipsis detection and resolution, but only few specifically address the problem of automatic translation of ellipsis. In this work, we evaluate four different approaches to translate ellipsis in medical dialogues in the context of the speech to speech translation system BabelDr. We also investigate the impact of training data, using an under-sampling method and data with elliptical utterances in context. Results show that the best model is able to translate 88% of elliptical utterances.</abstract>
@@ -332,10 +332,10 @@
     </paper>
     <paper id="31">
       <title>Bifixer and Bicleaner: two open-source tools to clean your parallel data</title>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
-      <author><first>Sergio Ortiz</first><last>Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio Ortiz</first><last>Rojas</last></author>
       <pages>291–298</pages>
       <abstract>This paper shows the utility of two open-source tools designed for parallel data cleaning: Bifixer and Bicleaner. Already used to clean highly noisy parallel content from crawled multilingual websites, we evaluate their performance in a different scenario: cleaning publicly available corpora commonly used to train machine translation systems. We choose four English–Portuguese corpora which we plan to use internally to compute paraphrases at a later stage. We clean the four corpora using both tools, which are described in detail, and analyse the effect of some of the cleaning steps on them. We then compare machine translation training times and quality before and after cleaning these corpora, showing a positive impact particularly for the noisiest ones.</abstract>
       <url hash="c45fb6c6">2020.eamt-1.31</url>
@@ -344,10 +344,10 @@
     <paper id="32">
       <title>An <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>wahili parallel corpus and its use for neural machine translation in the news domain</title>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Andrew</first><last>Secker</last></author>
       <author><first>Susie</first><last>Coleman</last></author>
       <author><first>Julie</first><last>Wall</last></author>
@@ -368,12 +368,12 @@
     <paper id="34">
       <title>A User Study of the Incremental Learning in <fixed-case>NMT</fixed-case></title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
-      <author><first>Álvaro</first><last>Peris</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
       <author><first>Alexandre</first><last>Helle</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <pages>319–328</pages>
       <abstract>In the translation industry, human experts usually supervise and post-edit machine translation hypotheses. Adaptive neural machine translation systems, able to incrementally update the underlying models under an online learning regime, have been proven to be useful to improve the efficiency of this workflow. However, this incremental adaptation is somewhat unstable, and it may lead to undesirable side effects. One of them is the sporadic appearance of made-up words, as a byproduct of an erroneous application of subword segmentation techniques. In this work, we extend previous studies on on-the-fly adaptation of neural machine translation systems. We perform a user study involving professional, experienced post-editors, delving deeper on the aforementioned problems. Results show that adaptive systems were able to learn how to generate the correct translation for task-specific terms, resulting in an improvement of the user’s productivity. We also observed a close similitude, in terms of morphology, between made-up words and the words that were expected.</abstract>
@@ -385,7 +385,7 @@
       <author><first>Daniel Marín</first><last>Buj</last></author>
       <author><first>Daniel</first><last>Ibáñez García</last></author>
       <author><first>Zuzanna</first><last>Parcheta</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>329–338</pages>
       <abstract>In this paper, we present a machine translation system implemented by the Translation Centre for the Bodies of the European Union (CdT). The main goal of this project is to create domain-specific machine translation engines in order to support machine translation services and applications to the Translation Centre’s clients. In this article, we explain the entire implementation process of NICE: Neural Integrated Custom Engines. We describe the problems identified and the solutions provided, and present the final results for different language pairs. Finally, we describe the work that will be done on this project in the future.</abstract>
       <url hash="adf7f158">2020.eamt-1.35</url>
@@ -419,7 +419,7 @@
     </paper>
     <paper id="39">
       <title>On the differences between human translations</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <pages>365–374</pages>
       <abstract>Many studies have confirmed that translated texts exhibit different features than texts originally written in the given language. This work explores texts translated by different translators taking into account expertise and native language. A set of computational analyses was conducted on three language pairs, English-Croatian, German-French and English-Finnish, and the results show that each of the factors has certain influence on the features of the translated texts, especially on sentence length and lexical richness. The results also indicate that for translations used for machine translation evaluation, it is important to specify these factors, especially if comparing machine translation quality with human translation quality is involved.</abstract>
       <url hash="9fe97fc0">2020.eamt-1.39</url>
@@ -433,7 +433,7 @@
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Sabrina</first><last>Girletti</last></author>
       <author><first>Lise</first><last>Volkart</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>375–382</pages>
       <abstract>We believe that machine translation (MT) must be introduced to translation students as part of their training, in preparation for their professional life. In this paper we present a new version of the tool called MT3, which builds on and extends a joint effort undertaken by the Faculty of Languages of the University of Córdoba and Faculty of Translation and Interpreting of the University of Geneva to develop an open-source web platform to teach MT to translation students. We also report on a pilot experiment with the goal of testing the viability of using <tex-math>MT^3</tex-math> in an MT course. The pilot let us identify areas for improvement and collect students’ feedback about the tool’s usability.</abstract>
       <url hash="a7a30886">2020.eamt-1.40</url>
@@ -501,7 +501,7 @@
       <title>Machine Translation Quality: A comparative evaluation of <fixed-case>SMT</fixed-case>, <fixed-case>NMT</fixed-case> and tailored-<fixed-case>NMT</fixed-case> outputs</title>
       <author><first>Maria</first><last>Stasimioti</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Despoina</first><last>Mouratidis</last></author>
       <pages>441–450</pages>
       <abstract>The present study aims to compare three systems: a generic statistical machine translation (SMT), a generic neural machine translation (NMT) and a tailored-NMT system focusing on the English to Greek language pair. The comparison is carried out following a mixed-methods approach, i.e. automatic metrics, as well as side-by-side ranking, adequacy and fluency rating, measurement of actual post editing (PE) effort and human error analysis performed by 16 postgraduate Translation students. The findings reveal a higher score for both the generic NMT and the tailored-NMT outputs as regards automatic metrics and human evaluation metrics, with the tailored-NMT output faring even better than the generic NMT output.</abstract>
@@ -554,7 +554,7 @@
     </paper>
     <paper id="52">
       <title><fixed-case>QR</fixed-case>ev: Machine Translation of User Reviews: What Influences the Translation Quality?</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <pages>461–462</pages>
       <abstract>This project aims to identify the important aspects of translation quality of user reviews which will represent a starting point for developing better automatic MT metrics and challenge test sets, and will be also helpful for developing MT systems for this genre. We work on two types of reviews: Amazon products and IMDb movies, written in English and translated into two closely related target languages, Croatian and Serbian.</abstract>
       <url hash="b9a7abfc">2020.eamt-1.52</url>
@@ -562,10 +562,10 @@
     </paper>
     <paper id="53">
       <title><fixed-case>ELITR</fixed-case>: <fixed-case>E</fixed-case>uropean Live Translator</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Dominik</first><last>Macháček</last></author>
       <author><first>Sangeet</first><last>Sagar</last></author>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <author><first>Jonáš</first><last>Kratochvíl</last></author>
       <author><first>Ebrahim</first><last>Ansari</last></author>
       <author><first>Dario</first><last>Franceschini</last></author>
@@ -574,7 +574,7 @@
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Sebastian</first><last>Stücker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Philip</first><last>Williams</last></author>
@@ -625,21 +625,21 @@
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
       <author><first>Aleix</first><last>Cerdà-i-Cucó</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Hans</first><last>Degroote</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Alejandro</first><last>Kohan</last></author>
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>471–472</pages>
       <abstract>We describe the MAPA project, funded under the Connecting Europe Facility programme, whose goal is the development of an open-source de-identification toolkit for all official European Union languages. It will be developed since January 2020 until December 2021.</abstract>
       <url hash="df50817f">2020.eamt-1.57</url>
@@ -672,13 +672,13 @@
       <author><first>Aleix</first><last>Cerdà-i-Cucó</last></author>
       <author><first>Hans</first><last>Degroote</last></author>
       <author><first>Amando</first><last>Estela</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Alejandro</first><last>Kohan</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Tony</first><last>O’Dowd</last></author>
       <author><first>Sinéad</first><last>O’Gorman</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <author><first>Riccardo</first><last>Superbo</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
@@ -689,7 +689,7 @@
     </paper>
     <paper id="61">
       <title><fixed-case>OPUS</fixed-case>-<fixed-case>MT</fixed-case> – Building open translation services for the World</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Santhosh</first><last>Thottingal</last></author>
       <pages>479–480</pages>
       <abstract>This paper presents OPUS-MT a project that focuses on the development of free resources and tools for machine translation. The current status is a repository of over 1,000 pre-trained neural machine translation models that are ready to be launched in on-line translation services. For this we also provide open source implementations of web applications that can run efficiently on average desktop hardware with a straightforward setup and installation.</abstract>
@@ -707,7 +707,7 @@
       <author><first>Alina</first><last>Kramchaninova</last></author>
       <author><first>Anna</first><last>Bardadym</last></author>
       <author><first>Tom</first><last>Vanallemeersch</last></author>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <author><first>Michal</first><last>Hradiš</last></author>
       <pages>481–482</pages>
       <abstract>The OCCAM project (Optical Character recognition, ClassificAtion &amp; Machine Translation) aims at integrating the CEF (Connecting Europe Facility) Automated Translation service with image classification, Translation Memories (TMs), Optical Character Recognition (OCR), and Machine Translation (MT). It will support the automated translation of scanned business documents (a document format that, currently, cannot be processed by the CEF eTranslation service) and will also lead to a tool useful for the Digital Humanities domain.</abstract>
@@ -769,7 +769,7 @@
     <paper id="68">
       <title>Project <fixed-case>MAIA</fixed-case>: Multilingual <fixed-case>AI</fixed-case> Agent Assistant</title>
       <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Joao</first><last>Graca</last></author>
+      <author id="joao-graca"><first>Joao</first><last>Graca</last></author>
       <author><first>Paulo</first><last>Dimas</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
diff --git a/data/xml/2020.ecnlp.xml b/data/xml/2020.ecnlp.xml
index a11751034c..afe92f77e4 100644
--- a/data/xml/2020.ecnlp.xml
+++ b/data/xml/2020.ecnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-06-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on e-Commerce and NLP</booktitle>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Surya</first><last>Kallumadi</last></editor>
       <editor><first>Nicola</first><last>Ueffing</last></editor>
       <editor><first>Oleg</first><last>Rokhlenko</last></editor>
@@ -102,10 +102,10 @@
     </paper>
     <paper id="7">
       <title>Semi-Supervised Iterative Approach for Domain-Specific Complaint Detection in Social Media</title>
-      <author><first>Akash</first><last>Gautam</last></author>
+      <author id="akash-kumar-gautam"><first>Akash</first><last>Gautam</last></author>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>Rakesh</first><last>Gosangi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>46–53</pages>
       <abstract>In this paper, we present a semi-supervised bootstrapping approach to detect product or service related complaints in social media. Our approach begins with a small collection of annotated samples which are used to identify a preliminary set of linguistic indicators pertinent to complaints. These indicators are then used to expand the dataset. The expanded dataset is again used to extract more indicators. This process is applied for several iterations until we can no longer find any new indicators. We evaluated this approach on a Twitter corpus specifically to detect complaints about transportation services. We started with an annotated set of 326 samples of transportation complaints, and after four iterations of the approach, we collected 2,840 indicators and over 3,700 tweets. We annotated a random sample of 700 tweets from the final dataset and observed that nearly half the samples were actual transportation complaints. Lastly, we also studied how different features based on semantics, orthographic properties, and sentiment contribute towards the prediction of complaints.</abstract>
       <url hash="6ca61f4c">2020.ecnlp-1.7</url>
@@ -166,7 +166,7 @@
     <paper id="12">
       <title>e-Commerce and Sentiment Analysis: Predicting Outcomes of Class Action Lawsuits</title>
       <author><first>Stacey</first><last>Taylor</last></author>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <pages>77–85</pages>
       <abstract>In recent years, the focus of e-Commerce research has been on better understanding the relationship between the internet marketplace, customers, and goods and services. This has been done by examining information that can be gleaned from consumer information, recommender systems, click rates, or the way purchasers go about making buying decisions, for example. This paper takes a very different approach and examines the companies themselves. In the past ten years, e-Commerce giants such as Amazon, Skymall, Wayfair, and Groupon have been embroiled in class action security lawsuits promulgated under Rule 10b(5), which, in short, is one of the Securities and Exchange Commission’s main rules surrounding fraud. Lawsuits are extremely expensive to the company and can damage a company’s brand extensively, with the shareholders left to suffer the consequences. We examined the Management Discussion and Analysis and the Market Risks for 96 companies using sentiment analysis on selected financial measures and found that we were able to predict the outcome of the lawsuits in our dataset using sentiment (tone) alone to a recall of 0.8207 using the Random Forest classifier. We believe that this is an important contribution as it has cross-domain implications and potential, and opens up new areas of research in e-Commerce, finance, and law, as the settlements from the class action lawsuits in our dataset alone are in excess of $1.6 billion dollars, in aggregate.</abstract>
       <url hash="4d048e02">2020.ecnlp-1.12</url>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="13">
       <title>On Application of <fixed-case>B</fixed-case>ayesian Parametric and Non-parametric Methods for User Cohorting in Product Search</title>
-      <author id="shashank-gupta"><first>Shashank</first><last>Gupta</last></author>
+      <author><first>Shashank</first><last>Gupta</last></author>
       <pages>86–89</pages>
       <abstract>In this paper, we study the applicability of Bayesian Parametric and Non-parametric methods for user clustering in an E-commerce search setting. To the best of our knowledge, this is the first work that presents a comparative study of various Bayesian clustering methods in the context of product search. Specifically, we cluster users based on their topical patterns from their respective product search queries. To evaluate the quality of the clusters formed, we perform a collaborative query recommendation task. Our findings indicate that simple parametric model like Latent Dirichlet Allocation (LDA) outperforms more sophisticated non-parametric methods like Distance Dependent Chinese Restaurant Process and Dirichlet Process-based clustering in both tasks.</abstract>
       <url hash="b57c3987">2020.ecnlp-1.13</url>
diff --git a/data/xml/2020.ecomnlp.xml b/data/xml/2020.ecomnlp.xml
index e149c724fd..da2ead448b 100644
--- a/data/xml/2020.ecomnlp.xml
+++ b/data/xml/2020.ecomnlp.xml
@@ -23,7 +23,7 @@
     <paper id="1">
       <title><fixed-case>E</fixed-case>-Commerce Content and Collaborative-based Recommendation using K-Nearest Neighbors and Enriched Weighted Vectors</title>
       <author><first>Bardia</first><last>Rafieian</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>1–10</pages>
       <abstract>In this paper, we present two productive and functional recommender methods to improve the ac- curacy of predicting the right product for the user. One proposal is a survey-based recommender system that uses k-nearest neighbors. It recommends products by asking questions from the user, efficiently applying a binary product vector to the product attributes, and processing the request with a minimum error. The second proposal uses an enriched collaborative-based recommender system using enriched weighted vectors. Thanks to the style rules, the enriched collaborative- based method recommends outfits with competitive recommendation quality. We evaluated both of the proposals on a Kaggle fashion-dataset along with iMaterialist and, results show equivalent performance on binary gender and product attributes.</abstract>
       <url hash="18184d24">2020.ecomnlp-1.1</url>
diff --git a/data/xml/2020.emnlp.xml b/data/xml/2020.emnlp.xml
index c99fbf4121..228d9394a6 100644
--- a/data/xml/2020.emnlp.xml
+++ b/data/xml/2020.emnlp.xml
@@ -3,8 +3,8 @@
   <volume id="main" ingest-date="2020-11-05" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</booktitle>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Trevor</first><last>Cohn</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="trevor-cohn"><first>Trevor</first><last>Cohn</last></editor>
       <editor><first>Yulan</first><last>He</last></editor>
       <editor id="yang-liu-icsi"><first>Yang</first><last>Liu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,8 +23,8 @@
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Seojin</first><last>Bang</last></author>
       <author><first>Emaad</first><last>Manzoor</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>1–23</pages>
       <abstract>Finding attackable sentences in an argument is the first step toward successful refutation in argumentation. We present a first large-scale analysis of sentence attackability in online arguments. We analyze driving reasons for attacks in argumentation and identify relevant characteristics of sentences. We demonstrate that a sentence’s attackability is associated with many of these characteristics regarding the sentence’s content, proposition types, and tone, and that an external knowledge source can provide useful information about attackability. Building on these findings, we demonstrate that machine learning models can automatically detect attackable sentences in arguments, significantly better than several baselines and comparably well to laypeople.</abstract>
       <url hash="56ee711d">2020.emnlp-main.1</url>
@@ -36,8 +36,8 @@
       <title>Extracting Implicitly Asserted Propositions in Argumentation</title>
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Jacky</first><last>Visser</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>24–38</pages>
       <abstract>Argumentation accommodates various rhetorical devices, such as questions, reported speech, and imperatives. These rhetorical tools usually assert argumentatively relevant propositions rather implicitly, so understanding their true meaning is key to understanding certain arguments properly. However, most argument mining systems and computational linguistics research have paid little attention to implicitly asserted propositions in argumentation. In this paper, we examine a wide range of computational methods for extracting propositions that are implicitly asserted in questions, reported speech, and imperatives in argumentation. By evaluating the models on a corpus of 2016 U.S. presidential debates and online commentary, we demonstrate the effectiveness and limitations of the computational models. Our study may inform future research on argument mining and the semantics of these rhetorical devices in argumentation.</abstract>
       <url hash="78e06e63">2020.emnlp-main.2</url>
@@ -207,7 +207,7 @@
       <author><first>Yian</first><last>Zhang</last></author>
       <author><first>Xiaocheng</first><last>Li</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>217–235</pages>
       <abstract>One reason pretraining on self-supervised linguistic tasks is effective is that it teaches models features that are helpful for language understanding. However, we want pretrained models to learn not only to represent linguistic features, but also to use those features preferentially during fine-turning. With this goal in mind, we introduce a new English-language diagnostic set called MSGS (the Mixed Signals Generalization Set), which consists of 20 ambiguous binary classification tasks that we use to test whether a pretrained model prefers linguistic or surface generalizations during finetuning. We pretrain RoBERTa from scratch on quantities of data ranging from 1M to 1B words and compare their performance on MSGS to the publicly available RoBERTa_BASE. We find that models can learn to represent linguistic features with little pretraining data, but require far more data to learn to prefer linguistic generalizations over surface ones. Eventually, with about 30B words of pretraining data, RoBERTa_BASE does consistently demonstrate a linguistic bias with some regularity. We conclude that while self-supervised pretraining is an effective way to learn helpful inductive biases, there is likely room to improve the rate at which models learn which features matter.</abstract>
       <url hash="c06ad84f">2020.emnlp-main.16</url>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>KERMIT</fixed-case>: Complementing Transformer Architectures with Encoders of Explicit Syntactic Interpretations</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Andrea</first><last>Santilli</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Dario</first><last>Onorati</last></author>
@@ -270,9 +270,9 @@
     <paper id="20">
       <title>Pre-Training Transformers as Energy-Based Cloze Models</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>285–294</pages>
       <abstract>We introduce Electric, an energy-based cloze model for representation learning over text. Like BERT, it is a conditional generative model of tokens given their contexts. However, Electric does not use masking or output a full distribution over tokens that could occur in a context. Instead, it assigns a scalar energy score to each input token indicating how likely it is given its context. We train Electric using an algorithm based on noise-contrastive estimation and elucidate how this learning objective is closely related to the recently proposed ELECTRA pre-training method. Electric performs well when transferred to downstream tasks and is particularly effective at producing likelihood scores for text: it re-ranks speech recognition n-best lists better than language models and much faster than masked language models. Furthermore, it offers a clearer and more principled view of what ELECTRA learns during pre-training.</abstract>
       <url hash="5ca72728">2020.emnlp-main.20</url>
@@ -338,7 +338,7 @@
       <author><first>Nathaniel</first><last>Mills</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Yosi</first><last>Mass</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <author><first>David</first><last>Konopnicki</last></author>
       <pages>349–356</pages>
@@ -364,7 +364,7 @@
     <paper id="27">
       <title>Augmented Natural Language for Generative Sequence Labeling</title>
       <author><first>Ben</first><last>Athiwaratkun</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Jason</first><last>Krone</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <pages>375–385</pages>
@@ -380,7 +380,7 @@
       <author><first>Yizhe</first><last>Zhang</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>386–395</pages>
       <abstract>Existing open-domain dialog models are generally trained to minimize the perplexity of target human responses. However, some human replies are more engaging than others, spawning more followup interactions. Current conversational models are increasingly capable of producing turns that are context-relevant, but in order to produce compelling agents, these models need to be able to predict and optimize for turns that are genuinely engaging. We leverage social media feedback data (number of replies and upvotes) to build a large-scale training dataset for feedback prediction. To alleviate possible distortion between the feedback and engagingness, we convert the ranking problem to a comparison of response pairs which involve few confounding factors. We trained DialogRPT, a set of GPT-2 based models on 133M pairs of human feedback data and the resulting ranker outperformed several baselines. Particularly, our ranker outperforms the conventional dialog perplexity baseline with a large margin on predicting Reddit feedback. We finally combine the feedback prediction models and a human-like scoring model to rank the machine-generated dialog responses. Crowd-sourced human evaluation shows that our ranking method correlates better with real human preferences than baseline models.</abstract>
       <url hash="070d32a4">2020.emnlp-main.28</url>
@@ -432,7 +432,7 @@
     <paper id="32">
       <title>A Spectral Method for Unsupervised Multi-Document Summarization</title>
       <author><first>Kexiang</first><last>Wang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>435–445</pages>
       <abstract>Multi-document summarization (MDS) aims at producing a good-quality summary for several related documents. In this paper, we propose a spectral-based hypothesis, which states that the goodness of summary candidate is closely linked to its so-called spectral impact. Here spectral impact considers the perturbation to the dominant eigenvalue of affinity matrix when dropping the summary candidate from the document cluster. The hypothesis is validated by three theoretical perspectives: semantic scaling, propagation dynamics and matrix perturbation. According to the hypothesis, we formulate the MDS task as the combinatorial optimization of spectral impact and propose an accelerated greedy solution based on a surrogate of spectral impact. The evaluation results on various datasets demonstrate: (1) The performance of the summary candidate is positively correlated with its spectral impact, which accords with our hypothesis; (2) Our spectral-based method has a competitive result as compared to state-of-the-art MDS systems.</abstract>
@@ -533,9 +533,9 @@
     <paper id="39">
       <title>Efficient Meta Lifelong-Learning with Limited Memory</title>
       <author><first>Zirui</first><last>Wang</last></author>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last></author>
       <author><first>Barnabas</first><last>Poczos</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>535–548</pages>
       <abstract>Current natural language processing models work well on a single task, yet they often fail to continuously learn new tasks without forgetting previous ones as they are re-trained throughout their lifetime, a challenge known as lifelong learning. State-of-the-art lifelong language learning methods store past examples in episodic memory and replay them at both training and inference time. However, as we show later in our experiments, there are three significant impediments: (1) needing unrealistically large memory module to achieve good performance, (2) suffering from negative transfer, (3) requiring multiple local adaptation steps for each test example that significantly slows down the inference speed. In this paper, we identify three common principles of lifelong learning methods and propose an efficient meta-lifelong framework that combines them in a synergistic fashion. To achieve sample efficiency, our method trains the model in a manner that it learns a better initialization for local adaptation. Extensive experiments on text classification and question answering benchmarks demonstrate the effectiveness of our framework by achieving state-of-the-art performance using merely 1% memory size and narrowing the gap with multi-task learning. We further show that our method alleviates both catastrophic forgetting and negative transfer at the same time.</abstract>
       <url hash="23abf29b">2020.emnlp-main.39</url>
@@ -657,7 +657,7 @@
     <paper id="49">
       <title>Event Extraction by Answering (Almost) Natural Questions</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>671–683</pages>
       <abstract>The problem of event extraction requires detecting the event trigger and extracting its corresponding arguments. Existing work in event argument extraction typically relies heavily on entity recognition as a preprocessing/concurrent step, causing the well-known problem of error propagation. To avoid this issue, we introduce a new paradigm for event extraction by formulating it as a question answering (QA) task that extracts the event arguments in an end-to-end manner. Empirical results demonstrate that our framework outperforms prior methods substantially; in addition, it is capable of extracting event arguments for roles not seen at training time (i.e., in a zero-shot learning setting).</abstract>
       <url hash="a74a1776">2020.emnlp-main.49</url>
@@ -673,8 +673,8 @@
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jonathan</first><last>May</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>684–695</pages>
       <abstract>Event schemas can guide our understanding and ability to make predictions with respect to what might happen next. We propose a new Event Graph Schema, where two event types are connected through multiple paths involving entities that fill important roles in a coherent story. We then introduce Path Language Model, an auto-regressive language model trained on event-event paths, and select salient and coherent paths to probabilistically construct these graph schemas. We design two evaluation metrics, instance coverage and instance coherence, to evaluate the quality of graph schema induction, by checking when coherent event instances are covered by the schema graph. Intrinsic evaluations show that our approach is highly effective at inducing salient and coherent schemas. Extrinsic evaluations show the induced schema repository provides significant improvement to downstream end-to-end Information Extraction over a state-of-the-art joint neural extraction model, when used as additional global features to unfold instance graphs.</abstract>
       <url hash="29d1aba4">2020.emnlp-main.50</url>
@@ -909,9 +909,9 @@
       <title><fixed-case>R</fixed-case>i<fixed-case>SAWOZ</fixed-case>: A Large-Scale Multi-Domain <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Dataset with Rich Semantic Annotations for Task-Oriented Dialogue Modeling</title>
       <author><first>Jun</first><last>Quan</last></author>
       <author><first>Shian</first><last>Zhang</last></author>
-      <author id="qian-cao"><first>Qian</first><last>Cao</last></author>
+      <author><first>Qian</first><last>Cao</last></author>
       <author><first>Zizhong</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>930–940</pages>
       <abstract>In order to alleviate the shortage of multi-domain data and to capture discourse phenomena for task-oriented dialogue modeling, we propose RiSAWOZ, a large-scale multi-domain Chinese Wizard-of-Oz dataset with Rich Semantic Annotations. RiSAWOZ contains 11.2K human-to-human (H2H) multi-turn semantically annotated dialogues, with more than 150K utterances spanning over 12 domains, which is larger than all previous annotated H2H conversational datasets. Both single- and multi-domain dialogues are constructed, accounting for 65% and 35%, respectively. Each dialogue is labeled with comprehensive dialogue annotations, including dialogue goal in the form of natural language description, domain, dialogue states and acts at both the user and system side. In addition to traditional dialogue annotations, we especially provide linguistic annotations on discourse phenomena, e.g., ellipsis and coreference, in dialogues, which are useful for dialogue coreference and ellipsis resolution tasks. Apart from the fully annotated dataset, we also present a detailed description of the data collection procedure, statistics and analysis of the dataset. A series of benchmark models and results are reported, including natural language understanding (intent detection &amp; slot filling), dialogue state tracking and dialogue context-to-text generation, as well as coreference and ellipsis resolution, which facilitate the baseline comparison for future research on this corpus.</abstract>
       <url hash="aac30b8d">2020.emnlp-main.67</url>
@@ -946,7 +946,7 @@
       <title>Predicting Reference: What do Language Models Learn about Discourse Models?</title>
       <author><first>Shiva</first><last>Upadhye</last></author>
       <author><first>Leon</first><last>Bergen</last></author>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <pages>977–982</pages>
       <abstract>Whereas there is a growing literature that probes neural language models to assess the degree to which they have latently acquired grammatical knowledge, little if any research has investigated their acquisition of discourse modeling ability. We address this question by drawing on a rich psycholinguistic literature that has established how different contexts affect referential biases concerning who is likely to be referred to next. The results reveal that, for the most part, the prediction behavior of neural language models does not resemble that of human language users.</abstract>
       <url hash="96ba1cc6">2020.emnlp-main.70</url>
@@ -1013,8 +1013,8 @@
     <paper id="75">
       <title>Multi-task Learning for Multilingual Neural Machine Translation</title>
       <author><first>Yiren</first><last>Wang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>1022–1034</pages>
       <abstract>While monolingual data has been shown to be useful in improving bilingual neural machine translation (NMT), effectively and efficiently leveraging monolingual data for Multilingual NMT (MNMT) systems is a less explored area. In this work, we propose a multi-task learning (MTL) framework that jointly trains the model with the translation task on bitext data and two denoising tasks on the monolingual data. We conduct extensive empirical studies on MNMT systems with 10 language pairs from WMT datasets. We show that the proposed approach can effectively improve the translation quality for both high-resource and low-resource languages with large margin, achieving significantly better results than the individual bilingual models. We also demonstrate the efficacy of the proposed approach in the zero-shot setup for language pairs without bitext training data. Furthermore, we show the effectiveness of MTL over pre-training approaches for both NMT and cross-lingual transfer learning NLU tasks; the proposed approach outperforms massive scale models trained on single task.</abstract>
       <url hash="c6fd6694">2020.emnlp-main.75</url>
@@ -1067,7 +1067,7 @@
       <title>Incorporating a Local Translation Mechanism into Non-autoregressive Translation</title>
       <author><first>Xiang</first><last>Kong</last></author>
       <author><first>Zhisong</first><last>Zhang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1067–1073</pages>
       <abstract>In this work, we introduce a novel local autoregressive translation (LAT) mechanism into non-autoregressive translation (NAT) models so as to capture local dependencies among target outputs. Specifically, for each target decoding position, instead of only one token, we predict a short sequence of tokens in an autoregressive way. We further design an efficient merging algorithm to align and merge the output pieces into one final output sequence. We integrate LAT into the conditional masked language model (CMLM) (Ghazvininejad et al.,2019) and similarly adopt iterative decoding. Empirical results on five translation tasks show that compared with CMLM, our method achieves comparable or better performance with fewer decoding iterations, bringing a 2.5x speedup. Further analysis indicates that our method reduces repeated translations and performs better at longer sentences. Our code will be released to the public.</abstract>
       <url hash="387b7896">2020.emnlp-main.79</url>
@@ -1205,7 +1205,7 @@
     </paper>
     <paper id="89">
       <title><fixed-case>ToTTo</fixed-case>: A Controlled Table-To-Text Generation Dataset</title>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Xuezhi</first><last>Wang</last></author>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
@@ -1265,7 +1265,7 @@
       <title>Reading Between the Lines: Exploring Infilling in Visual Narratives</title>
       <author><first>Khyathi Raghavi</first><last>Chandu</last></author>
       <author><first>Ruo-Ping</first><last>Dong</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>1220–1229</pages>
       <abstract>Generating long form narratives such as stories and procedures from multiple modalities has been a long standing dream for artificial intelligence. In this regard, there is often crucial subtext that is derived from the surrounding contexts. The general seq2seq training methods render the models shorthanded while attempting to bridge the gap between these neighbouring contexts. In this paper, we tackle this problem by using infilling techniques involving prediction of missing steps in a narrative while generating textual descriptions from a sequence of images. We also present a new large scale visual procedure telling (ViPT) dataset with a total of 46,200 procedures and around 340k pairwise images and textual descriptions that is rich in such contextual dependencies. Generating steps using infilling technique demonstrates the effectiveness in visual procedures with more coherent texts. We conclusively show a METEOR score of 27.51 on procedures which is higher than the state-of-the-art on visual storytelling. We also demonstrate the effects of interposing new text with missing images during inference. The code and the dataset will be publicly available at <url>https://visual-narratives.github.io/Visual-Narratives/</url>.</abstract>
       <url hash="0f0dadd1">2020.emnlp-main.93</url>
@@ -1276,7 +1276,7 @@
     <paper id="94">
       <title>Acrostic Poem Generation</title>
       <author><first>Rajat</first><last>Agarwal</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>1230–1240</pages>
       <abstract>We propose a new task in the area of computational creativity: acrostic poem generation in English. Acrostic poems are poems that contain a hidden message; typically, the first letter of each line spells out a word or short phrase. We define the task as a generation task with multiple constraints: given an input word, 1) the initial letters of each line should spell out the provided word, 2) the poem’s semantics should also relate to it, and 3) the poem should conform to a rhyming scheme. We further provide a baseline model for the task, which consists of a conditional neural language model in combination with a neural rhyming model. Since no dedicated datasets for acrostic poem generation exist, we create training data for our task by first training a separate topic prediction model on a small set of topic-annotated poems and then predicting topics for additional poems. Our experiments show that the acrostic poems generated by our baseline are received well by humans and do not lose much quality due to the additional constraints. Last, we confirm that poems generated by our model are indeed closely related to the provided prompts, and that pretraining on Wikipedia can boost performance.</abstract>
       <url hash="9644e4b6">2020.emnlp-main.94</url>
@@ -1302,7 +1302,7 @@
       <title>Grounded Compositional Outputs for Adaptive Language Modeling</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
       <author><first>Phoebe</first><last>Mulcaire</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1252–1267</pages>
       <abstract>Language models have emerged as a central component across NLP, and a great deal of progress depends on the ability to cheaply adapt them (e.g., through finetuning) to new domains and tasks. A language model’s <i>vocabulary</i>—typically selected before training and permanently fixed later—affects its size and is part of what makes it resistant to such adaptation. Prior work has used compositional input embeddings based on surface forms to ameliorate this issue. In this work, we go one step beyond and propose a fully compositional output embedding layer for language models, which is further grounded in information from a structured lexicon (WordNet), namely semantically related words and free-text definitions. To our knowledge, the result is the first word-level language model with a size that does not depend on the training vocabulary. We evaluate the model on conventional language modeling as well as challenging cross-domain settings with an open vocabulary, finding that it matches or outperforms previous state-of-the-art output embedding methods and adaptation approaches. Our analysis attributes the improvements to sample efficiency: our model is more accurate for low-frequency words.</abstract>
       <url hash="e277894f">2020.emnlp-main.96</url>
@@ -1339,7 +1339,7 @@
       <title>Scalable Multi-Hop Relational Reasoning for Knowledge-Aware Question Answering</title>
       <author><first>Yanlin</first><last>Feng</last></author>
       <author><first>Xinyue</first><last>Chen</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Peifeng</first><last>Wang</last></author>
       <author><first>Jun</first><last>Yan</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -1355,7 +1355,7 @@
       <title>Improving Bilingual Lexicon Induction for Low Frequency Words</title>
       <author><first>Jiaji</first><last>Huang</last></author>
       <author><first>Xingyu</first><last>Cai</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>1310–1314</pages>
       <abstract>This paper designs a Monolingual Lexicon Induction task and observes that two factors accompany the degraded accuracy of bilingual lexicon induction for rare words. First, a diminishing margin between similarities in low frequency regime, and secondly, exacerbated hubness at low frequency. Based on the observation, we further propose two methods to address these two factors, respectively. The larger issue is hubness. Addressing that improves induction accuracy significantly, especially for low-frequency words.</abstract>
       <url hash="7b31022a">2020.emnlp-main.100</url>
@@ -1382,7 +1382,7 @@
       <author><first>Yuchen</first><last>Zhuang</last></author>
       <author><first>Jie</first><last>Lyu</last></author>
       <author><first>Tuo</first><last>Zhao</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>1326–1340</pages>
       <abstract>Fine-tuned pre-trained language models can suffer from severe miscalibration for both in-distribution and out-of-distribution (OOD) data due to over-parameterization. To mitigate this issue, we propose a regularized fine-tuning method. Our method introduces two types of regularization for better calibration: (1) On-manifold regularization, which generates pseudo on-manifold samples through interpolation within the data manifold. Augmented training with these pseudo samples imposes a smoothness regularization to improve in-distribution calibration. (2) Off-manifold regularization, which encourages the model to output uniform distributions for pseudo off-manifold samples to address the over-confidence issue for OOD data. Our experiments demonstrate that the proposed method outperforms existing calibration methods for text classification in terms of expectation calibration error, misclassification detection, and OOD detection on six datasets. Our code can be found at <url>https://github.com/Lingkai-Kong/Calibrated-BERT-Fine-Tuning</url>.</abstract>
       <url hash="54641d64">2020.emnlp-main.102</url>
@@ -1393,7 +1393,7 @@
     <paper id="103">
       <title>Scaling Hidden <fixed-case>M</fixed-case>arkov Language Models</title>
       <author><first>Justin</first><last>Chiu</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>1341–1349</pages>
       <abstract>The hidden Markov model (HMM) is a fundamental tool for sequence modeling that cleanly separates the hidden state from the emission structure. However, this separation makes it difficult to fit HMMs to large datasets in modern NLP, and they have fallen out of use due to very poor performance compared to fully observed models. This work revisits the challenge of scaling HMMs to language modeling datasets, taking ideas from recent approaches to neural modeling. We propose methods for scaling HMMs to massive state spaces while maintaining efficient exact inference, a compact parameterization, and effective regularization. Experiments show that this approach leads to models that are much more accurate than previous HMMs and n-gram-based methods, making progress towards the performance of state-of-the-art NN models.</abstract>
       <url hash="66a4f5ab">2020.emnlp-main.103</url>
@@ -1405,7 +1405,7 @@
       <title>Coding Textual Inputs Boosts the Accuracy of Neural Networks</title>
       <author><first>Abdul Rafae</first><last>Khan</last></author>
       <author><first>Jia</first><last>Xu</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>1350–1360</pages>
       <abstract>Natural Language Processing (NLP) tasks are usually performed word by word on textual inputs. We can use arbitrary symbols to represent the linguistic meaning of a word and use these symbols as inputs. As “alternatives” to a text representation, we introduce Soundex, MetaPhone, NYSIIS, logogram to NLP, and develop fixed-output-length coding and its extension using Huffman coding. Each of those codings combines different character/digital sequences and constructs a new vocabulary based on codewords. We find that the integration of those codewords with text provides more reliable inputs to Neural-Network-based NLP systems through redundancy than text-alone inputs. Experiments demonstrate that our approach outperforms the state-of-the-art models on the application of machine translation, language modeling, and part-of-speech tagging. The source code is available at <url>https://github.com/abdulrafae/coding_nmt</url>.</abstract>
       <url hash="53b5562f">2020.emnlp-main.104</url>
@@ -1418,7 +1418,7 @@
       <author><first>Orion</first><last>Weller</last></author>
       <author><first>Nicholas</first><last>Lourie</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <pages>1361–1375</pages>
       <abstract>Typically, machine learning systems solve new tasks by training on thousands of examples. In contrast, humans can solve new tasks by reading some instructions, with perhaps an example or two. To take a step toward closing this gap, we introduce a framework for developing NLP systems that solve new tasks after reading their descriptions, synthesizing prior work in this area. We instantiate this frame- work with a new English language dataset, ZEST, structured for task-oriented evaluation on unseen tasks. Formulating task descriptions as questions, we ensure each is general enough to apply to many possible inputs, thus comprehensively evaluating a model’s ability to solve each task. Moreover, the dataset’s structure tests specific types of systematic generalization. We find that the state-of-the-art T5 model achieves a score of 12% on ZEST, leaving a significant challenge for NLP researchers.</abstract>
       <url hash="271873fc">2020.emnlp-main.105</url>
@@ -1563,7 +1563,7 @@
       <author><first>Yining</first><last>Wang</last></author>
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1490–1499</pages>
       <abstract>Medical entity normalization, which links medical mentions in the text to entities in knowledge bases, is an important research topic in medical natural language processing. In this paper, we focus on Chinese medical procedure entity normalization. However, nonstandard Chinese expressions and combined procedures present challenges in our problem. The existing strategies relying on the discriminative model are poorly to cope with normalizing combined procedure mentions. We propose a sequence generative framework to directly generate all the corresponding medical procedure entities. we adopt two strategies: category-based constraint decoding and category-based model refining to avoid unrealistic results. The method is capable of linking entities when a mention contains multiple procedure concepts and our comprehensive experiments demonstrate that the proposed model can achieve remarkable improvements over existing baselines, particularly significant in the case of multi-implication Chinese medical procedures.</abstract>
       <url hash="751712ff">2020.emnlp-main.116</url>
@@ -1577,7 +1577,7 @@
       <author><first>Saahil</first><last>Jain</last></author>
       <author><first>Pranav</first><last>Rajpurkar</last></author>
       <author><first>Anuj</first><last>Pareek</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <author><first>Matthew</first><last>Lungren</last></author>
       <pages>1500–1519</pages>
       <abstract>The extraction of labels from radiology text reports enables large-scale training of medical imaging models. Existing approaches to report labeling typically rely either on sophisticated feature engineering based on medical domain knowledge or manual annotations by experts. In this work, we introduce a BERT-based approach to medical image report labeling that exploits both the scale of available rule-based systems and the quality of expert annotations. We demonstrate superior performance of a biomedically pretrained BERT model first trained on annotations of a rule-based labeler and then finetuned on a small set of expert annotations augmented with automated backtranslation. We find that our final model, CheXbert, is able to outperform the previous best rules-based labeler with statistical significance, setting a new SOTA for report labeling on one of the largest datasets of chest x-rays.</abstract>
@@ -1621,7 +1621,7 @@
       <author><first>Haejun</first><last>Lee</last></author>
       <author><first>Drew A.</first><last>Hudson</last></author>
       <author><first>Kangwook</first><last>Lee</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1551–1562</pages>
       <abstract>We introduce Sentence-level Language Modeling, a new pre-training objective for learning a discourse language representation in a fully self-supervised manner. Recent pre-training methods in NLP focus on learning either bottom or top-level language representations: contextualized word representations derived from language model objectives at one extreme and a whole sequence representation learned by order classification of two given textual segments at the other. However, these models are not directly encouraged to capture representations of intermediate-size structures that exist in natural languages such as sentences and the relationships among them. To that end, we propose a new approach to encourage learning of a contextualized sentence-level representation by shuffling the sequence of input sentences and training a hierarchical transformer model to reconstruct the original ordering. Through experiments on downstream tasks such as GLUE, SQuAD, and DiscoEval, we show that this feature of our model improves the performance of the original BERT by large margins.</abstract>
       <url hash="71f951ce">2020.emnlp-main.120</url>
@@ -1679,7 +1679,7 @@
     <paper id="125">
       <title>Compositional Phrase Alignment and Beyond</title>
       <author><first>Yuki</first><last>Arase</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1611–1623</pages>
       <abstract>Phrase alignment is the basis for modelling sentence pair interactions, such as paraphrase and textual entailment recognition. Most phrase alignments are compositional processes such that an alignment of a phrase pair is constructed based on the alignments of their child phrases. Nonetheless, studies have revealed that non-compositional alignments involving long-distance phrase reordering are prevalent in practice. We address the phrase alignment problem by combining an unordered tree mapping algorithm and phrase representation modelling that explicitly embeds the similarity distribution in the sentences onto powerful contextualized representations. Experimental results demonstrate that our method effectively handles compositional and non-compositional global phrase alignments. Our method significantly outperforms that used in a previous study and achieves a performance competitive with that of experienced human annotators.</abstract>
       <url hash="772293a3">2020.emnlp-main.125</url>
@@ -1706,7 +1706,7 @@
       <title>Double Graph Based Reasoning for Document-level Relation Extraction</title>
       <author><first>Shuang</first><last>Zeng</last></author>
       <author><first>Runxin</first><last>Xu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>1630–1640</pages>
       <abstract>Document-level relation extraction aims to extract relations among entities within a document. Different from sentence-level relation extraction, it requires reasoning over multiple sentences across paragraphs. In this paper, we propose Graph Aggregation-and-Inference Network (GAIN), a method to recognize such relations for long paragraphs. GAIN constructs two graphs, a heterogeneous mention-level graph (MG) and an entity-level graph (EG). The former captures complex interaction among different mentions and the latter aggregates mentions underlying for the same entities. Based on the graphs we propose a novel path reasoning mechanism to infer relations between entities. Experiments on the public dataset, DocRED, show GAIN achieves a significant performance improvement (2.85 on F1) over the previous state-of-the-art. Our code is available at <url>https://github.com/PKUnlp-icler/GAIN</url>.</abstract>
@@ -1805,7 +1805,7 @@
     </paper>
     <paper id="134">
       <title>Beyond [<fixed-case>CLS</fixed-case>] through Ranking by Generation</title>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Xiaofei</first><last>Ma</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Zhiheng</first><last>Huang</last></author>
@@ -1821,7 +1821,7 @@
       <title>Tired of Topic Models? Clusters of Pretrained Word Embeddings Make for Fast and Good Topics too!</title>
       <author><first>Suzanna</first><last>Sia</last></author>
       <author><first>Ayush</first><last>Dalmia</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <pages>1728–1736</pages>
       <abstract>Topic models are a useful analysis tool to uncover the underlying themes within document collections. The dominant approach is to use probabilistic topic models that posit a generative story, but in this paper we propose an alternative way to obtain topics: clustering pre-trained word embeddings while incorporating document information for weighted clustering and reranking top words. We provide benchmarks for the combination of different word embeddings and clustering algorithms, and analyse their performance under dimensionality reduction with PCA. The best performing combination for our approach performs as well as classical topic models, but with lower runtime and computational complexity.</abstract>
       <url hash="fef1e90c">2020.emnlp-main.135</url>
@@ -1884,7 +1884,7 @@
       <title>Incorporating Multimodal Information in Open-Domain Web Keyphrase Extraction</title>
       <author><first>Yansen</first><last>Wang</last></author>
       <author><first>Zhen</first><last>Fan</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>1790–1800</pages>
       <abstract>Open-domain Keyphrase extraction (KPE) on the Web is a fundamental yet complex NLP task with a wide range of practical applications within the field of Information Retrieval. In contrast to other document types, web page designs are intended for easy navigation and information finding. Effective designs encode within the layout and formatting signals that point to where the important information can be found. In this work, we propose a modeling approach that leverages these multi-modal signals to aid in the KPE task. In particular, we leverage both lexical and visual features (e.g., size, font, position) at the micro-level to enable effective strategy induction and meta-level features that describe pages at a macro-level to aid in strategy selection. Our evaluation demonstrates that a combination of effective strategy induction and strategy selection within this approach for the KPE task outperforms state-of-the-art models. A qualitative post-hoc analysis illustrates how these features function within the model.</abstract>
       <url hash="b7ec773b">2020.emnlp-main.140</url>
@@ -1953,7 +1953,7 @@
       <title><fixed-case>B</fixed-case>i<fixed-case>ST</fixed-case>: Bi-directional Spatio-Temporal Reasoning for Video-Grounded Dialogues</title>
       <author><first>Hung</first><last>Le</last></author>
       <author><first>Doyen</first><last>Sahoo</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <pages>1846–1859</pages>
       <abstract>Video-grounded dialogues are very challenging due to (i) the complexity of videos which contain both spatial and temporal variations, and (ii) the complexity of user utterances which query different segments and/or different objects in videos over multiple dialogue turns. However, existing approaches to video-grounded dialogues often focus on superficial temporal-level visual cues, but neglect more fine-grained spatial signals from videos. To address this drawback, we proposed Bi-directional Spatio-Temporal Learning (BiST), a vision-language neural framework for high-resolution queries in videos based on textual cues. Specifically, our approach not only exploits both spatial and temporal-level information, but also learns dynamic information diffusion between the two feature spaces through spatial-to-temporal and temporal-to-spatial reasoning. The bidirectional strategy aims to tackle the evolving semantics of user queries in the dialogue setting. The retrieved visual cues are used as contextual information to construct relevant responses to the users. Our empirical results and comprehensive qualitative analysis show that BiST achieves competitive performance and generates reasonable responses on a large-scale AVSD benchmark. We also adapt our BiST models to the Video QA setting, and substantially outperform prior approaches on the TGIF-QA benchmark.</abstract>
@@ -1967,7 +1967,7 @@
       <author><first>Hung</first><last>Le</last></author>
       <author><first>Doyen</first><last>Sahoo</last></author>
       <author><first>Chenghao</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <pages>1860–1877</pages>
       <abstract>Building an end-to-end conversational agent for multi-domain task-oriented dialogues has been an open challenge for two main reasons. First, tracking dialogue states of multiple domains is non-trivial as the dialogue agent must obtain complete states from all relevant domains, some of which might have shared slots among domains as well as unique slots specifically for one domain only. Second, the dialogue agent must also process various types of information across domains, including dialogue context, dialogue states, and database, to generate natural responses to users. Unlike the existing approaches that are often designed to train each module separately, we propose “UniConv” - a novel unified neural architecture for end-to-end conversational systems in multi-domain task-oriented dialogues, which is designed to jointly train (i) a Bi-level State Tracker which tracks dialogue states by learning signals at both slot and domain level independently, and (ii) a Joint Dialogue Act and Response Generator which incorporates information from various input components and models dialogue acts and target responses simultaneously. We conduct comprehensive experiments in dialogue state tracking, context-to-text, and end-to-end settings on the MultiWOZ2.1 benchmark, achieving superior performance over competitive baselines.</abstract>
@@ -1997,7 +1997,7 @@
       <author><first>Feng</first><last>Shi</last></author>
       <author><first>Tao</first><last>Yuan</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <pages>1889–1899</pages>
       <abstract>Inducing a meaningful structural representation from one or a set of dialogues is a crucial but challenging task in computational linguistics. Advancement made in this area is critical for dialogue system design and discourse analysis. It can also be extended to solve grammatical inference. In this work, we propose to incorporate structured attention layers into a Variational Recurrent Neural Network (VRNN) model with discrete latent states to learn dialogue structure in an unsupervised fashion. Compared to a vanilla VRNN, structured attention enables a model to focus on different parts of the source sentence embeddings while enforcing a structural inductive bias. Experiments show that on two-party dialogue datasets, VRNN with structured attention learns semantic structures that are similar to templates used to generate this dialogue corpus. While on multi-party dialogue datasets, our model learns an interactive structure demonstrating its capability of distinguishing speakers or addresses, automatically disentangling dialogues without explicit human annotation.</abstract>
       <url hash="cce58ed1">2020.emnlp-main.148</url>
@@ -2014,7 +2014,7 @@
       <author><first>Xiaozhong</first><last>Liu</last></author>
       <author><first>Changlong</first><last>Sun</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1900–1910</pages>
       <abstract>In the past few years, audiences from different fields witness the achievements of sequence-to-sequence models (e.g., LSTM+attention, Pointer Generator Networks and Transformer) to enhance dialogue content generation. While content fluency and accuracy often serve as the major indicators for model training, dialogue logics, carrying critical information for some particular domains, are often ignored. Take customer service and court debate dialogue as examples, compatible logics can be observed across different dialogue instances, and this information can provide vital evidence for utterance generation. In this paper, we propose a novel network architecture - Cross Copy Networks (CCN) to explore the current dialog context and similar dialogue instances’ logical structure simultaneously. Experiments with two tasks, court debate and customer service content generation, proved that the proposed algorithm is superior to existing state-of-art content generation models.</abstract>
       <url hash="408fb65e">2020.emnlp-main.149</url>
@@ -2027,7 +2027,7 @@
       <author><first>Qi</first><last>Jia</last></author>
       <author><first>Yizhu</first><last>Liu</last></author>
       <author><first>Siyu</first><last>Ren</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <author><first>Haifeng</first><last>Tang</last></author>
       <pages>1911–1920</pages>
       <abstract>Multi-turn response selection is a task designed for developing dialogue agents. The performance on this task has a remarkable improvement with pre-trained language models. However, these models simply concatenate the turns in dialogue history as the input and largely ignore the dependencies between the turns. In this paper, we propose a dialogue extraction algorithm to transform a dialogue history into threads based on their dependency relations. Each thread can be regarded as a self-contained sub-dialogue. We also propose Thread-Encoder model to encode threads and candidates into compact representations by pre-trained Transformers and finally get the matching score through an attention layer. The experiments show that dependency relations are helpful for dialogue context understanding, and our model outperforms the state-of-the-art baselines on both DSTC7 and DSTC8*, with competitive results on UbuntuV2.</abstract>
@@ -2068,7 +2068,7 @@
       <author><first>Mandar</first><last>Joshi</last></author>
       <author><first>John</first><last>Thickstun</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1938–1952</pages>
       <abstract>Decisions of complex models for language understanding can be explained by limiting the inputs they are provided to a relevant subsequence of the original text — a rationale. Models that condition predictions on a concise rationale, while being more interpretable, tend to be less accurate than models that are able to use the entire context. In this paper, we show that it is possible to better manage the trade-off between concise explanations and high task accuracy by optimizing a bound on the Information Bottleneck (IB) objective. Our approach jointly learns an explainer that predicts sparse binary masks over input sentences without explicit supervision, and an end-task predictor that considers only the residual sentences. Using IB, we derive a learning objective that allows direct control of mask sparsity levels through a tunable sparse prior. Experiments on the ERASER benchmark demonstrate significant gains over previous work for both task performance and agreement with human rationales. Furthermore, we find that in the semi-supervised setting, a modest amount of gold rationales (25% of training examples with gold masks) can close the performance gap with a model that uses the full input.</abstract>
       <url hash="7ca4716f">2020.emnlp-main.153</url>
@@ -2081,7 +2081,7 @@
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Clara</first><last>Vania</last></author>
       <author><first>Rasika</first><last>Bhalerao</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>1953–1967</pages>
       <abstract>Pretrained language models, especially masked language models (MLMs) have seen success across many NLP tasks. However, there is ample evidence that they use the cultural biases that are undoubtedly present in the corpora they are trained on, implicitly creating harm with biased representations. To measure some forms of social bias in language models against protected demographic groups in the US, we introduce the Crowdsourced Stereotype Pairs benchmark (CrowS-Pairs). CrowS-Pairs has 1508 examples that cover stereotypes dealing with nine types of bias, like race, religion, and age. In CrowS-Pairs a model is presented with two sentences: one that is more stereotyping and another that is less stereotyping. The data focuses on stereotypes about historically disadvantaged groups and contrasts them with advantaged groups. We find that all three of the widely-used MLMs we evaluate substantially favor sentences that express stereotypes in every category in CrowS-Pairs. As work on building less biased models advances, this dataset can be used as a benchmark to evaluate progress.</abstract>
       <url hash="58544784">2020.emnlp-main.154</url>
@@ -2107,7 +2107,7 @@
       <author><first>Michael</first><last>Hahn</last></author>
       <author><first>Surya</first><last>Ganguli</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1978–2010</pages>
       <abstract>Recurrent neural networks empirically generate natural language with high syntactic fidelity. However, their success is not well-understood theoretically. We provide theoretical insight into this success, proving in a finite-precision setting that RNNs can efficiently generate bounded hierarchical languages that reflect the scaffolding of natural language syntax. We introduce Dyck-<tex-math>(k,m)</tex-math>, the language of well-nested brackets (of <tex-math>k</tex-math> types) and <tex-math>m</tex-math>-bounded nesting depth, reflecting the bounded memory needs and long-distance dependencies of natural language syntax. The best known results use <tex-math>O(k^{\frac{m}{2}})</tex-math> memory (hidden units) to generate these languages. We prove that an RNN with <tex-math>O(m \log k)</tex-math> hidden units suffices, an exponential reduction in memory, by an explicit construction. Finally, we show that no algorithm, even with unbounded computation, can suffice with <tex-math>o(m \log k)</tex-math> hidden units.</abstract>
       <url hash="6eb60a5f">2020.emnlp-main.156</url>
@@ -2266,7 +2266,7 @@
       <title>Retrofitting Structure-aware Transformer Language Model for End Tasks</title>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>2151–2161</pages>
       <abstract>We consider retrofitting structure-aware Transformer language model for facilitating end tasks by proposing to exploit syntactic distance to encode both the phrasal constituency and dependency connection into the language model. A middle-layer structural learning strategy is leveraged for structure integration, accomplished with main semantic task training under multi-task learning scheme. Experimental results show that the retrofitted structure-aware Transformer language model achieves improved perplexity, meanwhile inducing accurate syntactic phrases. By performing structure-aware fine-tuning, our model achieves significant improvements for both semantic- and syntactic-dependent tasks.</abstract>
       <url hash="bb986069">2020.emnlp-main.168</url>
@@ -2280,7 +2280,7 @@
       <author><first>Zhijiang</first><last>Guo</last></author>
       <author><first>Zhiyang</first><last>Teng</last></author>
       <author><first>Wei</first><last>Lu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Zuozhu</first><last>Liu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <pages>2162–2172</pages>
@@ -2307,7 +2307,7 @@
       <title>Understanding the Mechanics of <fixed-case>SPIGOT</fixed-case>: Surrogate Gradients for Latent Structure Learning</title>
       <author><first>Tsvetomila</first><last>Mihaylova</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>2186–2202</pages>
       <abstract>Latent structure models are a powerful tool for modeling language data: they can mitigate the error propagation and annotation bottleneck in pipeline systems, while simultaneously uncovering linguistic insights about the data. One challenge with end-to-end training of these models is the argmax operation, which has null gradient. In this paper, we focus on surrogate gradients, a popular strategy to deal with this problem. We explore latent structure learning through the angle of pulling back the downstream learning objective. In this paradigm, we discover a principled motivation for both the straight-through estimator (STE) as well as the recently-proposed SPIGOT – a variant of STE for structured models. Our perspective leads to new algorithms in the same family. We empirically compare the known and the novel pulled-back estimators against the popular alternatives, yielding new insight for practitioners and revealing intriguing failure cases.</abstract>
       <url hash="be4f9c2e">2020.emnlp-main.171</url>
@@ -2347,7 +2347,7 @@
       <author><first>Tao</first><last>Lin</last></author>
       <author><first>Fei</first><last>Mi</last></author>
       <author><first>Martin</first><last>Jaggi</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2226–2241</pages>
       <abstract>We present an efficient method of utilizing pretrained language models, where we learn selective binary masks for pretrained weights in lieu of modifying them through finetuning. Extensive evaluations of masking BERT, RoBERTa, and DistilBERT on eleven diverse NLP tasks show that our masking scheme yields performance comparable to finetuning, yet has a much smaller memory footprint when several tasks need to be inferred. Intrinsic evaluations show that representations computed by our binary masked language models encode information necessary for solving downstream tasks. Analyzing the loss landscape, we show that masking and finetuning produce models that reside in minima that can be connected by a line segment with nearly constant test accuracy. This confirms that masking can be utilized as an efficient alternative to finetuning.</abstract>
       <url hash="15c53fc4">2020.emnlp-main.174</url>
@@ -2360,7 +2360,7 @@
       <author><first>Xiaomian</first><last>Kang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>2242–2254</pages>
       <abstract>Document-level neural machine translation has yielded attractive improvements. However, majority of existing methods roughly use all context sentences in a fixed scope. They neglect the fact that different source sentences need different sizes of context. To address this problem, we propose an effective approach to select dynamic context so that the document-level translation model can utilize the more useful selected context sentences to produce better translations. Specifically, we introduce a selection module that is independent of the translation module to score each candidate context sentence. Then, we propose two strategies to explicitly select a variable number of context sentences and feed them into the translation module. We train the two modules end-to-end via reinforcement learning. A novel reward is proposed to encourage the selection and utilization of dynamic context sentences. Experiments demonstrate that our approach can select adaptive context sentences for different source sentences, and significantly improves the performance of document-level translation methods.</abstract>
       <url hash="071deca8">2020.emnlp-main.175</url>
@@ -2386,7 +2386,7 @@
     <paper id="177">
       <title>Pronoun-Targeted Fine-tuning for <fixed-case>NMT</fixed-case> with Hybrid Losses</title>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Youlin</first><last>Shen</last></author>
       <pages>2267–2279</pages>
       <abstract>Popular Neural Machine Translation model training uses strategies like backtranslation to improve BLEU scores, requiring large amounts of additional data and training. We introduce a class of conditional generative-discriminative hybrid losses that we use to fine-tune a trained machine translation model. Through a combination of targeted fine-tuning objectives and intuitive re-use of the training data the model has failed to adequately learn from, we improve the model performance of both a sentence-level and a contextual model without using any additional data. We target the improvement of pronoun translations through our fine-tuning and evaluate our models on a pronoun benchmark testset. Our sentence-level model shows a 0.5 BLEU improvement on both the WMT14 and the IWSLT13 De-En testsets, while our contextual model achieves the best results, improving from 31.81 to 32 BLEU on WMT14 De-En testset, and from 32.10 to 33.13 on the IWSLT13 De-En testset, with corresponding improvements in pronoun translation. We further show the generalizability of our method by reproducing the improvements on two additional language pairs, Fr-En and Cs-En.</abstract>
@@ -2431,7 +2431,7 @@
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>2302–2315</pages>
       <abstract>Recent advances in multilingual dependency parsing have brought the idea of a truly universal parser closer to reality. However, cross-language interference and restrained model capacity remain major obstacles. To address this, we propose a novel multilingual task adaptation approach based on contextual parameter generation and adapter modules. This approach enables to learn adapters via language embeddings while sharing model parameters across languages. It also allows for an easy but effective integration of existing linguistic typology features into the parsing network. The resulting parser, UDapter, outperforms strong monolingual and multilingual baselines on the majority of both high-resource and low-resource (zero-shot) languages, showing the success of the proposed adaptation approach. Our in-depth analyses show that soft parameter sharing via typological features is key to this success.</abstract>
       <url hash="27e0a31d">2020.emnlp-main.180</url>
@@ -2447,7 +2447,7 @@
       <author><first>Zhengyan</first><last>Li</last></author>
       <author><first>Zichu</first><last>Fei</last></author>
       <author><first>Yeyun</first><last>Gong</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2316–2326</pages>
       <abstract>Conditional random fields (CRF) for label decoding has become ubiquitous in sequence labeling tasks. However, the local label dependencies and inefficient Viterbi decoding have always been a problem to be solved. In this work, we introduce a novel two-stage label decoding framework to model long-term label dependencies, while being much more computationally efficient. A base model first predicts draft labels, and then a novel two-stream self-attention model makes refinements on these draft predictions based on long-range label dependencies, which can achieve parallel decoding for a faster prediction. In addition, in order to mitigate the side effects of incorrect draft labels, Bayesian neural networks are used to indicate the labels with a high probability of being wrong, which can greatly assist in preventing error propagation. The experimental results on three sequence labeling benchmarks demonstrated that the proposed method not only outperformed the CRF-based methods but also greatly accelerated the inference process.</abstract>
       <url hash="1f05e790">2020.emnlp-main.181</url>
@@ -2470,7 +2470,7 @@
     </paper>
     <paper id="183">
       <title>Position-Aware Tagging for Aspect Sentiment Triplet Extraction</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Hao</first><last>Li</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
@@ -2487,7 +2487,7 @@
       <author><first>Ozan</first><last>Caglayan</last></author>
       <author><first>Julia</first><last>Ive</last></author>
       <author><first>Veneta</first><last>Haralampieva</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>2350–2361</pages>
@@ -2527,7 +2527,7 @@
     </paper>
     <paper id="187">
       <title>Bridging Linguistic Typology and Multilingual Machine Translation with Multi-View Language Representations</title>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>2391–2406</pages>
@@ -2554,7 +2554,7 @@
       <author><first>Yeon</first><last>Seonwoo</last></author>
       <author><first>Ji-Hoon</first><last>Kim</last></author>
       <author><first>Jung-Woo</first><last>Ha</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>2418–2428</pages>
       <abstract>Extractive QA models have shown very promising performance in predicting the correct answer to a question for a given passage. However, they sometimes result in predicting the correct answer text but in a context irrelevant to the given question. This discrepancy becomes especially important as the number of occurrences of the answer text in a passage increases. To resolve this issue, we propose BLANC (BLock AttentioN for Context prediction) based on two main ideas: context prediction as an auxiliary task in multi-task learning manner, and a block attention method that learns the context prediction task. With experiments on reading comprehension, we show that BLANC outperforms the state-of-the-art QA models, and the performance gap increases as the number of answer text occurrences increases. We also conduct an experiment of training the models using SQuAD and predicting the supporting facts on HotpotQA and show that BLANC outperforms all baseline models in this zero-shot setting.</abstract>
       <url hash="143e33f2">2020.emnlp-main.189</url>
@@ -2578,7 +2578,7 @@
       <author><first>Yifan</first><last>Gao</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Jingjing</first><last>Li</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Irwin</first><last>King</last></author>
@@ -2646,10 +2646,10 @@
     <paper id="196">
       <title>Improving <fixed-case>AMR</fixed-case> Parsing with Sequence-to-Sequence Pre-training</title>
       <author><first>Dongqin</first><last>Xu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2501–2511</pages>
       <abstract>In the literature, the research on abstract meaning representation (AMR) parsing is much restricted by the size of human-curated dataset which is critical to build an AMR parser with good performance. To alleviate such data size restriction, pre-trained models have been drawing more and more attention in AMR parsing. However, previous pre-trained models, like BERT, are implemented for general purpose which may not work as expected for the specific task of AMR parsing. In this paper, we focus on sequence-to-sequence (seq2seq) AMR parsing and propose a seq2seq pre-training approach to build pre-trained models in both single and joint way on three relevant tasks, i.e., machine translation, syntactic parsing, and AMR parsing itself. Moreover, we extend the vanilla fine-tuning method to a multi-task learning fine-tuning method that optimizes for the performance of AMR parsing while endeavors to preserve the response of pre-trained models. Extensive experimental results on two English benchmark datasets show that both the single and joint pre-trained models significantly improve the performance (e.g., from 71.5 to 80.2 on AMR 2.0), which reaches the state of the art. The result is very encouraging since we achieve this with seq2seq models rather than complex models. We make our code and model available at <url>https://github.com/xdqkid/S2S-AMR-Parser</url>.</abstract>
       <url hash="f9c8204a">2020.emnlp-main.196</url>
@@ -2674,7 +2674,7 @@
       <author><first>Sungjoon</first><last>Park</last></author>
       <author><first>Kiwoong</first><last>Park</last></author>
       <author><first>Jaimeen</first><last>Ahn</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>2523–2531</pages>
       <abstract>We analyze social media for detecting the suicidal risk of military personnel, which is especially crucial for countries with compulsory military service such as the Republic of Korea. From a widely-used Korean social Q&amp;A site, we collect posts containing military-relevant content written by active-duty military personnel. We then annotate the posts with two groups of experts: military experts and mental health experts. Our dataset includes 2,791 posts with 13,955 corresponding expert annotations of suicidal risk levels, and this dataset is available to researchers who consent to research ethics agreement. Using various fine-tuned state-of-the-art language models, we predict the level of suicide risk, reaching .88 F1 score for classifying the risks.</abstract>
       <url hash="9aaed6c1">2020.emnlp-main.198</url>
@@ -2720,7 +2720,7 @@
     <paper id="202">
       <title>Self-Induced Curriculum Learning in Self-Supervised Neural Machine Translation</title>
       <author><first>Dana</first><last>Ruiter</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>2560–2571</pages>
       <abstract>Self-supervised neural machine translation (SSNMT) jointly learns to identify and select suitable training data from comparable (rather than parallel) corpora and to translate, in a way that the two tasks support each other in a virtuous circle. In this study, we provide an in-depth analysis of the sampling choices the SSNMT model makes during training. We show how, without it having been told to do so, the model self-selects samples of increasing (i) complexity and (ii) task-relevance in combination with (iii) performing a denoising curriculum. We observe that the dynamics of the mutual-supervision signals of both system internal representation types are vital for the extraction and translation performance. We show that in terms of the Gunning-Fog Readability index, SSNMT starts extracting and learning from Wikipedia data suitable for high school students and quickly moves towards content suitable for first year undergraduate students.</abstract>
@@ -2732,7 +2732,7 @@
     <paper id="203">
       <title>Towards Reasonably-Sized Character-Level Transformer <fixed-case>NMT</fixed-case> by Finetuning Subword Systems</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2572–2579</pages>
       <abstract>Applying the Transformer architecture on the character level usually requires very deep architectures that are difficult and slow to train. These problems can be partially overcome by incorporating a segmentation into tokens in the model. We show that by initially training a subword model and then finetuning it on characters, we can obtain a neural machine translation model that works at the character level without requiring token segmentation. We use only the vanilla 6-layer Transformer Base architecture. Our character-level models better capture morphological phenomena and show more robustness to noise at the expense of somewhat worse overall translation quality. Our study is a significant step towards high-performance and easy to train character-based models that are not extremely large.</abstract>
       <url hash="f0722a0c">2020.emnlp-main.203</url>
@@ -2744,9 +2744,9 @@
     <paper id="204">
       <title>Transfer Learning and Distant Supervision for Multilingual Transformer Models: A Study on <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Michael A.</first><last>Hedderich</last></author>
-      <author><first>David I.</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David I.</first><last>Adelani</last></author>
       <author><first>Dawei</first><last>Zhu</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last></author>
       <author><first>Udia</first><last>Markus</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>2580–2591</pages>
@@ -2760,7 +2760,7 @@
     <paper id="205">
       <title>Translation Quality Estimation by Jointly Learning to Score and Rank</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>2592–2598</pages>
       <abstract>The translation quality estimation (QE) task, particularly the QE as a Metric task, aims to evaluate the general quality of a translation based on the translation and the source sentence without using reference translations. Supervised learning of this QE task requires human evaluation of translation quality as training data. Human evaluation of translation quality can be performed in different ways, including assigning an absolute score to a translation or ranking different translations. In order to make use of different types of human evaluation data for supervised learning, we present a multi-task learning QE model that jointly learns two tasks: score a translation and rank two translations. Our QE model exploits cross-lingual sentence embeddings from pre-trained multilingual language models. We obtain new state-of-the-art results on the WMT 2019 QE as a Metric task and outperform sentBLEU on the WMT 2019 Metrics task.</abstract>
       <url hash="3b065fad">2020.emnlp-main.205</url>
@@ -2775,7 +2775,7 @@
       <author><first>Joan Albert</first><last>Silvestre-Cerdà</last></author>
       <author><first>Pau</first><last>Baquero-Arnal</last></author>
       <author><first>Jorge</first><last>Civera Saiz</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>2599–2611</pages>
       <abstract>The cascade approach to Speech Translation (ST) is based on a pipeline that concatenates an Automatic Speech Recognition (ASR) system followed by a Machine Translation (MT) system. These systems are usually connected by a segmenter that splits the ASR output into hopefully, semantically self-contained chunks to be fed into the MT system. This is specially challenging in the case of streaming ST, where latency requirements must also be taken into account. This work proposes novel segmentation models for streaming ST that incorporate not only textual, but also acoustic information to decide when the ASR output is split into a chunk. An extensive and throughly experimental setup is carried out on the Europarl-ST dataset to prove the contribution of acoustic information to the performance of the segmentation model in terms of BLEU score in a streaming ST scenario. Finally, comparative results with previous work also show the superiority of the segmentation models proposed in this work.</abstract>
       <url hash="6f6a5d91">2020.emnlp-main.206</url>
@@ -2820,7 +2820,7 @@
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Rasmus</first><last>Hvingelby</last></author>
       <author><first>Kellie</first><last>Webster</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2637–2648</pages>
       <abstract>The one-sided focus on English in previous studies of gender bias in NLP misses out on opportunities in other languages: English challenge datasets such as GAP and WinoGender highlight model preferences that are “hallucinatory”, e.g., disambiguating gender-ambiguous occurrences of ‘doctor’ as male doctors. We show that for languages with type B reflexivization, e.g., Swedish and Russian, we can construct multi-task challenge datasets for detecting gender bias that lead to unambiguously wrong model predictions: In these languages, the direct translation of ‘the doctor removed his mask’ is not ambiguous between a coreferential reading and a disjoint reading. Instead, the coreferential reading requires a non-gendered pronoun, and the gendered, possessive pronouns are anti-reflexive. We present a multilingual, multi-task challenge dataset, which spans four languages and four NLP tasks and focuses only on this phenomenon. We find evidence for gender bias across all task-language combinations and correlate model bias with national labor market statistics.</abstract>
       <url hash="69926c37">2020.emnlp-main.209</url>
@@ -2873,7 +2873,7 @@
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>Ana C</first><last>Farinha</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>2685–2702</pages>
       <abstract>We present COMET, a neural framework for training multilingual machine translation evaluation models which obtains new state-of-the-art levels of correlation with human judgements. Our framework leverages recent breakthroughs in cross-lingual pretrained language modeling resulting in highly multilingual and adaptable MT evaluation models that exploit information from both the source input and a target-language reference translation in order to more accurately predict MT quality. To showcase our framework, we train three models with different types of human judgements: Direct Assessments, Human-mediated Translation Edit Rate and Multidimensional Quality Metric. Our models achieve new state-of-the-art performance on the WMT 2019 Metrics shared task and demonstrate robustness to high-performing systems.</abstract>
       <url hash="63c66bdb">2020.emnlp-main.213</url>
@@ -2885,7 +2885,7 @@
       <title><fixed-case>R</fixed-case>eusing a <fixed-case>P</fixed-case>retrained <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel on <fixed-case>L</fixed-case>anguages with <fixed-case>L</fixed-case>imited <fixed-case>C</fixed-case>orpora for <fixed-case>U</fixed-case>nsupervised <fixed-case>NMT</fixed-case></title>
       <author><first>Alexandra</first><last>Chronopoulou</last></author>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2703–2711</pages>
       <abstract>Using a language model (LM) pretrained on two languages with large monolingual data in order to initialize an unsupervised neural machine translation (UNMT) system yields state-of-the-art results. When limited data is available for one language, however, this method leads to poor translations. We present an effective approach that reuses an LM that is pretrained only on the high-resource language. The monolingual LM is fine-tuned on both languages and is then used to initialize a UNMT model. To reuse the pretrained LM, we have to modify its predefined vocabulary, to account for the new language. We therefore propose a novel vocabulary extension method. Our approach, RE-LM, outperforms a competitive cross-lingual pretraining model (XLM) in English-Macedonian (En-Mk) and English-Albanian (En-Sq), yielding more than +8.3 BLEU points for all four translation directions.</abstract>
       <url hash="4aecdee7">2020.emnlp-main.214</url>
@@ -2896,9 +2896,9 @@
     </paper>
     <paper id="215">
       <title><fixed-case>LNM</fixed-case>ap: Departures from Isomorphic Assumption in Bilingual Lexicon Induction Through Non-Linear Mapping in Latent Space</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <author><first>M Saiful</first><last>Bari</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>2712–2723</pages>
       <abstract>Most of the successful and predominant methods for Bilingual Lexicon Induction (BLI) are mapping-based, where a linear mapping function is learned with the assumption that the word embedding spaces of different languages exhibit similar geometric structures (i.e. approximately isomorphic). However, several recent studies have criticized this simplified assumption showing that it does not hold in general even for closely related languages. In this work, we propose a novel semi-supervised method to learn cross-lingual word embeddings for BLI. Our model is independent of the isomorphic assumption and uses non-linear mapping in the latent space of two independently pre-trained autoencoders. Through extensive experiments on fifteen (15) different language pairs (in both directions) comprising resource-rich and low-resource languages from two different datasets, we demonstrate that our method outperforms existing models by a good margin. Ablation studies show the importance of different model components and the necessity of non-linear mapping.</abstract>
       <url hash="7cd87708">2020.emnlp-main.215</url>
@@ -2937,7 +2937,7 @@
     </paper>
     <paper id="218">
       <title>Parsing Gapping Constructions Based on Grammatical and Semantic Roles</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>2747–2752</pages>
       <abstract>A gapping construction consists of a coordinated structure where redundant elements are elided from all but one conjuncts. This paper proposes a method of parsing sentences with gapping to recover elided elements. The proposed method is based on constituent trees annotated with grammatical and semantic roles that are useful for identifying elided elements. Our method outperforms the previous method in terms of F-measure and recall.</abstract>
@@ -2959,7 +2959,7 @@
     </paper>
     <paper id="220">
       <title>Some Languages Seem Easier to Parse Because Their Treebanks Leak</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2765–2770</pages>
       <abstract>Cross-language differences in (universal) dependency parsing performance are mostly attributed to treebank size, average sentence length, average dependency length, morphological complexity, and domain differences. We point at a factor not previously discussed: If we abstract away from words and dependency labels, how many graphs in the test data were seen in the training data? We compute graph isomorphisms, and show that, treebank size aside, overlap between training and test graphs explain more of the observed variation than standard explanations such as the above.</abstract>
       <url hash="fd0bc433">2020.emnlp-main.220</url>
@@ -2995,7 +2995,7 @@
       <title><fixed-case>TED</fixed-case>-<fixed-case>CDB</fixed-case>: A Large-Scale <fixed-case>C</fixed-case>hinese Discourse Relation Dataset on <fixed-case>TED</fixed-case> Talks</title>
       <author><first>Wanqiu</first><last>Long</last></author>
       <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2793–2803</pages>
       <abstract>As different genres are known to differ in their communicative properties and as previously, for Chinese, discourse relations have only been annotated over news text, we have created the TED-CDB dataset. TED-CDB comprises a large set of TED talks in Chinese that have been manually annotated according to the goals and principles of Penn Discourse Treebank, but adapted to features that are not present in English. It serves as a unique Chinese corpus of spoken discourse. Benchmark experiments show that TED-CDB poses a challenge for state-of-the-art discourse relation classifiers, whose F1 performance on 4-way classification is 60%. This is a dramatic drop of 35% from performance on the news text in the Chinese Discourse Treebank. Transfer learning experiments have been carried out with the TED-CDB for both same-language cross-domain transfer and same-domain cross-language transfer. Both demonstrate that the TED-CDB can improve the performance of systems being developed for languages other than Chinese and would be helpful for insufficient or unbalanced data in other corpora. The dataset and our Chinese annotation guidelines will be made freely available.</abstract>
       <url hash="a943489e">2020.emnlp-main.223</url>
@@ -3037,7 +3037,7 @@
       <author><first>Mohammad</first><last>Shoeybi</last></author>
       <author><first>Raul</first><last>Puri</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Anima</first><last>Anandkumar</last></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last></author>
       <author><first>Bryan</first><last>Catanzaro</last></author>
       <pages>2831–2845</pages>
       <abstract>Existing pre-trained large language models have shown unparalleled generative capabilities. However, they are not controllable. In this paper, we propose MEGATRON-CNTRL, a novel framework that uses large-scale language models and adds control to text generation by incorporating an external knowledge base. Our framework consists of a keyword predictor, a knowledge retriever, a contextual knowledge ranker, and a conditional text generator. As we do not have access to ground-truth supervision for the knowledge ranker, we make use of weak supervision from sentence embedding. The empirical results show that our model generates more fluent, consistent, and coherent stories with less repetition and higher diversity compared to prior work on the ROC story dataset. We showcase the controllability of our model by replacing the keywords used to generate stories and re-running the generation process. Human evaluation results show that 77.5% of these stories are successfully controlled by the new keywords. Furthermore, by scaling our model from 124 million to 8.3 billion parameters we demonstrate that larger models improve both the quality of generation (from 74.5% to 93.0% for consistency) and controllability (from 77.5% to 91.5%).</abstract>
@@ -3289,7 +3289,7 @@
       <title>Multi-Step Inference for Reasoning Over Paragraphs</title>
       <author><first>Jiangming</first><last>Liu</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>3040–3050</pages>
       <abstract>Complex reasoning over text requires understanding and chaining together free-form predicates and logical connectives. Prior work has largely tried to do this either symbolically or with black-box transformers. We present a middle ground between these two extremes: a compositional model reminiscent of neural module networks that can perform chained logical reasoning. This model first finds relevant sentences in the context and then chains them together using neural modules. Our model gives significant performance improvements (up to 29% relative error reduction when combined with a reranker) on ROPES, a recently-introduced complex reasoning dataset.</abstract>
@@ -3443,7 +3443,7 @@
       <title>Are All Good Word Vector Spaces Isomorphic?</title>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3178–3192</pages>
       <abstract>Existing algorithms for aligning cross-lingual word vector spaces assume that vector spaces are approximately isomorphic. As a result, they perform poorly or fail completely on non-isomorphic spaces. Such non-isomorphism has been hypothesised to result from typological differences between languages. In this work, we ask whether non-isomorphism is also crucially a sign of degenerate word vector spaces. We present a series of experiments across diverse languages which show that variance in performance across language pairs is not only due to typological differences, but can mostly be attributed to the size of the monolingual resources available, and to the properties and duration of monolingual training (e.g. “under-training”).</abstract>
       <url hash="33ce5d62">2020.emnlp-main.257</url>
@@ -3484,7 +3484,7 @@
       <author><first>Madhura</first><last>Pande</last></author>
       <author><first>Preksha</first><last>Nema</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>3230–3235</pages>
       <abstract>Given the success of Transformer-based models, two directions of study have emerged: interpreting role of individual attention heads and down-sizing the models for efficiency. Our work straddles these two streams: We analyse the importance of basing pruning strategies on the interpreted role of the attention heads. We evaluate this on Transformer and BERT models on multiple NLP tasks. Firstly, we find that a large fraction of the attention heads can be randomly pruned with limited effect on accuracy. Secondly, for Transformers, we find no advantage in pruning attention heads identified to be important based on existing studies that relate importance to the location of a head. On the BERT model too we find no preference for top or bottom layers, though the latter are reported to have higher importance. However, strategies that avoid pruning middle layers and consecutive layers perform better. Finally, during fine-tuning the compensation for pruned attention heads is roughly equally distributed across the un-pruned heads. Our results thus suggest that interpretation of attention heads does not strongly inform pruning.</abstract>
       <url hash="6c48c5af">2020.emnlp-main.260</url>
@@ -3497,7 +3497,7 @@
       <author><first>Sahana</first><last>Ramnath</last></author>
       <author><first>Preksha</first><last>Nema</last></author>
       <author><first>Deep</first><last>Sahni</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>3236–3242</pages>
       <abstract>BERT and its variants have achieved state-of-the-art performance in various NLP tasks. Since then, various works have been proposed to analyze the linguistic information being captured in BERT. However, the current works do not provide an insight into how BERT is able to achieve near human-level performance on the task of Reading Comprehension based Question Answering. In this work, we attempt to interpret BERT for RCQA. Since BERT layers do not have predefined roles, we define a layer’s role or functionality using Integrated Gradients. Based on the defined roles, we perform a preliminary analysis across all layers. We observed that the initial layers focus on query-passage interaction, whereas later layers focus more on contextual understanding and enhancing the answer prediction. Specifically for quantifier questions (how much/how many), we notice that BERT focuses on confusing words (i.e., on other numerical quantities in the passage) in the later layers, but still manages to predict the answer correctly. The fine-tuning and analysis scripts will be publicly available at <url>https://github.com/iitmnlp/BERT-Analysis-RCQA</url>.</abstract>
       <url hash="3d4c397d">2020.emnlp-main.261</url>
@@ -3508,7 +3508,7 @@
     <paper id="262">
       <title>How do Decisions Emerge across Layers in Neural Models? Interpretation with Differentiable Masking</title>
       <author><first>Nicola</first><last>De Cao</last></author>
-      <author><first>Michael Sejr</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael Sejr</first><last>Schlichtkrull</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>3243–3255</pages>
@@ -3599,7 +3599,7 @@
     <paper id="269">
       <title><fixed-case>VD-BERT</fixed-case>: <fixed-case>A</fixed-case> <fixed-case>U</fixed-case>nified <fixed-case>V</fixed-case>ision and <fixed-case>D</fixed-case>ialog <fixed-case>T</fixed-case>ransformer with <fixed-case>BERT</fixed-case></title>
       <author><first>Yue</first><last>Wang</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Michael</first><last>Lyu</last></author>
       <author><first>Irwin</first><last>King</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
@@ -3656,7 +3656,7 @@
       <title><fixed-case>M</fixed-case>in<fixed-case>TL</fixed-case>: Minimalist Transfer Learning for Task-Oriented Dialogue Systems</title>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>3391–3405</pages>
       <abstract>In this paper, we propose Minimalist Transfer Learning (MinTL) to simplify the system design process of task-oriented dialogue systems and alleviate the over-dependency on annotated data. MinTL is a simple yet effective transfer learning framework, which allows us to plug-and-play pre-trained seq2seq models, and jointly learn dialogue state tracking and dialogue response generation. Unlike previous approaches, which use a copy mechanism to “carryover” the old dialogue states to the new one, we introduce Levenshtein belief spans (Lev), that allows efficient dialogue state tracking with a minimal generation length. We instantiate our learning framework with two pre-trained backbones: T5 and BART, and evaluate them on MultiWOZ. Extensive experiments demonstrate that: 1) our systems establish new state-of-the-art results on end-to-end response generation, 2) MinTL-based systems are more robust than baseline methods in the low resource setting, and they achieve competitive results with only 20% training data, and 3) Lev greatly improves the inference efficiency.</abstract>
@@ -3670,7 +3670,7 @@
       <author><first>Kang Min</first><last>Yoo</last></author>
       <author><first>Hanbit</first><last>Lee</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Sang-goo</first><last>Lee</last></author>
       <pages>3406–3425</pages>
@@ -3699,7 +3699,7 @@
     <paper id="276">
       <title>Counterfactual Off-Policy Training for Neural Dialogue Generation</title>
       <author><first>Qingfu</first><last>Zhu</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>3438–3448</pages>
@@ -3794,7 +3794,7 @@
     <paper id="283">
       <title>Don’t Neglect the Obvious: On the Role of Unambiguous Words in Word Sense Disambiguation</title>
       <author><first>Daniel</first><last>Loureiro</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>3514–3520</pages>
       <abstract>State-of-the-art methods for Word Sense Disambiguation (WSD) combine two different features: the power of pre-trained language models and a propagation method to extend the coverage of such models. This propagation is needed as current sense-annotated corpora lack coverage of many instances in the underlying sense inventory (usually WordNet). At the same time, unambiguous words make for a large portion of all words in WordNet, while being poorly covered in existing sense-annotated corpora. In this paper, we propose a simple method to provide annotations for most unambiguous words in a large corpus. We introduce the UWA (Unambiguous Word Annotations) dataset and show how a state-of-the-art propagation-based model can use it to extend the coverage and quality of its word sense embeddings by a significant margin, improving on its original results on WSD.</abstract>
       <url hash="862d5343">2020.emnlp-main.283</url>
@@ -3828,7 +3828,7 @@
     </paper>
     <paper id="286">
       <title>Convolution over Hierarchical Syntactic and Lexical Graphs for Aspect Level Sentiment Analysis</title>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last></author>
+      <author><first>Mi</first><last>Zhang</last></author>
       <author><first>Tieyun</first><last>Qian</last></author>
       <pages>3540–3549</pages>
       <abstract>The state-of-the-art methods in aspect-level sentiment classification have leveraged the graph based models to incorporate the syntactic structure of a sentence. While being effective, these methods ignore the corpus level word co-occurrence information, which reflect the collocations in linguistics like “nothing special”. Moreover, they do not distinguish the different types of syntactic dependency, e.g., a nominal subject relation “food-was” is treated equally as an adjectival complement relation “was-okay” in “food was okay”. To tackle the above two limitations, we propose a novel architecture which convolutes over hierarchical syntactic and lexical graphs. Specifically, we employ a global lexical graph to encode the corpus level word co-occurrence information. Moreover, we build a concept hierarchy on both the syntactic and lexical graphs for differentiating various types of dependency relations or lexical word pairs. Finally, we design a bi-level interactive graph convolution network to fully exploit these two graphs. Extensive experiments on five bench- mark datasets show that our method outperforms the state-of-the-art baselines.</abstract>
@@ -3852,7 +3852,7 @@
     </paper>
     <paper id="288">
       <title>Aspect Sentiment Classification with Aspect-Specific Opinion Spans</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Fei</first><last>Huang</last></author>
@@ -3892,10 +3892,10 @@
       <title>Multi-modal Multi-label Emotion Detection with Modality and Label Dependence</title>
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Xincheng</first><last>Ju</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3584–3593</pages>
       <abstract>As an important research issue in the natural language processing community, multi-label emotion detection has been drawing more and more attention in the last few years. However, almost all existing studies focus on one modality (e.g., textual modality). In this paper, we focus on multi-label emotion detection in a multi-modal scenario. In this scenario, we need to consider both the dependence among different labels (label dependence) and the dependence between each predicting label and different modalities (modality dependence). Particularly, we propose a multi-modal sequence-to-set approach to effectively model both kinds of dependence in multi-modal multi-label emotion detection. The detailed evaluation demonstrates the effectiveness of our approach.</abstract>
       <url hash="53127218">2020.emnlp-main.291</url>
@@ -3910,7 +3910,7 @@
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Bingning</first><last>Wang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3594–3605</pages>
       <abstract>Aspect-based sentiment analysis (ABSA) aims to predict the sentiment towards a specific aspect in the text. However, existing ABSA test sets cannot be used to probe whether a model can distinguish the sentiment of the target aspect from the non-target aspects. To solve this problem, we develop a simple but effective approach to enrich ABSA test sets. Specifically, we generate new examples to disentangle the confounding sentiments of the non-target aspects from the target aspect’s sentiment. Based on the SemEval 2014 dataset, we construct the Aspect Robustness Test Set (ARTS) as a comprehensive probe of the aspect robustness of ABSA models. Over 92% data of ARTS show high fluency and desired sentiment on all aspects by human evaluation. Using ARTS, we analyze the robustness of nine ABSA models, and observe, surprisingly, that their accuracy drops by up to 69.73%. We explore several ways to improve aspect robustness, and find that adversarial training can improve models’ performance on ARTS by up to 32.85%. Our code and new test set are available at <url>https://github.com/zhijing-jin/ARTS_TestSet</url></abstract>
       <url hash="0d66a963">2020.emnlp-main.292</url>
@@ -4009,7 +4009,7 @@
       <author><first>Lijie</first><last>Wen</last></author>
       <author><first>Yusong</first><last>Xu</last></author>
       <author><first>Chenwei</first><last>Zhang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>3673–3682</pages>
       <abstract>Open relation extraction is the task of extracting open-domain relation facts from natural language sentences. Existing works either utilize heuristics or distant-supervised annotations to train a supervised classifier over pre-defined relations, or adopt unsupervised methods with additional assumptions that have less discriminative power. In this work, we propose a self-supervised framework named SelfORE, which exploits weak, self-supervised signals by leveraging large pretrained language model for adaptive clustering on contextualized relational features, and bootstraps the self-supervised signals by improving contextualized features in relation classification. Experimental results on three datasets show the effectiveness and robustness of SelfORE on open-domain Relation Extraction when comparing with competitive baselines.</abstract>
       <url hash="d3d08b77">2020.emnlp-main.299</url>
@@ -4188,7 +4188,7 @@
       <author><first>Yun-Hsuan</first><last>Jen</last></author>
       <author><first>Chieh-Yang</first><last>Huang</last></author>
       <author><first>MeiHua</first><last>Chen</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>3807–3817</pages>
       <abstract>Many English-as-a-second language learners have trouble using near-synonym words (e.g., small vs.little; briefly vs.shortly) correctly, and often look for example sentences to learn how two nearly synonymous terms differ. Prior work uses hand-crafted scores to recommend sentences but has difficulty in adopting such scores to all the near-synonyms as near-synonyms differ in various ways. We notice that the helpfulness of the learning material would reflect on the learners’ performance. Thus, we propose the inference-based learner-like agent to mimic learner behavior and identify good learning materials by examining the agent’s performance. To enable the agent to behave like a learner, we leverage entailment modeling’s capability of inferring answers from the provided materials. Experimental results show that the proposed agent is equipped with good learner-like behavior to achieve the best performance in both fill-in-the-blank (FITB) and good example sentence selection tasks. We further conduct a classroom user study with college ESL learners. The results of the user study show that the proposed agent can find out example sentences that help students learn more easily and efficiently. Compared to other models, the proposed agent improves the score of more than 17% of students after learning.</abstract>
@@ -4244,8 +4244,8 @@
     <paper id="316">
       <title><fixed-case>D</fixed-case>ago<fixed-case>BERT</fixed-case>: <fixed-case>G</fixed-case>enerating Derivational Morphology with a Pretrained Language Model</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3848–3861</pages>
       <abstract>Can pretrained language models (PLMs) generate derivationally complex words? We present the first study investigating this question, taking BERT as the example PLM. We examine BERT’s derivational capabilities in different settings, ranging from using the unmodified pretrained model to full finetuning. Our best model, DagoBERT (Derivationally and generatively optimized BERT), clearly outperforms the previous state of the art in derivation generation (DG). Furthermore, our experiments show that the input segmentation crucially impacts BERT’s derivational knowledge, suggesting that the performance of PLMs could be further improved if a morphologically informed vocabulary of units were used.</abstract>
       <url hash="eaf41c3c">2020.emnlp-main.316</url>
@@ -4267,7 +4267,7 @@
     <paper id="318">
       <title>A Joint Multiple Criteria Model in Transfer Learning for Cross-domain <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Kaiyu</first><last>Huang</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Zhuang</first><last>Liu</last></author>
       <author><first>Fengran</first><last>Mo</last></author>
       <pages>3873–3882</pages>
@@ -4295,7 +4295,7 @@
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Wanjun</first><last>Zhong</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>3895–3903</pages>
@@ -4367,14 +4367,14 @@
     </paper>
     <paper id="326">
       <title>Spot The Bot: A Robust and Efficient Framework for the Evaluation of Conversational Dialogue Systems</title>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <author><first>Pius</first><last>von Däniken</last></author>
       <author><first>Jon Ander</first><last>Campos</last></author>
-      <author><first>Alvaro</first><last>Rodrigo</last></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last></author>
       <author><first>Thiziri</first><last>Belkacem</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>3971–3984</pages>
       <award>Honorable Mention Paper</award>
@@ -4469,7 +4469,7 @@
     <paper id="333">
       <title>Towards Better Context-aware Lexical Semantics:Adjusting Contextualized Representations through Static Anchors</title>
       <author><first>Qianchu</first><last>Liu</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>4066–4075</pages>
       <abstract>One of the most powerful features of contextualized models is their dynamic embeddings for words in context, leading to state-of-the-art representations for context-aware lexical semantics. In this paper, we present a post-processing technique that enhances these representations by learning a transformation through static anchors. Our method requires only another pre-trained model and no labeled data is needed. We show consistent improvement in a range of benchmark tasks that test contextual variations of meaning both across different usages of a word and across different words as they are used in context. We demonstrate that while the original contextual representations can be improved by another embedding space from both contextualized and static models, the static embeddings, which have lower computational requirements, provide the most gains.</abstract>
@@ -4483,7 +4483,7 @@
       <author><first>Charles</first><last>Welch</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>4076–4089</pages>
       <abstract>Word embeddings are usually derived from corpora containing text from many individuals, thus leading to general purpose representations rather than individually personalized representations. While personalized embeddings can be useful to improve language model performance and other language processing tasks, they can only be computed for people with a large amount of longitudinal data, which is not the case for new users. We propose a new form of personalized word embeddings that use demographic-specific word representations derived compositionally from full or partial demographic information for a user (i.e., gender, age, location, religion). We show that the resulting demographic-aware word representations outperform generic word representations on two tasks for English: language modeling and word associations. We further explore the trade-off between the number of available attributes and their relative effectiveness and discuss the ethical implications of using them.</abstract>
       <url hash="74c78b71">2020.emnlp-main.334</url>
@@ -4608,7 +4608,7 @@
     <paper id="344">
       <title>Adversarial Semantic Collisions</title>
       <author><first>Congzheng</first><last>Song</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Vitaly</first><last>Shmatikov</last></author>
       <pages>4198–4210</pages>
       <abstract>We study <i>semantic collisions</i>: texts that are semantically unrelated but judged as similar by NLP models. We develop gradient-based approaches for generating semantic collisions and demonstrate that state-of-the-art models for many tasks which rely on analyzing the meaning and similarity of texts—including paraphrase identification, document retrieval, response suggestion, and extractive summarization—are vulnerable to semantic collisions. For example, given a target query, inserting a crafted collision into an irrelevant document can shift its retrieval rank from 1000 to top 3. We show how to generate semantic collisions that evade perplexity-based filtering and discuss other potential mitigations. Our code is available at <url>https://github.com/csong27/collision-bert</url>.</abstract>
@@ -4663,7 +4663,7 @@
       <title>Sparse Text Generation</title>
       <author><first>Pedro Henrique</first><last>Martins</last></author>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>4252–4273</pages>
       <abstract>Current state-of-the-art text generators build on powerful language models such as GPT-2, achieving impressive performance. However, to avoid degenerate text, they require sampling from a modified softmax, via temperature parameters or ad-hoc truncation techniques, as in top-<tex-math>k</tex-math> or nucleus sampling. This creates a mismatch between training and testing conditions. In this paper, we use the recently introduced entmax transformation to train and sample from a natively sparse language model, avoiding this mismatch. The result is a text generator with favorable performance in terms of fluency and consistency, fewer repetitions, and n-gram diversity closer to human text. In order to evaluate our model, we propose three new metrics for comparing sparse or truncated distributions: <tex-math>\epsilon</tex-math>-perplexity, sparsemax score, and Jensen-Shannon divergence. Human-evaluated experiments in story completion and dialogue generation show that entmax sampling leads to more engaging and coherent stories and conversations.</abstract>
       <url hash="7ce47f1b">2020.emnlp-main.348</url>
@@ -4699,7 +4699,7 @@
       <title>Content Planning for Neural Story Generation with Aristotelian Rescoring</title>
       <author><first>Seraphina</first><last>Goldfarb-Tarrant</last></author>
       <author><first>Tuhin</first><last>Chakrabarty</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>4319–4338</pages>
       <abstract>Long-form narrative text generated from large language models manages a fluent impersonation of human writing, but only at the local sentence level, and lacks structure or global cohesion. We posit that many of the problems of story generation can be addressed via high-quality content planning, and present a system that focuses on how to learn good plot structures to guide story generation. We utilize a plot-generation language model along with an ensemble of rescoring models that each implement an aspect of good story-writing as detailed in Aristotle’s Poetics. We find that stories written with our more principled plot-structure are both more relevant to a given prompt and higher quality than baselines that do not content plan, or that plan in an unprincipled way.</abstract>
@@ -4727,7 +4727,7 @@
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Arabella</first><last>Sinclair</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>4350–4368</pages>
       <abstract>Dialogue participants often refer to entities or situations repeatedly within a conversation, which contributes to its cohesiveness. Subsequent references exploit the common ground accumulated by the interlocutors and hence have several interesting properties, namely, they tend to be shorter and reuse expressions that were effective in previous mentions. In this paper, we tackle the generation of first and subsequent references in visually grounded dialogue. We propose a generation model that produces referring utterances grounded in both the visual and the conversational context. To assess the referring effectiveness of its output, we also implement a reference resolution system. Our experiments and analyses show that the model produces better, more effective referring utterances than a model not grounded in the dialogue context, and generates subsequent references that exhibit linguistic patterns akin to humans.</abstract>
       <url hash="560d4a81">2020.emnlp-main.353</url>
@@ -4789,7 +4789,7 @@
     <paper id="358">
       <title>Identifying Elements Essential for <fixed-case>BERT</fixed-case>’s Multilinguality</title>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>4423–4437</pages>
       <abstract>It has been shown that multilingual BERT (mBERT) yields high quality multilingual representations and enables effective zero-shot transfer. This is surprising given that mBERT does not use any crosslingual signal during training. While recent literature has studied this phenomenon, the reasons for the multilinguality are still somewhat obscure. We aim to identify architectural properties of BERT and linguistic properties of languages that are necessary for BERT to become multilingual. To allow for fast experimentation we propose an efficient setup with small BERT models trained on a mix of synthetic and natural data. Overall, we identify four architectural and two linguistic elements that influence multilinguality. Based on our insights, we experiment with a multilingual pretraining setup that modifies the masking strategy using VecMap, i.e., unsupervised embedding alignment. Experiments on XNLI with three languages indicate that our findings transfer from our small setup to larger scale settings.</abstract>
       <url hash="fc0bac85">2020.emnlp-main.358</url>
@@ -4800,7 +4800,7 @@
     <paper id="359">
       <title>On Negative Interference in Multilingual Models: Findings and A Meta-Learning Treatment</title>
       <author><first>Zirui</first><last>Wang</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>4438–4450</pages>
       <abstract>Modern multilingual models are trained on concatenated text from multiple languages in hopes of conferring benefits to each (positive transfer), with the most pronounced benefits accruing to low-resource languages. However, recent work has shown that this approach can degrade performance on high-resource languages, a phenomenon known as negative interference. In this paper, we present the first systematic study of negative interference. We show that, contrary to previous belief, negative interference also impacts low-resource languages. While parameters are maximally shared to learn language-universal structures, we demonstrate that language-specific parameters do exist in multilingual models and they are a potential cause of negative interference. Motivated by these observations, we also present a meta-learning algorithm that obtains better cross-lingual transferability and alleviates negative interference, by adding language-specific layers as meta-parameters and training them in a manner that explicitly improves shared layers’ generalization on all languages. Overall, our results show that negative interference is more common than previously known, suggesting new directions for improving multilingual representations.</abstract>
@@ -4816,7 +4816,7 @@
       <author><first>Xingyuan</first><last>Zhao</last></author>
       <author><first>Yucen</first><last>Li</last></author>
       <author><first>Micaelah</first><last>St Johns</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>4451–4464</pages>
       <abstract>Cross-lingual word embedding (CWE) algorithms represent words in multiple languages in a unified vector space. Multi-Word Expressions (MWE) are common in every language. When training word embeddings, each component word of an MWE gets its own separate embedding, and thus, MWEs are not translated by CWEs. We propose a simple method for word translation of MWEs to and from English in ten languages: we first compile lists of MWEs in each language and then tokenize the MWEs as single tokens before training word embeddings. CWEs are trained on a word-translation task using the dictionaries that only contain single words. In order to evaluate MWE translation, we created bilingual word lists from multilingual WordNet that include single-token words and MWEs, and most importantly, include MWEs that correspond to single words in another language. We release these dictionaries to the research community. We show that the pre-tokenization of MWEs as single tokens performs better than averaging the embeddings of the individual tokens of the MWE. We can translate MWEs at a top-10 precision of 30-60%. The tokenization of MWEs makes the occurrences of single words in a training corpus more sparse, but we show that it does not pose negative impacts on single-word translations.</abstract>
       <url hash="6ec20025">2020.emnlp-main.360</url>
@@ -4829,7 +4829,7 @@
       <author><first>Jerin</first><last>Philip</last></author>
       <author><first>Alexandre</first><last>Berard</last></author>
       <author><first>Matthias</first><last>Gallé</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>4465–4470</pages>
       <abstract>We propose a novel adapter layer formalism for adapting multilingual models. They are more parameter-efficient than existing adapter layers while obtaining as good or better performance. The layers are specific to one language (as opposed to bilingual adapters) allowing to compose them and generalize to unseen language-pairs. In this zero-shot setting, they obtain a median improvement of +2.77 BLEU points over a strong 20-language multilingual Transformer baseline trained on TED talks.</abstract>
       <url hash="cef56d40">2020.emnlp-main.361</url>
@@ -4866,7 +4866,7 @@
       <title>Distilling Multiple Domains for Neural Machine Translation</title>
       <author><first>Anna</first><last>Currey</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <pages>4500–4511</pages>
       <abstract>Neural machine translation achieves impressive results in high-resource conditions, but performance often suffers when the input domain is low-resource. The standard practice of adapting a separate model for each domain of interest does not scale well in practice from both a quality perspective (brittleness under domain shift) as well as a cost perspective (added maintenance and inference complexity). In this paper, we propose a framework for training a single multi-domain neural machine translation model that is able to translate several domains without increasing inference time or memory usage. We show that this model can improve translation on both high- and low-resource domains over strong multi-domain baselines. In addition, our proposed model is effective when domain labels are unknown during training, as well as robust under noisy data conditions.</abstract>
       <url hash="7fa8aa74">2020.emnlp-main.364</url>
@@ -4929,7 +4929,7 @@
       <author><first>Phillip</first><last>Keung</last></author>
       <author><first>Yichao</first><last>Lu</last></author>
       <author><first>György</first><last>Szarvas</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>4563–4568</pages>
       <abstract>We present the Multilingual Amazon Reviews Corpus (MARC), a large-scale collection of Amazon reviews for multilingual text classification. The corpus contains reviews in English, Japanese, German, French, Spanish, and Chinese, which were collected between 2015 and 2019. Each record in the dataset contains the review text, the review title, the star rating, an anonymized reviewer ID, an anonymized product ID, and the coarse-grained product category (e.g., ‘books’, ‘appliances’, etc.) The corpus is balanced across the 5 possible star ratings, so each rating constitutes 20% of the reviews in each language. For each language, there are 200,000, 5,000, and 5,000 reviews in the training, development, and test sets, respectively. We report baseline results for supervised text classification and zero-shot cross-lingual transfer learning by fine-tuning a multilingual BERT model on reviews data. We propose the use of mean absolute error (MAE) instead of classification accuracy for this task, since MAE accounts for the ordinal nature of the ratings.</abstract>
       <url hash="dc216e03">2020.emnlp-main.369</url>
@@ -4945,7 +4945,7 @@
       <author><first>David</first><last>Buchanan</last></author>
       <author><first>Lauren</first><last>Berkowitz</last></author>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <pages>4569–4586</pages>
       <award>Honorable Mention Paper</award>
       <abstract>When humans read or listen, they make implicit commonsense inferences that frame their understanding of what happened and why. As a step toward AI systems that can build similar mental models, we introduce GLUCOSE, a large-scale dataset of implicit commonsense causal knowledge, encoded as causal mini-theories about the world, each grounded in a narrative context. To construct GLUCOSE, we drew on cognitive psychology to identify ten dimensions of causal explanation, focusing on events, states, motivations, and emotions. Each GLUCOSE entry includes a story-specific causal statement paired with an inference rule generalized from the statement. This paper details two concrete contributions. First, we present our platform for effectively crowdsourcing GLUCOSE data at scale, which uses semi-structured templates to elicit causal explanations. Using this platform, we collected a total of ~670K specific statements and general rules that capture implicit commonsense knowledge about everyday situations. Second, we show that existing knowledge resources and pretrained language models do not include or readily predict GLUCOSE’s rich inferential content. However, when state-of-the-art neural models are trained on this knowledge, they can start to make commonsense inferences on unseen stories that match humans’ mental models.</abstract>
@@ -5016,7 +5016,7 @@
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Ryosuke</first><last>Kohita</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <pages>4640–4652</pages>
       <abstract>Humans can learn structural properties about a word from minimal experience, and deploy their learned syntactic representations uniformly in different grammatical contexts. We assess the ability of modern neural language models to reproduce this behavior in English and evaluate the effect of structural supervision on learning outcomes. First, we assess few-shot learning capabilities by developing controlled experiments that probe models’ syntactic nominal number and verbal argument structure generalizations for tokens seen as few as two times during training. Second, we assess invariance properties of learned representation: the ability of a model to transfer syntactic generalizations from a base context (e.g., a simple declarative active-voice sentence) to a transformed context (e.g., an interrogative sentence). We test four models trained on the same dataset: an n-gram baseline, an LSTM, and two LSTM-variants trained with explicit structural supervision. We find that in most cases, the neural models are able to induce the proper syntactic generalizations after minimal exposure, often from just two examples during training, and that the two structurally supervised models generalize more accurately than the LSTM model. All neural models are able to leverage information learned in base contexts to drive expectations in transformed contexts, indicating that they have learned some invariance properties of syntax.</abstract>
@@ -5029,8 +5029,8 @@
       <title>Investigating representations of verb bias in neural language models</title>
       <author><first>Robert</first><last>Hawkins</last></author>
       <author><first>Takateru</first><last>Yamakoshi</last></author>
-      <author><first>Thomas</first><last>Griffiths</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas</first><last>Griffiths</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <pages>4653–4663</pages>
       <abstract>Languages typically provide more than one grammatical construction to express certain types of messages. A speaker’s choice of construction is known to depend on multiple factors, including the choice of main verb – a phenomenon known as verb bias. Here we introduce DAIS, a large benchmark dataset containing 50K human judgments for 5K distinct sentence pairs in the English dative alternation. This dataset includes 200 unique verbs and systematically varies the definiteness and length of arguments. We use this dataset, as well as an existing corpus of naturally occurring data, to evaluate how well recent neural language models capture human preferences. Results show that larger models perform better than smaller models, and transformer architectures (e.g. GPT-2) tend to out-perform recurrent architectures (e.g. LSTMs) even under comparable parameter and training settings. Additional analyses of internal feature representations suggest that transformers may better integrate specific lexical information with grammatical constructions.</abstract>
       <url hash="42a82892">2020.emnlp-main.376</url>
@@ -5043,7 +5043,7 @@
       <author><first>Ece</first><last>Takmaz</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Lisa</first><last>Beinborn</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>4664–4677</pages>
       <abstract>When speakers describe an image, they tend to look at objects before mentioning them. In this paper, we investigate such sequential cross-modal alignment by modelling the image description generation process computationally. We take as our starting point a state-of-the-art image captioning system and develop several model variants that exploit information from human gaze patterns recorded during language production. In particular, we propose the first approach to image description generation where visual processing is modelled sequentially. Our experiments and analyses confirm that better descriptions can be obtained by exploiting gaze-driven attention and shed light on human cognitive processes by comparing different ways of aligning the gaze modality with language production. We find that processing gaze data sequentially leads to descriptions that are better aligned to those produced by speakers, more diverse, and more natural—particularly when gaze is encoded with a dedicated recurrent component.</abstract>
       <url hash="f6cd4cb4">2020.emnlp-main.377</url>
@@ -5070,7 +5070,7 @@
     <paper id="379">
       <title><fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>egatron: Larger Biomedical Domain Language Model</title>
       <author><first>Hoo-Chang</first><last>Shin</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Evelina</first><last>Bakhturina</last></author>
       <author><first>Raul</first><last>Puri</last></author>
       <author><first>Mostofa</first><last>Patwary</last></author>
@@ -5188,7 +5188,7 @@
       <author><first>Victor</first><last>Martinez</last></author>
       <author><first>Krishna</first><last>Somandepalli</last></author>
       <author><first>Yalda</first><last>Tehranian-Uhls</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>4780–4790</pages>
       <abstract>Exposure to violent, sexual, or substance-abuse content in media increases the willingness of children and adolescents to imitate similar behaviors. Computational methods that identify portrayals of risk behaviors from audio-visual cues are limited in their applicability to films in post-production, where modifications might be prohibitively expensive. To address this limitation, we propose a model that estimates content ratings based on the language use in movie scripts, making our solution available at the earlier stages of creative production. Our model significantly improves the state-of-the-art by adapting novel techniques to learn better movie representations from the semantic and sentiment aspects of a character’s language use, and by leveraging the co-occurrence of risk behaviors, following a multi-task approach. Additionally, we show how this approach can be useful to learn novel insights on the joint portrayal of these behaviors, and on the subtleties that filmmakers may otherwise not pick up on.</abstract>
       <url hash="7e37a4e2">2020.emnlp-main.387</url>
@@ -5201,7 +5201,7 @@
       <author><first>Amrith</first><last>Krishna</last></author>
       <author><first>Ashim</first><last>Gupta</last></author>
       <author><first>Deepak</first><last>Garasangi</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>4791–4797</pages>
       <abstract>Morphologically rich languages seem to benefit from joint processing of morphology and syntax, as compared to pipeline architectures. We propose a graph-based model for joint morphological parsing and dependency parsing in Sanskrit. Here, we extend the Energy based model framework (Krishna et al., 2020), proposed for several structured prediction tasks in Sanskrit, in 2 simple yet significant ways. First, the framework’s default input graph generation method is modified to generate a multigraph, which enables the use of an exact search inference. Second, we prune the input search space using a linguistically motivated approach, rooted in the traditional grammatical analysis of Sanskrit. Our experiments show that the morphological parsing from our joint model outperforms standalone morphological parsers. We report state of the art results in morphological parsing, and in dependency parsing, both in standalone (with gold morphological tags) and joint morphosyntactic parsing setting.</abstract>
@@ -5239,7 +5239,7 @@
       <title>Unsupervised Cross-Lingual Part-of-Speech Tagging for Truly Low-Resource Scenarios</title>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>4820–4831</pages>
       <abstract>We describe a fully unsupervised cross-lingual transfer approach for part-of-speech (POS) tagging under a truly low resource scenario. We assume access to parallel translations between the target language and one or more source languages for which POS taggers are available. We use the Bible as parallel data in our experiments: small size, out-of-domain and covering many diverse languages. Our approach innovates in three ways: 1) a robust approach of selecting training instances via cross-lingual annotation projection that exploits best practices of unsupervised type and token constraints, word-alignment confidence and density of projected POS, 2) a Bi-LSTM architecture that uses contextualized word embeddings, affix embeddings and hierarchical Brown clusters, and 3) an evaluation on 12 diverse languages in terms of language family and morphological typology. In spite of the use of limited and out-of-domain parallel data, our experiments demonstrate significant improvements in accuracy over previous work. In addition, we show that using multi-source information, either via projection or output combination, improves the performance for most target languages.</abstract>
       <url hash="3d5e5522">2020.emnlp-main.391</url>
@@ -5265,7 +5265,7 @@
     <paper id="393">
       <title>Utility is in the Eye of the User: A Critique of <fixed-case>NLP</fixed-case> Leaderboards</title>
       <author><first>Kawin</first><last>Ethayarajh</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>4846–4853</pages>
       <abstract>Benchmarks such as GLUE have helped drive advances in NLP by incentivizing the creation of more accurate models. While this leaderboard paradigm has been remarkably successful, a historical focus on performance-based evaluation has been at the expense of other qualities that the NLP community values in models, such as compactness, fairness, and energy efficiency. In this opinion paper, we study the divergence between what is incentivized by leaderboards and what is useful in practice through the lens of microeconomic theory. We frame both the leaderboard and NLP practitioners as consumers and the benefit they get from a model as its utility to them. With this framing, we formalize how leaderboards – in their current form – can be poor proxies for the NLP community at large. For example, a highly inefficient model would provide less utility to practitioners but not to a leaderboard, since it is a cost that only the former must bear. To allow practitioners to better estimate a model’s utility to them, we advocate for more transparency on leaderboards, such as the reporting of statistics that are of practical concern (e.g., model size, energy efficiency, and inference latency).</abstract>
       <url hash="fc94df19">2020.emnlp-main.393</url>
@@ -5302,7 +5302,7 @@
       <title>Dissecting Span Identification Tasks with Performance Prediction</title>
       <author><first>Sean</first><last>Papay</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>4881–4895</pages>
       <abstract>Span identification (in short, span ID) tasks such as chunking, NER, or code-switching detection, ask models to identify and classify relevant spans in a text. Despite being a staple of NLP, and sharing a common structure, there is little insight on how these tasks’ properties influence their difficulty, and thus little guidance on what model families work well on span ID tasks, and why. We analyze span ID tasks via performance prediction, estimating how well neural architectures do on different tasks. Our contributions are: (a) we identify key properties of span ID tasks that can inform performance prediction; (b) we carry out a large-scale experiment on English data, building a model to predict performance for unseen span ID tasks that can support architecture choices; (c), we investigate the parameters of the meta model, yielding new insights on how model and task properties interact to affect span ID performance. We find, e.g., that span frequency is especially important for LSTMs, and that CRFs help when spans are infrequent and boundaries non-distinctive.</abstract>
       <url hash="47e5ea30">2020.emnlp-main.396</url>
@@ -5390,8 +5390,8 @@
       <title>We Can Detect Your Bias: Predicting the Political Ideology of News Articles</title>
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>4982–4991</pages>
       <abstract>We explore the task of predicting the leading political ideology or bias of news articles. First, we collect and release a large dataset of 34,737 articles that were manually annotated for political ideology –left, center, or right–, which is well-balanced across both topics and media. We further use a challenging experimental setup where the test examples come from media that were not seen during training, which prevents the model from learning to detect the source of the target news article instead of predicting its political ideology. From a modeling perspective, we propose an adversarial media adaptation, as well as a specially adapted triplet loss. We further add background information about the source, and we show that it is quite helpful for improving article-level prediction. Our experimental results show very sizable improvements over using state-of-the-art pre-trained Transformers in this challenging setup.</abstract>
       <url hash="c7cbc4d4">2020.emnlp-main.404</url>
@@ -5418,7 +5418,7 @@
     <paper id="406">
       <title>Training for <fixed-case>G</fixed-case>ibbs Sampling on Conditional Random Fields with Neural Scoring Factors</title>
       <author><first>Sida</first><last>Gao</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <pages>4999–5011</pages>
       <abstract>Most recent improvements in NLP come from changes to the neural network architectures modeling the text input. Yet, state-of-the-art models often rely on simple approaches to model the label space, e.g. bigram Conditional Random Fields (CRFs) in sequence tagging. More expressive graphical models are rarely used due to their prohibitive computational cost. In this work, we present an approach for efficiently training and decoding hybrids of graphical models and neural networks based on Gibbs sampling. Our approach is the natural adaptation of SampleRank (Wick et al., 2011) to neural models, and is widely applicable to tasks beyond sequence tagging. We apply our approach to named entity recognition and present a neural skip-chain CRF model, for which exact inference is impractical. The skip-chain model improves over a strong baseline on three languages from CoNLL-02/03. We obtain new state-of-the-art results on Dutch.</abstract>
       <url hash="e4bc187b">2020.emnlp-main.406</url>
@@ -5430,7 +5430,7 @@
       <title>Multilevel Text Alignment with Cross-Document Attention</title>
       <author><first>Xuhui</first><last>Zhou</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>5012–5025</pages>
       <abstract>Text alignment finds application in tasks such as citation recommendation and plagiarism detection. Existing alignment methods operate at a single, predefined level and cannot learn to align texts at, for example, sentence <i>and</i> document levels. We propose a new learning approach that equips previously established hierarchical attention encoders for representing documents with a cross-document attention component, enabling structural comparisons across different levels (document-to-document and sentence-to-document). Our component is weakly supervised from document pairs and can align at multiple levels. Our evaluation on predicting document-to-document relationships and sentence-to-document relationships on the tasks of citation recommendation and plagiarism detection shows that our approach outperforms previously established hierarchical, attention encoders based on recurrent and transformer contextualization that are unaware of structural correspondence between documents.</abstract>
       <url hash="b20b6d53">2020.emnlp-main.407</url>
@@ -5489,7 +5489,7 @@
       <author><first>Wenhao</first><last>Liu</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Yao</first><last>Wan</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>5064–5082</pages>
@@ -5519,7 +5519,7 @@
       <author><first>Xilun</first><last>Chen</last></author>
       <author><first>Asish</first><last>Ghoshal</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Sonal</first><last>Gupta</last></author>
       <pages>5090–5100</pages>
       <abstract>Task-oriented semantic parsing is a critical component of virtual assistants, which is responsible for understanding the user’s intents (set reminder, play music, etc.). Recent advances in deep learning have enabled several approaches to successfully parse more complex queries (Gupta et al., 2018; Rongali et al.,2020), but these models require a large amount of annotated training data to parse queries on new domains (e.g. reminder, music). In this paper, we focus on adapting task-oriented semantic parsers to low-resource domains, and propose a novel method that outperforms a supervised neural model at a 10-fold data reduction. In particular, we identify two fundamental factors for low-resource domain adaptation: better representation learning and better training techniques. Our representation learning uses BART (Lewis et al., 2019) to initialize our model which outperforms encoder-only pre-trained representations used in previous work. Furthermore, we train with optimization-based meta-learning (Finn et al., 2017) to improve generalization to low-resource domains. This approach significantly outperforms all baseline methods in the experiments on a newly collected multi-domain task-oriented semantic parsing dataset (TOPv2), which we release to the public.</abstract>
@@ -5597,7 +5597,7 @@
     <paper id="419">
       <title>Controllable Meaning Representation to Text Generation: Linearization and Data Augmentation Strategies</title>
       <author><first>Chris</first><last>Kedzie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>5160–5185</pages>
       <abstract>We study the degree to which neural sequence-to-sequence models exhibit fine-grained controllability when performing natural language generation from a meaning representation. Using two task-oriented dialogue generation benchmarks, we systematically compare the effect of four input linearization strategies on controllability and faithfulness. Additionally, we evaluate how a phrase-based data augmentation method can improve performance. We find that properly aligning input sequences during training leads to highly controllable generation, both when training from scratch or when fine-tuning a larger pre-trained model. Data augmentation further improves control on difficult, randomly generated utterance plans.</abstract>
       <url hash="5d951604">2020.emnlp-main.419</url>
@@ -5635,7 +5635,7 @@
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Adithya</first><last>Pratapa</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Zaid</first><last>Sheikh</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
@@ -5649,8 +5649,8 @@
     <paper id="423">
       <title>Tackling the Low-resource Challenge for Canonical Segmentation</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>5237–5250</pages>
       <abstract>Canonical morphological segmentation consists of dividing words into their standardized morphemes. Here, we are interested in approaches for the task when training data is limited. We compare model performance in a simulated low-resource setting for the high-resource languages German, English, and Indonesian to experiments on new datasets for the truly low-resource languages Popoluca and Tepehua. We explore two new models for the task, borrowing from the closely related area of morphological generation: an LSTM pointer-generator and a sequence-to-sequence model with hard monotonic attention trained with imitation learning. We find that, in the low-resource setting, the novel approaches out-perform existing ones on all languages by up to 11.4% accuracy. However, while accuracy in emulated low-resource scenarios is over 50% for all languages, for the truly low-resource languages Popoluca and Tepehua, our best model only obtains 37.4% and 28.4% accuracy, respectively. Thus, we conclude that canonical segmentation is still a challenging task for low-resource languages.</abstract>
       <url hash="c9b8a7d2">2020.emnlp-main.423</url>
@@ -5660,10 +5660,10 @@
     </paper>
     <paper id="424">
       <title><fixed-case>IGT</fixed-case>2<fixed-case>P</fixed-case>: From Interlinear Glossed Texts to Paradigms</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Ling</first><last>Liu</last></author>
       <author><first>Changbing</first><last>Yang</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>5251–5262</pages>
       <abstract>An intermediate step in the linguistic analysis of an under-documented language is to find and organize inflected forms that are attested in natural speech. From this data, linguists generate unseen inflected word forms in order to test hypotheses about the language’s inflectional patterns and to complete inflectional paradigm tables. To get the data linguists spend many hours manually creating interlinear glossed texts (IGTs). We introduce a new task that speeds this process and automatically generates new morphological resources for natural language processing systems: IGT-to-paradigms (IGT2P). IGT2P generates entire morphological paradigms from IGT input. We show that existing morphological reinflection models can solve the task with 21% to 64% accuracy, depending on the language. We further find that (i) having a language expert spend only a few hours cleaning the noisy IGT data improves performance by as much as 21 percentage points, and (ii) POS tags, which are generally considered a necessary part of NLP morphological reinflection input, have no effect on the accuracy of the models considered here.</abstract>
@@ -5726,7 +5726,7 @@
       <author><first>Tal</first><last>August</last></author>
       <author><first>Lauren</first><last>Kim</last></author>
       <author><first>Katharina</first><last>Reinecke</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>5327–5344</pages>
       <abstract>Communicating complex scientific ideas without misleading or overwhelming the public is challenging. While science communication guides exist, they rarely offer empirical evidence for how their strategies are used in practice. Writing strategies that can be automatically recognized could greatly support science communication efforts by enabling tools to detect and suggest strategies for writers. We compile a set of writing strategies drawn from a wide range of prescriptive sources and develop an annotation scheme allowing humans to recognize them. We collect a corpus of 128k science writing documents in English and annotate a subset of this corpus. We use the annotations to train transformer-based classifiers and measure the strategies’ use in the larger corpus. We find that the use of strategies, such as storytelling and emphasizing the most important findings, varies significantly across publications with different reader audiences.</abstract>
       <url hash="8b64a852">2020.emnlp-main.429</url>
@@ -5754,7 +5754,7 @@
       <author><first>Alan</first><last>Ramponi</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Rosario</first><last>Lombardo</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>5357–5367</pages>
       <abstract>We introduce Biomedical Event Extraction as Sequence Labeling (BeeSL), a joint end-to-end neural information extraction model. BeeSL recasts the task as sequence labeling, taking advantage of a multi-label aware encoding strategy and jointly modeling the intermediate tasks via multi-task learning. BeeSL is fast, accurate, end-to-end, and unlike current methods does not require any external knowledge base or preprocessing tools. BeeSL outperforms the current best system (Li et al., 2019) on the Genia 2011 benchmark by 1.57% absolute F1 score reaching 60.22% F1, establishing a new state of the art for the task. Importantly, we also provide first results on biomedical event extraction without gold entity information. Empirical results show that BeeSL’s speed and accuracy makes it a viable approach for large-scale real-world scenarios.</abstract>
       <url hash="27326b23">2020.emnlp-main.431</url>
@@ -5823,8 +5823,8 @@
       <author><first>Yogarshi</first><last>Vyas</last></author>
       <author><first>Jie</first><last>Ma</last></author>
       <author><first>Parminder</first><last>Bhatia</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>5412–5417</pages>
       <abstract>In this paper, we propose a neural architecture and a set of training methods for ordering events by predicting temporal relations. Our proposed models receive a pair of events within a span of text as input and they identify temporal relations (Before, After, Equal, Vague) between them. Given that a key challenge with this task is the scarcity of annotated data, our models rely on either pretrained representations (i.e. RoBERTa, BERT or ELMo), transfer and multi-task learning (by leveraging complementary datasets), and self-training techniques. Experiments on the MATRES dataset of English documents establish a new state-of-the-art on this task.</abstract>
       <url hash="2f31f33a">2020.emnlp-main.436</url>
@@ -5851,7 +5851,7 @@
       <author><first>Dimitrina</first><last>Zlatkova</last></author>
       <author><first>Yoan</first><last>Dinkov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>5427–5444</pages>
       <abstract>We propose EXAMS – a new benchmark dataset for cross-lingual and multilingual question answering for high school examinations. We collected more than 24,000 high-quality high school exam questions in 16 languages, covering 8 language families and 24 school subjects from Natural Sciences and Social Sciences, among others.EXAMS offers unique fine-grained evaluation framework across multiple languages and subjects, which allows precise analysis and comparison of the proposed models. We perform various experiments with existing top-performing multilingual pre-trained models and show that EXAMS offers multiple challenges that require multilingual knowledge and reasoning in multiple domains. We hope that EXAMS will enable researchers to explore challenging reasoning and knowledge transfer methods and pre-trained models for school question answering in various languages which was not possible by now. The data, code, pre-trained models, and evaluation are available at <url>http://github.com/mhardalov/exams-qa</url>.</abstract>
       <url hash="34ad85cd">2020.emnlp-main.438</url>
@@ -5862,7 +5862,7 @@
     <paper id="439">
       <title>End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems</title>
       <author><first>Siamak</first><last>Shakeri</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
       <author><first>Patrick</first><last>Ng</last></author>
       <author><first>Feng</first><last>Nan</last></author>
@@ -5880,13 +5880,13 @@
       <title>Multi-Stage Pre-training for Low-Resource Domain Adaptation</title>
       <author><first>Rong</first><last>Zhang</last></author>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <author><first>Anthony</first><last>Ferritto</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <pages>5461–5468</pages>
       <abstract>Transfer learning techniques are particularly useful for NLP tasks where a sizable amount of high-quality annotated data is difficult to obtain. Current approaches directly adapt a pretrained language model (LM) on in-domain text before fine-tuning to downstream tasks. We show that extending the vocabulary of the LM with domain-specific terms leads to further gains. To a bigger effect, we utilize structure in the unlabeled data to create auxiliary synthetic tasks, which helps the LM transfer to downstream tasks. We apply these approaches incrementally on a pretrained Roberta-large LM and show considerable performance gain on three tasks in the IT domain: Extractive Reading Comprehension, Document Ranking and Duplicate Question Detection.</abstract>
@@ -5897,7 +5897,7 @@
     </paper>
     <paper id="441">
       <title><fixed-case>ISAAQ</fixed-case> - Mastering Textbook Questions with Pre-trained Transformers and Bottom-Up and Top-Down Attention</title>
-      <author><first>Jose Manuel</first><last>Gomez-Perez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>Jose Manuel</first><last>Gomez-Perez</last></author>
       <author><first>Raúl</first><last>Ortega</last></author>
       <pages>5469–5479</pages>
       <abstract>Textbook Question Answering is a complex task in the intersection of Machine Comprehension and Visual Question Answering that requires reasoning with multimodal information from text and diagrams. For the first time, this paper taps on the potential of transformer language models and bottom-up and top-down attention to tackle the language and visual understanding challenges this task entails. Rather than training a language-visual transformer from scratch we rely on pre-trained transformers, fine-tuning and ensembling. We add bottom-up and top-down attention to identify regions of interest corresponding to diagram constituents and their relationships, improving the selection of relevant visual information for each question and answer options. Our system ISAAQ reports unprecedented success in all TQA question types, with accuracies of 81.36%, 71.11% and 55.12% on true/false, text-only and diagram multiple choice questions. ISAAQ also demonstrates its broad applicability, obtaining state-of-the-art results in other demanding datasets.</abstract>
@@ -5976,7 +5976,7 @@
       <title>Sequence-Level Mixed Sample Data Augmentation</title>
       <author><first>Demi</first><last>Guo</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>5547–5552</pages>
       <abstract>Despite their empirical success, neural networks still have difficulty capturing compositional aspects of natural language. This work proposes a simple data augmentation approach to encourage compositional behavior in neural models for sequence-to-sequence problems. Our approach, SeqMix, creates new synthetic examples by softly combining input/output sequences from the training set. We connect this approach to existing techniques such as SwitchOut and word dropout, and show that these techniques are all essentially approximating variants of a single objective. SeqMix consistently yields approximately 1.0 BLEU improvement on five different translation datasets over strong Transformer baselines. On tasks that require strong compositional generalization such as SCAN and semantic parsing, SeqMix also offers further improvements.</abstract>
       <url hash="6768f85c">2020.emnlp-main.447</url>
@@ -6039,7 +6039,7 @@
       <title>Affective Event Classification with Discourse-enhanced Self-training</title>
       <author><first>Yuan</first><last>Zhuang</last></author>
       <author><first>Tianyu</first><last>Jiang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>5608–5617</pages>
       <abstract>Prior research has recognized the need to associate affective polarities with events and has produced several techniques and lexical resources for identifying affective events. Our research introduces new classification models to assign affective polarity to event phrases. First, we present a BERT-based model for affective event classification and show that the classifier achieves substantially better performance than a large affective event knowledge base. Second, we present a discourse-enhanced self-training method that iteratively improves the classifier with unlabeled data. The key idea is to exploit event phrases that occur with a coreferent sentiment expression. The discourse-enhanced self-training algorithm iteratively labels new event phrases based on both the classifier’s predictions and the polarities of the event’s coreferent sentiment expressions. Our results show that discourse-enhanced self-training further improves both recall and precision for affective event classification.</abstract>
       <url hash="1488f613">2020.emnlp-main.452</url>
@@ -6075,7 +6075,7 @@
     <paper id="455">
       <title>Mind Your Inflections! <fixed-case>I</fixed-case>mproving <fixed-case>NLP</fixed-case> for Non-Standard <fixed-case>E</fixed-case>nglishes with <fixed-case>B</fixed-case>ase-<fixed-case>I</fixed-case>nflection <fixed-case>E</fixed-case>ncoding</title>
       <author><first>Samson</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Lav</first><last>Varshney</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>5647–5663</pages>
@@ -6089,7 +6089,7 @@
     </paper>
     <paper id="456">
       <title>Measuring the Similarity of Grammatical Gender Systems by Comparing Partitions</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Adina</first><last>Williams</last></author>
       <author><first>Shijia</first><last>Liu</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -6106,7 +6106,7 @@
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5676–5686</pages>
       <abstract>The performance of the Chinese Word Segmentation (CWS) systems has gradually reached a plateau with the rapid development of deep neural networks, especially the successful use of large pre-trained models. In this paper, we take stock of what we have achieved and rethink what’s left in the CWS task. Methodologically, we propose a fine-grained evaluation for existing CWS systems, which not only allows us to diagnose the strengths and weaknesses of existing models (under the in-dataset setting), but enables us to quantify the discrepancy between different criterion and alleviate the negative transfer problem when doing multi-criteria learning. Strategically, despite not aiming to propose a novel model in this paper, our comprehensive experiments on eight models and seven datasets, as well as thorough analysis, could search for some promising direction for future research. We make all codes publicly available and release an interface that can quickly evaluate and diagnose user’s models: <url>https://github.com/neulab/InterpretEval</url></abstract>
       <url hash="8215118b">2020.emnlp-main.457</url>
@@ -6175,7 +6175,7 @@
       <title><fixed-case>T</fixed-case>e<fixed-case>MP</fixed-case>: Temporal Message Passing for Temporal Knowledge Graph Completion</title>
       <author><first>Jiapeng</first><last>Wu</last></author>
       <author><first>Meng</first><last>Cao</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>William L.</first><last>Hamilton</last></author>
       <pages>5730–5746</pages>
       <abstract>Inferring missing facts in temporal knowledge graphs (TKGs) is a fundamental and challenging task. Previous works have approached this problem by augmenting methods for static knowledge graphs to leverage time-dependent representations. However, these methods do not explicitly leverage multi-hop structural information and temporal facts from recent time steps to enhance their predictions. Additionally, prior work does not explicitly address the temporal sparsity and variability of entity distributions in TKGs. We propose the Temporal Message Passing (TeMP) framework to address these challenges by combining graph neural networks, temporal dynamics models, data imputation and frequency-based gating techniques. Experiments on standard TKG tasks show that our approach provides substantial gains compared to the previous state of the art, achieving a 10.7% average relative improvement in Hits@10 across three standard benchmarks. Our analysis also reveals important sources of variability both within and across TKG datasets, and we introduce several simple but strong baselines that outperform the prior state of the art in certain settings.</abstract>
@@ -6203,7 +6203,7 @@
       <title>An Empirical Study of Generation Order for Machine Translation</title>
       <author><first>William</first><last>Chan</last></author>
       <author><first>Mitchell</first><last>Stern</last></author>
-      <author><first>Jamie</first><last>Kiros</last></author>
+      <author id="jamie-kiros"><first>Jamie</first><last>Kiros</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
       <pages>5764–5773</pages>
       <abstract>In this work, we present an empirical study of generation order for machine translation. Building on recent advances in insertion-based modeling, we first introduce a soft order-reward framework that enables us to train models to follow arbitrary oracle generation policies. We then make use of this framework to explore a large variety of generation orders, including uninformed orders, location-based orders, frequency-based orders, content-based orders, and model-based orders. Curiously, we find that for the WMT’14 English <tex-math>\to</tex-math> German and WMT’18 English <tex-math>\to</tex-math> Chinese translation tasks, order does not have a substantial impact on output quality. Moreover, for English <tex-math>\to</tex-math> German, we even discover that unintuitive orderings such as alphabetical and shortest-first can match the performance of a standard Transformer, suggesting that traditional left-to-right generation may not be necessary to achieve high performance.</abstract>
@@ -6230,7 +6230,7 @@
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>5783–5797</pages>
       <abstract>Ambiguity is inherent to open-domain question answering; especially when exploring new topics, it can be difficult to ask questions that have a single, unambiguous answer. In this paper, we introduce AmbigQA, a new open-domain question answering task which involves finding every plausible answer, and then rewriting the question for each one to resolve the ambiguity. To study this task, we construct AmbigNQ, a dataset covering 14,042 questions from NQ-open, an existing open-domain QA benchmark. We find that over half of the questions in NQ-open are ambiguous, with diverse sources of ambiguity such as event and entity references. We also present strong baseline models for AmbigQA which we show benefit from weakly supervised learning that incorporates NQ-open, strongly suggesting our new task and data will support significant future research effort. Our data and baselines are available at <url>https://nlp.cs.washington.edu/ambigqa</url>.</abstract>
       <url hash="beed3e95">2020.emnlp-main.466</url>
@@ -6273,7 +6273,7 @@
       <title>Few-Shot Complex Knowledge Base Question Answering via Meta Reinforcement Learning</title>
       <author><first>Yuncheng</first><last>Hua</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Guilin</first><last>Qi</last></author>
       <author><first>Tongtong</first><last>Wu</last></author>
       <pages>5827–5837</pages>
@@ -6311,7 +6311,7 @@
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
       <author><first>Chiyu</first><last>Zhang</last></author>
       <author><first>AbdelRahim</first><last>Elmadany</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>5855–5876</pages>
       <abstract>Although prediction of dialects is an important language processing task, with a wide range of applications, existing work is largely limited to coarse-grained varieties. Inspired by geolocation research, we propose the novel task of Micro-Dialect Identification (MDI) and introduce MARBERT, a new language model with striking abilities to predict a fine-grained variety (as small as that of a city) given a single, short message. For modeling, we offer a range of novel spatially and linguistically-motivated multi-task learning models. To showcase the utility of our models, we introduce a new, large-scale dataset of Arabic micro-varieties (low-resource) suited to our tasks. MARBERT predicts micro-dialects with 9.9% F1, 76 better than a majority class baseline. Our new language model also establishes new state-of-the-art on several external tasks.</abstract>
       <url hash="4778a663">2020.emnlp-main.472</url>
@@ -6419,7 +6419,7 @@
       <title><fixed-case>CCA</fixed-case>ligned: A Massive Collection of Cross-Lingual Web-Document Pairs</title>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>5960–5969</pages>
       <abstract>Cross-lingual document alignment aims to identify pairs of documents in two distinct languages that are of comparable content or translations of each other. In this paper, we exploit the signals embedded in URLs to label web documents at scale with an average precision of 94.5% across different language pairs. We mine sixty-eight snapshots of the Common Crawl corpus and identify web document pairs that are translations of each other. We release a new web dataset consisting of over 392 million URL pairs from Common Crawl covering documents in 8144 language pairs of which 137 pairs include English. In addition to curating this massive dataset, we introduce baseline methods that leverage cross-lingual representations to identify aligned documents based on their textual content. Finally, we demonstrate the value of this parallel documents dataset through a downstream task of mining parallel sentences and measuring the quality of machine translations from models trained on this mined data. Our objective in releasing this dataset is to foster new research in cross-lingual NLP across a variety of low, medium, and high-resource languages.</abstract>
@@ -6551,7 +6551,7 @@
       <author><first>Lidong</first><last>Bing</last></author>
       <author><first>Canasai</first><last>Kruengkrai</last></author>
       <author><first>Thien Hai</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>6045–6057</pages>
@@ -6595,8 +6595,8 @@
       <author><first>Florian</first><last>Mai</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
       <author><first>Ivan</first><last>Montero</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>6076–6092</pages>
       <abstract>Text autoencoders are commonly used for conditional generation tasks such as style transfer. We propose methods which are plug and play, where any pretrained autoencoder can be used, and only require learning a mapping within the autoencoder’s embedding space, training embedding-to-embedding (Emb2Emb). This reduces the need for labeled training data for the task and makes the training procedure more efficient. Crucial to the success of this method is a loss term for keeping the mapped embedding on the manifold of the autoencoder and a mapping which is trained to navigate the manifold by learning offset vectors. Evaluations on style transfer tasks both with and without sequence-to-sequence supervision show that our method performs better than or comparable to strong baselines while being up to four times faster.</abstract>
       <url hash="801922a1">2020.emnlp-main.491</url>
@@ -6675,9 +6675,9 @@
     </paper>
     <paper id="497">
       <title>Effective Unsupervised Domain Adaptation with Adversarially Trained Language Models</title>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Dinh</first><last>Phung</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>6163–6173</pages>
       <abstract>Recent work has shown the importance of adaptation of broad-coverage contextualised embedding models on the domain of the target task of interest. Current self-supervised adaptation methods are simplistic, as the training signal comes from a small percentage of <i>randomly</i> masked-out tokens. In this paper, we show that careful masking strategies can bridge the knowledge gap of masked language models (MLMs) about the domains more effectively by allocating self-supervision where it is needed. Furthermore, we propose an effective training strategy by adversarially masking out those tokens which are harder to reconstruct by the underlying MLM. The adversarial objective leads to a challenging combinatorial optimisation problem over <i>subsets</i> of tokens, which we tackle efficiently through relaxation to a variational lowerbound and dynamic programming. On six unsupervised domain adaptation tasks involving named entity recognition, our method strongly outperforms the random masking strategy and achieves up to +1.64 F1 score improvements.</abstract>
       <url hash="e8652a8e">2020.emnlp-main.497</url>
@@ -6757,7 +6757,7 @@
     </paper>
     <paper id="503">
       <title>Interpreting Open-Domain Modifiers: Decomposition of <fixed-case>W</fixed-case>ikipedia Categories into Disambiguated Property-Value Pairs</title>
-      <author><first>Marius</first><last>Pasca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
       <pages>6218–6228</pages>
       <abstract>This paper proposes an open-domain method for automatically annotating modifier constituents (20th-century’) within Wikipedia categories (20th-century male writers) with properties (date of birth). The annotations offer a semantically-anchored understanding of the role of the constituents in defining the underlying meaning of the categories. In experiments over an evaluation set of Wikipedia categories, the proposed method annotates constituent modifiers as semantically-anchored properties, rather than as mere strings in a previous method. It does so at a better trade-off between precision and recall.</abstract>
       <url hash="37d541ee">2020.emnlp-main.503</url>
@@ -6799,7 +6799,7 @@
       <author><first>Meng</first><last>Cao</last></author>
       <author><first>Yue</first><last>Dong</last></author>
       <author><first>Jiapeng</first><last>Wu</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>6251–6258</pages>
       <abstract>Neural abstractive summarization systems have achieved promising progress, thanks to the availability of large-scale datasets and models pre-trained with self-supervised methods. However, ensuring the factual consistency of the generated summaries for abstractive summarization systems is a challenge. We propose a post-editing corrector module to address this issue by identifying and correcting factual errors in generated summaries. The neural corrector model is pre-trained on artificial examples that are created by applying a series of heuristic transformations on reference summaries. These transformations are inspired by the error analysis of state-of-the-art summarization model outputs. Experimental results show that our model is able to correct factual errors in summaries generated by other neural summarization models and outperforms previous models on factual consistency evaluation on the CNN/DailyMail dataset. We also find that transferring from artificial error correction to downstream settings is still very challenging.</abstract>
       <url hash="e034590c">2020.emnlp-main.506</url>
@@ -6850,7 +6850,7 @@
       <title>Summarizing Text on Any Aspects: A Knowledge-Informed Weakly-Supervised Approach</title>
       <author><first>Bowen</first><last>Tan</last></author>
       <author><first>Lianhui</first><last>Qin</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>6301–6309</pages>
       <abstract>Given a document and a target aspect (e.g., a topic of interest), aspect-based abstractive summarization attempts to generate a summary with respect to the aspect. Previous studies usually assume a small pre-defined set of aspects and fall short of summarizing on other diverse topics. In this work, we study summarizing on <i>arbitrary</i> aspects relevant to the document, which significantly expands the application of the task in practice. Due to the lack of supervision data, we develop a new weak supervision construction method and an aspect modeling scheme, both of which integrate rich external knowledge sources such as ConceptNet and Wikipedia. Experiments show our approach achieves performance boosts on summarizing both real and synthetic documents given pre-defined or arbitrary aspects.</abstract>
@@ -6873,7 +6873,7 @@
     <paper id="512">
       <title>Online Conversation Disentanglement with Pointer Networks</title>
       <author><first>Tao</first><last>Yu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6321–6330</pages>
       <abstract>Huge amounts of textual conversations occur online every day, where multiple conversations take place concurrently. Interleaved conversations lead to difficulties in not only following the ongoing discussions but also extracting relevant information from simultaneous messages. Conversation disentanglement aims to separate intermingled messages into detached conversations. However, existing disentanglement methods rely mostly on handcrafted features that are dataset specific, which hinders generalization and adaptability. In this work, we propose an end-to-end online framework for conversation disentanglement that avoids time-consuming domain-specific feature engineering. We design a novel way to embed the whole utterance that comprises timestamp, speaker, and message text, and propose a custom attention mechanism that models disentanglement as a pointing problem while effectively capturing inter-utterance interactions in an end-to-end fashion. We also introduce a joint-learning objective to better capture contextual information. Our experiments on the Ubuntu IRC dataset show that our method achieves state-of-the-art performance in both link and conversation prediction tasks.</abstract>
       <url hash="c8ec18b2">2020.emnlp-main.512</url>
@@ -6915,7 +6915,7 @@
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Juanzi</first><last>Li</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>6355–6364</pages>
       <abstract>Entity alignment (EA) aims at building a unified Knowledge Graph (KG) of rich content by linking the equivalent entities from various KGs. GNN-based EA methods present promising performance by modeling the KG structure defined by relation triples. However, attribute triples can also provide crucial alignment signal but have not been well explored yet. In this paper, we propose to utilize an attributed value encoder and partition the KG into subgraphs to model the various types of attribute triples efficiently. Besides, the performances of current EA methods are overestimated because of the name-bias of existing EA datasets. To make an objective evaluation, we propose a hard experimental setting where we select equivalent entity pairs with very different names as the test set. Under both the regular and hard settings, our method achieves significant improvements (5.10% on average Hits@1 in DBP15k) over 12 baselines in cross-lingual and monolingual datasets. Ablation studies on different subgraphs and a case study about attribute types further demonstrate the effectiveness of our method. Source code and data can be found at <url>https://github.com/thunlp/explore-and-evaluate</url>.</abstract>
       <url hash="b0a1562e">2020.emnlp-main.515</url>
@@ -6966,7 +6966,7 @@
       <author><first>Fabio</first><last>Petroni</last></author>
       <author><first>Martin</first><last>Josifoski</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>6397–6407</pages>
       <abstract>This paper introduces a conceptually simple, scalable, and highly effective BERT-based entity linking model, along with an extensive evaluation of its accuracy-speed trade-off. We present a two-stage zero-shot linking algorithm, where each entity is defined only by a short textual description. The first stage does retrieval in a dense space defined by a bi-encoder that independently embeds the mention context and the entity descriptions. Each candidate is then re-ranked with a cross-encoder, that concatenates the mention and entity text. Experiments demonstrate that this approach is state of the art on recent zero-shot benchmarks (6 point absolute gains) and also on more established non-zero-shot evaluations (e.g. TACKBP-2010), despite its relative simplicity (e.g. no explicit entity embeddings or manually engineered mention tables). We also show that bi-encoder linking is very fast with nearest neighbor search (e.g. linking with 5.9 million candidates in 2 milliseconds), and that much of the accuracy gain from the more expensive cross-encoder can be transferred to the bi-encoder via knowledge distillation. Our code and models are available at <url>https://github.com/facebookresearch/BLINK</url>.</abstract>
       <url hash="4ea66737">2020.emnlp-main.519</url>
@@ -6978,12 +6978,12 @@
       <title>A Dataset for Tracking Entities in Open Domain Procedural Text</title>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Michal</first><last>Guerquin</last></author>
       <author><first>Kyle</first><last>Richardson</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6408–6417</pages>
       <abstract>We present the first dataset for tracking state changes in procedural text from arbitrary domains by using an unrestricted (open) vocabulary. For example, in a text describing fog removal using potatoes, a car window may transition between being foggy, sticky, opaque, and clear. Previous formulations of this task provide the text and entities involved, and ask how those entities change for just a small, pre-defined set of attributes (e.g., location), limiting their fidelity. Our solution is a new task formulation where given just a procedural text as input, the task is to generate a set of state change tuples (entity, attribute, before-state, after-state) for each step, where the entity, attribute, and state values must be predicted from an open vocabulary. Using crowdsourcing, we create OPENPI, a high-quality (91.5% coverage as judged by humans and completely vetted), and large-scale dataset comprising 29,928 state changes over 4,050 sentences from 810 procedural real-world paragraphs from WikiHow.com. A current state-of-the-art generation model on this task achieves 16.1% F1 based on BLEU metric, leaving enough room for novel model architectures.</abstract>
       <url hash="4f6b6193">2020.emnlp-main.520</url>
@@ -7007,11 +7007,11 @@
     </paper>
     <paper id="522">
       <title>Efficient One-Pass End-to-End Entity Linking for Questions</title>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>6433–6441</pages>
       <abstract>We present ELQ, a fast end-to-end entity linking model for questions, which uses a biencoder to jointly perform mention detection and linking in one pass. Evaluated on WebQSP and GraphQuestions with extended annotations that cover multiple entities per question, ELQ outperforms the previous state of the art by a large margin of +12.7% and +19.6% F1, respectively. With a very fast inference time (1.57 examples/s on a single CPU), ELQ can be useful for downstream question answering systems. In a proof-of-concept experiment, we demonstrate that using ELQ significantly improves the downstream QA performance of GraphRetriever.</abstract>
       <url hash="10da23da">2020.emnlp-main.522</url>
@@ -7025,7 +7025,7 @@
       <author><first>Akari</first><last>Asai</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Hideaki</first><last>Takeda</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>6442–6454</pages>
       <abstract>Entity representations are useful in natural language tasks involving entities. In this paper, we propose new pretrained contextualized representations of words and entities based on the bidirectional transformer. The proposed model treats words and entities in a given text as independent tokens, and outputs contextualized representations of them. Our model is trained using a new pretraining task based on the masked language model of BERT. The task involves predicting randomly masked words and entities in a large entity-annotated corpus retrieved from Wikipedia. We also propose an entity-aware self-attention mechanism that is an extension of the self-attention mechanism of the transformer, and considers the types of tokens (words or entities) when computing attention scores. The proposed model achieves impressive empirical performance on a wide range of entity-related tasks. In particular, it obtains state-of-the-art results on five well-known datasets: Open Entity (entity typing), TACRED (relation classification), CoNLL-2003 (named entity recognition), ReCoRD (cloze-style question answering), and SQuAD 1.1 (extractive question answering). Our source code and pretrained representations are available at <url>https://github.com/studio-ousia/luke</url>.</abstract>
       <url hash="2226eb6f">2020.emnlp-main.523</url>
@@ -7065,7 +7065,7 @@
       <author><first>Allison</first><last>Hegel</last></author>
       <author><first>Sudha</first><last>Rao</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>6485–6504</pages>
       <abstract>Existing language models excel at writing from scratch, but many real-world scenarios require rewriting an existing document to fit a set of constraints. Although sentence-level rewriting has been fairly well-studied, little work has addressed the challenge of rewriting an entire document coherently. In this work, we introduce the task of document-level targeted content transfer and address it in the recipe domain, with a recipe as the document and a dietary restriction (such as vegan or dairy-free) as the targeted constraint. We propose a novel model for this task based on the generative pre-trained language model (GPT-2) and train on a large number of roughly-aligned recipe pairs. Both automatic and human evaluations show that our model out-performs existing methods by generating coherent and diverse rewrites that obey the constraint while remaining close to the original document. Finally, we analyze our model’s rewrites to assess progress toward the goal of making language generation more attuned to constraints that are substantive rather than stylistic.</abstract>
       <url hash="e7405ccc">2020.emnlp-main.526</url>
@@ -7100,7 +7100,7 @@
     <paper id="529">
       <title>Plan ahead: Self-Supervised Text Planning for Paragraph Completion Task</title>
       <author><first>Dongyeop</first><last>Kang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6533–6543</pages>
       <abstract>Despite the recent success of contextualized language models on various NLP tasks, language model itself cannot capture textual coherence of a long, multi-sentence document (e.g., a paragraph). Humans often make structural decisions on what and how to say about before making utterances. Guiding surface realization with such high-level decisions and structuring text in a coherent way is essentially called a planning process. Where can the model learn such high-level coherence? A paragraph itself contains various forms of inductive coherence signals called self-supervision in this work, such as sentence orders, topical keywords, rhetorical structures, and so on. Motivated by that, this work proposes a new paragraph completion task PARCOM; predicting masked sentences in a paragraph. However, the task suffers from predicting and selecting appropriate topical content with respect to the given context. To address that, we propose a self-supervised text planner SSPlanner that predicts what to say first (content prediction), then guides the pretrained language model (surface realization) using the predicted content. SSPlanner outperforms the baseline generation models on the paragraph completion task in both automatic and human evaluation. We also find that a combination of noun and verb types of keywords is the most effective for content selection. As more number of content keywords are provided, overall generation quality also increases.</abstract>
       <url hash="fd714e70">2020.emnlp-main.529</url>
@@ -7126,7 +7126,7 @@
     <paper id="531">
       <title>Towards Persona-Based Empathetic Conversational Models</title>
       <author><first>Peixiang</first><last>Zhong</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Hao</first><last>Wang</last></author>
       <author><first>Yong</first><last>Liu</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
@@ -7142,7 +7142,7 @@
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Zeyu</first><last>Gao</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>6567–6580</pages>
       <abstract>The global market size of conversational assistants (chatbots) is expected to grow to USD 9.4 billion by 2024, according to MarketsandMarkets. Despite the wide use of chatbots, leakage of personal information through chatbots poses serious privacy concerns for their users. In this work, we propose to protect personal information by warning users of detected suspicious sentences generated by conversational assistants. The detection task is formulated as an alignment optimization problem and a new dataset PERSONA-LEAKAGE is collected for evaluation. In this paper, we propose two novel constrained alignment models, which consistently outperform baseline methods on Moreover, we conduct analysis on the behavior of recently proposed personalized chit-chat dialogue systems. The empirical results show that those systems suffer more from personal information disclosure than the widely used Seq2Seq model and the language model. In those cases, a significant number of information leaking utterances can be detected by our models with high precision.</abstract>
       <url hash="878db305">2020.emnlp-main.532</url>
@@ -7154,7 +7154,7 @@
       <title>Response Selection for Multi-Party Conversations with Dynamic Topic Tracking</title>
       <author><first>Weishi</first><last>Wang</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6581–6591</pages>
       <abstract>While participants in a multi-party multi-turn conversation simultaneously engage in multiple conversation topics, existing response selection methods are developed mainly focusing on a two-party single-conversation scenario. Hence, the prolongation and transition of conversation topics are ignored by current methods. In this work, we frame response selection as a dynamic topic tracking task to match the topic between the response and relevant conversation context. With this new formulation, we propose a novel multi-task learning framework that supports efficient encoding through large pretrained models with only two utterances at once to perform dynamic topic disentanglement and response selection. We also propose Topic-BERT an essential pretraining step to embed topic information into BERT with self-supervised learning. Experimental results on the DSTC-8 Ubuntu IRC dataset show state-of-the-art results in response selection and topic disentanglement tasks outperforming existing methods by a good margin.</abstract>
       <url hash="409291ac">2020.emnlp-main.533</url>
@@ -7228,7 +7228,7 @@
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Xingshan</first><last>Zeng</last></author>
       <author><first>Haisong</first><last>Zhang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>6640–6650</pages>
       <abstract>Quotations are crucial for successful explanations and persuasions in interpersonal communications. However, finding what to quote in a conversation is challenging for both humans and machines. This work studies automatic quotation generation in an online conversation and explores how language consistency affects whether a quotation fits the given context. Here, we capture the contextual consistency of a quotation in terms of latent topics, interactions with the dialogue history, and coherence to the query turn’s existing contents. Further, an encoder-decoder neural framework is employed to continue the context with a quotation via language generation. Experiment results on two large-scale datasets in English and Chinese demonstrate that our quotation generation model outperforms the state-of-the-art models. Further analysis shows that topic, interaction, and query consistency are all helpful to learn how to quote in online conversations.</abstract>
       <url hash="6715f31b">2020.emnlp-main.538</url>
@@ -7239,7 +7239,7 @@
       <title>Profile Consistency Identification for Open-domain Dialogue Agents</title>
       <author><first>Haoyu</first><last>Song</last></author>
       <author><first>Yan</first><last>Wang</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Zhengyu</first><last>Zhao</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Xiaojiang</first><last>Liu</last></author>
@@ -7360,7 +7360,7 @@
       <author><first>Mucheng</first><last>Ren</last></author>
       <author><first>Xiubo</first><last>Geng</last></author>
       <author><first>Tao</first><last>Qin</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
       <pages>6745–6758</pages>
       <abstract>We focus on the task of reasoning over paragraph effects in situation, which requires a model to understand the cause and effect described in a background paragraph, and apply the knowledge to a novel situation. Existing works ignore the complicated reasoning process and solve it with a one-step “black box” model. Inspired by human cognitive processes, in this paper we propose a sequential approach for this task which explicitly models each step of the reasoning process with neural network modules. In particular, five reasoning modules are designed and learned in an end-to-end manner, which leads to a more interpretable model. Experimental results on the ROPES dataset demonstrate the effectiveness and explainability of our proposed approach.</abstract>
@@ -7396,7 +7396,7 @@
       <author><first>Ledell</first><last>Wu</last></author>
       <author><first>Sergey</first><last>Edunov</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>6769–6781</pages>
       <abstract>Open-domain question answering relies on efficient passage retrieval to select candidate contexts, where traditional sparse vector space models, such as TF-IDF or BM25, are the de facto method. In this work, we show that retrieval can be practically implemented using dense representations alone, where embeddings are learned from a small number of questions and passages by a simple dual-encoder framework. When evaluated on a wide range of open-domain QA datasets, our dense retriever outperforms a strong Lucene-BM25 system greatly by 9%-19% absolute in terms of top-20 passage retrieval accuracy, and helps our end-to-end QA system establish new state-of-the-art on multiple open-domain QA benchmarks.</abstract>
       <url hash="351ca9c5">2020.emnlp-main.550</url>
@@ -7444,7 +7444,7 @@
     <paper id="554">
       <title><fixed-case>L</fixed-case>earning <fixed-case>M</fixed-case>usic <fixed-case>H</fixed-case>elps <fixed-case>Y</fixed-case>ou <fixed-case>R</fixed-case>ead: <fixed-case>U</fixed-case>sing Transfer to Study Linguistic Structure in Language Models</title>
       <author><first>Isabel</first><last>Papadimitriou</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>6829–6839</pages>
       <abstract>We propose transfer learning as a method for analyzing the encoding of grammatical structure in neural language models. We train LSTMs on non-linguistic data and evaluate their performance on natural language to assess which kinds of data induce generalizable structural features that LSTMs can use for natural language. We find that training on non-linguistic data with latent structure (MIDI music or Java code) improves test performance on natural language, despite no overlap in surface form or vocabulary. To pinpoint the kinds of abstract structure that models may be encoding to lead to this improvement, we run similar experiments with two artificial parentheses languages: one which has a hierarchical recursive structure, and a control which has paired tokens but no recursion. Surprisingly, training a model on either of these artificial languages leads the same substantial gains when testing on natural language. Further experiments on transfer between natural languages controlling for vocabulary overlap show that zero-shot performance on a test language is highly correlated with typological syntactic similarity to the training language, suggesting that representations induced by pre-training correspond to the cross-linguistic syntactic properties. Our results provide insights into the ways that neural models represent abstract syntactic structure, and also about the kind of structural inductive biases which allow for natural language acquisition.</abstract>
       <url hash="d2d67a12">2020.emnlp-main.554</url>
@@ -7477,7 +7477,7 @@
     </paper>
     <paper id="557">
       <title><fixed-case>B</fixed-case>irds have four legs?! <fixed-case>N</fixed-case>umer<fixed-case>S</fixed-case>ense: <fixed-case>P</fixed-case>robing <fixed-case>N</fixed-case>umerical <fixed-case>C</fixed-case>ommonsense <fixed-case>K</fixed-case>nowledge of <fixed-case>P</fixed-case>re-<fixed-case>T</fixed-case>rained <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odels</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Seyeon</first><last>Lee</last></author>
       <author><first>Rahul</first><last>Khanna</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -7492,8 +7492,8 @@
       <title>Grounded Adaptation for Zero-shot Executable Semantic Parsing</title>
       <author><first>Victor</first><last>Zhong</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Sida I.</first><last>Wang</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>6869–6882</pages>
       <abstract>We propose Grounded Adaptation for Zeroshot Executable Semantic Parsing (GAZP) to adapt an existing semantic parser to new environments (e.g. new database schemas). GAZP combines a forward semantic parser with a backward utterance generator to synthesize data (e.g. utterances and SQL queries) in the new environment, then selects cycle-consistent examples to adapt the parser. Unlike data-augmentation, which typically synthesizes unverified examples in the training environment, GAZP synthesizes examples in the new environment whose input-output consistency are verified through execution. On the Spider, Sparc, and CoSQL zero-shot semantic parsing tasks, GAZP improves logical form and execution accuracy of the baseline parser. Our analyses show that GAZP outperforms data-augmentation in the training environment, performance increases with the amount of GAZP-synthesized data, and cycle-consistency is central to successful adaptation.</abstract>
       <url hash="a66d104e">2020.emnlp-main.558</url>
@@ -7506,7 +7506,7 @@
       <title>An Imitation Game for Learning Semantic Parsers from User Interaction</title>
       <author><first>Ziyu</first><last>Yao</last></author>
       <author><first>Yiqi</first><last>Tang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Huan</first><last>Sun</last></author>
       <author><first>Yu</first><last>Su</last></author>
       <pages>6883–6902</pages>
@@ -7565,7 +7565,7 @@
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Zeyu</first><last>Yan</last></author>
       <author><first>Shuai</first><last>Pang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Jianping</first><last>Shen</last></author>
       <pages>6936–6942</pages>
       <abstract>On the WikiSQL benchmark, state-of-the-art text-to-SQL systems typically take a slot- filling approach by building several dedicated models for each type of slots. Such modularized systems are not only complex but also of limited capacity for capturing inter-dependencies among SQL clauses. To solve these problems, this paper proposes a novel extraction-linking approach, where a unified extractor recognizes all types of slot mentions appearing in the question sentence before a linker maps the recognized columns to the table schema to generate executable SQL queries. Trained with automatically generated annotations, the proposed method achieves the first place on the WikiSQL benchmark.</abstract>
@@ -7582,7 +7582,7 @@
       <author><first>Tian</first><last>Gan</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>6943–6954</pages>
       <abstract>In existing sophisticated text-to-SQL models, schema linking is often considered as a simple, minor component, belying its importance. By providing a schema linking corpus based on the Spider text-to-SQL dataset, we systematically study the role of schema linking. We also build a simple BERT-based baseline, called Schema-Linking SQL (SLSQL) to perform a data-driven study. We find when schema linking is done well, SLSQL demonstrates good performance on Spider despite its structural simplicity. Many remaining errors are attributable to corpus noise. This suggests schema linking is the crux for the current text-to-SQL task. Our analytic studies provide insights on the characteristics of schema linking for future developments of text-to-SQL tasks.</abstract>
       <url hash="dbaff008">2020.emnlp-main.564</url>
@@ -7755,7 +7755,7 @@
       <author><first>Martin</first><last>Schmitt</last></author>
       <author><first>Sahand</first><last>Sharifzadeh</last></author>
       <author><first>Volker</first><last>Tresp</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>7117–7130</pages>
       <abstract>Knowledge graphs (KGs) can vary greatly from one domain to another. Therefore supervised approaches to both graph-to-text generation and text-to-graph knowledge extraction (semantic parsing) will always suffer from a shortage of domain-specific parallel graph-text data; at the same time, adapting a model trained on a different domain is often impossible due to little or no overlap in entities and relations. This situation calls for an approach that (1) does not need large amounts of annotated data and thus (2) does not need to rely on domain adaptation techniques to work well on different domains. To this end, we present the first approach to unsupervised text generation from KGs and show simultaneously how it can be used for unsupervised semantic parsing. We evaluate our approach on WebNLG v2.1 and a new benchmark leveraging scene graphs from Visual Genome. Our system outperforms strong baselines for both text&lt;-&gt;graph conversion tasks without any manual adaptation from one dataset to the other. In additional experiments, we investigate the impact of using different unsupervised objectives.</abstract>
       <url hash="d993bfb7">2020.emnlp-main.577</url>
@@ -7781,7 +7781,7 @@
       <author><first>Qinzhuo</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>7137–7146</pages>
       <abstract>With the advancements in natural language processing tasks, math word problem solving has received increasing attention. Previous methods have achieved promising results but ignore background common-sense knowledge not directly provided by the problem. In addition, during generation, they focus on local features while neglecting global information. To incorporate external knowledge and global expression information, we propose a novel knowledge-aware sequence-to-tree (KA-S2T) network in which the entities in the problem sequences and their categories are modeled as an entity graph. Based on this entity graph, a graph attention network is used to capture knowledge-aware problem representations. Further, we use a tree-structured decoder with a state aggregation mechanism to capture the long-distance dependency and global expression information. Experimental results on the Math23K dataset revealed that the KA-S2T model can achieve better performance than previously reported best results.</abstract>
       <url hash="af08fb74">2020.emnlp-main.579</url>
@@ -7854,7 +7854,7 @@
       <title><fixed-case>XL</fixed-case>-<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>: A Multilingual Benchmark for Evaluating Semantic Contextualization</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Tommaso</first><last>Pasini</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>7193–7206</pages>
       <abstract>The ability to correctly model distinct meanings of a word is crucial for the effectiveness of semantic representation techniques. However, most existing evaluation benchmarks for assessing this criterion are tied to sense inventories (usually WordNet), restricting their usage to a small subset of knowledge-based representation techniques. The Word-in-Context dataset (WiC) addresses the dependence on sense inventories by reformulating the standard disambiguation task as a binary classification problem; but, it is limited to the English language. We put forward a large multilingual benchmark, XL-WiC, featuring gold standards in 12 new languages from varied language families and with different degrees of resource availability, opening room for evaluation scenarios such as zero-shot cross-lingual transfer. We perform a series of experiments to determine the reliability of the datasets and to set performance baselines for several recent contextualized multilingual models. Experimental results show that even when no tagged instances are available for a target language, models trained solely on the English data can attain competitive performance in the task of distinguishing different meanings of a word, even for distant languages. XL-WiC is available at <url>https://pilehvar.github.io/xlwic/</url>.</abstract>
@@ -7893,7 +7893,7 @@
     <paper id="587">
       <title>Cross-lingual Spoken Language Understanding with Regularized Representation Alignment</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -7924,7 +7924,7 @@
       <author><first>Dhiraj</first><last>Madan</last></author>
       <author><first>Danish</first><last>Contractor</last></author>
       <author id="harshit-kumar"><first>Harshit</first><last>Kumar</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>7263–7269</pages>
       <abstract>Neural Conversational QA tasks such as ShARC require systems to answer questions based on the contents of a given passage. On studying recent state-of-the-art models on the ShARC QA task, we found indications that the model(s) learn spurious clues/patterns in the data-set. Further, a heuristic-based program, built to exploit these patterns, had comparative performance to that of the neural models. In this paper we share our findings about the four types of patterns in the ShARC corpus and how the neural models exploit them. Motivated by the above findings, we create and share a modified data-set that has fewer spurious patterns than the original data-set, consequently allowing models to learn better.</abstract>
       <url hash="a2b16a53">2020.emnlp-main.589</url>
@@ -8042,7 +8042,7 @@
     </paper>
     <paper id="598">
       <title><fixed-case>BERT</fixed-case> Knows <fixed-case>P</fixed-case>unta <fixed-case>C</fixed-case>ana is not just beautiful, it’s gorgeous: Ranking Scalar Adjectives with Contextualised Representations</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>7371–7385</pages>
       <abstract>Adjectives like pretty, beautiful and gorgeous describe positive properties of the nouns they modify but with different intensity. These differences are important for natural language understanding and reasoning. We propose a novel BERT-based approach to intensity detection for scalar adjectives. We model intensity by vectors directly derived from contextualised representations and show they can successfully rank scalar adjectives. We evaluate our models both intrinsically, on gold standard datasets, and on an Indirect Question Answering task. Our results demonstrate that BERT encodes rich knowledge about the semantics of scalar adjectives, and is able to provide better quality intensity rankings than static embeddings and previous models with access to dedicated resources.</abstract>
@@ -8129,7 +8129,7 @@
       <title>Keeping Up Appearances: Computational Modeling of Face Acts in Persuasion Oriented Discussions</title>
       <author><first>Ritam</first><last>Dutt</last></author>
       <author><first>Rishabh</first><last>Joshi</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>7473–7485</pages>
       <abstract>The notion of face refers to the public self-image of an individual that emerges both from the individual’s own actions as well as from the interaction with others. Modeling face and understanding its state changes throughout a conversation is critical to the study of maintenance of basic human needs in and through interaction. Grounded in the politeness theory of Brown and Levinson (1978), we propose a generalized framework for modeling face acts in persuasion conversations, resulting in a reliable coding manual, an annotated corpus, and computational models. The framework reveals insights about differences in face act utilization between asymmetric roles in persuasion conversations. Using computational models, we are able to successfully identify face acts as well as predict a key conversational outcome (e.g. donation success). Finally, we model a latent representation of the conversational state to analyze the impact of predicted face acts on the probability of a positive conversational outcome and observe several correlations that corroborate previous findings.</abstract>
       <url hash="f5c44f2a">2020.emnlp-main.605</url>
@@ -8185,7 +8185,7 @@
       <author><first>David</first><last>Wadden</last></author>
       <author><first>Shanchuan</first><last>Lin</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Madeleine</first><last>van Zuylen</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
@@ -8200,7 +8200,7 @@
       <title>Semantic Role Labeling as Syntactic Dependency Parsing</title>
       <author><first>Tianze</first><last>Shi</last></author>
       <author><first>Igor</first><last>Malioutov</last></author>
-      <author><first>Ozan</first><last>Irsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>Irsoy</last></author>
       <pages>7551–7571</pages>
       <abstract>We reduce the task of (span-based) PropBank-style semantic role labeling (SRL) to syntactic dependency parsing. Our approach is motivated by our empirical analysis that shows three common syntactic patterns account for over 98% of the SRL annotations for both English and Chinese data. Based on this observation, we present a conversion scheme that packs SRL annotations into dependency tree representations through joint labels that permit highly accurate recovery back to the original format. This representation allows us to train statistical dependency parsers to tackle SRL and achieve competitive performance with the current state of the art. Our findings show the promise of syntactic dependency trees in encoding semantic role relations within their syntactic domain of locality, and point to potential further integration of syntactic methods into semantic role labeling in the future.</abstract>
       <url hash="bccc2aeb">2020.emnlp-main.610</url>
@@ -8237,7 +8237,7 @@
     <paper id="613">
       <title>Towards Debiasing <fixed-case>NLU</fixed-case> Models from Unknown Biases</title>
       <author><first>Prasetya Ajie</first><last>Utama</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>7597–7610</pages>
       <abstract>NLU models often exploit biases to achieve high dataset-specific performance without properly learning the intended task. Recently proposed debiasing methods are shown to be effective in mitigating this tendency. However, these methods rely on a major assumption that the types of bias should be known a-priori, which limits their application to many NLU tasks and datasets. In this work, we present the first step to bridge this gap by introducing a self-debiasing framework that prevents models from mainly utilizing biases without knowing them in advance. The proposed framework is general and complementary to the existing debiasing methods. We show that it allows these existing methods to retain the improvement on the challenge datasets (i.e., sets of examples designed to expose models’ reliance on biases) without specifically targeting certain biases. Furthermore, the evaluation suggests that applying the framework results in improved overall robustness.</abstract>
@@ -8248,7 +8248,7 @@
     </paper>
     <paper id="614">
       <title>On the Role of Supervision in Unsupervised Constituency Parsing</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <pages>7611–7621</pages>
@@ -8300,8 +8300,8 @@
     <paper id="618">
       <title>Translation Artifacts in Cross-lingual Transfer Learning</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>7674–7684</pages>
       <abstract>Both human and machine translation play a central role in cross-lingual transfer learning: many multilingual datasets have been created through professional translation services, and using machine translation to translate either the test set or the training set is a widely used transfer technique. In this paper, we show that such translation process can introduce subtle artifacts that have a notable impact in existing cross-lingual models. For instance, in natural language inference, translating the premise and the hypothesis independently can reduce the lexical overlap between them, which current models are highly sensitive to. We show that some previous findings in cross-lingual transfer learning need to be reconsidered in the light of this phenomenon. Based on the gained insights, we also improve the state-of-the-art in XNLI for the translate-test and zero-shot approaches by 4.3 and 2.8 points, respectively.</abstract>
       <url hash="c674698c">2020.emnlp-main.618</url>
@@ -8314,7 +8314,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Harshit</first><last>Joshi</last></author>
       <author><first>Saumya</first><last>Gandhi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>7685–7697</pages>
       <abstract>Social media’s ubiquity fosters a space for users to exhibit suicidal thoughts outside of traditional clinical settings. Understanding the build-up of such ideation is critical for the identification of at-risk users and suicide prevention. Suicide ideation is often linked to a history of mental depression. The emotional spectrum of a user’s historical activity on social media can be indicative of their mental state over time. In this work, we focus on identifying suicidal intent in English tweets by augmenting linguistic models with historical context. We propose STATENet, a time-aware transformer based model for preliminary screening of suicidal risk on social media. STATENet outperforms competitive methods, demonstrating the utility of emotional and temporal contextual cues for suicide risk assessment. We discuss the empirical, qualitative, practical, and ethical aspects of STATENet for suicide ideation detection.</abstract>
       <url hash="8b8e971b">2020.emnlp-main.619</url>
@@ -8335,7 +8335,7 @@
     </paper>
     <paper id="621">
       <title>Where Are the Facts? Searching for Fact-checked Information to Alleviate the Spread of Fake News</title>
-      <author><first>Nguyen</first><last>Vo</last></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Vo</last></author>
       <author><first>Kyumin</first><last>Lee</last></author>
       <pages>7717–7731</pages>
       <abstract>Although many fact-checking systems have been developed in academia and industry, fake news is still proliferating on social media. These systems mostly focus on fact-checking but usually neglect online users who are the main drivers of the spread of misinformation. How can we use fact-checked information to improve users’ consciousness of fake news to which they are exposed? How can we stop users from spreading fake news? To tackle these questions, we propose a novel framework to search for fact-checking articles, which address the content of an original tweet (that may contain misinformation) posted by online users. The search can directly warn fake news posters and online users (e.g. the posters’ followers) about misinformation, discourage them from spreading fake news, and scale up verified content on social media. Our framework uses both text and images to search for fact-checking articles, and achieves promising results on real-world datasets. Our code and datasets are released at <url>https://github.com/nguyenvo09/EMNLP2020</url>.</abstract>
@@ -8398,8 +8398,8 @@
       <title><fixed-case>M</fixed-case>ed<fixed-case>F</fixed-case>ilter: <fixed-case>I</fixed-case>mproving <fixed-case>E</fixed-case>xtraction of <fixed-case>T</fixed-case>ask-relevant <fixed-case>U</fixed-case>tterances through <fixed-case>I</fixed-case>ntegration of <fixed-case>D</fixed-case>iscourse <fixed-case>S</fixed-case>tructure and <fixed-case>O</fixed-case>ntological <fixed-case>K</fixed-case>nowledge</title>
       <author><first>Sopan</first><last>Khosla</last></author>
       <author><first>Shikhar</first><last>Vashishth</last></author>
-      <author><first>Jill Fain</first><last>Lehman</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="jill-fain-lehman"><first>Jill Fain</first><last>Lehman</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>7781–7797</pages>
       <abstract>Information extraction from conversational data is particularly challenging because the task-centric nature of conversation allows for effective communication of implicit information by humans, but is challenging for machines. The challenges may differ between utterances depending on the role of the speaker within the conversation, especially when relevant expertise is distributed asymmetrically across roles. Further, the challenges may also increase over the conversation as more shared context is built up through information communicated implicitly earlier in the dialogue. In this paper, we propose the novel modeling approach MedFilter, which addresses these insights in order to increase performance at identifying and categorizing task-relevant utterances, and in so doing, positively impacts performance at a downstream information extraction task. We evaluate this approach on a corpus of nearly 7,000 doctor-patient conversations where MedFilter is used to identify medically relevant contributions to the discussion (achieving a 10% improvement over SOTA baselines in terms of area under the PR curve). Identifying task-relevant utterances benefits downstream medical processing, achieving improvements of 15%, 105%, and 23% respectively for the extraction of symptoms, medications, and complaints.</abstract>
       <url hash="f3595ad3">2020.emnlp-main.626</url>
@@ -8451,7 +8451,7 @@
       <title><fixed-case>E</fixed-case>ntity <fixed-case>L</fixed-case>inking in 100 <fixed-case>L</fixed-case>anguages</title>
       <author><first>Jan A.</first><last>Botha</last></author>
       <author><first>Zifei</first><last>Shan</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <pages>7833–7845</pages>
       <abstract>We propose a new formulation for multilingual entity linking, where language-specific mentions resolve to a language-agnostic Knowledge Base. We train a dual encoder in this new setting, building on prior work with improved feature representation, negative mining, and an auxiliary entity-pairing task, to obtain a single entity retrieval model that covers 100+ languages and 20 million entities. The model outperforms state-of-the-art results from a far more limited cross-lingual linking task. Rare entities and low-resource languages pose challenges at this large-scale, so we advocate for an increased focus on zero- and few-shot evaluation. To this end, we provide Mewsli-9, a large new multilingual dataset matched to our setting, and show how frequency-based analysis provided key insights for our model and training enhancements.</abstract>
       <url hash="9d50dc53">2020.emnlp-main.630</url>
@@ -8462,7 +8462,7 @@
     <paper id="631">
       <title><fixed-case>P</fixed-case>atch<fixed-case>BERT</fixed-case>: Just-in-Time, Out-of-Vocabulary Patching</title>
       <author><first>Sangwhan</first><last>Moon</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>7846–7852</pages>
       <abstract>Large scale pre-trained language models have shown groundbreaking performance improvements for transfer learning in the domain of natural language processing. In our paper, we study a pre-trained multilingual BERT model and analyze the OOV rate on downstream tasks, how it introduces information loss, and as a side-effect, obstructs the potential of the underlying model. We then propose multiple approaches for mitigation and demonstrate that it improves performance with the same parameter count when combined with fine-tuning.</abstract>
       <url hash="564f8927">2020.emnlp-main.631</url>
@@ -8514,7 +8514,7 @@
     </paper>
     <paper id="635">
       <title>Exploring and Predicting Transferability across <fixed-case>NLP</fixed-case> Tasks</title>
-      <author><first>Tu</first><last>Vu</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
       <author><first>Tong</first><last>Wang</last></author>
       <author><first>Tsendsuren</first><last>Munkhdalai</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
@@ -8537,7 +8537,7 @@
       <author><first>Smaranda</first><last>Muresan</last></author>
       <author><first>Jie</first><last>Ma</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>7927–7934</pages>
       <abstract>Leveraging large amounts of unlabeled data using Transformer-like architectures, like BERT, has gained popularity in recent times owing to their effectiveness in learning general representations that can then be further fine-tuned for downstream tasks to much success. However, training these models can be costly both from an economic and environmental standpoint. In this work, we investigate how to effectively use unlabeled data: by exploring the task-specific semi-supervised approach, Cross-View Training (CVT) and comparing it with task-agnostic BERT in multiple settings that include domain and task relevant English data. CVT uses a much lighter model architecture and we show that it achieves similar performance to BERT on a set of sequence tagging tasks, with lesser financial and environmental impact.</abstract>
       <url hash="d160c724">2020.emnlp-main.636</url>
@@ -8559,7 +8559,7 @@
     </paper>
     <paper id="638">
       <title><fixed-case>A</fixed-case>ctive <fixed-case>L</fixed-case>earning for <fixed-case>BERT</fixed-case>: <fixed-case>A</fixed-case>n <fixed-case>E</fixed-case>mpirical <fixed-case>S</fixed-case>tudy</title>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Alon</first><last>Halfon</last></author>
       <author><first>Ariel</first><last>Gera</last></author>
       <author><first>Eyal</first><last>Shnarch</last></author>
@@ -8592,7 +8592,7 @@
       <author><first>Matthew</first><last>Khoury</last></author>
       <author><first>Rumen</first><last>Dangovski</last></author>
       <author><first>Longwu</first><last>Ou</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Yichen</first><last>Shen</last></author>
       <author><first>Li</first><last>Jing</last></author>
       <pages>7975–7984</pages>
@@ -8618,8 +8618,8 @@
     <paper id="642">
       <title>The role of context in neural pitch accent detection in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Elizabeth</first><last>Nielsen</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>7994–8000</pages>
       <abstract>Prosody is a rich information source in natural language, serving as a marker for phenomena such as contrast. In order to make this information available to downstream tasks, we need a way to detect prosodic events in speech. We propose a new model for pitch accent detection, inspired by the work of Stehwien et al. (2018), who presented a CNN-based model for this task. Our model makes greater use of context by using full utterances as input and adding an LSTM layer. We find that these innovations lead to an improvement from 87.5% to 88.7% accuracy on pitch accent detection on American English speech in the Boston University Radio News Corpus, a state-of-the-art result. We also find that a simple baseline that just predicts a pitch accent on every content word yields 82.2% accuracy, and we suggest that this is the appropriate baseline for this task. Finally, we conduct ablation tests that show pitch is the most important acoustic feature for this task and this corpus.</abstract>
       <url hash="b36bf67d">2020.emnlp-main.642</url>
@@ -8634,7 +8634,7 @@
       <author><first>Arshiya</first><last>Aggarwal</last></author>
       <author><first>Taru</first><last>Jain</last></author>
       <author><first>Puneet</first><last>Mathur</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>8001–8013</pages>
       <abstract>Natural language processing has recently made stock movement forecasting and volatility forecasting advances, leading to improved financial forecasting. Transcripts of companies’ earnings calls are well studied for risk modeling, offering unique investment insight into stock performance. However, vocal cues in the speech of company executives present an underexplored rich source of natural language data for estimating financial risk. Additionally, most existing approaches ignore the correlations between stocks. Building on existing work, we introduce a neural model for stock volatility prediction that accounts for stock interdependence via graph convolutions while fusing verbal, vocal, and financial features in a semi-supervised multi-task risk forecasting formulation. Our proposed model, VolTAGE, outperforms existing methods demonstrating the effectiveness of multimodal learning for volatility prediction.</abstract>
       <url hash="fd5f3da4">2020.emnlp-main.643</url>
@@ -8659,8 +8659,8 @@
       <author><first>Haimin</first><last>Zhang</last></author>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>Rakesh</first><last>Gosangi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>8021–8030</pages>
       <abstract>We introduce a new keyphrase generation approach using Generative Adversarial Networks (GANs). For a given document, the generator produces a sequence of keyphrases, and the discriminator distinguishes between human-curated and machine-generated keyphrases. We evaluated this approach on standard benchmark datasets. We observed that our model achieves state-of-the-art performance in the generation of abstractive keyphrases and is comparable to the best performing extractive techniques. Although we achieve promising results using GANs, they are not significantly better than the state-of-the-art generative models. To our knowledge, this is one of the first works that use GANs for keyphrase generation. We present a detailed analysis of our observations and expect that these findings would help other researchers to further study the use of GANs for the task of keyphrase generation.</abstract>
       <url hash="cf78f526">2020.emnlp-main.645</url>
@@ -8672,7 +8672,7 @@
       <title><fixed-case>TESA</fixed-case>: A <fixed-case>T</fixed-case>ask in <fixed-case>E</fixed-case>ntity <fixed-case>S</fixed-case>emantic <fixed-case>A</fixed-case>ggregation for Abstractive Summarization</title>
       <author><first>Clément</first><last>Jumel</last></author>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>8031–8050</pages>
       <abstract>Human-written texts contain frequent generalizations and semantic aggregation of content. In a document, they may refer to a pair of named entities such as ‘London’ and ‘Paris’ with different expressions: “the major cities”, “the capital cities” and “two European cities”. Yet generation, especially, abstractive summarization systems have so far focused heavily on paraphrasing and simplifying the source content, to the exclusion of such semantic abstraction capabilities. In this paper, we present a new dataset and task aimed at the semantic aggregation of entities. TESA contains a dataset of 5.3K crowd-sourced entity aggregations of Person, Organization, and Location named entities. The aggregations are document-appropriate, meaning that they are produced by annotators to match the situational context of a given news article from the New York Times. We then build baseline models for generating aggregations given a tuple of entities and document context. We finetune on TESA an encoder-decoder language model and compare it with simpler classification methods based on linguistically informed features. Our quantitative and qualitative evaluations show reasonable performance in making a choice from a given list of expressions, but free-form expressions are understandably harder to generate and evaluate.</abstract>
       <url hash="fefa09eb">2020.emnlp-main.646</url>
@@ -8709,7 +8709,7 @@
     <paper id="649">
       <title>Intrinsic Evaluation of Summarization Datasets</title>
       <author><first>Rishi</first><last>Bommasani</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>8075–8096</pages>
       <abstract>High quality data forms the bedrock for building meaningful statistical models in NLP. Consequently, data quality must be evaluated either during dataset construction or *post hoc*. Almost all popular summarization datasets are drawn from natural sources and do not come with inherent quality assurance guarantees. In spite of this, data quality has gone largely unquestioned for many of these recent datasets. We perform the first large-scale evaluation of summarization datasets by introducing 5 intrinsic metrics and applying them to 10 popular datasets. We find that data usage in recent summarization research is sometimes inconsistent with the underlying properties of the data. Further, we discover that our metrics can serve the additional purpose of being inexpensive heuristics for detecting generically low quality examples.</abstract>
       <url hash="28540860">2020.emnlp-main.649</url>
@@ -8739,7 +8739,7 @@
       <title>Conversational Semantic Parsing for Dialog State Tracking</title>
       <author><first>Jianpeng</first><last>Cheng</last></author>
       <author><first>Devang</first><last>Agrawal</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <author><first>Shruti</first><last>Bhargava</last></author>
       <author><first>Joris</first><last>Driesen</last></author>
       <author><first>Federico</first><last>Flego</last></author>
@@ -8747,10 +8747,10 @@
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
       <author><first>Lin</first><last>Li</last></author>
       <author><first>Dhivya</first><last>Piraviperumal</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>8107–8117</pages>
       <abstract>We consider a new perspective on dialog state tracking (DST), the task of estimating a user’s goal through the course of a dialog. By formulating DST as a semantic parsing task over hierarchical representations, we can incorporate semantic compositionality, cross-domain knowledge sharing and co-reference. We present TreeDST, a dataset of 27k conversations annotated with tree-structured dialog states and system acts. We describe an encoder-decoder framework for DST with hierarchical representations, which leads to ~20% improvement over state-of-the-art DST approaches that operate on a flat meaning space of slot-value pairs.</abstract>
       <url hash="304c3119">2020.emnlp-main.651</url>
@@ -8764,7 +8764,7 @@
       <author><first>Hui</first><last>Wan</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Siva</first><last>Patel</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <pages>8118–8128</pages>
       <abstract>We introduce doc2dial, a new dataset of goal-oriented dialogues that are grounded in the associated documents. Inspired by how the authors compose documents for guiding end users, we first construct dialogue flows based on the content elements that corresponds to higher-level relations across text sections as well as lower-level relations between discourse units within a section. Then we present these dialogue flows to crowd contributors to create conversational utterances. The dataset includes over 4500 annotated conversations with an average of 14 turns that are grounded in over 450 documents from four domains. Compared to the prior document-grounded dialogue datasets, this dataset covers a variety of dialogue scenes in information-seeking conversations. For evaluating the versatility of the dataset, we introduce multiple dialogue modeling tasks and present baseline approaches.</abstract>
@@ -8804,7 +8804,7 @@
     <paper id="655">
       <title>Information Seeking in the Spirit of Learning: A Dataset for Conversational Curiosity</title>
       <author><first>Pedro</first><last>Rodriguez</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Zhiguang</first><last>Wang</last></author>
       <pages>8153–8172</pages>
@@ -8833,7 +8833,7 @@
       <title>Discriminatively-<fixed-case>T</fixed-case>uned <fixed-case>G</fixed-case>enerative <fixed-case>C</fixed-case>lassifiers for <fixed-case>R</fixed-case>obust <fixed-case>N</fixed-case>atural <fixed-case>L</fixed-case>anguage <fixed-case>I</fixed-case>nference</title>
       <author><first>Xiaoan</first><last>Ding</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <pages>8189–8202</pages>
@@ -8845,7 +8845,7 @@
     </paper>
     <paper id="658">
       <title>New Protocols and Negative Results for Textual Entailment Data Collection</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Jennimaria</first><last>Palomaki</last></author>
       <author><first>Livio</first><last>Baldini Soares</last></author>
       <author><first>Emily</first><last>Pitler</last></author>
@@ -8873,7 +8873,7 @@
       <title>Universal Natural Language Processing with Limited Annotations: Try Few-shot Textual Entailment as a Start</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>8229–8239</pages>
@@ -8911,7 +8911,7 @@
       <title>Multitask Learning for Cross-Lingual Transfer of Broad-coverage Semantic Dependencies</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>8268–8274</pages>
       <abstract>We describe a method for developing broad-coverage semantic dependency parsers for languages for which no semantically annotated resource is available. We leverage a multitask learning framework coupled with annotation projection. We use syntactic parsing as the auxiliary task in our multitask setup. Our annotation projection experiments from English to Czech show that our multitask setup yields 3.1% (4.2%) improvement in labeled F1-score on in-domain (out-of-domain) test set compared to a single-task baseline.</abstract>
       <url hash="c452cabe">2020.emnlp-main.663</url>
@@ -8924,7 +8924,7 @@
       <author><first>Haokun</first><last>Liu</last></author>
       <author><first>William</first><last>Huang</last></author>
       <author><first>Dhara</first><last>Mungra</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>8275–8280</pages>
       <abstract>Performance on the Winograd Schema Challenge (WSC), a respected English commonsense reasoning benchmark, recently rocketed from chance accuracy to 89% on the SuperGLUE leaderboard, with relatively little corroborating evidence of a correspondingly large improvement in reasoning ability. We hypothesize that much of this improvement comes from recent changes in task formalization—the combination of input specification, loss function, and reuse of pretrained parameters—by users of the dataset, rather than improvements in the pretrained model’s reasoning ability. We perform an ablation on two Winograd Schema datasets that interpolates between the formalizations used before and after this surge, and find (i) framing the task as multiple choice improves performance dramatically and (ii)several additional techniques, including the reuse of a pretrained language modeling head, can mitigate the model’s extreme sensitivity to hyperparameters. We urge future benchmark creators to impose additional structure to minimize the impact of formalization decisions on reported results.</abstract>
       <url hash="d4b45e09">2020.emnlp-main.664</url>
@@ -8938,7 +8938,7 @@
       <author><first>Pasquale</first><last>Minervini</last></author>
       <author><first>Haim</first><last>Dubossarsky</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <pages>8281–8291</pages>
       <abstract>Natural Language Inference (NLI) datasets contain annotation artefacts resulting in spurious correlations between the natural language utterances and their respective entailment classes. These artefacts are exploited by neural networks even when only considering the hypothesis and ignoring the premise, leading to unwanted biases. Belinkov et al. (2019b) proposed tackling this problem via adversarial training, but this can lead to learned sentence representations that still suffer from the same biases. We show that the bias can be reduced in the sentence representations by using an ensemble of adversaries, encouraging the model to jointly decrease the accuracy of these different adversaries while fitting the data. This approach produces more robust NLI models, outperforming previous de-biasing efforts when generalised to 12 other NLI datasets (Belinkov et al., 2019a; Mahabadi et al., 2020). In addition, we find that the optimal number of adversarial classifiers depends on the dimensionality of the sentence representations, with larger sentence representations being more difficult to de-bias while benefiting from using a greater number of adversaries.</abstract>
       <url hash="05bab017">2020.emnlp-main.665</url>
@@ -9028,7 +9028,7 @@
       <title><fixed-case>C</fixed-case>hapter <fixed-case>C</fixed-case>aptor: <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>egmentation in <fixed-case>N</fixed-case>ovels</title>
       <author><first>Charuta</first><last>Pethe</last></author>
       <author><first>Allen</first><last>Kim</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <pages>8373–8383</pages>
       <abstract>Books are typically segmented into chapters and sections, representing coherent sub-narratives and topics. We investigate the task of predicting chapter boundaries, as a proxy for the general task of segmenting long texts. We build a Project Gutenberg chapter segmentation data set of 9,126 English novels, using a hybrid approach combining neural inference and rule matching to recognize chapter title headers in books, achieving an F1-score of 0.77 on this task. Using this annotated data as ground truth after removing structural cues, we present cut-based and neural methods for chapter segmentation, achieving a F1-score of 0.453 on the challenging task of exact break prediction over book-length documents. Finally, we reveal interesting historical trends in the chapter structure of novels.</abstract>
       <url hash="e4960543">2020.emnlp-main.672</url>
@@ -9064,7 +9064,7 @@
     </paper>
     <paper id="675">
       <title>Towards Modeling Revision Requirements in wiki<fixed-case>H</fixed-case>ow Instructions</title>
-      <author><first>Irshad</first><last>Bhat</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
       <author><first>Talita</first><last>Anthonio</last></author>
       <author><first>Michael</first><last>Roth</last></author>
       <pages>8407–8414</pages>
@@ -9079,7 +9079,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Shivam</first><last>Agarwal</last></author>
       <author><first>Arnav</first><last>Wadhwa</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>8415–8426</pages>
       <abstract>In the financial domain, risk modeling and profit generation heavily rely on the sophisticated and intricate stock movement prediction task. Stock forecasting is complex, given the stochastic dynamics and non-stationary behavior of the market. Stock movements are influenced by varied factors beyond the conventionally studied historical prices, such as social media and correlations among stocks. The rising ubiquity of online content and knowledge mandates an exploration of models that factor in such multimodal signals for accurate stock forecasting. We introduce an architecture that achieves a potent blend of chaotic temporal signals from financial data, social media, and inter-stock relationships via a graph neural network in a hierarchical temporal fashion. Through experiments on real-world S&amp;P 500 index data and English tweets, we show the practical applicability of our model as a tool for investment decision making and trading.</abstract>
       <url hash="00de9641">2020.emnlp-main.676</url>
@@ -9105,7 +9105,7 @@
       <author><first>Barun</first><last>Patra</last></author>
       <author><first>Chala</first><last>Fufa</last></author>
       <author><first>Pamela</first><last>Bhattacharya</last></author>
-      <author><first>Charles</first><last>Lee</last></author>
+      <author id="charles-c-lee"><first>Charles</first><last>Lee</last></author>
       <pages>8445–8455</pages>
       <abstract>State of the art research for date-time entity extraction from text is task agnostic. Consequently, while the methods proposed in literature perform well for generic date-time extraction from texts, they don’t fare as well on task specific date-time entity extraction where only a subset of the date-time entities present in the text are pertinent to solving the task. Furthermore, some tasks require identifying negation constraints associated with the date-time entities to correctly reason over time. We showcase a novel model for extracting task-specific date-time entities along with their negation constraints. We show the efficacy of our method on the task of date-time understanding in the context of scheduling meetings for an email-based digital AI scheduling assistant. Our method achieves an absolute gain of 19% f-score points compared to baseline methods in detecting the date-time entities relevant to scheduling meetings and a 4% improvement over baseline methods for detecting negation constraints over date-time entities.</abstract>
       <url hash="bda9802c">2020.emnlp-main.678</url>
@@ -9120,7 +9120,7 @@
       <author><first>Rebecca</first><last>Thomas</last></author>
       <author><first>Steve</first><last>Pittard</last></author>
       <author><first>Vicki</first><last>Hertzberg</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>8456–8466</pages>
       <abstract>This paper presents a comprehensive study on resume classification to reduce the time and labor needed to screen an overwhelming number of applications significantly, while improving the selection of suitable candidates. A total of 6,492 resumes are extracted from 24,933 job applications for 252 positions designated into four levels of experience for Clinical Research Coordinators (CRC). Each resume is manually annotated to its most appropriate CRC position by experts through several rounds of triple annotation to establish guidelines. As a result, a high Kappa score of 61% is achieved for inter-annotator agreement. Given this dataset, novel transformer-based classification models are developed for two tasks: the first task takes a resume and classifies it to a CRC level (T1), and the second task takes both a resume and a job description to apply and predicts if the application is suited to the job (T2). Our best models using section encoding and a multi-head attention decoding give results of 73.3% to T1 and 79.2% to T2. Our analysis shows that the prediction errors are mostly made among adjacent CRC levels, which are hard for even experts to distinguish, implying the practical value of our models in real HR platforms.</abstract>
       <url hash="5cc3ad16">2020.emnlp-main.679</url>
@@ -9134,7 +9134,7 @@
       <author><first>Ophélie</first><last>Lacroix</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>8467–8478</pages>
       <abstract>Evaluation of grammatical error correction (GEC) systems has primarily focused on essays written by non-native learners of English, which however is only part of the full spectrum of GEC applications. We aim to broaden the target domain of GEC and release CWEB, a new benchmark for GEC consisting of website text generated by English speakers of varying levels of proficiency. Website data is a common and important domain that contains far fewer grammatical errors than learner essays, which we show presents a challenge to state-of-the-art GEC systems. We demonstrate that a factor behind this is the inability of systems to rely on a strong internal language model in low error density domains. We hope this work shall facilitate the development of open-domain GEC models that generalize to different topics and genres.</abstract>
       <url hash="478b914d">2020.emnlp-main.680</url>
@@ -9146,7 +9146,7 @@
       <title>Deconstructing word embedding algorithms</title>
       <author><first>Kian</first><last>Kenyon-Dean</last></author>
       <author><first>Edward</first><last>Newell</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>8479–8484</pages>
       <abstract>Word embeddings are reliable feature representations of words used to obtain high quality results for various NLP applications. Uncontextualized word embeddings are used in many NLP tasks today, especially in resource-limited settings where high memory capacity and GPUs are not available. Given the historical success of word embeddings in NLP, we propose a retrospective on some of the most well-known word embedding algorithms. In this work, we deconstruct Word2vec, GloVe, and others, into a common form, unveiling some of the common conditions that seem to be required for making performant word embeddings. We believe that the theoretical findings in this paper can provide a basis for more informed development of future models.</abstract>
       <url hash="d81fc3fe">2020.emnlp-main.681</url>
@@ -9206,7 +9206,7 @@
     <paper id="686">
       <title>Revealing the Myth of Higher-Order Inference in Coreference Resolution</title>
       <author><first>Liyan</first><last>Xu</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>8527–8533</pages>
       <abstract>This paper analyzes the impact of higher-order inference (HOI) on the task of coreference resolution. HOI has been adapted by almost all recent coreference resolution models without taking much investigation on its true effectiveness over representation learning. To make a comprehensive analysis, we implement an end-to-end coreference system as well as four HOI approaches, attended antecedent, entity equalization, span clustering, and cluster merging, where the latter two are our original methods. We find that given a high-performing encoder such as SpanBERT, the impact of HOI is negative to marginal, providing a new perspective of HOI to this task. Our best model using cluster merging shows the Avg-F1 of 80.2 on the CoNLL 2012 shared task dataset in English.</abstract>
       <url hash="74212469">2020.emnlp-main.686</url>
@@ -9269,7 +9269,7 @@
       <title><fixed-case>S</fixed-case>eq<fixed-case>M</fixed-case>ix: Augmenting Active Sequence Labeling via Sequence Mixup</title>
       <author><first>Rongzhi</first><last>Zhang</last></author>
       <author><first>Yue</first><last>Yu</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>8566–8579</pages>
       <abstract>Active learning is an important technique for low-resource sequence labeling tasks. However, current active sequence labeling methods use the queried samples alone in each iteration, which is an inefficient way of leveraging human annotations. We propose a simple but effective data augmentation method to improve label efficiency of active sequence labeling. Our method, SeqMix, simply augments the queried samples by generating extra labeled sequences in each iteration. The key difficulty is to generate plausible sequences along with token-level labels. In SeqMix, we address this challenge by performing mixup for both sequences and token-level labels of the queried samples. Furthermore, we design a discriminator during sequence mixup, which judges whether the generated sequences are plausible or not. Our experiments on Named Entity Recognition and Event Detection tasks show that SeqMix can improve the standard active sequence labeling method by 2.27%–3.75% in terms of <tex-math>F_1</tex-math> scores. The code and data for SeqMix can be found at <url>https://github.com/rz-zhang/SeqMix</url>.</abstract>
       <url hash="aa50ebdf">2020.emnlp-main.691</url>
@@ -9313,7 +9313,7 @@
       <author><first>Pierre</first><last>Dognin</last></author>
       <author><first>Igor</first><last>Melnyk</last></author>
       <author><first>Inkit</first><last>Padhi</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Payel</first><last>Das</last></author>
       <pages>8605–8616</pages>
       <abstract>In this work, we present a dual learning approach for unsupervised text to path and path to text transfers in Commonsense Knowledge Bases (KBs). We investigate the impact of weak supervision by creating a weakly supervised dataset and show that even a slight amount of supervision can significantly improve the model performance and enable better-quality transfers. We examine different model architectures, and evaluation metrics, proposing a novel Commonsense KB completion metric tailored for generative models. Extensive experimental results show that the proposed method compares very favorably to the existing baselines. This approach is a viable step towards a more advanced system for automatic KB construction/expansion and the reverse operation of KB conversion to coherent textual descriptions.</abstract>
@@ -9337,7 +9337,7 @@
     <paper id="696">
       <title>Improving Low Compute Language Modeling with In-Domain Embedding Initialisation</title>
       <author><first>Charles</first><last>Welch</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <pages>8625–8634</pages>
       <abstract>Many NLP applications, such as biomedical data and technical support, have 10-100 million tokens of in-domain data and limited computational resources for learning from it. How should we train a language model in this scenario? Most language modeling research considers either a small dataset with a closed vocabulary (like the standard 1 million token Penn Treebank), or the whole web with byte-pair encoding. We show that for our target setting in English, initialising and freezing input embeddings using in-domain data can improve language model performance by providing a useful representation of rare words, and this pattern holds across several different domains. In the process, we show that the standard convention of tying input and output embeddings does not improve perplexity when initializing with embeddings trained on in-domain data.</abstract>
@@ -9367,7 +9367,7 @@
       <author><first>Chunyuan</first><last>Li</last></author>
       <author><first>Zhe</first><last>Gan</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>8649–8670</pages>
       <abstract>Large-scale pre-trained language models, such as BERT and GPT-2, have achieved excellent performance in language representation learning and free-form text generation. However, these models cannot be directly employed to generate text under specified lexical constraints. To address this challenge, we present POINTER (PrOgressive INsertion-based TransformER), a simple yet novel insertion-based approach for hard-constrained text generation. The proposed method operates by progressively inserting new tokens between existing tokens in a parallel manner. This procedure is recursively applied until a sequence is completed. The resulting coarse-to-fine hierarchy makes the generation process intuitive and interpretable. We pre-train our model with the proposed progressive insertion-based objective on a 12GB Wikipedia dataset, and fine-tune it on downstream hard-constrained generation tasks. Non-autoregressive decoding yields a logarithmic time complexity during inference time. Experimental results on both News and Yelp datasets demonstrate that Pointer achieves state-of-the-art performance on constrained text generation. We released the pre-trained models and the source code to facilitate future research.</abstract>
       <url hash="37ccc4fa">2020.emnlp-main.698</url>
@@ -9433,13 +9433,13 @@
       <author><first>Jesse</first><last>Thomason</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <author><first>Aleksandr</first><last>Nisnevich</last></author>
       <author><first>Nicolas</first><last>Pinto</last></author>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <pages>8718–8735</pages>
       <abstract>Language understanding research is held back by a failure to relate language to the physical world it describes and to the social interactions it facilitates. Despite the incredible effectiveness of language processing models to tackle tasks after being trained on text alone, successful linguistic communication relies on a shared experience of the world. It is this shared experience that makes utterances meaningful. Natural language processing is a diverse field, and progress throughout its development has come from new representational theories, modeling techniques, data collection paradigms, and tasks. We posit that the present success of representation learning approaches trained on large, text-only corpora requires the parallel tradition of research on the broader physical and social context of language to address the deeper questions of communication.</abstract>
       <url hash="d1170c00">2020.emnlp-main.703</url>
@@ -9465,7 +9465,7 @@
       <author><first>Adam</first><last>Fisch</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>8755–8768</pages>
       <abstract>The traditional image captioning task uses generic reference captions to provide textual information about images. Different user populations, however, will care about different visual aspects of images. In this paper, we propose a new task, Captioning with A Purpose (CapWAP). Our goal is to develop systems that can be tailored to be useful for the information needs of an intended population, rather than merely provide generic information about an image. In this task, we use question-answer (QA) pairs—a natural expression of information need—from users, instead of reference captions, for both training and post-inference evaluation. We show that it is possible to use reinforcement learning to directly optimize for the intended information need, by rewarding outputs that allow a question answering model to provide correct answers to sampled user questions. We convert several visual question answering datasets into CapWAP datasets, and demonstrate that under a variety of scenarios our purposeful captioning system learns to anticipate and fulfill specific information needs better than its generic counterparts, as measured by QA performance on user questions from unseen images, when using the caption alone as context.</abstract>
@@ -9478,7 +9478,7 @@
       <title>What is More Likely to Happen Next? Video-and-Language Future Event Prediction</title>
       <author><first>Jie</first><last>Lei</last></author>
       <author><first>Licheng</first><last>Yu</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>8769–8784</pages>
       <abstract>Given a video with aligned dialogue, people can often infer what is more likely to happen next. Making such predictions requires not only a deep understanding of the rich dynamics underlying the video and dialogue, but also a significant amount of commonsense knowledge. In this work, we explore whether AI models are able to learn to make such multimodal commonsense next-event predictions. To support research in this direction, we collect a new dataset, named Video-and-Language Event Prediction (VLEP), with 28,726 future event prediction examples (along with their rationales) from 10,234 diverse TV Show and YouTube Lifestyle Vlog video clips. In order to promote the collection of non-trivial challenging examples, we employ an adversarial human-and-model-in-the-loop data collection procedure. We also present a strong baseline incorporating information from video, dialogue, and commonsense knowledge. Experiments show that each type of information is useful for this challenging task, and that compared to the high human performance on VLEP, our model provides a good starting point but leaves large room for future work.</abstract>
@@ -9574,7 +9574,7 @@
       <title>Unsupervised Question Decomposition for Question Answering</title>
       <author><first>Ethan</first><last>Perez</last></author>
       <author><first>Patrick</first><last>Lewis</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <pages>8864–8880</pages>
@@ -9610,7 +9610,7 @@
       <title>Exploring the Role of Argument Structure in Online Debate Persuasion</title>
       <author><first>Jialu</first><last>Li</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>8905–8912</pages>
       <abstract>Online debate forums provide users a platform to express their opinions on controversial topics while being exposed to opinions from diverse set of viewpoints. Existing work in Natural Language Processing (NLP) has shown that linguistic features extracted from the debate text and features encoding the characteristics of the audience are both critical in persuasion studies. In this paper, we aim to further investigate the role of discourse structure of the arguments from online debates in their persuasiveness. In particular, we use the factor graph model to obtain features for the argument structure of debates from an online debating platform and incorporate these features to an LSTM-based model to predict the debater that makes the most convincing arguments. We find that incorporating argument structure features play an essential role in achieving the best predictive performance in assessing the persuasiveness of the arguments on online debates.</abstract>
       <url hash="0843fcb2">2020.emnlp-main.716</url>
@@ -9621,7 +9621,7 @@
     <paper id="717">
       <title><fixed-case>Z</fixed-case>ero-<fixed-case>S</fixed-case>hot <fixed-case>S</fixed-case>tance <fixed-case>D</fixed-case>etection: <fixed-case>A</fixed-case> <fixed-case>D</fixed-case>ataset and <fixed-case>M</fixed-case>odel using <fixed-case>G</fixed-case>eneralized <fixed-case>T</fixed-case>opic <fixed-case>R</fixed-case>epresentations</title>
       <author><first>Emily</first><last>Allaway</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>8913–8931</pages>
       <abstract>Stance detection is an important component of understanding hidden influences in everyday life. Since there are thousands of potential topics to take a stance on, most with little to no training data, we focus on zero-shot stance detection: classifying stance from no training examples. In this paper, we present a new dataset for zero-shot stance detection that captures a wider range of topics and lexical variation than in previous datasets. Additionally, we propose a new model for stance detection that implicitly captures relationships between topics using generalized topic representations and show that this model improves performance on a number of challenging linguistic phenomena.</abstract>
       <url hash="637a2b82">2020.emnlp-main.717</url>
@@ -9673,8 +9673,8 @@
       <author><first>Shanshan</first><last>Peng</last></author>
       <author><first>Jiankun</first><last>Lu</last></author>
       <author><first>Deepanway</first><last>Ghosal</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>8968–8979</pages>
       <abstract>Current approaches to empathetic response generation view the set of emotions expressed in the input text as a flat structure, where all the emotions are treated uniformly. We argue that empathetic responses often mimic the emotion of the user to a varying degree, depending on its positivity or negativity and content. We show that the consideration of these polarity-based emotion clusters and emotional mimicry results in improved empathy and contextual relevance of the response as compared to the state-of-the-art. Also, we introduce stochasticity into the emotion mixture that yields emotionally more varied empathetic responses than the previous work. We demonstrate the importance of these factors to empathetic response generation using both automatic- and human-based evaluations. The implementation of MIME is publicly available at <url>https://github.com/declare-lab/MIME</url>.</abstract>
@@ -9717,7 +9717,7 @@
       <author><first>Jiaxin</first><last>Huang</last></author>
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <pages>9006–9017</pages>
       <abstract>Current text classification methods typically require a good number of human-labeled documents as training data, which can be costly and difficult to obtain in real applications. Humans can perform classification without seeing any labeled examples but only based on a small set of words describing the categories to be classified. In this paper, we explore the potential of only using the label name of each class to train classification models on unlabeled data, without using any labeled documents. We use pre-trained neural language models both as general linguistic knowledge sources for category understanding and as representation learning models for document classification. Our method (1) associates semantically related words with the label names, (2) finds category-indicative words and trains the model to predict their implied categories, and (3) generalizes the model via self-training. We show that our model achieves around 90% accuracy on four benchmark datasets including topic and sentiment classification without using any labeled documents but learning from unlabeled data supervised by at most 3 words (1 in most cases) per class as the label name.</abstract>
@@ -9788,7 +9788,7 @@
       <author><first>Zengfeng</first><last>Huang</last></author>
       <author><first>Weijian</first><last>Sun</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>9066–9075</pages>
       <abstract>Existing research for question generation encodes the input text as a sequence of tokens without explicitly modeling fact information. These models tend to generate irrelevant and uninformative questions. In this paper, we explore to incorporate facts in the text for question generation in a comprehensive way. We present a novel task of question generation given a query path in the knowledge graph constructed from the input text. We divide the task into two steps, namely, query representation learning and query-based question generation. We formulate query representation learning as a sequence labeling problem for identifying the involved facts to form a query and employ an RNN-based generator for question generation. We first train the two modules jointly in an end-to-end fashion, and further enforce the interaction between these two modules in a variational framework. We construct the experimental datasets on top of SQuAD and results show that our model outperforms other state-of-the-art approaches, and the performance margin is larger when target questions are complex. Human evaluation also proves that our model is able to generate relevant and informative questions.</abstract>
       <url hash="ee1eb1eb">2020.emnlp-main.729</url>
@@ -9801,7 +9801,7 @@
       <title>What time is it? Temporal Analysis of Novels</title>
       <author><first>Allen</first><last>Kim</last></author>
       <author><first>Charuta</first><last>Pethe</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <pages>9076–9086</pages>
       <abstract>Recognizing the flow of time in a story is a crucial aspect of understanding it. Prior work related to time has primarily focused on identifying temporal expressions or relative sequencing of events, but here we propose computationally annotating each line of a book with wall clock times, even in the absence of explicit time-descriptive phrases. To do so, we construct a data set of hourly time phrases from 52,183 fictional books. We then construct a time-of-day classification model that achieves an average error of 2.27 hours. Furthermore, we show that by analyzing a book in whole using dynamic programming of breakpoints, we can roughly partition a book into segments that each correspond to a particular time-of-day. This approach improves upon baselines by over two hour. Finally, we apply our model to a corpus of literature categorized by different periods in history, to show interesting trends of hourly activity throughout the past. Among several observations we find that the fraction of events taking place past 10 P.M jumps past 1880 - coincident with the advent of the electric light bulb and city lights.</abstract>
       <url hash="64589865">2020.emnlp-main.730</url>
@@ -10020,7 +10020,7 @@
       <author><first>Urvashi</first><last>Khandelwal</last></author>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Kyle</first><last>Mahowald</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>9263–9274</pages>
       <abstract>Despite its importance to experimental design, statistical power (the probability that, given a real effect, an experiment will reject the null hypothesis) has largely been ignored by the NLP community. Underpowered experiments make it more difficult to discern the difference between statistical noise and meaningful model improvements, and increase the chances of exaggerated findings. By meta-analyzing a set of existing NLP papers and datasets, we characterize typical power for a variety of settings and conclude that underpowered experiments are common in the NLP literature. In particular, for several tasks in the popular GLUE benchmark, small test sets mean that most attempted comparisons to state of the art models will not be adequately powered. Similarly, based on reasonable assumptions, we find that the most typical experimental design for human rating studies will be underpowered to detect small model differences, of the sort that are frequently studied. For machine translation, we find that typical test sets of 2000 sentences have approximately 75% power to detect differences of 1 BLEU point. To improve the situation going forward, we give an overview of best practices for power analysis in NLP and release a series of notebooks to assist with future power analyses.</abstract>
       <url hash="c7e1b324">2020.emnlp-main.745</url>
@@ -10035,7 +10035,7 @@
       <author><first>Nicholas</first><last>Lourie</last></author>
       <author><first>Yizhong</first><last>Wang</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>9275–9293</pages>
       <abstract>Large datasets have become commonplace in NLP research. However, the increased emphasis on data quantity has made it challenging to assess the quality of data. We introduce Data Maps—a model-based tool to characterize and diagnose datasets. We leverage a largely ignored source of information: the behavior of the model on individual instances during training (training dynamics) for building data maps. This yields two intuitive measures for each example—the model’s confidence in the true class, and the variability of this confidence across epochs—obtained in a single run of training. Experiments on four datasets show that these model-dependent measures reveal three distinct regions in the data map, each with pronounced characteristics. First, our data maps show the presence of “ambiguous” regions with respect to the model, which contribute the most towards out-of-distribution generalization. Second, the most populous regions in the data are “easy to learn” for the model, and play an important role in model optimization. Finally, data maps uncover a region with instances that the model finds “hard to learn”; these often correspond to labeling errors. Our results indicate that a shift in focus from quantity to quality of data could lead to robust models and improved out-of-distribution generalization.</abstract>
@@ -10061,7 +10061,7 @@
       <author><first>Jonathan</first><last>Pilault</last></author>
       <author><first>Raymond</first><last>Li</last></author>
       <author><first>Sandeep</first><last>Subramanian</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <pages>9308–9319</pages>
       <abstract>We present a method to produce abstractive summaries of long documents that exceed several thousand words via neural abstractive summarization. We perform a simple extractive step before generating a summary, which is then used to condition the transformer language model on relevant information before being tasked with generating a summary. We also show that this approach produces more abstractive summaries compared to prior work that employs a copy mechanism while still achieving higher ROUGE scores. We provide extensive comparisons with strong baseline methods, prior state of the art work as well as multiple variants of our approach including those using only transformers, only extractive techniques and combinations of the two. We examine these models using four different summarization tasks and datasets: arXiv papers, PubMed papers, the Newsroom and BigPatent datasets. We find that transformer based methods produce summaries with fewer n-gram copies, leading to n-gram copying statistics that are more similar to human generated abstracts. We include a human evaluation, finding that transformers are ranked highly for coherence and fluency, but purely extractive methods score higher for informativeness and relevance. We hope that these architectures and experiments may serve as strong points of comparison for future work. Note: The abstract above was collaboratively written by the authors and one of the models presented in this paper based on an earlier draft of this paper.</abstract>
       <url hash="42f410a6">2020.emnlp-main.748</url>
@@ -10075,7 +10075,7 @@
       <author><first>Shuohang</first><last>Wang</last></author>
       <author><first>Zhe</first><last>Gan</last></author>
       <author><first>Yu</first><last>Cheng</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Jingjing</first><last>Liu</last></author>
       <pages>9320–9331</pages>
       <abstract>Pre-trained neural abstractive summarization systems have dominated extractive strategies on news summarization performance, at least in terms of ROUGE. However, system-generated abstractive summaries often face the pitfall of factual inconsistency: generating incorrect facts with respect to the source text. To address this challenge, we propose Span-Fact, a suite of two factual correction models that leverages knowledge learned from question answering models to make corrections in system-generated summaries via span selection. Our models employ single or multi-masking strategies to either iteratively or auto-regressively replace entities in order to ensure semantic consistency w.r.t. the source text, while retaining the syntactic structure of summaries generated by abstractive summarization models. Experiments show that our models significantly boost the factual consistency of system-generated summaries without sacrificing summary quality in terms of both automatic metrics and human evaluation.</abstract>
@@ -10188,7 +10188,7 @@
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Hideaki</first><last>Takeda</last></author>
       <author><first>Yoshiyasu</first><last>Takefuji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>23–30</pages>
       <abstract>The embeddings of entities in a large knowledge base (e.g., Wikipedia) are highly beneficial for solving various natural language tasks that involve real world knowledge. In this paper, we present Wikipedia2Vec, a Python-based open-source tool for learning the embeddings of words and entities from Wikipedia. The proposed tool enables users to learn the embeddings efficiently by issuing a single command with a Wikipedia dump file as an argument. We also introduce a web-based demonstration of our tool that allows users to visualize and explore the learned embeddings. In our experiments, our tool achieved a state-of-the-art result on the KORE entity relatedness dataset, and competitive results on various standard benchmark datasets. Furthermore, our tool has been used as a key component in various recent studies. We publicize the source code, demonstration, and the pretrained embeddings for 12 languages at <url>https://wikipedia2vec.github.io/</url>.</abstract>
       <url hash="d7e02b98">2020.emnlp-demos.4</url>
@@ -10200,10 +10200,10 @@
       <author><first>Anthony</first><last>Ferritto</last></author>
       <author><first>Lin</first><last>Pan</last></author>
       <author><first>Rishav</first><last>Chakravarti</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>J. William</first><last>Murdock</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>31–37</pages>
       <abstract>We introduce ARES (A Reading Comprehension Ensembling Service): a novel Machine Reading Comprehension (MRC) demonstration system which utilizes an ensemble of models to increase F1 by 2.3 points. While many of the top leaderboard submissions in popular MRC benchmarks such as the Stanford Question Answering Dataset (SQuAD) and Natural Questions (NQ) use model ensembles, the accompanying papers do not publish their ensembling strategies. In this work, we detail and evaluate various ensembling strategies using the NQ dataset. ARES leverages the CFO (Chakravarti et al., 2019) and ReactJS distributed frameworks to provide a scalable interactive Question Answering experience that capitalizes on the agreement (or lack thereof) between models to improve the answer visualization experience.</abstract>
       <url hash="846870fd">2020.emnlp-demos.5</url>
@@ -10233,7 +10233,7 @@
       <author><first>Sylvain</first><last>Gugger</last></author>
       <author><first>Mariama</first><last>Drame</last></author>
       <author><first>Quentin</first><last>Lhoest</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>38–45</pages>
       <award>Honorable Demonstration Paper</award>
       <abstract>Recent progress in natural language processing has been driven by advances in both model architecture and model pretraining. Transformer architectures have facilitated building higher-capacity models and pretraining has made it possible to effectively utilize this capacity for a wide variety of tasks. Transformers is an open-source library with the goal of opening up these advances to the wider machine learning community. The library consists of carefully engineered state-of-the art Transformer architectures under a unified API. Backing this library is a curated collection of pretrained models made by and available for the community. Transformers is designed to be extensible by researchers, simple for practitioners, and fast and robust in industrial deployments. The library is available at <url>https://github.com/huggingface/transformers</url>.</abstract>
@@ -10276,7 +10276,7 @@
       <title><fixed-case>D</fixed-case>eezy<fixed-case>M</fixed-case>atch: A Flexible Deep Learning Approach to Fuzzy String Matching</title>
       <author><first>Kasra</first><last>Hosseini</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Mariona</first><last>Coll Ardanuy</last></author>
+      <author id="mariona-coll-ardanuy"><first>Mariona</first><last>Coll Ardanuy</last></author>
       <pages>62–69</pages>
       <abstract>We present DeezyMatch, a free, open-source software library written in Python for fuzzy string matching and candidate ranking. Its pair classifier supports various deep neural network architectures for training new classifiers and for fine-tuning a pretrained model, which paves the way for transfer learning in fuzzy string matching. This approach is especially useful where only limited training examples are available. The learned DeezyMatch models can be used to generate rich vector representations from string inputs. The candidate ranker component in DeezyMatch uses these vector representations to find, for a given query, the best matching candidates in a knowledge base. It uses an adaptive searching algorithm applicable to large knowledge bases and query sets. We describe DeezyMatch’s functionality, design and implementation, accompanied by a use case in toponym matching and candidate ranking in realistic noisy datasets.</abstract>
       <url hash="ea7e2915">2020.emnlp-demos.9</url>
@@ -10285,7 +10285,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>C</fixed-case>o<fixed-case>S</fixed-case>a<fixed-case>T</fixed-case>a: A Constraint Satisfaction Solver and Interpreted Language for Semi-Structured Tables of Sentences</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>70–76</pages>
       <abstract>This work presents CoSaTa, an intuitive constraint satisfaction solver and interpreted language for knowledge bases of semi-structured tables expressed as text. The stand-alone CoSaTa solver allows easily expressing complex compositional “inference patterns” for how knowledge from different tables tends to connect to support inference and explanation construction in question answering and other downstream tasks, while including advanced declarative features and the ability to operate over multiple representations of text (words, lemmas, or part-of-speech tags). CoSaTa also includes a hybrid imperative/declarative interpreted language for expressing simple models through minimally-specified simulations grounded in constraint patterns, helping bridge the gap between question answering, question explanation, and model simulation. The solver and interpreter are released as open source. Screencast Demo: <url>https://youtu.be/t93Acsz7LyE</url></abstract>
       <url hash="c1b464c2">2020.emnlp-demos.10</url>
@@ -10406,8 +10406,8 @@
       <author><first>Kishore</first><last>Vasan</last></author>
       <author><first>Jonathan</first><last>Borchardt</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <author><first>Jevin</first><last>West</last></author>
       <pages>135–143</pages>
       <abstract>The COVID-19 pandemic has sparked unprecedented mobilization of scientists, generating a deluge of papers that makes it hard for researchers to keep track and explore new directions. Search engines are designed for targeted queries, not for discovery of connections across a corpus. In this paper, we present SciSight, a system for exploratory search of COVID-19 research integrating two key capabilities: first, exploring associations between biomedical facets automatically extracted from papers (e.g., genes, drugs, diseases, patient outcomes); second, combining textual and network information to search and visualize groups of researchers and their ties. SciSight has so far served over 15K users with over 42K page views and 13% returns.</abstract>
@@ -10442,7 +10442,7 @@
       <author><first>Danish</first><last>Contractor</last></author>
       <author><first>Siva</first><last>Patel</last></author>
       <author><first>Q. Vera</first><last>Liao</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <author><first>David</first><last>Konopnicki</last></author>
       <pages>151–157</pages>
@@ -10531,7 +10531,7 @@
       <author><first>Haoran</first><last>Shi</last></author>
       <author><first>Xiaodan</first><last>Liang</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>197–204</pages>
       <abstract>Empirical natural language processing (NLP) systems in application domains (e.g., healthcare, finance, education) involve interoperation among multiple components, ranging from data ingestion, human annotation, to text retrieval, analysis, generation, and visualization. We establish a unified open-source framework to support fast development of such sophisticated NLP workflows in a composable manner. The framework introduces a uniform data representation to encode heterogeneous results by a wide range of NLP tasks. It offers a large repository of processors for NLP tasks, visualization, and annotation, which can be easily assembled with full interoperability under the unified representation. The highly extensible framework allows plugging in custom processors from external off-the-shelf NLP and deep learning libraries. The whole framework is delivered through two modularized yet integratable open-source projects, namely Forte (for workflow infrastructure and NLP function processors) and Stave (for user interaction, visualization, and annotation).</abstract>
@@ -10608,7 +10608,7 @@
     </paper>
     <paper id="2">
       <title>Fact-Checking, Fake News, Propaganda, and Media Bias: Truth Seeking in the Post-Truth Era</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <pages>7–19</pages>
       <abstract>The rise of social media has democratized content creation and has made it easy for everybody to share and spread information online. On the positive side, this has given rise to citizen journalism, thus enabling much faster dissemination of information compared to what was possible with newspapers, radio, and TV. On the negative side, stripping traditional media from their gate-keeping role has left the public unprotected against the spread of misinformation, which could now travel at breaking-news speed over the same democratic channel. This has given rise to the proliferation of false information specifically created to affect individual people’s beliefs, and ultimately to influence major events such as political elections. There are strong indications that false information was weaponized at an unprecedented scale during Brexit and the 2016 U.S. presidential elections. “Fake news,” which can be defined as fabricated information that mimics news media content in form but not in organizational process or intent, became the Word of the Year for 2017, according to Collins Dictionary. Thus, limiting the spread of “fake news” and its impact has become a major focus for computer scientists, journalists, social media companies, and regulatory authorities. The tutorial will offer an overview of the broad and emerging research area of disinformation, with focus on the latest developments and research directions.</abstract>
@@ -10644,8 +10644,8 @@
     <paper id="5">
       <title>Representation, Learning and Reasoning on Spatial Language for Downstream <fixed-case>NLP</fixed-case> Tasks</title>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>28–33</pages>
       <abstract>Understating spatial semantics expressed in natural language can become highly complex in real-world applications. This includes applications of language grounding, navigation, visual question answering, and more generic human-machine interaction and dialogue systems. In many of such downstream tasks, explicit representation of spatial concepts and relationships can improve the capabilities of machine learning models in reasoning and deep language understanding. In this tutorial, we overview the cutting-edge research results and existing challenges related to spatial language understanding including semantic annotations, existing corpora, symbolic and sub-symbolic representations, qualitative spatial reasoning, spatial common sense, deep and structured learning models. We discuss the recent results on the above-mentioned applications –that need spatial language learning and reasoning – and highlight the research gaps and future directions.</abstract>
       <url hash="870c860b">2020.emnlp-tutorials.5</url>
diff --git a/data/xml/2020.eval4nlp.xml b/data/xml/2020.eval4nlp.xml
index 883867ca0c..5e703ee274 100644
--- a/data/xml/2020.eval4nlp.xml
+++ b/data/xml/2020.eval4nlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Yang</first><last>Gao</last></editor>
       <editor><first>Maxime</first><last>Peyrard</last></editor>
       <editor><first>Wei</first><last>Zhao</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>November</month>
@@ -45,7 +45,7 @@
     <paper id="3">
       <title>Item Response Theory for Efficient Human Evaluation of Chatbots</title>
       <author><first>João</first><last>Sedoc</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>21–33</pages>
       <abstract>Conversational agent quality is currently assessed using human evaluation, and often requires an exorbitant number of comparisons to achieve statistical significance. In this paper, we introduce Item Response Theory (IRT) for chatbot evaluation, using a paired comparison in which annotators judge which system responds better to the next turn of a conversation. IRT is widely used in educational testing for simultaneously assessing the ability of test takers and the quality of test questions. It is similarly well suited for chatbot evaluation since it allows the assessment of both models and the prompts used to evaluate them. We use IRT to efficiently assess chatbots, and show that different examples from the evaluation set are better suited for comparing high-quality (nearer to human performance) than low-quality systems. Finally, we use IRT to reduce the number of evaluation examples assessed by human annotators while retaining discriminative power.</abstract>
       <url hash="a90a3c48">2020.eval4nlp-1.3</url>
@@ -60,7 +60,7 @@
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
       <pages>34–39</pages>
       <abstract>In this paper, we propose an evaluation metric for image captioning systems using both image and text information. Unlike the previous methods that rely on textual representations in evaluating the caption, our approach uses visiolinguistic representations. The proposed method generates image-conditioned embeddings for each token using ViLBERT from both generated and reference texts. Then, these contextual embeddings from each of the two sentence-pair are compared to compute the similarity score. Experimental results on three benchmark datasets show that our method correlates significantly better with human judgments than all existing metrics.</abstract>
@@ -98,7 +98,7 @@
       <title>On the Evaluation of Machine Translation n-best Lists</title>
       <author><first>Jacob</first><last>Bremerman</last></author>
       <author><first>Huda</first><last>Khayrallah</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Matt</first><last>Post</last></author>
       <pages>60–68</pages>
       <abstract>The standard machine translation evaluation framework measures the single-best output of machine translation systems. There are, however, many situations where n-best lists are needed, yet there is no established way of evaluating them. This paper establishes a framework for addressing n-best evaluation by outlining three different questions one could consider when determining how one would define a ‘good’ n-best list and proposing evaluation measures for each question. The first and principal contribution is an evaluation measure that characterizes the translation quality of an entire n-best list by asking whether many of the valid translations are placed near the top of the list. The second is a measure that uses gold translations with preference annotations to ask to what degree systems can produce ranked lists in preference order. The third is a measure that rewards partial matches, evaluating the closeness of the many items in an n-best list to a set of many valid references. These three perspectives make clear that having access to many references can be useful when n-best evaluation is the goal.</abstract>
@@ -160,7 +160,7 @@
       <author><first>Jesper</first><last>Brink Andersen</last></author>
       <author><first>Mikkel</first><last>Bak Bertelsen</last></author>
       <author><first>Mikkel</first><last>Hørby Schou</last></author>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>120–130</pages>
       <abstract>Word embeddings are an active topic in the NLP research community. State-of-the-art neural models achieve high performance on downstream tasks, albeit at the cost of computationally expensive training. Cost aware solutions require cheaper models that still achieve good performance. We present several reproduction studies of intrinsic evaluation tasks that evaluate non-contextual word representations in multiple languages. Furthermore, we present 50-8-8, a new data set for the outlier identification task, which avoids limitations of the original data set, such as ambiguous words, infrequent words, and multi-word tokens, while increasing the number of test cases. The data set is expanded to contain semantic and syntactic tests and is multilingual (English, German, and Italian). We provide an in-depth analysis of word embedding models with a range of hyper-parameters. Our analysis shows the suitability of different models and hyper-parameters for different tasks and the greater difficulty of representing German and Italian languages.</abstract>
diff --git a/data/xml/2020.evalnlgeval.xml b/data/xml/2020.evalnlgeval.xml
index c93095a690..89a4db36ef 100644
--- a/data/xml/2020.evalnlgeval.xml
+++ b/data/xml/2020.evalnlgeval.xml
@@ -46,7 +46,7 @@
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Thiago</first><last>Castro-Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>17–27</pages>
       <abstract>NLG researchers often use uncontrolled corpora to train and evaluate their systems, using textual similarity metrics, such as BLEU. This position paper argues in favour of two alternative evaluation strategies, using grammars or rule-based systems. These strategies are particularly useful to identify the strengths and weaknesses of different systems. We contrast our proposals with the (extended) WebNLG dataset, which is revealed to have a skewed distribution of predicates. We predict that this distribution affects the quality of the predictions for systems trained on this data. However, this hypothesis can only be thoroughly tested (without any confounds) once we are able to systematically manipulate the skewness of the data, using a rule-based approach.</abstract>
       <url hash="7bd8ff34">2020.evalnlgeval-1.3</url>
diff --git a/data/xml/2020.fever.xml b/data/xml/2020.fever.xml
index 0b5d2371f5..00d9b3d7e2 100644
--- a/data/xml/2020.fever.xml
+++ b/data/xml/2020.fever.xml
@@ -70,9 +70,9 @@
     <paper id="5">
       <title>Language Models as Fact Checkers?</title>
       <author><first>Nayeon</first><last>Lee</last></author>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Sinong</first><last>Wang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Hao</first><last>Ma</last></author>
       <author><first>Madian</first><last>Khabsa</last></author>
       <pages>36–41</pages>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="6">
       <title>Maintaining Quality in <fixed-case>FEVER</fixed-case> Annotation</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Julie</first><last>Binau</last></author>
       <author><first>Henri</first><last>Schulte</last></author>
       <pages>42–46</pages>
diff --git a/data/xml/2020.figlang.xml b/data/xml/2020.figlang.xml
index df48209ed9..2045ee4f34 100644
--- a/data/xml/2020.figlang.xml
+++ b/data/xml/2020.figlang.xml
@@ -47,10 +47,10 @@
     </paper>
     <paper id="3">
       <title>A Report on the 2020 <fixed-case>VUA</fixed-case> and <fixed-case>TOEFL</fixed-case> Metaphor Detection Shared Task</title>
-      <author><first>Chee Wee (Ben)</first><last>Leong</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee (Ben)</first><last>Leong</last></author>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
       <author><first>Chris</first><last>Hamill</last></author>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <author><first>Rutuja</first><last>Ubale</last></author>
       <author><first>Xianyang</first><last>Chen</last></author>
       <pages>18–29</pages>
@@ -127,7 +127,7 @@
       <title>Applying Transformers and Aspect-based Sentiment Analysis approaches on Sarcasm Detection</title>
       <author><first>Taha</first><last>Shangipour ataei</last></author>
       <author><first>Soroush</first><last>Javdan</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <pages>67–71</pages>
       <abstract>Sarcasm is a type of figurative language broadly adopted in social media and daily conversations. The sarcasm can ultimately alter the meaning of the sentence, which makes the opinion analysis process error-prone. In this paper, we propose to employ bidirectional encoder representations transformers (BERT), and aspect-based sentiment analysis approaches in order to extract the relation between context dialogue sequence and response and determine whether or not the response is sarcastic. The best performing method of ours obtains an F1 score of 0.73 on the Twitter dataset and 0.734 over the Reddit dataset at the second workshop on figurative language processing Shared Task 2020.</abstract>
       <url hash="fc8bf585">2020.figlang-1.9</url>
@@ -260,7 +260,7 @@
     <paper id="20">
       <title>Recognizing Euphemisms and Dysphemisms Using Sentiment Analysis</title>
       <author><first>Christian</first><last>Felt</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>136–145</pages>
       <abstract>This paper presents the first research aimed at recognizing euphemistic and dysphemistic phrases with natural language processing. Euphemisms soften references to topics that are sensitive, disagreeable, or taboo. Conversely, dysphemisms refer to sensitive topics in a harsh or rude way. For example, “passed away” and “departed” are euphemisms for death, while “croaked” and “six feet under” are dysphemisms for death. Our work explores the use of sentiment analysis to recognize euphemistic and dysphemistic language. First, we identify near-synonym phrases for three topics (firing, lying, and stealing) using a bootstrapping algorithm for semantic lexicon induction. Next, we classify phrases as euphemistic, dysphemistic, or neutral using lexical sentiment cues and contextual sentiment analysis. We introduce a new gold standard data set and present our experimental results for this task.</abstract>
       <url hash="e06ee38a">2020.figlang-1.20</url>
@@ -284,7 +284,7 @@
     <paper id="22">
       <title>Adaptation of Word-Level Benchmark Datasets for Relation-Level Metaphor Identification</title>
       <author><first>Omnia</first><last>Zayed</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>154–164</pages>
       <abstract>Metaphor processing and understanding has attracted the attention of many researchers recently with an increasing number of computational approaches. A common factor among these approaches is utilising existing benchmark datasets for evaluation and comparisons. The availability, quality and size of the annotated data are among the main difficulties facing the growing research area of metaphor processing. The majority of current approaches pertaining to metaphor processing concentrate on word-level processing due to data availability. On the other hand, approaches that process metaphors on the relation-level ignore the context where the metaphoric expression. This is due to the nature and format of the available data. Word-level annotation is poorly grounded theoretically and is harder to use in downstream tasks such as metaphor interpretation. The conversion from word-level to relation-level annotation is non-trivial. In this work, we attempt to fill this research gap by adapting three benchmark datasets, namely the VU Amsterdam metaphor corpus, the TroFi dataset and the TSV dataset, to suit relation-level metaphor identification. We publish the adapted datasets to facilitate future research in relation-level metaphor processing.</abstract>
@@ -295,7 +295,7 @@
     </paper>
     <paper id="23">
       <title>Generating Ethnographic Models from Communities’ Online Data</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Anna</first><last>Newheiser</last></author>
       <author><first>Nathan</first><last>Kemper</last></author>
       <author><first>Ning</first><last>Sa</last></author>
@@ -305,8 +305,8 @@
       <abstract>In this paper we describe computational ethnography study to demonstrate how machine learning techniques can be utilized to exploit bias resident in language data produced by communities with online presence. Specifically, we leverage the use of figurative language (i.e., the choice of metaphors) in online text (e.g., news media, blogs) produced by distinct communities to obtain models of community worldviews that can be shown to be distinctly biased and thus different from other communities’ models. We automatically construct metaphor-based community models for two distinct scenarios: gun rights and marriage equality. We then conduct a series of experiments to validate the hypothesis that the metaphors found in each community’s online language convey the bias in the community’s worldview.</abstract>
       <url hash="10dcef0a">2020.figlang-1.23</url>
       <attachment type="Software" hash="3084e139">2020.figlang-1.23.Software.zip</attachment>
-      <doi>10.18653/v1/2020.figlang-1.23</doi>
       <attachment type="Dataset" hash="5d983303">2020.figlang-1.23.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.figlang-1.23</doi>
       <video href="http://slideslive.com/38929711"/>
       <bibkey>strzalkowski-etal-2020-generating</bibkey>
     </paper>
@@ -361,7 +361,7 @@
       <title>Augmenting Neural Metaphor Detection with Concreteness</title>
       <author><first>Ghadi</first><last>Alnafesah</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>204–210</pages>
       <abstract>The idea that a shift in concreteness within a sentence indicates the presence of a metaphor has been around for a while. However, recent methods of detecting metaphor that have relied on deep neural models have ignored concreteness and related psycholinguistic information. We hypothesis that this information is not available to these models and that their addition will boost the performance of these models in detecting metaphor. We test this hypothesis on the Metaphor Detection Shared Task 2020 and find that the addition of concreteness information does in fact boost deep neural models. We also run tests on data from a previous shared task and show similar results.</abstract>
       <url hash="7777f0bb">2020.figlang-1.28</url>
@@ -373,7 +373,7 @@
       <author><first>Rafael</first><last>Ehren</last></author>
       <author><first>Timm</first><last>Lichte</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>211–220</pages>
       <abstract>Supervised disambiguation of verbal idioms (VID) poses special demands on the quality and quantity of the annotated data used for learning and evaluation. In this paper, we present a new VID corpus for German and perform a series of VID disambiguation experiments on it. Our best classifier, based on a neural architecture, yields an error reduction across VIDs of 57% in terms of accuracy compared to a simple majority baseline.</abstract>
       <url hash="ce346dcf">2020.figlang-1.29</url>
@@ -411,7 +411,7 @@
     <paper id="32">
       <title>Go Figure! Multi-task transformer-based architecture for metaphor detection using idioms: <fixed-case>ETS</fixed-case> team in 2020 metaphor shared task</title>
       <author><first>Xianyang</first><last>Chen</last></author>
-      <author><first>Chee Wee (Ben)</first><last>Leong</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee (Ben)</first><last>Leong</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
       <pages>235–243</pages>
@@ -448,7 +448,7 @@
     </paper>
     <paper id="35">
       <title>Testing the role of metadata in metaphor identification</title>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <author><first>Alexander</first><last>Onysko</last></author>
       <pages>256–263</pages>
       <abstract>This paper describes the adaptation and application of a neural network system for the automatic detection of metaphors. The LSTM BiRNN system participated in the shared task of metaphor identification that was part of the Second Workshop of Figurative Language Processing (FigLang2020) held at the Annual Conference of the Association for Computational Linguistics (ACL2020). The particular focus of our approach is on the potential influence that the metadata given in the ETS Corpus of Non-Native Written English might have on the automatic detection of metaphors in this dataset. The article first discusses the annotated ETS learner data, highlighting some of its peculiarities and inherent biases of metaphor use. A series of evaluations follow in order to test whether specific metadata influence the system performance in the task of automatic metaphor identification. The system is available under the APLv2 open-source license.</abstract>
@@ -463,7 +463,7 @@
       <author><first>Ben</first><last>Burtenshaw</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>264–269</pages>
       <abstract>We present an ensemble approach for the detection of sarcasm in Reddit and Twitter responses in the context of The Second Workshop on Figurative Language Processing held in conjunction with ACL 2020. The ensemble is trained on the predicted sarcasm probabilities of four component models and on additional features, such as the sentiment of the comment, its length, and source (Reddit or Twitter) in order to learn which of the component models is the most reliable for which input. The component models consist of an LSTM with hashtag and emoji representations; a CNN-LSTM with casing, stop word, punctuation, and sentiment representations; an MLP based on Infersent embeddings; and an SVM trained on stylometric and emotion-based features. All component models use the two conversational turns preceding the response as context, except for the SVM, which only uses features extracted from the response. The ensemble itself consists of an adaboost classifier with the decision tree algorithm as base estimator and yields F1-scores of 67% and 74% on the Reddit and Twitter test data, respectively.</abstract>
       <url hash="a15ac2d3">2020.figlang-1.36</url>
@@ -490,7 +490,7 @@
       <title>Transformer-based Context-aware Sarcasm Detection in Conversation Threads from Social Media</title>
       <author><first>Xiangjue</first><last>Dong</last></author>
       <author><first>Changmao</first><last>Li</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>276–280</pages>
       <abstract>We present a transformer-based sarcasm detection model that accounts for the context from the entire conversation thread for more robust predictions. Our model uses deep transformer layers to perform multi-head attentions among the target utterance and the relevant context in the thread. The context-aware models are evaluated on two datasets from social media, Twitter and Reddit, and show 3.1% and 7.0% improvements over their baselines. Our best models give the F1-scores of 79.0% and 75.0% for the Twitter and Reddit datasets respectively, becoming one of the highest performing systems among 36 participants in this shared task.</abstract>
       <url hash="0159aa16">2020.figlang-1.38</url>
diff --git a/data/xml/2020.findings.xml b/data/xml/2020.findings.xml
index d823daab71..3dcb8bb3a4 100644
--- a/data/xml/2020.findings.xml
+++ b/data/xml/2020.findings.xml
@@ -3,7 +3,7 @@
   <volume id="emnlp" ingest-date="2020-11-12" type="proceedings">
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: EMNLP 2020</booktitle>
-      <editor><first>Trevor</first><last>Cohn</last></editor>
+      <editor id="trevor-cohn"><first>Trevor</first><last>Cohn</last></editor>
       <editor><first>Yulan</first><last>He</last></editor>
       <editor id="yang-liu-icsi"><first>Yang</first><last>Liu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -46,7 +46,7 @@
       <title>Stay Hungry, Stay Focused: Generating Informative and Specific Questions in Information-Seeking Conversations</title>
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Yuhao</first><last>Zhang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>25–40</pages>
       <abstract>We investigate the problem of generating informative questions in information-asymmetric conversations. Unlike previous work on question generation which largely assumes knowledge of what the answer might be, we are interested in the scenario where the questioner is not given the context from which answers are drawn, but must reason pragmatically about how to acquire new information, given the shared conversation history. We identify two core challenges: (1) formally defining the informativeness of potential questions, and (2) exploring the prohibitively large space of potential questions to find the good candidates. To generate pragmatic questions, we use reinforcement learning to optimize an informativeness metric we propose, combined with a reward function designed to promote more specific questions. We demonstrate that the resulting pragmatic questioner substantially improves the informativeness and specificity of questions generated over a baseline model, as evaluated by our metrics as well as humans.</abstract>
       <url hash="ce6352f3">2020.findings-emnlp.3</url>
@@ -109,7 +109,7 @@
       <title>Improving Text Understanding via Deep Syntax-Semantics Communication</title>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>84–93</pages>
       <abstract>Recent studies show that integrating syntactic tree models with sequential semantic models can bring improved task performance, while these methods mostly employ shallow integration of syntax and semantics. In this paper, we propose a deep neural communication model between syntax and semantics to improve the performance of text understanding. Local communication is performed between syntactic tree encoder and sequential semantic encoder for mutual learning of information exchange. Global communication can further ensure comprehensive information propagation. Results on multiple syntax-dependent tasks show that our model outperforms strong baselines by a large margin. In-depth analysis indicates that our method is highly effective in composing sentence semantics.</abstract>
       <url hash="5589c3bb">2020.findings-emnlp.8</url>
@@ -180,7 +180,7 @@
     </paper>
     <paper id="14">
       <title>Neural Speed Reading Audited</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>148–153</pages>
       <abstract>Several approaches to neural speed reading have been presented at major NLP and machine learning conferences in 2017–20; i.e., “human-inspired” recurrent network architectures that learn to “read” text faster by skipping irrelevant words, typically optimizing the joint objective of minimizing classification error rate and FLOPs used at inference time. This paper reflects on the meaningfulness of the speed reading task, showing that (a) better and faster approaches to, say, document classification, already exist, which also learn to ignore part of the input (I give an example with 7% error reduction and a 136x speed-up over the state of the art in neural speed reading); and that (b) any claims that neural speed reading is “human-inspired”, are ill-founded.</abstract>
       <url hash="29e00fa1">2020.findings-emnlp.14</url>
@@ -231,7 +231,7 @@
       <title>Mimic and Conquer: Heterogeneous Tree Structure Distillation for Syntactic <fixed-case>NLP</fixed-case></title>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>183–193</pages>
       <abstract>Syntax has been shown useful for various NLP tasks, while existing work mostly encodes singleton syntactic tree using one hierarchical neural network. In this paper, we investigate a simple and effective method, Knowledge Distillation, to integrate heterogeneous structure knowledge into a unified sequential LSTM encoder. Experimental results on four typical syntax-dependent tasks show that our method outperforms tree encoders by effectively integrating rich heterogeneous structure syntax, meanwhile reducing error propagation, and also outperforms ensemble methods, in terms of both the efficiency and accuracy.</abstract>
       <url hash="a398c8b9">2020.findings-emnlp.18</url>
@@ -243,7 +243,7 @@
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Ruochen</first><last>Xu</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>194–203</pages>
       <abstract>With the abundance of automatic meeting transcripts, meeting summarization is of great interest to both participants and other parties. Traditional methods of summarizing meetings depend on complex multi-step pipelines that make joint optimization intractable. Meanwhile, there are a handful of deep neural models for text summarization and dialogue systems. However, the semantic structure and styles of meeting transcripts are quite different from articles and conversations. In this paper, we propose a novel abstractive summary network that adapts to the meeting scenario. We design a hierarchical structure to accommodate long meeting transcripts and a role vector to depict the difference among speakers. Furthermore, due to the inadequacy of meeting summary data, we pretrain the model on large-scale news summary data. Empirical results show that our model outperforms previous approaches in both automatic metrics and human evaluation. For example, on ICSI dataset, the ROUGE-1 score increases from 34.66% to 46.28%.</abstract>
       <url hash="a7a97807">2020.findings-emnlp.19</url>
@@ -284,7 +284,7 @@
       <author><first>Fabio</first><last>Petroni</last></author>
       <author><first>Aleksandra</first><last>Piktus</last></author>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Vassilis</first><last>Plachouras</last></author>
       <author><first>Fabrizio</first><last>Silvestri</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
@@ -354,7 +354,7 @@
       <title>Understanding tables with intermediate pre-training</title>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <author><first>Syrine</first><last>Krichene</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <pages>281–296</pages>
       <abstract>Table entailment, the binary classification task of finding if a sentence is supported or refuted by the content of a table, requires parsing language and table structure as well as numerical and discrete reasoning. While there is extensive work on textual entailment, table entailment is less well studied. We adapt TAPAS (Herzig et al., 2020), a table-based BERT model, to recognize entailment. Motivated by the benefits of data augmentation, we create a balanced dataset of millions of automatically created training examples which are learned in an intermediate step prior to fine-tuning. This new data is not only useful for table entailment, but also for SQA (Iyyer et al., 2017), a sequential table QA task. To be able to use long examples as input of BERT models, we evaluate table pruning techniques as a pre-processing step to drastically improve the training and prediction efficiency at a moderate drop in accuracy. The different methods set the new state-of-the-art on the TabFact (Chen et al., 2020) and SQA datasets.</abstract>
       <url hash="7e9b03bf">2020.findings-emnlp.27</url>
@@ -419,7 +419,7 @@
     <paper id="32">
       <title>The <fixed-case>RELX</fixed-case> Dataset and Matching the Multilingual Blanks for Cross-Lingual Relation Classification</title>
       <author><first>Abdullatif</first><last>Köksal</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>340–350</pages>
       <abstract>Relation classification is one of the key topics in information extraction, which can be used to construct knowledge bases or to provide useful information for question answering. Current approaches for relation classification are mainly focused on the English language and require lots of training data with human annotations. Creating and annotating a large amount of training data for low-resource languages is impractical and expensive. To overcome this issue, we propose two cross-lingual relation classification models: a baseline model based on Multilingual BERT and a new multilingual pretraining setup, which significantly improves the baseline with distant supervision. For evaluation, we introduce a new public benchmark dataset for cross-lingual relation classification in English, French, German, Spanish, and Turkish, called RELX. We also provide the RELX-Distant dataset, which includes hundreds of thousands of sentences with relations from Wikipedia and Wikidata collected by distant supervision for these languages. Our code and data are available at: <url>https://github.com/boun-tabi/RELX</url></abstract>
       <url hash="493bd041">2020.findings-emnlp.32</url>
@@ -467,7 +467,7 @@
     <paper id="36">
       <title>Contextual Modulation for Relation-Level Metaphor Identification</title>
       <author><first>Omnia</first><last>Zayed</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>388–406</pages>
       <abstract>Identifying metaphors in text is very challenging and requires comprehending the underlying comparison. The automation of this cognitive process has gained wide attention lately. However, the majority of existing approaches concentrate on word-level identification by treating the task as either single-word classification or sequential labelling without explicitly modelling the interaction between the metaphor components. On the other hand, while existing relation-level approaches implicitly model this interaction, they ignore the context where the metaphor occurs. In this work, we address these limitations by introducing a novel architecture for identifying relation-level metaphoric expressions of certain grammatical relations based on contextual modulation. In a methodology inspired by works in visual reasoning, our approach is based on conditioning the neural network computation on the deep contextualised features of the candidate expressions using feature-wise linear modulation. We demonstrate that the proposed architecture achieves state-of-the-art results on benchmark datasets. The proposed methodology is generic and could be applied to other textual classification problems that benefit from contextual interaction.</abstract>
@@ -492,7 +492,7 @@
       <author><first>Huaiyu</first><last>Zhu</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <pages>415–421</pages>
       <abstract>Resources for Semantic Role Labeling (SRL) are typically annotated by experts at great expense. Prior attempts to develop crowdsourcing methods have either had low accuracy or required substantial expert annotation. We propose a new multi-stage crowd workflow that substantially reduces expert involvement without sacrificing accuracy. In particular, we introduce a unique filter stage based on the key observation that crowd workers are able to almost perfectly filter out incorrect options for labels. Our three-stage workflow produces annotations with 95% accuracy for predicate labels and 93% for argument labels, which is comparable to expert agreement. Compared to prior work on crowdsourcing for SRL, we decrease expert effort by 4x, from 56% to 14% of cases. Our approach enables more scalable annotation of SRL, and could enable annotation of NLP tasks that have previously been considered too complex to effectively crowdsource.</abstract>
       <url hash="ba5398bc">2020.findings-emnlp.38</url>
@@ -612,7 +612,7 @@
     </paper>
     <paper id="48">
       <title>Dynamic Data Selection for Curriculum Learning via Ability Estimation</title>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>545–555</pages>
       <abstract>Curriculum learning methods typically rely on heuristics to estimate the difficulty of training examples or the ability of the model. In this work, we propose replacing difficulty heuristics with learned difficulty parameters. We also propose Dynamic Data selection for Curriculum Learning via Ability Estimation (DDaCLAE), a strategy that probes model ability at each training epoch to select the best training examples at that point. We show that models using learned difficulty and/or ability outperform heuristic-based curriculum learning models on the GLUE classification tasks.</abstract>
@@ -624,7 +624,7 @@
       <title>Fixed Encoder Self-Attention Patterns in Transformer-Based Machine Translation</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>556–568</pages>
       <abstract>Transformer-based models have brought a radical change to neural machine translation. A key feature of the Transformer architecture is the so-called multi-head attention mechanism, which allows the model to focus simultaneously on different parts of the input. However, recent works have shown that most attention heads learn simple, and often redundant, positional patterns. In this paper, we propose to replace all but one attention head of each encoder layer with simple fixed – non-learnable – attentive patterns that are solely based on position and do not require any external knowledge. Our experiments with different data sizes and multiple language pairs show that fixing the attention heads on the encoder side of the Transformer at training time does not impact the translation quality and even increases BLEU scores by up to 3 points in low-resource scenarios.</abstract>
       <url hash="1c0718e0">2020.findings-emnlp.49</url>
@@ -764,7 +764,7 @@
       <author><first>Lujun</first><last>Zhao</last></author>
       <author><first>Mengxi</first><last>Wei</last></author>
       <author><first>Changlong</first><last>Sun</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>678–688</pages>
       <abstract>In this work, we explore the way to quickly adjust an existing named entity recognition (NER) system to make it capable of recognizing entity types not defined in the system. As an illustrative example, consider the case that a NER system has been built to recognize person and organization names, and now it requires to additionally recognize job titles. Such a situation is common in the industrial areas, where the entity types required to recognize vary a lot in different products and keep changing. To avoid laborious data labeling and achieve fast adaptation, we propose to adjust the existing NER system using the previously labeled data and entity lexicons of the newly introduced entity types. We formulate such a task as a partially supervised learning problem and accordingly propose an effective algorithm to solve the problem. Comprehensive experimental studies on several public NER datasets validate the effectiveness of our method.</abstract>
       <url hash="fb5fd953">2020.findings-emnlp.60</url>
@@ -826,10 +826,10 @@
       <title>Rethinking Self-Attention: Towards Interpretability in Neural Parsing</title>
       <author><first>Khalil</first><last>Mrini</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>731–742</pages>
       <abstract>Attention mechanisms have improved the performance of NLP tasks while allowing models to remain explainable. Self-attention is currently widely used, however interpretability is difficult due to the numerous attention distributions. Recent work has shown that model representations can benefit from label-specific information, while facilitating interpretation of predictions. We introduce the Label Attention Layer: a new form of self-attention where attention heads represent labels. We test our novel layer by running constituency and dependency parsing experiments and show our new model obtains new state-of-the-art results for both tasks on both the Penn Treebank (PTB) and Chinese Treebank. Additionally, our model requires fewer self-attention layers compared to existing work. Finally, we find that the Label Attention heads learn relations between syntactic categories and show pathways to analyze errors.</abstract>
       <url hash="04abd82c">2020.findings-emnlp.65</url>
@@ -838,7 +838,7 @@
     </paper>
     <paper id="66">
       <title><fixed-case>P</fixed-case>olicy<fixed-case>QA</fixed-case>: A Reading Comprehension Dataset for Privacy Policies</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Jianfeng</first><last>Chi</last></author>
       <author><first>Yuan</first><last>Tian</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -852,7 +852,7 @@
       <title>A Linguistic Analysis of Visually Grounded Dialogues Based on Spatial Expressions</title>
       <author><first>Takuma</first><last>Udagawa</last></author>
       <author><first>Takato</first><last>Yamazaki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>750–765</pages>
       <abstract>Recent models achieve promising results in visually grounded dialogues. However, existing datasets often contain undesirable biases and lack sophisticated linguistic analyses, which make it difficult to understand how well current models recognize their precise linguistic structures. To address this problem, we make two design choices: first, we focus on OneCommon Corpus (CITATION), a simple yet challenging common grounding dataset which contains minimal bias by design. Second, we analyze their linguistic structures based on spatial expressions and provide comprehensive and reliable annotation for 600 dialogues. We show that our annotation captures important linguistic structures including predicate-argument structure, modification and ellipsis. In our experiments, we assess the model’s understanding of these structures through reference resolution. We demonstrate that our annotation can reveal both the strengths and weaknesses of baseline models in essential levels of detail. Overall, we propose a novel framework and resource for investigating fine-grained language understanding in visually grounded dialogues.</abstract>
       <url hash="62e260d2">2020.findings-emnlp.67</url>
@@ -907,7 +907,7 @@
       <title><fixed-case>E</fixed-case>-<fixed-case>BERT</fixed-case>: Efficient-Yet-Effective Entity Embeddings for <fixed-case>BERT</fixed-case></title>
       <author><first>Nina</first><last>Poerner</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>803–818</pages>
       <abstract>We present a novel way of injecting factual knowledge about entities into the pretrained BERT model (Devlin et al., 2019): We align Wikipedia2Vec entity vectors (Yamada et al., 2016) with BERT’s native wordpiece vector space and use the aligned entity vectors as if they were wordpiece vectors. The resulting entity-enhanced version of BERT (called E-BERT) is similar in spirit to ERNIE (Zhang et al., 2019) and KnowBert (Peters et al., 2019), but it requires no expensive further pre-training of the BERT encoder. We evaluate E-BERT on unsupervised question answering (QA), supervised relation classification (RC) and entity linking (EL). On all three tasks, E-BERT outperforms BERT and other baselines. We also show quantitatively that the original BERT model is overly reliant on the surface form of entity names (e.g., guessing that someone with an Italian-sounding name speaks Italian), and that E-BERT mitigates this problem.</abstract>
       <url hash="7a39c8e7">2020.findings-emnlp.71</url>
@@ -918,7 +918,7 @@
     </paper>
     <paper id="72">
       <title>A Multi-task Learning Framework for Opinion Triplet Extraction</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Qiuchi</first><last>Li</last></author>
       <author><first>Dawei</first><last>Song</last></author>
       <author><first>Benyou</first><last>Wang</last></author>
@@ -946,7 +946,7 @@
     <paper id="74">
       <title>Improving <fixed-case>QA</fixed-case> Generalization by Concurrent Modeling of Multiple Biases</title>
       <author><first>Mingzhu</first><last>Wu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Andreas</first><last>Rücklé</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>839–853</pages>
@@ -960,7 +960,7 @@
       <title>Actor-Double-Critic: Incorporating Model-Based Critic for Task-Oriented Dialogue Systems</title>
       <author><first>Yen-chen</first><last>Wu</last></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>854–863</pages>
       <abstract>In order to improve the sample-efficiency of deep reinforcement learning (DRL), we implemented imagination augmented agent (I2A) in spoken dialogue systems (SDS). Although I2A achieves a higher success rate than baselines by augmenting predicted future into a policy network, its complicated architecture introduces unwanted instability. In this work, we propose actor-double-critic (ADC) to improve the stability and overall performance of I2A. ADC simplifies the architecture of I2A to reduce excessive parameters and hyper-parameters. More importantly, a separate model-based critic shares parameters between actions and makes back-propagation explicit. In our experiments on Cambridge Restaurant Booking task, ADC enhances success rates considerably and shows robustness to imperfect environment models. In addition, ADC exhibits the stability and sample-efficiency as significantly reducing the baseline standard deviation of success rates and reaching the 80% success rate with half training data.</abstract>
       <url hash="0ea27564">2020.findings-emnlp.75</url>
@@ -1063,7 +1063,7 @@
       <title>Cross-lingual Alignment Methods for Multilingual <fixed-case>BERT</fixed-case>: A Comparative Study</title>
       <author><first>Saurabh</first><last>Kulshreshtha</last></author>
       <author><first>Jose Luis</first><last>Redondo Garcia</last></author>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <pages>933–942</pages>
       <abstract>Multilingual BERT (mBERT) has shown reasonable capability for zero-shot cross-lingual transfer when fine-tuned on downstream tasks. Since mBERT is not pre-trained with explicit cross-lingual supervision, transfer performance can further be improved by aligning mBERT with cross-lingual signal. Prior work propose several approaches to align contextualised embeddings. In this paper we analyse how different forms of cross-lingual supervision and various alignment methods influence the transfer capability of mBERT in zero-shot setting. Specifically, we compare parallel corpora vs dictionary-based supervision and rotational vs fine-tuning based alignment methods. We evaluate the performance of different alignment methodologies across eight languages on two tasks: Name Entity Recognition and Semantic Slot Filling. In addition, we propose a novel normalisation method which consistently improves the performance of rotation-based alignment including a notable 3% F1 improvement for distant and typologically dissimilar languages. Importantly we identify the biases of the alignment methods to the type of task and proximity to the transfer language. We also find that supervision from parallel corpus is generally superior to dictionary alignments.</abstract>
       <url hash="67a4d950">2020.findings-emnlp.83</url>
@@ -1112,11 +1112,11 @@
     <paper id="87">
       <title>Scene Graph Modification Based on Natural Language Commands</title>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Zhe</first><last>Lin</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Nhan</first><last>Dam</last></author>
       <pages>972–990</pages>
@@ -1139,11 +1139,11 @@
     </paper>
     <paper id="89">
       <title>Transition-based Parsing with Stack-Transformers</title>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
       <author><first>Austin</first><last>Blodgett</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>1001–1007</pages>
       <abstract>Modeling the parser state is key to good performance in transition-based parsing. Recurrent Neural Networks considerably improved the performance of transition-based systems by modelling the global state, e.g. stack-LSTM parsers, or local state modeling of contextualized features, e.g. Bi-LSTM parsers. Given the success of Transformer architectures in recent parsing systems, this work explores modifications of the sequence-to-sequence Transformer architecture to model either global or local parser states in transition-based parsing. We show that modifications of the cross attention mechanism of the Transformer considerably strengthen performance both on dependency and Abstract Meaning Representation (AMR) parsing tasks, particularly for smaller models or limited training data.</abstract>
       <url hash="b94f3f65">2020.findings-emnlp.89</url>
@@ -1173,7 +1173,7 @@
       <title><fixed-case>H</fixed-case>ybrid<fixed-case>QA</fixed-case>: A Dataset of Multi-Hop Question Answering over Tabular and Textual Data</title>
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Hanwen</first><last>Zha</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Wenhan</first><last>Xiong</last></author>
       <author><first>Hong</first><last>Wang</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
@@ -1195,9 +1195,9 @@
     </paper>
     <paper id="93">
       <title><fixed-case>EST</fixed-case>e<fixed-case>R</fixed-case>: Combining Word Co-occurrences and Word Associations for Unsupervised Emotion Detection</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
       <author><first>Polina</first><last>Rozenshtein</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>1043–1056</pages>
       <abstract>Accurate detection of emotions in user- generated text was shown to have several applications for e-commerce, public well-being, and disaster management. Currently, the state-of-the-art performance for emotion detection in text is obtained using complex, deep learning models trained on domain-specific, labeled data. In this paper, we propose ESTeR , an unsupervised model for identifying emotions using a novel similarity function based on random walks on graphs. Our model combines large-scale word co-occurrence information with word-associations from lexicons avoiding not only the dependence on labeled datasets, but also an explicit mapping of words to latent spaces used in emotion-enriched word embeddings. Our similarity function can also be computed efficiently. We study a range of datasets including recent tweets related to COVID-19 to illustrate the superior performance of our model and report insights on public emotions during the on-going pandemic.</abstract>
       <url hash="0f51135b">2020.findings-emnlp.93</url>
@@ -1222,7 +1222,7 @@
       <author><first>Peng</first><last>Wu</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Ridong</first><last>Jiang</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <pages>1063–1073</pages>
       <abstract>As an essential component of task-oriented dialogue systems, Dialogue State Tracking (DST) takes charge of estimating user intentions and requests in dialogue contexts and extracting substantial goals (states) from user utterances to help the downstream modules to determine the next actions of dialogue systems. For practical usages, a major challenge to constructing a robust DST model is to process a conversation with multi-domain states. However, most existing approaches trained DST on a single domain independently, ignoring the information across domains. To tackle the multi-domain DST task, we first construct a dialogue state graph to transfer structured features among related domain-slot pairs across domains. Then, we encode the graph information of dialogue states by graph convolutional networks and utilize a hard copy mechanism to directly copy historical states from the previous conversation. Experimental results show that our model improves the performances of the multi-domain DST baseline (TRADE) with the absolute joint accuracy of 2.0% and 1.0% on the MultiWOZ 2.0 and 2.1 dialogue datasets, respectively.</abstract>
       <url hash="1e6c1ebf">2020.findings-emnlp.95</url>
@@ -1326,7 +1326,7 @@
       <title><fixed-case>H</fixed-case>yper<fixed-case>T</fixed-case>ext: Endowing <fixed-case>F</fixed-case>ast<fixed-case>T</fixed-case>ext with Hyperbolic Geometry</title>
       <author><first>Yudong</first><last>Zhu</last></author>
       <author><first>Di</first><last>Zhou</last></author>
-      <author><first>Jinghui</first><last>Xiao</last></author>
+      <author id="jinghui-xiao"><first>Jinghui</first><last>Xiao</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Xiao</first><last>Chen</last></author>
       <author><first>Qun</first><last>Liu</last></author>
@@ -1339,7 +1339,7 @@
     <paper id="105">
       <title><fixed-case>A</fixed-case>uto<fixed-case>ETER</fixed-case>: Automated Entity Type Representation for Knowledge Graph Embedding</title>
       <author><first>Guanglin</first><last>Niu</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Yongfei</first><last>Zhang</last></author>
       <author><first>Shiliang</first><last>Pu</last></author>
       <author><first>Jingyang</first><last>Li</last></author>
@@ -1354,8 +1354,8 @@
       <title>Learning Robust and Multilingual Speech Representations</title>
       <author><first>Kazuya</first><last>Kawakami</last></author>
       <author><first>Luyu</first><last>Wang</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Aaron</first><last>van den Oord</last></author>
       <pages>1182–1192</pages>
       <abstract>Unsupervised speech representation learning has shown remarkable success at finding representations that correlate with phonetic structures and improve downstream speech recognition performance. However, most research has been focused on evaluating the representations in terms of their ability to improve the performance of speech recognition systems on read English (e.g. Wall Street Journal and LibriSpeech). This evaluation methodology overlooks two important desiderata that speech representations should have: robustness to domain shifts and transferability to other languages. In this paper we learn representations from up to 8000 hours of diverse and noisy speech data and evaluate the representations by looking at their robustness to domain shifts and their ability to improve recognition performance in many languages. We find that our representations confer significant robustness advantages to the resulting recognition systems: we see significant improvements in out-of-domain transfer relative to baseline feature sets and the features likewise provide improvements in 25 phonetically diverse languages.</abstract>
@@ -1381,7 +1381,7 @@
       <author><first>Hoang</first><last>Nguyen</last></author>
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Congying</first><last>Xia</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1209–1218</pages>
       <abstract>Few-shot Intent Detection is challenging due to the scarcity of available annotated utterances. Although recent works demonstrate that multi-level matching plays an important role in transferring learned knowledge from seen training classes to novel testing classes, they rely on a static similarity measure and overly fine-grained matching components. These limitations inhibit generalizing capability towards Generalized Few-shot Learning settings where both seen and novel classes are co-existent. In this paper, we propose a novel Semantic Matching and Aggregation Network where semantic components are distilled from utterances via multi-head self-attention with additional dynamic regularization constraints. These semantic components capture high-level information, resulting in more effective matching between instances. Our multi-perspective matching method provides a comprehensive matching measure to enhance representations of both labeled and unlabeled instances. We also propose a more challenging evaluation setting that considers classification on the joint all-class label space. Extensive experimental results demonstrate the effectiveness of our method. Our code and data are publicly available.</abstract>
       <url hash="0c507ba8">2020.findings-emnlp.108</url>
@@ -1395,7 +1395,7 @@
       <author><first>Mengjie</first><last>Zhao</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1219–1234</pages>
       <abstract>Pretrained language models achieve state-of-the-art results on many NLP tasks, but there are still many open questions about how and why they work so well. We investigate the contextualization of words in BERT. We quantify the amount of contextualization, i.e., how well words are interpreted in context, by studying the extent to which semantic classes of a word can be inferred from its contextualized embedding. Quantifying contextualization helps in understanding and utilizing pretrained language models. We show that the top layer representations support highly accurate inference of semantic classes; that the strongest contextualization effects occur in the lower layers; that local context is mostly sufficient for contextualizing words; and that top layer representations are more task-specific after finetuning while lower layer representations are more transferable. Finetuning uncovers task-related features, but pretrained knowledge about contextualization is still well preserved.</abstract>
       <url hash="cb0815b2">2020.findings-emnlp.109</url>
@@ -1505,11 +1505,11 @@
       <author><first>Daniel</first><last>Khashabi</last></author>
       <author><first>Kevin</first><last>Lin</last></author>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Phoebe</first><last>Mulcaire</last></author>
       <author><first>Qiang</first><last>Ning</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Sanjay</first><last>Subramanian</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <author><first>Eric</first><last>Wallace</last></author>
@@ -1525,7 +1525,7 @@
       <title>Parsing with Multilingual <fixed-case>BERT</fixed-case>, a Small Corpus, and a Small Treebank</title>
       <author><first>Ethan C.</first><last>Chau</last></author>
       <author><first>Lucy H.</first><last>Lin</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1324–1334</pages>
       <abstract>Pretrained multilingual contextual representations have shown great success, but due to the limits of their pretraining data, their benefits do not apply equally to all language varieties. This presents a challenge for language varieties unfamiliar to these models, whose labeled and unlabeled data is too limited to train a monolingual model effectively. We propose the use of additional language-specific pretraining and vocabulary augmentation to adapt multilingual models to low-resource settings. Using dependency parsing of four diverse low-resource language varieties as a case study, we show that these methods significantly improve performance over baselines, especially in the lowest-resource cases, and demonstrate the importance of the relationship between such models’ pretraining data and target language varieties.</abstract>
       <url hash="e87873c7">2020.findings-emnlp.118</url>
@@ -1553,7 +1553,7 @@
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Kei</first><last>Uchiumi</last></author>
       <author><first>Atsushi</first><last>Keyaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1341–1351</pages>
       <abstract>In traditional NLP, we tokenize a given sentence as a preprocessing, and thus the tokenization is unrelated to a target downstream task. To address this issue, we propose a novel method to explore a tokenization which is appropriate for the downstream task. Our proposed method, optimizing tokenization (OpTok), is trained to assign a high probability to such appropriate tokenization based on the downstream task loss. OpTok can be used for any downstream task which uses a vector representation of a sentence such as text classification. Experimental results demonstrate that OpTok improves the performance of sentiment analysis and textual entailment. In addition, we introduce OpTok into BERT, the state-of-the-art contextualized embeddings and report a positive effect.</abstract>
       <url hash="4d55146a">2020.findings-emnlp.120</url>
@@ -1575,7 +1575,7 @@
     <paper id="122">
       <title>A Compare Aggregate Transformer for Understanding Document-grounded Dialogue</title>
       <author><first>Longxuan</first><last>Ma</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Runxin</first><last>Sun</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>1358–1367</pages>
@@ -1616,7 +1616,7 @@
       <author><first>Alex</first><last>Tamkin</last></author>
       <author><first>Trisha</first><last>Singh</last></author>
       <author><first>Davide</first><last>Giovanardi</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>1393–1401</pages>
       <abstract>How does language model pretraining help transfer learning? We consider a simple ablation technique for determining the impact of each pretrained layer on transfer task performance. This method, partial reinitialization, involves replacing different layers of a pretrained model with random weights, then finetuning the entire model on the transfer task and observing the change in performance. This technique reveals that in BERT, layers with high probing performance on downstream GLUE tasks are neither necessary nor sufficient for high accuracy on those tasks. Furthermore, the benefit of using pretrained parameters for a layer varies dramatically with finetuning dataset size: parameters that provide tremendous performance improvement when data is plentiful may provide negligible benefits in data-scarce settings. These results reveal the complexity of the transfer learning process, highlighting the limitations of methods that operate on frozen models or single data samples.</abstract>
       <url hash="b2dd5db7">2020.findings-emnlp.125</url>
@@ -1666,7 +1666,7 @@
       <title>ex<fixed-case>BERT</fixed-case>: Extending Pre-trained Models with Domain-specific Vocabulary Under Constrained Training Resources</title>
       <author><first>Wen</first><last>Tai</last></author>
       <author><first>H. T.</first><last>Kung</last></author>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Marcus</first><last>Comiter</last></author>
       <author><first>Chang-Fu</first><last>Kuo</last></author>
       <pages>1433–1439</pages>
@@ -1693,7 +1693,7 @@
       <title>Conditional Neural Generation using Sub-Aspect Functions for Extractive News Summarization</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Ke</first><last>Shi</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>1453–1463</pages>
       <abstract>Much progress has been made in text summarization, fueled by neural architectures using large-scale training corpora. However, in the news domain, neural models easily overfit by leveraging position-related features due to the prevalence of the inverted pyramid writing style. In addition, there is an unmet need to generate a variety of summaries for different users. In this paper, we propose a neural framework that can flexibly control summary generation by introducing a set of sub-aspect functions (i.e. importance, diversity, position). These sub-aspect functions are regulated by a set of control codes to decide which sub-aspect to focus on during summary generation. We demonstrate that extracted summaries with minimal position bias is comparable with those generated by standard models that take advantage of position preference. We also show that news summaries generated with a focus on diversity can be more preferred by human raters. These results suggest that a more flexible neural summarization framework providing more control options could be desirable in tailoring to different user preferences, which is useful since it is often impractical to articulate such preferences for different applications a priori.</abstract>
       <url hash="baae7a2f">2020.findings-emnlp.131</url>
@@ -1728,7 +1728,7 @@
       <title>Inexpensive Domain Adaptation of Pretrained Language Models: Case Studies on Biomedical <fixed-case>NER</fixed-case> and Covid-19 <fixed-case>QA</fixed-case></title>
       <author><first>Nina</first><last>Poerner</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1482–1490</pages>
       <abstract>Domain adaptation of Pretrained Language Models (PTLMs) is typically achieved by unsupervised pretraining on target-domain text. While successful, this approach is expensive in terms of hardware, runtime and CO 2 emissions. Here, we propose a cheaper alternative: We train Word2Vec on target-domain text and align the resulting word vectors with the wordpiece vectors of a general-domain PTLM. We evaluate on eight English biomedical Named Entity Recognition (NER) tasks and compare against the recently proposed BioBERT model. We cover over 60% of the BioBERT - BERT F1 delta, at 5% of BioBERT’s CO 2 footprint and 2% of its cloud compute cost. We also show how to quickly adapt an existing general-domain Question Answering (QA) model to an emerging domain: the Covid-19 pandemic.</abstract>
       <url hash="7e17e67f">2020.findings-emnlp.134</url>
@@ -1767,7 +1767,7 @@
       <author><first>Huy</first><last>Vu</last></author>
       <author><first>Suhaib</first><last>Abdurahman</last></author>
       <author><first>Sudeep</first><last>Bhatia</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>1512–1524</pages>
       <abstract>Psychologists routinely assess people’s emotions and traits, such as their personality, by collecting their responses to survey questionnaires. Such assessments can be costly in terms of both time and money, and often lack generalizability, as existing data cannot be used to predict responses for new survey questions or participants. In this study, we propose a method for predicting a participant’s questionnaire response using their social media texts and the text of the survey question they are asked. Specifically, we use Natural Language Processing (NLP) tools such as BERT embeddings to represent both participants (via the text they write) and survey questions as embeddings vectors, allowing us to predict responses for out-of-sample participants and questions. Our novel approach can be used by researchers to integrate new participants or new questions into psychological studies without the constraint of costly data collection, facilitating novel practical applications and furthering the development of psychological theory. Finally, as a side contribution, the success of our model also suggests a new approach to study survey questions using NLP tools such as text embeddings rather than response data used in traditional methods.</abstract>
       <url hash="83cf5b5d">2020.findings-emnlp.137</url>
@@ -1778,7 +1778,7 @@
     <paper id="138">
       <title>Will it Unblend?</title>
       <author><first>Yuval</first><last>Pinter</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>1525–1535</pages>
       <abstract>Natural language processing systems often struggle with out-of-vocabulary (OOV) terms, which do not appear in training data. Blends, such as “innoventor”, are one particularly challenging class of OOV, as they are formed by fusing together two or more bases that relate to the intended meaning in unpredictable manners and degrees. In this work, we run experiments on a novel dataset of English OOV blends to quantify the difficulty of interpreting the meanings of blends by large-scale contextual language models such as BERT. We first show that BERT’s processing of these blends does not fully access the component meanings, leaving their contextual representations semantically impoverished. We find this is mostly due to the loss of characters resulting from blend formation. Then, we assess how easily different models can recognize the structure and recover the origin of blends, and find that context-aware embedding systems outperform character-level and context-free embeddings, although their results are still far from satisfactory.</abstract>
@@ -1867,8 +1867,8 @@
       <author><first>Wentao</first><last>Wang</last></author>
       <author><first>Zichao</first><last>Yang</last></author>
       <author><first>Xiaodan</first><last>Liang</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>1589–1598</pages>
       <abstract>Recent neural approaches to data-to-text generation have mostly focused on improving content fidelity while lacking explicit control over writing styles (e.g., sentence structures, word choices). More traditional systems use templates to determine the realization of text. Yet manual or automatic construction of high-quality templates is difficult, and a template acting as hard constraints could harm content fidelity when it does not match the record perfectly. We study a new way of stylistic control by using existing sentences as “soft” templates. That is, a model learns to imitate the writing style of any given exemplar sentence, with automatic adaptions to faithfully describe the record. The problem is challenging due to the lack of parallel data. We develop a neural approach that includes a hybrid attention-copy mechanism, learns with weak supervisions, and is enhanced with a new content coverage constraint. We conduct experiments in restaurants and sports domains. Results show our approach achieves stronger performance than a range of comparison methods. Our approach balances well between content fidelity and style control given exemplars that match the records to varying degrees.</abstract>
@@ -1905,7 +1905,7 @@
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1627–1643</pages>
       <abstract>Word alignments are useful for tasks like statistical and neural machine translation (NMT) and cross-lingual annotation projection. Statistical word aligners perform well, as do methods that extract alignments jointly with translations in NMT. However, most approaches require parallel training data and quality decreases as less training data is available. We propose word alignment methods that require no parallel data. The key idea is to leverage multilingual word embeddings – both static and contextualized – for word alignment. Our multilingual embeddings are created from monolingual data only without relying on any parallel data or dictionaries. We find that alignments created from embeddings are superior for four and comparable for two language pairs compared to those produced by traditional statistical aligners – even with abundant parallel data; e.g., contextualized embeddings achieve a word alignment F1 for English-German that is 5 percentage points higher than eflomal, a high-quality statistical aligner, trained on 100k parallel sentences.</abstract>
       <url hash="13c6cff3">2020.findings-emnlp.147</url>
@@ -1916,8 +1916,8 @@
     <paper id="148">
       <title><fixed-case>T</fixed-case>weet<fixed-case>E</fixed-case>val: Unified Benchmark and Comparative Evaluation for Tweet Classification</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
       <pages>1644–1650</pages>
       <abstract>The experimental landscape in natural language processing for social media is too fragmented. Each year, new shared tasks and datasets are proposed, ranging from classics like sentiment analysis to irony detection or emoji prediction. Therefore, it is unclear what the current state of the art is, as there is no standardized evaluation protocol, neither a strong set of baselines trained on such domain-specific data. In this paper, we propose a new evaluation framework (TweetEval) consisting of seven heterogeneous Twitter-specific classification tasks. We also provide a strong set of baselines as starting point, and compare different language modeling pre-training strategies. Our initial experiments show the effectiveness of starting off with existing pre-trained generic language models, and continue training them on Twitter corpora.</abstract>
@@ -1929,7 +1929,7 @@
       <title>Octa: Omissions and Conflicts in Target-Aspect Sentiment Analysis</title>
       <author><first>Zhe</first><last>Zhang</last></author>
       <author><first>Chung-Wei</first><last>Hang</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <pages>1651–1662</pages>
       <abstract>Sentiments in opinionated text are often determined by both aspects and target words (or targets). We observe that targets and aspects interrelate in subtle ways, often yielding conflicting sentiments. Thus, a naive aggregation of sentiments from aspects and targets treated separately, as in existing sentiment analysis models, impairs performance. We propose Octa, an approach that jointly considers aspects and targets when inferring sentiments. To capture and quantify relationships between targets and context words, Octa uses a selective self-attention mechanism that handles implicit or missing targets. Specifically, Octa involves two layers of attention mechanisms for, respectively, selective attention between targets and context words and attention over words based on aspects. On benchmark datasets, Octa outperforms leading models by a large margin, yielding (absolute) gains in accuracy of 1.6% to 4.3%.</abstract>
       <url hash="0349b765">2020.findings-emnlp.149</url>
@@ -1940,7 +1940,7 @@
       <title>On the Language Neutrality of Pre-trained Multilingual Representations</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1663–1674</pages>
       <abstract>Multilingual contextual embeddings, such as multilingual BERT and XLM-RoBERTa, have proved useful for many multi-lingual tasks. Previous work probed the cross-linguality of the representations indirectly using zero-shot transfer learning on morphological and syntactic tasks. We instead investigate the language-neutrality of multilingual contextual embeddings directly and with respect to lexical semantics. Our results show that contextual embeddings are more language-neutral and, in general, more informative than aligned static word-type embeddings, which are explicitly trained for language neutrality. Contextual embeddings are still only moderately language-neutral by default, so we propose two simple methods for achieving stronger language neutrality: first, by unsupervised centering of the representation for each language and second, by fitting an explicit projection on small parallel data. Besides, we show how to reach state-of-the-art accuracy on language identification and match the performance of statistical methods for word alignment of parallel sentences without using parallel data.</abstract>
       <url hash="8e8743a7">2020.findings-emnlp.150</url>
@@ -1950,10 +1950,10 @@
     </paper>
     <paper id="151">
       <title>Cost-effective Selection of Pretraining Data: A Case Study of Pretraining <fixed-case>BERT</fixed-case> on Social Media</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>1675–1681</pages>
       <abstract>Recent studies on domain-specific BERT models show that effectiveness on downstream tasks can be improved when models are pretrained on in-domain data. Often, the pretraining data used in these models are selected based on their subject matter, e.g., biology or computer science. Given the range of applications using social media text, and its unique language variety, we pretrain two models on tweets and forum text respectively, and empirically demonstrate the effectiveness of these two resources. In addition, we investigate how similarity measures can be used to nominate in-domain pretraining data. We publicly release our pretrained models at <url>https://bit.ly/35RpTf0</url>.</abstract>
       <url hash="eec9d22a">2020.findings-emnlp.151</url>
@@ -1967,7 +1967,7 @@
       <author><first>Khushbu</first><last>Saxena</last></author>
       <author><first>Vivek</first><last>Kulkarni</last></author>
       <author><first>Thomas</first><last>Runkler</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1682–1690</pages>
       <abstract>Prior research notes that BERT’s computational cost grows quadratically with sequence length thus leading to longer training times, higher GPU memory constraints and carbon emissions. While recent work seeks to address these scalability issues at pre-training, these issues are also prominent in fine-tuning especially for long sequence tasks like document classification. Our work thus focuses on optimizing the computational cost of fine-tuning for document classification. We achieve this by complementary learning of both topic and language models in a unified framework, named TopicBERT. This significantly reduces the number of self-attention operations – a main performance bottleneck. Consequently, our model achieves a 1.4x ( 40%) speedup with 40% reduction in CO2 emission while retaining 99.9% performance over 5 datasets.</abstract>
       <url hash="c065d94b">2020.findings-emnlp.152</url>
@@ -2007,7 +2007,7 @@
       <title>Multi-Agent Mutual Learning at Sentence-Level and Token-Level for Neural Machine Translation</title>
       <author><first>Baohao</first><last>Liao</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1715–1724</pages>
       <abstract>Mutual learning, where multiple agents learn collaboratively and teach one another, has been shown to be an effective way to distill knowledge for image classification tasks. In this paper, we extend mutual learning to the machine translation task and operate at both the sentence-level and the token-level. Firstly, we co-train multiple agents by using the same parallel corpora. After convergence, each agent selects and learns its poorly predicted tokens from other agents. The poorly predicted tokens are determined by the acceptance-rejection sampling algorithm. Our experiments show that sequential mutual learning at the sentence-level and the token-level improves the results cumulatively. Absolute improvements compared to strong baselines are obtained on various translation tasks. On the IWSLT’14 German-English task, we get a new state-of-the-art BLEU score of 37.0. We also report a competitive result, 29.9 BLEU score, on the WMT’14 English-German task.</abstract>
       <url hash="2f569459">2020.findings-emnlp.155</url>
@@ -2019,7 +2019,7 @@
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Lei</first><last>Shu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1725–1731</pages>
       <abstract>This paper focuses on learning domain-oriented language models driven by end tasks, which aims to combine the worlds of both general-purpose language models (such as ELMo and BERT) and domain-specific language understanding. We propose DomBERT, an extension of BERT to learn from both in-domain corpus and relevant domain corpora. This helps in learning domain language models with low-resources. Experiments are conducted on an assortment of tasks in aspect-based sentiment analysis (ABSA), demonstrating promising results.</abstract>
       <url hash="2cfbd858">2020.findings-emnlp.156</url>
@@ -2045,7 +2045,7 @@
       <author><first>Hancheng</first><last>Cao</last></author>
       <author><first>Mengjie</first><last>Cheng</last></author>
       <author><first>Zhepeng</first><last>Cen</last></author>
-      <author><first>Daniel</first><last>McFarland</last></author>
+      <author id="dan-mcfarland"><first>Daniel</first><last>McFarland</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>1746–1757</pages>
       <abstract>What kind of basic research ideas are more likely to get applied in practice? There is a long line of research investigating patterns of knowledge transfer, but it generally focuses on documents as the unit of analysis and follow their transfer into practice for a specific scientific domain. Here we study translational research at the level of scientific concepts for all scientific fields. We do this through text mining and predictive modeling using three corpora: 38.6 million paper abstracts, 4 million patent documents, and 0.28 million clinical trials. We extract scientific concepts (i.e., phrases) from corpora as instantiations of “research ideas”, create concept-level features as motivated by literature, and then follow the trajectories of over 450,000 new concepts (emerged from 1995-2014) to identify factors that lead only a small proportion of these ideas to be used in inventions and drug trials. Results from our analysis suggest several mechanisms that distinguish which scientific concept will be adopted in practice, and which will not. We also demonstrate that our derived features can be used to explain and predict knowledge transfer with high accuracy. Our work provides greater understanding of knowledge transfer for researchers, practitioners, and government agencies interested in encouraging translational research.</abstract>
@@ -2071,8 +2071,8 @@
       <title>An Empirical Exploration of Local Ordering Pre-training for Structured Prediction</title>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Xiang</first><last>Kong</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1770–1783</pages>
       <abstract>Recently, pre-training contextualized encoders with language model (LM) objectives has been shown an effective semi-supervised method for structured prediction. In this work, we empirically explore an alternative pre-training method for contextualized encoders. Instead of predicting words in LMs, we “mask out” and predict word order information, with a local ordering strategy and word-selecting objectives. With evaluations on three typical structured prediction tasks (dependency parsing, POS tagging, and NER) over four languages (English, Finnish, Czech, and Italian), we show that our method is consistently beneficial. We further conduct detailed error analysis, including one that examines a specific type of parsing error where the head is misidentified. The results show that pre-trained contextual encoders can bring improvements in a structured way, suggesting that they may be able to capture higher-order patterns and feature combinations from unlabeled data.</abstract>
       <url hash="afd95395">2020.findings-emnlp.160</url>
@@ -2137,7 +2137,7 @@
     </paper>
     <paper id="165">
       <title><fixed-case>C</fixed-case>ommon<fixed-case>G</fixed-case>en: A Constrained Text Generation Challenge for Generative Commonsense Reasoning</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Wangchunshu</first><last>Zhou</last></author>
       <author><first>Ming</first><last>Shen</last></author>
       <author><first>Pei</first><last>Zhou</last></author>
@@ -2156,7 +2156,7 @@
       <author><first>Daniel</first><last>Pressel</last></author>
       <author><first>Amy</first><last>Hemmeter</last></author>
       <author><first>Sagnik</first><last>Ray Choudhury</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>1841–1848</pages>
       <abstract>Current state-of-the-art models for named entity recognition (NER) are neural models with a conditional random field (CRF) as the final layer. Entities are represented as per-token labels with a special structure in order to decode them into spans. Current work eschews prior knowledge of how the span encoding scheme works and relies on the CRF learning which transitions are illegal and which are not to facilitate global coherence. We find that by constraining the output to suppress illegal transitions we can train a tagger with a cross-entropy loss twice as fast as a CRF with differences in F1 that are statistically insignificant, effectively eliminating the need for a CRF. We analyze the dynamics of tag co-occurrence to explain when these constraints are most effective and provide open source implementations of our tagger in both PyTorch and TensorFlow.</abstract>
       <url hash="87619392">2020.findings-emnlp.166</url>
@@ -2170,7 +2170,7 @@
       <author><first>Tianze</first><last>Shi</last></author>
       <author><first>Chen</first><last>Zhao</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Lillian</first><last>Lee</last></author>
       <pages>1849–1864</pages>
       <abstract>Large-scale semantic parsing datasets annotated with logical forms have enabled major advances in supervised approaches. But can richer supervision help even more? To explore the utility of fine-grained, lexical-level supervision, we introduce SQUALL, a dataset that enriches 11,276 WIKITABLEQUESTIONS English-language questions with manually created SQL equivalents plus alignments between SQL and question fragments. Our annotation enables new training possibilities for encoderdecoder models, including approaches from machine translation previously precluded by the absence of alignments. We propose and test two methods: (1) supervised attention; (2) adopting an auxiliary objective of disambiguating references in the input queries to table columns. In 5-fold cross validation, these strategies improve over strong baselines by 4.4% execution accuracy. Oracle experiments suggest that annotated alignments can support further accuracy gains of up to 23.9%.</abstract>
@@ -2184,7 +2184,7 @@
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Robert</first><last>Gmyr</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <author><first>Eric</first><last>Darve</last></author>
       <pages>1865–1874</pages>
       <abstract>Text summarization aims to extract essential information from a piece of text and transform the text into a concise version. Existing unsupervised abstractive summarization models leverage recurrent neural networks framework while the recently proposed transformer exhibits much more capability. Moreover, most of previous summarization models ignore abundant unlabeled corpora resources available for pretraining. In order to address these issues, we propose TED, a transformer-based unsupervised abstractive summarization system with pretraining on large-scale data. We first leverage the lead bias in news articles to pretrain the model on millions of unlabeled corpora. Next, we finetune TED on target domains through theme modeling and a denoising autoencoder to enhance the quality of generated summaries. Notably, TED outperforms all unsupervised abstractive baselines on NYT, CNN/DM and English Gigaword datasets with various document styles. Further analysis shows that the summaries generated by TED are highly abstractive, and each component in the objective function of TED is highly effective.</abstract>
@@ -2288,7 +2288,7 @@
     <paper id="176">
       <title>Learning Visual-Semantic Embeddings for Reporting Abnormal Findings on Chest <fixed-case>X</fixed-case>-rays</title>
       <author><first>Jianmo</first><last>Ni</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <author><first>Amilcare</first><last>Gentili</last></author>
       <author><first>Julian</first><last>McAuley</last></author>
       <pages>1954–1960</pages>
@@ -2461,7 +2461,7 @@
     </paper>
     <paper id="190">
       <title><fixed-case>L</fixed-case>ogic2<fixed-case>T</fixed-case>ext: High-Fidelity Natural Language Generation from Logical Forms</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Hanwen</first><last>Zha</last></author>
       <author><first>Xiyou</first><last>Zhou</last></author>
@@ -2477,7 +2477,7 @@
     <paper id="191">
       <title><fixed-case>M</fixed-case>ed<fixed-case>IC</fixed-case>a<fixed-case>T</fixed-case>: A Dataset of Medical Images, Captions, and Textual References</title>
       <author><first>Sanjay</first><last>Subramanian</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Ben</first><last>Bogin</last></author>
       <author><first>Sachin</first><last>Mehta</last></author>
       <author><first>Madeleine</first><last>van Zuylen</last></author>
@@ -2633,8 +2633,8 @@
       <title>The Role of Reentrancies in <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing</title>
       <author><first>Ida</first><last>Szubert</last></author>
       <author><first>Marco</first><last>Damonte</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>2198–2207</pages>
       <abstract>Abstract Meaning Representation (AMR) parsing aims at converting sentences into AMR representations. These are graphs and not trees because AMR supports reentrancies (nodes with more than one parent). Following previous findings on the importance of reen- trancies for AMR, we empirically find and discuss several linguistic phenomena respon- sible for reentrancies in AMR, some of which have not received attention before. We cate- gorize the types of errors AMR parsers make with respect to reentrancies. Furthermore, we find that correcting these errors provides an in- crease of up to 5% Smatch in parsing perfor- mance and 20% in reentrancy prediction</abstract>
       <url hash="0e7f5436">2020.findings-emnlp.199</url>
@@ -2688,8 +2688,8 @@
     <paper id="203">
       <title>Reducing Quantity Hallucinations in Abstractive Summarization</title>
       <author><first>Zheng</first><last>Zhao</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>2237–2249</pages>
       <abstract>It is well-known that abstractive summaries are subject to hallucination—including material that is not supported by the original text. While summaries can be made hallucination-free by limiting them to general phrases, such summaries would fail to be very informative. Alternatively, one can try to avoid hallucinations by verifying that any specific entities in the summary appear in the original text in a similar context. This is the approach taken by our system, Herman. The system learns to recognize and verify quantity entities (dates, numbers, sums of money, etc.) in a beam-worth of abstractive summaries produced by state-of-the-art models, in order to up-rank those summaries whose quantity terms are supported by the original text. Experimental results demonstrate that the ROUGE scores of such up-ranked summaries have a higher Precision than summaries that have not been up-ranked, without a comparable loss in Recall, resulting in higher F1. Preliminary human evaluation of up-ranked vs. original summaries shows people’s preference for the former.</abstract>
       <url hash="e74fe8aa">2020.findings-emnlp.203</url>
@@ -2717,9 +2717,9 @@
     </paper>
     <paper id="206">
       <title>A Semi-supervised Approach to Generate the Code-Mixed Text using Pre-trained Encoder and Transfer Learning</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2267–2280</pages>
       <abstract>Code-mixing, the interleaving of two or more languages within a sentence or discourse is ubiquitous in multilingual societies. The lack of code-mixed training data is one of the major concerns for the development of end-to-end neural network-based models to be deployed for a variety of natural language processing (NLP) applications. A potential solution is to either manually create or crowd-source the code-mixed labelled data for the task at hand, but that requires much human efforts and often not feasible because of the language specific diversity in the code-mixed text. To circumvent the data scarcity issue, we propose an effective deep learning approach for automatically generating the code-mixed text from English to multiple languages without any parallel data. In order to train the neural network, we create synthetic code-mixed texts from the available parallel corpus by modelling various linguistic properties of code-mixing. Our codemixed text generator is built upon the encoder-decoder framework, where the encoder is augmented with the linguistic and task-agnostic features obtained from the transformer based language model. We also transfer the knowledge from a neural machine translation (NMT) to warm-start the training of code-mixed generator. Experimental results and in-depth analysis show the effectiveness of our proposed code-mixed text generation on eight diverse language pairs.</abstract>
       <url hash="94a42506">2020.findings-emnlp.206</url>
@@ -2731,7 +2731,7 @@
       <title><fixed-case>BERT</fixed-case>-<fixed-case>MK</fixed-case>: Integrating Graph Contextualized Knowledge into Pre-trained Language Models</title>
       <author><first>Bin</first><last>He</last></author>
       <author><first>Di</first><last>Zhou</last></author>
-      <author><first>Jinghui</first><last>Xiao</last></author>
+      <author id="jinghui-xiao"><first>Jinghui</first><last>Xiao</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Nicholas Jing</first><last>Yuan</last></author>
@@ -2748,7 +2748,7 @@
       <author><first>Yikang</first><last>Shen</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
       <author><first>Aaron</first><last>Courville</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>2291–2307</pages>
       <abstract>We model the recursive production property of context-free grammars for natural and synthetic languages. To this end, we present a dynamic programming algorithm that marginalises over latent binary tree structures with N leaves, allowing us to compute the likelihood of a sequence of N tokens under a latent tree model, which we maximise to train a recursive neural function. We demonstrate performance on two synthetic tasks: SCAN, where it outperforms previous models on the LENGTH split, and English question formation, where it performs comparably to decoders with the ground-truth tree structure. We also present experimental results on German-English translation on the Multi30k dataset, and qualitatively analyse the induced tree structures our model learns for the SCAN tasks and the German-English translation task.</abstract>
       <url hash="c65d586c">2020.findings-emnlp.208</url>
@@ -2759,11 +2759,11 @@
     <paper id="209">
       <title>Guided Dialogue Policy Learning without Adversarial Learning in the Loop</title>
       <author><first>Ziming</first><last>Li</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Jinchao</first><last>Li</last></author>
       <author><first>Julia</first><last>Kiseleva</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Shahin</first><last>Shayandeh</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>2308–2317</pages>
@@ -2837,7 +2837,7 @@
       <title>Learning Knowledge Bases with Parameters for Task-Oriented Dialogue Systems</title>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Yan</first><last>Xu</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
@@ -2925,7 +2925,7 @@
     </paper>
     <paper id="222">
       <title>Adapting Coreference Resolution to <fixed-case>T</fixed-case>witter Conversations</title>
-      <author><first>Berfin</first><last>Aktaş</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
       <author><first>Veronika</first><last>Solopova</last></author>
       <author><first>Annalena</first><last>Kohnert</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
@@ -2950,8 +2950,8 @@
       <title><fixed-case>COSMIC</fixed-case>: <fixed-case>CO</fixed-case>mmon<fixed-case>S</fixed-case>ense knowledge for e<fixed-case>M</fixed-case>otion Identification in Conversations</title>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>2470–2481</pages>
       <abstract>In this paper, we address the task of utterance level emotion recognition in conversations using commonsense knowledge. We propose COSMIC, a new framework that incorporates different elements of commonsense such as mental states, events, and causal relations, and build upon them to learn interactions between interlocutors participating in a conversation. Current state-of-theart methods often encounter difficulties in context propagation, emotion shift detection, and differentiating between related emotion classes. By learning distinct commonsense representations, COSMIC addresses these challenges and achieves new state-of-the-art results for emotion recognition on four different benchmark conversational datasets. Our code is available at <url>https://github.com/declare-lab/conv-emotion</url>.</abstract>
@@ -2975,11 +2975,11 @@
     <paper id="226">
       <title>Answer Span Correction in Machine Reading Comprehension</title>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
       <author><first>Rong</first><last>Zhang</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>2496–2501</pages>
       <abstract>Answer validation in machine reading comprehension (MRC) consists of verifying an extracted answer against an input context and question pair. Previous work has looked at re-assessing the “answerability” of the question given the extracted answer. Here we address a different problem: the tendency of existing MRC systems to produce partially correct answers when presented with answerable questions. We explore the nature of such errors and propose a post-processing correction method that yields statistically significant performance improvements over state-of-the-art MRC systems in both monolingual and multilingual evaluation.</abstract>
       <url hash="6a6bc511">2020.findings-emnlp.226</url>
@@ -3049,7 +3049,7 @@
       <author><first>Jiezhong</first><last>Qiu</last></author>
       <author><first>Hao</first><last>Ma</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Sinong</first><last>Wang</last></author>
       <author><first>Jie</first><last>Tang</last></author>
       <pages>2555–2565</pages>
@@ -3077,7 +3077,7 @@
       <author><first>Chengcan</first><last>Ying</last></author>
       <author><first>Fei</first><last>Zhao</last></author>
       <author><first>Zhifang</first><last>Fan</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Rui</first><last>Xia</last></author>
       <pages>2576–2585</pages>
       <abstract>Aspect-oriented Fine-grained Opinion Extraction (AFOE) aims at extracting aspect terms and opinion terms from review in the form of opinion pairs or additionally extracting sentiment polarity of aspect term to form opinion triplet. Because of containing several opinion factors, the complete AFOE task is usually divided into multiple subtasks and achieved in the pipeline. However, pipeline approaches easily suffer from error propagation and inconvenience in real-world scenarios. To this end, we propose a novel tagging scheme, Grid Tagging Scheme (GTS), to address the AFOE task in an end-to-end fashion only with one unified grid tagging task. Additionally, we design an effective inference strategy on GTS to exploit mutual indication between different opinion factors for more accurate extractions. To validate the feasibility and compatibility of GTS, we implement three different GTS models respectively based on CNN, BiLSTM, and BERT, and conduct experiments on the aspect-oriented opinion pair extraction and opinion triplet extraction datasets. Extensive experimental results indicate that GTS models outperform strong baselines significantly and achieve state-of-the-art performance.</abstract>
@@ -3210,7 +3210,7 @@
       <title>Textual <fixed-case>S</fixed-case>upervision for <fixed-case>V</fixed-case>isually <fixed-case>G</fixed-case>rounded <fixed-case>S</fixed-case>poken <fixed-case>L</fixed-case>anguage <fixed-case>U</fixed-case>nderstanding</title>
       <author><first>Bertrand</first><last>Higy</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>2698–2709</pages>
       <abstract>Visually-grounded models of spoken language understanding extract semantic information directly from speech, without relying on transcriptions. This is useful for low-resource languages, where transcriptions can be expensive or impossible to obtain. Recent work showed that these models can be improved if transcriptions are available at training time. However, it is not clear how an end-to-end approach compares to a traditional pipeline-based approach when one has access to transcriptions. Comparing different strategies, we find that the pipeline approach works better when enough text is available. With low-resource languages in mind, we also show that translations can be effectively used in place of transcriptions but more data is needed to obtain similar results.</abstract>
       <url hash="62d3c038">2020.findings-emnlp.244</url>
@@ -3245,12 +3245,12 @@
       <author><first>Sashank</first><last>Santhanam</last></author>
       <author><first>Zhuo</first><last>Cheng</last></author>
       <author><first>Brodie</first><last>Mather</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Bryanna</first><last>Hebenstreit</last></author>
       <author><first>Alan</first><last>Zemel</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <pages>2736–2750</pages>
       <abstract>Achieving true human-like ability to conduct a conversation remains an elusive goal for open-ended dialogue systems. We posit this is because extant approaches towards natural language generation (NLG) are typically construed as end-to-end architectures that do not adequately model human generation processes. To investigate, we decouple generation into two separate phases: planning and realization. In the planning phase, we train two planners to generate plans for response utterances. The realization phase uses response plans to produce an appropriate response. Through rigorous evaluations, both automated and human, we demonstrate that decoupling the process into planning and realization performs better than an end-to-end approach.</abstract>
@@ -3265,7 +3265,7 @@
       <author><first>Claudio</first><last>Greco</last></author>
       <author><first>Greta</first><last>Gandolfi</last></author>
       <author><first>Eleonora</first><last>Gualdoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>2751–2767</pages>
       <abstract>This paper introduces BD2BB, a novel language and vision benchmark that requires multimodal models combine complementary information from the two modalities. Recently, impressive progress has been made to develop universal multimodal encoders suitable for virtually any language and vision tasks. However, current approaches often require them to combine redundant information provided by language and vision. Inspired by real-life communicative contexts, we propose a novel task where either modality is necessary but not sufficient to make a correct prediction. To do so, we first build a dataset of images and corresponding sentences provided by human participants. Second, we evaluate state-of-the-art models and compare their performance against human speakers. We show that, while the task is relatively easy for humans, best-performing models struggle to achieve similar results.</abstract>
       <url hash="afc31625">2020.findings-emnlp.248</url>
@@ -3328,7 +3328,7 @@
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Jae sung</first><last>Park</last></author>
       <author><first>Ronan</first><last>Le Bras</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>2810–2829</pages>
       <abstract>Natural language rationales could provide intuitive, higher-level explanations that are easily understandable by humans, complementing the more broadly studied lower-level explanations based on gradients or attention weights. We present the first study focused on generating natural language rationales across several complex visual reasoning tasks: visual commonsense reasoning, visual-textual entailment, and visual question answering. The key challenge of accurate rationalization is comprehensive image understanding at all levels: not just their explicit content at the pixel level, but their contextual contents at the semantic and pragmatic levels. We present RationaleˆVT Transformer, an integrated model that learns to generate free-text rationales by combining pretrained language models with object recognition, grounded visual semantic frames, and visual commonsense graphs. Our experiments show that free-text rationalization is a promising research direction to complement model interpretability for complex visual-textual reasoning tasks. In addition, we find that integration of richer semantic and pragmatic visual features improves visual fidelity of rationales.</abstract>
@@ -3414,7 +3414,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Hengzhi</first><last>Pei</last></author>
       <author><first>Hang</first><last>Yan</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2887–2897</pages>
       <abstract>Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github.</abstract>
       <url hash="e03d06cf">2020.findings-emnlp.260</url>
@@ -3489,7 +3489,7 @@
       <author><first>Jianhan</first><last>Xu</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2938–2948</pages>
       <abstract>We propose a novel approach to cross-lingual dependency parsing based on word reordering. The words in each sentence of a source language corpus are rearranged to meet the word order in a target language under the guidance of a part-of-speech based language model (LM). To obtain the highest reordering score under the LM, a population-based optimization algorithm and its genetic operators are designed to deal with the combinatorial nature of such word reordering. A parser trained on the reordered corpus then can be used to parse sentences in the target language. We demonstrate through extensive experimentation that our approach achieves better or comparable results across 25 target languages (1.73% increase in average), and outperforms a baseline by a significant margin on the languages that are greatly different from the source one. For example, when transferring the English parser to Hindi and Latin, our approach outperforms the baseline by 15.3% and 6.7% respectively.</abstract>
       <url hash="509eb391">2020.findings-emnlp.265</url>
@@ -3578,7 +3578,7 @@
       <title>Learning to Model and Ignore Dataset Bias with Mixed Capacity Ensembles</title>
       <author><first>Christopher</first><last>Clark</last></author>
       <author><first>Mark</first><last>Yatskar</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>3031–3045</pages>
       <abstract>Many datasets have been shown to contain incidental correlations created by idiosyncrasies in the data collection process. For example, sentence entailment datasets can have spurious word-class correlations if nearly all contradiction sentences contain the word “not”, and image recognition datasets can have tell-tale object-background correlations if dogs are always indoors. In this paper, we propose a method that can automatically detect and ignore these kinds of dataset-specific patterns, which we call dataset biases. Our method trains a lower capacity model in an ensemble with a higher capacity model. During training, the lower capacity model learns to capture relatively shallow correlations, which we hypothesize are likely to reflect dataset bias. This frees the higher capacity model to focus on patterns that should generalize better. We ensure the models learn non-overlapping approaches by introducing a novel method to make them conditionally independent. Importantly, our approach does not require the bias to be known in advance. We evaluate performance on synthetic datasets, and four datasets built to penalize models that exploit known biases on textual entailment, visual question answering, and image recognition tasks. We show improvement in all settings, including a 10 point gain on the visual question answering dataset.</abstract>
       <url hash="0b379b3c">2020.findings-emnlp.272</url>
@@ -3588,7 +3588,7 @@
     <paper id="273">
       <title>Learning to Generalize for Sequential Decision Making</title>
       <author><first>Xusen</first><last>Yin</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <pages>3046–3063</pages>
       <abstract>We consider problems of making sequences of decisions to accomplish tasks, interacting via the medium of language. These problems are often tackled with reinforcement learning approaches. We find that these models do not generalize well when applied to novel task domains. However, the large amount of computation necessary to adequately train and explore the search space of sequential decision making, under a reinforcement learning paradigm, precludes the inclusion of large contextualized language models, which might otherwise enable the desired generalization ability. We introduce a teacher-student imitation learning methodology and a means of converting a reinforcement learning model into a natural language understanding model. Together, these methodologies enable the introduction of contextualized language models into the sequential decision making problem space. We show that models can learn faster and generalize more, leveraging both the imitation learning and the reformulation. Our models exceed teacher performance on various held-out decision problems, by up to 7% on in-domain problems and 24% on out-of-domain problems.</abstract>
@@ -3641,7 +3641,7 @@
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Marco</first><last>Moresi</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>3096–3102</pages>
       <abstract>The ability to accurately track what happens during a conversation is essential for the performance of a dialogue system. Current state-of-the-art multi-domain dialogue state trackers achieve just over 55% accuracy on the current go-to benchmark, which means that in almost every second dialogue turn they place full confidence in an incorrect dialogue state. Belief trackers, on the other hand, maintain a distribution over possible dialogue states. However, they lack in performance compared to dialogue state trackers, and do not produce well calibrated distributions. In this work we present state-of-the-art performance in calibration for multi-domain dialogue belief trackers using a calibrated ensemble of models. Our resulting dialogue belief tracker also outperforms previous dialogue belief tracking models in terms of accuracy.</abstract>
       <url hash="fc9e36ee">2020.findings-emnlp.277</url>
@@ -3712,7 +3712,7 @@
       <author><first>Xavier</first><last>Garcia</last></author>
       <author><first>Pierre</first><last>Foret</last></author>
       <author><first>Thibault</first><last>Sellam</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>3160–3170</pages>
       <abstract>We present a probabilistic framework for multilingual neural machine translation that encompasses supervised and unsupervised setups, focusing on unsupervised translation. In addition to studying the vanilla case where there is only monolingual data available, we propose a novel setup where one language in the (source, target) pair is not associated with any parallel data, but there may exist auxiliary parallel data that contains the other. This auxiliary data can naturally be utilized in our probabilistic framework via a novel cross-translation loss term. Empirically, we show that our approach results in higher BLEU scores over state-of-the-art unsupervised models on the WMT’14 English-French, WMT’16 English-German, and WMT’16 English-Romanian datasets in most directions.</abstract>
       <url hash="3366d192">2020.findings-emnlp.283</url>
@@ -3778,11 +3778,11 @@
     <paper id="288">
       <title>Pushing the Limits of <fixed-case>AMR</fixed-case> Parsing with Self-Learning</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>3208–3214</pages>
       <abstract>Abstract Meaning Representation (AMR) parsing has experienced a notable growth in performance in the last two years, due both to the impact of transfer learning and the development of novel architectures specific to AMR. At the same time, self-learning techniques have helped push the performance boundaries of other natural language processing applications, such as machine translation or question answering. In this paper, we explore different ways in which trained models can be applied to improve AMR parsing performance, including generation of synthetic text and AMR annotations as well as refinement of actions oracle. We show that, without any additional human annotations, these techniques improve an already performant parser and achieve state-of-the-art results on AMR 1.0 and AMR 2.0.</abstract>
       <url hash="b611731c">2020.findings-emnlp.288</url>
@@ -3818,7 +3818,7 @@
       <title>Towards <fixed-case>C</fixed-case>ontrollable <fixed-case>B</fixed-case>iases in <fixed-case>L</fixed-case>anguage <fixed-case>G</fixed-case>eneration</title>
       <author><first>Emily</first><last>Sheng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>3239–3254</pages>
       <abstract>We present a general approach towards controllable societal biases in natural language generation (NLG). Building upon the idea of adversarial triggers, we develop a method to induce societal biases in generated text when input prompts contain mentions of specific demographic groups. We then analyze two scenarios: 1) inducing negative biases for one demographic and positive biases for another demographic, and 2) equalizing biases between demographics. The former scenario enables us to detect the types of biases present in the model. Specifically, we show the effectiveness of our approach at facilitating bias analysis by finding topics that correspond to demographic inequalities in generated text and comparing the relative effectiveness of inducing biases for different demographics. The second scenario is useful for mitigating biases in downstream applications such as dialogue generation. In our experiments, the mitigation technique proves to be effective at equalizing the amount of biases across demographics while simultaneously generating less negatively biased text overall.</abstract>
@@ -3842,7 +3842,7 @@
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Giwon</first><last>Hong</last></author>
       <author><first>Haritz</first><last>Puerto San Roman</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>3266–3277</pages>
       <abstract>Unsupervised question answering (UQA) has been proposed to avoid the high cost of creating high-quality datasets for QA. One approach to UQA is to train a QA model with questions generated automatically. However, the generated questions are either too similar to a word sequence in the context or too drifted from the semantics of the context, thereby making it difficult to train a robust QA model. We propose a novel regularization method based on teacher-student architecture to avoid bias toward a particular question generation strategy and modulate the process of generating individual words when a question is generated. Our experiments demonstrate that we have achieved the goal of generating higher-quality questions for UQA across diverse QA datasets and tasks. We also show that this method can be useful for creating a QA model with few-shot learning.</abstract>
       <url hash="28c1564c">2020.findings-emnlp.293</url>
@@ -3852,7 +3852,7 @@
     <paper id="294">
       <title>Graph-to-Graph Transformer for Transition-based Dependency Parsing</title>
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>3278–3289</pages>
       <abstract>We propose the Graph2Graph Transformer architecture for conditioning on and predicting arbitrary graphs, and apply it to the challenging task of transition-based dependency parsing. After proposing two novel Transformer models of transition-based dependency parsing as strong baselines, we show that adding the proposed mechanisms for conditioning on and predicting graphs of Graph2Graph Transformer results in significant improvements, both with and without BERT pre-training. The novel baselines and their integration with Graph2Graph Transformer significantly outperform the state-of-the-art in traditional transition-based dependency parsing on both English Penn Treebank, and 13 languages of Universal Dependencies Treebanks. Graph2Graph Transformer can be integrated with many previous structured prediction methods, making it easy to apply to a wide range of NLP tasks.</abstract>
       <url hash="4dbdf36d">2020.findings-emnlp.294</url>
@@ -3883,7 +3883,7 @@
       <title>Detecting Stance in Media On Global Warming</title>
       <author><first>Yiwei</first><last>Luo</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>3296–3315</pages>
       <abstract>Citing opinions is a powerful yet understudied strategy in argumentation. For example, an environmental activist might say, “Leading scientists agree that global warming is a serious concern,” framing a clause which affirms their own stance (“that global warming is serious”) as an opinion endorsed ("[scientists] agree”) by a reputable source (“leading”). In contrast, a global warming denier might frame the same clause as the opinion of an untrustworthy source with a predicate connoting doubt: “Mistaken scientists claim [...]." Our work studies opinion-framing in the global warming (GW) debate, an increasingly partisan issue that has received little attention in NLP. We introduce DeSMOG, a dataset of stance-labeled GW sentences, and train a BERT classifier to study novel aspects of argumentation in how different sides of a debate represent their own and each other’s opinions. From 56K news articles, we find that similar linguistic devices for self-affirming and opponent-doubting discourse are used across GW-accepting and skeptic media, though GW-skeptical media shows more opponent-doubt. We also find that authors often characterize sources as hypocritical, by ascribing opinions expressing the author’s own view to source entities known to publicly endorse the opposing view. We release our stance dataset, model, and lexicons of framing devices for future work on opinion-framing and the automatic detection of GW stance.</abstract>
       <url hash="626fee76">2020.findings-emnlp.296</url>
@@ -3937,8 +3937,8 @@
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3345–3355</pages>
       <abstract>Our goal is to explain the effects of perturbations in procedural text, e.g., given a passage describing a rabbit’s life cycle, explain why illness (the perturbation) may reduce the rabbit population (the effect). Although modern systems are able to solve the original prediction task well (e.g., illness results in less rabbits), the explanation task - identifying the causal chain of events from perturbation to effect - remains largely unaddressed, and is the goal of this research. We present QUARTET, a system that constructs such explanations from paragraphs, by modeling the explanation task as a multitask learning problem. QUARTET constructs explanations from the sentences in the procedural text, achieving ~18 points better on explanation accuracy compared to several strong baselines on a recent process comprehension benchmark. On an end task on this benchmark, we show a surprising finding that good explanations do not have to come at the expense of end task performance, in fact leading to a 7% F1 improvement over SOTA.</abstract>
       <url hash="7e56c1b8">2020.findings-emnlp.300</url>
@@ -3951,7 +3951,7 @@
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3356–3369</pages>
       <abstract>Pretrained neural language models (LMs) are prone to generating racist, sexist, or otherwise toxic language which hinders their safe deployment. We investigate the extent to which pretrained LMs can be prompted to generate toxic language, and the effectiveness of controllable text generation algorithms at preventing such toxic degeneration. We create and release RealToxicityPrompts, a dataset of 100K naturally occurring, sentence-level prompts derived from a large corpus of English web text, paired with toxicity scores from a widely-used toxicity classifier. Using RealToxicityPrompts, we find that pretrained LMs can degenerate into toxic text even from seemingly innocuous prompts. We empirically assess several controllable generation methods, and find that while data- or compute-intensive methods (e.g., adaptive pretraining on non-toxic data) are more effective at steering away from toxicity than simpler solutions (e.g., banning “bad” words), no current method is failsafe against neural toxic degeneration. To pinpoint the potential cause of such persistent toxic degeneration, we analyze two web text corpora used to pretrain several LMs (including GPT-2; Radford et. al, 2019), and find a significant amount of offensive, factually unreliable, and otherwise toxic content. Our work provides a test bed for evaluating toxic generations by LMs and stresses the need for better data selection processes for pretraining.</abstract>
       <url hash="9b74d94c">2020.findings-emnlp.301</url>
@@ -3962,8 +3962,8 @@
       <title>Improving Event Duration Prediction via Time-aware Pre-training</title>
       <author><first>Zonglin</first><last>Yang</last></author>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>3370–3378</pages>
       <abstract>End-to-end models in NLP rarely encode external world knowledge about length of time. We introduce two effective models for duration prediction, which incorporate external knowledge by reading temporal-related news sentences (time-aware pre-training). Specifically, one model predicts the range/unit where the duration value falls in (R-PRED); and the other predicts the exact duration value (E-PRED). Our best model – E-PRED, substantially outperforms previous work, and captures duration information more accurately than R-PRED. We also demonstrate our models are capable of duration prediction in the unsupervised setting, outperforming the baselines.</abstract>
       <url hash="1a764517">2020.findings-emnlp.302</url>
@@ -3974,7 +3974,7 @@
       <title>Composed Variational Natural Language Generation for Few-shot Intents</title>
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <pages>3379–3388</pages>
       <abstract>In this paper, we focus on generating training examples for few-shot intents in the realistic imbalanced scenario. To build connections between existing many-shot intents and few-shot intents, we consider an intent as a combination of a domain and an action, and propose a composed variational natural language generator (CLANG), a transformer-based conditional variational autoencoder. CLANG utilizes two latent variables to represent the utterances corresponding to two different independent parts (domain and action) in the intent, and the latent variables are composed together to generate natural examples. Additionally, to improve the generator learning, we adopt the contrastive regularization loss that contrasts the in-class with the out-of-class utterance generation given the intent. To evaluate the quality of the generated utterances, experiments are conducted on the generalized few-shot intent detection task. Empirical results show that our proposed model achieves state-of-the-art performances on two real-world intent detection datasets.</abstract>
@@ -4022,7 +4022,7 @@
     <paper id="307">
       <title><fixed-case>BERT</fixed-case>-k<fixed-case>NN</fixed-case>: Adding a k<fixed-case>NN</fixed-case> Search Component to Pretrained Language Models for Better <fixed-case>QA</fixed-case></title>
       <author><first>Nora</first><last>Kassner</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3424–3430</pages>
       <abstract>Khandelwal et al. (2020) use a k-nearest-neighbor (kNN) component to improve language model performance. We show that this idea is beneficial for open-domain question answering (QA). To improve the recall of facts encountered during training, we combine BERT (Devlin et al., 2019) with a traditional information retrieval step (IR) and a kNN search over a large datastore of an embedded text collection. Our contributions are as follows: i) BERT-kNN outperforms BERT on cloze-style QA by large margins without any further training. ii) We show that BERT often identifies the correct response category (e.g., US city), but only kNN recovers the factually correct answer (e.g.,“Miami”). iii) Compared to BERT, BERT-kNN excels for rare facts. iv) BERT-kNN can easily handle facts not covered by BERT’s training set, e.g., recent events.</abstract>
       <url hash="d7d3ea30">2020.findings-emnlp.307</url>
@@ -4112,8 +4112,8 @@
       <author><first>Kyle</first><last>Richardson</last></author>
       <author><first>Liang</first><last>Xu</last></author>
       <author><first>Lu</first><last>Li</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Lawrence</first><last>Moss</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence</first><last>Moss</last></author>
       <pages>3512–3526</pages>
       <abstract>Despite the tremendous recent progress on natural language inference (NLI), driven largely by large-scale investment in new datasets (e.g.,SNLI, MNLI) and advances in modeling, most progress has been limited to English due to a lack of reliable datasets for most of the world’s languages. In this paper, we present the first large-scale NLI dataset (consisting of ~56,000 annotated sentence pairs) for Chinese called the Original Chinese Natural Language Inference dataset (OCNLI). Unlike recent attempts at extending NLI to other languages, our dataset does not rely on any automatic translation or non-expert annotation. Instead, we elicit annotations from native speakers specializing in linguistics. We follow closely the annotation protocol used for MNLI, but create new strategies for eliciting diverse hypotheses. We establish several baseline results on our dataset using state-of-the-art pre-trained models for Chinese, and find even the best performing models to be far outpaced by human performance (~12% absolute performance gap), making it a challenging new resource that we hope will help to accelerate progress in Chinese NLU. To the best of our knowledge, this is the first human-elicited MNLI-style corpus for a non-English language.</abstract>
       <url hash="ef440fe2">2020.findings-emnlp.314</url>
@@ -4126,8 +4126,8 @@
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
       <author><first>Feng</first><last>Nan</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <pages>3527–3536</pages>
       <abstract>Unsupervised domain adaptation addresses the problem of leveraging labeled data in a source domain to learn a well-performing model in a target domain where labels are unavailable. In this paper, we improve upon a recent theoretical work (Zhang et al., 2019b) and adopt the Margin Disparity Discrepancy (MDD) unsupervised domain adaptation algorithm to solve the cross-lingual text labeling problems. Experiments on cross-lingual document classification and NER demonstrate the proposed domain adaptation approach advances the state-of-the-art results by a large margin. Specifically, we improve MDD by efficiently optimizing the margin loss on the source domain via Virtual Adversarial Training (VAT). This bridges the gap between theory and the loss function used in the original work Zhang et al.(2019b), and thereby significantly boosts the performance. Our numerical results also indicate that VAT can remarkably improve the generalization performance of both domains for various domain adaptation approaches.</abstract>
@@ -4139,7 +4139,7 @@
       <title>Rethinking Supervised Learning and Reinforcement Learning in Task-Oriented Dialogue Systems</title>
       <author><first>Ziming</first><last>Li</last></author>
       <author><first>Julia</first><last>Kiseleva</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>3537–3546</pages>
       <abstract>Dialogue policy learning for task-oriented dialogue systems has enjoyed great progress recently mostly through employing reinforcement learning methods. However, these approaches have become very sophisticated. It is time to re-evaluate it. Are we really making progress developing dialogue agents only based on reinforcement learning? We demonstrate how (1) traditional supervised learning together with (2) a simulator-free adversarial learning method can be used to achieve performance comparable to state-of-the-art reinforcement learning-based methods. First, we introduce a simple dialogue action decoder to predict the appropriate actions. Then, the traditional multi-label classification solution for dialogue policy learning is extended by adding dense layers to improve the dialogue agent performance. Finally, we employ the Gumbel-Softmax estimator to alternatively train the dialogue agent and the dialogue reward model without using reinforcement learning. Based on our extensive experimentation, we can conclude the proposed methods can achieve more stable and higher performance with fewer efforts, such as the domain knowledge required to design a user simulator and the intractable parameter tuning in reinforcement learning. Our main goal is not to beat RL with supervised learning, but to demonstrate the value of rethinking the role of reinforcement learning and supervised learning in optimizing task-oriented dialogue systems.</abstract>
       <url hash="b3154335">2020.findings-emnlp.316</url>
@@ -4164,7 +4164,7 @@
       <author><first>Shuai</first><last>Wang</last></author>
       <author><first>Rishita</first><last>Anubhai</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>3554–3559</pages>
       <abstract>Event argument extraction (EAE) aims to identify the arguments of an event and classify the roles that those arguments play. Despite great efforts made in prior work, there remain many challenges: (1) Data scarcity. (2) Capturing the long-range dependency, specifically, the connection between an event trigger and a distant event argument. (3) Integrating event trigger information into candidate argument representation. For (1), we explore using unlabeled data. For (2), we use Transformer that uses dependency parses to guide the attention mechanism. For (3), we propose a trigger-aware sequence encoder with several types of trigger-dependent sequence representations. We also support argument extraction either from text annotated with gold entities or from plain text. Experiments on the English ACE 2005 benchmark show that our approach achieves a new state-of-the-art.</abstract>
       <url hash="0d2fc419">2020.findings-emnlp.318</url>
@@ -4263,7 +4263,7 @@
       <title>The Box is in the Pen: Evaluating Commonsense Reasoning in Neural Machine Translation</title>
       <author><first>Jie</first><last>He</last></author>
       <author><first>Tao</first><last>Wang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>3662–3672</pages>
       <abstract>Does neural machine translation yield translations that are congenial with common sense? In this paper, we present a test suite to evaluate the commonsense reasoning capability of neural machine translation. The test suite consists of three test sets, covering lexical and contextless/contextual syntactic ambiguity that requires commonsense knowledge to resolve. We manually create 1,200 triples, each of which contain a source sentence and two contrastive translations, involving 7 different common sense types. Language models pretrained on large-scale corpora, such as BERT, GPT-2, achieve a commonsense reasoning accuracy of lower than 72% on target translations of this test suite. We conduct extensive experiments on the test suite to evaluate commonsense reasoning in neural machine translation and investigate factors that have impact on this capability. Our experiments and analyses demonstrate that neural machine translation performs poorly on commonsense reasoning of the three ambiguity types in terms of both reasoning accuracy ( 6 60.1%) and reasoning consistency (6 31%). We will release our test suite as a machine translation commonsense reasoning testbed to promote future work in this direction.</abstract>
@@ -4291,7 +4291,7 @@
       <author><first>Zi-Yi</first><last>Dou</last></author>
       <author><first>Danqing</first><last>Wang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3679–3691</pages>
       <abstract>Neural network-based models augmented with unsupervised pre-trained knowledge have achieved impressive performance on text summarization. However, most existing evaluation methods are limited to an in-domain setting, where summarizers are trained and evaluated on the same dataset. We argue that this approach can narrow our understanding of the generalization ability for different summarization systems. In this paper, we perform an in-depth analysis of characteristics of different datasets and investigate the performance of different summarization models under a cross-dataset setting, in which a summarizer trained on one corpus will be evaluated on a range of out-of-domain corpora. A comprehensive study of 11 representative summarization systems on 5 datasets from different domains reveals the effect of model architectures and generation ways (i.e. abstractive and extractive) on model generalization ability. Further, experimental results shed light on the limitations of existing summarizers. Brief introduction and supplementary code can be found in <url>https://github.com/zide05/CDEvalSumm</url>.</abstract>
       <url hash="7673f9eb">2020.findings-emnlp.329</url>
@@ -4354,7 +4354,7 @@
       <author><first>Hanting</first><last>Su</last></author>
       <author><first>David</first><last>Kartchner</last></author>
       <author><first>Cassie</first><last>Mitchell</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>3739–3754</pages>
       <abstract>We study the problem of learning neural text classifiers without using any labeled data, but only easy-to-provide rules as multiple weak supervision sources. This problem is challenging because rule-induced weak labels are often noisy and incomplete. To address these two challenges, we design a label denoiser, which estimates the source reliability using a conditional soft attention mechanism and then reduces label noise by aggregating rule-annotated weak labels. The denoised pseudo labels then supervise a neural classifier to predicts soft labels for unmatched samples, which address the rule coverage issue. We evaluate our model on five benchmarks for sentiment, topic, and relation classifications. The results show that our model outperforms state-of-the-art weakly-supervised and semi-supervised methods consistently, and achieves comparable performance with fully-supervised methods even without any labeled data. Our code can be found at <url>https://github.com/weakrules/Denoise-multi-weak-sources</url>.</abstract>
       <url hash="04e407a3">2020.findings-emnlp.334</url>
@@ -4413,9 +4413,9 @@
     <paper id="339">
       <title>Controllable Text Generation with Focused Variation</title>
       <author><first>Lei</first><last>Shu</last></author>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Yi-Chia</first><last>Wang</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Zhaleh</first><last>Feizollahi</last></author>
       <author><first>Bing</first><last>Liu</last></author>
@@ -4434,7 +4434,7 @@
       <author><first>Gargi</first><last>Sawhney</last></author>
       <author><first>Anmol</first><last>Shukla</last></author>
       <author><first>Keerthi Kumar</first><last>Kallur</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <pages>3818–3828</pages>
       <abstract>Preconditions provide a form of logical connection between events that explains why some events occur together and information that is complementary to the more widely studied relations such as causation, temporal ordering, entailment, and discourse relations. Modeling preconditions in text has been hampered in part due to the lack of large scale labeled data grounded in text. This paper introduces PeKo, a crowd-sourced annotation of <i>preconditions</i> between event pairs in newswire, an order of magnitude larger than prior text annotations. To complement this new corpus, we also introduce two challenge tasks aimed at modeling preconditions: (i) Precondition Identification – a standard classification task defined over pairs of event mentions, and (ii) Precondition Generation – a generative task aimed at testing a more general ability to reason about a given event. Evaluation on both tasks shows that modeling preconditions is challenging even for today’s large language models (LM). This suggests that precondition knowledge is not easily accessible in LM-derived representations alone. Our generation results show that fine-tuning an LM on PeKo yields better conditional relations than when trained on raw text or temporally-ordered corpora.</abstract>
@@ -4486,8 +4486,8 @@
       <title>Event-Related Bias Removal for Real-time Disaster Events</title>
       <author><first>Salvador</first><last>Medina Maza</last></author>
       <author><first>Evangelia</first><last>Spiliopoulou</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>3858–3868</pages>
       <abstract>Social media has become an important tool to share information about crisis events such as natural disasters and mass attacks. Detecting actionable posts that contain useful information requires rapid analysis of huge volumes of data in real-time. This poses a complex problem due to the large amount of posts that do not contain any actionable information. Furthermore, the classification of information in real-time systems requires training on out-of-domain data, as we do not have any data from a new emerging crisis. Prior work focuses on models pre-trained on similar event types. However, those models capture unnecessary event-specific biases, like the location of the event, which affect the generalizability and performance of the classifiers on new unseen data from an emerging new event. In our work, we train an adversarial neural model to remove latent event-specific biases and improve the performance on tweet importance classification.</abstract>
       <url hash="e753abc0">2020.findings-emnlp.344</url>
@@ -4514,7 +4514,7 @@
       <author><first>Renjie</first><last>Zheng</last></author>
       <author><first>Hairong</first><last>Liu</last></author>
       <author><first>Kainan</first><last>Peng</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <pages>3886–3896</pages>
       <abstract>Text-to-speech synthesis (TTS) has witnessed rapid progress in recent years, where neural methods became capable of producing audios with high naturalness. However, these efforts still suffer from two types of latencies: (a) the <i>computational latency</i> (synthesizing time), which grows linearly with the sentence length, and (b) the <i>input latency</i> in scenarios where the input text is incrementally available (such as in simultaneous translation, dialog generation, and assistive technologies). To reduce these latencies, we propose a neural incremental TTS approach using the prefix-to-prefix framework from simultaneous translation. We synthesize speech in an online fashion, playing a segment of audio while generating the next, resulting in an O(1) rather than O(n) latency. Experiments on English and Chinese TTS show that our approach achieves similar speech naturalness compared to full sentence TTS, but only with a constant (1-2 words) latency.</abstract>
@@ -4557,7 +4557,7 @@
       <author><first>Baigong</first><last>Zheng</last></author>
       <author><first>Kaibo</first><last>Liu</last></author>
       <author><first>Jiahong</first><last>Yuan</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <pages>3928–3937</pages>
       <abstract>Simultaneous speech-to-speech translation is an extremely challenging but widely useful scenario that aims to generate target-language speech only a few seconds behind the source-language speech. In addition, we have to continuously translate a speech of multiple sentences, but all recent solutions merely focus on the single-sentence scenario. As a result, current approaches will accumulate more and more latencies in later sentences when the speaker talks faster and introduce unnatural pauses into translated speech when the speaker talks slower. To overcome these issues, we propose Self-Adaptive Translation which flexibly adjusts the length of translations to accommodate different source speech rates. At similar levels of translation quality (as measured by BLEU), our method generates more fluent target speech latency than the baseline, in both Zh&lt;-&gt;En directions.</abstract>
@@ -4608,7 +4608,7 @@
       <author><first>Danish</first><last>Pruthi</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>3965–3970</pages>
       <abstract>For many prediction tasks, stakeholders desire not only predictions but also supporting evidence that a human can use to verify its correctness. However, in practice, evidence annotations may only be available for a minority of training examples (if available at all). In this paper, we propose new methods to combine few evidence annotations (strong semi-supervision) with abundant document-level labels (weak supervision) for the task of evidence extraction. Evaluating on two classification tasks that feature evidence annotations, we find that our methods outperform baselines adapted from the interpretability literature to our task. Our approach yields gains with as few as hundred evidence annotations.</abstract>
       <url hash="2c611414">2020.findings-emnlp.353</url>
@@ -4669,8 +4669,8 @@
     <paper id="358">
       <title>Effects of Naturalistic Variation in Goal-Oriented Dialog</title>
       <author><first>Jatin</first><last>Ganhotra</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Kahini</first><last>Wadhawan</last></author>
       <pages>4013–4020</pages>
       <abstract>Existing benchmarks used to evaluate the performance of end-to-end neural dialog systems lack a key component: natural variation present in human conversations. Most datasets are constructed through crowdsourcing, where the crowd workers follow a fixed template of instructions while enacting the role of a user/agent. This results in straight-forward, somewhat routine, and mostly trouble-free conversations, as crowd workers do not think to represent the full range of actions that occur naturally with real users. In this work, we investigate the impact of naturalistic variation on two goal-oriented datasets: bAbI dialog task and Stanford Multi-Domain Dataset (SMD). We also propose new and more effective testbeds for both datasets, by introducing naturalistic variation by the user. We observe that there is a significant drop in performance (more than 60% in Ent. F1 on SMD and 85% in per-dialog accuracy on bAbI task) of recent state-of-the-art end-to-end neural methods such as BossNet and GLMP on both datasets.</abstract>
@@ -4695,8 +4695,8 @@
       <title><fixed-case>W</fixed-case>iki<fixed-case>L</fixed-case>ingua: A New Benchmark Dataset for Cross-Lingual Abstractive Summarization</title>
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>4034–4048</pages>
       <abstract>We introduce WikiLingua, a large-scale, multilingual dataset for the evaluation of cross-lingual abstractive summarization systems. We extract article and summary pairs in 18 languages from WikiHow, a high quality, collaborative resource of how-to guides on a diverse set of topics written by human authors. We create gold-standard article-summary alignments across languages by aligning the images that are used to describe each how-to step in an article. As a set of baselines for further studies, we evaluate the performance of existing cross-lingual abstractive summarization methods on our dataset. We further propose a method for direct cross-lingual summarization (i.e., without requiring translation at inference time) by leveraging synthetic data and Neural Machine Translation as a pre-training step. Our method significantly outperforms the baseline approaches, while being more cost efficient during inference.</abstract>
       <url hash="fd3359d1">2020.findings-emnlp.360</url>
@@ -4837,7 +4837,7 @@
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>4151–4162</pages>
       <abstract>Exploiting a common language as an auxiliary for better translation has a long tradition in machine translation and lets supervised learning-based machine translation enjoy the enhancement delivered by the well-used pivot language in the absence of a source language to target language parallel corpus. The rise of unsupervised neural machine translation (UNMT) almost completely relieves the parallel corpus curse, though UNMT is still subject to unsatisfactory performance due to the vagueness of the clues available for its core back-translation training. Further enriching the idea of pivot translation by extending the use of parallel corpora beyond the source-target paradigm, we propose a new reference language-based framework for UNMT, RUNMT, in which the reference language only shares a parallel corpus with the source, but this corpus still indicates a signal clear enough to help the reconstruction training of UNMT through a proposed reference agreement mechanism. Experimental results show that our methods improve the quality of UNMT over that of a strong baseline that uses only one auxiliary language, demonstrating the usefulness of the proposed reference language-based UNMT and establishing a good start for the community.</abstract>
       <url hash="4b344077">2020.findings-emnlp.371</url>
@@ -4905,7 +4905,7 @@
     <paper id="376">
       <title>Multichannel <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel: <fixed-case>L</fixed-case>earning <fixed-case>A</fixed-case>ll <fixed-case>P</fixed-case>ossible <fixed-case>F</fixed-case>actorizations <fixed-case>W</fixed-case>ithin and <fixed-case>A</fixed-case>cross <fixed-case>C</fixed-case>hannels</title>
       <author><first>Harris</first><last>Chan</last></author>
-      <author><first>Jamie</first><last>Kiros</last></author>
+      <author id="jamie-kiros"><first>Jamie</first><last>Kiros</last></author>
       <author><first>William</first><last>Chan</last></author>
       <pages>4208–4220</pages>
       <abstract>A channel corresponds to a viewpoint or transformation of an underlying meaning. A pair of parallel sentences in English and French express the same underlying meaning, but through two separate channels corresponding to their languages. In this work, we present the Multichannel Generative Language Model (MGLM). MGLM is a generative joint distribution model over channels. MGLM marginalizes over all possible factorizations within and across all channels. MGLM endows flexible inference, including unconditional generation, conditional generation (where 1 channel is observed and other channels are generated), and partially observed generation (where incomplete observations are spread across all the channels). We experiment with the Multi30K dataset containing English, French, Czech, and German. We demonstrate experiments with unconditional, conditional, and partially conditional generation. We provide qualitative samples sampled unconditionally from the generative joint distribution. We also quantitatively analyze the quality-diversity trade-offs and find MGLM outperforms traditional bilingual discriminative models.</abstract>
@@ -4964,7 +4964,7 @@
       <author><first>Gabriela</first><last>Pałka</last></author>
       <author><first>Karol</first><last>Kaczmarek</last></author>
       <author><first>Agnieszka</first><last>Kaliska</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <pages>4254–4268</pages>
       <abstract>We propose a new shared task of semantic retrieval from legal texts, in which a so-called contract discovery is to be performed – where legal clauses are extracted from documents, given a few examples of similar clauses from other legal acts. The task differs substantially from conventional NLI and shared tasks on legal information extraction (e.g., one has to identify text span instead of a single document, page, or paragraph). The specification of the proposed task is followed by an evaluation of multiple solutions within the unified framework proposed for this branch of methods. It is shown that state-of-the-art pretrained encoders fail to provide satisfactory results on the task proposed. In contrast, Language Model-based solutions perform better, especially when unsupervised fine-tuning is applied. Besides the ablation studies, we addressed questions regarding detection accuracy for relevant text fragments depending on the number of examples available. In addition to the dataset and reference results, LMs specialized in the legal domain were made publicly available.</abstract>
       <url hash="18495437">2020.findings-emnlp.380</url>
@@ -4997,7 +4997,7 @@
     <paper id="383">
       <title>Detecting Media Bias in News Articles using <fixed-case>G</fixed-case>aussian Bias Distributions</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>4290–4300</pages>
@@ -5034,8 +5034,8 @@
       <title>Looking inside Noun Compounds: Unsupervised Prepositional and Free Paraphrasing</title>
       <author><first>Girishkumar</first><last>Ponkiya</last></author>
       <author><first>Rudra</first><last>Murthy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>4313–4323</pages>
       <abstract>A noun compound is a sequence of contiguous nouns that acts as a single noun, although the predicate denoting the semantic relation between its components is dropped. Noun Compound Interpretation is the task of uncovering the relation, in the form of a preposition or a free paraphrase. Prepositional paraphrasing refers to the use of preposition to explain the semantic relation, whereas free paraphrasing refers to invoking an appropriate predicate denoting the semantic relation. In this paper, we propose an unsupervised methodology for these two types of paraphrasing. We use pre-trained contextualized language models to uncover the ‘missing’ words (preposition or predicate). These language models are usually trained to uncover the missing word/words in a given input sentence. Our approach uses templates to prepare the input sequence for the language model. The template uses a special token to indicate the missing predicate. As the model has already been pre-trained to uncover a missing word (or a sequence of words), we exploit it to predict missing words for the input sequence. Our experiments using four datasets show that our unsupervised approach (a) performs comparably to supervised approaches for prepositional paraphrasing, and (b) outperforms supervised approaches for free paraphrasing. Paraphrasing (prepositional or free) using our unsupervised approach is potentially helpful for NLP tasks like machine translation and information extraction.</abstract>
       <url hash="b70bc244">2020.findings-emnlp.386</url>
@@ -5104,7 +5104,7 @@
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>4379–4389</pages>
       <abstract>This paper proposes a novel approach to learn commonsense from images, instead of limited raw texts or costly constructed knowledge bases, for the commonsense reasoning problem in NLP. Our motivation comes from the fact that an image is worth a thousand words, where richer scene information could be leveraged to help distill the commonsense knowledge, which is often hidden in languages. Our approach, namely Loire, consists of two stages. In the first stage, a bi-modal sequence-to-sequence approach is utilized to conduct the scene layout generation task, based on a text representation model ViBERT. In this way, the required visual scene knowledge, such as spatial relations, will be encoded in ViBERT by the supervised learning process with some bi-modal data like COCO. Then ViBERT is concatenated with a pre-trained language model to perform the downstream commonsense reasoning tasks. Experimental results on two commonsense reasoning problems, i.e.commonsense question answering and pronoun resolution, demonstrate that Loire outperforms traditional language-based methods. We also give some case studies to show what knowledge is learned from images and explain how the generated scene layout helps the commonsense reasoning process.</abstract>
       <url hash="ada80ba7">2020.findings-emnlp.392</url>
@@ -5137,7 +5137,7 @@
     </paper>
     <paper id="395">
       <title>Visually-Grounded Planning without Vision: Language Models Infer Detailed Plans from High-level Instructions</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>4412–4417</pages>
       <abstract>The recently proposed ALFRED challenge task aims for a virtual robotic agent to complete complex multi-step everyday tasks in a virtual home environment from high-level natural language directives, such as “put a hot piece of bread on a plate”. Currently, the best-performing models are able to complete less than 1% of these tasks successfully. In this work we focus on modeling the translation problem of converting natural language directives into detailed multi-step sequences of actions that accomplish those goals in the virtual environment. We empirically demonstrate that it is possible to generate gold multi-step plans from language directives alone without any visual input in 26% of unseen cases. When a small amount of visual information, the starting location in the virtual environment, is incorporated, our best-performing GPT-2 model successfully generates gold command sequences in 58% of cases, suggesting contextualized language models may provide strong planning modules for grounded virtual agents.</abstract>
       <url hash="28f01fc2">2020.findings-emnlp.395</url>
@@ -5266,7 +5266,7 @@
     <paper id="406">
       <title>An Empirical Investigation of Beam-Aware Training in Supertagging</title>
       <author><first>Renato</first><last>Negrinho</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Geoff</first><last>Gordon</last></author>
       <pages>4534–4542</pages>
       <abstract>Structured prediction is often approached by training a locally normalized model with maximum likelihood and decoding approximately with beam search. This approach leads to mismatches as, during training, the model is not exposed to its mistakes and does not use beam search. Beam-aware training aims to address these problems, but unfortunately, it is not yet widely used due to a lack of understanding about how it impacts performance, when it is most useful, and whether it is stable. Recently, Negrinho et al. (2018) proposed a meta-algorithm that captures beam-aware training algorithms and suggests new ones, but unfortunately did not provide empirical results. In this paper, we begin an empirical investigation: we train the supertagging model of Vaswani et al. (2018) and a simpler model with instantiations of the meta-algorithm. We explore the influence of various design choices and make recommendations for choosing them. We observe that beam-aware training improves performance for both models, with large improvements for the simpler model which must effectively manage uncertainty during decoding. Our results suggest that a model must be learned with search to maximize its effectiveness.</abstract>
@@ -5281,7 +5281,7 @@
       <author><first>Amir</first><last>Pouran Ben Veyseh</last></author>
       <author><first>Nasim</first><last>Nouri</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Dejing</first><last>Dou</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <pages>4543–4548</pages>
@@ -5294,7 +5294,7 @@
       <title><fixed-case>D</fixed-case>ecoding Language Spatial Relations to 2<fixed-case>D</fixed-case> Spatial Arrangements</title>
       <author><first>Gorjan</first><last>Radevski</last></author>
       <author><first>Guillem</first><last>Collell</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Tinne</first><last>Tuytelaars</last></author>
       <pages>4549–4560</pages>
       <abstract>We address the problem of multimodal spatial understanding by decoding a set of language-expressed spatial relations to a set of 2D spatial arrangements in a multi-object and multi-relationship setting. We frame the task as arranging a scene of clip-arts given a textual description. We propose a simple and effective model architecture Spatial-Reasoning Bert (SR-Bert), trained to decode text to 2D spatial arrangements in a non-autoregressive manner. SR-Bert can decode both explicit and implicit language to 2D spatial arrangements, generalizes to out-of-sample data to a reasonable extent and can generate complete abstract scenes if paired with a clip-arts predictor. Finally, we qualitatively evaluate our method with a user study, validating that our generated spatial arrangements align with human expectation.</abstract>
@@ -5420,7 +5420,7 @@
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Maxwell</first><last>Forbes</last></author>
       <author><first>Ronan</first><last>Le Bras</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>4661–4675</pages>
       <abstract>Defeasible inference is a mode of reasoning in which an inference (X is a bird, therefore X flies) may be weakened or overturned in light of new evidence (X is a penguin). Though long recognized in classical AI and philosophy, defeasible inference has not been extensively studied in the context of contemporary data-driven research on natural language inference and commonsense reasoning. We introduce Defeasible NLI (abbreviated <tex-math>\delta</tex-math>-NLI), a dataset for defeasible inference in natural language. Defeasible NLI contains extensions to three existing inference datasets covering diverse modes of reasoning: common sense, natural language inference, and social norms. From Defeasible NLI, we develop both a classification and generation task for defeasible inference, and demonstrate that the generation task is much more challenging. Despite lagging human performance, however, generative models trained on this data are capable of writing sentences that weaken or strengthen a specified inference up to 68% of the time.</abstract>
@@ -5445,7 +5445,7 @@
     <paper id="420">
       <title>Language-<fixed-case>C</fixed-case>onditioned <fixed-case>F</fixed-case>eature <fixed-case>P</fixed-case>yramids for <fixed-case>V</fixed-case>isual <fixed-case>S</fixed-case>election <fixed-case>T</fixed-case>asks</title>
       <author><first>Taichi</first><last>Iki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>4687–4697</pages>
       <abstract>Referring expression comprehension, which is the ability to locate language to an object in an image, plays an important role in creating common ground. Many models that fuse visual and linguistic features have been proposed. However, few models consider the fusion of linguistic features with multiple visual features with different sizes of receptive fields, though the proper size of the receptive field of visual features intuitively varies depending on expressions. In this paper, we introduce a neural network architecture that modulates visual features with varying sizes of receptive field by linguistic features. We evaluate our architecture on tasks related to referring expression comprehension in two visual dialogue games. The results show the advantages and broad applicability of our architecture. Source code is available at <url>https://github.com/Alab-NII/lcfp</url> .</abstract>
       <url hash="ef3eca83">2020.findings-emnlp.420</url>
@@ -5474,7 +5474,7 @@
       <author><first>Jiajie</first><last>Peng</last></author>
       <author><first>Zengfeng</first><last>Huang</last></author>
       <author><first>Weijian</first><last>Sun</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>4705–4710</pages>
       <abstract>Terms contained in Gene Ontology (GO) have been widely used in biology and bio-medicine. Most previous research focuses on inferring new GO terms, while the term names that reflect the gene function are still named by the experts. To fill this gap, we propose a novel task, namely term name generation for GO, and build a large-scale benchmark dataset. Furthermore, we present a graph-based generative model that incorporates the relations between genes, words and terms for term name generation, which exhibits great advantages over the strong baselines.</abstract>
       <url hash="42cb6e3a">2020.findings-emnlp.422</url>
@@ -5527,9 +5527,9 @@
     </paper>
     <paper id="426">
       <title>Finding <fixed-case>F</fixed-case>riends and Flipping Frenemies: Automatic Paraphrase Dataset Augmentation Using Graph Theory</title>
-      <author><first>Hannah</first><last>Chen</last></author>
+      <author id="hannah-cyberey"><first>Hannah</first><last>Chen</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
-      <author><first>David</first><last>Evans</last></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last></author>
       <pages>4741–4751</pages>
       <abstract>Most NLP datasets are manually labeled, so suffer from inconsistent labeling or limited size. We propose methods for automatically improving datasets by viewing them as graphs with expected semantic properties. We construct a paraphrase graph from the provided sentence pair labels, and create an augmented dataset by directly inferring labels from the original sentence pairs using a transitivity property. We use structural balance theory to identify likely mislabelings in the graph, and flip their labels. We evaluate our methods on paraphrase models trained using these datasets starting from a pretrained BERT model, and find that the automatically-enhanced training sets result in more accurate models.</abstract>
       <url hash="30da7833">2020.findings-emnlp.426</url>
@@ -5555,7 +5555,7 @@
       <author><first>Isabel</first><last>Cachola</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>4766–4777</pages>
       <abstract>We introduce TLDR generation, a new form of extreme summarization, for scientific papers. TLDR generation involves high source compression and requires expert background knowledge and understanding of complex domain-specific language. To facilitate study on this task, we introduce SCITLDR, a new multi-target dataset of 5.4K TLDRs over 3.2K papers. SCITLDR contains both author-written and expert-derived TLDRs, where the latter are collected using a novel annotation protocol that produces high-quality summaries while minimizing annotation burden. We propose CATTS, a simple yet effective learning strategy for generating TLDRs that exploits titles as an auxiliary training signal. CATTS improves upon strong baselines under both automated metrics and human evaluations. Data and code are publicly available at <url>https://github.com/allenai/scitldr</url>.</abstract>
       <url hash="9071ddb1">2020.findings-emnlp.428</url>
@@ -5682,7 +5682,7 @@
     </paper>
     <paper id="438">
       <title>Bridging Textual and Tabular Data for Cross-Domain Text-to-<fixed-case>SQL</fixed-case> Semantic Parsing</title>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>4870–4888</pages>
@@ -5724,7 +5724,7 @@
       <author><first>Lin</first><last>Qiu</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Mingxuan</first><last>Wang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>4908–4917</pages>
@@ -5777,7 +5777,7 @@
       <author><first>Satish</first><last>Golla</last></author>
       <author><first>Gokul</first><last>N.C.</last></author>
       <author><first>Avik</first><last>Bhattacharyya</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
       <pages>4948–4961</pages>
       <abstract>In this paper, we introduce NLP resources for 11 major Indian languages from two major language families. These resources include: (a) large-scale sentence-level monolingual corpora, (b) pre-trained word embeddings, (c) pre-trained language models, and (d) multiple NLU evaluation datasets (<i>IndicGLUE</i> benchmark). The monolingual corpora contains a total of 8.8 billion tokens across all 11 languages and Indian English, primarily sourced from news crawls. The word embeddings are based on <i>FastText</i>, hence suitable for handling morphological complexity of Indian languages. The pre-trained language models are based on the compact ALBERT model. Lastly, we compile the (<i>IndicGLUE</i> benchmark for Indian language NLU. To this end, we create datasets for the following tasks: Article Genre Classification, Headline Prediction, Wikipedia Section-Title Prediction, Cloze-style Multiple choice QA, Winograd NLI and COPA. We also include publicly available datasets for some Indic languages for tasks like Named Entity Recognition, Cross-lingual Sentence Retrieval, Paraphrase detection, <i>etc.</i> Our embeddings are competitive or better than existing pre-trained embeddings on multiple tasks. We hope that the availability of the dataset will accelerate Indic NLP research which has the potential to impact more than a billion people. It can also help the community in evaluating advances in NLP over a more diverse pool of languages. The data and models are available at <url>https://indicnlp.ai4bharat.org</url>.</abstract>
@@ -5788,7 +5788,7 @@
     <paper id="446">
       <title>Weakly-Supervised Modeling of Contextualized Event Embedding for Discourse Relations</title>
       <author><first>I-Ta</first><last>Lee</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>4962–4972</pages>
       <abstract>Representing, and reasoning over, long narratives requires models that can deal with complex event structures connected through multiple relationship types. This paper suggests to represent this type of information as a narrative graph and learn contextualized event representations over it using a relational graph neural network model. We train our model to capture event relations, derived from the Penn Discourse Tree Bank, on a huge corpus, and show that our multi-relational contextualized event representation can improve performance when learning script knowledge without direct supervision and provide a better representation for the implicit discourse sense classification task.</abstract>
diff --git a/data/xml/2020.finnlp.xml b/data/xml/2020.finnlp.xml
index e7e7d8c405..7ae35654a9 100644
--- a/data/xml/2020.finnlp.xml
+++ b/data/xml/2020.finnlp.xml
@@ -78,7 +78,7 @@
     <paper id="7">
       <title>Using Extractive Lexicon-based Sentiment Analysis to Enhance Understanding ofthe Impact of Non-<fixed-case>GAAP</fixed-case> Measures in Financial Reporting</title>
       <author><first>Stacey</first><last>Taylor</last></author>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <pages>40–46</pages>
       <url hash="b77d6bfd">2020.finnlp-1.7</url>
       <bibkey>taylor-keselj-2020-using</bibkey>
@@ -123,7 +123,7 @@
       <author><first>Aman</first><last>Khullar</last></author>
       <author><first>Sarath Chandra</first><last>Pakala</last></author>
       <author><first>Vishnu</first><last>Ramesh</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>75–80</pages>
       <url hash="2d52931c">2020.finnlp-1.12</url>
       <bibkey>arora-etal-2020-subtl</bibkey>
diff --git a/data/xml/2020.fnp.xml b/data/xml/2020.fnp.xml
index 3752fcbd16..c75a4fb359 100644
--- a/data/xml/2020.fnp.xml
+++ b/data/xml/2020.fnp.xml
@@ -26,7 +26,7 @@
     <paper id="1">
       <title>The Financial Narrative Summarisation Shared Task (<fixed-case>FNS</fixed-case> 2020)</title>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Marina</first><last>Litvak</last></author>
       <author><first>Nikiforos</first><last>Pittaras</last></author>
       <author><first>George</first><last>Giannakopoulos</last></author>
@@ -39,7 +39,7 @@
       <title>The Financial Document Structure Extraction Shared task (<fixed-case>F</fixed-case>in<fixed-case>T</fixed-case>oc 2020)</title>
       <author><first>Najah-Imane</first><last>Bentabet</last></author>
       <author><first>Rémi</first><last>Juge</last></author>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <author><first>Virginie</first><last>Mouilleron</last></author>
       <author><first>Dialekti</first><last>Valsamou-Stanislawski</last></author>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
@@ -65,7 +65,7 @@
       <title><fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab_<fixed-case>NC</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2020, Task 1: A Knowledge Induced Neural Net for Causality Detection</title>
       <author><first>Raksha</first><last>Agarwal</last></author>
       <author><first>Ishaan</first><last>Verma</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>33–39</pages>
       <abstract>Identifying causal relationships in a text is essential for achieving comprehensive natural language understanding. The present work proposes a combination of features derived from pre-trained BERT with linguistic features for training a supervised classifier for the task of Causality Detection. The Linguistic features help to inject knowledge about the semantic and syntactic structure of the input sentences. Experiments on the FinCausal Shared Task1 datasets indicate that the combination of Linguistic features with BERT improves overall performance for causality detection. The proposed system achieves a weighted average F1 score of 0.952 on the post-evaluation dataset.</abstract>
       <url hash="aa7535c1">2020.fnp-1.4</url>
@@ -84,7 +84,7 @@
       <author><first>Denis</first><last>Gordeev</last></author>
       <author><first>Adis</first><last>Davletov</last></author>
       <author><first>Alexey</first><last>Rey</last></author>
-      <author><first>Nikolay</first><last>Arefiev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefiev</last></author>
       <pages>45–49</pages>
       <abstract>In this paper, we describe the results of team LIORI at the FinCausal 2020 Shared task held as a part of the 1st Joint Workshop on Financial Narrative Processing and MultiLingual Financial Summarisation. The shared task consisted of two subtasks: classifying whether a sentence contains any causality and labelling phrases that indicate causes and consequences. Our team ranked 1st in the first subtask and 4th in the second one. We used Transformer-based models with joint-task learning and their ensembles.</abstract>
       <url hash="9c0046b2">2020.fnp-1.6</url>
@@ -114,7 +114,7 @@
     <paper id="9">
       <title><fixed-case>NITK</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal-2020 Task 1 Using <fixed-case>BERT</fixed-case> and Linear models.</title>
       <author><first>Hariharan</first><last>R L</last></author>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <pages>60–63</pages>
       <abstract>FinCausal-2020 is the shared task which focuses on the causality detection of factual data for financial analysis. The financial data facts don’t provide much explanation on the variability of these data. This paper aims to propose an efficient method to classify the data into one which is having any financial cause or not. Many models were used to classify the data, out of which SVM model gave an F-Score of 0.9435, BERT with specific fine-tuning achieved best results with F-Score of 0.9677.</abstract>
       <url hash="d621f784">2020.fnp-1.9</url>
@@ -242,7 +242,7 @@
     <paper id="22">
       <title>Knowledge Graph and Deep Neural Network for Extractive Text Summarization by Utilizing Triples</title>
       <author><first>Amit</first><last>Vhatkar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Kavi</first><last>Arya</last></author>
       <pages>130–136</pages>
       <abstract>In our research work, we represent the content of the sentence in graphical form after extracting triples from the sentences. In this paper, we will discuss novel methods to generate an extractive summary by scoring the triples. Our work has also touched upon sequence-to-sequence encoding of the content of the sentence, to classify it as a summary or a non-summary sentence. Our findings help to decide the nature of the sentences forming the summary and the length of the system generated summary as compared to the length of the reference summary.</abstract>
@@ -271,7 +271,7 @@
       <title><fixed-case>SUMSUM</fixed-case>@<fixed-case>FNS</fixed-case>-2020 Shared Task</title>
       <author><first>Siyan</first><last>Zheng</last></author>
       <author><first>Anneliese</first><last>Lu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>148–152</pages>
       <abstract>This paper describes the SUMSUM systems submitted to the Financial Narrative Summarization Shared Task (FNS-2020). We explore a section-based extractive summarization method tailored to the structure of financial reports: our best system parses the report Table of Contents (ToC), splits the report into narrative sections based on the ToC, and applies a BERT-based classifier to each section to determine whether it should be included in the summary. Our best system ranks 4<tex-math>^{\textrm{th}}</tex-math>, 1<tex-math>^{\textrm{st}}</tex-math>, 2<tex-math>^{\textrm{nd}}</tex-math> and 17<tex-math>^{\textrm{th}}</tex-math> on the Rouge-1, Rouge-2, Rouge-SU4, and Rouge-L official metrics, respectively. We also report results on the validation set using an alternative set of Rouge-based metrics that measure performance with respect to the best-matching of the available gold summaries.</abstract>
       <url hash="51a62fce">2020.fnp-1.25</url>
@@ -345,7 +345,7 @@
     </paper>
     <paper id="33">
       <title>Mitigating Silence in Compliance Terminology during Parsing of Utterances</title>
-      <author><first>Esme</first><last>Manandise</last></author>
+      <author id="esmeralda-manandise"><first>Esme</first><last>Manandise</last></author>
       <author><first>Conrad</first><last>de Peuter</last></author>
       <pages>204–212</pages>
       <abstract>This paper reports on an approach to increase multi-token-term recall in a parsing task. We use a compliance-domain parser to extract, during the process of parsing raw text, terms that are unlisted in the terminology. The parser uses a similarity measure (Generalized Dice Coefficient) between listed terms and unlisted term candidates to (i) determine term status, (ii) serve putative terms to the parser, (iii) decrease parsing complexity by glomming multi-tokens as lexical singletons, and (iv) automatically augment the terminology after parsing of an utterance completes. We illustrate a small experiment with examples from the tax-and-regulations domain. Bootstrapping the parsing process to detect out- of-vocabulary terms at runtime increases parsing accuracy in addition to producing other benefits to a natural-language-processing pipeline, which translates arithmetic calculations written in English into computer-executable operations.</abstract>
@@ -374,7 +374,7 @@
     <paper id="36">
       <title>Extracting Fine-Grained Economic Events from Business News</title>
       <author><first>Gilles</first><last>Jacobs</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>235–245</pages>
       <abstract>Based on a recently developed fine-grained event extraction dataset for the economic domain, we present in a pilot study for supervised economic event extraction. We investigate how a state-of-the-art model for event extraction performs on the trigger and argument identification and classification. While F1-scores of above 50% are obtained on the task of trigger identification, we observe a large gap in performance compared to results on the benchmark ACE05 dataset. We show that single-token triggers do not provide sufficient discriminative information for a fine-grained event detection setup in a closed domain such as economics, since many classes have a large degree of lexico-semantic and contextual overlap.</abstract>
       <url hash="e44a86fd">2020.fnp-1.36</url>
diff --git a/data/xml/2020.framenet.xml b/data/xml/2020.framenet.xml
index 62c03c1f96..0221946b99 100644
--- a/data/xml/2020.framenet.xml
+++ b/data/xml/2020.framenet.xml
@@ -3,11 +3,11 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet</booktitle>
-      <editor><first>Tiago T.</first><last>Torrent</last></editor>
-      <editor><first>Collin F.</first><last>Baker</last></editor>
+      <editor id="tiago-timponi-torrent"><first>Tiago T.</first><last>Torrent</last></editor>
+      <editor id="collin-f-baker"><first>Collin F.</first><last>Baker</last></editor>
       <editor><first>Oliver</first><last>Czulo</last></editor>
       <editor><first>Kyoko</first><last>Ohara</last></editor>
-      <editor><first>Miriam R. L.</first><last>Petruck</last></editor>
+      <editor id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -54,7 +54,7 @@
       <author><first>Frederico</first><last>Belcavello</last></author>
       <author><first>Marcelo</first><last>Viridiano</last></author>
       <author><first>Alexandre</first><last>Diniz da Costa</last></author>
-      <author><first>Ely Edison da Silva</first><last>Matos</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely Edison da Silva</first><last>Matos</last></author>
       <author><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>23–30</pages>
       <abstract>Multimodal aspects of human communication are key in several applications of Natural Language Processing, such as Machine Translation and Natural Language Generation. Despite recent advances in integrating multimodality into Computational Linguistics, the merge between NLP and Computer Vision techniques is still timid, especially when it comes to providing fine-grained accounts for meaning construction. This paper reports on research aiming to determine appropriate methodology and develop a computational tool to annotate multimodal corpora according to a principled structured semantic representation of events, relations and entities: FrameNet. Taking a Brazilian television travel show as corpus, a pilot study was conducted to annotate the frames that are evoked by the audio and the ones that are evoked by visual elements. We also implemented a Multimodal Annotation tool which allows annotators to choose frames and locate frame elements both in the text and in the images, while keeping track of the time span in which those elements are active in each modality. Results suggest that adding a multimodal domain to the linguistic layer of annotation and analysis contributes both to enrich the kind of information that can be tagged in a corpus, and to enhance FrameNet as a model of linguistic cognition.</abstract>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>G</fixed-case>reek within the Global <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Initiative: Challenges and Conclusions so far</title>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Vera</first><last>Pilitsidou</last></author>
       <author><first>Hephaestion</first><last>Christopoulos</last></author>
       <pages>48–55</pages>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="8">
       <title>Using Verb Frames for Text Difficulty Assessment</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Meichun</first><last>Liu</last></author>
       <author><first>Tianyuan</first><last>Cai</last></author>
       <pages>56–62</pages>
@@ -109,11 +109,11 @@
     </paper>
     <paper id="9">
       <title>Deriving a <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Corpus from Parallel <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>UD</fixed-case> Corpora</title>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Roberts</first><last>Darģis</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <author><first>Gunta</first><last>Nešpore-Bērzkalne</last></author>
-      <author><first>Baiba</first><last>Saulite</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulite</last></author>
       <pages>63–69</pages>
       <abstract>We propose an approach for generating an accurate and consistent PropBank-annotated corpus, given a FrameNet-annotated corpus which has an underlying dependency annotation layer, namely, a parallel Universal Dependencies (UD) treebank. The PropBank annotation layer of such a multi-layer corpus can be semi-automatically derived from the existing FrameNet and UD annotation layers, by providing a mapping configuration from lexical units in [a non-English language] FrameNet to [English language] PropBank predicates, and a mapping configuration from FrameNet frame elements to PropBank semantic arguments for the given pair of a FrameNet frame and a PropBank predicate. The latter mapping generally depends on the underlying UD syntactic relations. To demonstrate our approach, we use Latvian FrameNet, annotated on top of Latvian UD Treebank, for generating Latvian PropBank in compliance with the Universal Propositions approach.</abstract>
       <url hash="966c8930">2020.framenet-1.9</url>
@@ -142,9 +142,9 @@
     </paper>
     <paper id="12">
       <title>Building Multilingual Specialized Resources Based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Application to the Field of the Environment</title>
-      <author><first>Marie-Claude</first><last>L’ Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’ Homme</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
-      <author><first>Carlos</first><last>Subirats</last></author>
+      <author id="carlos-subirats-ruggeberg"><first>Carlos</first><last>Subirats</last></author>
       <pages>85–92</pages>
       <abstract>The methodology developed within the FrameNet project is being used to compile resources in an increasing number of specialized fields of knowledge. The methodology along with the theoretical principles on which it is based, i.e. Frame Semantics, are especially appealing as they allow domain-specific resources to account for the conceptual background of specialized knowledge and to explain the linguistic properties of terms against this background. This paper presents a methodology for building a multilingual resource that accounts for terms of the environment. After listing some lexical and conceptual differences that need to be managed in such a resource, we explain how the FrameNet methodology is adapted for describing terms in different languages. We first applied our methodology to French and then extended it to English. Extensions to Spanish, Portuguese and Chinese were made more recently. Up to now, we have defined 190 frames: 112 frames are new; 38 are used as such; and 40 are slightly different (a different number of obligatory participants; a significant alternation, etc.) when compared to Berkeley FrameNet.</abstract>
       <url hash="99ffe3df">2020.framenet-1.12</url>
diff --git a/data/xml/2020.gamnlp.xml b/data/xml/2020.gamnlp.xml
index 272055ea90..67b0cac4f1 100644
--- a/data/xml/2020.gamnlp.xml
+++ b/data/xml/2020.gamnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Workshop on Games and Natural Language Processing</booktitle>
-      <editor><first>Stephanie M.</first><last>Lukin</last></editor>
+      <editor id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -19,7 +19,7 @@
       <title>Creating a Sentiment Lexicon with Game-Specific Words for Analyzing <fixed-case>NPC</fixed-case> Dialogue in The Elder Scrolls <fixed-case>V</fixed-case>: Skyrim</title>
       <author><first>Thérèse</first><last>Bergsma</last></author>
       <author><first>Judith</first><last>van Stegeren</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>1–9</pages>
       <abstract>A weak point of rule-based sentiment analysis systems is that the underlying sentiment lexicons are often not adapted to the domain of the text we want to analyze. We created a game-specific sentiment lexicon for video game Skyrim based on the E-ANEW word list and a dataset of Skyrim’s in-game documents. We calculated sentiment ratings for NPC dialogue using both our lexicon and E-ANEW and compared the resulting sentiment ratings to those of human raters. Both lexicons perform comparably well on our evaluation dialogues, but the game-specific extension performs slightly better on the dominance dimension for dialogue segments and the arousal dimension for full dialogues. To our knowledge, this is the first time that a sentiment analysis lexicon has been adapted to the video game domain.</abstract>
       <url hash="4e3a8f74">2020.gamnlp-1.1</url>
@@ -125,7 +125,7 @@
       <author><first>Richard</first><last>Bartle</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>79–84</pages>
       <abstract>As the uses of Games-With-A-Purpose (GWAPs) broadens, the systems that incorporate its usages have expanded in complexity. The types of annotations required within the NLP paradigm set such an example, where tasks can involve varying complexity of annotations. Assigning more complex tasks to more skilled players through a progression mechanism can achieve higher accuracy in the collected data while acting as a motivating factor that rewards the more skilled players. In this paper, we present the progression technique implemented in Wormingo , an NLP GWAP that currently includes two layers of task complexity. For the experiment, we have implemented four different progression scenarios on 192 players and compared the accuracy and engagement achieved with each scenario.</abstract>
       <url hash="311a864a">2020.gamnlp-1.11</url>
diff --git a/data/xml/2020.gebnlp.xml b/data/xml/2020.gebnlp.xml
index ba6e2a2935..280a0b3ccf 100644
--- a/data/xml/2020.gebnlp.xml
+++ b/data/xml/2020.gebnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-12-10" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Second Workshop on Gender Bias in Natural Language Processing</booktitle>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
       <editor><first>Will</first><last>Radford</last></editor>
       <editor><first>Kellie</first><last>Webster</last></editor>
@@ -60,7 +60,7 @@
       <author><first>Masashi</first><last>Takeshita</last></author>
       <author><first>Yuki</first><last>Katsumata</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>44–55</pages>
       <abstract>It is known that word embeddings exhibit biases inherited from the corpus, and those biases reflect social stereotypes. Recently, many studies have been conducted to analyze and mitigate biases in word embeddings. Unsupervised Bias Enumeration (UBE) (Swinger et al., 2019) is one of approach to analyze biases for English, and Hard Debias (Bolukbasi et al., 2016) is the common technique to mitigate gender bias. These methods focused on English, or, in smaller extent, on Indo-European languages. However, it is not clear whether these methods can be generalized to other languages. In this paper, we apply these analyzing and mitigating methods, UBE and Hard Debias, to Japanese word embeddings. Additionally, we examine whether these methods can be used for Japanese. We experimentally show that UBE and Hard Debias cannot be sufficiently adapted to Japanese embeddings.</abstract>
       <url hash="d864d2e5">2020.gebnlp-1.5</url>
@@ -100,7 +100,7 @@
     <paper id="9">
       <title>Investigating Societal Biases in a Poetry Composition System</title>
       <author><first>Emily</first><last>Sheng</last></author>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <pages>93–106</pages>
       <abstract>There is a growing collection of work analyzing and mitigating societal biases in language understanding, generation, and retrieval tasks, though examining biases in creative tasks remains underexplored. Creative language applications are meant for direct interaction with users, so it is important to quantify and mitigate societal biases in these applications. We introduce a novel study on a pipeline to mitigate societal biases when retrieving next verse suggestions in a poetry composition system. Our results suggest that data augmentation through sentiment style transfer has potential for mitigating societal biases.</abstract>
       <url hash="e9629c82">2020.gebnlp-1.9</url>
@@ -111,7 +111,7 @@
       <author><first>Lucy</first><last>Havens</last></author>
       <author><first>Melissa</first><last>Terras</last></author>
       <author><first>Benjamin</first><last>Bach</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>107–124</pages>
       <abstract>We propose a bias-aware methodology to engage with power relations in natural language processing (NLP) research. NLP research rarely engages with bias in social contexts, limiting its ability to mitigate bias. While researchers have recommended actions, technical methods, and documentation practices, no methodology exists to integrate critical reflections on bias with technical NLP methods. In this paper, after an extensive and interdisciplinary literature review, we contribute a bias-aware methodology for NLP research. We also contribute a definition of biased text, a discussion of the implications of biased NLP systems, and a case study demonstrating how we are executing the bias-aware methodology in research on archival metadata descriptions.</abstract>
       <url hash="273c2634">2020.gebnlp-1.10</url>
@@ -120,7 +120,7 @@
     <paper id="11">
       <title>Gender and sentiment, critics and authors: a dataset of <fixed-case>N</fixed-case>orwegian book reviews</title>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>125–138</pages>
       <abstract>Gender bias in models and datasets is widely studied in NLP. The focus has usually been on analysing how females and males express themselves, or how females and males are described. However, a less studied aspect is the combination of these two perspectives, how female and male describe the same or opposite gender. In this paper, we present a new gender annotated sentiment dataset of critics reviewing the works of female and male authors. We investigate if this newly annotated dataset contains differences in how the works of male and female authors are critiqued, in particular in terms of positive and negative sentiment. We also explore the differences in how this is done by male and female critics. We show that there are differences in how critics assess the works of authors of the same or opposite gender. For example, male critics rate crime novels written by females, and romantic and sentimental works written by males, more negatively.</abstract>
diff --git a/data/xml/2020.globalex.xml b/data/xml/2020.globalex.xml
index 3d8a8c99b8..8db25a9a9b 100644
--- a/data/xml/2020.globalex.xml
+++ b/data/xml/2020.globalex.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2020 Globalex Workshop on Linked Lexicography</booktitle>
       <editor><first>Ilan</first><last>Kernerman</last></editor>
       <editor><first>Simon</first><last>Krek</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <editor><first>Jorge</first><last>Gracia</last></editor>
       <editor><first>Sina</first><last>Ahmadi</last></editor>
       <editor><first>Besim</first><last>Kabashi</last></editor>
@@ -26,7 +26,7 @@
       <author><first>Maxim</first><last>Ionov</last></author>
       <author><first>Jesse</first><last>de Does</last></author>
       <author><first>Katrien</first><last>Depuydt</last></author>
-      <author><first>Anas Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></author>
       <author><first>Sander</first><last>Stolk</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>John Philip</first><last>McCrae</last></author>
@@ -38,10 +38,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>yn<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>lass Linked Lexicon: Mapping Synonymy between Languages</title>
-      <author><first>Zdenka</first><last>Uresova</last></author>
-      <author><first>Eva</first><last>Fucikova</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fucikova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>10–19</pages>
       <abstract>This paper reports on an extended version of a synonym verb class lexicon, newly called SynSemClass (formerly CzEngClass). This lexicon stores cross-lingual semantically similar verb senses in synonym classes extracted from a richly annotated parallel corpus, the Prague Czech-English Dependency Treebank. When building the lexicon, we make use of predicate-argument relations (valency) and link them to semantic roles; in addition, each entry is linked to several external lexicons of more or less “semantic” nature, namely FrameNet, WordNet, VerbNet, OntoNotes and PropBank, and Czech VALLEX. The aim is to provide a linguistic resource that can be used to compare semantic roles and their syntactic properties and features across languages within and across synonym groups (classes, or ’synsets’), as well as gold standard data for automatic NLP experiments with such synonyms, such as synonym discovery, feature mapping, etc. However, perhaps the most important goal is to eventually build an event type ontology that can be referenced and used as a human-readable and human-understandable “database” for all types of events, processes and states. While the current paper describes primarily the content of the lexicon, we are also presenting a preliminary design of a format compatible with Linked Data, on which we are hoping to get feedback during discussions at the workshop. Once the resource (in whichever form) is applied to corpus annotation, deep analysis will be possible using such combined resources as training data.</abstract>
       <url hash="88c2110f">2020.globalex-1.2</url>
@@ -80,7 +80,7 @@
     </paper>
     <paper id="6">
       <title>Widening the Discussion on “False <fixed-case>F</fixed-case>riends” in Multilingual Wordnets</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Ana</first><last>Luís</last></author>
       <pages>36</pages>
       <abstract>There are wordnets in many languages, many aligned with Princeton WordNet, some of which in a (semi-)automatic process, but we rarely see actual discussions on the role of false friends in this process. Having in mind known issues related to such words in language translation, and further motivated by false friend-related issues on the alignment of a Portuguese wordnet with Princeton Wordnet, we aim to widen this discussion, while suggesting preliminary ideas of how wordnets could benefit from this kind of research.</abstract>
@@ -100,8 +100,8 @@
     <paper id="8">
       <title>Building Sense Representations in <fixed-case>D</fixed-case>anish by Combining Word Embeddings with Lexical Resources</title>
       <author><first>Ida</first><last>Rørmann Olsen</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>45–52</pages>
       <abstract>Our aim is to identify suitable sense representations for NLP in Danish. We investigate sense inventories that correlate with human interpretations of word meaning and ambiguity as typically described in dictionaries and wordnets and that are well reflected distributionally as expressed in word embeddings. To this end, we study a number of highly ambiguous Danish nouns and examine the effectiveness of sense representations constructed by combining vectors from a distributional model with the information from a wordnet. We establish representations based on centroids obtained from wordnet synests and example sentences as well as representations established via are tested in a word sense disambiguation task. We conclude that the more information extracted from the wordnet entries (example sentence, definition, semantic relations) the more successful the sense representation vector.</abstract>
       <url hash="1122a9b0">2020.globalex-1.8</url>
@@ -140,7 +140,7 @@
     <paper id="12">
       <title><fixed-case>MWSA</fixed-case> Task at <fixed-case>G</fixed-case>loba<fixed-case>L</fixed-case>ex 2020: <fixed-case>RACAI</fixed-case>’s Word Sense Alignment System using a Similarity Measurement of Dictionary Definitions</title>
       <author><first>Vasile</first><last>Pais</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <author><first>Radu</first><last>Ion</last></author>
       <pages>69–75</pages>
       <abstract>This paper describes RACAI’s word sense alignment system, which participated in the Monolingual Word Sense Alignment shared task organized at GlobaLex 2020 workshop. We discuss the system architecture, some of the challenges that we faced as well as present our results on several of the languages available for the task.</abstract>
@@ -173,7 +173,7 @@
     <paper id="15">
       <title><fixed-case>NUIG</fixed-case> at <fixed-case>TIAD</fixed-case>: Combining Unsupervised <fixed-case>NLP</fixed-case> and Graph Metrics for Translation Inference</title>
       <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <pages>92–97</pages>
       <abstract>In this paper, we present the NUIG system at the TIAD shard task. This system includes graph-based metrics calculated using novel algorithms, with an unsupervised document embedding tool called ONETA and an unsupervised multi-way neural machine translation method. The results are an improvement over our previous system and produce the highest precision among all systems in the task as well as very competitive F-Measure results. Incorporating features from other systems should be easy in the framework we describe in this paper, suggesting this could very easily be extended to an even stronger result.</abstract>
       <url hash="d9dbf941">2020.globalex-1.15</url>
diff --git a/data/xml/2020.icon.xml b/data/xml/2020.icon.xml
index 003dab2521..8c9cc74590 100644
--- a/data/xml/2020.icon.xml
+++ b/data/xml/2020.icon.xml
@@ -3,8 +3,8 @@
   <volume id="main" ingest-date="2021-06-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th International Conference on Natural Language Processing (ICON)</booktitle>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
       <publisher>NLP Association of India (NLPAI)</publisher>
       <address>Indian Institute of Technology Patna, Patna, India</address>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>The <fixed-case>WEAVE</fixed-case> Corpus: Annotating Synthetic Chemical Procedures in Patents with Chemical Named Entities</title>
       <author><first>Ravindra</first><last>Nittala</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>1–9</pages>
       <abstract>The Modern pharmaceutical industry depends on the iterative design of novel synthetic routes for drugs while not infringing on existing intellectual property rights. Such a design process calls for analyzing many existing synthetic chemical reactions and planning the synthesis of novel chemicals. These procedures have been historically available in unstructured raw text form in publications and patents. To facilitate automated synthetic chemical reactions analysis and design the novel synthetic reactions using Natural Language Processing (NLP) methods, we introduce a Named Entity Recognition (NER) dataset of the Examples section in 180 full-text patent documents with 5188 synthetic procedures annotated by domain experts. All the chemical entities which are part of the synthetic discourse were annotated with suitable class labels. We present the second-largest chemical NER corpus with 100,129 annotations and the highest IAA value of 98.73% (F-measure) on a 45 document subset. We discuss this new resource in detail and highlight some specific challenges in annotating synthetic chemical procedures with chemical named entities. We make the corpus available to the community to promote further research and development of downstream NLP systems applications. We also provide baseline results for the NER model to the community to improve on.</abstract>
       <url hash="b38b0528">2020.icon-main.1</url>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Treatment of optional forms in Mathematical modelling of <fixed-case>P</fixed-case>āṇini</title>
       <author><first>Anupriya</first><last>Aggarwal</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>15–21</pages>
       <abstract>Pāṇini in his Aṣṭādhyāyī has written the grammar of Sanskrit in an extremely concise manner in the form of about 4000 sūtras. We have attempted to mathematically remodel the data produced by these sūtras. The mathematical modelling is a way to show that the Pāṇinian approach is a minimal method of capturing the grammatical data for Sanskrit which is a natural language. The sūtras written by Pāṇini can be written as functions, that is for a single input the function produces a single output of the form y=f(x), where x and y is the input and output respectively. However, we observe that for some input dhātus, we get multiple outputs. For such cases, we have written multivalued functions that is the functions which give two or more outputs for a single input. In other words, multivalued function is a way to represent optional output forms which are expressed in Pāṇinian grammar with the help of 3 terms i.e. vā, vibhaṣā, and anyatarasyam. Comparison between the techniques employed by Pāṇini and our notation of functions helps us understand how Pāṇinian techniques ensure brevity and terseness, hence illustrating that Pāṇinian grammar is minimal.</abstract>
       <url hash="0c362af0">2020.icon-main.3</url>
@@ -47,7 +47,7 @@
     <paper id="4">
       <title>Automatic <fixed-case>H</fixed-case>adith Segmentation using <fixed-case>PPM</fixed-case> Compression</title>
       <author><first>Taghreed</first><last>Tarmom</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad</first><last>Alsalka</last></author>
       <pages>22–29</pages>
       <abstract>In this paper we explore the use of Prediction by partial matching (PPM) compression based to segment Hadith into its two main components (Isnad and Matan). The experiments utilized the PPMD variant of the PPM, showing that PPMD is effective in Hadith segmentation. It was also tested on Hadith corpora of different structures. In the first experiment we used the non- authentic Hadith (NAH) corpus for train- ing models and testing, and in the second experiment we used the NAH corpus for training models and the Leeds University and King Saud University (LK) Hadith cor- pus for testing PPMD segmenter. PPMD of order 7 achieved an accuracy of 92.76% and 90.10% in the first and second experiments, respectively.</abstract>
@@ -78,10 +78,10 @@
     <paper id="7">
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>M</fixed-case>anipuri and Mizo Post-Editing Effort and its Impact on Low Resource Machine Translation</title>
       <author><first>Loitongbam</first><last>Sanayai Meetei</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>50–59</pages>
       <abstract>We present the first study on the post-editing (PE) effort required to build a parallel dataset for English-Manipuri and English-Mizo, in the context of a project on creating data for machine translation (MT). English source text from a local daily newspaper are machine translated into Manipuri and Mizo using PBSMT systems built in-house. A Computer Assisted Translation (CAT) tool is used to record the time, keystroke and other indicators to measure PE effort in terms of temporal and technical effort. A positive correlation between the technical effort and the number of function words is seen for English-Manipuri and English-Mizo but a negative correlation between the technical effort and the number of noun words for English-Mizo. However, average time spent per token in PE English-Mizo text is negatively correlated with the temporal effort. The main reason for these results are due to (i) English and Mizo using the same script, while Manipuri uses a different script and (ii) the agglutinative nature of Manipuri. Further, we check the impact of training a MT system in an incremental approach, by including the post-edited dataset as additional training data. The result shows an increase in HBLEU of up to 4.6 for English-Manipuri.</abstract>
       <url hash="5209bb04">2020.icon-main.7</url>
@@ -91,7 +91,7 @@
       <title>Learning to Interact: An Adaptive Interaction Framework for Knowledge Graph Embeddings</title>
       <author><first>.</first><last>Chandrahas</last></author>
       <author><first>Nilesh</first><last>Agrawal</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>60–69</pages>
       <abstract>Knowledge Graph (KG) Embedding methods have been widely studied in the past few years and many methods have been proposed. These methods represent entities and relations in the KG as vectors in a vector space, trained to distinguish correct edges from the incorrect ones. For this distinction, simple functions of vectors’ dimensions, called interactions, are used. These interactions are used to calculate the candidate tail entity vector which is matched against all entities in the KG. However, for most of the existing methods, these interactions are fixed and manually specified. In this work, we propose an automated framework for discovering the interactions while training the KG Embeddings. The proposed method learns relevant interactions along with other parameters during training, allowing it to adapt to different datasets. Many of the existing methods can be seen as special cases of the proposed framework. We demonstrate the effectiveness of the proposed method on link prediction task by extensive experiments on multiple benchmark datasets.</abstract>
       <url hash="2b7c809e">2020.icon-main.8</url>
@@ -102,7 +102,7 @@
       <author><first>.</first><last>Chandrahas</last></author>
       <author><first>Tathagata</first><last>Sengupta</last></author>
       <author><first>Cibi</first><last>Pragadeesh</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>70–75</pages>
       <abstract>We study the problem of inducing interpretability in Knowledge Graph (KG) embeddings. Learning KG embeddings has been an active area of research in the past few years, resulting in many different models. However, most of these methods do not address the interpretability (semantics) of individual dimensions of the learned embeddings. In this work, we study this problem and propose a method for inducing interpretability in KG embeddings using entity co-occurrence statistics. The proposed method significantly improves the interpretability, while maintaining comparable performance in other KG tasks.</abstract>
       <url hash="31cf7c3b">2020.icon-main.9</url>
@@ -111,7 +111,7 @@
     <paper id="10">
       <title>Solving Arithmetic Word Problems Using Transformer and Pre-processing of Problem Texts</title>
       <author><first>Kaden</first><last>Griffith</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>76–84</pages>
       <abstract>This paper outlines the use of Transformer networks trained to translate math word problems to equivalent arithmetic expressions in infix, prefix, and postfix notations. We compare results produced by a large number of neural configurations and find that most configurations outperform previously reported approaches on three of four datasets with significant increases in accuracy of over 20 percentage points. The best neural approaches boost accuracy by 30% on average when compared to the previous state-of-the-art.</abstract>
       <url hash="b9bb0f89">2020.icon-main.10</url>
@@ -140,7 +140,7 @@
       <author><first>Abhinav Reddy</first><last>Appidi</last></author>
       <author><first>Vamshi Krishna</first><last>Srirangam</last></author>
       <author><first>Darsi</first><last>Suhas</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>101–107</pages>
       <abstract>Part-of-Speech (POS) is one of the essential tasks for many Natural Language Processing (NLP) applications. There has been a significant amount of work done in POS tagging for resource-rich languages. POS tagging is an essential phase of text analysis in understanding the semantics and context of language. These tags are useful for higher-level tasks such as building parse trees, which can be used for Named Entity Recognition, Coreference resolution, Sentiment Analysis, and Question Answering. There has been work done on code-mixed social media corpus but not on POS tagging of Kannada-English code-mixed data. Here, we present Kannada-English code- mixed social media corpus annotated with corresponding POS tags. We also experimented with machine learning classification models CRF, Bi-LSTM, and Bi-LSTM-CRF models on our corpus.</abstract>
       <url hash="5b6be213">2020.icon-main.13</url>
@@ -218,7 +218,7 @@
       <title>A New Approach to Claim Check-Worthiness Prediction and Claim Verification</title>
       <author><first>Shukrity</first><last>Si</last></author>
       <author><first>Anisha</first><last>Datta</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>155–160</pages>
       <abstract>The more we are advancing towards a modern world, the more it opens the path to falsification in every aspect of life. Even in case of knowing the surrounding, common people can not judge the actual scenario as the promises, comments and opinions of the influential people at power keep changing every day. Therefore computationally determining the truthfulness of such claims and comments has a very important societal impact. This paper describes a unique method to extract check-worthy claims from the 2016 US presidential debates and verify the truthfulness of the check-worthy claims. We classify the claims for check-worthiness with our modified Tf-Idf model which is used in background training on fact-checking news articles (NBC News and Washington Post). We check the truthfulness of the claims by using POS, sentiment score and cosine similarity features.</abstract>
       <url hash="3c54f0ed">2020.icon-main.20</url>
@@ -227,7 +227,7 @@
     <paper id="21">
       <title>Improving Passage Re-Ranking with Word N-Gram Aware Coattention Encoder</title>
       <author><first>Chaitanya</first><last>Alaparthi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>161–169</pages>
       <abstract>In text matching applications, coattentions have proved to be highly effective attention mechanisms. Coattention enables the learning to attend based on computing word level affinity scores between two texts. In this paper, we propose two improvements to coattention mechanism in the context of passage ranking (re-ranking). First, we extend the coattention mechanism by applying it across all word n-grams of query and passage. We show that these word n-gram coattentions can capture local context in query and passage to better judge the relevance between them. Second, we further improve the model performance by proposing a query based attention pooling on passage encodings. We evaluate these two methods on MSMARCO passage re-ranking task. The experiment results shows that these two methods resulted in a relative increase of 8.04% in Mean Reciprocal Rank @10 (MRR@10) compared to the naive coattention mechanism. At the time of writing this paper, our methods are the best non transformer model on MS MARCO passage re-ranking task and are competitive to BERT base while only having less than 10% of the parameters.</abstract>
       <url hash="a0cec884">2020.icon-main.21</url>
@@ -236,7 +236,7 @@
     <paper id="22">
       <title>Language Model Metrics and <fixed-case>P</fixed-case>rocrustes Analysis for Improved Vector Transformation of <fixed-case>NLP</fixed-case> Embeddings</title>
       <author><first>Thomas</first><last>Conley</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>170–174</pages>
       <abstract>Artificial Neural networks are mathematical models at their core. This truism presents some fundamental difficulty when networks are tasked with Natural Language Processing. A key problem lies in measuring the similarity or distance among vectors in NLP embedding space, since the mathematical concept of distance does not always agree with the linguistic concept. We suggest that the best way to measure linguistic distance among vectors is by employing the Language Model (LM) that created them. We introduce Language Model Distance (LMD) for measuring accuracy of vector transformations based on the Distributional Hypothesis ( LMD Accuracy ). We show the efficacy of this metric by applying it to a simple neural network learning the Procrustes algorithm for bilingual word mapping.</abstract>
       <url hash="0e68c2c1">2020.icon-main.22</url>
@@ -256,7 +256,7 @@
     <paper id="24">
       <title>Automated <fixed-case>A</fixed-case>rabic Essay Evaluation</title>
       <author><first>Abeer</first><last>Alqahtani</last></author>
-      <author><first>Amal</first><last>Alsaif</last></author>
+      <author id="amal-al-saif"><first>Amal</first><last>Alsaif</last></author>
       <pages>181–190</pages>
       <abstract>Although the manual evaluation of essays is a time-consuming process, writing essays has a significant role in assessing learning outcomes. Therefore, automated essay evaluation represents a solution, especially for schools, universities, and testing companies. Moreover, the existence of such systems overcomes some factors that influence manual evaluation such as the evaluator’s mental state, the disparity between evaluators, and others. In this paper, we propose an Arabic essay evaluation system based on a support vector regression (SVR) model along with a wide range of features including morphological, syntactic, semantic, and discourse features. The system evaluates essays according to five criteria: spelling, essay structure, coherence level, style, and punctuation marks, without the need for domain-representative essays (a model essay). A specific model is developed for each criterion; thus, the overall evaluation of the essay is a combination of the previous criteria results. We develop our dataset based on essays written by university students and journalists whose native language is Arabic. The dataset is then evaluated by experts. The experimental results show that 96% of our dataset is correctly evaluated in the overall score and the correlation between the system and the experts’ evaluation is 0.87. Additionally, the system shows variant results in evaluating criteria separately.</abstract>
       <url hash="0a365bfd">2020.icon-main.24</url>
@@ -298,7 +298,7 @@
       <title>Self-Supervised Claim Identification for Automated Fact Checking</title>
       <author><first>Archita</first><last>Pathak</last></author>
       <author><first>Mohammad Abuzar</first><last>Shaikh</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>213–227</pages>
       <abstract>We propose a novel, attention-based self-supervised approach to identify “claim-worthy” sentences in a fake news article, an important first step in automated fact-checking. We leverage <i>aboutness</i> of headline and content using attention mechanism for this task. The identified claims can be used for downstream task of claim verification for which we are releasing a benchmark dataset of manually selected compelling articles with veracity labels and associated evidence. This work goes beyond stylistic analysis to identifying content that influences reader belief. Experiments with three datasets show the strength of our model.</abstract>
       <url hash="cd5ce1ae">2020.icon-main.28</url>
@@ -329,7 +329,7 @@
       <title>Parsing <fixed-case>I</fixed-case>ndian <fixed-case>E</fixed-case>nglish News Headlines</title>
       <author><first>Samapika</first><last>Roy</last></author>
       <author><first>Sukhada</first><last>Sukhada</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>239–242</pages>
       <abstract>Parsing news Headlines is one of the difficult tasks of Natural Language Processing. It is mostly because news Headlines (NHs) are not complete grammatical sentences. News editors use all sorts of tricks to grab readers’ attention, for instance, unusual capitalization as in the headline’ Ear SHOT ashok rajagopalan’; some are world knowledge demanding like ‘Church reformation celebrated’ where the ‘Church reformation’ refers to a historical event and not a piece of news about an ordinary church. The lack of transparency in NHs can be linguistic, cultural, social, or contextual. The lack of space provided for a news headline has led to creative liberty. Though many works like news value extraction, summary generation, emotion classification of NHs have been going on, parsing them had been a tough challenge. Linguists have also been interested in NHs for creativity in the language used by bending traditional grammar rules. Researchers have conducted studies on news reportage, discourse analysis of NHs, and many more. While the creativity seen in NHs is fascinating for language researchers, it poses a computational challenge for Natural Language Processing researchers. This paper presents an outline of the ongoing doctoral research on the parsing of Indian English NHs. The ultimate aim of this research is to provide a module that will generate correctly parsed NHs. The intention is to enhance the broad applicability of newspaper corpus for future Natural Language Processing applications.</abstract>
       <url hash="2e89e26b">2020.icon-main.31</url>
@@ -347,7 +347,7 @@
     <paper id="33">
       <title>Sentimental Poetry Generation</title>
       <author><first>Kasper Aalberg</first><last>Røstvold</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>246–256</pages>
       <abstract>The paper investigates how well poetry can be generated to contain a specific sentiment, and whether readers of the poetry experience the intended sentiment. The poetry generator consists of a bi-directional Long Short-Term Memory (LSTM) model, combined with rhyme pair generation, rule-based word prediction methods, and tree search for extending generation possibilities. The LSTM network was trained on a set of English poetry written and published by users on a public website. Human judges evaluated poems generated by the system, both with a positive and negative sentiment. The results indicate that while there are some weaknesses in the system compared to other state-of-the-art solutions, it is fully capable of generating poetry with an inherent sentiment that is perceived by readers.</abstract>
       <url hash="19e2c85b">2020.icon-main.33</url>
@@ -356,7 +356,7 @@
     <paper id="34">
       <title><fixed-case>WEKA</fixed-case> in Forensic Authorship Analysis: A corpus-based approach of Saudi Authors</title>
       <author><first>Mashael</first><last>AlAmr</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>257–260</pages>
       <abstract>This is a pilot study that aims to explore the potential of using WEKA in forensic authorship analysis. It is a corpus-based research using data from Twitter collected from thirteen authors from Riyadh, Saudi Arabia. It examines the performance of unbalanced and balanced data sets using different classifiers and parameters of word grams. The attributes are dialect-specific linguistic features categorized as word grams. The findings further support previous studies in computational authorship identification.</abstract>
       <url hash="8811664b">2020.icon-main.34</url>
@@ -365,7 +365,7 @@
     <paper id="35">
       <title>Native-Language Identification with Attention</title>
       <author><first>Stian</first><last>Steinbakken</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>261–271</pages>
       <abstract>The paper explores how an attention-based approach can increase performance on the task of native-language identification (NLI), i.e., to identify an author’s first language given information expressed in a second language. Previously, Support Vector Machines have consistently outperformed deep learning-based methods on the TOEFL11 data set, the de facto standard for evaluating NLI systems. The attention-based system BERT (Bidirectional Encoder Representations from Transformers) was first tested in isolation on the TOEFL11 data set, then used in a meta-classifier stack in combination with traditional techniques to produce an accuracy of 0.853. However, more labelled NLI data is now available, so BERT was also trained on the much larger Reddit-L2 data set, containing 50 times as many examples as previously used for English NLI, giving an accuracy of 0.902 on the Reddit-L2 in-domain test scenario, improving the state-of-the-art by 21.2 percentage points.</abstract>
       <url hash="eba43b9a">2020.icon-main.35</url>
@@ -517,7 +517,7 @@
     <paper id="50">
       <title>Weak Supervision using Linguistic Knowledge for Information Extraction</title>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Ankita</first><last>Jain</last></author>
       <author><first>Jyoti</first><last>Bhat</last></author>
       <author><first>Simi</first><last>Johnson</last></author>
@@ -531,7 +531,7 @@
       <author><first>Parth</first><last>Patel</last></author>
       <author><first>Manthan</first><last>Mehta</last></author>
       <author><first>Pushpak</first><last>Bhattacharya</last></author>
-      <author><first>Arjun</first><last>Atreya</last></author>
+      <author id="arjun-atreya-v"><first>Arjun</first><last>Atreya</last></author>
       <pages>373–378</pages>
       <abstract>In this paper we present a novel transliteration technique based on Orthographic Syllable(OS) segmentation for low-resource Indian languages (ILs). Given that alignment has produced promising results in Statistical Machine Transliteration systems and phonology plays an important role in transliteration, we introduce a new model which uses alignment representation similar to that of IBM model 3 to pre-process the tokenized input sequence and then use pre-trained source and target OS-embeddings for training. We apply our model for transliteration from ILs to English and report our accuracy based on Top-1 Exact Match. We also compare our accuracy with a previously proposed Phrase-Based model and report improvements.</abstract>
       <url hash="64e2c482">2020.icon-main.51</url>
@@ -578,7 +578,7 @@
       <title>A Rule Based Lightweight <fixed-case>B</fixed-case>engali Stemmer</title>
       <author><first>Souvick</first><last>Das</last></author>
       <author><first>Rajat</first><last>Pandit</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>400–408</pages>
       <abstract>In the field of Natural Language Processing (NLP) the process of stemming plays a significant role. Stemmer transforms an inflected word to its root form. Stemmer significantly increases the efficiency of Information Retrieval (IR) systems. It is a very basic yet fundamental text pre-processing task widely used in many NLP tasks. Several important works on stemming have been carried out by researchers in English and other major languages. In this paper, we study and review existing works on stemming in Bengali and other Indian languages. Finally, we propose a rule based approach that explores Bengali morphology and leverages WordNet to achieve better accuracy. Our algorithm produced stemming accuracy of 98.86% for Nouns and 99.75% for Verbs.</abstract>
       <url hash="51012610">2020.icon-main.55</url>
@@ -599,7 +599,7 @@
       <title>Deep Neural Model for <fixed-case>M</fixed-case>anipuri Multiword Named Entity Recognition with Unsupervised Cluster Feature</title>
       <author><first>Jimmy</first><last>Laishram</last></author>
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>420–429</pages>
       <abstract>The recognition task of Multi-Word Named Entities (MNEs) in itself is a challenging task when the language is inflectional and agglutinative. Having breakthrough NLP researches with deep neural network and language modelling techniques, the applicability of such techniques/algorithms for Indian language like Manipuri remains unanswered. In this paper an attempt to recognize Manipuri MNE is performed using a Long Short Term Memory (LSTM) recurrent neural network model in conjunction with Part Of Speech (POS) embeddings. To further improve the classification accuracy, word cluster information using K-means clustering approach is added as a feature embedding. The cluster information is generated using a Skip-gram based words vector that contains the semantic and syntactic information of each word. The model so proposed does not use extensive language morphological features to elevate its accuracy. Finally the model’s performance is compared with the other machine learning based Manipuri MNE models.</abstract>
       <url hash="b8dea91c">2020.icon-main.57</url>
@@ -686,7 +686,7 @@
     <paper id="65">
       <title>Developing a <fixed-case>F</fixed-case>aroese <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-tagging solution using <fixed-case>I</fixed-case>celandic methods</title>
       <author><first>Hinrik</first><last>Hafsteinsson</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>481–490</pages>
       <abstract>We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40% overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology</abstract>
       <url hash="aede1fc4">2020.icon-main.65</url>
@@ -705,12 +705,12 @@
   <volume id="techdofication" ingest-date="2021-06-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task</booktitle>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Asif</first><last>Ekbal</last></editor>
       <editor><first>Karunesh</first><last>Arora</last></editor>
-      <editor><first>Sudip Kumar</first><last>Naskar</last></editor>
+      <editor id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></editor>
       <editor><first>Dipankar</first><last>Ganguly</last></editor>
-      <editor><first>Sobha</first><last>L</last></editor>
+      <editor id="sobha-l"><first>Sobha</first><last>L</last></editor>
       <editor><first>Radhika</first><last>Mamidi</last></editor>
       <editor><first>Sunita</first><last>Arora</last></editor>
       <editor><first>Pruthwik</first><last>Mishra</last></editor>
@@ -817,12 +817,12 @@
   <volume id="termtraction" ingest-date="2021-06-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th International Conference on Natural Language Processing (ICON): TermTraction 2020 Shared Task</booktitle>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Asif</first><last>Ekbal</last></editor>
       <editor><first>Karunesh</first><last>Arora</last></editor>
-      <editor><first>Sudip Kumar</first><last>Naskar</last></editor>
+      <editor id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></editor>
       <editor><first>Dipankar</first><last>Ganguly</last></editor>
-      <editor><first>Sobha</first><last>L</last></editor>
+      <editor id="sobha-l"><first>Sobha</first><last>L</last></editor>
       <editor><first>Radhika</first><last>Mamidi</last></editor>
       <editor><first>Sunita</first><last>Arora</last></editor>
       <editor><first>Pruthwik</first><last>Mishra</last></editor>
@@ -871,12 +871,12 @@
   <volume id="adapmt" ingest-date="2021-06-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th International Conference on Natural Language Processing (ICON): Adap-MT 2020 Shared Task</booktitle>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Asif</first><last>Ekbal</last></editor>
       <editor><first>Karunesh</first><last>Arora</last></editor>
-      <editor><first>Sudip Kumar</first><last>Naskar</last></editor>
+      <editor id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></editor>
       <editor><first>Dipankar</first><last>Ganguly</last></editor>
-      <editor><first>Sobha</first><last>L</last></editor>
+      <editor id="sobha-l"><first>Sobha</first><last>L</last></editor>
       <editor><first>Radhika</first><last>Mamidi</last></editor>
       <editor><first>Sunita</first><last>Arora</last></editor>
       <editor><first>Pruthwik</first><last>Mishra</last></editor>
@@ -894,9 +894,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>JUNLP</fixed-case>@<fixed-case>ICON</fixed-case>2020: Low Resourced Machine Translation for Indic Languages</title>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>1–5</pages>
       <abstract>In the current work, we present the description of the systems submitted to a machine translation shared task organized by ICON 2020: 17th International Conference on Natural Language Processing. The systems were developed to show the capability of general domain machine translation when translating into Indic languages, English-Hindi, in our case. The paper shows the training process and quantifies the performance of two state-of-the-art translation systems, viz., Statistical Machine Translation and Neural Machine Translation. While Statistical Machine Translation systems work better in a low-resource setting, Neural Machine Translation systems are able to generate sentences that are fluent in nature. Since both these systems have contrasting advantages, a hybrid system, incorporating both, was also developed to leverage all the strong points. The submitted systems garnered BLEU scores of 8.701943312, 0.6361336198, and 11.78873307 respectively and the scores of the hybrid system helped us to the fourth spot in the competition leaderboard.</abstract>
       <url hash="d6850c93">2020.icon-adapmt.1</url>
@@ -1009,7 +1009,7 @@
       <title><fixed-case>U</fixed-case>rdu To <fixed-case>P</fixed-case>unjabi Machine Translation System</title>
       <author><first>Umrinder Pal</first><last>Singh</last></author>
       <author><first>Vishal</first><last>Goyal</last></author>
-      <author><first>Gurpreet</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet</first><last>Lehal</last></author>
       <pages>16–18</pages>
       <abstract>Machine Translation is a popular area of NLP research field. There are various approaches to develop a machine translation system like Rule-Based, Statistical, Neural and Hybrid. A rule-Based system is based on grammatical rules and uses bilingual lexicons. Statistical and Neural use the large parallel corpus for training the respective models. Where the Hybrid MT system is a mixture of different approaches. In these days the corpus-based machine translation system is quite popular in NLP research area. But these models demands huge parallel corpus. In this research, we have used a hybrid approach to develop Urdu to Punjabi machine translation system. In the developed system, statistical and various sub-system based on the linguistic rule has been used. The system yield 80% accuracy on a different set of the sentence related to domains like Political, Entertainment, Tourism, Sports and Health. The complete system has been developed in a C#.NET programming language.</abstract>
       <url hash="cdbc71aa">2020.icon-demos.6</url>
@@ -1173,7 +1173,7 @@
       <title><fixed-case>E</fixed-case>mp<fixed-case>L</fixed-case>ite: A Lightweight Sequence Labeling Model for Emphasis Selection of Short Texts</title>
       <author><first>Vibhav</first><last>Agarwal</last></author>
       <author><first>Sourav</first><last>Ghosh</last></author>
-      <author><first>Kranti</first><last>Chalamalasetti</last></author>
+      <author id="kranti-chalamalasetti"><first>Kranti</first><last>Chalamalasetti</last></author>
       <author><first>Bharath</first><last>Challa</last></author>
       <author><first>Sonal</first><last>Kumari</last></author>
       <author><last>Harshavardhana</last></author>
diff --git a/data/xml/2020.ijclclp.xml b/data/xml/2020.ijclclp.xml
index 16278b9125..d4ed837f5c 100644
--- a/data/xml/2020.ijclclp.xml
+++ b/data/xml/2020.ijclclp.xml
@@ -23,7 +23,7 @@
       <author><first>Hai-Lun</first><last>Tu</last></author>
       <author><first>Ching-Yu</first><last>Yang</last></author>
       <author><first>Chiao-Wen</first><last>Li</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="3f9bd01e">2020.ijclclp-1.1</url>
       <bibkey>chen-etal-2020-chinese</bibkey>
     </paper>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Linguistic Input and Child Vocalization of 7 Children from 5 to 30 Months: A Longitudinal Study with <fixed-case>LENA</fixed-case> Automatic Analysis</title>
       <author><first>Chia-Cheng</first><last>Lee</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>D. Kimbrough</first><last>Oller</last></author>
       <url hash="a0da7c86">2020.ijclclp-1.4</url>
       <bibkey>lee-etal-2020-linguistic</bibkey>
@@ -103,7 +103,7 @@
       <title>改善詞彙對齊以擷取片語翻譯之方法 (Improving Word Alignment for Extraction Phrasal Translation)</title>
       <author><first>Yi-Jyun</first><last>Chen</last></author>
       <author><first>Ching-Yu Helen</first><last>Yang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="e7845159">2020.ijclclp-2.3</url>
       <language>zho</language>
       <bibkey>chen-etal-2020-gai</bibkey>
@@ -133,7 +133,7 @@
     <paper id="6">
       <title>基於深度聲學模型其狀態精確度最大化之強健語音特徵擷取的初步研究 (The Preliminary Study of Robust Speech Feature Extraction based on Maximizing the Accuracy of States in Deep Acoustic Models)</title>
       <author><first>Li-Chia</first><last>Chang</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <url hash="43a1d5b6">2020.ijclclp-2.6</url>
       <language>zho</language>
       <bibkey>chang-hung-2020-ji</bibkey>
diff --git a/data/xml/2020.inlg.xml b/data/xml/2020.inlg.xml
index 62e6e0b56d..20a6dae2ec 100644
--- a/data/xml/2020.inlg.xml
+++ b/data/xml/2020.inlg.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 13th International Conference on Natural Language Generation</booktitle>
       <editor><first>Brian</first><last>Davis</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
-      <editor><first>John</first><last>Kelleher</last></editor>
+      <editor id="john-kelleher"><first>John</first><last>Kelleher</last></editor>
       <editor><first>Yaji</first><last>Sripada</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -88,7 +88,7 @@
       <title>Studying the Impact of Filling Information Gaps on the Output Quality of Neural Data-to-Text</title>
       <author><first>Craig</first><last>Thomson</last></author>
       <author><first>Zhijie</first><last>Zhao</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>35–40</pages>
       <abstract>It is unfair to expect neural data-to-text to produce high quality output when there are gaps between system input data and information contained in the training text. Thomson et al. (2020) identify and narrow information gaps in Rotowire, a popular data-to-text dataset. In this paper, we describe a study which finds that a state-of-the-art neural data-to-text system produces higher quality output, according to the information extraction (IE) based metrics, when additional input data is carefully selected from this newly available source. It remains to be shown, however, whether IE metrics used in this study correlate well with humans in judging text quality.</abstract>
       <url hash="c62811bb">2020.inlg-1.6</url>
@@ -132,7 +132,7 @@
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Chris</first><last>Emmery</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>68–79</pages>
       <abstract>This paper describes the CACAPO dataset, built for training both neural pipeline and end-to-end data-to-text language generation systems. The dataset is multilingual (Dutch and English), and contains almost 10,000 sentences from human-written news texts in the sports, weather, stocks, and incidents domain, together with aligned attribute-value paired data. The dataset is unique in that the linguistic variation and indirect ways of expressing data in these texts reflect the challenges of real world NLG tasks.</abstract>
       <url hash="6218b1e2">2020.inlg-1.10</url>
@@ -153,7 +153,7 @@
     <paper id="12">
       <title><fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>TI</fixed-case>: Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>T</fixed-case>ibetan</title>
       <author><first>Zewang</first><last>Kuanzhuo</last></author>
-      <author id="li-lin"><first>Li</first><last>Lin</last></author>
+      <author><first>Li</first><last>Lin</last></author>
       <author><first>Zhao</first><last>Weina</last></author>
       <pages>86–90</pages>
       <abstract>Surface realisation is the last but not the least phase of Natural Language Generation, which aims to produce high-quality natural language text based on meaning representations. In this article, we present our work on SimpleNLG-TI, a Tibetan surface realiser, which follows the design paradigm of SimpleNLG-EN. SimpleNLG-TI is built up by our investigation of the core features of Tibetan morphology and syntax. Through this work, we provide a robust and flexible surface realiser for Tibetan generation systems.</abstract>
@@ -186,7 +186,7 @@
       <author><first>André Luiz</first><last>Rosa Teixeira</last></author>
       <author><first>João</first><last>Campos</last></author>
       <author><first>Rossana</first><last>Cunha</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
       <author><first>Fabio</first><last>Cozman</last></author>
       <pages>103–106</pages>
@@ -275,11 +275,11 @@
     </paper>
     <paper id="23">
       <title>Twenty Years of Confusion in Human Evaluation: <fixed-case>NLG</fixed-case> Needs Evaluation Sheets and Standardised Definitions</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
-      <author><first>Miruna-Adriana</first><last>Clinciu</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
+      <author id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Saad</first><last>Mahamood</last></author>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
@@ -293,9 +293,9 @@
     </paper>
     <paper id="24">
       <title>Disentangling the Properties of Human Evaluation Methods: A Classification System to Support Comparability, Meta-Evaluation and Reproducibility Testing</title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Simon</first><last>Mille</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <pages>183–194</pages>
       <abstract>Current standards for designing and reporting human evaluations in NLP mean it is generally unclear which evaluations are comparable and can be expected to yield similar results when applied to the same system outputs. This has serious implications for reproducibility testing and meta-evaluation, in particular given that human evaluation is considered the gold standard against which the trustworthiness of automatic metrics is gauged. %and merging others, as well as deciding which evaluations should be able to reproduce each other’s results. Using examples from NLG, we propose a classification system for evaluations based on disentangling (i) what is being evaluated (which aspect of quality), and (ii) how it is evaluated in specific (a) evaluation modes and (b) experimental designs. We show that this approach provides a basis for determining comparability, hence for comparison of evaluations across papers, meta-evaluation experiments, reproducibility testing.</abstract>
       <url hash="08a92927">2020.inlg-1.24</url>
@@ -347,7 +347,7 @@
     </paper>
     <paper id="29">
       <title><fixed-case>R</fixed-case>epro<fixed-case>G</fixed-case>en: Proposal for a Shared Task on Reproducibility of Human Evaluations in <fixed-case>NLG</fixed-case></title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Shubham</first><last>Agarwal</last></author>
       <author><first>Anastasia</first><last>Shimorina</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
@@ -375,7 +375,7 @@
       <title><fixed-case>BERT</fixed-case>-Based Simplification of <fixed-case>J</fixed-case>apanese Sentence-Ending Predicates in Descriptive Text</title>
       <author><first>Taichi</first><last>Kato</last></author>
       <author><first>Rei</first><last>Miyata</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>242–251</pages>
       <abstract>Japanese sentence-ending predicates intricately combine content words and functional elements, such as aspect, modality, and honorifics; this can often hinder the understanding of language learners and children. Conventional lexical simplification methods, which replace difficult target words with simpler synonyms acquired from lexical resources in a word-by-word manner, are not always suitable for the simplification of such Japanese predicates. Given this situation, we propose a BERT-based simplification method, the core feature of which is the high ability to substitute the whole predicates with simple ones while maintaining their core meanings in the context by utilizing pre-trained masked language models. Experimental results showed that our proposed methods consistently outperformed the conventional thesaurus-based method by a wide margin. Furthermore, we investigated in detail the effectiveness of the average token embedding and dropout, and the remaining errors of our BERT-based methods.</abstract>
       <url hash="356fbb4a">2020.inlg-1.31</url>
@@ -385,7 +385,7 @@
     <paper id="32">
       <title>Amplifying the Range of News Stories with Creativity: Methods and their Evaluation, in <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Rui</first><last>Mendes</last></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <pages>252–262</pages>
       <abstract>Headlines are key for attracting people to a story, but writing appealing headlines requires time and talent. This work aims to automate the production of creative short texts (e.g., news headlines) for an input context (e.g., existing headlines), thus amplifying its range. Well-known expressions (e.g., proverbs, movie titles), which typically include word-play and resort to figurative language, are used as a starting point. Given an input text, they can be recommended by exploiting Semantic Textual Similarity (STS) techniques, or adapted towards higher relatedness. For the latter, three methods that exploit static word embeddings are proposed. Experimentation in Portuguese lead to some conclusions, based on human opinions: STS methods that look exclusively at the surface text, recommend more related expressions; resulting expressions are somewhat related to the input, but adaptation leads to higher relatedness and novelty; humour can be an indirect consequence, but most outputs are not funny.</abstract>
       <url hash="4d7477ff">2020.inlg-1.32</url>
@@ -395,7 +395,7 @@
     <paper id="33">
       <title>Lessons from Computational Modelling of Reference Production in <fixed-case>M</fixed-case>andarin and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>263–272</pages>
       <abstract>Referring expression generation (REG) algorithms offer computational models of the production of referring expressions. In earlier work, a corpus of referring expressions (REs) in Mandarin was introduced. In the present paper, we annotate this corpus, evaluate classic REG algorithms on it, and compare the results with earlier results on the evaluation of REG for English referring expressions. Next, we offer an in-depth analysis of the corpus, focusing on issues that arise from the grammar of Mandarin. We discuss shortcomings of previous REG evaluations that came to light during our investigation and we highlight some surprising results. Perhaps most strikingly, we found a much higher proportion of under-specified expressions than previous studies had suggested, not just in Mandarin but in English as well.</abstract>
       <url hash="eef16ba7">2020.inlg-1.33</url>
@@ -423,7 +423,7 @@
       <author><first>Anjali</first><last>Narayan-Chen</last></author>
       <author><first>Tagyoung</first><last>Chung</last></author>
       <author><first>Anushree</first><last>Venkatesh</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>283–295</pages>
       <abstract>Neural network based approaches to data-to-text natural language generation (NLG) have gained popularity in recent years, with the goal of generating a natural language prompt that accurately realizes an input meaning representation. To facilitate the training of neural network models, researchers created large datasets of paired utterances and their meaning representations. However, the creation of such datasets is an arduous task and they mostly consist of simple meaning representations composed of slot and value tokens to be realized. These representations do not include any contextual information that an NLG system can use when trying to generalize, such as domain information and descriptions of slots and values. In this paper, we present the novel task of Schema-Guided Natural Language Generation (SG-NLG). Here, the goal is still to generate a natural language prompt, but in SG-NLG, the input MRs are paired with rich schemata providing contextual information. To generate a dataset for SG-NLG we re-purpose an existing dataset for another task: dialog state tracking, which includes a large and rich schema spanning multiple different attributes, including information about the domain, user intent, and slot descriptions. We train different state-of-the-art models for neural natural language generation on this dataset and show that in many cases, including rich schema information allows our models to produce higher quality outputs both in terms of semantics and diversity. We also conduct experiments comparing model performance on seen versus unseen domains, and present a human evaluation demonstrating high ratings for overall output quality.</abstract>
       <url hash="ab06674d">2020.inlg-1.35</url>
@@ -445,7 +445,7 @@
       <author><first>Aleksandre</first><last>Maskharashvili</last></author>
       <author><first>Amy</first><last>Isard</last></author>
       <author><first>Xintong</first><last>Li</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>306–315</pages>
       <abstract>While classic NLG systems typically made use of hierarchically structured content plans that included discourse relations as central components, more recent neural approaches have mostly mapped simple, flat inputs to texts without representing discourse relations explicitly. In this paper, we investigate whether it is beneficial to include discourse relations in the input to neural data-to-text generators for texts where discourse relations play an important role. To do so, we reimplement the sentence planning and realization components of a classic NLG system, Methodius, using LSTM sequence-to-sequence (seq2seq) models. We find that although seq2seq models can learn to generate fluent and grammatical texts remarkably well with sufficiently representative Methodius training data, they cannot learn to correctly express Methodius’s similarity and contrast comparisons unless the corresponding RST relations are included in the inputs. Additionally, we experiment with using self-training and reverse model reranking to better handle train/test data mismatches, and find that while these methods help reduce content errors, it remains essential to include discourse relations in the input to obtain optimal performance.</abstract>
       <url hash="59008881">2020.inlg-1.37</url>
@@ -456,10 +456,10 @@
     <paper id="38">
       <title>From “Before” to “After”: Generating Natural Language Instructions from Image Pairs in a Simple Visual Domain</title>
       <author><first>Robin</first><last>Rojowiec</last></author>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Philipp</first><last>Sadler</last></author>
       <author><first>Henrik</first><last>Voigt</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>316–326</pages>
       <abstract>While certain types of instructions can be com-pactly expressed via images, there are situations where one might want to verbalise them, for example when directing someone. We investigate the task of Instruction Generation from Before/After Image Pairs which is to derive from images an instruction for effecting the implied change. For this, we make use of prior work on instruction following in a visual environment. We take an existing dataset, the BLOCKS data collected by Bisk et al. (2016) and investigate whether it is suitable for training an instruction generator as well. We find that it is, and investigate several simple baselines, taking these from the related task of image captioning. Through a series of experiments that simplify the task (by making image processing easier or completely side-stepping it; and by creating template-based targeted instructions), we investigate areas for improvement. We find that captioning models get some way towards solving the task, but have some difficulty with it, and future improvements must lie in the way the change is detected in the instruction.</abstract>
@@ -508,7 +508,7 @@
     </paper>
     <paper id="42">
       <title>Rapformer: Conditional Rap Lyrics Generation with Denoising Autoencoders</title>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Eric</first><last>Malmi</last></author>
       <author><first>Curtis</first><last>Northcutt</last></author>
       <author><first>Loreto</first><last>Parisi</last></author>
@@ -549,11 +549,11 @@
       <title>Gradations of Error Severity in Automatic Image Descriptions</title>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Wei-Ting</first><last>Lu</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Guanyi</first><last>Chen</last></author>
       <author><first>Lin</first><last>Li</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>398–411</pages>
       <abstract>Earlier research has shown that evaluation metrics based on textual similarity (e.g., BLEU, CIDEr, Meteor) do not correlate well with human evaluation scores for automatically generated text. We carried out an experiment with Chinese speakers, where we systematically manipulated image descriptions to contain different kinds of errors. Because our manipulated descriptions form minimal pairs with the reference descriptions, we are able to assess the impact of different kinds of errors on the perceived quality of the descriptions. Our results show that different kinds of errors elicit significantly different evaluation scores, even though all erroneous descriptions differ in only one character from the reference descriptions. Evaluation metrics based solely on textual similarity are unable to capture these differences, which (at least partially) explains their poor correlation with human judgments. Our work provides the foundations for future work, where we aim to understand why different errors are seen as more or less severe.</abstract>
       <url hash="60a0c14e">2020.inlg-1.45</url>
@@ -568,7 +568,7 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Mihail</first><last>Eric</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>412–421</pages>
       <abstract>Open-domain dialog systems aim to generate relevant, informative and engaging responses. In this paper, we propose using a dialog policy to plan the content and style of target, open domain responses in the form of an action plan, which includes knowledge sentences related to the dialog context, targeted dialog acts, topic information, etc. For training, the attributes within the action plan are obtained by automatically annotating the publicly released Topical-Chat dataset. We condition neural response generators on the action plan which is then realized as target utterances at the turn and sentence levels. We also investigate different dialog policy models to predict an action plan given the dialog context. Through automated and human evaluation, we measure the appropriateness of the generated responses and check if the generation models indeed learn to realize the given action plans. We demonstrate that a basic dialog policy that operates at the sentence level generates better responses in comparison to turn level generation as well as baseline models with no action plan. Additionally the basic dialog policy has the added benefit of controllability.</abstract>
       <url hash="009b2e1d">2020.inlg-1.46</url>
diff --git a/data/xml/2020.insights.xml b/data/xml/2020.insights.xml
index 5b5c922b7b..8726f2f0a0 100644
--- a/data/xml/2020.insights.xml
+++ b/data/xml/2020.insights.xml
@@ -44,7 +44,7 @@
     <paper id="3">
       <title>How Far Can We Go with Data Selection? A Case Study on Semantic Sequence Tagging Tasks</title>
       <author><first>Samuel</first><last>Louvan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>15–21</pages>
       <abstract>Although several works have addressed the role of data selection to improve transfer learning for various NLP tasks, there is no consensus about its real benefits and, more generally, there is a lack of shared practices on how it can be best applied. We propose a systematic approach aimed at evaluating data selection in scenarios of increasing complexity. Specifically, we compare the case in which source and target tasks are the same while source and target domains are different, against the more challenging scenario where both tasks and domains are different. We run a number of experiments on semantic sequence tagging tasks, which are relatively less investigated in data selection, and conclude that data selection has more benefit on the scenario when the tasks are the same, while in case of different (although related) tasks from distant domains, a combination of data selection and multi-task learning is ineffective for most cases.</abstract>
       <url hash="f42a715d">2020.insights-1.3</url>
@@ -174,7 +174,7 @@
       <title>Counterfactually-Augmented <fixed-case>SNLI</fixed-case> Training Data Does Not Yield Better Generalization Than Unaugmented Data</title>
       <author><first>William</first><last>Huang</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>82–87</pages>
       <abstract>A growing body of work shows that models exploit annotation artifacts to achieve state-of-the-art performance on standard crowdsourced benchmarks—datasets collected from crowdworkers to create an evaluation task—while still failing on out-of-domain examples for the same task. Recent work has explored the use of counterfactually-augmented data—data built by minimally editing a set of seed examples to yield counterfactual labels—to augment training data associated with these benchmarks and build more robust classifiers that generalize better. However, Khashabi et al. (2020) find that this type of augmentation yields little benefit on reading comprehension tasks when controlling for dataset size and cost of collection. We build upon this work by using English natural language inference data to test model generalization and robustness and find that models trained on a counterfactually-augmented SNLI dataset do not generalize better than unaugmented datasets of similar size and that counterfactual augmentation can hurt performance, yielding models that are less robust to challenge examples. Counterfactual augmentation of natural language understanding data through standard crowdsourcing techniques does not appear to be an effective way of collecting training data and further innovation is required to make this general line of work viable.</abstract>
       <url hash="959b2e63">2020.insights-1.13</url>
diff --git a/data/xml/2020.intellang.xml b/data/xml/2020.intellang.xml
index 842ef3bca1..fdd36567b1 100644
--- a/data/xml/2020.intellang.xml
+++ b/data/xml/2020.intellang.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-05-12" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Intelligent Information Processing and Natural Language Generation</booktitle>
-      <editor><first>Daniel</first><last>Sánchez</last></editor>
+      <editor id="daniel-sanchez-cisneros"><first>Daniel</first><last>Sánchez</last></editor>
       <editor><first>Raquel</first><last>Hervás</last></editor>
       <editor><first>Albert</first><last>Gatt</last></editor>
       <publisher>Association for Computational Lingustics</publisher>
@@ -51,7 +51,7 @@
       <title><fixed-case>S</fixed-case>port<fixed-case>S</fixed-case>ett:Basketball - A robust and maintainable data-set for Natural Language Generation</title>
       <author><first>Craig</first><last>Thomson</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>32–40</pages>
       <url hash="12d3821d">2020.intellang-1.4</url>
       <bibkey>thomson-etal-2020-sportsett</bibkey>
@@ -89,7 +89,7 @@
     </paper>
     <paper id="8">
       <title>Fuzzy Logic for Vagueness Management in Referring Expression Generation</title>
-      <author><first>Nicolás</first><last>Marín</last></author>
+      <author id="nicolas-marin"><first>Nicolás</first><last>Marín</last></author>
       <author><first>Gustavo</first><last>Rivas-Gervilla</last></author>
       <author><first>Daniel</first><last>Sánchez</last></author>
       <pages>71–76</pages>
diff --git a/data/xml/2020.intexsempar.xml b/data/xml/2020.intexsempar.xml
index 3dc9bfb634..3058ac9784 100644
--- a/data/xml/2020.intexsempar.xml
+++ b/data/xml/2020.intexsempar.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the First Workshop on Interactive and Executable Semantic Parsing</booktitle>
       <editor><first>Ben</first><last>Bogin</last></editor>
       <editor><first>Srinivasan</first><last>Iyer</last></editor>
-      <editor><first>Xi Victoria</first><last>Lin</last></editor>
-      <editor><first>Dragomir</first><last>Radev</last></editor>
+      <editor id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></editor>
+      <editor id="dragomir-radev"><first>Dragomir</first><last>Radev</last></editor>
       <editor><first>Alane</first><last>Suhr</last></editor>
       <editor><first/><last>Panupong</last></editor>
       <editor><first>Caiming</first><last>Xiong</last></editor>
diff --git a/data/xml/2020.isa.xml b/data/xml/2020.isa.xml
index ae789a2815..fb32cc8755 100644
--- a/data/xml/2020.isa.xml
+++ b/data/xml/2020.isa.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th Joint ACL-ISO Workshop on Interoperable Semantic Annotation</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille</address>
       <month>May</month>
@@ -26,11 +26,11 @@
     </paper>
     <paper id="2">
       <title><fixed-case>H</fixed-case>indi <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank: An <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Annotated Reference Corpus</title>
-      <author id="pranav-goel"><first>Pranav</first><last>Goel</last></author>
+      <author><first>Pranav</first><last>Goel</last></author>
       <author><first>Suhan</first><last>Prabhu</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
       <author><first>Priyank</first><last>Modi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>13–21</pages>
       <abstract>ISO-TimeML is an international standard for multilingual event annotation, detection, categorization and linking. In this paper, we present the Hindi TimeBank, an ISO-TimeML annotated reference corpus for the detection and classification of events, states and time expressions, and the links between them. Based on contemporary developments in Hindi event recognition, we propose language independent and language-specific deviations from the ISO-TimeML guidelines, but preserve the schema. These deviations include the inclusion of annotator confidence, and an independent mechanism of identifying and annotating states such as copulars and existentials) With this paper, we present an open-source corpus, the Hindi TimeBank. The Hindi TimeBank is a 1,000 article dataset, with over 25,000 events, 3,500 states and 2,000 time expressions. We analyze the dataset in detail and provide a class-wise distribution of events, states and time expressions. Our guidelines and dataset are backed by high average inter-annotator agreement scores.</abstract>
       <url hash="04f6f201">2020.isa-1.2</url>
@@ -81,7 +81,7 @@
       <title>A Consolidated Dataset for Knowledge-based Question Generation using Predicate Mapping of Linked Data</title>
       <author><first>Johanna</first><last>Melly</last></author>
       <author><first>Gabriel</first><last>Luthier</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>59–66</pages>
       <abstract>In this paper, we present the ForwardQuestions data set, made of human-generated questions related to knowledge triples. This data set results from the conversion and merger of the existing SimpleDBPediaQA and SimpleQuestionsWikidata data sets, including the mapping of predicates from DBPedia to Wikidata, and the selection of ‘forward’ questions as opposed to ‘backward’ ones. The new data set can be used to generate novel questions given an unseen Wikidata triple, by replacing the subjects of existing questions with the new one and then selecting the best candidate questions using semantic and syntactic criteria. Evaluation results indicate that the question generation method using ForwardQuestions improves the quality of questions by about 20% with respect to a baseline not using ranking criteria.</abstract>
       <url hash="2d5f334e">2020.isa-1.7</url>
@@ -114,7 +114,7 @@
       <author><first>Ujwal</first><last>Narayan</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
       <author><first>Sumukh</first><last>S</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>88–93</pages>
       <abstract>In this paper, we provide the basic guidelines towards the detection and linguistic analysis of events in Kannada. Kannada is a morphologically rich, resource poor Dravidian language spoken in southern India. As most information retrieval and extraction tasks are resource intensive, very little work has been done on Kannada NLP, with almost no efforts in discourse analysis and dataset creation for representing events or other semantic annotations in the text. In this paper, we linguistically analyze what constitutes an event in this language, the challenges faced with discourse level annotation and representation due to the rich derivational morphology of the language that allows free word order, numerous multi-word expressions, adverbial participle constructions and constraints on subject-verb relations. Therefore, this paper is one of the first attempts at a large scale discourse level annotation for Kannada, which can be used for semantic annotation and corpus development for other tasks in the language.</abstract>
       <url hash="0e178f83">2020.isa-1.10</url>
diff --git a/data/xml/2020.iwclul.xml b/data/xml/2020.iwclul.xml
index c165a6d1ce..02ba3cd064 100644
--- a/data/xml/2020.iwclul.xml
+++ b/data/xml/2020.iwclul.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2020-03-30" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Sixth International Workshop on Computational Linguistics of <fixed-case>U</fixed-case>ralic Languages</booktitle>
-      <editor><first>Tommi A</first><last>Pirinen</last></editor>
-      <editor><first>Francis M.</first><last>Tyers</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></editor>
+      <editor id="francis-tyers"><first>Francis M.</first><last>Tyers</last></editor>
       <editor><first>Michael</first><last>Rießler</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Wien, Austria</address>
diff --git a/data/xml/2020.iwdp.xml b/data/xml/2020.iwdp.xml
index 276d0683b8..354054aff5 100644
--- a/data/xml/2020.iwdp.xml
+++ b/data/xml/2020.iwdp.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Second International Workshop of Discourse Processing</booktitle>
       <editor><first>Qun</first><last>Liu</last></editor>
-      <editor><first>Deyi</first><last>Xiong</last></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last></editor>
       <editor><first>Shili</first><last>Ge</last></editor>
       <editor><first>Xiaojun</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -65,7 +65,7 @@
       <author><first>Kaiyu</first><last>Huang</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
       <author><first>Jingxiang</first><last>Cao</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>22–28</pages>
       <abstract>Previous neural approaches achieve significant progress for Chinese word segmentation (CWS) as a sentence-level task, but it suffers from limitations on real-world scenario. In this paper, we address this issue with a context-aware method and optimize the solution at document-level. This paper proposes a three-step strategy to improve the performance for discourse CWS. First, the method utilizes an auxiliary segmenter to remedy the limitation on pre-segmenter. Then the context-aware algorithm computes the confidence of each split. The maximum probability path is reconstructed via this algorithm. Besides, in order to evaluate the performance in discourse, we build a new benchmark consisting of the latest news and Chinese medical articles. Extensive experiments on this benchmark show that our proposed method achieves a competitive performance on a document-level real-world scenario for CWS.</abstract>
       <url hash="061c4146">2020.iwdp-1.5</url>
@@ -99,7 +99,7 @@
     </paper>
     <paper id="8">
       <title>Bridging Question Answering and Discourse The case of Multi-Sentence Questions</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>48</pages>
       <abstract>In human question-answering (QA), questions are often expressed in the form of multiple sentences. One can see this in both spoken QA interactions, when one person asks a question of another, and written QA, such as are found on-line in FAQs and in what are called ”Community Question-Answering Forums”. Computer-based QA has taken the challenge of these ”multi-sentence questions” to be that of breaking them into an appropriately ordered sequence of separate questions, with both the previous questions and their answers serving as context for the next question. This can be seen, for example, in two recent workshops at AAAI called ”Reasoning for Complex QA” [<url>https://rcqa-ws.github.io/program/</url>]. We claim that, while appropriate for some types of ”multi-sentence questions” (MSQs), it is not appropriate for all, because they are essentially different types of discourse. To support this claim, we need to provide evidence that: • different types of MSQs are answered differently in written or spoken QA between people; • people can (and do) distinguish these different types of MSQs; • systems can be made to both distinguish different types of MSQs and provide appropriate answers.</abstract>
       <url hash="0c0d8441">2020.iwdp-1.8</url>
diff --git a/data/xml/2020.iwltp.xml b/data/xml/2020.iwltp.xml
index 256c7b7d67..6ca9fc54ba 100644
--- a/data/xml/2020.iwltp.xml
+++ b/data/xml/2020.iwltp.xml
@@ -4,11 +4,11 @@
     <meta>
       <booktitle>Proceedings of the 1st International Workshop on Language Technology Platforms</booktitle>
       <editor><first>Georg</first><last>Rehm</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
-      <editor><first>Andrejs</first><last>Vasiļjevs</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -22,11 +22,11 @@
     </frontmatter>
     <paper id="1">
       <title>Infrastructure for the Science and Technology of Language <fixed-case>PORTULAN</fixed-case> <fixed-case>CLARIN</fixed-case></title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Andrea</first><last>Teixeira</last></author>
       <pages>1–7</pages>
       <abstract>This paper presents the PORTULAN CLARIN Research Infrastructure for the Science and Technology of Language, which is part of the European research infrastructure CLARIN ERIC as its Portuguese national node, and belongs to the Portuguese National Roadmap of Research Infrastructures of Strategic Relevance. It encompasses a repository, where resources and metadata are deposited for long-term archiving and access, and a workbench, where Language Technology tools and applications are made available through different modes of interaction, among many other services. It is an asset of utmost importance for the technological development of natural languages and for their preparation for the digital age, contributing to ensure the citizenship of their speakers in the information society.</abstract>
@@ -40,7 +40,7 @@
       <author><first>Bettina</first><last>Klimek</last></author>
       <author><first>Christian</first><last>Fäth</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>8–15</pages>
       <abstract>In this paper we describe the current state of development of the Linguistic Linked Open Data (LLOD) infrastructure, an LOD(sub-)cloud of linguistic resources, which covers various linguistic data bases, lexicons, corpora, terminology and metadata repositories. We give in some details an overview of the contributions made by the European H2020 projects “Prêt-à-LLOD” (‘Ready-to-useMultilingual Linked Language Data for Knowledge Services across Sectors’) and “ELEXIS” (‘European Lexicographic Infrastructure’) to the further development of the LLOD.</abstract>
       <url hash="f73bc3ae">2020.iwltp-1.2</url>
@@ -66,10 +66,10 @@
       <author><first>Ionuț</first><last>Pistol</last></author>
       <author><first>Șerban</first><last>Boghiu</last></author>
       <author><first>Anca-Diana</first><last>Bibiri</last></author>
-      <author><first>Daniela</first><last>Gîfu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last></author>
       <author><first>Andrei</first><last>Scutelnicu</last></author>
-      <author><first>Mihaela</first><last>Onofrei</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="mihaela-plamada-onofrei"><first>Mihaela</first><last>Onofrei</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <author><first>George</first><last>Bugeag</last></author>
       <pages>22–27</pages>
       <abstract>This paper describes the on-going work carried out within the CoBiLiRo (Bimodal Corpus for Romanian Language) research project, part of ReTeRom (Resources and Technologies for Developing Human-Machine Interfaces in Romanian). Data annotation finds increasing use in speech recognition and synthesis with the goal to support learning processes. In this context, a variety of different annotation systems for application to Speech and Text Processing environments have been presented. Even if many designs for the data annotations workflow have emerged, the process of handling metadata, to manage complex user-defined annotations, is not covered enough. We propose a design of the format aimed to serve as an annotation standard for bimodal resources, which facilitates searching, editing and statistical analysis operations over it. The design and implementation of an infrastructure that houses the resources are also presented. The goal is widening the dissemination of bimodal corpora for research valorisation and use in applications. Also, this study reports on the main operations of the web Platform which hosts the corpus and the automatic conversion flows that brings the submitted files at the format accepted by the Platform.</abstract>
@@ -83,12 +83,12 @@
       <author><first>Franciska</first><last>de Jong</last></author>
       <author><first>Alexander</first><last>König</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <author><first>Tero</first><last>Aalto</last></author>
       <author><first>Lars</first><last>Borin</last></author>
       <author><first>Olga</first><last>Gerassimenko</last></author>
       <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Neeme</first><last>Kahusk</last></author>
       <author><first>Krista</first><last>Liin</last></author>
       <author><first>Martin</first><last>Matthiesen</last></author>
@@ -105,7 +105,7 @@
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>Borja</first><last>Anza Porras</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>José Luis</first><last>Fonseca</last></author>
       <author><first>Patricia</first><last>Fonseca</last></author>
       <author><first>Paulo</first><last>Vale</last></author>
@@ -121,7 +121,7 @@
       <author><first>Rui</first><last>Neto</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>David</first><last>Perez</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <pages>35–43</pages>
@@ -137,18 +137,18 @@
       <author><first>Ivan</first><last>Simonini</last></author>
       <author><first>Armin</first><last>Schweinfurth</last></author>
       <author><first>Adelheid</first><last>Glott</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Sangeet</first><last>Sagar</last></author>
       <author><first>Dominik</first><last>Macháček</last></author>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <pages>44–49</pages>
       <abstract>This paper presents our progress towards deploying a versatile communication platform in the task of highly multilingual live speech translation for conferences and remote meetings live subtitling. The platform has been designed with a focus on very low latency and high flexibility while allowing research prototypes of speech and text processing tools to be easily connected, regardless of where they physically run. We outline our architecture solution and also briefly compare it with the ELG platform. Technical details are provided on the most important components and we summarize the test deployment events we ran so far.</abstract>
       <url hash="caf2ba11">2020.iwltp-1.7</url>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>E</fixed-case>co.pangeamt: Industrializing Neural <fixed-case>MT</fixed-case></title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Ángela</first><last>Franco</last></author>
@@ -180,7 +180,7 @@
     <paper id="10">
       <title>Towards Standardization of Web Service Protocols for <fixed-case>NLP</fixed-case>aa<fixed-case>S</fixed-case></title>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <pages>59–65</pages>
       <abstract>Several web services for various natural language processing (NLP) tasks (‘‘NLP-as-a-service” or NLPaaS) have recently been made publicly available. However, despite their similar functionality these services often differ in the protocols they use, thus complicating the development of clients accessing them. A survey of currently available NLPaaS services suggests that it may be possible to identify a minimal application layer protocol that can be shared by NLPaaS services without sacrificing functionality or convenience, while at the same time simplifying the development of clients for these services. In this paper, we hope to raise awareness of the interoperability problems caused by the variety of existing web service protocols, and describe an effort to identify a set of best practices for NLPaaS protocol design. To that end, we survey and compare protocols used by NLPaaS services and suggest how these protocols may be further aligned to reduce variation.</abstract>
@@ -203,7 +203,7 @@
     </paper>
     <paper id="12">
       <title>A Workflow Manager for Complex <fixed-case>NLP</fixed-case> and Content Curation Workflows</title>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
       <author><first>Florian</first><last>Kintzel</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
@@ -217,7 +217,7 @@
       <title>A Processing Platform Relating Data and Tools for <fixed-case>R</fixed-case>omanian Language</title>
       <author><first>Vasile</first><last>Păiș</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <pages>81–88</pages>
       <abstract>This paper presents RELATE (<url>http://relate.racai.ro</url>), a high-performance natural language platform designed for Romanian language. It is meant both for demonstration of available services, from text-span annotations to syntactic dependency trees as well as playing or automatically synthesizing Romanian words, and for the development of new annotated corpora. It also incorporates the search engines for the large COROLA reference corpus of contemporary Romanian and the Romanian wordnet. It integrates multiple text and speech processing modules and exposes their functionality through a web interface designed for the linguist researcher. It makes use of a scheduler-runner architecture, allowing processing to be distributed across multiple computing nodes. A series of input/output converters allows large corpora to be loaded, processed and exported according to user preferences.</abstract>
       <url hash="5ed1a5d8">2020.iwltp-1.13</url>
@@ -241,22 +241,22 @@
     <paper id="15">
       <title>Towards an Interoperable Ecosystem of <fixed-case>AI</fixed-case> and <fixed-case>LT</fixed-case> Platforms: A Roadmap for the Implementation of Different Levels of Interoperability</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Martin</first><last>Welß</last></author>
       <author><first>Ricardo</first><last>Usbeck</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Johannes</first><last>Fischer</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Nils</first><last>Feldhus</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Florian</first><last>Kintzel</last></author>
       <author><first>Elena</first><last>Montiel</last></author>
       <author><first>Víctor</first><last>Rodríguez Doncel</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>David</first><last>Laqua</last></author>
       <author><first>Irina Patricia</first><last>Theile</last></author>
       <author><first>Christian</first><last>Dittmar</last></author>
diff --git a/data/xml/2020.iwpt.xml b/data/xml/2020.iwpt.xml
index 1929816bde..e3f9f3709b 100644
--- a/data/xml/2020.iwpt.xml
+++ b/data/xml/2020.iwpt.xml
@@ -4,14 +4,14 @@
     <meta>
       <booktitle>Proceedings of the 16th International Conference on Parsing Technologies and the IWPT 2020 Shared Task on Parsing into Enhanced Universal Dependencies</booktitle>
       <editor><first>Gosse</first><last>Bouma</last></editor>
-      <editor><first>Yuji</first><last>Matsumoto</last></editor>
+      <editor id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <editor><first>Kenji</first><last>Sagae</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
-      <editor><first>Weiwei</first><last>Sun</last></editor>
-      <editor><first>Anders</first><last>Søgaard</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Søgaard</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
-      <editor><first>Dan</first><last>Zeman</last></editor>
+      <editor id="daniel-zeman"><first>Dan</first><last>Zeman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="4">
       <title>Integrating Graph-Based and Transition-Based Dependency Parsers in the Deep Contextualized Era</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Anders</first><last>Björkelund</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>25–39</pages>
@@ -95,7 +95,7 @@
       <author><first>Zhifeng</first><last>Hu</last></author>
       <author><first>Serhii</first><last>Havrylov</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>62–72</pages>
       <abstract>The goal of homomorphic encryption is to encrypt data such that another party can operate on it without being explicitly exposed to the content of the original data. We introduce an idea for a privacy-preserving transformation on natural language data, inspired by homomorphic encryption. Our primary tool is <i>obfuscation</i>, relying on the properties of natural language. Specifically, a given English text is obfuscated using a neural model that aims to preserve the syntactic relationships of the original sentence so that the obfuscated sentence can be parsed instead of the original one. The model works at the word level, and learns to obfuscate each word separately by changing it into a new word that has a similar syntactic role. The text obfuscated by our model leads to better performance on three syntactic parsers (two dependency and one constituency parsers) in comparison to an upper-bound random substitution baseline. More specifically, the results demonstrate that as more terms are obfuscated (by their part of speech), the substitution upper bound significantly degrades, while the neural model maintains a relatively high performing parser. All of this is done without much sacrifice of privacy compared to the random substitution upper bound. We also further analyze the results, and discover that the substituted words have similar syntactic properties, but different semantic content, compared to the original words.</abstract>
       <url hash="3d25adb8">2020.iwpt-1.7</url>
@@ -107,7 +107,7 @@
       <title>Tensors over Semirings for Latent-Variable Weighted Logic Programs</title>
       <author><first>Esma</first><last>Balkir</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>73–90</pages>
       <abstract>Semiring parsing is an elegant framework for describing parsers by using semiring weighted logic programs. In this paper we present a generalization of this concept: latent-variable semiring parsing. With our framework, any semiring weighted logic program can be latentified by transforming weights from scalar values of a semiring to rank-n arrays, or tensors, of semiring values, allowing the modelling of latent-variable models within the semiring parsing framework. Semiring is too strong a notion when dealing with tensors, and we have to resort to a weaker structure: a partial semiring. We prove that this generalization preserves all the desired properties of the original semiring framework while strictly increasing its expressiveness.</abstract>
       <url hash="36981a2b">2020.iwpt-1.8</url>
@@ -140,8 +140,8 @@
     <paper id="11">
       <title>Self-Training for Unsupervised Parsing with <fixed-case>PRPN</fixed-case></title>
       <author><first>Anhad</first><last>Mohananey</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>105–110</pages>
       <abstract>Neural unsupervised parsing (UP) models learn to parse without access to syntactic annotations, while being optimized for another task like language modeling. In this work, we propose self-training for neural UP models: we leverage aggregated annotations predicted by copies of our model as supervision for future copies. To be able to use our model’s predictions during training, we extend a recent neural UP architecture, the PRPN (Shen et al., 2018a), such that it can be trained in a semi-supervised fashion. We then add examples with parses predicted by our model to our unlabeled UP training data. Our self-trained model outperforms the PRPN by 8.1% F1 and the previous state of the art by 1.6% F1. In addition, we show that our architecture can also be helpful for semi-supervised parsing in ultra-low-resource settings.</abstract>
       <url hash="e557f12f">2020.iwpt-1.11</url>
@@ -152,7 +152,7 @@
     <paper id="12">
       <title>Span-Based <fixed-case>LCFRS</fixed-case>-2 Parsing</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>111–121</pages>
       <abstract>The earliest models for discontinuous constituency parsers used mildly context-sensitive grammars, but the fashion has changed in recent years to grammar-less transition-based parsers that use strong neural probabilistic models to greedily predict transitions. We argue that grammar-based approaches still have something to contribute on top of what is offered by transition-based parsers. Concretely, by using a grammar formalism to restrict the space of possible trees we can use dynamic programming parsing algorithms for exact search for the most probable tree. Previous chart-based parsers for discontinuous formalisms used probabilistically weak generative models. We instead use a span-based discriminative neural model that preserves the dynamic programming properties of the chart parsers. Our parser does not use an explicit grammar, but it does use explicit grammar formalism constraints: we generate only trees that are within the LCFRS-2 formalism. These properties allow us to construct a new parsing algorithm that runs in lower worst-case time complexity of O(l nˆ4 +nˆ6), where <tex-math>n</tex-math> is the sentence length and <tex-math>l</tex-math> is the number of unique non-terminal labels. This parser is efficient in practice, provides best results among chart-based parsers, and is competitive with the best transition based parsers. We also show that the main bottleneck for further improvement in performance is in the restriction of fan-out to degree 2. We show that well-nestedness is helpful in speeding up parsing, but lowers accuracy.</abstract>
       <url hash="5e18690b">2020.iwpt-1.12</url>
@@ -167,7 +167,7 @@
       <author><first>Hyonsu</first><last>Choe</last></author>
       <author><first>Seokwon</first><last>Park</last></author>
       <author><first>Han</first><last>He</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Hansaem</first><last>Kim</last></author>
@@ -237,7 +237,7 @@
     <paper id="19">
       <title>Adaptation of Multilingual Transformer Encoder for Robust Enhanced <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing</title>
       <author><first>Han</first><last>He</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>181–191</pages>
       <abstract>This paper presents our enhanced dependency parsing approach using transformer encoders, coupled with a simple yet powerful ensemble algorithm that takes advantage of both tree and graph dependency parsing. Two types of transformer encoders are compared, a multilingual encoder and language-specific encoders. Our dependency tree parsing (DTP) approach generates only primary dependencies to form trees whereas our dependency graph parsing (DGP) approach handles both primary and secondary dependencies to form graphs. Since DGP does not guarantee the generated graphs are acyclic, the ensemble algorithm is designed to add secondary arcs predicted by DGP to primary arcs predicted by DTP. Our results show that models using the multilingual encoder outperform ones using the language specific encoders for most languages. The ensemble models generally show higher labeled attachment score on enhanced dependencies (ELAS) than the DTP and DGP models. As the result, our best models rank the third place on the macro-average ELAS over 17 languages.</abstract>
       <url hash="4beeb14f">2020.iwpt-1.19</url>
diff --git a/data/xml/2020.iwslt.xml b/data/xml/2020.iwslt.xml
index 5e71039de8..05d083690b 100644
--- a/data/xml/2020.iwslt.xml
+++ b/data/xml/2020.iwslt.xml
@@ -4,14 +4,14 @@
     <meta>
       <booktitle>Proceedings of the 17th International Conference on Spoken Language Translation</booktitle>
       <editor><first>Marcello</first><last>Federico</last></editor>
-      <editor><first>Alex</first><last>Waibel</last></editor>
+      <editor id="alex-waibel"><first>Alex</first><last>Waibel</last></editor>
       <editor><first>Kevin</first><last>Knight</last></editor>
       <editor><first>Satoshi</first><last>Nakamura</last></editor>
-      <editor><first>Hermann</first><last>Ney</last></editor>
+      <editor id="hermann-ney"><first>Hermann</first><last>Ney</last></editor>
       <editor><first>Jan</first><last>Niehues</last></editor>
-      <editor><first>Sebastian</first><last>Stüker</last></editor>
+      <editor id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></editor>
       <editor><first>Dekai</first><last>Wu</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
       <editor><first>Francois</first><last>Yvon</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -29,7 +29,7 @@
       <author><first>Ebrahim</first><last>Ansari</last></author>
       <author><first>Amittai</first><last>Axelrod</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
@@ -40,7 +40,7 @@
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Xutai</first><last>Ma</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Juan</first><last>Pino</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
@@ -66,7 +66,7 @@
       <author><first>Antoine</first><last>Caubrière</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>35–43</pages>
       <abstract>This paper describes the ON-TRAC Consortium translation systems developed for two challenge tracks featured in the Evaluation Campaign of IWSLT 2020, offline speech translation and simultaneous speech translation. ON-TRAC Consortium is composed of researchers from three French academic laboratories: LIA (Avignon Université), LIG (Université Grenoble Alpes), and LIUM (Le Mans Université). Attention-based encoder-decoder models, trained end-to-end, were used for our submissions to the offline speech translation track. Our contributions focused on data augmentation and ensembling of multiple models. In the simultaneous speech translation track, we build on Transformer-based wait-k models for the text-to-text subtask. For speech-to-text simultaneous translation, we attach a wait-k MT system to a hybrid ASR system. We propose an algorithm to control the latency of the ASR+MT cascade and achieve a good latency-quality trade-off on both subtasks.</abstract>
       <url hash="5f16383a">2020.iwslt-1.2</url>
@@ -92,7 +92,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>KIT</fixed-case>’s <fixed-case>IWSLT</fixed-case> 2020 <fixed-case>SLT</fixed-case> Translation System</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Tuan-Nam</first><last>Nguyen</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
@@ -109,9 +109,9 @@
     </paper>
     <paper id="5">
       <title>End-to-End Simultaneous Translation System for <fixed-case>IWSLT</fixed-case>2020 Using Modality Agnostic Meta-Learning</title>
-      <author><first>Hou Jeung</first><last>Han</last></author>
+      <author id="hyojung-han"><first>Hou Jeung</first><last>Han</last></author>
       <author><first>Mohd Abbas</first><last>Zaidi</last></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Nikhil Kumar</first><last>Lakumarapu</last></author>
       <author><first>Beomseok</first><last>Lee</last></author>
       <author><first>Sangha</first><last>Kim</last></author>
@@ -138,8 +138,8 @@
       <title>End-to-End Offline Speech Translation System for <fixed-case>IWSLT</fixed-case> 2020 using Modality Agnostic Meta-Learning</title>
       <author><first>Nikhil Kumar</first><last>Lakumarapu</last></author>
       <author><first>Beomseok</first><last>Lee</last></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
-      <author><first>Hou Jeung</first><last>Han</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="hyojung-han"><first>Hou Jeung</first><last>Han</last></author>
       <author><first>Mohd Abbas</first><last>Zaidi</last></author>
       <author><first>Sangha</first><last>Kim</last></author>
       <pages>73–79</pages>
@@ -152,8 +152,8 @@
     <paper id="8">
       <title>End-to-End Speech-Translation with Knowledge Distillation: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case>2020</title>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>80–88</pages>
       <abstract>This paper describes FBK’s participation in the IWSLT 2020 offline speech translation (ST) task. The task evaluates systems’ ability to translate English TED talks audio into German texts. The test talks are provided in two versions: one contains the data already segmented with automatic tools and the other is the raw data without any segmentation. Participants can decide whether to work on custom segmentation or not. We used the provided segmentation. Our system is an end-to-end model based on an adaptation of the Transformer for speech data. Its training process is the main focus of this paper and it is based on: i) transfer learning (ASR pretraining and knowledge distillation), ii) data augmentation (SpecAugment, time stretch and synthetic data), iii)combining synthetic and real data marked as different domains, and iv) multi-task learning using the CTC loss. Finally, after the training with word-level knowledge distillation is complete, our ST models are fine-tuned using label smoothed cross entropy. Our best model scored 29 BLEU on the MuST-CEn-De test set, which is an excellent result compared to recent papers, and 23.7 BLEU on the same data segmented with VAD, showing the need for researching solutions addressing this specific data condition.</abstract>
@@ -177,7 +177,7 @@
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>95–102</pages>
       <abstract>This paper describes the University of Helsinki Language Technology group’s participation in the IWSLT 2020 offline speech translation task, addressing the translation of English audio into German text. In line with this year’s task objective, we train both cascade and end-to-end systems for spoken language translation. We opt for an end-to-end multitasking architecture with shared internal representations and a cascade approach that follows a standard procedure consisting of ASR, correction, and MT stages. We also describe the experiments that served as a basis for the submitted systems. Our experiments reveal that multitasking training with shared internal representations is not only possible but allows for knowledge-transfer across modalities.</abstract>
       <url hash="0b46f3a0">2020.iwslt-1.10</url>
@@ -189,7 +189,7 @@
       <title>The <fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case> 2020 Systems: Work-From-Home Edition</title>
       <author><first>Brian</first><last>Ore</last></author>
       <author><first>Eric</first><last>Hansen</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <pages>103–108</pages>
       <abstract>This report summarizes the Air Force Research Laboratory (AFRL) submission to the offline spoken language translation (SLT) task as part of the IWSLT 2020 evaluation campaign. As in previous years, we chose to adopt the cascade approach of using separate systems to perform speech activity detection, automatic speech recognition, sentence segmentation, and machine translation. All systems were neural based, including a fully-connected neural network for speech activity detection, a Kaldi factorized time delay neural network with recurrent neural network (RNN) language model rescoring for speech recognition, a bidirectional RNN with attention mechanism for sentence segmentation, and transformer networks trained with OpenNMT and Marian for machine translation. Our primary submission yielded BLEU scores of 21.28 on tst2019 and 23.33 on tst2020.</abstract>
@@ -247,7 +247,7 @@
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>130–139</pages>
       <abstract>This paper describes the CASIA’s system for the IWSLT 2020 open domain translation task. This year we participate in both Chinese→Japanese and Japanese→Chinese translation tasks. Our system is neural machine translation system based on Transformer model. We augment the training data with knowledge distillation and back translation to improve the translation performance. Domain data classification and weighted domain model ensemble are introduced to generate the final translation result. We compare and analyze the performance on development data with different model settings and different data processing techniques.</abstract>
       <url hash="d120b4ec">2020.iwslt-1.15</url>
@@ -331,7 +331,7 @@
       <author><first>Nikhil</first><last>Saini</last></author>
       <author><first>Jyotsana</first><last>Khatri</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>178–186</pages>
       <abstract>Machine translation systems perform reasonably well when the input is well-formed speech or text. Conversational speech is spontaneous and inherently consists of many disfluencies. Producing fluent translations of disfluent source text would typically require parallel disfluent to fluent training data. However, fluent translations of spontaneous speech are an additional resource that is tedious to obtain. This work describes the submission of IIT Bombay to the Conversational Speech Translation challenge at IWSLT 2020. We specifically tackle the problem of disfluency removal in disfluent-to-fluent text-to-text translation assuming no access to fluent references during training. Common patterns of disfluency are extracted from disfluent references and a noise induction model is used to simulate them starting from a clean monolingual corpus. This synthetically constructed dataset is then considered as a proxy for labeled data during training. We also make use of additional fluent text in the target language to help generate fluent translations. This work uses no fluent references during training and beats a baseline model by a margin of 4.21 and 3.11 BLEU points where the baseline uses disfluent and fluent references, respectively. Index Terms- disfluency removal, machine translation, noise induction, leveraging monolingual data, denoising for disfluency removal.</abstract>
       <url hash="a3020024">2020.iwslt-1.22</url>
@@ -362,7 +362,7 @@
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Sangeet</first><last>Sagar</last></author>
       <author><first>Dominik</first><last>Macháček</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>191–199</pages>
       <abstract>In this paper, we present our submission to the Non-Native Speech Translation Task for IWSLT 2020. Our main contribution is a proposed speech recognition pipeline that consists of an acoustic model and a phoneme-to-grapheme model. As an intermediate representation, we utilize phonemes. We demonstrate that the proposed pipeline surpasses commercially used automatic speech recognition (ASR) and submit it into the ASR track. We complement this ASR with off-the-shelf MT systems to take part also in the speech translation track.</abstract>
       <url hash="c7893255">2020.iwslt-1.24</url>
@@ -375,7 +375,7 @@
       <author><first>Jonáš</first><last>Kratochvíl</last></author>
       <author><first>Sangeet</first><last>Sagar</last></author>
       <author><first>Matúš</first><last>Žilinec</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Philip</first><last>Williams</last></author>
@@ -390,7 +390,7 @@
     <paper id="26">
       <title>Is 42 the Answer to Everything in Subtitling-oriented Speech Translation?</title>
       <author><first>Alina</first><last>Karakanta</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>209–219</pages>
       <abstract>Subtitling is becoming increasingly important for disseminating information, given the enormous amounts of audiovisual content becoming available daily. Although Neural Machine Translation (NMT) can speed up the process of translating audiovisual content, large manual effort is still required for transcribing the source language, and for spotting and segmenting the text into proper subtitles. Creating proper subtitles in terms of timing and segmentation highly depends on information present in the audio (utterance duration, natural pauses). In this work, we explore two methods for applying Speech Translation (ST) to subtitling, a) a direct end-to-end and b) a classical cascade approach. We discuss the benefit of having access to the source language speech for improving the conformity of the generated subtitles to the spatial and temporal subtitling constraints and show that length is not the answer to everything in the case of subtitling-oriented ST.</abstract>
@@ -452,7 +452,7 @@
       <title>From Speech-to-Speech Translation to Automatic Dubbing</title>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Robert</first><last>Enyedi</last></author>
-      <author><first>Roberto</first><last>Barra-Chicote</last></author>
+      <author id="roberto-barra-chicote"><first>Roberto</first><last>Barra-Chicote</last></author>
       <author><first>Ritwik</first><last>Giri</last></author>
       <author><first>Umut</first><last>Isik</last></author>
       <author><first>Arvindh</first><last>Krishnaswamy</last></author>
@@ -466,11 +466,11 @@
     </paper>
     <paper id="32">
       <title>Joint Translation and Unit Conversion for End-to-end Localization</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Stanislas</first><last>Lauly</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>265–271</pages>
       <abstract>A variety of natural language tasks require processing of textual data which contains a mix of natural language and formal languages such as mathematical expressions. In this paper, we take unit conversions as an example and propose a data augmentation technique which lead to models learning both translation and conversion tasks as well as how to adequately switch between them for end-to-end localization.</abstract>
       <url hash="1add4efb">2020.iwslt-1.32</url>
@@ -495,8 +495,8 @@
       <author><first>Yuri</first><last>Bizzoni</last></author>
       <author><first>Tom S</first><last>Juzek</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Elke</first><last>Teich</last></author>
       <pages>280–290</pages>
       <abstract>Translationese is a phenomenon present in human translations, simultaneous interpreting, and even machine translations. Some translationese features tend to appear in simultaneous interpreting with higher frequency than in human text translation, but the reasons for this are unclear. This study analyzes translationese patterns in translation, interpreting, and machine translation outputs in order to explore possible reasons. In our analysis we – (i) detail two non-invasive ways of detecting translationese and (ii) compare translationese across human and machine translations from text and speech. We find that machine translation shows traces of translationese, but does not reproduce the patterns found in human translation, offering support to the hypothesis that such patterns are due to the model (human vs machine) rather than to the data (written vs spoken).</abstract>
diff --git a/data/xml/2020.jeptalnrecital.xml b/data/xml/2020.jeptalnrecital.xml
index 73b2dfad90..8ed626414d 100644
--- a/data/xml/2020.jeptalnrecital.xml
+++ b/data/xml/2020.jeptalnrecital.xml
@@ -96,7 +96,7 @@
     <paper id="8">
       <title>Où en sommes-nous dans la reconnaissance des entités nommées structurées à partir de la parole ? (Where are we in Named Entity Recognition from speech ?)</title>
       <author><first>Antoine</first><last>Caubrière</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Antoine</first><last>Laurent</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
@@ -111,7 +111,7 @@
       <author><first>Anaïs</first><last>Chanclu</last></author>
       <author><first>Laurianne</first><last>Georgeton</last></author>
       <author><first>Corinne</first><last>Fredouille</last></author>
-      <author><first>Jean-Francois</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></author>
       <pages>73–81</pages>
       <abstract>Cet article présente la base de données PTSVOX, créée par le Service Central de la Police Technique et Scientifique (SCPTS) spécifiquement pour la comparaison de voix dans le cadre judiciaire. PTSVOX contient 369 locuteurs et locutrices qui ont été enregistrés au microphone et au téléphone. PTSVOX a été conçue pour mesurer l’influence de différents facteurs de variabilité fréquemment rencontrés dans les cas pratiques en identification judiciaire, comme le type de parole, le temps écoulé et le matériel d’enregistrement. Pour cela, 24 des locuteurs de PTSVOX (12 hommes et 12 femmes) ont été enregistrés une fois par mois pendant 3 mois, en parole spontanée et en parole lue. Dans cet article, nous présentons dans un premier temps la base PTSVOX, puis nous décrivons des protocoles standards ainsi que les systèmes de référence associés à PTSVOX, avec une évaluation de leur performance.</abstract>
       <url hash="ed7f4e94">2020.jeptalnrecital-jep.9</url>
@@ -123,7 +123,7 @@
       <author><first>Estelle</first><last>Chardenon</last></author>
       <author><first>Cécile</first><last>Fougeron</last></author>
       <author><first>Nicolas</first><last>Audibert</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <pages>82–90</pages>
       <abstract>Si l’étude de la variabilité entre locuteurs permet d’identifier des caractéristiques phonétiques potentiellement discriminantes, voire spécifiques, il est essentiel de comprendre, si et comment, ces caractéristiques varient chez un même locuteur. Ici, nous examinons la variabilité de caractéristiques liées à la gestion temporelle de la parole sur un nombre limité de locuteurs, enregistrés sur plusieurs répétitions dans une même session, et sur 6 à 7 sessions espacées d’une année. Sur cette vingtaine d’enregistrements par locuteur, nous observons comment le débit articulatoire, les modulations de ce débit, et la durée des pauses varient en fonction de la répétition et de la session et en interaction avec le locuteur. Les résultats montrent que c’est dans la variation de gestion temporelle de la parole que les locuteurs se distinguent les uns des autres, en termes de régularité ou non entre enregistrements et au sein d’un même enregistrement.</abstract>
       <url hash="d0d46679">2020.jeptalnrecital-jep.10</url>
@@ -133,7 +133,7 @@
     <paper id="11">
       <title>Caractérisation du locuteur par <fixed-case>CNN</fixed-case> à l’aide des contours d’intensité et d’intonation : comparaison avec le spectrogramme (<fixed-case>CNN</fixed-case> speaker characterisation through prosody : spectrogram comparison )</title>
       <author><first>Gabriele</first><last>Chignoli</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <author><first>Emmanuel</first><last>Ferragne</last></author>
       <pages>91–99</pages>
       <abstract>Dans ce travail nous avons recours aux variations de f0 et d’intensité de 44 locuteurs francophones à partir de séquences de 4 secondes de parole spontanée pour comprendre comment ces paramètres prosodiques peuvent être utilisés pour caractériser des locuteurs. Une classification automatique est effectuée avec un réseau de neurones convolutifs, fournissant comme réponse des scores de probabilité pour chacun des 44 locuteurs modélisés. Une représentation par spectrogrammes a été utilisée comme référence pour le même système de classification. Nous avons pu mettre en avant la pertinence de l’intensité, et lorsque les deux paramètres prosodiques sont combinés pour représenter les locuteurs nous observons un score qui atteint en moyenne 59 % de bonnes classifications.</abstract>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="17">
       <title>Unités prosodiques et grammaire intonative du français : vers une nouvelle approche (Prosodic Units and Intonational Grammar in <fixed-case>F</fixed-case>rench: towards a new Approach)</title>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Brechtje</first><last>Post</last></author>
       <author><first>Hiyon</first><last>Yoo</last></author>
       <pages>145–153</pages>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="18">
       <title>Quel type de systèmes utiliser pour la transcription automatique du français ? Les <fixed-case>HMM</fixed-case> font de la résistance (What system for the automatic transcription of <fixed-case>F</fixed-case>rench in audiovisual broadcasts ?)</title>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Carole</first><last>Lailler</last></author>
       <pages>154–162</pages>
       <abstract>Forts d’une utilisation couronnée de succès en traduction automatique, les systèmes end-to-end dont la sortie réside en une suite de caractères, ont vu leur utilisation étendue à la transcription automatique de la parole. De nombreuses comparaisons ont alors été effectuées sur des corpus anglais libres de droits, de parole lue. Nous proposons ici de réaliser une comparaison entre deux systèmes état de l’art, non pas sur de la parole lue mais bel et bien sur un corpus d’émissions audiovisuelles françaises présentant différents degrés de spontanéité. Le premier est un end-to-end et le second est un système hybride (HMM/DNN). L’obtention de résultats satisfaisants pour le end-to-end nécessitant un lexique et modèle de langage dédiés, il est intéressant de constater qu’une meilleure intégration dans les systèmes hybrides (HMM/DNN) est source de performances supérieures, notamment en Français où le contexte est primordial pour capturer un énoncé.</abstract>
@@ -240,7 +240,7 @@
       <title>Statistiques des sons naturels et hypothèse du codage efficace pour la perception de la musique et de la parole: Mise en place d’une méthodologie d’évaluation (Natural sound statistics and the efficient coding hypothesis for music and speech perception : setting-up an evaluation methodology)</title>
       <author><first>Agnieszka</first><last>Duniec</last></author>
       <author><first>Olivier</first><last>Crouzet</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <pages>181–189</pages>
       <abstract>L’hypothèse du codage efficace prédit que les systèmes perceptifs sont optimalement adaptés aux propriétés statistiques des signaux naturels. Ce caractère optimal a été récemment évalué sur la base d’analyses statistiques réalisées sur des décompositions spectrales de signaux de parole représentés comme des modulations d’énergie. Ces travaux pourraient trouver des applications directes dans l’amélioration du codage des signaux acoustiques par des implants cochléaires. Cependant, les recherches sur la perception de la musique par des personnes sourdes portant un implant cochléaire mettent en avant des limites qui semblent discordantes avec les performances observées concernant certaines propriétés fondamentales de la parole. Nous comparons les résultats d’analyses statistiques de signaux musicaux avec ceux qui ont été réalisés sur de la parole dans le but d’évaluer les impacts respectifs de ces deux gammes de signaux sonores pour évaluer leurs contributions à cette proposition théorique. Des résultats préliminaires et les perspectives futures sont discutés.</abstract>
       <url hash="8796fb89">2020.jeptalnrecital-jep.21</url>
@@ -329,7 +329,7 @@
       <title>Représentation du genre dans des données open source de parole (Gender representation in open source speech resources 1 With the rise of artificial intelligence (<fixed-case>AI</fixed-case>) and the growing use of deep-learning architectures, the question of ethics and transparency in <fixed-case>AI</fixed-case> systems has become a central concern within the research community)</title>
       <author><first>Mahault</first><last>Garnerin</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>244–252</pages>
       <abstract>Avec l’essor de l’intelligence artificielle (IA) et l’utilisation croissante des architectures d’apprentissage profond, la question de l’éthique et de la transparence des systèmes d’IA est devenue une préoccupation centrale au sein de la communauté de recherche. Dans cet article, nous proposons une étude sur la représentation du genre dans les ressources de parole disponibles sur la plateforme Open Speech and Language Resource. Un tout premier résultat est la difficulté d’accès aux informations sur le genre des locuteurs. Ensuite, nous montrons que l’équilibre entre les catégories de genre dépend de diverses caractéristiques des corpus (discours élicité ou non, tâche adressée). En nous appuyant sur des travaux antérieurs, nous reprenons quelques principes concernant les métadonnées dans l’optique d’assurer une meilleure transparence des systèmes de parole construits à l’aide de ces corpus.</abstract>
       <url hash="4142e4ca">2020.jeptalnrecital-jep.28</url>
@@ -350,7 +350,7 @@
     </paper>
     <paper id="30">
       <title>Informations segmentales pour la caractérisation phonétique du locuteur : variabilité inter- et intra-locuteurs (An automatic classification task involving 44 speakers was performed using convolutional neural networks (<fixed-case>CNN</fixed-case>) on broadband spectrograms extracted from 2-second sequences of a spontaneous speech corpus (<fixed-case>NCCF</fixed-case>r))</title>
-      <author><first>Cedric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cedric</first><last>Gendrot</last></author>
       <author><first>Emmanuel</first><last>Ferragne</last></author>
       <author><first>Thomas</first><last>Pellegrini</last></author>
       <pages>262–270</pages>
@@ -377,7 +377,7 @@
       <author><first>Adrien</first><last>Gresse</last></author>
       <author><first>Mathias</first><last>Quillot</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <pages>280–288</pages>
       <abstract>La recherche d’acteurs vocaux pour les productions audiovisuelles est réalisée par des directeurs artistiques (DA). Les DA sont constamment à la recherche de nouveaux talents vocaux, mais ne peuvent effectuer des auditions à grande échelle. Les outils automatiques capables de suggérer des voix présentent alors un grand intérêt pour l’industrie audiovisuelle. Dans les travaux précédents, nous avons montré l’existence d’informations acoustiques permettant de reproduire des choix du DA. Dans cet article, nous proposons une approche à base de réseaux de neurones pour construire une représentation adaptée aux personnages/rôles visés, appelée p-vecteur. Nous proposons ensuite de tirer parti de données externes pour la représentation de voix, proches de celles d’origine, au moyen de méthodes de distillation de la connaissance. Les expériences menées sur des extraits de voix de jeux vidéo montrent une amélioration significative de l’approche p-vecteur, avec distillation de la connaissance, par rapport à une représentation x-vecteur, état-de-l’art en reconnaissance du locuteur.</abstract>
       <url hash="1149ffa5">2020.jeptalnrecital-jep.32</url>
@@ -388,9 +388,9 @@
       <title>Lénition et fortition des occlusives en coda finale dans deux langues romanes : le français et le roumain (Lenition and fortition of word-final stops in two <fixed-case>R</fixed-case>omance languages: <fixed-case>F</fixed-case>rench and <fixed-case>R</fixed-case>omanian)</title>
       <author><first>Mathilde</first><last>Hutin</last></author>
       <author><first>Adèle</first><last>Jatteau</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>289–298</pages>
       <abstract>L’exploration automatisée de grands corpus permet d’analyser plus finement la relation entre motifs de variation phonétique synchronique et changements diachroniques : les erreurs dans les transcriptions automatiques sont riches d’enseignements sur la variation contextuelle en parole continue et sur les possibles mutations systémiques sur le point d’apparaître. Dès lors, il est intéressant de se pencher sur des phénomènes phonologiques largement attestés dans les langues en diachronie comme en synchronie pour établir leur émergence ou non dans des langues qui n’y sont pas encore sujettes. La présente étude propose donc d’utiliser l’alignement forcé avec variantes de prononciation pour observer les alternances de voisement en coda finale de mot dans deux langues romanes : le français et le roumain. Il sera mis en évidence, notamment, que voisement et dévoisement non-canoniques des codas françaises comme roumaines ne sont pas le fruit du hasard mais bien des instances de dévoisement final et d’assimilation régressive de trait laryngal, qu’il s’agisse de voisement ou de non-voisement.</abstract>
       <url hash="2a4f5a22">2020.jeptalnrecital-jep.33</url>
@@ -673,7 +673,7 @@
       <author><first>Richard</first><last>Dufour</last></author>
       <author><first>Axel</first><last>Roebel</last></author>
       <author><first>Nicolas</first><last>Obin</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <author><first>Emmanuel</first><last>Ethis</last></author>
       <pages>525–533</pages>
       <abstract>La voix actée représente un défi majeur pour les futures interfaces vocales avec un potentiel d’application extrêmement important pour la transformation numérique des secteurs de la culture et de la communication, comme la production ou la post-production de voix pour les séries ou le cinéma. Un aspect central de la voix actée repose sur la notion d’interprétation, un aspect peu étudié dans la communauté scientifique de la parole. Cet article propose un état des lieux et une réflexion sur les défis scientifiques et les applications technologiques de la voix actée : à la croisée de l’acoustique, de la linguistique, de la culture, et de l’apprentissage machine. Une analyse préliminaire des pratiques permet de rendre compte de la diversité de l’écosystème des “métiers de la voix” et de pointer les fonctions et les conventions qui s’y rattachent. Nous nous intéresserons ensuite à la pratique particulière du doublage de voix, en faisant ressortir ses enjeux et problématiques spécifiques puis en présentant des solutions proposées pour modéliser les codes expressifs de la voix d’un acteur ou les choix d’un opérateur pour le doublage.</abstract>
@@ -803,7 +803,7 @@
     <paper id="70">
       <title>Réduction temporelle en français spontané : où se cache-t-elle ? Une étude des segments, des mots et séquences de mots fréquemment réduits ()</title>
       <author><first>Yaru</first><last>Wu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>627–635</pages>
       <abstract>Cette étude vise à proposer une méthode adaptée à l’étude de divers phénomènes de variation dans les grands corpus utilisant l’alignement automatique de la parole. Cette méthode est appliquée pour étudier la réduction temporelle en français spontané. Nous proposons de qualifier la réduction temporelle comme la réalisation de suites de segments courts consécutifs. Environ 14% du corpus est considéré comme réduit. Les résultats de l’alignement montrent que ces zones impliquent le plus souvent plus d’un mot (81%), et que sinon, la position interne du mot est la plus concernée. Parmi les exemples de suites de mots les plus réduits, on trouve des locutions utilisées comme des marqueurs discursifs.</abstract>
       <url hash="c47cb053">2020.jeptalnrecital-jep.70</url>
@@ -903,7 +903,7 @@
       <title>Classification de relations pour l’intelligence économique et concurrentielle (Relation Classification for Competitive and Economic Intelligence )</title>
       <author><first>Hadjer</first><last>Khaldi</last></author>
       <author><first>Amine</first><last>Abdaoui</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Grégoire</first><last>Sigel</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
       <pages>27–39</pages>
@@ -927,12 +927,12 @@
       <title>Les modèles de langue contextuels Camembert pour le français : impact de la taille et de l’hétérogénéité des données d’entrainement (<fixed-case>C</fixed-case> <fixed-case>AMEM</fixed-case> <fixed-case>BERT</fixed-case> Contextual Language Models for <fixed-case>F</fixed-case>rench: Impact of Training Data Size and Heterogeneity )</title>
       <author><first>Louis</first><last>Martin</last></author>
       <author><first>Benjamin</first><last>Muller</last></author>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>54–65</pages>
       <abstract>Les modèles de langue neuronaux contextuels sont désormais omniprésents en traitement automatique des langues. Jusqu’à récemment, la plupart des modèles disponibles ont été entraînés soit sur des données en anglais, soit sur la concaténation de données dans plusieurs langues. L’utilisation pratique de ces modèles — dans toutes les langues sauf l’anglais — était donc limitée. La sortie récente de plusieurs modèles monolingues fondés sur BERT (Devlin et al., 2019), notamment pour le français, a démontré l’intérêt de ces modèles en améliorant l’état de l’art pour toutes les tâches évaluées. Dans cet article, à partir d’expériences menées sur CamemBERT (Martin et al., 2019), nous montrons que l’utilisation de données à haute variabilité est préférable à des données plus uniformes. De façon plus surprenante, nous montrons que l’utilisation d’un ensemble relativement petit de données issues du web (4Go) donne des résultats aussi bons que ceux obtenus à partir d’ensembles de données plus grands de deux ordres de grandeurs (138Go).</abstract>
       <url hash="7a47acde">2020.jeptalnrecital-taln.5</url>
@@ -942,7 +942,7 @@
     <paper id="6">
       <title>Génération automatique de définitions pour le français (Definition Modeling in <fixed-case>F</fixed-case>rench)</title>
       <author><first>Timothee</first><last>Mickus</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
       <pages>66–80</pages>
       <abstract>La génération de définitions est une tâche récente qui vise à produire des définitions lexicographiques à partir de plongements lexicaux. Nous remarquons deux lacunes : (i) l’état de l’art actuel ne s’est penché que sur l’anglais et le chinois, et (ii) l’utilisation escomptée en tant que méthode d’évaluation des plongements lexicaux doit encore être vérifiée. Pour y remédier, nous proposons un jeu de données pour la génération de définitions en français, ainsi qu’une évaluation des performances d’un modèle de génération de définitions simple selon les plongements lexicaux fournis en entrée.</abstract>
@@ -954,7 +954,7 @@
       <title>Du bon usage d’ingrédients linguistiques spéciaux pour classer des recettes exceptionnelles (Using Special Linguistic Ingredients to Classify Exceptional Recipes )</title>
       <author><first>Elham</first><last>Mohammadi</last></author>
       <author><first>Louis</first><last>Marceau</last></author>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Leila</first><last>Kosseim</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
@@ -988,7 +988,7 @@
     <paper id="10">
       <title>Impact de la structure logique des documents sur les modèles distributionnels : expérimentations sur le corpus <fixed-case>TALN</fixed-case> (Impact of document structure on distributional semantics models: a case study on <fixed-case>NLP</fixed-case> research articles )</title>
       <author><first>Ludovic</first><last>Tanguy</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Yoann</first><last>Bard</last></author>
       <pages>122–135</pages>
       <abstract>Nous présentons une expérience visant à mesurer en quoi la structure logique d’un document impacte les représentations lexicales dans les modèles de sémantique distributionnelle. En nous basant sur des documents structurés (articles de recherche en TAL) nous comparons des modèles construits sur des corpus obtenus par suppression de certaines parties des textes du corpus : titres de section, résumés, introductions et conclusions. Nous montrons que malgré des différences selon les parties et le lexique pris en compte, ces zones réputées particulièrement informatives du contenu d’un article ont un impact globalement moins significatif que le reste du texte sur la construction du modèle.</abstract>
@@ -999,7 +999,7 @@
     <paper id="11">
       <title>Prédire automatiquement les intentions du locuteur dans des questions issues du discours oral spontané (Automatically predicting the speaker’s intentions in questions from spontaneous oral speech)</title>
       <author><first>Angèle</first><last>Barbedette</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>137–145</pages>
       <abstract>Cette étude porte sur la classification automatique des intentions exprimées dans des questions issues d’un corpus d’échanges oraux spontanés. Nous proposons une typologie dans laquelle nous distinguons trois classes d’intentions (AVIS, VOLONTÉ et DOUTE). Après plusieurs prétraitements et ajouts de traits lexicaux aux données (lexiques, nombre de mots et de caractères), nous implémentons un algorithme de classification automatique et nous en présentons et évaluons les résultats qui atteignent une F-mesure de 0,62. Nous proposons ensuite une interprétation de ceux-ci, basée sur une comparaison entre les expériences menées et des mesures liées aux traits linguistiques intégrés avant la tâche de classification.</abstract>
       <url hash="7af651bf">2020.jeptalnrecital-taln.11</url>
@@ -1099,7 +1099,7 @@
     <paper id="21">
       <title>Prédire le niveau de langue d’apprenants d’anglais (Predict the language level for <fixed-case>E</fixed-case>nglish learners)</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Bert</first><last>Cappelle</last></author>
       <author><first>Cyril</first><last>Grandin</last></author>
       <author><first>Benoît</first><last>Leclercq</last></author>
@@ -1122,7 +1122,7 @@
     <paper id="23">
       <title>Segmentation automatique en périodes pour le français parlé (Automatic Period Segmentation of Oral <fixed-case>F</fixed-case>rench)</title>
       <author><first>Natalia</first><last>Kalashnikova</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Loïc</first><last>Grobol</last></author>
       <author><first>François</first><last>Delafontaine</last></author>
       <pages>241–248</pages>
@@ -1134,7 +1134,7 @@
     <paper id="24">
       <title>Les avis sur les restaurants à l’épreuve de l’apprentissage automatique (An Empirical Examination of Online Restaurant Reviews)</title>
       <author><first>Hyun</first><last>Jung Kang</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>249–257</pages>
       <abstract>Dans la fouille d’opinions, de nombreuses études portent sur l’extraction automatique des opinions positives ou négatives. Cependant les recherches ayant pour objet la fouille de suggestions et d’intentions sont moins importantes, malgré leur lien profond avec l’opinion. Cet article vise à détecter six catégories (opinion positive/mixte/négative, suggestion, intention, description) dans les avis en ligne sur les restaurants en exploitant deux méthodes : l’apprentissage de surface et l’apprentissage profond supervisés. Les performances obtenues pour chaque catégorie sont interprétées ensuite en tenant compte des spécificités du corpus traité.</abstract>
       <url hash="d06890e9">2020.jeptalnrecital-taln.24</url>
@@ -1160,8 +1160,8 @@
       <author><first>Maximin</first><last>Coavoux</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>268–278</pages>
       <abstract>Les modèles de langue pré-entraînés sont désormais indispensables pour obtenir des résultats à l’état-de-l’art dans de nombreuses tâches du TALN. Tirant avantage de l’énorme quantité de textes bruts disponibles, ils permettent d’extraire des représentations continues des mots, contextualisées au niveau de la phrase. L’efficacité de ces représentations pour résoudre plusieurs tâches de TALN a été démontrée récemment pour l’anglais. Dans cet article, nous présentons et partageons FlauBERT, un ensemble de modèles appris sur un corpus français hétérogène et de taille importante. Des modèles de complexité différente sont entraînés à l’aide du nouveau supercalculateur Jean Zay du CNRS. Nous évaluons nos modèles de langue sur diverses tâches en français (classification de textes, paraphrase, inférence en langage naturel, analyse syntaxique, désambiguïsation automatique) et montrons qu’ils surpassent souvent les autres approches sur le référentiel d’évaluation FLUE également présenté ici.</abstract>
@@ -1173,7 +1173,7 @@
       <title>Relation, es-tu là ? Détection de relations par <fixed-case>LSTM</fixed-case> pour améliorer l’extraction de relations (Relation, are you there ? <fixed-case>LSTM</fixed-case>-based relation detection to improve knowledge extraction )</title>
       <author><first>Cyrielle</first><last>Mallart</last></author>
       <author><first>Michel</first><last>Le Nouy</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>279–287</pages>
       <abstract>De nombreuses méthodes d’extraction et de classification de relations ont été proposées et testées sur des données de référence. Cependant, dans des données réelles, le nombre de relations potentielles est énorme et les heuristiques souvent utilisées pour distinguer de vraies relations de co-occurrences fortuites ne détectent pas les signaux faibles pourtant importants. Dans cet article, nous étudions l’apport d’un modèle de détection de relations, identifiant si un couple d’entités dans une phrase exprime ou non une relation, en tant qu’étape préliminaire à la classification des relations. Notre modèle s’appuie sur le plus court chemin de dépendances entre deux entités, modélisé par un LSTM et combiné avec les types des entités. Sur la tâche de détection de relations, nous obtenons de meilleurs résultats qu’un modèle état de l’art pour la classification de relations, avec une robustesse accrue aux relations inédites. Nous montrons aussi qu’une détection binaire en amont d’un modèle de classification améliore significativement ce dernier.</abstract>
@@ -1185,8 +1185,8 @@
       <title>Analyse automatique en cadres sémantiques pour l’apprentissage de modèles de compréhension de texte (Semantic Frame Parsing for training Machine Reading Comprehension models)</title>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>288–295</pages>
       <abstract>Dans le cadre de la compréhension automatique de documents, cet article propose une évaluation intrinsèque et extrinsèque d’un modèle d’analyse automatique en cadres sémantiques (Frames). Le modèle proposé est un modèle état de l’art à base de GRU bi-directionnel, enrichi par l’utilisation d’embeddings contextuels. Nous montrons qu’un modèle de compréhension de documents appris sur un corpus de triplets générés à partir d’un corpus analysé automatiquement avec l’analyseur en cadre sémantique présente des performances inférieures de seulement 2.5% en relatif par rapport à un modèle appris sur un corpus de triplets générés à partir d’un corpus analysé manuellement.</abstract>
       <url hash="bba60147">2020.jeptalnrecital-taln.28</url>
@@ -1198,7 +1198,7 @@
       <author><first>Mohamed</first><last>Amine Menacer</last></author>
       <author><first>Karima</first><last>Abidi</last></author>
       <author><first>Nouha</first><last>Othman</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>296–304</pages>
       <abstract>La plupart des travaux existant sur l’analyse de sentiments traitent l’arabe standard moderne et ne prennent pas en considération les spécificités de l’arabe dialectal. Cet article présente un système d’analyse de sentiments de textes extraits de vidéos exprimées en dialecte algérien. Dans ce travail, nous avons deux défis à surmonter, la reconnaissance automatique de la parole pour le dialecte algérien et l’analyse de sentiments du texte reconnu. Le développement du système de reconnaissance automatique de la parole est basé sur un corpus oral restreint. Pour pallier le manque de données, nous proposons d’exploiter des données ayant un impact sur le dialecte algérien, à savoir l’arabe standard et le français. L’analyse de sentiments est fondée sur la détection automatique de la polarité des mots en fonction de leur proximité sémantique avec d’autres mots ayant une polarité prédéterminée.</abstract>
       <url hash="0b007a88">2020.jeptalnrecital-taln.29</url>
@@ -1229,7 +1229,7 @@
     <paper id="32">
       <title>Identification des problèmes d’annotation pour l’extraction de relations (Identification of annotation problem for the relation extraction)</title>
       <author><first>Tsanta</first><last>Randriatsitohaina</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>323–331</pages>
       <abstract>L’annotation d’un corpus est une tâche difficile et laborieuse, notamment sur des textes de spécialité comme les textes biomédicaux. Ainsi, dans un contexte comme l’extraction des interactions alimentmédicament (FDI), l’annotation du corpus POMELO a été réalisée par un seul annotateur et présente des risques d’erreur. Dans cet article, nous proposons d’identifier ces problèmes d’annotation en utilisant un corpus Silver Standard (CSS) que nous établissons à partir d’un vote majoritaire parmi les annotations proposées par des modèles entraînés sur un domaine similaire (interaction médicamentmédicament – DDI) et l’annotation manuelle à évaluer. Les résultats obtenus montrent que l’annotation dans POMELO est considérablement éloignée du CSS. L’analyse des erreurs permet d’en identifier les principales causes et de proposer des solutions pour corriger l’annotation existante.</abstract>
       <url hash="85464fed">2020.jeptalnrecital-taln.32</url>
@@ -1242,7 +1242,7 @@
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
       <author><first>Francois</first><last>Yvon</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>332–341</pages>
       <abstract>La simplification de textes a émergé comme un sous-domaine actif du traitement automatique des langues, du fait des problèmes pratiques et théoriques qu’elle permet d’aborder, ainsi que de ses nombreuses applications pratiques. Des corpus de simplification sont nécessaires pour entrainer des systèmes de simplification automatique ; ces ressources sont toutefois rares et n’existent que pour un petit nombre de langues. Nous montrons ici que dans un contexte où les ressources pour la simplification sont rares, il reste néanmoins possible de construire des systèmes de simplification, en ayant recours à des corpus synthétiques, par exemple obtenus par traduction automatique, et nous évaluons diverses manières de les constituer.</abstract>
       <url hash="b0817474">2020.jeptalnrecital-taln.33</url>
@@ -1253,7 +1253,7 @@
       <title>Représentation sémantique des familles dérivationnelles au moyen de frames morphosémantiques (Semantic representation of derivational families by means of morphosemantic frames )</title>
       <author><first>Daniele</first><last>Sanacore</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>342–350</pages>
       <abstract>L’article présente un formalisme de représentation des relations morphologiques dérivationnelles inspiré de la Sémantique des Frames. La description morphosémantique y est réalisée au niveau des familles dérivationnelles au moyen de frames morphosémantiques dans lesquels les lexèmes sont définis les uns relativement aux autres. Les frames morphosémantiques permettent de rendre compte de la structure paradigmatique du lexique morphologique par l’alignement des familles qui présentent les mêmes oppositions de sens. La seconde partie de l’article est consacrée aux données qui seront utilisées pour produire (semi-) automatiquement ces représentations.</abstract>
       <url hash="aff86f9b">2020.jeptalnrecital-taln.34</url>
@@ -1264,7 +1264,7 @@
       <title>Modèle neuronal pour la résolution de la coréférence dans les dossiers médicaux électroniques (Neural approach for coreference resolution in electronic health records )</title>
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>351–360</pages>
       <abstract>La résolution de la coréférence est un élément essentiel pour la constitution automatique de chronologies médicales à partir des dossiers médicaux électroniques. Dans ce travail, nous présentons une approche neuronale pour la résolution de la coréférence dans des textes médicaux écrits en anglais pour les entités générales et cliniques en nous évaluant dans le cadre de référence pour cette tâche que constitue la tâche 1C de la campagne i2b2 2011.</abstract>
@@ -1275,7 +1275,7 @@
     <paper id="36">
       <title>Un corpus d’évaluation pour un système de simplification discursive (An Evaluation Corpus for Automatic Discourse Simplification)</title>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>361–369</pages>
       <abstract>Nous présentons un nouveau corpus simplifié, disponible en français pour l’évaluation d’un système de simplification discursive. Ce système utilise des chaînes de référence pour simplifier et pour préserver la cohésion textuelle après simplification. Nous présentons la méthodologie de collecte de corpus (via un formulaire, qui recueille les simplifications manuelles faites par des participants experts), les règles présentées dans le guide, une analyse des types de simplifications et une évaluation de notre corpus, par comparaison avec la sortie du système de simplification automatique.</abstract>
       <url hash="3a27673a">2020.jeptalnrecital-taln.36</url>
@@ -1482,7 +1482,7 @@
     </frontmatter>
     <paper id="1">
       <title>Démo de <fixed-case>AMALD</fixed-case>-serveur et <fixed-case>AMALD</fixed-case>-corpus, dédiés à l’analyse morphologique de l’allemand (Demonstration of <fixed-case>AMALD</fixed-case>-serveur and <fixed-case>AMALD</fixed-case>-corpus, dedicated to the morphological analysis of <fixed-case>G</fixed-case>erman)</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Vincent</first><last>Berment</last></author>
       <author><first>Jean-Philippe</first><last>Guilbaud</last></author>
       <author><first>Claire</first><last>Lemaire</last></author>
@@ -1521,7 +1521,7 @@
       <author><first>Emmanuelle</first><last>Dusserre</last></author>
       <author><first>Ruslan</first><last>Kalitvianski</last></author>
       <author><first>Mathieu</first><last>Ruhlmann</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <pages>14–17</pages>
       <abstract>Dans cet article, nous présentons la mise en œuvre d’une chaîne de traitement sémantique complète dédiée aux conversations audio issues de centres d’appel téléphoniques, depuis la phase de transcription automatique jusqu’à l’exploitation des résultats, en passant par l’étape d’analyse sémantique des énoncés. Nous décrivons ici le fonctionnement des différentes analyses que notre équipe développe, ainsi que la plateforme interactive permettant de restituer les résultats agrégés de toutes les conversations analysées.</abstract>
       <url hash="bc5088f9">2020.jeptalnrecital-demos.4</url>
@@ -1550,7 +1550,7 @@
       <author><first>Damien</first><last>Lolive</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
       <author><first>Jonathan</first><last>Chevelu</last></author>
-      <author><first>Sébastien</first><last>Le Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien</first><last>Le Maguer</last></author>
       <pages>22–25</pages>
       <abstract>Nous présentons FlexEval, un outil de conception et déploiement de tests perceptifs multimédias sous la forme d’un site web léger. S’appuyant sur des technologies standards et ouvertes du web, notamment le framework Flask, FlexEval offre une grande souplesse de conception, des gages de pérennité, ainsi que le support de communautés actives d’utilisateurs. L’application est disponible en open-source via le dépôt Git <url>https://gitlab.inria.fr/expression/tools/flexeval</url>.</abstract>
       <url hash="7c88b397">2020.jeptalnrecital-demos.6</url>
@@ -1560,7 +1560,7 @@
     <paper id="7">
       <title>Vers une analyse automatique de la perception relative à un lieu (Towards an Automatic Analysis of Place Perception)</title>
       <author><first>Hélène</first><last>Flamein</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>26–29</pages>
       <abstract>Le travail présenté s’intéresse à la perception qu’ont les habitants de leur ville en se fondant sur un corpus de conversations orales spontanées. La chaîne de traitement conditionnant l’analyse de la perception se décompose en trois étapes : la détection des noms de lieux, l’analyse de la perception identifiée et la visualisation cartographique des informations extraites.</abstract>
       <url hash="a37e1599">2020.jeptalnrecital-demos.7</url>
@@ -1627,7 +1627,7 @@
     <paper id="13">
       <title>Conception d’un système de détection d’intention pour un moteur de recherche sur <fixed-case>I</fixed-case>nternet (Designing a User Intention Detection system for a Web Search Engine)</title>
       <author><first>Estelle</first><last>Maudet</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <pages>50–52</pages>
       <abstract>Dans les moteurs de recherche sur Internet, l’une des tâches les plus importantes vise à identifier l’intention de l’utilisateur. Cet article présente notre étude pour proposer un nouveau système de détection d’intention pour le moteur de recherche sur Internet Qwant. Des logs de clic au système de détection d’intention, l’ensemble du processus est expliqué, y compris les contraintes industrielles qui ont dû être prises en compte. Une analyse manuelle des données groupées a d’abord été appliquée sur les journaux afin de mieux comprendre les objectifs de l’utilisateur et de choisir les catégories d’intention pertinentes. Lorsque la recherche satisfait aux contraintes industrielles, il faut faire des choix architecturaux et faire des concessions. Cet article explique les contraintes et les résultats obtenus pour ce nouveau système en ligne.</abstract>
       <url hash="05b7e20a">2020.jeptalnrecital-demos.13</url>
@@ -1641,7 +1641,7 @@
       <author><first>Denis</first><last>Jouvet</last></author>
       <author><first>Karima</first><last>Abidi</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>53–56</pages>
       <abstract>La démonstration de résumé et de traduction automatique de vidéos résulte de nos travaux dans le projet AMIS. L’objectif du projet était d’aider un voyageur à comprendre les nouvelles dans un pays étranger. Pour cela, le projet propose de résumer et traduire automatiquement une vidéo en langue étrangère (ici, l’arabe). Un autre objectif du projet était aussi de comparer les opinions et sentiments exprimés dans plusieurs vidéos comparables. La démonstration porte sur l’aspect résumé, transcription et traduction. Les exemples montrés permettront de comprendre et mesurer qualitativement les résultats du projet.</abstract>
       <url hash="2059bd51">2020.jeptalnrecital-demos.14</url>
@@ -1651,7 +1651,7 @@
     <paper id="15">
       <title>La résolution d’anaphores au-delà de la frontière de la phrase (The Anaphora Resolution Beyond Sentence Boundary)</title>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>57–59</pages>
       <abstract>Cette démonstration présente une extension de nos outils d’analyse syntaxique et d’étiquetage morphosyntaxique qui prend en compte la résolution d’anaphores pronominales non seulement à l’intérieur d’une phrase, mais également si l’antécédent se trouve dans la phrase précédente. Autant l’analyseur que l’étiqueteur effectuant une analyse syntaxique complète des phrases, ces outils affichent également les fonctions grammaticales des constituants (sujet, objet direct, etc.) et les arguments des verbes. Une version de cette démonstration est disponible sur le Web.</abstract>
       <url hash="0cd68df2">2020.jeptalnrecital-demos.15</url>
@@ -1661,7 +1661,7 @@
     <paper id="16">
       <title>Spiderlex et compagnie (Spiderlex &amp; Co)</title>
       <author><first>Sandrine</first><last>Ollinger</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <author><first>Yannick</first><last>Chudy</last></author>
       <author><first>Bruno</first><last>Gaume</last></author>
       <pages>60–63</pages>
@@ -1693,9 +1693,9 @@
     <paper id="19">
       <title>Analyse sémantique robuste par apprentissage antagoniste pour la généralisation de domaine (Robust Semantic Parsing with Adversarial Learning for Domain Generalization )</title>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <pages>71–72</pages>
       <abstract>Nous présentons des résumés en français et en anglais de l’article (Marzinotto et al., 2019) présenté à la conférence North American Chapter of the Association for Computational Linguistics : Human Language Technologies en 2019.</abstract>
       <url hash="44fa1b55">2020.jeptalnrecital-demos.19</url>
@@ -1709,7 +1709,7 @@
       <editor><first>Rémi</first><last>Cardon</last></editor>
       <editor><first>Natalia</first><last>Grabar</last></editor>
       <editor><first>Cyril</first><last>Grouin</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>ATALA et AFCP</publisher>
       <address>Nancy, France</address>
       <month>6</month>
@@ -1737,7 +1737,7 @@
       <author><first>Davide</first><last>Buscaldi</last></author>
       <author><first>Ghazi</first><last>Felhi</last></author>
       <author><first>Dhaou</first><last>Ghoul</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
       <author><first>Xudong</first><last>Zhang</last></author>
       <pages>14–25</pages>
@@ -1858,7 +1858,7 @@
   <volume id="eternal" ingest-date="2020-08-14" type="proceedings">
     <meta>
       <booktitle>Actes de la 6e conférence conjointe Journées d'Études sur la Parole (JEP, 33e édition), Traitement Automatique des Langues Naturelles (TALN, 27e édition), Rencontre des Étudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (RÉCITAL, 22e édition). 2e atelier Éthique et TRaitemeNt Automatique des Langues (ETeRNAL)</booktitle>
-      <editor><first>Gilles</first><last>Adda</last></editor>
+      <editor id="gilles-adda"><first>Gilles</first><last>Adda</last></editor>
       <editor><first>Maxime</first><last>Amblard</last></editor>
       <editor><first>Karën</first><last>Fort</last></editor>
       <publisher>ATALA et AFCP</publisher>
@@ -1875,7 +1875,7 @@
       <title>Pratiques d’évaluation en <fixed-case>ASR</fixed-case> et biais de performance (Evaluation methodology in <fixed-case>ASR</fixed-case> and performance bias)</title>
       <author><first>Mahault</first><last>Garnerin</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>1–9</pages>
       <abstract>Nous proposons une réflexion sur les pratiques d’évaluation des systèmes de reconnaissance automatique de la parole (ASR). Après avoir défini la notion de discrimination d’un point de vue légal et la notion d’équité dans les systèmes d’intelligence artificielle, nous nous intéressons aux pratiques actuelles lors des grandes campagnes d’évaluation. Nous observons que la variabilité de la parole et plus particulièrement celle de l’individu n’est pas prise en compte dans les protocoles d’évaluation actuels rendant impossible l’étude de biais potentiels dans les systèmes.</abstract>
       <url hash="401b3477">2020.jeptalnrecital-eternal.1</url>
@@ -1918,7 +1918,7 @@
     </paper>
     <paper id="5">
       <title>1990-2020 : retours sur 30 ans d’échanges autour de l’identification de voix en milieu judiciaire (1990-2020: A look back at 30 years of discussions on voice identification in the judicial system)</title>
-      <author><first>Jean-Francois</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></author>
       <pages>38–47</pages>
       <abstract>Des enregistrements de voix se trouvent de plus en plus souvent au cœur d’affaires judiciaires importantes, notamment de par l’essor de la téléphonie mobile. La justice demande à ce que des expertises en identification de voix soient réalisées alors que dans le même temps, la pertinence scientifique de telles expertises est fortement mise en cause par les scientifiques. Ainsi, dès 1990, les chercheurs en communication parlée réunis dans le GFCP, devenu depuis AFCP, ont voté une motion affirmant que « l’identification d’un individu par sa voix est à l’heure actuelle un problème à sa connaissance non résolu ». Cette motion est toujours en vigueur, après avoir été réaffirmée en 1997 et renforcée par une pétition en 2002. Malgré cela, des expertises judiciaires en identification de voix sont réalisées en France chaque année. Cet article revient sur les actions menées par le GFCP et l’AFCP depuis la motion initiale jusqu’aux actions contemporaines. Il se propose d’évaluer les répercussions de ces actions, tant au niveau de la Justice qu’au niveau académique.</abstract>
       <url hash="93e06ad4">2020.jeptalnrecital-eternal.5</url>
diff --git a/data/xml/2020.knlp.xml b/data/xml/2020.knlp.xml
index c44b2472b3..aa17def63f 100644
--- a/data/xml/2020.knlp.xml
+++ b/data/xml/2020.knlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of Knowledgeable NLP: the First Workshop on Integrating Structured Knowledge and Neural Networks for NLP</booktitle>
       <editor><first>Oren Sar</first><last>Shalom</last></editor>
       <editor><first>Alexander</first><last>Panchenko</last></editor>
-      <editor><first>Cicero</first><last>dos Santos</last></editor>
+      <editor id="cicero-dos-santos"><first>Cicero</first><last>dos Santos</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
       <editor><first>Alessandro</first><last>Moschitti</last></editor>
       <editor><first>Ido</first><last>Dagan</last></editor>
diff --git a/data/xml/2020.lantern.xml b/data/xml/2020.lantern.xml
index 920be490f0..34d31c41cc 100644
--- a/data/xml/2020.lantern.xml
+++ b/data/xml/2020.lantern.xml
@@ -6,7 +6,7 @@
       <editor><first>Aditya</first><last>Mogadala</last></editor>
       <editor><first>Sandro</first><last>Pezzelle</last></editor>
       <editor><first>Dietrich</first><last>Klakow</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <editor><first>Zeynep</first><last>Akata</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Barcelona, Spain</address>
diff --git a/data/xml/2020.latechclfl.xml b/data/xml/2020.latechclfl.xml
index b121b1b4b4..c703dcde16 100644
--- a/data/xml/2020.latechclfl.xml
+++ b/data/xml/2020.latechclfl.xml
@@ -6,7 +6,7 @@
       <editor><first>Stefania</first><last>DeGaetano</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
       <address>Online</address>
       <month>December</month>
@@ -55,7 +55,7 @@
     <paper id="5">
       <title>Neural Machine Translation of Artwork Titles Using Iconclass Codes</title>
       <author><first>Nikolay</first><last>Banar</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <pages>42–51</pages>
       <abstract>We investigate the use of Iconclass in the context of neural machine translation for NL&lt;-&gt;EN artwork titles. Iconclass is a widely used iconographic classification system used in the cultural heritage domain to describe and retrieve subjects represented in the visual arts. The resource contains keywords and definitions to encode the presence of objects, people, events and ideas depicted in artworks, such as paintings. We propose a simple concatenation approach that improves the quality of automatically generated title translations for artworks, by leveraging textual information extracted from Iconclass. Our results demonstrate that a neural machine translation system is able to exploit this metadata to boost the translation performance of artwork titles. This technology enables interesting applications of machine learning in resource-scarce domains in the cultural sector.</abstract>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="8">
       <title>Vital Records: Uncover the past from historical handwritten records</title>
-      <author><first>Herve</first><last>Dejean</last></author>
+      <author id="herve-dejean"><first>Herve</first><last>Dejean</last></author>
       <author><first>Jean-Luc</first><last>Meunier</last></author>
       <pages>69–73</pages>
       <abstract>We present Vital Records, a demonstrator based on deep-learning approaches to handwritten-text recognition, table processing and information extraction, which enables data from century-old documents to be parsed and analysed, making it possible to explore death records in space and time. This demonstrator provides a user interface for browsing and visualising data extracted from 80,000 handwritten pages of tabular data.</abstract>
@@ -121,7 +121,7 @@
     <paper id="12">
       <title>Zero-shot cross-lingual identification of direct speech using distant supervision</title>
       <author><first>Murathan</first><last>Kurfalı</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>105–111</pages>
       <abstract>Prose fiction typically consists of passages alternating between the narrator’s telling of the story and the characters’ direct speech in that story. Detecting direct speech is crucial for the downstream analysis of narrative structure, and may seem easy at first thanks to quotation marks. However, typographical conventions vary across languages, and as a result, almost all approaches to this problem have been monolingual. In contrast, the aim of this paper is to provide a multilingual method for identifying direct speech. To this end, we created a training corpus by using a set of heuristics to automatically find texts where quotation marks appear sufficiently consistently. We then removed the quotation marks and developed a sequence classifier based on multilingual-BERT which classifies each token as belonging to narration or speech. Crucially, by training the classifier with the quotation marks removed, it was forced to learn the linguistic characteristics of direct speech rather than the typography of quotation marks. The results in the zero-shot setting of the proposed model are comparable to the strong supervised baselines, indicating that this is a feasible approach.</abstract>
       <url hash="8b4e07e3">2020.latechclfl-1.12</url>
@@ -211,7 +211,7 @@
       <author><first>Amel</first><last>Fraisse</last></author>
       <author><first>Ronald</first><last>Jenn</last></author>
       <author><first>Shelley Fisher</first><last>Fishkin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>167–171</pages>
       <abstract>TL-Explorer is a digital humanities tool for mapping and analyzing translated literature, encompassing the World Map and the Translation Dashboard. The World Map displays collected literature of different languages, locations, and cultures and establishes the foundation for further analysis. It comprises three global maps for spatial and temporal interpretation. A further investigation into an individual point on the map leads to the Translation Dashboard. Each point represents one edition or translation. Collected translations are processed in order to build multilingual parallel corpora for a large number of under-resourced languages as well as to highlight the transnational circulation of knowledge. Our first rendition of TL-Explorer was conducted on the well-traveled American novel, Adventures of Huckleberry Finn, by Mark Twain. The maps currently chronicle nearly 400 translations of this novel. And the dashboard supports over 30 collected translations. However, the TL-Explore is easily extended to other works of literature and is not limited to type of texts, such as academic manuscripts or constitutional documents to name a few.</abstract>
       <url hash="9f9cf9fc">2020.latechclfl-1.20</url>
diff --git a/data/xml/2020.law.xml b/data/xml/2020.law.xml
index 6ad7c3552f..6295620d0d 100644
--- a/data/xml/2020.law.xml
+++ b/data/xml/2020.law.xml
@@ -144,7 +144,7 @@
       <author><first>Jie</first><last>Chi</last></author>
       <author><first>Tom</first><last>Hosking</last></author>
       <author><first>Nina</first><last>Markl</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>138–147</pages>
       <abstract>Multi-sentence questions (MSQs) are sequences of questions connected by relations which, unlike sequences of standalone questions, need to be answered as a unit. Following Rhetorical Structure Theory (RST), we recognise that different “question discourse relations” between the subparts of MSQs reflect different speaker intents, and consequently elicit different answering strategies. Correctly identifying these relations is therefore a crucial step in automatically answering MSQs. We identify five different types of MSQs in English, and define five novel relations to describe them. We extract over 162,000 MSQs from Stack Exchange to enable future research. Finally, we implement a high-precision baseline classifier based on surface features.</abstract>
       <url hash="28023456">2020.law-1.13</url>
@@ -153,7 +153,7 @@
     <paper id="14">
       <title>Annotating Errors and Emotions in Human-Chatbot Interactions in <fixed-case>I</fixed-case>talian</title>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Viviana</first><last>Patti</last></author>
       <author><first>Marco</first><last>Scalerandi</last></author>
       <author><first>Dario</first><last>Mana</last></author>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="16">
       <title>py<fixed-case>MMAX</fixed-case>2: Deep Access to <fixed-case>MMAX</fixed-case>2 Projects from Python</title>
-      <author><first>Mark-Christoph</first><last>Müller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Müller</last></author>
       <pages>167–173</pages>
       <abstract>pyMMAX2 is an API for processing MMAX2 stand-off annotation data in Python. It provides a lightweight basis for the development of code which opens up the Java- and XML-based ecosystem of MMAX2 for more recent, Python-based NLP and data science methods. While pyMMAX2 is pure Python, and most functionality is implemented from scratch, the API re-uses the complex implementation of the essential business logic for MMAX2 annotation schemes by interfacing with the original MMAX2 Java libraries. pyMMAX2 is available for download at <url>http://github.com/nlpAThits/pyMMAX2</url>.</abstract>
       <url hash="fe723a9c">2020.law-1.16</url>
diff --git a/data/xml/2020.ldl.xml b/data/xml/2020.ldl.xml
index b1362afc7f..eab419d11e 100644
--- a/data/xml/2020.ldl.xml
+++ b/data/xml/2020.ldl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Linked Data in Linguistics (LDL-2020)</booktitle>
       <editor><first>Maxim</first><last>Ionov</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <editor><first>Christian</first><last>Chiarcos</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Julia</first><last>Bosque-Gil</last></editor>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="3">
       <title>Representing Temporal Information in Lexical Linked Data Resources</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <pages>15–22</pages>
       <abstract>The increasing recognition of the utility of Linked Data as a means of publishing lexical resource has helped to underline the need for RDF based data models which have the flexibility and expressivity to be able to represent the most salient kinds of information contained in such resources as structured data, including, notably, information relating to time and the temporal dimension. In this article we describe a perdurantist approach to modelling diachronic lexical information which builds upon work which we have previously presented and which is based on the ontolex-lemon vocabulary. We present two extended examples, one taken from the Oxford English Dictionary, the other from a work on etymology, to show how our approach can handle different kinds of temporal information often found in lexical resources.</abstract>
       <url hash="3f30f4f1">2020.ldl-1.3</url>
@@ -66,7 +66,7 @@
     <paper id="5">
       <title>Terme-à-<fixed-case>LLOD</fixed-case>: Simplifying the Conversion and Hosting of Terminological Resources as Linked Data</title>
       <author><first>Maria Pia</first><last>di Buono</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Mohammad Fazleh</first><last>Elahi</last></author>
       <author><first>Frank</first><last>Grimm</last></author>
       <pages>28–35</pages>
@@ -121,7 +121,7 @@
     <paper id="10">
       <title>Involving Lexicographers in the <fixed-case>LLOD</fixed-case> Cloud with <fixed-case>L</fixed-case>ex<fixed-case>O</fixed-case>, an Easy-to-use Editor of Lemon Lexical Resources</title>
       <author><first>Andrea</first><last>Bellandi</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <pages>70–74</pages>
       <abstract>In this contribution, we show LexO, a user-friendly web collaborative editor of lexical resources based on the lemon model. LexO has been developed in the context of Digital Humanities projects, in which a key point in the design of an editor was the ease of use by lexicographers with no skill in Linked Data or Semantic Web technologies. Though the tool already allows creating a lemon lexicon from scratch and lets a team of users work on it collaboratively, many developments are possible. The involvement of the LLOD community appears now crucial both to find new users and application fields where to test it, and, even more importantly, to understand in which way it should evolve.</abstract>
       <url hash="1de0e9bc">2020.ldl-1.10</url>
diff --git a/data/xml/2020.lifelongnlp.xml b/data/xml/2020.lifelongnlp.xml
index 8d2f4bbe77..e10a3ceab8 100644
--- a/data/xml/2020.lifelongnlp.xml
+++ b/data/xml/2020.lifelongnlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Life-long Learning for Spoken Language Systems</booktitle>
       <editor><first>William M.</first><last>Campbell</last></editor>
-      <editor><first>Alex</first><last>Waibel</last></editor>
-      <editor><first>Dilek</first><last>Hakkani-Tur</last></editor>
-      <editor><first>Timothy J.</first><last>Hazen</last></editor>
+      <editor id="alex-waibel"><first>Alex</first><last>Waibel</last></editor>
+      <editor id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></editor>
+      <editor id="timothy-j-hazen"><first>Timothy J.</first><last>Hazen</last></editor>
       <editor><first>Kevin</first><last>Kilgour</last></editor>
       <editor><first>Eunah</first><last>Cho</last></editor>
       <editor><first>Varun</first><last>Kumar</last></editor>
@@ -38,7 +38,7 @@
       <author><first>Juan</first><last>Hussain</last></author>
       <author><first>Tuan-Nam</first><last>Nguyen</last></author>
       <author><first>Kaihang</first><last>Song</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Alexander</first><last>Waibel</last></author>
       <pages>9–17</pages>
       <abstract>When training speech recognition systems, one often faces the situation that sufficient amounts of training data for the language in question are available but only small amounts of data for the domain in question. This problem is even bigger for end-to-end speech recognition systems that only accept transcribed speech as training data, which is harder and more expensive to obtain than text data. In this paper we present experiments in adapting end-to-end speech recognition systems by a method which is called batch-weighting and which we contrast against regular fine-tuning, i.e., to continue to train existing neural speech recognition models on adaptation data. We perform experiments using theses techniques in adapting to topic, accent and vocabulary, showing that batch-weighting consistently outperforms fine-tuning. In order to show the generalization capabilities of batch-weighting we perform experiments in several languages, i.e., Arabic, English and German. Due to its relatively small computational requirements batch-weighting is a suitable technique for supervised life-long learning during the life-time of a speech recognition system, e.g., from user corrections.</abstract>
diff --git a/data/xml/2020.lincr.xml b/data/xml/2020.lincr.xml
index 7d40559328..33915d6adf 100644
--- a/data/xml/2020.lincr.xml
+++ b/data/xml/2020.lincr.xml
@@ -80,7 +80,7 @@
       <title>The Little Prince in 26 Languages: Towards a Multilingual Neuro-Cognitive Corpus</title>
       <author><first>Sabrina</first><last>Stehwien</last></author>
       <author><first>Lena</first><last>Henke</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
       <author><first>Lars</first><last>Meyer</last></author>
       <pages>43–49</pages>
diff --git a/data/xml/2020.loresmt.xml b/data/xml/2020.loresmt.xml
index be4ed4be79..aacfa10d52 100644
--- a/data/xml/2020.loresmt.xml
+++ b/data/xml/2020.loresmt.xml
@@ -4,14 +4,14 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages</booktitle>
       <editor><first>Alina</first><last>Karakanta</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
       <editor><first>Jade</first><last>Abbott</last></editor>
       <editor><first>John</first><last>Ortega</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
       <editor><first>Surafel Melaku</first><last>Lakew</last></editor>
-      <editor><first>Tommi A</first><last>Pirinen</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
       <editor><first>Xiaobing</first><last>Zhao</last></editor>
@@ -84,7 +84,7 @@
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>38–42</pages>
       <abstract>Neural machine translation (NMT) is a widely accepted approach in the machine translation (MT) community, translating from one natural language to another natural language. Although, NMT shows remarkable performance in both high and low resource languages, it needs sufficient training corpus. The availability of a parallel corpus in low resource language pairs is one of the challenging tasks in MT. To mitigate this issue, NMT attempts to utilize a monolingual corpus to get better at translation for low resource language pairs. Workshop on Technologies for MT of Low Resource Languages (LoResMT 2020) organized shared tasks of low resource language pair translation using zero-shot NMT. Here, the parallel corpus is not used and only monolingual corpora is allowed. We have participated in the same shared task with our team name CNLP-NITS for the Russian-Hindi language pair. We have used masked sequence to sequence pre-training for language generation (MASS) with only monolingual corpus following the unsupervised NMT architecture. The evaluated results are declared at the LoResMT 2020 shared task, which reports that our system achieves the bilingual evaluation understudy (BLEU) score of 0.59, precision score of 3.43, recall score of 5.48, F-measure score of 4.22, and rank-based intuitive bilingual evaluation score (RIBES) of 0.180147 in Russian to Hindi translation. And for Hindi to Russian translation, we have achieved BLEU, precision, recall, F-measure, and RIBES score of 1.11, 4.72, 4.41, 4.56, and 0.026842 respectively.</abstract>
       <url hash="b77fd717">2020.loresmt-1.5</url>
@@ -95,7 +95,7 @@
       <title>Unsupervised Approach for Zero-Shot Experiments: <fixed-case>B</fixed-case>hojpuri–<fixed-case>H</fixed-case>indi and <fixed-case>M</fixed-case>agahi–<fixed-case>H</fixed-case>indi@<fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2020</title>
       <author><first>Amit</first><last>Kumar</last></author>
       <author><first>Rajesh Kumar</first><last>Mundotiya</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>43–46</pages>
       <abstract>This paper reports a Machine Translation (MT) system submitted by the NLPRL team for the Bhojpuri–Hindi and Magahi–Hindi language pairs at LoResMT 2020 shared task. We used an unsupervised domain adaptation approach that gives promising results for zero or extremely low resource languages. Task organizers provide the development and the test sets for evaluation and the monolingual data for training. Our approach is a hybrid approach of domain adaptation and back-translation. Metrics used to evaluate the trained model are BLEU, RIBES, Precision, Recall and F-measure. Our approach gives relatively promising results, with a wide range, of 19.5, 13.71, 2.54, and 3.16 BLEU points for Bhojpuri to Hindi, Magahi to Hindi, Hindi to Bhojpuri and Hindi to Magahi language pairs, respectively.</abstract>
       <url hash="63ed8bbc">2020.loresmt-1.6</url>
@@ -115,10 +115,10 @@
     <paper id="8">
       <title>Improving Multilingual Neural Machine Translation For Low-Resource Languages: <fixed-case>F</fixed-case>rench, <fixed-case>E</fixed-case>nglish - <fixed-case>V</fixed-case>ietnamese</title>
       <author><first>Thi-Vinh</first><last>Ngo</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Khac-Quy</first><last>Dinh</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>55–61</pages>
       <abstract>Prior works have demonstrated that a low-resource language pair can benefit from multilingual machine translation (MT) systems, which rely on many language pairs’ joint training. This paper proposes two simple strategies to address the rare word issue in multilingual MT systems for two low-resource language pairs: French-Vietnamese and English-Vietnamese. The first strategy is about dynamical learning word similarity of tokens in the shared space among source languages while another one attempts to augment the translation ability of rare words through updating their embeddings during the training. Besides, we leverage monolingual data for multilingual MT systems to increase the amount of synthetic parallel corpora while dealing with the data sparsity problem. We have shown significant improvements of up to +1.62 and +2.54 BLEU points over the bilingual baseline systems for both language pairs and released our datasets for the research community.</abstract>
       <url hash="9743bbb3">2020.loresmt-1.8</url>
@@ -130,7 +130,7 @@
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>62–68</pages>
       <abstract>The corpus preparation is one of the important challenging task for the domain of machine translation especially in low resource language scenarios. Country like India where multiple languages exists, machine translation attempts to minimize the communication gap among people with different linguistic backgrounds. Although Google Translation covers automatic translation of various languages all over the world but it lags in some languages including Assamese. In this paper, we have developed EnAsCorp1.0, corpus of English-Assamese low resource pair where parallel and monolingual data are collected from various online sources. We have also implemented baseline systems with statistical machine translation and neural machine translation approaches for the same corpus.</abstract>
       <url hash="cfd86f51">2020.loresmt-1.9</url>
@@ -140,7 +140,7 @@
     <paper id="10">
       <title>Unsupervised Neural Machine Translation for <fixed-case>E</fixed-case>nglish and <fixed-case>M</fixed-case>anipuri</title>
       <author><first>Salam Michael</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>69–78</pages>
       <abstract>Availability of bitext dataset has been a key challenge in the conventional machine translation system which requires surplus amount of parallel data. In this work, we devise an unsupervised neural machine translation (UNMT) system consisting of a transformer based shared encoder and language specific decoders using denoising autoencoder and backtranslation with an additional Manipuri side multiple test reference. We report our work on low resource setting for English (en) - Manipuri (mni) language pair and attain a BLEU score of 3.1 for en-mni and 2.7 for mni-en respectively. Subjective evaluation on translated output gives encouraging findings.</abstract>
       <url hash="83c21583">2020.loresmt-1.10</url>
diff --git a/data/xml/2020.louhi.xml b/data/xml/2020.louhi.xml
index bdca033a9e..cec22fa463 100644
--- a/data/xml/2020.louhi.xml
+++ b/data/xml/2020.louhi.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis</booktitle>
       <editor><first>Eben</first><last>Holderness</last></editor>
-      <editor><first>Antonio</first><last>Jimeno Yepes</last></editor>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Fabio</first><last>Rinaldi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -57,7 +57,7 @@
     <paper id="4">
       <title>Not a cute stroke: Analysis of Rule- and Neural Network-based Information Extraction Systems for Brain Radiology Reports</title>
       <author><first>Andreas</first><last>Grivas</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Richard</first><last>Tobin</last></author>
       <author><first>William</first><last>Whiteley</last></author>
@@ -153,7 +153,7 @@
       <author><first>Nemanja</first><last>Vaci</last></author>
       <author><first>Qiang</first><last>Liu</last></author>
       <author><first>Hao</first><last>Ni</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Alejo</first><last>Nevado-Holgado</last></author>
       <pages>97–103</pages>
       <abstract>In this work we addressed the problem of capturing sequential information contained in longitudinal electronic health records (EHRs). Clinical notes, which is a particular type of EHR data, are a rich source of information and practitioners often develop clever solutions how to maximise the sequential information contained in free-texts. We proposed a systematic methodology for learning from chronological events available in clinical notes. The proposed methodological path signature framework creates a non-parametric hierarchical representation of sequential events of any type and can be used as features for downstream statistical learning tasks. The methodology was developed and externally validated using the largest in the UK secondary care mental health EHR data on a specific task of predicting survival risk of patients diagnosed with Alzheimer’s disease. The signature-based model was compared to a common survival random forest model. Our results showed a 15.4% increase of risk prediction AUC at the time point of 20 months after the first admission to a specialist memory clinic and the signature method outperformed the baseline mixed-effects model by 13.2 %.</abstract>
@@ -166,11 +166,11 @@
       <title>Defining and Learning Refined Temporal Relations in the Clinical Narrative</title>
       <author><first>Kristin</first><last>Wright-Bettner</last></author>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>104–114</pages>
       <abstract>We present refinements over existing temporal relation annotations in the Electronic Medical Record clinical narrative. We refined the THYME corpus annotations to more faithfully represent nuanced temporality and nuanced temporal-coreferential relations. The main contributions are in re-defining CONTAINS and OVERLAP relations into CONTAINS, CONTAINS-SUBEVENT, OVERLAP and NOTED-ON. We demonstrate that these refinements lead to substantial gains in learnability for state-of-the-art transformer models as compared to previously reported results on the original THYME corpus. We thus establish a baseline for the automatic extraction of these refined temporal relations. Although our study is done on clinical narrative, we believe it addresses far-reaching challenges that are corpus- and domain- agnostic.</abstract>
@@ -230,10 +230,10 @@
     </paper>
     <paper id="16">
       <title>Detection of Mental Health from <fixed-case>R</fixed-case>eddit via Deep Contextualized Representations</title>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Sarah Ita</first><last>Levitan</last></author>
       <author><first>Jonathan</first><last>Zomick</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>147–156</pages>
       <abstract>We address the problem of automatic detection of psychiatric disorders from the linguistic content of social media posts. We build a large scale dataset of Reddit posts from users with eight disorders and a control user group. We extract and analyze linguistic characteristics of posts and identify differences between diagnostic groups. We build strong classification models based on deep contextualized word representations and show that they outperform previously applied statistical models with simple linguistic features by large margins. We compare user-level and post-level classification performance, as well as an ensembled multiclass model.</abstract>
       <url hash="a341236f">2020.louhi-1.16</url>
diff --git a/data/xml/2020.lr4sshoc.xml b/data/xml/2020.lr4sshoc.xml
index 46548b1c90..a0e3a3822c 100644
--- a/data/xml/2020.lr4sshoc.xml
+++ b/data/xml/2020.lr4sshoc.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop about Language Resources for the SSH Cloud</booktitle>
-      <editor><first>Daan</first><last>Broeder</last></editor>
+      <editor id="daan-broeder"><first>Daan</first><last>Broeder</last></editor>
       <editor><first>Maria</first><last>Eskevich</last></editor>
       <editor><first>Monica</first><last>Monachini</last></editor>
       <publisher>European Language Resources Association</publisher>
@@ -56,7 +56,7 @@
       <author><first>Maria</first><last>Pontiki</last></author>
       <author><first>Maria</first><last>Gavriilidou</last></author>
       <author><first>Dimitris</first><last>Gkoumas</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>19–26</pages>
       <abstract>We present a replication of a data-driven and linguistically inspired Verbal Aggression analysis framework that was designed to examine Twitter verbal attacks against predefined target groups of interest as an indicator of xenophobic attitudes during the financial crisis in Greece, in particular during the period 2013-2016. The research goal in this paper is to re-examine Verbal Aggression as an indicator of xenophobic attitudes in Greek Twitter three years later, in order to trace possible changes regarding the main targets, the types and the content of the verbal attacks against the same targets in the post crisis era, given also the ongoing refugee crisis and the political landscape in Greece as it was shaped after the elections in 2019. The results indicate an interesting rearrangement of the main targets of the verbal attacks, while the content and the types of the attacks provide valuable insights about the way these targets are being framed as compared to the respective dominant perceptions and stereotypes about them during the period 2013-2016.</abstract>
       <url hash="6f392cf7">2020.lr4sshoc-1.4</url>
@@ -97,7 +97,7 @@
     </paper>
     <paper id="8">
       <title>Stretching Disciplinary Boundaries in Language Resource Development and Use: a <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Position Paper</title>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <pages>39–41</pages>
       <abstract>Given the persistent gap between demand and supply, the impetus to reuse language resources is great. Researchers benefit from building upon the work of others including reusing data, tools and methodology. Such reuse should always consider the original intent of the language resource and how that impacts potential reanalysis. When the reuse crosses disciplinary boundaries, the re-user also needs to consider how research standards that differ between social science and humanities on the one hand and human language technologies on the other might lead to differences in unspoken assumptions. Data centers that aim to support multiple research communities have a responsibility to build bridges across disciplinary divides by sharing data in all directions, encouraging re-use and re-sharing and engaging directly in research that improves methodologies.</abstract>
       <url hash="37c1dc08">2020.lr4sshoc-1.8</url>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="9">
       <title>Crossing the <fixed-case>SSH</fixed-case> Bridge with Interview Data</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <pages>42–44</pages>
       <abstract>Spoken audio data, such as interview data, is a scientific instrument used by researchers in various disciplines crossing the boundaries of social sciences and humanities. In this paper, we will have a closer look at a portal designed to perform speech-to-text conversion on audio recordings through Automatic Speech Recognition (ASR) in the CLARIN infrastructure. Within the cluster cross-domain EU project SSHOC the potential value of such a linguistic tool kit for processing spoken language recording has found uptake in a webinar about the topic, and in a task addressing audio analysis of panel survey data. The objective of this contribution is to show that the processing of interviews as a research instrument has opened up a fascinating and fruitful area of collaboration between Social Sciences and Humanities (SSH).</abstract>
       <url hash="ccb18aa6">2020.lr4sshoc-1.9</url>
diff --git a/data/xml/2020.lrec.xml b/data/xml/2020.lrec.xml
index 8a7fc4d713..02afc075dc 100644
--- a/data/xml/2020.lrec.xml
+++ b/data/xml/2020.lrec.xml
@@ -3,20 +3,20 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Twelfth Language Resources and Evaluation Conference</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
       <editor><first>Philippe</first><last>Blache</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Christopher</first><last>Cieri</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="christopher-cieri"><first>Christopher</first><last>Cieri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Sara</first><last>Goggi</last></editor>
       <editor><first>Hitoshi</first><last>Isahara</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
       <editor><first>Hélène</first><last>Mazo</last></editor>
-      <editor><first>Asuncion</first><last>Moreno</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -32,7 +32,7 @@
       <title>Neural Mention Detection</title>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>1–10</pages>
       <abstract>Mention detection is an important preprocessing step for annotation and interpretation in applications such as NER and coreference resolution, but few stand-alone neural models have been proposed able to handle the full range of mentions. In this work, we propose and compare three neural network-based approaches to mention detection. The first approach is based on the mention detection part of a state of the art coreference resolution system; the second uses ELMO embeddings together with a bidirectional LSTM and a biaffine classifier; the third approach uses the recently introduced BERT model. Our best model (using a biaffine classifier) achieves gains of up to 1.8 percentage points on mention recall when compared with a strong baseline in a HIGH RECALL coreference annotation setting. The same model achieves improvements of up to 5.3 and 6.2 p.p. when compared with the best-reported mention detection F1 on the CONLL and CRAC coreference data sets respectively in a HIGH F1 annotation setting. We then evaluate our models for coreference resolution by using mentions predicted by our best model in start-of-the-art coreference systems. The enhanced model achieved absolute improvements of up to 1.7 and 0.7 p.p. when compared with our strong baseline systems (pipeline system and end-to-end system) respectively. For nested NER, the evaluation of our model on the GENIA corpora shows that our model matches or outperforms state-of-the-art models despite not being specifically designed for this task.</abstract>
       <url hash="a1c9873e">2020.lrec-1.1</url>
@@ -43,7 +43,7 @@
       <title>A Cluster Ranking Model for Full Anaphora Resolution</title>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>11–20</pages>
       <abstract>Anaphora resolution (coreference) systems designed for the CONLL 2012 dataset typically cannot handle key aspects of the full anaphora resolution task such as the identification of singletons and of certain types of non-referring expressions (e.g., expletives), as these aspects are not annotated in that corpus. However, the recently released dataset for the CRAC 2018 Shared Task can now be used for that purpose. In this paper, we introduce an architecture to simultaneously identify non-referring expressions (including expletives, predicative s, and other types) and build coreference chains, including singletons. Our cluster-ranking system uses an attention mechanism to determine the relative importance of the mentions in the same cluster. Additional classifiers are used to identify singletons and non-referring markables. Our contributions are as follows. First all, we report the first result on the CRAC data using system mentions; our result is 5.8% better than the shared task baseline system, which used gold mentions. Second, we demonstrate that the availability of singleton clusters and non-referring expressions can lead to substantially improved performance on non-singleton clusters as well. Third, we show that despite our model not being designed specifically for the CONLL data, it achieves a score equivalent to that of the state-of-the-art system by Kantor and Globerson (2019) on that dataset.</abstract>
       <url hash="6812d27f">2020.lrec-1.2</url>
@@ -74,7 +74,7 @@
       <title><fixed-case>N</fixed-case>o<fixed-case>E</fixed-case>l: An Annotated Corpus for Noun Ellipsis in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Payal</first><last>Khullar</last></author>
       <author><first>Kushal</first><last>Majmundar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>34–43</pages>
       <abstract>Ellipsis resolution has been identified as an important step to improve the accuracy of mainstream Natural Language Processing (NLP) tasks such as information retrieval, event extraction, dialog systems, etc. Previous computational work on ellipsis resolution has focused on one type of ellipsis, namely Verb Phrase Ellipsis (VPE) and a few other related phenomenon. We extend the study of ellipsis by presenting the No(oun)El(lipsis) corpus - an annotated corpus for noun ellipsis and closely related phenomenon using the first hundred movies of Cornell Movie Dialogs Dataset. The annotations are carried out in a standoff annotation scheme that encodes the position of the licensor, the antecedent boundary, and Part-of-Speech (POS) tags of the licensor and antecedent modifier. Our corpus has 946 instances of exophoric and endophoric noun ellipsis, making it the biggest resource of noun ellipsis in English, to the best of our knowledge. We present a statistical study of our corpus with novel insights on the distribution of noun ellipsis, its licensors and antecedents. Finally, we perform the tasks of detection and resolution of noun ellipsis with different classifiers trained on our corpus and report baseline results.</abstract>
       <url hash="e56af292">2020.lrec-1.5</url>
@@ -106,7 +106,7 @@
       <title>A Study on Entity Resolution for Email Conversations</title>
       <author><first>Parag Pravin</first><last>Dakle</last></author>
       <author><first>Takshak</first><last>Desai</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>65–73</pages>
       <abstract>This paper investigates the problem of entity resolution for email conversations and presents a seed annotated corpus of email threads labeled with entity coreference chains. Characteristics of email threads concerning reference resolution are first discussed, and then the creation of the corpus and annotation steps are explained. Finally, performance of the current state-of-the-art deep learning models on the seed corpus is evaluated and qualitative error analysis on the predictions obtained is presented.</abstract>
       <url hash="db21d3b8">2020.lrec-1.8</url>
@@ -116,7 +116,7 @@
     <paper id="9">
       <title>Model-based Annotation of Coreference</title>
       <author><first>Rahul</first><last>Aralikatte</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>74–79</pages>
       <abstract>Humans do not make inferences over texts, but over models of what texts are about. When annotators are asked to annotate coreferent spans of text, it is therefore a somewhat unnatural task. This paper presents an alternative in which we preprocess documents, linking entities to a knowledge base, and turn the coreference annotation task – in our case limited to pronouns – into an annotation task where annotators are asked to assign pronouns to entities. Model-based annotation is shown to lead to faster annotation and higher inter-annotator agreement, and we argue that it also opens up an alternative approach to coreference resolution. We present two new coreference benchmark datasets, for English Wikipedia and English teacher-student dialogues, and evaluate state-of-the-art coreference resolvers on them.</abstract>
       <url hash="5326dd5f">2020.lrec-1.9</url>
@@ -128,7 +128,7 @@
       <author><first>Rodrigo</first><last>Wilkens</last></author>
       <author><first>Bruno</first><last>Oberle</last></author>
       <author><first>Frédéric</first><last>Landragin</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>80–89</pages>
       <abstract>Coreference resolution aims at identifying and grouping all mentions referring to the same entity. In French, most systems run different setups, making their comparison difficult. In this paper, we present an extensive comparison of several coreference resolution systems for French. The systems have been trained on two corpora (ANCOR for spoken language and Democrat for written language) annotated with coreference chains, and augmented with syntactic and semantic information. The models are compared with different configurations (e.g. with and without singletons). In addition, we evaluate mention detection and coreference resolution apart. We present a full-stack model that outperforms other approaches. This model allows us to study the impact of mention detection errors on coreference resolution. Our analysis shows that mention detection can be improved by focusing on boundary identification while advances in the pronoun-noun relation detection can help the coreference task. Another contribution of this work is the first end-to-end neural French coreference resolution model trained on Democrat (written texts), which compares to the state-of-the-art systems for oral French.</abstract>
       <url hash="e4d8945c">2020.lrec-1.10</url>
@@ -138,7 +138,7 @@
     <paper id="11">
       <title>Cross-lingual Zero Pronoun Resolution</title>
       <author><first>Abdulrahman</first><last>Aloraini</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>90–98</pages>
       <abstract>In languages like Arabic, Chinese, Italian, Japanese, Korean, Portuguese, Spanish, and many others, predicate arguments in certain syntactic positions are not realized instead of being realized as overt pronouns, and are thus called zero- or null-pronouns. Identifying and resolving such omitted arguments is crucial to machine translation, information extraction and other NLP tasks, but depends heavily on semantic coherence and lexical relationships. We propose a BERT-based cross-lingual model for zero pronoun resolution, and evaluate it on the Arabic and Chinese portions of OntoNotes 5.0. As far as we know, ours is the first neural model of zero-pronoun resolution for Arabic; and our model also outperforms the state-of-the-art for Chinese. In the paper we also evaluate BERT feature extraction and fine-tune models on the task, and compare them with our model. We also report on an investigation of BERT layers indicating which layer encodes the most suitable representation for the task.</abstract>
       <url hash="ee7035c8">2020.lrec-1.11</url>
@@ -149,7 +149,7 @@
       <title>Exploiting Cross-Lingual Hints to Discover Event Pronouns</title>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>99–103</pages>
       <abstract>Non-nominal co-reference is much less studied than nominal coreference, partly because of the lack of annotated corpora. We explore the possibility to exploit parallel multilingual corpora as a means of cheap supervision for the classification of three different readings of the English pronoun ‘it’: entity, event or pleonastic, from their translation in several languages. We found that the ‘event’ reading is not very frequent, but can be easily predicted provided that the construction used to translate the ‘it’ example is a pronoun as well. These cases, nevertheless, are not enough to generalize to other types of non-nominal reference.</abstract>
       <url hash="225c81f3">2020.lrec-1.12</url>
@@ -173,7 +173,7 @@
       <author><first>Yunfei</first><last>Long</last></author>
       <author><first>Mingyu</first><last>Wan</last></author>
       <author><first>Jinghang</first><last>Gu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>112–119</pages>
       <abstract>Deep neural network models have played a critical role in sentiment analysis with promising results in the recent decade. One of the essential challenges, however, is how external sentiment knowledge can be effectively utilized. In this work, we propose a novel affection-driven approach to incorporating affective knowledge into neural network models. The affective knowledge is obtained in the form of a lexicon under the Affect Control Theory (ACT), which is represented by vectors of three-dimensional attributes in Evaluation, Potency, and Activity (EPA). The EPA vectors are mapped to an affective influence value and then integrated into Long Short-term Memory (LSTM) models to highlight affective terms. Experimental results show a consistent improvement of our approach over conventional LSTM models by 1.0% to 1.5% in accuracy on three large benchmark datasets. Evaluations across a variety of algorithms have also proven the effectiveness of leveraging affective terms for deep model enhancement.</abstract>
@@ -187,7 +187,7 @@
       <author><first>Jonathan</first><last>Brennan</last></author>
       <author><first>Wen-Ming</first><last>Luh</last></author>
       <author><first>Berta</first><last>Franzluebbers</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>120–125</pages>
       <abstract>The Alice Datasets are a set of datasets based on magnetic resonance data and electrophysiological data, collected while participants heard a story in English. Along with the datasets and the text of the story, we provide a variety of different linguistic and computational measures ranging from prosodic predictors to predictors capturing hierarchical syntactic information. These ecologically valid datasets can be easily reused to replicate prior work and to test new hypotheses about natural language comprehension in the brain.</abstract>
       <url hash="470ecf5a">2020.lrec-1.15</url>
@@ -209,7 +209,7 @@
     </paper>
     <paper id="17">
       <title>Cortical Speech Databases For Deciphering the Articulatory Code</title>
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <pages>133–137</pages>
       <abstract>The paper relates to following ‘AC-hypotheses’: The articulatory code (AC) is a neural code exchanging multi-item messages between the short-term memory and cortical areas as the vSMC and STG. In these areas already neurons active in the presence of articulatory features have been measured. The AC codes the content of speech segmented in chunks and is the same for both modalities - speech perception and speech production. Each AC-message is related to a syllable. The items of each message relate to coordinated articulatory gestures composing the syllable. The mechanism to transport the AC and to segment the auditory signal is based on Ɵ/γ-oscillations, where a Ɵ-cycle has the duration of a Ɵ-syllable. The paper describes the findings from neuroscience, phonetics and the science of evolution leading to the AC-hypotheses. The paper proposes to verify the AC-hypotheses by measuring the activity of all ensembles of neurons coding and decoding the AC. Due to state of the art, the cortical measurements to be prepared, done and further processed need a high effort from scientists active in different areas. We propose to launch a project to produce cortical speech databases with cortical recordings synchronized with the speech signal allowing to decipher the articulatory code.</abstract>
       <url hash="74163bc0">2020.lrec-1.17</url>
@@ -243,7 +243,7 @@
     <paper id="20">
       <title>The <fixed-case>ACQDIV</fixed-case> Corpus Database and Aggregation Pipeline</title>
       <author><first>Anna</first><last>Jancso</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Sabine</first><last>Stoll</last></author>
       <pages>156–165</pages>
       <abstract>We present the ACQDIV corpus database and aggregation pipeline, a tool developed as part of the European Research Council (ERC) funded project ACQDIV, which aims to identify the universal cognitive processes that allow children to acquire any language. The corpus database represents 15 corpora from 14 typologically maximally diverse languages. Here we give an overview of the project, database, and our extensible software package for adding more corpora to the current language sample. Lastly, we discuss how we use the corpus database to mine for universal patterns in child language acquisition corpora and we describe avenues for future research.</abstract>
@@ -269,7 +269,7 @@
       <title>Orthographic Codes and the Neighborhood Effect: Lessons from Information Theory</title>
       <author><first>Stéphan</first><last>Tulkens</last></author>
       <author><first>Dominiek</first><last>Sandra</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>172–181</pages>
       <abstract>We consider the orthographic neighborhood effect: the effect that words with more orthographic similarity to other words are read faster. The neighborhood effect serves as an important control variable in psycholinguistic studies of word reading, and explains variance in addition to word length and word frequency. Following previous work, we model the neighborhood effect as the average distance to neighbors in feature space for three feature sets: slots, character ngrams and skipgrams. We optimize each of these feature sets and find evidence for language-independent optima, across five megastudy corpora from five alphabetic languages. Additionally, we show that weighting features using the inverse of mutual information (MI) improves the neighborhood effect significantly for all languages. We analyze the inverse feature weighting, and show that, across languages, grammatical morphemes get the lowest weights. Finally, we perform the same experiments on Korean Hangul, a non-alphabetic writing system, where we find the opposite results: slower responses as a function of denser neighborhoods, and a negative effect of inverse feature weighting. This raises the question of whether this is a cognitive effect, or an effect of the way we represent Hangul orthography, and indicates more research is needed.</abstract>
       <url hash="a01cc866">2020.lrec-1.22</url>
@@ -334,8 +334,8 @@
       <author><first>Kijong</first><last>Han</last></author>
       <author><first>Kuntae</first><last>Kim</last></author>
       <author><first>Sooji</first><last>Yoon</last></author>
-      <author><first>Eun-kyung</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="eun-kyung-kim"><first>Eun-kyung</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>212–219</pages>
       <abstract>Information extraction from unstructured texts plays a vital role in the field of natural language processing. Although there has been extensive research into each information extraction task (i.e., entity linking, coreference resolution, and relation extraction), data are not available for a continuous and coherent evaluation of all information extraction tasks in a comprehensive framework. Given that each task is performed and evaluated with a different dataset, analyzing the effect of the previous task on the next task with a single dataset throughout the information extraction process is impossible. This paper aims to propose a Korean information extraction initiative point and promote research in this field by presenting crowdsourcing data collected for four information extraction tasks from the same corpus and the training and evaluation results for each task of a state-of-the-art model. These machine learning data for Korean information extraction are the first of their kind, and there are plans to continuously increase the data volume. The test results will serve as an initiative result for each Korean information extraction task and are expected to serve as a comparison target for various studies on Korean information extraction using the data collected in this study.</abstract>
       <url hash="e7008284">2020.lrec-1.27</url>
@@ -366,13 +366,13 @@
     </paper>
     <paper id="30">
       <title>Crowdsourcing in the Development of a Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: A Case Study of <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Youngbin</first><last>Noh</last></author>
       <author><first>Ji Yoon</first><last>Han</last></author>
       <author><first>Tae Hwan</first><last>Oh</last></author>
       <author><first>Hyonsu</first><last>Choe</last></author>
       <author><first>Hansaem</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>236–244</pages>
       <abstract>Using current methods, the construction of multilingual resources in FrameNet is an expensive and complex task. While crowdsourcing is a viable alternative, it is difficult to include non-native English speakers in such efforts as they often have difficulty with English-based FrameNet tools. In this work, we investigated cross-lingual issues in crowdsourcing approaches for multilingual FrameNets, specifically in the context of the newly constructed Korean FrameNet. To accomplish this, we evaluated the effectiveness of various crowdsourcing settings whereby certain types of information are provided to workers, such as English definitions in FrameNet or translated definitions. We then evaluated whether the crowdsourced results accurately captured the meaning of frames both cross-culturally and cross-linguistically, and found that by allowing the crowd workers to make intuitive choices, they achieved a quality comparable to that of trained FrameNet experts (F1 &gt; 0.75). The outcomes of this work are now publicly available as a new release of Korean FrameNet 1.1.</abstract>
       <url hash="118e54bc">2020.lrec-1.30</url>
@@ -402,7 +402,7 @@
     <paper id="33">
       <title>Do You Believe It Happened? Assessing <fixed-case>C</fixed-case>hinese Readers’ Veridicality Judgments</title>
       <author><first>Yu-Yun</first><last>Chang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>259–267</pages>
       <abstract>This work collects and studies Chinese readers’ veridicality judgments to news events (whether an event is viewed as happening or not). For instance, in “The FBI alleged in court documents that Zazi had admitted having a handwritten recipe for explosives on his computer”, do people believe that Zazi had a handwritten recipe for explosives? The goal is to observe the pragmatic behaviors of linguistic features under context which affects readers in making veridicality judgments. Exploring from the datasets, it is found that features such as event-selecting predicates (ESP), modality markers, adverbs, temporal information, and statistics have an impact on readers’ veridicality judgments. We further investigated that modality markers with high certainty do not necessarily trigger readers to have high confidence in believing an event happened. Additionally, the source of information introduced by an ESP presents low effects to veridicality judgments, even when an event is attributed to an authority (e.g. “The FBI”). A corpus annotated with Chinese readers’ veridicality judgments is released as the Chinese PragBank for further analysis.</abstract>
       <url hash="26c66370">2020.lrec-1.33</url>
@@ -414,7 +414,7 @@
       <author><first>Lionel</first><last>Nicolas</last></author>
       <author><first>Verena</first><last>Lyding</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
-      <author><first>Corina</first><last>Forascu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forascu</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Katerina</first><last>Zdravkova</last></author>
       <author><first>Iztok</first><last>Kosem</last></author>
@@ -428,7 +428,7 @@
       <author><first>Anisia</first><last>Katinskaia</last></author>
       <author><first>Anabela</first><last>Barreiro</last></author>
       <author><first>Lavinia</first><last>Aparaschivei</last></author>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <pages>268–278</pages>
       <abstract>We introduce in this paper a generic approach to combine implicit crowdsourcing and language learning in order to mass-produce language resources (LRs) for any language for which a crowd of language learners can be involved. We present the approach by explaining its core paradigm that consists in pairing specific types of LRs with specific exercises, by detailing both its strengths and challenges, and by discussing how much these challenges have been addressed at present. Accordingly, we also report on on-going proof-of-concept efforts aiming at developing the first prototypical implementation of the approach in order to correct and extend an LR called ConceptNet based on the input crowdsourced from language learners. We then present an international network called the European Network for Combining Language Learning with Crowdsourcing Techniques (enetCollect) that provides the context to accelerate the implementation of this generic approach. Finally, we exemplify how it can be used in several language learning scenarios to produce a multitude of NLP resources and how it can therefore alleviate the long-standing NLP issue of the lack of LRs.</abstract>
       <url hash="40bbaa93">2020.lrec-1.34</url>
@@ -451,7 +451,7 @@
       <author><first>Francisco Javier</first><last>Chiyah Garcia</last></author>
       <author><first>José</first><last>Lopes</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>288–297</pages>
       <abstract>Large corpora of task-based and open-domain conversational dialogues are hugely valuable in the field of data-driven dialogue systems. Crowdsourcing platforms, such as Amazon Mechanical Turk, have been an effective method for collecting such large amounts of data. However, difficulties arise when task-based dialogues require expert domain knowledge or rapid access to domain-relevant information, such as databases for tourism. This will become even more prevalent as dialogue systems become increasingly ambitious, expanding into tasks with high levels of complexity that require collaboration and forward planning, such as in our domain of emergency response. In this paper, we propose CRWIZ: a framework for collecting real-time Wizard of Oz dialogues through crowdsourcing for collaborative, complex tasks. This framework uses semi-guided dialogue to avoid interactions that breach procedures and processes only known to experts, while enabling the capture of a wide variety of interactions.</abstract>
       <url hash="fe9447e4">2020.lrec-1.36</url>
@@ -508,13 +508,13 @@
       <author><first>Matthias</first><last>Butterweck</last></author>
       <author><first>Cathy</first><last>Chua</last></author>
       <author><first>Catia</first><last>Cucchiarin</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Hanieh</first><last>Habibi</last></author>
       <author><first>Neasa</first><last>Ní Chiaráin</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>323–331</pages>
       <abstract>LARA (Learning and Reading Assistant) is an open source platform whose purpose is to support easy conversion of plain texts into multimodal online versions suitable for use by language learners. This involves semi-automatically tagging the text, adding other annotations and recording audio. The platform is suitable for creating texts in multiple languages via crowdsourcing techniques that can be used for teaching a language via reading and listening. We present results of initial experiments by various collaborators where we measure the time required to produce substantial LARA resources, up to the length of short novels, in Dutch, English, Farsi, French, German, Icelandic, Irish, Swedish and Turkish. The first results are encouraging. Although there are some startup problems, the conversion task seems manageable for the languages tested so far. The resulting enriched texts are posted online and are freely available in both source and compiled form.</abstract>
       <url hash="1aea5156">2020.lrec-1.40</url>
@@ -524,9 +524,9 @@
     <paper id="41">
       <title>A Dataset for Investigating the Impact of Feedback on Student Revision Outcome</title>
       <author><first>Ildiko</first><last>Pilan</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
       <pages>332–339</pages>
       <abstract>We present an annotation scheme and a dataset of teacher feedback provided for texts written by non-native speakers of English. The dataset consists of student-written sentences in their original and revised versions with teacher feedback provided for the errors. Feedback appears both in the form of open-ended comments and error category tags. We focus on a specific error type, namely linking adverbial (e.g. however, moreover) errors. The dataset has been annotated for two aspects: (i) revision outcome establishing whether the re-written student sentence was correct and (ii) directness, indicating whether teachers provided explicitly the correction in their feedback. This dataset allows for studies around the characteristics of teacher feedback and how these influence students’ revision outcome. We describe the data preparation process and we present initial statistical investigations regarding the effect of different feedback characteristics on revision outcome. These show that open-ended comments and mitigating expressions appear in a higher proportion of successful revisions than unsuccessful ones, while directness and metalinguistic terms have no effect. Given that the use of this type of data is relatively unexplored in natural language processing (NLP) applications, we also report some observations and challenges when working with feedback data.</abstract>
       <url hash="41d86e18">2020.lrec-1.41</url>
@@ -574,8 +574,8 @@
       <title>A Process-oriented Dataset of Revisions during Writing</title>
       <author><first>Rianne</first><last>Conijn</last></author>
       <author><first>Emily</first><last>Dux Speltz</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
-      <author><first>Luuk</first><last>Van Waes</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
+      <author id="luuk-van-waes"><first>Luuk</first><last>Van Waes</last></author>
       <author><first>Evgeny</first><last>Chukharev-Hudilainen</last></author>
       <pages>363–368</pages>
       <abstract>Revision plays a major role in writing and the analysis of writing processes. Revisions can be analyzed using a product-oriented approach (focusing on a finished product, the text that has been produced) or a process-oriented approach (focusing on the process that the writer followed to generate this product). Although several language resources exist for the product-oriented approach to revisions, there are hardly any resources available yet for an in-depth analysis of the process of revisions. Therefore, we provide an extensive dataset on revisions made during writing (accessible via <url>https://hdl.handle.net/10411/VBDYGX</url>). This dataset is based on keystroke data and eye tracking data of 65 students from a variety of backgrounds (undergraduate and graduate English as a first language and English as a second language students) and a variety of tasks (argumentative text and academic abstract). In total, 7,120 revisions were identified in the dataset. For each revision, 18 features have been manually annotated and 31 features have been automatically extracted. As a case study, we show two potential use cases of the dataset. In addition, future uses of the dataset are described.</abstract>
@@ -600,7 +600,7 @@
     <paper id="47">
       <title><fixed-case>TLT</fixed-case>-school: a Corpus of Non Native Children Speech</title>
       <author><first>Roberto</first><last>Gretter</last></author>
-      <author><first>Marco</first><last>Matassoni</last></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last></author>
       <author><first>Stefano</first><last>Bannò</last></author>
       <author><first>Falavigna</first><last>Daniele</last></author>
       <pages>378–385</pages>
@@ -624,7 +624,7 @@
       <title>Quality Focused Approach to a Learner Corpus Development</title>
       <author><first>Roberts</first><last>Darģis</last></author>
       <author><first>Ilze</first><last>Auziņa</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
       <author><first>Inga</first><last>Kaija</last></author>
       <pages>392–396</pages>
       <abstract>The paper presents quality focused approach to a learner corpus development. The methodology was developed with multiple design considerations put in place to make the annotation process easier and at the same time reduce the amount of mistakes that could be introduced due to inconsistent text correction or carelessness. The approach suggested in this paper consists of multiple parts: comparison of digitized texts by several annotators, text correction, automated morphological analysis, and manual review of annotations. The described approach is used to create Latvian Language Learner corpus (LaVA) which is part of a currently ongoing project Development of Learner corpus of Latvian: methods, tools and applications.</abstract>
@@ -634,7 +634,7 @@
     </paper>
     <paper id="50">
       <title>An Exploratory Study into Automated Précis Grading</title>
-      <author><first>Orphee</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
       <author><first>Senne</first><last>Van Hoecke</last></author>
       <pages>397–404</pages>
       <abstract>Automated writing evaluation is a popular research field, but the main focus has been on evaluating argumentative essays. In this paper, we consider a different genre, namely précis texts. A précis is a written text that provides a coherent summary of main points of a spoken or written text. We present a corpus of English précis texts which all received a grade assigned by a highly-experienced English language teacher and were subsequently annotated following an exhaustive error typology. With this corpus we trained a machine learning model which relies on a number of linguistic, automatic summarization and AWE features. Our results reveal that this model is able to predict the grade of précis texts with only a moderate error margin.</abstract>
@@ -645,8 +645,8 @@
     <paper id="51">
       <title>Adjusting Image Attributes of Localized Regions with Low-level Dialogue</title>
       <author><first>Tzu-Hsiang</first><last>Lin</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Jean</first><last>Oh</last></author>
       <pages>405–412</pages>
@@ -658,7 +658,7 @@
     <paper id="52">
       <title>Alignment Annotation for Clinic Visit Dialogue to Clinical Note Sentence Language Generation</title>
       <author><first>Wen-wai</first><last>Yim</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <author><first>Jenny</first><last>Huang</last></author>
       <author><first>Micah</first><last>Grossman</last></author>
       <pages>413–421</pages>
@@ -678,7 +678,7 @@
       <author><first>Adarsh</first><last>Kumar</last></author>
       <author><first>Anuj</first><last>Goyal</last></author>
       <author><first>Peter</first><last>Ku</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>422–428</pages>
       <abstract>MultiWOZ 2.0 (Budzianowski et al., 2018) is a recently released multi-domain dialogue dataset spanning 7 distinct domains and containing over 10,000 dialogues. Though immensely useful and one of the largest resources of its kind to-date, MultiWOZ 2.0 has a few shortcomings. Firstly, there are substantial noise in the dialogue state annotations and dialogue utterances which negatively impact the performance of state-tracking models. Secondly, follow-up work (Lee et al., 2019) has augmented the original dataset with user dialogue acts. This leads to multiple co-existent versions of the same dataset with minor modifications. In this work we tackle the aforementioned issues by introducing MultiWOZ 2.1. To fix the noisy state annotations, we use crowdsourced workers to re-annotate state and utterances based on the original utterances in the dataset. This correction process results in changes to over 32% of state annotations across 40% of the dialogue turns. In addition, we fix 146 dialogue utterances by canonicalizing slot values in the utterances to the values in the dataset ontology. To address the second problem, we combined the contributions of the follow-up works into MultiWOZ 2.1. Hence, our dataset also includes user dialogue acts as well as multiple slot descriptions per dialogue state slot. We then benchmark a number of state-of-the-art dialogue state tracking models on the MultiWOZ 2.1 dataset and show the joint state tracking performance on the corrected state annotations. We are publicly releasing MultiWOZ 2.1 to the community, hoping that this dataset resource will allow for more effective models across various dialogue subproblems to be built in the future.</abstract>
       <url hash="2cccfd61">2020.lrec-1.53</url>
@@ -702,8 +702,8 @@
       <author><first>Arantxa</first><last>Otegi</last></author>
       <author><first>Aitor</first><last>Agirre</last></author>
       <author><first>Jon Ander</first><last>Campos</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>436–442</pages>
       <abstract>Conversational Question Answering (CQA) systems meet user information needs by having conversations with them, where answers to the questions are retrieved from text. There exist a variety of datasets for English, with tens of thousands of training examples, and pre-trained language models have allowed to obtain impressive results. The goal of our research is to test the performance of CQA systems under low-resource conditions which are common for most non-English languages: small amounts of native annotations and other limitations linked to low resource languages, like lack of crowdworkers or smaller wikipedias. We focus on the Basque language, and present the first non-English CQA dataset and results. Our experiments show that it is possible to obtain good results with low amounts of native data thanks to cross-lingual transfer, with quality comparable to those obtained for English. We also discovered that dialogue history models are not directly transferable to another language, calling for further research. The dataset is publicly available.</abstract>
       <url hash="39d8588d">2020.lrec-1.55</url>
@@ -726,12 +726,12 @@
       <title><fixed-case>BLISS</fixed-case>: An Agent for Collecting Spoken Dialogue Data about Health and Well-being</title>
       <author><first>Jelte</first><last>van Waterschoot</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Arif</first><last>Khan</last></author>
-      <author><first>Esther</first><last>Klabbers</last></author>
+      <author id="mohammed-arif-khan"><first>Arif</first><last>Khan</last></author>
+      <author id="esther-klabbers"><first>Esther</first><last>Klabbers</last></author>
       <author><first>Marcel</first><last>de Korte</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>449–458</pages>
       <abstract>An important objective in health-technology is the ability to gather information about people’s well-being. Structured interviews can be used to obtain this information, but are time-consuming and not scalable. Questionnaires provide an alternative way to extract such information, though typically lack depth. In this paper, we present our first prototype of the BLISS agent, an artificial intelligent agent which intends to automatically discover what makes people happy and healthy. The goal of Behaviour-based Language-Interactive Speaking Systems (BLISS) is to understand the motivations behind people’s happiness by conducting a personalized spoken dialogue based on a happiness model. We built our first prototype of the model to collect 55 spoken dialogues, in which the BLISS agent asked questions to users about their happiness and well-being. Apart from a description of the BLISS architecture, we also provide details about our dataset, which contains over 120 activities and 100 motivations and is made available for usage.</abstract>
       <url hash="f3b4fb2d">2020.lrec-1.57</url>
@@ -852,8 +852,8 @@
     <paper id="67">
       <title>Mapping the Dialog Act Annotations of the <fixed-case>LEGO</fixed-case> Corpus into <fixed-case>ISO</fixed-case> 24617-2 Communicative Functions</title>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
       <pages>531–539</pages>
       <abstract>ISO 24617-2, the ISO standard for dialog act annotation, sets the ground for more comparable research in the area. However, the amount of data annotated according to it is still reduced, which impairs the development of approaches for automatic recognition. In this paper, we describe a mapping of the original dialog act labels of the LEGO corpus, which have been neglected, into the communicative functions of the standard. Although this does not lead to a complete annotation according to the standard, the 347 dialogs provide a relevant amount of data that can be used in the development of automatic communicative function recognition approaches, which may lead to a wider adoption of the standard. Using the 17 English dialogs of the DialogBank as gold standard, our preliminary experiments have shown that including the mapped dialogs during the training phase leads to improved performance while recognizing communicative functions in the Task dimension.</abstract>
       <url hash="6d3b190e">2020.lrec-1.67</url>
@@ -875,13 +875,13 @@
     </paper>
     <paper id="69">
       <title>The <fixed-case>ISO</fixed-case> Standard for Dialogue Act Annotation, Second Edition</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
       <author><first>Emer</first><last>Gilmartin</last></author>
       <author><first>Catherine</first><last>Pelachaud</last></author>
       <author><first>Alex</first><last>Fang</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>549–558</pages>
       <abstract>ISO standard 24617-2 for dialogue act annotation, established in 2012, has in the past few years been used both in corpus annotation and in the design of components for spoken and multimodal dialogue systems. This has brought some inaccuracies and undesirbale limitations of the standard to light, which are addressed in a proposed second edition. This second edition allows a more accurate annotation of dependence relations and rhetorical relations in dialogue. Following the ISO 24617-4 principles of semantic annotation, and borrowing ideas from EmotionML, a triple-layered plug-in mechanism is introduced which allows dialogue act descriptions to be enriched with information about their semantic content, about accompanying emotions, and other information, and allows the annotation scheme to be customised by adding application-specific dialogue act types.</abstract>
       <url hash="c567ce00">2020.lrec-1.69</url>
@@ -890,7 +890,7 @@
     </paper>
     <paper id="70">
       <title>The <fixed-case>AICO</fixed-case> Multimodal Corpus – Data Collection and Preliminary Analyses</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>559–564</pages>
       <abstract>This paper describes data collection and the first explorative research on the AICO Multimodal Corpus. The corpus contains eye-gaze, Kinect, and video recordings of human-robot and human-human interactions, and was collected to study cooperation, engagement and attention of human participants in task-based as well as in chatty type interactive situations. In particular, the goal was to enable comparison between human-human and human-robot interactions, besides studying multimodal behaviour and attention in the different dialogue activities. The robot partner was a humanoid Nao robot, and it was expected that its agent-like behaviour would render humanrobot interactions similar to human-human interaction but also high-light important differences due to the robot’s limited conversational capabilities. The paper reports on the preliminary studies on the corpus, concerning the participants’ eye-gaze and gesturing behaviours,which were chosen as objective measures to study differences in their multimodal behaviour patterns with a human and a robot partner.</abstract>
       <url hash="952067df">2020.lrec-1.70</url>
@@ -950,7 +950,7 @@
       <title><fixed-case>RDG</fixed-case>-Map: A Multimodal Corpus of Pedagogical Human-Agent Spoken Interactions.</title>
       <author><first>Maike</first><last>Paetzel</last></author>
       <author><first>Deepthi</first><last>Karkada</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <pages>600–609</pages>
       <abstract>This paper presents a multimodal corpus of 209 spoken game dialogues between a human and a remote-controlled artificial agent. The interactions involve people collaborating with the agent to identify countries on the world map as quickly as possible, which allows studying rapid and spontaneous dialogue with complex anaphoras, disfluent utterances and incorrect descriptions. The corpus consists of two parts: 8 hours of game interactions have been collected with a virtual unembodied agent online and 26.8 hours have been recorded with a physically embodied robot in a research lab. In addition to spoken audio recordings available for both parts, camera recordings and skeleton-, facial expression- and eye-gaze tracking data have been collected for the lab-based part of the corpus. In this paper, we introduce the pedagogical reference resolution game (RDG-Map) and the characteristics of the corpus collected. We also present an annotation scheme we developed in order to study the dialogue strategies utilized by the players. Based on a subset of 330 minutes of interactions annotated so far, we discuss initial insights into these strategies as well as the potential of the corpus for future research.</abstract>
       <url hash="177fcc36">2020.lrec-1.75</url>
@@ -1052,7 +1052,7 @@
       <author><first>Birgit</first><last>Rauchbauer</last></author>
       <author><first>Youssef</first><last>Hmamouche</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Magalie</first><last>Ochs</last></author>
       <author><first>Thierry</first><last>Chaminade</last></author>
       <pages>668–675</pages>
@@ -1077,15 +1077,15 @@
     </paper>
     <paper id="86">
       <title>Dialogue-<fixed-case>AMR</fixed-case>: <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Dialogue</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Mitchell</first><last>Abrams</last></author>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>684–695</pages>
       <abstract>This paper describes a schema that enriches Abstract Meaning Representation (AMR) in order to provide a semantic representation for facilitating Natural Language Understanding (NLU) in dialogue systems. AMR offers a valuable level of abstraction of the propositional content of an utterance; however, it does not capture the illocutionary force or speaker’s intended contribution in the broader dialogue context (e.g., make a request or ask a question), nor does it capture tense or aspect. We explore dialogue in the domain of human-robot interaction, where a conversational robot is engaged in search and navigation tasks with a human partner. To address the limitations of standard AMR, we develop an inventory of speech acts suitable for our domain, and present “Dialogue-AMR”, an enhanced AMR that represents not only the content of an utterance, but the illocutionary force behind it, as well as tense and aspect. To showcase the coverage of the schema, we use both manual and automatic methods to construct the “DialAMR” corpus—a corpus of human-robot dialogue annotated with standard AMR and our enriched Dialogue-AMR schema. Our automated methods can be used to incorporate AMR into a larger NLU pipeline supporting human-robot dialogue.</abstract>
       <url hash="28ab8a98">2020.lrec-1.86</url>
@@ -1118,7 +1118,7 @@
     <paper id="89">
       <title><fixed-case>B</fixed-case>rain<fixed-case>P</fixed-case>redict: a Tool for Predicting and Visualising Local Brain Activity</title>
       <author><first>Youssef</first><last>Hmamouche</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Magalie</first><last>Ochs</last></author>
       <author><first>Thierry</first><last>Chaminade</last></author>
       <pages>710–716</pages>
@@ -1144,7 +1144,7 @@
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Carla</first><last>Gordon</last></author>
       <author><first>Volodymyr</first><last>Yanov</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>726–734</pages>
       <abstract>We collected a corpus of dialogues in a Wizard of Oz (WOz) setting in the Internet of Things (IoT) domain. We asked users participating in these dialogues to rate the system on a number of aspects, namely, intelligence, naturalness, personality, friendliness, their enjoyment, overall quality, and whether they would recommend the system to others. Then we asked dialogue observers, i.e., Amazon Mechanical Turkers (MTurkers), to rate these dialogues on the same aspects. We also generated simulated dialogues between dialogue policies and simulated users and asked MTurkers to rate them again on the same aspects. Using linear regression, we developed dialogue evaluation functions based on features from the simulated dialogues and the MTurkers’ ratings, the WOz dialogues and the MTurkers’ ratings, and the WOz dialogues and the WOz participants’ ratings. We applied all these dialogue evaluation functions to a held-out portion of our WOz dialogues, and we report results on the predictive power of these different types of dialogue evaluation functions. Our results suggest that for three conversational aspects (intelligence, naturalness, overall quality) just training evaluation functions on simulated data could be sufficient.</abstract>
       <url hash="96760407">2020.lrec-1.91</url>
@@ -1155,7 +1155,7 @@
       <title>Which Model Should We Use for a Real-World Conversational Dialogue System? a Cross-Language Relevance Model or a Deep Neural Net?</title>
       <author><first>Seyed Hossein</first><last>Alavi</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>735–742</pages>
       <abstract>We compare two models for corpus-based selection of dialogue responses: one based on cross-language relevance with a cross-language LSTM model. Each model is tested on multiple corpora, collected from two different types of dialogue source material. Results show that while the LSTM model performs adequately on a very large corpus (millions of utterances), its performance is dominated by the cross-language relevance model for a more moderate-sized corpus (ten thousands of utterances).</abstract>
       <url hash="ac5532af">2020.lrec-1.92</url>
@@ -1189,7 +1189,7 @@
       <title>An Annotation Approach for Social and Referential Gaze in Dialogue</title>
       <author><first>Vidya</first><last>Somashekarappa</last></author>
       <author><first>Christine</first><last>Howes</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>759–765</pages>
       <abstract>This paper introduces an approach for annotating eye gaze considering both its social and the referential functions in multi-modal human-human dialogue. Detecting and interpreting the temporal patterns of gaze behavior cues is natural for humans and also mostly an unconscious process. However, these cues are difficult for conversational agents such as robots or avatars to process or generate. The key factor is to recognize these variants and carry out a successful conversation, as misinterpretation can lead to total failure of the given interaction. This paper introduces an annotation scheme for eye-gaze in human-human dyadic interactions that is intended to facilitate the learning of eye-gaze patterns in multi-modal natural dialogue.</abstract>
       <url hash="c4cd7968">2020.lrec-1.95</url>
@@ -1211,7 +1211,7 @@
     <paper id="97">
       <title>Books of Hours. the First Liturgical Data Set for Text Segmentation.</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <author><first>Christopher</first><last>Kermorvant</last></author>
       <author><first>Dominique</first><last>Stutzmann</last></author>
       <author><first>Marie-Laurence</first><last>Bonhomme</last></author>
@@ -1271,7 +1271,7 @@
     <paper id="102">
       <title>Automatic Section Recognition in Obituaries</title>
       <author><first>Valentino</first><last>Sabbatino</last></author>
-      <author><first>Laura Ana Maria</first><last>Bostan</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Bostan</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>817–825</pages>
       <abstract>Obituaries contain information about people’s values across times and cultures, which makes them a useful resource for exploring cultural history. They are typically structured similarly, with sections corresponding to Personal Information, Biographical Sketch, Characteristics, Family, Gratitude, Tribute, Funeral Information and Other aspects of the person. To make this information available for further studies, we propose a statistical model which recognizes these sections. To achieve that, we collect a corpus of 20058 English obituaries from TheDaily Item, Remembering.CA and The London Free Press. The evaluation of our annotation guidelines with three annotators on 1008 obituaries shows a substantial agreement of Fleiss κ = 0.87. Formulated as an automatic segmentation task, a convolutional neural network outperforms bag-of-words and embedding-based BiLSTMs and BiLSTM-CRFs with a micro F1 = 0.81.</abstract>
@@ -1292,7 +1292,7 @@
     <paper id="104">
       <title><fixed-case>R</fixed-case>i<fixed-case>Q</fixed-case>u<fixed-case>A</fixed-case>: A Corpus of Rich Quotation Annotation for <fixed-case>E</fixed-case>nglish Literary Text</title>
       <author><first>Sean</first><last>Papay</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>835–841</pages>
       <abstract>We introduce RiQuA (RIch QUotation Annotations), a corpus that provides quotations, including their interpersonal structure (speakers and addressees) for English literary text. The corpus comprises 11 works of 19th-century literature that were manually doubly annotated for direct and indirect quotations. For each quotation, its span, speaker, addressee, and cue are identified (if present). This provides a rich view of dialogue structures not available from other available corpora. We detail the process of creating this dataset, discuss the annotation guidelines, and analyze the resulting corpus in terms of inter-annotator agreement and its properties. RiQuA, along with its annotations guidelines and associated scripts, are publicly available for use, modification, and experimentation.</abstract>
       <url hash="b62d9305">2020.lrec-1.104</url>
@@ -1312,9 +1312,9 @@
       <title>The <fixed-case>BDC</fixed-case>amões Collection of <fixed-case>P</fixed-case>ortuguese Literary Documents: a Research Resource for Digital Humanities and Language Technology</title>
       <author><first>Sara</first><last>Grilo</last></author>
       <author><first>Márcia</first><last>Bolrinha</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Rui</first><last>Vaz</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>849–854</pages>
       <abstract>This paper presents the BDCamões Collection of Portuguese Literary Documents, a new corpus of literary texts written in Portuguese that in its inaugural version includes close to 4 million words from over 200 complete documents from 83 authors in 14 genres, covering a time span from the 16th to the 21st century, and adhering to different orthographic conventions. Many of the texts in the corpus have also been automatically parsed with state-of-the-art language processing tools, forming the BDCamões Treebank subcorpus. This set of characteristics makes of BDCamões an invaluable resource for research in language technology (e.g. authorship detection, genre classification, etc.) and in language science and digital humanities (e.g. comparative literature, diachronic linguistics, etc.).</abstract>
       <url hash="f229b2e0">2020.lrec-1.106</url>
@@ -1347,7 +1347,7 @@
     </paper>
     <paper id="109">
       <title><fixed-case>NLP</fixed-case> Scholar: A Dataset for Examining the State of <fixed-case>NLP</fixed-case> Research</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>868–877</pages>
       <abstract>Google Scholar is the largest web search engine for academic literature that also provides access to rich metadata associated with the papers. The ACL Anthology (AA) is the largest repository of articles on Natural Language Processing (NLP). We extracted information from AA for about 44 thousand NLP papers and identified authors who published at least three papers there. We then extracted citation information from Google Scholar for all their papers (not just their AA papers). This resulted in a dataset of 1.1 million papers and associated Google Scholar information. We aligned the information in the AA and Google Scholar datasets to create the NLP Scholar Dataset – a single unified source of information (from both AA and Google Scholar) for tens of thousands of NLP papers. It can be used to identify broad trends in productivity, focus, and impact of NLP research. We present here initial work on analyzing the volume of research in NLP over the years and identifying the most cited papers in NLP. We also list a number of additional potential applications.</abstract>
       <url hash="5eb46a24">2020.lrec-1.109</url>
@@ -1403,7 +1403,7 @@
       <title>“Voices of the Great War”: A Richly Annotated Corpus of <fixed-case>I</fixed-case>talian Texts on the First World War</title>
       <author><first>Federico</first><last>Boschetti</last></author>
       <author><first>Irene</first><last>De Felice</last></author>
-      <author><first>Stefano</first><last>Dei Rossi</last></author>
+      <author id="stefano-dei-rossi"><first>Stefano</first><last>Dei Rossi</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Michele</first><last>Di Giorgio</last></author>
       <author><first>Martina</first><last>Miliani</last></author>
@@ -1412,7 +1412,7 @@
       <author><first>Giulia</first><last>Venturi</last></author>
       <author><first>Nicola</first><last>Labanca</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>911–918</pages>
       <abstract>“Voices of the Great War” is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low/informal vs. high/formal registers) and diatopic (regional varieties, dialects) points of view. From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is fully annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different “voices” have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it.</abstract>
       <url hash="e958b0fd">2020.lrec-1.114</url>
@@ -1422,12 +1422,12 @@
     <paper id="115">
       <title><fixed-case>DE</fixed-case>bate<fixed-case>N</fixed-case>et-mig15:Tracing the 2015 Immigration Debate in <fixed-case>G</fixed-case>ermany Over Time</title>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Erenay</first><last>Dayanik</last></author>
       <author><first>Sebastian</first><last>Haunss</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>919–927</pages>
       <abstract>DEbateNet-migr15 is a manually annotated dataset for German which covers the public debate on immigration in 2015. The building block of our annotation is the political science notion of a claim, i.e., a statement made by a political actor (a politician, a party, or a group of citizens) that a specific action should be taken (e.g., vacant flats should be assigned to refugees). We identify claims in newspaper articles, assign them to actors and fine-grained categories and annotate their polarity and date. The aim of this paper is two-fold: first, we release the full DEbateNet-mig15 corpus and document it by means of a quantitative and qualitative analysis; second, we demonstrate its application in a discourse network analysis framework, which enables us to capture the temporal dynamics of the political debate</abstract>
       <url hash="ab31fa86">2020.lrec-1.115</url>
@@ -1445,7 +1445,7 @@
     </paper>
     <paper id="117">
       <title>A New <fixed-case>L</fixed-case>atin Treebank for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: Charters between <fixed-case>A</fixed-case>ncient <fixed-case>L</fixed-case>atin and <fixed-case>R</fixed-case>omance Languages</title>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Timo</first><last>Korkiakangas</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <pages>933–942</pages>
@@ -1534,7 +1534,7 @@
     <paper id="125">
       <title><fixed-case>D</fixed-case>isc<fixed-case>S</fixed-case>ense: Automated Semantic Analysis of Discourse Markers</title>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>991–999</pages>
@@ -1558,8 +1558,8 @@
       <title>Machine-Aided Annotation for Fine-Grained Proposition Types in Argumentation</title>
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1008–1018</pages>
       <abstract>We introduce a corpus of the 2016 U.S. presidential debates and commentary, containing 4,648 argumentative propositions annotated with fine-grained proposition types. Modern machine learning pipelines for analyzing argument have difficulty distinguishing between types of propositions based on their factuality, rhetorical positioning, and speaker commitment. Inability to properly account for these facets leaves such systems inaccurate in understanding of fine-grained proposition types. In this paper, we demonstrate an approach to annotating for four complex proposition types, namely normative claims, desires, future possibility, and reported speech. We develop a hybrid machine learning and human workflow for annotation that allows for efficient and reliable annotation of complex linguistic phenomena, and demonstrate with preliminary analysis of rhetorical strategies and structure in presidential debates. This new dataset and method can support technical researchers seeking more nuanced representations of argument, as well as argumentation theorists developing new quantitative analyses.</abstract>
       <url hash="1bd30907">2020.lrec-1.127</url>
@@ -1583,8 +1583,8 @@
       <author><first>Wanqiu</first><last>Long</last></author>
       <author><first>Xinyi</first><last>Cai</last></author>
       <author><first>James</first><last>Reid</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>1025–1032</pages>
       <abstract>Text corpora annotated with language-related properties are an important resource for the development of Language Technology. The current work contributes a new resource for Chinese Language Technology and for Chinese-English translation, in the form of a set of TED talks (some originally given in English, some in Chinese) that have been annotated with discourse relations in the style of the Penn Discourse TreeBank, adapted to properties of Chinese text that are not present in English. The resource is currently unique in annotating discourse-level properties of planned spoken monologues rather than of written text. An inter-annotator agreement study demonstrates that the annotation scheme is able to achieve highly reliable results.</abstract>
       <url hash="d384cc4a">2020.lrec-1.129</url>
@@ -1596,7 +1596,7 @@
       <author><first>Christopher</first><last>Olshefski</last></author>
       <author><first>Luca</first><last>Lugini</last></author>
       <author><first>Ravneet</first><last>Singh</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Amanda</first><last>Godley</last></author>
       <pages>1033–1043</pages>
       <abstract>Although NLP research on argument mining has advanced considerably in recent years, most studies draw on corpora of asynchronous and written texts, often produced by individuals. Few published corpora of synchronous, multi-party argumentation are available. The Discussion Tracker corpus, collected in high school English classes, is an annotated dataset of transcripts of spoken, multi-party argumentation. The corpus consists of 29 multi-party discussions of English literature transcribed from 985 minutes of audio. The transcripts were annotated for three dimensions of collaborative argumentation: argument moves (claims, evidence, and explanations), specificity (low, medium, high) and collaboration (e.g., extensions of and disagreements about others’ ideas). In addition to providing descriptive statistics on the corpus, we provide performance benchmarks and associated code for predicting each dimension separately, illustrate the use of the multiple annotations in the corpus to improve performance via multi-task learning, and finally discuss other ways the corpus might be used to further NLP research.</abstract>
@@ -1650,7 +1650,7 @@
       <title>Joint Learning of Syntactic Features Helps Discourse Segmentation</title>
       <author><first>Takshak</first><last>Desai</last></author>
       <author><first>Parag Pravin</first><last>Dakle</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>1073–1080</pages>
       <abstract>This paper describes an accurate framework for carrying out multi-lingual discourse segmentation with BERT (Devlin et al., 2019). The model is trained to identify segments by casting the problem as a token classification problem and jointly learning syntactic features like part-of-speech tags and dependency relations. This leads to significant improvements in performance. Experiments are performed in different languages, such as English, Dutch, German, Portuguese Brazilian and Basque to highlight the cross-lingual effectiveness of the segmenter. In particular, the model achieves a state-of-the-art F-score of 96.7 for the RST-DT corpus (Carlson et al., 2003) improving on the previous best model by 7.2%. Additionally, a qualitative explanation is provided for how proposed changes contribute to model performance by analyzing errors made on the test data.</abstract>
       <url hash="fea1b8ce">2020.lrec-1.135</url>
@@ -1683,7 +1683,7 @@
       <title><fixed-case>D</fixed-case>i<fixed-case>ML</fixed-case>ex-<fixed-case>B</fixed-case>angla: A Lexicon of <fixed-case>B</fixed-case>angla Discourse Connectives</title>
       <author><first>Debopam</first><last>Das</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
-      <author><first>Soumya Sankar</first><last>Ghosh</last></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya Sankar</first><last>Ghosh</last></author>
       <author><first>Lahari</first><last>Chatterjee</last></author>
       <pages>1097–1102</pages>
       <abstract>We present DiMLex-Bangla, a newly developed lexicon of discourse connectives in Bangla. The lexicon, upon completion of its first version, contains 123 Bangla connective entries, which are primarily compiled from the linguistic literature and translation of English discourse connectives. The lexicon compilation is later augmented by adding more connectives from a currently developed corpus, called the Bangla RST Discourse Treebank (Das and Stede, 2018). DiMLex-Bangla provides information on syntactic categories of Bangla connectives, their discourse semantics and non-connective uses (if any). It uses the format of the German connective lexicon DiMLex (Stede and Umbach, 1998), which provides a cross-linguistically applicable XML schema. The resource is the first of its kind in Bangla, and is freely available for use in studies on discourse structure and computational applications.</abstract>
@@ -1748,7 +1748,7 @@
     <paper id="144">
       <title>Simplifying Coreference Chains for Dyslexic Children</title>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>1142–1151</pages>
       <abstract>We present a work aiming to generate adapted content for dyslexic children for French, in the context of the ALECTOR project. Thus, we developed a system to transform the texts at the discourse level. This system modifies the coreference chains, which are markers of text cohesion, by using rules. These rules were designed following a careful study of coreference chains in both original texts and its simplified versions. Moreover, in order to define reliable transformation rules, we analysed several coreference properties as well as the concurrent simplification operations in the aligned texts. This information is coded together with a coreference resolution system and a text rewritten tool in the proposed system, which comprise a coreference module specialised in written text and seven text transformation operations. The evaluation of the system firstly focused on check the simplification by manual validation of three judges. These errors were grouped into five classes that combined can explain 93% of the errors. The second evaluation step consisted of measuring the simplification perception by 23 judges, which allow us to measure the simplification impact of the proposed rules.</abstract>
       <url hash="220ec684">2020.lrec-1.144</url>
@@ -1769,7 +1769,7 @@
     <paper id="146">
       <title>What Speakers really Mean when they Ask Questions: Classification of Intentions with a Supervised Approach</title>
       <author><first>Angèle</first><last>Barbedette</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>1159–1166</pages>
       <abstract>This paper focuses on the automatic detection of hidden intentions of speakers in questions asked during meals. Our corpus is composed of a set of transcripts of spontaneous oral conversations from ESLO’s corpora. We suggest a typology of these intentions based on our research work and the exploration and annotation of the corpus, in which we define two “explicit” categories (request for agreement and request for information) and three “implicit” categories (opinion, will and doubt). We implement a supervised automatic classification model based on annotated data and selected linguistic features and we evaluate its results and performances. We finally try to interpret these results by looking more deeply and specifically into the predictions of the algorithm and the features it used. There are many motivations for this work which are part of ongoing challenges such as opinion analysis, irony detection or the development of conversational agents.</abstract>
       <url hash="42ecb81f">2020.lrec-1.146</url>
@@ -1791,7 +1791,7 @@
       <title>Stigma Annotation Scheme and Stigmatized Language Detection in Health-Care Discussions on Social Media</title>
       <author><first>Nadiya</first><last>Straton</last></author>
       <author><first>Hyeju</first><last>Jang</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>1178–1190</pages>
       <abstract>Much research has been done within the social sciences on the interpretation and influence of stigma on human behaviour and health, which result in out-of-group exclusion, distancing, cognitive separation, status loss, discrimination, in-group pressure, and often lead to disengagement, non-adherence to treatment plan, and prescriptions by the doctor. However, little work has been conducted on computational identification of stigma in general and in social media discourse in particular. In this paper, we develop the annotation scheme and improve the annotation process for stigma identification, which can be applied to other health-care domains. The data from pro-vaccination and anti-vaccination discussion groups are annotated by trained annotators who have professional background in social science and health-care studies, therefore the group can be considered experts on the subject in comparison to non-expert crowd. Amazon MTurk annotators is another group of annotator with no knowledge on their education background, they are initially treated as non-expert crowd on the subject matter of stigma. We analyze the annotations with visualisation techniques, features from LIWC (Linguistic Inquiry and Word Count) list and make prediction based on bi-grams with traditional and deep learning models. Data augmentation method and application of CNN show high performance accuracy in comparison to other models. Success of the rigorous annotation process on identifying stigma is reconfirmed by achieving high prediction rate with CNN.</abstract>
       <url hash="190f9aca">2020.lrec-1.148</url>
@@ -1809,8 +1809,8 @@
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>Rakesh</first><last>Gosangi</last></author>
       <author><first>Haimin</first><last>Zhang</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>1191–1196</pages>
       <abstract>In this paper, we present a new corpus consisting of sentences from Hindi short stories annotated for five different discourse modes argumentative, narrative, descriptive, dialogic and informative. We present a detailed account of the entire data collection and annotation processes. The annotations have a very high inter-annotator agreement (0.87 k-alpha). We analyze the data in terms of label distributions, part of speech tags, and sentence lengths. We characterize the performance of various classification algorithms on this dataset and perform ablation studies to understand the nature of the linguistic models suitable for capturing the nuances of the embedded discourse structures in the presented corpus.</abstract>
       <url hash="4a338e53">2020.lrec-1.149</url>
@@ -1819,7 +1819,7 @@
     </paper>
     <paper id="150">
       <title>Multi-class Multilingual Classification of <fixed-case>W</fixed-case>ikipedia Articles Using Extended Named Entity Tag Set</title>
-      <author><first>Hassan S.</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan S.</first><last>Shavarani</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <pages>1197–1201</pages>
       <abstract>Wikipedia is a great source of general world knowledge which can guide NLP models better understand their motivation to make predictions. Structuring Wikipedia is the initial step towards this goal which can facilitate fine-grain classification of articles. In this work, we introduce the Shinra 5-Language Categorization Dataset (SHINRA-5LDS), a large multi-lingual and multi-labeled set of annotated Wikipedia articles in Japanese, English, French, German, and Farsi using Extended Named Entity (ENE) tag set. We evaluate the dataset using the best models provided for ENE label set classification and show that the currently available classification models struggle with large datasets using fine-grained tag sets.</abstract>
@@ -1831,7 +1831,7 @@
       <title>An <fixed-case>A</fixed-case>lgerian Corpus and an Annotation Platform for Opinion and Emotion Analysis</title>
       <author><first>Leila</first><last>Moudjari</last></author>
       <author><first>Karima</first><last>Akli-Astouati</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>1202–1210</pages>
       <abstract>In this paper, we address the lack of resources for opinion and emotion analysis related to North African dialects, targeting Algerian dialect. We present TWIFIL (TWItter proFILing) a collaborative annotation platform for crowdsourcing annotation of tweets at different levels of granularity. The plateform allowed the creation of the largest Algerian dialect dataset annotated for both sentiment (9,000 tweets), emotion (about 5,000 tweets) and extra-linguistic information including author profiling (age and gender). The annotation resulted also in the creation of the largest Algerien dialect subjectivity lexicon of about 9,000 entries which can constitute a valuable resources for the development of future NLP applications for Algerian dialect. To test the validity of the dataset, a set of deep learning experiments were conducted to classify a given tweet as positive, negative or neutral. We discuss our results and provide an error analysis to better identify classification errors.</abstract>
       <url hash="c31194c9">2020.lrec-1.151</url>
@@ -1861,7 +1861,7 @@
     <paper id="154">
       <title>Cross-domain Author Gender Classification in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Rafael</first><last>Dias</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>1227–1234</pages>
       <abstract>Author profiling models predict demographic characteristics of a target author based on the text that they have written. Systems of this kind will often follow a single-domain approach, in which the model is trained from a corpus of labelled texts in a given domain, and it is subsequently validated against a test corpus built from precisely the same domain. Although single-domain settings are arguably ideal, this strategy gives rise to the question of how to proceed when no suitable training corpus (i.e., a corpus that matches the test domain) is available. To shed light on this issue, this paper discusses a cross-domain gender classification task based on four domains (Facebook, crowd sourced opinions, Blogs and E-gov requests) in the Brazilian Portuguese language. A number of simple gender classification models using word- and psycholinguistics-based features alike are introduced, and their results are compared in two kinds of cross-domain setting: first, by making use of a single text source as training data for each task, and subsequently by combining multiple sources. Results confirm previous findings related to the effects of corpus size and domain similarity in English, and pave the way for further studies in the field.</abstract>
       <url hash="449e52c5">2020.lrec-1.154</url>
@@ -1929,7 +1929,7 @@
     </paper>
     <paper id="160">
       <title>Corpora and Baselines for Humour Recognition in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>André</first><last>Clemêncio</last></author>
       <author><first>Ana</first><last>Alves</last></author>
       <pages>1278–1285</pages>
@@ -1965,7 +1965,7 @@
       <author><first>Sumithra</first><last>Velupillai</last></author>
       <author><first>Rachel</first><last>Holden</last></author>
       <author><first>Maxim</first><last>Kikoler</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Rina</first><last>Dutta</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <pages>1303–1310</pages>
@@ -2008,7 +2008,7 @@
     <paper id="167">
       <title>Email Classification Incorporating Social Networks and Thread Structure</title>
       <author><first>Sakhar</first><last>Alkhereyf</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1336–1345</pages>
       <abstract>Existing methods for different document classification tasks in the context of social networks typically only capture the semantics of texts, while ignoring the users who exchange the text and the network they form. However, some work has shown that incorporating the social network information in addition to information from language is effective for various NLP applications including sentiment analysis, inferring user attributes, and predicting inter-personal relations. In this paper, we present an empirical study of email classification into “Business” and “Personal” categories. We represent the email communication using various graph structures. As features, we use both the textual information from the email content and social network information from the communication graphs. We also model the thread structure for emails. We focus on detecting personal emails, and we evaluate our methods on two corpora, only one of which we train on. The experimental results reveal that incorporating social network information improves over the performance of an approach based on textual information only. The results also show that considering the thread structure of emails improves the performance further. Furthermore, our approach improves over a state-of-the-art baseline which uses node embeddings based on both lexical and social network information.</abstract>
       <url hash="c42e119c">2020.lrec-1.167</url>
@@ -2017,7 +2017,7 @@
     </paper>
     <paper id="168">
       <title>Development and Validation of a Corpus for Machine Humor Comprehension</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Wun-Syuan</first><last>Wu</last></author>
       <author><first>Chia-Yueh</first><last>Chang</last></author>
       <author><first>Hsueh-Chih</first><last>Chen</last></author>
@@ -2030,7 +2030,7 @@
     </paper>
     <paper id="169">
       <title><fixed-case>A</fixed-case>lector: A Parallel Corpus of Simplified <fixed-case>F</fixed-case>rench Texts with Alignments of Misreadings by Poor and Dyslexic Readers</title>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Ludivine</first><last>Javourey-Drevet</last></author>
       <author><first>Thomas</first><last>François</last></author>
@@ -2062,9 +2062,9 @@
     <paper id="171">
       <title>Multilingual Stance Detection in Tweets: The <fixed-case>C</fixed-case>atalonia Independence Corpus</title>
       <author><first>Elena</first><last>Zotova</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Manuel</first><last>Nuñez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>1368–1375</pages>
       <abstract>Stance detection aims to determine the attitude of a given text with respect to a specific topic or claim. While stance detection has been fairly well researched in the last years, most the work has been focused on English. This is mainly due to the relative lack of annotated data in other languages. The TW-10 referendum Dataset released at IberEval 2018 is a previous effort to provide multilingual stance-annotated data in Catalan and Spanish. Unfortunately, the TW-10 Catalan subset is extremely imbalanced. This paper addresses these issues by presenting a new multilingual dataset for stance detection in Twitter for the Catalan and Spanish languages, with the aim of facilitating research on stance detection in multilingual and cross-lingual settings. The dataset is annotated with stance towards one topic, namely, the ndependence of Catalonia. We also provide a semi-automatic method to annotate the dataset based on a categorization of Twitter users. We experiment on the new corpus with a number of supervised approaches, including linear classifiers and deep learning methods. Comparison of our new corpus with the with the TW-1O dataset shows both the benefits and potential of a well balanced corpus for multilingual and cross-lingual research on stance detection. Finally, we establish new state-of-the-art results on the TW-10 dataset, both for Catalan and Spanish.</abstract>
       <url hash="392680a0">2020.lrec-1.171</url>
@@ -2093,7 +2093,7 @@
       <author><first>Noé</first><last>Cécillon</last></author>
       <author><first>Vincent</first><last>Labatut</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>1382–1390</pages>
       <abstract>With the spread of online social networks, it is more and more difficult to monitor all the user-generated content. Automating the moderation process of the inappropriate exchange content on Internet has thus become a priority task. Methods have been proposed for this purpose, but it can be challenging to find a suitable dataset to train and develop them. This issue is especially true for approaches based on information derived from the structure and the dynamic of the conversation. In this work, we propose an original framework, based on the the Wikipedia Comment corpus, with comment-level abuse annotations of different types. The major contribution concerns the reconstruction of conversations, by comparison to existing corpora, which focus only on isolated messages (i.e. taken out of their conversational context). This large corpus of more than 380k annotated messages opens perspectives for online abuse detection and especially for context-based approaches. We also propose, in addition to this corpus, a complete benchmarking platform to stimulate and fairly compare scientific works around the problem of content abuse detection, trying to avoid the recurring problem of result replication. Finally, we apply two classification methods to our dataset to demonstrate its potential.</abstract>
       <url hash="d3a7370f">2020.lrec-1.173</url>
@@ -2114,8 +2114,8 @@
     <paper id="175">
       <title>An Annotated Corpus for Sexism Detection in <fixed-case>F</fixed-case>rench Tweets</title>
       <author><first>Patricia</first><last>Chiril</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Alda</first><last>Mari</last></author>
       <author><first>Gloria</first><last>Origgi</last></author>
       <author><first>Marlène</first><last>Coulomb-Gully</last></author>
@@ -2130,10 +2130,10 @@
       <author><first>Roney</first><last>Santos</last></author>
       <author><first>Gabriela</first><last>Pedro</last></author>
       <author><first>Sidney</first><last>Leal</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>1404–1413</pages>
       <abstract>The proliferation of fake news is a current issue that influences a number of important areas of society, such as politics, economy and health. In the Natural Language Processing area, recent initiatives tried to detect fake news in different ways, ranging from language-based approaches to content-based verification. In such approaches, the choice of the features for the classification of fake and true news is one of the most important parts of the process. This paper presents a study on the impact of readability features to detect fake news for the Brazilian Portuguese language. The results show that such features are relevant to the task (achieving, alone, up to 92% classification accuracy) and may improve previous classification results.</abstract>
       <url hash="a12c4d8f">2020.lrec-1.176</url>
@@ -2142,7 +2142,7 @@
     </paper>
     <paper id="177">
       <title>When Shallow is Good Enough: Automatic Assessment of Conceptual Text Complexity using Shallow Semantic Features</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Ioana</first><last>Hulpuș</last></author>
       <pages>1414–1422</pages>
       <abstract>According to psycholinguistic studies, the complexity of concepts used in a text and the relations between mentioned concepts play the most important role in text understanding and maintaining reader’s interest. However, the classical approaches to automatic assessment of text complexity, and their commercial applications, take into consideration mainly syntactic and lexical complexity. Recently, we introduced the task of automatic assessment of conceptual text complexity, proposing a set of graph-based deep semantic features using DBpedia as a proxy to human knowledge. Given that such graphs can be noisy, incomplete, and computationally expensive to deal with, in this paper, we propose the use of textual features and shallow semantic features that only require entity linking. We compare the results obtained with new features with those of the state-of-the-art deep semantic features on two tasks: (1) pairwise comparison of two versions of the same text; and (2) five-level classification of texts. We find that the shallow features achieve state-of-the-art results on both tasks, significantly outperforming performances of the deep semantic features on the five-level classification task. Interestingly, the combination of the shallow and deep semantic features lead to a significant improvement of the performances on that task.</abstract>
@@ -2180,7 +2180,7 @@
       <author><first>Xiaolei</first><last>Huang</last></author>
       <author><first>Linzi</first><last>Xing</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>1440–1448</pages>
       <abstract>Existing research on fairness evaluation of document classification models mainly uses synthetic monolingual data without ground truth for author demographic attributes. In this work, we assemble and publish a multilingual Twitter corpus for the task of hate speech detection with inferred four author demographic factors: age, country, gender and race/ethnicity. The corpus covers five languages: English, Italian, Polish, Portuguese and Spanish. We evaluate the inferred demographic labels with a crowdsourcing platform, Figure Eight. To examine factors that can cause biases, we take an empirical analysis of demographic predictability on the English corpus. We measure the performance of four popular document classifiers and evaluate the fairness and bias of the baseline classifiers on the author-level demographic attributes.</abstract>
       <url hash="e0b336f1">2020.lrec-1.180</url>
@@ -2247,8 +2247,8 @@
       <title><fixed-case>E</fixed-case>mo<fixed-case>E</fixed-case>vent: A Multilingual Emotion Corpus based on different Events</title>
       <author><first>Flor Miriam</first><last>Plaza del Arco</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>L. Alfonso</first><last>Urena Lopez</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena Lopez</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <pages>1492–1498</pages>
       <abstract>In recent years emotion detection in text has become more popular due to its potential applications in fields such as psychology, marketing, political science, and artificial intelligence, among others. While opinion mining is a well-established task with many standard data sets and well-defined methodologies, emotion mining has received less attention due to its complexity. In particular, the annotated gold standard resources available are not enough. In order to address this shortage, we present a multilingual emotion data set based on different events that took place in April 2019. We collected tweets from the Twitter platform. Then one of seven emotions, six Ekman’s basic emotions plus the “neutral or other emotions”, was labeled on each tweet by 3 Amazon MTurkers. A total of 8,409 in Spanish and 7,303 in English were labeled. In addition, each tweet was also labeled as offensive or no offensive. We report some linguistic statistics about the data set in order to observe the difference between English and Spanish speakers when they express emotions related to the same events. Moreover, in order to validate the effectiveness of the data set, we also propose a machine learning approach for automatically detecting emotions in tweets for both languages, English and Spanish.</abstract>
       <url hash="6e7b2ad9">2020.lrec-1.186</url>
@@ -2261,7 +2261,7 @@
       <author><first>Cristian-Paul</first><last>Bara</last></author>
       <author><first>Yuanhang</first><last>Luo</last></author>
       <author><first>Mihai</first><last>Burzo</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Emily Mower</first><last>Provost</last></author>
       <pages>1499–1510</pages>
       <abstract>Endowing automated agents with the ability to provide support, entertainment and interaction with human beings requires sensing of the users’ affective state. These affective states are impacted by a combination of emotion inducers, current psychological state, and various conversational factors. Although emotion classification in both singular and dyadic settings is an established area, the effects of these additional factors on the production and perception of emotion is understudied. This paper presents a new dataset, Multimodal Stressed Emotion (MuSE), to study the multimodal interplay between the presence of stress and expressions of affect. We describe the data collection protocol, the possible areas of use, and the annotations for the emotional content of the recordings. The paper also presents several baselines to measure the performance of multimodal features for emotion and stress classification.</abstract>
@@ -2274,7 +2274,7 @@
       <author><first>Linrui</first><last>Zhang</last></author>
       <author><first>Hsin-Lun</first><last>Huang</last></author>
       <author><first>Yang</first><last>Yu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>1511–1516</pages>
       <abstract>People convey sentiments and emotions through language. To understand these affectual states is an essential step towards understanding natural language. In this paper, we propose a transfer-learning based approach to inferring the affectual state of a person from their tweets. As opposed to the traditional machine learning models which require considerable effort in designing task specific features, our model can be well adapted to the proposed tasks with a very limited amount of fine-tuning, which significantly reduces the manual effort in feature engineering. We aim to show that by leveraging the pre-learned knowledge, transfer learning models can achieve competitive results in the affectual content analysis of tweets, compared to the traditional models. As shown by the experiments on SemEval-2018 Task 1: Affect in Tweets, our model ranking 2nd, 4th and 6th place in four of its subtasks proves the effectiveness of our idea.</abstract>
       <url hash="d7b36341">2020.lrec-1.188</url>
@@ -2307,7 +2307,7 @@
     </paper>
     <paper id="191">
       <title>Abusive language in <fixed-case>S</fixed-case>panish children and young teenager’s conversations: data preparation and short text classification with contextual word embeddings</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Esther</first><last>González</last></author>
       <author><first>Asuncion</first><last>Moreno</last></author>
       <author><first>Eudald</first><last>Cumalat</last></author>
@@ -2343,7 +2343,7 @@
     </paper>
     <paper id="194">
       <title><fixed-case>G</fixed-case>ood<fixed-case>N</fixed-case>ews<fixed-case>E</fixed-case>veryone: A Corpus of News Headlines Annotated with Emotions, Semantic Roles, and Reader Perception</title>
-      <author><first>Laura Ana Maria</first><last>Bostan</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Bostan</last></author>
       <author><first>Evgeny</first><last>Kim</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>1554–1566</pages>
@@ -2357,7 +2357,7 @@
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Will</first><last>Hipson</last></author>
       <author><first>Robert</first><last>Coplan</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>1567–1577</pages>
       <abstract>The state of being alone can have a substantial impact on our lives, though experiences with time alone diverge significantly among individuals. Psychologists distinguish between the concept of solitude, a positive state of voluntary aloneness, and the concept of loneliness, a negative state of dissatisfaction with the quality of one’s social interactions. Here, for the first time, we conduct a large-scale computational analysis to explore how the terms associated with the state of being alone are used in online language. We present SOLO (State of Being Alone), a corpus of over 4 million tweets collected with query terms solitude, lonely, and loneliness. We use SOLO to analyze the language and emotions associated with the state of being alone. We show that the term solitude tends to co-occur with more positive, high-dominance words (e.g., enjoy, bliss) while the terms lonely and loneliness frequently co-occur with negative, low-dominance words (e.g., scared, depressed), which confirms the conceptual distinctions made in psychology. We also show that women are more likely to report on negative feelings of being lonely as compared to men, and there are more teenagers among the tweeters that use the word lonely than among the tweeters that use the word solitude.</abstract>
       <url hash="cb6e6e6c">2020.lrec-1.195</url>
@@ -2367,7 +2367,7 @@
     <paper id="196">
       <title><fixed-case>P</fixed-case>o<fixed-case>K</fixed-case>i: A Large Dataset of Poems by Children</title>
       <author><first>Will</first><last>Hipson</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>1578–1589</pages>
       <abstract>Child language studies are crucial in improving our understanding of child well-being; especially in determining the factors that impact happiness, the sources of anxiety, techniques of emotion regulation, and the mechanisms to cope with stress. However, much of this research is stymied by the lack of availability of large child-written texts. We present a new corpus of child-written text, PoKi, which includes about 62 thousand poems written by children from grades 1 to 12. PoKi is especially useful in studying child language because it comes with information about the age of the child authors (their grade). We analyze the words in PoKi along several emotion dimensions (valence, arousal, dominance) and discrete emotions (anger, fear, sadness, joy). We use non-parametric regressions to model developmental differences from early childhood to late-adolescence. Results show decreases in valence that are especially pronounced during mid-adolescence, while arousal and dominance peaked during adolescence. Gender differences in the developmental trajectory of emotions are also observed. Our results support and extend the current state of emotion development research.</abstract>
       <url hash="18243087">2020.lrec-1.196</url>
@@ -2424,7 +2424,7 @@
       <title><fixed-case>CEASE</fixed-case>, a Corpus of Emotion Annotated Suicide notes in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Soumitra</first><last>Ghosh</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1618–1626</pages>
       <abstract>A suicide note is usually written shortly before the suicide and it provides a chance to comprehend the self-destructive state of mind of the deceased. From a psychological point of view, suicide notes have been utilized for recognizing the motive behind the suicide. To the best of our knowledge, there is no openly accessible suicide note corpus at present, making it challenging for the researchers and developers to deep dive into the area of mental health assessment and suicide prevention. In this paper, we create a fine-grained emotion annotated corpus (CEASE) of suicide notes in English and develop various deep learning models to perform emotion detection on the curated dataset. The corpus consists of 2393 sentences from around 205 suicide notes collected from various sources. Each sentence is annotated with a particular emotion class from a set of 15 fine-grained emotion labels, namely (forgiveness, happiness_peacefulness, love, pride, hopefulness, thankfulness, blame, anger, fear, abuse, sorrow, hopelessness, guilt, information, instructions). For the evaluation, we develop an ensemble architecture, where the base models correspond to three supervised deep learning models, namely Convolutional Neural Network (CNN), Gated Recurrent Unit (GRU) and Long Short Term Memory (LSTM). We obtain the highest test accuracy of 60.17% and cross-validation accuracy of 60.32%</abstract>
       <url hash="e2db9d8f">2020.lrec-1.201</url>
@@ -2445,7 +2445,7 @@
     </paper>
     <paper id="203">
       <title>An Event-comment Social Media Corpus for Implicit Emotion Analysis</title>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Helena Yan Ping</first><last>Lau</last></author>
       <pages>1633–1642</pages>
       <abstract>The classification of implicit emotions in text has always been a great challenge to emotion processing. Even though the majority of emotion expressed implicitly, most previous attempts at emotions have focused on the examination of explicit emotions. The poor performance of existing emotion identification and classification models can partly be attributed to the disregard of implicit emotions. In view of this, this paper presents the development of a Chinese event-comment social media emotion corpus. The corpus deals with both explicit and implicit emotions with more emphasis being placed on the implicit ones. This paper specifically describes the data collection and annotation of the corpus. An annotation scheme has been proposed for the annotation of emotion-related information including the emotion type, the emotion cause, the emotion reaction, the use of rhetorical question, the opinion target (i.e. the semantic role in an event that triggers an emotion), etc. Corpus data shows that the annotated items are of great value to the identification of implicit emotions. We believe that the corpus will be a useful resource for both explicit and implicit emotion classification and detection as well as event classification.</abstract>
@@ -2456,8 +2456,8 @@
     <paper id="204">
       <title>An Emotional Mess! Deciding on a Framework for Building a <fixed-case>D</fixed-case>utch Emotion-Annotated Corpus</title>
       <author><first>Luna</first><last>De Bruyne</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>1643–1651</pages>
       <abstract>Seeing the myriad of existing emotion models, with the categorical versus dimensional opposition the most important dividing line, building an emotion-annotated corpus requires some well thought-out strategies concerning framework choice. In our work on automatic emotion detection in Dutch texts, we investigate this problem by means of two case studies. We find that the labels joy, love, anger, sadness and fear are well-suited to annotate texts coming from various domains and topics, but that the connotation of the labels strongly depends on the origin of the texts. Moreover, it seems that information is lost when an emotional state is forcedly classified in a limited set of categories, indicating that a bi-representational format is desirable when creating an emotion corpus.</abstract>
       <url hash="411fde76">2020.lrec-1.204</url>
@@ -2480,10 +2480,10 @@
     <paper id="206">
       <title>Learning Word Ratings for Empathy and Distress from Document-Level User Responses</title>
       <author><first>João</first><last>Sedoc</last></author>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Yehonathan</first><last>Nachmany</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>1664–1673</pages>
       <abstract>Despite the excellent performance of black box approaches to modeling sentiment and emotion, lexica (sets of informative words and associated weights) that characterize different emotions are indispensable to the NLP community because they allow for interpretable and robust predictions. Emotion analysis of text is increasing in popularity in NLP; however, manually creating lexica for psychological constructs such as empathy has proven difficult. This paper automatically creates empathy word ratings from document-level ratings. The underlying problem of learning word ratings from higher-level supervision has to date only been addressed in an ad hoc fashion and has not used deep learning methods. We systematically compare a number of approaches to learning word ratings from higher-level supervision against a Mixed-Level Feed Forward Network (MLFFN), which we find performs best, and use the MLFFN to create the first-ever empathy lexicon. We then use Signed Spectral Clustering to gain insights into the resulting words. The empathy and distress lexica are publicly available at: <url>http://www.wwbp.org/lexica.html</url>.</abstract>
       <url hash="36a82ef6">2020.lrec-1.206</url>
@@ -2540,7 +2540,7 @@
     <paper id="211">
       <title><fixed-case>H</fixed-case>ard<fixed-case>E</fixed-case>val: Focusing on Challenging Tokens to Assess Robustness of <fixed-case>NER</fixed-case></title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>1704–1711</pages>
       <abstract>To assess the robustness of NER systems, we propose an evaluation method that focuses on subsets of tokens that represent specific sources of errors: unknown words and label shift or ambiguity. These subsets provide a system-agnostic basis for evaluating specific sources of NER errors and assessing room for improvement in terms of robustness. We analyze these subsets of challenging tokens in two widely-used NER benchmarks, then exploit them to evaluate NER systems in both in-domain and out-of-domain settings. Results show that these challenging tokens explain the majority of errors made by modern NER systems, although they represent only a small fraction of test tokens. They also indicate that label shift is harder to deal with than unknown words, and that there is much more room for improvement than the standard NER evaluation procedure would suggest. We hope this work will encourage NLP researchers to adopt rigorous and meaningful evaluation methods, and will help them develop more robust models.</abstract>
       <url hash="68abe021">2020.lrec-1.211</url>
@@ -2551,7 +2551,7 @@
       <title>An Evaluation Dataset for Identifying Communicative Functions of Sentences in <fixed-case>E</fixed-case>nglish Scholarly Papers</title>
       <author><first>Kenichi</first><last>Iwatsuki</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1712–1720</pages>
       <abstract>Formulaic expressions, such as ‘in this paper we propose’, are used by authors of scholarly papers to perform communicative functions; the communicative function of the present example is ‘stating the aim of the paper’. Collecting such expressions and pairing them with their communicative functions would be highly valuable for various tasks, particularly for writing assistance. However, such collection and paring in a principled and automated manner would require high-quality annotated data, which are not available. In this study, we address this shortcoming by creating a manually annotated dataset for detecting communicative functions in sentences. Starting from a seed list of labelled formulaic expressions, we retrieved new sentences from scholarly papers in the ACL Anthology and asked multiple human evaluators to label communicative functions. To show the usefulness of our dataset, we conducted a series of experiments that determined to what extent sentence representations acquired by recent models, such as word2vec and BERT, can be employed to detect communicative functions in sentences.</abstract>
       <url hash="c4fda2a6">2020.lrec-1.212</url>
@@ -2585,7 +2585,7 @@
       <author><first>Yi-An</first><last>Lai</last></author>
       <author><first>Xuan</first><last>Zhu</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1739–1746</pages>
       <abstract>Summarizing data samples by quantitative measures has a long history, with descriptive statistics being a case in point. However, as natural language processing methods flourish, there are still insufficient characteristic metrics to describe a collection of texts in terms of the words, sentences, or paragraphs they comprise. In this work, we propose metrics of diversity, density, and homogeneity that quantitatively measure the dispersion, sparsity, and uniformity of a text collection. We conduct a series of simulations to verify that each metric holds desired properties and resonates with human intuitions. Experiments on real-world datasets demonstrate that the proposed characteristic metrics are highly correlated with text classification performance of a renowned model, BERT, which could inspire future applications.</abstract>
       <url hash="c9fd0cbe">2020.lrec-1.215</url>
@@ -2608,8 +2608,8 @@
       <author><first>Andrea</first><last>Horbach</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
       <author><first>Marie</first><last>Bexte</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
       <pages>1753–1762</pages>
       <abstract>Automatic generation of reading comprehension questions is a topic receiving growing interest in the NLP community, but there is currently no consensus on evaluation metrics and many approaches focus on linguistic quality only while ignoring the pedagogic value and appropriateness of questions. This paper overcomes such weaknesses by a new evaluation scheme where questions from the questionnaire are structured in a hierarchical way to avoid confronting human annotators with evaluation measures that do not make sense for a certain question. We show through an annotation study that our scheme can be applied, but that expert annotators with some level of expertise are needed. We also created and evaluated two new evaluation data sets from the biology domain for Basque and German, composed of questions written by people with an educational background, which will be publicly released. Results show that manually generated questions are in general both of higher linguistic as well as pedagogic quality and that among the human generated questions, teacher-generated ones tend to be most useful.</abstract>
       <url hash="b5069ef0">2020.lrec-1.217</url>
@@ -2800,7 +2800,7 @@
       <author><first>Maja</first><last>Buljan</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>1902–1909</pages>
       <abstract>We discuss methodological choices in contrastive and diagnostic evaluation in meaning representation parsing, i.e. mapping from natural language utterances to graph-based encodings of its semantic structure. Drawing inspiration from earlier work in syntactic dependency parsing, we transfer and refine several quantitative diagnosis techniques for use in the context of the 2019 shared task on Meaning Representation Parsing (MRP). As in parsing proper, moving evaluation from simple rooted trees to general graphs brings along its own range of challenges. Specifically, we seek to begin to shed light on relative strenghts and weaknesses in different broad families of parsing techniques. In addition to these theoretical reflections, we conduct a pilot experiment on a selection of top-performing MRP systems and one of the five meaning representation frameworks in the shared task. Empirical results suggest that the proposed methodology can be meaningfully applied to parsing into graph-structured target representations, uncovering hitherto unknown properties of the different systems that can inform future development and cross-fertilization across approaches.</abstract>
       <url hash="670b447a">2020.lrec-1.234</url>
@@ -2813,7 +2813,7 @@
       <author><first>Chi-Yen</first><last>Chen</last></author>
       <author><first>Yi-Hui</first><last>Lee</last></author>
       <author><first>Qian-hui</first><last>Zeng</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Chen-Yang</first><last>Shih</last></author>
       <author><first>Wei-Jhih</first><last>Chen</last></author>
       <pages>1910–1917</pages>
@@ -2879,7 +2879,7 @@
       <title><fixed-case>WEXEA</fixed-case>: <fixed-case>W</fixed-case>ikipedia <fixed-case>EX</fixed-case>haustive Entity Annotation</title>
       <author><first>Michael</first><last>Strobl</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <pages>1951–1958</pages>
       <abstract>Building predictive models for information extraction from text, such as named entity recognition or the extraction of semantic relationships between named entities in text, requires a large corpus of annotated text. Wikipedia is often used as a corpus for these tasks where the annotation is a named entity linked by a hyperlink to its article. However, editors on Wikipedia are only expected to link these mentions in order to help the reader to understand the content, but are discouraged from adding links that do not add any benefit for understanding an article. Therefore, many mentions of popular entities (such as countries or popular events in history), or previously linked articles, as well as the article’s entity itself, are not linked. In this paper, we discuss WEXEA, a Wikipedia EXhaustive Entity Annotation system, to create a text corpus based on Wikipedia with exhaustive annotations of entity mentions, i.e. linking all mentions of entities to their corresponding articles. This results in a huge potential for additional annotations that can be used for downstream NLP tasks, such as Relation Extraction. We show that our annotations are useful for creating distantly supervised datasets for this task. Furthermore, we publish all code necessary to derive a corpus from a raw Wikipedia dump, so that it can be reproduced by everyone.</abstract>
       <url hash="d8f39339">2020.lrec-1.240</url>
@@ -2893,8 +2893,8 @@
       <author><first>Mouhamadou</first><last>Ba</last></author>
       <author><first>Louise</first><last>Deléger</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>1959–1966</pages>
       <abstract>Entity normalization (or entity linking) is an important subtask of information extraction that links entity mentions in text to categories or concepts in a reference vocabulary. Machine learning based normalization methods have good adaptability as long as they have enough training data per reference with a sufficient quality. Distributional representations are commonly used because of their capacity to handle different expressions with similar meanings. However, in specific technical and scientific domains, the small amount of training data and the relatively small size of specialized corpora remain major challenges. Recently, the machine learning-based CONTES method has addressed these challenges for reference vocabularies that are ontologies, as is often the case in life sciences and biomedical domains. And yet, its performance is dependent on manually annotated corpus. Furthermore, like other machine learning based methods, parametrization remains tricky. We propose a new approach to address the scarcity of training data that extends the CONTES method by corpus selection, pre-processing and weak supervision strategies, which can yield high-performance results without any manually annotated examples. We also study which hyperparameters are most influential, with sometimes different patterns compared to previous work. The results show that our approach significantly improves accuracy and outperforms previous state-of-the-art algorithms.</abstract>
       <url hash="e6c42e73">2020.lrec-1.241</url>
@@ -2929,9 +2929,9 @@
       <author><first>Ananya</first><last>Subburathinam</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jonathan</first><last>May</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>1976–1981</pages>
       <abstract>Most of the current cross-lingual transfer learning methods for Information Extraction (IE) have been only applied to name tagging. To tackle more complex tasks such as event extraction we need to transfer graph structures (event trigger linked to multiple arguments with various roles) across languages. We develop a novel share-and-transfer framework to reach this goal with three steps: (1) Convert each sentence in any language to language-universal graph structures; in this paper we explore two approaches based on universal dependency parses and complete graphs, respectively. (2) Represent each node in the graph structure with a cross-lingual word embedding so that all sentences in multiple languages can be represented with one shared semantic space. (3) Using this common semantic space, train event extractors from English training data and apply them to languages that do not have any event annotations. Experimental results on three languages (Spanish, Russian and Ukrainian) without any annotations show this framework achieves comparable performance to a state-of-the-art supervised model trained from more than 1,500 manually annotated event mentions.</abstract>
       <url hash="c9717e4b">2020.lrec-1.243</url>
@@ -2941,7 +2941,7 @@
     <paper id="244">
       <title>Cross-Domain Evaluation of Edge Detection for Biomedical Event Extraction</title>
       <author><first>Alan</first><last>Ramponi</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Rosario</first><last>Lombardo</last></author>
       <pages>1982–1989</pages>
       <abstract>Biomedical event extraction is a crucial task in order to automatically extract information from the increasingly growing body of biomedical literature. Despite advances in the methods in recent years, most event extraction systems are still evaluated in-domain and on complete event structures only. This makes it hard to determine the performance of intermediate stages of the task, such as edge detection, across different corpora. Motivated by these limitations, we present the first cross-domain study of edge detection for biomedical event extraction. We analyze differences between five existing gold standard corpora, create a standardized benchmark corpus, and provide a strong baseline model for edge detection. Experiments show a large drop in performance when the baseline is applied on out-of-domain data, confirming the need for domain adaptation methods for the task. To encourage research efforts in this direction, we make both the data and the baseline available to the research community: <url>https://www.cosbi.eu/cfx/9985</url>.</abstract>
@@ -2982,7 +2982,7 @@
       <author><first>Idil</first><last>Hasan</last></author>
       <author><first>Kate</first><last>Baird</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <pages>2009–2016</pages>
       <abstract>Free text fields within electronic health records (EHRs) contain valuable clinical information which is often missed when conducting research using EHR databases. One such type of information is medications which are not always available in structured fields, especially in mental health records. Most use cases that require medication information also generally require the associated temporal information (e.g. current or past) and attributes (e.g. dose, route, frequency). The purpose of this study is to develop a corpus of medication annotations in mental health records. The aim is to provide a more complete picture behind the mention of medications in the health records, by including additional contextual information around them, and to create a resource for use when developing and evaluating applications for the extraction of medications from EHR text. Thus far, an analysis of temporal information related to medications mentioned in a sample of mental health records has been conducted. The purpose of this analysis was to understand the complexity of medication mentions and their associated temporal information in the free text of EHRs, with a specific focus on the mental health domain.</abstract>
@@ -2992,7 +2992,7 @@
     </paper>
     <paper id="248">
       <title>Do not let the history haunt you: Mitigating Compounding Errors in Conversational Question Answering</title>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <author><first>James</first><last>O’ Neill</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
       <author><first>Frans</first><last>Coenen</last></author>
@@ -3118,7 +3118,7 @@
       <title><fixed-case>WN</fixed-case>-Salience: A Corpus of News Articles with Entity Salience Annotations</title>
       <author><first>Chuan</first><last>Wu</last></author>
       <author><first>Evangelos</first><last>Kanoulas</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <pages>2095–2102</pages>
       <abstract>Entities can be found in various text genres, ranging from tweets and web pages to user queries submitted to web search engines. Existing research either considers all entities in the text equally important, or heuristics are used to measure their salience. We believe that a key reason for the relatively limited work on entity salience is the lack of appropriate datasets. To support research on entity salience, we present a new dataset, the WikiNews Salience dataset (WN-Salience), which can be used to benchmark tasks such as entity salience detection and salient entity linking. WN-Salience is built on top of Wikinews, a Wikimedia project whose mission is to present reliable news articles. Entities in Wikinews articles are identified by the authors of the articles and are linked to Wikinews categories when they are salient or to Wikipedia pages otherwise. The dataset is built automatically, and consists of approximately 7,000 news articles, and 90,000 in-text entity annotations. We compare the WN-Salience dataset against existing datasets on the task and analyze their differences. Furthermore, we conduct experiments on entity salience detection; the results demonstrate that WN-Salience is a challenging testbed that is complementary to existing ones.</abstract>
@@ -3139,8 +3139,8 @@
     </paper>
     <paper id="259">
       <title>Comparing Machine Learning and Deep Learning Approaches on <fixed-case>NLP</fixed-case> Tasks for the <fixed-case>I</fixed-case>talian Language</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Simone</first><last>Magnolini</last></author>
       <pages>2110–2119</pages>
       <abstract>We present a comparison between deep learning and traditional machine learning methods for various NLP tasks in Italian. We carried on experiments using available datasets (e.g., from the Evalita shared tasks) on two sequence tagging tasks (i.e., named entities recognition and nominal entities recognition) and four classification tasks (i.e., lexical relations among words, semantic relations among sentences, sentiment analysis and text classification). We show that deep learning approaches outperform traditional machine learning algorithms in sequence tagging, while for classification tasks that heavily rely on semantics approaches based on feature engineering are still competitive. We think that a similar analysis could be carried out for other languages to provide an assessment of machine learning / deep learning models across different languages.</abstract>
@@ -3185,7 +3185,7 @@
     <paper id="263">
       <title>Evaluating Information Loss in Temporal Dependency Trees</title>
       <author><first>Mustafa</first><last>Ocal</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>2148–2156</pages>
       <abstract>Temporal Dependency Trees (TDTs) have emerged as an alternative to full temporal graphs for representing the temporal structure of texts, with a key advantage being that TDTs can be straightforwardly computed using adapted dependency parsers. Relative to temporal graphs, the tree form of TDTs naturally omits some fraction of temporal relationships, which intuitively should decrease the amount of temporal information available, potentially increasing temporal indeterminacy of the global ordering. We demonstrate a new method for quantifying this indeterminacy that relies on solving temporal constraint problems to extract timelines, and show that TDTs result in up to a 109% increase in temporal indeterminacy over their corresponding temporal graphs for the three corpora we examine. On average, the increase in indeterminacy is 32%, and we show that this increase is a result of the TDT representation eliminating on average only 2.4% of total temporal relations. This result suggests that small differences can have big effects in temporal graphs, and the use of TDTs must be balanced against their deficiencies, with tasks requiring an accurate global temporal ordering potentially calling for use of the full temporal graph</abstract>
       <url hash="69f7ef91">2020.lrec-1.263</url>
@@ -3195,8 +3195,8 @@
     <paper id="264">
       <title>Populating Legal Ontologies using Semantic Role Labeling</title>
       <author><first>Llio</first><last>Humphreys</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <author><first>Livio</first><last>Robaldo</last></author>
       <author><first>Leon</first><last>van der Torre</last></author>
       <author><first>Sepideh</first><last>Ghanavati</last></author>
@@ -3221,7 +3221,7 @@
     <paper id="266">
       <title>Natural Language Premise Selection: Finding Supporting Statements for Mathematical Text</title>
       <author><first>Deborah</first><last>Ferreira</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>2175–2182</pages>
       <abstract>Mathematical text is written using a combination of words and mathematical expressions. This combination, along with a specific way of structuring sentences makes it challenging for state-of-art NLP tools to understand and reason on top of mathematical discourse. In this work, we propose a new NLP task, the natural premise selection, which is used to retrieve supporting definitions and supporting propositions that are useful for generating an informal mathematical proof for a particular statement. We also make available a dataset, NL-PS, which can be used to evaluate different approaches for the natural premise selection task. Using different baselines, we demonstrate the underlying interpretation challenges associated with the task.</abstract>
       <url hash="057d9154">2020.lrec-1.266</url>
@@ -3230,8 +3230,8 @@
     </paper>
     <paper id="267">
       <title><fixed-case>O</fixed-case>dinson: A Fast Rule-based Information Extraction Framework</title>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Dane</first><last>Bell</last></author>
       <pages>2183–2191</pages>
       <abstract>We present Odinson, a rule-based information extraction framework, which couples a simple yet powerful pattern language that can operate over multiple representations of text, with a runtime system that operates in near real time. In the Odinson query language, a single pattern may combine regular expressions over surface tokens with regular expressions over graphs such as syntactic dependencies. To guarantee the rapid matching of these patterns, our framework indexes most of the necessary information for matching patterns, including directed graphs such as syntactic dependencies, into a custom Lucene index. Indexing minimizes the amount of expensive pattern matching that must take place at runtime. As a result, the runtime system matches a syntax-based graph traversal in 2.8 seconds in a corpus of over 134 million sentences, nearly 150,000 times faster than its predecessor.</abstract>
@@ -3257,7 +3257,7 @@
       <title><fixed-case>M</fixed-case>ath<fixed-case>A</fixed-case>lign: Linking Formula Identifiers to their Contextual Natural Language Descriptions</title>
       <author><first>Maria</first><last>Alexeeva</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Jennifer</first><last>Kadowaki</last></author>
       <author><first>Adarsh</first><last>Pyarelal</last></author>
       <author><first>Clayton</first><last>Morrison</last></author>
@@ -3270,9 +3270,9 @@
     <paper id="270">
       <title>Domain Adapted Distant Supervision for Pedagogically Motivated Relation Extraction</title>
       <author><first>Oscar</first><last>Sainz</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
       <pages>2213–2222</pages>
       <abstract>In this paper we present a relation extraction system that given a text extracts pedagogically motivated relation types, as a previous step to obtaining a semantic representation of the text which will make possible to automatically generate questions for reading comprehension. The system maps pedagogically motivated relations with relations from ConceptNet and deploys Distant Supervision for relation extraction. We run a study on a subset of those relationships in order to analyse the viability of our approach. For that, we build a domain-specific relation extraction system and explore two relation extraction models: a state-of-the-art model based on transfer learning and a discrete feature based machine learning model. Experiments show that the neural model obtains better results in terms of F-score and we yield promising results on the subset of relations suitable for pedagogical purposes. We thus consider that distant supervision for relation extraction is a valid approach in our target domain, i.e. biology.</abstract>
       <url hash="25daf77d">2020.lrec-1.270</url>
@@ -3307,7 +3307,7 @@
       <author><first>Sovan Kumar</first><last>Sahoo</last></author>
       <author><first>Saumajit</first><last>Saha</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2241–2250</pages>
       <abstract>Event Extraction is an important task in the widespread field of Natural Language Processing (NLP). Though this task is adequately addressed in English with sufficient resources, we are unaware of any benchmark setup in Indian languages. Hindi is one of the most widely spoken languages in the world. In this paper, we present an Event Extraction framework for Hindi language by creating an annotated resource for benchmarking, and then developing deep learning based models to set as the baselines. We crawl more than seventeen hundred disaster related Hindi news articles from the various news sources. We also develop deep learning based models for Event Trigger Detection and Classification, Argument Detection and Classification and Event-Argument Linking.</abstract>
       <url hash="22c5e158">2020.lrec-1.273</url>
@@ -3331,7 +3331,7 @@
     <paper id="275">
       <title><fixed-case>NLP</fixed-case> Analytics in Finance with <fixed-case>D</fixed-case>o<fixed-case>R</fixed-case>e: A <fixed-case>F</fixed-case>rench 250<fixed-case>M</fixed-case> Tokens Corpus of Corporate Annual Reports</title>
       <author><first>Corentin</first><last>Masson</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>2261–2267</pages>
       <abstract>Recent advances in neural computing and word embeddings for semantic processing open many new applications areas which had been left unaddressed so far because of inadequate language understanding capacity. But this new kind of approaches rely even more on training data to be operational. Corpora for financial applications exists, but most of them concern stock market prediction and are in English. To address this need for the French language and regulation oriented applications which require a deeper understanding of the text content, we hereby present “DoRe”, a French and dialectal French Corpus for NLP analytics in Finance, Regulation and Investment. This corpus is composed of: (a) 1769 Annual Reports from 336 companies among the most capitalized companies in: France (Euronext Paris) &amp; Belgium (Euronext Brussels), covering a time frame from 2009 to 2019, and (b) related MetaData containing information for each company about its ISIN code, capitalization and sector. This corpus is designed to be as modular as possible in order to allow for maximum reuse in different tasks pertaining to Economics, Finance and Regulation. After presenting existing resources, we relate the construction of the DoRe corpus and the rationale behind our choices, concluding on the spectrum of possible uses of this new resource for NLP applications.</abstract>
       <url hash="89b0f793">2020.lrec-1.275</url>
@@ -3341,7 +3341,7 @@
     <paper id="276">
       <title>The Language of Brain Signals: Natural Language Processing of Electroencephalography Reports</title>
       <author><first>Ramon</first><last>Maldonado</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>2268–2275</pages>
       <abstract>Brain signals are captured by clinical electroencephalography (EEG) which is an excellent tool for probing neural function. When EEG tests are performed, a textual EEG report is generated by the neurologist to document the findings, thus using language that describes the brain signals and its clinical correlations. Even with the impetus provided by the BRAIN initiative (brainitititive.nih.gov), there are no annotations available in texts that capture language describing the brain activities and their correlations with various pathologies. In this paper we describe an annotation effort carried out on a large corpus of EEG reports, providing examples of EEG-specific and clinically relevant concepts. In addition, we detail our annotation schema for brain signal attributes. We also discuss the resulting annotation of long-distance relations between concepts in EEG reports. By exemplifying a self-attention joint-learning to predict similar annotations in the EEG report corpus, we discuss the promising results, hoping that our effort will inform the design of novel knowledge capture techniques that will include the language of brain signals.</abstract>
       <url hash="37a760de">2020.lrec-1.276</url>
@@ -3428,7 +3428,7 @@
     <paper id="283">
       <title>Multiple Knowledge <fixed-case>G</fixed-case>raph<fixed-case>DB</fixed-case> (<fixed-case>MKGDB</fixed-case>)</title>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Farid</first><last>Yusifli</last></author>
       <pages>2325–2331</pages>
       <abstract>We present MKGDB, a large-scale graph database created as a combination of multiple taxonomy backbones extracted from 5 existing knowledge graphs, namely: ConceptNet, DBpedia, WebIsAGraph, WordNet and the Wikipedia category hierarchy. MKGDB, thanks the versatility of the Neo4j graph database manager technology, is intended to favour and help the development of open-domain natural language processing applications relying on knowledge bases, such as information extraction, hypernymy discovery, topic clustering, and others. Our resource consists of a large hypernymy graph which counts more than 37 million nodes and more than 81 million hypernymy relations.</abstract>
@@ -3438,7 +3438,7 @@
     </paper>
     <paper id="284">
       <title>Orchestrating <fixed-case>NLP</fixed-case> Services for the Legal Domain</title>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
       <author><first>Víctor</first><last>Rodriguez-Doncel</last></author>
@@ -3482,7 +3482,7 @@
     </paper>
     <paper id="287">
       <title>Representing Multiword Term Variation in a Terminological Knowledge Base: a Corpus-Based Study</title>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <author><first>Arianne</first><last>Reimerink</last></author>
       <author><first>Melania</first><last>Cabezas-García</last></author>
       <pages>2358–2367</pages>
@@ -3567,10 +3567,10 @@
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Cibu</first><last>Johny</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>2413–2423</pages>
       <abstract>This paper describes the Dakshina dataset, a new resource consisting of text in both the Latin and native scripts for 12 South Asian languages. The dataset includes, for each language: 1) native script Wikipedia text; 2) a romanization lexicon; and 3) full sentence parallel data in both a native script of the language and the basic Latin alphabet. We document the methods used for preparation and selection of the Wikipedia text in each language; collection of attested romanizations for sampled lexicons; and manual romanization of held-out sentences from the native script collections. We additionally provide baseline results on several tasks made possible by the dataset, including single word transliteration, full sentence transliteration, and language modeling of native script and romanized text.</abstract>
       <url hash="224a8552">2020.lrec-1.294</url>
@@ -3593,7 +3593,7 @@
       <title>Embedding Space Correlation as a Measure of Domain Similarity</title>
       <author><first>Anne</first><last>Beyer</last></author>
       <author><first>Göran</first><last>Kauermann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2431–2439</pages>
       <abstract>Prior work has determined domain similarity using text-based features of a corpus. However, when using pre-trained word embeddings, the underlying text corpus might not be accessible anymore. Therefore, we propose the CCA measure, a new measure of domain similarity based directly on the dimension-wise correlations between corresponding embedding spaces. Our results suggest that an inherent notion of domain can be captured this way, as we are able to reproduce our findings for different domain comparisons for English, German, Spanish and Czech as well as in cross-lingual comparisons. We further find a threshold at which the CCA measure indicates that two corpora come from the same domain in a monolingual setting by applying permutation tests. By evaluating the usability of the CCA measure in a domain adaptation application, we also show that it can be used to determine which corpora are more similar to each other in a cross-domain sentiment detection task.</abstract>
       <url hash="881da488">2020.lrec-1.296</url>
@@ -3659,8 +3659,8 @@
       <author><first>Maximin</first><last>Coavoux</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>2479–2490</pages>
       <abstract>Language models have become a key step to achieve state-of-the art results in many different Natural Language Processing (NLP) tasks. Leveraging the huge amount of unlabeled texts nowadays available, they provide an efficient way to pre-train continuous word representations that can be fine-tuned for a downstream task, along with their contextualization at the sentence level. This has been widely demonstrated for English using contextualized representations (Dai and Le, 2015; Peters et al., 2018; Howard and Ruder, 2018; Radford et al., 2018; Devlin et al., 2019; Yang et al., 2019b). In this paper, we introduce and share FlauBERT, a model learned on a very large and heterogeneous French corpus. Models of different sizes are trained using the new CNRS (French National Centre for Scientific Research) Jean Zay supercomputer. We apply our French language models to diverse NLP tasks (text classification, paraphrasing, natural language inference, parsing, word sense disambiguation) and show that most of the time they outperform other pre-training approaches. Different versions of FlauBERT as well as a unified evaluation protocol for the downstream tasks, called FLUE (French Language Understanding Evaluation), are shared to the research community for further reproducible experiments in French NLP.</abstract>
@@ -3670,9 +3670,9 @@
     </paper>
     <paper id="303">
       <title>Accelerated High-Quality Mutual-Information Based Word Clustering</title>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Ira</first><last>Assent</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>2491–2496</pages>
       <abstract>Word clustering groups words that exhibit similar properties. One popular method for this is Brown clustering, which uses short-range distributional information to construct clusters. Specifically, this is a hard hierarchical clustering with a fixed-width beam that employs bi-grams and greedily minimizes global mutual information loss. The result is word clusters that tend to outperform or complement other word representations, especially when constrained by small datasets. However, Brown clustering has high computational complexity and does not lend itself to parallel computation. This, together with the lack of efficient implementations, limits their applicability in NLP. We present efficient implementations of Brown clustering and the alternative Exchange clustering as well as a number of methods to accelerate the computation of both hierarchical and flat clusters. We show empirically that clusters obtained with the accelerated method match the performance of clusters computed using the original methods.</abstract>
       <url hash="97e638d6">2020.lrec-1.303</url>
@@ -3716,7 +3716,7 @@
     <paper id="307">
       <title>Automatic Transcription Challenges for <fixed-case>I</fixed-case>nuktitut, a Low-Resource Polysynthetic Language</title>
       <author><first>Vishwa</first><last>Gupta</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <pages>2521–2527</pages>
       <abstract>We introduce the first attempt at automatic speech recognition (ASR) in Inuktitut, as a representative for polysynthetic, low-resource languages, like many of the 900 Indigenous languages spoken in the Americas. As most previous work on Inuktitut, we use texts from parliament proceedings, but in addition we have access to 23 hours of transcribed oral stories. With this corpus, we show that Inuktitut displays a much higher degree of polysynthesis than other agglutinative languages usually considered in ASR, such as Finnish or Turkish. Even with a vocabulary of 1.3 million words derived from proceedings and stories, held-out stories have more than 60% of words out-of-vocabulary. We train bi-directional LSTM acoustic models, then investigate word and subword units, morphemes and syllables, and a deep neural network that finds word boundaries in subword sequences. We show that acoustic decoding using syllables decorated with word boundary markers results in the lowest word error rate.</abstract>
       <url hash="5fe900f9">2020.lrec-1.307</url>
@@ -3787,7 +3787,7 @@
       <title>Exploring Bilingual Word Embeddings for <fixed-case>H</fixed-case>iligaynon, a Low-Resource Language</title>
       <author><first>Leah</first><last>Michel</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2573–2580</pages>
       <abstract>This paper investigates the use of bilingual word embeddings for mining Hiligaynon translations of English words. There is very little research on Hiligaynon, an extremely low-resource language of Malayo-Polynesian origin with over 9 million speakers in the Philippines (we found just one paper). We use a publicly available Hiligaynon corpus with only 300K words, and match it with a comparable corpus in English. As there are no bilingual resources available, we manually develop a English-Hiligaynon lexicon and use this to train bilingual word embeddings. But we fail to mine accurate translations due to the small amount of data. To find out if the same holds true for a related language pair, we simulate the same low-resource setup on English to German and arrive at similar results. We then vary the size of the comparable English and German corpora to determine the minimum corpus size necessary to achieve competitive results. Further, we investigate the role of the seed lexicon. We show that with the same corpus size but with a smaller seed lexicon, performance can surpass results of previous studies. We release the lexicon of 1,200 English-Hiligaynon word pairs we created to encourage further investigation.</abstract>
       <url hash="45c12667">2020.lrec-1.313</url>
@@ -3798,7 +3798,7 @@
       <title>A Finite-State Morphological Analyser for <fixed-case>E</fixed-case>venki</title>
       <author><first>Anna</first><last>Zueva</last></author>
       <author><first>Anastasia</first><last>Kuznetsova</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>2581–2589</pages>
       <abstract>It has been widely admitted that morphological analysis is an important step in automated text processing for morphologically rich languages. Evenki is a language with rich morphology, therefore a morphological analyser is highly desirable for processing Evenki texts and developing applications for Evenki. Although two morphological analysers for Evenki have already been developed, they are able to analyse less than a half of the available Evenki corpora. The aim of this paper is to create a new morphological analyser for Evenki. It is implemented using the Helsinki Finite-State Transducer toolkit (HFST). The lexc formalism is used to specify the morphotactic rules, which define the valid orderings of morphemes in a word. Morphophonological alternations and orthographic rules are described using the twol formalism. The lexicon is extracted from available machine-readable dictionaries. Since a part of the corpora belongs to texts in Evenki dialects, a version of the analyser with relaxed rules is developed for processing dialectal features. We evaluate the analyser on available Evenki corpora and estimate precision, recall and F-score. We obtain coverage scores of between 61% and 87% on the available Evenki corpora.</abstract>
       <url hash="237cf3c9">2020.lrec-1.314</url>
@@ -3831,7 +3831,7 @@
       <author><first>Edresson</first><last>Casanova</last></author>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Lilian</first><last>Hübner</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>2605–2614</pages>
       <abstract>Automatic analysis of connected speech by natural language processing techniques is a promising direction for diagnosing cognitive impairments. However, some difficulties still remain: the time required for manual narrative transcription and the decision on how transcripts should be divided into sentences for successful application of parsers used in metrics, such as Idea Density, to analyze the transcripts. The main goal of this paper was to develop a generic segmentation system for narratives of neuropsychological language tests. We explored the performance of our previous single-dataset-trained sentence segmentation architecture in a richer scenario involving three new datasets used to diagnose cognitive impairments, comprising different stories and two types of stimulus presentation for eliciting narratives — visual and oral — via illustrated story-book and sequence of scenes, and by retelling. Also, we proposed and evaluated three modifications to our previous RCNN architecture: (i) the inclusion of a Linear Chain CRF; (ii) the inclusion of a self-attention mechanism; and (iii) the replacement of the LSTM recurrent layer by a Quasi-Recurrent Neural Network layer. Our study allowed us to develop two new models for segmenting impaired speech transcriptions, along with an ideal combination of datasets and specific groups of narratives to be used as the training set.</abstract>
       <url hash="6170a25d">2020.lrec-1.317</url>
@@ -3896,7 +3896,7 @@
     <paper id="323">
       <title>The Nisvai Corpus of Oral Narrative Practices from <fixed-case>M</fixed-case>alekula (<fixed-case>V</fixed-case>anuatu) and its Associated Language Resources</title>
       <author><first>Jocelyn</first><last>Aznar</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>2649–2656</pages>
       <abstract>In this paper, we present a corpus of oral narratives from the Nisvai linguistic community and four associated language resources. Nisvai is an oral language spoken by 200 native speakers in the South-East of Malekula, an island of Vanuatu, Oceania. This language had never been the focus of a research before the one leading to this article. The corpus we present is made of 32 annotated narratives segmented into intonation units. The audio records were transcribed using the written conventions specifically developed for the language and translated into French. Four associated language resources have been generated by organizing the annotations into written documents: two of them are available online and two in paper format. The online resources allow the users to listen to the audio recordings whilereading the annotations. They were built to share the results of our fieldwork and to communicate on the Nisvai narrative practices with the researchers as well as with a more general audience. The bilingual paper resources, a booklet of narratives and a Nisvai-French French-Nisvai lexicon, were designed for the Nisvai community by taking into account their future uses (i.e. primary school).</abstract>
       <url hash="68641781">2020.lrec-1.323</url>
@@ -3943,7 +3943,7 @@
     <paper id="327">
       <title>Towards a Spell Checker for <fixed-case>Z</fixed-case>amboanga <fixed-case>C</fixed-case>havacano Orthography</title>
       <author><first>Marcelo Yuji</first><last>Himoro</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <pages>2685–2697</pages>
       <abstract>Zamboanga Chabacano (ZC) is the most vibrant variety of Philippine Creole Spanish, with over 400,000 native speakers in the Philippines (as of 2010). Following its introduction as a subject and a medium of instruction in the public schools of Zamboanga City from Grade 1 to 3 in 2012, an official orthography for this variety - the so-called “Zamboanga Chavacano Orthography” - has been approved in 2014. Its complexity, however, is a barrier to most speakers, since it does not necessarily reflect the particular phonetic evolution in ZC, but favours etymology instead. The distance between the correct spelling and the different spelling variations is often so great that delivering acceptable performance with the current de facto spell checking technologies may be challenging. The goals of this research have been to propose i) a spelling error taxonomy for ZC, formalised as an ontology and ii) an adaptive spell checking approach using Character-Based Statistical Machine Translation to correct spelling errors in ZC. Our results show that this approach is suitable for the goals mentioned and that it could be combined with other current spell checking technologies to achieve even higher performance.</abstract>
       <url hash="6d6ff269">2020.lrec-1.327</url>
@@ -3990,8 +3990,8 @@
       <author><first>Lucy</first><last>Linder</last></author>
       <author><first>Sandra</first><last>Djambazovska</last></author>
       <author><first>Alexandros</first><last>Lazaridis</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <pages>2720–2725</pages>
       <abstract>We introduce a dictionary containing normalized forms of common words in various Swiss German dialects into High German. As Swiss German is, for now, a predominantly spoken language, there is a significant variation in the written forms, even between speakers of the same dialect. To alleviate the uncertainty associated with this diversity, we complement the pairs of Swiss German - High German words with the Swiss German phonetic transcriptions (SAMPA). This dictionary becomes thus the first resource to combine large-scale spontaneous translation with phonetic transcriptions. Moreover, we control for the regional distribution and insure the equal representation of the major Swiss dialects. The coupling of the phonetic and written Swiss German forms is powerful. We show that they are sufficient to train a Transformer-based phoneme to grapheme model that generates credible novel Swiss German writings. In addition, we show that the inverse mapping - from graphemes to phonemes - can be modeled with a transformer trained with the novel dictionary. This generation of pronunciations for previously unknown words is key in training extensible automated speech recognition (ASR) systems, which are key beneficiaries of this dictionary.</abstract>
       <url hash="10e4b114">2020.lrec-1.331</url>
@@ -4025,7 +4025,7 @@
       <author><first>Jacqueline</first><last>Brixey</last></author>
       <author><first>David</first><last>Sides</last></author>
       <author><first>Timothy</first><last>Vizthum</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Khalil</first><last>Iskarous</last></author>
       <pages>2746–2753</pages>
       <abstract>This work introduces additions to the corpus ChoCo, a multimodal corpus for the American indigenous language Choctaw. Using texts from the corpus, we develop new computational resources by using two off-the-shelf tools: word2vec and Linguistica. Our work illustrates how these tools can be successfully implemented with a small corpus.</abstract>
@@ -4035,9 +4035,9 @@
     </paper>
     <paper id="335">
       <title>Massive vs. Curated Embeddings for Low-Resourced Languages: the Case of <fixed-case>Y</fixed-case>orùbá and <fixed-case>T</fixed-case>wi</title>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
       <author><first>Kwabena</first><last>Amponsah-Kaakyire</last></author>
-      <author><first>David I.</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David I.</first><last>Adelani</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>2754–2762</pages>
       <abstract>The success of several architectures to learn semantic representations from unannotated text and the availability of these kind of texts in online multilingual resources such as Wikipedia has facilitated the massive and automatic creation of resources for multiple languages. The evaluation of such resources is usually done for the high-resourced languages, where one has a smorgasbord of tasks and test sets to evaluate on. For low-resourced languages, the evaluation is more difficult and normally ignored, with the hope that the impressive capability of deep learning architectures to learn (multilingual) representations in the high-resourced setting holds in the low-resourced setting too. In this paper we focus on two African languages, Yorùbá and Twi, and compare the word embeddings obtained in this way, with word embeddings obtained from curated corpora and a language-dependent processing. We analyse the noise in the publicly available corpora, collect high quality and noisy data for the two languages and quantify the improvements that depend not only on the amount of data but on the quality too. We also use different architectures that learn word representations both from surface forms and characters to further exploit all the available information which showed to be important for these languages. For the evaluation, we manually translate the wordsim-353 word pairs dataset from English into Yorùbá and Twi. We extend the analysis to contextual word embeddings and evaluate multilingual BERT on a named entity recognition task. For this, we annotate with named entities the Global Voices corpus for Yorùbá. As output of the work, we provide corpora, embeddings and the test suits for both languages.</abstract>
@@ -4061,7 +4061,7 @@
     </paper>
     <paper id="337">
       <title>Collection and Annotation of the <fixed-case>R</fixed-case>omanian Legal Corpus</title>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
       <author><first>Vasile</first><last>Păiș</last></author>
       <author><first>Radu</first><last>Ion</last></author>
@@ -4095,8 +4095,8 @@
     </paper>
     <paper id="340">
       <title>Building a Task-oriented Dialog System for Languages with no Training Data: the Case for <fixed-case>B</fixed-case>asque</title>
-      <author><first>Maddalen</first><last>López de Lacalle</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="maddalen-lopez-de-lacalle"><first>Maddalen</first><last>López de Lacalle</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
       <pages>2796–2802</pages>
       <abstract>This paper presents an approach for developing a task-oriented dialog system for less-resourced languages in scenarios where training data is not available. Both intent classification and slot filling are tackled. We project the existing annotations in rich-resource languages by means of Neural Machine Translation (NMT) and posterior word alignments. We then compare training on the projected monolingual data with direct model transfer alternatives. Intent Classifiers and slot filling sequence taggers are implemented using a BiLSTM architecture or by fine-tuning BERT transformer models. Models learnt exclusively from Basque projected data provide better accuracies for slot filling. Combining Basque projected train data with rich-resource languages data outperforms consistently models trained solely on projected data for intent classification. At any rate, we achieve competitive performance in both tasks, with accuracies of 81% for intent classification and 77% for slot filling.</abstract>
@@ -4108,7 +4108,7 @@
       <title><fixed-case>SENCORPUS</fixed-case>: A <fixed-case>F</fixed-case>rench-<fixed-case>W</fixed-case>olof Parallel Corpus</title>
       <author><first>Elhadji Mamadou</first><last>Nguer</last></author>
       <author><first>Alla</first><last>Lo</last></author>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <author><first>Sileye O.</first><last>Ba</last></author>
       <author><first>Moussa</first><last>Lo</last></author>
       <pages>2803–2811</pages>
@@ -4197,7 +4197,7 @@
     <paper id="347">
       <title>Towards Computational Resource Grammars for <fixed-case>R</fixed-case>unyankore and Rukiga</title>
       <author><first>David</first><last>Bamutura</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Peter</first><last>Nebende</last></author>
       <pages>2846–2854</pages>
       <abstract>In this paper, we present computational resource grammars of Runyankore and Rukiga (R&amp;R) languages. Runyankore and Rukiga are two under-resourced Bantu Languages spoken by about 6 million people indigenous to South- Western Uganda, East Africa. We used Grammatical Framework (GF), a multilingual grammar formalism and a special- purpose functional programming language to formalise the descriptive grammar of these languages. To the best of our knowledge, these computational resource grammars are the first attempt to the creation of language resources for R&amp;R. In Future Work, we plan to use these grammars to bootstrap the generation of other linguistic resources such as multilingual corpora that make use of data-driven approaches to natural language processing feasible. In the meantime, they can be used to build Computer-Assisted Language Learning (CALL) applications for these languages among others.</abstract>
@@ -4238,8 +4238,8 @@
       <author><first>Sai Krishna</first><last>Rallabandi</last></author>
       <author><first>Rodolfo</first><last>Vega</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>2872–2877</pages>
       <abstract>We present a resource for computational experiments on Mapudungun, a polysynthetic indigenous language spoken in Chile with upwards of 200 thousand speakers. We provide 142 hours of culturally significant conversations in the domain of medical treatment. The conversations are fully transcribed and translated into Spanish. The transcriptions also include annotations for code-switching and non-standard pronunciations. We also provide baseline results on three core NLP tasks: speech recognition, speech synthesis, and machine translation between Spanish and Mapudungun. We further explore other applications for which the corpus will be suitable, including the study of code-switching, historical orthography change, linguistic structure, and sociological and anthropological studies.</abstract>
       <url hash="4f0759c4">2020.lrec-1.350</url>
@@ -4260,7 +4260,7 @@
     </paper>
     <paper id="352">
       <title>The <fixed-case>J</fixed-case>ohns <fixed-case>H</fixed-case>opkins <fixed-case>U</fixed-case>niversity <fixed-case>B</fixed-case>ible Corpus: 1600+ Tongues for Typological Exploration</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Rachel</first><last>Wicks</last></author>
       <author><first>Dylan</first><last>Lewis</last></author>
       <author><first>Aaron</first><last>Mueller</last></author>
@@ -4310,7 +4310,7 @@
     <paper id="356">
       <title>No Data to Crawl? Monolingual Corpus Creation from <fixed-case>PDF</fixed-case> Files of Truly low-Resource Languages in <fixed-case>P</fixed-case>eru</title>
       <author><first>Gina</first><last>Bustamante</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Roberto</first><last>Zariquiey</last></author>
       <pages>2914–2923</pages>
       <abstract>We introduce new monolingual corpora for four indigenous and endangered languages from Peru: Shipibo-konibo, Ashaninka, Yanesha and Yine. Given the total absence of these languages in the web, the extraction and processing of texts from PDF files is relevant in a truly low-resource language scenario. Our procedure for monolingual corpus creation considers language-specific and language-agnostic steps, and focuses on educational PDF files with multilingual sentences, noisy pages and low-structured content. Through an evaluation based on language modelling and character-level perplexity on a subset of manually extracted sentences, we determine that our method allows the creation of clean corpora for the four languages, a key resource for natural language processing tasks nowadays.</abstract>
@@ -4321,7 +4321,7 @@
     <paper id="357">
       <title>Creating a Parallel <fixed-case>I</fixed-case>celandic Dependency Treebank from Raw Text to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Hildur</first><last>Jónsdóttir</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>2924–2931</pages>
       <abstract>Making the low-resource language, Icelandic, accessible and usable in Language Technology is a work in progress and is supported by the Icelandic government. Creating resources and suitable training data (e.g., a dependency treebank) is a fundamental part of that work. We describe work on a parallel Icelandic dependency treebank based on Universal Dependencies (UD). This is important because it is the first parallel treebank resource for the language and since several other languages already have a resource based on the same text. Two Icelandic treebanks based on phrase-structure grammar have been built and ongoing work aims to convert them to UD. Previously, limited work has been done on dependency grammar for Icelandic. The current project aims to ameliorate this situation by creating a small dependency treebank from scratch. Creating a treebank is a laborious task so the process was implemented in an accessible manner using freely available tools and resources. The parallel data in the UD project was chosen as a source because this would furthermore give us the first parallel treebank for Icelandic. The Icelandic parallel UD corpus will be published as part of UD version 2.6.</abstract>
       <url hash="0bcffc57">2020.lrec-1.357</url>
@@ -4356,7 +4356,7 @@
       <title><fixed-case>CPLM</fixed-case>, a Parallel Corpus for <fixed-case>M</fixed-case>exican Languages: Development and Interface</title>
       <author><first>Gerardo</first><last>Sierra Martínez</last></author>
       <author><first>Cynthia</first><last>Montaño</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Diego</first><last>Córdova</last></author>
       <author><first>Margarita</first><last>Mota Montoya</last></author>
       <pages>2947–2952</pages>
@@ -4407,7 +4407,7 @@
       <author><first>Hour</first><last>Kaing</last></author>
       <author><first>Khin</first><last>Mar Soe</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>2980–2983</pages>
       <abstract>Transliteration is generally a phonetically based transcription across different writing systems. It is a crucial task for various downstream natural language processing applications. For the Myanmar (Burmese) language, robust automatic transliteration for borrowed English words is a challenging task because of the complex Myanmar writing system and the lack of data. In this study, we constructed a Myanmar-English named entity dictionary containing more than eighty thousand transliteration instances. The data have been released under a CC BY-NC-SA license. We evaluated the automatic transliteration performance using statistical and neural network-based approaches based on the prepared data. The neural network model outperformed the statistical model significantly in terms of the BLEU score on the character level. Different units used in the Myanmar script for processing were also compared and discussed.</abstract>
       <url hash="f72f8f9c">2020.lrec-1.364</url>
@@ -4418,7 +4418,7 @@
       <title><fixed-case>CA</fixed-case>-<fixed-case>EHN</fixed-case>: Commonsense Analogy from <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
       <author><first>Peng-Hsuan</first><last>Li</last></author>
       <author><first>Tsan-Yu</first><last>Yang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>2984–2990</pages>
       <abstract>Embedding commonsense knowledge is crucial for end-to-end models to generalize inference beyond training corpora. However, existing word analogy datasets have tended to be handcrafted, involving permutations of hundreds of words with only dozens of pre-defined relations, mostly morphological relations and named entities. In this work, we model commonsense knowledge down to word-level analogical reasoning by leveraging E-HowNet, an ontology that annotates 88K Chinese words with their structured sense definitions and English translations. We present CA-EHN, the first commonsense word analogy dataset containing 90,505 analogies covering 5,656 words and 763 relations. Experiments show that CA-EHN stands out as a great indicator of how well word representations embed commonsense knowledge. The dataset is publicly available at <url>https://github.com/ckiplab/CA-EHN</url>.</abstract>
       <url hash="02440528">2020.lrec-1.365</url>
@@ -4429,7 +4429,7 @@
       <title>Building Semantic Grams of Human Knowledge</title>
       <author><first>Valentina</first><last>Leone</last></author>
       <author><first>Giovanni</first><last>Siragusa</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>2991–3000</pages>
       <abstract>Word senses are typically defined with textual definitions for human consumption and, in computational lexicons, put in context via lexical-semantic relations such as synonymy, antonymy, hypernymy, etc. In this paper we embrace a radically different paradigm that provides a slot-filler structure, called “semagram”, to define the meaning of words in terms of their prototypical semantic information. We propose a semagram-based knowledge model composed of 26 semantic relationships which integrates features from a range of different sources, such as computational lexicons and property norms. We describe an annotation exercise regarding 50 concepts over 10 different categories and put forward different automated approaches for extending the semagram base to thousands of concepts. We finally evaluated the impact of the proposed resource on a semantic similarity task, showing significant improvements over state-of-the-art word embeddings.</abstract>
@@ -4440,7 +4440,7 @@
     <paper id="367">
       <title>Automatically Building a Multilingual Lexicon of False <fixed-case>F</fixed-case>riends With No Supervision</title>
       <author><first>Ana Sabina</first><last>Uban</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>3001–3007</pages>
       <abstract>Cognate words, defined as words in different languages which derive from a common etymon, can be useful for language learners, who can leverage the orthographical similarity of cognates to more easily understand a text in a foreign language. Deceptive cognates, or false friends, do not share the same meaning anymore; these can be instead deceiving and detrimental for language acquisition or text understanding in a foreign language. We use an automatic method of detecting false friends from a set of cognates, in a fully unsupervised fashion, based on cross-lingual word embeddings. We implement our method for English and five Romance languages, including a low-resource language (Romanian), and evaluate it against two different gold standards. The method can be extended easily to any language pair, requiring only large monolingual corpora for the involved languages and a small bilingual dictionary for the pair. We additionally propose a measure of “falseness” of a false friends pair. We publish freely the database of false friends in the six languages, along with the falseness scores for each cognate pair. The resource is the largest of the kind that we are aware of, both in terms of languages covered and number of word pairs.</abstract>
       <url hash="95439f0f">2020.lrec-1.367</url>
@@ -4533,7 +4533,7 @@
     </paper>
     <paper id="376">
       <title>Odi et <fixed-case>A</fixed-case>mo. Creating, Evaluating and Extending Sentiment Lexicons for <fixed-case>L</fixed-case>atin.</title>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <author><first>Daniela</first><last>Corbetta</last></author>
       <author><first>Andrea</first><last>Peverelli</last></author>
@@ -4545,7 +4545,7 @@
     </paper>
     <paper id="377">
       <title><fixed-case>W</fixed-case>ord<fixed-case>W</fixed-case>ars: A Dataset to Examine the Natural Selection of Words</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>3087–3095</pages>
       <abstract>There is a growing body of work on how word meaning changes over time: mutation. In contrast, there is very little work on how different words compete to represent the same meaning, and how the degree of success of words in that competition changes over time: natural selection. We present a new dataset, WordWars, with historical frequency data from the early 1800s to the early 2000s for monosemous English words in over 5000 synsets. We explore three broad questions with the dataset: (1) what is the degree to which predominant words in these synsets have changed, (2) how do prominent word features such as frequency, length, and concreteness impact natural selection, and (3) what are the differences between the predominant words of the 2000s and the predominant words of early 1800s. We show that close to one third of the synsets undergo a change in the predominant word in this time period. Manual annotation of these pairs shows that about 15% of these are orthographic variations, 25% involve affix changes, and 60% have completely different roots. We find that frequency, length, and concreteness all impact natural selection, albeit in different ways.</abstract>
       <url hash="b2204c6e">2020.lrec-1.377</url>
@@ -4555,9 +4555,9 @@
     <paper id="378">
       <title>Challenge Dataset of Cognates and False Friend Pairs from <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3096–3102</pages>
       <abstract>Cognates are present in multiple variants of the same text across different languages (e.g., “hund” in German and “hound” in the English language mean “dog”). They pose a challenge to various Natural Language Processing (NLP) applications such as Machine Translation, Cross-lingual Sense Disambiguation, Computational Phylogenetics, and Information Retrieval. A possible solution to address this challenge is to identify cognates across language pairs. In this paper, we describe the creation of two cognate datasets for twelve Indian languages namely Sanskrit, Hindi, Assamese, Oriya, Kannada, Gujarati, Tamil, Telugu, Punjabi, Bengali, Marathi, and Malayalam. We digitize the cognate data from an Indian language cognate dictionary and utilize linked Indian language Wordnets to generate cognate sets. Additionally, we use the Wordnet data to create a False Friends’ dataset for eleven language pairs. We also evaluate the efficacy of our dataset using previously available baseline cognate detection approaches. We also perform a manual evaluation with the help of lexicographers and release the curated gold-standard dataset with this paper.</abstract>
       <url hash="bbb82c99">2020.lrec-1.378</url>
@@ -4580,7 +4580,7 @@
       <title>A Lexicon-Based Approach for Detecting Hedges in Informal Text</title>
       <author><first>Jumayel</first><last>Islam</last></author>
       <author><first>Lu</first><last>Xiao</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>3109–3113</pages>
       <abstract>Hedging is a commonly used strategy in conversational management to show the speaker’s lack of commitment to what they communicate, which may signal problems between the speakers. Our project is interested in examining the presence of hedging words and phrases in identifying the tension between an interviewer and interviewee during a survivor interview. While there have been studies on hedging detection in the natural language processing literature, all existing work has focused on structured texts and formal communications. Our project thus investigated a corpus of eight unstructured conversational interviews about the Rwanda Genocide and identified hedging patterns in the interviewees’ responses. Our work produced three manually constructed lists of hedge words, booster words, and hedging phrases. Leveraging these lexicons, we developed a rule-based algorithm that detects sentence-level hedges in informal conversations such as survivor interviews. Our work also produced a dataset of 3000 sentences having the categories Hedge and Non-hedge annotated by three researchers. With experiments on this annotated dataset, we verify the efficacy of our proposed algorithm. Our work contributes to the further development of tools that identify hedges from informal conversations and discussions.</abstract>
       <url hash="10597f16">2020.lrec-1.380</url>
@@ -4621,9 +4621,9 @@
     <paper id="384">
       <title>Towards a Semi-Automatic Detection of Reflexive and Reciprocal Constructions and Their Representation in a Valency Lexicon</title>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Marketa</first><last>Lopatkova</last></author>
+      <author id="marketa-lopatkova"><first>Marketa</first><last>Lopatkova</last></author>
       <author><first>Anna</first><last>Vernerová</last></author>
-      <author><first>Petra</first><last>Barancikova</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barancikova</last></author>
       <pages>3136–3144</pages>
       <abstract>Valency lexicons usually describe valency behavior of verbs in non-reflexive and non-reciprocal constructions. However, reflexive and reciprocal constructions are common morphosyntactic forms of verbs. Both of these constructions are characterized by regular changes in morphosyntactic properties of verbs, thus they can be described by grammatical rules. On the other hand, the possibility to create reflexive and/or reciprocal constructions cannot be trivially derived from the morphosyntactic structure of verbs as it is conditioned by their semantic properties as well. A large-coverage valency lexicon allowing for rule based generation of all well formed verb constructions should thus integrate the information on reflexivity and reciprocity. In this paper, we propose a semi-automatic procedure, based on grammatical constraints on reflexivity and reciprocity, detecting those verbs that form reflexive and reciprocal constructions in corpus data. However, exploitation of corpus data for this purpose is complicated due to the diverse functions of reflexive markers crossing the domain of reflexivity and reciprocity. The list of verbs identified by the previous procedure is thus further used in an automatic experiment, applying word embeddings for detecting semantically similar verbs. These candidate verbs have been manually verified and annotation of their reflexive and reciprocal constructions has been integrated into the valency lexicon of Czech verbs VALLEX.</abstract>
       <url hash="bf8c951b">2020.lrec-1.384</url>
@@ -4669,8 +4669,8 @@
     </paper>
     <paper id="388">
       <title>Modelling Etymology in <fixed-case>LMF</fixed-case>/<fixed-case>TEI</fixed-case>: The Grande Dicionário Houaiss da Língua Portuguesa Dictionary as a Use Case</title>
-      <author><first>Fahad</first><last>Khan</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Ana</first><last>Salgado</last></author>
       <author><first>Jack</first><last>Bowers</last></author>
       <author><first>Mohamed</first><last>Khemakhem</last></author>
@@ -4697,8 +4697,8 @@
       <title>Some Issues with Building a Multilingual <fixed-case>W</fixed-case>ordnet</title>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Luis</first><last>Morgado da Costa</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Ahti</first><last>Lohk</last></author>
       <pages>3189–3197</pages>
       <abstract>In this paper we discuss the experience of bringing together over 40 different wordnets. We introduce some extensions to the GWA wordnet LMF format proposed in Vossen et al. (2016) and look at how this new information can be displayed. Notable extensions include: confidence, corpus frequency, orthographic variants, lexicalized and non-lexicalized synsets and lemmas, new parts of speech, and more. Many of these extensions already exist in multiple wordnets – the challenge was to find a compatible representation. To this end, we introduce a new version of the Open Multilingual Wordnet (Bond and Foster, 2013), that integrates a new set of tools that tests the extensions introduced by this new format, while also ensuring the integrity of the Collaborative Interlingual Index (CILI: Bond et al., 2016), avoiding the same new concept to be introduced through multiple projects.</abstract>
@@ -4718,7 +4718,7 @@
     <paper id="392">
       <title>Methodological Aspects of Developing and Managing an Etymological Lexical Resource: Introducing <fixed-case>E</fixed-case>tym<fixed-case>DB</fixed-case>-2.0</title>
       <author><first>Clémentine</first><last>Fourrier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>3207–3216</pages>
       <abstract>Diachronic lexical information is not only important in the field of historical linguistics, but is also increasingly used in NLP, most recently for machine translation of low resource languages. Therefore, there is a need for fine-grained, large-coverage and accurate etymological lexical resources. In this paper, we propose a set of guidelines to generate such resources, for each step of the life-cycle of an etymological lexicon: creation, update, evaluation, dissemination, and exploitation. To illustrate the guidelines, we introduce EtymDB 2.0, an etymological database automatically generated from the Wiktionary, which contains 1.8 million lexemes, linked by more than 700,000 fine-grained etymological relations, across 2,536 living and dead languages. We also introduce use cases for which EtymDB 2.0 could represent a key resource, such as phylogenetic tree generation, low resource machine translation or medieval languages study.</abstract>
       <url hash="24cf44e8">2020.lrec-1.392</url>
@@ -4728,7 +4728,7 @@
     <paper id="393">
       <title><fixed-case>OF</fixed-case>r<fixed-case>L</fixed-case>ex: A Computational Morphological and Syntactic Lexicon for <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench</title>
       <author><first>Gaël</first><last>Guibon</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>3217–3225</pages>
       <abstract>In this paper we describe our work on the development and enrichment of OFrLex, a freely available, large-coverage morphological and syntactic Old French lexicon. We rely on several heterogeneous language resources to extract structured and exploitable information. The extraction follows a semi-automatic procedure with substantial manual steps to respond to difficulties encountered while aligning lexical entries from distinct language resources. OFrLex aims at improving natural language processing tasks on Old French such as part-of-speech tagging and dependency parsing. We provide quantitative information on OFrLex and discuss its reliability. We also describe and evaluate a semi-automatic, word-embedding-based lexical enrichment process aimed at increasing the accuracy of the resource. Results of this extension technique will be manually validated in the near future, a step that will take advantage of OFrLex’s viewing, searching and editing interface, which is already accessible online.</abstract>
       <url hash="ed50ad24">2020.lrec-1.393</url>
@@ -4737,8 +4737,8 @@
     </paper>
     <paper id="394">
       <title>Automatic Reconstruction of Missing <fixed-case>R</fixed-case>omanian Cognates and Unattested <fixed-case>L</fixed-case>atin Words</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Laurentiu</first><last>Zoicas</last></author>
       <pages>3226–3231</pages>
       <abstract>Producing related words is a key concern in historical linguistics. Given an input word, the task is to automatically produce either its proto-word, a cognate pair or a modern word derived from it. In this paper, we apply a method for producing related words based on sequence labeling, aiming to fill in the gaps in incomplete cognate sets in Romance languages with Latin etymology (producing Romanian cognates that are missing) and to reconstruct uncertified Latin words. We further investigate an ensemble-based aggregation for combining and re-ranking the word productions of multiple languages.</abstract>
@@ -4749,11 +4749,11 @@
     <paper id="395">
       <title>A Multilingual Evaluation Dataset for Monolingual Word Sense Alignment</title>
       <author><first>Sina</first><last>Ahmadi</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Tanja</first><last>Wissik</last></author>
       <author><first>Andrea</first><last>Bellandi</last></author>
@@ -4762,7 +4762,7 @@
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Veronika</first><last>Lipp</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>László</first><last>Simon</last></author>
       <author><first>András</first><last>Gyorffy</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
@@ -4783,11 +4783,11 @@
       <author><first>José</first><last>Luis Sancho</last></author>
       <author><first>Rafael-J.</first><last>Ureña-Ruiz</last></author>
       <author><first>Jordi</first><last>Porta Zamorano</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Zara</first><last>Kancheva</last></author>
       <author><first>Ivaylo</first><last>Radev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Andrej</first><last>Perdih</last></author>
       <author><first>Dejan</first><last>Gabrovsek</last></author>
       <pages>3232–3242</pages>
@@ -4798,10 +4798,10 @@
     </paper>
     <paper id="396">
       <title>A Broad-Coverage Deep Semantic Lexicon for Verbs</title>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Hannah</first><last>An</last></author>
       <author><first>Ritwik</first><last>Bose</last></author>
-      <author><first>Will</first><last>de Beaumont</last></author>
+      <author id="william-de-beaumont"><first>Will</first><last>de Beaumont</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <pages>3243–3251</pages>
       <abstract>Progress on deep language understanding is inhibited by the lack of a broad coverage lexicon that connects linguistic behavior to ontological concepts and axioms. We have developed COLLIE-V, a deep lexical resource for verbs, with the coverage of WordNet and syntactic and semantic details that meet or exceed existing resources. Bootstrapping from a hand-built lexicon and ontology, new ontological concepts and lexical entries, together with semantic role preferences and entailment axioms, are automatically derived by combining multiple constraints from parsing dictionary definitions and examples. We evaluated the accuracy of the technique along a number of different dimensions and were able to obtain high accuracy in deriving new concepts and lexical entries. COLLIE-V is publicly available.</abstract>
@@ -4883,10 +4883,10 @@
     <paper id="403">
       <title>World Class Language Technology - Developing a Language Technology Strategy for <fixed-case>D</fixed-case>anish</title>
       <author><first>Sabine</first><last>Kirchmeier</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Philip</first><last>Diderichsen</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>3297–3301</pages>
       <abstract>Although Denmark is one of the most digitized countries in Europe, no coordinated efforts have been made in recent years to support the Danish language with regard to language technology and artificial intelligence. In March 2019, however, the Danish government adopted a new, ambitious strategy for LT and artificial intelligence. In this paper, we describe the process behind the development of the language-related parts of the strategy: A Danish Language Technology Committee was constituted and a comprehensive series of workshops were organized in which users, suppliers, developers, and researchers gave their valuable input based on their experiences. We describe how, based on this experience, the focus areas and recommendations for the LT strategy were established, and which steps are currently taken in order to put the strategy into practice.</abstract>
       <url hash="87d9f44b">2020.lrec-1.403</url>
@@ -4908,7 +4908,7 @@
     </paper>
     <paper id="405">
       <title>The <fixed-case>CLARIN</fixed-case> Knowledge Centre for Atypical Communication Expertise</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Caroline</first><last>Rowland</last></author>
       <author><first>Paul</first><last>Trilsbeek</last></author>
@@ -4920,7 +4920,7 @@
     </paper>
     <paper id="406">
       <title>Corpora of Disordered Speech in the Light of the <fixed-case>GDPR</fixed-case>: Two Use Cases from the <fixed-case>DELAD</fixed-case> Initiative</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Aleksei</first><last>Kelli</last></author>
       <author><first>Katarzyna</first><last>Klessa</last></author>
       <author><first>Satu</first><last>Salaasti</last></author>
@@ -4936,46 +4936,46 @@
       <author><first>Katrin</first><last>Marheinecke</last></author>
       <author><first>Stefanie</first><last>Hegele</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Gerhard</first><last>Backfried</last></author>
       <author><first>Christoph</first><last>Prinz</last></author>
-      <author><first>José Manuel</first><last>Gómez-Pérez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>José Manuel</first><last>Gómez-Pérez</last></author>
       <author><first>Luc</first><last>Meertens</last></author>
       <author><first>Paul</first><last>Lukowicz</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andrea</first><last>Lösch</last></author>
       <author><first>Philipp</first><last>Slusallek</last></author>
       <author><first>Morten</first><last>Irgens</last></author>
       <author><first>Patrick</first><last>Gatellier</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <author><first>Laure</first><last>Le Bars</last></author>
       <author><first>Dimitra</first><last>Anastasiou</last></author>
       <author><first>Albina</first><last>Auksoriūtė</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Gerhard</first><last>Budin</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Maria</first><last>Gavriilidou</last></author>
       <author><first>Dagmar</first><last>Gromann</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Jan</first><last>Odijk</last></author>
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Kadri</first><last>Vider</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -4999,7 +4999,7 @@
       <title>Gigafida 2.0: The Reference Corpus of Written Standard <fixed-case>S</fixed-case>lovene</title>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Špela</first><last>Arhar Holdt</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Jaka</first><last>Čibej</last></author>
       <author><first>Andraz</first><last>Repar</last></author>
       <author><first>Polona</first><last>Gantar</last></author>
@@ -5027,10 +5027,10 @@
     <paper id="411">
       <title>A <fixed-case>CLARIN</fixed-case> Transcription Portal for Interview Data</title>
       <author><first>Christoph</first><last>Draxler</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Arjan</first><last>van Hessen</last></author>
       <author><first>Silvia</first><last>Calamai</last></author>
-      <author><first>Louise</first><last>Corti</last></author>
+      <author id="louise-corti"><first>Louise</first><last>Corti</last></author>
       <pages>3353–3359</pages>
       <abstract>In this paper we present a first version of a transcription portal for audio files based on automatic speech recognition (ASR) in various languages. The portal is implemented in the CLARIN resources research network and intended for use by non-technical scholars. We explain the background and interdisciplinary nature of interview data, the perks and quirks of using ASR for transcribing the audio in a research context, the dos and don’ts for optimal use of the portal, and future developments foreseen. The portal is promoted in a range of workshops, but there are a number of challenges that have to be met. These challenges concern privacy issues, ASR quality, and cost, amongst others.</abstract>
       <url hash="a18c8ef5">2020.lrec-1.411</url>
@@ -5050,25 +5050,25 @@
     <paper id="413">
       <title><fixed-case>E</fixed-case>uropean Language Grid: An Overview</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Maria</first><last>Berger</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
       <author><first>Ela</first><last>Elsholz</last></author>
       <author><first>Stefanie</first><last>Hegele</last></author>
       <author><first>Florian</first><last>Kintzel</last></author>
       <author><first>Katrin</first><last>Marheinecke</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>David</first><last>Jones</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jana</first><last>Hamrlová</last></author>
       <author><first>Lukáš</first><last>Kačena</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Orians</first><last>Anvari</last></author>
       <author><first>Andis</first><last>Lagzdiņš</last></author>
       <author><first>Jūlija</first><last>Meļņika</last></author>
@@ -5079,7 +5079,7 @@
       <author><first>Christoph</first><last>Prinz</last></author>
       <author><first>Severin</first><last>Stampler</last></author>
       <author><first>Dorothea</first><last>Thomas-Aniola</last></author>
-      <author><first>José Manuel</first><last>Gómez-Pérez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>José Manuel</first><last>Gómez-Pérez</last></author>
       <author><first>Andres</first><last>Garcia Silva</last></author>
       <author><first>Christian</first><last>Berrío</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
@@ -5093,8 +5093,8 @@
     </paper>
     <paper id="414">
       <title>The Competitiveness Analysis of the <fixed-case>E</fixed-case>uropean Language Technology Market</title>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Indra</first><last>Samite</last></author>
       <author><first>Kaspars</first><last>Kauliņš</last></author>
       <author><first>Ēriks</first><last>Ajausks</last></author>
@@ -5109,7 +5109,7 @@
     <paper id="415">
       <title>Constructing a Bilingual <fixed-case>H</fixed-case>adith Corpus Using a Segmentation Tool</title>
       <author><first>Shatha</first><last>Altammami</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Ammar</first><last>Alsalka</last></author>
       <pages>3390–3398</pages>
       <abstract>This article describes the process of gathering and constructing a bilingual parallel corpus of Islamic Hadith, which is the set of narratives reporting different aspects of the prophet Muhammad’s life. The corpus data is gathered from the six canonical Hadith collections using a custom segmentation tool that automatically segments and annotates the two Hadith components with 92% accuracy. This Hadith segmenter minimises the costs of language resource creation and produces consistent results independently from previous knowledge and experiences that usually influence human annotators. The corpus includes more than 10M tokens and will be freely available via the LREC repository.</abstract>
@@ -5133,7 +5133,7 @@
       <author><first>Franciska</first><last>de Jong</last></author>
       <author><first>Bente</first><last>Maegaard</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Dieter</first><last>van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>van Uytvanck</last></author>
       <author><first>Andreas</first><last>Witt</last></author>
       <pages>3406–3413</pages>
       <abstract>CLARIN is a European Research Infrastructure providing access to language resources and technologies for researchers in the humanities and social sciences. It supports the use and study of language data in general and aims to increase the potential for comparative research of cultural and societal phenomena across the boundaries of languages and disciplines, all in line with the European agenda for Open Science. Data infrastructures such as CLARIN have recently embarked on the emerging frameworks for the federation of infrastructural services, such as the European Open Science Cloud and the integration of services resulting from multidisciplinary collaboration in federated services for the wider SSH domain. In this paper we describe the interoperability requirements that arise through the existing ambitions and the emerging frameworks. The interoperability theme will be addressed at several levels, including organisation and ecosystem, design of workflow services, data curation, performance measurement and collaboration.</abstract>
@@ -5145,9 +5145,9 @@
       <title>Language Technology Programme for <fixed-case>I</fixed-case>celandic 2019-2023</title>
       <author><first>Anna</first><last>Nikulásdóttir</last></author>
       <author><first>Jón</first><last>Guðnason</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Hrafn</first><last>Loftsson</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <author><first>Einar Freyr</first><last>Sigurðsson</last></author>
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
       <pages>3414–3422</pages>
@@ -5168,20 +5168,20 @@
     </paper>
     <paper id="420">
       <title>Making Metadata Fit for Next Generation Language Technology Platforms: The Metadata Schema of the <fixed-case>E</fixed-case>uropean Language Grid</title>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Maria</first><last>Gavriilidou</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Maria</first><last>Berger</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Michael</first><last>Rigault</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Gerhard</first><last>Backfried</last></author>
-      <author><first>José Manuel</first><last>Gómez-Pérez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>José Manuel</first><last>Gómez-Pérez</last></author>
       <author><first>Andres</first><last>Garcia-Silva</last></author>
       <pages>3428–3437</pages>
       <abstract>The current scientific and technological landscape is characterised by the increasing availability of data resources and processing tools and services. In this setting, metadata have emerged as a key factor facilitating management, sharing and usage of such digital assets. In this paper we present ELG-SHARE, a rich metadata schema catering for the description of Language Resources and Technologies (processing and generation services and tools, models, corpora, term lists, etc.), as well as related entities (e.g., organizations, projects, supporting documents, etc.). The schema powers the European Language Grid platform that aims to be the primary hub and marketplace for industry-relevant Language Technology in Europe. ELG-SHARE has been based on various metadata schemas, vocabularies, and ontologies, as well as related recommendations and guidelines.</abstract>
@@ -5193,7 +5193,7 @@
       <title>Related Works in the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Catalog</title>
       <author><first>Daniel</first><last>Jaquette</last></author>
       <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <pages>3438–3442</pages>
       <abstract>Defining relations between language resources provides an archive with the ability to better serve its users. This paper covers the development and implementation of a Related Works addition to the Linguistic Data Consortium’s (LDC) catalog. The authors go step-by-step through the development of the Related Works schema, implementation of the software and database changes, and data entry of the relations. The Related Work schema involved developing of a set of controlled terms for relations based on previous work and other schema. Software and database changes consisted of both front and back end interface additions, along with modification and additions to the LDC Catalog database tables. Data entry consisted of two parts: seed data from previous work and 2019 language resources, and ongoing legacy population. Previous work in this area is discussed as well as overview information about the LDC Catalog. A list of the full LDC Related Works terms is included with brief explanations.</abstract>
       <url hash="f7e49d43">2020.lrec-1.421</url>
@@ -5204,7 +5204,7 @@
       <title>Language Data Sharing in <fixed-case>E</fixed-case>uropean Public Services – Overcoming Obstacles and Creating Sustainable Data Sharing Infrastructures</title>
       <author><first>Lilli</first><last>Smal</last></author>
       <author><first>Andrea</first><last>Lösch</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Stephan</first><last>Busemann</last></author>
@@ -5218,10 +5218,10 @@
       <title>A Progress Report on Activities at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Benefitting the <fixed-case>LREC</fixed-case> Community</title>
       <author><first>Christopher</first><last>Cieri</last></author>
       <author><first>James</first><last>Fiumara</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>3449–3456</pages>
       <abstract>This latest in a series of Linguistic Data Consortium (LDC) progress reports to the LREC community does not describe any single language resource, evaluation campaign or technology but sketches the activities, since the last report, of a data center devoted to supporting the work of LREC attendees among other research communities. Specifically, we describe 96 new corpora released in 2018-2020 to date, a new technology evaluation campaign, ongoing activities to support multiple common task human language technology programs, and innovations to advance the methodology of language data collection and annotation.</abstract>
       <url hash="2172c8c0">2020.lrec-1.423</url>
@@ -5282,7 +5282,7 @@
       <author><first>Namoos Hayat</first><last>Qasmi</last></author>
       <author><first>Haris Bin</first><last>Zia</last></author>
       <author><first>Awais</first><last>Athar</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <pages>3484–3489</pages>
       <abstract>This paper presents the first attempt at Automatic Text Simplification (ATS) for Urdu, the language of 170 million people worldwide. Being a low-resource language in terms of standard linguistic resources, recent text simplification approaches that rely on manually crafted simplified corpora or lexicons such as WordNet are not applicable to Urdu. Urdu is a morphologically rich language that requires unique considerations such as proper handling of inflectional case and honorifics. We present an unsupervised method for lexical simplification of complex Urdu text. Our method only requires plain Urdu text and makes use of word embeddings together with a set of morphological features to generate simplifications. Our system achieves a BLEU score of 80.15 and SARI score of 42.02 upon automatic evaluation on manually crafted simplified corpora. We also report results for human evaluations for correctness, grammaticality, meaning-preservation and simplicity of the output. Our code and corpus are publicly available to make our results reproducible.</abstract>
       <url hash="92c3cb66">2020.lrec-1.428</url>
@@ -5292,7 +5292,7 @@
     <paper id="429">
       <title>Jamo Pair Encoding: Subcharacter Representation-based Extreme <fixed-case>K</fixed-case>orean Vocabulary Compression for Efficient Subword Tokenization</title>
       <author><first>Sangwhan</first><last>Moon</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3490–3497</pages>
       <abstract>In the context of multilingual language model pre-training, vocabulary size for languages with a broad set of potential characters is an unsolved problem. We propose two algorithms applicable in any unsupervised multilingual pre-training task, increasing the elasticity of budget required for building the vocabulary in Byte-Pair Encoding inspired tokenizers, significantly reducing the cost of supporting Korean in a multilingual model.</abstract>
       <url hash="a3dd6a21">2020.lrec-1.429</url>
@@ -5302,7 +5302,7 @@
     <paper id="430">
       <title>Offensive Language and Hate Speech Detection for <fixed-case>D</fixed-case>anish</title>
       <author><first>Gudbjartur Ingi</first><last>Sigurbergsson</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>3498–3508</pages>
       <abstract>The presence of offensive language on social media platforms and the implications this poses is becoming a major concern in modern society. Given the enormous amount of content created every day, automatic methods are required to detect and deal with this type of content. Until now, most of the research has focused on solving the problem for the English language, while the problem is multilingual. We construct a Danish dataset DKhate containing user-generated comments from various social media platforms, and to our knowledge, the first of its kind, annotated for various types and target of offensive language. We develop four automatic classification systems, each designed to work for both the English and the Danish language. In the detection of offensive language in English, the best performing system achieves a macro averaged F1-score of 0.74, and the best performing system for Danish achieves a macro averaged F1-score of 0.70. In the detection of whether or not an offensive post is targeted, the best performing system for English achieves a macro averaged F1-score of 0.62, while the best performing system for Danish achieves a macro averaged F1-score of 0.73. Finally, in the detection of the target type in a targeted offensive post, the best performing system for English achieves a macro averaged F1-score of 0.56, and the best performing system for Danish achieves a macro averaged F1-score of 0.63. Our work for both the English and the Danish language captures the type and targets of offensive language, and present automatic methods for detecting different kinds of offensive language such as hate speech and cyberbullying.</abstract>
       <url hash="8bd3de33">2020.lrec-1.430</url>
@@ -5312,7 +5312,7 @@
     <paper id="431">
       <title>Semi-supervised Deep Embedded Clustering with Anomaly Detection for Semantic Frame Induction</title>
       <author><first>Zheng Xin</first><last>Yong</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>3509–3519</pages>
       <abstract>Although FrameNet is recognized as one of the most fine-grained lexical databases, its coverage of lexical units is still limited. To tackle this issue, we propose a two-step frame induction process: for a set of lexical units not yet present in Berkeley FrameNet data release 1.7, first remove those that cannot fit into any existing semantic frame in FrameNet; then, assign the remaining lexical units to their correct frames. We also present the Semi-supervised Deep Embedded Clustering with Anomaly Detection (SDEC-AD) model—an algorithm that maps high-dimensional contextualized vector representations of lexical units to a low-dimensional latent space for better frame prediction and uses reconstruction error to identify lexical units that cannot evoke frames in FrameNet. SDEC-AD outperforms the state-of-the-art methods in both steps of the frame induction process. Empirical results also show that definitions provide contextual information for representing and characterizing the frame membership of lexical units.</abstract>
       <url hash="8f706280">2020.lrec-1.431</url>
@@ -5323,7 +5323,7 @@
       <title>Search Query Language Identification Using Weak Labeling</title>
       <author><first>Ritiz</first><last>Tambi</last></author>
       <author><first>Ajinkya</first><last>Kale</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <pages>3520–3527</pages>
       <abstract>Language identification is a well-known task for natural language documents. In this paper we explore search query language identification which is usually the first task before any other query understanding. Without loss of generalization, we run our experiments on the Adobe Stock search engine. Even though the domain is relatively generic because Adobe Stock queries cover a broad range of objects and concepts, out-of-the-box language identifiers do not perform well due to the extremely short text found in queries. Unlike other well-studied supervised approaches for this task, we examine a practical approach for the cold start problem for automatically getting large-scale query-language pairs for training. We describe the process of creating weak-labeled training data and then human-annotated evaluation data for the search query language identification task. The effectiveness of this technique is demonstrated by training a gradient boosting model for language classification given a query. We out-perform the open domain text model baselines by a large margin.</abstract>
       <url hash="8a224c5d">2020.lrec-1.432</url>
@@ -5333,9 +5333,9 @@
     <paper id="433">
       <title>Automated Phonological Transcription of <fixed-case>A</fixed-case>kkadian Cuneiform Text</title>
       <author><first>Aleksi</first><last>Sahala</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>3528–3534</pages>
       <abstract>Akkadian was an East-Semitic language spoken in ancient Mesopotamia. The language is attested on hundreds of thousands of cuneiform clay tablets. Several Akkadian text corpora contain only the transliterated text. In this paper, we investigate automated phonological transcription of the transliterated corpora. The phonological transcription provides a linguistically appealing form to represent Akkadian, because the transcription is normalized according to the grammatical description of a given dialect and explicitly shows the Akkadian renderings for Sumerian logograms. Because cuneiform text does not mark the inflection for logograms, the inflected form needs to be inferred from the sentence context. To the best of our knowledge, this is the first documented attempt to automatically transcribe Akkadian. Using a context-aware neural network model, we are able to automatically transcribe syllabic tokens at near human performance with 96% recall @ 3, while the logogram transcription remains more challenging at 82% recall @ 3.</abstract>
       <url hash="6ceadc58">2020.lrec-1.433</url>
@@ -5344,8 +5344,8 @@
     </paper>
     <paper id="434">
       <title><fixed-case>COSTRA</fixed-case> 1.0: A Dataset of Complex Sentence Transformations</title>
-      <author><first>Petra</first><last>Barancikova</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barancikova</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>3535–3541</pages>
       <abstract>We present COSTRA 1.0, a dataset of complex sentence transformations. The dataset is intended for the study of sentence-level embeddings beyond simple word alternations or standard paraphrasing. This first version of the dataset is limited to sentences in Czech but the construction method is universal and we plan to use it also for other languages. The dataset consist of 4,262 unique sentences with average length of 10 words, illustrating 15 types of modifications such as simplification, generalization, or formal and informal language variation. The hope is that with this dataset, we should be able to test semantic properties of sentence embeddings and perhaps even to find some topologically interesting “skeleton” in the sentence embedding space. A preliminary analysis using LASER, multi-purpose multi-lingual sentence embeddings suggests that the LASER space does not exhibit the desired properties.</abstract>
       <url hash="692e04d3">2020.lrec-1.434</url>
@@ -5432,7 +5432,7 @@
     <paper id="442">
       <title><fixed-case>SEDAR</fixed-case>: a Large Scale <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Financial Domain Parallel Corpus</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>3595–3602</pages>
       <abstract>This paper describes the acquisition, preprocessing and characteristics of SEDAR, a large scale English-French parallel corpus for the financial domain. Our extensive experiments on machine translation show that SEDAR is essential to obtain good performance on finance. We observe a large gain in the performance of machine translation systems trained on SEDAR when tested on finance, which makes SEDAR suitable to study domain adaptation for neural machine translation. The first release of the corpus comprises 8.6 million high quality sentence pairs that are publicly available for research at <url>https://github.com/autorite/sedar-bitext</url>.</abstract>
       <url hash="39636cc8">2020.lrec-1.442</url>
@@ -5490,7 +5490,7 @@
       <author><first>Sho</first><last>Shimazu</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3630–3634</pages>
       <abstract>In natural language, we often omit some words that are easily understandable from the context. In particular, pronouns of subject, object, and possessive cases are often omitted in Japanese; these are known as zero pronouns. In translation from Japanese to other languages, we need to find a correct antecedent for each zero pronoun to generate a correct and coherent translation. However, it is difficult for conventional automatic evaluation metrics (e.g., BLEU) to focus on the success of zero pronoun resolution. Therefore, we present a hand-crafted dataset to evaluate whether translation models can resolve the zero pronoun problems in Japanese to English translations. We manually and statistically validate that our dataset can effectively evaluate the correctness of the antecedents selected in translations. Through the translation experiments using our dataset, we reveal shortcomings of an existing context-aware neural machine translation model.</abstract>
       <url hash="7465a19a">2020.lrec-1.447</url>
@@ -5549,7 +5549,7 @@
       <title>An Evaluation Benchmark for Testing the Word Sense Disambiguation Capabilities of Machine Translation Systems</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>3668–3675</pages>
       <abstract>Lexical ambiguity is one of the many challenging linguistic phenomena involved in translation, i.e., translating an ambiguous word with its correct sense. In this respect, previous work has shown that the translation quality of neural machine translation systems can be improved by explicitly modeling the senses of ambiguous words. Recently, several evaluation test sets have been proposed to measure the word sense disambiguation (WSD) capability of machine translation systems. However, to date, these evaluation test sets do not include any training data that would provide a fair setup measuring the sense distributions present within the training data itself. In this paper, we present an evaluation benchmark on WSD for machine translation for 10 language pairs, comprising training data with known sense distributions. Our approach for the construction of the benchmark builds upon the wide-coverage multilingual sense inventory of BabelNet, the multilingual neural parsing pipeline TurkuNLP, and the OPUS collection of translated texts from the web. The test suite is available at <url>http://github.com/Helsinki-NLP/MuCoW</url>.</abstract>
       <url hash="37aa18a4">2020.lrec-1.452</url>
@@ -5558,8 +5558,8 @@
     </paper>
     <paper id="453">
       <title><fixed-case>MEDLINE</fixed-case> as a Parallel Corpus: a Survey to Gain Insight on <fixed-case>F</fixed-case>rench-, <fixed-case>S</fixed-case>panish- and <fixed-case>P</fixed-case>ortuguese-speaking Authors’ Abstract Writing Practice</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
       <pages>3676–3682</pages>
       <abstract>Background: Parallel corpora are used to train and evaluate machine translation systems. To alleviate the cost of producing parallel resources for evaluation campaigns, existing corpora are leveraged. However, little information may be available about the methods used for producing the corpus, including translation direction. Objective: To gain insight on MEDLINE parallel corpus used in the biomedical task at the Workshop on Machine Translation in 2019 (WMT 2019). Material and Methods: Contact information for the authors of MEDLINE articles included in the English/Spanish (EN/ES), English/French (EN/FR), and English/Portuguese (EN/PT) WMT 2019 test sets was obtained from PubMed and publisher websites. The authors were asked about their abstract writing practices in a survey. Results: The response rate was above 20%. Authors reported that they are mainly native speakers of languages other than English. Although manual translation, sometimes via professional translation services, was commonly used for abstract translation, authors of articles in the EN/ES and EN/PT sets also relied on post-edited machine translation. Discussion: This study provides a characterization of MEDLINE authors’ language skills and abstract writing practices. Conclusion: The information collected in this study will be used to inform test set design for the next WMT biomedical task.</abstract>
@@ -5570,7 +5570,7 @@
     <paper id="454">
       <title><fixed-case>JASS</fixed-case>: <fixed-case>J</fixed-case>apanese-specific Sequence to Sequence Pre-training for Neural Machine Translation</title>
       <author><first>Zhuoyuan</first><last>Mao</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Haiyue</first><last>Song</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
@@ -5601,7 +5601,7 @@
       <title>Linguistically Informed <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Neural Machine Translation</title>
       <author><first>Vikrant</first><last>Goyal</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>3698–3703</pages>
       <abstract>Hindi-English Machine Translation is a challenging problem, owing to multiple factors including the morphological complexity and relatively free word order of Hindi, in addition to the lack of sufficient parallel training data. Neural Machine Translation (NMT) is a rapidly advancing MT paradigm and has shown promising results for many language pairs, especially in large training data scenarios. To overcome the data sparsity issue caused by the lack of large parallel corpora for Hindi-English, we propose a method to employ additional linguistic knowledge which is encoded by different phenomena depicted by Hindi. We generalize the embedding layer of the state-of-the-art Transformer model to incorporate linguistic features like POS tag, lemma and morph features to improve the translation performance. We compare the results obtained on incorporating this knowledge with the baseline systems and demonstrate significant performance improvements. Although, the Transformer NMT models have a strong efficacy to learn language constructs, we show that the usage of specific features further help in improving the translation performance.</abstract>
       <url hash="335ae47a">2020.lrec-1.456</url>
@@ -5622,7 +5622,7 @@
       <title>An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages</title>
       <author><first>Aaron</first><last>Mueller</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Dylan</first><last>Lewis</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -5646,7 +5646,7 @@
     <paper id="460">
       <title><fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-Cinema: a Speech-to-Subtitles corpus</title>
       <author><first>Alina</first><last>Karakanta</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>3727–3734</pages>
       <abstract>Growing needs in localising audiovisual content in multiple languages through subtitles call for the development of automatic solutions for human subtitling. Neural Machine Translation (NMT) can contribute to the automatisation of subtitling, facilitating the work of human subtitlers and reducing turn-around times and related costs. NMT requires high-quality, large, task-specific training data. The existing subtitling corpora, however, are missing both alignments to the source language audio and important information about subtitle breaks. This poses a significant limitation for developing efficient automatic approaches for subtitling, since the length and form of a subtitle directly depends on the duration of the utterance. In this work, we present MuST-Cinema, a multilingual speech translation corpus built from TED subtitles. The corpus is comprised of (audio, transcription, translation) triplets. Subtitle breaks are preserved by inserting special symbols. We show that the corpus can be used to build models that efficiently segment sentences into subtitles and propose a method for annotating existing subtitling corpora with subtitle breaks, conforming to the constraint of length.</abstract>
@@ -5657,7 +5657,7 @@
     <paper id="461">
       <title>On Context Span Needed for Machine Translation Evaluation</title>
       <author><first>Sheila</first><last>Castilho</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>3735–3742</pages>
       <abstract>Despite increasing efforts to improve evaluation of machine translation (MT) by going beyond the sentence level to the document level, the definition of what exactly constitutes a “document level” is still not clear. This work deals with the context span necessary for a more reliable MT evaluation. We report results from a series of surveys involving three domains and 18 target languages designed to identify the necessary context span as well as issues related to it. Our findings indicate that, despite the fact that some issues and spans are strongly dependent on domain and on the target language, a number of common patterns can be observed so that general guidelines for context-aware MT evaluation can be drawn.</abstract>
@@ -5689,7 +5689,7 @@
     </paper>
     <paper id="464">
       <title>The <fixed-case>MARCELL</fixed-case> Legislative Corpus</title>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Martin</first><last>Yamalov</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
@@ -5697,13 +5697,13 @@
       <author><first>Bartłomiej</first><last>Nitoń</last></author>
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
       <author><first>Piotr</first><last>Pęzik</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Radu</first><last>Ion</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
       <author><first>Vasile</first><last>Păiș</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Andraz</first><last>Repar</last></author>
       <author><first>Matjaž</first><last>Rihtar</last></author>
@@ -5741,7 +5741,7 @@
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>3782–3789</pages>
       <abstract>This paper introduces OpusTools, a package for downloading and processing parallel corpora included in the OPUS corpus collection. The package implements tools for accessing compressed data in their archived release format and make it possible to easily convert between common formats. OpusTools also includes tools for language identification and data filtering as well as tools for importing data from various sources into the OPUS format. We show the use of these tools in parallel corpus creation and data diagnostics. The latter is especially useful for the identification of potential problems and errors in the extensive data set. Using these tools, we can now monitor the validity of data sets and improve the overall quality and consitency of the data collection.</abstract>
       <url hash="c45e97df">2020.lrec-1.467</url>
@@ -5771,7 +5771,7 @@
     </paper>
     <paper id="470">
       <title>The <fixed-case>FISKMÖ</fixed-case> Project: Resources and Tools for <fixed-case>F</fixed-case>innish-<fixed-case>S</fixed-case>wedish Machine Translation and Cross-Linguistic Research</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Tommi</first><last>Nieminen</last></author>
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
@@ -5808,7 +5808,7 @@
       <title>Finite State Machine Pattern-Root <fixed-case>A</fixed-case>rabic Morphological Generator, Analyzer and Diacritizer</title>
       <author><first>Maha</first><last>Alkhairy</last></author>
       <author><first>Afshan</first><last>Jafri</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>3834–3841</pages>
       <abstract>We describe and evaluate the Finite-State Arabic Morphologizer (FSAM) – a concatenative (prefix-stem-suffix) and templatic (root- pattern) morphologizer that generates and analyzes undiacritized Modern Standard Arabic (MSA) words, and diacritizes them. Our bidirectional unified-architecture finite state machine (FSM) is based on morphotactic MSA grammatical rules. The FSM models the root-pattern structure related to semantics and syntax, making it readily scalable unlike stem-tabulations in prevailing systems. We evaluate the coverage and accuracy of our model, with coverage being percentage of words in Tashkeela (a large corpus) that can be analyzed. Accuracy is computed against a gold standard, comprising words and properties, created from the intersection of UD PADT treebank and Tashkeela. Coverage of analysis (extraction of root and properties from word) is 82%. Accuracy results are: root computed from a word (92%), word generation from a root (100%), non-root properties of a word (97%), and diacritization (84%). FSAM’s non-root results match or surpass MADAMIRA’s, and root result comparisons are not made because of the concatenative nature of publicly available morphologizers.</abstract>
       <url hash="c4d28778">2020.lrec-1.473</url>
@@ -5818,9 +5818,9 @@
     <paper id="474">
       <title>An Unsupervised Method for Weighting Finite-state Morphological Analyzers</title>
       <author><first>Amr</first><last>Keleg</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Nick</first><last>Howell</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <pages>3842–3850</pages>
       <abstract>Morphological analysis is one of the tasks that have been studied for years. Different techniques have been used to develop models for performing morphological analysis. Models based on finite state transducers have proved to be more suitable for languages with low available resources. In this paper, we have developed a method for weighting a morphological analyzer built using finite state transducers in order to disambiguate its results. The method is based on a word2vec model that is trained in a completely unsupervised way using raw untagged corpora and is able to capture the semantic meaning of the words. Most of the methods used for disambiguating the results of a morphological analyzer relied on having tagged corpora that need to manually built. Additionally, the method developed uses information about the token irrespective of its context unlike most of the other techniques that heavily rely on the word’s context to disambiguate its set of candidate analyses.</abstract>
       <url hash="cffce4b4">2020.lrec-1.474</url>
@@ -5842,7 +5842,7 @@
     <paper id="476">
       <title>A Supervised Part-Of-Speech Tagger for the <fixed-case>G</fixed-case>reek Language of the Social Web</title>
       <author><first>Maria Nefeli</first><last>Nikiforos</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <pages>3861–3867</pages>
       <abstract>The increasing volume of communication via microblogging messages on social networks has created the need for efficient Natural Language Processing (NLP) tools, especially for unstructured text processing. Extracting information from unstructured social text is one of the most demanding NLP tasks. This paper presents the first part-of-speech tagged data set of social text in Greek, as well as the first supervised part-of-speech tagger developed for such data sets.</abstract>
       <url hash="d3252590">2020.lrec-1.476</url>
@@ -5865,7 +5865,7 @@
       <author><first>Nabil</first><last>Hathout</last></author>
       <author><first>Franck</first><last>Sajous</last></author>
       <author><first>Basilio</first><last>Calderone</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>3877–3885</pages>
       <abstract>Glawinette is a derivational lexicon of French that will be used to feed the Démonette database. It has been created from the GLAWI machine readable dictionary. We collected couples of words from the definitions and the morphological sections of the dictionary and then selected the ones that form regular formal analogies and that instantiate frequent enough formal patterns. The graph structure of the morphological families has then been used to identify for each couple of lexemes derivational patterns that are close to the intuition of the morphologists.</abstract>
       <url hash="d8d0896d">2020.lrec-1.478</url>
@@ -5875,9 +5875,9 @@
     <paper id="479">
       <title><fixed-case>B</fixed-case>aby<fixed-case>FST</fixed-case> - Towards a Finite-State Based Computational Model of Ancient <fixed-case>B</fixed-case>abylonian</title>
       <author><first>Aleksi</first><last>Sahala</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>3886–3894</pages>
       <abstract>Akkadian is a fairly well resourced extinct language that does not yet have a comprehensive morphological analyzer available. In this paper we describe a general finite-state based morphological model for Babylonian, a southern dialect of the Akkadian language, that can achieve a coverage up to 97.3% and recall up to 93.7% on lemmatization and POS-tagging task on token level from a transcribed input. Since Akkadian word forms exhibit a high degree of morphological ambiguity, in that only 20.1% of running word tokens receive a single unambiguous analysis, we attempt a first pass at weighting our finite-state transducer, using existing extensive Akkadian corpora which have been partially validated for their lemmas and parts-of-speech but not the entire morphological analyses. The resultant weighted finite-state transducer yields a moderate improvement so that for 57.4% of the word tokens the highest ranked analysis is the correct one. We conclude with a short discussion on how morphological ambiguity in the analysis of Akkadian could be further reduced with improvements in the training data used in weighting the finite-state transducer as well as through other, context-based techniques.</abstract>
       <url hash="872a729a">2020.lrec-1.479</url>
@@ -5898,7 +5898,7 @@
     <paper id="481">
       <title>Wikinflection Corpus: A (Better) Multilingual, Morpheme-Annotated Inflectional Corpus</title>
       <author><first>Eleni</first><last>Metheniti</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>3905–3912</pages>
       <abstract>Multilingual, inflectional corpora are a scarce resource in the NLP community, especially corpora with annotated morpheme boundaries. We are evaluating a generated, multilingual inflectional corpus with morpheme boundaries, generated from the English Wiktionary (Metheniti and Neumann, 2018), against the largest, multilingual, high-quality inflectional corpus of the UniMorph project (Kirov et al., 2018). We confirm that the generated Wikinflection corpus is not of such quality as UniMorph, but we were able to extract a significant amount of words from the intersection of the two corpora. Our Wikinflection corpus benefits from the morpheme segmentations of Wiktionary/Wikinflection and from the manually-evaluated morphological feature tags of the UniMorph project, and has 216K lemmas and 5.4M word forms, in a total of 68 languages.</abstract>
       <url hash="6975de76">2020.lrec-1.481</url>
@@ -5919,16 +5919,16 @@
     </paper>
     <paper id="483">
       <title><fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>orph 3.0: <fixed-case>U</fixed-case>niversal <fixed-case>M</fixed-case>orphology</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
       <author><first>Matteo</first><last>Grella</last></author>
       <author><first>Amrit</first><last>Nidhi</last></author>
       <author><first>Patrick</first><last>Xia</last></author>
       <author><first>Kyle</first><last>Gorman</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Timofey</first><last>Arkhangelskiy</last></author>
       <author><first>Nataly</first><last>Krizhanovsky</last></author>
       <author><first>Andrew</first><last>Krizhanovsky</last></author>
@@ -5937,7 +5937,7 @@
       <author><first>John</first><last>Mansfield</last></author>
       <author><first>Valts</first><last>Ernštreits</last></author>
       <author><first>Yuval</first><last>Pinter</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -5994,7 +5994,7 @@
       <title>Fine-grained Morphosyntactic Analysis and Generation Tools for More Than One Thousand Languages</title>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Dylan</first><last>Lewis</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Aaron</first><last>Mueller</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -6010,7 +6010,7 @@
       <author><first>Injy</first><last>Hamed</last></author>
       <author><first>Slim</first><last>Abdennadher</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>3973–3977</pages>
       <abstract>Code-switching has become a prevalent phenomenon across many communities. It poses a challenge to NLP researchers, mainly due to the lack of available data needed for training and testing applications. In this paper, we introduce a new resource: a corpus of Egyptian- Arabic code-switch speech data that is fully tokenized, lemmatized and annotated for part-of-speech tags. Beside the corpus itself, we provide annotation guidelines to address the unique challenges of annotating code-switch data. Another challenge that we address is the fact that Egyptian Arabic orthography and grammar are not standardized.</abstract>
       <url hash="c8f147c7">2020.lrec-1.489</url>
@@ -6052,11 +6052,11 @@
       <title>Morphological Segmentation for Low Resource Languages</title>
       <author><first>Justin</first><last>Mott</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Jordan</first><last>Kodner</last></author>
       <author><first>Caitlin</first><last>Richter</last></author>
       <author><first>Hongzhi</first><last>Xu</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <pages>3996–4002</pages>
       <abstract>This paper describes a new morphology resource created by Linguistic Data Consortium and the University of Pennsylvania for the DARPA LORELEI Program. The data consists of approximately 2000 tokens annotated for morphological segmentation in each of 9 low resource languages, along with root information for 7 of the languages. The languages annotated show a broad diversity of typological features. A minimal annotation scheme for segmentation was developed such that it could capture the patterns of a wide range of languages and also be performed reliably by non-linguist annotators. The basic annotation guidelines were designed to be language-independent, but included language-specific morphological paradigms and other specifications. The resulting annotated corpus is designed to support and stimulate the development of unsupervised morphological segmenters and analyzers by providing a gold standard for their evaluation on a more typologically diverse set of languages than has previously been available. By providing root annotation, this corpus is also a step toward supporting research in identifying richer morphological structures than simple morpheme boundaries.</abstract>
       <url hash="b06bffb9">2020.lrec-1.493</url>
@@ -6069,9 +6069,9 @@
       <author><first>Marie-Anne</first><last>Lachaux</last></author>
       <author><first>Alexis</first><last>Conneau</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <pages>4003–4012</pages>
       <abstract>Pre-training text representations have led to significant improvements in many areas of natural language processing. The quality of these models benefits greatly from the size of the pretraining corpora as long as its quality is preserved. In this paper, we describe an automatic pipeline to extract massive high-quality monolingual datasets from Common Crawl for a variety of languages. Our pipeline follows the data processing introduced in fastText (Mikolov et al., 2017; Grave et al., 2018), that deduplicates documents and identifies their language. We augment this pipeline with a filtering step to select documents that are close to high quality corpora like Wikipedia.</abstract>
       <url hash="e13947d3">2020.lrec-1.494</url>
@@ -6080,9 +6080,9 @@
     </paper>
     <paper id="495">
       <title>On the Robustness of Unsupervised and Semi-supervised Cross-lingual Word Embedding Learning</title>
-      <author><first>Yerai</first><last>Doval</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>4013–4023</pages>
       <abstract>Cross-lingual word embeddings are vector representations of words in different languages where words with similar meaning are represented by similar vectors, regardless of the language. Recent developments which construct these embeddings by aligning monolingual spaces have shown that accurate alignments can be obtained with little or no supervision, which usually comes in the form of bilingual dictionaries. However, the focus has been on a particular controlled scenario for evaluation, and there is no strong evidence on how current state-of-the-art systems would fare with noisy text or for language pairs with major linguistic differences. In this paper we present an extensive evaluation over multiple cross-lingual embedding models, analyzing their strengths and limitations with respect to different variables such as target language, training corpora and amount of supervision. Our conclusions put in doubt the view that high-quality cross-lingual embeddings can always be learned without much supervision.</abstract>
@@ -6106,14 +6106,14 @@
     <paper id="497">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v2: An Evergrowing Multilingual Treebank Collection</title>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>4034–4043</pages>
       <abstract>Universal Dependencies is an open community effort to create cross-linguistically consistent treebank annotation for many languages within a dependency-based lexicalist framework. The annotation consists in a linguistically motivated word segmentation; a morphological layer comprising lemmas, universal part-of-speech tags, and standardized morphological features; and a syntactic layer focusing on syntactic relations between predicates, arguments and modifiers. In this paper, we describe version 2 of the universal guidelines (UD v2), discuss the major changes from UD v1 to UD v2, and give an overview of the currently available treebanks for 90 languages.</abstract>
       <url hash="dc5741ee">2020.lrec-1.497</url>
@@ -6123,7 +6123,7 @@
     <paper id="498">
       <title><fixed-case>EMPAC</fixed-case>: an <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish Corpus of Institutional Subtitles</title>
       <author><first>Iris</first><last>Serrat Roozen</last></author>
-      <author><first>José Manuel</first><last>Martínez Martínez</last></author>
+      <author id="jose-manuel-martinez"><first>José Manuel</first><last>Martínez Martínez</last></author>
       <pages>4044–4053</pages>
       <abstract>The EuroparlTV Multimedia Parallel Corpus (EMPAC) is a collection of subtitles in English and Spanish for videos from the EuropeanParliament’s Multimedia Centre. The corpus has been compiled with the EMPAC toolkit. The aim of this corpus is to provide a resource to study institutional subtitling on the one hand, and, on the other hand, facilitate the analysis of web accessibility to institutional multimedia content. The corpus covers a time span from 2009 to 2017, it is made up of 4,000 texts amounting to two and half millions of tokens for every language, corresponding to approximately 280 hours of video. This paper provides 1) a review of related corpora; 2) a revision of typical compilation methodologies of subtitle corpora; 3) a detailed account of the corpus compilation methodology followed; and, 4) a description of the corpus. In the conclusion, the key findings are summarised regarding formal aspects of the subtitles conditioning the accessibility to the multimedia content of the EuroparlTV.</abstract>
       <url hash="344f3e01">2020.lrec-1.498</url>
@@ -6133,7 +6133,7 @@
     <paper id="499">
       <title>Cross-Lingual Word Embeddings for <fixed-case>T</fixed-case>urkic Languages</title>
       <author><first>Elmurod</first><last>Kuriyozov</last></author>
-      <author><first>Yerai</first><last>Doval</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>4054–4062</pages>
       <abstract>There has been an increasing interest in learning cross-lingual word embeddings to transfer knowledge obtained from a resource-rich language, such as English, to lower-resource languages for which annotated data is scarce, such as Turkish, Russian, and many others. In this paper, we present the first viability study of established techniques to align monolingual embedding spaces for Turkish, Uzbek, Azeri, Kazakh and Kyrgyz, members of the Turkic family which is heavily affected by the low-resource constraint. Those techniques are known to require little explicit supervision, mainly in the form of bilingual dictionaries, hence being easily adaptable to different domains, including low-resource ones. We obtain new bilingual dictionaries and new word embeddings for these languages and show the steps for obtaining cross-lingual word embeddings using state-of-the-art techniques. Then, we evaluate the results using the bilingual dictionary induction task. Our experiments confirm that the obtained bilingual dictionaries outperform previously-available ones, and that word embeddings from a low-resource language can benefit from resource-rich closely-related languages when they are aligned together. Furthermore, evaluation on an extrinsic task (Sentiment analysis on Uzbek) proves that monolingual word embeddings can, although slightly, benefit from cross-lingual alignments.</abstract>
@@ -6166,7 +6166,7 @@
     </paper>
     <paper id="502">
       <title><fixed-case>G</fixed-case>e<fixed-case>B</fixed-case>io<fixed-case>T</fixed-case>oolkit: Automatic Extraction of Gender-Balanced Multilingual Corpus of <fixed-case>W</fixed-case>ikipedia Biographies</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Pau</first><last>Li Lin</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>4081–4088</pages>
@@ -6271,8 +6271,8 @@
     </paper>
     <paper id="511">
       <title>Analysis of <fixed-case>G</fixed-case>lobal<fixed-case>P</fixed-case>hone and <fixed-case>E</fixed-case>thiopian Languages Speech Corpora for Multilingual <fixed-case>ASR</fixed-case></title>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <pages>4152–4156</pages>
       <abstract>In this paper, we present the analysis of GlobalPhone (GP) and speech corpora of Ethiopian languages (Amharic, Tigrigna, Oromo and Wolaytta). The aim of the analysis is to select speech data from GP for the development of multilingual Automatic Speech Recognition (ASR) system for the Ethiopian languages. To this end, phonetic overlaps among GP and Ethiopian languages have been analyzed. The result of our analysis shows that there is much phonetic overlap among Ethiopian languages although they are from three different language families. From GP, Turkish, Uyghur and Croatian are found to have much overlap with the Ethiopian languages. On the other hand, Korean has less phonetic overlap with the rest of the languages. Moreover, morphological complexity of the GP and Ethiopian languages, reflected by type to token ration (TTR) and out of vocabulary (OOV) rate, has been analyzed. Both metrics indicated the morphological complexity of the languages. Korean and Amharic have been identified as extremely morphologically complex compared to the other languages. Tigrigna, Russian, Turkish, Polish, etc. are also among the morphologically complex languages.</abstract>
@@ -6292,8 +6292,8 @@
     </paper>
     <paper id="513">
       <title>Large Vocabulary Read Speech Corpora for Four <fixed-case>E</fixed-case>thiopian Languages: <fixed-case>A</fixed-case>mharic, <fixed-case>T</fixed-case>igrigna, <fixed-case>O</fixed-case>romo and <fixed-case>W</fixed-case>olaytta</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
       <author><first>Michael</first><last>Melese</last></author>
       <author><first>Hafte</first><last>Abera</last></author>
       <author><first>Tewodros</first><last>Abebe</last></author>
@@ -6301,7 +6301,7 @@
       <author><first>Yaregal</first><last>Assabie</last></author>
       <author><first>Million</first><last>Meshesha</last></author>
       <author><first>Solomon</first><last>Afnafu</last></author>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <pages>4167–4171</pages>
       <abstract>Automatic Speech Recognition (ASR) is one of the most important technologies to support spoken communication in modern life. However, its development benefits from large speech corpus. The development of such a corpus is expensive and most of the human languages, including the Ethiopian languages, do not have such resources. To address this problem, we have developed four large (about 22 hours) speech corpora for four Ethiopian languages: Amharic, Tigrigna, Oromo and Wolaytta. To assess usability of the corpora for (the purpose of) speech processing, we have developed ASR systems for each language. In this paper, we present the corpora and the baseline ASR systems we have developed. We have achieved word error rates (WERs) of 37.65%, 31.03%, 38.02%, 33.89% for Amharic, Tigrigna, Oromo and Wolaytta, respectively. This results show that the corpora are suitable for further investigation towards the development of ASR systems. Thus, the research community can use the corpora to further improve speech processing systems. From our results, it is clear that the collection of text corpora to train strong language models for all of the languages is still required, especially for Oromo and Wolaytta.</abstract>
       <url hash="dd412d53">2020.lrec-1.513</url>
@@ -6312,7 +6312,7 @@
       <title>Incorporating Politeness across Languages in Customer Care Responses: Towards building a Multi-lingual Empathetic Dialogue Agent</title>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4172–4182</pages>
       <abstract>Customer satisfaction is an essential aspect of customer care systems. It is imperative for such systems to be polite while handling customer requests/demands. In this paper, we present a large multi-lingual conversational dataset for English and Hindi. We choose data from Twitter having both generic and courteous responses between customer care agents and aggrieved users. We also propose strong baselines that can induce courteous behaviour in generic customer care response in a multi-lingual scenario. We build a deep learning framework that can simultaneously handle different languages and incorporate polite behaviour in the customer care agent’s responses. Our system is competent in generating responses in different languages (here, English and Hindi) depending on the customer’s preference and also is able to converse with humans in an empathetic manner to ensure customer satisfaction and retention. Experimental results show that our proposed models can converse in both the languages and the information shared between the languages helps in improving the performance of the overall system. Qualitative and quantitative analysis shows that the proposed method can converse in an empathetic manner by incorporating courteousness in the responses and hence increasing customer satisfaction.</abstract>
       <url hash="24283e6b">2020.lrec-1.514</url>
@@ -6323,7 +6323,7 @@
       <title><fixed-case>W</fixed-case>iki<fixed-case>B</fixed-case>ank: Using <fixed-case>W</fixed-case>ikidata to Improve Multilingual Frame-Semantic Parsing</title>
       <author><first>Cezar</first><last>Sas</last></author>
       <author><first>Meriem</first><last>Beloucif</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4183–4189</pages>
       <abstract>Frame-semantic annotations exist for a tiny fraction of the world’s languages, Wikidata, however, links knowledge base triples to texts in many languages, providing a common, distant supervision signal for semantic parsers. We present WikiBank, a multilingual resource of partial semantic structures that can be used to extend pre-existing resources rather than creating new man-made resources from scratch. We also integrate this form of supervision into an off-the-shelf frame-semantic parser and allow cross-lingual transfer. Using Google’s Sling architecture, we show significant improvements on the English and Spanish CoNLL 2009 datasets, whether training on the full available datasets or small subsamples thereof.</abstract>
       <url hash="a73030d0">2020.lrec-1.515</url>
@@ -6334,7 +6334,7 @@
       <title>Multilingual Corpus Creation for Multilingual Semantic Similarity Task</title>
       <author><first>Mahtab</first><last>Ahmed</last></author>
       <author><first>Chahna</first><last>Dixit</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Atif</first><last>Khan</last></author>
       <author><first>Muhammad Rifayat</first><last>Samee</last></author>
       <author><first>Felipe</first><last>Urra</last></author>
@@ -6388,7 +6388,7 @@
       <author><first>Michael</first><last>Henretty</last></author>
       <author><first>Reuben</first><last>Morais</last></author>
       <author><first>Lindsay</first><last>Saunders</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Gregor</first><last>Weber</last></author>
       <pages>4218–4222</pages>
       <abstract>The Common Voice corpus is a massively-multilingual collection of transcribed speech intended for speech technology research and development. Common Voice is designed for Automatic Speech Recognition purposes but can be useful in other domains (e.g. language identification). To achieve scale and sustainability, the Common Voice project employs crowdsourcing for both data collection and data validation. The most recent release includes 29 languages, and as of November 2019 there are a total of 38 languages collecting data. Over 50,000 individuals have participated so far, resulting in 2,500 hours of collected audio. To our knowledge this is the largest audio corpus in the public domain for speech recognition, both in terms of number of hours and number of languages. As an example use case for Common Voice, we present speech recognition experiments using Mozilla’s DeepSpeech Speech-to-Text toolkit. By applying transfer learning from a source English model, we find an average Character Error Rate improvement of 5.99 ± 5.48 for twelve target languages (German, French, Italian, Turkish, Catalan, Slovenian, Welsh, Irish, Breton, Tatar, Chuvash, and Kabyle). For most of these languages, these are the first ever published results on end-to-end Automatic Speech Recognition.</abstract>
@@ -6404,7 +6404,7 @@
       <author><first>Yeonju</first><last>Lee-Sikka</last></author>
       <author><first>Sean</first><last>Miller</last></author>
       <author><first>Alan</first><last>Wong</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Kyle</first><last>Gorman</last></author>
       <pages>4223–4228</pages>
       <abstract>We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluating a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.</abstract>
@@ -6445,7 +6445,7 @@
       <author><first>Aleksandr</first><last>Khakhmovich</last></author>
       <author><first>Svetlana</first><last>Pavlova</last></author>
       <author><first>Kira</first><last>Kirillova</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Ekaterina</first><last>Savilova</last></author>
       <pages>4247–4255</pages>
       <abstract>Out-of-vocabulary words are still a challenge in cross-lingual Natural Language Processing tasks, for which transliteration from source to target language or script is one of the solutions. In this study, we collect a personal name dataset in 445 Wikidata languages (37 scripts), train Transformer-based multilingual transliteration models on 6 high- and 4 less-resourced languages, compare them with bilingual models from (Merhav and Ash, 2018) and determine that multilingual models perform better for less-resourced languages. We discover that intrinsic evaluation, i.e comparison to a single gold standard, might not be appropriate in the task of transliteration due to its high variability. For this reason, we propose using extrinsic evaluation of transliteration via the cross-lingual named entity list search task (e.g. personal name search in contacts list). Our code and datasets are publicly available online.</abstract>
@@ -6457,7 +6457,7 @@
       <title>Serial Speakers: a Dataset of <fixed-case>TV</fixed-case> Series</title>
       <author><first>Xavier</first><last>Bost</last></author>
       <author><first>Vincent</first><last>Labatut</last></author>
-      <author><first>Georges</first><last>Linares</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linares</last></author>
       <pages>4256–4264</pages>
       <abstract>For over a decade, TV series have been drawing increasing interest, both from the audience and from various academic fields. But while most viewers are hooked on the continuous plots of TV serials, the few annotated datasets available to researchers focus on standalone episodes of classical TV series. We aim at filling this gap by providing the multimedia/speech processing communities with “Serial Speakers”, an annotated dataset of 155 episodes from three popular American TV serials: “Breaking Bad”, “Game of Thrones” and “House of Cards”. “Serial Speakers” is suitable both for investigating multimedia retrieval in realistic use case scenarios, and for addressing lower level speech related tasks in especially challenging conditions. We publicly release annotations for every speech turn (boundaries, speaker) and scene boundary, along with annotations for shot boundaries, recurring shots, and interacting speakers in a subset of episodes. Because of copyright restrictions, the textual content of the speech turns is encrypted in the public version of the dataset, but we provide the users with a simple online tool to recover the plain text from their own subtitle files.</abstract>
       <url hash="7893f736">2020.lrec-1.525</url>
@@ -6512,7 +6512,7 @@
       <author><first>Nicolas</first><last>Hernandez</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Solen</first><last>Quiniou</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
       <pages>4293–4301</pages>
@@ -6556,7 +6556,7 @@
     </paper>
     <paper id="533">
       <title><fixed-case>E</fixed-case>:Calm Resource: a Resource for Studying Texts Produced by <fixed-case>F</fixed-case>rench Pupils and Students</title>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
       <author><first>Serge</first><last>Fleury</last></author>
       <author><first>Claude</first><last>Ponton</last></author>
       <pages>4327–4332</pages>
@@ -6568,7 +6568,7 @@
     <paper id="534">
       <title>Introducing <fixed-case>MULAI</fixed-case>: A Multimodal Database of Laughter during Dyadic Interactions</title>
       <author><first>Michel-Pierre</first><last>Jansen</last></author>
-      <author><first>Khiet P.</first><last>Truong</last></author>
+      <author id="khiet-p-truong"><first>Khiet P.</first><last>Truong</last></author>
       <author><first>Dirk K.J.</first><last>Heylen</last></author>
       <author><first>Deniece S.</first><last>Nazareth</last></author>
       <pages>4333–4342</pages>
@@ -6580,7 +6580,7 @@
     <paper id="535">
       <title>The Connection between the Text and Images of News Articles: New Insights for Multimedia Analysis</title>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Erkan</first><last>Bașar</last></author>
       <author><first>Martha</first><last>Larson</last></author>
       <pages>4343–4351</pages>
@@ -6597,7 +6597,7 @@
       <author><first>Cristina</first><last>Noujaim</last></author>
       <author><first>Ruoyao</first><last>Wang</last></author>
       <author><first>Jia</first><last>Deng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>4352–4358</pages>
       <abstract>We introduce LifeQA, a benchmark dataset for video question answering that focuses on day-to-day real-life situations. Current video question answering datasets consist of movies and TV shows. However, it is well-known that these visual domains are not representative of our day-to-day lives. Movies and TV shows, for example, benefit from professional camera movements, clean editing, crisp audio recordings, and scripted dialog between professional actors. While these domains provide a large amount of data for training models, their properties make them unsuitable for testing real-life question answering systems. Our dataset, by contrast, consists of video clips that represent only real-life scenarios. We collect 275 such video clips and over 2.3k multiple-choice questions. In this paper, we analyze the challenging but realistic aspects of LifeQA, and we apply several state-of-the-art video question answering models to provide benchmarks for future research. The full dataset is publicly available at <url>https://lit.eecs.umich.edu/lifeqa/</url>.</abstract>
       <url hash="099d32f9">2020.lrec-1.536</url>
@@ -6609,7 +6609,7 @@
       <author><first>Julia</first><last>Bettinger</last></author>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>4359–4367</pages>
       <abstract>We present a dataset with difficulty ratings for 1,030 German closed noun compounds extracted from domain-specific texts for do-it-ourself (DIY), cooking and automotive. The dataset includes two-part compounds for cooking and DIY, and two- to four-part compounds for automotive. The compounds were identified in text using the Simple Compound Splitter (Weller-Di Marco, 2017); a subset was filtered and balanced for frequency and productivity criteria as basis for manual annotation and fine-grained interpretation. This study presents the creation, the final dataset with ratings from 20 annotators and statistics over the dataset, to provide insight into the perception of domain-specific term difficulty. It is particularly striking that annotators agree on a coarse, binary distinction between easy vs. difficult domain-specific compounds but that a more fine grained distinction of difficulty is not meaningful. We finally discuss the challenges of an annotation for difficulty, which includes both the task description as well as the selection of the data basis.</abstract>
       <url hash="a14cd50e">2020.lrec-1.537</url>
@@ -6621,7 +6621,7 @@
       <author><first>Yana</first><last>Strakatova</last></author>
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Isabel</first><last>Fuhrmann</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Daniela</first><last>Rossmann</last></author>
       <pages>4368–4378</pages>
       <abstract>In this paper we present the GerCo dataset of adjective-noun collocations for German, such as alter Freund ‘old friend’ and tiefe Liebe ‘deep love’. The annotation has been performed by experts based on the annotation scheme introduced in this paper. The resulting dataset contains 4,732 positive and negative instances of collocations and covers all the 16 semantic classes of adjectives as defined in the German wordnet GermaNet. The dataset can serve as a reliable empirical basis for comparing different theoretical frameworks concerned with collocations or as material for data-driven approaches to the studies of collocations including different machine learning experiments. This paper addresses the latter issue by using the GerCo dataset for evaluating different models on the task of automatic collocation identification. We compare lexical association measures with static and contextualized word embeddings. The experiments show that word embeddings outperform methods based on statistical association measures by a wide margin.</abstract>
@@ -6632,7 +6632,7 @@
     <paper id="539">
       <title>Variants of Vector Space Reductions for Predicting the Compositionality of <fixed-case>E</fixed-case>nglish Noun Compounds</title>
       <author><first>Pegah</first><last>Alipoor</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>4379–4387</pages>
       <abstract>Predicting the degree of compositionality of noun compounds such as “snowball” and “butterfly” is a crucial ingredient for lexicography and Natural Language Processing applications, to know whether the compound should be treated as a whole, or through its constituents, and what it means. Computational approaches for an automatic prediction typically represent and compare compounds and their constituents within a vector space and use distributional similarity as a proxy to predict the semantic relatedness between the compounds and their constituents as the compound’s degree of compositionality. This paper provides a systematic evaluation of vector-space reduction variants across kinds, exploring reductions based on part-of-speech next to and also in combination with Principal Components Analysis using Singular Value and word2vec embeddings. We show that word2vec and nouns only dimensionality reductions are the most successful and stable vector space variants for our task.</abstract>
       <url hash="53b654b3">2020.lrec-1.539</url>
@@ -6643,7 +6643,7 @@
       <title>Varying Vector Representations and Integrating Meaning Shifts into a <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank Model for Automatic Term Extraction</title>
       <author><first>Anurag</first><last>Nigam</last></author>
       <author><first>Anna</first><last>Hätty</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>4388–4394</pages>
       <abstract>We perform a comparative study for automatic term extraction from domain-specific language using a PageRank model with different edge-weighting methods. We vary vector space representations within the PageRank graph algorithm, and we go beyond standard co-occurrence and investigate the influence of measures of association strength and first- vs. second-order co-occurrence. In addition, we incorporate meaning shifts from general to domain-specific language as personalized vectors, in order to distinguish between termhood strengths of ambiguous words across word senses. Our study is performed for two domain-specific English corpora: ACL and do-it-yourself (DIY); and a domain-specific German corpus: cooking. The models are assessed by applying average precision and the roc score as evaluation metrices.</abstract>
       <url hash="ac3f89c1">2020.lrec-1.540</url>
@@ -6655,8 +6655,8 @@
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Yann-Alan</first><last>Pilatte</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
-      <author><first>Nicolas</first><last>Lefèbvre</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
+      <author id="nicolas-lefebvre"><first>Nicolas</first><last>Lefèbvre</last></author>
       <pages>4395–4401</pages>
       <abstract>We present here Rigor Mortis, a gamified crowdsourcing platform designed to evaluate the intuition of the speakers, then train them to annotate multi-word expressions (MWEs) in French corpora. We previously showed that the speakers’ intuition is reasonably good (65% in recall on non-fixed MWE). We detail here the annotation results, after a training phase using some of the tests developed in the PARSEME-FR project.</abstract>
       <url hash="57f7e930">2020.lrec-1.541</url>
@@ -6668,7 +6668,7 @@
       <author><first>Murathan</first><last>Kurfalı</last></author>
       <author><first>Robert</first><last>Östling</last></author>
       <author><first>Johan</first><last>Sjons</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>4402–4409</pages>
       <abstract>We present a new set of 96 Swedish multi-word expressions annotated with degree of (non-)compositionality. In contrast to most previous compositionality datasets we also consider syntactically complex constructions and publish a formal specification of each expression. This allows evaluation of computational models beyond word bigrams, which have so far been the norm. Finally, we use the annotations to evaluate a system for automatic compositionality estimation based on distributional semantics. Our analysis of the disagreements between human annotators and the distributional model reveal interesting questions related to the perception of compositionality, and should be informative to future work in the area.</abstract>
       <url hash="23371c56">2020.lrec-1.542</url>
@@ -6689,8 +6689,8 @@
     <paper id="544">
       <title>Dedicated Language Resources for Interdisciplinary Research on Multiword Expressions: Best Thing since Sliced Bread</title>
       <author><first>Ferdy</first><last>Hubers</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>4418–4425</pages>
       <abstract>Multiword expressions such as idioms (beat about the bush), collocations (plastic surgery) and lexical bundles (in the middle of) are challenging for disciplines like Natural Language Processing (NLP), psycholinguistics and second language acquisition, , due to their more or less fixed character. Idiomatic expressions are especially problematic, because they convey a figurative meaning that cannot always be inferred from the literal meanings of the component words. Researchers acknowledge that important properties that characterize idioms such as frequency of exposure, familiarity, transparency, and imageability, should be taken into account in research, but these are typically properties that rely on subjective judgments. This is probably one of the reasons why many studies that investigated idiomatic expressions collected limited information about idiom properties for very small numbers of idioms only. In this paper we report on cross-boundary work aimed at developing a set of tools and language resources that are considered crucial for this kind of multifaceted research. We discuss the results of our research and suggest possible avenues for future research</abstract>
       <url hash="f5fd1c7d">2020.lrec-1.544</url>
@@ -6710,7 +6710,7 @@
     </paper>
     <paper id="546">
       <title>Introducing <fixed-case>RONEC</fixed-case> - the <fixed-case>R</fixed-case>omanian Named Entity Corpus</title>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Andrei-Marius</first><last>Avram</last></author>
       <pages>4436–4443</pages>
       <abstract>We present RONEC - the Named Entity Corpus for the Romanian language. The corpus contains over 26000 entities in ~5000 annotated sentences, belonging to 16 distinct classes. The sentences have been extracted from a copy-right free newspaper, covering several styles. This corpus represents the first initiative in the Romanian language space specifically targeted for named entity recognition. It is available in BRAT and CoNLL-U Plus formats, and it is free to use and extend at github.com/dumitrescustefan/ronec</abstract>
@@ -6766,7 +6766,7 @@
       <title>A Dataset of <fixed-case>G</fixed-case>erman Legal Documents for Named Entity Recognition</title>
       <author><first>Elena</first><last>Leitner</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <pages>4478–4485</pages>
       <abstract>We describe a dataset developed for Named Entity Recognition in German federal court decisions. It consists of approx. 67,000 sentences with over 2 million tokens. The resource contains 54,000 manually annotated entities, mapped to 19 fine-grained semantic classes: person, judge, lawyer, country, city, street, landscape, organization, company, institution, court, brand, law, ordinance, European legal norm, regulation, contract, court decision, and legal literature. The legal documents were, furthermore, automatically annotated with more than 35,000 TimeML-based time expressions. The dataset, which is available under a CC-BY 4.0 license in the CoNNL-2002 format, was developed for training an NER service for German legal documents in the EU project Lynx.</abstract>
       <url hash="c50ed7e1">2020.lrec-1.551</url>
@@ -6787,7 +6787,7 @@
     <paper id="553">
       <title>Named Entities in Medical Case Reports: Corpus and Experiments</title>
       <author><first>Sarah</first><last>Schulz</last></author>
-      <author><first>Jurica</first><last>Ševa</last></author>
+      <author id="jurica-seva"><first>Jurica</first><last>Ševa</last></author>
       <author><first>Samuel</first><last>Rodriguez</last></author>
       <author><first>Malte</first><last>Ostendorff</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
@@ -6820,7 +6820,7 @@
     <paper id="556">
       <title>Where are we in Named Entity Recognition from Speech?</title>
       <author><first>Antoine</first><last>Caubrière</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Antoine</first><last>Laurent</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
@@ -6848,7 +6848,7 @@
       <author><first>Hannah</first><last>Smith</last></author>
       <author><first>Zeyu</first><last>Zhang</last></author>
       <author><first>John</first><last>Culnan</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>4529–4546</pages>
       <abstract>Named entity recognition identifies common classes of entities in text, but these entity labels are generally sparse, limiting utility to downstream tasks. In this work we present ScienceExamCER, a densely-labeled semantic classification corpus of 133k mentions in the science exam domain where nearly all (96%) of content words have been annotated with one or more fine-grained semantic class labels including taxonomic groups, meronym groups, verb/action groups, properties and values, and synonyms. Semantic class labels are drawn from a manually-constructed fine-grained typology of 601 classes generated through a data-driven analysis of 4,239 science exam questions. We show an off-the-shelf BERT-based named entity recognition model modified for multi-label classification achieves an accuracy of 0.85 F1 on this task, suggesting strong utility for downstream tasks in science domain question answering requiring densely-labeled semantic classification.</abstract>
       <url hash="be9926e1">2020.lrec-1.558</url>
@@ -6860,7 +6860,7 @@
       <author><first>Fredrik</first><last>Jørgensen</last></author>
       <author><first>Tobias</first><last>Aasmoe</last></author>
       <author><first>Anne-Stine</first><last>Ruud Husevåg</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>4547–4556</pages>
       <abstract>This paper presents NorNE, a manually annotated corpus of named entities which extends the annotation of the existing Norwegian Dependency Treebank. Comprising both of the official standards of written Norwegian (Bokmål and Nynorsk), the corpus contains around 600,000 tokens and annotates a rich set of entity types including persons, organizations, locations, geo-political entities, products, and events, in addition to a class corresponding to nominals derived from names. We here present details on the annotation effort, guidelines, inter-annotator agreement and an experimental analysis of the corpus using a neural sequence labeling architecture.</abstract>
@@ -6938,7 +6938,7 @@
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Christina</first><last>Rosted</last></author>
       <author><first>Lasse Malm</first><last>Lidegaard</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4597–4604</pages>
       <abstract>We present a named entity annotation for the Danish Universal Dependencies treebank using the CoNLL-2003 annotation scheme: DaNE. It is the largest publicly available, Danish named entity gold annotation. We evaluate the quality of our annotations intrinsically by double annotating the entire treebank and extrinsically by comparing our annotations to a recently released named entity annotation of the validation and test sections of the Danish Universal Dependencies treebank. We benchmark the new resource by training and evaluating competitive architectures for supervised named entity recognition (NER), including FLAIR, monolingual (Danish) BERT and multilingual BERT. We explore cross-lingual transfer in multilingual BERT from five related languages in zero-shot and direct transfer setups, and we show that even with our modestly-sized training set, we improve Danish NER over a recent cross-lingual approach, as well as over zero-shot transfer from five related languages. Using multilingual BERT, we achieve higher performance by fine-tuning on both DaNE and a larger Bokmål (Norwegian) training set compared to only using DaNE. However, the highest performance isachieved by using a Danish BERT fine-tuned on DaNE. Our dataset enables improvements and applicability for Danish NER beyond cross-lingual methods. We employ a thorough error analysis of the predictions of the best models for seen and unseen entities, as well as their robustness on un-capitalized text. The annotated dataset and all the trained models are made publicly available.</abstract>
       <url hash="2a1d9ad3">2020.lrec-1.565</url>
@@ -6975,7 +6975,7 @@
       <author><first>Joaquim</first><last>Santos</last></author>
       <author><first>Diogo</first><last>Gomes</last></author>
       <author><first>Fabio</first><last>Cordeiro</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Viviane</first><last>Moreira</last></author>
       <pages>4625–4630</pages>
       <abstract>This work focuses on Portuguese Named Entity Recognition (NER) in the Geology domain. The only domain-specific dataset in the Portuguese language annotated for NER is the GeoCorpus. Our approach relies on BiLSTM-CRF neural networks (a widely used type of network for this area of research) that use vector and tensor embedding representations. Three types of embedding models were used (Word Embeddings, Flair Embeddings, and Stacked Embeddings) under two versions (domain-specific and generalized). The domain specific Flair Embeddings model was originally trained with a generalized context in mind, but was then fine-tuned with domain-specific Oil and Gas corpora, as there simply was not enough domain corpora to properly train such a model. Each of these embeddings was evaluated separately, as well as stacked with another embedding. Finally, we achieved state-of-the-art results for this domain with one of our embeddings, and we performed an error analysis on the language model that achieved the best results. Furthermore, we investigated the effects of domain-specific versus generalized embeddings.</abstract>
@@ -6985,11 +6985,11 @@
     </paper>
     <paper id="569">
       <title>Establishing a New State-of-the-Art for <fixed-case>F</fixed-case>rench Named Entity Recognition</title>
-      <author><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro Javier</first><last>Ortiz Suárez</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
       <author><first>Benjamin</first><last>Muller</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>4631–4638</pages>
       <abstract>The French TreeBank developed at the University Paris 7 is the main source of morphosyntactic and syntactic annotations for French. However, it does not include explicit information related to named entities, which are among the most useful information for several natural language processing tasks and applications. Moreover, no large-scale French corpus with named entity annotations contain referential information, which complement the type and the span of each mention with an indication of the entity it refers to. We have manually annotated the French TreeBank with such information, after an automatic pre-annotation step. We sketch the underlying annotation guidelines and we provide a few figures about the resulting annotations.</abstract>
       <url hash="a610a85a">2020.lrec-1.569</url>
@@ -7012,7 +7012,7 @@
       <author><first>Iva</first><last>Marinova</last></author>
       <author><first>Laska</first><last>Laskova</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
       <pages>4647–4652</pages>
       <abstract>The paper reports on the usage of deep learning methods for improving a Named Entity Recognition (NER) training corpus and for predicting and annotating new types in a test corpus. We show how the annotations in a type-based corpus of named entities (NE) were populated as occurrences within it, thus ensuring density of the training information. A deep learning model was adopted for discovering inconsistencies in the initial annotation and for learning new NE types. The evaluation results get improved after data curation, randomization and deduplication.</abstract>
@@ -7083,8 +7083,8 @@
     <paper id="577">
       <title>Controllable Sentence Simplification</title>
       <author><first>Louis</first><last>Martin</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
       <pages>4689–4698</pages>
       <abstract>Text simplification aims at making a text easier to read and understand by simplifying grammar and structure while keeping the underlying information identical. It is often considered an all-purpose generic task where the same simplification is suitable for all; however multiple audiences can benefit from simplified text in different ways. We adapt a discrete parametrization mechanism that provides explicit control on simplification systems based on Sequence-to-Sequence models. As a result, users can condition the simplifications returned by a model on attributes such as length, amount of paraphrasing, lexical complexity and syntactic complexity. We also show that carefully chosen values of these attributes allow out-of-the-box Sequence-to-Sequence models to outperform their standard counterparts on simplification benchmarks. Our model, which we call ACCESS (as shorthand for AudienCe-CEntric Sentence Simplification), establishes the state of the art at 41.87 SARI on the WikiLarge test set, a +1.42 improvement over the best previously reported score.</abstract>
@@ -7173,7 +7173,7 @@
       <title>Towards a Gold Standard for Evaluating <fixed-case>D</fixed-case>anish Word Embeddings</title>
       <author><first>Nina</first><last>Schneidermann</last></author>
       <author><first>Rasmus</first><last>Hvingelby</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <pages>4754–4763</pages>
       <abstract>This paper presents the process of compiling a model-agnostic similarity goal standard for evaluating Danish word embeddings based on human judgments made by 42 native speakers of Danish. Word embeddings resemble semantic similarity solely by distribution (meaning that word vectors do not reflect relatedness as differing from similarity), and we argue that this generalization poses a problem in most intrinsic evaluation scenarios. In order to be able to evaluate on both dimensions, our human-generated dataset is therefore designed to reflect the distinction between relatedness and similarity. The goal standard is applied for evaluating the “goodness” of six existing word embedding models for Danish, and it is discussed how a relatively low correlation can be explained by the fact that semantic similarity is substantially more challenging to model than relatedness, and that there seems to be a need for future human judgments to measure similarity in full context and along more than a single spectrum.</abstract>
       <url hash="3d563d99">2020.lrec-1.585</url>
@@ -7206,13 +7206,13 @@
     </paper>
     <paper id="588">
       <title>Give your Text Representation Models some Love: the Case for <fixed-case>B</fixed-case>asque</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
       <author><first>Jon Ander</first><last>Campos</last></author>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>4781–4788</pages>
       <abstract>Word embeddings and pre-trained language models allow to build rich representations of text and have enabled improvements across most NLP tasks. Unfortunately they are very expensive to train, and many small companies and research groups tend to use models that have been pre-trained and made available by third parties, rather than building their own. This is suboptimal as, for many languages, the models have been trained on smaller (or lower quality) corpora. In addition, monolingual pre-trained models for non-English languages are not always available. At best, models for those languages are included in multilingual versions, where each language shares the quota of substrings and parameters with the rest of the languages. This is particularly true for smaller languages such as Basque. In this paper we show that a number of monolingual models (FastText word embeddings, FLAIR and BERT language models) trained with larger Basque corpora produce much better results than publicly available versions in downstream NLP tasks, including topic classification, sentiment classification, PoS tagging and NER. This work sets a new state-of-the-art in those tasks for Basque. All benchmarks and models used in this work are publicly available.</abstract>
       <url hash="6d5ed2bf">2020.lrec-1.588</url>
@@ -7224,7 +7224,7 @@
       <author><first>François</first><last>Torregrossa</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
       <author><first>Nihel</first><last>Kooli</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Robin</first><last>Allesiardo</last></author>
       <pages>4789–4797</pages>
       <abstract>Word embeddings intervene in a wide range of natural language processing tasks. These geometrical representations are easy to manipulate for automatic systems. Therefore, they quickly invaded all areas of language processing. While they surpass all predecessors, it is still not straightforward why and how they do so. In this article, we propose to investigate all kind of evaluation metrics on various datasets in order to discover how they correlate with each other. Those correlations lead to 1) a fast solution to select the best word embeddings among many others, 2) a new criterion that may improve the current state of static Euclidean word embeddings, and 3) a way to create a set of complementary datasets, i.e. each dataset quantifies a different aspect of word embeddings.</abstract>
@@ -7279,7 +7279,7 @@
       <title>Word Embedding Evaluation in Downstream Tasks and Semantic Analogies</title>
       <author><first>Joaquim</first><last>Santos</last></author>
       <author><first>Bernardo</first><last>Consoli</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>4828–4834</pages>
       <abstract>Language Models have long been a prolific area of study in the field of Natural Language Processing (NLP). One of the newer kinds of language models, and some of the most used, are Word Embeddings (WE). WE are vector space representations of a vocabulary learned by a non-supervised neural network based on the context in which words appear. WE have been widely used in downstream tasks in many areas of study in NLP. These areas usually use these vector models as a feature in the processing of textual data. This paper presents the evaluation of newly released WE models for the Portuguese langauage, trained with a corpus composed of 4.9 billion tokens. The first evaluation presented an intrinsic task in which WEs had to correctly build semantic and syntactic relations. The second evaluation presented an extrinsic task in which the WE models were used in two downstream tasks: Named Entity Recognition and Semantic Similarity between Sentences. Our results show that a diverse and comprehensive corpus can often outperform a larger, less textually diverse corpus, and that batch training may cause quality loss in WE models.</abstract>
       <url hash="909ea642">2020.lrec-1.594</url>
@@ -7292,10 +7292,10 @@
       <author><first>Sándor</first><last>Darányi</last></author>
       <author><first>Christian</first><last>Geng</last></author>
       <author><first>Moniek</first><last>Kuijpers</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Jean-Christophe</first><last>Mensonides</last></author>
       <author><first>Simone</first><last>Rebora</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <pages>4835–4841</pages>
       <abstract>To detect how and when readers are experiencing engagement with a literary work, we bring together empirical literary studies and language technology via focusing on the affective state of absorption. The goal of our resource development is to enable the detection of different levels of reading absorption in millions of user-generated reviews hosted on social reading platforms. We present a corpus of social book reviews in English that we annotated with reading absorption categories. Based on these data, we performed supervised, sentence level, binary classification of the explicit presence vs. absence of the mental state of absorption. We compared the performances of classical machine learners where features comprised sentence representations obtained from a pretrained embedding model (Universal Sentence Encoder) vs. neural classifiers in which sentence embedding vector representations are adapted or fine-tuned while training for the absorption recognition task. We discuss the challenges in creating the labeled data as well as the possibilities for releasing a benchmark corpus.</abstract>
       <url hash="c2f1f34e">2020.lrec-1.595</url>
@@ -7323,15 +7323,15 @@
     </paper>
     <paper id="598">
       <title>The <fixed-case>MWN</fixed-case>.<fixed-case>PT</fixed-case> <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>P</fixed-case>ortuguese: Projection, Validation, Cross-lingual Alignment and Distribution</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Sara</first><last>Grilo</last></author>
       <author><first>Márcia</first><last>Bolrinha</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Andreia</first><last>Querido</last></author>
       <author><first>Rita</first><last>de Carvalho</last></author>
-      <author><first>Rosa</first><last>Gaudio</last></author>
+      <author id="rosa-del-gaudio"><first>Rosa</first><last>Gaudio</last></author>
       <author><first>Mariana</first><last>Avelãs</last></author>
       <author><first>Clara</first><last>Pinto</last></author>
       <pages>4859–4866</pages>
@@ -7364,7 +7364,7 @@
     <paper id="601">
       <title>Spatial <fixed-case>AMR</fixed-case>: Expanded Spatial Annotation in the Context of a Grounded <fixed-case>M</fixed-case>inecraft Corpus</title>
       <author><first>Julia</first><last>Bonn</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Zheng</first><last>Cai</last></author>
       <author><first>Kristin</first><last>Wright-Bettner</last></author>
       <pages>4883–4892</pages>
@@ -7376,9 +7376,9 @@
     <paper id="602">
       <title><fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Random Walk Pseudo-Corpora</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Abhijit</first><last>Mahalunkar</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>4893–4902</pages>
       <abstract>This is a resource description paper that describes the creation and properties of a set of pseudo-corpora generated artificially from a random walk over the English WordNet taxonomy. Our WordNet taxonomic random walk implementation allows the exploration of different random walk hyperparameters and the generation of a variety of different pseudo-corpora. We find that different combinations of parameters result in varying statistical properties of the generated pseudo-corpora. We have published a total of 81 pseudo-corpora that we have used in our previous research, but have not exhausted all possible combinations of hyperparameters, which is why we have also published a codebase that allows the generation of additional WordNet taxonomic pseudo-corpora as needed. Ultimately, such pseudo-corpora can be used to train taxonomic word embeddings, as a way of transferring taxonomic knowledge into a word embedding space.</abstract>
       <url hash="ddce1e9f">2020.lrec-1.602</url>
@@ -7388,7 +7388,7 @@
     <paper id="603">
       <title>On the Formal Standardization of Terminology Resources: The Case Study of <fixed-case>T</fixed-case>ri<fixed-case>MED</fixed-case></title>
       <author><first>Federica</first><last>Vezzani</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <pages>4903–4910</pages>
       <abstract>The process of standardization plays an important role in the management of terminological resources. In this context, we present the work of re-modeling an existing multilingual terminological database for the medical domain, named TriMED. This resource was conceived in order to tackle some problems related to the complexity of medical terminology and to respond to different users’ needs. We provide a methodology that should be followed in order to make a termbase compliant to the three most recent ISO/TC 37 standards. In particular, we focus on the definition of i) the structural meta-model of the resource, ii) the data categories provided, and iii) the TBX format for its implementation. In addition to the formal standardization of the resource, we describe the realization of a new data category repository for the management of the TriMED terminological data and a Web application that can be used to access the multilingual terminological records.</abstract>
       <url hash="62f48b4d">2020.lrec-1.603</url>
@@ -7398,7 +7398,7 @@
     <paper id="604">
       <title>Metaphorical Expressions in Automatic <fixed-case>A</fixed-case>rabic Sentiment Analysis</title>
       <author><first>Israa</first><last>Alsiyat</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <pages>4911–4916</pages>
       <abstract>Over the recent years, Arabic language resources and NLP tools have been under rapid development. One of the important tasks for Arabic natural language processing is the sentiment analysis. While a significant improvement has been achieved in this research area, the existing computational models and tools still suffer from the lack of capability of dealing with Arabic metaphorical expressions. Metaphor has an important role in Arabic language due to its unique history and culture. Metaphors provide a linguistic mechanism for expressing ideas and notions that can be different from their surface form. Therefore, in order to efficiently identify true sentiment of Arabic language data, a computational model needs to be able to “read between lines”. In this paper, we examine the issue of metaphors in automatic Arabic sentiment analysis by carrying out an experiment, in which we observe the performance of a state-of-art Arabic sentiment tool on metaphors and analyse the result to gain a deeper insight into the issue. Our experiment evidently shows that metaphors have a significant impact on the performance of current Arabic sentiment tools, and it is an important task to develop Arabic language resources and computational models for Arabic metaphors.</abstract>
       <url hash="4569305f">2020.lrec-1.604</url>
@@ -7443,7 +7443,7 @@
     <paper id="608">
       <title>An Empirical Examination of Online Restaurant Reviews</title>
       <author><first>Hyun Jung</first><last>Kang</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>4942–4947</pages>
       <abstract>In the wake of (Pang et al., 2002; Turney, 2002; Liu, 2012) inter alia, opinion mining and sentiment analysis have focused on extracting either positive or negative opinions from texts and determining the targets of these opinions. In this study, we go beyond the coarse-grained positive vs. negative opposition and propose a corpus-based scheme that detects evaluative language at a finer-grained level. We classify each sentence into one of four evaluation types based on the proposed scheme: (1) the reviewer’s opinion on the restaurant (positive, negative, or mixed); (2) the reviewer’s input/feedback to potential customers and restaurant owners (suggestion, advice, or warning) (3) whether the reviewer wants to return to the restaurant (intention); (4) the factual statement about the experience (description). We apply classical machine learning and deep learning methods to show the effectiveness of our scheme. We also interpret the performances that we obtained for each category by taking into account the specificities of the corpus treated.</abstract>
       <url hash="f45f1193">2020.lrec-1.608</url>
@@ -7466,7 +7466,7 @@
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Chafik</first><last>Aloulou</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>4955–4963</pages>
       <abstract>In this paper, we propose several protocols to evaluate specific embeddings for Arabic sentiment analysis (SA) task. In fact, Arabic language is characterized by its agglutination and morphological richness contributing to great sparsity that could affect embedding quality. This work presents a study that compares embeddings based on words and lemmas in SA frame. We propose first to study the evolution of embedding models trained with different types of corpora (polar and non polar) and explore the variation between embeddings by observing the sentiment stability of neighbors in embedding spaces. Then, we evaluate embeddings with a neural architecture based on convolutional neural network (CNN). We make available our pre-trained embeddings to Arabic NLP research community with free to use. We provide also for free resources used to evaluate our embeddings. Experiments are done on the Large Arabic-Book Reviews (LABR) corpus in binary (positive/negative) classification frame. Our best result reaches 91.9%, that is higher than the best previous published one (91.5%).</abstract>
       <url hash="d3eba727">2020.lrec-1.610</url>
@@ -7501,7 +7501,7 @@
       <author><first>Akash</first><last>Sheoran</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4982–4990</pages>
       <abstract>Cross-domain sentiment analysis (CDSA) helps to address the problem of data scarcity in scenarios where labelled data for a domain (known as the target domain) is unavailable or insufficient. However, the decision to choose a domain (known as the source domain) to leverage from is, at best, intuitive. In this paper, we investigate text similarity metrics to facilitate source domain selection for CDSA. We report results on 20 domains (all possible pairs) using 11 similarity metrics. Specifically, we compare CDSA performance with these metrics for different domain-pairs to enable the selection of a suitable source domain, given a target domain. These metrics include two novel metrics for evaluating domain adaptability to help source domain selection of labelled data and utilize word and sentence-based embeddings as metrics for unlabelled data. The goal of our experiments is a recommendation chart that gives the K best source domains for CDSA for a given target domain. We show that the best K source domains returned by our similarity metrics have a precision of over 50%, for varying values of K.</abstract>
       <url hash="49a22049">2020.lrec-1.613</url>
@@ -7527,7 +7527,7 @@
       <author><first>Nada</first><last>Naji</last></author>
       <author><first>Louis</first><last>Marceau</last></author>
       <author><first>Marc</first><last>Queudot</last></author>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Leila</first><last>Kosseim</last></author>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <pages>5000–5009</pages>
@@ -7560,7 +7560,7 @@
     </paper>
     <paper id="618">
       <title>A Fine-grained Sentiment Dataset for <fixed-case>N</fixed-case>orwegian</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Petter</first><last>Mæhlum</last></author>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
@@ -7602,7 +7602,7 @@
       <title>Multi-domain Tweet Corpora for Sentiment Analysis: Resource Creation and Evaluation</title>
       <author><first/><last>Mamta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Shikha</first><last>Srivastava</last></author>
       <author><first>Alka</first><last>Kumar</last></author>
       <author><first>Tista</first><last>Saha</last></author>
@@ -7614,10 +7614,10 @@
     </paper>
     <paper id="622">
       <title>Reproduction and Revival of the Argument Reasoning Comprehension Task</title>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>5055–5064</pages>
       <abstract>Reproduction of scientific findings is essential for scientific development across all scientific disciplines and reproducing results of previous works is a basic requirement for validating the hypothesis and conclusions put forward by them. This paper reports on the scientific reproduction of several systems addressing the Argument Reasoning Comprehension Task of SemEval2018. Given a recent publication that pointed out spurious statistical cues in the data set used in the shared task, and that produced a revised version of it, we also evaluated the reproduced systems with this new data set. The exercise reported here shows that, in general, the reproduction of these systems is successful with scores in line with those reported in SemEval2018. However, the performance scores are worst than those, and even below the random baseline, when the reproduced systems are run over the revised data set expunged from data artifacts. This demonstrates that this task is actually a much harder challenge than what could have been perceived from the inflated, close to human-level performance scores obtained with the data set used in SemEval2018. This calls for a revival of this task as there is much room for improvement until systems may come close to the upper bound provided by human performance.</abstract>
       <url hash="0881c0bc">2020.lrec-1.622</url>
@@ -7626,7 +7626,7 @@
     </paper>
     <paper id="623">
       <title>Design and Evaluation of <fixed-case>S</fixed-case>enti<fixed-case>E</fixed-case>con: a fine-grained Economic/Financial Sentiment Lexicon from a Corpus of Business News</title>
-      <author><first>Antonio</first><last>Moreno-Ortiz</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno-Ortiz</last></author>
       <author><first>Javier</first><last>Fernandez-Cruz</last></author>
       <author><first>Chantal Pérez Chantal</first><last>Hernández</last></author>
       <pages>5065–5072</pages>
@@ -7638,7 +7638,7 @@
     <paper id="624">
       <title><fixed-case>P</fixed-case>arl<fixed-case>V</fixed-case>ote: A Corpus for Sentiment Analysis of Political Debates</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>5073–5078</pages>
       <abstract>Debate transcripts from the UK Parliament contain information about the positions taken by politicians towards important topics, but are difficult for people to process manually. While sentiment analysis of debate speeches could facilitate understanding of the speakers’ stated opinions, datasets currently available for this task are small when compared to the benchmark corpora in other domains. We present ParlVote, a new, larger corpus of parliamentary debate speeches for use in the evaluation of sentiment analysis systems for the political domain. We also perform a number of initial experiments on this dataset, testing a variety of approaches to the classification of sentiment polarity in debate speeches. These include a linear classifier as well as a neural network trained using a transformer word embedding model (BERT), and fine-tuned on the parliamentary speeches. We find that in many scenarios, a linear classifier trained on a bag-of-words text representation achieves the best results. However, with the largest dataset, the transformer-based model combined with a neural classifier provides the best performance. We suggest that further experimentation with classification models and observations of the debate content and structure are required, and that there remains much room for improvement in parliamentary sentiment analysis.</abstract>
       <url hash="180c22f4">2020.lrec-1.624</url>
@@ -7648,7 +7648,7 @@
     <paper id="625">
       <title>Offensive Language Detection Using Brown Clustering</title>
       <author><first>Zuoyu</first><last>Tian</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>5079–5087</pages>
       <abstract>In this study, we investigate the use of Brown clustering for offensive language detection. Brown clustering has been shown to be of little use when the task involves distinguishing word polarity in sentiment analysis tasks. In contrast to previous work, we train Brown clusters separately on positive and negative sentiment data, but then combine the information into a single complex feature per word. This way of representing words results in stable improvements in offensive language detection, when used as the only features or in combination with words or character n-grams. Brown clusters add important information, even when combined with words or character n-grams or with standard word embeddings in a convolutional neural network. However, we also found different trends between the two offensive language data sets we used.</abstract>
       <url hash="268cbfb2">2020.lrec-1.625</url>
@@ -7659,7 +7659,7 @@
       <title>Annotating for Hate Speech: The <fixed-case>M</fixed-case>a<fixed-case>N</fixed-case>e<fixed-case>C</fixed-case>o Corpus and Some Input from Critical Discourse Analysis</title>
       <author><first>Stavros</first><last>Assimakopoulos</last></author>
       <author><first>Rebecca</first><last>Vella Muskat</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>5088–5097</pages>
       <abstract>This paper presents a novel scheme for the annotation of hate speech in corpora of Web 2.0 commentary. The proposed scheme is motivated by the critical analysis of posts made in reaction to news reports on the Mediterranean migration crisis and LGBTIQ+ matters in Malta, which was conducted under the auspices of the EU-funded C.O.N.T.A.C.T. project. Based on the realisation that hate speech is not a clear-cut category to begin with, appears to belong to a continuum of discriminatory discourse and is often realised through the use of indirect linguistic means, it is argued that annotation schemes for its detection should refrain from directly including the label ‘hate speech,’ as different annotators might have different thresholds as to what constitutes hate speech and what not. In view of this, we propose a multi-layer annotation scheme, which is pilot-tested against a binary ±hate speech classification and appears to yield higher inter-annotator agreement. Motivating the postulation of our scheme, we then present the MaNeCo corpus on which it will eventually be used; a substantial corpus of on-line newspaper comments spanning 10 years.</abstract>
@@ -7703,7 +7703,7 @@
     </paper>
     <paper id="630">
       <title>Syntax and Semantics in a Treebank for <fixed-case>E</fixed-case>speranto</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>5120–5127</pages>
       <abstract>In this paper we describe and evaluate syntactic and semantic aspects of Arbobanko, a treebank for the artificial language Esperanto, as well as tools and methods used in the production of the treebank. In addition to classical morphosyntax and dependency structure, the treebank was enriched with a lexical-semantic layer covering named entities, a semantic type ontology for nouns and adjectives and a framenet-inspired semantic classification of verbs. For an under-resourced language, the quality of automatic syntactic and semantic pre-annotation is of obvious importance, and by evaluating the underlying parser and the coverage of its semantic ontologies, we try to answer the question whether the language’s extremely regular morphology and transparent semantic affixes translate into a more regular syntax and higher parsing accuracy. On the linguistic side, the treebank allows us to address and quantify typological issues such as the question of word order, auxiliary constructions, lexical transparency and semantic type ambiguity in Esperanto.</abstract>
       <url hash="b136fd6d">2020.lrec-1.630</url>
@@ -7712,7 +7712,7 @@
     </paper>
     <paper id="631">
       <title>Implementation and Evaluation of an <fixed-case>LFG</fixed-case>-based Parser for <fixed-case>W</fixed-case>olof</title>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <pages>5128–5136</pages>
       <abstract>This paper reports on a parsing system for Wolof based on the LFG formalism. The parser covers core constructions of Wolof, including noun classes, cleft, copula, causative and applicative sentences. It also deals with several types of coordination, including same constituent coordination, asymmetric and asyndetic coordination. The system uses a cascade of finite-state transducers for word tokenization and morphological analysis as well as various lexicons. In addition, robust parsing techniques, including fragmenting and skimming, are used to optimize grammar coverage. Parsing coverage is evaluated by running test-suites of naturally occurring Wolof sentences through the parser. The evaluation of parsing coverage reveals that 72.72% of the test sentences receive full parses; 27.27% receive partial parses. To measure accuracy, the parsed sentences are disambiguated manually using an incremental parsebanking approach based on discriminants. The evaluation of parsing quality reveals that the parser achieves 67.2% recall, 92.8% precision and an f-score of 77.9%.</abstract>
       <url hash="36bc372b">2020.lrec-1.631</url>
@@ -7754,7 +7754,7 @@
     </paper>
     <paper id="635">
       <title>Chunk Different Kind of Spoken Discourse: Challenges for Machine Learning</title>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Mariame</first><last>Maarouf</last></author>
       <author><first>Flora</first><last>Badin</last></author>
       <author><first>Marie</first><last>Skrovec</last></author>
@@ -7767,7 +7767,7 @@
     </paper>
     <paper id="636">
       <title><fixed-case>GRAIN</fixed-case>-<fixed-case>S</fixed-case>: Manually Annotated Syntax for <fixed-case>G</fixed-case>erman Interviews</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Zoltán</first><last>Czesznak</last></author>
       <author><first>Kerstin</first><last>Jung</last></author>
       <author><first>Moritz</first><last>Völkel</last></author>
@@ -7782,7 +7782,7 @@
     <paper id="637">
       <title><fixed-case>Y</fixed-case>orùbá Dependency Treebank (<fixed-case>YTB</fixed-case>)</title>
       <author><first>Olájídé</first><last>Ishola</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>5178–5186</pages>
       <abstract>Low-resource languages present enormous NLP opportunities as well as varying degrees of difficulties. The newly released treebank of hand-annotated parts of the Yoruba Bible provides an avenue for dependency analysis of the Yoruba language; the application of a new grammar formalism to the language. In this paper, we discuss our choice of Universal Dependencies, important dependency annotation decisions considered in the creation of the first annotation guidelines for Yoruba and results of our parsing experiments. We also lay the foundation for future incorporation of other domains with the initial test on Yoruba Wikipedia articles and highlighted future directions for the rapid expansion of the treebank.</abstract>
       <url hash="5d696d64">2020.lrec-1.637</url>
@@ -7793,7 +7793,7 @@
       <title><fixed-case>E</fixed-case>nglish Recipe Flow Graph Corpus</title>
       <author><first>Yoko</first><last>Yamakata</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>5187–5194</pages>
       <abstract>We present an annotated corpus of English cooking recipe procedures, and describe and evaluate computational methods for learning these annotations. The corpus consists of 300 recipes written by members of the public, which we have annotated with domain-specific linguistic and semantic structure. Each recipe is annotated with (1) ‘recipe named entities’ (r-NEs) specific to the recipe domain, and (2) a flow graph representing in detail the sequencing of steps, and interactions between cooking tools, food ingredients and the products of intermediate steps. For these two kinds of annotations, inter-annotator agreement ranges from 82.3 to 90.5 F1, indicating that our annotation scheme is appropriate and consistent. We experiment with producing these annotations automatically. For r-NE tagging we train a deep neural network NER tool; to compute flow graphs we train a dependency-style parsing procedure which we apply to the entire sequence of r-NEs in a recipe. In evaluations, our systems achieve 71.1 to 87.5 F1, demonstrating that our annotation scheme is learnable.</abstract>
       <url hash="e0e00022">2020.lrec-1.638</url>
@@ -7824,7 +7824,7 @@
     </paper>
     <paper id="641">
       <title><fixed-case>P</fixed-case>rague Dependency Treebank - Consolidated 1.0</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Eduard</first><last>Bejček</last></author>
       <author><first>Jaroslava</first><last>Hlavacova</last></author>
       <author><first>Marie</first><last>Mikulová</last></author>
@@ -7851,7 +7851,7 @@
       <title>Parsing as Tagging</title>
       <author><first>Robert</first><last>Vacareanu</last></author>
       <author><first>George Caique</first><last>Gouveia Barbosa</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>5225–5231</pages>
       <abstract>We propose a simple yet accurate method for dependency parsing that treats parsing as tagging (PaT). That is, our approach addresses the parsing of dependency trees with a sequence model implemented with a bidirectional LSTM over BERT embeddings, where the “tag” to be predicted at each token position is the relative position of the corresponding head. For example, for the sentence John eats cake, the tag to be predicted for the token cake is -1 because its head (eats) occurs one token to the left. Despite its simplicity, our approach performs well. For example, our approach outperforms the state-of-the-art method of (Fernández-González and Gómez-Rodríguez, 2019) on Universal Dependencies (UD) by 1.76% unlabeled attachment score (UAS) for English, 1.98% UAS for French, and 1.16% UAS for German. On average, on 12 UD languages, our method with minimal tuning performs comparably with this state-of-the-art approach: better by 0.11% UAS, and worse by 0.58% LAS.</abstract>
@@ -7876,12 +7876,12 @@
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Lauren</first><last>Cassidy</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Alessandra Teresa</first><last>Cignarella</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>5240–5250</pages>
       <abstract>The paper presents a discussion on the main linguistic phenomena of user-generated texts found in web and social media, and proposes a set of annotation guidelines for their treatment within the Universal Dependencies (UD) framework. Given on the one hand the increasing number of treebanks featuring user-generated content, and its somewhat inconsistent treatment in these resources on the other, the aim of this paper is twofold: (1) to provide a short, though comprehensive, overview of such treebanks - based on available literature - along with their main features and a comparative analysis of their annotation criteria, and (2) to propose a set of tentative UD-based annotation guidelines, to promote consistent treatment of the particular phenomena found in these types of texts. The main goal of this paper is to provide a common framework for those teams interested in developing similar resources in UD, thus enabling cross-linguistic consistency, which is a principle that has always been in the spirit of UD.</abstract>
@@ -7902,7 +7902,7 @@
     <paper id="647">
       <title><fixed-case>ÆTHEL</fixed-case>: Automatically Extracted Typelogical Derivations for <fixed-case>D</fixed-case>utch</title>
       <author><first>Konstantinos</first><last>Kogkalidis</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <author><first>Richard</first><last>Moot</last></author>
       <pages>5257–5266</pages>
       <abstract>We present ÆTHEL, a semantic compositionality dataset for written Dutch. ÆTHEL consists of two parts. First, it contains a lexicon of supertags for about 900 000 words in context. The supertags correspond to types of the simply typed linear lambda-calculus, enhanced with dependency decorations that capture grammatical roles supplementary to function-argument structures. On the basis of these types, ÆTHEL further provides 72 192 validated derivations, presented in four formats: natural-deduction and sequent-style proofs, linear logic proofnets and the associated programs (lambda terms) for meaning composition. ÆTHEL’s types and derivations are obtained by means of an extraction algorithm applied to the syntactic analyses of LASSY Small, the gold standard corpus of written Dutch. We discuss the extraction algorithm and show how ‘virtual elements’ in the original LASSY annotation of unbounded dependencies and coordination phenomena give rise to higher-order types. We suggest some example usecases highlighting the benefits of a type-driven approach at the syntax semantics interface. The following resources are open-sourced with ÆTHEL: the lexical mappings between words and types, a subset of the dataset consisting of 7 924 semantic parses, and the Python code that implements the extraction algorithm.</abstract>
@@ -7987,7 +7987,7 @@
       <author><first>Eitan</first><last>Grossman</last></author>
       <author><first>Elad</first><last>Eisen</last></author>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>5316–5322</pages>
       <abstract>Phonological segment borrowing is a process through which languages acquire new contrastive speech sounds as the result of borrowing new words from other languages. Despite the fact that phonological segment borrowing is documented in many of the world’s languages, to date there has been no large-scale quantitative study of the phenomenon. In this paper, we present SegBo, a novel cross-linguistic database of borrowed phonological segments. We describe our data aggregation pipeline and the resulting language sample. We also present two short case studies based on the database. The first deals with the impact of large colonial languages on the sound systems of the world’s languages; the second deals with universals of borrowing in the domain of rhotic consonants.</abstract>
       <url hash="909e8e48">2020.lrec-1.654</url>
@@ -8007,13 +8007,13 @@
     </paper>
     <paper id="656">
       <title><fixed-case>A</fixed-case>llo<fixed-case>V</fixed-case>era: A Multilingual Allophone Database</title>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Xinjian</first><last>Li</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
       <author><first>Alexis</first><last>Michaud</last></author>
       <author><first>Shruti</first><last>Rijhwani</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Florian</first><last>Metze</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>5329–5336</pages>
@@ -8040,7 +8040,7 @@
       <author><first>Andre</first><last>Kåsen</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <pages>5343–5350</pages>
       <abstract>The present article presents four experiments with two different methods for measuring dialect similarity in Norwegian: the Levenshtein method and the neural long short term memory (LSTM) autoencoder network, a machine learning algorithm. The visual output in the form of dialect maps is then compared with canonical maps found in the dialect literature. All of this enables us to say that one does not need fine-grained transcriptions of speech to replicate classical classification patterns.</abstract>
       <url hash="ad311206">2020.lrec-1.658</url>
@@ -8062,9 +8062,9 @@
       <title>A Framework for Evaluation of Machine Reading Comprehension Gold Standards</title>
       <author><first>Viktor</first><last>Schlegel</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>5359–5369</pages>
       <abstract>Machine Reading Comprehension (MRC) is the task of answering a question over a paragraph of text. While neural MRC systems gain popularity and achieve noticeable performance, issues are being raised with the methodology used to establish their performance, particularly concerning the data design of gold standards that are used to evaluate them. There is but a limited understanding of the challenges present in this data, which makes it hard to draw comparisons and formulate reliable hypotheses. As a first step towards alleviating the problem, this paper proposes a unifying framework to systematically investigate the present linguistic features, required reasoning and background knowledge and factual correctness on one hand, and the presence of lexical cues as a lower bound for the requirement of understanding on the other hand. We propose a qualitative annotation schema for the first and a set of approximative metrics for the latter. In a first application of the framework, we analyse modern MRC gold standards and present our findings: the absence of features that contribute towards lexical ambiguity, the varying factual correctness of the expected answers and the presence of lexical cues, all of which potentially lower the reading comprehension complexity and quality of the evaluation data.</abstract>
       <url hash="fee0bcc7">2020.lrec-1.660</url>
@@ -8074,7 +8074,7 @@
     <paper id="661">
       <title>Multi-class Hierarchical Question Classification for Multiple Choice Science Exams</title>
       <author><first>Dongfang</first><last>Xu</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Jaycie</first><last>Martin</last></author>
       <author><first>Zhengnan</first><last>Xie</last></author>
       <author><first>Vikas</first><last>Yadav</last></author>
@@ -8114,7 +8114,7 @@
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
       <pages>5400–5407</pages>
       <abstract>In this study, we propose a novel graph neural network called propagate-selector (PS), which propagates information over sentences to understand information that cannot be inferred when considering sentences in isolation. First, we design a graph structure in which each node represents an individual sentence, and some pairs of nodes are selectively connected based on the text structure. Then, we develop an iterative attentive aggregation and a skip-combine method in which a node interacts with its neighborhood nodes to accumulate the necessary information. To evaluate the performance of the proposed approaches, we conduct experiments with the standard HotpotQA dataset. The empirical results demonstrate the superiority of our proposed approach, which obtains the best performances, compared to the widely used answer-selection models that do not consider the intersentential relationship.</abstract>
@@ -8175,12 +8175,12 @@
     </paper>
     <paper id="669">
       <title><fixed-case>AIA</fixed-case>-<fixed-case>BDE</fixed-case>: A Corpus of <fixed-case>FAQ</fixed-case>s in <fixed-case>P</fixed-case>ortuguese and their Variations</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>João</first><last>Ferreira</last></author>
       <author><first>José</first><last>Santos</last></author>
       <author><first>Pedro</first><last>Fialho</last></author>
       <author><first>Ricardo</first><last>Rodrigues</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Ana</first><last>Alves</last></author>
       <pages>5442–5449</pages>
       <abstract>We present AIA-BDE, a corpus of 380 domain-oriented FAQs in Portuguese and their variations, i.e., paraphrases or entailed questions, created manually, by humans, or automatically, with Google Translate. Its aims to be used as a benchmark for FAQ retrieval and automatic question-answering, but may be useful in other contexts, such as the development of task-oriented dialogue systems, or models for natural language inference in an interrogative context. We also report on two experiments. Matching variations with their original questions was not trivial with a set of unsupervised baselines, especially for manually created variations. Besides high performances obtained with ELMo and BERT embeddings, an Information Retrieval system was surprisingly competitive when considering only the first hit. In the second experiment, text classifiers were trained with the original questions, and tested when assigning each variation to one of three possible sources, or assigning them as out-of-domain. Here, the difference between manual and automatic variations was not so significant.</abstract>
@@ -8194,7 +8194,7 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Siddhesh</first><last>Gupte</last></author>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <pages>5450–5455</pages>
       <abstract>Despite the number of currently available datasets on video-question answering, there still remains a need for a dataset involving multi-step and non-factoid answers. Moreover, relying on video transcripts remains an under-explored topic. To adequately address this, we propose a new question answering task on instructional videos, because of their verbose and narrative nature. While previous studies on video question answering have focused on generating a short text as an answer, given a question and video clip, our task aims to identify a span of a video segment as an answer which contains instructional details with various granularities. This work focuses on screencast tutorial videos pertaining to an image editing program. We introduce a dataset, TutorialVQA, consisting of about 6,000 manually collected triples of (video, question, answer span). We also provide experimental results with several baseline algorithms using the video transcripts. The results indicate that the task is challenging and call for the investigation of new algorithms.</abstract>
@@ -8209,7 +8209,7 @@
       <author><first>Jaycie</first><last>Martin</last></author>
       <author><first>Elizabeth</first><last>Wainwright</last></author>
       <author><first>Steven</first><last>Marmorstein</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>5456–5473</pages>
       <abstract>Explainable question answering for complex questions often requires combining large numbers of facts to answer a question while providing a human-readable explanation for the answer, a process known as multi-hop inference. Standardized science questions require combining an average of 6 facts, and as many as 16 facts, in order to answer and explain, but most existing datasets for multi-hop reasoning focus on combining only two facts, significantly limiting the ability of multi-hop inference algorithms to learn to generate large inferences. In this work we present the second iteration of the WorldTree project, a corpus of 5,114 standardized science exam questions paired with large detailed multi-fact explanations that combine core scientific knowledge and world knowledge. Each explanation is represented as a lexically-connected “explanation graph” that combines an average of 6 facts drawn from a semi-structured knowledge base of 9,216 facts across 66 tables. We use this explanation corpus to author a set of 344 high-level science domain inference patterns similar to semantic frames supporting multi-hop inference. Together, these resources provide training data and instrumentation for developing many-fact multi-hop inference models for question answering.</abstract>
       <url hash="72eb3814">2020.lrec-1.671</url>
@@ -8219,7 +8219,7 @@
     <paper id="672">
       <title>Chat or Learn: a Data-Driven Robust Question-Answering System</title>
       <author><first>Gabriel</first><last>Luthier</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>5474–5480</pages>
       <abstract>We present a voice-based conversational agent which combines the robustness of chatbots and the utility of question answering (QA) systems. Indeed, while data-driven chatbots are typically user-friendly but not goal-oriented, QA systems tend to perform poorly at chitchat. The proposed chatbot relies on a controller which performs dialogue act classification and feeds user input either to a sequence-to-sequence chatbot or to a QA system. The resulting chatbot is a spoken QA application for the Google Home smart speaker. The system is endowed with general-domain knowledge from Wikipedia articles and uses coreference resolution to detect relatedness between questions. We present our choices of data sets for training and testing the components, and present the experimental results that helped us optimize the parameters of the chatbot. In particular, we discuss the appropriateness of using the SQuAD dataset for evaluating end-to-end QA, in the light of our system’s behavior.</abstract>
       <url hash="0a0ed06c">2020.lrec-1.672</url>
@@ -8245,7 +8245,7 @@
     <paper id="674">
       <title>Cross-lingual and Cross-domain Evaluation of Machine Reading Comprehension with Squad and <fixed-case>CALOR</fixed-case>-Quest Corpora</title>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Frederic</first><last>Bechet</last></author>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
@@ -8259,7 +8259,7 @@
       <title><fixed-case>S</fixed-case>cholarly<fixed-case>R</fixed-case>ead: A New Dataset for Scientific Article Reading Comprehension</title>
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5498–5504</pages>
       <abstract>We present ScholarlyRead, span-of-word-based scholarly articles’ Reading Comprehension (RC) dataset with approximately 10K manually checked passage-question-answer instances. ScholarlyRead was constructed in semi-automatic way. We consider the articles from two popular journals of a reputed publishing house. Firstly, we generate questions from these articles in an automatic way. Generated questions are then manually checked by the human annotators. We propose a baseline model based on Bi-Directional Attention Flow (BiDAF) network that yields the F1 score of 37.31%. The framework would be useful for building Question-Answering (QA) systems on scientific articles.</abstract>
       <url hash="87a6e810">2020.lrec-1.675</url>
@@ -8269,7 +8269,7 @@
     <paper id="676">
       <title>Contextualized Embeddings based Transformer Encoder for Sentence Similarity Modeling in Answer Selection Task</title>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
       <pages>5505–5514</pages>
       <abstract>Word embeddings that consider context have attracted great attention for various natural language processing tasks in recent years. In this paper, we utilize contextualized word embeddings with the transformer encoder for sentence similarity modeling in the answer selection task. We present two different approaches (feature-based and fine-tuning-based) for answer selection. In the feature-based approach, we utilize two types of contextualized embeddings, namely the Embeddings from Language Models (ELMo) and the Bidirectional Encoder Representations from Transformers (BERT) and integrate each of them with the transformer encoder. We find that integrating these contextual embeddings with the transformer encoder is effective to improve the performance of sentence similarity modeling. In the second approach, we fine-tune two pre-trained transformer encoder models for the answer selection task. Based on our experiments on six datasets, we find that the fine-tuning approach outperforms the feature-based approach on all of them. Among our fine-tuning-based models, the Robustly Optimized BERT Pretraining Approach (RoBERTa) model results in new state-of-the-art performance across five datasets.</abstract>
@@ -8280,8 +8280,8 @@
     <paper id="677">
       <title>Automatic <fixed-case>S</fixed-case>panish Translation of <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> Dataset for Multi-lingual Question Answering</title>
       <author><first>Casimiro Pio</first><last>Carrino</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>5515–5523</pages>
       <abstract>Recently, multilingual question answering became a crucial research topic, and it is receiving increased interest in the NLP community. However, the unavailability of large-scale datasets makes it challenging to train multilingual QA systems with performance comparable to the English ones. In this work, we develop the Translate Align Retrieve (TAR) method to automatically translate the Stanford Question Answering Dataset (SQuAD) v1.1 to Spanish. We then used this dataset to train Spanish QA systems by fine-tuning a Multilingual-BERT model. Finally, we evaluated our QA models with the recently proposed MLQA and XQuAD benchmarks for cross-lingual Extractive QA. Experimental results show that our models outperform the previous Multilingual-BERT baselines achieving the new state-of-the-art values of 68.1 F1 on the Spanish MLQA corpus and 77.6 F1 on the Spanish XQuAD corpus. The resulting, synthetically generated SQuAD-es v1.1 corpora, with almost 100% of data contained in the original English version, to the best of our knowledge, is the first large-scale QA training resource for Spanish.</abstract>
       <url hash="0dfca727">2020.lrec-1.677</url>
@@ -8310,12 +8310,12 @@
     </paper>
     <paper id="680">
       <title>A Shared Task of a New, Collaborative Type to Foster Reproducibility: A First Exercise in the Area of Language Science and Technology with <fixed-case>REPROLANG</fixed-case>2020</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Gertjan</first><last>Van Noord</last></author>
-      <author><first>Dieter</first><last>van Uytvanck</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>Van Noord</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>van Uytvanck</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <author><first>André</first><last>Moreira</last></author>
       <author><first>Willem</first><last>Elbers</last></author>
@@ -8363,7 +8363,7 @@
       <author><first>Kyeongmin</first><last>Rim</last></author>
       <author><first>Jingxuan</first><last>Tu</last></author>
       <author><first>Kelley</first><last>Lynch</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>5569–5578</pages>
       <abstract>Within the natural language processing (NLP) community, shared tasks play an important role. They define a common goal and allowthe the comparison of different methods on the same data. SemEval-2018 Task 7 involves the identification and classification of relationsin abstracts from computational linguistics (CL) publications. In this paper we describe an attempt to reproduce the methods and resultsfrom the top performing system at for SemEval-2018 Task 7. We describe challenges we encountered in the process, report on the resultsof our system, and discuss the ways that our attempt at reproduction can inform best practices.</abstract>
       <url hash="8cc3bc61">2020.lrec-1.684</url>
@@ -8478,14 +8478,14 @@
     <paper id="695">
       <title>Recent Developments for the Linguistic Linked Open Data Infrastructure</title>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Artem</first><last>Revenko</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Deirdre</first><last>Lee</last></author>
       <author><first>Stefania</first><last>Racioppa</last></author>
       <author><first>Jamal</first><last>Abdul Nasir</last></author>
@@ -8575,7 +8575,7 @@
       <author><first>Yunfei</first><last>Long</last></author>
       <author><first>Anran</first><last>Li</last></author>
       <author><first>Emmanuele</first><last>Chersoni</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>5714–5720</pages>
       <abstract>Automatic Chinese irony detection is a challenging task, and it has a strong impact on linguistic research. However, Chinese irony detection often lacks labeled benchmark datasets. In this paper, we introduce Ciron, the first Chinese benchmark dataset available for irony detection for machine learning models. Ciron includes more than 8.7K posts, collected from Weibo, a micro blogging platform. Most importantly, Ciron is collected with no pre-conditions to ensure a much wider coverage. Evaluation on seven different machine learning classifiers proves the usefulness of Ciron as an important resource for Chinese irony detection.</abstract>
@@ -8586,7 +8586,7 @@
     <paper id="702">
       <title>wiki<fixed-case>H</fixed-case>ow<fixed-case>T</fixed-case>o<fixed-case>I</fixed-case>mprove: A Resource and Analyses on Edits in Instructional Texts</title>
       <author><first>Talita</first><last>Anthonio</last></author>
-      <author><first>Irshad</first><last>Bhat</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
       <author><first>Michael</first><last>Roth</last></author>
       <pages>5721–5729</pages>
       <abstract>Instructional texts, such as articles in wikiHow, describe the actions necessary to accomplish a certain goal. In wikiHow and other resources, such instructions are subject to revision edits on a regular basis. Do these edits improve instructions only in terms of style and correctness, or do they provide clarifications necessary to follow the instructions and to accomplish the goal? We describe a resource and first studies towards answering this question. Specifically, we create wikiHowToImprove, a collection of revision histories for about 2.7 million sentences from about 246000 wikiHow articles. We describe human annotation studies on categorizing a subset of sentence-level edits and provide baseline models for the task of automatically distinguishing “older” from “newer” revisions of a sentence.</abstract>
@@ -8617,7 +8617,7 @@
     <paper id="705">
       <title>Spatial Multi-Arrangement for Clustering and Multi-way Similarity Dataset Construction</title>
       <author><first>Olga</first><last>Majewska</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Jasper</first><last>van den Bosch</last></author>
       <author><first>Nikolaus</first><last>Kriegeskorte</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
@@ -8631,7 +8631,7 @@
     <paper id="706">
       <title>A Short Survey on Sense-Annotated Corpora</title>
       <author><first>Tommaso</first><last>Pasini</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>5759–5765</pages>
       <abstract>Large sense-annotated datasets are increasingly necessary for training deep supervised systems in Word Sense Disambiguation. However, gathering high-quality sense-annotated data for as many instances as possible is a laborious and expensive task. This has led to the proliferation of automatic and semi-automatic methods for overcoming the so-called knowledge-acquisition bottleneck. In this short survey we present an overview of sense-annotated corpora, annotated either manually- or (semi)automatically, that are currently available for different languages and featuring distinct lexical resources as inventory of senses, i.e. WordNet, Wikipedia, BabelNet. Furthermore, we provide the reader with general statistics of each dataset and an analysis of their specific features.</abstract>
       <url hash="8f281e9a">2020.lrec-1.706</url>
@@ -8654,7 +8654,7 @@
       <author><first>Salvador</first><last>Lima Lopez</last></author>
       <author><first>Naiara</first><last>Perez</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>5772–5781</pages>
       <abstract>This paper introduces the first version of the NUBes corpus (Negation and Uncertainty annotations in Biomedical texts in Spanish). The corpus is part of an on-going research and currently consists of 29,682 sentences obtained from anonymised health records annotated with negation and uncertainty. The article includes an exhaustive comparison with similar corpora in Spanish, and presents the main annotation and design decisions. Additionally, we perform preliminary experiments using deep learning algorithms to validate the annotated dataset. As far as we know, NUBes is the largest available corpora for negation in Spanish and the first that also incorporates the annotation of speculation cues, scopes, and events.</abstract>
       <url hash="fa939cdf">2020.lrec-1.708</url>
@@ -8665,7 +8665,7 @@
       <title>Decomposing and Comparing Meaning Relations: Paraphrasing, Textual Entailment, Contradiction, and Specificity</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
       <author><first>Darina</first><last>Gold</last></author>
-      <author><first>M. Antonia</first><last>Marti</last></author>
+      <author id="m-antonia-marti"><first>M. Antonia</first><last>Marti</last></author>
       <author><first>Maria</first><last>Salamo</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
       <pages>5782–5791</pages>
@@ -8677,8 +8677,8 @@
     <paper id="710">
       <title>Object Naming in Language and Vision: A Survey and a New Dataset</title>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>5792–5801</pages>
       <abstract>People choose particular names for objects, such as dog or puppy for a given dog. Object naming has been studied in Psycholinguistics, but has received relatively little attention in Computational Linguistics. We review resources from Language and Vision that could be used to study object naming on a large scale, discuss their shortcomings, and create a new dataset that affords more opportunities for analysis and modeling. Our dataset, ManyNames, provides 36 name annotations for each of 25K objects in images selected from VisualGenome. We highlight the challenges involved and provide a preliminary analysis of the ManyNames data, showing that there is a high level of agreement in naming, on average. At the same time, the average number of name types associated with an object is much higher in our dataset than in existing corpora for Language and Vision, such that ManyNames provides a rich resource for studying phenomena like hierarchical variation (chihuahua vs. dog), which has been discussed at length in the theoretical literature, and other less well studied phenomena like cross-classification (cake vs. dessert).</abstract>
       <url hash="23d8b141">2020.lrec-1.710</url>
@@ -8701,7 +8701,7 @@
     <paper id="712">
       <title>Figure Me Out: A Gold Standard Dataset for Metaphor Interpretation</title>
       <author><first>Omnia</first><last>Zayed</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>5810–5819</pages>
       <abstract>Metaphor comprehension and understanding is a complex cognitive task that requires interpreting metaphors by grasping the interaction between the meaning of their target and source concepts. This is very challenging for humans, let alone computers. Thus, automatic metaphor interpretation is understudied in part due to the lack of publicly available datasets. The creation and manual annotation of such datasets is a demanding task which requires huge cognitive effort and time. Moreover, there will always be a question of accuracy and consistency of the annotated data due to the subjective nature of the problem. This work addresses these issues by presenting an annotation scheme to interpret verb-noun metaphoric expressions in text. The proposed approach is designed with the goal of reducing the workload on annotators and maintain consistency. Our methodology employs an automatic retrieval approach which utilises external lexical resources, word embeddings and semantic similarity to generate possible interpretations of identified metaphors in order to enable quick and accurate annotation. We validate our proposed approach by annotating around 1,500 metaphors in tweets which were annotated by six native English speakers. As a result of this work, we publish as linked data the first gold standard dataset for metaphor interpretation which will facilitate research in this area.</abstract>
@@ -8750,7 +8750,7 @@
       <title>Word Attribute Prediction Enhanced by Lexical Entailment Tasks</title>
       <author><first>Mika</first><last>Hasegawa</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>5846–5854</pages>
       <abstract>Human semantic knowledge about concepts acquired through perceptual inputs and daily experiences can be expressed as a bundle of attributes. Unlike the conventional distributed word representations that are purely induced from a text corpus, a semantic attribute is associated with a designated dimension in attribute-based vector representations. Thus, semantic attribute vectors can effectively capture the commonalities and differences among concepts. However, as semantic attributes have been generally created by psychological experimental settings involving human annotators, an automatic method to create or extend such resources is highly demanded in terms of language resource development and maintenance. This study proposes a two-stage neural network architecture, Word2Attr, in which initially acquired attribute representations are then fine-tuned by employing supervised lexical entailment tasks. The quantitative empirical results demonstrated that the fine-tuning was indeed effective in improving the performances of semantic/visual similarity/relatedness evaluation tasks. Although the qualitative analysis confirmed that the proposed method could often discover valid but not-yet human-annotated attributes, they also exposed future issues to be worked: we should refine the inventory of semantic attributes that currently relies on an existing dataset.</abstract>
       <url hash="a8c11191">2020.lrec-1.716</url>
@@ -8764,7 +8764,7 @@
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Zheng</first><last>Cai</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>5855–5864</pages>
       <abstract>Spatial Reasoning from language is essential for natural language understanding. Supporting it requires a representation scheme that can capture spatial phenomena encountered in language as well as in images and videos. Existing spatial representations are not sufficient for describing spatial configurations used in complex tasks. This paper extends the capabilities of existing spatial representation languages and increases coverage of the semantic aspects that are needed to ground spatial meaning of natural language text in the world. Our spatial relation language is able to represent a large, comprehensive set of spatial concepts crucial for reasoning and is designed to support composition of static and dynamic spatial configurations. We integrate this language with the Abstract Meaning Representation (AMR) annotation schema and present a corpus annotated by this extended AMR. To exhibit the applicability of our representation scheme, we annotate text taken from diverse datasets and show how we extend the capabilities of existing spatial representation languages with fine-grained decomposition of semantics and blend it seamlessly with AMRs of sentences and discourse representations as a whole.</abstract>
@@ -8785,7 +8785,7 @@
     <paper id="719">
       <title>Are White Ravens Ever White? - Non-Literal Adjective-Noun Phrases in <fixed-case>P</fixed-case>olish</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <pages>5871–5877</pages>
       <abstract>In the paper we describe two resources of Polish data focused on literal and metaphorical meanings of adjective-noun phrases. The first one is FigAN and consists of isolated phrases which are divided into three types: phrases with only literal meaning, with only metaphorical meaning, and phrases which can be interpreted as literal or metaphorical ones depending on a context of use. The second data is the FigSen corpus which consists of 1833 short fragments of texts containing at least one phrase from the FigAN data which may have both meanings. The corpus is annotated in two ways. One approach concerns annotation of all adjective-noun phrases. In the second approach, literal or metaphorical senses are assigned to all adjectives and nouns in the data. The paper addresses statistics of data and compares two types of annotation. The corpora were used in experiments of automatic recognition of Polish non-literal adjective noun phrases.</abstract>
       <url hash="375d555a">2020.lrec-1.719</url>
@@ -8823,8 +8823,8 @@
       <title>Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Gopalakrishnan</first><last>Venkatesh</last></author>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>5896–5904</pages>
       <abstract>We present the first approach to automatically building resources for academic writing. The aim is to build a writing aid system that automatically edits a text so that it better adheres to the academic style of writing. On top of existing academic resources, such as the Corpus of Contemporary American English (COCA) academic Word List, the New Academic Word List, and the Academic Collocation List, we also explore how to dynamically build such resources that would be used to automatically identify informal or non-academic words or phrases. The resources are compiled using different generic approaches that can be extended for different domains and languages. We describe the evaluation of resources with a system implementation. The system consists of an informal word identification (IWI), academic candidate paraphrase generation, and paraphrase ranking components. To generate candidates and rank them in context, we have used the PPDB and WordNet paraphrase resources. We use the Concepts in Context (CoInCO) “All-Words” lexical substitution dataset both for the informal word identification and paraphrase generation experiments. Our informal word identification component achieves an F-1 score of 82%, significantly outperforming a stratified classifier baseline. The main contribution of this work is a domain-independent methodology to build targeted resources for writing aids.</abstract>
       <url hash="754d04c5">2020.lrec-1.722</url>
@@ -8848,8 +8848,8 @@
       <author><first>Pauline</first><last>Haas</last></author>
       <author><first>Richard</first><last>Huyghe</last></author>
       <author><first>Delphine</first><last>Tribout</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <author><first>Vincent</first><last>Segonne</last></author>
       <pages>5912–5918</pages>
       <abstract>French, as many languages, lacks semantically annotated corpus data. Our aim is to provide the linguistic and NLP research communities with a gold standard sense-annotated corpus of French, using WordNet Unique Beginners as semantic tags, thus allowing for interoperability. In this paper, we report on the first phase of the project, which focused on the annotation of common nouns. The resulting dataset consists of more than 12,000 French noun occurrences which were annotated in double blind and adjudicated according to a carefully redefined set of supersenses. The resource is released online under a Creative Commons Licence.</abstract>
@@ -8860,7 +8860,7 @@
     <paper id="725">
       <title>A Formal Analysis of Multimodal Referring Strategies Under Common Ground</title>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>5919–5927</pages>
       <abstract>In this paper, we present an analysis of computationally generated mixed-modality definite referring expressions using combinations of gesture and linguistic descriptions. In doing so, we expose some striking formal semantic properties of the interactions between gesture and language, conditioned on the introduction of content into the common ground between the (computational) speaker and (human) viewer, and demonstrate how these formal features can contribute to training better models to predict viewer judgment of referring expressions, and potentially to the generation of more natural and informative referring expressions.</abstract>
       <url hash="f54517f6">2020.lrec-1.725</url>
@@ -8870,7 +8870,7 @@
     <paper id="726">
       <title>Improving Neural Metaphor Detection with Visual Datasets</title>
       <author><first>Gitit</first><last>Kehat</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>5928–5933</pages>
       <abstract>We present new results on Metaphor Detection by using text from visual datasets. Using a straightforward technique for sampling text from Vision-Language datasets, we create a data structure we term a visibility word embedding. We then combine these embeddings in a relatively simple BiLSTM module augmented with contextualized word representations (ELMo), and show improvement over previous state-of-the-art approaches that use more complex neural network architectures and richer linguistic features, for the task of verb classification.</abstract>
       <url hash="45d9eb0c">2020.lrec-1.726</url>
@@ -8896,8 +8896,8 @@
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Ekaterina</first><last>Artemova</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>5943–5952</pages>
       <abstract>Disambiguation of word senses in context is easy for humans, but is a major challenge for automatic approaches. Sophisticated supervised and knowledge-based models were developed to solve this task. However, (i) the inherent Zipfian distribution of supervised training instances for a given word and/or (ii) the quality of linguistic knowledge representations motivate the development of completely unsupervised and knowledge-free approaches to word sense disambiguation (WSD). They are particularly useful for under-resourced languages which do not have any resources for building either supervised and/or knowledge-based models. In this paper, we present a method that takes as input a standard pre-trained word embedding model and induces a fully-fledged word sense inventory, which can be used for disambiguation in context. We use this method to induce a collection of sense inventories for 158 languages on the basis of the original pre-trained fastText word embeddings by Grave et al., (2018), enabling WSD in these languages. Models and system are available online.</abstract>
@@ -8909,7 +8909,7 @@
       <title>Extraction of Hyponymic Relations in <fixed-case>F</fixed-case>rench with Knowledge-Pattern-Based Word Sketches</title>
       <author><first>Antonio</first><last>San Martín</last></author>
       <author><first>Catherine</first><last>Trekker</last></author>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <pages>5953–5961</pages>
       <abstract>Hyponymy is the cornerstone of taxonomies and concept hierarchies. However, the extraction of hypernym-hyponym pairs from a corpus can be time-consuming, and reconstructing the hierarchical network of a domain is often an extremely complex process. This paper presents the development and evaluation of the French EcoLexicon Semantic Sketch Grammar (ESSG-fr), a French hyponymic sketch grammar for Sketch Engine based on knowledge patterns. It offers a user-friendly way of extracting hyponymic pairs in the form of word sketches in any user-owned corpus. The ESSG-fr contains three times more hyponymic patterns than its English counterpart and has been tested in a multidisciplinary corpus. It is thus expected to be domain-independent. Moreover, the following methodological innovations have been included in its development: (1) use of English hyponymic patterns in a parallel corpus to find new French patterns; (2) automatic inclusion of the results of the Sketch Engine thesaurus to find new variants of the patterns. As for its evaluation, the ESSG-fr returns 70% valid hyperonyms and hyponyms, measured on 180 extracted pairs of terms in three different domains.</abstract>
       <url hash="5cc85744">2020.lrec-1.729</url>
@@ -8940,7 +8940,7 @@
     </paper>
     <paper id="732">
       <title>One Classifier for All Ambiguous Words: Overcoming Data Sparsity by Utilizing Sense Correlations Across Words</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>5978–5985</pages>
       <abstract>Most supervised word sense disambiguation (WSD) systems build word-specific classifiers by leveraging labeled data. However, when using word-specific classifiers, the sparseness of annotations leads to inferior sense disambiguation performance on less frequently seen words. To combat data sparsity, we propose to learn a single model that derives sense representations and meanwhile enforces congruence between a word instance and its right sense by using both sense-annotated data and lexical resources. The model is shared across words that allows utilizing sense correlations across words, and therefore helps to transfer common disambiguation rules from annotation-rich words to annotation-lean words. Empirical evaluation on benchmark datasets shows that the proposed shared model outperforms the equivalent classifier-based models by 1.7%, 2.5% and 3.8% in F1-score when using GloVe, ELMo and BERT word embeddings respectively.</abstract>
@@ -8964,9 +8964,9 @@
     </paper>
     <paper id="734">
       <title>The <fixed-case>R</fixed-case>ussian <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Irina</first><last>Wagner</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Skatje</first><last>Myers</last></author>
       <pages>5995–6002</pages>
@@ -9011,7 +9011,7 @@
     <paper id="738">
       <title>Annotating a Fable in <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage (<fixed-case>LIS</fixed-case>)</title>
       <author><first>Serena</first><last>Trolvi</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>6025–6034</pages>
       <abstract>This paper introduces work carried out for the automatic generation of a written text in Italian starting from glosses of a fable in Italian Sign Language (LIS). The paper gives a brief overview of sign languages (SLs) and some peculiarities of SL fables such as the use of space, the strategy of Role Shift and classifiers. It also presents the annotation of the fable “The Tortoise and the Hare” - signed in LIS and made available by Alba Cooperativa Sociale -, which was annotated manually by first author for her master’s thesis. The annotation was the starting point of a generation process that allowed us to automatically generate a text in Italian starting from LIS glosses. LIS sentences have been transcribed with Italian words into tables on simultaneous layers, each of which contains specific linguistic or non-linguistic pieces of information. In addition, the present work discusses problems encountered in the annotation and generation process.</abstract>
       <url hash="0f0095a0">2020.lrec-1.738</url>
@@ -9021,7 +9021,7 @@
     <paper id="739">
       <title><fixed-case>H</fixed-case>am<fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>y<fixed-case>S</fixed-case>2<fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case>: Translating <fixed-case>H</fixed-case>am<fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>ys Into <fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case></title>
       <author><first>Carolina</first><last>Neves</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Hugo</first><last>Nicolau</last></author>
       <pages>6035–6039</pages>
       <abstract>Sign Languages are visual languages and the main means of communication used by Deaf people. However, the majority of the information available online is presented through written form. Hence, it is not of easy access to the Deaf community. Avatars that can animate sign languages have gained an increase of interest in this area due to their flexibility in the process of generation and edition. Synthetic animation of conversational agents can be achieved through the use of notation systems. HamNoSys is one of these systems, which describes movements of the body through symbols. Its XML-compliant, SiGML, is a machine-readable input of HamNoSys able to animate avatars. Nevertheless, current tools have no freely available open source libraries that allow the conversion from HamNoSys to SiGML. Our goal is to develop a tool of open access, which can perform this conversion independently from other platforms. This system represents a crucial intermediate step in the bigger pipeline of animating signing avatars. Two cases studies are described in order to illustrate different applications of our tool.</abstract>
@@ -9032,7 +9032,7 @@
     <paper id="740">
       <title><fixed-case>D</fixed-case>icta-<fixed-case>S</fixed-case>ign-<fixed-case>LSF</fixed-case>-v2: Remake of a Continuous <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Dialogue Corpus and a First Baseline for Automatic Sign Language Processing</title>
       <author><first>Valentin</first><last>Belissen</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Michèle</first><last>Gouiffès</last></author>
       <pages>6040–6048</pages>
       <abstract>While the research in automatic Sign Language Processing (SLP) is growing, it has been almost exclusively focused on recognizing lexical signs, whether isolated or within continuous SL production. However, Sign Languages include many other gestural units like iconic structures, which need to be recognized in order to go towards a true SL understanding. In this paper, we propose a newer version of the publicly available SL corpus Dicta-Sign, limited to its French Sign Language part. Involving 16 different signers, this dialogue corpus was produced with very few constraints on the style and content. It includes lexical and non-lexical annotations over 11 hours of video recording, with 35000 manual units. With the aim of stimulating research in SL understanding, we also provide a baseline for the recognition of lexical signs and non-lexical structures on this corpus. A very compact modeling of a signer is built and a Convolutional-Recurrent Neural Network is trained and tested on Dicta-Sign-LSF-v2, with state-of-the-art results, including the ability to detect iconicity in SL production.</abstract>
@@ -9064,7 +9064,7 @@
     <paper id="743">
       <title><fixed-case>MEDIAPI</fixed-case>-<fixed-case>SKEL</fixed-case> - A 2<fixed-case>D</fixed-case>-Skeleton Video Database of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage With Aligned <fixed-case>F</fixed-case>rench Subtitles</title>
       <author><first>Hannah</first><last>Bull</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Michèle</first><last>Gouiffès</last></author>
       <pages>6063–6068</pages>
       <abstract>This paper presents MEDIAPI-SKEL, a 2D-skeleton database of French Sign Language videos aligned with French subtitles. The corpus contains 27 hours of video of body, face and hand keypoints, aligned to subtitles with a vocabulary size of 17k tokens. In contrast to existing sign language corpora such as videos produced under laboratory conditions or translations of TV programs into sign language, this database is constructed using original sign language content largely produced by deaf journalists at the media company Média-Pi. Moreover, the videos are accurately synchronized with French subtitles. We propose three challenges appropriate for this corpus that are related to processing units of signs in context: automatic alignment of text and video, semantic segmentation of sign language, and production of video-text embeddings for cross-modal retrieval. These challenges deviate from the classic task of identifying a limited number of lexical signs in a video stream.</abstract>
@@ -9125,7 +9125,7 @@
       <author><first>Jie</first><last>Gao</last></author>
       <author><first>Sooji</first><last>Han</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <pages>6094–6105</pages>
       <abstract>Early rumor detection (ERD) on social media platform is very challenging when limited, incomplete and noisy information is available. Most of the existing methods have largely worked on event-level detection that requires the collection of posts relevant to a specific event and relied only on user-generated content. They are not appropriate to detect rumor sources in the very early stages, before an event unfolds and becomes widespread. In this paper, we address the task of ERD at the message level. We present a novel hybrid neural network architecture, which combines a task-specific character-based bidirectional language model and stacked Long Short-Term Memory (LSTM) networks to represent textual contents and social-temporal contexts of input source tweets, for modelling propagation patterns of rumors in the early stages of their development. We apply multi-layered attention models to jointly learn attentive context embeddings over multiple context inputs. Our experiments employ a stringent leave-one-out cross-validation (LOO-CV) evaluation setup on seven publicly available real-life rumor event data sets. Our models achieve state-of-the-art(SoA) performance for detecting unseen rumors on large augmented data which covers more than 12 events and 2,967 rumors. An ablation study is conducted to understand the relative contribution of each component of our proposed model.</abstract>
       <url hash="c7cbc1a7">2020.lrec-1.748</url>
@@ -9147,7 +9147,7 @@
       <title>Searching <fixed-case>B</fixed-case>razilian <fixed-case>T</fixed-case>witter for Signs of Mental Health Issues</title>
       <author><first>Wesley</first><last>Santos</last></author>
       <author><first>Amanda</first><last>Funabashi</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>6111–6117</pages>
       <abstract>Depression and related mental health issues are often reflected in the language employed by the individuals who suffer from these conditions and, accordingly, research in Natural Language Processing (NLP) and related fields have developed an increasing number of studies devoted to their recognition in social media text. Some of these studies have also attempted to go beyond recognition by focusing on the early signs of these illnesses, and by analysing the users’ publication history over time to potentially prevent further harm. The two kinds of study are of course overlapping, and often make use of supervised machine learning methods based on annotated corpora. However, as in many other fields, existing resources are largely devoted to English NLP, and there is little support for these studies in under resourced languages. To bridge this gap, in this paper we describe the initial steps towards building a novel resource of this kind - a corpus intended to support both the recognition of mental health issues and the temporal analysis of these illnesses - in the Brazilian Portuguese language, and initial results of a number of experiments in text classification addressing both tasks.</abstract>
       <url hash="dd5f2946">2020.lrec-1.750</url>
@@ -9168,7 +9168,7 @@
     </paper>
     <paper id="752">
       <title>An Annotated Social Media Corpus for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>6127–6135</pages>
       <abstract>This paper presents the German Twitter section of a large (2 billion word) bilingual Social Media corpus for Hate Speech research, discussing the compilation, pseudonymization and grammatical annotation of the corpus, as well as special linguistic features and peculiarities encountered in the data. Among other things, compounding, accidental and intentional orthographic variation, gendering and the use of emoticons/emojis are addressed in a genre-specific fashion. We present the different layers of linguistic annotation (morphosyntactic, dependencies and semantic types) and explain how a general parser (GerGram) can be made to work on Social Media data, pointing out necessary adaptations and extensions. In an evaluation run on a random cross-section of tweets, the modified parser achieved F-scores of 97% for morphology (fine-grained POS) and 92% for syntax (labeled attachment score). Predictably, performance was twice as good in tweets with standard orthography than in tweets with spelling/casing irregularities or lack of sentence separation, the effect being more marked for morphology than for syntax.</abstract>
       <url hash="ba8a9b50">2020.lrec-1.752</url>
@@ -9213,7 +9213,7 @@
     <paper id="756">
       <title>Optimising <fixed-case>T</fixed-case>witter-based Political Election Prediction with Relevance and<fixed-case>S</fixed-case>entiment Filters</title>
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>6158–6165</pages>
       <abstract>We study the relation between the number of mentions of political parties in the last weeks before the elections and the election results. In this paper we focus on the Dutch elections of the parliament in 2012 and for the provinces (and the senate) in 2011 and 2015. With raw counts, without adaptations, we achieve a mean absolute error (MAE) of 2.71% for 2011, 2.02% for 2012 and 2.89% for 2015. A set of over 17,000 tweets containing political party names were annotated by at least three annotators per tweet on ten features denoting communicative intent (including the presence of sarcasm, the message’s polarity, the presence of an explicit voting endorsement or explicit voting advice, etc.). The annotations were used to create oracle (gold-standard) filters. Tweets with or without a certain majority annotation are held out from the tweet counts, with the goal of attaining lower MAEs. With a grid search we tested all combinations of filters and their responding MAE to find the best filter ensemble. It appeared that the filters show markedly different behaviour for the three elections and only a small MAE improvement is possible when optimizing on all three elections. Larger improvements for one election are possible, but result in deterioration of the MAE for the other elections.</abstract>
       <url hash="5792d337">2020.lrec-1.756</url>
@@ -9223,7 +9223,7 @@
     <paper id="757">
       <title>A Real-Time System for Credibility on <fixed-case>T</fixed-case>witter</title>
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <author><first>Andrei-Remus</first><last>Miron</last></author>
       <author><first>Mihai-Stefan</first><last>Dudu</last></author>
       <pages>6166–6173</pages>
@@ -9392,7 +9392,7 @@
       <title>Small Town or Metropolis? Analyzing the Relationship between Population Size and Language</title>
       <author><first>Amy</first><last>Rechkemmer</last></author>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>6287–6291</pages>
       <abstract>The variance in language used by different cultures has been a topic of study for researchers in linguistics and psychology, but often times, language is compared across multiple countries in order to show a difference in culture. As a geographically large country that is diverse in population in terms of the background and experiences of its citizens, the U.S. also contains cultural differences within its own borders. Using a set of over 2 million posts from distinct Twitter users around the country dating back as far as 2014, we ask the following question: is there a difference in how Americans express themselves online depending on whether they reside in an urban or rural area? We categorize Twitter users as either urban or rural and identify ideas and language that are more commonly expressed in tweets written by one population over the other. We take this further by analyzing how the language from specific cities of the U.S. compares to the language of other cities and by training predictive models to predict whether a user is from an urban or rural area. We publicly release the tweet and user IDs that can be used to reconstruct the dataset for future studies in this direction.</abstract>
       <url hash="55a626bf">2020.lrec-1.771</url>
@@ -9403,7 +9403,7 @@
       <title>Inferring Social Media Users’ Mental Health Status from Multimodal Information</title>
       <author><first>Zhentao</first><last>Xu</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>6292–6299</pages>
       <abstract>Worldwide, an increasing number of people are suffering from mental health disorders such as depression and anxiety. In the United States alone, one in every four adults suffers from a mental health condition, which makes mental health a pressing concern. In this paper, we explore the use of multimodal cues present in social media posts to predict users’ mental health status. Specifically, we focus on identifying social media activity that either indicates a mental health condition or its onset. We collect posts from Flickr and apply a multimodal approach that consists of jointly analyzing language, visual, and metadata cues and their relation to mental health. We conduct several classification experiments aiming to discriminate between (1) healthy users and users affected by a mental health illness; and (2) healthy users and users prone to mental illness. Our experimental results indicate that using multiple modalities can improve the performance of this classification task as compared to the use of one modality at a time, and can provide important cues into a user’s mental status.</abstract>
       <url hash="18b4e839">2020.lrec-1.772</url>
@@ -9477,7 +9477,7 @@
       <title>Evaluating and Improving Child-Directed Automatic Speech Recognition</title>
       <author><first>Eric</first><last>Booth</last></author>
       <author><first>Jake</first><last>Carns</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Nader</first><last>Rafla</last></author>
       <pages>6340–6345</pages>
       <abstract>Speech recognition has seen dramatic improvements in the last decade, though those improvements have focused primarily on adult speech. In this paper, we assess child-directed speech recognition and leverage a transfer learning approach to improve child-directed speech recognition by training the recent DeepSpeech2 model on adult data, then apply additional tuning to varied amounts of child speech data. We evaluate our model using the CMU Kids dataset as well as our own recordings of child-directed prompts. The results from our experiment show that even a small amount of child audio data improves significantly over a baseline of adult-only or child-only trained models. We report a final general Word-Error-Rate of 29% over a baseline of 62% that uses the adult-trained model. Our analyses show that our model adapts quickly using a small amount of data and that the general child model works better than school grade-specific models. We make available our trained model and our data collection tool.</abstract>
@@ -9502,7 +9502,7 @@
       <author><first>Oliver</first><last>Walter</last></author>
       <author><first>Christoph</first><last>Schmidt</last></author>
       <author><first>Sven</first><last>Behnke</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <pages>6354–6362</pages>
       <abstract>While recent automatic speech recognition systems achieve remarkable performance when large amounts of adequate, high quality annotated speech data is used for training, the same systems often only achieve an unsatisfactory result for tasks in domains that greatly deviate from the conditions represented by the training data. For many real-world applications, there is a lack of sufficient data that can be directly used for training robust speech recognition systems. To address this issue, we propose and investigate an approach that performs a robust acoustic model adaption to a target domain in a cross-lingual, multi-staged manner. Our approach enables the exploitation of large-scale training data from other domains in both the same and other languages. We evaluate our approach using the challenging task of German oral history interviews, where we achieve a relative reduction of the word error rate by more than 30% compared to a model trained from scratch only on the target domain, and 6-7% relative compared to a model trained robustly on 1000 hours of same-language out-of-domain training data.</abstract>
       <url hash="e574454c">2020.lrec-1.780</url>
@@ -9513,7 +9513,7 @@
       <title>Large Corpus of <fixed-case>C</fixed-case>zech Parliament Plenary Hearings</title>
       <author><first>Jonáš</first><last>Kratochvil</last></author>
       <author><first>Peter</first><last>Polák</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>6363–6367</pages>
       <abstract>We present a large corpus of Czech parliament plenary sessions. The corpus consists of approximately 1200 hours of speech data and corresponding text transcriptions. The whole corpus has been segmented to short audio segments making it suitable for both training and evaluation of automatic speech recognition (ASR) systems. The source language of the corpus is Czech, which makes it a valuable resource for future research as only a few public datasets are available in the Czech language. We complement the data release with experiments of two baseline ASR systems trained on the presented data: the more traditional approach implemented in the Kaldi ASRtoolkit which combines hidden Markov models and deep neural networks (NN) and a modern ASR architecture implemented in Jaspertoolkit which uses deep NNs in an end-to-end fashion.</abstract>
       <url hash="ad20b8c7">2020.lrec-1.781</url>
@@ -9549,7 +9549,7 @@
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Andrea</first><last>DeMarco</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Amanda</first><last>Muscat</last></author>
       <author><first>Ian</first><last>Padovani</last></author>
       <pages>6381–6388</pages>
@@ -9562,7 +9562,7 @@
       <title>Automatic Period Segmentation of Oral <fixed-case>F</fixed-case>rench</title>
       <author><first>Natalia</first><last>Kalashnikova</last></author>
       <author><first>Loïc</first><last>Grobol</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>François</first><last>Delafontaine</last></author>
       <pages>6389–6394</pages>
       <abstract>Natural Language Processing in oral speech segmentation is still looking for a minimal unit to analyze. In this work, we present a comparison of two automatic segmentation methods of macro-syntactic periods which allows to take into account syntactic and prosodic components of speech. We compare the performances of an existing tool Analor (Avanzi, Lacheret-Dujour, Victorri, 2008) developed for automatic segmentation of prosodic periods and of CRF models relying on syntactic and / or prosodic features. We find that Analor tends to divide speech into smaller segments and that CRF models detect larger segments rather than macro-syntactic periods. However, in general CRF models perform better results than Analor in terms of F-measure.</abstract>
@@ -9671,9 +9671,9 @@
       <title>The <fixed-case>SAFE</fixed-case>-<fixed-case>T</fixed-case> Corpus: A New Resource for Simulated Public Safety Communications</title>
       <author><first>Dana</first><last>Delgado</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Karen</first><last>Jones</last></author>
-      <author><first>Christopher</first><last>Caruso</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Jones</last></author>
+      <author id="christopher-caruso"><first>Christopher</first><last>Caruso</last></author>
       <author><first>David</first><last>Graff</last></author>
       <pages>6450–6457</pages>
       <abstract>We introduce a new resource, the SAFE-T (Speech Analysis for Emergency Response Technology) Corpus, designed to simulate first-responder communications by inducing high vocal effort and urgent speech with situational background noise in a game-based collection protocol. Linguistic Data Consortium developed the SAFE-T Corpus to support the NIST (National Institute of Standards and Technology) OpenSAT (Speech Analytic Technologies) evaluation series, whose goal is to advance speech analytic technologies including automatic speech recognition, speech activity detection and keyword search in multiple domains including simulated public safety communications data. The corpus comprises over 300 hours of audio from 115 unique speakers engaged in a collaborative problem-solving activity representative of public safety communications in terms of speech content, noise types and noise levels. Portions of the corpus have been used in the OpenSAT 2019 evaluation and the full corpus will be published in the LDC catalog. We describe the design and implementation of the SAFE-T Corpus collection, discuss the approach of capturing spontaneous speech from study participants through game-based speech collection, and report on the collection results including several challenges associated with the collection.</abstract>
@@ -9711,7 +9711,7 @@
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Volodymyr</first><last>Yanov</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>6469–6476</pages>
       <abstract>We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems across diverse dialogue domains (in US-English). Our evaluation is aimed at non-experts with limited experience in speech recognition. Our goal is not only to compare a variety of ASR systems on several diverse data sets but also to measure how much ASR technology has advanced since our previous large-scale evaluations on the same data sets. Our results show that the performance of each speech recognizer can vary significantly depending on the domain. Furthermore, despite major recent progress in ASR technology, current state-of-the-art speech recognizers perform poorly in domains that require special vocabulary and language models, and under noisy conditions. We expect that our evaluation will prove useful to ASR consumers and dialogue system designers.</abstract>
       <url hash="5d81a3f6">2020.lrec-1.797</url>
@@ -9721,7 +9721,7 @@
     <paper id="798">
       <title><fixed-case>CEASR</fixed-case>: A Corpus for Evaluating Automatic Speech Recognition</title>
       <author><first>Malgorzata Anna</first><last>Ulasik</last></author>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Fabian</first><last>Germann</last></author>
       <author><first>Esin</first><last>Gedik</last></author>
       <author><first>Fernando</first><last>Benites</last></author>
@@ -9738,7 +9738,7 @@
       <author><first>William</first><last>Havard</last></author>
       <author><first>Mahault</first><last>Garnerin</last></author>
       <author><first>Éric</first><last>Le Ferrand</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>6486–6493</pages>
       <abstract>The CMU Wilderness Multilingual Speech Dataset (Black, 2019) is a newly published multilingual speech dataset based on recorded readings of the New Testament. It provides data to build Automatic Speech Recognition (ASR) and Text-to-Speech (TTS) models for potentially 700 languages. However, the fact that the source content (the Bible) is the same for all the languages is not exploited to date. Therefore, this article proposes to add multilingual links between speech segments in different languages, and shares a large and clean dataset of 8,130 parallel spoken utterances across 8 languages (56 language pairs). We name this corpus MaSS (Multilingual corpus of Sentence-aligned Spoken utterances). The covered languages (Basque, English, Finnish, French, Hungarian, Romanian, Russian and Spanish) allow researches on speech-to-speech alignment as well as on translation for typologically different language pairs. The quality of the final corpus is attested by human evaluation performed on a corpus subset (100 utterances, 8 language pairs). Lastly, we showcase the usefulness of the final product on a bilingual speech retrieval task.</abstract>
       <url hash="7109da08">2020.lrec-1.799</url>
@@ -9753,7 +9753,7 @@
       <author><first>Clara</first><last>Rivera</last></author>
       <author><first>Anna</first><last>Katanova</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <author><first>Cibu</first><last>Johny</last></author>
       <author><first>Martin</first><last>Jansche</last></author>
       <author><first>Supheakmungkol</first><last>Sarin</last></author>
@@ -9767,7 +9767,7 @@
     <paper id="801">
       <title>Crowdsourcing <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>panish for Low-Resource Text-to-Speech</title>
       <author><first>Adriana</first><last>Guevara-Rukoz</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <author><first>Fei</first><last>He</last></author>
       <author><first>Shan-Hui Cathy</first><last>Chu</last></author>
       <author><first>Supheakmungkol</first><last>Sarin</last></author>
@@ -9805,7 +9805,7 @@
     </paper>
     <paper id="804">
       <title>Open-source Multi-speaker Corpora of the <fixed-case>E</fixed-case>nglish Accents in the <fixed-case>B</fixed-case>ritish Isles</title>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <author><first>Oddur</first><last>Kjartansson</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <author><first>Clara</first><last>Rivera</last></author>
@@ -9922,7 +9922,7 @@
       <title>Gender Representation in Open Source Speech Resources</title>
       <author><first>Mahault</first><last>Garnerin</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>6599–6605</pages>
       <abstract>With the rise of artificial intelligence (AI) and the growing use of deep-learning architectures, the question of ethics, transparency and fairness of AI systems has become a central concern within the research community. We address transparency and fairness in spoken language systems by proposing a study about gender representation in speech resources available through the Open Speech and Language Resource platform. We show that finding gender information in open source corpora is not straightforward and that gender balance depends on other corpus characteristics (elicited/non elicited speech, low/high resource language, speech task targeted). The paper ends with recommendations about metadata and gender information for researchers in order to assure better transparency of the speech systems built using such corpora.</abstract>
       <url hash="a35d88a6">2020.lrec-1.813</url>
@@ -9954,8 +9954,8 @@
     </paper>
     <paper id="816">
       <title>Call My Net 2: A New Resource for Speaker Recognition</title>
-      <author><first>Karen</first><last>Jones</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Jones</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
       <pages>6621–6626</pages>
@@ -9968,8 +9968,8 @@
       <title><fixed-case>D</fixed-case>a<fixed-case>CT</fixed-case>o<fixed-case>R</fixed-case>: A Data Collection Tool for the <fixed-case>RELATER</fixed-case> Project</title>
       <author><first>Juan</first><last>Hussain</last></author>
       <author><first>Oussama</first><last>Zenkri</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>6627–6632</pages>
       <abstract>Collecting domain-specific data for under-resourced languages, e.g., dialects of languages, can be very expensive, potentially financially prohibitive and taking long time. Moreover, in the case of rarely written languages, the normalization of non-canonical transcription might be another time consuming but necessary task. In order to collect domain-specific data in such circumstances in a time and cost-efficient way, collecting read data of pre-prepared texts is often a viable option. In order to collect data in the domain of psychiatric diagnosis in Arabic dialects for the project RELATER, we have prepared the data collection tool DaCToR for collecting read texts by speakers in the respective countries and districts in which the dialects are spoken. In this paper we describe our tool, its purpose within the project RELATER and the dialects which we have started to collect with the tool.</abstract>
       <url hash="61e54f29">2020.lrec-1.817</url>
@@ -9979,8 +9979,8 @@
     <paper id="818">
       <title>Development and Evaluation of Speech Synthesis Corpora for <fixed-case>L</fixed-case>atvian</title>
       <author><first>Roberts</first><last>Darģis</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Ilze</first><last>Auzina</last></author>
       <author><first>Agate</first><last>Akmane</last></author>
       <pages>6633–6637</pages>
@@ -9991,7 +9991,7 @@
     </paper>
     <paper id="819">
       <title>Abstractive Document Summarization without Parallel Data</title>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Richard</first><last>Hahnloser</last></author>
       <pages>6638–6644</pages>
       <abstract>Abstractive summarization typically relies on large collections of paired articles and summaries. However, in many cases, parallel data is scarce and costly to obtain. We develop an abstractive summarization system that relies only on large collections of example summaries and non-matching articles. Our approach consists of an unsupervised sentence extractor that selects salient sentences to include in the final summary, as well as a sentence abstractor that is trained on pseudo-parallel and synthetic data, that paraphrases each of the extracted sentences. We perform an extensive evaluation of our method: on the CNN/DailyMail benchmark, on which we compare our approach to fully supervised baselines, as well as on the novel task of automatically generating a press release from a scientific journal article, which is well suited for our system. We show promising performance on both tasks, without relying on any article-summary pairs.</abstract>
@@ -10031,7 +10031,7 @@
     <paper id="823">
       <title>Two Huge Title and Keyword Generation Corpora of Research Articles</title>
       <author><first>Erion</first><last>Çano</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>6663–6671</pages>
       <abstract>Recent developments in sequence-to-sequence learning with neural networks have considerably improved the quality of automatically generated text summaries and document keywords, stipulating the need for even bigger training corpora. Metadata of research articles are usually easy to find online and can be used to perform research on various tasks. In this paper, we introduce two huge datasets for text summarization (OAGSX) and keyword generation (OAGKX) research, containing 34 million and 23 million records, respectively. The data were retrieved from the Open Academic Graph which is a network of research profiles and publications. We carefully processed each record and also tried several extractive and abstractive methods of both tasks to create performance baselines for other researchers. We further illustrate the performance of those methods previewing their outputs. In the near future, we would like to apply topic modeling on the two sets to derive subsets of research articles from more specific disciplines.</abstract>
       <url hash="45f47449">2020.lrec-1.823</url>
@@ -10040,7 +10040,7 @@
     </paper>
     <paper id="824">
       <title>A Multi-level Annotated Corpus of Scientific Papers for Scientific Document Summarization and Cross-document Relation Discovery</title>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Luis</first><last>Chiruzzo</last></author>
       <pages>6672–6679</pages>
@@ -10052,7 +10052,7 @@
     <paper id="825">
       <title>Abstractive Text Summarization based on Language Model Conditioning and Locality Modeling</title>
       <author><first>Dmitrii</first><last>Aksenov</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
       <author><first>Robert</first><last>Schwarzenberg</last></author>
       <author><first>Leonhard</first><last>Hennig</last></author>
@@ -10066,7 +10066,7 @@
     <paper id="826">
       <title>A Data Set for the Analysis of Text Quality Dimensions in Summarization Evaluation</title>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Eneldo</first><last>Loza Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo</first><last>Loza Mencía</last></author>
       <author><first>Tim</first><last>Kronsbein</last></author>
       <pages>6690–6699</pages>
       <abstract>Automatic evaluation of summarization focuses on developing a metric to represent the quality of the resulting text. However, text qualityis represented in a variety of dimensions ranging from grammaticality to readability and coherence. In our work, we analyze the depen-dencies between a variety of quality dimensions on automatically created multi-document summaries and which dimensions automaticevaluation metrics such as ROUGE, PEAK or JSD are able to capture. Our results indicate that variants of ROUGE are correlated tovarious quality dimensions and that some automatic summarization methods achieve higher quality summaries than others with respectto individual summary quality dimensions. Our results also indicate that differentiating between quality dimensions facilitates inspectionand fine-grained comparison of summarization methods and its characteristics. We make the data from our two summarization qualityevaluation experiments publicly available in order to facilitate the future development of specialized automatic evaluation methods.</abstract>
@@ -10292,7 +10292,7 @@
       <title><fixed-case>H</fixed-case>ypo<fixed-case>NLI</fixed-case>: Exploring the Artificial Patterns of Hypothesis-only Bias in Natural Language Inference</title>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zheng</first><last>Xin</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>6852–6860</pages>
       <abstract>Many recent studies have shown that for models trained on datasets for natural language inference (NLI), it is possible to make correct predictions by merely looking at the hypothesis while completely ignoring the premise. In this work, we manage to derive adversarial examples in terms of the hypothesis-only bias and explore eligible ways to mitigate such bias. Specifically, we extract various phrases from the hypotheses (artificial patterns) in the training sets, and show that they have been strong indicators to the specific labels. We then figure out ‘hard’ and ‘easy’ instances from the original test sets whose labels are opposite to or consistent with those indications. We also set up baselines including both pretrained models (BERT, RoBerta, XLNet) and competitive non-pretrained models (InferSent, DAM, ESIM). Apart from the benchmark and baselines, we also investigate two debiasing approaches which exploit the artificial pattern modeling to mitigate such hypothesis-only bias: down-sampling and adversarial training. We believe those methods can be treated as competitive baselines in NLI debiasing tasks.</abstract>
@@ -10368,11 +10368,11 @@
     </paper>
     <paper id="853">
       <title>Detecting Negation Cues and Scopes in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Eduardo</first><last>Blanco</last></author>
       <author><first>María Teresa</first><last>Martín Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña López</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña López</last></author>
       <pages>6902–6911</pages>
       <abstract>In this work we address the processing of negation in Spanish. We first present a machine learning system that processes negation in Spanish. Specifically, we focus on two tasks: i) negation cue detection and ii) scope identification. The corpus used in the experimental framework is the SFU Corpus. The results for cue detection outperform state-of-the-art results, whereas for scope detection this is the first system that performs the task for Spanish. Moreover, we provide a qualitative error analysis aimed at understanding the limitations of the system and showing which negation cues and scopes are straightforward to predict automatically, and which ones are challenging.</abstract>
       <url hash="63a3022c">2020.lrec-1.853</url>
@@ -10398,9 +10398,9 @@
       <author><first>Matthew</first><last>Coole</last></author>
       <author><first>Ignatius</first><last>Ezeani</last></author>
       <author><first>Sheryl</first><last>Prentice</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Jo</first><last>Knight</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>John</first><last>Mariani</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
@@ -10428,7 +10428,7 @@
       <author><first>Jerry Alan</first><last>Fails</last></author>
       <author><first>Sole</first><last>Pera</last></author>
       <author><first>Katherine</first><last>Wright</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>6937–6946</pages>
       <abstract>For help with their spelling errors, children often turn to spellcheckers integrated in software applications like word processors and search engines. However, existing spellcheckers are usually tuned to the needs of traditional users (i.e., adults) and generally prove unsatisfactory for children. Motivated by this issue, we introduce KidSpell, an English spellchecker oriented to the spelling needs of children. KidSpell applies (i) an encoding strategy for mapping both misspelled words and spelling suggestions to their phonetic keys and (ii) a selection process that prioritizes candidate spelling suggestions that closely align with the misspelled word based on their respective keys. To assess the effectiveness of, we compare the model’s performance against several popular, mainstream spellcheckers in a number of offline experiments using existing and novel datasets. The results of these experiments show that KidSpell outperforms existing spellcheckers, as it accurately prioritizes relevant spelling corrections when handling misspellings generated by children in both essay writing and online search tasks. As a byproduct of our study, we create two new datasets comprised of spelling errors generated by children from hand-written essays and web search inquiries, which we make available to the research community.</abstract>
       <url hash="08bc69b0">2020.lrec-1.857</url>
@@ -10439,10 +10439,10 @@
       <title><fixed-case>T</fixed-case>hai<fixed-case>LMC</fixed-case>ut: Unsupervised Pretraining for <fixed-case>T</fixed-case>hai Word Segmentation</title>
       <author><first>Suteera</first><last>Seeha</last></author>
       <author><first>Ivan</first><last>Bilan</last></author>
-      <author><first>Liliana</first><last>Mamani Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sanchez</last></author>
       <author><first>Johannes</first><last>Huber</last></author>
       <author><first>Michael</first><last>Matuschek</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>6947–6957</pages>
       <abstract>We propose ThaiLMCut, a semi-supervised approach for Thai word segmentation which utilizes a bi-directional character language model (LM) as a way to leverage useful linguistic knowledge from unlabeled data. After the language model is trained on substantial unlabeled corpora, the weights of its embedding and recurrent layers are transferred to a supervised word segmentation model which continues fine-tuning them on a word segmentation task. Our experimental results demonstrate that applying the LM always leads to a performance gain, especially when the amount of labeled data is small. In such cases, the F1 Score increased by up to 2.02%. Even on abig labeled dataset, a small improvement gain can still be obtained. The approach has also shown to be very beneficial for out-of-domain settings with a gain in F1 Score of up to 3.13%. Finally, we show that ThaiLMCut can outperform other open source state-of-the-art models achieving an F1 Score of 98.78% on the standard benchmark, InterBEST2009.</abstract>
       <url hash="a76f2ae2">2020.lrec-1.858</url>
@@ -10454,7 +10454,7 @@
       <author><first>Reem</first><last>Alatrash</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>6958–6966</pages>
       <abstract>Modelling language change is an increasingly important area of interest within the fields of sociolinguistics and historical linguistics. In recent years, there has been a growing number of publications whose main concern is studying changes that have occurred within the past centuries. The Corpus of Historical American English (COHA) is one of the most commonly used large corpora in diachronic studies in English. This paper describes methods applied to the downloadable version of the COHA corpus in order to overcome its main limitations, such as inconsistent lemmas and malformed tokens, without compromising its qualitative and distributional properties. The resulting corpus CCOHA contains a larger number of cleaned word tokens which can offer better insights into language change and allow for a larger variety of tasks to be performed.</abstract>
       <url hash="89e0d646">2020.lrec-1.859</url>
@@ -10464,7 +10464,7 @@
     <paper id="860">
       <title>Outbound Translation User Interface Ptakopět: A Pilot Study</title>
       <author><first>Vilém</first><last>Zouhar</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>6967–6975</pages>
       <abstract>It is not uncommon for Internet users to have to produce a text in a foreign language they have very little knowledge of and are unable to verify the translation quality. We call the task “outbound translation” and explore it by introducing an open-source modular system Ptakopět. Its main purpose is to inspect human interaction with MT systems enhanced with additional subsystems, such as backward translation and quality estimation. We follow up with an experiment on (Czech) human annotators tasked to produce questions in a language they do not speak (German), with the help of Ptakopět. We focus on three real-world use cases (communication with IT support, describing administrative issues and asking encyclopedic questions) from which we gain insight into different strategies users take when faced with outbound translation tasks. Round trip translation is known to be unreliable for evaluating MT systems but our experimental evaluation documents that it works very well for users, at least on MT systems of mid-range quality.</abstract>
       <url hash="6e0dcbb2">2020.lrec-1.860</url>
@@ -10475,7 +10475,7 @@
       <title><fixed-case>S</fixed-case>eshat: a Tool for Managing and Verifying Annotation Campaigns of Audio Data</title>
       <author><first>Hadrien</first><last>Titeux</last></author>
       <author><first>Rachid</first><last>Riad</last></author>
-      <author><first>Xuan-Nga</first><last>Cao</last></author>
+      <author id="xuan-nga-cao"><first>Xuan-Nga</first><last>Cao</last></author>
       <author><first>Nicolas</first><last>Hamilakis</last></author>
       <author><first>Kris</first><last>Madden</last></author>
       <author><first>Alejandrina</first><last>Cristia</last></author>
@@ -10643,9 +10643,9 @@
       <author><first>Yutaka</first><last>Yagi</last></author>
       <author><first>Takenori</first><last>Nakamura</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <author><first>Toshinobu</first><last>Ogiso</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Kumiko</first><last>Sakoda</last></author>
       <author><first>Nobuko</first><last>Kibe</last></author>
       <pages>7077–7083</pages>
@@ -10691,7 +10691,7 @@
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Francesca</first><last>Callejas</last></author>
       <author><first>Elizabeth</first><last>Nichols</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>7112–7122</pages>
       <abstract>Computational morphological segmentation has been an active research topic for decades as it is beneficial for many natural language processing tasks. With the high cost of manually labeling data for morphology and the increasing interest in low-resource languages, unsupervised morphological segmentation has become essential for processing a typologically diverse set of languages, whether high-resource or low-resource. In this paper, we present and release MorphAGram, a publicly available framework for unsupervised morphological segmentation that uses Adaptor Grammars (AG) and is based on the work presented by Eskander et al. (2016). We conduct an extensive quantitative and qualitative evaluation of this framework on 12 languages and show that the framework achieves state-of-the-art results across languages of different typologies (from fusional to polysynthetic and from high-resource to low-resource).</abstract>
@@ -10703,7 +10703,7 @@
       <title><fixed-case>CTAP</fixed-case> for <fixed-case>I</fixed-case>talian: Integrating Components for the Analysis of <fixed-case>I</fixed-case>talian into a Multilingual Linguistic Complexity Analysis Tool</title>
       <author><first>Nadezda</first><last>Okinina</last></author>
       <author><first>Jennifer-Carmen</first><last>Frey</last></author>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <pages>7123–7131</pages>
       <abstract>Linguistic complexity research being a very actively developing field, an increasing number of text analysis tools are created that use natural language processing techniques for the automatic extraction of quantifiable measures of linguistic complexity. While most tools are designed to analyse only one language, the CTAP open source linguistic complexity measurement tool is capable of processing multiple languages, making cross-lingual comparisons possible. Although it was originally developed for English, the architecture has been ex-tended to support multi-lingual analyses. Here we present the Italian component of CTAP, describe its implementation and compare it to the existing linguistic complexity tools for Italian. Offering general text length statistics and features for lexical, syntactic, and morpho-syntactic complexity (including measures of lexical frequency, lexical diversity, lexical and syntactical variation, part-of-speech density), CTAP is currently the most comprehensive linguistic complexity measurement tool for Italian and the only one allowing the comparison of Italian texts to multiple other languages within one tool.</abstract>
       <url hash="cc013ee5">2020.lrec-1.880</url>
@@ -10713,7 +10713,7 @@
     <paper id="881">
       <title>Do you Feel Certain about your Annotation? A Web-based Semantic Frame Annotation Tool Considering Annotators’ Concerns and Behaviors</title>
       <author><first>Regina</first><last>Stodden</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <pages>7132–7139</pages>
       <abstract>In this system demonstration paper, we present an open-source web-based application with a responsive design for modular semantic frame annotation (SFA). Besides letting experienced and inexperienced users do suggestion-based and slightly-controlled annotations, the system keeps track of the time and changes during the annotation process and stores the users’ confidence with the current annotation. This collected metadata can be used to get insights regarding the difficulty of an annotation with the same type or frame or can be used as an input of an annotation cost measurement for an active learning algorithm. The tool was already used to build a manually annotated corpus with semantic frames and its arguments for task 2 of SemEval 2019 regarding unsupervised lexical frame induction (QasemiZadeh et al., 2019). Although English sentences from the Wall Street Journal corpus of the Penn Treebank were annotated for this task, it is also possible to use the proposed tool for the annotation of sentences in other languages.</abstract>
@@ -10738,7 +10738,7 @@
       <author><first>Andrea</first><last>Cimino</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>7145–7151</pages>
       <abstract>In this paper, we introduce Profiling–UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling–UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.</abstract>
       <url hash="1f48b984">2020.lrec-1.883</url>
@@ -10779,7 +10779,7 @@
     </paper>
     <paper id="887">
       <title><fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o: A Tool for Automatically Assessing Conceptual Complexity of Texts</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Sergiu</first><last>Nisioi</last></author>
       <author><first>Ioana</first><last>Hulpuș</last></author>
       <pages>7179–7186</pages>
@@ -10841,7 +10841,7 @@
     </paper>
     <paper id="892">
       <title>Contemplata, a Free Platform for Constituency Treebank Annotation</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Ilaine</first><last>Wang</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Anaïs</first><last>Halftermeyer</last></author>
@@ -10856,8 +10856,8 @@
       <author><first>Kyeongmin</first><last>Rim</last></author>
       <author><first>Kelley</first><last>Lynch</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>7230–7237</pages>
       <abstract>Promoting interoperrable computational linguistics (CL) and natural language processing (NLP) application platforms and interchange-able data formats have contributed improving discoverabilty and accessbility of the openly available NLP software. In this paper, wediscuss the enhanced data visualization capabilities that are also enabled by inter-operating NLP pipelines and interchange formats. For adding openly available visualization tools and graphical annotation tools to the Language Applications Grid (LAPPS Grid) andComputational Linguistics Applications for Multimedia Services (CLAMS) toolboxes, we have developed interchange formats that cancarry annotations and metadata for text and audiovisual source data. We descibe those data formats and present case studies where wesuccessfully adopt open-source visualization tools and combine them with CL tools.</abstract>
       <url hash="1c39c15a">2020.lrec-1.893</url>
@@ -10886,7 +10886,7 @@
       <author><first>Robson</first><last>Fidalgo</last></author>
       <author><first>Adrian</first><last>Chifu</last></author>
       <author><first>Bernard</first><last>Espinasse</last></author>
-      <author><first>Sébastien</first><last>Fournier</last></author>
+      <author id="sebastien-fournier"><first>Sébastien</first><last>Fournier</last></author>
       <pages>7244–7251</pages>
       <abstract>Natural Language Processing (NLP) of textual data is usually broken down into a sequence of several subtasks, where the output of one the subtasks becomes the input to the following one, which constitutes an NLP pipeline. Many third-party NLP tools are currently available, each performing distinct NLP subtasks. However, it is difficult to integrate several NLP toolkits into a pipeline due to many problems, including different input/output representations or formats, distinct programming languages, and tokenization issues. This paper presents DeepNLPF, a framework that enables easy integration of third-party NLP tools, allowing the user to preprocess natural language texts at lexical, syntactic, and semantic levels. The proposed framework also provides an API for complete pipeline customization including the definition of input/output formats, integration plugin management, transparent ultiprocessing execution strategies, corpus-level statistics, and database persistence. Furthermore, the DeepNLPF user-friendly GUI allows its use even by a non-expert NLP user. We conducted runtime performance analysis showing that DeepNLPF not only easily integrates existent NLP toolkits but also reduces significant runtime processing compared to executing the same NLP pipeline in a sequential manner.</abstract>
       <url hash="338e0767">2020.lrec-1.895</url>
diff --git a/data/xml/2020.lt4gov.xml b/data/xml/2020.lt4gov.xml
index 9dc3e88559..afab9bf3c1 100644
--- a/data/xml/2020.lt4gov.xml
+++ b/data/xml/2020.lt4gov.xml
@@ -45,7 +45,7 @@
     <paper id="3">
       <title>Enhancing Job Searches in <fixed-case>M</fixed-case>exico City with Language Technologies</title>
       <author><first>Gerardo</first><last>Sierra Martínez</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Helena</first><last>Gómez-Adorno</last></author>
       <author><first>Juan Manuel</first><last>Torres Moreno</last></author>
       <author><first>Tonatiuh</first><last>Hernández-García</last></author>
@@ -65,7 +65,7 @@
       <author><first>Ioanna</first><last>Grypari</last></author>
       <author><first>Dimitris</first><last>Pappas</last></author>
       <author><first>Natalia</first><last>Manola</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <pages>22–27</pages>
       <abstract>Cat. 2 Show-case: We present the Data4Impact (D4I) platform, a novel end-to-end system for evidence-based, timely and accurate monitoring and evaluation of research and innovation (R&amp;I) activities. Using the latest technological advances in Human Language Technology (HLT) and our data-driven methodology, we build a novel set of indicators in order to track funded projects and their impact on science, the economy and the society as a whole, during and after the project life-cycle. We develop our methodology by targeting Health-related EC projects from 2007 to 2019 to produce solutions that meet the needs of stakeholders (mainly policy-makers and research funders). Various D4I text analytics workflows process datasets and their metadata, extract valuable insights and estimate intermediate results and metrics, culminating in a set of robust indicators that the users can interact with through our dashboard, the D4I Monitor (available at monitor.data4impact.eu). Therefore, our approach, which can be generalized to different contexts, is multidimensional (technology, tools, indicators, dashboard) and the resulting system can provide an innovative solution for public administrators in their policy-making needs related to RDI funding allocation.</abstract>
       <url hash="9c318a57">2020.lt4gov-1.4</url>
diff --git a/data/xml/2020.lt4hala.xml b/data/xml/2020.lt4hala.xml
index 354421b778..a97d5cdc74 100644
--- a/data/xml/2020.lt4hala.xml
+++ b/data/xml/2020.lt4hala.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages</booktitle>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <editor><first>Marco</first><last>Passarotti</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Marseille, France</address>
@@ -59,7 +59,7 @@
     <paper id="5">
       <title>Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and <fixed-case>L</fixed-case>atin to <fixed-case>F</fixed-case>rench Reflex Prediction</title>
       <author><first>Clayton</first><last>Marr</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>28–36</pages>
       <abstract>Traditionally, historical phonologists have relied on tedious manual derivations to calibrate the sequences of sound changes that shaped the phonological evolution of languages. However, humans are prone to errors, and cannot track thousands of parallel word derivations in any efficient manner. We propose to instead automatically derive each lexical item in parallel, and we demonstrate forward reconstruction as both a computational task with metrics to optimize, and as an empirical tool for inquiry. For this end we present DiaSim, a user-facing application that simulates “cascades” of diachronic developments over a language’s lexicon and provides diagnostics for “debugging” those cascades. We test our methodology on a Latin-to-French reflex prediction task, using a newly compiled dataset FLLex with 1368 paired Latin/French forms. We also present, FLLAPS, which maps 310 Latin reflexes through five stages until Modern French, derived from Pope (1934)’s sound tables. Our publicly available rule cascades include the baselines BaseCLEF and BaseCLEF*, representing the received view of Latin to French development, and DiaCLEF, build by incremental corrections to BaseCLEF aided by DiaSim’s diagnostics. DiaCLEF vastly outperforms the baselines, improving final accuracy on FLLex from 3.2%to 84.9%, and similar improvements across FLLAPS’ stages.</abstract>
       <url hash="cb49a06c">2020.lt4hala-1.5</url>
@@ -126,8 +126,8 @@
     <paper id="11">
       <title>Word Probability Findings in the <fixed-case>V</fixed-case>oynich Manuscript</title>
       <author><first>Colin</first><last>Layfield</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>John</first><last>Abela</last></author>
       <pages>74–78</pages>
       <abstract>The Voynich Manuscript has baffled scholars for centuries. Some believe the elaborate 15th century codex to be a hoax whilst others believe it is a real medieval manuscript whose contents are as yet unknown. In this paper, we provide additional evidence that the text of the manuscript displays the hallmarks of a proper natural language with respect to the relationship between word probabilities and (i) average information per subword segment and (ii) the relative positioning of consecutive subword segments necessary to uniquely identify words of different probabilities.</abstract>
@@ -138,7 +138,7 @@
     <paper id="12">
       <title>Comparing Statistical and Neural Models for Learning Sound Correspondences</title>
       <author><first>Clémentine</first><last>Fourrier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>79–83</pages>
       <abstract>Cognate prediction and proto-form reconstruction are key tasks in computational historical linguistics that rely on the study of sound change regularity. Solving these tasks appears to be very similar to machine translation, though methods from that field have barely been applied to historical linguistics. Therefore, in this paper, we investigate the learnability of sound correspondences between a proto-language and daughter languages for two machine-translation-inspired models, one statistical, the other neural. We first carry out our experiments on plausible artificial languages, without noise, in order to study the role of each parameter on the algorithms respective performance under almost perfect conditions. We then study real languages, namely Latin, Italian and Spanish, to see if those performances generalise well. We show that both model types manage to learn sound changes despite data scarcity, although the best performing model type depends on several parameters such as the size of the training data, the ambiguity, and the prediction direction.</abstract>
       <url hash="50e462e3">2020.lt4hala-1.12</url>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>L</fixed-case>atin-<fixed-case>S</fixed-case>panish Neural Machine Translation: from the <fixed-case>B</fixed-case>ible to Saint Augustine</title>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Álvaro</first><last>García Tejedor</last></author>
       <pages>94–99</pages>
       <abstract>Although there are several sources where to find historical texts, they usually are available in the original language that makes them generally inaccessible. This paper presents the development of state-of-the-art Neural Machine Systems for the low-resourced Latin-Spanish language pair. First, we build a Transformer-based Machine Translation system on the Bible parallel corpus. Then, we build a comparable corpus from Saint Augustine texts and their translations. We use this corpus to study the domain adaptation case from the Bible texts to Saint Augustine’s works. Results show the difficulties of handling a low-resourced language as Latin. First, we noticed the importance of having enough data, since the systems do not achieve high BLEU scores. Regarding domain adaptation, results show how using in-domain data helps systems to achieve a better quality translation. Also, we observed that it is needed a higher amount of data to perform an effective vocabulary extension that includes in-domain vocabulary.</abstract>
@@ -187,7 +187,7 @@
       <title>Overview of the <fixed-case>E</fixed-case>va<fixed-case>L</fixed-case>atin 2020 Evaluation Campaign</title>
       <author><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Matteo</first><last>Pellegrini</last></author>
       <pages>105–110</pages>
       <abstract>This paper describes the first edition of EvaLatin, a campaign totally devoted to the evaluation of NLP tools for Latin. The two shared tasks proposed in EvaLatin 2020, i. e. Lemmatization and Part-of-Speech tagging, are aimed at fostering research in the field of language technologies for Classical languages. The shared dataset consists of texts taken from the Perseus Digital Library, processed with UDPipe models and then manually corrected by Latin experts. The training set includes only prose texts by Classical authors. The test set, alongside with prose texts by the same authors represented in the training set, also includes data relative to poetry and to the Medieval period. This also allows us to propose the Cross-genre and Cross-time subtasks for each task, in order to evaluate the portability of NLP tools for Latin across different genres and time periods. The results obtained by the participants for each task and subtask are presented and discussed.</abstract>
diff --git a/data/xml/2020.mmw.xml b/data/xml/2020.mmw.xml
index 8b062093bc..c1bec5f118 100644
--- a/data/xml/2020.mmw.xml
+++ b/data/xml/2020.mmw.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the LREC 2020 Workshop on Multimodal Wordnets (MMW2020)</booktitle>
       <editor><first>Thierry</first><last>Declerk</last></editor>
       <editor><first>Itziar</first><last>Gonzalez-Dios</last></editor>
-      <editor><first>German</first><last>Rigau</last></editor>
+      <editor id="german-rigau"><first>German</first><last>Rigau</last></editor>
       <publisher>The European Language Resources Association (ELRA)</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2020: Improving and Extending a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>E</fixed-case>nglish using an Open-Source Methodology</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
       <author><first>Francis</first><last>Bond</last></author>
diff --git a/data/xml/2020.msr.xml b/data/xml/2020.msr.xml
index e53d1889c5..e3891f9235 100644
--- a/data/xml/2020.msr.xml
+++ b/data/xml/2020.msr.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Third Workshop on Multilingual Surface Realisation</booktitle>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Bernd</first><last>Bohnet</last></editor>
       <editor><first>Thiago Castro</first><last>Ferreira</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
@@ -24,7 +24,7 @@
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Anya</first><last>Belz</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>1–20</pages>
@@ -73,7 +73,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>NILC</fixed-case> at <fixed-case>SR</fixed-case>’20: Exploring Pre-Trained Models in Surface Realisation</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>50–56</pages>
       <abstract>This paper describes the submission by the NILC Computational Linguistics research group of the University of S ̃ao Paulo/Brazil to the English Track 2 (closed sub-track) at the Surface Realisation Shared Task 2020. The success of the current pre-trained models like BERT or GPT-2 in several tasks is well-known, however, this is not the case for data-to-text generation tasks and just recently some initiatives focused on it. This way, we explore how a pre-trained model (GPT-2) performs on the UD-to-text generation task. In general, the achieved results were poor, but there are some interesting ideas to explore. Among the learned lessons we may note that it is necessary to study strategies to represent UD inputs and to introduce structural knowledge into these pre-trained models.</abstract>
diff --git a/data/xml/2020.multilingualbio.xml b/data/xml/2020.multilingualbio.xml
index 18fbcb302d..bb91183398 100644
--- a/data/xml/2020.multilingualbio.xml
+++ b/data/xml/2020.multilingualbio.xml
@@ -29,7 +29,7 @@
       <title>Building a <fixed-case>N</fixed-case>orwegian Lexical Resource for Medical Entity Recognition</title>
       <author><first>Ildiko</first><last>Pilan</last></author>
       <author><first>Pål H.</first><last>Brekke</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>9–14</pages>
       <abstract>We present a large Norwegian lexical resource of categorized medical terms. The resource, which merges information from large medical databases, contains over 56,000 entries, including automatically mapped terms from a Norwegian medical dictionary. We describe the methodology behind this automatic dictionary entry mapping based on keywords and suffixes and further present the results of a manual evaluation performed on a subset by a domain expert. The evaluation indicated that ca. 80% of the mappings were correct.</abstract>
       <url hash="57347173">2020.multilingualbio-1.2</url>
@@ -38,7 +38,7 @@
     </paper>
     <paper id="3">
       <title>Localising the Clinical Terminology <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> by Semi-automated Creation of a <fixed-case>G</fixed-case>erman Interface Vocabulary</title>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <author><first>Larissa</first><last>Hammer</last></author>
       <author><first>David</first><last>Hashemian-Nik</last></author>
       <author><first>Markus</first><last>Kreuzthaler</last></author>
@@ -64,8 +64,8 @@
       <title>Transfer learning applied to text classification in <fixed-case>S</fixed-case>panish radiological reports</title>
       <author><first>Pilar</first><last>López Úbeda</last></author>
       <author><first>Manuel Carlos</first><last>Díaz-Galiano</last></author>
-      <author><first>L. Alfonso</first><last>Urena Lopez</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena Lopez</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <author><first>Teodoro</first><last>Martín-Noguerol</last></author>
       <author><first>Antonio</first><last>Luna</last></author>
       <pages>29–32</pages>
diff --git a/data/xml/2020.mwe.xml b/data/xml/2020.mwe.xml
index 0fbf83ceb2..366d17c43a 100644
--- a/data/xml/2020.mwe.xml
+++ b/data/xml/2020.mwe.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons</booktitle>
-      <editor><first>Stella</first><last>Markantonatou</last></editor>
-      <editor><first>John</first><last>McCrae</last></editor>
+      <editor id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></editor>
+      <editor id="john-philip-mccrae"><first>John</first><last>McCrae</last></editor>
       <editor><first>Jelena</first><last>Mitrović</last></editor>
       <editor><first>Carole</first><last>Tiberius</last></editor>
       <editor><first>Carlos</first><last>Ramisch</last></editor>
@@ -24,7 +24,7 @@
     <paper id="1">
       <title><fixed-case>C</fixed-case>oll<fixed-case>F</fixed-case>r<fixed-case>E</fixed-case>n: Rich Bilingual <fixed-case>E</fixed-case>nglish–<fixed-case>F</fixed-case>rench Collocation Resource</title>
       <author><first>Beatriz</first><last>Fisas</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Joan</first><last>Codina-Filbá</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>1–12</pages>
@@ -62,7 +62,7 @@
     <paper id="5">
       <title><fixed-case>P</fixed-case>olish corpus of verbal multiword expressions</title>
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>32–43</pages>
       <abstract>This paper describes a manually annotated corpus of verbal multi-word expressions in Polish. It is among the 4 biggest datasets in release 1.2 of the PARSEME multiligual corpus. We describe the data sources, as well as the annotation process and its outcomes. We also present interesting phenomena encountered during the annotation task and put forward enhancements for the PARSEME annotation guidelines.</abstract>
       <url hash="41e86711">2020.mwe-1.5</url>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="10">
       <title>Multi-word Expressions for Abusive Speech Detection in <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Jelena</first><last>Mitrović</last></author>
       <author><first>Danka</first><last>Jokić</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
@@ -150,13 +150,13 @@
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
       <author><first>Menghan</first><last>Jiang</last></author>
       <author><first>Timm</first><last>Lichte</last></author>
diff --git a/data/xml/2020.ngt.xml b/data/xml/2020.ngt.xml
index 0ab43387b0..46a8c46df3 100644
--- a/data/xml/2020.ngt.xml
+++ b/data/xml/2020.ngt.xml
@@ -56,7 +56,7 @@
     <paper id="3">
       <title>Balancing Cost and Benefit with Tied-Multi Transformers</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <pages>24–34</pages>
       <abstract>We propose a novel procedure for training multiple Transformers with tied parameters which compresses multiple models into one enabling the dynamic choice of the number of encoder and decoder layers during decoding. In training an encoder-decoder model, typically, the output of the last layer of the N-layer encoder is fed to the M-layer decoder, and the output of the last decoder layer is used to compute loss. Instead, our method computes a single loss consisting of NxM losses, where each loss is computed from the output of one of the M decoder layers connected to one of the N encoder layers. Such a model subsumes NxM models with different number of encoder and decoder layers, and can be used for decoding with fewer than the maximum number of encoder and decoder layers. Given our flexible tied model, we also address to a-priori selection of the number of encoder and decoder layers for faster decoding, and explore recurrent stacking of layers and knowledge distillation for model compression. We present a cost-benefit analysis of applying the proposed approaches for neural machine translation and show that they reduce decoding costs while preserving translation quality.</abstract>
@@ -79,8 +79,8 @@
     <paper id="5">
       <title>Meta-Learning for Few-Shot <fixed-case>NMT</fixed-case> Adaptation</title>
       <author><first>Amr</first><last>Sharaf</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>43–53</pages>
       <abstract>We present META-MT, a meta-learning approach to adapt Neural Machine Translation (NMT) systems in a few-shot setting. META-MT provides a new approach to make NMT models easily adaptable to many target do- mains with the minimal amount of in-domain data. We frame the adaptation of NMT systems as a meta-learning problem, where we learn to adapt to new unseen domains based on simulated offline meta-training domain adaptation tasks. We evaluate the proposed meta-learning strategy on ten domains with general large scale NMT systems. We show that META-MT significantly outperforms classical domain adaptation when very few in- domain examples are available. Our experiments shows that META-MT can outperform classical fine-tuning by up to 2.5 BLEU points after seeing only 4, 000 translated words (300 parallel sentences).</abstract>
       <url hash="11a07b03">2020.ngt-1.5</url>
@@ -129,7 +129,7 @@
       <author><first>Sascha</first><last>Rothe</last></author>
       <author><first>Simon</first><last>Baumgartner</last></author>
       <author><first>Cong</first><last>Yu</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <pages>79–87</pages>
       <abstract>We evaluate the performance of transformer encoders with various decoders for information organization through a new task: generation of section headings for Wikipedia articles. Our analysis shows that decoders containing attention mechanisms over the encoder output achieve high-scoring results by generating extractive text. In contrast, a decoder without attention better facilitates semantic encoding and can be used to generate section embeddings. We additionally introduce a new loss function, which further encourages the decoder to generate high-quality embeddings.</abstract>
       <url hash="38324904">2020.ngt-1.9</url>
@@ -233,7 +233,7 @@
       <title>Expand and Filter: <fixed-case>CUNI</fixed-case> and <fixed-case>LMU</fixed-case> Systems for the <fixed-case>WNGT</fixed-case> 2020 <fixed-case>D</fixed-case>uolingo Shared Task</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Zdeněk</first><last>Kasner</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <pages>153–160</pages>
       <abstract>We present our submission to the Simultaneous Translation And Paraphrase for Language Education (STAPLE) challenge. We used a standard Transformer model for translation, with a crosslingual classifier predicting correct translations on the output n-best list. To increase the diversity of the outputs, we used additional data to train the translation model, and we trained a paraphrasing model based on the Levenshtein Transformer architecture to generate further synonymous translations. The paraphrasing results were again filtered using our classifier. While the use of additional data and our classifier filter were able to improve results, the paraphrasing model produced too many invalid outputs to further improve the output quality. Our model without the paraphrasing component finished in the middle of the field for the shared task, improving over the best baseline by a margin of 10-22 % weighted F1 absolute.</abstract>
@@ -281,7 +281,7 @@
       <title>The <fixed-case>JHU</fixed-case> Submission to the 2020 <fixed-case>D</fixed-case>uolingo Shared Task on Simultaneous Translation and Paraphrase for Language Education</title>
       <author><first>Huda</first><last>Khayrallah</last></author>
       <author><first>Jacob</first><last>Bremerman</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>Matt</first><last>Post</last></author>
@@ -324,7 +324,7 @@
       <author><first>Guillaume</first><last>Klein</last></author>
       <author><first>Dakun</first><last>Zhang</last></author>
       <author><first>Clément</first><last>Chouteau</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>211–217</pages>
       <abstract>This paper describes the OpenNMT submissions to the WNGT 2020 efficiency shared task. We explore training and acceleration of Transformer models with various sizes that are trained in a teacher-student setup. We also present a custom and optimized C++ inference engine that enables fast CPU and GPU decoding with few dependencies. By combining additional optimizations and parallelization techniques, we create small, efficient, and high-quality neural machine translation models.</abstract>
diff --git a/data/xml/2020.nl4xai.xml b/data/xml/2020.nl4xai.xml
index 451293d300..8a481b7984 100644
--- a/data/xml/2020.nl4xai.xml
+++ b/data/xml/2020.nl4xai.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-01-18" type="proceedings">
     <meta>
       <booktitle>2nd Workshop on Interactive Natural Language Technology for Explainable Artificial Intelligence</booktitle>
-      <editor><first>Jose M.</first><last>Alonso</last></editor>
+      <editor id="jose-m-alonso"><first>Jose M.</first><last>Alonso</last></editor>
       <editor><first>Alejandro</first><last>Catala</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -37,7 +37,7 @@
       <author><first>Luca</first><last>Anselma</last></author>
       <author><first>Mirko</first><last>Di Lascio</last></author>
       <author><first>Dario</first><last>Mana</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <pages>5–10</pages>
       <abstract>This paper describes a content selection module for the generation of explanations in a dialogue system designed for customer care domain. First we describe the construction of a corpus of a dialogues containing explanation requests from customers to a virtual agent of a telco, and second we study and formalize the importance of a specific information content for the generated message. In particular, we adapt the notions of importance and relevance in the case of schematic knowledge bases.</abstract>
@@ -81,7 +81,7 @@
     <paper id="7">
       <title>Explaining <fixed-case>B</fixed-case>ayesian Networks in Natural Language: State of the Art and Challenges</title>
       <author><first>Conor</first><last>Hennessy</last></author>
-      <author><first>Alberto</first><last>Bugarín</last></author>
+      <author id="alberto-bugarin-diz"><first>Alberto</first><last>Bugarín</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>28–33</pages>
       <abstract>In order to increase trust in the usage of Bayesian Networks and to cement their role as a model which can aid in critical decision making, the challenge of explainability must be faced. Previous attempts at explaining Bayesian Networks have largely focused on graphical or visual aids. In this paper we aim to highlight the importance of a natural language approach to explanation and to discuss some of the previous and state of the art attempts of the textual explanation of Bayesian Networks. We outline several challenges that remain to be addressed in the generation and validation of natural language explanations of Bayesian Networks. This can serve as a reference for future work on natural language explanations of Bayesian Networks.</abstract>
@@ -99,7 +99,7 @@
     <paper id="9">
       <title>Towards Generating Effective Explanations of Logical Formulas: Challenges and Strategies</title>
       <author><first>Alexandra</first><last>Mayn</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>39–43</pages>
       <abstract>While the problem of natural language generation from logical formulas has a long tradition, thus far little attention has been paid to ensuring that the generated explanations are optimally effective for the user. We discuss issues related to deciding what such output should look like and strategies for addressing those issues. We stress the importance of informing generation of NL explanations of logical formulas through reader studies and findings on the comprehension of logic from Pragmatics and Cognitive Science. We then illustrate the discussed issues and potential ways of addressing them using a simple demo system’s output generated from a propositional logic formula.</abstract>
       <url hash="7f1950ef">2020.nl4xai-1.9</url>
@@ -119,7 +119,7 @@
     <paper id="11">
       <title>Toward Natural Language Mitigation Strategies for Cognitive Biases in Recommender Systems</title>
       <author><first>Alisa</first><last>Rieger</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Nava</first><last>Tintarev</last></author>
       <pages>50–54</pages>
       <abstract>Cognitive biases in the context of consuming online information filtered by recommender systems may lead to sub-optimal choices. One approach to mitigate such biases is through interface and interaction design. This survey reviews studies focused on cognitive bias mitigation of recommender system users during two processes: 1) item selection and 2) preference elicitation. It highlights a number of promising directions for Natural Language Generation research for mitigating cognitive bias including: the need for personalization, as well as for transparency and control.</abstract>
diff --git a/data/xml/2020.nli.xml b/data/xml/2020.nli.xml
index 49ee45cb84..9177048203 100644
--- a/data/xml/2020.nli.xml
+++ b/data/xml/2020.nli.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2020-06-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Natural Language Interfaces</booktitle>
-      <editor><first>Ahmed Hassan</first><last>Awadallah</last></editor>
+      <editor id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></editor>
       <editor><first>Yu</first><last>Su</last></editor>
       <editor><first>Huan</first><last>Sun</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -61,7 +61,7 @@
     <paper id="4">
       <title>Efficient Deployment of Conversational Natural Language Interfaces over Databases</title>
       <author><first>Anthony</first><last>Colas</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Moumita</first><last>Sinha</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
diff --git a/data/xml/2020.nlp4call.xml b/data/xml/2020.nlp4call.xml
index 5e5f23b870..f847c048bd 100644
--- a/data/xml/2020.nlp4call.xml
+++ b/data/xml/2020.nlp4call.xml
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>Substituto – A Synchronous Educational Language Game for Simultaneous Teaching and Crowdsourcing</title>
       <author><first>Marianne Grace</first><last>Araneta</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <author><first>Alexander</first><last>König</last></author>
       <author><first>Ji-Ung</first><last>Lee</last></author>
       <author><first>Ana</first><last>Luís</last></author>
@@ -50,7 +50,7 @@
     <paper id="3">
       <title>Polygloss - A conversational agent for language practice</title>
       <author><first>Etiene</first><last>da Cruz Dalcol</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>21–36</pages>
       <url hash="9e2a3752">2020.nlp4call-1.3</url>
       <bibkey>da-cruz-dalcol-poesio-2020-polygloss</bibkey>
diff --git a/data/xml/2020.nlp4convai.xml b/data/xml/2020.nlp4convai.xml
index 81712e0d78..c2bea5bea8 100644
--- a/data/xml/2020.nlp4convai.xml
+++ b/data/xml/2020.nlp4convai.xml
@@ -6,7 +6,7 @@
       <editor><first>Tsung-Hsien</first><last>Wen</last></editor>
       <editor><first>Asli</first><last>Celikyilmaz</last></editor>
       <editor><first>Zhou</first><last>Yu</last></editor>
-      <editor><first>Alexandros</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></editor>
       <editor><first>Mihail</first><last>Eric</last></editor>
       <editor><first>Anuj</first><last>Kumar</last></editor>
       <editor><first>Iñigo</first><last>Casanueva</last></editor>
@@ -39,8 +39,8 @@
       <title>On Incorporating Structural Information to improve Dialogue Response Generation</title>
       <author><first>Nikita</first><last>Moghe</last></author>
       <author><first>Priyesh</first><last>Vijayan</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>11–24</pages>
       <abstract>We consider the task of generating dialogue responses from background knowledge comprising of domain specific resources. Specifically, given a conversation around a movie, the task is to generate the next response based on background knowledge about the movie such as the plot, review, Reddit comments etc. This requires capturing structural, sequential and semantic information from the conversation context and the background resources. We propose a new architecture that uses the ability of BERT to capture deep contextualized representations in conjunction with explicit structure and sequence information. More specifically, we use (i) Graph Convolutional Networks (GCNs) to capture structural information, (ii) LSTMs to capture sequential information and (iii) BERT for the deep contextualized representations that capture semantic information. We analyze the proposed architecture extensively. To this end, we propose a plug-and-play Semantics-Sequences-Structures (SSS) framework which allows us to effectively combine such linguistic information. Through a series of experiments we make some interesting observations. First, we observe that the popular adaptation of the GCN model for NLP tasks where structural information (GCNs) was added on top of sequential information (LSTMs) performs poorly on our task. This leads us to explore interesting ways of combining semantic and structural information to improve the performance. Second, we observe that while BERT already outperforms other deep contextualized representations such as ELMo, it still benefits from the additional structural information explicitly added using GCNs. This is a bit surprising given the recent claims that BERT already captures structural information. Lastly, the proposed SSS framework gives an improvement of 7.95% on BLUE score over the baseline.</abstract>
       <url hash="da7464a7">2020.nlp4convai-1.2</url>
@@ -52,7 +52,7 @@
       <title><fixed-case>C</fixed-case>opy<fixed-case>BERT</fixed-case>: A Unified Approach to Question Generation with Self-Attention</title>
       <author><first>Stalin</first><last>Varanasi</last></author>
       <author><first>Saadullah</first><last>Amin</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>25–31</pages>
       <abstract>Contextualized word embeddings provide better initialization for neural networks that deal with various natural language understanding (NLU) tasks including Question Answering (QA) and more recently, Question Generation(QG). Apart from providing meaningful word representations, pre-trained transformer models (Vaswani et al., 2017), such as BERT (Devlin et al., 2019) also provide self-attentions which encode syntactic information that can be probed for dependency parsing (Hewitt and Manning, 2019) and POStagging (Coenen et al., 2019). In this paper, we show that the information from selfattentions of BERT are useful for language modeling of questions conditioned on paragraph and answer phrases. To control the attention span, we use semi-diagonal mask and utilize a shared model for encoding and decoding, unlike sequence-to-sequence. We further employ copy-mechanism over self-attentions to acheive state-of-the-art results for Question Generation on SQuAD v1.1 (Rajpurkar et al., 2016).</abstract>
       <url hash="7edb9138">2020.nlp4convai-1.3</url>
@@ -146,7 +146,7 @@
       <author><first>Sanchit</first><last>Agarwal</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Tagyoung</first><last>Chung</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>79–89</pages>
       <abstract>Dialogue state tracking (DST) is at the heart of task-oriented dialogue systems. However, the scarcity of labeled data is an obstacle to building accurate and robust state tracking systems that work across a variety of domains. Existing approaches generally require some dialogue data with state information and their ability to generalize to unknown domains is limited. In this paper, we propose using machine reading comprehension (RC) in state tracking from two perspectives: model architectures and datasets. We divide the slot types in dialogue state into categorical or extractive to borrow the advantages from both multiple-choice and span-based reading comprehension models. Our method achieves near the current state-of-the-art in joint goal accuracy on MultiWOZ 2.1 given full training data. More importantly, by leveraging machine reading comprehension datasets, our method outperforms the existing approaches by many a large margin in few-shot scenarios when the availability of in-domain data is limited. Lastly, even without any state tracking data, i.e., zero-shot scenario, our proposed approach achieves greater than 90% average slot accuracy in 12 out of 30 slots in MultiWOZ 2.1.</abstract>
       <url hash="624d6e71">2020.nlp4convai-1.10</url>
@@ -170,7 +170,7 @@
       <title>Learning to Classify Intents and Slot Labels Given a Handful of Examples</title>
       <author><first>Jason</first><last>Krone</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>96–108</pages>
       <abstract>Intent classification (IC) and slot filling (SF) are core components in most goal-oriented dialogue systems. Current IC/SF models perform poorly when the number of training examples per class is small. We propose a new few-shot learning task, few-shot IC/SF, to study and improve the performance of IC and SF models on classes not seen at training time in ultra low resource scenarios. We establish a few-shot IC/SF benchmark by defining few-shot splits for three public IC/SF datasets, ATIS, TOP, and Snips. We show that two popular few-shot learning algorithms, model agnostic meta learning (MAML) and prototypical networks, outperform a fine-tuning baseline on this benchmark. Prototypical networks achieves significant gains in IC performance on the ATIS and TOP datasets, while both prototypical networks and MAML outperform the baseline with respect to SF on all three datasets. In addition, we demonstrate that joint training as well as the use of pre-trained language models, ELMo and BERT in our case, are complementary to these few-shot learning methods and yield further gains.</abstract>
       <url hash="03b39705">2020.nlp4convai-1.12</url>
@@ -213,7 +213,7 @@
       <author><first>Tovly</first><last>Deutsch</last></author>
       <author><first>Stephen</first><last>Casper</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>132–143</pages>
       <abstract>The predominant approach to open-domain dialog generation relies on end-to-end training of neural models on chat datasets. However, this approach provides little insight as to what these models learn (or do not learn) about engaging in dialog. In this study, we analyze the internal representations learned by neural open-domain dialog systems and evaluate the quality of these representations for learning basic conversational skills. Our results suggest that standard open-domain dialog systems struggle with answering questions, inferring contradiction, and determining the topic of conversation, among other tasks. We also find that the dyadic, turn-taking nature of dialog is not fully leveraged by these models. By exploring these limitations, we highlight the need for additional research into architectures and training methods that can better capture high-level information about dialog.</abstract>
       <url hash="9360d36d">2020.nlp4convai-1.15</url>
diff --git a/data/xml/2020.nlp4if.xml b/data/xml/2020.nlp4if.xml
index 853e730df9..1ba4f6f9e1 100644
--- a/data/xml/2020.nlp4if.xml
+++ b/data/xml/2020.nlp4if.xml
@@ -8,7 +8,7 @@
       <editor><first>Giovanni Luca</first><last>Ciampaglia</last></editor>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Chris</first><last>Leberknight</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <publisher>International Committee on Computational Linguistics (ICCL)</publisher>
       <address>Barcelona, Spain (Online)</address>
       <month>December</month>
diff --git a/data/xml/2020.nlp4musa.xml b/data/xml/2020.nlp4musa.xml
index 32c46b5949..c75b18ca3e 100644
--- a/data/xml/2020.nlp4musa.xml
+++ b/data/xml/2020.nlp4musa.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on NLP for Music and Audio (NLP4MusA)</booktitle>
       <editor><first>Sergio</first><last>Oramas</last></editor>
-      <editor><first>Luis</first><last>Espinosa-Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></editor>
       <editor><first>Elena</first><last>Epure</last></editor>
       <editor><first>Rosie</first><last>Jones</last></editor>
       <editor><first>Mohamed</first><last>Sordo</last></editor>
diff --git a/data/xml/2020.nlpbt.xml b/data/xml/2020.nlpbt.xml
index 41b3a47f2f..dc0633ab7c 100644
--- a/data/xml/2020.nlpbt.xml
+++ b/data/xml/2020.nlpbt.xml
@@ -57,7 +57,7 @@
     </paper>
     <paper id="4">
       <title>A Benchmark for Structured Procedural Knowledge Extraction from Cooking Videos</title>
-      <author><first>Frank F.</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
       <author><first>Lei</first><last>Ji</last></author>
       <author><first>Botian</first><last>Shi</last></author>
       <author><first>Junyi</first><last>Du</last></author>
@@ -107,7 +107,7 @@
       <title>Towards End-to-End In-Image Neural Machine Translation</title>
       <author><first>Elman</first><last>Mansimov</last></author>
       <author><first>Mitchell</first><last>Stern</last></author>
-      <author><first>Mia</first><last>Chen</last></author>
+      <author id="mia-xu-chen"><first>Mia</first><last>Chen</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
       <author><first>Puneet</first><last>Jain</last></author>
diff --git a/data/xml/2020.nlpcovid19.xml b/data/xml/2020.nlpcovid19.xml
index cc9bf4a325..747481bc3b 100644
--- a/data/xml/2020.nlpcovid19.xml
+++ b/data/xml/2020.nlpcovid19.xml
@@ -3,14 +3,14 @@
   <volume id="acl" ingest-date="2020-10-13" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on <fixed-case>NLP</fixed-case> for <fixed-case>COVID-19</fixed-case> at <fixed-case>ACL</fixed-case> 2020</booktitle>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Mark</first><last>Dredze</last></editor>
       <editor><first>Emilio</first><last>Ferrara</last></editor>
       <editor><first>Jonathan</first><last>May</last></editor>
       <editor><first>Robert</first><last>Munro</last></editor>
-      <editor><first>Cecile</first><last>Paris</last></editor>
-      <editor><first>Byron</first><last>Wallace</last></editor>
+      <editor id="cecile-paris"><first>Cecile</first><last>Paris</last></editor>
+      <editor id="byron-c-wallace"><first>Byron</first><last>Wallace</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -23,7 +23,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>CORD-19</fixed-case>: The <fixed-case>COVID-19</fixed-case> Open Research Dataset</title>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Yoganand</first><last>Chandrasekhar</last></author>
       <author><first>Russell</first><last>Reas</last></author>
@@ -48,7 +48,7 @@
       <author><first>Christopher</first><last>Wilhelm</last></author>
       <author><first>Boya</first><last>Xie</last></author>
       <author><first>Douglas M.</first><last>Raymond</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <author><first>Sebastian</first><last>Kohlmeier</last></author>
       <abstract>The COVID-19 Open Research Dataset (CORD-19) is a growing resource of scientific papers on COVID-19 and related historical coronavirus research. CORD-19 is designed to facilitate the development of text mining and information retrieval systems over its rich collection of metadata and structured full text papers. Since its release, CORD-19 has been downloaded over 200K times and has served as the basis of many COVID-19 text mining and discovery systems. In this article, we describe the mechanics of dataset construction, highlighting challenges and key design decisions, provide an overview of how CORD-19 has been used, and describe several shared tasks built around the dataset. We hope this resource will continue to bring together the computing community, biomedical experts, and policy makers in the search for effective treatments and management policies for COVID-19.</abstract>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>CODA-19</fixed-case>: Using a Non-Expert Crowd to Annotate Research Aspects on 10,000+ Abstracts in the <fixed-case>COVID-19</fixed-case> Open Research Dataset</title>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last></author>
       <author><first>Chieh-Yang</first><last>Huang</last></author>
       <author><first>Chien-Kuang Cornelia</first><last>Ding</last></author>
       <author><first>Yen-Chia</first><last>Hsu</last></author>
@@ -231,13 +231,13 @@
   <volume id="2" ingest-date="2020-11-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on <fixed-case>NLP</fixed-case> for <fixed-case>COVID</fixed-case>-19 (Part 2) at <fixed-case>EMNLP</fixed-case> 2020</booktitle>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Michael</first><last>Conway</last></editor>
       <editor><first>Berry</first><last>de Bruijn</last></editor>
       <editor><first>Mark</first><last>Dredze</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
-      <editor><first>Byron</first><last>Wallace</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="byron-c-wallace"><first>Byron</first><last>Wallace</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>December</month>
@@ -390,7 +390,7 @@
       <author><first>Brandon</first><last>Waldon</last></author>
       <author><first>Shrinidhi K</first><last>Lakshmikanth</last></author>
       <author><first>Ishan</first><last>Shah</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <author><first>Garrick</first><last>Sherman</last></author>
       <author><first>James</first><last>Zou</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
@@ -415,10 +415,10 @@
     </paper>
     <paper id="12">
       <title>Improved Topic Representations of Medical Documents to Assist <fixed-case>COVID</fixed-case>-19 Literature Exploration</title>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
       <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Simon</first><last>Šuster</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
       <abstract>Efficient discovery and exploration of biomedical literature has grown in importance in the context of the COVID-19 pandemic, and topic-based methods such as latent Dirichlet allocation (LDA) are a useful tool for this purpose. In this study we compare traditional topic models based on word tokens with topic models based on medical concepts, and propose several ways to improve topic coherence and specificity.</abstract>
       <url hash="4bee2d49">2020.nlpcovid19-2.12</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.12</doi>
@@ -427,7 +427,7 @@
     </paper>
     <paper id="13">
       <title>A System for Worldwide <fixed-case>COVID</fixed-case>-19 Information Aggregation</title>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <author><first>Frederic</first><last>Bergeron</last></author>
       <author><first>Junjie</first><last>Chen</last></author>
       <author><first>Fei</first><last>Cheng</last></author>
@@ -447,7 +447,7 @@
       <author><first>Yugo</first><last>Murawaki</last></author>
       <author><first>Kazumasa</first><last>Omura</last></author>
       <author><first>Haiyue</first><last>Song</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Shinji</first><last>Suzuki</last></author>
       <author><first>Ribeka</first><last>Tanaka</last></author>
       <author><first>Yu</first><last>Tanaka</last></author>
@@ -481,8 +481,8 @@
       <author><first>Arantxa</first><last>Otegi</last></author>
       <author><first>Jon Ander</first><last>Campos</last></author>
       <author><first>Gorka</first><last>Azkune</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <abstract>We present a Question Answering (QA) system that won one of the tasks of the Kaggle CORD-19 Challenge, according to the qualitative evaluation of experts. The system is a combination of an Information Retrieval module and a reading comprehension module that finds the answers in the retrieved passages. In this paper we present a quantitative and qualitative analysis of the system. The quantitative evaluation using manually annotated datasets contradicted some of our design choices, e.g. the fact that using QuAC for fine-tuning provided better answers over just using SQuAD. We analyzed this mismatch with an additional A/B test which showed that the system using QuAC was indeed preferred by users, confirming our intuition. Our analysis puts in question the suitability of automatic metrics and its correlation to user preferences. We also show that automatic metrics are highly dependent on the characteristics of the gold standard, such as the average length of the answers.</abstract>
       <url hash="da24810c">2020.nlpcovid19-2.15</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.15</doi>
@@ -506,7 +506,7 @@
       <title>Public Sentiment on Governmental <fixed-case>COVID</fixed-case>-19 Measures in <fixed-case>D</fixed-case>utch Social Media</title>
       <author><first>Shihan</first><last>Wang</last></author>
       <author><first>Marijn</first><last>Schraagen</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <author><first>Mehdi</first><last>Dastani</last></author>
       <abstract>Public sentiment (the opinion, attitude or feeling that the public expresses) is a factor of interest for government, as it directly influences the implementation of policies. Given the unprecedented nature of the COVID-19 crisis, having an up-to-date representation of public sentiment on governmental measures and announcements is crucial. In this paper, we analyse Dutch public sentiment on governmental COVID-19 measures from text data collected across three online media sources (Twitter, Reddit and Nu.nl) from February to September 2020. We apply sentiment analysis methods to analyse polarity over time, as well as to identify stance towards two specific pandemic policies regarding social distancing and wearing face masks. The presented preliminary results provide valuable insights into the narratives shown in vast social media text data, which help understand the influence of COVID-19 measures on the general public.</abstract>
       <url hash="d680dc23">2020.nlpcovid19-2.17</url>
@@ -540,10 +540,10 @@
     <paper id="20">
       <title><fixed-case>COVID</fixed-case>-19: A Semantic-Based Pipeline for Recommending Biomedical Entities</title>
       <author><first>Marcia Afonso</first><last>Barros</last></author>
-      <author><first>Andre</first><last>Lamurias</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
       <author><first>Diana</first><last>Sousa</last></author>
       <author><first>Pedro</first><last>Ruas</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <abstract>With the increasing number of publications about COVID-19, it is a challenge to extract personalized knowledge suitable for each researcher. This work aims to build a new semantic-based pipeline for recommending biomedical entities to scientific researchers. To this end, we developed a pipeline that creates an implicit feedback matrix based on Named Entity Recognition (NER) on a corpus of documents, using multidisciplinary ontologies for recognizing and linking the entities. Our hypothesis is that by using ontologies from different fields in the NER phase, we can improve the results for state-of-the-art collaborative-filtering recommender systems applied to the dataset created. The tests performed using the COVID-19 Open Research Dataset (CORD-19) dataset show that when using four ontologies, the results for precision@k, for example, reach the 80%, whereas when using only one ontology, the results for precision@k drops to 20%, for the same users. Furthermore, the use of multi-fields entities may help in the discovery of new items, even if the researchers do not have items from that field in their set of preferences.</abstract>
       <url hash="921db543">2020.nlpcovid19-2.20</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.20</doi>
@@ -555,7 +555,7 @@
       <author><first>Hilal</first><last>Dönmez</last></author>
       <author><first>Rıza</first><last>Özçelik</last></author>
       <author><first>Elif</first><last>Ozkirimli</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <abstract>Coronavirus Disease of 2019 (COVID-19) created dire consequences globally and triggered an intense scientific effort from different domains. The resulting publications created a huge text collection in which finding the studies related to a biomolecule of interest is challenging for general purpose search engines because the publications are rich in domain specific terminology. Here, we present Vapur: an online COVID-19 search engine specifically designed to find related protein - chemical pairs. Vapur is empowered with a relation-oriented inverted index that is able to retrieve and group studies for a query biomolecule with respect to its related entities. The inverted index of Vapur is automatically created with a BioNLP pipeline and integrated with an online user interface. The online interface is designed for the smooth traversal of the current literature by domain researchers and is publicly available at <url>https://tabilab.cmpe.boun.edu.tr/vapur/</url>.</abstract>
       <url hash="f78d2f74">2020.nlpcovid19-2.21</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.21</doi>
@@ -566,9 +566,9 @@
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
       <author><first>Ernesto Luis</first><last>Estevanell-Valladares</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Yudivián</first><last>Almeida-Cruz</last></author>
       <abstract>This paper presents the preliminary results of an ongoing project that analyzes the growing body of scientific research published around the COVID-19 pandemic. In this research, a general-purpose semantic model is used to double annotate a batch of 500 sentences that were manually selected by the researchers from the CORD-19 corpus. Afterwards, a baseline text-mining pipeline is designed and evaluated via a large batch of 100,959 sentences. We present a qualitative analysis of the most interesting facts automatically extracted and highlight possible future lines of development. The preliminary results show that general-purpose semantic models are a useful tool for discovering fine-grained knowledge in large corpora of scientific documents.</abstract>
       <url hash="25a99692">2020.nlpcovid19-2.22</url>
@@ -634,10 +634,10 @@
     <paper id="28">
       <title><fixed-case>A</fixed-case>sk<fixed-case>M</fixed-case>e: A <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid-based <fixed-case>NLP</fixed-case> Query and Retrieval System for Covid-19 Literature</title>
       <author><first>Keith</first><last>Suderman</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Verhagen</first><last>Marc</last></author>
       <author><first>Brent</first><last>Cochran</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <abstract>In a recent project, the Language Application Grid was augmented to support the mining of scientific publications. The results of that ef- fort have now been repurposed to focus on Covid-19 literature, including modification of the LAPPS Grid “AskMe” query and retrieval engine. We describe the AskMe system and discuss its functionality as compared to other query engines available to search covid-related publications.</abstract>
       <url hash="9c6532e1">2020.nlpcovid19-2.28</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.28</doi>
@@ -708,7 +708,7 @@
     <paper id="34">
       <title><fixed-case>W</fixed-case>eibo-<fixed-case>COV</fixed-case>: A Large-Scale <fixed-case>COVID</fixed-case>-19 Social Media Dataset from <fixed-case>W</fixed-case>eibo</title>
       <author><first>Yong</first><last>Hu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Anfan</first><last>Chen</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <abstract>With the rapid development of COVID-19 around the world, people are requested to maintain “social distance” and “stay at home”. In this scenario, extensive social interactions transfer to cyberspace, especially on social media platforms like Twitter and Sina Weibo. People generate posts to share information, express opinions and seek help during the pandemic outbreak, and these kinds of data on social media are valuable for studies to prevent COVID-19 transmissions, such as early warning and outbreaks detection. Therefore, in this paper, we release a novel and fine-grained large-scale COVID-19 social media dataset collected from Sina Weibo, named Weibo-COV, contains more than 40 million posts ranging from December 1, 2019 to April 30, 2020. Moreover, this dataset includes comprehensive information nuggets like post-level information, interactive information, location information, and repost network. We hope this dataset can promote studies of COVID-19 from multiple perspectives and enable better and rapid researches to suppress the spread of this pandemic.</abstract>
@@ -719,10 +719,10 @@
     <paper id="35">
       <title>Detecting Emerging Symptoms of <fixed-case>COVID</fixed-case>-19 using Context-based <fixed-case>T</fixed-case>witter Embeddings</title>
       <author><first>Roshan</first><last>Santosh</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <abstract>In this paper, we present an iterative graph-based approach for the detection of symptoms of COVID-19, the pathology of which seems to be evolving. More generally, the method can be applied to finding context-specific words and texts (e.g. symptom mentions) in large imbalanced corpora (e.g. all tweets mentioning #COVID-19). Given the novelty of COVID-19, we also test if the proposed approach generalizes to the problem of detecting Adverse Drug Reaction (ADR). We find that the approach applied to Twitter data can detect symptom mentions substantially before to their being reported by the Centers for Disease Control (CDC).</abstract>
       <url hash="0289281f">2020.nlpcovid19-2.35</url>
       <doi>10.18653/v1/2020.nlpcovid19-2.35</doi>
diff --git a/data/xml/2020.nlpcss.xml b/data/xml/2020.nlpcss.xml
index 009dd63e32..c1eff98353 100644
--- a/data/xml/2020.nlpcss.xml
+++ b/data/xml/2020.nlpcss.xml
@@ -50,7 +50,7 @@
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Erenay</first><last>Dayanik</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>24–34</pages>
       <abstract>Manifestos are official documents of political parties, providing a comprehensive topical overview of the electoral programs. Voters, however, seldom read them and often prefer other channels, such as newspaper articles, to understand the party positions on various policy issues. The natural question to ask is how compatible these two formats (manifesto and newspaper reports) are in their representation of party positioning. We address this question with an approach that combines political science (manual annotation and analysis) and natural language processing (supervised claim identification) in a cross-text type setting: we train a classifier on annotated newspaper data and test its performance on manifestos. Our findings show a) strong performance for supervised classification even across text types and b) a substantive overlap between the two formats in terms of party positioning, with differences regarding the salience of specific issues.</abstract>
       <url hash="c3816eb8">2020.nlpcss-1.3</url>
@@ -61,7 +61,7 @@
     <paper id="4">
       <title>Does Social Support (Expressed in Post Titles) Elicit Comments in Online Substance Use Recovery Forums?</title>
       <author><first>Anietie</first><last>Andy</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <pages>35–40</pages>
       <abstract>Individuals recovering from substance use often seek social support (emotional and informational) on online recovery forums, where they can both write and comment on posts, expressing their struggles and successes. A common challenge in these forums is that certain posts (some of which may be support seeking) receive no comments. In this work, we use data from two Reddit substance recovery forums: /r/Leaves and /r/OpiatesRecovery, to determine the relationship between the social supports expressed in the titles of posts and the number of comments they receive. We show that the types of social support expressed in post titles that elicit comments vary from one substance use recovery forum to the other.</abstract>
       <url hash="286d4acc">2020.nlpcss-1.4</url>
@@ -83,7 +83,7 @@
     </paper>
     <paper id="6">
       <title>Assessing population-level symptoms of anxiety, depression, and suicide risk in real time using <fixed-case>NLP</fixed-case> applied to social media data</title>
-      <author><first>Alex</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex</first><last>Fine</last></author>
       <author><first>Patrick</first><last>Crutchley</last></author>
       <author><first>Jenny</first><last>Blase</last></author>
       <author><first>Joshua</first><last>Carroll</last></author>
@@ -188,7 +188,7 @@
     <paper id="14">
       <title>Recalibrating classifiers for interpretable abusive content detection</title>
       <author><first>Bertie</first><last>Vidgen</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>Sam</first><last>Staton</last></author>
       <author><first>Tom</first><last>Melham</last></author>
       <author><first>Helen</first><last>Margetts</last></author>
@@ -204,7 +204,7 @@
     <paper id="15">
       <title>Predicting independent living outcomes from written reports of social workers</title>
       <author><first>Angelika</first><last>Maier</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>139–148</pages>
       <abstract>In social care environments, the main goal of social workers is to foster independent living by their clients. An important task is thus to monitor progress towards reaching independence in different areas of their patients’ life. To support this task, we present an approach that extracts indications of independence on different life aspects from the day-to-day documentation that social workers create. We describe the process of collecting and annotating a corresponding corpus created from data records of two social work institutions with a focus on disability care. We show that the agreement on the task of annotating the observations of social workers with respect to discrete independent levels yields a high agreement of .74 as measured by Fleiss’ Kappa. We present a classification approach towards automatically classifying an observation into the discrete independence levels and present results for different types of classifiers. Against our original expectation, we show that we reach F-Measures (macro) of 95% averaged across topics, showing that this task can be automatically solved.</abstract>
       <url hash="a67cc98a">2020.nlpcss-1.15</url>
@@ -215,7 +215,7 @@
     <paper id="16">
       <title>Analyzing Political Bias and Unfairness in News Articles at Different Levels of Granularity</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>149–154</pages>
@@ -260,7 +260,7 @@
     <paper id="20">
       <title>Social media data as a lens onto care-seeking behavior among women veterans of the <fixed-case>US</fixed-case> armed forces</title>
       <author><first>Kacie</first><last>Kelly</last></author>
-      <author><first>Alex</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex</first><last>Fine</last></author>
       <author><first>Glen</first><last>Coppersmith</last></author>
       <pages>184–192</pages>
       <abstract>In this article, we examine social media data as a lens onto support-seeking among women veterans of the US armed forces. Social media data hold a great deal of promise as a source of information on needs and support-seeking among individuals who are excluded from or systematically prevented from accessing clinical or other institutions ostensibly designed to support them. We apply natural language processing (NLP) techniques to more than 3 million Tweets collected from 20,000 Twitter users. We find evidence that women veterans are more likely to use social media to seek social and community engagement and to discuss mental health and veterans’ issues significantly more frequently than their male counterparts. By contrast, male veterans tend to use social media to amplify political ideologies or to engage in partisan debate. Our results have implications for how organizations can provide outreach and services to this uniquely vulnerable population, and illustrate the utility of non-traditional observational data sources such as social media to understand the needs of marginalized groups.</abstract>
@@ -272,9 +272,9 @@
     <paper id="21">
       <title>Understanding Weekly <fixed-case>COVID</fixed-case>-19 Concerns through Dynamic Content-Specific <fixed-case>LDA</fixed-case> Topic Modeling</title>
       <author><first>Mohammadzaman</first><last>Zamani</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <author><first>Adithya</first><last>Virinchipuram Ganesan</last></author>
       <author><first>Sean</first><last>Clouston</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
diff --git a/data/xml/2020.nlpmc.xml b/data/xml/2020.nlpmc.xml
index 7c85057190..468d61a8fe 100644
--- a/data/xml/2020.nlpmc.xml
+++ b/data/xml/2020.nlpmc.xml
@@ -6,11 +6,11 @@
       <editor><first>Parminder</first><last>Bhatia</last></editor>
       <editor><first>Steven</first><last>Lin</last></editor>
       <editor><first>Rashmi</first><last>Gangadharaiah</last></editor>
-      <editor><first>Byron</first><last>Wallace</last></editor>
+      <editor id="byron-c-wallace"><first>Byron</first><last>Wallace</last></editor>
       <editor><first>Izhak</first><last>Shafran</last></editor>
       <editor><first>Chaitanya</first><last>Shivade</last></editor>
       <editor><first>Nan</first><last>Du</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -27,7 +27,7 @@
       <author><first>Xiyu</first><last>Ding</last></author>
       <author><first>Michael</first><last>Barnett</last></author>
       <author><first>Ateev</first><last>Mehrotra</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>1–6</pages>
       <abstract>Electronic consult (eConsult) systems allow specialists more flexibility to respond to referrals more efficiently, thereby increasing access in under-resourced healthcare settings like safety net systems. Understanding the usage patterns of eConsult system is an important part of improving specialist efficiency. In this work, we develop and apply classifiers to a dataset of eConsult questions from primary care providers to specialists, classifying the messages for how they were triaged by the specialist office, and the underlying type of clinical question posed by the primary care provider. We show that pre-trained transformer models are strong baselines, with improving performance from domain-specific training and shared representations.</abstract>
       <url hash="187aa064">2020.nlpmc-1.1</url>
@@ -104,7 +104,7 @@
       <author><first>Itika</first><last>Gupta</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <author><first>Devika</first><last>Salunke</last></author>
-      <author><first>Andrew</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew</first><last>Boyd</last></author>
       <author><first>Paula</first><last>Allen-Meares</last></author>
       <author><first>Carolyn</first><last>Dickens</last></author>
       <author><first>Olga</first><last>Garcia</last></author>
@@ -119,7 +119,7 @@
       <title>On the Utility of Audiovisual Dialog Technologies and Signal Analytics for Real-time Remote Monitoring of Depression Biomarkers</title>
       <author><first>Michael</first><last>Neumann</last></author>
       <author><first>Oliver</first><last>Roessler</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <author><first>Vikram</first><last>Ramanarayanan</last></author>
       <pages>47–52</pages>
       <abstract>We investigate the utility of audiovisual dialog systems combined with speech and video analytics for real-time remote monitoring of depression at scale in uncontrolled environment settings. We collected audiovisual conversational data from participants who interacted with a cloud-based multimodal dialog system, and automatically extracted a large set of speech and vision metrics based on the rich existing literature of laboratory studies. We report on the efficacy of various audio and video metrics in differentiating people with mild, moderate and severe depression, and discuss the implications of these results for the deployment of such technologies in real-world neurological diagnosis and monitoring applications.</abstract>
diff --git a/data/xml/2020.nlposs.xml b/data/xml/2020.nlposs.xml
index ae70e41e8b..de90860b24 100644
--- a/data/xml/2020.nlposs.xml
+++ b/data/xml/2020.nlposs.xml
@@ -6,7 +6,7 @@
       <editor><first>Eunjeong L.</first><last>Park</last></editor>
       <editor><first>Masato</first><last>Hagiwara</last></editor>
       <editor><first>Dmitrijs</first><last>Milajevs</last></editor>
-      <editor><first>Nelson F.</first><last>Liu</last></editor>
+      <editor id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></editor>
       <editor><first>Geeticka</first><last>Chauhan</last></editor>
       <editor><first>Liling</first><last>Tan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -83,7 +83,7 @@
     <paper id="6">
       <title>Flexible retrieval with <fixed-case>NMSLIB</fixed-case> and <fixed-case>F</fixed-case>lex<fixed-case>N</fixed-case>eu<fixed-case>ART</fixed-case></title>
       <author><first>Leonid</first><last>Boytsov</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>32–43</pages>
       <abstract>Our objective is to introduce to the NLP community NMSLIB, describe a new retrieval toolkit FlexNeuART, as well as their integration capabilities. NMSLIB, while being one the fastest k-NN search libraries, is quite generic and supports a variety of distance/similarity functions. Because the library relies on the distance-based structure-agnostic algorithms, it can be further extended by adding new distances. FlexNeuART is a modular, extendible and flexible toolkit for candidate generation in IR and QA applications, which supports mixing of classic and neural ranking signals. FlexNeuART can efficiently retrieve mixed dense and sparse representations (with weights learned from training data), which is achieved by extending NMSLIB. In that, other retrieval systems work with purely sparse representations (e.g., Lucene), purely dense representations (e.g., FAISS and Annoy), or only perform mixing at the re-ranking stage.</abstract>
       <url hash="2656705d">2020.nlposs-1.6</url>
@@ -234,7 +234,7 @@
       <author><first>Pasquale</first><last>Lisena</last></author>
       <author><first>Ismail</first><last>Harrando</last></author>
       <author><first>Oussama</first><last>Kandakji</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <pages>132–140</pages>
       <abstract>From LDA to neural models, different topic modeling approaches have been proposed in the literature. However, their suitability and performance is not easy to compare, particularly when the algorithms are being used in the wild on heterogeneous datasets. In this paper, we introduce ToModAPI (TOpic MOdeling API), a wrapper library to easily train, evaluate and infer using different topic modeling algorithms through a unified interface. The library is extensible and can be used in Python environments or through a Web API.</abstract>
       <url hash="c15ed09f">2020.nlposs-1.19</url>
@@ -256,7 +256,7 @@
     <paper id="21">
       <title><fixed-case>WAFFLE</fixed-case>: A Graph for <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Applied to <fixed-case>F</fixed-case>ree<fixed-case>F</fixed-case>orm Linguistic Exploration</title>
       <author><first>Berk</first><last>Ekmekci</last></author>
-      <author><first>Blake</first><last>Howald</last></author>
+      <author id="blake-howald"><first>Blake</first><last>Howald</last></author>
       <pages>147–157</pages>
       <abstract>The WordNet database of English (Fellbaum, 1998) is a key source of semantic information for research and development of natural language processing applications. As the sophistication of these applications increases with the use of large datasets, deep learning, and graph-based methods, so should the use of WordNet. To this end, we introduce WAFFLE: WordNet Applied to FreeForm Linguistic Exploration which makes WordNet available in an open source graph data structure. The WAFFLE graph relies on platform agnostic formats for robust interrogation and flexibility. Where existing implementations of WordNet offer dictionary-like lookup, single degree neighborhood operations, and path based similarity-scoring, the WAFFLE graph makes all nodes (semantic relation sets) and relationships queryable at scale, enabling local and global analysis of all relationships without the need for custom code. We demonstrate WAFFLE’s ease of use, visualization capabilities, and scalable efficiency with common queries, operations, and interactions. WAFFLE is available at github.com/TRSS-NLP/WAFFLE.</abstract>
       <url hash="b573660b">2020.nlposs-1.21</url>
diff --git a/data/xml/2020.nlptea.xml b/data/xml/2020.nlptea.xml
index 75c759f549..a419ebf7e0 100644
--- a/data/xml/2020.nlptea.xml
+++ b/data/xml/2020.nlptea.xml
@@ -100,7 +100,7 @@
       <author><first>Yongchang</first><last>Cao</last></author>
       <author><first>Liang</first><last>He</last></author>
       <author><first>Robert</first><last>Ridley</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>49–56</pages>
       <abstract>This paper describes our proposed model for the Chinese Grammatical Error Diagnosis (CGED) task in NLPTEA2020. The goal of CGED is to use natural language processing techniques to automatically diagnose Chinese grammatical errors in sentences. To this end, we design and implement a CGED model named BERT with Score-feature Gates Error Diagnoser (BSGED), which is based on the BERT model, Bidirectional Long Short-Term Memory (BiLSTM) and conditional random field (CRF). In order to address the problem of losing partial-order relationships when embedding continuous feature items as with previous works, we propose a gating mechanism for integrating continuous feature items, which effectively retains the partial-order relationships between feature items. We perform LSTM processing on the encoding result of the BERT model, and further extract the sequence features. In the final test-set evaluation, we obtained the highest F1 score at the detection level and are among the top 3 F1 scores at the identification level.</abstract>
       <url hash="7055b8ac">2020.nlptea-1.7</url>
@@ -127,7 +127,7 @@
       <author><first>Meiyuan</first><last>Fang</last></author>
       <author><first>Kai</first><last>Fu</last></author>
       <author><first>Jiping</first><last>Wang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Jin</first><last>Huang</last></author>
       <author><first>Yitao</first><last>Duan</last></author>
       <pages>67–77</pages>
@@ -177,7 +177,7 @@
       <author><first>Yingjie</first><last>Yan</last></author>
       <author><first>Yangchao</first><last>Han</last></author>
       <author><first>Rui</first><last>Chao</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>97–101</pages>
       <abstract>Chinese Grammatical Error Diagnosis (CGED) is a natural language processing task for the NLPTEA6 workshop. The goal of this task is to automatically diagnose grammatical errors in Chinese sentences written by L2 learners. This paper proposes a RoBERTa-BiLSTM-CRF model to detect grammatical errors in sentences. Firstly, RoBERTa model is used to obtain word vectors. Secondly, word vectors are input into BiLSTM layer to learn context features. Last, CRF layer without hand-craft features work for processing the output by BiLSTM. The optimal global sequences are obtained according to state transition matrix of CRF and adjacent labels of training data. In experiments, the result of RoBERTa-CRF model and ERNIE-BiLSTM-CRF model are compared, and the impacts of parameters of the models and the testing datasets are analyzed. In terms of evaluation results, our recall score of RoBERTa-BiLSTM-CRF ranks fourth at the detection level.</abstract>
       <url hash="53f4a543">2020.nlptea-1.13</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>C</fixed-case>hinese Grammatical Errors Diagnosis System Based on <fixed-case>BERT</fixed-case> at <fixed-case>NLPTEA</fixed-case>-2020 <fixed-case>CGED</fixed-case> Shared Task</title>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Yangchao</first><last>Han</last></author>
       <author><first>Haotian</first><last>Huang</last></author>
       <author><first>Yingjie</first><last>Yan</last></author>
diff --git a/data/xml/2020.nuse.xml b/data/xml/2020.nuse.xml
index 84071bf9f9..93178fe07d 100644
--- a/data/xml/2020.nuse.xml
+++ b/data/xml/2020.nuse.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-06-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Joint Workshop on Narrative Understanding, Storylines, and Events</booktitle>
-      <editor><first>Claire</first><last>Bonial</last></editor>
+      <editor id="claire-bonial"><first>Claire</first><last>Bonial</last></editor>
       <editor><first>Tommaso</first><last>Caselli</last></editor>
       <editor><first>Snigdha</first><last>Chaturvedi</last></editor>
       <editor><first>Elizabeth</first><last>Clark</last></editor>
@@ -15,7 +15,7 @@
       <editor><first>Ben</first><last>Miller</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
       <editor><first>Nanyun</first><last>Peng</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>July</month>
@@ -31,7 +31,7 @@
     <paper id="1">
       <title>New Insights into Cross-Document Event Coreference: Systematic Comparison and a Simplified Approach</title>
       <author><first>Andres</first><last>Cremisini</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>1–10</pages>
       <abstract>Cross-Document Event Coreference (CDEC) is the task of finding coreference relationships between events in separate documents, most commonly assessed using the Event Coreference Bank+ corpus (ECB+). At least two different approaches have been proposed for CDEC on ECB+ that use only event triggers, and at least four have been proposed that use both triggers and entities. Comparing these approaches is complicated by variation in the systems’ use of gold vs. computed labels, as well as variation in the document clustering pre-processing step. We present an approach that matches or slightly beats state-of-the-art performance on CDEC over ECB+ with only event trigger annotations, but with a significantly simpler framework and much smaller feature set relative to prior work. This study allows us to directly compare with prior systems and draw conclusions about the effectiveness of various strategies. Additionally, we provide the first cross-validated evaluation on the ECB+ dataset; the first explicit evaluation of the pairwise event coreference classification step; and the first quantification of the effect of document clustering on system performance. The last in particular reveals that while document clustering is a crucial pre-processing step, improvements can at most provide for a 3 point improvement in CDEC performance, though this might be attributable to ease of document clustering on ECB+.</abstract>
       <url hash="9c03f744">2020.nuse-1.1</url>
@@ -44,7 +44,7 @@
       <author><first>Ming-Chang</first><last>Chiu</last></author>
       <author><first>Tiantian</first><last>Feng</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>11–16</pages>
       <abstract>Deciding which scripts to turn into movies is a costly and time-consuming process for filmmakers. Thus, building a tool to aid script selection, an initial phase in movie production, can be very beneficial. Toward that goal, in this work, we present a method to evaluate the quality of a screenplay based on linguistic cues. We address this in a two-fold approach: (1) we define the task as predicting nominations of scripts at major film awards with the hypothesis that the peer-recognized scripts should have a greater chance to succeed. (2) based on industry opinions and narratology, we extract and integrate domain-specific features into common classification techniques. We face two challenges (1) scripts are much longer than other document datasets (2) nominated scripts are limited and thus difficult to collect. However, with narratology-inspired modeling and domain features, our approach offers clear improvements over strong baselines. Our work provides a new approach for future work in screenplay analysis.</abstract>
       <url hash="41d379bf">2020.nuse-1.2</url>
@@ -58,7 +58,7 @@
       <author><first>W. Victor</first><last>Yarlott</last></author>
       <author><first>Mohammed</first><last>Aldawsari</last></author>
       <author><first>Naphtali</first><last>Rishe</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>17–25</pages>
       <abstract>Identifying the discourse structure of documents is an important task in understanding written text. Building on prior work, we demonstrate an improved approach to automatically identifying the discourse function of paragraphs in news articles. We start with the hierarchical theory of news discourse developed by van Dijk (1988) which proposes how paragraphs function within news articles. This discourse information is a level intermediate between phrase- or sentence-sized discourse segments and document genre, characterizing how individual paragraphs convey information about the events in the storyline of the article. Specifically, the theory categorizes the relationships between narrated events and (1) the overall storyline (such as Main Events, Background, or Consequences) as well as (2) commentary (such as Verbal Reactions and Evaluations). We trained and tested a linear chain conditional random field (CRF) with new features to model van Dijk’s labels and compared it against several machine learning models presented in previous work. Our model significantly outperformed all baselines and prior approaches, achieving an average of 0.71 F1 score which represents a 31.5% improvement over the previously best-performing support vector machine model.</abstract>
       <url hash="f4f0aef0">2020.nuse-1.3</url>
@@ -69,7 +69,7 @@
     <paper id="4">
       <title>Systematic Evaluation of a Framework for Unsupervised Emotion Recognition for Narrative Text</title>
       <author><first>Samira</first><last>Zad</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>26–37</pages>
       <abstract>Identifying emotions as expressed in text (a.k.a. text emotion recognition) has received a lot of attention over the past decade. Narratives often involve a great deal of emotional expression, and so emotion recognition on narrative text is of great interest to computational approaches to narrative understanding. Prior work by Kim et al. 2010 was the work with the highest reported emotion detection performance, on a corpus of fairy tales texts. Close inspection of that work, however, revealed significant reproducibility problems, and we were unable to reimplement Kim’s approach as described. As a consequence, we implemented a framework inspired by Kim’s approach, where we carefully evaluated the major design choices. We identify the highest-performing combination, which outperforms Kim’s reported performance by 7.6 <tex-math>F_1</tex-math> points on average. Close inspection of the annotated data revealed numerous missing and incorrect emotion terms in the relevant lexicon, WordNetAffect (WNA; Strapparava and Valitutti, 2004), which allowed us to augment it in a useful way. More generally, this showed that numerous clearly emotive words and phrases are missing from WNA, which suggests that effort invested in augmenting or refining emotion ontologies could be useful for improving the performance of emotion recognition systems. We release our code and data to definitely enable future reproducibility of this work.</abstract>
       <url hash="6676e35e">2020.nuse-1.4</url>
@@ -95,13 +95,13 @@
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Elizabeth</first><last>Clark</last></author>
       <author><first>Katharina</first><last>Reinecke</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>46–54</pages>
       <abstract>Current story writing or story editing systems rely on human judgments of story quality for evaluating performance, often ignoring the subjectivity in ratings. We analyze the effect of author and reader characteristics and story writing setup on the quality of stories in a short storytelling task. To study this effect, we create and release STORIESINTHEWILD, containing 1,630 stories collected on a volunteer-based crowdsourcing platform. Each story is rated by three different readers, and comes paired with the author’s and reader’s age, gender, and personality. Our findings show significant effects of authors’ and readers’ identities, as well as writing setup, on story writing and ratings. Notably, compared to younger readers, readers age 45 and older consider stories significantly less creative and less entertaining. Readers also prefer stories written all at once, rather than in chunks, finding them more coherent and creative. We also observe linguistic differences associated with authors’ demographics (e.g., older authors wrote more vivid and emotional stories). Our findings suggest that reader and writer demographics, as well as writing setup, should be accounted for in story writing evaluations.</abstract>
       <url hash="c659338e">2020.nuse-1.6</url>
       <attachment type="Software" hash="0118ef2f">2020.nuse-1.6.Software.zip</attachment>
-      <doi>10.18653/v1/2020.nuse-1.6</doi>
       <attachment type="Dataset" hash="fbb502be">2020.nuse-1.6.Dataset.pdf</attachment>
+      <doi>10.18653/v1/2020.nuse-1.6</doi>
       <video href="http://slideslive.com/38929745"/>
       <bibkey>august-etal-2020-exploring</bibkey>
     </paper>
@@ -141,7 +141,7 @@
     <paper id="10">
       <title>Exploring aspects of similarity between spoken personal narratives by disentangling them into narrative clause types</title>
       <author><first>Belen</first><last>Saldias</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>78–86</pages>
       <abstract>Sharing personal narratives is a fundamental aspect of human social behavior as it helps share our life experiences. We can tell stories and rely on our background to understand their context, similarities, and differences. A substantial effort has been made towards developing storytelling machines or inferring characters’ features. However, we don’t usually find models that compare narratives. This task is remarkably challenging for machines since they, as sometimes we do, lack an understanding of what similarity means. To address this challenge, we first introduce a corpus of real-world spoken personal narratives comprising 10,296 narrative clauses from 594 video transcripts. Second, we ask non-narrative experts to annotate those clauses under Labov’s sociolinguistic model of personal narratives (i.e., action, orientation, and evaluation clause types) and train a classifier that reaches 84.7% F-score for the highest-agreed clauses. Finally, we match stories and explore whether people implicitly rely on Labov’s framework to compare narratives. We show that actions followed by the narrator’s evaluation of these are the aspects non-experts consider the most. Our approach is intended to help inform machine learning methods aimed at studying or representing personal narratives.</abstract>
       <url hash="64dc1ef4">2020.nuse-1.10</url>
@@ -155,8 +155,8 @@
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
       <author><first>Avinash Kumar</first><last>Singh</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>87–96</pages>
       <abstract>In this paper, we propose the use of Message Sequence Charts (MSC) as a representation for visualizing narrative text in Hindi. An MSC is a formal representation allowing the depiction of actors and interactions among these actors in a scenario, apart from supporting a rich framework for formal inference. We propose an approach to extract MSC actors and interactions from a Hindi narrative. As a part of the approach, we enrich an existing event annotation scheme where we provide guidelines for annotation of the mood of events (realis vs irrealis) and guidelines for annotation of event arguments. We report performance on multiple evaluation criteria by experimenting with Hindi narratives from Indian History. Though Hindi is the fourth most-spoken first language in the world, from the NLP perspective it has comparatively lesser resources than English. Moreover, there is relatively less work in the context of event processing in Hindi. Hence, we believe that this work is among the initial works for Hindi event processing.</abstract>
diff --git a/data/xml/2020.onion.xml b/data/xml/2020.onion.xml
index 25ee76e27d..26719be39b 100644
--- a/data/xml/2020.onion.xml
+++ b/data/xml/2020.onion.xml
@@ -32,7 +32,7 @@
     <paper id="2">
       <title>Analysis of Body Behaviours in Human-Human and Human-Robot Interactions</title>
       <author><first>Taiga</first><last>Mori</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <pages>7–14</pages>
       <abstract>We conducted preliminary comparison of human-robot (HR) interaction with human-human (HH) interaction conducted in English and in Japanese. As the result, body gestures increased in HR, while hand and head gestures decreased in HR. Concerning hand gesture, they were composed of more diverse and complex forms, trajectories and functions in HH than in HR. Moreover, English speakers produced 6 times more hand gestures than Japanese speakers in HH. Regarding head gesture, even though there was no difference in the frequency of head gestures between English speakers and Japanese speakers in HH, Japanese speakers produced slightly more nodding during the robot’s speaking than English speakers in HR. Furthermore, positions of nod were different depending on the language. Concerning body gesture, participants produced body gestures mostly to regulate appropriate distance with the robot in HR. Additionally, English speakers produced slightly more body gestures than Japanese speakers.</abstract>
diff --git a/data/xml/2020.osact.xml b/data/xml/2020.osact.xml
index 03bc106be5..52cd8a4eec 100644
--- a/data/xml/2020.osact.xml
+++ b/data/xml/2020.osact.xml
@@ -125,7 +125,7 @@
     <paper id="10">
       <title><fixed-case>ASU</fixed-case>_<fixed-case>OPTO</fixed-case> at <fixed-case>OSACT</fixed-case>4 - Offensive Language Detection for <fixed-case>A</fixed-case>rabic text</title>
       <author><first>Amr</first><last>Keleg</last></author>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <author><first>Mahmoud</first><last>Khalil</last></author>
       <pages>66–70</pages>
       <abstract>In the past years, toxic comments and offensive speech are polluting the internet and manual inspection of these comments is becoming a tiresome task to manage. Having a machine learning based model that is able to filter offensive Arabic content is of high need nowadays. In this paper, we describe the model that was submitted to the Shared Task on Offensive Language Detection that is organized by (The 4th Workshop on Open-Source Arabic Corpora and Processing Tools). Our model makes use transformer based model (BERT) to detect offensive content. We came in the fourth place in subtask A (detecting Offensive Speech) and in the third place in subtask B (detecting Hate Speech).</abstract>
@@ -179,7 +179,7 @@
     <paper id="15">
       <title>Combining Character and Word Embeddings for the Detection of Offensive Language in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Abdullah I.</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>91–96</pages>
       <abstract>Twitter and other social media platforms offer users the chance to share their ideas via short posts. While the easy exchange of ideas has value, these microblogs can be leveraged by people who want to share hatred. and such individuals can share negative views about an individual, race, or group with millions of people at the click of a button. There is thus an urgent need to establish a method that can automatically identify hate speech and offensive language. To contribute to this development, during the OSACT4 workshop, a shared task was undertaken to detect offensive language in Arabic. A key challenge was the uniqueness of the language used on social media, prompting the out-of-vocabulary (OOV) problem. In addition, the use of different dialects in Arabic exacerbates this problem. To deal with the issues associated with OOV, we generated a character-level embeddings model, which was trained on a massive data collected carefully. This level of embeddings can work effectively in resolving the problem of OOV words through its ability to learn the vectors of character n-grams or parts of words. The proposed systems were ranked 7th and 8th for Subtasks A and B, respectively.</abstract>
       <url hash="35203439">2020.osact-1.15</url>
diff --git a/data/xml/2020.paclic.xml b/data/xml/2020.paclic.xml
index 8ab54ff56f..cdf177430a 100644
--- a/data/xml/2020.paclic.xml
+++ b/data/xml/2020.paclic.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-05-11" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 34th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Minh Le</first><last>Nguyen</last></editor>
+      <editor id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></editor>
       <editor><first>Mai Chi</first><last>Luong</last></editor>
       <editor><first>Sanghoun</first><last>Song</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -31,7 +31,7 @@
       <title>Improving Sequence Tagging for <fixed-case>V</fixed-case>ietnamese Text using Transformer-based Neural Models</title>
       <author><first>The Viet</first><last>Bui</last></author>
       <author><first>Thi Oanh</first><last>Tran</last></author>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
       <pages>13–20</pages>
       <url hash="456adb46">2020.paclic-1.2</url>
       <bibkey>bui-etal-2020-improving</bibkey>
@@ -58,7 +58,7 @@
     <paper id="5">
       <title>Exploiting weak-supervision for classifying Non-Sentential Utterances in <fixed-case>M</fixed-case>andarin Conversations</title>
       <author><first>Xin-Yi</first><last>Chen</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>42–50</pages>
       <url hash="0dedf56d">2020.paclic-1.5</url>
       <bibkey>chen-prevot-2020-exploiting</bibkey>
@@ -73,7 +73,7 @@
     </paper>
     <paper id="7">
       <title>Metaphoricity Rating of <fixed-case>C</fixed-case>hinese <fixed-case>KIND</fixed-case> Metaphor Expressions</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Meng-Hsien</first><last>Shih</last></author>
       <author><first>Yu-Hsiang</first><last>Shen</last></author>
       <author><first>Wei-Ting</first><last>Tseng</last></author>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="13">
       <title>From Sense to Action: A Word-Action Disambiguation Task in <fixed-case>NLP</fixed-case></title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
       <author><first>Chiung-Yu</first><last>Chiang</last></author>
       <author><first>Richard</first><last>Lian</last></author>
@@ -196,7 +196,7 @@
     <paper id="20">
       <title>Simple is Better! Lightweight Data Augmentation for Low Resource Slot Filling and Intent Classification</title>
       <author><first>Samuel</first><last>Louvan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>167–177</pages>
       <url hash="0a80c7d1">2020.paclic-1.20</url>
       <bibkey>louvan-magnini-2020-simple</bibkey>
@@ -236,7 +236,7 @@
     <paper id="24">
       <title>Iterative Multilingual Neural Machine Translation for Less-Common and Zero-Resource Language Pairs</title>
       <author><first>Minh Thuan</first><last>Nguyen</last></author>
-      <author><first>Phuong Thai</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong Thai</first><last>Nguyen</last></author>
       <author><first>Van Vinh</first><last>Nguyen</last></author>
       <author><first>Minh Cong Nguyen</first><last>Hoang</last></author>
       <pages>207–215</pages>
@@ -251,12 +251,12 @@
       <author><first>Cheolhun</first><last>Heo</last></author>
       <author><first>Yongbin</first><last>Jeong</last></author>
       <author><first>Yoosung</first><last>Jeong</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Taehwan</first><last>Oh</last></author>
       <author><first>Hyonsu</first><last>Choe</last></author>
       <author><first>Seokwon</first><last>Park</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>216–224</pages>
       <url hash="196a651e">2020.paclic-1.25</url>
       <bibkey>noh-etal-2020-enhancing</bibkey>
@@ -346,7 +346,7 @@
     </paper>
     <paper id="35">
       <title>A corpus-based comparative study of light verbs in three <fixed-case>C</fixed-case>hinese speech communities</title>
-      <author><first>Benjamin K</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K</first><last>Tsou</last></author>
       <author><first>Ka-Fai</first><last>Yip</last></author>
       <pages>302–311</pages>
       <url hash="2ba76dce">2020.paclic-1.35</url>
@@ -357,7 +357,7 @@
       <author><first>Mingyu</first><last>Wan</last></author>
       <author><first>Baixi</first><last>Xing</last></author>
       <author><first>Qi</first><last>Su</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>312–317</pages>
       <url hash="de8fa4aa">2020.paclic-1.36</url>
@@ -410,7 +410,7 @@
       <title>Imbalanced <fixed-case>C</fixed-case>hinese Multi-label Text Classification Based on Alternating Attention</title>
       <author><first>Hongliang</first><last>Bi</last></author>
       <author><first>Han</first><last>Hu</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>368–374</pages>
       <url hash="597983fe">2020.paclic-1.42</url>
       <bibkey>bi-etal-2020-imbalanced</bibkey>
@@ -455,7 +455,7 @@
     <paper id="47">
       <title>Exploring Discourse on Same-sex Marriage in <fixed-case>T</fixed-case>aiwan: A Case Study of Near-Synonym of <fixed-case>HOMOSEXUAL</fixed-case> in Opposing Stances</title>
       <author><first>Han-Tang</first><last>Hung</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>411–419</pages>
       <url hash="549ab4b1">2020.paclic-1.47</url>
       <bibkey>hung-hsieh-2020-exploring</bibkey>
@@ -492,7 +492,7 @@
       <title>Identifying Authors Based on Stylometric measures of <fixed-case>V</fixed-case>ietnamese texts</title>
       <author><first>Ho Ngoc</first><last>Lam</last></author>
       <author><first>Vo Diep</first><last>Nhu</last></author>
-      <author><first>Dinh</first><last>Dien</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
       <author><first>Nguyen Tuyet</first><last>Nhung</last></author>
       <pages>447–452</pages>
       <url hash="42d4cd75">2020.paclic-1.51</url>
@@ -521,7 +521,7 @@
       <author><first>Rajesh Kumar</first><last>Mundotiya</last></author>
       <author><first>Vikrant</first><last>Kumar</last></author>
       <author><first>Arpit</first><last>Mehta</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>471–477</pages>
       <url hash="c4b92752">2020.paclic-1.54</url>
       <bibkey>mundotiya-etal-2020-attention</bibkey>
@@ -564,7 +564,7 @@
     </paper>
     <paper id="59">
       <title>Redefining Verbal Nouns in <fixed-case>J</fixed-case>apanese: From the Perspective of Polycategoriality</title>
-      <author><first>David Y.</first><last>Oshima</last></author>
+      <author id="david-yoshikazu-oshima"><first>David Y.</first><last>Oshima</last></author>
       <author><first>Midori</first><last>Hayashi</last></author>
       <pages>514–522</pages>
       <url hash="78bad734">2020.paclic-1.59</url>
@@ -642,9 +642,9 @@
     </paper>
     <paper id="68">
       <title>Bilingual Multi-word Expressions, Multiple-correspondence, and their cultivation from parallel patents: The <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish case</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Ka Po</first><last>Chow</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Ka-Fai</first><last>Yip</last></author>
       <author><first>Yaxuan</first><last>Ji</last></author>
       <author><first>Kevin</first><last>Wu</last></author>
diff --git a/data/xml/2020.pam.xml b/data/xml/2020.pam.xml
index d4a7ac1188..c710cc7228 100644
--- a/data/xml/2020.pam.xml
+++ b/data/xml/2020.pam.xml
@@ -119,7 +119,7 @@
     </paper>
     <paper id="12">
       <title>A toy distributional model for fuzzy generalised quantifiers</title>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Gijs</first><last>Wijnholds</last></author>
       <pages>86–94</pages>
       <abstract>Recent work in compositional distributional semantics showed how bialgebras model generalised quantifiers of natural language. That technique requires working with vector space over power sets of bases, and therefore is computationally costly. It is possible to overcome the computational hurdles by working with fuzzy generalised quantifiers. In this paper, we show that the compositional notion of semantics of natural language, guided by a grammar, extends from a binary to a many valued setting and instantiate in it the fuzzy computations. We import vector representations of words and predicates, learnt from large scale compositional distributional semantics, interpret them as fuzzy sets, and analyse their performance on a toy inference dataset.</abstract>
@@ -131,7 +131,7 @@
       <author><first>Saba</first><last>Anwar</last></author>
       <author><first>Artem</first><last>Shelmanov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>95–103</pages>
       <abstract>Semantic frames are formal linguistic structures describing situations/actions/events, e.g. Commercial transfer of goods. Each frame provides a set of roles corresponding to the situation participants, e.g. Buyer and Goods, and lexical units (LUs) – words and phrases that can evoke this particular frame in texts, e.g. Sell. The scarcity of annotated resources hinders wider adoption of frame semantics across languages and domains. We investigate a simple yet effective method, lexical substitution with word representation models, to automatically expand a small set of frame-annotated sentences with new words for their respective roles and LUs. We evaluate the expansion quality using FrameNet. Contextualized models demonstrate overall superior performance compared to the non-contextualized ones on roles. However, the latter show comparable performance on the task of LU expansion.</abstract>
       <url hash="bad2d4b8">2020.pam-1.13</url>
@@ -146,7 +146,7 @@
       <author><first>Shijie</first><last>Zhao</last></author>
       <author><first>Shawn</first><last>Lin</last></author>
       <author><first>Wenxing</first><last>Liu</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>104–108</pages>
       <abstract>At the intersection between computer vision and natural language processing, there has been recent progress on two natural language generation tasks: Dense Image Captioning and Referring Expression Generation for objects in complex scenes. The former aims to provide a caption for a specified object in a complex scene for the benefit of an interlocutor who may not be able to see it. The latter aims to produce a referring expression that will serve to identify a given object in a scene that the interlocutor can see. The two tasks are designed for different assumptions about the common ground between the interlocutors, and serve very different purposes, although they both associate a linguistic description with an object in a complex scene. Despite these fundamental differences, the distinction between these two tasks is sometimes overlooked. Here, we undertake a side-by-side comparison between image captioning and reference game human datasets and show that they differ systematically with respect to informativity. We hope that an understanding of the systematic differences among these human datasets will ultimately allow them to be leveraged more effectively in the associated engineering tasks.</abstract>
       <url hash="fe6d7422">2020.pam-1.14</url>
@@ -174,7 +174,7 @@
     <paper id="17">
       <title>Word Sense Distance in Human Similarity Judgements and Contextualised Word Embeddings</title>
       <author><first>Janosch</first><last>Haber</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>128–145</pages>
       <abstract>Homonymy is often used to showcase one of the advantages of context-sensitive word embedding techniques such as ELMo and BERT. In this paper we want to shift the focus to the related but less exhaustively explored phenomenon of polysemy, where a word expresses various distinct but related senses in different contexts. Specifically, we aim to i) investigate a recent model of polyseme sense clustering proposed by Ortega-Andres &amp; Vicente (2019) through analysing empirical evidence of word sense grouping in human similarity judgements, ii) extend the evaluation of context-sensitive word embedding systems by examining whether they encode differences in word sense similarity and iii) compare the word sense similarities of both methods to assess their correlation and gain some intuition as to how well contextualised word embeddings could be used as surrogate word sense similarity judgements in linguistic experiments.</abstract>
       <url hash="a2281392">2020.pam-1.17</url>
diff --git a/data/xml/2020.parlaclarin.xml b/data/xml/2020.parlaclarin.xml
index aa7274c2bc..d4c93bed44 100644
--- a/data/xml/2020.parlaclarin.xml
+++ b/data/xml/2020.parlaclarin.xml
@@ -49,7 +49,7 @@
     </paper>
     <paper id="4">
       <title>Compiling <fixed-case>C</fixed-case>zech Parliamentary Stenographic Protocols into a Corpus</title>
-      <author><first>Barbora</first><last>Hladka</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladka</last></author>
       <author><first>Matyáš</first><last>Kopp</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
       <pages>18–22</pages>
@@ -72,7 +72,7 @@
     <paper id="6">
       <title>The si<fixed-case>P</fixed-case>arl corpus of <fixed-case>S</fixed-case>lovene parliamentary proceedings</title>
       <author><first>Andrej</first><last>Pancur</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>28–34</pages>
       <abstract>The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990–2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history.</abstract>
       <url hash="39c641a1">2020.parlaclarin-1.6</url>
diff --git a/data/xml/2020.peoples.xml b/data/xml/2020.peoples.xml
index 732836527c..985d5c46de 100644
--- a/data/xml/2020.peoples.xml
+++ b/data/xml/2020.peoples.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third Workshop on Computational Modeling of People's Opinions, Personality, and Emotion's in Social Media</booktitle>
       <editor><first>Malvina</first><last>Nissim</last></editor>
       <editor><first>Viviana</first><last>Patti</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <editor><first>Esin</first><last>Durmus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Barcelona, Spain (Online)</address>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Persuasiveness of News Editorials depending on Ideology and Personality</title>
       <author><first>Roxanne</first><last>El Baff</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>29–40</pages>
@@ -89,7 +89,7 @@
     <paper id="8">
       <title>Social Media Unrest Prediction during the <fixed-case>COVID</fixed-case>-19 Pandemic: Neural Implicit Motive Pattern Recognition as Psychometric Signs of Severe Crises</title>
       <author><first>Dirk</first><last>Johannßen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>74–86</pages>
       <abstract>The COVID-19 pandemic has caused international social tension and unrest. Besides the crisis itself, there are growing signs of rising conflict potential of societies around the world. Indicators of global mood changes are hard to detect and direct questionnaires suffer from social desirability biases. However, so-called implicit methods can reveal humans intrinsic desires from e.g. social media texts. We present psychologically validated social unrest predictors and replicate scalable and automated predictions, setting a new state of the art on a recent German shared task dataset. We employ this model to investigate a change of language towards social unrest during the COVID-19 pandemic by comparing established psychological predictors on samples of tweets from spring 2019 with spring 2020. The results show a significant increase of the conflict indicating psychometrics. With this work, we demonstrate the applicability of automated NLP-based approaches to quantitative psychological research.</abstract>
       <url hash="58fd7497">2020.peoples-1.8</url>
@@ -126,7 +126,7 @@
     </paper>
     <paper id="12">
       <title>Experiencers, Stimuli, or Targets: Which Semantic Roles Enable Machine Learning to Infer the Emotions?</title>
-      <author><first>Laura Ana Maria</first><last>Oberländer</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Oberländer</last></author>
       <author><first>Kevin</first><last>Reich</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>119–128</pages>
@@ -136,10 +136,10 @@
     </paper>
     <paper id="13">
       <title>Learning Emotion from 100 Observations: Unexpected Robustness of Deep Learning under Strong Data Limitations</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>João</first><last>Sedoc</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>129–139</pages>
       <abstract>One of the major downsides of Deep Learning is its supposed need for vast amounts of training data. As such, these techniques appear ill-suited for NLP areas where annotated data is limited, such as less-resourced languages or emotion analysis, with its many nuanced and hard-to-acquire annotation formats. We conduct a questionnaire study indicating that indeed the vast majority of researchers in emotion analysis deems neural models inferior to traditional machine learning when training data is limited. In stark contrast to those survey results, we provide empirical evidence for English, Polish, and Portuguese that commonly used neural architectures can be trained on surprisingly few observations, outperforming n-gram based ridge regression on only 100 data points. Our analysis suggests that high-quality, pre-trained word embeddings are a main factor for achieving those results.</abstract>
       <url hash="46a97324">2020.peoples-1.13</url>
@@ -160,7 +160,7 @@
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>153–157</pages>
       <abstract>In this paper, we present emotion lexicons of Croatian, Dutch and Slovene, based on manually corrected automatic translations of the English NRC Emotion lexicon. We evaluate the impact of the translation changes by measuring the change in supervised classification results of socially unacceptable utterances when lexicon information is used for feature construction. We further showcase the usage of the lexicons by calculating the difference in emotion distributions in texts containing and not containing socially unacceptable discourse, comparing them across four languages (English, Croatian, Dutch, Slovene) and two topics (migrants and LGBT). We show significant and consistent improvements in automatic classification across all languages and topics, as well as consistent (and expected) emotion distributions across all languages and topics, proving for the manually corrected lexicons to be a useful addition to the severely lacking area of emotion lexicons, the crucial resource for emotive analysis of text.</abstract>
       <url hash="c5f84e9c">2020.peoples-1.15</url>
diff --git a/data/xml/2020.privatenlp.xml b/data/xml/2020.privatenlp.xml
index 5570d5fc28..50dd1fb66f 100644
--- a/data/xml/2020.privatenlp.xml
+++ b/data/xml/2020.privatenlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Second Workshop on Privacy in NLP</booktitle>
       <editor><first>Oluwaseyi</first><last>Feyisetan</last></editor>
       <editor><first>Sepideh</first><last>Ghanavati</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Patricia</first><last>Thaine</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
diff --git a/data/xml/2020.rail.xml b/data/xml/2020.rail.xml
index a404a2fcac..ced8daab59 100644
--- a/data/xml/2020.rail.xml
+++ b/data/xml/2020.rail.xml
@@ -7,7 +7,7 @@
       <editor><first>Phathutshedzo</first><last>Ramukhadi</last></editor>
       <editor><first>Mmasibidi</first><last>Setaka</last></editor>
       <editor><first>Valencia</first><last>Wagner</last></editor>
-      <editor><first>Menno</first><last>van Zaanen</last></editor>
+      <editor id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -32,7 +32,7 @@
     <paper id="2">
       <title>Usability and Accessibility of <fixed-case>B</fixed-case>antu Language Dictionaries in the Digital Age: Mobile Access in an Open Environment</title>
       <author><first>Thomas</first><last>Eckart</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Erik</first><last>Körner</last></author>
       <author><first>Dirk</first><last>Goldhahn</last></author>
@@ -75,9 +75,9 @@
     </paper>
     <paper id="5">
       <title>Comparing Neural Network Parsers for a Less-resourced and Morphologically-rich Language: <fixed-case>A</fixed-case>mharic Dependency Parser</title>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Baye Yimam</first><last>Mekonnen</last></author>
+      <author id="baye-yimam-mekonnen"><first>Baye Yimam</first><last>Mekonnen</last></author>
       <pages>25–30</pages>
       <abstract>In this paper, we compare four state-of-the-art neural network dependency parsers for the Semitic language Amharic. As Amharic is a morphologically-rich and less-resourced language, the out-of-vocabulary (OOV) problem will be higher when we develop data-driven models. This fact limits researchers to develop neural network parsers because the neural network requires large quantities of data to train a model. We empirically evaluate neural network parsers when a small Amharic treebank is used for training. Based on our experiment, we obtain an 83.79 LAS score using the UDPipe system. Better accuracy is achieved when the neural parsing system uses external resources like word embedding. Using such resources, the LAS score for UDPipe improves to 85.26. Our experiment shows that the neural networks can learn dependency relations better from limited data while segmentation and POS tagging require much data.</abstract>
       <url hash="2b143545">2020.rail-1.5</url>
@@ -105,7 +105,7 @@
     <paper id="8">
       <title>Navigating Challenges of Multilingual Resource Development for Under-Resourced Languages: The Case of the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet Project</title>
       <author><first>Marissa</first><last>Griesel</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <pages>45–50</pages>
       <abstract>Creating a new wordnet is by no means a trivial task and when the target language is under-resourced as is the case for the languages currently included in the multilingual African Wordnet (AfWN), developers need to rely heavily on human expertise. During the different phases of development of the AfWN, we incorporated various methods of fast-tracking to ease the tedious and time-consuming work. Some methods have proven effective while others seem to have little positive impact on the work rate. As in the case of many other under-resourced languages, the expand model was implemented throughout, thus depending on English source data such as the English Princeton Wordnet (PWN) which is then translated into the target language with the assumption that the new language shares an underlying structure with the PWN. The paper discusses some problems encountered along the way and points out various possibilities of (semi) automated quality assurance measures and further refinement of the AfWN to ensure accelerated growth. In this paper we aim to highlight some of the lessons learnt from hands-on experience in order to facilitate similar projects, in particular for languages from other African countries.</abstract>
       <url hash="4647aa70">2020.rail-1.8</url>
diff --git a/data/xml/2020.rdsm.xml b/data/xml/2020.rdsm.xml
index a415e3ca32..cd891136d3 100644
--- a/data/xml/2020.rdsm.xml
+++ b/data/xml/2020.rdsm.xml
@@ -49,8 +49,8 @@
     </paper>
     <paper id="4">
       <title>Revisiting Rumour Stance Classification: Dealing with Imbalanced Data</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author><first>Yue</first><last>Li</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>38–44</pages>
       <abstract>Correctly classifying stances of replies can be significantly helpful for the automatic detection and classification of online rumours. One major challenge is that there are considerably more non-relevant replies (comments) than informative ones (supports and denies), making the task highly imbalanced. In this paper we revisit the task of rumour stance classification, aiming to improve the performance over the informative minority classes. We experiment with traditional methods for imbalanced data treatment with feature- and BERT-based classifiers. Our models outperform all systems in RumourEval 2017 shared task and rank second in RumourEval 2019.</abstract>
       <url hash="dd22f230">2020.rdsm-1.4</url>
@@ -79,7 +79,7 @@
     <paper id="7">
       <title>Fake or Real? A Study of <fixed-case>A</fixed-case>rabic Satirical Fake News</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Emad</first><last>Mohamed</last></author>
       <pages>70–80</pages>
       <abstract>One very common type of fake news is satire which comes in a form of a news website or an online platform that parodies reputable real news agencies to create a sarcastic version of reality. This type of fake news is often disseminated by individuals on their online platforms as it has a much stronger effect in delivering criticism than through a straightforward message. However, when the satirical text is disseminated via social media without mention of its source, it can be mistaken for real news. This study conducts several exploratory analyses to identify the linguistic properties of Arabic fake news with satirical content. It shows that although it parodies real news, Arabic satirical news has distinguishing features on the lexico-grammatical level. We exploit these features to build a number of machine learning models capable of identifying satirical fake news with an accuracy of up to 98.6%. The study introduces a new dataset (3185 articles) scraped from two Arabic satirical news websites (‘Al-Hudood’ and ‘Al-Ahram Al-Mexici’) which consists of fake news. The real news dataset consists of 3710 articles collected from three official news sites: the ‘BBC-Arabic’, the ‘CNN-Arabic’ and ‘Al-Jazeera news’. Both datasets are concerned with political issues related to the Middle East.</abstract>
diff --git a/data/xml/2020.readi.xml b/data/xml/2020.readi.xml
index cc7f12a679..677e27ac52 100644
--- a/data/xml/2020.readi.xml
+++ b/data/xml/2020.readi.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)</booktitle>
-      <editor><first>Núria</first><last>Gala</last></editor>
+      <editor id="nuria-gala"><first>Núria</first><last>Gala</last></editor>
       <editor><first>Rodrigo</first><last>Wilkens</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Disambiguating Confusion Sets as an Aid for Dyslexic Spelling</title>
       <author><first>Steinunn Rut</first><last>Friðriksdóttir</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>1–5</pages>
       <abstract>Spell checkers and other proofreading software are crucial tools for people with dyslexia and other reading disabilities. Most spell checkers automatically detect spelling mistakes by looking up individual words and seeing if they exist in the vocabulary. However, one of the biggest challenges of automatic spelling correction is how to deal with real-word errors, i.e. spelling mistakes which lead to a real but unintended word, such as when then is written in place of than. These errors account for 20% of all spelling mistakes made by people with dyslexia. As both words exist in the vocabulary, a simple dictionary lookup will not detect the mistake. The only way to disambiguate which word was actually intended is to look at the context in which the word appears. This problem is particularly apparent in languages with rich morphology where there is often minimal orthographic difference between grammatical items. In this paper, we present our novel confusion set corpus for Icelandic and discuss how it could be used for context-sensitive spelling correction. We have collected word pairs from seven different categories, chosen for their homophonous properties, along with sentence examples and frequency information from said pairs. We present a small-scale machine learning experiment using a decision tree binary classification which results range from 73% to 86% average accuracy with 10-fold cross validation. While not intended as a finalized result, the method shows potential and will be improved in future research.</abstract>
       <url hash="c2ddf349">2020.readi-1.1</url>
@@ -49,9 +49,9 @@
     <paper id="4">
       <title>Automatically Assess Children’s Reading Skills</title>
       <author><first>Ornella</first><last>Mich</last></author>
-      <author><first>Nadia</first><last>Mana</last></author>
+      <author id="nadia-mana"><first>Nadia</first><last>Mana</last></author>
       <author><first>Roberto</first><last>Gretter</last></author>
-      <author><first>Marco</first><last>Matassoni</last></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <pages>20–26</pages>
       <abstract>Assessing reading skills is an important task teachers have to perform at the beginning of a new scholastic year to evaluate the starting level of the class and properly plan next learning activities. Digital tools based on automatic speech recognition (ASR) may be really useful to support teachers in this task, currently very time consuming and prone to human errors. This paper presents a web application for automatically assessing fluency and accuracy of oral reading in children attending Italian primary and lower secondary schools. Our system, based on ASR technology, implements the Cornoldi’s MT battery, which is a well-known Italian test to assess reading skills. The front-end of the system has been designed following the participatory design approach by involving end users from the beginning of the creation process. Teachers may use our system to both test student’s reading skills and monitor their performance over time. In fact, the system offers an effective graphical visualization of the assessment results for both individual students and entire class. The paper also presents the results of a pilot study to evaluate the system usability with teachers.</abstract>
@@ -99,7 +99,7 @@
     <paper id="8">
       <title>Visualizing Facets of Text Complexity across Registers</title>
       <author><first>Marina</first><last>Santini</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <author><first>Evelina</first><last>Rennes</last></author>
       <pages>49–56</pages>
       <abstract>In this paper, we propose visualizing results of a corpus-based study on text complexity using radar charts. We argue that the added value of this type of visualisation is the polygonal shape that provides an intuitive grasp of text complexity similarities across the registers of a corpus. The results that we visualize come from a study where we explored whether it is possible to automatically single out different facets of text complexity across the registers of a Swedish corpus. To this end, we used factor analysis as applied in Biber’s Multi-Dimensional Analysis framework. The visualization of text complexity facets with radar charts indicates that there is correspondence between linguistic similarity and similarity of shape across registers.</abstract>
@@ -169,7 +169,7 @@
       <title>Coreference-Based Text Simplification</title>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
       <author><first>Bruno</first><last>Oberle</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>93–100</pages>
       <abstract>Text simplification aims at adapting documents to make them easier to read by a given audience. Usually, simplification systems consider only lexical and syntactic levels, and, moreover, are often evaluated at the sentence level. Thus, studies on the impact of simplification in text cohesion are lacking. Some works add coreference resolution in their pipeline to address this issue. In this paper, we move forward in this direction and present a rule-based system for automatic text simplification, aiming at adapting French texts for dyslexic children. The architecture of our system takes into account not only lexical and syntactic but also discourse information, based on coreference chains. Our system has been manually evaluated in terms of grammaticality and cohesion. We have also built and used an evaluation corpus containing multiple simplification references for each sentence. It has been annotated by experts following a set of simplification guidelines, and can be used to run automatic evaluation of other simplification systems. Both the system and the evaluation corpus are freely available.</abstract>
       <url hash="b4fa1214">2020.readi-1.14</url>
diff --git a/data/xml/2020.repl4nlp.xml b/data/xml/2020.repl4nlp.xml
index d9094b4459..d2189c9b54 100644
--- a/data/xml/2020.repl4nlp.xml
+++ b/data/xml/2020.repl4nlp.xml
@@ -25,7 +25,7 @@
     <paper id="1">
       <title>Zero-Resource Cross-Domain Named Entity Recognition</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>1–6</pages>
       <abstract>Existing models for cross-domain named entity recognition (NER) rely on numerous unlabeled corpus or labeled NER training data in target domains. However, collecting data for low-resource target domains is not only expensive but also time-consuming. Hence, we propose a cross-domain NER model that does not use any external resources. We first introduce a Multi-Task Learning (MTL) by adding a new objective function to detect whether tokens are named entities or not. We then introduce a framework called Mixture of Entity Experts (MoEE) to improve the robustness for zero-resource domain adaptation. Finally, experimental results show that our model outperforms strong unsupervised cross-domain sequence labeling models, and the performance of our model is close to that of the state-of-the-art model which leverages extensive resources.</abstract>
@@ -37,7 +37,7 @@
     <paper id="2">
       <title>Encodings of Source Syntax: Similarities in <fixed-case>NMT</fixed-case> Representations Across Target Languages</title>
       <author><first>Tyler A.</first><last>Chang</last></author>
-      <author><first>Anna</first><last>Rafferty</last></author>
+      <author id="anna-n-rafferty"><first>Anna</first><last>Rafferty</last></author>
       <pages>7–16</pages>
       <abstract>We train neural machine translation (NMT) models from English to six target languages, using NMT encoder representations to predict ancestor constituent labels of source language words. We find that NMT encoders learn similar source syntax regardless of NMT target language, relying on explicit morphosyntactic cues to extract syntactic features from source sentences. Furthermore, the NMT encoders outperform RNNs trained directly on several of the constituent label prediction tasks, suggesting that NMT encoder representations can be used effectively for natural language tasks involving syntax. However, both the NMT encoders and the directly-trained RNNs learn substantially different syntactic information from a probabilistic context-free grammar (PCFG) parser. Despite lower overall accuracy scores, the PCFG often performs well on sentences for which the RNN-based models perform poorly, suggesting that RNN architectures are constrained in the types of syntax they can learn.</abstract>
       <url hash="09498e6e">2020.repl4nlp-1.2</url>
@@ -61,7 +61,7 @@
       <author><first>Siddharth</first><last>Bhat</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
       <author><first>Souvik</first><last>Banerjee</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>24–33</pages>
       <abstract>In this paper, we provide an alternate perspective on word representations, by reinterpreting the dimensions of the vector space of a word embedding as a collection of features. In this reinterpretation, every component of the word vector is normalized against all the word vectors in the vocabulary. This idea now allows us to view each vector as an <tex-math>n</tex-math>-tuple (akin to a fuzzy set), where <tex-math>n</tex-math> is the dimensionality of the word representation and each element represents the probability of the word possessing a feature. Indeed, this representation enables the use fuzzy set theoretic operations, such as union, intersection and difference. Unlike previous attempts, we show that this representation of words provides a notion of similarity which is inherently asymmetric and hence closer to human similarity judgements. We compare the performance of this representation with various benchmarks, and explore some of the unique properties including function word detection, detection of polysemous words, and some insight into the interpretability provided by set theoretic operations.</abstract>
       <url hash="12eb4a8f">2020.repl4nlp-1.4</url>
@@ -165,7 +165,7 @@
       <title>A Metric Learning Approach to Misogyny Categorization</title>
       <author><first>Juan Manuel</first><last>Coria</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
       <pages>89–94</pages>
       <abstract>The task of automatic misogyny identification and categorization has not received as much attention as other natural language tasks have, even though it is crucial for identifying hate speech in social Internet interactions. In this work, we address this sentence classification task from a representation learning perspective, using both a bidirectional LSTM and BERT optimized with the following metric learning loss functions: contrastive loss, triplet loss, center loss, congenerous cosine loss and additive angular margin loss. We set new state-of-the-art for the task with our fine-tuned BERT, whose sentence embeddings can be compared with a simple cosine distance, and we release all our code as open source for easy reproducibility. Moreover, we find that almost every loss function performs equally well in this setting, matching the regular cross entropy loss.</abstract>
@@ -261,7 +261,7 @@
     <paper id="20">
       <title>A Cross-Task Analysis of Text Span Representations</title>
       <author><first>Shubham</first><last>Toshniwal</last></author>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Bowen</first><last>Shi</last></author>
       <author><first>Lingyu</first><last>Gao</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
diff --git a/data/xml/2020.rocling.xml b/data/xml/2020.rocling.xml
index 1c504cf607..1e7c533902 100644
--- a/data/xml/2020.rocling.xml
+++ b/data/xml/2020.rocling.xml
@@ -40,7 +40,7 @@
       <title>Improving Phrase Translation Based on Sentence Alignment of <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpus</title>
       <author><first>Yi-Jyun</first><last>Chen</last></author>
       <author><first>Ching-Yu Helen</first><last>Yang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>6–7</pages>
       <url hash="5accbd0d">2020.rocling-1.3</url>
       <bibkey>chen-etal-2020-improving-phrase</bibkey>
@@ -48,7 +48,7 @@
     <paper id="4">
       <title>Mitigating Impacts of Word Segmentation Errors on Collocation Extraction in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Yongfu</first><last>Liao</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>8–20</pages>
       <url hash="12f12108">2020.rocling-1.4</url>
       <bibkey>liao-hsieh-2020-mitigating</bibkey>
@@ -142,7 +142,7 @@
     <paper id="14">
       <title>The preliminary study of robust speech feature extraction based on maximizing the accuracy of states in deep acoustic models</title>
       <author><first>Li-chia</first><last>Chang</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>118–119</pages>
       <url hash="39f9d500">2020.rocling-1.14</url>
       <bibkey>chang-hung-2020-preliminary</bibkey>
@@ -150,7 +150,7 @@
     <paper id="15">
       <title>Multi-view Attention-based Speech Enhancement Model for Noise-robust Automatic Speech Recognition</title>
       <author><first>Fu-An</first><last>Chao</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>120–135</pages>
       <url hash="ff1bc91b">2020.rocling-1.15</url>
@@ -176,7 +176,7 @@
       <title>Lectal Variation of the Two <fixed-case>C</fixed-case>hinese Causative Auxiliaries</title>
       <author><first>Cing-Fang</first><last>Shih</last></author>
       <author><first>Mao-Chang</first><last>Ku</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>163–177</pages>
       <url hash="6c68db5f">2020.rocling-1.18</url>
       <bibkey>shih-etal-2020-lectal</bibkey>
@@ -184,7 +184,7 @@
     <paper id="19">
       <title>The Semantic Features and Cognitive Concepts of Mang2 ‘Busy’: A Corpus-Based Study</title>
       <author><first>Hsin-Rou</first><last>Lin</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>178–192</pages>
       <url hash="9a576c95">2020.rocling-1.19</url>
       <bibkey>lin-chung-2020-semantic</bibkey>
@@ -192,7 +192,7 @@
     <paper id="20">
       <title>An Analysis of Multimodal Document Intent in <fixed-case>I</fixed-case>nstagram Posts</title>
       <author><first>Ying-Yu</first><last>Chen</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>193–207</pages>
       <url hash="6ba3a368">2020.rocling-1.20</url>
       <bibkey>chen-hsieh-2020-analysis</bibkey>
@@ -202,7 +202,7 @@
       <author><first>Wen-jet</first><last>Wang</last></author>
       <author><first>Chia-Jung</first><last>Chen</last></author>
       <author><first>Chien-yu</first><last>Lai</last></author>
-      <author><first>Chia-ming</first><last>Lee</last></author>
+      <author id="chia-ming-lee"><first>Chia-ming</first><last>Lee</last></author>
       <author><first>Hsin-hung</first><last>Lin</last></author>
       <pages>208–222</pages>
       <url hash="16af5cf7">2020.rocling-1.21</url>
@@ -211,7 +211,7 @@
     <paper id="22">
       <title>An Adaptive Method for Building a <fixed-case>C</fixed-case>hinese Dimensional Sentiment Lexicon</title>
       <author><first>Ying-Lung</first><last>Lin</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>223–237</pages>
       <url hash="ac0fd4b1">2020.rocling-1.22</url>
       <bibkey>lin-yu-2020-adaptive</bibkey>
@@ -244,7 +244,7 @@
     <paper id="26">
       <title>Sentiment Analysis for Investment Atmosphere Scoring</title>
       <author><first>Chih-Hsiang</first><last>Peng</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>275–289</pages>
       <url hash="116a0a77">2020.rocling-1.26</url>
       <bibkey>peng-yu-2020-sentiment</bibkey>
@@ -278,7 +278,7 @@
       <title>The Analysis and Annotation of Propaganda Techniques in <fixed-case>C</fixed-case>hinese News Texts</title>
       <author><first>Meng-Hsien</first><last>Shih</last></author>
       <author><first>Ren-feng</first><last>Duann</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>331–345</pages>
       <url hash="4b803d68">2020.rocling-1.30</url>
       <bibkey>shih-etal-2020-analysis</bibkey>
@@ -296,7 +296,7 @@
       <author><first>Yuh-Shyang</first><last>Wang</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Bo-Lin</first><last>Lin</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>359–371</pages>
       <url hash="3844d63f">2020.rocling-1.32</url>
       <bibkey>wang-etal-2020-scientific</bibkey>
@@ -311,7 +311,7 @@
     </paper>
     <paper id="34">
       <title>Email Writing Assistant System</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Ching-Yu</first><last>Yang</last></author>
       <author><first>Guan-Fu</first><last>Peng</last></author>
       <pages>387–397</pages>
diff --git a/data/xml/2020.scil.xml b/data/xml/2020.scil.xml
index 51ad1ee71d..28d867adf4 100644
--- a/data/xml/2020.scil.xml
+++ b/data/xml/2020.scil.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Society for Computation in Linguistics 2020</booktitle>
       <editor><first>Allyson</first><last>Ettinger</last></editor>
-      <editor><first>Gaja</first><last>Jarosz</last></editor>
+      <editor id="gaja-jarosz"><first>Gaja</first><last>Jarosz</last></editor>
       <editor><first>Joe</first><last>Pater</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York, New York</address>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="13">
       <title>Evolving constraints and rules in Harmonic Grammar</title>
-      <author><first>Elliott</first><last>Moreton</last></author>
+      <author id="elliott-moreton"><first>Elliott</first><last>Moreton</last></author>
       <pages>100–111</pages>
       <url hash="22777ce7">2020.scil-1.13</url>
       <bibkey>moreton-2020-evolving</bibkey>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="19">
       <title>Acquisition of Inflectional Morphology in Artificial Neural Networks With Prior Knowledge</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>144–154</pages>
       <url hash="8acbba09">2020.scil-1.19</url>
       <bibkey>kann-2020-acquisition</bibkey>
@@ -258,7 +258,7 @@
     <paper id="30">
       <title>Frequency-(in)dependent regularization in language production and cultural transmission</title>
       <author><first>Emily</first><last>Morgan</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>248–249</pages>
       <url hash="d8ee26d9">2020.scil-1.30</url>
       <bibkey>morgan-levy-2020-frequency</bibkey>
@@ -266,7 +266,7 @@
     <paper id="31">
       <title>Graph-to-Graph Meaning Representation Transformations for Human-Robot Dialogue</title>
       <author><first>Mitchell</first><last>Abrams</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <pages>250–253</pages>
       <url hash="7ccb0bbd">2020.scil-1.31</url>
@@ -277,7 +277,7 @@
       <author><first>Emily</first><last>Ahn</last></author>
       <author><first>Cecilia</first><last>Jimenez</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>254–264</pages>
       <url hash="0ee45e0f">2020.scil-1.32</url>
       <bibkey>ahn-etal-2020-code</bibkey>
@@ -294,7 +294,7 @@
     </paper>
     <paper id="34">
       <title>Tensor Product Decomposition Networks: Uncovering Representations of Structure Learned by Neural Networks</title>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <author><first>Ewan</first><last>Dunbar</last></author>
       <author><first>Paul</first><last>Smolensky</last></author>
@@ -306,8 +306,8 @@
       <title>What do you mean, <fixed-case>BERT</fixed-case>?</title>
       <author><first>Timothee</first><last>Mickus</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>279–290</pages>
       <url hash="b712dde8">2020.scil-1.35</url>
       <bibkey>mickus-etal-2020-mean</bibkey>
@@ -332,7 +332,7 @@
       <author><first>Shohini</first><last>Bhattasali</last></author>
       <author><first>Murielle</first><last>Fabre</last></author>
       <author><first>Christophe</first><last>Pallier</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>313–322</pages>
       <url hash="d318d363">2020.scil-1.38</url>
       <bibkey>bhattasali-etal-2020-modeling</bibkey>
@@ -341,7 +341,7 @@
       <title>A closer look at the performance of neural language models on reflexive anaphor licensing</title>
       <author><first>Jennifer</first><last>Hu</last></author>
       <author><first>Sherry Yong</first><last>Chen</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>323–333</pages>
       <url hash="b74f9870">2020.scil-1.39</url>
       <bibkey>hu-etal-2020-closer</bibkey>
@@ -352,8 +352,8 @@
       <author><first>Qi</first><last>Chen</last></author>
       <author><first>Kyle</first><last>Richardson</last></author>
       <author><first>Atreyee</first><last>Mukherjee</last></author>
-      <author><first>Lawrence S.</first><last>Moss</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <pages>334–344</pages>
       <url hash="c0bdd7fb">2020.scil-1.40</url>
       <bibkey>hu-etal-2020-monalog</bibkey>
@@ -361,7 +361,7 @@
     <paper id="41">
       <title>Modeling the learning of the Person Case Constraint</title>
       <author><first>Adam</first><last>Liter</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <pages>345–354</pages>
       <url hash="91dbdd66">2020.scil-1.41</url>
       <bibkey>liter-feldman-2020-modeling</bibkey>
@@ -378,7 +378,7 @@
       <author><first>Maria</first><last>Ryskina</last></author>
       <author><first>Ella</first><last>Rabinovich</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>367–376</pages>
       <url hash="74369afc">2020.scil-1.43</url>
@@ -416,7 +416,7 @@
       <author><first>Anhad</first><last>Mohananey</last></author>
       <author><first>Wei</first><last>Peng</last></author>
       <author><first>Sheng-Fu</first><last>Wang</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>409–410</pages>
       <url hash="5dead734">2020.scil-1.47</url>
       <bibkey>warstadt-etal-2020-blimp</bibkey>
@@ -453,7 +453,7 @@
     </paper>
     <paper id="52">
       <title>Lexical databases for computational analyses: A linguistic perspective</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <author><first>Farrell</first><last>Ackerman</last></author>
       <author><first>Arturs</first><last>Semenuks</last></author>
       <pages>446–456</pages>
@@ -498,7 +498,7 @@
     </paper>
     <paper id="58">
       <title>Interpreting Sequence-to-Sequence Models for <fixed-case>R</fixed-case>ussian Inflectional Morphology</title>
-      <author><first>David</first><last>King</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
       <author><first>Andrea</first><last>Sims</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
       <pages>481–490</pages>
diff --git a/data/xml/2020.sdp.xml b/data/xml/2020.sdp.xml
index db6adb6692..b20e988e28 100644
--- a/data/xml/2020.sdp.xml
+++ b/data/xml/2020.sdp.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2020-11-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Scholarly Document Processing</booktitle>
-      <editor><first>Muthu Kumar</first><last>Chandrasekaran</last></editor>
+      <editor id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></editor>
       <editor><first>Anita</first><last>de Waard</last></editor>
       <editor><first>Guy</first><last>Feigenblat</last></editor>
-      <editor><first>Dayne</first><last>Freitag</last></editor>
+      <editor id="dayne-freitag"><first>Dayne</first><last>Freitag</last></editor>
       <editor><first>Tirthankar</first><last>Ghosal</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Petr</first><last>Knoth</last></editor>
       <editor><first>David</first><last>Konopnicki</last></editor>
       <editor><first>Philipp</first><last>Mayr</last></editor>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="9">
       <title>Reconstructing Manual Information Extraction with <fixed-case>DB</fixed-case>-to-Document Backprojection: Experiments in the Life Science Domain</title>
-      <author><first>Mark-Christoph</first><last>Müller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Müller</last></author>
       <author><first>Sucheta</first><last>Ghosh</last></author>
       <author><first>Maja</first><last>Rey</last></author>
       <author><first>Ulrike</first><last>Wittig</last></author>
@@ -155,7 +155,7 @@
     <paper id="10">
       <title><fixed-case>D</fixed-case>eep<fixed-case>P</fixed-case>aper<fixed-case>C</fixed-case>omposer: A Simple Solution for Training Data Preparation for Parsing Research Papers</title>
       <author><first>Meng</first><last>Ling</last></author>
-      <author id="jian-chen"><first>Jian</first><last>Chen</last></author>
+      <author><first>Jian</first><last>Chen</last></author>
       <pages>91–96</pages>
       <abstract>We present DeepPaperComposer, a simple solution for preparing highly accurate (100%) training data without manual labeling to extract content from scholarly articles using convolutional neural networks (CNNs). We used our approach to generate data and trained CNNs to extract eight categories of both textual (titles, abstracts, headers, figure and table captions, and other texts) and non-textural content (figures and tables) from 30 years of IEEE VIS conference papers, of which a third were scanned bitmap PDFs. We curated this dataset and named it VISpaper-3K. We then showed our initial benchmark performance using VISpaper-3K over itself and CS-150 using YOLOv3 and Faster-RCNN. We open-source DeepPaperComposer of our training data generation and released the resulting annotation data VISpaper-3K to promote re-producible research.</abstract>
       <url hash="4de2b873">2020.sdp-1.10</url>
@@ -226,7 +226,7 @@
       <title>Towards Grounding of Formulae</title>
       <author><first>Takuto</first><last>Asakura</last></author>
       <author><first>André</first><last>Greiner-Petter</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <pages>138–147</pages>
       <abstract>A large amount of scientific knowledge is represented within mixed forms of natural language texts and mathematical formulae. Therefore, a collaboration of natural language processing and formula analyses, so-called mathematical language processing, is necessary to enable computers to understand and retrieve information from the documents. However, as we will show in this project, a mathematical notation can change its meaning even within the scope of a single paragraph. This flexibility makes it difficult to extract the exact meaning of a mathematical formula. In this project, we will propose a new task direction for grounding mathematical formulae. Particularly, we are addressing the widespread misconception of various research projects in mathematical information retrieval, which presume that mathematical notations have a fixed meaning within a single document. We manually annotated a long scientific paper to illustrate the task concept. Our high inter-annotator agreement shows that the task is well understood for humans. Our results indicate that it is worthwhile to grow the techniques for the proposed task to contribute to the further progress of mathematical language processing.</abstract>
@@ -279,7 +279,7 @@
       <title>On the Use of Web Search to Improve Scientific Collections</title>
       <author><first>Krutarth</first><last>Patel</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
       <pages>174–183</pages>
       <abstract>Despite the advancements in search engine features, ranking methods, technologies, and the availability of programmable APIs, current-day open-access digital libraries still rely on crawl-based approaches for acquiring their underlying document collections. In this paper, we propose a novel search-driven framework for acquiring documents for such scientific portals. Within our framework, publicly-available research paper titles and author names are used as queries to a Web search engine. We were able to obtain ~267,000 unique research papers through our fully-automated framework using ~76,000 queries, resulting in almost 200,000 more papers than the number of queries. Moreover, through a combination of title and author name search, we were able to recover 78% of the original searched titles.</abstract>
       <url hash="6f0e74f8">2020.sdp-1.20</url>
@@ -307,8 +307,8 @@
       <author><first>Andrew</first><last>Head</last></author>
       <author><first>Risham</first><last>Sidhu</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>196–206</pages>
       <abstract>The task of definition detection is important for scholarly papers, because papers often make use of technical terminology that may be unfamiliar to readers. Despite prior work on definition detection, current approaches are far from being accurate enough to use in realworld applications. In this paper, we first perform in-depth error analysis of the current best performing definition detection system and discover major causes of errors. Based on this analysis, we develop a new definition detection system, HEDDEx, that utilizes syntactic features, transformer encoders, and heuristic filters, and evaluate it on a standard sentence-level benchmark. Because current benchmarks evaluate randomly sampled sentences, we propose an alternative evaluation that assesses every sentence within a document. This allows for evaluating recall in addition to precision. HEDDEx outperforms the leading system on both the sentence-level and the document-level tasks, by 12.7 F1 points and 14.4 F1 points, respectively. We note that performance on the high-recall document-level task is much lower than in the standard evaluation approach, due to the necessity of incorporation of document structure as features. We discuss remaining challenges in document-level definition detection, ideas for improvements, and potential issues for the development of reading aid applications.</abstract>
       <url hash="f37ba031">2020.sdp-1.22</url>
@@ -380,7 +380,7 @@
       <author><first>Saichethan</first><last>Reddy</last></author>
       <author><first>Naveen</first><last>Saini</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>242–250</pages>
       <abstract>In this paper, we present the IIIT Bhagalpur and IIT Patna team’s effort to solve the three shared tasks namely, CL-SciSumm 2020, CL-LaySumm 2020, LongSumm 2020 at SDP 2020. The theme of these tasks is to generate medium-scale, lay and long summaries, respectively, for scientific articles. For the first two tasks, unsupervised systems are developed, while for the third one, we develop a supervised system. The performances of all the systems were evaluated on the associated datasets with the shared tasks in term of well-known ROUGE metric.</abstract>
       <url hash="99701ac1">2020.sdp-1.27</url>
@@ -418,7 +418,7 @@
       <author><first>Harshavardhan</first><last>Kundarapu</last></author>
       <author><first>Naveen</first><last>Saini</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>270–276</pages>
       <abstract>The publication rate of scientific literature increases rapidly, which poses a challenge for researchers to keep themselves updated with new state-of-the-art. Scientific document summarization solves this problem by summarizing the essential fact and findings of the document. In the current paper, we present the participation of IITP-AI-NLP-ML team in three shared tasks, namely, CL-SciSumm 2020, LaySumm 2020, LongSumm 2020, which aims to generate medium, lay, and long summaries of the scientific articles, respectively. To solve CL-SciSumm 2020 and LongSumm 2020 tasks, three well-known clustering techniques are used, and then various sentence scoring functions, including textual entailment, are used to extract the sentences from each cluster for a summary generation. For LaySumm 2020, an encoder-decoder based deep learning model has been utilized. Performances of our developed systems are evaluated in terms of ROUGE measures on the associated datasets with the shared task.</abstract>
       <url hash="9e28cc83">2020.sdp-1.30</url>
diff --git a/data/xml/2020.semeval.xml b/data/xml/2020.semeval.xml
index 209b5dd0e9..04c87e72ea 100644
--- a/data/xml/2020.semeval.xml
+++ b/data/xml/2020.semeval.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fourteenth Workshop on Semantic Evaluation</booktitle>
-      <editor><first>Aurelie</first><last>Herbelot</last></editor>
+      <editor id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></editor>
       <editor><first>Xiaodan</first><last>Zhu</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
       <editor><first>Nathan</first><last>Schneider</last></editor>
@@ -37,7 +37,7 @@
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>24–35</pages>
       <abstract>Lexical entailment (LE) is a fundamental asymmetric lexico-semantic relation, supporting the hierarchies in lexical resources (e.g., WordNet, ConceptNet) and applications like natural language inference and taxonomy induction. Multilingual and cross-lingual NLP applications warrant models for LE detection that go beyond language boundaries. As part of SemEval 2020, we carried out a shared task (Task 2) on multilingual and cross-lingual LE. The shared task spans three dimensions: (1) monolingual vs. cross-lingual LE, (2) binary vs. graded LE, and (3) a set of 6 diverse languages (and 15 corresponding language pairs). We offered two different evaluation tracks: (a) Dist: for unsupervised, fully distributional models that capture LE solely on the basis of unannotated corpora, and (b) Any: for externally informed models, allowed to leverage any resources, including lexico-semantic networks (e.g., WordNet or BabelNet). In the Any track, we recieved runs that push state-of-the-art across all languages and language pairs, for both binary LE detection and graded LE prediction.</abstract>
       <url hash="592bb6ad">2020.semeval-1.2</url>
@@ -109,7 +109,7 @@
       <author><first>Jens</first><last>Kaiser</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Sean</first><last>Papay</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>81–89</pages>
       <abstract>We present the results of our system for SemEval-2020 Task 1 that exploits a commonly used lexical semantic change detection model based on Skip-Gram with Negative Sampling. Our system focuses on Vector Initialization (VI) alignment, compares VI to the currently top-ranking models for Subtask 2 and demonstrates that these can be outperformed if we optimize VI dimensionality. We demonstrate that differences in performance can largely be attributed to model-specific sources of noise, and we reveal a strong relationship between dimensionality and frequency-induced noise in VI alignment. Our results suggest that lexical semantic change models integrating vector space alignment should pay more attention to the role of the dimensionality parameter.</abstract>
       <url hash="5be9b35c">2020.semeval-1.8</url>
@@ -204,7 +204,7 @@
     <paper id="17">
       <title><fixed-case>M</fixed-case>ineria<fixed-case>UNAM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 3: Predicting Contextual <fixed-case>W</fixed-case>ord<fixed-case>S</fixed-case>imilarity Using a Centroid Based Approach and Word Embeddings</title>
       <author><first>Helena</first><last>Gomez-Adorno</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Jorge</first><last>Reyes-Magaña</last></author>
       <author><first>Benjamín</first><last>Moreno</last></author>
       <author><first>Ramón</first><last>Casillas</last></author>
@@ -217,7 +217,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>MULTISEM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 3: Fine-tuning <fixed-case>BERT</fixed-case> for Lexical Meaning</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>158–165</pages>
       <abstract>We present the MULTISEM systems submitted to SemEval 2020 Task 3: Graded Word Similarity in Context (GWSC). We experiment with injecting semantic knowledge into pre-trained BERT models through fine-tuning on lexical semantic tasks related to GWSC. We use existing semantically annotated datasets, and propose to approximate similarity through automatically generated lexical substitutes in context. We participate in both GWSC subtasks and address two languages, English and Finnish. Our best English models occupy the third and fourth positions in the ranking for the two subtasks. Performance is lower for the Finnish models which are mid-ranked in the respective subtasks, highlighting the important role of data availability for fine-tuning.</abstract>
@@ -236,7 +236,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>BOS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 1: Word Sense Induction via Lexical Substitution for Lexical Semantic Change Detection</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Vasily</first><last>Zhikov</last></author>
       <pages>171–179</pages>
       <abstract>SemEval-2020 Task 1 is devoted to detection of changes in word meaning over time. The first subtask raises a question if a particular word has acquired or lost any of its senses during the given time period. The second subtask requires estimating the change in frequencies of the word senses. We have submitted two solutions for both subtasks. The first solution performs word sense induction (WSI) first, then makes the decision based on the induced word senses. We extend the existing WSI method based on clustering of lexical substitutes generated with neural language models and adapt it to the task. The second solution exploits a well-known approach to semantic change detection, that includes building word2vec SGNS vectors, aligning them with Orthogonal Procrustes and calculating cosine distance between resulting vectors. While WSI-based solution performs better in Subtask 1, which requires binary decisions, the second solution outperforms it in Subtask 2 and obtains the 3rd best result in this subtask.</abstract>
@@ -279,7 +279,7 @@
       <title><fixed-case>E</fixed-case>mb<fixed-case>L</fixed-case>ex<fixed-case>C</fixed-case>hange at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 1: Unsupervised Embedding-based Detection of Lexical Semantic Changes</title>
       <author><first>Ehsaneddin</first><last>Asgari</last></author>
       <author><first>Christoph</first><last>Ringlstetter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>201–207</pages>
       <abstract>This paper describes EmbLexChange, a system introduced by the “Life-Language” team for SemEval-2020 Task 1, on unsupervised detection of lexical-semantic changes. EmbLexChange is defined as the divergence between the embedding based profiles of word w (calculated with respect to a set of reference words) in the source and the target domains (source and target domains can be simply two time frames t_1 and t_2). The underlying assumption is that the lexical-semantic change of word <tex-math>w</tex-math> would affect its co-occurring words and subsequently alters the neighborhoods in the embedding spaces. We show that using a resampling framework for the selection of reference words (with conserved senses), we can more reliably detect lexical-semantic changes in English, German, Swedish, and Latin. EmbLexChange achieved second place in the binary detection of semantic changes in the SemEval-2020.</abstract>
       <url hash="488a5150">2020.semeval-1.24</url>
@@ -513,7 +513,7 @@
     <paper id="44">
       <title><fixed-case>C</fixed-case>ardiff <fixed-case>U</fixed-case>niversity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Fine-tuning <fixed-case>BERT</fixed-case> for Domain-Specific Definition Classification</title>
       <author><first>Shelan</first><last>Jeawak</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>361–366</pages>
       <abstract>We describe the system submitted to SemEval-2020 Task 6, Subtask 1. The aim of this subtask is to predict whether a given sentence contains a definition or not. Unsurprisingly, we found that strong results can be achieved by fine-tuning a pre-trained BERT language model. In this paper, we analyze the performance of this strategy. Among others, we show that results can be improved by using a two-step fine-tuning process, in which the BERT model is first fine-tuned on the full training set, and then further specialized towards a target domain.</abstract>
@@ -526,7 +526,7 @@
       <author><first>Anandh</first><last>Konar</last></author>
       <author><first>Chenyang</first><last>Huang</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <pages>367–373</pages>
       <abstract>In this paper, we describe our mUlti-task learNIng for cOmmonsense reasoNing (UNION) system submitted for Task C of the SemEval2020 Task 4, which is to generate a reason explaining why a given false statement is non-sensical. However, we found in the early experiments that simple adaptations such as fine-tuning GPT2 often yield dull and non-informative generations (e.g. simple negations). In order to generate more meaningful explanations, we propose UNION, a unified end-to-end framework, to utilize several existing commonsense datasets so that it allows a model to learn more dynamics under the scope of commonsense reasoning. In order to perform model selection efficiently, accurately, and promptly, we also propose a couple of auxiliary automatic evaluation metrics so that we can extensively compare the models from different perspectives. Our submitted system not only results in a good performance in the proposed metrics but also outperforms its competitors with the highest achieved score of 2.10 for human evaluation while remaining a BLEU score of 15.7. Our code is made publicly available.</abstract>
       <url hash="32e36869">2020.semeval-1.45</url>
@@ -538,7 +538,7 @@
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Martin</first><last>Docekal</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>374–390</pages>
       <abstract>We participated in all three subtasks. In subtasks A and B, our submissions are based on pretrained language representation models (namely ALBERT) and data augmentation. We experimented with solving the task for another language, Czech, by means of multilingual models and machine translated dataset, or translated model inputs. We show that with a strong machine translation system, our system can be used in another language with a small accuracy loss. In subtask C, our submission, which is based on pretrained sequence-to-sequence model (BART), ranked 1st in BLEU score ranking, however, we show that the correlation between BLEU and human evaluation, in which our submission ended up 4th, is low. We analyse the metrics used in the evaluation and we propose an additional score based on model from subtask B, which correlates well with our manual ranking, as well as reranking method based on the same principle. We performed an error and dataset analysis for all subtasks and we present our findings.</abstract>
       <url hash="ab673fcf">2020.semeval-1.46</url>
@@ -553,7 +553,7 @@
       <author><first>Kwong Sak</first><last>Leung</last></author>
       <author><first>Jia</first><last>Zhu</last></author>
       <author><first>Gabriel Pui Cheong</first><last>Fung</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>391–400</pages>
       <abstract>This paper describes our system submitted to task 4 of SemEval 2020: Commonsense Validation and Explanation (ComVE) which consists of three sub-tasks. The task is to directly validate the given sentence whether or not to make sense and require the model to explain it. Based on BERT architecture with the multi-task setting, we propose an effective and interpretable “Explain, Reason and Predict” (ERP) system to solve the three sub-tasks about commonsense: (a) Validation, (b) Reasoning, and (c) Explanation. Inspired by cognitive studies of common sense, our system first generates a reason or understanding of the sentences and then choose which one statement makes sense, which is achieved by multi-task learning. During the post-evaluation, our system has reached 92.9% accuracy in subtask A (rank 11), 89.7% accuracy in subtask B (rank 9), and BLEU score of 12.9 in subtask C (rank 8).</abstract>
       <url hash="e82e4b8e">2020.semeval-1.47</url>
@@ -595,7 +595,7 @@
     <paper id="51">
       <title><fixed-case>SWAG</fixed-case>ex at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 4: Commonsense Explanation as Next Event Prediction</title>
       <author><first>Wiem</first><last>Ben Rim</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>422–429</pages>
       <abstract>We describe the system submitted by the SWAGex team to the SemEval-2020 Commonsense Validation and Explanation Task. We use multiple methods on the pre-trained language model BERT (Devlin et al., 2018) for tasks that require the system to recognize sentences against commonsense and justify the reasoning behind this decision. Our best performing model is BERT trained on SWAG and fine-tuned for the task. We investigate the ability to transfer commonsense knowledge from SWAG to SemEval-2020 by training a model for the Explanation task with Next Event Prediction data</abstract>
       <url hash="17d6df57">2020.semeval-1.51</url>
@@ -607,7 +607,7 @@
       <author><first>Thanet</first><last>Markchom</last></author>
       <author><first>Bhuvana</first><last>Dhruva</last></author>
       <author><first>Chandresh</first><last>Pravin</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>430–436</pages>
       <abstract>SemEval Task 4 Commonsense Validation and Explanation Challenge is to validate whether a system can differentiate natural language statements that make sense from those that do not make sense. Two subtasks, A and B, are focused in this work, i.e., detecting against-common-sense statements and selecting explanations of why they are false from the given options. Intuitively, commonsense validation requires additional knowledge beyond the given statements. Therefore, we propose a system utilising pre-trained sentence transformer models based on BERT, RoBERTa and DistillBERT architectures to embed the statements before classification. According to the results, these embeddings can improve the performance of the typical MLP and LSTM classifiers as downstream models of both subtasks compared to regular tokenised statements. These embedded statements are shown to comprise additional information from external resources which help validate common sense in natural language.</abstract>
       <url hash="4da66650">2020.semeval-1.52</url>
@@ -619,7 +619,7 @@
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>Martin</first><last>Docekal</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>437–444</pages>
       <abstract>This paper describes BUT-FIT’s submission at SemEval-2020 Task 5: Modelling Causal Reasoning in Language: Detecting Counterfactuals. The challenge focused on detecting whether a given statement contains a counterfactual (Subtask 1) and extracting both antecedent and consequent parts of the counterfactual from the text (Subtask 2). We experimented with various state-of-the-art language representation models (LRMs). We found RoBERTa LRM to perform the best in both subtasks. We achieved the first place in both exact match and F1 for Subtask 2 and ranked second for Subtask 1.</abstract>
       <url hash="cf93293d">2020.semeval-1.53</url>
@@ -651,7 +651,7 @@
       <title><fixed-case>IITK</fixed-case>-<fixed-case>RSA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 5: Detecting Counterfactuals</title>
       <author><first>Anirudh Anil</first><last>Ojha</last></author>
       <author><first>Rohin</first><last>Garg</last></author>
-      <author id="shashank-gupta"><first>Shashank</first><last>Gupta</last></author>
+      <author><first>Shashank</first><last>Gupta</last></author>
       <author><first>Ashutosh</first><last>Modi</last></author>
       <pages>458–467</pages>
       <abstract>This paper describes our efforts in tackling Task 5 of SemEval-2020. The task involved detecting a class of textual expressions known as counterfactuals and separating them into their constituent elements. Our final submitted approaches were an ensemble of various fine-tuned transformer-based and CNN-based models for the first subtask and a transformer model with dependency tree information for the second subtask. We ranked 4-th and 9-th in the overall leaderboard. We also explored various other approaches that involved classical methods, other neural architectures and incorporation of different linguistic features.</abstract>
@@ -685,7 +685,7 @@
     <paper id="59">
       <title>Gorynych Transformer at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Multi-task Learning for Definition Extraction</title>
       <author><first>Adis</first><last>Davletov</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Alexander</first><last>Shatilov</last></author>
       <author><first>Denis</first><last>Gordeev</last></author>
       <author><first>Alexey</first><last>Rey</last></author>
@@ -702,7 +702,7 @@
       <author><first>Yang</first><last>Fan</last></author>
       <author><first>Peng</first><last>Jin</last></author>
       <author><first>Yuanchao</first><last>Liu</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>494–500</pages>
       <abstract>This paper describes our system that participated in the SemEval-2020 task 4: Commonsense Validation and Explanation. For this task, it is obvious that external knowledge, such as Knowledge graph, can help the model understand commonsense in natural language statements. But how to select the right triples for statements remains unsolved, so how to reduce the interference of irrelevant triples on model performance is a research focus. This paper adopt a modified K-BERT as the language encoder, to enhance language representation through triples from knowledge graphs. Experiments show that our method is better than models without external knowledge, and is slightly better than the original K-BERT. We got an accuracy score of 0.97 in subtaskA, ranking 1/45, and got an accuracy score of 0.948, ranking 2/35.</abstract>
       <url hash="c18d8eff">2020.semeval-1.60</url>
@@ -1075,8 +1075,8 @@
       <title><fixed-case>RGCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Neural Approaches to <fixed-case>D</fixed-case>efinition<fixed-case>E</fixed-case>xtraction</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
       <author><first>Alistair</first><last>Plum</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>717–723</pages>
       <abstract>This paper presents the RGCL team submission to SemEval 2020 Task 6: DeftEval, subtasks 1 and 2. The system classifies definitions at the sentence and token levels. It utilises state-of-the-art neural network architectures, which have some task-specific adaptations, including an automatically extended training set. Overall, the approach achieves acceptable evaluation scores, while maintaining flexibility in architecture selection.</abstract>
       <url hash="dc203e7e">2020.semeval-1.94</url>
@@ -1140,7 +1140,7 @@
       <author><first>Amitava</first><last>Das</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <author><first>Viswanath</first><last>Pulabaigari</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>759–773</pages>
       <abstract>Information on social media comprises of various modalities such as textual, visual and audio. NLP and Computer Vision communities often leverage only one prominent modality in isolation to study social media. However, computational processing of Internet memes needs a hybrid approach. The growing ubiquity of Internet memes on social media platforms such as Facebook, Instagram, and Twitter further suggests that we can not ignore such multimodal content anymore. To the best of our knowledge, there is not much attention towards meme emotion analysis. The objective of this proposal is to bring the attention of the research community towards the automatic processing of Internet memes. The task Memotion analysis released approx 10K annotated memes- with human annotated labels namely sentiment(positive, negative, neutral), type of emotion(sarcastic,funny,offensive, motivation) and their corresponding intensity. The challenge consisted of three subtasks: sentiment (positive, negative, and neutral) analysis of memes,overall emotion (humor, sarcasm, offensive, and motivational) classification of memes, and classifying intensity of meme emotion. The best performances achieved were F1 (macro average) scores of 0.35, 0.51 and 0.32, respectively for each of the three subtasks.</abstract>
       <url hash="7edbdc81">2020.semeval-1.99</url>
@@ -1154,7 +1154,7 @@
       <author><first>Sudipta</first><last>Kar</last></author>
       <author><first>Suraj</first><last>Pandey</last></author>
       <author><first>Srinivas</first><last>PYKL</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <author><first>Amitava</first><last>Das</last></author>
@@ -1209,7 +1209,7 @@
       <author><first>Nicolaj Filrup</first><last>Rasmussen</last></author>
       <author><first>Thai</first><last>Wang</last></author>
       <author><first>Marco</first><last>Placenti</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>824–832</pages>
       <abstract>This paper describes a system that aims at assessing humour intensity in edited news headlines as part of the 7th task of SemEval-2020 on “Humor, Emphasis and Sentiment”. Various factors need to be accounted for in order to assess the funniness of an edited headline. We propose an architecture that uses hand-crafted features, knowledge bases and a language model to understand humour, and combines them in a regression model. Our system outperforms two baselines. In general, automatic humour assessment remains a difficult task.</abstract>
       <url hash="8e995fbd">2020.semeval-1.104</url>
@@ -1232,7 +1232,7 @@
       <author><first>Martin</first><last>Docekal</last></author>
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>843–851</pages>
       <abstract>This paper describes our system that was designed for Humor evaluation within the SemEval-2020 Task 7. The system is based on convolutional neural network architecture. We investigate the system on the official dataset, and we provide more insight to model itself to see how the learned inner features look.</abstract>
       <url hash="f0b875f0">2020.semeval-1.106</url>
@@ -1362,7 +1362,7 @@
       <title><fixed-case>FII</fixed-case>-<fixed-case>UAIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis for Code-Mixed Social Media Text Using <fixed-case>CNN</fixed-case></title>
       <author><first>Lavinia</first><last>Aparaschivei</last></author>
       <author><first>Andrei</first><last>Palihovici</last></author>
-      <author><first>Daniela</first><last>Gîfu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last></author>
       <pages>928–933</pages>
       <abstract>The “Sentiment Analysis for Code-Mixed Social Media Text” task at the SemEval 2020 competition focuses on sentiment analysis in code-mixed social media text , specifically, on the combination of English with Spanish (Spanglish) and Hindi (Hinglish). In this paper, we present a system able to classify tweets, from Spanish and English languages, into positive, negative and neutral. Firstly, we built a classifier able to provide corresponding sentiment labels. Besides the sentiment labels, we provide the language labels at the word level. Secondly, we generate a word-level representation, using Convolutional Neural Network (CNN) architecture. Our solution indicates promising results for the Sentimix Spanglish-English task (0.744), the team, Lavinia_Ap, occupied the 9th place. However, for the Sentimix Hindi-English task (0.324) the results have to be improved.</abstract>
       <url hash="5bc816a5">2020.semeval-1.118</url>
@@ -1416,7 +1416,7 @@
       <author><first>Jason</first><last>Angel</last></author>
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
       <author><first>Antonio</first><last>Tamayo</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>957–962</pages>
       <abstract>Code-switching is a phenomenon in which two or more languages are used in the same message. Nowadays, it is quite common to find messages with languages mixed in social media. This phenomenon presents a challenge for sentiment analysis. In this paper, we use a standard convolutional neural network model to predict the sentiment of tweets in a blend of Spanish and English languages. Our simple approach achieved a F1-score of 0:71 on test set on the competition. We analyze our best model capabilities and perform error analysis to expose important difficulties for classifying sentiment in a code-switching setting.</abstract>
       <url hash="de54a9f8">2020.semeval-1.123</url>
@@ -1439,7 +1439,7 @@
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>968–974</pages>
       <abstract>Code mixing is a common phenomena in multilingual societies where people switch from one language to another for various reasons. Recent advances in public communication over different social media sites have led to an increase in the frequency of code-mixed usage in written language. In this paper, we present the Generative Morphemes with Attention (GenMA) Model sentiment analysis system contributed to SemEval 2020 Task 9 SentiMix. The system aims to predict the sentiments of the given English-Hindi code-mixed tweets without using word-level language tags instead inferring this automatically using a morphological model. The system is based on a novel deep neural network (DNN) architecture, which has outperformed the baseline F1-score on the test data-set as well as the validation data-set. Our results can be found under the user name “koustava” on the “Sentimix Hindi English” page.</abstract>
       <url hash="caa55042">2020.semeval-1.125</url>
@@ -1484,7 +1484,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 7: Assessing Humor in Edited News Headlines Using <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Attention</title>
       <author><first>Tiantian</first><last>Zhang</last></author>
       <author><first>Zhixuan</first><last>Chen</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>995–1000</pages>
       <abstract>In this paper we describe our system submitted to SemEval 2020 Task 7: “Assessing Humor in Edited News Headlines”. We participated in all subtasks, in which the main goal is to predict the mean funniness of the edited headline given the original and the edited headline. Our system involves two similar sub-networks, which generate vector representations for the original and edited headlines respectively. And then we do a subtract operation of the outputs from two sub-networks to predict the funniness of the edited headline.</abstract>
       <url hash="7cdd3cbb">2020.semeval-1.129</url>
@@ -1551,7 +1551,7 @@
       <author><first>Sofie</first><last>Labat</last></author>
       <author><first>Olha</first><last>Kaminska</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>1033–1040</pages>
       <abstract>This paper presents two different systems for the SemEval shared task 7 on Assessing Humor in Edited News Headlines, sub-task 1, where the aim was to estimate the intensity of humor generated in edited headlines. Our first system is a feature-based machine learning system that combines different types of information (e.g. word embeddings, string similarity, part-of-speech tags, perplexity scores, named entity recognition) in a Nu Support Vector Regressor (NuSVR). The second system is a deep learning-based approach that uses the pre-trained language model RoBERTa to learn latent features in the news headlines that are useful to predict the funniness of each headline. The latter system was also our final submission to the competition and is ranked seventh among the 49 participating teams, with a root-mean-square error (RMSE) of 0.5253.</abstract>
       <url hash="b972a3bd">2020.semeval-1.135</url>
@@ -1601,7 +1601,7 @@
     </paper>
     <paper id="140">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 7: Using Co-occurrence Frequencies to Capture Unexpectedness</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <pages>1066–1070</pages>
       <abstract>We describe the UTFPR system for SemEval-2020’s Task 7: Assessing Humor in Edited News Headlines. Ours is a minimalist unsupervised system that uses word co-occurrence frequencies from large corpora to capture unexpectedness as a mean to capture funniness. Our system placed 22nd on the shared task’s Task 2. We found that our approach requires more text than we used to perform reliably, and that unexpectedness alone is not sufficient to gauge funniness for humorous content that targets a diverse target audience.</abstract>
       <url hash="b9487b83">2020.semeval-1.140</url>
@@ -1660,7 +1660,7 @@
       <author><first>Zhen</first><last>Li</last></author>
       <author><first>Yaojie</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1100–1105</pages>
       <abstract>Internet memes emotion recognition is focused by many researchers. In this paper, we adopt BERT and ResNet for evaluation of detecting the emotions of Internet memes. We focus on solving the problem of data imbalance and data contains noise. We use RandAugment to enhance the data of the picture, and use Training Signal Annealing (TSA) to solve the impact of the imbalance of the label. At the same time, a new loss function is designed to ensure that the model is not affected by input noise which will improve the robustness of the model. We participated in sub-task a and our model based on BERT obtains 34.58% macro F1 score, ranking 10/32.</abstract>
       <url hash="77f9a189">2020.semeval-1.145</url>
@@ -1728,7 +1728,7 @@
     <paper id="151">
       <title>Infotec + <fixed-case>C</fixed-case>entro<fixed-case>GEO</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 8: Deep Learning and Text Categorization approach for Memes classification</title>
       <author><first>Guillermo</first><last>Ruiz</last></author>
-      <author><first>Eric S.</first><last>Tellez</last></author>
+      <author id="eric-sadit-tellez"><first>Eric S.</first><last>Tellez</last></author>
       <author><first>Daniela</first><last>Moctezuma</last></author>
       <author><first>Sabino</first><last>Miranda-Jiménez</last></author>
       <author><first>Tania</first><last>Ramírez-delReal</last></author>
@@ -1798,7 +1798,7 @@
     <paper id="157">
       <title><fixed-case>SIS</fixed-case>@<fixed-case>IIITH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 8: An Overview of Simple Text Classification Methods for Meme Analysis</title>
       <author><first>Sravani</first><last>Boinepelli</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>1190–1194</pages>
       <abstract>Memes are steadily taking over the feeds of the public on social media. There is always the threat of malicious users on the internet posting offensive content, even through memes. Hence, the automatic detection of offensive images/memes is imperative along with detection of offensive text. However, this is a much more complex task as it involves both visual cues as well as language understanding and cultural/context knowledge. This paper describes our approach to the task of SemEval-2020 Task 8: Memotion Analysis. We chose to participate only in Task A which dealt with Sentiment Classification, which we formulated as a text classification problem. Through our experiments, we explored multiple training models to evaluate the performance of simple text classification algorithms on the raw text obtained after running OCR on meme images. Our submitted model achieved an accuracy of 72.69% and exceeded the existing baseline’s Macro F1 score by 8% on the official test dataset. Apart from describing our official submission, we shall elucidate how different classification models respond to this task.</abstract>
@@ -1821,7 +1821,7 @@
       <author><first>Zehao</first><last>Liu</last></author>
       <author><first>Emmanuel</first><last>Osei-Brefo</last></author>
       <author><first>Siyuan</first><last>Chen</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>1201–1207</pages>
       <abstract>Memes are widely used on social media. They usually contain multi-modal information such as images and texts, serving as valuable data sources to analyse opinions and sentiment orientations of online communities. The provided memes data often face an imbalanced data problem, that is, some classes or labelled sentiment categories significantly outnumber other classes. This often results in difficulty in applying machine learning techniques where balanced labelled input data are required. In this paper, a Gaussian Mixture Model sampling method is proposed to tackle the problem of class imbalance for the memes sentiment classification task. To utilise both text and image data, a multi-modal CNN-LSTM model is proposed to jointly learn latent features for positive, negative and neutral category predictions. The experiments show that the re-sampling model can slightly improve the accuracy on the trial data of sub-task A of Task 8. The multi-modal CNN-LSTM model can achieve macro F1 score 0.329 on the test set.</abstract>
       <url hash="13b03497">2020.semeval-1.159</url>
@@ -1853,7 +1853,7 @@
     </paper>
     <paper id="162">
       <title><fixed-case>BAKSA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Bolstering <fixed-case>CNN</fixed-case> with Self-Attention for Sentiment Analysis of Code Mixed Text</title>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Harsh</first><last>Agarwal</last></author>
       <author><first>Keshav</first><last>Bansal</last></author>
       <author><first>Ashutosh</first><last>Modi</last></author>
@@ -1922,7 +1922,7 @@
     <paper id="168">
       <title><fixed-case>IIT</fixed-case> <fixed-case>G</fixed-case>andhinagar at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Code-Mixed Sentiment Classification Using Candidate Sentence Generation and Selection</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>1259–1264</pages>
       <abstract>Code-mixing is the phenomenon of using multiple languages in the same utterance. It is a frequently used pattern of communication on social media sites such as Facebook, Twitter, etc. Sentiment analysis of the monolingual text is a well-studied task. Code-mixing adds to the challenge of analyzing the sentiment of the text on various platforms such as social media, online gaming, forums, product reviews, etc. We present a candidate sentence generation and selection based approach on top of the Bi-LSTM based neural classifier to classify the Hinglish code-mixed text into one of the three sentiment classes positive, negative, or neutral. The proposed candidate sentence generation and selection based approach show an improvement in the system performance as compared to the Bi-LSTM based neural classifier. We can extend the proposed method to solve other problems with code-mixing in the textual data, such as humor-detection, intent classification, etc.</abstract>
       <url hash="f31d59a2">2020.semeval-1.168</url>
@@ -1944,7 +1944,7 @@
       <title><fixed-case>IUST</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis for Code-Mixed Social Media Text Using Deep Neural Networks and Linear Baselines</title>
       <author><first>Soroush</first><last>Javdan</last></author>
       <author><first>Taha</first><last>Shangipour ataei</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <pages>1270–1275</pages>
       <abstract>Sentiment Analysis is a well-studied field of Natural Language Processing. However, the rapid growth of social media and noisy content within them poses significant challenges in addressing this problem with well-established methods and tools. One of these challenges is code-mixing, which means using different languages to convey thoughts in social media texts. Our group, with the name of IUST(username: TAHA), participated at the SemEval-2020 shared task 9 on Sentiment Analysis for Code-Mixed Social Media Text, and we have attempted to develop a system to predict the sentiment of a given code-mixed tweet. We used different preprocessing techniques and proposed to use different methods that vary from NBSVM to more complicated deep neural network models. Our best performing method obtains an F1 score of 0.751 for the Spanish-English sub-task and 0.706 over the Hindi-English sub-task.</abstract>
       <url hash="94093d17">2020.semeval-1.170</url>
@@ -1954,7 +1954,7 @@
     <paper id="171">
       <title><fixed-case>JUNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Data Using Grid Search Cross Validation</title>
       <author><first>Avishek</first><last>Garain</last></author>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <pages>1276–1280</pages>
       <abstract>Code-mixing is a phenomenon which arises mainly in multilingual societies. Multilingual people, who are well versed in their native languages and also English speakers, tend to code-mix using English-based phonetic typing and the insertion of anglicisms in their main language. This linguistic phenomenon poses a great challenge to conventional NLP domains such as Sentiment Analysis, Machine Translation, and Text Summarization, to name a few. In this work, we focus on working out a plausible solution to the domain of Code-Mixed Sentiment Analysis. This work was done as participation in the SemEval-2020 Sentimix Task, where we focused on the sentiment analysis of English-Hindi code-mixed sentences. our username for the submission was “sainik.mahata” and team name was “JUNLP”. We used feature extraction algorithms in conjunction with traditional machine learning algorithms such as SVR and Grid Search in an attempt to solve the task. Our approach garnered an f1-score of 66.2% when tested using metrics prepared by the organizers of the task.</abstract>
@@ -1966,7 +1966,7 @@
       <title><fixed-case>LIMSI</fixed-case>_<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Recurrent Convolutional Neural Network for Code-mixed Sentiment Analysis</title>
       <author><first>Somnath</first><last>Banerjee</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>1281–1287</pages>
@@ -2001,7 +2001,7 @@
       <author><first>Subhra Jyoti</first><last>Baroi</last></author>
       <author><first>Nivedita</first><last>Singh</last></author>
       <author><first>Ringki</first><last>Das</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>1298–1303</pages>
       <abstract>Sentiment Analysis refers to the process of interpreting what a sentence emotes and classifying them as positive, negative, or neutral. The widespread popularity of social media has led to the generation of a lot of text data and specifically, in the Indian social media scenario, the code-mixed Hinglish text i.e, the words of Hindi language, written in the Roman script along with other English words is a common sight. The ability to effectively understand the sentiments in these texts is much needed. This paper proposes a system titled NITS-Hinglish to effectively carry out the sentiment analysis of such code-mixed Hinglish text. The system has fared well with a final F-Score of 0.617 on the test data.</abstract>
       <url hash="bc14e35e">2020.semeval-1.175</url>
@@ -2132,7 +2132,7 @@
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Rostislav</first><last>Petrov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1377–1414</pages>
       <abstract>We present the results and the main findings of SemEval-2020 Task 11 on Detection of Propaganda Techniques in News Articles. The task featured two subtasks. Subtask SI is about Span Identification: given a plain-text document, spot the specific text fragments containing propaganda. Subtask TC is about Technique Classification: given a specific text fragment, in the context of a full document, determine the propaganda technique it uses, choosing from an inventory of 14 possible propaganda techniques. The task attracted a large number of participants: 250 teams signed up to participate and 44 made a submission on the test set. In this paper, we present the task, analyze the results, and discuss the system submissions and the methods they used. For both subtasks, the best systems used pre-trained Transformers and ensembles.</abstract>
       <award>Best Task Honorable Mention</award>
@@ -2145,7 +2145,7 @@
       <author><first>Dawid</first><last>Jurkiewicz</last></author>
       <author><first>Łukasz</first><last>Borchmann</last></author>
       <author><first>Izabela</first><last>Kosmala</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <pages>1415–1424</pages>
       <abstract>This paper presents the winning system for the propaganda Technique Classification (TC) task and the second-placed system for the propaganda Span Identification (SI) task. The purpose of TC task was to identify an applied propaganda technique given propaganda text fragment. The goal of SI task was to find specific text fragments which contain at least one propaganda technique. Both of the developed solutions used semi-supervised learning technique of self-training. Interestingly, although CRF is barely used with transformer-based language models, the SI task was approached with RoBERTa-CRF architecture. An ensemble of RoBERTa-based models was proposed for the TC task, with one of them making use of Span CLS layers we introduce in the present paper. In addition to describing the submitted systems, an impact of architectural decisions and training schemes is investigated along with remarks regarding training models of the same or better quality with lower computational budget. Finally, the results of error analysis are presented.</abstract>
       <award>Best Paper</award>
@@ -2156,12 +2156,12 @@
     <paper id="188">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Multilingual Offensive Language Identification in Social Media (<fixed-case>O</fixed-case>ffens<fixed-case>E</fixed-case>val 2020)</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Pepa</first><last>Atanasova</last></author>
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Hamdy</first><last>Mubarak</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Zeses</first><last>Pitenis</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <pages>1425–1447</pages>
@@ -2202,7 +2202,7 @@
       <title>Aschern at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: It Takes Three to Tango: <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a, <fixed-case>CRF</fixed-case>, and Transfer Learning</title>
       <author><first>Anton</first><last>Chernyavskiy</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1462–1468</pages>
       <abstract>We describe our system for SemEval-2020 Task 11 on Detection of Propaganda Techniques in News Articles. We developed ensemble models using RoBERTa-based neural architectures, additional CRF layers, transfer learning between the two subtasks, and advanced post-processing to handle the multi-label nature of the task, the consistency between nested spans, repetitions, and labels from similar spans in training. We achieved sizable improvements over baseline fine-tuned RoBERTa models, and the official evaluation ranked our system 3rd (almost tied with the 2nd) out of 36 teams on the span identification subtask with an F1 score of 0.491, and 2nd (almost tied with the 1st) out of 31 teams on the technique classification subtask with an F1 score of 0.62.</abstract>
       <url hash="a197e0fd">2020.semeval-1.191</url>
@@ -2300,7 +2300,7 @@
     <paper id="200">
       <title><fixed-case>B</fixed-case>ham<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: An Ensemble of Different Word Embeddings and Emotion Transfer Learning for <fixed-case>A</fixed-case>rabic Offensive Language Identification in Social Media</title>
       <author><first>Abdullah I.</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>1532–1538</pages>
       <abstract>Social media platforms such as Twitter offer people an opportunity to publish short posts in which they can share their opinions and perspectives. While these applications can be valuable, they can also be exploited to promote negative opinions, insults, and hatred against a person, race, or group. These opinions can be spread to millions of people at the click of a mouse. As such, there is a need to develop mechanisms by which offensive language can be automatically detected in social media channels and managed in a timely manner. To help achieve this goal, SemEval 2020 offered a shared task (OffensEval 2020) that involved the detection of offensive text in Arabic. We propose an ensemble approach that combines different levels of word embedding models and transfers learning from other sources of emotion-related tasks. The proposed system ranked 9th out of the 52 entries within the Arabic Offensive language identification subtask.</abstract>
       <url hash="51b2d1d6">2020.semeval-1.200</url>
@@ -2364,7 +2364,7 @@
       <author><first>Marc</first><last>Pàmies</last></author>
       <author><first>Emily</first><last>Öhman</last></author>
       <author><first>Kaisla</first><last>Kajava</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>1569–1575</pages>
       <abstract>This paper presents the different models submitted by the LT@Helsinki team for the SemEval 2020 Shared Task 12. Our team participated in sub-tasks A and C; titled offensive language identification and offense target identification, respectively. In both cases we used the so-called Bidirectional Encoder Representation from Transformer (BERT), a model pre-trained by Google and fine-tuned by us on the OLID and SOLID datasets. The results show that offensive tweet classification is one of several language-based tasks where BERT can achieve state-of-the-art results.</abstract>
       <url hash="5a9d9033">2020.semeval-1.205</url>
@@ -2376,7 +2376,7 @@
       <author><first>Hwijeen</first><last>Ahn</last></author>
       <author><first>Jimin</first><last>Sun</last></author>
       <author><first>Chan Young</first><last>Park</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>1576–1586</pages>
       <abstract>This paper describes our approach to the task of identifying offensive languages in a multilingual setting. We investigate two data augmentation strategies: using additional semi-supervised labels with different thresholds and cross-lingual transfer with data selection. Leveraging the semi-supervised dataset resulted in performance improvements compared to the baseline trained solely with the manually-annotated dataset. We propose a new metric, Translation Embedding Distance, to measure the transferability of instances for cross-lingual data selection. We also introduce various preprocessing steps tailored for social media text along with methods to fine-tune the pre-trained multilingual BERT (mBERT) for offensive language identification. Our multilingual systems achieved competitive results in Greek, Danish, and Turkish at OffensEval 2020.</abstract>
       <url hash="2c6978ba">2020.semeval-1.206</url>
@@ -2396,7 +2396,7 @@
     <paper id="208">
       <title><fixed-case>NUIG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Pseudo Labelling for Offensive Content Classification</title>
       <author><first>Shardul</first><last>Suryawanshi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>1598–1604</pages>
       <abstract>This work addresses the classification problem defined by sub-task A (English only) of the OffensEval 2020 challenge. We used a semi-supervised approach to classify given tweets into an offensive (OFF) or not-offensive (NOT) class. As the OffensEval 2020 dataset is loosely labelled with confidence scores given by unsupervised models, we used last year’s offensive language identification dataset (OLID) to label the OffensEval 2020 dataset. Our approach uses a pseudo-labelling method to annotate the current dataset. We trained four text classifiers on the OLID dataset and the classifier with the highest macro-averaged F1-score has been used to pseudo label the OffensEval 2020 dataset. The same model which performed best amongst four text classifiers on OLID dataset has been trained on the combined dataset of OLID and pseudo labelled OffensEval 2020. We evaluated the classifiers with precision, recall and macro-averaged F1-score as the primary evaluation metric on the OLID and OffensEval 2020 datasets. This work is licensed under a Creative Commons Attribution 4.0 International Licence. Licence details: <url>http://creativecommons.org/licenses/by/4.0/</url>.</abstract>
@@ -2429,8 +2429,8 @@
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Language Identification Exploring Transfer Learning Models</title>
       <author><first>Flor Miriam</first><last>Plaza del Arco</last></author>
       <author><first>M. Dolores</first><last>Molina González</last></author>
-      <author><first>Alfonso</first><last>Ureña-López</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <pages>1622–1627</pages>
       <abstract>This paper describes the participation of SINAI team at Task 12: OffensEval 2: Multilingual Offensive Language Identification in Social Media. In particular, the participation in Sub-task A in English which consists of identifying tweets as offensive or not offensive. We preprocess the dataset according to the language characteristics used on social media. Then, we select a small set from the training set provided by the organizers and fine-tune different Transformerbased models in order to test their effectiveness. Our team ranks 20th out of 85 participants in Subtask-A using the XLNet model.</abstract>
       <url hash="23019e51">2020.semeval-1.211</url>
@@ -2450,7 +2450,7 @@
       <title><fixed-case>UHH</fixed-case>-<fixed-case>LT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Fine-Tuning of Pre-Trained Transformer Networks for Offensive Language Detection</title>
       <author><first>Gregor</first><last>Wiedemann</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1638–1644</pages>
       <abstract>Fine-tuning of pre-trained transformer networks such as BERT yield state-of-the-art results for text classification tasks. Typically, fine-tuning is performed on task-specific training datasets in a supervised manner. One can also fine-tune in unsupervised manner beforehand by further pre-training the masked language modeling (MLM) task. Hereby, in-domain data for unsupervised MLM resembling the actual classification target dataset allows for domain adaptation of the model. In this paper, we compare current pre-trained transformer networks with and without MLM fine-tuning on their performance for offensive language detection. Our MLM fine-tuned RoBERTa-based classifier officially ranks 1st in the SemEval 2020 Shared Task 12 for the English language. Further experiments with the ALBERT model even surpass this result.</abstract>
       <url hash="aeb6b4e3">2020.semeval-1.213</url>
@@ -2461,7 +2461,7 @@
       <title><fixed-case>EL</fixed-case>-<fixed-case>BERT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 10: A Multi-Embedding Ensemble Based Approach for Emphasis Selection in Visual Media</title>
       <author><first>Chandresh</first><last>Kanani</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1645–1651</pages>
       <abstract>In visual media, text emphasis is the strengthening of words in a text to convey the intent of the author. Text emphasis in visual media is generally done by using different colors, backgrounds, or font for the text; it helps in conveying the actual meaning of the message to the readers. Emphasis selection is the task of choosing candidate words for emphasis, it helps in automatically designing posters and other media contents with written text. If we consider only the text and do not know the intent, then there can be multiple valid emphasis selections. We propose the use of ensembles for emphasis selection to improve over single emphasis selection models. We show that the use of multi-embedding helps in enhancing the results for base models. To show the efficacy of proposed approach we have also done a comparison of our results with state-of-the-art models.</abstract>
       <url hash="a4c4fdbc">2020.semeval-1.214</url>
@@ -2524,7 +2524,7 @@
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>Rakesh</first><last>Gosangi</last></author>
       <author><first>Haimin</first><last>Zhang</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>1678–1684</pages>
       <abstract>This paper presents our submission to the SemEval 2020 - Task 10 on emphasis selection in written text. We approach this emphasis selection problem as a sequence labeling task where we represent the underlying text with various contextual embedding models. We also employ label distribution learning to account for annotator disagreements. We experiment with the choice of model architectures, trainability of layers, and different contextual embeddings. Our best performing architecture is an ensemble of different models, which achieved an overall matching score of 0.783, placing us 15th out of 31 participating teams. Lastly, we analyze the results in terms of parts of speech tags, sentence lengths, and word ordering.</abstract>
       <url hash="e36c3dc5">2020.semeval-1.219</url>
@@ -2546,7 +2546,7 @@
       <title><fixed-case>T</fixed-case>ext<fixed-case>L</fixed-case>earner at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 10: A Contextualized Ranking System in Solving Emphasis Selection in Text</title>
       <author><first>Zhishen</first><last>Yang</last></author>
       <author><first>Lars</first><last>Wolfsteller</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1691–1697</pages>
       <abstract>This paper describes the emphasis selection system of the team TextLearner for SemEval 2020 Task 10: Emphasis Selection For Written Text in Visual Media. The system aims to learn the emphasis selection distribution using contextual representations extracted from pre-trained language models and a two-staged ranking model. The experimental results demonstrate the strong contextual representation power of the recent advanced transformer-based language model RoBERTa, which can be exploited using a simple but effective architecture on top.</abstract>
       <url hash="008b3d7b">2020.semeval-1.221</url>
@@ -2645,7 +2645,7 @@
       <author><first>Sopan</first><last>Khosla</last></author>
       <author><first>Rishabh</first><last>Joshi</last></author>
       <author><first>Ritam</first><last>Dutt</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>1756–1763</pages>
       <abstract>In this paper we describe our submission for the task of Propaganda Span Identification in news articles. We introduce a BERT-BiLSTM based span-level propaganda classification model that identifies which token spans within the sentence are indicative of propaganda. The ”multi-granular” model incorporates linguistic knowledge at various levels of text granularity, including word, sentence and document level syntactic, semantic and pragmatic affect features, which significantly improve model performance, compared to its language-agnostic variant. To facilitate better representation learning, we also collect a corpus of 10k news articles, and use it for fine-tuning the model. The final model is a majority-voting ensemble which learns different propaganda class boundaries by leveraging different subsets of incorporated knowledge.</abstract>
@@ -2737,7 +2737,7 @@
       <title>Team <fixed-case>D</fixed-case>i<fixed-case>S</fixed-case>aster at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: Combining <fixed-case>BERT</fixed-case> and Hand-crafted Features for Identifying Propaganda Techniques in News</title>
       <author><first>Anders</first><last>Kaas</last></author>
       <author><first>Viktor Torp</first><last>Thomsen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1817–1822</pages>
       <abstract>The identification of communication techniques in news articles such as propaganda is important, as such techniques can influence the opinions of large numbers of people. Most work so far focused on the identification at the news article level. Recently, a new dataset and shared task has been proposed for the identification of propaganda techniques at the finer-grained span level. This paper describes our system submission to the subtask of technique classification (TC) for the SemEval 2020 shared task on detection of propaganda techniques in news articles. We propose a method of combining neural BERT representations with hand-crafted features via stacked generalization. Our model has the added advantage that it combines the power of contextual representations from BERT with simple span-based and article-based global features. We present an ablation study which shows that even though BERT representations are very powerful also for this task, BERT still benefits from being combined with carefully designed task-specific features.</abstract>
       <url hash="753aa8ba">2020.semeval-1.238</url>
@@ -2768,7 +2768,7 @@
     <paper id="241">
       <title><fixed-case>UAIC</fixed-case>1860 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: Detection of Propaganda Techniques in News Articles</title>
       <author><first>Vlad</first><last>Ermurachi</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <pages>1835–1840</pages>
       <abstract>The “Detection of Propaganda Techniques in News Articles” task at the SemEval 2020 competition focuses on detecting and classifying propaganda, pervasive in news article. In this paper, we present a system able to evaluate on sentence level, three traditional text representation techniques for these study goals, using: tf*idf, word and character n-grams. Firstly, we built a binary classifier able to provide corresponding propaganda labels, propaganda or non-propaganda. Secondly, we build a multilabel multiclass model to identify applied propaganda.</abstract>
       <url hash="38229a7c">2020.semeval-1.241</url>
@@ -2843,7 +2843,7 @@
       <title><fixed-case>A</fixed-case>lex<fixed-case>U</fixed-case>-<fixed-case>B</fixed-case>ack<fixed-case>T</fixed-case>ranslation-<fixed-case>TL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning</title>
       <author><first>Mai</first><last>Ibrahim</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>1881–1890</pages>
       <abstract>Social media platforms, online news commenting spaces, and many other public forums have become widely known for issues of abusive behavior such as cyber-bullying and personal attacks. In this paper, we use the annotated tweets of the Offensive Language Identification Dataset (OLID) to train three levels of deep learning classifiers to solve the three sub-tasks associated with the dataset. Sub-task A is to determine if the tweet is toxic or not. Then, for offensive tweets, sub-task B requires determining whether the toxicity is targeted. Finally, for sub-task C, we predict the target of the offense; i.e. a group, individual, or other entity. In our solution, we tackle the problem of class imbalance in the dataset by using back translation for data augmentation and utilizing the fine-tuned BERT model in an ensemble of deep learning classifiers. We used this solution to participate in the three English sub-tasks of SemEval-2020 task 12. The proposed solution achieved 0.91393, 0.6300, and 0.57607 macro F1-average in sub-tasks A, B, and C respectively. We achieved the 9th, 14th, and 22nd places for sub-tasks A, B and C respectively.</abstract>
       <url hash="e5e96dc0">2020.semeval-1.248</url>
@@ -2988,7 +2988,7 @@
       <title><fixed-case>IITP</fixed-case>-<fixed-case>AINLPML</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Tweet Identification and Target Categorization in a Multitask Environment</title>
       <author><first>Soumitra</first><last>Ghosh</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1983–1991</pages>
       <abstract>In this paper, we describe the participation of IITP-AINLPML team in the SemEval-2020 SharedTask 12 on Offensive Language Identification and Target Categorization in English Twitter data. Our proposed model learns to extract textual features using a BiGRU-based deep neural network supported by a Hierarchical Attention architecture to focus on the most relevant areas in the text. We leverage the effectiveness of multitask learning while building our models for sub-task A and B. We do necessary undersampling of the over-represented classes in the sub-tasks A and C.During training, we consider a threshold of 0.5 as the separation margin between the instances belonging to classes OFF and NOT in sub-task A and UNT and TIN in sub-task B. For sub-task C, the class corresponding to the maximum score among the given confidence scores of the classes(IND, GRP and OTH) is considered as the final label for an instance. Our proposed model obtains the macro F1-scores of 90.95%, 55.69% and 63.88% in sub-task A, B and C, respectively.</abstract>
       <url hash="900e1845">2020.semeval-1.261</url>
@@ -2998,7 +2998,7 @@
     <paper id="262">
       <title><fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Multilingual Classification of Offensive Text</title>
       <author><first>Sabino</first><last>Miranda-Jiménez</last></author>
-      <author><first>Eric S.</first><last>Tellez</last></author>
+      <author id="eric-sadit-tellez"><first>Eric S.</first><last>Tellez</last></author>
       <author><first>Mario</first><last>Graff</last></author>
       <author><first>Daniela</first><last>Moctezuma</last></author>
       <pages>1992–1997</pages>
@@ -3043,7 +3043,7 @@
     <paper id="266">
       <title><fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Language Detection in Tweets Using Preprocessing Methods, Character and Word N-grams</title>
       <author><first>Moshe</first><last>Uzan</last></author>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <pages>2017–2022</pages>
       <abstract>In this paper, we describe our submissions to SemEval-2020 contest. We tackled subtask 12 - “Multilingual Offensive Language Identification in Social Media”. We developed different models for four languages: Arabic, Danish, Greek, and Turkish. We applied three supervised machine learning methods using various combinations of character and word n-gram features. In addition, we applied various combinations of basic preprocessing methods. Our best submission was a model we built for offensive language identification in Danish using Random Forest. This model was ranked at the 6 position out of 39 submissions. Our result is lower by only 0.0025 than the result of the team that won the 4 place using entirely non-neural methods. Our experiments indicate that char ngram features are more helpful than word ngram features. This phenomenon probably occurs because tweets are more characterized by characters than by words, tweets are short, and contain various special sequences of characters, e.g., hashtags, shortcuts, slang words, and typos.</abstract>
       <url hash="7c457f64">2020.semeval-1.266</url>
@@ -3129,7 +3129,7 @@
     <paper id="274">
       <title><fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: A Cross-Lingual Augmentation Approach for Multilingual Offensive Language Identification</title>
       <author><first>Erfan</first><last>Ghadery</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>2073–2079</pages>
       <abstract>This paper presents our system entitled ‘LIIR’ for SemEval-2020 Task 12 on Multilingual Offensive Language Identification in Social Media (OffensEval 2). We have participated in sub-task A for English, Danish, Greek, Arabic, and Turkish languages. We adapt and fine-tune the BERT and Multilingual Bert models made available by Google AI for English and non-English languages respectively. For the English language, we use a combination of two fine-tuned BERT models. For other languages we propose a cross-lingual augmentation approach in order to enrich training data and we use Multilingual BERT to obtain sentence representations.</abstract>
       <url hash="2541f3e5">2020.semeval-1.274</url>
@@ -3139,7 +3139,7 @@
     <paper id="275">
       <title><fixed-case>LISAC</fixed-case> <fixed-case>FSDM</fixed-case>-<fixed-case>USMBA</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Overcoming <fixed-case>A</fixed-case>ra<fixed-case>BERT</fixed-case>’s pretrain-finetune discrepancy for <fixed-case>A</fixed-case>rabic offensive language identification</title>
       <author><first>Hamza</first><last>Alami</last></author>
-      <author><first>Said</first><last>Ouatik El Alaoui</last></author>
+      <author id="said-ouatik-el-alaoui"><first>Said</first><last>Ouatik El Alaoui</last></author>
       <author><first>Abdessamad</first><last>Benlahbib</last></author>
       <author><first>Noureddine</first><last>En-nahnahi</last></author>
       <pages>2080–2085</pages>
@@ -3182,7 +3182,7 @@
     </paper>
     <paper id="279">
       <title><fixed-case>NTU</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Identifying Offensive Tweets Using Hierarchical Multi-Task Learning Approach</title>
-      <author><first>Po-Chun</first><last>Chen</last></author>
+      <author id="po-chun-chen"><first>Po-Chun</first><last>Chen</last></author>
       <author><first>Hen-Hsen</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>2105–2110</pages>
@@ -3227,7 +3227,7 @@
       <author><first>Fatemah</first><last>Husain</last></author>
       <author><first>Jooyeon</first><last>Lee</last></author>
       <author><first>Sam</first><last>Henry</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
       <pages>2133–2139</pages>
       <abstract>This paper describes SalamNET, an Arabic offensive language detection system that has been submitted to SemEval 2020 shared task 12: Multilingual Offensive Language Identification in Social Media. Our approach focuses on applying multiple deep learning models and conducting in depth error analysis of results to provide system implications for future development considerations. To pursue our goal, a Recurrent Neural Network (RNN), a Gated Recurrent Unit (GRU), and Long-Short Term Memory (LSTM) models with different design architectures have been developed and evaluated. The SalamNET, a Bi-directional Gated Recurrent Unit (Bi-GRU) based model, reports a macro-F1 score of 0.83%</abstract>
       <url hash="af3d59f6">2020.semeval-1.283</url>
@@ -3236,7 +3236,7 @@
     </paper>
     <paper id="284">
       <title>Smatgrisene at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offense Detection by <fixed-case>AI</fixed-case> - with a Pinch of Real <fixed-case>I</fixed-case></title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <author><first>Marianne</first><last>Rathje</last></author>
       <pages>2140–2145</pages>
       <abstract>This paper discusses how ML based classifiers can be enhanced disproportionately by adding small amounts of qualitative linguistic knowledge. As an example we present the Danish classifier Smatgrisene, our contribution to the recent OffensEval Challenge 2020. The classifier was trained on 3000 social media posts annotated for offensiveness, supplemented by rules extracted from the reference work on Danish offensive language (Rathje 2014b). Smatgrisene did surprisingly well in the competition in spite of its extremely simple design, showing an interesting trade-off between technological muscle and linguistic intelligence. Finally, we comment on the perspectives in combining qualitative and quantitative methods for NLP.</abstract>
@@ -3373,7 +3373,7 @@
     <paper id="297">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2020 Task 12: Identifying Offensive Tweets with Lightweight Ensembles</title>
       <author><first>Marcos Aurélio Hermogenes</first><last>Boriola</last></author>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <pages>2232–2236</pages>
       <abstract>Offensive language is a common issue on social media platforms nowadays. In an effort to address this issue, the SemEval 2020 event held the OffensEval 2020 shared task where the participants were challenged to develop systems that identify and classify offensive language in tweets. In this paper, we present a system that uses an Ensemble model stacking a BOW model and a CNN model that led us to place 29th in the ranking for English sub-task A.</abstract>
       <url hash="f0f42fb7">2020.semeval-1.297</url>
@@ -3394,7 +3394,7 @@
     <paper id="299">
       <title><fixed-case>XD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Ensemble Approach to Offensive Language Identification in Social Media Using Transformer Encoders</title>
       <author><first>Xiangjue</first><last>Dong</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>2244–2250</pages>
       <abstract>This paper presents six document classification models using the latest transformer encoders and a high-performing ensemble model for a task of offensive language identification in social media. For the individual models, deep transformer layers are applied to perform multi-head attentions. For the ensemble model, the utterance representations taken from those individual models are concatenated and fed into a linear decoder to make the final decisions. Our ensemble model outperforms the individual models and shows up to 8.6% improvement over the individual models on the development set. On the test set, it achieves macro-F1 of 90.9% and becomes one of the high performing systems among 85 participants in the sub-task A of this shared task. Our analysis shows that although the ensemble model significantly improves the accuracy on the development set, the improvement is not as evident on the test set.</abstract>
       <url hash="537aca91">2020.semeval-1.299</url>
diff --git a/data/xml/2020.sigdial.xml b/data/xml/2020.sigdial.xml
index 50fd9598c1..eb56e67f7c 100644
--- a/data/xml/2020.sigdial.xml
+++ b/data/xml/2020.sigdial.xml
@@ -6,7 +6,7 @@
       <editor><first>Olivier</first><last>Pietquin</last></editor>
       <editor><first>Smaranda</first><last>Muresan</last></editor>
       <editor><first>Vivian</first><last>Chen</last></editor>
-      <editor><first>Casey</first><last>Kennington</last></editor>
+      <editor id="casey-kennington"><first>Casey</first><last>Kennington</last></editor>
       <editor><first>David</first><last>Vandyke</last></editor>
       <editor><first>Nina</first><last>Dethlefs</last></editor>
       <editor><first>Koji</first><last>Inoue</last></editor>
@@ -26,7 +26,7 @@
     <paper id="1">
       <title>Semantic Guidance of Dialogue Generation with Reinforcement Learning</title>
       <author><first>Cheng-Hsun</first><last>Hsueh</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>1–9</pages>
       <abstract>Neural encoder-decoder models have shown promising performance for human-computer dialogue systems over the past few years. However, due to the maximum-likelihood objective for the decoder, the generated responses are often universal and safe to the point that they lack meaningful information and are no longer relevant to the post. To address this, in this paper, we propose semantic guidance using reinforcement learning to ensure that the generated responses indeed include the given or predicted semantics and that these semantics do not appear repeatedly in the response. Synsets, which comprise sets of manually defined synonyms, are used as the form of assigned semantics. For a given/assigned/predicted synset, only one of its synonyms should appear in the generated response; this constitutes a simple but effective semantic-control mechanism. We conduct both quantitative and qualitative evaluations, which show that the generated responses are not only higher-quality but also reflect the assigned semantic controls.</abstract>
       <url hash="b4dfdde2">2020.sigdial-1.1</url>
@@ -38,7 +38,7 @@
       <title>Counseling-Style Reflection Generation Using Generative Pretrained Transformers with Augmented Context</title>
       <author><first>Siqi</first><last>Shen</last></author>
       <author><first>Charles</first><last>Welch</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <pages>10–20</pages>
       <abstract>We introduce a counseling dialogue system that seeks to assist counselors while they are learning and refining their counseling skills. The system generates counselors’reflections – i.e., responses that reflect back on what the client has said given the dialogue history. Our method builds upon the new generative pretrained transformer architecture and enhances it with context augmentation techniques inspired by traditional strategies used during counselor training. Through a set of comparative experiments, we show that the system that incorporates these strategies performs better in the reflection generation task than a system that is just fine-tuned with counseling conversations. To confirm our findings, we present a human evaluation study that shows that our system generates naturally-looking reflections that are also stylistically and grammatically correct.</abstract>
@@ -52,8 +52,8 @@
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Vrindavan</first><last>Harrison</last></author>
       <author><first>Shereen</first><last>Oraby</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>21–34</pages>
       <abstract>Natural language generators (NLGs) for task-oriented dialogue typically take a meaning representation (MR) as input, and are trained end-to-end with a corpus of MR/utterance pairs, where the MRs cover a specific set of dialogue acts and domain attributes. Creation of such datasets is labor intensive and time consuming. Therefore, dialogue systems for new domain ontologies would benefit from using data for pre-existing ontologies. Here we explore, for the first time, whether it is possible to train an NLG for a new larger ontology using existing training sets for the restaurant domain, where each set is based on a different ontology. We create a new, larger combined ontology, and then train an NLG to produce utterances covering it. For example, if one dataset has attributes for family friendly and rating information, and the other has attributes for decor and service, our aim is an NLG for the combined ontology that can produce utterances that realize values for family friendly, rating, decor and service. Initial experiments with a baseline neural sequence-to-sequence model show that this task is surprisingly challenging. We then develop a novel self-training method that identifies (errorful) model outputs, automatically constructs a corrected MR input to form a new (MR, utterance) training pair, and then repeatedly adds these new instances back into the training data. We then test the resulting model on a new test set. The result is a self-trained model whose performance is an absolute 75.4% improvement over the baseline model. We also report a human qualitative evaluation of the final model showing that it achieves high naturalness, semantic coherence and grammaticality.</abstract>
       <url hash="8b8aaa7d">2020.sigdial-1.3</url>
@@ -69,7 +69,7 @@
       <author><first>Christian</first><last>Geishauser</last></author>
       <author><first>Hsien-Chin</first><last>Lin</last></author>
       <author><first>Marco</first><last>Moresi</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>35–44</pages>
       <abstract>Task-oriented dialog systems rely on dialog state tracking (DST) to monitor the user’s goal during the course of an interaction. Multi-domain and open-vocabulary settings complicate the task considerably and demand scalable solutions. In this paper we present a new approach to DST which makes use of various copy mechanisms to fill slots with values. Our model has no need to maintain a list of candidate values. Instead, all values are extracted from the dialog context on-the-fly. A slot is filled by one of three copy mechanisms: (1) Span prediction may extract values directly from the user input; (2) a value may be copied from a system inform memory that keeps track of the system’s inform operations (3) a value may be copied over from a different slot that is already contained in the dialog state to resolve coreferences within and across domains. Our approach combines the advantages of span-based slot filling methods with memory methods to avoid the use of value picklists altogether. We argue that our strategy simplifies the DST task while at the same time achieving state of the art performance on various popular evaluation sets including Multiwoz 2.1, where we achieve a joint goal accuracy beyond 55%.</abstract>
       <url hash="39b9202b">2020.sigdial-1.4</url>
@@ -135,8 +135,8 @@
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Tao</first><last>Yang</last></author>
       <author><first>Zeyu</first><last>You</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>61–73</pages>
       <abstract>Human tackle reading comprehension not only based on the given context itself but often rely on the commonsense beyond. To empower the machine with commonsense reasoning, in this paper, we propose a Commonsense Evidence Generation and Injection framework in reading comprehension, named CEGI. The framework injects two kinds of auxiliary commonsense evidence into comprehensive reading to equip the machine with the ability of rational thinking. Specifically, we build two evidence generators: one aims to generate textual evidence via a language model; the other aims to extract factual evidence (automatically aligned text-triples) from a commonsense knowledge graph after graph completion. Those evidences incorporate contextual commonsense and serve as the additional inputs to the reasoning model. Thereafter, we propose a deep contextual encoder to extract semantic relationships among the paragraph, question, option, and evidence. Finally, we employ a capsule network to extract different linguistic units (word and phrase) from the relations, and dynamically predict the optimal option based on the extracted units. Experiments on the CosmosQA dataset demonstrate that the proposed CEGI model outperforms the current state-of-the-art approaches and achieves the highest accuracy (83.6%) on the leaderboard.</abstract>
       <url hash="471ecfcb">2020.sigdial-1.9</url>
@@ -146,7 +146,7 @@
     <paper id="10">
       <title>Identifying Collaborative Conversations using Latent Discourse Behaviors</title>
       <author><first>Ayush</first><last>Jain</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Steven</first><last>Lancette</last></author>
       <author><first>Mahak</first><last>Goindani</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
@@ -224,7 +224,7 @@
     <paper id="16">
       <title>A Spoken Dialogue System for Spatial Question Answering in a Physical Blocks World</title>
       <author><first>Georgiy</first><last>Platonov</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <author><first>Benjamin</first><last>Kane</last></author>
       <author><first>Aaron</first><last>Gindi</last></author>
       <pages>128–131</pages>
@@ -322,7 +322,7 @@
       <title>How Self-Attention Improves Rare Class Performance in a Question-Answering Dialogue Agent</title>
       <author><first>Adam</first><last>Stiff</last></author>
       <author><first>Qi</first><last>Song</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>196–202</pages>
       <abstract>Contextualized language modeling using deep Transformer networks has been applied to a variety of natural language processing tasks with remarkable success. However, we find that these models are not a panacea for a question-answering dialogue agent corpus task, which has hundreds of classes in a long-tailed frequency distribution, with only thousands of data points. Instead, we find substantial improvements in recall and accuracy on rare classes from a simple one-layer RNN with multi-headed self-attention and static word embeddings as inputs. While much research has used attention weights to illustrate what input is important for a task, the complexities of our dialogue corpus offer a unique opportunity to examine how the model represents what it attends to, and we offer a detailed analysis of how that contributes to improved performance on rare classes. A particularly interesting phenomenon we observe is that the model picks up implicit meanings by splitting different aspects of the semantics of a single word across multiple attention heads.</abstract>
       <url hash="87ecb41a">2020.sigdial-1.24</url>
@@ -333,8 +333,8 @@
     <paper id="25">
       <title>Filtering conversations through dialogue acts labels for improving corpus-based convergence studies</title>
       <author><first>Simone</first><last>Fuscone</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>203–208</pages>
       <abstract>Cognitive models of conversation and research on user-adaptation in dialogue systems involves a better understanding of speakers convergence in conversation. Convergence effects have been established on controlled data sets, for various acoustic and linguistic variables. Tracking interpersonal dynamics on generic corpora has provided positive but more contrasted outcomes. We propose here to enrich large conversational corpora with dialogue act (DA) information. We use DA-labels as filters in order to create data sub sets featuring homogeneous conversational activity. Those data sets allow a more precise comparison between speakers’ speech variables. Our experiences consist of comparing convergence on low level variables (Energy, Pitch, Speech Rate) measured on raw data sets, with human and automatically DA-labelled data sets. We found that such filtering does help in observing convergence suggesting that studies on interpersonal dynamics should consider such high level dialogue activity types and their related NLP topics as important ingredients of their toolboxes.</abstract>
       <url hash="e8155888">2020.sigdial-1.25</url>
@@ -345,7 +345,7 @@
     <paper id="26">
       <title>Nontrivial Lexical Convergence in a Geography-Themed Game</title>
       <author><first>Amanda</first><last>Bergqvist</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Deepthi</first><last>Karkada</last></author>
       <author><first>Maike</first><last>Paetzel</last></author>
       <pages>209–214</pages>
@@ -359,7 +359,7 @@
       <author><first>Ramiro H.</first><last>Gálvez</last></author>
       <author><first>Lara</first><last>Gauder</last></author>
       <author><first>Jordi</first><last>Luque</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
       <pages>215–224</pages>
       <abstract>Acoustic/prosodic (a/p) entrainment has been associated with multiple positive social aspects of human-human conversations. However, research on its effects is still preliminary, first because how to model it is far from standardized, and second because most of the reported findings rely on small corpora or on corpora collected in experimental setups. The present article has a twofold purpose: 1) it proposes a unifying statistical framework for modeling a/p entrainment, and 2) it tests on two large corpora of spontaneous telephone interactions whether three metrics derived from this framework predict positive social aspects of the conversations. The corpora differ in their spoken language, domain, and positive social outcome attached. To our knowledge, this is the first article studying relations between a/p entrainment and positive social outcomes in such large corpora of spontaneous dialog. Our results suggest that our metrics effectively predict, up to some extent, positive social aspects of conversations, which not only validates the methodology, but also provides further insights into the elusive topic of entrainment in human-human conversation.</abstract>
       <url hash="3a5bd4d0">2020.sigdial-1.27</url>
@@ -381,7 +381,7 @@
     <paper id="29">
       <title>Towards Unified Dialogue System Evaluation: A Comprehensive Analysis of Current Evaluation Protocols</title>
       <author><first>Sarah E.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>236–245</pages>
       <abstract>As conversational AI-based dialogue management has increasingly become a trending topic, the need for a standardized and reliable evaluation procedure grows even more pressing. The current state of affairs suggests various evaluation protocols to assess chat-oriented dialogue management systems, rendering it difficult to conduct fair comparative studies across different approaches and gain an insightful understanding of their values. To foster this research, a more robust evaluation protocol must be set in place. This paper presents a comprehensive synthesis of both automated and human evaluation methods on dialogue systems, identifying their shortcomings while accumulating evidence towards the most effective evaluation dimensions. A total of 20 papers from the last two years are surveyed to analyze three types of evaluation protocols: automated, static, and interactive. Finally, the evaluation dimensions used in these papers are compared against our expert evaluation on the system-user dialogue data collected from the Alexa Prize 2020.</abstract>
       <url hash="50d5438a">2020.sigdial-1.29</url>
@@ -412,7 +412,7 @@
       <author><first>Yansen</first><last>Wang</last></author>
       <author><first>R. Charles</first><last>Murray</last></author>
       <author><first>Haogang</first><last>Bao</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>257–260</pages>
       <abstract>For the past 15 years, in computer-supported collaborative learning applications, conversational agents have been used to structure group interactions in online chat-based environments. A series of experimental studies has provided an empirical foundation for the design of chat-based conversational agents that significantly improve learning over no-support control conditions and static-support control conditions. In this demo, we expand upon this foundation, bringing conversational agents to structure group interaction into physical spaces, with the specific goal of facilitating collaboration and learning in workplace scenarios.</abstract>
       <url hash="8d12f903">2020.sigdial-1.31</url>
@@ -423,7 +423,7 @@
     <paper id="32">
       <title>Emora <fixed-case>STDM</fixed-case>: A Versatile Framework for Innovative Dialogue System Development</title>
       <author><first>James D.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>261–264</pages>
       <abstract>This demo paper presents Emora STDM (State Transition Dialogue Manager), a dialogue system development framework that provides novel workflows for rapid prototyping of chat-based dialogue managers as well as collaborative development of complex interactions. Our framework caters to a wide range of expertise levels by supporting interoperability between two popular approaches, state machine and information state, to dialogue management. Our Natural Language Expression package allows seamless integration of pattern matching, custom NLP modules, and database querying, that makes the workflows much more efficient. As a user study, we adopt this framework to an interdisciplinary undergraduate course where students with both technical and non-technical backgrounds are able to develop creative dialogue managers in a short period of time.</abstract>
       <url hash="561004f9">2020.sigdial-1.32</url>
@@ -457,8 +457,8 @@
       <author><first>Mihail</first><last>Eric</last></author>
       <author><first>Karthik</first><last>Gopalakrishnan</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>278–289</pages>
       <abstract>Most prior work on task-oriented dialogue systems are restricted to a limited coverage of domain APIs, while users oftentimes have domain related requests that are not covered by the APIs. In this paper, we propose to expand coverage of task-oriented dialogue systems by incorporating external unstructured knowledge sources. We define three sub-tasks: knowledge-seeking turn detection, knowledge selection, and knowledge-grounded response generation, which can be modeled individually or jointly. We introduce an augmented version of MultiWOZ 2.1, which includes new out-of-API-coverage turns and responses grounded on external knowledge sources. We present baselines for each sub-task using both conventional and neural approaches. Our experimental results demonstrate the need for further research in this direction to enable more informative conversational systems.</abstract>
       <url hash="2330848e">2020.sigdial-1.35</url>
@@ -540,7 +540,7 @@
       <author><first>Aishan</first><last>Liu</last></author>
       <author><first>Sweekar</first><last>Sudhakara</last></author>
       <author><first>Alan</first><last>Wagner</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>339–351</pages>
       <abstract>This paper presents MDP policy learning for agents to learn strategic behavior–how to play board games–during multimodal dialogues. Policies are trained offline in simulation, with dialogues carried out in a formal language. The agent has a temporary belief state for the dialogue, and a persistent knowledge store represented as an extensive-form game tree. How well the agent learns a new game from a dialogue with a simulated partner is evaluated by how well it plays the game, given its dialogue-final knowledge state. During policy training, we control for the simulated dialogue partner’s level of informativeness in responding to questions. The agent learns best when its trained policy matches the current dialogue partner’s informativeness. We also present a novel data collection for training natural language modules. Human subjects who engaged in dialogues with a baseline system rated the system’s language skills as above average. Further, results confirm that human dialogue partners also vary in their informativeness.</abstract>
       <url hash="54f8364b">2020.sigdial-1.41</url>
diff --git a/data/xml/2020.sigmorphon.xml b/data/xml/2020.sigmorphon.xml
index f7ec43e3f7..7c40572462 100644
--- a/data/xml/2020.sigmorphon.xml
+++ b/data/xml/2020.sigmorphon.xml
@@ -22,14 +22,14 @@
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Jennifer</first><last>White</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Edoardo Maria</first><last>Ponti</last></author>
       <author><first>Rowan Hall</first><last>Maudslay</last></author>
       <author><first>Ran</first><last>Zmigrod</last></author>
       <author><first>Josef</first><last>Valvoda</last></author>
       <author><first>Svetlana</first><last>Toldova</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Elena</first><last>Klyachko</last></author>
       <author><first>Ilya</first><last>Yegorov</last></author>
       <author><first>Natalia</first><last>Krizhanovsky</last></author>
@@ -45,7 +45,7 @@
       <author><first>Hilaria</first><last>Cruz</last></author>
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>1–39</pages>
       <abstract>A broad goal in natural language processing (NLP) is to develop a system that has the capacity to process any natural language. Most systems, however, are developed using data from just one language such as English. The SIGMORPHON 2020 shared task on morphological reinflection aims to investigate systems’ ability to generalize across typologically distinct languages, many of which are low resource. Systems were developed using data from 45 languages and just 5 language families, fine-tuned with data from an additional 45 languages and 10 language families (13 in total), and evaluated on all 90 languages. A total of 22 systems (19 neural) from 10 teams were submitted to the task. All four winning systems were neural (two monolingual transformers and two massively multilingual RNN-based models with gated attention). Most teams demonstrate utility of data hallucination and augmentation, ensembles, and multilingual training for low-resource languages. Non-neural learners and manually designed grammars showed competitive and even superior performance on some languages (such as Ingrian, Tajik, Tagalog, Zarma, Lingala), especially with very limited data. Some language families (Afro-Asiatic, Niger-Congo, Turkic) were relatively easy for most systems and achieved over 90% mean accuracy while others were more challenging.</abstract>
@@ -59,7 +59,7 @@
       <author><first>Kyle</first><last>Gorman</last></author>
       <author><first>Lucas F.E.</first><last>Ashby</last></author>
       <author><first>Aaron</first><last>Goyzueta</last></author>
-      <author><first>Arya</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Daniel</first><last>You</last></author>
       <pages>40–50</pages>
@@ -71,8 +71,8 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>SIGMORPHON</fixed-case> 2020 Shared Task on Unsupervised Morphological Paradigm Completion</title>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>51–62</pages>
@@ -85,7 +85,7 @@
     <paper id="4">
       <title>One-Size-Fits-All Multilingual Models</title>
       <author><first>Ben</first><last>Peters</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>63–69</pages>
       <abstract>This paper presents DeepSPIN’s submissions to Tasks 0 and 1 of the SIGMORPHON 2020 Shared Task. For both tasks, we present multilingual models, training jointly on data in all languages. We perform no language-specific hyperparameter tuning – each of our submissions uses the same model for all languages. Our basic architecture is the sparse sequence-to-sequence model with entmax attention and loss, which allows our models to learn sparse, local alignments while still being trainable with gradient-based techniques. For Task 1, we achieve strong performance with both RNN- and transformer-based sparse models. For Task 0, we extend our RNN-based model to a multi-encoder set-up in which separate modules encode the lemma and inflection sequences. Despite our models’ lack of language-specific tuning, they tie for first in Task 0 and place third in Task 1.</abstract>
       <url hash="c577bb7e">2020.sigmorphon-1.4</url>
@@ -126,7 +126,7 @@
     <paper id="8">
       <title>The <fixed-case>NYU</fixed-case>-<fixed-case>CUB</fixed-case>oulder Systems for <fixed-case>SIGMORPHON</fixed-case> 2020 Task 0 and Task 2</title>
       <author><first>Assaf</first><last>Singer</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>90–98</pages>
       <abstract>We describe the NYU-CUBoulder systems for the SIGMORPHON 2020 Task 0 on typologically diverse morphological inflection and Task 2 on unsupervised morphological paradigm completion. The former consists of generating morphological inflections from a lemma and a set of morphosyntactic features describing the target form. The latter requires generating entire paradigms for a set of given lemmas from raw text alone. We model morphological inflection as a sequence-to-sequence problem, where the input is the sequence of the lemma’s characters with morphological tags, and the output is the sequence of the inflected form’s characters. First, we apply a transformer model to the task. Second, as inflected forms share most characters with the lemma, we further propose a pointer-generator transformer model to allow easy copying of input characters.</abstract>
       <url hash="d60711c9">2020.sigmorphon-1.8</url>
@@ -136,7 +136,7 @@
     <paper id="9">
       <title>The <fixed-case>IMS</fixed-case>–<fixed-case>CUB</fixed-case>oulder System for the <fixed-case>SIGMORPHON</fixed-case> 2020 Shared Task on Unsupervised Morphological Paradigm Completion</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>99–105</pages>
       <abstract>In this paper, we present the systems of the University of Stuttgart IMS and the University of Colorado Boulder (IMS–CUBoulder) for SIGMORPHON 2020 Task 2 on unsupervised morphological paradigm completion (Kann et al., 2020). The task consists of generating the morphological paradigms of a set of lemmas, given only the lemmas themselves and unlabeled text. Our proposed system is a modified version of the baseline introduced together with the task. In particular, we experiment with substituting the inflection generation component with an LSTM sequence-to-sequence model and an LSTM pointer-generator network. Our pointer-generator system obtains the best score of all seven submitted systems on average over all languages, and outperforms the official baseline, which was best overall, on Bulgarian and Kannada.</abstract>
       <url hash="4704eca8">2020.sigmorphon-1.9</url>
@@ -156,7 +156,7 @@
     <paper id="11">
       <title><fixed-case>KU</fixed-case>-<fixed-case>CST</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2020 Task 2 on Unsupervised Morphological Paradigm Completion</title>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
       <pages>111–116</pages>
       <abstract>We present a model for the unsupervised dis- covery of morphological paradigms. The goal of this model is to induce morphological paradigms from the bible (raw text) and a list of lemmas. We have created a model that splits each lemma in a stem and a suffix, and then we try to create a plausible suffix list by con- sidering lemma pairs. Our model was not able to outperform the official baseline, and there is still room for improvement, but we believe that the ideas presented here are worth considering.</abstract>
       <url hash="53f2349d">2020.sigmorphon-1.11</url>
@@ -179,7 +179,7 @@
     <paper id="13">
       <title>Frustratingly Easy Multilingual Grapheme-to-Phoneme Conversion</title>
       <author><first>Nikhil</first><last>Prabhu</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>123–127</pages>
       <abstract>In this paper, we describe two CU-Boulder submissions to the SIGMORPHON 2020 Task 1 on multilingual grapheme-to-phoneme conversion (G2P). Inspired by the high performance of a standard transformer model (Vaswani et al., 2017) on the task, we improve over this approach by adding two modifications: (i) Instead of training exclusively on G2P, we additionally create examples for the opposite direction, phoneme-to-grapheme conversion (P2G). We then perform multi-task training on both tasks. (ii) We produce ensembles of our models via majority voting. Our approaches, though being conceptually simple, result in systems that place 6th and 8th amongst 23 submitted systems, and obtain the best results out of all systems on Lithuanian and Modern Greek, respectively.</abstract>
       <url hash="b67caf54">2020.sigmorphon-1.13</url>
@@ -216,7 +216,7 @@
       <title>One Model to Pronounce Them All: Multilingual Grapheme-to-Phoneme Conversion With a Transformer Ensemble</title>
       <author><first>Kaili</first><last>Vesik</last></author>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>146–152</pages>
       <abstract>The task of grapheme-to-phoneme (G2P) conversion is important for both speech recognition and synthesis. Similar to other speech and language processing tasks, in a scenario where only small-sized training data are available, learning G2P models is challenging. We describe a simple approach of exploiting model ensembles, based on multilingual Transformers and self-training, to develop a highly effective G2P solution for 15 languages. Our models are developed as part of our participation in the SIGMORPHON 2020 Shared Task 1 focused at G2P. Our best models achieve 14.99 word error rate (WER) and 3.30 phoneme error rate (PER), a sizeable improvement over the shared task competitive baselines.</abstract>
       <url hash="5e9e06c6">2020.sigmorphon-1.16</url>
diff --git a/data/xml/2020.signlang.xml b/data/xml/2020.signlang.xml
index 9683a124da..5d08b6815c 100644
--- a/data/xml/2020.signlang.xml
+++ b/data/xml/2020.signlang.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the LREC2020 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives</booktitle>
       <editor><first>Eleni</first><last>Efthimiou</last></editor>
-      <editor><first>Stavroula-Evita</first><last>Fotinea</last></editor>
+      <editor id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></editor>
       <editor><first>Thomas</first><last>Hanke</last></editor>
       <editor><first>Julie A.</first><last>Hochgesang</last></editor>
       <editor><first>Jette</first><last>Kristoffersen</last></editor>
@@ -35,7 +35,7 @@
       <title>Improving and Extending Continuous Sign Language Recognition: Taking Iconicity and Spatial Language into account</title>
       <author><first>Valentin</first><last>Belissen</last></author>
       <author><first>Michèle</first><last>Gouiffès</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <pages>7–12</pages>
       <abstract>In a lot of recent research, attention has been drawn to recognizing sequences of lexical signs in continuous Sign Language corpora, often artificial. However, as SLs are structured through the use of space and iconicity, focusing on lexicon only prevents the field of Continuous Sign Language Recognition (CSLR) from extending to Sign Language Understanding and Translation. In this article, we propose a new formulation of the CSLR problem and discuss the possibility of recognizing higher-level linguistic structures in SL videos, like classifier constructions. These structures show much more variability than lexical signs, and are fundamentally different than them in the sense that form and meaning can not be disentangled. Building on the recently published French Sign Language corpus Dicta-Sign-LSF-v2, we discuss the performance and relevance of a simple recurrent neural network trained to recognize illustrative structures.</abstract>
       <url hash="371ec968">2020.signlang-1.2</url>
@@ -80,7 +80,7 @@
       <author><first>Pedro</first><last>Cabral</last></author>
       <author><first>Matilde</first><last>Gonçalves</last></author>
       <author><first>Hugo</first><last>Nicolau</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Ruben</first><last>Santos</last></author>
       <pages>33–38</pages>
       <abstract>Software for the production of sign languages is much less common than for spoken languages. Such software usually relies on 3D humanoid avatars to produce signs which, inevitably, necessitates the use of animation. One barrier to the use of popular animation tools is their complexity and steep learning curve, which can be hard to master for inexperienced users. Here, we present PE2LGP, an authoring system that features a 3D avatar that signs Portuguese Sign Language. Our Animator is designed specifically to craft sign language animations using a key frame method, and is meant to be easy to use and learn to users without animation skills. We conducted a preliminary evaluation of the Animator, where we animated seven Portuguese Sign Language sentences and asked four sign language users to evaluate their quality. This evaluation revealed that the system, in spite of its simplicity, is indeed capable of producing comprehensible messages.</abstract>
@@ -103,14 +103,14 @@
     </paper>
     <paper id="8">
       <title><fixed-case>LSE</fixed-case>_<fixed-case>UVIGO</fixed-case>: A Multi-source Database for <fixed-case>S</fixed-case>panish <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Recognition</title>
-      <author><first>Laura</first><last>Docío-Fernández</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docío-Fernández</last></author>
       <author><first>José Luis</first><last>Alba-Castro</last></author>
       <author><first>Soledad</first><last>Torres-Guijarro</last></author>
       <author><first>Eduardo</first><last>Rodríguez-Banga</last></author>
       <author><first>Manuel</first><last>Rey-Area</last></author>
       <author><first>Ania</first><last>Pérez-Pérez</last></author>
       <author><first>Sonia</first><last>Rico-Alonso</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
       <pages>45–52</pages>
       <abstract>This paper presents LSE_UVIGO, a multi-source database designed to foster research on Sign Language Recognition. It is being recorded and compiled for Spanish Sign Language (LSE acronym in Spanish) and contains also spoken Galician language, so it is very well fitted to research on these languages, but also quite useful for fundamental research in any other sign language. LSE_UVIGO is composed of two datasets: LSE_Lex40_UVIGO, a multi-sensor and multi-signer dataset acquired from scratch, designed as an incremental dataset, both in complexity of the visual content and in the variety of signers. It contains static and co-articulated sign recordings, fingerspelled and gloss-based isolated words, and sentences. Its acquisition is done in a controlled lab environment in order to obtain good quality videos with sharp video frames and RGB and depth information, making them suitable to try different approaches to automatic recognition. The second subset, LSE_TVGWeather_UVIGO is being populated from the regional television weather forecasts interpreted to LSE, as a faster way to acquire high quality, continuous LSE recordings with a domain-restricted vocabulary and with a correspondence to spoken sentences.</abstract>
       <url hash="d3ee1bf4">2020.signlang-1.8</url>
@@ -288,7 +288,7 @@
       <title>Towards Large-Scale Data Mining for Data-Driven Analysis of Sign Languages</title>
       <author><first>Boris</first><last>Mocialov</last></author>
       <author><first>Graham</first><last>Turner</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>145–150</pages>
       <abstract>Access to sign language data is far from adequate. We show that it is possible to collect the data from social networking services such as TikTok, Instagram, and YouTube by applying data filtering to enforce quality standards and by discovering patterns in the filtered data, making it easier to analyse and model. Using our data collection pipeline, we collect and examine the interpretation of songs in both the American Sign Language (ASL) and the Brazilian Sign Language (Libras). We explore their differences and similarities by looking at the co-dependence of the orientation and location phonological parameters.</abstract>
       <url hash="e96a70c5">2020.signlang-1.24</url>
@@ -332,7 +332,7 @@
     <paper id="28">
       <title>Design and Evaluation for a Prototype of an Online Tool to Access Mathematics Notions in Sign Language</title>
       <author><first>Camille</first><last>Nadal</last></author>
-      <author><first>Christophe</first><last>Collet</last></author>
+      <author id="christophe-collet"><first>Christophe</first><last>Collet</last></author>
       <pages>171–176</pages>
       <abstract>The Sign’Maths project aims at giving access to pedagogical resources in Sign Language (SL). It will provide Deaf students and teachers with mathematics vocabulary in SL, this in order to contribute to the standardisation of the vocabulary used at school. The work conducted led to Sign’Maths, an online interactive tool that gives Deaf students access to mathematics definitions in SL. A group of mathematics teachers for Deafs and teachers experts in SL collaborated to create signs to express mathematics concepts, and to produce videos of definitions, examples and illustrations for these concepts. In parallel, we are working on the conception and the design of Sign’Maths software and user interface. Our research work investigated ways to include SL in pedagogical resources in order to present information but also to navigate through the content. User tests revealed that users appreciate the use of SL in a pedagogical resource. However, they pointed out that SL content should be complemented with French to support bilingual education. Our final solution takes advantage of the complementarity of SL, French and visual content to provide an interface that will suit users no matter what their education background is. Future work will investigate a tool for text and signs’ search within Sign’Maths.</abstract>
       <url hash="3c0ad942">2020.signlang-1.28</url>
@@ -365,7 +365,7 @@
     <paper id="31">
       <title>Unsupervised Term Discovery for Continuous Sign Language</title>
       <author><first>Korhan</first><last>Polat</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>189–196</pages>
       <abstract>Most of the sign language recognition (SLR) systems rely on supervision for training and available annotated sign language resources are scarce due to the difficulties of manual labeling. Unsupervised discovery of lexical units would facilitate the annotation process and thus lead to better SLR systems. Inspired by the unsupervised spoken term discovery in speech processing field, we investigate whether a similar approach can be applied in sign language to discover repeating lexical units. We adapt an algorithm that is designed for spoken term discovery by using hand shape and pose features instead of speech features. The experiments are run on a large scale continuous sign corpus and the performance is evaluated using gloss level annotations. This work introduces a new task for sign language processing that has not been addressed before.</abstract>
       <url hash="546b2a98">2020.signlang-1.31</url>
@@ -407,7 +407,7 @@
     <paper id="35">
       <title>Cross-Lingual Keyword Search for Sign Language</title>
       <author><first>Nazif Can</first><last>Tamer</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>217–223</pages>
       <abstract>Sign language research most often relies on exhaustively annotated and segmented data, which is scarce even for the most studied sign languages. However, parallel corpora consisting of sign language interpreting are rarely explored. By utilizing such data for the task of keyword search, this work aims to enable information retrieval from sign language with the queries from the translated written language. With the written language translations as labels, we train a weakly supervised keyword search model for sign language and further improve the retrieval performance with two context modeling strategies. In our experiments, we compare the gloss retrieval and cross language retrieval performance on RWTH-PHOENIX-Weather 2014T dataset.</abstract>
       <url hash="6b2680c3">2020.signlang-1.35</url>
diff --git a/data/xml/2020.sigtyp.xml b/data/xml/2020.sigtyp.xml
index cfecf9022d..2f350afab3 100644
--- a/data/xml/2020.sigtyp.xml
+++ b/data/xml/2020.sigtyp.xml
@@ -6,7 +6,7 @@
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Edoardo M.</first><last>Ponti</last></editor>
       <editor><first>Eitan</first><last>Grossman</last></editor>
-      <editor><first>Arya D.</first><last>McCarthy</last></editor>
+      <editor id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></editor>
       <editor><first>Yevgeni</first><last>Berzak</last></editor>
       <editor><first>Haim</first><last>Dubossarsky</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
@@ -27,7 +27,7 @@
       <title><fixed-case>SIGTYP</fixed-case> 2020 Shared Task: Prediction of Typological Features</title>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Giuseppe G. A.</first><last>Celano</last></author>
       <author><first>Edoardo Maria</first><last>Ponti</last></author>
@@ -47,7 +47,7 @@
       <author><first>Deepak</first><last>Alok</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Bornini</first><last>Lahiri</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <pages>12–16</pages>
       <abstract>This paper enumerates SigTyP 2020 Shared Task on the prediction of typological features as performed by the KMI-Panlingua-IITKGP team. The task entailed the prediction of missing values in a particular language, provided, the name of the language family, its genus, location (in terms of latitude and longitude coordinates and name of the country where it is spoken) and a set of feature-value pair are available. As part of fulfillment of the aforementioned task, the team submitted 3 kinds of system - 2 rule-based and one hybrid system. Of these 3, one rule-based system generated the best performance on the test set. All the systems were ‘constrained’ in the sense that no additional dataset or information, other than those provided by the organisers, was used for developing the systems.</abstract>
       <url hash="bdfbe6fe">2020.sigtyp-1.2</url>
@@ -58,7 +58,7 @@
     <paper id="3">
       <title><fixed-case>NEMO</fixed-case>: Frequentist Inference Approach to Constrained Linguistic Typology Feature Prediction in <fixed-case>SIGTYP</fixed-case> 2020 Shared Task</title>
       <author><first>Alexander</first><last>Gutkin</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>17–28</pages>
       <abstract>This paper describes the NEMO submission to SIGTYP 2020 shared task (Bjerva et al., 2020) which deals with prediction of linguistic typological features for multiple languages using the data derived from World Atlas of Language Structures (WALS). We employ frequentist inference to represent correlations between typological features and use this representation to train simple multi-class estimators that predict individual features. We describe two submitted ridge regression-based configurations which ranked second and third overall in the constrained task. Our best configuration achieved the microaveraged accuracy score of 0.66 on 149 test languages.</abstract>
       <url hash="2d8ce8c4">2020.sigtyp-1.3</url>
@@ -69,7 +69,7 @@
     <paper id="4">
       <title>Predicting Typological Features in <fixed-case>WALS</fixed-case> using Language Embeddings and Conditional Probabilities: <fixed-case>ÚFAL</fixed-case> Submission to the <fixed-case>SIGTYP</fixed-case> 2020 Shared Task</title>
       <author><first>Martin</first><last>Vastl</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <pages>29–35</pages>
       <abstract>We present our submission to the SIGTYP 2020 Shared Task on the prediction of typological features. We submit a constrained system, predicting typological features only based on the WALS database. We investigate two approaches. The simpler of the two is a system based on estimating correlation of feature values within languages by computing conditional probabilities and mutual information. The second approach is to train a neural predictor operating on precomputed language embeddings based on WALS features. Our submitted system combines the two approaches based on their self-estimated confidence scores. We reach the accuracy of 70.7% on the test data and rank first in the shared task.</abstract>
diff --git a/data/xml/2020.sltu.xml b/data/xml/2020.sltu.xml
index e8775cc976..7972f5bcf4 100644
--- a/data/xml/2020.sltu.xml
+++ b/data/xml/2020.sltu.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)</booktitle>
       <editor><first>Dorothee</first><last>Beermann</last></editor>
-      <editor><first>Laurent</first><last>Besacier</last></editor>
+      <editor id="laurent-besacier"><first>Laurent</first><last>Besacier</last></editor>
       <editor><first>Sakriani</first><last>Sakti</last></editor>
       <editor><first>Claudia</first><last>Soria</last></editor>
       <publisher>European Language Resources association</publisher>
@@ -41,7 +41,7 @@
       <author><first>Oddur</first><last>Kjartansson</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <author><first>Alena</first><last>Butryna</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <author><first>Clara</first><last>Rivera</last></author>
       <pages>21–27</pages>
       <abstract>This paper introduces new open speech datasets for three of the languages of Spain: Basque, Catalan and Galician. Catalan is furthermore the official language of the Principality of Andorra. The datasets consist of high-quality multi-speaker recordings of the three languages along with the associated transcriptions. The resulting corpora include over 33 hours of crowd-sourced recordings of 132 male and female native speakers. The recording scripts also include material for elicitation of global and local place names, personal and business names. The datasets are released under a permissive license and are available for free download for commercial, academic and personal use. The high-quality annotated speech datasets described in this paper can be used to, among other things, build text-to-speech systems, serve as adaptation data in automatic speech recognition and provide useful phonetic and phonological insights in corpus linguistics.</abstract>
@@ -89,7 +89,7 @@
       <author><first>Xiaohui</first><last>Zhang</last></author>
       <author><first>Kritika</first><last>Singh</last></author>
       <author><first>Yatharth</first><last>Saraf</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>46–52</pages>
       <abstract>Towards developing high-performing ASR for low-resource languages, approaches to address the lack of resources are to make use of data from multiple languages, and to augment the training data by creating acoustic variations. In this work we present a single grapheme-based ASR model learned on 7 geographically proximal languages, using standard hybrid BLSTM-HMM acoustic models with lattice-free MMI objective. We build the single ASR grapheme set via taking the union over each language-specific grapheme set, and we find such multilingual graphemic hybrid ASR model can perform language-independent recognition on all 7 languages, and substantially outperform each monolingual ASR model. Secondly, we evaluate the efficacy of multiple data augmentation alternatives within language, as well as their complementarity with multilingual modeling. Overall, we show that the proposed multilingual graphemic hybrid ASR with various data augmentation can not only recognize any within training set languages, but also provide large ASR performance improvements.</abstract>
       <url hash="f285cb96">2020.sltu-1.7</url>
@@ -99,11 +99,11 @@
     <paper id="8">
       <title>Neural Text-to-Speech Synthesis for an Under-Resourced Language in a Diglossic Environment: the Case of <fixed-case>G</fixed-case>ascon <fixed-case>O</fixed-case>ccitan</title>
       <author><first>Ander</first><last>Corral</last></author>
-      <author><first>Igor</first><last>Leturia</last></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last></author>
       <author><first>Aure</first><last>Séguier</last></author>
       <author><first>Michäel</first><last>Barret</last></author>
       <author><first>Benaset</first><last>Dazéas</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Nicolas</first><last>Quint</last></author>
       <pages>53–60</pages>
       <abstract>Occitan is a minority language spoken in Southern France, some Alpine Valleys of Italy, and the Val d’Aran in Spain, which only very recently started developing language and speech technologies. This paper describes the first project for designing a Text-to-Speech synthesis system for one of its main regional varieties, namely Gascon. We used a state-of-the-art deep neural network approach, the Tacotron2-WaveGlow system. However, we faced two additional difficulties or challenges: on the one hand, we wanted to test if it was possible to obtain good quality results with fewer recording hours than is usually reported for such systems; on the other hand, we needed to achieve a standard, non-Occitan pronunciation of French proper names, therefore we needed to record French words and test phoneme-based approaches. The evaluation carried out over the various developed systems and approaches shows promising results with near production-ready quality. It has also allowed us to detect the phenomena for which some flaws or fall of quality occur, pointing at the direction of future work to improve the quality of the actual system and for new systems for other language varieties and voices.</abstract>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="12">
       <title>Design and evaluation of a smartphone keyboard for <fixed-case>P</fixed-case>lains <fixed-case>C</fixed-case>ree syllabics</title>
-      <author><first>Eddie Antonio</first><last>Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie Antonio</first><last>Santos</last></author>
       <author><first>Atticus</first><last>Harrigan</last></author>
       <pages>88–96</pages>
       <abstract>Plains Cree is a less-resourced language in Canada. To promote its usage online, we describe previous keyboard layouts for typing Plains Cree syllabics on smartphones. We describe our own solution whose development was guided by ergonomics research and corpus statistics. We then describe a case study in which three participants used a previous layout and our own, and we collected quantitative and qualitative data. We conclude that, despite observing accuracy improvements in user testing, introducing a brand new paradigm for typing Plains Cree syllabics may not be ideal for the community.</abstract>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>eg: Parallel Data and Subword Information for Learning Bilingual Embeddings in Low Resource Scenarios</title>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
       <author><first>Vishal</first><last>Anand</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>97–105</pages>
@@ -201,8 +201,8 @@
       <title>Fully Convolutional <fixed-case>ASR</fixed-case> for Less-Resourced Endangered Languages</title>
       <author><first>Bao</first><last>Thai</last></author>
       <author><first>Robert</first><last>Jimerson</last></author>
-      <author><first>Raymond</first><last>Ptucha</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="raymond-ptucha"><first>Raymond</first><last>Ptucha</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>126–130</pages>
       <abstract>The application of deep learning to automatic speech recognition (ASR) has yielded dramatic accuracy increases for languages with abundant training data, but languages with limited training resources have yet to see accuracy improvements on this scale. In this paper, we compare a fully convolutional approach for acoustic modelling in ASR with a variety of established acoustic modeling approaches. We evaluate our method on Seneca, a low-resource endangered language spoken in North America. Our method yields word error rates up to 40% lower than those reported using both standard GMM-HMM approaches and established deep neural methods, with a substantial reduction in training time. These results show particular promise for languages like Seneca that are both endangered and lack extensive documentation.</abstract>
       <url hash="7d7a2f11">2020.sltu-1.17</url>
@@ -290,7 +290,7 @@
       <author><first>Navya</first><last>Jose</last></author>
       <author><first>Shardul</first><last>Suryawanshi</last></author>
       <author><first>Elizabeth</first><last>Sherly</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>177–184</pages>
       <abstract>There is an increasing demand for sentiment analysis of text from social media which are mostly code-mixed. Systems trained on monolingual data fail for code-mixed data due to the complexity of mixing at different levels of the text. However, very few resources are available for code-mixed data to create models specific for this data. Although much research in multilingual and cross-lingual sentiment analysis has used semi-supervised or unsupervised methods, supervised methods still performs better. Only a few datasets for popular languages such as English-Spanish, English-Hindi, and English-Chinese are available. There are no resources available for Malayalam-English code-mixed data. This paper presents a new gold standard corpus for sentiment analysis of code-mixed text in Malayalam-English annotated by voluntary annotators. This gold standard corpus obtained a Krippendorff’s alpha above 0.8 for the dataset. We use this new corpus to provide the benchmark for sentiment analysis in Malayalam-English code-mixed texts.</abstract>
       <url hash="59bc1465">2020.sltu-1.25</url>
@@ -322,7 +322,7 @@
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Vigneshwaran</first><last>Muralidaran</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>202–210</pages>
       <abstract>Understanding the sentiment of a comment from a video or an image is an essential task in many applications. Sentiment analysis of a text can be useful for various decision-making processes. One such application is to analyse the popular sentiments of videos on social media based on viewer comments. However, comments from social media do not follow strict rules of grammar, and they contain mixing of more than one language, often written in non-native scripts. Non-availability of annotated code-mixed data for a low-resourced language like Tamil also adds difficulty to this problem. To overcome this, we created a gold standard Tamil-English code-switched, sentiment-annotated corpus containing 15,744 comment posts from YouTube. In this paper, we describe the process of creating the corpus and assigning polarities. We present inter-annotator agreement and show the results of sentiment analysis trained on this corpus as a benchmark.</abstract>
       <url hash="240990a5">2020.sltu-1.28</url>
@@ -360,9 +360,9 @@
       <title>Lenition and Fortition of Stop Codas in <fixed-case>R</fixed-case>omanian</title>
       <author><first>Mathilde</first><last>Hutin</last></author>
       <author><first>Oana</first><last>Niculescu</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>226–234</pages>
       <abstract>The present paper aims at providing a first study of lenition- and fortition-type phenomena in coda position in Romanian, a language that can be considered as less-resourced. Our data show that there are two contexts for devoicing in Romanian: before a voiceless obstruent, which means that there is regressive voicelessness assimilation in the language, and before pause, which means that there is a tendency towards final devoicing proper. The data also show that non-canonical voicing is an instance of voicing assimilation, as it is observed mainly before voiced consonants (voiced obstruents and sonorants alike). Two conclusions can be drawn from our analyses. First, from a phonetic point of view, the two devoicing phenomena exhibit the same behavior regarding place of articulation of the coda, while voicing assimilation displays the reverse tendency. In particular, alveolars, which tend to devoice the most, also voice the least. Second, the two assimilation processes have similarities that could distinguish them from final devoicing as such. Final devoicing seems to be sensitive to speech style and gender of the speaker, while assimilation processes do not. This may indicate that the two kinds of processes are phonologized at two different degrees in the language, assimilation being more accepted and generalized than final devoicing.</abstract>
       <url hash="344cdb25">2020.sltu-1.31</url>
@@ -394,8 +394,8 @@
     <paper id="34">
       <title>Automatic Extraction of Verb Paradigms in Regional Languages: the case of the Linguistic Crescent varieties</title>
       <author><first>Elena</first><last>Knyazeva</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Maximilien</first><last>Guérin</last></author>
       <author><first>Nicolas</first><last>Quint</last></author>
       <pages>245–249</pages>
@@ -427,8 +427,8 @@
     </paper>
     <paper id="37">
       <title><fixed-case>DNN</fixed-case>-Based Multilingual Automatic Speech Recognition for <fixed-case>W</fixed-case>olaytta using <fixed-case>O</fixed-case>romo Speech</title>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <pages>265–270</pages>
       <abstract>It is known that Automatic Speech Recognition (ASR) is very useful for human-computer interaction in all the human languages. However, due to its requirement for a big speech corpus, which is very expensive, it has not been developed for most of the languages. Multilingual ASR (MLASR) has been suggested to share existing speech corpora among related languages to develop an ASR for languages which do not have the required speech corpora. Literature shows that phonetic relatedness goes across language families. We have, therefore, conducted experiments on MLASR taking two language families: one as source (Oromo from Cushitic) and the other as target (Wolaytta from Omotic). Using Oromo Deep Neural Network (DNN) based acoustic model, Wolaytta pronunciation dictionary and language model we have achieved Word Error Rate (WER) of 48.34% for Wolaytta. Moreover, our experiments show that adding only 30 minutes of speech data from the target language (Wolaytta) to the whole training data (22.8 hours) of the source language (Oromo) results in a relative WER reduction of 32.77%. Our results show the possibility of developing ASR system for a language, if we have pronunciation dictionary and language model, using an existing speech corpus of another language irrespective of their language family.</abstract>
@@ -449,7 +449,7 @@
     <paper id="39">
       <title>Basic Language Resources for 31 Languages (Plus <fixed-case>E</fixed-case>nglish): The <fixed-case>LORELEI</fixed-case> Representative and Incident Language Packs</title>
       <author><first>Jennifer</first><last>Tracey</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>277–284</pages>
       <abstract>This paper documents and describes the thirty-one basic language resource packs created for the DARPA LORELEI program for use in development and testing of systems capable of providing language-independent situational awareness in emerging scenarios in a low resource language context. Twenty-four Representative Language Packs cover a broad range of language families and typologies, providing large volumes of monolingual and parallel text, smaller volumes of entity and semantic annotations, and a variety of grammatical resources and tools designed to support research into language universals and cross-language transfer. Seven Incident Language Packs provide test data to evaluate system capabilities on a previously unseen low resource language. We discuss the makeup of Representative and Incident Language Packs, the methods used to produce them, and the evolution of their design and implementation over the course of the multi-year LORELEI program. We conclude with a summary of the final language packs including their low-cost publication in the LDC catalog.</abstract>
       <url hash="6a9cd549">2020.sltu-1.39</url>
@@ -545,7 +545,7 @@
       <author><first>Timofey</first><last>Arkhangelskiy</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Michael</first><last>Rießler</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>336–341</pages>
       <abstract>In this paper, we expand on previous work on automatic speech recognition in a low-resource scenario typical of data collected by field linguists. We train DeepSpeech models on 35 hours of dialectal Komi speech recordings and correct the output using language models constructed from various sources. Previous experiments showed that transfer learning using DeepSpeech can improve the accuracy of a speech recognizer for Komi, though the error rate remained very high. In this paper we present further experiments with language models created using KenLM from text materials available online. These are constructed from two corpora, one containing literary texts, one for social media content, and another combining the two. We then trained the model using each language model to explore the impact of the language model data source on the speech recognition model. Our results show significant improvements of over 25% in character error rate and nearly 20% in word error rate. This offers important methodological insight into how ASR results can be improved under low-resource conditions: transfer learning can be used to compensate the lack of training data in the target language, and online texts are a very useful resource when developing language models in this context.</abstract>
       <url hash="c27dfe91">2020.sltu-1.47</url>
@@ -563,15 +563,15 @@
       <author><first>Matthew</first><last>Lee</last></author>
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Luke</first><last>Gessler</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>Shirley Anugrah</first><last>Hayati</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Olga</first><last>Zamaraeva</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Jennette</first><last>Child</last></author>
       <author><first>Sara</first><last>Child</last></author>
       <author><first>Rebecca</first><last>Knowles</last></author>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
       <author><first>Yiyuan</first><last>Li</last></author>
       <author><first>Sydney</first><last>Zink</last></author>
@@ -589,7 +589,7 @@
       <author><first>Kumar</first><last>Saurav</last></author>
       <author><first>Kumar</first><last>Saunack</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>352–357</pages>
       <abstract>Dense word vectors or ‘word embeddings’ which encode semantic properties of words, have now become integral to NLP tasks like Machine Translation (MT), Question Answering (QA), Word Sense Disambiguation (WSD), and Information Retrieval (IR). In this paper, we use various existing approaches to create multiple word embeddings for 14 Indian languages. We place these embeddings for all these languages, <i>viz.</i>, Assamese, Bengali, Gujarati, Hindi, Kannada, Konkani, Malayalam, Marathi, Nepali, Odiya, Punjabi, Sanskrit, Tamil, and Telugu in a single repository. Relatively newer approaches that emphasize catering to context (BERT, ELMo, <i>etc.</i>) have shown significant improvements, but require a large amount of resources to generate usable models. We release pre-trained embeddings generated using both contextual and non-contextual approaches. We also use MUSE and XLM to train cross-lingual embeddings for all pairs of the aforementioned languages. To show the efficacy of our embeddings, we evaluate our embedding models on XPOS, UPOS and NER tasks for all these languages. We release a total of 436 models using 8 different approaches. We hope they are useful for the resource-constrained Indian language NLP. The title of this paper refers to the famous novel “A Passage to India” by E.M. Forster, published initially in 1924.</abstract>
       <url hash="71204e25">2020.sltu-1.49</url>
@@ -598,7 +598,7 @@
     </paper>
     <paper id="50">
       <title>A Counselling Corpus in <fixed-case>C</fixed-case>antonese</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Tianyuan</first><last>Cai</last></author>
       <author><first>Wenxiu</first><last>Xie</last></author>
       <author><first>Lam</first><last>Xing</last></author>
@@ -611,7 +611,7 @@
     <paper id="51">
       <title>Speech Transcription Challenges for Resource Constrained Indigenous Language <fixed-case>C</fixed-case>ree</title>
       <author><first>Vishwa</first><last>Gupta</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <pages>362–367</pages>
       <abstract>Cree is one of the most spoken Indigenous languages in Canada. From a speech recognition perspective, it is a low-resource language, since very little data is available for either acoustic or language modeling. This has prevented development of speech technology that could help revitalize the language. We describe our experiments with available Cree data to improve automatic transcription both in speaker- independent and dependent scenarios. While it was difficult to get low speaker-independent word error rates with only six speakers, we were able to get low word and phoneme error rates in the speaker-dependent scenario. We compare our phoneme recognition with two state-of-the-art open-source phoneme recognition toolkits, which use end-to-end training and sequence-to-sequence modeling. Our phoneme error rate (8.7%) is significantly lower than that achieved by the best of these systems (15.1%). With these systems and varying amounts of transcribed and text data, we show that pre-training on other languages is important for speaker-independent recognition, and even small amounts of additional text-only documents are useful. These results can guide practical language documentation work, when deciding how much transcribed and text data is needed to achieve useful phoneme accuracies.</abstract>
       <url hash="5d9bcbea">2020.sltu-1.51</url>
diff --git a/data/xml/2020.smm4h.xml b/data/xml/2020.smm4h.xml
index 24cf9b3f92..a7ac89f50c 100644
--- a/data/xml/2020.smm4h.xml
+++ b/data/xml/2020.smm4h.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth Social Media Mining for Health Applications Workshop &amp; Shared Task</booktitle>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
       <editor><first>Ari Z.</first><last>Klein</last></editor>
       <editor><first>Ivan</first><last>Flores</last></editor>
-      <editor><first>Davy</first><last>Weissenbacher</last></editor>
+      <editor id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></editor>
       <editor><first>Arjun</first><last>Magge</last></editor>
       <editor><first>Karen</first><last>O'Connor</last></editor>
       <editor><first>Abeed</first><last>Sarker</last></editor>
@@ -85,7 +85,7 @@
       <author><first>Huong</first><last>Dang</last></author>
       <author><first>Kahyun</first><last>Lee</last></author>
       <author><first>Sam</first><last>Henry</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>37–41</pages>
       <abstract>Twitter is a valuable source of patient-generated data that has been used in various population health studies. The first step in many of these studies is to identify and capture Twitter messages (tweets) containing medication mentions. In this article, we describe our submission to Task 1 of the Social Media Mining for Health Applications (SMM4H) Shared Task 2020. This task challenged participants to detect tweets that mention medications or dietary supplements in a natural, highly imbalance dataset. Our system combined a handcrafted preprocessing step with an ensemble of 20 BERT-based classifiers generated by dividing the training dataset into subsets using 10-fold cross validation and exploiting two BERT embedding models. Our system ranked first in this task, and improved the average F1 score across all participating teams by 19.07% with a precision, recall, and F1 on the test set of 83.75%, 87.01%, and 85.35% respectively.</abstract>
       <url hash="9ec7fef8">2020.smm4h-1.5</url>
@@ -165,8 +165,8 @@
     <paper id="12">
       <title>Towards Preemptive Detection of Depression and Anxiety in <fixed-case>T</fixed-case>witter</title>
       <author><first>David</first><last>Owen</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>82–89</pages>
       <abstract>Depression and anxiety are psychiatric disorders that are observed in many areas of everyday life. For example, these disorders manifest themselves somewhat frequently in texts written by nondiagnosed users in social media. However, detecting users with these conditions is not a straightforward task as they may not explicitly talk about their mental state, and if they do, contextual cues such as immediacy must be taken into account. When available, linguistic flags pointing to probable anxiety or depression could be used by medical experts to write better guidelines and treatments. In this paper, we develop a dataset designed to foster research in depression and anxiety detection in Twitter, framing the detection task as a binary tweet classification problem. We then apply state-of-the-art classification models to this dataset, providing a competitive set of baselines alongside qualitative error analysis. Our results show that language models perform reasonably well, and better than more traditional baselines. Nonetheless, there is clear room for improvement, particularly with unbalanced training sets and in cases where seemingly obvious linguistic cues (keywords) are used counter-intuitively.</abstract>
       <url hash="f261dcdb">2020.smm4h-1.12</url>
@@ -201,7 +201,7 @@
     <paper id="15">
       <title><fixed-case>FBK</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>2020: <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a for Detecting Medications on <fixed-case>T</fixed-case>witter</title>
       <author><first>Silvia</first><last>Casola</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>101–103</pages>
       <abstract>This paper describes a classifier for tweets that mention medications or supplements, based on a pretrained transformer. We developed such a system for our participation in Subtask 1 of the Social Media Mining for Health Application workshop, which featured an extremely unbalanced dataset. The model showed promising results, with an F1 of 0.8 (task mean: 0.66).</abstract>
       <url hash="72711b9e">2020.smm4h-1.15</url>
@@ -212,7 +212,7 @@
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
       <author><first>Prashi</first><last>Khurana</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>104–109</pages>
       <abstract>This paper details a system designed for Social Media Mining for Health Applications (SMM4H) Shared Task 2020. We specifically describe the systems designed to solve task 2: Automatic classification of multilingual tweets that report adverse effects, and task 3: Automatic extraction and normalization of adverse effects in English tweets. Fine tuning RoBERTa large for classifying English tweets enables us to achieve a F1 score of 56%, which is an increase of +10% compared to the average F1 score for all the submissions. Using BERT based NER and question answering, we are able to achieve a F1 score of 57.6% for extracting adverse reaction mentions from tweets, which is an increase of +1.2% compared to the average F1 score for all the submissions.</abstract>
       <url hash="381beedf">2020.smm4h-1.16</url>
@@ -288,8 +288,8 @@
     <paper id="24">
       <title><fixed-case>LITL</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>: An Old-school Feature-based Classifier for Identifying Adverse Effects in Tweets</title>
       <author><first>Ludovic</first><last>Tanguy</last></author>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Roxane</first><last>Bois</last></author>
       <author><first>Touati Mohamed Yacine</first><last>Haddad</last></author>
       <author><first>Claire</first><last>Ibarboure</last></author>
@@ -345,7 +345,7 @@
       <title><fixed-case>NLP</fixed-case>@<fixed-case>VCU</fixed-case>: Identifying Adverse Effects in <fixed-case>E</fixed-case>nglish Tweets for Unbalanced Data</title>
       <author><first>Darshini</first><last>Mahendran</last></author>
       <author><first>Cora</first><last>Lewis</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>158–160</pages>
       <abstract>This paper describes our participation in the Social Media Mining for Health Application (SMM4H 2020) Challenge Track 2 for identifying tweets containing Adverse Effects (AEs). Our system uses Convolutional Neural Networks. We explore downsampling, oversampling, and adjusting the class weights to account for the imbalanced nature of the dataset. Our results showed downsampling outperformed oversampling and adjusting the class weights on the test set however all three obtained similar results on the development set.</abstract>
       <url hash="8e0a5c9c">2020.smm4h-1.29</url>
diff --git a/data/xml/2020.socialnlp.xml b/data/xml/2020.socialnlp.xml
index 2a64d2f2fd..1aeb379264 100644
--- a/data/xml/2020.socialnlp.xml
+++ b/data/xml/2020.socialnlp.xml
@@ -47,7 +47,7 @@
       <author><first>Sayan</first><last>Sinha</last></author>
       <author><first>Sohan</first><last>Patro</last></author>
       <author><first>Kripa</first><last>Ghosh</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last></author>
+      <author><first>Saptarshi</first><last>Ghosh</last></author>
       <pages>15–24</pages>
       <abstract>Although a lot of research has been done on utilising Online Social Media during disasters, there exists no system for a specific task that is critical in a post-disaster scenario – identifying resource-needs and resource-availabilities in the disaster-affected region, coupled with their subsequent matching. To this end, we present NARMADA, a semi-automated platform which leverages the crowd-sourced information from social media posts for assisting post-disaster relief coordination efforts. The system employs Natural Language Processing and Information Retrieval techniques for identifying resource-needs and resource-availabilities from microblogs, extracting resources from the posts, and also matching the needs to suitable availabilities. The system is thus capable of facilitating the judicious management of resources during post-disaster relief operations.</abstract>
       <url hash="c75f139f">2020.socialnlp-1.3</url>
diff --git a/data/xml/2020.splu.xml b/data/xml/2020.splu.xml
index e7ac463d7c..22501103e1 100644
--- a/data/xml/2020.splu.xml
+++ b/data/xml/2020.splu.xml
@@ -8,7 +8,7 @@
       <editor><first>Malihe</first><last>Alikhani</last></editor>
       <editor><first>Jason</first><last>Baldridge</last></editor>
       <editor><first>Mohit</first><last>Bansal</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>November</month>
@@ -64,7 +64,7 @@
       <author><first>Mauricio</first><last>Mazuecos</last></author>
       <author><first>Agata</first><last>Marcante</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>29–38</pages>
       <abstract>In this paper, we study the grounding skills required to answer spatial questions asked by humans while playing the GuessWhat?! game. We propose a classification for spatial questions dividing them into absolute, relational, and group questions. We build a new answerer model based on the LXMERT multimodal transformer and we compare a baseline with and without visual features of the scene. We are interested in studying how the attention mechanisms of LXMERT are used to answer spatial questions since they require putting attention on more than one region simultaneously and spotting the relation holding among them. We show that our proposed model outperforms the baseline by a large extent (9.70% on spatial questions and 6.27% overall). By analyzing LXMERT errors and its attention mechanisms, we find that our classification helps to gain a better understanding of the skills required to answer different spatial questions.</abstract>
       <url hash="7075bf3b">2020.splu-1.4</url>
diff --git a/data/xml/2020.spnlp.xml b/data/xml/2020.spnlp.xml
index 1b53be87a7..d48b467330 100644
--- a/data/xml/2020.spnlp.xml
+++ b/data/xml/2020.spnlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
       <editor><first>Julia</first><last>Kreutzer</last></editor>
       <editor><first>Gerasimos</first><last>Lampouras</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Sujith</first><last>Ravi</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,8 +23,8 @@
     <paper id="1">
       <title>Syntax-driven Iterative Expansion Language Models for Controllable Text Generation</title>
       <author><first>Noe</first><last>Casas</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>1–10</pages>
       <abstract>The dominant language modeling paradigm handles text as a sequence of discrete tokens. While that approach can capture the latent structure of the text, it is inherently constrained to sequential dynamics for text generation. We propose a new paradigm for introducing a syntactic inductive bias into neural text generation, where the dependency parse tree is used to drive the Transformer model to generate sentences iteratively. Our experiments show that this paradigm is effective at text generation, with quality between LSTMs and Transformers, and comparable diversity, requiring less than half their decoding steps, and its generation process allows direct control over the syntactic constructions of the generated text, enabling the induction of stylistic variations.</abstract>
       <url hash="3619ffe4">2020.spnlp-1.1</url>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="3">
       <title>Generating Synthetic Data for Task-Oriented Semantic Parsing with Hierarchical Representations</title>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Ming</first><last>Tan</last></author>
       <pages>17–21</pages>
       <abstract>Modern conversational AI systems support natural language understanding for a wide variety of capabilities. While a majority of these tasks can be accomplished using a simple and flat representation of intents and slots, more sophisticated capabilities require complex hierarchical representations supported by semantic parsing. State-of-the-art semantic parsers are trained using supervised learning with data labeled according to a hierarchical schema which might be costly to obtain or not readily available for a new domain. In this work, we explore the possibility of generating synthetic data for neural semantic parsing using a pretrained denoising sequence-to-sequence model (i.e., BART). Specifically, we first extract masked templates from the existing labeled utterances, and then fine-tune BART to generate synthetic utterances conditioning on the extracted templates. Finally, we use an auxiliary parser (AP) to filter the generated utterances. The AP guarantees the quality of the generated data. We show the potential of our approach when evaluating on the Facebook TOP dataset for navigation domain.</abstract>
@@ -62,7 +62,7 @@
     <paper id="4">
       <title>Structured Prediction for Joint Class Cardinality and Entity Property Inference in Model-Complete Text Comprehension</title>
       <author><first>Hendrik</first><last>ter Horst</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>22–32</pages>
       <abstract>Model-complete text comprehension aims at interpreting a natural language text with respect to a semantic domain model describing the classes and their properties relevant for the domain in question. Solving this task can be approached as a structured prediction problem, consisting in inferring the most probable instance of the semantic model given the text. In this work, we focus on the challenging sub-problem of cardinality prediction that consists in predicting the number of distinct individuals of each class in the semantic model. We show that cardinality prediction can successfully be approached by modeling the overall task as a joint inference problem, predicting the number of individuals of certain classes while at the same time extracting their properties. We approach this task with probabilistic graphical models computing the maximum-a-posteriori instance of the semantic model. Our main contribution lies on the empirical investigation and analysis of different approximative inference strategies based on Gibbs sampling. We present and evaluate our models on the task of extracting key parameters from scientific full text articles describing pre-clinical studies in the domain of spinal cord injury.</abstract>
       <url hash="e3101be1">2020.spnlp-1.4</url>
@@ -74,7 +74,7 @@
       <title>Energy-based Neural Modelling for Large-Scale Multiple Domain Dialogue State Tracking</title>
       <author><first>Anh Duong</first><last>Trinh</last></author>
       <author><first>Robert J.</first><last>Ross</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>33–42</pages>
       <abstract>Scaling up dialogue state tracking to multiple domains is challenging due to the growth in the number of variables being tracked. Furthermore, dialog state tracking models do not yet explicitly make use of relationships between dialogue variables, such as slots across domains. We propose using energy-based structure prediction methods for large-scale dialogue state tracking task in two multiple domain dialogue datasets. Our results indicate that: (i) modelling variable dependencies yields better results; and (ii) the structured prediction output aligns with the dialogue slot-value constraint principles. This leads to promising directions to improve state-of-the-art models by incorporating variable dependencies into their prediction process.</abstract>
       <url hash="a82a7410">2020.spnlp-1.5</url>
diff --git a/data/xml/2020.starsem.xml b/data/xml/2020.starsem.xml
index 09226e959c..d8962f8c34 100644
--- a/data/xml/2020.starsem.xml
+++ b/data/xml/2020.starsem.xml
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Improving Medical <fixed-case>NLI</fixed-case> Using Context-Aware Domain Knowledge</title>
       <author><first>Shaika</first><last>Chowdhury</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Yuan</first><last>Luo</last></author>
       <pages>1–11</pages>
       <abstract>Domain knowledge is important to understand both the lexical and relational associations of words in natural language text, especially for domain-specific tasks like Natural Language Inference (NLI) in the medical domain, where due to the lack of a large annotated dataset such knowledge cannot be implicitly learned during training. However, because of the linguistic idiosyncrasies of clinical texts (e.g., shorthand jargon), solely relying on domain knowledge from an external knowledge base (e.g., UMLS) can lead to wrong inference predictions as it disregards contextual information and, hence, does not return the most relevant mapping. To remedy this, we devise a knowledge adaptive approach for medical NLI that encodes the premise/hypothesis texts by leveraging supplementary external knowledge, alongside the UMLS, based on the word contexts. By incorporating refined domain knowledge at both the lexical and relational levels through a multi-source attention mechanism, it is able to align the token-level interactions between the premise and hypothesis more effectively. Comprehensive experiments and case study on the recently released MedNLI dataset are conducted to validate the effectiveness of the proposed approach.</abstract>
@@ -51,7 +51,7 @@
       <title>Automatic Learning of Modality Exclusivity Norms with Crosslingual Word Embeddings</title>
       <author><first>Emmanuele</first><last>Chersoni</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>32–38</pages>
       <abstract>Collecting modality exclusivity norms for lexical items has recently become a common practice in psycholinguistics and cognitive research. However, these norms are available only for a relatively small number of languages and often involve a costly and time-consuming collection of ratings. In this work, we aim at learning a mapping between word embeddings and modality norms. Our experiments focused on crosslingual word embeddings, in order to predict modality association scores by training on a high-resource language and testing on a low-resource one. We ran two experiments, one in a monolingual and the other one in a crosslingual setting. Results show that modality prediction using off-the-shelf crosslingual embeddings indeed has moderate-to-high correlations with human ratings even when regression algorithms are trained on an English resource and tested on a completely unseen language.</abstract>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="7">
       <title>Token Sequence Labeling vs. Clause Classification for <fixed-case>E</fixed-case>nglish Emotion Stimulus Detection</title>
-      <author><first>Laura Ana Maria</first><last>Oberländer</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Oberländer</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>58–70</pages>
       <abstract>Emotion stimulus detection is the task of finding the cause of an emotion in a textual description, similar to target or aspect detection for sentiment analysis. Previous work approached this in three ways, namely (1) as text classification into an inventory of predefined possible stimuli (“Is the stimulus category A or B?”), (2) as sequence labeling of tokens (“Which tokens describe the stimulus?”), and (3) as clause classification (“Does this clause contain the emotion stimulus?”). So far, setting (3) has been evaluated broadly on Mandarin and (2) on English, but no comparison has been performed. Therefore, we analyze whether clause classification or token sequence labeling is better suited for emotion stimulus detection in English. We propose an integrated framework which enables us to evaluate the two different approaches comparably, implement models inspired by state-of-the-art approaches in Mandarin, and test them on four English data sets from different domains. Our results show that token sequence labeling is superior on three out of four datasets, in both clause-based and token sequence-based evaluation. The only case in which clause classification performs better is one data set with a high density of clause annotations. Our error analysis further confirms quantitatively and qualitatively that clauses are not the appropriate stimulus unit in English.</abstract>
@@ -112,10 +112,10 @@
     <paper id="10">
       <title>On the Systematicity of Probing Contextualized Word Representations: The Case of Hypernymy in <fixed-case>BERT</fixed-case></title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>88–102</pages>
       <abstract>Contextualized word representations have become a driving force in NLP, motivating widespread interest in understanding their capabilities and the mechanisms by which they operate. Particularly intriguing is their ability to identify and encode conceptual abstractions. Past work has probed BERT representations for this competence, finding that BERT can correctly retrieve noun hypernyms in cloze tasks. In this work, we ask the question: <i>do probing studies shed light on systematic knowledge in BERT representations?</i> As a case study, we examine hypernymy knowledge encoded in BERT representations. In particular, we demonstrate through a simple consistency probe that the ability to correctly retrieve hypernyms in cloze tasks, as used in prior work, does not correspond to systematic knowledge in BERT. Our main conclusion is cautionary: even if BERT demonstrates high probing accuracy for a particular competence, it does not necessarily follow that BERT ‘understands’ a concept, and it cannot be expected to systematically generalize across applicable contexts.</abstract>
       <url hash="d5175d88">2020.starsem-1.10</url>
@@ -124,7 +124,7 @@
     <paper id="11">
       <title>Topology of Word Embeddings: Singularities Reflect Polysemy</title>
       <author><first>Alexander</first><last>Jakubowski</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <author><first>Marcus</first><last>Zibrowius</last></author>
       <pages>103–113</pages>
       <abstract>The manifold hypothesis suggests that word vectors live on a submanifold within their ambient vector space. We argue that we should, more accurately, expect them to live on a <i>pinched</i> manifold: a singular quotient of a manifold obtained by identifying some of its points. The identified, singular points correspond to polysemous words, i.e. words with multiple meanings. Our point of view suggests that monosemous and polysemous words can be distinguished based on the topology of their neighbourhoods. We present two kinds of empirical evidence to support this point of view: (1) We introduce a topological measure of polysemy based on persistent homology that correlates well with the actual number of meanings of a word. (2) We propose a simple, topologically motivated solution to the SemEval-2010 task on <i>Word Sense Induction &amp; Disambiguation</i> that produces competitive results.</abstract>
@@ -134,7 +134,7 @@
     <paper id="12">
       <title>Assessing Polyseme Sense Similarity through Co-predication Acceptability and Contextualised Embedding Distance</title>
       <author><first>Janosch</first><last>Haber</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>114–124</pages>
       <abstract>Co-predication is one of the most frequently used linguistic tests to tell apart shifts in polysemic sense from changes in homonymic meaning. It is increasingly coming under criticism as evidence is accumulating that it tends to mis-classify specific cases of polysemic sense alteration as homonymy. In this paper, we collect empirical data to investigate these accusations. We asses how co-predication acceptability relates to explicit ratings of polyseme word sense similarity, and how well either measure can be predicted through the distance between target words’ contextualised word embeddings. We find that sense similarity appears to be a major contributor in determining co-predication acceptability, but that co-predication judgements tend to rate especially less similar sense interpretations equally as unacceptable as homonym pairs, effectively mis-classifying these instances. The tested contextualised word embeddings fail to predict word sense similarity consistently, but the similarities between BERT embeddings show a significant correlation with co-predication ratings. We take this finding as evidence that BERT embeddings might be better representations of context than encodings of word meaning.</abstract>
       <url hash="a4474555">2020.starsem-1.12</url>
@@ -162,7 +162,7 @@
     <paper id="15">
       <title>Learning Negation Scope from Syntactic Structure</title>
       <author><first>Nick</first><last>McKenna</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>137–142</pages>
       <abstract>We present a semi-supervised model which learns the semantics of negation purely through analysis of syntactic structure. Linguistic theory posits that the semantics of negation can be understood purely syntactically, though recent research relies on combining a variety of features including part-of-speech tags, word embeddings, and semantic representations to achieve high task performance. Our simplified model returns to syntactic theory and achieves state-of-the-art performance on the task of Negation Scope Detection while demonstrating the tight relationship between the syntax and semantics of negation.</abstract>
       <url hash="34a7b18c">2020.starsem-1.15</url>
@@ -183,7 +183,7 @@
       <author><first>Kazuma</first><last>Hashimoto</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Yao</first><last>Wang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>154–167</pages>
diff --git a/data/xml/2020.stoc.xml b/data/xml/2020.stoc.xml
index 707251f30e..e7e18e6833 100644
--- a/data/xml/2020.stoc.xml
+++ b/data/xml/2020.stoc.xml
@@ -34,8 +34,8 @@
       <author><first>Sashank</first><last>Santhanam</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Alan</first><last>Zemel</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>1–8</pages>
       <abstract>We describe a system that supports natural language processing (NLP) components for active defenses against social engineering attacks. We deploy a pipeline of human language technology, including Ask and Framing Detection, Named Entity Recognition, Dialogue Engineering, and Stylometry. The system processes modern message formats through a plug-in architecture to accommodate innovative approaches for message analysis, knowledge representation and dialogue generation. The novelty of the system is that it uses NLP for cyber defense and engages the attacker using bots to elicit evidence to attribute to the attacker and to waste the attacker’s time and resources.</abstract>
       <url hash="e4cc603f">2020.stoc-1.1</url>
@@ -50,8 +50,8 @@
       <author><first>Sashank</first><last>Santhanam</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Alan</first><last>Zemel</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>9–14</pages>
       <abstract>We present a paradigm for extensible lexicon development based on Lexical Conceptual Structure to support social engineering detection and response generation. We leverage the central notions of ask (elicitation of behaviors such as providing access to money) and framing (risk/reward implied by the ask). We demonstrate improvements in ask/framing detection through refinements to our lexical organization and show that response generation qualitatively improves as ask/framing detection performance improves. The paradigm presents a systematic and efficient approach to resource adaptation for improved task-specific performance.</abstract>
       <url hash="f4da60fe">2020.stoc-1.2</url>
@@ -126,7 +126,7 @@
       <author><first>Peng</first><last>Liu</last></author>
       <author><first>Kartik-Singh</first><last>Thakur</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>48–55</pages>
       <abstract>This paper describes different approaches to detect malicious content in email interactions through a combination of machine learning and natural language processing tools. Specifically, several neural network designs are tested on word embedding representations to detect suspicious messages and separate them from non-suspicious, benign email. The proposed approaches are trained and tested on distinct email collections, including datasets constructed from publicly available corpora (such as Enron, APWG, etc.) as well as several smaller, non-public datasets used in recent government evaluations. Experimental results show that back-propagation both with and without recurrent neural layers outperforms current state of the art techniques that include supervised learning algorithms with stylometric elements of texts as features. Our results also demonstrate that word embedding vectors are effective means for capturing certain aspects of text meaning that can be teased out through machine learning in non-linear/complex neural networks, in order to obtain highly accurate detection of malicious emails based on email text alone.</abstract>
       <url hash="13c4b37f">2020.stoc-1.8</url>
diff --git a/data/xml/2020.sustainlp.xml b/data/xml/2020.sustainlp.xml
index dc9dce91a2..302aad1435 100644
--- a/data/xml/2020.sustainlp.xml
+++ b/data/xml/2020.sustainlp.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2020-11-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural Language Processing</booktitle>
-      <editor><first>Nafise Sadat</first><last>Moosavi</last></editor>
+      <editor id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></editor>
       <editor><first>Angela</first><last>Fan</last></editor>
       <editor><first>Vered</first><last>Shwartz</last></editor>
       <editor><first>Goran</first><last>Glavaš</last></editor>
-      <editor><first>Shafiq</first><last>Joty</last></editor>
+      <editor id="shafiq-joty"><first>Shafiq</first><last>Joty</last></editor>
       <editor><first>Alex</first><last>Wang</last></editor>
       <editor><first>Thomas</first><last>Wolf</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -202,7 +202,7 @@
       <author><first>Parul</first><last>Awasthy</last></author>
       <author><first>Bishwaranjan</first><last>Bhattacharjee</last></author>
       <author><first>John</first><last>Kender</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>113–118</pages>
       <abstract>Transfer learning is a popular technique to learn a task using less training data and fewer compute resources. However, selecting the correct source model for transfer learning is a challenging task. We demonstrate a novel predictive method that determines which existing source model would minimize error for transfer learning to a given target. This technique does not require learning for prediction, and avoids computational costs of trail-and-error. We have evaluated this technique on nine datasets across diverse domains, including newswire, user forums, air flight booking, cybersecurity news, etc. We show that it per-forms better than existing techniques such as fine-tuning over vanilla BERT, or curriculum learning over the largest dataset on top of BERT, resulting in average F1 score gains in excess of 3%. Moreover, our technique consistently selects the best model using fewer tries.</abstract>
       <url hash="7d051c86">2020.sustainlp-1.15</url>
@@ -262,7 +262,7 @@
     <paper id="20">
       <title><fixed-case>F</fixed-case>ast<fixed-case>F</fixed-case>ormers: Highly Efficient Transformer Models for Natural Language Understanding</title>
       <author><first>Young Jin</first><last>Kim</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>149–158</pages>
       <abstract>Transformer-based models are the state-of-the-art for Natural Language Understanding (NLU) applications. Models are getting bigger and better on various tasks. However, Transformer models remain computationally challenging since they are not efficient at inference-time compared to traditional approaches. In this paper, we present FastFormers, a set of recipes to achieve efficient inference-time performance for Transformer-based models on various NLU tasks. We show how carefully utilizing knowledge distillation, structured pruning and numerical optimization can lead to drastic improvements on inference efficiency. We provide effective recipes that can guide practitioners to choose the best settings for various NLU tasks and pretrained models. Applying the proposed recipes to the SuperGLUE benchmark, we achieve from 9.8x up to 233.9x speed-up compared to out-of-the-box models on CPU. On GPU, we also achieve up to 12.4x speed-up with the presented methods. We show that FastFormers can drastically reduce cost of serving 100 million requests from 4,223 USD to just 18 USD on an Azure F16s_v2 instance. This translates to a sustainable runtime by reducing energy consumption 6.9x - 125.8x according to the metrics used in the SustaiNLP 2020 shared task.</abstract>
       <url hash="958ba189">2020.sustainlp-1.20</url>
diff --git a/data/xml/2020.tacl.xml b/data/xml/2020.tacl.xml
index efacc42bac..cdd3bba24f 100644
--- a/data/xml/2020.tacl.xml
+++ b/data/xml/2020.tacl.xml
@@ -62,8 +62,8 @@
       <author><first>Mandar</first><last>Joshi</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
       <author><first>Yinhan</first><last>Liu</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <doi>10.1162/tacl_a_00300</doi>
       <abstract>We present SpanBERT, a pre-training method that is designed to better represent and predict spans of text. Our approach extends BERT by (1) masking contiguous random spans, rather than random tokens, and (2) training the span boundary representations to predict the entire content of the masked span, without relying on the individual token representations within it. SpanBERT consistently outperforms BERT and our better-tuned baselines, with substantial gains on span selection tasks such as question answering and coreference resolution. In particular, with the same training data and model size as BERTlarge, our single model obtains 94.6% and 88.7% F1 on SQuAD 1.1 and 2.0 respectively. We also achieve a new state of the art on the OntoNotes coreference resolution task (79.6% F1), strong performance on the TACRED relation extraction benchmark, and even gains on GLUE.1</abstract>
@@ -75,7 +75,7 @@
       <title>A Graph-based Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and Dependency Parsing</title>
       <author><first>Hang</first><last>Yan</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <doi>10.1162/tacl_a_00301</doi>
       <abstract>Chinese word segmentation and dependency parsing are two fundamental tasks for Chinese natural language processing. The dependency parsing is defined at the word-level. Therefore word segmentation is the precondition of dependency parsing, which makes dependency parsing suffer from error propagation and unable to directly make use of character-level pre-trained language models (such as BERT). In this paper, we propose a graph-based model to integrate Chinese word segmentation and dependency parsing. Different from previous transition-based joint models, our proposed model is more concise, which results in fewer efforts of feature engineering. Our graph-based joint model achieves better performance than previous joint models and state-of-the-art results in both Chinese word segmentation and dependency parsing. Additionally, when BERT is combined, our model can substantially reduce the performance gap of dependency parsing between joint models and gold-segmented word-based models. Our code is publicly available at <url>https://github.com/fastnlp/JointCwsParser</url></abstract>
       <pages>78–92</pages>
@@ -100,7 +100,7 @@
       <author><first>Shuyan</first><last>Zhou</last></author>
       <author><first>Shruti</first><last>Rijhwani</last></author>
       <author><first>John</first><last>Wieting</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <doi>10.1162/tacl_a_00303</doi>
       <abstract>Cross-lingual entity linking (XEL) is the task of finding referents in a target-language knowledge base (KB) for mentions extracted from source-language texts. The first step of (X)EL is candidate generation, which retrieves a list of plausible candidate entities from the target-language KB for each mention. Approaches based on resources from Wikipedia have proven successful in the realm of relatively high-resource languages, but these do not extend well to low-resource languages with few, if any, Wikipedia pages. Recently, transfer learning methods have been shown to reduce the demand for resources in the low-resource languages by utilizing resources in closely related languages, but the performance still lags far behind their high-resource counterparts. In this paper, we first assess the problems faced by current entity candidate generation methods for low-resource XEL, then propose three improvements that (1) reduce the disconnect between entity mentions and KB entries, and (2) improve the robustness of the model to low-resource scenarios. The methods are simple, but effective: We experiment with our approach on seven XEL datasets and find that they yield an average gain of 16.9% in Top-30 gold candidate recall, compared with state-of-the-art baselines. Our improved model also yields an average gain of 7.9% in in-KB accuracy of end-to-end XEL.1</abstract>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="9">
       <title>Does Syntax Need to Grow on Trees? Sources of Hierarchical Inductive Bias in Sequence-to-Sequence Networks</title>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <doi>10.1162/tacl_a_00304</doi>
@@ -124,7 +124,7 @@
       <author><first>Kai</first><last>Sun</last></author>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.1162/tacl_a_00305</doi>
       <abstract>Machine reading comprehension tasks require a machine reader to answer questions relevant to the given document. In this paper, we present the first free-form multiple-Choice Chinese machine reading Comprehension dataset (C3), containing 13,369 documents (dialogues or more formally written mixed-genre texts) and their associated 19,577 multiple-choice free-form questions collected from Chinese-as-a-second-language examinations. We present a comprehensive analysis of the prior knowledge (i.e., linguistic, domain-specific, and general world knowledge) needed for these real-world problems. We implement rule-based and popular neural methods and find that there is still a significant performance gap between the best performing model (68.5%) and human readers (96.0%), especiallyon problems that require prior knowledge. We further study the effects of distractor plausibility and data augmentation based on translated relevant datasets for English on model performance. We expect C3 to present great challenges to existing systems as answering 86.8% of questions requires both knowledge within and beyond the accompanying document, and we hope that C3 can serve as a platform to study how to leverage various kinds of prior knowledge to better understand a given written or orally oriented text. C3 is available at <url>https://dataset.org/c3/</url>.</abstract>
       <pages>141–155</pages>
@@ -174,7 +174,7 @@
       <author><first>Sarah Ita</first><last>Levitan</last></author>
       <author><first>Michelle</first><last>Levine</last></author>
       <author><first>Marko</first><last>Mandic</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <doi>10.1162/tacl_a_00311</doi>
       <abstract>Humans rarely perform better than chance at lie detection. To better understand human perception of deception, we created a game framework, LieCatcher, to collect ratings of perceived deception using a large corpus of deceptive and truthful interviews. We analyzed the acoustic-prosodic and linguistic characteristics of language trusted and mistrusted by raters and compared these to characteristics of actual truthful and deceptive language to understand how perception aligns with reality. With this data we built classifiers to automatically distinguish trusted from mistrusted speech, achieving an F1 of 66.1%. We next evaluated whether the strategies raters said they used to discriminate between truthful and deceptive responses were in fact useful. Our results show that, although several prosodic and lexical features were consistently perceived as trustworthy, they were not reliable cues. Also, the strategies that judges reported using in deception detection were not helpful for the task. Our work sheds light on the nature of trusted language and provides insight into the challenging problem of human deception detection.</abstract>
       <pages>199–214</pages>
@@ -255,7 +255,7 @@
       <title>Learning Lexical Subspaces in a Distributional Vector Space</title>
       <author><first>Kushal</first><last>Arora</last></author>
       <author><first>Aishik</first><last>Chakraborty</last></author>
-      <author><first>Jackie C. K.</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie C. K.</first><last>Cheung</last></author>
       <doi>10.1162/tacl_a_00316</doi>
       <abstract>In this paper, we propose LexSub, a novel approach towards unifying lexical and distributional semantics. We inject knowledge about lexical-semantic relations into distributional word embeddings by defining subspaces of the distributional vector space in which a lexical relation should hold. Our framework can handle symmetric attract and repel relations (e.g., synonymy and antonymy, respectively), as well as asymmetric relations (e.g., hypernymy and meronomy). In a suite of intrinsic benchmarks, we show that our model outperforms previous approaches on relatedness tasks and on hypernymy classification and detection, while being competitive on word similarity tasks. It also outperforms previous systems on extrinsic classification tasks that benefit from exploiting lexical relational cues. We perform a series of analyses to understand the behaviors of our model.1Code available at <url>https://github.com/aishikchakraborty/LexSub</url>.</abstract>
       <pages>311–329</pages>
@@ -267,7 +267,7 @@
       <author><first>Ashutosh</first><last>Kumar</last></author>
       <author><first>Kabir</first><last>Ahuja</last></author>
       <author><first>Raghuram</first><last>Vadapalli</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <doi>10.1162/tacl_a_00318</doi>
       <abstract>Given a sentence (e.g., “I like mangoes”) and a constraint (e.g., sentiment flip), the goal of controlled text generation is to produce a sentence that adapts the input sentence to meet the requirements of the constraint (e.g., “I hate mangoes”). Going beyond such simple constraints, recent work has started exploring the incorporation of complex syntactic-guidance as constraints in the task of controlled paraphrase generation. In these methods, syntactic-guidance is sourced from a separate exemplar sentence. However, this prior work has only utilized limited syntactic information available in the parse tree of the exemplar sentence. We address this limitation in the paper and propose Syntax Guided Controlled Paraphraser (SGCP), an end-to-end framework for syntactic paraphrase generation. We find that Sgcp can generate syntax-conforming sentences while not compromising on relevance. We perform extensive automated and human evaluations over multiple real-world English language datasets to demonstrate the efficacy of Sgcp over state-of-the-art baselines. To drive future research, we have made Sgcp’s source code available.1</abstract>
       <pages>329–345</pages>
@@ -281,8 +281,8 @@
       <author><first>Wojciech</first><last>Stokowiec</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <doi>10.1162/tacl_a_00319</doi>
       <abstract>We show that Bayes’ rule provides an effective mechanism for creating document translation models that can be learned from only parallel sentences and monolingual documents a compelling benefit because parallel documents are not always available. In our formulation, the posterior probability of a candidate translation is the product of the unconditional (prior) probability of the candidate output document and the “reverse translation probability” of translating the candidate output back into the source language. Our proposed model uses a powerful autoregressive language model as the prior on target language documents, but it assumes that each sentence is translated independently from the target to the source language. Crucially, at test time, when a source document is observed, the document language model prior induces dependencies between the translations of the source sentences in the posterior. The model’s independence assumption not only enables efficient use of available data, but it additionally admits a practical left-to-right beam-search algorithm for carrying out inference. Experiments show that our model benefits from using cross-sentence context in the language model, and it outperforms existing document translation approaches.</abstract>
       <pages>346–360</pages>
@@ -307,7 +307,7 @@
       <author><first>Anhad</first><last>Mohananey</last></author>
       <author><first>Wei</first><last>Peng</last></author>
       <author><first>Sheng-Fu</first><last>Wang</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <doi>10.1162/tacl_a_00321</doi>
       <abstract>We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands.</abstract>
       <pages>377–392</pages>
@@ -337,7 +337,7 @@
     <paper id="28">
       <title>How Can We Know What Language Models Know?</title>
       <author><first>Zhengbao</first><last>Jiang</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
       <author><first>Jun</first><last>Araki</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <doi>10.1162/tacl_a_00324</doi>
@@ -350,7 +350,7 @@
       <title>Topic Modeling in Embedding Spaces</title>
       <author><first>Adji B.</first><last>Dieng</last></author>
       <author><first>Francisco J. R.</first><last>Ruiz</last></author>
-      <author><first>David M.</first><last>Blei</last></author>
+      <author id="david-blei"><first>David M.</first><last>Blei</last></author>
       <doi>10.1162/tacl_a_00325</doi>
       <abstract>Topic modeling analyzes documents to learn meaningful patterns of words. However, existing topic models fail to learn interpretable topics when working with large and heavy-tailed vocabularies. To this end, we develop the embedded topic model (etm), a generative model of documents that marries traditional topic models with word embeddings. More specifically, the etm models each word with a categorical distribution whose natural parameter is the inner product between the word’s embedding and an embedding of its assigned topic. To fit the etm, we develop an efficient amortized variational inference algorithm. The etm discovers interpretable topics even with large vocabularies that include rare words and stop words. It outperforms existing document models, such as latent Dirichlet allocation, in terms of both topic quality and predictive performance.</abstract>
       <pages>439–453</pages>
@@ -359,9 +359,9 @@
     </paper>
     <paper id="30">
       <title><fixed-case>T</fixed-case>y<fixed-case>D</fixed-case>i <fixed-case>QA</fixed-case>: A Benchmark for Information-Seeking Question Answering in Typologically Diverse Languages</title>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Dan</first><last>Garrette</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Vitaly</first><last>Nikolaev</last></author>
@@ -422,8 +422,8 @@
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Shuo</first><last>Sun</last></author>
       <author><first>Lisa</first><last>Yankovskaya</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
@@ -439,13 +439,13 @@
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>John</first><last>Bufe</last></author>
       <author><first>David</first><last>Burkett</last></author>
-      <author><first>Charles</first><last>Chen</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
       <author><first>Josh</first><last>Clausman</last></author>
       <author><first>Jean</first><last>Crawford</last></author>
       <author><first>Kate</first><last>Crim</last></author>
       <author><first>Jordan</first><last>DeLoach</last></author>
       <author><first>Leah</first><last>Dorner</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Hao</first><last>Fang</last></author>
       <author><first>Alan</first><last>Guo</last></author>
       <author><first>David</first><last>Hall</last></author>
@@ -512,7 +512,7 @@
     <paper id="39">
       <title>Nested Named Entity Recognition via Second-best Sequence Learning and Decoding</title>
       <author><first>Takashi</first><last>Shibuya</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.1162/tacl_a_00334</doi>
       <abstract>When an entity name contains other names within it, the identification of all combinations of names can become difficult and expensive. We propose a new method to recognize not only outermost named entities but also inner nested ones. We design an objective function for training a neural model that treats the tag sequence for nested entities as the second best path within the span of their parent entity. In addition, we provide the decoding method for inference that extracts entities iteratively from outermost ones to inner ones in an outside-to-inside way. Our method has no additional hyperparameters to the conditional random field based model widely used for flat named entity recognition tasks. Experiments demonstrate that our method performs better than or at least as well as existing methods capable of handling nested entities, achieving F1-scores of 85.82%, 84.34%, and 77.36% on ACE-2004, ACE-2005, and GENIA datasets, respectively.</abstract>
       <pages>605–620</pages>
@@ -612,7 +612,7 @@
       <author><first>Sergey</first><last>Edunov</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <doi>10.1162/tacl_a_00343</doi>
       <abstract>This paper demonstrates that multilingual denoising pre-training produces significant performance gains across a wide variety of machine translation (MT) tasks. We present mBART—a sequence-to-sequence denoising auto-encoder pre-trained on large-scale monolingual corpora in many languages using the BART objective (Lewis et al., 2019). mBART is the first method for pre-training a complete sequence-to-sequence model by denoising full texts in multiple languages, whereas previous approaches have focused only on the encoder, decoder, or reconstructing parts of the text. Pre-training a complete model allows it to be directly fine-tuned for supervised (both sentence-level and document-level) and unsupervised machine translation, with no task- specific modifications. We demonstrate that adding mBART initialization produces performance gains in all but the highest-resource settings, including up to 12 BLEU points for low resource MT and over 5 BLEU points for many document-level and unsupervised models. We also show that it enables transfer to language pairs with no bi-text or that were not in the pre-training corpus, and present extensive analysis of which factors contribute the most to effective pre-training.1</abstract>
       <pages>726–742</pages>
@@ -649,8 +649,8 @@
       <author><first>Daniel</first><last>Fried</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <doi>10.1162/tacl_a_00345</doi>
       <abstract>Textual representation learners trained on large amounts of data have achieved notable success on downstream tasks; intriguingly, they have also performed well on challenging tests of syntactic competence. Hence, it remains an open question whether scalable learners like BERT can become fully proficient in the syntax of natural language by virtue of data scale alone, or whether they still benefit from more explicit syntactic biases. To answer this question, we introduce a knowledge distillation strategy for injecting syntactic biases into BERT pretraining, by distilling the syntactically informative predictions of a hierarchical—albeit harder to scale—syntactic language model. Since BERT models masked words in bidirectional context, we propose to distill the approximate marginal distribution over words in context from the syntactic LM. Our approach reduces relative error by 2–21% on a diverse set of structured prediction tasks, although we obtain mixed results on the GLUE benchmark. Our findings demonstrate the benefits of syntactic biases, even for representation learners that exploit large amounts of data, and contribute to a better understanding of where syntactic biases are helpful in benchmarks of natural language understanding.</abstract>
       <pages>776–794</pages>
@@ -673,7 +673,7 @@
       <author><first>Ananya B.</first><last>Sai</last></author>
       <author><first>Akash Kumar</first><last>Mohankumar</last></author>
       <author><first>Siddhartha</first><last>Arora</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <doi>10.1162/tacl_a_00347</doi>
       <abstract>There is an increasing focus on model-based dialog evaluation metrics such as ADEM, RUBER, and the more recent BERT-based metrics. These models aim to assign a high score to all relevant responses and a low score to all irrelevant responses. Ideally, such models should be trained using multiple relevant and irrelevant responses for any given context. However, no such data is publicly available, and hence existing models are usually trained using a single relevant response and multiple randomly selected responses from other contexts (random negatives). To allow for better training and robust evaluation of model-based metrics, we introduce the DailyDialog++ dataset, consisting of (i) five relevant responses for each context and (ii) five adversarially crafted irrelevant responses for each context. Using this dataset, we first show that even in the presence of multiple correct references, n-gram based metrics and embedding based metrics do not perform well at separating relevant responses from even random negatives. While model-based metrics perform better than n-gram and embedding based metrics on random negatives, their performance drops substantially when evaluated on adversarial examples. To check if large scale pretraining could help, we propose a new BERT-based evaluation metric called DEB, which is pretrained on 727M Reddit conversations and then finetuned on our dataset. DEB significantly outperforms existing models, showing better correlation with human judgments and better performance on random negatives (88.27% accuracy). However, its performance again drops substantially when evaluated on adversarial responses, thereby highlighting that even large-scale pretrained evaluation models are not robust to the adversarial examples in our dataset. The dataset1 and code2 are publicly available.</abstract>
       <pages>810–827</pages>
@@ -685,7 +685,7 @@
       <author><first>Phillip</first><last>Keung</last></author>
       <author><first>Julian</first><last>Salazar</last></author>
       <author><first>Yichao</first><last>Lu</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00348</doi>
       <abstract>We describe an unsupervised method to create pseudo-parallel corpora for machine translation (MT) from unaligned text. We use multilingual BERT to create source and target sentence embeddings for nearest-neighbor search and adapt the model via self-training. We validate our technique by extracting parallel sentence pairs on the BUCC 2017 bitext mining task and observe up to a 24.5 point increase (absolute) in F1 scores over previous unsupervised methods. We then improve an XLM-based unsupervised neural MT system pre-trained on Wikipedia by supplementing it with pseudo-parallel text mined from the same corpus, boosting unsupervised translation performance by up to 3.5 BLEU on the WMT’14 French-English and WMT’16 German-English tasks and outperforming the previous state-of-the-art. Finally, we enrich the IWSLT’15 English-Vietnamese corpus with pseudo-parallel Wikipedia sentence pairs, yielding a 1.2 BLEU improvement on the low-resource MT task. We demonstrate that unsupervised bitext mining is an effective way of augmenting MT datasets and complements existing techniques like initializing with pre-trained contextual embeddings.</abstract>
       <pages>828–841</pages>
diff --git a/data/xml/2020.tal.xml b/data/xml/2020.tal.xml
index 6af52f7c5d..dbab8ebb62 100644
--- a/data/xml/2020.tal.xml
+++ b/data/xml/2020.tal.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 61, Numéro 1 : Varia [Varia]</booktitle>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -20,11 +20,11 @@
     </frontmatter>
     <paper id="1">
       <title>Alternances de voisement et processus de lénition et de fortition : une étude automatisée de grands corpus en cinq langues romanes [Voicing alternations in relation with lenition and fortition phenomena: an automated study of large corpora in five <fixed-case>R</fixed-case>omance languages]</title>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <author><first>Yaru</first><last>Wu</last></author>
       <author><first>Adèle</first><last>Jatteau</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <pages>13–37</pages>
       <url hash="28df2f42">2020.tal-1.1</url>
       <language>fra</language>
@@ -34,7 +34,7 @@
   <volume id="2" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 61, Numéro 2 : TAL et Santé [NLP and Health]</booktitle>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Berry</first><last>de Bruijn</last></editor>
       <editor><first>Corinne</first><last>Fredouille</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
@@ -71,8 +71,8 @@
       <title>A Multi-pass Sieve for Clinical Concept Normalization</title>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Brian</first><last>Hur</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>41–65</pages>
       <url hash="d4bc94dd">2020.tal-2.3</url>
       <bibkey>wang-etal-2020-multi-pass</bibkey>
@@ -114,7 +114,7 @@
     </paper>
     <paper id="2">
       <title>Situated Meaning in Multimodal Dialogue: Human-Robot and Human-Computer Interactions</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <pages>17–41</pages>
       <url hash="b4d98628">2020.tal-3.2</url>
@@ -134,7 +134,7 @@
       <author><first>Charlie</first><last>Hallart</last></author>
       <author><first>Juliette</first><last>Maes</last></author>
       <author><first>Nicolas</first><last>Spatola</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Thierry</first><last>Chaminade</last></author>
       <pages>69–93</pages>
       <url hash="9fdd0cb3">2020.tal-3.4</url>
diff --git a/data/xml/2020.textgraphs.xml b/data/xml/2020.textgraphs.xml
index 558bf3a020..ed53c9ec2b 100644
--- a/data/xml/2020.textgraphs.xml
+++ b/data/xml/2020.textgraphs.xml
@@ -8,7 +8,7 @@
       <editor><first>Alexander</first><last>Panchenko</last></editor>
       <editor><first>Fragkiskos D.</first><last>Malliaros</last></editor>
       <editor><first>Ioana</first><last>Hulpuș</last></editor>
-      <editor><first>Peter</first><last>Jansen</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
       <editor><first>Abhik</first><last>Jana</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Barcelona, Spain (Online)</address>
@@ -95,7 +95,7 @@
       <author><first>Sander</first><last>Bijl de Vroe</last></author>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>60–71</pages>
       <abstract>We present a novel method for injecting temporality into entailment graphs to address the problem of spurious entailments, which may arise from similar but temporally distinct events involving the same pair of entities. We focus on the sports domain in which the same pairs of teams play on different occasions, with different outcomes. We present an unsupervised model that aims to learn entailments such as win/lose → play, while avoiding the pitfall of learning non-entailments such as win ̸→ lose. We evaluate our model on a manually constructed dataset, showing that incorporating time intervals and applying a temporal window around them, are effective strategies.</abstract>
       <url hash="20e99db0">2020.textgraphs-1.7</url>
diff --git a/data/xml/2020.tlt.xml b/data/xml/2020.tlt.xml
index ed61f55dae..b4efe981ed 100644
--- a/data/xml/2020.tlt.xml
+++ b/data/xml/2020.tlt.xml
@@ -8,7 +8,7 @@
       <editor><first>Rafael</first><last>Ehren</last></editor>
       <editor><first>Simon</first><last>Petitjean</last></editor>
       <editor><first>Esther</first><last>Seyffarth</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Düsseldorf, Germany</address>
       <month>October</month>
@@ -37,7 +37,7 @@
       <author><first>Zuoyu</first><last>Tian</last></author>
       <author><first>Yiwen</first><last>Zhang</last></author>
       <author><first>He</first><last>Zhou</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <author><first>Chien-Jer Charles</first><last>Lin</last></author>
       <pages>18–30</pages>
       <url hash="6987bf4f">2020.tlt-1.2</url>
@@ -59,8 +59,8 @@
       <title>Fine-Grained Morpho-Syntactic Analysis for the Under-Resourced Language Chaghatay</title>
       <author><first>Kenneth</first><last>Steimel</last></author>
       <author><first>Akbar</first><last>Amat</last></author>
-      <author><first>Arienne</first><last>Dwyer</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="arienne-dwyer"><first>Arienne</first><last>Dwyer</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>43–54</pages>
       <url hash="20ff5ac4">2020.tlt-1.4</url>
       <doi>10.18653/v1/2020.tlt-1.4</doi>
@@ -72,7 +72,7 @@
       <author><first>Tatiana</first><last>Bladier</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <author><first>Rainer</first><last>Osswald</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>55–61</pages>
       <url hash="4b2ab796">2020.tlt-1.5</url>
       <doi>10.18653/v1/2020.tlt-1.5</doi>
@@ -111,7 +111,7 @@
     <paper id="9">
       <title>Estimating <fixed-case>POS</fixed-case> Annotation Consistency of Different Treebanks in a Language</title>
       <author><first>Akshay</first><last>Aggarwal</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>93–110</pages>
       <url hash="5a046b33">2020.tlt-1.9</url>
       <doi>10.18653/v1/2020.tlt-1.9</doi>
@@ -133,7 +133,7 @@
       <author><first>Mikko</first><last>Luukko</last></author>
       <author><first>Aleksi</first><last>Sahala</last></author>
       <author><first>Sam</first><last>Hardwick</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>124–134</pages>
       <url hash="ab20b238">2020.tlt-1.11</url>
       <doi>10.18653/v1/2020.tlt-1.11</doi>
@@ -143,7 +143,7 @@
     <paper id="12">
       <title>Dependency Relations for <fixed-case>S</fixed-case>anskrit Parsing and Treebank</title>
       <author><first>Amba</first><last>Kulkarni</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Sanjeev</first><last>Panchal</last></author>
       <author><first>Malay</first><last>Maity</last></author>
       <author><first>Amruta</first><last>Malvade</last></author>
@@ -156,7 +156,7 @@
     <paper id="13">
       <title><fixed-case>A</fixed-case>lpino<fixed-case>G</fixed-case>raph: A Graph-based Search Engine for Flexible and Efficient Treebank Search</title>
       <author><first>Peter</first><last>Kleiweg</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>151–161</pages>
       <url hash="dac74346">2020.tlt-1.13</url>
       <doi>10.18653/v1/2020.tlt-1.13</doi>
@@ -167,7 +167,7 @@
       <title>Implementing an End-to-End Treebank-Informed Pipeline for <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Alexander</first><last>Popov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>162–167</pages>
       <url hash="2813c906">2020.tlt-1.14</url>
       <doi>10.18653/v1/2020.tlt-1.14</doi>
diff --git a/data/xml/2020.trac.xml b/data/xml/2020.trac.xml
index 6bcdc12b4e..350af3ee43 100644
--- a/data/xml/2020.trac.xml
+++ b/data/xml/2020.trac.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying</booktitle>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Bornini</first><last>Lahiri</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Vanessa</first><last>Murdock</last></editor>
       <editor><first>Daniel</first><last>Kadar</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
@@ -50,7 +50,7 @@
       <author><first>Syed Sarfaraz</first><last>Akhtar</last></author>
       <author><first>Naman</first><last>Jain</last></author>
       <author><first>Vinay</first><last>Singh</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>13–20</pages>
       <abstract>The advent of social media has immensely proliferated the amount of opinions and arguments voiced on the internet. These virtual debates often present cases of aggression. While research has been focused largely on analyzing aggression and stance in isolation from each other, this work is the first attempt to gain an extensive and fine-grained understanding of patterns of aggression and figurative language use when voicing opinion. We present a Hindi-English code-mixed dataset of opinion on the politico-social issue of ‘2016 India banknote demonetisation‘ and annotate it across multiple dimensions such as aggression, hate speech, emotion arousal and figurative language usage (such as sarcasm/irony, metaphors/similes, puns/word-play).</abstract>
       <url hash="a4555ee1">2020.trac-1.3</url>
@@ -82,7 +82,7 @@
       <title>Multimodal Meme Dataset (<fixed-case>M</fixed-case>ulti<fixed-case>OFF</fixed-case>) for Identifying Offensive Content in Image and Text</title>
       <author><first>Shardul</first><last>Suryawanshi</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>32–41</pages>
       <abstract>A meme is a form of media that spreads an idea or emotion across the internet. As posting meme has become a new form of communication of the web, due to the multimodal nature of memes, postings of hateful memes or related events like trolling, cyberbullying are increasing day by day. Hate speech, offensive content and aggression content detection have been extensively explored in a single modality such as text or image. However, combining two modalities to detect offensive content is still a developing area. Memes make it even more challenging since they express humour and sarcasm in an implicit way, because of which the meme may not be offensive if we only consider the text or the image. Therefore, it is necessary to combine both modalities to identify whether a given meme is offensive or not. Since there was no publicly available dataset for multimodal offensive meme content detection, we leveraged the memes related to the 2016 U.S. presidential election and created the MultiOFF multimodal meme dataset for offensive content detection dataset. We subsequently developed a classifier for this task using the MultiOFF dataset. We use an early fusion technique to combine the image and text modality and compare it with a text- and an image-only baseline to investigate its effectiveness. Our results show improvements in terms of Precision, Recall, and F-Score. The code and dataset for this paper is published in <i>
@@ -99,7 +99,7 @@
       <author><first>Koustava</first><last>Goswami</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>42–48</pages>
       <abstract>Hate speech detection in social media communication has become one of the primary concerns to avoid conflicts and curb undesired activities. In an environment where multilingual speakers switch among multiple languages, hate speech detection becomes a challenging task using methods that are designed for monolingual corpora. In our work, we attempt to analyze, detect and provide a comparative study of hate speech in a code-mixed social media text. We also provide a Hindi-English code-mixed data set consisting of Facebook and Twitter posts and comments. Our experiments show that deep learning models trained on this code-mixed corpus perform better.</abstract>
       <url hash="320bed95">2020.trac-1.7</url>
@@ -178,7 +178,7 @@
       <author><first>Anisha</first><last>Datta</last></author>
       <author><first>Shukrity</first><last>Si</last></author>
       <author><first>Urbi</first><last>Chakraborty</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>87–92</pages>
       <abstract>In the last few years, hate speech and aggressive comments have covered almost all the social media platforms like facebook, twitter etc. As a result hatred is increasing. This paper describes our (<b>Team name:</b>
  <b>Spyder</b>) participation in the Shared Task on Aggression Detection organised by TRAC-2, Second Workshop on Trolling, Aggression and Cyberbullying. The Organizers provided datasets in three languages – English, Hindi and Bengali. The task was to classify each instance of the test sets into three categories – “Overtly Aggressive” (OAG), “Covertly Aggressive” (CAG) and “Non-Aggressive” (NAG). In this paper, we propose three different models using Tf-Idf, sentiment polarity and machine learning based classifiers. We obtained f1 score of 43.10%, 59.45% and 44.84% respectively for English, Hindi and Bengali.</abstract>
@@ -258,7 +258,7 @@
       <author><first>María José</first><last>Díaz-Torres</last></author>
       <author><first>Paulina Alejandra</first><last>Morán-Méndez</last></author>
       <author><first>Luis</first><last>Villasenor-Pineda</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <author><first>Juan</first><last>Aguilera</last></author>
       <author><first>Luis</first><last>Meneses-Lerín</last></author>
       <pages>132–136</pages>
diff --git a/data/xml/2020.udw.xml b/data/xml/2020.udw.xml
index 1ed823724c..a36fc6451d 100644
--- a/data/xml/2020.udw.xml
+++ b/data/xml/2020.udw.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2020-11-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020)</booktitle>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
       <editor><first>Miryam</first><last>de Lhoneux</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <editor><first>Sebastian</first><last>Schuster</last></editor>
@@ -41,7 +41,7 @@
       <author><first>Hinrik</first><last>Hafsteinsson</last></author>
       <author><first>Einar Freyr</first><last>Sigurðsson</last></author>
       <author><first>Kristín</first><last>Bjarnadóttir</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Hildur</first><last>Jónsdóttir</last></author>
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
       <pages>16–25</pages>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="7">
       <title>From <fixed-case>LFG</fixed-case> To <fixed-case>UD</fixed-case>: A Combined Approach</title>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <pages>57–66</pages>
       <abstract>This paper reports on a systematic approach for deriving Universal Dependencies from LFG structures. The conversion starts with a step-wise transformation of the c-structure, combining part-of-speech (POS) information and the embedding path to determine the true head of dependency structures. The paper discusses several issues faced by existing algorithms when applied on Wolof and presents the strategies used to account for these issues. An experimental evaluation indicated that our approach was able to generate the correct output in more than 90% of the cases, leading to a substantial improvement in conversion accuracy compared to the previous models.</abstract>
       <url hash="84d19d0f">2020.udw-1.7</url>
@@ -199,7 +199,7 @@
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>lbanian</title>
       <author><first>Marsida</first><last>Toska</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>178–188</pages>
       <abstract>In this paper, we introduce the first Universal Dependencies (UD) treebank for standard Albanian, consisting of 60 sentences collected from the Albanian Wikipedia, annotated with lemmas, universal part-of-speech tags, morphological features and syntactic dependencies. In addition to presenting the treebank itself, we discuss a selection of linguistic constructions in Albanian whose analysis in UD is not self-evident, including core arguments and the status of indirect objects, pronominal clitics, genitive constructions, prearticulated adjectives, and modal verbs.</abstract>
       <url hash="9bef0a10">2020.udw-1.20</url>
@@ -219,7 +219,7 @@
     </paper>
     <paper id="22">
       <title>Dependency annotation of noun incorporation in polysynthetic languages</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Karina</first><last>Mishchenkova</last></author>
       <pages>195–204</pages>
       <abstract>This paper describes an approach to annotating noun incorporation in Universal Dependencies. It motivates the need to annotate this particular morphosyntactic phenomenon and justifies it with respect to frequency of the construction. A case study is presented in which the proposed annotation scheme is applied to Chukchi, a language that exhibits noun incorporation. We compare argument encoding in Chukchi, English and Russian and find that while in English and Russian discourse elements are primarily tracked through noun phrases and pronouns, in Chukchi they are tracked through agreement marking and incorporation, with a lesser role for noun phrases.</abstract>
@@ -232,8 +232,8 @@
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for <fixed-case>X</fixed-case>ibe</title>
       <author><first>He</first><last>Zhou</last></author>
       <author><first>Juyeon</first><last>Chung</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>205–215</pages>
       <abstract>We present our work of constructing the first treebank for the Xibe language following the Universal Dependencies (UD) annotation scheme. Xibe is a low-resourced and severely endangered Tungusic language spoken by the Xibe minority living in the Xinjiang Uygur Autonomous Region of China. We collected 810 sentences so far, including 544 sentences from a grammar book on written Xibe and 266 sentences from Cabcal News. We annotated those sentences manually from scratch. In this paper, we report the procedure of building this treebank and analyze several important annotation issues of our treebank. Finally, we propose our plans for future work.</abstract>
       <url hash="a358086c">2020.udw-1.23</url>
diff --git a/data/xml/2020.vardial.xml b/data/xml/2020.vardial.xml
index e0290ba1d8..0529765f63 100644
--- a/data/xml/2020.vardial.xml
+++ b/data/xml/2020.vardial.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects</booktitle>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <publisher>International Committee on Computational Linguistics (ICCL)</publisher>
       <address>Barcelona, Spain (Online)</address>
@@ -25,7 +25,7 @@
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Christoph</first><last>Purschke</last></author>
@@ -40,7 +40,7 @@
       <title><fixed-case>ASR</fixed-case> for Non-standardised Languages with Dialectal Variation: the case of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman</title>
       <author><first>Iuliia</first><last>Nigmatulina</last></author>
       <author><first>Tannon</first><last>Kew</last></author>
-      <author><first>Tanja</first><last>Samardzic</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last></author>
       <pages>15–24</pages>
       <abstract>Strong regional variation, together with the lack of standard orthography, makes Swiss German automatic speech recognition (ASR) particularly difficult in a multi-dialectal setting. This paper focuses on one of the many challenges, namely, the choice of the output text to represent non-standardised Swiss German. We investigate two potential options: a) dialectal writing – approximate phonemic transcriptions that provide close correspondence between grapheme labels and the acoustic signal but are highly inconsistent and b) normalised writing – transcriptions resembling standard German that are relatively consistent but distant from the acoustic signal. To find out which writing facilitates Swiss German ASR, we build several systems using the Kaldi toolkit and a dataset covering 14 regional varieties. A formal comparison shows that the system trained on the normalised transcriptions achieves better results in word error rate (WER) (29.39%) but underperforms at the character level, suggesting dialectal transcriptions offer a viable solution for downstream applications where dialectal differences are important. To better assess word-level performance for dialectal transcriptions, we use a flexible WER measure (FlexWER). When evaluated with this metric, the system trained on dialectal transcriptions outperforms that trained on the normalised writing. Besides establishing a benchmark for Swiss German multi-dialectal ASR, our findings can be helpful in designing ASR systems for other languages without standard orthography.</abstract>
       <url hash="7bab8437">2020.vardial-1.2</url>
@@ -60,7 +60,7 @@
     <paper id="4">
       <title>Machine-oriented <fixed-case>NMT</fixed-case> Adaptation for Zero-shot <fixed-case>NLP</fixed-case> tasks: Comparing the Usefulness of Close and Distant Languages</title>
       <author><first>Amirhossein</first><last>Tebbifakhr</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>36–46</pages>
       <abstract>Neural Machine Translation (NMT) models are typically trained by considering humans as end-users and maximizing human-oriented objectives. However, in some scenarios, their output is consumed by automatic NLP components rather than by humans. In these scenarios, translations’ quality is measured in terms of their “fitness for purpose” (i.e. maximizing performance of external NLP tools) rather than in terms of standard human fluency/adequacy criteria. Recently, reinforcement learning techniques exploiting the feedback from downstream NLP tools have been proposed for “machine-oriented” NMT adaptation. In this work, we tackle the problem in a multilingual setting where a single NMT model translates from multiple languages for downstream automatic processing in the target language. Knowledge sharing across close and distant languages allows to apply our machine-oriented approach in the zero-shot setting where no labeled data for the test language is seen at training time. Moreover, we incorporate multi-lingual BERT in the source side of our NMT system to benefit from the knowledge embedded in this model. Our experiments show coherent performance gains, for different language directions over both i) “generic” NMT models (trained for human consumption), and ii) fine-tuned multilingual BERT. This gain for zero-shot language directions (e.g. Spanish–English) is higher when the models are fine-tuned on a closely-related source language (Italian) than a distant one (German).</abstract>
@@ -70,7 +70,7 @@
     <paper id="5">
       <title>Character Alignment in Morphologically Complex Translation Sets for Related Languages</title>
       <author><first>Michael</first><last>Gasser</last></author>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <author><first>Nazareth Amlesom</first><last>Kifle</last></author>
       <pages>47–56</pages>
       <abstract>For languages with complex morphology, word-to-word translation is a task with various potential applications, for example, in information retrieval, language instruction, and dictionary creation, as well as in machine translation. In this paper, we confine ourselves to the subtask of character alignment for the particular case of families of related languages with very few resources for most or all members. There are many such families; we focus on the subgroup of Semitic languages spoken in Ethiopia and Eritrea. We begin with an adaptation of the familiar alignment algorithms behind statistical machine translation, modifying them as appropriate for our task. We show how character alignment can reveal morphological, phonological, and orthographic correspondences among related languages.</abstract>
@@ -81,10 +81,10 @@
       <title>Bilingual Lexicon Induction across Orthographically-distinct Under-Resourced <fixed-case>D</fixed-case>ravidian Languages</title>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Navaneethan</first><last>Rajasekaran</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Kevin</first><last>McGuinness</last></author>
       <author><first>Noel</first><last>E. O’Connor</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>57–69</pages>
       <abstract>Bilingual lexicons are a vital tool for under-resourced languages and recent state-of-the-art approaches to this leverage pretrained monolingual word embeddings using supervised or semi-supervised approaches. However, these approaches require cross-lingual information such as seed dictionaries to train the model and find a linear transformation between the word embedding spaces. Especially in the case of low-resourced languages, seed dictionaries are not readily available, and as such, these methods produce extremely weak results on these languages. In this work, we focus on the Dravidian languages, namely Tamil, Telugu, Kannada, and Malayalam, which are even more challenging as they are written in unique scripts. To take advantage of orthographic information and cognates in these languages, we bring the related languages into a single script. Previous approaches have used linguistically sub-optimal measures such as the Levenshtein edit distance to detect cognates, whereby we demonstrate that the longest common sub-sequence is linguistically more sound and improves the performance of bilingual lexicon induction. We show that our approach can increase the accuracy of bilingual lexicon induction methods on these languages many times, making bilingual lexicon induction approaches feasible for such under-resourced languages.</abstract>
       <url hash="99a051cf">2020.vardial-1.6</url>
@@ -122,7 +122,7 @@
     </paper>
     <paper id="10">
       <title>Neural Machine Translation for translating into <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Alberto</first><last>Poncelas</last></author>
       <author><first>Marija</first><last>Brkic</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -144,7 +144,7 @@
       <author><first>Badr M.</first><last>Abdullah</last></author>
       <author><first>Jacek</first><last>Kudera</last></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>128–139</pages>
       <abstract>Deep neural networks have been employed for various spoken language recognition tasks, including tasks that are multilingual by definition such as spoken language identification (LID). In this paper, we present a neural model for Slavic language identification in speech signals and analyze its emergent representations to investigate whether they reflect objective measures of language relatedness or non-linguists’ perception of language similarity. While our analysis shows that the language representation space indeed captures language relatedness to a great extent, we find perceptual confusability to be the best predictor of the language representation similarity.</abstract>
@@ -178,7 +178,7 @@
       <title>Towards Augmenting Lexical Resources for Slang and <fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
       <author><first>Alyssa</first><last>Hwang</last></author>
       <author><first>William R.</first><last>Frey</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>160–172</pages>
       <abstract>Researchers in natural language processing have developed large, robust resources for understanding formal Standard American English (SAE), but we lack similar resources for variations of English, such as slang and African American English (AAE). In this work, we use word embeddings and clustering algorithms to group semantically similar words in three datasets, two of which contain high incidence of slang and AAE. Since high-quality clusters would contain related words, we could also infer the meaning of an unfamiliar word based on the meanings of words clustered with it. After clustering, we compute precision and recall scores using WordNet and ConceptNet as gold standards and show that these scores are unimportant when the given resources do not fully represent slang and AAE. Amazon Mechanical Turk and expert evaluations show that clusters with low precision can still be considered high quality, and we propose the new Cluster Split Score as a metric for machine-generated clusters. These contributions emphasize the gap in natural language processing research for variations of English and motivate further work to close it.</abstract>
       <url hash="cca9582d">2020.vardial-1.15</url>
@@ -189,7 +189,7 @@
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>173–185</pages>
       <abstract>This article introduces the Wanca 2017 web corpora from which the sentences written in minor Uralic languages were collected for the test set of the Uralic Language Identification (ULI) 2020 shared task. We describe the ULI shared task and how the test set was constructed using the Wanca 2017 corpora and texts in different languages from the Leipzig corpora collection. We also provide the results of a baseline language identification experiment conducted using the ULI 2020 dataset.</abstract>
       <url hash="c77a4cfd">2020.vardial-1.16</url>
@@ -234,7 +234,7 @@
       <title>Experiments in Language Variety Geolocation and Dialect Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>220–231</pages>
       <abstract>In this paper we describe the systems we used when participating in the VarDial Evaluation Campaign organized as part of the 7th workshop on NLP for similar languages, varieties and dialects. The shared tasks we participated in were the second edition of the Romanian Dialect Identification (RDI) and the first edition of the Social Media Variety Geolocation (SMG). The submissions of our SUKI team used generative language models based on Naive Bayes and character n-grams.</abstract>
       <url hash="3c7daedc">2020.vardial-1.21</url>
@@ -263,7 +263,7 @@
     <paper id="24">
       <title><fixed-case>ZHAW</fixed-case>-<fixed-case>I</fixed-case>n<fixed-case>IT</fixed-case> - Social Media Geolocation at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2020</title>
       <author><first>Fernando</first><last>Benites</last></author>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Pius</first><last>von Däniken</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>254–264</pages>
@@ -283,7 +283,7 @@
     <paper id="26">
       <title>Challenges in Neural Language Identification: <fixed-case>NRC</fixed-case> at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2020</title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>273–282</pages>
       <abstract>We describe the systems developed by the National Research Council Canada for the Uralic language identification shared task at the 2020 VarDial evaluation campaign. Although our official results were well below the baseline, we show in this paper that this was not due to the neural approach to language identification in general, but to a flaw in the function we used to sample data for training and evaluation purposes. Preliminary experiments conducted after the evaluation period suggest that our neural approach to language identification can achieve state-of-the-art results on this task, although further experimentation is required.</abstract>
       <url hash="5f40a823">2020.vardial-1.26</url>
diff --git a/data/xml/2020.vlsp.xml b/data/xml/2020.vlsp.xml
index a85bdc2ab4..4219fd7f8f 100644
--- a/data/xml/2020.vlsp.xml
+++ b/data/xml/2020.vlsp.xml
@@ -42,7 +42,7 @@
     </paper>
     <paper id="3">
       <title>An Empirical Study of Using Pre-trained <fixed-case>BERT</fixed-case> Models for <fixed-case>V</fixed-case>ietnamese Relation Extraction Task at <fixed-case>VLSP</fixed-case> 2020</title>
-      <author><first>Minh Quang Nhat</first><last>Pham</last></author>
+      <author id="minh-quang-nhat-pham"><first>Minh Quang Nhat</first><last>Pham</last></author>
       <pages>13–18</pages>
       <url hash="b189b13e">2020.vlsp-1.3</url>
       <bibkey>pham-2020-empirical</bibkey>
@@ -175,9 +175,9 @@
     <paper id="17">
       <title>Overview of <fixed-case>VLSP</fixed-case> <fixed-case>R</fixed-case>el<fixed-case>E</fixed-case>x shared task: A Data Challenge for Semantic Relation Extraction from <fixed-case>V</fixed-case>ietnamese News</title>
       <author><first>Vu Tran</first><last>Mai</last></author>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Duy-Cat</first><last>Can</last></author>
-      <author><first>Thi Minh Huyen</first><last>Nguyen</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
       <author><first>Tran Ngoc Linh</first><last>Nguyen</last></author>
       <author><first>Thanh Tam</first><last>Doan</last></author>
       <pages>92–98</pages>
diff --git a/data/xml/2020.wac.xml b/data/xml/2020.wac.xml
index 354ab6c99e..7faf04e724 100644
--- a/data/xml/2020.wac.xml
+++ b/data/xml/2020.wac.xml
@@ -6,7 +6,7 @@
       <editor><first>Adrien</first><last>Barbaresi</last></editor>
       <editor><first>Felix</first><last>Bildhauer</last></editor>
       <editor><first>Roland</first><last>Schäfer</last></editor>
-      <editor><first>Egon</first><last>Stemle</last></editor>
+      <editor id="egon-stemle"><first>Egon</first><last>Stemle</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -21,9 +21,9 @@
     <paper id="1">
       <title>Current Challenges in Web Corpus Building</title>
       <author><first>Miloš</first><last>Jakubíček</last></author>
-      <author><first>Vojtěch</first><last>Kovář</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
-      <author><first>Vit</first><last>Suchomel</last></author>
+      <author id="vojtech-kovar"><first>Vojtěch</first><last>Kovář</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
+      <author id="vit-suchomel"><first>Vit</first><last>Suchomel</last></author>
       <pages>1–4</pages>
       <abstract>In this paper we discuss some of the current challenges in web corpus building that we faced in the recent years when expanding the corpora in Sketch Engine. The purpose of the paper is to provide an overview and raise discussion on possible solutions, rather than bringing ready solutions to the readers. For every issue we try to assess its severity and briefly discuss possible mitigation options.</abstract>
       <url hash="41af0837">2020.wac-1.1</url>
@@ -60,7 +60,7 @@
       <title>Building Web Corpora for Minority Languages</title>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>23–32</pages>
       <abstract>Web corpora creation for minority languages that do not have their own top-level Internet domain is no trivial matter. Web pages in such minority languages often contain text and links to pages in the dominant language of the country. When building corpora in specific languages, one has to decide how and at which stage to make sure the texts gathered are in the desired language. In the “Finno-Ugric Languages and the Internet” (Suki) project, we created web corpora for Uralic minority languages using web crawling combined with a language identification system in order to identify the language while crawling. In addition, we used language set identification and crowdsourcing before making sentence corpora out of the downloaded texts. In this article, we describe a strategy for collecting textual material from the Internet for minority languages. The strategy is based on the experiences we gained during the Suki project.</abstract>
       <url hash="1cb67619">2020.wac-1.4</url>
@@ -104,7 +104,7 @@
     <paper id="8">
       <title>Streaming Language-Specific <fixed-case>T</fixed-case>witter Data with Optimal Keywords</title>
       <author><first>Tim</first><last>Kreutz</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>57–64</pages>
       <abstract>The Twitter Streaming API has been used to create language-specific corpora with varying degrees of success. Selecting a filter of frequent yet distinct keywords for German resulted in a near-complete collection of German tweets. This method is promising as it keeps within Twitter endpoint limitations and could be applied to other languages besides German. But so far no research has compared methods for selecting optimal keywords for this task. This paper proposes a method for finding optimal key phrases based on a greedy solution to the maximum coverage problem. We generate candidate key phrases for the 50 most frequent languages on Twitter. Candidates are then iteratively selected based on a variety of scoring functions applied to their coverage of target tweets. Selecting candidates based on the scoring function that exponentiates the precision of a key phrase and weighs it by recall achieved the best results overall. Some target languages yield lower results than what could be expected from their prevalence on Twitter. Upon analyzing the errors, we find that these are languages that are very close to more prevalent languages. In these cases, key phrases that limit finding the competitive language are selected, and overall recall on the target language also decreases. We publish the resulting optimized lists for each language as a resource. The code to generate lists for other research objectives is also supplied.</abstract>
       <url hash="261c1309">2020.wac-1.8</url>
diff --git a/data/xml/2020.wanlp.xml b/data/xml/2020.wanlp.xml
index 5bbce999d1..82b411eeaf 100644
--- a/data/xml/2020.wanlp.xml
+++ b/data/xml/2020.wanlp.xml
@@ -26,8 +26,8 @@
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Moritz</first><last>Behr</last></author>
       <author><first>M. Amin</first><last>Cheragui</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>1–11</pages>
       <abstract>In this paper we present the natural language processing components of our German-Arabic speech-to-speech translation system which is being deployed in the context of interpretation during psychiatric, diagnostic interviews. For this purpose we have built a pipe-lined speech-to-speech translation system consisting of automatic speech recognition, text post-processing/segmentation, machine translation and speech synthesis systems. We have implemented two pipe-lines, from German to Arabic and Arabic to German, in order to be able to conduct interpreted two-way dialogues between psychiatrists and potential patients. All systems in our pipeline have been realized as all-neural end-to-end systems, using different architectures suitable for the different components. The speech recognition systems use an encoder/decoder + attention architecture, the text segmentation component and the machine translation system are based on the Transformer architecture, and for the speech synthesis systems we use Tacotron 2 for generating spectrograms and WaveGlow as vocoder. The speech translation is deployed in a server-based speech translation application that implements a turn based translation between a German speaking psychiatrist administrating the Mini-International Neuropsychiatric Interview (M.I.N.I.) and an Arabic speaking person answering the interview. As this is a very specific domain, in addition to the linguistic challenges posed by translating between Arabic and German, we also focus in this paper on the methods we implemented for adapting our speech translation system to the domain of this psychiatric interview.</abstract>
       <url hash="c276811b">2020.wanlp-1.1</url>
@@ -45,7 +45,7 @@
     <paper id="3">
       <title>Is it Great or Terrible? Preserving Sentiment in Neural Machine Translation of <fixed-case>A</fixed-case>rabic Reviews</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>24–37</pages>
       <abstract>Since the advent of Neural Machine Translation (NMT) approaches there has been a tremendous improvement in the quality of automatic translation. However, NMT output still lacks accuracy in some low-resource languages and sometimes makes major errors that need extensive postediting. This is particularly noticeable with texts that do not follow common lexico-grammatical standards, such as user generated content (UGC). In this paper we investigate the challenges involved in translating book reviews from Arabic into English, with particular focus on the errors that lead to incorrect translation of sentiment polarity. Our study points to the special characteristics of Arabic UGC, examines the sentiment transfer errors made by Google Translate of Arabic UGC to English, analyzes why the problem occurs, and proposes an error typology specific of the translation of Arabic UGC. Our analysis shows that the output of online translation tools of Arabic UGC can either fail to transfer the sentiment at all by producing a neutral target text, or completely flips the sentiment polarity of the target word or phrase and hence delivers a wrong affect message. We address this problem by fine-tuning an NMT model with respect to sentiment polarity showing that this approach can significantly help with correcting sentiment errors detected in the online translation of Arabic UGC.</abstract>
       <url hash="fdbc990f">2020.wanlp-1.3</url>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>Deep Diacritization: Efficient Hierarchical Recurrence for Improved <fixed-case>A</fixed-case>rabic Diacritization</title>
       <author><first>Badr</first><last>AlKhamissi</last></author>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Mohamed</first><last>Gabr</last></author>
       <pages>38–48</pages>
       <abstract>We propose a novel architecture for labelling character sequences that achieves state-of-the-art results on the Tashkeela Arabic diacritization benchmark. The core is a two-level recurrence hierarchy that operates on the word and character levels separately—enabling faster training and inference than comparable traditional models. A cross-level attention module further connects the two and opens the door for network interpretability. The task module is a softmax classifier that enumerates valid combinations of diacritics. This architecture can be extended with a recurrent decoder that optionally accepts priors from partially diacritized text, which improves results. We employ extra tricks such as sentence dropout and majority voting to further boost the final result. Our best model achieves a WER of 5.34%, outperforming the previous state-of-the-art with a 30.56% relative error reduction.</abstract>
@@ -141,7 +141,7 @@
     <paper id="12">
       <title>Tracing Traditions: Automatic Extraction of Isnads from Classical <fixed-case>A</fixed-case>rabic Texts</title>
       <author><first>Ryan</first><last>Muther</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>130–138</pages>
       <abstract>We present our work on automatically detecting isnads, the chains of authorities for a re-port that serve as citations in hadith and other classical Arabic texts. We experiment with both sequence labeling methods for identifying isnads in a single pass and a hybrid “retrieve-and-tag” approach, in which a retrieval model first identifies portions of the text that are likely to contain start points for isnads, then a sequence labeling model identifies the exact starting locations within these much smaller retrieved text chunks. We find that the usefulness of full-document sequence to sequence models is limited due to memory limitations and the ineffectiveness of such models at modeling very long documents. We conclude by sketching future improvements on the tagging task and more in-depth analysis of the people and relationships involved in the social network that influenced the evolution of the written tradition over time.</abstract>
       <url hash="06d6fd7d">2020.wanlp-1.12</url>
@@ -181,7 +181,7 @@
       <title>Multi-Task Sequence Prediction For <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabizi Multi-Level Annotation</title>
       <author><first>Elisa</first><last>Gugliotta</last></author>
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>178–191</pages>
       <abstract>In this paper we propose a multi-task sequence prediction system, based on recurrent neural networks and used to annotate on multiple levels an Arabizi Tunisian corpus. The annotation performed are text classification, tokenization, PoS tagging and encoding of Tunisian Arabizi into CODA* Arabic orthography. The system is learned to predict all the annotation levels in cascade, starting from Arabizi input. We evaluate the system on the TIGER German corpus, suitably converting data to have a multi-task problem, in order to show the effectiveness of our neural architecture. We show also how we used the system in order to annotate a Tunisian Arabizi corpus, which has been afterwards manually corrected and used to further evaluate sequence models on Tunisian data. Our system is developed for the Fairseq framework, which allows for a fast and easy use for any other sequence prediction problem.</abstract>
       <url hash="2470379e">2020.wanlp-1.16</url>
@@ -191,7 +191,7 @@
       <title><fixed-case>A</fixed-case>ra<fixed-case>WEAT</fixed-case>: Multidimensional Analysis of Biases in <fixed-case>A</fixed-case>rabic Word Embeddings</title>
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Rafik</first><last>Takieddin</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>192–199</pages>
       <abstract>Recent work has shown that distributional word vector spaces often encode human biases like sexism or racism. In this work, we conduct an extensive analysis of biases in Arabic word embeddings by applying a range of recently introduced bias tests on a variety of embedding spaces induced from corpora in Arabic. We measure the presence of biases across several dimensions, namely: embedding models (Skip-Gram, CBOW, and FastText) and vector sizes, types of text (encyclopedic text, and news vs. user-generated content), dialects (Egyptian Arabic vs. Modern Standard Arabic), and time (diachronic analyses over corpora from different time periods). Our analysis yields several interesting findings, e.g., that implicit gender bias in embeddings trained on Arabic news corpora steadily increases over time (between 2007 and 2017). We make the Arabic bias specifications (AraWEAT) publicly available.</abstract>
@@ -202,7 +202,7 @@
       <title>Parallel resources for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic Dialect Translation</title>
       <author><first>Saméh</first><last>Kchaou</last></author>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last></author>
       <pages>200–206</pages>
       <abstract>The difficulty of processing dialects is clearly observed in the high cost of building representative corpus, in particular for machine translation. Indeed, all machine translation systems require a huge amount and good management of training data, which represents a challenge in a low-resource setting such as the Tunisian Arabic dialect. In this paper, we present a data augmentation technique to create a parallel corpus for Tunisian Arabic dialect written in social media and standard Arabic in order to build a Machine Translation (MT) model. The created corpus was used to build a sentence-based translation model. This model reached a BLEU score of 15.03% on a test set, while it was limited to 13.27% utilizing the corpus without augmentation.</abstract>
       <url hash="69de3825">2020.wanlp-1.18</url>
@@ -332,7 +332,7 @@
     <paper id="31">
       <title>The <fixed-case>QMUL</fixed-case>/<fixed-case>HRBDT</fixed-case> contribution to the <fixed-case>NADI</fixed-case> <fixed-case>A</fixed-case>rabic Dialect Identification Shared Task</title>
       <author><first>Abdulrahman</first><last>Aloraini</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Ayman</first><last>Alhelbawy</last></author>
       <pages>295–301</pages>
       <abstract>We present the Arabic dialect identification system that we used for the country-level subtask of the NADI challenge. Our model consists of three components: BiLSTM-CNN, character-level TF-IDF, and topic modeling features. We represent each tweet using these features and feed them into a deep neural network. We then add an effective heuristic that improves the overall performance. We achieved an F1-Macro score of 20.77% and an accuracy of 34.32% on the test set. The model was also evaluated on the Arabic Online Commentary dataset, achieving results better than the state-of-the-art.</abstract>
diff --git a/data/xml/2020.wat.xml b/data/xml/2020.wat.xml
index e724cd717f..82154b57e2 100644
--- a/data/xml/2020.wat.xml
+++ b/data/xml/2020.wat.xml
@@ -9,14 +9,14 @@
       <editor><first>Raj</first><last>Dabre</last></editor>
       <editor><first>Anoop</first><last>Kunchukuttan</last></editor>
       <editor><first>Win Pa</first><last>Pa</last></editor>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Shantipriya</first><last>Parida</last></editor>
       <editor><first>Isao</first><last>Goto</last></editor>
       <editor><first>Hidaya</first><last>Mino</last></editor>
       <editor><first>Hiroshi</first><last>Manabe</last></editor>
       <editor><first>Katsuhito</first><last>Sudoh</last></editor>
       <editor><first>Sadao</first><last>Kurohashi</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suzhou, China</address>
       <month>December</month>
@@ -62,7 +62,7 @@
     <paper id="3">
       <title>Transformer-based Double-token Bidirectional Autoregressive Decoding in Neural Machine Translation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>50–57</pages>
       <abstract>This paper presents a simple method that extends a standard Transformer-based autoregressive decoder, to speed up decoding. The proposed method generates a token from the head and tail of a sentence (two tokens in total) in each step. By simultaneously generating multiple tokens that rarely depend on each other, the decoding speed is increased while the degradation in translation quality is minimized. In our experiments, the proposed method increased the translation speed by around 113%-155% in comparison with a standard autoregressive decoder, while degrading the BLEU scores by no more than 1.03. It was faster than an iterative non-autoregressive decoder in many conditions.</abstract>
       <url hash="488b23ba">2020.wat-1.3</url>
@@ -173,7 +173,7 @@
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>109–113</pages>
       <abstract>Machine translation (MT) focuses on the automatic translation of text from one natural language to another natural language. Neural machine translation (NMT) achieves state-of-the-art results in the task of machine translation because of utilizing advanced deep learning techniques and handles issues like long-term dependency, and context-analysis. Nevertheless, NMT still suffers low translation quality for low resource languages. To encounter this challenge, the multi-modal concept comes in. The multi-modal concept combines textual and visual features to improve the translation quality of low resource languages. Moreover, the utilization of monolingual data in the pre-training step can improve the performance of the system for low resource language translations. Workshop on Asian Translation 2020 (WAT2020) organized a translation task for multimodal translation in English to Hindi. We have participated in the same in two-track submission, namely text-only and multi-modal translation with team name CNLP-NITS. The evaluated results are declared at the WAT2020 translation task, which reports that our multi-modal NMT system attained higher scores than our text-only NMT on both challenge and evaluation test set. For the challenge test data, our multi-modal neural machine translation system achieves Bilingual Evaluation Understudy (BLEU) score of 33.57, Rank-based Intuitive Bilingual Evaluation Score (RIBES) 0.754141, Adequacy-Fluency Metrics (AMFM) score 0.787320 and for evaluation test data, BLEU, RIBES, and, AMFM score of 40.51, 0.803208, and 0.820980 for English to Hindi translation respectively.</abstract>
       <url hash="aa3f66e3">2020.wat-1.11</url>
@@ -270,7 +270,7 @@
     </paper>
     <paper id="20">
       <title>A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation</title>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
       <pages>160–169</pages>
       <abstract>This paper accompanies the software documentation data set for machine translation, a parallel evaluation data set of data originating from the SAP Help Portal, that we released to the machine translation community for research purposes. It offers the possibility to tune and evaluate machine translation systems in the domain of corporate software documentation and contributes to the availability of a wider range of evaluation scenarios. The data set comprises of the language pairs English to Hindi, Indonesian, Malay and Thai, and thus also increases the test coverage for the many low-resource language pairs. Unlike most evaluation data sets that consist of plain parallel text, the segments in this data set come with additional metadata that describes structural information of the document context. We provide insights into the origin and creation, the particularities and characteristics of the data set as well as machine translation results.</abstract>
diff --git a/data/xml/2020.webnlg.xml b/data/xml/2020.webnlg.xml
index 6c4868d1f8..4d63e4159b 100644
--- a/data/xml/2020.webnlg.xml
+++ b/data/xml/2020.webnlg.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-01-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)</booktitle>
-      <editor><first>Thiago</first><last>Castro Ferreira</last></editor>
+      <editor id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></editor>
       <editor><first>Claire</first><last>Gardent</last></editor>
       <editor><first>Nikolai</first><last>Ilinykh</last></editor>
       <editor><first>Chris</first><last>van der Lee</last></editor>
@@ -83,7 +83,7 @@
       <author><first>David</first><last>Bergés</last></author>
       <author><first>Roser</first><last>Cantenys</last></author>
       <author><first>Roger</first><last>Creus</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>40–47</pages>
       <abstract>establishes key guidelines on how, which and when Machine Translation (MT) techniques are worth applying to RDF-to-Text task. Not only do we apply and compare the most prominent MT architecture, the Transformer, but we also analyze state-of-the-art techniques such as Byte Pair Encoding or Back Translation to demonstrate an improvement in generalization. In addition, we empirically show how to tailor these techniques to enhance models relying on learned embeddings rather than using pretrained ones. Automatic metrics suggest that Back Translation can significantly improve model performance up to 7 BLEU points, hence, opening a window for surpassing state-of-the-art results with appropriate architectures.</abstract>
       <url hash="03ebb663">2020.webnlg-1.5</url>
@@ -92,7 +92,7 @@
     <paper id="6">
       <title>Utilising Knowledge Graph Embeddings for Data-to-Text Generation</title>
       <author><first>Nivranshu</first><last>Pasricha</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>48–53</pages>
       <abstract>Data-to-text generation has recently seen a move away from modular and pipeline architectures towards end-to-end architectures based on neural networks. In this work, we employ knowledge graph embeddings and explore their utility for end-to-end approaches in a data-to-text generation task. Our experiments show that using knowledge graph embeddings can yield an improvement of up to 2 – 3 BLEU points for seen categories on the WebNLG corpus without modifying the underlying neural network architecture.</abstract>
@@ -118,7 +118,7 @@
       <author><first>Qipeng</first><last>Guo</last></author>
       <author><first>Zhijing</first><last>Jin</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>David</first><last>Wipf</last></author>
       <author><first>Zheng</first><last>Zhang</last></author>
       <pages>77–88</pages>
@@ -132,7 +132,7 @@
       <author><first>Betty</first><last>Fabre</last></author>
       <author><first>Tanguy</first><last>Urvoy</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
-      <author><first>Lina</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas-Barahona</last></author>
       <pages>89–99</pages>
       <abstract>The task of verbalization of RDF triples has known a growth in popularity due to the rising ubiquity of Knowledge Bases (KBs). The formalism of RDF triples is a simple and efficient way to store facts at a large scale. However, its abstract representation makes it difficult for humans to interpret. For this purpose, the WebNLG challenge aims at promoting automated RDF-to-text generation. We propose to leverage pre-trainings from augmented data with the Transformer model using a data augmentation strategy. Our experiment results show a minimum relative increases of 3.73%, 126.05% and 88.16% in BLEU score for seen categories, unseen entities and unseen categories respectively over the standard training.</abstract>
       <url hash="5b59d320">2020.webnlg-1.9</url>
@@ -171,7 +171,7 @@
       <author><first>Xintong</first><last>Li</last></author>
       <author><first>Aleksandre</first><last>Maskharashvili</last></author>
       <author><first>Symon</first><last>Jory Stevens-Guille</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>117–124</pages>
       <abstract>In this paper, we report experiments on finetuning large pretrained models to realize resource description framework (RDF) triples to natural language. We provide the details of how to build one of the top-ranked English generation models in WebNLG Challenge 2020. We also show that there appears to be considerable potential for reranking to improve the current state of the art both in terms of statistical metrics and model-based metrics. Our human analyses of the generated texts show that for Russian, pretrained models showed some success, both in terms of lexical and morpho-syntactic choices for generation, as well as for content aggregation. Nevertheless, in a number of cases, the model can be unpredictable, both in terms of failure or success. Omissions of the content and hallucinations, which in many cases occurred at the same time, were major problems. By contrast, the models for English showed near perfect performance on the validation set.</abstract>
       <url hash="45676bd5">2020.webnlg-1.12</url>
@@ -191,7 +191,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>NILC</fixed-case> at <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>+: Pretrained Sequence-to-Sequence Models on <fixed-case>RDF</fixed-case>-to-Text Generation</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
       <pages>131–136</pages>
       <abstract>This paper describes the submission by the NILC Computational Linguistics research group of the University of São Paulo/Brazil to the RDF-to-Text task for English at the WebNLG+ challenge. The success of the current pretrained models like BERT or GPT-2 in text-to-text generation tasks is well-known, however, its application/success on data-totext generation has not been well-studied and proven. This way, we explore how good a pretrained model, in particular BART, performs on the data-to-text generation task. The results obtained were worse than the baseline and other systems in almost all automatic measures. However, the human evaluation shows better results for our system. Besides, results suggest that BART may generate paraphrases of reference texts.</abstract>
@@ -201,7 +201,7 @@
     <paper id="15">
       <title><fixed-case>NUIG</fixed-case>-<fixed-case>DSI</fixed-case> at the <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>+ challenge: Leveraging Transfer Learning for <fixed-case>RDF</fixed-case>-to-text generation</title>
       <author><first>Nivranshu</first><last>Pasricha</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>137–143</pages>
       <abstract>This paper describes the system submitted by NUIG-DSI to the WebNLG+ challenge 2020 in the RDF-to-text generation task for the English language. For this challenge, we leverage transfer learning by adopting the T5 model architecture for our submission and fine-tune the model on the WebNLG+ corpus. Our submission ranks among the top five systems for most of the automatic evaluation metrics achieving a BLEU score of 51.74 over all categories with scores of 58.23 and 45.57 across seen and unseen categories respectively.</abstract>
@@ -239,7 +239,7 @@
       <author><first>Roser</first><last>Cantenys</last></author>
       <author><first>Roger</first><last>Creus</last></author>
       <author><first>Oriol</first><last>Domingo</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>167–170</pages>
       <abstract>This work describes the end-to-end system architecture presented at WebNLG Challenge 2020. The system follows the traditional Machine Translation (MT) pipeline, based on the Transformer model, applied in most text-totext problems. Our solution is enriched by means of a Back Translation step over the original corpus. Thus, the system directly relies on lexicalise format since the synthetic data limits the use of delexicalisation.</abstract>
       <url hash="03f4cac9">2020.webnlg-1.19</url>
diff --git a/data/xml/2020.wildre.xml b/data/xml/2020.wildre.xml
index 5e02027ab0..5b29911d38 100644
--- a/data/xml/2020.wildre.xml
+++ b/data/xml/2020.wildre.xml
@@ -3,11 +3,11 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation</booktitle>
-      <editor><first>Girish Nath</first><last>Jha</last></editor>
+      <editor id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></editor>
       <editor><first>Kalika</first><last>Bali</last></editor>
       <editor><first>Sobha</first><last>L.</last></editor>
       <editor><first>S. S.</first><last>Agrawal</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Marseille, France</address>
       <month>May</month>
@@ -25,8 +25,8 @@
       <author><first>Nilesh</first><last>Joshi</last></author>
       <author><first>Geetanjali</first><last>Rane</last></author>
       <author><first>Hanumant</first><last>Redkar</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1–6</pages>
       <abstract>Part of Speech (POS) annotation is a significant challenge in natural language processing. The paper discusses issues and challenges faced in the process of POS annotation of the Marathi data from four domains viz., tourism, health, entertainment and agriculture. During POS annotation, a lot of issues were encountered. Some of the major ones are discussed in detail in this paper. Also, the two approaches viz., the lexical (L approach) and the functional (F approach) of POS tagging have been discussed and presented with examples. Further, some ambiguous cases in POS annotation are presented in the paper.</abstract>
       <url hash="7ced6113">2020.wildre-1.1</url>
@@ -38,8 +38,8 @@
       <author><first>Shardul</first><last>Suryawanshi</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Pranav</first><last>Verma</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>7–13</pages>
       <abstract>Social media are interactive platforms that facilitate the creation or sharing of information, ideas or other forms of expression among people. This exchange is not free from offensive, trolling or malicious contents targeting users or communities. One way of trolling is by making memes, which in most cases combines an image with a concept or catchphrase. The challenge of dealing with memes is that they are region-specific and their meaning is often obscured in humour or sarcasm. To facilitate the computational modelling of trolling in the memes for Indian languages, we created a meme dataset for Tamil (TamilMemes). We annotated and released the dataset containing suspected trolls and not-troll memes. In this paper, we use the a image classification to address the difficulties involved in the classification of troll memes with the existing methods. We found that the identification of a troll meme with such an image classifier is not feasible which has been corroborated with precision, recall and F1-score.</abstract>
@@ -51,7 +51,7 @@
       <title><fixed-case>O</fixed-case>di<fixed-case>E</fixed-case>n<fixed-case>C</fixed-case>orp 2.0: <fixed-case>O</fixed-case>dia-<fixed-case>E</fixed-case>nglish Parallel Corpus for Machine Translation</title>
       <author><first>Shantipriya</first><last>Parida</last></author>
       <author><first>Satya Ranjan</first><last>Dash</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Petr</first><last>Motlicek</last></author>
       <author><first>Priyanka</first><last>Pattnaik</last></author>
       <author><first>Debasish Kumar</first><last>Mallick</last></author>
@@ -63,8 +63,8 @@
     </paper>
     <paper id="4">
       <title>Handling Noun-Noun Coreference in <fixed-case>T</fixed-case>amil</title>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>20–24</pages>
       <abstract>Natural language understanding by automatic tools is the vital requirement for document processing tools. To achieve it, automatic system has to understand the coherence in the text. Co-reference chains bring coherence to the text. The commonly occurring reference markers which bring cohesiveness are Pronominal, Reflexives, Reciprocals, Distributives, One-anaphors, Noun–noun reference. Here in this paper, we deal with noun-noun reference in Tamil. We present the methodology to resolve these noun-noun anaphors and also present the challenges in handling the noun-noun anaphoric relations in Tamil.</abstract>
       <url hash="91362146">2020.wildre-1.4</url>
@@ -95,7 +95,7 @@
     <paper id="7">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks for Low-Resource <fixed-case>I</fixed-case>ndian Languages: The Case of <fixed-case>B</fixed-case>hojpuri</title>
       <author><first>Atul Kr.</first><last>Ojha</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>33–38</pages>
       <abstract>This paper presents the first dependency treebank for Bhojpuri, a resource-poor language that belongs to the Indo-Aryan language family. The objective behind the Bhojpuri Treebank (BHTB) project is to create a substantial, syntactically annotated treebank which not only acts as a valuable resource in building language technological tools, also helps in cross-lingual learning and typological research. Currently, the treebank consists of 4,881 annotated tokens in accordance with the annotation scheme of Universal Dependencies (UD). A Bhojpuri tagger and parser were created using machine learning approach. The accuracy of the model is 57.49% UAS, 45.50% LAS, 79.69% UPOS accuracy and 77.64% XPOS accuracy. The paper describes the details of the project including a discussion on linguistic analysis and annotation process of the Bhojpuri UD treebank.</abstract>
       <url hash="45a64efe">2020.wildre-1.7</url>
@@ -105,8 +105,8 @@
     <paper id="8">
       <title>A Fully Expanded Dependency Treebank for <fixed-case>T</fixed-case>elugu</title>
       <author><first>Sneha</first><last>Nallani</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>39–44</pages>
       <abstract>Treebanks are an essential resource for syntactic parsing. The available Paninian dependency treebank(s) for Telugu is annotated only with inter-chunk dependency relations and not all words of a sentence are part of the parse tree. In this paper, we automatically annotate the intra-chunk dependencies in the treebank using a Shift-Reduce parser based on Context Free Grammar rules for Telugu chunks. We also propose a few additional intra-chunk dependency relations for Telugu apart from the ones used in Hindi treebank. Annotating intra-chunk dependencies finally provides a complete parse tree for every sentence in the treebank. Having a fully expanded treebank is crucial for developing end to end parsers which produce complete trees. We present a fully expanded dependency treebank for Telugu consisting of 3220 sentences. In this paper, we also convert the treebank annotated with Anncorra part-of-speech tagset to the latest BIS tagset. The BIS tagset is a hierarchical tagset adopted as a unified part-of-speech standard across all Indian Languages. The final treebank is made publicly available.</abstract>
       <url hash="d538693e">2020.wildre-1.8</url>
@@ -125,7 +125,7 @@
     <paper id="10">
       <title><fixed-case>P</fixed-case>olish Lexicon-Grammar Development Methodology as an Example for Application to other Languages</title>
       <author><first>Zygmunt</first><last>Vetulani</last></author>
-      <author><first>Grażyna</first><last>Vetulani</last></author>
+      <author id="grazyna-vetulani"><first>Grażyna</first><last>Vetulani</last></author>
       <pages>51–59</pages>
       <abstract>In the paper we present our methodology with the intention to propose it as a reference for creating lexicon-grammars. We share our long-term experience gained during research projects (past and on-going) concerning the description of Polish using this approach. The above-mentioned methodology, linking semantics and syntax, has revealed useful for various IT applications. Among other, we address this paper to researchers working on “less” or “middle-resourced” Indo-European languages as a proposal of a long term academic cooperation in the field. We believe that the confrontation of our lexicon-grammar methodology with other languages – Indo-European, but also Non-Indo-European languages of India, Ugro-Finish or Turkic languages in Eurasia – will allow for better understanding of the level of versatility of our approach and, last but not least, will create opportunities to intensify comparative studies. The reason of presenting some our works on language resources within the Wildre workshop is the intention not only to take up the challenge thrown down in the CFP of this workshop which is: “To provide opportunity for researchers from India to collaborate with researchers from other parts of the world”, but also to generalize this challenge to other languages.</abstract>
       <url hash="cc748602">2020.wildre-1.10</url>
@@ -145,7 +145,7 @@
     <paper id="12">
       <title>A Deeper Study on Features for Named Entity Recognition</title>
       <author><first>Malarkodi</first><last>C S</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>66–72</pages>
       <abstract>This paper deals with the various features used for the identification of named entities. The performance of the machine learning system heavily depends on the feature selection criteria. The intention to trace the essential features required for the development of named entity system across languages motivated us to conduct this study. The linguistic analysis was done to find out the part of speech patterns surrounding the context of named entities and from the observation linguistic oriented features are identified for both Indian and European languages. The Indian languages belongs to Dravidian language family such as Tamil, Telugu, Malayalam, Indo-Aryan language family such as Hindi, Punjabi, Bengali and Marathi, European languages such as English, Spanish, Dutch, German and Hungarian are used in this work. The machine learning technique CRFs was used for the system development. The experiments were conducted using the linguistic features and the results obtained for each languages are comparable with state-of-art systems.</abstract>
       <url hash="601c2e27">2020.wildre-1.12</url>
diff --git a/data/xml/2020.winlp.xml b/data/xml/2020.winlp.xml
index faf378fbc2..06e0edb622 100644
--- a/data/xml/2020.winlp.xml
+++ b/data/xml/2020.winlp.xml
@@ -66,8 +66,8 @@
     </paper>
     <paper id="5">
       <title>Large Vocabulary Read Speech Corpora for Four <fixed-case>E</fixed-case>thiopian Languages: <fixed-case>A</fixed-case>mharic, <fixed-case>T</fixed-case>igrigna, <fixed-case>O</fixed-case>romo, and <fixed-case>W</fixed-case>olaytta</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
       <author><first>Michael</first><last>Melese</last></author>
       <author><first>Hafte</first><last>Abera</last></author>
       <author><first>Tewodros</first><last>Gebreselassie</last></author>
@@ -75,7 +75,7 @@
       <author><first>Yaregal</first><last>Assabie</last></author>
       <author><first>Million Meshesha</first><last>Beyene</last></author>
       <author><first>Solomon</first><last>Atinafu</last></author>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <pages>13–17</pages>
       <abstract>Automatic Speech Recognition (ASR) is one of the most important technologies to help people live a better life in the 21st century. However, its development requires a big speech corpus for a language. The development of such a corpus is expensive especially for under-resourced Ethiopian languages. To address this problem we have developed four medium-sized (longer than 22 hours each) speech corpora for four Ethiopian languages: Amharic, Tigrigna, Oromo, and Wolaytta. In a way of checking the usability of the corpora and deliver a baseline ASR for each language. In this paper, we present the corpora and the baseline ASR systems for each language. The word error rates (WERs) we achieved show that the corpora are usable for further investigation and we recommend the collection of text corpora to train strong language models for Oromo and Wolaytta compared to others.</abstract>
       <doi>10.18653/v1/2020.winlp-1.5</doi>
@@ -85,8 +85,8 @@
     <paper id="6">
       <title><fixed-case>SIMPLEX</fixed-case>-<fixed-case>PB</fixed-case> 2.0: A Reliable Dataset for Lexical Simplification in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Nathan</first><last>Hartmann</last></author>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>18–22</pages>
       <abstract>Most research on Lexical Simplification (LS) addresses non-native speakers of English, since they are numerous and easy to recruit. This makes it difficult to create LS solutions for other languages and target audiences. This paper presents SIMPLEX-PB 2.0, a dataset for LS in Brazilian Portuguese that, unlike its predecessor SIMPLEX-PB, accurately captures the needs of Brazilian underprivileged children. To create SIMPLEX-PB 2.0, we addressed all limitations of the old SIMPLEX-PB through multiple rounds of manual annotation. As a result, SIMPLEX-PB 2.0 features much more reliable and numerous candidate substitutions to complex words, as well as word complexity rankings produced by a group underprivileged children.</abstract>
       <doi>10.18653/v1/2020.winlp-1.6</doi>
@@ -108,7 +108,7 @@
       <title>Effective questions in referential visual dialogue</title>
       <author><first>Mauricio</first><last>Mazuecos</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
       <pages>31–35</pages>
       <abstract>An interesting challenge for situated dialogue systems is referential visual dialog: by asking questions, the system has to identify the referent to which the user refers to. Task success is the standard metric used to evaluate these systems. However, it does not consider how effective each question is, that is how much each question contributes to the goal. We propose a new metric, that measures question effectiveness. As a preliminary study, we report the new metric for state of the art publicly available models on GuessWhat?!. Surprisingly, successful dialogues do not have a higher percentage of effective questions than failed dialogues. This suggests that a system with high task success is not necessarily one that generates good questions.</abstract>
@@ -139,7 +139,7 @@
     <paper id="13">
       <title>Variants of Vector Space Reductions for Predicting the Compositionality of <fixed-case>E</fixed-case>nglish Noun Compounds</title>
       <author><first>Pegah</first><last>Alipoormolabashi</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>51–54</pages>
       <abstract>Predicting the degree of compositionality of noun compounds is a crucial ingredient for lexicography and NLP applications, to know whether the compound should be treated as a whole, or through its constituents. Computational approaches for an automatic prediction typically represent compounds and their constituents within a vector space to have a numeric relatedness measure for the words. This paper provides a systematic evaluation of using different vector-space reduction variants for the prediction. We demonstrate that Word2vec and nouns-only dimensionality reductions are the most successful and stable vector space reduction variants for our task.</abstract>
       <doi>10.18653/v1/2020.winlp-1.13</doi>
@@ -190,7 +190,7 @@
     <paper id="19">
       <title>Can <fixed-case>W</fixed-case>ikipedia Categories Improve Masked Language Model Pretraining?</title>
       <author><first>Diksha</first><last>Meghwal</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <author><first>Stanislaw</first><last>Jastrzebski</last></author>
       <pages>78</pages>
@@ -243,8 +243,8 @@
     <paper id="25">
       <title>Towards Mitigating Gender Bias in a decoder-based Neural Machine Translation model by Adding Contextual Information</title>
       <author><first>Christine</first><last>Basta</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>99–102</pages>
       <abstract>Gender bias negatively impacts many natural language processing applications, including machine translation (MT). The motivation behind this work is to study whether recent proposed MT techniques are significantly contributing to attenuate biases in document-level and gender-balanced data. For the study, we consider approaches of adding the previous sentence and the speaker information, implemented in a decoder-based neural MT system. We show improvements both in translation quality (+1 BLEU point) as well as in gender bias mitigation on WinoMT (+5% accuracy).</abstract>
       <doi>10.18653/v1/2020.winlp-1.25</doi>
@@ -285,8 +285,8 @@
     </paper>
     <paper id="29">
       <title>The human unlikeness of neural language models in next-word prediction</title>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <pages>115</pages>
       <abstract>The training objective of unidirectional language models (LMs) is similar to a psycholinguistic benchmark known as the cloze task, which measures next-word predictability. However, LMs lack the rich set of experiences that people do, and humans can be highly creative. To assess human parity in these models’ training objective, we compare the predictions of three neural language models to those of human participants in a freely available behavioral dataset (Luke &amp; Christianson, 2016). Our results show that while neural models show a close correspondence to human productions, they nevertheless assign insufficient probability to how often speakers guess upcoming words, especially for open-class content words.</abstract>
       <doi>10.18653/v1/2020.winlp-1.29</doi>
@@ -355,7 +355,7 @@
     <paper id="37">
       <title>Using Social Media For Bitcoin Day Trading Behavior Prediction</title>
       <author><first>Anna Paula</first><last>Pawlicka Maule</last></author>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <pages>140–143</pages>
       <abstract>This abstract presents preliminary work in the application of natural language processing techniques and social network modeling for the prediction of cryptocurrency trading and investment behavior. Specifically, we are building models to use language and social network behaviors to predict if the tweets of a 24-hour period can be used to buy or sell cryptocurrency to make a profit. In this paper we present our novel task and initial language modeling studies.</abstract>
       <doi>10.18653/v1/2020.winlp-1.37</doi>
@@ -397,7 +397,7 @@
       <title>Enhanced <fixed-case>U</fixed-case>rdu Word Segmentation using Conditional Random Fields and Morphological Context Features</title>
       <author><first>Aamir</first><last>Farhan</last></author>
       <author><first>Mashrukh</first><last>Islam</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>156–159</pages>
       <abstract>Word segmentation is a fundamental task for most of the NLP applications. Urdu adopts Nastalique writing style which does not have a concept of space. Furthermore, the inherent non-joining attributes of certain characters in Urdu create spaces within a word while writing in digital format. Thus, Urdu not only has space omission but also space insertion issues which make the word segmentation task challenging. In this paper, we improve upon the results of Zia, Raza and Athar (2018) by using a manually annotated corpus of 19,651 sentences along with morphological context features. Using the Conditional Random Field sequence modeler, our model achieves F 1 score of 0.98 for word boundary identification and 0.92 for sub-word boundary identification tasks. The results demonstrated in this paper outperform the state-of-the-art methods.</abstract>
       <doi>10.18653/v1/2020.winlp-1.41</doi>
diff --git a/data/xml/2020.wmt.xml b/data/xml/2020.wmt.xml
index 9613f9566f..0cc5d9c4c4 100644
--- a/data/xml/2020.wmt.xml
+++ b/data/xml/2020.wmt.xml
@@ -4,25 +4,25 @@
     <meta>
       <booktitle>Proceedings of the Fifth Conference on Machine Translation</booktitle>
       <editor><first>Loïc</first><last>Barrault</last></editor>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Fethi</first><last>Bougares</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
-      <editor><first>Alexander</first><last>Fraser</last></editor>
+      <editor id="alexander-fraser"><first>Alexander</first><last>Fraser</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Paco</first><last>Guzman</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Makoto</first><last>Morishita</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Masaaki</first><last>Nagata</last></editor>
       <editor><first>Toshiaki</first><last>Nakazawa</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>November</month>
@@ -76,11 +76,11 @@
     </paper>
     <paper id="3">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2020 Shared Task on Chat Translation</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>António V.</first><last>Lopes</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>65–75</pages>
       <abstract>We report the results of the first edition of the WMT shared task on chat translation. The task consisted of translating bilingual conversational text, in particular customer support chats for the English-German language pair (English agent, German customer). This task varies from the other translation shared tasks, i.e. news and biomedical, mainly due to the fact that the conversations are bilingual, less planned, more informal, and often ungrammatical. Furthermore, such conversations are usually characterized by shorter and simpler sentences and contain more pronouns. We received 14 submissions from 6 participating teams, all of them covering both directions, i.e. En-&gt;De for agent utterances and De-&gt;En for customer messages. We used automatic metrics (BLEU and TER) for evaluating the translations of both agent and customer messages and human document-level direct assessments (DDA) to evaluate the agent translations.</abstract>
       <url hash="26f9356c">2020.wmt-1.3</url>
@@ -93,7 +93,7 @@
       <author><first>Zhenhao</first><last>Li</last></author>
       <author><first>Juan</first><last>Pino</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
@@ -112,8 +112,8 @@
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Radina</first><last>Dobreva</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Philip</first><last>Williams</last></author>
       <pages>92–99</pages>
       <abstract>We describe the University of Edinburgh’s submissions to the WMT20 news translation shared task for the low resource language pair English-Tamil and the mid-resource language pair English-Inuktitut. We use the neural machine translation transformer architecture for all submissions and explore a variety of techniques to improve translation quality to compensate for the lack of parallel training data. For the very low-resource English-Tamil, this involves exploring pretraining, using both language model objectives and translation using an unrelated high-resource language pair (German-English), and iterative backtranslation. For English-Inuktitut, we explore the use of multilingual systems, which, despite not being part of the primary submission, would have achieved the best results on the test set.</abstract>
@@ -167,7 +167,7 @@
       <title>Linguistically Motivated Subwords for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil Translation: <fixed-case>U</fixed-case>niversity of <fixed-case>G</fixed-case>roningen’s Submission to <fixed-case>WMT</fixed-case>-2020</title>
       <author><first>Prajit</first><last>Dhar</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>126–133</pages>
       <abstract>This paper describes our submission for the English-Tamil news translation task of WMT-2020. The various techniques and Neural Machine Translation (NMT) models used by our team are presented and discussed, including back-translation, fine-tuning and word dropout. Additionally, our experiments show that using a linguistically motivated subword segmentation technique (Ataman et al., 2017) does not consistently outperform the more widely used, non-linguistically motivated SentencePiece algorithm (Kudo and Richardson, 2018), despite the agglutinative nature of Tamil morphology.</abstract>
       <url hash="ea075060">2020.wmt-1.9</url>
@@ -178,7 +178,7 @@
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> System Description for <fixed-case>WMT</fixed-case>20 News Translation Task: Multilingual Adaptation for Low Resource <fixed-case>MT</fixed-case></title>
       <author><first>Carlos</first><last>Escolano</last></author>
       <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>134–138</pages>
       <abstract>In this article, we describe the TALP-UPC participation in the WMT20 news translation shared task for Tamil-English. Given the low amount of parallel training data, we resort to adapt the task to a multilingual system to benefit from the positive transfer from high resource languages. We use iterative backtranslation to fine-tune the system and benefit from the monolingual data available. In order to measure the effectivity of such methods, we compare our results to a bilingual baseline system.</abstract>
       <url hash="fc8623ea">2020.wmt-1.10</url>
@@ -233,8 +233,8 @@
     </paper>
     <paper id="15">
       <title>Tilde at <fixed-case>WMT</fixed-case> 2020: News Task Systems</title>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>175–180</pages>
       <abstract>This paper describes Tilde’s submission to the WMT2020 shared task on news translation for both directions of the English-Polish language pair in both the constrained and the unconstrained tracks. We follow our submissions form the previous years and build our baseline systems to be morphologically motivated sub-word unit-based Transformer base models that we train using the Marian machine translation toolkit. Additionally, we experiment with different parallel and monolingual data selection schemes, as well as sampled back-translation. Our final models are ensembles of Transformer base and Transformer big models which feature right-to-left re-ranking.</abstract>
       <url hash="f3fb101e">2020.wmt-1.15</url>
@@ -284,7 +284,7 @@
       <author><first>Vikrant</first><last>Goyal</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Rahul</first><last>Kejriwal</last></author>
-      <author><first>Siddharth</first><last>Jain</last></author>
+      <author id="siddharth-jain"><first>Siddharth</first><last>Jain</last></author>
       <author><first>Amit</first><last>Bhagwat</last></author>
       <pages>202–206</pages>
       <abstract>We describe our submission for the English→Tamil and Tamil→English news translation shared task. In this submission, we focus on exploring if a low-resource language (Tamil) can benefit from a high-resource language (Hindi) with which it shares contact relatedness. We show utilizing contact relatedness via multilingual NMT can significantly improve translation quality for English-Tamil translation.</abstract>
@@ -295,7 +295,7 @@
     <paper id="20">
       <title>The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>20 News Translation Systems</title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <pages>207–212</pages>
       <abstract>This report summarizes the Air Force Research Laboratory (AFRL) machine translation (MT) systems submitted to the news-translation task as part of the 2020 Conference on Machine Translation (WMT20) evaluation campaign. This year we largely repurpose strategies from previous years’ efforts with larger datasets and also train models with precomputed word alignments under various settings in an effort to improve translation quality.</abstract>
       <url hash="f1fc779f">2020.wmt-1.20</url>
@@ -318,7 +318,7 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>218–229</pages>
       <abstract>In this paper, we introduced our joint team SJTU-NICT ‘s participation in the WMT 2020 machine translation shared task. In this shared task, we participated in four translation directions of three language pairs: English-Chinese, English-Polish on supervised machine translation track, German-Upper Sorbian on low-resource and unsupervised machine translation tracks. Based on different conditions of language pairs, we have experimented with diverse neural machine translation (NMT) techniques: document-enhanced NMT, XLM pre-trained language model enhanced NMT, bidirectional translation as a pre-training, reference language based UNMT, data-dependent gaussian prior objective, and BT-BLEU collaborative filtering self-training. We also used the TF-IDF algorithm to filter the training set to obtain a domain more similar set with the test set for finetuning. In our submissions, the primary systems won the first place on English to Chinese, Polish to English, and German to Upper Sorbian translation directions.</abstract>
       <url hash="65d90d87">2020.wmt-1.22</url>
@@ -328,7 +328,7 @@
     <paper id="23">
       <title>Combination of Neural Machine Translation Systems at <fixed-case>WMT</fixed-case>20</title>
       <author><first>Benjamin</first><last>Marie</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <pages>230–238</pages>
       <abstract>This paper presents neural machine translation systems and their combination built for the WMT20 English-Polish and Japanese-&gt;English translation tasks. We show that using a Transformer Big architecture, additional training data synthesized from monolingual data, and combining many NMT systems through n-best list reranking improve translation quality. However, while we observed such improvements on the validation data, we did not observed similar improvements on the test data. Our analysis reveals that the presence of translationese texts in the validation data led us to take decisions in building NMT systems that were not optimal to obtain the best results on the test data.</abstract>
@@ -366,13 +366,13 @@
     <paper id="26">
       <title>e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2020 News Translation Task</title>
       <author><first>Csaba</first><last>Oravecz</last></author>
-      <author><first>Katina</first><last>Bontcheva</last></author>
-      <author><first>László</first><last>Tihanyi</last></author>
-      <author><first>David</first><last>Kolovratnik</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
+      <author id="laszlo-tihanyi"><first>László</first><last>Tihanyi</last></author>
+      <author id="david-kolovratnik"><first>David</first><last>Kolovratnik</last></author>
       <author><first>Bhavani</first><last>Bhaskar</last></author>
       <author><first>Adrien</first><last>Lardilleux</last></author>
       <author><first>Szymon</first><last>Klocek</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <pages>254–261</pages>
       <abstract>The paper describes the submissions of the eTranslation team to the WMT 2020 news translation shared task. Leveraging the experience from the team’s participation last year we developed systems for 5 language pairs with various strategies. Compared to last year, for some language pairs we dedicated a lot more resources to training, and tried to follow standard best practices to build competitive systems which can achieve good results in the rankings. By using deep and complex architectures we sacrificed direct re-usability of our systems in production environments but evaluation showed that this approach could result in better models that significantly outperform baseline architectures. We submitted two systems to the zero shot robustness task. These submissions are described briefly in this paper as well.</abstract>
       <url hash="9939e8bd">2020.wmt-1.26</url>
@@ -516,8 +516,8 @@
       <author><first>Agustin</first><last>Dal Lago</last></author>
       <author><first>Yotam</first><last>Doron</last></author>
       <author><first>Susannah</first><last>Young</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>326–337</pages>
       <abstract>This paper describes the DeepMind submission to the Chinese<tex-math>\rightarrow</tex-math>English constrained data track of the WMT2020 Shared Task on News Translation. The submission employs a noisy channel factorization as the backbone of a document translation system. This approach allows the flexible combination of a number of independent component models which are further augmented with back-translation, distillation, fine-tuning with in-domain data, Monte-Carlo Tree Search decoding, and improved uncertainty estimation. In order to address persistent issues with the premature truncation of long sequences we included specialized length models and sentence segmentation techniques. Our final system provides a 9.9 BLEU points improvement over a baseline Transformer on our test set (newstest 2019).</abstract>
       <url hash="e3567ca1">2020.wmt-1.36</url>
@@ -578,7 +578,7 @@
       <title>The <fixed-case>MUCOW</fixed-case> word sense disambiguation test suite at <fixed-case>WMT</fixed-case> 2020</title>
       <author><first>Yves</first><last>Scherrer</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>365–370</pages>
       <abstract>This paper reports on our participation with the MUCOW test suite at the WMT 2020 news translation task. We introduced MUCOW at WMT 2019 to measure the ability of MT systems to perform word sense disambiguation (WSD), i.e., to translate an ambiguous word with its correct sense. MUCOW is created automatically using existing resources, and the evaluation process is also entirely automated. We evaluate all participating systems of the language pairs English -&gt; Czech, English -&gt; German, and English -&gt; Russian and compare the results with those obtained at WMT 2019. While current NMT systems are fairly good at handling ambiguous source words, we could not identify any substantial progress - at least to the extent that it is measurable by the MUCOW method - in that area over the last year.</abstract>
       <url hash="4ca2c794">2020.wmt-1.40</url>
@@ -623,7 +623,7 @@
       <author><first>Amit</first><last>Kumar</last></author>
       <author><first>Rupjyoti</first><last>Baruah</last></author>
       <author><first>Rajesh Kumar</first><last>Mundotiya</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>393–395</pages>
       <abstract>This paper reports the results for the Machine Translation (MT) system submitted by the NLPRL team for the Hindi – Marathi Similar Translation Task at WMT 2020. We apply the Transformer-based Neural Machine Translation (NMT) approach on both translation directions for this language pair. The trained model is evaluated on the corpus provided by shared task organizers, using BLEU, RIBES, and TER scores. There were a total of 23 systems submitted for Marathi to Hindi and 21 systems submitted for Hindi to Marathi in the shared task. Out of these, our submission ranked 6th and 9th, respectively.</abstract>
       <url hash="e040232a">2020.wmt-1.44</url>
@@ -635,7 +635,7 @@
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>396–401</pages>
       <abstract>Machine Translation (MT) is a vital tool for aiding communication between linguistically separate groups of people. The neural machine translation (NMT) based approaches have gained widespread acceptance because of its outstanding performance. We have participated in WMT20 shared task of similar language translation on Hindi-Marathi pair. The main challenge of this task is by utilization of monolingual data and similarity features of similar language pair to overcome the limitation of available parallel data. In this work, we have implemented NMT based model that simultaneously learns bilingual embedding from both the source and target language pairs. Our model has achieved Hindi to Marathi bilingual evaluation understudy (BLEU) score of 11.59, rank-based intuitive bilingual evaluation score (RIBES) score of 57.76 and translation edit rate (TER) score of 79.07 and Marathi to Hindi BLEU score of 15.44, RIBES score of 61.13 and TER score of 75.96.</abstract>
       <url hash="cc08664f">2020.wmt-1.45</url>
@@ -667,7 +667,7 @@
     <paper id="48">
       <title><fixed-case>NMT</fixed-case> based Similar Language Translation for <fixed-case>H</fixed-case>indi - <fixed-case>M</fixed-case>arathi</title>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>414–417</pages>
       <abstract>This paper describes the participation of team F1toF6 (LTRC, IIIT-Hyderabad) for the WMT 2020 task, similar language translation. We experimented with attention based recurrent neural network architecture (seq2seq) for this task. We explored the use of different linguistic features like POS and Morph along with back translation for Hindi-Marathi and Marathi-Hindi machine translation.</abstract>
       <url hash="83fd0ffd">2020.wmt-1.48</url>
@@ -675,12 +675,12 @@
     </paper>
     <paper id="49">
       <title><fixed-case>NUIG</fixed-case>-Panlingua-<fixed-case>KMI</fixed-case> <fixed-case>H</fixed-case>indi-<fixed-case>M</fixed-case>arathi <fixed-case>MT</fixed-case> Systems for Similar Language Translation Task @ <fixed-case>WMT</fixed-case> 2020</title>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>418–423</pages>
       <abstract>NUIG-Panlingua-KMI submission to WMT 2020 seeks to push the state-of-the-art in Similar Language Translation Task for Hindi↔Marathi language pair. As part of these efforts, we conducteda series of experiments to address the challenges for translation between similar languages. Among the 4 MT systems prepared under this task, 1 PBSMT systems were prepared for Hindi↔Marathi each and 1 NMT systems were developed for Hindi↔Marathi using Byte PairEn-coding (BPE) into subwords. The results show that different architectures NMT could be an effective method for developing MT systems for closely related languages. Our Hindi-Marathi NMT system was ranked 8th among the 14 teams that participated and our Marathi-Hindi NMT system was ranked 8th among the 11 teams participated for the task.</abstract>
       <url hash="4d53a2ad">2020.wmt-1.49</url>
@@ -698,7 +698,7 @@
     </paper>
     <paper id="51">
       <title>Neural Machine Translation between similar <fixed-case>S</fixed-case>outh-<fixed-case>S</fixed-case>lavic languages</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Alberto</first><last>Poncelas</last></author>
       <pages>430–436</pages>
       <abstract>This paper describes the ADAPT-DCU machine translation systems built for the WMT 2020 shared task on Similar Language Translation. We explored several set-ups for NMT for Croatian–Slovenian and Serbian–Slovenian language pairs in both translation directions. Our experiments focus on different amounts and types of training data: we first apply basic filtering on the OpenSubtitles training corpora, then we perform additional cleaning of remaining misaligned segments based on character n-gram matching. Finally, we make use of additional monolingual data by creating synthetic parallel data through back-translation. Automatic evaluation shows that multilingual systems with joint Serbian and Croatian data are better than bilingual, as well as that character-based cleaning leads to improved scores while using less data. The results also confirm once more that adding back-translated data further improves the performance, especially when the synthetic data is similar to the desired domain of the development and test set. This, however, might come at a price of prolonged training time, especially for multitarget systems.</abstract>
@@ -743,7 +743,7 @@
     <paper id="55">
       <title>A3-108 Machine Translation System for Similar Language Translation Shared Task 2020</title>
       <author><first>Saumitra</first><last>Yadav</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>451–455</pages>
       <abstract>In this paper, we describe our submissions for Similar Language Translation Shared Task 2020. We built 12 systems in each direction for Hindi⇐⇒Marathi language pair. This paper outlines initial baseline experiments with various tokenization schemes to train statistical models. Using optimal tokenization scheme among these we created synthetic source side text with back translation. And prune synthetic text with language model scores. This synthetic data was then used along with training data in various settings to build translation models. We also report configuration of the submitted systems and results produced by them.</abstract>
       <url hash="f774e4f8">2020.wmt-1.55</url>
@@ -835,9 +835,9 @@
     </paper>
     <paper id="63">
       <title>Priming Neural Machine Translation</title>
-      <author><first>Minh Quang</first><last>Pham</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
       <author><first>Jitao</first><last>Xu</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>516–527</pages>
@@ -848,7 +848,7 @@
     </paper>
     <paper id="64">
       <title>Subword Segmentation and a Single Bridge Language Affect Zero-Shot Neural Machine Translation</title>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>528–537</pages>
@@ -932,7 +932,7 @@
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Leonard</first><last>Dahlmann</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>604–616</pages>
       <abstract>Context-aware neural machine translation (NMT) is a promising direction to improve the translation quality by making use of the additional context, e.g., document-level translation, or having meta-information. Although there exist various architectures and analyses, the effectiveness of different context-aware NMT models is not well explored yet. This paper analyzes the performance of document-level NMT models on four diverse domains with a varied amount of parallel document-level bilingual data. We conduct a comprehensive set of experiments to investigate the impact of document-level NMT. We find that there is no single best approach to document-level NMT, but rather that different architectures come out on top on different tasks. Looking at task-specific problems, such as pronoun resolution or headline translation, we find improvements in the context-aware systems, even in cases where the corpus-level metrics like BLEU show no significant improvement. We also show that document-level back-translation significantly helps to compensate for the lack of document-level bi-texts.</abstract>
       <url hash="179ef0ee">2020.wmt-1.71</url>
@@ -942,8 +942,8 @@
     </paper>
     <paper id="72">
       <title>A Study of Residual Adapters for Multi-Domain Neural Machine Translation</title>
-      <author><first>Minh Quang</first><last>Pham</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>617–628</pages>
@@ -956,7 +956,7 @@
       <title>Mitigating Gender Bias in Machine Translation with Target Gender Annotations</title>
       <author><first>Artūrs</first><last>Stafanovičs</last></author>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>629–638</pages>
       <abstract>When translating “The secretary asked for details.” to a language with grammatical gender, it might be necessary to determine the gender of the subject “secretary”. If the sentence does not contain the necessary information, it is not always possible to disambiguate. In such cases, machine translation systems select the most common translation option, which often corresponds to the stereotypical translations, thus potentially exacerbating prejudice and marginalisation of certain groups and people. We argue that the information necessary for an adequate translation can not always be deduced from the sentence being translated or even might depend on external knowledge. Therefore, in this work, we propose to decouple the task of acquiring the necessary information from the task of learning to translate correctly when such information is available. To that end, we present a method for training machine translation systems to use word-level annotations containing information about subject’s gender. To prepare training data, we annotate regular source language words with grammatical gender information of the corresponding target language words. Using such data to train machine translation systems reduces their reliance on gender stereotypes when information about the subject’s gender is available. Our experiments on five language pairs show that this allows improving accuracy on the WinoMT test set by up to 25.8 percentage points.</abstract>
       <url hash="d841f0d6">2020.wmt-1.73</url>
@@ -991,15 +991,15 @@
     <paper id="76">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2020 Biomedical Translation Shared Task: <fixed-case>B</fixed-case>asque, <fixed-case>I</fixed-case>talian and <fixed-case>R</fixed-case>ussian as New Additional Languages</title>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Cristian</first><last>Grozea</last></author>
       <author><first>Inigo</first><last>Jauregi Unanue</last></author>
       <author><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Nancy</first><last>Mah</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
       <author><first>Massimo</first><last>Piccardi</last></author>
       <author><first>Roland</first><last>Roller</last></author>
@@ -1034,7 +1034,7 @@
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Naman</first><last>Goyal</last></author>
       <author><first>Peng-Jen</first><last>Chen</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>726–742</pages>
       <abstract>Following two preceding WMT Shared Task on Parallel Corpus Filtering (Koehn et al., 2018, 2019), we posed again the challenge of assigning sentence-level quality scores for very noisy corpora of sentence pairs crawled from the web, with the goal of sub-selecting the highest-quality data to be used to train ma-chine translation systems. This year, the task tackled the low resource condition of Pashto–English and Khmer–English and also included the challenge of sentence alignment from document pairs.</abstract>
       <url hash="05380a7e">2020.wmt-1.78</url>
@@ -1044,11 +1044,11 @@
     <paper id="79">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2020 Shared Task on Quality Estimation</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Erick</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <pages>743–764</pages>
       <abstract>We report the results of the WMT20 shared task on Quality Estimation, where the challenge is to predict the quality of the output of neural machine translation systems at the word, sentence and document levels. This edition included new data with open domain texts, direct assessment annotations, and multiple language pairs: English-German, English-Chinese, Russian-English, Romanian-English, Estonian-English, Sinhala-English and Nepali-English data for the sentence-level subtasks, English-German and English-Chinese for the word-level subtask, and English-French data for the document-level subtask. In addition, we made neural machine translation models available to participants. 19 participating teams from 27 institutions submitted altogether 1374 systems to different task variants and language pairs.</abstract>
@@ -1080,7 +1080,7 @@
       <author><first>WonKee</first><last>Lee</last></author>
       <author><first>Jaehun</first><last>Shin</last></author>
       <author><first>Baikjin</first><last>Jung</last></author>
-      <author><first>Young-Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Kil</first><last>Kim</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <pages>777–782</pages>
       <abstract>This paper describes POSTECH-ETRI’s submission to WMT2020 for the shared task on automatic post-editing (APE) for 2 language pairs: English-German (En-De) and English-Chinese (En-Zh). We propose APE systems based on a cross-lingual language model, which jointly adopts translation language modeling (TLM) and masked language modeling (MLM) training objectives in the pre-training stage; the APE models then utilize jointly learned language representations between the source language and the target language. In addition, we created 19 million new sythetic triplets as additional training data for our final ensemble model. According to experimental results on the WMT2020 APE development data set, our models showed an improvement over the baseline by TER of -3.58 and a BLEU score of +5.3 for the En-De subtask; and TER of -5.29 and a BLEU score of +7.32 for the En-Zh subtask.</abstract>
@@ -1140,7 +1140,7 @@
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case> 2020</title>
       <author><first>Sadaf</first><last>Abdul Rauf</last></author>
       <author><first>José Carlos</first><last>Rosales Núñez</last></author>
-      <author><first>Minh Quang</first><last>Pham</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>803–812</pages>
       <abstract>This paper describes LIMSI’s submissions to the translation shared tasks at WMT’20. This year we have focused our efforts on the biomedical translation task, developing a resource-heavy system for the translation of medical abstracts from English into French, using back-translated texts, terminological resources as well as multiple pre-processing pipelines, including pre-trained representations. Systems were also prepared for the robustness task for translating from English into German; for this large-scale task we developed multi-domain, noise-robust, translation systems aim to handle the two test conditions: zero-shot and few-shot domain adaptation.</abstract>
@@ -1151,7 +1151,7 @@
     <paper id="87">
       <title>Elhuyar submission to the Biomedical Translation Task 2020 on terminology and abstracts translation</title>
       <author><first>Ander</first><last>Corral</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <pages>813–819</pages>
       <abstract>This article describes the systems submitted by Elhuyar to the 2020 Biomedical Translation Shared Task, specifically the systems presented in the subtasks of terminology translation for English-Basque and abstract translation for English-Basque and English-Spanish. In all cases a Transformer architecture was chosen and we studied different strategies to combine open domain data with biomedical domain data for building the training corpora. For the English-Basque pair, given the scarcity of parallel corpora in the biomedical domain, we set out to create domain training data in a synthetic way. The systems presented in the terminology and abstract translation subtasks for the English-Basque language pair ranked first in their respective tasks among four participants, achieving 0.78 accuracy for terminology translation and a BLEU of 0.1279 for the translation of abstracts. In the abstract translation task for the English-Spanish pair our team ranked second (BLEU=0.4498) in the case of OK sentences.</abstract>
       <url hash="1120ae61">2020.wmt-1.87</url>
@@ -1254,8 +1254,8 @@
       <title>Ixamed’s submission description for <fixed-case>WMT</fixed-case>20 Biomedical shared task: benefits and limitations of using terminologies for domain adaptation</title>
       <author><first>Xabier</first><last>Soto</last></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>875–880</pages>
       <abstract>In this paper we describe the systems developed at Ixa for our participation in WMT20 Biomedical shared task in three language pairs, en-eu, en-es and es-en. When defining our approach, we have put the focus on making an efficient use of corpora recently compiled for training Machine Translation (MT) systems to translate Covid-19 related text, as well as reusing previously compiled corpora and developed systems for biomedical or clinical domain. Regarding the techniques used, we base on the findings from our previous works for translating clinical texts into Basque, making use of clinical terminology for adapting the MT systems to the clinical domain. However, after manually inspecting some of the outputs generated by our systems, for most of the submissions we end up using the system trained only with the basic corpus, since the systems including the clinical terminologies generated outputs shorter in length than the corresponding references. Thus, we present simple baselines for translating abstracts between English and Spanish (en/es); while for translating abstracts and terms from English into Basque (en-eu), we concatenate the best en-es system for each kind of text with our es-eu system. We present automatic evaluation results in terms of BLEU scores, and analyse the effect of including clinical terminology on the average sentence length of the generated outputs. Following the recent recommendations for a responsible use of GPUs for NLP research, we include an estimation of the generated CO2 emissions, based on the power consumed for training the MT systems.</abstract>
       <url hash="7c2d0baf">2020.wmt-1.96</url>
@@ -1310,7 +1310,7 @@
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>Ana C</first><last>Farinha</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>911–920</pages>
       <abstract>We present the contribution of the Unbabel team to the WMT 2020 Shared Task on Metrics. We intend to participate on the segmentlevel, document-level and system-level tracks on all language pairs, as well as the “QE as a Metric” track. Accordingly, we illustrate results of our models in these tracks with reference to test sets from the previous year. Our submissions build upon the recently proposed COMET framework: we train several estimator models to regress on different humangenerated quality scores and a novel ranking model trained on relative ranks obtained from Direct Assessments. We also propose a simple technique for converting segment-level predictions into a document-level score. Overall, our systems achieve strong results for all language pairs on previous test sets and in many cases set a new state-of-the-art.</abstract>
       <url hash="362e34a3">2020.wmt-1.101</url>
@@ -1327,7 +1327,7 @@
       <author><first>Qijun</first><last>Tan</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>921–927</pages>
       <abstract>The quality of machine translation systems has dramatically improved over the last decade, and as a result, evaluation has become an increasingly challenging problem. This paper describes our contribution to the WMT 2020 Metrics Shared Task, the main benchmark for automatic evaluation of translation. We make several submissions based on BLEURT, a previously published which uses transfer learning. We extend the metric beyond English and evaluate it on 14 language pairs for which fine-tuning data is available, as well as 4 “zero-shot” language pairs, for which we have no labelled examples. Additionally, we focus on English to German and demonstrate how to combine BLEURT’s predictions with those of YiSi and use alternative reference translations to enhance the performance. Empirical results show that the models achieve competitive results on the WMT Metrics 2019 Shared Task, indicating their promise for the 2020 edition.</abstract>
       <url hash="71669019">2020.wmt-1.102</url>
@@ -1338,7 +1338,7 @@
       <title>Towards a Better Evaluation of Metrics for Machine Translation</title>
       <author><first>Peter</first><last>Stanchev</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>928–933</pages>
       <abstract>An important aspect of machine translation is its evaluation, which can be achieved through the use of a variety of metrics. To compare these metrics, the workshop on statistical machine translation annually evaluates metrics based on their correlation with human judgement. Over the years, methods for measuring correlation with humans have changed, but little research has been performed on what the optimal methods for acquiring human scores are and how human correlation can be measured. In this work, the methods for evaluating metrics at both system- and segment-level are analyzed in detail and their shortcomings are pointed out.</abstract>
       <url hash="b2c88bbf">2020.wmt-1.103</url>
@@ -1371,12 +1371,12 @@
     </paper>
     <paper id="106">
       <title>Score Combination for Improved Parallel Corpus Filtering for Low Resource Conditions</title>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Amr</first><last>Hendy</last></author>
       <author><first>Mohamed</first><last>Abdelghaffar</last></author>
       <author><first>Mohamed</first><last>Afify</last></author>
       <author><first>Ahmed</first><last>Tawfik</last></author>
-      <author><first>Hany</first><last>Hassan Awadalla</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan Awadalla</last></author>
       <pages>947–951</pages>
       <abstract>This paper presents the description of our submission to WMT20 sentence filtering task. We combine scores from custom LASER built for each source language, a classifier built to distinguish positive and negative pairs and the original scores provided with the task. For the mBART setup, provided by the organizers, our method shows 7% and 5% relative improvement, over the baseline, in sacreBLEU score on the test set for Pashto and Khmer respectively.</abstract>
       <url hash="f7f93dee">2020.wmt-1.106</url>
@@ -1385,8 +1385,8 @@
     </paper>
     <paper id="107">
       <title>Bicleaner at <fixed-case>WMT</fixed-case> 2020: <fixed-case>U</fixed-case>niversitat d’Alacant-Prompsit’s submission to the parallel corpus filtering shared task</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>952–958</pages>
@@ -1407,7 +1407,7 @@
     </paper>
     <paper id="109">
       <title>Dual Conditional Cross Entropy Scores and <fixed-case>LASER</fixed-case> Similarity Scores for the <fixed-case>WMT</fixed-case>20 Parallel Corpus Filtering Shared Task</title>
-      <author><first>Felicia</first><last>Koerner</last></author>
+      <author id="felicia-koerner"><first>Felicia</first><last>Koerner</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>966–971</pages>
       <abstract>This paper describes our submission to the WMT20 Parallel Corpus Filtering and Alignment for Low-Resource Conditions Shared Task. This year’s corpora are noisy Khmer-English and Pashto-English, with 58.3 million and 11.6 million words respectively (English token count). Our submission focuses on filtering Pashto-English, building on previously successful methods to produce two sets of scores: LASER_LM, a combination of the LASER similarity scores provided in the shared task and perplexity scores from language models, and DCCEF_DUP, dual conditional cross entropy scores combined with a duplication penalty. We improve slightly on the LASER similarity score and find that the provided clean data can successfully be supplemented with a subsampled set of the noisy data, effectively increasing the training data for the models used for dual conditional cross entropy scoring.</abstract>
@@ -1465,7 +1465,7 @@
     </paper>
     <paper id="114">
       <title><fixed-case>RTM</fixed-case> Ensemble Learning Results at Quality Estimation Task</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>999–1003</pages>
       <abstract>We obtain new results using referential translation machines (RTMs) with predictions mixed and stacked to obtain a better mixture of experts prediction. We are able to achieve better results than the baseline model in Task 1 subtasks. Our stacking results significantly improve the results on the training sets but decrease the test set results. RTMs can achieve to become the 5th among 13 models in ru-en subtask and 5th in the multilingual track of sentence-level Task 1 based on MAE.</abstract>
       <url hash="ef547830">2020.wmt-1.114</url>
@@ -1477,7 +1477,7 @@
       <author><first>Qu</first><last>Cui</last></author>
       <author><first>Xiang</first><last>Geng</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1004–1009</pages>
       <abstract>This paper describes our system of the sentence-level and word-level Quality Estimation Shared Task of WMT20. Our system is based on the QE Brain, and we simply enhance it by injecting noise at the target side. And to obtain the deep bi-directional information, we use a masked language model at the target side instead of two single directional decoders. Meanwhile, we try to use the extra QE data from the WMT17 and WMT19 to improve our system’s performance. Finally, we ensemble the features or the results from different models to get our best results. Our system finished fifth in the end at sentence-level on both EN-ZH and EN-DE language pairs.</abstract>
       <url hash="03b92828">2020.wmt-1.115</url>
@@ -1489,10 +1489,10 @@
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Shuo</first><last>Sun</last></author>
       <author><first>Lisa</first><last>Yankovskaya</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1010–1017</pages>
       <abstract>This paper presents our submission to the WMT2020 Shared Task on Quality Estimation (QE). We participate in Task and Task 2 focusing on sentence-level prediction. We explore (a) a black-box approach to QE based on pre-trained representations; and (b) glass-box approaches that leverage various indicators that can be extracted from the neural MT systems. In addition to training a feature-based regression model using glass-box quality indicators, we also test whether they can be used to predict MT quality directly with no supervision. We assess our systems in a multi-lingual setting and show that both types of approaches generalise well across languages. Our black-box QE models tied for the winning submission in four out of seven language pairs inTask 1, thus demonstrating very strong performance. The glass-box approaches also performed competitively, representing a light-weight alternative to the neural-based models.</abstract>
@@ -1534,7 +1534,7 @@
       <author><first>João</first><last>Moura</last></author>
       <author><first>Miguel</first><last>Vera</last></author>
       <author><first>Daan</first><last>van Stigt</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <pages>1029–1036</pages>
       <abstract>We present the joint contribution of IST and Unbabel to the WMT 2020 Shared Task on Quality Estimation. Our team participated on all tracks (Direct Assessment, Post-Editing Effort, Document-Level), encompassing a total of 14 submissions. Our submitted systems were developed by extending the OpenKiwi framework to a transformer-based predictor-estimator architecture, and to cope with glass-box, uncertainty-based features coming from neural machine translation systems.</abstract>
@@ -1556,7 +1556,7 @@
     </paper>
     <paper id="121">
       <title><fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Submission for the <fixed-case>WMT</fixed-case>’20 Quality Estimation Task: Intermediate Training for Domain and Task Adaptation</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>1042–1048</pages>
       <abstract>This paper describes the NICT Kyoto submission for the WMT’20 Quality Estimation (QE) shared task. We participated in Task 2: Word and Sentence-level Post-editing Effort, which involved Wikipedia data and two translation directions, namely English-to-German and English-to-Chinese. Our approach is based on multi-task fine-tuned cross-lingual language models (XLM), initially pre-trained and further domain-adapted through intermediate training using the translation language model (TLM) approach complemented with a novel self-supervised learning task which aim is to model errors inherent to machine translation outputs. Results obtained on both word and sentence-level QE show that the proposed intermediate training method is complementary to language model domain adaptation and outperforms the fine-tuning only approach.</abstract>
       <url hash="25fbfc81">2020.wmt-1.121</url>
@@ -1566,8 +1566,8 @@
     <paper id="122">
       <title><fixed-case>T</fixed-case>rans<fixed-case>Q</fixed-case>uest at <fixed-case>WMT</fixed-case>2020: Sentence-Level Direct Assessment</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>1049–1055</pages>
       <abstract>This paper presents the team TransQuest’s participation in Sentence-Level Direct Assessment shared task in WMT 2020. We introduce a simple QE framework based on cross-lingual transformers, and we use it to implement and evaluate two different neural architectures. The proposed methods achieve state-of-the-art results surpassing the results obtained by OpenKiwi, the baseline used in the shared task. We further fine tune the QE framework by performing ensemble and data augmentation. Our approach is the winning solution in all of the language pairs according to the WMT 2020 official results.</abstract>
       <url hash="32d15a4f">2020.wmt-1.122</url>
@@ -1658,10 +1658,10 @@
     <paper id="129">
       <title><fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-<fixed-case>DFKI</fixed-case>@<fixed-case>WMT</fixed-case>20: Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case> for <fixed-case>G</fixed-case>erman-<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian</title>
       <author><first>Sourav</first><last>Dutta</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last></author>
       <author><first>Saptarashmi</first><last>Bandyopadhyay</last></author>
       <author><first>Dana</first><last>Ruiter</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1092–1098</pages>
       <abstract>This paper describes the UdS-DFKI submission to the shared task for unsupervised machine translation (MT) and very low-resource supervised MT between German (de) and Upper Sorbian (hsb) at the Fifth Conference of Machine Translation (WMT20). We submit systems for both the supervised and unsupervised tracks. Apart from various experimental approaches like bitext mining, model pre-training, and iterative back-translation, we employ a factored machine translation approach on a small BPE vocabulary.</abstract>
       <url hash="70031bea">2020.wmt-1.129</url>
@@ -1672,7 +1672,7 @@
       <title>Data Selection for Unsupervised Translation of <fixed-case>G</fixed-case>erman–<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian</title>
       <author><first>Lukas</first><last>Edman</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1099–1103</pages>
       <abstract>This paper describes the methods behind the systems submitted by the University of Groningen for the WMT 2020 Unsupervised Machine Translation task for German–Upper Sorbian. We investigate the usefulness of data selection in the unsupervised setting. We find that we can perform data selection using a pretrained model and show that the quality of a set of sentences or documents can have a great impact on the performance of the UNMT system trained on it. Furthermore, we show that document-level data selection should be preferred for training the XLM model when possible. Finally, we show that there is a trade-off between quality and quantity of the data used to train UNMT systems.</abstract>
       <url hash="845ca95e">2020.wmt-1.130</url>
@@ -1728,8 +1728,8 @@
     <paper id="135">
       <title>The <fixed-case>NITS</fixed-case>-<fixed-case>CNLP</fixed-case> System for the Unsupervised <fixed-case>MT</fixed-case> Task at <fixed-case>WMT</fixed-case> 2020</title>
       <author><first>Salam Michael</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>1139–1143</pages>
       <abstract>We describe NITS-CNLP’s submission to WMT 2020 unsupervised machine translation shared task for German language (de) to Upper Sorbian (hsb) in a constrained setting i.e, using only the data provided by the organizers. We train our unsupervised model using monolingual data from both the languages by jointly pre-training the encoder and decoder and fine-tune using backtranslation loss. The final model uses the source side (de) monolingual data and the target side (hsb) synthetic data as a pseudo-parallel data to train a pseudo-supervised system which is tuned using the provided development set(dev set).</abstract>
       <url hash="64ef4f2f">2020.wmt-1.135</url>
@@ -1757,7 +1757,7 @@
     <paper id="138">
       <title>How Should Markup Tags Be Translated?</title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <pages>1160–1173</pages>
       <abstract>The ability of machine translation (MT) models to correctly place markup is crucial to generating high-quality translations of formatted input. This paper compares two commonly used methods of representing markup tags and tests the ability of MT models to learn tag placement via training data augmentation. We study the interactions of tag representation, data augmentation size, tag complexity, and language pair to show the drawbacks and benefits of each method. We construct and release new test sets containing tagged data for three language pairs of varying difficulty.</abstract>
       <url hash="4780b198">2020.wmt-1.138</url>
@@ -1766,7 +1766,7 @@
     </paper>
     <paper id="139">
       <title>The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual <fixed-case>MT</fixed-case></title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>1174–1182</pages>
       <abstract>This paper describes the development of a new benchmark for machine translation that provides training and test data for thousands of language pairs covering over 500 languages and tools for creating state-of-the-art translation models from that collection. The main goal is to trigger the development of open translation tools and models with a much broader coverage of the World’s languages. Using the package it is possible to work on realistic low-resource scenarios avoiding artificially reduced setups that are common when demonstrating zero-shot or few-shot learning. For the first time, this package provides a comprehensive collection of diverse data sets in hundreds of languages with systematic language and script annotation and data splits to extend the narrow coverage of existing benchmarks. Together with the data release, we also provide a growing number of pre-trained baseline models for individual language pairs and selected language groups.</abstract>
       <url hash="87ab7dd0">2020.wmt-1.139</url>
@@ -1791,7 +1791,7 @@
       <author><first>Chris</first><last>Kedzie</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1193–1204</pages>
       <abstract>Users of machine translation (MT) may want to ensure the use of specific lexical terminologies. While there exist techniques for incorporating terminology constraints during inference for MT, current APE approaches cannot ensure that they will appear in the final translation. In this paper, we present both autoregressive and non-autoregressive models for lexically constrained APE, demonstrating that our approach enables preservation of 95% of the terminologies and also improves translation quality on English-German benchmarks. Even when applied to lexically constrained MT output, our approach is able to improve preservation of the terminologies. However, we show that our models do not learn to copy constraints systematically and suggest a simple data augmentation technique that leads to improved performance and robustness.</abstract>
       <url hash="e3b0f9c7">2020.wmt-1.141</url>
diff --git a/data/xml/2020.wnut.xml b/data/xml/2020.wnut.xml
index 51dc5695a2..e29b973388 100644
--- a/data/xml/2020.wnut.xml
+++ b/data/xml/2020.wnut.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</booktitle>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <editor><first>Afshin</first><last>Rahimi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="3">
       <title>Noisy Text Data: Achilles’ Heel of <fixed-case>BERT</fixed-case></title>
-      <author><first>Ankit</first><last>Kumar</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Kumar</last></author>
       <author><first>Piyush</first><last>Makhija</last></author>
       <author><first>Anuj</first><last>Gupta</last></author>
       <pages>16–21</pages>
@@ -65,7 +65,7 @@
     <paper id="5">
       <title>Combining <fixed-case>BERT</fixed-case> with Static Word Embeddings for Categorizing Social Media</title>
       <author><first>Israa</first><last>Alghanmi</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>28–33</pages>
       <abstract>Pre-trained neural language models (LMs) have achieved impressive results in various natural language processing tasks, across different languages. Surprisingly, this extends to the social media genre, despite the fact that social media often has very different characteristics from the language that LMs have seen during training. A particularly striking example is the performance of AraBERT, an LM for the Arabic language, which is successful in categorizing social media posts in Arabic dialects, despite only having been trained on Modern Standard Arabic. Our hypothesis in this paper is that the performance of LMs for social media can nonetheless be improved by incorporating static word vectors that have been specifically trained on social media. We show that a simple method for incorporating such word vectors is indeed successful in several Arabic and English benchmarks. Curiously, however, we also find that similar improvements are possible with word vectors that have been trained on traditional text sources (e.g. Wikipedia).</abstract>
@@ -89,7 +89,7 @@
     <paper id="7">
       <title><fixed-case>PHINC</fixed-case>: A Parallel <fixed-case>H</fixed-case>inglish Social Media Code-Mixed Corpus for Machine Translation</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>41–49</pages>
       <abstract>Code-mixing is the phenomenon of using more than one language in a sentence. In the multilingual communities, it is a very frequently observed pattern of communication on social media platforms. Flexibility to use multiple languages in one text message might help to communicate efficiently with the target audience. But, the noisy user-generated code-mixed text adds to the challenge of processing and understanding natural language to a much larger extent. Machine translation from monolingual source to the target language is a well-studied research problem. Here, we demonstrate that widely popular and sophisticated translation systems such as Google Translate fail at times to translate code-mixed text effectively. To address this challenge, we present a parallel corpus of the 13,738 code-mixed Hindi-English sentences and their corresponding human translation in English. In addition, we also propose a translation pipeline build on top of Google Translate. The evaluation of the proposed pipeline on <tex-math>PHINC</tex-math> demonstrates an increase in the performance of the underlying system. With minimal effort, we can extend the dataset and the proposed approach to other code-mixing language pairs.</abstract>
       <url hash="7a0cbddf">2020.wnut-1.7</url>
@@ -167,7 +167,7 @@
     <paper id="14">
       <title>An Empirical Analysis of Human-Bot Interaction on <fixed-case>R</fixed-case>eddit</title>
       <author><first>Ming-Cheng</first><last>Ma</last></author>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <pages>101–106</pages>
       <abstract>Automated agents (“bots”) have emerged as an ubiquitous and influential presence on social media. Bots engage on social media platforms by posting content and replying to other users on the platform. In this work we conduct an empirical analysis of the activity of a single bot on Reddit. Our goal is to determine whether bot activity (in the form of posted comments on the website) has an effect on how humans engage on Reddit. We find that (1) the sentiment of a bot comment has a significant, positive effect on the subsequent human reply, and (2) human Reddit users modify their comment behaviors to overlap with the text of the bot, similar to how humans modify their text to mimic other humans in conversation. Understanding human-bot interactions on social media with relatively simple bots is important for preparing for more advanced bots in the future.</abstract>
       <url hash="2142851b">2020.wnut-1.14</url>
@@ -260,7 +260,7 @@
       <title>Detecting Entailment in Code-Mixed <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Conversations</title>
       <author><first>Sharanya</first><last>Chakravarthy</last></author>
       <author><first>Anjana</first><last>Umapathy</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>165–170</pages>
       <abstract>The presence of large-scale corpora for Natural Language Inference (NLI) has spurred deep learning research in this area, though much of this research has focused solely on monolingual data. Code-mixing is the intertwined usage of multiple languages, and is commonly seen in informal conversations among polyglots. Given the rising importance of dialogue agents, it is imperative that they understand code-mixing, but the scarcity of code-mixed Natural Language Understanding (NLU) datasets has precluded research in this area. The dataset by Khanuja et. al. for detecting conversational entailment in code-mixed Hindi-English text is the first of its kind. We investigate the effectiveness of language modeling, data augmentation, translation, and architectural approaches to address the code-mixed, conversational, and low-resource aspects of this dataset. We obtain an 8.09% increase in test set accuracy over the current state of the art.</abstract>
       <url hash="c3b2840b">2020.wnut-1.22</url>
@@ -510,7 +510,7 @@
       <title><fixed-case>NLP</fixed-case> North at <fixed-case>WNUT</fixed-case>-2020 Task 2: Pre-training versus Ensembling for Detection of Informative <fixed-case>COVID</fixed-case>-19 <fixed-case>E</fixed-case>nglish Tweets</title>
       <author><first>Anders</first><last>Giovanni Møller</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>331–336</pages>
       <abstract>With the COVID-19 pandemic raging world-wide since the beginning of the 2020 decade, the need for monitoring systems to track relevant information on social media is vitally important. This paper describes our submission to the WNUT-2020 Task 2: Identification of informative COVID-19 English Tweets. We investigate the effectiveness for a variety of classification models, and found that domain-specific pre-trained BERT models lead to the best performance. On top of this, we attempt a variety of ensembling strategies, but these attempts did not lead to further improvements. Our final best model, the standalone CT-BERT model, proved to be highly competitive, leading to a shared first place in the shared task. Our results emphasize the importance of domain and task-related pre-training.</abstract>
       <url hash="c0b8bee9">2020.wnut-1.44</url>
@@ -592,7 +592,7 @@
       <author><first>Varad</first><last>Pimpalkhute</last></author>
       <author><first>Divya</first><last>Rallapalli</last></author>
       <author><first>David</first><last>Siguenza</last></author>
-      <author><first>Graciela</first><last>Gonzalez-Hernandez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></author>
       <pages>378–382</pages>
       <abstract>Increasing usage of social media presents new non-traditional avenues for monitoring disease outbreaks, virus transmissions and disease progressions through user posts describing test results or disease symptoms. However, the discussions on the topic of infectious diseases that are informative in nature also span various topics such as news, politics and humor which makes the data mining challenging. We present a system to identify tweets about the COVID19 disease outbreak that are deemed to be informative on Twitter for use in downstream applications. The system scored a F1-score of 0.8941, Precision of 0.9028, Recall of 0.8856 and Accuracy of 0.9010. In the shared task organized as part of the 6th Workshop of Noisy User-generated Text (WNUT), the system was ranked 18th by F1-score and 13th by Accuracy.</abstract>
       <url hash="8f897d7b">2020.wnut-1.52</url>
@@ -682,7 +682,7 @@
       <author><first>Rajesh Kumar</first><last>Mundotiya</last></author>
       <author><first>Rupjyoti</first><last>Baruah</last></author>
       <author><first>Bhavana</first><last>Srivastava</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>419–422</pages>
       <abstract>The Coronavirus pandemic has been a dominating news on social media for the last many months. Efforts are being made to reduce its spread and reduce the casualties as well as new infections. For this purpose, the information about the infected people and their related symptoms, as available on social media, such as Twitter, can help in prevention and taking precautions. This is an example of using noisy text processing for disaster management. This paper discusses the NLPRL results in Shared Task-2 of WNUT-2020 workshop. We have considered this problem as a binary classification problem and have used a pre-trained ELMo embedding with GRU units. This approach helps classify the tweets with accuracy as 80.85% and 78.54% as F1-score on the provided test dataset. The experimental code is available online.</abstract>
       <url hash="9dfb4503">2020.wnut-1.60</url>
@@ -843,7 +843,7 @@
     </paper>
     <paper id="75">
       <title><fixed-case>COVCOR</fixed-case>20 at <fixed-case>WNUT</fixed-case>-2020 Task 2: An Attempt to Combine Deep Learning and Expert rules</title>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <author><first>Ali</first><last>Safaya</last></author>
       <author><first>Osman</first><last>Mutlu</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
@@ -901,8 +901,8 @@
     </paper>
     <paper id="80">
       <title><fixed-case>HLTRI</fixed-case> at <fixed-case>W</fixed-case>-<fixed-case>NUT</fixed-case> 2020 Shared Task-3: <fixed-case>COVID</fixed-case>-19 Event Extraction from <fixed-case>T</fixed-case>witter Using Multi-Task Hopfield Pooling</title>
-      <author><first>Maxwell</first><last>Weinzierl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="maxwell-weinzierl"><first>Maxwell</first><last>Weinzierl</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>530–538</pages>
       <abstract>Extracting structured knowledge involving self-reported events related to the COVID-19 pandemic from Twitter has the potential to inform surveillance systems that play a critical role in public health. The event extraction challenge presented by the W-NUT 2020 Shared Task 3 focused on the identification of five types of events relevant to the COVID-19 pandemic and their respective set of pre-defined slots encoding demographic, epidemiological, clinical as well as spatial, temporal or subjective knowledge. Our participation in the challenge led to the design of a neural architecture for jointly identifying all Event Slots expressed in a tweet relevant to an event of interest. This architecture uses COVID-Twitter-BERT as the pre-trained language model. In addition, to learn text span embeddings for each Event Slot, we relied on a special case of Hopfield Networks, namely Hopfield pooling. The results of the shared task evaluation indicate that our system performs best when it is trained on a larger dataset, while it remains competitive when training on smaller datasets.</abstract>
       <url hash="86654687">2020.wnut-1.80</url>
diff --git a/data/xml/2020.wosp.xml b/data/xml/2020.wosp.xml
index 29228a040b..24fc99a57f 100644
--- a/data/xml/2020.wosp.xml
+++ b/data/xml/2020.wosp.xml
@@ -24,7 +24,7 @@
       <title>Virtual Citation Proximity (<fixed-case>VCP</fixed-case>): Empowering Document Recommender Systems by Learning a Hypothetical In-Text Citation-Proximity Metric for Uncited Documents</title>
       <author><first>Paul</first><last>Molloy</last></author>
       <author><first>Joeran</first><last>Beel</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1–8</pages>
       <abstract>The relatedness of research articles, patents, court rulings, web pages, and other document types is often calculated with citation or hyperlink-based approaches like co-citation (proximity) analysis. The main limitation of citation-based approaches is that they cannot be used for documents that receive little or no citations. We propose Virtual Citation Proximity (VCP), a Siamese Neural Network architecture, which combines the advantages of co-citation proximity analysis (diverse notions of relatedness / high recommendation performance), with the advantage of content-based filtering (high coverage). VCP is trained on a corpus of documents with textual features, and with real citation proximity as ground truth. VCP then predicts for any two documents, based on their title and abstract, in what proximity the two documents would be co-cited, if they were indeed co-cited. The prediction can be used in the same way as real citation proximity to calculate document relatedness, even for uncited documents. In our evaluation with 2 million co-citations from Wikipedia articles, VCP achieves an MAE of 0.0055, i.e. an improvement of 20% over the baseline, though the learning curve suggests that more work is needed.</abstract>
       <url hash="2f9def55">2020.wosp-1.1</url>
diff --git a/data/xml/2021.acl.xml b/data/xml/2021.acl.xml
index 0e06512674..5e882a7a2c 100644
--- a/data/xml/2021.acl.xml
+++ b/data/xml/2021.acl.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</booktitle>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Fei</first><last>Xia</last></editor>
       <editor><first>Wenjie</first><last>Li</last></editor>
       <editor><first>Roberto</first><last>Navigli</last></editor>
@@ -67,9 +67,9 @@
       <author><first>Paul</first><last>Röttger</last></author>
       <author><first>Bertie</first><last>Vidgen</last></author>
       <author><first>Dong</first><last>Nguyen</last></author>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <author><first>Helen</first><last>Margetts</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>41–58</pages>
       <abstract>Detecting online hate is a difficult task that even state-of-the-art models struggle with. Typically, hate speech detection models are evaluated by measuring their performance on held-out test data using metrics such as accuracy and F1 score. However, this approach makes it difficult to identify specific model weak points. It also risks overestimating generalisable model performance due to increasingly well-evidenced systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, we introduce HateCheck, a suite of functional tests for hate speech detection models. We specify 29 model functionalities motivated by a review of previous research and a series of interviews with civil society stakeholders. We craft test cases for each functionality and validate their quality through a structured annotation process. To illustrate HateCheck’s utility, we test near-state-of-the-art transformer models as well as two popular commercial models, revealing critical model weaknesses.</abstract>
       <url hash="1349cfa3">2021.acl-long.4</url>
@@ -207,7 +207,7 @@
       <author><first>Haoyu</first><last>Song</last></author>
       <author><first>Yan</first><last>Wang</last></author>
       <author><first>Kaiyan</first><last>Zhang</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>167–177</pages>
       <abstract>Maintaining a consistent persona is essential for dialogue agents. Although tremendous advancements have been brought, the limited-scale of annotated personalized dialogue datasets is still a barrier towards training robust and consistent persona-based dialogue models. This work shows how this challenge can be addressed by disentangling persona-based dialogue generation into two sub-tasks with a novel BERT-over-BERT (BoB) model. Specifically, the model consists of a BERT-based encoder and two BERT-based decoders, where one decoder is for response generation, and another is for consistency understanding. In particular, to learn the ability of consistency understanding from large-scale non-dialogue inference data, we train the second decoder in an unlikelihood manner. Under different limited data settings, both automatic and human evaluations demonstrate that the proposed model outperforms strong baselines in response quality and persona consistency.</abstract>
@@ -238,7 +238,7 @@
       <author><first>Tianxiang</first><last>Sun</last></author>
       <author><first>Hang</first><last>Yan</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>189–199</pages>
       <abstract>Both performance and efficiency are crucial factors for sequence labeling tasks in many real-world scenarios. Although the pre-trained models (PTMs) have significantly improved the performance of various sequence labeling tasks, their computational cost is expensive. To alleviate this problem, we extend the recent successful early-exit mechanism to accelerate the inference of PTMs for sequence labeling tasks. However, existing early-exit mechanisms are specifically designed for sequence-level tasks, rather than sequence labeling. In this paper, we first propose a simple extension of sentence-level early-exit for sequence labeling tasks. To further reduce the computational cost, we also propose a token-level early-exit mechanism that allows partial tokens to exit early at different layers. Considering the local dependency inherent in sequence labeling, we employed a window-based criterion to decide for a token whether or not to exit. The token-level early-exit brings the gap between training and inference, so we introduce an extra self-sampling fine-tuning stage to alleviate it. The extensive experiments on three popular sequence labeling tasks show that our approach can save up to 66%∼75% inference cost with minimal performance degradation. Compared with competitive compressed models such as DistilBERT, our approach can achieve better performance under the same speed-up ratios of 2×, 3×, and 4×.</abstract>
       <url hash="3cf4a8fa">2021.acl-long.16</url>
@@ -338,8 +338,8 @@
       <title>Multi-Head Highly Parallelized <fixed-case>LSTM</fixed-case> Decoder for Neural Machine Translation</title>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Meng</first><last>Zhang</last></author>
       <pages>273–282</pages>
       <abstract>One of the reasons Transformer translation models are popular is that self-attention networks for context modelling can be easily parallelized at sequence level. However, the computational complexity of a self-attention network is <tex-math>O(n^2)</tex-math>, increasing quadratically with sequence length. By contrast, the complexity of LSTM-based approaches is only O(n). In practice, however, LSTMs are much slower to train than self-attention networks as they cannot be parallelized at sequence level: to model context, the current LSTM state relies on the full LSTM computation of the preceding state. This has to be computed n times for a sequence of length n. The linear transformations involved in the LSTM gate and state computations are the major cost factors in this. To enable sequence-level parallelization of LSTMs, we approximate full LSTM context modelling by computing hidden states and gates with the current input and a simple bag-of-words representation of the preceding tokens context. This allows us to compute each input step efficiently in parallel, avoiding the formerly costly sequential linear transformations. We then connect the outputs of each parallel step with computationally cheap element-wise computations. We call this the Highly Parallelized LSTM. To further constrain the number of LSTM parameters, we compute several small HPLSTMs in parallel like multi-head attention in the Transformer. The experiments show that our MHPLSTM decoder achieves significant BLEU improvements, while being even slightly faster than the self-attention network in training, and much faster than the standard LSTM.</abstract>
@@ -351,7 +351,7 @@
     <paper id="24">
       <title>A Bidirectional Transformer Based Alignment Model for Unsupervised Word Alignment</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>283–292</pages>
       <abstract>Word alignment and machine translation are two closely related tasks. Neural translation models, such as RNN-based and Transformer models, employ a target-to-source attention mechanism which can provide rough word alignments, but with a rather low accuracy. High-quality word alignment can help neural machine translation in many different ways, such as missing word detection, annotation transfer and lexicon injection. Existing methods for learning word alignment include statistical word aligners (e.g. GIZA++) and recently neural word alignment models. This paper presents a bidirectional Transformer based alignment (BTBA) model for unsupervised learning of the word alignment task. Our BTBA model predicts the current target word by attending the source context and both left-side and right-side target context to produce accurate target-to-source attention (alignment). We further fine-tune the target-to-source attention in the BTBA model to obtain better alignments using a full context based optimization method and self-supervised training. We test our method on three word alignment tasks and show that our method outperforms both previous neural word alignment approaches and the popular statistical word aligner GIZA++.</abstract>
       <url hash="214f1de7">2021.acl-long.24</url>
@@ -495,7 +495,7 @@
       <title><fixed-case>DESCGEN</fixed-case>: A Distantly Supervised Datasetfor Generating Entity Descriptions</title>
       <author><first>Weijia</first><last>Shi</last></author>
       <author><first>Mandar</first><last>Joshi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>415–427</pages>
       <abstract>Short textual descriptions of entities provide summaries of their key attributes and have been shown to be useful sources of background knowledge for tasks such as entity linking and question answering. However, generating entity descriptions, especially for new and long-tail entities, can be challenging since relevant information is often scattered across multiple sources with varied content and style. We introduce DESCGEN: given mentions spread over multiple documents, the goal is to generate an entity summary description. DESCGEN consists of 37K entity descriptions from Wikipedia and Fandom, each paired with nine evidence documents on average. The documents were collected using a combination of entity linking and hyperlinks into the entity pages, which together provide high-quality distant supervision. Compared to other multi-document summarization tasks, our task is entity-centric, more abstractive, and covers a wide range of domains. We also propose a two-stage extract-then-generate baseline and show that there exists a large gap (19.9% in ROUGE-L) between state-of-art models and human performance, suggesting that the data will support significant future work.</abstract>
       <url hash="b033c710">2021.acl-long.35</url>
@@ -663,7 +663,7 @@
       <author><first>Rabeeh</first><last>Karimi Mahabadi</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Mostafa</first><last>Dehghani</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>565–576</pages>
       <abstract>State-of-the-art parameter-efficient fine-tuning methods rely on introducing adapter modules between the layers of a pretrained language model. However, such modules are trained separately for each task and thus do not enable sharing information across tasks. In this paper, we show that we can learn adapter parameters for all layers and tasks by generating them using shared hypernetworks, which condition on task, adapter position, and layer id in a transformer model. This parameter-efficient multi-task learning framework allows us to achieve the best of both worlds by sharing knowledge across tasks via hypernetworks while enabling the model to adapt to each individual task through task-specific adapters. Experiments on the well-known GLUE benchmark show improved performance in multi-task learning while adding only 0.29% parameters per task. We additionally demonstrate substantial performance improvements in few-shot domain generalization across a variety of tasks. Our code is publicly available in <url>https://github.com/rabeehk/hyperformer</url>.</abstract>
       <url hash="4ae58eba">2021.acl-long.47</url>
@@ -700,7 +700,7 @@
     </paper>
     <paper id="50">
       <title>Understanding and Countering Stereotypes: A Computational Approach to the Stereotype Content Model</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Isar</first><last>Nejadgholi</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>600–616</pages>
@@ -756,7 +756,7 @@
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Ying Nian</first><last>Wu</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <pages>658–670</pages>
       <abstract>Inferring social relations from dialogues is vital for building emotionally intelligent robots to interpret human language better and act accordingly. We model the social network as an And-or Graph, named SocAoG, for the consistency of relations among a group and leveraging attributes as inference cues. Moreover, we formulate a sequential structure prediction task, and propose an <tex-math>\alpha</tex-math>-<tex-math>\beta</tex-math>-<tex-math>\gamma</tex-math> strategy to incrementally parse SocAoG for the dynamic inference upon any incoming utterance: (i) an <tex-math>\alpha</tex-math> process predicting attributes and relations conditioned on the semantics of dialogues, (ii) a <tex-math>\beta</tex-math> process updating the social relations based on related attributes, and (iii) a <tex-math>\gamma</tex-math> process updating individual’s attributes based on interpersonal social relations. Empirical results on DialogRE and MovieGraph show that our model infers social relations more accurately than the state-of-the-art methods. Moreover, the ablation study shows the three processes complement each other, and the case study demonstrates the dynamic relational inference.</abstract>
       <url hash="b72115f3">2021.acl-long.54</url>
@@ -805,7 +805,7 @@
       <title>Increasing Faithfulness in Knowledge-Grounded Dialogue with Controllable Features</title>
       <author><first>Hannah</first><last>Rashkin</last></author>
       <author><first>David</first><last>Reitter</last></author>
-      <author><first>Gaurav Singh</first><last>Tomar</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <pages>704–718</pages>
       <abstract>Knowledge-grounded dialogue systems are intended to convey information that is based on evidence provided in a given source text. We discuss the challenges of training a generative neural dialogue model for such systems that is controlled to stay faithful to the evidence. Existing datasets contain a mix of conversational responses that are faithful to selected evidence as well as more subjective or chit-chat style responses. We propose different evaluation measures to disentangle these different styles of responses by quantifying the informativeness and objectivity. At training time, additional inputs based on these evaluation measures are given to the dialogue model. At generation time, these additional inputs act as stylistic controls that encourage the model to generate responses that are faithful to the provided evidence. We also investigate the usage of additional controls at decoding time using resampling techniques. In addition to automatic metrics, we perform a human evaluation study where raters judge the output of these controlled generation models to be generally more objective and faithful to the evidence compared to baseline dialogue systems.</abstract>
@@ -848,7 +848,7 @@
       <author><first>Weile</first><last>Chen</last></author>
       <author><first>Huiqiang</first><last>Jiang</last></author>
       <author><first>Qianhui</first><last>Wu</last></author>
-      <author><first>Börje F.</first><last>Karlsson</last></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last></author>
       <author><first>Yi</first><last>Guan</last></author>
       <pages>743–753</pages>
       <abstract>Neural methods have been shown to achieve high performance in Named Entity Recognition (NER), but rely on costly high-quality labeled data for training, which is not always available across languages. While previous works have shown that unlabeled data in a target language can be used to improve cross-lingual model performance, we propose a novel adversarial approach (AdvPicker) to better leverage such data and further improve results. We design an adversarial learning framework in which an encoder learns entity domain knowledge from labeled source-language data and better shared features are captured via adversarial training - where a discriminator selects less language-dependent target-language data via similarity to the source language. Experimental results on standard benchmark datasets well demonstrate that the proposed method benefits strongly from this data selection process and outperforms existing state-of-the-art methods; without requiring any additional external resources (e.g., gazetteers or via machine translation).</abstract>
@@ -912,7 +912,7 @@
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>Danish</first><last>Pruthi</last></author>
       <author><first>Aditi</first><last>Chaudhary</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>788–801</pages>
       <abstract>Context-aware machine translation models are designed to leverage contextual information, but often fail to do so. As a result, they inaccurately disambiguate pronouns and polysemous words that require context for resolution. In this paper, we ask several questions: What contexts do human translators use to resolve ambiguous words? Are models paying large amounts of attention to the same context? What if we explicitly train them to do so? To answer these questions, we introduce SCAT (Supporting Context for Ambiguous Translations), a new English-French dataset comprising supporting context words for 14K translations that professional translators found useful for pronoun disambiguation. Using SCAT, we perform an in-depth analysis of the context used to disambiguate, examining positional and lexical characteristics of the supporting words. Furthermore, we measure the degree of alignment between the model’s attention scores and the supporting context from SCAT, and apply a guided attention strategy to encourage agreement between the two.</abstract>
@@ -928,10 +928,10 @@
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Naman</first><last>Goyal</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>802–812</pages>
       <abstract>The scarcity of parallel data is a major obstacle for training high-quality machine translation systems for low-resource languages. Fortunately, some low-resource languages are linguistically related or similar to high-resource languages; these related languages may share many lexical or syntactic structures. In this work, we exploit this linguistic overlap to facilitate translating to and from a low-resource language with only monolingual data, in addition to any parallel data in the related high-resource language. Our method, NMT-Adapt, combines denoising autoencoding, back-translation and adversarial objectives to utilize monolingual data for low-resource adaptation. We experiment on 7 languages from three different language families and show that our technique significantly improves translation into low-resource language compared to other translation baselines.</abstract>
       <url hash="3c7907c0">2021.acl-long.66</url>
@@ -941,9 +941,9 @@
     </paper>
     <paper id="67">
       <title>Bilingual Lexicon Induction via Unsupervised Bitext Construction and Word Alignment</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <pages>813–826</pages>
       <abstract>Bilingual lexicons map words in one language to their translations in another, and are typically induced by learning linear projections to align monolingual word embedding spaces. In this paper, we show it is possible to produce much higher quality lexicons with methods that combine (1) unsupervised bitext mining and (2) unsupervised word alignment. Directly applying a pipeline that uses recent algorithms for both subproblems significantly improves induced lexicon quality and further gains are possible by learning to filter the resulting lexical entries, with both unsupervised and semi-supervised schemes. Our final model outperforms the state of the art on the BUCC 2020 shared task by 14 F1 points averaged over 12 language pairs, while also providing a more interpretable approach that allows for rich reasoning of word meaning in context. Further analysis of our output and the standard reference lexicons suggests they are of comparable quality, and new benchmarks may be needed to measure further progress on this task.</abstract>
       <url hash="b30933bb">2021.acl-long.67</url>
@@ -1020,10 +1020,10 @@
     <paper id="73">
       <title><fixed-case>XLPT</fixed-case>-<fixed-case>AMR</fixed-case>: Cross-Lingual Pre-Training via Multi-Task Learning for Zero-Shot <fixed-case>AMR</fixed-case> Parsing and Text Generation</title>
       <author><first>Dongqin</first><last>Xu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>896–907</pages>
       <abstract>Due to the scarcity of annotated data, Abstract Meaning Representation (AMR) research is relatively limited and challenging for languages other than English. Upon the availability of English AMR dataset and English-to- X parallel datasets, in this paper we propose a novel cross-lingual pre-training approach via multi-task learning (MTL) for both zeroshot AMR parsing and AMR-to-text generation. Specifically, we consider three types of relevant tasks, including AMR parsing, AMR-to-text generation, and machine translation. We hope that knowledge gained while learning for English AMR parsing and text generation can be transferred to the counterparts of other languages. With properly pretrained models, we explore four different finetuning methods, i.e., vanilla fine-tuning with a single task, one-for-all MTL fine-tuning, targeted MTL fine-tuning, and teacher-studentbased MTL fine-tuning. Experimental results on AMR parsing and text generation of multiple non-English languages demonstrate that our approach significantly outperforms a strong baseline of pre-training approach, and greatly advances the state of the art. In detail, on LDC2020T07 we have achieved 70.45%, 71.76%, and 70.80% in Smatch F1 for AMR parsing of German, Spanish, and Italian, respectively, while for AMR-to-text generation of the languages, we have obtained 25.69, 31.36, and 28.42 in BLEU respectively. We make our code available on github <url>https://github.com/xdqkid/XLPT-AMR</url>.</abstract>
       <url hash="77ddc5d5">2021.acl-long.73</url>
@@ -1059,7 +1059,7 @@
       <title>A Targeted Assessment of Incremental Processing in Neural Language Models and Humans</title>
       <author><first>Ethan</first><last>Wilcox</last></author>
       <author><first>Pranali</first><last>Vani</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>939–952</pages>
       <abstract>We present a targeted, scaled-up comparison of incremental processing in humans and neural language models by collecting by-word reaction time data for sixteen different syntactic test suites across a range of structural phenomena. Human reaction time data comes from a novel online experimental paradigm called the Interpolated Maze task. We compare human reaction times to by-word probabilities for four contemporary language models, with different architectures and trained on a range of data set sizes. We find that across many phenomena, both humans and language models show increased processing difficulty in ungrammatical sentence regions with human and model ‘accuracy’ scores a la Marvin and Linzen (2018) about equal. However, although language model outputs match humans in direction, we show that models systematically under-predict the difference in magnitude of incremental processing difficulty between grammatical and ungrammatical sentences. Specifically, when models encounter syntactic violations they fail to accurately predict the longer reading times observed in the human data. These results call into question whether contemporary language models are approaching human-like performance for sensitivity to syntactic violations.</abstract>
       <url hash="1adfcc66">2021.acl-long.76</url>
@@ -1083,7 +1083,7 @@
     </paper>
     <paper id="78">
       <title>To <fixed-case>POS</fixed-case> Tag or Not to <fixed-case>POS</fixed-case> Tag: The Impact of <fixed-case>POS</fixed-case> Tags on Morphological Learning in Low-Resource Settings</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Ling</first><last>Liu</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>966–978</pages>
@@ -1096,8 +1096,8 @@
     <paper id="79">
       <title>Prosodic segmentation for parsing spoken dialogue</title>
       <author><first>Elizabeth</first><last>Nielsen</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>979–992</pages>
       <abstract>Parsing spoken dialogue poses unique difficulties, including disfluencies and unmarked boundaries between sentence-like units. Previous work has shown that prosody can help with parsing disfluent speech (Tran et al. 2018), but has assumed that the input to the parser is already segmented into sentence-like units (SUs), which isn’t true in existing speech applications. We investigate how prosody affects a parser that receives an entire dialogue turn as input (a turn-based model), instead of gold standard pre-segmented SUs (an SU-based model). In experiments on the English Switchboard corpus, we find that when using transcripts alone, the turn-based model has trouble segmenting SUs, leading to worse parse performance than the SU-based model. However, prosody can effectively replace gold standard SU boundaries: with prosody, the turn-based model performs as well as the SU-based model (91.38 vs. 91.06 F1 score, respectively), despite performing two tasks (SU segmentation and parsing) rather than one (parsing alone). Analysis shows that pitch and intensity features are the most important for this corpus, since they allow the model to correctly distinguish an SU boundary from a speech disfluency – a distinction that the model otherwise struggles to make.</abstract>
       <url hash="54b77a64">2021.acl-long.79</url>
@@ -1132,7 +1132,7 @@
       <author><first>Gilsinia</first><last>Lopez</last></author>
       <author><first>Alexandra</first><last>Olteanu</last></author>
       <author><first>Robert</first><last>Sim</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <pages>1004–1015</pages>
       <abstract>Auditing NLP systems for computational harms like surfacing stereotypes is an elusive goal. Several recent efforts have focused on benchmark datasets consisting of pairs of contrastive sentences, which are often accompanied by metrics that aggregate an NLP system’s behavior on these pairs into measurements of harms. We examine four such benchmarks constructed for two NLP tasks: language modeling and coreference resolution. We apply a measurement modeling lens—originating from the social sciences—to inventory a range of pitfalls that threaten these benchmarks’ validity as measurement models for stereotyping. We find that these benchmarks frequently lack clear articulations of what is being measured, and we highlight a range of ambiguities and unstated assumptions that affect how these benchmarks conceptualize and operationalize stereotyping.</abstract>
       <url hash="5a9672f9">2021.acl-long.81</url>
@@ -1145,8 +1145,8 @@
       <author><first>Justin</first><last>Lovelace</last></author>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
       <author><first>Shikhar</first><last>Vashishth</last></author>
-      <author><first>Jill Fain</first><last>Lehman</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="jill-fain-lehman"><first>Jill Fain</first><last>Lehman</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1016–1029</pages>
       <abstract>Knowledge Graph (KG) completion research usually focuses on densely connected benchmark datasets that are not representative of real KGs. We curate two KG datasets that include biomedical and encyclopedic knowledge and use an existing commonsense KG dataset to explore KG completion in the more realistic setting where dense connectivity is not guaranteed. We develop a deep convolutional network that utilizes textual entity representations and demonstrate that our model outperforms recent KG completion methods in this challenging setting. We find that our model’s performance improvements stem primarily from its robustness to sparsity. We then distill the knowledge from the convolutional network into a student network that re-ranks promising candidate entities. This re-ranking stage leads to further improvements in performance and demonstrates the effectiveness of entity re-ranking for KG completion.</abstract>
       <url hash="e1522f5e">2021.acl-long.82</url>
@@ -1239,7 +1239,7 @@
       <author><first>Jean</first><last>Maillard</last></author>
       <author><first>Vladimir</first><last>Karpukhin</last></author>
       <author><first>Fabio</first><last>Petroni</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Barlas</first><last>Oguz</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <author><first>Gargi</first><last>Ghosh</last></author>
@@ -1255,7 +1255,7 @@
       <author><first>Yian</first><last>Zhang</last></author>
       <author><first>Alex</first><last>Warstadt</last></author>
       <author><first>Xiaocheng</first><last>Li</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>1112–1125</pages>
       <abstract>NLP is currently dominated by language models like RoBERTa which are pretrained on billions of words. But what exact knowledge or skills do Transformer LMs learn from large-scale pretraining that they cannot learn from less data? To explore this question, we adopt five styles of evaluation: classifier probing, information-theoretic probing, unsupervised relative acceptability judgments, unsupervised language model knowledge probing, and fine-tuning on NLU tasks. We then draw learning curves that track the growth of these different measures of model ability with respect to pretraining data volume using the MiniBERTas, a group of RoBERTa models pretrained on 1M, 10M, 100M and 1B words. We find that these LMs require only about 10M to 100M words to learn to reliably encode most syntactic and semantic features we test. They need a much larger quantity of data in order to acquire enough commonsense knowledge and other skills required to master typical downstream NLU tasks. The results suggest that, while the ability to encode linguistic features is almost certainly necessary for language understanding, it is likely that other, unidentified, forms of knowledge are the major drivers of recent improvements in language understanding among large pretrained models.</abstract>
       <url hash="a10434a4">2021.acl-long.90</url>
@@ -1285,7 +1285,7 @@
       <author><first>Jason</first><last>Phang</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>1141–1158</pages>
       <abstract>Recent years have seen numerous NLP datasets introduced to evaluate the performance of fine-tuned models on natural language understanding tasks. Recent results from large pretrained models, though, show that many of these datasets are largely saturated and unlikely to be able to detect further progress. What kind of datasets are still effective at discriminating among strong models, and what kind of datasets should we expect to be able to detect future improvements? To measure this uniformly across datasets, we draw on Item Response Theory and evaluate 29 datasets using predictions from 18 pretrained Transformer models on individual test examples. We find that Quoref, HellaSwag, and MC-TACO are best suited for distinguishing among state-of-the-art models, while SNLI, MNLI, and CommitmentBank seem to be saturated for current strong models. We also observe span selection task format, which is used for QA datasets like QAMR or SQuAD2.0, is effective in differentiating between strong and weak models.</abstract>
       <url hash="8d629013">2021.acl-long.92</url>
@@ -1296,7 +1296,7 @@
     <paper id="93">
       <title>Uncovering Constraint-Based Behavior in Neural Models via Targeted Fine-Tuning</title>
       <author><first>Forrest</first><last>Davis</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>1159–1171</pages>
       <abstract>A growing body of literature has focused on detailing the linguistic knowledge embedded in large, pretrained language models. Existing work has shown that non-linguistic biases in models can drive model behavior away from linguistic generalizations. We hypothesized that competing linguistic processes within a language, rather than just non-linguistic model biases, could obscure underlying linguistic knowledge. We tested this claim by exploring a single phenomenon in four languages: English, Chinese, Spanish, and Italian. While human behavior has been found to be similar across languages, we find cross-linguistic variation in model behavior. We show that competing processes in a language act as constraints on model behavior and demonstrate that targeted fine-tuning can re-weight the learned constraints, uncovering otherwise dormant linguistic knowledge in models. Our results suggest that models need to learn both the linguistic constraints in a language and their relative ranking, with mismatches in either producing non-human-like behavior.</abstract>
       <url hash="ed3497d7">2021.acl-long.93</url>
@@ -1309,7 +1309,7 @@
       <author><first>Rishabh</first><last>Bhardwaj</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1172–1182</pages>
       <abstract>Interpretability is an important aspect of the trustworthiness of a model’s predictions. Transformer’s predictions are widely explained by the attention weights, i.e., a probability distribution generated at its self-attention unit (head). Current empirical studies provide shreds of evidence that attention weights are not explanations by proving that they are not unique. A recent study showed theoretical justifications to this observation by proving the non-identifiability of attention weights. For a given input to a head and its output, if the attention weights generated in it are unique, we call the weights identifiable. In this work, we provide deeper theoretical analysis and empirical observations on the identifiability of attention weights. Ignored in the previous works, we find the attention weights are more identifiable than we currently perceive by uncovering the hidden role of the key vector. However, the weights are still prone to be non-unique attentions that make them unfit for interpretation. To tackle this issue, we provide a variant of the encoder layer that decouples the relationship between key and value vector and provides identifiable weights up to the desired length of the input. We prove the applicability of such variations by providing empirical justifications on varied text classification tasks. The implementations are available at <url>https://github.com/declare-lab/identifiable-transformers</url>.</abstract>
       <url hash="7d1720ba">2021.acl-long.94</url>
@@ -1322,7 +1322,7 @@
       <author><first>Xinnuo</first><last>Xu</last></author>
       <author><first>Guoyin</first><last>Wang</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>1183–1195</pages>
       <abstract>Natural Language Generation (NLG) is a key component in a task-oriented dialogue system, which converts the structured meaning representation (MR) to the natural language. For large-scale conversational systems, where it is common to have over hundreds of intents and thousands of slots, neither template-based approaches nor model-based approaches are scalable. Recently, neural NLGs started leveraging transfer learning and showed promising results in few-shot settings. This paper proposes AugNLG, a novel data augmentation approach that combines a self-trained neural retrieval model with a few-shot learned NLU model, to automatically create MR-to-Text data from open-domain texts. The proposed system mostly outperforms the state-of-the-art methods on the FewshotWOZ data in both BLEU and Slot Error Rate. We further confirm improved results on the FewshotSGD data and provide comprehensive analysis results on key components of our system. Our code and data are available at <url>https://github.com/XinnuoXu/AugNLG</url>.</abstract>
       <url hash="f0499e65">2021.acl-long.95</url>
@@ -1334,7 +1334,7 @@
       <title>Can vectors read minds better than experts? Comparing data augmentation strategies for the automated scoring of children’s mindreading ability</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <author><first>Rory</first><last>Devine</last></author>
       <pages>1196–1206</pages>
       <abstract>In this paper we implement and compare 7 different data augmentation strategies for the task of automatic scoring of children’s ability to understand others’ thoughts, feelings, and desires (or “mindreading”). We recruit in-domain experts to re-annotate augmented samples and determine to what extent each strategy preserves the original rating. We also carry out multiple experiments to measure how much each augmentation strategy improves the performance of automatic scoring systems. To determine the capabilities of automatic systems to generalize to unseen data, we create UK-MIND-20 - a new corpus of children’s performance on tests of mindreading, consisting of 10,320 question-answer pairs. We obtain a new state-of-the-art performance on the MIND-CA corpus, improving macro-F1-score by 6 points. Results indicate that both the number of training examples and the quality of the augmentation strategies affect the performance of the systems. The task-specific augmentations generally outperform task-agnostic augmentations. Automatic augmentations based on vectors (GloVe, FastText) perform the worst. We find that systems trained on MIND-CA generalize well to UK-MIND-20. We demonstrate that data augmentation strategies also improve the performance on unseen data.</abstract>
@@ -1350,7 +1350,7 @@
       <author><first>Budhaditya</first><last>Deb</last></author>
       <author><first>Guoqing</first><last>Zheng</last></author>
       <author><first>Milad</first><last>Shokouhi</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>1207–1220</pages>
       <abstract>Reply suggestion models help users process emails and chats faster. Previous work only studies English reply suggestion. Instead, we present MRS, a multilingual reply suggestion dataset with ten languages. MRS can be used to compare two families of models: 1) retrieval models that select the reply from a fixed set and 2) generation models that produce the reply from scratch. Therefore, MRS complements existing cross-lingual generalization benchmarks that focus on classification and sequence labeling tasks. We build a generation model and a retrieval model as baselines for MRS. The two models have different strengths in the monolingual setting, and they require different strategies to generalize across languages. MRS is publicly available at <url>https://github.com/zhangmozhi/mrs</url>.</abstract>
       <url hash="e60b443b">2021.acl-long.97</url>
@@ -1365,7 +1365,7 @@
       <author><first>Harsh</first><last>Trivedi</last></author>
       <author><first>Alex</first><last>Warstadt</last></author>
       <author><first>Clara</first><last>Vania</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>1221–1235</pages>
       <abstract>Crowdsourcing is widely used to create data for common natural language understanding tasks. Despite the importance of these datasets for measuring and refining model understanding of language, there has been little focus on the crowdsourcing methods used for collecting the datasets. In this paper, we compare the efficacy of interventions that have been proposed in prior work as ways of improving data quality. We use multiple-choice question answering as a testbed and run a randomized trial by assigning crowdworkers to write questions under one of four different data collection protocols. We find that asking workers to write explanations for their examples is an ineffective stand-alone strategy for boosting NLU example difficulty. However, we find that training crowdworkers, and then using an iterative process of collecting data, sending feedback, and qualifying workers based on expert judgments is an effective means of collecting challenging data. But using crowdsourced, instead of expert judgments, to qualify workers and send feedback does not prove to be effective. We observe that the data from the iterative protocol with expert assessments is more challenging by several measures. Notably, the human–model gap on the unanimous agreement portion of this data is, on average, twice as large as the gap for the baseline protocol data.</abstract>
       <url hash="2d4d5f28">2021.acl-long.98</url>
@@ -1382,7 +1382,7 @@
       <author><first>Shangyi</first><last>Ning</last></author>
       <author><first>Yancheng</first><last>He</last></author>
       <author><first>Changjian</first><last>Jiang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1236–1246</pages>
       <abstract>Ideology of legislators is typically estimated by ideal point models from historical records of votes. It represents legislators and legislation as points in a latent space and shows promising results for modeling voting behavior. However, it fails to capture more specific attitudes of legislators toward emerging issues and is unable to model newly-elected legislators without voting histories. In order to mitigate these two problems, we explore to incorporate both voting behavior and public statements on Twitter to jointly model legislators. In addition, we propose a novel task, namely hashtag usage prediction to model the ideology of legislators on Twitter. In practice, we construct a heterogeneous graph for the legislative context and use relational graph neural networks to learn the representation of legislators with the guidance of historical records of their voting and hashtag usage. Experiment results indicate that our model yields significant improvements for the task of roll call vote prediction. Further analysis further demonstrates that legislator representation we learned captures nuances in statements.</abstract>
       <url hash="7a05fe96">2021.acl-long.99</url>
@@ -1406,7 +1406,7 @@
       <author><first>Danni</first><last>Liu</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>James</first><last>Cross</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Xian</first><last>Li</last></author>
       <pages>1259–1273</pages>
       <abstract>Multilingual neural machine translation has shown the capability of directly translating between language pairs unseen in training, i.e. zero-shot translation. Despite being conceptually attractive, it often suffers from low output quality. The difficulty of generalizing to new translation directions suggests the model representations are highly specific to those language pairs seen in training. We demonstrate that a main factor causing the language-specific representations is the positional correspondence to input tokens. We show that this can be easily alleviated by removing residual connections in an encoder layer. With this modification, we gain up to 18.5 BLEU points on zero-shot translation while retaining quality on supervised directions. The improvements are particularly prominent between related languages, where our proposed model outperforms pivot-based translation. Moreover, our approach allows easy integration of new languages, which substantially expands translation coverage. By thorough inspections of the hidden layer outputs, we show that our approach indeed leads to more language-independent representations.</abstract>
@@ -1418,7 +1418,7 @@
     </paper>
     <paper id="102">
       <title>Common Sense Beyond <fixed-case>E</fixed-case>nglish: Evaluating and Improving Multilingual Language Models for Commonsense Reasoning</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Seyeon</first><last>Lee</last></author>
       <author><first>Xiaoyang</first><last>Qiao</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -1447,7 +1447,7 @@
       <title>Diverse Pretrained Context Encodings Improve Document Translation</title>
       <author><first>Domenic</first><last>Donato</last></author>
       <author><first>Lei</first><last>Yu</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1299–1311</pages>
       <abstract>We propose a new architecture for adapting a sentence-level sequence-to-sequence transformer by incorporating multiple pre-trained document context signals and assess the impact on translation performance of (1) different pretraining approaches for generating these signals, (2) the quantity of parallel data for which document context is available, and (3) conditioning on source, target, or source and target contexts. Experiments on the NIST Chinese-English, and IWSLT and WMT English-German tasks support four general conclusions: that using pre-trained context representations markedly improves sample efficiency, that adequate parallel data resources are crucial for learning to use document context, that jointly conditioning on multiple context representations outperforms any single representation, and that source context is more valuable for translation performance than target side context. Our best multi-context model consistently outperforms the best existing context-aware transformers.</abstract>
       <url hash="a379c6b3">2021.acl-long.104</url>
@@ -1461,7 +1461,7 @@
       <author><first>Sarvesh</first><last>Mehtani</last></author>
       <author><first>Vaidehi</first><last>Patil</last></author>
       <author><first>Abhijeet</first><last>Awasthi</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Sunita</first><last>Sarawagi</last></author>
       <pages>1312–1323</pages>
       <abstract>Recent research in multilingual language models (LM) has demonstrated their ability to effectively handle multiple languages in a single model. This holds promise for low web-resource languages (LRL) as multilingual models can enable transfer of supervision from high resource languages to LRLs. However, incorporating a new language in an LM still remains a challenge, particularly for languages with limited corpora and in unseen scripts. In this paper we argue that relatedness among languages in a language family may be exploited to overcome some of the corpora limitations of LRLs, and propose RelateLM. We focus on Indian languages, and exploit relatedness along two dimensions: (1) script (since many Indic scripts originated from the Brahmic script), and (2) sentence structure. RelateLM uses transliteration to convert the unseen script of limited LRL text into the script of a Related Prominent Language (RPL) (Hindi in our case). While exploiting similar sentence structures, RelateLM utilizes readily available bilingual dictionaries to pseudo translate RPL text into LRL corpora. Experiments on multiple real-world benchmark datasets provide validation to our hypothesis that using a related language as pivot, along with transliteration and pseudo translation based data augmentation, can be an effective way to adapt LMs for LRLs, rather than direct training or pivoting through English.</abstract>
@@ -1485,7 +1485,7 @@
     </paper>
     <paper id="107">
       <title>Towards Argument Mining for Social Good: A Survey</title>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Iman</first><last>Jundi</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
@@ -1540,7 +1540,7 @@
     </paper>
     <paper id="111">
       <title>Select, Extract and Generate: Neural Keyphrase Generation with Layer-wise Coverage Attention</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Xiao</first><last>Bai</last></author>
       <author><first>Soomin</first><last>Lee</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -1595,7 +1595,7 @@
       <author><first>Lya Hulliyyatus</first><last>Suadaa</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <pages>1451–1465</pages>
       <abstract>Recent neural text generation models have shown significant improvement in generating descriptive text from structured data such as table formats. One of the remaining important challenges is generating more analytical descriptions that can be inferred from facts in a data source. The use of a template-based generator and a pointer-generator is among the potential alternatives for table-to-text generators. In this paper, we propose a framework consisting of a pre-trained model and a copy mechanism. The pre-trained models are fine-tuned to produce fluent text that is enriched with numerical reasoning. However, it still lacks fidelity to the table contents. The copy mechanism is incorporated in the fine-tuning step by using general placeholders to avoid producing hallucinated phrases that are not supported by a table while preserving high fluency. In summary, our contributions are (1) a new dataset for numerical table-to-text generation using pairs of a table and a paragraph of a table description with richer inference from scientific papers, and (2) a table-to-text generation framework enriched with numerical reasoning.</abstract>
@@ -1651,10 +1651,10 @@
       <author><first>Khalil</first><last>Mrini</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Emilia</first><last>Farcas</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>1505–1515</pages>
       <abstract>Users of medical question answering systems often submit long and detailed questions, making it hard to achieve high recall in answer retrieval. To alleviate this problem, we propose a novel Multi-Task Learning (MTL) method with data augmentation for medical question understanding. We first establish an equivalence between the tasks of question summarization and Recognizing Question Entailment (RQE) using their definitions in the medical domain. Based on this equivalence, we propose a data augmentation algorithm to use just one dataset to optimize for both tasks, with a weighted MTL loss. We introduce gradually soft parameter-sharing: a constraint for decoder parameters to be close, that is gradually loosened as we move to the highest layer. We show through ablation studies that our proposed novelties improve performance. Our method outperforms existing MTL methods across 4 datasets of medical question pairs, in ROUGE scores, RQE accuracy and human evaluation. Finally, we show that our method fares better than single-task learning under 4 low-resource settings.</abstract>
       <url hash="df3b64a6">2021.acl-long.119</url>
@@ -1745,8 +1745,8 @@
       <author><first>Nedim</first><last>Lipka</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Vlad</first><last>Morariu</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>1583–1595</pages>
@@ -1804,8 +1804,8 @@
       <author><first>Zid</first><last>Mancenido</last></author>
       <author><first>Julie</first><last>Cohen</last></author>
       <author><first>Heather</first><last>Hill</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last></author>
       <pages>1638–1653</pages>
       <abstract>In conversation, uptake happens when a speaker builds on the contribution of their interlocutor by, for example, acknowledging, repeating or reformulating what they have said. In education, teachers’ uptake of student contributions has been linked to higher student achievement. Yet measuring and improving teachers’ uptake at scale is challenging, as existing methods require expensive annotation by experts. We propose a framework for computationally measuring uptake, by (1) releasing a dataset of student-teacher exchanges extracted from US math classroom transcripts annotated for uptake by experts; (2) formalizing uptake as pointwise Jensen-Shannon Divergence (pJSD), estimated via next utterance classification; (3) conducting a linguistically-motivated comparison of different unsupervised measures and (4) correlating these measures with educational outcomes. We find that although repetition captures a significant part of uptake, pJSD outperforms repetition-based baselines, as it is capable of identifying a wider range of uptake phenomena like question answering and reformulation. We apply our uptake measure to three different educational datasets with outcome indicators. Unlike baseline measures, pJSD correlates significantly with instruction quality in all three, providing evidence for its generalizability and for its potential to serve as an automated professional development tool for teachers.</abstract>
       <url hash="d6a5f430">2021.acl-long.130</url>
@@ -1817,7 +1817,7 @@
       <title>A Survey of Code-switching: Linguistic and Social Perspectives for Language Technologies</title>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
-      <author><first>Barbara E.</first><last>Bullock</last></author>
+      <author id="barbara-bullock"><first>Barbara E.</first><last>Bullock</last></author>
       <author><first>Almeida Jacqueline</first><last>Toribio</last></author>
       <pages>1654–1666</pages>
       <abstract>The analysis of data in which multiple languages are represented has gained popularity among computational linguists in recent years. So far, much of this research focuses mainly on the improvement of computational methods and largely ignores linguistic and social aspects of C-S discussed across a wide range of languages within the long-established literature in linguistics. To fill this gap, we offer a survey of code-switching (C-S) covering the literature in linguistics with a reflection on the key issues in language technologies. From the linguistic perspective, we provide an overview of structural and functional patterns of C-S focusing on the literature from European and Indian contexts as highly multilingual areas. From the language technologies perspective, we discuss how massive language models fail to represent diverse C-S types due to lack of appropriate training data, lack of robust evaluation benchmarks for C-S (across multilingual situations and types of C-S) and lack of end-to- end systems that cover sociolinguistic aspects of C-S as well. Our survey will be a step to- wards an outcome of mutual benefit for computational scientists and linguists with a shared interest in multilingualism and C-S.</abstract>
@@ -1830,7 +1830,7 @@
       <title>Learning from the Worst: Dynamically Generated Datasets to Improve Online Hate Detection</title>
       <author><first>Bertie</first><last>Vidgen</last></author>
       <author><first>Tristan</first><last>Thrush</last></author>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <pages>1667–1682</pages>
       <abstract>We present a human-and-model-in-the-loop process for dynamically generating datasets and training better performing and more robust hate detection models. We provide a new dataset of 40,000 entries, generated and labelled by trained annotators over four rounds of dynamic data creation. It includes 15,000 challenging perturbations and each hateful entry has fine-grained labels for the type and target of hate. Hateful entries make up 54% of the dataset, which is substantially higher than comparable datasets. We show that model performance is substantially improved using this approach. Models trained on later rounds of data collection perform better on test sets and are harder for annotators to trick. They also have better performance on HateCheck, a suite of functional tests for online hate detection. We provide the code, dataset and annotation guidelines for other researchers to use.</abstract>
@@ -1847,10 +1847,10 @@
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Sandeep</first><last>Polisetty</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>1683–1698</pages>
       <abstract>To defend against machine-generated fake news, an effective mechanism is urgently needed. We contribute a novel benchmark for fake news detection at the knowledge element level, as well as a solution for this task which incorporates cross-media consistency checking to detect the fine-grained knowledge elements making news articles misinformative. Due to training data scarcity, we also formulate a novel data synthesis method by manipulating knowledge elements within the knowledge graph to generate noisy training data with specific, hard to detect, known inconsistencies. Our detection approach outperforms the state-of-the-art (up to 16.8% accuracy gain), and more critically, yields fine-grained explanations.</abstract>
       <url hash="f4d7d561">2021.acl-long.133</url>
@@ -1890,7 +1890,7 @@
       <author><first>Jun</first><last>Xu</last></author>
       <author><first>Zeyang</first><last>Lei</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
       <pages>1726–1739</pages>
@@ -1995,7 +1995,7 @@
     </paper>
     <paper id="143">
       <title>Implicit Representations of Meaning in Neural Language Models</title>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Maxwell</first><last>Nye</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <pages>1813–1827</pages>
@@ -2010,7 +2010,7 @@
       <author><first>Matthew</first><last>Finlayson</last></author>
       <author><first>Aaron</first><last>Mueller</last></author>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
       <pages>1828–1843</pages>
@@ -2052,7 +2052,7 @@
     <paper id="147">
       <title>Poisoning Knowledge Graph Embeddings via Relation Inference Patterns</title>
       <author><first>Peru</first><last>Bhardwaj</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <author><first>Luca</first><last>Costabello</last></author>
       <author><first>Declan</first><last>O’Sullivan</last></author>
       <pages>1875–1888</pages>
@@ -2078,7 +2078,7 @@
       <title>A Survey of Race, Racism, and Anti-Racism in <fixed-case>NLP</fixed-case></title>
       <author><first>Anjalie</first><last>Field</last></author>
       <author><first>Su Lin</first><last>Blodgett</last></author>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>1905–1925</pages>
       <abstract>Despite inextricable ties between race and language, little work has considered race in NLP research and development. In this work, we survey 79 papers from the ACL anthology that mention race. These papers reveal various types of race-related bias in all stages of NLP model development, highlighting the need for proactive consideration of how NLP systems can uphold racial hierarchies. However, persistent gaps in research on race and NLP remain: race has been siloed as a niche topic and remains ignored in many NLP tasks; most work operationalizes race as a fixed single-dimensional variable with a ground-truth label, which risks reinforcing differences produced by historical racism; and the voices of historically marginalized people are nearly absent in NLP literature. By identifying where and how NLP literature has and has not considered race, especially in comparison to related fields, our work calls for inclusion and racial justice in NLP research practices.</abstract>
@@ -2145,8 +2145,8 @@
     <paper id="154">
       <title><fixed-case>UXLA</fixed-case>: A Robust Unsupervised Data Augmentation Framework for Zero-Resource Cross-Lingual <fixed-case>NLP</fixed-case></title>
       <author><first>M Saiful</first><last>Bari</last></author>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>1978–1992</pages>
       <abstract>Transfer learning has yielded state-of-the-art (SoTA) results in many supervised NLP tasks. However, annotated data for every target task in every target language is rare, especially for low-resource languages. We propose UXLA, a novel unsupervised data augmentation framework for zero-resource transfer learning scenarios. In particular, UXLA aims to solve cross-lingual adaptation problems from a source language task distribution to an unknown target language task distribution, assuming no training label in the target language. At its core, UXLA performs simultaneous self-training with data augmentation and unsupervised sample selection. To show its effectiveness, we conduct extensive experiments on three diverse zero-resource cross-lingual transfer tasks. UXLA achieves SoTA results in all the tasks, outperforming the baselines by a good margin. With an in-depth framework dissection, we demonstrate the cumulative contributions of different components to its success.</abstract>
       <url hash="5c2cbe02">2021.acl-long.154</url>
@@ -2161,7 +2161,7 @@
       <author><first>Yu</first><last>Bao</last></author>
       <author><first>Mingxuan</first><last>Wang</last></author>
       <author><first>Lin</first><last>Qiu</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>1993–2003</pages>
@@ -2219,7 +2219,7 @@
       <title><fixed-case>PIGL</fixed-case>e<fixed-case>T</fixed-case>: Language Grounding Through Neuro-Symbolic Interaction in a 3<fixed-case>D</fixed-case> World</title>
       <author><first>Rowan</first><last>Zellers</last></author>
       <author><first>Ari</first><last>Holtzman</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Roozbeh</first><last>Mottaghi</last></author>
       <author><first>Aniruddha</first><last>Kembhavi</last></author>
       <author><first>Ali</first><last>Farhadi</last></author>
@@ -2286,7 +2286,7 @@
       <author><first>Wenjie</first><last>Zi</last></author>
       <author><first>Keyi</first><last>Tang</last></author>
       <author><first>Chenyang</first><last>Huang</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Simon J.D.</first><last>Prince</last></author>
       <author><first>Yanshuai</first><last>Cao</last></author>
       <pages>2089–2102</pages>
@@ -2298,7 +2298,7 @@
     </paper>
     <paper id="164">
       <title><fixed-case>BERTAC</fixed-case>: Enhancing Transformer-based Language Models with Adversarially Pretrained Convolutional Neural Networks</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
@@ -2325,10 +2325,10 @@
       <title>Explaining Relationships Between Scientific Documents</title>
       <author><first>Kelvin</first><last>Luu</last></author>
       <author><first>Xinyi</first><last>Wu</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Isabel</first><last>Cachola</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2130–2144</pages>
       <abstract>We address the task of explaining relationships between two scientific documents using natural language text. This task requires modeling the complex content of long technical documents, deducing a relationship between these documents, and expressing the details of that relationship in text. In addition to the theoretical interest of this task, successful solutions can help improve researcher efficiency in search and review. In this paper we establish a dataset of 622K examples from 154K documents. We pretrain a large language model to serve as the foundation for autoregressive approaches to the task. We explore the impact of taking different views on the two documents, including the use of dense representations extracted with scientific IE systems. We provide extensive automatic and human evaluations which show the promise of such models, but make clear challenges for future work.</abstract>
       <url hash="2f73e655">2021.acl-long.166</url>
@@ -2583,7 +2583,7 @@
     <paper id="185">
       <title>Style is <fixed-case>NOT</fixed-case> a single variable: Case Studies for Cross-Stylistic Language Understanding</title>
       <author><first>Dongyeop</first><last>Kang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2376–2387</pages>
       <abstract>Every natural text is written in some style. Style is formed by a complex combination of different stylistic factors, including formality markers, emotions, metaphors, etc. One cannot form a complete understanding of a text without considering these factors. The factors combine and co-vary in complex ways to form styles. Studying the nature of the covarying combinations sheds light on stylistic language in general, sometimes called cross-style language understanding. This paper provides the benchmark corpus (XSLUE) that combines existing datasets and collects a new one for sentence-level cross-style language understanding and evaluation. The benchmark contains text in 15 different styles under the proposed four theoretical groupings: figurative, personal, affective, and interpersonal groups. For valid evaluation, we collect an additional diagnostic set by annotating all 15 styles on the same text. Using XSLUE, we propose three interesting cross-style applications in classification, correlation, and generation. First, our proposed cross-style classifier trained with multiple styles together helps improve overall classification performance against individually-trained style classifiers. Second, our study shows that some styles are highly dependent on each other in human-written text. Finally, we find that combinations of some contradictive styles likely generate stylistically less appropriate text. We believe our benchmark and case studies help explore interesting future directions for cross-style research. The preprocessed datasets and code are publicly available.</abstract>
       <url hash="436ca7fd">2021.acl-long.185</url>
@@ -2690,7 +2690,7 @@
     <paper id="193">
       <title>Comprehensive Study: How the Context Information of Different Granularity Affects Dialogue State Tracking?</title>
       <author><first>Puhai</first><last>Yang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>2481–2491</pages>
       <abstract>Dialogue state tracking (DST) plays a key role in task-oriented dialogue systems to monitor the user’s goal. In general, there are two strategies to track a dialogue state: predicting it from scratch and updating it from previous state. The scratch-based strategy obtains each slot value by inquiring all the dialogue history, and the previous-based strategy relies on the current turn dialogue to update the previous dialogue state. However, it is hard for the scratch-based strategy to correctly track short-dependency dialogue state because of noise; meanwhile, the previous-based strategy is not very useful for long-dependency dialogue state tracking. Obviously, it plays different roles for the context information of different granularity to track different kinds of dialogue states. Thus, in this paper, we will study and discuss how the context information of different granularity affects dialogue state tracking. First, we explore how greatly different granularities affect dialogue state tracking. Then, we further discuss how to combine multiple granularities for dialogue state tracking. Finally, we apply the findings about context granularity to few-shot learning scenario. Besides, we have publicly released all codes.</abstract>
@@ -2702,7 +2702,7 @@
     <paper id="194">
       <title><fixed-case>OTT</fixed-case>ers: One-turn Topic Transitions for Open-Domain Dialogue</title>
       <author><first>Karin</first><last>Sevegnani</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <pages>2492–2504</pages>
@@ -2937,7 +2937,7 @@
       <author><first>Sohi</first><last>Sudhir</last></author>
       <author><first>Pushkar</first><last>Mishra</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
       <pages>2700–2717</pages>
       <abstract>On social media platforms, hateful and offensive language negatively impact the mental well-being of users and the participation of people from diverse backgrounds. Automatic methods to detect offensive language have largely relied on datasets with categorical labels. However, comments can vary in their degree of offensiveness. We create the first dataset of English language Reddit comments that has fine-grained, real-valued scores between -1 (maximally supportive) and 1 (maximally offensive). The dataset was annotated using Best–Worst Scaling, a form of comparative annotation that has been shown to alleviate known biases of using rating scales. We show that the method produces highly reliable offensiveness scores. Finally, we evaluate the ability of widely-used neural models to predict offensiveness scores on this new dataset.</abstract>
@@ -2966,7 +2966,7 @@
       <title>Assessing the Representations of Idiomaticity in Vector Models with a Noun Compound Dataset Labeled at Type and Token Levels</title>
       <author><first>Marcos</first><last>Garcia</last></author>
       <author><first>Tiago</first><last>Kramer Vieira</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>2730–2741</pages>
@@ -3021,7 +3021,7 @@
       <author><first>Xinyin</first><last>Ma</last></author>
       <author><first>Zeqi</first><last>Tan</last></author>
       <author><first>Shuai</first><last>Zhang</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Weiming</first><last>Lu</last></author>
       <pages>2782–2794</pages>
       <abstract>Named entity recognition (NER) is a well-studied task in natural language processing. Traditional NER research only deals with flat entities and ignores nested entities. The span-based methods treat entity recognition as a span classification task. Although these methods have the innate ability to handle nested NER, they suffer from high computational cost, ignorance of boundary information, under-utilization of the spans that partially match with entities, and difficulties in long entity recognition. To tackle these issues, we propose a two-stage entity identifier. First we generate span proposals by filtering and boundary regression on the seed spans to locate the entities, and then label the boundary-adjusted span proposals with the corresponding categories. Our method effectively utilizes the boundary information of entities and partially matched spans during training. Through boundary regression, entities of any length can be covered theoretically, which improves the ability to recognize long entities. In addition, many low-quality seed spans are filtered out in the first stage, which reduces the time complexity of inference. Experiments on nested NER datasets demonstrate that our proposed method outperforms previous state-of-the-art models.</abstract>
@@ -3113,12 +3113,12 @@
     <paper id="222">
       <title>Breaking the Corpus Bottleneck for Context-Aware Neural Machine Translation with Cross-Task Pre-training</title>
       <author><first>Linqing</first><last>Chen</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2851–2861</pages>
       <abstract>Context-aware neural machine translation (NMT) remains challenging due to the lack of large-scale document-level parallel corpora. To break the corpus bottleneck, in this paper we aim to improve context-aware NMT by taking the advantage of the availability of both large-scale sentence-level parallel dataset and source-side monolingual documents. To this end, we propose two pre-training tasks. One learns to translate a sentence from source language to target language on the sentence-level parallel dataset while the other learns to translate a document from deliberately noised to original on the monolingual documents. Importantly, the two pre-training tasks are jointly and simultaneously learned via the same model, thereafter fine-tuned on scale-limited parallel documents from both sentence-level and document-level perspectives. Experimental results on four translation tasks show that our approach significantly improves translation performance. One nice property of our approach is that the fine-tuned model can be used to translate both sentences and documents.</abstract>
       <url hash="9ceda8ac">2021.acl-long.222</url>
@@ -3150,7 +3150,7 @@
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Alberto</first><last>Martinelli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>2873–2887</pages>
       <abstract>Five years after the first published proofs of concept, direct approaches to speech translation (ST) are now competing with traditional cascade solutions. In light of this steady progress, can we claim that the performance gap between the two is closed? Starting from this question, we present a systematic comparison between state-of-the-art systems representative of the two paradigms. Focusing on three language directions (English-German/Italian/Spanish), we conduct automatic and manual evaluations, exploiting high-quality professional post-edits and annotations. Our multi-faceted analysis on one of the few publicly available ST benchmarks attests for the first time that: i) the gap between the two paradigms is now closed, and ii) the subtle differences observed in their behavior are not sufficient for humans neither to distinguish them nor to prefer one over the other.</abstract>
@@ -3296,10 +3296,10 @@
     </paper>
     <paper id="235">
       <title>Learning Syntactic Dense Embedding with Correlation Graph for Automatic Readability Assessment</title>
-      <author><first>Xinying</first><last>Qiu</last></author>
+      <author id="xin-ying-qiu"><first>Xinying</first><last>Qiu</last></author>
       <author><first>Yuan</first><last>Chen</last></author>
       <author><first>Hanwu</first><last>Chen</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Yuming</first><last>Shen</last></author>
       <author><first>Dawei</first><last>Lu</last></author>
       <pages>3013–3025</pages>
@@ -3403,7 +3403,7 @@
       <title><fixed-case>O</fixed-case>nline <fixed-case>L</fixed-case>earning Meets <fixed-case>M</fixed-case>achine <fixed-case>T</fixed-case>ranslation Evaluation: Finding the Best Systems with the Least Human Effort</title>
       <author><first>Vânia</first><last>Mendonça</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Alberto</first><last>Sardinha</last></author>
       <author><first>Ana Lúcia</first><last>Santos</last></author>
       <pages>3105–3117</pages>
@@ -3470,16 +3470,16 @@
       <title>Annotating Online Misogyny</title>
       <author><first>Philine</first><last>Zeinert</last></author>
       <author><first>Nanna</first><last>Inie</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>3181–3197</pages>
       <abstract>Online misogyny, a category of online abusive language, has serious and harmful social consequences. Automatic detection of misogynistic language online, while imperative, poses complicated challenges to both data gathering, data annotation, and bias mitigation, as this type of data is linguistically complex and diverse. This paper makes three contributions in this area: Firstly, we describe the detailed design of our iterative annotation process and codebook. Secondly, we present a comprehensive taxonomy of labels for annotating misogyny in natural written language, and finally, we introduce a high-quality dataset of annotated posts sampled from social media posts.</abstract>
       <url hash="cd7efe76">2021.acl-long.247</url>
       <doi>10.18653/v1/2021.acl-long.247</doi>
       <bibkey>zeinert-etal-2021-annotating</bibkey>
       <video href="https://www.youtube.com/watch?v=xayfVkt7gwo"/>
+      <video href="2021.acl-long.247.mp4"/>
       <revision id="1" href="2021.acl-long.247v1" hash="98f2cdc4"/>
       <revision id="2" href="2021.acl-long.247v2" hash="cd7efe76" date="2021-08-31">Typo and grammar fixes</revision>
-      <video href="2021.acl-long.247.mp4"/>
     </paper>
     <paper id="248">
       <title>Few-<fixed-case>NERD</fixed-case>: A Few-shot Named Entity Recognition Dataset</title>
@@ -3541,7 +3541,7 @@
     <paper id="252">
       <title>Joint Models for Answer Verification in Question Answering Systems</title>
       <author><first>Zeyu</first><last>Zhang</last></author>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>3252–3262</pages>
       <abstract>This paper studies joint models for selecting correct answer sentences among the top k provided by answer sentence selection (AS2) modules, which are core components of retrieval-based Question Answering (QA) systems. Our work shows that a critical step to effectively exploiting an answer set regards modeling the interrelated information between pair of answers. For this purpose, we build a three-way multi-classifier, which decides if an answer supports, refutes, or is neutral with respect to another one. More specifically, our neural architecture integrates a state-of-the-art AS2 module with the multi-classifier, and a joint layer connecting all components. We tested our models on WikiQA, TREC-QA, and a real-world dataset. The results show that our models obtain the new state of the art in AS2.</abstract>
@@ -3555,7 +3555,7 @@
       <author><first>Yifan</first><last>Gao</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
       <author><first>Patrick</first><last>Ng</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
       <author><first>Feng</first><last>Nan</last></author>
       <author><first>Dejiao</first><last>Zhang</last></author>
@@ -3578,7 +3578,7 @@
       <author><first>Shuo</first><last>Zhang</last></author>
       <author><first>Jiancheng</first><last>Lv</last></author>
       <author><first>Fuli</first><last>Feng</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>3277–3287</pages>
       <abstract>Hybrid data combining both tabular and textual content (e.g., financial reports) are quite pervasive in the real world. However, Question Answering (QA) over such hybrid data is largely neglected in existing research. In this work, we extract samples from real financial reports to build a new large-scale QA dataset containing both Tabular And Textual data, named TAT-QA, where numerical reasoning is usually required to infer the answer, such as addition, subtraction, multiplication, division, counting, comparison/sorting, and the compositions. We further propose a novel QA model termed TAGOP, which is capable of reasoning over both tables and text. It adopts sequence tagging to extract relevant cells from the table along with relevant spans from the text to infer their semantics, and then applies symbolic reasoning over them with a set of aggregation operators to arrive at the final answer. TAGOP achieves 58.0% inF1, which is an 11.1% absolute increase over the previous best baseline model, according to our experiments on TAT-QA. But this result still lags far behind performance of expert human, i.e.90.8% in F1. It is demonstrated that our TAT-QA is very challenging and can serve as a benchmark for training and testing powerful QA models that address hybrid form data.</abstract>
       <url hash="2742dd4f">2021.acl-long.254</url>
@@ -3698,7 +3698,7 @@
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Robin</first><last>Kurtz</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>3387–3402</pages>
       <abstract>Structured sentiment analysis attempts to extract full opinion tuples from a text, but over time this task has been subdivided into smaller and smaller sub-tasks, e.g., target extraction or targeted polarity classification. We argue that this division has become counterproductive and propose a new unified framework to remedy the situation. We cast the structured sentiment problem as dependency graph parsing, where the nodes are spans of sentiment holders, targets and expressions, and the arcs are the relations between them. We perform experiments on five datasets in four languages (English, Norwegian, Basque, and Catalan) and show that this approach leads to strong improvements over state-of-the-art baselines. Our analysis shows that refining the sentiment graphs with syntactic dependency information further improves results.</abstract>
@@ -3733,7 +3733,7 @@
       <author><first>Bo</first><last>Zheng</last></author>
       <author><first>Shaohan</first><last>Huang</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>3418–3430</pages>
       <abstract>The cross-lingual language models are typically pretrained with masked language modeling on multilingual text or parallel sentences. In this paper, we introduce denoising word alignment as a new cross-lingual pre-training task. Specifically, the model first self-label word alignments for parallel sentences. Then we randomly mask tokens in a bitext pair. Given a masked token, the model uses a pointer network to predict the aligned token in the other language. We alternately perform the above two steps in an expectation-maximization manner. Experimental results show that our method improves cross-lingual transferability on various datasets, especially on the token-level tasks, such as question answering, and structured prediction. Moreover, the model can serve as a pretrained word aligner, which achieves reasonably low error rate on the alignment benchmarks. The code and pretrained parameters are available at github.com/CZWin32768/XLM-Align.</abstract>
@@ -3870,7 +3870,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Runxin</first><last>Xu</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Lei</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>3533–3546</pages>
       <abstract>Document-level event extraction aims to recognize event information from a whole piece of article. Existing methods are not effective due to two challenges of this task: a) the target event arguments are scattered across sentences; b) the correlation among events in a document is non-trivial to model. In this paper, we propose Heterogeneous Graph-based Interaction Model with a Tracker (GIT) to solve the aforementioned two challenges. For the first challenge, GIT constructs a heterogeneous graph interaction network to capture global interactions among different sentences and entity mentions. For the second, GIT introduces a Tracker module to track the extracted events and hence capture the interdependency among the events. Experiments on a large-scale dataset (Zheng et al, 2019) show GIT outperforms the previous methods by 2.8 F1. Further analysis reveals is effective in extracting multiple correlated events and event arguments that scatter across the document.</abstract>
       <url hash="991d2452">2021.acl-long.274</url>
@@ -3882,7 +3882,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Nested Named Entity Recognition via Explicitly Excluding the Influence of the Best Path</title>
       <author><first>Yiran</first><last>Wang</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <pages>3547–3557</pages>
       <abstract>This paper presents a novel method for nested named entity recognition. As a layered method, our method extends the prior second-best path recognition method by explicitly excluding the influence of the best path. Our method maintains a set of hidden states at each time step and selectively leverages them to build a different potential function for recognition at each level. In addition, we demonstrate that recognizing innermost entities first results in better performance than the conventional outermost entities first scheme. We provide extensive experimental results on ACE2004, ACE2005, and GENIA datasets to show the effectiveness and efficiency of our proposed method.</abstract>
@@ -3937,8 +3937,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="279">
       <title>Superbizarre Is Not Superb: Derivational Morphology Improves <fixed-case>BERT</fixed-case>’s Interpretation of Complex Words</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3594–3608</pages>
       <abstract>How does the input segmentation of pretrained language models (PLMs) affect their interpretations of complex words? We present the first study investigating this question, taking BERT as the example PLM and focusing on its semantic representations of English derivatives. We show that PLMs can be interpreted as serial dual-route models, i.e., the meanings of complex words are either stored or else need to be computed from the subwords, which implies that maximally meaningful input tokens should allow for the best generalization on new words. This hypothesis is confirmed by a series of semantic probing tasks on which DelBERT (Derivation leveraging BERT), a model with derivational input segmentation, substantially outperforms BERT with WordPiece segmentation. Our results suggest that the generalization capabilities of PLMs could be further improved if a morphologically-informed vocabulary of input tokens were used.</abstract>
       <url hash="2d0f907d">2021.acl-long.279</url>
@@ -3949,9 +3949,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="280">
       <title><fixed-case>BERT</fixed-case> is to <fixed-case>NLP</fixed-case> what <fixed-case>A</fixed-case>lex<fixed-case>N</fixed-case>et is to <fixed-case>CV</fixed-case>: Can Pre-Trained Language Models Identify Analogies?</title>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>3609–3624</pages>
       <abstract>Analogies play a central role in human commonsense reasoning. The ability to recognize analogies such as “eye is to seeing what ear is to hearing”, sometimes referred to as analogical proportions, shape how we structure knowledge and understand language. Surprisingly, however, the task of identifying such analogies has not yet received much attention in the language model era. In this paper, we analyze the capabilities of transformer-based language models on this unsupervised task, using benchmarks obtained from educational settings, as well as more commonly used datasets. We find that off-the-shelf language models can identify analogies to a certain extent, but struggle with abstract and complex relations, and results are highly sensitive to model architecture and hyperparameters. Overall the best results were obtained with GPT-2 and RoBERTa, while configurations using BERT were not able to outperform word embedding models. Our results raise important questions for future work about how, and to what extent, pre-trained language models capture knowledge about abstract semantic relations.</abstract>
       <url hash="943887b8">2021.acl-long.280</url>
@@ -4066,8 +4066,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Structural Guidance for Transformer Language Models</title>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <pages>3735–3745</pages>
       <abstract>Transformer-based language models pre-trained on large amounts of text data have proven remarkably successful in learning generic transferable linguistic representations. Here we study whether structural guidance leads to more human-like systematic linguistic generalization in Transformer language models without resorting to pre-training on very large amounts of data. We explore two general ideas. The “Generative Parsing” idea jointly models the incremental parse and word sequence as part of the same sequence modeling task. The “Structural Scaffold” idea guides the language model’s representation via additional structure loss that separately predicts the incremental constituency parse. We train the proposed models along with a vanilla Transformer language model baseline on a 14 million-token and a 46 million-token subset of the BLLIP dataset, and evaluate models’ syntactic generalization performances on SG Test Suites and sized BLiMP. Experiment results across two benchmarks suggest converging evidence that generative structural supervisions can induce more robust and humanlike linguistic generalization in Transformer language models without the need for data intensive pre-training.</abstract>
       <url hash="650ea895">2021.acl-long.289</url>
@@ -4092,7 +4092,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="291">
       <title><fixed-case>C</fixed-case>og<fixed-case>A</fixed-case>lign: Learning to Align Textual Neural Representations to Cognitive Language Processing Signals</title>
       <author><first>Yuqi</first><last>Ren</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>3758–3769</pages>
       <abstract>Most previous studies integrate cognitive language processing signals (e.g., eye-tracking or EEG data) into neural models of natural language processing (NLP) just by directly concatenating word embeddings with cognitive features, ignoring the gap between the two modalities (i.e., textual vs. cognitive) and noise in cognitive features. In this paper, we propose a CogAlign approach to these issues, which learns to align textual neural representations to cognitive features. In CogAlign, we use a shared encoder equipped with a modality discriminator to alternatively encode textual and cognitive inputs to capture their differences and commonalities. Additionally, a text-aware attention mechanism is proposed to detect task-related information and to avoid using noise in cognitive features. Experimental results on three NLP tasks, namely named entity recognition, sentiment analysis and relation extraction, show that CogAlign achieves significant improvements with multiple cognitive features over state-of-the-art models on public datasets. Moreover, our model is able to transfer cognitive information to other datasets that do not have any cognitive processing signals.</abstract>
       <url hash="9e2f3e58">2021.acl-long.291</url>
@@ -4119,7 +4119,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Noah</first><last>Constant</last></author>
       <author><first>Mandy</first><last>Guo</last></author>
       <author><first>Girish</first><last>Kumar</last></author>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <author><first>Zarana</first><last>Parekh</last></author>
       <pages>3786–3800</pages>
       <abstract>We present a novel approach to the problem of text style transfer. Unlike previous approaches requiring style-labeled training data, our method makes use of readily-available unlabeled text by relying on the implicit connection in style between adjacent sentences, and uses labeled data only at inference time. We adapt T5 (Raffel et al., 2020), a strong pretrained text-to-text model, to extract a style vector from text and use it to condition the decoder to perform style transfer. As our label-free training results in a style vector space encoding many facets of style, we recast transfers as “targeted restyling” vector operations that adjust specific attributes of the input while preserving others. We demonstrate that training on unlabeled Amazon reviews data results in a model that is competitive on sentiment transfer, even compared to models trained fully on labeled data. Furthermore, applying our novel method to a diverse corpus of unlabeled web text results in a single model capable of transferring along multiple dimensions of style (dialect, emotiveness, formality, politeness, sentiment) despite no additional training and using only a handful of exemplars at inference time.</abstract>
@@ -4211,8 +4211,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Suraj</first><last>Nair</last></author>
       <author><first>Petra</first><last>Galuscakova</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>3881–3895</pages>
       <abstract>This paper proposes an approach to cross-language sentence selection in a low-resource setting. It uses data augmentation and negative sampling techniques on noisy parallel sentence data to directly learn a cross-lingual embedding-based query relevance model. Results show that this approach performs as well as or better than multiple state-of-the-art machine translation + monolingual retrieval systems trained on the same parallel data. Moreover, when a rationale training secondary objective is applied to encourage the model to match word alignment hints from a phrase-based statistical machine translation model, consistent improvements are seen across three language pairs (English-Somali, English-Swahili and English-Tagalog) over a variety of state-of-the-art baselines.</abstract>
       <url hash="c6cfdb82">2021.acl-long.300</url>
@@ -4246,8 +4246,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="303">
       <title><fixed-case>ABCD</fixed-case>: A Graph Framework to Convert Complex Sentences to a Covering Set of Simple Sentences</title>
       <author><first>Yanjun</first><last>Gao</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>3919–3931</pages>
       <abstract>Atomic clauses are fundamental text units for understanding complex sentences. Identifying the atomic sentences within complex sentences is important for applications such as summarization, argument mining, discourse analysis, discourse parsing, and question answering. Previous work mainly relies on rule-based methods dependent on parsing. We propose a new task to decompose each complex sentence into simple sentences derived from the tensed clauses in the source, and a novel problem formulation as a graph edit task. Our neural model learns to Accept, Break, Copy or Drop elements of a graph that combines word adjacency and grammatical dependencies. The full processing pipeline includes modules for graph construction, graph editing, and sentence generation from the output graph. We introduce DeSSE, a new dataset designed to train and evaluate complex sentence decomposition, and MinWiki, a subset of MinWikiSplit. ABCD achieves comparable performance as two parsing baselines on MinWiki. On DeSSE, which has a more even balance of complex sentence types, our model achieves higher accuracy on the number of atomic sentences than an encoder-decoder baseline. Results include a detailed error analysis.</abstract>
       <url hash="97f25d0a">2021.acl-long.303</url>
@@ -4272,7 +4272,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Adversarial Learning for Discourse Rhetorical Structure Parsing</title>
       <author><first>Longyin</first><last>Zhang</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3946–3957</pages>
       <abstract>Text-level discourse rhetorical structure (DRS) parsing is known to be challenging due to the notorious lack of training data. Although recent top-down DRS parsers can better leverage global document context and have achieved certain success, the performance is still far from perfect. To our knowledge, all previous DRS parsers make local decisions for either bottom-up node composition or top-down split point ranking at each time step, and largely ignore DRS parsing from the global view point. Obviously, it is not sufficient to build an entire DRS tree only through these local decisions. In this work, we present our insight on evaluating the pros and cons of the entire DRS tree for global optimization. Specifically, based on recent well-performing top-down frameworks, we introduce a novel method to transform both gold standard and predicted constituency trees into tree diagrams with two color channels. After that, we learn an adversarial bot between gold and fake tree diagrams to estimate the generated DRS trees from a global perspective. We perform experiments on both RST-DT and CDTB corpora and use the original Parseval for performance evaluation. The experimental results show that our parser can substantially improve the performance when compared with previous state-of-the-art parsers.</abstract>
       <url hash="70400942">2021.acl-long.305</url>
@@ -4298,7 +4298,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Tong</first><last>Zhang</last></author>
       <author><first>Long</first><last>Zhang</last></author>
       <author><first>Wei</first><last>Ye</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Jinan</first><last>Sun</last></author>
       <author><first>Xiaoyu</first><last>Zhu</last></author>
       <author><first>Wen</first><last>Zhao</last></author>
@@ -4346,7 +4346,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Dayiheng</first><last>Liu</last></author>
       <author><first>Haibo</first><last>Zhang</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <pages>4008–4018</pages>
       <abstract>A good translation should not only translate the original content semantically, but also incarnate personal traits of the original text. For a real-world neural machine translation (NMT) system, these user traits (e.g., topic preference, stylistic characteristics and expression habits) can be preserved in user behavior (e.g., historical inputs). However, current NMT systems marginally consider the user behavior due to: 1) the difficulty of modeling user portraits in zero-shot scenarios, and 2) the lack of user-behavior annotated parallel dataset. To fill this gap, we introduce a novel framework called user-driven NMT. Specifically, a cache-based module and a user-driven contrastive learning method are proposed to offer NMT the ability to capture potential user traits from their historical inputs under a zero-shot learning fashion. Furthermore, we contribute the first Chinese-English parallel corpus annotated with user behavior called UDT-Corpus. Experimental results confirm that the proposed user-driven NMT can generate user-specific translations.</abstract>
@@ -4359,8 +4359,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>End-to-End Lexically Constrained Machine Translation for Morphologically Rich Languages</title>
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>João Paulo</first><last>Aires</last></author>
-      <author><first>Dusan</first><last>Varis</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>4019–4033</pages>
       <abstract>Lexically constrained machine translation allows the user to manipulate the output sentence by enforcing the presence or absence of certain words and phrases. Although current approaches can enforce terms to appear in the translation, they often struggle to make the constraint word form agree with the rest of the generated output. Our manual analysis shows that 46% of the errors in the output of a baseline constrained model for English to Czech translation are related to agreement. We investigate mechanisms to allow neural machine translation to infer the correct word inflection given lemmatized constraints. In particular, we focus on methods based on training the model with constraints provided as part of the input sequence. Our experiments on English-Czech language pair show that this approach improves translation of constrained terms in both automatic and manual evaluation by reducing errors in agreement. Our approach thus eliminates inflection errors, without introducing new errors or decreasing overall quality of the translation.</abstract>
       <url hash="8c03acd0">2021.acl-long.311</url>
@@ -4371,7 +4371,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="312">
       <title>Handling Extreme Class Imbalance in Technical Logbook Datasets</title>
       <author><first>Farhad</first><last>Akhbardeh</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Travis</first><last>Desell</last></author>
       <pages>4034–4045</pages>
@@ -4472,7 +4472,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="319">
       <title>Breaking Down Walls of Text: How Can <fixed-case>NLP</fixed-case> Benefit Consumer Privacy?</title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Thomas</first><last>Norton</last></author>
       <author><first>Shomir</first><last>Wilson</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
@@ -4505,7 +4505,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="321">
       <title>Reliability Testing for Natural Language Processing Systems</title>
       <author><first>Samson</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Kathy</first><last>Baxter</last></author>
       <author><first>Araz</first><last>Taeihagh</last></author>
       <author><first>Gregory A.</first><last>Bennett</last></author>
@@ -4543,7 +4543,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ildikó</first><last>Pilán</last></author>
       <author><first>David</first><last>Sanchez</last></author>
       <author><first>Montserrat</first><last>Batet</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>4188–4203</pages>
       <abstract>This position paper investigates the problem of automated text anonymisation, which is a prerequisite for secure sharing of documents containing sensitive information about individuals. We summarise the key concepts behind text anonymisation and provide a review of current approaches. Anonymisation methods have so far been developed in two fields with little mutual interaction, namely natural language processing and privacy-preserving data publishing. Based on a case study, we outline the benefits and limitations of these approaches and discuss a number of open challenges, such as (1) how to account for multiple types of semantic inferences, (2) how to strike a balance between disclosure risk and data utility and (3) how to evaluate the quality of the resulting anonymisation. We lay out a case for moving beyond sequence labelling models and incorporate explicit measures of disclosure risk into the text anonymisation process.</abstract>
       <url hash="c41d204e">2021.acl-long.323</url>
@@ -4637,7 +4637,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Societal Biases in Language Generation: Progress and Challenges</title>
       <author><first>Emily</first><last>Sheng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>4275–4293</pages>
       <abstract>Technology for language generation has advanced rapidly, spurred by advancements in pre-training large models on massive amounts of data and the need for intelligent agents to communicate in a natural manner. While techniques can effectively generate fluent text, they can also produce undesirable societal biases that can have a disproportionately negative impact on marginalized populations. Language generation presents unique challenges for biases in terms of direct user interaction and the structure of decoding techniques. To better understand these challenges, we present a survey on societal biases in language generation, focusing on how data and techniques contribute to biases and progress towards reducing biases. Motivated by a lack of studies on biases from decoding techniques, we also conduct experiments to quantify the effects of these techniques. By further discussing general trends and open challenges, we call to attention promising directions for research and the importance of fairness and inclusivity considerations for language generation applications.</abstract>
@@ -4762,7 +4762,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="339">
       <title>Neural Stylistic Response Generation with Disentangled Latent Variables</title>
       <author><first>Qingfu</first><last>Zhu</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>4391–4401</pages>
@@ -4774,7 +4774,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="340">
       <title>Intent Classification and Slot Filling for Privacy Policies</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Jianfeng</first><last>Chi</last></author>
       <author><first>Tu</first><last>Le</last></author>
       <author><first>Thomas</first><last>Norton</last></author>
@@ -4865,7 +4865,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Pedro</first><last>Rodriguez</last></author>
       <author><first>Joe</first><last>Barrow</last></author>
       <author><first>Alexander Miserlis</first><last>Hoyle</last></author>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <pages>4486–4503</pages>
@@ -4881,7 +4881,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ashkan</first><last>Kazemi</last></author>
       <author><first>Kiran</first><last>Garimella</last></author>
       <author><first>Devin</first><last>Gaffney</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <pages>4504–4517</pages>
       <abstract>Manual fact-checking does not scale well to serve the needs of the internet. This issue is further compounded in non-English contexts. In this paper, we discuss claim matching as a possible solution to scale fact-checking. We define claim matching as the task of identifying pairs of textual messages containing claims that can be served with one fact-check. We construct a novel dataset of WhatsApp tipline and public group messages alongside fact-checked claims that are first annotated for containing “claim-like statements” and then matched with potentially similar items and annotated for claim matching. Our dataset contains content in high-resource (English, Hindi) and lower-resource (Bengali, Malayalam, Tamil) languages. We train our own embedding model using knowledge distillation and a high-quality “teacher” model in order to address the imbalance in embedding quality between the low- and high-resource languages in our dataset. We provide evaluations on the performance of our solution and compare with baselines and existing state-of-the-art multilingual embedding models, namely LASER and LaBSE. We demonstrate that our performance exceeds LASER and LaBSE in all settings. We release our annotated datasets, codebooks, and trained embedding model to allow for further research.</abstract>
       <url hash="9f08f16b">2021.acl-long.347</url>
@@ -4921,7 +4921,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="350">
       <title>Syntax-augmented Multilingual <fixed-case>BERT</fixed-case> for Cross-lingual Transfer</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
@@ -4935,7 +4935,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="351">
       <title>How to Adapt Your Pretrained Multilingual Model to 1600 Languages</title>
       <author><first>Abteen</first><last>Ebrahimi</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>4555–4567</pages>
       <abstract>Pretrained multilingual models (PMMs) enable zero-shot learning via cross-lingual transfer, performing best for languages seen during pretraining. While methods exist to improve performance for unseen languages, they have almost exclusively been evaluated using amounts of raw text only available for a small fraction of the world’s languages. In this paper, we evaluate the performance of existing methods to adapt PMMs to new languages using a resource available for close to 1600 languages: the New Testament. This is challenging for two reasons: (1) the small corpus size, and (2) the narrow domain. While performance drops for all approaches, we surprisingly still see gains of up to 17.69% accuracy for part-of-speech tagging and 6.29 F1 for NER on average over all languages as compared to XLM-R. Another unexpected finding is that continued pretraining, the simplest approach, performs best. Finally, we perform a case study to disentangle the effects of domain and size and to shed light on the influence of the finetuning source language.</abstract>
       <url hash="0ec129c8">2021.acl-long.351</url>
@@ -5031,7 +5031,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Recursive Tree-Structured Self-Attention for Answer Sentence Selection</title>
       <author><first>Khalil</first><last>Mrini</last></author>
       <author><first>Emilia</first><last>Farcas</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>4651–4661</pages>
       <abstract>Syntactic structure is an important component of natural language text. Recent top-performing models in Answer Sentence Selection (AS2) use self-attention and transfer learning, but not syntactic structure. Tree structures have shown strong performance in tasks with sentence pair input like semantic relatedness. We investigate whether tree structures can boost performance in AS2. We introduce the Tree Aggregation Transformer: a novel recursive, tree-structured self-attention model for AS2. The recursive nature of our model is able to represent all levels of syntactic parse trees with only one additional self-attention layer. Without transfer learning, we establish a new state of the art on the popular TrecQA and WikiQA benchmark datasets. Additionally, we evaluate our method on four Community Question Answering datasets, and find that tree-structured representations have limitations with noisy user-generated text. We conduct probing experiments to evaluate how our models leverage tree structures across datasets. Our findings show that the ability of tree-structured models to successfully absorb syntactic information is strongly correlated with a higher performance in AS2.</abstract>
       <url hash="5df73d8b">2021.acl-long.358</url>
@@ -5044,7 +5044,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Zikun</first><last>Hu</last></author>
       <author><first>Yixin</first><last>Cao</last></author>
       <author><first>Lifu</first><last>Huang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>4662–4671</pages>
       <abstract>Knowledge Graph (KG) and attention mechanism have been demonstrated effective in introducing and selecting useful information for weakly supervised methods. However, only qualitative analysis and ablation study are provided as evidence. In this paper, we contribute a dataset and propose a paradigm to quantitatively evaluate the effect of attention and KG on bag-level relation extraction (RE). We find that (1) higher attention accuracy may lead to worse performance as it may harm the model’s ability to extract entity mention features; (2) the performance of attention is largely influenced by various noise distribution patterns, which is closely related to real-world datasets; (3) KG-enhanced attention indeed improves RE performance, while not through enhanced attention but by incorporating entity prior; and (4) attention mechanism may exacerbate the issue of insufficient training data. Based on these findings, we show that a straightforward variant of RE model can achieve significant improvements (6% AUC on average) on two real-world datasets as compared with three state-of-the-art baselines. Our codes and datasets are available at <url>https://github.com/zig-kwin-hu/how-KG-ATT-help</url>.</abstract>
       <url hash="27db9987">2021.acl-long.359</url>
@@ -5090,7 +5090,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yan</first><last>Liang</last></author>
       <author><first>Christan</first><last>Grant</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <pages>4694–4705</pages>
       <abstract>Automatic extraction of product attribute values is an important enabling technology in e-Commerce platforms. This task is usually modeled using sequence labeling architectures, with several extensions to handle multi-attribute extraction. One line of previous work constructs attribute-specific models, through separate decoders or entirely separate models. However, this approach constrains knowledge sharing across different attributes. Other contributions use a single multi-attribute model, with different techniques to embed attribute information. But sharing the entire network parameters across all attributes can limit the model’s capacity to capture attribute-specific characteristics. In this paper we present AdaTag, which uses adaptive decoding to handle extraction. We parameterize the decoder with pretrained attribute embeddings, through a hypernetwork and a Mixture-of-Experts (MoE) module. This allows for separate, but semantically correlated, decoders to be generated on the fly for different attributes. This approach facilitates knowledge sharing, while maintaining the specificity of each attribute. Our experiments on a real-world e-Commerce dataset show marked improvements over previous methods.</abstract>
       <url hash="a183f1ea">2021.acl-long.362</url>
@@ -5103,7 +5103,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Zhengbao</first><last>Jiang</last></author>
       <author><first>Jialong</first><last>Han</last></author>
       <author><first>Bunyamin</first><last>Sisman</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <pages>4706–4716</pages>
       <abstract>Integrating extracted knowledge from the Web to knowledge graphs (KGs) can facilitate tasks like question answering. We study relation integration that aims to align free-text relations in subject-relation-object extractions to relations in a target KG. To address the challenge that free-text relations are ambiguous, previous methods exploit neighbor entities and relations for additional context. However, the predictions are made independently, which can be mutually inconsistent. We propose a two-stage Collective Relation Integration (CoRI) model, where the first stage independently makes candidate predictions, and the second stage employs a collective model that accesses all candidate predictions to make globally coherent predictions. We further improve the collective model with augmented data from the portion of the target KG that is otherwise unused. Experiment results on two datasets show that CoRI can significantly outperform the baselines, improving AUC from .677 to .748 and from .716 to .780, respectively.</abstract>
       <url hash="25a7a172">2021.acl-long.363</url>
@@ -5116,7 +5116,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Robert L</first><last>Logan IV</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Dan</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Dan</first><last>Bikel</last></author>
       <pages>4717–4731</pages>
       <abstract>Streaming cross document entity coreference (CDC) systems disambiguate mentions of named entities in a scalable manner via incremental clustering. Unlike other approaches for named entity disambiguation (e.g., entity linking), streaming CDC allows for the disambiguation of entities that are unknown at inference time. Thus, it is well-suited for processing streams of data where new entities are frequently introduced. Despite these benefits, this task is currently difficult to study, as existing approaches are either evaluated on datasets that are no longer available, or omit other crucial details needed to ensure fair comparison. In this work, we address this issue by compiling a large benchmark adapted from existing free datasets, and performing a comprehensive evaluation of a number of novel and existing baseline models. We investigate: how to best encode mentions, which clustering algorithms are most effective for grouping mentions, how models transfer to different domains, and how bounding the number of mentions tracked during inference impacts performance. Our results show that the relative performance of neural and feature-based mention encoders varies across different domains, and in most cases the best performance is achieved using a combination of both approaches. We also find that performance is minimally impacted by limiting the number of tracked mentions.</abstract>
       <url hash="8347957c">2021.acl-long.364</url>
@@ -5132,7 +5132,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>4732–4743</pages>
       <abstract>Temporal Knowledge Graphs (TKGs) have been developed and used in many different areas. Reasoning on TKGs that predicts potential facts (events) in the future brings great challenges to existing models. When facing a prediction task, human beings usually search useful historical information (i.e., clues) in their memories and then reason for future meticulously. Inspired by this mechanism, we propose CluSTeR to predict future facts in a two-stage manner, Clue Searching and Temporal Reasoning, accordingly. Specifically, at the clue searching stage, CluSTeR learns a beam search policy via reinforcement learning (RL) to induce multiple clues from historical facts. At the temporal reasoning stage, it adopts a graph convolution network based sequence method to deduce answers from clues. Experiments on four datasets demonstrate the substantial advantages of CluSTeR compared with the state-of-the-art methods. Moreover, the clues found by CluSTeR further provide interpretability for the results.</abstract>
       <url hash="95c1f489">2021.acl-long.365</url>
@@ -5142,7 +5142,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="366">
       <title>Employing Argumentation Knowledge Graphs for Neural Argument Generation</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Lukas</first><last>Trautner</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
@@ -5156,7 +5156,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="367">
       <title>Learning Span-Level Interactions for Aspect Sentiment Triplet Extraction</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Yew Ken</first><last>Chia</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <pages>4755–4766</pages>
@@ -5222,7 +5222,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Fei</first><last>Li</last></author>
       <author><first>ZhiChao</first><last>Lin</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>4814–4828</pages>
       <abstract>Research on overlapped and discontinuous named entity recognition (NER) has received increasing attention. The majority of previous work focuses on either overlapped or discontinuous entities. In this paper, we propose a novel span-based model that can recognize both overlapped and discontinuous entities jointly. The model includes two major steps. First, entity fragments are recognized by traversing over all possible text spans, thus, overlapped entities can be recognized. Second, we perform relation classification to judge whether a given pair of entity fragments to be overlapping or succession. In this way, we can recognize not only discontinuous entities, and meanwhile doubly check the overlapped entities. As a whole, our model can be regarded as a relation extraction paradigm essentially. Experimental results on multiple benchmark datasets (i.e., CLEF, GENIA and ACE05) show that our model is highly competitive for overlapped and discontinuous NER.</abstract>
       <url hash="47196f73">2021.acl-long.372</url>
@@ -5304,7 +5304,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="378">
       <title>Parameter-Efficient Transfer Learning with Diff Pruning</title>
       <author><first>Demi</first><last>Guo</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
       <pages>4884–4896</pages>
       <abstract>The large size of pretrained networks makes them difficult to deploy for multiple tasks in storage-constrained settings. Diff pruning enables parameter-efficient transfer learning that scales well with new tasks. The approach learns a task-specific “diff” vector that extends the original pretrained parameters. This diff vector is adaptively pruned during training with a differentiable approximation to the L0-norm penalty to encourage sparsity. As the number of tasks increases, diff pruning remains parameter-efficient, as it requires storing only a small diff vector for each task. Since it does not require access to all tasks during training, it is attractive in on-device deployment settings where tasks arrive in stream or even from different providers. Diff pruning can match the performance of finetuned baselines on the GLUE benchmark while only modifying 0.5% of the pretrained model’s parameters per task and scales favorably in comparison to popular pruning approaches.</abstract>
@@ -5384,8 +5384,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Generating <fixed-case>SOAP</fixed-case> Notes from Doctor-Patient Conversations Using Modular Summarization Techniques</title>
       <author><first>Kundan</first><last>Krishna</last></author>
       <author><first>Sopan</first><last>Khosla</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>4958–4972</pages>
       <abstract>Following each patient visit, physicians draft long semi-structured clinical summaries called SOAP notes. While invaluable to clinicians and researchers, creating digital SOAP notes is burdensome, contributing to physician burnout. In this paper, we introduce the first complete pipelines to leverage deep summarization models to generate these notes based on transcripts of conversations between physicians and patients. After exploring a spectrum of methods across the extractive-abstractive spectrum, we propose Cluster2Sent, an algorithm that (i) extracts important utterances relevant to each summary section; (ii) clusters together related utterances; and then (iii) generates one summary sentence per cluster. Cluster2Sent outperforms its purely abstractive counterpart by 8 ROUGE-1 points, and produces significantly more factual and coherent sentences as assessed by expert human evaluators. For reproducibility, we demonstrate similar benefits on the publicly available AMI dataset. Our results speak to the benefits of structuring summaries into sections and annotating supporting evidence when constructing summarization corpora.</abstract>
       <url hash="515b6f42">2021.acl-long.384</url>
@@ -5465,7 +5465,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Kaitao</first><last>Zhang</last></author>
       <author><first>Jie</first><last>Bao</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>5030–5043</pages>
       <abstract>The effectiveness of Neural Information Retrieval (Neu-IR) often depends on a large scale of in-domain relevance training signals, which are not always available in real-world ranking scenarios. To democratize the benefits of Neu-IR, this paper presents MetaAdaptRank, a domain adaptive learning method that generalizes Neu-IR models from label-rich source domains to few-shot target domains. Drawing on source-domain massive relevance supervision, MetaAdaptRank contrastively synthesizes a large number of weak supervision signals for target domains and meta-learns to reweight these synthetic “weak” data based on their benefits to the target-domain ranking accuracy of Neu-IR models. Experiments on three TREC benchmarks in the web, news, and biomedical domains show that MetaAdaptRank significantly improves the few-shot ranking accuracy of Neu-IR models. Further analyses indicate that MetaAdaptRank thrives from both its contrastive weak data synthesis and meta-reweighted data selection. The code and data of this paper can be obtained from <url>https://github.com/thunlp/MetaAdaptRank</url>.</abstract>
       <url hash="1cb203f5">2021.acl-long.390</url>
@@ -5522,7 +5522,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Biao</first><last>Zhang</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Qingqiang</first><last>Wu</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <pages>5076–5085</pages>
@@ -5614,7 +5614,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="401">
       <title>Data Augmentation with Adversarial Training for Cross-Lingual <fixed-case>NLI</fixed-case></title>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Yaxin</first><last>Zhu</last></author>
       <author><first>Zuohui</first><last>Fu</last></author>
       <author><first>Dongkuan</first><last>Xu</last></author>
@@ -5754,7 +5754,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>David</first><last>Harwath</last></author>
       <author><first>Tyler</first><last>Miller</last></author>
       <author><first>Christopher</first><last>Song</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>5284–5300</pages>
       <abstract>In this paper we present the first model for directly synthesizing fluent, natural-sounding spoken audio captions for images that does not require natural language text as an intermediate representation or source of supervision. Instead, we connect the image captioning module and the speech synthesis module with a set of discrete, sub-word speech units that are discovered with a self-supervised visual grounding task. We conduct experiments on the Flickr8k spoken caption dataset in addition to a novel corpus of spoken audio captions collected for the popular MSCOCO dataset, demonstrating that our generated captions also capture diverse visual semantics of the images they describe. We investigate several different intermediate speech representations, and empirically find that the representation must satisfy several important properties to serve as drop-in replacements for text.</abstract>
       <url hash="35e8f58a">2021.acl-long.411</url>
@@ -5806,9 +5806,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Learning to Explain: Generating Stable Explanations Fast</title>
       <author><first>Xuelin</first><last>Situ</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>5340–5355</pages>
       <abstract>The importance of explaining the outcome of a machine learning model, especially a black-box model, is widely acknowledged. Recent approaches explain an outcome by identifying the contributions of input features to this outcome. In environments involving large black-box models or complex inputs, this leads to computationally demanding algorithms. Further, these algorithms often suffer from low stability, with explanations varying significantly across similar examples. In this paper, we propose a Learning to Explain (L2E) approach that learns the behaviour of an underlying explanation algorithm simultaneously from all training examples. Once the explanation algorithm is distilled into an explainer network, it can be used to explain new instances. Our experiments on three classification tasks, which compare our approach to six explanation algorithms, show that L2E is between 5 and 7.5×10ˆ4 times faster than these algorithms, while generating more stable explanations, and having comparable faithfulness to the black-box model.</abstract>
       <url hash="e841b1d3">2021.acl-long.415</url>
@@ -5846,7 +5846,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Enabling Lightweight Fine-tuning for Pre-trained Language Model Compression based on Matrix Product Operators</title>
       <author><first>Peiyu</first><last>Liu</last></author>
       <author><first>Ze-Feng</first><last>Gao</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Zhi-Yuan</first><last>Xie</last></author>
       <author><first>Zhong-Yi</first><last>Lu</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -5969,7 +5969,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Xiaoqing</first><last>Zheng</last></author>
       <author><first>Cho-Jui</first><last>Hsieh</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5482–5492</pages>
       <abstract>Although deep neural networks have achieved prominent performance on many NLP tasks, they are vulnerable to adversarial examples. We propose Dirichlet Neighborhood Ensemble (DNE), a randomized method for training a robust model to defense synonym substitution-based attacks. During training, DNE forms virtual sentences by sampling embedding vectors for each word in an input sentence from a convex hull spanned by the word and its synonyms, and it augments them with the training data. In such a way, the model is robust to adversarial attacks while maintaining the performance on the original clean data. DNE is agnostic to the network architectures and scales to large models (e.g., BERT) for NLP applications. Through extensive experimentation, we demonstrate that our method consistently outperforms recently proposed defense methods by a significant margin across different network architectures and multiple data sets.</abstract>
       <url hash="95f64a43">2021.acl-long.426</url>
@@ -5980,7 +5980,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="427">
       <title>Shortformer: Better Language Modeling using Shorter Inputs</title>
       <author><first>Ofir</first><last>Press</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
       <pages>5493–5505</pages>
       <abstract>Increasing the input length has been a driver of progress in language modeling with transformers. We identify conditions where shorter inputs are not harmful, and achieve perplexity and efficiency improvements through two new methods that decrease input length. First, we show that initially training a model on short subsequences before moving on to longer ones both reduces overall training time and, surprisingly, substantially improves perplexity. Second, we show how to improve the efficiency of recurrence methods in transformers, which let models condition on previously processed tokens when generating sequences that exceed the maximal length the transformer can handle at once. Existing methods require computationally expensive relative position embeddings; we introduce a simple alternative of adding absolute position embeddings to queries and keys instead of to word embeddings, which efficiently produces superior results. We show that these recurrent models also benefit from short input lengths. Combining these techniques speeds up training by a factor of 1.65, reduces memory usage, and substantially improves perplexity on WikiText-103, without adding any parameters.</abstract>
@@ -6108,7 +6108,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>A Human-machine Collaborative Framework for Evaluating Malevolence in Dialogues</title>
       <author><first>Yangjun</first><last>Zhang</last></author>
       <author><first>Pengjie</first><last>Ren</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>5612–5623</pages>
       <abstract>Conversational dialogue systems (CDSs) are hard to evaluate due to the complexity of natural language. Automatic evaluation of dialogues often shows insufficient correlation with human judgements. Human evaluation is reliable but labor-intensive. We introduce a human-machine collaborative framework, HMCEval, that can guarantee reliability of the evaluation outcomes with reduced human effort. HMCEval casts dialogue evaluation as a sample assignment problem, where we need to decide to assign a sample to a human or a machine for evaluation. HMCEval includes a model confidence estimation module to estimate the confidence of the predicted sample assignment, and a human effort estimation module to estimate the human effort should the sample be assigned to human evaluation, as well as a sample assignment execution module that finds the optimum assignment solution based on the estimated confidence and effort. We assess the performance of HMCEval on the task of evaluating malevolence in dialogues. The experimental results show that HMCEval achieves around 99% evaluation accuracy with half of the human effort spared, showing that HMCEval provides reliable evaluation outcomes while reducing human effort by a large amount.</abstract>
       <url hash="46767c5d">2021.acl-long.436</url>
@@ -6136,7 +6136,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Pengjie</first><last>Ren</last></author>
       <author><first>Zhumin</first><last>Chen</last></author>
       <author><first>Zhaochun</first><last>Ren</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>5638–5650</pages>
       <abstract>Conversational Question Simplification (CQS) aims to simplify self-contained questions into conversational ones by incorporating some conversational characteristics, e.g., anaphora and ellipsis. Existing maximum likelihood estimation based methods often get trapped in easily learned tokens as all tokens are treated equally during training. In this work, we introduce a Reinforcement Iterative Sequence Editing (RISE) framework that optimizes the minimum Levenshtein distance through explicit editing actions. RISE is able to pay attention to tokens that are related to conversational characteristics. To train RISE, we devise an Iterative Reinforce Training (IRT) algorithm with a Dynamic Programming based Sampling (DPS) process to improve exploration. Experimental results on two benchmark datasets show that RISE significantly outperforms state-of-the-art methods and generalizes well on unseen data.</abstract>
@@ -6175,9 +6175,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="441">
       <title><fixed-case>D</fixed-case>yna<fixed-case>E</fixed-case>val: Unifying Turn and Dialogue Level Evaluation</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yiming</first><last>Chen</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Thomas</first><last>Friedrichs</last></author>
       <author><first>Grandee</first><last>Lee</last></author>
@@ -6221,7 +6221,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>5711–5724</pages>
       <abstract>Neural chat translation aims to translate bilingual conversational text, which has a broad application in international exchanges and cooperation. Despite the impressive performance of sentence-level and context-aware Neural Machine Translation (NMT), there still remain challenges to translate bilingual conversational text due to its inherent characteristics such as role preference, dialogue coherence, and translation consistency. In this paper, we aim to promote the translation quality of conversational text by modeling the above properties. Specifically, we design three latent variational modules to learn the distributions of bilingual conversational characteristics. Through sampling from these learned distributions, the latent variables, tailored for role preference, dialogue coherence, and translation consistency, are incorporated into the NMT model for better translation. We evaluate our approach on the benchmark dataset BConTrasT (English&lt;-&gt;German) and a self-collected bilingual dialogue corpus, named BMELD (English&lt;-&gt;Chinese). Extensive experiments show that our approach notably boosts the performance over strong baselines by a large margin and significantly surpasses some state-of-the-art context-aware NMT models in terms of BLEU and TER. Additionally, we make the BMELD dataset publicly available for the research community.</abstract>
@@ -6264,7 +6264,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Roi</first><last>Reichart</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>5751–5767</pages>
       <abstract>Few-shot crosslingual transfer has been shown to outperform its zero-shot counterpart with pretrained encoders like multilingual BERT. Despite its growing popularity, little to no attention has been paid to standardizing and analyzing the design of few-shot experiments. In this work, we highlight a fundamental risk posed by this shortcoming, illustrating that the model exhibits a high degree of sensitivity to the selection of few shots. We conduct a large-scale experimental study on 40 sets of sampled few shots for six diverse NLP tasks across up to 40 languages. We provide an analysis of success and failure cases of few-shot transfer, which highlights the role of lexical features. Additionally, we show that a straightforward full model finetuning approach is quite effective for few-shot transfer, outperforming several state-of-the-art few-shot approaches. As a step towards standardizing few-shot crosslingual experimental designs, we make our sampled few shots publicly available.</abstract>
       <url hash="ae4b801b">2021.acl-long.447</url>
@@ -6275,7 +6275,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="448">
       <title>Coreference Reasoning in Machine Reading Comprehension</title>
       <author><first>Mingzhu</first><last>Wu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>5768–5781</pages>
@@ -6303,7 +6303,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>A Conditional Splitting Framework for Efficient Constituency Parsing</title>
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
       <author><first>Xuan-Phi</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>5795–5807</pages>
       <abstract>We introduce a generic seq2seq parsing framework that casts constituency parsing problems (syntactic and discourse parsing) into a series of conditional splitting decisions. Our parsing model estimates the conditional probability distribution of possible splitting points in a given text span and supports efficient top-down decoding, which is linear in number of nodes. The conditional splitting formulation together with efficient beam search inference facilitate structural consistency without relying on expensive structured inference. Crucially, for discourse analysis we show that in our formulation, discourse segmentation can be framed as a special case of parsing which allows us to perform discourse parsing without requiring segmentation as a pre-requisite. Experiments show that our model achieves good results on the standard syntactic parsing tasks under settings with/without pre-trained representations and rivals state-of-the-art (SoTA) methods that are more computationally expensive than ours. In discourse parsing, our method outperforms SoTA by a good margin.</abstract>
@@ -6349,7 +6349,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Linlin</first><last>Liu</last></author>
       <author><first>Bosheng</first><last>Ding</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>5834–5846</pages>
@@ -6361,7 +6361,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="454">
       <title>Lexicon Enhanced <fixed-case>C</fixed-case>hinese Sequence Labeling Using <fixed-case>BERT</fixed-case> Adapter</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Xiyan</first><last>Fu</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Wenming</first><last>Xiao</last></author>
@@ -6379,7 +6379,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Qinzhuo</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5859–5869</pages>
       <abstract>In recent years, math word problem solving has received considerable attention and achieved promising results, but previous methods rarely take numerical values into consideration. Most methods treat the numerical values in the problems as number symbols, and ignore the prominent role of the numerical values in solving the problem. In this paper, we propose a novel approach called NumS2T, which enhances math word problem solving performance by explicitly incorporating numerical values into a sequence-to-tree network. In addition, a numerical properties prediction mechanism is used to capture the category and comparison information of numerals and measure their importance in global expressions. Experimental results on the Math23K and APE datasets demonstrate that our model achieves better performance than existing state-of-the-art models.</abstract>
       <url hash="8461ef94">2021.acl-long.455</url>
@@ -6583,7 +6583,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Bo</first><last>Peng</last></author>
       <author><first>Yi</first><last>Liao</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>6012–6025</pages>
       <abstract>In order to deeply understand the capability of pretrained language models in text generation and conduct a diagnostic evaluation, we propose TGEA, an error-annotated dataset with multiple benchmark tasks for text generation from pretrained language models (PLMs). We use carefully selected prompt words to guide GPT-2 to generate candidate sentences, from which we select 47K for error annotation. Crowdsourced workers manually check each of these sentences and detect 12k erroneous sentences. We create an error taxonomy to cover 24 types of errors occurring in these erroneous sentences according to the nature of errors with respect to linguistics and knowledge (e.g., common sense). For each erroneous span in PLM-generated sentences, we also detect another span that is closely associated with it. Each error is hence manually labeled with comprehensive annotations, including the span of the error, the associated span, minimal correction to the error, the type of the error, and rationale behind the error. Apart from the fully annotated dataset, we also present a detailed description of the data collection procedure, statistics and analysis of the dataset. This is the first dataset with comprehensive annotations for PLM-generated texts, which facilitates the diagnostic evaluation of PLM-based text generation. Furthermore, we use TGEA as a benchmark dataset and propose a series of automatic diagnosis tasks, including error detection, error type classification, associated span detection, error rationale generation, to further promote future study on the automatic error detection and correction on texts generated by pretrained language models.</abstract>
       <url hash="31c4bbd1">2021.acl-long.469</url>
@@ -6762,7 +6762,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Pranav</first><last>Shetty</last></author>
       <author><first>Lucas</first><last>Liu</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Le</first><last>Song</last></author>
       <pages>6178–6190</pages>
       <abstract>We study the problem of learning a named entity recognition (NER) tagger using noisy labels from multiple weak supervision sources. Though cheap to obtain, the labels from weak supervision sources are often incomplete, inaccurate, and contradictory, making it difficult to learn an accurate NER model. To address this challenge, we propose a conditional hidden Markov model (CHMM), which can effectively infer true labels from multi-source noisy labels in an unsupervised way. CHMM enhances the classic hidden Markov model with the contextual representation power of pre-trained language models. Specifically, CHMM learns token-wise transition and emission probabilities from the BERT embeddings of the input tokens to infer the latent true labels from noisy observations. We further refine CHMM with an alternate-training approach (CHMM-ALT). It fine-tunes a BERT-NER model with the labels inferred by CHMM, and this BERT-NER’s output is regarded as an additional weak source to train the CHMM in return. Experiments on four NER benchmarks from various domains show that our method outperforms state-of-the-art weakly supervised NER models by wide margins.</abstract>
@@ -6792,7 +6792,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Linyang</first><last>Li</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
       <pages>6201–6213</pages>
       <abstract>Distant supervision for relation extraction provides uniform bag labels for each sentence inside the bag, while accurate sentence labels are important for downstream applications that need the exact relation type. Directly using bag labels for sentence-level training will introduce much noise, thus severely degrading performance. In this work, we propose the use of negative training (NT), in which a model is trained using complementary labels regarding that “the instance does not belong to these complementary labels”. Since the probability of selecting a true label as a complementary label is low, NT provides less noisy information. Furthermore, the model trained with NT is able to separate the noisy data from the training data. Based on NT, we propose a sentence-level framework, SENT, for distant relation extraction. SENT not only filters the noisy data to construct a cleaner dataset, but also performs a re-labeling process to transform the noisy data into useful training data, thus further benefiting the model’s performance. Experimental results show the significant improvement of the proposed method over previous methods on sentence-level evaluation and de-noise effect.</abstract>
@@ -6805,7 +6805,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>An End-to-End Progressive Multi-Task Learning Framework for Medical Named Entity Recognition and Normalization</title>
       <author><first>Baohang</first><last>Zhou</last></author>
       <author><first>Xiangrui</first><last>Cai</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Xiaojie</first><last>Yuan</last></author>
       <pages>6214–6224</pages>
       <abstract>Medical named entity recognition (NER) and normalization (NEN) are fundamental for constructing knowledge graphs and building QA systems. Existing implementations for medical NER and NEN are suffered from the error propagation between the two tasks. The mispredicted mentions from NER will directly influence the results of NEN. Therefore, the NER module is the bottleneck of the whole system. Besides, the learnable features for both tasks are beneficial to improving the model performance. To avoid the disadvantages of existing models and exploit the generalized representation across the two tasks, we design an end-to-end progressive multi-task learning model for jointly modeling medical NER and NEN in an effective way. There are three level tasks with progressive difficulty in the framework. The progressive tasks can reduce the error propagation with the incremental task settings which implies the lower level tasks gain the supervised signals other than errors from the higher level tasks to improve their performances. Besides, the context features are exploited to enrich the semantic information of entity mentions extracted by NER. The performance of NEN profits from the enhanced entity mention features. The standard entities from knowledge bases are introduced into the NER module for extracting corresponding entity mentions correctly. The empirical results on two publicly available medical literature datasets demonstrate the superiority of our method over nine typical methods.</abstract>
@@ -6852,10 +6852,10 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="488">
       <title>Joint Biomedical Entity and Relation Extraction with Knowledge-Enhanced Collective Inference</title>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <pages>6248–6260</pages>
       <abstract>Compared to the general news domain, information extraction (IE) from biomedical text requires much broader domain knowledge. However, many previous IE methods do not utilize any external knowledge during inference. Due to the exponential growth of biomedical publications, models that do not go beyond their fixed set of parameters will likely fall behind. Inspired by how humans look up relevant information to comprehend a scientific text, we present a novel framework that utilizes external knowledge for joint entity and relation extraction named KECI (Knowledge-Enhanced Collective Inference). Given an input text, KECI first constructs an initial span graph representing its initial understanding of the text. It then uses an entity linker to form a knowledge graph containing relevant background knowledge for the the entity mentions in the text. To make the final predictions, KECI fuses the initial span graph and the knowledge graph into a more refined graph using an attention mechanism. KECI takes a collective approach to link mention spans to entities by integrating global relational information into local representations using graph convolutional networks. Our experimental results show that the framework is highly effective, achieving new state-of-the-art results in two different benchmark datasets: BioRelEx (binding interaction detection) and ADE (adverse drug event extraction). For example, KECI achieves absolute improvements of 4.59% and 4.91% in F1 scores over the state-of-the-art on the BioRelEx entity and relation extraction tasks</abstract>
       <url hash="410cd874">2021.acl-long.488</url>
@@ -6870,7 +6870,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Ahmed</first><last>Elsayed</last></author>
       <author><first>Skatje</first><last>Myers</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>6261–6270</pages>
       <abstract>Biomedical Information Extraction from scientific literature presents two unique and non-trivial challenges. First, compared with general natural language texts, sentences from scientific papers usually possess wider contexts between knowledge elements. Moreover, comprehending the fine-grained scientific entities and events urgently requires domain-specific background knowledge. In this paper, we propose a novel biomedical Information Extraction (IE) model to tackle these two challenges and extract scientific entities and events from English research papers. We perform Abstract Meaning Representation (AMR) to compress the wide context to uncover a clear semantic structure for each complex sentence. Besides, we construct the sentence-level knowledge graph from an external knowledge base and use it to enrich the AMR graph to improve the model’s understanding of complex scientific concepts. We use an edge-conditioned graph attention network to encode the knowledge-enriched AMR graph for biomedical IE tasks. Experiments on the GENIA 2011 dataset show that the AMR and external knowledge have contributed 1.8% and 3.0% absolute F-score gains respectively. In order to evaluate the impact of our approach on real-world problems that involve topic-specific fine-grained knowledge elements, we have also created a new ontology and annotated corpus for entity and event extraction for the COVID-19 scientific literature, which can serve as a new benchmark for the biomedical IE community.</abstract>
       <url hash="0658e0ce">2021.acl-long.489</url>
@@ -6947,7 +6947,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Fangxiang</first><last>Feng</last></author>
       <author><first>Zhanyu</first><last>Ma</last></author>
       <author><first>Xiaojie</first><last>Wang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6319–6329</pages>
       <abstract>Aspect-based sentiment analysis is a fine-grained sentiment classification task. Recently, graph neural networks over dependency trees have been explored to explicitly model connections between aspects and opinion words. However, the improvement is limited due to the inaccuracy of the dependency parsing results and the informal expressions and complexity of online reviews. To overcome these challenges, in this paper, we propose a dual graph convolutional networks (DualGCN) model that considers the complementarity of syntax structures and semantic correlations simultaneously. Particularly, to alleviate dependency parsing errors, we design a SynGCN module with rich syntactic knowledge. To capture semantic correlations, we design a SemGCN module with self-attention mechanism. Furthermore, we propose orthogonal and differential regularizers to capture semantic correlations between words precisely by constraining attention scores in the SemGCN module. The orthogonal regularizer encourages the SemGCN to learn semantically correlated words with less overlap for each word. The differential regularizer encourages the SemGCN to learn semantic features that the SynGCN fails to capture. Experimental results on three public datasets show that our DualGCN model outperforms state-of-the-art methods and verify the effectiveness of our model.</abstract>
       <url hash="7431232e">2021.acl-long.494</url>
@@ -7005,8 +7005,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Keep It Simple: Unsupervised Simplification of Multi-Paragraph Text</title>
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Tobias</first><last>Schnabel</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>6365–6378</pages>
       <abstract>This work presents Keep it Simple (KiS), a new approach to unsupervised text simplification which learns to balance a reward across three properties: fluency, salience and simplicity. We train the model with a novel algorithm to optimize the reward (k-SCST), in which the model proposes several candidate simplifications, computes each candidate’s reward, and encourages candidates that outperform the mean reward. Finally, we propose a realistic text comprehension task as an evaluation method for text simplification. When tested on the English news domain, the KiS model outperforms strong supervised baselines by more than 4 SARI points, and can help people complete a comprehension task an average of 18% faster while retaining accuracy, when compared to the original text.</abstract>
       <url hash="ef17e0a9">2021.acl-long.498</url>
@@ -7073,7 +7073,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title><fixed-case>BERTG</fixed-case>en: Multi-task Generation through <fixed-case>BERT</fixed-case></title>
       <author><first>Faidon</first><last>Mitzalis</last></author>
       <author><first>Ozan</first><last>Caglayan</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>6440–6455</pages>
       <abstract>We present BERTGen, a novel, generative, decoder-only model which extends BERT by fusing multimodal and multilingual pre-trained models VL-BERT and M-BERT, respectively. BERTGen is auto-regressively trained for language generation tasks, namely image captioning, machine translation and multimodal machine translation, under a multi-task setting. With a comprehensive set of evaluations, we show that BERTGen outperforms many strong baselines across the tasks explored. We also show BERTGen’s ability for zero-shot language generation, where it exhibits competitive performance to supervised counterparts. Finally, we conduct ablation studies which demonstrate that BERTGen substantially benefits from multi-tasking and effectively transfers relevant inductive biases from the pre-trained models.</abstract>
@@ -7100,7 +7100,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>Kayo</first><last>Yin</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>6467–6478</pages>
       <abstract>Recent work in neural machine translation has demonstrated both the necessity and feasibility of using inter-sentential context, context from sentences other than those currently being translated. However, while many current methods present model architectures that theoretically can use this extra context, it is often not clear how much they do actually utilize it at translation time. In this paper, we introduce a new metric, conditional cross-mutual information, to quantify usage of context by these models. Using this metric, we measure how much document-level machine translation systems use particular varieties of context. We find that target context is referenced more than source context, and that including more context has a diminishing affect on results. We then introduce a new, simple training method, context-aware word dropout, to increase the usage of context by context-aware models. Experiments show that our method not only increases context usage, but also improves the translation quality according to metrics such as BLEU and COMET, as well as performance on anaphoric pronoun resolution and lexical cohesion contrastive datasets.</abstract>
       <url hash="d2043bd4">2021.acl-long.505</url>
@@ -7112,9 +7112,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Beyond Offline Mapping: Learning Cross-lingual Word Embeddings through Context Anchoring</title>
       <author><first>Aitor</first><last>Ormazabal</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>6479–6489</pages>
       <abstract>Recent research on cross-lingual word embeddings has been dominated by unsupervised mapping approaches that align monolingual embeddings. Such methods critically rely on those embeddings having a similar structure, but it was recently shown that the separate training in different languages causes departures from this assumption. In this paper, we propose an alternative approach that does not have this limitation, while requiring a weak seed dictionary (e.g., a list of identical words) as the only form of supervision. Rather than aligning two fixed embedding spaces, our method works by fixing the target language embeddings, and learning a new set of embeddings for the source language that are aligned with them. To that end, we use an extension of skip-gram that leverages translated context words as anchor points, and incorporates self-learning and iterative restarts to reduce the dependency on the initial dictionary. Our approach outperforms conventional mapping methods on bilingual lexicon induction, and obtains competitive results in the downstream XNLI task.</abstract>
       <url hash="a04ea4ea">2021.acl-long.506</url>
@@ -7127,7 +7127,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Guillaume</first><last>Wenzek</last></author>
       <author><first>Sergey</first><last>Edunov</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
       <author><first>Angela</first><last>Fan</last></author>
       <pages>6490–6500</pages>
@@ -7255,7 +7255,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Fabrizio</first><last>Silvestri</last></author>
       <author><first>Hamed</first><last>Firooz</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <pages>6603–6617</pages>
       <abstract>Propaganda can be defined as a form of communication that aims to influence the opinions or the actions of people towards a specific goal; this is achieved by means of well-defined rhetorical and psychological devices. Propaganda, in the form we know it today, can be dated back to the beginning of the 17th century. However, it is with the advent of the Internet and the social media that propaganda has started to spread on a much larger scale than before, thus becoming major societal and political issue. Nowadays, a large fraction of propaganda in social media is multimodal, mixing textual with visual content. With this in mind, here we propose a new multi-label multimodal task: detecting the type of propaganda techniques used in memes. We further create and release a new corpus of 950 memes, carefully annotated with 22 propaganda techniques, which can appear in the text, in the image, or in both. Our analysis of the corpus shows that understanding both modalities together is essential for detecting these techniques. This is further confirmed in our experiments with several state-of-the-art multimodal models.</abstract>
@@ -7268,8 +7268,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>On the Efficacy of Adversarial Data Collection for Question Answering: Results from a Large-Scale Randomized Study</title>
       <author><first>Divyansh</first><last>Kaushik</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>6618–6633</pages>
       <abstract>In adversarial data collection (ADC), a human workforce interacts with a model in real time, attempting to produce examples that elicit incorrect predictions. Researchers hope that models trained on these more challenging datasets will rely less on superficial patterns, and thus be less brittle. However, despite ADC’s intuitive appeal, it remains unclear when training on adversarial datasets produces more robust models. In this paper, we conduct a large-scale controlled study focused on question answering, assigning workers at random to compose questions either (i) adversarially (with a model in the loop); or (ii) in the standard fashion (without a model). Across a variety of models and datasets, we find that models trained on adversarial data usually perform better on other adversarial datasets but worse on a diverse collection of out-of-domain evaluation sets. Finally, we provide a qualitative analysis of adversarial (vs standard) data, identifying key differences and offering guidance for future research.</abstract>
       <url hash="26907685">2021.acl-long.517</url>
@@ -7310,7 +7310,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Question Answering Over Temporal Knowledge Graphs</title>
       <author><first>Apoorv</first><last>Saxena</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>6663–6676</pages>
       <abstract>Temporal Knowledge Graphs (Temporal KGs) extend regular Knowledge Graphs by providing temporal scopes (start and end times) on each edge in the KG. While Question Answering over KG (KGQA) has received some attention from the research community, QA over Temporal KGs (Temporal KGQA) is a relatively unexplored area. Lack of broad coverage datasets has been another factor limiting progress in this area. We address this challenge by presenting CRONQUESTIONS, the largest known Temporal KGQA dataset, clearly stratified into buckets of structural complexity. CRONQUESTIONS expands the only known previous dataset by a factor of 340x. We find that various state-of-the-art KGQA methods fall far short of the desired performance on this new dataset. In response, we also propose CRONKGQA, a transformer-based solution that exploits recent advances in Temporal KG embeddings, and achieves performance superior to all baselines, with an increase of 120% in accuracy over the next best performing method. Through extensive experiments, we give detailed insights into the workings of CRONKGQA, as well as situations where significant further improvements appear possible. In addition to the dataset, we have released our code as well.</abstract>
       <url hash="672feef4">2021.acl-long.520</url>
@@ -7337,7 +7337,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ximing</first><last>Lu</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>6691–6706</pages>
       <abstract>Despite recent advances in natural language generation, it remains challenging to control attributes of generated text. We propose DExperts: Decoding-time Experts, a decoding-time method for controlled text generation that combines a pretrained language model with “expert” LMs and/or “anti-expert” LMs in a product of experts. Intuitively, under the ensemble, tokens only get high probability if they are considered likely by the experts, and unlikely by the anti-experts. We apply DExperts to language detoxification and sentiment-controlled generation, where we outperform existing controllable generation methods on both automatic and human evaluations. Moreover, because DExperts operates only on the output of the pretrained LM, it is effective with (anti-)experts of smaller size, including when operating on GPT-3. Our work highlights the promise of tuning small LMs on text with (un)desirable attributes for efficient decoding-time steering.</abstract>
@@ -7349,9 +7349,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="523">
       <title>Polyjuice: Generating Counterfactuals for Explaining, Evaluating, and Improving Models</title>
       <author><first>Tongshuang</first><last>Wu</last></author>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Jeffrey</first><last>Heer</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>6707–6723</pages>
       <abstract>While counterfactual examples are useful for analysis and training of NLP models, current generation methods either rely on manual labor to create very few counterfactuals, or only instantiate limited types of perturbations such as paraphrases or word substitutions. We present Polyjuice, a general-purpose counterfactual generator that allows for control over perturbation types and locations, trained by finetuning GPT-2 on multiple datasets of paired sentences. We show that Polyjuice produces diverse sets of realistic counterfactuals, which in turn are useful in various distinct applications: improving training and evaluation on three different tasks (with around 70% less annotation effort than manual generation), augmenting state-of-the-art explanation techniques, and supporting systematic counterfactual error analysis by revealing behaviors easily missed by human experts.</abstract>
       <url hash="5846f631">2021.acl-long.523</url>
@@ -7377,7 +7377,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Learning Latent Structures for Cross Action Phrase Relations in Wet Lab Protocols</title>
       <author><first>Chaitanya</first><last>Kulkarni</last></author>
       <author><first>Jany</first><last>Chan</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Raghu</first><last>Machiraju</last></author>
       <pages>6737–6750</pages>
       <abstract>Wet laboratory protocols (WLPs) are critical for conveying reproducible procedures in biological research. They are composed of instructions written in natural language describing the step-wise processing of materials by specific actions. This process flow description for reagents and materials synthesis in WLPs can be captured by material state transfer graphs (MSTGs), which encode global temporal and causal relationships between actions. Here, we propose methods to automatically generate a MSTG for a given protocol by extracting all action relationships across multiple sentences. We also note that previous corpora and methods focused primarily on local intra-sentence relationships between actions and entities and did not address two critical issues: (i) resolution of implicit arguments and (ii) establishing long-range dependencies across sentences. We propose a new model that incrementally learns latent structures and is better suited to resolving inter-sentence relations and implicit arguments. This model draws upon a new corpus WLP-MSTG which was created by extending annotations in the WLP corpora for inter-sentence relations and implicit arguments. Our model achieves an F1 score of 54.53% for temporal and causal relations in protocols from our corpus, which is a significant improvement over previous models - DyGIE++:28.17%; spERT:27.81%. We make our annotated WLP-MSTG corpus available to the research community.</abstract>
@@ -7393,7 +7393,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Mihir</first><last>Goyal</last></author>
       <author><first>Prakhar</first><last>Goel</last></author>
       <author><first>Puneet</first><last>Mathur</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>6751–6762</pages>
       <abstract>Risk prediction is an essential task in financial markets. Merger and Acquisition (M&amp;A) calls provide key insights into the claims made by company executives about the restructuring of the financial firms. Extracting vocal and textual cues from M&amp;A calls can help model the risk associated with such financial activities. To aid the analysis of M&amp;A calls, we curate a dataset of conference call transcripts and their corresponding audio recordings for the time period ranging from 2016 to 2020. We introduce M3ANet, a baseline architecture that takes advantage of the multimodal multi-speaker input to forecast the financial risk associated with the M&amp;A calls. Empirical results prove that the task is challenging, with the pro-posed architecture performing marginally better than strong BERT-based baselines. We release the M3A dataset and benchmark models to motivate future research on this challenging problem domain.</abstract>
       <url hash="4d447793">2021.acl-long.526</url>
@@ -7406,7 +7406,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Rashad</first><last>Albo Jamara</last></author>
       <author><first>Nico</first><last>Herbig</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>6763–6773</pages>
       <abstract>To translate large volumes of text in a globally connected world, more and more translators are integrating machine translation (MT) and post-editing (PE) into their translation workflows to generate publishable quality translations. While this process has been shown to save time and reduce errors, the task of translation is changing from mostly text production from scratch to fixing errors within useful but partly incorrect MT output. This is affecting the interface design of translation tools, where better support for text editing tasks is required. Here, we present the first study that investigates the usefulness of mid-air hand gestures in combination with the keyboard (GK) for text editing in PE of MT. Guided by a gesture elicitation study with 14 freelance translators, we develop a prototype supporting mid-air hand gestures for cursor placement, text selection, deletion, and reordering. These gestures combined with the keyboard facilitate all editing types required for PE. An evaluation of the prototype shows that the average editing duration of GK is only slightly slower than the standard mouse and keyboard (MK), even though participants are very familiar with the latter, and relative novices to the former. Furthermore, the qualitative analysis shows positive attitudes towards hand gestures for PE, especially when manipulating single words.</abstract>
       <url hash="eba54d17">2021.acl-long.527</url>
@@ -7422,7 +7422,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Liang</first><last>Qiu</last></author>
       <author><first>Siyuan</first><last>Huang</last></author>
       <author><first>Xiaodan</first><last>Liang</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <pages>6774–6786</pages>
       <abstract>Geometry problem solving has attracted much attention in the NLP community recently. The task is challenging as it requires abstract problem understanding and symbolic reasoning with axiomatic knowledge. However, current datasets are either small in scale or not publicly available. Thus, we construct a new large-scale benchmark, Geometry3K, consisting of 3,002 geometry problems with dense annotation in formal language. We further propose a novel geometry solving approach with formal language and symbolic reasoning, called Interpretable Geometry Problem Solver (Inter-GPS). Inter-GPS first parses the problem text and diagram into formal language automatically via rule-based text parsing and neural object detecting, respectively. Unlike implicit learning in existing methods, Inter-GPS incorporates theorem knowledge as conditional rules and performs symbolic reasoning step by step. Also, a theorem predictor is designed to infer the theorem application sequence fed to the symbolic solver for the more efficient and reasonable searching path. Extensive experiments on the Geometry3K and GEOS datasets demonstrate that Inter-GPS achieves significant improvements over existing methods. The project with code and data is available at <url>https://lupantech.github.io/inter-gps</url>.</abstract>
       <url hash="f62be951">2021.acl-long.528</url>
@@ -7432,11 +7432,11 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="529">
       <title>Joint Verification and Reranking for Open Fact Checking Over Tables</title>
-      <author><first>Michael Sejr</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael Sejr</first><last>Schlichtkrull</last></author>
       <author><first>Vladimir</first><last>Karpukhin</last></author>
       <author><first>Barlas</first><last>Oguz</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>6787–6799</pages>
       <abstract>Structured information is an important knowledge source for automatic verification of factual claims. Nevertheless, the majority of existing research into this task has focused on textual data, and the few recent inquiries into structured data have been for the closed-domain setting where appropriate evidence for each claim is assumed to have already been retrieved. In this paper, we investigate verification over structured data in the open-domain setting, introducing a joint reranking-and-verification model which fuses evidence documents in the verification component. Our open-domain model achieves performance comparable to the closed-domain state-of-the-art on the TabFact dataset, and demonstrates performance gains from the inclusion of multiple tables as well as a significant improvement over a heuristic retrieval baseline.</abstract>
@@ -7512,13 +7512,13 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="535">
       <title><fixed-case>C</fixed-case>onvo<fixed-case>S</fixed-case>umm: Conversation Summarization Benchmark and Improved Abstractive Summarization with Argument Mining</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Faiaz</first><last>Rahman</last></author>
       <author><first>Imad</first><last>Rizvi</last></author>
       <author><first>Borui</first><last>Wang</last></author>
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>6866–6880</pages>
       <abstract>While online conversations can cover a vast amount of information in many different formats, abstractive text summarization has primarily focused on modeling solely news articles. This research gap is due, in part, to the lack of standardized datasets for summarizing online discussions. To address this gap, we design annotation protocols motivated by an issues–viewpoints–assertions framework to crowdsource four new datasets on diverse online conversation forms of news comments, discussion forums, community question answering forums, and email threads. We benchmark state-of-the-art models on our datasets and analyze characteristics associated with the data. To create a comprehensive benchmark, we also evaluate these models on widely-used conversation summarization datasets to establish strong baselines in this domain. Furthermore, we incorporate argument mining through graph construction to directly model the issues, viewpoints, and assertions present in a conversation and filter noisy input, showing comparable or improved results according to automatic and human evaluations.</abstract>
       <url hash="3a8f0363">2021.acl-long.535</url>
@@ -7529,10 +7529,10 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="536">
       <title>Improving Factual Consistency of Abstractive Summarization via Question Answering</title>
       <author><first>Feng</first><last>Nan</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
       <author><first>Patrick</first><last>Ng</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Dejiao</first><last>Zhang</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
@@ -7562,7 +7562,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Cross-Lingual Abstractive Summarization with Limited Parallel Resources</title>
       <author><first>Yu</first><last>Bai</last></author>
       <author><first>Yang</first><last>Gao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>6910–6924</pages>
       <abstract>Parallel cross-lingual summarization data is scarce, requiring models to better use the limited available cross-lingual resources. Existing methods to do so often adopt sequence-to-sequence networks with multi-task frameworks. Such approaches apply multiple decoders, each of which is utilized for a specific task. However, these independent decoders share no parameters, hence fail to capture the relationships between the discrete phrases of summaries in different languages, breaking the connections in order to transfer the knowledge of the high-resource languages to low-resource languages. To bridge these connections, we propose a novel Multi-Task framework for Cross-Lingual Abstractive Summarization (MCLAS) in a low-resource setting. Employing one unified decoder to generate the sequential concatenation of monolingual and cross-lingual summaries, MCLAS makes the monolingual summarization task a prerequisite of the CLS task. In this way, the shared decoder learns interactions involving alignments and summary patterns across languages, which encourages attaining knowledge transfer. Experiments on two CLS datasets demonstrate that our model significantly outperforms three baseline models in both low-resource and full-dataset scenarios. Moreover, in-depth analysis on the generated summaries and attention heads verifies that interactions are learned well using MCLAS, which benefits the CLS task under limited parallel resources.</abstract>
       <url hash="63d6fe54">2021.acl-long.538</url>
@@ -7584,7 +7584,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="540">
       <title>Learning Prototypical Functions for Physical Artifacts</title>
       <author><first>Tianyu</first><last>Jiang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>6941–6951</pages>
       <abstract>Humans create things for a reason. Ancient people created spears for hunting, knives for cutting meat, pots for preparing food, etc. The prototypical function of a physical artifact is a kind of commonsense knowledge that we rely on to understand natural language. For example, if someone says “She borrowed the book” then you would assume that she intends to read the book, or if someone asks “Can I use your knife?” then you would assume that they need to cut something. In this paper, we introduce a new NLP task of learning the prototypical uses for human-made physical objects. We use frames from FrameNet to represent a set of common functions for objects, and describe a manually annotated data set of physical objects labeled with their prototypical function. We also present experimental results for this task, including BERT-based models that use predictions from masked patterns as well as artifact sense definitions from WordNet and frame definitions from FrameNet.</abstract>
       <url hash="932892c3">2021.acl-long.540</url>
@@ -7609,8 +7609,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="542">
       <title>Dynamic Contextualized Word Embeddings</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>6970–6984</pages>
       <abstract>Static word embeddings that represent words by a single vector cannot capture the variability of word meaning in different linguistic and extralinguistic contexts. Building on prior work on contextualized and dynamic word embeddings, we introduce dynamic contextualized word embeddings that represent words as a function of both linguistic and extralinguistic context. Based on a pretrained language model (PLM), dynamic contextualized word embeddings model time and social space jointly, which makes them attractive for a range of NLP tasks involving semantic variability. We highlight potential application scenarios by means of qualitative and quantitative analyses on four English datasets.</abstract>
       <url hash="080ba29b">2021.acl-long.542</url>
@@ -7624,7 +7624,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Maike</first><last>Park</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>6985–6998</pages>
       <abstract>While there is a large amount of research in the field of Lexical Semantic Change Detection, only few approaches go beyond a standard benchmark evaluation of existing models. In this paper, we propose a shift of focus from change detection to change discovery, i.e., discovering novel word senses over time from the full corpus vocabulary. By heavily fine-tuning a type-based and a token-based approach on recently published German data, we demonstrate that both models can successfully be applied to discover new words undergoing meaning change. Furthermore, we provide an almost fully automated framework for both evaluation and discovery.</abstract>
       <url hash="b08914c5">2021.acl-long.543</url>
@@ -7692,7 +7692,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="548">
       <title>Cross-replication Reliability - An Empirical Approach to Interpreting Inter-rater Reliability</title>
       <author><first>Ka</first><last>Wong</last></author>
-      <author><first>Praveen</first><last>Paritosh</last></author>
+      <author id="praveen-paritosh"><first>Praveen</first><last>Paritosh</last></author>
       <author><first>Lora</first><last>Aroyo</last></author>
       <pages>7053–7065</pages>
       <abstract>When collecting annotations and labeled data from humans, a standard practice is to use inter-rater reliability (IRR) as a measure of data goodness (Hallgren, 2012). Metrics such as Krippendorff’s alpha or Cohen’s kappa are typically required to be above a threshold of 0.6 (Landis and Koch, 1977). These absolute thresholds are unreasonable for crowdsourced data from annotators with high cultural and training variances, especially on subjective topics. We present a new alternative to interpreting IRR that is more empirical and contextualized. It is based upon benchmarking IRR against baseline measures in a replication, one of which is a novel cross-replication reliability (xRR) measure based on Cohen’s (1960) kappa. We call this approach the xRR framework. We opensource a replication dataset of 4 million human judgements of facial expressions and analyze it with the proposed framework. We argue this framework can be used to measure the quality of crowdsourced datasets.</abstract>
@@ -7758,7 +7758,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Alexandra</first><last>Olteanu</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>7117–7128</pages>
       <abstract>A false contract is more likely to be rejected than a contract is, yet a false key is less likely than a key to open doors. While correctly interpreting and assessing the effects of such adjective-noun pairs (e.g., false key) on the plausibility of given events (e.g., opening doors) underpins many natural language understanding tasks, doing so often requires a significant degree of world knowledge and common-sense reasoning. We introduce ADEPT – a large-scale semantic plausibility task consisting of over 16 thousand sentences that are paired with slightly modified versions obtained by adding an adjective to a noun. Overall, we find that while the task appears easier for human judges (85% accuracy), it proves more difficult for transformer-based models like RoBERTa (71% accuracy). Our experiments also show that neither the adjective itself nor its taxonomic class suffice in determining the correct plausibility judgement, emphasizing the importance of endowing automatic natural language understanding systems with more context sensitivity and common-sense reasoning.</abstract>
       <url hash="5c0773eb">2021.acl-long.553</url>
@@ -7781,7 +7781,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="555">
       <title>Conditional Generation of Temporally-ordered Event Sequences</title>
       <author><first>Shih-Ting</first><last>Lin</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Greg</first><last>Durrett</last></author>
       <pages>7142–7157</pages>
       <abstract>Models of narrative schema knowledge have proven useful for a range of event-related tasks, but they typically do not capture the temporal relationships between events. We propose a single model that addresses both temporal ordering, sorting given events into the order they occurred, and event infilling, predicting new events which fit into an existing temporally-ordered sequence. We use a BART-based conditional generation model that can capture both temporality and common event co-occurrence, meaning it can be flexibly applied to different tasks in this space. Our model is trained as a denoising autoencoder: we take temporally-ordered event sequences, shuffle them, delete some events, and then attempt to recover the original event sequence. This task teaches the model to make inferences given incomplete knowledge about the events in an underlying scenario. On the temporal ordering task, we show that our model is able to unscramble event sequences from existing datasets without access to explicitly labeled temporal training data, outperforming both a BERT-based pairwise model and a BERT-based pointer network. On event infilling, human evaluation shows that our model is able to generate events that fit better temporally into the input events when compared to GPT-2 story completion models.</abstract>
@@ -7821,7 +7821,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="558">
       <title><fixed-case>S</fixed-case>pan<fixed-case>NER</fixed-case>: Named Entity Re-/Recognition as Span Prediction</title>
       <author><first>Jinlan</first><last>Fu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <pages>7183–7195</pages>
       <abstract>Recent years have seen the paradigm shift of Named Entity Recognition (NER) systems from sequence labeling to span prediction. Despite its preliminary effectiveness, the span prediction model’s architectural bias has not been fully understood. In this paper, we first investigate the strengths and weaknesses when the span prediction model is used for named entity recognition compared with the sequence labeling framework and how to further improve it, which motivates us to make complementary advantages of systems based on different paradigms. We then reveal that span prediction, simultaneously, can serve as a system combiner to re-recognize named entities from different systems’ outputs. We experimentally implement 154 systems on 11 datasets, covering three languages, comprehensive results show the effectiveness of span prediction models that both serve as base NER systems and system combiners. We make all codes and datasets available: <url>https://github.com/neulab/spanner</url>, as well as an online system demo: <url>http://spanner.sh</url>. Our model also has been deployed into the ExplainaBoard platform, which allows users to flexibly perform a system combination of top-scoring systems in an interactive way: <url>http://explainaboard.nlpedia.ai/leaderboard/task-ner/</url>.</abstract>
@@ -7896,7 +7896,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Siddharth</first><last>Karamcheti</last></author>
       <author><first>Ranjay</first><last>Krishna</last></author>
       <author><first>Li</first><last>Fei-Fei</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>7265–7281</pages>
       <abstract>Active learning promises to alleviate the massive data needs of supervised machine learning: it has successfully improved sample efficiency by an order of magnitude on traditional tasks like topic classification and object recognition. However, we uncover a striking contrast to this promise: across 5 models and 4 datasets on the task of visual question answering, a wide variety of active learning approaches fail to outperform random selection. To understand this discrepancy, we profile 8 active learning methods on a per-example basis, and identify the problem as collective outliers – groups of examples that active learning methods prefer to acquire but models fail to learn (e.g., questions that ask about text in images or require external knowledge). Through systematic ablation experiments and qualitative visualizations, we verify that collective outliers are a general phenomenon responsible for degrading pool-based active learning. Notably, we show that active learning sample efficiency increases significantly as the number of collective outliers in the active learning pool decreases. We conclude with a discussion and prescriptive recommendations for mitigating the effects of these outliers in future work.</abstract>
       <url hash="8942f361">2021.acl-long.564</url>
@@ -7912,7 +7912,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Sofia</first><last>Serrano</last></author>
       <author><first>Nikita</first><last>Haduong</last></author>
       <author><first>Suchin</first><last>Gururangan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>7282–7296</pages>
       <abstract>Human evaluations are typically considered the gold standard in natural language generation, but as models’ fluency improves, how well can evaluators detect and judge machine-generated text? We run a study assessing non-experts’ ability to distinguish between human- and machine-authored text (GPT2 and GPT3) in three domains (stories, news articles, and recipes). We find that, without training, evaluators distinguished between GPT3- and human-authored text at random chance level. We explore three approaches for quickly training evaluators to better identify GPT3-authored text (detailed instructions, annotated examples, and paired examples) and find that while evaluators’ accuracy improved up to 55%, it did not significantly improve across the three domains. Given the inconsistent results across text domains and the often contradictory reasons evaluators gave for their judgments, we examine the role untrained human evaluations play in NLG evaluation and provide recommendations to NLG researchers for improving human evaluations of text generated from state-of-the-art models.</abstract>
       <url hash="c6ff4c7f">2021.acl-long.565</url>
@@ -7926,7 +7926,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Scientific Credibility of Machine Translation Research: A Meta-Evaluation of 769 Papers</title>
       <author><first>Benjamin</first><last>Marie</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>7297–7306</pages>
       <abstract>This paper presents the first large-scale meta-evaluation of machine translation (MT). We annotated MT evaluations conducted in 769 research papers published from 2010 to 2020. Our study shows that practices for automatic MT evaluation have dramatically changed during the past decade and follow concerning trends. An increasing number of MT evaluations exclusively rely on differences between BLEU scores to draw conclusions, without performing any kind of statistical significance testing nor human evaluation, while at least 108 metrics claiming to be better than BLEU have been proposed. MT evaluations in recent papers tend to copy and compare automatic metric scores from previous work to claim the superiority of a method or an algorithm without confirming neither exactly the same training, validating, and testing data have been used nor the metric scores are comparable. Furthermore, tools for reporting standardized metric scores are still far from being widely adopted by the MT community. After showing how the accumulation of these pitfalls leads to dubious evaluation, we propose a guideline to encourage better automatic MT evaluation along with a simple meta-evaluation scoring method to assess its credibility.</abstract>
       <url hash="3303b539">2021.acl-long.566</url>
@@ -7954,7 +7954,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Intrinsic Dimensionality Explains the Effectiveness of Language Model Fine-Tuning</title>
       <author><first>Armen</first><last>Aghajanyan</last></author>
       <author><first>Sonal</first><last>Gupta</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>7319–7328</pages>
       <abstract>Although pretrained language models can be fine-tuned to produce state-of-the-art results for a very wide range of language understanding tasks, the dynamics of this process are not well understood, especially in the low data regime. Why can we use relatively vanilla gradient descent algorithms (e.g., without strong regularization) to tune a model with hundreds of millions of parameters on datasets with only hundreds or thousands of labeled examples? In this paper, we argue that analyzing fine-tuning through the lens of intrinsic dimension provides us with empirical and theoretical intuitions to explain this remarkable phenomenon. We empirically show that common pre-trained models have a very low intrinsic dimension; in other words, there exists a low dimension reparameterization that is as effective for fine-tuning as the full parameter space. For example, by optimizing only 200 trainable parameters randomly projected back into the full space, we can tune a RoBERTa model to achieve 90% of the full parameter performance levels on MRPC. Furthermore, we empirically show that pre-training implicitly minimizes intrinsic dimension and, perhaps surprisingly, larger models tend to have lower intrinsic dimension after a fixed number of pre-training updates, at least in part explaining their extreme effectiveness. Lastly, we connect intrinsic dimensionality with low dimensional task representations and compression based generalization bounds to provide intrinsic-dimension-based generalization bounds that are independent of the full parameter count.</abstract>
       <url hash="6dbe6ed0">2021.acl-long.568</url>
@@ -8010,7 +8010,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
   <volume id="short" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</booktitle>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Fei</first><last>Xia</last></editor>
       <editor><first>Wenjie</first><last>Li</last></editor>
       <editor><first>Roberto</first><last>Navigli</last></editor>
@@ -8116,7 +8116,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="8">
       <title>Attention Flows are Shapley Value Explanations</title>
       <author><first>Kawin</first><last>Ethayarajh</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>49–54</pages>
       <abstract>Shapley Values, a solution to the credit assignment problem in cooperative game theory, are a popular type of explanation in machine learning, having been used to explain the importance of features, embeddings, and even neurons. In NLP, however, leave-one-out and attention-based explanations still predominate. Can we draw a connection between these different methods? We formally prove that — save for the degenerate case — attention weights and leave-one-out values cannot be Shapley Values. Attention flow is a post-processed variant of attention weights obtained by running the max-flow algorithm on the attention graph. Perhaps surprisingly, we prove that attention flows are indeed Shapley Values, at least at the layerwise level. Given the many desirable theoretical qualities of Shapley Values — which has driven their adoption among the ML community — we argue that NLP practitioners should, when possible, adopt attention flow explanations alongside more traditional ones.</abstract>
       <url hash="8e5cd30b">2021.acl-short.8</url>
@@ -8207,7 +8207,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Denise</first><last>Diaz</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Xian</first><last>Li</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>99–109</pages>
       <abstract>Is bias amplified when neural machine translation (NMT) models are optimized for speed and evaluated on generic test sets using BLEU? We investigate architectures and techniques commonly used to speed up decoding in Transformer-based models, such as greedy search, quantization, average attention networks (AANs) and shallow decoder models and show their effect on gendered noun translation. We construct a new gender bias test set, SimpleGEN, based on gendered noun phrases in which there is a single, unambiguous, correct answer. While we find minimal overall BLEU degradation as we apply speed optimizations, we observe that gendered noun translation performance degrades at a much faster rate.</abstract>
       <url hash="e9b1e147">2021.acl-short.15</url>
@@ -8381,7 +8381,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Hwanhee</first><last>Lee</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
       <pages>220–226</pages>
       <abstract>Despite the success of various text generation metrics such as BERTScore, it is still difficult to evaluate the image captions without enough reference captions due to the diversity of the descriptions. In this paper, we introduce a new metric UMIC, an Unreferenced Metric for Image Captioning which does not require reference captions to evaluate image captions. Based on Vision-and-Language BERT, we train UMIC to discriminate negative captions via contrastive learning. Also, we observe critical problems of the previous benchmark dataset (i.e., human annotations) on image captioning metric, and introduce a new collection of human annotations on the generated captions. We validate UMIC on four datasets, including our new dataset, and show that UMIC has a higher correlation than all previous metrics that require multiple references.</abstract>
@@ -8395,7 +8395,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Anchor-based Bilingual Word Embeddings for Low-Resource Languages</title>
       <author><first>Tobias</first><last>Eder</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>227–232</pages>
       <abstract>Good quality monolingual word embeddings (MWEs) can be built for languages which have large amounts of unlabeled text. MWEs can be aligned to bilingual spaces using only a few thousand word translation pairs. For low resource languages training MWEs monolingually results in MWEs of poor quality, and thus poor bilingual word embeddings (BWEs) as well. This paper proposes a new approach for building BWEs in which the vector space of the high resource source language is used as a starting point for training an embedding space for the low resource target language. By using the source vectors as anchors the vector spaces are automatically aligned during training. We experiment on English-German, English-Hiligaynon and English-Macedonian. We show that our approach results not only in improved BWEs and bilingual lexicon induction performance, but also in improved target language MWE quality as measured using monolingual word similarity.</abstract>
       <url hash="835dd543">2021.acl-short.30</url>
@@ -8434,7 +8434,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="33">
       <title>Reinforcement Learning for Abstractive Question Summarization with Question-aware Semantic Rewards</title>
       <author><first>Shweta</first><last>Yadav</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Asma</first><last>Ben Abacha</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
       <pages>249–255</pages>
@@ -8452,8 +8452,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ibrahim</first><last>Abdelaziz</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
       <author><first>Pavan</first><last>Kapanipathi</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Alexander</first><last>Gray</last></author>
       <pages>256–262</pages>
       <abstract>Relation linking is a crucial component of Knowledge Base Question Answering systems. Existing systems use a wide variety of heuristics, or ensembles of multiple systems, heavily relying on the surface question text. However, the explicit semantic parse of the question is a rich source of relation information that is not taken advantage of. We propose a simple transformer-based neural model for relation linking that leverages the AMR semantic parse of a sentence. Our system significantly outperforms the state-of-the-art on 4 popular benchmark datasets. These are based on either DBpedia or Wikidata, demonstrating that our approach is effective across KGs.</abstract>
@@ -8491,9 +8491,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="37">
       <title><fixed-case>MOLEMAN</fixed-case>: Mention-Only Linking of Entities with a Mention Annotation Network</title>
       <author><first>Nicholas</first><last>FitzGerald</last></author>
-      <author><first>Dan</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Dan</first><last>Bikel</last></author>
       <author><first>Jan</first><last>Botha</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>278–285</pages>
@@ -8547,7 +8547,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Renshen</first><last>Wang</last></author>
       <author><first>Yasuhisa</first><last>Fujii</last></author>
       <author><first>Siyang</first><last>Qin</last></author>
-      <author><first>Ashok</first><last>Popat</last></author>
+      <author id="ashok-popat"><first>Ashok</first><last>Popat</last></author>
       <author><first>Tomas</first><last>Pfister</last></author>
       <pages>314–321</pages>
       <abstract>Natural reading orders of words are crucial for information extraction from form-like documents. Despite recent advances in Graph Convolutional Networks (GCNs) on modeling spatial layout patterns of documents, they have limited ability to capture reading orders of given word-level node representations in a graph. We propose Reading Order Equivariant Positional Encoding (ROPE), a new positional encoding technique designed to apprehend the sequential presentation of words in documents. ROPE generates unique reading order codes for neighboring words relative to the target word given a word-level graph connectivity. We study two fundamental document entity extraction tasks including word labeling and word grouping on the public FUNSD dataset and a large-scale payment dataset. We show that ROPE consistently improves existing GCNs with a margin up to 8.4% F1-score.</abstract>
@@ -8609,8 +8609,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Modeling Task-Aware <fixed-case>MIMO</fixed-case> Cardinality for Efficient Multilingual Neural Machine Translation</title>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>361–367</pages>
       <abstract>Neural machine translation has achieved great success in bilingual settings, as well as in multilingual settings. With the increase of the number of languages, multilingual systems tend to underperform their bilingual counterparts. Model capacity has been found crucial for massively multilingual NMT to support language pairs with varying typological characteristics. Previous work increases the modeling capacity by deepening or widening the Transformer. However, modeling cardinality based on aggregating a set of transformations with the same topology has been proven more effective than going deeper or wider when increasing capacity. In this paper, we propose to efficiently increase the capacity for multilingual NMT by increasing the cardinality. Unlike previous work which feeds the same input to several transformations and merges their outputs into one, we present a Multi-Input-Multi-Output (MIMO) architecture that allows each transformation of the block to have its own input. We also present a task-aware attention mechanism to learn to selectively utilize individual transformations from a set of transformations for different translation directions. Our model surpasses previous work and establishes a new state-of-the-art on the large scale OPUS-100 corpus while being 1.31 times as fast.</abstract>
       <url hash="da0710be">2021.acl-short.46</url>
@@ -8626,7 +8626,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>368–374</pages>
       <abstract>kNN-MT, recently proposed by Khandelwal et al. (2020a), successfully combines pre-trained neural machine translation (NMT) model with token-level k-nearest-neighbor (kNN) retrieval to improve the translation accuracy. However, the traditional kNN algorithm used in kNN-MT simply retrieves a same number of nearest neighbors for each target token, which may cause prediction errors when the retrieved neighbors include noises. In this paper, we propose Adaptive kNN-MT to dynamically determine the number of k for each target token. We achieve this by introducing a light-weight Meta-k Network, which can be efficiently trained with only a few training samples. On four benchmark machine translation datasets, we demonstrate that the proposed method is able to effectively filter out the noises in retrieval results and significantly outperforms the vanilla kNN-MT model. Even more noteworthy is that the Meta-k Network learned on one domain could be directly applied to other domains and obtain consistent improvements, illustrating the generality of our method. Our implementation is open-sourced at <url>https://github.com/zhengxxn/adaptive-knn-mt</url>.</abstract>
       <url hash="24f9f4e4">2021.acl-short.47</url>
@@ -8705,7 +8705,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="53">
       <title>Discrete Cosine Transform as Universal Sentence Encoder</title>
       <author><first>Nada</first><last>Almarwani</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>419–426</pages>
       <abstract>Modern sentence encoders are used to generate dense vector representations that capture the underlying linguistic characteristics for a sequence of words, including phrases, sentences, or paragraphs. These kinds of representations are ideal for training a classifier for an end task such as sentiment analysis, question answering and text classification. Different models have been proposed to efficiently generate general purpose sentence representations to be used in pretraining protocols. While averaging is the most commonly used efficient sentence encoder, Discrete Cosine Transform (DCT) was recently proposed as an alternative that captures the underlying syntactic characteristics of a given text without compromising practical efficiency compared to averaging. However, as with most other sentence encoders, the DCT sentence encoder was only evaluated in English. To this end, we utilize DCT encoder to generate universal sentence representation for different languages such as German, French, Spanish and Russian. The experimental results clearly show the superior effectiveness of DCT encoding in which consistent performance improvements are achieved over strong baselines on multiple standardized datasets</abstract>
       <url hash="b3776fff">2021.acl-short.53</url>
@@ -8728,8 +8728,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="55">
       <title>An Exploratory Analysis of Multilingual Word-Level Quality Estimation with Cross-Lingual Transformers</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>434–440</pages>
       <abstract>Most studies on word-level Quality Estimation (QE) of machine translation focus on language-specific models. The obvious disadvantages of these approaches are the need for labelled data for each language pair and the high cost required to maintain several language-specific models. To overcome these problems, we explore different approaches to multilingual, word-level QE. We show that multilingual QE models perform on par with the current language-specific models. In the cases of zero-shot and few-shot QE, we demonstrate that it is possible to accurately predict word-level quality for any given new language pair from models trained on other language pairs. Our findings suggest that the word-level QE models based on powerful pre-trained transformers that we propose in this paper generalise well across languages, making them more useful in real-world scenarios.</abstract>
       <url hash="5615b329">2021.acl-short.55</url>
@@ -8743,7 +8743,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Chong</first><last>Li</last></author>
       <author><first>Cenyuan</first><last>Zhang</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>441–446</pages>
       <abstract>A sequence-to-sequence learning with neural networks has empirically proven to be an effective framework for Chinese Spelling Correction (CSC), which takes a sentence with some spelling errors as input and outputs the corrected one. However, CSC models may fail to correct spelling errors covered by the confusion sets, and also will encounter unseen ones. We propose a method, which continually identifies the weak spots of a model to generate more valuable training instances, and apply a task-specific pre-training strategy to enhance the model. The generated adversarial examples are gradually added to the training set. Experimental results show that such an adversarial training method combined with the pre-training strategy can improve both the generalization and robustness of multiple CSC models across three different datasets, achieving state-of-the-art performance for CSC task.</abstract>
       <url hash="b6e1c384">2021.acl-short.56</url>
@@ -8768,7 +8768,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="58">
       <title>An Empirical Study on Adversarial Attack on <fixed-case>NMT</fixed-case>: Languages and Positions Matter</title>
       <author><first>Zhiyuan</first><last>Zeng</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>454–460</pages>
       <abstract>In this paper, we empirically investigate adversarial attack on NMT from two aspects: languages (the source vs. the target language) and positions (front vs. rear). For autoregressive NMT models that generate target words from left to right, we observe that adversarial attack on the source language is more effective than on the target language, and that attacking front positions of target sentences or positions of source sentences aligned to the front positions of corresponding target sentences is more effective than attacking other positions. We further exploit the attention distribution of the victim model to attack source sentences at positions that have a strong association with front target words. Experiment results demonstrate that our attention-based adversarial attack is more effective than adversarial attacks by sampling positions randomly or according to gradients.</abstract>
       <url hash="3e9fbe24">2021.acl-short.58</url>
@@ -8779,7 +8779,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="59">
       <title><fixed-case>O</fixed-case>nto<fixed-case>GUM</fixed-case>: Evaluating Contextualized <fixed-case>SOTA</fixed-case> Coreference Resolution on 12 More Genres</title>
       <author><first>Yilun</first><last>Zhu</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>461–467</pages>
       <abstract>SOTA coreference resolution produces increasingly impressive scores on the OntoNotes benchmark. However lack of comparable data following the same scheme for more genres makes it difficult to evaluate generalizability to open domain data. This paper provides a dataset and comprehensive evaluation showing that the latest neural LM based end-to-end systems degrade very substantially out of domain. We make an OntoNotes-like coreference dataset called OntoGUM publicly available, converted from GUM, an English corpus covering 12 genres, using deterministic rules, which we evaluate. Thanks to the rich syntactic and discourse annotations in GUM, we are able to create the largest human-annotated coreference corpus following the OntoNotes guidelines, and the first to be evaluated for consistency with the OntoNotes scheme. Out-of-domain evaluation across 12 genres shows nearly 15-20% degradation for both deterministic and deep learning systems, indicating a lack of generalizability or covert overfitting in existing coreference resolution models.</abstract>
@@ -8863,7 +8863,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yijin</first><last>Liu</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>511–516</pages>
       <abstract>Recently, token-level adaptive training has achieved promising improvement in machine translation, where the cross-entropy loss function is adjusted by assigning different training weights to different tokens, in order to alleviate the token imbalance problem. However, previous approaches only use static word frequency information in the target language without considering the source language, which is insufficient for bilingual tasks like machine translation. In this paper, we propose a novel bilingual mutual information (BMI) based adaptive objective, which measures the learning difficulty for each target token from the perspective of bilingualism, and assigns an adaptive weight accordingly to improve token-level adaptive training. This method assigns larger training weights to tokens with higher BMI, so that easy tokens are updated with coarse granularity while difficult tokens are updated with fine granularity. Experimental results on WMT14 English-to-German and WMT19 Chinese-to-English demonstrate the superiority of our approach compared with the Transformer baseline and previous token-level adaptive training approaches. Further analyses confirm that our method can improve the lexical diversity.</abstract>
@@ -8894,7 +8894,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Rajiv</first><last>Jain</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Vlad</first><last>Morariu</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Dinesh</first><last>Manocha</last></author>
       <pages>524–533</pages>
       <abstract>We present TIMERS - a TIME, Rhetorical and Syntactic-aware model for document-level temporal relation classification in the English language. Our proposed method leverages rhetorical discourse features and temporal arguments from semantic role labels, in addition to traditional local syntactic features, trained through a Gated Relational-GCN. Extensive experiments show that the proposed model outperforms previous methods by 5-18% on the TDDiscourse, TimeBank-Dense, and MATRES datasets due to our discourse-level modeling.</abstract>
@@ -8921,8 +8921,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Jiahuan</first><last>Li</last></author>
       <author><first>Yutong</first><last>Shen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>543–549</pages>
       <abstract>Subword segmentation algorithms have been a <i>de facto</i> choice when building neural machine translation systems. However, most of them need to learn a segmentation model based on some heuristics, which may produce sub-optimal segmentation. This can be problematic in some scenarios when the target language has rich morphological changes or there is not enough data for learning compact composition rules. Translating at fully character level has the potential to alleviate the issue, but empirical performances of character-based models has not been fully explored. In this paper, we present an in-depth comparison between character-based and subword-based NMT systems under three settings: translating to typologically diverse languages, training with low resource, and adapting to unseen domains. Experiment results show strong competitiveness of character-based models. Further analyses show that compared to subword-based models, character-based models are better at handling morphological phenomena, generating rare and unknown words, and more suitable for transferring to unseen domains.</abstract>
       <url hash="0dd4c302">2021.acl-short.69</url>
@@ -8936,8 +8936,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Zheng</first><last>Hu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Hanqian</first><last>Wu</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>550–557</pages>
       <abstract>Chinese word segmentation (CWS) is undoubtedly an important basic task in natural language processing. Previous works only focus on the textual modality, but there are often audio and video utterances (such as news broadcast and face-to-face dialogues), where textual, acoustic and visual modalities normally exist. To this end, we attempt to combine the multi-modality (mainly the converted text and actual voice information) to perform CWS. In this paper, we annotate a new dataset for CWS containing text and audio. Moreover, we propose a time-dependent multi-modal interactive model based on Transformer framework to integrate multi-modal information for word sequence labeling. The experimental results on three different training sets show the effectiveness of our approach with fusing text and audio.</abstract>
       <url hash="0034d4a0">2021.acl-short.70</url>
@@ -9011,7 +9011,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Don’t Let Discourse Confine Your Model: Sequence Perturbations for Improved Event Language Models</title>
       <author><first>Mahnaz</first><last>Koupaee</last></author>
       <author><first>Greg</first><last>Durrett</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <pages>599–604</pages>
       <abstract>Event language models represent plausible sequences of events. Most existing approaches train autoregressive models on text, which successfully capture event co-occurrence but unfortunately constrain the model to follow the discourse order in which events are presented. Other domains may employ different discourse orders, and for many applications, we may care about different notions of ordering (e.g., temporal) or not care about ordering at all (e.g., when predicting related events in a schema). We propose a simple yet surprisingly effective strategy for improving event language models by perturbing event sequences so we can relax model dependence on text order. Despite generating completely synthetic event orderings, we show that this technique improves the performance of the event language models on both applications and out-of-domain events data.</abstract>
@@ -9099,7 +9099,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title><fixed-case>QA</fixed-case>-Driven Zero-shot Slot Filling with Weak Supervision Pretraining</title>
       <author><first>Xinya</first><last>Du</last></author>
       <author><first>Luheng</first><last>He</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Panupong</first><last>Pasupat</last></author>
       <author><first>Yuan</first><last>Zhang</last></author>
@@ -9164,7 +9164,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="88">
       <title>Question Generation for Adaptive Education</title>
       <author><first>Megha</first><last>Srivastava</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>692–701</pages>
       <abstract>Intelligent and adaptive online education systems aim to make high-quality education available for a diverse range of students. However, existing systems usually depend on a pool of hand-made questions, limiting how fine-grained and open-ended they can be in adapting to individual students. We explore targeted question generation as a controllable sequence generation task. We first show how to fine-tune pre-trained language models for deep knowledge tracing (LM-KT). This model accurately predicts the probability of a student answering a question correctly, and generalizes to questions not seen in training. We then use LM-KT to specify the objective and data for training a model to generate questions conditioned on the student and target difficulty. Our results show we succeed at generating novel, well-calibrated language translation questions for second language learners from a real online education platform.</abstract>
       <url hash="3bae0421">2021.acl-short.88</url>
@@ -9193,7 +9193,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Wenlan</first><last>Wei</last></author>
       <author><first>Yichen</first><last>Zhang</last></author>
       <author><first>Luntian</first><last>Mou</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Pengtao</first><last>Xie</last></author>
       <pages>708–718</pages>
       <abstract>Pathology imaging is broadly used for identifying the causes and effects of diseases or injuries. Given a pathology image, being able to answer questions about the clinical findings contained in the image is very important for medical decision making. In this paper, we aim to develop a pathological visual question answering framework to analyze pathology images and answer medical questions related to these images. To build such a framework, we create PathVQA, a VQA dataset with 32,795 questions asked from 4,998 pathology images. We also propose a three-level optimization framework which performs self-supervised pretraining and VQA finetuning end-to-end to learn powerful visual and textual representations jointly and automatically identifies and excludes noisy self-supervised examples from pretraining. We perform experiments on our created PathVQA dataset and the results demonstrate the effectiveness of our proposed methods. The datasets and code are available at <url>https://github.com/UCSD-AI4H/PathVQA</url></abstract>
@@ -9220,7 +9220,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="92">
       <title>m<fixed-case>TVR</fixed-case>: Multilingual Moment Retrieval in Videos</title>
       <author><first>Jie</first><last>Lei</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>726–734</pages>
       <abstract>We introduce mTVR, a large-scale multilingual video moment retrieval dataset, containing 218K English and Chinese queries from 21.8K TV show video clips. The dataset is collected by extending the popular TVR dataset (in English) with paired Chinese queries and subtitles. Compared to existing moment retrieval datasets, mTVR is multilingual, larger, and comes with diverse annotations. We further propose mXML, a multilingual moment retrieval model that learns and operates on data from both languages, via encoder parameter sharing and language neighborhood constraints. We demonstrate the effectiveness of mXML on the newly collected mTVR dataset, where mXML outperforms strong monolingual baselines while using fewer parameters. In addition, we also provide detailed dataset analyses and model ablations. Data and code are publicly available at <url>https://github.com/jayleicn/mTVRetrieval</url></abstract>
@@ -9258,7 +9258,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Quotation Recommendation and Interpretation Based on Transformation from Queries to Quotations</title>
       <author><first>Lingzhi</first><last>Wang</last></author>
       <author><first>Xingshan</first><last>Zeng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>754–758</pages>
       <abstract>To help individuals express themselves better, quotation recommendation is receiving growing attention. Nevertheless, most prior efforts focus on modeling quotations and queries separately and ignore the relationship between the quotations and the queries. In this work, we introduce a transformation matrix that directly maps the query representations to quotation representations. To better learn the mapping relationship, we employ a mapping loss that minimizes the distance of two semantic spaces (one for quotation and another for mapped-query). Furthermore, we explore using the words in history queries to interpret the figurative language of quotations, where quotation-aware attention is applied on top of history queries to highlight the indicator words. Experiments on two datasets in English and Chinese show that our model outperforms previous state-of-the-art models.</abstract>
       <url hash="df392dd2">2021.acl-short.95</url>
@@ -9306,9 +9306,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="99">
       <title>Issues with Entailment-based Zero-shot Text Classification</title>
       <author><first>Tingting</first><last>Ma</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>786–796</pages>
       <abstract>The general format of natural language inference (NLI) makes it tempting to be used for zero-shot text classification by casting any target label into a sentence of hypothesis and verifying whether or not it could be entailed by the input, aiming at generic classification applicable on any specified label space. In this opinion piece, we point out a few overlooked issues that are yet to be discussed in this line of work. We observe huge variance across different classification datasets amongst standard BERT-based NLI models and surprisingly find that pre-trained BERT without any fine-tuning can yield competitive performance against BERT fine-tuned for NLI. With the concern that these models heavily rely on spurious lexical patterns for prediction, we also experiment with preliminary approaches for more robust NLI, but the results are in general negative. Our observations reveal implicit but challenging difficulties in entailment-based zero-shot text classification.</abstract>
       <url hash="b757531e">2021.acl-short.99</url>
@@ -9322,7 +9322,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Farhad</first><last>Moghimifar</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Terry Yue</first><last>Zhuo</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Mahsa</first><last>Baktashmotlagh</last></author>
       <pages>797–802</pages>
       <abstract>Commonsense reasoning aims to incorporate sets of commonsense facts, retrieved from Commonsense Knowledge Graphs (CKG), to draw conclusion about ordinary situations. The dynamic nature of commonsense knowledge postulates models capable of performing multi-hop reasoning over new situations. This feature also results in having large-scale sparse Knowledge Graphs, where such reasoning process is needed to predict relations between new events. However, existing approaches in this area are limited by considering CKGs as a limited set of facts, thus rendering them unfit for reasoning over new unseen situations and events. In this paper, we present a neural-symbolic reasoner, which is capable of reasoning over large-scale dynamic CKGs. The logic rules for reasoning over CKGs are learned during training by our model. In addition to providing interpretable explanation, the learned logic rules help to generalise prediction to newly introduced events. Experimental results on the task of link prediction on CKGs prove the effectiveness of our model by outperforming the state-of-the-art models.</abstract>
@@ -9333,7 +9333,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="101">
       <title>What Motivates You? Benchmarking Automatic Detection of Basic Needs from Short Posts</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Seren</first><last>Yenikent</last></author>
       <author><first>Bilal</first><last>Ghanem</last></author>
       <author><first>Marc</first><last>Franco-Salvador</last></author>
@@ -9363,7 +9363,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Changhan</first><last>Wang</last></author>
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>817–824</pages>
       <abstract>Adapter modules were recently introduced as an efficient alternative to fine-tuning in NLP. Adapter tuning consists in freezing pre-trained parameters of a model and injecting lightweight modules between layers, resulting in the addition of only a small number of task-specific trainable parameters. While adapter tuning was investigated for multilingual neural machine translation, this paper proposes a comprehensive analysis of adapters for multilingual speech translation (ST). Starting from different pre-trained models (a multilingual ST trained on parallel data or a multilingual BART (mBART) trained on non parallel multilingual data), we show that adapters can be used to: (a) efficiently specialize ST to specific language pairs with a low extra cost in terms of parameters, and (b) transfer from an automatic speech recognition (ASR) task and an mBART pre-trained model to a multilingual ST task. Experiments show that adapter tuning offer competitive results to full fine-tuning, while being much more parameter-efficient.</abstract>
       <url hash="28aabd7b">2021.acl-short.103</url>
@@ -9402,7 +9402,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>837–847</pages>
       <abstract>In real scenarios, a multilingual model trained to solve NLP tasks on a set of languages can be required to support new languages over time. Unfortunately, the straightforward retraining on a dataset containing annotated examples for all the languages is both expensive and time-consuming, especially when the number of target languages grows. Moreover, the original annotated material may no longer be available due to storage or business constraints. Re-training only with the new language data will inevitably result in Catastrophic Forgetting of previously acquired knowledge. We propose a Continual Learning strategy that updates a model to support new languages over time, while maintaining consistent results on previously learned languages. We define a Teacher-Student framework where the existing model “teaches” to a student model its knowledge about the languages it supports, while the student is also trained on a new language. We report an experimental evaluation in several tasks including Sentence Classification, Relational Learning and Sequence Labeling.</abstract>
       <url hash="f88c733a">2021.acl-short.106</url>
@@ -9440,7 +9440,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Johannes Mario</first><last>Meissner</last></author>
       <author><first>Napat</first><last>Thumwanit</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>862–869</pages>
       <abstract>Natural Language Inference (NLI) datasets contain examples with highly ambiguous labels. While many research works do not pay much attention to this fact, several recent efforts have been made to acknowledge and embrace the existence of ambiguity, such as UNLI and ChaosNLI. In this paper, we explore the option of training directly on the estimated label distribution of the annotators in the NLI task, using a learning loss based on this ambiguity distribution instead of the gold-labels. We prepare AmbiNLI, a trial dataset obtained from readily available sources, and show it is possible to reduce ChaosNLI divergence scores when finetuning on this data, a promising first step towards learning how to capture linguistic ambiguity. Additionally, we show that training on the same amount of data but targeting the ambiguity distribution instead of gold-labels can result in models that achieve higher performance and learn better representations for downstream tasks.</abstract>
       <url hash="43a58ea9">2021.acl-short.109</url>
@@ -9498,7 +9498,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Qingyang</first><last>Wu</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Kun</first><last>Xu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Pengtao</first><last>Xie</last></author>
       <pages>886–896</pages>
       <abstract>Under the pandemic of COVID-19, people experiencing COVID19-related symptoms have a pressing need to consult doctors. Because of the shortage of medical professionals, many people cannot receive online consultations timely. To address this problem, we aim to develop a medical dialog system that can provide COVID19-related consultations. We collected two dialog datasets – CovidDialog – (in English and Chinese respectively) containing conversations between doctors and patients about COVID-19. While the largest of their kind, these two datasets are still relatively small compared with general-domain dialog datasets. Training complex dialog generation models on small datasets bears high risk of overfitting. To alleviate overfitting, we develop a multi-task learning approach, which regularizes the data-deficient dialog generation task with a masked token prediction task. Experiments on the CovidDialog datasets demonstrate the effectiveness of our approach. We perform both human evaluation and automatic evaluation of dialogs generated by our method. Results show that the generated responses are promising in being doctor-like, relevant to conversation history, clinically informative and correct. The code and the data are available at <url>https://github.com/UCSD-AI4H/COVID-Dialogue</url>.</abstract>
@@ -9513,7 +9513,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Suwon</first><last>Shin</last></author>
       <author><first>Jaegul</first><last>Choo</last></author>
       <author><first>Ho-Jin</first><last>Choi</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>897–906</pages>
       <abstract>In multi-modal dialogue systems, it is important to allow the use of images as part of a multi-turn conversation. Training such dialogue systems generally requires a large-scale dataset consisting of multi-turn dialogues that involve images, but such datasets rarely exist. In response, this paper proposes a 45k multi-modal dialogue dataset created with minimal human intervention. Our method to create such a dataset consists of (1) preparing and pre-processing text dialogue datasets, (2) creating image-mixed dialogues by using a text-to-image replacement technique, and (3) employing a contextual-similarity-based filtering step to ensure the contextual coherence of the dataset. To evaluate the validity of our dataset, we devise a simple retrieval model for dialogue sentence prediction tasks. Automatic metrics and human evaluation results on such tasks show that our dataset can be effectively used as training data for multi-modal dialogue systems which require an understanding of images and text in a context-aware manner. Our dataset and generation code is available at <url>https://github.com/shh1574/multi-modal-dialogue-dataset</url>.</abstract>
       <url hash="35b50a92">2021.acl-short.113</url>
@@ -9611,7 +9611,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Sequence to General Tree: Knowledge-Guided Geometry Word Problem Solving</title>
       <author><first>Shih-hung</first><last>Tsai</last></author>
       <author><first>Chao-Chun</first><last>Liang</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>964–972</pages>
       <abstract>With the recent advancements in deep learning, neural solvers have gained promising results in solving math word problems. However, these SOTA solvers only generate binary expression trees that contain basic arithmetic operators and do not explicitly use the math formulas. As a result, the expression trees they produce are lengthy and uninterpretable because they need to use multiple operators and constants to represent one single formula. In this paper, we propose sequence-to-general tree (S2G) that learns to generate interpretable and executable operation trees where the nodes can be formulas with an arbitrary number of arguments. With nodes now allowed to be formulas, S2G can learn to incorporate mathematical domain knowledge into problem-solving, making the results more interpretable. Experiments show that S2G can achieve a better performance against strong baselines on problems that require domain knowledge.</abstract>
@@ -9693,7 +9693,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Vanessa</first><last>Yan</last></author>
       <author><first>Tianxiao</first><last>Li</last></author>
       <author><first>Rihao</first><last>Qu</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1005–1011</pages>
       <abstract>Learning prerequisite chains is an important task for one to pick up knowledge efficiently in both known and unknown domains. For example, one may be an expert in the natural language processing (NLP) domain, but want to determine the best order in which to learn new concepts in an unfamiliar Computer Vision domain (CV). Both domains share some common concepts, such as machine learning basics and deep learning models. In this paper, we solve the task of unsupervised cross-domain concept prerequisite chain learning, using an optimized variational graph autoencoder. Our model learns to transfer concept prerequisite relations from an information-rich domain (source domain) to an information-poor domain (target domain), substantially surpassing other baseline models. In addition, we expand an existing dataset by introducing two new domains—-CV and Bioinformatics (BIO). The annotated data and resources as well as the code will be made publicly available.</abstract>
       <url hash="08a9397c">2021.acl-short.127</url>
@@ -9744,8 +9744,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Haoyang</first><last>Wen</last></author>
       <author><first>Anthony</first><last>Ferritto</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>1035–1042</pages>
       <abstract>Existing models on Machine Reading Comprehension (MRC) require complex model architecture for effectively modeling long texts with paragraph representation and classification, thereby making inference computationally inefficient for production use. In this work, we propose VAULT: a light-weight and parallel-efficient paragraph representation for MRC based on contextualized representation from long document input, trained using a new Gaussian distribution-based objective that pays close attention to the partially correct instances that are close to the ground-truth. We validate our VAULT architecture showing experimental results on two benchmark MRC datasets that require long context modeling; one Wikipedia-based (Natural Questions (NQ)) and the other on TechNotes (TechQA). VAULT can achieve comparable performance on NQ with a state-of-the-art (SOTA) complex document modeling approach while being 16 times faster, demonstrating the efficiency of our proposed model. We also demonstrate that our model can also be effectively adapted to a completely different domain – TechQA – with large improvement over a model fine-tuned on a previously published large PLM.</abstract>
       <url hash="a88ae210">2021.acl-short.131</url>
@@ -9781,7 +9781,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Luke</first><last>Dai</last></author>
       <author><first>Lucas</first><last>Bandarkar</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>1058–1064</pages>
       <abstract>The Shuffle Test is the most common task to evaluate whether NLP models can measure coherence in text. Most recent work uses direct supervision on the task; we show that by simply finetuning a RoBERTa model, we can achieve a near perfect accuracy of 97.8%, a state-of-the-art. We argue that this outstanding performance is unlikely to lead to a good model of text coherence, and suggest that the Shuffle Test should be approached in a Zero-Shot setting: models should be evaluated without being trained on the task itself. We evaluate common models in this setting, such as Generative and Bi-directional Transformers, and find that larger architectures achieve high-performance out-of-the-box. Finally, we suggest the k-Block Shuffle Test, a modification of the original by increasing the size of blocks shuffled. Even though human reader performance remains high (around 95% accuracy), model performance drops from 94% to 78% as block size increases, creating a conceptually simple challenge to benchmark NLP models.</abstract>
       <url hash="10abeef7">2021.acl-short.134</url>
@@ -9831,7 +9831,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="138">
       <title><fixed-case>R</fixed-case>eplicating and Extending “<fixed-case>B</fixed-case>ecause Their Treebanks Leak”: <fixed-case>G</fixed-case>raph Isomorphism, Covariants, and Parser Performance</title>
       <author><first>Mark</first><last>Anderson</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>1090–1098</pages>
       <abstract>Søgaard (2020) obtained results suggesting the fraction of trees occurring in the test data isomorphic to trees in the training set accounts for a non-trivial variation in parser performance. Similar to other statistical analyses in NLP, the results were based on evaluating linear regressions. However, the study had methodological issues and was undertaken using a small sample size leading to unreliable results. We present a replication study in which we also bin sentences by length and find that only a small subset of sentences vary in performance with respect to graph isomorphism. Further, the correlation observed between parser performance and graph isomorphism in the wild disappears when controlling for covariants. However, in a controlled experiment, where covariants are kept fixed, we do observe a correlation. We suggest that conclusions drawn from statistical analyses like this need to be tempered and that controlled experiments can complement them by more readily teasing factors apart.</abstract>
@@ -9845,7 +9845,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Don’t Rule Out Monolingual Speakers: <fixed-case>A</fixed-case> Method For Crowdsourcing Machine Translation Data</title>
       <author><first>Rajat</first><last>Bhatnagar</last></author>
       <author><first>Ananya</first><last>Ganesh</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>1099–1106</pages>
       <abstract>High-performing machine translation (MT) systems can help overcome language barriers while making it possible for everyone to communicate and use language technologies in the language of their choice. However, such systems require large amounts of parallel sentences for training, and translators can be difficult to find and expensive. Here, we present a data collection strategy for MT which, in contrast, is cheap and simple, as it does not require bilingual speakers. Based on the insight that humans pay specific attention to movements, we use graphics interchange formats (GIFs) as a pivot to collect parallel sentences from monolingual annotators. We use our strategy to collect data in Hindi, Tamil and English. As a baseline, we also collect data using images as a pivot. We perform an intrinsic evaluation by manually evaluating a subset of the sentence pairs and an extrinsic evaluation by finetuning mBART (Liu et al., 2020) on the collected data. We find that sentences collected via GIFs are indeed of higher quality.</abstract>
       <url hash="7c5f0ac2">2021.acl-short.139</url>
@@ -9879,7 +9879,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>David</first><last>Thulke</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Christian</first><last>Dugast</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1–15</pages>
       <abstract>Data processing is an important step in various natural language processing tasks. As the commonly used datasets in named entity recognition contain only a limited number of samples, it is important to obtain additional labeled data in an efficient and reliable manner. A common practice is to utilize large monolingual unlabeled corpora. Another popular technique is to create synthetic data from the original labeled data (data augmentation). In this work, we investigate the impact of these two methods on the performance of three different named entity recognition tasks.</abstract>
       <url hash="5ddf97d4">2021.acl-srw.1</url>
@@ -9891,7 +9891,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Stage-wise Fine-tuning for Graph-to-Text Generation</title>
       <author><first>Qingyun</first><last>Wang</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Nazneen</first><last>Rajani</last></author>
       <pages>16–22</pages>
@@ -9907,7 +9907,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Zijian</first><last>Yang</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>23–32</pages>
       <abstract>The neural hidden Markov model has been proposed as an alternative to attention mechanism in machine translation with recurrent neural networks. However, since the introduction of the transformer models, its performance has been surpassed. This work proposes to introduce the concept of the hidden Markov model to the transformer architecture, which outperforms the transformer baseline. Interestingly, we find that the zero-order model already provides promising performance, giving it an edge compared to a model with first-order dependency, which performs similarly but is significantly slower in training and decoding.</abstract>
       <url hash="ff409cc5">2021.acl-srw.3</url>
@@ -10011,7 +10011,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="11">
       <title>“<fixed-case>I</fixed-case>’ve Seen Things You People Wouldn’t Believe”: Hallucinating Entities in <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat?!</title>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>101–111</pages>
       <abstract>Natural language generation systems have witnessed important progress in the last years, but they are shown to generate tokens that are unrelated to the source input. This problem affects computational models in many NLP tasks, and it is particularly unpleasant in multimodal systems. In this work, we assess the rate of object hallucination in multimodal conversational agents playing the GuessWhat?! referential game. Better visual processing has been shown to mitigate this issue in image captioning; hence, we adapt to the GuessWhat?! task the best visual processing models at disposal, and propose two new models to play the Questioner agent. We show that the new models generate few hallucinations compared to other renowned models available in the literature. Moreover, their hallucinations are less severe (affect task-accuracy less) and are more human-like. We also analyse where hallucinations tend to occur more often through the dialogue: hallucinations are less frequent in earlier turns, cause a cascade hallucination effect, and are often preceded by negative answers, which have been shown to be harder to ground.</abstract>
       <url hash="5b8d534e">2021.acl-srw.11</url>
@@ -10023,7 +10023,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>How do different factors Impact the Inter-language Similarity? A Case Study on <fixed-case>I</fixed-case>ndian languages</title>
       <author><first>Sourav</first><last>Kumar</last></author>
       <author><first>Salil</first><last>Aggarwal</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Radhika</first><last>Mamidi</last></author>
       <pages>112–118</pages>
       <abstract>India is one of the most linguistically diverse nations of the world and is culturally very rich. Most of these languages are somewhat similar to each other on account of sharing a common ancestry or being in contact for a long period of time. Nowadays, researchers are constantly putting efforts in utilizing the language relatedness to improve the performance of various NLP systems such as cross lingual semantic search, machine translation, sentiment analysis systems, etc. So in this paper, we performed an extensive case study on similarity involving languages of the Indian subcontinent. Language similarity prediction is defined as the task of measuring how similar the two languages are on the basis of their lexical, morphological and syntactic features. In this study, we concentrate only on the approach to calculate lexical similarity between Indian languages by looking at various factors such as size and type of corpus, similarity algorithms, subword segmentation, etc. The main takeaways from our work are: (i) Relative order of the language similarities largely remain the same, regardless of the factors mentioned above, (ii) Similarity within the same language family is higher, (iii) Languages share more lexical features at the subword level.</abstract>
@@ -10035,7 +10035,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="13">
       <title><fixed-case>COVID</fixed-case>-19 and Misinformation: A Large-Scale Lexical Analysis on <fixed-case>T</fixed-case>witter</title>
       <author><first>Dimosthenis</first><last>Antypas</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Alun</first><last>Preece</last></author>
       <author><first>David</first><last>Rogers</last></author>
       <pages>119–126</pages>
@@ -10115,7 +10115,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Smriti</first><last>Singh</last></author>
       <author><first>Tanvi</first><last>Anand</last></author>
       <author><first>Arijit</first><last>Ghosh Chowdhury</last></author>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <pages>180–185</pages>
       <abstract>Television shows play an important role inpropagating societal norms. Owing to the popularity of the situational comedy (sitcom) genre, it contributes significantly to the over-all development of society. In an effort to analyze the content of television shows belong-ing to this genre, we present a dataset of dialogue turns from popular sitcoms annotated for the presence of sexist remarks. We train a text classification model to detect sexism using domain adaptive learning. We apply the model to our dataset to analyze the evolution of sexist content over the years. We propose a domain-specific semi-supervised architecture for the aforementioned detection of sexism. Through extensive experiments, we show that our model often yields better classification performance over generic deep learn-ing based sentence classification that does not employ domain-specific training. We find that while sexism decreases over time on average,the proportion of sexist dialogue for the most sexist sitcom actually increases. A quantitative analysis along with a detailed error analysis presents the case for our proposed methodology</abstract>
       <url hash="60efc65b">2021.acl-srw.19</url>
@@ -10139,7 +10139,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>Improving the Robustness of <fixed-case>QA</fixed-case> Models to Challenge Sets with Variational Question-Answer Pair Generation</title>
       <author><first>Kazutoshi</first><last>Shinoda</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>197–214</pages>
       <abstract>Question answering (QA) models for reading comprehension have achieved human-level accuracy on in-distribution test sets. However, they have been demonstrated to lack robustness to challenge sets, whose distribution is different from that of training sets. Existing data augmentation methods mitigate this problem by simply augmenting training sets with synthetic examples sampled from the same distribution as the challenge sets. However, these methods assume that the distribution of a challenge set is known a priori, making them less applicable to unseen challenge sets. In this study, we focus on question-answer pair generation (QAG) to mitigate this problem. While most existing QAG methods aim to improve the quality of synthetic examples, we conjecture that diversity-promoting QAG can mitigate the sparsity of training sets and lead to better robustness. We present a variational QAG model that generates multiple diverse QA pairs from a paragraph. Our experiments show that our method can improve the accuracy of 12 challenge sets, as well as the in-distribution accuracy.</abstract>
       <url hash="337ea3b8">2021.acl-srw.21</url>
@@ -10162,7 +10162,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="23">
       <title>How Many Layers and Why? <fixed-case>A</fixed-case>n Analysis of the Model Depth in Transformers</title>
       <author><first>Antoine</first><last>Simoulin</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>221–228</pages>
       <abstract>In this study, we investigate the role of the multiple layers in deep transformer models. We design a variant of Albert that dynamically adapts the number of layers for each token of the input. The key specificity of Albert is that weights are tied across layers. Therefore, the stack of encoder layers iteratively repeats the application of the same transformation function on the input. We interpret the repetition of this application as an iterative process where the token contextualized representations are progressively refined. We analyze this process at the token level during pre-training, fine-tuning, and inference. We show that tokens do not require the same amount of iterations and that difficult or crucial tokens for the task are subject to more iterations.</abstract>
       <url hash="c8e8e422">2021.acl-srw.23</url>
@@ -10236,7 +10236,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Duanchen</first><last>Liu</last></author>
       <author><first>Qingyun</first><last>Yang</last></author>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>284–291</pages>
       <abstract>Individuals with autism spectrum disorder (ASD) experience difficulties in social aspects of communication, but the linguistic characteristics associated with deficits in discourse and pragmatic expression are often difficult to precisely identify and quantify. We are currently collecting a corpus of transcribed natural conversations produced in an experimental setting in which participants with and without ASD complete a number of collaborative tasks with their neurotypical peers. Using this dyadic conversational data, we investigate three pragmatic features – politeness, uncertainty, and informativeness – and present a dataset of utterances annotated for each of these features on a three-point scale. We then introduce ongoing work in developing and training neural models to automatically predict these features, with the goal of identifying the same between-groups differences that are observed using manual annotations. We find the best performing model for all three features is a feed-forward neural network trained with BERT embeddings. Our models yield higher accuracy than ones used in previous approaches for deriving these features, with F1 exceeding 0.82 for all three pragmatic features.</abstract>
       <url hash="4bd1ed95">2021.acl-srw.29</url>
@@ -10311,7 +10311,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Hande</first><last>Celikkanat</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>337–347</pages>
       <abstract>Various studies show that pretrained language models such as BERT cannot straightforwardly replace encoders in neural machine translation despite their enormous success in other tasks. This is even more astonishing considering the similarities between the architectures. This paper sheds some light on the embedding spaces they create, using average cosine similarity, contextuality metrics and measures for representational similarity for comparison, revealing that BERT and NMT encoder representations look significantly different from one another. In order to address this issue, we propose a supervised transformation from one into the other using explicit alignment and fine-tuning. Our results demonstrate the need for such a transformation to improve the applicability of BERT in MT.</abstract>
       <url hash="75238265">2021.acl-srw.35</url>
@@ -10335,7 +10335,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <meta>
       <booktitle>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations</booktitle>
       <editor><first>Heng</first><last>Ji</last></editor>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <editor><first>Rui</first><last>Xia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -10412,7 +10412,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Puzhao</first><last>Xie</last></author>
       <author><first>Zhipeng</first><last>Chen</last></author>
       <author><first>Zhuohao</first><last>Yu</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>30–39</pages>
       <abstract>In this paper, we release an open-source library, called TextBox, to provide a unified, modularized, and extensible text generation framework. TextBox aims to support a broad set of text generation tasks and models. In our library, we implement 21 text generation models on 9 benchmark datasets, covering the categories of VAE, GAN, and pretrained language models. Meanwhile, our library maintains sufficient modularity and extensibility by properly decomposing the model architecture, inference, and learning process into highly reusable modules, which allows users to easily incorporate new models into our framework. The above features make TextBox especially suitable for researchers and practitioners to quickly reproduce baseline models and develop new models. TextBox is implemented based on PyTorch, and released under Apache License 2.0 at the link <url>https://github.com/RUCAIBox/TextBox</url>.</abstract>
@@ -10466,7 +10466,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Michael</first><last>Cysou</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>63–72</pages>
       <abstract>With more than 7000 languages worldwide, multilingual natural language processing (NLP) is essential both from an academic and commercial perspective. Researching typological properties of languages is fundamental for progress in multilingual NLP. Examples include assessing language similarity for effective transfer learning, injecting inductive biases into machine learning models or creating resources such as dictionaries and inflection tables. We provide ParCourE, an online tool that allows to browse a word-aligned parallel corpus, covering 1334 languages. We give evidence that this is useful for typological research. ParCourE can be set up for any parallel corpus and can thus be used for typological research on other corpora as well as for exploring their quality and properties.</abstract>
       <url hash="eee52cc6">2021.acl-demo.8</url>
@@ -10479,8 +10479,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Ana C</first><last>Farinha</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>73–80</pages>
       <abstract>We present MT-Telescope, a visualization platform designed to facilitate comparative analysis of the output quality of two Machine Translation (MT) systems. While automated MT evaluation metrics are commonly used to evaluate MT systems at a corpus-level, our platform supports fine-grained segment-level analysis and interactive visualisations that expose the fundamental differences in the performance of the compared systems. MT-Telescope also supports dynamic corpus filtering to enable focused analysis on specific phenomena such as; translation of named entities, handling of terminology, and the impact of input segment length on translation quality. Furthermore, the platform provides a bootstrapped t-test for statistical significance as a means of evaluating the rigor of the resulting system ranking. MT-Telescope is open source, written in Python, and is built around a user friendly and dynamic web interface. Complementing other existing tools, our platform is designed to facilitate and promote the broader adoption of more rigorous analysis practices in the evaluation of MT quality.</abstract>
       <url hash="e1a613d0">2021.acl-demo.9</url>
@@ -10523,7 +10523,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Zhichao</first><last>Geng</last></author>
       <author><first>Hang</first><last>Yan</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>99–106</pages>
       <abstract>We present fastHan, an open-source toolkit for four basic tasks in Chinese natural language processing: Chinese word segmentation (CWS), Part-of-Speech (POS) tagging, named entity recognition (NER), and dependency parsing. The backbone of fastHan is a multi-task model based on a pruned BERT, which uses the first 8 layers in BERT. We also provide a 4-layer base model compressed from the 8-layer model. The joint-model is trained and evaluated on 13 corpora of four tasks, yielding near state-of-the-art (SOTA) performance in dependency parsing and NER, achieving SOTA performance in CWS and POS. Besides, fastHan’s transferability is also strong, performing much better than popular segmentation tools on a non-training corpus. To better meet the need of practical application, we allow users to use their own labeled data to further fine-tune fastHan. In addition to its small size and excellent performance, fastHan is user-friendly. Implemented as a python package, fastHan isolates users from the internal technical details and is convenient to use. The project is released on Github.</abstract>
       <url hash="d599a858">2021.acl-demo.12</url>
@@ -10613,8 +10613,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title>A Graphical Interface for Curating Schemas</title>
       <author><first>Piyush</first><last>Mishra</last></author>
       <author><first>Akanksha</first><last>Malhotra</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
       <pages>159–166</pages>
       <abstract>Much past work has focused on extracting information like events, entities, and relations from documents. Very little work has focused on analyzing these results for better model understanding. In this paper, we introduce a curation interface that takes an Information Extraction (IE) system’s output in a pre-defined format and generates a graphical representation of its elements. The interface supports editing while curating schemas for complex events like Improvised Explosive Device (IED) based scenarios. We identify various schemas that either have linear event chains or contain parallel events with complicated temporal ordering. We iteratively update an induced schema to uniquely identify events specific to it, add optional events around them, and prune unnecessary events. The resulting schemas are improved and enriched versions of the machine-induced versions.</abstract>
@@ -10669,7 +10669,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yuanhang</first><last>Zhou</last></author>
       <author><first>Chenzhan</first><last>Shang</last></author>
       <author><first>Yuan</first><last>Cheng</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>185–193</pages>
@@ -10685,7 +10685,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Julia</first><last>Rozanova</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>194–201</pages>
       <abstract>Probing (or diagnostic classification) has become a popular strategy for investigating whether a given set of intermediate features is present in the representations of neural models. Naive probing studies may have misleading results, but various recent works have suggested more reliable methodologies that compensate for the possible pitfalls of probing. However, these best practices are numerous and fast-evolving. To simplify the process of running a set of probing experiments in line with suggested methodologies, we introduce Probe-Ably: an extendable probing framework which supports and automates the application of probing methods to the user’s inputs.</abstract>
       <url hash="39707ac8">2021.acl-demo.23</url>
@@ -10696,8 +10696,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title><fixed-case>CLTR</fixed-case>: An End-to-End, Transformer-Based System for Cell-Level Table Retrieval and Table Question Answering</title>
       <author><first>Feifei</first><last>Pan</last></author>
       <author><first>Mustafa</first><last>Canim</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Peter</first><last>Fox</last></author>
       <pages>202–209</pages>
       <abstract>We present the first end-to-end, transformer-based table question answering (QA) system that takes natural language questions and massive table corpora as inputs to retrieve the most relevant tables and locate the correct table cells to answer the question. Our system, CLTR, extends the current state-of-the-art QA over tables model to build an end-to-end table QA architecture. This system has successfully tackled many real-world table QA problems with a simple, unified pipeline. Our proposed system can also generate a heatmap of candidate columns and rows over complex tables and allow users to quickly identify the correct cells to answer questions. In addition, we introduce two new open domain benchmarks, E2E_WTQ and E2E_GNQ, consisting of 2,005 natural language questions over 76,242 tables. The benchmarks are designed to validate CLTR as well as accommodate future table retrieval and end-to-end table QA research and experiments. Our experiments demonstrate that our system is the current state-of-the-art model on the table retrieval task and produces promising results for end-to-end table QA.</abstract>
@@ -10816,7 +10816,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <title><fixed-case>T</fixed-case>wee<fixed-case>NLP</fixed-case>: A <fixed-case>T</fixed-case>witter Exploration Portal for Natural Language Processing</title>
       <author><first>Viraj</first><last>Shah</last></author>
       <author><first>Shruti</first><last>Singh</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>265–271</pages>
       <abstract>We present TweeNLP, a one-stop portal that organizes Twitter’s natural language processing (NLP) data and builds a visualization and exploration platform. It curates 19,395 tweets (as of April 2021) from various NLP conferences and general NLP discussions. It supports multiple features such as TweetExplorer to explore tweets by topics, visualize insights from Twitter activity throughout the organization cycle of conferences, discover popular research papers and researchers. It also builds a timeline of conference and workshop submission deadlines. We envision TweeNLP to function as a collective memory unit for the NLP community by integrating the tweets pertaining to research papers with the NLPExplorer scientific literature search engine. The current system is hosted at <url>http://nlpexplorer.org/twitter/CFP</url>.</abstract>
       <url hash="1e0c0a3f">2021.acl-demo.32</url>
@@ -10916,7 +10916,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Shuang</first><last>Chen</last></author>
       <author><first>Qian</first><last>Liu</last></author>
       <author><first>Zhiwei</first><last>Yu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Jian-Guang</first><last>Lou</last></author>
       <author><first>Feng</first><last>Jiang</last></author>
       <pages>325–336</pages>
@@ -10951,7 +10951,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Zexiong</first><last>Pang</last></author>
       <author><first>Qinzhuo</first><last>Wu</last></author>
       <author><first>Zhengyan</first><last>Li</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Ruotian</first><last>Ma</last></author>
       <author><first>Zichu</first><last>Fei</last></author>
       <author><first>Ruijian</first><last>Cai</last></author>
@@ -10972,7 +10972,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>347–355</pages>
       <abstract>TextFlint is a multilingual robustness evaluation toolkit for NLP tasks that incorporates universal text transformation, task-specific transformation, adversarial attack, subpopulation, and their combinations to provide comprehensive robustness analyses. This enables practitioners to automatically evaluate their models from various aspects or to customize their evaluations as desired with just a few lines of code. TextFlint also generates complete analytical reports as well as targeted augmented data to address the shortcomings of the model in terms of its robustness. To guarantee acceptability, all the text transformations are linguistically based and all the transformed data selected (up to 100,000 texts) scored highly under human evaluation. To validate the utility, we performed large-scale empirical evaluations (over 67,000) on state-of-the-art deep learning models, classic supervised methods, and real-world systems. The toolkit is already available at <url>https://github.com/textflint</url> with all the evaluation results demonstrated at textflint.io.</abstract>
       <url hash="52f58f1f">2021.acl-demo.41</url>
@@ -10985,7 +10985,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Chi-yang</first><last>Hsu</last></author>
       <author><first>Yun-Wei</first><last>Chu</last></author>
       <author><first>Tsai-Lun</first><last>Yang</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>356–362</pages>
       <abstract>In visual storytelling, a short story is generated based on a given image sequence. Despite years of work, most visual storytelling models remain limited in terms of the generated stories’ fixed length: most models produce stories with exactly five sentences because five-sentence stories dominate the training data. The fix-length stories carry limited details and provide ambiguous textual information to the readers. Therefore, we propose to “stretch” the stories, which create the potential to present in-depth visual details. This paper presents Stretch-VST, a visual storytelling framework that enables the generation of prolonged stories by adding appropriate knowledge, which is selected by the proposed scoring function. We propose a length-controlled Transformer to generate long stories. This model introduces novel positional encoding methods to maintain story quality with lengthy inputs. Experiments confirm that long stories are generated without deteriorating the quality. The human evaluation further shows that Stretch-VST can provide better focus and detail when stories are prolonged compared to state of the art. We create a webpage to demonstrate our prolonged capability.</abstract>
@@ -11032,7 +11032,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     <paper id="1">
       <title>Advances in Debating Technologies: Building <fixed-case>AI</fixed-case> That Can Debate Humans</title>
       <author><first>Roy</first><last>Bar-Haim</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Matan</first><last>Orbach</last></author>
       <author><first>Elad</first><last>Venezian</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
@@ -11058,7 +11058,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
       <author><first>Qiang</first><last>Ning</last></author>
       <author><first>Manling</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>6–14</pages>
       <abstract>This tutorial targets researchers and practitioners who are interested in AI technologies that help machines understand natural language text, particularly real-world events described in the text. These include methods to extract the internal structures of an event regarding its protagonist(s), participant(s) and properties, as well as external structures concerning memberships, temporal and causal relations of multiple events. This tutorial will provide audience with a systematic introduction of (i) knowledge representations of events, (ii) various methods for automated extraction, conceptualization and prediction of events and their relations, (iii) induction of event processes and properties, and (iv) a wide range of NLU and commonsense understanding tasks that benefit from aforementioned techniques. We will conclude the tutorial by outlining emerging research problems in this area.</abstract>
@@ -11093,8 +11093,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO
     </paper>
     <paper id="5">
       <title>Prosody: Models, Methods, and Applications</title>
-      <author><first>Nigel</first><last>Ward</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>26–28</pages>
       <abstract>Prosody is essential in human interaction, enabling people to show interest, establish rapport, efficiently convey nuances of attitude or intent, and so on. Some applications that exploit prosodic knowledge have recently shown superhuman performance, and in many respects our ability to effectively model prosody is rapidly advancing. This tutorial will overview the computational modeling of prosody, including recent advances and diverse actual and potential applications.</abstract>
       <url hash="052ca7d2">2021.acl-tutorials.5</url>
diff --git a/data/xml/2021.adaptnlp.xml b/data/xml/2021.adaptnlp.xml
index 3e10af2f14..926947634f 100644
--- a/data/xml/2021.adaptnlp.xml
+++ b/data/xml/2021.adaptnlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Domain Adaptation for NLP</booktitle>
       <editor><first>Eyal</first><last>Ben-David</last></editor>
-      <editor><first>Shay</first><last>Cohen</last></editor>
+      <editor id="shay-b-cohen"><first>Shay</first><last>Cohen</last></editor>
       <editor><first>Ryan</first><last>McDonald</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <editor><first>Roi</first><last>Reichart</last></editor>
       <editor><first>Guy</first><last>Rotman</last></editor>
       <editor><first>Yftah</first><last>Ziser</last></editor>
@@ -23,7 +23,7 @@
     <paper id="1">
       <title>Multidomain Pretrained Language Models for Green <fixed-case>NLP</fixed-case></title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1–8</pages>
       <abstract>When tackling a task in a given domain, it has been shown that adapting a model to the domain using raw text data before training on the supervised task improves performance versus solely training on the task. The downside is that a lot of domain data is required and if we want to tackle tasks in n domains, we require n models each adapted on domain data before task learning. Storing and using these models separately can be prohibitive for low-end devices. In this paper we show that domain adaptation can be generalised to cover multiple domains. Specifically, a single model can be trained across various domains at the same time with minimal drop in performance, even when we use less data and resources. Thus, instead of training multiple models, we can train a single multidomain model saving on computational resources and training time.</abstract>
       <url hash="4739a758">2021.adaptnlp-1.1</url>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Conditional Adversarial Networks for Multi-Domain Text Classification</title>
       <author><first>Yuan</first><last>Wu</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Ahmed</first><last>El-Roby</last></author>
       <pages>16–27</pages>
       <abstract>In this paper, we propose conditional adversarial networks (CANs), a framework that explores the relationship between the shared features and the label predictions to impose stronger discriminability to the learned features, for multi-domain text classification (MDTC). The proposed CAN introduces a conditional domain discriminator to model the domain variance in both the shared feature representations and the class-aware information simultaneously, and adopts entropy conditioning to guarantee the transferability of the shared features. We provide theoretical analysis for the CAN framework, showing that CAN’s objective is equivalent to minimizing the total divergence among multiple joint distributions of shared features and label predictions. Therefore, CAN is a theoretically sound adversarial network that discriminates over multiple distributions. Evaluation results on two MDTC benchmarks show that CAN outperforms prior methods. Further experiments demonstrate that CAN has a good ability to generalize learned knowledge to unseen domains.</abstract>
@@ -108,7 +108,7 @@
     <paper id="9">
       <title>Addressing Zero-Resource Domains Using Document-Level Context in Neural Machine Translation</title>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>80–93</pages>
       <abstract>Achieving satisfying performance in machine translation on domains for which there is no training data is challenging. Traditional supervised domain adaptation is not suitable for addressing such zero-resource domains because it relies on in-domain parallel data. We show that when in-domain parallel data is not available, access to document-level context enables better capturing of domain generalities compared to only having access to a single sentence. Having access to more information provides a more reliable domain estimation. We present two document-level Transformer models which are capable of using large context sizes and we compare these models against strong Transformer baselines. We obtain improvements for the two zero-resource domains we study. We additionally provide an analysis where we vary the amount of context and look at the case where in-domain data is available.</abstract>
       <url hash="063fdfa1">2021.adaptnlp-1.9</url>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="11">
       <title>Domain adaptation in practice: Lessons from a real-world information extraction pipeline</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>105–110</pages>
@@ -180,7 +180,7 @@
     <paper id="16">
       <title>Dependency Parsing Evaluation for Low-resource Spontaneous Speech</title>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>156–165</pages>
       <abstract>How well can a state-of-the-art parsing system, developed for the written domain, perform when applied to spontaneous speech data involving different interlocutors? This study addresses this question in a low-resource setting using child-parent conversations from the CHILDES databse. Specifically, we focus on dependency parsing evaluation for utterances of one specific child (18 - 27 months) and her parents. We first present a semi-automatic adaption of the dependency annotation scheme in CHILDES to that of the Universal Dependencies project, an annotation style that is more commonly applied in dependency parsing. Our evaluation demonstrates that an outof-domain biaffine parser trained only on written texts performs well with parent speech. There is, however, much room for improvement on child utterances, particularly at 18 and 21 months, due to cases of omission and repetition that are prevalent in child speech. By contrast, parsers trained or fine-tuned with in-domain spoken data on a much smaller scale can achieve comparable results for parent speech and improve the weak parsing performance for child speech at these earlier ages</abstract>
       <url hash="cc584520">2021.adaptnlp-1.16</url>
@@ -198,7 +198,7 @@
     <paper id="18">
       <title>User Factor Adaptation for User Embedding via Multitask Learning</title>
       <author><first>Xiaolei</first><last>Huang</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Robin</first><last>Burke</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
@@ -257,7 +257,7 @@
       <author><first>Abdul</first><last>Waheed</last></author>
       <author><first>Devamanyu</first><last>Hazarika</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>222–244</pages>
       <abstract>The robustness of pretrained language models(PLMs) is generally measured using performance drops on two or more domains. However, we do not yet understand the inherent robustness achieved by contributions from different layers of a PLM. We systematically analyze the robustness of these representations layer by layer from two perspectives. First, we measure the robustness of representations by using domain divergence between two domains. We find that i) Domain variance increases from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained on domain-specific data (DAPT)(Gururangan et al., 2020) exhibit more variance than their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT) also show greater domain variance. Second, we investigate the robustness of representations by analyzing the encoded syntactic and semantic information using diagnostic probes. We find that similar layers have similar amounts of linguistic information for data from an unseen domain.</abstract>
       <url hash="979fb4e9">2021.adaptnlp-1.23</url>
@@ -266,10 +266,10 @@
     <paper id="24">
       <title>Few-Shot Learning of an Interleaved Text Summarization Model by Pretraining with Synthetic Data</title>
       <author><first>Sanjeev Kumar</first><last>Karn</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>245–254</pages>
       <abstract>Interleaved texts, where posts belonging to different threads occur in a sequence, commonly occur in online chat posts, so that it can be time-consuming to quickly obtain an overview of the discussions. Existing systems first disentangle the posts by threads and then extract summaries from those threads. A major issue with such systems is error propagation from the disentanglement component. While end-to-end trainable summarization system could obviate explicit disentanglement, such systems require a large amount of labeled data. To address this, we propose to pretrain an end-to-end trainable hierarchical encoder-decoder system using synthetic interleaved texts. We show that by fine-tuning on a real-world meeting dataset (AMI), such a system out-performs a traditional two-step system by 22%. We also compare against transformer models and observed that pretraining with synthetic data both the encoder and decoder outperforms the BertSumExtAbs transformer model which pretrains only the encoder on a large dataset.</abstract>
       <url hash="252a1b98">2021.adaptnlp-1.24</url>
diff --git a/data/xml/2021.alta.xml b/data/xml/2021.alta.xml
index a541f1b9b2..17fb554fcc 100644
--- a/data/xml/2021.alta.xml
+++ b/data/xml/2021.alta.xml
@@ -42,7 +42,7 @@
       <author><first>Ehsan</first><last>Abbasnejad</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>22–33</pages>
       <abstract>Visual question answering (VQA) models, in particular modular ones, are commonly trained on large-scale datasets to achieve state-of-the-art performance. However, such datasets are sometimes not available. Further, it has been shown that training these models on small datasets significantly reduces their accuracy. In this paper, we propose curriculum-based learning (CL) regime to increase the accuracy of VQA models trained on small datasets. Specifically, we offer three criteria to rank the samples in these datasets and propose a training strategy for each criterion. Our results show that, for small datasets, our CL approach yields more accurate results than those obtained when training with no curriculum.</abstract>
       <url hash="c631c1a2">2021.alta-1.3</url>
@@ -73,7 +73,7 @@
       <author><first>Rhys</first><last>Biddle</last></author>
       <author><first>Maciek</first><last>Rybinski</last></author>
       <author><first>Qian</first><last>Li</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Guandong</first><last>Xu</last></author>
       <pages>58–67</pages>
       <abstract>The detection of hyperbole is an important stepping stone to understanding the intentions of a hyperbolic utterance. We propose a model that combines pre-trained language models with privileged information for the task of hyperbole detection. We also introduce a suite of behavioural tests to probe the capabilities of hyperbole detection models across a range of hyperbole types. Our experiments show that our model improves upon baseline models on an existing hyperbole detection dataset. Probing experiments combined with analysis using local linear approximations (LIME) show that our model excels at detecting one particular type of hyperbole. Further, we discover that our experiments highlight annotation artifacts introduced through the process of literal paraphrasing of hyperbole. These annotation artifacts are likely to be a roadblock to further improvements in hyperbole detection.</abstract>
@@ -94,7 +94,7 @@
       <title>Phone Based Keyword Spotting for Transcribing Very Low Resource Languages</title>
       <author><first>Eric Le</first><last>Ferrand</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>79–86</pages>
       <abstract>We investigate the efficiency of two very different spoken term detection approaches for transcription when the available data is insufficient to train a robust speech recognition system. This work is grounded in a very low-resource language documentation scenario where only a few minutes of recording have been transcribed for a given language so far. Experiments on two oral languages show that a pretrained universal phone recognizer, fine-tuned with only a few minutes of target language speech, can be used for spoken term detection through searches in phone confusion networks with a lexicon expressed as a finite state automaton. Experimental results show that a phone recognition based approach provides better overall performances than Dynamic Time Warping when working with clean data, and highlight the benefits of each methods for two types of speech corpus.</abstract>
       <url hash="e1d48830">2021.alta-1.8</url>
@@ -103,7 +103,7 @@
     <paper id="9">
       <title>Evaluation of Review Summaries via Question-Answering</title>
       <author><first>Nannan</first><last>Huang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <pages>87–96</pages>
       <abstract>Summarisation of reviews aims at compressing opinions expressed in multiple review documents into a concise form while still covering the key opinions. Despite the advancement in summarisation models, evaluation metrics for opinionated text summaries lag behind and still rely on lexical-matching metrics such as ROUGE. In this paper, we propose to use the question-answering(QA) approach to evaluate summaries of opinions in reviews. We propose to identify opinion-bearing text spans in the reference summary to generate QA pairs so as to capture salient opinions. A QA model is then employed to probe the candidate summary to evaluate information overlap between candidate and reference summaries. We show that our metric RunQA, Review Summary Evaluation via Question Answering, correlates well with human judgments in terms of coverage and focus of information. Finally, we design an adversarial task and demonstrate that the proposed approach is more robust than metrics in the literature for ranking summaries.</abstract>
       <url hash="c646845e">2021.alta-1.9</url>
@@ -113,7 +113,7 @@
       <title>Exploring Story Generation with Multi-task Objectives in Variational Autoencoders</title>
       <author><first>Zhuohan</first><last>Xie</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>97–106</pages>
       <abstract>GPT-2 has been frequently adapted in story generation models as it provides powerful generative capability. However, it still fails to generate consistent stories and lacks diversity. Current story generation models leverage additional information such as plots or commonsense into GPT-2 to guide the generation process. These approaches focus on improving generation quality of stories while our work look at both quality and diversity. We explore combining BERT and GPT-2 to build a variational autoencoder (VAE), and extend it by adding additional objectives to learn global features such as story topic and discourse relations. Our evaluations show our enhanced VAE can provide better quality and diversity trade off, generate less repetitive story content and learn a more informative latent variable.</abstract>
       <url hash="639a9c9a">2021.alta-1.10</url>
@@ -133,7 +133,7 @@
       <title>Robustness Analysis of Grover for Machine-Generated News Detection</title>
       <author><first>Rinaldo</first><last>Gagiano</last></author>
       <author><first>Maria Myung-Hee</first><last>Kim</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Jennifer</first><last>Biggs</last></author>
       <pages>119–127</pages>
       <abstract>Advancements in Natural Language Generation have raised concerns on its potential misuse for deep fake news. Grover is a model for both generation and detection of neural fake news. While its performance on automatically discriminating neural fake news surpassed GPT-2 and BERT, Grover could face a variety of adversarial attacks to deceive detection. In this work, we present an investigation of Groverâs susceptibility to adversarial attacks such as character-level and word-level perturbations. The experiment results show that even a singular character alteration can cause Grover to fail, affecting up to 97% of target articles with unlimited attack attempts, exposing a lack of robustness. We further analyse these misclassified cases to highlight affected words, identify vulnerability within Groverâs encoder, and perform a novel visualisation of cumulative classification scores to assist in interpreting model behaviour.</abstract>
@@ -143,8 +143,8 @@
     <paper id="13">
       <title>Document Level Hierarchical Transformer</title>
       <author><first>Najam</first><last>Zaidi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>128–137</pages>
       <abstract>Generating long and coherent text is an important and challenging task encompassing many application areas such as summarization, document level machine translation and story generation. Despite the success in modeling intra-sentence coherence, existing long text generation models (e.g., BART and GPT-3) still struggle to maintain a coherent event sequence throughout the generated text. We conjecture that this is because of the difficulty for the model to revise, replace, revoke or delete any part that has been generated by the model. In this paper, we present a novel semi-autoregressive document generation model capable of revising and editing the generated text. Building on recent models by (Gu et al., 2019; Xu and Carpuat, 2020) we propose document generation as a hierarchical Markov decision process with a two level hierarchy, where the high and low level editing programs. We train our model using imitation learning (Hussein et al., 2017) and introduce roll-in policy such that each policy learns on the output of applying the previous action. Experiments applying the proposed approach sheds various insights on the problems of long text generation using our model. We suggest various remedies such as using distilled dataset, designing better attention mechanisms and using autoregressive models as a low level program.</abstract>
       <url hash="7949b16f">2021.alta-1.13</url>
@@ -174,7 +174,7 @@
     <paper id="16">
       <title>Does <fixed-case>QA</fixed-case>-based intermediate training help fine-tuning language models for text classification?</title>
       <author><first>Shiwei</first><last>Zhang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <pages>158–162</pages>
       <abstract>Fine-tuning pre-trained language models for downstream tasks has become a norm for NLP. Recently it is found that intermediate training can improve performance for fine-tuning language models for target tasks, high-level inference tasks such as Question Answering (QA) tend to work best as intermediate tasks. However it is not clear if intermediate training generally benefits various language models. In this paper, using the SQuAD-2.0 QA task for intermediate training for target text classification tasks, we experimented on eight tasks for single-sequence classification and eight tasks for sequence-pair classification using two base and two compact language models. Our experiments show that QA-based intermediate training generates varying transfer performance across different language models, except for similar QA tasks.</abstract>
       <url hash="17e05a85">2021.alta-1.16</url>
@@ -201,9 +201,9 @@
     </paper>
     <paper id="19">
       <title>Using Discourse Structure to Differentiate Focus Entities from Background Entities in Scientific Literature</title>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Ameer</first><last>Albahem</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>174–178</pages>
       <abstract>In developing systems to identify focus entities in scientific literature, we face the problem of discriminating key entities of interest from other potentially relevant entities of the same type mentioned in the articles. We introduce the task of pathogen characterisation. We aim to discriminate mentions of biological pathogens, that are actively studied in the research presented in scientific publications. These are the pathogens that are the focus of direct experimentation in the research, rather than those that are referred to for context or as playing secondary roles. In this paper, we explore the hypothesis that these focus entities can be differentiated from other, non-actively studied, pathogens mentioned in articles through analysis of the patterns of mentions across different sections of a scientific paper, that is, using the discourse structure of the paper. We provide an indicative case study with the help of a small data set of PubMed abstracts that have been annotated with actively mentioned pathogens.</abstract>
       <url hash="06cb3fe0">2021.alta-1.19</url>
@@ -215,9 +215,9 @@
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Hiyori</first><last>Yoshikawa</last></author>
       <author><first>Chunpeng</first><last>Ma</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Tomoya</first><last>Iwakura</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>179–184</pages>
       <abstract>Hierarchical document categorisation is a special case of multi-label document categorisation, where there is a taxonomic hierarchy among the labels. While various approaches have been proposed for hierarchical document categorisation, there is no standard benchmark dataset, resulting in different methods being evaluated independently and there being no empirical consensus on what methods perform best. In this work, we examine different combinations of neural text encoders and hierarchical methods in an end-to-end framework, and evaluate over three datasets. We find that the performance of hierarchical document categorisation is determined not only by how the hierarchical information is modelled, but also the structure of the label hierarchy and class distribution.</abstract>
       <url hash="321d97e8">2021.alta-1.20</url>
@@ -251,7 +251,7 @@
     </paper>
     <paper id="23">
       <title>Overview of the 2021 <fixed-case>ALTA</fixed-case> Shared Task: Automatic Grading of Evidence, 10 years later</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>201–204</pages>
       <abstract>The 2021 ALTA shared task is the 12th instance of a series of shared tasks organised by ALTA since 2010. Motivated by the advances in machine learning in the last 10 years, this yearï¿½s task is a re-visit of the 2011 ALTA shared task. Set within the framework of Evidence Based Medicine (EBM), the goal is to predict the qual-ity of the clinical evidence present in a set of documents. This yearï¿½s participant results didnot improve over those of participants from 2011.</abstract>
       <url hash="5b6bab87">2021.alta-1.23</url>
diff --git a/data/xml/2021.alvr.xml b/data/xml/2021.alvr.xml
index cc50d317f1..a418218be0 100644
--- a/data/xml/2021.alvr.xml
+++ b/data/xml/2021.alvr.xml
@@ -26,8 +26,8 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Yuchen</first><last>Pan</last></author>
       <author><first>Jian</first><last>Tang</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1–10</pages>
       <abstract>Caption translation aims to translate image annotations (captions for short). Recently, Multimodal Neural Machine Translation (MNMT) has been explored as the essential solution. Besides of linguistic features in captions, MNMT allows visual(image) features to be used. The integration of multimodal features reinforces the semantic representation and considerably improves translation performance. However, MNMT suffers from the incongruence between visual and linguistic features. To overcome the problem, we propose to extend MNMT architecture with a harmonization network, which harmonizes multimodal features(linguistic and visual features)by unidirectional modal space conversion. It enables multimodal translation to be carried out in a seemingly monomodal translation pipeline. We experiment on the golden Multi30k-16 and 17. Experimental results show that, compared to the baseline,the proposed method yields the improvements of 2.2% BLEU for the scenario of translating English captions into German (En→De) at best,7.6% for the case of English-to-French translation(En→Fr) and 1.5% for English-to-Czech(En→Cz). The utilization of harmonization network leads to the competitive performance to the-state-of-the-art.</abstract>
       <url hash="f36f2fc8">2021.alvr-1.1</url>
diff --git a/data/xml/2021.americasnlp.xml b/data/xml/2021.americasnlp.xml
index b2a41f7e9b..fe071b4bca 100644
--- a/data/xml/2021.americasnlp.xml
+++ b/data/xml/2021.americasnlp.xml
@@ -4,12 +4,12 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</booktitle>
       <editor><first>Manuel</first><last>Mager</last></editor>
-      <editor><first>Arturo</first><last>Oncevay</last></editor>
-      <editor><first>Annette</first><last>Rios</last></editor>
+      <editor id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></editor>
+      <editor id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></editor>
       <editor><first>Ivan Vladimir Meza</first><last>Ruiz</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
-      <editor><first>Katharina</first><last>Kann</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="2">
       <title>A corpus of K’iche’ annotated for morphosyntactic structure</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Robert</first><last>Henderson</last></author>
       <pages>10–20</pages>
       <abstract>This article describes a collection of sentences in K’iche’ annotated for morphology and syntax. K’iche’ is a language in the Mayan language family, spoken in Guatemala. The annotation is done according to the guidelines of the Universal Dependencies project. The corpus consists of a total of 1,433 sentences containing approximately 10,000 tokens and is released under a free/open-source licence. We present a comparison of parsing systems for K’iche’ using this corpus and describe how it can be used for mining linguistic examples.</abstract>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Investigating variation in written forms of <fixed-case>N</fixed-case>ahuatl using character-based language models</title>
       <author><first>Robert</first><last>Pugh</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>21–27</pages>
       <abstract>We describe experiments with character-based language modeling for written variants of Nahuatl. Using a standard LSTM model and publicly available Bible translations, we explore how character language models can be applied to the tasks of estimating mutual intelligibility, identifying genetic similarity, and distinguishing written variants. We demonstrate that these simple language models are able to capture similarities and differences that have been described in the linguistic literature.</abstract>
       <url hash="ba4bb55f">2021.americasnlp-1.3</url>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="6">
       <title>A survey of part-of-speech tagging approaches applied to K’iche’</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Nick</first><last>Howell</last></author>
       <pages>44–52</pages>
       <abstract>We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K’iche’ sentences consisting of approximately 10,000 tokens, with encouraging results: <tex-math>F_1</tex-math> scores 93%+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K’iche’-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63-85%, and on Uspanteko modest, 60-71%. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages.</abstract>
@@ -112,7 +112,7 @@
     <paper id="9">
       <title>A finite-state morphological analyser for <fixed-case>P</fixed-case>araguayan <fixed-case>G</fixed-case>uaraní</title>
       <author><first>Anastasia</first><last>Kuznetsova</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>81–89</pages>
       <abstract>This article describes the development of morphological analyser for Paraguayan Guaraní, agglutinative indigenous language spoken by nearly 6 million people in South America. The implementation of our analyser uses HFST (Helsiki Finite State Technology) and two-level transducer that covers morphotactics and phonological processes occurring in Guaraní. We assess the efficacy of the approach on publicly available Wikipedia and Bible corpora and the naive coverage of analyser reaches 86% on Wikipedia and 91% on Bible corpora.</abstract>
       <url hash="d6bf1547">2021.americasnlp-1.9</url>
@@ -123,7 +123,7 @@
       <title>Morphological Segmentation for <fixed-case>S</fixed-case>eneca</title>
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Robert</first><last>Jimerson</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>90–101</pages>
       <abstract>This study takes up the task of low-resource morphological segmentation for Seneca, a critically endangered and morphologically complex Native American language primarily spoken in what is now New York State and Ontario. The labeled data in our experiments comes from two sources: one digitized from a publicly available grammar book and the other collected from informal sources. We treat these two sources as distinct domains and investigate different evaluation designs for model selection. The first design abides by standard practices and evaluate models with the in-domain development set, while the second one carries out evaluation using a development domain, or the out-of-domain development set. Across a series of monolingual and crosslinguistic training settings, our results demonstrate the utility of neural encoder-decoder architecture when coupled with multi-task learning.</abstract>
       <url hash="371b8485">2021.americasnlp-1.10</url>
@@ -133,7 +133,7 @@
     <paper id="11">
       <title>Representation of <fixed-case>Y</fixed-case>ine [<fixed-case>A</fixed-case>rawak] Morphology by Finite State Transducer Formalism</title>
       <author><first>Adriano</first><last>Ingunza Torres</last></author>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Roberto</first><last>Zariquiey Biondi</last></author>
       <pages>102–112</pages>
@@ -166,7 +166,7 @@
       <title>Expanding <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Polysynthetic Languages: A Case of <fixed-case>S</fixed-case>t. <fixed-case>L</fixed-case>awrence <fixed-case>I</fixed-case>sland <fixed-case>Y</fixed-case>upik</title>
       <author><first>Hyunji Hayley</first><last>Park</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>131–142</pages>
       <abstract>This paper describes the development of the first Universal Dependencies (UD) treebank for St. Lawrence Island Yupik, an endangered language spoken in the Bering Strait region. While the UD guidelines provided a general framework for our annotations, language-specific decisions were made necessary by the rich morphology of the polysynthetic language. Most notably, we annotated a corpus at the morpheme level as well as the word level. The morpheme level annotation was conducted using an existing morphological analyzer and manual disambiguation. By comparing the two resulting annotation schemes, we argue that morpheme-level annotation is essential for polysynthetic languages like St. Lawrence Island Yupik. Word-level annotation results in degenerate trees for some Yupik sentences and often fails to capture syntactic relations that can be manifested at the morpheme level. Dependency parsing experiments provide further support for morpheme-level annotation. Implications for UD annotation of other polysynthetic languages are discussed.</abstract>
       <url hash="49a8ac83">2021.americasnlp-1.14</url>
@@ -219,7 +219,7 @@
     <paper id="19">
       <title>Ayuuk-<fixed-case>S</fixed-case>panish Neural Machine Translator</title>
       <author><first>Delfino</first><last>Zacarías Márquez</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <pages>168–172</pages>
       <abstract>This paper presents the first neural machine translator system for the Ayuuk language. In our experiments we translate from Ayuuk to Spanish, and fromSpanish to Ayuuk. Ayuuk is a language spoken in the Oaxaca state of Mexico by the Ayuukjä’äy people (in Spanish commonly known as Mixes. We use different sources to create a low-resource parallel corpus, more than 6,000 phrases. For some of these resources we rely on automatic alignment. The proposed system is based on the Transformer neural architecture and it uses sub-word level tokenization as the input. We show the current performance given the resources we have collected for the San Juan Güichicovi variant, they are promising, up to 5 BLEU. We based our development on the Masakhane project for African languages.</abstract>
       <url hash="9bee916e">2021.americasnlp-1.19</url>
@@ -237,7 +237,7 @@
     </paper>
     <paper id="21">
       <title>Towards a morphological transducer and orthography converter for <fixed-case>W</fixed-case>estern <fixed-case>T</fixed-case>lacolula <fixed-case>V</fixed-case>alley <fixed-case>Z</fixed-case>apotec</title>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Felipe</first><last>Lopez</last></author>
       <author><first>Brook</first><last>Lillehaugen</last></author>
       <pages>185–193</pages>
@@ -267,7 +267,7 @@
       <author><first>Luis</first><last>Chiruzzo</last></author>
       <author><first>Gustavo</first><last>Giménez-Lugo</last></author>
       <author><first>Ricardo</first><last>Ramos</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <author><first>Rolando</first><last>Coto-Solano</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Elisabeth</first><last>Mager-Hois</last></author>
@@ -341,7 +341,7 @@
       <author><first>Héctor</first><last>Murrieta Bello</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>248–254</pages>
       <abstract>We evaluated a range of neural machine translation techniques developed specifically for low-resource scenarios. Unsuccessfully. In the end, we submitted two runs: (i) a standard phrase-based model, and (ii) a random babbling baseline using character trigrams. We found that it was surprisingly hard to beat (i), in spite of this model being, in theory, a bad fit for polysynthetic languages; and more interestingly, that (ii) was better than several of the submitted systems, highlighting how difficult low-resource machine translation for polysynthetic languages is.</abstract>
       <url hash="f7694ac1">2021.americasnlp-1.28</url>
@@ -353,7 +353,7 @@
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>255–264</pages>
       <abstract>The University of Helsinki participated in the AmericasNLP shared task for all ten language pairs. Our multilingual NMT models reached the first rank on all language pairs in track 1, and first rank on nine out of ten language pairs in track 2. We focused our efforts on three aspects: (1) the collection of additional data from various sources such as Bibles and political constitutions, (2) the cleaning and filtering of training data with the OpusFilter toolkit, and (3) different multilingual training techniques enabled by the latest version of the OpenNMT-py toolkit to make the most efficient use of the scarce data. This paper describes our efforts in detail.</abstract>
       <url hash="c92b8108">2021.americasnlp-1.29</url>
diff --git a/data/xml/2021.argmining.xml b/data/xml/2021.argmining.xml
index dfdd5bbcb2..398ac82cd3 100644
--- a/data/xml/2021.argmining.xml
+++ b/data/xml/2021.argmining.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-10-28" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Argument Mining</booktitle>
-      <editor><first>Khalid</first><last>Al-Khatib</last></editor>
+      <editor id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></editor>
       <editor><first>Yufang</first><last>Hou</last></editor>
       <editor><first>Manfred</first><last>Stede</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -43,7 +43,7 @@
       <author><first>Juri</first><last>Opitz</last></author>
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Philipp</first><last>Wiesenbach</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>24–35</pages>
       <abstract>When assessing the similarity of arguments, researchers typically use approaches that do not provide interpretable evidence or justifications for their ratings. Hence, the features that determine argument similarity remain elusive. We address this issue by introducing <i>novel argument similarity metrics</i> that aim at high performance and explainability. We show that Abstract Meaning Representation (AMR) graphs can be useful for representing arguments, and that novel AMR graph metrics can offer explanations for argument similarity ratings. We start from the hypothesis that <i>similar premises</i> often lead to <i>similar conclusions</i>—and extend an approach for <i>AMR-based argument similarity rating</i> by estimating, in addition, the similarity of <i>conclusions</i> that we automatically infer from the arguments used as premises. We show that AMR similarity metrics make argument similarity judgements more <i>interpretable</i> and may even support <i>argument quality judgements</i>. Our approach provides significant performance improvements over strong baselines in a <i>fully unsupervised</i> setting. Finally, we make first steps to address the problem of reference-less evaluation of argumentative conclusion generations.</abstract>
@@ -128,7 +128,7 @@
       <author><first>Aris</first><last>Fergadis</last></author>
       <author><first>Dimitris</first><last>Pappas</last></author>
       <author><first>Antonia</first><last>Karamolegkou</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <pages>100–111</pages>
       <abstract>Science, technology and innovation (STI) policies have evolved in the past decade. We are now progressing towards policies that are more aligned with sustainable development through integrating social, economic and environmental dimensions. In this new policy environment, the need to keep track of innovation from its conception in Science and Research has emerged. Argumentation mining, an interdisciplinary NLP field, gives rise to the required technologies. In this study, we present the first STI-driven multidisciplinary corpus of scientific abstracts annotated for argumentative units (AUs) on the sustainable development goals (SDGs) set by the United Nations (UN). AUs are the sentences conveying the Claim(s) reported in the author’s original research and the Evidence provided for support. We also present a set of strong, BERT-based neural baselines achieving an f1-score of 70.0 for Claim and 62.4 for Evidence identification evaluated with 10-fold cross-validation. To demonstrate the effectiveness of our models, we experiment with different test sets showing comparable performance across various SDG policy domains. Our dataset and models are publicly available for research purposes.</abstract>
       <url hash="98812cd6">2021.argmining-1.10</url>
@@ -151,7 +151,7 @@
       <title>Multilingual Counter Narrative Type Classification</title>
       <author><first>Yi-Ling</first><last>Chung</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>125–132</pages>
       <abstract>The growing interest in employing counter narratives for hatred intervention brings with it a focus on dataset creation and automation strategies. In this scenario, learning to recognize counter narrative types from natural text is expected to be useful for applications such as hate speech countering, where operators from non-governmental organizations are supposed to answer to hate with several and diverse arguments that can be mined from online sources. This paper presents the first multilingual work on counter narrative type classification, evaluating SoTA pre-trained language models in monolingual, multilingual and cross-lingual settings. When considering a fine-grained annotation of counter narrative classes, we report strong baseline classification results for the majority of the counter narrative types, especially if we translate every language to English before cross-lingual prediction. This suggests that knowledge about counter narratives can be successfully transferred across languages.</abstract>
       <url hash="b15138ac">2021.argmining-1.12</url>
@@ -163,7 +163,7 @@
       <title>Predicting Moderation of Deliberative Arguments: Is Argument Quality the Key?</title>
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Iman</first><last>Jundi</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <pages>133–141</pages>
       <abstract>Human moderation is commonly employed in deliberative contexts (argumentation and discussion targeting a shared decision on an issue relevant to a group, e.g., citizens arguing on how to employ a shared budget). As the scale of discussion enlarges in online settings, the overall discussion quality risks to drop and moderation becomes more important to assist participants in having a cooperative and productive interaction. The scale also makes it more important to employ NLP methods for(semi-)automatic moderation, e.g. to prioritize when moderation is most needed. In this work, we make the first steps towards (semi-)automatic moderation by using state-of-the-art classification models to predict which posts require moderation, showing that while the task is undoubtedly difficult, performance is significantly above baseline. We further investigate whether argument quality is a key indicator of the need for moderation, showing that surprisingly, high quality arguments also trigger moderation. We make our code and data publicly available.</abstract>
@@ -174,7 +174,7 @@
     <paper id="14">
       <title>Self-trained Pretrained Language Models for Evidence Detection</title>
       <author><first>Mohamed</first><last>Elaraby</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>142–147</pages>
       <abstract>Argument role labeling is a fundamental task in Argument Mining research. However, such research often suffers from a lack of large-scale datasets labeled for argument roles such as evidence, which is crucial for neural model training. While large pretrained language models have somewhat alleviated the need for massive manually labeled datasets, how much these models can further benefit from self-training techniques hasn’t been widely explored in the literature in general and in Argument Mining specifically. In this work, we focus on self-trained language models (particularly BERT) for evidence detection. We provide a thorough investigation on how to utilize pseudo labels effectively in the self-training scheme. We also assess whether adding pseudo labels from an out-of-domain source can be beneficial. Experiments on sentence level evidence detection show that self-training can complement pretrained language models to provide performance improvements.</abstract>
       <url hash="359ce20b">2021.argmining-1.14</url>
@@ -184,7 +184,7 @@
     <paper id="15">
       <title>Multi-task Learning in Argument Mining for Persuasive Online Discussions</title>
       <author><first>Nhat</first><last>Tran</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>148–153</pages>
       <abstract>We utilize multi-task learning to improve argument mining in persuasive online discussions, in which both micro-level and macro-level argumentation must be taken into consideration. Our models learn to identify argument components and the relations between them at the same time. We also tackle the low-precision which arises from imbalanced relation data by experimenting with SMOTE and XGBoost. Our approaches improve over baselines that use the same pre-trained language model but process the argument component task and two relation tasks separately. Furthermore, our results suggest that the tasks to be incorporated into multi-task learning should be taken into consideration as using all relevant tasks does not always lead to the best performance.</abstract>
       <url hash="470d2a1f">2021.argmining-1.15</url>
@@ -208,7 +208,7 @@
     <paper id="17">
       <title>Matching The Statements: A Simple and Accurate Model for Key Point Analysis</title>
       <author><first>Hoang</first><last>Phan</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
       <author><first>Long</first><last>Nguyen</last></author>
       <author><first>Khanh</first><last>Doan</last></author>
       <pages>165–174</pages>
@@ -237,7 +237,7 @@
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Maximilian</first><last>Spliethöver</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>184–189</pages>
diff --git a/data/xml/2021.autosimtrans.xml b/data/xml/2021.autosimtrans.xml
index ade2f629a6..ca0063b7cb 100644
--- a/data/xml/2021.autosimtrans.xml
+++ b/data/xml/2021.autosimtrans.xml
@@ -9,7 +9,7 @@
       <editor><first>Zhongjun</first><last>He</last></editor>
       <editor><first>Qun</first><last>Liu</last></editor>
       <editor><first>Maha</first><last>Elbayad</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <editor><first>Haifeng</first><last>Wang</last></editor>
       <editor><first>Mingbo</first><last>Ma</last></editor>
       <editor><first>Ruiqing</first><last>Zhang</last></editor>
diff --git a/data/xml/2021.bea.xml b/data/xml/2021.bea.xml
index 495e34b31b..b25511c16d 100644
--- a/data/xml/2021.bea.xml
+++ b/data/xml/2021.bea.xml
@@ -27,7 +27,7 @@
       <author><first>Mengyu</first><last>Zhang</last></author>
       <author><first>Weiqi</first><last>Wang</last></author>
       <author><first>Shuqiao</first><last>Sun</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>1–10</pages>
       <abstract>This paper studies Negation Scope Resolution (NSR) for Chinese as a Second Language (CSL), which shows many unique characteristics that distinguish itself from “standard” Chinese. We annotate a new moderate-sized corpus that covers two background L1 languages, viz. English and Japanese. We build a neural NSR system, which achieves a new state-of-the-art accuracy on English benchmark data. We leverage this system to gauge how successful NSR for CSL can be. Different native language backgrounds of language learners result in unequal cross-lingual transfer, which has a significant impact on processing second language data. In particular, manual annotation, empirical evaluation and error analysis indicate two non-obvious facts: 1) L2-Chinese, L1-Japanese data are more difficult to analyze and thus annotate than L2-Chinese, L1-English data; 2) computational models trained on L2-Chinese, L1-Japanese data perform better than models trained on L2-Chinese, L1-English data.</abstract>
       <url hash="f3f9c66d">2021.bea-1.1</url>
@@ -46,7 +46,7 @@
     <paper id="3">
       <title>Employing distributional semantics to organize task-focused vocabulary learning</title>
       <author><first>Haemanth</first><last>Santhi Ponnusamy</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>26–36</pages>
       <abstract>How can a learner systematically prepare for reading a book they are interested in? In this paper, we explore how computational linguistic methods such as distributional semantics, morphological clustering, and exercise generation can be combined with graph-based learner models to answer this question both conceptually and in practice. Based on highly structured learner models and concepts from network analysis, the learner is guided to efficiently explore the targeted lexical space. They practice using multi-gap learning activities generated from the book. In sum, the approach combines computational linguistic methods with concepts from network analysis and tutoring systems to support learners in pursuing their individual reading task goals.</abstract>
       <url hash="0d9f4c4d">2021.bea-1.3</url>
@@ -66,7 +66,7 @@
       <title>Broad Linguistic Complexity Analysis for <fixed-case>G</fixed-case>reek Readability Classification</title>
       <author><first>Savvas</first><last>Chatzipanagiotidis</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>48–58</pages>
       <abstract>This paper explores the linguistic complexity of Greek textbooks as a readability classification task. We analyze textbook corpora for different school subjects and textbooks for Greek as a Second Language, covering a very wide spectrum of school age groups and proficiency levels. A broad range of quantifiable linguistic complexity features (lexical, morphological and syntactic) are extracted and calculated. Conducting experiments with different feature subsets, we show that the different linguistic dimensions contribute orthogonal information, each contributing towards the highest result achieved using all linguistic feature subsets. A readability classifier trained on this basis reaches a classification accuracy of 88.16% for the Greek as a Second Language corpus. To investigate the generalizability of the classification models, we also perform cross-corpus evaluations. We show that the model trained on the most varied text collection (for Greek as a school subject) generalizes best. In addition to advancing the state of the art for Greek readability analysis, the paper also contributes insights on the role of different feature sets and training setups for generalizable readability classification.</abstract>
       <url hash="8904c1b6">2021.bea-1.5</url>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Character Set Construction for <fixed-case>C</fixed-case>hinese Language Learning</title>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>59–63</pages>
       <abstract>To promote efficient learning of Chinese characters, pedagogical materials may present not only a single character, but a set of characters that are related in meaning and in written form. This paper investigates automatic construction of these character sets. The proposed model represents a character as averaged word vectors of common words containing the character. It then identifies sets of characters with high semantic similarity through clustering. Human evaluation shows that this representation outperforms direct use of character embeddings, and that the resulting character sets capture distinct semantic ranges.</abstract>
       <url hash="521b195f">2021.bea-1.6</url>
@@ -102,7 +102,7 @@
     <paper id="9">
       <title>Essay Quality Signals as Weak Supervision for Source-based Essay Scoring</title>
       <author><first>Haoran</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>85–96</pages>
       <abstract>Human essay grading is a laborious task that can consume much time and effort. Automated Essay Scoring (AES) has thus been proposed as a fast and effective solution to the problem of grading student writing at scale. However, because AES typically uses supervised machine learning, a human-graded essay corpus is still required to train the AES model. Unfortunately, such a graded corpus often does not exist, so creating a corpus for machine learning can also be a laborious task. This paper presents an investigation of replacing the use of human-labeled essay grades when training an AES system with two automatically available but weaker signals of essay quality: word count and topic distribution similarity. Experiments using two source-based essay scoring (evidence score) corpora show that while weak supervision does not yield a competitive result when training a neural source-based AES model, it can be used to successfully extract Topical Components (TCs) from a source text, which are required by a supervised feature-based AES model. In particular, results show that feature-based AES performance is comparable with either automatically or manually constructed TCs.</abstract>
       <url hash="bf57a2fe">2021.bea-1.9</url>
@@ -188,7 +188,7 @@
       <author><first>Manav</first><last>Rathod</last></author>
       <author><first>Tony</first><last>Tu</last></author>
       <author><first>Yunfang</first><last>Xiao</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>158–170</pages>
       <abstract>Automated question generation has the potential to greatly aid in education applications, such as online study aids to check understanding of readings. The state-of-the-art in neural question generation has advanced greatly, due in part to the availability of large datasets of question-answer pairs. However, the questions generated are often surface-level and not challenging for a human to answer. To develop more challenging questions, we propose the novel task of cause-and-effect question generation. We build a pipeline that extracts causal relations from passages of input text, and feeds these as input to a state-of-the-art neural question generator. The extractor is based on prior work that classifies causal relations by linguistic category (Cao et al., 2016; Altenberg, 1984). This work results in a new, publicly available collection of cause-and-effect questions. We evaluate via both automatic and manual metrics and find performance improves for both question generation and question answering when we utilize a small auxiliary data source of cause-and-effect questions for fine-tuning. Our approach can be easily applied to generate cause-and-effect questions from other text collections and educational material, allowing for adaptable large-scale generation of cause-and-effect questions.</abstract>
       <url hash="4521e16a">2021.bea-1.17</url>
diff --git a/data/xml/2021.bionlp.xml b/data/xml/2021.bionlp.xml
index fd2a2ce30f..0e1ff17b55 100644
--- a/data/xml/2021.bionlp.xml
+++ b/data/xml/2021.bionlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 20th Workshop on Biomedical Language Processing</booktitle>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -21,8 +21,8 @@
     <paper id="1">
       <title>Improving <fixed-case>BERT</fixed-case> Model Using Contrastive Learning for Biomedical Relation Extraction</title>
       <author><first>Peng</first><last>Su</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>1–10</pages>
       <abstract>Contrastive learning has been used to learn a high-quality representation of the image in computer vision. However, contrastive learning is not widely utilized in natural language processing due to the lack of a general method of data augmentation for text data. In this work, we explore the method of employing contrastive learning to improve the text representation from the BERT model for relation extraction. The key knob of our framework is a unique contrastive pre-training step tailored for the relation extraction tasks by seamlessly integrating linguistic knowledge into the data augmentation. Furthermore, we investigate how large-scale data constructed from the external knowledge bases can enhance the generality of contrastive pre-training of BERT. The experimental results on three relation extraction benchmark datasets demonstrate that our method can improve the BERT model representation and achieve state-of-the-art performance. In addition, we explore the interpretability of models by showing that BERT with contrastive pre-training relies more on rationales for prediction. Our code and data are publicly available at: <url>https://github.com/AnonymousForNow</url>.</abstract>
       <url hash="49dfb9f0">2021.bionlp-1.1</url>
@@ -44,8 +44,8 @@
     <paper id="3">
       <title>Scalable Few-Shot Learning of Robust Biomedical Name Representations</title>
       <author><first>Pieter</first><last>Fivez</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>23–29</pages>
       <abstract>Recent research on robust representations of biomedical names has focused on modeling large amounts of fine-grained conceptual distinctions using complex neural encoders. In this paper, we explore the opposite paradigm: training a simple encoder architecture using only small sets of names sampled from high-level biomedical concepts. Our encoder post-processes pretrained representations of biomedical names, and is effective for various types of input representations, both domain-specific or unsupervised. We validate our proposed few-shot learning approach on multiple biomedical relatedness benchmarks, and show that it allows for continual learning, where we accumulate information from various conceptual hierarchies to consistently improve encoder performance. Given these findings, we propose our approach as a low-cost alternative for exploring the impact of conceptual distinctions on robust biomedical name representations.</abstract>
       <url hash="15454166">2021.bionlp-1.3</url>
@@ -66,8 +66,8 @@
     <paper id="5">
       <title>Are we there yet? Exploring clinical domain knowledge of <fixed-case>BERT</fixed-case> models</title>
       <author><first>Madhumita</first><last>Sushil</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>41–53</pages>
       <abstract>We explore whether state-of-the-art BERT models encode sufficient domain knowledge to correctly perform domain-specific inference. Although BERT implementations such as BioBERT are better at domain-based reasoning than those trained on general-domain corpora, there is still a wide margin compared to human performance on these tasks. To bridge this gap, we explore whether supplementing textual domain knowledge in the medical NLI task: a) by further language model pretraining on the medical domain corpora, b) by means of lexical match algorithms such as the BM25 algorithm, c) by supplementing lexical retrieval with dependency relations, or d) by using a trained retriever module, can push this performance closer to that of humans. We do not find any significant difference between knowledge supplemented classification as opposed to the baseline BERT models, however. This is contrary to the results for evidence retrieval on other tasks such as open domain question answering (QA). By examining the retrieval output, we show that the methods fail due to unreliable knowledge retrieval for complex domain-specific reasoning. We conclude that the task of unsupervised text retrieval to bridge the gap in existing information to facilitate inference is more complex than what the state-of-the-art methods can solve, and warrants extensive research in the future.</abstract>
       <url hash="f0859d2d">2021.bionlp-1.5</url>
@@ -181,7 +181,7 @@
       <author><first>Yannis</first><last>Katsis</last></author>
       <author><first>Tyler</first><last>Baldwin</last></author>
       <author><first>Ho-Cheol</first><last>Kim</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>126–130</pages>
       <abstract>NLP has emerged as an essential tool to extract knowledge from the exponentially increasing volumes of biomedical texts. Many NLP tasks, such as named entity recognition and named entity normalization, are especially challenging in the biomedical domain partly because of the prolific use of acronyms. Long names for diseases, bacteria, and chemicals are often replaced by acronyms. We propose Biomedical Local Acronym Resolver (BLAR), a high-performing acronym resolver that leverages state-of-the-art (SOTA) pre-trained language models to accurately resolve local acronyms in biomedical texts. We test BLAR on the Ab3P corpus and achieve state-of-the-art results compared to the current best-performing local acronym resolution algorithms and models.</abstract>
       <url hash="dbce5654">2021.bionlp-1.14</url>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="15">
       <title>Claim Detection in Biomedical <fixed-case>T</fixed-case>witter Posts</title>
-      <author><first>Amelie</first><last>Wührl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>131–142</pages>
       <abstract>Social media contains unfiltered and unique information, which is potentially of great value, but, in the case of misinformation, can also do great harm. With regards to biomedical topics, false information can be particularly dangerous. Methods of automatic fact-checking and fake news detection address this problem, but have not been applied to the biomedical domain in social media yet. We aim to fill this research gap and annotate a corpus of 1200 tweets for implicit and explicit biomedical claims (the latter also with span annotations for the claim phrase). With this corpus, which we sample to be related to COVID-19, measles, cystic fibrosis, and depression, we develop baseline models which detect tweets that contain a claim automatically. Our analyses reveal that biomedical tweets are densely populated with claims (45 % in a corpus sampled to contain 1200 tweets focused on the domains mentioned above). Baseline classification experiments with embedding-based classifiers and BERT-based transfer learning demonstrate that the detection is challenging, however, shows acceptable performance for the identification of explicit expressions of claims. Implicit claim tweets are more challenging to detect.</abstract>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="19">
       <title>Word-Level Alignment of Paper Documents with their Electronic Full-Text Counterparts</title>
-      <author><first>Mark-Christoph</first><last>Müller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Müller</last></author>
       <author><first>Sucheta</first><last>Ghosh</last></author>
       <author><first>Ulrike</first><last>Wittig</last></author>
       <author><first>Maja</first><last>Rey</last></author>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="20">
       <title>Improving Biomedical Pretrained Language Models with Knowledge</title>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Yijia</first><last>Liu</last></author>
       <author><first>Chuanqi</first><last>Tan</last></author>
       <author><first>Songfang</first><last>Huang</last></author>
@@ -260,7 +260,7 @@
     <paper id="21">
       <title><fixed-case>E</fixed-case>ntity<fixed-case>BERT</fixed-case>: Entity-centric Masking Strategy for Model Pretraining for the Clinical Domain</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -273,8 +273,8 @@
     <paper id="22">
       <title>Contextual explanation rules for neural clinical classifiers</title>
       <author><first>Madhumita</first><last>Sushil</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>202–212</pages>
       <abstract>Several previous studies on explanation for recurrent neural networks focus on approaches that find the most important input segments for a network as its explanations. In that case, the manner in which these input segments combine with each other to form an explanatory pattern remains unknown. To overcome this, some previous work tries to find patterns (called rules) in the data that explain neural outputs. However, their explanations are often insensitive to model parameters, which limits the scalability of text explanations. To overcome these limitations, we propose a pipeline to explain RNNs by means of decision lists (also called rules) over skipgrams. For evaluation of explanations, we create a synthetic sepsis-identification dataset, as well as apply our technique on additional clinical and sentiment analysis datasets. We find that our technique persistently achieves high explanation fidelity and qualitatively interpretable rules.</abstract>
       <url hash="f1f86a29">2021.bionlp-1.22</url>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="23">
       <title>Exploring Word Segmentation and Medical Concept Recognition for <fixed-case>C</fixed-case>hinese Medical Texts</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Yuanhe</first><last>Tian</last></author>
       <author><first>Tsung-Hui</first><last>Chang</last></author>
       <author><first>Song</first><last>Wu</last></author>
@@ -298,7 +298,7 @@
     <paper id="24">
       <title><fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>-Transformers: Building Large Biomedical Language Models with <fixed-case>BERT</fixed-case>, <fixed-case>ALBERT</fixed-case> and <fixed-case>ELECTRA</fixed-case></title>
       <author><first>Sultan</first><last>Alrowili</last></author>
-      <author><first>Vijay</first><last>Shanker</last></author>
+      <author id="k-vijay-shanker"><first>Vijay</first><last>Shanker</last></author>
       <pages>221–227</pages>
       <abstract>The impact of design choices on the performance of biomedical language models recently has been a subject for investigation. In this paper, we empirically study biomedical domain adaptation with large transformer models using different design choices. We evaluate the performance of our pretrained models against other existing biomedical language models in the literature. Our results show that we achieve state-of-the-art results on several biomedical domain tasks despite using similar or less computational cost compared to other models in the literature. Our findings highlight the significant effect of design choices on improving the performance of biomedical language models.</abstract>
       <url hash="1d8c8f58">2021.bionlp-1.24</url>
@@ -330,7 +330,7 @@
       <title>Measuring the relative importance of full text sections for information retrieval from scientific literature.</title>
       <author><first>Lana</first><last>Yeganova</last></author>
       <author><first>Won Gyu</first><last>Kim</last></author>
-      <author><first>Donald</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Donald</first><last>Comeau</last></author>
       <author><first>W John</first><last>Wilbur</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>247–256</pages>
@@ -344,10 +344,10 @@
       <author><first>Khalil</first><last>Mrini</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Emilia</first><last>Farcas</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>257–262</pages>
       <abstract>In this paper, we describe our approach to question summarization and multi-answer summarization in the context of the 2021 MEDIQA shared task (Ben Abacha et al., 2021). We propose two kinds of transfer learning for the abstractive summarization of medical questions. First, we train on HealthCareMagic, a large question summarization dataset collected from an online healthcare service platform. Second, we leverage the ability of the BART encoder-decoder architecture to model both generation and classification tasks to train on the task of Recognizing Question Entailment (RQE) in the medical domain. We show that both transfer learning methods combined achieve the highest ROUGE scores. Finally, we cast the question-driven extractive summarization of multiple relevant answer documents as an Answer Sentence Selection (AS2) problem. We show how we can preprocess the MEDIQA-AnS dataset such that it can be trained in an AS2 setting. Our AS2 model is able to generate extractive summaries achieving high ROUGE scores.</abstract>
       <url hash="5cf6ffad">2021.bionlp-1.28</url>
@@ -393,7 +393,7 @@
     </paper>
     <paper id="32">
       <title>Optum at <fixed-case>MEDIQA</fixed-case> 2021: Abstractive Summarization of Radiology Reports using simple <fixed-case>BART</fixed-case> Finetuning</title>
-      <author><first>Ravi</first><last>Kondadadi</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravi</first><last>Kondadadi</last></author>
       <author><first>Sahil</first><last>Manchanda</last></author>
       <author><first>Jason</first><last>Ngo</last></author>
       <author><first>Ronan</first><last>McCormack</last></author>
@@ -418,7 +418,7 @@
       <title><fixed-case>NLM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2021: Transfer Learning-based Approaches for Consumer Question and Multi-Answer Summarization</title>
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Mourad</first><last>Sarrouti</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <pages>291–301</pages>
       <abstract>The quest for seeking health information has swamped the web with consumers’ healthrelated questions, which makes the need for efficient and reliable question answering systems more pressing. The consumers’ questions, however, are very descriptive and contain several peripheral information (like patient’s medical history, demographic information, etc.), that are often not required for answering the question. Furthermore, it contributes to the challenges of understanding natural language questions for automatic answer retrieval. Also, it is crucial to provide the consumers with the exact and relevant answers, rather than the entire pool of answer documents to their question. One of the cardinal tasks in achieving robust consumer health question answering systems is the question summarization and multi-document answer summarization. This paper describes the participation of the U.S. National Library of Medicine (NLM) in Consumer Question and Multi-Answer Summarization tasks of the MEDIQA 2021 challenge at NAACL-BioNLP workshop. In this work, we exploited the capabilities of pre-trained transformer models and introduced a transfer learning approach for the abstractive Question Summarization and extractive Multi-Answer Summarization tasks by first pre-training our model on a task-specific summarization dataset followed by fine-tuning it for both the tasks via incorporating medical entities. We achieved the second, sixth and the fourth position for the Question Summarization task in terms ROUGE-1, ROUGE-2 and ROUGE-L scores respectively.</abstract>
       <url hash="a89f2507">2021.bionlp-1.34</url>
@@ -444,8 +444,8 @@
       <author><first>Minh-Quang</first><last>Nguyen</last></author>
       <author><first>Huy-Son</first><last>Nguyen</last></author>
       <author><first>Linh Nguyen Tran</first><last>Ngoc</last></author>
-      <author><first>Quang-Thuy</first><last>Ha</last></author>
-      <author><first>Mai-Vu</first><last>Tran</last></author>
+      <author id="quang-thuy-ha"><first>Quang-Thuy</first><last>Ha</last></author>
+      <author id="mai-vu-tran"><first>Mai-Vu</first><last>Tran</last></author>
       <pages>311–319</pages>
       <abstract>This paper describes a system developed to summarize multiple answers challenge in the MEDIQA 2021 shared task collocated with the BioNLP 2021 Workshop. We propose an extractive summarization architecture based on several scores and state-of-the-art techniques. We also present our novel prosper-thy-neighbour strategies to improve performance. Our model has been proven to be effective with the best ROUGE-1/ROUGE-L scores, being the shared task runner up by ROUGE-2 F1 score (over 13 participated teams).</abstract>
       <url hash="da0b8e70">2021.bionlp-1.36</url>
@@ -456,7 +456,7 @@
       <title><fixed-case>MNLP</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2021: Fine-Tuning <fixed-case>PEGASUS</fixed-case> for Consumer Health Question Summarization</title>
       <author><first>Jooyeon</first><last>Lee</last></author>
       <author><first>Huong</first><last>Dang</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
       <author><first>Sam</first><last>Henry</last></author>
       <pages>320–327</pages>
       <abstract>This paper details a Consumer Health Question (CHQ) summarization model submitted to MEDIQA 2021 for shared task 1: Question Summarization. Many CHQs are composed of multiple sentences with typos or unnecessary information, which can interfere with automated question answering systems. Question summarization mitigates this issue by removing this unnecessary information, aiding automated systems in generating a more accurate summary. Our summarization approach focuses on applying multiple pre-processing techniques, including question focus identification on the input and the development of an ensemble method to combine question focus with an abstractive summarization method. We use the state-of-art abstractive summarization model, PEGASUS (Pre-training with Extracted Gap-sentences for Abstractive Summarization), to generate abstractive summaries. Our experiments show that using our ensemble method, which combines abstractive summarization with question focus identification, improves performance over using summarization alone. Our model shows a ROUGE-2 F-measure of 11.14% against the official test dataset.</abstract>
@@ -466,7 +466,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>UET</fixed-case>fishes at <fixed-case>MEDIQA</fixed-case> 2021: Standing-on-the-Shoulders-of-Giants Model for Abstractive Multi-answer Summarization</title>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Quoc-An</first><last>Nguyen</last></author>
       <author><first>Quoc-Hung</first><last>Duong</last></author>
       <author><first>Minh-Quang</first><last>Nguyen</last></author>
diff --git a/data/xml/2021.blackboxnlp.xml b/data/xml/2021.blackboxnlp.xml
index a4ed4df989..1981c30741 100644
--- a/data/xml/2021.blackboxnlp.xml
+++ b/data/xml/2021.blackboxnlp.xml
@@ -104,7 +104,7 @@
     <paper id="7">
       <title><fixed-case>ALL</fixed-case> Dolphins Are Intelligent and <fixed-case>SOME</fixed-case> Are Friendly: Probing <fixed-case>BERT</fixed-case> for Nouns’ Semantic Properties and their Prototypicality</title>
       <author><first>Marianna</first><last>Apidianaki</last></author>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <pages>79–94</pages>
       <abstract>Large scale language models encode rich commonsense knowledge acquired through exposure to massive data during pre-training, but their understanding of entities and their semantic properties is unclear. We probe BERT (Devlin et al., 2019) for the properties of English nouns as expressed by adjectives that do not restrict the reference scope of the noun they modify (as in “red car”), but instead emphasise some inherent aspect (“red strawberry”). We base our study on psycholinguistics datasets that capture the association strength between nouns and their semantic features. We probe BERT using cloze tasks and in a classification setting, and show that the model has marginal knowledge of these features and their prevalence as expressed in these datasets. We discuss factors that make evaluation challenging and impede drawing general conclusions about the models’ knowledge of noun properties. Finally, we show that when tested in a fine-tuning setting addressing entailment, BERT successfully leverages the information needed for reasoning about the meaning of adjective-noun constructions outperforming previous methods.</abstract>
       <url hash="34a89e31">2021.blackboxnlp-1.7</url>
@@ -148,7 +148,7 @@
       <author><first>Bertrand</first><last>Higy</last></author>
       <author><first>Lieke</first><last>Gelderloos</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>163–176</pages>
       <abstract>The distributed and continuous representations used by neural networks are at odds with representations employed in linguistics, which are typically symbolic. Vector quantization has been proposed as a way to induce discrete neural representations that are closer in nature to their linguistic counterparts. However, it is not clear which metrics are the best-suited to analyze such discrete representations. We compare the merits of four commonly used metrics in the context of weakly supervised models of spoken language. We compare the results they show when applied to two different models, while systematically studying the effect of the placement and size of the discretization layer. We find that different evaluation regimes can give inconsistent results. While we can attribute them to the properties of the different metrics in most cases, one point of concern remains: the use of minimal pairs of phoneme triples as stimuli disadvantages larger discrete unit inventories, unlike metrics applied to complete utterances. Furthermore, while in general vector quantization induces representations that correlate with units posited in linguistics, the strength of this correlation is only moderate.</abstract>
       <url hash="c3eeac29">2021.blackboxnlp-1.11</url>
@@ -196,7 +196,7 @@
     <paper id="15">
       <title>On the Language-specificity of Multilingual <fixed-case>BERT</fixed-case> and the Impact of Fine-tuning</title>
       <author><first>Marc</first><last>Tanti</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>214–227</pages>
@@ -311,7 +311,7 @@
     </paper>
     <paper id="25">
       <title>What <fixed-case>BERT</fixed-case> Based Language Model Learns in Spoken Transcripts: An Empirical Study</title>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Mukuntha</first><last>Narayanan Sundararaman</last></author>
       <author><first>Jithendra</first><last>Vepa</last></author>
       <pages>322–336</pages>
@@ -389,7 +389,7 @@
       <author><first>Badr</first><last>Abdullah</last></author>
       <author><first>Iuliia</first><last>Zaitova</last></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>407–419</pages>
       <abstract>How do neural networks “perceive” speech sounds from unknown languages? Does the typological similarity between the model’s training language (L1) and an unknown language (L2) have an impact on the model representations of L2 speech signals? To answer these questions, we present a novel experimental design based on representational similarity analysis (RSA) to analyze acoustic word embeddings (AWEs)—vector representations of variable-duration spoken-word segments. First, we train monolingual AWE models on seven Indo-European languages with various degrees of typological similarity. We then employ RSA to quantify the cross-lingual similarity by simulating native and non-native spoken-word processing using AWEs. Our experiments show that typological similarity indeed affects the representational similarity of the models in our study. We further discuss the implications of our work on modeling speech processing and language similarity with neural networks.</abstract>
@@ -443,9 +443,9 @@
     </paper>
     <paper id="37">
       <title>Controlled tasks for model analysis: Retrieving discrete information from sequences</title>
-      <author><first>Ionut-Teodor</first><last>Sorodoc</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>468–478</pages>
       <abstract>In recent years, the NLP community has shown increasing interest in analysing how deep learning models work. Given that large models trained on complex tasks are difficult to inspect, some of this work has focused on controlled tasks that emulate specific aspects of language. We propose a new set of such controlled tasks to explore a crucial aspect of natural language processing that has not received enough attention: the need to retrieve discrete information from sequences. We also study model behavior on the tasks with simple instantiations of Transformers and LSTMs. Our results highlight the beneficial role of decoder attention and its sometimes unexpected interaction with other components. Moreover, we show that, for most of the tasks, these simple models still show significant difficulties. We hope that the community will take up the analysis possibilities that our tasks afford, and that a clearer understanding of model behavior on the tasks will lead to better and more transparent models.</abstract>
       <url hash="ae95a9a1">2021.blackboxnlp-1.37</url>
@@ -477,7 +477,7 @@
       <title>Do Language Models Know the Way to <fixed-case>R</fixed-case>ome?</title>
       <author><first>Bastien</first><last>Liétard</last></author>
       <author><first>Mostafa</first><last>Abdou</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>510–517</pages>
       <abstract>The global geometry of language models is important for a range of applications, but language model probes tend to evaluate rather local relations, for which ground truths are easily obtained. In this paper we exploit the fact that in geography, ground truths are available beyond local relations. In a series of experiments, we evaluate the extent to which language model representations of city and country names are isomorphic to real-world geography, e.g., if you tell a language model where Paris and Berlin are, does it know the way to Rome? We find that language models generally encode limited geographic information, but with larger models performing the best, suggesting that geographic knowledge <i>can</i> be induced from higher-order co-occurrence statistics.</abstract>
       <url hash="abdabe9b">2021.blackboxnlp-1.40</url>
@@ -501,7 +501,7 @@
       <title>Fine-Tuned Transformers Show Clusters of Similar Representations Across Layers</title>
       <author><first>Jason</first><last>Phang</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>529–538</pages>
       <abstract>Despite the success of fine-tuning pretrained language encoders like BERT for downstream natural language understanding (NLU) tasks, it is still poorly understood how neural networks change after fine-tuning. In this work, we use centered kernel alignment (CKA), a method for comparing learned representations, to measure the similarity of representations in task-tuned models across layers. In experiments across twelve NLU tasks, we discover a consistent block diagonal structure in the similarity of representations within fine-tuned RoBERTa and ALBERT models, with strong similarity within clusters of earlier and later layers, but not between them. The similarity of later layer representations implies that later layers only marginally contribute to task performance, and we verify in experiments that the top few layers of fine-tuned Transformers can be discarded without hurting performance, even with no further tuning.</abstract>
       <url hash="b47764c0">2021.blackboxnlp-1.42</url>
diff --git a/data/xml/2021.bppf.xml b/data/xml/2021.bppf.xml
index 89e8e57524..880ffcdacb 100644
--- a/data/xml/2021.bppf.xml
+++ b/data/xml/2021.bppf.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Benchmarking: Past, Present and Future</booktitle>
-      <editor><first>Kenneth</first><last>Church</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="kenneth-church"><first>Kenneth</first><last>Church</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <editor><first>Valia</first><last>Kordoni</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Guideline Bias in <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Dialogues</title>
       <author><first>Victor Petrén Bach</first><last>Hansen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>8–14</pages>
       <abstract>NLP models struggle with generalization due to sampling and annotator bias. This paper focuses on a different kind of bias that has received very little attention: guideline bias, i.e., the bias introduced by how our annotator guidelines are formulated. We examine two recently introduced dialogue datasets, CCPE-M and Taskmaster-1, both collected by trained assistants in a Wizard-of-Oz set-up. For CCPE-M, we show how a simple lexical bias for the word like in the guidelines biases the data collection. This bias, in effect, leads to poor performance on data without this bias: a preference elicitation architecture based on BERT suffers a 5.3% absolute drop in performance, when like is replaced with a synonymous phrase, and a 13.2% drop in performance when evaluated on out-of-sample data. For Taskmaster-1, we show how the order in which instructions are resented, biases the data collection.</abstract>
       <url hash="4aab095f">2021.bppf-1.2</url>
@@ -45,8 +45,8 @@
       <author><first>Tommaso</first><last>Fornaciari</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
       <pages>15–21</pages>
       <abstract>Evaluation is of paramount importance in data-driven research fields such as Natural Language Processing (NLP) and Computer Vision (CV). Current evaluation practice largely hinges on the existence of a single “ground truth” against which we can meaningfully compare the prediction of a model. However, this comparison is flawed for two reasons. 1) In many cases, more than one answer is correct. 2) Even where there is a single answer, disagreement among annotators is ubiquitous, making it difficult to decide on a gold standard. We argue that the current methods of adjudication, agreement, and evaluation need serious reconsideration. Some researchers now propose to minimize disagreement and to fix datasets. We argue that this is a gross oversimplification, and likely to conceal the underlying complexity. Instead, we suggest that we need to better capture the sources of disagreement to improve today’s evaluation practice. We discuss three sources of disagreement: from the annotator, the data, and the context, and show how this affects even seemingly objective tasks. Datasets with multiple annotations are becoming more common, as are methods to integrate disagreement into modeling. The logical next step is to extend this to evaluation.</abstract>
diff --git a/data/xml/2021.bsnlp.xml b/data/xml/2021.bsnlp.xml
index 027039a329..3f513a644f 100644
--- a/data/xml/2021.bsnlp.xml
+++ b/data/xml/2021.bsnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing</booktitle>
       <editor><first>Bogdan</first><last>Babych</last></editor>
       <editor><first>Olga</first><last>Kanishcheva</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Lidia</first><last>Pivovarova</last></editor>
       <editor><first>Vasyl</first><last>Starko</last></editor>
@@ -47,7 +47,7 @@
     <paper id="3">
       <title>Abusive Language Recognition in <fixed-case>R</fixed-case>ussian</title>
       <author><first>Kamil</first><last>Saitov</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>20–25</pages>
       <abstract>Abusive phenomena are commonplace in language on the web. The scope of recognizing abusive language is broad, covering many behaviors and forms of expression. This work addresses automatic detection of abusive language in Russian. The lexical, grammatical and morphological diversity of Russian language present potential difficulties for this task, which is addressed using a variety of machine learning approaches. Finally, competitive performance is reached over multiple domains for this investigation into automatic detection of abusive language in Russian.</abstract>
       <url hash="6f6d08eb">2021.bsnlp-1.3</url>
@@ -88,7 +88,7 @@
     <paper id="7">
       <title>Exploratory Analysis of News Sentiment Using Subgroup Discovery</title>
       <author><first>Anita</first><last>Valmarska</last></author>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
       <author><first>Elvys</first><last>Linhares Pontes</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
       <pages>66–72</pages>
@@ -99,7 +99,7 @@
     <paper id="8">
       <title>Creating an Aligned <fixed-case>R</fixed-case>ussian Text Simplification Dataset from Language Learner Data</title>
       <author><first>Anna</first><last>Dmitrieva</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>73–79</pages>
       <abstract>Parallel language corpora where regular texts are aligned with their simplified versions can be used in both natural language processing and theoretical linguistic studies. They are essential for the task of automatic text simplification, but can also provide valuable insights into the characteristics that make texts more accessible and reveal strategies that human experts use to simplify texts. Today, there exist a few parallel datasets for English and Simple English, but many other languages lack such data. In this paper we describe our work on creating an aligned Russian-Simple Russian dataset composed of Russian literature texts adapted for learners of Russian as a foreign language. This will be the first parallel dataset in this domain, and one of the first Simple Russian datasets in general.</abstract>
       <url hash="426a81b6">2021.bsnlp-1.8</url>
@@ -118,7 +118,7 @@
       <title>Priberam Labs at the 3rd Shared Task on <fixed-case>S</fixed-case>lav<fixed-case>NER</fixed-case></title>
       <author><first>Pedro</first><last>Ferreira</last></author>
       <author><first>Ruben</first><last>Cardoso</last></author>
-      <author><first>Afonso</first><last>Mendes</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
       <pages>86–92</pages>
       <abstract>This document describes our participation at the 3rd Shared Task on SlavNER, part of the 8th Balto-Slavic Natural Language Processing Workshop, where we focused exclusively in the Named Entity Recognition (NER) task. We addressed this task by combining multi-lingual contextual embedding models, such as XLM-R (Conneau et al., 2020), with character- level embeddings and a biaffine classifier (Yu et al., 2020). This allowed us to train downstream models for NER using all the available training data. We are able to show that this approach results in good performance when replicating the scenario of the 2nd Shared Task.</abstract>
       <url hash="9d082e26">2021.bsnlp-1.10</url>
@@ -127,7 +127,7 @@
     <paper id="11">
       <title>Multilingual <fixed-case>S</fixed-case>lavic Named Entity Recognition</title>
       <author><first>Rinalds</first><last>Vīksna</last></author>
-      <author><first>Inguna</first><last>Skadina</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadina</last></author>
       <pages>93–97</pages>
       <abstract>Named entity recognition, in particular for morphological rich languages, is challenging task due to the richness of inflected forms and ambiguity. This challenge is being addressed by SlavNER Shared Task. In this paper we describe system submitted to this task. Our system uses pre-trained multilingual BERT Language Model and is fine-tuned for six Slavic languages of this task on texts distributed by organizers. In our experiments this multilingual NER model achieved 96 F1 score on in-domain data and an F1 score of 83 on out of domain data. Entity coreference module achieved F1 score of 47.6 as evaluated by bsnlp2021 organizers.</abstract>
       <url hash="b667ddb0">2021.bsnlp-1.11</url>
@@ -135,8 +135,8 @@
     </paper>
     <paper id="12">
       <title>Using a Frustratingly Easy Domain and Tagset Adaptation for Creating <fixed-case>S</fixed-case>lavic Named Entity Recognition Systems</title>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>98–104</pages>
       <abstract>We present a collection of Named Entity Recognition (NER) systems for six Slavic languages: Bulgarian, Czech, Polish, Slovenian, Russian and Ukrainian. These NER systems have been trained using different BERT models and a Frustratingly Easy Domain Adaptation (FEDA). FEDA allow us creating NER systems using multiple datasets without having to worry about whether the tagset (e.g. Location, Event, Miscellaneous, Time) in the source and target domains match, while increasing the amount of data available for training. Moreover, we boosted the prediction on named entities by marking uppercase words and predicting masked words. Participating in the 3rd Shared Task on SlavNER, our NER systems reached a strict match micro F-score of up to 0.908. The results demonstrate good generalization, even in named entities with weak regularity, such as book titles, or entities that were never seen during the training.</abstract>
diff --git a/data/xml/2021.bucc.xml b/data/xml/2021.bucc.xml
index 25615bdea7..658d11c02a 100644
--- a/data/xml/2021.bucc.xml
+++ b/data/xml/2021.bucc.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)</booktitle>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>INCOMA Ltd.</publisher>
       <address>Online (Virtual Mode)</address>
       <month>September</month>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Presentation</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1</pages>
       <abstract>AI now and in future will have to grapple continuously with the problem of low resource. AI will increasingly be ML intensive. But ML needs data often with annotation. However, annotation is costly. Over the years, through work on multiple problems, we have developed insight into how to do language processing in low resource setting. Following 6 methods—individually and in combination—seem to be the way forward: 1) Artificially augment resource (e.g. subwords) 2) Cooperative NLP (e.g., pivot in MT) 3) Linguistic embellishment (e.g. factor based MT, source reordering) 4) Joint Modeling (e.g., Coref and NER, Sentiment and Emotion: each task helping the other to either boost accuracy or reduce resource requirement) 5) Multimodality (e.g., eye tracking based NLP, also picture+text+speech based Sentiment Analysis) 6)Cross Lingual Embedding (e.g., embedding from multiple languages helping MT, close to 2 above) The present talk will focus on low resource machine translation. We describe the use of techniques from the above list and bring home the seriousness and methodology of doing Machine Translation in low resource settings.</abstract>
       <url hash="60f94858">2021.bucc-1.1</url>
@@ -72,7 +72,7 @@
       <author><first>Jeremias</first><last>Bohn</last></author>
       <author><first>Jannik</first><last>Fischbach</last></author>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Andreas</first><last>Vogelsang</last></author>
       <pages>40–45</pages>
       <abstract>Creating datasets manually by human annotators is a laborious task that can lead to biased and inhomogeneous labels. We propose a flexible, semi-automatic framework for labeling data for relation extraction. Furthermore, we provide a dataset of preprocessed sentences from the requirements engineering domain, including a set of automatically created as well as hand-crafted labels. In our case study, we compare the human and automatic labels and show that there is a substantial overlap between both annotations.</abstract>
@@ -82,7 +82,7 @@
     <paper id="7">
       <title>Majority Voting with Bidirectional Pre-translation For Bitext Retrieval</title>
       <author><first>Alexander</first><last>Jones</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>46–59</pages>
       <abstract>Obtaining high-quality parallel corpora is of paramount importance for training NMT systems. However, as many language pairs lack adequate gold-standard training data, a popular approach has been to mine so-called “pseudo-parallel” sentences from paired documents in two languages. In this paper, we outline some drawbacks with current methods that rely on an embedding similarity threshold, and propose a heuristic method in its place. Our method involves translating both halves of a paired corpus before mining, and then performing a majority vote on sentence pairs mined in three ways: after translating documents in language x to language y, after translating language y to x, and using the original documents in languages x and y. We demonstrate success with this novel approach on the Tatoeba similarity search benchmark in 64 low-resource languages, and on NMT in Kazakh and Gujarati. We also uncover the effect of resource-related factors (i.e. how much monolingual/bilingual data is available for a given language) on the optimal choice of bitext mining method, demonstrating that there is currently no one-size-fits-all approach for this task. We make the code and data used in our experiments publicly available.</abstract>
       <url hash="9e9ffbc4">2021.bucc-1.7</url>
diff --git a/data/xml/2021.calcs.xml b/data/xml/2021.calcs.xml
index 8d1ec93b0d..fd0cb45c4c 100644
--- a/data/xml/2021.calcs.xml
+++ b/data/xml/2021.calcs.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the Fifth Workshop on Computational Approaches to Linguistic Code-Switching</booktitle>
       <editor><first>Thamar</first><last>Solorio</last></editor>
       <editor><first>Shuguang</first><last>Chen</last></editor>
-      <editor><first>Alan W.</first><last>Black</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="alan-w-black"><first>Alan W.</first><last>Black</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Sunayana</first><last>Sitaram</last></editor>
       <editor><first>Victor</first><last>Soto</last></editor>
       <editor><first>Emre</first><last>Yilmaz</last></editor>
@@ -36,7 +36,7 @@
     <paper id="2">
       <title>Challenges and Limitations with the Metrics Measuring the Complexity of Code-Mixed Text</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>6–14</pages>
       <abstract>Code-mixing is a frequent communication style among multilingual speakers where they mix words and phrases from two different languages in the same utterance of text or speech. Identifying and filtering code-mixed text is a challenging task due to its co-existence with monolingual and noisy text. Over the years, several code-mixing metrics have been extensively used to identify and validate code-mixed text quality. This paper demonstrates several inherent limitations of code-mixing metrics with examples from the already existing datasets that are popularly used across various experiments.</abstract>
       <url hash="0bede426">2021.calcs-1.2</url>
@@ -47,7 +47,7 @@
       <title>Translate and Classify: Improving Sequence Level Classification for <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Code-Mixed Data</title>
       <author><first>Devansh</first><last>Gautam</last></author>
       <author><first>Kshitij</first><last>Gupta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>15–25</pages>
       <abstract>Code-mixing is a common phenomenon in multilingual societies around the world and is especially common in social media texts. Traditional NLP systems, usually trained on monolingual corpora, do not perform well on code-mixed texts. Training specialized models for code-switched texts is difficult due to the lack of large-scale datasets. Translating code-mixed data into standard languages like English could improve performance on various code-mixed tasks since we can use transfer learning from state-of-the-art English models for processing the translated data. This paper focuses on two sequence-level classification tasks for English-Hindi code mixed texts, which are part of the GLUECoS benchmark - Natural Language Inference and Sentiment Analysis. We propose using various pre-trained models that have been fine-tuned for similar English-only tasks and have shown state-of-the-art performance. We further fine-tune these models on the translated code-mixed datasets and achieve state-of-the-art performance in both tasks. To translate English-Hindi code-mixed data to English, we use mBART, a pre-trained multilingual sequence-to-sequence model that has shown competitive performance on various low-resource machine translation pairs and has also shown performance gains in languages that were not in its pre-training corpus.</abstract>
       <url hash="43f455d9">2021.calcs-1.3</url>
@@ -69,7 +69,7 @@
       <author><first>Ramakrishna</first><last>Appicharla</last></author>
       <author><first>Kamal Kumar</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>31–35</pages>
       <abstract>This paper describes the system submitted by IITP-MT team to Computational Approaches to Linguistic Code-Switching (CALCS 2021) shared task on MT for English→Hinglish. We submit a neural machine translation (NMT) system which is trained on the synthetic code-mixed (cm) English-Hinglish parallel corpus. We propose an approach to create code-mixed parallel corpus from a clean parallel corpus in an unsupervised manner. It is an alignment based approach and we do not use any linguistic resources for explicitly marking any token for code-switching. We also train NMT model on the gold corpus provided by the workshop organizers augmented with the generated synthetic code-mixed parallel corpus. The model trained over the generated synthetic cm data achieves 10.09 BLEU points over the given test set.</abstract>
       <url hash="2ded8ad2">2021.calcs-1.5</url>
@@ -94,7 +94,7 @@
       <author><first>Prashant</first><last>Kodali</last></author>
       <author><first>Kshitij</first><last>Gupta</last></author>
       <author><first>Anmol</first><last>Goel</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>47–55</pages>
       <abstract>Code-mixed languages are very popular in multilingual societies around the world, yet the resources lag behind to enable robust systems on such languages. A major contributing factor is the informal nature of these languages which makes it difficult to collect code-mixed data. In this paper, we propose our system for Task 1 of CACLS 2021 to generate a machine translation system for English to Hinglish in a supervised setting. Translating in the given direction can help expand the set of resources for several tasks by translating valuable datasets from high resource languages. We propose to use mBART, a pre-trained multilingual sequence-to-sequence model, and fully utilize the pre-training of the model by transliterating the roman Hindi words in the code-mixed sentences to Devanagri script. We evaluate how expanding the input by concatenating Hindi translations of the English sentences improves mBART’s performance. Our system gives a BLEU score of 12.22 on test set. Further, we perform a detailed error analysis of our proposed systems and explore the limitations of the provided dataset and metrics.</abstract>
@@ -128,7 +128,7 @@
     <paper id="10">
       <title>A Language-aware Approach to Code-switched Morphological Tagging</title>
       <author><first>Şaziye Betül</first><last>Özateş</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>72–83</pages>
       <abstract>Morphological tagging of code-switching (CS) data becomes more challenging especially when language pairs composing the CS data have different morphological representations. In this paper, we explore a number of ways of implementing a language-aware morphological tagging method and present our approach for integrating language IDs into a transformer-based framework for CS morphological tagging. We perform our set of experiments on the Turkish-German SAGT Treebank. Experimental results show that including language IDs to the learning model significantly improves accuracy over other approaches.</abstract>
       <url hash="8a6bdcaa">2021.calcs-1.10</url>
@@ -223,7 +223,7 @@
     <paper id="19">
       <title>Code-Mixing on Sesame Street: Dawn of the Adversarial Polyglots</title>
       <author><first>Samson</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>141</pages>
       <abstract>Multilingual models have demonstrated impressive cross-lingual transfer performance. However, test sets like XNLI are monolingual at the example level. In multilingual communities, it is common for polyglots to code-mix when conversing with each other. Inspired by this phenomenon, we present two strong black-box adversarial attacks (one word-level, one phrase-level) for multilingual models that push their ability to handle code-mixed sentences to the limit. The former (PolyGloss) uses bilingual dictionaries to propose perturbations and translations of the clean example for sense disambiguation. The latter (Bumblebee) directly aligns the clean example with its translations before extracting phrases as perturbations. Bumblebee has a success rate of 89.75% against XLM-R-large, bringing its average accuracy of 79.85 down to 8.18 on XNLI. Finally, we propose an efficient adversarial training scheme, Code-mixed Adversarial Training (CAT), that trains in the same number of steps as the original model. Even after controlling for the extra training data introduced, CAT improves model accuracy when the model is prevented from relying on lexical overlaps (+3.45), with a negligible drop (-0.15 points) in performance on the original XNLI test set. t-SNE visualizations reveal that CAT improves a model’s language agnosticity. This paper will be published in the proceedings of NAACL-HLT 2021.</abstract>
       <url hash="85f19b06">2021.calcs-1.19</url>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="20">
       <title>Are Multilingual Models Effective in Code-Switching?</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
diff --git a/data/xml/2021.case.xml b/data/xml/2021.case.xml
index fce6531c62..debab3acd7 100644
--- a/data/xml/2021.case.xml
+++ b/data/xml/2021.case.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 4th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE 2021)</booktitle>
-      <editor><first>Ali</first><last>Hürriyetoğlu</last></editor>
+      <editor id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -18,7 +18,7 @@
     <paper id="1">
       <title>Challenges and Applications of Automated Extraction of Socio-political Events from Text (<fixed-case>CASE</fixed-case> 2021): Workshop and Shared Task Report</title>
       <author><first>Ali</first><last>Hürriyetoğlu</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
       <author><first>Reyyan</first><last>Yeniterzi</last></author>
@@ -79,7 +79,7 @@
       <author><first>Liane</first><last>Guillou</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
       <author><first>Nick</first><last>McKenna</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>31–42</pages>
       <abstract>Language provides speakers with a rich system of modality for expressing thoughts about events, without being committed to their actual occurrence. Modality is commonly used in the political news domain, where both actual and possible courses of events are discussed. NLP systems struggle with these semantic phenomena, often incorrectly extracting events which did not happen, which can lead to issues in downstream applications. We present an open-domain, lexicon-based event extraction system that captures various types of modality. This information is valuable for Question Answering, Knowledge Graph construction and Fact-checking tasks, and our evaluation shows that the system is sufficiently strong to be used in downstream applications.</abstract>
       <url hash="68080e57">2021.case-1.6</url>
@@ -90,7 +90,7 @@
     <paper id="7">
       <title>Characterizing News Portrayal of Civil Unrest in <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong, 1998–2020</title>
       <author><first>James</first><last>Scharf</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Giovanna Maria Dora</first><last>Dore</last></author>
       <pages>43–52</pages>
       <abstract>We apply statistical techniques from natural language processing to a collection of Western and Hong Kong–based English-language newspaper articles spanning the years 1998–2020, studying the difference and evolution of its portrayal. We observe that both content and attitudes differ between Western and Hong Kong–based sources. ANOVA on keyword frequencies reveals that Hong Kong–based papers discuss protests and democracy less often. Topic modeling detects salient aspects of protests and shows that Hong Kong–based papers made fewer references to police violence during the Anti–Extradition Law Amendment Bill Movement. Diachronic shifts in word embedding neighborhoods reveal a shift in the characterization of salient keywords once the Movement emerged. Together, these raise questions about the existence of anodyne reporting from Hong Kong–based media. Likewise, they illustrate the importance of sample selection for protest event analysis.</abstract>
@@ -114,7 +114,7 @@
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Alok</first><last>Kumar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>58–67</pages>
       <abstract>Incidents in industries have huge social and political impact and minimizing the consequent damage has been a high priority. However, automated analysis of repositories of incident reports has remained a challenge. In this paper, we focus on automatically extracting events from incident reports. Due to absence of event annotated datasets for industrial incidents we employ a transfer learning based approach which is shown to outperform several baselines. We further provide detailed analysis regarding effect of increase in pre-training data and provide explainability of why pre-training improves the performance.</abstract>
       <url hash="1ce6ac61">2021.case-1.9</url>
@@ -174,8 +174,8 @@
     <paper id="14">
       <title><fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>CASE</fixed-case> 2021 Task 1: Improving Multilingual Event Sentence Coreference Identification With Linguistic Information</title>
       <author><first>Fiona Anting</first><last>Tan</last></author>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>105–112</pages>
       <abstract>Event Sentence Coreference Identification (ESCI) aims to cluster event sentences that refer to the same event together for information extraction. We describe our ESCI solution developed for the ACL-CASE 2021 shared tasks on the detection and classification of socio-political and crisis event information in a multilingual setting. For a given article, our proposed pipeline comprises of an accurate sentence pair classifier that identifies coreferent sentence pairs and subsequently uses these predicted probabilities to cluster sentences into groups. Sentence pair representations are constructed from fine-tuned BERT embeddings plus POS embeddings fed through a BiLSTM model, and combined with linguistic-based lexical and semantic similarities between sentences. Our best models ranked 2nd, 1st and 2nd and obtained CoNLL F1 scores of 81.20%, 93.03%, 83.15% for the English, Portuguese and Spanish test sets respectively in the ACL-CASE 2021 competition.</abstract>
       <url hash="39a44101">2021.case-1.14</url>
@@ -223,7 +223,7 @@
       <author><first>Parul</first><last>Awasthy</last></author>
       <author><first>Jian</first><last>Ni</last></author>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>138–146</pages>
       <abstract>In this paper, we present the event detection models and systems we have developed for Multilingual Protest News Detection - Shared Task 1 at CASE 2021. The shared task has 4 subtasks which cover event detection at different granularity levels (from document level to token level) and across multiple languages (English, Hindi, Portuguese and Spanish). To handle data from multiple languages, we use a multilingual transformer-based language model (XLM-R) as the input text encoder. We apply a variety of techniques and build several transformer-based models that perform consistently well across all the subtasks and languages. Our systems achieve an average F_1 score of 81.2. Out of thirteen subtask-language tracks, our submissions rank 1st in nine and 2nd in four tracks.</abstract>
       <url hash="bf723387">2021.case-1.18</url>
@@ -292,7 +292,7 @@
       <author><first>Ken</first><last>Barker</last></author>
       <author><first>Parul</first><last>Awasthy</last></author>
       <author><first>Jian</first><last>Ni</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>193–202</pages>
       <abstract>Supervised models can achieve very high accuracy for fine-grained text classification. In practice, however, training data may be abundant for some types but scarce or even non-existent for others. We propose a hybrid architecture that uses as much labeled data as available for fine-tuning classification models, while also allowing for types with little (few-shot) or no (zero-shot) labeled data. In particular, we pair a supervised text classification model with a Natural Language Inference (NLI) reranking model. The NLI reranker uses a textual representation of target types that allows it to score the strength with which a type is implied by a text, without requiring training data for the types. Experiments show that the NLI model is very sensitive to the choice of textual representation, but can be effective for classifying unseen types. It can also improve classification accuracy for the known types of an already highly accurate supervised model.</abstract>
       <url hash="5e9a1b29">2021.case-1.24</url>
@@ -324,7 +324,7 @@
       <title>Discovering Black Lives Matter Events in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates: Shared Task 3, <fixed-case>CASE</fixed-case> 2021</title>
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Nicolas</first><last>Stefanovitch</last></author>
       <author><first>Sy</first><last>Hwang</last></author>
       <author><first>Hansi</first><last>Hettiarachchi</last></author>
diff --git a/data/xml/2021.ccl.xml b/data/xml/2021.ccl.xml
index 3e5ad8ea50..0757d4e0af 100644
--- a/data/xml/2021.ccl.xml
+++ b/data/xml/2021.ccl.xml
@@ -24,8 +24,8 @@
     <paper id="1">
       <title>融合零指代识别的篇章级机器翻译(Context-aware Machine Translation Integrating Zero Pronoun Recognition)</title>
       <author><first>Hao</first><last>Wang</last><variant script="hani"><first>浩</first><last>汪</last></variant></author>
-      <author><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
-      <author><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
       <pages>1–12</pages>
       <abstract>在汉语等其他有省略代词习惯的语言中,通常会删掉可从上下文信息推断出的代词。尽管以Transformer为代表的的神经机器翻译模型取得了巨大的成功,但这种省略现象依旧对神经机器翻译模型造成了很大的挑战。本文在Transformer基础上提出了一个融合零指代识别的翻译模型,并引入篇章上下文来丰富指代信息。具体地,该模型采用联合学习的框架,在翻译模型基础上,联合了一个分类任务,即判别句子中省略代词在句子所表示的成分,使得模型能够融合零指代信息辅助翻译。通过在中英对话数据集上的实验,验证了本文提出方法的有效性,与基准模型相比,翻译性能提升了1.48个BLEU值。</abstract>
       <url hash="ba414cb1">2021.ccl-1.1</url>
@@ -37,7 +37,7 @@
       <author><first>Wei</first><last>Hu</last><variant script="hani"><first>纬</first><last>胡</last></variant></author>
       <author><first>Maoxi</first><last>Li</last><variant script="hani"><first>茂西</first><last>李</last></variant></author>
       <author><first>Bailian</first><last>Qiu</last><variant script="hani"><first>白莲</first><last>裘</last></variant></author>
-      <author><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
       <pages>13–22</pages>
       <abstract>机器译文自动评价对机器翻译的发展和应用起着重要的促进作用,它一般通过计算机器译文和人工参考译文的相似度来度量机器译文的质量。该文通过跨语种预训练语言模型XLM将源语言句子、机器译文和人工参考译文映射到相同的语义空间,结合分层注意力和内部注意力提取源语言句子与机器译文、机器译文与人工参考译文以及源语言句子与人工参考译文之间差异特征,并将其融入到基于Bi-LSTM神经译文自动评价方法中。在WMT’19译文自动评价数据集上的实验结果表明,融合XLM词语表示的神经机器译文自动评价方法显著提高了其与人工评价的相关性。</abstract>
       <url hash="f279b471">2021.ccl-1.2</url>
@@ -47,7 +47,7 @@
     <paper id="3">
       <title>利用语义关联增强的跨语言预训练模型的译文质量评估(A Cross-language Pre-trained Model with Enhanced Semantic Connection for <fixed-case>MT</fixed-case> Quality Estimation)</title>
       <author><first>Heng</first><last>Ye</last><variant script="hani"><first>恒</first><last>叶</last></variant></author>
-      <author><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
       <pages>23–34</pages>
       <abstract>机器翻译质量评估(QE)虽然不需要参考译文就能进行自动评估,但它需要人工标注的评估数据进行训练。基于神经网络框架的QE为了克服人工评估数据的稀缺问题,通常包括两个阶段,首先借助大规模的平行语料学习双语对齐,然后在小规模评估数据集上进行评估建模。跨语言预训练模型可以用来代替该任务第一阶段的学习过程,因此本文首先建议一个基于XLM-R的为源/目标语言统一编码的QE模型。其次,由于大多数预训练模型是在多语言的单语数据集上构建的,因此两两语言对的语义关联能力相对较弱。为了能使跨语言预训练模型更好地适应QE任务,本文提出用三种预训练策略来增强预训练模型的跨语言语义关联能力。本文的方法在WMT2017和WMT2019英德评估数据集上都达到了最高性能。</abstract>
       <url hash="5703e6fa">2021.ccl-1.3</url>
@@ -103,7 +103,7 @@
     <paper id="8">
       <title>基于双编码器的医学文本中文分词(<fixed-case>C</fixed-case>hinese word segmentation of medical text based on dual-encoder)</title>
       <author><first>Yuan</first><last>Zong</last><variant script="hani"><first>源</first><last>宗</last></variant></author>
-      <author><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
       <pages>76–85</pages>
       <abstract>中文分词是自然语言处理领域的基础工作,然而前人的医学文本分词工作都只是直接套用通用分词的方法,而医学文本多专用术语的特点让分词系统需要对医学专用术语和医学文本中的非医学术语文本提供不同的分词粒度。本文提出了双编码器医学文本中文分词模型,利用辅助编码器为医学专有术语提供粗粒度表示。模型将需要粗粒度分词的医学专用术语和需要通用分词粒度的文本分开,在提升医学专用术语的分词能力的同时最大限度地避免了其粗粒度对于医学文本中通用文本分词的干扰。</abstract>
       <url hash="fa13de20">2021.ccl-1.8</url>
@@ -433,8 +433,8 @@
     <paper id="35">
       <title>基于序列到序列的中文<fixed-case>AMR</fixed-case>解析(<fixed-case>C</fixed-case>hinese <fixed-case>AMR</fixed-case> Parsing based on Sequence-to-Sequence Modeling)</title>
       <author><first>Ziyi</first><last>Huang</last><variant script="hani"><first>子怡</first><last>黄</last></variant></author>
-      <author><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
-      <author><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last><variant script="hani"><first>正仙</first><last>贡</last></variant></author>
       <pages>374–385</pages>
       <abstract>抽象语义表示(Abstract Meaning Representation,简称AMR)是将给定的文本的语义特征抽象成一个单根的有向无环图。AMR语义解析则是根据输入的文本获取对应的AMR图。相比于英文AMR,中文AMR的研究起步较晚,造成针对中文的AMR语义解析相关研究较少。本文针对公开的中文AMR语料库CAMR1.0,采用序列到序列的方法进行中文AMR语义解析的相关研究。具体地,首先基于Transformer模型实现一个适用于中文的序列到序列AMR语义解析系统;然后,探索并比较了不同预训练模型在中文AMR语义解析中的应用。基于该语料,本文中文AMR语义解析方法最优性能达到了70.29的Smatch F1值。本文是第一次在该数据集上报告实验结果。</abstract>
       <url hash="05969459">2021.ccl-1.35</url>
@@ -487,7 +487,7 @@
     <paper id="40">
       <title>基于自动识别的委婉语历时性发展变化与社会共变研究(A Study on the Diachronic Development and Social Covariance of Euphemism Based on Automatic Recognition)</title>
       <author><first>Chenlin</first><last>Zhang</last><variant script="hani"><first>辰麟</first><last>张</last></variant></author>
-      <author><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
       <author><first>Yiming</first><last>Tan</last><variant script="hani"><first>亦鸣</first><last>谭</last></variant></author>
       <author><first>Ming</first><last>Yin</last><variant script="hani"><first>明</first><last>尹</last></variant></author>
       <author><first>Xinyi</first><last>Zhang</last><variant script="hani"><first>心怡</first><last>张</last></variant></author>
@@ -614,7 +614,7 @@
       <author><first>Jishun</first><last>Zhao</last><variant script="hani"><first>继舜</first><last>赵</last></variant></author>
       <author><first>Bingjie</first><last>Du</last><variant script="hani"><first>冰洁</first><last>杜</last></variant></author>
       <author><first>Shucheng</first><last>Zhu</last><variant script="hani"><first>述承</first><last>朱</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>564–575</pages>
       <abstract>自然语言处理领域各项任务中,模型广泛存在性别偏见。然而当前尚无中文性别偏见评估和消偏的相关数据集,因此无法对中文自然语言处理模型中的性别偏见进行评估。首先本文根据16对性别称谓词,从一个平面媒体语料库中筛选出性别无偏的句子,构建了一个含有20000条语句的中文句子级性别无偏数据集SlguSet。随后,本文提出了一个可衡量预训练语言模型性别偏见程度的指标,并对5种流行的预训练语言模型中的性别偏见进行评估。结果表明,中文预训练语言模型中存在不同程度的性别偏见,该文所构建数据集能够很好的对中文预训练语言模型中的性别偏见进行评估。同时,该数据集还可作为评估预训练语言模型消偏方法的数据集。</abstract>
       <url hash="4b24e0f3">2021.ccl-1.51</url>
@@ -625,7 +625,7 @@
       <title>基于多任务标签一致性机制的中文命名实体识别(<fixed-case>C</fixed-case>hinese Named Entity Recognition based on Multi-task Label Consistency Mechanism)</title>
       <author><first>Shuning</first><last>Lv</last><variant script="hani"><first>书宁</first><last>吕</last></variant></author>
       <author><first>Jian</first><last>Liu</last><variant script="hani"><first>健</first><last>刘</last></variant></author>
-      <author><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
       <author><first>Yufeng</first><last>Chen</last><variant script="hani"><first>钰枫</first><last>陈</last></variant></author>
       <author><first>Yujie</first><last>Zhang</last><variant script="hani"><first>玉洁</first><last>张</last></variant></author>
       <pages>576–588</pages>
@@ -652,7 +652,7 @@
       <title>融入篇章信息的文学作品命名实体识别(Document-level Literary Named Entity Recognition)</title>
       <author><first>Yuxiang</first><last>Jia</last><variant script="hani"><first>玉祥</first><last>贾</last></variant></author>
       <author><first>Rui</first><last>Chao</last><variant script="hani"><first>睿</first><last>晁</last></variant></author>
-      <author><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
       <author><first>Huayi</first><last>Dou</last><variant script="hani"><first>华溢</first><last>窦</last></variant></author>
       <author><first>Shuai</first><last>Cao</last><variant script="hani"><first>帅</first><last>曹</last></variant></author>
       <author><first>Shuo</first><last>Xu</last><variant script="hani"><first>硕</first><last>徐</last></variant></author>
@@ -679,7 +679,7 @@
       <author><first>Yajuan</first><last>Ye</last><variant script="hani"><first>娅娟</first><last>叶</last></variant></author>
       <author><first>Bin</first><last>Hu</last><variant script="hani"><first>斌</first><last>胡</last></variant></author>
       <author><first>Kunli</first><last>Zhang</last><variant script="hani"><first>坤丽</first><last>张</last></variant></author>
-      <author><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
       <pages>622–632</pages>
       <abstract>电子病历是医疗信息的重要来源,包含大量与医疗相关的领域知识。本文从糖尿病电子病历文本入手,在调研了国内外已有的电子病历语料库的基础上,参考坉圲坂圲实体及关系分类,建立了糖尿病电子病历实体及实体关系分类体系,并制定了标注规范。利用实体及关系标注平台,进行了实体及关系预标注及多轮人工校对工作,形成了糖尿病电子病历实体及关系标注语料库(Diabetes Electronic Medical Record entity and Related Corpus DEMRC)。所构建的DEMRC包含8899个实体、456个实体修饰及16564个关系。对DEMRC进行一致性评价和分析,标注结果达到了较高的一致性。针对实体识别和实体关系抽取任务,分别采用基于迁移学习的Bi-LSTM-CRF模型和RoBERTa模型进行初步实验,并对语料库中的各类实体及关系进行评估,为后续糖尿病电子病历实体识别及关系抽取研究以及糖尿病知识图谱构建打下基础。</abstract>
       <url hash="e918ada6">2021.ccl-1.56</url>
@@ -689,7 +689,7 @@
     <paper id="57">
       <title>脑卒中疾病电子病历实体及实体关系标注语料库构建(Corpus Construction for Named-Entity and Entity Relations for Electronic Medical Records of Stroke Disease)</title>
       <author><first>Hongyang</first><last>Chang</last><variant script="hani"><first>洪阳</first><last>常</last></variant></author>
-      <author><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
       <author><first>Yutuan</first><last>Ma</last><variant script="hani"><first>玉团</first><last>马</last></variant></author>
       <author><first>Kunli</first><last>Zhang</last><variant script="hani"><first>坤丽</first><last>张</last></variant></author>
       <pages>633–642</pages>
@@ -702,7 +702,7 @@
       <title>中文关系抽取的句级语言学特征探究(A Probe into the Sentence-level Linguistic Features of <fixed-case>C</fixed-case>hinese Relation Extraction)</title>
       <author><first>Baixi</first><last>Xing</last><variant script="hani"><first>百西</first><last>邢</last></variant></author>
       <author><first>Jishun</first><last>Zhao</last><variant script="hani"><first>继舜</first><last>赵</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>643–654</pages>
       <abstract>神经网络模型近些年在关系抽取任务上已经展示出了很好的效果,然而我们对于特征提取的过程所知甚少,而这也进一步限制了深度神经网络模型在关系抽取任务上的进一步发展。当前已有研究工作对英文关系抽取的语言学特征进行探究,并且得到了一些规律。然而由于中文与西方语言之间明显的差异性,其所探究到的规律与解释性不适用于中文关系抽取。本文首次对中文关系抽取神经网络进行探究,采用了四个角度共13种探究任务,其中包含中文特有的分词探究任务。在两个关系抽取数据集上进行了实验,探究了中文关系抽取模型进行特征提取的规律。</abstract>
       <url hash="ab6f80d9">2021.ccl-1.58</url>
@@ -822,7 +822,7 @@
       <author><first>Bo</first><last>Jin</last><variant script="hani"><first>波</first><last>金</last></variant></author>
       <author><first>Mingtong</first><last>Liu</last><variant script="hani"><first>明童</first><last>刘</last></variant></author>
       <author><first>Yujie</first><last>Zhang</last><variant script="hani"><first>玉洁</first><last>张</last></variant></author>
-      <author><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><variant script="hani"><first>金安</first><last>徐</last></variant></author>
       <author><first>Yufeng</first><last>Chen</last><variant script="hani"><first>钰枫</first><last>陈</last></variant></author>
       <pages>758–768</pages>
       <abstract>如何挖掘语言资源中丰富的复述模板,是复述研究中的一项重要任务。已有方法在人工给定种子实体对的基础上,利用实体关系,通过自举迭代方式,从开放域获取复述模板,规避对平行语料或可比语料的依赖,但是该方法需人工给定实体对,实体关系受限;在迭代过程中语义会发生偏移,影响获取质量。针对这些问题,我们考虑知识库中包含描述特定语义关系的实体对(即关系三元组),提出融合外部知识的开放域复述模板自动获取方法。首先,将关系三元组与开放域文本对齐,获取关系对应文本,并将文本中语义丰富部分泛化成变量槽,获取关系模板;接着设计模板表示方法,本文利用预训练语言模型,在模板表示中融合变量槽语义;最后,根据获得的模板表示,设计自动聚类与筛选方法,获取高精度的复述模板。在融合自动评测与人工评测的评价方法下,实验结果表明,本文提出的方法实现了在开放域数据上复述模板的自动泛化与获取,能够获得质量高、语义一致的复述模板。</abstract>
@@ -883,7 +883,7 @@
       <author><first>Zecheng</first><last>Tang</last><variant script="hani"><first>泽成</first><last>汤</last></variant></author>
       <author><first>Yixin</first><last>Ji</last><variant script="hani"><first>一心</first><last>纪</last></variant></author>
       <author><first>Yibo</first><last>Zhao</last><variant script="hani"><first>怡博</first><last>赵</last></variant></author>
-      <author><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><variant script="hani"><first>军辉</first><last>李</last></variant></author>
       <pages>813–824</pages>
       <abstract>语法纠错是自然语言处理领域的热门任务之一,其目的是将错误的句子修改为正确的句子。为了缓解中文训练语料不足的问题,本文从数据增强的角度出发,提出一种新颖的扩充和增强数据的方法。具体地,为了使模型能更好地获取不同类型和不同粒度的错误,本文首先对语法纠错中出现的错误进行了字和词粒度的分类,在此基础上提出了融合字词粒度噪声的数据增强方法,以此获得大规模且质量较高的错误数据集。基于NLPCC2018共享任务的实验结果表明,本文提出的融合字词粒度加噪方法能够显著提升模型的性能,在该数据集上达到了最优的性能。最后,本文分析了错误类型和数据规模对中文语法纠错模型性能的影响。</abstract>
       <url hash="5d2fad15">2021.ccl-1.73</url>
diff --git a/data/xml/2021.cinlp.xml b/data/xml/2021.cinlp.xml
index 167f9b0994..176bd33c7b 100644
--- a/data/xml/2021.cinlp.xml
+++ b/data/xml/2021.cinlp.xml
@@ -14,7 +14,7 @@
       <editor><first>Roi</first><last>Reichart</last></editor>
       <editor><first>Molly</first><last>Roberts</last></editor>
       <editor><first>Uri</first><last>Shalit</last></editor>
-      <editor><first>Brandon</first><last>Stewart</last></editor>
+      <editor id="brandon-m-stewart"><first>Brandon</first><last>Stewart</last></editor>
       <editor><first>Victor</first><last>Veitch</last></editor>
       <editor><first>Diyi</first><last>Yang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -31,7 +31,7 @@
       <title>Causal Augmentation for Causal Sentence Classification</title>
       <author><first>Fiona Anting</first><last>Tan</last></author>
       <author><first>Devamanyu</first><last>Hazarika</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Roger</first><last>Zimmermann</last></author>
       <pages>1–20</pages>
diff --git a/data/xml/2021.cl.xml b/data/xml/2021.cl.xml
index 30abce1010..2a6fde94d3 100644
--- a/data/xml/2021.cl.xml
+++ b/data/xml/2021.cl.xml
@@ -13,7 +13,7 @@
     </meta>
     <paper id="1">
       <title>Kathy <fixed-case>M</fixed-case>c<fixed-case>K</fixed-case>eown Interviews Bonnie Webber</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <doi>10.1162/coli_a_00393</doi>
       <abstract>Because the 2020 ACL Lifetime Achievement Award presentation could not be done in person, we replaced the usual LTA talk with an interview between Professor Kathy McKeown (Columbia University) and the recipient, Bonnie Webber. The following is an edited version of the interview, with added citations.</abstract>
       <pages>1–7</pages>
@@ -23,7 +23,7 @@
     <paper id="2">
       <title>Formal Basis of a Language Universal</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/coli_a_00394</doi>
       <abstract>Steedman (2020) proposes as a formal universal of natural language grammar that grammatical permutations of the kind that have given rise to transformational rules are limited to a class known to mathematicians and computer scientists as the “separable” permutations. This class of permutations is exactly the class that can be expressed in combinatory categorial grammars (CCGs). The excluded non-separable permutations do in fact seem to be absent in a number of studies of crosslinguistic variation in word order in nominal and verbal constructions. The number of permutations that are separable grows in the number n of lexical elements in the construction as the Large Schröder Number Sn−1. Because that number grows much more slowly than the n! number of all permutations, this generalization is also of considerable practical interest for computational applications such as parsing and machine translation. The present article examines the mathematical and computational origins of this restriction, and the reason it is exactly captured in CCG without the imposition of any further constraints.</abstract>
       <pages>9–42</pages>
@@ -35,7 +35,7 @@
       <title>Comparing Knowledge-Intensive and Data-Intensive Models for <fixed-case>E</fixed-case>nglish Resource Semantic Parsing</title>
       <author><first>Junjie</first><last>Cao</last></author>
       <author><first>Zi</first><last>Lin</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <doi>10.1162/coli_a_00395</doi>
       <abstract>In this work, we present a phenomenon-oriented comparative analysis of the two dominant approaches in English Resource Semantic (ERS) parsing: classic, knowledge-intensive and neural, data-intensive models. To reflect state-of-the-art neural NLP technologies, a factorization-based parser is introduced that can produce Elementary Dependency Structures much more accurately than previous data-driven parsers. We conduct a suite of tests for different linguistic phenomena to analyze the grammatical competence of different parsers, where we show that, despite comparable performance overall, knowledge- and data-intensive models produce different types of errors, in a way that can be explained by their theoretical properties. This analysis is beneficial to in-depth evaluation of several representative parsing techniques and leads to new directions for parser development.</abstract>
@@ -46,7 +46,7 @@
     <paper id="4">
       <title>Semantic Data Set Construction from Human Clustering and Spatial Arrangement</title>
       <author><first>Olga</first><last>Majewska</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Jasper J. F.</first><last>van den Bosch</last></author>
       <author><first>Nikolaus</first><last>Kriegeskorte</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
@@ -61,7 +61,7 @@
       <title>Interpretability Analysis for Named Entity Recognition to Understand System Predictions and How They Can Improve</title>
       <author><first>Oshin</first><last>Agarwal</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <doi>10.1162/coli_a_00397</doi>
       <abstract>Named entity recognition systems achieve remarkable performance on domains such as English news. It is natural to ask: What are these models actually learning to achieve this? Are they merely memorizing the names themselves? Or are they capable of interpreting the text and inferring the correct entity type from the linguistic context? We examine these questions by contrasting the performance of several variants of architectures for named entity recognition, with some provided only representations of the context as features. We experiment with GloVe-based BiLSTM-CRF as well as BERT. We find that context does influence predictions, but the main factor driving high performance is learning the named tokens themselves. Furthermore, we find that BERT is not always better at recognizing predictive contexts compared to a BiLSTM-CRF model. We enlist human annotators to evaluate the feasibility of inferring entity types from context alone and find that humans are also mostly unable to infer entity types for the majority of examples on which the context-only system made errors. However, there is room for improvement: A system should be able to recognize any named entity in a predictive context correctly and our experiments indicate that current systems may be improved by such capability. Our human study also revealed that systems and humans do not always learn the same contextual clues, and context-only systems are sometimes correct even when humans fail to recognize the entity type from the context. Finally, we find that one issue contributing to model errors is the use of “entangled” representations that encode both contextual and local token information into a single vector, which can obscure clues. Our results suggest that designing models that explicitly operate over representations of local inputs and context, respectively, may in some cases improve performance. In light of these and related findings, we highlight directions for future work.</abstract>
@@ -86,7 +86,7 @@
       <author><first>Lifeng</first><last>Jin</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>Finale</first><last>Doshi-Velez</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <doi>10.1162/coli_a_00399</doi>
       <abstract>This article describes a simple PCFG induction model with a fixed category domain that predicts a large majority of attested constituent boundaries, and predicts labels consistent with nearly half of attested constituent labels on a standard evaluation data set of child-directed speech. The article then explores the idea that the difference between simple grammars exhibited by child learners and fully recursive grammars exhibited by adult learners may be an effect of increasing working memory capacity, where the shallow grammars are constrained images of the recursive grammars. An implementation of these memory bounds as limits on center embedding in a depth-specific transform of a recursive grammar yields a significant improvement over an equivalent but unbounded baseline, suggesting that this arrangement may indeed confer a learning advantage.</abstract>
@@ -119,7 +119,7 @@
       <title>Approximating Probabilistic Models as Weighted Finite Automata</title>
       <author><first>Ananda Theertha</first><last>Suresh</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>Vlad</first><last>Schogol</last></author>
       <doi>10.1162/coli_a_00401</doi>
       <abstract>Weighted finite automata (WFAs) are often used to represent probabilistic models, such as n-gram language models, because among other things, they are efficient for recognition tasks in time and space. The probabilistic source to be represented as a WFA, however, may come in many forms. Given a generic probabilistic model over sequences, we propose an algorithm to approximate it as a WFA such that the Kullback-Leibler divergence between the source model and the WFA target model is minimized. The proposed algorithm involves a counting step and a difference of convex optimization step, both of which can be performed efficiently. We demonstrate the usefulness of our approach on various tasks, including distilling n-gram models from neural models, building compact language models, and building open-vocabulary character models. The algorithms used for these experiments are available in an open-source software library.</abstract>
@@ -129,10 +129,10 @@
     </paper>
     <paper id="11">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <doi>10.1162/coli_a_00402</doi>
       <abstract>Universal dependencies (UD) is a framework for morphosyntactic annotation of human language, which to date has been used to create treebanks for more than 100 languages. In this article, we outline the linguistic theory of the UD framework, which draws on a long tradition of typologically oriented grammatical theories. Grammatical relations between words are centrally used to explain how predicate–argument structures are encoded morphosyntactically in different languages while morphological features and part-of-speech classes give the properties of words. We argue that this theory is a good basis for crosslinguistically consistent annotation of typologically diverse languages in a way that supports computational natural language understanding as well as broader linguistic studies.</abstract>
       <pages>255–308</pages>
@@ -171,7 +171,7 @@
       <author><first>Daniel</first><last>Loureiro</last></author>
       <author><first>Kiamehr</first><last>Rezaee</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <doi>10.1162/coli_a_00405</doi>
       <abstract>Transformer-based language models have taken many fields in NLP by storm. BERT and its derivatives dominate most of the existing evaluation benchmarks, including those for Word Sense Disambiguation (WSD), thanks to their ability in capturing context-sensitive semantic nuances. However, there is still little knowledge about their capabilities and potential limitations in encoding and recovering word senses. In this article, we provide an in-depth quantitative and qualitative analysis of the celebrated BERT model with respect to lexical ambiguity. One of the main conclusions of our analysis is that BERT can accurately capture high-level sense distinctions, even when a limited number of examples is available for each word sense. Our analysis also reveals that in some cases language models come close to solving coarse-grained noun disambiguation under ideal conditions in terms of availability of training data and computing resources. However, this scenario rarely occurs in real-world settings and, hence, many practical challenges remain even in the coarse-grained setting. We also perform an in-depth comparison of the two main language model-based WSD strategies, namely, fine-tuning and feature extraction, finding that the latter approach is more robust with respect to sense bias and it can better exploit limited available training data. In fact, the simple feature extraction strategy of averaging contextualized embeddings proves robust even using only three training sentences per word sense, with minimal improvements obtained by increasing the size of this training data.</abstract>
       <pages>387–443</pages>
@@ -182,7 +182,7 @@
     <paper id="15">
       <title>Universal Discourse Representation Structure Parsing</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <doi>10.1162/coli_a_00406</doi>
@@ -205,7 +205,7 @@
     </meta>
     <paper id="16">
       <title>The Taxonomy of Writing Systems: How to Measure How Logographic a System Is</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <doi>10.1162/coli_a_00409</doi>
       <abstract>Taxonomies of writing systems since Gelb (1952) have classified systems based on what the written symbols represent: if they represent words or morphemes, they are logographic; if syllables, syllabic; if segments, alphabetic; and so forth. Sproat (2000) and Rogers (2005) broke with tradition by splitting the logographic and phonographic aspects into two dimensions, with logography being graded rather than a categorical distinction. A system could be syllabic, and highly logographic; or alphabetic, and mostly non-logographic. This accords better with how writing systems actually work, but neither author proposed a method for measuring logography. In this article we propose a novel measure of the degree of logography that uses an attention-based sequence-to-sequence model trained to predict the spelling of a token from its pronunciation in context. In an ideal phonographic system, the model should need to attend to only the current token in order to compute how to spell it, and this would show in the attention matrix activations. In contrast, with a logographic system, where a given pronunciation might correspond to several different spellings, the model would need to attend to a broader context. The ratio of the activation outside the token and the total activation forms the basis of our measure. We compare this with a simple lexical measure, and an entropic measure, as well as several other neural models, and argue that on balance our attention-based measure accords best with intuition about how logographic various systems are. Our work provides the first quantifiable measure of the notion of logography that accords with linguistic intuition and, we argue, provides better insight into what this notion means.</abstract>
@@ -242,7 +242,7 @@
     <paper id="19">
       <title>Toward Gender-Inclusive Coreference Resolution: An Analysis of Gender and Bias Throughout the Machine Learning Lifecycle*</title>
       <author><first>Yang Trista</first><last>Cao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <doi>10.1162/coli_a_00413</doi>
       <abstract>Correctly resolving textual mentions of people fundamentally entails making inferences about those people. Such inferences raise the risk of systematic biases in coreference resolution systems, including biases that can harm binary and non-binary trans and cis stakeholders. To better understand such biases, we foreground nuanced conceptualizations of gender from sociology and sociolinguistics, and investigate where in the machine learning pipeline such biases can enter a coreference resolution system. We inspect many existing data sets for trans-exclusionary biases, and develop two new data sets for interrogating bias in both crowd annotations and in existing coreference resolution systems. Through these studies, conducted on English text, we confirm that without acknowledging and building systems that recognize the complexity of gender, we will build systems that fail for: quality of service, stereotyping, and over- or under-representation, especially for binary and non-binary trans users.</abstract>
       <pages>615–661</pages>
@@ -292,7 +292,7 @@
     </meta>
     <paper id="24">
       <title>Natural Language Processing and Computational Linguistics</title>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <doi>10.1162/coli_a_00420</doi>
       <pages>707–727</pages>
       <url hash="fa7d783a">2021.cl-4.24</url>
@@ -333,7 +333,7 @@
     <paper id="28">
       <title>The (Un)Suitability of Automatic Evaluation Metrics for Text Simplification</title>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <doi>10.1162/coli_a_00418</doi>
       <abstract>In order to simplify sentences, several rewriting operations can be performed, such as replacing complex words per simpler synonyms, deleting unnecessary information, and splitting long sentences. Despite this multi-operation nature, evaluation of automatic simplification systems relies on metrics that moderately correlate with human judgments on the simplicity achieved by executing specific operations (e.g., simplicity gain based on lexical replacements). In this article, we investigate how well existing metrics can assess sentence-level simplifications where multiple operations may have been applied and which, therefore, require more general simplicity judgments. For that, we first collect a new and more reliable data set for evaluating the correlation of metrics and human judgments of overall simplicity. Second, we conduct the first meta-evaluation of automatic metrics in Text Simplification, using our new data set (and other existing data) to analyze the variation of the correlation between metrics’ scores and human judgments across three dimensions: the perceived simplicity level, the system type, and the set of references used for computation. We show that these three aspects affect the correlations and, in particular, highlight the limitations of commonly used operation-specific metrics. Finally, based on our findings, we propose a set of recommendations for automatic evaluation of multi-operation simplifications, suggesting which metrics to compute and how to interpret their scores.</abstract>
@@ -366,8 +366,8 @@
     </paper>
     <paper id="31">
       <title><fixed-case>LFG</fixed-case> Generation from Acyclic <fixed-case>F</fixed-case>-Structures is <fixed-case>NP</fixed-case>-Hard</title>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <doi>10.1162/coli_a_00419</doi>
       <abstract>The universal generation problem for LFG grammars is the problem of determining whether a given grammar derives any terminal string with a given f-structure. It is known that this problem is decidable for acyclic f-structures. In this brief note, we show that for those f-structures the problem is nonetheless intractable. This holds even for grammars that are off-line parsable.</abstract>
       <pages>939–946</pages>
diff --git a/data/xml/2021.clpsych.xml b/data/xml/2021.clpsych.xml
index c167771fba..2737001e01 100644
--- a/data/xml/2021.clpsych.xml
+++ b/data/xml/2021.clpsych.xml
@@ -6,7 +6,7 @@
       <editor><first>Nazli</first><last>Goharian</last></editor>
       <editor><first>Philip</first><last>Resnik</last></editor>
       <editor><first>Andrew</first><last>Yates</last></editor>
-      <editor><first>Molly</first><last>Ireland</last></editor>
+      <editor id="molly-ireland"><first>Molly</first><last>Ireland</last></editor>
       <editor><first>Kate</first><last>Niederhoffer</last></editor>
       <editor><first>Rebecca</first><last>Resnik</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -25,7 +25,7 @@
       <author><first>Glorianna</first><last>Jagfeld</last></author>
       <author><first>Fiona</first><last>Lobban</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Steven</first><last>Jones</last></author>
+      <author id="steven-jm-jones"><first>Steven</first><last>Jones</last></author>
       <pages>1–14</pages>
       <abstract>Recently, research on mental health conditions using public online data, including Reddit, has surged in NLP and health research but has not reported user characteristics, which are important to judge generalisability of findings. This paper shows how existing NLP methods can yield information on clinical, demographic, and identity characteristics of almost 20K Reddit users who self-report a bipolar disorder diagnosis. This population consists of slightly more feminine- than masculine-gendered mainly young or middle-aged US-based adults who often report additional mental health diagnoses, which is compared with general Reddit statistics and epidemiological studies. Additionally, this paper carefully evaluates all methods and discusses ethical issues.</abstract>
       <url hash="e918a603">2021.clpsych-1.1</url>
@@ -47,7 +47,7 @@
     <paper id="3">
       <title>Individual Differences in the Movement-Mood Relationship in Digital Life Data</title>
       <author><first>Glen</first><last>Coppersmith</last></author>
-      <author><first>Alex</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex</first><last>Fine</last></author>
       <author><first>Patrick</first><last>Crutchley</last></author>
       <author><first>Joshua</first><last>Carroll</last></author>
       <pages>25–31</pages>
@@ -74,10 +74,10 @@
     <paper id="5">
       <title>Demonstrating the Reliability of Self-Annotated Emotion Data</title>
       <author><first>Anton</first><last>Malko</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Andreas</first><last>Duenser</last></author>
       <author><first>Maria</first><last>Kangas</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
       <pages>45–54</pages>
@@ -171,9 +171,9 @@
     </paper>
     <paper id="10">
       <title>Suicide Risk Prediction by Tracking Self-Harm Aspects in Tweets: <fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at the <fixed-case>CLP</fixed-case>sych 2021 Shared Task</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
       <author><first>Guilherme Augusto</first><last>Zagatti</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>93–98</pages>
       <abstract>We describe our system for identifying users at-risk for suicide based on their tweets developed for the CLPsych 2021 Shared Task. Based on research in mental health studies linking self-harm tendencies with suicide, in our system, we attempt to characterize self-harm aspects expressed in user tweets over a period of time. To this end, we design SHTM, a Self-Harm Topic Model that combines Latent Dirichlet Allocation with a self-harm dictionary for modeling daily tweets of users. Next, differences in moods and topics over time are captured as features to train a deep learning model for suicide prediction.</abstract>
       <url hash="90f8d91f">2021.clpsych-1.10</url>
@@ -222,11 +222,11 @@
     </paper>
     <paper id="14">
       <title>Automatic Detection and Prediction of Psychiatric Hospitalizations From Social Media Posts</title>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Jonathan</first><last>Zomick</last></author>
       <author><first>Sarah Ita</first><last>Levitan</last></author>
       <author><first>Mark</first><last>Serper</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>116–121</pages>
       <abstract>We address the problem of predicting psychiatric hospitalizations using linguistic features drawn from social media posts. We formulate this novel task and develop an approach to automatically extract time spans of self-reported psychiatric hospitalizations. Using this dataset, we build predictive models of psychiatric hospitalization, comparing feature sets, user vs. post classification, and comparing model performance using a varying time window of posts. Our best model achieves an F1 of .718 using 7 days of posts. Our results suggest that this is a useful framework for collecting hospitalization data, and that social media data can be leveraged to predict acute psychiatric crises before they occur, potentially saving lives and improving outcomes for individuals with mental illness.</abstract>
       <url hash="3273c7fe">2021.clpsych-1.14</url>
@@ -263,7 +263,7 @@
     <paper id="17">
       <title>Detecting Cognitive Distortions from Patient-Therapist Interactions</title>
       <author><first>Sagarika</first><last>Shreevastava</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
       <pages>151–158</pages>
       <abstract>An important part of Cognitive Behavioral Therapy (CBT) is to recognize and restructure certain negative thinking patterns that are also known as cognitive distortions. The aim of this project is to detect these distortions using natural language processing. We compare and contrast different types of linguistic features as well as different classification algorithms and explore the limitations of applying these techniques on a small dataset. We find that pre-trained Sentence-BERT embeddings to train an SVM classifier yields the best results with an F1-score of 0.79. Lastly, we discuss how this work provides insights into the types of linguistic features that are inherent in cognitive distortions.</abstract>
       <url hash="00e3bcee">2021.clpsych-1.17</url>
@@ -274,7 +274,7 @@
       <title>Evaluating Automatic Speech Recognition Quality and Its Impact on Counselor Utterance Coding</title>
       <author><first>Do June</first><last>Min</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>159–168</pages>
       <abstract>Automatic speech recognition (ASR) is a crucial step in many natural language processing (NLP) applications, as often available data consists mainly of raw speech. Since the result of the ASR step is considered as a meaningful, informative input to later steps in the NLP pipeline, it is important to understand the behavior and failure mode of this step. In this work, we analyze the quality of ASR in the psychotherapy domain, using motivational interviewing conversations between therapists and clients. We conduct domain agnostic and domain-relevant evaluations using standard evaluation metrics and also identify domain-relevant keywords in the ASR output. Moreover, we empirically study the effect of mixing ASR and manual data during the training of a downstream NLP model, and also demonstrate how additional local context can help alleviate the error introduced by noisy ASR transcripts.</abstract>
       <url hash="70e338c6">2021.clpsych-1.18</url>
@@ -294,7 +294,7 @@
     <paper id="20">
       <title>Safeguarding against spurious <fixed-case>AI</fixed-case>-based predictions: The case of automated verbal memory assessment</title>
       <author><first>Chelsea</first><last>Chandler</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
       <author><first>Alex</first><last>Cohen</last></author>
       <author><first>Terje</first><last>Holmlund</last></author>
       <author><first>Brita</first><last>Elvevåg</last></author>
diff --git a/data/xml/2021.cmcl.xml b/data/xml/2021.cmcl.xml
index 4579d847f0..6c98737cd2 100644
--- a/data/xml/2021.cmcl.xml
+++ b/data/xml/2021.cmcl.xml
@@ -5,9 +5,9 @@
       <booktitle>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
       <editor><first>Nora</first><last>Hollenstein</last></editor>
-      <editor><first>Cassandra</first><last>Jacobs</last></editor>
+      <editor id="cassandra-l-jacobs"><first>Cassandra</first><last>Jacobs</last></editor>
       <editor><first>Yohei</first><last>Oseki</last></editor>
-      <editor><first>Laurent</first><last>Prévot</last></editor>
+      <editor id="laurent-prevot"><first>Laurent</first><last>Prévot</last></editor>
       <editor><first>Enrico</first><last>Santus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title>Human Sentence Processing: Recurrence or Attention?</title>
       <author><first>Danny</first><last>Merkx</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <pages>12–22</pages>
       <abstract>Recurrent neural networks (RNNs) have long been an architecture of interest for computational models of human sentence processing. The recently introduced Transformer architecture outperforms RNNs on many natural language processing tasks but little is known about its ability to model human language processing. We compare Transformer- and RNN-based language models’ ability to account for measures of human reading effort. Our analysis shows Transformers to outperform RNNs in explaining self-paced reading times and neural activity during reading English sentences, challenging the widely held idea that human sentence processing involves recurrent and immediate processing and provides evidence for cue-based retrieval.</abstract>
       <url hash="167e6770">2021.cmcl-1.2</url>
@@ -47,9 +47,9 @@
       <author><first>Shohini</first><last>Bhattasali</last></author>
       <author><first>Donald</first><last>Dunagan</last></author>
       <author><first>Luca</first><last>Campanelli</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>23–38</pages>
       <abstract>Hierarchical sentence structure plays a role in word-by-word human sentence comprehension, but it remains unclear how best to characterize this structure and unknown how exactly it would be recognized in a step-by-step process model. With a view towards sharpening this picture, we model the time course of hemodynamic activity within the brain during an extended episode of naturalistic language comprehension using Combinatory Categorial Grammar (CCG). CCG has well-defined incremental parsing algorithms, surface compositional semantics, and can explain long-range dependencies as well as complicated cases of coordination. We find that CCG-derived predictors improve a regression model of fMRI time course in six language-relevant brain regions, over and above predictors derived from context-free phrase structure. Adding a special Revealing operator to CCG parsing, one designed to handle right-adjunction, improves the fit in three of these regions. This evidence for CCG from neuroimaging bolsters the more general case for mildly context-sensitive grammars in the cognitive science of language.</abstract>
       <url hash="d65b24ec">2021.cmcl-1.3</url>
@@ -80,7 +80,7 @@
     <paper id="6">
       <title>Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention</title>
       <author><first>Soo Hyun</first><last>Ryu</last></author>
-      <author><first>Richard</first><last>Lewis</last></author>
+      <author id="richard-l-lewis"><first>Richard</first><last>Lewis</last></author>
       <pages>61–71</pages>
       <abstract>We advance a novel explanation of similarity-based interference effects in subject-verb and reflexive pronoun agreement processing, grounded in surprisal values computed from a pretrained large-scale Transformer model, GPT-2. Specifically, we show that surprisal of the verb or reflexive pronoun predicts facilitatory interference effects in ungrammatical sentences, where a distractor noun that matches in number with the verb or pronouns leads to faster reading times, despite the distractor not participating in the agreement relation. We review the human empirical evidence for such effects, including recent meta-analyses and large-scale studies. We also show that attention patterns (indexed by entropy and other measures) in the Transformer show patterns of diffuse attention in the presence of similar distractors, consistent with cue-based retrieval models of parsing. But in contrast to these models, the attentional cues and memory representations are learned entirely from the simple self-supervised task of predicting the next word.</abstract>
       <url hash="951fe812">2021.cmcl-1.6</url>
@@ -104,7 +104,7 @@
     <paper id="8">
       <title><fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab_<fixed-case>NC</fixed-case> at <fixed-case>CMCL</fixed-case>2021 Shared Task: Predicting Gaze Behaviour Using Linguistic Features and Tree Regressors</title>
       <author><first>Raksha</first><last>Agarwal</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>79–84</pages>
       <abstract>Analysis of gaze data behaviour has gained momentum in recent years for different NLP applications. The present paper aims at modelling gaze data behaviour of tokens in the context of a sentence. We have experimented with various Machine Learning Regression Algorithms on a feature space comprising the linguistic features of the target tokens for prediction of five Eye-Tracking features. CatBoost Regressor performed the best and achieved fourth position in terms of MAE based accuracy measurement for the ZuCo Dataset.</abstract>
       <url hash="5eb3e04f">2021.cmcl-1.8</url>
@@ -167,7 +167,7 @@
       <author><first>Shivani</first><last>Choudhary</last></author>
       <author><first>Kushagri</first><last>Tandon</last></author>
       <author><first>Raksha</first><last>Agarwal</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>114–119</pages>
       <abstract>Reading and comprehension are quintessentially cognitive tasks. Eye movement acts as a surrogate to understand which part of a sentence is critical to the process of comprehension. The aim of the shared task is to predict five eye-tracking features for a given word of the input sentence. We experimented with several models based on LGBM (Light Gradient Boosting Machine) Regression, ANN (Artificial Neural Network), and CNN (Convolutional Neural Network), using BERT embeddings and some combination of linguistic features. Our submission using CNN achieved an average MAE of 4.0639 and ranked 7th in the shared task. The average MAE was further lowered to 3.994 in post-task evaluation.</abstract>
       <url hash="ab43c80f">2021.cmcl-1.14</url>
@@ -213,7 +213,7 @@
     <paper id="18">
       <title>Enhancing Cognitive Models of Emotions with Representation Learning</title>
       <author><first>Yuting</first><last>Guo</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>141–148</pages>
       <abstract>We present a novel deep learning-based framework to generate embedding representations of fine-grained emotions that can be used to computationally describe psychological models of emotions. Our framework integrates a contextualized embedding encoder with a multi-head probing model that enables to interpret dynamically learned representations optimized for an emotion classification task. Our model is evaluated on the Empathetic Dialogue dataset and shows the state-of-the-art result for classifying 32 emotions. Our layer analysis can derive an emotion graph to depict hierarchical relations among the emotions. Our emotion representations can be used to generate an emotion wheel directly comparable to the one from Plutchik’s model, and also augment the values of missing emotions in the PAD emotional state model.</abstract>
       <url hash="388d240e">2021.cmcl-1.18</url>
diff --git a/data/xml/2021.codi.xml b/data/xml/2021.codi.xml
index 88f9b1093b..98e85efccd 100644
--- a/data/xml/2021.codi.xml
+++ b/data/xml/2021.codi.xml
@@ -24,7 +24,7 @@
       <author><first>Cathrine</first><last>Damgaard</last></author>
       <author><first>Paulina</first><last>Toborek</last></author>
       <author><first>Trine</first><last>Eriksen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1–11</pages>
       <abstract>Indirect answers are replies to polar questions without the direct use of word cues such as ‘yes’ and ‘no’. Humans are very good at understanding indirect answers, such as ‘I gotta go home sometime’, when asked ‘You wanna crash on the couch?’. Understanding indirect answers is a challenging problem for dialogue systems. In this paper, we introduce a new English corpus to study the problem of understanding indirect answers. Instead of crowd-sourcing both polar questions and answers, we collect questions and indirect answers from transcripts of a prominent TV series and manually annotate them for answer type. The resulting dataset contains 5,930 question-answer pairs. We release both aggregated and raw human annotations. We present a set of experiments in which we evaluate Convolutional Neural Networks (CNNs) for this task, including a cross-dataset evaluation and experiments with learning from disagreements in annotation. Our results show that the task of interpreting indirect answers remains challenging, yet we obtain encouraging improvements when explicitly modeling human disagreement.</abstract>
       <url hash="dd62e4f1">2021.codi-main.1</url>
@@ -76,8 +76,8 @@
     <paper id="5">
       <title>Coreference Chains Categorization by Sequence Clustering</title>
       <author><first>Silvia</first><last>Federzoni</last></author>
-      <author><first>Lydia-Mai</first><last>Ho-Dac</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <pages>52–57</pages>
       <abstract>The diversity of coreference chains is usually tackled by means of global features (length, types and number of referring expressions, distance between them, etc.). In this paper, we propose a novel approach that provides a description of their composition in terms of sequences of expressions. To this end, we apply sequence analysis techniques to bring out the various strategies for introducing a referent and keeping it active throughout discourse. We discuss a first application of this method to a French written corpus annotated with coreference chains. We obtain clusters that are linguistically coherent and interpretable in terms of reference strategies and we demonstrate the influence of text genre and semantic type of the referent on chain composition.</abstract>
       <url hash="1d653dd3">2021.codi-main.5</url>
@@ -137,7 +137,7 @@
     <paper id="10">
       <title>Revisiting Shallow Discourse Parsing in the <fixed-case>PDTB</fixed-case>-3: Handling Intra-sentential Implicits</title>
       <author><first>Zheng</first><last>Zhao</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>107–121</pages>
       <abstract>In the PDTB-3, several thousand implicit discourse relations were newly annotated within individual sentences, adding to the over 15,000 implicit relations annotated across adjacent sentences in the PDTB-2. Given that the position of the arguments to these intra-sentential implicits is no longer as well-defined as with inter-sentential implicits, a discourse parser must identify both their location and their sense. That is the focus of the current work. The paper provides a comprehensive analysis of our results, showcasing model performance under different scenarios, pointing out limitations and noting future directions.</abstract>
       <url hash="5e65bfe2">2021.codi-main.10</url>
@@ -147,7 +147,7 @@
     <paper id="11">
       <title>Improving Multi-Party Dialogue Discourse Parsing via Domain Integration</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>122–127</pages>
       <abstract>While multi-party conversations are often less structured than monologues and documents, they are implicitly organized by semantic level correlations across the interactive turns, and dialogue discourse analysis can be applied to predict the dependency structure and relations between the elementary discourse units, and provide feature-rich structural information for downstream tasks. However, the existing corpora with dialogue discourse annotation are collected from specific domains with limited sample sizes, rendering the performance of data-driven approaches poor on incoming dialogues without any domain adaptation. In this paper, we first introduce a Transformer-based parser, and assess its cross-domain performance. We next adopt three methods to gain domain integration from both data and language modeling perspectives to improve the generalization capability. Empirical results show that the neural parser can benefit from our proposed methods, and performs better on cross-domain dialogue samples.</abstract>
       <url hash="8ec86dbb">2021.codi-main.11</url>
@@ -192,7 +192,7 @@
       <title><fixed-case>DMRST</fixed-case>: A Joint Framework for Document-Level Multilingual <fixed-case>RST</fixed-case> Discourse Segmentation and Parsing</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Ke</first><last>Shi</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>154–164</pages>
       <abstract>Text discourse parsing weighs importantly in understanding information flow and argumentative structure in natural language, making it beneficial for downstream tasks. While previous work significantly improves the performance of RST discourse parsing, they are not readily applicable to practical use cases: (1) EDU segmentation is not integrated into most existing tree parsing frameworks, thus it is not straightforward to apply such models on newly-coming data. (2) Most parsers cannot be used in multilingual scenarios, because they are developed only in English. (3) Parsers trained from single-domain treebanks do not generalize well on out-of-domain inputs. In this work, we propose a document-level multilingual RST discourse parsing framework, which conducts EDU segmentation and discourse tree parsing jointly. Moreover, we propose a cross-translation augmentation strategy to enable the framework to support multilingual parsing and improve its domain generality. Experimental results show that our model achieves state-of-the-art performance on document-level multilingual RST parsing in all sub-tasks.</abstract>
       <url hash="1e1fea12">2021.codi-main.15</url>
@@ -206,7 +206,7 @@
       <author><first>Vassilina</first><last>Nikoulina</last></author>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>165–170</pages>
       <abstract>This paper presents an interactive data dashboard that provides users with an overview of the preservation of discourse relations among 28 language pairs. We display a graph network depicting the cross-lingual discourse relations between a pair of languages for multilingual TED talks and provide a search function to look for sentences with specific keywords or relation types, facilitating ease of analysis on the cross-lingual discourse relations.</abstract>
       <url hash="ef794298">2021.codi-main.16</url>
@@ -219,11 +219,11 @@
     <meta>
       <booktitle>Proceedings of the CODI-CRAC 2021 Shared Task on Anaphora, Bridging, and Discourse Deixis in Dialogue</booktitle>
       <editor><first>Sopan</first><last>Khosla</last></editor>
-      <editor><first>Ramesh</first><last>Manuvinakurike</last></editor>
+      <editor id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
-      <editor><first>Carolyn</first><last>Rosé</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
       <month>November</month>
@@ -267,7 +267,7 @@
       <author><first>Natalia</first><last>Skachkova</last></author>
       <author><first>Siyu</first><last>Tao</last></author>
       <author><first>Sharmila</first><last>Upadhyaya</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <pages>32–42</pages>
       <abstract>We describe the system developed by the DFKI-TalkingRobots Team for the CODI-CRAC 2021 Shared-Task on anaphora resolution in dialogue. Our system consists of three subsystems: (1) the Workspace Coreference System (WCS) incrementally clusters mentions using semantic similarity based on embeddings combined with lexical feature heuristics; (2) the Mention-to-Mention (M2M) coreference resolution system pairs same entity mentions; (3) the Discourse Deixis Resolution (DDR) system employs a Siamese Network to detect discourse anaphor-antecedent pairs. WCS achieved F1-score of 55.6% averaged across the evaluation test sets, M2M achieved 57.2% and DDR achieved 21.5%.</abstract>
       <url hash="5a6d3cc9">2021.codi-sharedtask.3</url>
@@ -301,7 +301,7 @@
     <paper id="6">
       <title>Adapted End-to-End Coreference Resolution System for Anaphoric Identities in Dialogues</title>
       <author><first>Liyan</first><last>Xu</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>55–62</pages>
       <abstract>We present an effective system adapted from the end-to-end neural coreference resolution model, targeting on the task of anaphora resolution in dialogues. Three aspects are specifically addressed in our approach, including the support of singletons, encoding speakers and turns throughout dialogue interactions, and knowledge transfer utilizing existing resources. Despite the simplicity of our adaptation strategies, they are shown to bring significant impact to the final performance, with up to 27 F1 improvement over the baseline. Our final system ranks the 1st place on the leaderboard of the anaphora resolution track in the CRAC 2021 shared task, and achieves the best evaluation results on all four datasets.</abstract>
       <url hash="c4f0e1b5">2021.codi-sharedtask.6</url>
@@ -315,7 +315,7 @@
       <author><first>Tatiana</first><last>Anikina</last></author>
       <author><first>Siyu</first><last>Tao</last></author>
       <author><first>Sharmila</first><last>Upadhyaya</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <pages>63–70</pages>
       <abstract>We compare our team’s systems to others submitted for the CODI-CRAC 2021 Shared-Task on anaphora resolution in dialogue. We analyse the architectures and performance, report some problematic cases in gold annotations, and suggest possible improvements of the systems, their evaluation, data annotation, and the organization of the shared task.</abstract>
       <url hash="14494493">2021.codi-sharedtask.7</url>
diff --git a/data/xml/2021.computel.xml b/data/xml/2021.computel.xml
index f22d0aa3a2..06b99d3deb 100644
--- a/data/xml/2021.computel.xml
+++ b/data/xml/2021.computel.xml
@@ -8,9 +8,9 @@
       <editor><first>Atticus</first><last>Harrigan</last></editor>
       <editor><first>Mans</first><last>Hulden</last></editor>
       <editor><first>Jordan</first><last>Lachler</last></editor>
-      <editor><first>Sarah</first><last>Moeller</last></editor>
+      <editor id="sarah-moeller"><first>Sarah</first><last>Moeller</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
-      <editor><first>Miikka</first><last>Silfverberg</last></editor>
+      <editor id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></editor>
       <editor><first>Lane</first><last>Schwartz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -44,7 +44,7 @@
       <title><fixed-case>LARA</fixed-case> in the Service of Revivalistics and Documentary Linguistics: Community Engagement and Endangered Languages</title>
       <author><first>Ghil’Ad</first><last>Zuckermann</last></author>
       <author><first>Sigurður</first><last>Vigfússon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Neasa</first><last>Ní Chiaráin</last></author>
       <author><first>Nedelina</first><last>Ivanova</last></author>
       <author><first>Hanieh</first><last>Habibi</last></author>
@@ -89,7 +89,7 @@
       <author><first>Janet</first><last>Wiles</last></author>
       <author><first>Alexis</first><last>Michaud</last></author>
       <author><first>Séverine</first><last>Guillaume</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Christopher</first><last>Cox</last></author>
       <author><first>Katya</first><last>Aplonova</last></author>
       <author><first>Guillaume</first><last>Jacques</last></author>
@@ -103,7 +103,7 @@
       <author><first>Nils</first><last>Hjortnaes</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Michael</first><last>Rießler</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>63–69</pages>
       <url hash="03dc0888">2021.computel-1.8</url>
       <bibkey>hjortnaes-etal-2021-relevance</bibkey>
@@ -111,7 +111,7 @@
     <paper id="9">
       <title>Shared Digital Resource Application within <fixed-case>I</fixed-case>nsular <fixed-case>S</fixed-case>candinavian</title>
       <author><first>Hinrik</first><last>Hafsteinsson</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>70–79</pages>
       <url hash="6d31817e">2021.computel-1.9</url>
       <bibkey>hafsteinsson-ingason-2021-shared</bibkey>
@@ -119,7 +119,7 @@
     <paper id="10">
       <title>Towards an Open Source Finite-State Morphological Analyzer for Zacatlán-Ahuacatlán-Tepetzintla <fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Robert</first><last>Pugh</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Marivel</first><last>Huerta Mendez</last></author>
       <pages>80–85</pages>
       <url hash="582288b1">2021.computel-1.10</url>
@@ -135,9 +135,9 @@
     </paper>
     <paper id="12">
       <title>Developing a Shared Task for Speech Processing on Endangered Languages</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <author><first>Emily</first><last>Ahn</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>96–106</pages>
       <url hash="7b52dd2e">2021.computel-1.12</url>
       <bibkey>levow-etal-2021-developing</bibkey>
diff --git a/data/xml/2021.conll.xml b/data/xml/2021.conll.xml
index f563e0076f..c2375bff75 100644
--- a/data/xml/2021.conll.xml
+++ b/data/xml/2021.conll.xml
@@ -20,7 +20,7 @@
       <author><first>Katharina</first><last>Weitz</last></author>
       <author><first>Lindsey</first><last>Vanderlyn</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Elisabeth</first><last>André</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>André</last></author>
       <pages>1–16</pages>
       <abstract>Human-AI collaboration, a long standing goal in AI, refers to a partnership where a human and artificial intelligence work together towards a shared goal. Collaborative dialog allows human-AI teams to communicate and leverage strengths from both partners. To design collaborative dialog systems, it is important to understand what mental models users form about their AI-dialog partners, however, how users perceive these systems is not fully understood. In this study, we designed a novel, collaborative, communication-based puzzle game and explanatory dialog system. We created a public corpus from 117 conversations and post-surveys and used this to analyze what mental models users formed. Key takeaways include: Even when users were not engaged in the game, they perceived the AI-dialog partner as intelligent and likeable, implying they saw it as a partner separate from the game. This was further supported by users often overestimating the system’s abilities and projecting human-like attributes which led to miscommunications. We conclude that creating shared mental models between users and AI systems is important to achieving successful dialogs. We propose that our insights on mental models and miscommunication, the game, and our corpus provide useful tools for designing collaborative dialog systems.</abstract>
       <url hash="7a6fa03b">2021.conll-1.1</url>
@@ -81,7 +81,7 @@
       <author><first>Emanuele</first><last>Bugliarello</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Chen</first><last>Qiu</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>58–71</pages>
       <abstract>Creole languages such as Nigerian Pidgin English and Haitian Creole are under-resourced and largely ignored in the NLP literature. Creoles typically result from the fusion of a foreign language with multiple local languages, and what grammatical and lexical features are transferred to the creole is a complex process. While creoles are generally stable, the prominence of some features may be much stronger with certain demographics or in some linguistic situations. This paper makes several contributions: We collect existing corpora and release models for Haitian Creole, Nigerian Pidgin English, and Singaporean Colloquial English. We evaluate these models on intrinsic and extrinsic tasks. Motivated by the above literature, we compare standard language models with distributionally robust ones and find that, somewhat surprisingly, the standard language models are superior to the distributionally robust ones. We investigate whether this is an effect of over-parameterization or relative distributional stability, and find that the difference persists in the absence of over-parameterization, and that drift is limited, confirming the relative stability of creole languages.</abstract>
       <url hash="32c6b62b">2021.conll-1.5</url>
@@ -136,7 +136,7 @@
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Stella</first><last>Frank</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>109–132</pages>
       <abstract>Pretrained language models have been shown to encode relational information, such as the relations between entities or concepts in knowledge-bases — (Paris, Capital, France). However, simple relations of this type can often be recovered heuristically and the extent to which models implicitly reflect topological structure that is grounded in world, such as perceptual structure, is unknown. To explore this question, we conduct a thorough case study on color. Namely, we employ a dataset of monolexemic color terms and color chips represented in CIELAB, a color space with a perceptually meaningful distance metric. Using two methods of evaluating the structural alignment of colors in this space with text-derived color term representations, we find significant correspondence. Analyzing the differences in alignment across the color spectrum, we find that warmer colors are, on average, better aligned to the perceptual color space than cooler ones, suggesting an intriguing connection to findings from recent work on efficient communication in color naming. Further analysis suggests that differences in alignment are, in part, mediated by collocationality and differences in syntactic usage, posing questions as to the relationship between color perception and usage and context.</abstract>
       <url hash="b50390ec">2021.conll-1.9</url>
@@ -158,7 +158,7 @@
     </paper>
     <paper id="11">
       <title>Enriching Language Models with Visually-grounded Word Vectors and the <fixed-case>L</fixed-case>ancaster Sensorimotor Norms</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>148–157</pages>
       <abstract>Language models are trained only on text despite the fact that humans learn their first language in a highly interactive and multimodal environment where the first set of learned words are largely concrete, denoting physical entities and embodied states. To enrich language models with some of this missing experience, we leverage two sources of information: (1) the Lancaster Sensorimotor norms, which provide ratings (means and standard deviations) for over 40,000 English words along several dimensions of embodiment, and which capture the extent to which something is experienced across 11 different sensory modalities, and (2) vectors from coefficients of binary classifiers trained on images for the BERT vocabulary. We pre-trained the ELECTRA model and fine-tuned the RoBERTa model with these two sources of information then evaluate using the established GLUE benchmark and the Visual Dialog benchmark. We find that enriching language models with the Lancaster norms and image vectors improves results in both tasks, with some implications for robust language models that capture holistic linguistic meaning in a language learning context.</abstract>
       <url hash="18495466">2021.conll-1.11</url>
@@ -170,7 +170,7 @@
       <title>Learning Zero-Shot Multifaceted Visually Grounded Word Embeddings via Multi-Task Training</title>
       <author><first>Hassan</first><last>Shahmohammadi</last></author>
       <author><first>Hendrik P. A.</first><last>Lensch</last></author>
-      <author><first>R. Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>R. Harald</first><last>Baayen</last></author>
       <pages>158–170</pages>
       <abstract>Language grounding aims at linking the symbolic representation of language (e.g., words) into the rich perceptual knowledge of the outside world. The general approach is to embed both textual and visual information into a common space -the grounded space- confined by an explicit relationship. We argue that since concrete and abstract words are processed differently in the brain, such approaches sacrifice the abstract knowledge obtained from textual statistics in the process of acquiring perceptual information. The focus of this paper is to solve this issue by implicitly grounding the word embeddings. Rather than learning two mappings into a joint space, our approach integrates modalities by implicit alignment. This is achieved by learning a reversible mapping between the textual and the grounded space by means of multi-task training. Intrinsic and extrinsic evaluations show that our way of visual grounding is highly beneficial for both abstract and concrete words. Our embeddings are correlated with human judgments and outperform previous works using pretrained word embeddings on a wide range of benchmarks. Our grounded embeddings are publicly available here.</abstract>
       <url hash="6386e0f9">2021.conll-1.12</url>
@@ -241,7 +241,7 @@
     </paper>
     <paper id="18">
       <title>Agree to Disagree: Analysis of Inter-Annotator Disagreements in Human Evaluation of Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>234–243</pages>
       <abstract>This work describes an analysis of inter-annotator disagreements in human evaluation of machine translation output. The errors in the analysed texts were marked by multiple annotators under guidance of different quality criteria: adequacy, comprehension, and an unspecified generic mixture of adequacy and fluency. Our results show that different criteria result in different disagreements, and indicate that a clear definition of quality criterion can improve the inter-annotator agreement. Furthermore, our results show that for certain linguistic phenomena which are not limited to one or two words (such as word ambiguity or gender) but span over several words or even entire phrases (such as negation or relative clause), disagreements do not necessarily represent “errors” or “noise” but are rather inherent to the evaluation process. %These disagreements are caused by differences in error perception and/or the fact that there is no single correct translation of a text so that multiple solutions are possible. On the other hand, for some other phenomena (such as omission or verb forms) agreement can be easily improved by providing more precise and detailed instructions to the evaluators.</abstract>
       <url hash="2b979c2a">2021.conll-1.18</url>
@@ -256,7 +256,7 @@
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Lukas</first><last>Nielsen</last></author>
       <author><first>Chen</first><last>Qiu</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>244–257</pages>
       <abstract>Negation is one of the most fundamental concepts in human cognition and language, and several natural language inference (NLI) probes have been designed to investigate pretrained language models’ ability to detect and reason with negation. However, the existing probing datasets are limited to English only, and do not enable controlled probing of performance in the absence or presence of negation. In response, we present a multilingual (English, Bulgarian, German, French and Chinese) benchmark collection of NLI examples that are grammatical and correctly labeled, as a result of manual inspection and reformulation. We use the benchmark to probe the negation-awareness of multilingual language models and find that models that correctly predict examples with negation cues, often fail to correctly predict their counter-examples without negation cues, even when the cues are irrelevant for semantic inference.</abstract>
       <url hash="b5dee3ad">2021.conll-1.19</url>
@@ -357,9 +357,9 @@
     <paper id="27">
       <title>Imposing Relation Structure in Language-Model Embeddings Using Contrastive Learning</title>
       <author><first>Christos</first><last>Theodoropoulos</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Andrei Catalin</first><last>Coman</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>337–348</pages>
       <abstract>Though language model text embeddings have revolutionized NLP research, their ability to capture high-level semantic information, such as relations between entities in text, is limited. In this paper, we propose a novel contrastive learning framework that trains sentence embeddings to encode the relations in a graph structure. Given a sentence (unstructured text) and its graph, we use contrastive learning to impose relation-related structure on the token level representations of the sentence obtained with a CharacterBERT (El Boukkouri et al., 2020) model. The resulting relation-aware sentence embeddings achieve state-of-the-art results on the relation extraction task using only a simple KNN classifier, thereby demonstrating the success of the proposed method. Additional visualization by a tSNE analysis shows the effectiveness of the learned representation space compared to baselines. Furthermore, we show that we can learn a different space for named entity recognition, again using a contrastive learning objective, and demonstrate how to successfully combine both representation spaces in an entity-relation task.</abstract>
       <url hash="71988cb1">2021.conll-1.27</url>
@@ -375,7 +375,7 @@
       <author><first>Omar</first><last>Agha</last></author>
       <author><first>Soo-Hwan</first><last>Lee</last></author>
       <author><first>Zhuoye</first><last>Zhao</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>349–366</pages>
       <abstract>Understanding language requires grasping not only the overtly stated content, but also making inferences about things that were left unsaid. These inferences include presuppositions, a phenomenon by which a listener learns about new information through reasoning about what a speaker takes as given. Presuppositions require complex understanding of the lexical and syntactic properties that trigger them as well as the broader conversational context. In this work, we introduce the Naturally-Occurring Presuppositions in English (NOPE) Corpus to investigate the context-sensitivity of 10 different types of presupposition triggers and to evaluate machine learning models’ ability to predict human inferences. We find that most of the triggers we investigate exhibit moderate variability. We further find that transformer-based models draw correct inferences in simple cases involving presuppositions, but they fail to capture the minority of exceptional cases in which human judgments reveal complex interactions between context and triggers.</abstract>
@@ -471,7 +471,7 @@
       <title>Does referent predictability affect the choice of referential form? A computational approach using masked coreference resolution</title>
       <author><first>Laura</first><last>Aina</last></author>
       <author><first>Xixian</first><last>Liao</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Matthijs</first><last>Westera</last></author>
       <pages>454–469</pages>
       <abstract>It is often posited that more predictable parts of a speaker’s meaning tend to be made less explicit, for instance using shorter, less informative words. Studying these dynamics in the domain of referring expressions has proven difficult, with existing studies, both psycholinguistic and corpus-based, providing contradictory results. We test the hypothesis that speakers produce less informative referring expressions (e.g., pronouns vs. full noun phrases) when the context is more informative about the referent, using novel computational estimates of referent predictability. We obtain these estimates training an existing coreference resolution system for English on a new task, masked coreference resolution, giving us a probability distribution over referents that is conditioned on the context but not the referring expression. The resulting system retains standard coreference resolution performance while yielding a better estimate of human-derived referent predictability than previous attempts. A statistical analysis of the relationship between model output and mention form supports the hypothesis that predictability affects the form of a mention, both its morphosyntactic type and its length.</abstract>
@@ -496,7 +496,7 @@
     <paper id="38">
       <title>Commonsense Knowledge in Word Associations and <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et</title>
       <author><first>Chunhua</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>481–495</pages>
       <abstract>Humans use countless basic, shared facts about the world to efficiently navigate in their environment. This commonsense knowledge is rarely communicated explicitly, however, understanding how commonsense knowledge is represented in different paradigms is important for (a) a deeper understanding of human cognition and (b) augmenting automatic reasoning systems. This paper presents an in-depth comparison of two large-scale resources of general knowledge: ConceptNet, an engineered relational database, and SWOW, a knowledge graph derived from crowd-sourced word associations. We examine the structure, overlap and differences between the two graphs, as well as the extent of situational commonsense knowledge present in the two resources. We finally show empirically that both resources improve downstream task performance on commonsense reasoning benchmarks over text-only baselines, suggesting that large-scale word association data, which have been obtained for several languages through crowd-sourcing, can be a valuable complement to curated knowledge graphs.</abstract>
@@ -556,7 +556,7 @@
       <author><first>Siddique</first><last>Latif</last></author>
       <author><first>Inyoung</first><last>Kim</last></author>
       <author><first>Ioan</first><last>Calapodescu</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>544–551</pages>
       <abstract>While End-2-End Text-to-Speech (TTS) has made significant progresses over the past few years, these systems still lack intuitive user controls over prosody. For instance, generating speech with fine-grained prosody control (prosodic prominence, contextually appropriate emotions) is still an open challenge. In this paper, we investigate whether we can control prosody directly from the input text, in order to code information related to contrastive focus which emphasizes a specific word that is contrary to the presuppositions of the interlocutor. We build and share a specific dataset for this purpose and show that it allows to train a TTS system were this fine-grained prosodic feature can be correctly conveyed using control tokens. Our evaluation compares synthetic and natural utterances and shows that prosodic patterns of contrastive focus (variations of Fo, Intensity and Duration) can be learnt accurately. Such a milestone is important to allow, for example, smart speakers to be programmatically controlled in terms of output prosody.</abstract>
       <url hash="1ea87db4">2021.conll-1.42</url>
@@ -569,7 +569,7 @@
       <author><first>Hoyun</first><last>Song</last></author>
       <author><first>Soo Hyun</first><last>Ryu</last></author>
       <author><first>Huije</first><last>Lee</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>552–561</pages>
       <abstract>As users in online communities suffer from severe side effects of abusive language, many researchers attempted to detect abusive texts from social media, presenting several datasets for such detection. However, none of them contain both comprehensive labels and contextual information, which are essential for thoroughly detecting all kinds of abusiveness from texts, since datasets with such fine-grained features demand a significant amount of annotations, leading to much increased complexity. In this paper, we propose a Comprehensive Abusiveness Detection Dataset (CADD), collected from the English Reddit posts, with multifaceted labels and contexts. Our dataset is annotated hierarchically for an efficient annotation through crowdsourcing on a large-scale. We also empirically explore the characteristics of our dataset and provide a detailed analysis for novel insights. The results of our experiments with strong pre-trained natural language understanding models on our dataset show that our dataset gives rise to meaningful performance, assuring its practicality for abusive language detection.</abstract>
       <url hash="beecf2a8">2021.conll-1.43</url>
@@ -630,8 +630,8 @@
     <paper id="48">
       <title>The Emergence of the Shape Bias Results from Communicative Efficiency</title>
       <author><first>Eva</first><last>Portelance</last></author>
-      <author><first>Michael C.</first><last>Frank</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
       <pages>607–623</pages>
@@ -657,7 +657,7 @@
     <paper id="50">
       <title>Analysing Human Strategies of Information Transmission as a Function of Discourse Context</title>
       <author><first>Mario</first><last>Giulianelli</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>647–660</pages>
       <abstract>Speakers are thought to use rational information transmission strategies for efficient communication (Genzel and Charniak, 2002; Aylett and Turk, 2004; Jaeger and Levy, 2007). Previous work analysing these strategies in sentence production has failed to take into account how the information content of sentences varies as a function of the available discourse context. In this study, we estimate sentence information content within discourse context. We find that speakers transmit information at a stable rate—i.e., rationally—in English newspaper articles but that this rate decreases in spoken open domain and written task-oriented dialogues. We also observe that speakers’ choices are not oriented towards local uniformity of information, which is another hypothesised rational strategy. We suggest that a more faithful model of communication should explicitly include production costs and goal-oriented rewards.</abstract>
       <url hash="284d5e39">2021.conll-1.50</url>
@@ -683,7 +683,7 @@
       <author><first>Emma</first><last>O’Neill</last></author>
       <author><first>Joe</first><last>Kenny</last></author>
       <author><first>Anthony</first><last>Ventresque</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <pages>674–683</pages>
       <abstract>A child who is unfamiliar with the correct spelling of a word often employs a “sound it out” approach: breaking the word down into its constituent sounds and then choosing letters to represent the identified sounds. This often results in a misspelling that is orthographically very different to the intended target. Recently, efforts have been made to develop phonetic based spellcheckers to tackle the more deviant nature of children’s misspellings. However, little work has been done to investigate the potential of spelling correction tools that incorporate regional pronunciation variation. If a child must first identify the sounds that make up a word, it stands to reason their pronunciation would influence this process. We investigate this hypothesis along with the feasibility and potential benefits of adapting spelling correction tools to more specific language variants - particularly Irish Accented English. We use misspelling data from schoolchildren across Ireland to adapt an existing English phonetic-based spellchecker and demonstrate improvements in performance. These results not only prompt consideration of language varieties in the development of spellcheckers but also contribute to existing literature on the role of regional accent in the acquisition of writing proficiency.</abstract>
       <url hash="061c5269">2021.conll-1.52</url>
diff --git a/data/xml/2021.crac.xml b/data/xml/2021.crac.xml
index 3ef40370a8..040f9b9524 100644
--- a/data/xml/2021.crac.xml
+++ b/data/xml/2021.crac.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Computational Models of Reference, Anaphora and Coreference</booktitle>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <editor><first>Yulia</first><last>Grishina</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -50,7 +50,7 @@
       <author><first>Jongin</first><last>Kim</last></author>
       <author><first>Nayoung</first><last>Choi</last></author>
       <author><first>Min</first><last>Song</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>24–35</pages>
       <abstract>This paper presents a new corpus and annotation guideline for a novel coreference resolution task on fictional texts, and analyzes its unique characteristics. FantasyCoref contains 211 stories of Grimms’ Fairy Tales and 3 other fantasy literature annotated in the omniscient writer’s point of view (OWV) to handle distinctive aspects in this genre. This task is more challenging than general coreference resolution in two ways. First, documents in our corpus are 2.5 times longer than the ones in OntoNotes, raising a new layer of difficulty in resolving long-distant referents. Second, annotation of literary styles and concepts raise several issues which are not sufficiently addressed in the existing annotation guidelines. Hence, considerations on such issues and the concept of OWV are necessary to achieve high inter-annotator agreement (IAA) in coreference resolution of fictional texts. We carefully conduct annotation tasks in four stages to ensure the quality of our annotation. As a result, a high IAA score of 87% is achieved using the standard coreference evaluation metric. Finally, state-of-the-art coreference resolution approaches are evaluated on our corpus. After training with our annotated dataset, there was a 2.59% and 3.06% improvement over the model trained on the OntoNotes dataset. Also, we observe that the portion of errors specific to fictional texts declines after the training.</abstract>
       <url hash="cca968d6">2021.crac-1.3</url>
@@ -101,8 +101,8 @@
       <author><first>Hieu</first><last>Lam</last></author>
       <author><first>Martin</first><last>Wu</last></author>
       <author><first>Ophélie</first><last>Lacroix</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>63–69</pages>
       <abstract>Automatic coreference resolution is understudied in Danish even though most of the Danish Dependency Treebank (Buch-Kromann, 2003) is annotated with coreference relations. This paper describes a conversion of its partial, yet well-documented, coreference relations into coreference clusters and the training and evaluation of coreference models on this data. To the best of our knowledge, these are the first publicly available, neural coreference models for Danish. We also present a new entity linking annotation on the dataset using WikiData identifiers, a named entity disambiguation (NED) dataset, and a larger automatically created NED dataset enabling wikily supervised NED models. The entity linking annotation is benchmarked using a state-of-the-art neural entity disambiguation model.</abstract>
       <url hash="c0ca6a17">2021.crac-1.7</url>
@@ -189,7 +189,7 @@
       <author><first>Xudong</first><last>Lin</last></author>
       <author><first>Manling</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>132–140</pages>
       <abstract>Event coreference resolution is critical to understand events in the growing number of online news with multiple modalities including text, video, speech, etc. However, the events and entities depicting in different modalities may not be perfectly aligned and can be difficult to annotate, which makes the task especially challenging with little supervision available. To address the above issues, we propose a supervised model based on attention mechanism and an unsupervised model based on statistical machine translation, capable of learning the relative importance of modalities for event coreference resolution. Experiments on a video multimedia event dataset show that our multimodal models outperform text-only systems in event coreference resolution tasks. A careful analysis reveals that the performance gain of the multimodal model especially under unsupervised settings comes from better learning of visually salient events.</abstract>
       <url hash="817d81a4">2021.crac-1.14</url>
diff --git a/data/xml/2021.cstfrs.xml b/data/xml/2021.cstfrs.xml
index aeb7b02733..16fc14a2ec 100644
--- a/data/xml/2021.cstfrs.xml
+++ b/data/xml/2021.cstfrs.xml
@@ -67,7 +67,7 @@
     <paper id="7">
       <title>Analytical, Symbolic and First-Order Reasoning within Neural Architectures</title>
       <author><first>Samuel</first><last>Ryb</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>61–72</pages>
       <url hash="add01631">2021.cstfrs-1.7</url>
       <bibkey>ryb-van-schijndel-2021-analytical</bibkey>
diff --git a/data/xml/2021.dash.xml b/data/xml/2021.dash.xml
index ade9dfc9b1..463772a022 100644
--- a/data/xml/2021.dash.xml
+++ b/data/xml/2021.dash.xml
@@ -122,7 +122,7 @@
     <paper id="10">
       <title>Data Cleaning Tools for Token Classification Tasks</title>
       <author><first>Karthik</first><last>Muthuraman</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Hong</first><last>Xu</last></author>
       <author><first>Bryan</first><last>Cutler</last></author>
       <author><first>Zachary</first><last>Eichenberger</last></author>
@@ -178,7 +178,7 @@
       <author><first>Ajay</first><last>Patel</last></author>
       <author><first>Akanksha</first><last>Malhotra</last></author>
       <author><first>Reno</first><last>Kriz</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>86–101</pages>
       <abstract>Acquiring training data for natural language processing systems can be expensive and time-consuming. Given a few training examples crafted by experts, large corpora can be mined for thousands of semantically similar examples that provide useful variability to improve model generalization. We present TopGuNN, a fast contextualized k-NN retrieval system that can efficiently index and search over contextual embeddings generated from large corpora. TopGuNN is demonstrated for a training data augmentation use case over the Gigaword corpus. Using approximate k-NN and an efficient architecture, TopGuNN performs queries over an embedding space of 4.63TB (approximately 1.5B embeddings) in less than a day.</abstract>
@@ -192,7 +192,7 @@
       <author><first>Esha</first><last>Singh</last></author>
       <author><first>Martin</first><last>Michalowski</last></author>
       <author><first>Maria</first><last>Gini</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>102–104</pages>
       <abstract>We present the Everyday Living Artificial Intelligence (AI) Hub, a novel proof-of-concept framework for enhancing human health and wellbeing via a combination of tailored wear-able and Conversational Agent (CA) solutions for non-invasive monitoring of physiological signals, assessment of behaviors through unobtrusive wearable devices, and the provision of personalized interventions to reduce stress and anxiety. We utilize recent advancements and industry standards in the Internet of Things (IoT)and AI technologies to develop this proof-of-concept framework.</abstract>
       <url hash="a37d2866">2021.dash-1.15</url>
diff --git a/data/xml/2021.deelio.xml b/data/xml/2021.deelio.xml
index fb6324e0c7..36239b7b0e 100644
--- a/data/xml/2021.deelio.xml
+++ b/data/xml/2021.deelio.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-05-24" type="proceedings">
     <meta>
       <booktitle>Proceedings of Deep Learning Inside Out (DeeLIO): The 2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</booktitle>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -47,8 +47,8 @@
       <author><first>Vidhisha</first><last>Balachandran</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Haitian</first><last>Sun</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>25–30</pages>
       <abstract>Existing work shows the benefits of integrating KBs with textual evidence for QA only on questions that are answerable by KBs alone (Sun et al., 2019). In contrast, real world QA systems often have to deal with questions that might not be directly answerable by KBs. Here, we investigate the effect of integrating background knowledge from KBs for the Natural Questions (NQ) task. We create a subset of the NQ data, Factual Questions (FQ), where the questions have evidence in the KB in the form of paths that link question entities to answer entities but still must be answered using text, to facilitate further research into KB integration methods. We propose and analyze a simple, model-agnostic approach for incorporating KB paths into text-based QA systems and establish a strong upper bound on FQ for our method using an oracle retriever. We show that several variants of Personalized PageRank based fact retrievers lead to a low recall of answer entities and consequently fail to improve QA performance. Our results suggest that fact retrieval is a bottleneck for integrating KBs into real world QA datasets</abstract>
       <url hash="249681a0">2021.deelio-1.3</url>
@@ -102,7 +102,7 @@
     <paper id="8">
       <title>Enhancing Multiple-Choice Question Answering with Causal Knowledge</title>
       <author><first>Dhairya</first><last>Dalal</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>70–80</pages>
       <abstract>The task of causal question answering aims to reason about causes and effects over a provided real or hypothetical premise. Recent approaches have converged on using transformer-based language models to solve question answering tasks. However, pretrained language models often struggle when external knowledge is not present in the premise or when additional context is required to answer the question. To the best of our knowledge, no prior work has explored the efficacy of augmenting pretrained language models with external causal knowledge for multiple-choice causal question answering. In this paper, we present novel strategies for the representation of causal knowledge. Our empirical results demonstrate the efficacy of augmenting pretrained models with external causal knowledge. We show improved performance on the COPA (Choice of Plausible Alternatives) and WIQA (What If Reasoning Over Procedural Text) benchmark tasks. On the WIQA benchmark, our approach is competitive with the state-of-the-art and exceeds it within the evaluation subcategories of In-Paragraph and Out-of-Paragraph perturbations.</abstract>
@@ -127,7 +127,7 @@
       <author><first>Seojin</first><last>Bang</last></author>
       <author><first>Wen</first><last>Xiao</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Young ji</first><last>Lee</last></author>
       <pages>96–107</pages>
       <abstract>Text classification has wide-ranging applications in various domains. While neural network approaches have drastically advanced performance in text classification, they tend to be powered by a large amount of training data, and interpretability is often an issue. As a step towards better accuracy and interpretability especially on small data, in this paper we present a new knowledge-infused attention mechanism, called KW-ATTN (KnoWledge-infused ATTentioN) to incorporate high-level concepts from external knowledge bases into Neural Network models. We show that KW-ATTN outperforms baseline models using only words as well as other approaches using concepts by classification accuracy, which indicates that high-level concepts help model prediction. Furthermore, crowdsourced human evaluation suggests that additional concept information helps interpretability of the model.</abstract>
@@ -174,7 +174,7 @@
     <paper id="14">
       <title>Predicting Numerals in Natural Language Text Using a Language Model Considering the Quantitative Aspects of Numerals</title>
       <author><first>Taku</first><last>Sakamoto</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>140–150</pages>
       <abstract>Numerical common sense (NCS) is necessary to fully understand natural language text that includes numerals. NCS is knowledge about the numerical features of objects in text, such as size, weight, or color. Existing neural language models treat numerals in a text as string tokens in the same way as other words. Therefore, they cannot reflect the quantitative aspects of numerals in the training process, making it difficult to learn NCS. In this paper, we measure the NCS acquired by existing neural language models using a masked numeral prediction task as an evaluation task. In this task, we use two evaluation metrics to evaluate the language models in terms of the symbolic and quantitative aspects of the numerals, respectively. We also propose methods to reflect not only the symbolic aspect but also the quantitative aspect of numerals in the training of language models, using a loss function that depends on the magnitudes of the numerals and a regression model for the masked numeral prediction task. Finally, we quantitatively evaluate our proposed approaches on four datasets with different properties using the two metrics. Compared with methods that use existing language models, the proposed methods reduce numerical absolute errors, although exact match accuracy was reduced. This result confirms that the proposed methods, which use the magnitudes of the numerals for model training, are an effective way for models to capture NCS.</abstract>
       <url hash="88527634">2021.deelio-1.14</url>
diff --git a/data/xml/2021.depling.xml b/data/xml/2021.depling.xml
index a2795a7307..9add553148 100644
--- a/data/xml/2021.depling.xml
+++ b/data/xml/2021.depling.xml
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title>On auxiliary verb in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: untangling the issue and proposing a systematized annotation strategy</title>
-      <author><first>Magali</first><last>Duran</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Duran</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
       <author><first>Amanda</first><last>Rassi</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
@@ -95,8 +95,8 @@
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>101–114</pages>
       <url hash="ea6fb0cc">2021.depling-1.10</url>
       <bibkey>nedoluzhko-etal-2021-one</bibkey>
diff --git a/data/xml/2021.dialdoc.xml b/data/xml/2021.dialdoc.xml
index e7c5dd3354..3920dab6e4 100644
--- a/data/xml/2021.dialdoc.xml
+++ b/data/xml/2021.dialdoc.xml
@@ -86,7 +86,7 @@
       <title><fixed-case>CA</fixed-case>i<fixed-case>RE</fixed-case> in <fixed-case>D</fixed-case>ial<fixed-case>D</fixed-case>oc21: Data Augmentation for Information Seeking Dialogue System</title>
       <author><first>Yan</first><last>Xu</last></author>
       <author><first>Etsuko</first><last>Ishii</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
@@ -103,7 +103,7 @@
       <author><first>Jiapeng</first><last>Li</last></author>
       <author><first>Mingda</first><last>Li</last></author>
       <author><first>Longxuan</first><last>Ma</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>52–56</pages>
       <abstract>We participate in the DialDoc Shared Task sub-task 1 (Knowledge Identification). The task requires identifying the grounding knowledge in form of a document span for the next dialogue turn. We employ two well-known pre-trained language models (RoBERTa and ELECTRA) to identify candidate document spans and propose a metric-based ensemble method for span selection. Our methods include data augmentation, model pre-training/fine-tuning, post-processing, and ensemble. On the submission page, we rank 2nd based on the average of normalized F1 and EM scores used for the final evaluation. Specifically, we rank 2nd on EM and 3rd on F1.</abstract>
@@ -116,7 +116,7 @@
       <author><first>Nico</first><last>Daheim</last></author>
       <author><first>David</first><last>Thulke</last></author>
       <author><first>Christian</first><last>Dugast</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>57–62</pages>
       <abstract>This paper summarizes our entries to both subtasks of the first DialDoc shared task which focuses on the agent response prediction task in goal-oriented document-grounded dialogs. The task is split into two subtasks: predicting a span in a document that grounds an agent turn and generating an agent response based on a dialog and grounding document. In the first subtask, we restrict the set of valid spans to the ones defined in the dataset, use a biaffine classifier to model spans, and finally use an ensemble of different models. For the second sub-task, we use a cascaded model which grounds the response prediction on the predicted span instead of the full document. With these approaches, we obtain significant improvements in both subtasks compared to the baseline.</abstract>
       <url hash="846c5cb9">2021.dialdoc-1.8</url>
@@ -192,7 +192,7 @@
       <author><first>Yeju</first><last>Zhou</last></author>
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Jonathan</first><last>Francis</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Alessandro</first><last>Oltramari</last></author>
       <pages>109–112</pages>
       <abstract>In this paper, we describe our systems for solving the two Doc2Dial shared task: knowledge identification and response generation. We proposed several pre-processing and post-processing methods, and we experimented with data augmentation by pre-training the models on other relevant datasets. Our best model for knowledge identification outperformed the baseline by 10.5+ f1-score on the test-dev split, and our best model for response generation outperformed the baseline by 11+ Sacrebleu score on the test-dev split.</abstract>
@@ -215,7 +215,7 @@
       <title>Can <fixed-case>I</fixed-case> Be of Further Assistance? Using Unstructured Knowledge Access to Improve Task-oriented Conversational Modeling</title>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>119–127</pages>
       <abstract>Most prior work on task-oriented dialogue systems are restricted to limited coverage of domain APIs. However, users oftentimes have requests that are out of the scope of these APIs. This work focuses on responding to these beyond-API-coverage user turns by incorporating external, unstructured knowledge sources. Our approach works in a pipelined manner with knowledge-seeking turn detection, knowledge selection, and response generation in sequence. We introduce novel data augmentation methods for the first two steps and demonstrate that the use of information extracted from dialogue context improves the knowledge selection and end-to-end performances. Through experiments, we achieve state-of-the-art performance for both automatic and human evaluation metrics on the DSTC9 Track 1 benchmark dataset, validating the effectiveness of our contributions.</abstract>
       <url hash="840a6ccf">2021.dialdoc-1.16</url>
diff --git a/data/xml/2021.discann.xml b/data/xml/2021.discann.xml
index eddaa5afba..c52c06be1a 100644
--- a/data/xml/2021.discann.xml
+++ b/data/xml/2021.discann.xml
@@ -58,7 +58,7 @@
       <author><first>Kordula De</first><last>Kuthy</last></author>
       <author><first>Madeeswaran</first><last>Kannan</last></author>
       <author><first>Haemanth Santhi</first><last>Ponnusamy</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>31–40</pages>
       <url hash="febd7fee">2021.discann-1.6</url>
       <bibkey>kuthy-etal-2021-advancing</bibkey>
diff --git a/data/xml/2021.disrpt.xml b/data/xml/2021.disrpt.xml
index c5de4197ac..e774a66957 100644
--- a/data/xml/2021.disrpt.xml
+++ b/data/xml/2021.disrpt.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 2nd Shared Task on Discourse Relation Parsing and Treebanking (DISRPT 2021)</booktitle>
       <editor><first>Amir</first><last>Zeldes</last></editor>
-      <editor><first>Yang Janet</first><last>Liu</last></editor>
-      <editor><first>Mikel</first><last>Iruskieta</last></editor>
+      <editor id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></editor>
+      <editor id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></editor>
       <editor><first>Philippe</first><last>Muller</last></editor>
       <editor><first>Chloé</first><last>Braud</last></editor>
       <editor><first>Sonia</first><last>Badene</last></editor>
@@ -36,7 +36,7 @@
     <paper id="2">
       <title>A Transformer Based Approach towards Identification of Discourse Unit Segments and Connectives</title>
       <author><first>Sahil</first><last>Bakshi</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>13–21</pages>
       <abstract>Discourse parsing, which involves understanding the structure, information flow, and modeling the coherence of a given text, is an important task in natural language processing. It forms the basis of several natural language processing tasks such as question-answering, text summarization, and sentiment analysis. Discourse unit segmentation is one of the fundamental tasks in discourse parsing and refers to identifying the elementary units of text that combine to form a coherent text. In this paper, we present a transformer based approach towards the automated identification of discourse unit segments and connectives. Early approaches towards segmentation relied on rule-based systems using POS tags and other syntactic information to identify discourse segments. Recently, transformer based neural systems have shown promising results in this domain. Our system, SegFormers, employs this transformer based approach to perform multilingual discourse segmentation and connective identification across 16 datasets encompassing 11 languages and 3 different annotation frameworks. We evaluate the system based on F1 scores for both tasks, with the best system reporting the highest F1 score of 97.02% for the treebanked English RST-DT dataset.</abstract>
       <url hash="6c26e3ac">2021.disrpt-1.2</url>
diff --git a/data/xml/2021.dravidianlangtech.xml b/data/xml/2021.dravidianlangtech.xml
index 7499fbbaf8..ea8dba1a5d 100644
--- a/data/xml/2021.dravidianlangtech.xml
+++ b/data/xml/2021.dravidianlangtech.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages</booktitle>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
       <editor><first>Ruba</first><last>Priyadharshini</last></editor>
-      <editor><first>Anand</first><last>Kumar M</last></editor>
+      <editor id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></editor>
       <editor><first>Parameswari</first><last>Krishnamurthy</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -50,9 +50,9 @@
     </paper>
     <paper id="4">
       <title>Sentiment Classification of Code-Mixed Tweets using Bi-Directional <fixed-case>RNN</fixed-case> and Language Tags</title>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>28–35</pages>
       <abstract>Sentiment analysis tools and models have been developed extensively throughout the years, for European languages. In contrast, similar tools for Indian Languages are scarce. This is because, state-of-the-art pre-processing tools like POS tagger, shallow parsers, etc., are not readily available for Indian languages. Although, such working tools for Indian languages, like Hindi and Bengali, that are spoken by the majority of the population, are available, finding the same for less spoken languages like, Tamil, Telugu, and Malayalam, is difficult. Moreover, due to the advent of social media, the multi-lingual population of India, who are comfortable with both English ad their regional language, prefer to communicate by mixing both languages. This gives rise to massive code-mixed content and automatically annotating them with their respective sentiment labels becomes a challenging task. In this work, we take up a similar challenge of developing a sentiment analysis model that can work with English-Tamil code-mixed data. The proposed work tries to solve this by using bi-directional LSTMs along with language tagging. Other traditional methods, based on classical machine learning algorithms have also been discussed in the literature, and they also act as the baseline systems to which we will compare our Neural Network based model. The performance of the developed algorithm, based on Neural Network architecture, garnered precision, recall, and F1 scores of 0.59, 0.66, and 0.58 respectively.</abstract>
       <url hash="c9f9f121">2021.dravidianlangtech-1.4</url>
@@ -104,7 +104,7 @@
       <title>Task-Specific Pre-Training and Cross Lingual Transfer for Sentiment Analysis in <fixed-case>D</fixed-case>ravidian Code-Switched Languages</title>
       <author><first>Akshat</first><last>Gupta</last></author>
       <author><first>Sai Krishna</first><last>Rallabandi</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>73–79</pages>
       <abstract>Sentiment analysis in Code-Mixed languages has garnered a lot of attention in recent years. It is an important task for social media monitoring and has many applications, as a large chunk of social media data is Code-Mixed. In this paper, we work on the problem of sentiment analysis for Dravidian Code-Switched languages - Tamil-Engish and Malayalam-English, using three different BERT based models. We leverage task-specific pre-training and cross-lingual transfer to improve on previously reported results, with significant improvement for the Tamil-Engish dataset. We also present a multilingual sentiment classification model that has competitive performance on both Tamil-English and Malayalam-English datasets.</abstract>
       <url hash="00203f7c">2021.dravidianlangtech-1.9</url>
@@ -166,7 +166,7 @@
       <author><first>Ruba</first><last>Priyadharshini</last></author>
       <author><first>Shubhanker</first><last>Banerjee</last></author>
       <author><first>Richard</first><last>Saldanha</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Anand Kumar</first><last>M</last></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last></author>
       <author><first>Melvin</first><last>Johnson</last></author>
@@ -194,7 +194,7 @@
       <author><first>Prasanna Kumar</first><last>Kumaresan</last></author>
       <author><first>Rahul</first><last>Ponnusamy</last></author>
       <author><first>Hariharan</first><last>R L</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Elizabeth</first><last>Sherly</last></author>
       <pages>133–145</pages>
       <abstract>Detecting offensive language in social media in local languages is critical for moderating user-generated content. Thus, the field of offensive language identification in under-resourced Tamil, Malayalam and Kannada languages are essential. As the user-generated content is more code-mixed and not well studied for under-resourced languages, it is imperative to create resources and conduct benchmarking studies to encourage research in under-resourced Dravidian languages. We created a shared task on offensive language detection in Dravidian languages. We summarize here the dataset for this challenge which are openly available at <url>https://competitions.codalab.org/competitions/27654</url>, and present an overview of the methods and the results of the competing systems.</abstract>
@@ -502,7 +502,7 @@
       <title><fixed-case>JUNLP</fixed-case>@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech-<fixed-case>EACL</fixed-case>2021: Offensive Language Identification in <fixed-case>D</fixed-case>ravidian Langauges</title>
       <author><first>Avishek</first><last>Garain</last></author>
       <author><first>Atanu</first><last>Mandal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>319–322</pages>
       <abstract>Offensive language identification has been an active area of research in natural language processing. With the emergence of multiple social media platforms offensive language identification has emerged as a need of the hour. Traditional offensive language identification models fail to deliver acceptable results as social media contents are largely in multilingual and are code-mixed in nature. This paper tries to resolve this problem by using IndicBERT and BERT architectures, to facilitate identification of offensive languages for Kannada-English, Malayalam-English, and Tamil-English code-mixed language pairs extracted from social media. The presented approach when evaluated on the test corpus provided precision, recall, and F1 score for language pair Kannada-English as 0.62, 0.71, and 0.66, respectively, for language pair Malayalam-English as 0.77, 0.43, and 0.53, respectively, and for Tamil-English as 0.71, 0.74, and 0.72, respectively.</abstract>
       <url hash="ec811285">2021.dravidianlangtech-1.46</url>
diff --git a/data/xml/2021.eacl.xml b/data/xml/2021.eacl.xml
index c48bdfa284..8f1dfe186c 100644
--- a/data/xml/2021.eacl.xml
+++ b/data/xml/2021.eacl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</booktitle>
       <editor><first>Paola</first><last>Merlo</last></editor>
-      <editor><first>Jorg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jorg</first><last>Tiedemann</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -35,7 +35,7 @@
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Mengdi</first><last>Wang</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>12–30</pages>
       <abstract>Multi-document question generation focuses on generating a question that covers the common aspect of multiple documents. Such a model is useful in generating clarifying options. However, a naive model trained only using the targeted (‘positive’) document set may generate too generic questions that cover a larger scope than delineated by the document set. To address this challenge, we introduce the contrastive learning strategy where given ‘positive’ and ‘negative’ sets of documents, we generate a question that is closely related to the ‘positive’ set but is far away from the ‘negative’ set. This setting allows generated questions to be more specific and related to the target document set. To generate such specific questions, we propose Multi-Source Coordinated Question Generator (MSCQG), a novel framework that includes a supervised learning (SL) stage and a reinforcement learning (RL) stage. In the SL stage, a single-document question generator is trained. In the RL stage, a coordinator model is trained to find optimal attention weights to align multiple single-document generators, by optimizing a reward designed to promote specificity of generated questions. We also develop an effective auxiliary objective, named Set-induced Contrastive Regularization (SCR) that improves the coordinator’s contrastive learning during the RL stage. We show that our model significantly outperforms several strong baselines, as measured by automatic metrics and human evaluation. The source repository is publicly available at ‘www.github.com/woonsangcho/contrast_qgen’.</abstract>
       <url hash="2db108d5">2021.eacl-main.2</url>
@@ -59,8 +59,8 @@
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Hiyori</first><last>Yoshikawa</last></author>
       <author><first>Tomoya</first><last>Iwakura</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>42–48</pages>
       <abstract>Images are core components of multi-modal learning in natural language processing (NLP), and results have varied substantially as to whether images improve NLP tasks or not. One confounding effect has been that previous NLP research has generally focused on sophisticated tasks (in varying settings), generally applied to English only. We focus on text classification, in the context of assigning named entity classes to a given Wikipedia page, where images generally complement the text and the Wikipedia page can be in one of a number of different languages. Our experiments across a range of languages show that images complement NLP models (including BERT) trained without external pre-training, but when combined with BERT models pre-trained on large-scale external data, images contribute nothing.</abstract>
       <url hash="14aeec49">2021.eacl-main.4</url>
@@ -70,7 +70,7 @@
     <paper id="5">
       <title>If you’ve got it, flaunt it: Making the most of fine-grained sentiment annotations</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>49–62</pages>
       <abstract>Fine-grained sentiment analysis attempts to extract sentiment holders, targets and polar expressions and resolve the relationship between them, but progress has been hampered by the difficulty of annotation. Targeted sentiment analysis, on the other hand, is a more narrow task, focusing on extracting sentiment targets and classifying their polarity. In this paper, we explore whether incorporating holder and expression information can improve target extraction and classification and perform experiments on eight English datasets. We conclude that jointly predicting target and polarity BIO labels improves target extraction, and that augmenting the input text with gold expressions generally improves targeted polarity classification. This highlights the potential importance of annotating expressions for fine-grained sentiment datasets. At the same time, our results show that performance of current models for predicting polar expressions is poor, hampering the benefit of this information in practice.</abstract>
@@ -97,7 +97,7 @@
       <author><first>Meredith</first><last>Riggs</last></author>
       <author><first>Xinru</first><last>Yan</last></author>
       <author><first>Haogang</first><last>Bao</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>78–90</pages>
       <abstract>Modelling persuasion strategies as predictors of task outcome has several real-world applications and has received considerable attention from the computational linguistics community. However, previous research has failed to account for the resisting strategies employed by an individual to foil such persuasion attempts. Grounded in prior literature in cognitive and social psychology, we propose a generalised framework for identifying resisting strategies in persuasive conversations. We instantiate our framework on two distinct datasets comprising persuasion and negotiation conversations. We also leverage a hierarchical sequence-labelling neural architecture to infer the aforementioned resisting strategies automatically. Our experiments reveal the asymmetry of power roles in non-collaborative goal-directed conversations and the benefits accrued from incorporating resisting strategies on the final conversation outcome. We also investigate the role of different resisting strategies on the conversation outcome and glean insights that corroborate with past findings. We also make the code and the dataset of this work publicly available at <url>https://github.com/americast/resper</url>.</abstract>
       <url hash="cc4102a6">2021.eacl-main.7</url>
@@ -185,7 +185,7 @@
       <title>Unification-based Reconstruction of Multi-hop Explanations for Science Questions</title>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>200–211</pages>
       <abstract>This paper presents a novel framework for reconstructing multi-hop explanations in science Question Answering (QA). While existing approaches for multi-hop reasoning build explanations considering each question in isolation, we propose a method to leverage explanatory patterns emerging in a corpus of scientific explanations. Specifically, the framework ranks a set of atomic facts by integrating lexical relevance with the notion of unification power, estimated analysing explanations for similar questions in the corpus. An extensive evaluation is performed on the Worldtree corpus, integrating k-NN clustering and Information Retrieval (IR) techniques. We present the following conclusions: (1) The proposed method achieves results competitive with Transformers, yet being orders of magnitude faster, a feature that makes it scalable to large explanatory corpora (2) The unification-based mechanism has a key role in reducing semantic drift, contributing to the reconstruction of many hops explanations (6 or more facts) and the ranking of complex inference facts (+12.0 Mean Average Precision) (3) Crucially, the constructed explanations can support downstream QA models, improving the accuracy of BERT by up to 10% overall.</abstract>
       <url hash="52b4ce1d">2021.eacl-main.15</url>
@@ -243,7 +243,7 @@
     <paper id="20">
       <title>Exploiting Cloze-Questions for Few-Shot Text Classification and Natural Language Inference</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>255–269</pages>
       <abstract>Some NLP tasks can be solved in a fully unsupervised fashion by providing a pretrained language model with “task descriptions” in natural language (e.g., Radford et al., 2019). While this approach underperforms its supervised counterpart, we show in this work that the two ideas can be combined: We introduce Pattern-Exploiting Training (PET), a semi-supervised training procedure that reformulates input examples as cloze-style phrases to help language models understand a given task. These phrases are then used to assign soft labels to a large set of unlabeled examples. Finally, standard supervised training is performed on the resulting training set. For several tasks and languages, PET outperforms supervised training and strong semi-supervised approaches in low-resource settings by a large margin.</abstract>
       <url hash="c7fa4cfd">2021.eacl-main.20</url>
@@ -267,7 +267,7 @@
       <title><fixed-case>AREDSUM</fixed-case>: Adaptive Redundancy-Aware Iterative Sentence Ranking for Extractive Document Summarization</title>
       <author><first>Keping</first><last>Bi</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
-      <author><first>Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>Bruce</first><last>Croft</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <pages>281–291</pages>
       <abstract>Redundancy-aware extractive summarization systems score the redundancy of the sentences to be included in a summary either jointly with their salience information or separately as an additional sentence scoring step. Previous work shows the efficacy of jointly scoring and selecting sentences with neural sequence generation models. It is, however, not well-understood if the gain is due to better encoding techniques or better redundancy reduction approaches. Similarly, the contribution of salience versus diversity components on the created summary is not studied well. Building on the state-of-the-art encoding methods for summarization, we present two adaptive learning models: AREDSUM-SEQ that jointly considers salience and novelty during sentence selection; and a two-step AREDSUM-CTX that scores salience first, then learns to balance salience and redundancy, enabling the measurement of the impact of each aspect. Empirical results on CNN/DailyMail and NYT50 datasets show that by modeling diversity explicitly in a separate step, AREDSUM-CTX achieves significantly better performance than AREDSUM-SEQ as well as state-of-the-art extractive summarization baselines.</abstract>
@@ -281,7 +281,7 @@
       <author><first>Andreas</first><last>Weise</last></author>
       <author><first>Vered</first><last>Silber-Varod</last></author>
       <author><first>Anat</first><last>Lerner</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Rivka</first><last>Levitan</last></author>
       <pages>292–299</pages>
       <abstract>It has been well-documented for several languages that human interlocutors tend to adapt their linguistic productions to become more similar to each other. This behavior, known as entrainment, affects lexical choice as well, both with regard to specific words, such as referring expressions, and overall style. We offer what we believe to be the first investigation of such lexical entrainment in Hebrew. Using two existing measures, we analyze Hebrew speakers interacting in a Map Task, a popular experimental setup, and find rich evidence of lexical entrainment. Analyzing speaker pairs by the combination of their genders as well as speakers by their individual gender, we find no clear pattern of differences. We do, however, find that speakers in a position of less power entrain more than those with greater power, which matches theoretical accounts. Overall, our results mostly accord with those for American English, with a lack of entrainment on hedge words being the main difference.</abstract>
@@ -353,7 +353,7 @@
     </paper>
     <paper id="29">
       <title>A Systematic Review of Reproducibility Research in Natural Language Processing</title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Shubham</first><last>Agarwal</last></author>
       <author><first>Anastasia</first><last>Shimorina</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
@@ -367,10 +367,10 @@
       <title>Bootstrapping Multilingual <fixed-case>AMR</fixed-case> with Contextual Word Alignments</title>
       <author><first>Janaki</first><last>Sheth</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <pages>394–404</pages>
       <abstract>We develop high performance multilingual Abstract Meaning Representation (AMR) systems by projecting English AMR annotations to other languages with weak supervision. We achieve this goal by bootstrapping transformer-based multilingual word embeddings, in particular those from cross-lingual RoBERTa (XLM-R large). We develop a novel technique for foreign-text-to-English AMR alignment, using the contextual word alignment between English and foreign language tokens. This word alignment is weakly supervised and relies on the contextualized XLM-R word embeddings. We achieve a highly competitive performance that surpasses the best published results for German, Italian, Spanish and Chinese.</abstract>
@@ -445,7 +445,7 @@
       <title><fixed-case>FEWS</fixed-case>: Large-Scale, Low-Shot Word Sense Disambiguation with the Dictionary</title>
       <author><first>Terra</first><last>Blevins</last></author>
       <author><first>Mandar</first><last>Joshi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>455–465</pages>
       <abstract>Current models for Word Sense Disambiguation (WSD) struggle to disambiguate rare senses, despite reaching human performance on global WSD metrics. This stems from a lack of data for both modeling and evaluating rare senses in existing WSD datasets. In this paper, we introduce FEWS (Few-shot Examples of Word Senses), a new low-shot WSD dataset automatically extracted from example sentences in Wiktionary. FEWS has high sense coverage across different natural language domains and provides: (1) a large training set that covers many more senses than previous datasets and (2) a comprehensive evaluation set containing few- and zero-shot examples of a wide variety of senses. We establish baselines on FEWS with knowledge-based and neural WSD approaches and present transfer learning experiments demonstrating that models additionally trained with FEWS better capture rare senses in existing WSD datasets. Finally, we find humans outperform the best baseline models on FEWS, indicating that FEWS will support significant future work on low-shot WSD.</abstract>
       <url hash="678232f4">2021.eacl-main.36</url>
@@ -516,7 +516,7 @@
     <paper id="42">
       <title>Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models</title>
       <author><first>Lutfi Kerem</first><last>Senel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>532–538</pages>
       <abstract>Recent progress in pretraining language models on large corpora has resulted in significant performance gains on many NLP tasks. These large models acquire linguistic knowledge during pretraining, which helps to improve performance on downstream tasks via fine-tuning. To assess what kind of knowledge is acquired, language models are commonly probed by querying them with ‘fill in the blank’ style cloze questions. Existing probing datasets mainly focus on knowledge about relations between words and entities. We introduce WDLMPro (Word Definitions Language Model Probing) to evaluate word understanding directly using dictionary definitions of words. In our experiments, three popular pretrained language models struggle to match words and their definitions. This indicates that they understand many words poorly and that our new probing task is a difficult challenge that could help guide research on LMs in the future.</abstract>
       <url hash="52d785ec">2021.eacl-main.42</url>
@@ -611,7 +611,7 @@
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>619–625</pages>
       <abstract>Quality estimation aims to measure the quality of translated content without access to a reference translation. This is crucial for machine translation systems in real-world scenarios where high-quality translation is needed. While many approaches exist for quality estimation, they are based on supervised machine learning requiring costly human labelled data. As an alternative, we propose a technique that does not rely on examples from human-annotators and instead uses synthetic training data. We train off-the-shelf architectures for supervised quality estimation on our synthetic data and show that the resulting models achieve comparable performance to models trained on human-annotated data, both for sentence and word-level prediction.</abstract>
@@ -633,8 +633,8 @@
     <paper id="52">
       <title><fixed-case>GRIT</fixed-case>: Generative Role-filler Transformers for Document-level Event Entity Extraction</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>634–644</pages>
       <abstract>We revisit the classic problem of document-level role-filler entity extraction (REE) for template filling. We argue that sentence-level approaches are ill-suited to the task and introduce a generative transformer-based encoder-decoder framework (GRIT) that is designed to model context at the document level: it can make extraction decisions across sentence boundaries; is implicitly aware of noun phrase coreference structure, and has the capacity to respect cross-role dependencies in the template structure. We evaluate our approach on the MUC-4 dataset, and show that our model performs substantially better than prior work. We also show that our modeling choices contribute to model performance, e.g., by implicitly capturing linguistic knowledge such as recognizing coreferent entity mentions.</abstract>
       <url hash="9baa7941">2021.eacl-main.52</url>
@@ -679,7 +679,7 @@
     <paper id="56">
       <title><fixed-case>F</fixed-case>ake<fixed-case>F</fixed-case>low: Fake News Detection by Modeling the Flow of Affective Information</title>
       <author><first>Bilal</first><last>Ghanem</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Francisco</first><last>Rangel</last></author>
       <pages>679–689</pages>
@@ -692,7 +692,7 @@
       <title><fixed-case>CTC</fixed-case>-based Compression for Direct Speech Translation</title>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>690–696</pages>
       <abstract>Previous studies demonstrated that a dynamic phone-informed compression of the input audio is beneficial for speech translation (ST). However, they required a dedicated model for phone recognition and did not test this solution for direct ST, in which a single model translates the input audio into the target language without intermediate representations. In this work, we propose the first method able to perform a dynamic compression of the input in direct ST models. In particular, we exploit the Connectionist Temporal Classification (CTC) to compress the input sequence according to its phonetic characteristics. Our experiments demonstrate that our solution brings a 1.3-1.5 BLEU improvement over a strong baseline on two language pairs (English-Italian and English-German), contextually reducing the memory footprint by more than 10%.</abstract>
@@ -731,7 +731,7 @@
       <title>Top-down Discourse Parsing via Sequence Labelling</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>715–726</pages>
       <abstract>We introduce a top-down approach to discourse parsing that is conceptually simpler than its predecessors (Kobayashi et al., 2020; Zhang et al., 2020). By framing the task as a sequence labelling problem where the goal is to iteratively segment a document into individual discourse units, we are able to eliminate the decoder and reduce the search space for splitting points. We explore both traditional recurrent models and modern pre-trained transformer models for the task, and additionally introduce a novel dynamic oracle for top-down parsing. Based on the Full metric, our proposed LSTM model sets a new state-of-the-art for RST parsing.</abstract>
       <url hash="a719a474">2021.eacl-main.60</url>
@@ -820,7 +820,7 @@
       <author><first>Rahul</first><last>Aralikatte</last></author>
       <author><first>Matthew</first><last>Lamm</last></author>
       <author><first>Daniel</first><last>Hardt</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>810–817</pages>
       <abstract>Most, if not all forms of ellipsis (e.g., so does Mary) are similar to reading comprehension questions (what does Mary do), in that in order to resolve them, we need to identify an appropriate text span in the preceding discourse. Following this observation, we present an alternative approach for English ellipsis resolution relying on architectures developed for question answering (QA). We present both single-task models, and joint models trained on auxiliary QA and coreference resolution datasets, clearly outperforming the current state of the art for Sluice Ellipsis (from 70.00 to 86.01 F1) and Verb Phrase Ellipsis (from 72.89 to 78.66 F1).</abstract>
       <url hash="0b8e852a">2021.eacl-main.68</url>
@@ -887,7 +887,7 @@
     <paper id="74">
       <title>Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering</title>
       <author><first>Gautier</first><last>Izacard</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <pages>874–880</pages>
       <abstract>Generative models for open domain question answering have proven to be competitive, without resorting to external knowledge. While promising, this approach requires to use models with billions of parameters, which are expensive to train and query. In this paper, we investigate how much these models can benefit from retrieving text passages, potentially containing evidence. We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages. This is evidence that sequence-to-sequence models offers a flexible framework to efficiently aggregate and combine evidence from multiple passages.</abstract>
       <url hash="e46810b8">2021.eacl-main.74</url>
@@ -957,8 +957,8 @@
     <paper id="80">
       <title>Multilingual Machine Translation: Closing the Gap between Shared and Language-specific Encoder-Decoders</title>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
       <pages>944–948</pages>
       <abstract>State-of-the-art multilingual machine translation relies on a universal encoder-decoder, which requires retraining the entire system to add new languages. In this paper, we propose an alternative approach that is based on language-specific encoder-decoders, and can thus be more easily extended to new languages by learning their corresponding modules. So as to encourage a common interlingua representation, we simultaneously train the N initial languages. Our experiments show that the proposed approach outperforms the universal encoder-decoder by 3.28 BLEU points on average, while allowing to add new languages without the need to retrain the rest of the modules. All in all, our work closes the gap between shared and language-specific encoderdecoders, advancing toward modular multilingual machine translation systems that can be flexibly extended in lifelong learning settings.</abstract>
@@ -989,7 +989,7 @@
     </paper>
     <paper id="83">
       <title>Hierarchical Multi-head Attentive Network for Evidence-aware Fake News Detection</title>
-      <author><first>Nguyen</first><last>Vo</last></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Vo</last></author>
       <author><first>Kyumin</first><last>Lee</last></author>
       <pages>965–975</pages>
       <abstract>The widespread of fake news and misinformation in various domains ranging from politics, economics to public health has posed an urgent need to automatically fact-check information. A recent trend in fake news detection is to utilize evidence from external sources. However, existing evidence-aware fake news detection methods focused on either only word-level attention or evidence-level attention, which may result in suboptimal performance. In this paper, we propose a Hierarchical Multi-head Attentive Network to fact-check textual claims. Our model jointly combines multi-head word-level attention and multi-head document-level attention, which aid explanation in both word-level and evidence-level. Experiments on two real-word datasets show that our model outperforms seven state-of-the-art baselines. Improvements over baselines are from 6% to 18%. Our source code and datasets are released at <url>https://github.com/nguyenvo09/EACL2021</url>.</abstract>
@@ -1001,7 +1001,7 @@
       <title>Identifying Named Entities as they are Typed</title>
       <author><first>Ravneet</first><last>Arora</last></author>
       <author><first>Chen-Tse</first><last>Tsai</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>976–988</pages>
       <abstract>Identifying named entities in written text is an essential component of the text processing pipeline used in applications such as text editors to gain a better understanding of the semantics of the text. However, the typical experimental setup for evaluating Named Entity Recognition (NER) systems is not directly applicable to systems that process text in real time as the text is being typed. Evaluation is performed on a sentence level assuming the end-user is willing to wait until the entire sentence is typed for entities to be identified and further linked to identifiers or co-referenced. We introduce a novel experimental setup for NER systems for applications where decisions about named entity boundaries need to be performed in an online fashion. We study how state-of-the-art methods perform under this setup in multiple languages and propose adaptations to these models to suit this new experimental setup. Experimental results show that the best systems that are evaluated on each token after its typed, reach performance within 1–5 F1 points of systems that are evaluated at the end of the sentence. These show that entity recognition can be performed in this setup and open up the development of other NLP tools in a similar setup.</abstract>
       <url hash="071ddca4">2021.eacl-main.84</url>
@@ -1059,7 +1059,7 @@
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Lorenzo</first><last>Bertolini</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>1034–1048</pages>
       <abstract>The automatic detection of hypernymy relationships represents a challenging problem in NLP. The successful application of state-of-the-art supervised approaches using distributed representations has generally been impeded by the limited availability of high quality training data. We have developed two novel data augmentation techniques which generate new training examples from existing ones. First, we combine the linguistic principles of hypernym transitivity and intersective modifier-noun composition to generate additional pairs of vectors, such as “small dog - dog” or “small dog - animal”, for which a hypernymy relationship can be assumed. Second, we use generative adversarial networks (GANs) to generate pairs of vectors for which the hypernymy relation can also be assumed. We furthermore present two complementary strategies for extending an existing dataset by leveraging linguistic resources such as WordNet. Using an evaluation across 3 different datasets for hypernymy detection and 2 different vector spaces, we demonstrate that both of the proposed automatic data augmentation and dataset extension strategies substantially improve classifier performance.</abstract>
       <url hash="27663bea">2021.eacl-main.89</url>
@@ -1099,7 +1099,7 @@
       <author><first>Ji</first><last>Ma</last></author>
       <author><first>Ivan</first><last>Korotkov</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
       <pages>1075–1088</pages>
       <abstract>A major obstacle to the wide-spread adoption of neural retrieval models is that they require large supervised training sets to surpass traditional term-based techniques, which are constructed from raw corpora. In this paper, we propose an approach to zero-shot learning for passage retrieval that uses synthetic question generation to close this gap. The question generation system is trained on general domain data, but is applied to documents in the targeted domain. This allows us to create arbitrarily large, yet noisy, question-passage relevance pairs that are domain specific. Furthermore, when this is coupled with a simple hybrid term-neural model, first-stage retrieval performance can be improved further. Empirically, we show that this is an effective strategy for building neural passage retrieval models in the absence of large training corpora. Depending on the domain, this technique can even approach the accuracy of supervised models.</abstract>
@@ -1111,7 +1111,7 @@
       <title>Discourse-Aware Unsupervised Summarization for Long Scientific Documents</title>
       <author><first>Yue</first><last>Dong</last></author>
       <author><first>Andrei</first><last>Mircea</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>1089–1102</pages>
       <abstract>We propose an unsupervised graph-based ranking model for extractive summarization of long scientific documents. Our method assumes a two-level hierarchical graph representation of the source document, and exploits asymmetrical positional cues to determine sentence importance. Results on the PubMed and arXiv datasets show that our approach outperforms strong unsupervised baselines by wide margins in automatic metrics and human evaluation. In addition, it achieves performance comparable to many state-of-the-art supervised approaches which are trained on hundreds of thousands of examples. These results suggest that patterns in the discourse structure are a strong signal for determining importance in scientific articles.</abstract>
       <url hash="4a81f760">2021.eacl-main.93</url>
@@ -1135,7 +1135,7 @@
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Myle</first><last>Ott</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Fuchun</first><last>Peng</last></author>
       <pages>1121–1133</pages>
       <abstract>In this work, we study how the finetuning stage in the pretrain-finetune framework changes the behavior of a pretrained neural language generator. We focus on the transformer encoder-decoder model for the open-domain dialogue response generation task. Our major finding is that after standard finetuning, the model forgets some of the important language generation skills acquired during large-scale pretraining. We demonstrate the forgetting phenomenon through a set of detailed behavior analysis from the perspectives of knowledge transfer, context sensitivity, and function space projection. As a preliminary attempt to alleviate the forgetting problem, we propose an intuitive finetuning strategy named “mix-review”. We find that mix-review effectively regularizes the finetuning process, and the forgetting problem is alleviated to some extent. Finally, we discuss interesting behavior of the resulting dialogue model and its implications.</abstract>
@@ -1199,7 +1199,7 @@
     <paper id="100">
       <title>Randomized Deep Structured Prediction for Discourse-Level Processing</title>
       <author><first>Manuel</first><last>Widmoser</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Jean</first><last>Honorio</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>1174–1184</pages>
@@ -1210,7 +1210,7 @@
     </paper>
     <paper id="101">
       <title>Automatic Data Acquisition for Event Coreference Resolution</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>1185–1196</pages>
       <abstract>We propose to leverage lexical paraphrases and high precision rules informed by news discourse structure to automatically collect coreferential and non-coreferential event pairs from unlabeled English news articles. We perform both manual validation and empirical evaluation on multiple evaluation datasets with different event domains and text genres to assess the quality of our acquired event pairs. We found that a model trained on our acquired event pairs performs comparably as the supervised model when applied to new data out of the training data domains. Further, augmenting human-annotated data with the acquired event pairs provides empirical performance gains on both in-domain and out-of-domain evaluation datasets.</abstract>
@@ -1262,7 +1262,7 @@
       <author><first>Yao</first><last>Wan</last></author>
       <author><first>Jianguo</first><last>Zhang</last></author>
       <author><first>Wenting</first><last>Zhao</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1235–1244</pages>
       <abstract>The non-autoregressive models have boosted the efficiency of neural machine translation through parallelized decoding at the cost of effectiveness, when comparing with the autoregressive counterparts. In this paper, we claim that the syntactic and semantic structures among natural language are critical for non-autoregressive machine translation and can further improve the performance. However, these structures are rarely considered in the existing non-autoregressive models. Inspired by this intuition, we propose to incorporate the explicit syntactic and semantic structure of languages into a non-autoregressive Transformer, for the task of neural machine translation. Moreover, we also consider the intermediate latent alignment within target sentences to better learn the long-term token dependencies. Experimental results on two real-world datasets (i.e., WMT14 En-De and WMT16 En- Ro) show that our model achieves a significantly faster speed, as well as keeps the translation quality when compared with several state-of-the-art non-autoregressive models.</abstract>
       <url hash="a49bfbf3">2021.eacl-main.105</url>
@@ -1293,7 +1293,7 @@
     <paper id="108">
       <title>Language Models for Lexical Inference in Context</title>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1267–1280</pages>
       <abstract>Lexical inference in context (LIiC) is the task of recognizing textual entailment between two very similar sentences, i.e., sentences that only differ in one expression. It can therefore be seen as a variant of the natural language inference task that is focused on lexical semantics. We formulate and evaluate the first approaches based on pretrained language models (LMs) for this task: (i) a few-shot NLI classifier, (ii) a relation induction approach based on handcrafted patterns expressing the semantics of lexical inference, and (iii) a variant of (ii) with patterns that were automatically extracted from a corpus. All our approaches outperform the previous state of the art, showing the potential of pretrained LMs for LIiC. In an extensive analysis, we investigate factors of success and failure of our three approaches.</abstract>
       <url hash="21fe37f1">2021.eacl-main.108</url>
@@ -1305,7 +1305,7 @@
       <author><first>Zhuang</first><last>Li</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Shuo</first><last>Huang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1281–1291</pages>
       <abstract>In this work, we investigate the problems of semantic parsing in a few-shot learning setting. In this setting, we are provided with k utterance-logical form pairs per new predicate. The state-of-the-art neural semantic parsers achieve less than 25% accuracy on benchmark datasets when k = 1. To tackle this problem, we proposed to i) apply a designated meta-learning method to train the model; ii) regularize attention scores with alignment statistics; iii) apply a smoothing technique in pretraining. As a result, our method consistently outperforms all the baselines in both one and two-shot settings.</abstract>
       <url hash="1b4c23c5">2021.eacl-main.109</url>
@@ -1344,7 +1344,7 @@
       <author><first>Ozan</first><last>Caglayan</last></author>
       <author><first>Menekse</first><last>Kuyu</last></author>
       <author><first>Mustafa Sercan</first><last>Amac</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Erkut</first><last>Erdem</last></author>
       <author><first>Aykut</first><last>Erdem</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
@@ -1360,7 +1360,7 @@
       <title>Memorization vs. Generalization : Quantifying Data Leakage in <fixed-case>NLP</fixed-case> Performance Evaluation</title>
       <author><first>Aparna</first><last>Elangovan</last></author>
       <author><first>Jiayuan</first><last>He</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>1325–1335</pages>
       <abstract>Public datasets are often used to evaluate the efficacy and generalizability of state-of-the-art methods for many tasks in natural language processing (NLP). However, the presence of overlap between the train and test datasets can lead to inflated results, inadvertently evaluating the model’s ability to memorize and interpreting it as the ability to generalize. In addition, such data sets may not provide an effective indicator of the performance of these methods in real world scenarios. We identify leakage of training data into test data on several publicly available datasets used to evaluate NLP tasks, including named entity recognition and relation extraction, and study them to assess the impact of that leakage on the model’s ability to memorize versus generalize.</abstract>
       <url hash="e6adbe10">2021.eacl-main.113</url>
@@ -1388,7 +1388,7 @@
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Shuo</first><last>Sun</last></author>
       <author><first>Hongyu</first><last>Gong</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>1351–1361</pages>
       <abstract>We present an approach based on multilingual sentence embeddings to automatically extract parallel sentences from the content of Wikipedia articles in 96 languages, including several dialects or low-resource languages. We do not limit the extraction process to alignments with English, but we systematically consider all possible language pairs. In total, we are able to extract 135M parallel sentences for 16720 different language pairs, out of which only 34M are aligned with English. This corpus is freely available. To get an indication on the quality of the extracted bitexts, we train neural MT baseline systems on the mined data only for 1886 languages pairs, and evaluate them on the TED corpus, achieving strong BLEU scores for many language pairs. The WikiMatrix bitexts seem to be particularly interesting to train MT systems between distant languages without the need to pivot through English.</abstract>
       <url hash="476948aa">2021.eacl-main.115</url>
@@ -1401,8 +1401,8 @@
       <author><first>Christian</first><last>Druckenbrodt</last></author>
       <author><first>Saber A</first><last>Akhondi</last></author>
       <author><first>Jiayuan</first><last>He</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>1362–1375</pages>
       <abstract>Chemical patents contain rich coreference and bridging links, which are the target of this research. Specially, we introduce a novel annotation scheme, based on which we create the ChEMU-Ref dataset from reaction description snippets in English-language chemical patents. We propose a neural approach to anaphora resolution, which we show to achieve strong results, especially when jointly trained over coreference and bridging links.</abstract>
       <url hash="26926da4">2021.eacl-main.116</url>
@@ -1449,8 +1449,8 @@
     </paper>
     <paper id="120">
       <title>Evaluating language models for the retrieval and categorization of lexical collocations</title>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
-      <author><first>Joan</first><last>Codina-Filba</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filba</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>1406–1417</pages>
       <abstract>Lexical collocations are idiosyncratic combinations of two syntactically bound lexical items (e.g., “heavy rain” or “take a step”). Understanding their degree of compositionality and idiosyncrasy, as well their underlying semantics, is crucial for language learners, lexicographers and downstream NLP applications. In this paper, we perform an exhaustive analysis of current language models for collocation understanding. We first construct a dataset of apparitions of lexical collocations in context, categorized into 17 representative semantic categories. Then, we perform two experiments: (1) unsupervised collocate retrieval using BERT, and (2) supervised collocation classification in context. We find that most models perform well in distinguishing light verb constructions, especially if the collocation’s first argument acts as subject, but often fail to distinguish, first, different syntactic structures within the same semantic category, and second, fine-grained semantic categories which restrict the use of small sets of valid collocates for a given base.</abstract>
@@ -1495,7 +1495,7 @@
       <title>Civil Rephrases Of Toxic Texts With Self-Supervised Transformers</title>
       <author><first>Léo</first><last>Laugier</last></author>
       <author><first>John</first><last>Pavlopoulos</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Lucas</first><last>Dixon</last></author>
       <pages>1442–1461</pages>
       <abstract>Platforms that support online commentary, from social networks to news sites, are increasingly leveraging machine learning to assist their moderation efforts. But this process does not typically provide feedback to the author that would help them contribute according to the community guidelines. This is prohibitively time-consuming for human moderators to do, and computational approaches are still nascent. This work focuses on models that can help suggest rephrasings of toxic comments in a more civil manner. Inspired by recent progress in unpaired sequence-to-sequence tasks, a self-supervised learning model is introduced, called CAE-T5. CAE-T5 employs a pre-trained text-to-text transformer, which is fine tuned with a denoising and cyclic auto-encoder loss. Experimenting with the largest toxicity detection dataset to date (Civil Comments) our model generates sentences that are more fluent and better at preserving the initial content compared to earlier text style transfer systems which we compare with using several scoring systems and human evaluation.</abstract>
@@ -1512,7 +1512,7 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1462–1473</pages>
       <abstract>The task of generating weather-forecast comments from meteorological simulations has the following requirements: (i) the changes in numerical values for various physical quantities need to be considered, (ii) the weather comments should be dependent on delivery time and area information, and (iii) the comments should provide useful information for users. To meet these requirements, we propose a data-to-text model that incorporates three types of encoders for numerical forecast maps, observation data, and meta-data. We also introduce weather labels representing weather information, such as sunny and rain, for our model to explicitly describe useful information. We conducted automatic and human evaluations. The results indicate that our model performed best against baselines in terms of informativeness. We make our code and data publicly available.</abstract>
       <url hash="f69d32eb">2021.eacl-main.125</url>
@@ -1522,7 +1522,7 @@
     <paper id="126">
       <title><fixed-case>SICK</fixed-case>-<fixed-case>NL</fixed-case>: A Dataset for <fixed-case>D</fixed-case>utch Natural Language Inference</title>
       <author><first>Gijs</first><last>Wijnholds</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <pages>1474–1479</pages>
       <abstract>We present SICK-NL (read: signal), a dataset targeting Natural Language Inference in Dutch. SICK-NL is obtained by translating the SICK dataset of (Marelli et al., 2014) from English into Dutch. Having a parallel inference dataset allows us to compare both monolingual and multilingual NLP models for English and Dutch on the two tasks. In the paper, we motivate and detail the translation process, perform a baseline evaluation on both the original SICK dataset and its Dutch incarnation SICK-NL, taking inspiration from Dutch skipgram embeddings and contextualised embedding models. In addition, we encapsulate two phenomena encountered in the translation to formulate stress tests and verify how well the Dutch models capture syntactic restructurings that do not affect semantics. Our main finding is all models perform worse on SICK-NL than on SICK, indicating that the Dutch dataset is more challenging than the English original. Results on the stress tests show that models don’t fully capture word order freedom in Dutch, warranting future systematic studies.</abstract>
       <url hash="dce3e558">2021.eacl-main.126</url>
@@ -1536,8 +1536,8 @@
       <author><first>Yevgen</first><last>Matusevych</last></author>
       <author><first>Herman</first><last>Kamper</last></author>
       <author><first>Thomas</first><last>Schatz</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1480–1490</pages>
       <abstract>Non-native speakers show difficulties with spoken word processing. Many studies attribute these difficulties to imprecise phonological encoding of words in the lexical memory. We test an alternative hypothesis: that some of these difficulties can arise from the non-native speakers’ phonetic perception. We train a computational model of phonetic learning, which has no access to phonology, on either one or two languages. We first show that the model exhibits predictable behaviors on phone-level and word-level discrimination tasks. We then test the model on a spoken word processing task, showing that phonology may not be necessary to explain some of the word processing effects observed in non-native speakers. We run an additional analysis of the model’s lexical representation space, showing that the two training languages are not fully separated in that space, similarly to the languages of a bilingual human speaker.</abstract>
       <url hash="5414afe7">2021.eacl-main.127</url>
@@ -1653,7 +1653,7 @@
       <title>Benchmarking Machine Reading Comprehension: A Psychological Perspective</title>
       <author><first>Saku</first><last>Sugawara</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1592–1612</pages>
       <abstract>Machine reading comprehension (MRC) has received considerable attention as a benchmark for natural language understanding. However, the conventional task design of MRC lacks explainability beyond the model interpretation, i.e., reading comprehension by a model cannot be explained in human terms. To this end, this position paper provides a theoretical basis for the design of MRC datasets based on psychology as well as psychometrics, and summarizes it in terms of the prerequisites for benchmarking MRC. We conclude that future datasets should (i) evaluate the capability of the model for constructing a coherent and grounded representation to understand context-dependent situations and (ii) ensure substantive validity by shortcut-proof questions and explanation as a part of the task design.</abstract>
       <url hash="ecb0de3b">2021.eacl-main.137</url>
@@ -1691,7 +1691,7 @@
       <author><first>Artem</first><last>Revenko</last></author>
       <author><first>Kiamehr</first><last>Rezaee</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>1635–1645</pages>
       <abstract>We present WiC-TSV, a new multi-domain evaluation benchmark for Word Sense Disambiguation. More specifically, we introduce a framework for Target Sense Verification of Words in Context which grounds its uniqueness in the formulation as binary classification task thus being independent of external sense inventories, and the coverage of various domains. This makes the dataset highly flexible for the evaluation of a diverse set of models and systems in and across domains. WiC-TSV provides three different evaluation settings, depending on the input signals provided to the model. We set baseline performance on the dataset using state-of-the-art language models. Experimental results show that even though these models can perform decently on the task, there remains a gap between machine and human performance, especially in out-of-domain settings. WiC-TSV data is available at <url>https://competitions.codalab.org/competitions/23683</url>.</abstract>
       <url hash="f4b56680">2021.eacl-main.140</url>
@@ -1796,7 +1796,7 @@
       <author><first>Sanchit</first><last>Agarwal</last></author>
       <author><first>Chien-Wei</first><last>Lin</last></author>
       <author><first>Tagyoung</first><last>Chung</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>1730–1739</pages>
       <abstract>Dialogue State Tracking (DST) forms a core component of automated chatbot based systems designed for specific goals like hotel, taxi reservation, tourist information etc. With the increasing need to deploy such systems in new domains, solving the problem of zero/few-shot DST has become necessary. There has been a rising trend for learning to transfer knowledge from resource-rich domains to unknown domains with minimal need for additional data. In this work, we explore the merits of meta-learning algorithms for this transfer and hence, propose a meta-learner D-REPTILE specific to the DST problem. With extensive experimentation, we provide clear evidence of benefits over conventional approaches across different domains, methods, base models and datasets with significant (5-25%) improvement over the baseline in a low-data setting. Our proposed meta-learner is agnostic of the underlying model and hence any existing state-of-the-art DST system can improve its performance on unknown domains using our training strategy.</abstract>
       <url hash="b7bcaf1d">2021.eacl-main.148</url>
@@ -1842,7 +1842,7 @@
     <paper id="152">
       <title>What Sounds “Right” to Me? Experiential Factors in the Perception of Political Ideology</title>
       <author><first>Qinlan</first><last>Shen</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>1762–1771</pages>
       <abstract>In this paper, we challenge the assumption that political ideology is inherently built into text by presenting an investigation into the impact of experiential factors on annotator perceptions of political ideology. We construct an annotated corpus of U.S. political discussion, where in addition to ideology labels for texts, annotators provide information about their political affiliation, exposure to political news, and familiarity with the source domain of discussion, Reddit. We investigate the variability in ideology judgments across annotators, finding evidence that these experiential factors may influence the consistency of how political ideologies are perceived. Finally, we present evidence that understanding how humans perceive and interpret ideology from texts remains a challenging task for state-of-the-art language models, pointing towards potential issues when modeling user experiences that may require more contextual knowledge.</abstract>
       <url hash="437ed8a3">2021.eacl-main.152</url>
@@ -1863,7 +1863,7 @@
       <title>Globalizing <fixed-case>BERT</fixed-case>-based Transformer Architectures for Long Document Summarization</title>
       <author><first>Quentin</first><last>Grail</last></author>
       <author><first>Julien</first><last>Perez</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>1792–1810</pages>
       <abstract>Fine-tuning a large language model on downstream tasks has become a commonly adopted process in the Natural Language Processing (NLP) (CITATION). However, such a process, when associated with the current transformer-based (CITATION) architectures, shows several limitations when the target task requires to reason with long documents. In this work, we introduce a novel hierarchical propagation layer that spreads information between multiple transformer windows. We adopt a hierarchical approach where the input is divided in multiple blocks independently processed by the scaled dot-attentions and combined between the successive layers. We validate the effectiveness of our approach on three extractive summarization corpora of long scientific papers and news articles. We compare our approach to standard and pre-trained language-model-based summarizers and report state-of-the-art results for long document summarization and comparable results for smaller document summarization.</abstract>
       <url hash="855babc7">2021.eacl-main.154</url>
@@ -1885,7 +1885,7 @@
     </paper>
     <paper id="156">
       <title>We Need To Talk About Random Splits</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Sebastian</first><last>Ebert</last></author>
       <author><first>Jasmijn</first><last>Bastings</last></author>
       <author><first>Katja</first><last>Filippova</last></author>
@@ -1956,7 +1956,7 @@
     <paper id="162">
       <title>Error Analysis and the Role of Morphology</title>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1887–1900</pages>
       <abstract>We evaluate two common conjectures in error analysis of NLP models: (i) Morphology is predictive of errors; and (ii) the importance of morphology increases with the morphological complexity of a language. We show across four different tasks and up to 57 languages that of these conjectures, somewhat surprisingly, only (i) is true. Using morphological features does improve error prediction across tasks; however, this effect is less pronounced with morphologically complex languages. We speculate this is because morphology is more discriminative in morphologically simple languages. Across all four tasks, case and gender are the morphological features most predictive of error.</abstract>
       <url hash="059162c0">2021.eacl-main.162</url>
@@ -2054,7 +2054,7 @@
       <title>Attention-based Relational Graph Convolutional Network for Target-Oriented Opinion Words Extraction</title>
       <author><first>Junfeng</first><last>Jiang</last></author>
       <author><first>An</first><last>Wang</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1986–1997</pages>
       <abstract>Target-oriented opinion words extraction (TOWE) is a subtask of aspect-based sentiment analysis (ABSA). It aims to extract the corresponding opinion words for a given opinion target in a review sentence. Intuitively, the relation between an opinion target and an opinion word mostly relies on syntactics. In this study, we design a directed syntactic dependency graph based on a dependency tree to establish a path from the target to candidate opinions. Subsequently, we propose a novel attention-based relational graph convolutional neural network (ARGCN) to exploit syntactic information over dependency graphs. Moreover, to explicitly extract the corresponding opinion words toward the given opinion target, we effectively encode target information in our model with the target-aware representation. Empirical results demonstrate that our model significantly outperforms all of the existing models on four benchmark datasets. Extensive analysis also demonstrates the effectiveness of each component of our models. Our code is available at <url>https://github.com/wcwowwwww/towe-eacl</url>.</abstract>
       <url hash="e4515972">2021.eacl-main.170</url>
@@ -2075,7 +2075,7 @@
     <paper id="172">
       <title>Learning Relatedness between Types with Prototypes for Relation Extraction</title>
       <author><first>Lisheng</first><last>Fu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>2011–2016</pages>
       <abstract>Relation schemas are often pre-defined for each relation dataset. Relation types can be related from different datasets and have overlapping semantics. We hypothesize we can combine these datasets according to the semantic relatedness between the relation types to overcome the problem of lack of training data. It is often easy to discover the connection between relation types based on relation names or annotation guides, but hard to measure the exact similarity and take advantage of the connection between the relation types from different datasets. We propose to use prototypical examples to represent each relation type and use these examples to augment related types from a different dataset. We obtain further improvement (ACE05) with this type augmentation over a strong baseline which uses multi-task learning between datasets to obtain better feature representation for relations. We make our implementation publicly available: <url>https://github.com/fufrank5/relatedness</url></abstract>
       <url hash="966f011d">2021.eacl-main.172</url>
@@ -2145,7 +2145,7 @@
     <paper id="178">
       <title>The Interplay of Task Success and Dialogue Quality: An in-depth Evaluation in Task-Oriented Visual Dialogues</title>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>2071–2082</pages>
       <abstract>When training a model on referential dialogue guessing games, the best model is usually chosen based on its task success. We show that in the popular end-to-end approach, this choice prevents the model from learning to generate linguistically richer dialogues, since the acquisition of language proficiency takes longer than learning the guessing task. By comparing models playing different games (GuessWhat, GuessWhich, and Mutual Friends), we show that this discrepancy is model- and task-agnostic. We investigate whether and when better language quality could lead to higher task success. We show that in GuessWhat, models could increase their accuracy if they learn to ground, encode, and decode also words that do not occur frequently in the training set.</abstract>
       <url hash="c70a6106">2021.eacl-main.178</url>
@@ -2214,7 +2214,7 @@
     <paper id="184">
       <title>A Unified Feature Representation for Lexical Connotations</title>
       <author><first>Emily</first><last>Allaway</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2145–2163</pages>
       <abstract>Ideological attitudes and stance are often expressed through subtle meanings of words and phrases. Understanding these connotations is critical to recognizing the cultural and emotional perspectives of the speaker. In this paper, we use distant labeling to create a new lexical resource representing connotation aspects for nouns and adjectives. Our analysis shows that it aligns well with human judgments. Additionally, we present a method for creating lexical representations that capture connotations within the embedding space and show that using the embeddings provides a statistically significant improvement on the task of stance detection when data is limited.</abstract>
       <url hash="b6069cb4">2021.eacl-main.184</url>
@@ -2226,7 +2226,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Arnav</first><last>Wadhwa</last></author>
       <author><first>Shivam</first><last>Agarwal</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>2164–2175</pages>
       <abstract>Designing profitable trading strategies is complex as stock movements are highly stochastic; the market is influenced by large volumes of noisy data across diverse information sources like news and social media. Prior work mostly treats stock movement prediction as a regression or classification task and is not directly optimized towards profit-making. Further, they do not model the fine-grain temporal irregularities in the release of vast volumes of text that the market responds to quickly. Building on these limitations, we propose a novel hierarchical, learning to rank approach that uses textual data to make time-aware predictions for ranking stocks based on expected profit. Our approach outperforms state-of-the-art methods by over 8% in terms of cumulative profit and risk-adjusted returns in trading simulations on two benchmarks: English tweets and Chinese financial news spanning two major stock indexes and four global markets. Through ablative and qualitative analyses, we build the case for our method as a tool for daily stock trading.</abstract>
       <url hash="ca9a10da">2021.eacl-main.185</url>
@@ -2272,8 +2272,8 @@
       <title>First Align, then Predict: Understanding the Cross-Lingual Ability of Multilingual <fixed-case>BERT</fixed-case></title>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Yanai</first><last>Elazar</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>2214–2231</pages>
       <abstract>Multilingual pretrained language models have demonstrated remarkable zero-shot cross-lingual transfer capabilities. Such transfer emerges by fine-tuning on a task of interest in one language and evaluating on a distinct language, not seen during the fine-tuning. Despite promising results, we still lack a proper understanding of the source of this transfer. Using a novel layer ablation technique and analyses of the model’s internal representations, we show that multilingual BERT, a popular multilingual language model, can be viewed as the stacking of two sub-networks: a multilingual encoder followed by a task-specific language-agnostic predictor. While the encoder is crucial for cross-lingual transfer and remains mostly unchanged during fine-tuning, the task predictor has little importance on the transfer and can be reinitialized during fine-tuning. We present extensive experiments with three distinct tasks, seventeen typologically diverse languages and multiple domains to support our hypothesis.</abstract>
       <url hash="4738855e">2021.eacl-main.189</url>
@@ -2338,7 +2338,7 @@
     <paper id="195">
       <title>Content-based Models of Quotation</title>
       <author><first>Ansel</first><last>MacLaughlin</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>2296–2314</pages>
       <abstract>We explore the task of quotability identification, in which, given a document, we aim to identify which of its passages are the most quotable, i.e. the most likely to be directly quoted by later derived documents. We approach quotability identification as a passage ranking problem and evaluate how well both feature-based and BERT-based (Devlin et al., 2019) models rank the passages in a given document by their predicted quotability. We explore this problem through evaluations on five datasets that span multiple languages (English, Latin) and genres of literature (e.g. poetry, plays, novels) and whose corresponding derived documents are of multiple types (news, journal articles). Our experiments confirm the relatively strong performance of BERT-based models on this task, with the best model, a RoBERTA sequential sentence tagger, achieving an average rho of 0.35 and NDCG@1, 5, 50 of 0.26, 0.31 and 0.40, respectively, across all five datasets.</abstract>
       <url hash="38289e3b">2021.eacl-main.195</url>
@@ -2374,8 +2374,8 @@
       <title>Event-Driven News Stream Clustering using Entity-Aware Contextual Embeddings</title>
       <author><first>Kailash Karthik</first><last>Saravanakumar</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2330–2340</pages>
       <abstract>We propose a method for online news stream clustering that is a variant of the non-parametric streaming K-means algorithm. Our model uses a combination of sparse and dense document representations, aggregates document-cluster similarity along these multiple representations and makes the clustering decision using a neural classifier. The weighted document-cluster similarity model is learned using a novel adaptation of the triplet loss into a linear classification objective. We show that the use of a suitable fine-tuning objective and external knowledge in pre-trained transformer models yields significant improvements in the effectiveness of contextual embeddings for clustering. Our model achieves a new state-of-the-art on a standard stream clustering dataset of English documents.</abstract>
       <url hash="9c510570">2021.eacl-main.198</url>
@@ -2397,7 +2397,7 @@
     <paper id="200">
       <title>Lexical Normalization for Code-switched Data and its Effect on <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>2352–2365</pages>
       <abstract>Lexical normalization, the translation of non-canonical data to standard language, has shown to improve the performance of many natural language processing tasks on social media. Yet, using multiple languages in one utterance, also called code-switching (CS), is frequently overlooked by these normalization systems, despite its common use in social media. In this paper, we propose three normalization models specifically designed to handle code-switched data which we evaluate for two language pairs: Indonesian-English and Turkish-German. For the latter, we introduce novel normalization layers and their corresponding language ID and POS tags for the dataset, and evaluate the downstream effect of normalization on POS tagging. Results show that our CS-tailored normalization models significantly outperform monolingual ones, and lead to 5.4% relative performance increase for POS tagging as compared to unnormalized input.</abstract>
       <url hash="a098cb32">2021.eacl-main.200</url>
@@ -2407,7 +2407,7 @@
     <paper id="201">
       <title>Structural Encoding and Pre-training Matter: Adapting <fixed-case>BERT</fixed-case> for Table-Based Fact Verification</title>
       <author><first>Rui</first><last>Dong</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>2366–2375</pages>
       <abstract>Growing concern with online misinformation has encouraged NLP research on fact verification. Since writers often base their assertions on structured data, we focus here on verifying textual statements given evidence in tables. Starting from the Table Parsing (TAPAS) model developed for question answering (Herzig et al., 2020), we find that modeling table structure improves a language model pre-trained on unstructured text. Pre-training language models on English Wikipedia table data further improves performance. Pre-training on a question answering task with column-level cell rank information achieves the best performance. With improved pre-training and cell embeddings, this approach outperforms the state-of-the-art Numerically-aware Graph Neural Network table fact verification model (GNN-TabFact), increasing statement classification accuracy from 72.2% to 73.9% even without modeling numerical information. Incorporating numerical information with cell rankings and pre-training on a question-answering task increases accuracy to 76%. We further analyze accuracy on statements implicating single rows or multiple rows and columns of tables, on different numerical reasoning subtasks, and on generalizing to detecting errors in statements derived from the ToTTo table-to-text generation dataset.</abstract>
       <url hash="33b84061">2021.eacl-main.201</url>
@@ -2416,9 +2416,9 @@
     </paper>
     <paper id="202">
       <title>A Study of Automatic Metrics for the Evaluation of Natural Language Explanations</title>
-      <author><first>Miruna-Adriana</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></author>
       <author><first>Arash</first><last>Eshghi</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>2376–2387</pages>
       <abstract>As transparency becomes key for robotics and AI, it will be necessary to evaluate the methods through which transparency is provided, including automatically generated natural language (NL) explanations. Here, we explore parallels between the generation of such explanations and the much-studied field of evaluation of Natural Language Generation (NLG). Specifically, we investigate which of the NLG evaluation measures map well to explanations. We present the ExBAN corpus: a crowd-sourced corpus of NL explanations for Bayesian Networks. We run correlations comparing human subjective ratings with NLG automatic measures. We find that embedding-based automatic NLG evaluation methods, such as BERTScore and BLEURT, have a higher correlation with human ratings, compared to word-overlap metrics, such as BLEU and ROUGE. This work has implications for Explainable AI and transparent robotic and autonomous systems.</abstract>
       <url hash="cab408f2">2021.eacl-main.202</url>
@@ -2429,7 +2429,7 @@
       <title>Adversarial Stylometry in the Wild: <fixed-case>T</fixed-case>ransferable Lexical Substitution Attacks on Author Profiling</title>
       <author><first>Chris</first><last>Emmery</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>2388–2402</pages>
       <abstract>Written language contains stylistic cues that can be exploited to automatically infer a variety of potentially sensitive author information. Adversarial stylometry intends to attack such models by rewriting an author’s text. Our research proposes several components to facilitate deployment of these adversarial attacks in the wild, where neither data nor target models are accessible. We introduce a transformer-based extension of a lexical replacement attack, and show it achieves high transferability when trained on a weakly labeled corpus—decreasing target model performance below chance. While not completely inconspicuous, our more successful attacks also prove notably less detectable by humans. Our framework therefore provides a promising direction for future privacy-preserving adversarial attacks.</abstract>
       <url hash="df0a3f12">2021.eacl-main.203</url>
@@ -2442,7 +2442,7 @@
       <author><first>Hwijeen</first><last>Ahn</last></author>
       <author><first>Chan Young</first><last>Park</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>2403–2414</pages>
       <abstract>Much work in cross-lingual transfer learning explored how to select better transfer languages for multilingual tasks, primarily focusing on typological and genealogical similarities between languages. We hypothesize that these measures of linguistic proximity are not enough when working with pragmatically-motivated tasks, such as sentiment analysis. As an alternative, we introduce three linguistic features that capture cross-cultural similarities that manifest in linguistic patterns and quantify distinct aspects of language pragmatics: language context-level, figurative language, and the lexification of emotion concepts. Our analyses show that the proposed pragmatic features do capture cross-cultural similarities and align well with existing work in sociolinguistics and linguistic anthropology. We further corroborate the effectiveness of pragmatically-driven transfer in the downstream task of choosing transfer languages for cross-lingual sentiment analysis.</abstract>
       <url hash="307caf69">2021.eacl-main.204</url>
@@ -2454,7 +2454,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Harshit</first><last>Joshi</last></author>
       <author><first>Lucie</first><last>Flek</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>2415–2428</pages>
       <abstract>Recent psychological studies indicate that individuals exhibiting suicidal ideation increasingly turn to social media rather than mental health practitioners. Contextualizing the build-up of such ideation is critical for the identification of users at risk. In this work, we focus on identifying suicidal intent in tweets by augmenting linguistic models with emotional phases modeled from users’ historical context. We propose PHASE, a time-and phase-aware framework that adaptively learns features from a user’s historical emotional spectrum on Twitter for preliminary screening of suicidal risk. Building on clinical studies, PHASE learns phase-like progressions in users’ historical Plutchik-wheel-based emotions to contextualize suicidal intent. While outperforming state-of-the-art methods, we show the utility of temporal and phase-based emotional contextual cues for suicide ideation detection. We further discuss practical and ethical considerations.</abstract>
       <url hash="24bdfee3">2021.eacl-main.205</url>
@@ -2464,7 +2464,7 @@
     <paper id="206">
       <title>Exploiting Definitions for Frame Identification</title>
       <author><first>Tianyu</first><last>Jiang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>2429–2434</pages>
       <abstract>Frame identification is one of the key challenges for frame-semantic parsing. The goal of this task is to determine which frame best captures the meaning of a target word or phrase in a sentence. We present a new model for frame identification that uses a pre-trained transformer model to generate representations for frames and lexical units (senses) using their formal definitions in FrameNet. Our frame identification model assesses the suitability of a frame for a target word in a sentence based on the semantic coherence of their meanings. We evaluate our model on three data sets and show that it consistently achieves better performance than previous systems.</abstract>
       <url hash="ce326859">2021.eacl-main.206</url>
@@ -2485,8 +2485,8 @@
     <paper id="208">
       <title>Conceptual Grounding Constraints for Truly Robust Biomedical Name Representations</title>
       <author><first>Pieter</first><last>Fivez</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>2440–2450</pages>
       <abstract>Effective representation of biomedical names for downstream NLP tasks requires the encoding of both lexical as well as domain-specific semantic information. Ideally, the synonymy and semantic relatedness of names should be consistently reflected by their closeness in an embedding space. To achieve such robustness, prior research has considered multi-task objectives when training neural encoders. In this paper, we take a next step towards truly robust representations, which capture more domain-specific semantics while remaining universally applicable across different biomedical corpora and domains. To this end, we use conceptual grounding constraints which more effectively align encoded names to pretrained embeddings of their concept identifiers. These constraints are effective even when using a Deep Averaging Network, a simple feedforward encoding architecture that allows for scaling to large corpora while remaining sufficiently expressive. We empirically validate our approach using multiple tasks and benchmarks, which assess both literal synonymy as well as more general semantic relatedness.</abstract>
       <url hash="b9423fbf">2021.eacl-main.208</url>
@@ -2631,7 +2631,7 @@
       <author><first>Artidoro</first><last>Pagnoni</last></author>
       <author><first>Jay Yoon</first><last>Lee</last></author>
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>2575–2585</pages>
       <abstract>Abstractive text summarization aims at compressing the information of a long source document into a rephrased, condensed summary. Despite advances in modeling techniques, abstractive summarization models still suffer from several key challenges: (i) layout bias: they overfit to the style of training corpora; (ii) limited abstractiveness: they are optimized to copying n-grams from the source rather than generating novel abstractive summaries; (iii) lack of transparency: they are not interpretable. In this work, we propose a framework based on document-level structure induction for summarization to address these challenges. To this end, we propose incorporating latent and explicit dependencies across sentences in the source document into end-to-end single-document summarization models. Our framework complements standard encoder-decoder summarization models by augmenting them with rich structure-aware document representations based on implicitly learned (latent) structures and externally-derived linguistic (explicit) structures. We show that our summarization framework, trained on the CNN/DM dataset, improves the coverage of content in the source documents, generates more abstractive summaries by generating more novel n-grams, and incorporates interpretable sentence-level structures, while performing on par with standard baselines.</abstract>
@@ -2704,7 +2704,7 @@
     <paper id="226">
       <title>On the Computational Modelling of <fixed-case>M</fixed-case>ichif Verbal Morphology</title>
       <author><first>Fineen</first><last>Davis</last></author>
-      <author><first>Eddie A.</first><last>Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie A.</first><last>Santos</last></author>
       <author><first>Heather</first><last>Souter</last></author>
       <pages>2631–2636</pages>
       <abstract>This paper presents a finite-state computational model of the verbal morphology of Michif. Michif, the official language of the Métis peoples, is a uniquely mixed language with Algonquian and French origins. It is spoken across the Métis homelands in what is now called Canada and the United States, but it is highly endangered with less than 100 speakers. The verbal morphology is remarkably complex, as the already polysynthetic Algonquian patterns are combined with French elements and unique morpho-phonological interactions. The model presented in this paper, LI VERB KAA-OOSHITAHK DI MICHIF handles this complexity by using a series of composed finite-state transducers to model the concatenative morphology and phonological rule alternations that are unique to Michif. Such a rule-based approach is necessary as there is insufficient language data for an approach that uses machine learning. A language model such as LI VERB KAA-OOSHITAHK DI MICHIF furthers the goals of Indigenous computational linguistics in Canada while also supporting the creation of tools for documentation, education, and revitalization that are desired by the Métis community.</abstract>
@@ -2738,7 +2738,7 @@
     </paper>
     <paper id="229">
       <title>Informative and Controllable Opinion Summarization</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>2662–2672</pages>
       <abstract>Opinion summarization is the task of automatically generating summaries for a set of reviews about a specific target (e.g., a movie or a product). Since the number of reviews for each target can be prohibitively large, neural network-based methods follow a two-stage approach where an extractive step first pre-selects a subset of salient opinions and an abstractive step creates the summary while conditioning on the extracted subset. However, the extractive model leads to loss of information which may be useful depending on user needs. In this paper we propose a summarization framework that eliminates the need to rely only on pre-selected content and waste possibly useful information, especially when customizing summaries. The framework enables the use of all input reviews by first condensing them into multiple dense vectors which serve as input to an abstractive model. We showcase an effective instantiation of our framework which produces more informative summaries and also allows to take user preferences into account using our zero-shot customization technique. Experimental results demonstrate that our model improves the state of the art on the Rotten Tomatoes dataset and generates customized summaries effectively.</abstract>
@@ -2748,7 +2748,7 @@
     </paper>
     <paper id="230">
       <title>Coloring the Black Box: What Synesthesia Tells Us about Character Embeddings</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Mauro M.</first><last>Monsalve-Mercado</last></author>
       <pages>2673–2685</pages>
       <abstract>In contrast to their word- or sentence-level counterparts, character embeddings are still poorly understood. We aim at closing this gap with an in-depth study of English character embeddings. For this, we use resources from research on grapheme–color synesthesia – a neuropsychological phenomenon where letters are associated with colors –, which give us insight into which characters are similar for synesthetes and how characters are organized in color space. Comparing 10 different character embeddings, we ask: How similar are character embeddings to a synesthete’s perception of characters? And how similar are character embeddings extracted from different models? We find that LSTMs agree with humans more than transformers. Comparing across tasks, grapheme-to-phoneme conversion results in the most human-like character embeddings. Finally, ELMo embeddings differ from both humans and other models.</abstract>
@@ -2770,7 +2770,7 @@
       <title><fixed-case>BERT</fixed-case>ective: Language Models and Contextual Information for Deception Detection</title>
       <author><first>Tommaso</first><last>Fornaciari</last></author>
       <author><first>Federico</first><last>Bianchi</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>2699–2708</pages>
       <abstract>Spotting a lie is challenging but has an enormous potential impact on security as well as private and public safety. Several NLP methods have been proposed to classify texts as truthful or deceptive. In most cases, however, the target texts’ preceding context is not considered. This is a severe limitation, as any communication takes place in context, not in a vacuum, and context can help to detect deception. We study a corpus of Italian dialogues containing deceptive statements and implement deep neural models that incorporate various linguistic contexts. We establish a new state-of-the-art identifying deception and find that not all context is equally useful to the task. Only the texts closest to the target, if from the same speaker (rather than questions by an interlocutor), boost performance. We also find that the semantic information in language models such as BERT contributes to the performance. However, BERT alone does not capture the implicit knowledge of deception cues: its contribution is conditional on the concurrent use of attention to learn cues from BERT’s representations.</abstract>
@@ -2783,8 +2783,8 @@
     <paper id="233">
       <title>Learning Coupled Policies for Simultaneous Machine Translation using Imitation Learning</title>
       <author><first>Philip</first><last>Arthur</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2709–2719</pages>
       <abstract>We present a novel approach to efficiently learn a simultaneous translation model with coupled programmer-interpreter policies. First, we present an algorithmic oracle to produce oracle READ/WRITE actions for training bilingual sentence-pairs using the notion of word alignments. This oracle actions are designed to capture enough information from the partial input before writing the output. Next, we perform a coupled scheduled sampling to effectively mitigate the exposure bias when learning both policies jointly with imitation learning. Experiments on six language-pairs show our method outperforms strong baselines in terms of translation quality quality while keeping the delay low.</abstract>
       <url hash="4bfe7cb7">2021.eacl-main.233</url>
@@ -2810,10 +2810,10 @@
       <author><first>Feng</first><last>Nan</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
       <author><first>Dejiao</first><last>Zhang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <pages>2727–2733</pages>
       <abstract>A key challenge for abstractive summarization is ensuring factual consistency of the generated summary with respect to the original document. For example, state-of-the-art models trained on existing datasets exhibit entity hallucination, generating names of entities that are not present in the source document. We propose a set of new metrics to quantify the entity-level factual consistency of generated summaries and we show that the entity hallucination problem can be alleviated by simply filtering the training data. In addition, we propose a summary-worthy entity classification task to the training process as well as a joint entity and summary generation approach, which yield further improvements in entity level metrics.</abstract>
@@ -2856,8 +2856,8 @@
     <paper id="239">
       <title>Diverse Adversaries for Mitigating Bias in Training</title>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2760–2765</pages>
       <abstract>Adversarial learning can learn fairer and less biased models of language processing than standard training. However, current adversarial techniques only partially mitigate the problem of model bias, added to which their training procedures are often unstable. In this paper, we propose a novel approach to adversarial learning based on the use of multiple diverse discriminators, whereby discriminators are encouraged to learn orthogonal hidden representations from one another. Experimental results show that our method substantially improves over standard adversarial removal methods, in terms of reducing bias and stability of training.</abstract>
       <url hash="7651bd2a">2021.eacl-main.239</url>
@@ -2881,7 +2881,7 @@
     </paper>
     <paper id="241">
       <title>Better Neural Machine Translation by Extracting Linguistic Information from <fixed-case>BERT</fixed-case></title>
-      <author><first>Hassan S.</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan S.</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>2772–2783</pages>
       <abstract>Adding linguistic information (syntax or semantics) to neural machine translation (NMT) have mostly focused on using point estimates from pre-trained models. Directly using the capacity of massive pre-trained contextual word embedding models such as BERT(Devlin et al., 2019) has been marginally useful in NMT because effective fine-tuning is difficult to obtain for NMT without making training brittle and unreliable. We augment NMT by extracting dense fine-tuned vector-based linguistic information from BERT instead of using point estimates. Experimental results show that our method of incorporating linguistic information helps NMT to generalize better in a variety of training contexts and is no more difficult to train than conventional Transformer-based NMT.</abstract>
@@ -2895,7 +2895,7 @@
       <author><first>Changbing</first><last>Yang</last></author>
       <author><first>Yu</first><last>Li</last></author>
       <author><first>Alex</first><last>Warstadt</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>2784–2790</pages>
       <abstract>Linguistically informed analyses of language models (LMs) contribute to the understanding and improvement of such models. Here, we introduce the corpus of Chinese linguistic minimal pairs (CLiMP) to investigate what knowledge Chinese LMs acquire. CLiMP consists of sets of 1000 minimal pairs (MPs) for 16 syntactic contrasts in Chinese, covering 9 major Chinese linguistic phenomena. The MPs are semi-automatically generated, and human agreement with the labels in CLiMP is 95.8%. We evaluate 11 different LMs on CLiMP, covering n-grams, LSTMs, and Chinese BERT. We find that classifier–noun agreement and verb complement selection are the phenomena that models generally perform best at. However, models struggle the most with the ba construction, binding, and filler-gap dependencies. Overall, Chinese BERT achieves an 81.8% average accuracy, while the performances of LSTMs and 5-grams are only moderately above chance level.</abstract>
       <url hash="885d7190">2021.eacl-main.242</url>
@@ -2969,9 +2969,9 @@
       <author><first>Elsbeth</first><last>Turcan</last></author>
       <author><first>Petra</first><last>Galuscakova</last></author>
       <author><first>Elena</first><last>Zotkina</last></author>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Peter</first><last>Bell</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2842–2854</pages>
       <abstract>Typical ASR systems segment the input audio into utterances using purely acoustic information, which may not resemble the sentence-like units that are expected by conventional machine translation (MT) systems for Spoken Language Translation. In this work, we propose a model for correcting the acoustic segmentation of ASR models for low-resource languages to improve performance on downstream tasks. We propose the use of subtitles as a proxy dataset for correcting ASR acoustic segmentation, creating synthetic acoustic utterances by modeling common error modes. We train a neural tagging model for correcting ASR acoustic segmentation and show that it improves downstream performance on MT and audio-document cross-language information retrieval (CLIR).</abstract>
       <url hash="bdd297bc">2021.eacl-main.248</url>
@@ -3032,7 +3032,7 @@
       <title>Representations for Question Answering from Documents with Tables and Text</title>
       <author><first>Vicky</first><last>Zayats</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>2895–2906</pages>
       <abstract>Tables in web documents are pervasive and can be directly used to answer many of the queries searched on the web, motivating their integration in question answering. Very often information presented in tables is succinct and hard to interpret with standard language representations. On the other hand, tables often appear within textual context, such as an article describing the table. Using the information from an article as additional context can potentially enrich table representations. In this work we aim to improve question answering from tables by refining table representations based on information from surrounding text. We also present an effective method to combine text and table-based predictions for question answering from full documents, obtaining significant improvements on the Natural Questions dataset (Kwiatkowski et al., 2019).</abstract>
       <url hash="0b607ae3">2021.eacl-main.253</url>
@@ -3044,7 +3044,7 @@
       <author><first>Kemal</first><last>Kurniawan</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <author><first>Philip</first><last>Schulz</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2907–2918</pages>
       <abstract>Cross-lingual transfer is a leading technique for parsing low-resource languages in the absence of explicit supervision. Simple ‘direct transfer’ of a learned model based on a multilingual input encoding has provided a strong benchmark. This paper presents a method for unsupervised cross-lingual transfer that improves over direct transfer systems by using their output as implicit supervision as part of self-training on unlabelled text in the target language. The method assumes minimal resources and provides maximal flexibility by (a) accepting any pre-trained arc-factored dependency parser; (b) assuming no access to source language data; (c) supporting both projective and non-projective parsing; and (d) supporting multi-source transfer. With English as the source language, we show significant improvements over state-of-the-art transfer models on both distant and nearby languages, despite our conceptually simpler approach. We provide analyses of the choice of source languages for multi-source transfer, and the advantage of non-projective parsing. Our code is available online.</abstract>
       <url hash="71e63e3e">2021.eacl-main.254</url>
@@ -3055,7 +3055,7 @@
       <title>Modelling Context Emotions using Multi-task Learning for Emotion Controlled Dialog Generation</title>
       <author><first>Deeksha</first><last>Varshney</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2919–2931</pages>
       <abstract>A recent topic of research in natural language generation has been the development of automatic response generation modules that can automatically respond to a user’s utterance in an empathetic manner. Previous research has tackled this task using neural generative methods by augmenting emotion classes with the input sequences. However, the outputs by these models may be inconsistent. We employ multi-task learning to predict the emotion label and to generate a viable response for a given utterance using a common encoder with multiple decoders. Our proposed encoder-decoder model consists of a self-attention based encoder and a decoder with dot product attention mechanism to generate response with a specified emotion. We use the focal loss to handle imbalanced data distribution, and utilize the consistency loss to allow coherent decoding by the decoders. Human evaluation reveals that our model produces more emotionally pertinent responses. In addition, our model outperforms multiple strong baselines on automatic evaluation measures such as F1 and BLEU scores, thus resulting in more fluent and adequate responses.</abstract>
       <url hash="e5edd288">2021.eacl-main.255</url>
@@ -3090,8 +3090,8 @@
     <paper id="258">
       <title>Adapting Event Extractors to Medical Data: Bridging the Covariate Shift</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
-      <author><first>Jill Fain</first><last>Lehman</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="jill-fain-lehman"><first>Jill Fain</first><last>Lehman</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>2963–2975</pages>
       <abstract>We tackle the task of adapting event extractors to new domains without labeled data, by aligning the marginal distributions of source and target domains. As a testbed, we create two new event extraction datasets using English texts from two medical domains: (i) clinical notes, and (ii) doctor-patient conversations. We test the efficacy of three marginal alignment techniques: (i) adversarial domain adaptation (ADA), (ii) domain adaptive fine-tuning (DAFT), and (iii) a new instance weighting technique based on language model likelihood scores (LIW). LIW and DAFT improve over a no-transfer BERT baseline on both domains, but ADA only improves on notes. Deeper analysis of performance under different types of shifts (e.g., lexical shift, semantic shift) explains some of the variations among models. Our best-performing models reach F1 scores of 70.0 and 72.9 on notes and conversations respectively, using no labeled target data.</abstract>
       <url hash="11f71d18">2021.eacl-main.258</url>
@@ -3104,8 +3104,8 @@
       <author><first>Siddharth</first><last>Dalmia</last></author>
       <author><first>Maria</first><last>Ryskina</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>2976–2992</pages>
       <abstract>When Question-Answering (QA) systems are deployed in the real world, users query them through a variety of interfaces, such as speaking to voice assistants, typing questions into a search engine, or even translating questions to languages supported by the QA system. While there has been significant community attention devoted to identifying correct answers in passages assuming a perfectly formed question, we show that components in the pipeline that precede an answering engine can introduce varied and considerable sources of error, and performance can degrade substantially based on these upstream noise sources even for powerful pre-trained QA models. We conclude that there is substantial room for progress before QA systems can be effectively deployed, highlight the need for QA evaluation to expand to consider real-world use, and hope that our findings will spur greater community interest in the issues that arise when our systems actually need to be of utility to humans.</abstract>
       <url hash="e199151f">2021.eacl-main.259</url>
@@ -3166,7 +3166,7 @@
       <author><first>Vinit</first><last>Ravishankar</last></author>
       <author><first>Artur</first><last>Kulmizev</last></author>
       <author><first>Mostafa</first><last>Abdou</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>3031–3045</pages>
       <abstract>Since the popularization of the Transformer as a general-purpose feature encoder for NLP, many studies have attempted to decode linguistic structure from its novel multi-head attention mechanism. However, much of such work focused almost exclusively on English — a language with rigid word order and a lack of inflectional morphology. In this study, we present decoding experiments for multilingual BERT across 18 languages in order to test the generalizability of the claim that dependency syntax is reflected in attention patterns. We show that full trees can be decoded above baseline accuracy from single attention heads, and that individual relations are often tracked by the same heads across languages. Furthermore, in an attempt to address recent debates about the status of attention as an explanatory mechanism, we experiment with fine-tuning mBERT on a supervised parsing objective while freezing different series of parameters. Interestingly, in steering the objective to learn explicit linguistic structure, we find much of the same structure represented in the resulting attention patterns, with interesting differences with respect to which parameters are frozen.</abstract>
@@ -3186,7 +3186,7 @@
     </paper>
     <paper id="266">
       <title><fixed-case>CDA</fixed-case>: a Cost Efficient Content-based Multilingual Web Document Aligner</title>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>3053–3061</pages>
       <abstract>We introduce a Content-based Document Alignment approach (CDA), an efficient method to align multilingual web documents based on content in creating parallel training data for machine translation (MT) systems operating at the industrial level. CDA works in two steps: (i) projecting documents of a web domain to a shared multilingual space; then (ii) aligning them based on the similarity of their representations in such space. We leverage lexical translation models to build vector representations using TF×IDF. CDA achieves performance comparable with state-of-the-art systems in the WMT-16 Bilingual Document Alignment Shared Task benchmark while operating in multilingual space. Besides, we created two web-scale datasets to examine the robustness of CDA in an industrial setting involving up to 28 languages and millions of documents. The experiments show that CDA is robust, cost-effective, and is significantly superior in (i) processing large and noisy web data and (ii) scaling to new and low-resourced languages.</abstract>
@@ -3198,7 +3198,7 @@
       <title>Metric-Type Identification for Multi-Level Header Numerical Tables in Scientific Papers</title>
       <author><first>Lya Hulliyyatus</first><last>Suadaa</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <pages>3062–3071</pages>
       <abstract>Numerical tables are widely used to present experimental results in scientific papers. For table understanding, a metric-type is essential to discriminate numbers in the tables. We introduce a new information extraction task, metric-type identification from multi-level header numerical tables, and provide a dataset extracted from scientific papers consisting of header tables, captions, and metric-types. We then propose two joint-learning neural classification and generation schemes featuring pointer-generator-based and BERT-based models. Our results show that the joint models can handle both in-header and out-of-header metric-type identification problems.</abstract>
@@ -3244,7 +3244,7 @@
     <paper id="271">
       <title>Facilitating Terminology Translation with Target Lemma Annotations</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>3105–3111</pages>
       <abstract>Most of the recent work on terminology integration in machine translation has assumed that terminology translations are given already inflected in forms that are suitable for the target language sentence. In day-to-day work of professional translators, however, it is seldom the case as translators work with bilingual glossaries where terms are given in their dictionary forms; finding the right target language form is part of the translation process. We argue that the requirement for apriori specified target language forms is unrealistic and impedes the practical applicability of previous work. In this work, we propose to train machine translation systems using a source-side data augmentation method that annotates randomly selected source language words with their target language lemmas. We show that systems trained on such augmented data are readily usable for terminology integration in real-life translation scenarios. Our experiments on terminology translation into the morphologically complex Baltic and Uralic languages show an improvement of up to 7 BLEU points over baseline systems with no means for terminology integration and an average improvement of 4 BLEU points over the previous work. Results of the human evaluation indicate a 47.7% absolute improvement over the previous work in term translation accuracy when translating into Latvian.</abstract>
       <url hash="b9ed255d">2021.eacl-main.271</url>
@@ -3279,7 +3279,7 @@
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <pages>3143–3155</pages>
       <abstract>Biased associations have been a challenge in the development of classifiers for detecting toxic language, hindering both fairness and accuracy. As potential solutions, we investigate recently introduced debiasing methods for text classification datasets and models, as applied to toxic language detection. Our focus is on lexical (e.g., swear words, slurs, identity mentions) and dialectal markers (specifically African American English). Our comprehensive experiments establish that existing methods are limited in their ability to prevent biased behavior in current toxicity detectors. We then propose an automatic, dialect-aware data correction method, as a proof-of-concept. Despite the use of synthetic labels, this method reduces dialectal associations with toxicity. Overall, our findings show that debiasing a model trained on biased toxic language data is not as effective as simply relabeling the data to remove existing biases.</abstract>
       <url hash="be2add1f">2021.eacl-main.274</url>
@@ -3331,10 +3331,10 @@
     </paper>
     <paper id="278">
       <title>Interpretability for Morphological Inflection: from Character-level Predictions to Subword-level Rules</title>
-      <author><first>Tatyana</first><last>Ruzsics</last></author>
+      <author id="tatyana-ruzsics"><first>Tatyana</first><last>Ruzsics</last></author>
       <author><first>Olga</first><last>Sozinova</last></author>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
-      <author><first>Tanja</first><last>Samardzic</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last></author>
       <pages>3189–3201</pages>
       <abstract>Neural models for morphological inflection have recently attained very high results. However, their interpretation remains challenging. Towards this goal, we propose a simple linguistically-motivated variant to the encoder-decoder model with attention. In our model, character-level cross-attention mechanism is complemented with a self-attention module over substrings of the input. We design a novel approach for pattern extraction from attention weights to interpret what the model learn. We apply our methodology to analyze the model’s decisions on three typologically-different languages and find that a) our pattern extraction method applied to cross-attention weights uncovers variation in form of inflection morphemes, b) pattern extraction from self-attention shows triggers for such variation, c) both types of patterns are closely aligned with grammar inflection classes and class assignment criteria, for all three languages. Additionally, we find that the proposed encoder attention component leads to consistent performance improvements over a strong baseline.</abstract>
       <url hash="31fefc2e">2021.eacl-main.278</url>
@@ -3370,7 +3370,7 @@
       <author><first>Andy Mingren</first><last>Li</last></author>
       <author><first>Yishu</first><last>Miao</last></author>
       <author><first>Ozan</first><last>Caglayan</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3222–3233</pages>
       <abstract>This paper addresses the problem of simultaneous machine translation (SiMT) by exploring two main concepts: (a) adaptive policies to learn a good trade-off between high translation quality and low latency; and (b) visual information to support this process by providing additional (visual) contextual information which may be available before the textual input is produced. For that, we propose a multimodal approach to simultaneous machine translation using reinforcement learning, with strategies to integrate visual and textual information in both the agent and the environment. We provide an exploration on how different types of visual information and integration strategies affect the quality and latency of simultaneous translation models, and demonstrate that visual cues lead to higher quality while keeping the latency low.</abstract>
@@ -3381,7 +3381,7 @@
     <paper id="282">
       <title><fixed-case>STAR</fixed-case>: Cross-modal [<fixed-case>STA</fixed-case>]tement [<fixed-case>R</fixed-case>]epresentation for selecting relevant mathematical premises</title>
       <author><first>Deborah</first><last>Ferreira</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>3234–3243</pages>
       <abstract>Mathematical statements written in natural language are usually composed of two different modalities: mathematical elements and natural language. These two modalities have several distinct linguistic and semantic properties. State-of-the-art representation techniques have demonstrated an inability in capturing such an entangled style of discourse. In this work, we propose STAR, a model that uses cross-modal attention to learn how to represent mathematical text for the task of Natural Language Premise Selection. This task uses conjectures written in both natural and mathematical language to recommend premises that most likely will be relevant to prove a particular statement. We found that STAR not only outperforms baselines that do not distinguish between natural language and mathematical elements, but it also achieves better performance than state-of-the-art models.</abstract>
       <url hash="6a5e0a4f">2021.eacl-main.282</url>
@@ -3390,7 +3390,7 @@
     </paper>
     <paper id="283">
       <title>Do Multi-Hop Question Answering Systems Know How to Answer the Single-Hop Sub-Questions?</title>
-      <author id="yixuan-tang"><first>Yixuan</first><last>Tang</last></author>
+      <author><first>Yixuan</first><last>Tang</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <author><first>Anthony</first><last>Tung</last></author>
       <pages>3244–3249</pages>
@@ -3403,7 +3403,7 @@
       <title>Multilingual <fixed-case>LAMA</fixed-case>: Investigating Knowledge in Multilingual Pretrained Language Models</title>
       <author><first>Nora</first><last>Kassner</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3250–3258</pages>
       <abstract>Recently, it has been found that monolingual English language models can be used as knowledge bases. Instead of structural knowledge base queries, masked sentences such as “Paris is the capital of [MASK]” are used as probes. We translate the established benchmarks TREx and GoogleRE into 53 languages. Working with mBERT, we investigate three questions. (i) Can mBERT be used as a multilingual knowledge base? Most prior work only considers English. Extending research to multiple languages is important for diversity and accessibility. (ii) Is mBERT’s performance as knowledge base language-independent or does it vary from language to language? (iii) A multilingual model is trained on more text, e.g., mBERT is trained on 104 Wikipedias. Can mBERT leverage this for better performance? We find that using mBERT as a knowledge base yields varying performance across languages and pooling predictions across languages improves performance. Conversely, mBERT exhibits a language bias; e.g., when queried in Italian, it tends to predict Italy as the country of origin.</abstract>
       <url hash="b073e42f">2021.eacl-main.284</url>
@@ -3446,9 +3446,9 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Prashant</first><last>Sharma</last></author>
       <author><first>Sayali</first><last>Ghodekar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>3281–3292</pages>
       <abstract>Automatic detection of cognates helps downstream NLP tasks of Machine Translation, Cross-lingual Information Retrieval, Computational Phylogenetics and Cross-lingual Named Entity Recognition. Previous approaches for the task of cognate detection use orthographic, phonetic and semantic similarity based features sets. In this paper, we propose a novel method for enriching the feature sets, with cognitive features extracted from human readers’ gaze behaviour. We collect gaze behaviour data for a small sample of cognates and show that extracted cognitive features help the task of cognate detection. However, gaze data collection and annotation is a costly task. We use the collected gaze behaviour data to predict cognitive features for a larger sample and show that predicted cognitive features, also, significantly improve the task performance. We report improvements of 10% with the collected gaze features, and 12% using the predicted gaze features, over the previously proposed approaches. Furthermore, we release the collected gaze behaviour data along with our code and cross-lingual models.</abstract>
       <url hash="2225a875">2021.eacl-main.288</url>
@@ -3471,7 +3471,7 @@
     <paper id="290">
       <title>Modeling Coreference Relations in Visual Dialog</title>
       <author><first>Mingxiao</first><last>Li</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>3306–3318</pages>
       <abstract>Visual dialog is a vision-language task where an agent needs to answer a series of questions grounded in an image based on the understanding of the dialog history and the image. The occurrences of coreference relations in the dialog makes it a more challenging task than visual question-answering. Most previous works have focused on learning better multi-modal representations or on exploring different ways of fusing visual and language features, while the coreferences in the dialog are mainly ignored. In this paper, based on linguistic knowledge and discourse features of human dialog we propose two soft constraints that can improve the model’s ability of resolving coreferences in dialog in an unsupervised way. Experimental results on the VisDial v1.0 dataset shows that our model, which integrates two novel and linguistically inspired soft constraints in a deep transformer neural architecture, obtains new state-of-the-art performance in terms of recall at 1 and other evaluation metrics compared to current existing models and this without pretraining on other vision language datasets. Our qualitative results also demonstrate the effectiveness of the method that we propose.</abstract>
       <url hash="4399e4b6">2021.eacl-main.290</url>
@@ -3483,7 +3483,7 @@
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
       <author><first>Soroush</first><last>Mehri</last></author>
       <author><first>Remi</first><last>Tachet des Combes</last></author>
-      <author><first>T. J.</first><last>Hazen</last></author>
+      <author id="timothy-j-hazen"><first>T. J.</first><last>Hazen</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
       <pages>3319–3332</pages>
       <abstract>Neural NLP models tend to rely on spurious correlations between labels and input features to perform their tasks. Minority examples, i.e., examples that contradict the spurious correlations present in the majority of data points, have been shown to increase the out-of-distribution generalization of pre-trained language models. In this paper, we first propose using example forgetting to find minority examples without prior knowledge of the spurious correlations present in the dataset. Forgettable examples are instances either learned and then forgotten during training or never learned. We show empirically how these examples are related to minorities in our training sets. Then, we introduce a new approach to robustify models by fine-tuning our models twice, first on the full training data and second on the minorities only. We obtain substantial improvements in out-of-distribution generalization when applying our approach to the MNLI, QQP and FEVER datasets.</abstract>
@@ -3527,7 +3527,7 @@
       <title>Probing the Probing Paradigm: Does Probing Accuracy Entail Task Relevance?</title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3363–3377</pages>
       <abstract>Although neural models have achieved impressive results on several NLP benchmarks, little is understood about the mechanisms they use to perform language tasks. Thus, much recent attention has been devoted to analyzing the sentence representations learned by neural encoders, through the lens of ‘probing’ tasks. However, to what extent was the information encoded in sentence representations, as discovered through a probe, actually used by the model to perform its task? In this work, we examine this probing paradigm through a case study in Natural Language Inference, showing that models can learn to encode linguistic properties even if they are not needed for the task on which the model was trained. We further identify that pretrained word embeddings play a considerable role in encoding these properties rather than the training task itself, highlighting the importance of careful controls when designing probing experiments. Finally, through a set of controlled synthetic tasks, we demonstrate models can encode these properties considerably above chance-level, even when distributed in the data as random noise, calling into question the interpretation of absolute claims on probing tasks.</abstract>
       <url hash="59cd5672">2021.eacl-main.295</url>
@@ -3540,7 +3540,7 @@
       <author><first>Yukun</first><last>Feng</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>3378–3390</pages>
       <abstract>This work presents multi-modal deep SVDD (mSVDD) for one-class text classification. By extending the uni-modal SVDD to a multiple modal one, we build mSVDD with multiple hyperspheres, that enable us to build a much better description for target one-class data. Additionally, the end-to-end architecture of mSVDD can jointly handle neural feature learning and one-class text learning. We also introduce a mechanism for incorporating negative supervision in the absence of real negative data, which can be beneficial to the mSVDD model. We conduct experiments on Reuters and 20 Newsgroup datasets, and the experimental results demonstrate that mSVDD outperforms uni-modal SVDD and mSVDD can get further improvements when negative supervision is incorporated.</abstract>
       <url hash="4f8eb485">2021.eacl-main.296</url>
@@ -3580,7 +3580,7 @@
       <author><first>Tejas</first><last>Dhamecha</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
       <author><first>Samarth</first><last>Bharadwaj</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3421–3427</pages>
       <abstract>Spoken language is different from the written language in its style and structure. Disfluencies that appear in transcriptions from speech recognition systems generally hamper the performance of downstream NLP tasks. Thus, a disfluency correction system that converts disfluent to fluent text is of great value. This paper introduces a disfluency correction model that translates disfluent to fluent text by drawing inspiration from recent encoder-decoder unsupervised style-transfer models for text. We also show considerable benefits in performance when utilizing a small sample of 500 parallel disfluent-fluent sentences in a semi-supervised way. Our unsupervised approach achieves a BLEU score of 79.39 on the Switchboard corpus test set, with further improvement to a BLEU score of 85.28 with semi-supervision. Both are comparable to two competitive fully-supervised models.</abstract>
       <url hash="a51cbb65">2021.eacl-main.299</url>
@@ -3617,7 +3617,7 @@
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
       <author><first>Christian</first><last>Bentz</last></author>
       <author><first>Olga</first><last>Sozinova</last></author>
-      <author><first>Tanja</first><last>Samardzic</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last></author>
       <pages>3454–3468</pages>
       <abstract>The distributions of orthographic word types are very different across languages due to typological characteristics, different writing traditions and potentially other factors. The wide range of cross-linguistic diversity is still a major challenge for NLP and the study of language. We use BPE and information-theoretic measures to investigate if distributions become similar under specific levels of subword tokenization. We perform a cross-linguistic comparison, following incremental merges of BPE (we go from characters to words) for 47 diverse languages. We show that text entropy values (a feature of probability distributions) tend to converge at specific subword levels: relatively few BPE merges (around 350) lead to the most similar distributions across languages. Additionally, we analyze the interaction between subword and word-level distributions and show that our findings can be interpreted in light of the ongoing discussion regarding different types of morphological complexity.</abstract>
       <url hash="51eddd41">2021.eacl-main.302</url>
@@ -3627,7 +3627,7 @@
     <paper id="303">
       <title>A Large-scale Evaluation of Neural Machine Transliteration for <fixed-case>I</fixed-case>ndic Languages</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Siddharth</first><last>Jain</last></author>
+      <author id="siddharth-jain"><first>Siddharth</first><last>Jain</last></author>
       <author><first>Rahul</first><last>Kejriwal</last></author>
       <pages>3469–3475</pages>
       <abstract>We take up the task of large-scale evaluation of neural machine transliteration between English and Indic languages, with a focus on multilingual transliteration to utilize orthographic similarity between Indian languages. We create a corpus of 600K word pairs mined from parallel translation corpora and monolingual corpora, which is the largest transliteration corpora for Indian languages mined from public sources. We perform a detailed analysis of multilingual transliteration and propose an improved multilingual training recipe for Indic languages. We analyze various factors affecting transliteration quality like language family, transliteration direction and word origin.</abstract>
@@ -3638,7 +3638,7 @@
     <paper id="304">
       <title>Communicative-Function-Based Sentence Classification for Construction of an Academic Formulaic Expression Database</title>
       <author><first>Kenichi</first><last>Iwatsuki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>3476–3497</pages>
       <abstract>Formulaic expressions (FEs), such as ‘in this paper, we propose’ are frequently used in scientific papers. FEs convey a communicative function (CF), i.e. ‘showing the aim of the paper’ in the above-mentioned example. Although CF-labelled FEs are helpful in assisting academic writing, the construction of FE databases requires manual labour for assigning CF labels. In this study, we considered a fully automated construction of a CF-labelled FE database using the top–down approach, in which the CF labels are first assigned to sentences, and then the FEs are extracted. For the CF-label assignment, we created a CF-labelled sentence dataset, on which we trained a SciBERT classifier. We show that the classifier and dataset can be used to construct FE databases of disciplines that are different from the training data. The accuracy of in-disciplinary classification was more than 80%, while cross-disciplinary classification also worked well. We also propose an FE extraction method, which was applied to the CF-labelled sentences. Finally, we constructed and published a new, large CF-labelled FE database. The evaluation of the final CF-labelled FE database showed that approximately 65% of the FEs are correct and useful, which is sufficiently high considering practical use.</abstract>
       <url hash="84e448c8">2021.eacl-main.304</url>
@@ -3670,7 +3670,7 @@
     </paper>
     <paper id="307">
       <title>Don’t Change Me! User-Controllable Selective Paraphrase Generation</title>
-      <author id="mohan-zhang"><first>Mohan</first><last>Zhang</last></author>
+      <author><first>Mohan</first><last>Zhang</last></author>
       <author><first>Luchen</first><last>Tan</last></author>
       <author><first>Zihang</first><last>Fu</last></author>
       <author><first>Kun</first><last>Xiong</last></author>
@@ -3685,10 +3685,10 @@
     </paper>
     <paper id="308">
       <title>Rethinking Coherence Modeling: Synthetic vs. Downstream Tasks</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
       <author><first>Xiang</first><last>Lin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>3528–3539</pages>
       <abstract>Although coherence modeling has come a long way in developing novel models, their evaluation on downstream applications for which they are purportedly developed has largely been neglected. With the advancements made by neural approaches in applications such as machine translation (MT), summarization and dialog systems, the need for coherence evaluation of these tasks is now more crucial than ever. However, coherence models are typically evaluated only on synthetic tasks, which may not be representative of their performance in downstream applications. To investigate how representative the synthetic tasks are of downstream use cases, we conduct experiments on benchmarking well-known traditional and neural coherence models on synthetic sentence ordering tasks, and contrast this with their performance on three downstream applications: coherence evaluation for MT and summarization, and next utterance prediction in retrieval-based dialog. Our results demonstrate a weak correlation between the model performances in the synthetic tasks and the downstream applications, motivating alternate training and evaluation methods for coherence models.</abstract>
       <url hash="18107849">2021.eacl-main.308</url>
@@ -3709,7 +3709,7 @@
       <title>Probing for idiomaticity in vector space models</title>
       <author><first>Marcos</first><last>Garcia</last></author>
       <author><first>Tiago</first><last>Kramer Vieira</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>3551–3564</pages>
@@ -3724,7 +3724,7 @@
       <title>Is the Understanding of Explicit Discourse Relations Required in Machine Reading Comprehension?</title>
       <author><first>Yulong</first><last>Wu</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>3565–3579</pages>
       <abstract>An in-depth analysis of the level of language understanding required by existing Machine Reading Comprehension (MRC) benchmarks can provide insight into the reading capabilities of machines. In this paper, we propose an ablation-based methodology to assess the extent to which MRC datasets evaluate the understanding of explicit discourse relations. We define seven MRC skills which require the understanding of different discourse relations. We then introduce ablation methods that verify whether these skills are required to succeed on a dataset. By observing the drop in performance of neural MRC models evaluated on the original and the modified dataset, we can measure to what degree the dataset requires these skills, in order to be understood correctly. Experiments on three large-scale datasets with the BERT-base and ALBERT-xxlarge model show that the relative changes for all skills are small (less than 6%). These results imply that most of the answered questions in the examined datasets do not require understanding the discourse structure of the text. To specifically probe for natural language understanding, there is a need to design more challenging benchmarks that can correctly evaluate the intended skills.</abstract>
       <url hash="e73837e6">2021.eacl-main.311</url>
@@ -3733,7 +3733,7 @@
     </paper>
     <paper id="312">
       <title>Why Is <fixed-case>MBTI</fixed-case> Personality Detection from Texts a Difficult Task?</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Seren</first><last>Yenikent</last></author>
       <pages>3580–3589</pages>
       <abstract>Automatic detection of the four MBTI personality dimensions from texts has recently attracted noticeable attention from the natural language processing and computational linguistic communities. Despite the large collections of Twitter data for training, the best systems rarely even outperform the majority-class baseline. In this paper, we discuss the theoretical reasons for such low results and present the insights from an annotation study that further shed the light on this issue.</abstract>
@@ -3791,7 +3791,7 @@
     <paper id="317">
       <title>Lifelong Knowledge-Enriched Social Event Representation Learning</title>
       <author><first>Prashanth</first><last>Vijayaraghavan</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>3624–3635</pages>
       <abstract>The ability of humans to symbolically represent social events and situations is crucial for various interactions in everyday life. Several studies in cognitive psychology have established the role of mental state attributions in effectively representing variable aspects of these social events. In the past, NLP research on learning event representations often focuses on construing syntactic and semantic information from language. However, they fail to consider the importance of pragmatic aspects and the need to consistently update new social situational information without forgetting the accumulated experiences. In this work, we propose a representation learning framework to directly address these shortcomings by integrating social commonsense knowledge with recent advancements in the space of lifelong language learning. First, we investigate methods to incorporate pragmatic aspects into our social event embeddings by leveraging social commonsense knowledge. Next, we introduce continual learning strategies that allow for incremental consolidation of new knowledge while retaining and promoting efficient usage of prior knowledge. Experimental results on event similarity, reasoning, and paraphrase detection tasks prove the efficacy of our social event embeddings.</abstract>
       <url hash="1146cc84">2021.eacl-main.317</url>
@@ -3862,7 +3862,7 @@
       <author><first>Yoshitaka</first><last>Ushiku</last></author>
       <author><first>Atsushi</first><last>Hashimoto</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>3692–3702</pages>
       <abstract>Unsupervised image captioning is a challenging task that aims at generating captions without the supervision of image-sentence pairs, but only with images and sentences drawn from different sources and object labels detected from the images. In previous work, pseudo-captions, i.e., sentences that contain the detected object labels, were assigned to a given image. The focus of the previous work was on the alignment of input images and pseudo-captions at the sentence level. However, pseudo-captions contain many words that are irrelevant to a given image. In this work, we investigate the effect of removing mismatched words from image-sentence alignment to determine how they make this task difficult. We propose a simple gating mechanism that is trained to align image features with only the most reliable words in pseudo-captions: the detected object labels. The experimental results show that our proposed method outperforms the previous methods without introducing complex sentence-level learning objectives. Combined with the sentence-level alignment method of previous work, our method further improves its performance. These results confirm the importance of careful alignment in word-level details.</abstract>
       <url hash="9d370dd8">2021.eacl-main.323</url>
@@ -3906,7 +3906,7 @@
     <meta>
       <booktitle>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations</booktitle>
       <editor><first>Dimitra</first><last>Gkatzia</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>April</month>
@@ -3957,7 +3957,7 @@
       <author><first>Phuong</first><last>Nguyen</last></author>
       <author><first>Chau</first><last>Nguyen</last></author>
       <author><first>Ken</first><last>Satoh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Minh</first><last>Nguyen</last></author>
       <pages>24–31</pages>
       <abstract>This paper presents CovRelex, a scientific paper retrieval system targeting entities and relations via relation extraction on COVID-19 scientific papers. This work aims at building a system supporting users efficiently in acquiring knowledge across a huge number of COVID-19 scientific papers published rapidly. Our system can be accessed via <url>https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/covrelex/</url>.</abstract>
@@ -3993,7 +3993,7 @@
     <paper id="7">
       <title><fixed-case>T</fixed-case>-<fixed-case>NER</fixed-case>: An All-Round Python Library for Transformer-based Named Entity Recognition</title>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>53–62</pages>
       <abstract>Language model (LM) pretraining has led to consistent improvements in many NLP downstream tasks, including named entity recognition (NER). In this paper, we present T-NER (Transformer-based Named Entity Recognition), a Python library for NER LM finetuning. In addition to its practical utility, T-NER facilitates the study and investigation of the cross-domain and cross-lingual generalization ability of LMs finetuned on NER. Our library also provides a web app where users can get model predictions interactively for arbitrary text, which facilitates qualitative model evaluation for non-expert programmers. We show the potential of the library by compiling nine public NER datasets into a unified format and evaluating the cross-domain and cross- lingual performance across the datasets. The results from our initial experiments show that in-domain performance is generally competitive across datasets. However, cross-domain generalization is challenging even with a large pretrained LM, which has nevertheless capacity to learn domain-specific features if fine- tuned on a combined dataset. To facilitate future research, we also release all our LM checkpoints via the Hugging Face model hub.</abstract>
       <url hash="48d2a697">2021.eacl-demos.7</url>
@@ -4004,7 +4004,7 @@
       <title>Forum 4.0: An Open-Source User Comment Analysis Framework</title>
       <author><first>Marlo</first><last>Haering</last></author>
       <author><first>Jakob Smedegaard</first><last>Andersen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Wiebke</first><last>Loosen</last></author>
       <author><first>Benjamin</first><last>Milde</last></author>
       <author><first>Tim</first><last>Pietz</last></author>
@@ -4021,7 +4021,7 @@
     <paper id="9">
       <title><fixed-case>SLTEV</fixed-case>: Comprehensive Evaluation of Spoken Language Translation</title>
       <author><first>Ebrahim</first><last>Ansari</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Mohammad</first><last>Mahmoudi</last></author>
       <pages>71–79</pages>
@@ -4046,7 +4046,7 @@
       <title><fixed-case>D</fixed-case>eb<fixed-case>IE</fixed-case>: A Platform for Implicit and Explicit Debiasing of Word Embedding Spaces</title>
       <author><first>Niklas</first><last>Friedrich</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>91–98</pages>
       <abstract>Recent research efforts in NLP have demonstrated that distributional word vector spaces often encode stereotypical human biases, such as racism and sexism. With word representations ubiquitously used in NLP models and pipelines, this raises ethical issues and jeopardizes the fairness of language technologies. While there exists a large body of work on bias measures and debiasing methods, to date, there is no platform that would unify these research efforts and make bias measuring and debiasing of representation spaces widely accessible. In this work, we present DebIE, the first integrated platform for (1) measuring and (2) mitigating bias in word embeddings. Given an (i) embedding space (users can choose between the predefined spaces or upload their own) and (ii) a bias specification (users can choose between existing bias specifications or create their own), DebIE can (1) compute several measures of implicit and explicit bias and modify the embedding space by executing two (mutually composable) debiasing models. DebIE’s functionality can be accessed through four different interfaces: (a) a web application, (b) a desktop application, (c) a REST-ful API, and (d) as a command-line application. DebIE is available at: debie.informatik.uni-mannheim.de.</abstract>
@@ -4198,7 +4198,7 @@
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Alan</first><last>Ramponi</last></author>
       <author><first>Ibrahim</first><last>Sharaf</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>176–197</pages>
       <abstract>Transfer learning, particularly approaches that combine multi-task learning with pre-trained contextualized embeddings and fine-tuning, have advanced the field of Natural Language Processing tremendously in recent years. In this paper we present MaChAmp, a toolkit for easy fine-tuning of contextualized embeddings in multi-task settings. The benefits of MaChAmp are its flexible configuration options, and the support of a variety of natural language processing tasks in a uniform toolkit, from text classification and sequence labeling to dependency parsing, masked language modeling, and text generation.</abstract>
       <url hash="90821c0d">2021.eacl-demos.22</url>
@@ -4211,7 +4211,7 @@
       <author><first>Saba</first><last>Anwar</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Alexander</first><last>Friedrich</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>198–204</pages>
       <abstract>We present Sense Clustering over Time (SCoT), a novel network-based tool for analysing lexical change. SCoT represents the meanings of a word as clusters of similar words. It visualises their formation, change, and demise. There are two main approaches to the exploration of dynamic networks: the discrete one compares a series of clustered graphs from separate points in time. The continuous one analyses the changes of one dynamic network over a time-span. SCoT offers a new hybrid solution. First, it aggregates time-stamped documents into intervals and calculates one sense graph per discrete interval. Then, it merges the static graphs to a new type of dynamic semantic neighbourhood graph over time. The resulting sense clusters offer uniquely detailed insights into lexical change over continuous intervals with model transparency and provenance. SCoT has been successfully used in a European study on the changing meaning of ‘crisis’.</abstract>
       <url hash="56fb7627">2021.eacl-demos.23</url>
@@ -4234,7 +4234,7 @@
     <paper id="25">
       <title><fixed-case>T</fixed-case>2<fixed-case>NER</fixed-case>: Transformers based Transfer Learning Framework for Named Entity Recognition</title>
       <author><first>Saadullah</first><last>Amin</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>212–220</pages>
       <abstract>Recent advances in deep transformer models have achieved state-of-the-art in several natural language processing (NLP) tasks, whereas named entity recognition (NER) has traditionally benefited from long-short term memory (LSTM) networks. In this work, we present a Transformers based Transfer Learning framework for Named Entity Recognition (T2NER) created in PyTorch for the task of NER with deep transformer models. The framework is built upon the Transformers library as the core modeling engine and supports several transfer learning scenarios from sequential transfer to domain adaptation, multi-task learning, and semi-supervised learning. It aims to bridge the gap between the algorithmic advances in these areas by combining them with the state-of-the-art in transformer models to provide a unified platform that is readily extensible and can be used for both the transfer learning research in NER, and for real-world applications. The framework is available at: <url>https://github.com/suamin/t2ner</url>.</abstract>
       <url hash="36940106">2021.eacl-demos.25</url>
@@ -4245,22 +4245,22 @@
     <paper id="26">
       <title><fixed-case>E</fixed-case>uropean Language Grid: A Joint Platform for the <fixed-case>E</fixed-case>uropean Language Technology Community</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Gerhard</first><last>Backfried</last></author>
-      <author><first>Jose Manuel</first><last>Gomez-Perez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>Jose Manuel</first><last>Gomez-Perez</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Rémi</first><last>Calizzano</last></author>
       <author><first>Nils</first><last>Feldhus</last></author>
       <author><first>Stefanie</first><last>Hegele</last></author>
       <author><first>Florian</first><last>Kintzel</last></author>
       <author><first>Katrin</first><last>Marheinecke</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Athanasia</first><last>Kolovou</last></author>
@@ -4268,10 +4268,10 @@
       <author><first>Leon</first><last>Voukoutis</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
       <author><first>Jana</first><last>Hamrlova</last></author>
-      <author><first>Dusan</first><last>Varis</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
       <author><first>Lukas</first><last>Kacena</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Mickaël</first><last>Rigault</last></author>
       <author><first>Julija</first><last>Melnika</last></author>
       <author><first>Miro</first><last>Janosik</last></author>
@@ -4291,7 +4291,7 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Jingun</first><last>Kwon</last></author>
       <author><first>Young-In</first><last>Song</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>231–237</pages>
       <abstract>One way to enhance user engagement in search engines is to suggest interesting facts to the user. Although relationships between persons are important as a target for text mining, there are few effective approaches for extracting the interesting relationships between persons. We therefore propose a method for extracting interesting relationships between persons from natural language texts by focusing on their surprisingness. Our method first extracts all personal relationships from dependency trees for the texts and then calculates surprise scores for distributed representations of the extracted relationships in an unsupervised manner. The unique point of our method is that it does not require any labeled dataset with annotation for the surprising personal relationships. The results of the human evaluation show that the proposed method could extract more interesting relationships between persons from Japanese Wikipedia articles than a popularity-based baseline method. We demonstrate our proposed method as a chrome plugin on google search.</abstract>
       <url hash="04bfcaf1">2021.eacl-demos.27</url>
@@ -4314,7 +4314,7 @@
     </paper>
     <paper id="29">
       <title>Story Centaur: Large Language Model Few Shot Learning as a Creative Writing Tool</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Kory</first><last>Mathewson</last></author>
       <author><first>Ben</first><last>Pietrzak</last></author>
       <author><first>Sherol</first><last>Chen</last></author>
@@ -4350,10 +4350,10 @@
     </paper>
     <paper id="32">
       <title><fixed-case>ELITR</fixed-case> Multilingual Live Subtitling: Demo and Strategy</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Dominik</first><last>Macháček</last></author>
       <author><first>Sangeet</first><last>Sagar</last></author>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <author><first>Jonáš</first><last>Kratochvíl</last></author>
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Ebrahim</first><last>Ansari</last></author>
@@ -4364,8 +4364,8 @@
       <author><first>Ivan</first><last>Simonini</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Philip</first><last>Williams</last></author>
@@ -4398,10 +4398,10 @@
     </paper>
     <paper id="35">
       <title>Domain Expert Platform for Goal-Oriented Dialog Collection</title>
-      <author><first>Didzis</first><last>Goško</last></author>
-      <author><first>Arturs</first><last>Znotins</last></author>
-      <author><first>Inguna</first><last>Skadina</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Goško</last></author>
+      <author id="arturs-znotins"><first>Arturs</first><last>Znotins</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadina</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Gunta</first><last>Nešpore-Bērzkalne</last></author>
       <pages>295–301</pages>
       <abstract>Today, most dialogue systems are fully or partly built using neural network architectures. A crucial prerequisite for the creation of a goal-oriented neural network dialogue system is a dataset that represents typical dialogue scenarios and includes various semantic annotations, e.g. intents, slots and dialogue actions, that are necessary for training a particular neural network architecture. In this demonstration paper, we present an easy to use interface and its back-end which is oriented to domain experts for the collection of goal-oriented dialogue samples. The platform not only allows to collect or write sample dialogues in a structured way, but also provides a means for simple annotation and interpretation of the dialogues. The platform itself is language-independent; it depends only on the availability of particular language processing components for a specific language. It is currently being used to collect dialogue samples in Latvian (a highly inflected language) which represent typical communication between students and the student service.</abstract>
@@ -4413,7 +4413,7 @@
       <title>Which is Better for Deep Learning: Python or <fixed-case>MATLAB</fixed-case>? Answering Comparative Questions in Natural Language</title>
       <author><first>Viktoriia</first><last>Chekalina</last></author>
       <author><first>Alexander</first><last>Bondarenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Meriem</first><last>Beloucif</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
@@ -4438,7 +4438,7 @@
       <author><first>Aditya</first><last>Gaydhani</last></author>
       <author><first>Sheena</first><last>Dufresne</last></author>
       <author><first>Maria</first><last>Gini</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>321–328</pages>
       <abstract>Conversational Agent for Daily Living Assessment Coaching (CADLAC) is a multi-modal conversational agent system designed to impersonate “individuals” with various levels of ability in activities of daily living (ADLs: e.g., dressing, bathing, mobility, etc.) for use in training professional assessors how to conduct interviews to determine one’s level of functioning. The system is implemented on the MindMeld platform for conversational AI and features a Bidirectional Long Short-Term Memory topic tracker that allows the agent to navigate conversations spanning 18 different ADL domains, a dialogue manager that interfaces with a database of over 10,000 historical ADL assessments, a rule-based Natural Language Generation (NLG) module, and a pre-trained open-domain conversational sub-agent (based on GPT-2) for handling conversation turns outside of the 18 ADL domains. CADLAC is delivered via state-of-the-art web frameworks to handle multiple conversations and users simultaneously and is enabled with voice interface. The paper includes a description of the system design and evaluation of individual components followed by a brief discussion of current limitations and next steps.</abstract>
       <url hash="be2f7398">2021.eacl-demos.38</url>
@@ -4448,7 +4448,7 @@
     <paper id="39">
       <title><fixed-case>HULK</fixed-case>: An Energy Efficiency Benchmark Platform for Responsible Natural Language Processing</title>
       <author><first>Xiyou</first><last>Zhou</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Xiaoyong</first><last>Jin</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>329–336</pages>
@@ -4461,10 +4461,10 @@
   <volume id="srw" ingest-date="2021-04-19" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</booktitle>
-      <editor><first>Ionut-Teodor</first><last>Sorodoc</last></editor>
+      <editor id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></editor>
       <editor><first>Madhumita</first><last>Sushil</last></editor>
       <editor><first>Ece</first><last>Takmaz</last></editor>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>April</month>
@@ -4534,7 +4534,7 @@
       <title>A reproduction of Apple’s bi-directional <fixed-case>LSTM</fixed-case> models for language identification in short strings</title>
       <author><first>Mads</first><last>Toftrup</last></author>
       <author><first>Søren</first><last>Asger Sørensen</last></author>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>36–42</pages>
       <abstract>Language Identification is the task of identifying a document’s language. For applications like automatic spell checker selection, language identification must use very short strings such as text message fragments. In this work, we reproduce a language identification architecture that Apple briefly sketched in a blog post. We confirm the bi-LSTM model’s performance and find that it outperforms current open-source language identifiers. We further find that its language identification mistakes are due to confusion between related languages.</abstract>
@@ -4589,7 +4589,7 @@
     <paper id="11">
       <title>Contrasting distinct structured views to learn sentence embeddings</title>
       <author><first>Antoine</first><last>Simoulin</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>71–79</pages>
       <abstract>We propose a self-supervised method that builds sentence embeddings from the combination of diverse explicit syntactic structures of a sentence. We assume structure is crucial to building consistent representations as we expect sentence meaning to be a function of both syntax and semantic aspects. In this perspective, we hypothesize that some linguistic representations might be better adapted given the considered task or sentence. We, therefore, propose to learn individual representation functions for different syntactic frameworks jointly. Again, by hypothesis, all such functions should encode similar semantic information differently and consequently, be complementary for building better sentential semantic embeddings. To assess such hypothesis, we propose an original contrastive multi-view framework that induces an explicit interaction between models during the training phase. We make experiments combining various structures such as dependency, constituency, or sequential schemes. Our results outperform comparable methods on several tasks from standard sentence embedding benchmarks.</abstract>
       <url hash="1c9fb6db">2021.eacl-srw.11</url>
@@ -4599,7 +4599,7 @@
     <paper id="12">
       <title>Discrete Reasoning Templates for Natural Language Understanding</title>
       <author><first>Hadeel</first><last>Al-Negheimish</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Alessandra</first><last>Russo</last></author>
       <pages>80–87</pages>
       <abstract>Reasoning about information from multiple parts of a passage to derive an answer is an open challenge for reading-comprehension models. In this paper, we present an approach that reasons about complex questions by decomposing them to simpler subquestions that can take advantage of single-span extraction reading-comprehension models, and derives the final answer according to instructions in a predefined reasoning template. We focus on subtraction based arithmetic questions and evaluate our approach on a subset of the DROP dataset. We show that our approach is competitive with the state of the art while being interpretable and requires little supervision.</abstract>
@@ -4747,7 +4747,7 @@
       <author><first>Sinan</first><last>Kurtyigit</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>192–202</pages>
       <abstract>Type- and token-based embedding architectures are still competing in lexical semantic change detection. The recent success of type-based models in SemEval-2020 Task 1 has raised the question why the success of token-based models on a variety of other NLP tasks does not translate to our field. We investigate the influence of a range of variables on clusterings of BERT vectors and show that its low performance is largely due to orthographic information on the target word, which is encoded even in the higher layers of BERT representations. By reducing the influence of orthography we considerably improve BERT’s performance.</abstract>
       <url hash="46529311">2021.eacl-srw.25</url>
@@ -4807,7 +4807,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>10–13</pages>
       <abstract>Speech translation is the translation of speech in one language typically to text in another, traditionally accomplished through a combination of automatic speech recognition and machine translation. Speech translation has attracted interest for many years, but the recent successful applications of deep learning to both individual tasks have enabled new opportunities through joint modeling, in what we today call ‘end-to-end speech translation.’ In this tutorial we introduce the techniques used in cutting-edge research on speech translation. Starting from the traditional cascaded approach, we give an overview on data sources and model architectures to achieve state-of-the art performance with end-to-end speech translation for both high- and low-resource languages. In addition, we discuss methods to evaluate analyze the proposed solutions, as well as the challenges faced when applying speech translation models for real-world applications.</abstract>
       <url hash="29e48e66">2021.eacl-tutorials.3</url>
@@ -4816,10 +4816,10 @@
     </paper>
     <paper id="4">
       <title>Reviewing Natural Language Processing Research</title>
-      <author><first>Kevin</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Anna</first><last>Rogers</last></author>
       <pages>14–16</pages>
       <abstract>The reviewing procedure has been identified as one of the major issues in the current situation of the NLP field. While it is implicitly assumed that junior researcher learn reviewing during their PhD project, this might not always be the case. Additionally, with the growing NLP community and the efforts in the context of widening the NLP community, researchers joining the field might not have the opportunity to practise reviewing. This tutorial fills in this gap by providing an opportunity to learn the basics of reviewing. Also more experienced researchers might find this tutorial interesting to revise their reviewing procedure.</abstract>
diff --git a/data/xml/2021.ecnlp.xml b/data/xml/2021.ecnlp.xml
index 2c8e96e6d9..78e6d158ab 100644
--- a/data/xml/2021.ecnlp.xml
+++ b/data/xml/2021.ecnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 4th Workshop on e-Commerce and NLP</booktitle>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Surya</first><last>Kallumadi</last></editor>
       <editor><first>Nicola</first><last>Ueffing</last></editor>
       <editor><first>Oleg</first><last>Rokhlenko</last></editor>
@@ -89,7 +89,7 @@
       <author><first>Yue</first><last>Liu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Premkumar</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Premkumar</first><last>Natarajan</last></author>
       <pages>38–48</pages>
       <abstract>The growing popularity of Virtual Assistants poses new challenges for Entity Resolution, the task of linking mentions in text to their referent entities in a knowledge base. Specifically, in the shopping domain, customers tend to mention the entities implicitly (e.g., “organic milk”) rather than use the entity names explicitly, leading to a large number of candidate products. Meanwhile, for the same query, different customers may expect different results. For example, with “add milk to my cart”, a customer may refer to a certain product from his/her favorite brand, while some customers may want to re-order products they regularly purchase. Moreover, new customers may lack persistent shopping history, which requires us to enrich the connections between customers through products and their attributes. To address these issues, we propose a new framework that leverages personalized features to improve the accuracy of product ranking. We first build a cross-source heterogeneous knowledge graph from customer purchase history and product knowledge graph to jointly learn customer and product embeddings. After that, we incorporate product, customer, and history representations into a neural reranking model to predict which candidate is most likely to be purchased by a specific customer. Experiment results show that our model substantially improves the accuracy of the top ranked candidates by 24.6% compared to the state-of-the-art product search model.</abstract>
       <url hash="19a00849">2021.ecnlp-1.6</url>
@@ -211,7 +211,7 @@
       <title>Enhancing Aspect Extraction for <fixed-case>H</fixed-case>indi</title>
       <author><first>Arghya</first><last>Bhattacharya</last></author>
       <author><first>Alok</first><last>Debnath</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>140–149</pages>
       <abstract>Aspect extraction is not a well-explored topic in Hindi, with only one corpus having been developed for the task. In this paper, we discuss the merits of the existing corpus in terms of quality, size, sparsity, and performance in aspect extraction tasks using established models. To provide a better baseline corpus for aspect extraction, we translate the SemEval 2014 aspect-based sentiment analysis dataset and annotate the aspects in that data. We provide rigorous guidelines and a replicable methodology for this task. We quantitatively evaluate the translations and annotations using inter-annotator agreement scores. We also evaluate our dataset using state-of-the-art neural aspect extraction models in both monolingual and multilingual settings and show that the models perform far better on our corpus than on the existing Hindi dataset. With this, we establish our corpus as the gold-standard aspect extraction dataset in Hindi.</abstract>
       <url hash="37aa2995">2021.ecnlp-1.17</url>
diff --git a/data/xml/2021.econlp.xml b/data/xml/2021.econlp.xml
index f5697655f9..8c35310249 100644
--- a/data/xml/2021.econlp.xml
+++ b/data/xml/2021.econlp.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Economics and Natural Language Processing</booktitle>
       <editor><first>Udo</first><last>Hahn</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
       <month>November</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>A Fine-Grained Annotated Corpus for Target-Based Opinion Analysis of Economic and Financial Narratives</title>
       <author><first>Jiahui</first><last>Hu</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>1–12</pages>
       <abstract>In this paper about aspect-based sentiment analysis (ABSA), we present the first version of a fine-grained annotated corpus for target-based opinion analysis (TBOA) to analyze economic activities or financial markets. We have annotated, at an intra-sentential level, a corpus of sentences extracted from documents representative of financial analysts’ most-read materials by considering how financial actors communicate about the evolution of event trends and analyze related publications (news, official communications, etc.). Since we focus on identifying the expressions of opinions related to the economy and financial markets, we annotated the sentences that contain at least one subjective expression about a domain-specific term. Candidate sentences for annotations were randomly chosen from texts of specialized press and professional information channels over a period ranging from 1986 to 2021. Our annotation scheme relies on various linguistic markers like domain-specific vocabulary, syntactic structures, and rhetorical relations to explicitly describe the author’s subjective stance. We investigated and evaluated the recourse to automatic pre-annotation with existing natural language processing technologies to alleviate the annotation workload. Our aim is to propose a corpus usable on the one hand as training material for the automatic detection of the opinions expressed on an extensive range of domain-specific aspects and on the other hand as a gold standard for evaluation TBOA. In this paper, we present our pre-annotation models and evaluations of their performance, introduce our annotation scheme and report on the main characteristics of our corpus.</abstract>
       <url hash="06570a64">2021.econlp-1.1</url>
@@ -135,7 +135,7 @@
     <paper id="11">
       <title>Cryptocurrency Day Trading and Framing Prediction in Microblog Discourse</title>
       <author><first>Anna Paula</first><last>Pawlicka Maule</last></author>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <pages>82–92</pages>
       <abstract>With 56 million people actively trading and investing in cryptocurrency online and globally in 2020, there is an increasing need for automatic social media analysis tools to help understand trading discourse and behavior. In this work, we present a dual natural language modeling pipeline which leverages language and social network behaviors for the prediction of cryptocurrency day trading actions and their associated framing patterns. This pipeline first predicts if tweets can be used to guide day trading behavior, specifically if a cryptocurrency investor should buy, sell, or hold their cryptocurrencies in order to make a profit. Next, tweets are input to an unsupervised deep clustering approach to automatically detect trading framing patterns. Our contributions include the modeling pipeline for this novel task, a new Cryptocurrency Tweets Dataset compiled from influential accounts, and a Historical Price Dataset. Our experiments show that our approach achieves an 88.78% accuracy for day trading behavior prediction and reveals framing fluctuations prior to and during the COVID-19 pandemic that could be used to guide investment actions.</abstract>
       <url hash="834d6ddc">2021.econlp-1.11</url>
diff --git a/data/xml/2021.emnlp.xml b/data/xml/2021.emnlp.xml
index 82c7e08686..2b05d3b65f 100644
--- a/data/xml/2021.emnlp.xml
+++ b/data/xml/2021.emnlp.xml
@@ -3,10 +3,10 @@
   <volume id="main" ingest-date="2021-11-03" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</booktitle>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
-      <editor><first>Xuanjing</first><last>Huang</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online and Punta Cana, Dominican Republic</address>
       <month>November</month>
@@ -68,7 +68,7 @@
       <author><first>Dan</first><last>Liu</last></author>
       <author><first>Mengge</first><last>Du</last></author>
       <author><first>Xiaoxi</first><last>Li</last></author>
-      <author id="ya-li"><first>Ya</first><last>Li</last></author>
+      <author><first>Ya</first><last>Li</last></author>
       <author><first>Enhong</first><last>Chen</last></author>
       <pages>39–55</pages>
       <abstract>This paper proposes a novel architecture, Cross Attention Augmented Transducer (CAAT), for simultaneous translation. The framework aims to jointly optimize the policy and translation models. To effectively consider all possible READ-WRITE simultaneous translation action paths, we adapt the online automatic speech recognition (ASR) model, RNN-T, but remove the strong monotonic constraint, which is critical for the translation task to consider reordering. To make CAAT work, we introduce a novel latency loss whose expectation can be optimized by a forward-backward algorithm. We implement CAAT with Transformer while the general CAAT architecture can also be implemented with other attention-based encoder-decoder frameworks. Experiments on both speech-to-text (S2T) and text-to-text (T2T) simultaneous translation tasks show that CAAT achieves significantly better latency-quality trade-offs compared to the state-of-the-art simultaneous translation approaches.</abstract>
@@ -95,7 +95,7 @@
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Chulun</first><last>Zhou</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
@@ -123,7 +123,7 @@
     <paper id="8">
       <title>Controllable Neural Dialogue Summarization with Personal Named Entity Planning</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>92–106</pages>
       <abstract>In this paper, we propose a controllable neural generation framework that can flexibly guide dialogue summarization with personal named entity planning. The conditional sequences are modulated to decide what types of information or what perspective to focus on when forming summaries to tackle the under-constrained problem in summarization tasks. This framework supports two types of use cases: (1) Comprehensive Perspective, which is a general-purpose case with no user-preference specified, considering summary points from all conversational interlocutors and all mentioned persons; (2) Focus Perspective, positioning the summary based on a user-specified personal named entity, which could be one of the interlocutors or one of the persons mentioned in the conversation. During training, we exploit occurrence planning of personal named entities and coreference information to improve temporal coherence and to minimize hallucination in neural generation. Experimental results show that our proposed framework generates fluent and factually consistent summaries under various planning controls using both objective metrics and human evaluations.</abstract>
       <url hash="8adb9328">2021.emnlp-main.8</url>
@@ -159,7 +159,7 @@
       <author><first>Baoyu</first><last>Jing</last></author>
       <author><first>Zeyu</first><last>You</last></author>
       <author><first>Tao</first><last>Yang</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
       <author><first>Hanghang</first><last>Tong</last></author>
       <pages>133–139</pages>
       <abstract>Extractive text summarization aims at extracting the most representative sentences from a given document as its summary. To extract a good summary from a long text document, sentence embedding plays an important role. Recent studies have leveraged graph neural networks to capture the inter-sentential relationship (e.g., the discourse graph) within the documents to learn contextual sentence embedding. However, those approaches neither consider multiple types of inter-sentential relationships (e.g., semantic similarity and natural connection relationships), nor model intra-sentential relationships (e.g, semantic similarity and syntactic relationship among words). To address these problems, we propose a novel Multiplex Graph Convolutional Network (Multi-GCN) to jointly model different types of relationships among sentences and words. Based on Multi-GCN, we propose a Multiplex Graph Summarization (Multi-GraS) model for extractive text summarization. Finally, we evaluate the proposed models on the CNN/DailyMail benchmark dataset to demonstrate effectiveness of our method.</abstract>
@@ -187,7 +187,7 @@
       <author><first>Yao</first><last>Wan</last></author>
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Lifang</first><last>He</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>146–154</pages>
       <abstract>To capture the semantic graph structure from raw text, most existing summarization approaches are built on GNNs with a pre-trained model. However, these methods suffer from cumbersome procedures and inefficient computations for long-text documents. To mitigate these issues, this paper proposes HetFormer, a Transformer-based pre-trained model with multi-granularity sparse attentions for long-text extractive summarization. Specifically, we model different types of semantic nodes in raw text as a potential heterogeneous graph and directly learn heterogeneous relationships (edges) among nodes by Transformer. Extensive experiments on both single- and multi-document summarization tasks show that HetFormer achieves state-of-the-art performance in Rouge F1 while using less memory and fewer parameters.</abstract>
       <url hash="c660f213">2021.emnlp-main.13</url>
@@ -238,7 +238,7 @@
     <paper id="17">
       <title>A Partition Filter Network for Joint Entity and Relation Extraction</title>
       <author><first>Zhiheng</first><last>Yan</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
@@ -312,7 +312,7 @@
       <title>Learning Implicit Sentiment in Aspect-based Sentiment Analysis with Supervised Contrastive Pre-Training</title>
       <author><first>Zhengyan</first><last>Li</last></author>
       <author><first>Yicheng</first><last>Zou</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>246–256</pages>
@@ -448,7 +448,7 @@
     <paper id="32">
       <title>Few-Shot Text Generation with Natural Language Instructions</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>390–402</pages>
       <abstract>Providing pretrained language models with simple task descriptions in natural language enables them to solve some tasks in a fully unsupervised fashion. Moreover, when combined with regular learning from examples, this idea yields impressive few-shot results for a wide range of text classification tasks. It is also a promising direction to improve data efficiency in generative settings, but there are several challenges to using a combination of task descriptions and example-based learning for text generation. In particular, it is crucial to find task descriptions that are easy to understand for the pretrained model and to ensure that it actually makes good use of them; furthermore, effective measures against overfitting have to be implemented. In this paper, we show how these challenges can be tackled: We introduce GenPET, a method for text generation that is based on pattern-exploiting training, a recent approach for combining textual instructions with supervised learning that only works for classification tasks. On several summarization and headline generation datasets, GenPET gives consistent improvements over strong baselines in few-shot settings.</abstract>
       <url hash="861ef6b4">2021.emnlp-main.32</url>
@@ -533,7 +533,7 @@
       <title>Inducing Stereotypical Character Roles from Plot Structure</title>
       <author><first>Labiba</first><last>Jahan</last></author>
       <author><first>Rahul</first><last>Mittal</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>492–497</pages>
       <abstract>Stereotypical character roles-also known as archetypes or dramatis personae-play an important function in narratives: they facilitate efficient communication with bundles of default characteristics and associations and ease understanding of those characters’ roles in the overall narrative. We present a fully unsupervised k-means clustering approach for learning stereotypical roles given only structural plot information. We demonstrate the technique on Vladimir Propp’s structural theory of Russian folktales (captured in the extended ProppLearner corpus, with 46 tales), showing that our approach can induce six out of seven of Propp’s dramatis personae with F1 measures of up to 0.70 (0.58 average), with an additional category for minor characters. We have explored various feature sets and variations of a cluster evaluation method. The best-performing feature set comprises plot functions, unigrams, tf-idf weights, and embeddings over coreference chain heads. Roles that are mentioned more often (Hero, Villain), or have clearly distinct plot patterns (Princess) are more strongly differentiated than less frequent or distinct roles (Dispatcher, Helper, Donor). Detailed error analysis suggests that the quality of the coreference chain and plot functions annotations are critical for this task. We provide all our data and code for reproducibility.</abstract>
       <url hash="6d207d87">2021.emnlp-main.39</url>
@@ -570,7 +570,7 @@
     <paper id="42">
       <title>Mitigating Language-Dependent Ethnic Bias in <fixed-case>BERT</fixed-case></title>
       <author><first>Jaimeen</first><last>Ahn</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>533–549</pages>
       <abstract>In this paper, we study ethnic bias and how it varies across languages by analyzing and mitigating ethnic bias in monolingual BERT for English, German, Spanish, Korean, Turkish, and Chinese. To observe and quantify ethnic bias, we develop a novel metric called Categorical Bias score. Then we propose two methods for mitigation; first using a multilingual model, and second using contextual word alignment of two monolingual models. We compare our proposed methods with monolingual BERT and show that these methods effectively alleviate the ethnic bias. Which of the two methods works better depends on the amount of NLP resources available for that language. We additionally experiment with Arabic and Greek to verify that our proposed methods work for a wider variety of languages.</abstract>
       <url hash="dca81d03">2021.emnlp-main.42</url>
@@ -600,7 +600,7 @@
       <author><first>Gabriel</first><last>Poesia</last></author>
       <author><first>Robert</first><last>Hawkins</last></author>
       <author><first>Dorsa</first><last>Sadigh</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>563–570</pages>
       <abstract>An overarching goal of natural language processing is to enable machines to communicate seamlessly with humans. However, natural language can be ambiguous or unclear. In cases of uncertainty, humans engage in an interactive process known as repair: asking questions and seeking clarification until their uncertainty is resolved. We propose a framework for building a visually grounded question-asking model capable of producing polar (yes-no) clarification questions to resolve misunderstandings in dialogue. Our model uses an expected information gain objective to derive informative questions from an off-the-shelf image captioner without requiring any supervised question-answer data. We demonstrate our model’s ability to pose questions that improve communicative success in a goal-oriented 20 questions game with synthetic and human answerers.</abstract>
       <url hash="3eac608c">2021.emnlp-main.44</url>
@@ -637,7 +637,7 @@
     <paper id="47">
       <title><fixed-case>T</fixed-case>ext2<fixed-case>M</fixed-case>ol: Cross-Modal Molecule Retrieval with Natural Language Queries</title>
       <author><first>Carl</first><last>Edwards</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>595–607</pages>
       <abstract>We propose a new task, Text2Mol, to retrieve molecules using natural language descriptions as queries. Natural language and molecules encode information in very different ways, which leads to the exciting but challenging problem of integrating these two very different modalities. Although some work has been done on text-based retrieval and structure-based retrieval, this new task requires integrating molecules and natural language more directly. Moreover, this can be viewed as an especially challenging cross-lingual retrieval problem by considering the molecules as a language with a very unique grammar. We construct a paired dataset of molecules and their corresponding text descriptions, which we use to learn an aligned common semantic embedding space for retrieval. We extend this to create a cross-modal attention-based model for explainability and reranking by interpreting the attentions as association rules. We also employ an ensemble approach to integrate our different architectures, which significantly improves results from 0.372 to 0.499 MRR. This new multimodal approach opens a new perspective on solving problems in chemistry literature understanding and molecular machine learning.</abstract>
@@ -731,7 +731,7 @@
       <author><first>Shiunzu</first><last>Kuo</last></author>
       <author><first>Vikas</first><last>Bhardwaj</last></author>
       <author><first>Anuj</first><last>Kumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>682–697</pages>
       <abstract>We propose a novel framework to train models to classify acceptability of responses generated by natural language generation (NLG) models, improving upon existing sentence transformation and model-based approaches. An NLG response is considered acceptable if it is both semantically correct and grammatical. We don’t make use of any human references making the classifiers suitable for runtime deployment. Training data for the classifiers is obtained using a 2-stage approach of first generating synthetic data using a combination of existing and new model-based approaches followed by a novel validation framework to filter and sort the synthetic data into acceptable and unacceptable classes. Our 2-stage approach adapts to a wide range of data representations and does not require additional data beyond what the NLG models are trained on. It is also independent of the underlying NLG model architecture, and is able to generate more realistic samples close to the distribution of the NLG model-generated responses. We present results on 5 datasets (WebNLG, Cleaned E2E, ViGGO, Alarm, and Weather) with varying data representations. We compare our framework with existing techniques that involve synthetic data generation using simple sentence transformations and/or model-based techniques, and show that building acceptability classifiers using data that resembles the generation model outputs followed by a validation framework outperforms the existing techniques, achieving state-of-the-art results. We also show that our techniques can be used in few-shot settings using self-training.</abstract>
       <url hash="110a97d3">2021.emnlp-main.53</url>
@@ -795,7 +795,7 @@
       <title>Learning Compact Metrics for <fixed-case>MT</fixed-case></title>
       <author><first>Amy</first><last>Pu</last></author>
       <author><first>Hyung Won</first><last>Chung</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Thibault</first><last>Sellam</last></author>
       <pages>751–762</pages>
@@ -810,7 +810,7 @@
     <paper id="59">
       <title>The Impact of Positional Encodings on Multilingual Compression</title>
       <author><first>Vinit</first><last>Ravishankar</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>763–777</pages>
       <abstract>In order to preserve word-order information in a non-autoregressive setting, transformer architectures tend to include positional knowledge, by (for instance) adding positional encodings to token embeddings. Several modifications have been proposed over the sinusoidal positional encodings used in the original transformer architecture; these include, for instance, separating position encodings and token embeddings, or directly modifying attention weights based on the distance between word pairs. We first show that surprisingly, while these modifications tend to improve monolingual language models, none of them result in better multilingual language models. We then answer why that is: sinusoidal encodings were explicitly designed to facilitate compositionality by allowing linear projections over arbitrary time steps. Higher variances in multilingual training distributions requires higher compression, in which case, compositionality becomes indispensable. Learned absolute positional encodings (e.g., in mBERT) tend to approximate sinusoidal embeddings in multilingual settings, but more complex positional encoding architectures lack the inductive bias to effectively learn cross-lingual alignment. In other words, while sinusoidal positional encodings were designed for monolingual applications, they are particularly useful in multilingual language models.</abstract>
       <url hash="fb003bd3">2021.emnlp-main.59</url>
@@ -822,7 +822,7 @@
       <title>Disentangling Representations of Text by Masking Transformers</title>
       <author><first>Xiongyi</first><last>Zhang</last></author>
       <author><first>Jan-Willem</first><last>van de Meent</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>778–791</pages>
       <abstract>Representations from large pretrained models such as BERT encode a range of features into monolithic vectors, affording strong predictive accuracy across a range of downstream tasks. In this paper we explore whether it is possible to learn disentangled representations by identifying existing subnetworks within pretrained models that encode distinct, complementary aspects. Concretely, we learn binary masks over transformer weights or hidden units to uncover subsets of features that correlate with a specific factor of variation; this eliminates the need to train a disentangled model from scratch for a particular task. We evaluate this method with respect to its ability to disentangle representations of sentiment from genre in movie reviews, toxicity from dialect in Tweets, and syntax from semantics. By combining masking with magnitude pruning we find that we can identify sparse subnetworks within BERT that strongly encode particular aspects (e.g., semantics) while only weakly encoding others (e.g., syntax). Moreover, despite only learning masks, disentanglement-via-masking performs as well as — and often better than —previously proposed methods based on variational autoencoders and adversarial training.</abstract>
       <url hash="87689b47">2021.emnlp-main.60</url>
@@ -860,7 +860,7 @@
       <author><first>Cory</first><last>Paik</last></author>
       <author><first>Stéphane</first><last>Aroca-Ouellette</last></author>
       <author><first>Alessandro</first><last>Roncone</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>823–835</pages>
       <abstract>Recent work has raised concerns about the inherent limitations of text-only pretraining. In this paper, we first demonstrate that reporting bias, the tendency of people to not state the obvious, is one of the causes of this limitation, and then investigate to what extent multimodal training can mitigate this issue. To accomplish this, we 1) generate the Color Dataset (CoDa), a dataset of human-perceived color distributions for 521 common objects; 2) use CoDa to analyze and compare the color distribution found in text, the distribution captured by language models, and a human’s perception of color; and 3) investigate the performance differences between text-only and multimodal models on CoDa. Our results show that the distribution of colors that a language model recovers correlates more strongly with the inaccurate distribution found in text than with the ground-truth, supporting the claim that reporting bias negatively impacts and inherently limits text-only training. We then demonstrate that multimodal models can leverage their visual training to mitigate these effects, providing a promising avenue for future research.</abstract>
       <url hash="d5503eb4">2021.emnlp-main.63</url>
@@ -910,7 +910,7 @@
     </paper>
     <paper id="67">
       <title>Jump-Starting Item Parameters for Adaptive Language Tests</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Kevin P.</first><last>Yancey</last></author>
       <author><first>Geoffrey T.</first><last>LaFlair</last></author>
       <author><first>Jesse</first><last>Egbert</last></author>
@@ -948,7 +948,7 @@
       <author><first>Matúš</first><last>Falis</last></author>
       <author><first>Hang</first><last>Dong</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>907–912</pages>
       <abstract>Large-Scale Multi-Label Text Classification (LMTC) includes tasks with hierarchical label spaces, such as automatic assignment of ICD-9 codes to discharge summaries. Performance of models in prior art is evaluated with standard precision, recall, and F1 measures without regard for the rich hierarchical structure. In this work we argue for hierarchical evaluation of the predictions of neural LMTC models. With the example of the ICD-9 ontology we describe a structural issue in the representation of the structured label space in prior art, and propose an alternative representation based on the depth of the ontology. We propose a set of metrics for hierarchical evaluation using the depth-based representation. We compare the evaluation scores from the proposed metrics with previously used metrics on prior art LMTC models for ICD-9 coding in MIMIC-III. We also propose further avenues of research involving the proposed ontological representation.</abstract>
       <url hash="0fedef02">2021.emnlp-main.69</url>
@@ -1018,7 +1018,7 @@
       <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last></author>
       <author><first>Lena</first><last>Jäger</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>963–980</pages>
       <abstract>The uniform information density (UID) hypothesis posits a preference among language users for utterances structured such that information is distributed uniformly across a signal. While its implications on language production have been well explored, the hypothesis potentially makes predictions about language comprehension and linguistic acceptability as well. Further, it is unclear how uniformity in a linguistic signal—or lack thereof—should be measured, and over which linguistic unit, e.g., the sentence or language level, this uniformity should hold. Here we investigate these facets of the UID hypothesis using reading time and acceptability data. While our reading time results are generally consistent with previous work, they are also consistent with a weakly super-linear effect of surprisal, which would be compatible with UID’s predictions. For acceptability judgments, we find clearer evidence that non-uniformity in information density is predictive of lower acceptability. We then explore multiple operationalizations of UID, motivated by different interpretations of the original hypothesis, and analyze the scope over which the pressure towards uniformity is exerted. The explanatory power of a subset of the proposed operationalizations suggests that the strongest trend may be a regression towards a mean surprisal across the language, rather than the phrase, sentence, or document—a finding that supports a typical interpretation of UID, namely that it is the byproduct of language users maximizing the use of a (hypothetical) communication channel.</abstract>
       <url hash="75220e7a">2021.emnlp-main.74</url>
@@ -1068,7 +1068,7 @@
       <author><first>Kyoung-Rok</first><last>Jang</last></author>
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Giwon</first><last>Hong</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <author><first>Joohee</first><last>Park</last></author>
       <author><first>Taewon</first><last>Yoon</last></author>
       <author><first>Heecheol</first><last>Seo</last></author>
@@ -1161,7 +1161,7 @@
       <title><fixed-case>M</fixed-case>ind<fixed-case>C</fixed-case>raft: Theory of Mind Modeling for Situated Dialogue in Collaborative Tasks</title>
       <author><first>Cristian-Paul</first><last>Bara</last></author>
       <author><first>Sky</first><last>CH-Wang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1112–1125</pages>
       <abstract>An ideal integration of autonomous agents in a human world implies that they are able to collaborate on human terms. In particular, theory of mind plays an important role in maintaining common ground during human collaboration and communication. To enable theory of mind modeling in situated interactions, we introduce a fine-grained dataset of collaborative tasks performed by pairs of human subjects in the 3D virtual blocks world of Minecraft. It provides information that captures partners’ beliefs of the world and of each other as an interaction unfolds, bringing abundant opportunities to study human collaborative behaviors in situated language communication. As a first step towards our goal of developing embodied AI agents able to infer belief states of collaborative partners in situ, we build and present results on computational models for several theory of mind tasks.</abstract>
       <url hash="f79e0dbd">2021.emnlp-main.85</url>
@@ -1187,7 +1187,7 @@
     <paper id="87">
       <title>Cross-lingual Intermediate Fine-tuning improves Dialogue State Tracking</title>
       <author><first>Nikita</first><last>Moghe</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>1137–1150</pages>
       <abstract>Recent progress in task-oriented neural dialogue systems is largely focused on a handful of languages, as annotation of training data is tedious and expensive. Machine translation has been used to make systems multilingual, but this can introduce a pipeline of errors. Another promising solution is using cross-lingual transfer learning through pretrained multilingual models. Existing methods train multilingual models with additional code-mixed task data or refine the cross-lingual representations through parallel ontologies. In this work, we enhance the transfer learning process by intermediate fine-tuning of pretrained multilingual models, where the multilingual models are fine-tuned with different but related data and/or tasks. Specifically, we use parallel and conversational movie subtitles datasets to design cross-lingual intermediate tasks suitable for downstream dialogue tasks. We use only 200K lines of parallel data for intermediate fine-tuning which is already available for 1782 language pairs. We test our approach on the cross-lingual dialogue state tracking task for the parallel MultiWoZ (English -&gt; Chinese, Chinese -&gt; English) and Multilingual WoZ (English -&gt; German, English -&gt; Italian) datasets. We achieve impressive improvements (&gt; 20% on joint goal accuracy) on the parallel MultiWoZ dataset and the Multilingual WoZ dataset over the vanilla baseline with only 10% of the target language task data and zero-shot setup respectively.</abstract>
@@ -1254,10 +1254,10 @@
     <paper id="92">
       <title>Label Verbalization and Entailment for Effective Zero and Few-Shot Relation Extraction</title>
       <author><first>Oscar</first><last>Sainz</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1199–1212</pages>
       <abstract>Relation extraction systems require large amounts of labeled examples which are costly to annotate. In this work we reformulate relation extraction as an entailment task, with simple, hand-made, verbalizations of relations produced in less than 15 min per relation. The system relies on a pretrained textual entailment engine which is run as-is (no training examples, zero-shot) or further fine-tuned on labeled examples (few-shot or fully trained). In our experiments on TACRED we attain 63% F1 zero-shot, 69% with 16 examples per relation (17% points better than the best supervised system on the same conditions), and only 4 points short to the state-of-the-art (which uses 20 times more training data). We also show that the performance can be improved significantly with larger entailment models, up to 12 points in zero-shot, allowing to report the best results to date on TACRED when fully trained. The analysis shows that our few-shot systems are specially effective when discriminating between relations, and that the performance difference in low data regimes comes mainly from identifying no-relation cases.</abstract>
       <url hash="26a706c7">2021.emnlp-main.92</url>
@@ -1299,7 +1299,7 @@
       <author><first>Jiangtao</first><last>Feng</last></author>
       <author><first>Lin</first><last>Qiu</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>1239–1250</pages>
@@ -1368,7 +1368,7 @@
       <title>Evaluating the Evaluation Metrics for Style Transfer: A Case Study in Multilingual Formality Transfer</title>
       <author><first>Eleftheria</first><last>Briakou</last></author>
       <author><first>Sweta</first><last>Agrawal</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>1321–1336</pages>
       <abstract>While the field of style transfer (ST) has been growing rapidly, it has been hampered by a lack of standardized practices for automatic evaluation. In this paper, we evaluate leading automatic metrics on the oft-researched task of formality style transfer. Unlike previous evaluations, which focus solely on English, we expand our focus to Brazilian-Portuguese, French, and Italian, making this work the first multilingual evaluation of metrics in ST. We outline best practices for automatic evaluation in (formality) style transfer and identify several models that correlate well with human judgments and are robust across languages. We hope that this work will help accelerate development in ST, where human evaluation is often challenging to collect.</abstract>
@@ -1397,7 +1397,7 @@
     </paper>
     <paper id="102">
       <title>Understanding Politics via Contextualized Discourse Processing</title>
-      <author><first>Rajkumar</first><last>Pujari</last></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>1353–1367</pages>
       <abstract>Politicians often have underlying agendas when reacting to events. Arguments in contexts of various events reflect a fairly consistent set of agendas for a given entity. In spite of recent advances in Pretrained Language Models, those text representations are not designed to capture such nuanced patterns. In this paper, we propose a Compositional Reader model consisting of encoder and composer modules, that captures and leverages such information to generate more effective representations for entities, issues, and events. These representations are contextualized by tweets, press releases, issues, news articles, and participating entities. Our model processes several documents at once and generates composed representations for multiple entities over several issues or events. Via qualitative and quantitative empirical analysis, we show that these representations are meaningful and effective.</abstract>
@@ -1448,7 +1448,7 @@
       <title>Focus on what matters: Applying Discourse Coherence Theory to Cross Document Coreference</title>
       <author><first>William</first><last>Held</last></author>
       <author><first>Dan</first><last>Iter</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1406–1417</pages>
       <abstract>Performing event and entity coreference resolution across documents vastly increases the number of candidate mentions, making it intractable to do the full <tex-math>n^2</tex-math> pairwise comparisons. Existing approaches simplify by considering coreference only within document clusters, but this fails to handle inter-cluster coreference, common in many applications. As a result cross-document coreference algorithms are rarely applied to downstream tasks. We draw on an insight from discourse coherence theory: potential coreferences are constrained by the reader’s discourse focus. We model the entities/events in a reader’s focus as a neighborhood within a learned latent embedding space which minimizes the distance between mentions and the centroids of their gold coreference clusters. We then use these neighborhoods to sample only hard negatives to train a fine-grained classifier on mention pairs and their local discourse features. Our approach achieves state-of-the-art results for both events and entities on the ECB+, Gun Violence, Football Coreference, and Cross-Domain Cross-Document Coreference corpora. Furthermore, training on multiple corpora improves average performance across all datasets by 17.2 F1 points, leading to a robust coreference resolution model that is now feasible to apply to downstream tasks.</abstract>
       <url hash="289c46b6">2021.emnlp-main.106</url>
@@ -1500,7 +1500,7 @@
       <title><fixed-case>R</fixed-case>ule<fixed-case>BERT</fixed-case>: Teaching Soft Rules to Pre-Trained Language Models</title>
       <author><first>Mohammed</first><last>Saeed</last></author>
       <author><first>Naser</first><last>Ahmadi</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Paolo</first><last>Papotti</last></author>
       <pages>1460–1476</pages>
       <abstract>While pre-trained language models (PLMs) are the go-to solution to tackle many natural language processing problems, they are still very limited in their ability to capture and to use common-sense knowledge. In fact, even if information is available in the form of approximate (soft) logical rules, it is not clear how to transfer it to a PLM in order to improve its performance for deductive reasoning tasks. Here, we aim to bridge this gap by teaching PLMs how to reason with soft Horn rules. We introduce a classification task where, given facts and soft rules, the PLM should return a prediction with a probability for a given hypothesis. We release the first dataset for this task, and we propose a revised loss function that enables the PLM to learn how to predict precise probabilities for the task. Our evaluation results show that the resulting fine-tuned models achieve very high performance, even on logical rules that were unseen at training. Moreover, we demonstrate that logical notions expressed by the rules are transferred to the fine-tuned model, yielding state-of-the-art results on external datasets.</abstract>
@@ -1536,9 +1536,9 @@
     <paper id="113">
       <title>Shortcutted Commonsense: Data Spuriousness in Deep Learning of Commonsense Reasoning</title>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <pages>1504–1521</pages>
       <abstract>Commonsense is a quintessential human capacity that has been a core challenge to Artificial Intelligence since its inception. Impressive results in Natural Language Processing tasks, including in commonsense reasoning, have consistently been achieved with Transformer neural language models, even matching or surpassing human performance in some benchmarks. Recently, some of these advances have been called into question: so called data artifacts in the training data have been made evident as spurious correlations and shallow shortcuts that in some cases are leveraging these outstanding results. In this paper we seek to further pursue this analysis into the realm of commonsense related language processing tasks. We undertake a study on different prominent benchmarks that involve commonsense reasoning, along a number of key stress experiments, thus seeking to gain insight on whether the models are learning transferable generalizations intrinsic to the problem at stake or just taking advantage of incidental shortcuts in the data items. The results obtained indicate that most datasets experimented with are problematic, with models resorting to non-robust features and appearing not to be learning and generalizing towards the overall tasks intended to be conveyed or exemplified by the datasets.</abstract>
       <url hash="383365d9">2021.emnlp-main.113</url>
@@ -1647,7 +1647,7 @@
       <author><first>John</first><last>Hewitt</last></author>
       <author><first>Kawin</first><last>Ethayarajh</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1626–1639</pages>
       <abstract>Probing experiments investigate the extent to which neural representations make properties—like part-of-speech—predictable. One suggests that a representation encodes a property if probing that representation produces higher accuracy than probing a baseline representation like non-contextual word embeddings. Instead of using baselines as a point of comparison, we’re interested in measuring information that is contained in the representation but not in the baseline. For example, current methods can detect when a representation is more useful than the word identity (a baseline) for predicting part-of-speech; however, they cannot detect when the representation is predictive of just the aspects of part-of-speech not explainable by the word identity. In this work, we extend a theory of usable information called V-information and propose conditional probing, which explicitly conditions on the information in the baseline. In a case study, we find that after conditioning on non-contextual word embeddings, properties like part-of-speech are accessible at deeper layers of a network than previously thought.</abstract>
       <url hash="f129b85e">2021.emnlp-main.122</url>
@@ -1657,10 +1657,10 @@
     </paper>
     <paper id="123">
       <title><fixed-case>GFST</fixed-case>: <fixed-case>G</fixed-case>ender-Filtered Self-Training for More Accurate Gender in Translation</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Anna</first><last>Currey</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <pages>1640–1654</pages>
       <abstract>Targeted evaluations have found that machine translation systems often output incorrect gender in translations, even when the gender is clear from context. Furthermore, these incorrectly gendered translations have the potential to reflect or amplify social biases. We propose gender-filtered self-training (GFST) to improve gender translation accuracy on unambiguously gendered inputs. Our GFST approach uses a source monolingual corpus and an initial model to generate gender-specific pseudo-parallel corpora which are then filtered and added to the training data. We evaluate GFST on translation from English into five languages, finding that it improves gender accuracy without damaging generic quality. We also show the viability of GFST on several experimental settings, including re-training from scratch, fine-tuning, controlling the gender balance of the data, forward translation, and back-translation.</abstract>
       <url hash="22313a5b">2021.emnlp-main.123</url>
@@ -1672,7 +1672,7 @@
       <title>“Wikily” Supervised Neural Translation Tailored to Cross-Lingual Tasks</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>1655–1670</pages>
       <abstract>We present a simple but effective approach for leveraging Wikipedia for neural machine translation as well as cross-lingual tasks of image captioning and dependency parsing without using any direct supervision from external parallel data or supervised models in the target language. We show that first sentences and titles of linked Wikipedia pages, as well as cross-lingual image captions, are strong signals for a seed parallel data to extract bilingual dictionaries and cross-lingual word embeddings for mining parallel text from Wikipedia. Our final model achieves high BLEU scores that are close to or sometimes higher than strong <i>supervised</i> baselines in low-resource languages; e.g. supervised BLEU of 4.0 versus 12.1 from our model in English-to-Kazakh. Moreover, we tailor our <i>wikily</i> translation models to unsupervised image captioning, and cross-lingual dependency parser transfer. In image captioning, we train a multi-tasking machine translation and image captioning pipeline for Arabic and English from which the Arabic training data is a <i>wikily</i> translation of the English captioning data. Our captioning results on Arabic are slightly <i>better</i> than that of its supervised model. In dependency parsing, we translate a large amount of monolingual text, and use it as an artificial training data in an <i>annotation projection</i> framework. We show that our model outperforms recent work on cross-lingual transfer of dependency parsers.</abstract>
       <url hash="6485cc00">2021.emnlp-main.124</url>
@@ -1688,7 +1688,7 @@
       <author><first>Shaohan</first><last>Huang</last></author>
       <author><first>Saksham</first><last>Singhal</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Xia</first><last>Song</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>1671–1683</pages>
@@ -1701,7 +1701,7 @@
     <paper id="126">
       <title>Improving Zero-Shot Cross-Lingual Transfer Learning via Robust Training</title>
       <author><first>Kuan-Hao</first><last>Huang</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <pages>1684–1697</pages>
@@ -1715,7 +1715,7 @@
       <title>Speechformer: Reducing Information Loss in Direct Speech Translation</title>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1698–1706</pages>
       <abstract>Transformer-based models have gained increasing popularity achieving state-of-the-art performance in many research fields including speech translation. However, Transformer’s quadratic complexity with respect to the input sequence length prevents its adoption as is with audio signals, which are typically represented by long sequences. Current solutions resort to an initial sub-optimal compression based on a fixed sampling of raw audio features. Therefore, potentially useful linguistic information is not accessible to higher-level layers in the architecture. To solve this issue, we propose Speechformer, an architecture that, thanks to reduced memory usage in the attention layers, avoids the initial lossy compression and aggregates information only at a higher level according to more informed linguistic criteria. Experiments on three language pairs (en→de/es/nl) show the efficacy of our solution, with gains of up to 0.8 BLEU on the standard MuST-C corpus and of up to 4.0 BLEU in a low resource scenario.</abstract>
@@ -1728,7 +1728,7 @@
       <title>Is “moby dick” a Whale or a Bird? Named Entities and Terminology in Speech Translation</title>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Susana</first><last>Rodríguez</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1707–1716</pages>
@@ -1754,7 +1754,7 @@
     <paper id="130">
       <title>Translation-based Supervision for Policy Generation in Simultaneous Neural Machine Translation</title>
       <author><first>Ashkan</first><last>Alinejad</last></author>
-      <author><first>Hassan S.</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan S.</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>1734–1744</pages>
       <abstract>In simultaneous machine translation, finding an agent with the optimal action sequence of reads and writes that maintain a high level of translation quality while minimizing the average lag in producing target tokens remains an extremely challenging problem. We propose a novel supervised learning approach for training an agent that can detect the minimum number of reads required for generating each target token by comparing simultaneous translations against full-sentence translations during training to generate oracle action sequences. These oracle sequences can then be used to train a supervised model for action generation at inference time. Our approach provides an alternative to current heuristic methods in simultaneous translation by introducing a new training objective, which is easier to train than previous attempts at training the agent using reinforcement learning techniques for this task. Our experimental results show that our novel training method for action generation produces much higher quality translations while minimizing the average lag in simultaneous translation.</abstract>
@@ -1793,7 +1793,7 @@
       <author><first>Vivek</first><last>Ramanujan</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1766–1781</pages>
       <abstract>The capacity of neural networks like the widely adopted transformer is known to be very high. Evidence is emerging that they learn successfully due to inductive bias in the training routine, typically a variant of gradient descent (GD). To better understand this bias, we study the tendency for transformer parameters to grow in magnitude (<tex-math>\ell_2</tex-math> norm) during training, and its implications for the emergent representations within self attention layers. Empirically, we document norm growth in the training of transformer language models, including T5 during its pretraining. As the parameters grow in magnitude, we prove that the network approximates a discretized network with saturated activation functions. Such “saturated” networks are known to have a reduced capacity compared to the full network family that can be described in terms of formal languages and automata. Our results suggest saturation is a new characterization of an inductive bias implicit in GD of particular interest for NLP. We leverage the emergent discrete structure in a saturated transformer to analyze the role of different attention heads, finding that some focus locally on a small number of positions, while other heads compute global averages, allowing counting. We believe understanding the interplay between these two capabilities may shed further light on the structure of computation within large transformers.</abstract>
       <url hash="bdb4b222">2021.emnlp-main.133</url>
@@ -1819,10 +1819,10 @@
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>William</first><last>Merrill</last></author>
       <author><first>Jesse</first><last>Dodge</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Alexis</first><last>Ross</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1801–1813</pages>
       <abstract>Much recent work in NLP has documented dataset artifacts, bias, and spurious correlations between input features and output labels. However, how to tell which features have “spurious” instead of legitimate correlations is typically left unspecified. In this work we argue that for complex language understanding tasks, all simple feature correlations are spurious, and we formalize this notion into a class of problems which we call competency problems. For example, the word “amazing” on its own should not give information about a sentiment label independent of the context in which it appears, which could include negation, metaphor, sarcasm, etc. We theoretically analyze the difficulty of creating data for competency problems when human bias is taken into account, showing that realistic datasets will increasingly deviate from competency problems as dataset size increases. This analysis gives us a simple statistical test for dataset artifacts, which we use to show more subtle biases than were described in prior work, including demonstrating that models are inappropriately affected by these less extreme biases. Our theoretical treatment of this problem also allows us to analyze proposed solutions, such as making local edits to dataset instances, and to give recommendations for future data collection and model design efforts that target competency problems.</abstract>
       <url hash="76991bde">2021.emnlp-main.135</url>
@@ -1836,7 +1836,7 @@
       <author><first>Huaxiu</first><last>Yao</last></author>
       <author><first>Ying-xin</first><last>Wu</last></author>
       <author><first>Maruan</first><last>Al-Shedivat</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1814–1821</pages>
       <abstract>Meta-learning has achieved great success in leveraging the historical learned knowledge to facilitate the learning process of the new task. However, merely learning the knowledge from the historical tasks, adopted by current meta-learning algorithms, may not generalize well to testing tasks when they are not well-supported by training tasks. This paper studies a low-resource text classification problem and bridges the gap between meta-training and meta-testing tasks by leveraging the external knowledge bases. Specifically, we propose KGML to introduce additional representation for each sentence learned from the extracted sentence-specific knowledge graph. The extensive experiments on three datasets demonstrate the effectiveness of KGML under both supervised adaptation and unsupervised adaptation settings.</abstract>
       <url hash="5e07393f">2021.emnlp-main.136</url>
@@ -1848,7 +1848,7 @@
       <title>Sentence Bottleneck Autoencoders from Transformer Language Models</title>
       <author><first>Ivan</first><last>Montero</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1822–1831</pages>
       <abstract>Representation learning for text via pretraining a language model on a large corpus has become a standard starting point for building NLP systems. This approach stands in contrast to autoencoders, also trained on raw text, but with the objective of learning to encode each input as a vector that allows full reconstruction. Autoencoders are attractive because of their latent space structure and generative properties. We therefore explore the construction of a sentence-level autoencoder from a pretrained, frozen transformer language model. We adapt the masked language modeling objective as a generative, denoising one, while only training a sentence bottleneck and a single-layer modified transformer decoder. We demonstrate that the sentence representations discovered by our model achieve better quality than previous methods that extract representations from pretrained transformers on text similarity tasks, style transfer (an example of controlled generation), and single-sentence classification tasks in the GLUE benchmark, while using fewer parameters than large pretrained models.</abstract>
       <url hash="8bde0262">2021.emnlp-main.137</url>
@@ -1860,7 +1860,7 @@
       <title>Efficient Contrastive Learning via Novel Data Augmentation and Curriculum Learning</title>
       <author><first>Seonghyeon</first><last>Ye</last></author>
       <author><first>Jiseon</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1832–1838</pages>
       <abstract>We introduce EfficientCL, a memory-efficient continual pretraining method that applies contrastive learning with novel data augmentation and curriculum learning. For data augmentation, we stack two types of operation sequentially: cutoff and PCA jittering. While pretraining steps proceed, we apply curriculum learning by incrementing the augmentation degree for each difficulty step. After data augmentation is finished, contrastive learning is applied on projected embeddings of original and augmented examples. When finetuned on GLUE benchmark, our model outperforms baseline models, especially for sentence-level tasks. Additionally, this improvement is capable with only 70% of computational memory compared to the baseline model.</abstract>
       <url hash="fbd8e08f">2021.emnlp-main.138</url>
@@ -1886,7 +1886,7 @@
       <author><first>Zeqiu</first><last>Wu</last></author>
       <author><first>Bo-Ru</first><last>Lu</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>1852–1863</pages>
       <abstract>Identifying relevant knowledge to be used in conversational systems that are grounded in long documents is critical to effective response generation. We introduce a knowledge identification model that leverages the document structure to provide dialogue-contextualized passage encodings and better locate knowledge relevant to the conversation. An auxiliary loss captures the history of dialogue-document connections. We demonstrate the effectiveness of our model on two document-grounded conversational datasets and provide analyses showing generalization to unseen documents and long dialogue contexts.</abstract>
       <url hash="e3d5606e">2021.emnlp-main.140</url>
@@ -1953,14 +1953,14 @@
     <paper id="144">
       <title>Few-Shot Intent Detection via Contrastive Pre-Training and Fine-Tuning</title>
       <author><first>Jianguo</first><last>Zhang</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Xiang</first><last>Chen</last></author>
       <author><first>Zhiwei</first><last>Liu</last></author>
       <author><first>Congying</first><last>Xia</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Walter</first><last>Chang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1906–1912</pages>
       <abstract>In this work, we focus on a more challenging few-shot intent detection scenario where many intents are fine-grained and semantically similar. We present a simple yet effective few-shot intent detection schema via contrastive pre-training and fine-tuning. Specifically, we first conduct self-supervised contrastive pre-training on collected intent datasets, which implicitly learns to discriminate semantically similar utterances without using any labels. We then perform few-shot intent detection together with supervised contrastive learning, which explicitly pulls utterances from the same intent closer and pushes utterances across different intents farther. Experimental results show that our proposed method achieves state-of-the-art performance on three challenging intent detection datasets under 5-shot and 10-shot settings.</abstract>
       <url hash="35bcd944">2021.emnlp-main.144</url>
@@ -2008,10 +2008,10 @@
     </paper>
     <paper id="148">
       <title>Robust Retrieval Augmented Generation for Zero-shot Slot Filling</title>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Gaetano</first><last>Rossiello</last></author>
       <author><first>Md Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>1939–1949</pages>
       <abstract>Automatically inducing high quality knowledge graphs from a given collection of documents still remains a challenging problem in AI. One way to make headway for this problem is through advancements in a related task known as slot filling. In this task, given an entity query in form of [Entity, Slot, ?], a system is asked to ‘fill’ the slot by generating or extracting the missing value exploiting evidence extracted from relevant passage(s) in the given document collection. The recent works in the field try to solve this task in an end-to-end fashion using retrieval-based language models. In this paper, we present a novel approach to zero-shot slot filling that extends dense passage retrieval with hard negatives and robust training procedures for retrieval augmented generation models. Our model reports large improvements on both T-REx and zsRE slot filling datasets, improving both passage retrieval and slot value generation, and ranking at the top-1 position in the KILT leaderboard. Moreover, we demonstrate the robustness of our system showing its domain adaptation capability on a new variant of the TACRED dataset for slot filling, through a combination of zero/few-shot learning. We release the source code and pre-trained models.</abstract>
       <url hash="9c7ab772">2021.emnlp-main.148</url>
@@ -2107,10 +2107,10 @@
     </paper>
     <paper id="155">
       <title>Fairness-aware Class Imbalanced Learning</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>2045–2051</pages>
       <abstract>Class imbalance is a common challenge in many NLP tasks, and has clear connections to bias, in that bias in training data often leads to higher accuracy for majority groups at the expense of minority groups. However there has traditionally been a disconnect between research on class-imbalanced learning and mitigating bias, and only recently have the two been looked at through a common lens. In this work we evaluate long-tail learning methods for tweet sentiment and occupation classification, and extend a margin-loss based approach with methods to enforce fairness. We empirically show through controlled experiments that the proposed approaches help mitigate both class imbalance and demographic biases.</abstract>
@@ -2228,7 +2228,7 @@
       <author><first>Soravit</first><last>Changpinyo</last></author>
       <author><first>Boqing</first><last>Gong</last></author>
       <author><first>Piyush</first><last>Sharma</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <author><first>Radu</first><last>Soricut</last></author>
       <pages>2148–2166</pages>
       <abstract>One challenge in evaluating visual question answering (VQA) models in the cross-dataset adaptation setting is that the distribution shifts are multi-modal, making it difficult to identify if it is the shifts in visual or language features that play a key role. In this paper, we propose a semi-automatic framework for generating disentangled shifts by introducing a controllable visual question-answer generation (VQAG) module that is capable of generating highly-relevant and diverse question-answer pairs with the desired dataset style. We use it to create CrossVQA, a collection of test splits for assessing VQA generalization based on the VQA2, VizWiz, and Open Images datasets. We provide an analysis of our generated datasets and demonstrate its utility by using them to evaluate several state-of-the-art VQA systems. One important finding is that the visual shifts in cross-dataset VQA matter more than the language shifts. More broadly, we present a scalable framework for systematically evaluating the machine with little human intervention.</abstract>
@@ -2271,7 +2271,7 @@
     <paper id="167">
       <title>Effect of Visual Extensions on Natural Language Understanding in Vision-and-Language Models</title>
       <author><first>Taichi</first><last>Iki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>2189–2196</pages>
       <abstract>A method for creating a vision-and-language (V&amp;L) model is to extend a language model through structural modifications and V&amp;L pre-training. Such an extension aims to make a V&amp;L model inherit the capability of natural language understanding (NLU) from the original language model. To see how well this is achieved, we propose to evaluate V&amp;L models using an NLU benchmark (GLUE). We compare five V&amp;L models, including single-stream and dual-stream models, trained with the same pre-training. Dual-stream models, with their higher modality independence achieved by approximately doubling the number of parameters, are expected to preserve the NLU capability better. Our main finding is that the dual-stream scores are not much different than the single-stream scores, contrary to expectation. Further analysis shows that pre-training causes the performance drop in NLU tasks with few exceptions. These results suggest that adopting a single-stream structure and devising the pre-training could be an effective method for improving the maintenance of language knowledge in V&amp;L extensions.</abstract>
       <url hash="92545804">2021.emnlp-main.167</url>
@@ -2283,7 +2283,7 @@
       <title>Neural Path Hunter: Reducing Hallucination in Dialogue Systems via Path Grounding</title>
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <author><first>Avishek Joey</first><last>Bose</last></author>
       <pages>2197–2214</pages>
       <abstract>Dialogue systems powered by large pre-trained language models exhibit an innate ability to deliver fluent and natural-sounding responses. Despite their impressive performance, these models are fitful and can often generate factually incorrect statements impeding their widespread adoption. In this paper, we focus on the task of improving faithfulness and reducing hallucination of neural dialogue systems to known facts supplied by a Knowledge Graph (KG). We propose Neural Path Hunter which follows a generate-then-refine strategy whereby a generated response is amended using the KG. Neural Path Hunter leverages a separate token-level fact critic to identify plausible sources of hallucination followed by a refinement stage that retrieves correct entities by crafting a query signal that is propagated over a k-hop subgraph. We empirically validate our proposed approach on the OpenDialKG dataset (Moon et al., 2019) against a suite of metrics and report a relative improvement of faithfulness over dialogue responses by 20.35% based on FeQA (Durmus et al., 2020). The code is available at <url>https://github.com/nouhadziri/Neural-Path-Hunter</url>.</abstract>
@@ -2532,7 +2532,7 @@
       <author><first>Yubin</first><last>Ge</last></author>
       <author><first>Jiali</first><last>Zeng</last></author>
       <author><first>Junfeng</first><last>Yao</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <pages>2407–2417</pages>
       <abstract>Dominant sentence ordering models can be classified into pairwise ordering models and set-to-sequence models. However, there is little attempt to combine these two types of models, which inituitively possess complementary advantages. In this paper, we propose a novel sentence ordering framework which introduces two classifiers to make better use of pairwise orderings for graph-based sentence ordering (Yin et al. 2019, 2021). Specially, given an initial sentence-entity graph, we first introduce a graph-based classifier to predict pairwise orderings between linked sentences. Then, in an iterative manner, based on the graph updated by previously predicted high-confident pairwise orderings, another classifier is used to predict the remaining uncertain pairwise orderings. At last, we adapt a GRN-based sentence ordering model (Yin et al. 2019, 2021) on the basis of final graph. Experiments on five commonly-used datasets demonstrate the effectiveness and generality of our model. Particularly, when equipped with BERT (Devlin et al. 2019) and FHDecoder (Yin et al. 2020), our model achieves state-of-the-art performance. Our code is available at <url>https://github.com/DeepLearnXMU/IRSEG</url>.</abstract>
@@ -2547,7 +2547,7 @@
       <author><first>Yaxin</first><last>Fan</last></author>
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>2418–2431</pages>
       <abstract>Implicit discourse relation recognition (IDRR) is a critical task in discourse analysis. Previous studies only regard it as a classification task and lack an in-depth understanding of the semantics of different relations. Therefore, we first view IDRR as a generation task and further propose a method joint modeling of the classification and generation. Specifically, we propose a joint model, CG-T5, to recognize the relation label and generate the target sentence containing the meaning of relations simultaneously. Furthermore, we design three target sentence forms, including the question form, for the generation model to incorporate prior knowledge. To address the issue that large discourse units are hardly embedded into the target sentence, we also propose a target sentence construction mechanism that automatically extracts core sentences from those large discourse units. Experimental results both on Chinese MCDTB and English PDTB datasets show that our model CG-T5 achieves the best performance against several state-of-the-art systems.</abstract>
       <url hash="40e9958a">2021.emnlp-main.187</url>
@@ -2557,9 +2557,9 @@
     </paper>
     <paper id="188">
       <title>A Language Model-based Generative Classifier for Sentence-level Discourse Parsing</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2432–2446</pages>
       <abstract>Discourse segmentation and sentence-level discourse parsing play important roles for various NLP tasks to consider textual coherence. Despite recent achievements in both tasks, there is still room for improvement due to the scarcity of labeled data. To solve the problem, we propose a language model-based generative classifier (LMGC) for using more information from labels by treating the labels as an input while enhancing label representations by embedding descriptions for each label. Moreover, since this enables LMGC to make ready the representations for labels, unseen in the pre-training step, we can effectively use a pre-trained language model in LMGC. Experimental results on the RST-DT dataset show that our LMGC achieved the state-of-the-art F1 score of 96.72 in discourse segmentation. It further achieved the state-of-the-art relation F1 scores of 84.69 with gold EDU boundaries and 81.18 with automatically segmented boundaries, respectively, in sentence-level discourse parsing.</abstract>
       <url hash="23efabaf">2021.emnlp-main.188</url>
@@ -2616,7 +2616,7 @@
     </paper>
     <paper id="192">
       <title><fixed-case>FL</fixed-case>i<fixed-case>T</fixed-case>ext: A Faster and Lighter Semi-Supervised Text Classification with Convolution Networks</title>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <author><first>Zhang</first><last>Mengchao</last></author>
       <author><first>Fu</first><last>Zhibing</last></author>
       <author><first>Panpan</first><last>Hou</last></author>
@@ -2630,10 +2630,10 @@
     </paper>
     <paper id="193">
       <title>Evaluating Debiasing Techniques for Intersectional Biases</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>2492–2498</pages>
       <abstract>Bias is pervasive for NLP models, motivating the development of automatic debiasing techniques. Evaluation of NLP debiasing methods has largely been limited to binary attributes in isolation, e.g., debiasing with respect to binary gender or race, however many corpora involve multiple such attributes, possibly with higher cardinality. In this paper we argue that a truly fair model must consider ‘gerrymandering’ groups which comprise not only single attributes, but also intersectional groups. We evaluate a form of bias-constrained model which is new to NLP, as well an extension of the iterative nullspace projection technique which can handle multiple identities.</abstract>
@@ -2661,7 +2661,7 @@
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Yan</first><last>Wang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>2510–2521</pages>
       <abstract>Unsupervised style transfer models are mainly based on an inductive learning approach, which represents the style as embeddings, decoder parameters, or discriminator parameters and directly applies these general rules to the test cases. However, the lacking of parallel corpus hinders the ability of these inductive learning methods on this task. As a result, it is likely to cause severe inconsistent style expressions, like ‘the salad is rude’. To tackle this problem, we propose a novel transductive learning approach in this paper, based on a retrieval-based context-aware style representation. Specifically, an attentional encoder-decoder with a retriever framework is utilized. It involves top-K relevant sentences in the target style in the transfer process. In this way, we can learn a context-aware style embedding to alleviate the above inconsistency problem. In this paper, both sparse (BM25) and dense retrieval functions (MIPS) are used, and two objective functions are designed to facilitate joint learning. Experimental results show that our method outperforms several strong baselines. The proposed transductive learning approach is general and effective to the task of unsupervised style transfer, and we will apply it to the other two typical methods in the future.</abstract>
       <url hash="409db797">2021.emnlp-main.195</url>
@@ -2688,7 +2688,7 @@
       <title>Coupling Context Modeling with Zero Pronoun Recovering for Document-Level Natural Language Generation</title>
       <author><first>Xin</first><last>Tan</last></author>
       <author><first>Longyin</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2530–2540</pages>
       <abstract>Natural language generation (NLG) tasks on pro-drop languages are known to suffer from zero pronoun (ZP) problems, and the problems remain challenging due to the scarcity of ZP-annotated NLG corpora. In this case, we propose a highly adaptive two-stage approach to couple context modeling with ZP recovering to mitigate the ZP problem in NLG tasks. Notably, we frame the recovery process in a task-supervised fashion where the ZP representation recovering capability is learned during the NLG task learning process, thus our method does not require NLG corpora annotated with ZPs. For system enhancement, we learn an adversarial bot to adjust our model outputs to alleviate the error propagation caused by mis-recovered ZPs. Experiments on three document-level NLG tasks, i.e., machine translation, question answering, and summarization, show that our approach can improve the performance to a great extent, and the improvement on pronoun translation is very impressive.</abstract>
       <url hash="fc13cfe4">2021.emnlp-main.197</url>
@@ -2769,11 +2769,11 @@
       <title>Syntactically-Informed Unsupervised Paraphrasing with Non-Parallel Data</title>
       <author><first>Erguang</first><last>Yang</last></author>
       <author><first>Mingtong</first><last>Liu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Changjian</first><last>Hu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <pages>2594–2604</pages>
       <abstract>Previous works on syntactically controlled paraphrase generation heavily rely on large-scale parallel paraphrase data that is not easily available for many languages and domains. In this paper, we take this research direction to the extreme and investigate whether it is possible to learn syntactically controlled paraphrase generation with nonparallel data. We propose a syntactically-informed unsupervised paraphrasing model based on conditional variational auto-encoder (VAE) which can generate texts in a specified syntactic structure. Particularly, we design a two-stage learning method to effectively train the model using non-parallel data. The conditional VAE is trained to reconstruct the input sentence according to the given input and its syntactic structure. Furthermore, to improve the syntactic controllability and semantic consistency of the pre-trained conditional VAE, we fine-tune it using syntax controlling and cycle reconstruction learning objectives, and employ Gumbel-Softmax to combine these new learning objectives. Experiment results demonstrate that the proposed model trained only on non-parallel data is capable of generating diverse paraphrases with specified syntactic structure. Additionally, we validate the effectiveness of our method for generating syntactically adversarial examples on the sentiment analysis task.</abstract>
@@ -2929,7 +2929,7 @@
       <title>Machine Reading Comprehension as Data Augmentation: A Case Study on Implicit Event Argument Extraction</title>
       <author><first>Jian</first><last>Liu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>2716–2725</pages>
       <abstract>Implicit event argument extraction (EAE) is a crucial document-level information extraction task that aims to identify event arguments beyond the sentence level. Despite many efforts for this task, the lack of enough training data has long impeded the study. In this paper, we take a new perspective to address the data sparsity issue faced by implicit EAE, by bridging the task with machine reading comprehension (MRC). Particularly, we devise two data augmentation regimes via MRC, including: 1) implicit knowledge transfer, which enables knowledge transfer from other tasks, by building a unified training framework in the MRC formulation, and 2) explicit data augmentation, which can explicitly generate new training examples, by treating MRC models as an annotator. The extensive experiments have justified the effectiveness of our approach — it not only obtains state-of-the-art performance on two benchmarks, but also demonstrates superior results in a data-low scenario.</abstract>
       <url hash="7a22ed6d">2021.emnlp-main.214</url>
@@ -2957,9 +2957,9 @@
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Yawen</first><last>Yang</last></author>
       <author><first>Xiaohe</first><last>Li</last></author>
-      <author id="li-lin"><first>Li</first><last>Lin</last></author>
+      <author><first>Li</first><last>Lin</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>2737–2746</pages>
       <abstract>Low-resource Relation Extraction (LRE) aims to extract relation facts from limited labeled corpora when human annotation is scarce. Existing works either utilize self-training scheme to generate pseudo labels that will cause the gradual drift problem, or leverage meta-learning scheme which does not solicit feedback explicitly. To alleviate selection bias due to the lack of feedback loops in existing LRE learning paradigms, we developed a Gradient Imitation Reinforcement Learning method to encourage pseudo label data to imitate the gradient descent direction on labeled data and bootstrap its optimization capability through trial and error. We also propose a framework called GradLRE, which handles two major scenarios in low-resource relation extraction. Besides the scenario where unlabeled data is sufficient, GradLRE handles the situation where no unlabeled data is available, by exploiting a contextualized augmentation method to generate data. Experimental results on two public datasets demonstrate the effectiveness of GradLRE on low resource relation extraction when comparing with baselines.</abstract>
       <url hash="8a08b757">2021.emnlp-main.216</url>
@@ -3013,7 +3013,7 @@
       <author><first>Guolin</first><last>Ke</last></author>
       <author><first>Waleed</first><last>Malik</last></author>
       <author><first>Zhicheng</first><last>Dou</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <author><first>Tie-Yan</first><last>Liu</last></author>
       <author><first>Arnold</first><last>Overwijk</last></author>
       <pages>2780–2791</pages>
@@ -3071,7 +3071,7 @@
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>QiaoQiao</first><last>She</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
@@ -3096,10 +3096,10 @@
     </paper>
     <paper id="226">
       <title>From Alignment to Assignment: Frustratingly Simple Unsupervised Entity Alignment</title>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Wenting</first><last>Wang</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>2843–2853</pages>
       <abstract>Cross-lingual entity alignment (EA) aims to find the equivalent entities between crosslingual KGs (Knowledge Graphs), which is a crucial step for integrating KGs. Recently, many GNN-based EA methods are proposed and show decent performance improvements on several public datasets. However, existing GNN-based EA methods inevitably inherit poor interpretability and low efficiency from neural networks. Motivated by the isomorphic assumption of GNN-based methods, we successfully transform the cross-lingual EA problem into an assignment problem. Based on this re-definition, we propose a frustratingly Simple but Effective Unsupervised entity alignment method (SEU) without neural networks. Extensive experiments have been conducted to show that our proposed unsupervised approach even beats advanced supervised methods across all public datasets while having high efficiency, interpretability, and stability.</abstract>
       <url hash="ff462661">2021.emnlp-main.226</url>
@@ -3195,8 +3195,8 @@
       <author><first>Xuelin</first><last>Situ</last></author>
       <author><first>Sameen</first><last>Maruf</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2933–2940</pages>
       <abstract>Lifelong Learning (LL) black-box models are dynamic in that they keep learning from new tasks and constantly update their parameters. Owing to the need to utilize information from previously seen tasks, and capture commonalities in potentially diverse data, it is hard for automatic explanation methods to explain the outcomes of these models. In addition, existing explanation methods, e.g., LIME, which are computationally expensive when explaining a static black-box model, are even more inefficient in the LL setting. In this paper, we propose a novel Lifelong Explanation (LLE) approach that continuously trains a student explainer under the supervision of a teacher – an arbitrary explanation algorithm – on different tasks undertaken in LL. We also leverage the Experience Replay (ER) mechanism to prevent catastrophic forgetting in the student explainer. Our experiments comparing LLE to three baselines on text classification tasks show that LLE can enhance the stability of the explanations for all seen tasks and maintain the same level of faithfulness to the black-box model as the teacher, while being up to 10ˆ2 times faster at test time. Our ablation study shows that the ER mechanism in our LLE approach enhances the learning capabilities of the student explainer. Our code is available at <url>https://github.com/situsnow/LLE</url>.</abstract>
       <url hash="db337028">2021.emnlp-main.233</url>
@@ -3206,10 +3206,10 @@
     </paper>
     <paper id="234">
       <title>Linguistic Dependencies and Statistical Dependence</title>
-      <author><first>Jacob Louis</first><last>Hoover</last></author>
+      <author id="jacob-hoover-vigly"><first>Jacob Louis</first><last>Hoover</last></author>
       <author><first>Wenyu</first><last>Du</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>2941–2963</pages>
       <abstract>Are pairs of words that tend to occur together also likely to stand in a linguistic dependency? This empirical question is motivated by a long history of literature in cognitive science, psycholinguistics, and NLP. In this work we contribute an extensive analysis of the relationship between linguistic dependencies and statistical dependence between words. Improving on previous work, we introduce the use of large pretrained language models to compute contextualized estimates of the pointwise mutual information between words (CPMI). For multiple models and languages, we extract dependency trees which maximize CPMI, and compare to gold standard linguistic dependencies. Overall, we find that CPMI dependencies achieve an unlabelled undirected attachment score of at most <tex-math>\approx 0.5</tex-math>. While far above chance, and consistently above a non-contextualized PMI baseline, this score is generally comparable to a simple baseline formed by connecting adjacent words. We analyze which kinds of linguistic dependencies are best captured in CPMI dependencies, and also find marked differences between the estimates of the large pretrained language models, illustrating how their different training schemes affect the type of dependencies they capture.</abstract>
       <url hash="48121ef9">2021.emnlp-main.234</url>
@@ -3458,7 +3458,7 @@
       <author><first>Shizhan</first><last>Chen</last></author>
       <author><first>Xiaowang</first><last>Zhang</last></author>
       <author><first>Zhiyong</first><last>Feng</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Shaojuan</first><last>Wu</last></author>
       <author><first>Chunliu</first><last>Dou</last></author>
       <pages>3148–3161</pages>
@@ -3573,7 +3573,7 @@
     <paper id="260">
       <title>Enlivening Redundant Heads in Multi-head Self-attention for Machine Translation</title>
       <author><first>Tianfu</first><last>Zhang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Chong</first><last>Feng</last></author>
       <author><first>Longbing</first><last>Cao</last></author>
       <pages>3238–3248</pages>
@@ -3587,7 +3587,7 @@
       <title>Unsupervised Neural Machine Translation with Universal Grammar</title>
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>3249–3264</pages>
       <abstract>Machine translation usually relies on parallel corpora to provide parallel signals for training. The advent of unsupervised machine translation has brought machine translation away from this reliance, though performance still lags behind traditional supervised machine translation. In unsupervised machine translation, the model seeks symmetric language similarities as a source of weak parallel signal to achieve translation. Chomsky’s Universal Grammar theory postulates that grammar is an innate form of knowledge to humans and is governed by universal principles and constraints. Therefore, in this paper, we seek to leverage such shared grammar clues to provide more explicit language parallel signals to enhance the training of unsupervised machine translation models. Through experiments on multiple typical language pairs, we demonstrate the effectiveness of our proposed approaches.</abstract>
@@ -3599,8 +3599,8 @@
     <paper id="262">
       <title>Encouraging Lexical Translation Consistency for Document-Level Neural Machine Translation</title>
       <author><first>Xinglin</first><last>Lyu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>3265–3277</pages>
       <abstract>Recently a number of approaches have been proposed to improve translation performance for document-level neural machine translation (NMT). However, few are focusing on the subject of lexical translation consistency. In this paper we apply “one translation per discourse” in NMT, and aim to encourage lexical translation consistency for document-level NMT. This is done by first obtaining a word link for each source word in a document, which tells the positions where the source word appears. Then we encourage the translation of those words within a link to be consistent in two ways. On the one hand, when encoding sentences within a document we properly share context information of those words. On the other hand, we propose an auxiliary loss function to better constrain that their translation should be consistent. Experimental results on Chinese↔English and English→French translation tasks show that our approach not only achieves state-of-the-art performance in BLEU scores, but also greatly improves lexical consistency in translation.</abstract>
@@ -3625,7 +3625,7 @@
       <author><first>Yijin</first><last>Liu</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>3285–3296</pages>
       <abstract>Scheduled sampling is widely used to mitigate the exposure bias problem for neural machine translation. Its core motivation is to simulate the inference scene during training by replacing ground-truth tokens with predicted tokens, thus bridging the gap between training and inference. However, vanilla scheduled sampling is merely based on training steps and equally treats all decoding steps. Namely, it simulates an inference scene with uniform error rates, which disobeys the real inference scene, where larger decoding steps usually have higher error rates due to error accumulations. To alleviate the above discrepancy, we propose scheduled sampling methods based on decoding steps, increasing the selection chance of predicted tokens with the growth of decoding steps. Consequently, we can more realistically simulate the inference scene during training, thus better bridging the gap between training and inference. Moreover, we investigate scheduled sampling based on both training steps and decoding steps for further improvements. Experimentally, our approaches significantly outperform the Transformer baseline and vanilla scheduled sampling on three large-scale WMT tasks. Additionally, our approaches also generalize well to the text summarization task on two popular benchmarks.</abstract>
@@ -3678,10 +3678,10 @@
     </paper>
     <paper id="268">
       <title>Generalised Unsupervised Domain Adaptation of Neural Machine Translation with Cross-Lingual Data Selection</title>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Xuanli</first><last>He</last></author>
       <author><first>Dinh</first><last>Phung</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3335–3346</pages>
       <abstract>This paper considers the unsupervised domain adaptation problem for neural machine translation (NMT), where we assume the access to only monolingual text in either the source or target language in the new domain. We propose a cross-lingual data selection method to extract in-domain sentences in the missing language side from a large generic monolingual corpus. Our proposed method trains an adaptive layer on top of multilingual BERT by contrastive learning to align the representation between the source and target language. This then enables the transferability of the domain classifier between the languages in a zero-shot manner. Once the in-domain data is detected by the classifier, the NMT model is then adapted to the new domain by jointly learning translation and domain discrimination tasks. We evaluate our cross-lingual data selection method on NMT across five diverse domains in three language pairs, as well as a real-world scenario of translation for COVID-19. The results show that our proposed method outperforms other selection baselines up to +1.5 BLEU score.</abstract>
       <url hash="c461d256">2021.emnlp-main.268</url>
@@ -3839,7 +3839,7 @@
       <author><first>Jin Yea</first><last>Jang</last></author>
       <author><first>San</first><last>Kim</last></author>
       <author><first>Minyoung</first><last>Jung</last></author>
-      <author><first>Saim</first><last>Shin</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
       <author><first>Gahgene</first><last>Gweon</last></author>
       <pages>3447–3452</pages>
       <abstract>Backchannel (BC), a short reaction signal of a listener to a speaker’s utterances, helps to improve the quality of the conversation. Several studies have been conducted to predict BC in conversation; however, the utilization of advanced natural language processing techniques using lexical information presented in the utterances of a speaker has been less considered. To address this limitation, we present a BC prediction model called BPM_MT (Backchannel prediction model with multitask learning), which utilizes KoBERT, a pre-trained language model. The BPM_MT simultaneously carries out two tasks at learning: 1) BC category prediction using acoustic and lexical features, and 2) sentiment score prediction based on sentiment cues. BPM_MT exhibited 14.24% performance improvement compared to the existing baseline in the four BC categories: continuer, understanding, empathic response, and No BC. In particular, for empathic response category, a performance improvement of 17.14% was achieved.</abstract>
@@ -3868,7 +3868,7 @@
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Jeonghwan</first><last>Kim</last></author>
       <author><first>Suwon</first><last>Shin</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>3464–3476</pages>
       <abstract>Tag recommendation relies on either a ranking function for top-k tags or an autoregressive generation method. However, the previous methods neglect one of two seemingly conflicting yet desirable characteristics of a tag set: orderlessness and inter-dependency. While the ranking approach fails to address the inter-dependency among tags when they are ranked, the autoregressive approach fails to take orderlessness into account because it is designed to utilize sequential relations among tokens. We propose a sequence-oblivious generation method for tag recommendation, in which the next tag to be generated is independent of the order of the generated tags and the order of the ground truth tags occurring in training data. Empirical results on two different domains, Instagram and Stack Overflow, show that our method is significantly superior to the previous approaches.</abstract>
       <url hash="28ae4fd0">2021.emnlp-main.279</url>
@@ -3880,7 +3880,7 @@
       <title>End-to-End Conversational Search for Online Shopping with Utterance Transfer</title>
       <author><first>Liqiang</first><last>Xiao</last></author>
       <author><first>Jun</first><last>Ma</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
       <author><first>Nasser</first><last>Zalmout</last></author>
       <author><first>Chenwei</first><last>Zhang</last></author>
@@ -3898,7 +3898,7 @@
       <title>Self-Supervised Curriculum Learning for Spelling Error Correction</title>
       <author><first>Zifa</first><last>Gan</last></author>
       <author><first>Hongfei</first><last>Xu</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>3487–3494</pages>
       <abstract>Spelling Error Correction (SEC) that requires high-level language understanding is a challenging but useful task. Current SEC approaches normally leverage a pre-training then fine-tuning procedure that treats data equally. By contrast, Curriculum Learning (CL) utilizes training data differently during training and has shown its effectiveness in improving both performance and training efficiency in many other NLP tasks. In NMT, a model’s performance has been shown sensitive to the difficulty of training examples, and CL has been shown effective to address this. In SEC, the data from different language learners are naturally distributed at different difficulty levels (some errors made by beginners are obvious to correct while some made by fluent speakers are hard), and we expect that designing a curriculum correspondingly for model learning may also help its training and bring about better performance. In this paper, we study how to further improve the performance of the state-of-the-art SEC method with CL, and propose a Self-Supervised Curriculum Learning (SSCL) approach. Specifically, we directly use the cross-entropy loss as criteria for: 1) scoring the difficulty of training data, and 2) evaluating the competence of the model. In our approach, CL improves the model training, which in return improves the CL measurement. In our experiments on the SIGHAN 2015 Chinese spelling check task, we show that SSCL is superior to previous norm-based and uncertainty-aware approaches, and establish a new state of the art (74.38% F1).</abstract>
       <url hash="81d36531">2021.emnlp-main.281</url>
@@ -4061,7 +4061,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Haejun</first><last>Lee</last></author>
       <author><first>Tg</first><last>Sido</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>3599–3614</pages>
       <abstract>We develop a unified system to answer directly from text open-domain questions that may require a varying number of retrieval steps. We employ a single multi-task transformer model to perform all the necessary subtasks—retrieving supporting facts, reranking them, and predicting the answer from all retrieved documents—in an iterative fashion. We avoid crucial assumptions of previous work that do not transfer well to real-world settings, including exploiting knowledge of the fixed number of retrieval steps required to answer each question or using structured metadata like knowledge bases or web links that have limited availability. Instead, we design a system that can answer open-domain questions on any text collection without prior knowledge of reasoning complexity. To emulate this setting, we construct a new benchmark, called BeerQA, by combining existing one- and two-step datasets with a new collection of 530 questions that require three Wikipedia pages to answer, unifying Wikipedia corpora versions in the process. We show that our model demonstrates competitive performance on both existing benchmarks and this new benchmark. We make the new benchmark available at <url>https://beerqa.github.io/</url>.</abstract>
       <url hash="e7506d3d">2021.emnlp-main.292</url>
@@ -4075,7 +4075,7 @@
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>3615–3626</pages>
       <abstract>Information seeking is an essential step for open-domain question answering to efficiently gather evidence from a large corpus. Recently, iterative approaches have been proven to be effective for complex questions, by recursively retrieving new evidence at each step. However, almost all existing iterative approaches use predefined strategies, either applying the same retrieval function multiple times or fixing the order of different retrieval functions, which cannot fulfill the diverse requirements of various questions. In this paper, we propose a novel adaptive information-seeking strategy for open-domain question answering, namely AISO. Specifically, the whole retrieval and answer process is modeled as a partially observed Markov decision process, where three types of retrieval operations (e.g., BM25, DPR, and hyperlink) and one answer operation are defined as actions. According to the learned policy, AISO could adaptively select a proper retrieval action to seek the missing evidence at each step, based on the collected evidence and the reformulated query, or directly output the answer when the evidence set is sufficient for the question. Experiments on SQuAD Open and HotpotQA fullwiki, which serve as single-hop and multi-hop open-domain QA benchmarks, show that AISO outperforms all baseline methods with predefined strategies in terms of both retrieval and answer evaluations.</abstract>
       <url hash="9f3da3d8">2021.emnlp-main.293</url>
@@ -4086,13 +4086,13 @@
     </paper>
     <paper id="294">
       <title>Mapping probability word problems to executable representations</title>
-      <author><first>Simon</first><last>Suster</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
       <author><first>Pieter</first><last>Fivez</last></author>
       <author><first>Pietro</first><last>Totis</last></author>
       <author><first>Angelika</first><last>Kimmig</last></author>
       <author><first>Jesse</first><last>Davis</last></author>
       <author><first>Luc</first><last>de Raedt</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>3627–3640</pages>
       <abstract>While solving math word problems automatically has received considerable attention in the NLP community, few works have addressed probability word problems specifically. In this paper, we employ and analyse various neural models for answering such word problems. In a two-step approach, the problem text is first mapped to a formal representation in a declarative language using a sequence-to-sequence model, and then the resulting representation is executed using a probabilistic programming system to provide the answer. Our best performing model incorporates general-domain contextualised word representations that were finetuned using transfer learning on another in-domain dataset. We also apply end-to-end models to this task, which bring out the importance of the two-step approach in obtaining correct solutions to probability problems.</abstract>
       <url hash="a8caa0e1">2021.emnlp-main.294</url>
@@ -4166,7 +4166,7 @@
       <author><first>Siru</first><last>Ouyang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>3685–3696</pages>
       <abstract>Conversational machine reading (CMR) requires machines to communicate with humans through multi-turn interactions between two salient dialogue states of decision making and question generation processes. In open CMR settings, as the more realistic scenario, the retrieved background knowledge would be noisy, which results in severe challenges in the information transmission. Existing studies commonly train independent or pipeline systems for the two subtasks. However, those methods are trivial by using hard-label decisions to activate question generation, which eventually hinders the model performance. In this work, we propose an effective gating strategy by smoothing the two dialogue states in only one decoder and bridge decision making and question generation to provide a richer dialogue state reference. Experiments on the OR-ShARC dataset show the effectiveness of our method, which achieves new state-of-the-art results.</abstract>
       <url hash="28e9ba53">2021.emnlp-main.299</url>
@@ -4176,7 +4176,7 @@
     </paper>
     <paper id="300">
       <title><fixed-case>F</fixed-case>in<fixed-case>QA</fixed-case>: A Dataset of Numerical Reasoning over Financial Data</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Charese</first><last>Smiley</last></author>
       <author><first>Sameena</first><last>Shah</last></author>
@@ -4184,8 +4184,8 @@
       <author><first>Dylan</first><last>Langdon</last></author>
       <author><first>Reema</first><last>Moussa</last></author>
       <author><first>Matt</first><last>Beane</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
-      <author><first>Bryan</first><last>Routledge</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="bryan-r-routledge"><first>Bryan</first><last>Routledge</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>3697–3711</pages>
       <abstract>The sheer volume of financial statements makes it difficult for humans to access and analyze a business’s financials. Robust numerical reasoning likewise faces unique challenges in this domain. In this work, we focus on answering deep questions over financial data, aiming to automate the analysis of a large corpus of financial documents. In contrast to existing tasks on general domain, the finance domain includes complex numerical reasoning and understanding of heterogeneous representations. To facilitate analytical progress, we propose a new large-scale dataset, FinQA, with Question-Answering pairs over Financial reports, written by financial experts. We also annotate the gold reasoning programs to ensure full explainability. We further introduce baselines and conduct comprehensive experiments in our dataset. The results demonstrate that popular, large, pre-trained models fall far short of expert humans in acquiring finance knowledge and in complex multi-step numerical reasoning on that knowledge. Our dataset – the first of its kind – should therefore enable significant, new community research into complex application domains. The dataset and code are publicly available at <url>https://github.com/czyssrs/FinQA</url>.</abstract>
@@ -4211,7 +4211,7 @@
     </paper>
     <paper id="302">
       <title><fixed-case>R</fixed-case>ock<fixed-case>NER</fixed-case>: A Simple Method to Create Adversarial Examples for Evaluating the Robustness of Named Entity Recognition Models</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Wenyang</first><last>Gao</last></author>
       <author><first>Jun</first><last>Yan</last></author>
       <author><first>Ryan</first><last>Moreno</last></author>
@@ -4242,7 +4242,7 @@
       <title>Constructing a Psychometric Testbed for Fair Natural Language Processing</title>
       <author><first>Ahmed</first><last>Abbasi</last></author>
       <author><first>David</first><last>Dobolyi</last></author>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Richard G.</first><last>Netemeyer</last></author>
       <author><first>Kendall</first><last>Smith</last></author>
       <author><first>Yi</first><last>Yang</last></author>
@@ -4271,7 +4271,7 @@
       <title><fixed-case>C</fixed-case>hinese <fixed-case>WPLC</fixed-case>: A <fixed-case>C</fixed-case>hinese Dataset for Evaluating Pretrained Language Models on Word Prediction Given Long-Range Context</title>
       <author><first>Huibin</first><last>Ge</last></author>
       <author><first>Chenxi</first><last>Sun</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>3770–3778</pages>
       <abstract>This paper presents a Chinese dataset for evaluating pretrained language models on Word Prediction given Long-term Context (Chinese WPLC). We propose both automatic and manual selection strategies tailored to Chinese to guarantee that target words in passages collected from over 69K novels can only be predicted with long-term context beyond the scope of sentences containing the target words. Dataset analysis reveals that the types of target words range from common nouns to Chinese 4-character idioms. We also observe that linguistic relations between target words and long-range context exhibit diversity, including lexical match, synonym, summary and reasoning. Experiment results show that the Chinese pretrained language model PanGu-<tex-math>\alpha</tex-math> is 45 points behind human in terms of top-1 word prediction accuracy, indicating that Chinese WPLC is a challenging dataset. The dataset is publicly available at <url>https://git.openi.org.cn/PCL-Platform.Intelligence/Chinese_WPLC</url>.</abstract>
@@ -4324,7 +4324,7 @@
       <title>Total Recall: a Customized Continual Learning Method for Neural Semantic Parsers</title>
       <author><first>Zhuang</first><last>Li</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3816–3831</pages>
       <abstract>This paper investigates continual learning for semantic parsing. In this setting, a neural semantic parser learns tasks sequentially without accessing full training data from previous tasks. Direct application of the SOTA continual learning algorithms to this problem fails to achieve comparable performance with re-training models with all seen tasks because they have not considered the special properties of structured outputs yielded by semantic parsers. Therefore, we propose TotalRecall, a continual learning method designed for neural semantic parsers from two aspects: i) a sampling method for memory replay that diversifies logical form templates and balances distributions of parse actions in a memory; ii) a two-stage training method that significantly improves generalization capability of the parsers across tasks. We conduct extensive experiments to study the research problems involved in continual semantic parsing and demonstrate that a neural semantic parser trained with TotalRecall achieves superior performance than the one trained directly with the SOTA continual learning algorithms and achieve a 3-6 times speedup compared to re-training from scratch.</abstract>
       <url hash="6b0a4707">2021.emnlp-main.310</url>
@@ -4390,7 +4390,7 @@
     <paper id="315">
       <title>Virtual Data Augmentation: A Robust and General Framework for Fine-tuning Pre-trained Models</title>
       <author><first>Kun</first><last>Zhou</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Sirui</first><last>Wang</last></author>
       <author><first>Fuzheng</first><last>Zhang</last></author>
       <author><first>Wei</first><last>Wu</last></author>
@@ -4578,7 +4578,7 @@
       <author><first>Saim</first><last>Wani</last></author>
       <author><first>Shivansh</first><last>Patel</last></author>
       <author><first>Unnat</first><last>Jain</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <pages>4018–4028</pages>
       <abstract>In the Vision-and-Language Navigation (VLN) task an embodied agent navigates a 3D environment, following natural language instructions. A challenge in this task is how to handle ‘off the path’ scenarios where an agent veers from a reference path. Prior work supervises the agent with actions based on the shortest path from the agent’s location to the goal, but such goal-oriented supervision is often not in alignment with the instruction. Furthermore, the evaluation metrics employed by prior work do not measure how much of a language instruction the agent is able to follow. In this work, we propose a simple and effective language-aligned supervision scheme, and a new metric that measures the number of sub-instructions the agent has completed during navigation.</abstract>
       <url hash="342014f2">2021.emnlp-main.328</url>
@@ -4602,7 +4602,7 @@
       <author><first>Jingun</first><last>Kwon</last></author>
       <author><first>Naoki</first><last>Kobayashi</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>4039–4044</pages>
       <abstract>Sentence extractive summarization shortens a document by selecting sentences for a summary while preserving its important contents. However, constructing a coherent and informative summary is difficult using a pre-trained BERT-based encoder since it is not explicitly trained for representing the information of sentences in a document. We propose a nested tree-based extractive summarization model on RoBERTa (NeRoBERTa), where nested tree structures consist of syntactic and discourse trees in a given document. Experimental results on the CNN/DailyMail dataset showed that NeRoBERTa outperforms baseline models in ROUGE. Human evaluation results also showed that NeRoBERTa achieves significantly better scores than the baselines in terms of coherence and yields comparable scores to the state-of-the-art models.</abstract>
       <url hash="463fd16e">2021.emnlp-main.330</url>
@@ -4674,7 +4674,7 @@
       <author><first>Yuta</first><last>Hitomi</last></author>
       <author><first>Hideaki</first><last>Tamori</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Koichi</first><last>Takeda</last></author>
       <pages>4085–4090</pages>
@@ -4782,7 +4782,7 @@
       <author><first>Jaydeep</first><last>Sen</last></author>
       <author><first>Mustafa</first><last>Canim</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
       <pages>4159–4172</pages>
       <abstract>Weakly-supervised table question-answering (TableQA) models have achieved state-of-art performance by using pre-trained BERT transformer to jointly encoding a question and a table to produce structured query for the question. However, in practical settings TableQA systems are deployed over table corpora having topic and word distributions quite distinct from BERT’s pretraining corpus. In this work we simulate the practical topic shift scenario by designing novel challenge benchmarks WikiSQL-TS and WikiTable-TS, consisting of train-dev-test splits in five distinct topic groups, based on the popular WikiSQL and WikiTable-Questions datasets. We empirically show that, despite pre-training on large open-domain text, performance of models degrades significantly when they are evaluated on unseen topics. In response, we propose T3QA (Topic Transferable Table Question Answering) a pragmatic adaptation framework for TableQA comprising of: (1) topic-specific vocabulary injection into BERT, (2) a novel text-to-text transformer generator (such as T5, GPT2) based natural language question generation pipeline focused on generating topic-specific training data, and (3) a logical form re-ranker. We show that T3QA provides a reasonably good baseline for our topic shift benchmarks. We believe our topic split benchmarks will lead to robust TableQA solutions that are better suited for practical deployment</abstract>
@@ -4963,7 +4963,7 @@
       <title><fixed-case>D</fixed-case>u<fixed-case>R</fixed-case>ec<fixed-case>D</fixed-case>ial 2.0: A Bilingual Parallel Corpus for Conversational Recommendation</title>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
       <pages>4335–4347</pages>
@@ -4977,7 +4977,7 @@
       <title>End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs</title>
       <author><first>Dinesh</first><last>Raghu</last></author>
       <author><first>Shantanu</first><last>Agarwal</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first/><last>Mausam</last></author>
       <pages>4348–4366</pages>
       <abstract>We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research.</abstract>
@@ -4993,7 +4993,7 @@
       <author><first>Seonghyeon</first><last>Ye</last></author>
       <author><first>Jaeyeol</first><last>Jeon</last></author>
       <author><first>Hee Young</first><last>Park</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>4367–4380</pages>
       <abstract>We present a model to predict fine-grained emotions along the continuous dimensions of valence, arousal, and dominance (VAD) with a corpus with categorical emotion annotations. Our model is trained by minimizing the EMD (Earth Mover’s Distance) loss between the predicted VAD score distribution and the categorical emotion distributions sorted along VAD, and it can simultaneously classify the emotion categories and predict the VAD scores for a given sentence. We use pre-trained RoBERTa-Large and fine-tune on three different corpora with categorical labels and evaluate on EmoBank corpus with VAD scores. We show that our approach reaches comparable performance to that of the state-of-the-art classifiers in categorical emotion classification and shows significant positive correlations with the ground truth VAD scores. Also, further training with supervision of VAD labels leads to improved performance especially when dataset is small. We also present examples of predictions of appropriate emotion words that are not part of the original annotations.</abstract>
       <url hash="b0fec8db">2021.emnlp-main.358</url>
@@ -5017,10 +5017,10 @@
       <author><first>Xincheng</first><last>Ju</last></author>
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Rong</first><last>Xiao</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>4395–4405</pages>
       <abstract>Aspect terms extraction (ATE) and aspect sentiment classification (ASC) are two fundamental and fine-grained sub-tasks in aspect-level sentiment analysis (ALSA). In the textual analysis, joint extracting both aspect terms and sentiment polarities has been drawn much attention due to the better applications than individual sub-task. However, in the multi-modal scenario, the existing studies are limited to handle each sub-task independently, which fails to model the innate connection between the above two objectives and ignores the better applications. Therefore, in this paper, we are the first to jointly perform multi-modal ATE (MATE) and multi-modal ASC (MASC), and we propose a multi-modal joint learning approach with auxiliary cross-modal relation detection for multi-modal aspect-level sentiment analysis (MALSA). Specifically, we first build an auxiliary text-image relation detection module to control the proper exploitation of visual information. Second, we adopt the hierarchical framework to bridge the multi-modal connection between MATE and MASC, as well as separately visual guiding for each sub module. Finally, we can obtain all aspect-level sentiment polarities dependent on the jointly extracted specific aspects. Extensive experiments show the effectiveness of our approach against the joint textual approaches, pipeline and collapsed multi-modal approaches.</abstract>
       <url hash="84931f70">2021.emnlp-main.360</url>
@@ -5060,7 +5060,7 @@
       <title>The Effect of Round-Trip Translation on Fairness in Sentiment Analysis</title>
       <author><first>Jonathan Gabel</first><last>Christiansen</last></author>
       <author><first>Mathias</first><last>Gammelgaard</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4423–4428</pages>
       <abstract>Sentiment analysis systems have been shown to exhibit sensitivity to protected attributes. Round-trip translation, on the other hand, has been shown to normalize text. We explore the impact of round-trip translation on the demographic parity of sentiment classifiers and show how round-trip translation consistently improves classification fairness at test time (reducing up to 47% of between-group gaps). We also explore the idea of retraining sentiment classifiers on round-trip-translated data.</abstract>
       <url hash="be1a6c3e">2021.emnlp-main.363</url>
@@ -5072,7 +5072,7 @@
       <title><fixed-case>CH</fixed-case>o<fixed-case>R</fixed-case>a<fixed-case>L</fixed-case>: Collecting Humor Reaction Labels from Millions of Social Media Users</title>
       <author><first>Zixiaofan</first><last>Yang</last></author>
       <author><first>Shayan</first><last>Hooshmand</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>4429–4435</pages>
       <abstract>Humor detection has gained attention in recent years due to the desire to understand user-generated content with figurative language. However, substantial individual and cultural differences in humor perception make it very difficult to collect a large-scale humor dataset with reliable humor labels. We propose CHoRaL, a framework to generate perceived humor labels on Facebook posts, using the naturally available user reactions to these posts with no manual annotation needed. CHoRaL provides both binary labels and continuous scores of humor and non-humor. We present the largest dataset to date with labeled humor on 785K posts related to COVID-19. Additionally, we analyze the expression of COVID-related humor in social media by extracting lexico-semantic and affective features from the posts, and build humor detection models with performance similar to humans. CHoRaL enables the development of large-scale humor detection models on any topic and opens a new path to the study of humor on social media.</abstract>
       <url hash="05b67585">2021.emnlp-main.364</url>
@@ -5088,7 +5088,7 @@
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>4436–4451</pages>
       <abstract>Dialogue summarization has drawn much attention recently. Especially in the customer service domain, agents could use dialogue summaries to help boost their works by quickly knowing customer’s issues and service progress. These applications require summaries to contain the perspective of a single speaker and have a clear topic flow structure, while neither are available in existing datasets. Therefore, in this paper, we introduce a novel Chinese dataset for Customer Service Dialogue Summarization (CSDS). CSDS improves the abstractive summaries in two aspects: (1) In addition to the overall summary for the whole dialogue, role-oriented summaries are also provided to acquire different speakers’ viewpoints. (2) All the summaries sum up each topic separately, thus containing the topic-level structure of the dialogue. We define tasks in CSDS as generating the overall summary and different role-oriented summaries for a given dialogue. Next, we compare various summarization methods on CSDS, and experiment results show that existing methods are prone to generate redundant and incoherent summaries. Besides, the performance becomes much worse when analyzing the performance on role-oriented summaries and topic structures. We hope that this study could benchmark Chinese dialogue summarization and benefit further studies.</abstract>
       <url hash="b0fd9833">2021.emnlp-main.365</url>
@@ -5177,7 +5177,7 @@
     <paper id="372">
       <title>All Bark and No Bite: Rogue Dimensions in Transformer Language Models Obscure Representational Quality</title>
       <author><first>William</first><last>Timkey</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>4527–4546</pages>
       <abstract>Similarity measures are a vital tool for understanding how language models represent and process language. Standard representational similarity measures such as cosine similarity and Euclidean distance have been successfully used in static word embedding models to understand how words cluster in semantic space. Recently, these measures have been applied to embeddings from contextualized models such as BERT and GPT-2. In this work, we call into question the informativity of such measures for contextualized language models. We find that a small number of rogue dimensions, often just 1-3, dominate these measures. Moreover, we find a striking mismatch between the dimensions that dominate similarity measures and those which are important to the behavior of the model. We show that simple postprocessing techniques such as standardization are able to correct for rogue dimensions and reveal underlying representational quality. We argue that accounting for rogue dimensions is essential for any similarity-based analysis of contextual language models.</abstract>
       <url hash="a3c7ad7b">2021.emnlp-main.372</url>
@@ -5218,7 +5218,7 @@
       <author><first>Sheng</first><last>Zhang</last></author>
       <author><first>Xin</first><last>Zhang</last></author>
       <author><first>Weiming</first><last>Zhang</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4581–4588</pages>
       <abstract>Using data from English cloze tests, in which subjects also self-reported their gender, age, education, and race, we examine performance differences of pretrained language models across demographic groups, defined by these (protected) attributes. We demonstrate wide performance gaps across demographic groups and show that pretrained language models systematically disfavor young non-white male speakers; i.e., not only do pretrained language models learn social biases (stereotypical associations) – pretrained language models also learn sociolectal biases, learning to speak more like some than like others. We show, however, that, with the exception of BERT models, larger pretrained language models reduce some the performance gaps between majority and minority groups.</abstract>
       <url hash="81262ab0">2021.emnlp-main.375</url>
@@ -5241,7 +5241,7 @@
       <title>Are <fixed-case>T</fixed-case>ransformers a Modern Version of <fixed-case>ELIZA</fixed-case>? <fixed-case>O</fixed-case>bservations on <fixed-case>F</fixed-case>rench Object Verb Agreement</title>
       <author><first>Bingzhi</first><last>Li</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>4599–4610</pages>
       <abstract>Many recent works have demonstrated that unsupervised sentence representations of neural networks encode syntactic information by observing that neural language models are able to predict the agreement between a verb and its subject. We take a critical look at this line of research by showing that it is possible to achieve high accuracy on this agreement task with simple surface heuristics, indicating a possible flaw in our assessment of neural networks’ syntactic ability. Our fine-grained analyses of results on the long-range French object-verb agreement show that contrary to LSTMs, Transformers are able to capture a non-trivial amount of grammatical structure.</abstract>
       <url hash="5e9da7db">2021.emnlp-main.377</url>
@@ -5450,7 +5450,7 @@
       <author><first>Santiago</first><last>Castro</last></author>
       <author><first>Hanwen</first><last>Miao</last></author>
       <author><first>Weiji</first><last>Li</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>4770–4785</pages>
       <abstract>We aim to automatically identify human action reasons in online videos. We focus on the widespread genre of lifestyle vlogs, in which people perform actions while verbally describing them. We introduce and make publicly available the WhyAct dataset, consisting of 1,077 visual actions manually annotated with their reasons. We describe a multimodal model that leverages visual and textual information to automatically infer the reasons corresponding to an action presented in the video.</abstract>
       <url hash="da0ec43a">2021.emnlp-main.392</url>
@@ -5462,7 +5462,7 @@
       <title>Genre as Weak Supervision for Cross-lingual Dependency Parsing</title>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>4786–4802</pages>
       <abstract>Recent work has shown that monolingual masked language models learn to represent data-driven notions of language variation which can be used for domain-targeted training data selection. Dataset genre labels are already frequently available, yet remain largely unexplored in cross-lingual setups. We harness this genre metadata as a weak supervision signal for targeted data selection in zero-shot dependency parsing. Specifically, we project treebank-level genre information to the finer-grained sentence level, with the goal to amplify information implicitly stored in unsupervised contextualized representations. We demonstrate that genre is recoverable from multilingual contextual embeddings and that it provides an effective signal for training data selection in cross-lingual, zero-shot scenarios. For 12 low-resource language treebanks, six of which are test-only, our genre-specific methods significantly outperform competitive baselines as well as recent embedding-based methods for data selection. Moreover, genre-based data selection provides new state-of-the-art results for three of these target languages.</abstract>
       <url hash="71deb6c1">2021.emnlp-main.393</url>
@@ -5554,7 +5554,7 @@
     <paper id="400">
       <title>Zero-Shot Dialogue Disentanglement by Self-Supervised Entangled Response Selection</title>
       <author><first>Ta-Chung</first><last>Chi</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>4897–4902</pages>
       <abstract>Dialogue disentanglement aims to group utterances in a long and multi-participant dialogue into threads. This is useful for discourse analysis and downstream applications such as dialogue response selection, where it can be the first step to construct a clean context/response set. Unfortunately, labeling all <i>reply-to</i> links takes quadratic effort w.r.t the number of utterances: an annotator must check all preceding utterances to identify the one to which the current utterance is a reply. In this paper, we are the first to propose a <b>zero-shot</b> dialogue disentanglement solution. Firstly, we train a model on a multi-participant response selection dataset harvested from the web which is not annotated; we then apply the trained model to perform zero-shot dialogue disentanglement. Without any labeled data, our model can achieve a cluster F1 score of 25. We also fine-tune the model using various amounts of labeled data. Experiments show that with only 10% of the data, we achieve nearly the same performance of using the full dataset.</abstract>
       <url hash="3cd15251">2021.emnlp-main.400</url>
@@ -5608,7 +5608,7 @@
       <title>Dialogue State Tracking with a Language Model using Schema-Driven Prompting</title>
       <author><first>Chia-Hsuan</first><last>Lee</last></author>
       <author><first>Hao</first><last>Cheng</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>4937–4949</pages>
       <abstract>Task-oriented conversational systems often use dialogue state tracking to represent the user’s intentions, which involves filling in values of pre-defined slots. Many approaches have been proposed, often using task-specific architectures with special-purpose classifiers. Recently, good results have been obtained using more general architectures based on pretrained language models. Here, we introduce a new variation of the language modeling approach that uses schema-driven prompting to provide task-aware history encoding that is used for both categorical and non-categorical slots. We further improve performance by augmenting the prompting with schema descriptions, a naturally occurring source of in-domain knowledge. Our purely generative system achieves state-of-the-art performance on MultiWOZ 2.2 and achieves competitive performance on two other benchmarks: MultiWOZ 2.1 and M2M. The data and code will be available at <url>https://github.com/chiahsuan156/DST-as-Prompting</url>.</abstract>
       <url hash="8c222adc">2021.emnlp-main.404</url>
@@ -5660,8 +5660,8 @@
       <title>Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data</title>
       <author><first>David</first><last>Lowell</last></author>
       <author><first>Brian</first><last>Howard</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>4992–5001</pages>
       <abstract>Unsupervised Data Augmentation (UDA) is a semisupervised technique that applies a consistency loss to penalize differences between a model’s predictions on (a) observed (unlabeled) examples; and (b) corresponding ‘noised’ examples produced via data augmentation. While UDA has gained popularity for text classification, open questions linger over which design decisions are necessary and how to extend the method to sequence labeling tasks. In this paper, we re-examine UDA and demonstrate its efficacy on several sequential tasks. Our main contribution is an empirical study of UDA to establish which components of the algorithm confer benefits in NLP. Notably, although prior work has emphasized the use of clever augmentation techniques including back-translation, we find that enforcing consistency between predictions assigned to observed and randomly substituted words often yields comparable (or greater) benefits compared to these more complex perturbation models. Furthermore, we find that applying UDA’s consistency loss affords meaningful gains without any unlabeled data at all, i.e., in a standard supervised setting. In short, UDA need not be unsupervised to realize much of its noted benefits, and does not require complex data augmentation to be effective.</abstract>
       <url hash="ec528452">2021.emnlp-main.408</url>
@@ -5753,7 +5753,7 @@
       <author><first>Tianxing</first><last>He</last></author>
       <author><first>Jingzhao</first><last>Zhang</last></author>
       <author><first>Zhiming</first><last>Zhou</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>5087–5102</pages>
       <abstract>Exposure bias has been regarded as a central problem for auto-regressive language models (LM). It claims that teacher forcing would cause the test-time generation to be incrementally distorted due to the training-generation discrepancy. Although a lot of algorithms have been proposed to avoid teacher forcing and therefore alleviate exposure bias, there is little work showing how serious the exposure bias problem actually is. In this work, we focus on the task of open-ended language generation, propose metrics to quantify the impact of exposure bias in the aspects of quality, diversity, and consistency. Our key intuition is that if we feed ground-truth data prefixes (instead of prefixes generated by the model itself) into the model and ask it to continue the generation, the performance should become much better because the training-generation discrepancy in the prefix is removed. Both automatic and human evaluations are conducted in our experiments. On the contrary to the popular belief in exposure bias, we find that the the distortion induced by the prefix discrepancy is limited, and does not seem to be incremental during the generation. Moreover, our analysis reveals an interesting self-recovery ability of the LM, which we hypothesize to be countering the harmful effects from exposure bias.</abstract>
       <url hash="512311c2">2021.emnlp-main.415</url>
@@ -5807,7 +5807,7 @@
       <title>Journalistic Guidelines Aware News Image Captioning</title>
       <author><first>Xuewen</first><last>Yang</last></author>
       <author><first>Svebor</first><last>Karaman</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>5162–5175</pages>
       <abstract>The task of news article image captioning aims to generate descriptive and informative captions for news article images. Unlike conventional image captions that simply describe the content of the image in general terms, news image captions follow journalistic guidelines and rely heavily on named entities to describe the image content, often drawing context from the whole article they are associated with. In this work, we propose a new approach to this task, motivated by caption guidelines that journalists follow. Our approach, Journalistic Guidelines Aware News Image Captioning (JoGANIC), leverages the structure of captions to improve the generation quality and guide our representation design. Experimental results, including detailed ablation studies, on two large-scale publicly available datasets show that JoGANIC substantially outperforms state-of-the-art methods both on caption generation and named entity related metrics.</abstract>
@@ -5832,7 +5832,7 @@
       <title>Refocusing on Relevance: Personalization in <fixed-case>NLG</fixed-case></title>
       <author><first>Shiran</first><last>Dudy</last></author>
       <author><first>Steven</first><last>Bedrick</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>5190–5202</pages>
       <abstract>Many NLG tasks such as summarization, dialogue response, or open domain question answering, focus primarily on a source text in order to generate a target response. This standard approach falls short, however, when a user’s intent or context of work is not easily recoverable based solely on that source text– a scenario that we argue is more of the rule than the exception. In this work, we argue that NLG systems in general should place a much higher level of emphasis on making use of additional context, and suggest that relevance (as used in Information Retrieval) be thought of as a crucial tool for designing user-oriented text-generating tasks. We further discuss possible harms and hazards around such personalization, and argue that value-sensitive design represents a crucial path forward through these challenges.</abstract>
       <url hash="6ba7f724">2021.emnlp-main.421</url>
@@ -5849,7 +5849,7 @@
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>5203–5215</pages>
       <abstract>Event schemas encode knowledge of stereotypical structures of events and their connections. As events unfold, schemas are crucial to act as a scaffolding. Previous work on event schema induction focuses either on atomic events or linear temporal event sequences, ignoring the interplay between events via arguments and argument relations. We introduce a new concept of Temporal Complex Event Schema: a graph-based schema representation that encompasses events, arguments, temporal connections and argument relations. In addition, we propose a Temporal Event Graph Model that predicts event instances following the temporal complex event schema. To build and evaluate such schemas, we release a new schema learning corpus containing 6,399 documents accompanied with event graphs, and we have manually constructed gold-standard schemas. Intrinsic evaluations by schema matching and instance graph perplexity, prove the superior quality of our probabilistic graph schema library compared to linear representations. Extrinsic evaluation on schema-guided future event prediction further demonstrates the predictive power of our event graph model, significantly outperforming human schemas and baselines by more than 17.8% on HITS@1.</abstract>
       <url hash="c55fb007">2021.emnlp-main.422</url>
@@ -5923,7 +5923,7 @@
       <title>Lifelong Event Detection with Knowledge Transfer</title>
       <author><first>Pengfei</first><last>Yu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>5278–5290</pages>
       <abstract>Traditional supervised Information Extraction (IE) methods can extract structured knowledge elements from unstructured data, but it is limited to a pre-defined target ontology. In reality, the ontology of interest may change over time, adding emergent new types or more fine-grained subtypes. We propose a new lifelong learning framework to address this challenge. We focus on lifelong event detection as an exemplar case and propose a new problem formulation that is also generalizable to other IE tasks. In event detection and more general IE tasks, rich correlations or semantic relatedness exist among hierarchical knowledge element types. In our proposed framework, knowledge is being transferred between learned old event types and new event types. Specifically, we update old knowledge with new event types’ mentions using a self-training loss. In addition, we aggregate old event types’ representations based on their similarities with new event types to initialize the new event types’ representations. Experimental results show that our framework outperforms competitive baselines with a 5.1% absolute gain in the F1 score. Moreover, our proposed framework can boost the F1 score for over 30% absolute gain on some new long-tail rare event types with few training instances. Our knowledge transfer module improves performance on both learned event types and new event types under the lifelong learning setting, showing that it helps consolidate old knowledge and improve novel knowledge acquisition.</abstract>
       <url hash="a6ec3397">2021.emnlp-main.428</url>
@@ -6106,7 +6106,7 @@
       <title><fixed-case>PDALN</fixed-case>: Progressive Domain Adaptation over a Pre-trained Model for Low-Resource Cross-Domain Named Entity Recognition</title>
       <author><first>Tao</first><last>Zhang</last></author>
       <author><first>Congying</first><last>Xia</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <author><first>Zhiwei</first><last>Liu</last></author>
       <author><first>Shu</first><last>Zhao</last></author>
       <pages>5441–5451</pages>
@@ -6145,7 +6145,7 @@
       <author><first>Filip</first><last>Ilievski</last></author>
       <author><first>Jonathan</first><last>Francis</last></author>
       <author><first>Satoru</first><last>Ozaki</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Alessandro</first><last>Oltramari</last></author>
       <pages>5474–5483</pages>
       <abstract>Commonsense reasoning benchmarks have been largely solved by fine-tuning language models. The downside is that fine-tuning may cause models to overfit to task-specific data and thereby forget their knowledge gained during pre-training. Recent works only propose lightweight model updates as models may already possess useful knowledge from past experience, but a challenge remains in understanding what parts and to what extent models should be refined for a given task. In this paper, we investigate what models learn from commonsense reasoning datasets. We measure the impact of three different adaptation methods on the generalization and accuracy of models. Our experiments with two models show that fine-tuning performs best, by learning both the content and the structure of the task, but suffers from overfitting and limited generalization to novel answers. We observe that alternative adaptation methods like prefix-tuning have comparable accuracy, but generalize better to unseen answers and are more robust to adversarial splits.</abstract>
@@ -6215,7 +6215,7 @@
     <paper id="451">
       <title>The Stem Cell Hypothesis: Dilemma behind Multi-Task Learning with Transformer Encoders</title>
       <author><first>Han</first><last>He</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>5555–5577</pages>
       <abstract>Multi-task learning with transformer encoders (MTL) has emerged as a powerful technique to improve performance on closely-related tasks for both accuracy and efficiency while a question still remains whether or not it would perform as well on tasks that are distinct in nature. We first present MTL results on five NLP tasks, POS, NER, DEP, CON, and SRL, and depict its deficiency over single-task learning. We then conduct an extensive pruning analysis to show that a certain set of attention heads get claimed by most tasks during MTL, who interfere with one another to fine-tune those heads for their own objectives. Based on this finding, we propose the Stem Cell Hypothesis to reveal the existence of attention heads naturally talented for many tasks that cannot be jointly trained to create adequate embeddings for all of those tasks. Finally, we design novel parameter-free probes to justify our hypothesis and demonstrate how attention heads are transformed across the five tasks during MTL through label analysis.</abstract>
       <url hash="d74d25cc">2021.emnlp-main.451</url>
@@ -6251,7 +6251,7 @@
       <title>Controlled Evaluation of Grammatical Knowledge in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Language Models</title>
       <author><first>Yiwen</first><last>Wang</last></author>
       <author><first>Jennifer</first><last>Hu</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Peng</first><last>Qian</last></author>
       <pages>5604–5620</pages>
       <abstract>Prior work has shown that structural supervision helps English language models learn generalizations about syntactic phenomena such as subject-verb agreement. However, it remains unclear if such an inductive bias would also improve language models’ ability to learn grammatical dependencies in typologically different languages. Here we investigate this question in Mandarin Chinese, which has a logographic, largely syllable-based writing system; different word order; and sparser morphology than English. We train LSTMs, Recurrent Neural Network Grammars, Transformer language models, and Transformer-parameterized generative parsing models on two Mandarin Chinese datasets of different sizes. We evaluate the models’ ability to learn different aspects of Mandarin grammar that assess syntactic and semantic relationships. We find suggestive evidence that structural supervision helps with representing syntactic state across intervening content and improves performance in low-data settings, suggesting that the benefits of hierarchical inductive biases in acquiring dependency relationships may extend beyond English.</abstract>
@@ -6350,9 +6350,9 @@
     </paper>
     <paper id="462">
       <title><fixed-case>ST</fixed-case>ra<fixed-case>TA</fixed-case>: Self-Training with Task Augmentation for Better Few-shot Learning</title>
-      <author><first>Tu</first><last>Vu</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Grady</first><last>Simon</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <pages>5715–5731</pages>
@@ -6446,7 +6446,7 @@
       <author><first>Anchit</first><last>Gupta</last></author>
       <author><first>Akshat</first><last>Shrivastava</last></author>
       <author><first>Xilun</first><last>Chen</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Sonal</first><last>Gupta</last></author>
       <pages>5799–5811</pages>
       <abstract>We propose pre-finetuning, an additional large-scale learning stage between language model pre-training and fine-tuning. Pre-finetuning is massively multi-task learning (around 50 datasets, over 4.8 million total labeled examples), and is designed to encourage learning of representations that generalize better to many different tasks. We show that pre-finetuning consistently improves performance for pretrained discriminators (e.g. RoBERTa) and generation models (e.g. BART) on a wide range of tasks (sentence prediction, commonsense reasoning, MRC, etc.), while also significantly improving sample efficiency during fine-tuning. We also show that large-scale multi-tasking is crucial; pre-finetuning can hurt performance when few tasks are used up until a critical point (usually above 15) after which performance improves linearly in the number of tasks.</abstract>
@@ -6499,7 +6499,7 @@
       <title>Frustratingly Simple but Surprisingly Strong: Using Language-Independent Features for Zero-shot Cross-lingual Semantic Parsing</title>
       <author><first>Jingfeng</first><last>Yang</last></author>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <pages>5848–5856</pages>
       <abstract>The availability of corpora has led to significant advances in training semantic parsers in English. Unfortunately, for languages other than English, annotated data is limited and so is the performance of the developed parsers. Recently, pretrained multilingual models have been proven useful for zero-shot cross-lingual transfer in many NLP tasks. What else does it require to apply a parser trained in English to other languages for zero-shot cross-lingual semantic parsing? Will simple language-independent features help? To this end, we experiment with six Discourse Representation Structure (DRS) semantic parsers in English, and generalize them to Italian, German and Dutch, where there are only a small number of manually annotated parses available. Extensive experiments show that despite its simplicity, adding Universal Dependency (UD) relations and Universal POS tags (UPOS) as model-agnostic features achieves surprisingly strong improvement on all parsers.</abstract>
@@ -6530,7 +6530,7 @@
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>James</first><last>Cross</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>5865–5875</pages>
       <abstract>Sentence-level Quality estimation (QE) of machine translation is traditionally formulated as a regression task, and the performance of QE models is typically measured by Pearson correlation with human labels. Recent QE models have achieved previously-unseen levels of correlation with human judgments, but they rely on large multilingual contextualized language models that are computationally expensive and make them infeasible for real-world applications. In this work, we evaluate several model compression techniques for QE and find that, despite their popularity in other NLP tasks, they lead to poor performance in this regression setting. We observe that a full model parameterization is required to achieve SoTA results in a regression task. However, we argue that the level of expressiveness of a model in a continuous range is unnecessary given the downstream applications of QE, and show that reframing QE as a classification problem and evaluating QE models using classification metrics would better reflect their actual performance in real-world applications.</abstract>
       <url hash="db4e751d">2021.emnlp-main.474</url>
@@ -6544,7 +6544,7 @@
       <author><first>Anoop</first><last>Babu</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Sherzod</first><last>Kariev</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Otabek</first><last>Abduraufov</last></author>
       <author><first>Mammad</first><last>Hajili</last></author>
       <author><first>Sardana</first><last>Ivanova</last></author>
@@ -6565,9 +6565,9 @@
     </paper>
     <paper id="476">
       <title>Analyzing the Surprising Variability in Word Embedding Stability Across Languages</title>
-      <author><first>Laura</first><last>Burdick</last></author>
+      <author id="laura-burdick"><first>Laura</first><last>Burdick</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>5891–5901</pages>
       <abstract>Word embeddings are powerful representations that form the foundation of many natural language processing architectures, both in English and in other languages. To gain further insight into word embeddings, we explore their stability (e.g., overlap between the nearest neighbors of a word in different embedding spaces) in diverse languages. We discuss linguistic properties that are related to stability, drawing out insights about correlations with affixing, language gender systems, and other features. This has implications for embedding use, particularly in research that uses them to study language trends.</abstract>
       <url hash="2331dae8">2021.emnlp-main.476</url>
@@ -6630,7 +6630,7 @@
       <author id="yang-liu-3m"><first>Yang</first><last>Liu</last></author>
       <author><first>Hua</first><last>Cheng</last></author>
       <author><first>Russell</first><last>Klopfer</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Thomas</first><last>Schaaf</last></author>
       <pages>5941–5953</pages>
       <abstract>Multi-label document classification (MLDC) problems can be challenging, especially for long documents with a large label set and a long-tail distribution over labels. In this paper, we present an effective convolutional attention network for the MLDC problem with a focus on medical code prediction from clinical documents. Our innovations are three-fold: (1) we utilize a deep convolution-based encoder with the squeeze-and-excitation networks and residual networks to aggregate the information across the document and learn meaningful document representations that cover different ranges of texts; (2) we explore multi-layer and sum-pooling attention to extract the most informative features from these multi-scale representations; (3) we combine binary cross entropy loss and focal loss to improve performance for rare labels. We focus our evaluation study on MIMIC-III, a widely used dataset in the medical domain. Our models outperform prior work on medical coding and achieve new state-of-the-art results on multiple metrics. We also demonstrate the language independent nature of our approach by applying it to two non-English datasets. Our model outperforms prior best model and a multilingual Transformer model by a substantial margin.</abstract>
@@ -6658,7 +6658,7 @@
       <title><fixed-case>IGA</fixed-case>: An Intent-Guided Authoring Assistant</title>
       <author><first>Simeng</first><last>Sun</last></author>
       <author><first>Wenlong</first><last>Zhao</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
       <author><first>Vlad</first><last>Morariu</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
@@ -6710,7 +6710,7 @@
       <title>A Semantic Feature-Wise Transformation Relation Network for Automatic Short Answer Grading</title>
       <author><first>Zhaohui</first><last>Li</last></author>
       <author><first>Yajur</first><last>Tomar</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>6030–6040</pages>
       <abstract>Automatic short answer grading (ASAG) is the task of assessing students’ short natural language responses to objective questions. It is a crucial component of new education platforms, and could support more wide-spread use of constructed response questions to replace cognitively less challenging multiple choice questions. We propose a Semantic Feature-wise transformation Relation Network (SFRN) that exploits the multiple components of ASAG datasets more effectively. SFRN captures relational knowledge among the questions (Q), reference answers or rubrics (R), and labeled student answers (A). A relation network learns vector representations for the elements of QRA triples, then combines the learned representations using learned semantic feature-wise transformations. We apply translation-based data augmentation to address the two problems of limited training data, and high data skew for multi-class ASAG tasks. Our model has up to 11% performance improvement over state-of-the-art results on the benchmark SemEval-2013 datasets, and surpasses custom approaches designed for a Kaggle challenge, demonstrating its generality.</abstract>
       <url hash="66e9b312">2021.emnlp-main.487</url>
@@ -6735,10 +6735,10 @@
       <author><first>Han</first><last>Li</last></author>
       <author><first>Ameen</first><last>Patel</last></author>
       <author><first>Sidharth</first><last>Mudgal</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>6054–6063</pages>
       <abstract>Natural Language Understanding (NLU) is an established component within a conversational AI or digital assistant system, and it is responsible for producing semantic understanding of a user request. We propose a scalable and automatic approach for improving NLU in a large-scale conversational AI system by leveraging implicit user feedback, with an insight that user interaction data and dialog context have rich information embedded from which user satisfaction and intention can be inferred. In particular, we propose a domain-agnostic framework for curating new supervision data for improving NLU from live production traffic. With an extensive set of experiments, we show the results of applying the framework and improving NLU for a large-scale production system across 10 domains.</abstract>
       <url hash="66d00b1e">2021.emnlp-main.489</url>
@@ -6787,12 +6787,12 @@
     </paper>
     <paper id="493">
       <title>Perhaps <fixed-case>PTLM</fixed-case>s Should Go to School – A Task to Assess Open Book and Closed Book <fixed-case>QA</fixed-case></title>
-      <author><first>Manuel</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel</first><last>Ciosici</last></author>
       <author><first>Joe</first><last>Cecil</last></author>
       <author><first>Dong-Ho</first><last>Lee</last></author>
       <author><first>Alex</first><last>Hedges</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>6104–6111</pages>
       <abstract>Our goal is to deliver a new task and leaderboard to stimulate research on question answering and pre-trained language models (PTLMs) to understand a significant instructional document, e.g., an introductory college textbook or a manual. PTLMs have shown great success in many question-answering tasks, given significant supervised training, but much less so in zero-shot settings. We propose a new task that includes two college-level introductory texts in the social sciences (American Government 2e) and humanities (U.S. History), hundreds of true/false statements based on review questions written by the textbook authors, validation/development tests based on the first eight chapters of the textbooks, blind tests based on the remaining textbook chapters, and baseline results given state-of-the-art PTLMs. Since the questions are balanced, random performance should be ~50%. T5, fine-tuned with BoolQ achieves the same performance, suggesting that the textbook’s content is not pre-represented in the PTLM. Taking the exam closed book, but having read the textbook (i.e., adding the textbook to T5’s pre-training), yields at best minor improvement (56%), suggesting that the PTLM may not have “understood” the textbook (or perhaps misunderstood the questions). Performance is better (~60%) when the exam is taken open-book (i.e., allowing the machine to automatically retrieve a paragraph and use it to answer the question).</abstract>
       <url hash="586e0f3f">2021.emnlp-main.493</url>
@@ -6857,7 +6857,7 @@
       <author><first>Song</first><last>Feng</last></author>
       <author><first>Siva Sankalp</first><last>Patel</last></author>
       <author><first>Hui</first><last>Wan</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>6162–6176</pages>
       <abstract>We propose MultiDoc2Dial, a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as machine reading comprehension task based on a single given document or passage. In this work, we aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. To facilitate such task, we introduce a new dataset that contains dialogues grounded in multiple documents from four different domains. We also explore modeling the dialogue-based and document-based contexts in the dataset. We present strong baseline approaches and various experimental results, aiming to support further research efforts on such a task.</abstract>
       <url hash="24beff46">2021.emnlp-main.498</url>
@@ -6876,7 +6876,7 @@
       <author><first>Amardeep</first><last>Kumar</last></author>
       <author><first>Isabelle G.</first><last>Lee</last></author>
       <author><first>Anish</first><last>Acharya</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>6177–6192</pages>
       <abstract>Code-switching is the communication phenomenon where the speakers switch between different languages during a conversation. With the widespread adoption of conversational agents and chat platforms, code-switching has become an integral part of written conversations in many multi-lingual communities worldwide. Therefore, it is essential to develop techniques for understanding and summarizing these conversations. Towards this objective, we introduce the task of abstractive summarization of Hindi-English (Hi-En) code-switched conversations. We also develop the first code-switched conversation summarization dataset - <i>GupShup</i>, which contains over 6,800 Hi-En conversations and their corresponding human-annotated summaries in English (En) and Hi-En. We present a detailed account of the entire data collection and annotation process. We analyze the dataset using various code-switching statistics. We train state-of-the-art abstractive summarization models and report their performances using both automated metrics and human evaluation. Our results show that multi-lingual mBART and multi-view seq2seq models obtain the best performances on this new dataset. We also conduct an extensive qualitative analysis to provide insight into the models and some of their shortcomings.</abstract>
       <url hash="712d3676">2021.emnlp-main.499</url>
@@ -6900,7 +6900,7 @@
     </paper>
     <paper id="501">
       <title>Data Collection vs. Knowledge Graph Completion: What is Needed to Improve Coverage?</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Yuchen</first><last>Bian</last></author>
       <pages>6210–6215</pages>
       <abstract>This survey/position paper discusses ways to improve coverage of resources such as WordNet. Rapp estimated correlations, rho, between corpus statistics and pyscholinguistic norms. rho improves with quantity (corpus size) and quality (balance). 1M words is enough for simple estimates (unigram frequencies), but at least 100x more is required for good estimates of word associations and embeddings. Given such estimates, WordNet’s coverage is remarkable. WordNet was developed on SemCor, a small sample (200k words) from the Brown Corpus. Knowledge Graph Completion (KGC) attempts to learn missing links from subsets. But Rapp’s estimates of sizes suggest it would be more profitable to collect more data than to infer missing information that is not there.</abstract>
@@ -6927,7 +6927,7 @@
       <title>On the Benefit of Syntactic Supervision for Cross-lingual Transfer in Semantic Role Labeling</title>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6229–6246</pages>
       <abstract>Although recent developments in neural architectures and pre-trained representations have greatly increased state-of-the-art model performance on fully-supervised semantic role labeling (SRL), the task remains challenging for languages where supervised SRL training data are not abundant. Cross-lingual learning can improve performance in this setting by transferring knowledge from high-resource languages to low-resource ones. Moreover, we hypothesize that annotations of syntactic dependencies can be leveraged to further facilitate cross-lingual transfer. In this work, we perform an empirical exploration of the helpfulness of syntactic supervision for crosslingual SRL within a simple multitask learning scheme. With comprehensive evaluations across ten languages (in addition to English) and three SRL benchmark datasets, including both dependency- and span-based SRL, we show the effectiveness of syntactic supervision in low-resource scenarios.</abstract>
       <url hash="b4124564">2021.emnlp-main.503</url>
@@ -6976,10 +6976,10 @@
       <title>Structure-aware Fine-tuning of Sequence-to-sequence Transformers for Transition-based <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Jiawei</first><last>Zhou</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>6279–6290</pages>
       <abstract>Predicting linearized Abstract Meaning Representation (AMR) graphs using pre-trained sequence-to-sequence Transformer models has recently led to large improvements on AMR parsing benchmarks. These parsers are simple and avoid explicit modeling of structure but lack desirable properties such as graph well-formedness guarantees or built-in graph-sentence alignments. In this work we explore the integration of general pre-trained sequence-to-sequence language models and a structure-aware transition-based approach. We depart from a pointer-based transition system and propose a simplified transition set, designed to better exploit pre-trained language models for structured fine-tuning. We also explore modeling the parser state within the pre-trained encoder-decoder architecture and different vocabulary strategies for the same purpose. We provide a detailed comparison with recent progress in AMR parsing and show that the proposed parser retains the desirable properties of previous transition-based approaches, while being simpler and reaching the new parsing state of the art for AMR 2.0, without the need for graph re-categorization.</abstract>
       <url hash="0cfd512f">2021.emnlp-main.507</url>
@@ -6994,7 +6994,7 @@
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6291–6310</pages>
       <abstract>Defeasible reasoning is the mode of reasoning where conclusions can be overturned by taking into account new evidence. Existing cognitive science literature on defeasible reasoning suggests that a person forms a “mental model” of the problem scenario before answering questions. Our research goal asks whether neural models can similarly benefit from envisioning the question scenario before answering a defeasible query. Our approach is, given a question, to have a model first create a graph of relevant influences, and then leverage that graph as an additional input when answering the question. Our system, CURIOUS, achieves a new state-of-the-art on three different defeasible reasoning datasets. This result is significant as it illustrates that performance can be improved by guiding a system to “think about” a question and explicitly model the scenario, rather than answering reflexively.</abstract>
       <url hash="9b25d655">2021.emnlp-main.508</url>
@@ -7021,7 +7021,7 @@
       <title>Does <fixed-case>BERT</fixed-case> Learn as Humans Perceive? Understanding Linguistic Styles through Lexica</title>
       <author><first>Shirley Anugrah</first><last>Hayati</last></author>
       <author><first>Dongyeop</first><last>Kang</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>6323–6331</pages>
       <abstract>People convey their intention and attitude through linguistic styles of the text that they write. In this study, we investigate lexicon usages across styles throughout two lenses: human perception and machine word importance, since words differ in the strength of the stylistic cues that they provide. To collect labels of human perception, we curate a new dataset, Hummingbird, on top of benchmarking style datasets. We have crowd workers highlight the representative words in the text that makes them think the text has the following styles: politeness, sentiment, offensiveness, and five emotion types. We then compare these human word labels with word importance derived from a popular fine-tuned style classifier like BERT. Our results show that the BERT often finds content words not relevant to the target style as important words used in style prediction, but humans do not perceive the same way even though for some styles (e.g., positive sentiment and joy) human- and machine-identified words share significant overlap for some styles.</abstract>
       <url hash="068a99d5">2021.emnlp-main.510</url>
@@ -7083,7 +7083,7 @@
       <author><first>Masum</first><last>Hasan</last></author>
       <author><first>Md Saiful</first><last>Islam</last></author>
       <author><first>Kurtis</first><last>Haut</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ehsan</first><last>Hoque</last></author>
       <pages>6387–6397</pages>
       <abstract>The combination of gestures, intonations, and textual content plays a key role in argument delivery. However, the current literature mostly considers textual content while assessing the quality of an argument, and it is limited to datasets containing short sequences (18-48 words). In this paper, we study argument quality assessment in a multimodal context, and experiment on DBATES, a publicly available dataset of long debate videos. First, we propose a set of interpretable debate centric features such as clarity, content variation, body movement cues, and pauses, inspired by theories of argumentation quality. Second, we design the Multimodal ARgument Quality assessor (MARQ) – a hierarchical neural network model that summarizes the multimodal signals on long sequences and enriches the multimodal embedding with debate centric features. Our proposed MARQ model achieves an accuracy of 81.91% on the argument quality prediction task and outperforms established baseline models with an error rate reduction of 22.7%. Through ablation studies, we demonstrate the importance of multimodal cues in modeling argument quality.</abstract>
@@ -7097,7 +7097,7 @@
       <author><first>Arjun</first><last>Akula</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Keze</first><last>Wang</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>6398–6416</pages>
       <abstract>Neural module networks (NMN) are a popular approach for grounding visual referring expressions. Prior implementations of NMN use pre-defined and fixed textual inputs in their module instantiation. This necessitates a large number of modules as they lack the ability to share weights and exploit associations between similar textual contexts (e.g. “dark cube on the left” vs. “black cube on the left”). In this work, we address these limitations and evaluate the impact of contextual clues in improving the performance of NMN models. First, we address the problem of fixed textual inputs by parameterizing the module arguments. This substantially reduce the number of modules in NMN by up to 75% without any loss in performance. Next we propose a method to contextualize our parameterized model to enhance the module’s capacity in exploiting the visiolinguistic associations. Our model outperforms the state-of-the-art NMN model on CLEVR-Ref+ dataset with +8.1% improvement in accuracy on the single-referent test set and +4.3% on the full test set. Additionally, we demonstrate that contextualization provides +11.2% and +1.7% improvements in accuracy over prior NMN models on CLOSURE and NLVR2. We further evaluate the impact of our contextualization by constructing a contrast set for CLEVR-Ref+, which we call CC-Ref+. We significantly outperform the baselines by as much as +10.4% absolute accuracy on CC-Ref+, illustrating the generalization skills of our approach.</abstract>
@@ -7139,7 +7139,7 @@
       <author><first>Lingfei</first><last>Wu</last></author>
       <author><first>Tian</first><last>Gao</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>6443–6456</pages>
       <abstract>Timeline Summarization identifies major events from a news collection and describes them following temporal order, with key dates tagged. Previous methods generally generate summaries separately for each date after they determine the key dates of events. These methods overlook the events’ intra-structures (arguments) and inter-structures (event-event connections). Following a different route, we propose to represent the news articles as an event-graph, thus the summarization becomes compressing the whole graph to its salient sub-graph. The key hypothesis is that the events connected through shared arguments and temporal order depict the skeleton of a timeline, containing events that are semantically related, temporally coherent and structurally salient in the global event graph. A time-aware optimal transport distance is then introduced for learning the compression model in an unsupervised manner. We show that our approach significantly improves on the state of the art on three real-world datasets, including two public standard benchmarks and our newly collected Timeline100 dataset.</abstract>
       <url hash="22b6177e">2021.emnlp-main.519</url>
@@ -7152,7 +7152,7 @@
       <author><first>Sangwoo</first><last>Cho</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Tim</first><last>Ganter</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Nedim</first><last>Lipka</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Hailin</first><last>Jin</last></author>
@@ -7215,7 +7215,7 @@
     <paper id="525">
       <title><fixed-case>SPECTRA</fixed-case>: Sparse Structured Text Rationalization</title>
       <author><first>Nuno M.</first><last>Guerreiro</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>6534–6550</pages>
       <abstract>Selective rationalization aims to produce decisions along with rationales (e.g., text highlights or word alignments between two sentences). Commonly, rationales are modeled as stochastic binary masks, requiring sampling-based gradient estimators, which complicates training and requires careful hyperparameter tuning. Sparse attention mechanisms are a deterministic alternative, but they lack a way to regularize the rationale extraction (e.g., to control the sparsity of a text highlight or the number of alignments). In this paper, we present a unified framework for deterministic extraction of structured explanations via constrained inference on a factor graph, forming a differentiable layer. Our approach greatly eases training and rationale regularization, generally outperforming previous work on what comes to performance and plausibility of the extracted rationales. We further provide a comparative study of stochastic and deterministic methods for rationale extraction for classification and natural language inference tasks, jointly assessing their predictive power, quality of the explanations, and model variability.</abstract>
       <url hash="e0ef9dc5">2021.emnlp-main.525</url>
@@ -7255,7 +7255,7 @@
     </paper>
     <paper id="528">
       <title>Aspect-Controllable Opinion Summarization</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Stefanos</first><last>Angelidis</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>6578–6593</pages>
@@ -7318,7 +7318,7 @@
       <title>Multilingual Unsupervised Neural Machine Translation with Denoising Adapters</title>
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Alexandre</first><last>Berard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Matthias</first><last>Gallé</last></author>
       <pages>6650–6662</pages>
       <abstract>We consider the problem of multilingual unsupervised machine translation, translating to and from languages that only have monolingual data by using auxiliary parallel language pairs. For this problem the standard procedure so far to leverage the monolingual data is _back-translation_, which is computationally costly and hard to tune. In this paper we propose instead to use _denoising adapters_, adapter layers with a denoising objective, on top of pre-trained mBART-50. In addition to the modularity and flexibility of such an approach we show that the resulting translations are on-par with back-translating as measured by BLEU, and furthermore it allows adding unseen languages incrementally.</abstract>
@@ -7343,7 +7343,7 @@
       <title>Controlling Machine Translation for Multiple Attributes with Additive Interventions</title>
       <author><first>Andrea</first><last>Schioppa</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Katja</first><last>Filippova</last></author>
       <pages>6676–6696</pages>
       <abstract>Fine-grained control of machine translation (MT) outputs along multiple attributes is critical for many modern MT applications and is a requirement for gaining users’ trust. A standard approach for exerting control in MT is to prepend the input with a special tag to signal the desired output attribute. Despite its simplicity, attribute tagging has several drawbacks: continuous values must be binned into discrete categories, which is unnatural for certain applications; interference between multiple tags is poorly understood. We address these problems by introducing vector-valued interventions which allow for fine-grained control over multiple attributes simultaneously via a weighted linear combination of the corresponding vectors. For some attributes, our approach even allows for fine-tuning a model trained without annotations to support such interventions. In experiments with three attributes (length, politeness and monotonicity) and two language pairs (English to German and Japanese) our models achieve better control over a wider range of tasks compared to tagging, and translation quality does not degrade when no control is requested. Finally, we demonstrate how to enable control in an already trained model after a relatively cheap fine-tuning stage.</abstract>
@@ -7355,7 +7355,7 @@
     <paper id="536">
       <title>A Generative Framework for Simultaneous Machine Translation</title>
       <author><first>Yishu</first><last>Miao</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>6697–6706</pages>
       <abstract>We propose a generative framework for simultaneous machine translation. Conventional approaches use a fixed number of source words to translate or learn dynamic policies for the number of source words by reinforcement learning. Here we formulate simultaneous translation as a structural sequence-to-sequence learning problem. A latent variable is introduced to model read or translate actions at every time step, which is then integrated out to consider all the possible translation policies. A re-parameterised Poisson prior is used to regularise the policies which allows the model to explicitly balance translation quality and latency. The experiments demonstrate the effectiveness and robustness of the generative framework, which achieves the best BLEU scores given different average translation latencies on benchmark datasets.</abstract>
@@ -7368,8 +7368,8 @@
       <title>It Is Not As Good As You Think! Evaluating Simultaneous Machine Translation on Interpretation Data</title>
       <author><first>Jinming</first><last>Zhao</last></author>
       <author><first>Philip</first><last>Arthur</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Ehsan</first><last>Shareghi</last></author>
       <pages>6707–6715</pages>
       <abstract>Most existing simultaneous machine translation (SiMT) systems are trained and evaluated on offline translation corpora. We argue that SiMT systems should be trained and tested on real interpretation data. To illustrate this argument, we propose an interpretation test set and conduct a realistic evaluation of SiMT trained on offline translations. Our results, on our test set along with 3 existing smaller scale language pairs, highlight the difference of up-to 13.83 BLEU score when SiMT models are evaluated on translation vs interpretation data. In the absence of interpretation training data, we propose a translation-to-interpretation (T2I) style transfer method which allows converting existing offline translations into interpretation-style data, leading to up-to 2.8 BLEU improvement. However, the evaluation gap remains notable, calling for constructing large-scale interpretation corpora better suited for evaluating and developing SiMT systems.</abstract>
@@ -7385,7 +7385,7 @@
       <author><first>Xujiang</first><last>Zhao</last></author>
       <author><first>Haifeng</first><last>Chen</last></author>
       <author><first>Feng</first><last>Chen</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>6716–6723</pages>
       <abstract>Recent multilingual pre-trained language models have achieved remarkable zero-shot performance, where the model is only finetuned on one source language and directly evaluated on target languages. In this work, we propose a self-learning framework that further utilizes unlabeled data of target languages, combined with uncertainty estimation in the process to select high-quality silver labels. Three different uncertainties are adapted and analyzed specifically for the cross lingual transfer: Language Heteroscedastic/Homoscedastic Uncertainty (LEU/LOU), Evidential Uncertainty (EVI). We evaluate our framework with uncertainties on two cross-lingual tasks including Named Entity Recognition (NER) and Natural Language Inference (NLI) covering 40 languages in total, which outperforms the baselines significantly by 10 F1 for NER on average and 2.5 accuracy for NLI.</abstract>
       <url hash="9dd570d4">2021.emnlp-main.538</url>
@@ -7462,7 +7462,7 @@
       <author><first>Dmytro</first><last>Okhonko</last></author>
       <author><first>Armen</first><last>Aghajanyan</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Christoph</first><last>Feichtenhofer</last></author>
       <pages>6787–6800</pages>
       <abstract>We present VideoCLIP, a contrastive approach to pre-train a unified model for zero-shot video and text understanding, without using any labels on downstream tasks. VideoCLIP trains a transformer for video and text by contrasting temporally overlapping positive video-text pairs with hard negatives from nearest neighbor retrieval. Our experiments on a diverse series of downstream tasks, including sequence-level text-video retrieval, VideoQA, token-level action localization, and action segmentation reveal state-of-the-art performance, surpassing prior work, and in some cases even outperforming supervised approaches. Code is made available at <url>https://github.com/pytorch/fairseq/examples/MMPT</url>.</abstract>
@@ -7603,7 +7603,7 @@
     <paper id="555">
       <title>Generating Datasets with Pretrained Language Models</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>6943–6951</pages>
       <abstract>To obtain high-quality sentence embeddings from pretrained language models (PLMs), they must either be augmented with additional pretraining objectives or finetuned on a large set of labeled text pairs. While the latter approach typically outperforms the former, it requires great human effort to generate suitable datasets of sufficient size. In this paper, we show how PLMs can be leveraged to obtain high-quality sentence embeddings without the need for labeled data, finetuning or modifications to the pretraining objective: We utilize the generative abilities of large and high-performing PLMs to generate entire datasets of labeled text pairs from scratch, which we then use for finetuning much smaller and more efficient models. Our fully unsupervised approach outperforms strong baselines on several semantic textual similarity datasets.</abstract>
       <url hash="62dc576c">2021.emnlp-main.555</url>
@@ -7614,7 +7614,7 @@
     <paper id="556">
       <title>Continuous Entailment Patterns for Lexical Inference in Context</title>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>6952–6959</pages>
       <abstract>Combining a pretrained language model (PLM) with textual patterns has been shown to help in both zero- and few-shot settings. For zero-shot performance, it makes sense to design patterns that closely resemble the text seen during self-supervised pretraining because the model has never seen anything else. Supervised training allows for more flexibility. If we allow for tokens outside the PLM’s vocabulary, patterns can be adapted more flexibly to a PLM’s idiosyncrasies. Contrasting patterns where a “token” can be any continuous vector from those where a discrete choice between vocabulary elements has to be made, we call our method CONtinous pAtterNs (CONAN). We evaluate CONAN on two established benchmarks for lexical inference in context (LIiC) a.k.a. predicate entailment, a challenging natural language understanding task with relatively small training data. In a direct comparison with discrete patterns, CONAN consistently leads to improved performance, setting a new state of the art. Our experiments give valuable insights on the kind of pattern that enhances a PLM’s performance on LIiC and raise important questions regarding our understanding of PLMs using text patterns.</abstract>
       <url hash="c34c3f03">2021.emnlp-main.556</url>
@@ -7680,7 +7680,7 @@
     <paper id="561">
       <title>Generative Context Pair Selection for Multi-hop Question Answering</title>
       <author><first>Dheeru</first><last>Dua</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Patrick</first><last>Ng</last></author>
       <author><first>Ben</first><last>Athiwaratkun</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
@@ -7698,8 +7698,8 @@
       <author><first>Arij</first><last>Riabi</last></author>
       <author><first>Thomas</first><last>Scialom</last></author>
       <author><first>Rachel</first><last>Keraron</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Jacopo</first><last>Staiano</last></author>
       <pages>7016–7030</pages>
       <abstract>Coupled with the availability of large scale datasets, deep learning architectures have enabled rapid progress on the Question Answering task. However, most of those datasets are in English, and the performances of state-of-the-art multilingual models are significantly lower when evaluated on non-English data. Due to high data collection costs, it is not realistic to obtain annotated data for each language one desires to support. We propose a method to improve the Cross-lingual Question Answering performance without requiring additional annotated data, leveraging Question Generation models to produce synthetic samples in a cross-lingual fashion. We show that the proposed method allows to significantly outperform the baselines trained on English data only. We report a new state-of-the-art on four datasets: MLQA, XQuAD, SQuAD-it and PIAF (fr).</abstract>
@@ -7714,7 +7714,7 @@
       <author><first>Giwon</first><last>Hong</last></author>
       <author><first>Kyung-min</first><last>Kim</last></author>
       <author><first>Junmo</first><last>Kang</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>7031–7037</pages>
       <abstract>Numerical reasoning in machine reading comprehension (MRC) has shown drastic improvements over the past few years. While the previous models for numerical MRC are able to interpolate the learned numerical reasoning capabilities, it is not clear whether they can perform just as well on numbers unseen in the training dataset. Our work rigorously tests state-of-the-art models on DROP, a numerical MRC dataset, to see if they can handle passages that contain out-of-range numbers. One of the key findings is that the models fail to extrapolate to unseen numbers. Presenting numbers as digit-by-digit input to the model, we also propose the <i>E-digit</i> number form that alleviates the lack of extrapolation in models and reveals the need to treat numbers differently from regular words in the text. Our work provides a valuable insight into the numerical MRC models and the way to represent number forms in MRC.</abstract>
       <url hash="64f481c2">2021.emnlp-main.563</url>
@@ -7728,7 +7728,7 @@
       <author><first>Peter</first><last>West</last></author>
       <author><first>Vered</first><last>Shwartz</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>7038–7051</pages>
       <abstract>Large language models have shown promising results in zero-shot settings. For example, they can perform multiple choice tasks simply by conditioning on a question and selecting the answer with the highest probability. However, ranking by string probability can be problematic due to surface form competition—wherein different surface forms compete for probability mass, even if they represent the same underlying concept in a given context, e.g. “computer” and “PC.” Since probability mass is finite, this lowers the probability of the correct answer, due to competition from other strings that are valid answers (but not one of the multiple choice options). We introduce Domain Conditional Pointwise Mutual Information, an alternative scoring function that directly compensates for surface form competition by simply reweighing each option according to its a priori likelihood within the context of a specific task. It achieves consistent gains in zero-shot performance over both calibrated and uncalibrated scoring functions on all GPT-2 and GPT-3 models on a variety of multiple choice datasets.</abstract>
       <url hash="10f8ae31">2021.emnlp-main.564</url>
@@ -7755,7 +7755,7 @@
       <title>Back-Training excels Self-Training at Unsupervised Domain Adaptation of Question Generation and Passage Retrieval</title>
       <author><first>Devang</first><last>Kulshreshtha</last></author>
       <author><first>Robert</first><last>Belfer</last></author>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>7064–7078</pages>
       <abstract>In this work, we introduce back-training, an alternative to self-training for unsupervised domain adaptation (UDA). While self-training generates synthetic training data where natural inputs are aligned with noisy outputs, back-training results in natural outputs aligned with noisy inputs. This significantly reduces the gap between target domain and synthetic data distribution, and reduces model overfitting to source domain. We run UDA experiments on question generation and passage retrieval from the Natural Questions domain to machine learning and biomedical domains. We find that back-training vastly outperforms self-training by a mean improvement of 7.8 BLEU-4 points on generation, and 17.6% top-20 retrieval accuracy across both domains. We further propose consistency filters to remove low-quality synthetic data before training. We also release a new domain-adaptation dataset - MLQuestions containing 35K unaligned questions, 50K unaligned passages, and 3K aligned question-passage pairs.</abstract>
@@ -7809,7 +7809,7 @@
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Shruti</first><last>Rijhwani</last></author>
       <author><first>Aditi</first><last>Chaudhary</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>7131–7150</pages>
@@ -7823,7 +7823,7 @@
       <title><fixed-case>AM</fixed-case>2i<fixed-case>C</fixed-case>o: Evaluating Word Meaning in Context across Low-Resource Languages with Adversarial Examples</title>
       <author><first>Qianchu</first><last>Liu</last></author>
       <author><first>Edoardo Maria</first><last>Ponti</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>7151–7162</pages>
@@ -7836,7 +7836,7 @@
     <paper id="572">
       <title><fixed-case>C</fixed-case>ross<fixed-case>F</fixed-case>it: A Few-shot Learning Challenge for Cross-task Generalization in <fixed-case>NLP</fixed-case></title>
       <author><first>Qinyuan</first><last>Ye</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>7163–7189</pages>
       <abstract>Humans can learn a new language task efficiently with only few examples, by leveraging their knowledge obtained when learning prior tasks. In this paper, we explore whether and how such cross-task generalization ability can be acquired, and further applied to build better few-shot learners across diverse NLP tasks. We introduce CrossFit, a problem setup for studying cross-task generalization ability, which standardizes seen/unseen task partitions, data access during different learning stages, and the evaluation protocols. To instantiate different seen/unseen task partitions in CrossFit and facilitate in-depth analysis, we present the NLP Few-shot Gym, a repository of 160 diverse few-shot NLP tasks created from open-access NLP datasets and converted to a unified text-to-text format. Our analysis reveals that the few-shot learning ability on unseen tasks can be improved via an upstream learning stage using a set of seen tasks. We also observe that the selection of upstream learning tasks can significantly influence few-shot performance on unseen tasks, asking further analysis on task similarity and transferability.</abstract>
@@ -7849,7 +7849,7 @@
     <paper id="573">
       <title>On the Influence of Masking Policies in Intermediate Pre-training</title>
       <author><first>Qinyuan</first><last>Ye</last></author>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Sinong</first><last>Wang</last></author>
       <author><first>Benjamin</first><last>Bolte</last></author>
       <author><first>Hao</first><last>Ma</last></author>
@@ -7880,7 +7880,7 @@
       <author><first>Tanay</first><last>Dixit</last></author>
       <author><first>Dev Yashpal</first><last>Sheth</last></author>
       <author><first>Sreyas</first><last>Mohan</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>7219–7234</pages>
       <abstract>Natural Language Generation (NLG) evaluation is a multifaceted task requiring assessment of multiple desirable criteria, e.g., fluency, coherency, coverage, relevance, adequacy, overall quality, etc. Across existing datasets for 6 NLG tasks, we observe that the human evaluation scores on these multiple criteria are often not correlated. For example, there is a very low correlation between human scores on fluency and data coverage for the task of structured data to text generation. This suggests that the current recipe of proposing new automatic evaluation metrics for NLG by showing that they correlate well with scores assigned by humans for a single criteria (overall quality) alone is inadequate. Indeed, our extensive study involving 25 automatic evaluation metrics across 6 different tasks and 18 different evaluation criteria shows that there is no single metric which correlates well with human scores on all desirable criteria, for most NLG tasks. Given this situation, we propose CheckLists for better design and evaluation of automatic metrics. We design templates which target a specific criteria (e.g., coverage) and perturb the output such that the quality gets affected only along this specific criteria (e.g., the coverage drops). We show that existing evaluation metrics are not robust against even such simple perturbations and disagree with scores assigned by humans to the perturbed output. The proposed templates thus allow for a fine-grained assessment of automatic evaluation metrics exposing their limitations and will facilitate better design, analysis and evaluation of such metrics. Our templates and code are available at <url>https://iitmnlp.github.io/EvalEval/</url></abstract>
       <url hash="7bf7f3b6">2021.emnlp-main.575</url>
@@ -7919,7 +7919,7 @@
       <author><first>Alexandre</first><last>Muzio</last></author>
       <author><first>Prasad</first><last>Tadepalli</last></author>
       <author><first>Stefan</first><last>Lee</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>7266–7279</pages>
       <abstract>Multilingual Neural Machine Translation (NMT) enables one model to serve all translation directions, including ones that are unseen during training, i.e. zero-shot translation. Despite being theoretically attractive, current models often produce low quality translations – commonly failing to even produce outputs in the right target language. In this work, we observe that off-target translation is dominant even in strong multilingual systems, trained on massive multilingual corpora. To address this issue, we propose a joint approach to regularize NMT models at both representation-level and gradient-level. At the representation level, we leverage an auxiliary target language prediction task to regularize decoder outputs to retain information about the target language. At the gradient level, we leverage a small amount of direct data (in thousands of sentence pairs) to regularize model gradients. Our results demonstrate that our approach is highly effective in both reducing off-target translation occurrences and improving zero-shot translation performance by +5.59 and +10.38 BLEU on WMT and OPUS datasets respectively. Moreover, experiments show that our method also works well when the small amount of direct data is not available.</abstract>
       <url hash="1d4afce6">2021.emnlp-main.578</url>
@@ -7948,7 +7948,7 @@
       <author><first>Yitong</first><last>Li</last></author>
       <author><first>Meng</first><last>Zhang</last></author>
       <author><first>Liangyou</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>7291–7305</pages>
       <abstract>Learning multilingual and multi-domain translation model is challenging as the heterogeneous and imbalanced data make the model converge inconsistently over different corpora in real world. One common practice is to adjust the share of each corpus in the training, so that the learning process is balanced and low-resource cases can benefit from the high resource ones. However, automatic balancing methods usually depend on the intra- and inter-dataset characteristics, which is usually agnostic or requires human priors. In this work, we propose an approach, MultiUAT, that dynamically adjusts the training data usage based on the model’s uncertainty on a small set of trusted clean data for multi-corpus machine translation. We experiments with two classes of uncertainty measures on multilingual (16 languages with 4 settings) and multi-domain settings (4 for in-domain and 2 for out-of-domain on English-German translation) and demonstrate our approach MultiUAT substantially outperforms its baselines, including both static and dynamic strategies. We analyze the cross-domain transfer and show the deficiency of static and similarity based methods.</abstract>
@@ -8008,8 +8008,8 @@
     </paper>
     <paper id="585">
       <title>Explaining Answers with Entailment Trees</title>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Oyvind</first><last>Tafjord</last></author>
       <author><first>Zhengnan</first><last>Xie</last></author>
       <author><first>Hannah</first><last>Smith</last></author>
@@ -8051,7 +8051,7 @@
       <author><first>Jennifer</first><last>Lee</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>7404–7418</pages>
       <abstract>One of the challenges faced by conversational agents is their inability to identify unstated presumptions of their users’ commands, a task trivial for humans due to their common sense. In this paper, we propose a zero-shot commonsense reasoning system for conversational agents in an attempt to achieve this. Our reasoner uncovers unstated presumptions from user commands satisfying a general template of if-(state), then-(action), because-(goal). Our reasoner uses a state-of-the-art transformer-based generative commonsense knowledge base (KB) as its source of background knowledge for reasoning. We propose a novel and iterative knowledge query mechanism to extract multi-hop reasoning chains from the neural KB which uses symbolic logic rules to significantly reduce the search space. Similar to any KBs gathered to date, our commonsense KB is prone to missing knowledge. Therefore, we propose to conversationally elicit the missing knowledge from human users with our novel dynamic question generation strategy, which generates and presents contextualized queries to human users. We evaluate the model with a user study with human users that achieves a 35% higher success rate compared to SOTA.</abstract>
       <url hash="3fab6882">2021.emnlp-main.588</url>
@@ -8079,7 +8079,7 @@
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Zhenpeng</first><last>Zhou</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Eunjoon</first><last>Cho</last></author>
@@ -8114,7 +8114,7 @@
       <title>Investigating Robustness of Dialog Models to Popular Figurative Language Constructs</title>
       <author><first>Harsh</first><last>Jhamtani</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>7476–7485</pages>
       <abstract>Humans often employ figurative language use in communication, including during interactions with dialog systems. Thus, it is important for real-world dialog systems to be able to handle popular figurative language constructs like metaphor and simile. In this work, we analyze the performance of existing dialog models in situations where the input dialog context exhibits use of figurative language. We observe large gaps in handling of figurative language when evaluating the models on two open domain dialog datasets. When faced with dialog contexts consisting of figurative language, some models show very large drops in performance compared to contexts without figurative language. We encourage future research in dialog modeling to separately analyze and report results on figurative language in order to better test model capabilities relevant to real-world use. Finally, we propose lightweight solutions to help existing models become more robust to figurative language by simply using an external resource to translate figurative language to literal (non-figurative) forms while preserving the meaning to the best extent possible.</abstract>
@@ -8143,7 +8143,7 @@
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Madeleine</first><last>van Zuylen</last></author>
       <author><first>Bailey</first><last>Kuehl</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <pages>7494–7513</pages>
       <abstract>To assess the effectiveness of any medical intervention, researchers must conduct a time-intensive and manual literature review. NLP systems can help to automate or assist in parts of this expensive process. In support of this goal, we release MSˆ2 (Multi-Document Summarization of Medical Studies), a dataset of over 470k documents and 20K summaries derived from the scientific literature. This dataset facilitates the development of systems that can assess and aggregate contradictory evidence across multiple studies, and is the first large-scale, publicly available multi-document summarization dataset in the biomedical domain. We experiment with a summarization system based on BART, with promising early results, though significant work remains to achieve higher summarization quality. We formulate our summarization inputs and targets in both free text and structured forms and modify a recently proposed metric to assess the quality of our system’s generated summaries. Data and models are available at <url>https://github.com/allenai/ms2</url>.</abstract>
       <url hash="c7d79f15">2021.emnlp-main.594</url>
@@ -8169,7 +8169,7 @@
     </paper>
     <paper id="596">
       <title>On the Challenges of Evaluating Compositional Explanations in Multi-Hop Inference: Relevance, Completeness, and Expert Ratings</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Kelly J.</first><last>Smith</last></author>
       <author><first>Dan</first><last>Moreno</last></author>
       <author><first>Huitzilin</first><last>Ortiz</last></author>
@@ -8201,7 +8201,7 @@
       <author><first>Pei</first><last>Zhou</last></author>
       <author><first>Rahul</first><last>Khanna</last></author>
       <author><first>Seyeon</first><last>Lee</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Daniel</first><last>Ho</last></author>
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -8217,7 +8217,7 @@
       <author><first>Mingkai</first><last>Deng</last></author>
       <author><first>Bowen</first><last>Tan</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>7580–7605</pages>
       <abstract>Natural language generation (NLG) spans a broad range of tasks, each of which serves for specific objectives and desires different properties of generated text. The complexity makes automatic evaluation of NLG particularly challenging. Previous work has typically focused on a single task and developed individual evaluation metrics based on specific intuitions. In this paper, we propose a unifying perspective based on the nature of information change in NLG tasks, including compression (e.g., summarization), transduction (e.g., text rewriting), and creation (e.g., dialog). _Information alignment_ between input, context, and output text plays a common central role in characterizing the generation. With automatic alignment prediction models, we develop a family of interpretable metrics that are suitable for evaluating key aspects of different NLG tasks, often without need of gold reference data. Experiments show the uniformly designed metrics achieve stronger or comparable correlations with human judgement compared to state-of-the-art metrics in each of diverse tasks, including text summarization, style transfer, and knowledge-grounded dialog.</abstract>
@@ -8231,8 +8231,8 @@
       <title><fixed-case>MATE</fixed-case>: Multi-view Attention for Table Transformer Efficiency</title>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <author><first>Maharshi</first><last>Gor</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>7606–7619</pages>
       <abstract>This work presents a sparse-attention Transformer architecture for modeling documents that contain large tables. Tables are ubiquitous on the web, and are rich in information. However, more than 20% of relational tables on the web have 20 or more rows (Cafarella et al., 2008), and these large tables present a challenge for current Transformer models, which are typically limited to 512 tokens. Here we propose MATE, a novel Transformer architecture designed to model the structure of web tables. MATE uses sparse attention in a way that allows heads to efficiently attend to either rows or columns in a table. This architecture scales linearly with respect to speed and memory, and can handle documents containing more than 8000 tokens with current accelerators. MATE also has a more appropriate inductive bias for tabular data, and sets a new state-of-the-art for three table reasoning datasets. For HybridQA (Chen et al., 2020), a dataset that involves large documents containing tables, we improve the best prior result by 19 points.</abstract>
       <url hash="91232325">2021.emnlp-main.600</url>
@@ -8334,12 +8334,12 @@
       <author><first>Richard</first><last>Shin</last></author>
       <author><first>Christopher</first><last>Lin</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Charles</first><last>Chen</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
       <author><first>Subhro</first><last>Roy</last></author>
       <author><first>Emmanouil Antonios</first><last>Platanios</last></author>
       <author><first>Adam</first><last>Pauls</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>7699–7715</pages>
       <abstract>We explore the use of large pretrained language models as few-shot semantic parsers. The goal in semantic parsing is to generate a structured meaning representation given a natural language input. However, language models are trained to generate natural language. To bridge the gap, we use language models to paraphrase inputs into a controlled sublanguage resembling English that can be automatically mapped to a target meaning representation. Our results demonstrate that with only a small amount of data and very little code to convert into English-like representations, our blueprint for rapidly bootstrapping semantic parsers leads to surprisingly effective performance on multiple community tasks, greatly exceeding baseline methods also trained on the same limited data.</abstract>
@@ -8429,7 +8429,7 @@
     <paper id="615">
       <title>Come hither or go away? Recognising pre-electoral coalition signals in the news</title>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Anna</first><last>Adendorf</last></author>
       <author><first>Oke</first><last>Bahnsen</last></author>
       <author><first>Lukas</first><last>Stoetzer</last></author>
@@ -8516,7 +8516,7 @@
     <paper id="621">
       <title>A Collaborative Multi-agent Reinforcement Learning Framework for Dialog Action Decomposition</title>
       <author><first>Huimin</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>7882–7889</pages>
       <abstract>Most reinforcement learning methods for dialog policy learning train a centralized agent that selects a predefined joint action concatenating domain name, intent type, and slot name. The centralized dialog agent suffers from a great many user-agent interaction requirements due to the large action space. Besides, designing the concatenated actions is laborious to engineers and maybe struggled with edge cases. To solve these problems, we model the dialog policy learning problem with a novel multi-agent framework, in which each part of the action is led by a different agent. The framework reduces labor costs for action templates and decreases the size of the action space for each agent. Furthermore, we relieve the non-stationary problem caused by the changing dynamics of the environment as evolving of agents’ policies by introducing a joint optimization process that makes agents can exchange their policy information. Concurrently, an independent experience replay buffer mechanism is integrated to reduce the dependence between gradients of samples to improve training efficiency. The effectiveness of the proposed framework is demonstrated in a multi-domain environment with both user simulator evaluation and human evaluation.</abstract>
       <url hash="946e81be">2021.emnlp-main.621</url>
@@ -8531,7 +8531,7 @@
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Zhenpeng</first><last>Zhou</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Zhiguang</first><last>Wang</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Eunjoon</first><last>Cho</last></author>
@@ -8553,7 +8553,7 @@
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Shutong</first><last>Feng</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>7901–7914</pages>
       <abstract>The ability to identify and resolve uncertainty is crucial for the robustness of a dialogue system. Indeed, this has been confirmed empirically on systems that utilise Bayesian approaches to dialogue belief tracking. However, such systems consider only confidence estimates and have difficulty scaling to more complex settings. Neural dialogue systems, on the other hand, rarely take uncertainties into account. They are therefore overconfident in their decisions and less robust. Moreover, the performance of the tracking task is often evaluated in isolation, without consideration of its effect on the downstream policy optimisation. We propose the use of different uncertainty measures in neural belief tracking. The effects of these measures on the downstream task of policy optimisation are evaluated by adding selected measures of uncertainty to the feature space of the policy and training policies through interaction with a user simulator. Both human and simulated user results show that incorporating these measures leads to improvements both of the performance and of the robustness of the downstream dialogue policy. This highlights the importance of developing neural dialogue belief trackers that take uncertainty into account.</abstract>
       <url hash="671b4403">2021.emnlp-main.623</url>
@@ -8564,7 +8564,7 @@
     <paper id="624">
       <title>Dynamic Forecasting of Conversation Derailment</title>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>7915–7919</pages>
       <abstract>Online conversations can sometimes take a turn for the worse, either due to systematic cultural differences, accidental misunderstandings, or mere malice. Automatically forecasting derailment in public online conversations provides an opportunity to take early action to moderate it. Previous work in this space is limited, and we extend it in several ways. We apply a pretrained language encoder to the task, which outperforms earlier approaches. We further experiment with shifting the training paradigm for the task from a static to a dynamic one to increase the forecast horizon. This approach shows mixed results: in a high-quality data setting, a longer average forecast horizon can be achieved at the cost of a small drop in F1; in a low-quality data setting, however, dynamic training propagates the noise and is highly detrimental to performance.</abstract>
       <url hash="4c08ad5a">2021.emnlp-main.624</url>
@@ -8660,7 +8660,7 @@
       <title>A Bag of Tricks for Dialogue Summarization</title>
       <author><first>Muhammad</first><last>Khalifa</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>8014–8022</pages>
       <abstract>Dialogue summarization comes with its own peculiar challenges as opposed to news or scientific articles summarization. In this work, we explore four different challenges of the task: handling and differentiating parts of the dialogue belonging to multiple speakers, negation understanding, reasoning about the situation, and informal language understanding. Using a pretrained sequence-to-sequence language model, we explore speaker name substitution, negation scope highlighting, multi-task learning with relevant tasks, and pretraining on in-domain data. Our experiments show that our proposed techniques indeed improve summarization performance, outperforming strong baselines.</abstract>
       <url hash="c8d26f1e">2021.emnlp-main.631</url>
@@ -8670,7 +8670,7 @@
     </paper>
     <paper id="632">
       <title>Paraphrasing Compound Nominalizations</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Ho Hung</first><last>Lim</last></author>
       <author><first>Carol</first><last>Webster</last></author>
       <pages>8023–8028</pages>
@@ -8699,7 +8699,7 @@
     <paper id="634">
       <title>Low-Rank Subspaces for Unsupervised Entity Linking</title>
       <author><first>Akhil</first><last>Arora</last></author>
-      <author><first>Alberto</first><last>Garcia-Duran</last></author>
+      <author id="alberto-garcia-duran"><first>Alberto</first><last>Garcia-Duran</last></author>
       <author><first>Robert</first><last>West</last></author>
       <pages>8037–8054</pages>
       <abstract>Entity linking is an important problem with many applications. Most previous solutions were designed for settings where annotated training data is available, which is, however, not the case in numerous domains. We propose a light-weight and scalable entity linking method, Eigenthemes, that relies solely on the availability of entity names and a referent knowledge base. Eigenthemes exploits the fact that the entities that are truly mentioned in a document (the “gold entities”) tend to form a semantically dense subset of the set of all candidate entities in the document. Geometrically speaking, when representing entities as vectors via some given embedding, the gold entities tend to lie in a low-rank subspace of the full embedding space. Eigenthemes identifies this subspace using the singular value decomposition and scores candidate entities according to their proximity to the subspace. On the empirical front, we introduce multiple strong baselines that compare favorably to (and sometimes even outperform) the existing state of the art. Extensive experiments on benchmark datasets from a variety of real-world domains showcase the effectiveness of our approach.</abstract>
@@ -8753,7 +8753,7 @@
       <title>Back to the Basics: A Quantitative Analysis of Statistical and Graph-Based Term Weighting Schemes for Keyword Extraction</title>
       <author><first>Asahi</first><last>Ushio</last></author>
       <author><first>Federico</first><last>Liberatore</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>8089–8103</pages>
       <abstract>Term weighting schemes are widely used in Natural Language Processing and Information Retrieval. In particular, term weighting is the basis for keyword extraction. However, there are relatively few evaluation studies that shed light about the strengths and shortcomings of each weighting scheme. In fact, in most cases researchers and practitioners resort to the well-known tf-idf as default, despite the existence of other suitable alternatives, including graph-based models. In this paper, we perform an exhaustive and large-scale empirical comparison of both statistical and graph-based term weighting methods in the context of keyword extraction. Our analysis reveals some interesting findings such as the advantages of the less-known lexical specificity with respect to tf-idf, or the qualitative differences between statistical and graph-based methods. Finally, based on our findings we discuss and devise some suggestions for practitioners. Source code to reproduce our experimental results, including a keyword extraction library, are available in the following repository: <url>https://github.com/asahi417/kex</url></abstract>
       <url hash="5795ffdc">2021.emnlp-main.638</url>
@@ -8821,7 +8821,7 @@
       <author><first>Yi</first><last>Huang</last></author>
       <author><first>Buse</first><last>Giledereli</last></author>
       <author><first>Abdullatif</first><last>Köksal</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Elif</first><last>Ozkirimli</last></author>
       <pages>8153–8161</pages>
       <abstract>Multi-label text classification is a challenging task because it requires capturing label dependencies. It becomes even more challenging when class distribution is long-tailed. Resampling and re-weighting are common approaches used for addressing the class imbalance problem, however, they are not effective when there is label dependency besides class imbalance because they result in oversampling of common labels. Here, we introduce the application of balancing loss functions for multi-label text classification. We perform experiments on a general domain dataset with 90 labels (Reuters-21578) and a domain-specific dataset from PubMed with 18211 labels. We find that a distribution-balanced loss function, which inherently addresses both the class imbalance and label linkage problems, outperforms commonly used loss functions. Distribution balancing methods have been successfully used in the image recognition field. Here, we show their effectiveness in natural language processing. Source code is available at <url>https://github.com/blessu/BalancedLossNLP</url>.</abstract>
@@ -8874,7 +8874,7 @@
       <author><first>Nathaniel</first><last>Berger</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <author><first>Sebastian</first><last>Ebert</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <pages>8216–8224</pages>
       <abstract>Recently more attention has been given to adversarial attacks on neural networks for natural language processing (NLP). A central research topic has been the investigation of search algorithms and search constraints, accompanied by benchmark algorithms and tasks. We implement an algorithm inspired by zeroth order optimization-based attacks and compare with the benchmark results in the TextAttack framework. Surprisingly, we find that optimization-based methods do not yield any improvement in a constrained setup and slightly benefit from approximate gradient information only in unconstrained setups where search spaces are larger. In contrast, simple heuristics exploiting nearest neighbors without querying the target function yield substantial success rates in constrained setups, and nearly full success rate in unconstrained setups, at an order of magnitude fewer queries. We conclude from these results that current TextAttack benchmark tasks are too easy and constraints are too strict, preventing meaningful research on black-box adversarial text attacks.</abstract>
       <url hash="103f9e6a">2021.emnlp-main.647</url>
@@ -8885,7 +8885,7 @@
     <paper id="648">
       <title>Adversarial Attacks on Knowledge Graph Embeddings via Instance Attribution Methods</title>
       <author><first>Peru</first><last>Bhardwaj</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <author><first>Luca</first><last>Costabello</last></author>
       <author><first>Declan</first><last>O’Sullivan</last></author>
       <pages>8225–8239</pages>
@@ -8897,7 +8897,7 @@
     </paper>
     <paper id="649">
       <title>Locke’s Holiday: Belief Bias in Machine Reading</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>8240–8245</pages>
       <abstract>I highlight a simple failure mode of state-of-the-art machine reading systems: when contexts do not align with commonly shared beliefs. For example, machine reading systems fail to answer <i>What did Elizabeth want?</i> correctly in the context of ‘My kingdom for a cough drop, cried Queen Elizabeth.’ Biased by co-occurrence statistics in the training data of pretrained language models, systems predict <i>my kingdom</i>, rather than <i>a cough drop</i>. I argue such biases are analogous to human belief biases and present a carefully designed challenge dataset for English machine reading, called Auto-Locke, to quantify such effects. Evaluations of machine reading systems on Auto-Locke show the pervasiveness of belief bias in machine reading.</abstract>
       <url hash="0494c636">2021.emnlp-main.649</url>
@@ -8907,8 +8907,8 @@
     </paper>
     <paper id="650">
       <title>Sequence Length is a Domain: Length-based Overfitting in Transformer Models</title>
-      <author><first>Dusan</first><last>Varis</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>8246–8257</pages>
       <abstract>Transformer-based sequence-to-sequence architectures, while achieving state-of-the-art results on a large number of NLP tasks, can still suffer from overfitting during training. In practice, this is usually countered either by applying regularization methods (e.g. dropout, L2-regularization) or by providing huge amounts of training data. Additionally, Transformer and other architectures are known to struggle when generating very long sequences. For example, in machine translation, the neural-based systems perform worse on very long sequences when compared to the preceding phrase-based translation approaches (Koehn and Knowles, 2017). We present results which suggest that the issue might also be in the mismatch between the length distributions of the training and validation data combined with the aforementioned tendency of the neural networks to overfit to the training data. We demonstrate on a simple string editing tasks and a machine translation task that the Transformer model performance drops significantly when facing sequences of length diverging from the length distribution in the training data. Additionally, we show that the observed drop in performance is due to the hypothesis length corresponding to the lengths seen by the model during training rather than the length of the input sequence.</abstract>
       <url hash="55a0b05d">2021.emnlp-main.650</url>
@@ -8934,7 +8934,7 @@
       <title>Is Information Density Uniform in Task-Oriented Dialogues?</title>
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Arabella</first><last>Sinclair</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>8271–8283</pages>
       <abstract>The Uniform Information Density principle states that speakers plan their utterances to reduce fluctuations in the density of the information transmitted. In this paper, we test whether, and within which contextual units this principle holds in task-oriented dialogues. We show that there is evidence supporting the principle in written dialogues where participants play a cooperative reference game as well as in spoken dialogues involving instruction giving and following. Our study underlines the importance of identifying the relevant contextual components, showing that information content increases particularly within topically and referentially related contextual units.</abstract>
       <url hash="4fc36c4b">2021.emnlp-main.652</url>
@@ -9095,7 +9095,7 @@
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>8449–8456</pages>
       <abstract>Zero-shot translations is a fascinating feature of Multilingual Neural Machine Translation (MNMT) systems. These MNMT models are usually trained on English-centric data, i.e. English either as the source or target language, and with a language label prepended to the input indicating the target language. However, recent work has highlighted several flaws of these models in zero-shot scenarios where language labels are ignored and the wrong language is generated or different runs show highly unstable results. In this paper, we investigate the benefits of an explicit alignment to language labels in Transformer-based MNMT models in the zero-shot context, by jointly training one cross attention head with word alignment supervision to stress the focus on the target language label. We compare and evaluate several MNMT systems on three multilingual MT benchmarks of different sizes, showing that simply supervising one cross attention head to focus both on word alignments and language labels reduces the bias towards translating into the wrong language, improving the zero-shot performance overall. Moreover, as an additional advantage, we find that our alignment supervision leads to more stable results across different training runs.</abstract>
       <url hash="2955ecdc">2021.emnlp-main.664</url>
@@ -9110,7 +9110,7 @@
       <author><first>Lutfi Kerem</first><last>Senel</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>8457–8469</pages>
       <abstract>With the advent of end-to-end deep learning approaches in machine translation, interest in word alignments initially decreased; however, they have again become a focus of research more recently. Alignments are useful for typological research, transferring formatting like markup to translated texts, and can be used in the decoding of machine translation systems. At the same time, massively multilingual processing is becoming an important NLP scenario, and pretrained language and machine translation models that are truly multilingual are proposed. However, most alignment algorithms rely on bitexts only and do not leverage the fact that many parallel corpora are multiparallel. In this work, we exploit the multiparallelity of corpora by representing an initial set of bilingual alignments as a graph and then predicting additional edges in the graph. We present two graph algorithms for edge prediction: one inspired by recommender systems and one based on network link prediction. Our experimental results show absolute improvements in F1 of up to 28% over the baseline bilingual word aligner in different datasets.</abstract>
       <url hash="96387566">2021.emnlp-main.665</url>
@@ -9123,7 +9123,7 @@
       <author><first>Eva</first><last>Hasler</last></author>
       <author><first>Tobias</first><last>Domhan</last></author>
       <author><first>Jonay</first><last>Trenous</last></author>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <author><first>Felix</first><last>Hieber</last></author>
       <pages>8470–8477</pages>
@@ -9148,7 +9148,7 @@
     <paper id="668">
       <title>Effective Fine-Tuning Methods for Cross-lingual Adaptation</title>
       <author><first>Tao</first><last>Yu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>8492–8501</pages>
       <abstract>Large scale multilingual pre-trained language models have shown promising results in zero- and few-shot cross-lingual tasks. However, recent studies have shown their lack of generalizability when the languages are structurally dissimilar. In this work, we propose a novel fine-tuning method based on co-training that aims to learn more generalized semantic equivalences as a complementary to multilingual language modeling using the unlabeled data in the target language. We also propose an adaption method based on contrastive learning to better capture the semantic relationship in the parallel data, when a few translation pairs are available. To show our method’s effectiveness, we conduct extensive experiments on cross-lingual inference and review classification tasks across various languages. We report significant gains compared to directly fine-tuning multilingual pre-trained models and other semi-supervised alternatives.</abstract>
       <url hash="ee99e4db">2021.emnlp-main.668</url>
@@ -9158,8 +9158,8 @@
     </paper>
     <paper id="669">
       <title>Rethinking Data Augmentation for Low-Resource Neural Machine Translation: A Multi-Task Learning Approach</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>8502–8516</pages>
@@ -9195,7 +9195,7 @@
     <paper id="672">
       <title>Discrete and Soft Prompting for Multilingual Models</title>
       <author><first>Mengjie</first><last>Zhao</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>8547–8555</pages>
       <abstract>It has been shown for English that discrete and soft prompting perform strongly in few-shot learning with pretrained language models (PLMs). In this paper, we show that discrete and soft prompting perform better than finetuning in multilingual cases: Crosslingual transfer and in-language training of multilingual natural language inference. For example, with 48 English training examples, finetuning obtains 33.74% accuracy in crosslingual transfer, barely surpassing the majority baseline (33.33%). In contrast, discrete and soft prompting outperform finetuning, achieving 36.43% and 38.79%. We also demonstrate good performance of prompting with training data in multiple languages other than English.</abstract>
       <url hash="ab628c18">2021.emnlp-main.672</url>
@@ -9235,7 +9235,7 @@
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Samarth</first><last>Bharadwaj</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>8584–8595</pages>
       <abstract>We explore the impact of leveraging the relatedness of languages that belong to the same family in NLP models using multilingual fine-tuning. We hypothesize and validate that multilingual fine-tuning of pre-trained language models can yield better performance on downstream NLP applications, compared to models fine-tuned on individual languages. A first of its kind detailed study is presented to track performance change as languages are added to a base language in a graded and greedy (in the sense of best boost of performance) manner; which reveals that careful selection of subset of related languages can significantly improve performance than utilizing all related languages. The Indo-Aryan (IA) language family is chosen for the study, the exact languages being Bengali, Gujarati, Hindi, Marathi, Oriya, Punjabi and Urdu. The script barrier is crossed by simple rule-based transliteration of the text of all languages to Devanagari. Experiments are performed on mBERT, IndicBERT, MuRIL and two RoBERTa-based LMs, the last two being pre-trained by us. Low resource languages, such as Oriya and Punjabi, are found to be the largest beneficiaries of multilingual fine-tuning. Textual Entailment, Entity Classification, Section Title Prediction, tasks of IndicGLUE and POS tagging form our test bed. Compared to monolingual fine tuning we get relative performance improvement of up to 150% in the downstream tasks. The surprise take-away is that for any language there is a particular combination of other languages which yields the best performance, and any additional language is in fact detrimental.</abstract>
       <url hash="bb21fcbb">2021.emnlp-main.675</url>
@@ -9248,8 +9248,8 @@
       <title>Comparing Feature-Engineering and Feature-Learning Approaches for Multilingual Translationese Classification</title>
       <author><first>Daria</first><last>Pylypenko</last></author>
       <author><first>Kwabena</first><last>Amponsah-Kaakyire</last></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>8596–8611</pages>
       <abstract>Traditional hand-crafted linguistically-informed features have often been used for distinguishing between translated and original non-translated texts. By contrast, to date, neural architectures without manual feature engineering have been less explored for this task. In this work, we (i) compare the traditional feature-engineering-based approach to the feature-learning-based one and (ii) analyse the neural architectures in order to investigate how well the hand-crafted features explain the variance in the neural models’ predictions. We use pre-trained neural word embeddings, as well as several end-to-end neural architectures in both monolingual and multilingual settings and compare them to feature-engineering-based SVM classifiers. We show that (i) neural architectures outperform other approaches by more than 20 accuracy points, with the BERT-based model performing the best in both the monolingual and multilingual settings; (ii) while many individual hand-crafted translationese features correlate with neural model predictions, feature importance analysis shows that the most important features for neural and classical architectures differ; and (iii) our multilingual experiments provide empirical evidence for translationese universals across languages.</abstract>
@@ -9286,9 +9286,9 @@
       <author><first>Ran</first><last>Wang</last></author>
       <author><first>Xi’ao</first><last>Su</last></author>
       <author><first>Siyu</first><last>Long</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>8633–8646</pages>
       <abstract>Large-scale multi-label text classification (LMTC) tasks often face long-tailed label distributions, where many labels have few or even no training instances. Although current methods can exploit prior knowledge to handle these few/zero-shot labels, they neglect the meta-knowledge contained in the dataset that can guide models to learn with few samples. In this paper, for the first time, this problem is addressed from a meta-learning perspective. However, the simple extension of meta-learning approaches to multi-label classification is sub-optimal for LMTC tasks due to long-tailed label distribution and coexisting of few- and zero-shot scenarios. We propose a meta-learning approach named META-LMTC. Specifically, it constructs more faithful and more diverse tasks according to well-designed sampling strategies and directly incorporates the objective of adapting to new low-resource tasks into the meta-learning phase. Extensive experiments show that META-LMTC achieves state-of-the-art performance against strong baselines and can still enhance powerful BERTlike models.</abstract>
       <url hash="7e5317de">2021.emnlp-main.679</url>
@@ -9337,7 +9337,7 @@
       <title><fixed-case>ST</fixed-case>a<fixed-case>CK</fixed-case>: Sentence Ordering with Temporal Commonsense Knowledge</title>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>8676–8686</pages>
       <abstract>Sentence order prediction is the task of finding the correct order of sentences in a randomly ordered document. Correctly ordering the sentences requires an understanding of coherence with respect to the chronological sequence of events described in the text. Document-level contextual understanding and commonsense knowledge centered around these events are often essential in uncovering this coherence and predicting the exact chronological order. In this paper, we introduce STaCK — a framework based on graph neural networks and temporal commonsense knowledge to model global information and predict the relative order of sentences. Our graph network accumulates temporal evidence using knowledge of ‘past’ and ‘future’ and formulates sentence ordering as a constrained edge classification problem. We report results on five different datasets, and empirically show that the proposed method is naturally suitable for order prediction. The implementation of this work is available at: <url>https://github.com/declare-lab/sentence-ordering</url>.</abstract>
@@ -9348,7 +9348,7 @@
     </paper>
     <paper id="684">
       <title>Preventing Author Profiling through Zero-Shot Multilingual Back-Translation</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Miaoran</first><last>Zhang</last></author>
       <author><first>Xiaoyu</first><last>Shen</last></author>
       <author><first>Ali</first><last>Davody</last></author>
@@ -9365,7 +9365,7 @@
       <title><fixed-case>C</fixed-case>ode<fixed-case>T</fixed-case>5: Identifier-aware Unified Pre-trained Encoder-Decoder Models for Code Understanding and Generation</title>
       <author><first>Yue</first><last>Wang</last></author>
       <author><first>Weishi</first><last>Wang</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <pages>8696–8708</pages>
       <abstract>Pre-trained models for Natural Languages (NL) like BERT and GPT have been recently shown to transfer well to Programming Languages (PL) and largely benefit a broad set of code-related tasks. Despite their success, most current methods either rely on an encoder-only (or decoder-only) pre-training that is suboptimal for generation (resp. understanding) tasks or process the code snippet in the same way as NL, neglecting the special characteristics of PL such as token types. We present CodeT5, a unified pre-trained encoder-decoder Transformer model that better leverages the code semantics conveyed from the developer-assigned identifiers. Our model employs a unified framework to seamlessly support both code understanding and generation tasks and allows for multi-task learning. Besides, we propose a novel identifier-aware pre-training task that enables the model to distinguish which code tokens are identifiers and to recover them when they are masked. Furthermore, we propose to exploit the user-written code comments with a bimodal dual generation task for better NL-PL alignment. Comprehensive experiments show that CodeT5 significantly outperforms prior methods on understanding tasks such as code defect detection and clone detection, and generation tasks across various directions including PL-NL, NL-PL, and PL-PL. Further analysis reveals that our model can better capture semantic information from code. Our code and pre-trained models are released at <url>https://github.com/salesforce/CodeT5</url>.</abstract>
@@ -9525,7 +9525,7 @@
       <title><fixed-case>B</fixed-case>elief<fixed-case>B</fixed-case>ank: Adding Memory to a Pre-Trained Language Model for a Systematic Notion of Belief</title>
       <author><first>Nora</first><last>Kassner</last></author>
       <author><first>Oyvind</first><last>Tafjord</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>8849–8861</pages>
       <abstract>Although pretrained language models (PTLMs) contain significant amounts of world knowledge, they can still produce inconsistent answers to questions when probed, even after specialized training. As a result, it can be hard to identify what the model actually “believes” about the world, making it susceptible to inconsistent behavior and simple errors. Our goal is to reduce these problems. Our approach is to embed a PTLM in a broader system that also includes an evolving, symbolic memory of beliefs – a BeliefBank – that records but then may modify the raw PTLM answers. We describe two mechanisms to improve belief consistency in the overall system. First, a reasoning component – a weighted MaxSAT solver – revises beliefs that significantly clash with others. Second, a feedback component issues future queries to the PTLM using known beliefs as context. We show that, in a controlled experimental setting, these two mechanisms result in more consistent beliefs in the overall system, improving both the accuracy and consistency of its answers over time. This is significant as it is a first step towards PTLM-based architectures with a systematic notion of belief, enabling them to construct a more coherent picture of the world, and improve over time without model retraining.</abstract>
@@ -9549,7 +9549,7 @@
     <paper id="699">
       <title><fixed-case>I</fixed-case>ndo<fixed-case>NLG</fixed-case>: Benchmark and Resources for Evaluating <fixed-case>I</fixed-case>ndonesian Natural Language Generation</title>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Bryan</first><last>Wilie</last></author>
       <author><first>Karissa</first><last>Vincentio</last></author>
       <author><first>Xiaohong</first><last>Li</last></author>
@@ -9609,7 +9609,7 @@
     </paper>
     <paper id="703">
       <title>What happens if you treat ordinal ratings as interval data? Human evaluations in <fixed-case>NLP</fixed-case> are even more under-powered than you think</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <pages>8932–8939</pages>
       <abstract>Previous work has shown that human evaluations in NLP are notoriously under-powered. Here, we argue that there are two common factors which make this problem even worse: NLP studies usually (a) treat ordinal data as interval data and (b) operate under high variance settings while the differences they are hoping to detect are often subtle. We demonstrate through simulation that ordinal mixed effects models are better able to detect small differences between models, especially in high variance settings common in evaluations of generated texts. We release tools for researchers to conduct their own power analysis and test their assumptions. We also make recommendations for improving statistical power.</abstract>
@@ -9704,7 +9704,7 @@
       <title>Cross-Domain Label-Adaptive Stance Detection</title>
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Arnav</first><last>Arora</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>9011–9028</pages>
       <abstract>Stance detection concerns the classification of a writer’s viewpoint towards a target. There are different task variants, e.g., stance of a tweet vs. a full article, or stance with respect to a claim vs. an (implicit) topic. Moreover, task definitions vary, which includes the label inventory, the data collection, and the annotation protocol. All these aspects hinder cross-domain studies, as they require changes to standard domain adaptation approaches. In this paper, we perform an in-depth analysis of 16 stance detection datasets, and we explore the possibility for cross-domain learning from them. Moreover, we propose an end-to-end unsupervised framework for out-of-domain prediction of unseen, user-defined labels. In particular, we combine domain adaptation techniques such as mixture of experts and domain-adversarial training with label embeddings, and we demonstrate sizable performance gains over strong baselines, both (i) in-domain, i.e., for seen targets, and (ii) out-of-domain, i.e., for unseen targets. Finally, we perform an exhaustive analysis of the cross-domain results, and we highlight the important factors influencing the model performance.</abstract>
@@ -9730,7 +9730,7 @@
     <paper id="712">
       <title>Distilling Relation Embeddings from Pretrained Language Models</title>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>9044–9062</pages>
       <abstract>Pre-trained language models have been found to capture a surprisingly rich amount of lexical knowledge, ranging from commonsense properties of everyday concepts to detailed factual knowledge about named entities. Among others, this makes it possible to distill high-quality word vectors from pre-trained language models. However, it is currently unclear to what extent it is possible to distill relation embeddings, i.e. vectors that characterize the relationship between two words. Such relation embeddings are appealing because they can, in principle, encode relational knowledge in a more fine-grained way than is possible with knowledge graphs. To obtain relation embeddings from a pre-trained language model, we encode word pairs using a (manually or automatically generated) prompt, and we fine-tune the language model such that relationally similar word pairs yield similar output vectors. We find that the resulting relation embeddings are highly competitive on analogy (unsupervised) and relation classification (supervised) benchmarks, even without any task-specific fine-tuning. Source code to reproduce our experimental results and the model checkpoints are available in the following repository: <url>https://github.com/asahi417/relbert</url></abstract>
@@ -9742,7 +9742,7 @@
     <paper id="713">
       <title>Avoiding Inference Heuristics in Few-shot Prompt-based Finetuning</title>
       <author><first>Prasetya</first><last>Utama</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Victor</first><last>Sanh</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>9063–9074</pages>
@@ -9755,7 +9755,7 @@
     <paper id="714">
       <title>A Differentiable Relaxation of Graph Segmentation and Alignment for <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Chunchuan</first><last>Lyu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>9075–9091</pages>
       <abstract>Abstract Meaning Representations (AMR) are a broad-coverage semantic formalism which represents sentence meaning as a directed acyclic graph. To train most AMR parsers, one needs to segment the graph into subgraphs and align each such subgraph to a word in a sentence; this is normally done at preprocessing, relying on hand-crafted rules. In contrast, we treat both alignment and segmentation as latent variables in our model and induce them as part of end-to-end training. As marginalizing over the structured latent variables is infeasible, we use the variational autoencoding framework. To ensure end-to-end differentiable optimization, we introduce a differentiable relaxation of the segmentation and alignment problems. We observe that inducing segmentation yields substantial gains over using a ‘greedy’ segmentation heuristic. The performance of our method also approaches that of a model that relies on the segmentation rules of Lyu and Titov (2018), which were hand-crafted to handle individual AMR constructions.</abstract>
@@ -9782,7 +9782,7 @@
       <author><first>Sourav</first><last>Dutta</last></author>
       <author><first>Haytham</first><last>Assem</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>9099–9113</pages>
       <abstract>Multilingual sentence embeddings capture rich semantic information not only for measuring similarity between texts but also for catering to a broad range of downstream cross-lingual NLP tasks. State-of-the-art multilingual sentence embedding models require large parallel corpora to learn efficiently, which confines the scope of these models. In this paper, we propose a novel sentence embedding framework based on an unsupervised loss function for generating effective multilingual sentence embeddings, eliminating the need for parallel corpora. We capture semantic similarity and relatedness between sentences using a multi-task loss function for training a dual encoder model mapping different languages onto the same vector space. We demonstrate the efficacy of an unsupervised as well as a weakly supervised variant of our framework on STS, BUCC and Tatoeba benchmark tasks. The proposed unsupervised sentence embedding framework outperforms even supervised state-of-the-art methods for certain under-resourced languages on the Tatoeba dataset and on a monolingual benchmark. Further, we show enhanced zero-shot learning capabilities for more than 30 languages, with the model being trained on only 13 languages. Our model can be extended to a wide range of languages from any language family, as it overcomes the requirement of parallel corpora for training.</abstract>
       <url hash="3d526338">2021.emnlp-main.716</url>
@@ -9792,7 +9792,7 @@
     </paper>
     <paper id="717">
       <title><fixed-case>NB</fixed-case>-<fixed-case>MLM</fixed-case>: Efficient Domain Adaptation of Masked Language Models for Sentiment Analysis</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Dmitrii</first><last>Kharchev</last></author>
       <author><first>Artem</first><last>Shelmanov</last></author>
       <pages>9114–9124</pages>
@@ -9806,7 +9806,7 @@
       <title>Revisiting Self-training for Few-shot Learning of Language Model</title>
       <author><first>Yiming</first><last>Chen</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Grandee</first><last>Lee</last></author>
       <author><first>Ran</first><last>Cheng</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
@@ -9937,7 +9937,7 @@
     </paper>
     <paper id="728">
       <title>Towards Label-Agnostic Emotion Embeddings</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Luise</first><last>Modersohn</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>9231–9249</pages>
@@ -9974,7 +9974,7 @@
     <paper id="731">
       <title><fixed-case>PASTE</fixed-case>: A Tagging-Free Decoding Framework Using Pointer Networks for Aspect Sentiment Triplet Extraction</title>
       <author><first>Rajdeep</first><last>Mukherjee</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Yash</first><last>Butala</last></author>
       <author><first>Sourangshu</first><last>Bhattacharya</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -10038,7 +10038,7 @@
     <paper id="736">
       <title>Looking for Confirmations: An Effective and Human-Like Visual Dialogue Strategy</title>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>9330–9338</pages>
       <abstract>Generating goal-oriented questions in Visual Dialogue tasks is a challenging and longstanding problem. State-Of-The-Art systems are shown to generate questions that, although grammatically correct, often lack an effective strategy and sound unnatural to humans. Inspired by the cognitive literature on information search and cross-situational word learning, we design Confirm-it, a model based on a beam search re-ranking algorithm that guides an effective goal-oriented strategy by asking questions that confirm the model’s conjecture about the referent. We take the GuessWhat?! game as a case-study. We show that dialogues generated by Confirm-it are more natural and effective than beam search decoding without re-ranking.</abstract>
       <url hash="c24b5e9c">2021.emnlp-main.736</url>
@@ -10051,7 +10051,7 @@
       <author><first>Yingzhu</first><last>Zhao</last></author>
       <author><first>Chongjia</first><last>Ni</last></author>
       <author><first>Cheung-Chi</first><last>Leung</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Eng Siong</first><last>Chng</last></author>
       <author><first>Bin</first><last>Ma</last></author>
       <pages>9339–9349</pages>
@@ -10136,7 +10136,7 @@
     <paper id="744">
       <title>Enriching and Controlling Global Semantics for Text Summarization</title>
       <author><first>Thong</first><last>Nguyen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Truc</first><last>Lu</last></author>
       <author><first>Tho</first><last>Quan</last></author>
       <pages>9443–9456</pages>
@@ -10206,7 +10206,7 @@
       <author><first>Fuli</first><last>Luo</last></author>
       <author><first>Zhiyuan</first><last>Zhang</last></author>
       <author><first>Chuanqi</first><last>Tan</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Songfang</first><last>Huang</last></author>
       <author><first>Fei</first><last>Huang</last></author>
       <pages>9514–9528</pages>
@@ -10306,7 +10306,7 @@
       <author><first>Chen</first><last>Zhao</last></author>
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>9612–9622</pages>
       <abstract>Open-domain question answering answers a question based on evidence retrieved from a large corpus. State-of-the-art neural approaches require intermediate evidence annotations for training. However, such intermediate annotations are expensive, and methods that rely on them cannot transfer to the more common setting, where only question–answer pairs are available. This paper investigates whether models can learn to find evidence from a large corpus, with only distant supervision from answer labels for model training, thereby generating no additional annotation cost. We introduce a novel approach (DistDR) that iteratively improves over a weak retriever by alternately finding evidence from the up-to-date model and encouraging the model to learn the most likely evidence. Without using any evidence labels, DistDR is on par with fully-supervised state-of-the-art methods on both multi-hop and single-hop QA benchmarks. Our analysis confirms that DistDR finds more accurate evidence over iterations, which leads to model improvements. The code is available at <url>https://github.com/henryzhao5852/DistDR</url>.</abstract>
       <url hash="f69c33b4">2021.emnlp-main.756</url>
@@ -10340,7 +10340,7 @@
     <paper id="759">
       <title>Numerical reasoning in machine reading comprehension tasks: are we there yet?</title>
       <author><first>Hadeel</first><last>Al-Negheimish</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Alessandra</first><last>Russo</last></author>
       <pages>9643–9649</pages>
       <abstract>Numerical reasoning based machine reading comprehension is a task that involves reading comprehension along with using arithmetic operations such as addition, subtraction, sorting and counting. The DROP benchmark (Dua et al., 2019) is a recent dataset that has inspired the design of NLP models aimed at solving this task. The current standings of these models in the DROP leaderboard, over standard metrics, suggests that the models have achieved near-human performance. However, does this mean that these models have learned to reason? In this paper, we present a controlled study on some of the top-performing model architectures for the task of numerical reasoning. Our observations suggest that the standard metrics are incapable of measuring progress towards such tasks.</abstract>
@@ -10594,7 +10594,7 @@
       <author><first>Jiafeng</first><last>Guo</last></author>
       <author><first>Zixuan</first><last>Li</last></author>
       <author><first>Xiaolong</first><last>Jin</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>9869–9878</pages>
       <abstract>Scripts are structured sequences of events together with the participants, which are extracted from the texts. Script event prediction aims to predict the subsequent event given the historical events in the script. Two kinds of information facilitate this task, namely, the event-level information and the script-level information. At the event level, existing studies view an event as a verb with its participants, while neglecting other useful properties, such as the state of the participants. At the script level, most existing studies only consider a single event sequence corresponding to one common protagonist. In this paper, we propose a Transformer-based model, called MCPredictor, which integrates deep event-level and script-level information for script event prediction. At the event level, MCPredictor utilizes the rich information in the text to obtain more comprehensive event semantic representations. At the script-level, it considers multiple event sequences corresponding to different participants of the subsequent event. The experimental results on the widely-used New York Times corpus demonstrate the effectiveness and superiority of the proposed model.</abstract>
       <url hash="1df22a15">2021.emnlp-main.777</url>
@@ -10663,7 +10663,7 @@
     <paper id="783">
       <title>Identifying Morality Frames in Political Tweets using Relational Learning</title>
       <author><first>Shamik</first><last>Roy</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>9939–9958</pages>
       <abstract>Extracting moral sentiment from text is a vital component in understanding public opinion, social movements, and policy decisions. The Moral Foundation Theory identifies five moral foundations, each associated with a positive and negative polarity. However, moral sentiment is often motivated by its targets, which can correspond to individuals or collective entities. In this paper, we introduce morality frames, a representation framework for organizing moral attitudes directed at different entities, and come up with a novel and high-quality annotated dataset of tweets written by US politicians. Then, we propose a relational learning model to predict moral attitudes towards entities and moral foundations jointly. We do qualitative and quantitative evaluations, showing that moral sentiment towards entities differs highly across political ideologies.</abstract>
@@ -10715,7 +10715,7 @@
       <author><first>Jiseon</first><last>Kim</last></author>
       <author><first>Elden</first><last>Griggs</last></author>
       <author><first>In Song</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>10048–10064</pages>
       <abstract>Bill writing is a critical element of representative democracy. However, it is often overlooked that most legislative bills are derived, or even directly copied, from other bills. Despite the significance of bill-to-bill linkages for understanding the legislative process, existing approaches fail to address semantic similarities across bills, let alone reordering or paraphrasing which are prevalent in legal document writing. In this paper, we overcome these limitations by proposing a 5-class classification task that closely reflects the nature of the bill generation process. In doing so, we construct a human-labeled dataset of 4,721 bill-to-bill relationships at the subsection-level and release this annotated dataset to the research community. To augment the dataset, we generate synthetic data with varying degrees of similarity, mimicking the complex bill writing process. We use BERT variants and apply multi-stage training, sequentially fine-tuning our models with synthetic and human-labeled datasets. We find that the predictive performance significantly improves when training with both human-labeled and synthetic data. Finally, we apply our trained model to infer section- and bill-level similarities. Our analysis shows that the proposed methodology successfully captures the similarities across legal documents at various levels of aggregation.</abstract>
       <url hash="5fb4ee9e">2021.emnlp-main.787</url>
@@ -10743,7 +10743,7 @@
       <author><first>Pranav Jeevan</first><last>P</last></author>
       <author><first>Prerak</first><last>Gandhi</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>10073–10079</pages>
       <abstract>Computational Humour (CH) has attracted the interest of Natural Language Processing and Computational Linguistics communities. Creating datasets for automatic measurement of humour quotient is difficult due to multiple possible interpretations of the content. In this work, we create a multi-modal humour-annotated dataset (~40 hours) using stand-up comedy clips. We devise a novel scoring mechanism to annotate the training data with a humour quotient score using the audience’s laughter. The normalized duration (laughter duration divided by the clip duration) of laughter in each clip is used to compute this humour coefficient score on a five-point scale (0-4). This method of scoring is validated by comparing with manually annotated scores, wherein a quadratic weighted kappa of 0.6 is obtained. We use this dataset to train a model that provides a ‘funniness’ score, on a five-point scale, given the audio and its corresponding text. We compare various neural language models for the task of humour-rating and achieve an accuracy of 0.813 in terms of Quadratic Weighted Kappa (QWK). Our ‘Open Mic’ dataset is released for further research along with the code.</abstract>
       <url hash="06308fa1">2021.emnlp-main.789</url>
@@ -10875,7 +10875,7 @@
       <author><first>Raksha</first><last>Shenoy</last></author>
       <author><first>Nico</first><last>Herbig</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>10173–10185</pages>
       <abstract>Compared to fully manual translation, post-editing (PE) machine translation (MT) output can save time and reduce errors. Automatic word-level quality estimation (QE) aims to predict the correctness of words in MT output and holds great promise to aid PE by flagging problematic output. Quality of QE is crucial, as incorrect QE might lead to translators missing errors or wasting time on already correct MT output. Achieving accurate automatic word-level QE is very hard, and it is currently not known (i) at what quality threshold QE is actually beginning to be useful for human PE, and (ii), how to best present word-level QE information to translators. In particular, should word-level QE visualization indicate uncertainty of the QE model or not? In this paper, we address both research questions with real and simulated word-level QE, visualizations, and user studies, where time, subjective ratings, and quality of the final translations are assessed. Results show that current word-level QE models are not yet good enough to support PE. Instead, quality levels of &gt; 80% F1 are required. For helpful quality levels, a visualization reflecting the uncertainty of the QE model is preferred. Our analysis further shows that speed gains achieved through QE are not merely a result of blindly trusting the QE system, but that the quality of the final translations also improves. The threshold results from the paper establish a quality goal for future word-level QE research.</abstract>
       <url hash="c3200638">2021.emnlp-main.799</url>
@@ -10901,7 +10901,7 @@
       <title>Neural Machine Translation Quality and Post-Editing Performance</title>
       <author><first>Vilém</first><last>Zouhar</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <pages>10204–10214</pages>
       <abstract>We test the natural expectation that using MT in professional translation saves human processing time. The last such study was carried out by Sanchez-Torron and Koehn (2016) with phrase-based MT, artificially reducing the translation quality. In contrast, we focus on neural MT (NMT) of high quality, which has become the state-of-the-art approach since then and also got adopted by most translation companies. Through an experimental study involving over 30 professional translators for English -&gt; Czech translation, we examine the relationship between NMT performance and post-editing time and quality. Across all models, we found that better MT systems indeed lead to fewer changes in the sentences in this industry setting. The relation between system quality and post-editing time is however not straightforward and, contrary to the results on phrase-based MT, BLEU is definitely not a stable predictor of the time or final output quality.</abstract>
@@ -10946,7 +10946,7 @@
       <title><fixed-case>M</fixed-case>easuring Association Between Labels and Free-Text Rationales</title>
       <author><first>Sarah</first><last>Wiegreffe</last></author>
       <author><first>Ana</first><last>Marasović</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>10266–10284</pages>
       <abstract>In interpretable NLP, we require faithful rationales that reflect the model’s decision-making process for an explained instance. While prior work focuses on extractive rationales (a subset of the input words), we investigate their less-studied counterpart: free-text natural language rationales. We demonstrate that *pipelines*, models for faithful rationalization on information-extraction style tasks, do not work as well on “reasoning” tasks requiring free-text rationales. We turn to models that *jointly* predict and rationalize, a class of widely used high-performance models for free-text rationalization. We investigate the extent to which the labels and rationales predicted by these models are associated, a necessary property of faithful explanation. Via two tests, *robustness equivalence* and *feature importance agreement*, we find that state-of-the-art T5-based joint models exhibit desirable properties for explaining commonsense question-answering and natural language inference, indicating their potential for producing faithful free-text rationales.</abstract>
       <url hash="3d582512">2021.emnlp-main.804</url>
@@ -10985,8 +10985,8 @@
       <title>Rationales for Sequential Predictions</title>
       <author><first>Keyon</first><last>Vafa</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
-      <author><first>David</first><last>Blei</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>10314–10332</pages>
       <abstract>Sequence models are a critical component of modern NLP systems, but their predictions are difficult to explain. We consider model explanations though rationales, subsets of context that can explain individual model predictions. We find sequential rationales by solving a combinatorial optimization: the best rationale is the smallest subset of input tokens that would predict the same output as the full sequence. Enumerating all subsets is intractable, so we propose an efficient greedy algorithm to approximate this objective. The algorithm, which is called greedy rationalization, applies to any model. For this approach to be effective, the model should form compatible conditional distributions when making predictions on incomplete subsets of the context. This condition can be enforced with a short fine-tuning step. We study greedy rationalization on language modeling and machine translation. Compared to existing baselines, greedy rationalization is best at optimizing the sequential objective and provides the most faithful rationales. On a new dataset of annotated sequential rationales, greedy rationales are most similar to human rationales.</abstract>
       <url hash="41a62020">2021.emnlp-main.807</url>
@@ -11043,7 +11043,7 @@
       <author><first>Gaetano</first><last>Rossiello</last></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last></author>
       <author><first>Sugato</first><last>Bagchi</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>10379–10394</pages>
       <abstract>Noun phrases and Relation phrases in open knowledge graphs are not canonicalized, leading to an explosion of redundant and ambiguous subject-relation-object triples. Existing approaches to solve this problem take a two-step approach. First, they generate embedding representations for both noun and relation phrases, then a clustering algorithm is used to group them using the embeddings as features. In this work, we propose Canonicalizing Using Variational AutoEncoders and Side Information (CUVA), a joint model to learn both embeddings and cluster assignments in an end-to-end approach, which leads to a better vector representation for the noun and relation phrases. Our evaluation over multiple benchmarks shows that CUVA outperforms the existing state-of-the-art approaches. Moreover, we introduce CanonicNell, a novel dataset to evaluate entity canonicalization systems.</abstract>
       <url hash="339a40ac">2021.emnlp-main.811</url>
@@ -11089,7 +11089,7 @@
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>James</first><last>Cross</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>10424–10430</pages>
       <abstract>Cross-lingual named-entity lexica are an important resource to multilingual NLP tasks such as machine translation and cross-lingual wikification. While knowledge bases contain a large number of entities in high-resource languages such as English and French, corresponding entities for lower-resource languages are often missing. To address this, we propose Lexical-Semantic-Phonetic Align (LSP-Align), a technique to automatically mine cross-lingual entity lexica from mined web data. We demonstrate LSP-Align outperforms baselines at extracting cross-lingual entity pairs and mine 164 million entity pairs from 120 different languages aligned with English. We release these cross-lingual entity pairs along with the massively multilingual tagged named entity corpus as a resource to the NLP community.</abstract>
@@ -11207,7 +11207,7 @@
     <paper id="823">
       <title>A Root of a Problem: Optimizing Single-Root Dependency Parsing</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>10540–10557</pages>
       <abstract>We describe two approaches to single-root dependency parsing that yield significant speed ups in such parsing. One approach has been previously used in dependency parsers in practice, but remains undocumented in the parsing literature, and is considered a heuristic. We show that this approach actually finds the optimal dependency tree. The second approach relies on simple reweighting of the inference graph being input to the dependency parser and has an optimal running time. Here, we again show that this approach is fully correct and identifies the highest-scoring parse tree. Our experiments demonstrate a manyfold speed up compared to a previous graph-based state-of-the-art parser without any loss in accuracy or optimality.</abstract>
       <url hash="75b9148b">2021.emnlp-main.823</url>
@@ -11242,7 +11242,7 @@
     </paper>
     <paper id="826">
       <title>A New Representation for Span-based <fixed-case>CCG</fixed-case> Parsing</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>10579–10584</pages>
       <abstract>This paper proposes a new representation for CCG derivations. CCG derivations are represented as trees whose nodes are labeled with categories strictly restricted by CCG rule schemata. This characteristic is not suitable for span-based parsing models because they predict node labels independently. In other words, span-based models may generate invalid CCG derivations that violate the rule schemata. Our proposed representation decomposes CCG derivations into several independent pieces and prevents the span-based parsing models from violating the schemata. Our experimental result shows that an off-the-shelf span-based parser with our representation is comparable with previous CCG parsers.</abstract>
@@ -11279,7 +11279,7 @@
       <author><first>François</first><last>Lagunas</last></author>
       <author><first>Ella</first><last>Charlaix</last></author>
       <author><first>Victor</first><last>Sanh</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>10619–10629</pages>
       <abstract>Pre-training has improved model accuracy for both classification and generation tasks at the cost of introducing much larger and slower models. Pruning methods have proven to be an effective way of reducing model size, whereas distillation methods are proven for speeding up inference. We introduce a block pruning approach targeting both small and fast models. Our approach extends structured methods by considering blocks of any size and integrates this structure into the movement pruning paradigm for fine-tuning. We find that this approach learns to prune out full components of the underlying model, such as attention heads. Experiments consider classification and generation tasks, yielding among other results a pruned model that is a 2.4x faster, 74% smaller BERT on SQuAD v1, with a 1% drop on F1, competitive both with distilled models in speed and pruned models in size.</abstract>
       <url hash="38185732">2021.emnlp-main.829</url>
@@ -11297,7 +11297,7 @@
       <author><first>Nikolaos</first><last>Pappas</last></author>
       <author><first>Yi</first><last>Mao</last></author>
       <author><first>Weizhu</first><last>Chen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>10630–10643</pages>
       <abstract>Transformers have outperformed recurrent neural networks (RNNs) in natural language generation. But this comes with a signifi- cant computational cost, as the attention mechanism’s complexity scales quadratically with sequence length. Efficient transformer variants have received increasing interest in recent works. Among them, a linear-complexity recurrent variant has proven well suited for autoregressive generation. It approximates the softmax attention with randomized or heuristic feature maps, but can be difficult to train and may yield suboptimal accuracy. This work aims to convert a pretrained transformer into its efficient recurrent counterpart, improving efficiency while maintaining accuracy. Specifically, we propose a swap-then-finetune procedure: in an off-the-shelf pretrained transformer, we replace the softmax attention with its linear-complexity recurrent alternative and then finetune. With a learned feature map, our approach provides an improved tradeoff between efficiency and accuracy over the standard transformer and other recurrent variants. We also show that the finetuning process has lower training cost relative to training these recurrent variants from scratch. As many models for natural language tasks are increasingly dependent on large-scale pretrained transformers, this work presents a viable approach to improving inference efficiency without repeating the expensive pretraining process.</abstract>
       <url hash="20e04c33">2021.emnlp-main.830</url>
@@ -11336,7 +11336,7 @@
       <title><fixed-case>I</fixed-case>ndo<fixed-case>BERT</fixed-case>weet: A Pretrained Language Model for <fixed-case>I</fixed-case>ndonesian <fixed-case>T</fixed-case>witter with Effective Domain-Specific Vocabulary Initialization</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>10660–10668</pages>
       <abstract>We present IndoBERTweet, the first large-scale pretrained model for Indonesian Twitter that is trained by extending a monolingually-trained Indonesian BERT model with additive domain-specific vocabulary. We focus in particular on efficient model adaptation under vocabulary mismatch, and benchmark different ways of initializing the BERT embedding layer for new word types. We find that initializing with the average BERT subword embedding makes pretraining five times faster, and is more effective than proposed methods for vocabulary adaptation in terms of extrinsic evaluation over seven Twitter-based datasets.</abstract>
       <url hash="6045b071">2021.emnlp-main.833</url>
@@ -11428,7 +11428,7 @@
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
       <author><first>Sander</first><last>Bijl de Vroe</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>10758–10768</pages>
       <abstract>Drawing inferences between open-domain natural language predicates is a necessity for true language understanding. There has been much progress in unsupervised learning of entailment graphs for this purpose. We make three contributions: (1) we reinterpret the Distributional Inclusion Hypothesis to model entailment between predicates of different valencies, like DEFEAT(Biden, Trump) entails WIN(Biden); (2) we actualize this theory by learning unsupervised Multivalent Entailment Graphs of open-domain predicates; and (3) we demonstrate the capabilities of these graphs on a novel question answering task. We show that directional entailment is more helpful for inference than non-directional similarity on questions of fine-grained semantics. We also show that drawing on evidence across valencies answers more questions than by using only the same valency evidence.</abstract>
       <url hash="15f92c08">2021.emnlp-main.840</url>
@@ -11543,7 +11543,7 @@
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Kevin</first><last>Parnow</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>1–10</pages>
       <abstract>In this paper, we present <b>MiSS</b>, an assistant for multi-style simultaneous translation. Our proposed translation system has five key features: highly accurate translation, simultaneous translation, translation for multiple text styles, back-translation for translation quality evaluation, and grammatical error correction. With this system, we aim to provide a complete translation experience for machine translation users. Our design goals are high translation accuracy, real-time translation, flexibility, and measurable translation quality. Compared with the free commercial translation systems commonly used, our translation assistance system regards the machine translation application as a more complete and fully-featured tool for users. By incorporating additional features and giving the user better control over their experience, we improve translation efficiency and performance. Additionally, our assistant system combines machine translation, grammatical error correction, and interactive edits, and uses a crowdsourcing mode to collect more data for further training to improve both the machine translation and grammatical error correction models. A short video demonstrating our system is available at <url>https://www.youtube.com/watch?v=ZGCo7KtRKd8</url>.</abstract>
@@ -11580,7 +11580,7 @@
     <paper id="4">
       <title><fixed-case>T</fixed-case>rans<fixed-case>I</fixed-case>ns: Document Translation with Markup Reinsertion</title>
       <author><first>Jörg</first><last>Steffen</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>28–34</pages>
       <abstract>For many use cases, it is required that MT does not just translate raw text, but complex formatted documents (e.g. websites, slides, spreadsheets) and the result of the translation should reflect the formatting. This is challenging, as markup can be nested, apply to spans contiguous in source but non-contiguous in target etc. Here we present TransIns, a system for non-plain text document translation that builds on the Okapi framework and MT models trained with Marian NMT. We develop, implement and evaluate different strategies for reinserting markup into translated sentences using token alignments between source and target sentences. We propose a simple and effective strategy that compiles down all markup to single source tokens and transfers them to aligned target tokens. A first evaluation shows that this strategy yields highly accurate markup in the translated documents that outperforms the markup quality found in documents translated with popular translation services. We release TransIns under the MIT License as open-source software on <url>https://github.com/DFKI-MLT/TransIns</url>. An online demonstrator is available at <url>https://transins.dfki.de</url>.</abstract>
       <url hash="e85bc09f">2021.emnlp-demo.4</url>
@@ -11591,7 +11591,7 @@
     <paper id="5">
       <title><fixed-case>ET</fixed-case>: A Workstation for Querying, Editing and Evaluating Annotated Corpora</title>
       <author><first>Elvis</first><last>de Souza</last></author>
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <pages>35–41</pages>
       <abstract>In this paper we explore the functionalities of ET, a suite designed to support linguistic research and natural language processing tasks using corpora annotated in the CoNLL-U format. These goals are achieved by two integrated environments – Interrogatório, an environment for querying and editing annotated corpora, and Julgamento, an environment for assessing their quality. ET is open-source, built on different Python Web technologies and has Web demonstrations available on-line. ET has been intensively used in our research group for over two years, being the chosen framework for several linguistic and NLP-related studies conducted by its researchers.</abstract>
       <url hash="07ec1285">2021.emnlp-demo.5</url>
@@ -11696,10 +11696,10 @@
     </paper>
     <paper id="13">
       <title>Semantic Context Path Labeling for Semantic Exploration of User Reviews</title>
-      <author><first>Salah</first><last>Aït-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Aït-Mokhtar</last></author>
       <author><first>Caroline</first><last>Brun</last></author>
       <author><first>Yves</first><last>Hoppenot</last></author>
-      <author><first>Agnes</first><last>Sandor</last></author>
+      <author id="agnes-sandor"><first>Agnes</first><last>Sandor</last></author>
       <pages>106–113</pages>
       <abstract>In this paper we present a prototype demonstrator showcasing a novel method to perform semantic exploration of user reviews. The system enables effective navigation in a rich contextual semantic schema with a large number of structured classes indicating relevant information. In order to identify instances of the structured classes in the reviews, we defined a new Information Extraction task called Semantic Context Path (SCP) labeling, which simultaneously assigns types and semantic roles to entity mentions. Reviews can rapidly be explored based on the fine-grained and structured semantic classes. As a proof-of-concept, we have implemented this system for reviews on Points-of-Interest, in English and Korean.</abstract>
       <url hash="6329344c">2021.emnlp-demo.13</url>
@@ -11734,8 +11734,8 @@
       <author><first>Phillip</first><last>Lee</last></author>
       <author><first>Jeshwanth</first><last>Bheemanpally</last></author>
       <author><first>Rohan</first><last>Pandey</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>124–133</pages>
       <abstract>Athena 2.0 is an Alexa Prize SocialBot that has been a finalist in the last two Alexa Prize Grand Challenges. One reason for Athena’s success is its novel dialogue management strategy, which allows it to dynamically construct dialogues and responses from component modules, leading to novel conversations with every interaction. Here we describe Athena’s system design and performance in the Alexa Prize during the 20/21 competition. A live demo of Athena as well as video recordings will provoke discussion on the state of the art in conversational AI.</abstract>
       <url hash="3c5b8a47">2021.emnlp-demo.15</url>
@@ -11792,7 +11792,7 @@
       <author><first>Jin</first><last>Zhao</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Jens</first><last>Van Gysel</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>160–167</pages>
       <abstract>We present UMR-Writer, a web-based application for annotating Uniform Meaning Representations (UMR), a graph-based, cross-linguistically applicable semantic representation developed recently to support the development of interpretable natural language applications that require deep semantic analysis of texts. We present the functionalities of UMR-Writer and discuss the challenges in developing such a tool and how they are addressed.</abstract>
       <url hash="db7c4db2">2021.emnlp-demo.19</url>
@@ -11844,7 +11844,7 @@
       <author><first>Thibault</first><last>Goehringer</last></author>
       <author><first>Victor</first><last>Mustar</last></author>
       <author><first>François</first><last>Lagunas</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Thomas</first><last>Wolf</last></author>
       <pages>175–184</pages>
       <abstract>The scale, variety, and quantity of publicly-available NLP datasets has grown rapidly as researchers propose new tasks, larger models, and novel benchmarks. Datasets is a community library for contemporary NLP designed to support this ecosystem. Datasets aims to standardize end-user interfaces, versioning, and documentation, while providing a lightweight front-end that behaves similarly for small datasets as for internet-scale corpora. The design of the library incorporates a distributed, community-driven approach to adding datasets and documenting usage. After a year of development, the library now includes more than 650 unique datasets, has more than 250 contributors, and has helped support a variety of novel cross-dataset research projects and shared tasks. The library is available at <url>https://github.com/huggingface/datasets</url>.</abstract>
@@ -11857,7 +11857,7 @@
       <title>Summary Explorer: Visualizing the State of the Art in Text Summarization</title>
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Tariq</first><last>Yousef</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Stefan</first><last>Jänicke</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <pages>185–194</pages>
@@ -11906,7 +11906,7 @@
       <author><first>Tim</first><last>French</last></author>
       <author><first>Melinda</first><last>Hodkiewicz</last></author>
       <author><first>Michael</first><last>Stewart</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <pages>212–219</pages>
       <abstract>NLP systems are often challenged by difficulties arising from noisy, non-standard, and domain specific corpora. The task of lexical normalisation aims to standardise such corpora, but currently lacks suitable tools to acquire high-quality annotated data to support deep learning based approaches. In this paper, we present LexiClean, the first open-source web-based annotation tool for multi-task lexical normalisation. LexiClean’s main contribution is support for simultaneous in situ token-level modification and annotation that can be rapidly applied corpus wide. We demonstrate the usefulness of our tool through a case study on two sets of noisy corpora derived from the specialised-domain of industrial mining. We show that LexiClean allows for the rapid and efficient development of high-quality parallel corpora. A demo of our system is available at: <url>https://youtu.be/P7_ooKrQPDU</url>.</abstract>
       <url hash="d0a345c6">2021.emnlp-demo.25</url>
@@ -11955,7 +11955,7 @@
       <author><first>Yimeng</first><last>Sun</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
       <author><first>Prakash</first><last>Ishwar</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>242–250</pages>
       <abstract>When journalists cover a news story, they can cover the story from multiple angles or perspectives. These perspectives are called “frames,” and usage of one frame or another may influence public perception and opinion of the issue at hand. We develop a web-based system for analyzing frames in multilingual text documents. We propose and guide users through a five-step end-to-end computational framing analysis framework grounded in media framing theory in communication research. Users can use the framework to analyze multilingual text data, starting from the exploration of frames in user’s corpora and through review of previous framing literature (step 1-3) to frame classification (step 4) and prediction (step 5). The framework combines unsupervised and supervised machine learning and leverages a state-of-the-art (SoTA) multilingual language model, which can significantly enhance frame prediction performance while requiring a considerably small sample of manual annotations. Through the interactive website, anyone can perform the proposed computational framing analysis, making advanced computational analysis available to researchers without a programming background and bridging the digital divide within the communication research discipline in particular and the academic community in general. The system is available online at <url>http://www.openframing.org</url>, via an API <url>http://www.openframing.org:5000/docs/</url>, or through our GitHub page <url>https://github.com/vibss2397/openFraming</url>.</abstract>
       <url hash="42c2be30">2021.emnlp-demo.28</url>
@@ -12089,8 +12089,8 @@
       <author><first>Troy</first><last>Feng</last></author>
       <author><first>Yusen</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Yu</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>329–338</pages>
       <abstract>Recent advances in summarization provide models that can generate summaries of higher quality. Such models now exist for a number of summarization tasks, including query-based summarization, dialogue summarization, and multi-document summarization. While such models and tasks are rapidly growing in the research field, it has also become challenging for non-experts to keep track of them. To make summarization methods more accessible to a wider audience, we develop SummerTime by rethinking the summarization task from the perspective of an NLP non-expert. SummerTime is a complete toolkit for text summarization, including various models, datasets, and evaluation metrics, for a full spectrum of summarization-related tasks. SummerTime integrates with libraries designed for NLP researchers, and enables users with easy-to-use APIs. With SummerTime, users can locate pipeline solutions and search for the best model with their own data, and visualize the differences, all with a few lines of code. We also provide explanations for models and evaluation metrics to help users understand the model behaviors and select models that best suit their needs. Our library, along with a notebook demo, is available at <url>https://github.com/Yale-LILY/SummerTime</url>.</abstract>
       <url hash="46160b2b">2021.emnlp-demo.37</url>
@@ -12156,7 +12156,7 @@
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <author><first>Abiola</first><last>Obamuyide</last></author>
       <author><first>Amit</first><last>Gajbhiye</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>382–389</pages>
@@ -12190,7 +12190,7 @@
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Mark</first><last>Yatskar</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>1–6</pages>
       <abstract>Crowdsourcing from non-experts is one of the most common approaches to collecting data and annotations in NLP. Even though it is such a fundamental tool in NLP, crowdsourcing use is largely guided by common practices and the personal experience of researchers. Developing a theory of crowdsourcing use for practical language problems remains an open challenge. However, there are various principles and practices that have proven effective in generating high quality and diverse data. This tutorial exposes NLP researchers to such data collection crowdsourcing methods and principles through a detailed discussion of a diverse set of case studies. The selection of case studies focuses on challenging settings where crowdworkers are asked to write original text or otherwise perform relatively unconstrained work. Through these case studies, we discuss in detail processes that were carefully designed to achieve data with specific properties, for example to require logical inference, grounded reasoning or conversational understanding. Each case study focuses on data collection crowdsourcing protocol details that often receive limited attention in research presentations, for example in conferences, but are critical for research success.</abstract>
@@ -12226,7 +12226,7 @@
     <paper id="4">
       <title>Multi-Domain Multilingual Question Answering</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>17–21</pages>
       <abstract>Question answering (QA) is one of the most challenging and impactful tasks in natural language processing. Most research in QA, however, has focused on the open-domain or monolingual setting while most real-world applications deal with specific domains or languages. In this tutorial, we attempt to bridge this gap. Firstly, we introduce standard benchmarks in multi-domain and multilingual QA. In both scenarios, we discuss state-of-the-art approaches that achieve impressive performance, ranging from zero-shot transfer learning to out-of-the-box training with open-domain QA systems. Finally, we will present open research problems that this new research agenda poses such as multi-task learning, cross-lingual transfer learning, domain adaptation and training large scale pre-trained multilingual language models.</abstract>
       <url hash="906ab71f">2021.emnlp-tutorials.4</url>
diff --git a/data/xml/2021.eval4nlp.xml b/data/xml/2021.eval4nlp.xml
index f2a313bc70..939dc8c9cc 100644
--- a/data/xml/2021.eval4nlp.xml
+++ b/data/xml/2021.eval4nlp.xml
@@ -24,8 +24,8 @@
       <author><first>Hicham</first><last>El Boukkouri</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>1–10</pages>
       <abstract>Most of the time, when dealing with a particular Natural Language Processing task, systems are compared on the basis of global statistics such as recall, precision, F1-score, etc. While such scores provide a general idea of the behavior of these systems, they ignore a key piece of information that can be useful for assessing progress and discerning remaining challenges: the relative difficulty of test instances. To address this shortcoming, we introduce the notion of differential evaluation which effectively defines a pragmatic partition of instances into gradually more difficult bins by leveraging the predictions made by a set of systems. Comparing systems along these difficulty bins enables us to produce a finer-grained analysis of their relative merits, which we illustrate on two use-cases: a comparison of systems participating in a multi-label text classification task (CLEF eHealth 2018 ICD-10 coding), and a comparison of neural models trained for biomedical entity detection (BioCreative V chemical-disease relations dataset).</abstract>
       <url hash="8ac4f987">2021.eval4nlp-1.1</url>
@@ -107,8 +107,8 @@
       <author><first>Tao</first><last>Yu</last></author>
       <author><first>Tong</first><last>Niu</last></author>
       <author><first>Yingbo</first><last>Zhou</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <pages>73–83</pages>
       <abstract>The benchmark performance of cross-database semantic parsing has climbed steadily in recent years, catalyzed by the wide adoption of pre-trained language models. Yet existing work have shown that state-of-the-art cross-database semantic parsers struggle to generalize to novel user utterances, databases and query structures. To obtain transparent details on the strengths and limitation of these models, we propose a diagnostic testing approach based on controlled synthesis of canonical natural language and SQL pairs. Inspired by the CheckList, we characterize a set of essential capabilities for cross-database semantic parsing models, and detailed the method for synthesizing the corresponding test data. We evaluated a variety of high performing models using the proposed approach, and identified several non-obvious weaknesses across models (e.g. unable to correctly select many columns). Our dataset and code are released as a test suite at <url>http://github.com/hclent/BehaviorCheckingSemPar</url>.</abstract>
       <url hash="2bf0331d">2021.eval4nlp-1.8</url>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="11">
       <title>Statistically Significant Detection of Semantic Shifts using Contextual Word Embeddings</title>
-      <author id="yang-liu-Helsinki"><first>Yang</first><last>Liu</last></author>
+      <author id="yang-liu-helsinki"><first>Yang</first><last>Liu</last></author>
       <author><first>Alan</first><last>Medlar</last></author>
       <author><first>Dorota</first><last>Glowacka</last></author>
       <pages>104–113</pages>
@@ -166,7 +166,7 @@
       <author><first>Ayush</first><last>Garg</last></author>
       <author><first>Sammed</first><last>Kagi</last></author>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>123–132</pages>
       <abstract>Code-mixing is a phenomenon of mixing words and phrases from two or more languages in a single utterance of speech and text. Due to the high linguistic diversity, code-mixing presents several challenges in evaluating standard natural language generation (NLG) tasks. Various widely popular metrics perform poorly with the code-mixed NLG tasks. To address this challenge, we present a metric in- dependent evaluation pipeline MIPE that significantly improves the correlation between evaluation metrics and human judgments on the generated code-mixed text. As a use case, we demonstrate the performance of MIPE on the machine-generated Hinglish (code-mixing of Hindi and English languages) sentences from the HinGE corpus. We can extend the proposed evaluation strategy to other code-mixed language pairs, NLG tasks, and evaluation metrics with minimal to no effort.</abstract>
       <url hash="c79459a6">2021.eval4nlp-1.13</url>
@@ -179,7 +179,7 @@
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Nuno M.</first><last>Guerreiro</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>133–145</pages>
       <abstract>We present the joint contribution of Instituto Superior Técnico (IST) and Unbabel to the Explainable Quality Estimation (QE) shared task, where systems were submitted to two tracks: constrained (without word-level supervision) and unconstrained (with word-level supervision). For the constrained track, we experimented with several explainability methods to extract the relevance of input tokens from sentence-level QE models built on top of multilingual pre-trained transformers. Among the different tested methods, composing explanations in the form of attention weights scaled by the norm of value vectors yielded the best results. When word-level labels are used during training, our best results were obtained by using word-level predicted probabilities. We further improve the performance of our methods on the two tracks by ensembling explanation scores extracted from models trained with different pre-trained transformers, achieving strong results for in-domain and zero-shot language pairs.</abstract>
       <url hash="3e99e093">2021.eval4nlp-1.14</url>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="15">
       <title>Error Identification for Machine Translation with Metric Embedding and Attention</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Benjamin</first><last>Marie</last></author>
       <pages>146–156</pages>
@@ -249,7 +249,7 @@
     <paper id="20">
       <title><fixed-case>H</fixed-case>in<fixed-case>GE</fixed-case>: A Dataset for Generation and Evaluation of Code-Mixed <fixed-case>H</fixed-case>inglish Text</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>200–208</pages>
       <abstract>Text generation is a highly active area of research in the computational linguistic community. The evaluation of the generated text is a challenging task and multiple theories and metrics have been proposed over the years. Unfortunately, text generation and evaluation are relatively understudied due to the scarcity of high-quality resources in code-mixed languages where the words and phrases from multiple languages are mixed in a single utterance of text and speech. To address this challenge, we present a corpus (HinGE) for a widely popular code-mixed language Hinglish (code-mixing of Hindi and English languages). HinGE has Hinglish sentences generated by humans as well as two rule-based algorithms corresponding to the parallel Hindi-English sentences. In addition, we demonstrate the in- efficacy of widely-used evaluation metrics on the code-mixed data. The HinGE dataset will facilitate the progress of natural language generation research in code-mixed languages.</abstract>
       <url hash="bb609d2f">2021.eval4nlp-1.20</url>
@@ -262,7 +262,7 @@
       <author><first>Oskar</first><last>Wysocki</last></author>
       <author><first>Malina</first><last>Florea</last></author>
       <author><first>Dónal</first><last>Landers</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>209–229</pages>
       <abstract>SemEval is the primary venue in the NLP community for the proposal of new challenges and for the systematic empirical evaluation of NLP systems. This paper provides a systematic quantitative analysis of SemEval aiming to evidence the patterns of the contributions behind SemEval. By understanding the distribution of task types, metrics, architectures, participation and citations over time we aim to answer the question on what is being evaluated by SemEval.</abstract>
       <url hash="ac0e2951">2021.eval4nlp-1.21</url>
@@ -298,7 +298,7 @@
       <title>Explainable Quality Estimation: <fixed-case>CUNI</fixed-case> <fixed-case>E</fixed-case>val4<fixed-case>NLP</fixed-case> Submission</title>
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>250–255</pages>
       <abstract>This paper describes our participating system in the shared task Explainable quality estimation of 2nd Workshop on Evaluation &amp; Comparison of NLP Systems. The task of quality estimation (QE, a.k.a. reference-free evaluation) is to predict the quality of MT output at inference time without access to reference translations. In this proposed work, we first build a word-level quality estimation model, then we finetune this model for sentence-level QE. Our proposed models achieve near state-of-the-art results. In the word-level QE, we place 2nd and 3rd on the supervised Ro-En and Et-En test sets. In the sentence-level QE, we achieve a relative improvement of 8.86% (Ro-En) and 10.6% (Et-En) in terms of the Pearson correlation coefficient over the baseline model.</abstract>
       <url hash="7e01b701">2021.eval4nlp-1.24</url>
diff --git a/data/xml/2021.fever.xml b/data/xml/2021.fever.xml
index c5807ad4e2..9548033f5a 100644
--- a/data/xml/2021.fever.xml
+++ b/data/xml/2021.fever.xml
@@ -8,7 +8,7 @@
       <editor><first>Oana</first><last>Cocarascu</last></editor>
       <editor><first>Zhijiang</first><last>Guo</last></editor>
       <editor><first>Arpit</first><last>Mittal</last></editor>
-      <editor><first>Michael</first><last>Schlichtkrull</last></editor>
+      <editor id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></editor>
       <editor><first>James</first><last>Thorne</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -100,7 +100,7 @@
       <title>Verdict Inference with Claim and Retrieved Elements Using <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a</title>
       <author><first>In-Zu</first><last>Gi</last></author>
       <author><first>Ting-Yu</first><last>Fang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>60–65</pages>
       <abstract>Automatic fact verification has attracted recent research attention as the increasing dissemination of disinformation on social media platforms. The FEVEROUS shared task introduces a benchmark for fact verification, in which a system is challenged to verify the given claim using the extracted evidential elements from Wikipedia documents. In this paper, we propose our 3rd place three-stage system consisting of document retrieval, element retrieval, and verdict inference for the FEVEROUS shared task. By considering the context relevance in the fact extraction and verification task, our system achieves 0.29 FEVEROUS score on the development set and 0.25 FEVEROUS score on the blind test set, both outperforming the FEVEROUS baseline.</abstract>
       <url hash="a13a8bc3">2021.fever-1.7</url>
@@ -110,7 +110,7 @@
     <paper id="8">
       <title>Stance Detection in <fixed-case>G</fixed-case>erman News Articles</title>
       <author><first>Laura</first><last>Mascarell</last></author>
-      <author><first>Tatyana</first><last>Ruzsics</last></author>
+      <author id="tatyana-ruzsics"><first>Tatyana</first><last>Ruzsics</last></author>
       <author><first>Christian</first><last>Schneebeli</last></author>
       <author><first>Philippe</first><last>Schlattner</last></author>
       <author><first>Luca</first><last>Campanella</last></author>
@@ -162,7 +162,7 @@
       <author><first>Giulio</first><last>Alfarano</last></author>
       <author><first>Khai</first><last>Nguyen</last></author>
       <author><first>Duc</first><last>Pham</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <author><first>Paolo</first><last>Papotti</last></author>
       <pages>108–112</pages>
       <abstract>Computational fact-checking has gained a lot of traction in the machine learning and natural language processing communities. A plethora of solutions have been developed, but methods which leverage both structured and unstructured information to detect misinformation are of particular relevance. In this paper, we tackle the FEVEROUS (Fact Extraction and VERification Over Unstructured and Structured information) challenge which consists of an open source baseline system together with a benchmark dataset containing 87,026 verified claims. We extend this baseline model by improving the evidence retrieval module yielding the best evidence F1 score among the competitors in the challenge leaderboard while obtaining an overall FEVEROUS score of 0.20 (5th best ranked system).</abstract>
diff --git a/data/xml/2021.findings.xml b/data/xml/2021.findings.xml
index 49c13820e8..2f8c938a5a 100644
--- a/data/xml/2021.findings.xml
+++ b/data/xml/2021.findings.xml
@@ -3,7 +3,7 @@
   <volume id="acl" ingest-date="2021-07-25" type="proceedings">
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021</booktitle>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Fei</first><last>Xia</last></editor>
       <editor><first>Wenjie</first><last>Li</last></editor>
       <editor><first>Roberto</first><last>Navigli</last></editor>
@@ -22,7 +22,7 @@
       <title>Explainable Inference Over Grounding-Abstract Chains for Science Questions</title>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>1–12</pages>
       <url hash="1cbc8a78">2021.findings-acl.1</url>
       <doi>10.18653/v1/2021.findings-acl.1</doi>
@@ -206,7 +206,7 @@
       <title>More than just Frequency? Demasking Unsupervised Hypernymy Prediction Methods</title>
       <author><first>Thomas</first><last>Bott</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>186–192</pages>
       <url hash="f9bcc3c9">2021.findings-acl.16</url>
       <doi>10.18653/v1/2021.findings-acl.16</doi>
@@ -231,7 +231,7 @@
       <author><first>Kazi Sajeed</first><last>Mehrab</last></author>
       <author><first>Md. Mahim Anjum</first><last>Haque</last></author>
       <author><first>Tahmid</first><last>Hasan</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Anindya</first><last>Iqbal</last></author>
       <author><first>Rifat</first><last>Shahriyar</last></author>
       <pages>210–218</pages>
@@ -269,7 +269,7 @@
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Kei</first><last>Uchiumi</last></author>
       <author><first>Atsushi</first><last>Keyaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>244–255</pages>
       <url hash="9dc0c961">2021.findings-acl.21</url>
       <doi>10.18653/v1/2021.findings-acl.21</doi>
@@ -319,7 +319,7 @@
     <paper id="25">
       <title>Better <fixed-case>C</fixed-case>hinese Sentence Segmentation with Reinforcement Learning</title>
       <author><first>Srivatsan</first><last>Srinivasan</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>293–302</pages>
       <url hash="bffb0edb">2021.findings-acl.25</url>
       <doi>10.18653/v1/2021.findings-acl.25</doi>
@@ -341,7 +341,7 @@
       <title>Empirical Error Modeling Improves Robustness of Noisy Neural Sequence Labeling</title>
       <author><first>Marcin</first><last>Namysl</last></author>
       <author><first>Sven</first><last>Behnke</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <pages>314–329</pages>
       <url hash="e9b0a500">2021.findings-acl.27</url>
       <attachment type="OptionalSupplementaryMaterial" hash="8c54d7ee">2021.findings-acl.27.OptionalSupplementaryMaterial.zip</attachment>
@@ -517,8 +517,8 @@
     <paper id="41">
       <title>Decoupling Adversarial Training for Fair <fixed-case>NLP</fixed-case></title>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>471–477</pages>
       <url hash="27122fce">2021.findings-acl.41</url>
       <doi>10.18653/v1/2021.findings-acl.41</doi>
@@ -583,7 +583,7 @@
       <author><first>Jinghui</first><last>Qin</last></author>
       <author><first>Xiaodan</first><last>Liang</last></author>
       <author><first>Lingbo</first><last>Liu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Liang</first><last>Lin</last></author>
       <pages>513–523</pages>
       <url hash="0c35cb5d">2021.findings-acl.46</url>
@@ -595,7 +595,7 @@
       <title><fixed-case>SIRE</fixed-case>: Separate Intra- and Inter-sentential Reasoning for Document-level Relation Extraction</title>
       <author><first>Shuang</first><last>Zeng</last></author>
       <author><first>Yuting</first><last>Wu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>524–534</pages>
       <url hash="1e389b3e">2021.findings-acl.47</url>
       <doi>10.18653/v1/2021.findings-acl.47</doi>
@@ -622,7 +622,7 @@
       <author><first>Shengqiong</first><last>Wu</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
       <author><first>Fei</first><last>Li</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>549–559</pages>
       <url hash="b16b180c">2021.findings-acl.49</url>
       <doi>10.18653/v1/2021.findings-acl.49</doi>
@@ -644,7 +644,7 @@
     <paper id="51">
       <title>Contrastive Fine-tuning Improves Robustness for Neural Rankers</title>
       <author><first>Xiaofei</first><last>Ma</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Andrew O.</first><last>Arnold</last></author>
       <pages>570–582</pages>
       <url hash="8f371ea7">2021.findings-acl.51</url>
@@ -665,8 +665,8 @@
     <paper id="53">
       <title><fixed-case>T</fixed-case>ell<fixed-case>M</fixed-case>e<fixed-case>W</fixed-case>hy: A Dataset for Answering Why-Questions in Narratives</title>
       <author><first>Yash Kumar</first><last>Lal</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <pages>596–610</pages>
       <url hash="8940e02d">2021.findings-acl.53</url>
@@ -699,8 +699,8 @@
       <author><first>Murilo</first><last>Gazzola</last></author>
       <author><first>Ester</first><last>Sabino</last></author>
       <author><first>Anna</first><last>Levin</last></author>
-      <author><first>Arnaldo</first><last>Candido Jr</last></author>
-      <author><first>Sandra</first><last>Aluisio</last></author>
+      <author id="arnaldo-candido-jr"><first>Arnaldo</first><last>Candido Jr</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluisio</last></author>
       <author><first>Marcelo</first><last>Finger</last></author>
       <pages>625–633</pages>
       <url hash="a8fb9fd1">2021.findings-acl.55</url>
@@ -753,7 +753,7 @@
     <paper id="59">
       <title>Prediction or Comparison: Toward Interpretable Qualitative Reasoning</title>
       <author><first>Mucheng</first><last>Ren</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yang</first><last>Gao</last></author>
       <pages>664–675</pages>
       <url hash="ee7af8df">2021.findings-acl.59</url>
@@ -791,7 +791,7 @@
       <author><first>Sang-Woo</first><last>Lee</last></author>
       <author><first>Ji-Hoon</first><last>Kim</last></author>
       <author><first>Jung-Woo</first><last>Ha</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>694–704</pages>
       <url hash="88685e0f">2021.findings-acl.62</url>
       <doi>10.18653/v1/2021.findings-acl.62</doi>
@@ -817,7 +817,7 @@
       <author><first>Chao</first><last>Li</last></author>
       <author><first>Zizhen</first><last>Wang</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>716–728</pages>
       <url hash="6e50f7b7">2021.findings-acl.64</url>
@@ -841,7 +841,7 @@
       <author><first>Lei</first><last>Ji</last></author>
       <author><first>Huaishao</first><last>Luo</last></author>
       <author><first>Botian</first><last>Shi</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>743–752</pages>
@@ -902,7 +902,7 @@
       <title>Evaluating the Efficacy of Summarization Evaluation across Languages</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>801–812</pages>
       <url hash="f47d9b4b">2021.findings-acl.71</url>
       <doi>10.18653/v1/2021.findings-acl.71</doi>
@@ -952,7 +952,7 @@
       <title>Can Cognate Prediction Be Modelled as a Low-Resource Machine Translation Task?</title>
       <author><first>Clémentine</first><last>Fourrier</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>847–861</pages>
       <url hash="9a84c271">2021.findings-acl.75</url>
       <doi>10.18653/v1/2021.findings-acl.75</doi>
@@ -965,7 +965,7 @@
       <title>What if This Modified That? Syntactic Interventions with Counterfactual Embeddings</title>
       <author><first>Mycal</first><last>Tucker</last></author>
       <author><first>Peng</first><last>Qian</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>862–875</pages>
       <url hash="cb0ef1f5">2021.findings-acl.76</url>
       <doi>10.18653/v1/2021.findings-acl.76</doi>
@@ -1016,7 +1016,7 @@
       <author><first>Pepa</first><last>Atanasova</last></author>
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>915–928</pages>
       <url hash="5b228554">2021.findings-acl.80</url>
       <doi>10.18653/v1/2021.findings-acl.80</doi>
@@ -1039,7 +1039,7 @@
       <title>Promoting Graph Awareness in Linearized Graph-to-Text Generation</title>
       <author><first>Alexander Miserlis</first><last>Hoyle</last></author>
       <author><first>Ana</first><last>Marasović</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>944–956</pages>
       <url hash="1639d238">2021.findings-acl.82</url>
       <doi>10.18653/v1/2021.findings-acl.82</doi>
@@ -1065,7 +1065,7 @@
       <author><first>Sarath</first><last>Chandar</last></author>
       <author><first>Soroush</first><last>Vosoughi</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>968–988</pages>
       <url hash="441527e6">2021.findings-acl.84</url>
       <doi>10.18653/v1/2021.findings-acl.84</doi>
@@ -1115,7 +1115,7 @@
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Zeyu</first><last>Yan</last></author>
       <author><first>Chang</first><last>Li</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <pages>1028–1033</pages>
       <url hash="437e51d0">2021.findings-acl.88</url>
       <doi>10.18653/v1/2021.findings-acl.88</doi>
@@ -1238,7 +1238,7 @@
     <paper id="98">
       <title>Out of Order: How important is the sequential order of words in a sentence in Natural Language Understanding tasks?</title>
       <author><first>Thang</first><last>Pham</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Long</first><last>Mai</last></author>
       <author><first>Anh</first><last>Nguyen</last></author>
       <pages>1145–1160</pages>
@@ -1295,7 +1295,7 @@
       <author><first>Ruochen</first><last>Xu</last></author>
       <author id="yang-liu-edinburgh"><first>Yang</first><last>Liu</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>1201–1207</pages>
       <url hash="e288d613">2021.findings-acl.102</url>
       <doi>10.18653/v1/2021.findings-acl.102</doi>
@@ -1339,7 +1339,7 @@
       <author><first>Rahul</first><last>Aralikatte</last></author>
       <author><first>Disha</first><last>Shrivastava</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1245–1260</pages>
       <url hash="49004d00">2021.findings-acl.106</url>
       <doi>10.18653/v1/2021.findings-acl.106</doi>
@@ -1353,7 +1353,7 @@
       <author><first>Ximing</first><last>Lu</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>1261–1274</pages>
       <url hash="2a1f3e25">2021.findings-acl.107</url>
@@ -1400,7 +1400,7 @@
       <author><first>Shujie</first><last>Liu</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <pages>1300–1305</pages>
       <url hash="d76d07cb">2021.findings-acl.111</url>
       <doi>10.18653/v1/2021.findings-acl.111</doi>
@@ -1467,7 +1467,7 @@
     <paper id="116">
       <title>Exploiting Position Bias for Robust Aspect Sentiment Classification</title>
       <author><first>Fang</first><last>Ma</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Dawei</first><last>Song</last></author>
       <pages>1352–1358</pages>
       <url hash="8b7b559c">2021.findings-acl.116</url>
@@ -1482,7 +1482,7 @@
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>1359–1370</pages>
       <url hash="d66f73b0">2021.findings-acl.117</url>
       <doi>10.18653/v1/2021.findings-acl.117</doi>
@@ -1522,9 +1522,9 @@
       <author><first>Chunting</first><last>Zhou</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Jiatao</first><last>Gu</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
       <pages>1393–1404</pages>
       <url hash="5702e2e8">2021.findings-acl.120</url>
@@ -1538,7 +1538,7 @@
       <author><first>Duyu</first><last>Tang</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Jianshu</first><last>Ji</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
@@ -1579,7 +1579,7 @@
       <title>Exploring the Role of Context in Utterance-level Emotion, Act and Intent Classification in Conversations: An Empirical Study</title>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>1435–1449</pages>
       <url hash="e32a873d">2021.findings-acl.124</url>
@@ -1616,11 +1616,11 @@
       <title>Putting words into the system’s mouth: A targeted attack on neural machine translation using monolingual data poisoning</title>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Chang</first><last>Xu</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Yuqing</first><last>Tang</last></author>
       <author><first>Benjamin</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1463–1473</pages>
       <url hash="d7521c5b">2021.findings-acl.127</url>
       <doi>10.18653/v1/2021.findings-acl.127</doi>
@@ -1667,7 +1667,7 @@
     </paper>
     <paper id="131">
       <title><fixed-case>R</fixed-case>iddle<fixed-case>S</fixed-case>ense: Reasoning about Riddle Questions Featuring Linguistic Creativity and Commonsense Knowledge</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Ziyi</first><last>Wu</last></author>
       <author><first>Yichi</first><last>Yang</last></author>
       <author><first>Dong-Ho</first><last>Lee</last></author>
@@ -1693,11 +1693,11 @@
     <paper id="133">
       <title>Learning Slice-Aware Representations with Mixture of Attentions</title>
       <author><first>Cheng</first><last>Wang</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Sunghyun</first><last>Park</last></author>
       <author><first>Han</first><last>Li</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>1530–1536</pages>
       <url hash="9bcd977a">2021.findings-acl.133</url>
       <doi>10.18653/v1/2021.findings-acl.133</doi>
@@ -1726,7 +1726,7 @@
       <title>Few-shot Knowledge Graph-to-Text Generation with Pretrained Language Models</title>
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Tianyi</first><last>Tang</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Zhicheng</first><last>Wei</last></author>
       <author><first>Nicholas Jing</first><last>Yuan</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -1779,10 +1779,10 @@
     </paper>
     <paper id="140">
       <title>Target-oriented Fine-tuning for Zero-Resource Named Entity Recognition</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>1603–1615</pages>
       <url hash="324795b8">2021.findings-acl.140</url>
@@ -1807,7 +1807,7 @@
       <author><first>Haotong</first><last>Sun</last></author>
       <author><first>Junsheng</first><last>Zhou</last></author>
       <author><first>Weiguang</first><last>Qu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>1630–1640</pages>
       <url hash="8ed01b08">2021.findings-acl.142</url>
       <doi>10.18653/v1/2021.findings-acl.142</doi>
@@ -1830,7 +1830,7 @@
       <title>Discriminative Reasoning for Document-level Relation Extraction</title>
       <author><first>Wang</first><last>Xu</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1653–1663</pages>
       <url hash="fa602468">2021.findings-acl.144</url>
       <doi>10.18653/v1/2021.findings-acl.144</doi>
@@ -1923,7 +1923,7 @@
       <author><first>Yukun</first><last>Feng</last></author>
       <author><first>Hao</first><last>Wu</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1743–1750</pages>
       <url hash="c3e5e04e">2021.findings-acl.152</url>
       <doi>10.18653/v1/2021.findings-acl.152</doi>
@@ -1996,7 +1996,7 @@
     </paper>
     <paper id="158">
       <title>Cross-Lingual Cross-Domain Nested Named Entity Evaluation on <fixed-case>E</fixed-case>nglish Web Texts</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1808–1815</pages>
       <url hash="fb2d83d3">2021.findings-acl.158</url>
       <doi>10.18653/v1/2021.findings-acl.158</doi>
@@ -2044,7 +2044,7 @@
       <author><first>Huang-Cheng</first><last>Chou</last></author>
       <author><first>Woan-Shiuan</first><last>Chien</last></author>
       <author><first>Da-Cheng</first><last>Juan</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <pages>1846–1860</pages>
       <url hash="adb2039f">2021.findings-acl.162</url>
       <attachment type="OptionalSupplementaryMaterial" hash="00abef8c">2021.findings-acl.162.OptionalSupplementaryMaterial.zip</attachment>
@@ -2067,7 +2067,7 @@
       <title>Structured Refinement for Sequential Labeling</title>
       <author><first>Yiran</first><last>Wang</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <pages>1873–1884</pages>
       <url hash="76ff9b21">2021.findings-acl.164</url>
@@ -2088,7 +2088,7 @@
     <paper id="166">
       <title>Deciphering Implicit Hate: Evaluating Automated Detection Algorithms for Multimodal Hate</title>
       <author><first>Austin</first><last>Botelho</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>Bertie</first><last>Vidgen</last></author>
       <pages>1896–1907</pages>
       <url hash="4d1678d9">2021.findings-acl.166</url>
@@ -2101,7 +2101,7 @@
       <title>Studying the Evolution of Scientific Topics and their Relationships</title>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>1908–1922</pages>
       <url hash="584371e6">2021.findings-acl.167</url>
       <doi>10.18653/v1/2021.findings-acl.167</doi>
@@ -2110,7 +2110,7 @@
     <paper id="168">
       <title>End-to-End Self-Debiasing Framework for Robust <fixed-case>NLU</fixed-case> Training</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Ahmad</first><last>Rashid</last></author>
       <pages>1923–1929</pages>
@@ -2195,7 +2195,7 @@
       <author><first>Scott</first><last>Novotney</last></author>
       <author><first>Ivan</first><last>Bulyko</last></author>
       <author><first>Ariya</first><last>Rastrow</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <author><first>Ankur</first><last>Gandhe</last></author>
       <pages>1994–2003</pages>
       <url hash="c8551910">2021.findings-acl.175</url>
@@ -2207,7 +2207,7 @@
       <title>Annotation and Evaluation of Coreference Resolution in Screenplays</title>
       <author><first>Sabyasachee</first><last>Baruah</last></author>
       <author><first>Sandeep</first><last>Nallan Chakravarthula</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>2004–2010</pages>
       <url hash="d7fd9230">2021.findings-acl.176</url>
       <attachment type="OptionalSupplementaryMaterial" hash="edbabf3c">2021.findings-acl.176.OptionalSupplementaryMaterial.gz</attachment>
@@ -2273,7 +2273,7 @@
       <author><first>Shuwen</first><last>Qiu</last></author>
       <author><first>Lifeng</first><last>Fan</last></author>
       <author><first>Yixin</first><last>Zhu</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <pages>2074–2085</pages>
       <url hash="56b62e54">2021.findings-acl.182</url>
       <doi>10.18653/v1/2021.findings-acl.182</doi>
@@ -2295,7 +2295,7 @@
     </paper>
     <paper id="184">
       <title>Fusion: Towards Automated <fixed-case>ICD</fixed-case> Coding via Feature Compression</title>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last></author>
+      <author><first>Junyu</first><last>Luo</last></author>
       <author><first>Cao</first><last>Xiao</last></author>
       <author><first>Lucas</first><last>Glass</last></author>
       <author><first>Jimeng</first><last>Sun</last></author>
@@ -2315,7 +2315,7 @@
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>2102–2113</pages>
       <url hash="5664ce25">2021.findings-acl.185</url>
       <doi>10.18653/v1/2021.findings-acl.185</doi>
@@ -2362,7 +2362,7 @@
       <author><first>Jie</first><last>Zhou</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Qin</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Liang</first><last>He</last></author>
       <pages>2152–2161</pages>
       <url hash="edf86035">2021.findings-acl.189</url>
@@ -2391,7 +2391,7 @@
       <author><first>Shangwen</first><last>Lv</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>QiaoQiao</first><last>She</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
@@ -2457,7 +2457,7 @@
       <author><first>Jizhi</first><last>Zhang</last></author>
       <author><first>Xiangnan</first><last>He</last></author>
       <author><first>Hanwang</first><last>Zhang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>2226–2236</pages>
       <url hash="61d34875">2021.findings-acl.196</url>
       <doi>10.18653/v1/2021.findings-acl.196</doi>
@@ -2568,7 +2568,7 @@
       <author><first>Yijin</first><last>Liu</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>2327–2337</pages>
       <url hash="a0dd9271">2021.findings-acl.205</url>
@@ -2579,7 +2579,7 @@
       <title><fixed-case>MA</fixed-case>-<fixed-case>BERT</fixed-case>: Learning Representation by Incorporating Multi-Attribute Knowledge in Transformers</title>
       <author><first>You</first><last>Zhang</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>2338–2343</pages>
       <url hash="226ba154">2021.findings-acl.206</url>
@@ -2593,7 +2593,7 @@
       <author><first>Yuxuan</first><last>Wang</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Zhilin</first><last>Lei</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>2344–2354</pages>
@@ -2608,7 +2608,7 @@
       <author><first>Tiberiu</first><last>Sosea</last></author>
       <author><first>Aditya</first><last>Sawant</last></author>
       <author><first>Ajith Jayaraman</first><last>Nair</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
       <pages>2355–2365</pages>
       <url hash="ca96a486">2021.findings-acl.208</url>
@@ -2630,7 +2630,7 @@
       <author><first>Kuicai</first><last>Dong</last></author>
       <author><first>Zhao</first><last>Yilin</last></author>
       <author><first>Aixin</first><last>Sun</last></author>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>2377–2389</pages>
       <url hash="e32528d6">2021.findings-acl.210</url>
@@ -2684,7 +2684,7 @@
       <author><first>Tongtong</first><last>Wu</last></author>
       <author><first>Guilin</first><last>Qi</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Sheng</first><last>Bi</last></author>
       <pages>2417–2429</pages>
       <url hash="186978a0">2021.findings-acl.214</url>
@@ -2815,7 +2815,7 @@
       <title><fixed-case>A</fixed-case>da<fixed-case>ST</fixed-case>: Dynamically Adapting Encoder States in the Decoder for End-to-End Speech-to-Text Translation</title>
       <author><first>Wuwei</first><last>Huang</last></author>
       <author><first>Dexin</first><last>Wang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2539–2545</pages>
       <url hash="74ba12c9">2021.findings-acl.224</url>
       <doi>10.18653/v1/2021.findings-acl.224</doi>
@@ -2825,7 +2825,7 @@
     <paper id="225">
       <title><fixed-case>OKGIT</fixed-case>: <fixed-case>O</fixed-case>pen Knowledge Graph Link Prediction with Implicit Types</title>
       <author><first>.</first><last>Chandrahas</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>2546–2559</pages>
       <url hash="a91d249f">2021.findings-acl.225</url>
       <attachment type="OptionalSupplementaryMaterial" hash="fa92a213">2021.findings-acl.225.OptionalSupplementaryMaterial.zip</attachment>
@@ -2849,7 +2849,7 @@
       <title>Joint Multi-Decoder Framework with Hierarchical Pointer Network for Frame Semantic Parsing</title>
       <author><first>Xudong</first><last>Chen</last></author>
       <author><first>Ce</first><last>Zheng</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>2570–2578</pages>
       <url hash="f29782ef">2021.findings-acl.227</url>
       <doi>10.18653/v1/2021.findings-acl.227</doi>
@@ -2860,7 +2860,7 @@
       <author><first>Jhih-wei</first><last>Chen</last></author>
       <author><first>Tsu-Jui</first><last>Fu</last></author>
       <author><first>Chen-Kang</first><last>Lee</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>2579–2593</pages>
       <url hash="b2681266">2021.findings-acl.228</url>
       <attachment type="OptionalSupplementaryMaterial" hash="c71c4dd3">2021.findings-acl.228.OptionalSupplementaryMaterial.zip</attachment>
@@ -2923,7 +2923,7 @@
     </paper>
     <paper id="233">
       <title>Automatic Text Simplification for Social Good: Progress and Challenges</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <pages>2637–2652</pages>
       <url hash="c5ddd124">2021.findings-acl.233</url>
       <doi>10.18653/v1/2021.findings-acl.233</doi>
@@ -2992,7 +2992,7 @@
     <paper id="239">
       <title>Continual Mixed-Language Pre-Training for Extremely Low-Resource Neural Machine Translation</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>2706–2718</pages>
       <url hash="a81c9b05">2021.findings-acl.239</url>
@@ -3037,7 +3037,7 @@
       <title>When Time Makes Sense: A Historically-Aware Approach to Targeted Sense Disambiguation</title>
       <author><first>Kaspar</first><last>Beelen</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Mariona</first><last>Coll Ardanuy</last></author>
+      <author id="mariona-coll-ardanuy"><first>Mariona</first><last>Coll Ardanuy</last></author>
       <author><first>Kasra</first><last>Hosseini</last></author>
       <author><first>Giorgia</first><last>Tolfo</last></author>
       <author><first>Barbara</first><last>McGillivray</last></author>
@@ -3050,8 +3050,8 @@
     <paper id="244">
       <title>Understanding Feature Focus in Multitask Settings for Lexico-semantic Relation Identification</title>
       <author><first>Houssam</first><last>Akhmouch</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <pages>2762–2772</pages>
       <url hash="ceb39d0c">2021.findings-acl.244</url>
       <doi>10.18653/v1/2021.findings-acl.244</doi>
@@ -3077,7 +3077,7 @@
       <author><first>Rituparna</first><last>Mukherjee</last></author>
       <author><first>Shivam</first><last>Sharma</last></author>
       <author><first>Md. Shad</first><last>Akhtar</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <pages>2783–2796</pages>
       <url hash="1e76fef1">2021.findings-acl.246</url>
@@ -3103,7 +3103,7 @@
     <paper id="248">
       <title><fixed-case>Z</fixed-case>m<fixed-case>BART</fixed-case>: An Unsupervised Cross-lingual Transfer Framework for Language Generation</title>
       <author><first>Kaushal Kumar</first><last>Maurya</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <author><first>Yoshinobu</first><last>Kano</last></author>
       <author><first>Kumari</first><last>Deepshikha</last></author>
       <pages>2804–2818</pages>
@@ -3131,7 +3131,7 @@
     <paper id="250">
       <title>Do Multilingual Neural Machine Translation Models Contain Language Pair Specific Attention Heads?</title>
       <author><first>Zae Myung</first><last>Kim</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Vassilina</first><last>Nikoulina</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>2832–2841</pages>
@@ -3157,9 +3157,9 @@
       <author><first>Yawen</first><last>Ouyang</last></author>
       <author><first>Jiasheng</first><last>Ye</last></author>
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>2852–2861</pages>
       <url hash="8081a962">2021.findings-acl.252</url>
       <doi>10.18653/v1/2021.findings-acl.252</doi>
@@ -3181,7 +3181,7 @@
       <title><fixed-case>M</fixed-case>erge<fixed-case>D</fixed-case>istill: <fixed-case>M</fixed-case>erging Language Models using Pre-trained Distillation</title>
       <author><first>Simran</first><last>Khanuja</last></author>
       <author><first>Melvin</first><last>Johnson</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>2874–2887</pages>
       <url hash="a234a635">2021.findings-acl.254</url>
       <doi>10.18653/v1/2021.findings-acl.254</doi>
@@ -3203,8 +3203,8 @@
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-assisted Noun Compound Interpretation</title>
       <author><first>Girishkumar</first><last>Ponkiya</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>2901–2911</pages>
       <url hash="f2c35e4e">2021.findings-acl.256</url>
       <doi>10.18653/v1/2021.findings-acl.256</doi>
@@ -3240,7 +3240,7 @@
       <title>On the Interaction of Belief Bias and Explanations</title>
       <author><first>Ana Valeria</first><last>González</last></author>
       <author><first>Anna</first><last>Rogers</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2930–2942</pages>
       <url hash="6ccc7dc5">2021.findings-acl.259</url>
       <doi>10.18653/v1/2021.findings-acl.259</doi>
@@ -3321,7 +3321,7 @@
     <paper id="266">
       <title>Probing Pre-Trained Language Models for Disease Knowledge</title>
       <author><first>Israa</first><last>Alghanmi</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>3023–3033</pages>
       <url hash="a02bc2a3">2021.findings-acl.266</url>
@@ -3330,9 +3330,9 @@
     </paper>
     <paper id="267">
       <title><fixed-case>A</fixed-case>ug<fixed-case>V</fixed-case>ic: Exploiting <fixed-case>B</fixed-case>i<fixed-case>T</fixed-case>ext Vicinity for Low-Resource <fixed-case>NMT</fixed-case></title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <author><first>M Saiful</first><last>Bari</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>3034–3045</pages>
       <url hash="d60fb086">2021.findings-acl.267</url>
       <doi>10.18653/v1/2021.findings-acl.267</doi>
@@ -3411,7 +3411,7 @@
       <author><first>Geeticka</first><last>Chauhan</last></author>
       <author><first>Brian</first><last>Tse</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>3099–3113</pages>
       <url hash="3f43f9a9">2021.findings-acl.273</url>
       <doi>10.18653/v1/2021.findings-acl.273</doi>
@@ -3502,7 +3502,7 @@
       <author><first>Suci</first><last>Fitriany</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
       <author><first>Radityo Eko</first><last>Prasojo</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>3170–3183</pages>
       <url hash="78cf91b8">2021.findings-acl.280</url>
       <doi>10.18653/v1/2021.findings-acl.280</doi>
@@ -3548,7 +3548,7 @@
     <paper id="284">
       <title>Is the Lottery Fair? Evaluating Winning Tickets Across Demographics</title>
       <author><first>Victor Petrén Bach</first><last>Hansen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3214–3224</pages>
       <url hash="c9a81e10">2021.findings-acl.284</url>
       <doi>10.18653/v1/2021.findings-acl.284</doi>
@@ -3597,7 +3597,7 @@
     <paper id="289">
       <title><fixed-case>D</fixed-case>o<fixed-case>T</fixed-case>: An efficient Double Transformer for <fixed-case>NLP</fixed-case> tasks with tables</title>
       <author><first>Syrine</first><last>Krichene</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <pages>3273–3283</pages>
       <url hash="673ffa3e">2021.findings-acl.289</url>
@@ -3628,7 +3628,7 @@
     <paper id="292">
       <title>Hyperbolic Temporal Knowledge Graph Embeddings with Relational and Time Curvatures</title>
       <author><first>Sebastien</first><last>Montella</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
       <pages>3296–3308</pages>
       <url hash="23341630">2021.findings-acl.292</url>
@@ -3675,7 +3675,7 @@
       <title>Representing Syntax and Composition with Geometric Transformations</title>
       <author><first>Lorenzo</first><last>Bertolini</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Qiwei</first><last>Peng</last></author>
       <pages>3343–3353</pages>
       <url hash="c34f341f">2021.findings-acl.296</url>
@@ -3700,7 +3700,7 @@
       <title>To Point or Not to Point: Understanding How Abstractive Summarizers Paraphrase Text</title>
       <author><first>Matt</first><last>Wilber</last></author>
       <author><first>William</first><last>Timkey</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>3362–3376</pages>
       <url hash="f6b062c6">2021.findings-acl.298</url>
       <doi>10.18653/v1/2021.findings-acl.298</doi>
@@ -3796,7 +3796,7 @@
     <paper id="306">
       <title>Generating Informative Conclusions for Argumentative Texts</title>
       <author><first>Shahbaz</first><last>Syed</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Milad</first><last>Alshomary</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
@@ -3808,7 +3808,7 @@
     </paper>
     <paper id="307">
       <title>Substructure Substitution: Structured Data Augmentation for <fixed-case>NLP</fixed-case></title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <pages>3494–3508</pages>
@@ -3864,7 +3864,7 @@
       <author><first>Yasumasa</first><last>Onoe</last></author>
       <author><first>Ioana</first><last>Baldini</last></author>
       <author><first>Joydeep</first><last>Ghosh</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Kush</first><last>Varshney</last></author>
       <pages>3547–3561</pages>
       <url hash="38649ced">2021.findings-acl.311</url>
@@ -3888,7 +3888,7 @@
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>3576–3589</pages>
       <url hash="b67450cb">2021.findings-acl.313</url>
@@ -3909,7 +3909,7 @@
       <title>An Exploratory Analysis of the Relation between Offensive Language and Mental Health</title>
       <author><first>Ana-Maria</first><last>Bucur</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>3600–3606</pages>
       <url hash="a4514c0a">2021.findings-acl.315</url>
       <doi>10.18653/v1/2021.findings-acl.315</doi>
@@ -3931,7 +3931,7 @@
     <paper id="317">
       <title><fixed-case>P</fixed-case>roof<fixed-case>W</fixed-case>riter: Generating Implications, Proofs, and Abductive Statements over Natural Language</title>
       <author><first>Oyvind</first><last>Tafjord</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>3621–3634</pages>
       <url hash="39bb8d56">2021.findings-acl.317</url>
@@ -4063,7 +4063,7 @@
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Guy</first><last>Feigenblat</last></author>
       <author><first>Benjamin</first><last>Sznajder</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>David</first><last>Konopnicki</last></author>
       <pages>3748–3756</pages>
       <url hash="ab606c7e">2021.findings-acl.329</url>
@@ -4143,7 +4143,7 @@
       <title>Explaining <fixed-case>NLP</fixed-case> Models via Minimal Contrastive Editing (<fixed-case>M</fixed-case>i<fixed-case>CE</fixed-case>)</title>
       <author><first>Alexis</first><last>Ross</last></author>
       <author><first>Ana</first><last>Marasović</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <pages>3840–3852</pages>
       <url hash="def48247">2021.findings-acl.336</url>
       <attachment type="OptionalSupplementaryMaterial" hash="5713c22a">2021.findings-acl.336.OptionalSupplementaryMaterial.zip</attachment>
@@ -4169,7 +4169,7 @@
       <title>Synthesizing Adversarial Negative Responses for Robust Response Ranking and Evaluation</title>
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <pages>3867–3883</pages>
       <url hash="7adfbe6c">2021.findings-acl.338</url>
       <doi>10.18653/v1/2021.findings-acl.338</doi>
@@ -4181,15 +4181,15 @@
       <author><first>Pavan</first><last>Kapanipathi</last></author>
       <author><first>Ibrahim</first><last>Abdelaziz</last></author>
       <author><first>Srinivas</first><last>Ravishankar</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Alexander</first><last>Gray</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
       <author><first>Maria</first><last>Chang</last></author>
       <author><first>Cristina</first><last>Cornelio</last></author>
       <author><first>Saswati</first><last>Dana</last></author>
-      <author><first>Achille</first><last>Fokoue</last></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last></author>
       <author><first>Dinesh</first><last>Garg</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Sairam</first><last>Gurajada</last></author>
       <author><first>Hima</first><last>Karanam</last></author>
       <author><first>Naweed</first><last>Khan</last></author>
@@ -4236,10 +4236,10 @@
     </paper>
     <paper id="342">
       <title>Perceptual Models of Machine-Edited Text</title>
-      <author><first>Elizabeth</first><last>Merkhofer</last></author>
+      <author id="elizabeth-merkhofer"><first>Elizabeth</first><last>Merkhofer</last></author>
       <author><first>Monica-Ann</first><last>Mendoza</last></author>
       <author><first>Rebecca</first><last>Marvin</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <pages>3909–3920</pages>
       <url hash="bd6d9156">2021.findings-acl.342</url>
       <doi>10.18653/v1/2021.findings-acl.342</doi>
@@ -4306,7 +4306,7 @@
       <author><first>Cass</first><last>Lowry</last></author>
       <author><first>Sujay</first><last>Khandagale</last></author>
       <author><first>Francesca</first><last>Callejas</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Maria</first><last>Polinsky</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>3969–3974</pages>
@@ -4321,7 +4321,7 @@
       <author><first>Shuai</first><last>Wang</last></author>
       <author><first>Rishita</first><last>Anubhai</last></author>
       <author><first>Kasturi</first><last>Bhattacharjee</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>3975–3989</pages>
       <url hash="e67c95f2">2021.findings-acl.348</url>
@@ -4404,7 +4404,7 @@
       <title>Analyzing Stereotypes in Generative Text Inference Tasks</title>
       <author><first>Anna</first><last>Sotnikova</last></author>
       <author><first>Yang Trista</first><last>Cao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Rachel</first><last>Rudinger</last></author>
       <pages>4052–4065</pages>
       <url hash="89535a1a">2021.findings-acl.355</url>
@@ -4429,7 +4429,7 @@
       <title>Improving Automated Evaluation of Open Domain Dialog via Diverse Reference Augmentation</title>
       <author><first>Varun</first><last>Gangal</last></author>
       <author><first>Harsh</first><last>Jhamtani</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>4079–4090</pages>
       <url hash="bacf7793">2021.findings-acl.357</url>
@@ -4496,7 +4496,7 @@
       <author><first>Shraddhan</first><last>Jain</last></author>
       <author><first>Michael</first><last>Ferdman</last></author>
       <author><first>Peter</first><last>Milder</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <pages>4147–4157</pages>
       <url hash="f20352d8">2021.findings-acl.363</url>
@@ -4536,7 +4536,7 @@
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>4179–4192</pages>
       <url hash="1e35a4ca">2021.findings-acl.366</url>
       <doi>10.18653/v1/2021.findings-acl.366</doi>
@@ -4559,7 +4559,7 @@
     <paper id="368">
       <title>Hierarchical Task Learning from Language Instructions with Unified Transformers and Self-Monitoring</title>
       <author><first>Yichi</first><last>Zhang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>4202–4213</pages>
       <url hash="480c5a85">2021.findings-acl.368</url>
       <doi>10.18653/v1/2021.findings-acl.368</doi>
@@ -4583,7 +4583,7 @@
       <author><first>Masoumeh</first><last>Aminzadeh</last></author>
       <author><first>Christoph</first><last>Feichtenhofer</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>4227–4239</pages>
       <url hash="c7c75f18">2021.findings-acl.370</url>
       <doi>10.18653/v1/2021.findings-acl.370</doi>
@@ -4646,7 +4646,7 @@
       <title>Grounding ‘Grounding’ in <fixed-case>NLP</fixed-case></title>
       <author><first>Khyathi Raghavi</first><last>Chandu</last></author>
       <author><first>Yonatan</first><last>Bisk</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>4283–4305</pages>
       <url hash="4ee75ca6">2021.findings-acl.375</url>
       <doi>10.18653/v1/2021.findings-acl.375</doi>
@@ -4738,8 +4738,8 @@
       <title>Enhancing <fixed-case>C</fixed-case>hinese Word Segmentation via Pseudo Labels for Practicability</title>
       <author><first>Kaiyu</first><last>Huang</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Zhuang</first><last>Liu</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <pages>4369–4381</pages>
@@ -4776,7 +4776,7 @@
       <author><first>Xinran</first><last>Zhao</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>Hongming</first><last>Zhang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>4401–4407</pages>
       <url hash="e86babaa">2021.findings-acl.386</url>
       <doi>10.18653/v1/2021.findings-acl.386</doi>
@@ -4811,7 +4811,7 @@
     <paper id="389">
       <title>Inducing Semantic Roles Without Syntax</title>
       <author><first>Julian</first><last>Michael</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>4427–4442</pages>
       <url hash="6728244b">2021.findings-acl.389</url>
       <doi>10.18653/v1/2021.findings-acl.389</doi>
@@ -4821,7 +4821,7 @@
       <title>Plot and Rework: Modeling Storylines for Visual Storytelling</title>
       <author><first>Chi-yang</first><last>Hsu</last></author>
       <author><first>Yun-Wei</first><last>Chu</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>4443–4453</pages>
       <url hash="dab6b04b">2021.findings-acl.390</url>
@@ -4851,7 +4851,7 @@
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Lawrence C</first><last>An</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <pages>4467–4480</pages>
       <url hash="9c5e2750">2021.findings-acl.392</url>
@@ -4862,7 +4862,7 @@
     <paper id="393">
       <title>An Investigation of Suitability of Pre-Trained Language Models for Dialogue Generation – Avoiding Discrepancies</title>
       <author><first>Yan</first><last>Zeng</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>4481–4494</pages>
       <url hash="ba933379">2021.findings-acl.393</url>
       <doi>10.18653/v1/2021.findings-acl.393</doi>
@@ -4883,7 +4883,7 @@
     <paper id="395">
       <title>Reordering Examples Helps during Priming-based Few-Shot Learning</title>
       <author><first>Sawan</first><last>Kumar</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>4507–4518</pages>
       <url hash="7e3e5c81">2021.findings-acl.395</url>
       <doi>10.18653/v1/2021.findings-acl.395</doi>
@@ -4984,7 +4984,7 @@
       <author><first>Stéphane</first><last>Aroca-Ouellette</last></author>
       <author><first>Cory</first><last>Paik</last></author>
       <author><first>Alessandro</first><last>Roncone</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>4597–4608</pages>
       <url hash="9712612a">2021.findings-acl.404</url>
       <doi>10.18653/v1/2021.findings-acl.404</doi>
@@ -5056,9 +5056,9 @@
       <author><first>Wei</first><last>Shi</last></author>
       <author><first>Jiewen</first><last>Wu</last></author>
       <author><first>Xiwen</first><last>Yang</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Ivan</first><last>Ho Mien</last></author>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last></author>
       <pages>4665–4672</pages>
       <url hash="14caccc4">2021.findings-acl.410</url>
@@ -5123,10 +5123,10 @@
       <title>As Easy as 1, 2, 3: Behavioural Testing of <fixed-case>NMT</fixed-case> Systems for Numerical Translation</title>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Chang</first><last>Xu</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
       <author><first>Benjamin</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>4711–4717</pages>
       <url hash="aab67890">2021.findings-acl.415</url>
       <doi>10.18653/v1/2021.findings-acl.415</doi>
@@ -5159,8 +5159,8 @@
     <paper id="418">
       <title>What Would a Teacher Do? <fixed-case>P</fixed-case>redicting Future Talk Moves</title>
       <author><first>Ananya</first><last>Ganesh</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>4739–4751</pages>
       <url hash="248bc591">2021.findings-acl.418</url>
       <doi>10.18653/v1/2021.findings-acl.418</doi>
@@ -5184,7 +5184,7 @@
       <title>Multilingual Simultaneous Neural Machine Translation</title>
       <author><first>Philip</first><last>Arthur</last></author>
       <author><first>Dongwon</first><last>Ryu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>4758–4766</pages>
       <url hash="de722f17">2021.findings-acl.420</url>
       <doi>10.18653/v1/2021.findings-acl.420</doi>
@@ -5292,7 +5292,7 @@
       <title>John praised <fixed-case>M</fixed-case>ary because _he_? Implicit Causality Bias and Its Interaction with Explicit Cues in <fixed-case>LM</fixed-case>s</title>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Mark</first><last>Anderson</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4859–4871</pages>
       <url hash="63429574">2021.findings-acl.429</url>
       <attachment type="OptionalSupplementaryMaterial" hash="315a208d">2021.findings-acl.429.OptionalSupplementaryMaterial.zip</attachment>
@@ -5361,7 +5361,7 @@
     <paper id="435">
       <title><fixed-case>D</fixed-case>oc<fixed-case>NLI</fixed-case>: A Large-scale Dataset for Document-level Natural Language Inference</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>4913–4922</pages>
       <url hash="a11d6510">2021.findings-acl.435</url>
@@ -5385,10 +5385,10 @@
       <title>Are Multilingual Models the Best Choice for Moderately Under-resourced Languages? <fixed-case>A</fixed-case> Comprehensive Assessment for <fixed-case>C</fixed-case>atalan</title>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
       <author><first>Casimiro Pio</first><last>Carrino</last></author>
-      <author><first>Carlos</first><last>Rodriguez-Penagos</last></author>
-      <author><first>Ona</first><last>de Gibert Bonet</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodriguez-Penagos</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert Bonet</last></author>
       <author><first>Carme</first><last>Armentano-Oller</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>4933–4946</pages>
@@ -5473,7 +5473,7 @@
       <author><first>Madina</first><last>Hasan</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Thomas</first><last>Hain</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>5004–5009</pages>
       <url hash="4cca3b8b">2021.findings-acl.443</url>
       <doi>10.18653/v1/2021.findings-acl.443</doi>
@@ -5505,7 +5505,7 @@
     <paper id="446">
       <title>Phrase-Level Action Reinforcement Learning for Neural Dialog Response Generation</title>
       <author><first>Takato</first><last>Yamazaki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>5028–5038</pages>
       <url hash="8604beaa">2021.findings-acl.446</url>
       <doi>10.18653/v1/2021.findings-acl.446</doi>
@@ -5531,7 +5531,7 @@
       <author><first>Dinesh</first><last>Raghu</last></author>
       <author><first>Atishya</first><last>Jain</last></author>
       <author><first/><last>Mausam</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>5051–5061</pages>
       <url hash="a21dbd7a">2021.findings-acl.448</url>
       <doi>10.18653/v1/2021.findings-acl.448</doi>
@@ -5552,7 +5552,7 @@
     </paper>
     <paper id="450">
       <title>What Did You Refer to? <fixed-case>E</fixed-case>valuating Co-References in Dialogue</title>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Hanlin</first><last>Tang</last></author>
       <author><first>Zhengyu</first><last>Zhao</last></author>
@@ -5579,7 +5579,7 @@
       <author><first>Amit</first><last>Gajbhiye</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Abiola</first><last>Obamuyide</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
@@ -5631,7 +5631,7 @@
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5138–5147</pages>
       <url hash="11f1e27f">2021.findings-acl.456</url>
       <doi>10.18653/v1/2021.findings-acl.456</doi>
@@ -5641,8 +5641,8 @@
     <paper id="457">
       <title>Characterizing Social Spambots by their Human Traits</title>
       <author><first>Salvatore</first><last>Giorgi</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>5148–5158</pages>
       <url hash="add3ff24">2021.findings-acl.457</url>
       <doi>10.18653/v1/2021.findings-acl.457</doi>
@@ -5653,10 +5653,10 @@
   <volume id="emnlp" ingest-date="2021-11-01" type="proceedings">
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: EMNLP 2021</booktitle>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
-      <editor><first>Xuanjing</first><last>Huang</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
       <month>November</month>
@@ -5724,7 +5724,7 @@
       <title>Neural News Recommendation with Collaborative News Encoding and Structural User Encoding</title>
       <author><first>Zhiming</first><last>Mao</last></author>
       <author><first>Xingshan</first><last>Zeng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>46–55</pages>
       <abstract>Automatic news recommendation has gained much attention from the academic community and industry. Recent studies reveal that the key to this task lies within the effective representation learning of both news and users. Existing works typically encode news title and content separately while neglecting their semantic interaction, which is inadequate for news text comprehension. Besides, previous models encode user browsing history without leveraging the structural correlation of user browsed news to reflect user interests explicitly. In this work, we propose a news recommendation framework consisting of collaborative news encoding (CNE) and structural user encoding (SUE) to enhance news and user representation learning. CNE equipped with bidirectional LSTMs encodes news title and content collaboratively with cross-selection and cross-attention modules to learn semantic-interactive news representations. SUE utilizes graph convolutional networks to extract cluster-structural features of user history, followed by intra-cluster and inter-cluster attention modules to learn hierarchical user interest representations. Experiment results on the MIND dataset validate the effectiveness of our model to improve the performance of news recommendation.</abstract>
       <url hash="1c0a18e0">2021.findings-emnlp.5</url>
@@ -5738,7 +5738,7 @@
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Kai</first><last>Sun</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>56–68</pages>
       <abstract>Despite considerable progress, most machine reading comprehension (MRC) tasks still lack sufficient training data to fully exploit powerful deep neural network models with millions of parameters, and it is laborious, expensive, and time-consuming to create large-scale, high-quality MRC data through crowdsourcing. This paper focuses on generating more training data for MRC tasks by leveraging existing question-answering (QA) data. We first collect a large-scale multi-subject multiple-choice QA dataset for Chinese, ExamQA. We next use incomplete, yet relevant snippets returned by a web search engine as the context for each QA instance to convert it into a weakly-labeled MRC instance. To better use the weakly-labeled data to improve a target MRC task, we evaluate and compare several methods and further propose a self-teaching paradigm. Experimental results show that, upon state-of-the-art MRC baselines, we can obtain +5.1% in accuracy on a multiple-choice Chinese MRC dataset, Cˆ3, and +3.8% in exact match on an extractive Chinese MRC dataset, CMRC 2018, demonstrating the usefulness of the generated QA-based weakly-labeled data for different types of MRC tasks as well as the effectiveness of self-teaching. ExamQA will be available at <url>https://dataset.org/examqa/</url>.</abstract>
       <url hash="0b5b5415">2021.findings-emnlp.6</url>
@@ -5771,7 +5771,7 @@
       <author><first>Shoya</first><last>Yoshida</last></author>
       <author><first>Lovish</first><last>Chum</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>74–88</pages>
       <abstract>Visual and textual modalities contribute complementary information about events described in multimedia documents. Videos contain rich dynamics and detailed unfoldings of events, while text describes more high-level and abstract concepts. However, existing event extraction methods either do not handle video or solely target video while ignoring other modalities. In contrast, we propose the first approach to jointly extract events from both video and text articles. We introduce the new task of Video MultiMedia Event Extraction and propose two novel components to build the first system towards this task. First, we propose the first self-supervised cross-modal event coreference model that can determine coreference between video events and text events without any manually annotated pairs. Second, we introduce the first cross-modal transformer architecture, which extracts structured event information from both videos and text documents. We also construct and will publicly release a new benchmark of video-article pairs, consisting of 860 video-article pairs with extensive annotations for evaluating methods on this task. Our experimental results demonstrate the effectiveness of our proposed method on our new benchmark dataset. We achieve 6.0% and 5.8% absolute F-score gain on multimodal event coreference resolution and multimedia event extraction.</abstract>
       <url hash="eb1f163b">2021.findings-emnlp.8</url>
@@ -5918,7 +5918,7 @@
       <author><first>Yingbo</first><last>Zhou</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>188–200</pages>
       <abstract>Dense neural text retrieval has achieved promising results on open-domain Question Answering (QA), where latent representations of questions and passages are exploited for maximum inner product search in the retrieval process. However, current dense retrievers require splitting documents into short passages that usually contain local, partial and sometimes biased context, and highly depend on the splitting process. As a consequence, it may yield inaccurate and misleading hidden representations, thus deteriorating the final retrieval result. In this work, we propose Dense Hierarchical Retrieval (DHR), a hierarchical framework which can generate accurate dense representations of passages by utilizing both macroscopic semantics in the document and microscopic semantics specific to each passage. Specifically, a document-level retriever first identifies relevant documents, among which relevant passages are then retrieved by a passage-level retriever. The ranking of the retrieved passages will be further calibrated by examining the document-level relevance. In addition, hierarchical title structure and two negative sampling strategies (i.e., In-Doc and In-Sec negatives) are investigated. We apply DHR to large-scale open-domain QA datasets. DHR significantly outperforms the original dense passage retriever, and helps an end-to-end QA system outperform the strong baselines on multiple open-domain QA benchmarks.</abstract>
       <url hash="595ebccd">2021.findings-emnlp.19</url>
@@ -5989,7 +5989,7 @@
       <author><first>Guy</first><last>Feigenblat</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Benjamin</first><last>Sznajder</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>David</first><last>Konopnicki</last></author>
       <author><first>Ranit</first><last>Aharonov</last></author>
       <pages>245–260</pages>
@@ -6030,7 +6030,7 @@
       <author><first>Zeyu</first><last>Peng</last></author>
       <author><first>Tejas</first><last>Vaidhya</last></author>
       <author><first>Bernhard</first><last>Schoelkopf</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>288–301</pages>
       <abstract>Mining the causes of political decision-making is an active research area in the field of political science. In the past, most studies have focused on long-term policies that are collected over several decades of time, and have primarily relied on surveys as the main source of predictors. However, the recent COVID-19 pandemic has given rise to a new political phenomenon, where political decision-making consists of frequent short-term decisions, all on the same controlled topic—the pandemic. In this paper, we focus on the question of how public opinion influences policy decisions, while controlling for confounders such as COVID-19 case increases or unemployment rates. Using a dataset consisting of Twitter data from the 50 US states, we classify the sentiments toward governors of each state, and conduct controlled studies and comparisons. Based on the compiled samples of sentiments, policies, and confounders, we conduct causal inference to discover trends in political decision-making across different states.</abstract>
       <url hash="ee14905c">2021.findings-emnlp.27</url>
@@ -6110,7 +6110,7 @@
       <author><first>San</first><last>Kim</last></author>
       <author><first>Jin Yea</first><last>Jang</last></author>
       <author><first>Minyoung</first><last>Jung</last></author>
-      <author><first>Saim</first><last>Shin</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
       <pages>352–365</pages>
       <abstract>Research on open-domain dialogue systems that allow free topics is challenging in the field of natural language processing (NLP). The performance of the dialogue system has been improved recently by the method utilizing dialogue-related knowledge; however, non-English dialogue systems suffer from reproducing the performance of English dialogue systems because securing knowledge in the same language with the dialogue system is relatively difficult. Through experiments with a Korean dialogue system, this paper proves that the performance of a non-English dialogue system can be improved by utilizing English knowledge, highlighting the system uses cross-lingual knowledge. For the experiments, we 1) constructed a Korean version of the Wizard of Wikipedia dataset, 2) built Korean-English T5 (KE-T5), a language model pre-trained with Korean and English corpus, and 3) developed a knowledge-grounded Korean dialogue model based on KE-T5. We observed the performance improvement in the open-domain Korean dialogue model even only English knowledge was given. The experimental results showed that the knowledge inherent in cross-lingual language models can be helpful for generating responses in open dialogue systems.</abstract>
       <url hash="221388e1">2021.findings-emnlp.33</url>
@@ -6152,7 +6152,7 @@
     <paper id="36">
       <title>Cartography Active Learning</title>
       <author><first>Mike</first><last>Zhang</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>395–406</pages>
       <abstract>We propose Cartography Active Learning (CAL), a novel Active Learning (AL) algorithm that exploits the behavior of the model on individual instances during training as a proxy to find the most informative instances for labeling. CAL is inspired by data maps, which were recently proposed to derive insights into dataset quality (Swayamdipta et al., 2020). We compare our method on popular text classification tasks to commonly used AL strategies, which instead rely on post-training behavior. We demonstrate that CAL is competitive to other common AL methods, showing that training dynamics derived from small seed data can be successfully used for AL. We provide insights into our new AL method by analyzing batch-level statistics utilizing the data maps. Our results further show that CAL results in a more data-efficient learning strategy, achieving comparable or better results with considerably less training data.</abstract>
       <url hash="92f1d55d">2021.findings-emnlp.36</url>
@@ -6192,7 +6192,7 @@
     <paper id="39">
       <title>Attention Weights in Transformer <fixed-case>NMT</fixed-case> Fail Aligning Words Between Sequences but Largely Explain Model Predictions</title>
       <author><first>Javier</first><last>Ferrando</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>434–443</pages>
       <abstract>This work proposes an extensive analysis of the Transformer architecture in the Neural Machine Translation (NMT) setting. Focusing on the encoder-decoder attention mechanism, we prove that attention weights systematically make alignment errors by relying mainly on uninformative tokens from the source sequence. However, we observe that NMT models assign attention to these tokens to regulate the contribution in the prediction of the two contexts, the source and the prefix of the target sequence. We provide evidence about the influence of wrong alignments on the model behavior, demonstrating that the encoder-decoder attention mechanism is well suited as an interpretability method for NMT. Finally, based on our analysis, we propose methods that largely reduce the word alignment error rate compared to standard induced alignments from attention weights.</abstract>
       <url hash="60dd3d42">2021.findings-emnlp.39</url>
@@ -6261,7 +6261,7 @@
       <author><first>Fukun</first><last>Ma</last></author>
       <author><first>Chenyao</first><last>Liu</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>487–496</pages>
       <abstract>To alleviate human efforts from obtaining large-scale annotations, Semi-Supervised Relation Extraction methods aim to leverage unlabeled data in addition to learning from limited samples. Existing self-training methods suffer from the gradual drift problem, where noisy pseudo labels on unlabeled data are incorporated during training. To alleviate the noise in pseudo labels, we propose a method called MetaSRE, where a Relation Label Generation Network generates accurate quality assessment on pseudo labels by (meta) learning from the successful and failed attempts on Relation Classification Network as an additional meta-objective. To reduce the influence of noisy pseudo labels, MetaSRE adopts a pseudo label selection and exploitation scheme which assesses pseudo label quality on unlabeled samples and only exploits high-quality pseudo labels in a self-training fashion to incrementally augment labeled samples for both robustness and accuracy. Experimental results on two public datasets demonstrate the effectiveness of the proposed approach.</abstract>
       <url hash="31ab5a22">2021.findings-emnlp.44</url>
@@ -6304,7 +6304,7 @@
       <author><first>Guy</first><last>Feigenblat</last></author>
       <author><first>Benjamin</first><last>Sznajder</last></author>
       <author><first>Ranit</first><last>Aharonov</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>518–526</pages>
       <abstract>Neural abstractive summarization models have drastically improved in the recent years. However, the summaries generated by these models generally suffer from issues such as: not capturing the critical facts in source documents, and containing facts that are inconsistent with the source documents. In this work, we present a general framework to train abstractive summarization models to alleviate such issues. We first train a sequence-to-sequence model to summarize documents, and then further train this model in a Reinforcement Learning setting with question-answering based rewards. We evaluate the summaries generated by the this framework using multiple automatic measures and human judgements. The experimental results show that the question-answering rewards can be used as a general framework to improve neural abstractive summarization. Particularly, the results from human evaluations show that the summaries generated by our approach is preferred over 30% of the time over the summaries generated by general abstractive summarization models.</abstract>
       <url hash="366b9209">2021.findings-emnlp.47</url>
@@ -6337,7 +6337,7 @@
     <paper id="50">
       <title>Unseen Entity Handling in Complex Question Answering over Knowledge Base via Language Generation</title>
       <author><first>Xin</first><last>Huang</last></author>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <pages>547–557</pages>
       <abstract>Complex question answering over knowledge base remains as a challenging task because it involves reasoning over multiple pieces of information, including intermediate entities/relations and other constraints. Previous methods simplify the SPARQL query of a question into such forms as a list or a graph, missing such constraints as “filter” and “order_by”, and present models specialized for generating those simplified forms from a given question. We instead introduce a novel approach that directly generates an executable SPARQL query without simplification, addressing the issue of generating unseen entities. We adapt large scale pre-trained encoder-decoder models and show that our method significantly outperforms the previous methods and also that our method has higher interpretability and computational efficiency than the previous methods.</abstract>
@@ -6401,7 +6401,7 @@
       <author><first>Hui</first><last>Huang</last></author>
       <author><first>Dong</first><last>Jing</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jian</first><last>Liu</last></author>
       <pages>599–610</pages>
       <abstract>Recent multilingual pre-trained models, like XLM-RoBERTa (XLM-R), have been demonstrated effective in many cross-lingual tasks. However, there are still gaps between the contextualized representations of similar words in different languages. To solve this problem, we propose a novel framework named Multi-View Mixed Language Training (MVMLT), which leverages code-switched data with multi-view learning to fine-tune XLM-R. MVMLT uses gradient-based saliency to extract keywords which are the most relevant to downstream tasks and replaces them with the corresponding words in the target language dynamically. Furthermore, MVMLT utilizes multi-view learning to encourage contextualized embeddings to align into a more refined language-invariant space. Extensive experiments with four languages show that our model achieves state-of-the-art results on zero-shot cross-lingual sentiment classification and dialogue state tracking tasks, demonstrating the effectiveness of our proposed model.</abstract>
@@ -6428,7 +6428,7 @@
       <author><first>Gijs</first><last>Danoe</last></author>
       <author><first>Friso</first><last>Stolk</last></author>
       <author><first>Britt</first><last>Bruntink</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>611–649</pages>
       <abstract>With the emergence of the COVID-19 pandemic, the political and the medical aspects of disinformation merged as the problem got elevated to a whole new level to become the first global infodemic. Fighting this infodemic has been declared one of the most important focus areas of the World Health Organization, with dangers ranging from promoting fake cures, rumors, and conspiracy theories to spreading xenophobia and panic. Addressing the issue requires solving a number of challenging problems such as identifying messages containing claims, determining their check-worthiness and factuality, and their potential to do harm as well as the nature of that harm, to mention just a few. To address this gap, we release a large dataset of 16K manually annotated tweets for fine-grained disinformation analysis that (i) focuses on COVID-19, (ii) combines the perspectives and the interests of journalists, fact-checkers, social media platforms, policy makers, and society, and (iii) covers Arabic, Bulgarian, Dutch, and English. Finally, we show strong evaluation results using pretrained Transformers, thus confirming the practical utility of the dataset in monolingual vs. multilingual, and single task vs. multitask settings.</abstract>
       <url hash="14389013">2021.findings-emnlp.56</url>
@@ -6454,7 +6454,7 @@
       <title>Stream-level Latency Evaluation for Simultaneous Machine Translation</title>
       <author><first>Javier</first><last>Iranzo-Sánchez</last></author>
       <author><first>Jorge</first><last>Civera Saiz</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>664–670</pages>
       <abstract>Simultaneous machine translation has recently gained traction thanks to significant quality improvements and the advent of streaming applications. Simultaneous translation systems need to find a trade-off between translation quality and response time, and with this purpose multiple latency measures have been proposed. However, latency evaluations for simultaneous translation are estimated at the sentence level, not taking into account the sequential nature of a streaming scenario. Indeed, these sentence-level latency measures are not well suited for continuous stream translation, resulting in figures that are not coherent with the simultaneous translation policy of the system being assessed. This work proposes a stream level adaptation of the current latency measures based on a re-segmentation approach applied to the output translation, that is successfully evaluated on streaming conditions for a reference IWSLT task.</abstract>
       <url hash="1b8f37be">2021.findings-emnlp.58</url>
@@ -6500,7 +6500,7 @@
     <paper id="62">
       <title>Learn Continually, Generalize Rapidly: Lifelong Knowledge Accumulation for Few-shot Learning</title>
       <author><first>Xisen</first><last>Jin</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Mohammad</first><last>Rostami</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>714–729</pages>
@@ -6573,8 +6573,8 @@
       <title>Learning Hard Retrieval Decoder Attention for Transformers</title>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>779–785</pages>
       <abstract>The Transformer translation model is based on the multi-head attention mechanism, which can be parallelized easily. The multi-head attention network performs the scaled dot-product attention function in parallel, empowering the model by jointly attending to information from different representation subspaces at different positions. In this paper, we present an approach to learning a hard retrieval attention where an attention head only attends to one token in the sentence rather than all tokens. The matrix multiplication between attention probabilities and the value sequence in the standard scaled dot-product attention can thus be replaced by a simple and efficient retrieval operation. We show that our hard retrieval attention mechanism is 1.43 times faster in decoding, while preserving translation quality on a wide range of machine translation tasks when used in the decoder self- and cross-attention networks.</abstract>
       <url hash="ba8cdde0">2021.findings-emnlp.67</url>
@@ -6631,7 +6631,7 @@
       <author><first>Yizhong</first><last>Wang</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>820–842</pages>
       <abstract>Models of language trained on very large corpora have been demonstrated useful for natural language processing. As fixed artifacts, they have become the object of intense study, with many researchers “probing” the extent to which they acquire and readily demonstrate linguistic abstractions, factual and commonsense knowledge, and reasoning abilities. Recent work applied several probes to intermediate training stages to observe the developmental process of a large-scale model (Chiang et al., 2020). Following this effort, we systematically answer a question: for various types of knowledge a language model learns, when during (pre)training are they acquired? Using RoBERTa as a case study, we find: linguistic knowledge is acquired fast, stably, and robustly across domains. Facts and commonsense are slower and more domain-sensitive. Reasoning abilities are, in general, not stably acquired. As new datasets, pretraining protocols, and probes emerge, we believe that probing-across-time analyses can help researchers understand the complex, intermingled learning that these models undergo and guide us toward more efficient approaches that accomplish necessary learning faster.</abstract>
       <url hash="1743f01f">2021.findings-emnlp.71</url>
@@ -6657,7 +6657,7 @@
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Martin</first><last>Docekal</last></author>
       <author><first>Karel</first><last>Ondrej</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>854–870</pages>
       <abstract>This work presents a novel four-stage open-domain QA pipeline R2-D2 (Rank twice, reaD twice). The pipeline is composed of a retriever, passage reranker, extractive reader, generative reader and a mechanism that aggregates the final prediction from all system’s components. We demonstrate its strength across three open-domain QA datasets: NaturalQuestions, TriviaQA and EfficientQA, surpassing state-of-the-art on the first two. Our analysis demonstrates that: (i) combining extractive and generative reader yields absolute improvements up to 5 exact match and it is at least twice as effective as the posterior averaging ensemble of the same models with different parameters, (ii) the extractive reader with fewer parameters can match the performance of the generative reader on extractive QA datasets.</abstract>
       <url hash="ff3a9922">2021.findings-emnlp.73</url>
@@ -6684,7 +6684,7 @@
       <author><first>Yimin</first><last>Fan</last></author>
       <author><first>Yaobo</first><last>Liang</last></author>
       <author><first>Alexandre</first><last>Muzio</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Houqiang</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Nan</first><last>Duan</last></author>
@@ -6791,7 +6791,7 @@
       <author><first>Rose</first><last>Wang</last></author>
       <author><first>Julia</first><last>White</last></author>
       <author><first>Jesse</first><last>Mu</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>977–984</pages>
       <abstract>To be good conversational partners, natural language processing (NLP) systems should be trained to produce contextually useful utterances. Prior work has investigated training NLP systems with communication-based objectives, where a neural listener stands in as a communication partner. However, these systems commonly suffer from semantic drift where the learned language diverges radically from natural language. We propose a method that uses a population of neural listeners to regularize speaker training. We first show that language drift originates from the poor uncertainty calibration of a neural listener, which makes high-certainty predictions on novel sentences. We explore ensemble- and dropout-based populations of listeners and find that the former results in better uncertainty quantification. We evaluate both population-based objectives on reference games, and show that the ensemble method with better calibration enables the speaker to generate pragmatic utterances while scaling to a large vocabulary and generalizing to new games and listeners.</abstract>
       <url hash="32b351fd">2021.findings-emnlp.83</url>
@@ -6818,7 +6818,7 @@
       <author><first>Kyle</first><last>Gorman</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>995–1005</pages>
       <abstract>Ad hoc abbreviations are commonly found in informal communication channels that favor shorter messages. We consider the task of reversing these abbreviations in context to recover normalized, expanded versions of abbreviated messages. The problem is related to, but distinct from, spelling correction, as ad hoc abbreviations are intentional and can involve more substantial differences from the original words. Ad hoc abbreviations are also productively generated on-the-fly, so they cannot be resolved solely by dictionary lookup. We generate a large, open-source data set of ad hoc abbreviations. This data is used to study abbreviation strategies and to develop two strong baselines for abbreviation expansion.</abstract>
       <url hash="312cec3e">2021.findings-emnlp.85</url>
@@ -6893,7 +6893,7 @@
       <author id="yang-liu-dt"><first>Yang</first><last>Liu</last></author>
       <author><first>Chao</first><last>Li</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>1053–1066</pages>
       <abstract>Although showing promising values to downstream applications, generating question and answer together is under-explored. In this paper, we introduce a novel task that targets question-answer pair generation from visual images. It requires not only generating diverse question-answer pairs but also keeping the consistency of them. We study different generation paradigms for this task and propose three models: the pipeline model, the joint model, and the sequential model. We integrate variational inference into these models to achieve diversity and consistency. We also propose region representation scaling and attention alignment to improve the consistency further. We finally devise an evaluator as a quantitative metric for consistency. We validate our approach on two benchmarks, VQA2.0 and Visual-7w, by automatically and manually evaluating diversity and consistency. Experimental results show the effectiveness of our models: they can generate diverse or consistent pairs. Moreover, this task can be used to improve visual question generation and visual question answering.</abstract>
       <url hash="f8113f8f">2021.findings-emnlp.91</url>
@@ -6905,7 +6905,7 @@
       <title>Entity-level Cross-modal Learning Improves Multi-modal Machine Translation</title>
       <author><first>Xin</first><last>Huang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1067–1080</pages>
       <abstract>Multi-modal machine translation (MMT) aims at improving translation performance by incorporating visual information. Most of the studies leverage the visual information through integrating the global image features as auxiliary input or decoding by attending to relevant local regions of the image. However, this kind of usage of visual information makes it difficult to figure out how the visual modality helps and why it works. Inspired by the findings of (CITATION) that entities are most informative in the image, we propose an explicit entity-level cross-modal learning approach that aims to augment the entity representation. Specifically, the approach is framed as a reconstruction task that reconstructs the original textural input from multi-modal input in which entities are replaced with visual features. Then, a multi-task framework is employed to combine the translation task and the reconstruction task to make full use of cross-modal entity representation learning. The extensive experiments demonstrate that our approach can achieve comparable or even better performance than state-of-the-art models. Furthermore, our in-depth analysis shows how visual information improves translation.</abstract>
       <url hash="ed25495d">2021.findings-emnlp.92</url>
@@ -7017,9 +7017,9 @@
       <author><first>Yeqiu</first><last>Li</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Zhifeng</first><last>Li</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Yu</first><last>Hong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1157–1165</pages>
       <abstract>We tackle multi-choice question answering. Acquiring related commonsense knowledge to the question and options facilitates the recognition of the correct answer. However, the current reasoning models suffer from the noises in the retrieved knowledge. In this paper, we propose a novel encoding method which is able to conduct interception and soft filtering. This contributes to the harvesting and absorption of representative information with less interference from noises. We experiment on CommonsenseQA. Experimental results illustrate that our method yields substantial and consistent improvements compared to the strong Bert, RoBERTa and Albert-based baselines.</abstract>
       <url hash="2ee98ec8">2021.findings-emnlp.100</url>
@@ -7035,7 +7035,7 @@
       <author><first>Jan-David</first><last>Krieger</last></author>
       <author><first>Terry</first><last>Ruas</last></author>
       <author><first>Bela</first><last>Gipp</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1166–1177</pages>
       <abstract>Media coverage has a substantial effect on the public perception of events. Nevertheless, media outlets are often biased. One way to bias news articles is by altering the word choice. The automatic identification of bias by word choice is challenging, primarily due to the lack of a gold standard data set and high context dependencies. This paper presents BABE, a robust and diverse data set created by trained experts, for media bias research. We also analyze why expert labeling is essential within this domain. Our data set offers better annotation quality and higher inter-annotator agreement than existing work. It consists of 3,700 sentences balanced among topics and outlets, containing media bias labels on the word and sentence level. Based on our data, we also introduce a way to detect bias-inducing sentences in news articles automatically. Our best performing BERT-based model is pre-trained on a larger corpus consisting of distant labels. Fine-tuning and evaluating the model on our proposed supervised data set, we achieve a macro F1-score of 0.804, outperforming existing methods.</abstract>
       <url hash="7ac3d26e">2021.findings-emnlp.101</url>
@@ -7067,7 +7067,7 @@
       <author><first>Biswesh</first><last>Mohapatra</last></author>
       <author><first>Gaurav</first><last>Pandey</last></author>
       <author><first>Danish</first><last>Contractor</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>1190–1203</pages>
       <abstract>Popular dialog datasets such as MultiWOZ are created by providing crowd workers an instruction, expressed in natural language, that describes the task to be accomplished. Crowd workers play the role of a user and an agent to generate dialogs to accomplish tasks involving booking restaurant tables, calling a taxi etc. In this paper, we present a data creation strategy that uses the pre-trained language model, GPT2, to simulate the interaction between crowd workers by creating a user bot and an agent bot. We train the simulators using a smaller percentage of actual crowd-generated conversations and their corresponding instructions. We demonstrate that by using the simulated data, we achieve significant improvements in low-resource settings on two publicly available datasets - MultiWOZ dataset and the Persona chat dataset.</abstract>
       <url hash="0f1df067">2021.findings-emnlp.103</url>
@@ -7133,7 +7133,7 @@
     <paper id="108">
       <title><fixed-case>A</fixed-case>rabic<fixed-case>T</fixed-case>ransformer: Efficient Large <fixed-case>A</fixed-case>rabic Language Model with Funnel Transformer and <fixed-case>ELECTRA</fixed-case> Objective</title>
       <author><first>Sultan</first><last>Alrowili</last></author>
-      <author><first>Vijay</first><last>Shanker</last></author>
+      <author id="k-vijay-shanker"><first>Vijay</first><last>Shanker</last></author>
       <pages>1255–1261</pages>
       <abstract>Pre-training Transformer-based models such as BERT and ELECTRA on a collection of Arabic corpora, demonstrated by both AraBERT and AraELECTRA, shows an impressive result on downstream tasks. However, pre-training Transformer-based language models is computationally expensive, especially for large-scale models. Recently, Funnel Transformer has addressed the sequential redundancy inside Transformer architecture by compressing the sequence of hidden states, leading to a significant reduction in the pre-training cost. This paper empirically studies the performance and efficiency of building an Arabic language model with Funnel Transformer and ELECTRA objective. We find that our model achieves state-of-the-art results on several Arabic downstream tasks despite using less computational resources compared to other BERT-based models.</abstract>
       <url hash="3f657e71">2021.findings-emnlp.108</url>
@@ -7158,7 +7158,7 @@
       <author><first>Zujun</first><last>Dou</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Yu</first><last>Sun</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1275–1283</pages>
       <abstract>Training implicit discourse relation classifiers suffers from data sparsity. Variational AutoEncoder (VAE) appears to be the proper solution. It is because ideally VAE is capable of generating inexhaustible varying samples, and this facilitates selective data augmentation. However, our experiments show that coupling VAE with the RoBERTa-based classifier results in severe performance degradation. We ascribe the unusual phenomenon to erroneous sampling that would happen when VAE pursued variations. To overcome the problem, we develop a re-anchoring strategy, where Conditional VAE (CVAE) is used for estimating the risk of erroneous sampling, and meanwhile migrating the anchor to reduce the risk. The test results on PDTB v2.0 illustrate that, compared to the RoBERTa-based baseline, re-anchoring yields substantial improvements. Besides, we observe that re-anchoring can cooperate with other auxiliary strategies (transfer learning and interactive attention mechanism) to further improve the baseline, obtaining the F-scores of about 55%, 63%, 80% and 44% for the four main relation types (Comparison, Contingency, Expansion, Temporality) in the binary classification (Yes/No) scenario.</abstract>
       <url hash="93fe97b2">2021.findings-emnlp.110</url>
@@ -7196,7 +7196,7 @@
       <author><first>Longyin</first><last>Zhang</last></author>
       <author><first>Xin</first><last>Tan</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1304–1312</pages>
       <abstract>Discourse analysis has long been known to be fundamental in natural language processing. In this research, we present our insight on discourse-level topic chain (DTC) parsing which aims at discovering new topics and investigating how these topics evolve over time within an article. To address the lack of data, we contribute a new discourse corpus with DTC-style dependency graphs annotated upon news articles. In particular, we ensure the high reliability of the corpus by utilizing a two-step annotation strategy to build the data and filtering out the annotations with low confidence scores. Based on the annotated corpus, we introduce a simple yet robust system for automatic discourse-level topic chain parsing.</abstract>
       <url hash="eaa74f42">2021.findings-emnlp.113</url>
@@ -7208,7 +7208,7 @@
       <title>Multilingual Neural Machine Translation: Can Linguistic Hierarchies Help?</title>
       <author><first>Fahimeh</first><last>Saleh</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Lan</first><last>Du</last></author>
       <pages>1313–1330</pages>
       <abstract>Multilingual Neural Machine Translation (MNMT) trains a single NMT model that supports translation between multiple languages, rather than training separate models for different languages. Learning a single model can enhance the low-resource translation by leveraging data from multiple languages. However, the performance of an MNMT model is highly dependent on the type of languages used in training, as transferring knowledge from a diverse set of languages degrades the translation performance due to negative transfer. In this paper, we propose a Hierarchical Knowledge Distillation (HKD) approach for MNMT which capitalises on language groups generated according to typological features and phylogeny of languages to overcome the issue of negative transfer. HKD generates a set of multilingual teacher-assistant models via a selective knowledge distillation mechanism based on the language groups, and then distills the ultimate multilingual model from those assistants in an adaptive way. Experimental results derived from the TED dataset with 53 languages demonstrate the effectiveness of our approach in avoiding the negative transfer effect in MNMT, leading to an improved translation performance (about 1 BLEU score in average) compared to strong baselines.</abstract>
@@ -7476,7 +7476,7 @@
       <author><first>Budhaditya</first><last>Deb</last></author>
       <author><first>Guoqing</first><last>Zheng</last></author>
       <author><first>Milad</first><last>Shokouhi</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>1553–1568</pages>
       <abstract>We study the problem of multilingual automated reply suggestions (RS) model serving many languages simultaneously. Multilingual models are often challenged by model capacity and severe data distribution skew across languages. While prior works largely focus on monolingual models, we propose Conditional Generative Matching models (CGM), optimized within a Variational Autoencoder framework to address challenges arising from multilingual RS. CGM does so with expressive message conditional priors, mixture densities to enhance multilingual data representation, latent alignment for language discrimination, and effective variational optimization techniques for training multilingual RS. The enhancements result in performance that exceed competitive baselines in relevance (ROUGE score) by more than 10% on average, and 16%for low resource languages. CGM also shows remarkable improvements in diversity (80%) illustrating its expressiveness in representation of multi-lingual data.</abstract>
       <url hash="4d6a1182">2021.findings-emnlp.134</url>
@@ -7510,7 +7510,7 @@
     </paper>
     <paper id="137">
       <title><fixed-case>P</fixed-case>rofiling News Discourse Structure Using Explicit Subtopic Structures Guided Critics</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>1594–1605</pages>
       <abstract>We present an actor-critic framework to induce subtopical structures in a news article for news discourse profiling. The model uses multiple critics that act according to known subtopic structures while the actor aims to outperform them. The content structures constitute sentences that represent latent subtopic boundaries. Then, we introduce a hierarchical neural network that uses the identified subtopic boundary sentences to model multi-level interaction between sentences, subtopics, and the document. Experimental results and analyses on the NewsDiscourse corpus show that the actor model learns to effectively segment a document into subtopics and improves the performance of the hierarchical model on the news discourse profiling task.</abstract>
@@ -7536,7 +7536,7 @@
       <title>Learning from Language Description: Low-shot Named Entity Recognition via Decomposed Framework</title>
       <author><first>Yaqing</first><last>Wang</last></author>
       <author><first>Haoda</first><last>Chu</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Jing</first><last>Gao</last></author>
       <pages>1618–1630</pages>
       <abstract>In this work, we study the problem of named entity recognition (NER) in a low resource scenario, focusing on few-shot and zero-shot settings. Built upon large-scale pre-trained language models, we propose a novel NER framework, namely SpanNER, which learns from natural language supervision and enables the identification of never-seen entity classes without using in-domain labeled data. We perform extensive experiments on 5 benchmark datasets and evaluate the proposed method in the few-shot learning, domain transfer and zero-shot learning settings. The experimental results show that the proposed method can bring 10%, 23% and 26% improvements in average over the best baselines in few-shot learning, domain transfer and zero-shot learning settings respectively.</abstract>
@@ -7547,9 +7547,9 @@
     </paper>
     <paper id="140">
       <title><fixed-case>BERT</fixed-case> might be Overkill: A Tiny but Effective Biomedical Entity Linker based on Residual Convolutional Neural Networks</title>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>1631–1639</pages>
       <abstract>Biomedical entity linking is the task of linking entity mentions in a biomedical document to referent entities in a knowledge base. Recently, many BERT-based models have been introduced for the task. While these models achieve competitive results on many datasets, they are computationally expensive and contain about 110M parameters. Little is known about the factors contributing to their impressive performance and whether the over-parameterization is needed. In this work, we shed some light on the inner workings of these large BERT-based models. Through a set of probing experiments, we have found that the entity linking performance only changes slightly when the input word order is shuffled or when the attention scope is limited to a fixed window size. From these observations, we propose an efficient convolutional neural network with residual connections for biomedical entity linking. Because of the sparse connectivity and weight sharing properties, our model has a small number of parameters and is highly efficient. On five public datasets, our model achieves comparable or even better linking accuracy than the state-of-the-art BERT-based models while having about 60 times fewer parameters.</abstract>
       <url hash="cb164253">2021.findings-emnlp.140</url>
@@ -7575,7 +7575,7 @@
     <paper id="142">
       <title>Exploring Multitask Learning for Low-Resource Abstractive Summarization</title>
       <author><first>Ahmed</first><last>Magooda</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Mohamed</first><last>Elaraby</last></author>
       <pages>1652–1661</pages>
       <abstract>This paper explores the effect of using multitask learning for abstractive summarization in the context of small training corpora. In particular, we incorporate four different tasks (extractive summarization, language modeling, concept detection, and paraphrase detection) both individually and in combination, with the goal of enhancing the target task of abstractive summarization via multitask learning. We show that for many task combinations, a model trained in a multitask setting outperforms a model trained only for abstractive summarization, with no additional summarization data introduced. Additionally, we do a comprehensive search and find that certain tasks (e.g. paraphrase detection) consistently benefit abstractive summarization, not only when combined with other tasks but also when using different architectures and training corpora.</abstract>
@@ -7656,7 +7656,7 @@
       <author><first>Austin</first><last>Reiter</last></author>
       <author><first>Ser-Nam</first><last>Lim</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1716–1723</pages>
       <abstract>We introduce Classification with Alternating Normalization (CAN), a non-parametric post-processing step for classification. CAN improves classification accuracy for challenging examples by re-adjusting their predicted class probability distribution using the predicted class distributions of high-confidence validation examples. CAN is easily applicable to any probabilistic classifier, with minimal computation overhead. We analyze the properties of CAN using simulated experiments, and empirically demonstrate its effectiveness across a diverse set of classification tasks.</abstract>
       <url hash="01a7e57d">2021.findings-emnlp.148</url>
@@ -7698,7 +7698,7 @@
     <paper id="151">
       <title>Towards Developing a Multilingual and Code-Mixed Visual Question Answering System by Knowledge Distillation</title>
       <author><first>Humair</first><last>Raj Khan</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>1753–1767</pages>
       <abstract>Pre-trained language-vision models have shown remarkable performance on the visual question answering (VQA) task. However, most pre-trained models are trained by only considering monolingual learning, especially the resource-rich language like English. Training such models for multilingual setups demand high computing resources and multilingual language-vision dataset which hinders their application in practice. To alleviate these challenges, we propose a knowledge distillation approach to extend an English language-vision model (teacher) into an equally effective multilingual and code-mixed model (student). Unlike the existing knowledge distillation methods, which only use the output from the last layer of the teacher network for distillation, our student model learns and imitates the teacher from multiple intermediate layers (language and vision encoders) with appropriately designed distillation objectives for incremental knowledge extraction. We also create the large-scale multilingual and code-mixed VQA dataset in eleven different language setups considering the multiple Indian and European languages. Experimental results and in-depth analysis show the effectiveness of the proposed VQA model over the pre-trained language-vision models on eleven diverse language setups.</abstract>
@@ -7713,7 +7713,7 @@
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jinchao</first><last>Zhang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>1768–1780</pages>
       <abstract>Aspect-based sentiment analysis (ABSA) mainly involves three subtasks: aspect term extraction, opinion term extraction, and aspect-level sentiment classification, which are typically handled in a separate or joint manner. However, previous approaches do not well exploit the interactive relations among three subtasks and do not pertinently leverage the easily available document-level labeled domain/sentiment knowledge, which restricts their performances. To address these issues, we propose a novel Iterative Multi-Knowledge Transfer Network (IMKTN) for end-to-end ABSA. For one thing, through the interactive correlations between the ABSA subtasks, our IMKTN transfers the task-specific knowledge from any two of the three subtasks to another one at the token level by utilizing a well-designed routing algorithm, that is, any two of the three subtasks will help the third one. For another, our IMKTN pertinently transfers the document-level knowledge, i.e., domain-specific and sentiment-related knowledge, to the aspect-level subtasks to further enhance the corresponding performance. Experimental results on three benchmark datasets demonstrate the effectiveness and superiority of our approach.</abstract>
@@ -7808,7 +7808,7 @@
       <author><first>Yu</first><last>Feng</last></author>
       <author><first>Jing</first><last>Zhang</last></author>
       <author><first>Gaole</first><last>He</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Quan</first><last>Liu</last></author>
       <author><first>Cuiping</first><last>Li</last></author>
@@ -7880,7 +7880,7 @@
     <paper id="164">
       <title><fixed-case>C</fixed-case>ontract<fixed-case>NLI</fixed-case>: A Dataset for Document-level Natural Language Inference for Contracts</title>
       <author><first>Yuta</first><last>Koreeda</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1907–1919</pages>
       <abstract>Reviewing contracts is a time-consuming procedure that incurs large expenses to companies and social inequality to those who cannot afford it. In this work, we propose “document-level natural language inference (NLI) for contracts”, a novel, real-world application of NLI that addresses such problems. In this task, a system is given a set of hypotheses (such as “Some obligations of Agreement may survive termination.”) and a contract, and it is asked to classify whether each hypothesis is “entailed by”, “contradicting to” or “not mentioned by” (neutral to) the contract as well as identifying “evidence” for the decision as spans in the contract. We annotated and release the largest corpus to date consisting of 607 annotated contracts. We then show that existing models fail badly on our task and introduce a strong baseline, which (a) models evidence identification as multi-label classification over spans instead of trying to predict start and end tokens, and (b) employs more sophisticated context segmentation for dealing with long documents. We also show that linguistic characteristics of contracts, such as negations by exceptions, are contributing to the difficulty of this task and that there is much room for improvement.</abstract>
       <url hash="81b2a75c">2021.findings-emnlp.164</url>
@@ -7902,7 +7902,7 @@
     </paper>
     <paper id="166">
       <title>Grouped-Attention for Content-Selection and Content-Plan Generation</title>
-      <author><first>Bayu Distiawan</first><last>Trisedya</last></author>
+      <author id="bayu-distiawan"><first>Bayu Distiawan</first><last>Trisedya</last></author>
       <author><first>Xiaojie</first><last>Wang</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
@@ -8000,8 +8000,8 @@
       <title>Say ‘<fixed-case>YES</fixed-case>’ to Positivity: Detecting Toxic Language in Workplace Communications</title>
       <author><first>Meghana Moorthy</first><last>Bhat</last></author>
       <author><first>Saghar</first><last>Hosseini</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <author><first>Weisheng</first><last>Li</last></author>
       <pages>2017–2029</pages>
       <abstract>Workplace communication (e.g. email, chat, etc.) is a central part of enterprise productivity. Healthy conversations are crucial for creating an inclusive environment and maintaining harmony in an organization. Toxic communications at the workplace can negatively impact overall job satisfaction and are often subtle, hidden, or demonstrate human biases. The linguistic subtlety of mild yet hurtful conversations has made it difficult for researchers to quantify and extract toxic conversations automatically. While offensive language or hate speech has been extensively studied in social communities, there has been little work studying toxic communication in emails. Specifically, the lack of corpus, sparsity of toxicity in enterprise emails, and well-defined criteria for annotating toxic conversations have prevented researchers from addressing the problem at scale. We take the first step towards studying toxicity in workplace emails by providing (1) a general and computationally viable taxonomy to study toxic language at the workplace (2) a dataset to study toxic language at the workplace based on the taxonomy and (3) analysis on why offensive language and hate-speech datasets are not suitable to detect workplace toxicity.</abstract>
@@ -8029,7 +8029,7 @@
     <paper id="175">
       <title>Mitigating Data Scarceness through Data Synthesis, Augmentation and Curriculum for Abstractive Summarization</title>
       <author><first>Ahmed</first><last>Magooda</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>2043–2052</pages>
       <abstract>This paper explores three simple data manipulation techniques (synthesis, augmentation, curriculum) for improving abstractive summarization models without the need for any additional data. We introduce a method of data synthesis with paraphrasing, a data augmentation technique with sample mixing, and curriculum learning with two new difficulty metrics based on specificity and abstractiveness. We conduct experiments to show that these three techniques can help improve abstractive summarization across two summarization models and two different small datasets. Furthermore, we show that these techniques can improve performance when applied in isolation and when combined.</abstract>
       <url hash="97f4ad27">2021.findings-emnlp.175</url>
@@ -8128,7 +8128,7 @@
       <author><first>Lingzhi</first><last>Wang</last></author>
       <author><first>Xingshan</first><last>Zeng</last></author>
       <author><first>Huang</first><last>Hu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
       <pages>2127–2137</pages>
       <abstract>In recent years, world business in online discussions and opinion sharing on social media is booming. Re-entry prediction task is thus proposed to help people keep track of the discussions which they wish to continue. Nevertheless, existing works only focus on exploiting chatting history and context information, and ignore the potential useful learning signals underlying conversation data, such as conversation thread patterns and repeated engagement of target users, which help better understand the behavior of target users in conversations. In this paper, we propose three interesting and well-founded auxiliary tasks, namely, Spread Pattern, Repeated Target user, and Turn Authorship, as the self-supervised signals for re-entry prediction. These auxiliary tasks are trained together with the main task in a multi-task manner. Experimental results on two datasets newly collected from Twitter and Reddit show that our method outperforms the previous state-of-the-arts with fewer parameters and faster convergence. Extensive experiments and analysis show the effectiveness of our proposed models and also point out some key ideas in designing self-supervised tasks.</abstract>
@@ -8419,7 +8419,7 @@
       <title>Wine is not v i n. On the Compatibility of Tokenizations across Languages</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2382–2399</pages>
       <abstract>The size of the vocabulary is a central design choice in large pretrained language models, with respect to both performance and memory requirements. Typically, subword tokenization algorithms such as byte pair encoding and WordPiece are used. In this work, we investigate the compatibility of tokenizations for multilingual static and contextualized embedding spaces and propose a measure that reflects the compatibility of tokenizations across languages. Our goal is to prevent incompatible tokenizations, e.g., “wine” (word-level) in English vs. “v i n” (character-level) in French, which make it hard to learn good multilingual semantic representations. We show that our compatibility measure allows the system designer to create vocabularies across languages that are compatible – a desideratum that so far has been neglected in multilingual models.</abstract>
       <url hash="27c0ce75">2021.findings-emnlp.205</url>
@@ -8430,7 +8430,7 @@
     <paper id="206">
       <title>Temporal Adaptation of <fixed-case>BERT</fixed-case> and Performance on Downstream Document Classification: Insights from Social Media</title>
       <author><first>Paul</first><last>Röttger</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>2400–2412</pages>
       <abstract>Language use differs between domains and even within a domain, language use changes over time. For pre-trained language models like BERT, domain adaptation through continued pre-training has been shown to improve performance on in-domain downstream tasks. In this article, we investigate whether temporal adaptation can bring additional benefits. For this purpose, we introduce a corpus of social media comments sampled over three years. It contains unlabelled data for adaptation and evaluation on an upstream masked language modelling task as well as labelled data for fine-tuning and evaluation on a downstream document classification task. We find that temporality matters for both tasks: temporal adaptation improves upstream and temporal fine-tuning downstream task performance. Time-specific models generally perform better on past than on future test sets, which matches evidence on the bursty usage of topical words. However, adapting BERT to time and domain does not improve performance on the downstream task over only adapting to domain. Token-level analysis shows that temporal adaptation captures event-driven changes in language use in the downstream task, but not those changes that are actually relevant to task performance. Based on our findings, we discuss when temporal adaptation may be more effective.</abstract>
       <url hash="2323316c">2021.findings-emnlp.206</url>
@@ -8582,7 +8582,7 @@
     <paper id="218">
       <title>Probing Pre-trained Language Models for Semantic Attributes and their Values</title>
       <author><first>Meriem</first><last>Beloucif</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>2554–2559</pages>
       <abstract>Pretrained language models (PTLMs) yield state-of-the-art performance on many natural language processing tasks, including syntax, semantics and commonsense. In this paper, we focus on identifying to what extent do PTLMs capture semantic attributes and their values, e.g., the correlation between rich and high net worth. We use PTLMs to predict masked tokens using patterns and lists of items from Wikidata in order to verify how likely PTLMs encode semantic attributes along with their values. Such inferences based on semantics are intuitive for humans as part of our language understanding. Since PTLMs are trained on large amount of Wikipedia data we would assume that they can generate similar predictions, yet our findings reveal that PTLMs are still much worse than humans on this task. We show evidence and analysis explaining how to exploit our methodology to integrate better context and semantics into PTLMs using knowledge bases.</abstract>
       <url hash="0b10479f">2021.findings-emnlp.218</url>
@@ -8654,7 +8654,7 @@
     <paper id="224">
       <title>Subword Mapping and Anchoring across Languages</title>
       <author><first>Giorgos</first><last>Vernikos</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>2633–2647</pages>
       <abstract>State-of-the-art multilingual systems rely on shared vocabularies that sufficiently cover all considered languages. To this end, a simple and frequently used approach makes use of subword vocabularies constructed jointly over several languages. We hypothesize that such vocabularies are suboptimal due to false positives (identical subwords with different meanings across languages) and false negatives (different subwords with similar meanings). To address these issues, we propose Subword Mapping and Anchoring across Languages (SMALA), a method to construct bilingual subword vocabularies. SMALA extracts subword alignments using an unsupervised state-of-the-art mapping technique and uses them to create cross-lingual anchors based on subword similarities. We demonstrate the benefits of SMALA for cross-lingual natural language inference (XNLI), where it improves zero-shot transfer to an unseen language without task-specific data, but only by sharing subword embeddings. Moreover, in neural machine translation, we show that joint subword vocabularies obtained with SMALA lead to higher BLEU scores on sentences that contain many false positives and false negatives.</abstract>
       <url hash="8cffc714">2021.findings-emnlp.224</url>
@@ -8667,7 +8667,7 @@
       <author><first>Avi</first><last>Caciularu</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Arie</first><last>Cattan</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>2648–2662</pages>
@@ -8680,7 +8680,7 @@
     <paper id="226">
       <title>Patterns of Polysemy and Homonymy in Contextualised Language Models</title>
       <author><first>Janosch</first><last>Haber</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>2663–2676</pages>
       <abstract>One of the central aspects of contextualised language models is that they should be able to distinguish the meaning of lexically ambiguous words by their contexts. In this paper we investigate the extent to which the contextualised embeddings of word forms that display multiplicity of sense reflect traditional distinctions of polysemy and homonymy. To this end, we introduce an extended, human-annotated dataset of graded word sense similarity and co-predication acceptability, and evaluate how well the similarity of embeddings predicts similarity in meaning. Both types of human judgements indicate that the similarity of polysemic interpretations falls in a continuum between identity of meaning and homonymy. However, we also observe significant differences within the similarity ratings of polysemes, forming consistent patterns for different types of polysemic sense alternation. Our dataset thus appears to capture a substantial part of the complexity of lexical ambiguity, and can provide a realistic test bed for contextualised embeddings. Among the tested models, BERT Large shows the strongest correlation with the collected word sense similarity ratings, but struggles to consistently replicate the observed similarity patterns. When clustering ambiguous word forms based on their embeddings, the model displays high confidence in discerning homonyms and some types of polysemic alternations, but consistently fails for others.</abstract>
       <url hash="d15713dc">2021.findings-emnlp.226</url>
@@ -8703,7 +8703,7 @@
     <paper id="228">
       <title>Controlled Neural Sentence-Level Reframing of News Articles</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>2683–2693</pages>
@@ -8743,7 +8743,7 @@
       <title>Improving Numerical Reasoning Skills in the Modular Approach for Complex Question Answering on Text</title>
       <author><first>Xiao-Yu</first><last>Guo</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2713–2718</pages>
       <abstract>Numerical reasoning skills are essential for complex question answering (CQA) over text. It requires opertaions including counting, comparison, addition and subtraction. A successful approach to CQA on text, Neural Module Networks (NMNs), follows the programmer-interpreter paradigm and leverages specialised modules to perform compositional reasoning. However, the NMNs framework does not consider the relationship between numbers and entities in both questions and paragraphs. We propose effective techniques to improve NMNs’ numerical reasoning capabilities by making the interpreter question-aware and capturing the relationship between entities and numbers. On the same subset of the DROP dataset for CQA on text, experimental results show that our additions outperform the original NMNs by 3.0 points for the overall F1 score.</abstract>
       <url hash="60bb27bd">2021.findings-emnlp.231</url>
@@ -8754,8 +8754,8 @@
     </paper>
     <paper id="232">
       <title>Retrieval Augmented Code Generation and Summarization</title>
-      <author><first>Md Rizwan</first><last>Parvez</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Saikat</first><last>Chakraborty</last></author>
       <author><first>Baishakhi</first><last>Ray</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -8833,9 +8833,9 @@
     <paper id="238">
       <title>Open-Domain Contextual Link Prediction and its Complementarity with Entailment Graphs</title>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>2790–2802</pages>
       <abstract>An open-domain knowledge graph (KG) has entities as nodes and natural language relations as edges, and is constructed by extracting (subject, relation, object) triples from text. The task of open-domain link prediction is to infer missing relations in the KG. Previous work has used standard link prediction for the task. Since triples are extracted from text, we can ground them in the larger textual context in which they were originally found. However, standard link prediction methods only rely on the KG structure and ignore the textual context that each triple was extracted from. In this paper, we introduce the new task of open-domain contextual link prediction which has access to both the textual context and the KG structure to perform link prediction. We build a dataset for the task and propose a model for it. Our experiments show that context is crucial in predicting missing relations. We also demonstrate the utility of contextual link prediction in discovering context-independent entailments between relations, in the form of entailment graphs (EG), in which the nodes are the relations. The reverse holds too: context-independent EGs assist in predicting relations in context.</abstract>
       <url hash="a769052f">2021.findings-emnlp.238</url>
@@ -8889,8 +8889,8 @@
     <paper id="242">
       <title>“Be nice to your wife! The restaurants are closed”: Can Gender Stereotype Detection Improve Sexism Classification?</title>
       <author><first>Patricia</first><last>Chiril</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <pages>2833–2844</pages>
       <abstract>In this paper, we focus on the detection of sexist hate speech against women in tweets studying for the first time the impact of gender stereotype detection on sexism classification. We propose: (1) the first dataset annotated for gender stereotype detection, (2) a new method for data augmentation based on sentence similarity with multilingual external datasets, and (3) a set of deep learning experiments first to detect gender stereotypes and then, to use this auxiliary task for sexism detection. Although the presence of stereotypes does not necessarily entail hateful content, our results show that sexism classification can definitively benefit from gender stereotype detection.</abstract>
       <url hash="6fd9bfd3">2021.findings-emnlp.242</url>
@@ -8901,7 +8901,7 @@
     <paper id="243">
       <title>Automatic Discrimination between Inherited and Borrowed <fixed-case>L</fixed-case>atin Words in <fixed-case>R</fixed-case>omance Languages</title>
       <author><first>Alina Maria</first><last>Cristea</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Simona</first><last>Georgescu</last></author>
       <author><first>Mihnea-Lucian</first><last>Mihai</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
@@ -8930,8 +8930,8 @@
       <title>Knowledge-Interactive Network with Sentiment Polarity Intensity-Aware Multi-Task Learning for Emotion Recognition in Conversations</title>
       <author><first>Yunhe</first><last>Xie</last></author>
       <author><first>Kailai</first><last>Yang</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Zhenzhou</first><last>Ji</last></author>
       <pages>2879–2889</pages>
       <abstract>Emotion Recognition in Conversation (ERC) has gained much attention from the NLP community recently. Some models concentrate on leveraging commonsense knowledge or multi-task learning to help complicated emotional reasoning. However, these models neglect direct utterance-knowledge interaction. In addition, these models utilize emotion-indirect auxiliary tasks, which provide limited affective information for the ERC task. To address the above issues, we propose a Knowledge-Interactive Network with sentiment polarity intensity-aware multi-task learning, namely KI-Net, which leverages both commonsense knowledge and sentiment lexicon to augment semantic information. Specifically, we use a self-matching module for internal utterance-knowledge interaction. Considering correlations with the ERC task, a phrase-level Sentiment Polarity Intensity Prediction (SPIP) task is devised as an auxiliary task. Experiments show that all knowledge integration, self-matching and SPIP modules improve the model performance respectively on three datasets. Moreover, our KI-Net model shows 1.04% performance improvement over the state-of-the-art model on the IEMOCAP dataset.</abstract>
@@ -8972,9 +8972,9 @@
       <author><first>Kaiyu</first><last>Huang</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jingxiang</first><last>Cao</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>2908–2917</pages>
       <abstract>Precise information of word boundary can alleviate the problem of lexical ambiguity to improve the performance of natural language processing (NLP) tasks. Thus, Chinese word segmentation (CWS) is a fundamental task in NLP. Due to the development of pre-trained language models (PLM), pre-trained knowledge can help neural methods solve the main problems of the CWS in significant measure. Existing methods have already achieved high performance on several benchmarks (e.g., Bakeoff-2005). However, recent outstanding studies are limited by the small-scale annotated corpus. To further improve the performance of CWS methods based on fine-tuning the PLMs, we propose a novel neural framework, LBGCN, which incorporates a lexicon-based graph convolutional network into the Transformer encoder. Experimental results on five benchmarks and four cross-domain datasets show the lexicon-based graph convolutional network successfully captures the information of candidate words and helps to improve performance on the benchmarks (Bakeoff-2005 and CTB6) and the cross-domain datasets (SIGHAN-2010). Further experiments and analyses demonstrate that our proposed framework effectively models the lexicon to enhance the ability of basic neural frameworks and strengthens the robustness in the cross-domain scenario.</abstract>
       <url hash="f8312754">2021.findings-emnlp.248</url>
@@ -8989,7 +8989,7 @@
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Jian</first><last>Jiao</last></author>
       <author><first>Ruofei</first><last>Zhang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <pages>2918–2928</pages>
       <abstract>Pre-trained language models have led to substantial gains over a broad range of natural language processing (NLP) tasks, but have been shown to have limitations for natural language generation tasks with high-quality requirements on the output, such as commonsense generation and ad keyword generation. In this work, we present a novel Knowledge Filtering and Contrastive learning Network (KFCNet) which references external knowledge and achieves better generation performance. Specifically, we propose a BERT-based filter model to remove low-quality candidates, and apply contrastive learning separately to each of the encoder and decoder, within a general encoder–decoder architecture. The encoder contrastive module helps to capture global target semantics during encoding, and the decoder contrastive module enhances the utility of retrieved prototypes while learning general features. Extensive experiments on the CommonGen benchmark show that our model outperforms the previous state of the art by a large margin: +6.6 points (42.5 vs. 35.9) for BLEU-4, +3.7 points (33.3 vs. 29.6) for SPICE, and +1.3 points (18.3 vs. 17.0) for CIDEr. We further verify the effectiveness of the proposed contrastive module on ad keyword generation, and show that our model has potential commercial value.</abstract>
@@ -9040,7 +9040,7 @@
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Nikita</first><last>Soni</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>2959–2966</pages>
       <abstract>Much of natural language processing is focused on leveraging large capacity language models, typically trained over single messages with a task of predicting one or more tokens. However, modeling human language at higher-levels of context (i.e., sequences of messages) is under-explored. In stance detection and other social media tasks where the goal is to predict an attribute of a message, we have contextual data that is loosely semantically connected by authorship. Here, we introduce Message-Level Transformer (MeLT) – a hierarchical message-encoder pre-trained over Twitter and applied to the task of stance prediction. We focus on stance prediction as a task benefiting from knowing the context of the message (i.e., the sequence of previous messages). The model is trained using a variant of masked-language modeling; where instead of predicting tokens, it seeks to generate an entire masked (aggregated) message vector via reconstruction loss. We find that applying this pre-trained masked message-level transformer to the downstream task of stance detection achieves F1 performance of 67%.</abstract>
       <url hash="3309b723">2021.findings-emnlp.253</url>
@@ -9090,7 +9090,7 @@
     <paper id="257">
       <title>Argumentation-Driven Evidence Association in Criminal Cases</title>
       <author><first>Yefei</first><last>Teng</last></author>
-      <author><first>WenHan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>WenHan</first><last>Chao</last></author>
       <pages>2997–3001</pages>
       <abstract>Evidence association in criminal cases is dividing a set of judicial evidence into several non-overlapping subsets, improving the interpretability and legality of conviction. Observably, evidence divided into the same subset usually supports the same claim. Therefore, we propose an argumentation-driven supervised learning method to calculate the distance between evidence pairs for the following evidence association step in this paper. Experimental results on a real-world dataset demonstrate the effectiveness of our method.</abstract>
       <url hash="7e2885d3">2021.findings-emnlp.257</url>
@@ -9185,9 +9185,9 @@
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Haneul</first><last>Yoo</last></author>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3074–3094</pages>
       <abstract>Finding counterevidence to statements is key to many tasks, including counterargument generation. We build a system that, given a statement, retrieves counterevidence from diverse sources on the Web. At the core of this system is a natural language inference (NLI) model that determines whether a candidate sentence is valid counterevidence or not. Most NLI models to date, however, lack proper reasoning abilities necessary to find counterevidence that involves complex inference. Thus, we present a knowledge-enhanced NLI model that aims to handle causality- and example-based inference by incorporating knowledge graphs. Our NLI model outperforms baselines for NLI tasks, especially for instances that require the targeted inference. In addition, this NLI model further improves the counterevidence retrieval system, notably finding complex counterevidence better.</abstract>
       <url hash="458c573b">2021.findings-emnlp.264</url>
@@ -9211,7 +9211,7 @@
       <author><first>Hwiyeol</first><last>Jo</last></author>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Andrew</first><last>Head</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>3102–3115</pages>
       <abstract>Natural language models often fall short when understanding and generating mathematical notation. What is not clear is whether these shortcomings are due to fundamental limitations of the models, or the absence of appropriate tasks. In this paper, we explore the extent to which natural language models can learn semantics between mathematical notation and their surrounding text. We propose two notation prediction tasks, and train a model that selectively masks notation tokens and encodes left and/or right sentences as context. Compared to baseline models trained by masked language modeling, our method achieved significantly better performance at the two tasks, showing that this approach is a good first step towards modeling mathematical texts. However, the current models rarely predict unseen symbols correctly, and token-level predictions are more accurate than symbol-level predictions, indicating more work is needed to represent structural patterns. Based on the results, we suggest future works toward modeling mathematical texts.</abstract>
       <url hash="744ddaf5">2021.findings-emnlp.266</url>
@@ -9223,7 +9223,7 @@
       <title>Unpacking the Interdependent Systems of Discrimination: Ableist Bias in <fixed-case>NLP</fixed-case> Systems through an Intersectional Lens</title>
       <author><first>Saad</first><last>Hassan</last></author>
       <author><first>Matt</first><last>Huenerfauth</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <pages>3116–3123</pages>
       <abstract>Much of the world’s population experiences some form of disability during their lifetime. Caution must be exercised while designing natural language processing (NLP) systems to prevent systems from inadvertently perpetuating ableist bias against people with disabilities, i.e., prejudice that favors those with typical abilities. We report on various analyses based on word predictions of a large-scale BERT language model. Statistically significant results demonstrate that people with disabilities can be disadvantaged. Findings also explore overlapping forms of discrimination related to interconnected gender and race identities.</abstract>
       <url hash="8ced3576">2021.findings-emnlp.267</url>
@@ -9266,7 +9266,7 @@
       <author><first>Ahmad</first><last>Rashid</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Ali</first><last>Ghodsi</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>3145–3152</pages>
       <abstract>Knowledge Distillation (KD) is extensively used in Natural Language Processing to compress the pre-training and task-specific fine-tuning phases of large neural language models. A student model is trained to minimize a convex combination of the prediction loss over the labels and another over the teacher output. However, most existing works either fix the interpolating weight between the two losses apriori or vary the weight using heuristics. In this work, we propose a novel sample-wise loss weighting method, RW-KD. A meta-learner, simultaneously trained with the student, adaptively re-weights the two losses for each sample. We demonstrate, on 7 datasets of the GLUE benchmark, that RW-KD outperforms other loss re-weighting methods for KD.</abstract>
       <url hash="36823d65">2021.findings-emnlp.270</url>
@@ -9289,7 +9289,7 @@
     <paper id="272">
       <title>Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers</title>
       <author><first>Shane</first><last>Storks</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>3169–3177</pages>
       <abstract>As large-scale, pre-trained language models achieve human-level and superhuman accuracy on existing language understanding tasks, statistical bias in benchmark data and probing studies have recently called into question their true capabilities. For a more informative evaluation than accuracy on text classification tasks can offer, we propose evaluating systems through a novel measure of prediction coherence. We apply our framework to two existing language understanding benchmarks with different properties to demonstrate its versatility. Our experimental results show that this evaluation framework, although simple in ideas and implementation, is a quick, effective, and versatile measure to provide insight into the coherence of machines’ predictions.</abstract>
       <url hash="3fd96d3a">2021.findings-emnlp.272</url>
@@ -9300,8 +9300,8 @@
     <paper id="273">
       <title>Does Pretraining for Summarization Require Knowledge Transfer?</title>
       <author><first>Kundan</first><last>Krishna</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>3178–3189</pages>
       <abstract>Pretraining techniques leveraging enormous datasets have driven recent advances in text summarization. While folk explanations suggest that knowledge transfer accounts for pretraining’s benefits, little is known about why it works or what makes a pretraining task or dataset suitable. In this paper, we challenge the knowledge transfer story, showing that pretraining on documents consisting of character n-grams selected at random, we can nearly match the performance of models pretrained on real corpora. This work holds the promise of eliminating upstream corpora, which may alleviate some concerns over offensive language, bias, and copyright issues. To see whether the small residual benefit of using real data could be accounted for by the structure of the pretraining task, we design several tasks motivated by a qualitative study of summarization corpora. However, these tasks confer no appreciable benefit, leaving open the possibility of a small role for knowledge transfer.</abstract>
       <url hash="fa89b517">2021.findings-emnlp.273</url>
@@ -9313,7 +9313,7 @@
       <title>Bandits Don’t Follow Rules: Balancing Multi-Facet Machine Translation with Multi-Armed Bandits</title>
       <author><first>Julia</first><last>Kreutzer</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <pages>3190–3204</pages>
       <abstract>Training data for machine translation (MT) is often sourced from a multitude of large corpora that are multi-faceted in nature, e.g. containing contents from multiple domains or different levels of quality or complexity. Naturally, these facets do not occur with equal frequency, nor are they equally important for the test scenario at hand. In this work, we propose to optimize this balance jointly with MT model parameters to relieve system developers from manual schedule design. A multi-armed bandit is trained to dynamically choose between facets in a way that is most beneficial for the MT system. We evaluate it on three different multi-facet applications: balancing translationese and natural training data, or data from multiple domains or multiple language pairs. We find that bandit learning leads to competitive MT systems across tasks, and our analysis provides insights into its learned strategies and the underlying data sets.</abstract>
       <url hash="4bca2ba4">2021.findings-emnlp.274</url>
@@ -9349,7 +9349,7 @@
       <title><fixed-case>S</fixed-case>ci<fixed-case>C</fixed-case>ap: Generating Captions for Scientific Figures</title>
       <author><first>Ting-Yao</first><last>Hsu</last></author>
       <author><first>C Lee</first><last>Giles</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>3258–3264</pages>
       <abstract>Researchers use figures to communicate rich, complex information in scientific papers. The captions of these figures are critical to conveying effective messages. However, low-quality figure captions commonly occur in scientific articles and may decrease understanding. In this paper, we propose an end-to-end neural framework to automatically generate informative, high-quality captions for scientific figures. To this end, we introduce SCICAP, a large-scale figure-caption dataset based on computer science arXiv papers published between 2010 and 2020. After pre-processing – including figure-type classification, sub-figure identification, text normalization, and caption text selection – SCICAP contained more than two million figures extracted from over 290,000 papers. We then established baseline models that caption graph plots, the dominant (19.2%) figure type. The experimental results showed both opportunities and steep challenges of generating captions for scientific figures.</abstract>
       <url hash="49dc1296">2021.findings-emnlp.277</url>
@@ -9520,7 +9520,7 @@
       <title>m<fixed-case>DAPT</fixed-case>: Multilingual Domain Adaptive Pretraining in a Single Model</title>
       <author><first>Rasmus</first><last>Kær Jørgensen</last></author>
       <author><first>Mareike</first><last>Hartmann</last></author>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
       <pages>3404–3418</pages>
       <abstract>Domain adaptive pretraining, i.e. the continued unsupervised pretraining of a language model on domain-specific text, improves the modelling of text for downstream tasks within the domain. Numerous real-world applications are based on domain-specific text, e.g. working with financial or biomedical documents, and these applications often need to support multiple languages. However, large-scale domain-specific multilingual pretraining data for such scenarios can be difficult to obtain, due to regulations, legislation, or simply a lack of language- and domain-specific text. One solution is to train a single multilingual model, taking advantage of the data available in as many languages as possible. In this work, we explore the benefits of domain adaptive pretraining with a focus on adapting to multiple languages within a specific domain. We propose different techniques to compose pretraining corpora that enable a language model to both become domain-specific and multilingual. Evaluation on nine domain-specific datasets—for biomedical named entity recognition and financial sentence classification—covering seven different languages show that a single multilingual domain-specific model can outperform the general multilingual model, and performs close to its monolingual counterpart. This finding holds across two different pretraining methods, adapter-based pretraining and full model pretraining.</abstract>
@@ -9578,7 +9578,7 @@
       <title><fixed-case>AS</fixed-case>titch<fixed-case>I</fixed-case>n<fixed-case>L</fixed-case>anguage<fixed-case>M</fixed-case>odels: Dataset and Methods for the Exploration of Idiomaticity in Pre-Trained Language Models</title>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
       <author><first>Edward</first><last>Gow-Smith</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>3464–3477</pages>
       <abstract>Despite their success in a variety of NLP tasks, pre-trained language models, due to their heavy reliance on compositionality, fail in effectively capturing the meanings of multiword expressions (MWEs), especially idioms. Therefore, datasets and methods to improve the representation of MWEs are urgently needed. Existing datasets are limited to providing the degree of idiomaticity of expressions along with the literal and, where applicable, (a single) non-literal interpretation of MWEs. This work presents a novel dataset of naturally occurring sentences containing MWEs manually classified into a fine-grained set of meanings, spanning both English and Portuguese. We use this dataset in two tasks designed to test i) a language model’s ability to detect idiom usage, and ii) the effectiveness of a language model in generating representations of sentences containing idioms. Our experiments demonstrate that, on the task of detecting idiomatic usage, these models perform reasonably well in the one-shot and few-shot scenarios, but that there is significant scope for improvement in the zero-shot scenario. On the task of representing idiomaticity, we find that pre-training is not always effective, while fine-tuning could provide a sample efficient method of learning representations of sentences containing MWEs.</abstract>
@@ -9602,7 +9602,7 @@
     </paper>
     <paper id="296">
       <title>A Computational Exploration of Pejorative Language in Social Media</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Ioan-Bogdan</first><last>Iordache</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
@@ -9668,7 +9668,7 @@
     <paper id="301">
       <title>Disentangling Generative Factors in Natural Language with Discrete Variational Autoencoders</title>
       <author><first>Giangiacomo</first><last>Mercatali</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>3547–3556</pages>
       <abstract>The ability of learning disentangled representations represents a major step for interpretable NLP systems as it allows latent linguistic features to be controlled. Most approaches to disentanglement rely on continuous variables, both for images and text. We argue that despite being suitable for image datasets, continuous variables may not be ideal to model features of textual data, due to the fact that most generative factors in text are discrete. We propose a Variational Autoencoder based method which models language features as discrete variables and encourages independence between variables for learning disentangled representations. The proposed model outperforms continuous and discrete baselines on several qualitative and quantitative benchmarks for disentanglement as well as on a text style transfer downstream application.</abstract>
       <url hash="ee57fdab">2021.findings-emnlp.301</url>
@@ -9694,10 +9694,10 @@
     <paper id="303">
       <title>Do <fixed-case>UD</fixed-case> Trees Match Mention Spans in Coreference Annotations?</title>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Michal</first><last>Novák</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>3570–3576</pages>
       <abstract>One can find dozens of data resources for various languages in which coreference - a relation between two or more expressions that refer to the same real-world entity - is manually annotated. One could also assume that such expressions usually constitute syntactically meaningful units; however, mention spans have been annotated simply by delimiting token intervals in most coreference projects, i.e., independently of any syntactic representation. We argue that it could be advantageous to make syntactic and coreference annotations convergent in the long term. We present a pilot empirical study focused on matches and mismatches between hand-annotated linear mention spans and automatically parsed syntactic trees that follow Universal Dependencies conventions. The study covers 9 datasets for 8 different languages.</abstract>
       <url hash="84b51eef">2021.findings-emnlp.303</url>
@@ -9712,7 +9712,7 @@
       <author><first>Ankur</first><last>Bapna</last></author>
       <author><first>Maxim</first><last>Krikun</last></author>
       <author><first>Dmitry</first><last>Lepikhin</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <pages>3577–3599</pages>
       <abstract>Sparse Mixture-of-Experts (MoE) has been a successful approach for scaling multilingual translation models to billions of parameters without a proportional increase in training computation. However, MoE models are prohibitively large and practitioners often resort to methods such as distillation for serving. In this work, we investigate routing strategies at different granularity (token, sentence, task) in MoE models to bypass distillation. Experiments on WMT and a web-scale dataset suggest that task-level routing (task-MoE) enables us to extract smaller, ready-to-deploy sub-networks from large sparse models. On WMT, our task-MoE with 32 experts (533M parameters) outperforms the best performing token-level MoE model (token-MoE) by +1.0 BLEU on average across 30 language pairs. The peak inference throughput is also improved by a factor of 1.9x when we route by tasks instead of tokens. While distilling a token-MoE to a smaller dense model preserves only 32% of the BLEU gains, our sub-network task-MoE, by design, preserves all the gains with the same inference cost as the distilled student model. Finally, when scaling up to 200 language pairs, our 128-expert task-MoE (13B parameters) performs competitively with a token-level counterpart, while improving the peak inference throughput by a factor of 2.6x.</abstract>
@@ -9822,7 +9822,7 @@
     <paper id="312">
       <title>Post-Editing Extractive Summaries by Definiteness Prediction</title>
       <author><first>Jad</first><last>Kabbara</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3682–3692</pages>
       <abstract>Extractive summarization has been the mainstay of automatic summarization for decades. Despite all the progress, extractive summarizers still suffer from shortcomings including coreference issues arising from extracting sentences away from their original context in the source document. This affects the coherence and readability of extractive summaries. In this work, we propose a lightweight post-editing step for extractive summaries that centers around a single linguistic decision: the definiteness of noun phrases. We conduct human evaluation studies that show that human expert judges substantially prefer the output of our proposed system over the original summaries. Moreover, based on an automatic evaluation study, we provide evidence for our system’s ability to generate linguistic decisions that lead to improved extractive summaries. We also draw insights about how the automatic system is exploiting some local cues related to the writing style of the main article texts or summary texts to make the decisions, rather than reasoning about the contexts pragmatically.</abstract>
       <url hash="93a4ed29">2021.findings-emnlp.312</url>
@@ -9838,7 +9838,7 @@
       <author><first>Vasudevan</first><last>Jagannathan</last></author>
       <author><first>Hamid Reza</first><last>Hassanzadeh</last></author>
       <author><first>Thomas</first><last>Schaaf</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <pages>3693–3712</pages>
       <abstract>Fine-tuning pretrained models for automatically summarizing doctor-patient conversation transcripts presents many challenges: limited training data, significant domain shift, long and noisy transcripts, and high target summary variability. In this paper, we explore the feasibility of using pretrained transformer models for automatically summarizing doctor-patient conversations directly from transcripts. We show that fluent and adequate summaries can be generated with limited training data by fine-tuning BART on a specially constructed dataset. The resulting models greatly surpass the performance of an average human annotator and the quality of previous published work for the task. We evaluate multiple methods for handling long conversations, comparing them to the obvious baseline of truncating the conversation to fit the pretrained model length limit. We introduce a multistage approach that tackles the task by learning two fine-tuned models: one for summarizing conversation chunks into partial summaries, followed by one for rewriting the collection of partial summaries into a complete summary. Using a carefully chosen fine-tuning dataset, this method is shown to be effective at handling longer conversations, improving the quality of generated summaries. We conduct both an automatic evaluation (through ROUGE and two concept-based metrics focusing on medical findings) and a human evaluation (through qualitative examples from literature, assessing hallucination, generalization, fluency, and general quality of the generated summaries).</abstract>
       <url hash="e5380e4e">2021.findings-emnlp.313</url>
@@ -9862,7 +9862,7 @@
       <author><first>Denis</first><last>Peskov</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>3725–3750</pages>
       <abstract>How would you explain Bill Gates to a German? He is associated with founding a company in the United States, so perhaps the German founder Carl Benz could stand in for Gates in those contexts. This type of translation is called adaptation in the translation community. Until now, this task has not been done computationally. Automatic adaptation could be used in natural language processing for machine translation and indirectly for generating new question answering datasets and education. We propose two automatic methods and compare them to human results for this novel NLP task. First, a structured knowledge base adapts named entities using their shared properties. Second, vector-arithmetic and orthogonal embedding mappings methods identify better candidates, but at the expense of interpretable features. We evaluate our methods through a new dataset of human adaptations.</abstract>
       <url hash="9c3db774">2021.findings-emnlp.315</url>
@@ -9897,7 +9897,7 @@
     <paper id="318">
       <title>Sequence-to-Lattice Models for Fast Translation</title>
       <author><first>Yuntian</first><last>Deng</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>3765–3772</pages>
       <abstract>Non-autoregressive machine translation (NAT) approaches enable fast generation by utilizing parallelizable generative processes. The remaining bottleneck in these models is their decoder layers; unfortunately unlike in autoregressive models (Kasai et al., 2020), removing decoder layers from NAT models significantly degrades accuracy. This work proposes a sequence-to-lattice model that replaces the decoder with a search lattice. Our approach first constructs a candidate lattice using efficient lookup operations, generates lattice scores from a deep encoder, and finally finds the best path using dynamic programming. Experiments on three machine translation datasets show that our method is faster than past non-autoregressive generation approaches, and more accurate than naively reducing the number of decoder layers.</abstract>
       <url hash="63d3ab31">2021.findings-emnlp.318</url>
@@ -9956,7 +9956,7 @@
       <title>Searching for More Efficient Dynamic Programs</title>
       <author><first>Tim</first><last>Vieira</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>3812–3830</pages>
       <abstract>Computational models of human language often involve combinatorial problems. For instance, a probabilistic parser may marginalize over exponentially many trees to make predictions. Algorithms for such problems often employ dynamic programming and are not always unique. Finding one with optimal asymptotic runtime can be unintuitive, time-consuming, and error-prone. Our work aims to automate this laborious process. Given an initial correct declarative program, we search for a sequence of semantics-preserving transformations to improve its running time as much as possible. To this end, we describe a set of program transformations, a simple metric for assessing the efficiency of a transformed program, and a heuristic search procedure to improve this metric. We show that in practice, automated search—like the mental search performed by human programmers—can find substantial improvements to the initial program. Empirically, we show that many speed-ups described in the NLP literature could have been discovered automatically by our system.</abstract>
       <url hash="c9c3f401">2021.findings-emnlp.322</url>
@@ -10031,7 +10031,7 @@
       <title><fixed-case>C</fixed-case>onvex <fixed-case>A</fixed-case>ggregation for <fixed-case>O</fixed-case>pinion <fixed-case>S</fixed-case>ummarization</title>
       <author><first>Hayate</first><last>Iso</last></author>
       <author><first>Xiaolan</first><last>Wang</last></author>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <author><first>Stefanos</first><last>Angelidis</last></author>
       <author><first>Wang-Chiew</first><last>Tan</last></author>
       <pages>3885–3903</pages>
@@ -10062,7 +10062,7 @@
       <author><first>Taisiya</first><last>Glushkova</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>3920–3938</pages>
       <abstract>Several neural-based metrics have been recently proposed to evaluate machine translation quality. However, all of them resort to point estimates, which provide limited information at segment level. This is made worse as they are trained on noisy, biased and scarce human judgements, often resulting in unreliable quality predictions. In this paper, we introduce uncertainty-aware MT evaluation and analyze the trustworthiness of the predicted quality. We combine the COMET framework with two uncertainty estimation methods, Monte Carlo dropout and deep ensembles, to obtain quality scores along with confidence intervals. We compare the performance of our uncertainty-aware MT evaluation methods across multiple language pairs from the QT21 dataset and the WMT20 metrics task, augmented with MQM annotations. We experiment with varying numbers of references and further discuss the usefulness of uncertainty-aware quality estimation (without references) to flag possibly critical translation mistakes.</abstract>
       <url hash="91ac62a5">2021.findings-emnlp.330</url>
@@ -10099,8 +10099,8 @@
     <paper id="333">
       <title>Benchmarking Meta-embeddings: What Works and What Does Not</title>
       <author><first>Iker</first><last>García-Ferrero</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>3957–3972</pages>
       <abstract>In the last few years, several methods have been proposed to build meta-embeddings. The general aim was to obtain new representations integrating complementary knowledge from different source pre-trained embeddings thereby improving their overall quality. However, previous meta-embeddings have been evaluated using a variety of methods and datasets, which makes it difficult to draw meaningful conclusions regarding the merits of each approach. In this paper we propose a unified common framework, including both intrinsic and extrinsic tasks, for a fair and objective meta-embeddings evaluation. Furthermore, we present a new method to generate meta-embeddings, outperforming previous work on a large number of intrinsic evaluation benchmarks. Our evaluation framework also allows us to conclude that previous extrinsic evaluations of meta-embeddings have been overestimated.</abstract>
       <url hash="d0f7cded">2021.findings-emnlp.333</url>
@@ -10143,7 +10143,7 @@
       <author><first>Eric</first><last>Chang</last></author>
       <author><first>Amilcare</first><last>Gentili</last></author>
       <author><first>Julian</first><last>McAuley</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>4009–4015</pages>
       <abstract>Radiology report generation aims at generating descriptive text from radiology images automatically, which may present an opportunity to improve radiology reporting and interpretation. A typical setting consists of training encoder-decoder models on image-report pairs with a cross entropy loss, which struggles to generate informative sentences for clinical diagnoses since normal findings dominate the datasets. To tackle this challenge and encourage more clinically-accurate text outputs, we propose a novel weakly supervised contrastive loss for medical report generation. Experimental results demonstrate that our method benefits from contrasting target reports with incorrect but semantically-close ones. It outperforms previous work on both clinical correctness and text generation metrics for two public benchmarks.</abstract>
       <url hash="572bb80b">2021.findings-emnlp.336</url>
@@ -10153,7 +10153,7 @@
     </paper>
     <paper id="337">
       <title><fixed-case>NUANCED</fixed-case>: Natural Utterance Annotation for Nuanced Conversation with Estimated Distributions</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Honglei</first><last>Liu</last></author>
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
@@ -10193,7 +10193,7 @@
       <author><first>Hengchang</first><last>Hu</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
       <author><first>Prakash</first><last>Ishwar</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>4037–4050</pages>
       <abstract>News media structure their reporting of events or issues using certain perspectives. When describing an incident involving gun violence, for example, some journalists may focus on mental health or gun regulation, while others may emphasize the discussion of gun rights. Such perspectives are called “frames” in communication research. We study, for the first time, the value of combining lead images and their contextual information with text to identify the frame of a given news article. We observe that using multiple modes of information(article- and image-derived features) improves prediction of news frames over any single mode of information when the images are relevant to the frames of the headlines. We also observe that frame image relevance is related to the ease of conveying frames via images, which we call frame concreteness. Additionally, we release the first multimodal news framing dataset related to gun violence in the U.S., curated and annotated by communication researchers. The dataset will allow researchers to further examine the use of multiple information modalities for studying media framing.</abstract>
       <url hash="2b650ef5">2021.findings-emnlp.339</url>
@@ -10232,7 +10232,7 @@
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Dallas</first><last>Card</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>4066–4073</pages>
       <abstract>Research in NLP is often supported by experimental results, and improved reporting of such results can lead to better understanding and more reproducible science. In this paper we analyze three statistical estimators for expected validation performance, a tool used for reporting performance (e.g., accuracy) as a function of computational budget (e.g., number of hyperparameter tuning experiments). Where previous work analyzing such estimators focused on the bias, we also examine the variance and mean squared error (MSE). In both synthetic and realistic scenarios, we evaluate three estimators and find the unbiased estimator has the highest variance, and the estimator with the smallest variance has the largest bias; the estimator with the smallest MSE strikes a balance between bias and variance, displaying a classic bias-variance tradeoff. We use expected validation performance to compare between different models, and analyze how frequently each estimator leads to drawing incorrect conclusions about which of two models performs best. We find that the two biased estimators lead to the fewest incorrect conclusions, which hints at the importance of minimizing variance and MSE.</abstract>
       <url hash="bfcf7612">2021.findings-emnlp.342</url>
@@ -10294,7 +10294,7 @@
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Yao</first><last>Wan</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>4106–4117</pages>
       <abstract>Few-shot table-to-text generation is a task of composing fluent and faithful sentences to convey table content using limited data. Despite many efforts having been made towards generating impressive fluent sentences by fine-tuning powerful pre-trained language models, the faithfulness of generated content still needs to be improved. To this end, this paper proposes a novel approach Attend, Memorize and Generate (called AMG), inspired by the text generation process of humans. In particular, AMG (1) attends over the multi-granularity of context using a novel strategy based on table slot level and traditional token-by-token level attention to exploit both the table structure and natural linguistic information; (2) dynamically memorizes the table slot allocation states; and (3) generates faithful sentences according to both the context and memory allocation states. Comprehensive experiments with human evaluation on three domains (i.e., humans, songs, and books) of the Wiki dataset show that our model can generate higher qualified texts when compared with several state-of-the-art baselines, in both fluency and faithfulness.</abstract>
       <url hash="8ef048d3">2021.findings-emnlp.347</url>
@@ -10324,7 +10324,7 @@
       <author><first>Pei</first><last>Zhou</last></author>
       <author><first>Pegah</first><last>Jandaghi</last></author>
       <author><first>Hyundong</first><last>Cho</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>4132–4146</pages>
@@ -10353,7 +10353,7 @@
     <paper id="351">
       <title>Textual Time Travel: A Temporally Informed Approach to Theory of Mind</title>
       <author><first>Akshatha</first><last>Arodi</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>4162–4172</pages>
       <abstract>Natural language processing systems such as dialogue agents should be able to reason about other people’s beliefs, intentions and desires. This capability, called theory of mind (ToM), is crucial, as it allows a model to predict and interpret the needs of users based on their mental states. A recent line of research evaluates the ToM capability of existing memory-augmented neural models through question-answering. These models perform poorly on false belief tasks where beliefs differ from reality, especially when the dataset contains distracting sentences. In this paper, we propose a new temporally informed approach for improving the ToM capability of memory-augmented neural models. Our model incorporates priors about the entities’ minds and tracks their mental states as they evolve over time through an extended passage. It then responds to queries through textual time travel–i.e., by accessing the stored memory of an earlier time step. We evaluate our model on ToM datasets and find that this approach improves performance, particularly by correcting the predicted mental states to match the false belief.</abstract>
       <url hash="5e250c12">2021.findings-emnlp.351</url>
@@ -10416,7 +10416,7 @@
       <author><first>Allen</first><last>Kim</last></author>
       <author><first>Charuta</first><last>Pethe</last></author>
       <author><first>Naoya</first><last>Inoue</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <pages>4217–4226</pages>
       <abstract>Substantial amounts of work are required to clean large collections of digitized books for NLP analysis, both because of the presence of errors in the scanned text and the presence of duplicate volumes in the corpora. In this paper, we consider the issue of deduplication in the presence of optical character recognition (OCR) errors. We present methods to handle these errors, evaluated on a collection of 19,347 texts from the Project Gutenberg dataset and 96,635 texts from the HathiTrust Library. We demonstrate that improvements in language models now enable the detection and correction of OCR errors without consideration of the scanning image itself. The inconsistencies found by aligning pairs of scans of the same underlying work provides training data to build models for detecting and correcting errors. We identify the canonical version for each of 17,136 repeatedly-scanned books from 58,808 scans. Finally, we investigate methods to detect and correct errors in single-copy texts. We show that on average, our method corrects over six times as many errors as it introduces. We also provide interesting analysis on the relation between scanning quality and other factors such as location and publication year.</abstract>
       <url hash="c95e1e8c">2021.findings-emnlp.356</url>
@@ -10445,7 +10445,7 @@
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>4234–4241</pages>
       <abstract>Recently, kNN-MT (Khandelwal et al., 2020) has shown the promising capability of directly incorporating the pre-trained neural machine translation (NMT) model with domain-specific token-level k-nearest-neighbor (kNN) retrieval to achieve domain adaptation without retraining. Despite being conceptually attractive, it heavily relies on high-quality in-domain parallel corpora, limiting its capability on unsupervised domain adaptation, where in-domain parallel corpora are scarce or nonexistent. In this paper, we propose a novel framework that directly uses in-domain monolingual sentences in the target language to construct an effective datastore for k-nearest-neighbor retrieval. To this end, we first introduce an autoencoder task based on the target language, and then insert lightweight adapters into the original NMT model to map the token-level representation of this task to the ideal representation of the translation task. Experiments on multi-domain datasets demonstrate that our proposed approach significantly improves the translation accuracy with target-side monolingual data, while achieving comparable performance with back-translation. Our implementation is open-sourced at <url>https://github.com/zhengxxn/UDA-KNN</url>.</abstract>
       <url hash="9c9b2190">2021.findings-emnlp.358</url>
@@ -10456,7 +10456,7 @@
     <paper id="359">
       <title>The Topic Confusion Task: A Novel Evaluation Scenario for Authorship Attribution</title>
       <author><first>Malik</first><last>Altakrori</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Benjamin C. M.</first><last>Fung</last></author>
       <pages>4242–4256</pages>
       <abstract>Authorship attribution is the problem of identifying the most plausible author of an anonymous text from a set of candidate authors. Researchers have investigated same-topic and cross-topic scenarios of authorship attribution, which differ according to whether new, unseen topics are used in the testing phase. However, neither scenario allows us to explain whether errors are caused by failure to capture authorship writing style or by the topic shift. Motivated by this, we propose the <i>topic confusion</i> task where we switch the author-topic configuration between the training and testing sets. This setup allows us to investigate two types of errors: one caused by the topic shift and one caused by the features’ inability to capture the writing styles. We show that stylometric features with part-of-speech tags are the least susceptible to topic variations. We further show that combining them with other features leads to significantly lower topic confusion and higher attribution accuracy. Finally, we show that pretrained language models such as BERT and RoBERTa perform poorly on this task and are surpassed by simple features such as word-level <tex-math>n</tex-math>-gram.</abstract>
@@ -10470,7 +10470,7 @@
       <author><first>Andrew</first><last>Lee</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Larry</first><last>An</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>4257–4272</pages>
       <abstract>Many statistical models have high accuracy on test benchmarks, but are not explainable, struggle in low-resource scenarios, cannot be reused for multiple tasks, and cannot easily integrate domain expertise. These factors limit their use, particularly in settings such as mental health, where it is difficult to annotate datasets and model outputs have significant impact. We introduce a micromodel architecture to address these challenges. Our approach allows researchers to build interpretable representations that embed domain knowledge and provide explanations throughout the model’s decision process. We demonstrate the idea on multiple mental health tasks: depression classification, PTSD classification, and suicidal risk assessment. Our systems consistently produce strong results, even in low-resource scenarios, and are more interpretable than alternative methods.</abstract>
       <url hash="63ab617a">2021.findings-emnlp.360</url>
@@ -10481,7 +10481,7 @@
     <paper id="361">
       <title>Discovering Explanatory Sentences in Legal Case Decisions Using Pre-trained Language Models</title>
       <author><first>Jaromir</first><last>Savelka</last></author>
-      <author><first>Kevin</first><last>Ashley</last></author>
+      <author id="kevin-d-ashley"><first>Kevin</first><last>Ashley</last></author>
       <pages>4273–4283</pages>
       <abstract>Legal texts routinely use concepts that are difficult to understand. Lawyers elaborate on the meaning of such concepts by, among other things, carefully investigating how they have been used in the past. Finding text snippets that mention a particular concept in a useful way is tedious, time-consuming, and hence expensive. We assembled a data set of 26,959 sentences, coming from legal case decisions, and labeled them in terms of their usefulness for explaining selected legal concepts. Using the dataset we study the effectiveness of transformer models pre-trained on large language corpora to detect which of the sentences are useful. In light of models’ predictions, we analyze various linguistic properties of the explanatory sentences as well as their relationship to the legal concept that needs to be explained. We show that the transformer-based models are capable of learning surprisingly sophisticated features and outperform the prior approaches to the task.</abstract>
       <url hash="8635ec17">2021.findings-emnlp.361</url>
@@ -10509,7 +10509,7 @@
     <paper id="363">
       <title>Reference-based Weak Supervision for Answer Sentence Selection using Web Data</title>
       <author><first>Vivek</first><last>Krishnamurthy</last></author>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>4294–4299</pages>
       <abstract>Answer Sentence Selection (AS2) models are core components of efficient retrieval-based Question Answering (QA) systems. We present the Reference-based Weak Supervision (RWS), a fully automatic large-scale data pipeline that harvests high-quality weakly- supervised answer sentences from Web data, only requiring a question-reference pair as input. We evaluated the quality of the RWS-derived data by training TANDA models, which are the state of the art for AS2. Our results show that the data consistently bolsters TANDA on three different datasets. In particular, we set the new state of the art for AS2 to P@1=90.1%, and MAP=92.9%, on WikiQA. We record similar performance gains of RWS on a much larger dataset named Web-based Question Answering (WQA).</abstract>
@@ -10599,9 +10599,9 @@
       <title>Mitigating Data Poisoning in Text Classification with Differential Privacy</title>
       <author><first>Chang</first><last>Xu</last></author>
       <author><first>Jun</first><last>Wang</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Benjamin</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>4348–4356</pages>
       <abstract>NLP models are vulnerable to data poisoning attacks. One type of attack can plant a backdoor in a model by injecting poisoned examples in training, causing the victim model to misclassify test instances which include a specific pattern. Although defences exist to counter these attacks, they are specific to an attack type or pattern. In this paper, we propose a generic defence mechanism by making the training process robust to poisoning attacks through gradient shaping methods, based on differentially private training. We show that our method is highly effective in mitigating, or even eliminating, poisoning attacks on text classification, with only a small cost in predictive accuracy.</abstract>
       <url hash="24988dfc">2021.findings-emnlp.369</url>
@@ -10649,7 +10649,7 @@
       <title>Switch Point biased Self-Training: Re-purposing Pretrained Models for Code-Switching</title>
       <author><first>Parul</first><last>Chopra</last></author>
       <author><first>Sai Krishna</first><last>Rallabandi</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Khyathi Raghavi</first><last>Chandu</last></author>
       <pages>4389–4397</pages>
       <abstract>Code-switching (CS), a ubiquitous phenomenon due to the ease of communication it offers in multilingual communities still remains an understudied problem in language processing. The primary reasons behind this are: (1) minimal efforts in leveraging large pretrained multilingual models, and (2) the lack of annotated data. The distinguishing case of low performance of multilingual models in CS is the intra-sentence mixing of languages leading to switch points. We first benchmark two sequence labeling tasks – POS and NER on 4 different language pairs with a suite of pretrained models to identify the problems and select the best performing char-BERT model among them (addressing (1)). We then propose a self training method to repurpose the existing pretrained models using a switch-point bias by leveraging unannotated data (addressing (2)). We finally demonstrate that our approach performs well on both tasks by reducing the gap between the switch point performance while retaining the overall performance on two distinct language pairs in both the tasks. We plan to release our models and the code for all our experiments.</abstract>
@@ -10703,8 +10703,8 @@
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Budhaditya</first><last>Deb</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>4426–4433</pages>
       <abstract>Dialogue summarization helps readers capture salient information from long conversations in meetings, interviews, and TV series. However, real-world dialogues pose a great challenge to current summarization models, as the dialogue length typically exceeds the input limits imposed by recent transformer-based pre-trained models, and the interactive nature of dialogues makes relevant information more context-dependent and sparsely distributed than news articles. In this work, we perform a comprehensive study on long dialogue summarization by investigating three strategies to deal with the lengthy input problem and locate relevant information: (1) extended transformer models such as Longformer, (2) retrieve-then-summarize pipeline models with several dialogue utterance retrieval methods, and (3) hierarchical dialogue encoding models such as HMNet. Our experimental results on three long dialogue datasets (QMSum, MediaSum, SummScreen) show that the retrieve-then-summarize pipeline models yield the best performance. We also demonstrate that the summary quality can be further improved with a stronger retrieval model and pretraining on proper external summarization datasets.</abstract>
       <url hash="f7b1a504">2021.findings-emnlp.377</url>
@@ -10730,7 +10730,7 @@
       <author><first>Shivam</first><last>Sharma</last></author>
       <author><first>Dimitar</first><last>Dimitrov</last></author>
       <author><first>Md. Shad</first><last>Akhtar</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <pages>4439–4455</pages>
       <abstract>Internet memes have become powerful means to transmit political, psychological, and socio-cultural ideas. Although memes are typically humorous, recent days have witnessed an escalation of harmful memes used for trolling, cyberbullying, and abuse. Detecting such memes is challenging as they can be highly satirical and cryptic. Moreover, while previous work has focused on specific aspects of memes such as hate speech and propaganda, there has been little work on harm in general. Here, we aim to bridge this gap. In particular, we focus on two tasks: (i)detecting harmful memes, and (ii) identifying the social entities they target. We further extend the recently released HarMeme dataset, which covered COVID-19, with additional memes and a new topic: US politics. To solve these tasks, we propose MOMENTA (MultimOdal framework for detecting harmful MemEs aNd Their tArgets), a novel multimodal deep neural network that uses global and local perspectives to detect harmful memes. MOMENTA systematically analyzes the local and the global perspective of the input meme (in both modalities) and relates it to the background context. MOMENTA is interpretable and generalizable, and our experiments show that it outperforms several strong rivaling approaches.</abstract>
@@ -10747,7 +10747,7 @@
       <author><first>Xiang</first><last>Gao</last></author>
       <author><first>Hamid</first><last>Palangi</last></author>
       <author><first>Jianfeng</first><last>Wang</last></author>
-      <author><first>Kenneth</first><last>Forbus</last></author>
+      <author id="kenneth-forbus"><first>Kenneth</first><last>Forbus</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>4456–4472</pages>
       <abstract>Emotion and empathy are examples of human qualities lacking in many human-machine interactions. The goal of our work is to generate engaging dialogue grounded in a user-shared image with increased emotion and empathy while minimizing socially inappropriate or offensive outputs. We release the Neural Image Commenting with Empathy (NICE) dataset consisting of almost two million images and the corresponding human-generated comments, a set of human annotations, and baseline performance on a range of models. In-stead of relying on manually labeled emotions, we also use automatically generated linguistic representations as a source of weakly supervised labels. Based on these annotations, we define two different tasks for the NICE dataset. Then, we propose a novel pre-training model - Modeling Affect Generation for Image Comments (MAGIC) - which aims to generate comments for images, conditioned on linguistic representations that capture style and affect, and to help generate more empathetic, emotional, engaging and socially appropriate comments. Using this model we achieve state-of-the-art performance on one of our NICE tasks. The experiments show that the approach can generate more human-like and engaging image comments.</abstract>
@@ -10838,7 +10838,7 @@
       <author><first>Parsa</first><last>Farinneya</last></author>
       <author><first>Mohammad Mahdi</first><last>Abdollah Pour</last></author>
       <author><first>Sardar</first><last>Hamidian</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>4556–4565</pages>
       <abstract>Social media has emerged as a key channel for seeking information. Online users spend several hours reading, posting, and searching for news on microblogging platforms daily. However, this could act as a double-edged sword especially when not all information online is reliable. Moreover, the inherently unmoderated nature of social media renders identifying unverified information ever more challenging. Most of the existing approaches for rumor tracking are not scalable because of their dependency on a significant amount of labeled data. In this work, we investigate this problem from different angles. We design an Active-Transfer Learning (ATL) strategy to identify rumors with a limited amount of annotated data. We go beyond that and investigate the impact of leveraging various machine learning approaches in addition to different contextual representations. We discuss the impact of multiple classifiers on a limited amount of annotated data followed by an interactive approach to gradually update the models by adding the least certain samples (LCS) from the pool of unlabeled data. Our proposed Active Learning (AL) strategy achieves faster convergence in terms of the F-score while requiring fewer annotated samples (42% of the whole dataset for the best model).</abstract>
       <url hash="0d5d6dd8">2021.findings-emnlp.387</url>
@@ -10936,7 +10936,7 @@
       <author><first>Hongjie</first><last>Ren</last></author>
       <author><first>Kazushige</first><last>Ouchi</last></author>
       <author><first>Ze</first><last>Liu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>4620–4630</pages>
       <abstract>Generative conversation systems tend to produce meaningless and generic responses, which significantly reduce the user experience. In order to generate informative and diverse responses, recent studies proposed to fuse knowledge to improve informativeness and adopt latent variables to enhance the diversity. However, utilizing latent variables will lead to the inaccuracy of knowledge in the responses, and the dissemination of wrong knowledge will mislead the communicators. To address this problem, we propose a Syntactically Diverse Adversarial Network (SDAN) for knowledge-grounded conversation model. SDAN contains an adversarial hierarchical semantic network to keep the semantic coherence, a knowledge-aware network to attend more related knowledge for improving the informativeness and a syntactic latent variable network to generate syntactically diverse responses. Additionally, in order to increase the controllability of syntax, we adopt adversarial learning to decouple semantic and syntactic representations. Experimental results show that our model can not only generate syntactically diverse and knowledge-accurate responses but also significantly achieve the balance between improving the syntactic diversity and maintaining the knowledge accuracy.</abstract>
       <url hash="4587ce5f">2021.findings-emnlp.394</url>
@@ -10965,7 +10965,7 @@
       <author><first>Mingxuan</first><last>Wang</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <author><first>Hang</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>4639–4644</pages>
       <abstract>This paper presents Self-correcting Encoding (Secoco), a framework that effectively deals with noisy input for robust neural machine translation by introducing self-correcting predictors. Different from previous robust approaches, Secoco enables NMT to explicitly correct noisy inputs and delete specific errors simultaneously with the translation decoding process. Secoco is able to achieve significant improvements over strong baselines on two real-world test sets and a benchmark WMT dataset with good interpretability. We will make our code and dataset publicly available soon.</abstract>
       <url hash="0ca13e16">2021.findings-emnlp.396</url>
@@ -11060,7 +11060,7 @@
       <title><fixed-case>A</fixed-case>uto<fixed-case>EQA</fixed-case>: Auto-Encoding Questions for Extractive Question Answering</title>
       <author><first>Stalin</first><last>Varanasi</last></author>
       <author><first>Saadullah</first><last>Amin</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>4706–4712</pages>
       <abstract>There has been a significant progress in the field of Extractive Question Answering (EQA) in the recent years. However, most of them are reliant on annotations of answer-spans in the corresponding passages. In this work, we address the problem of EQA when no annotations are present for the answer span, i.e., when the dataset contains only questions and corresponding passages. Our method is based on auto-encoding of the question that performs a question answering task during encoding and a question generation task during decoding. We show that our method performs well in a zero-shot setting and can provide an additional loss to boost performance for EQA.</abstract>
       <url hash="63c32b9c">2021.findings-emnlp.403</url>
@@ -11201,7 +11201,7 @@
     <paper id="414">
       <title>‘Just What do You Think You’re Doing, Dave?’ A Checklist for Responsible Data Use in <fixed-case>NLP</fixed-case></title>
       <author><first>Anna</first><last>Rogers</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Kobi</first><last>Leins</last></author>
       <pages>4821–4833</pages>
       <abstract>A key part of the NLP ethics movement is responsible use of data, but exactly what that means or how it can be best achieved remain unclear. This position paper discusses the core legal and ethical principles for collection and sharing of textual data, and the tensions between them. We propose a potential checklist for responsible data (re-)use that could both standardise the peer review of conference submissions, as well as enable a more in-depth view of published research across the community. Our proposal aims to contribute to the development of a consistent standard for data (re-)use, embraced across NLP conferences.</abstract>
@@ -11298,7 +11298,7 @@
       <author><first>Karmanya</first><last>Aggarwal</last></author>
       <author><first>Emily</first><last>Allaway</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>4886–4901</pages>
       <abstract>Many crowdsourced NLP datasets contain systematic artifacts that are identified only after data collection is complete. Earlier identification of these issues should make it easier to create high-quality training and evaluation data. We attempt this by evaluating protocols in which expert linguists work ‘in the loop’ during data collection to identify and address these issues by adjusting task instructions and incentives. Using natural language inference as a test case, we compare three data collection protocols: (i) a baseline protocol with no linguist involvement, (ii) a linguist-in-the-loop intervention with iteratively-updated constraints on the writing task, and (iii) an extension that adds direct interaction between linguists and crowdworkers via a chatroom. We find that linguist involvement does not lead to increased accuracy on out-of-domain test sets compared to baseline, and adding a chatroom has no effect on the data. Linguist involvement does, however, lead to more challenging evaluation data and higher accuracy on some challenge sets, demonstrating the benefits of integrating expert analysis during data collection.</abstract>
       <url hash="720e338c">2021.findings-emnlp.421</url>
@@ -11311,7 +11311,7 @@
       <author><first>Shane</first><last>Storks</last></author>
       <author><first>Qiaozi</first><last>Gao</last></author>
       <author><first>Yichi</first><last>Zhang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>4902–4918</pages>
       <abstract>Large-scale, pre-trained language models (LMs) have achieved human-level performance on a breadth of language understanding tasks. However, evaluations only based on end task performance shed little light on machines’ true ability in language understanding and reasoning. In this paper, we highlight the importance of evaluating the underlying reasoning process in addition to end performance. Toward this goal, we introduce Tiered Reasoning for Intuitive Physics (TRIP), a novel commonsense reasoning dataset with dense annotations that enable multi-tiered evaluation of machines’ reasoning process. Our empirical results show that while large LMs can achieve high end performance, they struggle to support their predictions with valid supporting evidence. The TRIP dataset and our baseline results will motivate verifiable evaluation of commonsense reasoning and facilitate future research toward developing better language understanding and reasoning models.</abstract>
       <url hash="703e207e">2021.findings-emnlp.422</url>
@@ -11323,7 +11323,7 @@
     <paper id="423">
       <title>Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets</title>
       <author><first>Michael</first><last>Kranzlein</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <pages>4919–4928</pages>
       <abstract>For interpreting the behavior of a probabilistic model, it is useful to measure a model’s calibration—the extent to which it produces reliable confidence scores. We address the open problem of calibration for tagging models with sparse tagsets, and recommend strategies to measure and reduce calibration error (CE) in such models. We show that several post-hoc recalibration techniques all reduce calibration error across the marginal distribution for two existing sequence taggers. Moreover, we propose tag frequency grouping (TFG) as a way to measure calibration error in different frequency bands. Further, recalibrating each group separately promotes a more equitable reduction of calibration error across the tag frequency spectrum.</abstract>
@@ -11338,11 +11338,11 @@
       <author><first>Akhilesh Deepak</first><last>Gotmare</last></author>
       <author><first>Bryan</first><last>McCann</last></author>
       <author><first>Nitish Shirish</first><last>Keskar</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
       <pages>4929–4952</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="279293d2">2021.findings-emnlp.424</url>
       <bibkey>krause-etal-2021-gedi-generative</bibkey>
       <doi>10.18653/v1/2021.findings-emnlp.424</doi>
diff --git a/data/xml/2021.fnp.xml b/data/xml/2021.fnp.xml
index 880ddea61f..d9d8477bce 100644
--- a/data/xml/2021.fnp.xml
+++ b/data/xml/2021.fnp.xml
@@ -32,7 +32,7 @@
       <author><first>Dominique</first><last>Mariko</last></author>
       <author><first>Estelle</first><last>Labidurie</last></author>
       <author><first>Hugues</first><last>de Mazancourt</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>9–18</pages>
       <url hash="e0236a9a">2021.fnp-1.2</url>
       <bibkey>cui-etal-2021-sequence</bibkey>
@@ -66,7 +66,7 @@
     <paper id="6">
       <title><fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2021: Dependency Tree in Graph Neural Network for Better Cause-Effect Span Detection</title>
       <author><first>Fiona Anting</first><last>Tan</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>37–43</pages>
       <url hash="2d8c314e">2021.fnp-1.6</url>
       <bibkey>tan-ng-2021-nus</bibkey>
@@ -111,7 +111,7 @@
     <paper id="11">
       <title>Annotation model and corpus for opinionated economy and finance narrative detection</title>
       <author><first>Jiahui</first><last>Hu</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Dirk</first><last>Schumacher</last></author>
       <pages>61–66</pages>
       <url hash="1c903a1e">2021.fnp-1.11</url>
@@ -171,7 +171,7 @@
       <title>Extractive Financial Narrative Summarisation using <fixed-case>S</fixed-case>entence<fixed-case>BERT</fixed-case> Based Clustering</title>
       <author><first>Tuba</first><last>Gokhan</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>94–98</pages>
       <url hash="48c58fdb">2021.fnp-1.18</url>
       <bibkey>gokhan-etal-2021-extractive</bibkey>
diff --git a/data/xml/2021.gebnlp.xml b/data/xml/2021.gebnlp.xml
index bf08b93fe7..d3f6c4feaa 100644
--- a/data/xml/2021.gebnlp.xml
+++ b/data/xml/2021.gebnlp.xml
@@ -97,7 +97,7 @@
     <paper id="8">
       <title>Using Gender- and Polarity-Informed Models to Investigate Bias</title>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>66–74</pages>
       <abstract>In this work we explore the effect of incorporating demographic metadata in a text classifier trained on top of a pre-trained transformer language model. More specifically, we add information about the gender of critics and book authors when classifying the polarity of book reviews, and the polarity of the reviews when classifying the genders of authors and critics. We use an existing data set of Norwegian book reviews with ratings by professional critics, which has also been augmented with gender information, and train a document-level sentiment classifier on top of a recently released Norwegian BERT-model. We show that gender-informed models obtain substantially higher accuracy, and that polarity-informed models obtain higher accuracy when classifying the genders of book authors. For this particular data set, we take this result as a confirmation of the gender bias in the underlying label distribution, but in other settings we believe a similar approach can be used for mitigating bias in the model.</abstract>
@@ -107,8 +107,8 @@
     </paper>
     <paper id="9">
       <title>Assessing Gender Bias in <fixed-case>W</fixed-case>ikipedia: Inequalities in Article Titles</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>75–85</pages>
       <abstract>Potential gender biases existing in Wikipedia’s content can contribute to biased behaviors in a variety of downstream NLP systems. Yet, efforts in understanding what inequalities in portraying women and men occur in Wikipedia focused so far only on *biographies*, leaving open the question of how often such harmful patterns occur in other topics. In this paper, we investigate gender-related asymmetries in Wikipedia titles from *all domains*. We assess that for only half of gender-related articles, i.e., articles with words such as *women* or *male* in their titles, symmetrical counterparts describing the same concept for the other gender (and clearly stating it in their titles) exist. Among the remaining imbalanced cases, the vast majority of articles concern sports- and social-related issues. We provide insights on how such asymmetries can influence other Wikipedia components and propose steps towards reducing the frequency of observed patterns.</abstract>
       <url hash="4a1c0980">2021.gebnlp-1.9</url>
@@ -119,7 +119,7 @@
       <title>Investigating the Impact of Gender Representation in <fixed-case>ASR</fixed-case> Training Data: a Case Study on Librispeech</title>
       <author><first>Mahault</first><last>Garnerin</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>86–92</pages>
       <abstract>In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependant of the adequacy between the individuals present in the training and testing sets.</abstract>
       <url hash="d224b621">2021.gebnlp-1.10</url>
@@ -129,7 +129,7 @@
     <paper id="11">
       <title>Generating Gender Augmented Data for <fixed-case>NLP</fixed-case></title>
       <author><first>Nishtha</first><last>Jain</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Declan</first><last>Groves</last></author>
       <author><first>Eva</first><last>Vanmassenhove</last></author>
       <pages>93–102</pages>
diff --git a/data/xml/2021.gem.xml b/data/xml/2021.gem.xml
index c089c90557..505d6b8f9b 100644
--- a/data/xml/2021.gem.xml
+++ b/data/xml/2021.gem.xml
@@ -81,7 +81,7 @@
       <author><first>Eleftheria</first><last>Briakou</last></author>
       <author><first>Sweta</first><last>Agrawal</last></author>
       <author><first>Ke</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>58–67</pages>
       <abstract>This paper reviews and summarizes human evaluation practices described in 97 style transfer papers with respect to three main evaluation aspects: style transfer, meaning preservation, and fluency. In principle, evaluations by human raters should be the most reliable. However, in style transfer papers, we find that protocols for human evaluations are often underspecified and not standardized, which hampers the reproducibility of research in this field and progress toward better human and automatic evaluation methods.</abstract>
@@ -130,7 +130,7 @@
       <author><first>Anuoluwapo</first><last>Aremu</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
       <author><first>Khyathi Raghavi</first><last>Chandu</last></author>
-      <author><first>Miruna-Adriana</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Kaustubh</first><last>Dhole</last></author>
       <author><first>Wanyu</first><last>Du</last></author>
@@ -139,7 +139,7 @@
       <author><first>Chris Chinenye</first><last>Emezue</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
       <author><first>Cristina</first><last>Garbacea</last></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
       <author><first>Yacine</first><last>Jernite</last></author>
       <author><first>Harsh</first><last>Jhamtani</last></author>
@@ -162,7 +162,7 @@
       <author><first>Vitaly</first><last>Nikolaev</last></author>
       <author><first>Andre</first><last>Niyongabo Rubungo</last></author>
       <author><first>Salomey</first><last>Osei</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Laura</first><last>Perez-Beltrachini</last></author>
       <author><first>Niranjan Ramesh</first><last>Rao</last></author>
       <author><first>Vikas</first><last>Raunak</last></author>
@@ -172,7 +172,7 @@
       <author><first>Thibault</first><last>Sellam</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Anastasia</first><last>Shimorina</last></author>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Hendrik</first><last>Strobelt</last></author>
       <author><first>Nishant</first><last>Subramani</last></author>
       <author><first>Wei</first><last>Xu</last></author>
@@ -206,7 +206,7 @@
       <author><first>Peyman</first><last>Heidari</last></author>
       <author><first>Ankit</first><last>Arun</last></author>
       <author><first>Shashank</first><last>Jain</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>136–147</pages>
       <abstract>We explore the use of self-training and acceptability classifiers with pre-trained models for natural language generation in structure-to-text settings using three GEM datasets (E2E, WebNLG-en, Schema-Guided Dialog). With the Schema-Guided Dialog dataset, we also experiment with including multiple turns of context in the input. We find that self-training with reconstruction matching along with acceptability classifier filtering can improve semantic correctness, though gains are limited in the full-data setting. With context-conditioning, we find that including multiple turns in the context encourages the model to align with the user’s word and phrasing choices as well as to generate more self-consistent responses. In future versions of the GEM challenge, we encourage the inclusion of few-shot tracks to encourage research on data efficiency.</abstract>
       <url hash="64c93fe9">2021.gem-1.12</url>
@@ -216,7 +216,7 @@
     <paper id="13">
       <title><fixed-case>NUIG</fixed-case>-<fixed-case>DSI</fixed-case>’s submission to The <fixed-case>GEM</fixed-case> Benchmark 2021</title>
       <author><first>Nivranshu</first><last>Pasricha</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>148–154</pages>
       <abstract>This paper describes the submission by NUIG-DSI to the GEM benchmark 2021. We participate in the modeling shared task where we submit outputs on four datasets for data-to-text generation, namely, DART, WebNLG (en), E2E and CommonGen. We follow an approach similar to the one described in the GEM benchmark paper where we use the pre-trained T5-base model for our submission. We train this model on additional monolingual data where we experiment with different masking strategies specifically focused on masking entities, predicates and concepts as well as a random masking strategy for pre-training. In our results we find that random masking performs the best in terms of automatic evaluation metrics, though the results are not statistically significantly different compared to other masking strategies.</abstract>
@@ -228,7 +228,7 @@
       <title><fixed-case>S</fixed-case>imple<fixed-case>NER</fixed-case> Sentence Simplification System for <fixed-case>GEM</fixed-case> 2021</title>
       <author><first>K V Aditya</first><last>Srivatsa</last></author>
       <author><first>Monil</first><last>Gokani</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>155–160</pages>
       <abstract>This paper describes SimpleNER, a model developed for the sentence simplification task at GEM-2021. Our system is a monolingual Seq2Seq Transformer architecture that uses control tokens pre-pended to the data, allowing the model to shape the generated simplifications according to user desired attributes. Additionally, we show that NER-tagging the training data before use helps stabilize the effect of the control tokens and significantly improves the overall performance of the system. We also employ pretrained embeddings to reduce data sparsity and allow the model to produce more generalizable outputs.</abstract>
       <url hash="62815520">2021.gem-1.14</url>
diff --git a/data/xml/2021.germeval.xml b/data/xml/2021.germeval.xml
index e9a447856a..36a9958a2c 100644
--- a/data/xml/2021.germeval.xml
+++ b/data/xml/2021.germeval.xml
@@ -133,7 +133,7 @@
     <paper id="12">
       <title>ur-iw-hnt at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2021: An Ensembling Strategy with Multiple <fixed-case>BERT</fixed-case> Models</title>
       <author><first>Hoai Nam</first><last>Tran</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>83–87</pages>
       <abstract>This paper describes our approach (ur-iw-hnt) for the Shared Task of GermEval2021 to identify toxic, engaging, and fact-claiming comments. We submitted three runs using an ensembling strategy by majority (hard) voting with multiple different BERT models of three different types: German-based, Twitter-based, and multilingual models. All ensemble models outperform single models, while BERTweet is the winner of all individual models in every subtask. Twitter-based models perform better than GermanBERT models, and multilingual models perform worse but by a small margin.</abstract>
       <url hash="a929a4f4">2021.germeval-1.12</url>
@@ -153,7 +153,7 @@
     <paper id="14">
       <title><fixed-case>UR</fixed-case>@<fixed-case>NLP</fixed-case>_<fixed-case>A</fixed-case>_<fixed-case>T</fixed-case>eam @ <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments</title>
       <author><first>Kwabena Odame</first><last>Akomeah</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Bernd</first><last>Ludwig</last></author>
       <pages>95–99</pages>
       <abstract>In this paper, we report on our approach to addressing the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask based on ensembles of three models each using contextual embeddings from pre-trained language models using SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models – both with and without fine-tuning. We observe that for the runs we submitted that the SVM models overfitted the training data and this affected the aggregation method (simple majority voting) of the ensembles. The model records a lower performance on the test set than on the training set. Exploring the issue of overfitting we uncovered that due to a bug in the pipeline the runs we submitted had not been trained on the full set but only on a small training set. Therefore in this paper we also include the results we get when trained on the full training set which demonstrate the power of ensembles.</abstract>
diff --git a/data/xml/2021.gwc.xml b/data/xml/2021.gwc.xml
index 4ab2172d7b..0220e33101 100644
--- a/data/xml/2021.gwc.xml
+++ b/data/xml/2021.gwc.xml
@@ -70,7 +70,7 @@
     <paper id="6">
       <title><fixed-case>A</fixed-case>sk2<fixed-case>T</fixed-case>ransformers: Zero-Shot Domain labelling with Pretrained Language Models</title>
       <author><first>Oscar</first><last>Sainz</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>44–52</pages>
       <abstract>In this paper we present a system that exploits different pre-trained Language Models for assigning domain labels to WordNet synsets without any kind of supervision. Furthermore, the system is not restricted to use a particular set of domain labels. We exploit the knowledge encoded within different off-the-shelf pre-trained Language Models and task formulations to infer the domain label of a particular WordNet definition. The proposed zero-shot system achieves a new state-of-the-art on the English dataset used in the evaluation.</abstract>
       <url hash="d8affe52">2021.gwc-1.6</url>
@@ -100,7 +100,7 @@
     <paper id="9">
       <title>Monolingual Word Sense Alignment as a Classification Problem</title>
       <author><first>Sina</first><last>Ahmadi</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>73–80</pages>
       <abstract>Words are defined based on their meanings in various ways in different resources. Aligning word senses across monolingual lexicographic resources increases domain coverage and enables integration and incorporation of data. In this paper, we explore the application of classification methods using manually-extracted features along with representation learning techniques in the task of word sense alignment and semantic relationship detection. We demonstrate that the performance of classification methods dramatically varies based on the type of semantic relationships due to the nature of the task but outperforms the previous experiments.</abstract>
       <url hash="c7363d23">2021.gwc-1.9</url>
@@ -118,8 +118,8 @@
     </paper>
     <paper id="11">
       <title>The <fixed-case>G</fixed-case>lobal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Formats: Updates for 2020</title>
-      <author><first>John P.</first><last>McCrae</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
@@ -131,7 +131,7 @@
     </paper>
     <paper id="12">
       <title>Intrinsically Interlingual: The Wn Python Library for Wordnets</title>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>100–107</pages>
       <abstract>This paper introduces Wn, a new Python library for working with wordnets. Unlike previous libraries, Wn is built from the beginning to accommodate multiple wordnets — for multiple languages or multiple versions of the same wordnet — while retaining the ability to query and traverse them independently. It is also able to download and incorporate wordnets published online. These features are made possible through Wn’s adoption of standard formats and methods for interoperability, namely the WN-LMF schema (Vossen et al., 2013; Bond et al., 2020) and the Collaborative Interlingual Index (Bond et al., 2016). Wn is open-source, easily available, and well-documented.</abstract>
@@ -167,7 +167,7 @@
     <paper id="15">
       <title>Evaluation of Taxonomy Enrichment on Diachronic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Versions</title>
       <author><first>Irina</first><last>Nikishina</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>126–136</pages>
@@ -257,7 +257,7 @@
     <paper id="23">
       <title>Comparing Similarity of Words Based on Psychosemantic Experiment and <fixed-case>R</fixed-case>u<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Valery</first><last>Solovyev</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>199–206</pages>
       <abstract>In the paper we compare the structure of the Russian language thesaurus RuWordNet with the data of a psychosemantic experiment to identify semantically close words. The aim of the study is to find out to what extent the structure of RuWordNet corresponds to the intuitive ideas of native speakers about the semantic proximity of words. The respondents were asked to list synonyms to a given word. As a result of the experiment, we found that the respondents mainly mentioned not only synonyms but words that are in paradigmatic relations with the stimuli. The words of the mental sphere were chosen for the experiment. In 95% of cases, the words characterized in the experiment as semantically close were also close according to the thesaurus. In other cases, additions to the thesaurus were proposed.</abstract>
       <url hash="28800bff">2021.gwc-1.23</url>
@@ -320,7 +320,7 @@
     </paper>
     <paper id="29">
       <title>Towards a Linking between <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikidata</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>David</first><last>Cillessen</last></author>
       <pages>252–257</pages>
       <abstract>WordNet is the most widely used lexical resource for English, while Wikidata is one of the largest knowledge graphs of entity and concepts available. While, there is a clear difference in the focus of these two resources, there is also a significant overlap and as such a complete linking of these resources would have many uses. We propose the development of such a linking, first by means of the hapax legomenon links and secondly by the use of natural language processing techniques. We show that these can be done with high accuracy but that human validation is still necessary. This has resulted in over 9,000 links being added between these two resources.</abstract>
@@ -340,7 +340,7 @@
     <paper id="31">
       <title><fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et2: Extending the coverage of adjectives in <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et based on thesaurus data (project presentation)</title>
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <pages>267–272</pages>
       <abstract>The paper describes work in progress in the DanNet2 project financed by the Carlsberg Foundation. The project aim is to extend the original Danish wordnet, DanNet, in several ways. Main focus is on extension of the coverage and description of the adjectives, a part of speech that was rather sparsely described in the original wordnet. We describe the methodology and initial work of semi-automatically transferring adjectives from the Danish Thesaurus to the wordnet with the aim of easily enlarging the coverage from 3,000 to approx. 13,000 adjectival synsets. Transfer is performed by manually encoding all missing adjectival subsection headwords from the thesaurus and thereafter employing a semi-automatic procedure where adjectives from the same subsection are transferred to the wordnet as either 1) near synonyms to the section’s headword, 2) hyponyms to the section’s headword, or 3) as members of the same synset as the headword. We also discuss how to deal with the problem of multiple representations of the same sense in the thesaurus, and present other types of information from the thesaurus that we plan to integrate, such as thematic and sentiment information.</abstract>
diff --git a/data/xml/2021.hackashop.xml b/data/xml/2021.hackashop.xml
index ab9d38bf32..8854ae9a3e 100644
--- a/data/xml/2021.hackashop.xml
+++ b/data/xml/2021.hackashop.xml
@@ -33,7 +33,7 @@
       <title>Related Named Entities Classification in the Economic-Financial Context</title>
       <author><first>Daniel</first><last>De Los Reyes</last></author>
       <author><first>Allan</first><last>Barcelos</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Isabel</first><last>Manssour</last></author>
       <pages>8–15</pages>
       <abstract>The present work uses the Bidirectional Encoder Representations from Transformers (BERT) to process a sentence and its entities and indicate whether two named entities present in a sentence are related or not, constituting a binary classification problem. It was developed for the Portuguese language, considering the financial domain and exploring deep linguistic representations to identify a relation between entities without using other lexical-semantic resources. The results of the experiments show an accuracy of 86% of the predictions.</abstract>
@@ -172,7 +172,7 @@
       <author><first>Matej</first><last>Ulčar</last></author>
       <author><first>Linda</first><last>Freienthal</last></author>
       <author><first>Silver</first><last>Traat</last></author>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
       <author><first>Matej</first><last>Martinc</last></author>
       <author><first>Nada</first><last>Lavrač</last></author>
       <author><first>Blaž</first><last>Škrlj</last></author>
@@ -182,8 +182,8 @@
       <author><first>Vid</first><last>Podpečan</last></author>
       <author><first>Janez</first><last>Kranjc</last></author>
       <author><first>Shane</first><last>Sheehan</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Hannu</first><last>Toivonen</last></author>
       <pages>99–109</pages>
diff --git a/data/xml/2021.hcinlp.xml b/data/xml/2021.hcinlp.xml
index 49f00f9923..1e43749c8d 100644
--- a/data/xml/2021.hcinlp.xml
+++ b/data/xml/2021.hcinlp.xml
@@ -6,7 +6,7 @@
       <editor><first>Su Lin</first><last>Blodgett</last></editor>
       <editor><first>Michael</first><last>Madaio</last></editor>
       <editor><first>Brendan</first><last>O'Connor</last></editor>
-      <editor><first>Hanna</first><last>Wallach</last></editor>
+      <editor id="hanna-wallach"><first>Hanna</first><last>Wallach</last></editor>
       <editor><first>Qian</first><last>Yang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title>Spellchecking for Children in Web Search: a Natural Language Interface Case-study</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Jerry Alan</first><last>Fails</last></author>
       <author><first>Katherine</first><last>Landau Wright</last></author>
       <author><first>Maria Soledad</first><last>Pera</last></author>
@@ -105,7 +105,7 @@
       <author><first>Jeonghwan</first><last>Kim</last></author>
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Suwon</first><last>Shin</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>53–59</pages>
       <abstract>Customer reviews are useful in providing an indirect, secondhand experience of a product. People often use reviews written by other customers as a guideline prior to purchasing a product. Such behavior signifies the authenticity of reviews in e-commerce platforms. However, fake reviews are increasingly becoming a hassle for both consumers and product owners. To address this issue, we propose You Only Need Gold (YONG), an essential information mining tool for detecting fake reviews and augmenting user discretion. Our experimental results show the poor human performance on fake review detection, substantially improved user capability given our tool, and the ultimate need for user reliance on the tool.</abstract>
       <url hash="51f1dcdd">2021.hcinlp-1.9</url>
@@ -125,7 +125,7 @@
       <author><first>Henrik</first><last>Voigt</last></author>
       <author><first>Monique</first><last>Meuschke</last></author>
       <author><first>Kai</first><last>Lawonn</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>66–73</pages>
       <abstract>Intuitive interaction with visual models becomes an increasingly important task in the field of Visualization (VIS) and verbal interaction represents a significant aspect of it. Vice versa, modeling verbal interaction in visual environments is a major trend in ongoing research in NLP. To date, research on Language &amp; Vision, however, mostly happens at the intersection of NLP and Computer Vision (CV), and much less at the intersection of NLP and Visualization, which is an important area in Human-Computer Interaction (HCI). This paper presents a brief survey of recent work on interactive tasks and set-ups in NLP and Visualization. We discuss the respective methods, show interesting gaps, and conclude by suggesting neural, visually grounded dialogue modeling as a promising potential for NLIs for visual models.</abstract>
       <url hash="fb2e8946">2021.hcinlp-1.11</url>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="14">
       <title>Machine Translation Believability</title>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>88–95</pages>
@@ -169,7 +169,7 @@
     <paper id="16">
       <title>An <fixed-case>IDR</fixed-case> Framework of Opportunities and Barriers between <fixed-case>HCI</fixed-case> and <fixed-case>NLP</fixed-case></title>
       <author><first>Nanna</first><last>Inie</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>101–108</pages>
       <abstract>This paper presents a framework of opportunities and barriers/risks between the two research fields Natural Language Processing (NLP) and Human-Computer Interaction (HCI). The framework is constructed by following an interdisciplinary research-model (IDR), combining field-specific knowledge with existing work in the two fields. The resulting framework is intended as a departure point for discussion and inspiration for research collaborations.</abstract>
       <url hash="d0ddbf15">2021.hcinlp-1.16</url>
diff --git a/data/xml/2021.humeval.xml b/data/xml/2021.humeval.xml
index 34a06511aa..56fb2ff8c6 100644
--- a/data/xml/2021.humeval.xml
+++ b/data/xml/2021.humeval.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-04-19" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Human Evaluation of NLP Systems (HumEval)</booktitle>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Shubham</first><last>Agarwal</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
@@ -21,27 +21,27 @@
     </frontmatter>
     <paper id="1">
       <title>It’s Commonsense, isn’t it? Demystifying Human Evaluations in Commonsense-Enhanced <fixed-case>NLG</fixed-case> Systems</title>
-      <author><first>Miruna-Adriana</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <author><first>Saad</first><last>Mahamood</last></author>
       <pages>1–12</pages>
       <abstract>Common sense is an integral part of human cognition which allows us to make sound decisions, communicate effectively with others and interpret situations and utterances. Endowing AI systems with commonsense knowledge capabilities will help us get closer to creating systems that exhibit human intelligence. Recent efforts in Natural Language Generation (NLG) have focused on incorporating commonsense knowledge through large-scale pre-trained language models or by incorporating external knowledge bases. Such systems exhibit reasoning capabilities without common sense being explicitly encoded in the training set. These systems require careful evaluation, as they incorporate additional resources during training which adds additional sources of errors. Additionally, human evaluation of such systems can have significant variation, making it impossible to compare different systems and define baselines. This paper aims to demystify human evaluations of commonsense-enhanced NLG systems by proposing the Commonsense Evaluation Card (CEC), a set of recommendations for evaluation reporting of commonsense-enhanced NLG systems, underpinned by an extensive analysis of human evaluations reported in the recent literature.</abstract>
       <url hash="9d3a126b">2021.humeval-1.1</url>
       <video href="https://www.youtube.com/watch?v=LlrsKZOKIoo"/>
-      <bibkey>clinciu-etal-2021-commonsense</bibkey>
       <video href="2021.humeval-1.1.mp4"/>
+      <bibkey>clinciu-etal-2021-commonsense</bibkey>
     </paper>
     <paper id="2">
       <title>Estimating Subjective Crowd-Evaluations as an Additional Objective to Improve Natural Language Generation</title>
       <author><first>Jakob</first><last>Nyberg</last></author>
       <author><first>Maike</first><last>Paetzel</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <pages>13–24</pages>
       <abstract>Human ratings are one of the most prevalent methods to evaluate the performance of NLP (natural language processing) algorithms. Similarly, it is common to measure the quality of sentences generated by a natural language generation model using human raters. In this paper we argue for exploring the use of subjective evaluations within the process of training language generation models in a multi-task learning setting. As a case study, we use a crowd-authored dialogue corpus to fine-tune six different language generation models. Two of these models incorporate multi-task learning and use subjective ratings of lines as part of an explicit learning goal. A human evaluation of the generated dialogue lines reveals that utterances generated by the multi-tasking models were subjectively rated as the most typical, most moving the conversation forward, and least offensive. Based on these promising first results, we discuss future research directions for incorporating subjective human evaluations into language model training and to hence keep the human user in the loop during the development process.</abstract>
       <url hash="3ae444bc">2021.humeval-1.2</url>
       <video href="https://www.youtube.com/watch?v=SE-y2PLX2wE"/>
-      <bibkey>nyberg-etal-2021-estimating</bibkey>
       <video href="2021.humeval-1.2.mp4"/>
+      <bibkey>nyberg-etal-2021-estimating</bibkey>
     </paper>
     <paper id="3">
       <title>Trading Off Diversity and Quality in Natural Language Generation</title>
@@ -153,7 +153,7 @@
     <paper id="13">
       <title>Detecting Post-Edited References and Their Effect on Human Evaluation</title>
       <author><first>Věra</first><last>Kloudová</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <pages>114–119</pages>
       <abstract>This paper provides a quick overview of possible methods how to detect that reference translations were actually created by post-editing an MT system. Two methods based on automatic metrics are presented: BLEU difference between the suspected MT and some other good MT and BLEU difference using additional references. These two methods revealed a suspicion that the WMT 2020 Czech reference is based on MT. The suspicion was confirmed in a manual analysis by finding concrete proofs of the post-editing procedure in particular sentences. Finally, a typology of post-editing changes is presented where typical errors or changes made by the post-editor or errors adopted from the MT are classified.</abstract>
@@ -164,14 +164,14 @@
       <title>A Case Study of Efficacy and Challenges in Practical Human-in-Loop Evaluation of <fixed-case>NLP</fixed-case> Systems Using Checklist</title>
       <author><first>Shaily</first><last>Bhatt</last></author>
       <author><first>Rahul</first><last>Jain</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <pages>120–130</pages>
       <abstract>Despite state-of-the-art performance, NLP systems can be fragile in real-world situations. This is often due to insufficient understanding of the capabilities and limitations of models and the heavy reliance on standard evaluation benchmarks. Research into non-standard evaluation to mitigate this brittleness is gaining increasing attention. Notably, the behavioral testing principle ‘Checklist’, which decouples testing from implementation revealed significant failures in state-of-the-art models for multiple tasks. In this paper, we present a case study of using Checklist in a practical scenario. We conduct experiments for evaluating an offensive content detection system and use a data augmentation technique for improving the model using insights from Checklist. We lay out the challenges and open questions based on our observations of using Checklist for human-in-loop evaluation and improvement of NLP systems. Disclaimer: The paper contains examples of content with offensive language. The examples do not represent the views of the authors or their employers towards any person(s), group(s), practice(s), or entity/entities.</abstract>
       <url hash="e16ad0fe">2021.humeval-1.14</url>
       <video href="https://www.youtube.com/watch?v=fjkKVUZHJRQ"/>
-      <bibkey>bhatt-etal-2021-case</bibkey>
       <video href="2021.humeval-1.14.mp4"/>
+      <bibkey>bhatt-etal-2021-case</bibkey>
     </paper>
     <paper id="15">
       <title>Interrater Disagreement Resolution: A Systematic Procedure to Reach Consensus in Annotation Tasks</title>
diff --git a/data/xml/2021.icnlsp.xml b/data/xml/2021.icnlsp.xml
index 30fae356d0..e38edd9963 100644
--- a/data/xml/2021.icnlsp.xml
+++ b/data/xml/2021.icnlsp.xml
@@ -80,7 +80,7 @@
     <paper id="7">
       <title>Beyond Voice Activity Detection: Hybrid Audio Segmentation for Direct Speech Translation</title>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>55–62</pages>
@@ -91,7 +91,7 @@
       <title>A Sample-Based Training Method for Distantly Supervised Relation Extraction with Pre-Trained Transformers</title>
       <author><first>Mehrdad</first><last>Nasser</last></author>
       <author><first>Mohamad Bagher</first><last>Sajadi</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <pages>63–72</pages>
       <url hash="dc920634">2021.icnlsp-1.8</url>
       <bibkey>nasser-etal-2021-sample</bibkey>
@@ -135,7 +135,7 @@
     <paper id="13">
       <title>Formulating Automated Responses to Cognitive Distortions for <fixed-case>CBT</fixed-case> Interactions</title>
       <author><first>Ignacio</first><last>de Toledo Rodriguez</last></author>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
       <pages>108–116</pages>
       <url hash="42b7009f">2021.icnlsp-1.13</url>
@@ -234,7 +234,7 @@
       <author><first>Abdul</first><last>Waheed</last></author>
       <author><first>Muskan</first><last>Goyal</last></author>
       <author><first>Nimisha</first><last>Mittal</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Ashish</first><last>Khanna</last></author>
       <author><first>Moolchand</first><last>Sharma</last></author>
       <pages>209–218</pages>
diff --git a/data/xml/2021.icon.xml b/data/xml/2021.icon.xml
index b52b6ee109..c0b5c2bd4a 100644
--- a/data/xml/2021.icon.xml
+++ b/data/xml/2021.icon.xml
@@ -3,9 +3,9 @@
   <volume id="main" ingest-date="2022-09-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 18th International Conference on Natural Language Processing (ICON)</booktitle>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Sobha Lalitha</first><last>Devi</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <publisher>NLP Association of India (NLPAI)</publisher>
       <address>National Institute of Technology Silchar, Silchar, India</address>
       <month>December</month>
@@ -21,7 +21,7 @@
       <title>Constrained Decoding for Technical Term Retention in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case></title>
       <author><first>Niyati</first><last>Bafna</last></author>
       <author><first>Martin</first><last>Vastl</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>1–6</pages>
       <abstract>Technical terms may require special handling when the target audience is bilingual, depending on the cultural and educational norms of the society in question. In particular, certain translation scenarios may require “term retention” i.e. preserving of the source language technical terms in the target language output to produce a fluent and comprehensible code-switched sentence. We show that a standard transformer-based machine translation model can be adapted easily to perform this task with little or no damage to the general quality of its output. We present an English-to-Hindi model that is trained to obey a “retain” signal, i.e. it can perform the required code-mixing on a list of terms, possibly unseen, provided at runtime. We perform automatic evaluation using BLEU as well as F1 metrics on the list of retained terms; we also collect manual judgments on the quality of the output sentences.</abstract>
       <url hash="53226245">2021.icon-main.1</url>
@@ -54,8 +54,8 @@
     </paper>
     <paper id="4">
       <title>Small Batch Sizes Improve Training of Low-Resource Neural <fixed-case>MT</fixed-case></title>
-      <author><first>Àlex</first><last>Atrio</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="alex-r-atrio"><first>Àlex</first><last>Atrio</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>18–24</pages>
       <abstract>We study the role of an essential hyper-parameter that governs the training of Transformers for neural machine translation in a low-resource setting: the batch size. Using theoretical insights and experimental evidence, we argue against the widespread belief that batch size should be set as large as allowed by the memory of the GPUs. We show that in a low-resource setting, a smaller batch size leads to higher scores in a shorter training time, and argue that this is due to better regularization of the gradients during training.</abstract>
       <url hash="3664f31c">2021.icon-main.4</url>
@@ -85,7 +85,7 @@
       <title>Assessing Post-editing Effort in the <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Direction</title>
       <author><first>Arafat</first><last>Ahsan</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>44–53</pages>
       <abstract>We present findings from a first in-depth post-editing effort estimation study in the English-Hindi direction along multiple effort indicators. We conduct a controlled experiment involving professional translators, who complete assigned tasks alternately, in a translation from scratch and a post-edit condition. We find that post-editing reduces translation time (by 63%), utilizes fewer keystrokes (by 59%), and decreases the number of pauses (by 63%) when compared to translating from scratch. We further verify the quality of translations thus produced via a human evaluation task in which we do not detect any discernible quality differences.</abstract>
       <url hash="46db2daf">2021.icon-main.7</url>
@@ -97,7 +97,7 @@
       <author><first>Laishram</first><last>Rahul</last></author>
       <author><first>Alok</first><last>Singh</last></author>
       <author><first>Salam Michael</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>54–63</pages>
       <abstract>In this paper, we report the experimental findings of building Speech-to-Text translation systems for Manipuri-English on low resource setting which is first of its kind in this language pair. For this purpose, a new dataset consisting of a Manipuri-English parallel corpus along with the corresponding audio version of the Manipuri text is built. Based on this dataset, a benchmark evaluation is reported for the Manipuri-English Speech-to-Text translation using two approaches: 1) a pipeline model consisting of ASR (Automatic Speech Recognition) and Machine translation, and 2) an end-to-end Speech-to-Text translation. Gaussian Mixture Model-Hidden Markov Model (GMM-HMM) and Time delay neural network (TDNN) Acoustic models are used to build two different pipeline systems using a shared MT system. Experimental result shows that the TDNN model outperforms GMM-HMM model significantly by a margin of 2.53% WER. However, their evaluation of Speech-to-Text translation differs by a small margin of 0.1 BLEU. Both the pipeline translation models outperform the end-to-end translation model by a margin of 2.6 BLEU score.</abstract>
@@ -109,7 +109,7 @@
       <author><first>Salam Michael</first><last>Singh</last></author>
       <author><first>Loitongbam</first><last>Sanayai Meetei</last></author>
       <author><first>Alok</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>64–74</pages>
       <abstract>In recent times, machine translation models can learn to perform implicit bridging between language pairs never seen explicitly during training and showing that transfer learning helps for languages with constrained resources. This work investigates the low resource machine translation via transfer learning from multilingual pre-trained models i.e. mBART-50 and mT5-base in the pretext of Indo-Aryan (Assamese and Bengali) and Tibeto-Burman (Manipuri) languages via finetuning as a downstream task. Assamese and Manipuri were absent in the pretraining of both mBART-50 and the mT5 models. However, the experimental results attest that the finetuning from these pre-trained models surpasses the multilingual model trained from scratch.</abstract>
@@ -138,7 +138,7 @@
       <title>The Importance of Context in Very Low Resource Language Modeling</title>
       <author><first>Lukas</first><last>Edman</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>86–92</pages>
       <abstract>This paper investigates very low resource language model pretraining, when less than 100 thousand sentences are available. We find that, in very low-resource scenarios, statistical n-gram language models outperform state-of-the-art neural models. Our experiments show that this is mainly due to the focus of the former on a local context. As such, we introduce three methods to improve a neural model’s performance in the low-resource setting, finding that limiting the model’s self-attention is the most effective one, improving on downstream tasks such as NLI and POS tagging by up to 5% for the languages we test on: English, Hindi, and Turkish.</abstract>
       <url hash="2f3ff333">2021.icon-main.12</url>
@@ -171,7 +171,7 @@
       <title>On the Universality of Deep Contextual Language Models</title>
       <author><first>Shaily</first><last>Bhatt</last></author>
       <author><first>Poonam</first><last>Goyal</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <pages>106–119</pages>
@@ -298,7 +298,7 @@
       <title>Towards Multimodal Vision-Language Models Generating Non-Generic Text</title>
       <author><first>Wes</first><last>Robbins</last></author>
       <author><first>Zanyar</first><last>Zohourianshahzadi</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>220–230</pages>
       <abstract>Vision-language models can assess visual context in an image and generate descriptive text. While the generated text may be accurate and syntactically correct, it is often overly general. To address this, recent work has used optical character recognition to supplement visual information with text extracted from an image. In this work, we contend that vision-language models can benefit from information that can be extracted from an image, but are not used by current models. We modify previous multimodal frameworks to accept relevant information from any number of auxiliary classifiers. In particular, we focus on person names as an additional set of tokens and create a novel image-caption dataset to facilitate captioning with person names. The dataset, Politicians and Athletes in Captions (PAC), consists of captioned images of well-known people in context. By fine-tuning pretrained models with this dataset, we demonstrate a model that can naturally integrate facial recognition tokens into generated text by training on limited data. For the PAC dataset, we provide a discussion on collection and baseline benchmark scores.</abstract>
       <url hash="b308199a">2021.icon-main.27</url>
@@ -308,7 +308,7 @@
     <paper id="28">
       <title>Image Caption Generation Framework for <fixed-case>A</fixed-case>ssamese News using Attention Mechanism</title>
       <author><first>Ringki</first><last>Das</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>231–239</pages>
       <abstract>Automatic caption generation is an artificial intelligence problem that falls at the intersection of computer vision and natural language processing. Although significant works have been reported in image captioning, the contribution is limited to English and few major languages with sufficient resources. But, no work on image captioning has been reported in a resource-constrained language like Assamese. With this inspiration, we propose an encoder-decoder based framework for image caption generation in the Assamese news domain. The VGG-16 pre-trained model at the encoder side and LSTM with an attention mechanism are employed at the decoder side to generate the Assamese caption. We train the proposed model on the dataset built in-house consisting of 10,000 images with a single caption for each image. We describe our experimental methodology, quantitative and qualitative results which validate the effectiveness of our model for caption generation. The proposed model shows a BLEU score of 12.1 outperforming the baseline model.</abstract>
       <url hash="9742f99f">2021.icon-main.28</url>
@@ -319,7 +319,7 @@
       <author><first>Alok</first><last>Singh</last></author>
       <author><first>Loitongbam</first><last>Sanayai Meetei</last></author>
       <author><first>Salam Michael</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>240–250</pages>
       <abstract>Describing a video is a challenging yet attractive task since it falls into the intersection of computer vision and natural language generation. The attention-based models have reported the best performance. However, all these models follow similar procedures, such as segmenting videos into chunks of frames or sampling frames at equal intervals for visual encoding. The process of segmenting video into chunks or sampling frames at equal intervals causes encoding of redundant visual information and requires additional computational cost since a video consists of a sequence of similar frames and suffers from inescapable noise such as uneven illumination, occlusion and motion effects. In this paper, a boundary-based keyframes selection approach for video description is proposed that allow the system to select a compact subset of keyframes to encode the visual information and generate a description for a video without much degradation. The proposed approach uses 3 4 frames per video and yields competitive performance over two benchmark datasets MSVD and MSR-VTT (in both English and Hindi).</abstract>
@@ -369,7 +369,7 @@
     <paper id="34">
       <title>Classifying Verses of the <fixed-case>Q</fixed-case>uran using Doc2vec</title>
       <author><first>Menwa</first><last>Alshammeri</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad</first><last>Alsalka</last></author>
       <pages>284–288</pages>
       <abstract>The Quran, as a significant religious text, bears important spiritual and linguistic values. Understanding the text and inferring the underlying meanings entails semantic similarity analysis. We classified the verses of the Quran into 15 pre-defined categories or concepts, based on the Qurany corpus, using Doc2Vec and Logistic Regression. Our classifier scored 70% accuracy, and 60% F1-score using the distributed bag-of-words architecture. We then measured how similar the documents within the same category are to each other semantically and use this information to evaluate our model. We calculated the mean difference and average similarity values for each category to indicate how well our model describes that category.</abstract>
@@ -431,7 +431,7 @@
     </paper>
     <paper id="40">
       <title>Resolving Prepositional Phrase Attachment Ambiguities with Contextualized Word Embeddings</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <author><first>Atul</first><last>Kumar</last></author>
       <pages>335–340</pages>
       <abstract>This paper applies contextualized word embedding models to a long-standing problem in the natural language parsing community, namely prepositional phrase attachment. Following past formulations of this problem, we use data sets in which the attachment decision is both a binary-valued choice as well as a multi-valued choice. We present a deep learning architecture that fine-tunes the output of a contextualized word embedding model for the purpose of predicting attachment decisions. We present experiments on two commonly used datasets that outperform the previous best results, using only the original training data and the unannotated full sentence context.</abstract>
@@ -444,7 +444,7 @@
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Katsuhito</first><last>Sudoh</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>341–346</pages>
       <abstract>Pretrained multilingual language models have become a key part of cross-lingual transfer for many natural language processing tasks, even those without bilingual information. This work further investigates the cross-lingual transfer ability of these models for constituency parsing and focuses on multi-source transfer. Addressing structure and label set diversity problems, we propose the integration of typological features into the parsing model and treebank normalization. We trained the model on eight languages with diverse structures and use transfer parsing for an additional six low-resource languages. The experimental results show that the treebank normalization is essential for cross-lingual transfer performance and the typological features introduce further improvement. As a result, our approach improves the baseline F1 of multi-source transfer by 5 on average.</abstract>
@@ -524,7 +524,7 @@
       <title>Temporal Question Generation from History Text</title>
       <author><first>Harsimran</first><last>Bedi</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>408–413</pages>
       <abstract>Temporal analysis of history text has always held special significance to students, historians and the Social Sciences community in general. We observe from experimental data that existing deep learning (DL) models of ProphetNet and UniLM for question generation (QG) task do not perform satisfactorily when used directly for temporal QG from history text. We propose linguistically motivated templates for generating temporal questions that probe different aspects of history text and show that finetuning the DL models using the temporal questions significantly improves their performance on temporal QG task. Using automated metrics as well as human expert evaluation, we show that performance of the DL models finetuned with the template-based questions is better than finetuning done with temporal questions from SQuAD.</abstract>
       <url hash="d6932527">2021.icon-main.49</url>
@@ -636,7 +636,7 @@
       <title>An Efficient <fixed-case>BERT</fixed-case> Based Approach to Detect Aggression and Misogyny</title>
       <author><first>Sandip</first><last>Dutta</last></author>
       <author><first>Utso</first><last>Majumder</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>493–498</pages>
       <abstract>Social media is bustling with ever growing cases of trolling, aggression and hate. A huge amount of social media data is generated each day which is insurmountable for manual inspection. In this work, we propose an efficient and fast method to detect aggression and misogyny in social media texts. We use data from the Second Workshop on Trolling, Aggression and Cyber Bullying for our task. We employ a BERT based model to augment our data. Next we employ Tf-Idf and XGBoost for detecting aggression and misogyny. Our model achieves 0.73 and 0.85 Weighted F1 Scores on the 2 prediction tasks, which are comparable to the state of the art. However, the training time, model size and resource requirements of our model are drastically lower compared to the state of the art models, making our model useful for fast inference.</abstract>
       <url hash="485d5744">2021.icon-main.60</url>
@@ -662,7 +662,7 @@
     <paper id="63">
       <title>Using Random Perturbations to Mitigate Adversarial Attacks on Sentiment Analysis Models</title>
       <author><first>Abigail</first><last>Swenor</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>519–528</pages>
       <abstract>Attacks on deep learning models are often difficult to identify and therefore are difficult to protect against. This problem is exacerbated by the use of public datasets that typically are not manually inspected before use. In this paper, we offer a solution to this vulnerability by using, during testing, random perturbations such as spelling correction if necessary, substitution by random synonym, or simply drop-ping the word. These perturbations are applied to random words in random sentences to defend NLP models against adversarial attacks. Our Random Perturbations Defense andIncreased Randomness Defense methods are successful in returning attacked models to similar accuracy of models before attacks. The original accuracy of the model used in this work is 80% for sentiment classification. After undergoing attacks, the accuracy drops to an accuracy between 0% and 44%. After applying our defense methods, the accuracy of the model is returned to the original accuracy within statistical significance.</abstract>
       <url hash="6b5336d3">2021.icon-main.63</url>
@@ -722,7 +722,7 @@
       <title><fixed-case>D</fixed-case>ialog<fixed-case>A</fixed-case>cts based Search and Retrieval for Response Generation in Conversation Systems</title>
       <author><first>Nidhi</first><last>Arora</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>564–572</pages>
       <abstract>Designing robust conversation systems with great customer experience requires a team of design experts to think of all probable ways a customer can interact with the system and then author responses for each use case individually. The responses are authored from scratch for each new client and application even though similar responses have been created in the past. This happens largely because the responses are encoded using domain specific set of intents and entities. In this paper, we present preliminary work to define a dialog act schema to merge and map responses from different domains and applications using a consistent domain-independent representation. These representations are stored and maintained using an Elasticsearch system to facilitate generation of responses through a search and retrieval process. We experimented generating different surface realizations for a response given a desired information state of the dialog.</abstract>
       <url hash="4a833660">2021.icon-main.69</url>
@@ -744,7 +744,7 @@
     <paper id="71">
       <title>Weakly Supervised Extraction of Tasks from Text</title>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Anindita</first><last>Sinha Banerjee</last></author>
       <pages>583–592</pages>
       <abstract>In this paper, we propose a novel problem of automatic extraction of tasks from text. A task is a well-defined knowledge-based volitional action. We describe various characteristics of tasks as well as compare and contrast them with events. We propose two techniques for task extraction – i) using linguistic patterns and ii) using a BERT-based weakly supervised neural model. We evaluate our techniques with other competent baselines on 4 datasets from different domains. Overall, the BERT-based weakly supervised neural model generalizes better across multiple domains as compared to the purely linguistic patterns based approach.</abstract>
@@ -796,7 +796,7 @@
       <author><first>Saujas</first><last>Vaduguru</last></author>
       <author><first>Partho</first><last>Sarthi</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>619–628</pages>
       <abstract>Learning linguistic generalizations from only a few examples is a challenging task. Recent work has shown that program synthesis – a method to learn rules from data in the form of programs in a domain-specific language – can be used to learn phonological rules in highly data-constrained settings. In this paper, we use the problem of phonological stress placement as a case to study how the design of the domain-specific language influences the generalization ability when using the same learning algorithm. We find that encoding the distinction between consonants and vowels results in much better performance, and providing syllable-level information further improves generalization. Program synthesis, thus, provides a way to investigate how access to explicit linguistic information influences what can be learnt from a small number of examples.</abstract>
       <url hash="c877a33f">2021.icon-main.76</url>
@@ -819,7 +819,7 @@
       <title>Introduction to <fixed-case>P</fixed-case>roverb<fixed-case>N</fixed-case>et: An Online Multilingual Database of Proverbs and Comprehensive Metadata</title>
       <author><first>Shreyas</first><last>Pimpalgaonkar</last></author>
       <author><first>Dhanashree</first><last>Lele</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <author><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>638–650</pages>
       <abstract>Proverbs are unique linguistic expressions used by humans in the process of communication. They are frozen expressions and have the capacity to convey deep semantic aspects of a given language. This paper describes ProverbNet, a novel online multilingual database of proverbs and comprehensive metadata equipped with a multipurpose search engine to store, explore, understand, classify and analyze proverbs and their metadata. ProverbNet has immense applications including machine translation, cognitive studies and learning tools. We have 2320 Sanskrit Proverbs and 1136 Marathi proverbs and their metadata in ProverbNet and are adding more proverbs in different languages to the network.</abstract>
@@ -846,7 +846,7 @@
       <title><fixed-case>F</fixed-case>in<fixed-case>R</fixed-case>ead: A Transfer Learning Based Tool to Assess Readability of Definitions of Financial Terms</title>
       <author><first>Sohom</first><last>Ghosh</last></author>
       <author><first>Shovon</first><last>Sengupta</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <author><first>Sunny Kumar</first><last>Singh</last></author>
       <pages>658–659</pages>
       <abstract>Simplified definitions of complex terms help learners to understand any content better. Comprehending readability is critical for the simplification of these contents. In most cases, the standard formula based readability measures do not hold good for measuring the complexity of definitions of financial terms. Furthermore, some of them works only for corpora of longer length which have at least 30 sentences. In this paper, we present a tool for evaluating readability of definitions of financial terms. It consists of a Light GBM based classification layer over sentence embeddings (Reimers et al., 2019) of FinBERT (Araci, 2019). It is trained on glossaries of several financial textbooks and definitions of various financial terms which are available on the web. The extensive evaluation shows that it outperforms the standard benchmarks by achieving a AU-ROC score of 0.993 on the validation set.</abstract>
@@ -981,7 +981,7 @@
       <author><first>Julio de Jesús</first><last>Guerrero-Zambrano</last></author>
       <author><first>Dominic</first><last>Forest</last></author>
       <author><first>Gerardo</first><last>Reyes-Salgado</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>41–45</pages>
       <abstract>This work aims to evaluate the ability that both probabilistic and state-of-the-art vector space modeling (VSM) methods provide to well known machine learning algorithms to identify social network documents to be classified as aggressive, gender biased or communally charged. To this end, an exploratory stage was performed first in order to find relevant settings to test, i.e. by using training and development samples, we trained multiple algorithms using multiple vector space modeling and probabilistic methods and discarded the less informative configurations. These systems were submitted to the competition of the ComMA@ICON’21 Workshop on Multilingual Gender Biased and Communal Language Identification.</abstract>
       <url hash="d6021e31">2021.icon-multigen.6</url>
@@ -1001,7 +1001,7 @@
       <title>Sdutta at <fixed-case>C</fixed-case>om<fixed-case>MA</fixed-case>@<fixed-case>ICON</fixed-case>: A <fixed-case>CNN</fixed-case>-<fixed-case>LSTM</fixed-case> Model for Hate Detection</title>
       <author><first>Sandip</first><last>Dutta</last></author>
       <author><first>Utso</first><last>Majumder</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>53–57</pages>
       <abstract>In today’s world, online activity and social media are facing an upsurge of cases of aggression, gender-biased comments and communal hate. In this shared task, we used a CNN-LSTM hybrid method to detect aggression, misogynistic and communally charged content in social media texts. First, we employ text cleaning and convert the text into word embeddings. Next we proceed to our CNN-LSTM based model to predict the nature of the text. Our model achieves 0.288, 0.279, 0.294 and 0.335 Overall Micro F1 Scores in multilingual, Meitei, Bengali and Hindi datasets, respectively, on the 3 prediction labels.</abstract>
       <url hash="704ef4e1">2021.icon-multigen.8</url>
@@ -1013,7 +1013,7 @@
       <author><first>Oxana</first><last>Vitman</last></author>
       <author><first>Hosahalli Lakshmaiah</first><last>Shashirekha</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>58–63</pages>
       <abstract>Social media analytics are widely being explored by researchers for various applications. Prominent among them are identifying and blocking abusive contents especially targeting individuals and communities, for various reasons. The increasing abusive contents and the increasing number of users on social media demands automated tools to detect and filter the abusive contents as it is highly impossible to handle this manually. To address the challenges of detecting abusive contents, this paper describes the approaches proposed by our team MUCIC for Multilingual Gender Biased and Communal Language Identification shared task (ComMA@ICON) at International Conference on Natural Language Processing (ICON) 2021. This shared task dataset consists of code-mixed multi-script texts in Meitei, Bangla, Hindi as well as in Multilingual (a combination of Meitei, Bangla, Hindi, and English). The shared task is modeled as a multi-label Text Classification (TC) task combining word and char n-grams with vectors obtained from Multilingual Sentence Encoder (MSE) to train the Machine Learning (ML) classifiers using Pre-aggregation and Post-aggregation of labels. These approaches obtained the highest performance in the shared task for Meitei, Bangla, and Multilingual texts with instance-F1 scores of 0.350, 0.412, and 0.380 respectively using Pre-aggregation of labels.</abstract>
       <url hash="9e1d22f5">2021.icon-multigen.9</url>
diff --git a/data/xml/2021.ijclclp.xml b/data/xml/2021.ijclclp.xml
index ec43d01465..8e5c35ce55 100644
--- a/data/xml/2021.ijclclp.xml
+++ b/data/xml/2021.ijclclp.xml
@@ -23,7 +23,7 @@
       <author><first>Tien-Hong</first><last>Lo</last></author>
       <author><first>Shi-Yan</first><last>Weng</last></author>
       <author><first>Shih-Hsuan</first><last>Chiu</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="62acd771">2021.ijclclp-1.1</url>
       <bibkey>chao-etal-2021-ntnu</bibkey>
@@ -51,7 +51,7 @@
     <paper id="4">
       <title>Textual Relations with Conjunctive Adverbials in <fixed-case>E</fixed-case>nglish Writing by <fixed-case>C</fixed-case>hinese Speakers: A corpus-based Approach</title>
       <author><first>Tung-Yu</first><last>Kao</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <url hash="3becda25">2021.ijclclp-1.4</url>
       <bibkey>kao-chen-2021-textual</bibkey>
     </paper>
@@ -59,7 +59,7 @@
       <title>中文新聞文本之宣傳手法標記與分析 (The Analysis and Annotation of Propaganda Techniques in <fixed-case>C</fixed-case>hinese News Texts)</title>
       <author><first>Meng-Hsien</first><last>Shih</last></author>
       <author><first>Ren-feng</first><last>Duann</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="4e6b1858">2021.ijclclp-1.5</url>
       <language>zho</language>
       <bibkey>shih-etal-2021-zhong</bibkey>
@@ -103,7 +103,7 @@
     <paper id="3">
       <title>使用低通時序列語音特徵訓練理想比率遮罩法之語音強化 (Employing Low-Pass Filtered Temporal Speech Features for the Training of Ideal Ratio Mask in Speech Enhancement)</title>
       <author><first>Yan-Tong</first><last>Chen</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <url hash="3c7185e5">2021.ijclclp-2.3</url>
       <language>zho</language>
       <bibkey>chen-hung-2021-shi</bibkey>
@@ -121,7 +121,7 @@
       <author><first>Chao-Chun</first><last>Liang</last></author>
       <author><first>Daniel</first><last>Lee</last></author>
       <author><first>Meng-Tse</first><last>Wu</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="f53e7466">2021.ijclclp-2.5</url>
       <bibkey>liang-etal-2021-answering</bibkey>
diff --git a/data/xml/2021.inlg.xml b/data/xml/2021.inlg.xml
index 70bfa6f967..68c9f1f73d 100644
--- a/data/xml/2021.inlg.xml
+++ b/data/xml/2021.inlg.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-09-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 14th International Conference on Natural Language Generation</booktitle>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Angela</first><last>Fan</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <editor><first>Yaji</first><last>Sripada</last></editor>
@@ -21,8 +21,8 @@
     <paper id="1">
       <title>Generating Diverse Descriptions from Semantic Graphs</title>
       <author><first>Jiuzhou</first><last>Han</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1–11</pages>
       <abstract>Text generation from semantic graphs is traditionally performed with deterministic methods, which generate a unique description given an input graph. However, the generation problem admits a range of acceptable textual outputs, exhibiting lexical, syntactic and semantic variation. To address this disconnect, we present two main contributions. First, we propose a stochastic graph-to-text model, incorporating a latent variable in an encoder-decoder model, and its use in an ensemble. Second, to assess the diversity of the generated sentences, we propose a new automatic evaluation metric which jointly evaluates output diversity and quality in a multi-reference setting. We evaluate the models on WebNLG datasets in English and Russian, and show an ensemble of stochastic models produces diverse sets of generated sentences while, retaining similar quality to state-of-the-art models.</abstract>
       <url hash="77d12172">2021.inlg-1.1</url>
@@ -34,7 +34,7 @@
       <author><first>Aleksandre</first><last>Maskharashvili</last></author>
       <author><first>Symon</first><last>Stevens-Guille</last></author>
       <author><first>Xintong</first><last>Li</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>12–23</pages>
       <abstract>Recent developments in natural language generation (NLG) have bolstered arguments in favor of re-introducing explicit coding of discourse relations in the input to neural models. In the Methodius corpus, a meaning representation (MR) is hierarchically structured and includes discourse relations. Meanwhile pre-trained language models have been shown to implicitly encode rich linguistic knowledge which provides an excellent resource for NLG. By virtue of synthesizing these lines of research, we conduct extensive experiments on the benefits of using pre-trained models and discourse relation information in MRs, focusing on the improvement of discourse coherence and correctness. We redesign the Methodius corpus; we also construct another Methodius corpus in which MRs are not hierarchically structured but flat. We report experiments on different versions of the corpora, which probe when, where, and how pre-trained models benefit from MRs with discourse relation information in them. We conclude that discourse relations significantly improve NLG when data is limited.</abstract>
       <url hash="c3d2f45e">2021.inlg-1.2</url>
@@ -47,7 +47,7 @@
       <author><first>Haemanth</first><last>Santhi Ponnusamy</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Lukas</first><last>Stein</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>24–34</pages>
       <abstract>In question generation, the question produced has to be well-formed and meaningfully related to the answer serving as input. Neural generation methods have predominantly leveraged the distributional semantics of words as representations of meaning and generated questions one word at a time. In this paper, we explore the viability of form-based and more fine-grained encodings, such as character or subword representations for question generation. We start from the typical seq2seq architecture using word embeddings presented by De Kuthy et al. (2020), who generate questions from text so that the answer given in the input text matches not just in meaning but also in form, satisfying question-answer congruence. We show that models trained on character and subword representations substantially outperform the published results based on word embeddings, and they do so with fewer parameters. Our approach eliminates two important problems of the word-based approach: the encoding of rare or out-of-vocabulary words and the incorrect replacement of words with semantically-related ones. The character-based model substantially improves on the published results, both in terms of BLEU scores and regarding the quality of the generated question. Going beyond the specific task, this result adds to the evidence weighing different form- and meaning-based representations for natural language processing tasks.</abstract>
       <url hash="09da1a7f">2021.inlg-1.3</url>
@@ -80,7 +80,7 @@
       <title>Predicting Antonyms in Context using <fixed-case>BERT</fixed-case></title>
       <author><first>Ayana</first><last>Niwa</last></author>
       <author><first>Keisuke</first><last>Nishiguchi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>48–54</pages>
       <abstract>We address the task of antonym prediction in a context, which is a fill-in-the-blanks problem. This task setting is unique and practical because it requires contrastiveness to the other word and naturalness as a text in filling a blank. We propose methods for fine-tuning pre-trained masked language models (BERT) for context-aware antonym prediction. The experimental results demonstrate that these methods have positive impacts on the prediction of antonyms within a context. Moreover, human evaluation reveals that more than 85% of predictions using the proposed method are acceptable as antonyms.</abstract>
       <url hash="ffc3387c">2021.inlg-1.6</url>
@@ -115,7 +115,7 @@
       <author><first>Karthik</first><last>Gopalakrishnan</last></author>
       <author><first>Pankaj</first><last>Rajan</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>76–86</pages>
       <abstract>Incorporating external knowledge sources effectively in conversations is a longstanding problem in open-domain dialogue research. The existing literature on open-domain knowledge selection is limited and makes certain brittle assumptions on knowledge sources to simplify the overall task, such as the existence of a single relevant knowledge sentence per context. In this work, we evaluate the existing state of open-domain conversation knowledge selection, showing where the existing methodologies regarding data and evaluation are flawed. We then improve on them by proposing a new framework for collecting relevant knowledge, and create an augmented dataset based on the Wizard of Wikipedia (WOW) corpus, which we call WOW++. WOW++ averages 8 relevant knowledge sentences per dialogue context, embracing the inherent ambiguity of open-domain dialogue knowledge selection. We then benchmark various knowledge ranking algorithms on this augmented dataset with both intrinsic evaluation and extrinsic measures of response quality, showing that neural rerankers that use WOW++ can outperform rankers trained on standard datasets.</abstract>
       <url hash="45ade724">2021.inlg-1.9</url>
@@ -127,7 +127,7 @@
       <author><first>Xintong</first><last>Li</last></author>
       <author><first>Symon</first><last>Stevens-Guille</last></author>
       <author><first>Aleksandre</first><last>Maskharashvili</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>87–102</pages>
       <abstract>Neural approaches to natural language generation in task-oriented dialogue have typically required large amounts of annotated training data to achieve satisfactory performance, especially when generating from compositional inputs. To address this issue, we show that self-training enhanced with constrained decoding yields large gains in data efficiency on a conversational weather dataset that employs compositional meaning representations. In particular, our experiments indicate that self-training with constrained decoding can enable sequence-to-sequence models to achieve satisfactory quality using vanilla decoding with five to ten times less data than with ordinary supervised baseline; moreover, by leveraging pretrained models, data efficiency can be increased further to fifty times. We confirm the main automatic results with human evaluations and show that they extend to an enhanced, compositional version of the E2E dataset. The end result is an approach that makes it possible to achieve acceptable performance on compositional NLG tasks using hundreds rather than tens of thousands of training samples.</abstract>
       <url hash="a39a71a0">2021.inlg-1.10</url>
@@ -154,7 +154,7 @@
       <author><first>Sameen</first><last>Maruf</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>114–127</pages>
       <abstract>We offer an approach to explain Decision Tree (DT) predictions by addressing potential conflicts between aspects of these predictions and plausible expectations licensed by background information. We define four types of conflicts, operationalize their identification, and specify explanatory schemas that address them. Our human evaluation focused on the effect of explanations on users’ understanding of a DT’s reasoning and their willingness to act on its predictions. The results show that (1) explanations that address potential conflicts are considered at least as good as baseline explanations that just follow a DT path; and (2) the conflict-based explanations are deemed especially valuable when users’ expectations disagree with the DT’s predictions.</abstract>
       <url hash="0a6f5d87">2021.inlg-1.12</url>
@@ -174,7 +174,7 @@
     <paper id="14">
       <title>Underreporting of errors in <fixed-case>NLG</fixed-case> output, and what to do about it</title>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <author><first>Stephanie</first><last>Inglis</last></author>
@@ -195,7 +195,7 @@
       <title>What can Neural Referential Form Selectors Learn?</title>
       <author><first>Guanyi</first><last>Chen</last></author>
       <author><first>Fahime</first><last>Same</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>154–166</pages>
       <abstract>Despite achieving encouraging results, neural Referring Expression Generation models are often thought to lack transparency. We probed neural Referential Form Selection (RFS) models to find out to what extent the linguistic features influencing the RE form are learned and captured by state-of-the-art RFS models. The results of 8 probing tasks show that all the defined features were learned to some extent. The probing tasks pertaining to referential status and syntactic position exhibited the highest performance. The lowest performance was achieved by the probing models designed to predict discourse structure properties beyond the sentence level.</abstract>
       <url hash="00e09a6e">2021.inlg-1.15</url>
@@ -224,7 +224,7 @@
       <title>Using <fixed-case>BERT</fixed-case> for choosing classifiers in <fixed-case>M</fixed-case>andarin</title>
       <author><first>Jani</first><last>Järnfors</last></author>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Rint</first><last>Sybesma</last></author>
       <pages>172–176</pages>
       <abstract>Choosing the most suitable classifier in a linguistic context is a well-known problem in the production of Mandarin and many other languages. The present paper proposes a solution based on BERT, compares this solution to previous neural and rule-based models, and argues that the BERT model performs particularly well on those difficult cases where the classifier adds information to the text.</abstract>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="18">
       <title>Enriching the <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> dataset</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Helena</first><last>Vaz</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
@@ -276,7 +276,7 @@
       <author><first>Steven Y.</first><last>Feng</last></author>
       <author><first>Jessica</first><last>Huynh</last></author>
       <author><first>Chaitanya Prasad</first><last>Narisetty</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
       <pages>212–225</pages>
       <abstract>We motivate and propose a suite of simple but effective improvements for concept-to-text generation called SAPPHIRE: Set Augmentation and Post-hoc PHrase Infilling and REcombination. We demonstrate their effectiveness on generative commonsense reasoning, a.k.a. the CommonGen task, through experiments using both BART and T5 models. Through extensive automatic and human evaluation, we show that SAPPHIRE noticeably improves model performance. An in-depth qualitative analysis illustrates that SAPPHIRE effectively addresses many issues of the baseline model generations, including lack of commonsense, insufficient specificity, and poor fluency.</abstract>
@@ -370,7 +370,7 @@
     <paper id="30">
       <title>Another <fixed-case>PASS</fixed-case>: A Reproduction Study of the Human Evaluation of a Football Report Generation System</title>
       <author><first>Simon</first><last>Mille</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Anya</first><last>Belz</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <pages>286–292</pages>
@@ -381,7 +381,7 @@
     </paper>
     <paper id="31">
       <title>A Reproduction Study of an Annotation-based Human Evaluation of <fixed-case>MT</fixed-case> Outputs</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Anya</first><last>Belz</last></author>
       <pages>293–300</pages>
       <abstract>In this paper we report our reproduction study of the Croatian part of an annotation-based human evaluation of machine-translated user reviews (Popovic, 2020). The work was carried out as part of the ReproGen Shared Task on Reproducibility of Human Evaluation in NLG. Our aim was to repeat the original study exactly, except for using a different set of evaluators. We describe the experimental design, characterise differences between original and reproduction study, and present the results from each study, along with analysis of the similarity between them. For the six main evaluation results of Major/Minor/All Comprehension error rates and Major/Minor/All Adequacy error rates, we find that (i) 4/6 system rankings are the same in both studies, (ii) the relative differences between systems are replicated well for Major Comprehension and Adequacy (Pearson’s &gt; 0.9), but not for the corresponding Minor error rates (Pearson’s 0.36 for Adequacy, 0.67 for Comprehension), and (iii) the individual system scores for both types of Minor error rates had a higher degree of reproducibility than the corresponding Major error rates. We also examine inter-annotator agreement and compare the annotations obtained in the original and reproduction studies.</abstract>
@@ -414,7 +414,7 @@
     <paper id="34">
       <title>Quality Evaluation of the Low-Resource Synthetically Generated Code-Mixed <fixed-case>H</fixed-case>inglish Text</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>314–319</pages>
       <abstract>In this shared task, we seek the participating teams to investigate the factors influencing the quality of the code-mixed text generation systems. We synthetically generate code-mixed Hinglish sentences using two distinct approaches and employ human annotators to rate the generation quality. We propose two subtasks, quality rating prediction and annotators’ disagreement prediction of the synthetic Hinglish dataset. The proposed subtasks will put forward the reasoning and explanation of the factors influencing the quality and human perception of the code-mixed text.</abstract>
       <url hash="7ca3c662">2021.inlg-1.34</url>
@@ -475,7 +475,7 @@
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Umang</first><last>Jain</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>353–363</pages>
       <abstract>Social chatbots have gained immense popularity, and their appeal lies not just in their capacity to respond to the diverse requests from users, but also in the ability to develop an emotional connection with users. To further develop and promote social chatbots, we need to concentrate on increasing user interaction and take into account both the intellectual and emotional quotient in the conversational agents. Therefore, in this work, we propose the task of sentiment aware emotion controlled personalized dialogue generation giving the machine the capability to respond emotionally and in accordance with the persona of the user. As sentiment and emotions are highly co-related, we use the sentiment knowledge of the previous utterance to generate the correct emotional response in accordance with the user persona. We design a Transformer based Dialogue Generation framework, that generates responses that are sensitive to the emotion of the user and corresponds to the persona and sentiment as well. Moreover, the persona information is encoded by a different Transformer encoder, along with the dialogue history, is fed to the decoder for generating responses. We annotate the PersonaChat dataset with sentiment information to improve the response quality. Experimental results on the PersonaChat dataset show that the proposed framework significantly outperforms the existing baselines, thereby generating personalized emotional responses in accordance with the sentiment that provides better emotional connection and user satisfaction as desired in a social chatbot.</abstract>
       <url hash="5d17bbe4">2021.inlg-1.39</url>
@@ -495,10 +495,10 @@
     </paper>
     <paper id="41">
       <title>Decoding, Fast and Slow: A Case Study on Balancing Trade-Offs in Incremental, Character-level Pragmatic Reasoning</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Hendrik</first><last>Buschmeier</last></author>
       <author><first>Ting</first><last>Han</last></author>
-      <author><first>Simeon</first><last>Schüz</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last></author>
       <pages>371–376</pages>
       <abstract>Recent work has adopted models of pragmatic reasoning for the generation of informative language in, e.g., image captioning. We propose a simple but highly effective relaxation of fully rational decoding, based on an existing incremental and character-level approach to pragmatically informative neural image captioning. We implement a mixed, ‘fast’ and ‘slow’, speaker that applies pragmatic reasoning occasionally (only word-initially), while unrolling the language model. In our evaluation, we find that increased informativeness through pragmatic decoding generally lowers quality and, somewhat counter-intuitively, increases repetitiveness in captions. Our mixed speaker, however, achieves a good balance between quality and informativeness.</abstract>
       <url hash="5bfa463c">2021.inlg-1.41</url>
@@ -540,7 +540,7 @@
     <paper id="45">
       <title>Attention Is Indeed All You Need: Semantically Attention-Guided Decoding for Data-to-Text <fixed-case>NLG</fixed-case></title>
       <author><first>Juraj</first><last>Juraska</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>416–431</pages>
       <abstract>Ever since neural models were adopted in data-to-text language generation, they have invariably been reliant on extrinsic components to improve their semantic accuracy, because the models normally do not exhibit the ability to generate text that reliably mentions all of the information provided in the input. In this paper, we propose a novel decoding method that extracts interpretable information from encoder-decoder models’ cross-attention, and uses it to infer which attributes are mentioned in the generated text, which is subsequently used to rescore beam hypotheses. Using this decoding method with T5 and BART, we show on three datasets its ability to dramatically reduce semantic errors in the generated outputs, while maintaining their state-of-the-art quality.</abstract>
       <url hash="924003e6">2021.inlg-1.45</url>
diff --git a/data/xml/2021.insights.xml b/data/xml/2021.insights.xml
index 2977e85b23..ca071aab16 100644
--- a/data/xml/2021.insights.xml
+++ b/data/xml/2021.insights.xml
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Corrected <fixed-case>CBOW</fixed-case> Performs as well as Skip-gram</title>
-      <author><first>Ozan</first><last>İrsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last></author>
       <author><first>Adrian</first><last>Benton</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
       <pages>1–8</pages>
@@ -45,7 +45,7 @@
       <title><fixed-case>BERT</fixed-case> Cannot Align Characters</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>16–22</pages>
       <abstract>In previous work, it has been shown that BERT can adequately align cross-lingual sentences on the word level. Here we investigate whether BERT can also operate as a char-level aligner. The languages examined are English, Fake English, German and Greek. We show that the closer two languages are, the better BERT can align them on the character level. BERT indeed works well in English to Fake English alignment, but this does not generalize to natural languages to the same extent. Nevertheless, the proximity of two languages does seem to be a factor. English is more related to German than to Greek and this is reflected in how well BERT aligns them; English to German is better than English to Greek. We examine multiple setups and show that the similarity matrices for natural languages show weaker relations the further apart two languages are.</abstract>
       <url hash="993e58e5">2021.insights-1.3</url>
@@ -59,7 +59,7 @@
       <author><first>Sungjin</first><last>Park</last></author>
       <author><first>Seolhwa</first><last>Lee</last></author>
       <author><first>Taesun</first><last>Whang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>23–28</pages>
       <abstract>In the field of natural language processing, ensembles are broadly known to be effective in improving performance. This paper analyzes how ensemble of neural machine translation (NMT) models affect performance improvement by designing various experimental setups (i.e., intra-, inter-ensemble, and non-convergence ensemble). To an in-depth examination, we analyze each ensemble method with respect to several aspects such as different attention models and vocab strategies. Experimental results show that ensembling is not always resulting in performance increases and give noteworthy negative findings.</abstract>
       <url hash="507d99cc">2021.insights-1.4</url>
@@ -93,7 +93,7 @@
     <paper id="7">
       <title>Zero-Shot Cross-Lingual Transfer is a Hard Baseline to Beat in <fixed-case>G</fixed-case>erman Fine-Grained Entity Typing</title>
       <author><first>Sabine</first><last>Weber</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>42–48</pages>
       <abstract>The training of NLP models often requires large amounts of labelled training data, which makes it difficult to expand existing models to new languages. While zero-shot cross-lingual transfer relies on multilingual word embeddings to apply a model trained on one language to another, Yarowski and Ngai (2001) propose the method of annotation projection to generate training data without manual annotation. This method was successfully used for the tasks of named entity recognition and coarse-grained entity typing, but we show that it is outperformed by zero-shot cross-lingual transfer when applied to the similar task of fine-grained entity typing. In our study of fine-grained entity typing with the FIGER type ontology for German, we show that annotation projection amplifies the English model’s tendency to underpredict level 2 labels and is beaten by zero-shot cross-lingual transfer on three novel test sets.</abstract>
       <url hash="2a982749">2021.insights-1.7</url>
@@ -135,7 +135,7 @@
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Frithjof</first><last>Petrick</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>62–66</pages>
       <abstract>In this work, we conduct a comprehensive investigation on one of the centerpieces of modern machine translation systems: the encoder-decoder attention mechanism. Motivated by the concept of first-order alignments, we extend the (cross-)attention mechanism by a recurrent connection, allowing direct access to previous attention/alignment decisions. We propose several ways to include such a recurrency into the attention mechanism. Verifying their performance across different translation tasks we conclude that these extensions and dependencies are not beneficial for the translation performance of the Transformer architecture.</abstract>
       <url hash="19a0619e">2021.insights-1.10</url>
@@ -147,7 +147,7 @@
       <title>On the Difficulty of Segmenting Words with Attention</title>
       <author><first>Ramon</first><last>Sanabria</last></author>
       <author><first>Hao</first><last>Tang</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>67–73</pages>
       <abstract>Word segmentation, the problem of finding word boundaries in speech, is of interest for a range of tasks. Previous papers have suggested that for sequence-to-sequence models trained on tasks such as speech translation or speech recognition, attention can be used to locate and segment the words. We show, however, that even on monolingual data this approach is brittle. In our experiments with different input types, data sizes, and segmentation algorithms, only models trained to predict phones from words succeed in the task. Models trained to predict words from either phones or speech (i.e., the opposite direction needed to generalize to new data), yield much worse results, suggesting that attention-based segmentation is only useful in limited scenarios.</abstract>
       <url hash="de075e65">2021.insights-1.11</url>
@@ -181,7 +181,7 @@
       <title>Learning Data Augmentation Schedules for Natural Language Processing</title>
       <author><first>Daphné</first><last>Chopard</last></author>
       <author><first>Matthias S.</first><last>Treder</last></author>
-      <author><first>Irena</first><last>Spasić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
       <pages>89–102</pages>
       <abstract>Despite its proven efficiency in other fields, data augmentation is less popular in the context of natural language processing (NLP) due to its complexity and limited results. A recent study (Longpre et al., 2020) showed for example that task-agnostic data augmentations fail to consistently boost the performance of pretrained transformers even in low data regimes. In this paper, we investigate whether data-driven augmentation scheduling and the integration of a wider set of transformations can lead to improved performance where fixed and limited policies were unsuccessful. Our results suggest that, while this approach can help the training process in some settings, the improvements are unsubstantial. This negative result is meant to help researchers better understand the limitations of data augmentation for NLP.</abstract>
       <url hash="b534b2f5">2021.insights-1.14</url>
@@ -206,7 +206,7 @@
       <author><first>Liane</first><last>Guillou</last></author>
       <author><first>Sander</first><last>Bijl de Vroe</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>110–116</pages>
       <abstract>Understanding linguistic modality is widely seen as important for downstream tasks such as Question Answering and Knowledge Graph Population. Entailment Graph learning might also be expected to benefit from attention to modality. We build Entailment Graphs using a news corpus filtered with a modality parser, and show that stripping modal modifiers from predicates in fact increases performance. This suggests that for some tasks, the pragmatics of modal modification of predicates allows them to contribute as evidence of entailment.</abstract>
       <url hash="79eee9a8">2021.insights-1.16</url>
@@ -240,8 +240,8 @@
     <paper id="19">
       <title>Challenging the Semi-Supervised <fixed-case>VAE</fixed-case> Framework for Text Classification</title>
       <author><first>Ghazi</first><last>Felhi</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>136–143</pages>
       <abstract>Semi-Supervised Variational Autoencoders (SSVAEs) are widely used models for data efficient learning. In this paper, we question the adequacy of the standard design of sequence SSVAEs for the task of text classification as we exhibit two sources of overcomplexity for which we provide simplifications. These simplifications to SSVAEs preserve their theoretical soundness while providing a number of practical advantages in the semi-supervised setup where the result of training is a text classifier. These simplifications are the removal of (i) the Kullback-Liebler divergence from its objective and (ii) the fully unobserved latent variable from its probabilistic model. These changes relieve users from choosing a prior for their latent variables, make the model smaller and faster, and allow for a better flow of information into the latent variables. We compare the simplified versions to standard SSVAEs on 4 text classification tasks. On top of the above-mentioned simplification, experiments show a speed-up of 26%, while keeping equivalent classification scores. The code to reproduce our experiments is public.</abstract>
       <url hash="f94e6d7d">2021.insights-1.19</url>
diff --git a/data/xml/2021.internlp.xml b/data/xml/2021.internlp.xml
index 1cd84d5c1d..9202bc2eee 100644
--- a/data/xml/2021.internlp.xml
+++ b/data/xml/2021.internlp.xml
@@ -8,7 +8,7 @@
       <editor><first>Iryna</first><last>Gurevych</last></editor>
       <editor><first>Ji-Ung</first><last>Lee</last></editor>
       <editor><first>Filip</first><last>Radlinski</last></editor>
-      <editor><first>Hinrich</first><last>Schütze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></editor>
       <editor><first>Edwin</first><last>Simpson</last></editor>
       <editor><first>Lili</first><last>Yu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -67,12 +67,12 @@
     </paper>
     <paper id="5">
       <title>Dynamic Facet Selection by Maximizing Graded Relevance</title>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Md Faisal Mahbub</first><last>Chowdhury</last></author>
       <author><first>Yu</first><last>Deng</last></author>
       <author><first>Ruchi</first><last>Mahindru</last></author>
       <author><first>Nicolas Rodolfo</first><last>Fauceglia</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last></author>
       <pages>32–39</pages>
       <abstract>Dynamic faceted search (DFS), an interactive query refinement technique, is a form of Human–computer information retrieval (HCIR) approach. It allows users to narrow down search results through facets, where the facets-documents mapping is determined at runtime based on the context of user query instead of pre-indexing the facets statically. In this paper, we propose a new unsupervised approach for dynamic facet generation, namely optimistic facets, which attempts to generate the best possible subset of facets, hence maximizing expected Discounted Cumulative Gain (DCG), a measure of ranking quality that uses a graded relevance scale. We also release code to generate a new evaluation dataset. Through empirical results on two datasets, we show that the proposed DFS approach considerably improves the document ranking in the search results.</abstract>
diff --git a/data/xml/2021.isa.xml b/data/xml/2021.isa.xml
index e6a21d1aaf..0bd53f1304 100644
--- a/data/xml/2021.isa.xml
+++ b/data/xml/2021.isa.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-10-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable Semantic Annotation</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Groningen, The Netherlands (online)</address>
       <month>June</month>
@@ -68,7 +68,7 @@
     </paper>
     <paper id="6">
       <title>Converting Multilayer Glosses into Semantic and Pragmatic forms with <fixed-case>GENLIS</fixed-case></title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Serena</first><last>Trolvi</last></author>
       <author><first>Francesco</first><last>Stiffoni</last></author>
       <pages>54–64</pages>
diff --git a/data/xml/2021.iwclul.xml b/data/xml/2021.iwclul.xml
index 8ccbedf31f..5090315d2a 100644
--- a/data/xml/2021.iwclul.xml
+++ b/data/xml/2021.iwclul.xml
@@ -22,7 +22,7 @@
       <title>Keyword spotting for audiovisual archival search in <fixed-case>U</fixed-case>ralic languages</title>
       <author><first>Nils</first><last>Hjortnaes</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>1–7</pages>
       <url hash="1a134584">2021.iwclul-1.1</url>
       <bibkey>hjortnaes-etal-2021-keyword</bibkey>
diff --git a/data/xml/2021.iwcs.xml b/data/xml/2021.iwcs.xml
index 6fe1f24bd0..10ac7838cf 100644
--- a/data/xml/2021.iwcs.xml
+++ b/data/xml/2021.iwcs.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-10-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 14th International Conference on Computational Semantics (IWCS)</booktitle>
-      <editor><first>Sina</first><last>Zarrieß</last></editor>
+      <editor id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></editor>
       <editor><first>Johan</first><last>Bos</last></editor>
       <editor><first>Rik</first><last>van Noord</last></editor>
       <editor><first>Lasha</first><last>Abzianidze</last></editor>
@@ -23,7 +23,7 @@
       <author><first>Guy</first><last>Marshall</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Philip</first><last>Osborne</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>1–10</pages>
       <abstract>This paper explores the topic of transportability, as a sub-area of generalisability. By proposing the utilisation of metrics based on well-established statistics, we are able to estimate the change in performance of NLP models in new contexts. Defining a new measure for transportability may allow for better estimation of NLP system performance in new domains, and is crucial when assessing the performance of NLP systems in new tasks and domains. Through several instances of increasing complexity, we demonstrate how lightweight domain similarity measures can be used as estimators for the transportability in NLP applications. The proposed transportability measures are evaluated in the context of Named Entity Recognition and Natural Language Inference tasks.</abstract>
       <url hash="a46138ff">2021.iwcs-1.1</url>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>Computing All Quantifier Scopes with <fixed-case>CCG</fixed-case></title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>33–37</pages>
       <abstract>We present a method for computing all quantifer scopes that can be extracted from a single CCG derivation. To do that we build on the proposal of Steedman (1999, 2011) where all existential quantifiers are treated as Skolem functions. We extend the approach by introducing a better packed representation of all possible specifications that also includes node addresses where the specifications happen. These addresses are necessary for recovering all, and only, possible readings.</abstract>
       <url hash="5350cf8e">2021.iwcs-1.4</url>
@@ -63,7 +63,7 @@
       <author><first>Zili</first><last>Zhou</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Donal</first><last>Landers</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>38–50</pages>
       <abstract>This paper describes N-XKT (Neural encoding based on eXplanatory Knowledge Transfer), a novel method for the automatic transfer of explanatory knowledge through neural encoding mechanisms. We demonstrate that N-XKT is able to improve accuracy and generalization on science Question Answering (QA). Specifically, by leveraging facts from background explanatory knowledge corpora, the N-XKT model shows a clear improvement on zero-shot QA. Furthermore, we show that N-XKT can be fine-tuned on a target QA dataset, enabling faster convergence and more accurate results. A systematic analysis is conducted to quantitatively analyze the performance of the N-XKT model and the impact of different categories of knowledge on the zero-shot generalization task.</abstract>
       <url hash="3c0b73ba">2021.iwcs-1.5</url>
@@ -72,7 +72,7 @@
     <paper id="6">
       <title>Predicate Representations and Polysemy in <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Semantic Parsing</title>
       <author><first>James</first><last>Gung</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>51–62</pages>
       <abstract>Despite recent advances in semantic role labeling propelled by pre-trained text encoders like BERT, performance lags behind when applied to predicates observed infrequently during training or to sentences in new domains. In this work, we investigate how role labeling performance on low-frequency predicates and out-of-domain data can be further improved by using VerbNet, a verb lexicon that groups verbs into hierarchical classes based on shared syntactic and semantic behavior and defines semantic representations describing relations between arguments. We find that VerbNet classes provide an effective level of abstraction, improving generalization on low-frequency predicates by allowing them to learn from the training examples of other predicates belonging to the same class. We also find that joint training of VerbNet role labeling and predicate disambiguation of VerbNet classes for polysemous verbs leads to improvements in both tasks, naturally supporting the extraction of VerbNet’s semantic representations.</abstract>
       <url hash="b184d715">2021.iwcs-1.6</url>
@@ -92,7 +92,7 @@
       <title>Do Natural Language Explanations Represent Valid Logical Arguments? Verifying Entailment in Explainable <fixed-case>NLI</fixed-case> Gold Standards</title>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>76–86</pages>
       <abstract>An emerging line of research in Explainable NLP is the creation of datasets enriched with human-annotated explanations and rationales, used to build and evaluate models with step-wise inference and explanation generation capabilities. While human-annotated explanations are used as ground-truth for the inference, there is a lack of systematic assessment of their consistency and rigour. In an attempt to provide a critical quality assessment of Explanation Gold Standards (XGSs) for NLI, we propose a systematic annotation methodology, named Explanation Entailment Verification (EEV), to quantify the logical validity of human-annotated explanations. The application of EEV on three mainstream datasets reveals the surprising conclusion that a majority of the explanations, while appearing coherent on the surface, represent logically invalid arguments, ranging from being incomplete to containing clearly identifiable logical errors. This conclusion confirms that the inferential properties of explanations are still poorly formalised and understood, and that additional work on this line of research is necessary to improve the way Explanation Gold Standards are constructed.</abstract>
       <url hash="ecb500d5">2021.iwcs-1.8</url>
@@ -111,7 +111,7 @@
     <paper id="10">
       <title>Automatic Assignment of Semantic Frames in Disaster Response Team Communication Dialogues</title>
       <author><first>Natalia</first><last>Skachkova</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <pages>93–109</pages>
       <abstract>We investigate frame semantics as a meaning representation framework for team communication in a disaster response scenario. We focus on the automatic frame assignment and retrain PAFIBERT, which is one of the state-of-the-art frame classifiers, on English and German disaster response team communication data, obtaining accuracy around 90%. We examine the performance of both models and discuss their adjustments, such as sampling of additional training instances from an unrelated domain and adding extra lexical and discourse features to input token representations. We show that sampling has some positive effect on the German frame classifier, discuss an unexpected impact of extra features on the models’ behaviour and perform a careful error analysis.</abstract>
       <url hash="3cdce450">2021.iwcs-1.10</url>
@@ -146,7 +146,7 @@
       <author><first>Rebecca</first><last>Kehlbeck</last></author>
       <author><first>Rita</first><last>Sevastjanova</last></author>
       <author><first>Oliver</first><last>Deussen</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <pages>132–143</pages>
       <abstract>Research in NLP has mainly focused on factoid questions, with the goal of finding quick and reliable ways of matching a query to an answer. However, human discourse involves more than that: it contains non-canonical questions deployed to achieve specific communicative goals. In this paper, we investigate this under-studied aspect of NLP by introducing a targeted task, creating an appropriate corpus for the task and providing baseline models of diverse nature. With this, we are also able to generate useful insights on the task and open the way for future research in this direction.</abstract>
@@ -158,7 +158,7 @@
       <title>New Domain, Major Effort? How Much Data is Necessary to Adapt a Temporal Tagger to the Voice Assistant Domain</title>
       <author><first>Touhidul</first><last>Alam</last></author>
       <author><first>Alessandra</first><last>Zarcone</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>144–154</pages>
       <abstract>Reliable tagging of Temporal Expressions (TEs, e.g., Book a table at L’Osteria for Sunday evening) is a central requirement for Voice Assistants (VAs). However, there is a dearth of resources and systems for the VA domain, since publicly-available temporal taggers are trained only on substantially different domains, such as news and clinical text. Since the cost of annotating large datasets is prohibitive, we investigate the trade-off between in-domain data and performance in DA-Time, a hybrid temporal tagger for the English VA domain which combines a neural architecture for robust TE recognition, with a parser-based TE normalizer. We find that transfer learning goes a long way even with as little as 25 in-domain sentences: DA-Time performs at the state of the art on the news domain, and substantially outperforms it on the VA domain.</abstract>
       <url hash="1be7eaa6">2021.iwcs-1.14</url>
@@ -184,10 +184,10 @@
     </paper>
     <paper id="17">
       <title>Builder, we have done it: Evaluating &amp; Extending Dialogue-<fixed-case>AMR</fixed-case> <fixed-case>NLU</fixed-case> Pipeline for Two Collaborative Domains</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Mitchell</first><last>Abrams</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>173–183</pages>
       <abstract>We adopt, evaluate, and improve upon a two-step natural language understanding (NLU) pipeline that incrementally tames the variation of unconstrained natural language input and maps to executable robot behaviors. The pipeline first leverages Abstract Meaning Representation (AMR) parsing to capture the propositional content of the utterance, and second converts this into “Dialogue-AMR,” which augments standard AMR with information on tense, aspect, and speech acts. Several alternative approaches and training datasets are evaluated for both steps and corresponding components of the pipeline, some of which outperform the original. We extend the Dialogue-AMR annotation schema to cover a different collaborative instruction domain and evaluate on both domains. With very little training data, we achieve promising performance in the new domain, demonstrating the scalability of this approach.</abstract>
       <url hash="6d101b1f">2021.iwcs-1.17</url>
@@ -199,7 +199,7 @@
       <author><first>Gene</first><last>Kim</last></author>
       <author><first>Viet</first><last>Duong</last></author>
       <author><first>Xin</first><last>Lu</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>184–201</pages>
       <abstract>“Episodic Logic: Unscoped Logical Form” (EL-ULF) is a semantic representation capturing predicate-argument structure as well as more challenging aspects of language within the Episodic Logic formalism. We present the first learned approach for parsing sentences into ULFs, using a growing set of annotated examples. The results provide a strong baseline for future improvement. Our method learns a sequence-to-sequence model for predicting the transition action sequence within a modified cache transition system. We evaluate the efficacy of type grammar-based constraints, a word-to-symbol lexicon, and transition system state features in this task. Our system is available at <url>https://github.com/genelkim/ulf-transition-parser</url>. We also present the first official annotated ULF dataset at <url>https://www.cs.rochester.edu/u/gkim21/ulf/resources/</url>.</abstract>
       <url hash="328a093b">2021.iwcs-1.18</url>
@@ -219,7 +219,7 @@
     <paper id="20">
       <title>Tuning Deep Active Learning for Semantic Role Labeling</title>
       <author><first>Skatje</first><last>Myers</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>212–221</pages>
       <abstract>Active learning has been shown to reduce annotation requirements for numerous natural language processing tasks, including semantic role labeling (SRL). SRL involves labeling argument spans for potentially multiple predicates in a sentence, which makes it challenging to aggregate the numerous decisions into a single score for determining new instances to annotate. In this paper, we apply two ways of aggregating scores across multiple predicates in order to choose query sentences with two methods of estimating model certainty: using the neural network’s outputs and using dropout-based Bayesian Active Learning by Disagreement. We compare these methods with three passive baselines — random sentence selection, random whole-document selection, and selecting sentences with the most predicates — and analyse the effect these strategies have on the learning curve with respect to reducing the number of annotated sentences and predicates to achieve high performance.</abstract>
       <url hash="60c6cbbe">2021.iwcs-1.20</url>
@@ -230,10 +230,10 @@
       <author><first>Kevin</first><last>Stowe</last></author>
       <author><first>Jenette</first><last>Preciado</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
       <author><first>James</first><last>Gung</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>222–227</pages>
       <abstract>The SemLink resource provides mappings between a variety of lexical semantic ontologies, each with their strengths and weaknesses. To take advantage of these differences, the ability to move between resources is essential. This work describes advances made to improve the usability of the SemLink resource: the automatic addition of new instances and mappings, manual corrections, sense-based vectors and collocation information, and architecture built to automatically update the resource when versions of the underlying resources change. These updates improve coverage, provide new tools to leverage the capabilities of these resources, and facilitate seamless updates, ensuring the consistency and applicability of these mappings in the future.</abstract>
       <url hash="50e6b2f6">2021.iwcs-1.21</url>
@@ -254,7 +254,7 @@
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Yana</first><last>Strakatova</last></author>
       <author><first>Eva</first><last>Huber</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>239–249</pages>
       <abstract>Adjectives such as heavy (as in heavy rain) and windy (as in windy day) provide possible values for the attributes intensity and climate, respectively. The attributes themselves are not overtly realized and are in this sense implicit. While these attributes can be easily inferred by humans, their automatic classification poses a challenging task for computational models. We present the following contributions: (1) We gain new insights into the attribute selection task for German. More specifically, we develop computational models for this task that are able to generalize to unseen data. Moreover, we show that classification accuracy depends, inter alia, on the degree of polysemy of the lexemes involved, on the generalization potential of the training data and on the degree of semantic transparency of the adjective-noun pairs in question. (2) We provide the first resource for computational and linguistic experiments with German adjective-noun pairs that can be used for attribute selection and related tasks. In order to safeguard against unwelcome memorization effects, we present an automatic data augmentation method based on a lexical resource that can increase the size of the training data to a large extent.</abstract>
       <url hash="10b22a4f">2021.iwcs-1.23</url>
diff --git a/data/xml/2021.iwpt.xml b/data/xml/2021.iwpt.xml
index 0539534472..619ef3c768 100644
--- a/data/xml/2021.iwpt.xml
+++ b/data/xml/2021.iwpt.xml
@@ -7,8 +7,8 @@
       <editor><first>Kenji</first><last>Sagae</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
       <editor><first>Gosse</first><last>Bouma</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
-      <editor><first>Daniel</first><last>Zeman</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="daniel-zeman"><first>Daniel</first><last>Zeman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -57,7 +57,7 @@
       <title>Semi-Automatic Construction of Text-to-<fixed-case>SQL</fixed-case> Data for Domain Transfer</title>
       <author><first>Tianyi</first><last>Li</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>38–49</pages>
       <abstract>Strong and affordable in-domain data is a desirable asset when transferring trained semantic parsers to novel domains. As previous methods for semi-automatically constructing such data cannot handle the complexity of realistic SQL queries, we propose to construct SQL queries via context-dependent sampling, and introduce the concept of topic. Along with our SQL query construction method, we propose a novel pipeline of semi-automatic Text-to-SQL dataset construction that covers the broad space of SQL queries. We show that the created dataset is comparable with expert annotation along multiple dimensions, and is capable of improving domain transfer performance for SOTA semantic parsers.</abstract>
       <url hash="e79eec90">2021.iwpt-1.4</url>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>Levi Graph <fixed-case>AMR</fixed-case> Parser using Heterogeneous Attention</title>
       <author><first>Han</first><last>He</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>50–57</pages>
       <abstract>Coupled with biaffine decoders, transformers have been effectively adapted to text-to-graph transduction and achieved state-of-the-art performance on AMR parsing. Many prior works, however, rely on the biaffine decoder for either or both arc and label predictions although most features used by the decoder may be learned by the transformer already. This paper presents a novel approach to AMR parsing by combining heterogeneous data (tokens, concepts, labels) as one input to a transformer to learn attention, and use only attention matrices from the transformer to predict all elements in AMR graphs (concepts, arcs, labels). Although our models use significantly fewer parameters than the previous state-of-the-art graph parser, they show similar or better accuracy on AMR 2.0 and 3.0.</abstract>
       <url hash="4c335237">2021.iwpt-1.5</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="9">
       <title>Multilingual Dependency Parsing for Low-Resource <fixed-case>A</fixed-case>frican Languages: Case Studies on <fixed-case>B</fixed-case>ambara, <fixed-case>W</fixed-case>olof, and <fixed-case>Y</fixed-case>oruba</title>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <pages>84–92</pages>
       <abstract>This paper describes a methodology for syntactic knowledge transfer between high-resource languages to extremely low-resource languages. The methodology consists in leveraging multilingual BERT self-attention model pretrained on large datasets to develop a multilingual multi-task model that can predict Universal Dependencies annotations for three African low-resource languages. The UD annotations include universal part-of-speech, morphological features, lemmas, and dependency trees. In our experiments, we used multilingual word embeddings and a total of 11 Universal Dependencies treebanks drawn from three high-resource languages (English, French, Norwegian) and three low-resource languages (Bambara, Wolof and Yoruba). We developed various models to test specific language combinations involving contemporary contact languages or genetically related languages. The results of the experiments show that multilingual models that involve high-resource languages and low-resource languages with contemporary contact between each other can provide better results than combinations that only include unrelated languages. As far genetic relationships are concerned, we could not draw any conclusion regarding the impact of language combinations involving the selected low-resource languages, namely Wolof and Yoruba.</abstract>
       <url hash="b63a84ca">2021.iwpt-1.9</url>
@@ -123,7 +123,7 @@
       <title>Bidirectional Domain Adaptation Using Weighted Multi-Task Learning</title>
       <author><first>Daniel</first><last>Dakota</last></author>
       <author><first>Zeeshan Ali</first><last>Sayyed</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>93–105</pages>
       <abstract>Domain adaption in syntactic parsing is still a significant challenge. We address the issue of data imbalance between the in-domain and out-of-domain treebank typically used for the problem. We define domain adaptation as a Multi-task learning (MTL) problem, which allows us to train two parsers, one for each do-main. Our results show that the MTL approach is beneficial for the smaller treebank. For the larger treebank, we need to use loss weighting in order to avoid a decrease in performance be-low the single task. In order to determine towhat degree the data imbalance between two domains and the domain differences affect results, we also carry out an experiment with two imbalanced in-domain treebanks and show that loss weighting also improves performance in an in-domain setting. Given loss weighting in MTL, we can improve results for both parsers.</abstract>
       <url hash="6227bb0d">2021.iwpt-1.10</url>
@@ -134,7 +134,7 @@
     <paper id="11">
       <title>Strength in Numbers: Averaging and Clustering Effects in Mixture of Experts for Graph-Based Dependency Parsing</title>
       <author><first>Xudong</first><last>Zhang</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <pages>106–118</pages>
       <abstract>We review two features of mixture of experts (MoE) models which we call averaging and clustering effects in the context of graph-based dependency parsers learned in a supervised probabilistic framework. Averaging corresponds to the ensemble combination of parsers and is responsible for variance reduction which helps stabilizing and improving parsing accuracy. Clustering describes the capacity of MoE models to give more credit to experts believed to be more accurate given an input. Although promising, this is difficult to achieve, especially without additional data. We design an experimental set-up to study the impact of these effects. Whereas averaging is always beneficial, clustering requires good initialization and stabilization techniques, but its advantages over mere averaging seem to eventually vanish when enough experts are present. As a by product, we show how this leads to state-of-the-art results on the PTB and the CoNLL09 Chinese treebank, with low variance across experiments.</abstract>
@@ -259,7 +259,7 @@
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>204–212</pages>
       <abstract>We describe the DCU-EPFL submission to the IWPT 2021 Parsing Shared Task: From Raw Text to Enhanced Universal Dependencies. The task involves parsing Enhanced UD graphs, which are an extension of the basic dependency trees designed to be more facilitative towards representing semantic structure. Evaluation is carried out on 29 treebanks in 17 languages and participants are required to parse the data from each language starting from raw strings. Our approach uses the Stanza pipeline to preprocess the text files, XLM-RoBERTa to obtain contextualized token representations, and an edge-scoring and labeling model to predict the enhanced graph. Finally, we run a postprocessing script to ensure all of our outputs are valid Enhanced UD graphs. Our system places 6th out of 9 participants with a coarse Enhanced Labeled Attachment Score (ELAS) of 83.57. We carry out additional post-deadline experiments which include using Trankit for pre-processing, XLM-RoBERTa LARGE, treebank concatenation, and multitask learning between a basic and an enhanced dependency parser. All of these modifications improve our initial score and our final system has a coarse ELAS of 88.04.</abstract>
       <url hash="bc0dc6e3">2021.iwpt-1.22</url>
diff --git a/data/xml/2021.iwslt.xml b/data/xml/2021.iwslt.xml
index e4e7f0c7c5..e12a56e1d3 100644
--- a/data/xml/2021.iwslt.xml
+++ b/data/xml/2021.iwslt.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)</booktitle>
       <editor><first>Marcello</first><last>Federico</last></editor>
-      <editor><first>Alex</first><last>Waibel</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="alex-waibel"><first>Alex</first><last>Waibel</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Jan</first><last>Niehues</last></editor>
       <editor><first>Sebastian</first><last>Stuker</last></editor>
       <editor><first>Elizabeth</first><last>Salesky</last></editor>
@@ -23,18 +23,18 @@
     <paper id="1">
       <title><fixed-case>FINDINGS</fixed-case> <fixed-case>OF</fixed-case> <fixed-case>THE</fixed-case> <fixed-case>IWSLT</fixed-case> 2021 <fixed-case>EVALUATION</fixed-case> <fixed-case>CAMPAIGN</fixed-case></title>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Jacob</first><last>Bremerman</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Maha</first><last>Elbayad</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Xutai</first><last>Ma</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Juan</first><last>Pino</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Katsuhito</first><last>Sudoh</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Alexander</first><last>Waibel</last></author>
@@ -92,7 +92,7 @@
       <title>Without Further Ado: Direct and Simultaneous Speech Translation by <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek in 2021</title>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Patrick</first><last>Wilken</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <pages>52–63</pages>
       <abstract>This paper describes the offline and simultaneous speech translation systems developed at AppTek for IWSLT 2021. Our offline ST submission includes the direct end-to-end system and the so-called posterior tight integrated model, which is akin to the cascade system but is trained in an end-to-end fashion, where all the cascaded modules are end-to-end models themselves. For simultaneous ST, we combine hybrid automatic speech recognition with a machine translation approach whose translation policy decisions are learned from statistical word alignments. Compared to last year, we improve general quality and provide a wider range of quality/latency trade-offs, both due to a data augmentation method making the MT model robust to varying chunk sizes. Finally, we present a method for ASR output segmentation into sentences that introduces a minimal additional delay.</abstract>
@@ -133,7 +133,7 @@
       <title>Dealing with training and test segmentation mismatch: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case>2021</title>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>84–91</pages>
       <abstract>This paper describes FBK’s system submission to the IWSLT 2021 Offline Speech Translation task. We participated with a direct model, which is a Transformer-based architecture trained to translate English speech audio data into German texts. The training pipeline is characterized by knowledge distillation and a two-step fine-tuning procedure. Both knowledge distillation and the first fine-tuning step are carried out on manually segmented real and synthetic data, the latter being generated with an MT system trained on the available corpora. Differently, the second fine-tuning step is carried out on a random segmentation of the MuST-C v2 En-De dataset. Its main goal is to reduce the performance drops occurring when a speech translation model trained on manually segmented data (i.e. an ideal, sentence-like segmentation) is evaluated on automatically segmented audio (i.e. actual, more realistic testing conditions). For the same purpose, a custom hybrid segmentation procedure that accounts for both audio content (pauses) and for the length of the produced segments is applied to the test data before passing them to the system. At inference time, we compared this procedure with a baseline segmentation method based on Voice Activity Detection (VAD). Our results indicate the effectiveness of the proposed hybrid approach, shown by a reduction of the gap with manual segmentation from 8.3 to 1.4 BLEU points.</abstract>
@@ -176,7 +176,7 @@
       <author><first>Gerard I.</first><last>Gállego</last></author>
       <author><first>Ioannis</first><last>Tsiamas</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>110–119</pages>
       <abstract>This paper describes the submission to the IWSLT 2021 offline speech translation task by the UPC Machine Translation group. The task consists of building a system capable of translating English audio recordings extracted from TED talks into German text. Submitted systems can be either cascade or end-to-end and use a custom or given segmentation. Our submission is an end-to-end speech translation system, which combines pre-trained models (Wav2Vec 2.0 and mBART) with coupling modules between the encoder and decoder, and uses an efficient fine-tuning technique, which trains only 20% of its total parameters. We show that adding an Adapter to the system and pre-training it, can increase the convergence speed and the final result, with which we achieve a BLEU score of 27.3 on the MuST-C test set. Our final model is an ensemble that obtains 28.22 BLEU score on the same set. Our submission also uses a custom segmentation algorithm that employs pre-trained Wav2Vec 2.0 for identifying periods of untranscribable text and can bring improvements of 2.5 to 3 BLEU score on the IWSLT 2019 test set, as compared to the result with the given segmentation.</abstract>
@@ -203,10 +203,10 @@
       <author><first>Tuan Nam</first><last>Nguyen</last></author>
       <author><first>Thai Son</first><last>Nguyen</last></author>
       <author><first>Christian</first><last>Huber</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <pages>125–130</pages>
       <abstract>This paper describes KIT’submission to the IWSLT 2021 Offline Speech Translation Task. We describe a system in both cascaded condition and end-to-end condition. In the cascaded condition, we investigated different end-to-end architectures for the speech recognition module. For the text segmentation module, we trained a small transformer-based model on high-quality monolingual data. For the translation module, our last year’s neural machine translation model was reused. In the end-to-end condition, we improved our Speech Relative Transformer architecture to reach or even surpass the result of the cascade system.</abstract>
       <url hash="dac417e1">2021.iwslt-1.13</url>
@@ -260,10 +260,10 @@
     </paper>
     <paper id="18">
       <title>Multilingual Speech Translation <fixed-case>KIT</fixed-case> @ <fixed-case>IWSLT</fixed-case>2021</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Tuan Nam</first><last>Nguyen</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Alexander</first><last>Waibel</last></author>
       <author><first>Dan</first><last>He</last></author>
       <pages>154–159</pages>
@@ -364,7 +364,7 @@
       <title>Between Flexibility and Consistency: Joint Generation of Captions and Subtitles</title>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>215–225</pages>
       <abstract>Speech translation (ST) has lately received growing interest for the generation of subtitles without the need for an intermediate source language transcription and timing (i.e. captions). However, the joint generation of source captions and target subtitles does not only bring potential output quality advantages when the two decoding processes inform each other, but it is also often required in multilingual scenarios. In this work, we focus on ST models which generate consistent captions-subtitles in terms of structure and lexical content. We further introduce new metrics for evaluating subtitling consistency. Our findings show that joint decoding leads to increased performance and consistency between the generated captions and subtitles while still allowing for sufficient flexibility to produce subtitles conforming to language-specific needs and norms.</abstract>
@@ -433,7 +433,7 @@
       <author><first>Pavel</first><last>Petrushkov</last></author>
       <author><first>Tomer</first><last>Lancewicki</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>276–286</pages>
       <abstract>Complex natural language applications such as speech translation or pivot translation traditionally rely on cascaded models. However,cascaded models are known to be prone to error propagation and model discrepancy problems. Furthermore, there is no possibility of using end-to-end training data in conventional cascaded systems, meaning that the training data most suited for the task cannot be used. Previous studies suggested several approaches for integrated end-to-end training to overcome those problems, however they mostly rely on(synthetic or natural) three-way data. We propose a cascaded model based on the non-autoregressive Transformer that enables end-to-end training without the need for an explicit intermediate representation. This new architecture (i) avoids unnecessary early decisions that can cause errors which are then propagated throughout the cascaded models and (ii) utilizes the end-to-end training data directly. We conduct an evaluation on two pivot-based machine translation tasks, namely French→German and German→Czech. Our experimental results show that the proposed architecture yields an improvement of more than 2 BLEU for French→German over the cascaded baseline.</abstract>
       <url hash="622fd78e">2021.iwslt-1.32</url>
@@ -442,7 +442,7 @@
     </paper>
     <paper id="33">
       <title>Data Augmentation by Concatenation for Low-Resource Translation: A Mystery and a Solution</title>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>287–293</pages>
diff --git a/data/xml/2021.jeptalnrecital.xml b/data/xml/2021.jeptalnrecital.xml
index a0bf2e4b08..30504960d5 100644
--- a/data/xml/2021.jeptalnrecital.xml
+++ b/data/xml/2021.jeptalnrecital.xml
@@ -76,7 +76,7 @@
       <title>Plongements Interprétables pour la Détection de Biais Cachés (Interpretable Embeddings for Hidden Biases Detection)</title>
       <author><first>Tom</first><last>Bourgeade</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>64–80</pages>
       <abstract>De nombreuses tâches sémantiques en TAL font usage de données collectées de manière semiautomatique, ce qui est souvent source d’artefacts indésirables qui peuvent affecter négativement les modèles entraînés sur celles-ci. Avec l’évolution plus récente vers des modèles à usage générique pré-entraînés plus complexes, et moins interprétables, ces biais peuvent conduire à l’intégration de corrélations indésirables dans des applications utilisateurs. Récemment, quelques méthodes ont été proposées pour entraîner des plongements de mots avec une meilleure interprétabilité. Nous proposons une méthode simple qui exploite ces représentations pour détecter de manière préventive des corrélations lexicales faciles à apprendre, dans divers jeux de données. Nous évaluons à cette fin quelques modèles de plongements interprétables populaires pour l’anglais, en utilisant à la fois une évaluation intrinsèque, et un ensemble de tâches sémantiques en aval, et nous utilisons la qualité interprétable des plongements afin de diagnostiquer des biais potentiels dans les jeux de données associés.</abstract>
       <url hash="7d2dd48f">2021.jeptalnrecital-taln.6</url>
@@ -106,7 +106,7 @@
     <paper id="9">
       <title>Analyse en dépendances du français avec des plongements contextualisés (<fixed-case>F</fixed-case>rench dependency parsing with contextualized embeddings)</title>
       <author><first>Loïc</first><last>Grobol</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>106–114</pages>
       <abstract>Cet article présente un analyseur syntaxique en dépendances pour le français qui se compare favorablement à l’état de l’art sur la plupart des corpus de référence. L’analyseur s’appuie sur de riches représentations lexicales issues notamment de BERT et de FASTTEXT. On remarque que les représentations lexicales produites par FLAUBERT ont un caractère auto-suffisant pour réaliser la tâche d’analyse syntaxique de manière optimale.</abstract>
       <url hash="0c432efe">2021.jeptalnrecital-taln.9</url>
@@ -116,7 +116,7 @@
     <paper id="10">
       <title>Caractérisation des relations sémantiques entre termes multi-mots fondée sur l’analogie (Semantic relations recognition between multi-word terms by means of analogy )</title>
       <author><first>Yizhe</first><last>Wang</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <pages>115–124</pages>
       <abstract>La terminologie d’un domaine rend compte de la structure du domaine grâce aux relations entre ses termes. Dans cet article, nous nous intéressons à la caractérisation des relations terminologiques qui existent entre termes multi-mots (MWT) dans les espaces vectoriels distributionnels. Nous avons constitué un jeu de données composé de MWT en français du domaine de l’environnement, reliés par des relations sémantiques lexicales. Nous présentons une expérience dans laquelle ces relations sémantiques entre MWT sont caractérisées au moyen de l’analogie. Les résultats obtenus permettent d’envisager un processus automatique pour aider à la structuration des terminologies.</abstract>
@@ -136,11 +136,11 @@
     </paper>
     <paper id="12">
       <title>Contribution d’informations syntaxiques aux capacités de généralisation compositionelle des modèles seq2seq convolutifs (Assessing the Contribution of Syntactic Information for Compositional Generalization of seq2seq Convolutional Networks)</title>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>William N.</first><last>Havard</last></author>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>134–141</pages>
       <abstract>Les modèles neuronaux de type seq2seq manifestent d’étonnantes capacités de prédiction quand ils sont entraînés sur des données de taille suffisante. Cependant, ils échouent à généraliser de manière satisfaisante quand la tâche implique d’apprendre et de réutiliser des règles systématiques de composition et non d’apprendre simplement par imitation des exemples d’entraînement. Le jeu de données SCAN, constitué d’un ensemble de commandes en langage naturel associées à des séquences d’action, a été spécifiquement conçu pour évaluer les capacités des réseaux de neurones à apprendre ce type de généralisation compositionnelle. Dans cet article, nous nous proposons d’étudier la contribution d’informations syntaxiques sur les capacités de généralisation compositionnelle des réseaux de neurones seq2seq convolutifs.</abstract>
       <url hash="dd6ab45d">2021.jeptalnrecital-taln.12</url>
@@ -153,7 +153,7 @@
       <author><first>Vojtech</first><last>Hudecek</last></author>
       <author><first>Daniel</first><last>Stancl</last></author>
       <author><first>Ondrej</first><last>Dusek</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>142–152</pages>
       <abstract>Définition et détection des incohérences du système dans les dialogues orientés tâche. Nous présentons des expériences sur la détection automatique des comportements incohérents des systèmes de dialogues orientés tâche à partir du contexte. Nous enrichissons les données bAbI/DSTC2 (Bordes et al., 2017) avec une annotation automatique des incohérences de dialogue, et nous démontrons que les incohérences sont en corrélation avec les dialogues ratés. Nous supposons que l’utilisation d’un historique de dialogue limité et la prédiction du prochain tour de l’utilisateur peuvent améliorer la classification des incohérences. Si les deux hypothèses sont confirmées pour un modèle de dialogue basé sur les réseaux de mémoire, elles ne le sont pas pour un entraînement basé sur le modèle de langage GPT-2, qui bénéficie le plus de l’utilisation de l’historique complet du dialogue et obtient un score de précision de 0,99.</abstract>
       <url hash="73c25e25">2021.jeptalnrecital-taln.13</url>
@@ -164,7 +164,7 @@
       <title>Évaluation de méthodes et d’outils pour la lemmatisation automatique du français médiéval (Evaluation of methods and tools for automatic lemmatization in <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench)</title>
       <author><first>Cristina</first><last>Holgado</last></author>
       <author><first>Alexei</first><last>Lavrentiev</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>153–161</pages>
       <abstract>Pour les langues historiques non stabilisées comme le français médiéval, la lemmatisation automatique présente toujours des défis, car cette langue connaît une forte variation graphique. Dans cet article, nous dressons un état des lieux de la lemmatisation automatique pour cette langue en comparant les performances de quatre lemmatiseurs existants sur un même jeu de données. L’objectif est d’évaluer où se situent les nouvelles techniques de l’apprentissage automatique par rapport aux techniques plus traditionnelles s’appuyant sur des systèmes de règles et lexiques, en particulier pour la prédiction des mots inconnus.</abstract>
       <url hash="d880f6bf">2021.jeptalnrecital-taln.14</url>
@@ -187,7 +187,7 @@
     <paper id="16">
       <title>Formalisation de la relation entre les verbes imperfectifs et perfectifs en ukrainien (In the <fixed-case>S</fixed-case>lavic linguistic tradition, perfective and imperfective forms of verbs are traditionally entered independently in dictionaries)</title>
       <author><first>Olena</first><last>Saint-Joanis</last></author>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <pages>171–178</pages>
       <abstract>Dans la tradition linguistique slave, les formes perfectives et imperfectives des verbes sont traditionnellement inscrites séparément dans les dictionnaires. Cependant, il existe de forts liens morphologiques et sémantiques entre les deux formes verbales. Nous présentons une formalisation qui nous a permis de lier les deux formes. Nous avons construit un dictionnaire électronique qui contient plus de 13 000 entrées verbales associées à plus de 300 paradigmes morphologiques, qui peut être utilisé pour automatiquement lemmatiser les formes verbales dans les textes ukrainiens et relier les formes perfectives et imperfectives.</abstract>
       <url hash="d8be9cc2">2021.jeptalnrecital-taln.16</url>
@@ -196,7 +196,7 @@
     </paper>
     <paper id="17">
       <title>Intérêt des modèles de caractères pour la détection d’événements (The interest of character-level models for event detection)</title>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
@@ -254,8 +254,8 @@
       <title>Stratégie Multitâche pour la Classification Multiclasse (A Multitask Strategy for Multiclass Classification)</title>
       <author><first>Houssam</first><last>Akhmouch</last></author>
       <author><first>Hamza</first><last>Bouanani</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <pages>227–236</pages>
       <abstract>Nous proposons une idée originale pour exploiter les relations entre les classes dans les problèmes multiclasses. Nous définissons deux architectures multitâches de type one-vs-rest qui combinent des ensembles de classifieurs appris dans une configuration multitâche en utilisant des réseaux de neurones. Les expériences menées sur six jeux de données pour la classification des sentiments, des émotions, des thématiques et des relations lexico-sémantiques montrent que nos architectures améliorent constamment les performances par rapport aux stratégies de l’état de l’art de type one-vsrest et concurrencent fortement les autres stratégies multiclasses.</abstract>
       <url hash="033f7c03">2021.jeptalnrecital-taln.22</url>
@@ -301,7 +301,7 @@ _
 
 _ (<fixed-case>F</fixed-case>rench) We introduce a <fixed-case>F</fixed-case>rench adaptation from the well-known <fixed-case>GPT</fixed-case> model)</title>
       <author><first>Antoine</first><last>Simoulin</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>246–255</pages>
       <abstract>Nous proposons une adaptation en français du fameux modèle Generative Pre-trained Transformer (GPT). Ce dernier appartient à la catégorie des architectures transformers qui ont significativement transformé les méthodes de traitement automatique du langage. Ces architectures sont en particulier pré-entraînées sur des tâches auto-supervisées et sont ainsi spécifiques pour une langue donnée. Si certaines sont disponibles en français, la plupart se déclinent avant tout en anglais. GPT est particulièrement efficace pour les tâches de génération de texte. Par ailleurs, il est possible de l’appliquer à de nombreux cas d’usages. Ses propriétés génératives singulières permettent de l’utiliser dans des conditions originales comme l’apprentissage sans exemple qui ne suppose aucune mise à jour des poids du modèle, ou modification de l’architecture.</abstract>
       <url hash="d9db3eff">2021.jeptalnrecital-taln.24</url>
@@ -311,7 +311,7 @@ _ (<fixed-case>F</fixed-case>rench) We introduce a <fixed-case>F</fixed-case>ren
     <paper id="25">
       <title>Une étude des avis en ligne : généralisabilité d’un modèle d’évaluation (A Study of Online Reviews : Generalizability of the Evaluation Model)</title>
       <author><first>Hyun</first><last>Jung Kang</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>256–263</pages>
       <abstract>Ce travail se situe dans la continuité de nos travaux antérieurs proposant le modèle d’évaluation portant sur des avis en ligne sur des restaurants. Le modèle est composé de quatre catégories : l’opinion (positive, négative, mixte), la suggestion, l’intention et la description. Cet article vise à tester la généralisabilité du modèle en l’appliquant sur deux corpus supplémentaires : un corpus relevant d’un autre domaine (celui de l’hôtellerie) et un corpus écrit dans une autre langue (le coréen). Nous avons présenté l’annotation manuelle et la détection automatique de ces catégories en nous appuyant sur différents modèles de l’apprentissage de surface (SVM) et l’apprentissage profond (LSTM).</abstract>
       <url hash="7ba633cd">2021.jeptalnrecital-taln.25</url>
@@ -526,7 +526,7 @@ _ (<fixed-case>F</fixed-case>rench) We introduce a <fixed-case>F</fixed-case>ren
       <title>Corpus <fixed-case>EN</fixed-case>-Istex : un corpus d’articles scientifiques annoté manuellement en entités nommées (<fixed-case>ISTEX</fixed-case>-<fixed-case>EN</fixed-case> Corpus: a scientific paper corpus manually annotated in named entities)</title>
       <author><first>Enza</first><last>Morale</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <pages>6–7</pages>
       <abstract>Nous présentons ici une nouvelle ressource libre : le corpus EN-ISTEX, un corpus de deux cents articles scientifiques annotés manuellement en entités nommées. Ces articles ont été extraits des deux éditeurs scientifiques les plus importants de la plateforme ISTEX. Tous les domaines sont concernés, même si les sciences dites dures, en particulier les sciences du vivant et de la santé, sont prépondérantes. Parmi ceux-ci vingt articles ont été multi-annotés afin de vérifier l’adéquation du guide d’annotation et la fiabilité de l’annotation. L’accord inter annotateurs sur ces vingt textes s’élève à 91 %.</abstract>
@@ -599,7 +599,7 @@ _ (<fixed-case>F</fixed-case>rench) We introduce a <fixed-case>F</fixed-case>ren
       <title>Classification multi-label de cas cliniques avec <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case> (Multi-label classification of clinical cases with <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case> )</title>
       <author><first>Alexandre</first><last>Bailly</last></author>
       <author><first>Corentin</first><last>Blanc</last></author>
-      <author><first>Thierry</first><last>Guillotin</last></author>
+      <author id="thierry-guillotin"><first>Thierry</first><last>Guillotin</last></author>
       <pages>14–20</pages>
       <abstract>La quantité de documents textuels médicaux allant grandissant, la nécessité d’en extraire automatiquement des informations concernant des patients devient de plus en plus grande. La prédiction du profil clinique permet de gagner du temps pour le praticien tout en extrayant l’essentiel de l’information concernant un patient. Avec l’explosion du nombre de documents (médicaux ou non), des modèles pré-entraînés tels que BERT pour l’anglais ou CamemBERT pour le français ont émergé. L’utilisation de ces modèles permet d’encoder contextuellement du texte afin de l’utiliser dans des réseaux neuronaux pour notamment prédire des profils cliniques. Cet article vise à comparer différentes méthodes de prédiction de profil clinique en se basant sur l’utilisation de CamemBERT. Dans un premier temps, uniquement du texte provenant de documents médicaux a été utilisé. Dans un second temps, des entités nommées ont été injectées en plus du texte par concaténation ou par sommation pondérée. Les résultats ont montré un succès limité et dépendant de la prévalence des chapitres à prédire dans le corpus ainsi qu’une dégradation des performances lors de l’ajout des entités nommées.</abstract>
       <url hash="5388dc84">2021.jeptalnrecital-deft.2</url>
diff --git a/data/xml/2021.konvens.xml b/data/xml/2021.konvens.xml
index 9f5f3e9380..d9cfe7e36c 100644
--- a/data/xml/2021.konvens.xml
+++ b/data/xml/2021.konvens.xml
@@ -6,7 +6,7 @@
       <editor><first>Kilian</first><last>Evang</last></editor>
       <editor><first>Laura</first><last>Kallmeyer</last></editor>
       <editor><first>Rainer</first><last>Osswald</last></editor>
-      <editor><first>Jakub</first><last>Waszczuk</last></editor>
+      <editor id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></editor>
       <editor><first>Torsten</first><last>Zesch</last></editor>
       <publisher>KONVENS 2021 Organizers</publisher>
       <address>Düsseldorf, Germany</address>
@@ -51,7 +51,7 @@
       <author><first>Sana</first><last>Moin</last></author>
       <author><first>Anirban</first><last>Bhowmick</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>37–48</pages>
       <url hash="b5f5fc74">2021.konvens-1.4</url>
       <bibkey>von-boguszewski-etal-2021-hateful</bibkey>
@@ -103,7 +103,7 @@
       <title>Extraction and Normalization of Vague Time Expressions in <fixed-case>G</fixed-case>erman</title>
       <author><first>Ulrike</first><last>May</last></author>
       <author><first>Karolina</first><last>Zaczynska</last></author>
-      <author><first>Julián</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julián</first><last>Moreno-Schneider</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <pages>114–126</pages>
       <url hash="5cb3dbfc">2021.konvens-1.10</url>
@@ -154,7 +154,7 @@
       <title>Neural End-to-end Coreference Resolution for <fixed-case>G</fixed-case>erman in Different Domains</title>
       <author><first>Fynn</first><last>Schröder</last></author>
       <author><first>Hans Ole</first><last>Hatzel</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>170–181</pages>
       <url hash="04b3285d">2021.konvens-1.15</url>
       <bibkey>schroder-etal-2021-neural</bibkey>
@@ -193,7 +193,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>D</fixed-case>e<fixed-case>I</fixed-case>n<fixed-case>S</fixed-case>tance: Creating and Evaluating a <fixed-case>G</fixed-case>erman Corpus for Fine-Grained Inferred Stance Detection</title>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Manfred</first><last>Klenner</last></author>
       <author><first>Sophia</first><last>Conrad</last></author>
       <pages>213–217</pages>
@@ -231,9 +231,9 @@
     <paper id="24">
       <title><fixed-case>W</fixed-case>ord<fixed-case>G</fixed-case>uess: Using Associations for Guessing, Learning and Exploring Related Words</title>
       <author><first>Cennet</first><last>Oguz</last></author>
-      <author><first>André</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>André</first><last>Blessing</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last></author>
       <pages>235–241</pages>
       <url hash="aa63f3eb">2021.konvens-1.24</url>
       <bibkey>oguz-etal-2021-wordguess</bibkey>
@@ -242,7 +242,7 @@
       <title>Towards a balanced annotated <fixed-case>L</fixed-case>ow <fixed-case>S</fixed-case>axon dataset for diachronic investigation of dialectal variation</title>
       <author><first>Janine</first><last>Siewert</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>242–246</pages>
       <url hash="2fbca907">2021.konvens-1.25</url>
       <bibkey>siewert-etal-2021-towards</bibkey>
diff --git a/data/xml/2021.lantern.xml b/data/xml/2021.lantern.xml
index 2459a75442..c5d860c956 100644
--- a/data/xml/2021.lantern.xml
+++ b/data/xml/2021.lantern.xml
@@ -8,7 +8,7 @@
       <editor><first>Sandro</first><last>Pezzelle</last></editor>
       <editor><first>Aditya</first><last>Mogadala</last></editor>
       <editor><first>Dietrich</first><last>Klakow</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <editor><first>Zeynep</first><last>Akata</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Kyiv, Ukraine</address>
@@ -65,7 +65,7 @@
     <paper id="5">
       <title>What Did This Castle Look like before? Exploring Referential Relations in Naturally Occurring Multimodal Texts</title>
       <author><first>Ronja</first><last>Utescher</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>53–60</pages>
       <abstract>Multi-modal texts are abundant and diverse in structure, yet Language &amp; Vision research of these naturally occurring texts has mostly focused on genres that are comparatively light on text, like tweets. In this paper, we discuss the challenges and potential benefits of a L&amp;V framework that explicitly models referential relations, taking Wikipedia articles about buildings as an example. We briefly survey existing related tasks in L&amp;V and propose multi-modal information extraction as a general direction for future research.</abstract>
       <url hash="1666b58c">2021.lantern-1.5</url>
diff --git a/data/xml/2021.latechclfl.xml b/data/xml/2021.latechclfl.xml
index 5aafbcec46..b007a33faa 100644
--- a/data/xml/2021.latechclfl.xml
+++ b/data/xml/2021.latechclfl.xml
@@ -6,7 +6,7 @@
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic (online)</address>
       <month>November</month>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="4">
       <title>Quantifying Contextual Aspects of Inter-annotator Agreement in Intertextuality Research</title>
-      <author><first>Enrique</first><last>Manjavacas Arevalo</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas Arevalo</last></author>
       <author><first>Laurence</first><last>Mellerin</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <pages>31–42</pages>
@@ -132,7 +132,7 @@
     <paper id="10">
       <title>Unsupervised Adverbial Identification in <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Literature</title>
       <author><first>Wenxiu</first><last>Xie</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Fangqiong</first><last>Zhan</last></author>
       <author><first>Xiao</first><last>Han</last></author>
       <author><first>Chi-Yin</first><last>Chow</last></author>
@@ -159,7 +159,7 @@
       <title>Translationese in <fixed-case>R</fixed-case>ussian Literary Texts</title>
       <author><first>Maria</first><last>Kunilovskaya</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>101–112</pages>
       <abstract>The paper reports the results of a translationese study of literary texts based on translated and non-translated Russian. We aim to find out if translations deviate from non-translated literary texts, and if the established differences can be attributed to typological relations between source and target languages. We expect that literary translations from typologically distant languages should exhibit more translationese, and the fingerprints of individual source languages (and their families) are traceable in translations. We explore linguistic properties that distinguish non-translated Russian literature from translations into Russian. Our results show that non-translated fiction is different from translations to the degree that these two language varieties can be automatically classified. As expected, language typology is reflected in translations of literary texts. We identified features that point to linguistic specificity of Russian non-translated literature and to shining-through effects. Some of translationese features cut across all language pairs, while others are characteristic of literary translations from languages belonging to specific language families.</abstract>
       <url hash="b3e4c2f2">2021.latechclfl-1.12</url>
@@ -205,7 +205,7 @@
     <paper id="16">
       <title>Zero-Shot Information Extraction to Enhance a Knowledge Graph Describing Silk Textiles</title>
       <author><first>Thomas</first><last>Schleider</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <pages>138–146</pages>
       <abstract>The knowledge of the European silk textile production is a typical case for which the information collected is heterogeneous, spread across many museums and sparse since rarely complete. Knowledge Graphs for this cultural heritage domain, when being developed with appropriate ontologies and vocabularies, enable to integrate and reconcile this diverse information. However, many of these original museum records still have some metadata gaps. In this paper, we present a zero-shot learning approach that leverages the ConceptNet common sense knowledge graph to predict categorical metadata informing about the silk objects production. We compared the performance of our approach with traditional supervised deep learning-based methods that do require training data. We demonstrate promising and competitive performance for similar datasets and circumstances and the ability to predict sometimes more fine-grained information. Our results can be reproduced using the code and datasets published at <url>https://github.com/silknow/ZSL-KG-silk</url>.</abstract>
       <url hash="603bc3b6">2021.latechclfl-1.16</url>
@@ -239,7 +239,7 @@
     <paper id="19">
       <title>Period Classification in <fixed-case>C</fixed-case>hinese Historical Texts</title>
       <author><first>Zuoyu</first><last>Tian</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>168–177</pages>
       <abstract>In this study, we study language change in Chinese Biji by using a classification task: classifying Ancient Chinese texts by time periods. Specifically, we focus on a unique genre in classical Chinese literature: Biji (literally “notebook” or “brush notes”), i.e., collections of anecdotes, quotations, etc., anything authors consider noteworthy, Biji span hundreds of years across many dynasties and conserve informal language in written form. For these reasons, they are regarded as a good resource for investigating language change in Chinese (Fang, 2010). In this paper, we create a new dataset of 108 Biji across four dynasties. Based on the dataset, we first introduce a time period classification task for Chinese. Then we investigate different feature representation methods for classification. The results show that models using contextualized embeddings perform best. An analysis of the top features chosen by the word n-gram model (after bleaching proper nouns) confirms that these features are informative and correspond to observations and assumptions made by historical linguists.</abstract>
       <url hash="b030a9f1">2021.latechclfl-1.19</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="20">
       <title>A Mixed-Methods Analysis of Western and <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong–based Reporting on the 2019–2020 Protests</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>James</first><last>Scharf</last></author>
       <author><first>Giovanna Maria Dora</first><last>Dore</last></author>
       <pages>178–188</pages>
diff --git a/data/xml/2021.law.xml b/data/xml/2021.law.xml
index 0bceb7bed5..54e05ab699 100644
--- a/data/xml/2021.law.xml
+++ b/data/xml/2021.law.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-10-28" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Joint 15th Linguistic Annotation Workshop (LAW) and 3rd Designing Meaning Representations (DMR) Workshop</booktitle>
-      <editor><first>Claire</first><last>Bonial</last></editor>
+      <editor id="claire-bonial"><first>Claire</first><last>Bonial</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
@@ -55,7 +55,7 @@
     <paper id="4">
       <title><fixed-case>A</fixed-case>uto<fixed-case>A</fixed-case>spect: Automatic Annotation of Tense and Aspect for Uniform Meaning Representations</title>
       <author><first>Daniel</first><last>Chen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Meagan</first><last>Vigus</last></author>
       <pages>36–45</pages>
       <abstract>We present AutoAspect, a novel, rule-based annotation tool for labeling tense and aspect. The pilot version annotates English data. The aspect labels are designed specifically for Uniform Meaning Representations (UMR), an annotation schema that aims to encode crosslingual semantic information. The annotation tool combines syntactic and semantic cues to assign aspects on a sentence-by-sentence basis, following a sequence of rules that each output a UMR aspect. Identified events proceed through the sequence until they are assigned an aspect. We achieve a recall of 76.17% for identifying UMR events and an accuracy of 62.57% on all identified events, with high precision values for 2 of the aspect labels.</abstract>
@@ -106,7 +106,7 @@
     <paper id="8">
       <title>Sister Help: Data Augmentation for Frame-Semantic Role Labeling</title>
       <author><first>Ayush</first><last>Pancholy</last></author>
-      <author><first>Miriam R L</first><last>Petruck</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <pages>78–84</pages>
       <abstract>While FrameNet is widely regarded as a rich resource of semantics in natural language processing, a major criticism concerns its lack of coverage and the relative paucity of its labeled data compared to other commonly used lexical resources such as PropBank and VerbNet. This paper reports on a pilot study to address these gaps. We propose a data augmentation approach, which uses existing frame-specific annotation to automatically annotate other lexical units of the same frame which are unannotated. Our rule-based approach defines the notion of a **sister lexical unit** and generates frame-specific augmented data for training. We present experiments on frame-semantic role labeling which demonstrate the importance of this data augmentation: we obtain a large improvement to prior results on frame identification and argument identification for FrameNet, utilizing both full-text and lexicographic annotations under FrameNet. Our findings on data augmentation highlight the value of automatic resource creation for improved models in frame-semantic parsing.</abstract>
@@ -154,8 +154,8 @@
       <author><first>Talha</first><last>Bedir</last></author>
       <author><first>Karahan</first><last>Şahin</last></author>
       <author><first>Onur</first><last>Gungor</last></author>
-      <author><first>Suzan</first><last>Uskudarli</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="suzan-uskudarli"><first>Suzan</first><last>Uskudarli</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
       <author><first>Balkiz</first><last>Ozturk Basaran</last></author>
       <pages>112–122</pages>
@@ -167,7 +167,7 @@
     <paper id="13">
       <title>Automatic Entity State Annotation using the <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Semantic Parser</title>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Tao</first><last>Li</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
       <pages>123–132</pages>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="15">
       <title>Increasing Sentence-Level Comprehension Through Text Classification of Epistemic Functions</title>
-      <author><first>Maria</first><last>Berger</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
       <author><first>Elizabeth</first><last>Goldstein</last></author>
       <pages>139–150</pages>
       <abstract>Word embeddings capture semantic meaning of individual words. How to bridge word-level linguistic knowledge with sentence-level language representation is an open problem. This paper examines whether sentence-level representations can be achieved by building a custom sentence database focusing on one aspect of a sentence’s meaning. Our three separate semantic aspects are whether the sentence: (1) communicates a causal relationship, (2) indicates that two things are correlated with each other, and (3) expresses information or knowledge. The three classifiers provide epistemic information about a sentence’s content.</abstract>
diff --git a/data/xml/2021.lchange.xml b/data/xml/2021.lchange.xml
index c9bb1fedd2..c101240d0a 100644
--- a/data/xml/2021.lchange.xml
+++ b/data/xml/2021.lchange.xml
@@ -114,7 +114,7 @@
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Alina Maria</first><last>Cristea</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Simona</first><last>Georgescu</last></author>
       <author><first>Laurentiu</first><last>Zoicas</last></author>
       <pages>64–74</pages>
diff --git a/data/xml/2021.louhi.xml b/data/xml/2021.louhi.xml
index c58e1f5e7c..d3b5747ddf 100644
--- a/data/xml/2021.louhi.xml
+++ b/data/xml/2021.louhi.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis</booktitle>
       <editor><first>Eben</first><last>Holderness</last></editor>
-      <editor><first>Antonio</first><last>Jimeno Yepes</last></editor>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Fabio</first><last>Rinaldi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>online</address>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Multilingual Negation Scope Resolution for Clinical Text</title>
       <author><first>Mareike</first><last>Hartmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>7–18</pages>
       <abstract>Negation scope resolution is key to high-quality information extraction from clinical texts, but so far, efforts to make encoders used for information extraction negation-aware have been limited to English. We present a universal approach to multilingual negation scope resolution, that overcomes the lack of training data by relying on disparate resources in different languages and domains. We evaluate two approaches to learn from these resources, training on combined data and training in a multi-task learning setup. Our experiments show that zero-shot scope resolution in clinical text is possible, and that combining available resources improves performance in most cases.</abstract>
       <url hash="6ab0db1a">2021.louhi-1.2</url>
@@ -44,7 +44,7 @@
       <author><first>Ramie</first><last>Fathy</last></author>
       <author><first>Barrington</first><last>Bennett</last></author>
       <author><first>Daniel</first><last>Stokes</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <pages>19–27</pages>
       <abstract>In online forums focused on health and wellbeing, individuals tend to seek and give the following social support: emotional and informational support. Understanding the expressions of these social supports in an online COVID- 19 forum is important for: (a) the forum and its members to provide the right type of support to individuals and (b) determining the long term effects of the COVID-19 pandemic on the well-being of the public, thereby informing interventions. In this work, we build four machine learning models to measure the extent of the following social supports expressed in each post in a COVID-19 online forum: (a) emotional support given (b) emotional support sought (c) informational support given, and (d) informational support sought. Using these models, we aim to: (i) determine if there is a correlation between the different social supports expressed in posts e.g. when members of the forum give emotional support in posts, do they also tend to give or seek informational support in the same post? (ii) determine how these social supports sought and given changes over time in published posts. We find that (i) there is a positive correlation between the informational support given in posts and the emotional support given and emotional support sought, respectively, in these posts and (ii) over time, users tended to seek more emotional support and give less emotional support.</abstract>
       <url hash="61bb3c59">2021.louhi-1.3</url>
@@ -72,8 +72,8 @@
     <paper id="6">
       <title>Integrating Higher-Level Semantics into Robust Biomedical Name Representations</title>
       <author><first>Pieter</first><last>Fivez</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>49–58</pages>
       <abstract>Neural encoders of biomedical names are typically considered robust if representations can be effectively exploited for various downstream NLP tasks. To achieve this, encoders need to model domain-specific biomedical semantics while rivaling the universal applicability of pretrained self-supervised representations. Previous work on robust representations has focused on learning low-level distinctions between names of fine-grained biomedical concepts. These fine-grained concepts can also be clustered together to reflect higher-level, more general semantic distinctions, such as grouping the names nettle sting and tick-borne fever together under the description puncture wound of skin. It has not yet been empirically confirmed that training biomedical name encoders on fine-grained distinctions automatically leads to bottom-up encoding of such higher-level semantics. In this paper, we show that this bottom-up effect exists, but that it is still relatively limited. As a solution, we propose a scalable multi-task training regime for biomedical name encoders which can also learn robust representations using only higher-level semantic classes. These representations can generalise both bottom-up as well as top-down among various semantic hierarchies. Moreover, we show how they can be used out-of-the-box for improved unsupervised detection of hypernyms, while retaining robust performance on various semantic relatedness benchmarks.</abstract>
       <url hash="729dc25a">2021.louhi-1.6</url>
diff --git a/data/xml/2021.ltedi.xml b/data/xml/2021.ltedi.xml
index 53102e16c8..7832057ac4 100644
--- a/data/xml/2021.ltedi.xml
+++ b/data/xml/2021.ltedi.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion</booktitle>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <editor><first>Manel</first><last>Zarrouk</last></editor>
       <editor><first>Kalika</first><last>Bali</last></editor>
       <editor><first>Paul</first><last>Buitelaar</last></editor>
@@ -39,7 +39,7 @@
       <title>Cross-Lingual Transfer Learning for Hate Speech Detection</title>
       <author><first>Irina</first><last>Bigoulaeva</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>15–25</pages>
       <abstract>We address the task of automatic hate speech detection for low-resource languages. Rather than collecting and annotating new hate speech data, we show how to use cross-lingual transfer learning to leverage already existing data from higher-resource languages. Using bilingual word embeddings based classifiers we achieve good performance on the target language by training only on the source dataset. Using our transferred system we bootstrap on unlabeled target language data, improving the performance of standard cross-lingual transfer approaches. We use English as a high resource language and German as the target language for which only a small amount of annotated corpora are available. Our results indicate that cross-lingual transfer learning together with our approach to leverage additional unlabeled data is an effective way of achieving good performance on low-resource target languages without the need for any target-language annotations.</abstract>
       <url hash="34394535">2021.ltedi-1.3</url>
@@ -74,7 +74,7 @@
     <paper id="6">
       <title><fixed-case>GEPSA</fixed-case>, a tool for monitoring social challenges in digital press</title>
       <author><first>Iñaki</first><last>San Vicente</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Nerea</first><last>Zubia</last></author>
       <pages>46–50</pages>
       <abstract>This papers presents a platform for monitoring press narratives with respect to several social challenges, including gender equality, migrations and minority languages. As narratives are encoded in natural language, we have to use natural processing techniques to automate their analysis. Thus, crawled news are processed by means of several NLP modules, including named entity recognition, keyword extraction,document classification for social challenge detection, and sentiment analysis. A Flask powered interface provides data visualization for a user-based analysis of the data. This paper presents the architecture of the system and describes in detail its different components. Evaluation is provided for the modules related to extraction and classification of information regarding social challenges.</abstract>
@@ -313,7 +313,7 @@
       <title><fixed-case>CFILT</fixed-case> <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-<fixed-case>EACL</fixed-case>2021: Hope Speech Detection for Equality, Diversity, and Inclusion using Multilingual Representation from<fixed-case>T</fixed-case>ransformers</title>
       <author><first>Pankaj</first><last>Singh</last></author>
       <author><first>Prince</first><last>Kumar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>193–196</pages>
       <abstract>With the internet becoming part and parcel of our lives, engagement in social media has increased a lot. Identifying and eliminating offensive content from social media has become of utmost priority to prevent any kind of violence. However, detecting encouraging, supportive and positive content is equally important to prevent misuse of censorship targeted to attack freedom of speech. This paper presents our system for the shared task Hope Speech Detection for Equality, Diversity, and Inclusion at LT-EDI, EACL 2021. The data for this shared task is provided in English, Tamil, and Malayalam which was collected from YouTube comments. It is a multiclass classification problem where each data instance is categorized into one of the three classes: ‘Hope speech’, ‘Not hope speech’, and ‘Not in intended language’. We propose a system that employs multilingual transformer models to obtain the representation of text and classifies it into one of the three classes. We explored the use of multilingual models trained specifically for Indian languages along with generic multilingual models. Our system was ranked 2nd for English, 2nd for Malayalam, and 7th for the Tamil language in the final leader board published by organizers and obtained a weighted F1-score of 0.92, 0.84, 0.55 respectively on the hidden test dataset used for the competition. We have made our system publicly available at GitHub.</abstract>
       <url hash="9b981e4b">2021.ltedi-1.29</url>
diff --git a/data/xml/2021.maiworkshop.xml b/data/xml/2021.maiworkshop.xml
index 657fea8104..5099f1876a 100644
--- a/data/xml/2021.maiworkshop.xml
+++ b/data/xml/2021.maiworkshop.xml
@@ -26,7 +26,7 @@
       <title>Multimodal Weighted Fusion of Transformers for Movie Genre Classification</title>
       <author><first>Isaac</first><last>Rodríguez Bribiesca</last></author>
       <author><first>Adrián Pastor</first><last>López Monroy</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>1–5</pages>
       <abstract>The Multimodal Transformer showed to be a competitive model for multimodal tasks involving textual, visual and audio signals. However, as more modalities are involved, its late fusion by concatenation starts to have a negative impact on the model’s performance. Besides, interpreting model’s predictions becomes difficult, as one would have to look at the different attention activation matrices. In order to overcome these shortcomings, we propose to perform late fusion by adding a GMU module, which effectively allows the model to weight modalities at instance level, improving its performance while providing a better interpretabilty mechanism. In the experiments, we compare our proposed model (MulT-GMU) against the original implementation (MulT-Concat) and a SOTA model tested in a movie genre classification dataset. Our approach, MulT-GMU, outperforms both, MulT-Concat and previous SOTA model.</abstract>
       <url hash="a8109f04">2021.maiworkshop-1.1</url>
@@ -137,7 +137,7 @@
       <author><first>Li Erran</first><last>Li</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <author><first>Yi</first><last>Xu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Zheng</first><last>Du</last></author>
       <author><first>Belinda</first><last>Zeng</last></author>
       <pages>74–78</pages>
diff --git a/data/xml/2021.metanlp.xml b/data/xml/2021.metanlp.xml
index 91aae5c028..57c2bd7f8b 100644
--- a/data/xml/2021.metanlp.xml
+++ b/data/xml/2021.metanlp.xml
@@ -10,7 +10,7 @@
       <editor><first>Mandy</first><last>Korpusik</last></editor>
       <editor><first>Shuyan</first><last>Dong</last></editor>
       <editor><first>Ngoc Thang</first><last>Vu</last></editor>
-      <editor><first>Dilek</first><last>Hakkani-Tur</last></editor>
+      <editor id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -50,7 +50,7 @@
       <title>Zero-Shot Compositional Concept Learning</title>
       <author><first>Guangyue</first><last>Xu</last></author>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>19–27</pages>
       <abstract>In this paper, we study the problem of recognizing compositional attribute-object concepts within the zero-shot learning (ZSL) framework. We propose an episode-based cross-attention (EpiCA) network which combines merits of cross-attention mechanism and episode-based training strategy to recognize novel compositional concepts. Firstly, EpiCA bases on cross-attention to correlate conceptvisual information and utilizes the gated pooling layer to build contextualized representations for both images and concepts. The updated representations are used for a more indepth multi-modal relevance calculation for concept recognition. Secondly, a two-phase episode training strategy, especially the ransductive phase, is adopted to utilize unlabeled test examples to alleviate the low-resource learning problem. Experiments on two widelyused zero-shot compositional learning (ZSCL) benchmarks have demonstrated the effectiveness of the model compared with recent approaches on both conventional and generalized ZSCL settings.</abstract>
       <url hash="86f04ae6">2021.metanlp-1.3</url>
@@ -71,8 +71,8 @@
     <paper id="5">
       <title>On the cross-lingual transferability of multilingual prototypical models across <fixed-case>NLU</fixed-case> tasks</title>
       <author><first>Oralie</first><last>Cattan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <pages>36–43</pages>
       <abstract>Supervised deep learning-based approaches have been applied to task-oriented dialog and have proven to be effective for limited domain and language applications when a sufficient number of training examples are available. In practice, these approaches suffer from the drawbacks of domain-driven design and under-resourced languages. Domain and language models are supposed to grow and change as the problem space evolves. On one hand, research on transfer learning has demonstrated the cross-lingual ability of multilingual Transformers-based models to learn semantically rich representations. On the other, in addition to the above approaches, meta-learning have enabled the development of task and language learning algorithms capable of far generalization. Through this context, this article proposes to investigate the cross-lingual transferability of using synergistically few-shot learning with prototypical neural networks and multilingual Transformers-based models. Experiments in natural language understanding tasks on MultiATIS++ corpus shows that our approach substantially improves the observed transfer learning performances between the low and the high resource languages. More generally our approach confirms that the meaningful latent space learned in a given language can be can be generalized to unseen and under-resourced ones using meta-learning.</abstract>
       <url hash="e2dc8e45">2021.metanlp-1.5</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="8">
       <title>Semi-supervised Meta-learning for Cross-domain Few-shot Intent Classification</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
+      <author><first>Yue</first><last>Li</last></author>
       <author><first>Jiong</first><last>Zhang</last></author>
       <pages>67–75</pages>
       <abstract>Meta learning aims to optimize the model’s capability to generalize to new tasks and domains. Lacking a data-efficient way to create meta training tasks has prevented the application of meta-learning to the real-world few shot learning scenarios. Recent studies have proposed unsupervised approaches to create meta-training tasks from unlabeled data for free, e.g., the SMLMT method (Bansal et al., 2020a) constructs unsupervised multi-class classification tasks from the unlabeled text by randomly masking words in the sentence and let the meta learner choose which word to fill in the blank. This study proposes a semi-supervised meta-learning approach that incorporates both the representation power of large pre-trained language models and the generalization capability of prototypical networks enhanced by SMLMT. The semi-supervised meta training approach avoids overfitting prototypical networks on a small number of labeled training examples and quickly learns cross-domain task-specific representation only from a few supporting examples. By incorporating SMLMT with prototypical networks, the meta learner generalizes better to unseen domains and gains higher accuracy on out-of-scope examples without the heavy lifting of pre-training. We observe significant improvement in few-shot generalization after training only a few epochs on the intent classification tasks evaluated in a multi-domain setting.</abstract>
diff --git a/data/xml/2021.mmsr.xml b/data/xml/2021.mmsr.xml
index d29b604ad9..73480a2e88 100644
--- a/data/xml/2021.mmsr.xml
+++ b/data/xml/2021.mmsr.xml
@@ -6,7 +6,7 @@
       <editor><first>Lucia</first><last>Donatelli</last></editor>
       <editor><first>Nikhil</first><last>Krishnaswamy</last></editor>
       <editor><first>Kenneth</first><last>Lai</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Groningen, Netherlands (Online)</address>
       <month>June</month>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="8">
       <title>Incremental Unit Networks for Multimodal, Fine-grained Information State Representation</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>89–94</pages>
       <abstract>We offer a fine-grained information state annotation scheme that follows directly from the Incremental Unit abstract model of dialogue processing when used within a multimodal, co-located, interactive setting. We explain the Incremental Unit model and give an example application using the Localized Narratives dataset, then offer avenues for future research.</abstract>
diff --git a/data/xml/2021.mmtlrl.xml b/data/xml/2021.mmtlrl.xml
index fca787cce4..32450165aa 100644
--- a/data/xml/2021.mmtlrl.xml
+++ b/data/xml/2021.mmtlrl.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2021-11-09" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL 2021)</booktitle>
-      <editor><first>Thoudam</first><last>Doren Singh</last></editor>
+      <editor id="thoudam-doren-singh"><first>Thoudam</first><last>Doren Singh</last></editor>
       <editor><first>Cristina</first><last>España i Bonet</last></editor>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Josef</first><last>van Genabith</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></editor>
       <publisher>INCOMA Ltd.</publisher>
       <address>Online (Virtual Mode)</address>
       <month>September</month>
diff --git a/data/xml/2021.motra.xml b/data/xml/2021.motra.xml
index 3eec414004..40ab980d60 100644
--- a/data/xml/2021.motra.xml
+++ b/data/xml/2021.motra.xml
@@ -6,7 +6,7 @@
       <editor><first>Yuri</first><last>Bizzoni</last></editor>
       <editor><first>Elke</first><last>Teich</last></editor>
       <editor><first>Cristina</first><last>España-Bonet</last></editor>
-      <editor><first>Josef</first><last>van Genabith</last></editor>
+      <editor id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>online</address>
       <month>May</month>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>H</fixed-case>ei<fixed-case>C</fixed-case>i<fixed-case>C</fixed-case>: A simultaneous interpreting corpus combining product and pre-process data</title>
-      <author><first>Kerstin</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin</first><last>Kunz</last></author>
       <author><first>Christoph</first><last>Stoll</last></author>
       <author><first>Eva</first><last>Klüber</last></author>
       <pages>8–14</pages>
diff --git a/data/xml/2021.mrl.xml b/data/xml/2021.mrl.xml
index 5e09688a67..02a7018f61 100644
--- a/data/xml/2021.mrl.xml
+++ b/data/xml/2021.mrl.xml
@@ -21,7 +21,7 @@
     </frontmatter>
     <paper id="1">
       <title>Language Models are Few-shot Multilingual Learners</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Rosanne</first><last>Liu</last></author>
@@ -38,8 +38,8 @@
       <title>Learning Contextualised Cross-lingual Word Embeddings and Alignments for Extremely Low-Resource Languages Using Parallel Corpora</title>
       <author><first>Takashi</first><last>Wada</last></author>
       <author><first>Tomoharu</first><last>Iwata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>16–31</pages>
       <abstract>We propose a new approach for learning contextualised cross-lingual word embeddings based on a small parallel corpus (e.g. a few hundred sentence pairs). Our method obtains word embeddings via an LSTM encoder-decoder model that simultaneously translates and reconstructs an input sentence. Through sharing model parameters among different languages, our model jointly trains the word embeddings in a common cross-lingual space. We also propose to combine word and subword embeddings to make use of orthographic similarities across different languages. We base our experiments on real-world data from endangered languages, namely Yongning Na, Shipibo-Konibo, and Griko. Our experiments on bilingual lexicon induction and word alignment tasks show that our model outperforms existing methods by a large margin for most language pairs. These results demonstrate that, contrary to common belief, an encoder-decoder translation model is beneficial for learning cross-lingual representations even in extremely low-resource conditions. Furthermore, our model also works well on high-resource conditions, achieving state-of-the-art performance on a German-English word-alignment task.</abstract>
@@ -51,7 +51,7 @@
     <paper id="3">
       <title>Clustering Monolingual Vocabularies to Improve Cross-Lingual Generalization</title>
       <author><first>Riccardo</first><last>Bassani</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <pages>32–40</pages>
       <abstract>Multilingual language models exhibit better performance for some languages than for others (Singh et al., 2019), and many languages do not seem to benefit from multilingual sharing at all, presumably as a result of poor multilingual segmentation (Pyysal o et al., 2020). This work explores the idea of learning multilingual language models based on clustering of monolingual segments. We show significant improvements over standard multilingual segmentation and training across nine languages on a question answering task, both in a small model regime and for a model of the size of BERT-base.</abstract>
@@ -64,7 +64,7 @@
       <title>Do not neglect related languages: The case of low-resource <fixed-case>O</fixed-case>ccitan cross-lingual word embeddings</title>
       <author><first>Lisa</first><last>Woller</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>41–50</pages>
       <abstract>Cross-lingual word embeddings (CLWEs) have proven indispensable for various natural language processing tasks, e.g., bilingual lexicon induction (BLI). However, the lack of data often impairs the quality of representations. Various approaches requiring only weak cross-lingual supervision were proposed, but current methods still fail to learn good CLWEs for languages with only a small monolingual corpus. We therefore claim that it is necessary to explore further datasets to improve CLWEs in low-resource setups. In this paper we propose to incorporate data of related high-resource languages. In contrast to previous approaches which leverage independently pre-trained embeddings of languages, we (i) train CLWEs for the low-resource and a related language jointly and (ii) map them to the target language to build the final multilingual space. In our experiments we focus on Occitan, a low-resource Romance language which is often neglected due to lack of resources. We leverage data from French, Spanish and Catalan for training and evaluate on the Occitan-English BLI task. By incorporating supporting languages our method outperforms previous approaches by a large margin. Furthermore, our analysis shows that the degree of relatedness between an incorporated language and the low-resource language is critically important.</abstract>
       <url hash="00966483">2021.mrl-1.4</url>
@@ -75,7 +75,7 @@
     <paper id="5">
       <title>Specializing Multilingual Language Models: An Empirical Study</title>
       <author><first>Ethan C.</first><last>Chau</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>51–61</pages>
       <abstract>Pretrained multilingual language models have become a common tool in transferring NLP capabilities to low-resource languages, often with adaptations. In this work, we study the performance, extensibility, and interaction of two such adaptations: vocabulary augmentation and script transliteration. Our evaluations on part-of-speech tagging, universal dependency parsing, and named entity recognition in nine diverse low-resource languages uphold the viability of these approaches while raising new questions around how to optimally adapt multilingual models to low-resource settings.</abstract>
       <url hash="22575101">2021.mrl-1.5</url>
@@ -135,7 +135,7 @@
       <title>On the Cross-lingual Transferability of Contextualized Sense Embeddings</title>
       <author><first>Kiamehr</first><last>Rezaee</last></author>
       <author><first>Daniel</first><last>Loureiro</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>107–115</pages>
       <abstract>In this paper we analyze the extent to which contextualized sense embeddings, i.e., sense embeddings that are computed based on contextualized word embeddings, are transferable across languages. To this end, we compiled a unified cross-lingual benchmark for Word Sense Disambiguation. We then propose two simple strategies to transfer sense-specific knowledge across languages and test them on the benchmark. Experimental results show that this contextualized knowledge can be effectively transferred to similar languages through pre-trained multilingual language models, to the extent that they can out-perform monolingual representations learnednfrom existing language-specific data.</abstract>
@@ -258,7 +258,7 @@
       <author><first>Soojin</first><last>Chung</last></author>
       <author><first>Hyunsoo</first><last>Woo</last></author>
       <author><first>Min</first><last>Song</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>224–237</pages>
       <abstract>This paper presents a English-Korean parallel dataset that collects 381K news articles where 1,400 of them, comprising 10K sentences, are manually labeled for crosslingual named entity recognition (NER). The annotation guidelines for the two languages are developed in parallel, that yield the inter-annotator agreement scores of 91 and 88% for English and Korean respectively, indicating sublime quality annotation in our dataset. Three types of crosslingual learning approaches, direct model transfer, embedding projection, and annotation projection, are used to develop zero-shot Korean NER models. Our best model gives the F1-score of 51% that is very encouraging, considering the extremely distinct natures of these two languages. This is pioneering work that explores zero-shot cross-lingual learning between English and Korean and provides rich parallel annotation for a core NLP task such as named entity recognition.</abstract>
       <url hash="1073cb2a">2021.mrl-1.19</url>
diff --git a/data/xml/2021.mrqa.xml b/data/xml/2021.mrqa.xml
index 74c2ba6b17..d49195dfa6 100644
--- a/data/xml/2021.mrqa.xml
+++ b/data/xml/2021.mrqa.xml
@@ -26,7 +26,7 @@
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1–13</pages>
       <abstract>In this paper, we present the first multilingual FAQ dataset publicly available. We collected around 6M FAQ pairs from the web, in 21 different languages. Although this is significantly larger than existing FAQ retrieval datasets, it comes with its own challenges: duplication of content and uneven distribution of topics. We adopt a similar setup as Dense Passage Retrieval (DPR) and test various bi-encoders on this dataset. Our experiments reveal that a multilingual model based on XLM-RoBERTa achieves the best results, except for English. Lower resources languages seem to learn from one another as a multilingual model achieves a higher MRR than language-specific ones. Our qualitative analysis reveals the brittleness of the model on simple word changes. We publicly release our dataset, model, and training script.</abstract>
       <url hash="e9c21c43">2021.mrqa-1.1</url>
@@ -37,7 +37,7 @@
       <title>Rethinking the Objectives of Extractive Question Answering</title>
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>14–27</pages>
       <abstract>This work demonstrates that using the objective with independence assumption for modelling the span probability P (a_s , a_e ) = P (a_s )P (a_e) of span starting at position a_s and ending at position a_e has adverse effects. Therefore we propose multiple approaches to modelling joint probability P (a_s , a_e) directly. Among those, we propose a compound objective, composed from the joint probability while still keeping the objective with independence assumption as an auxiliary objective. We find that the compound objective is consistently superior or equal to other assumptions in exact match. Additionally, we identified common errors caused by the assumption of independence and manually checked the counterpart predictions, demonstrating the impact of the compound objective on the real examples. Our findings are supported via experiments with three extractive QA models (BIDAF, BERT, ALBERT) over six datasets and our code, individual results and manual analysis are available online.</abstract>
       <url hash="fc859555">2021.mrqa-1.2</url>
@@ -47,8 +47,8 @@
     <paper id="3">
       <title>What Would it Take to get Biomedical <fixed-case>QA</fixed-case> Systems into Practice?</title>
       <author><first>Gregory</first><last>Kell</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Andre</first><last>Jaun</last></author>
       <pages>28–41</pages>
       <abstract>Medical question answering (QA) systems have the potential to answer clinicians’ uncertainties about treatment and diagnosis on-demand, informed by the latest evidence. However, despite the significant progress in general QA made by the NLP community, medical QA systems are still not widely used in clinical environments. One likely reason for this is that clinicians may not readily trust QA system outputs, in part because transparency, trustworthiness, and provenance have not been key considerations in the design of such models. In this paper we discuss a set of criteria that, if met, we argue would likely increase the utility of biomedical QA systems, which may in turn lead to adoption of such systems in practice. We assess existing models, tasks, and datasets with respect to these criteria, highlighting shortcomings of previously proposed approaches and pointing toward what might be more usable QA systems.</abstract>
@@ -83,7 +83,7 @@
       <title>Can Question Generation Debias Question Answering Models? A Case Study on Question–Context Lexical Overlap</title>
       <author><first>Kazutoshi</first><last>Shinoda</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>63–72</pages>
       <abstract>Question answering (QA) models for reading comprehension have been demonstrated to exploit unintended dataset biases such as question–context lexical overlap. This hinders QA models from generalizing to under-represented samples such as questions with low lexical overlap. Question generation (QG), a method for augmenting QA datasets, can be a solution for such performance degradation if QG can properly debias QA datasets. However, we discover that recent neural QG models are biased towards generating questions with high lexical overlap, which can amplify the dataset bias. Moreover, our analysis reveals that data augmentation with these QG models frequently impairs the performance on questions with low lexical overlap, while improving that on questions with high lexical overlap. To address this problem, we use a synonym replacement-based approach to augment questions with low lexical overlap. We demonstrate that the proposed data augmentation approach is simple yet effective to mitigate the degradation problem with only 70k synthetic examples.</abstract>
       <url hash="361d679b">2021.mrqa-1.6</url>
@@ -93,8 +93,8 @@
     <paper id="7">
       <title>What Can a Generative Language Model Answer About a Passage?</title>
       <author><first>Douglas</first><last>Summers-Stay</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>73–81</pages>
       <abstract>Generative language models trained on large, diverse corpora can answer questions about a passage by generating the most likely continuation of the passage followed by a question/answer pair. However, accuracy rates vary depending on the type of question asked. In this paper we keep the passage fixed, and test with a wide variety of question types, exploring the strengths and weaknesses of the GPT-3 language model. We provide the passage and test questions as a challenge set for other language models.</abstract>
       <url hash="f415bd6d">2021.mrqa-1.7</url>
diff --git a/data/xml/2021.mtsummit.xml b/data/xml/2021.mtsummit.xml
index e413a58573..c065dd5994 100644
--- a/data/xml/2021.mtsummit.xml
+++ b/data/xml/2021.mtsummit.xml
@@ -8,7 +8,7 @@
       <month>August</month>
       <year>2021</year>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Francisco</first><last>Guzmán</last></editor>
+      <editor id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></editor>
       <url hash="7b81460c">2021.mtsummit-research</url>
       <venue>mtsummit</venue>
     </meta>
@@ -20,7 +20,7 @@
       <title>Learning Curricula for Multilingual Neural Machine Translation Training</title>
       <author><first>Gaurav</first><last>Kumar</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>1-9</pages>
       <url hash="d1a0e709">2021.mtsummit-research.1</url>
       <abstract>Low-resource Multilingual Neural Machine Translation (MNMT) is typically tasked with improving the translation performance on one or more language pairs with the aid of high-resource language pairs. In this paper and we propose two simple search based curricula – orderings of the multilingual training data – which help improve translation performance in conjunction with existing techniques such as fine-tuning. Additionally and we attempt to learn a curriculum for MNMT from scratch jointly with the training of the translation system using contextual multi-arm bandits. We show on the FLORES low-resource translation dataset that these learned curricula can provide better starting points for fine tuning and improve overall performance of the translation system.</abstract>
@@ -32,7 +32,7 @@
       <author><first>Dhanvanth</first><last>Boppana</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>10-22</pages>
       <url hash="dc619f86">2021.mtsummit-research.2</url>
       <abstract>Interactive-predictive translation is a collaborative iterative process and where human translators produce translations with the help of machine translation (MT) systems interactively. Various sampling techniques in active learning (AL) exist to update the neural MT (NMT) model in the interactive-predictive scenario. In this paper and we explore term based (named entity count (NEC)) and quality based (quality estimation (QE) and sentence similarity (Sim)) sampling techniques – which are used to find the ideal candidates from the incoming data – for human supervision and MT model’s weight updation. We carried out experiments with three language pairs and viz. German-English and Spanish-English and Hindi-English. Our proposed sampling technique yields 1.82 and 0.77 and 0.81 BLEU points improvements for German-English and Spanish-English and Hindi-English and respectively and over random sampling based baseline. It also improves the present state-of-the-art by 0.35 and 0.12 BLEU points for German-English and Spanish-English and respectively. Human editing effort in terms of number-of-words-changed also improves by 5 and 4 points for German-English and Spanish-English and respectively and compared to the state-of-the-art.</abstract>
@@ -42,7 +42,7 @@
       <title>Crosslingual Embeddings are Essential in <fixed-case>UNMT</fixed-case> for distant languages: An <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndo<fixed-case>A</fixed-case>ryan Case Study</title>
       <author><first>Tamali</first><last>Banerjee</last></author>
       <author><first>Rudra</first><last>V Murthy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last></author>
       <pages>23-34</pages>
       <url hash="d9f87513">2021.mtsummit-research.3</url>
       <abstract>Recent advances in Unsupervised Neural Machine Translation (UNMT) has minimized the gap between supervised and unsupervised machine translation performance for closely related language-pairs. However and the situation is very different for distant language pairs. Lack of overlap in lexicon and low syntactic similarity such as between English and IndoAryan languages leads to poor translation quality in existing UNMT systems. In this paper and we show that initialising the embedding layer of UNMT models with cross-lingual embeddings leads to significant BLEU score improvements over existing UNMT models where the embedding layer weights are randomly initialized. Further and freezing the embedding layer weights leads to better gains compared to updating the embedding layer weights during training. We experimented using Masked Sequence to Sequence (MASS) and Denoising Autoencoder (DAE) UNMT approaches for three distant language pairs. The proposed cross-lingual embedding initialization yields BLEU score improvement of as much as ten times over the baseline for English-Hindi and English-Bengali and English-Gujarati. Our analysis shows that initialising embedding layer with static cross-lingual embedding mapping is essential for training of UNMT models for distant language-pairs.</abstract>
@@ -55,7 +55,7 @@
       <author><first>Shivam</first><last>Mhaskar</last></author>
       <author><first>Sourabh</first><last>Deoghare</last></author>
       <author><first>Aman</first><last>Sehgal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>35-47</pages>
       <url hash="571ce526">2021.mtsummit-research.4</url>
       <abstract>In this paper and we explore different techniques of overcoming the challenges of low-resource in Neural Machine Translation (NMT) and specifically focusing on the case of English-Marathi NMT. NMT systems require a large amount of parallel corpora to obtain good quality translations. We try to mitigate the low-resource problem by augmenting parallel corpora or by using transfer learning. Techniques such as Phrase Table Injection (PTI) and back-translation and mixing of language corpora are used for enhancing the parallel data; whereas pivoting and multilingual embeddings are used to leverage transfer learning. For pivoting and Hindi comes in as assisting language for English-Marathi translation. Compared to baseline transformer model and a significant improvement trend in BLEU score is observed across various techniques. We have done extensive manual and automatic and qualitative evaluation of our systems. Since the trend in Machine Translation (MT) today is post-editing and measuring of Human Effort Reduction (HER) and we have given our preliminary observations on Translation Edit Rate (TER) vs. BLEU score study and where TER is regarded as a measure of HER.</abstract>
@@ -73,9 +73,9 @@
     </paper>
     <paper id="6">
       <title>The Effect of Domain and Diacritics in <fixed-case>Y</fixed-case>oruba–<fixed-case>E</fixed-case>nglish Neural Machine Translation</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Dana</first><last>Ruiter</last></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
       <author><first>Damilola</first><last>Adebonojo</last></author>
       <author><first>Adesina</first><last>Ayeni</last></author>
       <author><first>Mofe</first><last>Adeyemi</last></author>
@@ -92,7 +92,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>Integrating Unsupervised Data Generation into Self-Supervised Neural Machine Translation for Low-Resource Languages</title>
       <author><first>Dana</first><last>Ruiter</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>76-91</pages>
       <url hash="71ec5ae3">2021.mtsummit-research.7</url>
@@ -104,13 +104,13 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Antonio</first><last>Valerio Miceli Barone</last></author>
-      <author><first>Jindrich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindrich</first><last>Helcl</last></author>
       <author><first>Jonas</first><last>Waldendorf</last></author>
       <author><first>Felipe</first><last>Sánchez Martínez</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <author><first>Víctor</first><last>Sánchez Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Lina</first><last>Murady</last></author>
       <author><first>Sevi</first><last>Sariisik</last></author>
@@ -144,7 +144,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>Scrambled Translation Problem: A Problem of Denoising <fixed-case>UNMT</fixed-case></title>
       <author><first>Tamali</first><last>Banerjee</last></author>
       <author><first>Rudra</first><last>V Murthy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last></author>
       <pages>127-138</pages>
       <url hash="92ca6c7d">2021.mtsummit-research.11</url>
       <abstract>In this paper and we identify an interesting kind of error in the output of Unsupervised Neural Machine Translation (UNMT) systems like Undreamt1. We refer to this error type as Scrambled Translation problem. We observe that UNMT models which use word shuffle noise (as in case of Undreamt) can generate correct words and but fail to stitch them together to form phrases. As a result and words of the translated sentence look scrambled and resulting in decreased BLEU. We hypothesise that the reason behind scrambled translation problem is ’shuffling noise’ which is introduced in every input sentence as a denoising strategy. To test our hypothesis and we experiment by retraining UNMT models with a simple retraining strategy. We stop the training of the Denoising UNMT model after a pre-decided number of iterations and resume the training for the remaining iterations- which number is also pre-decided- using original sentence as input without adding any noise. Our proposed solution achieves significant performance improvement UNMT models that train conventionally. We demonstrate these performance gains on four language pairs and viz. and English-French and English-German and English-Spanish and Hindi-Punjabi. Our qualitative and quantitative analysis shows that the retraining strategy helps achieve better alignment as observed by attention heatmap and better phrasal translation and leading to statistically significant improvement in BLEU scores.</abstract>
@@ -177,7 +177,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     </paper>
     <paper id="14">
       <title>On nature and causes of observed <fixed-case>MT</fixed-case> errors</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <pages>163-175</pages>
       <url hash="e5ca11cd">2021.mtsummit-research.14</url>
       <abstract>This work describes analysis of nature and causes of MT errors observed by different evaluators under guidance of different quality criteria: adequacy and comprehension and and a not specified generic mixture of adequacy and fluency. We report results for three language pairs and two domains and eleven MT systems. Our findings indicate that and despite the fact that some of the identified phenomena depend on domain and/or language and the following set of phenomena can be considered as generally challenging for modern MT systems: rephrasing groups of words and translation of ambiguous source words and translating noun phrases and and mistranslations. Furthermore and we show that the quality criterion also has impact on error perception. Our findings indicate that comprehension and adequacy can be assessed simultaneously by different evaluators and so that comprehension and as an important quality criterion and can be included more often in human evaluations.</abstract>
@@ -187,7 +187,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>A Comparison of Sentence-Weighting Techniques for <fixed-case>NMT</fixed-case></title>
       <author><first>Simon</first><last>Rieß</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <pages>176-187</pages>
       <url hash="6e7c5d5a">2021.mtsummit-research.15</url>
       <abstract>Sentence weighting is a simple and powerful domain adaptation technique. We carry out domain classification for computing sentence weights with 1) language model cross entropy difference 2) a convolutional neural network 3) a Recursive Neural Tensor Network. We compare these approaches with regard to domain classification accuracy and and study the posterior probability distributions. Then we carry out NMT experiments in the scenario where we have no in-domain parallel corpora and and only very limited in-domain monolingual corpora. Here and we use the domain classifier to reweight the sentences of our out-of-domain training corpus. This leads to improvements of up to 2.1 BLEU for German to English translation.</abstract>
@@ -196,7 +196,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="16">
       <title>Sentiment-based Candidate Selection for <fixed-case>NMT</fixed-case></title>
       <author><first>Alexander</first><last>Jones</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>188-201</pages>
       <url hash="6b20bd43">2021.mtsummit-research.16</url>
       <abstract>The explosion of user-generated content (UGC)—e.g. social media posts and comments and and reviews—has motivated the development of NLP applications tailored to these types of informal texts. Prevalent among these applications have been sentiment analysis and machine translation (MT). Grounded in the observation that UGC features highly idiomatic and sentiment-charged language and we propose a decoder-side approach that incorporates automatic sentiment scoring into the MT candidate selection process. We train monolingual sentiment classifiers in English and Spanish and in addition to a multilingual sentiment model and by fine-tuning BERT and XLM-RoBERTa. Using n-best candidates generated by a baseline MT model with beam search and we select the candidate that minimizes the absolute difference between the sentiment score of the source sentence and that of the translation and and perform two human evaluations to assess the produced translations. Unlike previous work and we select this minimally divergent translation by considering the sentiment scores of the source sentence and translation on a continuous interval and rather than using e.g. binary classification and allowing for more fine-grained selection of translation candidates. The results of human evaluations show that and in comparison to the open-source MT baseline model on top of which our sentiment-based pipeline is built and our pipeline produces more accurate translations of colloquial and sentiment-heavy source texts.</abstract>
@@ -253,7 +253,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="22">
       <title>Introducing Mouse Actions into Interactive-Predictive Neural Machine Translation</title>
       <author><first>Ángel</first><last>Navarro</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>270-281</pages>
       <url hash="9dbd11ef">2021.mtsummit-research.22</url>
       <abstract>The quality of the translations generated by Machine Translation (MT) systems has highly improved through the years and but we are still far away to obtain fully automatic high-quality translations. To generate them and translators make use of Computer-Assisted Translation (CAT) tools and among which we find the Interactive-Predictive Machine Translation (IPMT) systems. In this paper and we use bandit feedback as the main and only information needed to generate new predictions that correct the previous translations. The application of bandit feedback reduces significantly the number of words that the translator need to type in an IPMT session. In conclusion and the use of this technique saves useful time and effort to translators and its performance improves with the future advances in MT and so we recommend its application in the actuals IPMT systems.</abstract>
@@ -298,7 +298,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="1">
       <title>Seed Words Based Data Selection for Language Model Adaptation</title>
       <author><first>Roberto</first><last>Gretter</last></author>
-      <author><first>Marco</first><last>Matassoni</last></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <pages>1-12</pages>
       <url hash="b0308a90">2021.mtsummit-asltrw.1</url>
@@ -308,7 +308,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="2">
       <title>Post-Editing Job Profiles for Subtitlers</title>
       <author><first>Anke</first><last>Tardel</last></author>
-      <author><first>Silvia</first><last>Hansen-Schirra</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></author>
       <author><first>Jean</first><last>Nitzke</last></author>
       <pages>13-22</pages>
       <url hash="7fd9ca14">2021.mtsummit-asltrw.2</url>
@@ -317,10 +317,10 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     </paper>
     <paper id="3">
       <title>Operating a Complex <fixed-case>SLT</fixed-case> System with Speakers and Human Interpreters</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Vojtěch</first><last>Srdečný</last></author>
       <author><first>Rishu</first><last>Kumar</last></author>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Phil</first><last>Williams</last></author>
@@ -334,7 +334,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>Simultaneous Speech Translation for Live Subtitling: from Delay to Display</title>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Sara</first><last>Papi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>35-48</pages>
       <url hash="3ecf36a9">2021.mtsummit-asltrw.4</url>
@@ -402,7 +402,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Frankie</first><last>Picron</last></author>
       <author><first>Davy</first><last>Van Landuyt</last></author>
       <author><first>Tina</first><last>Sioen</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
       <author><first>Thomas</first><last>Hanke</last></author>
@@ -422,7 +422,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Amelie</first><last>Unger</last></author>
       <author><first>Kristoffer</first><last>Waldow</last></author>
       <author><first>Sonja</first><last>Wecker</last></author>
-      <author><first>Elisabeth</first><last>André</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>André</last></author>
       <author><first>Stephan</first><last>Busemann</last></author>
       <author><first>Christian</first><last>Dold</last></author>
       <author><first>Arnulph</first><last>Fuhrmann</last></author>
@@ -570,8 +570,8 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="6">
       <title>Corpus Creation and Evaluation for Speech-to-Text and Speech Translation</title>
       <author><first>Corey</first><last>Miller</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
-      <author><first>Jennifer</first><last>Doyon</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></author>
       <author><first>Elizabeth</first><last>Mallard</last></author>
       <pages>44-53</pages>
       <url hash="a6d2393f">2021.mtsummit-up.6</url>
@@ -581,7 +581,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="7">
       <title>From Research to Production: Fine-Grained Analysis of Terminology Integration</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Paula</first><last>Reichenberg</last></author>
       <pages>54-77</pages>
       <abstract>Dynamic terminology integration in neural machine translation (NMT) is a sought-after feature of computer-aided translation tools among language service providers and small to medium businesses. Despite the recent surge in research on terminology integration in NMT, it still is seldom or inadequately supported in commercial machine translation solutions. In this presentation, we will share our experience of developing and deploying terminology integration capabilities for NMT systems in production. We will look at the three core tasks of terminology integration: terminology management, terminology identification, and translation with terminology. This talk will be insightful for NMT system developers, translators, terminologists, and anyone interested in translation projects.</abstract>
@@ -591,7 +591,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="8">
       <title>Glossary functionality in commercial machine translation: does it help? A first step to identify best practices for a language service provider</title>
       <author><first>Randy</first><last>Scansani</last></author>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <pages>78-88</pages>
       <url hash="fae6fc1a">2021.mtsummit-up.8</url>
       <abstract>Recently, a number of commercial Machine Translation (MT) providers have started to offer glossary features allowing users to enforce terminology into the output of a generic model. However, to the best of our knowledge it is not clear how such features would impact terminology accuracy and the overall quality of the output. The present contribution aims at providing a first insight into the performance of the glossary-enhanced generic models offered by four providers. Our tests involve two different domains and language pairs, i.e. Sportswear En–Fr and Industrial Equipment De–En. The output of each generic model and of the glossaryenhanced one will be evaluated relying on Translation Error Rate (TER) to take into account the overall output quality and on accuracy to assess the compliance with the glossary. This is followed by a manual evaluation. The present contribution mainly focuses on understanding how these glossary features can be fruitfully exploited by language service providers (LSPs), especially in a scenario in which a customer glossary is already available and is added to the generic model as is.</abstract>
@@ -668,7 +668,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>Field Experiments of Real Time Foreign News Distribution Powered by <fixed-case>MT</fixed-case></title>
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Ichiro</first><last>Yamada</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Hidehiro</first><last>Asaka</last></author>
       <author><first>Takeshi</first><last>Anzai</last></author>
@@ -732,16 +732,16 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     </paper>
     <paper id="23">
       <title>Neural Translation for <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion (<fixed-case>NTEU</fixed-case>)</title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
       <author><first>Aleix</first><last>Cerdà</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Tony</first><last>O’Dowd</last></author>
       <author><first>Sinead</first><last>O’Gorman</last></author>
-      <author><first>Marcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
       <author><first>Artūrs</first><last>Stafanovič</last></author>
       <author><first>Riccardo</first><last>Superbo</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
@@ -780,7 +780,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     </paper>
     <paper id="26">
       <title>Using speech technology in the translation process workflow in international organizations: A quantitative and qualitative study</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Jeevanthi</first><last>Liyanapathirana</last></author>
       <pages>382-395</pages>
       <abstract>In international organizations, the growing demand for translations has increased the need for post-editing. Different studies show that automatic speech recognition systems have the potential to increase the productivity of the translation process as well as the quality. In this talk, we will explore the possibilities of using speech in the translation process by conducting a post-editing experiment with three professional translators in an international organization. Our experiment consisted of comparing three translation methods: speaking the translation with MT as an inspiration (RESpeaking), post-editing the MT suggestions by typing (PE), and editing the MT suggestion using speech (SPE). BLEU and HTER scores were used to compare the three methods. Our study shows that translators did more edits under condition RES, whereas in SPE, the resulting translations were closer to the reference according to the BLEU score and required less edits. Time taken to translate was the least in SPE followed by PE, RES methods and the translators preferred using speech to typing. These results show the potential of speech when it is coupled with post-editing. To the best of our knowledge, this is the first quantitative study conducted on using post-editing and speech together in large scale international organizations.</abstract>
@@ -836,8 +836,8 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <month>August</month>
       <year>2021</year>
       <editor><first>John</first><last>Ortega</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
-      <editor><first>Katharina</first><last>Kann</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
       <url hash="2510de92">2021.mtsummit-loresmt</url>
       <venue>loresmt</venue>
@@ -852,7 +852,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Chanjun</first><last>Park</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>1-10</pages>
       <url hash="b693ee3a">2021.mtsummit-loresmt.1</url>
       <abstract>In quality estimation (QE), the quality of translation can be predicted by referencing the source sentence and the machine translation (MT) output without access to the reference sentence. However, there exists a paradox in that constructing a dataset for creating a QE model requires non-trivial human labor and time, and it may even requires additional effort compared to the cost of constructing a parallel corpus. In this study, to address this paradox and utilize the various applications of QE, even in low-resource languages (LRLs), we propose a method for automatically constructing a pseudo-QE dataset without using human labor. We perform a comparative analysis on the pseudo-QE dataset using multilingual pre-trained language models. As we generate the pseudo dataset, we conduct experiments using various external machine translators as test sets to verify the accuracy of the results objectively. Also, the experimental results show that multilingual BART demonstrates the best performance, and we confirm the applicability of QE in LRLs using pseudo-QE dataset construction methods.</abstract>
@@ -879,7 +879,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="4">
       <title>Active Learning for Massively Parallel Translation of Constrained Text into Low Resource Languages</title>
       <author><first>Zhong</first><last>Zhou</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>32-43</pages>
       <url hash="31d3d9e9">2021.mtsummit-loresmt.4</url>
       <abstract>We translate a closed text that is known in advance and available in many languages into a new and severely low resource language. Most human translation efforts adopt a portionbased approach to translate consecutive pages/chapters in order, which may not suit machine translation. We compare the portion-based approach that optimizes coherence of the text locally with the random sampling approach that increases coverage of the text globally. Our results show that the random sampling approach performs better. When training on a seed corpus of ∼1,000 lines from the Bible and testing on the rest of the Bible (∼30,000 lines), random sampling gives a performance gain of +11.0 BLEU using English as a simulated low resource language, and +4.9 BLEU using Eastern Pokomchi, a Mayan language. Furthermore, we compare three ways of updating machine translation models with increasing amount of human post-edited data through iterations. We find that adding newly post-edited data to training after vocabulary update without self-supervision performs the best. We propose an algorithm for human and machine to work together seamlessly to translate a closed text into a severely low resource language.</abstract>
@@ -902,7 +902,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Caitlin</first><last>Smith</last></author>
       <author><first>Eric</first><last>Rosen</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Yichen</first><last>Jiang</last></author>
       <author><first>Coleman</first><last>Haley</last></author>
       <author><first>Roland</first><last>Fernandez</last></author>
@@ -939,7 +939,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Abdullah</first><last>Faiz Ur Rahman Khilji Darsh Kaushik</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>89-95</pages>
       <url hash="a69fcce0">2021.mtsummit-loresmt.9</url>
       <abstract>In machine translation, corpus preparation is one of the crucial tasks, particularly for lowresource pairs. In multilingual countries like India, machine translation plays a vital role in communication among people with various linguistic backgrounds. There are available online automatic translation systems by Google and Microsoft which include various languages which lack support for the Khasi language, which can hence be considered lowresource. This paper overviews the development of EnKhCorp1.0, a corpus for English–Khasi pair, and implemented baseline systems for EnglishtoKhasi and KhasitoEnglish translation based on the neural machine translation approach.</abstract>
@@ -948,7 +948,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="10">
       <title>Zero-Shot Neural Machine Translation with Self-Learning Cycle</title>
       <author><first>Surafel M.</first><last>Lakew</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>96-113</pages>
       <url hash="92554ecc">2021.mtsummit-loresmt.10</url>
@@ -971,7 +971,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="12">
       <title>A3-108 Machine Translation System for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> Shared Task @<fixed-case>MT</fixed-case> Summit 2021 Conference</title>
       <author><first>Saumitra</first><last>Yadav</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>124-128</pages>
       <url hash="7640ab5e">2021.mtsummit-loresmt.12</url>
       <abstract>In this paper, we describe our submissions for LoResMT Shared Task @MT Summit 2021 Conference. We built statistical translation systems in each direction for English ⇐⇒ Marathi language pair. This paper outlines initial baseline experiments with various tokenization schemes to train models. Using optimal tokenization scheme we create synthetic data and further train augmented dataset to create more statistical models. Also, we reorder English to match Marathi syntax to further train another set of baseline and data augmented models using various tokenization schemes. We report configuration of the submitted systems and results produced by them.</abstract>
@@ -1013,7 +1013,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
     <paper id="16">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>arathi Neural Machine Translation for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2021</title>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>151-157</pages>
       <url hash="91f17898">2021.mtsummit-loresmt.16</url>
       <abstract>In this paper, we (team - oneNLP-IIITH) describe our Neural Machine Translation approaches for English-Marathi (both direction) for LoResMT-20211 . We experimented with transformer based Neural Machine Translation and explored the use of different linguistic features like POS and Morph on subword unit for both English-Marathi and Marathi-English. In addition, we have also explored forward and backward translation using web-crawled monolingual data. We obtained 22.2 (overall 2 nd) and 31.3 (overall 1 st) BLEU scores for English-Marathi and Marathi-English on respectively</abstract>
@@ -1023,7 +1023,7 @@ Our models outperform massively multilingual models such as Google (<tex-math>+8
       <title>Evaluating the Performance of Back-translation for Low Resource <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>arathi Language Pair: <fixed-case>CFILT</fixed-case>-<fixed-case>IITB</fixed-case>ombay @ <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2021</title>
       <author><first>Aditya</first><last>Jain</last></author>
       <author><first>Shivam</first><last>Mhaskar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>158-162</pages>
       <url hash="ec6106f8">2021.mtsummit-loresmt.17</url>
       <abstract>In this paper, we discuss the details of the various Machine Translation (MT) systems that we have submitted for the English-Marathi LoResMT task. As a part of this task, we have submitted three different Neural Machine Translation (NMT) systems; a Baseline English-Marathi system, a Baseline Marathi-English system, and an English-Marathi system that is based on the back-translation technique. We explore the performance of these NMT systems between English and Marathi languages, which forms a low resource language pair due to unavailability of sufficient parallel data. We also explore the performance of the back-translation technique when the back-translated data is obtained from NMT systems that are trained on a very less amount of data. From our experiments, we observe that the back-translation technique can help improve the MT quality over the baseline for the English-Marathi language pair.</abstract>
diff --git a/data/xml/2021.mwe.xml b/data/xml/2021.mwe.xml
index b0a4dd9d06..926f052253 100644
--- a/data/xml/2021.mwe.xml
+++ b/data/xml/2021.mwe.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021)</booktitle>
       <editor><first>Paul</first><last>Cook</last></editor>
       <editor><first>Jelena</first><last>Mitrović</last></editor>
-      <editor><first>Carla Parra</first><last>Escartín</last></editor>
+      <editor id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></editor>
       <editor><first>Ashwini</first><last>Vaidya</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
       <editor><first>Shiva</first><last>Taslimipoor</last></editor>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Where Do Aspectual Variants of Light Verb Constructions Belong?</title>
       <author><first>Aggeliki</first><last>Fotopoulou</last></author>
-      <author><first>Eric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Eric</first><last>Laporte</last></author>
       <author><first>Takuya</first><last>Nakamura</last></author>
       <pages>2–12</pages>
       <abstract>Expressions with an aspectual variant of a light verb, e.g. ‘take on debt’ vs. ‘have debt’, are frequent in texts but often difficult to classify between verbal idioms, light verb constructions or compositional phrases. We investigate the properties of such expressions with a disputed membership and propose a selection of features that determine more satisfactory boundaries between the three categories in this zone, assigning the expressions to one of them.</abstract>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="6">
       <title>Lexical Semantic Recognition</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Michael</first><last>Kranzlein</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
@@ -90,7 +90,7 @@
     <paper id="7">
       <title>Finding <fixed-case>BERT</fixed-case>’s Idiomatic Key</title>
       <author><first>Vasudevan</first><last>Nedumpozhimana</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>57–62</pages>
       <abstract>Sentence embeddings encode information relating to the usage of idioms in a sentence. This paper reports a set of experiments that combine a probing methodology with input masking to analyse where in a sentence this idiomatic information is taken from, and what form it takes. Our results indicate that BERT’s idiomatic key is primarily found within an idiomatic expression, but also draws on information from the surrounding context. Also, BERT can distinguish between the disruption in a sentence caused by words missing and the incongruity caused by idiomatic usage.</abstract>
       <url hash="28777a72">2021.mwe-1.7</url>
diff --git a/data/xml/2021.naacl.xml b/data/xml/2021.naacl.xml
index 12ba7ee475..5adc0f0989 100644
--- a/data/xml/2021.naacl.xml
+++ b/data/xml/2021.naacl.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <editor><first>Kristina</first><last>Toutanova</last></editor>
       <editor><first>Anna</first><last>Rumshisky</last></editor>
-      <editor><first>Luke</first><last>Zettlemoyer</last></editor>
-      <editor><first>Dilek</first><last>Hakkani-Tur</last></editor>
+      <editor id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></editor>
+      <editor id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></editor>
       <editor><first>Iz</first><last>Beltagy</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
       <editor><first>Ryan</first><last>Cotterell</last></editor>
@@ -89,7 +89,7 @@
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Qiang</first><last>Ning</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <author><first>Hanghang</first><last>Tong</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>62–73</pages>
@@ -102,9 +102,9 @@
     <paper id="7">
       <title>Probing Word Translations in the Transformer and Trading Decoder for Encoder Layers</title>
       <author><first>Hongfei</first><last>Xu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>74–85</pages>
       <abstract>Due to its effectiveness and performance, the Transformer translation model has attracted wide attention, most recently in terms of probing-based approaches. Previous work focuses on using or probing source linguistic features in the encoder. To date, the way word translation evolves in Transformer layers has not yet been investigated. Naively, one might assume that encoder layers capture source information while decoder layers translate. In this work, we show that this is not quite the case: translation already happens progressively in encoder layers and even in the input embeddings. More surprisingly, we find that some of the lower decoder layers do not actually do that much decoding. We show all of this in terms of a probing approach where we project representations of the layer analyzed to the final trained and frozen classifier level of the Transformer decoder to measure word translation accuracy. Our findings motivate and explain a Transformer configuration change: if translation already happens in the encoder layers, perhaps we can increase the number of encoder layers, while decreasing the number of decoder layers, boosting decoding speed, without loss in translation quality? Our experiments show that this is indeed the case: we can increase speed by up to a factor 2.3 with small gains in translation quality, while an 18-4 deep encoder configuration boosts translation quality by +1.42 BLEU (En-De) at a speed-up of 1.4.</abstract>
       <url hash="7c15d4e4">2021.naacl-main.7</url>
@@ -193,10 +193,10 @@
       <author><first>Vilém</first><last>Zouhar</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Matúš</first><last>Žilinec</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Mateo</first><last>Obregón</last></author>
-      <author><first>Robin L.</first><last>Hill</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="robin-l-hill"><first>Robin L.</first><last>Hill</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Lisa</first><last>Yankovskaya</last></author>
@@ -213,7 +213,7 @@
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Joris</first><last>Vanvinckenroye</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>162–172</pages>
       <abstract>Data filtering for machine translation (MT) describes the task of selecting a subset of a given, possibly noisy corpus with the aim to maximize the performance of an MT system trained on this selected data. Over the years, many different filtering approaches have been proposed. However, varying task definitions and data conditions make it difficult to draw a meaningful comparison. In the present work, we aim for a more systematic approach to the task at hand. First, we analyze the performance of language identification, a tool commonly used for data filtering in the MT community and identify specific weaknesses. Based on our findings, we then propose several novel methods for data filtering, based on cross-lingual word embeddings. We compare our approaches to one of the winning methods from the WMT 2018 shared task on parallel corpus filtering on three real-life, high resource MT tasks. We find that said method, which was performing very strong in the WMT shared task, does not perform well within our more realistic task conditions. While we find that our approaches come out at the top on all three tasks, different variants perform best on different tasks. Further experiments on the WMT 2020 shared task for parallel corpus filtering show that our methods achieve comparable results to the strongest submissions of this campaign.</abstract>
       <url hash="d2ab04a6">2021.naacl-main.15</url>
@@ -225,7 +225,7 @@
       <title>Improving the Lexical Ability of Pretrained Language Models for Unsupervised Neural Machine Translation</title>
       <author><first>Alexandra</first><last>Chronopoulou</last></author>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>173–180</pages>
       <abstract>Successful methods for unsupervised neural machine translation (UNMT) employ cross-lingual pretraining via self-supervision, often in the form of a masked language modeling or a sequence generation task, which requires the model to align the lexical- and high-level representations of the two languages. While cross-lingual pretraining works for similar languages with abundant corpora, it performs poorly in low-resource and distant languages. Previous research has shown that this is because the representations are not sufficiently aligned. In this paper, we enhance the bilingual masked language model pretraining with lexical-level information by using type-level cross-lingual subword embeddings. Empirical results demonstrate improved performance both on UNMT (up to 4.5 BLEU) and bilingual lexicon induction using our method compared to a UNMT baseline.</abstract>
       <url hash="71a775df">2021.naacl-main.16</url>
@@ -248,7 +248,7 @@
       <title>Counterfactual Data Augmentation for Neural Machine Translation</title>
       <author><first>Qi</first><last>Liu</last></author>
       <author><first>Matt</first><last>Kusner</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>187–197</pages>
       <abstract>We propose a data augmentation method for neural machine translation. It works by interpreting language models and phrasal alignment causally. Specifically, it creates augmented parallel translation corpora by generating (path-specific) counterfactual aligned phrases. We generate these by sampling new source phrases from a masked language model, then sampling an aligned counterfactual target phrase by noting that a translation language model can be interpreted as a Gumbel-Max Structural Causal Model (Oberst and Sontag, 2019). Compared to previous work, our method takes both context and alignment into account to maintain the symmetry between source and target sequences. Experiments on IWSLT’15 English → Vietnamese, WMT’17 English → German, WMT’18 English → Turkish, and WMT’19 robust English → French show that the method can improve the performance of translation, backtranslation and translation robustness.</abstract>
       <url hash="f1cab7c2">2021.naacl-main.18</url>
@@ -262,7 +262,7 @@
       <author><first>Isidora</first><last>Tourni</last></author>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>198–209</pages>
       <abstract>Neural Machine Translation (NMT) models have been observed to produce poor translations when there are few/no parallel sentences to train the models. In the absence of parallel data, several approaches have turned to the use of images to learn translations. Since images of words, e.g., horse may be unchanged across languages, translations can be identified via images associated with words in different languages that have a high degree of visual similarity. However, translating via images has been shown to improve upon text-only models only marginally. To better understand when images are useful for translation, we study image translatability of words, which we define as the translatability of words via images, by measuring intra- and inter-cluster similarities of image representations of words that are translations of each other. We find that images of words are not always invariant across languages, and that language pairs with shared culture, meaning having either a common language family, ethnicity or religion, have improved image translatability (i.e., have more similar images for similar words) compared to its converse, regardless of their geographic proximity. In addition, in line with previous works that show images help more in translating concrete words, we found that concrete words have improved image translatability compared to abstract ones.</abstract>
       <url hash="efb60ccb">2021.naacl-main.19</url>
@@ -289,7 +289,7 @@
       <title>A Million Tweets Are Worth a Few Points: Tuning Transformers for Customer Service Tasks</title>
       <author><first>Amir</first><last>Hadifar</last></author>
       <author><first>Sofie</first><last>Labat</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Chris</first><last>Develder</last></author>
       <author><first>Thomas</first><last>Demeester</last></author>
       <pages>220–225</pages>
@@ -366,7 +366,7 @@
       <author><first>Dingmin</first><last>Wang</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <author><first>Qi</first><last>Liu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>289–295</pages>
       <abstract>We present a fast and scalable architecture called Explicit Modular Decomposition (EMD), in which we incorporate both classification-based and extraction-based methods and design four modules (for clas- sification and sequence labelling) to jointly extract dialogue states. Experimental results based on the MultiWoz 2.0 dataset validates the superiority of our proposed model in terms of both complexity and scalability when compared to the state-of-the-art methods, especially in the scenario of multi-domain dialogues entangled with many turns of utterances.</abstract>
       <url hash="d8123d89">2021.naacl-main.27</url>
@@ -428,7 +428,7 @@
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Jannis</first><last>Bulian</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <pages>352–365</pages>
       <abstract>We release FoolMeTwice (FM2 for short), a large dataset of challenging entailment pairs collected through a fun multi-player game. Gamification encourages adversarial examples, drastically lowering the number of examples that can be solved using “shortcuts” compared to other popular entailment datasets. Players are presented with two tasks. The first task asks the player to write a plausible claim based on the evidence from a Wikipedia page. The second one shows two plausible claims written by other players, one of which is false, and the goal is to identify it before the time runs out. Players “pay” to see clues retrieved from the evidence pool: the more evidence the player needs, the harder the claim. Game-play between motivated players leads to diverse strategies for crafting claims, such as temporal inference and diverting to unrelated evidence, and results in higher quality data for the entailment and evidence retrieval tasks. We open source the dataset and the game code.</abstract>
@@ -464,7 +464,7 @@
     <paper id="35">
       <title>Text Generation from Discourse Representation Structures</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>397–415</pages>
       <abstract>We propose neural models to generate text from formal meaning representations based on Discourse Representation Structures (DRSs). DRSs are document-level representations which encode rich semantic detail pertaining to rhetorical relations, presupposition, and co-reference within and across sentences. We formalize the task of neural DRS-to-text generation and provide modeling solutions for the problems of condition ordering and variable naming which render generation from DRSs non-trivial. Our generator relies on a novel sibling treeLSTM model which is able to accurately represent DRS structures and is more generally suited to trees with wide branches. We achieve competitive performance (59.48 BLEU) on the GMB benchmark against several strong baselines.</abstract>
@@ -491,7 +491,7 @@
     <paper id="37">
       <title><fixed-case>DART</fixed-case>: Open-Domain Structured Data Record to Text Generation</title>
       <author><first>Linyong</first><last>Nan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Amrit</first><last>Rau</last></author>
       <author><first>Abhinand</first><last>Sivaprasad</last></author>
@@ -510,7 +510,7 @@
       <author><first>Ankit</first><last>Gupta</last></author>
       <author><first>Tao</first><last>Yu</last></author>
       <author><first>Yi Chern</first><last>Tan</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
@@ -525,8 +525,8 @@
       <title>When Being Unseen from m<fixed-case>BERT</fixed-case> is just the Beginning: Handling New Languages With Multilingual Language Models</title>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>448–462</pages>
       <abstract>Transfer learning based on pretraining language models on a large amount of raw data has become a new norm to reach state-of-the-art performance in NLP. Still, it remains unclear how this approach should be applied for unseen languages that are not covered by any available large-scale multilingual language model and for which only a small amount of raw data is generally available. In this work, by comparing multilingual and monolingual models, we show that such models behave in multiple ways on unseen languages. Some languages greatly benefit from transfer learning and behave similarly to closely related high resource languages whereas others apparently do not. Focusing on the latter, we show that this failure to transfer is largely related to the impact of the script used to write such languages. We show that transliterating those languages significantly improves the potential of large-scale multilingual language models on downstream tasks. This result provides a promising direction towards making these massively multilingual models useful for a new set of unseen languages.</abstract>
       <url hash="8ecc9794">2021.naacl-main.38</url>
@@ -537,7 +537,7 @@
     <paper id="39">
       <title>Multi-Adversarial Learning for Cross-Lingual Word Embeddings</title>
       <author><first>Haozhou</first><last>Wang</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>463–472</pages>
       <abstract>Generative adversarial networks (GANs) have succeeded in inducing cross-lingual word embeddings - maps of matching words across languages - without supervision. Despite these successes, GANs’ performance for the difficult case of distant languages is still not satisfactory. These limitations have been explained by GANs’ incorrect assumption that source and target embedding spaces are related by a single linear mapping and are approximately isomorphic. We assume instead that, especially across distant languages, the mapping is only piece-wise linear, and propose a multi-adversarial learning method. This novel method induces the seed cross-lingual dictionary through multiple mappings, each induced to fit the mapping for one subspace. Our experiments on unsupervised bilingual lexicon induction and cross-lingual document classification show that this method improves performance over previous single-mapping methods, especially for distant languages.</abstract>
@@ -583,7 +583,7 @@
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
       <author><first>Milad</first><last>Shokouhi</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>499–511</pages>
       <abstract>The combination of multilingual pre-trained representations and cross-lingual transfer learning is one of the most effective methods for building functional NLP systems for low-resource languages. However, for extremely low-resource languages without large-scale monolingual corpora for pre-training or sufficient annotated data for fine-tuning, transfer learning remains an understudied and challenging task. Moreover, recent work shows that multilingual representations are surprisingly disjoint across languages, bringing additional challenges for transfer onto extremely low-resource languages. In this paper, we propose MetaXL, a meta-learning based framework that learns to transform representations judiciously from auxiliary languages to a target one and brings their representation spaces closer for effective transfer. Extensive experiments on real-world low-resource languages – without access to large-scale monolingual corpora or large amounts of labeled data – for tasks like cross-lingual sentiment analysis and named entity recognition show the effectiveness of our approach. Code for MetaXL is publicly available at github.com/microsoft/MetaXL.</abstract>
       <url hash="6382be74">2021.naacl-main.42</url>
@@ -593,7 +593,7 @@
     <paper id="43">
       <title>Open Domain Question Answering over Tables via Dense Retrieval</title>
       <author><first>Jonathan</first><last>Herzig</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Syrine</first><last>Krichene</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <pages>512–519</pages>
@@ -609,7 +609,7 @@
       <author><first>Svitlana</first><last>Vakulenko</last></author>
       <author><first>Zhucheng</first><last>Tu</last></author>
       <author><first>Shayne</first><last>Longpre</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <author><first>Srinivas</first><last>Chappidi</last></author>
       <pages>520–534</pages>
       <abstract>We introduce a new dataset for Question Rewriting in Conversational Context (QReCC), which contains 14K conversations with 80K question-answer pairs. The task in QReCC is to find answers to conversational questions within a collection of 10M web pages (split into 54M passages). Answers to questions in the same conversation may be distributed across several web pages. QReCC provides annotations that allow us to train and evaluate individual subtasks of question rewriting, passage retrieval and reading comprehension required for the end-to-end conversational question answering (QA) task. We report the effectiveness of a strong baseline approach that combines the state-of-the-art model for question rewriting, and competitive models for open-domain QA. Our results set the first baseline for the QReCC dataset with F1 of 19.10, compared to the human upper bound of 75.45, indicating the difficulty of the setup and a large room for improvement.</abstract>
@@ -636,7 +636,7 @@
       <title><fixed-case>XOR</fixed-case> <fixed-case>QA</fixed-case>: Cross-lingual Open-Retrieval Question Answering</title>
       <author><first>Akari</first><last>Asai</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
@@ -698,7 +698,7 @@
       <title>Preregistering <fixed-case>NLP</fixed-case> research</title>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>613–623</pages>
       <abstract>Preregistration refers to the practice of specifying what you are going to do, and what you expect to find in your study, before carrying out the study. This practice is increasingly common in medicine and psychology, but is rarely discussed in NLP. This paper discusses preregistration in more detail, explores how NLP researchers could preregister their work, and presents several preregistration questions for different kinds of studies. Finally, we argue in favour of registered reports, which could provide firmer grounds for slow science in NLP research. The goal of this paper is to elicit a discussion in the NLP community, which we hope to synthesise into a general NLP preregistration form in future research.</abstract>
       <url hash="d4531582">2021.naacl-main.51</url>
@@ -775,13 +775,13 @@
     </paper>
     <paper id="57">
       <title>Improving Zero and Few-Shot Abstractive Summarization with Intermediate Fine-tuning and Data Augmentation</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Simeng</first><last>Han</last></author>
       <author><first>Haoyuan</first><last>Li</last></author>
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <pages>704–717</pages>
       <abstract>Models pretrained with self-supervised objectives on large text corpora achieve state-of-the-art performance on English text summarization tasks. However, these models are typically fine-tuned on hundreds of thousands of data points, an infeasible requirement when applying summarization to new, niche domains. In this work, we introduce a novel and generalizable method, called WikiTransfer, for fine-tuning pretrained models for summarization in an unsupervised, dataset-specific manner. WikiTransfer fine-tunes pretrained models on pseudo-summaries, produced from generic Wikipedia data, which contain characteristics of the target dataset, such as the length and level of abstraction of the desired summaries. WikiTransfer models achieve state-of-the-art, zero-shot abstractive summarization performance on the CNN-DailyMail dataset and demonstrate the effectiveness of our approach on three additional diverse datasets. These models are more robust to noisy data and also achieve better or comparable few-shot performance using 10 and 100 training examples when compared to few-shot transfer from other summarization datasets. To further boost performance, we employ data augmentation via round-trip translation as well as introduce a regularization term for improved few-shot transfer. To understand the role of dataset aspects in transfer performance and the quality of the resulting output summaries, we further study the effect of the components of our unsupervised fine-tuning data and analyze few-shot performance using both automatic and human evaluation.</abstract>
@@ -797,7 +797,7 @@
       <author><first>Ruochen</first><last>Xu</last></author>
       <author><first>Qingkai</first><last>Zeng</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <author><first>Meng</first><last>Jiang</last></author>
       <pages>718–733</pages>
       <abstract>Automatic abstractive summaries are found to often distort or fabricate facts in the article. This inconsistency between summary and original text has seriously impacted its applicability. We propose a fact-aware summarization model FASum to extract and integrate factual relations into the summary generation process via graph attention. We then design a factual corrector model FC to automatically correct factual errors from summaries generated by existing systems. Empirical results show that the fact-aware summarization can produce abstractive summaries with higher factual consistency compared with existing systems, and the correction model improves the factual consistency of given summaries via modifying only a few keywords.</abstract>
@@ -814,7 +814,7 @@
       <author><first>Yuan</first><last>Zhang</last></author>
       <author><first>Xinya</first><last>Du</last></author>
       <author><first>Panupong</first><last>Pasupat</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <pages>734–749</pages>
       <abstract>Few-shot learning arises in important practical scenarios, such as when a natural language understanding system needs to learn new semantic labels for an emerging, resource-scarce domain. In this paper, we explore retrieval-based methods for intent classification and slot filling tasks in few-shot settings. Retrieval-based methods make predictions based on labeled examples in the retrieval index that are similar to the input, and thus can adapt to new domains simply by changing the index without having to retrain the model. However, it is non-trivial to apply such methods on tasks with a complex label space like slot filling. To this end, we propose a span-level retrieval method that learns similar contextualized representations for spans with the same label via a novel batch-softmax objective. At inference time, we use the labels of the retrieved spans to construct the final structure with the highest aggregated score. Our method outperforms previous systems in various few-shot settings on the CLINC and SNIPS benchmarks.</abstract>
       <url hash="e9540035">2021.naacl-main.59</url>
@@ -826,7 +826,7 @@
       <title>“Nice Try, Kiddo”: Investigating Ad Hominems in Dialogue Responses</title>
       <author><first>Emily</first><last>Sheng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>750–767</pages>
       <abstract>Ad hominem attacks are those that target some feature of a person’s character instead of the position the person is maintaining. These attacks are harmful because they propagate implicit biases and diminish a person’s credibility. Since dialogue systems respond directly to user input, it is important to study ad hominems in dialogue responses. To this end, we propose categories of ad hominems, compose an annotated dataset, and build a classifier to analyze human and dialogue system responses to English Twitter posts. We specifically compare responses to Twitter topics about marginalized communities (#BlackLivesMatter, #MeToo) versus other topics (#Vegan, #WFH), because the abusive language of ad hominems could further amplify the skew of power away from marginalized populations. Furthermore, we propose a constrained decoding technique that uses salient n-gram similarity as a soft constraint for top-k sampling to reduce the amount of ad hominems generated. Our results indicate that 1) responses from both humans and DialoGPT contain more ad hominems for discussions around marginalized communities, 2) different quantities of ad hominems in the training data can influence the likelihood of generating ad hominems, and 3) we can use constrained decoding techniques to reduce ad hominems in generated dialogue responses.</abstract>
@@ -838,7 +838,7 @@
     <paper id="61">
       <title>Human-like informative conversations: Better acknowledgements using conditional mutual information</title>
       <author><first>Ashwin</first><last>Paranjape</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>768–781</pages>
       <abstract>This work aims to build a dialogue agent that can weave new factual content into conversations as naturally as humans. We draw insights from linguistic principles of conversational analysis and annotate human-human conversations from the Switchboard Dialog Act Corpus to examine humans strategies for acknowledgement, transition, detail selection and presentation. When current chatbots (explicitly provided with new factual content) introduce facts into a conversation, their generated responses do not acknowledge the prior turns. This is because models trained with two contexts - new factual content and conversational history - generate responses that are non-specific w.r.t. one of the contexts, typically the conversational history. We show that specificity w.r.t. conversational history is better captured by pointwise conditional mutual information (pcmi_h) than by the established use of pointwise mutual information (pmi). Our proposed method, Fused-PCMI, trades off pmi for pcmi_h and is preferred by humans for overall quality over the Max-PMI baseline 60% of the time. Human evaluators also judge responses with higher pcmi_h better at acknowledgement 74% of the time. The results demonstrate that systems mimicking human conversational traits (in this case acknowledgement) improve overall quality and more broadly illustrate the utility of linguistic principles in improving dialogue agents.</abstract>
       <url hash="84b18320">2021.naacl-main.61</url>
@@ -875,7 +875,7 @@
       <author><first>Jack</first><last>Urbanek</last></author>
       <author><first>Margaret</first><last>Li</last></author>
       <author><first>Arthur</first><last>Szlam</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <pages>807–833</pages>
       <abstract>We seek to create agents that both act and communicate with other agents in pursuit of a goal. Towards this end, we extend LIGHT (Urbanek et al. 2019)—a large-scale crowd-sourced fantasy text-game—with a dataset of quests. These contain natural language motivations paired with in-game goals and human demonstrations; completing a quest might require dialogue or actions (or both). We introduce a reinforcement learning system that (1) incorporates large-scale language modeling-based and commonsense reasoning-based pre-training to imbue the agent with relevant priors; and (2) leverages a factorized action space of action commands and dialogue, balancing between the two. We conduct zero-shot evaluations using held-out human expert demonstrations, showing that our agents are able to act consistently and talk naturally with respect to their motivations.</abstract>
@@ -903,7 +903,7 @@
       <author><first>Giannis</first><last>Karamanolakis</last></author>
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
       <author><first>Guoqing</first><last>Zheng</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>845–863</pages>
       <abstract>State-of-the-art deep neural networks require large-scale labeled training data that is often expensive to obtain or not available for many tasks. Weak supervision in the form of domain-specific rules has been shown to be useful in such settings to automatically generate weakly labeled training data. However, learning with weak rules is challenging due to their inherent heuristic and noisy nature. An additional challenge is rule coverage and overlap, where prior work on weak supervision only considers instances that are covered by weak rules, thus leaving valuable unlabeled data behind. In this work, we develop a weak supervision framework (ASTRA) that leverages all the available data for a given task. To this end, we leverage task-specific unlabeled data through self-training with a model (student) that considers contextualized representations and predicts pseudo-labels for instances that may not be covered by weak rules. We further develop a rule attention network (teacher) that learns how to aggregate student pseudo-labels with weak rule labels, conditioned on their fidelity and the underlying context of an instance. Finally, we construct a semi-supervised learning objective for end-to-end training with unlabeled data, domain-specific rules, and a small amount of labeled data. Extensive experiments on six benchmark datasets for text classification demonstrate the effectiveness of our approach with significant improvements over state-of-the-art baselines.</abstract>
       <url hash="d07118b2">2021.naacl-main.66</url>
@@ -953,8 +953,8 @@
     <paper id="70">
       <title>Template Filling with Generative Transformers</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>909–914</pages>
       <abstract>Template filling is generally tackled by a pipeline of two separate supervised systems – one for role-filler extraction and another for template/event recognition. Since pipelines consider events in isolation, they can suffer from error propagation. We introduce a framework based on end-to-end generative transformers for this task (i.e., GTT). It naturally models the dependence between entities both within a single event and across the multiple events described in a document. Experiments demonstrate that this framework substantially outperforms pipeline-based approaches, and other neural end-to-end baselines that do not model between-event dependencies. We further show that our framework specifically improves performance on documents containing multiple events.</abstract>
       <url hash="ced24048">2021.naacl-main.70</url>
@@ -965,7 +965,7 @@
     <paper id="71">
       <title>Towards Interpreting and Mitigating Shortcut Learning Behavior of <fixed-case>NLU</fixed-case> models</title>
       <author><first>Mengnan</first><last>Du</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
       <author><first>Ruchi</first><last>Deshpande</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
@@ -985,7 +985,7 @@
       <author><first>Jiaji</first><last>Huang</last></author>
       <author><first>Xingyu</first><last>Cai</last></author>
       <author><first>Jiahong</first><last>Yuan</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>930–945</pages>
       <abstract>Multi-layer multi-head self-attention mechanism is widely applied in modern neural language models. Attention redundancy has been observed among attention heads but has not been deeply studied in the literature. Using BERT-base model as an example, this paper provides a comprehensive study on attention redundancy which is helpful for model interpretation and model compression. We analyze the attention redundancy with Five-Ws and How. (What) We define and focus the study on redundancy matrices generated from pre-trained and fine-tuned BERT-base model for GLUE datasets. (How) We use both token-based and sentence-based distance functions to measure the redundancy. (Where) Clear and similar redundancy patterns (cluster structure) are observed among attention heads. (When) Redundancy patterns are similar in both pre-training and fine-tuning phases. (Who) We discover that redundancy patterns are task-agnostic. Similar redundancy patterns even exist for randomly generated token sequences. (“Why”) We also evaluate influences of the pre-training dropout ratios on attention redundancy. Based on the phase-independent and task-agnostic attention redundancy patterns, we propose a simple zero-shot pruning method as a case study. Experiments on fine-tuning GLUE tasks verify its effectiveness. The comprehensive analyses on attention redundancy make model understanding and zero-shot model pruning promising.</abstract>
       <url hash="17f0ebc0">2021.naacl-main.72</url>
@@ -999,7 +999,7 @@
       <author><first>Sarthak</first><last>Jain</last></author>
       <author><first>Karl</first><last>Pichotta</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>946–959</pages>
       <abstract>Large Transformers pretrained over clinical notes from Electronic Health Records (EHR) have afforded substantial gains in performance on predictive clinical tasks. The cost of training such models (and the necessity of data access to do so) coupled with their utility motivates parameter sharing, i.e., the release of pretrained models such as ClinicalBERT. While most efforts have used deidentified EHR, many researchers have access to large sets of sensitive, non-deidentified EHR with which they might train a BERT model (or similar). Would it be safe to release the weights of such a model if they did? In this work, we design a battery of approaches intended to recover Personal Health Information (PHI) from a trained BERT. Specifically, we attempt to recover patient names and conditions with which they are associated. We find that simple probing methods are not able to meaningfully extract sensitive information from BERT trained over the MIMIC-III corpus of EHR. However, more sophisticated “attacks” may succeed in doing so: To facilitate such research, we make our experimental setup and baseline probing models available at <url>https://github.com/elehman16/exposing_patient_data_release</url>.</abstract>
       <url hash="03fc0bc3">2021.naacl-main.73</url>
@@ -1011,7 +1011,7 @@
       <title>Low-Complexity Probing via Finding Subnetworks</title>
       <author><first>Steven</first><last>Cao</last></author>
       <author><first>Victor</first><last>Sanh</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>960–966</pages>
       <abstract>The dominant approach in probing neural networks for linguistic properties is to train a new shallow multi-layer perceptron (MLP) on top of the model’s internal representations. This approach can detect properties encoded in the model, but at the cost of adding new parameters that may learn the task directly. We instead propose a subtractive pruning-based probe, where we find an existing subnetwork that performs the linguistic task of interest. Compared to an MLP, the subnetwork probe achieves both higher accuracy on pre-trained models and lower accuracy on random models, so it is both better at finding properties of interest and worse at learning on its own. Next, by varying the complexity of each probe, we show that subnetwork probing Pareto-dominates MLP probing in that it achieves higher accuracy given any budget of probe complexity. Finally, we analyze the resulting subnetworks across various tasks to locate where each task is encoded, and we find that lower-level tasks are captured in lower layers, reproducing similar findings in past work.</abstract>
       <url hash="b5d87445">2021.naacl-main.74</url>
@@ -1023,7 +1023,7 @@
       <title>An Empirical Comparison of Instance Attribution Methods for <fixed-case>NLP</fixed-case></title>
       <author><first>Pouya</first><last>Pezeshkpour</last></author>
       <author><first>Sarthak</first><last>Jain</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <pages>967–975</pages>
       <abstract>Widespread adoption of deep models has motivated a pressing need for approaches to interpret network outputs and to facilitate model debugging. Instance attribution methods constitute one means of accomplishing these goals by retrieving training instances that (may have) led to a particular prediction. Influence functions (IF; Koh and Liang 2017) provide machinery for doing this by quantifying the effect that perturbing individual train instances would have on a specific test prediction. However, even approximating the IF is computationally expensive, to the degree that may be prohibitive in many cases. Might simpler approaches (e.g., retrieving train examples most similar to a given test point) perform comparably? In this work, we evaluate the degree to which different potential instance attribution agree with respect to the importance of training samples. We find that simple retrieval methods yield training instances that differ from those identified via gradient-based methods (such as IFs), but that nonetheless exhibit desirable characteristics similar to more complex attribution methods. Code for all methods and experiments in this paper is available at: <url>https://github.com/successar/instance_attributions_NLP</url>.</abstract>
@@ -1151,7 +1151,7 @@
       <author><first>Haoming</first><last>Jiang</last></author>
       <author><first>Wendi</first><last>Ren</last></author>
       <author><first>Tuo</first><last>Zhao</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>1063–1077</pages>
       <abstract>Fine-tuned pre-trained language models (LMs) have achieved enormous success in many natural language processing (NLP) tasks, but they still require excessive labeled data in the fine-tuning stage. We study the problem of fine-tuning pre-trained LMs using only weak supervision, without any labeled data. This problem is challenging because the high capacity of LMs makes them prone to overfitting the noisy labels generated by weak supervision. To address this problem, we develop a contrastive self-training framework, COSINE, to enable fine-tuning LMs with weak supervision. Underpinned by contrastive regularization and confidence-based reweighting, our framework gradually improves model fitting while effectively suppressing error propagation. Experiments on sequence, token, and sentence pair classification tasks show that our model outperforms the strongest baseline by large margins and achieves competitive performance with fully-supervised fine-tuning methods. Our implementation is available on <url>https://github.com/yueyu1030/COSINE</url>.</abstract>
       <url hash="e055f2f3">2021.naacl-main.84</url>
@@ -1201,8 +1201,8 @@
     <paper id="88">
       <title><fixed-case>DR</fixed-case>e<fixed-case>C</fixed-case>a: A General Task Augmentation Strategy for Few-Shot Natural Language Inference</title>
       <author><first>Shikhar</first><last>Murty</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1113–1125</pages>
       <abstract>Meta-learning promises few-shot learners that can adapt to new distributions by repurposing knowledge acquired from previous training. However, we believe meta-learning has not yet succeeded in NLP due to the lack of a well-defined task distribution, leading to attempts that treat datasets as tasks. Such an ad hoc task distribution causes problems of quantity and quality. Since there’s only a handful of datasets for any NLP problem, meta-learners tend to overfit their adaptation mechanism and, since NLP datasets are highly heterogeneous, many learning episodes have poor transfer between their support and query sets, which discourages the meta-learner from adapting. To alleviate these issues, we propose DReCA (Decomposing datasets into Reasoning Categories), a simple method for discovering and using latent reasoning categories in a dataset, to form additional high quality tasks. DReCA works by splitting examples into label groups, embedding them with a finetuned BERT model and then clustering each group into reasoning categories. Across four few-shot NLI problems, we demonstrate that using DReCA improves the accuracy of meta-learners by 1.5-4%</abstract>
       <url hash="ff6f450a">2021.naacl-main.88</url>
@@ -1215,7 +1215,7 @@
       <author><first>Xavier</first><last>Garcia</last></author>
       <author><first>Aditya</first><last>Siddhant</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>1126–1137</pages>
       <abstract>Unsupervised translation has reached impressive performance on resource-rich language pairs such as English-French and English-German. However, early studies have shown that in more realistic settings involving low-resource, rare languages, unsupervised translation performs poorly, achieving less than 3.0 BLEU. In this work, we show that multilinguality is critical to making unsupervised systems practical for low-resource settings. In particular, we present a single model for 5 low-resource languages (Gujarati, Kazakh, Nepali, Sinhala, and Turkish) to and from English directions, which leverages monolingual and auxiliary parallel data from other high-resource language pairs via a three-stage training scheme. We outperform all current state-of-the-art unsupervised baselines for these languages, achieving gains of up to 14.4 BLEU. Additionally, we outperform strong supervised baselines for various language pairs as well as match the performance of the current state-of-the-art supervised model for Nepali-English. We conduct a series of ablation studies to establish the robustness of our model under different degrees of data quality, as well as to analyze the factors which led to the superior performance of the proposed approach over traditional unsupervised models.</abstract>
       <url hash="6817eaa9">2021.naacl-main.89</url>
@@ -1265,7 +1265,7 @@
       <title>Towards Continual Learning for Multilingual Machine Translation via Vocabulary Substitution</title>
       <author><first>Xavier</first><last>Garcia</last></author>
       <author><first>Noah</first><last>Constant</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <pages>1184–1192</pages>
       <abstract>We propose a straightforward vocabulary adaptation scheme to extend the language capacity of multilingual machine translation models, paving the way towards efficient continual learning for multilingual machine translation. Our approach is suitable for large-scale datasets, applies to distant languages with unseen scripts, incurs only minor degradation on the translation performance for the original language pairs and provides competitive performance even in the case where we only possess monolingual data for the new languages.</abstract>
@@ -1300,13 +1300,13 @@
     </paper>
     <paper id="96">
       <title>Capturing Row and Column Semantics in Transformer Based Question Answering over Tables</title>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Mustafa</first><last>Canim</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Saneem</first><last>Chemmengath</last></author>
       <author><first>Vishwajeet</first><last>Kumar</last></author>
       <author><first>Rishav</first><last>Chakravarti</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <author><first>Feifei</first><last>Pan</last></author>
       <author><first>Samarth</first><last>Bharadwaj</last></author>
       <author><first>Nicolas Rodolfo</first><last>Fauceglia</last></author>
@@ -1359,7 +1359,7 @@
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>1280–1287</pages>
       <abstract>State-of-the-art Machine Reading Comprehension (MRC) models for Open-domain Question Answering (QA) are typically trained for span selection using distantly supervised positive examples and heuristically retrieved negative examples. This training scheme possibly explains empirical observations that these models achieve a high recall amongst their top few predictions, but a low overall accuracy, motivating the need for answer re-ranking. We develop a successful re-ranking approach (RECONSIDER) for span-extraction tasks that improves upon the performance of MRC models, even beyond large-scale pre-training. RECONSIDER is trained on positive and negative examples extracted from high confidence MRC model predictions, and uses in-passage span annotations to perform span-focused re-ranking over a smaller candidate set. As a result, RECONSIDER learns to eliminate close false positives, achieving a new extractive state of the art on four QA tasks, with 45.5% Exact Match accuracy on Natural Questions with real user questions, and 61.7% on TriviaQA. We will release all related data, models, and code.</abstract>
       <url hash="6e6d549d">2021.naacl-main.100</url>
@@ -1400,7 +1400,7 @@
       <author><first>Raymond</first><last>Li</last></author>
       <author><first>Dzmitry</first><last>Bahdanau</last></author>
       <author><first>Harm</first><last>de Vries</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <pages>1313–1321</pages>
       <abstract>Recent neural text-to-SQL models can effectively translate natural language questions to corresponding SQL queries on unseen databases. Working mostly on the Spider dataset, researchers have proposed increasingly sophisticated solutions to the problem. Contrary to this trend, in this paper we focus on simplifications. We begin by building DuoRAT, a re-implementation of the state-of-the-art RAT-SQL model that unlike RAT-SQL is using only relation-aware or vanilla transformers as the building blocks. We perform several ablation experiments using DuoRAT as the baseline model. Our experiments confirm the usefulness of some techniques and point out the redundancy of others, including structural SQL features and features that link the question with the schema.</abstract>
       <url hash="65e26d11">2021.naacl-main.103</url>
@@ -1426,8 +1426,8 @@
     <paper id="105">
       <title>Structure-Grounded Pretraining for Text-to-<fixed-case>SQL</fixed-case></title>
       <author><first>Xiang</first><last>Deng</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <author><first>Oleksandr</first><last>Polozov</last></author>
       <author><first>Huan</first><last>Sun</last></author>
       <author><first>Matthew</first><last>Richardson</last></author>
@@ -1443,7 +1443,7 @@
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Yihao</first><last>Feng</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1351–1360</pages>
       <abstract>Text classification is usually studied by labeling natural language texts with relevant categories from a predefined set. In the real world, new classes might keep challenging the existing system with limited labeled data. The system should be intelligent enough to recognize upcoming new classes with a few examples. In this work, we define a new task in the NLP domain, incremental few-shot text classification, where the system incrementally handles multiple rounds of new classes. For each round, there is a batch of new classes with a few labeled examples per class. Two major challenges exist in this new task: (i) For the learning process, the system should incrementally learn new classes round by round without re-training on the examples of preceding classes; (ii) For the performance, the system should perform well on new classes without much loss on preceding classes. In addition to formulating the new task, we also release two benchmark datasets in the incremental few-shot setting: intent classification and relation classification. Moreover, we propose two entailment approaches, ENTAILMENT and HYBRID, which show promise for solving this novel problem.</abstract>
       <url hash="d046f919">2021.naacl-main.106</url>
@@ -1558,7 +1558,7 @@
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Liangjing</first><last>Feng</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <pages>1463–1475</pages>
       <abstract>The development of neural networks and pretraining techniques has spawned many sentence-level tagging systems that achieved superior performance on typical benchmarks. However, a relatively less discussed topic is what if more context information is introduced into current top-scoring tagging systems. Although several existing works have attempted to shift tagging systems from sentence-level to document-level, there is still no consensus conclusion about when and why it works, which limits the applicability of the larger-context approach in tagging tasks. In this paper, instead of pursuing a state-of-the-art tagging system by architectural exploration, we focus on investigating when and why the larger-context training, as a general strategy, can work. To this end, we conduct a thorough comparative study on four proposed aggregators for context information collecting and present an attribute-aided evaluation method to interpret the improvement brought by larger-context training. Experimentally, we set up a testbed based on four tagging tasks and thirteen datasets. Hopefully, our preliminary observations can deepen the understanding of larger-context training and enlighten more follow-up works on the use of contextual information.</abstract>
@@ -1596,7 +1596,7 @@
       <author><first>Tao</first><last>Meng</last></author>
       <author><first>Anjie</first><last>Fang</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>1499–1512</pages>
       <abstract>Named Entity Recognition (NER) remains difficult in real-world settings; current challenges include short texts (low context), emerging entities, and complex entities (e.g. movie names). Gazetteer features can help, but results have been mixed due to challenges with adding extra features, and a lack of realistic evaluation data. It has been shown that including gazetteer features can cause models to overuse or underuse them, leading to poor generalization. We propose GEMNET, a novel approach for gazetteer knowledge integration, including (1) a flexible Contextual Gazetteer Representation (CGR) encoder that can be fused with any word-level model; and (2) a Mixture-of- Experts gating network that overcomes the feature overuse issue by learning to conditionally combine the context and gazetteer features, instead of assigning them fixed weights. To comprehensively evaluate our approaches, we create 3 large NER datasets (24M tokens) reflecting current challenges. In an uncased setting, our methods show large gains (up to +49% F1) in recognizing difficult entities compared to existing baselines. On standard benchmarks, we achieve a new uncased SOTA on CoNLL03 and WNUT17.</abstract>
       <url hash="83cc6ffd">2021.naacl-main.118</url>
@@ -1626,7 +1626,7 @@
       <author><first>ChaeHun</first><last>Park</last></author>
       <author><first>Eugene</first><last>Jang</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>1525–1534</pages>
       <abstract>Evaluating the quality of responses generated by open-domain conversation systems is a challenging task. This is partly because there can be multiple appropriate responses to a given dialogue history. Reference-based metrics that rely on comparisons to a set of known correct responses often fail to account for this variety, and consequently correlate poorly with human judgment. To address this problem, researchers have investigated the possibility of assessing response quality without using a set of known correct responses. RUBER demonstrated that an automatic response evaluation model could be made using unsupervised learning for the next-utterance prediction (NUP) task. For the unsupervised learning of such model, we propose a method of manipulating a golden response to create a new negative response that is designed to be inappropriate within the context while maintaining high similarity with the original golden response. We find, from our experiments on English datasets, that using the negative samples generated by our method alongside random negative samples can increase the model’s correlation with human evaluations. The process of generating such negative samples is automated and does not rely on human annotation.</abstract>
       <url hash="7397eb94">2021.naacl-main.120</url>
@@ -1653,7 +1653,7 @@
       <author><first>Taesuk</first><last>Hong</last></author>
       <author><first>Byoungjae</first><last>Kim</last></author>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>1549–1558</pages>
       <abstract>Retrieval-based dialogue systems display an outstanding performance when pre-trained language models are used, which includes bidirectional encoder representations from transformers (BERT). During the multi-turn response selection, BERT focuses on training the relationship between the context with multiple utterances and the response. However, this method of training is insufficient when considering the relations between each utterance in the context. This leads to a problem of not completely understanding the context flow that is required to select a response. To address this issue, we propose a new fine-grained post-training method that reflects the characteristics of the multi-turn dialogue. Specifically, the model learns the utterance level interactions by training every short context-response pair in a dialogue session. Furthermore, by using a new training objective, the utterance relevance classification, the model understands the semantic relevance and coherence between the dialogue utterances. Experimental results show that our model achieves new state-of-the-art with significant margins on three benchmark datasets. This suggests that the fine-grained post-training method is highly effective for the response selection task.</abstract>
       <url hash="c5a0dec4">2021.naacl-main.122</url>
@@ -1681,14 +1681,14 @@
       <title>Adding Chit-Chat to Enhance Task-Oriented Dialogues</title>
       <author><first>Kai</first><last>Sun</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Stephen</first><last>Roller</last></author>
       <author><first>Becka</first><last>Silvert</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Zhiguang</first><last>Wang</last></author>
       <author><first>Honglei</first><last>Liu</last></author>
       <author><first>Eunjoon</first><last>Cho</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1570–1583</pages>
       <abstract>Existing dialogue corpora and models are typically designed under two disjoint motives: while task-oriented systems focus on achieving functional goals (e.g., booking hotels), open-domain chatbots aim at making socially engaging conversations. In this work, we propose to integrate both types of systems by Adding Chit-Chat to ENhance Task-ORiented dialogues (ACCENTOR), with the goal of making virtual assistant conversations more engaging and interactive. Specifically, we propose a Human &lt;-&gt; AI collaborative data collection approach for generating diverse chit-chat responses to augment task-oriented dialogues with minimal annotation effort. We then present our new chit-chat-based annotations to 23.8K dialogues from two popular task-oriented datasets (Schema-Guided Dialogue and MultiWOZ 2.1) and demonstrate their advantage over the originals via human evaluation. Lastly, we propose three new models for adding chit-chat to task-oriented dialogues, explicitly trained to predict user goals and to generate contextually relevant chit-chat responses. Automatic and human evaluations show that, compared with the state-of-the-art task-oriented baseline, our models can code-switch between task and chit-chat to be more engaging, interesting, knowledgeable, and humanlike, while maintaining competitive task performance.</abstract>
       <url hash="b543a644">2021.naacl-main.124</url>
@@ -1699,7 +1699,7 @@
     <paper id="125">
       <title>Incorporating Syntax and Semantics in Coreference Resolution with Heterogeneous Graph Attention Network</title>
       <author><first>Fan</first><last>Jiang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1584–1591</pages>
       <abstract>External syntactic and semantic information has been largely ignored by existing neural coreference resolution models. In this paper, we present a heterogeneous graph-based model to incorporate syntactic and semantic structures of sentences. The proposed graph contains a syntactic sub-graph where tokens are connected based on a dependency tree, and a semantic sub-graph that contains arguments and predicates as nodes and semantic role labels as edges. By applying a graph attention network, we can obtain syntactically and semantically augmented word representation, which can be integrated using an attentive integration layer and gating mechanism. Experiments on the OntoNotes 5.0 benchmark show the effectiveness of our proposed model.</abstract>
       <url hash="4c7242a7">2021.naacl-main.125</url>
@@ -1726,7 +1726,7 @@
       <author><first>Naoki</first><last>Kobayashi</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>1600–1612</pages>
       <abstract>Most of the previous Rhetorical Structure Theory (RST) parsing methods are based on supervised learning such as neural networks, that require an annotated corpus of sufficient size and quality. However, the RST Discourse Treebank (RST-DT), the benchmark corpus for RST parsing in English, is small due to the costly annotation of RST trees. The lack of large annotated training data causes poor performance especially in relation labeling. Therefore, we propose a method for improving neural RST parsing models by exploiting silver data, i.e., automatically annotated data. We create large-scale silver data from an unlabeled corpus by using a state-of-the-art RST parser. To obtain high-quality silver data, we extract agreement subtrees from RST trees for documents built using the RST parsers. We then pre-train a neural RST parser with the obtained silver data and fine-tune it on the RST-DT. Experimental results show that our method achieved the best micro-F1 scores for Nuclearity and Relation at 75.0 and 63.2, respectively. Furthermore, we obtained a remarkable gain in the Relation score, 3.0 points, against the previous state-of-the-art parser.</abstract>
@@ -1739,7 +1739,7 @@
       <title><fixed-case>RST</fixed-case> Parsing from Scratch</title>
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
       <author><first>Xuan-Phi</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>1613–1625</pages>
       <abstract>We introduce a novel top-down end-to-end formulation of document level discourse parsing in the Rhetorical Structure Theory (RST) framework. In this formulation, we consider discourse parsing as a sequence of splitting decisions at token boundaries and use a seq2seq network to model the splitting decisions. Our framework facilitates discourse parsing from scratch without requiring discourse segmentation as a prerequisite; rather, it yields segmentation as part of the parsing process. Our unified parsing model adopts a beam search to decode the best tree structure by searching through a space of high scoring trees. With extensive experiments on the standard RST discourse treebank, we demonstrate that our parser outperforms existing methods by a good margin in both end-to-end parsing and parsing with gold segmentation. More importantly, it does so without using any handcrafted features, making it faster and easily adaptable to new languages and domains.</abstract>
@@ -1765,7 +1765,7 @@
       <title>Evaluating the Impact of a Hierarchical Discourse Representation on Entity Coreference Resolution Performance</title>
       <author><first>Sopan</first><last>Khosla</last></author>
       <author><first>James</first><last>Fiacco</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1645–1651</pages>
       <abstract>Recent work on entity coreference resolution (CR) follows current trends in Deep Learning applied to embeddings and relatively simple task-related features. SOTA models do not make use of hierarchical representations of discourse structure. In this work, we leverage automatically constructed discourse parse trees within a neural approach and demonstrate a significant improvement on two benchmark entity coreference-resolution datasets. We explore how the impact varies depending upon the type of mention.</abstract>
       <url hash="ed7fe9fb">2021.naacl-main.130</url>
@@ -1835,7 +1835,7 @@
       <author><first>Jian</first><last>Jiao</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Ruofei</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1692–1701</pages>
       <abstract>Transformer is an attention-based neural network, which consists of two sublayers, namely, Self-Attention Network (SAN) and Feed-Forward Network (FFN). Existing research explores to enhance the two sublayers separately to improve the capability of Transformer for text representation. In this paper, we present a novel understanding of SAN and FFN as Mask Attention Networks (MANs) and show that they are two special cases of MANs with static mask matrices. However, their static mask matrices limit the capability for localness modeling in text representation learning. We therefore introduce a new layer named dynamic mask attention network (DMAN) with a learnable mask matrix which is able to model localness adaptively. To incorporate advantages of DMAN, SAN, and FFN, we propose a sequential layered structure to combine the three types of layers. Extensive experiments on various tasks, including neural machine translation and text summarization demonstrate that our model outperforms the original Transformer.</abstract>
       <url hash="720f933a">2021.naacl-main.135</url>
@@ -1878,7 +1878,7 @@
       <author><first>Ian</first><last>Porada</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>1732–1743</pages>
       <abstract>Understanding natural language requires common sense, one aspect of which is the ability to discern the plausibility of events. While distributional models—most recently pre-trained, Transformer language models—have demonstrated improvements in modeling event plausibility, their performance still falls short of humans’. In this work, we show that Transformer-based plausibility models are markedly inconsistent across the conceptual classes of a lexical hierarchy, inferring that “a person breathing” is plausible while “a dentist breathing” is not, for example. We find this inconsistency persists even when models are softly injected with lexical knowledge, and we present a simple post-hoc method of forcing model consistency that improves correlation with human plausibility judgements.</abstract>
       <url hash="1a571897">2021.naacl-main.138</url>
@@ -2177,7 +2177,7 @@
       <author><first>Prabir</first><last>Mallick</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Indrajit</first><last>Bhattacharya</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>1996–2005</pages>
       <abstract>Given the diversity of the candidates and complexity of job requirements, and since interviewing is an inherently subjective process, it is an important task to ensure consistent, uniform, efficient and objective interviews that result in high quality recruitment. We propose an interview assistant system to automatically, and in an objective manner, select an optimal set of technical questions (from question banks) personalized for a candidate. This set can help a human interviewer to plan for an upcoming interview of that candidate. We formalize the problem of selecting a set of questions as an integer linear programming problem and use standard solvers to get a solution. We use knowledge graph as background knowledge in this formulation, and derive our objective functions and constraints from it. We use candidate’s resume to personalize the selection of questions. We propose an intrinsic evaluation to compare a set of suggested questions with actually asked questions. We also use expert interviewers to comparatively evaluate our approach with a set of reasonable baselines.</abstract>
       <url hash="3e84403f">2021.naacl-main.160</url>
@@ -2318,7 +2318,7 @@
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Joongbo</first><last>Shin</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
       <pages>2105–2115</pages>
@@ -2334,7 +2334,7 @@
       <author><first>Yiwei</first><last>Lyu</last></author>
       <author><first>Paul Pu</first><last>Liang</last></author>
       <author><first>Hai</first><last>Pham</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Barnabás</first><last>Póczos</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
@@ -2377,7 +2377,7 @@
     <paper id="174">
       <title>Framing Unpacked: A Semi-Supervised Interpretable Multi-View Model of Media Frames</title>
       <author><first>Shima</first><last>Khanehzar</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Gosia</first><last>Mikolajczak</last></author>
       <author><first>Andrew</first><last>Turpin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
@@ -2392,7 +2392,7 @@
       <title>Automatic Classification of Neutralization Techniques in the Narrative of Climate Change Scepticism</title>
       <author><first>Shraey</first><last>Bhatia</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2167–2175</pages>
       <abstract>Neutralisation techniques, e.g. denial of responsibility and denial of victim, are used in the narrative of climate change scepticism to justify lack of action or to promote an alternative view. We first draw on social science to introduce the problem to the community of nlp, present the granularity of the coding schema and then collect manual annotations of neutralised techniques in text relating to climate change, and experiment with supervised and semi- supervised BERT-based models.</abstract>
       <url hash="b855eab7">2021.naacl-main.175</url>
@@ -2404,7 +2404,7 @@
       <title>Suicide Ideation Detection via Social and Temporal User Representations using Hyperbolic Learning</title>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Harshit</first><last>Joshi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Lucie</first><last>Flek</last></author>
       <pages>2176–2190</pages>
       <abstract>Recent psychological studies indicate that individuals exhibiting suicidal ideation increasingly turn to social media rather than mental health practitioners. Personally contextualizing the buildup of such ideation is critical for accurate identification of users at risk. In this work, we propose a framework jointly leveraging a user’s emotional history and social information from a user’s neighborhood in a network to contextualize the interpretation of the latest tweet of a user on Twitter. Reflecting upon the scale-free nature of social network relationships, we propose the use of Hyperbolic Graph Convolution Networks, in combination with the Hawkes process to learn the historical emotional spectrum of a user in a time-sensitive manner. Our system significantly outperforms state-of-the-art methods on this task, showing the benefits of both socially and personally contextualized representations.</abstract>
@@ -2419,7 +2419,7 @@
       <author><first>Andrea</first><last>Ceolin</last></author>
       <author><first>Iknoor</first><last>Singh</last></author>
       <author><first>Niyati</first><last>Chhaya</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>2191–2200</pages>
       <abstract>This study introduces and analyzes WikiTalkEdit, a dataset of conversations and edit histories from Wikipedia, for research in online cooperation and conversation modeling. The dataset comprises dialog triplets from the Wikipedia Talk pages, and editing actions on the corresponding articles being discussed. We show how the data supports the classic understanding of style matching, where positive emotion and the use of first-person pronouns predict a positive emotional change in a Wikipedia contributor. However, they do not predict editorial behavior. On the other hand, feedback invoking evidentiality and criticism, and references to Wikipedia’s community norms, is more likely to persuade the contributor to perform edits but is less likely to lead to a positive emotion. We developed baseline classifiers trained on pre-trained RoBERTa features that can predict editorial change with an F1 score of .54, as compared to an F1 score of .66 for predicting emotional change. A diagnostic analysis of persisting errors is also provided. We conclude with possible applications and recommendations for future work. The dataset is publicly available for the research community at <url>https://github.com/kj2013/WikiTalkEdit/</url>.</abstract>
       <url hash="25ba3e2d">2021.naacl-main.177</url>
@@ -2508,7 +2508,7 @@
       <title>Learning to Recognize Dialect Features</title>
       <author><first>Dorottya</first><last>Demszky</last></author>
       <author><first>Devyani</first><last>Sharma</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>2315–2338</pages>
@@ -2521,7 +2521,7 @@
     <paper id="185">
       <title>It’s Not Just Size That Matters: Small Language Models Are Also Few-Shot Learners</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2339–2352</pages>
       <abstract>When scaled to hundreds of billions of parameters, pretrained language models such as GPT-3 (Brown et al., 2020) achieve remarkable few-shot performance. However, enormous amounts of compute are required for training and applying such big models, resulting in a large carbon footprint and making it difficult for researchers and practitioners to use them. We show that performance similar to GPT-3 can be obtained with language models that are much “greener” in that their parameter count is several orders of magnitude smaller. This is achieved by converting textual inputs into cloze questions that contain a task description, combined with gradient-based optimization; exploiting unlabeled data gives further improvements. We identify key factors required for successful natural language understanding with small language models.</abstract>
       <url hash="dbaf6c76">2021.naacl-main.185</url>
@@ -2534,7 +2534,7 @@
       <title>Static Embeddings as Efficient Knowledge Bases?</title>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Nora</first><last>Kassner</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2353–2363</pages>
       <abstract>Recent research investigates factual knowledge stored in large pretrained language models (PLMs). Instead of structural knowledge base (KB) queries, masked sentences such as “Paris is the capital of [MASK]” are used as probes. The good performance on this analysis task has been interpreted as PLMs becoming potential repositories of factual knowledge. In experiments across ten linguistically diverse languages, we study knowledge contained in static embeddings. We show that, when restricting the output space to a candidate set, simple nearest neighbor matching using static embeddings performs better than PLMs. E.g., static embeddings perform 1.6% points better than BERT while just using 0.3% of energy for training. One important factor in their good comparative performance is that static embeddings are standardly learned for a large vocabulary. In contrast, BERT exploits its more sophisticated, but expensive ability to compose meaningful representations from a much smaller subword vocabulary.</abstract>
       <url hash="ae75df19">2021.naacl-main.186</url>
@@ -2652,7 +2652,7 @@
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>2443–2459</pages>
       <abstract>This paper studies zero-shot cross-lingual transfer of vision-language models. Specifically, we focus on multilingual text-to-video search and propose a Transformer-based model that learns contextual multilingual multimodal embeddings. Under a zero-shot setting, we empirically demonstrate that performance degrades significantly when we query the multilingual text-video model with non-English sentences. To address this problem, we introduce a multilingual multimodal pre-training strategy, and collect a new multilingual instructional video dataset (Multi-HowTo100M) for pre-training. Experiments on VTT show that our method significantly improves video search in non-English languages without additional annotations. Furthermore, when multilingual annotations are available, our method outperforms recent baselines by a large margin in multilingual text-to-video search on VTT and VATEX; as well as in multilingual text-to-image search on Multi30K. Our model and Multi-HowTo100M is available at <url>http://github.com/berniebear/Multi-HT100M</url>.</abstract>
       <url hash="52e407d9">2021.naacl-main.195</url>
@@ -2683,7 +2683,7 @@
       <author><first>Alan</first><last>Ramponi</last></author>
       <author><first>Siti Oryza</first><last>Khairunnisa</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>2479–2497</pages>
       <abstract>The lack of publicly available evaluation data for low-resource languages limits progress in Spoken Language Understanding (SLU). As key tasks like intent classification and slot filling require abundant training data, it is desirable to reuse existing data in high-resource languages to develop models for low-resource scenarios. We introduce xSID, a new benchmark for cross-lingual (x) Slot and Intent Detection in 13 languages from 6 language families, including a very low-resource dialect. To tackle the challenge, we propose a joint learning approach, with English SLU training data and non-English auxiliary tasks from raw text, syntax and translation for transfer. We study two setups which differ by type and language coverage of the pre-trained embeddings. Our results show that jointly learning the main tasks with masked language modeling is effective for slots, while machine translation transfer works best for intent classification.</abstract>
       <url hash="e4433f83">2021.naacl-main.197</url>
@@ -2731,7 +2731,7 @@
       <author><first>Vladimir</first><last>Karpukhin</last></author>
       <author><first>Jean</first><last>Maillard</last></author>
       <author><first>Vassilis</first><last>Plachouras</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>2523–2544</pages>
       <abstract>Challenging problems such as open-domain question answering, fact checking, slot filling and entity linking require access to large, external knowledge sources. While some models do well on individual tasks, developing general models is difficult as each task might require computationally expensive indexing of custom knowledge sources, in addition to dedicated infrastructure. To catalyze research on models that condition on specific information in large textual resources, we present a benchmark for knowledge-intensive language tasks (KILT). All tasks in KILT are grounded in the same snapshot of Wikipedia, reducing engineering turnaround through the re-use of components, as well as accelerating research into task-agnostic memory architectures. We test both task-specific and general baselines, evaluating downstream performance in addition to the ability of the models to provide provenance. We find that a shared dense vector index coupled with a seq2seq model is a strong baseline, outperforming more tailor-made approaches for fact checking, open-domain question answering and dialogue, and yielding competitive results on entity linking and slot filling, by generating disambiguated text. KILT data and code are available at <url>https://github.com/facebookresearch/KILT</url>.</abstract>
@@ -2784,9 +2784,9 @@
       <author><first>Tommaso</first><last>Fornaciari</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>2591–2597</pages>
       <abstract>Supervised learning assumes that a ground truth label exists. However, the reliability of this ground truth depends on human annotators, who often disagree. Prior work has shown that this disagreement can be helpful in training models. We propose a novel method to incorporate this disagreement as information: in addition to the standard error computation, we use soft-labels (i.e., probability distributions over the annotator labels) as an auxiliary task in a multi-task neural network. We measure the divergence between the predictions and the target soft-labels with several loss-functions and evaluate the models on various NLP tasks. We find that the soft-label prediction auxiliary task reduces the penalty for errors on ambiguous entities, and thereby mitigates overfitting. It significantly improves performance across tasks, beyond the standard approach and prior work.</abstract>
       <url hash="0b8200db">2021.naacl-main.204</url>
@@ -2810,11 +2810,11 @@
     </paper>
     <paper id="206">
       <title>Variance-reduced First-order Meta-learning for Natural Language Processing Tasks</title>
-      <author><first>Lingxiao</first><last>Wang</last></author>
+      <author id="ling-xiao-wang"><first>Lingxiao</first><last>Wang</last></author>
       <author><first>Kevin</first><last>Huang</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
       <author><first>Quanquan</first><last>Gu</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <pages>2609–2615</pages>
       <abstract>First-order meta-learning algorithms have been widely used in practice to learn initial model parameters that can be quickly adapted to new tasks due to their efficiency and effectiveness. However, existing studies find that meta-learner can overfit to some specific adaptation when we have heterogeneous tasks, leading to significantly degraded performance. In Natural Language Processing (NLP) applications, datasets are often diverse and each task has its unique characteristics. Therefore, to address the overfitting issue when applying first-order meta-learning to NLP applications, we propose to reduce the variance of the gradient estimator used in task adaptation. To this end, we develop a variance-reduced first-order meta-learning algorithm. The core of our algorithm is to introduce a novel variance reduction term to the gradient estimation when performing the task adaptation. Experiments on two NLP applications: few-shot text classification and multi-domain dialog state tracking demonstrate the superior performance of our proposed method.</abstract>
       <url hash="93468fde">2021.naacl-main.206</url>
@@ -2827,7 +2827,7 @@
       <author><first>Tianze</first><last>Shi</last></author>
       <author><first>Adrian</first><last>Benton</last></author>
       <author><first>Igor</first><last>Malioutov</last></author>
-      <author><first>Ozan</first><last>İrsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last></author>
       <pages>2616–2626</pages>
       <abstract>While the predictive performance of modern statistical dependency parsers relies heavily on the availability of expensive expert-annotated treebank data, not all annotations contribute equally to the training of the parsers. In this paper, we attempt to reduce the number of labeled examples needed to train a strong dependency parser using batch active learning (AL). In particular, we investigate whether enforcing diversity in the sampled batches, using determinantal point processes (DPPs), can improve over their diversity-agnostic counterparts. Simulation experiments on an English newswire corpus show that selecting diverse batches with DPPs is superior to strong selection strategies that do not enforce batch diversity, especially during the initial stages of the learning process. Additionally, our diversity-aware strategy is robust under a corpus duplication setting, where diversity-agnostic sampling strategies exhibit significant degradation.</abstract>
       <url hash="b34ca299">2021.naacl-main.207</url>
@@ -2838,7 +2838,7 @@
     <paper id="208">
       <title>How many data points is a prompt worth?</title>
       <author><first>Teven</first><last>Le Scao</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>2627–2636</pages>
       <abstract>When fine-tuning pretrained models for classification, researchers either use a generic model head or a task-specific prompt for prediction. Proponents of prompting have argued that prompts provide a method for injecting task-specific guidance, which is beneficial in low-data regimes. We aim to quantify this benefit through rigorous testing of prompts in a fair setting: comparing prompted and head-based fine-tuning in equal conditions across many tasks and data sizes. By controlling for many sources of advantage, we find that prompting does indeed provide a benefit, and that this benefit can be quantified per task. Results show that prompting is often worth 100s of data points on average across classification tasks.</abstract>
       <url hash="a721c88e">2021.naacl-main.208</url>
@@ -2863,7 +2863,7 @@
     <paper id="210">
       <title>Smoothing and Shrinking the Sparse <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Search Space</title>
       <author><first>Ben</first><last>Peters</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>2642–2654</pages>
       <abstract>Current sequence-to-sequence models are trained to minimize cross-entropy and use softmax to compute the locally normalized probabilities over target sequences. While this setup has led to strong results in a variety of tasks, one unsatisfying aspect is its length bias: models give high scores to short, inadequate hypotheses and often make the empty string the argmax—the so-called cat got your tongue problem. Recently proposed entmax-based sparse sequence-to-sequence models present a possible solution, since they can shrink the search space by assigning zero probability to bad hypotheses, but their ability to handle word-level tasks with transformers has never been tested. In this work, we show that entmax-based models effectively solve the cat got your tongue problem, removing a major source of model error for neural machine translation. In addition, we generalize label smoothing, a critical regularization technique, to the broader family of Fenchel-Young losses, which includes both cross-entropy and the entmax losses. Our resulting label-smoothed entmax loss models set a new state of the art on multilingual grapheme-to-phoneme conversion and deliver improvements and better calibration properties on cross-lingual morphological inflection and machine translation for 7 language pairs.</abstract>
       <url hash="58efe875">2021.naacl-main.210</url>
@@ -2873,7 +2873,7 @@
     </paper>
     <paper id="211">
       <title>Unified Pre-training for Program Understanding and Generation</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Saikat</first><last>Chakraborty</last></author>
       <author><first>Baishakhi</first><last>Ray</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -2923,7 +2923,7 @@
     <paper id="215">
       <title>Semantic Frame Forecast</title>
       <author><first>Chieh-Yang</first><last>Huang</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>2702–2713</pages>
       <abstract>This paper introduces Semantic Frame Forecast, a task that predicts the semantic frames that will occur in the next 10, 100, or even 1,000 sentences in a running story. Prior work focused on predicting the immediate future of a story, such as one to a few sentences ahead. However, when novelists write long stories, generating a few sentences is not enough to help them gain high-level insight to develop the follow-up story. In this paper, we formulate a long story as a sequence of “story blocks,” where each block contains a fixed number of sentences (e.g., 10, 100, or 200). This formulation allows us to predict the follow-up story arc beyond the scope of a few sentences. We represent a story block using the term frequencies (TF) of semantic frames in it, normalized by each frame’s inverse document frequency (IDF). We conduct semantic frame forecast experiments on 4,794 books from the Bookcorpus and 7,962 scientific abstracts from CODA-19, with block sizes ranging from 5 to 1,000 sentences. The results show that automated models can forecast the follow-up story blocks better than the random, prior, and replay baselines, indicating the feasibility of the task. We also learn that the models using the frame representation as features outperform all the existing approaches when the block size is over 150 sentences. The human evaluation also shows that the proposed frame representation, when visualized as word clouds, is comprehensible, representative, and specific to humans.</abstract>
       <url hash="41d9ea57">2021.naacl-main.215</url>
@@ -2937,7 +2937,7 @@
       <author><first>Michalis</first><last>Papakostas</last></author>
       <author><first>Mihai</first><last>Burzo</last></author>
       <author><first>Mohamed</first><last>Abouelenien</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2714–2725</pages>
       <abstract>The capability to automatically detect human stress can benefit artificial intelligent agents involved in affective computing and human-computer interaction. Stress and emotion are both human affective states, and stress has proven to have important implications on the regulation and expression of emotion. Although a series of methods have been established for multimodal stress detection, limited steps have been taken to explore the underlying inter-dependence between stress and emotion. In this work, we investigate the value of emotion recognition as an auxiliary task to improve stress detection. We propose MUSER – a transformer-based model architecture and a novel multi-task learning algorithm with speed-based dynamic sampling strategy. Evaluation on the Multimodal Stressed Emotion (MuSE) dataset shows that our model is effective for stress detection with both internal and external auxiliary tasks, and achieves state-of-the-art results.</abstract>
       <url hash="dcde72e8">2021.naacl-main.216</url>
@@ -2950,7 +2950,7 @@
       <author><first>Yi</first><last>Zhang</last></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
       <author><first>Julia</first><last>Kiseleva</last></author>
-      <author><first>Ryen</first><last>White</last></author>
+      <author id="ryen-white"><first>Ryen</first><last>White</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>2726–2735</pages>
       <abstract>People rely on digital task management tools, such as email or to-do apps, to manage their tasks. Some of these tasks are large and complex, leading to action paralysis and feelings of being overwhelmed on the part of the user. The micro-productivity literature has shown that such tasks could benefit from being decomposed and organized, in order to reduce user cognitive load. Thus in this paper, we propose a novel end-to-end pipeline that consumes a complex task and induces a dependency graph from unstructured text to represent sub-tasks and their relationships. Our solution first finds nodes for sub-tasks from multiple ‘how-to’ articles on the web by injecting a neural text generator with three key desiderata – relevance, abstraction, and consensus. Then we resolve and infer edges between these subtask nodes by learning task dependency relations. We collect a new dataset of complex tasks with their sub-task graph to develop and evaluate our solutions. Both components of our graph induction solution are evaluated in experiments, demonstrating that our models outperform a state-of-the-art text generator significantly. Our generalizable and scalable end-to-end solution has important implications for boosting user productivity and assisting with digital task management.</abstract>
@@ -2989,7 +2989,7 @@
       <title>Learning to Synthesize Data for Semantic Parsing</title>
       <author><first>Bailin</first><last>Wang</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>2760–2766</pages>
       <abstract>Synthesizing data for semantic parsing has gained increasing attention recently. However, most methods require handcrafted (high-precision) rules in their generative process, hindering the exploration of diverse unseen data. In this work, we propose a generative model which features a (non-neural) PCFG that models the composition of programs (e.g., SQL), and a BART-based translation model that maps a program to an utterance. Due to the simplicity of PCFG and pre-trained BART, our generative model can be efficiently learned from existing data at hand. Moreover, explicitly modeling compositions using PCFG leads to better exploration of unseen programs, thus generate more diverse data. We evaluate our method in both in-domain and out-of-domain settings of text-to-SQL parsing on the standard benchmarks of GeoQuery and Spider, respectively. Our empirical results show that the synthesized data generated from our model can substantially help a semantic parser achieve better compositional and domain generalization.</abstract>
@@ -3029,7 +3029,7 @@
       <author><first>Roma</first><last>Patel</last></author>
       <author><first>Marta</first><last>Garnelo</last></author>
       <author><first>Ian</first><last>Gemp</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Yoram</first><last>Bachrach</last></author>
       <pages>2789–2798</pages>
       <abstract>The input vocabulary and the representations learned are crucial to the performance of neural NLP models. Using the full vocabulary results in less explainable and more memory intensive models, with the embedding layer often constituting the majority of model parameters. It is thus common to use a smaller vocabulary to lower memory requirements and construct more interpertable models. We propose a vocabulary selection method that views words as members of a team trying to maximize the model’s performance. We apply power indices from cooperative game theory, including the Shapley value and Banzhaf index, that measure the relative importance of individual team members in accomplishing a joint task. We approximately compute these indices to identify the most influential words. Our empirical evaluation examines multiple NLP tasks, including sentence and document classification, question answering and textual entailment. We compare to baselines that select words based on frequency, TF-IDF and regression coefficients under L1 regularization, and show that this game-theoretic vocabulary selection outperforms all baseline on a range of different tasks and datasets.</abstract>
@@ -3111,7 +3111,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Rex</first><last>Ying</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>2884–2894</pages>
@@ -3125,7 +3125,7 @@
       <title>Emotion-Infused Models for Explainable Psychological Stress Detection</title>
       <author><first>Elsbeth</first><last>Turcan</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2895–2909</pages>
       <abstract>The problem of detecting psychological stress in online posts, and more broadly, of detecting people in distress or in need of help, is a sensitive application for which the ability to interpret models is vital. Here, we present work exploring the use of a semantically related task, emotion detection, for equally competent but more explainable and human-like psychological stress detection as compared to a black-box model. In particular, we explore the use of multi-task learning as well as emotion-based language model fine-tuning. With our emotion-infused models, we see comparable results to state-of-the-art BERT. Our analysis of the words used for prediction show that our emotion-infused models mirror psychological components of stress.</abstract>
       <url hash="553ed5de">2021.naacl-main.230</url>
@@ -3171,7 +3171,7 @@
     <paper id="234">
       <title>Learning Syntax from Naturally-Occurring Bracketings</title>
       <author><first>Tianze</first><last>Shi</last></author>
-      <author><first>Ozan</first><last>İrsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last></author>
       <author><first>Igor</first><last>Malioutov</last></author>
       <author><first>Lillian</first><last>Lee</last></author>
       <pages>2941–2949</pages>
@@ -3255,7 +3255,7 @@
     <paper id="240">
       <title>Controlling Dialogue Generation with Semantic Exemplars</title>
       <author><first>Prakhar</first><last>Gupta</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Amy</first><last>Pavel</last></author>
       <pages>3018–3029</pages>
@@ -3446,7 +3446,7 @@
       <title>News Headline Grouping as a Challenging <fixed-case>NLU</fixed-case> Task</title>
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Lucas</first><last>Bandarkar</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>3186–3198</pages>
       <abstract>Recent progress in Natural Language Understanding (NLU) has seen the latest models outperform human performance on many standard tasks. These impressive results have led the community to introspect on dataset limitations, and iterate on more nuanced challenges. In this paper, we introduce the task of HeadLine Grouping (HLG) and a corresponding dataset (HLGD) consisting of 20,056 pairs of news headlines, each labeled with a binary judgement as to whether the pair belongs within the same group. On HLGD, human annotators achieve high performance of around 0.9 F-1, while current state-of-the art Transformer models only reach 0.75 F-1, opening the path for further improvements. We further propose a novel unsupervised Headline Generator Swap model for the task of HeadLine Grouping that achieves within 3 F-1 of the best supervised model. Finally, we analyze high-performing models with consistency tests, and find that models are not consistent in their predictions, revealing modeling limits of current architectures.</abstract>
       <url hash="800ae2c2">2021.naacl-main.255</url>
@@ -3459,7 +3459,7 @@
       <author><first>Eleftheria</first><last>Briakou</last></author>
       <author><first>Di</first><last>Lu</last></author>
       <author><first>Ke</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>3199–3216</pages>
       <abstract>We take the first step towards multilingual style transfer by creating and releasing XFORMAL, a benchmark of multiple formal reformulations of informal text in Brazilian Portuguese, French, and Italian. Results on XFORMAL suggest that state-of-the-art style transfer approaches perform close to simple baselines, indicating that style transfer is even more challenging when moving multilingual.</abstract>
       <url hash="e5d15c00">2021.naacl-main.256</url>
@@ -3513,7 +3513,7 @@
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Dongxiao</first><last>He</last></author>
       <author><first>Jianxin</first><last>Li</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>3259–3265</pages>
       <abstract>The current state-of-the-art model HiAGM for hierarchical text classification has two limitations. First, it correlates each text sample with all labels in the dataset which contains irrelevant information. Second, it does not consider any statistical constraint on the label representations learned by the structure encoder, while constraints for representation learning are proved to be helpful in previous work. In this paper, we propose HTCInfoMax to address these issues by introducing information maximization which includes two modules: text-label mutual information maximization and label prior matching. The first module can model the interaction between each text sample and its ground truth labels explicitly which filters out irrelevant information. The second one encourages the structure encoder to learn better representations with desired characteristics for all labels which can better handle label imbalance in hierarchical text classification. Experimental results on two benchmark datasets demonstrate the effectiveness of the proposed HTCInfoMax.</abstract>
       <url hash="a5b3e9bd">2021.naacl-main.260</url>
@@ -3639,7 +3639,7 @@
       <title><fixed-case>TABBIE</fixed-case>: Pretrained Representations of Tabular Data</title>
       <author><first>Hiroshi</first><last>Iida</last></author>
       <author><first>Dung</first><last>Thai</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <pages>3446–3456</pages>
       <abstract>Existing work on tabular representation-learning jointly models tables and associated text using self-supervised objective functions derived from pretrained language models such as BERT. While this joint pretraining improves tasks involving paired tables and text (e.g., answering questions about tables), we show that it underperforms on tasks that operate over tables without any associated text (e.g., populating missing cells). We devise a simple pretraining objective (corrupt cell detection) that learns exclusively from tabular data and reaches the state-of-the-art on a suite of table-based prediction tasks. Unlike competing approaches, our model (TABBIE) provides embeddings of all table substructures (cells, rows, and columns), and it also requires far less compute to train. A qualitative analysis of our model’s learned cell, column, and row representations shows that it understands complex table semantics and numerical trends.</abstract>
@@ -3650,7 +3650,7 @@
     </paper>
     <paper id="271">
       <title>Better Feature Integration for Named Entity Recognition</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Zhanming</first><last>Jie</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
@@ -3685,10 +3685,10 @@
     </paper>
     <paper id="274">
       <title>A Context-Dependent Gated Module for Incorporating Symbolic Semantics into Event Coreference Resolution</title>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <pages>3491–3499</pages>
@@ -3750,7 +3750,7 @@
     <paper id="279">
       <title>Choose Your Own Adventure: Paired Suggestions in Collaborative Writing for Evaluating Story Generation Models</title>
       <author><first>Elizabeth</first><last>Clark</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3566–3575</pages>
       <abstract>Story generation is an open-ended and subjective task, which poses a challenge for evaluating story generation models. We present Choose Your Own Adventure, a collaborative writing setup for pairwise model evaluation. Two models generate suggestions to people as they write a short story; we ask writers to choose one of the two suggestions, and we observe which model’s suggestions they prefer. The setup also allows further analysis based on the revisions people make to the suggestions. We show that these measures, combined with automatic metrics, provide an informative picture of the models’ performance, both in cases where the differences in generation methods are small (nucleus vs. top-k sampling) and large (GPT2 vs. Fusion models).</abstract>
       <url hash="de113163">2021.naacl-main.279</url>
@@ -3768,7 +3768,7 @@
       <author><first>Wenhui</first><last>Wang</last></author>
       <author><first>Xia</first><last>Song</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>3576–3588</pages>
       <abstract>In this work, we present an information-theoretic framework that formulates cross-lingual language model pre-training as maximizing mutual information between multilingual-multi-granularity texts. The unified view helps us to better understand the existing methods for learning cross-lingual representations. More importantly, inspired by the framework, we propose a new pre-training task based on contrastive learning. Specifically, we regard a bilingual sentence pair as two views of the same meaning and encourage their encoded representations to be more similar than the negative examples. By leveraging both monolingual and parallel corpora, we jointly train the pretext tasks to improve the cross-lingual transferability of pre-trained models. Experimental results on several benchmarks show that our approach achieves considerably better performance. The code and pre-trained models are available at <url>https://aka.ms/infoxlm</url>.</abstract>
@@ -3793,7 +3793,7 @@
     <paper id="282">
       <title>Code-Mixing on Sesame Street: Dawn of the Adversarial Polyglots</title>
       <author><first>Samson</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>3596–3616</pages>
       <abstract>Multilingual models have demonstrated impressive cross-lingual transfer performance. However, test sets like XNLI are monolingual at the example level. In multilingual communities, it is common for polyglots to code-mix when conversing with each other. Inspired by this phenomenon, we present two strong black-box adversarial attacks (one word-level, one phrase-level) for multilingual models that push their ability to handle code-mixed sentences to the limit. The former uses bilingual dictionaries to propose perturbations and translations of the clean example for sense disambiguation. The latter directly aligns the clean example with its translations before extracting phrases as perturbations. Our phrase-level attack has a success rate of 89.75% against XLM-R-large, bringing its average accuracy of 79.85 down to 8.18 on XNLI. Finally, we propose an efficient adversarial training scheme that trains in the same number of steps as the original model and show that it creates more language-invariant representations, improving clean and robust accuracy in the absence of lexical overlap without degrading performance on the original examples.</abstract>
       <url hash="6f01806e">2021.naacl-main.282</url>
@@ -3808,7 +3808,7 @@
       <author><first>Meryem</first><last>M’hamdi</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <pages>3617–3632</pages>
@@ -3873,7 +3873,7 @@
       <author><first>Pat</first><last>Verga</last></author>
       <author><first>Haitian</first><last>Sun</last></author>
       <author><first>Livio</first><last>Baldini Soares</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>3678–3691</pages>
       <abstract>Past research has demonstrated that large neural language models (LMs) encode surprising amounts of factual information: however, augmenting or modifying this information requires modifying a corpus and retraining, which is computationally expensive. To address this problem, we develop a neural LM that includes an interpretable neuro-symbolic KB in the form of a “fact memory”. Each element of the fact memory is formed from a triple of vectors, where each vector corresponds to a KB entity or relation. Our LM improves performance on knowledge-intensive question-answering tasks, sometimes dramatically, including a 27 point increase in one setting of WebQuestionsSP over a state-of-the-art open-book model, despite using 5% of the parameters. Most interestingly, we demonstrate that the model can be modified, without <i>any</i> re-training, by updating the fact memory.</abstract>
       <url hash="bb08d5c1">2021.naacl-main.288</url>
@@ -3952,7 +3952,7 @@
       <title>An Empirical Investigation of Bias in the Multimodal Analysis of Financial Earnings Calls</title>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Arshiya</first><last>Aggarwal</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>3751–3757</pages>
       <abstract>Volatility prediction is complex due to the stock market’s stochastic nature. Existing research focuses on the textual elements of financial disclosures like earnings calls transcripts to forecast stock volatility and risk, but ignores the rich acoustic features in the company executives’ speech. Recently, new multimodal approaches that leverage the verbal and vocal cues of speakers in financial disclosures significantly outperform previous state-of-the-art approaches demonstrating the benefits of multimodality and speech. However, the financial realm is still plagued with a severe underrepresentation of various communities spanning diverse demographics, gender, and native speech. While multimodal models are better risk forecasters, it is imperative to also investigate the potential bias that these models may learn from the speech signals of company executives. In this work, we present the first study to discover the gender bias in multimodal volatility prediction due to gender-sensitive audio features and fewer female executives in earnings calls of one of the world’s biggest stock indexes, the S&amp;P 500 index. We quantitatively analyze bias as error disparity and investigate the sources of this bias. Our results suggest that multimodal neural financial models accentuate gender-based stereotypes.</abstract>
       <url hash="6944fd3e">2021.naacl-main.294</url>
@@ -3993,7 +3993,7 @@
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Brendon</first><last>Boldt</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>3784–3798</pages>
       <abstract>Recent work in natural language processing (NLP) has focused on ethical challenges such as understanding and mitigating bias in data and algorithms; identifying objectionable content like hate speech, stereotypes and offensive language; and building frameworks for better system design and data handling practices. However, there has been little discussion about the ethical foundations that underlie these efforts. In this work, we study one ethical theory, namely deontological ethics, from the perspective of NLP. In particular, we focus on the generalization principle and the respect for autonomy through informed consent. We provide four case studies to demonstrate how these principles can be used with NLP systems. We also recommend directions to avoid the ethical issues in these systems.</abstract>
       <url hash="387fce63">2021.naacl-main.297</url>
@@ -4020,7 +4020,7 @@
       <title>On the Impact of Random Seeds on the Fairness of Clinical Classifiers</title>
       <author><first>Silvio</first><last>Amir</last></author>
       <author><first>Jan-Willem</first><last>van de Meent</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>3808–3823</pages>
       <abstract>Recent work has shown that fine-tuning large networks is surprisingly sensitive to changes in random seed(s). We explore the implications of this phenomenon for model fairness across demographic groups in clinical prediction tasks over electronic health records (EHR) in MIMIC-III —— the standard dataset in clinical NLP research. Apparent subgroup performance varies substantially for seeds that yield similar overall performance, although there is no evidence of a trade-off between overall and subgroup performance. However, we also find that the small sample sizes inherent to looking at intersections of minority groups and somewhat rare conditions limit our ability to accurately estimate disparities. Further, we find that jointly optimizing for high overall performance and low disparities does not yield statistically significant improvements. Our results suggest that fairness work using MIMIC-III should carefully account for variations in apparent differences that may arise from stochasticity and small sample sizes.</abstract>
       <url hash="3a511f1a">2021.naacl-main.299</url>
@@ -4044,7 +4044,7 @@
       <title>Discourse Probing of Pretrained Language Models</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>3849–3864</pages>
       <abstract>Existing work on probing of pretrained language models (LMs) has predominantly focused on sentence-level syntactic tasks. In this paper, we introduce document-level discourse probing to evaluate the ability of pretrained LMs to capture document-level relations. We experiment with 7 pretrained LMs, 4 languages, and 7 discourse probing tasks, and find BART to be overall the best model at capturing discourse — but only in its encoder, with BERT performing surprisingly well as the baseline model. Across the different models, there are substantial differences in which layers best capture discourse information, and large disparities between models.</abstract>
       <url hash="6622a4a5">2021.naacl-main.301</url>
@@ -4060,7 +4060,7 @@
       <author><first>Yingce</first><last>Xia</last></author>
       <author><first>Shufang</first><last>Xie</last></author>
       <author><first>Tao</first><last>Qin</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Tie-Yan</first><last>Liu</last></author>
       <pages>3865–3878</pages>
       <abstract>Transformer architecture achieves great success in abundant natural language processing tasks. The over-parameterization of the Transformer model has motivated plenty of works to alleviate its overfitting for superior performances. With some explorations, we find simple techniques such as dropout, can greatly boost model performance with a careful design. Therefore, in this paper, we integrate different dropout techniques into the training of Transformer models. Specifically, we propose an approach named UniDrop to unites three different dropout techniques from fine-grain to coarse-grain, i.e., feature dropout, structure dropout, and data dropout. Theoretically, we demonstrate that these three dropouts play different roles from regularization perspectives. Empirically, we conduct experiments on both neural machine translation and text classification benchmark datasets. Extensive results indicate that Transformer with UniDrop can achieve around 1.5 BLEU improvement on IWSLT14 translation tasks, and better accuracy for the classification even using strong pre-trained RoBERTa as backbone.</abstract>
@@ -4098,7 +4098,7 @@
     </paper>
     <paper id="305">
       <title>Double Perturbation: On the Robustness of Robustness and Counterfactual Bias Evaluation</title>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Jieyu</first><last>Zhao</last></author>
       <author><first>Huan</first><last>Zhang</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -4117,7 +4117,7 @@
       <author><first>Jatin</first><last>Ganhotra</last></author>
       <author><first>Hui</first><last>Wan</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
       <pages>3917–3930</pages>
       <abstract>Explaining neural network models is important for increasing their trustworthiness in real-world applications. Most existing methods generate post-hoc explanations for neural network models by identifying individual feature attributions or detecting interactions between adjacent features. However, for models with text pairs as inputs (e.g., paraphrase identification), existing methods are not sufficient to capture feature interactions between two texts and their simple extension of computing all word-pair interactions between two texts is computationally inefficient. In this work, we propose the Group Mask (GMASK) method to implicitly detect word correlations by grouping correlated words from the input text pair together and measure their contribution to the corresponding NLP tasks as a whole. The proposed method is evaluated with two different model architectures (decomposable attention model and BERT) across four datasets, including natural language inference and paraphrase identification tasks. Experiments show the effectiveness of GMASK in providing faithful explanations to these models.</abstract>
@@ -4183,8 +4183,8 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>3975–3981</pages>
       <abstract>Unsupervised neural machine translation (UNMT) that relies solely on massive monolingual corpora has achieved remarkable results in several translation tasks. However, in real-world scenarios, massive monolingual corpora do not exist for some extremely low-resource languages such as Estonian, and UNMT systems usually perform poorly when there is not adequate training corpus for one language. In this paper, we first define and analyze the unbalanced training data scenario for UNMT. Based on this scenario, we propose UNMT self-training mechanisms to train a robust UNMT system and improve its performance in this case. Experimental results on several language pairs show that the proposed methods substantially outperform conventional UNMT systems.</abstract>
       <url hash="8eb1b8db">2021.naacl-main.311</url>
@@ -4254,7 +4254,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Arnav</first><last>Wadhwa</last></author>
       <author><first>Shivam</first><last>Agarwal</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>4018–4030</pages>
       <abstract>It is challenging to design profitable and practical trading strategies, as stock price movements are highly stochastic, and the market is heavily influenced by chaotic data across sources like news and social media. Existing NLP approaches largely treat stock prediction as a classification or regression problem and are not optimized to make profitable investment decisions. Further, they do not model the temporal dynamics of large volumes of diversely influential text to which the market responds quickly. Building on these shortcomings, we propose a deep reinforcement learning approach that makes time-aware decisions to trade stocks while optimizing profit using textual data. Our method outperforms state-of-the-art in terms of risk-adjusted returns in trading simulations on two benchmarks: Tweets (English) and financial news (Chinese) pertaining to two major indexes and four global stock markets. Through extensive experiments and studies, we build the case for our method as a tool for quantitative trading.</abstract>
       <url hash="cf80d8f0">2021.naacl-main.316</url>
@@ -4294,7 +4294,7 @@
       <author><first>Mohammad</first><last>Kachuee</last></author>
       <author><first>Hao</first><last>Yuan</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>4053–4064</pages>
       <abstract>Turn-level user satisfaction is one of the most important performance metrics for conversational agents. It can be used to monitor the agent’s performance and provide insights about defective user experiences. While end-to-end deep learning has shown promising results, having access to a large number of reliable annotated samples required by these methods remains challenging. In a large-scale conversational system, there is a growing number of newly developed skills, making the traditional data collection, annotation, and modeling process impractical due to the required annotation costs and the turnaround times. In this paper, we suggest a self-supervised contrastive learning approach that leverages the pool of unlabeled data to learn user-agent interactions. We show that the pre-trained models using the self-supervised objective are transferable to the user satisfaction prediction. In addition, we propose a novel few-shot transfer learning approach that ensures better transferability for very small sample sizes. The suggested few-shot method does not require any inner loop optimization process and is scalable to very large datasets and complex models. Based on our experiments using real data from a large-scale commercial system, the suggested approach is able to significantly reduce the required number of annotations, while improving the generalization on unseen skills.</abstract>
       <url hash="a0c496dd">2021.naacl-main.319</url>
@@ -4317,7 +4317,7 @@
       <title>Grey-box Adversarial Attack And Defence For Sentiment Classification</title>
       <author><first>Ying</first><last>Xu</last></author>
       <author><first>Xu</first><last>Zhong</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>4078–4087</pages>
       <abstract>We introduce a grey-box adversarial attack and defence framework for sentiment classification. We address the issues of differentiability, label preservation and input reconstruction for adversarial attack and defence in one unified framework. Our results show that once trained, the attacking model is capable of generating high-quality adversarial examples substantially faster (one order of magnitude less in time) than state-of-the-art attacking methods. These examples also preserve the original sentiment according to human evaluation. Additionally, our framework produces an improved classifier that is robust in defending against multiple adversarial attacking methods. Code is available at: <url>https://github.com/ibm-aur-nlp/adv-def-text-dist</url>.</abstract>
@@ -4331,7 +4331,7 @@
       <title>How low is too low? A monolingual take on lemmatisation in <fixed-case>I</fixed-case>ndian languages</title>
       <author><first>Kumar</first><last>Saunack</last></author>
       <author><first>Kumar</first><last>Saurav</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4088–4094</pages>
       <abstract>Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. Most prior work on ML based lemmatization has focused on high resource languages, where data sets (word forms) are readily available. For languages which have no linguistic work available, especially on morphology or in languages where the computational realization of linguistic rules is complex and cumbersome, machine learning based lemmatizers are the way togo. In this paper, we devote our attention to lemmatisation for low resource, morphologically rich scheduled Indian languages using neural methods. Here, low resource means only a small number of word forms are available. We perform tests to analyse the variance in monolingual models’ performance on varying the corpus size and contextual morphological tag data for training. We show that monolingual approaches with data augmentation can give competitive accuracy even in the low resource setting, which augurs well for NLP in low resource setting.</abstract>
       <url hash="a23d934d">2021.naacl-main.322</url>
@@ -4343,7 +4343,7 @@
       <title>Causal Effects of Linguistic Properties</title>
       <author><first>Reid</first><last>Pryzant</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Victor</first><last>Veitch</last></author>
       <author><first>Dhanya</first><last>Sridhar</last></author>
       <pages>4095–4109</pages>
@@ -4368,7 +4368,7 @@
       <author><first>Zhiyi</first><last>Ma</last></author>
       <author><first>Tristan</first><last>Thrush</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
@@ -4384,8 +4384,8 @@
     <paper id="325">
       <title>Translational <fixed-case>NLP</fixed-case>: A New Paradigm and General Principles for Natural Language Processing Research</title>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
-      <author><first>Jill Fain</first><last>Lehman</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="jill-fain-lehman"><first>Jill Fain</first><last>Lehman</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <author><first>Harry</first><last>Hochheiser</last></author>
       <pages>4125–4138</pages>
       <abstract>Natural language processing (NLP) research combines the study of universal principles, through basic science, with applied science targeting specific use cases and settings. However, the process of exchange between basic NLP and applications is often assumed to emerge naturally, resulting in many innovations going unapplied and many important questions left unstudied. We describe a new paradigm of Translational NLP, which aims to structure and facilitate the processes by which basic and applied NLP research inform one another. Translational NLP thus presents a third research paradigm, focused on understanding the challenges posed by application needs and how these challenges can drive innovation in basic science and technology design. We show that many significant advances in NLP research have emerged from the intersection of basic principles with application needs, and present a conceptual framework outlining the stakeholders and key questions in translational research. Our framework provides a roadmap for developing Translational NLP as a dedicated research area, and identifies general translational principles to facilitate exchange between basic and applied research.</abstract>
@@ -4408,7 +4408,7 @@
     </paper>
     <paper id="327">
       <title>Probing for Bridging Inference in Transformer Language Models</title>
-      <author><first>Onkar</first><last>Pandit</last></author>
+      <author id="onkar-arun-pandit"><first>Onkar</first><last>Pandit</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
       <pages>4153–4163</pages>
       <abstract>We probe pre-trained transformer language models for bridging inference. We first investigate individual attention heads in BERT and observe that attention heads at higher layers prominently focus on bridging relations in-comparison with the lower and middle layers, also, few specific attention heads concentrate consistently on bridging. More importantly, we consider language models as a whole in our second approach where bridging anaphora resolution is formulated as a masked token prediction task (Of-Cloze test). Our formulation produces optimistic results without any fine-tuning, which indicates that pre-trained language models substantially capture bridging inference. Our further investigation shows that the distance between anaphor-antecedent and the context provided to language models play an important role in the inference.</abstract>
@@ -4432,9 +4432,9 @@
     <paper id="329">
       <title>Stay Together: A System for Single and Split-antecedent Anaphora Resolution</title>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>4174–4184</pages>
       <abstract>The state-of-the-art on basic, single-antecedent anaphora has greatly improved in recent years. Researchers have therefore started to pay more attention to more complex cases of anaphora such as split-antecedent anaphora, as in “Time-Warner is considering a legal challenge to Telecommunications Inc’s plan to buy half of Showtime Networks Inc–a move that could lead to all-out war between the two powerful companies”. Split-antecedent anaphora is rarer and more complex to resolve than single-antecedent anaphora; as a result, it is not annotated in many datasets designed to test coreference, and previous work on resolving this type of anaphora was carried out in unrealistic conditions that assume gold mentions and/or gold split-antecedent anaphors are available. These systems also focus on split-antecedent anaphors only. In this work, we introduce a system that resolves both single and split-antecedent anaphors, and evaluate it in a more realistic setting that uses predicted mentions. We also start addressing the question of how to evaluate single and split-antecedent anaphors together using standard coreference evaluation metrics.</abstract>
       <url hash="54e69f35">2021.naacl-main.329</url>
@@ -4468,7 +4468,7 @@
       <title>Multi-source Neural Topic Modeling in Multi-view Embedding Spaces</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Yatin</first><last>Chaudhary</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>4205–4217</pages>
       <abstract>Though word embeddings and topics are complementary representations, several past works have only used pretrained word embeddings in (neural) topic modeling to address data sparsity in short-text or small collection of documents. This work presents a novel neural topic modeling framework using multi-view embed ding spaces: (1) pretrained topic-embeddings, and (2) pretrained word-embeddings (context-insensitive from Glove and context-sensitive from BERT models) jointly from one or many sources to improve topic quality and better deal with polysemy. In doing so, we first build respective pools of pretrained topic (i.e., TopicPool) and word embeddings (i.e., WordPool). We then identify one or more relevant source domain(s) and transfer knowledge to guide meaningful learning in the sparse target domain. Within neural topic modeling, we quantify the quality of topics and document representations via generalization (perplexity), interpretability (topic coherence) and information retrieval (IR) using short-text, long-text, small and large document collections from news and medical domains. Introducing the multi-source multi-view embedding spaces, we have shown state-of-the-art neural topic modeling using 6 source (high-resource) and 5 target (low-resource) corpora.</abstract>
       <url hash="993d4961">2021.naacl-main.332</url>
@@ -4482,7 +4482,7 @@
       <author><first>Jimin</first><last>Huang</last></author>
       <author><first>Pan</first><last>Du</last></author>
       <author><first>Min</first><last>Peng</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>4218–4227</pages>
       <abstract>Graph convolutional networks (GCNs) have been applied recently to text classification and produced an excellent performance. However, existing GCN-based methods do not assume an explicit latent semantic structure of documents, making learned representations less effective and difficult to interpret. They are also transductive in nature, thus cannot handle out-of-graph documents. To address these issues, we propose a novel model named inductive Topic Variational Graph Auto-Encoder (T-VGAE), which incorporates a topic model into variational graph-auto-encoder (VGAE) to capture the hidden semantic information between documents and words. T-VGAE inherits the interpretability of the topic model and the efficient information propagation mechanism of VGAE. It learns probabilistic representations of words and documents by jointly encoding and reconstructing the global word-level graph and bipartite graphs of documents, where each document is considered individually and decoupled from the global correlation graph so as to enable inductive learning. Our experiments on several benchmark datasets show that our method outperforms the existing competitive models on supervised and semi-supervised text classification, as well as unsupervised text representation learning. In addition, it has higher interpretability and is able to deal with unseen documents.</abstract>
       <url hash="657546ce">2021.naacl-main.333</url>
@@ -4549,7 +4549,7 @@
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Kazuma</first><last>Hashimoto</last></author>
       <author><first>Yingbo</first><last>Zhou</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>4274–4287</pages>
       <abstract>Document grounded generation is the task of using the information provided in a document to improve text generation. This work focuses on two different document grounded generation tasks: Wikipedia Update Generation task and Dialogue response generation. Our work introduces two novel adaptations of large scale pre-trained encoder-decoder models focusing on building context driven representation of the document and enabling specific attention to the information in the document. Additionally, we provide a stronger BART baseline for these tasks. Our proposed techniques outperform existing methods on both automated (at least 48% increase in BLEU-4 points) and human evaluation for closeness to reference and relevance to the document. Furthermore, we perform comprehensive manual inspection of the generated output and categorize errors to provide insights into future directions in modeling these tasks.</abstract>
@@ -4591,7 +4591,7 @@
       <author><first>Bowen</first><last>Tan</last></author>
       <author><first>Zichao</first><last>Yang</last></author>
       <author><first>Maruan</first><last>Al-Shedivat</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>4313–4324</pages>
       <abstract>Large-scale language models (LMs) pretrained on massive corpora of text, such as GPT-2, are powerful open-domain text generators. However, as our systematic examination reveals, it is still challenging for such models to generate coherent long passages of text (e.g., 1000 tokens), especially when the models are fine-tuned to the target domain on a small corpus. Previous planning-then-generation methods also fall short of producing such long text in various domains. To overcome the limitations, we propose a simple but effective method of generating text in a progressive manner, inspired by generating images from low to high resolution. Our method first produces domain-specific content keywords and then progressively refines them into complete passages in multiple stages. The simple design allows our approach to take advantage of pretrained LMs at each stage and effectively adapt to any target domain given only a small set of examples. We conduct a comprehensive empirical study with a broad set of evaluation metrics, and show that our approach significantly improves upon the fine-tuned large LMs and various planning-then-generation methods in terms of quality and sample efficiency. Human evaluation also validates that our model generations are more coherent.</abstract>
@@ -4616,7 +4616,7 @@
       <author><first>Sarik</first><last>Ghazarian</last></author>
       <author><first>Zixi</first><last>Liu</last></author>
       <author><first>Akash</first><last>S M</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>4334–4344</pages>
@@ -4728,7 +4728,7 @@
       <author><first>Sian</first><last>Gooding</last></author>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>4439–4449</pages>
       <abstract>Lexical complexity is a highly subjective notion, yet this factor is often neglected in lexical simplification and readability systems which use a ”one-size-fits-all” approach. In this paper, we investigate which aspects contribute to the notion of lexical complexity in various groups of readers, focusing on native and non-native speakers of English, and how the notion of complexity changes depending on the proficiency level of a non-native reader. To facilitate reproducibility of our approach and foster further research into these aspects, we release a dataset of complex words annotated by readers with different backgrounds.</abstract>
       <url hash="9b793814">2021.naacl-main.351</url>
@@ -4764,7 +4764,7 @@
     </paper>
     <paper id="354">
       <title>On Biasing Transformer Attention Towards Monotonicity</title>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Chantal</first><last>Amrhein</last></author>
       <author><first>Noëmi</first><last>Aepli</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
@@ -4783,7 +4783,7 @@
       <author><first>Madeleine</first><last>van Zuylen</last></author>
       <author><first>Sravanthi</first><last>Parasa</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>4489–4503</pages>
@@ -4810,7 +4810,7 @@
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Aravind Reddy</first><last>Ravula</last></author>
       <author><first>Huy</first><last>Vu</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>4515–4532</pages>
       <abstract>In human-level NLP tasks, such as predicting mental health, personality, or demographics, the number of observations is often smaller than the standard 768+ hidden state sizes of each layer within modern transformer-based language models, limiting the ability to effectively leverage transformers. Here, we provide a systematic study on the role of dimension reduction methods (principal components analysis, factorization techniques, or multi-layer auto-encoders) as well as the dimensionality of embedding vectors and sample sizes as a function of predictive performance. We first find that fine-tuning large models with a limited amount of data pose a significant difficulty which can be overcome with a pre-trained dimension reduction regime. RoBERTa consistently achieves top performance in human-level tasks, with PCA giving benefit over other reduction methods in better handling users that write longer texts. Finally, we observe that a majority of the tasks achieve results comparable to the best performance with just 1/12 of the embedding dimensions.</abstract>
       <url hash="c1b9bbd9">2021.naacl-main.357</url>
@@ -4824,7 +4824,7 @@
       <author><first>Evan</first><last>Sholle</last></author>
       <author><first>Ashley</first><last>Beecy</last></author>
       <author><first>Subhi</first><last>Al’Aref</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <pages>4533–4538</pages>
       <abstract>Utilizing clinical texts in survival analysis is difficult because they are largely unstructured. Current automatic extraction models fail to capture textual information comprehensively since their labels are limited in scope. Furthermore, they typically require a large amount of data and high-quality expert annotations for training. In this work, we present a novel method of using BERT-based hidden layer representations of clinical texts as covariates for proportional hazards models to predict patient survival outcomes. We show that hidden layers yield notably more accurate predictions than predefined features, outperforming the previous baseline model by 5.7% on average across C-index and time-dependent AUC. We make our work publicly available at <url>https://github.com/bionlplab/heart_failure_mortality</url>.</abstract>
       <url hash="39b07bce">2021.naacl-main.358</url>
@@ -4916,7 +4916,7 @@
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <pages>4599–4610</pages>
       <abstract>Readers of academic research papers often read with the goal of answering specific questions. Question Answering systems that can answer those questions can make consumption of the content much more efficient. However, building such tools requires data that reflect the difficulty of the task arising from complex reasoning about claims made in multiple parts of a paper. In contrast, existing information-seeking question answering datasets usually contain questions about generic factoid-type information. We therefore present Qasper, a dataset of 5049 questions over 1585 Natural Language Processing papers. Each question is written by an NLP practitioner who read only the title and abstract of the corresponding paper, and the question seeks information present in the full text. The questions are then answered by a separate set of NLP practitioners who also provide supporting evidence to answers. We find that existing models that do well on other QA tasks do not perform well on answering these questions, underperforming humans by at least 27 F1 points when answering them from entire papers, motivating further research in document-grounded, information-seeking QA, which our dataset is designed to facilitate.</abstract>
@@ -4927,12 +4927,12 @@
     </paper>
     <paper id="366">
       <title>Differentiable Open-Ended Commonsense Reasoning</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Haitian</first><last>Sun</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Manzil</first><last>Zaheer</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>4611–4625</pages>
       <abstract>Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-ended commonsense reasoning (OpenCSR) — the task of answering a commonsense question without any pre-defined choices — using as a resource only a corpus of commonsense facts written in natural language. OpenCSR is challenging due to a large decision space, and because many questions require implicit multi-hop reasoning. As an approach to OpenCSR, we propose DrFact, an efficient Differentiable model for multi-hop Reasoning over knowledge Facts. To evaluate OpenCSR methods, we adapt several popular commonsense reasoning benchmarks, and collect multiple new answers for each test question via crowd-sourcing. Experiments show that DrFact outperforms strong baseline methods by a large margin.</abstract>
       <url hash="a3985c3c">2021.naacl-main.366</url>
@@ -4946,7 +4946,7 @@
       <author><first>Song</first><last>Feng</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Siva Sankalp</first><last>Patel</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <pages>4626–4634</pages>
       <abstract>Machine reading comprehension is a challenging task especially for querying documents with deep and interconnected contexts. Transformer-based methods have shown advanced performances on this task; however, most of them still treat documents as a flat sequence of tokens. This work proposes a new Transformer-based method that reads a document as tree slices. It contains two modules for identifying more relevant text passage and the best answer span respectively, which are not only jointly trained but also jointly consulted at inference time. Our evaluation results show that our proposed method outperforms several competitive baseline approaches on two datasets from varied domains.</abstract>
@@ -4960,7 +4960,7 @@
       <author><first>Chen</first><last>Zhao</last></author>
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>4635–4641</pages>
       <abstract>Complex question answering often requires finding a reasoning chain that consists of multiple evidence pieces. Current approaches incorporate the strengths of structured knowledge and unstructured text, assuming text corpora is semi-structured. Building on dense retrieval methods, we propose a new multi-step retrieval approach (BeamDR) that iteratively forms an evidence chain through beam search in dense representations. When evaluated on multi-hop question answering, BeamDR is competitive to state-of-the-art systems, without using any semi-structured information. Through query composition in dense space, BeamDR captures the implicit relationships between evidence in the reasoning chain. The code is available at <url>https://github.com/henryzhao5852/BeamDR</url>.</abstract>
       <url hash="7eb4993c">2021.naacl-main.368</url>
@@ -4982,7 +4982,7 @@
     </paper>
     <paper id="370">
       <title>Scalar Adjective Identification and Multilingual Ranking</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>4653–4660</pages>
       <abstract>The intensity relationship that holds between scalar adjectives (e.g., nice &lt; great &lt; wonderful) is highly relevant for natural language inference and common-sense reasoning. Previous research on scalar adjective ranking has focused on English, mainly due to the availability of datasets for evaluation. We introduce a new multilingual dataset in order to promote research on scalar adjectives in new languages. We perform a series of experiments and set performance baselines on this dataset, using monolingual and multilingual contextual language models. Additionally, we introduce a new binary classification task for English scalar adjective identification which examines the models’ ability to distinguish scalar from relational adjectives. We probe contextualised representations and report baseline results for future comparison on this task.</abstract>
@@ -5044,7 +5044,7 @@
       <author><first>Xuebin</first><last>Qin</last></author>
       <author><first>Nawshad</first><last>Farruque</last></author>
       <author><first>Lili</first><last>Mou</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <pages>4717–4724</pages>
       <abstract>Multi-label emotion classification is an important task in NLP and is essential to many applications. In this work, we propose a sequence-to-emotion (Seq2Emo) approach, which implicitly models emotion correlations in a bi-directional decoder. Experiments on SemEval’18 and GoEmotions datasets show that our approach outperforms state-of-the-art methods (without using external data). In particular, Seq2Emo outperforms the binary relevance (BR) and classifier chain (CC) approaches in a fair setting.</abstract>
       <url hash="f0e16946">2021.naacl-main.375</url>
@@ -5091,7 +5091,7 @@
       <title>Adversarial Learning for Zero-Shot Stance Detection on Social Media</title>
       <author><first>Emily</first><last>Allaway</last></author>
       <author><first>Malavika</first><last>Srikanth</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>4756–4767</pages>
       <abstract>Stance detection on social media can help to identify and understand slanted news or commentary in everyday life. In this work, we propose a new model for zero-shot stance detection on Twitter that uses adversarial learning to generalize across topics. Our model achieves state-of-the-art performance on a number of unseen test topics with minimal computational costs. In addition, we extend zero-shot stance detection to topics not previously considered, highlighting future directions for zero-shot transfer.</abstract>
       <url hash="a1d5a465">2021.naacl-main.379</url>
@@ -5138,7 +5138,7 @@
       <author><first>Emily</first><last>Alsentzer</last></author>
       <author><first>Mert</first><last>Ketenci</last></author>
       <author><first>Jason</first><last>Zucker</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <pages>4794–4811</pages>
       <abstract>Summarization of clinical narratives is a long-standing research problem. Here, we introduce the task of hospital-course summarization. Given the documentation authored throughout a patient’s hospitalization, generate a paragraph that tells the story of the patient admission. We construct an English, text-to-text dataset of 109,000 hospitalizations (2M source notes) and their corresponding summary proxy: the clinician-authored “Brief Hospital Course” paragraph written as part of a discharge note. Exploratory analyses reveal that the BHC paragraphs are highly abstractive with some long extracted fragments; are concise yet comprehensive; differ in style and content organization from the source notes; exhibit minimal lexical cohesion; and represent silver-standard references. Our analysis identifies multiple implications for modeling this complex, multi-document summarization task.</abstract>
       <url hash="9a4381c2">2021.naacl-main.382</url>
@@ -5174,7 +5174,7 @@
     </paper>
     <paper id="385">
       <title>What Will it Take to Fix Benchmarking in Natural Language Understanding?</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>George</first><last>Dahl</last></author>
       <pages>4843–4855</pages>
       <abstract>Evaluation for many natural language understanding (NLU) tasks is broken: Unreliable and biased systems score so highly on standard benchmarks that there is little room for researchers who develop better systems to demonstrate their improvements. The recent trend to abandon IID benchmarks in favor of adversarially-constructed, out-of-distribution test sets ensures that current models will perform poorly, but ultimately only obscures the abilities that we want our benchmarks to measure. In this position paper, we lay out four criteria that we argue NLU benchmarks should meet. We argue most current benchmarks fail at these criteria, and that adversarial data collection does not meaningfully address the causes of these failures. Instead, restoring a healthy evaluation ecosystem will require significant progress in the design of benchmark datasets, the reliability with which they are annotated, their size, and the ways they handle social bias.</abstract>
@@ -5203,8 +5203,8 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Puneet</first><last>Mathur</last></author>
       <author><first>Taru</first><last>Jain</last></author>
-      <author><first>Akash Kumar</first><last>Gautam</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="akash-kumar-gautam"><first>Akash Kumar</first><last>Gautam</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>4881–4892</pages>
       <abstract>The #MeToo movement on social media platforms initiated discussions over several facets of sexual harassment in our society. Prior work by the NLP community for automated identification of the narratives related to sexual abuse disclosures barely explored this social phenomenon as an independent task. However, emotional attributes associated with textual conversations related to the #MeToo social movement are complexly intertwined with such narratives. We formulate the task of identifying narratives related to the sexual abuse disclosures in online posts as a joint modeling task that leverages their emotional attributes through multitask learning. Our results demonstrate that positive knowledge transfer via context-specific shared representations of a flexible cross-stitched parameter sharing model helps establish the inherent benefit of jointly modeling tasks related to sexual abuse disclosures with emotion classification from the text in homogeneous and heterogeneous settings. We show how for more domain-specific tasks related to sexual abuse disclosures such as sarcasm identification and dialogue act (refutation, justification, allegation) classification, homogeneous multitask learning is helpful, whereas for more general tasks such as stance and hate speech detection, heterogeneous multitask learning with emotion classification works better.</abstract>
       <url hash="d35db9e4">2021.naacl-main.387</url>
@@ -5241,7 +5241,7 @@
     <paper id="390">
       <title>Identifying inherent disagreement in natural language inference</title>
       <author><first>Xinliang Frederick</first><last>Zhang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>4908–4915</pages>
       <abstract>Natural language inference (NLI) is the task of determining whether a piece of text is entailed, contradicted by or unrelated to another piece of text. In this paper, we investigate how to tease systematic inferences (i.e., items for which people agree on the NLI label) apart from disagreement items (i.e., items which lead to different annotations), which most prior work has overlooked. To distinguish systematic inferences from disagreement items, we propose Artificial Annotators (AAs) to simulate the uncertainty in the annotation process by capturing the modes in annotations. Results on the CommitmentBank, a corpus of naturally occurring discourses in English, confirm that our approach performs statistically significantly better than all baselines. We further show that AAs learn linguistic patterns and context-dependent reasoning.</abstract>
       <url hash="7ddaf53b">2021.naacl-main.390</url>
@@ -5252,7 +5252,7 @@
     <paper id="391">
       <title>Modeling Human Mental States with an Entity-based Narrative Graph</title>
       <author><first>I-Ta</first><last>Lee</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>4916–4926</pages>
       <abstract>Understanding narrative text requires capturing characters’ motivations, goals, and mental states. This paper proposes an Entity-based Narrative Graph (ENG) to model the internal- states of characters in a story. We explicitly model entities, their interactions and the context in which they appear, and learn rich representations for them. We experiment with different task-adaptive pre-training objectives, in-domain training, and symbolic inference to capture dependencies between different decisions in the output space. We evaluate our model on two narrative understanding tasks: predicting character mental states, and desire fulfillment, and conduct a qualitative analysis.</abstract>
@@ -5264,7 +5264,7 @@
     <paper id="392">
       <title>A Simple and Efficient Multi-Task Learning Approach for Conditioned Dialogue Generation</title>
       <author><first>Yan</first><last>Zeng</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>4927–4939</pages>
       <abstract>Conditioned dialogue generation suffers from the scarcity of labeled responses. In this work, we exploit labeled non-dialogue text data related to the condition, which are much easier to collect. We propose a multi-task learning approach to leverage both labeled dialogue and text data. The 3 tasks jointly optimize the same pre-trained Transformer – conditioned dialogue generation task on the labeled dialogue data, conditioned language encoding task and conditioned language generation task on the labeled text data. Experimental results show that our approach outperforms the state-of-the-art models by leveraging the labeled texts, and it also obtains larger improvement in performance comparing to the previous methods to leverage text data.</abstract>
       <url hash="ff9b00c5">2021.naacl-main.392</url>
@@ -5300,8 +5300,8 @@
     <paper id="395">
       <title>Paragraph-level Simplification of Medical Texts</title>
       <author><first>Ashwin</first><last>Devaraj</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <pages>4972–4984</pages>
       <abstract>We consider the problem of learning to simplify medical texts. This is important because most reliable, up-to-date information in biomedicine is dense with jargon and thus practically inaccessible to the lay audience. Furthermore, manual simplification does not scale to the rapidly growing body of biomedical literature, motivating the need for automated approaches. Unfortunately, there are no large-scale resources available for this task. In this work we introduce a new corpus of parallel texts in English comprising technical and lay summaries of all published evidence pertaining to different clinical topics. We then propose a new metric based on likelihood scores from a masked language model pretrained on scientific texts. We show that this automated measure better differentiates between technical and lay summaries than existing heuristics. We introduce and evaluate baseline encoder-decoder Transformer models for simplification and propose a novel augmentation to these in which we explicitly penalize the decoder for producing “jargon” terms; we find that this yields improvements over baselines in terms of readability.</abstract>
@@ -5369,7 +5369,7 @@
       <author><first>Liqun</first><last>Chen</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Ming-Ting</first><last>Sun</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>5053–5069</pages>
       <abstract>Adversarial examples expose the vulnerabilities of natural language processing (NLP) models, and can be used to evaluate and improve their robustness. Existing techniques of generating such examples are typically driven by local heuristic rules that are agnostic to the context, often resulting in unnatural and ungrammatical outputs. This paper presents CLARE, a ContextuaLized AdversaRial Example generation model that produces fluent and grammatical outputs through a mask-then-infill procedure. CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner. We propose three contextualized perturbations, Replace, Insert and Merge, that allow for generating outputs of varied lengths. CLARE can flexibly combine these perturbations and apply them at any position in the inputs, and is thus able to attack the victim model more effectively with fewer edits. Extensive experiments and human evaluation demonstrate that CLARE outperforms the baselines in terms of attack success rate, textual similarity, fluency and grammaticality.</abstract>
       <url hash="5aa24760">2021.naacl-main.400</url>
@@ -5392,7 +5392,7 @@
     </paper>
     <paper id="402">
       <title>Evaluating the Values of Sources in Transfer Learning</title>
-      <author><first>Md Rizwan</first><last>Parvez</last></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <pages>5084–5116</pages>
       <abstract>Transfer learning that adapts a model trained on data-rich sources to low-resource targets has been widely applied in natural language processing (NLP). However, when training a transfer model over multiple sources, not every source is equally useful for the target. To better transfer a model, it is essential to understand the values of the sources. In this paper, we develop , an efficient source valuation framework for quantifying the usefulness of the sources (e.g., ) in transfer learning based on the Shapley value method. Experiments and comprehensive analyses on both cross-domain and cross-lingual transfers demonstrate that our framework is not only effective in choosing useful transfer sources but also the source values match the intuitive source-target similarity.</abstract>
@@ -5416,7 +5416,7 @@
     <paper id="404">
       <title>On the Inductive Bias of Masked Language Modeling: From Statistical to Syntactic Dependencies</title>
       <author><first>Tianyi</first><last>Zhang</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <pages>5131–5146</pages>
       <abstract>We study how masking and predicting tokens in an unsupervised fashion can give rise to linguistic structures and downstream performance gains. Recent theories have suggested that pretrained language models acquire useful inductive biases through masks that implicitly act as cloze reductions for downstream tasks. While appealing, we show that the success of the random masking strategy used in practice cannot be explained by such cloze-like masks alone. We construct cloze-like masks using task-specific lexicons for three different classification datasets and show that the majority of pretrained performance gains come from generic masks that are not associated with the lexicon. To explain the empirical success of these generic masks, we demonstrate a correspondence between the Masked Language Model (MLM) objective and existing methods for learning statistical dependencies in graphical models. Using this, we derive a method for extracting these learned statistical dependencies in MLMs and show that these dependencies encode useful inductive biases in the form of syntactic structures. In an unsupervised parsing evaluation, simply forming a minimum spanning tree on the implied statistical dependence structure outperforms a classic method for unsupervised parsing (58.74 vs. 55.91 UUAS).</abstract>
       <url hash="e38eece1">2021.naacl-main.404</url>
@@ -5429,8 +5429,8 @@
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <author><first>Aaron</first><last>Jaech</last></author>
       <author><first>Xin</first><last>Li</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>5147–5173</pages>
       <abstract>Standard autoregressive language models perform only polynomial-time computation to compute the probability of the next symbol. While this is attractive, it means they cannot model distributions whose next-symbol probability is <i>hard</i> to compute. Indeed, they cannot even model them well enough to solve associated <i>easy</i> decision problems for which an engineer might want to consult a language model. These limitations apply no matter how much computation and data are used to train the model, unless the model is given access to oracle parameters that grow <i>superpolynomially</i> in sequence length. Thus, simply training larger autoregressive language models is not a panacea for NLP. Alternatives include energy-based models (which give up efficient sampling) and latent-variable autoregressive models (which give up efficient scoring of a given string). Both are powerful enough to escape the above limitations.</abstract>
       <url hash="49e12cc6">2021.naacl-main.405</url>
@@ -5497,7 +5497,7 @@
     <paper id="410">
       <title>Learning How to Ask: Querying <fixed-case>LM</fixed-case>s with Mixtures of Soft Prompts</title>
       <author><first>Guanghui</first><last>Qin</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>5203–5212</pages>
       <abstract>Natural-language prompts have recently been used to coax pretrained language models into performing other AI tasks, using a fill-in-the-blank paradigm (Petroni et al., 2019) or a few-shot extrapolation paradigm (Brown et al., 2020). For example, language models retain factual knowledge from their training corpora that can be extracted by asking them to “fill in the blank” in a sentential prompt. However, where does this prompt come from? We explore the idea of learning prompts by gradient descent—either fine-tuning prompts taken from previous work, or starting from random initialization. Our prompts consist of “soft words,” i.e., continuous vectors that are not necessarily word type embeddings from the language model. Furthermore, for each task, we optimize a mixture of prompts, learning which prompts are most effective and how to ensemble them. Across multiple English LMs and tasks, our approach hugely outperforms previous methods, showing that the implicit factual knowledge in language models was previously underestimated. Moreover, this knowledge is cheap to elicit: random initialization is nearly as good as informed initialization.</abstract>
       <url hash="eb8f3e6e">2021.naacl-main.410</url>
@@ -5525,7 +5525,7 @@
     </paper>
     <paper id="412">
       <title><fixed-case>AVA</fixed-case>: an Automatic e<fixed-case>V</fixed-case>aluation Approach for Question Answering Systems</title>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>5223–5233</pages>
       <abstract>We introduce AVA, an automatic evaluation approach for Question Answering, which given a set of questions associated with Gold Standard answers (references), can estimate system Accuracy. AVA uses Transformer-based language models to encode question, answer, and reference texts. This allows for effectively assessing answer correctness using similarity between the reference and an automatic answer, biased towards the question semantics. To design, train, and test AVA, we built multiple large training, development, and test sets on public and industrial benchmarks. Our innovative solutions achieve up to 74.7% F1 score in predicting human judgment for single answers. Additionally, AVA can be used to evaluate the overall system Accuracy with an error lower than 7% at 95% of confidence when measured on several QA systems.</abstract>
@@ -5558,7 +5558,7 @@
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>5259–5274</pages>
       <abstract>A prevailing paradigm in neural text generation is one-shot generation, where text is produced in a single step. The one-shot setting is inadequate, however, when the constraints the user wishes to impose on the generated text are dynamic, especially when authoring longer documents. We address this limitation with an interactive text generation setting in which the user interacts with the system by issuing commands to edit existing text. To this end, we propose a novel text editing task, and introduce WikiDocEdits, a dataset of single-sentence edits crawled from Wikipedia. We show that our Interactive Editor, a transformer-based model trained on this dataset, outperforms baselines and obtains positive results in both automatic and human evaluations. We present empirical and qualitative analyses of this model’s performance.</abstract>
       <url hash="5ea5f670">2021.naacl-main.414</url>
@@ -5586,7 +5586,7 @@
       <author><first>Yuhao</first><last>Zhang</last></author>
       <author><first>Emily</first><last>Tsai</last></author>
       <author><first>Curtis</first><last>Langlotz</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>5288–5304</pages>
       <abstract>Neural image-to-text radiology report generation systems offer the potential to improve radiology reporting by reducing the repetitive process of report drafting and identifying possible medical errors. However, existing report generation systems, despite achieving high performances on natural language generation metrics such as CIDEr or BLEU, still suffer from incomplete and inconsistent generations. Here we introduce two new simple rewards to encourage the generation of factually complete and consistent radiology reports: one that encourages the system to generate radiology domain entities consistent with the reference, and one that uses natural language inference to encourage these entities to be described in inferentially consistent ways. We combine these with the novel use of an existing semantic equivalence metric (BERTScore). We further propose a report generation system that optimizes these rewards via reinforcement learning. On two open radiology report datasets, our system substantially improved the F1 score of a clinical information extraction performance by +22.1 (Delta +63.9%). We further show via a human evaluation and a qualitative analysis that our system leads to generations that are more factually complete and consistent compared to the baselines.</abstract>
       <url hash="e4e72a3e">2021.naacl-main.416</url>
@@ -5645,7 +5645,7 @@
       <author><first>Haoxuan</first><last>You</last></author>
       <author><first>Zhecan</first><last>Wang</last></author>
       <author><first>Alireza</first><last>Zareian</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <pages>5339–5350</pages>
       <abstract>Pre-trained contextual vision-and-language (V&amp;L) models have achieved impressive performance on various benchmarks. However, existing models require a large amount of parallel image-caption data for pre-training. Such data are costly to collect and require cumbersome curation. Inspired by unsupervised machine translation, we investigate if a strong V&amp;L representation model can be learned through unsupervised pre-training without image-caption corpora. In particular, we propose to conduct “mask-and-predict” pre-training on text-only and image-only corpora and introduce the object tags detected by an object recognition model as anchor points to bridge two modalities. We find that such a simple approach achieves performance close to a model pre-trained with aligned data, on four English V&amp;L benchmarks. Our work challenges the widely held notion that aligned data is necessary for V&amp;L pre-training, while significantly reducing the amount of supervision needed for V&amp;L models.</abstract>
@@ -5722,7 +5722,7 @@
     <paper id="426">
       <title>Self-training Improves Pre-training for Natural Language Understanding</title>
       <author><first>Jingfei</first><last>Du</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Beliz</first><last>Gunel</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Onur</first><last>Celebi</last></author>
@@ -5742,7 +5742,7 @@
       <author><first>Xiaokai</first><last>Wei</last></author>
       <author><first>Shang-Wen</first><last>Li</last></author>
       <author><first>Henghui</first><last>Zhu</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Andrew O.</first><last>Arnold</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
@@ -5774,7 +5774,7 @@
       <author><first>Xiaoyuan</first><last>Yi</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
       <author><first>Liner</first><last>Yang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>5441–5452</pages>
       <abstract>Grammatical Error Correction (GEC) aims to correct writing errors and help language learners improve their writing skills. However, existing GEC models tend to produce spurious corrections or fail to detect lots of errors. The quality estimation model is necessary to ensure learners get accurate GEC results and avoid misleading from poorly corrected sentences. Well-trained GEC models can generate several high-quality hypotheses through decoding, such as beam search, which provide valuable GEC evidence and can be used to evaluate GEC quality. However, existing models neglect the possible GEC evidence from different hypotheses. This paper presents the Neural Verification Network (VERNet) for GEC quality estimation with multiple hypotheses. VERNet establishes interactions among hypotheses with a reasoning graph and conducts two kinds of attention mechanisms to propagate GEC evidence to verify the quality of generated hypotheses. Our experiments on four GEC datasets show that VERNet achieves state-of-the-art grammatical error detection performance, achieves the best quality estimation results, and significantly improves GEC performance by reranking hypotheses. All data and source codes are available at <url>https://github.com/thunlp/VERNet</url>.</abstract>
       <url hash="4b3cb79b">2021.naacl-main.429</url>
@@ -5802,7 +5802,7 @@
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5467–5478</pages>
       <abstract>In this paper, we focus on identifying interactive argument pairs from two posts with opposite stances to a certain topic. Considering opinions are exchanged from different perspectives of the discussing topic, we study the discrete representations for arguments to capture varying aspects in argumentation languages (e.g., the debate focus and the participant behavior). Moreover, we utilize hierarchical structure to model post-wise information incorporating contextual knowledge. Experimental results on the large-scale dataset collected from CMV show that our proposed framework can significantly outperform the competitive baselines. Further analyses reveal why our model yields superior performance and prove the usefulness of our learned representations.</abstract>
       <url hash="ee0713b6">2021.naacl-main.431</url>
@@ -5813,11 +5813,11 @@
     <paper id="432">
       <title>On Unifying Misinformation Detection</title>
       <author><first>Nayeon</first><last>Lee</last></author>
-      <author><first>Belinda Z.</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last></author>
       <author><first>Sinong</first><last>Wang</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><first>Hao</first><last>Ma</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Madian</first><last>Khabsa</last></author>
       <pages>5479–5485</pages>
       <abstract>In this paper, we introduce UnifiedM2, a general-purpose misinformation model that jointly models multiple domains of misinformation with a single, unified setup. The model is trained to handle four tasks: detecting news bias, clickbait, fake news, and verifying rumors. By grouping these tasks together, UnifiedM2 learns a richer representation of misinformation, which leads to state-of-the-art or comparable performance across all tasks. Furthermore, we demonstrate that UnifiedM2’s learned representation is helpful for few-shot learning of unseen misinformation tasks/datasets and the model’s generalizability to unseen events.</abstract>
@@ -5854,8 +5854,8 @@
     </paper>
     <paper id="435">
       <title>Do <fixed-case>RNN</fixed-case> States Encode Abstract Phonological Alternations?</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>5501–5513</pages>
@@ -5887,8 +5887,8 @@
       <author><first>Lei</first><last>Li</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>5524–5531</pages>
       <abstract>In this paper, we tackle the task of Definition Generation (DG) in Chinese, which aims at automatically generating a definition for a word. Most existing methods take the source word as an indecomposable semantic unit. However, in parataxis languages like Chinese, word meanings can be composed using the word formation process, where a word (“桃花”, peach-blossom) is formed by formation components (“桃”, peach; “花”, flower) using a formation rule (Modifier-Head). Inspired by this process, we propose to enhance DG with word formation features. We build a formation-informed dataset, and propose a model DeFT, which Decomposes words into formation features, dynamically Fuses different features through a gating mechanism, and generaTes word definitions. Experimental results show that our method is both effective and robust.</abstract>
       <url hash="e5413c3d">2021.naacl-main.437</url>
@@ -5901,7 +5901,7 @@
       <author><first>Shohei</first><last>Higashiyama</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>5532–5541</pages>
       <abstract>Morphological analysis (MA) and lexical normalization (LN) are both important tasks for Japanese user-generated text (UGT). To evaluate and compare different MA/LN systems, we have constructed a publicly available Japanese UGT corpus. Our corpus comprises 929 sentences annotated with morphological and normalization information, along with category information we classified for frequent UGT-specific phenomena. Experiments on the corpus demonstrated the low performance of existing MA/LN methods for non-general words and non-standard forms, indicating that the corpus would be a challenging benchmark for further research on UGT.</abstract>
       <url hash="f1141938">2021.naacl-main.438</url>
@@ -5929,7 +5929,7 @@
       <author><first>Yiyang</first><last>Hou</last></author>
       <author><first>Yajie</first><last>Ye</last></author>
       <author><first>Li</first><last>Liang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>5554–5566</pages>
       <abstract>Universal Semantic Tagging aims to provide lightweight unified analysis for all languages at the word level. Though the proposed annotation scheme is conceptually promising, the feasibility is only examined in four Indo–European languages. This paper is concerned with extending the annotation scheme to handle Mandarin Chinese and empirically study the plausibility of unifying meaning representations for multiple languages. We discuss a set of language-specific semantic phenomena, propose new annotation specifications and build a richly annotated corpus. The corpus consists of 1100 English–Chinese parallel sentences, where compositional semantic analysis is available for English, and another 1000 Chinese sentences which has enriched syntactic analysis. By means of the new annotations, we also evaluate a series of neural tagging models to gauge how successful semantic tagging can be: accuracies of 92.7% and 94.6% are obtained for Chinese and English respectively. The English tagging performance is remarkably better than the state-of-the-art by 7.7%.</abstract>
       <url hash="63148334">2021.naacl-main.440</url>
@@ -5969,8 +5969,8 @@
       <title><fixed-case>AMR</fixed-case> Parsing with Action-Pointer Transformer</title>
       <author><first>Jiawei</first><last>Zhou</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>5585–5598</pages>
       <abstract>Abstract Meaning Representation parsing is a sentence-to-graph prediction task where target nodes are not explicitly aligned to sentence tokens. However, since graph nodes are semantically based on one or more sentence tokens, implicit alignments can be derived. Transition-based parsers operate over the sentence from left to right, capturing this inductive bias via alignments at the cost of limited expressiveness. In this work, we propose a transition-based system that combines hard-attention over sentences with a target-side action pointer mechanism to decouple source tokens from node representations and address alignments. We model the transitions as well as the pointer mechanism through straightforward modifications within a single Transformer architecture. Parser state and graph structure information are efficiently encoded using attention heads. We show that our action-pointer approach leads to increased expressiveness and attains large gains (+1.6 points) against the best transition-based AMR parser in very similar conditions. While using no graph re-categorization, our single model yields the second best Smatch score on AMR 2.0 (81.8), which is further improved to 83.4 with silver data and ensemble decoding.</abstract>
       <url hash="6f18c4ee">2021.naacl-main.443</url>
@@ -5981,11 +5981,11 @@
     <paper id="444">
       <title><fixed-case>NL</fixed-case>-<fixed-case>EDIT</fixed-case>: Correcting Semantic Parse Errors through Natural Language Interaction</title>
       <author><first>Ahmed</first><last>Elgohary</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <author><first>Matthew</first><last>Richardson</last></author>
       <author><first>Adam</first><last>Fourney</last></author>
       <author><first>Gonzalo</first><last>Ramos</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>5599–5610</pages>
       <abstract>We study semantic parsing in an interactive setting in which users correct errors with natural language feedback. We present NL-EDIT, a model for interpreting natural language feedback in the interaction context to generate a sequence of edits that can be applied to the initial parse to correct its errors. We show that NL-EDIT can boost the accuracy of existing text-to-SQL parsers by up to 20% with only one turn of correction. We analyze the limitations of the model and discuss directions for improvement and evaluation. The code and datasets used in this paper are publicly available at <url>http://aka.ms/NLEdit</url>.</abstract>
       <url hash="623a4ff3">2021.naacl-main.444</url>
@@ -6042,7 +6042,7 @@
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Zhenpeng</first><last>Zhou</last></author>
       <author><first>Zhiguang</first><last>Wang</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
@@ -6161,7 +6161,7 @@
       <author><first>Tulika</first><last>Saha</last></author>
       <author><first>Apoorva</first><last>Upadhyaya</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5727–5737</pages>
       <abstract>Speech Act Classification determining the communicative intent of an utterance has been investigated widely over the years as a standalone task. This holds true for discussion in any fora including social media platform such as Twitter. But the emotional state of the tweeter which has a considerable effect on the communication has not received the attention it deserves. Closely related to emotion is sentiment, and understanding of one helps understand the other. In this work, we firstly create a new multi-modal, emotion-TA (‘TA’ means tweet act, i.e., speech act in Twitter) dataset called <i>EmoTA</i> collected from open-source Twitter dataset. We propose a Dyadic Attention Mechanism (DAM) based multi-modal, adversarial multi-tasking framework. DAM incorporates intra-modal and inter-modal attention to fuse multiple modalities and learns generalized features across all the tasks. Experimental results indicate that the proposed framework boosts the performance of the primary task, i.e., TA classification (TAC) by benefitting from the two secondary tasks, i.e., Sentiment and Emotion Analysis compared to its uni-modal and single task TAC (tweet act classification) variants.</abstract>
       <url hash="c6ca1a45">2021.naacl-main.456</url>
@@ -6187,8 +6187,8 @@
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Tong</first><last>Xiao</last></author>
       <author><first>Dongqi</first><last>Wang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>5749–5759</pages>
       <abstract>Non-autoregressive Transformer is a promising text generation model. However, current non-autoregressive models still fall behind their autoregressive counterparts in translation quality. We attribute this accuracy gap to the lack of dependency modeling among decoder inputs. In this paper, we propose CNAT, which learns implicitly categorical codes as latent variables into the non-autoregressive decoding. The interaction among these categorical codes remedies the missing dependencies and improves the model capacity. Experiment results show that our model achieves comparable or better performance in machine translation tasks than several strong baselines.</abstract>
       <url hash="f1787812">2021.naacl-main.458</url>
@@ -6289,7 +6289,7 @@
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Kai</first><last>Liu</last></author>
       <author><first>Ruiyang</first><last>Ren</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Daxiang</first><last>Dong</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
@@ -6371,11 +6371,11 @@
       <author><first>Ahmad</first><last>Zaidi</last></author>
       <author><first>Mutethia</first><last>Mutuma</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <author id="yang-liu-edinburgh"><first>Yang</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>5905–5921</pages>
       <abstract>Meetings are a key component of human collaboration. As increasing numbers of meetings are recorded and transcribed, meeting summaries have become essential to remind those who may or may not have attended the meetings about the key decisions made and the tasks to be completed. However, it is hard to create a single short summary that covers all the content of a long meeting involving multiple people and topics. In order to satisfy the needs of different types of users, we define a new query-based multi-domain meeting summarization task, where models have to select and summarize relevant spans of meetings in response to a query, and we introduce QMSum, a new benchmark for this task. QMSum consists of 1,808 query-summary pairs over 232 meetings in multiple domains. Besides, we investigate a locate-then-summarize method and evaluate a set of strong summarization baselines on the task. Experimental results and manual analysis reveal that QMSum presents significant challenges in long meeting summarization for future research. Dataset is available at <url>https://github.com/Yale-LILY/QMSum</url>.</abstract>
       <url hash="7ab7f678">2021.naacl-main.472</url>
@@ -6452,8 +6452,8 @@
   <volume id="demos" ingest-date="2021-05-24" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations</booktitle>
-      <editor><first>Avi</first><last>Sil</last></editor>
-      <editor><first>Xi Victoria</first><last>Lin</last></editor>
+      <editor id="avirup-sil"><first>Avi</first><last>Sil</last></editor>
+      <editor id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -6478,13 +6478,13 @@
     </paper>
     <paper id="2">
       <title>Machine-Assisted Script Curation</title>
-      <author><first>Manuel</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel</first><last>Ciosici</last></author>
       <author><first>Joseph</first><last>Cummings</last></author>
       <author><first>Mitchell</first><last>DeHaven</last></author>
       <author><first>Alex</first><last>Hedges</last></author>
       <author><first>Yash</first><last>Kankanampati</last></author>
       <author><first>Dong-Ho</first><last>Lee</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
       <pages>8–17</pages>
       <abstract>We describe Machine-Aided Script Curator (MASC), a system for human-machine collaborative script authoring. Scripts produced with MASC include (1) English descriptions of sub-events that comprise a larger, complex event; (2) event types for each of those events; (3) a record of entities expected to participate in multiple sub-events; and (4) temporal sequencing between the sub-events. MASC automates portions of the script creation process with suggestions for event types, links to Wikidata, and sub-events that may have been forgotten. We illustrate how these automations are useful to the script writer with a few case-study scripts.</abstract>
@@ -6587,15 +6587,15 @@
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Jasmine</first><last>Rah</last></author>
       <author><first>David</first><last>Liem</last></author>
       <author><first>Ahmed</first><last>ELsayed</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Cynthia</first><last>Schneider</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <pages>66–77</pages>
       <abstract>To combat COVID-19, both clinicians and scientists need to digest the vast amount of relevant biomedical knowledge in literature to understand the disease mechanism and the related biological functions. We have developed a novel and comprehensive knowledge discovery framework, COVID-KG to extract fine-grained multimedia knowledge elements (entities, relations and events) from scientific literature. We then exploit the constructed multimedia knowledge graphs (KGs) for question answering and report generation, using drug repurposing as a case study. Our framework also provides detailed contextual sentences, subfigures, and knowledge subgraphs as evidence. All of the data, KGs, reports.</abstract>
       <url hash="db194589">2021.naacl-demos.8</url>
@@ -6633,7 +6633,7 @@
       <title>Interactive Plot Manipulation using Natural Language</title>
       <author><first>Yihan</first><last>Wang</last></author>
       <author><first>Yutong</first><last>Shao</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>92–98</pages>
       <abstract>We present an interactive Plotting Agent, a system that enables users to directly manipulate plots using natural language instructions within an interactive programming environment. The Plotting Agent maps language to plot updates. We formulate this problem as a slot-based task-oriented dialog problem, which we tackle with a sequence-to-sequence model. This plotting model while accurate in most cases, still makes errors, therefore, the system allows a feedback mode, wherein the user is presented with a top-k list of plots, among which the user can pick the desired one. From this kind of feedback, we can then, in principle, continuously learn and improve the system. Given that plotting is widely used across data-driven fields, we believe our demonstration will be of interest to both practitioners such as data scientists broadly defined, and researchers interested in natural language interfaces.</abstract>
       <url hash="c4181900">2021.naacl-demos.11</url>
@@ -6645,7 +6645,7 @@
       <title><fixed-case>A</fixed-case>ctive<fixed-case>A</fixed-case>nno: General-Purpose Document-Level Annotation Tool with Active Learning Integration</title>
       <author><first>Max</first><last>Wiechmann</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>99–105</pages>
       <abstract>ActiveAnno is an annotation tool focused on document-level annotation tasks developed both for industry and research settings. It is designed to be a general-purpose tool with a wide variety of use cases. It features a modern and responsive web UI for creating annotation projects, conducting annotations, adjudicating disagreements, and analyzing annotation results. ActiveAnno embeds a highly configurable and interactive user interface. The tool also integrates a RESTful API that enables integration into other software systems, including an API for machine learning integration. ActiveAnno is built with extensible design and easy deployment in mind, all to enable users to perform annotation tasks with high efficiency and high-quality annotation results.</abstract>
       <url hash="620fb7be">2021.naacl-demos.12</url>
@@ -6659,7 +6659,7 @@
       <author><first>Denis</first><last>Newman-Griffis</last></author>
       <author><first>Venkatesh</first><last>Sivaraman</last></author>
       <author><first>Adam</first><last>Perer</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Harry</first><last>Hochheiser</last></author>
       <pages>106–115</pages>
       <abstract>Embeddings of words and concepts capture syntactic and semantic regularities of language; however, they have seen limited use as tools to study characteristics of different corpora and how they relate to one another. We introduce TextEssence, an interactive system designed to enable comparative analysis of corpora using embeddings. TextEssence includes visual, neighbor-based, and similarity-based modes of embedding analysis in a lightweight, web-based interface. We further propose a new measure of embedding confidence based on nearest neighborhood overlap, to assist in identifying high-quality embeddings for corpus analysis. A case study on COVID-19 scientific literature illustrates the utility of the system. TextEssence can be found at <url>https://textessence.github.io</url>.</abstract>
@@ -6699,7 +6699,7 @@
       <author><first>Raefer</first><last>Gabriel</last></author>
       <author><first>Shuyang</first><last>Gao</last></author>
       <author><first>Rahul</first><last>Goel</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Jan</first><last>Jezabek</last></author>
       <author><first>Abhay</first><last>Jha</last></author>
       <author><first>Jiun-Yu</first><last>Kao</last></author>
@@ -6729,7 +6729,7 @@
       <title><fixed-case>RESIN</fixed-case>: A Dockerized Schema-Guided Cross-document Cross-lingual Cross-media Information Extraction and Event Tracking System</title>
       <author><first>Haoyang</first><last>Wen</last></author>
       <author><first>Ying</first><last>Lin</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Xiaoman</first><last>Pan</last></author>
       <author><first>Sha</first><last>Li</last></author>
       <author><first>Xudong</first><last>Lin</last></author>
@@ -6745,13 +6745,13 @@
       <author><first>Qing</first><last>Lyu</last></author>
       <author><first>Dídac</first><last>Surís</last></author>
       <author><first>Brian</first><last>Chen</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <author><first>Carl</first><last>Vondrick</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>133–143</pages>
       <abstract>We present a new information extraction system that can automatically construct temporal event graphs from a collection of news documents from multiple sources, multiple languages (English and Spanish for our experiment), and multiple data modalities (speech, text, image and video). The system advances state-of-the-art from two aspects: (1) extending from sentence-level event extraction to cross-document cross-lingual cross-media event extraction, coreference resolution and temporal event tracking; (2) using human curated event schema library to match and enhance the extraction output. We have made the dockerlized system publicly available for research purpose at GitHub, with a demo video.</abstract>
@@ -6777,7 +6777,7 @@
       <booktitle>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop</booktitle>
       <editor><first>Esin</first><last>Durmus</last></editor>
       <editor><first>Vivek</first><last>Gupta</last></editor>
-      <editor><first>Nelson</first><last>Liu</last></editor>
+      <editor id="nelson-f-liu"><first>Nelson</first><last>Liu</last></editor>
       <editor><first>Nanyun</first><last>Peng</last></editor>
       <editor><first>Yu</first><last>Su</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6838,7 +6838,7 @@
     <paper id="5">
       <title>Towards Layered Events and Schema Representations in Long Documents</title>
       <author><first>Hans Ole</first><last>Hatzel</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>32–39</pages>
       <abstract>In this thesis proposal, we explore the application of event extraction to literary texts. Considering the lengths of literary documents modeling events in different granularities may be more adequate to extract meaningful information, as individual elements contribute little to the overall semantics. We adapt the concept of schemas as sequences of events all describing a single process, connected through shared participants extending it to for multiple schemas in a document. Segmentation of event sequences into schemas is approached by modeling event sequences, on such task as the narrative cloze task, the prediction of missing events in sequences. We propose building on sequences of event embeddings to form schema embeddings, thereby summarizing sections of documents using a single representation. This approach will allow for the comparisons of different sections of documents and entire literary works. Literature is a challenging domain based on its variety of genres, yet the representation of literary content has received relatively little attention.</abstract>
       <url hash="a71ba858">2021.naacl-srw.5</url>
@@ -6885,7 +6885,7 @@
       <author><first>Jinfeng</first><last>Xiao</last></author>
       <author><first>Lidan</first><last>Wang</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Tong</first><last>Sun</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <pages>61–67</pages>
@@ -6913,7 +6913,7 @@
       <author><first>Jingxuan</first><last>Tu</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <author><first>Brent</first><last>Cochran</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>76–87</pages>
       <abstract>We propose semantic visualization as a linguistic visual analytic method. It can enable exploration and discovery over large datasets of complex networks by exploiting the semantics of the relations in them. This involves extracting information, applying parameter reduction operations, building hierarchical data representation and designing visualization. We also present the accompanying COVID-SemViz a searchable and interactive visualization system for knowledge exploration of COVID-19 data to demonstrate the application of our proposed method. In the user studies, users found that semantic visualization-powered COVID-SemViz is helpful in terms of finding relevant information and discovering unknown associations.</abstract>
       <url hash="2e0108f5">2021.naacl-srw.11</url>
@@ -7040,7 +7040,7 @@
       <author><first>Florian</first><last>Schneider</last></author>
       <author><first>Özge</first><last>Alaçam</last></author>
       <author><first>Xintong</first><last>Wang</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <abstract>In primary school, children’s books, as well as in modern language learning apps, multi-modal learning strategies like illustrations of terms and phrases are used to support reading comprehension. Also, several studies in educational psychology suggest that integrating cross-modal information will improve reading comprehension. We claim that state-of- he-art multi-modal transformers, which could be used in a language learner context to improve human reading, will perform poorly because of the short and relatively simple textual data those models are trained with. To prove our hypotheses, we collected a new multi-modal image-retrieval dataset based on data from Wikipedia. In an in-depth data analysis, we highlight the differences between our dataset and other popular datasets. Additionally, we evaluate several state-of-the-art multi-modal transformers on text-image retrieval on our dataset and analyze their meager results, which verify our claims.</abstract>
       <url hash="d8aae487">2021.naacl-srw.21</url>
       <bibkey>schneider-etal-2021-towards</bibkey>
@@ -7051,8 +7051,8 @@
     <meta>
       <booktitle>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials</booktitle>
       <editor><first>Greg</first><last>Kondrak</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
-      <editor><first>Dan</first><last>Gillick</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="dan-gillick"><first>Dan</first><last>Gillick</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -7104,7 +7104,7 @@
     </paper>
     <paper id="4">
       <title>A Tutorial on Evaluation Metrics used in Natural Language Generation</title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Ananya B.</first><last>Sai</last></author>
       <pages>15–19</pages>
       <abstract>The advent of Deep Learning and the availability of large scale datasets has accelerated research on Natural Language Generation with a focus on newer tasks and better models. With such rapid progress, it is vital to assess the extent of scientific progress made and identify the areas/components that need improvement. To accomplish this in an automatic and reliable manner, the NLP community has actively pursued the development of automatic evaluation metrics. Especially in the last few years, there has been an increasing focus on evaluation metrics, with several criticisms of existing metrics and proposals for several new metrics. This tutorial presents the evolution of automatic evaluation metrics to their current state along with the emerging trends in this field by specifically addressing the following questions: (i) What makes NLG evaluation challenging? (ii) Why do we need automatic evaluation metrics? (iii) What are the existing automatic evaluation metrics and how can they be organised in a coherent taxonomy? (iv) What are the criticisms and shortcomings of existing metrics? (v) What are the possible future directions of research?</abstract>
@@ -7119,7 +7119,7 @@
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <author><first>Sewon</first><last>Min</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <pages>20–24</pages>
       <abstract>In this tutorial, we aim at bringing interested NLP researchers up to speed about the recent and ongoing techniques for document-level representation learning. Additionally, our goal is to reveal new research opportunities to the audience, which will hopefully bring us closer to address existing challenges in this domain.</abstract>
       <url hash="6d7a352b">2021.naacl-tutorials.5</url>
@@ -7151,7 +7151,7 @@
       <booktitle>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</booktitle>
       <editor><first>Young-bum</first><last>Kim</last></editor>
       <editor><first>Yunyao</first><last>Li</last></editor>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -7183,7 +7183,7 @@
       <author><first>Jose</first><last>Garrido Ramas</last></author>
       <author><first>Giorgio</first><last>Pessot</last></author>
       <author><first>Abdalghani</first><last>Abujabal</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <pages>10–18</pages>
       <abstract>Annotation conflict resolution is crucial towards building machine learning models with acceptable performance. Past work on annotation conflict resolution had assumed that data is collected at once, with a fixed set of annotators and fixed annotation guidelines. Moreover, previous work dealt with atomic labeling tasks. In this paper, we address annotation conflict resolution for Natural Language Understanding (NLU), a structured prediction task, in a real-world setting of commercial voice-controlled personal assistants, where (1) regular data collections are needed to support new and existing functionalities, (2) annotation guidelines evolve over time, and (3) the pool of annotators change across data collections. We devise an approach combining information-theoretic measures and a supervised neural model to resolve conflicts in data annotation. We evaluate our approach both intrinsically and extrinsically on a real-world dataset with 3.5M utterances of a commercial dialog system in German. Our approach leads to dramatic improvements over a majority baseline especially in contentious cases. On the NLU task, our approach achieves 2.75% error reduction over a no-resolution baseline.</abstract>
       <url hash="d6d001a4">2021.naacl-industry.2</url>
@@ -7221,7 +7221,7 @@
       <author><first>Han</first><last>Wang</last></author>
       <author><first>Yue</first><last>Liu</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>26–33</pages>
       <abstract>In recent years, incorporating external knowledge for response generation in open-domain conversation systems has attracted great interest. To improve the relevancy of retrieved knowledge, we propose a neural entity linking (NEL) approach. Different from formal documents, such as news, conversational utterances are informal and multi-turn, which makes it more challenging to disambiguate the entities. Therefore, we present a context-aware named entity recognition model (NER) and entity resolution (ER) model to utilize dialogue context information. We conduct NEL experiments on three open-domain conversation datasets and validate that incorporating context information improves the performance of NER and ER models. The end-to-end NEL approach outperforms the baseline by 62.8% relatively in F1 metric. Furthermore, we verify that using external knowledge based on NEL benefits the neural response generation model.</abstract>
       <url hash="ffdf16fe">2021.naacl-industry.4</url>
@@ -7295,7 +7295,7 @@
       <author><first>Shahab</first><last>Jalalvand</last></author>
       <author><first>Minhua</first><last>Chen</last></author>
       <author><first>Yanjie</first><last>Zhao</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>63–71</pages>
       <abstract>Spoken language understanding (SLU) extracts the intended mean- ing from a user utterance and is a critical component of conversational virtual agents. In enterprise virtual agents (EVAs), language understanding is substantially challenging. First, the users are infrequent callers who are unfamiliar with the expectations of a pre-designed conversation flow. Second, the users are paying customers of an enterprise who demand a reliable, consistent and efficient user experience when resolving their issues. In this work, we describe a general and robust framework for intent and entity extraction utilizing a hybrid of statistical and rule-based approaches. Our framework includes confidence modeling that incorporates information from all components in the SLU pipeline, a critical addition for EVAs to en- sure accuracy. Our focus is on creating accurate and scalable SLU that can be deployed rapidly for a large class of EVA applications with little need for human intervention.</abstract>
       <url hash="0a40ab22">2021.naacl-industry.9</url>
@@ -7306,7 +7306,7 @@
     <paper id="10">
       <title>Proteno: Text Normalization with Limited Data for Fast Deployment in Text to Speech Systems</title>
       <author><first>Shubhi</first><last>Tyagi</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <author><first>Jaime</first><last>Lorenzo-Trueba</last></author>
       <author><first>Javier</first><last>Latorre</last></author>
       <pages>72–79</pages>
@@ -7319,10 +7319,10 @@
     <paper id="11">
       <title>Addressing the Vulnerability of <fixed-case>NMT</fixed-case> in Input Perturbations</title>
       <author><first>Weiwen</first><last>Xu</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Yang</first><last>Ding</last></author>
       <author><first>Kui</first><last>Wu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>80–88</pages>
       <abstract>Neural Machine Translation (NMT) has achieved significant breakthrough in performance but is known to suffer vulnerability to input perturbations. As real input noise is difficult to predict during training, robustness is a big issue for system deployment. In this paper, we improve the robustness of NMT models by reducing the effect of noisy words through a Context-Enhanced Reconstruction (CER) approach. CER trains the model to resist noise in two steps: (1) perturbation step that breaks the naturalness of input sequence with made-up words; (2) reconstruction step that defends the noise propagation by generating better and more robust contextual representation. Experimental results on Chinese-English (ZH-EN) and French-English (FR-EN) translation tasks demonstrate robustness improvement on both news and social media text. Further fine-tuning experiments on social media text show our approach can converge at a higher position and provide a better adaptation.</abstract>
       <url hash="7ea7a894">2021.naacl-industry.11</url>
@@ -7348,7 +7348,7 @@
       <author><first>Chanjun</first><last>Park</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>97–104</pages>
       <abstract>Most of the recent Natural Language Processing(NLP) studies are based on the Pretrain-Finetuning Approach (PFA), but in small and medium-sized enterprises or companies with insufficient hardware there are many limitations to servicing NLP application software using such technology due to slow speed and insufficient memory. The latest PFA technologies require large amounts of data, especially for low-resource languages, making them much more difficult to work with. We propose a new tokenization method, ONE-Piece, to address this limitation that combines the morphology-considered subword tokenization method and the vocabulary method used after probing for an existing method that has not been carefully considered before. Our proposed method can also be used without modifying the model structure. We experiment by applying ONE-Piece to Korean, a morphologically-rich and low-resource language. We derive an optimal subword tokenization result for Korean-English machine translation by conducting a case study that combines the subword tokenization method, morphological segmentation, and vocabulary method. Through comparative experiments with all the tokenization methods currently used in NLP research, ONE-Piece achieves performance comparable to the current Korean-English machine translation state-of-the-art model.</abstract>
       <url hash="0cd8fd18">2021.naacl-industry.13</url>
@@ -7362,7 +7362,7 @@
       <author><first>Chengqi</first><last>Zhao</last></author>
       <author><first>Mingxuan</first><last>Wang</last></author>
       <author><first>Lei</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>105–112</pages>
       <abstract>Automatic translation of dialogue texts is a much needed demand in many real life scenarios. However, the currently existing neural machine translation delivers unsatisfying results. In this paper, we conduct a deep analysis of a dialogue corpus and summarize three major issues on dialogue translation, including pronoun dropping (), punctuation dropping (), and typos (). In response to these challenges, we propose a joint learning method to identify omission and typo, and utilize context to translate dialogue utterances. To properly evaluate the performance, we propose a manually annotated dataset with 1,931 Chinese-English parallel utterances from 300 dialogues as a benchmark testbed for dialogue translation. Our experiments show that the proposed method improves translation quality by 3.2 BLEU over the baselines. It also elevates the recovery rate of omitted pronouns from 26.09% to 47.16%. We will publish the code and dataset publicly at https://xxx.xx.</abstract>
       <url hash="d3015c87">2021.naacl-industry.14</url>
@@ -7515,7 +7515,7 @@
       <author><first>Sida</first><last>Gao</last></author>
       <author><first>Xiao</first><last>Yang</last></author>
       <author><first>Justine</first><last>Kao</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <author><first>Atish</first><last>Kothari</last></author>
       <author><first>Ray</first><last>Shen</last></author>
       <author><first>Yinying</first><last>Pan</last></author>
@@ -7560,7 +7560,7 @@
       <author><first>Brian</first><last>Lester</last></author>
       <author><first>Sagnik</first><last>Ray Choudhury</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>214–221</pages>
       <abstract>Complex natural language understanding modules in dialog systems have a richer understanding of user utterances, and thus are critical in providing a better user experience. However, these models are often created from scratch, for specific clients and use cases and require the annotation of large datasets. This encourages the sharing of annotated data across multiple clients. To facilitate this we introduce the idea of <i>intent features</i>: domain and topic agnostic properties of intents that can be learnt from the syntactic cues only, and hence can be shared. We introduce a new neural network architecture, the Global-Local model, that shows significant improvement over strong baselines for identifying these features in a deployed, multi-intent natural language understanding module, and more generally in a classification setting where a part of an utterance has to be classified utilizing the whole context.</abstract>
       <url hash="3e7d08e7">2021.naacl-industry.27</url>
@@ -7625,8 +7625,8 @@
     </paper>
     <paper id="31">
       <title>Coherent and Concise Radiology Report Generation via Context Specific Image Representations and Orthogonal Sentence States</title>
-      <author><first>Litton</first><last>J Kurisinkel</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Nancy F</first><last>Chen</last></author>
       <pages>246–254</pages>
       <abstract>Neural models for text generation are often designed in an end-to-end fashion, typically with zero control over intermediate computations, limiting their practical usability in downstream applications. In this work, we incorporate explicit means into neural models to ensure topical continuity, informativeness and content diversity of generated radiology reports. For the purpose we propose a method to compute image representations specific to each sentential context and eliminate redundant content by exploiting diverse sentence states. We conduct experiments to generate radiology reports from medical images of chest x-rays using MIMIC-CXR. Our model outperforms baselines by up to 18% and 29% respective in the evaluation for informativeness and content ordering respectively, relative on objective metrics and 16% on human evaluation.</abstract>
@@ -7640,7 +7640,7 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Peinan</first><last>Zhang</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>255–262</pages>
       <abstract>Although there are many studies on neural language generation (NLG), few trials are put into the real world, especially in the advertising domain. Generating ads with NLG models can help copywriters in their creation. However, few studies have adequately evaluated the effect of generated ads with actual serving included because it requires a large amount of training data and a particular environment. In this paper, we demonstrate a practical use case of generating ad-text with an NLG model. Specially, we show how to improve the ads’ impact, deploy models to a product, and evaluate the generated ads.</abstract>
       <url hash="b89acc6f">2021.naacl-industry.32</url>
diff --git a/data/xml/2021.naloma.xml b/data/xml/2021.naloma.xml
index 4f1ed396a8..7aadd23ece 100644
--- a/data/xml/2021.naloma.xml
+++ b/data/xml/2021.naloma.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 1st and 2nd Workshops on Natural Logic Meets Machine Learning (NALOMA)</booktitle>
       <editor><first>Aikaterini-Lida</first><last>Kalouli</last></editor>
-      <editor><first>Lawrence S.</first><last>Moss</last></editor>
+      <editor id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Groningen, the Netherlands (online)</address>
       <month>June</month>
@@ -20,7 +20,7 @@
       <title>Learning General Event Schemas with Episodic Logic</title>
       <author><first>Lane</first><last>Lawley</last></author>
       <author><first>Benjamin</first><last>Kuehnert</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>1–6</pages>
       <abstract>We present a system for learning generalized, stereotypical patterns of events—or “schemas”—from natural language stories, and applying them to make predictions about other stories. Our schemas are represented with Episodic Logic, a logical form that closely mirrors natural language. By beginning with a “head start” set of protoschemas— schemas that a 1- or 2-year-old child would likely know—we can obtain useful, general world knowledge with very few story examples—often only one or two. Learned schemas can be combined into more complex, composite schemas, and used to make predictions in other stories where only partial information is available.</abstract>
       <url hash="6b3c13b6">2021.naloma-1.1</url>
@@ -57,7 +57,7 @@
       <title>Monotonic Inference for Underspecified Episodic Logic</title>
       <author><first>Gene</first><last>Kim</last></author>
       <author><first>Mandar</first><last>Juvekar</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>26–40</pages>
       <abstract>We present a method of making natural logic inferences from Unscoped Logical Form of Episodic Logic. We establish a correspondence between inference rules of scope resolved Episodic Logic and the natural logic treatment by Sánchez Valencia (1991a), and hence demonstrate the ability to handle foundational natural logic inferences from prior literature as well as more general nested monotonicity inferences.</abstract>
       <url hash="7d7fd64d">2021.naloma-1.5</url>
@@ -69,7 +69,7 @@
       <author><first>Deborah</first><last>Ferreira</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>41–50</pages>
       <abstract>Natural language contexts display logical regularities with respect to substitutions of related concepts: these are captured in a functional order-theoretic property called monotonicity. For a certain class of NLI problems where the resulting entailment label depends only on the context monotonicity and the relation between the substituted concepts, we build on previous techniques that aim to improve the performance of NLI models for these problems, as consistent performance across both upward and downward monotone contexts still seems difficult to attain even for state of the art models. To this end, we reframe the problem of context monotonicity classification to make it compatible with transformer-based pre-trained NLI models and add this task to the training pipeline. Furthermore, we introduce a sound and complete simplified monotonicity logic formalism which describes our treatment of contexts as abstract units. Using the notions in our formalism, we adapt targeted challenge sets to investigate whether an intermediate context monotonicity classification task can aid NLI models’ performance on examples exhibiting monotonicity reasoning.</abstract>
       <url hash="5fa4866f">2021.naloma-1.6</url>
@@ -100,7 +100,7 @@
       <author><first>Mandar</first><last>Juvekar</last></author>
       <author><first>Junis</first><last>Ekmekciu</last></author>
       <author><first>Viet</first><last>Duong</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>71–80</pages>
       <abstract>We implement the formalization of natural logic-like monotonic inference using Unscoped Episodic Logical Forms (ULFs) by Kim et al. (2020). We demonstrate this system’s capacity to handle a variety of challenging semantic phenomena using the FraCaS dataset (Cooper et al., 1996). These results give empirical evidence for prior claims that ULF is an appropriate representation to mediate natural logic-like inferences.</abstract>
       <url hash="a0604bc3">2021.naloma-1.9</url>
diff --git a/data/xml/2021.nejlt.xml b/data/xml/2021.nejlt.xml
index f7d68d5874..abf374ac14 100644
--- a/data/xml/2021.nejlt.xml
+++ b/data/xml/2021.nejlt.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-02-23" type="journal">
     <meta>
       <booktitle>Northern European Journal of Language Technology, Volume 7</booktitle>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <publisher>Linköping University Electronic Press</publisher>
       <address>Linköping, Sweden</address>
       <doi>10.3384/nejlt.2000-1533.7.1</doi>
diff --git a/data/xml/2021.newsum.xml b/data/xml/2021.newsum.xml
index 21da8d5962..229ec74c0c 100644
--- a/data/xml/2021.newsum.xml
+++ b/data/xml/2021.newsum.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on New Frontiers in Summarization</booktitle>
       <editor><first>Giuseppe</first><last>Carenini</last></editor>
-      <editor><first>Jackie Chi Kit</first><last>Cheung</last></editor>
+      <editor id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></editor>
       <editor><first>Yue</first><last>Dong</last></editor>
       <editor id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></editor>
       <editor><first>Lu</first><last>Wang</last></editor>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Sentence-level Planning for Especially Abstractive Summarization</title>
       <author><first>Andreas</first><last>Marfurt</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>1–14</pages>
       <abstract>Abstractive summarization models heavily rely on copy mechanisms, such as the pointer network or attention, to achieve good performance, measured by textual overlap with reference summaries. As a result, the generated summaries stay close to the formulations in the source document. We propose the *sentence planner* model to generate more abstractive summaries. It includes a hierarchical decoder that first generates a representation for the next summary sentence, and then conditions the word generator on this representation. Our generated summaries are more abstractive and at the same time achieve high ROUGE scores when compared to human reference summaries. We verify the effectiveness of our design decisions with extensive evaluations.</abstract>
       <url hash="6c478295">2021.newsum-1.1</url>
@@ -77,7 +77,7 @@
     <paper id="6">
       <title>Evaluation of Summarization Systems across Gender, Age, and Race</title>
       <author><first>Anna</first><last>Jørgensen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>51–56</pages>
       <abstract>Summarization systems are ultimately evaluated by human annotators and raters. Usually, annotators and raters do not reflect the demographics of end users, but are recruited through student populations or crowdsourcing platforms with skewed demographics. For two different evaluation scenarios – evaluation against gold summaries and system output ratings – we show that summary evaluation is sensitive to protected attributes. This can severely bias system development and evaluation, leading us to build models that cater for some groups rather than others.</abstract>
       <url hash="29131b06">2021.newsum-1.6</url>
@@ -106,7 +106,7 @@
       <author><first>Chanhee</first><last>Lee</last></author>
       <author><first>Seungwoo</first><last>Cho</last></author>
       <author><first>Mingun</first><last>Park</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>65–73</pages>
       <abstract>In this paper, we focus on improving the quality of the summary generated by neural abstractive dialogue summarization systems. Even though pre-trained language models generate well-constructed and promising results, it is still challenging to summarize the conversation of multiple participants since the summary should include a description of the overall situation and the actions of each speaker. This paper proposes self-supervised strategies for speaker-focused post-correction in abstractive dialogue summarization. Specifically, our model first discriminates which type of speaker correction is required in a draft summary and then generates a revised summary according to the required type. Experimental results show that our proposed method adequately corrects the draft summaries, and the revised summaries are significantly improved in both quantitative and qualitative evaluations.</abstract>
       <url hash="72e1d5f4">2021.newsum-1.8</url>
@@ -117,7 +117,7 @@
     <paper id="9">
       <title>Measuring Similarity of Opinion-bearing Sentences</title>
       <author><first>Wenyi</first><last>Tay</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <pages>74–84</pages>
@@ -148,7 +148,7 @@
       <author><first>Nicole</first><last>Beckage</last></author>
       <author><first>Shachi</first><last>H Kumar</last></author>
       <author><first>Saurav</first><last>Sahay</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <pages>96–106</pages>
       <abstract>Incremental meeting temporal summarization, summarizing relevant information of partial multi-party meeting dialogue, is emerging as the next challenge in summarization research. Here we examine the extent to which human abstractive summaries of the preceding increments (context) can be combined with extractive meeting dialogue to generate abstractive summaries. We find that previous context improves ROUGE scores. Our findings further suggest that contexts begin to outweigh the dialogue. Using keyphrase extraction and semantic role labeling (SRL), we find that SRL captures relevant information without overwhelming the the model architecture. By compressing the previous contexts by ~70%, we achieve better ROUGE scores over our baseline models. Collectively, these results suggest that context matters, as does the way in which context is presented to the model.</abstract>
       <url hash="8b52edd0">2021.newsum-1.11</url>
@@ -160,7 +160,7 @@
       <title>Are We Summarizing the Right Way? A Survey of Dialogue Summarization Data Sets</title>
       <author><first>Don</first><last>Tuggener</last></author>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>107–118</pages>
       <abstract>Dialogue summarization is a long-standing task in the field of NLP, and several data sets with dialogues and associated human-written summaries of different styles exist. However, it is unclear for which type of dialogue which type of summary is most appropriate. For this reason, we apply a linguistic model of dialogue types to derive matching summary items and NLP tasks. This allows us to map existing dialogue summarization data sets into this model and identify gaps and potential directions for future work. As part of this process, we also provide an extensive overview of existing dialogue summarization data sets.</abstract>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="16">
       <title>A New Dataset and Efficient Baselines for Document-level Text Simplification in <fixed-case>G</fixed-case>erman</title>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Nicolas</first><last>Spring</last></author>
       <author><first>Tannon</first><last>Kew</last></author>
       <author><first>Marek</first><last>Kostrzewa</last></author>
diff --git a/data/xml/2021.nllp.xml b/data/xml/2021.nllp.xml
index 9980856116..4ddd92a00f 100644
--- a/data/xml/2021.nllp.xml
+++ b/data/xml/2021.nllp.xml
@@ -7,7 +7,7 @@
       <editor><first>Ion</first><last>Androutsopoulos</last></editor>
       <editor><first>Leslie</first><last>Barrett</last></editor>
       <editor><first>Catalina</first><last>Goanta</last></editor>
-      <editor><first>Daniel</first><last>Preotiuc-Pietro</last></editor>
+      <editor id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
       <month>November</month>
@@ -142,11 +142,11 @@
     <paper id="10">
       <title>Few-shot and Zero-shot Approaches to Legal Text Classification: A Case Study in the Financial Sector</title>
       <author><first>Rajdeep</first><last>Sarkar</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Jay</first><last>Megaro</last></author>
       <author><first>John</first><last>Mariano</last></author>
       <author><first>Vall</first><last>Herard</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>102–106</pages>
       <abstract>The application of predictive coding techniques to legal texts has the potential to greatly reduce the cost of legal review of documents, however, there is such a wide array of legal tasks and continuously evolving legislation that it is hard to construct sufficient training data to cover all cases. In this paper, we investigate few-shot and zero-shot approaches that require substantially less training data and introduce a triplet architecture, which for promissory statements produces performance close to that of a supervised system. This method allows predictive coding methods to be rapidly developed for new regulations and markets.</abstract>
       <url hash="994c7657">2021.nllp-1.10</url>
@@ -206,7 +206,7 @@
     <paper id="15">
       <title>Capturing Logical Structure of Visually Structured Documents with Multimodal Transition Parser</title>
       <author><first>Yuta</first><last>Koreeda</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>144–154</pages>
       <abstract>While many NLP pipelines assume raw, clean texts, many texts we encounter in the wild, including a vast majority of legal documents, are not so clean, with many of them being visually structured documents (VSDs) such as PDFs. Conventional preprocessing tools for VSDs mainly focused on word segmentation and coarse layout analysis, whereas fine-grained logical structure analysis (such as identifying paragraph boundaries and their hierarchies) of VSDs is underexplored. To that end, we proposed to formulate the task as prediction of “transition labels” between text fragments that maps the fragments to a tree, and developed a feature-based machine learning system that fuses visual, textual and semantic cues. Our system is easily customizable to different types of VSDs and it significantly outperformed baselines in identifying different structures in VSDs. For example, our system obtained a paragraph boundary detection F1 score of 0.953 which is significantly better than a popular PDF-to-text tool with an F1 score of 0.739.</abstract>
       <url hash="e8035863">2021.nllp-1.15</url>
@@ -226,7 +226,7 @@
     </paper>
     <paper id="17">
       <title>Supervised Identification of Participant Slots in Contracts</title>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <pages>163–171</pages>
       <abstract>This paper presents a technique for the identification of participant slots in English language contracts. Taking inspiration from unsupervised slot extraction techniques, the system presented here uses a supervised approach to identify terms used to refer to a genre-specific slot in novel contracts. We evaluate the system in multiple feature configurations to demonstrate that the best performing system in both genres of contracts omits the exact mention form from consideration—even though such mention forms are often the name of the slot under consideration—and is instead based solely on the dependency label and parent; in other words, a more reliable quantification of a party’s role in a contract is found in what they do rather than what they are named.</abstract>
       <url hash="1652370c">2021.nllp-1.17</url>
@@ -260,7 +260,7 @@
       <title>Learning from Limited Labels for Long Legal Dialogue</title>
       <author><first>Jenny</first><last>Hong</last></author>
       <author><first>Derek</first><last>Chong</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>190–204</pages>
       <abstract>We study attempting to achieve high accuracy information extraction of case factors from a challenging dataset of parole hearings, which, compared to other legal NLP datasets, has longer texts, with fewer labels. On this corpus, existing work directly applying pretrained neural models has failed to extract all but a few relatively basic items with little improvement over rule-based extraction. We address two challenges posed by existing work: training on long documents and reasoning over complex speech patterns. We use a similar approach to the two-step open-domain question answering approach by using a Reducer to extract relevant text segments and a Producer to generate both extractive answers and non-extractive classifications. In a context like ours, with limited labeled data, we show that a superior approach for strong performance within limited development time is to use a combination of a rule-based Reducer and a neural Producer. We study four representative tasks from the parole dataset. On all four, we improve extraction from the previous benchmark of 0.41–0.63 to 0.83–0.89 F1.</abstract>
       <url hash="980f16c3">2021.nllp-1.20</url>
@@ -294,7 +294,7 @@
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Brayden</first><last>Merrifield</last></author>
       <author><first>Kate</first><last>Fazio</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>217–227</pages>
       <abstract>Free legal assistance is critically under-resourced, and many of those who seek legal help have their needs unmet. A major bottleneck in the provision of free legal assistance to those most in need is the determination of the precise nature of the legal problem. This paper describes a collaboration with a major provider of free legal assistance, and the deployment of natural language processing models to assign area-of-law categories to real-world requests for legal assistance. In particular, we focus on an investigation of models to generate efficiencies in the triage process, but also the risks associated with naive use of model predictions, including fairness across different user demographics.</abstract>
       <url hash="80f93c57">2021.nllp-1.23</url>
@@ -307,7 +307,7 @@
       <author><first>Meladel</first><last>Mistica</last></author>
       <author><first>Inbar</first><last>Levy</last></author>
       <author><first>Andrew</first><last>Christie</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>228–238</pages>
       <abstract>We introduce the new task of domain name dispute resolution (DNDR), that predicts the outcome of a process for resolving disputes about legal entitlement to a domain name. TheICANN UDRP establishes a mandatory arbitration process for a dispute between a trade-mark owner and a domain name registrant pertaining to a generic Top-Level Domain (gTLD) name (one ending in .COM, .ORG, .NET, etc). The nature of the problem leads to a very skewed data set, which stems from being able to register a domain name with extreme ease, very little expense, and no need to prove an entitlement to it. In this paper, we describe thetask and associated data set. We also present benchmarking results based on a range of mod-els, which show that simple baselines are in general difficult to beat due to the skewed data distribution, but in the specific case of the respondent having submitted a response, a fine-tuned BERT model offers considerable improvements over a majority-class model</abstract>
       <url hash="212b58e8">2021.nllp-1.24</url>
diff --git a/data/xml/2021.nlp4call.xml b/data/xml/2021.nlp4call.xml
index eb621abd53..ea19a00e19 100644
--- a/data/xml/2021.nlp4call.xml
+++ b/data/xml/2021.nlp4call.xml
@@ -27,7 +27,7 @@
       <author><first>Christos</first><last>Rodosthenous</last></author>
       <author><first>Federico</first><last>Sangati</last></author>
       <author><first>Alexander</first><last>König</last></author>
-      <author><first>Corina</first><last>Forascu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forascu</last></author>
       <pages>1–14</pages>
       <url hash="465cbdb5">2021.nlp4call-1.1</url>
       <bibkey>nicolas-etal-2021-experiment</bibkey>
@@ -35,9 +35,9 @@
     <paper id="2">
       <title>Automatic annotation of curricular language targets to enrich activity models and support both pedagogy and adaptive systems</title>
       <author><first>Martí</first><last>Quixal</last></author>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Elizabeth</first><last>Bear</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>15–27</pages>
       <url hash="d830d86b">2021.nlp4call-1.2</url>
       <bibkey>quixal-etal-2021-automatic</bibkey>
@@ -53,9 +53,9 @@
     </paper>
     <paper id="4">
       <title>Using Broad Linguistic Complexity Modeling for Cross-Lingual Readability Assessment</title>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <author><first>Xiaobin</first><last>Chen</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>38–54</pages>
       <url hash="36556c69">2021.nlp4call-1.4</url>
       <bibkey>weiss-etal-2021-using</bibkey>
@@ -63,7 +63,7 @@
     <paper id="5">
       <title>Developing <fixed-case>F</fixed-case>lashcards for Learning <fixed-case>I</fixed-case>celandic</title>
       <author><first>Xindan</first><last>Xu</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>55–61</pages>
       <url hash="46590fed">2021.nlp4call-1.5</url>
       <bibkey>xu-ingason-2021-developing</bibkey>
diff --git a/data/xml/2021.nlp4convai.xml b/data/xml/2021.nlp4convai.xml
index a43e8cb90f..6e988ef416 100644
--- a/data/xml/2021.nlp4convai.xml
+++ b/data/xml/2021.nlp4convai.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-10-28" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Natural Language Processing for Conversational AI</booktitle>
-      <editor><first>Alexandros</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></editor>
       <editor><first>Paweł</first><last>Budzianowski</last></editor>
       <editor><first>Bing</first><last>Liu</last></editor>
       <editor><first>Elnaz</first><last>Nouri</last></editor>
@@ -124,7 +124,7 @@
       <title>What Went Wrong? Explaining Overall Dialogue Quality through Utterance-Level Impacts</title>
       <author><first>James D.</first><last>Finch</last></author>
       <author><first>Sarah E.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>93–101</pages>
       <abstract>Improving user experience of a dialogue system often requires intensive developer effort to read conversation logs, run statistical analyses, and intuit the relative importance of system shortcomings. This paper presents a novel approach to automated analysis of conversation logs that learns the relationship between user-system interactions and overall dialogue quality. Unlike prior work on utterance-level quality prediction, our approach learns the impact of each interaction from the overall user rating without utterance-level annotation, allowing resultant model conclusions to be derived on the basis of empirical evidence and at low cost. Our model identifies interactions that have a strong correlation with the overall dialogue quality in a chatbot setting. Experiments show that the automated analysis from our model agrees with expert judgments, making this work the first to show that such weakly-supervised learning of utterance-level quality prediction is highly achievable.</abstract>
       <url hash="e6426f79">2021.nlp4convai-1.9</url>
@@ -135,7 +135,7 @@
       <title><fixed-case>XP</fixed-case>ersona: Evaluating Multilingual Personalized Chatbot</title>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Yejin</first><last>Bang</last></author>
@@ -247,7 +247,7 @@
       <author><first>Shuyang</first><last>Dai</last></author>
       <author><first>Guoyin</first><last>Wang</last></author>
       <author><first>Sunghyun</first><last>Park</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>189–197</pages>
       <abstract>Large-scale auto-regressive models have achieved great success in dialogue response generation, with the help of Transformer layers. However, these models do not learn a representative latent space of the sentence distribution, making it hard to control the generation. Recent works have tried on learning sentence representations using Transformer-based framework, but do not model the context-response relationship embedded in the dialogue datasets. In this work, we aim to construct a robust sentence representation learning model, that is specifically designed for dialogue response generation, with Transformer-based encoder-decoder structure. An utterance-level contrastive learning is proposed, encoding predictive information in each context representation for its corresponding response. Extensive experiments are conducted to verify the robustness of the proposed representation learning mechanism. By using both reference-based and reference-free evaluation metrics, we provide detailed analysis on the generated sentences, demonstrating the effectiveness of our proposed model.</abstract>
       <url hash="143478fa">2021.nlp4convai-1.18</url>
@@ -270,7 +270,7 @@
       <title>Investigating Pretrained Language Models for Graph-to-Text Generation</title>
       <author><first>Leonardo F. R.</first><last>Ribeiro</last></author>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>211–227</pages>
       <abstract>Graph-to-text generation aims to generate fluent texts from graph-based data. In this paper, we investigate two recent pretrained language models (PLMs) and analyze the impact of different task-adaptive pretraining strategies for PLMs in graph-to-text generation. We present a study across three graph domains: meaning representations, Wikipedia knowledge graphs (KGs) and scientific KGs. We show that approaches based on PLMs BART and T5 achieve new state-of-the-art results and that task-adaptive pretraining strategies improve their performance even further. We report new state-of-the-art BLEU scores of 49.72 on AMR-LDC2017T10, 59.70 on WebNLG, and 25.66 on AGENDA datasets - a relative improvement of 31.8%, 4.5%, and 42.4%, respectively, with our models generating significantly more fluent texts than human references. In an extensive analysis, we identify possible reasons for the PLMs’ success on graph-to-text tasks. Our findings suggest that the PLMs benefit from similar facts seen during pretraining or fine-tuning, such that they perform well even when the input graph is reduced to a simple bag of node and edge labels.</abstract>
@@ -288,7 +288,7 @@
       <author><first>Yuheng</first><last>Du</last></author>
       <author><first>Anjali</first><last>Narayan-Chen</last></author>
       <author><first>Tagyoung</first><last>Chung</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>228–242</pages>
       <abstract>Natural Language Generation (NLG) for task-oriented dialogue systems focuses on communicating specific content accurately, fluently, and coherently. While these attributes are crucial for a successful dialogue, it is also desirable to simultaneously accomplish specific stylistic goals, such as response length, point-of-view, descriptiveness, sentiment, formality, and empathy. In this work, we focus on stylistic control and evaluation for schema-guided NLG, with joint goals of achieving both semantic and stylistic control. We experiment in detail with various controlled generation methods for large pretrained language models: specifically, conditional training, guided fine-tuning, and guided decoding. We discuss their advantages and limitations, and evaluate them with a broad range of automatic and human evaluation metrics. Our results show that while high style accuracy and semantic correctness are easier to achieve for more lexically-defined styles with conditional training, stylistic control is also achievable for more semantically complex styles using discriminator-based guided decoding methods. The results also suggest that methods that are more scalable (with less hyper-parameters tuning) and that disentangle context generation and stylistic variations are more effective at achieving semantic correctness and style accuracy.</abstract>
       <url hash="f8b76ed3">2021.nlp4convai-1.21</url>
@@ -302,7 +302,7 @@
       <author><first>Joel Ruben Antony</first><last>Moniz</last></author>
       <author><first>Xiao</first><last>Yang</last></author>
       <author><first>Manos</first><last>Tsagkias</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>243–250</pages>
       <abstract>Entity tags in human-machine dialog are integral to natural language understanding (NLU) tasks in conversational assistants. However, current systems struggle to accurately parse spoken queries with the typical use of text input alone, and often fail to understand the user intent. Previous work in linguistics has identified a cross-language tendency for longer speech pauses surrounding nouns as compared to verbs. We demonstrate that the linguistic observation on pauses can be used to improve accuracy in machine-learnt language understanding tasks. Analysis of pauses in French and English utterances from a commercial voice assistant shows the statistically significant difference in pause duration around multi-token entity span boundaries compared to within entity spans. Additionally, in contrast to text-based NLU, we apply pause duration to enrich contextual embeddings to improve shallow parsing of entities. Results show that our proposed novel embeddings improve the relative error rate by up to 8% consistently across three domains for French, without any added annotation or alignment costs to the parser.</abstract>
       <url hash="df826e0f">2021.nlp4convai-1.22</url>
@@ -319,7 +319,7 @@
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>251–253</pages>
       <abstract>Humans make appropriate responses not only based on previous dialogue utterances but also on implicit background knowledge such as common sense. Although neural response generation models seem to produce human-like responses, they are mostly end-to-end and not generating intermediate grounds between a dialogue history and responses. This work aims to study if and how we can train an RG model that talks with itself to generate implicit knowledge before making responses. We further investigate can such models identify when to generate implicit background knowledge and when it is not necessary. Experimental results show that compared with models that directly generate responses given a dialogue history, self-talk models produce better-quality responses according to human evaluation on grammaticality, coherence, and engagingness. And models that are trained to identify when to self-talk further improves the response quality. Analysis on generated implicit knowledge shows that models mostly use the knowledge appropriately in the responses.</abstract>
       <url hash="179cc59c">2021.nlp4convai-1.23</url>
@@ -331,7 +331,7 @@
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>254–262</pages>
       <abstract>Knowledge Grounded Conversation Models are usually based on a selection/retrieval module and a generation module, trained separately or simultaneously, with or without having access to a ‘gold’ knowledge option. With the introduction of large pre-trained generative models, the selection and generation part have become more and more entangled, shifting the focus towards enhancing knowledge incorporation (from multiple sources) instead of trying to pick the best knowledge option. These approaches however depend on knowledge labels and/or a separate dense retriever for their best performance. In this work we study the unsupervised selection abilities of pre-trained generative models (e.g. BART) and show that by adding a score-and-aggregate module between encoder and decoder, they are capable of learning to pick the proper knowledge through minimising the language modelling loss (i.e. without having access to knowledge labels). Trained as such, our model - K-Mine - shows competitive selection and generation performance against models that benefit from knowledge labels and/or separate dense retriever.</abstract>
       <url hash="6fbace53">2021.nlp4convai-1.24</url>
@@ -371,7 +371,7 @@
       <author><first>Shuyang</first><last>Gao</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>281–288</pages>
       <abstract>Most prior work on task-oriented dialogue systems is restricted to supporting domain APIs. However, users may have requests that are out of the scope of these APIs. This work focuses on identifying such user requests. Existing methods for this task mainly rely on fine-tuning pre-trained models on large annotated data. We propose a novel method, REDE, based on adaptive representation learning and density estimation. REDE can be applied to zero-shot cases, and quickly learns a high-performing detector with only a few shots by updating less than 3K parameters. We demonstrate REDE’s competitive performance on DSTC9 data and our newly collected test set.</abstract>
       <url hash="bc4804fa">2021.nlp4convai-1.27</url>
diff --git a/data/xml/2021.nlp4dh.xml b/data/xml/2021.nlp4dh.xml
index 8dbf0a2e1f..d967eff125 100644
--- a/data/xml/2021.nlp4dh.xml
+++ b/data/xml/2021.nlp4dh.xml
@@ -48,7 +48,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>M</fixed-case>ac<fixed-case>BERT</fixed-case>h: Development and Evaluation of a Historically Pre-trained Language Model for <fixed-case>E</fixed-case>nglish (1450-1950)</title>
-      <author><first>Enrique</first><last>Manjavacas Arevalo</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas Arevalo</last></author>
       <author><first>Lauren</first><last>Fonteyn</last></author>
       <pages>23–36</pages>
       <abstract>The new pre-train-then-fine-tune paradigm in Natural made important performance gains accessible to a wider audience. Once pre-trained, deploying a large language model presents comparatively small infrastructure requirements, and offers robust performance in many NLP tasks. The Digital Humanities community has been an early adapter of this paradigm. Yet, a large part of this community is concerned with the application of NLP algorithms to historical texts, for which large models pre-trained on contemporary text may not provide optimal results. In the present paper, we present “MacBERTh”—a transformer-based language model pre-trained on historical English—and exhaustively assess its benefits on a large set of relevant downstream tasks. Our experiments highlight that, despite some differences across target time periods, pre-training on historical language from scratch outperforms models pre-trained on present-day language and later adapted to historical language.</abstract>
@@ -185,7 +185,7 @@
     <paper id="18">
       <title>Transferring Modern Named Entity Recognition to the Historical Domain: How to Take the Step?</title>
       <author><first>Baptiste</first><last>Blouin</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Jeremy</first><last>Auguste</last></author>
       <author><first>Christian</first><last>Henriot</last></author>
       <pages>152–162</pages>
diff --git a/data/xml/2021.nlp4if.xml b/data/xml/2021.nlp4if.xml
index 1136bc88d9..4affc82253 100644
--- a/data/xml/2021.nlp4if.xml
+++ b/data/xml/2021.nlp4if.xml
@@ -6,7 +6,7 @@
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Giovanni</first><last>Da San Martino</last></editor>
       <editor><first>Chris</first><last>Leberknight</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Identifying Automatically Generated Headlines using Transformers</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>1–6</pages>
       <abstract>False information spread via the internet and social media influences public opinion and user activity, while generative models enable fake content to be generated faster and more cheaply than had previously been possible. In the not so distant future, identifying fake content generated by deep learning models will play a key role in protecting users from misinformation. To this end, a dataset containing human and computer-generated headlines was created and a user study indicated that humans were only able to identify the fake headlines in 47.8% of the cases. However, the most accurate automatic approach, transformers, achieved an overall accuracy of 85.7%, indicating that content generated from language models can be filtered out accurately.</abstract>
@@ -33,7 +33,7 @@
       <title>Improving Hate Speech Type and Target Detection with Hateful Metaphor Features</title>
       <author><first>Jens</first><last>Lemmens</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>7–16</pages>
       <abstract>We study the usefulness of hateful metaphorsas features for the identification of the type and target of hate speech in Dutch Facebook comments. For this purpose, all hateful metaphors in the Dutch LiLaH corpus were annotated and interpreted in line with Conceptual Metaphor Theory and Critical Metaphor Analysis. We provide SVM and BERT/RoBERTa results, and investigate the effect of different metaphor information encoding methods on hate speech type and target detection accuracy. The results of the conducted experiments show that hateful metaphor features improve model performance for the both tasks. To our knowledge, it is the first time that the effectiveness of hateful metaphors as an information source for hatespeech classification is investigated.</abstract>
       <url hash="f5dee837">2021.nlp4if-1.2</url>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Improving Cross-Domain Hate Speech Detection by Reducing the False Positive Rate</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>17–22</pages>
       <abstract>Hate speech detection is an actively growing field of research with a variety of recently proposed approaches that allowed to push the state-of-the-art results. One of the challenges of such automated approaches – namely recent deep learning models – is a risk of false positives (i.e., false accusations), which may lead to over-blocking or removal of harmless social media content in applications with little moderator intervention. We evaluate deep learning models both under in-domain and cross-domain hate speech detection conditions, and introduce an SVM approach that allows to significantly improve the state-of-the-art results when combined with the deep learning models through a simple majority-voting ensemble. The improvement is mainly due to a reduction of the false positive rate.</abstract>
       <url hash="b454930c">2021.nlp4if-1.3</url>
@@ -92,7 +92,7 @@
       <author><first>Ashkan</first><last>Kazemi</last></author>
       <author><first>Zehua</first><last>Li</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>45–50</pages>
       <abstract>In this paper, we explore the construction of natural language explanations for news claims, with the goal of assisting fact-checking and news evaluation applications. We experiment with two methods: (1) an extractive method based on Biased TextRank – a resource-effective unsupervised graph-based algorithm for content extraction; and (2) an abstractive method based on the GPT-2 language model. We perform comparative evaluations on two misinformation datasets in the political and health news domains, and find that the extractive method shows the most promise.</abstract>
       <url hash="d1838b6b">2021.nlp4if-1.7</url>
@@ -172,10 +172,10 @@
     </paper>
     <paper id="14">
       <title><fixed-case>NARNIA</fixed-case> at <fixed-case>NLP</fixed-case>4<fixed-case>IF</fixed-case>-2021: Identification of Misinformation in <fixed-case>COVID</fixed-case>-19 Tweets Using <fixed-case>BERT</fixed-case>weet</title>
-      <author><first>Ankit</first><last>Kumar</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Kumar</last></author>
       <author><first>Naman</first><last>Jhunjhunwala</last></author>
       <author><first>Raksha</first><last>Agarwal</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>99–103</pages>
       <abstract>The spread of COVID-19 has been accompanied with widespread misinformation on social media. In particular, Twitterverse has seen a huge increase in dissemination of distorted facts and figures. The present work aims at identifying tweets regarding COVID-19 which contains harmful and false information. We have experimented with a number of Deep Learning-based models, including different word embeddings, such as Glove, ELMo, among others. BERTweet model achieved the best overall F1-score of 0.881 and secured the third rank on the above task.</abstract>
       <url hash="e434f5cf">2021.nlp4if-1.14</url>
diff --git a/data/xml/2021.nlp4musa.xml b/data/xml/2021.nlp4musa.xml
index c8796296e6..1187378cc2 100644
--- a/data/xml/2021.nlp4musa.xml
+++ b/data/xml/2021.nlp4musa.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2nd Workshop on NLP for Music and Spoken Audio (NLP4MusA)</booktitle>
       <editor><first>Sergio</first><last>Oramas</last></editor>
       <editor><first>Elena</first><last>Epure</last></editor>
-      <editor><first>Luis</first><last>Espinosa-Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></editor>
       <editor><first>Rosie</first><last>Jones</last></editor>
       <editor><first>Massimo</first><last>Quadrana</last></editor>
       <editor><first>Mohamed</first><last>Sordo</last></editor>
@@ -60,7 +60,7 @@
     <paper id="5">
       <title>Using Listeners’ Interpretations in Topic Classification of Song Lyrics</title>
       <author><first>Varvara</first><last>Papazoglou</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>22–26</pages>
       <url hash="0815103b">2021.nlp4musa-1.5</url>
       <bibkey>papazoglou-gaizauskas-2021-using</bibkey>
diff --git a/data/xml/2021.nlp4posimpact.xml b/data/xml/2021.nlp4posimpact.xml
index 52ee1a4e8e..febb142234 100644
--- a/data/xml/2021.nlp4posimpact.xml
+++ b/data/xml/2021.nlp4posimpact.xml
@@ -22,7 +22,7 @@
     </frontmatter>
     <paper id="1">
       <title>Restatement and Question Generation for Counsellor Chatbot</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Baikun</first><last>Liang</last></author>
       <author><first>Haley</first><last>Fong</last></author>
       <pages>1–7</pages>
@@ -45,8 +45,8 @@
       <title>Cartography of Natural Language Processing for Social Good (<fixed-case>NLP</fixed-case>4<fixed-case>SG</fixed-case>): Searching for Definitions, Statistics and White Spots</title>
       <author><first>Paula</first><last>Fortuna</last></author>
       <author><first>Laura</first><last>Pérez-Mayos</last></author>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>19–26</pages>
       <abstract>The range of works that can be considered as developing NLP for social good (NLP4SG) is enormous. While many of them target the identification of hate speech or fake news, there are others that address, e.g., text simplification to alleviate consequences of dyslexia, or coaching strategies to fight depression. However, so far, there is no clear picture of what areas are targeted by NLP4SG, who are the actors, which are the main scenarios and what are the topics that have been left aside. In order to obtain a clearer view in this respect, we first propose a working definition of NLP4SG and identify some primary aspects that are crucial for NLP4SG, including, e.g., areas, ethics, privacy and bias. Then, we draw upon a corpus of around 50,000 articles downloaded from the ACL Anthology. Based on a list of keywords retrieved from the literature and revised in view of the task, we select from this corpus articles that can be considered to be on NLP4SG according to our definition and analyze them in terms of trends along the time line, etc. The result is a map of the current NLP4SG research and insights concerning the white spots on this map.</abstract>
@@ -113,7 +113,7 @@
       <title>Challenges for Information Extraction from Dialogue in Criminal Law</title>
       <author><first>Jenny</first><last>Hong</last></author>
       <author><first>Catalin</first><last>Voss</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>71–81</pages>
       <abstract>Information extraction and question answering have the potential to introduce a new paradigm for how machine learning is applied to criminal law. Existing approaches generally use tabular data for predictive metrics. An alternative approach is needed for matters of equitable justice, where individuals are judged on a case-by-case basis, in a process involving verbal or written discussion and interpretation of case factors. Such discussions are individualized, but they nonetheless rely on underlying facts. Information extraction can play an important role in surfacing these facts, which are still important to understand. We analyze unsupervised, weakly supervised, and pre-trained models’ ability to extract such factual information from the free-form dialogue of California parole hearings. With a few exceptions, most F1 scores are below 0.85. We use this opportunity to highlight some opportunities for further research for information extraction and question answering. We encourage new developments in NLP to enable analysis and review of legal cases to be done in a post-hoc, not predictive, manner.</abstract>
       <url hash="75a129e5">2021.nlp4posimpact-1.8</url>
@@ -187,7 +187,7 @@
     </paper>
     <paper id="15">
       <title>A Speech-enabled Fixed-phrase Translator for Healthcare Accessibility</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
@@ -201,7 +201,7 @@
     <paper id="16">
       <title>A Grounded Well-being Conversational Agent with Multiple Interaction Modes: Preliminary Results</title>
       <author><first>Xinxin</first><last>Yan</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>143–151</pages>
       <abstract>Technologies for enhancing well-being, healthcare vigilance and monitoring are on the rise. However, despite patient interest, such technologies suffer from low adoption. One hypothesis for this limited adoption is loss of human interaction that is central to doctor-patient encounters. In this paper we seek to address this limitation via a conversational agent that adopts one aspect of in-person doctor-patient interactions: A human avatar to facilitate medical grounded question answering. This is akin to the in-person scenario where the doctor may point to the human body or the patient may point to their own body to express their conditions. Additionally, our agent has multiple interaction modes, that may give more options for the patient to use the agent, not just for medical question answering, but also to engage in conversations about general topics and current events. Both the avatar, and the multiple interaction modes could help improve adherence. We present a high level overview of the design of our agent, Marie Bot Wellbeing. We also report implementation details of our early prototype , and present preliminary results.</abstract>
       <url hash="b0d8e80f">2021.nlp4posimpact-1.16</url>
diff --git a/data/xml/2021.nlpmc.xml b/data/xml/2021.nlpmc.xml
index f1cc6593a8..3487f5019e 100644
--- a/data/xml/2021.nlpmc.xml
+++ b/data/xml/2021.nlpmc.xml
@@ -10,7 +10,7 @@
       <editor><first>Shaoqing</first><last>Yuan</last></editor>
       <editor><first>Yi</first><last>Zhang</last></editor>
       <editor><first>Parminder</first><last>Bhatia</last></editor>
-      <editor><first>Byron</first><last>Wallace</last></editor>
+      <editor id="byron-c-wallace"><first>Byron</first><last>Wallace</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -37,7 +37,7 @@
     <paper id="2">
       <title>Towards Automating Medical Scribing : Clinic Visit <fixed-case>D</fixed-case>ialogue2<fixed-case>N</fixed-case>ote Sentence Alignment and Snippet Summarization</title>
       <author><first>Wen-wai</first><last>Yim</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>10–20</pages>
       <abstract>Medical conversations from patient visits are routinely summarized into clinical notes for documentation of clinical care. The automatic creation of clinical note is particularly challenging given that it requires summarization over spoken language and multiple speaker turns; as well, clinical notes include highly technical semi-structured text. In this paper, we describe our corpus creation method and baseline systems for two NLP tasks, clinical dialogue2note sentence alignment and clinical dialogue2note snippet summarization. These two systems, as well as other models created from such a corpus, may be incorporated as parts of an overall end-to-end clinical note generation system.</abstract>
       <url hash="7073d125">2021.nlpmc-1.2</url>
@@ -116,7 +116,7 @@
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Emilia</first><last>Farcas</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>58–65</pages>
       <abstract>Understanding the intent of medical questions asked by patients, or Consumer Health Questions, is an essential skill for medical Conversational AI systems. We propose a novel data-augmented and simple joint learning approach combining question summarization and Recognizing Question Entailment (RQE) in the medical domain. Our data augmentation approach enables to use just one dataset for joint learning. We show improvements on both tasks across four biomedical datasets in accuracy (+8%), ROUGE-1 (+2.5%) and human evaluation scores. Human evaluation shows joint learning generates faithful and informative summaries. Finally, we release our code, the two question summarization datasets extracted from a large-scale medical dialogue dataset, as well as our augmented datasets.</abstract>
       <url hash="e652a3a3">2021.nlpmc-1.8</url>
diff --git a/data/xml/2021.nodalida.xml b/data/xml/2021.nodalida.xml
index 00954e1038..a41ae68ba8 100644
--- a/data/xml/2021.nodalida.xml
+++ b/data/xml/2021.nodalida.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</booktitle>
       <editor><first>Simon</first><last>Dobnik</last></editor>
-      <editor><first>Lilja</first><last>Øvrelid</last></editor>
+      <editor id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></editor>
       <publisher>Linköping University Electronic Press, Sweden</publisher>
       <address>Reykjavik, Iceland (Online)</address>
       <month>May 31--2 June</month>
@@ -222,7 +222,7 @@
       <title>De-identification of Privacy-related Entities in Job Postings</title>
       <author><first>Kristian Nørgaard</first><last>Jensen</last></author>
       <author><first>Mike</first><last>Zhang</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>210–221</pages>
       <abstract>De-identification is the task of detecting privacy-related entities in text, such as person names, emails and contact data. It has been well-studied within the medical domain. The need for de-identification technology is increasing, as privacy-preserving data handling is in high demand in many domains. In this paper, we focus on job postings. We present JobStack, a new corpus for de-identification of personal data in job vacancies on Stackoverflow. We introduce baselines, comparing Long-Short Term Memory (LSTM) and Transformer models. To improve these baselines, we experiment with BERT representations, and distantly related auxiliary data via multi-task learning. Our results show that auxiliary data helps to improve de-identification performance. While BERT representations improve performance, surprisingly “vanilla” BERT turned out to be more effective than BERT trained on Stackoverflow-related data.</abstract>
       <url hash="a59b2641">2021.nodalida-main.21</url>
@@ -269,7 +269,7 @@
     <paper id="26">
       <title>Synonym Replacement based on a Study of Basic-level Nouns in <fixed-case>S</fixed-case>wedish Texts of Different Complexity</title>
       <author><first>Evelina</first><last>Rennes</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>259–267</pages>
       <abstract>Basic-level terms have been described as the most important to human categorisation. They are the earliest emerging words in children’s language acquisition, and seem to be more frequently occurring in language in general. In this article, we explored the use of basic-level nouns in texts of different complexity, and hypothesise that hypernyms with characteristics of basic-level words could be useful for the task of lexical simplification. We conducted two corpus studies using four different corpora, two corpora of standard Swedish and two corpora of simple Swedish, and explored whether corpora of simple texts contain a higher proportion of basic-level nouns than corpora of standard Swedish. Based on insights from the corpus studies, we developed a novel algorithm for choosing the best synonym by rewarding high relative frequencies and monolexemity, and restricting the climb in the word hierarchy not to suggest synonyms of a too high level of inclusiveness.</abstract>
       <url hash="393b6356">2021.nodalida-main.26</url>
@@ -289,7 +289,7 @@
       <author><first>Aarne</first><last>Talman</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Stergios</first><last>Chatzikyriakidis</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>276–287</pages>
       <abstract>Pre-trained neural language models give high performance on natural language inference (NLI) tasks. But whether they actually understand the meaning of the processed sequences is still unclear. We propose a new diagnostics test suite which allows to assess whether a dataset constitutes a good testbed for evaluating the models’ meaning understanding capabilities. We specifically apply controlled corruption transformations to widely used benchmarks (MNLI and ANLI), which involve removing entire word classes and often lead to non-sensical sentence pairs. If model accuracy on the corrupted data remains high, then the dataset is likely to contain statistical biases and artefacts that guide prediction. Inversely, a large decrease in model accuracy indicates that the original dataset provides a proper challenge to the models’ reasoning capabilities. Hence, our proposed controls can serve as a crash test for developing high quality data for NLI tasks.</abstract>
       <url hash="c358f7c4">2021.nodalida-main.28</url>
@@ -345,7 +345,7 @@
     <paper id="33">
       <title>Towards cross-lingual application of language-specific <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> tagging schemes</title>
       <author><first>Hinrik</first><last>Hafsteinsson</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>321–325</pages>
       <abstract>We describe the process of conversion between the PoS tagging schemes of two languages, the Icelandic MIM-GOLD tagging scheme and the Faroese Sosialurin tagging scheme. These tagging schemes are functionally similar but use separate ways to encode fine-grained morphological information on tokenised text. As Faroese and Icelandic are lexically and grammatically similar, having a systematic method to convert between these two tagging schemes would be beneficial in the field of language technology, specifically in research on transfer learning between the two languages. As a product of our work, we present a provisional version of Icelandic corpora, prepared in the Faroese PoS tagging scheme, ready for use in cross-lingual NLP applications.</abstract>
       <url hash="a590242a">2021.nodalida-main.33</url>
@@ -387,7 +387,7 @@
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>351–356</pages>
       <abstract>We consider a low-resource translation task from Finnish into Northern Sámi. Collecting all available parallel data between the languages, we obtain around 30,000 sentence pairs. However, there exists a significantly larger monolingual Northern Sámi corpus, as well as a rule-based machine translation (RBMT) system between the languages. To make the best use of the monolingual data in a neural machine translation (NMT) system, we use the backtranslation approach to create synthetic parallel data from it using both NMT and RBMT systems. Evaluating the results on an in-domain test set and a small out-of-domain set, we find that the RBMT backtranslation outperforms NMT backtranslation clearly for the out-of-domain test set, but also slightly for the in-domain data, for which the NMT backtranslation model provided clearly better BLEU scores than the RBMT. In addition, combining both backtranslated data sets improves the RBMT approach only for the in-domain test set. This suggests that the RBMT system provides general-domain knowledge that cannot be found from the relative small parallel training data.</abstract>
       <url hash="83e1a738">2021.nodalida-main.37</url>
@@ -449,7 +449,7 @@
       <title>Error Analysis of using <fixed-case>BART</fixed-case> for Multi-Document Summarization: A Study for <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Language</title>
       <author><first>Timo</first><last>Johner</last></author>
       <author><first>Abhik</first><last>Jana</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>391–397</pages>
       <abstract>Recent research using pre-trained language models for multi-document summarization task lacks deep investigation of potential erroneous cases and their possible application on other languages. In this work, we apply a pre-trained language model (BART) for multi-document summarization (MDS) task using both fine-tuning and without fine-tuning. We use two English datasets and one German dataset for this study. First, we reproduce the multi-document summaries for English language by following one of the recent studies. Next, we show the applicability of the model to German language by achieving state-of-the-art performance on German MDS. We perform an in-depth error analysis of the followed approach for both languages, which leads us to identifying most notable errors, from made-up facts and topic delimitation, and quantifying the amount of extractiveness.</abstract>
       <url hash="dea8bea8">2021.nodalida-main.43</url>
@@ -477,13 +477,13 @@
     </paper>
     <paper id="46">
       <title>The <fixed-case>D</fixed-case>anish <fixed-case>G</fixed-case>igaword Corpus</title>
-      <author><first>Leon</first><last>Strømberg-Derczynski</last></author>
-      <author><first>Manuel</first><last>Ciosici</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Strømberg-Derczynski</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel</first><last>Ciosici</last></author>
       <author><first>Rebekah</first><last>Baglini</last></author>
       <author><first>Morten H.</first><last>Christiansen</last></author>
       <author><first>Jacob Aarup</first><last>Dalsgaard</last></author>
       <author><first>Riccardo</first><last>Fusaroli</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <author><first>Rasmus</first><last>Hvingelby</last></author>
       <author><first>Andreas</first><last>Kirkedal</last></author>
       <author><first>Alex Speed</first><last>Kjeldsen</last></author>
@@ -503,7 +503,7 @@
     <paper id="47">
       <title><fixed-case>D</fixed-case>an<fixed-case>FEVER</fixed-case>: claim verification dataset for <fixed-case>D</fixed-case>anish</title>
       <author><first>Jeppe</first><last>Nørregaard</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>422–428</pages>
       <abstract>We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language.</abstract>
       <url hash="b787c42b">2021.nodalida-main.47</url>
@@ -525,7 +525,7 @@
     <paper id="49">
       <title>Getting Hold of Villains and other Rogues</title>
       <author><first>Manfred</first><last>Klenner</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Sophia</first><last>Conrad</last></author>
       <pages>435–439</pages>
       <abstract>In this paper, we introduce the first corpus specifying negative entities within sentences. We discuss indicators for their presence, namely particular verbs, but also the linguistic conditions when their prediction should be suppressed. We further show that a fine-tuned Bert-based baseline model outperforms an over-generating rule-based approach which is not aware of these further restrictions. If a perfect filter were applied, both would be on par.</abstract>
diff --git a/data/xml/2021.nuse.xml b/data/xml/2021.nuse.xml
index 2afb09c47b..a9c0fde730 100644
--- a/data/xml/2021.nuse.xml
+++ b/data/xml/2021.nuse.xml
@@ -41,7 +41,7 @@
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Huiming</first><last>Jin</last></author>
       <author><first>Hariharan</first><last>Muralidharan</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>13–23</pages>
       <abstract>Fanfiction presents an opportunity as a data source for research in NLP, education, and social science. However, answering specific research questions with this data is difficult, since fanfiction contains more diverse writing styles than formal fiction. We present a text processing pipeline for fanfiction, with a focus on identifying text associated with characters. The pipeline includes modules for character identification and coreference, as well as the attribution of quotes and narration to those characters. Additionally, the pipeline contains a novel approach to character coreference that uses knowledge from quote attribution to resolve pronouns within quotes. For each module, we evaluate the effectiveness of various approaches on 10 annotated fanfiction stories. This pipeline outperforms tools developed for formal fiction on the tasks of character coreference and quote attribution</abstract>
       <url hash="31a57f0a">2021.nuse-1.2</url>
@@ -73,7 +73,7 @@
     </paper>
     <paper id="5">
       <title>Gender and Representation Bias in <fixed-case>GPT</fixed-case>-3 Generated Stories</title>
-      <author><first>Li</first><last>Lucy</last></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last></author>
       <author><first>David</first><last>Bamman</last></author>
       <pages>48–55</pages>
       <abstract>Using topic modeling and lexicon-based word similarity, we find that stories generated by GPT-3 exhibit many known gender stereotypes. Generated stories depict different topics and descriptions depending on GPT-3’s perceived gender of the character in a prompt, with feminine characters more likely to be associated with family and appearance, and described as less powerful than masculine characters, even when associated with high power verbs in a prompt. Our study raises questions on how one can avoid unintended social biases when using large language models for storytelling.</abstract>
diff --git a/data/xml/2021.paclic.xml b/data/xml/2021.paclic.xml
index 7147da018a..130fc010dc 100644
--- a/data/xml/2021.paclic.xml
+++ b/data/xml/2021.paclic.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 35th Pacific Asia Conference on Language, Information and Computation</booktitle>
       <editor><first>Kaibao</first><last>Hu</last></editor>
       <editor><first>Jong-Bok</first><last>Kim</last></editor>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
       <publisher>Association for Computational Lingustics</publisher>
       <address>Shanghai, China</address>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="4">
       <title>Exploring sentiment constructions: connecting deep learning models with linguistic construction</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
       <pages>32–39</pages>
       <url hash="3f3f4652">2021.paclic-1.4</url>
@@ -67,7 +67,7 @@
       <title>An Empirical Performance Analysis of State-of-the-Art Summarization Models for Automatic Minuting</title>
       <author><first>Muskaan</first><last>Singh</last></author>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>50–60</pages>
       <url hash="456e6b22">2021.paclic-1.6</url>
       <bibkey>muskaan-singh-bojar-2021-empirical</bibkey>
@@ -116,7 +116,7 @@
       <author><first>Po-Ya Angela</first><last>Wang</last></author>
       <author><first>Han-Tang</first><last>Hung</last></author>
       <author><first>Ka-Sîng</first><last>Khóo</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>108–118</pages>
       <url hash="9d6a7933">2021.paclic-1.12</url>
       <bibkey>chang-etal-2021-examine</bibkey>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="14">
       <title>Aspect or Manner? A Study of Reduplicated Adverbials in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>130–139</pages>
       <url hash="08e13262">2021.paclic-1.14</url>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="19">
       <title>Applying Masked Language Models to Search for Suitable Verbs Used in Academic Writing</title>
-      <author><first>Chooi Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi Ling</first><last>Goh</last></author>
       <pages>180–188</pages>
       <url hash="56750036">2021.paclic-1.19</url>
       <bibkey>goh-2021-applying</bibkey>
@@ -192,7 +192,7 @@
       <title>A Comparative Study of Collocation Extraction Methods from the Perspectives of Vocabulary and Grammar: A Case Study in the Field of Journalism</title>
       <author><first>Lulu</first><last>Gu</last></author>
       <author><first>Yue</first><last>Pan</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>201–210</pages>
       <url hash="6d3c796b">2021.paclic-1.21</url>
       <bibkey>gu-etal-2021-comparative</bibkey>
@@ -246,7 +246,7 @@
       <title>Various Errors Improve Neural Grammatical Error Correction</title>
       <author><first>Shota</first><last>Koyama</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>251–261</pages>
       <url hash="93ca98db">2021.paclic-1.27</url>
       <bibkey>koyama-etal-2021-various</bibkey>
@@ -391,7 +391,7 @@
       <title>Natural Language Inference using Neural Network and Tableau Method</title>
       <author><first>Ayahito</first><last>Saji</last></author>
       <author><first>Daiki</first><last>Takao</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>402–410</pages>
       <url hash="2963ebbf">2021.paclic-1.43</url>
@@ -543,7 +543,7 @@
     <paper id="61">
       <title>Incorporating Semantic Textual Similarity and Lexical Matching for Information Retrieval</title>
       <author><first>Hiroki</first><last>Iida</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>582–591</pages>
       <url hash="965622c8">2021.paclic-1.61</url>
       <bibkey>iida-okazaki-2021-incorporating</bibkey>
@@ -561,7 +561,7 @@
     </paper>
     <paper id="63">
       <title>A Sentiment Analysis of Men’s and Women’s Speech in the <fixed-case>BNC</fixed-case>64</title>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <author><first>Ji-Hye</first><last>Kim</last></author>
       <pages>603–610</pages>
       <url hash="a778abab">2021.paclic-1.63</url>
@@ -574,7 +574,7 @@
       <author><first>Chaehun</first><last>Park</last></author>
       <author><first>Hoyun</first><last>Song</last></author>
       <author><first>Eugene</first><last>Jang</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>611–621</pages>
       <url hash="41fc1c39">2021.paclic-1.64</url>
       <bibkey>lee-etal-2021-optimizing</bibkey>
@@ -673,7 +673,7 @@
     </paper>
     <paper id="76">
       <title>Science Mapping of Publications in Natural Language Processing in the <fixed-case>P</fixed-case>hilippines: 2006 to 2020</title>
-      <author><first>Rachel Edita O.</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></author>
       <author><first>Joseph Marvin</first><last>Imperial</last></author>
       <author><first>Angelica H.</first><last>De La Cruz</last></author>
       <pages>721–730</pages>
diff --git a/data/xml/2021.pail.xml b/data/xml/2021.pail.xml
index 878c63ff45..a510ed7a8b 100644
--- a/data/xml/2021.pail.xml
+++ b/data/xml/2021.pail.xml
@@ -23,7 +23,7 @@
       <author><first>Shyam</first><last>Ratan</last></author>
       <author><first>Deepak</first><last>Alok</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <pages>1–11</pages>
       <abstract>In this paper, we discuss the development of treebanks for two low-resourced Indian languages - Magahi and Braj - based on the Universal Dependencies framework. The Magahi treebank contains 945 sentences and Braj treebank around 500 sentences marked with their lemmas, part-of-speech, morphological features and universal dependencies. This paper gives a description of the different dependency relationship found in the two languages and give some statistics of the two treebanks. The dataset will be made publicly available on Universal Dependency (UD) repository in the next (v2.10) release.</abstract>
       <url hash="f0f8d2c8">2021.pail-1.1</url>
@@ -41,8 +41,8 @@
     </paper>
     <paper id="3">
       <title>Dependency Parsing in a Morphological rich language, <fixed-case>T</fixed-case>amil</title>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>20–26</pages>
       <abstract>Dependency parsing is the process of analysing the grammatical structure of a sentence based on the dependencies between the words in a sentence. The annotation of dependency parsing is done using different formalisms at word-level namely Universal Dependencies and chunk-level namely AnnaCorra. Though dependency parsing is deeply dealt in languages such as English, Czech etc the same cannot be adopted for the morphologically rich and agglutinative languages. In this paper, we discuss the development of a dependency parser for Tamil, a South Dravidian language. The different characteristics of the language make this task a challenging task. Tamil, a morphologically rich and agglutinative language, has copula drop, accusative and genitive case drop and pro-drop. Coordinative constructions are introduced by affixation of morpheme ‘um’. Embedded clausal structures are common in relative participle and complementizer clauses. In this paper, we have discussed our approach to handle some of these challenges. We have used Malt parser, a supervised learning- approach based implementation. We have obtained an accuracy of 79.27% for Unlabelled Attachment Score, 73.64% for Labelled Attachment Score and 68.82% for Labelled Accuracy.</abstract>
       <url hash="b86079b8">2021.pail-1.3</url>
diff --git a/data/xml/2021.privatenlp.xml b/data/xml/2021.privatenlp.xml
index 2a074c6b29..ea3e07f38e 100644
--- a/data/xml/2021.privatenlp.xml
+++ b/data/xml/2021.privatenlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third Workshop on Privacy in Natural Language Processing</booktitle>
       <editor><first>Oluwaseyi</first><last>Feyisetan</last></editor>
       <editor><first>Sepideh</first><last>Ghanavati</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Patricia</first><last>Thaine</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -65,7 +65,7 @@
     <paper id="4">
       <title>An Investigation towards Differentially Private Sequence Tagging in a Federated Framework</title>
       <author><first>Abhik</first><last>Jana</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>30–35</pages>
       <abstract>To build machine learning-based applications for sensitive domains like medical, legal, etc. where the digitized text contains private information, anonymization of text is required for preserving privacy. Sequence tagging, e.g. as done in Named Entity Recognition (NER) can help to detect private information. However, to train sequence tagging models, a sufficient amount of labeled data are required but for privacy-sensitive domains, such labeled data also can not be shared directly. In this paper, we investigate the applicability of a privacy-preserving framework for sequence tagging tasks, specifically NER. Hence, we analyze a framework for the NER task, which incorporates two levels of privacy protection. Firstly, we deploy a federated learning (FL) framework where the labeled data are not shared with the centralized server as well as the peer clients. Secondly, we apply differential privacy (DP) while the models are being trained in each client instance. While both privacy measures are suitable for privacy-aware models, their combination results in unstable models. To our knowledge, this is the first study of its kind on privacy-aware sequence tagging models.</abstract>
       <url hash="e434bd2d">2021.privatenlp-1.4</url>
diff --git a/data/xml/2021.ranlp.xml b/data/xml/2021.ranlp.xml
index c3eb8e9ac8..9a7179bf21 100644
--- a/data/xml/2021.ranlp.xml
+++ b/data/xml/2021.ranlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2021-11-09" type="proceedings">
     <meta>
       <booktitle>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</booktitle>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd.</publisher>
       <address>Held Online</address>
@@ -29,7 +29,7 @@
       <title>Ontology Population Reusing Resources for Dialogue Intent Detection: Generic and Multilingual Approach</title>
       <author><first>Cristina</first><last>Aceta</last></author>
       <author><first>Izaskun</first><last>Fernández</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>10–18</pages>
       <abstract>This work presents a generic semi-automatic strategy to populate the domain ontology of an ontology-driven task-oriented dialogue system, with the aim of performing successful intent detection in the dialogue process, reusing already existing multilingual resources. This semi-automatic approach allows ontology engineers to exploit available resources so as to associate the potential situations in the use case to FrameNet frames and obtain the relevant lexical units associated to them in the target language, following lexical and semantic criteria, without linguistic expert knowledge. This strategy has been validated and evaluated in two use cases, from industrial scenarios, for interaction in Spanish with a guide robot and with a Computerized Maintenance Management System (CMMS). In both cases, this method has allowed the ontology engineer to instantiate the domain ontology with the intent-relevant information with quality data in a simple and low-resource-consuming manner.</abstract>
       <url hash="30fea4af">2021.ranlp-1.2</url>
@@ -49,7 +49,7 @@
       <title>Domain Adaptation for <fixed-case>H</fixed-case>indi-<fixed-case>T</fixed-case>elugu Machine Translation Using Domain Specific Back Translation</title>
       <author><first>Hema</first><last>Ala</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>26–34</pages>
       <abstract>In this paper, we present a novel approachfor domain adaptation in Neural MachineTranslation which aims to improve thetranslation quality over a new domain. Adapting new domains is a highly challeng-ing task for Neural Machine Translation onlimited data, it becomes even more diffi-cult for technical domains such as Chem-istry and Artificial Intelligence due to spe-cific terminology, etc. We propose DomainSpecific Back Translation method whichuses available monolingual data and gen-erates synthetic data in a different way. This approach uses Out Of Domain words. The approach is very generic and can beapplied to any language pair for any domain. We conduct our experiments onChemistry and Artificial Intelligence do-mains for Hindi and Telugu in both direc-tions. It has been observed that the usageof synthetic data created by the proposedalgorithm improves the BLEU scores significantly.</abstract>
       <url hash="4d1fbfec">2021.ranlp-1.4</url>
@@ -86,7 +86,7 @@
     <paper id="8">
       <title>Comparing Supervised Machine Learning Techniques for Genre Analysis in Software Engineering Research Articles</title>
       <author><first>Felipe</first><last>Araújo de Britto</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Leonardo Pereira</first><last>Nunes</last></author>
       <author><first>Fernando</first><last>Silva Parreiras</last></author>
       <pages>63–72</pages>
@@ -97,7 +97,7 @@
     <paper id="9">
       <title>Enriching the Transformer with Linguistic Factors for Low-Resource Machine Translation</title>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
       <pages>73–78</pages>
       <abstract>Introducing factors, that is to say, word features such as linguistic information referring to the source tokens, is known to improve the results of neural machine translation systems in certain settings, typically in recurrent architectures. This study proposes enhancing the current state-of-the-art neural machine translation architecture, the Transformer, so that it allows to introduce external knowledge. In particular, our proposed modification, the Factored Transformer, uses linguistic factors that insert additional knowledge into the machine translation system. Apart from using different kinds of features, we study the effect of different architectural configurations. Specifically, we analyze the performance of combining words and features at the embedding level or at the encoder level, and we experiment with two different combination strategies. With the best-found configuration, we show improvements of 0.8 BLEU over the baseline Transformer in the IWSLT German-to-English task. Moreover, we experiment with the more challenging FLoRes English-to-Nepali benchmark, which includes both extremely low-resourced and very distant languages, and obtain an improvement of 1.2 BLEU</abstract>
@@ -212,7 +212,7 @@
       <author><first>Sonja</first><last>Remmer</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
       <author><first>Hercules</first><last>Dalianis</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <pages>165–172</pages>
       <abstract>We introduce a multi-label text classifier with per-label attention for the classification of Electronic Health Records according to the International Classification of Diseases. We apply the model on two Electronic Health Records datasets with Discharge Summaries in two languages with fewer resources than English, Spanish and Swedish. Our model leverages the BERT Multilingual model (specifically the Wikipedia, as the model have been trained with 104 languages, including Spanish and Swedish, with the largest Wikipedia dumps) to share the language modelling capabilities across the languages. With the per-label attention, the model can compute the relevance of each word from the EHR towards the prediction of each label. For the experimental framework, we apply 157 labels from Chapter XI – Diseases of the Digestive System of the ICD, which makes the attention especially important as the model has to discriminate between similar diseases. 1 <url>https://github.com/google-research/bert/blob/master/multilingual.md#list-of-languages</url></abstract>
       <url hash="d06ba45b">2021.ranlp-1.20</url>
@@ -234,7 +234,7 @@
       <author><first>Ivan</first><last>Koychev</last></author>
       <author><first>Maria</first><last>Castaldo</last></author>
       <author><first>Tommaso</first><last>Venturini</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>182–189</pages>
       <abstract>We propose a novel framework for predicting the factuality of reporting of news media outlets by studying the user attention cycles in their YouTube channels. In particular, we design a rich set of features derived from the temporal evolution of the number of views, likes, dislikes, and comments for a video, which we then aggregate to the channel level. We develop and release a dataset for the task, containing observations of user attention on YouTube channels for 489 news media. Our experiments demonstrate both complementarity and sizable improvements over state-of-the-art textual representations.</abstract>
       <url hash="858a220a">2021.ranlp-1.22</url>
@@ -253,7 +253,7 @@
       <title>A Psychologically Informed Part-of-Speech Analysis of Depression in Social Media</title>
       <author><first>Ana-Maria</first><last>Bucur</last></author>
       <author><first>Ioana R.</first><last>Podina</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>199–207</pages>
       <abstract>In this work, we provide an extensive part-of-speech analysis of the discourse of social media users with depression. Research in psychology revealed that depressed users tend to be self-focused, more preoccupied with themselves and ruminate more about their lives and emotions. Our work aims to make use of large-scale datasets and computational methods for a quantitative exploration of discourse. We use the publicly available depression dataset from the Early Risk Prediction on the Internet Workshop (eRisk) 2018 and extract part-of-speech features and several indices based on them. Our results reveal statistically significant differences between the depressed and non-depressed individuals confirming findings from the existing psychology literature. Our work provides insights regarding the way in which depressed individuals are expressing themselves on social media platforms, allowing for better-informed computational models to help monitor and prevent mental illnesses.</abstract>
       <url hash="41e9cd41">2021.ranlp-1.24</url>
@@ -275,10 +275,10 @@
       <author><first>Hian</first><last>Cañizares-Díaz</last></author>
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Yudivián</first><last>Almeida Cruz</last></author>
-      <author><first>Andres</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz-Guillena</last></author>
+      <author id="andres-montoyo"><first>Andres</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz-Guillena</last></author>
       <pages>216–225</pages>
       <abstract>This paper presents an active learning approach that aims to reduce the human effort required during the annotation of natural language corpora composed of entities and semantic relations. Our approach assists human annotators by intelligently selecting the most informative sentences to annotate and then pre-annotating them with a few highly accurate entities and semantic relations. We define an uncertainty-based query strategy with a weighted density factor, using similarity metrics based on sentence embeddings. As a case study, we evaluate our approach via simulation in a biomedical corpus and estimate the potential reduction in total annotation time. Experimental results suggest that the query strategy reduces by between 35% and 40% the number of sentences that must be manually annotated to develop systems able to reach a target F1 score, while the pre-annotation strategy produces an additional 24% reduction in the total annotation time. Overall, our preliminary experiments suggest that as much as 60% of the annotation time could be saved while producing corpora that have the same usefulness for training machine learning algorithms. An open-source computational tool that implements the aforementioned strategies is presented and published online for the research community.</abstract>
       <url hash="030279a8">2021.ranlp-1.26</url>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="28">
       <title>Evaluating Recognizing Question Entailment Methods for a <fixed-case>P</fixed-case>ortuguese Community Question-Answering System about Diabetes Mellitus</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>João</first><last>Victor de Pinho Costa</last></author>
       <author><first>Isabela</first><last>Rigotto</last></author>
       <author><first>Vitoria</first><last>Portella</last></author>
@@ -323,8 +323,8 @@
     <paper id="29">
       <title>On the Usability of Transformers-based Models for a <fixed-case>F</fixed-case>rench Question-Answering Task</title>
       <author><first>Oralie</first><last>Cattan</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>244–255</pages>
       <abstract>For many tasks, state-of-the-art results have been achieved with Transformer-based architectures, resulting in a paradigmatic shift in practices from the use of task-specific architectures to the fine-tuning of pre-trained language models. The ongoing trend consists in training models with an ever-increasing amount of data and parameters, which requires considerable resources. It leads to a strong search to improve resource efficiency based on algorithmic and hardware improvements evaluated only for English. This raises questions about their usability when applied to small-scale learning problems, for which a limited amount of training data is available, especially for under-resourced languages tasks. The lack of appropriately sized corpora is a hindrance to applying data-driven and transfer learning-based approaches with strong instability cases. In this paper, we establish a state-of-the-art of the efforts dedicated to the usability of Transformer-based models and propose to evaluate these improvements on the question-answering performances of French language which have few resources. We address the instability relating to data scarcity by investigating various training strategies with data augmentation, hyperparameters optimization and cross-lingual transfer. We also introduce a new compact model for French FrALBERT which proves to be competitive in low-resource settings.</abstract>
       <url hash="09c969d4">2021.ranlp-1.29</url>
@@ -343,7 +343,7 @@
       <title>Character-based <fixed-case>T</fixed-case>hai Word Segmentation with Multiple Attentions</title>
       <author><first>Thodsaporn</first><last>Chay-intr</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>264–273</pages>
       <abstract>Character-based word-segmentation models have been extensively applied to agglutinative languages, including Thai, due to their high performance. These models estimate word boundaries from a character sequence. However, a character unit in sequences has no essential meaning, compared with word, subword, and character cluster units. We propose a Thai word-segmentation model that uses various types of information, including words, subwords, and character clusters, from a character sequence. Our model applies multiple attentions to refine segmentation inferences by estimating the significant relationships among characters and various unit types. The experimental results indicate that our model can outperform other state-of-the-art Thai word-segmentation models.</abstract>
       <url hash="d2d99700">2021.ranlp-1.31</url>
@@ -372,7 +372,7 @@
     <paper id="34">
       <title><fixed-case>RED</fixed-case>: A Novel Dataset for <fixed-case>R</fixed-case>omanian Emotion Detection from Tweets</title>
       <author><first>Alexandra</first><last>Ciobotaru</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>291–300</pages>
       <abstract>In Romanian language there are some resources for automatic text comprehension, but for Emotion Detection, not lexicon-based, there are none. To cover this gap, we extracted data from Twitter and created the first dataset containing tweets annotated with five types of emotions: joy, fear, sadness, anger and neutral, with the intent of being used for opinion mining and analysis tasks. In this article we present some features of our novel dataset, and create a benchmark to achieve the first supervised machine learning model for automatic Emotion Detection in Romanian short texts. We investigate the performance of four classical machine learning models: Multinomial Naive Bayes, Logistic Regression, Support Vector Classification and Linear Support Vector Classification. We also investigate more modern approaches like fastText, which makes use of subword information. Lastly, we fine-tune the Romanian BERT for text classification and our experiments show that the BERT-based model has the best performance for the task of Emotion Detection from Romanian tweets. Keywords: Emotion Detection, Twitter, Romanian, Supervised Machine Learning</abstract>
       <url hash="3c209b17">2021.ranlp-1.34</url>
@@ -401,7 +401,7 @@
       <title>Towards an Etymological Map of <fixed-case>R</fixed-case>omanian</title>
       <author><first>Alina Maria</first><last>Cristea</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Simona</first><last>Georgescu</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Laurentiu</first><last>Zoicas</last></author>
@@ -414,7 +414,7 @@
       <title>A Syntax-Aware Edit-based System for Text Simplification</title>
       <author><first>Oscar M.</first><last>Cumbicus-Pineda</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>324–334</pages>
       <abstract>Edit-based text simplification systems have attained much attention in recent years due to their ability to produce simplification solutions that are interpretable, as well as requiring less training examples compared to traditional seq2seq systems. Edit-based systems learn edit operations at a word level, but it is well known that many of the operations performed when simplifying text are of a syntactic nature. In this paper we propose to add syntactic information into a well known edit-based system. We extend the system with a graph convolutional network module that mimics the dependency structure of the sentence, thus giving the model an explicit representation of syntax. We perform a series of experiments in English, Spanish and Italian, and report improvements of the state of the art in four out of five datasets. Further analysis shows that syntactic information is always beneficial, and suggest that syntax is more helpful in complex sentences.</abstract>
       <url hash="d0a5c594">2021.ranlp-1.38</url>
@@ -423,8 +423,8 @@
     <paper id="39">
       <title>On Generating Fact-Infused Question Variations</title>
       <author><first>Arthur</first><last>Deschamps</last></author>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>335–345</pages>
       <abstract>To fully model human-like ability to ask questions, automatic question generation (QG) models must be able to produce multiple expressions of the same question with different levels of detail. Unfortunately, existing datasets available for learning QG do not include paraphrases or question variations affecting a model’s ability to learn this capability. We present FIRS, a dataset containing human-generated fact-infused rewrites of questions from the widely-used SQuAD dataset to address this limitation. Questions in FIRS were obtained by combining a given question with facts of entities referenced in the question. We study a double encoder-decoder model, Fact-Infused Question Generator (FIQG), for learning to generate fact-infused questions from a given question. Experimental results show that FIQG effectively incorporates information from facts to add more detail to a given question. To the best of our knowledge, ours is the first study to present fact-infusion as a novel form of question paraphrasing.</abstract>
       <url hash="d696a278">2021.ranlp-1.39</url>
@@ -433,8 +433,8 @@
     <paper id="40">
       <title>Event Prominence Extraction Combining a Knowledge-Based Syntactic Parser and a <fixed-case>BERT</fixed-case> Classifier for <fixed-case>D</fixed-case>utch</title>
       <author><first>Thierry</first><last>Desot</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>346–357</pages>
       <abstract>A core task in information extraction is event detection that identifies event triggers in sentences that are typically classified into event types. In this study an event is considered as the unit to measure diversity and similarity in news articles in the framework of a news recommendation system. Current typology-based event detection approaches fail to handle the variety of events expressed in real-world situations. To overcome this, we aim to perform event salience classification and explore whether a transformer model is capable of classifying new information into less and more general prominence classes. After comparing a Support Vector Machine (SVM) baseline and our transformer-based classifier performances on several event span formats, we conceived multi-word event spans as syntactic clauses. Those are fed into our prominence classifier which is fine-tuned on pre-trained Dutch BERT word embeddings. On top of that we outperform a pipeline of a Conditional Random Field (CRF) approach to event-trigger word detection and the BERT-based classifier. To the best of our knowledge we present the first event extraction approach that combines an expert-based syntactic parser with a transformer-based classifier for Dutch.</abstract>
       <url hash="6ea7b948">2021.ranlp-1.40</url>
@@ -460,9 +460,9 @@
     </paper>
     <paper id="43">
       <title>Tracing Source Language Interference in Translation with Graph-Isomorphism Measures</title>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>375–385</pages>
       <abstract>Previous research has used linguistic features to show that translations exhibit traces of source language interference and that phylogenetic trees between languages can be reconstructed from the results of translations into the same language. Recent research has shown that instances of translationese (source language interference) can even be detected in embedding spaces, comparing embeddings spaces of original language data with embedding spaces resulting from translations into the same language, using a simple Eigenvector-based divergence from isomorphism measure. To date, it remains an open question whether alternative graph-isomorphism measures can produce better results. In this paper, we (i) explore Gromov-Hausdorff distance, (ii) present a novel spectral version of the Eigenvector-based method, and (iii) evaluate all approaches against a broad linguistic typological database (URIEL). We show that language distances resulting from our spectral isomorphism approaches can reproduce genetic trees on a par with previous work without requiring any explicit linguistic information and that the results can be extended to non-Indo-European languages. Finally, we show that the methods are robust under a variety of modeling conditions.</abstract>
       <url hash="bc1d80ed">2021.ranlp-1.43</url>
@@ -491,8 +491,8 @@
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
       <author><first>Yoan</first><last>Gutierrez</last></author>
-      <author><first>Andres</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="andres-montoyo"><first>Andres</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Yudivián</first><last>Almeida Cruz</last></author>
       <pages>402–410</pages>
       <abstract>This paper presents the preliminary results of an ongoing project that analyzes the growing body of scientific research published around the COVID-19 pandemic. In this research, a general-purpose semantic model is used to double annotate a batch of 500 sentences that were manually selected from the CORD-19 corpus. Afterwards, a baseline text-mining pipeline is designed and evaluated via a large batch of 100,959 sentences. We present a qualitative analysis of the most interesting facts automatically extracted and highlight possible future lines of development. The preliminary results show that general-purpose semantic models are a useful tool for discovering fine-grained knowledge in large corpora of scientific documents.</abstract>
@@ -516,7 +516,7 @@
       <author><first>Chenlong</first><last>Hu</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>421–427</pages>
       <abstract>Character-aware neural language models can capture the relationship between words by exploiting character-level information and are particularly effective for languages with rich morphology. However, these models are usually biased towards information from surface forms. To alleviate this problem, we propose a simple and effective method to improve a character-aware neural language model by forcing a character encoder to produce word-based embeddings under Skip-gram architecture in a warm-up step without extra training data. We empirically show that the resulting character-aware neural language model achieves obvious improvements of perplexity scores on typologically diverse languages, that contain many low-frequency or unseen words.</abstract>
       <url hash="d42325b2">2021.ranlp-1.48</url>
@@ -558,7 +558,7 @@
     <paper id="52">
       <title>A Dynamic Head Importance Computation Mechanism for Neural Machine Translation</title>
       <author><first>Akshay</first><last>Goindani</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>454–462</pages>
       <abstract>Multiple parallel attention mechanisms that use multiple attention heads facilitate greater performance of the Transformer model for various applications e.g., Neural Machine Translation (NMT), text classification. In multi-head attention mechanism, different heads attend to different parts of the input. However, the limitation is that multiple heads might attend to the same part of the input, resulting in multiple heads being redundant. Thus, the model resources are under-utilized. One approach to avoid this is to prune least important heads based on certain importance score. In this work, we focus on designing a Dynamic Head Importance Computation Mechanism (DHICM) to dynamically calculate the importance of a head with respect to the input. Our insight is to design an additional attention layer together with multi-head attention, and utilize the outputs of the multi-head attention along with the input, to compute the importance for each head. Additionally, we add an extra loss function to prevent the model from assigning same score to all heads, to identify more important heads and improvise performance. We analyzed performance of DHICM for NMT with different languages. Experiments on different datasets show that DHICM outperforms traditional Transformer-based approach by large margin, especially, when less training data is available.</abstract>
       <url hash="9b38a7cb">2021.ranlp-1.52</url>
@@ -596,7 +596,7 @@
       <title>Apples to Apples: A Systematic Evaluation of Topic Models</title>
       <author><first>Ismail</first><last>Harrando</last></author>
       <author><first>Pasquale</first><last>Lisena</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <pages>483–493</pages>
       <abstract>From statistical to neural models, a wide variety of topic modelling algorithms have been proposed in the literature. However, because of the diversity of datasets and metrics, there have not been many efforts to systematically compare their performance on the same benchmarks and under the same conditions. In this paper, we present a selection of 9 topic modelling techniques from the state of the art reflecting a diversity of approaches to the task, an overview of the different metrics used to compare their performance, and the challenges of conducting such a comparison. We empirically evaluate the performance of these models on different settings reflecting a variety of real-life conditions in terms of dataset size, number of topics, and distribution of topics, following identical preprocessing and evaluation processes. Using both metrics that rely on the intrinsic characteristics of the dataset (different coherence metrics), as well as external knowledge (word embeddings and ground-truth topic labels), our experiments reveal several shortcomings regarding the common practices in topic models evaluation.</abstract>
       <url hash="52778976">2021.ranlp-1.55</url>
@@ -776,7 +776,7 @@
     <paper id="73">
       <title><fixed-case>BERT</fixed-case>-<fixed-case>P</fixed-case>ers<fixed-case>NER</fixed-case>: A New Model for <fixed-case>P</fixed-case>ersian Named Entity Recognition</title>
       <author><first>Farane</first><last>Jalali Farahani</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last></author>
       <pages>647–654</pages>
       <abstract>Named entity recognition (NER) is one of the major tasks in natural language processing. A named entity is often a word or expression that bears a valuable piece of information, which can be effectively employed by some major NLP tasks such as machine translation, question answering, and text summarization. In this paper, we introduce a new model called BERT-PersNER (BERT based Persian Named Entity Recognizer), in which we have applied transfer learning and active learning approaches to NER in Persian, which is regarded as a low-resource language. Like many others, we have used Conditional Random Field for tag decoding in our proposed architecture. BERT-PersNER has outperformed two available studies in Persian NER, in most cases of our experiments using the supervised learning approach on two Persian datasets called Arman and Peyma. Besides, as the very first effort to try active learning in the Persian NER, using only 30% of Arman and 20% of Peyma, we respectively achieved 92.15%, and 92.41% performance of the mentioned supervised learning experiments.</abstract>
       <url hash="f3659c63">2021.ranlp-1.73</url>
@@ -873,7 +873,7 @@
       <title>Multilingual Multi-Domain <fixed-case>NMT</fixed-case> for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Sourav</first><last>Kumar</last></author>
       <author><first>Salil</first><last>Aggarwal</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>727–733</pages>
       <abstract>India is known as the land of many tongues and dialects. Neural machine translation (NMT) is the current state-of-the-art approach for machine translation (MT) but performs better only with large datasets which Indian languages usually lack, making this approach infeasible. So, in this paper, we address the problem of data scarcity by efficiently training multilingual and multilingual multi domain NMT systems involving languages of the 𝐈𝐧𝐝𝐢𝐚𝐧 𝐬𝐮𝐛𝐜𝐨𝐧𝐭𝐢𝐧𝐞𝐧𝐭. We are proposing the technique for using the joint domain and language tags in a multilingual setup. We draw three major conclusions from our experiments: (i) Training a multilingual system via exploiting lexical similarity based on language family helps in achieving an overall average improvement of 𝟑.𝟐𝟓 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 over bilingual baselines, (ii) Technique of incorporating domain information into the language tokens helps multilingual multi-domain system in getting a significant average improvement of 𝟔 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 over the baselines, (iii) Multistage fine-tuning further helps in getting an improvement of 𝟏-𝟏.𝟓 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 for the language pair of interest.</abstract>
       <url hash="7ffffbd3">2021.ranlp-1.83</url>
@@ -931,7 +931,7 @@
       <author><first>Naoki</first><last>Kobayashi</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>770–779</pages>
       <abstract>In the social media, users frequently use small images called emojis in their posts. Although using emojis in texts plays a key role in recent communication systems, less attention has been paid on their positions in the given texts, despite that users carefully choose and put an emoji that matches their post. Exploring positions of emojis in texts will enhance understanding of the relationship between emojis and texts. We extend an emoji label prediction task taking into account the information of emoji positions, by jointly learning the emoji position in a tweet to predict the emoji label. The results demonstrate that the position of emojis in texts is a good clue to boost the performance of emoji label prediction. Human evaluation validates that there exists a suitable emoji position in a tweet, and our proposed task is able to make tweets more fancy and natural. In addition, considering emoji position can further improve the performance for the irony detection task compared to the emoji label prediction. We also report the experimental results for the modified dataset, due to the problem of the original dataset for the first shared task to predict an emoji label in SemEval2018.</abstract>
       <url hash="7b90d5ba">2021.ranlp-1.88</url>
@@ -940,7 +940,7 @@
     <paper id="89">
       <title>Addressing Slot-Value Changes in Task-oriented Dialogue Systems through Dialogue Domain Adaptation</title>
       <author><first>Tiziano</first><last>Labruna</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>780–789</pages>
       <abstract>Recent task-oriented dialogue systems learn a model from annotated dialogues, and such dialogues are in turn collected and annotated so that they are consistent with certain domain knowledge. However, in real scenarios, domain knowledge is subject to frequent changes, and initial training dialogues may soon become obsolete, resulting in a significant decrease in the model performance. In this paper, we investigate the relationship between training dialogues and domain knowledge, and propose Dialogue Domain Adaptation, a methodology aiming at adapting initial training dialogues to changes intervened in the domain knowledge. We focus on slot-value changes (e.g., when new slot values are available to describe domain entities) and define an experimental setting for dialogue domain adaptation. First, we show that current state-of-the-art models for dialogue state tracking are still poorly robust to slot-value changes of the domain knowledge. Then, we compare different domain adaptation strategies, showing that simple techniques are effective to reduce the gap between training dialogues and domain knowledge.</abstract>
       <url hash="5bb8fd9f">2021.ranlp-1.89</url>
@@ -958,7 +958,7 @@
     </paper>
     <paper id="91">
       <title>Text Retrieval for Language Learners: Graded Vocabulary vs. Open Learner Model</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
       <pages>798–804</pages>
       <abstract>A text retrieval system for language learning returns reading materials at the appropriate difficulty level for the user. The system typically maintains a learner model on the user’s vocabulary knowledge, and identifies texts that best fit the model. As the user’s language proficiency increases, model updates are necessary to retrieve texts with the corresponding lexical complexity. We investigate an open learner model that allows user modification of its content, and evaluate its effectiveness with respect to the amount of user update effort. We compare this model with the graded approach, in which the system returns texts at the optimal grade. When the user makes at least half of the expected updates to the open learner model, simulation results show that it outperforms the graded approach in retrieving texts that fit user preference for new-word density.</abstract>
@@ -1035,9 +1035,9 @@
       <title><fixed-case>G</fixed-case>e<fixed-case>SERA</fixed-case>: General-domain Summary Evaluation by Relevance Analysis</title>
       <author><first>Jessica</first><last>López Espejel</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
-      <author><first>Jorge</first><last>Garcia Flores</last></author>
+      <author id="jorge-garcia-flores"><first>Jorge</first><last>Garcia Flores</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <pages>856–867</pages>
       <abstract>We present GeSERA, an open-source improved version of SERA for evaluating automatic extractive and abstractive summaries from the general domain. SERA is based on a search engine that compares candidate and reference summaries (called queries) against an information retrieval document base (called index). SERA was originally designed for the biomedical domain only, where it showed a better correlation with manual methods than the widely used lexical-based ROUGE method. In this paper, we take out SERA from the biomedical domain to the general one by adapting its content-based method to successfully evaluate summaries from the general domain. First, we improve the query reformulation strategy with POS Tags analysis of general-domain corpora. Second, we replace the biomedical index used in SERA with two article collections from AQUAINT-2 and Wikipedia. We conduct experiments with TAC2008, TAC2009, and CNNDM datasets. Results show that, in most cases, GeSERA achieves higher correlations with manual evaluation methods than SERA, while it reduces its gap with ROUGE for general-domain summary evaluation. GeSERA even surpasses ROUGE in two cases of TAC2009. Finally, we conduct extensive experiments and provide a comprehensive study of the impact of human annotators and the index size on summary evaluation with SERA and GeSERA.</abstract>
       <url hash="e0cb09db">2021.ranlp-1.98</url>
@@ -1047,7 +1047,7 @@
       <title>On the Interaction between Annotation Quality and Classifier Performance in Abusive Language Detection</title>
       <author><first>Holly</first><last>Lopez Long</last></author>
       <author><first>Alexandra</first><last>O’Neil</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>868–875</pages>
       <abstract>Abusive language detection has become an important tool for the cultivation of safe online platforms. We investigate the interaction of annotation quality and classifier performance. We use a new, fine-grained annotation scheme that allows us to distinguish between abusive language and colloquial uses of profanity that are not meant to harm. Our results show a tendency of crowd workers to overuse the abusive class, which creates an unrealistic class balance and affects classification accuracy. We also investigate different methods of distinguishing between explicit and implicit abuse and show lexicon-based approaches either over- or under-estimate the proportion of explicit abuse in data sets.</abstract>
       <url hash="9368fe61">2021.ranlp-1.99</url>
@@ -1055,7 +1055,7 @@
     </paper>
     <paper id="100">
       <title><fixed-case>NEREL</fixed-case>: A <fixed-case>R</fixed-case>ussian Dataset with Nested Named Entities, Relations and Events</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Ekaterina</first><last>Artemova</last></author>
       <author><first>Tatiana</first><last>Batura</last></author>
       <author><first>Pavel</first><last>Braslavski</last></author>
@@ -1073,7 +1073,7 @@
       <title>Active Learning for Interactive Relation Extraction in a <fixed-case>F</fixed-case>rench Newspaper’s Articles</title>
       <author><first>Cyrielle</first><last>Mallart</last></author>
       <author><first>Michel</first><last>Le Nouy</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>886–894</pages>
       <abstract>Relation extraction is a subtask of natural langage processing that has seen many improvements in recent years, with the advent of complex pre-trained architectures. Many of these state-of-the-art approaches are tested against benchmarks with labelled sentences containing tagged entities, and require important pre-training and fine-tuning on task-specific data. However, in a real use-case scenario such as in a newspaper company mostly dedicated to local information, relations are of varied, highly specific type, with virtually no annotated data for such relations, and many entities co-occur in a sentence without being related. We question the use of supervised state-of-the-art models in such a context, where resources such as time, computing power and human annotators are limited. To adapt to these constraints, we experiment with an active-learning based relation extraction pipeline, consisting of a binary LSTM-based lightweight model for detecting the relations that do exist, and a state-of-the-art model for relation classification. We compare several choices for classification models in this scenario, from basic word embedding averaging, to graph neural networks and Bert-based ones, as well as several active learning acquisition strategies, in order to find the most cost-efficient yet accurate approach in our French largest daily newspaper company’s use case.</abstract>
@@ -1096,7 +1096,7 @@
       <author><first>Milena</first><last>Slavcheva</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Ivaylo</first><last>Radev</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>901–909</pages>
       <abstract>The paper describes a system for automatic summarization in English language of online news data that come from different non-English languages. The system is designed to be used in production environment for media monitoring. Automatic summarization can be very helpful in this domain when applied as a helper tool for journalists so that they can review just the important information from the news channels. However, like every software solution, the automatic summarization needs performance monitoring and assured safe environment for the clients. In media monitoring environment the most problematic features to be addressed are: the copyright issues, the factual consistency, the style of the text and the ethical norms in journalism. Thus, the main contribution of our present work is that the above mentioned characteristics are successfully monitored in neural automatic summarization models and improved with the help of validation, fact-preserving and fact-checking procedures.</abstract>
       <url hash="3b8dfd1d">2021.ranlp-1.103</url>
@@ -1155,7 +1155,7 @@
       <title>Ranking Online Reviews Based on Their Helpfulness: An Unsupervised Approach</title>
       <author><first>Alimuddin</first><last>Melleng</last></author>
       <author><first>Anna</first><last>Jurek-Loughrey</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>959–967</pages>
       <abstract>Online reviews are an essential aspect of online shopping for both customers and retailers. However, many reviews found on the Internet lack in quality, informativeness or helpfulness. In many cases, they lead the customers towards positive or negative opinions without providing any concrete details (e.g., very poor product, I would not recommend it). In this work, we propose a novel unsupervised method for quantifying helpfulness leveraging the availability of a corpus of reviews. In particular, our method exploits three characteristics of the reviews, viz., relevance, emotional intensity and specificity, towards quantifying helpfulness. We perform three rankings (one for each feature above), which are then combined to obtain a final helpfulness ranking. For the purpose of empirically evaluating our method, we use review of four product categories from Amazon review. The experimental evaluation demonstrates the effectiveness of our method in comparison to a recent and state-of-the-art baseline.</abstract>
       <url hash="91db477f">2021.ranlp-1.109</url>
@@ -1166,7 +1166,7 @@
       <author><first>Marius</first><last>Mosbach</last></author>
       <author><first>Irina</first><last>Stenger</last></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>968–977</pages>
       <abstract>We present an extended version of a tool developed for calculating linguistic distances and asymmetries in auditory perception of closely related languages. Along with evaluating the metrics available in the initial version of the tool, we introduce word adaptation entropy as an additional metric of linguistic asymmetry. Potential predictors of speech intelligibility are validated with human performance in spoken cognate recognition experiments for Bulgarian and Russian. Special attention is paid to the possibly different contributions of vowels and consonants in oral intercomprehension. Using incom.py 2.0 it is possible to calculate, visualize, and validate three measurement methods of linguistic distances and asymmetries as well as carrying out regression analyses in speech intelligibility between related languages.</abstract>
@@ -1195,7 +1195,7 @@
     </paper>
     <paper id="113">
       <title><fixed-case>COVID</fixed-case>-19 in <fixed-case>B</fixed-case>ulgarian Social Media: Factuality, Harmfulness, Propaganda, and Framing</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Shaden</first><last>Shaar</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
@@ -1207,7 +1207,7 @@
     </paper>
     <paper id="114">
       <title>A Second Pandemic? Analysis of Fake News about <fixed-case>COVID</fixed-case>-19 Vaccines in <fixed-case>Q</fixed-case>atar</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Shaden</first><last>Shaar</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
@@ -1219,7 +1219,7 @@
     </paper>
     <paper id="115">
       <title>A Hierarchical Entity Graph Convolutional Network for Relation Extraction across Documents</title>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>1022–1030</pages>
       <abstract>Distantly supervised datasets for relation extraction mostly focus on sentence-level extraction, and they cover very few relations. In this work, we propose cross-document relation extraction, where the two entities of a relation tuple appear in two different documents that are connected via a chain of common entities. Following this idea, we create a dataset for two-hop relation extraction, where each chain contains exactly two documents. Our proposed dataset covers a higher number of relations than the publicly available sentence-level datasets. We also propose a hierarchical entity graph convolutional network (HEGCN) model for this task that improves performance by 1.1% F1 score on our two-hop relation extraction dataset, compared to some strong neural baselines.</abstract>
@@ -1228,7 +1228,7 @@
     </paper>
     <paper id="116">
       <title>Improving Distantly Supervised Relation Extraction with Self-Ensemble Noise Filtering</title>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>1031–1039</pages>
@@ -1250,7 +1250,7 @@
       <title>Extending a Text-to-Pictograph System to <fixed-case>F</fixed-case>rench and to Arasaac</title>
       <author><first>Magali</first><last>Norré</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <pages>1050–1059</pages>
       <abstract>We present an adaptation of the Text-to-Picto system, initially designed for Dutch, and extended to English and Spanish. The original system, aimed at people with an intellectual disability, automatically translates text into pictographs (Sclera and Beta). We extend it to French and add a large set of Arasaac pictographs linked to WordNet 3.1. To carry out this adaptation, we automatically link the pictographs and their metadata to synsets of two French WordNets and leverage this information to translate words into pictographs. We automatically and manually evaluate our system with different corpora corresponding to different use cases, including one for medical communication between doctors and patients. The system is also compared to similar systems in other languages.</abstract>
@@ -1301,7 +1301,7 @@
       <title><fixed-case>O</fixed-case>ffend<fixed-case>ES</fixed-case>: A New Corpus in <fixed-case>S</fixed-case>panish for Offensive Language Research</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <author><first>María-Teresa</first><last>Martín-Valdivia</last></author>
       <pages>1096–1108</pages>
       <abstract>Offensive language detection and analysis has become a major area of research in Natural Language Processing. The freedom of participation in social media has exposed online users to posts designed to denigrate, insult or hurt them according to gender, race, religion, ideology, or other personal characteristics. Focusing on young influencers from the well-known social platforms of Twitter, Instagram, and YouTube, we have collected a corpus composed of 47,128 Spanish comments manually labeled on offensive pre-defined categories. A subset of the corpus attaches a degree of confidence to each label, so both multi-class classification and multi-output regression studies are possible. In this paper, we introduce the corpus, discuss its building process, novelties, and some preliminary experiments with it to serve as a baseline for the research community.</abstract>
@@ -1310,7 +1310,7 @@
     </paper>
     <paper id="124">
       <title>On Machine Translation of User Reviews</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Alberto</first><last>Poncelas</last></author>
       <author><first>Marija</first><last>Brkic</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -1438,7 +1438,7 @@
     <paper id="137">
       <title>Sentiment-Aware Measure (<fixed-case>SAM</fixed-case>) for Evaluating Sentiment Transfer by Machine Translation Systems</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Emad</first><last>Mohamed</last></author>
       <author><first>Ashraf</first><last>Tantavy</last></author>
       <pages>1217–1226</pages>
@@ -1458,7 +1458,7 @@
     <paper id="139">
       <title>Exploiting Domain-Specific Knowledge for Judgment Prediction Is No Panacea</title>
       <author><first>Olivier</first><last>Salaün</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Karim</first><last>Benyekhlef</last></author>
       <pages>1234–1243</pages>
       <abstract>Legal judgment prediction (LJP) usually consists in a text classification task aimed at predicting the verdict on the basis of the fact description. The literature shows that the use of articles as input features helps improve the classification performance. In this work, we designed a verdict prediction task based on landlord-tenant disputes and we applied BERT-based models to which we fed different article-based features. Although the results obtained are consistent with the literature, the improvements with the articles are mostly obtained with the most frequent labels, suggesting that pre-trained and fine-tuned transformer-based models are not scalable as is for legal reasoning in real life scenarios as they would only excel in accurately predicting the most recurrent verdicts to the detriment of other legal outcomes.</abstract>
@@ -1469,8 +1469,8 @@
       <title>Masking and Transformer-based Models for Hyperpartisanship Detection in News</title>
       <author><first>Javier</first><last>Sánchez-Junquera</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>1244–1251</pages>
       <abstract>Hyperpartisan news show an extreme manipulation of reality based on an underlying and extreme ideological orientation. Because of its harmful effects at reinforcing one’s bias and the posterior behavior of people, hyperpartisan news detection has become an important task for computational linguists. In this paper, we evaluate two different approaches to detect hyperpartisan news. First, a text masking technique that allows us to compare style vs. topic-related features in a different perspective from previous work. Second, the transformer-based models BERT, XLM-RoBERTa, and M-BERT, known for their ability to capture semantic and syntactic patterns in the same representation. Our results corroborate previous research on this task in that topic-related features yield better results than style-based ones, although they also highlight the relevance of using higher-length n-grams. Furthermore, they show that transformer-based models are more effective than traditional methods, but this at the cost of greater computational complexity and lack of transparency. Based on our experiments, we conclude that the beginning of the news show relevant information for the transformers at distinguishing effectively between left-wing, mainstream, and right-wing orientations.</abstract>
       <url hash="e43a0870">2021.ranlp-1.140</url>
@@ -1480,7 +1480,7 @@
       <title><fixed-case>S</fixed-case>erbian <fixed-case>NER</fixed-case>&amp;Beyond: The Archaic and the Modern Intertwinned</title>
       <author><first>Branislava</first><last>Šandrih Todorović</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Milica</first><last>Ikonić Nešić</last></author>
       <pages>1252–1260</pages>
       <abstract>In this work, we present a Serbian literary corpus that is being developed under the umbrella of the “Distant Reading for European Literary History” COST Action CA16204. Using this corpus of novels written more than a century ago, we have developed and made publicly available a Named Entity Recognizer (NER) trained to recognize 7 different named entity types, with a Convolutional Neural Network (CNN) architecture, having F1 score of ≈91% on the test dataset. This model has been further assessed on a separate evaluation dataset. We wrap up with comparison of the developed model with the existing one, followed by a discussion of pros and cons of the both models.</abstract>
@@ -1529,7 +1529,7 @@
       <title>A Case Study of Deep Learning-Based Multi-Modal Methods for Labeling the Presence of Questionable Content in Movie Trailers</title>
       <author><first>Mahsa</first><last>Shafaei</last></author>
       <author><first>Christos</first><last>Smailis</last></author>
-      <author><first>Ioannis</first><last>Kakadiaris</last></author>
+      <author id="ioannis-kakadiaris"><first>Ioannis</first><last>Kakadiaris</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>1297–1307</pages>
       <abstract>In this work, we explore different approaches to combine modalities for the problem of automated age-suitability rating of movie trailers. First, we introduce a new dataset containing videos of movie trailers in English downloaded from IMDB and YouTube, along with their corresponding age-suitability rating labels. Secondly, we propose a multi-modal deep learning pipeline addressing the movie trailer age suitability rating problem. This is the first attempt to combine video, audio, and speech information for this problem, and our experimental results show that multi-modal approaches significantly outperform the best mono and bimodal models in this task.</abstract>
@@ -1571,7 +1571,7 @@
     <paper id="150">
       <title>Exploring <fixed-case>G</fixed-case>erman Multi-Level Text Simplification</title>
       <author><first>Nicolas</first><last>Spring</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
       <pages>1339–1349</pages>
       <abstract>We report on experiments in automatic text simplification (ATS) for German with multiple simplification levels along the Common European Framework of Reference for Languages (CEFR), simplifying standard German into levels A1, A2 and B1. For that purpose, we investigate the use of source labels and pretraining on standard German, allowing us to simplify standard language to a specific CEFR level. We show that these approaches are especially effective in low-resource scenarios, where we are able to outperform a standard transformer baseline. Moreover, we introduce copy labels, which we show can help the model make a distinction between sentences that require further modifications and sentences that can be copied as-is.</abstract>
@@ -1580,7 +1580,7 @@
     </paper>
     <paper id="151">
       <title>Exploring Reliability of Gold Labels for Emotion Detection in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <pages>1350–1359</pages>
       <abstract>Emotion detection from social media posts has attracted noticeable attention from natural language processing (NLP) community in recent years. The ways for obtaining gold labels for training and testing of the systems for automatic emotion detection differ significantly from one study to another, and pose the question of reliability of gold labels and obtained classification results. This study systematically explores several ways for obtaining gold labels for Ekman’s emotion model on Twitter data and the influence of the chosen strategy on the manual classification results.</abstract>
       <url hash="c77ca756">2021.ranlp-1.151</url>
@@ -1588,7 +1588,7 @@
     </paper>
     <paper id="152">
       <title>How to Obtain Reliable Labels for <fixed-case>MBTI</fixed-case> Classification from Texts?</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Seren</first><last>Yenikent</last></author>
       <pages>1360–1368</pages>
       <abstract>Automatic detection of the Myers-Briggs Type Indicator (MBTI) from short posts attracted noticeable attention in the last few years. Recent studies showed that this is quite a difficult task, especially on commonly used Twitter data. Obtaining MBTI labels is also difficult, as human annotation requires trained psychologists, and automatic way of obtaining them is through long questionnaires of questionable usability for the task. In this paper, we present a method for collecting reliable MBTI labels via only four carefully selected questions that can be applied to any type of textual data.</abstract>
@@ -1644,7 +1644,7 @@
     <paper id="158">
       <title><fixed-case>TR</fixed-case>-<fixed-case>SEQ</fixed-case>: Named Entity Recognition Dataset for <fixed-case>T</fixed-case>urkish Search Engine Queries</title>
       <author><first>Berkay</first><last>Topçu</last></author>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <pages>1417–1422</pages>
       <abstract>Recognizing named entities in short search engine queries is a difficult task due to their weaker contextual information compared to long sentences. Standard named entity recognition (NER) systems that are trained on grammatically correct and long sentences fail to perform well on such queries. In this study, we share our efforts towards creating a cleaned and labeled dataset of real Turkish search engine queries (TR-SEQ) and introduce an extended label set to satisfy the search engine needs. A NER system is trained by applying the state-of-the-art deep learning method BERT to the collected data and its high performance on search engine queries is reported. Moreover, we compare our results with the state-of-the-art Turkish NER systems.</abstract>
       <url hash="c4f464ce">2021.ranlp-1.158</url>
@@ -1866,7 +1866,7 @@
       <author><first>Chenlong</first><last>Hu</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1586–1596</pages>
       <abstract>Neural sequence-to-sequence (Seq2Seq) models and BERT have achieved substantial improvements in abstractive document summarization (ADS) without and with pre-training, respectively. However, they sometimes repeatedly attend to unimportant source phrases while mistakenly ignore important ones. We present reconstruction mechanisms on two levels to alleviate this issue. The sequence-level reconstructor reconstructs the whole document from the hidden layer of the target summary, while the word embedding-level one rebuilds the average of word embeddings of the source at the target side to guarantee that as much critical information is included in the summary as possible. Based on the assumption that inverse document frequency (IDF) measures how important a word is, we further leverage the IDF weights in our embedding-level reconstructor. The proposed frameworks lead to promising improvements for ROUGE metrics and human rating on both the CNN/Daily Mail and Newsroom summarization datasets.</abstract>
       <url hash="a4ea0745">2021.ranlp-1.178</url>
@@ -1877,8 +1877,8 @@
       <author><first>Seunghak</first><last>Yu</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1597–1605</pages>
       <abstract>Online users today are exposed to misleading and propagandistic news articles and media posts on a daily basis. To counter thus, a number of approaches have been designed aiming to achieve a healthier and safer online news and media consumption. Automatic systems are able to support humans in detecting such content; yet, a major impediment to their broad adoption is that besides being accurate, the decisions of such systems need also to be interpretable in order to be trusted and widely adopted by users. Since misleading and propagandistic content influences readers through the use of a number of deception techniques, we propose to detect and to show the use of such techniques as a way to offer interpretability. In particular, we define qualitatively descriptive features and we analyze their suitability for detecting deception techniques. We further show that our interpretable features can be easily combined with pre-trained language models, yielding state-of-the-art results.</abstract>
       <url hash="f1c8f4a1">2021.ranlp-1.179</url>
@@ -1886,11 +1886,11 @@
     </paper>
     <paper id="180">
       <title>Generic Mechanism for Reducing Repetitions in Encoder-Decoder Models</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Tatsuya</first><last>Aoki</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1606–1615</pages>
       <abstract>Encoder-decoder models have been commonly used for many tasks such as machine translation and response generation. As previous research reported, these models suffer from generating redundant repetition. In this research, we propose a new mechanism for encoder-decoder models that estimates the semantic difference of a source sentence before and after being fed into the encoder-decoder model to capture the consistency between two sides. This mechanism helps reduce repeatedly generated tokens for a variety of tasks. Evaluation results on publicly available machine translation and response generation datasets demonstrate the effectiveness of our proposal.</abstract>
       <url hash="e735f19e">2021.ranlp-1.180</url>
@@ -1908,7 +1908,7 @@
     <paper id="182">
       <title>Delexicalized Cross-lingual Dependency Parsing for <fixed-case>X</fixed-case>ibe</title>
       <author><first>He</first><last>Zhou</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>1626–1635</pages>
       <abstract>Manually annotating a treebank is time-consuming and labor-intensive. We conduct delexicalized cross-lingual dependency parsing experiments, where we train the parser on one language and test on our target language. As our test case, we use Xibe, a severely under-resourced Tungusic language. We assume that choosing a closely related language as the source language will provide better results than more distant relatives. However, it is not clear how to determine those closely related languages. We investigate three different methods: choosing the typologically closest language, using LangRank, and choosing the most similar language based on perplexity. We train parsing models on the selected languages using UDify and test on different genres of Xibe data. The results show that languages selected based on typology and perplexity scores outperform those predicted by LangRank; Japanese is the optimal source language. In determining the source language, proximity to the target language is more important than large training sizes. Parsing is also influenced by genre differences, but they have little influence as long as the training data is at least as complex as the target.</abstract>
       <url hash="753b5775">2021.ranlp-1.182</url>
@@ -1942,7 +1942,7 @@
       <title>Not All Comments Are Equal: Insights into Comment Moderation from a Topic-Aware Model</title>
       <author><first>Elaine</first><last>Zosa</last></author>
       <author><first>Ravi</first><last>Shekhar</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>1652–1662</pages>
       <abstract>Moderation of reader comments is a significant problem for online news platforms. Here, we experiment with models for automatic moderation, using a dataset of comments from a popular Croatian newspaper. Our analysis shows that while comments that violate the moderation rules mostly share common linguistic and thematic features, their content varies across the different sections of the newspaper. We therefore make our models topic-aware, incorporating semantic features from a topic model into the classification decision. Our results show that topic information improves the performance of the model, increases its confidence in correct outputs, and helps us understand the model’s outputs.</abstract>
@@ -2045,7 +2045,7 @@
       <title>Bilingual Terminology Extraction Using Neural Word Embeddings on Comparable Corpora</title>
       <author><first>Darya</first><last>Filippova</last></author>
       <author><first>Burcu</first><last>Can</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <pages>58–64</pages>
       <abstract>Term and glossary management are vital steps of preparation of every language specialist, and they play a very important role at the stage of education of translation professionals. The growing trend of efficient time management and constant time constraints we may observe in every job sector increases the necessity of the automatic glossary compilation. Many well-performing bilingual AET systems are based on processing parallel data, however, such parallel corpora are not always available for a specific domain or a language pair. Domain-specific, bilingual access to information and its retrieval based on comparable corpora is a very promising area of research that requires a detailed analysis of both available data sources and the possible extraction techniques. This work focuses on domain-specific automatic terminology extraction from comparable corpora for the English – Russian language pair by utilizing neural word embeddings.</abstract>
       <url hash="be75b7e3">2021.ranlp-srw.9</url>
@@ -2097,7 +2097,7 @@
     <paper id="15">
       <title>Paragraph Similarity Matches for Generating Multiple-choice Test Items</title>
       <author><first>Halyna</first><last>Maslak</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>99–108</pages>
       <abstract>Multiple-choice questions (MCQs) are widely used in knowledge assessment in educational institutions, during work interviews, in entertainment quizzes and games. Although the research on the automatic or semi-automatic generation of multiple-choice test items has been conducted since the beginning of this millennium, most approaches focus on generating questions from a single sentence. In this research, a state-of-the-art method of creating questions based on multiple sentences is introduced. It was inspired by semantic similarity matches used in the translation memory component of translation management systems. The performance of two deep learning algorithms, doc2vec and SBERT, is compared for the paragraph similarity task. The experiments are performed on the ad-hoc corpus within the EU domain. For the automatic evaluation, a smaller corpus of manually selected matching paragraphs has been compiled. The results prove the good performance of Sentence Embeddings for the given task.</abstract>
       <url hash="af636f05">2021.ranlp-srw.15</url>
@@ -2105,7 +2105,7 @@
     </paper>
     <paper id="16">
       <title>Neural Borrowing Detection with Monolingual Lexical Models</title>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Emanuel</first><last>Pariasca</last></author>
       <author><first>Cesar</first><last>Beltran Castañon</last></author>
       <pages>109–117</pages>
@@ -2125,7 +2125,7 @@
     <paper id="18">
       <title>On Reducing Repetition in Abstractive Summarization</title>
       <author><first>Pranav</first><last>Nair</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>126–134</pages>
       <abstract>Repetition in natural language generation reduces the informativeness of text and makes it less appealing. Various techniques have been proposed to alleviate it. In this work, we explore and propose techniques to reduce repetition in abstractive summarization. First, we explore the application of unlikelihood training and embedding matrix regularizers from previous work on language modeling to abstractive summarization. Next, we extend the coverage and temporal attention mechanisms to the token level to reduce repetition. In our experiments on the CNN/Daily Mail dataset, we observe that these techniques reduce the amount of repetition and increase the informativeness of the summaries, which we confirm via human evaluation.</abstract>
       <url hash="e29ee0fa">2021.ranlp-srw.18</url>
@@ -2134,7 +2134,7 @@
     <paper id="19">
       <title>Improving Abstractive Summarization with Commonsense Knowledge</title>
       <author><first>Pranav</first><last>Nair</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>135–143</pages>
       <abstract>Large scale pretrained models have demonstrated strong performances on several natural language generation and understanding benchmarks. However, introducing commonsense into them to generate more realistic text remains a challenge. Inspired from previous work on commonsense knowledge generation and generative commonsense reasoning, we introduce two methods to add commonsense reasoning skills and knowledge into abstractive summarization models. Both methods beat the baseline on ROUGE scores, demonstrating the superiority of our models over the baseline. Human evaluation results suggest that summaries generated by our methods are more realistic and have fewer commonsensical errors.</abstract>
       <url hash="e22a129f">2021.ranlp-srw.19</url>
@@ -2190,7 +2190,7 @@
     <paper id="25">
       <title>Towards New Generation Translation Memory Systems</title>
       <author><first>Nikola</first><last>Spasovski</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>180–183</pages>
       <abstract>Despite the enormous popularity of Translation Memory systems and the active research in the field, their language processing features still suffer from certain limitations. While many recent papers focus on semantic matching capabilities of TMs, this planned study will address how these tools perform when dealing with longer segments and whether this could be a cause of lower match scores. An experiment will be carried out on corpora from two different (repetitive) domains. Following the results, recommendations for future developments of new TMs will be made.</abstract>
       <url hash="1699fa53">2021.ranlp-srw.25</url>
@@ -2209,8 +2209,8 @@
       <author><first>Lionel</first><last>Tadonfouet Tadjou</last></author>
       <author><first>Fabrice</first><last>Bourge</last></author>
       <author><first>Tiphaine</first><last>Marie</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>193–202</pages>
       <abstract>In this paper we describe the process of build-ing a corporate corpus that will be used as a ref-erence for modelling and computing threadsfrom conversations generated using commu-nication and collaboration tools. The overallgoal of the reconstruction of threads is to beable to provide value to the collorator in var-ious use cases, such as higlighting the impor-tant parts of a running discussion, reviewingthe upcoming commitments or deadlines, etc. Since, to our knowledge, there is no avail-able corporate corpus for the French languagewhich could allow us to address this prob-lem of thread constitution, we present here amethod for building such corpora includingdifferent aspects and steps which allowed thecreation of a pipeline to pseudo-anonymisedata. Such a pipeline is a response to theconstraints induced by the General Data Pro-tection Regulation GDPR in Europe and thecompliance to the secrecy of correspondence.</abstract>
       <url hash="1ab62445">2021.ranlp-srw.27</url>
@@ -2223,7 +2223,7 @@
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Georgi</first><last>Georgiev</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>203–209</pages>
       <abstract>In education, quiz questions have become an important tool for assessing the knowledge of students. Yet, manually preparing such questions is a tedious task, and thus automatic question generation has been proposed as a possible alternative. So far, the vast majority of research has focused on generating the question text, relying on question answering datasets with readily picked answers, and the problem of how to come up with answer candidates in the first place has been largely ignored. Here, we aim to bridge this gap. In particular, we propose a model that can generate a specified number of answer candidates for a given passage of text, which can then be used by instructors to write questions manually or can be passed as an input to automatic answer-aware question generators. Our experiments show that our proposed answer candidate generation model outperforms several baselines.</abstract>
       <url hash="a7047ccf">2021.ranlp-srw.28</url>
diff --git a/data/xml/2021.reinact.xml b/data/xml/2021.reinact.xml
index 7e309cba48..711fda2ae0 100644
--- a/data/xml/2021.reinact.xml
+++ b/data/xml/2021.reinact.xml
@@ -79,8 +79,8 @@
     </paper>
     <paper id="7">
       <title>Decoupling Pragmatics: Discriminative Decoding for Referring Expression Generation</title>
-      <author><first>Simeon</first><last>Schüz</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>47–52</pages>
       <abstract>The shift to neural models in Referring Expression Generation (REG) has enabled more natural set-ups, but at the cost of interpretability. We argue that integrating pragmatic reasoning into the inference of context-agnostic generation models could reconcile traits of traditional and neural REG, as this offers a separation between context-independent, literal information and pragmatic adaptation to context. With this in mind, we apply existing decoding strategies from discriminative image captioning to REG and evaluate them in terms of pragmatic informativity, likelihood to ground-truth annotations and linguistic diversity. Our results show general effectiveness, but a relatively small gain in informativity, raising important questions for REG in general.</abstract>
       <url hash="1efa35a1">2021.reinact-1.7</url>
@@ -90,7 +90,7 @@
       <title>Generating Justifications in a Spatial Question-Answering Dialogue System for a Blocks World</title>
       <author><first>Georgiy</first><last>Platonov</last></author>
       <author><first>Benjamin</first><last>Kane</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>53–57</pages>
       <abstract>As AI reaches wider adoption, designing systems that are explainable and interpretable becomes a critical necessity. In particular, when it comes to dialogue systems, their reasoning must be transparent and must comply with human intuitions in order for them to be integrated seamlessly into day-to-day collaborative human-machine activities. Here, we describe our ongoing work on a (general purpose) dialogue system equipped with a spatial specialist with explanatory capabilities. We applied this system to a particular task of characterizing spatial configurations of blocks in a simple physical Blocks World (BW) domain using natural locative expressions, as well as generating justifications for the proposed spatial descriptions by indicating the factors that the system used to arrive at a particular conclusion.</abstract>
       <url hash="9cfafd85">2021.reinact-1.8</url>
diff --git a/data/xml/2021.repl4nlp.xml b/data/xml/2021.repl4nlp.xml
index 41ce5ea356..a6a0e0e7a2 100644
--- a/data/xml/2021.repl4nlp.xml
+++ b/data/xml/2021.repl4nlp.xml
@@ -28,7 +28,7 @@
       <author><first>Prithviraj</first><last>Sen</last></author>
       <author><first>Huaiyu</first><last>Zhu</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1–7</pages>
       <abstract>Cross-lingual text classification (CLTC) is a challenging task made even harder still due to the lack of labeled data in low-resource languages. In this paper, we propose zero-shot instance-weighting, a general model-agnostic zero-shot learning framework for improving CLTC by leveraging source instance weighting. It adds a module on top of pre-trained language models for similarity computation of instance weights, thus aligning each source instance to the target language. During training, the framework utilizes gradient descent that is weighted by instance weights to update parameters. We evaluate this framework over seven target languages on three fundamental tasks and show its effectiveness and extensibility, by improving on F1 score up to 4% in single-source transfer and 8% in multi-source transfer. To the best of our knowledge, our method is the first to apply instance weighting in zero-shot CLTC. It is simple yet effective and easily extensible into multi-source transfer.</abstract>
       <url hash="504832bc">2021.repl4nlp-1.1</url>
@@ -98,7 +98,7 @@
     <paper id="7">
       <title>Structure-aware Sentence Encoder in Bert-Based <fixed-case>S</fixed-case>iamese Network</title>
       <author><first>Qiwei</first><last>Peng</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <pages>57–63</pages>
       <abstract>Recently, impressive performance on various natural language understanding tasks has been achieved by explicitly incorporating syntax and semantic information into pre-trained models, such as BERT and RoBERTa. However, this approach depends on problem-specific fine-tuning, and as widely noted, BERT-like models exhibit weak performance, and are inefficient, when applied to unsupervised similarity comparison tasks. Sentence-BERT (SBERT) has been proposed as a general-purpose sentence embedding method, suited to both similarity comparison and downstream tasks. In this work, we show that by incorporating structural information into SBERT, the resulting model outperforms SBERT and previous general sentence encoders on unsupervised semantic textual similarity (STS) datasets and transfer classification tasks.</abstract>
@@ -109,7 +109,7 @@
     <paper id="8">
       <title>Preserving Cross-Linguality of Pre-trained Models via Continual Learning</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>64–71</pages>
@@ -137,7 +137,7 @@
       <author><first>Pengcheng</first><last>Yang</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>83–89</pages>
       <abstract>Conventional Knowledge Graph Completion (KGC) assumes that all test entities appear during training. However, in real-world scenarios, Knowledge Graphs (KG) evolve fast with out-of-knowledge-graph (OOKG) entities added frequently, and we need to efficiently represent these entities. Most existing Knowledge Graph Embedding (KGE) methods cannot represent OOKG entities without costly retraining on the whole KG. To enhance efficiency, we propose a simple and effective method that inductively represents OOKG entities by their optimal estimation under translational assumptions. Moreover, given pretrained embeddings of the in-knowledge-graph (IKG) entities, our method even needs no additional learning. Experimental results on two KGC tasks with OOKG entities show that our method outperforms the previous methods by a large margin with higher efficiency.</abstract>
       <url hash="16a934c4">2021.repl4nlp-1.10</url>
@@ -174,7 +174,7 @@
     <paper id="13">
       <title><fixed-case>X</fixed-case>2<fixed-case>P</fixed-case>arser: Cross-Lingual and Cross-Domain Framework for Task-Oriented Compositional Semantic Parsing</title>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>112–127</pages>
@@ -208,7 +208,7 @@
       <title>Probing Cross-Modal Representations in Multi-Step Relational Reasoning</title>
       <author><first>Iuliia</first><last>Parfenova</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <pages>152–162</pages>
       <abstract>We investigate the representations learned by vision and language models in tasks that require relational reasoning. Focusing on the problem of assessing the relative size of objects in abstract visual contexts, we analyse both one-step and two-step reasoning. For the latter, we construct a new dataset of three-image scenes and define a task that requires reasoning at the level of the individual images and across images in a scene. We probe the learned model representations using diagnostic classifiers. Our experiments show that pretrained multimodal transformer-based architectures can perform higher-level relational reasoning, and are able to learn representations for novel tasks and data that are very different from what was seen in pretraining.</abstract>
@@ -242,7 +242,7 @@
       <title>Deriving Word Vectors from Contextualized Language Models using Topic-Aware Mention Selection</title>
       <author><first>Yixiao</first><last>Wang</last></author>
       <author><first>Zied</first><last>Bouraoui</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>185–194</pages>
       <abstract>One of the long-standing challenges in lexical semantics consists in learning representations of words which reflect their semantic properties. The remarkable success of word embeddings for this purpose suggests that high-quality representations can be obtained by summarizing the sentence contexts of word mentions. In this paper, we propose a method for learning word representations that follows this basic strategy, but differs from standard word embeddings in two important ways. First, we take advantage of contextualized language models (CLMs) rather than bags of word vectors to encode contexts. Second, rather than learning a word vector directly, we use a topic model to partition the contexts in which words appear, and then learn different topic-specific vectors for each word. Finally, we use a task-specific supervision signal to make a soft selection of the resulting vectors. We show that this simple strategy leads to high-quality word vectors, which are more predictive of semantic properties than word embeddings and existing CLM-based strategies.</abstract>
@@ -377,7 +377,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <pages>307–315</pages>
       <abstract>Document-level relation extraction is a challenging task, requiring reasoning over multiple sentences to predict a set of relations in a document. In this paper, we propose a novel framework E2GRE (Entity and Evidence Guided Relation Extraction) that jointly extracts relations and the underlying evidence sentences by using large pretrained language model (LM) as input encoder. First, we propose to guide the pretrained LM’s attention mechanism to focus on relevant context by using attention probabilities as additional features for evidence prediction. Furthermore, instead of feeding the whole document into pretrained LMs to obtain entity representation, we concatenate document text with head entities to help LMs concentrate on parts of the document that are more related to the head entity. Our E2GRE jointly learns relation extraction and evidence prediction effectively, showing large gains on both these tasks, which we find are highly correlated.</abstract>
       <url hash="d034db75">2021.repl4nlp-1.30</url>
diff --git a/data/xml/2021.rocling.xml b/data/xml/2021.rocling.xml
index 59d1dea6ce..e9e3c74cbd 100644
--- a/data/xml/2021.rocling.xml
+++ b/data/xml/2021.rocling.xml
@@ -71,7 +71,7 @@
       <author><first>Tzu-Man</first><last>Wu</last></author>
       <author><first>Aleksandra</first><last>Smolka</last></author>
       <author><first>Chao-Chun</first><last>Liang</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
@@ -182,7 +182,7 @@
       <title>Mining Commonsense and Domain Knowledge from Math Word Problems</title>
       <author><first>Shih-Hung</first><last>Tsai</last></author>
       <author><first>Chao-Chun</first><last>Liang</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>111–117</pages>
       <abstract>Current neural math solvers learn to incorporate commonsense or domain knowledge by utilizing pre-specified constants or formulas. However, as these constants and formulas are mainly human-specified, the generalizability of the solvers is limited. In this paper, we propose to explicitly retrieve the required knowledge from math problemdatasets. In this way, we can determinedly characterize the required knowledge andimprove the explainability of solvers. Our two algorithms take the problem text andthe solution equations as input. Then, they try to deduce the required commonsense and domain knowledge by integrating information from both parts. We construct two math datasets and show the effectiveness of our algorithms that they can retrieve the required knowledge for problem-solving.</abstract>
@@ -338,7 +338,7 @@
       <title>Employing low-pass filtered temporal speech features for the training of ideal ratio mask in speech enhancement</title>
       <author><first>Yan-Tong</first><last>Chen</last></author>
       <author><first>Zi-Qiang</first><last>Lin</last></author>
-      <author><first>Jeih-Weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></author>
       <pages>236–242</pages>
       <abstract>The masking-based speech enhancement method pursues a multiplicative mask that applies to the spectrogram of input noise-corrupted utterance, and a deep neural network (DNN) is often used to learn the mask. In particular, the features commonly used for automatic speech recognition can serve as the input of the DNN to learn the well-behaved mask that significantly reduce the noise distortion of processed utterances. This study proposes to preprocess the input speech features for the ideal ratio mask (IRM)-based DNN by lowpass filtering in order to alleviate the noise components. In particular, we employ the discrete wavelet transform (DWT) to decompose the temporal speech feature sequence and scale down the detail coefficients, which correspond to the high-pass portion of the sequence. Preliminary experiments conducted on a subset of TIMIT corpus reveal that the proposed method can make the resulting IRM achieve higher speech quality and intelligibility for the babble noise-corrupted signals compared with the original IRM, indicating that the lowpass filtered temporal feature sequence can learn a superior IRM network for speech enhancement.</abstract>
       <url hash="91179ffe">2021.rocling-1.30</url>
@@ -359,7 +359,7 @@
       <title>Automatic Extraction of <fixed-case>E</fixed-case>nglish Grammar Pattern Correction Rules</title>
       <author><first>Kuan-Yu</first><last>Shen</last></author>
       <author><first>Yi-Chien</first><last>Lin</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>252–256</pages>
       <abstract>We introduce a method for generating error-correction rules for grammar pattern errors in a given annotated learner corpus. In our approach, annotated edits in the learner corpus are converted into edit rules for correcting common writing errors. The method involves automatic extraction of grammar patterns, and automatic alignment of the erroneous patterns and correct patterns. At run-time, grammar patterns are extracted from the grammatically correct sentences, and correction rules are retrieved by aligning the extracted grammar patterns with the erroneous patterns. Using the proposed method, we generate 1,499 high-quality correction rules related to 232 headwords. The method can be used to assist ESL students in avoiding grammatical errors, and aid teachers in correcting students’ essays. Additionally, the method can be used in the compilation of collocation error dictionaries and the construction of grammar error correction systems.</abstract>
       <url hash="fec89013">2021.rocling-1.32</url>
@@ -370,7 +370,7 @@
       <author><first>Hao-Chuan</first><last>Kao</last></author>
       <author><first>Man-Chen</first><last>Hung</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>257–264</pages>
       <abstract>We use Hypergraph Attention Networks (HyperGAT) to recognize multiple labels of Chinese humor texts. We firstly represent a joke as a hypergraph. The sequential hyperedge and semantic hyperedge structures are used to construct hyperedges. Then, attention mechanisms are adopted to aggregate context information embedded in nodes and hyperedges. Finally, we use trained HyperGAT to complete the multi-label classification task. Experimental results on the Chinese humor multi-label dataset showed that HyperGAT model outperforms previous sequence-based (CNN, BiLSTM, FastText) and graph-based (Graph-CNN, TextGCN, Text Level GNN) deep learning models.</abstract>
       <url hash="2b9e9144">2021.rocling-1.33</url>
@@ -396,7 +396,7 @@
       <author><first>Lian-Hui</first><last>Tan</last></author>
       <author><first>Tzu-Ju</first><last>Lin</last></author>
       <author><first>Chun-Wei</first><last>Wang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>271–279</pages>
       <abstract>Ever-expanding evaluative texts on online forums have become an important source of sentiment analysis. This paper proposes an aspect-based annotated dataset consisting of telecom reviews on social media. We introduce a category, implicit evaluative texts, impevals for short, to investigate how the deep learning model works on these implicit reviews. We first compare two models, BertSimple and BertImpvl, and find that while both models are competent to learn simple evaluative texts, they are confused when classifying impevals. To investigate the factors underlying the correctness of the model’s predictions, we conduct a series of analyses, including qualitative error analysis and quantitative analysis of linguistic features with logistic regressions. The results show that local features that affect the overall sentential sentiment confuse the model: multiple target entities, transitional words, sarcasm, and rhetorical questions. Crucially, these linguistic features are independent of the model’s confidence measured by the classifier’s softmax probabilities. Interestingly, the sentence complexity indicated by syntax-tree depth is not correlated with the model’s correctness. In sum, this paper sheds light on the characteristics of the modern deep learning model and when it might need more supervision through linguistic evaluations.</abstract>
       <url hash="ac94c854">2021.rocling-1.35</url>
@@ -442,7 +442,7 @@
       <author><first>Yi-Chien</first><last>Lin</last></author>
       <author><first>Chun-Ho</first><last>Kwok</last></author>
       <author><first>Hai-Lun</first><last>Tu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>301–309</pages>
       <abstract>We introduce a method for assisting English as Second Language (ESL) learners by providing translations of Collins COBUILD grammar patterns(GP) for a given word. In our approach, bilingual parallel corpus is transformed into bilingual GP pairs aimed at providing native language support for learning word usage through GPs. The method involves automatically parsing sentences to extract GPs, automatically generating translation GP pairs from bilingual sentences, and automatically extracting common bilingual GPs. At run-time, the target word is used for lookup GPs and translations, and the retrieved common GPs and their example sentences are shown to the user. We present a prototype phrase search engine, Linggle GPTrans, that implements the methods to assist ESL learners. Preliminary evaluation on a set of more than 300 GP-translation pairs shows that the methods achieve 91% accuracy.</abstract>
       <url hash="eb624eb3">2021.rocling-1.39</url>
@@ -454,7 +454,7 @@
       <author><first>Yongfu</first><last>Liao</last></author>
       <author><first>Po-Ya Angela</first><last>Wang</last></author>
       <author><first>Mao-Chang</first><last>Ku</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>310–317</pages>
       <abstract>The rapid flow of information and the abundance of text data on the Internet have brought about the urgent demand for the construction of monitoring resources and techniques used for various purposes. To extract facets of information useful for particular domains from such large and dynamically growing corpora requires an unsupervised yet transparent ways of analyzing the textual data. This paper proposed a hybrid collocation analysis as a potential method to retrieve and summarize Taiwan-related topics posted on Weibo and PTT. By grouping collocates of 臺灣 ‘Taiwan’ into clusters of topics via either word embeddings clustering or Latent Dirichlet allocation, lists of collocates can be converted to probability distributions such that distances and similarities can be defined and computed. With this method, we conduct a diachronic analysis of the similarity between Weibo and PTT, providing a way to pinpoint when and how the topic similarity between the two rises or falls. A fine-grained view on the grammatical behavior and political implications is attempted, too. This study thus sheds light on alternative explainable routes for future social media listening method on the understanding of cross-strait relationship.</abstract>
       <url hash="2b1adc5c">2021.rocling-1.40</url>
@@ -484,7 +484,7 @@
       <title>Identify Bilingual Patterns and Phrases from a Bilingual Sentence Pair</title>
       <author><first>Yi-Jyun</first><last>Chen</last></author>
       <author><first>Hsin-Yun</first><last>Chung</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>333–338</pages>
       <abstract>This paper presents a method for automatically identifying bilingual grammar patterns and extracting bilingual phrase instances from a given English-Chinese sentence pair. In our approach, the English-Chinese sentence pair is parsed to identify English grammar patterns and Chinese counterparts. The method involves generating translations of each English grammar pattern and calculating translation probability of words from a word-aligned parallel corpora. The results allow us to extract the most probable English-Chinese phrase pairs in the sentence pair. We present a prototype system that applies the method to extract grammar patterns and phrases in parallel sentences. An evaluation on randomly selected examples from a dictionary shows that our approach has reasonably good performance. We use human judge to assess the bilingual phrases generated by our approach. The results have potential to assist language learning and machine translation research.</abstract>
       <url hash="b93374f5">2021.rocling-1.43</url>
@@ -567,7 +567,7 @@
     </paper>
     <paper id="51">
       <title><fixed-case>ROCLING</fixed-case>-2021 Shared Task: Dimensional Sentiment Analysis for Educational Texts</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Jin</first><last>Wang</last></author>
       <author><first>Bo</first><last>Peng</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
diff --git a/data/xml/2021.scil.xml b/data/xml/2021.scil.xml
index 8ea4d291fc..9101f370fc 100644
--- a/data/xml/2021.scil.xml
+++ b/data/xml/2021.scil.xml
@@ -27,7 +27,7 @@
       <title>A Network Science Approach to Bilingual Code-switching</title>
       <author><first>Qihui</first><last>Xu</last></author>
       <author><first>Magdalena</first><last>Markowska</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Ping</first><last>Li</last></author>
       <pages>18–27</pages>
       <url hash="daf10b5b">2021.scil-1.2</url>
@@ -51,7 +51,7 @@
     <paper id="5">
       <title>Drivers of <fixed-case>E</fixed-case>nglish Syntactic Change in the <fixed-case>C</fixed-case>anadian Parliament</title>
       <author><first>Liwen</first><last>Hou</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>51–60</pages>
       <url hash="c0737e45">2021.scil-1.5</url>
       <bibkey>hou-smith-2021-drivers</bibkey>
@@ -69,7 +69,7 @@
     <paper id="7">
       <title>Emerging <fixed-case>E</fixed-case>nglish Transitives over the Last Two Centuries</title>
       <author><first>Liwen</first><last>Hou</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>71–80</pages>
       <url hash="efbe246f">2021.scil-1.7</url>
       <bibkey>hou-smith-2021-emerging</bibkey>
@@ -85,7 +85,7 @@
     <paper id="9">
       <title>Effects of Duration, Locality, and Surprisal in Speech Disfluency Prediction in <fixed-case>E</fixed-case>nglish Spontaneous Speech</title>
       <author><first>Samvit</first><last>Dammalapati</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Sidharth</first><last>Ranjan</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
       <pages>91–101</pages>
@@ -154,7 +154,7 @@
       <title>Learning Morphological Productivity as Meaning-Form Mappings</title>
       <author><first>Sarah</first><last>Payne</last></author>
       <author><first>Jordan</first><last>Kodner</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>177–187</pages>
       <url hash="31d8ce34">2021.scil-1.17</url>
       <bibkey>payne-etal-2021-learning</bibkey>
@@ -244,7 +244,7 @@
     <paper id="28">
       <title>Vowel Harmony Viewed as Error-Correcting Code</title>
       <author><first>Yvo</first><last>Meeres</last></author>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <pages>313–322</pages>
       <url hash="9d6fad8a">2021.scil-1.28</url>
       <bibkey>meeres-pirinen-2021-vowel</bibkey>
@@ -252,7 +252,7 @@
     <paper id="29">
       <title>What’s in a Span? Evaluating the Creativity of a Span-Based Neural Constituency Parser</title>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>323–333</pages>
       <url hash="b7285714">2021.scil-1.29</url>
       <bibkey>dakota-kubler-2021-whats</bibkey>
@@ -277,7 +277,7 @@
       <title>A <fixed-case>R</fixed-case>ate–<fixed-case>D</fixed-case>istortion view of human pragmatic reasoning?</title>
       <author><first>Noga</first><last>Zaslavsky</last></author>
       <author><first>Jennifer</first><last>Hu</last></author>
-      <author><first>Roger P.</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger P.</first><last>Levy</last></author>
       <pages>347–348</pages>
       <url hash="51c53b7a">2021.scil-1.32</url>
       <bibkey>zaslavsky-etal-2021-rate</bibkey>
@@ -286,7 +286,7 @@
       <title>Apparent Communicative Efficiency in the Lexicon is Emergent</title>
       <author><first>Spencer</first><last>Caplan</last></author>
       <author><first>Jordan</first><last>Kodner</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>349–350</pages>
       <url hash="595e195a">2021.scil-1.33</url>
       <bibkey>caplan-etal-2021-apparent</bibkey>
@@ -469,7 +469,7 @@
     <paper id="55">
       <title>How to marry a star: Probabilistic constraints for meaning in context</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>451–453</pages>
       <url hash="52b17977">2021.scil-1.55</url>
       <bibkey>erk-herbelot-2021-marry</bibkey>
@@ -522,7 +522,7 @@
     <paper id="61">
       <title>Will it Unblend?</title>
       <author><first>Yuval</first><last>Pinter</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>474–476</pages>
       <url hash="ab33e2fa">2021.scil-1.61</url>
diff --git a/data/xml/2021.sdp.xml b/data/xml/2021.sdp.xml
index 54984ea3af..3dd737cf0a 100644
--- a/data/xml/2021.sdp.xml
+++ b/data/xml/2021.sdp.xml
@@ -6,9 +6,9 @@
       <editor><first>Iz</first><last>Beltagy</last></editor>
       <editor><first>Arman</first><last>Cohan</last></editor>
       <editor><first>Guy</first><last>Feigenblat</last></editor>
-      <editor><first>Dayne</first><last>Freitag</last></editor>
+      <editor id="dayne-freitag"><first>Dayne</first><last>Freitag</last></editor>
       <editor><first>Tirthankar</first><last>Ghosal</last></editor>
-      <editor><first>Keith</first><last>Hall</last></editor>
+      <editor id="keith-hall"><first>Keith</first><last>Hall</last></editor>
       <editor><first>Drahomira</first><last>Herrmannova</last></editor>
       <editor><first>Petr</first><last>Knoth</last></editor>
       <editor><first>Kyle</first><last>Lo</last></editor>
@@ -17,7 +17,7 @@
       <editor><first>Michal</first><last>Shmueli-Scheuer</last></editor>
       <editor><first>Anita</first><last>de Waard</last></editor>
       <editor><first>Kuansan</first><last>Wang</last></editor>
-      <editor><first>Lucy Lu</first><last>Wang</last></editor>
+      <editor id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>June</month>
@@ -43,7 +43,7 @@
       <author><first>Soyeong</first><last>Jeong</last></author>
       <author><first>Jinheon</first><last>Baek</last></author>
       <author><first>ChaeHun</first><last>Park</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>7–17</pages>
       <abstract>One of the challenges in information retrieval (IR) is the vocabulary mismatch problem, which happens when the terms between queries and documents are lexically different but semantically similar. While recent work has proposed to expand the queries or documents by enriching their representations with additional relevant terms to address this challenge, they usually require a large volume of query-document pairs to train an expansion model. In this paper, we propose an Unsupervised Document Expansion with Generation (UDEG) framework with a pre-trained language model, which generates diverse supplementary sentences for the original document without using labels on query-document pairs for training. For generating sentences, we further stochastically perturb their embeddings to generate more diverse sentences for document expansion. We validate our framework on two standard IR benchmark datasets. The results show that our framework significantly outperforms relevant expansion baselines for IR.</abstract>
       <url hash="60c76cbd">2021.sdp-1.2</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="7">
       <title>Argument Mining for Scholarly Document Processing: Taking Stock and Looking Ahead</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
       <author><first>Anita</first><last>de Waard</last></author>
@@ -135,7 +135,7 @@
       <author><first>Arjun</first><last>Manoharan</last></author>
       <author><first>Deepak</first><last>Mittal</last></author>
       <author><first>Ramakanth</first><last>Pasunuru</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Maneesh</first><last>Singh</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
diff --git a/data/xml/2021.semdeep.xml b/data/xml/2021.semdeep.xml
index bceb6d5d89..7cc506aa7b 100644
--- a/data/xml/2021.semdeep.xml
+++ b/data/xml/2021.semdeep.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2021-04-14" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Semantic Deep Learning (SemDeep-6)</booktitle>
-      <editor><first>Luis</first><last>Espinosa-Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></editor>
       <editor><first>Dagmar</first><last>Gromann</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Anna</first><last>Breit</last></editor>
-      <editor><first>Jose</first><last>Camacho-Collados</last></editor>
+      <editor id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></editor>
       <editor><first>Mohammad</first><last>Taher Pilehvar</last></editor>
       <editor><first>Artem</first><last>Revenko</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,9 +23,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>CTLR</fixed-case>@<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>-<fixed-case>TSV</fixed-case>: Target Sense Verification using Marked Inputs and<fixed-case>P</fixed-case>re-trained Models</title>
-      <author><first>José G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>José G.</first><last>Moreno</last></author>
       <author><first>Elvys Linhares</first><last>Pontes</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>1–6</pages>
       <url hash="258a54c8">2021.semdeep-1.1</url>
       <bibkey>moreno-etal-2021-ctlr</bibkey>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="4">
       <title>Relation Classification via Relation Validation</title>
-      <author><first>José G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>José G.</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
       <pages>20–27</pages>
diff --git a/data/xml/2021.semeval.xml b/data/xml/2021.semeval.xml
index dde1bbf8e8..f42e748a53 100644
--- a/data/xml/2021.semeval.xml
+++ b/data/xml/2021.semeval.xml
@@ -7,7 +7,7 @@
       <editor><first>Nathan</first><last>Schneider</last></editor>
       <editor><first>Natalie</first><last>Schluter</last></editor>
       <editor><first>Guy</first><last>Emerson</last></editor>
-      <editor><first>Aurelie</first><last>Herbelot</last></editor>
+      <editor id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></editor>
       <editor><first>Xiaodan</first><last>Zhu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -23,8 +23,8 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Lexical Complexity Prediction</title>
       <author><first>Matthew</first><last>Shardlow</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>1–16</pages>
       <abstract>This paper presents the results and main findings of SemEval-2021 Task 1 - Lexical Complexity Prediction. We provided participants with an augmented version of the CompLex Corpus (Shardlow et al. 2020). CompLex is an English multi-domain corpus in which words and multi-word expressions (MWEs) were annotated with respect to their complexity using a five point Likert scale. SemEval-2021 Task 1 featured two Sub-tasks: Sub-task 1 focused on single words and Sub-task 2 focused on MWEs. The competition attracted 198 teams in total, of which 54 teams submitted official runs on the test data to Sub-task 1 and 37 to Sub-task 2.</abstract>
@@ -88,7 +88,7 @@
     <paper id="6">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Toxic Spans Detection</title>
       <author><first>John</first><last>Pavlopoulos</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Léo</first><last>Laugier</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>59–69</pages>
@@ -106,7 +106,7 @@
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Fabrizio</first><last>Silvestri</last></author>
       <author><first>Hamed</first><last>Firooz</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <pages>70–98</pages>
       <abstract>We describe SemEval-2021 task 6 on Detection of Persuasion Techniques in Texts and Images: the data, the annotation guidelines, the evaluation setup, the results, and the participating systems. The task focused on memes and had three subtasks: (i) detecting the techniques in the text, (ii) detecting the text spans where the techniques are used, and (iii) detecting techniques in the entire meme, i.e., both in the text and in the image. It was a popular task, attracting 71 registrations, and 22 teams that eventually made an official submission on the test set. The evaluation results for the third subtask confirmed the importance of both modalities, the text and the image. Moreover, some teams reported benefits when not just combining the two modalities, e.g., by using early or late fusion, but rather modeling the interaction between them in a joint model.</abstract>
@@ -147,7 +147,7 @@
     <paper id="10">
       <title><fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab <fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Linguistic Feature Based Modelling for Lexical Complexity</title>
       <author><first>Raksha</first><last>Agarwal</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>120–125</pages>
       <abstract>The present work aims at assigning a complexity score between 0 and 1 to a target word or phrase in a given sentence. For each Single Word Target, a Random Forest Regressor is trained on a feature set consisting of lexical, semantic, and syntactic information about the target. For each Multiword Target, a set of individual word features is taken along with single word complexities in the feature space. The system yielded the Pearson correlation of 0.7402 and 0.8244 on the test set for the Single and Multiword Targets, respectively.</abstract>
       <url hash="2fbd0b16">2021.semeval-1.10</url>
@@ -210,7 +210,7 @@
     <paper id="16">
       <title><fixed-case>S</fixed-case>koltech<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Generating Cross-Lingual Training Data for the Word-in-Context Task</title>
       <author><first>Anton</first><last>Razzhigaev</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>157–162</pages>
       <abstract>In this paper, we present a system for the solution of the cross-lingual and multilingual word-in-context disambiguation task. Task organizers provided monolingual data in several languages, but no cross-lingual training data were available. To address the lack of the officially provided cross-lingual training data, we decided to generate such data ourselves. We describe a simple yet effective approach based on machine translation and back translation of the lexical units to the original language used in the context of this shared task. In our experiments, we used a neural system based on the XLM-R, a pre-trained transformer-based masked language model, as a baseline. We show the effectiveness of the proposed approach as it allows to substantially improve the performance of this strong neural baseline model. In addition, in this study, we present multiple types of the XLM-R based classifier, experimenting with various ways of mixing information from the first and second occurrences of the target word in two samples.</abstract>
@@ -467,7 +467,7 @@
       <author><first>Jessica</first><last>Cox</last></author>
       <author><first>Curt</first><last>Kohler</last></author>
       <author><first>Antony</first><last>Scerri</last></author>
-      <author><first>Ron</first><last>Daniel Jr.</last></author>
+      <author id="ron-daniel-jr"><first>Ron</first><last>Daniel Jr.</last></author>
       <author><first>Paul</first><last>Groth</last></author>
       <pages>306–316</pages>
       <abstract>We describe MeasEval, a SemEval task of extracting counts, measurements, and related context from scientific documents, which is of significant importance to the creation of Knowledge Graphs that distill information from the scientific literature. This is a new task in 2021, for which over 75 submissions from 25 participants were received. We expect the data developed for this task and the findings reported to be valuable to the scientific knowledge extraction, metrology, and automated knowledge base construction communities.</abstract>
@@ -512,9 +512,9 @@
       <author><first>Anca</first><last>Dumitrache</last></author>
       <author><first>Tristan</first><last>Miller</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Edwin</first><last>Simpson</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>338–347</pages>
       <abstract>Disagreement between coders is ubiquitous in virtually all datasets annotated with human judgements in both natural language processing and computer vision. However, most supervised machine learning methods assume that a single preferred interpretation exists for each item, which is at best an idealization. The aim of the SemEval-2021 shared task on learning with disagreements (Le-Wi-Di) was to provide a unified testing framework for methods for learning from data containing multiple and possibly contradictory annotations covering the best-known datasets containing information about disagreements for interpreting language and classifying images. In this paper we describe the shared task and its results.</abstract>
       <url hash="b602aa5e">2021.semeval-1.41</url>
@@ -527,8 +527,8 @@
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Xin</first><last>Su</last></author>
       <author><first>Yiyun</first><last>Zhao</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>348–356</pages>
       <abstract>This paper presents the Source-Free Domain Adaptation shared task held within SemEval-2021. The aim of the task was to explore adaptation of machine-learning models in the face of data sharing constraints. Specifically, we consider the scenario where annotations exist for a domain but cannot be shared. Instead, participants are provided with models trained on that (source) data. Participants also receive some labeled data from a new (development) domain on which to explore domain adaptation algorithms. Participants are then tested on data representing a new (target) domain. We explored this scenario with two different semantic tasks: negation detection (a text classification task) and time expression recognition (a sequence tagging task).</abstract>
@@ -541,7 +541,7 @@
       <author><first>Weikang</first><last>Wang</last></author>
       <author><first>Yi</first><last>Wu</last></author>
       <author><first>Yixiang</first><last>Liu</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>357–363</pages>
       <abstract>Domain adaptation assumes that samples from source and target domains are freely accessible during a training phase. However, such assumption is rarely plausible in the real-world and may causes data-privacy issues, especially when the label of the source domain can be a sensitive attribute as an identifier. SemEval-2021 task 10 focuses on these issues. We participate in the task and propose novel frameworks based on self-training method. In our systems, two different frameworks are designed to solve text classification and sequence labeling. These approaches are tested to be effective which ranks the third among all system in subtask A, and ranks the first among all system in subtask B.</abstract>
       <url hash="7ca9e8b8">2021.semeval-1.43</url>
@@ -631,7 +631,7 @@
     </paper>
     <paper id="51">
       <title><fixed-case>TAPAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 9: Reasoning over tables with intermediate pre-training</title>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <author><first>Syrine</first><last>Krichene</last></author>
       <pages>423–430</pages>
@@ -645,7 +645,7 @@
       <author><first>Abdullatif</first><last>Köksal</last></author>
       <author><first>Yusuf</first><last>Yüksel</last></author>
       <author><first>Bekir</first><last>Yıldırım</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>431–437</pages>
       <abstract>In this paper, we present our text augmentation based approach for the Table Statement Support Subtask (Phase A) of SemEval-2021 Task 9. We experiment with different text augmentation techniques such as back translation and synonym swapping using Word2Vec and WordNet. We show that text augmentation techniques lead to 2.5% improvement in F1 on the test set. Further, we investigate the impact of domain adaptation and joint learning on fact verification in tabular data by utilizing the SemTabFacts and TabFact datasets. We observe that joint learning improves the F1 scores on the SemTabFacts and TabFact test sets by 3.31% and 0.77%, respectively.</abstract>
       <url hash="a1b607d1">2021.semeval-1.52</url>
@@ -671,7 +671,7 @@
       <author><first>Kemal</first><last>Kurniawan</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <author><first>Philip</first><last>Schulz</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>445–451</pages>
       <abstract>This paper describes PTST, a source-free unsupervised domain adaptation technique for sequence tagging, and its application to the SemEval-2021 Task 10 on time expression recognition. PTST is an extension of the cross-lingual parsimonious parser transfer framework, which uses high-probability predictions of the source model as a supervision signal in self-training. We extend the framework to a sequence prediction setting, and demonstrate its applicability to unsupervised domain adaptation. PTST achieves F1 score of 79.6% on the official test set, with the precision of 90.1%, the highest out of 14 submissions.</abstract>
       <url hash="915e1901">2021.semeval-1.54</url>
@@ -728,7 +728,7 @@
       <author><first>Yu</first><last>Su</last></author>
       <author><first>Changhong</first><last>He</last></author>
       <author><first>Lei</first><last>Lin</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Lili</first><last>Shan</last></author>
       <pages>485–489</pages>
       <abstract>This paper describes the winning system in the End-to-end Pipeline phase for the NLPContributionGraph task. The system is composed of three BERT-based models and the three models are used to extract sentences, entities and triples respectively. Experiments show that sampling and adversarial training can greatly boost the system. In End-to-end Pipeline phase, our system got an average F1 of 0.4703, significantly higher than the second-placed system which got an average F1 of 0.3828.</abstract>
@@ -859,7 +859,7 @@
       <author><first>Jinghang</first><last>Gu</last></author>
       <author><first>Emmanuele</first><last>Chersoni</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>565–570</pages>
       <abstract>In this contribution, we describe the system presented by the PolyU CBS-Comp Team at the Task 1 of SemEval 2021, where the goal was the estimation of the complexity of words in a given sentence context. Our top system, based on a combination of lexical, syntactic, word embeddings and Transformers-derived features and on a Gradient Boosting Regressor, achieves a top correlation score of 0.754 on the subtask 1 for single words and 0.659 on the subtask 2 for multiword expressions.</abstract>
@@ -946,7 +946,7 @@
     </paper>
     <paper id="78">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Complexity Prediction by Combining <fixed-case>BERT</fixed-case> Vectors and Classic Features</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <pages>617–622</pages>
       <abstract>We describe the UTFPR systems submitted to the Lexical Complexity Prediction shared task of SemEval 2021. They perform complexity prediction by combining classic features, such as word frequency, n-gram frequency, word length, and number of senses, with BERT vectors. We test numerous feature combinations and machine learning models in our experiments and find that BERT vectors, even if not optimized for the task at hand, are a great complement to classic features. We also find that employing the principle of compositionality can potentially help in phrase complexity prediction. Our systems place 45th out of 55 for single words and 29th out of 38 for phrases.</abstract>
       <url hash="eb9f40a7">2021.semeval-1.78</url>
@@ -1063,7 +1063,7 @@
       <title><fixed-case>C</fixed-case>3<fixed-case>SL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Predicting Lexical Complexity of Words in Specific Contexts with Sentence Embeddings</title>
       <author><first>Raul</first><last>Almeida</last></author>
       <author><first>Hegler</first><last>Tissot</last></author>
-      <author><first>Marcos Didonet Del</first><last>Fabro</last></author>
+      <author id="marcos-didonet-del-fabro"><first>Marcos Didonet Del</first><last>Fabro</last></author>
       <pages>683–687</pages>
       <abstract>We present our approach to predicting lexical complexity of words in specific contexts, as entered LCP Shared Task 1 at SemEval 2021. The approach consists of separating sentences into smaller chunks, embedding them with Sent2Vec, and reducing the embeddings into a simpler vector used as input to a neural network, the latter for predicting the complexity of words and expressions. Results show that the pre-trained sentence embeddings are not able to capture lexical complexity from the language when applied in cross-domain applications.</abstract>
       <url hash="45a85aa8">2021.semeval-1.88</url>
@@ -1164,7 +1164,7 @@
       <title><fixed-case>U</fixed-case>o<fixed-case>B</fixed-case>_<fixed-case>UK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2021 Task 2: Zero-Shot and Few-Shot Learning for Multi-lingual and Cross-lingual Word Sense Disambiguation.</title>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>738–742</pages>
       <abstract>This paper describes our submission to SemEval 2021 Task 2. We compare XLM-RoBERTa Base and Large in the few-shot and zero-shot settings and additionally test the effectiveness of using a k-nearest neighbors classifier in the few-shot setting instead of the more traditional multi-layered perceptron. Our experiments on both the multi-lingual and cross-lingual data show that XLM-RoBERTa Large, unlike the Base version, seems to be able to more effectively transfer learning in a few-shot setting and that the k-nearest neighbors classifier is indeed a more powerful classifier than a multi-layered perceptron when used in few-shot learning.</abstract>
       <url hash="34d37cda">2021.semeval-1.97</url>
@@ -1195,7 +1195,7 @@
     <paper id="100">
       <title><fixed-case>G</fixed-case>loss<fixed-case>R</fixed-case>eader at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Reading Definitions Improves Contextualized Word Embeddings</title>
       <author><first>Maxim</first><last>Rachinskiy</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>756–762</pages>
       <abstract>Consulting a dictionary or a glossary is a familiar way for many humans to figure out what does a word in a particular context mean. We hypothesize that a system that can select a proper definition for a particular word occurrence can also naturally solve tasks related to word senses. To verify this hypothesis we developed a solution for the Multilingual and Cross-lingual Word-in-Context (MCL-WiC) task, that does not use any of the shared task data or other WiC data for training. Instead, it is trained to embed word definitions from English WordNet and word occurrences in English texts into the same vector space following an approach previously proposed for Word Sense Disambiguation (WSD). To estimate the similarity in meaning of two word occurrences, we compared different metrics in this shared vector space and found that L1-distance between normalized contextualized word embeddings outperforms traditionally employed cosine similarity and several other metrics. To solve the task for languages other than English, we rely on zero-shot cross-lingual transfer capabilities of the multilingual XLM-R masked language model. Despite not using MCL-WiC training data, in the shared task our approach achieves an accuracy of 89.5% on the English test set, which is only 4% less than the best system. In the multilingual subtask zero-shot cross-lingual transfer shows competitive results, that are within 2% from the best systems for Russian, French, and Arabic. In the cross-lingual subtask are within 2-4% from the best systems.</abstract>
       <url hash="a97418da">2021.semeval-1.100</url>
@@ -1227,7 +1227,7 @@
     <paper id="103">
       <title><fixed-case>LIORI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Span Prediction and Binary Classification approaches to Word-in-Context Disambiguation</title>
       <author><first>Adis</first><last>Davletov</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Denis</first><last>Gordeev</last></author>
       <author><first>Alexey</first><last>Rey</last></author>
       <pages>780–786</pages>
@@ -1241,8 +1241,8 @@
       <author><first>Ciprian</first><last>Bodnar</last></author>
       <author><first>Andrada</first><last>Tapuc</last></author>
       <author><first>Cosmin</first><last>Pintilie</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
-      <author><first>Diana</first><last>Trandabat</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last></author>
       <pages>787–792</pages>
       <abstract>This paper presents a word-in-context disambiguation system. The task focuses on capturing the polysemous nature of words in a multilingual and cross-lingual setting, without considering a strict inventory of word meanings. The system applies Natural Language Processing algorithms on datasets from SemEval 2021 Task 2, being able to identify the meaning of words for the languages Arabic, Chinese, English, French and Russian, without making use of any additional mono- or multilingual resources.</abstract>
       <url hash="e84b5e15">2021.semeval-1.104</url>
@@ -1265,7 +1265,7 @@
     <paper id="106">
       <title><fixed-case>U</fixed-case>o<fixed-case>R</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 4: Using Pre-trained <fixed-case>BERT</fixed-case> Token Embeddings for Question Answering of Abstract Meaning</title>
       <author><first>Thanet</first><last>Markchom</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>799–804</pages>
       <abstract>Most question answering tasks focuses on predicting concrete answers, e.g., named entities. These tasks can be normally achieved by understanding the contexts without additional information required. In Reading Comprehension of Abstract Meaning (ReCAM) task, the abstract answers are introduced. To understand abstract meanings in the context, additional knowledge is essential. In this paper, we propose an approach that leverages the pre-trained BERT Token embeddings as a prior knowledge resource. According to the results, our approach using the pre-trained BERT outperformed the baselines. It shows that the pre-trained BERT token embeddings can be used as additional knowledge for understanding abstract meanings in question answering.</abstract>
       <url hash="821c685a">2021.semeval-1.106</url>
@@ -1395,7 +1395,7 @@
       <title><fixed-case>HITMI</fixed-case>&amp;<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Integrating Transformer and <fixed-case>CRF</fixed-case> for Toxic Spans Detection</title>
       <author><first>Chenyi</first><last>Wang</last></author>
       <author><first>Tianshu</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>870–874</pages>
       <abstract>This paper introduces our system at SemEval-2021 Task 5: Toxic Spans Detection. The task aims to accurately locate toxic spans within a text. Using BIO tagging scheme, we model the task as a token-level sequence labeling task. Our system uses a single model built on the model of multi-layer bidirectional transformer encoder. And we introduce conditional random field (CRF) to make the model learn the constraints between tags. We use ERNIE as pre-trained model, which is more suitable for the task accroding to our experiments. In addition, we use adversarial training with the fast gradient method (FGM) to improve the robustness of the system. Our system obtains 69.85% F1 score, ranking 3rd for the official evaluation.</abstract>
       <url hash="982ad5b7">2021.semeval-1.117</url>
@@ -1590,8 +1590,8 @@
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Combining Embeddings in a <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> model for Toxic Spans Detection</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
       <author><first>Pilar</first><last>López-Úbeda</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
       <pages>984–989</pages>
       <abstract>This paper describes the participation of SINAI team at Task 5: Toxic Spans Detection which consists of identifying spans that make a text toxic. Although several resources and systems have been developed so far in the context of offensive language, both annotation and tasks have mainly focused on classifying whether a text is offensive or not. However, detecting toxic spans is crucial to identify why a text is toxic and can assist human moderators to locate this type of content on social media. In order to accomplish the task, we follow a deep learning-based approach using a Bidirectional variant of a Long Short Term Memory network along with a stacked Conditional Random Field decoding layer (BiLSTM-CRF). Specifically, we test the performance of the combination of different pre-trained word embeddings for recognizing toxic entities in text. The results show that the combination of word embeddings helps in detecting offensive content. Our team ranks 29th out of 91 participants.</abstract>
       <url hash="00e3f120">2021.semeval-1.134</url>
@@ -1647,7 +1647,7 @@
       <title><fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 task 6: Detection of Persuasion Techniques In Texts and Images using <fixed-case>CLIP</fixed-case> features</title>
       <author><first>Erfan</first><last>Ghadery</last></author>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1015–1019</pages>
       <abstract>We describe our approach for SemEval-2021 task 6 on detection of persuasion techniques in multimodal content (memes). Our system combines pretrained multimodal models (CLIP) and chained classifiers. Also, we propose to enrich the data by a data augmentation technique. Our submission achieves a rank of 8/16 in terms of F1-micro and 9/16 with F1-macro on the test set.</abstract>
       <url hash="e4638769">2021.semeval-1.139</url>
@@ -1766,7 +1766,7 @@
     </paper>
     <paper id="150">
       <title><fixed-case>M</fixed-case>in<fixed-case>D</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 6: Propaganda Detection using Transfer Learning and Multimodal Fusion</title>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Min</first><last>Gui</last></author>
       <author><first>Chenliang</first><last>Li</last></author>
       <author><first>Ming</first><last>Yan</last></author>
@@ -1947,7 +1947,7 @@
       <title><fixed-case>U</fixed-case>o<fixed-case>R</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 7: Utilizing Pre-trained <fixed-case>D</fixed-case>istil<fixed-case>BERT</fixed-case> Model and Multi-scale <fixed-case>CNN</fixed-case> for Humor Detection</title>
       <author><first>Zehao</first><last>Liu</last></author>
       <author><first>Carl</first><last>Haines</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>1179–1184</pages>
       <abstract>Humour detection is an interesting but difficult task in NLP. Because humorous might not be obvious in text, it can be embedded into context, hide behind the literal meaning and require prior knowledge to understand. We explored different shallow and deep methods to create a humour detection classifier for task 7-1a. Models like Logistic Regression, LSTM, MLP, CNN were used, and pre-trained models like DistilBert were introduced to generate accurate vector representation for textual data. We focused on applying multi-scale strategy on modelling, and compared different models. Our best model is the DistilBERT+MultiScale CNN, it used different sizes of CNN kernel to get multiple scales of features, which achieved 93.7% F1-score and 92.1% accuracy on the test set.</abstract>
       <url hash="ab13b8d1">2021.semeval-1.166</url>
@@ -2037,7 +2037,7 @@
     <paper id="174">
       <title><fixed-case>FII</fixed-case> <fixed-case>FUNNY</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 7: <fixed-case>H</fixed-case>a<fixed-case>H</fixed-case>ackathon: Detecting and rating Humor and Offense</title>
       <author><first>Mihai</first><last>Samson</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <pages>1226–1231</pages>
       <abstract>The “HaHackathon: Detecting and Rating Humor and Offense” task at the SemEval 2021 competition focuses on detecting and rating the humor level in sentences, as well as the level of offensiveness contained in these texts with humoristic tones. In this paper, we present an approach based on recent Deep Learning techniques by both trying to train the models based on the dataset solely and by trying to fine-tune pre-trained models on the gigantic corpus.</abstract>
       <url hash="9538f099">2021.semeval-1.174</url>
@@ -2086,7 +2086,7 @@
       <title><fixed-case>LIORI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 8: Ask Transformer for measurements</title>
       <author><first>Adis</first><last>Davletov</last></author>
       <author><first>Denis</first><last>Gordeev</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Emil</first><last>Davletov</last></author>
       <pages>1249–1254</pages>
       <abstract>This work describes our approach for subtasks of SemEval-2021 Task 8: MeasEval: Counts and Measurements which took the official first place in the competition. To solve all subtasks we use multi-task learning in a question-answering-like manner. We also use learnable scalar weights to weight subtasks’ contribution to the final loss in multi-task training. We fine-tune LUKE to extract quantity spans and we fine-tune RoBERTa to extract everything related to found quantities, including quantities themselves.</abstract>
@@ -2113,7 +2113,7 @@
       <title><fixed-case>V</fixed-case>olta at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 9: Statement Verification and Evidence Finding with Tables using <fixed-case>TAPAS</fixed-case> and Transfer Learning</title>
       <author><first>Devansh</first><last>Gautam</last></author>
       <author><first>Kshitij</first><last>Gupta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>1262–1270</pages>
       <abstract>Tables are widely used in various kinds of documents to present information concisely. Understanding tables is a challenging problem that requires an understanding of language and table structure, along with numerical and logical reasoning. In this paper, we present our systems to solve Task 9 of SemEval-2021: Statement Verification and Evidence Finding with Tables (SEM-TAB-FACTS). The task consists of two subtasks: (A) Given a table and a statement, predicting whether the table supports the statement and (B) Predicting which cells in the table provide evidence for/against the statement. We fine-tune TAPAS (a model which extends BERT’s architecture to capture tabular structure) for both the subtasks as it has shown state-of-the-art performance in various table understanding tasks. In subtask A, we evaluate how transfer learning and standardizing tables to have a single header row improves TAPAS’ performance. In subtask B, we evaluate how different fine-tuning strategies can improve TAPAS’ performance. Our systems achieve an F1 score of 67.34 in subtask A three-way classification, 72.89 in subtask A two-way classification, and 62.95 in subtask B.</abstract>
       <url hash="7cb9f3e6">2021.semeval-1.180</url>
@@ -2182,7 +2182,7 @@
       <title><fixed-case>UOR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 12: On Crowd Annotations; Learning with Disagreements to optimise crowd truth</title>
       <author><first>Emmanuel</first><last>Osei-Brefo</last></author>
       <author><first>Thanet</first><last>Markchom</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>1303–1309</pages>
       <abstract>Crowdsourcing has been ubiquitously used for annotating enormous collections of data. However, the major obstacles to using crowd-sourced labels are noise and errors from non-expert annotations. In this work, two approaches dealing with the noise and errors in crowd-sourced labels are proposed. The first approach uses Sharpness-Aware Minimization (SAM), an optimization technique robust to noisy labels. The other approach leverages a neural network layer called softmax-Crowdlayer specifically designed to learn from crowd-sourced annotations. According to the results, the proposed approaches can improve the performance of the Wide Residual Network model and Multi-layer Perception model applied on crowd-sourced datasets in the image processing domain. It also has similar and comparable results with the majority voting technique when applied to the sequential data domain whereby the Bidirectional Encoder Representations from Transformers (BERT) is used as the base model in both instances.</abstract>
       <url hash="2fd78e6c">2021.semeval-1.186</url>
diff --git a/data/xml/2021.semspace.xml b/data/xml/2021.semspace.xml
index d9ec0ee16e..e73429be4d 100644
--- a/data/xml/2021.semspace.xml
+++ b/data/xml/2021.semspace.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2021 Workshop on Semantic Spaces at the Intersection of NLP, Physics, and Cognitive Science (SemSpace)</booktitle>
       <editor><first>Martha</first><last>Lewis</last></editor>
-      <editor><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
+      <editor id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Groningen, The Netherlands</address>
       <month>June</month>
@@ -86,7 +86,7 @@
       <title>Should Semantic Vector Composition be Explicit? Can it be Linear?</title>
       <author><first>Dominic</first><last>Widdows</last></author>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <pages>76–86</pages>
       <abstract>Vector representations have become a central element in semantic language modelling, leading to mathematical overlaps with many fields including quantum theory. Compositionality is a core goal for such representations: given representations for ‘wet’ and ‘fish’, how should the concept ‘wet fish’ be represented? This position paper surveys this question from two points of view. The first considers the question of whether an explicit mathematical representation can be successful using only tools from within linear algebra, or whether other mathematical tools are needed. The second considers whether semantic vector composition should be explicitly described mathematically, or whether it can be a model-internal side-effect of training a neural network. A third and newer question is whether a compositional model can be implemented on a quantum computer. Given the fundamentally linear nature of quantum mechanics, we propose that these questions are related, and that this survey may help to highlight candidate operations for future quantum implementation.</abstract>
       <url hash="0bc6c34d">2021.semspace-1.8</url>
diff --git a/data/xml/2021.sigdial.xml b/data/xml/2021.sigdial.xml
index 407e0c92e8..bcebf3f53f 100644
--- a/data/xml/2021.sigdial.xml
+++ b/data/xml/2021.sigdial.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue</booktitle>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>Gina-Anne</first><last>Levow</last></editor>
+      <editor id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></editor>
       <editor><first>Zhou</first><last>Yu</last></editor>
       <editor><first>Chitralekha</first><last>Gupta</last></editor>
       <editor><first>Berrak</first><last>Sisman</last></editor>
@@ -27,7 +27,7 @@
     <paper id="1">
       <title>Understanding and predicting user dissatisfaction in a neural generative chatbot</title>
       <author><first>Abigail</first><last>See</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1–12</pages>
       <abstract>Neural generative dialogue agents have shown an increasing ability to hold short chitchat conversations, when evaluated by crowdworkers in controlled settings. However, their performance in real-life deployment – talking to intrinsically-motivated users in noisy environments – is less well-explored. In this paper, we perform a detailed case study of a neural generative model deployed as part of Chirpy Cardinal, an Alexa Prize socialbot. We find that unclear user utterances are a major source of generative errors such as ignoring, hallucination, unclearness and repetition. However, even in unambiguous contexts the model frequently makes reasoning errors. Though users express dissatisfaction in correlation with these errors, certain dissatisfaction types (such as offensiveness and privacy objections) depend on additional factors – such as the user’s personal attitudes, and prior unaddressed dissatisfaction in the conversation. Finally, we show that dissatisfied user utterances can be used as a semi-supervised learning signal to improve the dialogue system. We train a model to predict next-turn dissatisfaction, and show through human evaluation that as a ranking function, it selects higher-quality neural-generated utterances.</abstract>
       <url hash="062c3123">2021.sigdial-1.1</url>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="2">
       <title>Towards Continuous Estimation of Dissatisfaction in Spoken Dialog</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <author><first>Jonathan E.</first><last>Avila</last></author>
       <author><first>Aaron M.</first><last>Alarcon</last></author>
       <pages>13–20</pages>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="4">
       <title>Individual Interaction Styles: Evidence from a Spoken Chat Corpus</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <pages>27–31</pages>
       <abstract>here is increasing interest in modeling style choices in dialog, for example for enabling dialog systems to adapt to their users. It is commonly assumed that each user has his or her own stable characteristics, but for interaction style the truth of this assumption has not been well examined. I investigated using a vector-space model of interaction styles, derived from the Switchboard corpus of telephone conversations and a broad set of prosodic-behavior features. While most individuals exhibited interaction style tendencies, these were generally far from stable, with a predictive model based on individual tendencies outperforming a speaker-independent model by only 3.6%. The tendencies were somewhat stronger for some speakers, including generally males, and for some dimensions of variation.</abstract>
       <url hash="269878c8">2021.sigdial-1.4</url>
@@ -73,7 +73,7 @@
     <paper id="5">
       <title>Evaluation of In-Person Counseling Strategies To Develop Physical Activity Chatbot for Women</title>
       <author><first>Kai-Hui</first><last>Liang</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
       <author><first>Yoo Jung</first><last>Oh</last></author>
       <author><first>Jingwen</first><last>Zhang</last></author>
       <author><first>Yoshimi</first><last>Fukuoka</last></author>
@@ -120,7 +120,7 @@
       <author><first>Pinar</first><last>Donmez</last></author>
       <author><first>Vikas</first><last>Bhardwaj</last></author>
       <author><first>Anuj</first><last>Kumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>66–76</pages>
       <abstract>In this paper, we study the utilization of pre-trained language models to enable few-shotNatural Language Generation (NLG) in task-oriented dialog systems. We introduce a system consisting of iterative self-training and an extensible mini-template framework that textualizes the structured input data into semi-natural text to fully take advantage of pre-trained language models. We compare var-ious representations of NLG models’ input and output and show that transforming the input and output to be similar to what the language model has seen before during pre-training improves the model’s few-shot performance substantially. We show that neural mod-els can be trained with as few as 300 annotated examples while providing high fidelity, considerably lowering the resource requirements for standing up a new domain or language. This level of data efficiency removes the need for crowd-sourced data collection resulting in higher quality data annotated by expert linguists. In addition, model maintenance and debugging processes will improve in this few-shot setting. Finally, we explore distillation and using a caching system to satisfy latency requirements of real-world systems.</abstract>
       <url hash="ac13489a">2021.sigdial-1.8</url>
@@ -144,7 +144,7 @@
     <paper id="10">
       <title>Integrated taxonomy of errors in chat-oriented dialogue systems</title>
       <author><first>Ryuichiro</first><last>Higashinaka</last></author>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Hiroshi</first><last>Tsukahara</last></author>
       <author><first>Masahiro</first><last>Mizukami</last></author>
       <pages>89–98</pages>
@@ -158,7 +158,7 @@
       <title>Effective Social Chatbot Strategies for Increasing User Initiative</title>
       <author><first>Amelia</first><last>Hardy</last></author>
       <author><first>Ashwin</first><last>Paranjape</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>99–110</pages>
       <abstract>Many existing chatbots do not effectively support mixed initiative, forcing their users to either respond passively or lead constantly. We seek to improve this experience by introducing new mechanisms to encourage user initiative in social chatbot conversations. Since user initiative in this setting is distinct from initiative in human-human or task-oriented dialogue, we first propose a new definition that accounts for the unique behaviors users take in this context. Drawing from linguistics, we propose three mechanisms to promote user initiative: back-channeling, personal disclosure, and replacing questions with statements. We show that simple automatic metrics of utterance length, number of noun phrases, and diversity of user responses correlate with human judgement of initiative. Finally, we use these metrics to suggest that these strategies do result in statistically significant increases in user initiative, where frequent, but not excessive, back-channeling is the most effective strategy.</abstract>
       <url hash="bbe5972b">2021.sigdial-1.11</url>
@@ -168,12 +168,12 @@
     </paper>
     <paper id="12">
       <title>Generative Conversational Networks</title>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Karthik</first><last>Gopalakrishnan</last></author>
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>111–120</pages>
       <abstract>Inspired by recent work in meta-learning and generative teaching networks, we propose a framework called Generative Conversational Networks, in which conversational agents learn to generate their own labelled training data (given some seed data) and then train themselves from that data to perform a given task. We use reinforcement learning to optimize the data generation process where the reward signal is the agent’s performance on the task. The task can be any language-related task, from intent detection to full task-oriented conversations. In this work, we show that our approach is able to generalise from seed data and performs well in limited data and limited computation settings, with significant gains for intent detection and slot tagging across multiple datasets: ATIS, TOD, SNIPS, and Restaurants8k. We show an average improvement of 35% in intent detection and 21% in slot tagging over a baseline model trained from the seed data. We also conduct an analysis of the novelty of the generated data and provide generated examples for intent detection, slot tagging, and non-goal oriented conversations.</abstract>
       <url hash="17cead64">2021.sigdial-1.12</url>
@@ -190,7 +190,7 @@
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>121–132</pages>
       <abstract>Smooth and effective communication requires the ability to perform latent or explicit commonsense inference. Prior commonsense reasoning benchmarks (such as SocialIQA and CommonsenseQA) mainly focus on the discriminative task of choosing the right answer from a set of candidates, and do not involve interactive language generation as in dialogue. Moreover, existing dialogue datasets do not explicitly focus on exhibiting commonsense as a facet. In this paper, we present an empirical study of commonsense in dialogue response generation. We first auto-extract commonsensical dialogues from existing dialogue datasets by leveraging ConceptNet, a commonsense knowledge graph. Furthermore, building on social contexts/situations in SocialIQA, we collect a new dialogue dataset with 25K dialogues aimed at exhibiting social commonsense in an interactive setting. We evaluate response generation models trained using these datasets and find that models trained on both extracted and our collected data produce responses that consistently exhibit more commonsense than baselines. Finally we propose an approach for automatic evaluation of commonsense that relies on features derived from ConceptNet and pre-trained language and dialog models, and show reasonable correlation with human evaluation of responses’ commonsense quality.</abstract>
       <url hash="abc43b4c">2021.sigdial-1.13</url>
@@ -203,7 +203,7 @@
       <author><first>Taha</first><last>Aksu</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>133–143</pages>
       <abstract>We introduce a synthetic dialogue generation framework, Velocidapter, which addresses the corpus availability problem for dialogue comprehension. Velocidapter augments datasets by simulating synthetic conversations for a task-oriented dialogue domain, requiring a small amount of bootstrapping work for each new domain. We evaluate the efficacy of our framework on a task-oriented dialogue comprehension dataset, MRCWOZ, which we curate by annotating questions for slots in the restaurant, taxi, and hotel domains of the MultiWOZ 2.2 dataset (Zang et al., 2020). We run experiments within a low-resource setting, where we pretrain a model on SQuAD, fine-tuning it on either a small original data or on the synthetic data generated by our framework. Velocidapter shows significant improvements using both the transformer-based BERTBase and BiDAF as base models. We further show that the framework is easy to use by novice users and conclude that Velocidapter can greatly help training over task-oriented dialogues, especially for low-resourced emerging domains.</abstract>
       <url hash="88cab37f">2021.sigdial-1.14</url>
@@ -214,7 +214,7 @@
     <paper id="15">
       <title>An Analysis of State-of-the-Art Models for Situated Interactive <fixed-case>M</fixed-case>ulti<fixed-case>M</fixed-case>odal Conversations (<fixed-case>SIMMC</fixed-case>)</title>
       <author><first>Satwik</first><last>Kottur</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Ahmad</first><last>Beirami</last></author>
       <author><first>Eunjoon</first><last>Cho</last></author>
@@ -230,7 +230,7 @@
     <paper id="16">
       <title>A Simple yet Effective Method for Sentence Ordering</title>
       <author><first>Aili</first><last>Shen</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>154–160</pages>
       <abstract>Sentence ordering is the task of arranging a given bag of sentences so as to maximise the coherence of the overall text. In this work, we propose a simple yet effective training method that improves the capacity of models to capture overall text coherence based on training over pairs of sentences/segments. Experimental results show the superiority of our proposed method in in- and cross-domain settings. The utility of our method is also verified over a multi-document summarisation task.</abstract>
       <url hash="e2aab5e3">2021.sigdial-1.16</url>
@@ -243,7 +243,7 @@
       <author><first>Rachna</first><last>Konigari</last></author>
       <author><first>Saurabh</first><last>Ramola</last></author>
       <author><first>Vijay Vardhan</first><last>Alluri</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>161–166</pages>
       <abstract>Topic diversion occurs frequently with engaging open-domain dialogue systems like virtual assistants. The balance between staying on topic and rectifying the topic drift is important for a good collaborative system. In this paper, we present a model which uses a fine-tuned XLNet-base to classify the utterances pertaining to the major topic of conversation and those which are not, with a precision of 84%. We propose a preliminary study, classifying utterances into major, minor and off-topics, which further extends into a system initiative for diversion rectification. A case study was conducted where a system initiative is emulated as a response to the user going off-topic, mimicking a common occurrence of mixed initiative present in natural human-human conversation. This task of classifying utterances into those which belong to the major theme or not, would also help us in identification of relevant sentences for tasks like dialogue summarization and information extraction from conversations.</abstract>
       <url hash="a341191c">2021.sigdial-1.17</url>
@@ -317,7 +317,7 @@
     <paper id="23">
       <title>Hi-<fixed-case>DST</fixed-case>: A Hierarchical Approach for Scalable and Extensible Dialogue State Tracking</title>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <pages>218–227</pages>
       <abstract>Dialogue State Tracking (DST) is a sub-task of task-based dialogue systems where the user intention is tracked through a set of (domain, slot, slot-value) triplets. Existing DST models can be difficult to extend for new datasets with larger domains/slots mainly due to either of the two reasons- i) prediction of domain-slot as a pair, and ii) dependency of model parameters on the number of slots and domains. In this work, we propose to address these issues using a Hierarchical DST (Hi-DST) model. At a given turn, the model first detects a change in domain followed by domain prediction if required. Then it decides suitable action for each slot in the predicted domains and finds their value accordingly. The model parameters of Hi-DST are independent of the number of domains/slots. Due to the hierarchical modeling, it achieves O(|M|+|N|) belief state prediction for a single turn where M and N are the set of unique domains and slots respectively. We argue that the hierarchical structure helps in the model explainability and makes it easily extensible to new datasets. Experiments on the MultiWOZ dataset show that our proposed model achieves comparable joint accuracy performance to state-of-the-art DST models.</abstract>
       <url hash="99e7e7be">2021.sigdial-1.23</url>
@@ -343,7 +343,7 @@
       <title>Recent Neural Methods on Dialogue State Tracking for Task-Oriented Dialogue Systems: A Survey</title>
       <author><first>Vevake</first><last>Balaraman</last></author>
       <author><first>Seyedmostafa</first><last>Sheikhalishahi</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>239–251</pages>
       <abstract>This paper aims at providing a comprehensive overview of recent developments in dialogue state tracking (DST) for task-oriented conversational systems. We introduce the task, the main datasets that have been exploited as well as their evaluation metrics, and we analyze several proposed approaches. We distinguish between static ontology DST models, which predict a fixed set of dialogue states, and dynamic ontology models, which can predict dialogue states even when the ontology changes. We also discuss the model’s ability to track either single or multiple domains and to scale to new domains, both in terms of knowledge transfer and zero-shot learning. We cover a period from 2013 to 2020, showing a significant increase of multiple domain methods, most of them utilizing pre-trained language models.</abstract>
       <url hash="6722fe65">2021.sigdial-1.25</url>
@@ -367,7 +367,7 @@
     <paper id="27">
       <title><fixed-case>ERICA</fixed-case>: An Empathetic Android Companion for Covid-19 Quarantine</title>
       <author><first>Etsuko</first><last>Ishii</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Divesh</first><last>Lala</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
@@ -455,7 +455,7 @@
       <author><first>Pengfei</first><last>Hong</last></author>
       <author><first>Siqi</first><last>Shen</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>301–313</pages>
       <abstract>Commonsense inference to understand and explain human language is a fundamental research problem in natural language processing. Explaining human conversations poses a great challenge as it requires contextual understanding, planning, inference, and several aspects of reasoning including causal, temporal, and commonsense reasoning. In this work, we introduce CIDER – a manually curated dataset that contains dyadic dialogue explanations in the form of implicit and explicit knowledge triplets inferred using contextual commonsense inference. Extracting such rich explanations from conversations can be conducive to improving several downstream applications. The annotated triplets are categorized by the type of commonsense knowledge present (e.g., causal, conditional, temporal). We set up three different tasks conditioned on the annotated dataset: Dialogue-level Natural Language Inference, Span Extraction, and Multi-choice Span Selection. Baseline results obtained with transformer-based models reveal that the tasks are difficult, paving the way for promising future research. The dataset and the baseline implementations are publicly available at <url>https://github.com/declare-lab/CIDER</url>.</abstract>
@@ -580,9 +580,9 @@
     </paper>
     <paper id="43">
       <title>Diversity as a By-Product: Goal-oriented Language Generation Leads to Linguistic Variation</title>
-      <author><first>Simeon</first><last>Schüz</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last></author>
       <author><first>Ting</first><last>Han</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>411–422</pages>
       <abstract>The ability for variation in language use is necessary for speakers to achieve their conversational goals, for instance when referring to objects in visual environments. We argue that diversity should not be modelled as an independent objective in dialogue, but should rather be a result or by-product of goal-oriented language generation. Different lines of work in neural language generation investigated decoding methods for generating more diverse utterances, or increasing the informativity through pragmatic reasoning. We connect those lines of work and analyze how pragmatic reasoning during decoding affects the diversity of generated image captions. We find that boosting diversity itself does not result in more pragmatically informative captions, but pragmatic reasoning does increase lexical diversity. Finally, we discuss whether the gain in informativity is achieved in linguistically plausible ways.</abstract>
       <url hash="22ecd501">2021.sigdial-1.43</url>
@@ -636,7 +636,7 @@
       <author><first>Christian</first><last>Geishauser</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Shutong</first><last>Feng</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>445–456</pages>
       <abstract>Dialogue policy optimisation via reinforcement learning requires a large number of training interactions, which makes learning with real users time consuming and expensive. Many set-ups therefore rely on a user simulator instead of humans. These user simulators have their own problems. While hand-coded, rule-based user simulators have been shown to be sufficient in small, simple domains, for complex domains the number of rules quickly becomes intractable. State-of-the-art data-driven user simulators, on the other hand, are still domain-dependent. This means that adaptation to each new domain requires redesigning and retraining. In this work, we propose a domain-independent transformer-based user simulator (TUS). The structure of TUS is not tied to a specific domain, enabling domain generalization and the learning of cross-domain user behaviour from data. We compare TUS with the state-of-the-art using automatic as well as human evaluations. TUS can compete with rule-based user simulators on pre-defined domains and is able to generalize to unseen domains in a zero-shot fashion.</abstract>
       <url hash="d70f99b2">2021.sigdial-1.47</url>
@@ -706,7 +706,7 @@
       <title>Coreference-Aware Dialogue Summarization</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Ke</first><last>Shi</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>509–519</pages>
       <abstract>Summarizing conversations via neural approaches has been gaining research traction lately, yet it is still challenging to obtain practical solutions. Examples of such challenges include unstructured information exchange in dialogues, informal interactions between speakers, and dynamic role changes of speakers as the dialogue evolves. Many of such challenges result in complex coreference links. Therefore, in this work, we investigate different approaches to explicitly incorporate coreference information in neural abstractive dialogue summarization models to tackle the aforementioned challenges. Experimental results show that the proposed approaches achieve state-of-the-art performance, implying it is useful to utilize coreference information in dialogue summarization. Evaluation results on factual correctness suggest such coreference-aware models are better at tracing the information flow among interlocutors and associating accurate status/actions with the corresponding interlocutors and person mentions.</abstract>
       <url hash="9be8ac79">2021.sigdial-1.53</url>
@@ -730,7 +730,7 @@
     </paper>
     <paper id="55">
       <title>Incremental temporal summarization in multi-party meetings</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Saurav</first><last>Sahay</last></author>
       <author><first>Wenda</first><last>Chen</last></author>
       <author><first>Lama</first><last>Nachman</last></author>
@@ -743,9 +743,9 @@
     </paper>
     <paper id="56">
       <title>Mitigating Topic Bias when Detecting Decisions in Dialogue</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Prashant</first><last>Khare</last></author>
-      <author><first>Patrick</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick</first><last>Healey</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>542–547</pages>
       <abstract>This work revisits the task of detecting decision-related utterances in multi-party dialogue. We explore performance of a traditional approach and a deep learning-based approach based on transformer language models, with the latter providing modest improvements. We then analyze topic bias in the models using topic information obtained by manual annotation. Our finding is that when detecting some types of decisions in our data, models rely more on topic specific words that decisions are about rather than on words that more generally indicate decision making. We further explore this by removing topic information from the train data. We show that this resolves the bias issues to an extent and, surprisingly, sometimes even boosts performance.</abstract>
@@ -772,7 +772,7 @@
       <title>Large-Scale Quantitative Evaluation of Dialogue Agents’ Response Strategies against Offensive Users</title>
       <author><first>Haojun</first><last>Li</last></author>
       <author><first>Dilara</first><last>Soylu</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>556–561</pages>
       <abstract>As voice assistants and dialogue agents grow in popularity, so does the abuse they receive. We conducted a large-scale quantitative evaluation of the effectiveness of 4 response types (avoidance, why, empathetic, and counter), and 2 additional factors (using a redirect or a voluntarily provided name) that have not been tested by prior work. We measured their direct effectiveness on real users in-the-wild by the re-offense ratio, length of conversation after the initial response, and number of turns until the next re-offense. Our experiments confirm prior lab studies in showing that empathetic responses perform better than generic avoidance responses as well as counter responses. We show that dialogue agents should almost always guide offensive users to a new topic through the use of redirects and use the user’s name if provided. As compared to a baseline avoidance strategy employed by commercial agents, our best strategy is able to reduce the re-offense ratio from 92% to 43%.</abstract>
       <url hash="8fd5350b">2021.sigdial-1.58</url>
diff --git a/data/xml/2021.sigmorphon.xml b/data/xml/2021.sigmorphon.xml
index 8327bc0466..f0224269e2 100644
--- a/data/xml/2021.sigmorphon.xml
+++ b/data/xml/2021.sigmorphon.xml
@@ -90,7 +90,7 @@
       <author><first>Saujas</first><last>Vaduguru</last></author>
       <author><first>Aalok</first><last>Sathe</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>60–71</pages>
       <abstract>Neural models excel at extracting statistical patterns from large amounts of data, but struggle to learn patterns or reason about language from only a few examples. In this paper, we ask: Can we learn explicit rules that generalize well from only a few examples? We explore this question using program synthesis. We develop a synthesis model to learn phonology rules as programs in a domain-specific language. We test the ability of our models to generalize from few training examples using our new dataset of problems from the Linguistics Olympiad, a challenging set of tasks that require strong linguistic reasoning ability. In addition to being highly sample-efficient, our approach generates human-readable programs, and allows control over the generalizability of the learnt programs.</abstract>
       <url hash="2e4fd440">2021.sigmorphon-1.7</url>
@@ -101,13 +101,13 @@
     <paper id="8">
       <title>Findings of the <fixed-case>SIGMORPHON</fixed-case> 2021 Shared Task on Unsupervised Morphological Paradigm Clustering</title>
       <author><first>Adam</first><last>Wiemerslage</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Alexander</first><last>Erdmann</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>72–81</pages>
       <abstract>We describe the second SIGMORPHON shared task on unsupervised morphology: the goal of the SIGMORPHON 2021 Shared Task on Unsupervised Morphological Paradigm Clustering is to cluster word types from a raw text corpus into paradigms. To this end, we release corpora for 5 development and 9 test languages, as well as gold partial paradigms for evaluation. We receive 14 submissions from 4 teams that follow different strategies, and the best performing system is based on adaptor grammars. Results vary significantly across languages. However, all systems are outperformed by a supervised lemmatizer, implying that there is still room for improvement.</abstract>
       <url hash="994d226b">2021.sigmorphon-1.8</url>
@@ -117,7 +117,7 @@
     <paper id="9">
       <title><fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Unsupervised Paradigm Clustering</title>
       <author><first>Kate</first><last>McCurdy</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <pages>82–89</pages>
       <abstract>This work describes the Edinburgh submission to the SIGMORPHON 2021 Shared Task 2 on unsupervised morphological paradigm clustering. Given raw text input, the task was to assign each token to a cluster with other tokens from the same paradigm. We use Adaptor Grammar segmentations combined with frequency-based heuristics to predict paradigm clusters. Our system achieved the highest average F1 score across 9 test languages, placing first out of 15 submissions.</abstract>
@@ -140,7 +140,7 @@
       <title>Unsupervised Paradigm Clustering Using Transformation Rules</title>
       <author><first>Changbing</first><last>Yang</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>98–106</pages>
       <abstract>This paper describes the submission of the CU-UBC team for the SIGMORPHON 2021 Shared Task 2: Unsupervised morphological paradigm clustering. Our system generates paradigms using morphological transformation rules which are discovered from raw data. We experiment with two methods for discovering rules. Our first approach generates prefix and suffix transformations between similar strings. Secondly, we experiment with more general rules which can apply transformations inside the input strings in addition to prefix and suffix transformations. We find that the best overall performance is delivered by prefix and suffix rules but more general transformation rules perform better for languages with templatic morphology and very high morpheme-to-word ratios.</abstract>
       <url hash="dab58b0f">2021.sigmorphon-1.11</url>
@@ -151,7 +151,7 @@
       <title>Paradigm Clustering with Weighted Edit Distance</title>
       <author><first>Andrew</first><last>Gerlach</last></author>
       <author><first>Adam</first><last>Wiemerslage</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>107–114</pages>
       <abstract>This paper describes our system for the SIGMORPHON 2021 Shared Task on Unsupervised Morphological Paradigm Clustering, which asks participants to group inflected forms together according their underlying lemma without the aid of annotated training data. We employ agglomerative clustering to group word forms together using a metric that combines an orthographic distance and a semantic distance from word embeddings. We experiment with two variations of an edit distance-based model for quantifying orthographic distance, but, due to time constraints, our system does not improve over the shared task’s baseline system.</abstract>
       <url hash="de4425c3">2021.sigmorphon-1.12</url>
@@ -207,7 +207,7 @@
       <author><first>Vagrant</first><last>Gautam</last></author>
       <author><first>Wang Yau</first><last>Li</last></author>
       <author><first>Zafarullah</first><last>Mahmood</last></author>
-      <author><first>Fred</first><last>Mailhot</last></author>
+      <author id="frederic-mailhot"><first>Fred</first><last>Mailhot</last></author>
       <author><first>Shreekantha</first><last>Nadig</last></author>
       <author><first>Riqiang</first><last>Wang</last></author>
       <author><first>Nathan</first><last>Zhang</last></author>
@@ -264,7 +264,7 @@
       <title>An <fixed-case>FST</fixed-case> morphological analyzer for the Gitksan language</title>
       <author><first>Clarissa</first><last>Forbes</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>188–197</pages>
       <abstract>This paper presents a finite-state morphological analyzer for the Gitksan language. The analyzer draws from a 1250-token Eastern dialect wordlist. It is based on finite-state technology and additionally includes two extensions which can provide analyses for out-of-vocabulary words: rules for generating predictable dialect variants, and a neural guesser component. The pre-neural analyzer, tested against interlinear-annotated texts from multiple dialects, achieves coverage of (75-81%), and maintains high precision (95-100%). The neural extension improves coverage at the cost of lowered precision.</abstract>
       <url hash="2e940c82">2021.sigmorphon-1.21</url>
@@ -275,9 +275,9 @@
     <paper id="22">
       <title>Comparative Error Analysis in Neural and Finite-state Models for Unsupervised Character-level Transduction</title>
       <author><first>Maria</first><last>Ryskina</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <pages>198–211</pages>
       <abstract>Traditionally, character-level transduction problems have been solved with finite-state models designed to encode structural and linguistic knowledge of the underlying process, whereas recent approaches rely on the power and flexibility of sequence-to-sequence models with attention. Focusing on the less explored unsupervised learning scenario, we compare the two model classes side by side and find that they tend to make different types of errors even when achieving comparable performance. We analyze the distributions of different error classes using two unsupervised tasks as testbeds: converting informally romanized text into the native script of its language (for Russian, Arabic, and Kannada) and translating between a pair of closely related languages (Serbian and Bosnian). Finally, we investigate how combining finite-state and sequence-to-sequence models at decoding time affects the output quantitatively and qualitatively.</abstract>
       <url hash="4b3a22b2">2021.sigmorphon-1.22</url>
@@ -289,7 +289,7 @@
       <title>Finite-state Model of Shupamem Reduplication</title>
       <author><first>Magdalena</first><last>Markowska</last></author>
       <author><first>Jeffrey</first><last>Heinz</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>212–221</pages>
       <abstract>Shupamem, a language of Western Cameroon, is a tonal language which also exhibits the morpho-phonological process of full reduplication. This creates two challenges for finite-state model of its morpho-syntax and morphophonology: how to manage the full reduplication and the autosegmental nature of lexical tone. Dolatian and Heinz (2020) explain how 2-way finite-state transducers can model full reduplication without an exponential increase in states, and finite-state transducers with multiple tapes have been used to model autosegmental tiers, including tone (Wiebe, 1992; Dolatian and Rawski, 2020a). Here we synthesize 2-way finite-state transducers and multitape transducers, resulting in a finite-state formalism that subsumes both, to account for the full reduplicative processes in Shupamem which also affect tone.</abstract>
       <url hash="d14265db">2021.sigmorphon-1.23</url>
@@ -314,7 +314,7 @@
       <title><fixed-case>SIGMORPHON</fixed-case> 2021 Shared Task on Morphological Reinflection: Generalization Across Languages</title>
       <author><first>Tiago</first><last>Pimentel</last></author>
       <author><first>Maria</first><last>Ryskina</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Brian</first><last>Leonard</last></author>
@@ -327,7 +327,7 @@
       <author><first>Michael</first><last>Gasser</last></author>
       <author><first>William</first><last>Lane</last></author>
       <author><first>Matt</first><last>Coler</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Jaime Rafael</first><last>Montoya Samame</last></author>
       <author><first>Gema Celeste</first><last>Silva Villegas</last></author>
       <author><first>Adam</first><last>Ek</last></author>
@@ -346,7 +346,7 @@
       <author><first>Aelita</first><last>Salchak</last></author>
       <author><first>Christopher</first><last>Straughn</last></author>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Jonathan North</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan North</first><last>Washington</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Witold</first><last>Kieraś</last></author>
       <author><first>Marcin</first><last>Woliński</last></author>
@@ -354,11 +354,11 @@
       <author><first>Niklas</first><last>Stoehr</last></author>
       <author><first>Zahroh</first><last>Nuriah</last></author>
       <author><first>Shyam</first><last>Ratan</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Edoardo M.</first><last>Ponti</last></author>
       <author><first>Grant</first><last>Aiton</last></author>
       <author><first>Richard J.</first><last>Hatcher</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <author><first>Botond</first><last>Barta</last></author>
diff --git a/data/xml/2021.sigtyp.xml b/data/xml/2021.sigtyp.xml
index 82b3a5d070..d4ff8939c5 100644
--- a/data/xml/2021.sigtyp.xml
+++ b/data/xml/2021.sigtyp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third Workshop on Computational Typology and Multilingual NLP</booktitle>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Elizabeth</first><last>Salesky</last></editor>
-      <editor><first>Sabrina</first><last>Mielke</last></editor>
+      <editor id="sabrina-j-mielke"><first>Sabrina</first><last>Mielke</last></editor>
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
       <editor><first>Harald</first><last>Hammarström</last></editor>
@@ -77,9 +77,9 @@
     </paper>
     <paper id="6">
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and Typology</title>
-      <author><first>Michael</first><last>Ellsworth</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <pages>61–66</pages>
       <abstract>FrameNet and the Multilingual FrameNet project have produced multilingual semantic annotations of parallel texts that yield extremely fine-grained typological insights. Moreover, frame semantic annotation of a wide cross-section of languages would provide information on the limits of Frame Semantics (Fillmore 1982, Fillmore1985). Multilingual semantic annotation offers critical input for research on linguistic diversity and recurrent patterns in computational typology. Drawing on results from FrameNet annotation of parallel texts, this paper proposes frame semantic annotation as a new component to complement the state of the art in computational semantic typology.</abstract>
       <url hash="2eb4f5c4">2021.sigtyp-1.6</url>
@@ -89,7 +89,7 @@
     <paper id="7">
       <title>Family of Origin and Family of Choice: Massively Parallel Lexiconized Iterative Pretraining for Severely Low Resource Text-based Translation</title>
       <author><first>Zhong</first><last>Zhou</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>67–80</pages>
       <abstract>We translate a closed text that is known in advance into a severely low resource language by leveraging massive source parallelism. In other words, given a text in 124 source languages, we translate it into a severely low resource language using only ∼1,000 lines of low resource data without any external help. Firstly, we propose a systematic method to rank and choose source languages that are close to the low resource language. We call the linguistic definition of language family Family of Origin (FAMO), and we call the empirical definition of higher-ranked languages using our metrics Family of Choice (FAMC). Secondly, we build an Iteratively Pretrained Multilingual Order-preserving Lexiconized Transformer (IPML) to train on ∼1,000 lines (∼3.5%) of low resource data. In order to translate named entities well, we build a massive lexicon table for 2,939 Bible named entities in 124 source languages, and include many that occur once and covers more than 66 severely low resource languages. Moreover, we also build a novel method of combining translations from different source languages into one. Using English as a hypothetical low resource language, we get a +23.9 BLEU increase over a multilingual baseline, and a +10.3 BLEU increase over our asymmetric baseline in the Bible dataset. We get a 42.8 BLEU score for Portuguese-English translation on the medical EMEA dataset. We also have good results for a real severely low resource Mayan language, Eastern Pokomchi.</abstract>
       <url hash="82b0ff4e">2021.sigtyp-1.7</url>
diff --git a/data/xml/2021.smm4h.xml b/data/xml/2021.smm4h.xml
index ce8520685b..272c23f773 100644
--- a/data/xml/2021.smm4h.xml
+++ b/data/xml/2021.smm4h.xml
@@ -13,12 +13,12 @@
       <editor><first>Salvador Lima</first><last>Lopez</last></editor>
       <editor><first>Ivan</first><last>Flores</last></editor>
       <editor><first>Karen</first><last>O'Connor</last></editor>
-      <editor><first>Davy</first><last>Weissenbacher</last></editor>
+      <editor id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></editor>
       <editor><first>Elena</first><last>Tutubalina</last></editor>
       <editor><first>Abeed</first><last>Sarker</last></editor>
       <editor><first>Juan M</first><last>Banda</last></editor>
-      <editor><first>Martin</first><last>Krallinger</last></editor>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="martin-krallinger"><first>Martin</first><last>Krallinger</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
       <month>June</month>
@@ -45,7 +45,7 @@
     <paper id="2">
       <title>View Distillation with Unlabeled Data for Extracting Adverse Drug Effects from User-Generated Data</title>
       <author><first>Payam</first><last>Karisani</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Li</first><last>Xiong</last></author>
       <pages>7–12</pages>
       <abstract>We present an algorithm based on multi-layer transformers for identifying Adverse Drug Reactions (ADR) in social media data. Our model relies on the properties of the problem and the characteristics of contextual word embeddings to extract two views from documents. Then a classifier is trained on each view to label a set of unlabeled documents to be used as an initializer for a new classifier in the other view. Finally, the initialized classifier in each view is further trained using the initial training examples. We evaluated our model in the largest publicly available ADR dataset. The experiments testify that our model significantly outperforms the transformer-based models pretrained on domain-specific data.</abstract>
@@ -159,7 +159,7 @@
       <title><fixed-case>UACH</fixed-case>-<fixed-case>INAOE</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>: a <fixed-case>BERT</fixed-case> based approach for classification of <fixed-case>COVID</fixed-case>-19 <fixed-case>T</fixed-case>witter posts</title>
       <author><first>Alberto</first><last>Valdes</last></author>
       <author><first>Jesus</first><last>Lopez</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <pages>65–68</pages>
       <abstract>This work describes the participation of the Universidad Autónoma de Chihuahua - Instituto Nacional de Astrofísica, Óptica y Electrónica team at the Social Media Mining for Health Applications (SMM4H) 2021 shared task. Our team participated in task 5 and 6, both focused on the automatic classification of Twitter posts related to COVID-19. Task 5 was oriented on solving a binary classification problem, trying to identify self-reporting tweets of potential cases of COVID-19. Task 6 objective was to classify tweets containing COVID-19 symptoms. For both tasks we used models based on bidirectional encoder representations from transformers (BERT). Our objective was to determine if a model pretrained on a corpus in the domain of interest can outperform one trained on a much larger general domain corpus. Our F1 results were encouraging, 0.77 and 0.95 for task 5 and 6 respectively, having achieved the highest score among all the participants in the latter.</abstract>
       <url hash="2893b80e">2021.smm4h-1.10</url>
@@ -222,7 +222,7 @@
       <title><fixed-case>BERT</fixed-case> based Adverse Drug Effect Tweet Classification</title>
       <author><first>Tanay</first><last>Kayastha</last></author>
       <author><first>Pranjal</first><last>Gupta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>88–90</pages>
       <abstract>This paper describes models developed for the Social Media Mining for Health (SMM4H) 2021 shared tasks. Our team participated in the first subtask that classifies tweets with Adverse Drug Effect (ADE) mentions. Our best performing model utilizes BERTweet followed by a single layer of BiLSTM. The system achieves an F-score of 0.45 on the test set without the use of any auxiliary resources such as Part-of-Speech tags, dependency tags, or knowledge from medical dictionaries.</abstract>
       <url hash="fdf60a29">2021.smm4h-1.15</url>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="16">
       <title>A Joint Training Approach to Tweet Classification and Adverse Effect Extraction and Normalization for <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2021</title>
-      <author><first>Mohab</first><last>Elkaref</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last></author>
       <author><first>Lamiece</first><last>Hassan</last></author>
       <pages>91–94</pages>
       <abstract>In this work we describe our submissions to the Social Media Mining for Health (SMM4H) 2021 Shared Task. We investigated the effectiveness of a joint training approach to Task 1, specifically classification, extraction and normalization of Adverse Drug Effect (ADE) mentions in English tweets. Our approach performed well on the normalization task, achieving an above average f1 score of 24%, but less so on classification and extraction, with f1 scores of 22% and 37% respectively. Our experiments also showed that a larger dataset with more negative results led to stronger results than a smaller more balanced dataset, even when both datasets have the same positive examples. Finally we also submitted a tuned BERT model for Task 6: Classification of Covid-19 tweets containing symptoms, which achieved an above average f1 score of 96%.</abstract>
@@ -288,7 +288,7 @@
       <title>Lasige-<fixed-case>B</fixed-case>io<fixed-case>TM</fixed-case> at <fixed-case>P</fixed-case>rof<fixed-case>NER</fixed-case>: <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> and contextual <fixed-case>S</fixed-case>panish embeddings for Named Entity Recognition and Tweet Binary Classification</title>
       <author><first>Pedro</first><last>Ruas</last></author>
       <author><first>Vitor</first><last>Andrade</last></author>
-      <author><first>Francisco</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco</first><last>Couto</last></author>
       <pages>108–111</pages>
       <abstract>The paper describes the participation of the Lasige-BioTM team at sub-tracks A and B of ProfNER, which was based on: i) a BiLSTM-CRF model that leverages contextual and classical word embeddings to recognize and classify the mentions, and ii) on a rule-based module to classify tweets. In the Evaluation phase, our model achieved a F1-score of 0.917 (0,031 more than the median) in sub-track A and a F1-score of 0.727 (0,034 less than the median) in sub-track B.</abstract>
       <url hash="ab88890f">2021.smm4h-1.21</url>
@@ -310,7 +310,7 @@
       <title><fixed-case>U</fixed-case>o<fixed-case>B</fixed-case> at <fixed-case>P</fixed-case>rof<fixed-case>NER</fixed-case> 2021: Data Augmentation for Classification Using Machine Translation</title>
       <author><first>Frances Adriana</first><last>Laureano De Leon</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>115–117</pages>
       <abstract>This paper describes the participation of the UoB-NLP team in the ProfNER-ST shared subtask 7a. The task was aimed at detecting the mention of professions in social media text. Our team experimented with two methods of improving the performance of pre-trained models: Specifically, we experimented with data augmentation through translation and the merging of multiple language inputs to meet the objective of the task. While the best performing model on the test data consisted of mBERT fine-tuned on augmented data using back-translation, the improvement is minor possibly because multi-lingual pre-trained models such as mBERT already have access to the kind of information provided through back-translation and bilingual data.</abstract>
       <url hash="a1cb734e">2021.smm4h-1.23</url>
@@ -375,7 +375,7 @@
     <paper id="29">
       <title>Classification of <fixed-case>COVID</fixed-case>19 tweets using Machine Learning Approaches</title>
       <author><first>Anupam</first><last>Mondal</last></author>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Monalisa</first><last>Dey</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <pages>135–137</pages>
@@ -387,7 +387,7 @@
     <paper id="30">
       <title>Fine-tuning <fixed-case>BERT</fixed-case> to classify <fixed-case>COVID</fixed-case>19 tweets containing symptoms</title>
       <author><first>Rajarshi</first><last>Roychoudhury</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>138–140</pages>
       <abstract>Twitter is a valuable source of patient-generated data that has been used in various population health studies. The first step in many of these studies is to identify and capture Twitter messages (tweets) containing medication mentions. Identifying personal mentions of COVID19 symptoms requires distinguishing personal mentions from other mentions such as symptoms reported by others and references to news articles or other sources. In this article, we describe our submission to Task 6 of the Social Media Mining for Health Applications (SMM4H) Shared Task 2021. This task challenged participants to classify tweets where the target classes are:(1) self-reports,(2) non-personal reports, and (3) literature/news mentions. Our system used a handcrafted preprocessing and word embeddings from BERT encoder model. We achieved an F1 score of 93%</abstract>
       <url hash="bed17131">2021.smm4h-1.30</url>
@@ -399,8 +399,8 @@
       <author><first>Alberto</first><last>Mesa Murgado</last></author>
       <author><first>Ana</first><last>Parras Portillo</last></author>
       <author><first>Pilar</first><last>López Úbeda</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>Alfonso</first><last>Ureña-López</last></author>
       <pages>141–145</pages>
       <abstract>This paper describes the entry of the research group SINAI at SMM4H’s ProfNER task on the identification of professions and occupations in social media related with health. Specifically we have participated in Task 7a: Tweet Binary Classification to determine whether a tweet contains mentions of occupations or not, as well as in Task 7b: NER Offset Detection and Classification aimed at predicting occupations mentions and classify them discriminating by professions and working statuses.</abstract>
       <url hash="ffbab33f">2021.smm4h-1.31</url>
@@ -420,11 +420,11 @@
     </paper>
     <paper id="33">
       <title><fixed-case>ULD</fixed-case>-<fixed-case>NUIG</fixed-case> at Social Media Mining for Health Applications (#<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>) Shared Task 2021</title>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Koustava</first><last>Goswami</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>149–152</pages>
       <abstract>Social media platforms such as Twitter and Facebook have been utilised for various research studies, from the cohort-level discussion to community-driven approaches to address the challenges in utilizing social media data for health, clinical and biomedical information. Detection of medical jargon’s, named entity recognition, multi-word expression becomes the primary, fundamental steps in solving those challenges. In this paper, we enumerate the ULD-NUIG team’s system, designed as part of Social Media Mining for Health Applications (#SMM4H) Shared Task 2021. The team conducted a series of experiments to explore the challenges of task 6 and task 5. The submitted systems achieve F-1 0.84 and 0.53 score for task 6 and 5 respectively.</abstract>
       <url hash="fe4893d5">2021.smm4h-1.33</url>
diff --git a/data/xml/2021.smp.xml b/data/xml/2021.smp.xml
index 4b533fe6a1..6377499416 100644
--- a/data/xml/2021.smp.xml
+++ b/data/xml/2021.smp.xml
@@ -28,8 +28,8 @@
     <paper id="2">
       <title>Prosody Labelled Dataset for <fixed-case>H</fixed-case>indi</title>
       <author><first>Esha</first><last>Banerjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
-      <author><first>Girish</first><last>Jha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="girish-nath-jha"><first>Girish</first><last>Jha</last></author>
       <pages>14–19</pages>
       <abstract>This study aims to develop an intonation labelled database for Hindi, for enhancing prosody in ASR and TTS systems, which is also helpful for building Speech to Speech Machine Translation systems. Although no single standard for prosody labelling exists in Hindi, researchers in the past have employed perceptual and statistical methods in literature to draw inferences about the behaviour of prosody patterns in Hindi. Based on such existing research and largely agreed upon intonational theories in Hindi, this study attempts to develop a manually annotated prosodic corpus of Hindi speech data, which can be used for training speech models for natural-sounding speech in the future. 500 sentences (2,550 words) for declarative and interrogative types have been labelled using Praat.</abstract>
       <url hash="400a8d93">2021.smp-1.2</url>
diff --git a/data/xml/2021.socialnlp.xml b/data/xml/2021.socialnlp.xml
index 2b10cdab8a..73d368f977 100644
--- a/data/xml/2021.socialnlp.xml
+++ b/data/xml/2021.socialnlp.xml
@@ -47,7 +47,7 @@
       <author><first>Kazuma</first><last>Murao</last></author>
       <author><first>Takeshi</first><last>Masuyama</last></author>
       <author><first>Taichi</first><last>Yatsuka</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <pages>24–35</pages>
       <abstract>Ranking the user comments posted on a news article is important for online news services because comment visibility directly affects the user experience. Research on ranking comments with different metrics to measure the comment quality has shown “constructiveness” used in argument analysis is promising from a practical standpoint. In this paper, we report a case study in which this constructiveness is examined in the real world. Specifically, we examine an in-house competition to improve the performance of ranking constructive comments and demonstrate the effectiveness of the best obtained model for a commercial service.</abstract>
@@ -119,7 +119,7 @@
       <title>Self-Contextualized Attention for Abusive Language Identification</title>
       <author><first>Horacio</first><last>Jarquín-Vásquez</last></author>
       <author><first>Hugo Jair</first><last>Escalante</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <pages>103–112</pages>
       <abstract>The use of attention mechanisms in deep learning approaches has become popular in natural language processing due to its outstanding performance. The use of these mechanisms allows one managing the importance of the elements of a sequence in accordance to their context, however, this importance has been observed independently between the pairs of elements of a sequence (self-attention) and between the application domain of a sequence (contextual attention), leading to the loss of relevant information and limiting the representation of the sequences. To tackle these particular issues we propose the self-contextualized attention mechanism, which trades off the previous limitations, by considering the internal and contextual relationships between the elements of a sequence. The proposed mechanism was evaluated in four standard collections for the abusive language identification task achieving encouraging results. It outperformed the current attention mechanisms and showed a competitive performance with respect to state-of-the-art approaches.</abstract>
       <url hash="31c0034d">2021.socialnlp-1.9</url>
@@ -152,7 +152,7 @@
     <paper id="12">
       <title><fixed-case>PANDORA</fixed-case> Talks: Personality and Demographics on <fixed-case>R</fixed-case>eddit</title>
       <author><first>Matej</first><last>Gjurković</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Iva</first><last>Vukojević</last></author>
       <author><first>Mihaela</first><last>Bošnjak</last></author>
       <author><first>Jan</first><last>Snajder</last></author>
@@ -168,7 +168,7 @@
       <author><first>Xueming</first><last>Xu</last></author>
       <author><first>Yiwei</first><last>Zhang</last></author>
       <author><first>Ian</first><last>Stewart</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>153–162</pages>
       <abstract>Many people aim for change, but not everyone succeeds. While there are a number of social psychology theories that propose motivation-related characteristics of those who persist with change, few computational studies have explored the motivational stage of personal change. In this paper, we investigate a new dataset consisting of the writings of people who manifest intention to change, some of whom persist while others do not. Using a variety of linguistic analysis techniques, we first examine the writing patterns that distinguish the two groups of people. Persistent people tend to reference more topics related to long-term self-improvement and use a more complicated writing style. Drawing on these consistent differences, we build a classifier that can reliably identify the people more likely to persist, based on their language. Our experiments provide new insights into the motivation-related behavior of people who persist with their intention to change.</abstract>
       <url hash="b1cfcb24">2021.socialnlp-1.13</url>
diff --git a/data/xml/2021.splurobonlp.xml b/data/xml/2021.splurobonlp.xml
index 8895c4dceb..bd820f6b2e 100644
--- a/data/xml/2021.splurobonlp.xml
+++ b/data/xml/2021.splurobonlp.xml
@@ -34,7 +34,7 @@
       <author><first>Miltiadis Marios</first><last>Katsakioris</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Pierre Yves</first><last>Mignotte</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>11–21</pages>
       <abstract>Robust situated dialog requires the ability to process instructions based on spatial information, which may or may not be available. We propose a model, based on LXMERT, that can extract spatial information from text instructions and attend to landmarks on OpenStreetMap (OSM) referred to in a natural language instruction. Whilst, OSM is a valuable resource, as with any open-sourced data, there is noise and variation in the names referred to on the map, as well as, variation in natural language instructions, hence the need for data-driven methods over rule-based systems. This paper demonstrates that the gold GPS location can be accurately predicted from the natural language instruction and metadata with 72% accuracy for previously seen maps and 64% for unseen maps.</abstract>
       <url hash="7cd08afd">2021.splurobonlp-1.2</url>
@@ -46,7 +46,7 @@
       <author><first>Tianai</first><last>Dong</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>22–31</pages>
       <abstract>In this paper, we define and evaluate a methodology for extracting history-dependent spatial questions from visual dialogues. We say that a question is history-dependent if it requires (parts of) its dialogue history to be interpreted. We argue that some kinds of visual questions define a context upon which a follow-up spatial question relies. We call the question that restricts the context: trigger, and we call the spatial question that requires the trigger question to be answered: zoomer. We automatically extract different trigger and zoomer pairs based on the visual property that the questions rely on (e.g. color, number). We manually annotate the automatically extracted trigger and zoomer pairs to verify which zoomers require their trigger. We implement a simple baseline architecture based on a SOTA multimodal encoder. Our results reveal that there is much room for improvement for answering history-dependent questions.</abstract>
       <url hash="6ab7b354">2021.splurobonlp-1.3</url>
@@ -61,7 +61,7 @@
       <author><first>Haoyu</first><last>Wu</last></author>
       <author><first>Jonathan</first><last>Waxman</last></author>
       <author><first>Marcus</first><last>Hill</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>32–41</pages>
       <abstract>Understanding spatial expressions and using them appropriately is necessary for seamless and natural human-machine interaction. However, capturing the semantics and appropriate usage of spatial prepositions is notoriously difficult, because of their vagueness and polysemy. Although modern data-driven approaches are good at capturing statistical regularities in the usage, they usually require substantial sample sizes, often do not generalize well to unseen instances and, most importantly, their structure is essentially opaque to analysis, which makes diagnosing problems and understanding their reasoning process difficult. In this work, we discuss our attempt at modeling spatial senses of prepositions in English using a combination of rule-based and statistical learning approaches. Each preposition model is implemented as a tree where each node computes certain intuitive relations associated with the preposition, with the root computing the final value of the prepositional relation itself. The models operate on a set of artificial 3D “room world” environments, designed in Blender, taking the scene itself as an input. We also discuss our annotation framework used to collect human judgments employed in the model training. Both our factored models and black-box baseline models perform quite well, but the factored models will enable reasoned explanations of spatial relation judgements.</abstract>
       <url hash="20abfb4e">2021.splurobonlp-1.4</url>
@@ -103,7 +103,7 @@
       <title>Interactive Reinforcement Learning for Table Balancing Robot</title>
       <author><first>Haein</first><last>Jeon</last></author>
       <author><first>Yewon</first><last>Kim</last></author>
-      <author><first>Bo-Yeong</first><last>Kang</last></author>
+      <author id="bo-yeong-kang"><first>Bo-Yeong</first><last>Kang</last></author>
       <pages>71–78</pages>
       <abstract>With the development of robotics, the use of robots in daily life is increasing, which has led to the need for anyone to easily train robots to improve robot use. Interactive reinforcement learning(IARL) is a method for robot training based on human–robot interaction; prior studies on IARL provide only limited types of feedback or require appropriately designed shaping rewards, which is known to be difficult and time-consuming. Therefore, in this study, we propose interactive deep reinforcement learning models based on voice feedback. In the proposed system, a robot learns the task of cooperative table balancing through deep Q-network using voice feedback provided by humans in real-time, with automatic speech recognition(ASR) and sentiment analysis to understand human voice feedback. As a result, an optimal policy convergence rate of up to 96% was realized, and performance was improved in all voice feedback-based models</abstract>
       <url hash="f5793e00">2021.splurobonlp-1.8</url>
diff --git a/data/xml/2021.spnlp.xml b/data/xml/2021.spnlp.xml
index 1e0ce7f9fb..87c1972208 100644
--- a/data/xml/2021.spnlp.xml
+++ b/data/xml/2021.spnlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Sujith</first><last>Ravi</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <editor><first>Priyanka</first><last>Agrawal</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -81,12 +81,12 @@
     <paper id="6">
       <title>Using Hierarchical Class Structure to Improve Fine-Grained Claim Classification</title>
       <author><first>Erenay</first><last>Dayanik</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Sebastian</first><last>Haunss</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>53–60</pages>
       <abstract>The analysis of public debates crucially requires the classification of political demands according to hierarchical <i>claim ontologies</i> (e.g. for immigration, a supercategory “Controlling Migration” might have subcategories “Asylum limit” or “Border installations”). A major challenge for automatic claim classification is the large number and low frequency of such subclasses. We address it by jointly predicting pairs of matching super- and subcategories. We operationalize this idea by (a) encoding soft constraints in the claim classifier and (b) imposing hard constraints via Integer Linear Programming. Our experiments with different claim classifiers on a German immigration newspaper corpus show consistent performance increases for joint prediction, in particular for infrequent categories and discuss the complementarity of the two approaches.</abstract>
       <url hash="2c230191">2021.spnlp-1.6</url>
@@ -98,7 +98,7 @@
       <author><first>Chenyang</first><last>Huang</last></author>
       <author><first>Wei</first><last>Yang</last></author>
       <author><first>Yanshuai</first><last>Cao</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <author><first>Lili</first><last>Mou</last></author>
       <pages>61–66</pages>
       <abstract>In this paper, we propose a globally normalized model for context-free grammar (CFG)-based semantic parsing. Instead of predicting a probability, our model predicts a real-valued score at each step and does not suffer from the label bias problem. Experiments show that our approach outperforms locally normalized models on small datasets, but it does not yield improvement on a large dataset.</abstract>
@@ -110,7 +110,7 @@
       <title>Comparing Span Extraction Methods for Semantic Role Labeling</title>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>67–77</pages>
       <abstract>In this work, we empirically compare span extraction methods for the task of semantic role labeling (SRL). While recent progress incorporating pre-trained contextualized representations into neural encoders has greatly improved SRL F1 performance on popular benchmarks, the potential costs and benefits of structured decoding in these models have become less clear. With extensive experiments on PropBank SRL datasets, we find that more structured decoding methods outperform BIO-tagging when using static (word type) embeddings across all experimental settings. However, when used in conjunction with pre-trained contextualized word representations, the benefits are diminished. We also experiment in cross-genre and cross-lingual settings and find similar trends. We further perform speed comparisons and provide analysis on the accuracy-efficiency trade-offs among different decoding methods.</abstract>
       <url hash="6daa5813">2021.spnlp-1.8</url>
diff --git a/data/xml/2021.starsem.xml b/data/xml/2021.starsem.xml
index 372d992a2f..212332c06c 100644
--- a/data/xml/2021.starsem.xml
+++ b/data/xml/2021.starsem.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics</booktitle>
       <editor><first>Lun-Wei</first><last>Ku</last></editor>
-      <editor><first>Vivi</first><last>Nastase</last></editor>
+      <editor id="vivi-nastase"><first>Vivi</first><last>Nastase</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -46,8 +46,8 @@
     <paper id="3">
       <title>Semantic shift in social networks</title>
       <author><first>Bill</first><last>Noble</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Staffan</first><last>Larsson</last></author>
       <pages>26–37</pages>
       <abstract>Just as the meaning of words is tied to the communities in which they are used, so too is semantic change. But how does lexical semantic change manifest differently across different communities? In this work, we investigate the relationship between community structure and semantic change in 45 communities from the social media website Reddit. We use distributional methods to quantify lexical semantic change and induce a social network on communities, based on interactions between members. We explore the relationship between semantic change and the clustering coefficient of a community’s social network graph, as well as community size and stability. While none of these factors are found to be significant on their own, we report a significant effect of their three-way interaction. We also report on significant word-level effects of frequency and change in frequency, which replicate previous findings.</abstract>
@@ -74,7 +74,7 @@
       <title>Recovering Lexically and Semantically Reused Texts</title>
       <author><first>Ansel</first><last>MacLaughlin</last></author>
       <author><first>Shaobin</first><last>Xu</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>52–66</pages>
       <abstract>Writers often repurpose material from existing texts when composing new documents. Because most documents have more than one source, we cannot trace these connections using only models of document-level similarity. Instead, this paper considers methods for local text reuse detection (LTRD), detecting localized regions of lexically or semantically similar text embedded in otherwise unrelated material. In extensive experiments, we study the relative performance of four classes of neural and bag-of-words models on three LTRD tasks – detecting plagiarism, modeling journalists’ use of press releases, and identifying scientists’ citation of earlier papers. We conduct evaluations on three existing datasets and a new, publicly-available citation localization dataset. Our findings shed light on a number of previously-unexplored questions in the study of LTRD, including the importance of incorporating document-level context for predictions, the applicability of of-the-shelf neural models pretrained on “general” semantic textual similarity tasks such as paraphrase detection, and the trade-offs between more efficient bag-of-words and feature-based neural models and slower pairwise neural models.</abstract>
       <url hash="b8d8d726">2021.starsem-1.5</url>
@@ -95,7 +95,7 @@
       <title><fixed-case>N</fixed-case>eural<fixed-case>L</fixed-case>og: Natural Language Inference with Joint Neural and Logical Reasoning</title>
       <author><first>Zeming</first><last>Chen</last></author>
       <author><first>Qiyue</first><last>Gao</last></author>
-      <author><first>Lawrence S.</first><last>Moss</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></author>
       <pages>78–88</pages>
       <abstract>Deep learning (DL) based language models achieve high performance on various benchmarks for Natural Language Inference (NLI). And at this time, symbolic approaches to NLI are receiving less attention. Both approaches (symbolic and DL) have their advantages and weaknesses. However, currently, no method combines them in a system to solve the task of NLI. To merge symbolic and deep learning methods, we propose an inference framework called NeuralLog, which utilizes both a monotonicity-based logical inference engine and a neural network language model for phrase alignment. Our framework models the NLI task as a classic search problem and uses the beam search algorithm to search for optimal inference paths. Experiments show that our joint logic and neural inference system improves accuracy on the NLI task and can achieve state-of-art accuracy on the SICK and MED datasets.</abstract>
       <url hash="a7cff659">2021.starsem-1.7</url>
@@ -120,7 +120,7 @@
       <author><first>Mahsa</first><last>Ghaderan</last></author>
       <author><first>Amin</first><last>Pourdabiri</last></author>
       <author><first>Zahra</first><last>Sayedi</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <author><first>Sauleh</first><last>Eetemadi</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>99–104</pages>
@@ -133,7 +133,7 @@
     <paper id="10">
       <title><fixed-case>B</fixed-case>i<fixed-case>Q</fixed-case>u<fixed-case>AD</fixed-case>: Towards <fixed-case>QA</fixed-case> based on deeper text understanding</title>
       <author><first>Frank</first><last>Grimm</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>105–115</pages>
       <abstract>Recent question answering and machine reading benchmarks frequently reduce the task to one of pinpointing spans within a certain text passage that answers the given question. Typically, these systems are not required to actually understand the text on a deeper level that allows for more complex reasoning on the information contained. We introduce a new dataset called BiQuAD that requires deeper comprehension in order to answer questions in both extractive and deductive fashion. The dataset consist of 4,190 closed-domain texts and a total of 99,149 question-answer pairs. The texts are synthetically generated soccer match reports that verbalize the main events of each match. All texts are accompanied by a structured Datalog program that represents a (logical) model of its information. We show that state-of-the-art QA models do not perform well on the challenging long form contexts and reasoning requirements posed by the dataset. In particular, transformer based state-of-the-art models achieve F1-scores of only 39.0. We demonstrate how these synthetic datasets align structured knowledge with natural text and aid model introspection when approaching complex text understanding.</abstract>
       <url hash="1cb2ee65">2021.starsem-1.10</url>
@@ -144,7 +144,7 @@
       <title>Evaluating <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parser Recovery of Predicate Argument Structure via <fixed-case>C</fixed-case>omp<fixed-case>C</fixed-case>hain Analysis</title>
       <author><first>Sagar</first><last>Indurkhya</last></author>
       <author><first>Beracah</first><last>Yankama</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <pages>116–128</pages>
       <abstract>Accurate recovery of predicate-argument structure from a Universal Dependency (UD) parse is central to downstream tasks such as extraction of semantic roles or event representations. This study introduces compchains, a categorization of the hierarchy of predicate dependency relations present within a UD parse. Accuracy of compchain classification serves as a proxy for measuring accurate recovery of predicate-argument structure from sentences with embedding. We analyzed the distribution of compchains in three UD English treebanks, EWT, GUM and LinES, revealing that these treebanks are sparse with respect to sentences with predicate-argument structure that includes predicate-argument embedding. We evaluated the CoNLL 2018 Shared Task UDPipe (v1.2) baseline (dependency parsing) models as compchain classifiers for the EWT, GUMS and LinES UD treebanks. Our results indicate that these three baseline models exhibit poorer performance on sentences with predicate-argument structure with more than one level of embedding; we used compchains to characterize the errors made by these parsers and present examples of erroneous parses produced by the parser that were identified using compchains. We also analyzed the distribution of compchains in 58 non-English UD treebanks and then used compchains to evaluate the CoNLL’18 Shared Task baseline model for each of these treebanks. Our analysis shows that performance with respect to compchain classification is only weakly correlated with the official evaluation metrics (LAS, MLAS and BLEX). We identify gaps in the distribution of compchains in several of the UD treebanks, thus providing a roadmap for how these treebanks may be supplemented. We conclude by discussing how compchains provide a new perspective on the sparsity of training data for UD parsers, as well as the accuracy of the resulting UD parses.</abstract>
       <url hash="a48eab5f">2021.starsem-1.11</url>
@@ -182,7 +182,7 @@
       <author><first>Duccio</first><last>Pappadopulo</last></author>
       <author><first>Lisa</first><last>Bauer</last></author>
       <author><first>Marco</first><last>Farina</last></author>
-      <author><first>Ozan</first><last>İrsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>152–159</pages>
       <abstract>Many modern messaging systems allow fast and synchronous textual communication among many users. The resulting sequence of messages hides a more complicated structure in which independent sub-conversations are interwoven with one another. This poses a challenge for any task aiming to understand the content of the chat logs or gather information from them. The ability to disentangle these conversations is then tantamount to the success of many downstream tasks such as summarization and question answering. Structured information accompanying the text such as user turn, user mentions, timestamps, is used as a cue by the participants themselves who need to follow the conversation and has been shown to be important for disentanglement. DAG-LSTMs, a generalization of Tree-LSTMs that can handle directed acyclic dependencies, are a natural way to incorporate such information and its non-sequential nature. In this paper, we apply DAG-LSTMs to the conversation disentanglement task. We perform our experiments on the Ubuntu IRC dataset. We show that the novel model we propose achieves state of the art status on the task of recovering reply-to relations and it is competitive on other disentanglement metrics.</abstract>
@@ -193,7 +193,7 @@
     <paper id="15">
       <title>Toward Diverse Precondition Generation</title>
       <author><first>Heeyoung</first><last>Kwon</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <pages>160–172</pages>
       <abstract>A typical goal for language understanding is to logically connect the events of a discourse, but often connective events are not described due to their commonsense nature. In order to address this deficit, we focus here on generating precondition events. Precondition generation can be framed as a sequence-to-sequence problem: given a target event, generate a possible precondition. However, in most real-world scenarios, an event can have several preconditions, which is not always suitable for standard seq2seq frameworks. We propose DiP, the Diverse Precondition generation system that can generate unique and diverse preconditions. DiP consists of three stages of the generative process – an event sampler, a candidate generator, and a post-processor. The event sampler provides control codes (precondition triggers) which the candidate generator uses to focus its generation. Post-processing further improves the results through re-ranking and filtering. Unlike other conditional generation systems, DiP automatically generates control codes without training on diverse examples. Analysis reveals that DiP improves the diversity of preconditions significantly compared to a beam search baseline. Also, manual evaluation shows that DiP generates more preconditions than a strong nucleus sampling baseline.</abstract>
@@ -246,7 +246,7 @@
     <paper id="20">
       <title>Dependency Patterns of Complex Sentences and Semantic Disambiguation for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing</title>
       <author><first>Yuki</first><last>Yamamoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <pages>212–221</pages>
       <abstract>Abstract Meaning Representation (AMR) is a sentence-level meaning representation based on predicate argument structure. One of the challenges we find in AMR parsing is to capture the structure of complex sentences which expresses the relation between predicates. Knowing the core part of the sentence structure in advance may be beneficial in such a task. In this paper, we present a list of dependency patterns for English complex sentence constructions designed for AMR parsing. With a dedicated pattern matcher, all occurrences of complex sentence constructions are retrieved from an input sentence. While some of the subordinators have semantic ambiguities, we deal with this problem through training classification models on data derived from AMR and Wikipedia corpus, establishing a new baseline for future works. The developed complex sentence patterns and the corresponding AMR descriptions will be made public.</abstract>
@@ -257,7 +257,7 @@
     <paper id="21">
       <title>Neural Metaphor Detection with Visibility Embeddings</title>
       <author><first>Gitit</first><last>Kehat</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>222–228</pages>
       <abstract>We present new results for the problem of sequence metaphor labeling, using the recently developed Visibility Embeddings. We show that concatenating such embeddings to the input of a BiLSTM obtains consistent and significant improvements at almost no cost, and we present further improved results when visibility embeddings are combined with BERT.</abstract>
       <url hash="90afa474">2021.starsem-1.21</url>
@@ -282,7 +282,7 @@
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Enrique</first><last>Castaneda</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>241–251</pages>
       <abstract>We suggest to model human-annotated Word Usage Graphs capturing fine-grained semantic proximity distinctions between word uses with a Bayesian formulation of the Weighted Stochastic Block Model, a generative model for random graphs popular in biology, physics and social sciences. By providing a probabilistic model of graded word meaning we aim to approach the slippery and yet widely used notion of word sense in a novel way. The proposed framework enables us to rigorously compare models of word senses with respect to their fit to the data. We perform extensive experiments and select the empirically most adequate model.</abstract>
       <url hash="b5096b44">2021.starsem-1.23</url>
@@ -295,7 +295,7 @@
       <author><first>Julia</first><last>Bettinger</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>252–262</pages>
       <abstract>Predicting the difficulty of domain-specific vocabulary is an important task towards a better understanding of a domain, and to enhance the communication between lay people and experts. We investigate German closed noun compounds and focus on the interaction of compound-based lexical features (such as frequency and productivity) and terminology-based features (contrasting domain-specific and general language) across word representations and classifiers. Our prediction experiments complement insights from classification using (a) manually designed features to characterise termhood and compound formation and (b) compound and constituent word embeddings. We find that for a broad binary distinction into ‘easy’ vs. ‘difficult’ general-language compound frequency is sufficient, but for a more fine-grained four-class distinction it is crucial to include contrastive termhood features and compound and constituent features.</abstract>
       <url hash="dccad101">2021.starsem-1.24</url>
@@ -306,7 +306,7 @@
       <title>Spurious Correlations in Cross-Topic Argument Mining</title>
       <author><first>Terne Sasha</first><last>Thorn Jakobsen</last></author>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>263–277</pages>
       <abstract>Recent work in cross-topic argument mining attempts to learn models that generalise across topics rather than merely relying on within-topic spurious correlations. We examine the effectiveness of this approach by analysing the output of single-task and multi-task models for cross-topic argument mining, through a combination of linear approximations of their decision boundaries, manual feature grouping, challenge examples, and ablations across the input vocabulary. Surprisingly, we show that cross-topic models still rely mostly on spurious correlations and only generalise within closely related topics, e.g., a model trained only on closed-class words and a few common open-class words outperforms a state-of-the-art cross-topic model on distant target topics.</abstract>
       <url hash="e26daf8d">2021.starsem-1.25</url>
diff --git a/data/xml/2021.sustainlp.xml b/data/xml/2021.sustainlp.xml
index 3d3b21443f..0d51ba3eab 100644
--- a/data/xml/2021.sustainlp.xml
+++ b/data/xml/2021.sustainlp.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing</booktitle>
       <publisher>Association for Computational Linguistics</publisher>
-      <editor><first>Nafise Sadat</first><last>Moosavi</last></editor>
+      <editor id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></editor>
       <editor><first>Iryna</first><last>Gurevych</last></editor>
       <editor><first>Angela</first><last>Fan</last></editor>
       <editor><first>Thomas</first><last>Wolf</last></editor>
@@ -38,7 +38,7 @@
       <title>Evaluating the carbon footprint of <fixed-case>NLP</fixed-case> methods: a survey and analysis of existing tools</title>
       <author><first>Nesrine</first><last>Bannour</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
       <pages>11–21</pages>
       <abstract>Modern Natural Language Processing (NLP) makes intensive use of deep learning methods because of the accuracy they offer for a variety of applications. Due to the significant environmental impact of deep learning, cost-benefit analysis including carbon footprint as well as accuracy measures has been suggested to better document the use of NLP methods for research or deployment. In this paper, we review the tools that are available to measure energy use and CO2 emissions of NLP methods. We describe the scope of the measures provided and compare the use of six tools (carbon tracker, experiment impact tracker, green algorithms, ML CO2 impact, energy usage and cumulator) on named entity recognition experiments performed on different computational set-ups (local server vs. computing facility). Based on these findings, we propose actionable recommendations to accurately measure the environmental impact of NLP experiments.</abstract>
@@ -142,8 +142,8 @@
       <author><first>Gengyu</first><last>Wang</last></author>
       <author><first>Xiaochen</first><last>Hou</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <pages>79–85</pages>
       <abstract>Large pre-trained language models (PLMs) have led to great success on various commonsense question answering (QA) tasks in an end-to-end fashion. However, little attention has been paid to what commonsense knowledge is needed to deeply characterize these QA tasks. In this work, we proposed to categorize the semantics needed for these tasks using the SocialIQA as an example. Building upon our labeled social knowledge categories dataset on top of SocialIQA, we further train neural QA models to incorporate such social knowledge categories and relation information from a knowledge base. Unlike previous work, we observe our models with semantic categorizations of social knowledge can achieve comparable performance with a relatively simple model and smaller size compared to other complex approaches.</abstract>
       <url hash="499d3240">2021.sustainlp-1.10</url>
@@ -171,7 +171,7 @@
       <author><first>Lucas Høyberg</first><last>Puvis de Chavannes</last></author>
       <author><first>Mads Guldborg Kjeldgaard</first><last>Kongsbak</last></author>
       <author><first>Timmie</first><last>Rantzau</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>96–118</pages>
       <abstract>Training large language models can consume a large amount of energy. We hypothesize that the language model’s configuration impacts its energy consumption, and that there is room for power consumption optimisation in modern large language models. To investigate these claims, we introduce a power consumption factor to the objective function, and explore the range of models and hyperparameter configurations that affect power. We identify multiple configuration factors that can reduce power consumption during language model training while retaining model quality.</abstract>
       <url hash="7ae1cacd">2021.sustainlp-1.12</url>
diff --git a/data/xml/2021.tacl.xml b/data/xml/2021.tacl.xml
index 7d548deb83..98cb433b94 100644
--- a/data/xml/2021.tacl.xml
+++ b/data/xml/2021.tacl.xml
@@ -26,8 +26,8 @@
     </paper>
     <paper id="2">
       <title>Revisiting Multi-Domain Machine Translation</title>
-      <author><first>MinhQuang</first><last>Pham</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>MinhQuang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <doi>10.1162/tacl_a_00351</doi>
       <abstract>When building machine translation systems, one often needs to make the best out of heterogeneous sets of parallel data in training, and to robustly handle inputs from unexpected domains in testing. This multi-domain scenario has attracted a lot of recent work that fall under the general umbrella of transfer learning. In this study, we revisit multi-domain machine translation, with the aim to formulate the motivations for developing such systems and the associated expectations with respect to performance. Our experiments with a large sample of multi-domain systems show that most of these expectations are hardly met and suggest that further work is needed to better analyze the current behaviour of multi-domain systems and to make them fully hold their promises.</abstract>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="7">
       <title>Modeling Content and Context with Deep Relational Learning</title>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <doi>10.1162/tacl_a_00357</doi>
       <abstract>Building models for realistic natural language tasks requires dealing with long texts and accounting for complicated structural dependencies. Neural-symbolic representations have emerged as a way to combine the reasoning capabilities of symbolic methods, with the expressiveness of neural networks. However, most of the existing frameworks for combining neural and symbolic representations have been designed for classic relational learning tasks that work over a universe of symbolic entities and relations. In this paper, we present DRaiL, an open-source declarative framework for specifying deep relational models, designed to support a variety of NLP scenarios. Our framework supports easy integration with expressive language encoders, and provides an interface to study the interactions between representation, inference and learning.</abstract>
@@ -99,7 +99,7 @@
     <paper id="8">
       <title>Recursive Non-Autoregressive Graph-to-Graph Transformer for Dependency Parsing with Iterative Refinement</title>
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <doi>10.1162/tacl_a_00358</doi>
       <abstract>We propose the Recursive Non-autoregressive Graph-to-Graph Transformer architecture (RNGTr) for the iterative refinement of arbitrary graphs through the recursive application of a non-autoregressive Graph-to-Graph Transformer and apply it to syntactic dependency parsing. We demonstrate the power and effectiveness of RNGTr on several dependency corpora, using a refinement model pre-trained with BERT. We also introduce Syntactic Transformer (SynTr), a non-recursive parser similar to our refinement model. RNGTr can improve the accuracy of a variety of initial parsers on 13 languages from the Universal Dependencies Treebanks, English and Chinese Penn Treebanks, and the German CoNLL2009 corpus, even improving over the new state-of-the-art results achieved by SynTr, significantly improving the state-of-the-art for all corpora tested.</abstract>
       <pages>120–138</pages>
@@ -112,7 +112,7 @@
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Damián</first><last>Blasi</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <doi>10.1162/tacl_a_00355</doi>
       <abstract>We use large-scale corpora in six different gendered languages, along with tools from NLP and information theory, to test whether there is a relationship between the grammatical genders of inanimate nouns and the adjectives used to describe those nouns. For all six languages, we find that there is a statistically significant relationship. We also find that there are statistically significant relationships between the grammatical genders of inanimate nouns and the verbs that take those nouns as direct objects, as indirect objects, and as subjects. We defer deeper investigation of these relationships for future work.</abstract>
       <pages>139–159</pages>
@@ -181,7 +181,7 @@
       <title>Infusing Finetuning with Semantic Dependencies</title>
       <author><first>Zhaofeng</first><last>Wu</last></author>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00363</doi>
       <abstract>For natural language processing systems, two kinds of evidence support the use of text representations from neural language models “pretrained” on large unannotated corpora: performance on application-inspired benchmarks (Peters et al., 2018, inter alia), and the emergence of syntactic abstractions in those representations (Tenney et al., 2019, inter alia). On the other hand, the lack of grounded supervision calls into question how well these representations can ever capture meaning (Bender and Koller, 2020). We apply novel probes to recent language models— specifically focusing on predicate-argument structure as operationalized by semantic dependencies (Ivanova et al., 2012)—and find that, unlike syntax, semantics is not brought to the surface by today’s pretrained models. We then use convolutional graph encoders to explicitly incorporate semantic parses into task-specific finetuning, yielding benefits to natural language understanding (NLU) tasks in the GLUE benchmark. This approach demonstrates the potential for general-purpose (rather than task-specific) linguistic supervision, above and beyond conventional pretraining and finetuning. Several diagnostics help to localize the benefits of our approach.1</abstract>
       <pages>226–242</pages>
@@ -218,8 +218,8 @@
     <paper id="17">
       <title>Extractive Opinion Summarization in Quantized Transformer Spaces</title>
       <author><first>Stefanos</first><last>Angelidis</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <author><first>Xiaolan</first><last>Wang</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <doi>10.1162/tacl_a_00366</doi>
@@ -256,7 +256,7 @@
       <author><first>Yi</first><last>Luan</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00369</doi>
       <abstract>Dual encoders perform retrieval by encoding documents and queries into dense low-dimensional vectors, scoring each document by its inner product with the query. We investigate the capacity of this architecture relative to sparse bag-of-words models and attentional neural networks. Using both theoretical and empirical analysis, we establish connections between the encoding dimension, the margin between gold and lower-ranked documents, and the document length, suggesting limitations in the capacity of fixed-length encodings to support precise retrieval of long documents. Building on these insights, we propose a simple neural model that combines the efficiency of dual encoders with some of the expressiveness of more costly attentional architectures, and explore sparse-dense hybrids to capitalize on the precision of sparse retrieval. These models outperform strong alternatives in large-scale retrieval.</abstract>
       <pages>329–345</pages>
@@ -304,12 +304,12 @@
     </paper>
     <paper id="24">
       <title><fixed-case>S</fixed-case>umm<fixed-case>E</fixed-case>val: Re-evaluating Summarization Evaluation</title>
-      <author><first>Alexander R.</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander R.</first><last>Fabbri</last></author>
       <author><first>Wojciech</first><last>Kryściński</last></author>
       <author><first>Bryan</first><last>McCann</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <doi>10.1162/tacl_a_00373</doi>
       <abstract>The scarcity of comprehensive up-to-date studies on evaluation metrics for text summarization and the lack of consensus regarding evaluation protocols continue to inhibit progress. We address the existing shortcomings of summarization evaluation methods along five dimensions: 1) we re-evaluate 14 automatic evaluation metrics in a comprehensive and consistent fashion using neural summarization model outputs along with expert and crowd-sourced human annotations; 2) we consistently benchmark 23 recent summarization models using the aforementioned automatic evaluation metrics; 3) we assemble the largest collection of summaries generated by models trained on the CNN/DailyMail news dataset and share it in a unified format; 4) we implement and share a toolkit that provides an extensible and unified API for evaluating summarization models across a broad range of automatic metrics; and 5) we assemble and share the largest and most diverse, in terms of model types, collection of human judgments of model-generated summaries on the CNN/Daily Mail dataset annotated by both expert judges and crowd-source workers. We hope that this work will help promote a more complete evaluation protocol for text summarization as well as advance research in developing evaluation metrics that better correlate with human judgments.</abstract>
       <pages>391–409</pages>
@@ -351,7 +351,7 @@
       <author><first>Matthew</first><last>Lamm</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00377</doi>
       <abstract>Models for question answering, dialogue agents, and summarization often interpret the meaning of a sentence in a rich context and use that meaning in a new context. Taking excerpts of text can be problematic, as key pieces may not be explicit in a local window. We isolate and define the problem of sentence decontextualization: taking a sentence together with its context and rewriting it to be interpretable out of context, while preserving its meaning. We describe an annotation procedure, collect data on the Wikipedia corpus, and use the data to train models to automatically decontextualize sentences. We present preliminary studies that show the value of sentence decontextualization in a user-facing task, and as preprocessing for systems that perform document understanding. We argue that decontextualization is an important subtask in many downstream applications, and that the definitions and resources provided can benefit tasks that operate on sentences that occur in a richer context.</abstract>
       <pages>447–461</pages>
@@ -421,7 +421,7 @@
     </paper>
     <paper id="33">
       <title>Characterizing <fixed-case>E</fixed-case>nglish Variation across Social Media Communities with <fixed-case>BERT</fixed-case></title>
-      <author><first>Li</first><last>Lucy</last></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last></author>
       <author><first>David</first><last>Bamman</last></author>
       <doi>10.1162/tacl_a_00383</doi>
       <abstract>Much previous work characterizing language variation across Internet social groups has focused on the types of words used by these groups. We extend this type of study by employing BERT to characterize variation in the senses of words as well, analyzing two months of English comments in 474 Reddit communities. The specificity of different sense clusters to a community, combined with the specificity of a community’s unique word types, is used to identify cases where a social group’s language deviates from the norm. We validate our metrics using user-created glossaries and draw on sociolinguistic theories to connect language variation with trends in community behavior. We find that communities with highly distinctive language are medium-sized, and their loyal and highly engaged users interact in dense networks.</abstract>
@@ -436,7 +436,7 @@
       <author><first>Le Hong</first><last>Long</last></author>
       <author><first>Yunshan</first><last>Ma</last></author>
       <author><first>Wenqiang</first><last>Lei</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <doi>10.1162/tacl_a_00384</doi>
       <abstract>Tracking dialogue states to better interpret user goals and feed downstream policy learning is a bottleneck in dialogue management. Common practice has been to treat it as a problem of classifying dialogue content into a set of pre-defined slot-value pairs, or generating values for different slots given the dialogue history. Both have limitations on considering dependencies that occur on dialogues, and are lacking of reasoning capabilities. This paper proposes to track dialogue states gradually with reasoning over dialogue turns with the help of the back-end data. Empirical results demonstrate that our method outperforms the state-of-the-art methods in terms of joint belief accuracy for MultiWOZ 2.1, a large-scale human–human dialogue dataset across multiple domains.</abstract>
       <pages>557–569</pages>
@@ -460,7 +460,7 @@
     <paper id="36">
       <title>Context-aware Adversarial Training for Name Regularity Bias in Named Entity Recognition</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Ahmad</first><last>Rashid</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <doi>10.1162/tacl_a_00386</doi>
@@ -490,7 +490,7 @@
       <author><first>Meladel</first><last>Mistica</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Hang</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
       <doi>10.1162/tacl_a_00388</doi>
       <abstract>While pretrained language models (LMs) have driven impressive gains over morpho-syntactic and semantic tasks, their ability to model discourse and pragmatic phenomena is less clear. As a step towards a better understanding of their discourse modeling capabilities, we propose a sentence intrusion detection task. We examine the performance of a broad range of pretrained LMs on this detection task for English. Lacking a dataset for the task, we introduce INSteD, a novel intruder sentence detection dataset, containing 170,000+ documents constructed from English Wikipedia and CNN news articles. Our experiments show that pretrained LMs perform impressively in in-domain evaluation, but experience a substantial drop in the cross-domain setting, indicating limited generalization capacity. Further results over a novel linguistic probe dataset show that there is substantial room for improvement, especially in the cross- domain setting.</abstract>
@@ -516,7 +516,7 @@
       <author><first>Qi</first><last>Liu</last></author>
       <author><first>Lei</first><last>Yu</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <doi>10.1162/tacl_a_00390</doi>
       <abstract>Direct decoding for task-oriented dialogue is known to suffer from the explaining-away effect, manifested in models that prefer short and generic responses. Here we argue for the use of Bayes’ theorem to factorize the dialogue task into two models, the distribution of the context given the response, and the prior for the response itself. This approach, an instantiation of the noisy channel model, both mitigates the explaining-away effect and allows the principled incorporation of large pretrained models for the response prior. We present extensive experiments showing that a noisy channel model decodes better responses compared to direct decoding and that a two-stage pretraining strategy, employing both open-domain and task-oriented dialogue data, improves over randomly initialized models.</abstract>
       <pages>657–674</pages>
@@ -564,8 +564,8 @@
       <title>Classifying Argumentative Relations Using Logical Mechanisms and Argumentation Schemes</title>
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Seojin</first><last>Bang</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.1162/tacl_a_00394</doi>
       <abstract>While argument mining has achieved significant success in classifying argumentative relations between statements (support, attack, and neutral), we have a limited computational understanding of logical mechanisms that constitute those relations. Most recent studies rely on black-box models, which are not as linguistically insightful as desired. On the other hand, earlier studies use rather simple lexical features, missing logical relations between statements. To overcome these limitations, our work classifies argumentative relations based on four logical and theory-informed mechanisms between two statements, namely, (i) factual consistency, (ii) sentiment coherence, (iii) causal relation, and (iv) normative relation. We demonstrate that our operationalization of these logical mechanisms classifies argumentative relations without directly training on data labeled with the relations, significantly better than several unsupervised baselines. We further demonstrate that these mechanisms also improve supervised classifiers through representation learning.</abstract>
       <pages>721–739</pages>
@@ -620,7 +620,7 @@
       <author><first>Daniel</first><last>Andor</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Livio Baldini</first><last>Soares</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00398</doi>
       <abstract>A question answering system that in addition to providing an answer provides an explanation of the reasoning that leads to that answer has potential advantages in terms of debuggability, extensibility, and trust. To this end, we propose QED, a linguistically informed, extensible framework for explanations in question answering. A QED explanation specifies the relationship between a question and answer according to formal semantic notions such as referential equality, sentencehood, and entailment. We describe and publicly release an expert-annotated dataset of QED explanations built upon a subset of the Google Natural Questions dataset, and report baseline models on two tasks—post- hoc explanation generation given an answer, and joint question answering and explanation generation. In the joint setting, a promising result suggests that training on a relatively small amount of QED data can improve question answering. In addition to describing the formal, language-theoretic motivations for the QED approach, we describe a large user study showing that the presence of QED explanations significantly improves the ability of untrained raters to spot errors made by a strong neural QA baseline.</abstract>
       <pages>790–806</pages>
@@ -645,7 +645,7 @@
     </paper>
     <paper id="50">
       <title>Let’s Play Mono-Poly: <fixed-case>BERT</fixed-case> Can Reveal Words’ Polysemy Level and Partitionability into Senses</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <doi>10.1162/tacl_a_00400</doi>
       <abstract>Pre-trained language models (LMs) encode rich information about linguistic structure but their knowledge about lexical polysemy remains unclear. We propose a novel experimental setup for analyzing this knowledge in LMs specifically trained for different languages (English, French, Spanish, and Greek) and in multilingual BERT. We perform our analysis on datasets carefully designed to reflect different sense distributions, and control for parameters that are highly correlated with polysemy such as frequency and grammatical category. We demonstrate that BERT-derived representations reflect words’ polysemy level and their partitionability into senses. Polysemy-related information is more clearly present in English BERT embeddings, but models in other languages also manage to establish relevant distinctions between words at different polysemy levels. Our results contribute to a better understanding of the knowledge encoded in contextualized representations and open up new avenues for multilingual lexical semantics research.</abstract>
@@ -659,7 +659,7 @@
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <doi>10.1162/tacl_a_00401</doi>
       <abstract>AbstractMachine translation (MT) technology has facilitated our daily tasks by providing accessible shortcuts for gathering, processing, and communicating information. However, it can suffer from biases that harm users and society at large. As a relatively new field of inquiry, studies of gender bias in MT still lack cohesion. This advocates for a unified framework to ease future research. To this end, we: i) critically review current conceptualizations of bias in light of theoretical insights from related disciplines, ii) summarize previous analyses aimed at assessing gender bias in MT, iii) discuss the mitigating strategies proposed so far, and iv) point toward potential directions for future work.</abstract>
@@ -672,7 +672,7 @@
       <title>Neural Event Semantics for Grounded Language Understanding</title>
       <author><first>Shyamal</first><last>Buch</last></author>
       <author><first>Li</first><last>Fei-Fei</last></author>
-      <author><first>Noah D.</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah D.</first><last>Goodman</last></author>
       <doi>10.1162/tacl_a_00402</doi>
       <abstract>We present a new conjunctivist framework, neural event semantics (NES), for compositional grounded language understanding. Our approach treats all words as classifiers that compose to form a sentence meaning by multiplying output scores. These classifiers apply to spatial regions (events) and NES derives its semantic structure from language by routing events to different classifier argument inputs via soft attention. NES is trainable end-to-end by gradient descent with minimal supervision. We evaluate our method on compositional grounded language tasks in controlled synthetic and real-world settings. NES offers stronger generalization capability than standard function-based compositional frameworks, while improving accuracy over state-of-the-art neural methods on real-world language tasks.</abstract>
       <pages>875–890</pages>
@@ -682,7 +682,7 @@
     <paper id="53">
       <title>Sensitivity as a Complexity Measure for Sequence Classification Tasks</title>
       <author><first>Michael</first><last>Hahn</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <doi>10.1162/tacl_a_00403</doi>
       <abstract>We introduce a theoretical framework for understanding and predicting the complexity of sequence classification tasks, using a novel extension of the theory of Boolean function sensitivity. The sensitivity of a function, given a distribution over input sequences, quantifies the number of disjoint subsets of the input sequence that can each be individually changed to change the output. We argue that standard sequence classification methods are biased towards learning low-sensitivity functions, so that tasks requiring high sensitivity are more difficult. To that end, we show analytically that simple lexical classifiers can only express functions of bounded sensitivity, and we show empirically that low-sensitivity functions are easier to learn for LSTMs. We then estimate sensitivity on 15 NLP tasks, finding that sensitivity is higher on challenging tasks collected in GLUE than on simple text classification tasks, and that sensitivity predicts the performance both of simple lexical classifiers and of vanilla BiLSTMs without pretrained contextualized embeddings. Within a task, sensitivity predicts which inputs are hard for such simple models. Our results suggest that the success of massively pretrained contextual representations stems in part because they provide representations from which information can be extracted by low-sensitivity decoders.</abstract>
@@ -743,7 +743,7 @@
       <title>Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language <fixed-case>BERT</fixed-case>s</title>
       <author><first>Emanuele</first><last>Bugliarello</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
       <doi>10.1162/tacl_a_00408</doi>
       <abstract>Large-scale pretraining and task-specific fine- tuning is now the standard methodology for many tasks in computer vision and natural language processing. Recently, a multitude of methods have been proposed for pretraining vision and language BERTs to tackle challenges at the intersection of these two key areas of AI. These models can be categorized into either single-stream or dual-stream encoders. We study the differences between these two categories, and show how they can be unified under a single theoretical framework. We then conduct controlled experiments to discern the empirical differences between five vision and language BERTs. Our experiments show that training data and hyperparameters are responsible for most of the differences between the reported results, but they also reveal that the embedding layer plays a crucial role in these massive models.</abstract>
@@ -755,7 +755,7 @@
     <paper id="59">
       <title>Maintaining Common Ground in Dynamic Environments</title>
       <author><first>Takuma</first><last>Udagawa</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <doi>10.1162/tacl_a_00409</doi>
       <abstract>Common grounding is the process of creating and maintaining mutual understandings, which is a critical aspect of sophisticated human communication. While various task settings have been proposed in existing literature, they mostly focus on creating common ground under a static context and ignore the aspect of maintaining them overtime under dynamic context. In this work, we propose a novel task setting to study the ability of both creating and maintaining common ground in dynamic environments. Based on our minimal task formulation, we collected a large-scale dataset of 5,617 dialogues to enable fine-grained evaluation and analysis of various dialogue systems. Through our dataset analyses, we highlight novel challenges introduced in our setting, such as the usage of complex spatio-temporal expressions to create and maintain common ground. Finally, we conduct extensive experiments to assess the capabilities of our baseline dialogue system and discuss future prospects of our research.</abstract>
       <pages>995–1011</pages>
@@ -769,8 +769,8 @@
       <author><first>Nora</first><last>Kassner</last></author>
       <author><first>Shauli</first><last>Ravfogel</last></author>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <doi>10.1162/tacl_a_00410</doi>
       <abstract>Consistency of a model—that is, the invariance of its behavior under meaning-preserving alternations in its input—is a highly desirable property in natural language processing. In this paper we study the question: Are Pretrained Language Models (PLMs) consistent with respect to factual knowledge? To this end, we create ParaRel🤘, a high-quality resource of cloze-style query English paraphrases. It contains a total of 328 paraphrases for 38 relations. Using ParaRel🤘, we show that the consistency of all PLMs we experiment with is poor— though with high variance between relations. Our analysis of the representational spaces of PLMs suggests that they have a poor structure and are currently not suitable for representing knowledge robustly. Finally, we propose a method for improving model consistency and experimentally demonstrate its effectiveness.1</abstract>
@@ -800,7 +800,7 @@
       <author><first>William</first><last>Merrill</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00412</doi>
       <abstract>Language models trained on billions of tokens have recently led to unprecedented results on many NLP tasks. This success raises the question of whether, in principle, a system can ever “understand” raw text without access to some form of grounding. We formally investigate the abilities of ungrounded systems to acquire meaning. Our analysis focuses on the role of “assertions”: textual contexts that provide indirect clues about the underlying semantics. We study whether assertions enable a system to emulate representations preserving semantic relations like equivalence. We find that assertions enable semantic emulation of languages that satisfy a strong notion of semantic transparency. However, for classes of languages where the same expression can take different values in different contexts, we show that emulation can become uncomputable. Finally, we discuss differences between our formal model and natural language, exploring how our results generalize to a modal setting and other semantic relations. Together, our results suggest that assertions in code or language do not provide sufficient signal to fully emulate semantic representations. We formalize ways in which ungrounded language models appear to be fundamentally limited in their ability to “understand”.</abstract>
       <pages>1047–1060</pages>
@@ -816,7 +816,7 @@
       <author><first>Mohammad Ali</first><last>Khan</last></author>
       <author><first>Yin</first><last>Yang</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Deming</first><last>Chen</last></author>
       <author><first>Marianne</first><last>Winslett</last></author>
       <doi>10.1162/tacl_a_00413</doi>
@@ -829,7 +829,7 @@
     <paper id="64">
       <title>He Thinks He Knows Better than the Doctors: <fixed-case>BERT</fixed-case> for Event Factuality Fails on Pragmatics</title>
       <author><first>Nanjiang</first><last>Jiang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <doi>10.1162/tacl_a_00414</doi>
       <abstract>We investigate how well BERT performs on predicting factuality in several existing English datasets, encompassing various linguistic constructions. Although BERT obtains a strong performance on most datasets, it does so by exploiting common surface patterns that correlate with certain factuality labels, and it fails on instances where pragmatic reasoning is necessary. Contrary to what the high performance suggests, we are still far from having a robust system for factuality prediction.</abstract>
       <pages>1081–1097</pages>
@@ -856,7 +856,7 @@
     </paper>
     <paper id="66">
       <title><fixed-case>M</fixed-case>asakha<fixed-case>NER</fixed-case>: Named Entity Recognition for <fixed-case>A</fixed-case>frican Languages</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Jade</first><last>Abbott</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Daniel</first><last>D’souza</last></author>
@@ -875,7 +875,7 @@
       <author><first>Aremu</first><last>Anuoluwapo</last></author>
       <author><first>Catherine</first><last>Gitau</last></author>
       <author><first>Derguene</first><last>Mbaye</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Tajuddeen Rabiu</first><last>Gwadabe</last></author>
       <author><first>Ignatius</first><last>Ezeani</last></author>
@@ -1078,7 +1078,7 @@
     <paper id="78">
       <title>Partially Supervised Named Entity Recognition via the Expected Entity Ratio Loss</title>
       <author><first>Thomas</first><last>Effland</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00429</doi>
       <abstract>We study learning named entity recognizers in the presence of missing entity annotations. We approach this setting as tagging with latent variables and propose a novel loss, the Expected Entity Ratio, to learn models in the presence of systematically missing tags. We show that our approach is both theoretically sound and empirically useful. Experimentally, we find that it meets or exceeds performance of strong and state-of-the-art baselines across a variety of languages, annotation scenarios, and amounts of labeled data. In particular, we find that it significantly outperforms the previous state-of-the-art methods from Mayhew et al. (2019) and Li et al. (2021) by +12.7 and +2.3 F1 score in a challenging setting with only 1,000 biased annotations, averaged across 7 datasets. We also show that, when combined with our approach, a novel sparse annotation scheme outperforms exhaustive annotation for modest annotation budgets.1</abstract>
       <pages>1320–1335</pages>
@@ -1148,8 +1148,8 @@
       <author><first>Nora</first><last>Kassner</last></author>
       <author><first>Shauli</first><last>Ravfogel</last></author>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <doi>10.1162/tacl_x_00455</doi>
       <abstract>During production of this paper, an error was introduced to the formula on the bottom of the right column of page 1020. In the last two terms of the formula, the n and m subscripts were swapped. The correct formula is:Lc=∑n=1k∑m=n+1kDKL(Qnri∥Qmri)+DKL(Qmri∥Qnri)The paper has been updated.</abstract>
@@ -1161,7 +1161,7 @@
       <title>Self-Diagnosis and Self-Debiasing: A Proposal for Reducing Corpus-Based Bias in <fixed-case>NLP</fixed-case></title>
       <author><first>Timo</first><last>Schick</last></author>
       <author><first>Sahana</first><last>Udupa</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00434</doi>
       <abstract>This paper contains prompts and model outputs that are offensive in nature. When trained on large, unfiltered crawls from the Internet, language models pick up and reproduce all kinds of undesirable biases that can be found in the data: They often generate racist, sexist, violent, or otherwise toxic language. As large models require millions of training examples to achieve good performance, it is difficult to completely prevent them from being exposed to such content. In this paper, we first demonstrate a surprising finding: Pretrained language models recognize, to a considerable degree, their undesirable biases and the toxicity of the content they produce. We refer to this capability as self-diagnosis. Based on this finding, we then propose a decoding algorithm that, given only a textual description of the undesired behavior, reduces the probability of a language model producing problematic text. We refer to this approach as self-debiasing. Self-debiasing does not rely on manually curated word lists, nor does it require any training data or changes to the model’s parameters. While we by no means eliminate the issue of language models generating biased text, we believe our approach to be an important step in this direction.1</abstract>
       <pages>1408–1424</pages>
@@ -1253,7 +1253,7 @@
       <author><first>David</first><last>Francis</last></author>
       <author><first>Ella</first><last>Rabinovich</last></author>
       <author><first>Farhan</first><last>Samir</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <doi>10.1162/tacl_a_00441</doi>
       <abstract>We adopt an evolutionary view on language change in which cognitive factors (in addition to social ones) affect the fitness of words and their success in the linguistic ecosystem. Specifically, we propose a variety of psycholinguistic factors—semantic, distributional, and phonological—that we hypothesize are predictive of lexical decline, in which words greatly decrease in frequency over time. Using historical data across three languages (English, French, and German), we find that most of our proposed factors show a significant difference in the expected direction between each curated set of declining words and their matched stable words. Moreover, logistic regression analyses show that semantic and distributional factors are significant in predicting declining words. Further diachronic analysis reveals that declining words tend to decrease in the diversity of their lexical contexts over time, gradually narrowing their ‘ecological niches’.</abstract>
@@ -1276,7 +1276,7 @@
       <title>Word Representation Learning in Multimodal Pre-Trained Transformers: An Intrinsic Evaluation</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Ece</first><last>Takmaz</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <doi>10.1162/tacl_a_00443</doi>
       <abstract>This study carries out a systematic intrinsic evaluation of the semantic representations learned by state-of-the-art pre-trained multimodal Transformers. These representations are claimed to be task-agnostic and shown to help on many downstream language-and-vision tasks. However, the extent to which they align with human semantic intuitions remains unclear. We experiment with various models and obtain static word representations from the contextualized ones they learn. We then evaluate them against the semantic judgments provided by human speakers. In line with previous evidence, we observe a generalized advantage of multimodal representations over language- only ones on concrete word pairs, but not on abstract ones. On the one hand, this confirms the effectiveness of these models to align language and vision, which results in better semantic representations for concepts that are grounded in images. On the other hand, models are shown to follow different representation learning patterns, which sheds some light on how and when they perform multimodal integration.</abstract>
       <pages>1563–1579</pages>
diff --git a/data/xml/2021.tal.xml b/data/xml/2021.tal.xml
index c56b5275f9..adc44fc8e6 100644
--- a/data/xml/2021.tal.xml
+++ b/data/xml/2021.tal.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 62, Numéro 1 : Varia [Varia]</booktitle>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -40,8 +40,8 @@
   <volume id="2" ingest-date="2023-03-16" type="journal">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 62, Numéro 2 : Nouvelles applications du TAL [New applications in NLP]</booktitle>
-      <editor><first>Géraldine</first><last>Damnati</last></editor>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2021</year>
@@ -77,7 +77,7 @@
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 62, Numéro 3 : Diversité Linguistique [Linguistic Diversity in Natural Language Processing]</booktitle>
       <editor><first>Aarne</first><last>Ranta</last></editor>
-      <editor><first>Cyril</first><last>Goutte</last></editor>
+      <editor id="cyril-goutte"><first>Cyril</first><last>Goutte</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2021</year>
diff --git a/data/xml/2021.teachingnlp.xml b/data/xml/2021.teachingnlp.xml
index 4d16a76578..d9103839b1 100644
--- a/data/xml/2021.teachingnlp.xml
+++ b/data/xml/2021.teachingnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Fifth Workshop on Teaching NLP</booktitle>
       <editor><first>David</first><last>Jurgens</last></editor>
       <editor><first>Varada</first><last>Kolhatkar</last></editor>
-      <editor><first>Lucy</first><last>Li</last></editor>
+      <editor id="li-lucy"><first>Lucy</first><last>Li</last></editor>
       <editor><first>Margot</first><last>Mieskes</last></editor>
       <editor><first>Ted</first><last>Pedersen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="9">
       <title>From back to the roots into the gated woods: Deep learning for <fixed-case>NLP</fixed-case></title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>59–61</pages>
       <abstract>Deep neural networks have revolutionized many fields, including Natural Language Processing. This paper outlines teaching materials for an introductory lecture on deep learning in Natural Language Processing (NLP). The main submitted material covers a summer school lecture on encoder-decoder models. Complementary to this is a set of jupyter notebook slides from earlier teaching, on which parts of the lecture were based on. The main goal of this teaching material is to provide an overview of neural network approaches to natural language processing, while linking modern concepts back to the roots showing traditional essential counterparts. The lecture departs from count-based statistical methods and spans up to gated recurrent networks and attention, which is ubiquitous in today’s NLP.</abstract>
       <url hash="9c78dd8d">2021.teachingnlp-1.9</url>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="12">
       <title>Gaining Experience with Structured Data: Using the Resources of Dialog State Tracking Challenge 2</title>
-      <author><first>Ronnie</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie</first><last>Smith</last></author>
       <pages>70–79</pages>
       <abstract>This paper describes a class project for a recently introduced undergraduate NLP course that gives computer science students the opportunity to explore the data of Dialog State Tracking Challenge 2 (DSTC 2). Student background, curriculum choices, and project details are discussed. The paper concludes with some instructor advice and final reflections.</abstract>
       <url hash="6b9f3fed">2021.teachingnlp-1.12</url>
@@ -185,7 +185,7 @@
     <paper id="16">
       <title>Introducing Information Retrieval for Biomedical Informatics Students</title>
       <author><first>Sanya</first><last>Taneja</last></author>
-      <author><first>Richard</first><last>Boyce</last></author>
+      <author id="richard-d-boyce"><first>Richard</first><last>Boyce</last></author>
       <author><first>William</first><last>Reynolds</last></author>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
       <pages>96–98</pages>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="21">
       <title>Natural Language Processing for Computer Scientists and Data Scientists at a Large State University</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>115–124</pages>
       <abstract>The field of Natural Language Processing (NLP) changes rapidly, requiring course offerings to adjust with those changes, and NLP is not just for computer scientists; it’s a field that should be accessible to anyone who has a sufficient background. In this paper, I explain how students with Computer Science and Data Science backgrounds can be well-prepared for an upper-division NLP course at a large state university. The course covers probability and information theory, elementary linguistics, machine and deep learning, with an attempt to balance theoretical ideas and concepts with practical applications. I explain the course objectives, topics and assignments, reflect on adjustments to the course over the last four years, as well as feedback from students.</abstract>
       <url hash="3d504ce6">2021.teachingnlp-1.21</url>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="24">
       <title>The Online Pivot: Lessons Learned from Teaching a Text and Data Mining Course in Lockdown, Enhancing online Teaching with Pair Programming and Digital Badges</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Clare</first><last>Llewellyn</last></author>
       <author><first>Pawel</first><last>Orzechowski</last></author>
       <author><first>Maria</first><last>Boutchkova</last></author>
diff --git a/data/xml/2021.textgraphs.xml b/data/xml/2021.textgraphs.xml
index 06f343542a..f9c5570d38 100644
--- a/data/xml/2021.textgraphs.xml
+++ b/data/xml/2021.textgraphs.xml
@@ -8,7 +8,7 @@
       <editor><first>Varvara</first><last>Logacheva</last></editor>
       <editor><first>Abhik</first><last>Jana</last></editor>
       <editor><first>Dmitry</first><last>Ustalov</last></editor>
-      <editor><first>Peter</first><last>Jansen</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
       <month>June</month>
@@ -37,7 +37,7 @@
       <author><first>Leonardo F. R.</first><last>Ribeiro</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>10–21</pages>
       <abstract>We present Graformer, a novel Transformer-based encoder-decoder architecture for graph-to-text generation. With our novel graph self-attention, the encoding of a node relies on all nodes in the input graph - not only direct neighbors - facilitating the detection of global patterns. We represent the relation between two nodes as the length of the shortest path between them. Graformer learns to weight these node-node relations differently for different attention heads, thus virtually learning differently connected views of the input graph. We evaluate Graformer on two popular graph-to-text generation benchmarks, AGENDA and WebNLG, where it achieves strong performance while using many fewer parameters than other approaches.</abstract>
       <url hash="42368bbb">2021.textgraphs-1.2</url>
@@ -71,7 +71,7 @@
       <title><fixed-case>GENE</fixed-case>: Global Event Network Embedding</title>
       <author><first>Qi</first><last>Zeng</last></author>
       <author><first>Manling</first><last>Li</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Hanghang</first><last>Tong</last></author>
@@ -84,8 +84,8 @@
     <paper id="6">
       <title>Learning Clause Representation from Dependency-Anchor Graph for Connective Prediction</title>
       <author><first>Yanjun</first><last>Gao</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>54–66</pages>
       <abstract>Semantic representation that supports the choice of an appropriate connective between pairs of clauses inherently addresses discourse coherence, which is important for tasks such as narrative understanding, argumentation, and discourse parsing. We propose a novel clause embedding method that applies graph learning to a data structure we refer to as a dependency-anchor graph. The dependency anchor graph incorporates two kinds of syntactic information, constituency structure, and dependency relations, to highlight the subject and verb phrase relation. This enhances coherence-related aspects of representation. We design a neural model to learn a semantic representation for clauses from graph convolution over latent representations of the subject and verb phrase. We evaluate our method on two new datasets: a subset of a large corpus where the source texts are published novels, and a new dataset collected from students’ essays. The results demonstrate a significant improvement over tree-based models, confirming the importance of emphasizing the subject and verb phrase. The performance gap between the two datasets illustrates the challenges of analyzing student’s written text, plus a potential evaluation task for coherence modeling and an application for suggesting revisions to students.</abstract>
       <url hash="e80e22d5">2021.textgraphs-1.6</url>
@@ -107,7 +107,7 @@
     <paper id="8">
       <title>Selective Attention Based Graph Convolutional Networks for Aspect-Level Sentiment Classification</title>
       <author><first>Xiaochen</first><last>Hou</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
@@ -169,7 +169,7 @@
       <author><first>Liang</first><last>Ma</last></author>
       <author><first>Tanay Kumar</first><last>Saha</last></author>
       <author><first>Di</first><last>Liu</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>132–137</pages>
       <abstract>Recent works show that the graph structure of sentences, generated from dependency parsers, has potential for improving event detection. However, they often only leverage the edges (dependencies) between words, and discard the dependency labels (e.g., nominal-subject), treating the underlying graph edges as homogeneous. In this work, we propose a novel framework for incorporating both dependencies and their labels using a recently proposed technique called Graph Transformer Network (GTN). We integrate GTN to leverage dependency relations on two existing homogeneous-graph-based models and demonstrate an improvement in the F1 score on the ACE dataset.</abstract>
@@ -180,7 +180,7 @@
     <paper id="14">
       <title>Fine-grained General Entity Typing in <fixed-case>G</fixed-case>erman using <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et</title>
       <author><first>Sabine</first><last>Weber</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>138–143</pages>
       <abstract>Fine-grained entity typing is important to tasks like relation extraction and knowledge base construction. We find however, that fine-grained entity typing systems perform poorly on general entities (e.g. “ex-president”) as compared to named entities (e.g. “Barack Obama”). This is due to a lack of general entities in existing training data sets. We show that this problem can be mitigated by automatically generating training data from WordNets. We use a German WordNet equivalent, GermaNet, to automatically generate training data for German general entity typing. We use this data to supplement named entity data to train a neural fine-grained entity typing system. This leads to a 10% improvement in accuracy of the prediction of level 1 FIGER types for German general entities, while decreasing named entity type prediction accuracy by only 1%.</abstract>
       <url hash="5b8372b0">2021.textgraphs-1.14</url>
diff --git a/data/xml/2021.tlt.xml b/data/xml/2021.tlt.xml
index cf857f8ad5..2cd19673d7 100644
--- a/data/xml/2021.tlt.xml
+++ b/data/xml/2021.tlt.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 20th International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2021)</booktitle>
       <editor><first>Daniel</first><last>Dakota</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>December</month>
@@ -38,7 +38,7 @@
       <title>Is <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench tougher to parse?</title>
       <author><first>Loïc</first><last>Grobol</last></author>
       <author><first>Sophie</first><last>Prévost</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>27–34</pages>
       <url hash="fbd32d24">2021.tlt-1.3</url>
       <bibkey>grobol-etal-2021-old</bibkey>
@@ -75,7 +75,7 @@
       <title>How Universal is Genre in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies?</title>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>69–85</pages>
       <url hash="8c2b4f46">2021.tlt-1.7</url>
       <bibkey>muller-eberstein-etal-2021-universal</bibkey>
@@ -100,7 +100,7 @@
       <title>Discourse Tree Structure and Dependency Distance in <fixed-case>EFL</fixed-case> Writing</title>
       <author><first>Jingting</first><last>Yuan</last></author>
       <author><first>Qiuhan</first><last>Lin</last></author>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <pages>105–115</pages>
       <url hash="a8c74239">2021.tlt-1.10</url>
       <bibkey>yuan-etal-2021-discourse</bibkey>
diff --git a/data/xml/2021.triton.xml b/data/xml/2021.triton.xml
index 8f22f58a96..00b870d5a1 100644
--- a/data/xml/2021.triton.xml
+++ b/data/xml/2021.triton.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-01-11" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Translation and Interpreting Technology Online Conference</booktitle>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Vilelmini</first><last>Sosoni</last></editor>
       <editor><first>Julie Christine</first><last>Giguère</last></editor>
       <editor><first>Elena</first><last>Murgolo</last></editor>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Interpreting and Technology: Is the Sky Really the Limit?</title>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <pages>15–24</pages>
       <abstract>Nowadays there is a pressing need to develop interpreting-related technolo-gies, with practitioners and other end-users increasingly calling for tools tai-lored to their needs and their new interpreting scenarios. But, at the same time, interpreting as a human activity has resisted complete automation for various reasons, such as fear, unawareness, communication complexities, lack of dedicated tools, etc. Several computer-assisted interpreting tools and resources for interpreters have been developed, although they are rather modest in terms of the sup-port they provide. In the same vein, and despite the pressing need to aiding in multilingual mediation, machine interpreting is still under development, with the exception of a few success stories. This paper will present the results of VIP, a R&amp;D project on language technologies applied to interpreting. It is the ‘seed’ of a family of projects on interpreting technologies which are currently being developed or have just been completed at the Research Institute of Multilingual Language Technol-ogies (IUITLM), University of Malaga.</abstract>
       <url hash="3036bbee">2021.triton-1.3</url>
@@ -56,7 +56,7 @@
       <author><first>Despoina</first><last>Mouratidis</last></author>
       <author><first>Maria</first><last>Stasimioti</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <pages>37–47</pages>
       <abstract>Due to the wide-spread development of Machine Translation (MT) systems –especially Neural Machine Translation (NMT) systems– MT evaluation, both automatic and human, has become more and more important as it helps us establish how MT systems perform. Yet, automatic evaluation metrics have lagged behind, as the most popular choices (e.g., BLEU, METEOR and ROUGE) may correlate poorly with human judgments. This paper seeks to put to the test an evaluation model based on a novel deep learning schema (NoDeeLe) used to compare two NMT systems on four different text genres, i.e. medical, legal, marketing and literary in the English-Greek language pair. The model utilizes information from the source segments, the MT outputs and the reference translation, as well as the automatic metrics BLEU, METEOR and WER. The proposed schema achieves a strong correlation with human judgment (78% average accuracy for the four texts with the highest accuracy, i.e. 85%, observed in the case of the marketing text), while it outperforms classic machine learning algorithms and automatic metrics.</abstract>
       <url hash="47af1c78">2021.triton-1.5</url>
@@ -65,7 +65,7 @@
     <paper id="6">
       <title><fixed-case>BLEU</fixed-case>, <fixed-case>METEOR</fixed-case>, <fixed-case>BERTS</fixed-case>core: Evaluation of Metrics Performance in Assessing Critical Translation Errors in Sentiment-Oriented Text</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>48–56</pages>
       <abstract>Social media companies as well as censorship authorities make extensive use of artificial intelligence (AI) tools to monitor postings of hate speech, celebrations of violence or profanity. Since AI software requires massive volumes of data to train computers, automatic-translation of the online content is usually implemented to compensate for the scarcity of text in some languages. However, machine translation (MT) mistakes are a regular occurrence when translating sentiment-oriented user-generated content (UGC), especially when a low-resource language is involved. In such scenarios, the adequacy of the whole process relies on the assumption that the translation can be evaluated correctly. In this paper, we assess the ability of automatic quality metrics to detect critical machine translation errors which can cause serious misunderstanding of the affect message. We compare the performance of three canonical metrics on meaningless translations as compared to meaningful translations with a critical error that distorts the overall sentiment of the source text. We demonstrate the need for the fine-tuning of automatic metrics to make them more robust in detecting sentiment critical errors.</abstract>
       <url hash="4ffe93ea">2021.triton-1.6</url>
@@ -101,7 +101,7 @@
       <title>Cross-Lingual Named Entity Recognition via <fixed-case>F</fixed-case>ast<fixed-case>A</fixed-case>lign: a Case Study</title>
       <author><first>Ali</first><last>Hatami</last></author>
       <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <pages>85–92</pages>
       <abstract>Named Entity Recognition is an essential task in natural language processing to detect entities and classify them into predetermined categories. An entity is a meaningful word, or phrase that refers to proper nouns. Named Entities play an important role in different NLP tasks such as Information Extraction, Question Answering and Machine Translation. In Machine Translation, named entities often cause translation failures regardless of local context, affecting the output quality of translation. Annotating named entities is a time-consuming and expensive process especially for low-resource languages. One solution for this problem is to use word alignment methods in bilingual parallel corpora in which just one side has been annotated. The goal is to extract named entities in the target language by using the annotated corpus of the source language. In this paper, we compare the performance of two alignment methods, Grow-diag-final-and and Intersect Symmetrisation heuristics, to exploit the annotation projection of English-Brazilian Portuguese bilingual corpus to detect named entities in Brazilian Portuguese. A NER model that is trained on annotated data extracted from the alignment methods, is used to evaluate the performance of aligners. Experimental results show the Intersect Symmetrisation is able to achieve superior performance scores compared to the Grow-diag-final-and heuristic in Brazilian Portuguese.</abstract>
       <url hash="fbef1c62">2021.triton-1.10</url>
@@ -120,7 +120,7 @@
       <title><fixed-case>S</fixed-case>mar<fixed-case>T</fixed-case>erp: A <fixed-case>CAI</fixed-case> System to Support Simultaneous Interpreters in Real-Time</title>
       <author><first>Susana</first><last>Rodriguez</last></author>
       <author><first>Roberto</first><last>Gretter</last></author>
-      <author><first>Marco</first><last>Matassoni</last></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last></author>
       <author><first>Alvaro</first><last>Alonso</last></author>
       <author><first>Oscar</first><last>Corcho</last></author>
       <author><first>Mariano</first><last>Rico</last></author>
@@ -202,7 +202,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>rai<fixed-case>NMT</fixed-case>: Training Materials to Approach Neural Machine Translation from Scratch</title>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Caroline</first><last>Rossi</last></author>
diff --git a/data/xml/2021.udw.xml b/data/xml/2021.udw.xml
index b59ec3de9f..fa6a3027fa 100644
--- a/data/xml/2021.udw.xml
+++ b/data/xml/2021.udw.xml
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Formae reformandae: for a reorganisation of verb form annotation in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies illustrated by the specific case of <fixed-case>L</fixed-case>atin</title>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <pages>1–15</pages>
       <url hash="5b8abb73">2021.udw-1.1</url>
       <bibkey>cecchini-2021-formae</bibkey>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="15">
       <title>Date and Time in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>173–193</pages>
       <url hash="88b9470e">2021.udw-1.15</url>
       <bibkey>zeman-2021-date</bibkey>
diff --git a/data/xml/2021.vardial.xml b/data/xml/2021.vardial.xml
index 19319a8dc4..834156923c 100644
--- a/data/xml/2021.vardial.xml
+++ b/data/xml/2021.vardial.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects</booktitle>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <editor><first>Tommi</first><last>Jauhiainen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -26,7 +26,7 @@
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
@@ -56,7 +56,7 @@
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <author><first>Reem</first><last>Alatrash</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>21–27</pages>
       <abstract>Kiezdeutsch is a variety of German predominantly spoken by teenagers from multi-ethnic urban neighborhoods in casual conversations with their peers. In recent years, the popularity of Kiezdeutsch has increased among young people, independently of their socio-economic origin, and has spread in social media, too. While previous studies have extensively investigated this language variety from a linguistic and qualitative perspective, not much has been done from a quantitative point of view. We perform the first large-scale data-driven analysis of the lexical and morpho-syntactic properties of Kiezdeutsch in comparison with standard German. At the level of results, we confirm predictions of previous qualitative analyses and integrate them with further observations on specific linguistic phenomena such as slang and self-centered speaker attitude. At the methodological level, we provide logistic regression as a framework to perform bottom-up feature selection in order to quantify differences across language varieties.</abstract>
       <url hash="d305633d">2021.vardial-1.3</url>
@@ -103,7 +103,7 @@
     <paper id="8">
       <title>Discriminating Between Similar <fixed-case>N</fixed-case>ordic Languages</title>
       <author><first>René</first><last>Haas</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>67–75</pages>
       <abstract>Automatic language identification is a challenging problem. Discriminating between closely related languages is especially difficult. This paper presents a machine learning approach for automatic language identification for the Nordic languages, which often suffer miscategorisation by existing state-of-the-art tools. Concretely we will focus on discrimination between six Nordic languages: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokmål), Faroese and Icelandic.</abstract>
       <url hash="19384313">2021.vardial-1.8</url>
@@ -113,7 +113,7 @@
       <title>Naive <fixed-case>B</fixed-case>ayes-based Experiments in <fixed-case>R</fixed-case>omanian Dialect Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>76–83</pages>
       <abstract>This article describes the experiments and systems developed by the SUKI team for the second edition of the Romanian Dialect Identification (RDI) shared task which was organized as part of the 2021 VarDial Evaluation Campaign. We submitted two runs to the shared task and our second submission was the overall best submission by a noticeable margin. Our best submission used a character n-gram based naive Bayes classifier with adaptive language models. We describe our experiments on the development set leading to both submissions.</abstract>
       <url hash="584f3059">2021.vardial-1.9</url>
@@ -170,7 +170,7 @@
       <title>N-gram and Neural Models for <fixed-case>U</fixed-case>ralic Language Identification: <fixed-case>NRC</fixed-case> at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2021</title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
       <author><first>Serge</first><last>Leger</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>128–134</pages>
       <abstract>We describe the systems developed by the National Research Council Canada for the Uralic language identification shared task at the 2021 VarDial evaluation campaign. We evaluated two different approaches to this task: a probabilistic classifier exploiting only character 5-grams as features, and a character-based neural network pre-trained through self-supervision, then fine-tuned on the language identification task. The former method turned out to perform better, which casts doubt on the usefulness of deep learning methods for language identification, where they have yet to convincingly and consistently outperform simpler and less costly classification algorithms exploiting n-gram features.</abstract>
       <url hash="65413398">2021.vardial-1.15</url>
diff --git a/data/xml/2021.wanlp.xml b/data/xml/2021.wanlp.xml
index 93c0bb03e4..dc4cd9cd2a 100644
--- a/data/xml/2021.wanlp.xml
+++ b/data/xml/2021.wanlp.xml
@@ -73,7 +73,7 @@
     <paper id="5">
       <title>Kawarith: an <fixed-case>A</fixed-case>rabic <fixed-case>T</fixed-case>witter Corpus for Crisis Events</title>
       <author><first>Alaa</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>42–52</pages>
       <abstract>Social media (SM) platforms such as Twitter provide large quantities of real-time data that can be leveraged during mass emergencies. Developing tools to support crisis-affected communities requires available datasets, which often do not exist for low resource languages. This paper introduces Kawarith a multi-dialect Arabic Twitter corpus for crisis events, comprising more than a million Arabic tweets collected during 22 crises that occurred between 2018 and 2020 and involved several types of hazard. Exploration of this content revealed the most discussed topics and information types, and the paper presents a labelled dataset from seven emergency events that serves as a gold standard for several tasks in crisis informatics research. Using annotated data from the same event, a BERT model is fine-tuned to classify tweets into different categories in the multi- label setting. Results show that BERT-based models yield good performance on this task even with small amounts of task-specific training data.</abstract>
       <url hash="210f6219">2021.wanlp-1.5</url>
@@ -91,7 +91,7 @@
     <paper id="7">
       <title><fixed-case>A</fixed-case>rabic Emoji Sentiment Lexicon (<fixed-case>A</fixed-case>rab-<fixed-case>ESL</fixed-case>): A Comparison between <fixed-case>A</fixed-case>rabic and <fixed-case>E</fixed-case>uropean Emoji Sentiment Lexicons</title>
       <author><first>Shatha Ali A.</first><last>Hakami</last></author>
-      <author><first>Robert</first><last>Hendley</last></author>
+      <author id="robert-j-hendley"><first>Robert</first><last>Hendley</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
       <pages>60–71</pages>
       <abstract>Emoji (the popular digital pictograms) are sometimes seen as a new kind of artificial and universally usable and consistent writing code. In spite of their assumed universality, there is some evidence that the sense of an emoji, specifically in regard to sentiment, may change from language to language and culture to culture. This paper investigates whether contextual emoji sentiment analysis is consistent across Arabic and European languages. To conduct this investigation, we, first, created the Arabic emoji sentiment lexicon (Arab-ESL). Then, we exploited an existing European emoji sentiment lexicon to compare the sentiment conveyed in each of the two families of language and culture (Arabic and European). The results show that the pairwise correlation between the two lexicons is consistent for emoji that represent, for instance, hearts, facial expressions, and body language. However, for a subset of emoji (those that represent objects, nature, symbols, and some human activities), there are large differences in the sentiment conveyed. More interestingly, an extremely high level of inconsistency has been shown with food emoji.</abstract>
@@ -223,7 +223,7 @@
     <paper id="19">
       <title>Quranic Verses Semantic Relatedness Using <fixed-case>A</fixed-case>ra<fixed-case>BERT</fixed-case></title>
       <author><first>Abdullah</first><last>Alsaleh</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Abdulrahman</first><last>Altahhan</last></author>
       <pages>185–190</pages>
       <abstract>Bidirectional Encoder Representations from Transformers (BERT) has gained popularity in recent years producing state-of-the-art performances across Natural Language Processing tasks. In this paper, we used AraBERT language model to classify pairs of verses provided by the QurSim dataset to either be semantically related or not. We have pre-processed The QurSim dataset and formed three datasets for comparisons. Also, we have used both versions of AraBERT, which are AraBERTv02 and AraBERTv2, to recognise which version performs the best with the given datasets. The best results was AraBERTv02 with 92% accuracy score using a dataset comprised of label ‘2’ and label '-1’, the latter was generated outside of QurSim dataset.</abstract>
@@ -324,7 +324,7 @@
       <title>Adapting <fixed-case>MARBERT</fixed-case> for Improved <fixed-case>A</fixed-case>rabic Dialect Identification: Submission to the <fixed-case>NADI</fixed-case> 2021 Shared Task</title>
       <author><first>Badr</first><last>AlKhamissi</last></author>
       <author><first>Mohamed</first><last>Gabr</last></author>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Khaled</first><last>Essam</last></author>
       <pages>260–264</pages>
       <abstract>In this paper, we tackle the Nuanced Arabic Dialect Identification (NADI) shared task (Abdul-Mageed et al., 2021) and demonstrate state-of-the-art results on all of its four subtasks. Tasks are to identify the geographic origin of short Dialectal (DA) and Modern Standard Arabic (MSA) utterances at the levels of both country and province. Our final model is an ensemble of variants built on top of MARBERT that achieves an F1-score of 34.03% for DA at the country-level development set—an improvement of 7.63% from previous work.</abstract>
@@ -357,7 +357,7 @@
       <author><first>Elsayed</first><last>Issa</last></author>
       <author><first>Mohammed</first><last>AlShakhori1</last></author>
       <author><first>Reda</first><last>Al-Bahrani</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <pages>276–281</pages>
       <abstract>This work investigates the value of augmenting recurrent neural networks with feature engineering for the Second Nuanced Arabic Dialect Identification (NADI) Subtask 1.2: Country-level DA identification. We compare the performance of a simple word-level LSTM using pretrained embeddings with one enhanced using feature embeddings for engineered linguistic features. Our results show that the addition of explicit features to the LSTM is detrimental to performance. We attribute this performance loss to the bivalency of some linguistic items in some text, ubiquity of topics, and participant mobility.</abstract>
       <url hash="56b4a547">2021.wanlp-1.32</url>
@@ -378,7 +378,7 @@
     <paper id="34">
       <title>Machine Learning-Based Approach for <fixed-case>A</fixed-case>rabic Dialect Identification</title>
       <author><first>Hamada</first><last>Nayel</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Mahmoud</first><last>Sobhi</last></author>
       <author><first>Ahmed</first><last>El-Sawy</last></author>
       <pages>287–290</pages>
@@ -424,7 +424,7 @@
     <paper id="39">
       <title>Multi-task Learning Using a Combination of Contextualised and Static Word Embeddings for <fixed-case>A</fixed-case>rabic Sarcasm Detection and Sentiment Analysis</title>
       <author><first>Abdullah I.</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>318–322</pages>
       <abstract>Sarcasm detection and sentiment analysis are important tasks in Natural Language Understanding. Sarcasm is a type of expression where the sentiment polarity is flipped by an interfering factor. In this study, we exploited this relationship to enhance both tasks by proposing a multi-task learning approach using a combination of static and contextualised embeddings. Our proposed system achieved the best result in the sarcasm detection subtask.</abstract>
       <url hash="b3a69c39">2021.wanlp-1.39</url>
@@ -508,7 +508,7 @@
     <paper id="47">
       <title>Leveraging Offensive Language for Sarcasm and Sentiment Detection in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Fatemah</first><last>Husain</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
       <pages>364–369</pages>
       <abstract>Sarcasm detection is one of the top challenging tasks in text classification, particularly for informal Arabic with high syntactic and semantic ambiguity. We propose two systems that harness knowledge from multiple tasks to improve the performance of the classifier. This paper presents the systems used in our participation to the two sub-tasks of the Sixth Arabic Natural Language Processing Workshop (WANLP); Sarcasm Detection and Sentiment Analysis. Our methodology is driven by the hypothesis that tweets with negative sentiment and tweets with sarcasm content are more likely to have offensive content, thus, fine-tuning the classification model using large corpus of offensive language, supports the learning process of the model to effectively detect sentiment and sarcasm contents. Results demonstrate the effectiveness of our approach for sarcasm detection task over sentiment analysis task.</abstract>
       <url hash="91852a85">2021.wanlp-1.47</url>
diff --git a/data/xml/2021.wassa.xml b/data/xml/2021.wassa.xml
index 23188fd70b..2a18afbf7c 100644
--- a/data/xml/2021.wassa.xml
+++ b/data/xml/2021.wassa.xml
@@ -3,13 +3,13 @@
   <volume id="1" ingest-date="2021-04-19" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
-      <editor><first>Orphee</first><last>De Clercq</last></editor>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
       <editor><first>Joao</first><last>Sedoc</last></editor>
       <editor><first>Valentin</first><last>Barriere</last></editor>
       <editor><first>Shabnam</first><last>Tafreshi</last></editor>
-      <editor><first>Sven</first><last>Buechel</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="sven-buechel"><first>Sven</first><last>Buechel</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>April</month>
@@ -65,7 +65,7 @@
     <paper id="5">
       <title>Emotion Ratings: How Intensity, Annotation Confidence and Agreements are Entangled</title>
       <author><first>Enrica</first><last>Troiano</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>40–49</pages>
       <abstract>When humans judge the affective content of texts, they also implicitly assess the correctness of such judgment, that is, their confidence. We hypothesize that people’s (in)confidence that they performed well in an annotation task leads to (dis)agreements among each other. If this is true, confidence may serve as a diagnostic tool for systematic differences in annotations. To probe our assumption, we conduct a study on a subset of the Corpus of Contemporary American English, in which we ask raters to distinguish neutral sentences from emotion-bearing ones, while scoring the confidence of their answers. Confidence turns out to approximate inter-annotator disagreements. Further, we find that confidence is correlated to emotion intensity: perceiving stronger affect in text prompts annotators to more certain classification performances. This insight is relevant for modelling studies of intensity, as it opens the question wether automatic regressors or classifiers actually predict intensity, or rather human’s self-perceived confidence.</abstract>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Disentangling Document Topic and Author Gender in Multiple Languages: Lessons for Adversarial Debiasing</title>
       <author><first>Erenay</first><last>Dayanik</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>50–61</pages>
       <abstract>Text classification is a central tool in NLP. However, when the target classes are strongly correlated with other textual attributes, text classification models can pick up “wrong” features, leading to bad generalization and biases. In social media analysis, this problem surfaces for demographic user classes such as language, topic, or gender, which influence the generate text to a substantial extent. Adversarial training has been claimed to mitigate this problem, but thorough evaluation is missing. In this paper, we experiment with text classification of the correlated attributes of document topic and author gender, using a novel multilingual parallel corpus of TED talk transcripts. Our findings are: (a) individual classifiers for topic and author gender are indeed biased; (b) debiasing with adversarial training works for topic, but breaks down for author gender; (c) gender debiasing results differ across languages. We interpret the result in terms of feature space overlap, highlighting the role of linguistic surface realization of the target classes.</abstract>
       <url hash="e5197f67">2021.wassa-1.6</url>
@@ -183,7 +183,7 @@
       <author><first>Ilia</first><last>Markov</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>149–159</pages>
       <abstract>In this paper, we describe experiments designed to evaluate the impact of stylometric and emotion-based features on hate speech detection: the task of classifying textual content into hate or non-hate speech classes. Our experiments are conducted for three languages – English, Slovene, and Dutch – both in in-domain and cross-domain setups, and aim to investigate hate speech using features that model two linguistic phenomena: the writing style of hateful social media content operationalized as function word usage on the one hand, and emotion expression in hateful messages on the other hand. The results of experiments with features that model different combinations of these phenomena support our hypothesis that stylometric and emotion-based features are robust indicators of hate speech. Their contribution remains persistent with respect to domain and language variation. We show that the combination of features that model the targeted phenomena outperforms words and character n-gram features under cross-domain conditions, and provides a significant boost to deep learning models, which currently obtain the best results, when combined with them in an ensemble.</abstract>
       <url hash="f428f44d">2021.wassa-1.16</url>
diff --git a/data/xml/2021.wat.xml b/data/xml/2021.wat.xml
index ae146c316a..54a0409606 100644
--- a/data/xml/2021.wat.xml
+++ b/data/xml/2021.wat.xml
@@ -14,14 +14,14 @@
       <editor><first>Hiroshi</first><last>Manabe</last></editor>
       <editor><first>Win Pa</first><last>Pa</last></editor>
       <editor><first>Shantipriya</first><last>Parida</last></editor>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Chenhui</first><last>Chu</last></editor>
       <editor><first>Akiko</first><last>Eriguchi</last></editor>
       <editor><first>Kaori</first><last>Abe</last></editor>
       <editor><first>Yusuke</first><last>Oda</last></editor>
       <editor><first>Katsuhito</first><last>Sudoh</last></editor>
       <editor><first>Sadao</first><last>Kurohashi</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -85,7 +85,7 @@
       <title><fixed-case>NICT</fixed-case>’s Neural Machine Translation Systems for the <fixed-case>WAT</fixed-case>21 Restricted Translation Task</title>
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>62–67</pages>
       <abstract>This paper describes our system (Team ID: nictrb) for participating in the WAT’21 restricted machine translation task. In our submitted system, we designed a new training approach for restricted machine translation. By sampling from the translation target, we can solve the problem that ordinary training data does not have a restricted vocabulary. With the further help of constrained decoding in the inference phase, we achieved better results than the baseline, confirming the effectiveness of our solution. In addition, we also tried the vanilla and sparse Transformer as the backbone network of the model, as well as model ensembling, which further improved the final translation performance.</abstract>
@@ -140,7 +140,7 @@
     <paper id="8">
       <title><fixed-case>NICT</fixed-case>-2 Translation System at <fixed-case>WAT</fixed-case>-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>90–95</pages>
       <abstract>In this paper, we present the NICT system (NICT-2) submitted to the NICT-SAP shared task at the 8th Workshop on Asian Translation (WAT-2021). A feature of our system is that we used a pretrained multilingual BART (Bidirectional and Auto-Regressive Transformer; mBART) model. Because publicly available models do not support some languages in the NICT-SAP task, we added these languages to the mBART model and then trained it using monolingual corpora extracted from Wikipedia. We fine-tuned the expanded mBART model using the parallel corpora specified by the NICT-SAP task. The BLEU scores greatly improved in comparison with those of systems without the pretrained model, including the additional languages.</abstract>
       <url hash="12623772">2021.wat-1.8</url>
@@ -168,7 +168,7 @@
       <author><first>Chanhee</first><last>Lee</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>106–116</pages>
       <abstract>With the growing popularity of smart speakers, such as Amazon Alexa, speech is becoming one of the most important modes of human-computer interaction. Automatic speech recognition (ASR) is arguably the most critical component of such systems, as errors in speech recognition propagate to the downstream components and drastically degrade the user experience. A simple and effective way to improve the speech recognition accuracy is to apply automatic post-processor to the recognition result. However, training a post-processor requires parallel corpora created by human annotators, which are expensive and not scalable. To alleviate this problem, we propose Back TranScription (BTS), a denoising-based method that can create such corpora without human labor. Using a raw corpus, BTS corrupts the text using Text-to-Speech (TTS) and Speech-to-Text (STT) systems. Then, a post-processing model can be trained to reconstruct the original text given the corrupted input. Quantitative and qualitative evaluations show that a post-processor trained using our approach is highly effective in fixing non-trivial speech recognition errors such as mishandling foreign words. We present the generated parallel corpus and post-processing platform to make our results publicly available.</abstract>
       <url hash="523f9b77">2021.wat-1.10</url>
@@ -252,7 +252,7 @@
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last></author>
       <author><first>Darsh</first><last>Kaushik</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>155–160</pages>
       <abstract>Machine translation performs automatic translation from one natural language to another. Neural machine translation attains a state-of-the-art approach in machine translation, but it requires adequate training data, which is a severe problem for low-resource language pairs translation. The concept of multimodal is introduced in neural machine translation (NMT) by merging textual features with visual features to improve low-resource pair translation. WAT2021 (Workshop on Asian Translation 2021) organizes a shared task of multimodal translation for English to Hindi. We have participated the same with team name CNLP-NITS-PP in two submissions: multimodal and text-only NMT. This work investigates phrase pairs injection via data augmentation approach and attains improvement over our previous work at WAT2020 on the same task in both text-only and multimodal NMT. We have achieved second rank on the challenge test set for English to Hindi multimodal translation where Bilingual Evaluation Understudy (BLEU) score of 39.28, Rank-based Intuitive Bilingual Evaluation Score (RIBES) 0.792097, and Adequacy-Fluency Metrics (AMFM) score 0.830230 respectively.</abstract>
       <url hash="cfceed20">2021.wat-1.17</url>
@@ -298,7 +298,7 @@
       <title>Optimal Word Segmentation for Neural Machine Translation into <fixed-case>D</fixed-case>ravidian Languages</title>
       <author><first>Prajit</first><last>Dhar</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>181–190</pages>
       <abstract>Dravidian languages, such as Kannada and Tamil, are notoriously difficult to translate by state-of-the-art neural models. This stems from the fact that these languages are morphologically very rich as well as being low-resourced. In this paper, we focus on subword segmentation and evaluate Linguistically Motivated Vocabulary Reduction (LMVR) against the more commonly used SentencePiece (SP) for the task of translating from English into four different Dravidian languages. Additionally we investigate the optimal subword vocabulary size for each language. We find that SP is the overall best choice for segmentation, and that larger dictionary sizes lead to higher translation quality.</abstract>
       <url hash="61626921">2021.wat-1.21</url>
@@ -310,7 +310,7 @@
       <author><first>Rahul</first><last>Aralikatte</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>191–197</pages>
       <abstract>This work introduces Itihasa, a large-scale translation dataset containing 93,000 pairs of Sanskrit shlokas and their English translations. The shlokas are extracted from two Indian epics viz., The Ramayana and The Mahabharata. We first describe the motivation behind the curation of such a dataset and follow up with empirical analysis to bring out its nuances. We then benchmark the performance of standard translation models on this corpus and show that even state-of-the-art transformer architectures perform poorly, emphasizing the complexity of the dataset.</abstract>
       <url hash="544fabc2">2021.wat-1.22</url>
@@ -336,7 +336,7 @@
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>205–211</pages>
       <abstract>This work shows that competitive translation results can be obtained in a constrained setting by incorporating the latest advances in memory and compute optimization. We train and evaluate large multilingual translation models using a single GPU for a maximum of 100 hours and get within 4-5 BLEU points of the top submission on the leaderboard. We also benchmark standard baselines on the PMI corpus and re-discover well-known shortcomings of translation systems and metrics.</abstract>
       <url hash="397e106b">2021.wat-1.24</url>
@@ -347,7 +347,7 @@
       <title><fixed-case>IIIT</fixed-case> Hyderabad Submission To <fixed-case>WAT</fixed-case> 2021: Efficient Multilingual <fixed-case>NMT</fixed-case> systems for <fixed-case>I</fixed-case>ndian languages</title>
       <author><first>Sourav</first><last>Kumar</last></author>
       <author><first>Salil</first><last>Aggarwal</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>212–216</pages>
       <abstract>This paper describes the work and the systems submitted by the IIIT-Hyderbad team in the WAT 2021 MultiIndicMT shared task. The task covers 10 major languages of the Indian subcontinent. For the scope of this task, we have built multilingual systems for 20 translation directions namely English-Indic (one-to- many) and Indic-English (many-to-one). Individually, Indian languages are resource poor which hampers translation quality but by leveraging multilingualism and abundant monolingual corpora, the translation quality can be substantially boosted. But the multilingual systems are highly complex in terms of time as well as computational resources. Therefore, we are training our systems by efficiently se- lecting data that will actually contribute to most of the learning process. Furthermore, we are also exploiting the language related- ness found in between Indian languages. All the comparisons were made using BLEU score and we found that our final multilingual sys- tem significantly outperforms the baselines by an average of 11.3 and 19.6 BLEU points for English-Indic (en-xx) and Indic-English (xx- en) directions, respectively.</abstract>
       <url hash="96c7d167">2021.wat-1.25</url>
diff --git a/data/xml/2021.winlp.xml b/data/xml/2021.winlp.xml
index 39ed1435db..70e3d5aabf 100644
--- a/data/xml/2021.winlp.xml
+++ b/data/xml/2021.winlp.xml
@@ -10,7 +10,7 @@
       <editor><first>Kyathi</first><last>Chandu</last></editor>
       <editor><first>Xanda</first><last>Schofield</last></editor>
       <editor><first>Surangika</first><last>Ranathunga</last></editor>
-      <editor><first>Haley</first><last>Lepp</last></editor>
+      <editor id="haley-lepp"><first>Haley</first><last>Lepp</last></editor>
       <editor><first>Tirthankar</first><last>Ghosal</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Punta Cana, Dominican Republic</address>
@@ -55,7 +55,7 @@
       <author><first>Diana</first><last>Todea</last></author>
       <author><first>Liviu</first><last>Fodor</last></author>
       <author><first>Andreea</first><last>Luca</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Rareș</first><last>Boian</last></author>
       <pages>21–24</pages>
       <abstract>In the current study, we analyzed 15297 texts from 39 cancer survivors who posted or commented on Reddit in order to detect the language particularities of cancer survivors from online discourse. We performed a computational linguistic analysis (part-of-speech analysis, emoji detection, sentiment analysis) on submissions around the time of the cancer diagnosis and around the time of remission. We found several significant differences in the texts posted around the time of remission compared to those around the time of diagnosis. Though our results need to be backed up by a higher corpus of data, they do cue to the fact that cancer survivors, around the time of remission, focus more on others, are more active on social media, and do not see the glass as half empty as suggested by the valence of the emojis.</abstract>
diff --git a/data/xml/2021.wmt.xml b/data/xml/2021.wmt.xml
index 2df80ca98f..667a5dd0fd 100644
--- a/data/xml/2021.wmt.xml
+++ b/data/xml/2021.wmt.xml
@@ -4,23 +4,23 @@
     <meta>
       <booktitle>Proceedings of the Sixth Conference on Machine Translation</booktitle>
       <editor><first>Loic</first><last>Barrault</last></editor>
-      <editor><first>Ondrej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></editor>
       <editor><first>Fethi</first><last>Bougares</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Marta R.</first><last>Costa-jussa</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
-      <editor><first>Alexander</first><last>Fraser</last></editor>
+      <editor id="alexander-fraser"><first>Alexander</first><last>Fraser</last></editor>
       <editor><first>Markus</first><last>Freitag</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Roman</first><last>Grundkiewicz</last></editor>
       <editor><first>Paco</first><last>Guzman</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Tom</first><last>Kocmi</last></editor>
-      <editor><first>Andre</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>Andre</first><last>Martins</last></editor>
       <editor><first>Makoto</first><last>Morishita</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -66,7 +66,7 @@
       <author><first>Masaaki</first><last>Nagata</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Allahsera Auguste</first><last>Tapo</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
@@ -87,7 +87,7 @@
       <author><first>Somya</first><last>Jain</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <author><first>Tristan</first><last>Thrush</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>89–99</pages>
       <abstract>We present the results of the first task on Large-Scale Multilingual Machine Translation. The task consists on the many-to-many evaluation of a single model across a variety of source and target languages. This year, the task consisted on three different settings: (i) SMALL-TASK1 (Central/South-Eastern European Languages), (ii) the SMALL-TASK2 (South-East Asian Languages), and (iii) FULL-TASK (all 101 x 100 language pairs). All the tasks used the FLORES-101 dataset as the evaluation benchmark. To ensure the longevity of the dataset, the test sets were not publicly released and the models were evaluated in a controlled environment on Dynabench. There were a total of 10 participating teams for the tasks, with a total of 151 intermediate model submissions and 13 final models. This year’s result show a significant improvement over the known base-lines with +17.8 BLEU for SMALL-TASK2, +10.6 for FULL-TASK and +3.6 for SMALL-TASK1.</abstract>
       <url hash="23ef8cf5">2021.wmt-1.2</url>
@@ -105,11 +105,11 @@
     <paper id="4">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>E</fixed-case>dinburgh’s <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>ausa Submissions to the <fixed-case>WMT</fixed-case>21 News Translation Task</title>
       <author><first>Pinzhen</first><last>Chen</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Laurie</first><last>Burchell</last></author>
       <author><first>Nikolay</first><last>Bogoychev</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Jonas</first><last>Waldendorf</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
@@ -122,7 +122,7 @@
       <title>Tune in: The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>21 News-Translation Systems</title>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <pages>110–116</pages>
       <abstract>This paper describes the Air Force Research Laboratory (AFRL) machine translation sys- tems and the improvements that were developed during the WMT21 evaluation campaign. This year, we explore various methods of adapting our baseline models from WMT20 and again measure improvements in performance on the Russian–English language pair.</abstract>
       <url hash="ac7ec2b4">2021.wmt-1.5</url>
@@ -135,7 +135,7 @@
       <author><first>Christine</first><last>Basta</last></author>
       <author><first>Javier</first><last>Ferrando</last></author>
       <author><first>Marta R.</first><last>Costa-jussa</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>117–122</pages>
       <abstract>This paper describes the submission to the WMT 2021 news translation shared task by the UPC Machine Translation group. The goal of the task is to translate German to French (De-Fr) and French to German (Fr-De). Our submission focuses on fine-tuning a pre-trained model to take advantage of monolingual data. We fine-tune mBART50 using the filtered data, and additionally, we train a Transformer model on the same data from scratch. In the experiments, we show that fine-tuning mBART50 results in 31.69 BLEU for De-Fr and 23.63 BLEU for Fr-De, which increases 2.71 and 1.90 BLEU accordingly, as compared to the model we train from scratch. Our final submission is an ensemble of these two models, further increasing 0.3 BLEU for Fr-De.</abstract>
       <url hash="ec7e9bd5">2021.wmt-1.6</url>
@@ -160,7 +160,7 @@
       <author><first>Jailan S.</first><last>ElMosalami</last></author>
       <author><first>Mohamed</first><last>Afify</last></author>
       <author><first>Ahmed Y.</first><last>Tawfik</last></author>
-      <author><first>Hany</first><last>Hassan Awadalla</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan Awadalla</last></author>
       <pages>130–135</pages>
       <abstract>This paper describes the Microsoft Egypt Development Center (EgDC) submission to the constrained track of WMT21 shared news translation task. We focus on the three relatively low resource language pairs Bengali ↔ Hindi, English ↔ Hausa and Xhosa ↔ Zulu. To overcome the limitation of relatively low parallel data we train a multilingual model using a multitask objective employing both parallel and monolingual data. In addition, we augment the data using back translation. We also train a bilingual model incorporating back translation and knowledge distillation then combine the two models using sequence-to-sequence mapping. We see around 70% relative gain in BLEU point for En ↔ Ha and around 25% relative improvements for Bn ↔ Hi and Xh ↔ Zu compared to bilingual baselines.</abstract>
       <url hash="f235080b">2021.wmt-1.8</url>
@@ -202,7 +202,7 @@
       <title><fixed-case>M</fixed-case>i<fixed-case>SS</fixed-case>@<fixed-case>WMT</fixed-case>21: Contrastive Learning-reinforced Domain Adaptation in Neural Machine Translation</title>
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>154–161</pages>
       <abstract>In this paper, we describe our MiSS system that participated in the WMT21 news translation task. We mainly participated in the evaluation of the three translation directions of English-Chinese and Japanese-English translation tasks. In the systems submitted, we primarily considered wider networks, deeper networks, relative positional encoding, and dynamic convolutional networks in terms of model structure, while in terms of training, we investigated contrastive learning-reinforced domain adaptation, self-supervised training, and optimization objective switching training methods. According to the final evaluation results, a deeper, wider, and stronger network can improve translation performance in general, yet our data domain adaption method can improve performance even more. In addition, we found that switching to the use of our proposed objective during the finetune phase using relatively small domain-related data can effectively improve the stability of the model’s convergence and achieve better optimal performance.</abstract>
@@ -229,11 +229,11 @@
     <paper id="15">
       <title>e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2021 News Translation Task</title>
       <author><first>Csaba</first><last>Oravecz</last></author>
-      <author><first>Katina</first><last>Bontcheva</last></author>
-      <author><first>David</first><last>Kolovratník</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
+      <author id="david-kolovratnik"><first>David</first><last>Kolovratník</last></author>
       <author><first>Bhavani</first><last>Bhaskar</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <pages>172–179</pages>
       <abstract>The paper describes the 3 NMT models submitted by the eTranslation team to the WMT 2021 news translation shared task. We developed systems in language pairs that are actively used in the European Commission’s eTranslation service. In the WMT news task, recent years have seen a steady increase in the need for computational resources to train deep and complex architectures to produce competitive systems. We took a different approach and explored alternative strategies focusing on data selection and filtering to improve the performance of baseline systems. In the domain constrained task for the French–German language pair our approach resulted in the best system by a significant margin in BLEU. For the other two systems (English–German and English-Czech) we tried to build competitive models using standard best practices.</abstract>
       <url hash="9070a114">2021.wmt-1.15</url>
@@ -329,7 +329,7 @@
     <paper id="22">
       <title><fixed-case>LISN</fixed-case> @ <fixed-case>WMT</fixed-case> 2021</title>
       <author><first>Jitao</first><last>Xu</last></author>
-      <author><first>Minh Quang</first><last>Pham</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
       <author><first>Sadaf</first><last>Abdul Rauf</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>232–242</pages>
@@ -345,7 +345,7 @@
       <author><first>Qiu</first><last>Ran</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>243–254</pages>
       <abstract>This paper introduces WeChat AI’s participation in WMT 2021 shared news translation task on English-&gt;Chinese, English-&gt;Japanese, Japanese-&gt;English and English-&gt;German. Our systems are based on the Transformer (Vaswani et al., 2017) with several novel and effective variants. In our experiments, we employ data filtering, large-scale synthetic data generation (i.e., back-translation, knowledge distillation, forward-translation, iterative in-domain knowledge transfer), advanced finetuning approaches, and boosted Self-BLEU based model ensemble. Our constrained systems achieve 36.9, 46.9, 27.8 and 31.3 case-sensitive BLEU scores on English-&gt;Chinese, English-&gt;Japanese, Japanese-&gt;English and English-&gt;German, respectively. The BLEU scores of English-&gt;Chinese, English-&gt;Japanese and Japanese-&gt;English are the highest among all submissions, and that of English-&gt;German is the highest among all constrained submissions.</abstract>
@@ -420,7 +420,7 @@
       <author><first>Bishwaraj</first><last>Paul</last></author>
       <author><first>Prottay Kumar</first><last>Adhikary</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>284–287</pages>
       <abstract>The neural machine translation approach has gained popularity in machine translation because of its context analysing ability and its handling of long-term dependency issues. We have participated in the WMT21 shared task of similar language translation on a Tamil-Telugu pair with the team name: CNLP-NITS. In this task, we utilized monolingual data via pre-train word embeddings in transformer model based neural machine translation to tackle the limitation of parallel corpus. Our model has achieved a bilingual evaluation understudy (BLEU) score of 4.05, rank-based intuitive bilingual evaluation score (RIBES) score of 24.80 and translation edit rate (TER) score of 97.24 for both Tamil-to-Telugu and Telugu-to-Tamil translations respectively.</abstract>
       <url hash="49727dd6">2021.wmt-1.29</url>
@@ -429,7 +429,7 @@
     <paper id="30">
       <title>Low Resource Similar Language Neural Machine Translation for <fixed-case>T</fixed-case>amil-<fixed-case>T</fixed-case>elugu</title>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>288–291</pages>
       <abstract>This paper describes the participation of team oneNLP (LTRC, IIIT-Hyderabad) for the WMT 2021 task, similar language translation. We experimented with transformer based Neural Machine Translation and explored the use of language similarity for Tamil-Telugu and Telugu-Tamil. We incorporated use of different subword configurations, script conversion and single model training for both directions as exploratory experiments.</abstract>
       <url hash="2c9400a1">2021.wmt-1.30</url>
@@ -447,7 +447,7 @@
       <title><fixed-case>NITK</fixed-case>-<fixed-case>U</fixed-case>o<fixed-case>H</fixed-case>: <fixed-case>T</fixed-case>amil-<fixed-case>T</fixed-case>elugu Machine Translation Systems for the <fixed-case>WMT</fixed-case>21 Similar Language Translation Task</title>
       <author><first>Richard</first><last>Saldanha</last></author>
       <author><first>Ananthanarayana</first><last>V. S</last></author>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last></author>
       <pages>299–303</pages>
       <abstract>In this work, two Neural Machine Translation (NMT) systems have been developed and evaluated as part of the bidirectional Tamil-Telugu similar languages translation subtask in WMT21. The OpenNMT-py toolkit has been used to create quick prototypes of the systems, following which models have been trained on the training datasets containing the parallel corpus and finally the models have been evaluated on the dev datasets provided as part of the task. Both the systems have been trained on a DGX station with 4 -V100 GPUs. The first NMT system in this work is a Transformer based 6 layer encoder-decoder model, trained for 100000 training steps, whose configuration is similar to the one provided by OpenNMT-py and this is used to create a model for bidirectional translation. The second NMT system contains two unidirectional translation models with the same configuration as the first system, with the addition of utilizing Byte Pair Encoding (BPE) for subword tokenization through the pre-trained MultiBPEmb model. Based on the dev dataset evaluation metrics for both the systems, the first system i.e. the vanilla Transformer model has been submitted as the Primary system. Since there were no improvements in the metrics during training of the second system with BPE, it has been submitted as a contrastive system.</abstract>
@@ -458,7 +458,7 @@
     <paper id="33">
       <title>A3-108 Machine Translation System for Similar Language Translation Shared Task 2021</title>
       <author><first>Saumitra</first><last>Yadav</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>304–306</pages>
       <abstract>In this paper, we describe our submissions for the Similar Language Translation Shared Task 2021. We built 3 systems in each direction for the Tamil ⇐⇒ Telugu language pair. This paper outlines experiments with various tokenization schemes to train statistical models. We also report the configuration of the submitted systems and results produced by them.</abstract>
       <url hash="a7cd9acd">2021.wmt-1.33</url>
@@ -524,7 +524,7 @@
       <author><first>Huan</first><last>Liu</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
       <author><first>Kaiyu</first><last>Huang</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>331–335</pages>
       <abstract>This paper describes DUT-NLP Lab’s submission to the WMT-21 triangular machine translation shared task. The participants are not allowed to use other data and the translation direction of this task is Russian-to-Chinese. In this task, we use the Transformer as our baseline model, and integrate several techniques to enhance the performance of the baseline, including data filtering, data selection, fine-tuning, and post-editing. Further, to make use of the English resources, such as Russian/English and Chinese/English parallel data, the relationship triangle is constructed by multilingual neural machine translation systems. As a result, our submission achieves a BLEU score of 21.9 in Russian-to-Chinese.</abstract>
       <url hash="6de7afd6">2021.wmt-1.38</url>
@@ -533,7 +533,7 @@
     <paper id="39">
       <title>Pivot Based Transfer Learning for Neural Machine Translation: <fixed-case>CFILT</fixed-case> <fixed-case>IITB</fixed-case> @ <fixed-case>WMT</fixed-case> 2021 Triangular <fixed-case>MT</fixed-case></title>
       <author><first>Shivam</first><last>Mhaskar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>336–340</pages>
       <abstract>In this paper, we discuss the various techniques that we used to implement the Russian-Chinese machine translation system for the Triangular MT task at WMT 2021. Neural Machine translation systems based on transformer architecture have an encoder-decoder architecture, which are trained end-to-end and require a large amount of parallel corpus to produce good quality translations. This is the reason why neural machine translation systems are referred to as <i>data hungry</i>. Such a large amount of parallel corpus is majorly available for language pairs which include English and not for non-English language pairs. This is a major problem in building neural machine translation systems for non-English language pairs. We try to utilize the resources of the English language to improve the translation of non-English language pairs. We use the pivot language, that is English, to leverage transfer learning to improve the quality of Russian-Chinese translation. Compared to the baseline transformer-based neural machine translation system, we observe that the pivot language-based transfer learning technique gives a higher BLEU score.</abstract>
       <url hash="61816525">2021.wmt-1.39</url>
@@ -563,7 +563,7 @@
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>João Paulo</first><last>Aires</last></author>
-      <author><first>Dusan</first><last>Varis</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>354–361</pages>
       <abstract>This paper describes Charles University sub-mission for Terminology translation shared task at WMT21. The objective of this task is to design a system which translates certain terms based on a provided terminology database, while preserving high overall translation quality. We competed in English-French language pair. Our approach is based on providing the desired translations alongside the input sentence and training the model to use these provided terms. We lemmatize the terms both during the training and inference, to allow the model to learn how to produce correct surface forms of the words, when they differ from the forms provided in the terminology database.</abstract>
@@ -573,7 +573,7 @@
     <paper id="43">
       <title>Transfer Learning with Shallow Decoders: <fixed-case>BSC</fixed-case> at <fixed-case>WMT</fixed-case>2021’s Multilingual Low-Resource Translation for <fixed-case>I</fixed-case>ndo-<fixed-case>E</fixed-case>uropean Languages Shared Task</title>
       <author><first>Ksenia</first><last>Kharitonova</last></author>
-      <author><first>Ona</first><last>de Gibert Bonet</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert Bonet</last></author>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
       <author><first>Mar</first><last>Rodriguez i Alvarez</last></author>
       <author><first>Maite</first><last>Melero</last></author>
@@ -585,8 +585,8 @@
     <paper id="44">
       <title><fixed-case>E</fixed-case>din<fixed-case>S</fixed-case>aar@<fixed-case>WMT</fixed-case>21: <fixed-case>N</fixed-case>orth-<fixed-case>G</fixed-case>ermanic Low-Resource Multilingual <fixed-case>NMT</fixed-case></title>
       <author><first>Svetlana</first><last>Tchistiakova</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Sourav</first><last>Dutta</last></author>
       <author><first>Dana</first><last>Ruiter</last></author>
       <pages>368–375</pages>
@@ -601,7 +601,7 @@
       <author><first>Wanying</first><last>Xie</last></author>
       <author><first>Ambyera</first><last>Han</last></author>
       <author><first>Pan</first><last>Liu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Qi</first><last>Ju</last></author>
       <pages>376–382</pages>
       <abstract>This paper describes TenTrans’ submission to WMT21 Multilingual Low-Resource Translation shared task for the Romance language pairs. This task focuses on improving translation quality from Catalan to Occitan, Romanian and Italian, with the assistance of related high-resource languages. We mainly utilize back-translation, pivot-based methods, multilingual models, pre-trained model fine-tuning, and in-domain knowledge transfer to improve the translation quality. On the test set, our best-submitted system achieves an average of 43.45 case-sensitive BLEU scores across all low-resource pairs. Our data, code, and pre-trained models used in this work are available in TenTrans evaluation examples.</abstract>
@@ -703,7 +703,7 @@
       <author><first>Shaohan</first><last>Huang</last></author>
       <author><first>Alexandre</first><last>Muzio</last></author>
       <author><first>Saksham</first><last>Singhal</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Xia</first><last>Song</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>446–455</pages>
@@ -757,7 +757,7 @@
       <title>Just Ask! Evaluating Machine Translation by Asking and Answering Questions</title>
       <author><first>Mateusz</first><last>Krubiński</last></author>
       <author><first>Erfan</first><last>Ghadery</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <pages>495–506</pages>
       <abstract>In this paper, we show that automatically-generated questions and answers can be used to evaluate the quality of Machine Translation (MT) systems. Building on recent work on the evaluation of abstractive text summarization, we propose a new metric for system-level MT evaluation, compare it with other state-of-the-art solutions, and show its robustness by conducting experiments for various MT directions.</abstract>
@@ -787,7 +787,7 @@
       <author><first>Shaxnoza</first><last>Pulatova</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Julia</first><last>Kreutzer</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <author><first>John</first><last>Licato</last></author>
       <author><first>Sriram</first><last>Chellappan</last></author>
@@ -868,8 +868,8 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>625–638</pages>
       <abstract>Current Machine Translation (MT) systems achieve very good results on a growing variety of language pairs and datasets. However, they are known to produce fluent translation outputs that can contain important meaning errors, thus undermining their reliability in practice. Quality Estimation (QE) is the task of automatically assessing the performance of MT systems at test time. Thus, in order to be useful, QE systems should be able to detect such errors. However, this ability is yet to be tested in the current evaluation practices, where QE systems are assessed only in terms of their correlation with human judgements. In this work, we bridge this gap by proposing a general methodology for adversarial testing of QE for MT. First, we show that despite a high correlation with human judgements achieved by the recent SOTA, certain types of meaning errors are still problematic for QE to detect. Second, we show that on average, the ability of a given model to discriminate between meaning-preserving and meaning-altering perturbations is predictive of its overall performance, thus potentially allowing for comparing QE systems without relying on manual quality annotation.</abstract>
@@ -893,8 +893,8 @@
       <author><first>Md Mahfuz Ibn</first><last>Alam</last></author>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Matthias</first><last>Gallé</last></author>
       <author><first>Kweonwoo</first><last>Jung</last></author>
@@ -914,12 +914,12 @@
       <author><first>Federica</first><last>Vezzani</last></author>
       <author><first>Amy</first><last>Siu</last></author>
       <author><first>Inigo</first><last>Jauregi Unanue</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Nancy</first><last>Mah</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Roland</first><last>Roller</last></author>
       <author><first>Philippe</first><last>Thomas</last></author>
       <author><first>Cristian</first><last>Grozea</last></author>
@@ -934,7 +934,7 @@
     <paper id="71">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2021 Shared Task on Quality Estimation</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Zhenhao</first><last>Li</last></author>
@@ -963,7 +963,7 @@
       <author><first>Chi-kiu</first><last>Lo</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>George</first><last>Foster</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>733–774</pages>
       <abstract>This paper presents the results of the WMT21 Metrics Shared Task. Participants were asked to score the outputs of the translation systems competing in the WMT21 News Translation Task with automatic metrics on two different domains: news and TED talks. All metrics were evaluated on how well they correlate at the system- and segment-level with human ratings. Contrary to previous years’ editions, this year we acquired our own human ratings based on expert-based human evaluation via Multidimensional Quality Metrics (MQM). This setup had several advantages: (i) expert-based evaluation has been shown to be more reliable, (ii) we were able to evaluate all metrics on two different domains using translations of the same MT systems, (iii) we added 5 additional translations coming from the same system during system development. In addition, we designed three challenge sets that evaluate the robustness of all automatic metrics. We present an extensive analysis on how well metrics perform on three language pairs: English to German, English to Russian and Chinese to English. We further show the impact of different reference translations on reference-based metrics and compare our expert-based MQM annotation with the DA scores acquired by WMT.</abstract>
@@ -1081,7 +1081,7 @@
     <paper id="81">
       <title>Dynamic Terminology Integration for <fixed-case>COVID</fixed-case>-19 and Other Emerging Domains</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>821–827</pages>
       <abstract>The majority of language domains require prudent use of terminology to ensure clarity and adequacy of information conveyed. While the correct use of terminology for some languages and domains can be achieved by adapting general-purpose MT systems on large volumes of in-domain parallel data, such quantities of domain-specific data are seldom available for less-resourced languages and niche domains. Furthermore, as exemplified by COVID-19 recently, no domain-specific parallel data is readily available for emerging domains. However, the gravity of this recent calamity created a high demand for reliable translation of critical information regarding pandemic and infection prevention. This work is part of WMT2021 Shared Task: Machine Translation using Terminologies, where we describe Tilde MT systems that are capable of dynamic terminology integration at the time of translation. Our systems achieve up to 94% COVID-19 term use accuracy on the test set of the EN-FR language pair without having access to any form of in-domain information during system training.</abstract>
       <url hash="15d5f494">2021.wmt-1.81</url>
@@ -1092,7 +1092,7 @@
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>João Paulo</first><last>Aires</last></author>
-      <author><first>Dusan</first><last>Varis</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>828–834</pages>
       <abstract>This paper describes Charles University sub-mission for Terminology translation Shared Task at WMT21. The objective of this task is to design a system which translates certain terms based on a provided terminology database, while preserving high overall translation quality. We competed in English-French language pair. Our approach is based on providing the desired translations alongside the input sentence and training the model to use these provided terms. We lemmatize the terms both during the training and inference, to allow the model to learn how to produce correct surface forms of the words, when they differ from the forms provided in the terminology database. Our submission ranked second in Exact Match metric which evaluates the ability of the model to produce desired terms in the translation.</abstract>
@@ -1111,8 +1111,8 @@
     </paper>
     <paper id="84">
       <title><fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WMT</fixed-case> 2021: Terminology Task</title>
-      <author><first>Minh Quang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Antoine</first><last>Senellart</last></author>
       <author><first>Dan</first><last>Berrebbi</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
@@ -1196,7 +1196,7 @@
     </paper>
     <paper id="91">
       <title><fixed-case>RTM</fixed-case> Super Learner Results at Quality Estimation Task</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>885–889</pages>
       <abstract>We obtain new results using referential translation machines (RTMs) with predictions mixed to obtain a better mixture of experts prediction. Our super learner results improve the results and provide a robust combination model.</abstract>
       <url hash="fe43d248">2021.wmt-1.91</url>
@@ -1288,7 +1288,7 @@
     </paper>
     <paper id="99">
       <title><fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Submission for the <fixed-case>WMT</fixed-case>’21 Quality Estimation Task: Multimetric Multilingual Pretraining for Critical Error Detection</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Benjamin</first><last>Marie</last></author>
       <pages>941–947</pages>
@@ -1327,10 +1327,10 @@
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Ana C</first><last>Farinha</last></author>
       <author><first>Pedro</first><last>Ramos</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Taisiya</first><last>Glushkova</last></author>
       <author><first>Miguel</first><last>Vera</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <pages>961–972</pages>
       <abstract>We present the joint contribution of IST and Unbabel to the WMT 2021 Shared Task on Quality Estimation. Our team participated on two tasks: Direct Assessment and Post-Editing Effort, encompassing a total of 35 submissions. For all submissions, our efforts focused on training multilingual models on top of OpenKiwi predictor-estimator architecture, using pre-trained multilingual encoders combined with adapters. We further experiment with and uncertainty-related objectives and features as well as training on out-of-domain direct assessment data.</abstract>
@@ -1339,11 +1339,11 @@
     </paper>
     <paper id="103">
       <title>The <fixed-case>IICT</fixed-case>-Yverdon System for the <fixed-case>WMT</fixed-case> 2021 Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case> Task</title>
-      <author><first>Àlex R.</first><last>Atrio</last></author>
+      <author id="alex-r-atrio"><first>Àlex R.</first><last>Atrio</last></author>
       <author><first>Gabriel</first><last>Luthier</last></author>
       <author><first>Axel</first><last>Fahy</last></author>
       <author><first>Giorgos</first><last>Vernikos</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Ljiljana</first><last>Dolamic</last></author>
       <pages>973–981</pages>
       <abstract>In this paper, we present the systems submitted by our team from the Institute of ICT (HEIG-VD / HES-SO) to the Unsupervised MT and Very Low Resource Supervised MT task. We first study the improvements brought to a baseline system by techniques such as back-translation and initialization from a parent model. We find that both techniques are beneficial and suffice to reach performance that compares with more sophisticated systems from the 2020 task. We then present the application of this system to the 2021 task for low-resource supervised Upper Sorbian (HSB) to German translation, in both directions. Finally, we present a contrastive system for HSB-DE in both directions, and for unsupervised German to Lower Sorbian (DSB) translation, which uses multi-task training with various training schedules to improve over the baseline.</abstract>
@@ -1355,7 +1355,7 @@
       <author><first>Lukas</first><last>Edman</last></author>
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>982–988</pages>
       <abstract>This paper describes the methods behind the systems submitted by the University of Groningen for the WMT 2021 Unsupervised Machine Translation task for German–Lower Sorbian (DE–DSB): a high-resource language to a low-resource one. Our system uses a transformer encoder-decoder architecture in which we make three changes to the standard training procedure. First, our training focuses on two languages at a time, contrasting with a wealth of research on multilingual systems. Second, we introduce a novel method for initializing the vocabulary of an unseen language, achieving improvements of 3.2 BLEU for DE-&gt;DSB and 4.0 BLEU for DSB-&gt;DE.Lastly, we experiment with the order in which offline and online back-translation are used to train an unsupervised system, finding that using online back-translation first works better for DE-&gt;DSB by 2.76 BLEU. Our submissions ranked first (tied with another team) for DSB-&gt;DE and third for DE-&gt;DSB.</abstract>
       <url hash="ef576619">2021.wmt-1.104</url>
@@ -1374,7 +1374,7 @@
       <title>Language Model Pretraining and Transfer Learning for Very Low Resource Languages</title>
       <author><first>Jyotsana</first><last>Khatri</last></author>
       <author><first>Rudra</first><last>Murthy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>995–998</pages>
       <abstract>This paper describes our submission for the shared task on Unsupervised MT and Very Low Resource Supervised MT at WMT 2021. We submitted systems for two language pairs: German ↔ Upper Sorbian (de ↔ hsb) and German-Lower Sorbian (de ↔ dsb). For de ↔ hsb, we pretrain our system using MASS (Masked Sequence to Sequence) objective and then finetune using iterative back-translation. Final finetunng is performed using the parallel data provided for translation objective. For de ↔ dsb, no parallel data is provided in the task, we use final de ↔ hsb model as initialization of the de ↔ dsb model and train it further using iterative back-translation, using the same vocabulary as used in the de ↔ hsb model.</abstract>
       <url hash="5b37e67e">2021.wmt-1.106</url>
@@ -1417,7 +1417,7 @@
       <title><fixed-case>MTEQA</fixed-case> at <fixed-case>WMT</fixed-case>21 Metrics Shared Task</title>
       <author><first>Mateusz</first><last>Krubiński</last></author>
       <author><first>Erfan</first><last>Ghadery</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <pages>1024–1029</pages>
       <abstract>In this paper, we describe our submission to the WMT 2021 Metrics Shared Task. We use the automatically-generated questions and answers to evaluate the quality of Machine Translation (MT) systems. Our submission builds upon the recently proposed MTEQA framework. Experiments on WMT20 evaluation datasets show that at the system-level the MTEQA metric achieves performance comparable with other state-of-the-art solutions, while considering only a certain amount of information from the whole translation.</abstract>
@@ -1434,7 +1434,7 @@
       <author><first>Pedro</first><last>Ramos</last></author>
       <author><first>Taisiya</first><last>Glushkova</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>1030–1040</pages>
       <abstract>In this paper, we present the joint contribution of Unbabel and IST to the WMT 2021 Metrics Shared Task. With this year’s focus on Multidimensional Quality Metric (MQM) as the ground-truth human assessment, our aim was to steer COMET towards higher correlations with MQM. We do so by first pre-training on Direct Assessments and then fine-tuning on z-normalized MQM scores. In our experiments we also show that reference-free COMET models are becoming competitive with reference-based models, even outperforming the best COMET model from 2020 on this year’s development data. Additionally, we present COMETinho, a lightweight COMET model that is 19x faster on CPU than the original model, while also achieving state-of-the-art correlations with MQM. Finally, in the “QE as a metric” track, we also participated with a QE model trained using the OpenKiwi framework leveraging MQM scores and word-level annotations.</abstract>
       <url hash="f4b2bcd4">2021.wmt-1.111</url>
@@ -1512,7 +1512,7 @@
       <title>Learning Feature Weights using Reward Modeling for Denoising Parallel Corpora</title>
       <author><first>Gaurav</first><last>Kumar</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>1100–1109</pages>
       <abstract>Large web-crawled corpora represent an excellent resource for improving the performance of Neural Machine Translation (NMT) systems across several language pairs. However, since these corpora are typically extremely noisy, their use is fairly limited. Current approaches to deal with this problem mainly focus on filtering using heuristics or single features such as language model scores or bi-lingual similarity. This work presents an alternative approach which learns weights for multiple sentence-level features. These feature weights which are optimized directly for the task of improving translation performance, are used to score and filter sentences in the noisy corpora more effectively. We provide results of applying this technique to building NMT systems using the Paracrawl corpus for Estonian-English and show that it beats strong single feature baselines and hand designed combinations. Additionally, we analyze the sensitivity of this method to different types of noise and explore if the learned weights generalize to other language pairs using the Maltese-English Paracrawl corpus.</abstract>
       <url hash="733fefb0">2021.wmt-1.118</url>
@@ -1521,7 +1521,7 @@
     </paper>
     <paper id="119">
       <title>Monotonic Simultaneous Translation with Chunk-wise Reordering and Refinement</title>
-      <author><first>HyoJung</first><last>Han</last></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last></author>
       <author><first>Seokchan</first><last>Ahn</last></author>
       <author><first>Yoonjung</first><last>Choi</last></author>
       <author><first>Insoo</first><last>Chung</last></author>
diff --git a/data/xml/2021.wnut.xml b/data/xml/2021.wnut.xml
index 1503859418..50534d8249 100644
--- a/data/xml/2021.wnut.xml
+++ b/data/xml/2021.wnut.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</booktitle>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <editor><first>Afshin</first><last>Rahimi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Finding the needle in a haystack: Extraction of Informative <fixed-case>COVID</fixed-case>-19 <fixed-case>D</fixed-case>anish Tweets</title>
       <author><first>Benjamin</first><last>Olsen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>11–19</pages>
       <abstract>Finding informative COVID-19 posts in a stream of tweets is very useful to monitor health-related updates. Prior work focused on a balanced data setup and on English, but informative tweets are rare, and English is only one of the many languages spoken in the world. In this work, we introduce a new dataset of 5,000 tweets for finding informative COVID-19 tweets for Danish. In contrast to prior work, which balances the label distribution, we model the problem by keeping its natural distribution. We examine how well a simple probabilistic model and a convolutional neural network (CNN) perform on this task. We find a weighted CNN to work well but it is sensitive to embedding and hyperparameter choices. We hope the contributed dataset is a starting point for further work in this direction.</abstract>
       <url hash="589a2c87">2021.wnut-1.2</url>
@@ -115,7 +115,7 @@
       <author><first>Shohei</first><last>Higashiyama</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>67–80</pages>
       <abstract>Lexical normalization, in addition to word segmentation and part-of-speech tagging, is a fundamental task for Japanese user-generated text processing. In this paper, we propose a text editing model to solve the three task jointly and methods of pseudo-labeled data generation to overcome the problem of data deficiency. Our experiments showed that the proposed model achieved better normalization performance when trained on more diverse pseudo-labeled data.</abstract>
       <url hash="16c82883">2021.wnut-1.9</url>
@@ -170,7 +170,7 @@
     <paper id="14">
       <title>Common Sense Bias in Semantic Role Labeling</title>
       <author><first>Heather</first><last>Lent</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>114–119</pages>
       <abstract>Large-scale language models such as ELMo and BERT have pushed the horizon of what is possible in semantic role labeling (SRL), solving the out-of-vocabulary problem and enabling end-to-end systems, but they have also introduced significant biases. We evaluate three SRL parsers on very simple transitive sentences with verbs usually associated with animate subjects and objects, such as, “Mary babysat Tom”: a state-of-the-art parser based on BERT, an older parser based on GloVe, and an even older parser from before the days of word embeddings. When arguments are word forms predominantly used as person names, aligning with common sense expectations of animacy, the BERT-based parser is unsurprisingly superior; yet, with abstract or random nouns, the opposite picture emerges. We refer to this as “common sense bias” and present a challenge dataset for evaluating the extent to which parsers are sensitive to such a bias. Our code and challenge dataset are available here: github.com/coastalcph/comte</abstract>
       <url hash="ba322713">2021.wnut-1.14</url>
@@ -180,7 +180,7 @@
     <paper id="15">
       <title><fixed-case>P</fixed-case>oli<fixed-case>WAM</fixed-case>: An Exploration of a Large Scale Corpus of Political Discussions on <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Messenger</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>120–130</pages>
       <abstract>WhatsApp Messenger is one of the most popular channels for spreading information with a current reach of more than 180 countries and 2 billion people. Its widespread usage has made it one of the most popular media for information propagation among the masses during any socially engaging event. In the recent past, several countries have witnessed its effectiveness and influence in political and social campaigns. We observe a high surge in information and propaganda flow during election campaigning. In this paper, we explore a high-quality large-scale user-generated dataset curated from WhatsApp comprising of 281 groups, 31,078 unique users, and 223,404 messages shared before, during, and after the Indian General Elections 2019, encompassing all major Indian political parties and leaders. In addition to the raw noisy user-generated data, we present a fine-grained annotated dataset of 3,848 messages that will be useful to understand the various dimensions of WhatsApp political campaigning. We present several complementary insights into the investigative and sensational news stories from the same period. Exploratory data analysis and experiments showcase several exciting results and future research opportunities. To facilitate reproducible research, we make the anonymized datasets available in the public domain.</abstract>
       <url hash="7c1b283e">2021.wnut-1.15</url>
@@ -226,7 +226,7 @@
       <author><first>Cheng</first><last>Chen</last></author>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last></author>
       <author><first>Shashi</first><last>Bhushan</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>168–174</pages>
       <abstract>Automatic Speech Recognition (ASR) systems generally do not produce punctuated transcripts. To make transcripts more readable and follow the expected input format for downstream language models, it is necessary to add punctuation marks. In this paper, we tackle the punctuation restoration problem specifically for the noisy text (e.g., phone conversation scenarios). To leverage the available written text datasets, we introduce a data sampling technique based on an n-gram language model to sample more training data that are similar to our in-domain data. Moreover, we propose a two-stage fine-tuning approach that utilizes the sampled external data as well as our in-domain dataset for models based on BERT. Extensive experiments show that the proposed approach outperforms the baseline with an improvement of 1.12% F1 score.</abstract>
       <url hash="f811d030">2021.wnut-1.19</url>
@@ -258,7 +258,7 @@
     <paper id="22">
       <title>Understanding the Impact of <fixed-case>UGC</fixed-case> Specificities on Translation Quality</title>
       <author><first>José Carlos</first><last>Rosales Núñez</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <pages>189–198</pages>
       <abstract>This work takes a critical look at the evaluation of user-generated content automatic translation, the well-known specificities of which raise many challenges for MT. Our analyses show that measuring the average-case performance using a standard metric on a UGC test set falls far short of giving a reliable image of the UGC translation quality. That is why we introduce a new data set for the evaluation of UGC translation in which UGC specificities have been manually annotated using a fine-grained typology. Using this data set, we conduct several experiments to measure the impact of different kinds of UGC specificities on translation quality, more precisely than previously possible.</abstract>
@@ -270,7 +270,7 @@
       <title>Noisy <fixed-case>UGC</fixed-case> Translation at the Character Level: Revisiting Open-Vocabulary Capabilities and Robustness of Char-Based Models</title>
       <author><first>José Carlos</first><last>Rosales Núñez</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>199–211</pages>
       <abstract>This work explores the capacities of character-based Neural Machine Translation to translate noisy User-Generated Content (UGC) with a strong focus on exploring the limits of such approaches to handle productive UGC phenomena, which almost by definition, cannot be seen at training time. Within a strict zero-shot scenario, we first study the detrimental impact on translation performance of various user-generated content phenomena on a small annotated dataset we developed and then show that such models are indeed incapable of handling unknown letters, which leads to catastrophic translation failure once such characters are encountered. We further confirm this behavior with a simple, yet insightful, copy task experiment and highlight the importance of reducing the vocabulary size hyper-parameter to increase the robustness of character-based models for machine translation.</abstract>
       <url hash="ce2752ee">2021.wnut-1.23</url>
@@ -323,7 +323,7 @@
       <author><first>Justyna</first><last>Gromada</last></author>
       <author><first>Ewelina</first><last>Szczekocka</last></author>
       <author><first>Robert</first><last>Kołodyński</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>238–248</pages>
       <abstract>Following the increasing performance of neural machine translation systems, the paradigm of using automatically translated data for cross-lingual adaptation is now studied in several applicative domains. The capacity to accurately project annotations remains however an issue for sequence tagging tasks where annotation must be projected with correct spans. Additionally, when the task implies noisy user-generated text, the quality of translation and annotation projection can be affected. In this paper we propose to tackle multilingual sequence tagging with a new span alignment method and apply it to opinion target extraction from customer reviews. We show that provided suitable heuristics, translated data with automatic span-level annotation projection can yield improvements both for cross-lingual adaptation compared to zero-shot transfer, and data augmentation compared to a multilingual baseline.</abstract>
       <url hash="0c6832bf">2021.wnut-1.27</url>
@@ -334,7 +334,7 @@
       <title>A Novel Framework for Detecting Important Subevents from Crisis Events via Dynamic Semantic Graphs</title>
       <author><first>Evangelia</first><last>Spiliopoulou</last></author>
       <author><first>Tanay Kumar</first><last>Saha</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>249–259</pages>
       <abstract>Social media is an essential tool to share information about crisis events, such as natural disasters. Event Detection aims at extracting information in the form of an event, but considers each event in isolation, without combining information across sentences or events. Many posts in Crisis NLP contain repetitive or complementary information which needs to be aggregated (e.g., the number of trapped people and their location) for disaster response. Although previous approaches in Crisis NLP aggregate information across posts, they only use shallow representations of the content (e.g., keywords), which cannot adequately represent the semantics of a crisis event and its sub-events. In this work, we propose a novel framework to extract critical sub-events from a large-scale crisis event by combining important information across relevant tweets. Our framework first converts all the tweets from a crisis event into a temporally-ordered set of graphs. Then it extracts sub-graphs that represent semantic relationships connecting verbs and nouns in 3 to 6 node sub-graphs. It does this by learning edge weights via Dynamic Graph Convolutional Networks (DGCNs) and extracting smaller, relevant sub-graphs. Our experiments show that our extracted structures (1) are semantically meaningful sub-events and (2) contain information important for the large crisis-event. Furthermore, we show that our approach significantly outperforms event detection baselines, highlighting the importance of aggregating information across tweets for our task.</abstract>
@@ -395,7 +395,7 @@
       <author><first>Khalil</first><last>Bibi</last></author>
       <author><first>Chengyang</first><last>Li</last></author>
       <author><first>Ali</first><last>Ghodsi</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <pages>297–303</pages>
       <abstract>Knowledge Distillation (KD) is extensively used to compress and deploy large pre-trained language models on edge devices for real-world applications. However, one neglected area of research is the impact of noisy (corrupted) labels on KD. We present, to the best of our knowledge, the first study on KD with noisy labels in Natural Language Understanding (NLU). We document the scope of the problem and present two methods to mitigate the impact of label noise. Experiments on the GLUE benchmark show that our methods are effective even under high noise levels. Nevertheless, our results indicate that more research is necessary to cope with label noise under the KD.</abstract>
@@ -551,8 +551,8 @@
     <paper id="47">
       <title>Can Character-based Language Models Improve Downstream Task Performances In Low-Resource And Noisy Language Scenarios?</title>
       <author><first>Arij</first><last>Riabi</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>423–436</pages>
       <abstract>Recent impressive improvements in NLP, largely based on the success of contextual neural language models, have been mostly demonstrated on at most a couple dozen high- resource languages. Building language mod- els and, more generally, NLP systems for non- standardized and low-resource languages remains a challenging task. In this work, we fo- cus on North-African colloquial dialectal Arabic written using an extension of the Latin script, called NArabizi, found mostly on social media and messaging communication. In this low-resource scenario with data display- ing a high level of variability, we compare the downstream performance of a character-based language model on part-of-speech tagging and dependency parsing to that of monolingual and multilingual models. We show that a character-based model trained on only 99k sentences of NArabizi and fined-tuned on a small treebank of this language leads to performance close to those obtained with the same architecture pre- trained on large multilingual and monolingual models. Confirming these results a on much larger data set of noisy French user-generated content, we argue that such character-based language models can be an asset for NLP in low-resource and high language variability set- tings.</abstract>
       <url hash="0dee8f63">2021.wnut-1.47</url>
@@ -617,7 +617,7 @@
       <title>Sequence-to-Sequence Lexical Normalization with Multilingual Transformers</title>
       <author><first>Ana-Maria</first><last>Bucur</last></author>
       <author><first>Adrian</first><last>Cosma</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>473–482</pages>
       <abstract>Current benchmark tasks for natural language processing contain text that is qualitatively different from the text used in informal day to day digital communication. This discrepancy has led to severe performance degradation of state-of-the-art NLP models when fine-tuned on real-world data. One way to resolve this issue is through lexical normalization, which is the process of transforming non-standard text, usually from social media, into a more standardized form. In this work, we propose a sentence-level sequence-to-sequence model based on mBART, which frames the problem as a machine translation problem. As the noisy text is a pervasive problem across languages, not just English, we leverage the multi-lingual pre-training of mBART to fine-tune it to our data. While current approaches mainly operate at the word or subword level, we argue that this approach is straightforward from a technical standpoint and builds upon existing pre-trained transformer networks. Our results show that while word-level, intrinsic, performance evaluation is behind other methods, our model improves performance on extrinsic, downstream tasks through normalization compared to models operating on raw, unprocessed, social media text.</abstract>
       <url hash="1fbf78c4">2021.wnut-1.53</url>
@@ -639,11 +639,11 @@
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Alan</first><last>Ramponi</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Iñaki</first><last>San Vicente Roncal</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Rahmad</first><last>Mahendra</last></author>
       <author><first>Talha</first><last>Çolakoğlu</last></author>
       <author><first>Timothy</first><last>Baldwin</last></author>
diff --git a/data/xml/2021.woah.xml b/data/xml/2021.woah.xml
index 99a7a0cc4a..db7b18fbd3 100644
--- a/data/xml/2021.woah.xml
+++ b/data/xml/2021.woah.xml
@@ -8,7 +8,7 @@
       <editor><first>Mathias</first><last>Lambert</last></editor>
       <editor><first>Bertie</first><last>Vidgen</last></editor>
       <editor><first>Vinodkumar</first><last>Prabhakaran</last></editor>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
       <month>August</month>
@@ -160,7 +160,7 @@
       <title>Hell Hath No Fury? Correcting Bias in the <fixed-case>NRC</fixed-case> Emotion Lexicon</title>
       <author><first>Samira</first><last>Zad</last></author>
       <author><first>Joshuan</first><last>Jimenez</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>102–113</pages>
       <abstract>There have been several attempts to create an accurate and thorough emotion lexicon in English, which identifies the emotional content of words. Of the several commonly used resources, the NRC emotion lexicon (Mohammad and Turney, 2013b) has received the most attention due to its availability, size, and its choice of Plutchik’s expressive 8-class emotion model. In this paper we identify a large number of troubling entries in the NRC lexicon, where words that should in most contexts be emotionally neutral, with no affect (e.g., ‘lesbian’, ‘stone’, ‘mountain’), are associated with emotional labels that are inaccurate, nonsensical, pejorative, or, at best, highly contingent and context-dependent (e.g., ‘lesbian’ labeled as Disgust and Sadness, ‘stone’ as Anger, or ‘mountain’ as Anticipation). We describe a procedure for semi-automatically correcting these problems in the NRC, which includes disambiguating POS categories and aligning NRC entries with other emotion lexicons to infer the accuracy of labels. We demonstrate via an experimental benchmark that the quality of the resources is thus improved. We release the revised resource and our code to enable other researchers to reproduce and build upon results.</abstract>
       <url hash="9ce0a453">2021.woah-1.11</url>
@@ -173,7 +173,7 @@
       <author><first>Yung-Sung</first><last>Chuang</last></author>
       <author><first>Mingye</first><last>Gao</last></author>
       <author><first>Hongyin</first><last>Luo</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Hung-yi</first><last>Lee</last></author>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>Shang-Wen</first><last>Li</last></author>
@@ -190,7 +190,7 @@
       <author><first>Peter</first><last>Bourgonje</last></author>
       <author><first>Karolina</first><last>Zaczynska</last></author>
       <author><first>Malte</first><last>Ostendorff</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <pages>121–131</pages>
       <abstract>We present a data set consisting of German news articles labeled for political bias on a five-point scale in a semi-supervised way. While earlier work on hyperpartisan news detection uses binary classification (i.e., hyperpartisan or not) and English data, we argue for a more fine-grained classification, covering the full political spectrum (i.e., far-left, left, centre, right, far-right) and for extending research to German data. Understanding political bias helps in accurately detecting hate speech and online abuse. We experiment with different classification methods for political bias detection. Their comparatively low performance (a macro-F1 of 43 for our best setup, compared to a macro-F1 of 79 for the binary classification task) underlines the need for more (balanced) data annotated in a fine-grained way.</abstract>
diff --git a/data/xml/2022.aacl.xml b/data/xml/2022.aacl.xml
index b839782aff..b77e10b4f4 100644
--- a/data/xml/2022.aacl.xml
+++ b/data/xml/2022.aacl.xml
@@ -36,7 +36,7 @@
     <paper id="2">
       <title>Double Trouble: How to not Explain a Text Classifier’s Decisions Using Counterfactuals Synthesized by Masked Language Models?</title>
       <author><first>Thang</first><last>Pham</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Long</first><last>Mai</last></author>
       <author><first>Anh</first><last>Nguyen</last></author>
       <pages>12–31</pages>
@@ -87,8 +87,8 @@
       <title>Systematic Evaluation of Predictive Fairness</title>
       <author><first>Xudong</first><last>Han</last></author>
       <author><first>Aili</first><last>Shen</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>68–81</pages>
       <abstract>Mitigating bias in training on biased datasets is an important open problem. Several techniques have been proposed, however the typical evaluation regime is very limited, considering very narrow data conditions. For instance, the effect of target class imbalance and stereotyping is under-studied. To address this gap, we examine the performance of various debiasing methods across multiple tasks, spanning binary classification (Twitter sentiment), multi-class classification (profession prediction), and regression (valence prediction). Through extensive experimentation, we find that data conditions have a strong influence on relative model performance, and that general conclusions cannot be drawn about method efficacy when evaluating only on standard datasets, as is current practice in fairness research.</abstract>
@@ -124,7 +124,7 @@
     <paper id="9">
       <title><fixed-case>WAX</fixed-case>: A New Dataset for Word Association e<fixed-case>X</fixed-case>planations</title>
       <author><first>Chunhua</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Simon De</first><last>Deyne</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>106–120</pages>
@@ -152,7 +152,7 @@
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
       <author><first>Morgan</first><last>Veyret</last></author>
       <author><first>Quentin</first><last>Brabant</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <pages>131–147</pages>
       <abstract>This paper focuses on the generation of natural language questions based on SPARQL queries, with an emphasis on conversational use cases (follow-up question-answering). It studies what can be achieved so far based on current deep learning models (namely pretrained T5 and BART models). To do so, 4 knowledge-based QA corpora have been homogenized for the task and a new challenge set is introduced. A first series of experiments analyzes the impact of different training setups, while a second series seeks to understand what is still difficult for these models. The results from automatic metrics and human evaluation show that simple questions and frequent templates of SPARQL queries are usually well processed whereas complex questions and conversational dimensions (coreferences and ellipses) are still difficult to handle. The experimental material is publicly available on <url>https://github.com/Orange-OpenSource/sparql-to-text</url> .</abstract>
       <url hash="76aacfc3">2022.aacl-main.11</url>
@@ -212,7 +212,7 @@
       <title><fixed-case>A</fixed-case>rabic Dialect Identification with a Few Labeled Examples Using Generative Adversarial Networks</title>
       <author><first>Mahmoud</first><last>Yusuf</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>196–204</pages>
       <abstract>Given the challenges and complexities introduced while dealing with Dialect Arabic (DA) variations, Transformer based models, e.g., BERT, outperformed other models in dealing with the DA identification task. However, to fine-tune these models, a large corpus is required. Getting a large number high quality labeled examples for some Dialect Arabic classes is challenging and time-consuming. In this paper, we address the Dialect Arabic Identification task. We extend the transformer-based models, ARBERT and MARBERT, with unlabeled data in a generative adversarial setting using Semi-Supervised Generative Adversarial Networks (SS-GAN). Our model enabled producing high-quality embeddings for the Dialect Arabic examples and aided the model to better generalize for the downstream classification task given few labeled examples. Experimental results showed that our model reached better performance and faster convergence when only a few labeled examples are available.</abstract>
       <url hash="00e37d2c">2022.aacl-main.16</url>
@@ -236,7 +236,7 @@
       <author><first>Anthi</first><last>Papadopoulou</last></author>
       <author><first>Yunhao</first><last>Yu</last></author>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>217–229</pages>
       <abstract>We present a novel approach for text sanitization, which is the task of editing a document to mask all (direct and indirect) personal identifiers and thereby conceal the identity of the individuals(s) mentioned in the text. In contrast to previous work, the approach relies on explicit measures of privacy risk, making it possible to explicitly control the trade-off between privacy protection and data utility. The approach proceeds in three steps. A neural, privacy-enhanced entity recognizer is first employed to detect and classify potential personal identifiers. We then determine which entities, or combination of entities, are likely to pose a re-identification risk through a range of privacy risk assessment measures. We present three such measures of privacy risk, respectively based on (1) span probabilities derived from a BERT language model, (2) web search queries and (3) a classifier trained on labelled data. Finally, a linear optimization solver decides which entities to mask to minimize the semantic loss while simultaneously ensuring that the estimated privacy risk remains under a given threshold. We evaluate the approach both in the absence and presence of manually annotated data. Our results highlight the potential of the approach, as well as issues specific types of personal data can introduce to the process.</abstract>
       <url hash="dd78ed2d">2022.aacl-main.18</url>
@@ -286,7 +286,7 @@
       <author><first>Anton</first><last>Chernyavskiy</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>266–285</pages>
       <abstract>While there has been substantial progress in developing systems to automate fact-checking, they still lack credibility in the eyes of the users. Thus, an interesting approach has emerged: to perform automatic fact-checking by verifying whether an input claim has been previously fact-checked by professional fact-checkers and to return back an article that explains their decision. This is a sensible approach as people trust manual fact-checking, and as many claims are repeated multiple times. Yet, a major issue when building such systems is the small number of known tweet–verifying article pairs available for training. Here, we aim to bridge this gap by making use of crowd fact-checking, i.e., mining claims in social media for which users have responded with a link to a fact-checking article. In particular, we mine a large-scale collection of 330,000 tweets paired with a corresponding fact-checking article. We further propose an end-to-end framework to learn from this noisy data based on modified self-adaptive training, in a distant supervision scenario. Our experiments on the CLEF’21 CheckThat! test set show improvements over the state of the art by two points absolute. Our code and datasets are available at <url>https://github.com/mhardalov/crowdchecked-claims</url></abstract>
       <url hash="2ae3c968">2022.aacl-main.22</url>
@@ -321,7 +321,7 @@
       <author><first>Francesco</first><last>Barbieri</last></author>
       <author><first>Vitor</first><last>Sousa</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>309–319</pages>
       <abstract>Recent progress in language model pre-training has led to important improvements in Named Entity Recognition (NER). Nonetheless, this progress has been mainly tested in well-formatted documents such as news, Wikipedia, or scientific articles. In social media the landscape is different, in which it adds another layer of complexity due to its noisy and dynamic nature. In this paper, we focus on NER in Twitter, one of the largest social media platforms, and construct a new NER dataset, TweetNER7, which contains seven entity types annotated over 11,382 tweets from September 2019 to August 2021. The dataset was constructed by carefully distributing the tweets over time and taking representative trends as a basis. Along with the dataset, we provide a set of language model baselines and perform an analysis on the language model performance on the task, especially analyzing the impact of different time periods. In particular, we focus on three important temporal aspects in our analysis: short-term degradation of NER models over time, strategies to fine-tune a language model over different periods, and self-labeling as an alternative to lack of recently-labeled data. TweetNER7 is released publicly (<url>https://huggingface.co/datasets/tner/tweetner7</url>) along with the models fine-tuned on it (NER models have been integrated into TweetNLP and can be found at <url>https://github.com/asahi417/tner/tree/master/examples/tweetner7_paper</url>).</abstract>
       <url hash="c43201a9">2022.aacl-main.25</url>
@@ -346,7 +346,7 @@
       <title>Cross-Lingual Open-Domain Question Answering with Answer Sentence Generation</title>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Luca</first><last>Soldaini</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Eric</first><last>Lind</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>337–353</pages>
@@ -361,7 +361,7 @@
       <author><first>Yuqing</first><last>Xing</last></author>
       <author><first>Longyin</first><last>Zhang</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>354–363</pages>
       <abstract>In recent years, top-down neural models have achieved significant success in text-level discourse parsing. Nevertheless, they still suffer from the top-down error propagation issue, especially when the performance on the upper-level tree nodes is terrible. In this research, we aim to learn from the correlations in between EDUs directly to shorten the hierarchical distance of the RST structure to alleviate the above problem. Specifically, we contribute a joint top-down framework that learns from both discourse dependency and constituency parsing through one shared encoder and two independent decoders. Moreover, we also explore a constituency-to-dependency conversion scheme tailored for the Chinese discourse corpus to ensure the high quality of the joint learning process. Our experimental results on CDTB show that the dependency information we use well heightens the understanding of the rhetorical structure, especially for the upper-level tree layers.</abstract>
       <url hash="a04b7972">2022.aacl-main.28</url>
@@ -377,7 +377,7 @@
       <author><first>Lei</first><last>Guo</last></author>
       <author><first>Prakash</first><last>Ishwar</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>364–374</pages>
       <abstract>We aim to develop methods for understanding how multimedia news exposure can affect people’s emotional responses, and we especially focus on news content related to gun violence, a very important yet polarizing issue in the U.S. We created the dataset NEmo+ by significantly extending the U.S. gun violence news-to-emotions dataset, BU-NEmo, from 320 to 1,297 news headline and lead image pairings and collecting 38,910 annotations in a large crowdsourcing experiment. In curating the NEmo+ dataset, we developed methods to identify news items that will trigger similar versus divergent emotional responses. For news items that trigger similar emotional responses, we compiled them into the NEmo+-Consensus dataset. We benchmark models on this dataset that predict a person’s dominant emotional response toward the target news item (single-label prediction). On the full NEmo+ dataset, containing news items that would lead to both differing and similar emotional responses, we also benchmark models for the novel task of predicting the distribution of evoked emotional responses in humans when presented with multi-modal news content. Our single-label and multi-label prediction models outperform baselines by large margins across several metrics.</abstract>
       <url hash="421f7e46">2022.aacl-main.29</url>
@@ -389,7 +389,7 @@
       <title><fixed-case>A</fixed-case>ug<fixed-case>CSE</fixed-case>: Contrastive Sentence Embedding with Diverse Augmentations</title>
       <author><first>Zilu</first><last>Tang</last></author>
       <author><first>Muhammed Yusuf</first><last>Kocyigit</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>375–398</pages>
       <abstract>Data augmentation techniques have been proven useful in many applications in NLP fields. Most augmentations are task-specific, and cannot be used as a general-purpose tool. In our work, we present AugCSE, a unified framework to utilize diverse sets of data augmentations to achieve a better, general-purpose, sentence embedding model. Building upon the latest sentence embedding models, our approach uses a simple antagonistic discriminator that differentiates the augmentation types. With the finetuning objective borrowed from domain adaptation, we show that diverse augmentations, which often lead to conflicting contrastive signals, can be tamed to produce a better and more robust sentence representation. Our methods achieve state-of-the-art results on downstream transfer tasks and perform competitively on semantic textual similarity tasks, using only unsupervised data.</abstract>
       <url hash="fcb9ca77">2022.aacl-main.30</url>
@@ -415,7 +415,7 @@
       <title>Dual-Encoder Transformers with Cross-modal Alignment for Multimodal Aspect-based Sentiment Analysis</title>
       <author><first>Zhewen</first><last>Yu</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>414–423</pages>
       <abstract>Multimodal aspect-based sentiment analysis (MABSA) aims to extract the aspect terms from text and image pairs, and then analyze their corresponding sentiment. Recent studies typically use either a pipeline method or a unified transformer based on a cross-attention mechanism. However, these methods fail to explicitly and effectively incorporate the alignment between text and image. Supervised finetuning of the universal transformers for MABSA still requires a certain number of aligned image-text pairs. This study proposes a dual-encoder transformer with cross-modal alignment (DTCA). Two auxiliary tasks, including text-only extraction and text-patch alignment are introduced to enhance cross-attention performance. To align text and image, we propose an unsupervised approach which minimizes the Wasserstein distance between both modalities, forcing both encoders to produce more appropriate representations for the final extraction. Experimental results on two benchmarks demonstrate that DTCA consistently outperforms existing methods.</abstract>
@@ -427,7 +427,7 @@
       <title><fixed-case>AVAST</fixed-case>: Attentive Variational State Tracker in a Reinforced Navigator</title>
       <author><first>Je-Wei</first><last>Jang</last></author>
       <author><first>Mahdin</first><last>Rohmatillah</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>424–433</pages>
       <abstract>Recently, emerging approaches have been proposed to deal with robotic navigation problems, especially vision-and-language navigation task which is one of the most realistic indoor navigation challenge tasks. This task can be modelled as a sequential decision-making problem, which is suitable to be solved by deep reinforcement learning. Unfortunately, the observations provided from the simulator in this task are not fully observable states, which exacerbate the difficulty of implementing reinforcement learning. To deal with this challenge, this paper presents a novel method, called as attentive variational state tracker (AVAST), a variational approach to approximate belief state distribution for the construction of a reinforced navigator. The variational approach is introduced to improve generalization to the unseen environment which barely achieved by traditional deterministic state tracker. In order to stabilize the learning procedure, a fine-tuning process using policy optimization is proposed. From the experimental results, the proposed AVAST does improve the generalization relative to previous works in vision-and-language navigation task. A significant performance is achieved without requiring any additional exploration in the unseen environment.</abstract>
       <url hash="c9cf55f8">2022.aacl-main.33</url>
@@ -459,7 +459,7 @@
     <paper id="36">
       <title>Bag-of-Vectors Autoencoders for Unsupervised Conditional Text Generation</title>
       <author><first>Florian</first><last>Mai</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>468–488</pages>
       <abstract>Text autoencoders are often used for unsupervised conditional text generation by applying mappings in the latent space to change attributes to the desired values. Recently, Mai et al. (2020) proposed Emb2Emb, a method to learn these mappings in the embedding space of an autoencoder. However, their method is restricted to autoencoders with a single-vector embedding, which limits how much information can be retained. We address this issue by extending their method to Bag-of-Vectors Autoencoders (BoV-AEs), which encode the text into a variable-size bag of vectors that grows with the size of the text, as in attention-based models. This allows to encode and reconstruct much longer texts than standard autoencoders. Analogous to conventional autoencoders, we propose regularization techniques that facilitate learning meaningful operations in the latent space. Finally, we adapt Emb2Emb for a training scheme that learns to map an input bag to an output bag, including a novel loss function and neural architecture. Our empirical evaluations on unsupervised sentiment transfer show that our method performs substantially better than a standard autoencoder.</abstract>
       <url hash="b82151dc">2022.aacl-main.36</url>
@@ -474,7 +474,7 @@
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Can</first><last>Xu</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>489–500</pages>
       <abstract>Conversational Recommender System (CRS), which aims to recommend high-quality items to users through interactive conversations, has gained great research interest recently. A CRS is usually composed of a recommendation module and a generation module. In the previous work, these two modules are loosely connected in the model training and are shallowly integrated during inference, where a simple switching or copy mechanism is adopted to incorporate recommended items into generated responses. Moreover, the current end-to-end neural models trained on small crowd-sourcing datasets (e.g., 10K dialogs in the ReDial dataset) tend to overfit and have poor chit-chat ability. In this work, we propose a novel unified framework that integrates recommendation into the dialog (RecInDial) generation by introducing a vocabulary pointer. To tackle the low-resource issue in CRS, we finetune the large-scale pretrained language models to generate fluent and diverse responses, and introduce a knowledge-aware bias learned from an entity-oriented knowledge graph to enhance the recommendation performance. Furthermore, we propose to evaluate the CRS models in an end-to-end manner, which can reflect the overall performance of the entire system rather than the performance of individual modules, compared to the separate evaluations of the two modules used in previous work. Experiments on the benchmark dataset ReDial show our RecInDial model significantly surpasses the state-of-the-art methods. More extensive analyses show the effectiveness of our model.</abstract>
       <url hash="e5acd011">2022.aacl-main.37</url>
@@ -531,7 +531,7 @@
       <title>Affective Retrofitted Word Embeddings</title>
       <author><first>Sapan</first><last>Shah</last></author>
       <author><first>Sreedhar</first><last>Reddy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>550–561</pages>
       <abstract>Word embeddings learned using the distributional hypothesis (e.g., GloVe, Word2vec) do not capture the affective dimensions of valence, arousal, and dominance, which are present inherently in words. We present a novel retrofitting method for updating embeddings of words for their affective meaning. It learns a non-linear transformation function that maps pre-trained embeddings to an affective vector space, in a representation learning setting. We investigate word embeddings for their capacity to cluster emotion-bearing words. The affective embeddings learned by our method achieve better inter-cluster and intra-cluster distance for words having the same emotions, as evaluated through different cluster quality metrics. For the downstream tasks on sentiment analysis and sarcasm detection, simple classification models, viz. SVM and Attention Net, learned using our affective embeddings perform better than their pre-trained counterparts (more than 1.5% improvement in F1-score) and other benchmarks. Furthermore, the difference in performance is more pronounced in limited data setting.</abstract>
       <url hash="1d1a78b5">2022.aacl-main.42</url>
@@ -543,7 +543,7 @@
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Zijian</first><last>Yang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>562–574</pages>
       <abstract>Encoder-decoder architecture is widely adopted for sequence-to-sequence modeling tasks. For machine translation, despite the evolution from long short-term memory networks to Transformer networks, plus the introduction and development of attention mechanism, encoder-decoder is still the de facto neural network architecture for state-of-the-art models. While the motivation for decoding information from some hidden space is straightforward, the strict separation of the encoding and decoding steps into an encoder and a decoder in the model architecture is not necessarily a must. Compared to the task of autoregressive language modeling in the target language, machine translation simply has an additional source sentence as context. Given the fact that neural language models nowadays can already handle rather long contexts in the target language, it is natural to ask whether simply concatenating the source and target sentences and training a language model to do translation would work. In this work, we investigate the aforementioned concept for machine translation. Specifically, we experiment with bilingual translation, translation with additional target monolingual data, and multilingual translation. In all cases, this alternative approach performs on par with the baseline encoder-decoder Transformer, suggesting that an encoder-decoder architecture might be redundant for neural machine translation.</abstract>
       <url hash="eddf65c3">2022.aacl-main.43</url>
@@ -645,7 +645,7 @@
       <title>Construction Repetition Reduces Information Rate in Dialogue</title>
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Arabella</first><last>Sinclair</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>665–682</pages>
       <abstract>Speakers repeat constructions frequently in dialogue. Due to their peculiar information-theoretic properties, repetitions can be thought of as a strategy for cost-effective communication. In this study, we focus on the repetition of lexicalised constructions—i.e., recurring multi-word units—in English open-domain spoken dialogues. We hypothesise that speakers use construction repetition to mitigate information rate, leading to an overall decrease in utterance information content over the course of a dialogue. We conduct a quantitative analysis, measuring the information content of constructions and that of their containing utterances, estimating information content with an adaptive neural language model. We observe that construction usage lowers the information content of utterances. This facilitating effect (i) increases throughout dialogues, (ii) is boosted by repetition, (iii) grows as a function of repetition frequency and density, and (iv) is stronger for repetitions of referential constructions.</abstract>
       <url hash="d60cd8be">2022.aacl-main.51</url>
@@ -695,7 +695,7 @@
       <title>Re-contextualizing Fairness in <fixed-case>NLP</fixed-case>: The Case of <fixed-case>I</fixed-case>ndia</title>
       <author><first>Shaily</first><last>Bhatt</last></author>
       <author><first>Sunipa</first><last>Dev</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Shachi</first><last>Dave</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <pages>727–740</pages>
@@ -719,7 +719,7 @@
     <paper id="57">
       <title>Unsupervised Domain Adaptation for Sparse Retrieval by Filling Vocabulary and Word Frequency Gaps</title>
       <author><first>Hiroki</first><last>Iida</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>752–765</pages>
       <abstract>IR models using a pretrained language model significantly outperform lexical approaches like BM25. In particular, SPLADE, which encodes texts to sparse vectors, is an effective model for practical use because it shows robustness to out-of-domain datasets. However, SPLADE still struggles with exact matching of low-frequency words in training data. In addition, domain shifts in vocabulary and word frequencies deteriorate the IR performance of SPLADE. Because supervision data are scarce in the target domain, addressing the domain shifts without supervision data is necessary. This paper proposes an unsupervised domain adaptation method by filling vocabulary and word-frequency gaps. First, we expand a vocabulary and execute continual pretraining with a masked language model on a corpus of the target domain. Then, we multiply SPLADE-encoded sparse vectors by inverse document frequency weights to consider the importance of documents with low-frequency words. We conducted experiments using our method on datasets with a large vocabulary gap from a source domain. We show that our method outperforms the present state-of-the-art domain adaptation method. In addition, our method achieves state-of-the-art results, combined with BM25.</abstract>
       <url hash="9753f8fa">2022.aacl-main.57</url>
@@ -739,11 +739,11 @@
     </paper>
     <paper id="59">
       <title>Cross-lingual Few-Shot Learning on Unseen Languages</title>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Mayank</first><last>Kulkarni</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>777–791</pages>
       <abstract>Large pre-trained language models (LMs) have demonstrated the ability to obtain good performance on downstream tasks with limited examples in cross-lingual settings. However, this was mostly studied for relatively resource-rich languages, where at least enough unlabeled data is available to be included in pre-training a multilingual language model. In this paper, we explore the problem of cross-lingual transfer in unseen languages, where no unlabeled data is available for pre-training a model. We use a downstream sentiment analysis task across 12 languages, including 8 unseen languages, to analyze the effectiveness of several few-shot learning strategies across the three major types of model architectures and their learning dynamics. We also compare strategies for selecting languages for transfer and contrast findings across languages seen in pre-training compared to those that are not. Our findings contribute to the body of knowledge on cross-lingual models for low-resource settings that is paramount to increasing coverage, diversity, and equity in access to NLP technology. We show that, in few-shot learning, linguistically similar and geographically similar languages are useful for cross-lingual adaptation, but taking the context from a mixture of random source languages is surprisingly more effective. We also compare different model architectures and show that the encoder-only model, XLM-R, gives the best downstream task performance.</abstract>
       <url hash="478adfb6">2022.aacl-main.59</url>
@@ -812,11 +812,11 @@
     <paper id="65">
       <title>Not another Negation Benchmark: The <fixed-case>N</fixed-case>a<fixed-case>N</fixed-case>-<fixed-case>NLI</fixed-case> Test Suite for Sub-clausal Negation</title>
       <author><first>Thinh Hung</first><last>Truong</last></author>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>883–894</pages>
       <abstract>Negation is poorly captured by current language models, although the extent of this problem is not widely understood. We introduce a natural language inference (NLI) test suite to enable probing the capabilities of NLP methods, with the aim of understanding sub-clausal negation. The test suite contains premise–hypothesis pairs where the premise contains sub-clausal negation and the hypothesis is constructed by making minimal modifications to the premise in order to reflect different possible interpretations. Aside from adopting standard NLI labels, our test suite is systematically constructed under a rigorous linguistic framework. It includes annotation of negation types and constructions grounded in linguistic theory, as well as the operations used to construct hypotheses. This facilitates fine-grained analysis of model performance. We conduct experiments using pre-trained language models to demonstrate that our test suite is more challenging than existing benchmarks focused on negation, and show how our annotation supports a deeper understanding of the current NLI capabilities in terms of negation and quantification.</abstract>
       <url hash="4bb78c31">2022.aacl-main.65</url>
@@ -851,9 +851,9 @@
     <paper id="68">
       <title>Dual Mechanism Priming Effects in <fixed-case>H</fixed-case>indi Word Order</title>
       <author><first>Sidharth</first><last>Ranjan</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>936–953</pages>
       <abstract>Word order choices during sentence production can be primed by preceding sentences. In this work, we test the DUAL MECHANISM hypothesis that priming is driven by multiple different sources. Using a Hindi corpus of text productions, we model lexical priming with an n-gram cache model, and we capture more abstract syntactic priming with an adaptive neural language model. We permute the preverbal constituents of corpus sentences and then use a logistic regression model to predict which sentences actually occurred in the corpus against artificially generated meaning-equivalent variants. Our results indicate that lexical priming and lexically-independent syntactic priming affect complementary sets of verb classes. By showing that different priming influences are separable from one another, our results support the hypothesis that multiple different cognitive mechanisms underlie priming.</abstract>
       <url hash="af0005e7">2022.aacl-main.68</url>
@@ -890,7 +890,7 @@
       <author><first>Tulika</first><last>Saha</last></author>
       <author><first>Aditya Prakash</first><last>Patra</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>978–990</pages>
       <abstract>Dialogue Act Classification (DAC) that determines the communicative intention of an utterance has been investigated widely over the years as a standalone task. But the emotional state of the speaker has a considerable effect on its pragmatic content. Sentiment as a human behavior is also closely related to emotion and one aids in the better understanding of the other. Thus, their role in identification of DAs needs to be explored. As a first step, we extend the newly released multi-modal <i>EMOTyDA</i> dataset to enclose sentiment tags for each utterance. In order to incorporate these multiple aspects, we propose a Dual Attention Mechanism (DAM) based multi-modal, multi-tasking conversational framework. The DAM module encompasses intra-modal and interactive inter-modal attentions with multiple loss optimization at various hierarchies to fuse multiple modalities efficiently and learn generalized features across all the tasks. Additionally, to counter the class-imbalance issue in dialogues, we introduce a 2-step Deferred Optimisation Schedule (DOS) that involves Meta-Net (MN) learning and deferred re-weighting where the former helps to learn an explicit weighting function from data automatically and the latter deploys a re-weighted multi-task loss with a smaller learning rate. Empirically, we establish that the joint optimisation of multi-modal DAC, SA and ER tasks along with the incorporation of 2-step DOS and MN learning produces better results compared to its different counterparts and outperforms state-of-the-art model.</abstract>
       <url hash="fffa6b51">2022.aacl-main.71</url>
@@ -925,7 +925,7 @@
       <title>Hengam: An Adversarially Trained Transformer for <fixed-case>P</fixed-case>ersian Temporal Tagging</title>
       <author><first>Sajad</first><last>Mirzababaei</last></author>
       <author><first>Amir Hossein</first><last>Kargaran</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Ehsaneddin</first><last>Asgari</last></author>
       <pages>1013–1024</pages>
       <abstract>Many NLP main tasks benefit from an accurate understanding of temporal expressions, e.g., text summarization, question answering, and information retrieval. This paper introduces Hengam, an adversarially trained transformer for Persian temporal tagging outperforming state-of-the-art approaches on a diverse and manually created dataset. We create Hengam in the following concrete steps: (1) we develop HengamTagger, an extensible rule-based tool that can extract temporal expressions from a set of diverse language-specific patterns for any language of interest. (2) We apply HengamTagger to annotate temporal tags in a large and diverse Persian text collection (covering both formal and informal contexts) to be used as weakly labeled data. (3) We introduce an adversarially trained transformer model on HengamCorpus that can generalize over the HengamTagger’s rules. We create HengamGold, the first high-quality gold standard for Persian temporal tagging. Our trained adversarial HengamTransformer not only achieves the best performance in terms of the F1-score (a type F1-Score of 95.42 and a partial F1-Score of 91.60) but also successfully deals with language ambiguities and incorrect spellings. Our code, data, and models are publicly available at <url>https://github.com/kargaranamir/Hengam</url>.</abstract>
@@ -938,7 +938,7 @@
       <author><first>Yang Trista</first><last>Cao</last></author>
       <author><first>Kyle</first><last>Seelman</last></author>
       <author><first>Kyungjun</first><last>Lee</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1025–1034</pages>
       <abstract>In visual question answering (VQA), a machine must answer a question given an associated image. Recently, accessibility researchers have explored whether VQA can be deployed in a real-world setting where users with visual impairments learn about their environment by capturing their visual surroundings and asking questions. However, most of the existing benchmarking datasets for VQA focus on machine “understanding” and it remains unclear how progress on those datasets corresponds to improvements in this real-world use case. We aim to answer this question by evaluating discrepancies between machine “understanding” datasets (VQA-v2) and accessibility datasets (VizWiz) by evaluating a variety of VQA models. Based on our findings, we discuss opportunities and challenges in VQA for accessibility and suggest directions for future work.</abstract>
       <url hash="8ca7dd6c">2022.aacl-main.75</url>
@@ -952,7 +952,7 @@
       <author><first>Shubhashis</first><last>Sengupta</last></author>
       <author><first>Anutosh</first><last>Maitra</last></author>
       <author><first>Roshni</first><last>Ramnani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1035–1047</pages>
       <abstract>Task-oriented conversational agents are gaining immense popularity and success in a wide range of tasks, from flight ticket booking to online shopping. However, the existing systems presume that end-users will always have a pre-determined and servable task goal, which results in dialogue failure in hostile scenarios, such as goal unavailability. On the other hand, human agents accomplish users’ tasks even in a large number of goal unavailability scenarios by persuading them towards a very similar and servable goal. Motivated by the limitation, we propose and build a novel end-to-end multi-modal persuasive dialogue system incorporated with a personalized persuasive module aided goal controller and goal persuader. The goal controller recognizes goal conflicting/unavailability scenarios and formulates a new goal, while the goal persuader persuades users using a personalized persuasive strategy identified through dialogue context. We also present a novel automatic evaluation metric called <i>P</i>ersuasiveness <i>Me</i>asurement <i>R</i>ate (<i>PMeR</i>) for quantifying the persuasive capability of a conversational agent. The obtained improvements (both quantitative and qualitative) firmly establish the superiority and need of the proposed context-guided, personalized persuasive virtual agent over existing traditional task-oriented virtual agents. Furthermore, we also curated a multi-modal persuasive conversational dialogue corpus annotated with intent, slot, sentiment, and dialogue act for e-commerce domain.</abstract>
       <url hash="e4a3aa4e">2022.aacl-main.76</url>
@@ -967,7 +967,7 @@
       <author><first>Rajdeep</first><last>Mukherjee</last></author>
       <author><first>Kripabandhu</first><last>Ghosh</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last></author>
+      <author><first>Saptarshi</first><last>Ghosh</last></author>
       <pages>1048–1064</pages>
       <abstract>Summarization of legal case judgement documents is a challenging problem in Legal NLP. However, not much analyses exist on how different families of summarization models (e.g., extractive vs. abstractive) perform when applied to legal case documents. This question is particularly important since many recent transformer-based abstractive summarization models have restrictions on the number of input tokens, and legal documents are known to be very long. Also, it is an open question on how best to evaluate legal case document summarization systems. In this paper, we carry out extensive experiments with several extractive and abstractive summarization methods (both supervised and unsupervised) over three legal summarization datasets that we have developed. Our analyses, that includes evaluation by law practitioners, lead to several interesting insights on legal summarization in specific and long document summarization in general.</abstract>
       <url hash="eb844839">2022.aacl-main.77</url>
@@ -1056,7 +1056,7 @@
     <paper id="85">
       <title>Higher-Order Dependency Parsing for Arc-Polynomial Score Functions via Gradient-Based Methods and Genetic Algorithm</title>
       <author><first>Xudong</first><last>Zhang</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <pages>1158–1171</pages>
       <abstract>We present a novel method for higher-order dependency parsing which takes advantage of the general form of score functions written as arc-polynomials, a general framework which encompasses common higher-order score functions, and includes new ones. This method is based on non-linear optimization techniques, namely coordinate ascent and genetic search where we iteratively update a candidate parse. Updates are formulated as gradient-based operations, and are efficiently computed by auto-differentiation libraries. Experiments show that this method obtains results matching the recent state-of-the-art second order parsers on three standard datasets.</abstract>
@@ -1115,7 +1115,7 @@
     <paper id="1">
       <title>Transfer Learning for Humor Detection by Twin Masked Yellow <fixed-case>M</fixed-case>uppets</title>
       <author><first>Aseem</first><last>Arora</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Adam</first><last>Jatowt</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>1–7</pages>
@@ -1147,7 +1147,7 @@
     <paper id="4">
       <title>Number Theory Meets Linguistics: Modelling Noun Pluralisation Across 1497 Languages Using 2-adic Metrics</title>
       <author><first>Gregory</first><last>Baker</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>24–32</pages>
       <abstract>A simple machine learning model of pluralisation as a linear regression problem minimising a p-adic metric substantially outperforms even the most robust of Euclidean-space regressors on languages in the Indo-European, Austronesian, Trans New-Guinea, Sino-Tibetan, Nilo-Saharan, Oto-Meanguean and Atlantic-Congo language families. There is insufficient evidence to support modelling distinct noun declensions as a p-adic neighbourhood even in Indo-European languages.</abstract>
       <url hash="7a195e25">2022.aacl-short.4</url>
@@ -1334,7 +1334,7 @@
       <author><first>Helena</first><last>Balabin</last></author>
       <author><first>Julio</first><last>Hurtado</last></author>
       <author><first>Alvaro</first><last>Soto</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>154–160</pages>
       <abstract>Lifelong language learning seeks to have models continuously learn multiple tasks in a sequential order without suffering from catastrophic forgetting. State-of-the-art approaches rely on sparse experience replay as the primary approach to prevent forgetting. Experience replay usually adopts sampling methods for the memory population; however, the effect of the chosen sampling strategy on model performance has not yet been studied. In this paper, we investigate how relevant the selective memory population is in the lifelong learning process of text classification and question-answering tasks. We found that methods that randomly store a uniform number of samples from the entire data stream lead to high performances, especially for low memory size, which is consistent with computer vision studies.</abstract>
       <url hash="f4ae76a7">2022.aacl-short.20</url>
@@ -1355,7 +1355,7 @@
       <title>Multi-Type Conversational Question-Answer Generation with Closed-ended and Unanswerable Questions</title>
       <author><first>Seonjeong</first><last>Hwang</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>169–177</pages>
       <abstract>Conversational question answering (CQA) facilitates an incremental and interactive understanding of a given context, but building a CQA system is difficult for many domains due to the problem of data scarcity. In this paper, we introduce a novel method to synthesize data for CQA with various question types, including open-ended, closed-ended, and unanswerable questions. We design a different generation flow for each question type and effectively combine them in a single, shared framework. Moreover, we devise a hierarchical answerability classification (hierarchical AC) module that improves quality of the synthetic data while acquiring unanswerable questions. Manual inspections show that synthetic data generated with our framework have characteristics very similar to those of human-generated conversations. Across four domains, CQA systems trained on our synthetic data indeed show good performance close to the systems trained on human-annotated data.</abstract>
       <url hash="4b9628f3">2022.aacl-short.22</url>
@@ -1378,7 +1378,7 @@
     <paper id="24">
       <title><fixed-case>NGEP</fixed-case>: A Graph-based Event Planning Framework for Story Generation</title>
       <author><first>Chen</first><last>Tang</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Tyler</first><last>Loakman</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <author><first>Frank</first><last>Guerin</last></author>
@@ -1427,8 +1427,8 @@
       <author><first>Shuaibo</first><last>Wang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Songming</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>221–227</pages>
       <abstract>Neural machine translation (NMT) models are known to be fragile to noisy inputs from automatic speech recognition (ASR) systems. Existing methods are usually tailored for robustness against only homophone errors which account for a small portion of realistic ASR errors. In this paper, we propose an adversarial example generation method based on confusion sets that contain words easily confusable with a target word by ASR to conduct adversarial training for NMT models. Specifically, an adversarial example is generated from the perspective of acoustic relations instead of the traditional uniform or unigram sampling from the confusion sets. Experiments on different test sets with hand-crafted and real-world noise demonstrate the effectiveness of our method over previous methods. Moreover, our approach can achieve improvements on the clean test set.</abstract>
       <url hash="18c8d6e9">2022.aacl-short.28</url>
@@ -1543,7 +1543,7 @@
     <paper id="38">
       <title>Demographic-Aware Language Model Fine-tuning as a Bias Mitigation Technique</title>
       <author><first>Aparna</first><last>Garimella</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Akhash</first><last>Amarnath</last></author>
       <pages>311–319</pages>
       <abstract>BERT-like language models (LMs), when exposed to large unstructured datasets, are known to learn and sometimes even amplify the biases present in such data. These biases generally reflect social stereotypes with respect to gender, race, age, and others. In this paper, we analyze the variations in gender and racial biases in BERT, a large pre-trained LM, when exposed to different demographic groups. Specifically, we investigate the effect of fine-tuning BERT on text authored by historically disadvantaged demographic groups in comparison to that by advantaged groups. We show that simply by fine-tuning BERT-like LMs on text authored by certain demographic groups can result in the mitigation of social biases in these LMs against various target groups.</abstract>
@@ -1570,7 +1570,7 @@
       <author><first>Mayank</first><last>Kulkarni</last></author>
       <author><first>Lingjue</first><last>Xie</last></author>
       <author><first>Mounica</first><last>Maddela</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>326–333</pages>
       <abstract>Entity-centric summarization is a type of controllable summarization that aims to produce a summary of a document that is specific to a given target entity. Extractive summaries possess multiple advantages over abstractive ones such as preserving factuality and can be directly used in downstream tasks like target-based sentiment analysis or incorporated into search applications. In this paper, we explore methods to solve this task by recasting it as a sentence selection task, as supported by the EntSUM data set. We use methods inspired by information retrieval, where the input to the model is a pair representing a sentence from the original document and the target entity, in place of the query. We explore different architecture variants and loss functions in this framework with results showing an up to 5.8 F1 improvement over past state-of-the-art and outperforming the competitive entity-centric Lead 3 heuristic by 1.1 F1. In addition, we also demonstrate similarly strong results on the related task of salient sentence selection for an entity.</abstract>
       <url hash="e331dde0">2022.aacl-short.40</url>
@@ -1586,7 +1586,7 @@
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Cass</first><last>Lowry</last></author>
       <author><first>Richard</first><last>Compton</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Maria</first><last>Polinsky</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>334–340</pages>
@@ -1599,7 +1599,7 @@
       <title>Self-Repetition in Abstractive Neural Summarizers</title>
       <author><first>Nikita</first><last>Salkar</last></author>
       <author><first>Thomas</first><last>Trikalinos</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>341–350</pages>
       <abstract>We provide a quantitative and qualitative analysis of self-repetition in the output of neural summarizers. We measure self-repetition as the number of n-grams of length four or longer that appear in multiple outputs of the same system. We analyze the behavior of three popular architectures (BART, T5, and Pegasus), fine-tuned on five datasets. In a regression analysis, we find that the three architectures have different propensities for repeating content across output summaries for inputs, with BART being particularly prone to self-repetition. Fine-tuning on more abstractive data, and on data featuring formulaic language is associated with a higher rate of self-repetition. In qualitative analysis, we find systems produce artefacts such as ads and disclaimers unrelated to the content being summarized, as well as formulaic phrases common in the fine-tuning domain. Our approach to corpus-level analysis of self-repetition may help practitioners clean up training data for summarizers and ultimately support methods for minimizing the amount of self-repetition.</abstract>
@@ -1621,7 +1621,7 @@
     </paper>
     <paper id="44">
       <title>Modeling Document-level Temporal Structures for Building Temporal Dependency Graphs</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>357–365</pages>
       <abstract>We propose to leverage news discourse profiling to model document-level temporal structures for building temporal dependency graphs. Our key observation is that the functional roles of sentences used for profiling news discourse signify different time frames relevant to a news story and can, therefore, help to recover the global temporal structure of a document. Our analyses and experiments with the widely used knowledge distillation technique show that discourse profiling effectively identifies distant inter-sentence event and (or) time expression pairs that are temporally related and otherwise difficult to locate.</abstract>
@@ -1645,7 +1645,7 @@
       <title><fixed-case>M</fixed-case>i<fixed-case>QA</fixed-case>: A Benchmark for Inference on Metaphorical Questions</title>
       <author><first>Iulia</first><last>Comșa</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <pages>373–381</pages>
       <abstract>We propose a benchmark to assess the capability of large language models to reason with conventional metaphors. Our benchmark combines the previously isolated topics of metaphor detection and commonsense reasoning into a single task that requires a model to make inferences by accurately selecting between the literal and metaphorical register. We examine the performance of state-of-the-art pre-trained models on binary-choice tasks and find a large discrepancy between the performance of small and very large models, going from chance to near-human level. We also analyse the largest model in a generative setting and find that although human performance is approached, careful multiple-shot prompting is required.</abstract>
       <url hash="4c373e2b">2022.aacl-short.46</url>
@@ -1656,7 +1656,7 @@
     <paper id="47">
       <title><fixed-case>GCDT</fixed-case>: A <fixed-case>C</fixed-case>hinese <fixed-case>RST</fixed-case> Treebank for Multigenre and Multilingual Discourse Parsing</title>
       <author><first>Siyao</first><last>Peng</last></author>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>382–391</pages>
       <abstract>A lack of large-scale human-annotated data has hampered the hierarchical discourse parsing of Chinese. In this paper, we present GCDT, the largest hierarchical discourse treebank for Mandarin Chinese in the framework of Rhetorical Structure Theory (RST). GCDT covers over 60K tokens across five genres of freely available text, using the same relation inventory as contemporary RST treebanks for English. We also report on this dataset’s parsing experiments, including state-of-the-art (SOTA) scores for Chinese RST parsing and RST parsing on the English GUM dataset, using cross-lingual training in Chinese and English with multilingual embeddings.</abstract>
@@ -1668,7 +1668,7 @@
       <title>Assessing Combinational Generalization of Language Models in Biased Scenarios</title>
       <author><first>Yanbo</first><last>Fang</last></author>
       <author><first>Zuohui</first><last>Fu</last></author>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Yongfeng</first><last>Zhang</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <pages>392–397</pages>
@@ -1695,7 +1695,7 @@
       <title>Vector Space Interpolation for Query Expansion</title>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Somak</first><last>Aditya</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>405–410</pages>
       <abstract>Topic-sensitive query set expansion is an important area of research that aims to improve search results for information retrieval. It is particularly crucial for queries related to sensitive and emerging topics. In this work, we describe a method for query set expansion about emerging topics using vector space interpolation. We use a transformer model called OPTIMUS, which is suitable for vector space manipulation due to its variational autoencoder nature. One of our proposed methods – Dirichlet interpolation shows promising results for query expansion. Our methods effectively generate new queries about the sensitive topic by incorporating set-level diversity, which is not captured by traditional sentence-level augmentation methods such as paraphrasing or back-translation.</abstract>
@@ -1705,7 +1705,7 @@
     </paper>
     <paper id="51">
       <title><fixed-case>S</fixed-case>ch<fixed-case>A</fixed-case>man: Spell-Checking Resources and Benchmark for Endangered Languages from Amazonia</title>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Gerardo</first><last>Cardoso</last></author>
       <author><first>Carlo</first><last>Alva</last></author>
       <author><first>César</first><last>Lara Ávila</last></author>
@@ -1799,7 +1799,7 @@
       <title>How Well Do Multi-hop Reading Comprehension Models Understand Date Information?</title>
       <author><first>Xanh</first><last>Ho</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>470–479</pages>
       <abstract>Several multi-hop reading comprehension datasets have been proposed to resolve the issue of reasoning shortcuts by which questions can be answered without performing multi-hop reasoning. However, the ability of multi-hop models to perform step-by-step reasoning when finding an answer to a comparison question remains unclear. It is also unclear how questions about the internal reasoning process are useful for training and evaluating question-answering (QA) systems. To evaluate the model precisely in a hierarchical manner, we first propose a dataset, <i>HieraDate</i>, with three probing tasks in addition to the main question: extraction, reasoning, and robustness. Our dataset is created by enhancing two previous multi-hop datasets, HotpotQA and 2WikiMultiHopQA, focusing on multi-hop questions on date information that involve both comparison and numerical reasoning. We then evaluate the ability of existing models to understand date information. Our experimental results reveal that the multi-hop models do not have the ability to subtract two dates even when they perform well in date comparison and number subtraction tasks. Other results reveal that our probing questions can help to improve the performance of the models (e.g., by +10.3 F1) on the main QA task and our dataset can be used for data augmentation to improve the robustness of the models.</abstract>
       <url hash="a76e62e0">2022.aacl-short.58</url>
@@ -1810,7 +1810,7 @@
       <title>Dodging the Data Bottleneck: Automatic Subtitling with Automatically Segmented <fixed-case>ST</fixed-case> Corpora</title>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Alina</first><last>Karakanta</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>480–487</pages>
       <abstract>Speech translation for subtitling (SubST) is the task of automatically translating speech data into well-formed subtitles by inserting subtitle breaks compliant to specific displaying guidelines. Similar to speech translation (ST), model training requires parallel data comprising audio inputs paired with their textual translations. In SubST, however, the text has to be also annotated with subtitle breaks. So far, this requirement has represented a bottleneck for system development, as confirmed by the dearth of publicly available SubST corpora. To fill this gap, we propose a method to convert existing ST corpora into SubST resources without human intervention. We build a segmenter model that automatically segments texts into proper subtitles by exploiting audio and text in a multimodal fashion, achieving high segmentation quality in zero-shot conditions. Comparative experiments with SubST systems respectively trained on manual and automatic segmentations result in similar performance, showing the effectiveness of our approach.</abstract>
@@ -1978,7 +1978,7 @@
       <author><first>Sebastian</first><last>Blank</last></author>
       <author><first>Xintong</first><last>Wang</last></author>
       <author><first>Hans-Peter</first><last>Zorn</last></author>
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <pages>76–83</pages>
       <abstract>The multi-modal foundation model CLIP computes representations from texts and images that achieved unprecedented performance on tasks such as zero-shot image classification. However, CLIP was pretrained on public internet data. Thus it lacks highly domain-specific knowledge. We investigate the adaptation of CLIP-based models to the chest radiography domain using the MIMIC-CXR dataset. We show that the features of the pretrained CLIP models do not transfer to this domain. We adapt CLIP to the chest radiography domain using contrastive language supervision and show that this approach yields a model that outperforms supervised learning on labels on the MIMIC-CXR dataset while also generalizing to the CheXpert and RSNA Pneumonia datasets. Furthermore, we do a detailed ablation study of the batch and dataset size. Finally, we show that language supervision allows for better explainability by using the multi-modal model to generate images from texts such that experts can inspect what the model has learned.</abstract>
       <url hash="5d8ddcd2">2022.aacl-srw.11</url>
@@ -2000,7 +2000,7 @@
       <title>Concreteness vs. Abstractness: A Selectional Preference Perspective</title>
       <author><first>Tarun</first><last>Tater</last></author>
       <author><first>Diego</first><last>Frassinelli</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>92–98</pages>
       <abstract>Concrete words refer to concepts that are strongly experienced through human senses (banana, chair, salt, etc.), whereas abstract concepts are less perceptually salient (idea, glory, justice, etc.). A clear definition of abstractness is crucial for the understanding of human cognitive processes and for the development of natural language applications such as figurative language detection. In this study, we investigate selectional preferences as a criterion to distinguish between concrete and abstract concepts and words: we hypothesise that abstract and concrete verbs and nouns differ regarding the semantic classes of their arguments. Our study uses a collection of 5,438 nouns and 1,275 verbs to exploit selectional preferences as a salient characteristic in classifying English abstract vs. concrete words, and in predicting their concreteness scores. We achieve an f1-score of 0.84 for nouns and 0.71 for verbs in classification, and Spearman’s ρ correlation of 0.86 for nouns and 0.59 for verbs.</abstract>
       <url hash="d38ebf47">2022.aacl-srw.13</url>
@@ -2068,7 +2068,7 @@
       <author><first>Seolhwa</first><last>Lee</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Kisu</first><last>Yang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>17–27</pages>
       <abstract>Children with language disabilities face communication difficulties in daily life. They are often deprived of the opportunity to participate in social activities due to their difficulty in understanding or using natural language. In this regard, Augmentative and Alternative Communication (AAC) can be a practical means of communication for children with language disabilities. In this study, we propose PicTalky, which is an AI-based AAC system that helps children with language developmental disabilities to improve their communication skills and language comprehension abilities. PicTalky can process both text and pictograms more accurately by connecting a series of neural-based NLP modules. Additionally, we perform quantitative and qualitative analyses on the modules of PicTalky. By using this service, it is expected that those suffering from language problems will be able to express their intentions or desires more easily and improve their quality of life. We have made the models freely available alongside a demonstration of the web interface. Furthermore, we implemented robotics AAC for the first time by applying PicTalky to the NAO robot.</abstract>
       <url hash="f107f241">2022.aacl-demo.3</url>
@@ -2150,7 +2150,7 @@
       <author><first>Peter</first><last>Henderson</last></author>
       <author><first>Khuyagbaatar</first><last>Batsuren</last></author>
       <author><first>Dieuwke</first><last>Hupkes</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>72–87</pages>
       <abstract>We present a tool, Text Characterization Toolkit (TCT), that researchers can use to study characteristics of large datasets. Furthermore, such properties can lead to understanding the influence of such attributes on models’ behaviour. Traditionally, in most NLP research, models are usually evaluated by reporting single-number performance scores on a number of readily available benchmarks, without much deeper analysis. Here, we argue that – especially given the well-known fact that benchmarks often contain biases, artefacts, and spurious correlations – deeper results analysis should become the de-facto standard when presenting new models or benchmarks. TCT aims at filling this gap by facilitating such deeper analysis for datasets at scale, where datasets can be for training/development/evaluation. TCT includes both an easy-to-use tool, as well as off-the-shelf scripts that can be used for specific analyses. We also present use-cases from several different domains. TCT is used to predict difficult examples for given well-known trained models; TCT is also used to identify (potentially harmful) biases present in a dataset.</abstract>
       <url hash="04fbb2a1">2022.aacl-demo.9</url>
@@ -2174,7 +2174,7 @@
   <volume id="tutorials" ingest-date="2022-11-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Tutorial Abstracts</booktitle>
-      <editor><first>Miguel A.</first><last>Alonso</last></editor>
+      <editor id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></editor>
       <editor><first>Zhongyu</first><last>Wei</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Taipei</address>
@@ -2218,7 +2218,7 @@
       <author><first>Hanzhuo</first><last>Tan</last></author>
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Mingyu</first><last>Wan</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>16–21</pages>
       <abstract>Cantonese is an influential Chinese variant with a large population of speakers worldwide. However, it is under-resourced in terms of the data scale and diversity, excluding Cantonese Natural Language Processing (NLP) from the stateof-the-art (SOTA) “pre-training and fine-tuning” paradigm. This tutorial will start with a substantially review of the linguistics and NLP progress for shaping language specificity, resources, and methodologies. It will be followed by an introduction to the trendy transformerbased pre-training methods, which have been largely advancing the SOTA performance of a wide range of downstream NLP tasks in numerous majority languages (e.g., English and Chinese). Based on the above, we will present the main challenges for Cantonese NLP in relation to Cantonese language idiosyncrasies of colloquialism and multilingualism, followed by the future directions to line NLP for Cantonese and other low-resource languages up to the cutting-edge pre-training practice.</abstract>
       <url hash="fa8ea2a3">2022.aacl-tutorials.3</url>
@@ -2228,7 +2228,7 @@
     <paper id="4">
       <title>Grounding Meaning Representation for Situated Reasoning</title>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>22–27</pages>
       <abstract>As natural language technology becomes ever-present in everyday life, people will expect artificial agents to understand language use as humans do. Nevertheless, most advanced neural AI systems fail at some types of interactions that are trivial for humans (e.g., ask a smart system “What am I pointing at?”). One critical aspect of human language understanding is situated reasoning, where inferences make reference to the local context, perceptual surroundings, and contextual groundings from the interaction. In this cutting-edge tutorial, we bring to the NLP/CL community a synthesis of multimodal grounding and meaning representation techniques with formal and computational models of embodied reasoning. We will discuss existing approaches to multimodal language grounding and meaning representations, discuss the kind of information each method captures and their relative suitability to situated reasoning tasks, and demon- strate how to construct agents that conduct situated reasoning by embodying a simulated environment. In doing so, these agents also represent their human interlocutor(s) within the simulation, and are represented through their virtual embodiment in the real world, enabling true bidirectional communication with a computer using multiple modalities.</abstract>
       <url hash="bfef10fa">2022.aacl-tutorials.4</url>
@@ -2239,7 +2239,7 @@
       <title>The Battlefront of Combating Misinformation and Coping with Media Bias</title>
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Kung-Hsiang</first><last>Huang</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>28–34</pages>
       <abstract>Misinformation is a pressing issue in modern society. It arouses a mixture of anger, distrust, confusion, and anxiety that cause damage on our daily life judgments and public policy decisions. While recent studies have explored various fake news detection and media bias detection techniques in attempts to tackle the problem, there remain many ongoing challenges yet to be addressed, as can be witnessed from the plethora of untrue and harmful content present during the COVID-19 pandemic and the international crises of late. In this tutorial, we provide researchers and practitioners with a systematic overview of the frontier in fighting misinformation. Specifically, we dive into the important research questions of how to (i) develop a robust fake news detection system, which not only fact-check information pieces provable by background knowledge but also reason about the consistency and the reliability of subtle details for emerging events; (ii) uncover the bias and agenda of news sources to better characterize misinformation; as well as (iii) correct false information and mitigate news bias, while allowing diverse opinions to be expressed. Moreover, we discuss the remaining challenges, future research directions, and exciting opportunities to help make this world a better place, with safer and more harmonic information sharing.</abstract>
diff --git a/data/xml/2022.acl.xml b/data/xml/2022.acl.xml
index 525202623d..d20ff88f24 100644
--- a/data/xml/2022.acl.xml
+++ b/data/xml/2022.acl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <editor><first>Smaranda</first><last>Muresan</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -32,8 +32,8 @@
     </paper>
     <paper id="2">
       <title>Quantified Reproducibility Assessment of <fixed-case>NLP</fixed-case> Results</title>
-      <author><first>Anya</first><last>Belz</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Simon</first><last>Mille</last></author>
       <pages>16-28</pages>
       <abstract>This paper describes and tests a method for carrying out quantified reproducibility assessment (QRA) that is based on concepts and definitions from metrology. QRA produces a single score estimating the degree of reproducibility of a given system and evaluation measure, on the basis of the scores from, and differences between, different reproductions. We test QRA on 18 different system and evaluation measure combinations (involving diverse NLP tasks and types of evaluation), for each of which we have the original results and one to seven reproduction results. The proposed QRA method produces degree-of-reproducibility scores that are comparable across multiple reproductions not only of the same, but also of different, original studies. We find that the proposed method facilitates insights into causes of variation between reproductions, and as a result, allows conclusions to be drawn about what aspects of system and/or evaluation design need to be changed in order to improve reproducibility.</abstract>
@@ -78,7 +78,7 @@
       <author><first>Hanwang</first><last>Zhang</last></author>
       <author><first>Xiangnan</first><last>He</last></author>
       <author><first>Fengbin</first><last>Zhu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>57-69</pages>
       <abstract>Neural discrete reasoning (NDR) has shown remarkable progress in combining deep models with discrete reasoning. However, we find that existing NDR solution suffers from large performance drop on hypothetical questions, e.g. “what the annualized rate of return would be if the revenue in 2020 was doubled”. The key to hypothetical question answering (HQA) is counterfactual thinking, which is a natural ability of human reasoning but difficult for deep models. In this work, we devise a Learning to Imagine (L2I) module, which can be seamlessly incorporated into NDR models to perform the imagination of unseen counterfactual. In particular, we formulate counterfactual thinking into two steps: 1) identifying the fact to intervene, and 2) deriving the counterfactual from the fact and assumption, which are designed as neural networks. Based on TAT-QA, we construct a very challenging HQA dataset with 8,283 hypothetical questions. We apply the proposed L2I to TAGOP, the state-of-the-art solution on TAT-QA, validating the rationality and effectiveness of our approach.</abstract>
       <url hash="574e5135">2022.acl-long.5</url>
@@ -253,7 +253,7 @@
     <paper id="18">
       <title>Overlap-based Vocabulary Generation Improves Cross-lingual Transfer Among Related Languages</title>
       <author><first>Vaidehi</first><last>Patil</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Sunita</first><last>Sarawagi</last></author>
       <pages>219-233</pages>
       <abstract>Pre-trained multilingual language models such as mBERT and XLM-R have demonstrated great potential for zero-shot cross-lingual transfer to low web-resource languages (LRL). However, due to limited model capacity, the large difference in the sizes of available monolingual corpora between high web-resource languages (HRL) and LRLs does not provide enough scope of co-embedding the LRL with the HRL, thereby affecting the downstream task performance of LRLs. In this paper, we argue that relatedness among languages in a language family along the dimension of lexical overlap may be leveraged to overcome some of the corpora limitations of LRLs. We propose Overlap BPE (OBPE), a simple yet effective modification to the BPE vocabulary generation algorithm which enhances overlap across related languages. Through extensive experiments on multiple NLP tasks and datasets, we observe that OBPE generates a vocabulary that increases the representation of LRLs via tokens shared with HRLs. This results in improved zero-shot transfer from related HRLs to LRLs without reducing HRL representation and accuracy. Unlike previous studies that dismissed the importance of token-overlap, we show that in the low-resource related language setting, token overlap matters. Synthetically reducing the overlap to zero can cause as much as a four-fold drop in zero-shot transfer accuracy.</abstract>
@@ -496,7 +496,7 @@
     <paper id="35">
       <title><fixed-case>K</fixed-case>a<fixed-case>FSP</fixed-case>: Knowledge-Aware Fuzzy Semantic Parsing for Conversational Question Answering over a Large-Scale Knowledge Base</title>
       <author><first>Junzhuo</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>461-473</pages>
       <abstract>In this paper, we study two issues of semantic parsing approaches to conversational question answering over a large-scale knowledge base: (1) The actions defined in grammar are not sufficient to handle uncertain reasoning common in real-world scenarios. (2) Knowledge base information is not well exploited and incorporated into semantic parsing. To mitigate the two issues, we propose a knowledge-aware fuzzy semantic parsing framework (KaFSP). It defines fuzzy comparison operations in the grammar system for uncertain reasoning based on the fuzzy set theory. In order to enhance the interaction between semantic parsing and knowledge base, we incorporate entity triples from the knowledge base into a knowledge-aware entity disambiguation module. Additionally, we propose a multi-label classification framework to not only capture correlations between entity types and relations but also detect knowledge base information relevant to the current utterance. Both enhancements are based on pre-trained language models. Experiments on a large-scale conversational question answering benchmark demonstrate that the proposed KaFSP achieves significant improvements over previous state-of-the-art models, setting new SOTA results on 8 out of 10 question types, gaining improvements of over 10% F1 or accuracy on 3 question types, and improving overall F1 from 83.01% to 85.33%. The source code of KaFSP is available at <url>https://github.com/tjunlp-lab/KaFSP</url>.</abstract>
       <url hash="853ad82a">2022.acl-long.35</url>
@@ -649,7 +649,7 @@
       <author><first>Michal</first><last>Shmueli-Scheuer</last></author>
       <author><first>Ilya</first><last>Shnayderman</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <pages>596-609</pages>
       <abstract>Paraphrase generation has been widely used in various downstream tasks. Most tasks benefit mainly from high quality paraphrases, namely those that are semantically similar to, yet linguistically diverse from, the original sentence. Generating high-quality paraphrases is challenging as it becomes increasingly hard to preserve meaning as linguistic diversity increases. Recent works achieve nice results by controlling specific aspects of the paraphrase, such as its syntactic tree. However, they do not allow to directly control the quality of the generated paraphrase, and suffer from low flexibility and scalability. Here we propose QCPG, a quality-guided controlled paraphrase generation model, that allows directly controlling the quality dimensions. Furthermore, we suggest a method that given a sentence, identifies points in the quality control space that are expected to yield optimal generated paraphrases. We show that our method is able to generate paraphrases which maintain the original meaning while achieving higher diversity than the uncontrolled baseline. The models, the code, and the data can be found in <url>https://github.com/IBM/quality-controlled-paraphrase-generation</url>.</abstract>
       <url hash="7e199051">2022.acl-long.45</url>
@@ -777,7 +777,7 @@
       <author><first>Yue</first><last>Yu</last></author>
       <author><first>Pranav</first><last>Shetty</last></author>
       <author><first>Le</first><last>Song</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>745-758</pages>
       <abstract>Weakly-supervised learning (WSL) has shown promising results in addressing label scarcity on many NLP tasks, but manually designing a comprehensive, high-quality labeling rule set is tedious and difficult. We study interactive weakly-supervised learning—the problem of iteratively and automatically discovering novel labeling rules from data to improve the WSL model. Our proposed model, named PRBoost, achieves this goal via iterative prompt-based rule discovery and model boosting. It uses boosting to identify large-error instances and discovers candidate rules from them by prompting pre-trained LMs with rule templates. The candidate rules are judged by human experts, and the accepted rules are used to generate complementary weak labels and strengthen the current model. Experiments on four tasks show PRBoost outperforms state-of-the-art WSL baselines up to 7.1%, and bridges the gaps with fully supervised models.</abstract>
       <url hash="d16eff1b">2022.acl-long.55</url>
@@ -880,7 +880,7 @@
       <title>Nested Named Entity Recognition with Span-level Graphs</title>
       <author><first>Juncheng</first><last>Wan</last></author>
       <author><first>Dongyu</first><last>Ru</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <pages>892-903</pages>
       <abstract>Span-based methods with the neural networks backbone have great potential for the nested named entity recognition (NER) problem. However, they face problems such as degenerating when positive instances and negative instances largely overlap. Besides, the generalization ability matters a lot in nested NER, as a large proportion of entities in the test set hardly appear in the training set. In this work, we try to improve the span representation by utilizing retrieval-based span-level graphs, connecting spans and entities in the training data based on <tex-math>n</tex-math>-gram features. Specifically, we build the entity-entity graph and span-entity graph globally based on <tex-math>n</tex-math>-gram similarity to integrate the information of similar neighbor entities into the span representation. To evaluate our method, we conduct experiments on three common nested NER datasets, ACE2004, ACE2005, and GENIA datasets. Experimental results show that our method achieves general improvements on all three benchmarks (+<tex-math>0.30 \sim 0.85</tex-math> micro-F1), and obtains special superiority on low frequency entities (+<tex-math>0.56 \sim 2.08</tex-math> recall).</abstract>
@@ -892,7 +892,7 @@
       <title><fixed-case>C</fixed-case>og<fixed-case>T</fixed-case>askonomy: Cognitively Inspired Task Taxonomy Is Beneficial to Transfer Learning in <fixed-case>NLP</fixed-case></title>
       <author><first>Yifei</first><last>Luo</last></author>
       <author><first>Minghui</first><last>Xu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>904-920</pages>
       <abstract>Is there a principle to guide transfer learning across tasks in natural language processing (NLP)? Taxonomy (Zamir et al., 2018) finds that a structure exists among visual tasks, as a principle underlying transfer learning for them. In this paper, we propose a cognitively inspired framework, CogTaskonomy, to learn taxonomy for NLP tasks. The framework consists of Cognitive Representation Analytics (CRA) and Cognitive-Neural Mapping (CNM). The former employs Representational Similarity Analysis, which is commonly used in computational neuroscience to find a correlation between brain-activity measurement and computational modeling, to estimate task similarity with task-specific sentence representations. The latter learns to detect task relations by projecting neural representations from NLP models to cognitive signals (i.e., fMRI voxels). Experiments on 12 NLP tasks, where BERT/TinyBERT are used as the underlying models for transfer learning, demonstrate that the proposed CogTaxonomy is able to guide transfer learning, achieving performance competitive to the Analytic Hierarchy Process (Saaty, 1987) used in visual Taskonomy (Zamir et al., 2018) but without requiring exhaustive pairwise <tex-math>O(m^2)</tex-math> task transferring. Analyses further discover that CNM is capable of learning model-agnostic task taxonomy.</abstract>
       <url hash="5ad866ec">2022.acl-long.64</url>
@@ -927,7 +927,7 @@
       <author><first>Lin</first><last>Xu</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Weidong</first><last>Zhan</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
@@ -987,7 +987,7 @@
       <author><first>Linjuan</first><last>Wu</last></author>
       <author><first>Shaojuan</first><last>Wu</last></author>
       <author><first>Xiaowang</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Shizhan</first><last>Chen</last></author>
       <author><first>Zhiqiang</first><last>Zhuang</last></author>
       <author><first>Zhiyong</first><last>Feng</last></author>
@@ -1027,7 +1027,7 @@
       <author><first>Jun</first><last>Xu</last></author>
       <author><first>Zeyang</first><last>Lei</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <pages>1024-1034</pages>
       <abstract>Most dialog systems posit that users have figured out clear and specific goals before starting an interaction. For example, users have determined the departure, the destination, and the travel time for booking a flight. However, in many scenarios, limited by experience and knowledge, users may know what they need, but still struggle to figure out clear and specific goals by determining all the necessary slots. In this paper, we identify this challenge, and make a step forward by collecting a new human-to-human mixed-type dialog corpus. It contains 5k dialog sessions and 168k utterances for 4 dialog types and 5 domains. Within each session, an agent first provides user-goal-related knowledge to help figure out clear and specific goals, and then help achieve them. Furthermore, we propose a mixed-type dialog model with a novel Prompt-based continual learning mechanism. Specifically, the mechanism enables the model to continually strengthen its ability on any specific type by utilizing existing dialog corpora effectively.</abstract>
@@ -1232,7 +1232,7 @@
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>1237-1252</pages>
       <abstract>Implicit knowledge, such as common sense, is key to fluid human conversations. Current neural response generation (RG) models are trained to generate responses directly, omitting unstated implicit knowledge. In this paper, we present Think-Before-Speaking (TBS), a generative approach to first externalize implicit commonsense knowledge (<tex-math>think</tex-math>) and use this knowledge to generate responses (<tex-math>speak</tex-math>). We argue that externalizing implicit knowledge allows more efficient learning, produces more informative responses, and enables more explainable models. We analyze different choices to collect knowledge-aligned dialogues, represent implicit knowledge, and transition between knowledge and dialogues. Empirical results show TBS models outperform end-to-end and knowledge-augmented RG baselines on most automatic metrics and generate more informative, specific, and commonsense-following responses, as evaluated by human annotators. TBS also generates <tex-math>knowledge</tex-math> that makes sense and is relevant to the dialogue around 85% of the time</abstract>
       <url hash="f360c6c0">2022.acl-long.88</url>
@@ -1244,7 +1244,7 @@
       <title>Flow-Adapter Architecture for Unsupervised Machine Translation</title>
       <author><first>Yihong</first><last>Liu</last></author>
       <author><first>Haris</first><last>Jabbar</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1253-1266</pages>
       <abstract>In this work, we propose a flow-adapter architecture for unsupervised NMT. It leverages normalizing flows to explicitly model the distributions of sentence-level latent representations, which are subsequently used in conjunction with the attention mechanism for the translation task. The primary novelties of our model are: (a) capturing language-specific sentence representations separately for each language using normalizing flows and (b) using a simple transformation of these latent representations for translating from one language to another. This architecture allows for unsupervised training of each language independently. While there is prior work on latent variables for supervised MT, to the best of our knowledge, this is the first work that uses latent variables and normalizing flows for unsupervised MT. We obtain competitive results on several unsupervised MT benchmarks.</abstract>
       <url hash="ad5618f8">2022.acl-long.89</url>
@@ -1255,7 +1255,7 @@
     <paper id="90">
       <title>Efficient Unsupervised Sentence Compression by Fine-tuning Transformers with Reinforcement Learning</title>
       <author><first>Demian</first><last>Ghalandari</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Georgiana</first><last>Ifrim</last></author>
       <pages>1267-1280</pages>
       <abstract>Sentence compression reduces the length of text by removing non-essential content while preserving important facts and grammaticality. Unsupervised objective driven methods for sentence compression can be used to create customized models without the need for ground-truth training data, while allowing flexibility in the objective function(s) that are used for learning and inference. Recent unsupervised sentence compression approaches use custom objectives to guide discrete search; however, guided search is expensive at inference time. In this work, we explore the use of reinforcement learning to train effective sentence compression models that are also fast when generating predictions. In particular, we cast the task as binary sequence labelling and fine-tune a pre-trained transformer using a simple policy gradient approach. Our approach outperforms other unsupervised models while also being more efficient at inference time.</abstract>
@@ -1307,7 +1307,7 @@
       <author><first>Yao</first><last>Zhao</last></author>
       <author><first>Joshua</first><last>Maynez</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1319-1339</pages>
       <abstract>We propose Composition Sampling, a simple but effective method to generate diverse outputs for conditional generation of higher quality compared to previous stochastic decoding strategies. It builds on recently proposed plan-based neural generation models (FROST, Narayan et al, 2021) that are trained to first create a composition of the output and then generate by conditioning on it and the input. Our approach avoids text degeneration by first sampling a composition in the form of an entity chain and then using beam search to generate the best possible text grounded to this entity chain. Experiments on summarization (CNN/DailyMail and XSum) and question generation (SQuAD), using existing and newly proposed automaticmetrics together with human-based evaluation, demonstrate that Composition Sampling is currently the best available decoding strategy for generating diverse meaningful outputs.</abstract>
@@ -1343,7 +1343,7 @@
     <paper id="97">
       <title>Tackling Fake News Detection by Continually Improving Social Context Representations using Graph Neural Networks</title>
       <author><first>Nikhil</first><last>Mehta</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>1363-1380</pages>
       <abstract>Easy access, variety of content, and fast widespread interactions are some of the reasons making social media increasingly popular. However, this rise has also enabled the propagation of fake news, text published by news sources with an intent to spread misinformation and sway beliefs. Detecting it is an important and challenging problem to prevent large scale misinformation and maintain a healthy society. We view fake news detection as reasoning over the relations between sources, articles they publish, and engaging users on social media in a graph framework. After embedding this information, we formulate inference operators which augment the graph edges by revealing unobserved interactions between its elements, such as similarity between documents’ contents and users’ engagement patterns. Our experiments over two challenging fake news detection tasks show that using inference operators leads to a better understanding of the social media framework enabling fake news spread, resulting in improved performance.</abstract>
@@ -1357,7 +1357,7 @@
       <author><first>Yupei</first><last>Du</last></author>
       <author><first>Qi</first><last>Zheng</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yan</first><last>Yang</last></author>
       <author><first>Meirong</first><last>Ma</last></author>
       <pages>1381-1395</pages>
@@ -1384,8 +1384,8 @@
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>He</first><last>He</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1410-1421</pages>
       <abstract>Despite recent progress in abstractive summarization, systems still suffer from faithfulness errors. While prior work has proposed models that improve faithfulness, it is unclear whether the improvement comes from an increased level of extractiveness of the model outputs as one naive way to improve faithfulness is to make summarization models more extractive. In this work, we present a framework for evaluating the effective faithfulness of summarization systems, by generating a faithfulness-abstractiveness trade-off curve that serves as a control at different operating points on the abstractiveness spectrum. We then show that the Maximum Likelihood Estimation (MLE) baseline as well as recently proposed methods for improving faithfulness, fail to consistently improve over the control at the same level of abstractiveness. Finally, we learn a selector to identify the most faithful and abstractive summary for a given document, and show that this system can attain higher faithfulness scores in human evaluations while being more abstractive than the baseline system on two datasets. Moreover, we show that our system is able to achieve a better faithfulness-abstractiveness trade-off than the control at the same level of abstractiveness.</abstract>
       <url hash="a700f25d">2022.acl-long.100</url>
@@ -1410,7 +1410,7 @@
       <title>Spurious Correlations in Reference-Free Evaluation of Text Generation</title>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last></author>
       <pages>1443-1454</pages>
       <abstract>Model-based, reference-free evaluation metricshave been proposed as a fast and cost-effectiveapproach to evaluate Natural Language Generation(NLG) systems. Despite promising recentresults, we find evidence that reference-freeevaluation metrics of summarization and dialoggeneration may be relying on spuriouscorrelations with measures such as word overlap,perplexity, and length. We further observethat for text summarization, these metrics havehigh error rates when ranking current state-ofthe-art abstractive summarization systems. Wedemonstrate that these errors can be mitigatedby explicitly designing evaluation metrics toavoid spurious features in reference-free evaluation.</abstract>
       <url hash="57f2a628">2022.acl-long.102</url>
@@ -1421,7 +1421,7 @@
       <title>On The Ingredients of an Effective Zero-shot Semantic Parser</title>
       <author><first>Pengcheng</first><last>Yin</last></author>
       <author><first>John</first><last>Wieting</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>1455-1474</pages>
       <abstract>Semantic parsers map natural language utterances into meaning representations (e.g., programs). Such models are typically bottlenecked by the paucity of training data due to the required laborious annotation efforts. Recent studies have performed zero-shot learning by synthesizing training examples of canonical utterances and programs from a grammar, and further paraphrasing these utterances to improve linguistic diversity. However, such synthetic examples cannot fully capture patterns in real data. In this paper we analyze zero-shot parsers through the lenses of the language and logical gaps (Herzig and Berant, 2019), which quantify the discrepancy of language and programmatic patterns between the canonical examples and real-world user-issued ones. We propose bridging these gaps using improved grammars, stronger paraphrasers, and efficient learning methods using canonical examples that most likely reflect real user intents. Our model achieves strong performance on two semantic parsing benchmarks (Scholar, Geo) with zero labeled data.</abstract>
@@ -1464,7 +1464,7 @@
       <title>Match the Script, Adapt if Multilingual: Analyzing the Effect of Multilingual Pretraining on Cross-lingual Transferability</title>
       <author><first>Yoshinari</first><last>Fujinuma</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>1500-1512</pages>
       <abstract>Pretrained multilingual models enable zero-shot learning even for unseen languages, and that performance can be further improved via adaptation prior to finetuning. However, it is unclear how the number of pretraining languages influences a model’s zero-shot learning for languages unseen during pretraining. To fill this gap, we ask the following research questions: (1) How does the number of pretraining languages influence zero-shot performance on unseen target languages? (2) Does the answer to that question change with model adaptation? (3) Do the findings for our first question change if the languages used for pretraining are all related? Our experiments on pretraining with related languages indicate that choosing a diverse set of languages is crucial. Without model adaptation, surprisingly, increasing the number of pretraining languages yields better results up to adding related languages, after which performance plateaus. In contrast, with model adaptation via continued pretraining, pretraining on a larger number of languages often gives further improvement, suggesting that model adaptation is crucial to exploit additional pretraining languages.</abstract>
       <url hash="16189115">2022.acl-long.106</url>
@@ -1500,8 +1500,8 @@
     <paper id="109">
       <title>Differentiable Multi-Agent Actor-Critic for Multi-Step Radiology Report Summarization</title>
       <author><first>Sanjeev Kumar</first><last>Karn</last></author>
-      <author id="ning-liu"><first>Ning</first><last>Liu</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author><first>Ning</first><last>Liu</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Oladimeji</first><last>Farri</last></author>
       <pages>1542-1553</pages>
       <abstract>The IMPRESSIONS section of a radiology report about an imaging study is a summary of the radiologist’s reasoning and conclusions, and it also aids the referring physician in confirming or excluding certain diagnoses. A cascade of tasks are required to automatically generate an abstractive summary of the typical information-rich radiology report. These tasks include acquisition of salient content from the report and generation of a concise, easily consumable IMPRESSIONS section. Prior research on radiology report summarization has focused on single-step end-to-end models – which subsume the task of salient content acquisition. To fully explore the cascade structure and explainability of radiology report summarization, we introduce two innovations. First, we design a two-step approach: extractive summarization followed by abstractive summarization. Second, we additionally break down the extractive part into two independent tasks: extraction of salient (1) sentences and (2) keywords. Experiments on English radiology reports from two clinical sites show our novel approach leads to a more precise summary compared to single-step and to two-step-with-single-extractive-process baselines with an overall improvement in F1 score of 3-4%.</abstract>
@@ -1514,7 +1514,7 @@
       <title>Online Semantic Parsing for Latency Reduction in Task-Oriented Dialogue</title>
       <award>Outstanding Paper</award>
       <author><first>Jiawei</first><last>Zhou</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Michael</first><last>Newman</last></author>
       <author><first>Emmanouil Antonios</first><last>Platanios</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
@@ -1546,7 +1546,7 @@
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Budhaditya</first><last>Deb</last></author>
       <author><first>Ahmed</first><last>Awadallah</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <pages>1592-1604</pages>
       <abstract>Text summarization helps readers capture salient information from documents, news, interviews, and meetings. However, most state-of-the-art pretrained language models (LM) are unable to efficiently process long text for many summarization tasks. In this paper, we propose Summ<tex-math>^N</tex-math>, a simple, flexible, and effective multi-stage framework for input texts that are longer than the maximum context length of typical pretrained LMs. Summ<tex-math>^N</tex-math> first splits the data samples and generates a coarse summary in multiple stages and then produces the final fine-grained summary based on it. Our framework can process input text of arbitrary length by adjusting the number of stages while keeping the LM input size fixed. Moreover, it can deal with both single-source documents and dialogues, and it can be used on top of different backbone abstractive summarization models. To the best of our knowledge, Summ<tex-math>^N</tex-math> is the first multi-stage split-then-summarize framework for long input summarization. Our experiments demonstrate that Summ<tex-math>^N</tex-math> outperforms previous state-of-the-art methods by improving ROUGE scores on three long meeting summarization datasets AMI, ICSI, and QMSum, two long TV series datasets from SummScreen, and a long document summarization dataset GovReport. Our data and code are available at <url>https://github.com/psunlpgroup/Summ-N</url>.</abstract>
@@ -1559,7 +1559,7 @@
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Hao</first><last>Cheng</last></author>
       <author><first>Xiaodong</first><last>Liu</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>1605-1620</pages>
       <abstract>The retriever-reader framework is popular for open-domain question answering (ODQA) due to its ability to use explicit knowledge. Although prior work has sought to increase the knowledge coverage by incorporating structured knowledge beyond text, accessing heterogeneous knowledge sources through a unified interface remains an open question. While data-to-text generation has the potential to serve as a universal interface for data and text, its feasibility for downstream tasks remains largely unknown. In this work, we bridge this gap and use the data-to-text method as a means for encoding structured knowledge for open-domain question answering. Specifically, we propose a verbalizer-retriever-reader framework for ODQA over data and text where verbalized tables from Wikipedia and graphs from Wikidata are used as augmented knowledge sources. We show that our Unified Data and Text QA, UDT-QA, can effectively benefit from the expanded knowledge index, leading to large gains over text-only baselines. Notably, our approach sets the single-model state-of-the-art on Natural Questions. Furthermore, our analyses indicate that verbalized knowledge is preferred for answer reasoning for both adapted and hot-swap settings.</abstract>
@@ -1572,9 +1572,9 @@
       <title>Principled Paraphrase Generation with Parallel Corpora</title>
       <author><first>Aitor</first><last>Ormazabal</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1621-1638</pages>
       <abstract>Round-trip Machine Translation (MT) is a popular choice for paraphrase generation, which leverages readily available parallel corpora for supervision. In this paper, we formalize the implicit similarity function induced by this approach, and show that it is susceptible to non-paraphrase pairs sharing a single ambiguous translation. Based on these insights, we design an alternative similarity metric that mitigates this issue by requiring the entire translation distribution to match, and implement a relaxation of it through the Information Bottleneck method. Our approach incorporates an adversarial term into MT training in order to learn representations that encode as much information about the reference translation as possible, while keeping as little information about the input as possible. Paraphrases can be generated by decoding back to the source from this representation, without having to generate pivot translations. In addition to being more principled and efficient than round-trip MT, our approach offers an adjustable parameter to control the fidelity-diversity trade-off, and obtains better results in our experiments.</abstract>
       <url hash="cf46ee22">2022.acl-long.114</url>
@@ -1588,7 +1588,7 @@
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <author><first>Mahani</first><last>Aljunied</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>1639-1657</pages>
@@ -1637,7 +1637,7 @@
       <author><first>Budhaditya</first><last>Deb</last></author>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Ahmed</first><last>Awadallah</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1687-1698</pages>
       <abstract>Transformer-based models have achieved state-of-the-art performance on short-input summarization. However, they still struggle with summarizing longer text. In this paper, we present DYLE, a novel dynamic latent extraction approach for abstractive long-input summarization. DYLE jointly trains an extractor and a generator and treats the extracted text snippets as the latent variable, allowing dynamic snippet-level attention weights during decoding. To provide adequate supervision, we propose simple yet effective heuristics for oracle extraction as well as a consistency loss term, which encourages the extractor to approximate the averaged dynamic weights predicted by the generator. We evaluate our method on different long-document and long-dialogue summarization tasks: GovReport, QMSum, and arXiv. Experiment results show that DYLE outperforms all existing methods on GovReport and QMSum, with gains up to 6.1 ROUGE, while yielding strong results on arXiv. Further analysis shows that the proposed dynamic weights provide interpretability of our generation process.</abstract>
       <url hash="28ee25c2">2022.acl-long.118</url>
@@ -1687,7 +1687,7 @@
       <author><first>Chenxi</first><last>Gu</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Veronica</first><last>Perez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1742-1752</pages>
       <abstract>Personalized language models are designed and trained to capture language patterns specific to individual users. This makes them more accurate at predicting what a user will write. However, when a new user joins a platform and not enough text is available, it is harder to build effective personalized language models. We propose a solution for this problem, using a model trained on users that are similar to a new user. In this paper, we explore strategies for finding the similarity between new users and existing ones and methods for using the data from existing users who are a good match. We further explore the trade-off between available data for new users and how well their language can be modeled.</abstract>
       <url hash="bbb647b3">2022.acl-long.122</url>
@@ -1759,7 +1759,7 @@
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1807-1824</pages>
       <abstract>Gender bias is largely recognized as a problematic phenomenon affecting language technologies, with recent studies underscoring that it might surface differently across languages. However, most of current evaluation practices adopt a word-level focus on a narrow set of occupational nouns under synthetic conditions. Such protocols overlook key features of grammatical gender languages, which are characterized by morphosyntactic chains of gender agreement, marked on a variety of lexical items and parts-of-speech (POS). To overcome this limitation, we enrich the natural, gender-sensitive MuST-SHE corpus (Bentivogli et al., 2020) with two new linguistic annotation layers (POS and agreement chains), and explore to what extent different lexical categories and agreement phenomena are impacted by gender skews. Focusing on speech translation, we conduct a multifaceted evaluation on three language directions (English-French/Italian/Spanish), with models trained on varying amounts of data and different word segmentation techniques. By shedding light on model behaviours, gender bias, and its detection at several levels of granularity, our findings emphasize the value of dedicated analyses beyond aggregated overall results.</abstract>
@@ -1811,8 +1811,8 @@
       <author id="changye-li-umn"><first>Changye</first><last>Li</last></author>
       <author><first>David</first><last>Knopman</last></author>
       <author><first>Weizhe</first><last>Xu</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>1866-1877</pages>
       <abstract>Deep learning (DL) techniques involving fine-tuning large numbers of model parameters have delivered impressive performance on the task of discriminating between language produced by cognitively healthy individuals, and those with Alzheimer’s disease (AD). However, questions remain about their ability to generalize beyond the small reference sets that are publicly available for research. As an alternative to fitting model parameters directly, we propose a novel method by which a Transformer DL model (GPT-2) pre-trained on general English text is paired with an artificially degraded version of itself (GPT-D), to compute the ratio between these two models’ <i>perplexities</i> on language from cognitively healthy and impaired individuals. This technique approaches state-of-the-art performance on text data from a widely used “Cookie Theft” picture description task, and unlike established alternatives also generalizes well to spontaneous conversations. Furthermore, GPT-D generates text with characteristics known to be associated with AD, demonstrating the induction of dementia-related linguistic anomalies. Our study is a step toward better understanding of the relationships between the inner workings of generative neural language models, the language that they produce, and the deleterious effects of dementia on human speech and language characteristics.</abstract>
       <url hash="035ff245">2022.acl-long.131</url>
@@ -1904,7 +1904,7 @@
       <author><first>Qu</first><last>Cui</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1958-1969</pages>
       <abstract>Interactive neural machine translation (INMT) is able to guarantee high-quality translations by taking human interactions into account. Existing IMT systems relying on lexical constrained decoding (LCD) enable humans to translate in a flexible translation order beyond the left-to-right. However, they typically suffer from two significant limitations in translation efficiency and quality due to the reliance on LCD. In this work, we propose a novel BiTIIMT system, Bilingual Text-Infilling for Interactive Neural Machine Translation. The key idea to BiTIIMT is Bilingual Text-infilling (BiTI) which aims to fill missing segments in a manually revised translation for a given source sentence. We propose a simple yet effective solution by casting this task as a sequence-to-sequence task. In this way, our system performs decoding without explicit constraints and makes full use of revised words for better translation prediction. Experiment results show that BiTiIMT performs significantly better and faster than state-of-the-art LCD-based IMT on three translation tasks.</abstract>
       <url hash="7ebaf499">2022.acl-long.138</url>
@@ -2033,7 +2033,7 @@
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Chulun</first><last>Zhou</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
@@ -2061,7 +2061,7 @@
       <author><first>Prem</first><last>Selvaraj</last></author>
       <author><first>Gokul</first><last>Nc</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <pages>2114-2133</pages>
       <abstract>AI technologies for Natural Languages have made tremendous progress recently. However, commensurate progress has not been made on Sign Languages, in particular, in recognizing signs as individual words or as complete sentences. We introduce OpenHands, a library where we take four key ideas from the NLP community for low-resource languages and apply them to sign languages for word-level recognition. First, we propose using pose extracted through pretrained models as the standard modality of data in this work to reduce training time and enable efficient inference, and we release standardized pose datasets for different existing sign language datasets. Second, we train and release checkpoints of 4 pose-based isolated sign language recognition models across 6 languages (American, Argentinian, Chinese, Greek, Indian, and Turkish), providing baselines and ready checkpoints for deployment. Third, to address the lack of labelled data, we propose self-supervised pretraining on unlabelled data. We curate and release the largest pose-based pretraining dataset on Indian Sign Language (Indian-SL). Fourth, we compare different pretraining strategies and for the first time establish that pretraining is effective for sign language recognition by demonstrating (a) improved fine-tuning performance especially in low-resource settings, and (b) high crosslingual transfer from Indian-SL to few other sign languages. We open-source all models and datasets in OpenHands with a hope that it makes research in sign languages reproducible and more accessible.</abstract>
       <url hash="916f05b0">2022.acl-long.150</url>
@@ -2115,7 +2115,7 @@
       <author><first>Dexin</first><last>Wang</last></author>
       <author><first>Kai</first><last>Fan</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2175-2187</pages>
       <abstract><tex-math>k</tex-math>-Nearest-Neighbor Machine Translation (<tex-math>k</tex-math>NN-MT) has been recently proposed as a non-parametric solution for domain adaptation in neural machine translation (NMT). It aims to alleviate the performance degradation of advanced MT systems in translating out-of-domain sentences by coordinating with an additional token-level feature-based retrieval module constructed from in-domain data. Previous studies (Khandelwal et al., 2021; Zheng et al., 2021) have already demonstrated that non-parametric NMT is even superior to models fine-tuned on out-of-domain data. In spite of this success, <tex-math>k</tex-math>NN retrieval is at the expense of high latency, in particular for large datastores. To make it practical, in this paper, we explore a more efficient <tex-math>k</tex-math>NN-MT and propose to use clustering to improve the retrieval efficiency. Concretely, we first propose a cluster-based Compact Network for feature reduction in a contrastive learning manner to compress context features into 90+% lower dimensional vectors. We then suggest a cluster-based pruning solution to filter out 10% 40% redundant nodes in large datastores while retaining translation quality. Our proposed methods achieve better or comparable performance while reducing up to 57% inference latency against the advanced non-parametric MT model on several machine translation benchmarks. Experimental results indicate that the proposed methods maintain the most useful information of the original datastore and the Compact Network shows good generalization on unseen domains. Codes are available at <url>https://github.com/tjunlp-lab/PCKMT</url>.</abstract>
       <url hash="c17cde26">2022.acl-long.154</url>
@@ -2156,7 +2156,7 @@
       <author><first>Wei</first><last>Wu</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2211-2224</pages>
       <abstract>Recent works on Lottery Ticket Hypothesis have shown that pre-trained language models (PLMs) contain smaller matching subnetworks(winning tickets) which are capable of reaching accuracy comparable to the original models. However, these tickets are proved to be notrobust to adversarial examples, and even worse than their PLM counterparts. To address this problem, we propose a novel method based on learning binary weight masks to identify robust tickets hidden in the original PLMs. Since the loss is not differentiable for the binary mask, we assign the hard concrete distribution to the masks and encourage their sparsity using a smoothing approximation of L0 regularization. Furthermore, we design an adversarial loss objective to guide the search for robust tickets and ensure that the tickets perform well bothin accuracy and robustness. Experimental results show the significant improvement of the proposed method over previous work on adversarial robustness evaluation.</abstract>
       <url hash="e5387608">2022.acl-long.157</url>
@@ -2335,7 +2335,7 @@
       <author><first>Yijin</first><last>Liu</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jian</first><last>Liu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>2377-2389</pages>
@@ -2416,7 +2416,7 @@
       <author><first>Bailey</first><last>Kuehl</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <pages>2448-2460</pages>
       <abstract>Automated scientific fact checking is difficult due to the complexity of scientific language and a lack of significant amounts of training data, as annotation requires domain expertise. To address this challenge, we propose scientific claim generation, the task of generating one or more atomic and verifiable claims from scientific sentences, and demonstrate its usefulness in zero-shot fact checking for biomedical claims. We propose CLAIMGEN-BART, a new supervised method for generating claims supported by the literature, as well as KBIN, a novel method for generating claim negations. Additionally, we adapt an existing unsupervised entity-centric method of claim generation to biomedical claims, which we call CLAIMGEN-ENTITY. Experiments on zero-shot fact checking demonstrate that both CLAIMGEN-ENTITY and CLAIMGEN-BART, coupled with KBIN, achieve up to 90% performance of fully supervised models trained on manually annotated claims and evidence. A rigorous evaluation study demonstrates significant improvement in generated claim and negation quality over existing baselines</abstract>
       <url hash="881997a5">2022.acl-long.175</url>
@@ -2505,7 +2505,7 @@
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>2545-2558</pages>
       <abstract>Role-oriented dialogue summarization is to generate summaries for different roles in the dialogue, e.g., merchants and consumers. Existing methods handle this task by summarizing each role’s content separately and thus are prone to ignore the information from other roles. However, we believe that other roles’ content could benefit the quality of summaries, such as the omitted information mentioned by other roles. Therefore, we propose a novel role interaction enhanced method for role-oriented dialogue summarization. It adopts cross attention and decoder self-attention interactions to interactively acquire other roles’ critical information. The cross attention interaction aims to select other roles’ critical dialogue utterances, while the decoder self-attention interaction aims to obtain key information from other roles’ summaries. Experimental results have shown that our proposed method significantly outperforms strong baselines on two public role-oriented dialogue summarization datasets. Extensive analyses have demonstrated that other roles’ content could help generate summaries with more complete semantics and correct topic structures.</abstract>
       <url hash="373a737f">2022.acl-long.182</url>
@@ -2531,7 +2531,7 @@
       <title>Measuring and Mitigating Name Biases in Neural Machine Translation</title>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Benjamin</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2576-2590</pages>
       <abstract>Neural Machine Translation (NMT) systems exhibit problematic biases, such as stereotypical gender bias in the translation of occupation terms into languages with grammatical gender. In this paper we describe a new source of bias prevalent in NMT systems, relating to translations of sentences containing person names. To correctly translate such sentences, a NMT system needs to determine the gender of the name. We show that leading systems are particularly poor at this task, especially for female given names. This bias is deeper than given name gender: we show that the translation of terms with ambiguous sentiment can also be affected by person names, and the same holds true for proper nouns denoting race. To mitigate these biases we propose a simple but effective data augmentation method based on randomly switching entities during translation, which effectively eliminates the problem without any effect on translation quality.</abstract>
       <url hash="0e3b38c6">2022.acl-long.184</url>
@@ -2557,7 +2557,7 @@
       <title><fixed-case>MSCTD</fixed-case>: A Multimodal Sentiment Chat Translation Dataset</title>
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>2601-2613</pages>
@@ -2723,7 +2723,7 @@
     <paper id="198">
       <title>Continual Few-shot Relation Learning via Embedding Space Regularization and Data Augmentation</title>
       <author><first>Chengwei</first><last>Qin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>2776-2789</pages>
       <abstract>Existing continual relation learning (CRL) methods rely on plenty of labeled training data for learning a new task, which can be hard to acquire in real scenario as getting large and representative labeled data is often expensive and time-consuming. It is therefore necessary for the model to learn novel relational patterns with very few labeled data while avoiding catastrophic forgetting of previous task knowledge. In this paper, we formulate this challenging yet practical problem as continual few-shot relation learning (CFRL). Based on the finding that learning for new emerging few-shot tasks often results in feature distributions that are incompatible with previous tasks’ learned distributions, we propose a novel method based on embedding space regularization and data augmentation. Our method generalizes to new few-shot tasks and avoids catastrophic forgetting of previous tasks by enforcing extra constraints on the relational embeddings and by adding extra relevant data in a self-supervised manner. With extensive experiments we demonstrate that our method can significantly outperform previous state-of-the-art methods in CFRL task settings.</abstract>
       <url hash="2ec35014">2022.acl-long.198</url>
@@ -2776,7 +2776,7 @@
       <title>Learning to Mediate Disparities Towards Pragmatic Communication</title>
       <author><first>Yuwei</first><last>Bao</last></author>
       <author><first>Sayan</first><last>Ghosh</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>2829-2842</pages>
       <abstract>Human communication is a collaborative process. Speakers, on top of conveying their own intent, adjust the content and language expressions by taking the listeners into account, including their knowledge background, personalities, and physical capabilities. Towards building AI agents with similar abilities in language communication, we propose a novel rational reasoning framework, Pragmatic Rational Speaker (PRS), where the speaker attempts to learn the speaker-listener disparity and adjust the speech accordingly, by adding a light-weighted disparity adjustment layer into working memory on top of speaker’s long-term memory system. By fixing the long-term memory, the PRS only needs to update its working memory to learn and adapt to different types of listeners. To validate our framework, we create a dataset that simulates different types of speaker-listener disparities in the context of referential games. Our empirical results demonstrate that the PRS is able to shift its output towards the language that listeners are able to understand, significantly improve the collaborative task outcome, and learn the disparity more efficiently than joint training.</abstract>
       <url hash="103417b9">2022.acl-long.202</url>
@@ -2844,7 +2844,7 @@
       <title><fixed-case>BRIO</fixed-case>: Bringing Order to Abstractive Summarization</title>
       <author><first>Yixin</first><last>Liu</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>2890-2903</pages>
       <abstract>Abstractive summarization models are commonly trained using maximum likelihood estimation, which assumes a deterministic (one-point) target distribution in which an ideal model will assign all the probability mass to the reference summary. This assumption may lead to performance degradation during inference, where the model needs to compare several system-generated (candidate) summaries that have deviated from the reference summary. To address this problem, we propose a novel training paradigm which assumes a non-deterministic distribution so that different candidate summaries are assigned probability mass according to their quality. Our method achieves a new state-of-the-art result on the CNN/DailyMail (47.78 ROUGE-1) and XSum (49.07 ROUGE-1) datasets. Further analysis also shows that our model can estimate probabilities of candidate summaries that are more correlated with their level of quality.</abstract>
@@ -2873,7 +2873,7 @@
       <author><first>Oana</first><last>Ignat</last></author>
       <author><first>Nan</first><last>Liu</last></author>
       <author><first>Jonathan</first><last>Stroud</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2925-2940</pages>
       <abstract>We propose fill-in-the-blanks as a video understanding evaluation framework and introduce FIBER – a novel dataset consisting of 28,000 videos and descriptions in support of this evaluation framework. The fill-in-the-blanks setting tests a model’s understanding of a video by requiring it to predict a masked noun phrase in the caption of the video, given the video and the surrounding text. The FIBER benchmark does not share the weaknesses of the current state-of-the-art language-informed video understanding tasks, namely: (1) video question answering using multiple-choice questions, where models perform relatively well because they exploit linguistic biases in the task formulation, thus making our framework challenging for the current state-of-the-art systems to solve; and (2) video captioning, which relies on an open-ended evaluation framework that is often inaccurate because system answers may be perceived as incorrect if they differ in form from the ground truth. The FIBER dataset and our code are available at <url>https://lit.eecs.umich.edu/fiber/</url>.</abstract>
       <url hash="2db95385">2022.acl-long.209</url>
@@ -2956,12 +2956,12 @@
     </paper>
     <paper id="215">
       <title>Cross-Modal Discrete Representation Learning</title>
-      <author><first>Alexander</first><last>Liu</last></author>
+      <author id="alex-liu"><first>Alexander</first><last>Liu</last></author>
       <author><first>SouYoung</first><last>Jin</last></author>
       <author><first>Cheng-I</first><last>Lai</last></author>
       <author><first>Andrew</first><last>Rouditchenko</last></author>
       <author><first>Aude</first><last>Oliva</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>3013-3035</pages>
       <abstract>In contrast to recent advances focusing on high-level representation learning across modalities, in this work we present a self-supervised learning framework that is able to learn a representation that captures finer levels of granularity across different modalities such as concepts or events represented by visual objects or spoken words. Our framework relies on a discretized embedding space created via vector quantization that is shared across different modalities. Beyond the shared embedding space, we propose a Cross-Modal Code Matching objective that forces the representations from different views (modalities) to have a similar distribution over the discrete embedding space such that cross-modal objects/actions localization can be performed without direct supervision. We show that the proposed discretized multi-modal fine-grained representation (e.g., pixel/word/frame) can complement high-level summary representations (e.g., video/sentence/waveform) for improved performance on cross-modal retrieval tasks. We also observe that the discretized representation uses individual clusters to represent the same semantic concept across modalities.</abstract>
       <url hash="dfbb5715">2022.acl-long.215</url>
@@ -3046,7 +3046,7 @@
       <author><first>Veronica</first><last>Perez-Rosas</last></author>
       <author><first>Charles</first><last>Welch</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>3096-3107</pages>
       <abstract>In this paper, we study the effect of commonsense and domain knowledge while generating responses in counseling conversations using retrieval and generative methods for knowledge integration. We propose a pipeline that collects domain knowledge through web mining, and show that retrieval from both domain-specific and commonsense knowledge bases improves the quality of generated responses. We also present a model that incorporates knowledge generated by COMET using soft positional encoding and masked self-attention. We show that both retrieved and COMET-generated knowledge improve the system’s performance as measured by automatic metrics and also by human evaluation. Lastly, we present a comparative study on the types of knowledge encoded by our system showing that <i>causal</i> and <i>intentional</i> relationships benefit the generation task more than other types of commonsense relations.</abstract>
       <url hash="3cb14511">2022.acl-long.221</url>
@@ -3071,8 +3071,8 @@
     </paper>
     <paper id="223">
       <title>On Continual Model Refinement in Out-of-Distribution Data Streams</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
-      <author><first>Sida</first><last>Wang</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
       <author><first>Xi</first><last>Lin</last></author>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Lin</first><last>Xiao</last></author>
@@ -3149,7 +3149,7 @@
       <author><first>Alexis</first><last>Ross</last></author>
       <author><first>Tongshuang</first><last>Wu</last></author>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <pages>3194-3213</pages>
       <abstract>Controlled text perturbation is useful for evaluating and improving model generalizability. However, current techniques rely on training a model for every target perturbation, which is expensive and hard to generalize. We present Tailor, a semantically-controlled text generation system. Tailor builds on a pretrained seq2seq model and produces textual outputs conditioned on control codes derived from semantic representations. We craft a set of operations to modify the control codes, which in turn steer generation towards targeted attributes. These operations can be further composed into higher-level ones, allowing for flexible perturbation strategies. We demonstrate the effectiveness of these perturbations in multiple applications. First, we use Tailor to automatically create high-quality contrast sets for four distinct natural language processing (NLP) tasks. These contrast sets contain fewer spurious artifacts and are complementary to manually annotated ones in their lexical diversity. Second, we show that Tailor perturbations can improve model generalization through data augmentation. Perturbing just ∼2% of training data leads to a 5.8-point gain on an NLI challenge set measuring reliance on syntactic heuristics.</abstract>
@@ -3172,7 +3172,7 @@
     </paper>
     <paper id="230">
       <title>Adaptive Testing and Debugging of <fixed-case>NLP</fixed-case> Models</title>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Scott</first><last>Lundberg</last></author>
       <pages>3253-3267</pages>
       <abstract>Current approaches to testing and debugging NLP models rely on highly variable human creativity and extensive labor, or only work for a very restrictive class of bugs. We present AdaTest, a process which uses large scale language models (LMs) in partnership with human feedback to automatically write unit tests highlighting bugs in a target model. Such bugs are then addressed through an iterative text-fix-retest loop, inspired by traditional software development. In experiments with expert and non-expert users and commercial / research models for 8 different tasks, AdaTest makes users 5-10x more effective at finding bugs than current approaches, and helps users effectively fix bugs without adding new bugs.</abstract>
@@ -3258,7 +3258,7 @@
       <title>Hallucinated but Factual! Inspecting the Factuality of Hallucinations in Abstractive Summarization</title>
       <author><first>Meng</first><last>Cao</last></author>
       <author><first>Yue</first><last>Dong</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>3340-3354</pages>
       <abstract>State-of-the-art abstractive summarization systems often generate hallucinations; i.e., content that is not directly inferable from the source text. Despite being assumed to be incorrect, we find that much hallucinated content is actually consistent with world knowledge, which we call factual hallucinations. Including these factual hallucinations in a summary can be beneficial because they provide useful background information. In this work, we propose a novel detection approach that separates factual from non-factual hallucinations of entities. Our method is based on an entity’s prior and posterior probabilities according to pre-trained and finetuned masked language models, respectively. Empirical results suggest that our method vastly outperforms two baselines in both accuracy and F1 scores and has a strong correlation with human judgments on factuality classification tasks. Furthermore, we use our method as a reward signal to train a summarization system using an off-line reinforcement learning (RL) algorithm that can significantly improve the factuality of generated summaries while maintaining the level of abstractiveness.</abstract>
       <url hash="3d6c6e75">2022.acl-long.236</url>
@@ -3270,7 +3270,7 @@
       <title><fixed-case>E</fixed-case>nt<fixed-case>SUM</fixed-case>: A Data Set for Entity-Centric Extractive Summarization</title>
       <author><first>Mounica</first><last>Maddela</last></author>
       <author><first>Mayank</first><last>Kulkarni</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>3355-3366</pages>
       <abstract>Controllable summarization aims to provide summaries that take into account user-specified aspects and preferences to better assist them with their information need, as opposed to the standard summarization setup which build a single generic summary of a document. We introduce a human-annotated data set EntSUM for controllable summarization with a focus on named entities as the aspects to control. We conduct an extensive quantitative analysis to motivate the task of entity-centric summarization and show that existing methods for controllable summarization fail to generate entity-centric summaries. We propose extensions to state-of-the-art summarization approaches that achieve substantially better results on our data set. Our analysis and results show the challenging nature of this task and of the proposed data set.</abstract>
       <url hash="392c8a29">2022.acl-long.237</url>
@@ -3373,7 +3373,7 @@ in the Case of Unambiguous Gender</title>
       <title>Imputing Out-of-Vocabulary Embeddings with <fixed-case>LOVE</fixed-case> Makes <fixed-case>L</fixed-case>anguage<fixed-case>M</fixed-case>odels Robust with Little Cost</title>
       <author><first>Lihu</first><last>Chen</last></author>
       <author><first>Gael</first><last>Varoquaux</last></author>
-      <author><first>Fabian</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></author>
       <pages>3488-3504</pages>
       <abstract>State-of-the-art NLP systems represent inputs with word embeddings, but these are brittle when faced with Out-of-Vocabulary (OOV) words. To address this issue, we follow the principle of mimick-like models to generate vectors for unseen words, by learning the behavior of pre-trained embeddings using only the surface form of words. We present a simple contrastive learning framework, LOVE, which extends the word representation of an existing pre-trained language model (such as BERT) and makes it robust to OOV with few additional parameters. Extensive evaluations demonstrate that our lightweight model achieves similar or even better performances than prior competitors, both on original datasets and on corrupted variants. Moreover, it can be used in a plug-and-play fashion with FastText and BERT, where it significantly improves their robustness.</abstract>
       <url hash="bf21a581">2022.acl-long.245</url>
@@ -3419,7 +3419,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Pengjie</first><last>Ren</last></author>
       <author><first>Wentao</first><last>Deng</last></author>
       <author><first>Zhumin</first><last>Chen</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>3543-3555</pages>
       <abstract>A dialogue response is malevolent if it is grounded in negative emotions, inappropriate behavior, or an unethical value basis in terms of content and dialogue acts. The detection of malevolent dialogue responses is attracting growing interest. Current research on detecting dialogue malevolence has limitations in terms of datasets and methods. First, available dialogue datasets related to malevolence are labeled with a single category, but in practice assigning a single category to each utterance may not be appropriate as some malevolent utterances belong to multiple labels. Second, current methods for detecting dialogue malevolence neglect label correlation. Therefore, we propose the task of multi-label dialogue malevolence detection and crowdsource a multi-label dataset, multi-label dialogue malevolence detection (MDMD) for evaluation. We also propose a multi-label malevolence detection model, multi-faceted label correlation enhanced CRF (MCRF), with two label correlation mechanisms, label correlation in taxonomy (LCT) and label correlation in context (LCC). Experiments on MDMD show that our method outperforms the best performing baseline by a large margin, i.e., 16.1%, 11.9%, 12.0%, and 6.1% on precision, recall, F1, and Jaccard score, respectively.</abstract>
       <url hash="f0c66248">2022.acl-long.248</url>
@@ -3484,7 +3484,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="253">
       <title><fixed-case>C</fixed-case>onditional<fixed-case>QA</fixed-case>: A Complex Reading Comprehension Dataset with Conditional Answers</title>
       <author><first>Haitian</first><last>Sun</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>3627-3637</pages>
       <abstract>We describe a Question Answering (QA) dataset that contains complex questions with conditional answers, i.e. the answers are only applicable when certain conditions apply. We call this dataset ConditionalQA. In addition to conditional answers, the dataset also features:(1) long context documents with information that is related in logically complex ways;(2) multi-hop questions that require compositional logical reasoning;(3) a combination of extractive questions, yes/no questions, questions with multiple answers, and not-answerable questions;(4) questions asked without knowing the answers. We show that ConditionalQA is challenging for many of the existing QA models, especially in selecting answer conditions. We believe that this dataset will motivate further research in answering complex questions over long documents.</abstract>
@@ -3495,8 +3495,8 @@ in the Case of Unambiguous Gender</title>
     <paper id="254">
       <title>Prompt-free and Efficient Few-shot Learning with Language Models</title>
       <author><first>Rabeeh</first><last>Karimi Mahabadi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Lambert</first><last>Mathias</last></author>
       <author><first>Marzieh</first><last>Saeidi</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
@@ -3566,7 +3566,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>John</first><last>Pavlopoulos</last></author>
       <author><first>Leo</first><last>Laugier</last></author>
       <author><first>Alexandros</first><last>Xenos</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>3721-3734</pages>
       <abstract>We study the task of toxic spans detection, which concerns the detection of the spans that make a text toxic, when detecting such spans is possible. We introduce a dataset for this task, ToxicSpans, which we release publicly. By experimenting with several methods, we show that sequence labeling models perform best, but methods that add generic rationale extraction mechanisms on top of classifiers trained to predict if a post is toxic or not are also surprisingly promising. Finally, we use ToxicSpans and systems trained on it, to provide further analysis of state-of-the-art toxic to non-toxic transfer systems, as well as of human performance on that latter task. Our work highlights challenges in finer toxicity detection and mitigation.</abstract>
@@ -3743,7 +3743,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Crystal</first><last>Richardson</last></author>
       <author><first>Richard</first><last>Hatcher</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>3933-3944</pages>
       <abstract>Languages are classified as low-resource when they lack the quantity of data necessary for training statistical and machine learning tools and models. Causes of resource scarcity vary but can include poor access to technology for developing these resources, a relatively small population of speakers, or a lack of urgency for collecting such resources in bilingual populations where the second language is high-resource. As a result, the languages described as low-resource in the literature are as different as Finnish on the one hand, with millions of speakers using it in every imaginable domain, and Seneca, with only a small-handful of fluent speakers using the language primarily in a restricted domain. While issues stemming from the lack of resources necessary to train models unite this disparate group of languages, many other issues cut across the divide between widely-spoken low-resource languages and endangered languages. In this position paper, we discuss the unique technological, cultural, practical, and ethical challenges that researchers and indigenous speech community members face when working together to develop language technology to support endangered language documentation and revitalization. We report the perspectives of language teachers, Master Speakers and elders from indigenous communities, as well as the point of view of academics. We describe an ongoing fruitful collaboration and make recommendations for future partnerships between academic researchers and language community stakeholders.</abstract>
       <url hash="07da4fea">2022.acl-long.272</url>
@@ -3753,7 +3753,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="273">
       <title>Automatic Identification and Classification of Bragging in Social Media</title>
       <author><first>Mali</first><last>Jin</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>3945-3959</pages>
@@ -3771,7 +3771,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Kejian</first><last>Shi</last></author>
       <author><first>Jiayuan</first><last>Gu</last></author>
       <author><first>Thomas</first><last>Porter</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>3960-3975</pages>
       <abstract>Document-level information extraction (IE) tasks have recently begun to be revisited in earnest using the end-to-end neural network techniques that have been successful on their sentence-level IE counterparts. Evaluation of the approaches, however, has been limited in a number of dimensions. In particular, the precision/recall/F1 scores typically reported provide few insights on the range of errors the models make. We build on the work of Kummerfeld and Klein (2013) to propose a transformation-based framework for automating error analysis in document-level event and (N-ary) relation extraction. We employ our framework to compare two state-of-the-art document-level template-filling approaches on datasets from three domains; and then, to gauge progress in IE since its inception 30 years ago, vs. four systems from the MUC-4 (1992) evaluation.</abstract>
       <url hash="ddb45aa5">2022.acl-long.274</url>
@@ -3812,7 +3812,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Ahmed</first><last>Masry</last></author>
       <author><first>Megh</first><last>Thakkar</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>4005-4023</pages>
       <abstract>Charts are commonly used for exploring data and communicating insights. Generating natural language summaries from charts can be very helpful for people in inferring key insights that would otherwise require a lot of cognitive and perceptual efforts. We present Chart-to-text, a large-scale benchmark with two datasets and a total of 44,096 charts covering a wide range of topics and chart types. We explain the dataset construction process and analyze the datasets. We also introduce a number of state-of-the-art neural models as baselines that utilize image captioning and data-to-text generation techniques to tackle two problem variations: one assumes the underlying data table of the chart is available while the other needs to extract data from chart images. Our analysis with automatic and human evaluation shows that while our best models usually generate fluent summaries and yield reasonable BLEU scores, they also suffer from hallucinations and factual errors as well as difficulties in correctly explaining complex patterns and trends in charts.</abstract>
       <url hash="c10a3fa5">2022.acl-long.277</url>
@@ -3822,9 +3822,9 @@ in the Case of Unambiguous Gender</title>
     <paper id="278">
       <title>Characterizing Idioms: Conventionality and Contingency</title>
       <author><first>Michaela</first><last>Socolof</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <author><first>Michael</first><last>Wagner</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
       <pages>4024-4037</pages>
       <abstract>Idioms are unlike most phrases in two important ways. First, words in an idiom have non-canonical meanings. Second, the non-canonical meanings of words in an idiom are contingent on the presence of other words in the idiom. Linguistic theories differ on whether these properties depend on one another, as well as whether special theoretical machinery is needed to accommodate idioms. We define two measures that correspond to the properties above, and we show that idioms fall at the expected intersection of the two dimensions, but that the dimensions themselves are not correlated. Our results suggest that introducing special machinery to handle idioms may not be warranted.</abstract>
       <url hash="b8aa684a">2022.acl-long.278</url>
@@ -3900,7 +3900,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Emily</first><last>Dinan</last></author>
       <author><first>Gavin</first><last>Abercrombie</last></author>
       <author><first>A.</first><last>Bergman</last></author>
-      <author><first>Shannon</first><last>Spruit</last></author>
+      <author id="shannon-l-spruit"><first>Shannon</first><last>Spruit</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Y-Lan</first><last>Boureau</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
@@ -3957,7 +3957,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Heqi</first><last>Zheng</last></author>
       <author><first>Xiao</first><last>Zhang</last></author>
       <author><first>Zewen</first><last>Chi</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yan</first><last>Tan</last></author>
       <author><first>Tian</first><last>Lan</last></author>
       <author><first>Wei</first><last>Wei</last></author>
@@ -3970,11 +3970,11 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="289">
       <title>Improving Compositional Generalization with Self-Training for Data-to-Text Generation</title>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last></author>
       <author><first>Jinfeng</first><last>Rao</last></author>
       <author><first>Yi</first><last>Tay</last></author>
       <author><first>Mihir</first><last>Kale</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
       <pages>4205-4219</pages>
       <abstract>Data-to-text generation focuses on generating fluent natural language responses from structured meaning representations (MRs). Such representations are compositional and it is costly to collect responses for all possible combinations of atomic meaning schemata, thereby necessitating few-shot generalization to novel MRs. In this work, we systematically study the compositional generalization of the state-of-the-art T5 models in few-shot data-to-text tasks. We show that T5 models fail to generalize to unseen MRs, and we propose a template-based input representation that considerably improves the model’s generalization capability. To further improve the model’s performance, we propose an approach based on self-training using fine-tuned BLEURT for pseudo-response selection. On the commonly-used SGD and Weather benchmarks, the proposed self-training approach improves tree accuracy by <tex-math>46\%+</tex-math> and reduces the slot error rates by <tex-math>73\%+</tex-math> over the strong T5 baselines in few-shot settings.</abstract>
@@ -4001,7 +4001,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Jingye</first><last>Li</last></author>
       <author><first>Hao</first><last>Fei</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>4232-4241</pages>
       <abstract>The state-of-the-art model for structured sentiment analysis casts the task as a dependency parsing problem, which has some limitations: (1) The label proportions for span prediction and span relation prediction are imbalanced. (2) The span lengths of sentiment tuple components may be very large in this task, which will further exacerbates the imbalance problem. (3) Two nodes in a dependency graph cannot have multiple arcs, therefore some overlapped sentiment tuples cannot be recognized. In this work, we propose nichetargeting solutions for these issues. First, we introduce a novel labeling strategy, which contains two sets of token pair labels, namely essential label set and whole label set. The essential label set consists of the basic labels for this task, which are relatively balanced and applied in the prediction layer. The whole label set includes rich labels to help our model capture various token relations, which are applied in the hidden layer to softly influence our model. Moreover, we also propose an effective model to well collaborate with our labeling strategy, which is equipped with the graph attention networks to iteratively refine token representations, and the adaptive multi-label classifier to dynamically predict multiple relations between token pairs. We perform extensive experiments on 5 benchmark datasets in four languages. Experimental results show that our model outperforms previous SOTA models by a large margin.</abstract>
       <url hash="91dae871">2022.acl-long.291</url>
@@ -4066,7 +4066,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Oliver</first><last>Eberle</last></author>
       <author><first>Stephanie</first><last>Brandl</last></author>
       <author><first>Jonas</first><last>Pilot</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4295-4309</pages>
       <abstract>Learned self-attention functions in state-of-the-art NLP models often correlate with human attention. We investigate whether self-attention in large-scale pre-trained language models is as predictive of human eye fixation patterns during task-reading as classical cognitive models of human attention. We compare attention functions across two task-specific reading datasets for sentiment analysis and relation extraction. We find the predictiveness of large-scale pre-trained self-attention for human attention depends on ‘what is in the tail’, e.g., the syntactic nature of rare contexts. Further, we observe that task-specific fine-tuning does not increase the correlation with human task-specific reading. Through an input reduction experiment we give complementary insights on the sparsity and fidelity trade-off, showing that lower-entropy attention vectors are more faithful.</abstract>
       <url hash="523c62ff">2022.acl-long.296</url>
@@ -4126,7 +4126,7 @@ in the Case of Unambiguous Gender</title>
       <title>Scheduled Multi-task Learning for Neural Chat Translation</title>
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>4375-4388</pages>
@@ -4142,7 +4142,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Sheng</first><last>Zhang</last></author>
       <author><first>Letizia</first><last>Tomada</last></author>
       <author><first>Sebastian</first><last>Schwemer</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4389-4406</pages>
       <abstract>We present a benchmark suite of four datasets for evaluating the fairness of pre-trained language models and the techniques used to fine-tune them for downstream tasks. Our benchmarks cover four jurisdictions (European Council, USA, Switzerland, and China), five languages (English, German, French, Italian and Chinese) and fairness across five attributes (gender, age, region, language, and legal area). In our experiments, we evaluate pre-trained language models using several group-robust fine-tuning techniques and show that performance group disparities are vibrant in many cases, while none of these techniques guarantee fairness, nor consistently mitigate group disparities. Furthermore, we provide a quantitative and qualitative analysis of our results, highlighting open challenges in the development of robustness methods in legal NLP.</abstract>
       <url hash="719f9e5b">2022.acl-long.301</url>
@@ -4262,8 +4262,8 @@ in the Case of Unambiguous Gender</title>
     <paper id="309">
       <title><fixed-case>S</fixed-case>umma<fixed-case>R</fixed-case>eranker: A Multi-Task Mixture-of-Experts Re-ranking Framework for Abstractive Summarization</title>
       <author><first>Mathieu</first><last>Ravaut</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>4504-4524</pages>
       <abstract>Sequence-to-sequence neural networks have recently achieved great success in abstractive summarization, especially through fine-tuning large pre-trained language models on the downstream dataset. These models are typically decoded with beam search to generate a unique summary. However, the search space is very large, and with the exposure bias, such decoding is not optimal. In this paper, we show that it is possible to directly train a second-stage model performing re-ranking on a set of summary candidates. Our mixture-of-experts SummaReranker learns to select a better candidate and consistently improves the performance of the base model. With a base PEGASUS, we push ROUGE scores by 5.44% on CNN- DailyMail (47.16 ROUGE-1), 1.31% on XSum (48.12 ROUGE-1) and 9.34% on Reddit TIFU (29.83 ROUGE-1), reaching a new state-of-the-art. Our code and checkpoints will be available at <url>https://github.com/ntunlp/SummaReranker</url>.</abstract>
       <url hash="49d3b192">2022.acl-long.309</url>
@@ -4278,7 +4278,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Alex</first><last>Spangher</last></author>
       <author><first>Pegah</first><last>Alipoormolabashi</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>4525-4542</pages>
       <abstract>The ability to sequence unordered events is evidence of comprehension and reasoning about real world tasks/procedures. It is essential for applications such as task planning and multi-source instruction summarization. It often requires thorough understanding of temporal common sense and multimodal information, since these procedures are often conveyed by a combination of texts and images. While humans are capable of reasoning about and sequencing unordered procedural instructions, the extent to which the current machine learning methods possess such capability is still an open question. In this work, we benchmark models’ capability of reasoning over and sequencing unordered multimodal instructions by curating datasets from online instructional manuals and collecting comprehensive human annotations. We find current state-of-the-art models not only perform significantly worse than humans but also seem incapable of efficiently utilizing multimodal information. To improve machines’ performance on multimodal event sequencing, we propose sequence-aware pretraining techniques exploiting the sequential alignment properties of both texts and images, resulting in &gt; 5% improvements on perfect match ratio.</abstract>
@@ -4306,7 +4306,7 @@ in the Case of Unambiguous Gender</title>
       <title>Divide and Rule: Effective Pre-Training for Context-Aware Multi-Encoder Translation Models</title>
       <author><first>Lorenzo</first><last>Lupo</last></author>
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>4557-4572</pages>
       <abstract>Multi-encoder models are a broad family of context-aware neural machine translation systems that aim to improve translation quality by encoding document-level contextual information alongside the current sentence. The context encoding is undertaken by contextual parameters, trained on document-level data. In this work, we discuss the difficulty of training these parameters effectively, due to the sparsity of the words in need of context (i.e., the training signal), and their relevant context. We propose to pre-train the contextual parameters over split sentence pairs, which makes an efficient use of the available data for two reasons. Firstly, it increases the contextual training signal by breaking intra-sentential syntactic relations, and thus pushing the model to search the context for disambiguating clues more frequently. Secondly, it eases the retrieval of relevant context, since context segments become shorter. We propose four different splitting methods, and evaluate our approach with BLEU and contrastive test sets. Results show that it consistently improves learning of contextual parameters, both in low and high resource settings.</abstract>
       <url hash="f35afe16">2022.acl-long.312</url>
@@ -4319,7 +4319,7 @@ in the Case of Unambiguous Gender</title>
       <title>Saliency as Evidence: Event Detection with Trigger Saliency Attribution</title>
       <author><first>Jian</first><last>Liu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>4573-4585</pages>
       <abstract>Event detection (ED) is a critical subtask of event extraction that seeks to identify event triggers of certain types in texts. Despite significant advances in ED, existing methods typically follow a “one model fits all types” approach, which sees no differences between event types and often results in a quite skewed performance. Finding the causes of skewed performance is crucial for the robustness of an ED model, but to date there has been little exploration of this problem. This research examines the issue in depth and presents a new concept termed trigger salience attribution, which can explicitly quantify the underlying patterns of events. On this foundation, we develop a new training mechanism for ED, which can distinguish between trigger-dependent and context-dependent types and achieve promising performance on two benchmarks. Finally, by highlighting many distinct characteristics of trigger-dependent and context-dependent types, our work may promote more research into this problem.</abstract>
       <url hash="7b2b3a42">2022.acl-long.313</url>
@@ -4365,7 +4365,7 @@ in the Case of Unambiguous Gender</title>
       <title>Multilingual Generative Language Models for Zero-Shot Cross-Lingual Event Argument Extraction</title>
       <author><first>Kuan-Hao</first><last>Huang</last></author>
       <author><first>I-Hung</first><last>Hsu</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>4633-4646</pages>
@@ -4427,7 +4427,7 @@ in the Case of Unambiguous Gender</title>
       <title>Semi-Supervised Formality Style Transfer with Consistency Training</title>
       <author><first>Ao</first><last>Liu</last></author>
       <author><first>An</first><last>Wang</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>4689-4701</pages>
       <abstract>Formality style transfer (FST) is a task that involves paraphrasing an informal sentence into a formal one without altering its meaning. To address the data-scarcity problem of existing parallel datasets, previous studies tend to adopt a cycle-reconstruction scheme to utilize additional unlabeled data, where the FST model mainly benefits from target-side unlabeled sentences. In this work, we propose a simple yet effective semi-supervised framework to better utilize source-side unlabeled sentences based on consistency training. Specifically, our approach augments pseudo-parallel data obtained from a source-side informal sentence by enforcing the model to generate similar outputs for its perturbed version. Moreover, we empirically examined the effects of various data perturbation methods and propose effective data filtering strategies to improve our framework. Experimental results on the GYAFC benchmark demonstrate that our approach can achieve state-of-the-art results, even with less than 40% of the parallel data.</abstract>
       <url hash="ba0e5893">2022.acl-long.321</url>
@@ -4512,7 +4512,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="328">
       <title><fixed-case>W</fixed-case>iki<fixed-case>D</fixed-case>iverse: A Multimodal Entity Linking Dataset with Diversified Contextual Topics and Entity Types</title>
       <author><first>Xuwu</first><last>Wang</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Min</first><last>Gui</last></author>
       <author><first>Zhixu</first><last>Li</last></author>
       <author><first>Rui</first><last>Wang</last></author>
@@ -4730,7 +4730,7 @@ in the Case of Unambiguous Gender</title>
       <title>Learning From Failure: Data Capture in an <fixed-case>A</fixed-case>ustralian Aboriginal Community</title>
       <author><first>Eric</first><last>Le Ferrand</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>4988-4998</pages>
       <abstract>Most low resource language technology development is premised on the need to collect data for training statistical models. When we follow the typical process of recording and transcribing text for small Indigenous languages, we hit up against the so-called “transcription bottleneck.” Therefore it is worth exploring new ways of engaging with speakers which generate data while avoiding the transcription bottleneck. We have deployed a prototype app for speakers to use for confirming system guesses in an approach to transcription based on word spotting. However, in the process of testing the app we encountered many new problems for engagement with speakers. This paper presents a close-up study of the process of deploying data capture technology on the ground in an Australian Aboriginal community. We reflect on our interactions with participants and draw lessons that apply to anyone seeking to develop methods for language data collection in an Indigenous community.</abstract>
       <url hash="7dac6807">2022.acl-long.342</url>
@@ -4753,7 +4753,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Siqi</first><last>Shen</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>5010-5028</pages>
       <abstract>This paper addresses the problem of dialogue reasoning with contextualized commonsense inference. We curate CICERO, a dataset of dyadic conversations with five types of utterance-level reasoning-based inferences: cause, subsequent event, prerequisite, motivation, and emotional reaction. The dataset contains 53,105 of such inferences from 5,672 dialogues. We use this dataset to solve relevant generative and discriminative tasks: generation of cause and subsequent event; generation of prerequisite, motivation, and listener’s emotional reaction; and selection of plausible alternatives. Our results ascertain the value of such dialogue-centric commonsense knowledge datasets. It is our hope that CICERO will open new research avenues into commonsense-based dialogue reasoning.</abstract>
@@ -4775,7 +4775,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="346">
       <title><fixed-case>SP</fixed-case>o<fixed-case>T</fixed-case>: Better Frozen Model Adaptation through Soft Prompt Transfer</title>
-      <author><first>Tu</first><last>Vu</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
       <author><first>Brian</first><last>Lester</last></author>
       <author><first>Noah</first><last>Constant</last></author>
       <author><first>Rami</first><last>Al-Rfou’</last></author>
@@ -4837,9 +4837,9 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="350">
       <title>The patient is more dead than alive: exploring the current state of the multi-document summarisation of the biomedical literature</title>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>5098-5111</pages>
       <abstract>Although multi-document summarisation (MDS) of the biomedical literature is a highly valuable task that has recently attracted substantial interest, evaluation of the quality of biomedical summaries lacks consistency and transparency. In this paper, we examine the summaries generated by two current models in order to understand the deficiencies of existing evaluation approaches in the context of the challenges that arise in the MDS task. Based on this analysis, we propose a new approach to human evaluation and identify several challenges that must be overcome to develop effective biomedical MDS systems.</abstract>
@@ -4889,7 +4889,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Jungsoo</first><last>Park</last></author>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Jaewoo</first><last>Kang</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>5154-5166</pages>
       <abstract>Despite significant interest in developing general purpose fact checking models, it is challenging to construct a large-scale fact verification dataset with realistic real-world claims. Existing claims are either authored by crowdworkers, thereby introducing subtle biases thatare difficult to control for, or manually verified by professional fact checkers, causing them to be expensive and limited in scale. In this paper, we construct a large-scale challenging fact verification dataset called FAVIQ, consisting of 188k claims derived from an existing corpus of ambiguous information-seeking questions. The ambiguities in the questions enable automatically constructing true and false claims that reflect user confusions (e.g., the year of the movie being filmed vs. being released). Claims in FAVIQ are verified to be natural, contain little lexical bias, and require a complete understanding of the evidence for verification. Our experiments show that the state-of-the-art models are far from solving our new task. Moreover, training on our data helps in professional fact-checking, outperforming models trained on the widely used dataset FEVER or in-domain data by up to 17% absolute. Altogether, our data will serve as a challenging benchmark for natural language understanding and support future progress in professional fact checking.</abstract>
@@ -4940,7 +4940,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="358">
       <title>Dynamic Prefix-Tuning for Generative Template-based Event Extraction</title>
       <author><first>Xiao</first><last>Liu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Ge</first><last>Shi</last></author>
       <author><first>Bo</first><last>Wang</last></author>
       <pages>5216-5228</pages>
@@ -5006,7 +5006,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Chau</first><last>Tran</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>5291-5305</pages>
       <abstract>Recent work in multilingual machine translation (MMT) has focused on the potential of positive transfer between languages, particularly cases where higher-resourced languages can benefit lower-resourced ones. While training an MMT model, the supervision signals learned from one language pair can be transferred to the other via the tokens shared by multiple source languages. However, the transfer is inhibited when the token overlap among source languages is small, which manifests naturally when languages use different writing systems. In this paper, we tackle inhibited transfer by augmenting the training data with alternative signals that unify different writing systems, such as phonetic, romanized, and transliterated input. We test these signals on Indic and Turkic languages, two language families where the writing systems differ but languages still share common features. Our results indicate that a straightforward multi-source self-ensemble – training a model on a mixture of various signals and ensembling the outputs of the same model fed with different signals during inference, outperforms strong ensemble baselines by 1.3 BLEU points on both language families. Further, we find that incorporating alternative inputs via self-ensemble can be particularly effective when training set is small, leading to +5 BLEU when only 5% of the total training data is accessible. Finally, our analysis demonstrates that including alternative signals yields more consistency and translates named entities more accurately, which is crucial for increased factuality of automated systems.</abstract>
       <url hash="89d2323b">2022.acl-long.363</url>
@@ -5029,7 +5029,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>5316-5330</pages>
       <abstract>We introduce a noisy channel approach for language model prompting in few-shot text classification. Instead of computing the likelihood of the label given the input (referred as direct models), channel models compute the conditional probability of the input given the label, and are thereby required to explain every word in the input. We use channel models for recently proposed few-shot learning methods with no or very limited updates to the language model parameters, via either in-context demonstration or prompt tuning. Our experiments show that, for both methods, channel models significantly outperform their direct counterparts, which we attribute to their stability, i.e., lower variance and higher worst-case accuracy. We also present extensive ablations that provide recommendations for when to use channel prompt tuning instead of other competitive models (e.g., direct head tuning): channel prompt tuning is preferred when the number of training examples is small, labels in the training data are imbalanced, or generalization to unseen labels is required.</abstract>
       <url hash="479d1462">2022.acl-long.365</url>
@@ -5138,7 +5138,7 @@ in the Case of Unambiguous Gender</title>
       <title>Multi Task Learning For Zero Shot Performance Prediction of Multilingual Models</title>
       <author><first>Kabir</first><last>Ahuja</last></author>
       <author><first>Shanu</first><last>Kumar</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>5454-5467</pages>
       <abstract>Massively Multilingual Transformer based Language Models have been observed to be surprisingly effective on zero-shot transfer across languages, though the performance varies from language to language depending on the pivot language(s) used for fine-tuning. In this work, we build upon some of the existing techniques for predicting the zero-shot performance on a task, by modeling it as a multi-task learning problem. We jointly train predictive models for different tasks which helps us build more accurate predictors for tasks where we have test data in very few languages to measure the actual performance of the model. Our approach also lends us the ability to perform a much more robust feature selection, and identify a common set of features that influence zero-shot performance across a variety of tasks.</abstract>
@@ -5151,7 +5151,7 @@ in the Case of Unambiguous Gender</title>
       <title><tex-math>\infty</tex-math>-former: Infinite Memory Transformer</title>
       <author><first>Pedro Henrique</first><last>Martins</last></author>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>Andre</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last></author>
       <pages>5468-5485</pages>
       <abstract>Transformers are unable to model long-term memories effectively, since the amount of computation they need to perform grows with the context length. While variations of efficient transformers have been proposed, they all have a finite memory capacity and are forced to drop old information. In this paper, we propose the <tex-math>\infty</tex-math>-former, which extends the vanilla transformer with an unbounded long-term memory. By making use of a continuous-space attention mechanism to attend over the long-term memory, the <tex-math>\infty</tex-math>-former’s attention complexity becomes independent of the context length, trading off memory length with precision.In order to control where precision is more important, <tex-math>\infty</tex-math>-former maintains “sticky memories,” being able to model arbitrarily long contexts while keeping the computation budget fixed.Experiments on a synthetic sorting task, language modeling, and document grounded dialogue generation demonstrate the <tex-math>\infty</tex-math>-former’s ability to retain information from long sequences.</abstract>
       <url hash="02e8edcb">2022.acl-long.375</url>
@@ -5177,7 +5177,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Leonie</first><last>Weissweiler</last></author>
       <author><first>Valentin</first><last>Hofmann</last></author>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>5506-5516</pages>
       <abstract>We introduce <b>CaMEL</b> (<b>Ca</b>se <b>M</b>arker <b>E</b>xtraction without <b>L</b>abels), a novel and challenging task in computational morphology that is especially relevant for low-resource languages. We propose a first model for CaMEL that uses a massively multilingual corpus to extract case markers in 83 languages based only on a noun phrase chunker and an alignment system. To evaluate CaMEL, we automatically construct a silver standard from UniMorph. The case markers extracted by our model can be used to detect and visualise similarities and differences between the case systems of different languages as well as to annotate fine-grained deep cases in languages in which they are not overtly marked.</abstract>
       <url hash="39db83df">2022.acl-long.377</url>
@@ -5189,7 +5189,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="378">
       <title>Improving Generalizability in Implicitly Abusive Language Detection with Concept Activation Vectors</title>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>5517-5529</pages>
       <abstract>Robustness of machine learning models on ever-changing real-world data is critical, especially for applications affecting human well-being such as content moderation. New kinds of abusive language continually emerge in online discussions in response to current events (e.g., COVID-19), and the deployed abuse detection systems should be updated regularly to remain accurate. In this paper, we show that general abusive language classifiers tend to be fairly reliable in detecting out-of-domain explicitly abusive utterances but fail to detect new types of more subtle, implicit abuse. Next, we propose an interpretability technique, based on the Testing Concept Activation Vector (TCAV) method from computer vision, to quantify the sensitivity of a trained model to the human-defined concepts of explicit and implicit abusive language, and use that to explain the generalizability of the model on new data, in this case, COVID-related anti-Asian hate speech. Extending this technique, we introduce a novel metric, Degree of Explicitness, for a single instance and show that the new metric is beneficial in suggesting out-of-domain unlabeled examples to effectively enrich the training data with informative, implicitly abusive texts.</abstract>
@@ -5214,7 +5214,7 @@ in the Case of Unambiguous Gender</title>
       <title>Non-neural Models Matter: a Re-evaluation of Neural Referring Expression Generation Systems</title>
       <author><first>Fahime</first><last>Same</last></author>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>Van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last></author>
       <pages>5554-5567</pages>
       <abstract>In recent years, neural models have often outperformed rule-based and classic Machine Learning approaches in NLG. These classic approaches are now often disregarded, for example when new neural models are evaluated. We argue that they should not be overlooked, since, for some tasks, well-designed non-neural approaches achieve better performance than neural ones. In this paper, the task of generating referring expressions in linguistic context is used as an example. We examined two very different English datasets (WEBNLG and WSJ), and evaluated each algorithm using both automatic and human evaluations. Overall, the results of these evaluations suggest that rule-based systems with simple rule sets achieve on-par or better performance on both datasets compared to state-of-the-art neural REG systems. In the case of the more realistic dataset, WSJ, a machine learning-based system with well-designed linguistic features performed best. We hope that our work can encourage researchers to consider non-neural models in future.</abstract>
       <url hash="c0fe8895">2022.acl-long.380</url>
@@ -5238,7 +5238,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="382">
       <title>Predicate-Argument Based Bi-Encoder for Paraphrase Identification</title>
       <author><first>Qiwei</first><last>Peng</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Yekun</first><last>Chai</last></author>
       <pages>5579-5589</pages>
@@ -5258,7 +5258,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Liang</first><last>Qiao</last></author>
       <author><first>Zhanzhan</first><last>Cheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5590-5600</pages>
       <abstract>NER model has achieved promising performance on standard NER benchmarks. However, recent studies show that previous approaches may over-rely on entity mention information, resulting in poor performance on out-of-vocabulary(OOV) entity recognition. In this work, we propose MINER, a novel NER learning framework, to remedy this issue from an information-theoretic perspective. The proposed approach contains two mutual information based training objectives: i) generalizing information maximization, which enhances representation via deep understanding of context and entity surface forms; ii) superfluous information minimization, which discourages representation from rotate memorizing entity names or exploiting biased cues in data. Experiments on various settings and datasets demonstrate that it achieves better performance in predicting OOV entities.</abstract>
       <url hash="c66d0386">2022.acl-long.383</url>
@@ -5298,7 +5298,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Liang</first><last>Qiao</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5634-5644</pages>
       <abstract>Adversarial robustness has attracted much attention recently, and the mainstream solution is adversarial training. However, the tradition of generating adversarial perturbations for each input embedding (in the settings of NLP) scales up the training computational complexity by the number of gradient steps it takes to obtain the adversarial samples. To address this problem, we leverage Flooding method which primarily aims at better generalization and we find promising in defending adversarial attacks. We further propose an effective criterion to bring hyper-parameter-dependent flooding into effect with a narrowed-down search space by measuring how the gradient steps taken within one epoch affect the loss of each batch. Our approach requires zero adversarial sample for training, and its time consumption is equivalent to fine-tuning, which can be 2-15 times faster than standard adversarial training. We experimentally show that our method improves BERT’s resistance to textual adversarial attacks by a large margin, and achieves state-of-the-art robust accuracy on various text classification and GLUE tasks.</abstract>
       <url hash="5f74cba4">2022.acl-long.386</url>
@@ -5323,7 +5323,7 @@ in the Case of Unambiguous Gender</title>
       <title>Finding Structural Knowledge in Multimodal-<fixed-case>BERT</fixed-case></title>
       <author><first>Victor</first><last>Milewski</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>5658-5671</pages>
       <abstract>In this work, we investigate the knowledge learned in the embeddings of multimodal-BERT models. More specifically, we probe their capabilities of storing the grammatical structure of linguistic data and the structure learned over objects in visual data. To reach that goal, we first make the inherent structure of language and visuals explicit by a dependency parse of the sentences that describe the image and by the dependencies between the object regions in the image, respectively. We call this explicit visual structure the scene tree, that is based on the dependency tree of the language description. Extensive probing experiments show that the multimodal-BERT models do not encode these scene trees.</abstract>
       <url hash="7559709e">2022.acl-long.388</url>
@@ -5416,7 +5416,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Damir</first><last>Juric</last></author>
       <author><first>Jack</first><last>Flann</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Aleksandar</first><last>Savkov</last></author>
       <pages>5739-5754</pages>
       <abstract>In recent years, machine learning models have rapidly become better at generating clinical consultation notes; yet, there is little work on how to properly evaluate the generated consultation notes to understand the impact they may have on both the clinician using them and the patient’s clinical safety. To address this we present an extensive human evaluation study of consultation notes where 5 clinicians (i) listen to 57 mock consultations, (ii) write their own notes, (iii) post-edit a number of automatically generated notes, and (iv) extract all the errors, both quantitative and qualitative. We then carry out a correlation study with 18 automatic quality metrics and the human judgements. We find that a simple, character-based Levenshtein distance metric performs on par if not better than common model-based metrics like BertScore. All our findings and annotations are open-sourced.</abstract>
@@ -5487,7 +5487,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="399">
       <title>Evaluating Extreme Hierarchical Multi-label Classification</title>
-      <author><first>Enrique</first><last>Amigo</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigo</last></author>
       <author><first>Agustín</first><last>Delgado</last></author>
       <pages>5809-5819</pages>
       <abstract>Several natural language processing (NLP) tasks are defined as a classification problem in its most complex form: Multi-label Hierarchical Extreme classification, in which items may be associated with multiple classes from a set of thousands of possible classes organized in a hierarchy and with a highly unbalanced distribution both in terms of class frequency and the number of labels per item. We analyze the state of the art of evaluation metrics based on a set of formal properties and we define an information theoretic based metric inspired by the Information Contrast Model (ICM). Experiments on synthetic data and a case study on real data show the suitability of the ICM for such scenarios.</abstract>
@@ -5570,14 +5570,14 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="405">
       <title>An Effective and Efficient Entity Alignment Decoding Algorithm via Third-Order Tensor Isomorphism</title>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Meirong</first><last>Ma</last></author>
       <author><first>Hao</first><last>Yuan</last></author>
       <author><first>Jianchao</first><last>Zhu</last></author>
       <author><first>ZongYu</first><last>Wang</last></author>
       <author><first>Rui</first><last>Xie</last></author>
       <author><first>Wei</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>5888-5898</pages>
       <abstract>Entity alignment (EA) aims to discover the equivalent entity pairs between KGs, which is a crucial step for integrating multi-source KGs.For a long time, most researchers have regarded EA as a pure graph representation learning task and focused on improving graph encoders while paying little attention to the decoding process. In this paper, we propose an effective and efficient EA Decoding Algorithm via Third-order Tensor Isomorphism (DATTI).Specifically, we derive two sets of isomorphism equations: (1) Adjacency tensor isomorphism equations and (2) Gramian tensor isomorphism equations. By combining these equations, DATTI could effectively utilize the adjacency and inner correlation isomorphisms of KGs to enhance the decoding process of EA.Extensive experiments on public datasets indicate that our decoding algorithm can deliver significant performance improvements even on the most advanced EA methods, while the extra required time is less than 3 seconds.</abstract>
       <url hash="2acedaea">2022.acl-long.405</url>
@@ -5614,7 +5614,7 @@ in the Case of Unambiguous Gender</title>
       <title>Continual Pre-training of Language Models for Math Problem Understanding with Syntax-Aware Memory Network</title>
       <author><first>Zheng</first><last>Gong</last></author>
       <author><first>Kun</first><last>Zhou</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Jing</first><last>Sha</last></author>
       <author><first>Shijin</first><last>Wang</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -5749,7 +5749,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="418">
       <title>Rethinking Self-Supervision Objectives for Generalizable Coherence Modeling</title>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Xiang</first><last>Lin</last></author>
       <pages>6044-6059</pages>
       <abstract>Given the claims of improved text generation quality across various pre-trained neural models, we consider the coherence evaluation of machine generated text to be one of the principal applications of coherence models that needs to be investigated. Prior work in neural coherence modeling has primarily focused on devising new architectures for solving the permuted document task. We instead use a basic model architecture and show significant improvements over state of the art within the same training regime. We then design a harder self-supervision objective by increasing the ratio of negative samples within a contrastive learning setup, and enhance the model further through automatic hard negative mining coupled with a large global negative queue encoded by a momentum encoder. We show empirically that increasing the density of negative samples improves the basic model, and using a global negative queue further improves and stabilizes the model while training with hard negative samples. We evaluate the coherence model on task-independent test sets that resemble real-world applications and show significant improvements in coherence evaluations of downstream tasks.</abstract>
@@ -5787,7 +5787,7 @@ in the Case of Unambiguous Gender</title>
       <title><fixed-case>CLIP</fixed-case> Models are Few-Shot Learners: Empirical Studies on <fixed-case>VQA</fixed-case> and Visual Entailment</title>
       <author><first>Haoyu</first><last>Song</last></author>
       <author><first>Li</first><last>Dong</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>6088-6100</pages>
@@ -5819,7 +5819,7 @@ in the Case of Unambiguous Gender</title>
       <title>Debiased Contrastive Learning of Unsupervised Sentence Representations</title>
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Beichen</first><last>Zhang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>6120-6130</pages>
       <abstract>Recently, contrastive learning has been shown to be effective in improving pre-trained language models (PLM) to derive high-quality sentence representations. It aims to pull close positive examples to enhance the alignment while push apart irrelevant negatives for the uniformity of the whole representation space. However, previous works mostly adopt in-batch negatives or sample from training data at random. Such a way may cause the sampling bias that improper negatives (false negatives and anisotropy representations) are used to learn sentence representations, which will hurt the uniformity of the representation space. To address it, we present a new framework <b>DCLR</b> (Debiased Contrastive Learning of unsupervised sentence Representations) to alleviate the influence of these improper negatives.In DCLR, we design an instance weighting method to punish false negatives and generate noise-based negatives to guarantee the uniformity of the representation space.Experiments on seven semantic textual similarity tasks show that our approach is more effective than competitive baselines. Our code and data are publicly available at the link: blue<url>https://github.com/RUCAIBox/DCLR</url>.</abstract>
@@ -5874,7 +5874,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Payal</first><last>Bajaj</last></author>
       <author><first>Xia</first><last>Song</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>6170-6182</pages>
       <abstract>In this paper, we introduce ELECTRA-style tasks to cross-lingual language model pre-training. Specifically, we present two pre-training tasks, namely multilingual replaced token detection, and translation replaced token detection. Besides, we pretrain the model, named as XLM-E, on both multilingual and parallel corpora. Our model outperforms the baseline models on various cross-lingual understanding tasks with much less computation cost. Moreover, analysis shows that XLM-E tends to obtain better cross-lingual transferability.</abstract>
@@ -5975,21 +5975,21 @@ in the Case of Unambiguous Gender</title>
       <title><fixed-case>A</fixed-case>mericas<fixed-case>NLI</fixed-case>: Evaluating Zero-shot Natural Language Understanding of Pretrained Multilingual Models in Truly Low-resource Languages</title>
       <author><first>Abteen</first><last>Ebrahimi</last></author>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Luis</first><last>Chiruzzo</last></author>
       <author><first>Angela</first><last>Fan</last></author>
       <author><first>John</first><last>Ortega</last></author>
       <author><first>Ricardo</first><last>Ramos</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <author><first>Gustavo</first><last>Giménez-Lugo</last></author>
-      <author><first>Elisabeth</first><last>Mager</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Mager</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Rolando</first><last>Coto-Solano</last></author>
       <author><first>Thang</first><last>Vu</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>6279-6299</pages>
       <abstract>Pretrained multilingual models are able to perform cross-lingual transfer in a zero-shot setting, even for languages unseen during pretraining. However, prior work evaluating performance on unseen languages has largely been limited to low-level, syntactic tasks, and it remains unclear if zero-shot learning of high-level, semantic tasks is possible for unseen languages. To explore this question, we present AmericasNLI, an extension of XNLI (Conneau et al., 2018) to 10 Indigenous languages of the Americas. We conduct experiments with XLM-R, testing multiple zero-shot and translation-based approaches. Additionally, we explore model adaptation via continued pretraining and provide an analysis of the dataset by considering hypothesis-only models. We find that XLM-R’s zero-shot performance is poor for all 10 languages, with an average performance of 38.48%. Continued pretraining offers improvements, with an average accuracy of 43.85%. Surprisingly, training on poorly translated data by far outperforms all other methods with an accuracy of 49.12%.</abstract>
       <url hash="93d64425">2022.acl-long.435</url>
@@ -6041,7 +6041,7 @@ in the Case of Unambiguous Gender</title>
       <title><fixed-case>CONT</fixed-case>ai<fixed-case>NER</fixed-case>: Few-Shot Named Entity Recognition via Contrastive Learning</title>
       <author><first>Sarkar Snigdha Sarathi</first><last>Das</last></author>
       <author><first>Arzoo</first><last>Katiyar</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <pages>6338-6353</pages>
       <abstract>Named Entity Recognition (NER) in Few-Shot setting is imperative for entity tagging in low resource domains. Existing approaches only learn class-specific semantic features and intermediate representations from source domains. This affects generalizability to unseen target domains, resulting in suboptimal performances. To this end, we present CONTaiNER, a novel contrastive learning technique that optimizes the inter-token distribution distance for Few-Shot NER. Instead of optimizing class-specific attributes, CONTaiNER optimizes a generalized objective of differentiating between token categories based on their Gaussian-distributed embeddings. This effectively alleviates overfitting issues originating from training domains. Our experiments in several traditional test domains (OntoNotes, CoNLL’03, WNUT ‘17, GUM) and a new large scale Few-Shot NER dataset (Few-NERD) demonstrate that on average, CONTaiNER outperforms previous methods by 3%-13% absolute F1 points while showing consistent performance trends, even in challenging scenarios where previous approaches could not achieve appreciable performance.</abstract>
@@ -6071,7 +6071,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Vincent</first><last>Chen</last></author>
       <author><first>Kuan-Chieh</first><last>Lo</last></author>
       <author><first>Chacha</first><last>Chen</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>6365-6378</pages>
       <abstract>Visual storytelling (VIST) is a typical vision and language task that has seen extensive development in the natural language generation research domain. However, it remains unclear whether conventional automatic evaluation metrics for text generation are applicable on VIST. In this paper, we present the VHED (VIST Human Evaluation Data) dataset, which first re-purposes human evaluation results for automatic evaluation; hence we develop Vrank (VIST Ranker), a novel reference-free VIST metric for story evaluation. We first show that the results from commonly adopted automatic metrics for text generation have little correlation with those obtained from human evaluation, which motivates us to directly utilize human evaluation results to learn the automatic evaluation model. In the experiments, we evaluate the generated texts to predict story ranks using our model as well as other reference-based and reference-free metrics. Results show that Vrank prediction is significantly more aligned to human evaluation than other metrics with almost 30% higher accuracy when ranking story pairs. Moreover, we demonstrate that only Vrank shows human-like behavior in its strong ability to find better stories when the quality gap between two stories is high. Finally, we show the superiority of Vrank by its generalizability to pure textual stories, and conclude that this reuse of human evaluation results puts Vrank in a strong position for continued future advances.</abstract>
@@ -6160,7 +6160,7 @@ in the Case of Unambiguous Gender</title>
       <title>Compositional Generalization in Dependency Parsing</title>
       <author><first>Emily</first><last>Goodwin</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
       <author><first>Dzmitry</first><last>Bahdanau</last></author>
       <pages>6482-6493</pages>
       <abstract>Compositionality— the ability to combine familiar units like words into novel phrases and sentences— has been the focus of intense interest in artificial intelligence in recent years. To test compositional generalization in semantic parsing, Keysers et al. (2020) introduced Compositional Freebase Queries (CFQ). This dataset maximizes the similarity between the test and train distributions over primitive units, like words, while maximizing the compound divergence: the dissimilarity between test and train distributions over larger structures, like phrases. Dependency parsing, however, lacks a compositional generalization benchmark. In this work, we introduce a gold-standard set of dependency parses for CFQ, and use this to analyze the behaviour of a state-of-the art dependency parser (Qi et al., 2020) on the CFQ dataset. We find that increasing compound divergence degrades dependency parsing performance, although not as dramatically as semantic parsing performance. Additionally, we find the performance of the dependency parser does not uniformly degrade relative to compound divergence, and the parser performs differently on different splits with the same compound divergence. We explore a number of hypotheses for what causes the non-uniform degradation in dependency parsing performance, and identify a number of syntactic structures that drive the dependency parser’s lower performance on the most challenging splits.</abstract>
@@ -6210,7 +6210,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="452">
       <title>Substructure Distribution Projection for Zero-Shot Cross-Lingual Dependency Parsing</title>
-      <author><first>Freda</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
       <pages>6547-6563</pages>
@@ -6335,7 +6335,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="462">
       <title>Reinforcement Guided Multi-Task Learning Framework for Low-Resource Stereotype Detection</title>
-      <author><first>Rajkumar</first><last>Pujari</last></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last></author>
       <author><first>Erik</first><last>Oveson</last></author>
       <author><first>Priyanka</first><last>Kulkarni</last></author>
       <author><first>Elnaz</first><last>Nouri</last></author>
@@ -6365,7 +6365,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Yi</first><last>Mao</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Weizhu</first><last>Chen</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>6723-6737</pages>
       <abstract>Large pretrained generative models like GPT-3 often suffer from hallucinating non-existent or incorrect content, which undermines their potential merits in real applications. Existing work usually attempts to detect these hallucinations based on a corresponding oracle reference at a sentence or document level. However ground-truth references may not be readily available for many free-form text generation applications, and sentence- or document-level detection may fail to provide the fine-grained signals that would prevent fallacious content in real time. As a first step to addressing these issues, we propose a novel token-level, reference-free hallucination detection task and an associated annotated dataset named HaDeS (HAllucination DEtection dataSet). To create this dataset, we first perturb a large number of text segments extracted from English language Wikipedia, and then verify these with crowd-sourced annotations. To mitigate label imbalance during annotation, we utilize an iterative model-in-loop strategy. We conduct comprehensive data analyses and create multiple baseline models.</abstract>
       <url hash="db49c565">2022.acl-long.464</url>
@@ -6498,7 +6498,7 @@ in the Case of Unambiguous Gender</title>
       <title>Length Control in Abstractive Summarization by Pretraining Information Selection</title>
       <author><first>Yizhu</first><last>Liu</last></author>
       <author><first>Qi</first><last>Jia</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>6885-6895</pages>
       <abstract>Previous length-controllable summarization models mostly control lengths at the decoding stage, whereas the encoding or the selection of information from the source document is not sensitive to the designed length. They also tend to generate summaries as long as those in the training data. In this paper, we propose a length-aware attention mechanism (LAAM) to adapt the encoding of the source based on the desired length. Our approach works by training LAAM on a summary length balanced dataset built from the original training data, and then fine-tuning as usual. Results show that this approach is effective in generating high-quality summaries with desired lengths and even those short lengths never seen in the original training set.</abstract>
       <url hash="b1d84990">2022.acl-long.474</url>
@@ -6514,7 +6514,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Di</first><last>Liang</last></author>
       <author><first>Sirui</first><last>Wang</last></author>
       <author><first>Wei</first><last>Wu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6896-6906</pages>
       <abstract>Multi-hop question generation focuses on generating complex questions that require reasoning over multiple pieces of information of the input passage. Current models with state-of-the-art performance have been able to generate the correct questions corresponding to the answers. However, most models can not ensure the complexity of generated questions, so they may generate shallow questions that can be answered without multi-hop reasoning. To address this challenge, we propose the CQG, which is a simple and effective controlled framework. CQG employs a simple method to generate the multi-hop questions that contain key entities in multi-hop reasoning chains, which ensure the complexity and quality of the questions. In addition, we introduce a novel controlled Transformer-based decoder to guarantee that key entities appear in the questions. Experiment results show that our model greatly improves performance, which also outperforms the state-of-the-art model about 25% by 5 BLEU points on HotpotQA.</abstract>
       <url hash="c508b5ac">2022.acl-long.475</url>
@@ -6526,7 +6526,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Mostafa</first><last>Abdou</last></author>
       <author><first>Vinit</first><last>Ravishankar</last></author>
       <author><first>Artur</first><last>Kulmizev</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>6907-6919</pages>
       <abstract>Recent studies have shown that language models pretrained and/or fine-tuned on randomly permuted sentences exhibit competitive performance on GLUE, putting into question the importance of word order information. Somewhat counter-intuitively, some of these studies also report that position embeddings appear to be crucial for models’ good performance with shuffled text. We probe these language models for word order information and investigate what position embeddings learned from shuffled text encode, showing that these models retain a notion of word order information. We show this is in part due to a subtlety in how shuffling is implemented in previous work – before rather than after subword segmentation. Surprisingly, we find even Language models trained on text shuffled after subword segmentation retain some semblance of information about word order because of the statistical dependencies between sentence length and unigram probabilities. Finally, we show that beyond GLUE, a variety of language understanding tasks do require word order information, often to an extent that cannot be learned through fine-tuning.</abstract>
       <url hash="56041ca8">2022.acl-long.476</url>
@@ -6569,7 +6569,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Saku</first><last>Sugawara</last></author>
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Alex</first><last>Warstadt</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>6951-6971</pages>
       <abstract>For a natural language understanding benchmark to be useful in research, it has to consist of examples that are diverse and difficult enough to discriminate among current and near-future state-of-the-art systems. However, we do not yet know how best to select text sources to collect a variety of challenging examples. In this study, we crowdsource multiple-choice reading comprehension questions for passages taken from seven qualitatively distinct sources, analyzing what attributes of passages contribute to the difficulty and question types of the collected examples. To our surprise, we find that passage source, length, and readability measures do not significantly affect question difficulty. Through our manual annotation of seven reasoning types, we observe several trends between passage sources and reasoning types, e.g., logical reasoning is more often required in questions written for technical passages. These results suggest that when creating a new benchmark dataset, selecting a diverse set of passages can help ensure a diverse range of question types, but that passage difficulty need not be a priority.</abstract>
       <url hash="d4fb2f1d">2022.acl-long.479</url>
@@ -6580,7 +6580,7 @@ in the Case of Unambiguous Gender</title>
       <title>From Simultaneous to Streaming Machine Translation by Leveraging Streaming History</title>
       <author><first>Javier</first><last>Iranzo-Sánchez</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>6972-6985</pages>
       <abstract>Simultaneous Machine Translation is the task of incrementally translating an input sentence before it is fully available. Currently, simultaneous translation is carried out by translating each sentence independently of the previously translated text. More generally, Streaming MT can be understood as an extension of Simultaneous MT to the incremental translation of a continuous input text stream. In this work, a state-of-the-art simultaneous sentence-level MT system is extended to the streaming setup by leveraging the streaming history. Extensive empirical results are reported on IWSLT Translation Tasks, showing that leveraging the streaming history leads to significant quality gains. In particular, the proposed system proves to compare favorably to the best performing systems.</abstract>
       <url hash="3089f909">2022.acl-long.480</url>
@@ -6617,7 +6617,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Constanza</first><last>Fierro</last></author>
       <author><first>Katerina</first><last>Margatina</last></author>
       <author><first>Phillip</first><last>Rust</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>6997-7013</pages>
       <abstract>Various efforts in the Natural Language Processing (NLP) community have been made to accommodate linguistic diversity and serve speakers of many different languages. However, it is important to acknowledge that speakers and the content they produce and require, vary not just by language, but also by culture. Although language and culture are tightly linked, there are important differences. Analogous to cross-lingual and multilingual NLP, cross-cultural and multicultural NLP considers these differences in order to better serve users of NLP systems. We propose a principled framework to frame these efforts, and survey existing and potential strategies.</abstract>
       <url hash="90c53c09">2022.acl-long.482</url>
@@ -6707,7 +6707,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Shuming</first><last>Ma</last></author>
       <author><first>Bo</first><last>Zheng</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>7085-7095</pages>
       <abstract>The Mixture-of-Experts (MoE) technique can scale up the model size of Transformers with an affordable computational overhead. We point out that existing learning-to-route MoE methods suffer from the routing fluctuation issue, i.e., the target expert of the same input may change along with training, but only one expert will be activated for the input during inference. The routing fluctuation tends to harm sample efficiency because the same input updates different experts but only one is finally used. In this paper, we propose StableMoE with two training stages to address the routing fluctuation problem. In the first training stage, we learn a balanced and cohesive routing strategy and distill it into a lightweight router decoupled from the backbone model. In the second training stage, we utilize the distilled router to determine the token-to-expert assignment and freeze it for a stable routing strategy. We validate our method on language modeling and multilingual machine translation. The results show that StableMoE outperforms existing MoE methods in terms of both convergence speed and performance.</abstract>
@@ -6810,7 +6810,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Ayana</first><last>Niwa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>7176-7187</pages>
       <abstract>Grammatical Error Correction (GEC) should not focus only on high accuracy of corrections but also on interpretability for language learning. However, existing neural-based GEC models mainly aim at improving accuracy, and their interpretability has not been explored.A promising approach for improving interpretability is an example-based method, which uses similar retrieved examples to generate corrections. In addition, examples are beneficial in language learning, helping learners understand the basis of grammatically incorrect/correct texts and improve their confidence in writing. Therefore, we hypothesize that incorporating an example-based method into GEC can improve interpretability as well as support language learners. In this study, we introduce an Example-Based GEC (EB-GEC) that presents examples to language learners as a basis for a correction result. The examples consist of pairs of correct and incorrect sentences similar to a given input and its predicted correction. Experiments demonstrate that the examples presented by EB-GEC help language learners decide to accept or refuse suggestions from the GEC output. Furthermore, the experiments also show that retrieved examples improve the accuracy of corrections.</abstract>
       <url hash="9f235faf">2022.acl-long.496</url>
@@ -6860,7 +6860,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="500">
       <title>One Country, 700+ Languages: <fixed-case>NLP</fixed-case> Challenges for Underrepresented Languages and Dialects in <fixed-case>I</fixed-case>ndonesia</title>
       <author><first>Alham Fikri</first><last>Aji</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Ade</first><last>Romadhony</last></author>
@@ -6868,7 +6868,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Kemal</first><last>Kurniawan</last></author>
       <author><first>David</first><last>Moeljadi</last></author>
       <author><first>Radityo Eko</first><last>Prasojo</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <pages>7226-7249</pages>
@@ -6882,8 +6882,8 @@ in the Case of Unambiguous Gender</title>
       <title>Is <fixed-case>GPT</fixed-case>-3 Text Indistinguishable from Human Text? Scarecrow: A Framework for Scrutinizing Machine Text</title>
       <author><first>Yao</first><last>Dou</last></author>
       <author><first>Maxwell</first><last>Forbes</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>7250-7274</pages>
       <abstract>Modern neural language models can produce remarkably fluent and grammatical text. So much, in fact, that recent work by Clark et al. (2021) has reported that conventional crowdsourcing can no longer reliably distinguish between machine-authored (GPT-3) and human-authored writing. As errors in machine generations become ever subtler and harder to spot, it poses a new challenge to the research community for robust machine text evaluation. We propose a new framework called Scarecrow for scrutinizing machine text via crowd annotation. To support the broad range of real machine errors that can be identified by laypeople, the ten error categories of Scarecrow—such as redundancy, commonsense errors, and incoherence—are identified through several rounds of crowd annotation experiments without a predefined ontology. We then use Scarecrow to collect over 41k error spans in human-written and machine-generated paragraphs of English language news text. We isolate factors for detailed analysis, including parameter count, training data, and various decoding-time configurations. Our approach successfully quantifies measurable gaps between human authored text and generations from models of several sizes, including fourteen configurations of GPT-3. In addition, our analysis unveils new insights, with detailed rationales provided by laypeople, e.g., that the commonsense capabilities have been improving with larger models while math capabilities have not, and that the choices of simple decoding hyperparameters can make remarkable differences on the perceived quality of machine text. We release our training material, annotation toolkit and dataset at <url>https://yao-dou.github.io/scarecrow/</url>.</abstract>
@@ -6950,7 +6950,7 @@ in the Case of Unambiguous Gender</title>
       <award>Outstanding Paper</award>
       <author><first>Ashwin</first><last>Devaraj</last></author>
       <author><first>William</first><last>Sheffield</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <pages>7331-7345</pages>
       <abstract>Automated simplification models aim to make input texts more readable. Such methods have the potential to make complex information accessible to a wider audience, e.g., providing access to recent medical literature which might otherwise be impenetrable for a lay reader. However, such models risk introducing errors into automatically simplified texts, for instance by inserting statements unsupported by the corresponding original text, or by omitting key information. Providing more readable but inaccurate versions of texts may in many cases be worse than providing no such access at all. The problem of factual accuracy (and the lack thereof) has received heightened attention in the context of summarization models, but the factuality of automatically simplified texts has not been investigated. We introduce a taxonomy of errors that we use to analyze both references drawn from standard simplification datasets and state-of-the-art model outputs. We find that errors often appear in both that are not captured by existing evaluation metrics, motivating a need for research into ensuring the factual accuracy of automated simplification models.</abstract>
@@ -7002,7 +7002,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="510">
       <title>Weakly Supervised Word Segmentation for Computational Language Documentation</title>
       <author><first>Shu</first><last>Okabe</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>7385-7398</pages>
       <abstract>Word and morpheme segmentation are fundamental steps of language documentation as they allow to discover lexical units in a language for which the lexicon is unknown. However, in most language documentation scenarios, linguists do not start from a blank page: they may already have a pre-existing dictionary or have initiated manual segmentation of a small part of their data. This paper studies how such a weak supervision can be taken advantage of in Bayesian non-parametric models of segmentation. Our experiments on two very low resource languages (Mboshi and Japhug), whose documentation is still in progress, show that weak supervision can be beneficial to the segmentation quality. In addition, we investigate an incremental learning scenario where manual segmentations are provided in a sequential manner. This work opens the way for interactive annotation tools for documentary linguists.</abstract>
@@ -7054,7 +7054,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Deepak</first><last>Nathani</last></author>
       <author><first>Xavier</first><last>Garcia</last></author>
       <author><first>Bidisha</first><last>Samanta</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>7439-7468</pages>
       <abstract>Style transfer is the task of rewriting a sentence into a target style while approximately preserving content. While most prior literature assumes access to a large style-labelled corpus, recent work (Riley et al. 2021) has attempted “few-shot” style transfer using only 3-10 sentences at inference for style extraction. In this work we study a relevant low-resource setting: style transfer for languages where no style-labelled corpora are available. We notice that existing few-shot methods perform this task poorly, often copying inputs verbatim. We push the state-of-the-art for few-shot style transfer with a new method modeling the stylistic difference between paraphrases. When compared to prior work, our model achieves 2-3x better performance in formality transfer and code-mixing addition across seven languages. Moreover, our method is better at controlling the style transfer magnitude using an input scalar knob. We report promising qualitative results for several attribute transfer tasks (sentiment transfer, simplification, gender neutralization, text anonymization) all without retraining the model. Finally, we find model evaluation to be difficult due to the lack of datasets and metrics for many languages. To facilitate future research we crowdsource formality annotations for 4000 sentence pairs in four Indic languages, and use this data to design our automatic evaluations.</abstract>
       <url hash="3a3ab99c">2022.acl-long.514</url>
@@ -7071,7 +7071,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Zhaofeng</first><last>Wu</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>7469-7483</pages>
       <abstract>Transformer architectures have achieved state- of-the-art results on a variety of natural language processing (NLP) tasks. However, their attention mechanism comes with a quadratic complexity in sequence lengths, making the computational overhead prohibitive, especially for long sequences. Attention context can be seen as a random-access memory with each token taking a slot. Under this perspective, the memory size grows linearly with the sequence length, and so does the overhead of reading from it. One way to improve the efficiency is to bound the memory size. We show that disparate approaches can be subsumed into one abstraction, attention with bounded-memory control (ABC), and they vary in their organization of the memory. ABC reveals new, unexplored possibilities. First, it connects several efficient attention variants that would otherwise seem apart. Second, this abstraction gives new insights—an established approach (Wang et al., 2020b) previously thought to not be applicable in causal attention, actually is. Last, we present a new instance of ABC, which draws inspiration from existing ABC approaches, but replaces their heuristic memory-organizing functions with a learned, contextualized one. Our experiments on language modeling, machine translation, and masked language model finetuning show that our approach outperforms previous efficient attention models; compared to the strong transformer baselines, it significantly improves the inference time and space efficiency with no or negligible accuracy loss.</abstract>
       <url hash="c524d141">2022.acl-long.515</url>
@@ -7081,7 +7081,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="516">
       <title>The Dangers of Underclaiming: Reasons for Caution When Reporting How <fixed-case>NLP</fixed-case> Systems Fail</title>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>7484-7499</pages>
       <abstract>Researchers in NLP often frame and discuss research results in ways that serve to deemphasize the field’s successes, often in response to the field’s widespread hype. Though well-meaning, this has yielded many misleading or false claims about the limits of our best technology. This is a problem, and it may be more serious than it looks: It harms our credibility in ways that can make it harder to mitigate present-day harms, like those involving biased systems for content moderation or resume screening. It also limits our ability to prepare for the potentially enormous impacts of more distant future advances. This paper urges researchers to be careful about these claims and suggests some research directions and communication strategies that will make it easier to avoid or rebut them.</abstract>
       <url hash="3924e37c">2022.acl-long.516</url>
@@ -7156,7 +7156,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Ruolan</first><last>Yang</last></author>
       <author><first>Zitong</first><last>Li</last></author>
       <author><first>Haifeng</first><last>Tang</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>7579-7590</pages>
       <abstract>Existing automatic evaluation systems of chatbots mostly rely on static chat scripts as ground truth, which is hard to obtain, and requires access to the models of the bots as a form of “white-box testing”. Interactive evaluation mitigates this problem but requires human involvement. In our work, we propose an interactive chatbot evaluation framework in which chatbots compete with each other like in a sports tournament, using flexible scoring metrics. This framework can efficiently rank chatbots independently from their model architectures and the domains for which they are trained.</abstract>
       <url hash="5a449c91">2022.acl-long.522</url>
@@ -7275,7 +7275,7 @@ in the Case of Unambiguous Gender</title>
       <title>Probing for Labeled Dependency Trees</title>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>7711-7726</pages>
       <abstract>Probing has become an important tool for analyzing representations in Natural Language Processing (NLP). For graphical NLP tasks such as dependency parsing, linear probes are currently limited to extracting undirected or unlabeled parse trees which do not capture the full task. This work introduces DepProbe, a linear probe which can extract labeled and directed dependency parse trees from embeddings while using fewer parameters and compute than prior methods. Leveraging its full task coverage and lightweight parametrization, we investigate its predictive power for selecting the best transfer language for training a full biaffine attention parser. Across 13 languages, our proposed method identifies the best source treebank 94% of the time, outperforming competitive baselines and prior work. Finally, we analyze the informativeness of task-specific subspaces in contextual embeddings as well as which benefits a full parser’s non-linear parametrization provides.</abstract>
       <url hash="49b24083">2022.acl-long.532</url>
@@ -7383,7 +7383,7 @@ in the Case of Unambiguous Gender</title>
       <title>Fair and Argumentative Language Modeling for Computational Argumentation</title>
       <author><first>Carolin</first><last>Holtermann</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
       <pages>7841-7861</pages>
       <abstract>Although much work in NLP has focused on measuring and mitigating stereotypical bias in semantic spaces, research addressing bias in computational argumentation is still in its infancy. In this paper, we address this research gap and conduct a thorough investigation of bias in argumentative language models. To this end, we introduce ABBA, a novel resource for bias measurement specifically tailored to argumentation. We employ our resource to assess the effect of argumentative fine-tuning and debiasing on the intrinsic bias found in transformer-based language models using a lightweight adapter-based approach that is more sustainable and parameter-efficient than full fine-tuning. Finally, we analyze the potential impact of language model debiasing on the performance in argument quality prediction, a downstream task of computational argumentation. Our results show that we are able to successfully and sustainably remove bias in general and argumentative language models while preserving (and sometimes improving) model performance in downstream tasks. We make all experimental code and data available at <url>https://github.com/umanlp/FairArgumentativeLM</url>.</abstract>
       <url hash="31da9687">2022.acl-long.541</url>
@@ -7433,12 +7433,12 @@ in the Case of Unambiguous Gender</title>
       <author><first>Yuan</first><last>Ni</last></author>
       <author><first>Guotong</first><last>Xie</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Hui</first><last>Zong</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Linfeng</first><last>Li</last></author>
       <author><first>Jun</first><last>Yan</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Kunli</first><last>Zhang</last></author>
       <author><first>Buzhou</first><last>Tang</last></author>
       <author><first>Qingcai</first><last>Chen</last></author>
@@ -7546,7 +7546,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Chao</first><last>Shang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Peng</first><last>Qi</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <pages>8017-8026</pages>
       <abstract>Question answering over temporal knowledge graphs (KGs) efficiently uses facts contained in a temporal KG, which records entity relations and when they occur in time, to answer natural language questions (e.g., “Who was the president of the US before Obama?”). These questions often involve three time-related challenges that previous work fail to adequately address: 1) questions often do not specify exact timestamps of interest (e.g., “Obama” instead of 2000); 2) subtle lexical differences in time relations (e.g., “before” vs “after”); 3) off-the-shelf temporal KG embeddings that previous work builds on ignore the temporal order of timestamps, which is crucial for answering temporal-order related questions. In this paper, we propose a time-sensitive question answering (TSQA) framework to tackle these problems. TSQA features a timestamp estimation module to infer the unwritten timestamp from the question. We also employ a time-sensitive KG encoder to inject ordering information into the temporal KG embeddings that TSQA is based on. With the help of techniques to reduce the search space for potential answers, TSQA significantly outperforms the previous state of the art on a new benchmark for question answering over temporal KGs, especially achieving a 32% (absolute) error reduction on complex questions that require multiple steps of reasoning over facts in the temporal KG.</abstract>
       <url hash="2642c44d">2022.acl-long.552</url>
@@ -7643,7 +7643,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Lei</first><last>Hou</last></author>
       <author><first>Juanzi</first><last>Li</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
-      <author><first>Jinghui</first><last>Xiao</last></author>
+      <author id="jinghui-xiao"><first>Jinghui</first><last>Xiao</last></author>
       <pages>8128-8140</pages>
       <abstract>Program induction for answering complex questions over knowledge bases (KBs) aims to decompose a question into a multi-step program, whose execution against the KB produces the final answer. Learning to induce programs relies on a large number of parallel question-program pairs for the given KB. However, for most KBs, the gold program annotations are usually lacking, making learning difficult. In this paper, we propose the approach of program transfer, which aims to leverage the valuable program annotations on the rich-resourced KBs as external supervision signals to aid program induction for the low-resourced KBs that lack program annotations. For program transfer, we design a novel two-stage parsing framework with an efficient ontology-guided pruning strategy. First, a sketch parser translates the question into a high-level program sketch, which is the composition of functions. Second, given the question and sketch, an argument parser searches the detailed arguments from the KB for functions. During the searching, we incorporate the KB ontology to prune the search space. The experiments on ComplexWebQuestions and WebQuestionSP show that our method outperforms SOTA methods significantly, demonstrating the effectiveness of program transfer and our framework. Our codes and datasets can be obtained from <url>https://github.com/THU-KEG/ProgramTransfer</url>.</abstract>
       <url hash="a5af6aaa">2022.acl-long.559</url>
@@ -7696,7 +7696,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="563">
       <title>Flexible Generation from Fragmentary Linguistic Input</title>
       <author><first>Peng</first><last>Qian</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>8176-8196</pages>
       <abstract>The dominant paradigm for high-performance models in novel NLP tasks today is direct specialization for the task via training from scratch or fine-tuning large pre-trained models. But does direct specialization capture how humans approach novel language tasks? We hypothesize that human performance is better characterized by flexible inference through composition of basic computational motifs available to the human language user. To test this hypothesis, we formulate a set of novel fragmentary text completion tasks, and compare the behavior of three direct-specialization models against a new model we introduce, GibbsComplete, which composes two basic computational motifs central to contemporary models: masked and autoregressive word prediction. We conduct three types of evaluation: human judgments of completion quality, satisfaction of syntactic constraints imposed by the input fragment, and similarity to human behavior in the structural statistics of the completions. With no task-specific parameter tuning, GibbsComplete performs comparably to direct-specialization models in the first two evaluations, and outperforms all direct-specialization models in the third evaluation. These results support our hypothesis that human behavior in novel language tasks and environments may be better characterized by flexible composition of basic computational motifs rather than by direct specialization.</abstract>
       <url hash="b80efa43">2022.acl-long.563</url>
@@ -7782,7 +7782,7 @@ in the Case of Unambiguous Gender</title>
       <title>Generating Scientific Definitions with Controllable Complexity</title>
       <author><first>Tal</first><last>August</last></author>
       <author><first>Katharina</first><last>Reinecke</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>8298-8317</pages>
       <abstract>Unfamiliar terminology and complex language can present barriers to understanding science. Natural language processing stands to help address these issues by automatically defining unfamiliar terms. We introduce a new task and dataset for defining scientific terms and controlling the complexity of generated definitions as a way of adapting to a specific reader’s background knowledge. We test four definition generation methods for this new task, finding that a sequence-to-sequence approach is most successful. We then explore the version of the task in which definitions are generated at a target complexity level. We introduce a novel reranking approach and find in human evaluations that it offers superior fluency while also controlling complexity, compared to several controllable generation baselines.</abstract>
       <url hash="0aa38f88">2022.acl-long.569</url>
@@ -7837,7 +7837,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="573">
       <title>Ethics Sheets for <fixed-case>AI</fixed-case> Tasks</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>8368-8379</pages>
       <abstract>Several high-profile events, such as the mass testing of emotion recognition systems on vulnerable sub-populations and using question answering systems to make moral judgments, have highlighted how technology will often lead to more adverse outcomes for those that are already marginalized. At issue here are not just individual systems and datasets, but also the AI tasks themselves. In this position paper, I make a case for thinking about ethical considerations not just at the level of individual models and datasets, but also at the level of AI tasks. I will present a new form of such an effort, Ethics Sheets for AI Tasks, dedicated to fleshing out the assumptions and ethical considerations hidden in how a task is commonly framed and in the choices we make regarding the data, method, and evaluation. I will also present a template for ethics sheets with 50 ethical considerations, using the task of emotion recognition as a running example. Ethics sheets are a mechanism to engage with and document ethical considerations before building datasets and systems. Similar to survey articles, a small number of carefully created ethics sheets can serve numerous researchers and developers.</abstract>
       <url hash="e355dfab">2022.acl-long.573</url>
@@ -7865,8 +7865,8 @@ in the Case of Unambiguous Gender</title>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Dongqi</first><last>Wang</last></author>
       <author><first>Lihua</first><last>Qian</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>8398-8409</pages>
       <abstract>Recently, parallel text generation has received widespread attention due to its success in generation efficiency. Although many advanced techniques are proposed to improve its generation quality, they still need the help of an autoregressive model for training to overcome the one-to-many multi-modal phenomenon in the dataset, limiting their applications. In this paper, we propose GLAT, which employs the discrete latent variables to capture word categorical information and invoke an advanced curriculum learning technique, alleviating the multi-modality problem. Experiment results show that our method outperforms strong baselines without the help of an autoregressive model, which further broadens the application scenarios of the parallel decoding paradigm.</abstract>
@@ -7961,7 +7961,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Li</first><last>Dong</last></author>
       <author><first>Yaru</first><last>Hao</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <pages>8493-8502</pages>
       <abstract>Large-scale pretrained language models are surprisingly good at recalling factual knowledge presented in the training corpus. In this paper, we present preliminary studies on how factual knowledge is stored in pretrained Transformers by introducing the concept of knowledge neurons. Specifically, we examine the fill-in-the-blank cloze task for BERT. Given a relational fact, we propose a knowledge attribution method to identify the neurons that express the fact. We find that the activation of such knowledge neurons is positively correlated to the expression of their corresponding facts. In our case studies, we attempt to leverage knowledge neurons to edit (such as update, and erase) specific factual knowledge without fine-tuning. Our results shed light on understanding the storage of knowledge within pretrained Transformers.</abstract>
@@ -7989,7 +7989,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="583">
       <title><fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs: Extending a challenge dataset for measuring social bias in masked language models to a language other than <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
       <author><first>Julien</first><last>Bezançon</last></author>
       <author><first>Karën</first><last>Fort</last></author>
@@ -8002,7 +8002,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="584">
       <title>Few-Shot Learning with <fixed-case>S</fixed-case>iamese Networks and Label Tuning</title>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Guillermo</first><last>Pérez-Torró</last></author>
       <author><first>Marc</first><last>Franco-Salvador</last></author>
       <pages>8532-8545</pages>
@@ -8196,7 +8196,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Jianshu</first><last>Chen</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>8736-8747</pages>
       <abstract>To perform well on a machine reading comprehension (MRC) task, machine readers usually require commonsense knowledge that is not explicitly mentioned in the given documents. This paper aims to extract a new kind of structured knowledge from scripts and use it to improve MRC. We focus on scripts as they contain rich verbal and nonverbal messages, and two relevant messages originally conveyed by different modalities during a short time period may serve as arguments of a piece of commonsense knowledge as they function together in daily communications. To save human efforts to name relations, we propose to represent relations implicitly by situating such an argument pair in a context and call it contextualized knowledge. To use the extracted knowledge to improve MRC, we compare several fine-tuning strategies to use the weakly-labeled MRC data constructed based on contextualized knowledge and further design a teacher-student paradigm with multiple teachers to facilitate the transfer of knowledge in weakly-labeled MRC data. Experimental results show that our paradigm outperforms other methods that use weakly-labeled data and improves a state-of-the-art baseline by 4.3% in accuracy on a Chinese multiple-choice MRC dataset C<tex-math>^3</tex-math>, wherein most of the questions require unstated prior knowledge. We also seek to transfer the knowledge to other tasks by simply adapting the resulting student reader, yielding a 2.9% improvement in F1 on a relation extraction dataset DialogRE, demonstrating the potential usefulness of the knowledge for non-MRC tasks that require document comprehension.</abstract>
       <url hash="7952bdd9">2022.acl-long.598</url>
@@ -8218,7 +8218,7 @@ in the Case of Unambiguous Gender</title>
       <title>Active Evaluation: Efficient <fixed-case>NLG</fixed-case> Evaluation with Few Pairwise Comparisons</title>
       <award>Outstanding Paper</award>
       <author><first>Akash Kumar</first><last>Mohankumar</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <pages>8761-8781</pages>
       <abstract>Recent studies have shown the advantages of evaluating NLG systems using pairwise comparisons as opposed to direct assessment. Given <tex-math>k</tex-math> systems, a naive approach for identifying the top-ranked system would be to uniformly obtain pairwise comparisons from all <tex-math>{k \choose 2}</tex-math> pairs of systems. However, this can be very expensive as the number of human annotations required would grow quadratically with <tex-math>k</tex-math>. In this work, we introduce Active Evaluation, a framework to efficiently identify the top-ranked system by actively choosing system pairs for comparison using dueling bandit algorithms. We perform extensive experiments with 13 dueling bandits algorithms on 13 NLG evaluation datasets spanning 5 tasks and show that the number of human annotations can be reduced by 80%. To further reduce the number of human annotations, we propose model-based dueling bandit algorithms which combine automatic evaluation metrics with human evaluations. Specifically, we eliminate sub-optimal systems even before the human annotation process and perform human evaluations only on test examples where the automatic metric is highly uncertain. This reduces the number of human annotations required further by 89%. In effect, we show that identifying the top-ranked system requires only a few hundred human annotations, which grow linearly with <tex-math>k</tex-math>. Lastly, we provide practical recommendations and best practices to identify the top-ranked system efficiently. Our code has been made publicly available at <url>https://github.com/akashkm99/duelnlg</url></abstract>
       <url hash="91252a69">2022.acl-long.600</url>
@@ -8274,7 +8274,7 @@ in the Case of Unambiguous Gender</title>
     <meta>
       <booktitle>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <editor><first>Smaranda</first><last>Muresan</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -8305,7 +8305,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Hua</first><last>Shen</last></author>
       <author><first>Tongshuang</first><last>Wu</last></author>
       <author><first>Wenbo</first><last>Guo</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>10-19</pages>
       <abstract>Existing self-explaining models typically favor extracting the shortest possible rationales — snippets of an input text “responsible for” corresponding output — to explain the model prediction, with the assumption that shorter rationales are more intuitive to humans. However, this assumption has yet to be validated. Is the shortest rationale indeed the most human-understandable? To answer this question, we design a self-explaining model, LimitedInk, which allows users to extract rationales at any target length. Compared to existing baselines, LimitedInk achieves compatible end-task performance and human-annotated rationale agreement, making it a suitable representation of the recent class of self-explaining models. We use LimitedInk to conduct a user study on the impact of rationale length, where we ask human judges to predict the sentiment label of documents based only on LimitedInk-generated rationales with different lengths. We show rationales that are too short do not help humans predict labels better than randomly masked text, suggesting the need for more careful design of the best human rationales.</abstract>
       <url hash="96b76aec">2022.acl-short.2</url>
@@ -8318,7 +8318,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Tiago</first><last>Pimentel</last></author>
       <author><first>Thomas</first><last>Clark</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>20-28</pages>
       <abstract>Numerous analyses of reading time (RT) data have been undertaken in the effort to learn more about the internal processes that occur during reading comprehension. However, data measured on words at the end of a sentence–or even clause–is often omitted due to the confounding factors introduced by so-called “wrap-up effects,” which manifests as a skewed distribution of RTs for these words. Consequently, the understanding of the cognitive processes that might be involved in these effects is limited. In this work, we attempt to learn more about these processes by looking for the existence–or absence–of a link between wrap-up effects and information theoretic quantities, such as word and context information content. We find that the information distribution of prior context is often predictive of sentence- and clause-final RTs (while not of sentence-medial RTs), which lends support to several prior hypotheses about the processes involved in wrap-up effects.</abstract>
       <url hash="f264fda6">2022.acl-short.3</url>
@@ -8401,7 +8401,7 @@ in the Case of Unambiguous Gender</title>
       <title>On Efficiently Acquiring Annotations for Multilingual Models</title>
       <author><first>Joel Ruben Antony</first><last>Moniz</last></author>
       <author><first>Barun</first><last>Patra</last></author>
-      <author><first>Matthew</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last></author>
       <pages>69-85</pages>
       <abstract>When tasked with supporting multiple languages for a given problem, two approaches have arisen: training a model for each language with the annotation budget divided equally among them, and training on a high-resource language followed by zero-shot transfer to the remaining languages. In this work, we show that the strategy of joint learning across multiple languages using a single model performs substantially better than the aforementioned alternatives. We also demonstrate that active learning provides additional, complementary benefits. We show that this simple approach enables the model to be data efficient by allowing it to arbitrate its annotation budget to query languages it is less certain on. We illustrate the effectiveness of our proposed method on a diverse set of tasks: a classification task with 4 languages, a sequence tagging task with 4 languages and a dependency parsing task with 5 languages. Our proposed method, whilst simple, substantially outperforms the other viable alternatives for building a model in a multilingual setting under constrained budgets.</abstract>
       <url hash="11bf2504">2022.acl-short.9</url>
@@ -8445,7 +8445,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Amanul</first><last>Haque</last></author>
       <author><first>Vaibhav</first><last>Garg</last></author>
       <author><first>Hui</first><last>Guo</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <pages>106-112</pages>
       <abstract>We present Pixie, a manually annotated dataset for preference classification comprising 8,890 sentences drawn from app reviews. Unlike previous studies on preference classification, Pixie contains implicit (omitting an entity being compared) and indirect (lacking comparative linguistic cues) comparisons. We find that transformer-based pretrained models, finetuned on Pixie, achieve a weighted average F1 score of 83.34% and outperform the existing state-of-the-art preference classification model (73.99%).</abstract>
       <url hash="c7e895be">2022.acl-short.13</url>
@@ -8635,7 +8635,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="28">
       <title>Predicting Sentence Deletions for Text Simplification Using a Functional Discourse Structure</title>
       <author><first>Bohan</first><last>Zhang</last></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>255-261</pages>
       <abstract>Document-level text simplification often deletes some sentences besides performing lexical, grammatical or structural simplification to reduce text complexity. In this work, we focus on sentence deletions for text simplification and use a news genre-specific functional discourse structure, which categorizes sentences based on their contents and their function roles in telling a news story, for predicting sentence deletion. We incorporate sentence categories into a neural net model in two ways for predicting sentence deletions, either as additional features or by jointly predicting sentence deletions and sentence categories. Experimental results using human-annotated data show that incorporating the functional structure improves the recall of sentence deletion prediction by 6.5% and 10.7% respectively using the two methods, and improves the overall F1-score by 3.6% and 4.3% respectively.</abstract>
@@ -8694,7 +8694,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Long</first><last>Bai</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>290-296</pages>
       <abstract>A Temporal Knowledge Graph (TKG) is a sequence of KGs corresponding to different timestamps. TKG reasoning aims to predict potential facts in the future given the historical KG sequences. One key of this task is to mine and understand evolutional patterns of facts from these sequences. The evolutional patterns are complex in two aspects, length-diversity and time-variability. Existing models for TKG reasoning focus on modeling fact sequences of a fixed length, which cannot discover complex evolutional patterns that vary in length. Furthermore, these models are all trained offline, which cannot well adapt to the changes of evolutional patterns from then on. Thus, we propose a new model, called Complex Evolutional Network (CEN), which uses a length-aware Convolutional Neural Network (CNN) to handle evolutional patterns of different lengths via an easy-to-difficult curriculum learning strategy. Besides, we propose to learn the model under the online setting so that it can adapt to the changes of evolutional patterns over time. Extensive experiments demonstrate that CEN obtains substantial performance improvement under both the traditional offline and the proposed online settings.</abstract>
       <url hash="372e4bfd">2022.acl-short.32</url>
@@ -8760,7 +8760,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Damai</first><last>Dai</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>333-339</pages>
       <abstract>Abstract Meaning Representation (AMR) parsing aims to translate sentences to semantic representation with a hierarchical structure, and is recently empowered by pretrained sequence-to-sequence models. However, there exists a gap between their flat training objective (i.e., equally treats all output tokens) and the hierarchical AMR structure, which limits the model generalization. To bridge this gap, we propose a Hierarchical Curriculum Learning (HCL) framework with Structure-level (SC) and Instance-level Curricula (IC). SC switches progressively from core to detail AMR semantic elements while IC transits from structure-simple to -complex AMR instances during training. Through these two warming-up processes, HCL reduces the difficulty of learning complex structures, thus the flat model can better adapt to the AMR hierarchy. Extensive experiments on AMR2.0, AMR3.0, structure-complex and out-of-distribution situations verify the effectiveness of HCL.</abstract>
@@ -8825,7 +8825,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="42">
       <title>k-<fixed-case>R</fixed-case>ater <fixed-case>R</fixed-case>eliability: <fixed-case>T</fixed-case>he Correct Unit of Reliability for Aggregated Human Annotations</title>
       <author><first>Ka</first><last>Wong</last></author>
-      <author><first>Praveen</first><last>Paritosh</last></author>
+      <author id="praveen-paritosh"><first>Praveen</first><last>Paritosh</last></author>
       <pages>378-384</pages>
       <abstract>Since the inception of crowdsourcing, aggregation has been a common strategy for dealing with unreliable data. Aggregate ratings are more reliable than individual ones. However, many Natural Language Processing (NLP) applications that rely on aggregate ratings only report the reliability of individual ratings, which is the incorrect unit of analysis. In these instances, the data reliability is under-reported, and a proposed <tex-math>k</tex-math>-rater reliability (kRR) should be used as the correct data reliability for aggregated datasets. It is a multi-rater generalization of inter-rater reliability (IRR). We conducted two replications of the WordSim-353 benchmark, and present empirical, analytical, and bootstrap-based methods for computing kRR on WordSim-353. These methods produce very similar results. We hope this discussion will nudge researchers to report kRR in addition to IRR.</abstract>
       <url hash="82a59fdf">2022.acl-short.42</url>
@@ -8835,8 +8835,8 @@ in the Case of Unambiguous Gender</title>
     <paper id="43">
       <title>An Embarrassingly Simple Method to Mitigate Undesirable Properties of Pretrained Language Model Tokenizers</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>385-393</pages>
       <abstract>We introduce FLOTA (Few Longest Token Approximation), a simple yet effective method to improve the tokenization of pretrained language models (PLMs). FLOTA uses the vocabulary of a standard tokenizer but tries to preserve the morphological structure of words during tokenization. We evaluate FLOTA on morphological gold segmentations as well as a text classification task, using BERT, GPT-2, and XLNet as example PLMs. FLOTA leads to performance gains, makes inference more efficient, and enhances the robustness of PLMs with respect to whitespace noise.</abstract>
       <url hash="b614856f">2022.acl-short.43</url>
@@ -8860,7 +8860,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Kaitlyn</first><last>Zhou</last></author>
       <author><first>Kawin</first><last>Ethayarajh</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>401-423</pages>
       <abstract>Cosine similarity of contextual embeddings is used in many NLP tasks (e.g., QA, IR, MT) and metrics (e.g., BERTScore). Here, we uncover systematic ways in which word similarities estimated by cosine over BERT embeddings are understated and trace this effect to training data frequency. We find that relative to human judgements, cosine similarity underestimates the similarity of frequent words with other instances of the same word or other words across contexts, even after controlling for polysemy and other factors. We conjecture that this underestimation of similarity for high frequency words is due to differences in the representational geometry of high and low frequency words and provide a formal argument for the two-dimensional case.</abstract>
       <url hash="18aba160">2022.acl-short.45</url>
@@ -8871,7 +8871,7 @@ in the Case of Unambiguous Gender</title>
       <title>Revisiting the Compositional Generalization Abilities of Neural Sequence Models</title>
       <author><first>Arkil</first><last>Patel</last></author>
       <author><first>Satwik</first><last>Bhattamishra</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Navin</first><last>Goyal</last></author>
       <pages>424-434</pages>
       <abstract>Compositional generalization is a fundamental trait in humans, allowing us to effortlessly combine known phrases to form novel sentences. Recent works have claimed that standard seq-to-seq models severely lack the ability to compositionally generalize. In this paper, we focus on one-shot primitive generalization as introduced by the popular SCAN benchmark. We demonstrate that modifying the training distribution in simple and intuitive ways enables standard seq-to-seq models to achieve near-perfect generalization performance, thereby showing that their compositional generalization abilities were previously underestimated. We perform detailed empirical analysis of this phenomenon. Our results indicate that the generalization performance of models is highly sensitive to the characteristics of the training data which should be carefully considered while designing such benchmarks in future.</abstract>
@@ -8900,7 +8900,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Jinheon</first><last>Baek</last></author>
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Sung Ju</first><last>Hwang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>442-452</pages>
       <abstract>Dense retrieval models, which aim at retrieving the most relevant document for an input query on a dense representation space, have gained considerable attention for their remarkable success. Yet, dense models require a vast amount of labeled training data for notable performance, whereas it is often challenging to acquire query-document pairs annotated by humans. To tackle this problem, we propose a simple but effective Document Augmentation for dense Retrieval (DAR) framework, which augments the representations of documents with their interpolation and perturbation. We validate the performance of DAR on retrieval tasks with two benchmark datasets, showing that the proposed DAR significantly outperforms relevant baselines on the dense retrieval of both the labeled and unlabeled documents.</abstract>
       <url hash="0cce5cd7">2022.acl-short.48</url>
@@ -8941,7 +8941,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Robin</first><last>Kurtz</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>470-478</pages>
       <abstract>This paper demonstrates how a graph-based semantic parser can be applied to the task of structured sentiment analysis, directly predicting sentiment graphs from text. We advance the state of the art on 4 out of 5 standard benchmark sets. We release the source code, models and predictions.</abstract>
@@ -8980,7 +8980,7 @@ in the Case of Unambiguous Gender</title>
       <title>How Distributed are Distributed Representations? An Observation on the Locality of Syntactic Information in Verb Agreement Tasks</title>
       <author><first>Bingzhi</first><last>Li</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>501-507</pages>
       <abstract>This work addresses the question of the localization of syntactic information encoded in the transformers representations. We tackle this question from two perspectives, considering the object-past participle agreement in French, by identifying, first, in which part of the sentence and, second, in which part of the representation the syntactic information is encoded. The results of our experiments, using probing, causal analysis and feature selection method, show that syntactic information is encoded locally in a way consistent with the French grammar.</abstract>
       <url hash="9ffdcdc9">2022.acl-short.54</url>
@@ -9034,7 +9034,7 @@ in the Case of Unambiguous Gender</title>
       <title>S<tex-math>^4</tex-math>-Tuning: A Simple Cross-lingual Sub-network Tuning Method</title>
       <author><first>Runxin</first><last>Xu</last></author>
       <author><first>Fuli</first><last>Luo</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Songfang</first><last>Huang</last></author>
       <author><first>Fei</first><last>Huang</last></author>
       <pages>530-537</pages>
@@ -9104,7 +9104,7 @@ in the Case of Unambiguous Gender</title>
       <title>Zero-Shot Dependency Parsing with Worst-Case Aware Automated Curriculum Learning</title>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Sheng</first><last>Zhang</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>578-587</pages>
       <abstract>Large multilingual pretrained language models such as mBERT and XLM-RoBERTa have been found to be surprisingly effective for cross-lingual transfer of syntactic parsing models Wu and Dredze (2019), but only between related languages. However, source and training languages are rarely related, when parsing truly low-resource languages. To close this gap, we adopt a method from multi-task learning, which relies on automated curriculum learning, to dynamically optimize for parsing performance on <i>outlier</i> languages. We show that this approach is significantly better than uniform and size-proportional sampling in the zero-shot setting.</abstract>
       <url hash="ea8a1474">2022.acl-short.64</url>
@@ -9230,7 +9230,7 @@ in the Case of Unambiguous Gender</title>
       <title>Focus on the Target’s Vocabulary: Masked Label Smoothing for Machine Translation</title>
       <author><first>Liang</first><last>Chen</last></author>
       <author><first>Runxin</first><last>Xu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>665-671</pages>
       <abstract>Label smoothing and vocabulary sharing are two widely used techniques in neural machine translation models. However, we argue that simply applying both techniques can be conflicting and even leads to sub-optimal performance. When allocating smoothed probability, original label smoothing treats the source-side words that would never appear in the target language equally to the real target-side words, which could bias the translation model. To address this issue, we propose Masked Label Smoothing (MLS), a new mechanism that masks the soft label probability of source-side words to zero. Simple yet effective, MLS manages to better integrate label smoothing with vocabulary sharing. Our extensive experiments show that MLS consistently yields improvement over original label smoothing on different datasets, including bilingual and multilingual translation from both translation quality and model’s calibration. Our code is released at <url>https://github.com/PKUnlp-icler/MLS</url></abstract>
       <url hash="bc308565">2022.acl-short.74</url>
@@ -9242,7 +9242,7 @@ in the Case of Unambiguous Gender</title>
       <title>Contrastive Learning-Enhanced Nearest Neighbor Mechanism for Multi-Label Text Classification</title>
       <author><first>Xi’ao</first><last>Su</last></author>
       <author><first>Ran</first><last>Wang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>672-679</pages>
       <abstract>Multi-Label Text Classification (MLTC) is a fundamental and challenging task in natural language processing. Previous studies mainly focus on learning text representation and modeling label correlation but neglect the rich knowledge from the existing similar instances when predicting labels of a specific text. To make up for this oversight, we propose a k nearest neighbor (kNN) mechanism which retrieves several neighbor instances and interpolates the model output with their labels. Moreover, we design a multi-label contrastive learning objective that makes the model aware of the kNN classification process and improves the quality of the retrieved neighbors while inference. Extensive experiments show that our method can bring consistent and significant performance improvement to multiple MLTC models including the state-of-the-art pretrained and non-pretrained ones.</abstract>
       <url hash="838db0b6">2022.acl-short.75</url>
@@ -9332,7 +9332,7 @@ in the Case of Unambiguous Gender</title>
       <title>Unsupervised multiple-choice question generation for out-of-domain <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case> fine-tuning</title>
       <author><first>Guillaume</first><last>Le Berre</last></author>
       <author><first>Christophe</first><last>Cerisara</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <pages>732-738</pages>
       <abstract>Pre-trained models have shown very good performances on a number of question answering benchmarks especially when fine-tuned on multiple question answering datasets at once. In this work, we propose an approach for generating a fine-tuning dataset thanks to a rule-based algorithm that generates questions and answers from unannotated sentences. We show that the state-of-the-art model UnifiedQA can greatly benefit from such a system on a multiple-choice benchmark about physics, biology and chemistry it has never been trained on. We further show that improved performances may be obtained by selecting the most challenging distractors (wrong answers), with a dedicated ranker based on a pretrained RoBERTa model.</abstract>
@@ -9355,7 +9355,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="85">
       <title>Probing the Robustness of Trained Metrics for Conversational Dialogue Systems</title>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <author><first>Pius</first><last>Von Däniken</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
@@ -9432,7 +9432,7 @@ in the Case of Unambiguous Gender</title>
     </paper>
     <paper id="91">
       <title>Code Synonyms Do Matter: Multiple Synonyms Matching Network for Automatic <fixed-case>ICD</fixed-case> Coding</title>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Chuanqi</first><last>Tan</last></author>
       <author><first>Songfang</first><last>Huang</last></author>
       <pages>808-814</pages>
@@ -9446,7 +9446,7 @@ in the Case of Unambiguous Gender</title>
       <title><fixed-case>C</fixed-case>o<fixed-case>DA</fixed-case>21: Evaluating Language Understanding Capabilities of <fixed-case>NLP</fixed-case> Models With Context-Definition Alignment</title>
       <author><first>Lütfi Kerem</first><last>Senel</last></author>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>815-824</pages>
       <abstract>Pretrained language models (PLMs) have achieved superhuman performance on many benchmarks, creating a need for harder tasks. We introduce CoDA21 (Context Definition Alignment), a challenging benchmark that measures natural language understanding (NLU) capabilities of PLMs: Given a definition and a context each for k words, but not the words themselves, the task is to align the k definitions with the k contexts. CoDA21 requires a deep understanding of contexts and definitions, including complex inference and world knowledge. We find that there is a large gap between human and PLM performance, suggesting that CoDA21 measures an aspect of NLU that is not sufficiently covered in existing benchmarks.</abstract>
       <url hash="0abc883d">2022.acl-short.92</url>
@@ -9604,7 +9604,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="7">
       <title>What Do You Mean by Relation Extraction? A Survey on Datasets and Study on Scientific Relation Classification</title>
       <author><first>Elisa</first><last>Bassignana</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>67-83</pages>
       <abstract>Over the last five years, research on Relation Extraction (RE) witnessed extensive progress with many new dataset releases. At the same time, setup clarity has decreased, contributing to increased difficulty of reliable empirical evaluation (Taillé et al., 2020). In this paper, we provide a comprehensive survey of RE datasets, and revisit the task definition and its adoption by the community. We find that cross-dataset and cross-domain setups are particularly lacking. We present an empirical study on scientific Relation Classification across two datasets. Despite large data overlap, our analysis reveals substantial discrepancies in annotation. Annotation discrepancies strongly impact Relation Classification performance, explaining large drops in cross-dataset evaluations. Variation within further sub-domains exists but impacts Relation Classification only to limited degrees. Overall, our study calls for more rigour in reporting setups in RE and evaluation across multiple test sets.</abstract>
       <url hash="b64e8ad5">2022.acl-srw.7</url>
@@ -9669,7 +9669,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="13">
       <title><fixed-case>AMR</fixed-case> Alignment for Morphologically-rich and Pro-drop Languages</title>
       <author><first>K. Elif</first><last>Oral</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>143-152</pages>
       <abstract>Alignment between concepts in an abstract meaning representation (AMR) graph and the words within a sentence is one of the important stages of AMR parsing. Although there exist high performing AMR aligners for English, unfortunately, these are not well suited for many languages where many concepts appear from morpho-semantic elements. For the first time in the literature, this paper presents an AMR aligner tailored for morphologically-rich and pro-drop languages by experimenting on the Turkish language being a prominent example of this language group. Our aligner focuses on the meaning considering the rich Turkish morphology and aligns AMR concepts that emerge from morphemes using a tree traversal approach without additional resources or rules. We evaluate our aligner over a manually annotated gold data set in terms of precision, recall and F1 score. Our aligner outperforms the Turkish adaptations of the previously proposed aligners for English and Portuguese by an F1 score of 0.87 and provides a relative error reduction of up to 76%.</abstract>
       <url hash="94b71440">2022.acl-srw.13</url>
@@ -9724,7 +9724,7 @@ in the Case of Unambiguous Gender</title>
       <title>Restricted or Not: A General Training Framework for Neural Machine Translation</title>
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>245-251</pages>
       <abstract>Restricted machine translation incorporates human prior knowledge into translation. It restricts the flexibility of the translation to satisfy the demands of translation in specific scenarios. Existing work typically imposes constraints on beam search decoding. Although this can satisfy the requirements overall, it usually requires a larger beam size and far longer decoding time than unrestricted translation, which limits the concurrent processing ability of the translation model in deployment, and thus its practicality. In this paper, we propose a general training framework that allows a model to simultaneously support both unrestricted and restricted translation by adopting an additional auxiliary training process without constraining the decoding process. This maintains the benefits of restricted translation but greatly reduces the extra time overhead of constrained decoding, thus improving its practicality. The effectiveness of our proposed training framework is demonstrated by experiments on both original (WAT21 En<tex-math>\leftrightarrow</tex-math>Ja) and simulated (WMT14 En<tex-math>\rightarrow</tex-math>De and En<tex-math>\rightarrow</tex-math>Fr) restricted translation benchmarks.</abstract>
@@ -9803,7 +9803,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="25">
       <title>Mining Logical Event Schemas From Pre-Trained Language Models</title>
       <author><first>Lane</first><last>Lawley</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>332-345</pages>
       <abstract>We present NESL (the Neuro-Episodic Schema Learner), an event schema learning system that combines large language models, FrameNet parsing, a powerful logical representation of language, and a set of simple behavioral schemas meant to bootstrap the learning process. In lieu of a pre-made corpus of stories, our dataset is a continuous feed of “situation samples” from a pre-trained language model, which are then parsed into FrameNet frames, mapped into simple behavioral schemas, and combined and generalized into complex, hierarchical schemas for a variety of everyday scenarios. We show that careful sampling from the language model can help emphasize stereotypical properties of situations and de-emphasize irrelevant details, and that the resulting schemas specify situations more comprehensively than those learned by other systems.</abstract>
       <url hash="5952286a">2022.acl-srw.25</url>
@@ -9891,7 +9891,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Belen</first><last>Alastruey</last></author>
       <author><first>Javier</first><last>Ferrando</last></author>
       <author><first>Gerard I.</first><last>Gállego</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>402-412</pages>
       <abstract>Transformers have achieved state-of-the-art results across multiple NLP tasks. However, the self-attention mechanism complexity scales quadratically with the sequence length, creating an obstacle for tasks involving long sequences, like in the speech domain. In this paper, we discuss the usefulness of self-attention for Direct Speech Translation. First, we analyze the layer-wise token contributions in the self-attention of the encoder, unveiling local diagonal patterns. To prove that some attention weights are avoidable, we propose to substitute the standard self-attention with a local efficient one, setting the amount of context used based on the results of the analysis. With this approach, our model matches the baseline performance, and improves the efficiency by skipping the computation of those weights that standard attention discards.</abstract>
       <url hash="8fe5f60d">2022.acl-srw.32</url>
@@ -9926,7 +9926,7 @@ in the Case of Unambiguous Gender</title>
       <title>Towards Fine-grained Classification of Climate Change related Social Media Text</title>
       <author><first>Roopal</first><last>Vaid</last></author>
       <author><first>Kartikey</first><last>Pant</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>434-443</pages>
       <abstract>With climate change becoming a cause of concern worldwide, it becomes essential to gauge people’s reactions. This can help educate and spread awareness about it and help leaders improve decision-making. This work explores the fine-grained classification and Stance detection of climate change-related social media text. Firstly, we create two datasets, ClimateStance and ClimateEng, consisting of 3777 tweets each, posted during the 2019 United Nations Framework Convention on Climate Change and comprehensively outline the dataset collection, annotation methodology, and dataset composition. Secondly, we propose the task of Climate Change stance detection based on our proposed ClimateStance dataset. Thirdly, we propose a fine-grained classification based on the ClimateEng dataset, classifying social media text into five categories: Disaster, Ocean/Water, Agriculture/Forestry, Politics, and General. We benchmark both the datasets for climate change stance detection and fine-grained classification using state-of-the-art methods in text classification. We also create a Reddit-based dataset for both the tasks, ClimateReddit, consisting of 6262 pseudo-labeled comments along with 329 manually annotated comments for the label. We then perform semi-supervised experiments for both the tasks and benchmark their results using the best-performing model for the supervised experiments. Lastly, we provide insights into the ClimateStance and ClimateReddit using part-of-speech tagging and named-entity recognition.</abstract>
       <url hash="285efec6">2022.acl-srw.35</url>
@@ -9967,7 +9967,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="39">
       <title>A Dataset and <fixed-case>BERT</fixed-case>-based Models for Targeted Sentiment Analysis on <fixed-case>T</fixed-case>urkish Texts</title>
       <author><first>Mustafa Melih</first><last>Mutlu</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>467-472</pages>
       <abstract>Targeted Sentiment Analysis aims to extract sentiment towards a particular target from a given text. It is a field that is attracting attention due to the increasing accessibility of the Internet, which leads people to generate an enormous amount of data. Sentiment analysis, which in general requires annotated data for training, is a well-researched area for widely studied languages such as English. For low-resource languages such as Turkish, there is a lack of such annotated data. We present an annotated Turkish dataset suitable for targeted sentiment analysis. We also propose BERT-based models with different architectures to accomplish the task of targeted sentiment analysis. The results demonstrate that the proposed models outperform the traditional sentiment analysis models for the targeted sentiment analysis task.</abstract>
       <url hash="4ac36fe8">2022.acl-srw.39</url>
@@ -9980,7 +9980,7 @@ in the Case of Unambiguous Gender</title>
       <booktitle>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations</booktitle>
       <editor><first>Valerio</first><last>Basile</last></editor>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
-      <editor><first>Sanja</first><last>Stajner</last></editor>
+      <editor id="sanja-stajner"><first>Sanja</first><last>Stajner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>May</month>
@@ -10019,7 +10019,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Leonardo F. R.</first><last>Ribeiro</last></author>
       <author><first>Jonas</first><last>Pfeiffer</last></author>
       <author><first>Nils</first><last>Reimers</last></author>
-      <author><first>Gözde</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>9-22</pages>
       <abstract>Recent advances in NLP and information retrieval have given rise to a diverse set of question answering tasks that are of different formats (e.g., extractive, abstractive), require different model architectures (e.g., generative, discriminative), and setups (e.g., with or without retrieval). Despite having a large number of powerful, specialized QA pipelines (which we refer to as Skills) that consider a single domain, model or setup, there exists no framework where users can easily explore and compare such pipelines and can extend them according to their needs. To address this issue, we present UKP-SQuARE, an extensible online QA platform for researchers which allows users to query and analyze a large collection of modern Skills via a user-friendly web interface and integrated behavioural tests. In addition, QA researchers can develop, manage, and share their custom Skills using our microservices that support a wide range of models (Transformers, Adapters, ONNX), datastores and retrieval techniques (e.g., sparse and dense). UKP-SQuARE is available on <url>https://square.ukp-lab.de</url></abstract>
@@ -10143,9 +10143,9 @@ in the Case of Unambiguous Gender</title>
       <author><first>Urmish</first><last>Thakker</last></author>
       <author><first>Khalid</first><last>Almubarak</last></author>
       <author><first>Xiangru</first><last>Tang</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Mike Tian-jian</first><last>Jiang</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>93-104</pages>
       <abstract>PromptSource is a system for creating, sharing, and using natural language prompts. Prompts are functions that map an example from a dataset to a natural language input and target output. Using prompts to train and query language models is an emerging area in NLP that requires new tools that let users develop and refine these prompts collaboratively. PromptSource addresses the emergent challenges in this new setting with (1) a templating language for defining data-linked prompts, (2) an interface that lets users quickly iterate on prompt development by observing outputs of their prompts on many examples, and (3) a community-driven set of guidelines for contributing new prompts to a common pool. Over 2,000 prompts for roughly 170 datasets are already available in PromptSource. PromptSource is available at <url>https://github.com/bigscience-workshop/promptsource</url>.</abstract>
       <url hash="1e9e1db2">2022.acl-demo.9</url>
@@ -10178,7 +10178,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Richard</first><last>Shin</last></author>
       <author><first>Subhro</first><last>Roy</last></author>
       <author><first>Aleksandr</first><last>Nisnevich</last></author>
-      <author><first>Charles</first><last>Chen</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>114-126</pages>
       <abstract>Collecting data for conversational semantic parsing is a time-consuming and demanding process. In this paper we consider, given an incomplete dataset with only a small amount of data, how to build an AI-powered human-in-the-loop process to enable efficient data collection. A guided K-best selection process is proposed, which (i) generates a set of possible valid candidates; (ii) allows users to quickly traverse the set and filter incorrect parses; and (iii) asks users to select the correct parse, with minimal modification when necessary. We investigate how to best support users in efficiently traversing the candidate set and locating the correct parse, in terms of speed and accuracy. In our user study, consisting of five annotators labeling 300 instances each, we find that combining keyword searching, where keywords can be used to query relevant candidates, and keyword suggestion, where representative keywords are automatically generated, enables fast and accurate annotation.</abstract>
@@ -10191,7 +10191,7 @@ in the Case of Unambiguous Gender</title>
       <title>Hard and Soft Evaluation of <fixed-case>NLP</fixed-case> models with <fixed-case>BOO</fixed-case>t<fixed-case>ST</fixed-case>rap <fixed-case>SA</fixed-case>mpling - <fixed-case>B</fixed-case>oo<fixed-case>S</fixed-case>t<fixed-case>S</fixed-case>a</title>
       <author><first>Tommaso</first><last>Fornaciari</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>127-134</pages>
       <abstract>Natural Language Processing (NLP) ‘s applied nature makes it necessary to select the most effective and robust models. Producing slightly higher performance is insufficient; we want to know whether this advantage will carry over to other data sets. Bootstrapped significance tests can indicate that ability. So while necessary, computing the significance of models’ performance differences has many levels of complexity. It can be tedious, especially when the experimental design has many conditions to compare and several runs of experiments. We present BooStSa, a tool that makes it easy to compute significance levels with the BOOtSTrap SAmpling procedure to evaluate models that predict not only standard hard labels but soft-labels (i.e., probability distributions over different classes) as well.</abstract>
@@ -10205,7 +10205,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Ziqi</first><last>Wang</last></author>
       <author><first>Yi-shyuan</first><last>Chiang</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Pengfei</first><last>Yu</last></author>
       <author><first>Zixuan</first><last>Zhang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
@@ -10300,7 +10300,7 @@ in the Case of Unambiguous Gender</title>
       <title>Cue-bot: A Conversational Agent for Assistive Technology</title>
       <author><first>Shachi</first><last>H Kumar</last></author>
       <author><first>Hsuan</first><last>Su</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Maximilian C.</first><last>Pinaroc</last></author>
       <author><first>Sai</first><last>Prasad</last></author>
       <author><first>Saurav</first><last>Sahay</last></author>
@@ -10398,7 +10398,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Daniel</first><last>Loureiro</last></author>
       <author><first>Francesco</first><last>Barbieri</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Jose</first><last>Camacho-collados</last></author>
       <pages>251-260</pages>
       <abstract>Despite its importance, the time variable has been largely neglected in the NLP and language model literature. In this paper, we present TimeLMs, a set of language models specialized on diachronic Twitter data. We show that a continual learning strategy contributes to enhancing Twitter-based language models’ capacity to deal with future and out-of-distribution tweets, while making them competitive with standardized and more monolithic benchmarks. We also perform a number of qualitative analyses showing how they cope with trends and peaks in activity involving specific named entities or concept drift. TimeLMs is available at github.com/cardiffnlp/timelms.</abstract>
@@ -10423,7 +10423,7 @@ in the Case of Unambiguous Gender</title>
       <title><fixed-case>Q</fixed-case>uick<fixed-case>G</fixed-case>raph: A Rapid Annotation Tool for Knowledge Graph Extraction from Technical Text</title>
       <author><first>Tyler</first><last>Bikaun</last></author>
       <author><first>Michael</first><last>Stewart</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <pages>270-278</pages>
       <abstract>Acquiring high-quality annotated corpora for complex multi-task information extraction (MT-IE) is an arduous and costly process for human-annotators. Adoption of unsupervised techniques for automated annotation have thus become popular. However, these techniques rely heavily on dictionaries, gazetteers, and knowledge bases. While such resources are abundant for general domains, they are scarce for specialised technical domains. To tackle this challenge, we present QuickGraph, the first collaborative MT-IE annotation tool built with indirect weak supervision and clustering to maximise annotator productivity. QuickGraph’s main contribution is a set of novel features that enable knowledge graph extraction through rapid and consistent complex multi-task entity and relation annotation. In this paper, we discuss these key features and qualitatively compare QuickGraph to existing annotation tools.</abstract>
       <url hash="fb186997">2022.acl-demo.27</url>
@@ -10436,7 +10436,7 @@ in the Case of Unambiguous Gender</title>
     <meta>
       <booktitle>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <editor><first>Luciana</first><last>Benotti</last></editor>
-      <editor><first>Naoaki</first><last>Okazaki</last></editor>
+      <editor id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></editor>
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -10452,7 +10452,7 @@ in the Case of Unambiguous Gender</title>
     </frontmatter>
     <paper id="1">
       <title>A Gentle Introduction to Deep Nets and Opportunities for the Future</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Gary</first><last>Marcus</last></author>
       <author><first>Ernest</first><last>Davis</last></author>
@@ -10489,7 +10489,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Yichong</first><last>Xu</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Meng</first><last>Jiang</last></author>
       <author><first>Wenhao</first><last>Yu</last></author>
       <pages>12-20</pages>
@@ -10517,7 +10517,7 @@ in the Case of Unambiguous Gender</title>
     <paper id="5">
       <title>Learning with Limited Text Data</title>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Colin</first><last>Raffel</last></author>
       <pages>28-31</pages>
       <abstract>Natural Language Processing (NLP) has achieved great progress in the past decade on the basis of neural models, which often make use of large amounts of labeled data to achieve state-of-the-art performance. The dependence on labeled data prevents NLP models from being applied to low-resource settings and languages because of the time, money, and expertise that is often required to label massive amounts of textual data. Consequently, the ability to learn with limited labeled data is crucial for deploying neural systems to real-world NLP applications. Recently, numerous approaches have been explored to alleviate the need for labeled data in NLP such as data augmentation and semi-supervised learning. This tutorial aims to provide a systematic and up-to-date overview of these methods in order to help researchers and practitioners understand the landscape of approaches and the challenges associated with learning from limited labeled data, an emerging topic in the computational linguistics community. We will consider applications to a wide variety of NLP tasks (including text classification, generation, and structured prediction) and will highlight current challenges and future directions.</abstract>
diff --git a/data/xml/2022.alta.xml b/data/xml/2022.alta.xml
index 34f96f2750..9e77f8b48b 100644
--- a/data/xml/2022.alta.xml
+++ b/data/xml/2022.alta.xml
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>Using public domain resources and off-the-shelf tools to produce high-quality multimedia texts</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Belinda</first><last>Chiera</last></author>
       <author><first>Cathy</first><last>Chua</last></author>
       <pages>6–15</pages>
@@ -48,7 +48,7 @@
       <author><first>Fatemeh</first><last>Shiri</last></author>
       <author><first>Tongtong</first><last>Wu</last></author>
       <author><first>Yuanfang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>22–30</pages>
       <url hash="a650b4f6">2022.alta-1.4</url>
       <bibkey>shiri-etal-2022-tcg</bibkey>
@@ -57,7 +57,7 @@
       <title>Complex Reading Comprehension Through Question Decomposition</title>
       <author><first>Xiao-Yu</first><last>Guo</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>31–40</pages>
       <url hash="272ff661">2022.alta-1.5</url>
       <bibkey>guo-etal-2022-complex</bibkey>
@@ -84,7 +84,7 @@
       <title>Robustness of Hybrid Models in Cross-domain Readability Assessment</title>
       <author><first>Ho Hung</first><last>Lim</last></author>
       <author><first>Tianyuan</first><last>Cai</last></author>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <author><first>Meichun</first><last>Liu</last></author>
       <pages>62–67</pages>
       <url hash="2baaa8f6">2022.alta-1.8</url>
@@ -102,7 +102,7 @@
       <author><first>Jinghui</first><last>Liu</last></author>
       <author><first>Daniel</first><last>Capurro</last></author>
       <author><first>Anthony</first><last>Nguyen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>73–83</pages>
       <url hash="e657ba85">2022.alta-1.10</url>
       <bibkey>liu-etal-2022-improving</bibkey>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="13">
       <title>Stability of Forensic Text Comparison System</title>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <author><first>Shunichi</first><last>Ishihara</last></author>
       <pages>98–106</pages>
       <url hash="14e23af6">2022.alta-1.13</url>
@@ -154,7 +154,7 @@
       <title>Automatic Explanation Generation For Climate Science Claims</title>
       <author><first>Rui</first><last>Xing</last></author>
       <author><first>Shraey</first><last>Bhatia</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>122–129</pages>
       <abstract>Climate change is an existential threat to humanity, the proliferation of unsubstantiated claims relating to climate science is manipulating public perception, motivating the need for fact-checking in climate science. In this work, we draw on recent work that uses retrieval-augmented generation for veracity prediction and explanation generation, in framing explanation generation as a query-focused multi-document summarization task. We adapt PRIMERA to the climate science domain by adding additional global attention on claims. Through automatic evaluation and qualitative analysis, we demonstrate that our method is effective at generating explanations.</abstract>
@@ -173,7 +173,7 @@
     <paper id="18">
       <title>Evaluating the Examiner: The Perils of <fixed-case>P</fixed-case>earson Correlation for Validating Text Similarity Metrics</title>
       <author><first>Gisela</first><last>Vallejo</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>130–138</pages>
       <abstract>In recent years, researchers have developed question-answering based approaches to automatically evaluate system summaries, reporting improved validity compared to word overlap-based metrics like ROUGE, in terms of correlation with human ratings of criteria including fluency and hallucination. In this paper, we take a closer look at one particular metric, QuestEval, and ask whether: (1) it can serve as a more general metric for long document similarity assessment; and (2) a single correlation score between metric scores and human ratings, as the currently standard approach, is sufficient for metric validation. We find that correlation scores can be misleading, and that score distributions and outliers should be taken into account. With these caveats in mind, QuestEval can be a promising candidate for long document similarity assessment.</abstract>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="24">
       <title>Overview of the 2022 <fixed-case>ALTA</fixed-case> Shared task: <fixed-case>PIBOSO</fixed-case> sentence classification, 10 years later</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>178–182</pages>
       <abstract>The 2022 ALTA shared task has been running annually since 2010. This year, the shared task is a re-visit of the 2012 ALTA shared task. The purpose of this task is to classify sentences of medical publications using the PIBOSO taxonomy. This is a multi-label classification task which can help medical researchers and practitioners conduct Evidence Based Medicine (EBM). In this paper we present the task, the evaluation criteria, and the results of the systems participating in the shared task.</abstract>
       <url hash="06051006">2022.alta-1.24</url>
diff --git a/data/xml/2022.amta.xml b/data/xml/2022.amta.xml
index 09f27f45f5..98023a207c 100644
--- a/data/xml/2022.amta.xml
+++ b/data/xml/2022.amta.xml
@@ -8,7 +8,7 @@
       <month>September</month>
       <year>2022</year>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Francisco</first><last>Guzmán</last></editor>
+      <editor id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></editor>
       <url hash="d9ed47cc">2022.amta-research</url>
       <venue>amta</venue>
     </meta>
@@ -21,7 +21,7 @@
       <author><first>Pintu</first><last>Lohar</last></author>
       <author><first>Sinead</first><last>Madden</last></author>
       <author><first>Edmond</first><last>O’Connor</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Tanya</first><last>Habruseva</last></author>
       <pages>1-13</pages>
       <url hash="7b97ae54">2022.amta-research.1</url>
@@ -32,7 +32,7 @@
       <title>Domain-Specific Text Generation for Machine Translation</title>
       <author><first>Yasmin</first><last>Moslem</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>14-30</pages>
       <url hash="f1da3e1d">2022.amta-research.2</url>
@@ -64,7 +64,7 @@
       <author><first>Mohamed</first><last>Afify</last></author>
       <author><first>Young Jin</first><last>Kim</last></author>
       <author><first>Hitokazu</first><last>Matsushita</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>58-69</pages>
       <url hash="40cc328d">2022.amta-research.5</url>
       <abstract>Multilingual Neural Machine Translation has been showing great success using transformer models. Deploying these models is challenging because they usually require large vocabulary (vocab) sizes for various languages. This limits the speed of predicting the output tokens in the last vocab projection layer. To alleviate these challenges, this paper proposes a fast vocabulary projection method via clustering which can be used for multilingual transformers on GPUs. First, we offline split the vocab search space into disjoint clusters given the hidden context vector of the decoder output, which results in much smaller vocab columns for vocab projection. Second, at inference time, the proposed method predicts the clusters and candidate active tokens for hidden context vectors at the vocab projection. This paper also includes analysis of different ways of building these clusters in multilingual settings. Our results show end-to-end speed gains in float16 GPU inference up to 25% while maintaining the BLEU score and slightly increasing memory cost. The proposed method speeds up the vocab projection step itself by up to 2.6x. We also conduct an extensive human evaluation to verify the proposed method preserves the quality of the translations from the original model.</abstract>
@@ -72,11 +72,11 @@
     </paper>
     <paper id="6">
       <title>Language Tokens: A Frustratingly Simple Approach Improves Zero-Shot Performance of Multilingual Translation</title>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Amr</first><last>Hendy</last></author>
       <author><first>Mohamed</first><last>Maher</last></author>
       <author><first>Mohamed</first><last>Afify</last></author>
-      <author><first>Hany</first><last>Hassan Awadalla</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan Awadalla</last></author>
       <pages>70-82</pages>
       <url hash="c4e3b27e">2022.amta-research.6</url>
       <abstract>This paper proposes a simple yet effective method to improve direct (X-to-Y) translation for both cases: zero-shot and when direct data is available. We modify the input tokens at both the encoder and decoder to include signals for the source and target languages. We show a performance gain when training from scratch, or finetuning a pretrained model with the pro- posed setup. In the experiments, our method shows nearly 10.0 BLEU points gain on in-house datasets depending on the checkpoint selection criteria. In a WMT evaluation campaign, From- English performance improves by 4.17 and 2.87 BLEU points, in the zero-shot setting, and when direct data is available for training, respectively. While X-to-Y improves by 1.29 BLEU over the zero-shot baseline, and 0.44 over the many-to-many baseline. In the low-resource setting, we see a 1.5 ∼ 1.7 point improvement when finetuning on X-to-Y domain data.</abstract>
@@ -122,7 +122,7 @@
     <paper id="10">
       <title>On the Effectiveness of Quasi Character-Level Models for Machine Translation</title>
       <author><first>Salvador</first><last>Carrión</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>131-143</pages>
       <url hash="b53f65d5">2022.amta-research.10</url>
       <abstract>Neural Machine Translation (NMT) models often use subword-level vocabularies to deal with rare or unknown words. Although some studies have shown the effectiveness of purely character-based models, these approaches have resulted in highly expensive models in computational terms. In this work, we explore the benefits of quasi-character-level models for very low-resource languages and their ability to mitigate the effects of the catastrophic forgetting problem. First, we conduct an empirical study on the efficacy of these models, as a function of the vocabulary and training set size, for a range of languages, domains, and architectures. Next, we study the ability of these models to mitigate the effects of catastrophic forgetting in machine translation. Our work suggests that quasi-character-level models have practically the same generalization capabilities as character-based models but at lower computational costs. Furthermore, they appear to help achieve greater consistency between domains than standard subword-level models, although the catastrophic forgetting problem is not mitigated.</abstract>
@@ -153,7 +153,7 @@
     <paper id="13">
       <title>Limitations and Challenges of Unsupervised Cross-lingual Pre-training</title>
       <author><first>Martín</first><last>Quesada Zaragoza</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>175-187</pages>
       <url hash="b17e42b1">2022.amta-research.13</url>
       <abstract>Cross-lingual alignment methods for monolingual language representations have received notable attention in recent years. However, their use in machine translation pre-training remains scarce. This work tries to shed light on the effects of some of the factors that play a role in cross-lingual pre-training, both for cross-lingual mappings and their integration in supervised neural models. The results show that unsupervised cross-lingual methods are effective at inducing alignment even for distant languages and they benefit noticeably from subword information. However, we find that their effectiveness as pre-training models in machine translation is severely limited due to their cross-lingual signal being easily distorted by the principal network during training. Moreover, the learned bilingual projection is too restrictive to allow said network to learn properly when the embedding weights are frozen.</abstract>
@@ -162,7 +162,7 @@
     <paper id="14">
       <title>Few-Shot Regularization to Tackle Catastrophic Forgetting in Multilingual Machine Translation</title>
       <author><first>Salvador</first><last>Carrión</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>188-199</pages>
       <url hash="fd8d6bcf">2022.amta-research.14</url>
       <abstract>Increasing the number of tasks supported by a machine learning model without forgetting previously learned tasks is the goal of any lifelong learning system. In this work, we study how to mitigate the effects of the catastrophic forgetting problem to sequentially train a multilingual neural machine translation model using minimal past information. First, we describe the catastrophic forgetting phenomenon as a function of the number of tasks learned (language pairs) and the ratios of past data used during the learning of the new task. Next, we explore the importance of applying oversampling strategies for scenarios where only minimal amounts of past data are available. Finally, we derive a new loss function that minimizes the forgetting of previously learned tasks by actively re-weighting past samples and penalizing weights that deviate too much from the original model. Our work suggests that by using minimal amounts of past data and a simple regularization function, we can significantly mitigate the effects of the catastrophic forgetting phenomenon without increasing the computational costs.</abstract>
@@ -188,9 +188,9 @@
       <title>Refining an Almost Clean Translation Memory Helps Machine Translation</title>
       <author><first>Shivendra</first><last>Bhardwa</last></author>
       <author><first>David</first><last>Alfonso-Hermelo</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>215-226</pages>
       <url hash="38116128">2022.amta-research.16</url>
@@ -200,8 +200,8 @@
     <paper id="17">
       <title>Practical Attacks on Machine Translation using Paraphrase</title>
       <author><first>Elizabeth M</first><last>Merkhofer</last></author>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Abigail</first><last>Gertner</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="abigail-s-gertner"><first>Abigail</first><last>Gertner</last></author>
       <author><first>Michael</first><last>Doyle</last></author>
       <author><first>Lily</first><last>Wong</last></author>
       <pages>227-239</pages>
@@ -222,7 +222,7 @@
     <paper id="19">
       <title>A Neural Machine Translation Approach to Translate Text to Pictographs in a Medical Speech Translation System - The <fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>r Use Case</title>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Magali</first><last>Norré</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Lucia Ormaechea</first><last>Grijalba</last></author>
@@ -282,7 +282,7 @@
       <author><first>Cynthia</first><last>Gao</last></author>
       <author><first>Janice</first><last>Lam</last></author>
       <author><first>Francisco</first><last>Guzman</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>309-321</pages>
       <url hash="bd5843c5">2022.amta-research.24</url>
@@ -400,7 +400,7 @@
       <title>A Multimodal Simultaneous Interpretation Prototype: Who Said What</title>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>132-143</pages>
       <url hash="3054b6c6">2022.amta-upg.10</url>
       <abstract>“Who said what” is essential for users to understand video streams that have more than one speaker, but conventional simultaneous interpretation systems merely present “what was said” in the form of subtitles. Because the translations unavoidably have delays and errors, users often find it difficult to trace the subtitles back to speakers. To address this problem, we propose a multimodal SI system that presents users “who said what”. Our system takes audio-visual approaches to recognize the speaker of each sentence, and then annotates its translation with the textual tag and face icon of the speaker, so that users can quickly understand the scenario. Furthermore, our system is capable of interpreting video streams in real-time on a single desktop equipped with two Quadro RTX 4000 GPUs owing to an efficient sentence-based architecture.</abstract>
@@ -473,7 +473,7 @@
       <author><first>Craig A</first><last>Stewart</last></author>
       <author><first>Madalena</first><last>Gonçalves</last></author>
       <author><first>Marianna</first><last>Buchicchio</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>231-256</pages>
       <abstract>Frameworks such as Multidimensional Quality Metrics (MQM) provide detailed feedback on translation quality and can pinpoint concrete linguistic errors. The quality of a translation is, however, also closely tied to its utility in a particular use case. Many customers have highly subjective expectations of translation quality. Features such as register, discourse style and brand consistency can be difficult to accommodate given a broadly applied translation solution. In this presentation we will introduce the concept of Business Critical Errors (BCE). Adapted from MQM, the BCE framework provides a perspective on translation quality that allows us to be reactive and adaptive to expectation whilst also maintaining consistency in our translation evaluation. We will demonstrate tooling used at Unbabel that allows us to evaluate the performance of our MT models on BCE using specialized test suites as well as the ability of our AI evaluation models to successfully capture BCE information.</abstract>
       <attachment type="presentation" hash="03ed7c9b">2022.amta-upg.17.Presentation.pdf</attachment>
@@ -500,7 +500,7 @@
     <paper id="20">
       <title>Boosting Neural Machine Translation with Similar Translations</title>
       <author><first>Jitao</first><last>Xu</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>282-292</pages>
       <url hash="dc18e1a3">2022.amta-upg.20</url>
@@ -637,7 +637,7 @@
       <title>Robust Translation of <fixed-case>F</fixed-case>rench Live Speech Transcripts</title>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
       <author><first>Guillaume</first><last>Klein</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>455-464</pages>
       <url hash="181d7d5f">2022.amta-upg.32</url>
@@ -646,9 +646,9 @@
     </paper>
     <paper id="33">
       <title>Speech-to-Text and Evaluation of Multiple Machine Translation Systems</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <author><first>Steven</first><last>Van Guilder</last></author>
-      <author><first>Jennifer</first><last>Doyon</last></author>
+      <author id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></author>
       <author><first>Ekaterina</first><last>Harke</last></author>
       <pages>465-472</pages>
       <url hash="3ad931b4">2022.amta-upg.33</url>
@@ -660,15 +660,14 @@
     <meta>
       <booktitle>Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation)</booktitle>
       <publisher>Association for Machine Translation in the Americas</publisher>
-      <address/>
       <month>September</month>
       <year>2022</year>
       <editor><first>John E.</first><last>Ortega</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
       <editor><first>William</first><last>Chen</last></editor>
-      <editor><first>Katharina</first><last>Kann</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></editor>
       <editor><first>Constantine</first><last>Lignos</last></editor>
-      <editor><first>Maja</first><last>Popovic</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popovic</last></editor>
       <editor><first>Shabnam</first><last>Tafreshi</last></editor>
       <url hash="440fe20d">2022.amta-coco4mt</url>
       <venue>amta</venue>
@@ -703,7 +702,7 @@
     <paper id="3">
       <title>Building and Analysis of <fixed-case>T</fixed-case>amil Lyric Corpus with Semantic Representation</title>
       <author><first>Karthika</first><last>Ranganathan</last></author>
-      <author><first>Geetha</first><last>T V</last></author>
+      <author id="t-v-geetha"><first>Geetha</first><last>T V</last></author>
       <pages>18-27</pages>
       <url hash="b395a5a2">2022.amta-coco4mt.3</url>
       <abstract>In the new era of modern technology, the cloud has become the library for many things including entertainment, i.e, the availability of lyrics. In order to create awareness about the language and to increase the interest in Tamil film lyrics, a computerized electronic format of Tamil lyrics corpus is necessary for mining the lyric documents. In this paper, the Tamil lyric corpus was collected from various books and lyric websites. Here, we also address the challenges faced while building this corpus. A corpus was created with 15286 documents and stored all the lyric information obtained in the XML format. In this paper, we also explained the Universal Networking Language (UNL) semantic representation that helps to represent the document in a language and domain independent ways. We evaluated this corpus by performing simple statistical analysis for characters, words and a few rhetorical effect analysis. We also evaluated our semantic representation with the existing work and the results are very encouraging.</abstract>
diff --git a/data/xml/2022.argmining.xml b/data/xml/2022.argmining.xml
index 7969d577b4..66791b0876 100644
--- a/data/xml/2022.argmining.xml
+++ b/data/xml/2022.argmining.xml
@@ -23,7 +23,7 @@
       <author><first>Zhexiong</first><last>Liu</last></author>
       <author><first>Meiqi</first><last>Guo</last></author>
       <author><first>Yue</first><last>Dai</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1–18</pages>
       <abstract>The growing interest in developing corpora of persuasive texts has promoted applications in automated systems, e.g., debating and essay scoring systems; however, there is little prior work mining image persuasiveness from an argumentative perspective. To expand persuasiveness mining into a multi-modal realm, we present a multi-modal dataset, ImageArg, consisting of annotations of image persuasiveness in tweets. The annotations are based on a persuasion taxonomy we developed to explore image functionalities and the means of persuasion. We benchmark image persuasiveness tasks on ImageArg using widely-used multi-modal learning methods. The experimental results show that our dataset offers a useful resource for this rich and challenging topic, and there is ample room for modeling improvement.</abstract>
       <url hash="cd65473c">2022.argmining-1.1</url>
@@ -35,7 +35,7 @@
       <author><first>Moritz</first><last>Plenz</last></author>
       <author><first>Juri</first><last>Opitz</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>19–33</pages>
       <abstract>We address the problem of automatically predicting the quality of a conclusion given a set of (textual) premises of an argument, focusing in particular on the task of predicting the validity and novelty of the argumentative conclusion. We propose a multi-task approach that jointly predicts the validity and novelty of the textual conclusion, relying on pre-trained language models fine-tuned on the task. As training data for this task is scarce and costly to obtain, we experimentally investigate the impact of data augmentation approaches for improving the accuracy of prediction compared to a baseline that relies on task-specific data only. We consider the generation of synthetic data as well as the integration of datasets from related argument tasks. We show that especially our synthetic data, combined with class-balancing and instance-specific learning rates, substantially improves classification results (+15.1 points in <tex-math>F_1</tex-math>-score). Using only training data retrieved from related datasets by automatically labeling them for validity and novelty, combined with synthetic data, outperforms the baseline by 11.5 points in <tex-math>F_1</tex-math>-score.</abstract>
       <url hash="c53c9a18">2022.argmining-1.2</url>
@@ -55,7 +55,7 @@
     <paper id="4">
       <title>Analyzing Culture-Specific Argument Structures in Learner Essays</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
       <author><first>Garima</first><last>Mudgal</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>51–61</pages>
@@ -76,7 +76,7 @@
     <paper id="6">
       <title>A Unified Representation and a Decoupled Deep Learning Architecture for Argumentation Mining of Students’ Persuasive Essays</title>
       <author><first>Muhammad Tawsif</first><last>Sazid</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>74–83</pages>
       <abstract>We develop a novel unified representation for the argumentation mining task facilitating the extracting from text and the labelling of the non-argumentative units and argumentation components—premises, claims, and major claims—and the argumentative relations—premise to claim or premise in a support or attack relation, and claim to major-claim in a for or against relation—in an end-to-end machine learning pipeline. This tightly integrated representation combines the component and relation identification sub-problems and enables a unitary solution for detecting argumentation structures. This new representation together with a new deep learning architecture composed of a mixed embedding method, a multi-head attention layer, two biLSTM layers, and a final linear layer obtain state-of-the-art accuracy on the Persuasive Essays dataset. Also, we have introduced a decoupled solution to identify the entities and relations first, and on top of that, a second model is used to detect distance between the detected related components. An augmentation of the corpus (paragraph version) by including copies of major claims has further increased the performance.</abstract>
       <url hash="0bd37971">2022.argmining-1.6</url>
@@ -88,7 +88,7 @@
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Juri</first><last>Opitz</last></author>
       <author><first>Moritz</first><last>Plenz</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>84–94</pages>
       <abstract>This paper provides an overview of the Argument Validity and Novelty Prediction Shared Task that was organized as part of the 9th Workshop on Argument Mining (ArgMining 2022). The task focused on the prediction of the validity and novelty of a conclusion given a textual premise. Validity is defined as the degree to which the conclusion is justified with respect to the given premise. Novelty defines the degree to which the conclusion contains content that is new in relation to the premise. Six groups participated in the task, submitting overall 13 system runs for the subtask of binary classification and 2 system runs for the subtask of relative classification. The results reveal that the task is challenging, with best results obtained for Validity prediction in the range of 75% F1 score, for Novelty prediction of 70% F1 score and for correctly predicting both Validity and Novelty of 45% F1 score. In this paper we summarize the task definition and dataset. We give an overview of the results obtained by the participating systems, as well as insights to be gained from the diverse contributions.</abstract>
       <url hash="c4e1e111">2022.argmining-1.7</url>
@@ -149,7 +149,7 @@
     <paper id="13">
       <title>Predicting the Presence of Reasoning Markers in Argumentative Text</title>
       <author><first>Jonathan</first><last>Clayton</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
       <pages>137–142</pages>
       <abstract>This paper proposes a novel task in Argument Mining, which we will refer to as Reasoning Marker Prediction. We reuse the popular Persuasive Essays Corpus (Stab and Gurevych, 2014). Instead of using this corpus for Argument Structure Parsing, we use a simple heuristic method to identify text spans which we can identify as reasoning markers. We propose baseline methods for predicting the presence of these reasoning markers automatically, and make a script to generate the data for the task publicly available.</abstract>
       <url hash="8fb414d1">2022.argmining-1.13</url>
@@ -205,7 +205,7 @@
     </paper>
     <paper id="18">
       <title>Entity-based Claim Representation Improves Fact-Checking of Medical Content in Tweets</title>
-      <author><first>Amelie</first><last>Wührl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>187–198</pages>
       <abstract>False medical information on social media poses harm to people’s health. While the need for biomedical fact-checking has been recognized in recent years, user-generated medical content has received comparably little attention. At the same time, models for other text genres might not be reusable, because the claims they have been trained with are substantially different. For instance, claims in the SciFact dataset are short and focused: “Side effects associated with antidepressants increases risk of stroke”. In contrast, social media holds naturally-occurring claims, often embedded in additional context: "‘If you take antidepressants like SSRIs, you could be at risk of a condition called serotonin syndrome’ Serotonin syndrome nearly killed me in 2010. Had symptoms of stroke and seizure.” This showcases the mismatch between real-world medical claims and the input that existing fact-checking systems expect. To make user-generated content checkable by existing models, we propose to reformulate the social-media input in such a way that the resulting claim mimics the claim characteristics in established datasets. To accomplish this, our method condenses the claim with the help of relational entity information and either compiles the claim out of an entity-relation-entity triple or extracts the shortest phrase that contains these elements. We show that the reformulated input improves the performance of various fact-checking models as opposed to checking the tweet text in its entirety.</abstract>
diff --git a/data/xml/2022.autosimtrans.xml b/data/xml/2022.autosimtrans.xml
index d4e217f71a..404d62ec93 100644
--- a/data/xml/2022.autosimtrans.xml
+++ b/data/xml/2022.autosimtrans.xml
@@ -37,7 +37,7 @@
       <title>Over-Generation Cannot Be Rewarded: Length-Adaptive Average Lagging for Simultaneous Speech Translation</title>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>12-17</pages>
       <abstract>Simultaneous speech translation (SimulST) systems aim at generating their output with the lowest possible latency, which is normally computed in terms of Average Lagging (AL). In this paper we highlight that, despite its widespread adoption, AL provides underestimated scores for systems that generate longer predictions compared to the corresponding references. We also show that this problem has practical relevance, as recent SimulST systems have indeed a tendency to over-generate. As a solution, we propose LAAL (Length-Adaptive Average Lagging), a modified version of the metric that takes into account the over-generation phenomenon and allows for unbiased evaluation of both under-/over-generating systems.</abstract>
diff --git a/data/xml/2022.bea.xml b/data/xml/2022.bea.xml
index fe3621b1f2..54429c05dd 100644
--- a/data/xml/2022.bea.xml
+++ b/data/xml/2022.bea.xml
@@ -144,7 +144,7 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Pengfei</first><last>Li</last></author>
       <author><first>Liangming</first><last>Pan</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <pages>61-70</pages>
       <abstract>In field of teaching, true/false questioning is an important educational method for assessing students’ general understanding of learning materials. Manually creating such questions requires extensive human effort and expert knowledge. Question Generation (QG) technique offers the possibility to automatically generate a large number of questions. However, there is limited work on automatic true/false question generation due to the lack of training data and difficulty finding question-worthy content. In this paper, we propose an unsupervised True/False Question Generation approach (TF-QG) that automatically generates true/false questions from a given passage for reading comprehension test. TF-QG consists of a template-based framework that aims to test the specific knowledge in the passage by leveraging various NLP techniques, and a generative framework to generate more flexible and complicated questions by using a novel masking-and-infilling strategy. Human evaluation shows that our approach can generate high-quality and valuable true/false questions. In addition, simulated testing on the generated questions challenges the state-of-the-art inference models from NLI, QA, and fact verification tasks.</abstract>
       <url hash="d61a76b9">2022.bea-1.10</url>
@@ -157,7 +157,7 @@
       <author><first>Abhijit</first><last>Suresh</last></author>
       <author><first>Jennifer</first><last>Jacobs</last></author>
       <author><first>Margaret</first><last>Perkoff</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>71-81</pages>
       <abstract>“Talk moves” are specific discursive strategies used by teachers and students to facilitate conversations in which students share their thinking, and actively consider the ideas of others, and engage in rich discussions. Experts in instructional practices often rely on cues to identify and document these strategies, for example by annotating classroom transcripts. Prior efforts to develop automated systems to classify teacher talk moves using transformers achieved a performance of 76.32% F1. In this paper, we investigate the feasibility of using enriched contextual cues to improve model performance. We applied state-of-the-art deep learning approaches for Natural Language Processing (NLP), including Robustly optimized bidirectional encoder representations from transformers (Roberta) with a special input representation that supports previous and subsequent utterances as context for talk moves classification. We worked with the publically available TalkMoves dataset, which contains utterances sourced from real-world classroom sessions (human- transcribed and annotated). Through a series of experimentations, we found that a combination of previous and subsequent utterances improved the transformers’ ability to differentiate talk moves (by 2.6% F1). These results constitute a new state of the art over previously published results and provide actionable insights to those in the broader NLP community who are working to develop similar transformer-based classification models.</abstract>
@@ -168,7 +168,7 @@
     <paper id="12">
       <title>Cross-corpora experiments of automatic proficiency assessment and error detection for spoken <fixed-case>E</fixed-case>nglish</title>
       <author><first>Stefano</first><last>Bannò</last></author>
-      <author><first>Marco</first><last>Matassoni</last></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last></author>
       <pages>82-91</pages>
       <abstract>The growing demand for learning English as a second language has led to an increasing interest in automatic approaches for assessing spoken language proficiency. One of the most significant challenges in this field is the lack of publicly available annotated spoken data. Another common issue is the lack of consistency and coherence in human assessment. To tackle both problems, in this paper we address the task of automatically predicting the scores of spoken test responses of English-as-a-second-language learners by training neural models on written data and using the presence of grammatical errors as a feature, as they can be considered consistent indicators of proficiency through their distribution and frequency. Specifically, we train a feature extractor on EFCAMDAT, a large written corpus containing error annotations and proficiency levels assigned by human experts, in order to extract information related to grammatical errors and, in turn, we use the resulting model for inference on the CLC-FCE corpus, on the ICNALE corpus, and on the spoken section of the TLT-school corpus, a collection of proficiency tests taken by Italian students. The work investigates the impact of the feature extractor on spoken proficiency assessment as well as the written-to-spoken approach. We find that our error-based approach can be beneficial for assessing spoken proficiency. The results obtained on the considered datasets are discussed and evaluated with appropriate metrics.</abstract>
       <url hash="1ac3c172">2022.bea-1.12</url>
@@ -250,8 +250,8 @@
     </paper>
     <paper id="19">
       <title>Assessing sentence readability for <fixed-case>G</fixed-case>erman language learners with broad linguistic modeling or readability formulas: When do linguistic insights make a difference?</title>
-      <author><first>Zarah</first><last>Weiss</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>141-153</pages>
       <abstract>We present a new state-of-the-art sentence-wise readability assessment model for German L2 readers. We build a linguistically broadly informed machine learning model and compare its performance against four commonly used readability formulas. To understand when the linguistic insights used to inform our model make a difference for readability assessment and when simple readability formulas suffice, we compare their performance based on two common automatic readability assessment tasks: predictive regression and sentence pair ranking. We find that leveraging linguistic insights yields top performances across tasks, but that for the identification of simplified sentences also readability formulas – which are easier to compute and more accessible – can be sufficiently precise. Linguistically informed modeling, however, is the only viable option for high quality outcomes in fine-grained prediction tasks. We then explore the sentence-wise readability profile of leveled texts written for language learners at a beginning, intermediate, and advanced level of German to showcase the valuable insights that sentence-wise readability assessment can have for the adaptation of learning materials and better understand how sentences’ individual readability contributes to larger texts’ overall readability.</abstract>
       <url hash="01ada88b">2022.bea-1.19</url>
@@ -261,7 +261,7 @@
     <paper id="20">
       <title>Parametrizable exercise generation from authentic texts: Effectively targeting the language means on the curriculum</title>
       <author><first>Tanja</first><last>Heck</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>154-166</pages>
       <abstract>We present a parametrizable approach to exercise generation from authentic texts that addresses the need for digital materials designed to practice the language means on the curriculum in a real-life school setting. The tool builds on a language-aware searchengine that helps identify attractive texts rich in the language means to be practiced. Making use of state-of-the-art NLP, the relevant learning targets are identified and transformed intoexercise items embedded in the original context. While the language-aware search engine ensures that these contexts match the learner‘s interests based on the search term used, and the linguistic parametrization of the system then reranks the results to prioritize texts that richly represent the learning targets, for theexercise generation to proceed on this basis, an interactive configuration panel allows users to adjust exercise complexity through a range of parameters specifying both properties of thesource sentences and of the exercises. An evaluation of exercises generated from web documents for a representative sample of language means selected from the English curriculum of 7th grade in German secondary school showed that the ombination of language-aware search and exercise generationsuccessfully facilitates the process of generating exercises from authentic texts that support practice of the pedagogical targets.</abstract>
       <url hash="a6119c75">2022.bea-1.20</url>
@@ -318,7 +318,7 @@
       <author><first>James</first><last>Fiacco</last></author>
       <author><first>Shiyan</first><last>Jiang</last></author>
       <author><first>David</first><last>Adamson</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>204-215</pages>
       <abstract>Providing effective automatic essay feedback is necessary for offering writing instruction at a massive scale. In particular, feedback for promoting coherent flow of ideas in essays is critical. In this paper we propose a state-of-the-art method for automated analysis of structure and flow of writing, referred to as Rhetorical Structure Theory (RST) parsing. In so doing, we lay a foundation for a generalizable approach to automated writing feedback related to structure and flow. We address challenges in automated rhetorical analysis when applied to student writing and evaluate our novel RST parser model on both a recent student writing dataset and a standard benchmark RST parsing dataset.</abstract>
       <url hash="bda2316f">2022.bea-1.25</url>
@@ -344,7 +344,7 @@
       <author><first>Zid</first><last>Mancenido</last></author>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Heather</first><last>Hill</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>224-233</pages>
       <url hash="74be3f9a">2022.bea-1.27</url>
       <attachment type="attachment" hash="f8d9c7a0">2022.bea-1.27.attachment.zip</attachment>
@@ -371,7 +371,7 @@
       <author><first>Jasdeep</first><last>Singh</last></author>
       <author><first>Katherine</first><last>Goodman</last></author>
       <author><first>Jean</first><last>Hertzberg</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>250-261</pages>
       <abstract>Recent advances in natural language processing (NLP) have greatly helped educational applications, for both teachers and students. In higher education, there is great potential to use NLP tools for advancing pedagogical research. In this paper, we focus on how NLP can help understand student experiences in engineering, thus facilitating engineering educators to carry out large scale analysis that is helpful for re-designing the curriculum. Here, we introduce a new task we call response construct tagging (RCT), in which student responses to tailored survey questions are automatically tagged for six constructs measuring transformative experiences and engineering identity of students. We experiment with state-of-the-art classification models for this task and investigate the effects of different sources of additional information. Our best model achieves an F1 score of 48. We further investigate multi-task training on the related task of sentiment classification, which improves our model’s performance to 55 F1. Finally, we provide a detailed qualitative analysis of model performance.</abstract>
       <url hash="367bd76b">2022.bea-1.29</url>
@@ -393,7 +393,7 @@
     <paper id="31">
       <title>Incremental Disfluency Detection for Spoken Learner <fixed-case>E</fixed-case>nglish</title>
       <author><first>Lucy</first><last>Skidmore</last></author>
-      <author><first>Roger</first><last>Moore</last></author>
+      <author id="roger-k-moore"><first>Roger</first><last>Moore</last></author>
       <pages>272-278</pages>
       <abstract>Incremental disfluency detection provides a framework for computing communicative meaning from hesitations, repetitions and false starts commonly found in speech. One application of this area of research is in dialogue-based computer-assisted language learning (CALL), where detecting learners’ production issues word-by-word can facilitate timely and pedagogically driven responses from an automated system. Existing research on disfluency detection in learner speech focuses on disfluency removal for subsequent downstream tasks, processing whole utterances non-incrementally. This paper instead explores the application of laughter as a feature for incremental disfluency detection and shows that when combined with silence, these features reduce the impact of learner errors on model precision as well as lead to an overall improvement of model performance. This work adds to the growing body of research incorporating laughter as a feature for dialogue processing tasks and provides further support for the application of multimodality in dialogue-based CALL systems.</abstract>
       <url hash="26e1193b">2022.bea-1.31</url>
diff --git a/data/xml/2022.bigscience.xml b/data/xml/2022.bigscience.xml
index d36bee26c7..c526ee301f 100644
--- a/data/xml/2022.bigscience.xml
+++ b/data/xml/2022.bigscience.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of BigScience Episode #5 -- Workshop on Challenges &amp; Perspectives in Creating Large Language Models</booktitle>
       <editor><first>Angela</first><last>Fan</last></editor>
-      <editor><first>Suzana</first><last>Ilic</last></editor>
+      <editor id="suzana-ilic"><first>Suzana</first><last>Ilic</last></editor>
       <editor><first>Thomas</first><last>Wolf</last></editor>
       <editor><first>Matthias</first><last>Gallé</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -39,11 +39,11 @@
       <title>Using <fixed-case>ASR</fixed-case>-Generated Text for Spoken Language Modeling</title>
       <author><first>Nicolas</first><last>Hervé</last></author>
       <author><first>Valentin</first><last>Pelloin</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Franck</first><last>Dary</last></author>
       <author><first>Antoine</first><last>Laurent</last></author>
       <author><first>Sylvain</first><last>Meignier</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>17-25</pages>
       <abstract>This papers aims at improving spoken language modeling (LM) using very large amount of automatically transcribed speech. We leverage the INA (French National Audiovisual Institute) collection and obtain 19GB of text after applying ASR on 350,000 hours of diverse TV shows. From this, spoken language models are trained either by fine-tuning an existing LM (FlauBERT) or through training a LM from scratch. The new models (FlauBERT-Oral) will be shared with the community and are evaluated not only in terms of word prediction accuracy but also for two downstream tasks : classification of TV shows and syntactic parsing of speech. Experimental results show that FlauBERT-Oral is better than its initial FlauBERT version demonstrating that, despite its inherent noisy nature, ASR-Generated text can be useful to improve spoken language modeling.</abstract>
       <url hash="db785133">2022.bigscience-1.2</url>
@@ -53,16 +53,16 @@
     </paper>
     <paper id="3">
       <title>You reap what you sow: On the Challenges of Bias Evaluation Under Multilingual Settings</title>
-      <author><first>Zeerak</first><last>Talat</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Stella</first><last>Biderman</last></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Manan</first><last>Dey</last></author>
       <author><first>Shayne</first><last>Longpre</last></author>
       <author><first>Sasha</first><last>Luccioni</last></author>
       <author><first>Maraim</first><last>Masoud</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Shanya</first><last>Sharma</last></author>
       <author><first>Arjun</first><last>Subramonian</last></author>
       <author><first>Jaesung</first><last>Tae</last></author>
@@ -124,7 +124,7 @@
       <author><first>Christopher</first><last>Akiki</last></author>
       <author><first>Javier</first><last>De La Rosa</last></author>
       <author><first>Clémentine</first><last>Fourrier</last></author>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Stefan</first><last>Schweter</last></author>
       <author><first>Daniel</first><last>Van Strien</last></author>
       <pages>75-83</pages>
@@ -197,7 +197,7 @@
     <paper id="11">
       <title>Emergent Structures and Training Dynamics in Large Language Models</title>
       <author><first>Ryan</first><last>Teehan</last></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Oleg</first><last>Serikov</last></author>
       <author><first>Eliza</first><last>Szczechla</last></author>
       <author><first>Natasha</first><last>Seelam</last></author>
diff --git a/data/xml/2022.bionlp.xml b/data/xml/2022.bionlp.xml
index b2338aa284..e1b9a7a833 100644
--- a/data/xml/2022.bionlp.xml
+++ b/data/xml/2022.bionlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 21st Workshop on Biomedical Language Processing</booktitle>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>May</month>
@@ -72,7 +72,7 @@
       <title>Zero-Shot Aspect-Based Scientific Document Summarization using Self-Supervised Pre-training</title>
       <author><first>Amir</first><last>Soleimani</last></author>
       <author><first>Vassilina</first><last>Nikoulina</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Salah</first><last>Ait Mokhtar</last></author>
       <pages>49–62</pages>
       <abstract>We study the zero-shot setting for the aspect-based scientific document summarization task. Summarizing scientific documents with respect to an aspect can remarkably improve document assistance systems and readers experience. However, existing large-scale datasets contain a limited variety of aspects, causing summarization models to over-fit to a small set of aspects and a specific domain. We establish baseline results in zero-shot performance (over unseen aspects and the presence of domain shift), paraphrasing, leave-one-out, and limited supervised samples experimental setups. We propose a self-supervised pre-training approach to enhance the zero-shot performance. We leverage the PubMed structured abstracts to create a biomedical aspect-based summarization dataset. Experimental results on the PubMed and FacetSum aspect-based datasets show promising performance when the model is pre-trained using unlabelled in-domain data.</abstract>
@@ -109,7 +109,7 @@
     <paper id="8">
       <title>Automatic Biomedical Term Clustering by Learning Fine-grained Term Representations</title>
       <author><first>Sihang</first><last>Zeng</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Sheng</first><last>Yu</last></author>
       <pages>91–96</pages>
       <abstract>Term clustering is important in biomedical knowledge graph construction. Using similarities between terms embedding is helpful for term clustering. State-of-the-art term embeddings leverage pretrained language models to encode terms, and use synonyms and relation knowledge from knowledge graphs to guide contrastive learning. These embeddings provide close embeddings for terms belonging to the same concept. However, from our probing experiments, these embeddings are not sensitive to minor textual differences which leads to failure for biomedical term clustering. To alleviate this problem, we adjust the sampling strategy in pretraining term embeddings by providing dynamic hard positive and negative samples during contrastive learning to learn fine-grained representations which result in better biomedical term clustering. We name our proposed method as CODER++, and it has been applied in clustering biomedical concepts in the newly released Biomedical Knowledge Graph named BIOS.</abstract>
@@ -121,7 +121,7 @@
     <paper id="9">
       <title><fixed-case>B</fixed-case>io<fixed-case>BART</fixed-case>: Pretraining and Evaluation of A Biomedical Generative Language Model</title>
       <author><first>Hongyi</first><last>Yuan</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Ruyi</first><last>Gan</last></author>
       <author><first>Jiaxing</first><last>Zhang</last></author>
       <author><first>Yutao</first><last>Xie</last></author>
@@ -239,7 +239,7 @@
     <paper id="18">
       <title>Intra-Template Entity Compatibility based Slot-Filling for Clinical Trial Information Extraction</title>
       <author><first>Christian</first><last>Witte</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>178–192</pages>
       <abstract>We present a deep learning based information extraction system that can extract the design and results of a published abstract describing a Randomized Controlled Trial (RCT). In contrast to other approaches, our system does not regard the PICO elements as flat objects or labels but as structured objects. We thus model the task as the one of filling a set of templates and slots; our two-step approach recognizes relevant slot candidates as a first step and assigns them to a corresponding template as second step, relying on a learned pairwise scoring function that models the compatibility of the different slot values. We evaluate the approach on a dataset of 211 manually annotated abstracts for type 2 Diabetes and Glaucoma, showing the positive impact of modelling intra-template entity compatibility. As main benefit, our approach yields a structured object for every RCT abstract that supports the aggregation and summarization of clinical trial results across published studies and can facilitate the task of creating a systematic review or meta-analysis.</abstract>
       <url hash="ee5a3300">2022.bionlp-1.18</url>
@@ -256,7 +256,7 @@
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
       <author><first>Joaquín</first><last>Silveira-Ocampo</last></author>
       <author><first>Alfonso</first><last>Valencia</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>193–199</pages>
       <abstract>This work presents the first large-scale biomedical Spanish language models trained from scratch, using large biomedical corpora consisting of a total of 1.1B tokens and an EHR corpus of 95M tokens. We compared them against general-domain and other domain-specific models for Spanish on three clinical NER tasks. As main results, our models are superior across the NER tasks, rendering them more convenient for clinical NLP applications. Furthermore, our findings indicate that when enough data is available, pre-training from scratch is better than continual pre-training when tested on clinical tasks, raising an exciting research question about which approach is optimal. Our models and fine-tuning scripts are publicly available at HuggingFace and GitHub.</abstract>
@@ -272,7 +272,7 @@
       <author><first>Morgan</first><last>Wixted</last></author>
       <author><first>Alejandro</first><last>Garcia-Rudolph</last></author>
       <author><first>Catalina</first><last>Martínez-Costa</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>200–211</pages>
       <abstract>Despite the advances in digital healthcare systems offering curated structured knowledge, much of the critical information still lies in large volumes of unlabeled and unstructured clinical texts. These texts, which often contain protected health information (PHI), are exposed to information extraction tools for downstream applications, risking patient identification. Existing works in de-identification rely on using large-scale annotated corpora in English, which often are not suitable in real-world multilingual settings. Pre-trained language models (LM) have shown great potential for cross-lingual transfer in low-resource settings. In this work, we empirically show the few-shot cross-lingual transfer property of LMs for named entity recognition (NER) and apply it to solve a low-resource and real-world challenge of code-mixed (Spanish-Catalan) clinical notes de-identification in the stroke domain. We annotate a gold evaluation dataset to assess few-shot setting performance where we only use a few hundred labeled examples for training. Our model improves the zero-shot F1-score from 73.7% to 91.2% on the gold evaluation set when adapting Multilingual BERT (mBERT) (CITATION) from the MEDDOCAN (CITATION) corpus with our few-shot cross-lingual target corpus. When generalized to an out-of-sample test set, the best model achieves a human-evaluation F1-score of 97.2%.</abstract>
       <url hash="fa6ec883">2022.bionlp-1.20</url>
@@ -308,7 +308,7 @@
     <paper id="23">
       <title><fixed-case>B</fixed-case>io<fixed-case>C</fixed-case>ite: A Deep Learning-based Citation Linkage Framework for Biomedical Research Articles</title>
       <author><first>Sudipta</first><last>Singha Roy</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>241–251</pages>
       <abstract>Research papers reflect scientific advances. Citations are widely used in research publications to support the new findings and show their benefits, while also regulating the information flow to make the contents clearer for the audience. A citation in a research article refers to the information’s source, but not the specific text span from that source article. In biomedical research articles, this task is challenging as the same chemical or biological component can be represented in multiple ways in different papers from various domains. This paper suggests a mechanism for linking citing sentences in a publication with cited sentences in referenced sources. The framework presented here pairs the citing sentence with all of the sentences in the reference text, and then tries to retrieve the semantically equivalent pairs. These semantically related sentences from the reference paper are chosen as the cited statements. This effort involves designing a citation linkage framework utilizing sequential and tree-structured siamese deep learning models. This paper also provides a method to create a synthetic corpus for such a task.</abstract>
       <url hash="f59e0805">2022.bionlp-1.23</url>
@@ -331,7 +331,7 @@
     </paper>
     <paper id="25">
       <title>Overview of the <fixed-case>M</fixed-case>ed<fixed-case>V</fixed-case>id<fixed-case>QA</fixed-case> 2022 Shared Task on Medical Video Question-Answering</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
       <pages>264–274</pages>
       <abstract>In this paper, we present an overview of the MedVidQA 2022 shared task, collocated with the 21st BioNLP workshop at ACL 2022. The shared task addressed two of the challenges faced by medical video question answering: (I) a video classification task that explores new approaches to medical video understanding (labeling), and (ii) a visual answer localization task. Visual answer localization refers to the identification of the relevant temporal segments (start and end timestamps) in the video where the answer to the medical question is being shown or illustrated. A total of thirteen teams participated in the shared task challenges, with eleven system descriptions submitted to the workshop. The descriptions present monomodal and multi-modal approaches developed for medical video classification and visual answer localization. This paper describes the tasks, the datasets, evaluation metrics, and baseline systems for both tasks. Finally, the paper summarizes the techniques and results of the evaluation of the various approaches explored by the participating teams.</abstract>
@@ -385,7 +385,7 @@
     <paper id="29">
       <title>Data Augmentation for Rare Symptoms in Vaccine Side-Effect Detection</title>
       <author><first>Bosung</first><last>Kim</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>310–315</pages>
       <abstract>We study the problem of entity detection and normalization applied to patient self-reports of symptoms that arise as side-effects of vaccines. Our application domain presents unique challenges that render traditional classification methods ineffective: the number of entity types is large; and many symptoms are rare, resulting in a long-tail distribution of training examples per entity type. We tackle these challenges with an autoregressive model that generates standardized names of symptoms. We introduce a data augmentation technique to increase the number of training examples for rare symptoms. Experiments on real-life patient vaccine symptom self-reports show that our approach outperforms strong baselines, and that additional examples improve performance on the long-tail entities.</abstract>
       <url hash="cf4e8dbe">2022.bionlp-1.29</url>
@@ -458,7 +458,7 @@
       <author><first>Ying</first><last>Ding</last></author>
       <author><first>Greg</first><last>Durrett</last></author>
       <author><first>Justin F.</first><last>Rousseau</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <pages>359–368</pages>
       <abstract>Generating a summary from findings has been recently explored (Zhang et al., 2018, 2020) in note types such as radiology reports that typically have short length. In this work, we focus on echocardiogram notes that is longer and more complex compared to previous note types. We formally define the task of echocardiography conclusion generation (EchoGen) as generating a conclusion given the findings section, with emphasis on key cardiac findings. To promote the development of EchoGen methods, we present a new benchmark, which consists of two datasets collected from two hospitals. We further compare both standard and start-of-the-art methods on this new benchmark, with an emphasis on factual consistency. To accomplish this, we develop a tool to automatically extract concept-attribute tuples from the text. We then propose an evaluation metric, FactComp, to compare concept-attribute tuples between the human reference and generated conclusions. Both automatic and human evaluations show that there is still a significant gap between human-written and machine-generated conclusions on echo reports in terms of factuality and overall quality.</abstract>
       <url hash="989d85eb">2022.bionlp-1.35</url>
@@ -507,7 +507,7 @@
       <author><first>Matúš</first><last>Falis</last></author>
       <author><first>Hang</first><last>Dong</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>389–401</pages>
       <abstract>Medical document coding is the process of assigning labels from a structured label space (ontology – e.g., ICD-9) to medical documents. This process is laborious, costly, and error-prone. In recent years, efforts have been made to automate this process with neural models. The label spaces are large (in the order of thousands of labels) and follow a big-head long-tail label distribution, giving rise to few-shot and zero-shot scenarios. Previous efforts tried to address these scenarios within the model, leading to improvements on rare labels, but worse results on frequent ones. We propose data augmentation and synthesis techniques in order to address these scenarios. We further introduce an analysis technique for this setting inspired by confusion matrices. This analysis technique points to the positive impact of data augmentation and synthesis, but also highlights more general issues of confusion within families of codes, and underprediction.</abstract>
       <url hash="ab353a18">2022.bionlp-1.39</url>
diff --git a/data/xml/2022.blackboxnlp.xml b/data/xml/2022.blackboxnlp.xml
index 356f501d57..e0c4ec1f63 100644
--- a/data/xml/2022.blackboxnlp.xml
+++ b/data/xml/2022.blackboxnlp.xml
@@ -46,7 +46,7 @@
       <title>Where’s the Learning in Representation Learning for Compositional Semantics and the Case of Thematic Fit</title>
       <author><first>Mughilan</first><last>Muthupari</last></author>
       <author><first>Samrat</first><last>Halder</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Yuval</first><last>Marton</last></author>
       <pages>28-39</pages>
       <abstract>Observing that for certain NLP tasks, such as semantic role prediction or thematic fit estimation, random embeddings perform as well as pre-trained embeddings, we explore what settings allow for this, and examine where most of the learning is encoded: the word embeddings, the semantic role embeddings, or “the network”. We find nuanced answers, depending on the task and its relation to the training objective. We examine these representation learning aspects in multi-task learning, where role prediction and role-filling are supervised tasks, while several thematic fit tasks are outside the models’ direct supervision. We observe a non-monotonous relation between some tasks’ quality scores and the training data size. In order to better understand this observation, we analyze these results using easier, per-verb versions of these tasks.</abstract>
@@ -58,7 +58,7 @@
       <title>Sentence Ambiguity, Grammaticality and Complexity Probes</title>
       <author><first>Sunit</first><last>Bhattacharya</last></author>
       <author><first>Vilém</first><last>Zouhar</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>40-50</pages>
       <abstract>It is unclear whether, how and where large pre-trained language models capture subtle linguistic traits like ambiguity, grammaticality and sentence complexity. We present results of automatic classification of these traits and compare their viability and patterns across representation types. We demonstrate that template-based datasets with surface-level artifacts should not be used for probing, careful comparisons with baselines should be done and that t-SNE plots should not be used to determine the presence of a feature among dense vectors representations. We also show how features might be highly localized in the layers for these models and get lost in the upper layers.</abstract>
       <url hash="8e6111b9">2022.blackboxnlp-1.4</url>
@@ -94,7 +94,7 @@
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>80-90</pages>
       <abstract>Researchers often use games to analyze the abilities of Artificial Intelligence models. In this work, we use the game of Twenty Questions to study the world knowledge of language models. Despite its simplicity for humans, this game requires a broad knowledge of the world to answer yes/no questions. We evaluate several language models on this task and find that only the largest model has enough world knowledge to play it well, although it still has difficulties with the shape and size of objects. We also present a new method to improve the knowledge of smaller models by leveraging external information from the web. Finally, we release our dataset and Twentle, a website to interactively test the knowledge of language models by playing Twenty Questions.</abstract>
       <url hash="9571bdd4">2022.blackboxnlp-1.7</url>
@@ -139,7 +139,7 @@
       <author><first>Rasmus</first><last>Jørgensen</last></author>
       <author><first>Fiammetta</first><last>Caccavale</last></author>
       <author><first>Christian</first><last>Igel</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>131-141</pages>
       <abstract>Multilingual NLP models provide potential solutions to the digital language divide, i.e., cross-language performance disparities. Early analyses of such models have indicated good performance across training languages and good generalization to unseen, related languages. This work examines whether, between related languages, multilingual models are equally right for the right reasons, i.e., if interpretability methods reveal that the models put emphasis on the same words as humans. To this end, we provide a new trilingual, parallel corpus of rationale annotations for English, Danish, and Italian sentiment analysis models and use it to benchmark models and interpretability methods. We propose rank-biased overlap as a better metric for comparing input token attributions to human rationale annotations. Our results show: (i) models generally perform well on the languages they are trained on, and align best with human rationales in these languages; (ii) performance is higher on English, even when not a source language, but this performance is not accompanied by higher alignment with human rationales, which suggests that language models favor English, but do not facilitate successful transfer of rationales.</abstract>
       <url hash="fc4482a8">2022.blackboxnlp-1.11</url>
@@ -199,7 +199,7 @@
       <title>Understanding Domain Learning in Language Models Through Subpopulation Analysis</title>
       <author><first>Zheng</first><last>Zhao</last></author>
       <author><first>Yftah</first><last>Ziser</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <pages>192-209</pages>
       <abstract>We investigate how different domains are encoded in modern neural network architectures. We analyze the relationship between natural language domains, model size, and the amount of training data used. The primary analysis tool we develop is based on subpopulation analysis with Singular Vector Canonical Correlation Analysis (SVCCA), which we apply to Transformer-based language models (LMs). We compare the latent representations of such a language model at its different layers from a pair of models: a model trained on multiple domains (an experimental model) and a model trained on a single domain (a control model). Through our method, we find that increasing the model capacity impacts how domain information is stored in upper and lower layers differently. In addition, we show that larger experimental models simultaneously embed domain-specific information as if they were conjoined control models. These findings are confirmed qualitatively, demonstrating the validity of our method.</abstract>
       <url hash="d258a382">2022.blackboxnlp-1.16</url>
@@ -211,7 +211,7 @@
       <author><first>Diego</first><last>Garcia-Olano</last></author>
       <author><first>Yasumasa</first><last>Onoe</last></author>
       <author><first>Joydeep</first><last>Ghosh</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>210-224</pages>
       <abstract>Interpretable entity representations (IERs) are sparse embeddings that are “human-readable” in that dimensions correspond to fine-grained entity types and values are predicted probabilities that a given entity is of the corresponding type. These methods perform well in zero-shot and low supervision settings. Compared to standard dense neural embeddings, such interpretable representations may permit analysis and debugging. However, while fine-tuning sparse, interpretable representations improves accuracy on downstream tasks, it destroys the semantics of the dimensions which were enforced in pre-training. Can we maintain the interpretable semantics afforded by IERs while improving predictive performance on downstream tasks? Toward this end, we propose Intermediate enTity-based Sparse Interpretable Representation Learning (ItsIRL). ItsIRL realizes improved performance over prior IERs on biomedical tasks, while maintaining “interpretability” generally and their ability to support model debugging specifically. The latter is enabled in part by the ability to perform “counterfactual” fine-grained entity type manipulation, which we explore in this work. Finally, we propose a method to construct entity type based class prototypes for revealing global semantic properties of classes learned by our model. Code for pre-training and experiments will be made publicly available.</abstract>
       <url hash="b13b4d3a">2022.blackboxnlp-1.17</url>
@@ -222,7 +222,7 @@
       <title>Towards Procedural Fairness: Uncovering Biases in How a Toxic Language Classifier Uses Sentiment Information</title>
       <author><first>Isar</first><last>Nejadgholi</last></author>
       <author><first>Esma</first><last>Balkir</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>225-237</pages>
       <abstract>Previous works on the fairness of toxic language classifiers compare the output of models with different identity terms as input features but do not consider the impact of other important concepts present in the context. Here, besides identity terms, we take into account high-level latent features learned by the classifier and investigate the interaction between these features and identity terms. For a multi-class toxic language classifier, we leverage a concept-based explanation framework to calculate the sensitivity of the model to the concept of sentiment, which has been used before as a salient feature for toxic language detection. Our results show that although for some classes, the classifier has learned the sentiment information as expected, this information is outweighed by the influence of identity terms as input features. This work is a step towards evaluating procedural fairness, where unfair processes lead to unfair outcomes. The produced knowledge can guide debiasing techniques to ensure that important concepts besides identity terms are well-represented in training datasets.</abstract>
@@ -234,7 +234,7 @@
       <title>Investigating the Characteristics of a Transformer in a Few-Shot Setup: Does Freezing Layers in <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a Help?</title>
       <author><first>Digvijay</first><last>Ingle</last></author>
       <author><first>Rishabh</first><last>Tripathi</last></author>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
       <author><first>Jithendra</first><last>Vepa</last></author>
       <pages>238-248</pages>
@@ -248,7 +248,7 @@
       <title>It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New <fixed-case>S</fixed-case>em<fixed-case>A</fixed-case>nto<fixed-case>N</fixed-case>eg Benchmark</title>
       <author><first>Teemu</first><last>Vahtola</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>249-262</pages>
       <abstract>We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as “I am not guilty” versus “I am innocent”.We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.</abstract>
       <url hash="5ab4a4f1">2022.blackboxnlp-1.20</url>
@@ -354,7 +354,7 @@
       <author><first>Anurag</first><last>Katakkar</last></author>
       <author><first>Clay H.</first><last>Yoo</last></author>
       <author><first>Weiqin</first><last>Wang</last></author>
-      <author><first>Zachary</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last></author>
       <author><first>Divyansh</first><last>Kaushik</last></author>
       <pages>346-355</pages>
       <abstract>In attempts to develop sample-efficient and interpretable algorithms, researcher have explored myriad mechanisms for collecting and exploiting feature feedback, auxiliary annotations provided for training (but not test) instances that highlight salient evidence. Examples include bounding boxes around objects and salient spans in text. Despite its intuitive appeal, feature feedback has not delivered significant gains in practical problems as assessed on iid holdout sets. However, recent works on counterfactually augmented data suggest an alternative benefit of supplemental annotations, beyond interpretability: lessening sensitivity to spurious patterns and consequently delivering gains in out-of-domain evaluations. We speculate that while existing methods for incorporating feature feedback have delivered negligible in-sample performance gains, they may nevertheless provide out-of-domain benefits. Our experiments addressing sentiment analysis, show that feature feedback methods perform significantly better on various natural out-of-domain datasets despite comparable in-domain evaluations. By contrast, performance on natural language inference remains comparable. Finally, we compare those tasks where feature feedback does (and does not) help.</abstract>
@@ -400,7 +400,7 @@
       <author><first>Deborah</first><last>Ferreira</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
       <pages>394-403</pages>
       <abstract>In the interest of interpreting neural NLI models and their reasoning strategies, we carry out a systematic probing study which investigates whether these modelscapture the crucial semantic features central to natural logic: monotonicity and concept inclusion. Correctly identifying valid inferences in downward-monotone contexts is a known stumbling block for NLI performance,subsuming linguistic phenomena such as negation scope and generalized quantifiers. To understand this difficulty, we emphasize monotonicity as a property of a context and examine the extent to which models capture relevant monotonicity information in the vector representations which are intermediate to their decision making process. Drawing on the recent advancement of the probing paradigm,we compare the presence of monotonicity features across various models. We find that monotonicity information is notably weak in the representations of popularNLI models which achieve high scores on benchmarks, and observe that previous improvements to these models based on fine-tuning strategies have introduced stronger monotonicity features together with their improved performance on challenge sets.</abstract>
       <url hash="b79d7a70">2022.blackboxnlp-1.33</url>
@@ -410,7 +410,7 @@
     <paper id="34">
       <title>Probing with Noise: Unpicking the Warp and Weft of Embeddings</title>
       <author><first>Filip</first><last>Klubicka</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>404-417</pages>
       <abstract>Improving our understanding of how information is encoded in vector space can yield valuable interpretability insights. Alongside vector dimensions, we argue that it is possible for the vector norm to also carry linguistic information. We develop a method to test this: an extension of the probing framework which allows for relative intrinsic interpretations of probing results. It relies on introducing noise that ablates information encoded in embeddings, grounded in random baselines and confidence intervals. We apply the method to well-established probing tasks and find evidence that confirms the existence of separate information containers in English GloVe and BERT embeddings. Our correlation analysis aligns with the experimental findings that different encoders use the norm to encode different kinds of information: GloVe stores syntactic and sentence length information in the vector norm, while BERT uses it to encode contextual incongruity.</abstract>
       <url hash="16ab81eb">2022.blackboxnlp-1.34</url>
@@ -422,7 +422,7 @@
       <title>Look to the Right: Mitigating Relative Position Bias in Extractive Question Answering</title>
       <author><first>Kazutoshi</first><last>Shinoda</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>418-425</pages>
       <abstract>Extractive question answering (QA) models tend to exploit spurious correlations to make predictions when a training set has unintended biases. This tendency results in models not being generalizable to examples where the correlations do not hold. Determining the spurious correlations QA models can exploit is crucial in building generalizable QA models in real-world applications; moreover, a method needs to be developed that prevents these models from learning the spurious correlations even when a training set is biased. In this study, we discovered that the relative position of an answer, which is defined as the relative distance from an answer span to the closest question-context overlap word, can be exploited by QA models as superficial cues for making predictions. Specifically, we find that when the relative positions in a training set are biased, the performance on examples with relative positions unseen during training is significantly degraded. To mitigate the performance degradation for unseen relative positions, we propose an ensemble-based debiasing method that does not require prior knowledge about the distribution of relative positions. We demonstrate that the proposed method mitigates the models’ reliance on relative positions using the biased and full SQuAD dataset. We hope that this study can help enhance the generalization ability of QA models in real-world applications.</abstract>
       <url hash="e644ee0c">2022.blackboxnlp-1.35</url>
diff --git a/data/xml/2022.bucc.xml b/data/xml/2022.bucc.xml
index 91ca3ef7b6..9a7d23e7ab 100644
--- a/data/xml/2022.bucc.xml
+++ b/data/xml/2022.bucc.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the BUCC Workshop within LREC 2022</booktitle>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
@@ -31,7 +31,7 @@
       <title>About Evaluating Bilingual Lexicon Induction</title>
       <author><first>Martin</first><last>Laville</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>8–14</pages>
       <abstract>With numerous new methods proposed recently, the evaluation of Bilingual Lexicon Induction have been quite hazardous and inconsistent across works. Some studies proposed some guidance to sanitize this; yet, they are not necessarily followed by practitioners. In this study, we try to gather these different recommendations and add our owns, with the aim to propose an unified evaluation protocol. We further show that the easiness of a benchmark while being correlated to the proximity of the language pairs being considered, is even more conditioned on the graphical similarities within the test word pairs.</abstract>
       <url hash="dfc04fe0">2022.bucc-1.2</url>
@@ -42,8 +42,8 @@
       <author><first>Silvia</first><last>Severini</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>15–22</pages>
       <abstract>Bilingual Word Embeddings (BWEs) are one of the cornerstones of cross-lingual transfer of NLP models. They can be built using only monolingual corpora without supervision leading to numerous works focusing on unsupervised BWEs. However, most of the current approaches to build unsupervised BWEs do not compare their results with methods based on easy-to-access cross-lingual signals. In this paper, we argue that such signals should always be considered when developing unsupervised BWE methods. The two approaches we find most effective are: 1) using identical words as seed lexicons (which unsupervised approaches incorrectly assume are not available for orthographically distinct language pairs) and 2) combining such lexicons with pairs extracted by matching romanized versions of words with an edit distance threshold. We experiment on thirteen non-Latin languages (and English) and show that such cheap signals work well and that they outperform using more complex unsupervised methods on distant language pairs such as Chinese, Japanese, Kannada, Tamil, and Thai. In addition, they are even competitive with the use of high-quality lexicons in supervised approaches. Our results show that these training signals should not be neglected when building BWEs, even for distant languages.</abstract>
       <url hash="1eb3dc86">2022.bucc-1.3</url>
@@ -53,7 +53,7 @@
       <title>Building Domain-specific Corpora from the Web: the Case of <fixed-case>E</fixed-case>uropean Digital Service Infrastructures</title>
       <author><first>Rik</first><last>van Noord</last></author>
       <author><first>Cristian</first><last>García-Romero</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Leopoldo</first><last>Pla Sempere</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <pages>23–32</pages>
@@ -77,7 +77,7 @@
       <author><first>Klára</first><last>Tauchmanová</last></author>
       <author><first>Kristýna</first><last>Neumannová</last></author>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>43–49</pages>
       <abstract>We present our submission to the BUCC Shared Task on bilingual term alignment in comparable specialized corpora. We devised three approaches using static embeddings with post-hoc alignment, the Monoses pipeline for unsupervised phrase-based machine translation, and contextualized multilingual embeddings. We show that contextualized embeddings from pretrained multilingual models lead to similar results as static embeddings but further improvement can be achieved by task-specific fine-tuning. Retrieving term pairs from the running phrase tables of the Monoses systems can match this enhanced performance and leads to an average precision of 0.88 on the train set.</abstract>
       <url hash="e7f28797">2022.bucc-1.6</url>
diff --git a/data/xml/2022.cai.xml b/data/xml/2022.cai.xml
index d6ea57bf9f..ec58aebd3d 100644
--- a/data/xml/2022.cai.xml
+++ b/data/xml/2022.cai.xml
@@ -45,7 +45,7 @@
       <author><first>Yeongbeom</first><last>Lim</last></author>
       <author><first>San</first><last>Kim</last></author>
       <author><first>Jin Yea</first><last>Jang</last></author>
-      <author><first>Saim</first><last>Shin</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
       <author><first>Ki-Hoon</first><last>Lee</last></author>
       <pages>16–22</pages>
       <abstract>We propose a Korean multimodal dialogue system targeting emotion-based empathetic dialogues because most research in this field has been conducted in a few languages such as English and Japanese and in certain circumstances. Our dialogue system consists of an emotion detector, an empathetic response generator, a monitoring interface, a voice activity detector, a speech recognizer, a speech synthesizer, a gesture classification, and several controllers to provide both multimodality and empathy during a conversation between a human and a machine. For comparisons across visual influence on users, our dialogue system contains two versions of the user interface, a cat face-based user interface and an avatar-based user interface. We evaluated our dialogue system by investigating the dialogues in text and the average mean opinion scores under three different visual conditions, no visual, the cat face-based, and the avatar-based expressions. The experimental results stand for the importance of adequate visual expressions according to user utterances.</abstract>
@@ -80,8 +80,8 @@
       <author><first>Zhuo</first><last>Gong</last></author>
       <author><first>Daisuke</first><last>Saito</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Hisashi</first><last>Kawai</last></author>
-      <author><first>Nobuaki</first><last>Minematsu</last></author>
+      <author id="hisashi-kawai"><first>Hisashi</first><last>Kawai</last></author>
+      <author id="nobuaki-minematsu"><first>Nobuaki</first><last>Minematsu</last></author>
       <pages>42–47</pages>
       <abstract>Language models (LM) have played crucial roles in automatic speech recognition (ASR) to enhance end-to-end (E2E) ASR systems’ performance. There are two categories of approaches: finding better ways to integrate LMs into ASR systems and adapting on LMs to the task domain. This article will start with a reflection of interpolation-based integration methods of E2E ASR’s scores and LM’s scores. Then we will focus on LM augmentation approaches based on the noisy channel model, which is intrigued by insights obtained from the above reflection. The experiments show that we can enhance an ASR E2E model based on encoder-decoder architecture by pre-training the decoder with text data. This implies the decoder of an E2E model can be treated as an LM and reveals the possibility of enhancing the E2E model without an external LM. Based on those ideas, we proposed the implicit language model canceling method and then did more discussion about the decoder part of an E2E ASR model. The experimental results on the TED-LIUM2 dataset show that our approach achieves a 3.4% relative WER reduction compared with the baseline system, and more analytic experiments provide concrete experimental supports for our assumption.</abstract>
       <url hash="8b347fcc">2022.cai-1.6</url>
@@ -91,7 +91,7 @@
       <title>Semantic Content Prediction for Generating Interviewing Dialogues to Elicit Users’ Food Preferences</title>
       <author><first>Jie</first><last>Zeng</last></author>
       <author><first>Tatsuya</first><last>Sakato</last></author>
-      <author><first>Yukiko</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko</first><last>Nakano</last></author>
       <pages>48–58</pages>
       <abstract>Dialogue systems that aim to acquire user models through interactions with users need to have interviewing functionality. In this study, we propose a method to generate interview dialogues to build a dialogue system that acquires user preferences for food. First, we collected 118 text-based dialogues between the interviewer and customer and annotated the communicative function and semantic content of the utterances. Next, using the corpus as training data, we created a classification model for the communicative function of the interviewer’s next utterance and a generative model that predicts the semantic content of the utterance based on the dialogue history. By representing semantic content as a sequence of tokens, we evaluated the semantic content prediction model using BLEU. The results demonstrated that the semantic content produced by the proposed method was closer to the ground truth than the semantic content transformed from the output text generated by the retrieval model and GPT-2. Further, we present some examples of dialogue generation by applying model outputs to template-based sentence generation.</abstract>
       <url hash="bf83ea99">2022.cai-1.7</url>
diff --git a/data/xml/2022.case.xml b/data/xml/2022.case.xml
index 646a635c07..d4b193f079 100644
--- a/data/xml/2022.case.xml
+++ b/data/xml/2022.case.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2022-12-07" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE)</booktitle>
-      <editor><first>Ali</first><last>Hürriyetoğlu</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <editor><first>Vanni</first><last>Zavarella</last></editor>
       <editor><first>Erdem</first><last>Yörük</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -37,7 +37,7 @@
       <author><first>Huiling</first><last>You</last></author>
       <author><first>David</first><last>Samuel</last></author>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>7-15</pages>
       <abstract>Event extraction involves the detection and extraction of both the event triggers and the corresponding arguments. Existing systems often decompose event extraction into multiple subtasks, without considering their possible interactions. In this paper, we propose EventGraph, a joint framework for event extraction, which encodes events as graphs. We represent event triggers and arguments as nodes in a semantic graph. Event extraction therefore becomes a graph parsing problem, which provides the following advantages: 1) performing event detection and argument extraction jointly; 2) detecting and extracting multiple events from a piece of text; 3) capturing the complicated interaction between event arguments and triggers. Experimental results on ACE2005 show that our model is competitive to state-of-the-art systems and has substantially improved the results on argument extraction. Additionally, we create two new datasets from ACE2005 where we keep the entire text spans for event arguments, instead of just the head word(s). Our code and models will be released as open-source.</abstract>
       <url hash="520bef61">2022.case-1.2</url>
@@ -59,8 +59,8 @@
     <paper id="4">
       <title>A Hybrid Knowledge and Transformer-Based Model for Event Detection with Automatic Self-Attention Threshold, Layer and Head Selection</title>
       <author><first>Thierry</first><last>Desot</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>21-31</pages>
       <abstract>Event and argument role detection are frequently conceived as separate tasks. In this work we conceive both processes as one taskin a hybrid event detection approach. Its main component is based on automatic keyword extraction (AKE) using the self-attention mechanism of a BERT transformer model. As a bottleneck for AKE is defining the threshold of the attention values, we propose a novel method for automatic self-attention thresholdselection. It is fueled by core event information, or simply the verb and its arguments as the backbone of an event. These are outputted by a knowledge-based syntactic parser. In a secondstep the event core is enriched with other semantically salient words provided by the transformer model. Furthermore, we propose an automatic self-attention layer and head selectionmechanism, by analyzing which self-attention cells in the BERT transformer contribute most to the hybrid event detection and which linguistic tasks they represent. This approach was integrated in a pipeline event extraction approachand outperforms three state of the art multi-task event extraction methods.</abstract>
       <url hash="4224d53d">2022.case-1.4</url>
@@ -95,7 +95,7 @@
     <paper id="7">
       <title><fixed-case>LTRC</fixed-case> @ Causal News Corpus 2022: Extracting and Identifying Causal Elements using Adapters</title>
       <author><first>Hiranmai</first><last>Sri Adibhatla</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>50-55</pages>
       <abstract>Causality detection and identification is centered on identifying semantic and cognitive connections in a sentence. In this paper, we describe the effort of team LTRC for Causal News Corpus - Event Causality Shared Task 2022 at the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE 2022). The shared task consisted of two subtasks: 1) identifying if a sentence contains a causality relation, and 2) identifying spans of text that correspond to cause, effect and signals. We fine-tuned transformer-based models with adapters for both subtasks. Our best-performing models obtained a binary F1 score of 0.853 on held-out data for subtask 1 and a macro F1 score of 0.032 on held-out data for subtask 2. Our approach is ranked third in subtask 1 and fourth in subtask 2. The paper describes our experiments, solutions, and analysis in detail.</abstract>
       <url hash="9fec7e31">2022.case-1.7</url>
@@ -124,7 +124,7 @@
       <author><first>Esau</first><last>Villatoro-tello</last></author>
       <author><first>Martin</first><last>Fajcik</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <author><first>Petr</first><last>Motlicek</last></author>
       <pages>61-69</pages>
       <abstract>In this paper, we describe our participation in the subtask 1 of CASE-2022, Event Causality Identification with Casual News Corpus. We address the Causal Relation Identification (CRI) task by exploiting a set of simple yet complementary techniques for fine-tuning language models (LMs) on a few annotated examples (i.e., a few-shot configuration).We follow a prompt-based prediction approach for fine-tuning LMs in which the CRI task is treated as a masked language modeling problem (MLM). This approach allows LMs natively pre-trained on MLM tasks to directly generate textual responses to CRI-specific prompts. We compare the performance of this method against ensemble techniques trained on the entire dataset. Our best-performing submission was fine-tuned with only 256 instances per class, 15.7% of the all available data, and yet obtained the second-best precision (0.82), third-best accuracy (0.82), and an F1-score (0.85) very close to what was reported by the winner team (0.86).</abstract>
@@ -141,7 +141,7 @@
       <author><first>Esau</first><last>Villatoro-tello</last></author>
       <author><first>Sergio</first><last>Burdisso</last></author>
       <author><first>Petr</first><last>Motlicek</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>70-78</pages>
       <abstract>In this paper, we describe our shared task submissions for Subtask 2 in CASE-2022, Event Causality Identification with Casual News Corpus. The challenge focused on the automatic detection of all cause-effect-signal spans present in the sentence from news-media. We detect cause-effect-signal spans in a sentence using T5 — a pre-trained autoregressive language model. We iteratively identify all cause-effect-signal span triplets, always conditioning the prediction of the next triplet on the previously predicted ones. To predict the triplet itself, we consider different causal relationships such as cause→effect→signal. Each triplet component is generated via a language model conditioned on the sentence, the previous parts of the current triplet, and previously predicted triplets. Despite training on an extremely small dataset of 160 samples, our approach achieved competitive performance, being placed second in the competition. Furthermore, we show that assuming either cause→effect or effect→cause order achieves similar results.</abstract>
       <url hash="d87d57ac">2022.case-1.10</url>
@@ -288,7 +288,7 @@
       <author><first>Huiling</first><last>You</last></author>
       <author><first>David</first><last>Samuel</last></author>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>155-160</pages>
       <abstract>This paper presents our submission to the 2022 edition of the CASE 2021 shared task 1, subtask 4. The EventGraph system adapts an end-to-end, graph-based semantic parser to the task of Protest Event Extraction and more specifically subtask 4 on event trigger and argument extraction. We experiment with various graphs, encoding the events as either “labeled-edge” or “node-centric” graphs. We show that the “node-centric” approach yields best results overall, performing well across the three languages of the task, namely English, Spanish, and Portuguese. EventGraph is ranked 3rd for English and Portuguese, and 4th for Spanish.</abstract>
       <url hash="96e63159">2022.case-1.22</url>
diff --git a/data/xml/2022.ccgpk.xml b/data/xml/2022.ccgpk.xml
index 464a608131..408abd23ed 100644
--- a/data/xml/2022.ccgpk.xml
+++ b/data/xml/2022.ccgpk.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2022-10-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Customized Chat Grounding Persona and Knowledge</booktitle>
-      <editor><first>Heuiseok</first><last>Lim</last></editor>
+      <editor id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></editor>
       <editor><first>Seungryong</first><last>Kim</last></editor>
       <editor><first>Yeonsoo</first><last>Lee</last></editor>
       <editor><first>Steve</first><last>Lin</last></editor>
-      <editor><first>Paul Hongsuck</first><last>Seo</last></editor>
+      <editor id="hongsuck-seo"><first>Paul Hongsuck</first><last>Seo</last></editor>
       <editor><first>Yumin</first><last>Suh</last></editor>
       <editor><first>Yoonna</first><last>Jang</last></editor>
       <editor><first>Jungwoo</first><last>Lim</last></editor>
@@ -43,7 +43,7 @@
       <title>Proto-Gen: An end-to-end neural generator for persona and knowledge grounded response generation</title>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>9–14</pages>
       <abstract>In this paper we detail the implementation of Proto-Gen, an end-to-end neural response generator capable of selecting appropriate persona and fact sentences from available options, and generating persona and fact grounded responses. Incorporating a novel interaction layer in an encoder-decoder architecture, Proto-Gen facilitates learning dependencies between facts, persona and the context, and outperforms existing baselines on the FoCus dataset for both the sub-tasks of persona and fact selection, and response generation. We further fine tune Proto-Gen’s hyperparameters, and share our results and findings.</abstract>
       <url hash="c66fe922">2022.ccgpk-1.2</url>
diff --git a/data/xml/2022.ccl.xml b/data/xml/2022.ccl.xml
index 423df3244b..4f067fa38c 100644
--- a/data/xml/2022.ccl.xml
+++ b/data/xml/2022.ccl.xml
@@ -158,7 +158,7 @@
       <title>融合知识的多目标词联合框架语义分析模型(Knowledge-integrated Joint Model For Multi-target Frame Semantic Parsing)</title>
       <author><first>Xudong</first><last>Chen</last><variant script="hani"><first>旭东</first><last>陈</last></variant></author>
       <author><first>Ce</first><last>Zheng</last><variant script="hani"><first>策</first><last>郑</last></variant></author>
-      <author><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
       <pages>132–142</pages>
       <abstract>“框架语义分析任务是自然语言处理领域的一项基础性任务。先前的研究工作大多针对单目标词进行模型设计,无法一次性完成多个目标词的框架语义结构提取。本文提出了一个面向多目标的框架语义分析模型,实现对多目标词的联合预测。该模型对框架语义分析的各项子任务进行交互性建模,实现子任务间的双向交互。此外,本文利用关系图网络对框架关系信息进行编码,将其作为框架语义学知识融入模型中。实验表明,本文模型在不借助额外语料的情况下相比之前模型都有不同程度的提高。消融实验证明了本文模型设计的有效性。此外我们分析了模型目前存在的局限性以及未来的改进方向。”</abstract>
       <url hash="2b83c412">2022.ccl-1.13</url>
@@ -181,7 +181,7 @@
       <title>基于实体信息增强及多粒度融合的多文档摘要(Multi-Document Summarization Based on Entity Information Enhancement and Multi-Granularity Fusion)</title>
       <author><first>Jiarui</first><last>Tang</last><variant script="hani"><first>嘉蕊</first><last>唐</last></variant></author>
       <author><first>Liu</first><last>Meiling</last><variant script="hani"><first>美玲</first><last>刘</last></variant></author>
-      <author><first>Tiejun</first><last>Zhao</last><variant script="hani"><first>铁军</first><last>赵</last></variant></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><variant script="hani"><first>铁军</first><last>赵</last></variant></author>
       <author><first>Jiyun</first><last>Zhou</last><variant script="hani"><first>继云</first><last>周</last></variant></author>
       <pages>155–165</pages>
       <abstract>“神经网络模型的快速发展使得多文档摘要可以获得人类可读的流畅的摘要,对大规模的数据进行预训练可以更好的从自然语言文本中捕捉更丰富的语义信息,并更好的作用于下游任务。目前很多的多文档摘要的工作也应用了预训练模型(如BERT)并取得了一定的效果,但是这些预训练模型不能更好的从文本中捕获事实性知识,没有考虑到多文档文本的结构化的实体-关系信息,本文提出了基于实体信息增强和多粒度融合的多文档摘要模型MGNIE,将实体关系信息融入预训练模型ERNIE中,增强知识事实以获得多层语义信息,解决摘要生成的事实一致性问题。进而从多种粒度进行多文档层次结构的融合建模,以词信息、实体信息以及句子信息捕捉长文本信息摘要生成所需的关键信息点。本文设计的模型,在国际标准评测数据集MultiNews上对比强基线模型效果和竞争力获得较大提升。”</abstract>
@@ -203,7 +203,7 @@
       <title>生成,推理与排序:基于多任务架构的数学文字题生成(Generating, Reasoning &amp; Ranking: Multitask Learning Framework for Math Word Problem Generation)</title>
       <author><first>Tianyang</first><last>Cao</last><variant script="hani"><first>天旸</first><last>曹</last></variant></author>
       <author><first>Xiaodan</first><last>Xu</last><variant script="hani"><first>晓丹</first><last>许</last></variant></author>
-      <author><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><variant script="hani"><first>宝宝</first><last>常</last></variant></author>
       <pages>178–189</pages>
       <abstract>“数学文字题是一段能反映数学等式潜在逻辑的叙述性文本。成功的数学问题生成在语言生成和教育领域都具有广阔的应用前景。前人的工作大多需要人工标注的模板或关键词作为输入,且未考虑数学表达式本身的特点。本文提出了一种多任务联合训练的问题文本生成模型。我们设计了三个辅助任务,包括数字间关系抽取、数值排序和片段替换预测。他们与生成目标联合训练,用以监督解码器的学习,增强模型对运算逻辑和问题条件的感知能力。实验证明所提方法能有效提升生成的数学文字题的质量。”</abstract>
       <url hash="1c664b28">2022.ccl-1.17</url>
@@ -272,7 +272,7 @@
     <paper id="23">
       <title>期货领域知识图谱构建(Construction of Knowledge Graph in Futures Field)</title>
       <author><first>Wenxin</first><last>Li</last><variant script="hani"><first>雯昕</first><last>李</last></variant></author>
-      <author><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><variant script="hani"><first>红英</first><last>昝</last></variant></author>
       <author><first>Tongfeng</first><last>Guan</last><variant script="hani"><first>同峰</first><last>关</last></variant></author>
       <author><first>Yingjie</first><last>Han</last><variant script="hani"><first>英杰</first><last>韩</last></variant></author>
       <pages>246–256</pages>
@@ -320,7 +320,7 @@
       <author><first>Houli</first><last>Ma</last><variant script="hani"><first>候丽</first><last>马</last></variant></author>
       <author><first>Ling</first><last>Dong</last><variant script="hani"><first>凌</first><last>董</last></variant></author>
       <author><first>Wenjun</first><last>Wang</last><variant script="hani"><first>文君</first><last>王</last></variant></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last><variant script="hani"><first>剑</first><last>王</last></variant></author>
+      <author><first>Jian</first><last>Wang</last><variant script="hani"><first>剑</first><last>王</last></variant></author>
       <author><first>Shengxiang</first><last>Gao</last><variant script="hani"><first>盛祥</first><last>高</last></variant></author>
       <author><first>Zhengtao</first><last>Yu</last><variant script="hani"><first>正涛</first><last>余</last></variant></author>
       <pages>293–304</pages>
@@ -456,7 +456,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>C</fixed-case>ore<fixed-case>V</fixed-case>alue:面向价值观计算的中文核心价值-行为体系及知识库(<fixed-case>C</fixed-case>ore<fixed-case>V</fixed-case>alue: <fixed-case>C</fixed-case>hinese Core Value-Behavior Frame and Knowledge Base for Value Computing)</title>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Sanle</first><last>Zhang</last><variant script="hani"><first>三乐</first><last>张</last></variant></author>
       <author><first>Dong</first><last>Yu</last><variant script="hani"><first>东</first><last>于</last></variant></author>
       <author><first>Lin</first><last>Bo</last><variant script="hani"><first>琳</first><last>薄</last></variant></author>
@@ -496,7 +496,7 @@
       <author><first>Yixuan</first><last>Ma</last><variant script="hani"><first>翊轩</first><last>马</last></variant></author>
       <author><first>Wenrui</first><last>Wang</last><variant script="hani"><first>文瑞</first><last>王</last></variant></author>
       <author><first>Yuzhe</first><last>Liu</last><variant script="hani"><first>宇哲</first><last>刘</last></variant></author>
-      <author><first>Muyun</first><last>Yang</last><variant script="hani"><first>沐昀</first><last>杨</last></variant></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last><variant script="hani"><first>沐昀</first><last>杨</last></variant></author>
       <pages>455–463</pages>
       <abstract>“专利文献是一种重要的技术文献,是知识产权强国的重要工作内容。目前专利语料库多集中于信息检索、机器翻译以及文本文分类等领域,尚缺乏更细粒度的标注,不足以支持问答、阅读理解等新形态的人工智能技术研发。本文面向专利智能分析的需要,提出了从解决问题、技术手段、效果三个角度对发明专利进行专利标注,并最终构建了包含313篇的中文专利关键信息语料库。利用命名实体识别技术对语料库关键信息进行识别和验证,表明专利关键信息的识别是不同于领域命名实体识别的更大粒度的信息抽取难题。”</abstract>
       <url hash="6ecfa5a2">2022.ccl-1.41</url>
@@ -561,7 +561,7 @@
       <author><first>Jishun</first><last>Zhao</last><variant script="hani"><first>继舜</first><last>赵</last></variant></author>
       <author><first>Shucheng</first><last>Zhu</last><variant script="hani"><first>述承</first><last>朱</last></variant></author>
       <author><first>Ying</first><last>Liu</last><variant script="hani"><first>颖</first><last>刘</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>510–522</pages>
       <abstract>“尽管悲观者认为,职场中永远不可能存在性别平等。但随着人们观念的转变,愈来愈多的人们相信,职业的选择应只与个人能力相匹配,而不应由个体的性别决定。目前已经发现自然语言处理的各个任务中都存在着职业性别偏见。但这些研究往往只针对特定的英文任务,缺乏针对中文的、综合多任务的职业性别偏见测量研究。本文基于霍兰德职业模型,从中文自然语言处理中常见的三个任务出发,测量了词向量、共指消解和文本生成中的职业性别偏见,发现不同任务中的职业性别偏见既有一定的共性,又存在着独特的差异性。总体来看,不同任务中的职业性别偏见反映了现实生活中人们对于不同性别所选择职业的刻板印象。此外,在设计不同任务的偏见测量指标时,还需要考虑如语体、词序等语言学要素的影响。”</abstract>
       <url hash="7aba0a8e">2022.ccl-1.46</url>
diff --git a/data/xml/2022.cl.xml b/data/xml/2022.cl.xml
index f1446e73f0..ce91a350fd 100644
--- a/data/xml/2022.cl.xml
+++ b/data/xml/2022.cl.xml
@@ -13,7 +13,7 @@
     </meta>
     <paper id="1">
       <title>Obituary: <fixed-case>M</fixed-case>artin Kay</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <doi>10.1162/coli_a_00424</doi>
       <pages>1–3</pages>
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>To Augment or Not to Augment? A Comparative Study on Text Augmentation Techniques for Low-Resource <fixed-case>NLP</fixed-case></title>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <doi>10.1162/coli_a_00425</doi>
       <abstract>Data-hungry deep neural networks have established themselves as the de facto standard for many NLP tasks, including the traditional sequence tagging ones. Despite their state-of-the-art performance on high-resource languages, they still fall behind their statistical counterparts in low-resource scenarios. One methodology to counterattack this problem is text augmentation, that is, generating new synthetic training data points from existing data. Although NLP has recently witnessed several new textual augmentation techniques, the field still lacks a systematic performance analysis on a diverse set of languages and sequence tagging tasks. To fill this gap, we investigate three categories of text augmentation methodologies that perform changes on the syntax (e.g., cropping sub-sentences), token (e.g., random word insertion), and character (e.g., character swapping) levels. We systematically compare the methods on part-of-speech tagging, dependency parsing, and semantic role labeling for a diverse set of language families using various models, including the architectures that rely on pretrained multilingual contextualized language models such as mBERT. Augmentation most significantly improves dependency parsing, followed by part-of-speech tagging and semantic role labeling. We find the experimented techniques to be effective on morphologically rich languages in general rather than analytic languages such as Vietnamese. Our results suggest that the augmentation techniques can further improve over strong baselines based on mBERT, especially for dependency parsing. We identify the character-level methods as the most consistent performers, while synonym replacement and syntactic augmenters provide inconsistent improvements. Finally, we discuss that the results most heavily depend on the task, language pair (e.g., syntactic-level techniques mostly benefit higher-level tasks and morphologically richer languages), and model type (e.g., token-level augmentation provides significant improvements for BPE, while character-level ones give generally higher scores for char and mBERT based models).</abstract>
       <pages>5–42</pages>
@@ -35,7 +35,7 @@
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Tameesh</first><last>Biswas</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <doi>10.1162/coli_a_00429</doi>
       <abstract>The quest for new information is an inborn human trait and has always been quintessential for human survival and progress. Novelty drives curiosity, which in turn drives innovation. In Natural Language Processing (NLP), Novelty Detection refers to finding text that has some new information to offer with respect to whatever is earlier seen or known. With the exponential growth of information all across the Web, there is an accompanying menace of redundancy. A considerable portion of the Web contents are duplicates, and we need efficient mechanisms to retain new information and filter out redundant information. However, detecting redundancy at the semantic level and identifying novel text is not straightforward because the text may have less lexical overlap yet convey the same information. On top of that, non-novel/redundant information in a document may have assimilated from multiple source documents, not just one. The problem surmounts when the subject of the discourse is documents, and numerous prior documents need to be processed to ascertain the novelty/non-novelty of the current one in concern. In this work, we build upon our earlier investigations for document-level novelty detection and present a comprehensive account of our efforts toward the problem. We explore the role of pre-trained Textual Entailment (TE) models to deal with multiple source contexts and present the outcome of our current investigations. We argue that a multipremise entailment task is one close approximation toward identifying semantic-level non-novelty. Our recent approach either performs comparably or achieves significant improvement over the latest reported results on several datasets and across several related tasks (paraphrasing, plagiarism, rewrite). We critically analyze our performance with respect to the existing state of the art and show the superiority and promise of our approach for future investigations. We also present our enhanced dataset TAP-DLND 2.0 and several baselines to the community for further research on document-level novelty detection.</abstract>
       <pages>77–117</pages>
@@ -75,7 +75,7 @@
       <author><first>Zhijing</first><last>Jin</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <author><first>Olga</first><last>Vechtomova</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <doi>10.1162/coli_a_00426</doi>
       <abstract>Text style transfer is an important task in natural language generation, which aims to control certain attributes in the generated text, such as politeness, emotion, humor, and many others. It has a long history in the field of natural language processing, and recently has re-gained significant attention thanks to the promising performance brought by deep neural models. In this article, we present a systematic survey of the research on neural text style transfer, spanning over 100 representative articles since the first neural text style transfer work in 2017. We discuss the task formulation, existing datasets and subtasks, evaluation, as well as the rich methodologies in the presence of parallel and non-parallel data. We also provide discussions on a variety of important topics regarding the future development of this task.1</abstract>
       <pages>155–205</pages>
@@ -94,7 +94,7 @@
     <paper id="8">
       <title>Revisiting the Boundary between <fixed-case>ASR</fixed-case> and <fixed-case>NLU</fixed-case> in the Age of Conversational Dialog Systems</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <doi>10.1162/coli_a_00430</doi>
       <abstract>As more users across the world are interacting with dialog agents in their daily life, there is a need for better speech understanding that calls for renewed attention to the dynamics between research in automatic speech recognition (ASR) and natural language understanding (NLU). We briefly review these research areas and lay out the current relationship between them. In light of the observations we make in this article, we argue that (1) NLU should be cognizant of the presence of ASR models being used upstream in a dialog system’s pipeline, (2) ASR should be able to learn from errors found in NLU, (3) there is a need for end-to-end data sets that provide semantic annotations on spoken input, (4) there should be stronger collaboration between ASR and NLU research communities.</abstract>
       <pages>221–232</pages>
@@ -112,7 +112,7 @@
     <paper id="10">
       <title>Erratum for “Formal Basis of a Language Universal”</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/coli_x_00432</doi>
       <pages>237–237</pages>
       <url hash="6fd084dd">2022.cl-1.10</url>
@@ -132,7 +132,7 @@
     </meta>
     <paper id="1">
       <title>Ethics Sheet for Automatic Emotion Recognition and Sentiment Analysis</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <doi>10.1162/coli_a_00433</doi>
       <abstract>The importance and pervasiveness of emotions in our lives makes affective computing a tremendously important and vibrant line of work. Systems for automatic emotion recognition (AER) and sentiment analysis can be facilitators of enormous progress (e.g., in improving public health and commerce) but also enablers of great harm (e.g., for suppressing dissidents and manipulating voters). Thus, it is imperative that the affective computing community actively engage with the ethical ramifications of their creations. In this article, I have synthesized and organized information from AI Ethics and Emotion Recognition literature to present fifty ethical considerations relevant to AER. Notably, this ethics sheet fleshes out assumptions hidden in how AER is commonly framed, and in the choices often made regarding the data, method, and evaluation. Special attention is paid to the implications of AER on privacy and social groups. Along the way, key recommendations are made for responsible AER. The objective of the ethics sheet is to facilitate and encourage more thoughtfulness on why to automate, how to automate, and how to judge success well before the building of AER systems. Additionally, the ethics sheet acts as a useful introductory document on emotion recognition (complementing survey articles).</abstract>
       <pages>239–278</pages>
@@ -144,7 +144,7 @@
       <title>Domain Adaptation with Pre-trained Transformers for Query-Focused Abstractive Text Summarization</title>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <doi>10.1162/coli_a_00434</doi>
       <abstract>The Query-Focused Text Summarization (QFTS) task aims at building systems that generate the summary of the text document(s) based on the given query. A key challenge in addressing this task is the lack of large labeled data for training the summarization model. In this article, we address this challenge by exploring a series of domain adaptation techniques. Given the recent success of pre-trained transformer models in a wide range of natural language processing tasks, we utilize such models to generate abstractive summaries for the QFTS task for both single-document and multi-document scenarios. For domain adaptation, we apply a variety of techniques using pre-trained transformer-based summarization models including transfer learning, weakly supervised learning, and distant supervision. Extensive experiments on six datasets show that our proposed approach is very effective in generating abstractive summaries for the QFTS task while setting a new state-of-the-art result in several datasets across a set of automatic and human evaluation metrics.</abstract>
       <pages>279–320</pages>
@@ -193,7 +193,7 @@
     </paper>
     <paper id="6">
       <title>Dual Attention Model for Citation Recommendation with Analyses on Explainability of Attention Mechanisms and Qualitative Experiments</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Qiang</first><last>Ma</last></author>
       <doi>10.1162/coli_a_00438</doi>
       <abstract>Based on an exponentially increasing number of academic articles, discovering and citing comprehensive and appropriate resources have become non-trivial tasks. Conventional citation recommendation methods suffer from severe information losses. For example, they do not consider the section header of the paper that the author is writing and for which they need to find a citation, the relatedness between the words in the local context (the text span that describes a citation), or the importance of each word from the local context. These shortcomings make such methods insufficient for recommending adequate citations to academic manuscripts. In this study, we propose a novel embedding-based neural network called dual attention model for citation recommendation (DACR) to recommend citations during manuscript preparation. Our method adapts the embedding of three semantic pieces of information: words in the local context, structural contexts,1 and the section on which the author is working. A neural network model is designed to maximize the similarity between the embedding of the three inputs (local context words, section headers, and structural contexts) and the target citation appearing in the context. The core of the neural network model comprises self-attention and additive attention; the former aims to capture the relatedness between the contextual words and structural context, and the latter aims to learn their importance. Recommendation experiments on real-world datasets demonstrate the effectiveness of the proposed approach. To seek explainability on DACR, particularly the two attention mechanisms, the learned weights from them are investigated to determine how the attention mechanisms interpret “relatedness” and “importance” through the learned weights. In addition, qualitative analyses were conducted to testify that DACR could find necessary citations that were not noticed by the authors in the past due to the limitations of the keyword-based searching.</abstract>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="8">
       <title>Boring Problems Are Sometimes the Most Interesting</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <doi>10.1162/coli_a_00439</doi>
       <abstract>In a recent position paper, Turing Award Winners Yoshua Bengio, Geoffrey Hinton, and Yann LeCun make the case that symbolic methods are not needed in AI and that, while there are still many issues to be resolved, AI will be solved using purely neural methods. In this piece I issue a challenge: Demonstrate that a purely neural approach to the problem of text normalization is possible. Various groups have tried, but so far nobody has eliminated the problem of unrecoverable errors, errors where, due to insufficient training data or faulty generalization, the system substitutes some other reading for the correct one. Solutions have been proposed that involve a marriage of traditional finite-state methods with neural models, but thus far nobody has shown that the problem can be solved using neural methods alone. Though text normalization is hardly an “exciting” problem, I argue that until one can solve “boring” problems like that using purely AI methods, one cannot claim that AI is a success.</abstract>
       <pages>483–490</pages>
@@ -257,7 +257,7 @@
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <doi>10.1162/coli_a_00443</doi>
       <abstract>Recent advances in multilingual language modeling have brought the idea of a truly universal parser closer to reality. However, such models are still not immune to the “curse of multilinguality”: Cross-language interference and restrained model capacity remain major obstacles. To address this, we propose a novel language adaptation approach by introducing contextual language adapters to a multilingual parser. Contextual language adapters make it possible to learn adapters via language embeddings while sharing model parameters across languages based on contextual parameter generation. Moreover, our method allows for an easy but effective integration of existing linguistic typology features into the parsing model. Because not all typological features are available for every language, we further combine typological feature prediction with parsing in a multi-task model that achieves very competitive parsing performance without the need for an external prediction system for missing features. The resulting parser, UDapter, can be used for dependency parsing as well as sequence labeling tasks such as POS tagging, morphological tagging, and NER. In dependency parsing, it outperforms strong monolingual and multilingual baselines on the majority of both high-resource and low-resource (zero-shot) languages, showing the success of the proposed adaptation approach. In sequence labeling tasks, our parser surpasses the baseline on high resource languages, and performs very competitively in a zero-shot setting. Our in-depth analyses show that adapter generation via typological features of languages is key to this success.1</abstract>
       <pages>555–592</pages>
@@ -291,8 +291,8 @@
       <title>Survey of Low-Resource Machine Translation</title>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <doi>10.1162/coli_a_00446</doi>
       <abstract>We present a survey covering the state of the art in low-resource machine translation (MT) research. There are currently around 7,000 languages spoken in the world and almost all language pairs lack significant resources for training machine translation models. There has been increasing interest in research addressing the challenge of producing useful translation models when very little translated training data is available. We present a summary of this topical research field and provide a description of the techniques evaluated by researchers in several recent shared tasks in low-resource MT.</abstract>
@@ -304,7 +304,7 @@
       <title>Position Information in Transformers: An Overview</title>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/coli_a_00445</doi>
       <abstract>Transformers are arguably the main workhorse in recent natural language processing research. By definition, a Transformer is invariant with respect to reordering of the input. However, language is inherently sequential and word order is essential to the semantics and syntax of an utterance. In this article, we provide an overview and theoretical comparison of existing methods to incorporate position information into Transformer models. The objectives of this survey are to (1) showcase that position information in Transformer is a vibrant and extensive research area; (2) enable the reader to compare existing methods by providing a unified notation and systematization of different approaches along important model dimensions; (3) indicate what characteristics of an application should be taken into account when selecting a position encoding; and (4) provide stimuli for future research.</abstract>
       <pages>733–763</pages>
@@ -325,7 +325,7 @@
     </meta>
     <paper id="9">
       <title>Martha Palmer and Barbara Di Eugenio Interview Martha Evens</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.1162/coli_a_00453</doi>
       <pages>765–773</pages>
       <url hash="a37c6ee9">2022.cl-4.9</url>
@@ -333,7 +333,7 @@
     </paper>
     <paper id="10">
       <title>Martha Evens, Brief Autobiography</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.1162/coli_a_00452</doi>
       <pages>775–782</pages>
       <url hash="bdfc9be7">2022.cl-4.10</url>
@@ -393,10 +393,10 @@
     </paper>
     <paper id="15">
       <title>Information Theory–based Compositional Distributional Semantics</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Alejandro</first><last>Ariza-Casabona</last></author>
       <author><first>Victor</first><last>Fresno</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <doi>10.1162/coli_a_00454</doi>
       <abstract>In the context of text representation, Compositional Distributional Semantics models aim to fuse the Distributional Hypothesis and the Principle of Compositionality. Text embedding is based on co-ocurrence distributions and the representations are in turn combined by compositional functions taking into account the text structure. However, the theoretical basis of compositional functions is still an open issue. In this article we define and study the notion of Information Theory–based Compositional Distributional Semantics (ICDS): (i) We first establish formal properties for embedding, composition, and similarity functions based on Shannon’s Information Theory; (ii) we analyze the existing approaches under this prism, checking whether or not they comply with the established desirable properties; (iii) we propose two parameterizable composition and similarity functions that generalize traditional approaches while fulfilling the formal properties; and finally (iv) we perform an empirical study on several textual similarity datasets that include sentences with a high and low lexical overlap, and on the similarity between words and their description. Our theoretical analysis and empirical results show that fulfilling formal properties affects positively the accuracy of text representation models in terms of correspondence (isometry) between the embedding and meaning spaces.</abstract>
       <pages>907–948</pages>
@@ -443,7 +443,7 @@
       <title>The Text Anonymization Benchmark (<fixed-case>TAB</fixed-case>): A Dedicated Corpus and Evaluation Framework for Text Anonymization</title>
       <author><first>Ildikó</first><last>Pilán</last></author>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Anthi</first><last>Papadopoulou</last></author>
       <author><first>David</first><last>Sánchez</last></author>
       <author><first>Montserrat</first><last>Batet</last></author>
@@ -468,7 +468,7 @@
     </paper>
     <paper id="21">
       <title>A Metrological Perspective on Reproducibility in <fixed-case>NLP</fixed-case>*</title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <doi>10.1162/coli_a_00448</doi>
       <abstract>Reproducibility has become an increasingly debated topic in NLP and ML over recent years, but so far, no commonly accepted definitions of even basic terms or concepts have emerged. The range of different definitions proposed within NLP/ML not only do not agree with each other, they are also not aligned with standard scientific definitions. This article examines the standard definitions of repeatability and reproducibility provided by the meta-science of metrology, and explores what they imply in terms of how to assess reproducibility, and what adopting them would mean for reproducibility assessment in NLP/ML. It turns out the standard definitions lead directly to a method for assessing reproducibility in quantified terms that renders results from reproduction studies comparable across multiple reproductions of the same original study, as well as reproductions of different original studies. The article considers where this method sits in relation to other aspects of NLP work one might wish to assess in the context of reproducibility.</abstract>
       <pages>1125–1135</pages>
diff --git a/data/xml/2022.clasp.xml b/data/xml/2022.clasp.xml
index 479a220659..1f3c1b13fe 100644
--- a/data/xml/2022.clasp.xml
+++ b/data/xml/2022.clasp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2022 CLASP Conference on (Dis)embodiment</booktitle>
       <editor><first>Simon</first><last>Dobnik</last></editor>
       <editor><first>Julian</first><last>Grove</last></editor>
-      <editor><first>Asad</first><last>Sayeed</last></editor>
+      <editor id="asad-sayeed"><first>Asad</first><last>Sayeed</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
       <month>September</month>
@@ -21,7 +21,7 @@
       <title>A Small but Informed and Diverse Model: The Case of the Multimodal <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat!? Guessing Game</title>
       <author><first>Claudio</first><last>Greco</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Stella</first><last>Frank</last></author>
       <pages>1–10</pages>
       <abstract>Pre-trained Vision and Language Transformers achieve high performance on downstream tasks due to their ability to transfer representational knowledge accumulated during pretraining on substantial amounts of data. In this paper, we ask whether it is possible to compete with such models using features based on transferred (pre-trained, frozen) representations combined with a lightweight architecture. We take a multimodal guessing task as our testbed, GuessWhat?!. An ensemble of our lightweight model matches the performance of the finetuned pre-trained transformer (LXMERT). An uncertainty analysis of our ensemble shows that the lightweight transferred representations close the data uncertainty gap with LXMERT, while retaining model diversity leading to ensemble boost. We further demonstrate that LXMERT’s performance gain is due solely to its extra V&amp;L pretraining rather than because of architectural improvements. These results argue for flexible integration of multiple features and lightweight models as a viable alternative to large, cumbersome, pre-trained models.</abstract>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Embodied Interaction in Mental Health Consultations: Some Observations on Grounding and Repair</title>
       <author><first>Jing Hui</first><last>Law</last></author>
-      <author><first>Patrick</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick</first><last>Healey</last></author>
       <author><first>Rosella</first><last>Galindo Esparza</last></author>
       <pages>51–61</pages>
       <abstract>Shared physical space is an important resource for face-to-face interaction. People use the position and orientation of their bodies—relative to each other and relative to the physical environment—to determine who is part of a conversation, to manage conversational roles (e.g. speaker, addressee, side-participant) and to help co-ordinate turn-taking. These embodied uses of shared space also extend to more fine-grained aspects of interaction, such as gestures and body movements, to support topic management, orchestration of turns and grounding. This paper explores the role of embodied resources in (mis)communication in a corpus of mental health consultations. We illustrate some of the specific ways in which clinicians and patients can exploit embodiment and the position of objects in shared space to diagnose and manage moments of misunderstanding.</abstract>
diff --git a/data/xml/2022.clib.xml b/data/xml/2022.clib.xml
index 9571d71ba9..6eb8cd1680 100644
--- a/data/xml/2022.clib.xml
+++ b/data/xml/2022.clib.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>nto<fixed-case>P</fixed-case>opulis, a System for Learning Semantic Classes</title>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>8–12</pages>
       <abstract>Ontopopulis is a multilingual weakly supervised terminology learning algorithm which takes on its input a set of seed terms for a semantic category and an unannotated text corpus. The algorithm learns additional terms, which belong to this category. For example, for the category “environmental disasters” the input seed set in English is environmental disaster, water pollution, climate change. Among the highest ranked new terms which the system learns for this semantic class are deforestation, global warming and so on.</abstract>
       <url hash="3c9aef12">2022.clib-1.1</url>
@@ -124,7 +124,7 @@
       <author><first>Andrei-Marius</first><last>Avram</last></author>
       <author><first>Vasile</first><last>Păis</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Valentin</first><last>Badea</last></author>
       <pages>105–112</pages>
@@ -152,7 +152,7 @@
     <paper id="15">
       <title>Sense-Annotated Corpus for <fixed-case>R</fixed-case>ussian</title>
       <author><first>Alexander</first><last>Kirillovich</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Maksim</first><last>Kulaev</last></author>
       <author><first>Angelina</first><last>Bolshina</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
@@ -163,7 +163,7 @@
     </paper>
     <paper id="16">
       <title>A <fixed-case>R</fixed-case>omanian Treebank Annotated with Verbal Multiword Expressions</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Mihaela</first><last>Cristescu</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
       <author><first>Bianca-Mădălina</first><last>Zgreabăn</last></author>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="19">
       <title>Language rehabilitation of people with <fixed-case>BROCA</fixed-case> aphasia using deep neural machine translation</title>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <author><first>David</first><last>Langlois</last></author>
       <author><first>Peter</first><last>Pribil</last></author>
       <pages>162–170</pages>
diff --git a/data/xml/2022.clinicalnlp.xml b/data/xml/2022.clinicalnlp.xml
index f092193ce4..c900c58e58 100644
--- a/data/xml/2022.clinicalnlp.xml
+++ b/data/xml/2022.clinicalnlp.xml
@@ -126,7 +126,7 @@
       <author><first>Isabelle Rose</first><last>Alberto</last></author>
       <author><first>Pia Gabrielle</first><last>Alfonso</last></author>
       <author><first>Dana</first><last>Moukheiber</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <author><first>Jennifer</first><last>Liang</last></author>
       <author><first>Preethi</first><last>Raghavan</last></author>
@@ -167,7 +167,7 @@
     <paper id="11">
       <title>Ensemble-based Fine-Tuning Strategy for Temporal Relation Extraction from the Clinical Narrative</title>
       <author><first>Lijing</first><last>Wang</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>103-108</pages>
@@ -180,7 +180,7 @@
       <title>Exploring Text Representations for Generative Temporal Relation Extraction</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>109-113</pages>
       <abstract>Sequence-to-sequence models are appealing because they allow both encoder and decoder to be shared across many tasks by formulating those tasks as text-to-text problems. Despite recently reported successes of such models, we find that engineering input/output representations for such text-to-text models is challenging. On the Clinical TempEval 2016 relation extraction task, the most natural choice of output representations, where relations are spelled out in simple predicate logic statements, did not lead to good performance. We explore a variety of input/output representations, with the most successful prompting one event at a time, and achieving results competitive with standard pairwise temporal relation extraction systems.</abstract>
diff --git a/data/xml/2022.clpsych.xml b/data/xml/2022.clpsych.xml
index ac5b5c6b4d..c1baafbd69 100644
--- a/data/xml/2022.clpsych.xml
+++ b/data/xml/2022.clpsych.xml
@@ -8,7 +8,7 @@
       <editor><first>Maria</first><last>Liakata</last></editor>
       <editor><first>Steven</first><last>Bedrick</last></editor>
       <editor><first>Bart</first><last>Desmet</last></editor>
-      <editor><first>Molly</first><last>Ireland</last></editor>
+      <editor id="molly-ireland"><first>Molly</first><last>Ireland</last></editor>
       <editor><first>Andrew</first><last>Lee</last></editor>
       <editor><first>Sean</first><last>MacAvaney</last></editor>
       <editor><first>Matthew</first><last>Purver</last></editor>
@@ -79,9 +79,9 @@
       <author><first>James</first><last>Fiumara</last></author>
       <author><first>Juhi</first><last>Pandey</last></author>
       <author><first>Christopher</first><last>Chatham</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Robert</first><last>Schultz</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="robert-t-schultz"><first>Robert</first><last>Schultz</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Julia</first><last>Parish-morris</last></author>
       <pages>40-46</pages>
       <abstract>This study examined differences in linguistic features produced by autistic and neurotypical (NT) children during brief picture descriptions, and assessed feature stability over time. Weekly speech samples from well-characterized participants were collected using a telephony system designed to improve access for geographically isolated and historically marginalized communities. Results showed stable group differences in certain acoustic features, some of which may potentially serve as key outcome measures in future treatment studies. These results highlight the importance of eliciting semi-structured speech samples in a variety of contexts over time, and adds to a growing body of research showing that fine-grained naturalistic communication features hold promise for intervention research.</abstract>
@@ -148,7 +148,7 @@
       <author><first>Michael</first><last>Pullmann</last></author>
       <author><first>Thomas</first><last>Hull</last></author>
       <author><first>Patricia</first><last>Areán</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <pages>105-115</pages>
       <abstract>The increasing adoption of message-based behavioral therapy enables new approaches to assessing mental health using linguistic analysis of patient-generated text. Word counting approaches have demonstrated utility for linguistic feature extraction, but deep learning methods hold additional promise given recent advances in this area. We evaluated the utility of emotion features extracted using a BERT-based model in comparison to emotions extracted using word counts as predictors of symptom severity in a large set of messages from text-based therapy sessions involving over 6,500 unique patients, accompanied by data from repeatedly administered symptom scale measurements. BERT-based emotion features explained more variance in regression models of symptom severity, and improved predictive modeling of scale-derived diagnostic categories. However, LIWC categories that are not directly related to emotions provided valuable and complementary information for modeling of symptom severity, indicating a role for both approaches in inferring the mental states underlying patient-generated language.</abstract>
       <url hash="aaf5cc20">2022.clpsych-1.9</url>
@@ -175,7 +175,7 @@
       <author><first>Justin</first><last>Tauscher</last></author>
       <author><first>Xiruo</first><last>Ding</last></author>
       <author><first>Dror</first><last>Ben-zeev</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <pages>126-136</pages>
       <abstract>There is growing evidence that mobile text message exchanges between patients and therapists can augment traditional cognitive behavioral therapy. The automatic characterization of patient thinking patterns in this asynchronous text communication may guide treatment and assist in therapist training. In this work, we automatically identify distorted thinking in text-based patient-therapist exchanges, investigating the role of conversation history (context) in distortion prediction. We identify six unique types of cognitive distortions and utilize BERT-based architectures to represent text messages within the context of the conversation. We propose two approaches for leveraging dynamic conversation context in model training. By representing the text messages within the context of the broader patient-therapist conversation, the models better emulate the therapist’s task of recognizing distorted thoughts. This multi-turn classification approach also leverages the clustering of distorted thinking in the conversation timeline. We demonstrate that including conversation context, including the proposed dynamic context methods, improves distortion prediction performance. The proposed architectures and conversation encoding approaches achieve performance comparable to inter-rater agreement. The presence of any distorted thinking is identified with relatively high performance at 0.73 F1, significantly outperforming the best context-agnostic models (0.68 F1).</abstract>
       <url hash="f85a97db">2022.clpsych-1.11</url>
@@ -231,7 +231,7 @@
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Mckenzie</first><last>Himelein-wachowiak</last></author>
       <author><first>Daniel</first><last>Habib</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Brenda</first><last>Curtis</last></author>
       <pages>177-183</pages>
       <url hash="392fb8fe">2022.clpsych-1.15</url>
@@ -268,8 +268,8 @@
       <author><first>Alicia</first><last>Perez</last></author>
       <author><first>Lourdes</first><last>Araujo</last></author>
       <author><first>Nuria</first><last>Lebea</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <pages>199-204</pages>
       <abstract>This paper describes the participation of our group on the CLPsych 2022 shared task. For task A, which tries to capture changes in mood over time, we have applied an Approximate Nearest Neighbour (ANN) extraction technique with the aim of relabelling the user messages according to their proximity, based on the representation of these messages in a vector space. Regarding the subtask B, we have used the output of the subtask A to train a Recurrent Neural Network (RNN) to predict the risk of suicide at the user level. The results obtained are very competitive considering that our team was one of the few that made use of the organisers’ proposed virtual environment and also made use of the Task A output to predict the Task B results.</abstract>
       <url hash="ffc57143">2022.clpsych-1.17</url>
@@ -328,7 +328,7 @@
       <author><first>Prasadith</first><last>Kirinde Gamaarachchige</last></author>
       <author><first>Ahmed</first><last>Husseini Orabi</last></author>
       <author><first>Mahmoud</first><last>Husseini Orabi</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>232-238</pages>
       <abstract>This paper investigates the impact of using Multi-Task Learning (MTL) to predict mood changes over time for each individual (social media user). The presented models were developed as a part of the Computational Linguistics and Clinical Psychology (CLPsych) 2022 shared task. Given the limited number of Reddit social media users, as well as their posts, we decided to experiment with different multi-task learning architectures to identify to what extent knowledge can be shared among similar tasks. Due to class imbalance at both post and user levels and to accommodate task alignment, we randomly sampled an equal number of instances from the respective classes and performed ensemble learning to reduce prediction variance. Faced with several constraints, we managed to produce competitive results that could provide insights into the use of multi-task learning to identify mood changes over time and suicide ideation risk.</abstract>
       <url hash="80bd6eab">2022.clpsych-1.22</url>
@@ -368,9 +368,9 @@
       <author><first>Shashanka</first><last>Subrahmanya</last></author>
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Nikita</first><last>Soni</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>251-258</pages>
       <abstract>Psychological states unfold dynamically; to understand and measure mental health at scale we need to detect and measure these changes from sequences of online posts. We evaluate two approaches to capturing psychological changes in text: the first relies on computing the difference between the embedding of a message with the one that precedes it, the second relies on a “human-aware” multi-level recurrent transformer (HaRT). The mood changes of timeline posts of users were annotated into three classes, ‘ordinary,’ ‘switching’ (positive to negative or vice versa) and ‘escalations’ (increasing in intensity). For classifying these mood changes, the difference-between-embeddings technique – applied to RoBERTa embeddings – showed the highest overall F1 score (0.61) across the three different classes on the test set. The technique particularly outperformed the HaRT transformer (and other baselines) in the detection of switches (F1 = .33) and escalations (F1 = .61).Consistent with the literature, the language use patterns associated with mental-health related constructs in prior work (including depression, stress, anger and anxiety) predicted both mood switches and escalations.</abstract>
       <url hash="c0cdeb82">2022.clpsych-1.25</url>
diff --git a/data/xml/2022.cltw.xml b/data/xml/2022.cltw.xml
index b473599982..240f8760e6 100644
--- a/data/xml/2022.cltw.xml
+++ b/data/xml/2022.cltw.xml
@@ -66,7 +66,7 @@
       <title>Iterated Dependencies in a <fixed-case>B</fixed-case>reton treebank and implications for a Categorial Dependency Grammar</title>
       <author><first>Annie</first><last>Foret</last></author>
       <author><first>Denis</first><last>Béchet</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
       <pages>40–46</pages>
       <abstract>Categorial Dependency Grammars (CDG) are computational grammars for natural language processing, defining dependency structures. They can be viewed as a formal system, where types are attached to words, combining the classical categorial grammars’ elimination rules with valency pairing rules able to define discontinuous (non-projective) dependencies. Algorithms have been proposed to infer grammars in this class from treebanks, with respect to Mel’čuk principles. We consider this approach with experiments on Breton. We focus in particular on ”repeatable dependencies” (iterated) and their patterns. A dependency <tex-math>d</tex-math> is iterated in a dependency structure if some word in this structure governs several other words through dependency d. We illustrate this approach with data in the universal dependencies format and dependency patterns written in Grew (a graph rewriting tool dedicated to applications in natural Language Processing).</abstract>
       <url hash="d08c2fe0">2022.cltw-1.6</url>
@@ -98,7 +98,7 @@
     <paper id="9">
       <title>Handwriting recognition for <fixed-case>S</fixed-case>cottish <fixed-case>G</fixed-case>aelic</title>
       <author><first>William</first><last>Lamb</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Mark</first><last>Sinclair</last></author>
       <pages>60–70</pages>
       <abstract>Like most other minority languages, Scottish Gaelic has limited tools and resources available for Natural Language Processing research and applications. These limitations restrict the potential of the language to participate in modern speech technology, while also restricting research in fields such as corpus linguistics and the Digital Humanities. At the same time, Gaelic has a long written history, is well-described linguistically, and is unusually well-supported in terms of potential NLP training data. For instance, archives such as the School of Scottish Studies hold thousands of digitised recordings of vernacular speech, many of which have been transcribed as paper-based, handwritten manuscripts. In this paper, we describe a project to digitise and recognise a corpus of handwritten narrative transcriptions, with the intention of re-purposing it to develop a Gaelic speech recognition system.</abstract>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="11">
       <title>Cipher – Faoi Gheasa: A Game-with-a-Purpose for <fixed-case>I</fixed-case>rish</title>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Monica</first><last>Ward</last></author>
       <author><first>Liang</first><last>Xu</last></author>
       <pages>77–84</pages>
@@ -176,7 +176,7 @@
       <author><first>Lucy</first><last>Evans</last></author>
       <author><first>William</first><last>Lamb</last></author>
       <author><first>Mark</first><last>Sinclair</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>110–120</pages>
       <abstract>This paper discusses our efforts to develop a full automatic speech recognition (ASR) system for Scottish Gaelic, starting from a point of limited resource. Building ASR technology is important for documenting and revitalising endangered languages; it enables existing resources to be enhanced with automatic subtitles and transcriptions, improves accessibility for users, and, in turn, encourages continued use of the language. In this paper, we explain the many difficulties faced when collecting minority language data for speech recognition. A novel cross-lingual approach to the alignment of training data is used to overcome one such difficulty, and in this way we demonstrate how majority language resources can bootstrap the development of lower-resourced language technology. We use the Kaldi speech recognition toolkit to develop several Gaelic ASR systems, and report a final WER of 26.30%. This is a 9.50% improvement on our original model.</abstract>
       <url hash="d8a6d2c6">2022.cltw-1.16</url>
diff --git a/data/xml/2022.cmcl.xml b/data/xml/2022.cmcl.xml
index 33069e2bc6..baea3c22bc 100644
--- a/data/xml/2022.cmcl.xml
+++ b/data/xml/2022.cmcl.xml
@@ -5,9 +5,9 @@
       <booktitle>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
       <editor><first>Nora</first><last>Hollenstein</last></editor>
-      <editor><first>Cassandra</first><last>Jacobs</last></editor>
+      <editor id="cassandra-l-jacobs"><first>Cassandra</first><last>Jacobs</last></editor>
       <editor><first>Yohei</first><last>Oseki</last></editor>
-      <editor><first>Laurent</first><last>Prévot</last></editor>
+      <editor id="laurent-prevot"><first>Laurent</first><last>Prévot</last></editor>
       <editor><first>Enrico</first><last>Santus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -23,7 +23,7 @@
     <paper id="1">
       <title>Seeing the advantage: visually grounding word embeddings to better capture human semantic knowledge</title>
       <author><first>Danny</first><last>Merkx</last></author>
-      <author><first>Stefan</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan</first><last>Frank</last></author>
       <author><first>Mirjam</first><last>Ernestus</last></author>
       <pages>1-11</pages>
       <abstract>Distributional semantic models capture word-level meaning that is useful in many natural language processing tasks and have even been shown to capture cognitive aspects of word meaning. The majority of these models are purely text based, even though the human sensory experience is much richer. In this paper we create visually grounded word embeddings by combining English text and images and compare them to popular text-based methods, to see if visual information allows our model to better capture cognitive aspects of word meaning. Our analysis shows that visually grounded embedding similarities are more predictive of the human reaction times in a large priming experiment than the purely text-based embeddings. The visually grounded embeddings also correlate well with human word similarity ratings. Importantly, in both experiments we show that the grounded embeddings account for a unique portion of explained variance, even when we include text-based embeddings trained on huge corpora. This shows that visual grounding allows our model to capture information that cannot be extracted using text as the only source of information.</abstract>
@@ -60,7 +60,7 @@
       <title>Less Descriptive yet Discriminative: Quantifying the Properties of Multimodal Referring Utterances via <fixed-case>CLIP</fixed-case></title>
       <author><first>Ece</first><last>Takmaz</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>36-42</pages>
       <abstract>In this work, we use a transformer-based pre-trained multimodal model, CLIP, to shed light on the mechanisms employed by human speakers when referring to visual entities. In particular, we use CLIP to quantify the degree of descriptiveness (how well an utterance describes an image in isolation) and discriminativeness (to what extent an utterance is effective in picking out a single image among similar images) of human referring utterances within multimodal dialogues. Overall, our results show that utterances become less descriptive over time while their discriminativeness remains unchanged. Through analysis, we propose that this trend could be due to participants relying on the previous mentions in the dialogue history, as well as being able to distill the most discriminative information from the visual context. In general, our study opens up the possibility of using this and similar models to quantify patterns in human data and shed light on the underlying cognitive mechanisms.</abstract>
       <url hash="f6e0cfdc">2022.cmcl-1.4</url>
@@ -104,7 +104,7 @@
     <paper id="8">
       <title>Predicting scalar diversity with context-driven uncertainty over alternatives</title>
       <author><first>Jennifer</first><last>Hu</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <pages>68-74</pages>
       <abstract>Scalar implicature (SI) arises when a speaker uses an expression (e.g., “some”) that is semantically compatible with a logically stronger alternative on the same scale (e.g., “all”), leading the listener to infer that they did not intend to convey the stronger meaning. Prior work has demonstrated that SI rates are highly variable across scales, raising the question of what factors determine the SI strength for a particular scale. Here, we test the hypothesis that SI rates depend on the listener’s confidence in the underlying scale, which we operationalize as uncertainty over the distribution of possible alternatives conditioned on the context. We use a T5 model fine-tuned on a text infilling task to estimate this distribution. We find that scale uncertainty predicts human SI rates, measured as entropy over the sampled alternatives and over latent classes among alternatives in sentence embedding space. Furthermore, we do not find a significant effect of the surprisal of the strong scalemate. Our results suggest that pragmatic inferences depend on listeners’ context-driven uncertainty over alternatives.</abstract>
@@ -122,7 +122,7 @@
       <author><first>Neset</first><last>Tan</last></author>
       <author><first>Paul Michael</first><last>Corballis</last></author>
       <author><first>Patricia</first><last>Riddle</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>75-87</pages>
       <abstract>Attention describes cognitive processes that are important to many human phenomena including reading. The term is also used to describe the way in which transformer neural networks perform natural language processing. While attention appears to be very different under these two contexts, this paper presents an analysis of the correlations between transformer attention and overt human attention during reading tasks. An extensive analysis of human eye tracking datasets showed that the dwell times of human eye movements were strongly correlated with the attention patterns occurring in the early layers of pre-trained transformers such as BERT. Additionally, the strength of a correlation was not related to the number of parameters within a transformer. This suggests that something about the transformers’ architecture determined how closely the two measures were correlated.</abstract>
       <url hash="01d4f289">2022.cmcl-1.9</url>
@@ -133,7 +133,7 @@
     <paper id="10">
       <title>About Time: Do Transformers Learn Temporal Verbal Aspect?</title>
       <author><first>Eleni</first><last>Metheniti</last></author>
-      <author><first>Tim</first><last>Van De Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van De Cruys</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <pages>88-101</pages>
       <abstract>Aspect is a linguistic concept that describes how an action, event, or state of a verb phrase is situated in time. In this paper, we explore whether different transformer models are capable of identifying aspectual features. We focus on two specific aspectual features: telicity and duration. Telicity marks whether the verb’s action or state has an endpoint or not (telic/atelic), and duration denotes whether a verb expresses an action (dynamic) or a state (stative). These features are integral to the interpretation of natural language, but also hard to annotate and identify with NLP methods. We perform experiments in English and French, and our results show that transformer models adequately capture information on telicity and duration in their vectors, even in their non-finetuned forms, but are somewhat biased with regard to verb tense and word order.</abstract>
@@ -195,7 +195,7 @@
       <title>Team <fixed-case>ÚFAL</fixed-case> at <fixed-case>CMCL</fixed-case> 2022 Shared Task: Figuring out the correct recipe for predicting Eye-Tracking features using Pretrained Language Models</title>
       <author><first>Sunit</first><last>Bhattacharya</last></author>
       <author><first>Rishu</first><last>Kumar</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>130-135</pages>
       <abstract>Eye-Tracking data is a very useful source of information to study cognition and especially language comprehension in humans. In this paper, we describe our systems for the CMCL 2022 shared task on predicting eye-tracking information. We describe our experiments withpretrained models like BERT and XLM and the different ways in which we used those representations to predict four eye-tracking features. Along with analysing the effect of using two different kinds of pretrained multilingual language models and different ways of pooling the token-level representations, we also explore how contextual information affects the performance of the systems. Finally, we also explore if factors like augmenting linguistic information affect the predictions. Our submissions achieved an average MAE of 5.72 and ranked 5th in the shared task. The average MAE showed further reduction to 5.25 in post task evaluation.</abstract>
       <url hash="69bed3a8">2022.cmcl-1.15</url>
diff --git a/data/xml/2022.cmlc.xml b/data/xml/2022.cmlc.xml
index ec60176885..bc728287c0 100644
--- a/data/xml/2022.cmlc.xml
+++ b/data/xml/2022.cmlc.xml
@@ -7,7 +7,7 @@
       <editor><first>Adrien</first><last>Barbaresi</last></editor>
       <editor><first>Simon</first><last>Clematide</last></editor>
       <editor><first>Marc</first><last>Kupietz</last></editor>
-      <editor><first>Harald</first><last>Lüngen</last></editor>
+      <editor id="harald-lungen"><first>Harald</first><last>Lüngen</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -23,7 +23,7 @@
       <title>Challenges in Creating a Representative Corpus of <fixed-case>R</fixed-case>omanian Micro-Blogging Text</title>
       <author><first>Vasile</first><last>Pais</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Roxana</first><last>Micu</last></author>
       <author><first>Carol Luca</first><last>Gasan</last></author>
diff --git a/data/xml/2022.codi.xml b/data/xml/2022.codi.xml
index 0ff783b055..f0cb0b8e1b 100644
--- a/data/xml/2022.codi.xml
+++ b/data/xml/2022.codi.xml
@@ -23,7 +23,7 @@
     <paper id="1">
       <title><fixed-case>KOJAK</fixed-case>: A New Corpus for Studying <fixed-case>G</fixed-case>erman Discourse Particle ja</title>
       <author><first>Adil</first><last>Soubki</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Chong</first><last>Kang</last></author>
       <pages>1–6</pages>
       <abstract>In German, ja can be used as a discourse particle to indicate that a proposition, according to the speaker, is believed by both the speaker and audience. We use this observation to create KoJaK, a distantly-labeled English dataset derived from Europarl for studying when a speaker believes a statement to be common ground. This corpus is then analyzed to identify lexical choices in English that correspond with German ja. Finally, we perform experiments on the dataset to predict if an English clause corresponds to a German clause containing ja and achieve an F-measure of 75.3% on a balanced test corpus.</abstract>
@@ -55,7 +55,7 @@
       <title>Evaluating Discourse Cohesion in Pre-trained Language Models</title>
       <author><first>Jie</first><last>He</last></author>
       <author><first>Wanqiu</first><last>Long</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>28–34</pages>
       <abstract>Large pre-trained neural models have achieved remarkable success in natural language process (NLP), inspiring a growing body of research analyzing their ability from different aspects. In this paper, we propose a test suite to evaluate the cohesive ability of pre-trained language models. The test suite contains multiple cohesion phenomena between adjacent and non-adjacent sentences. We try to compare different pre-trained language models on these phenomena and analyze the experimental results,hoping more attention can be given to discourse cohesion in the future. The built discourse cohesion test suite will be publicly available at <url>https://github.com/probe2/discourse_cohesion</url>.</abstract>
       <url hash="177c4fe9">2022.codi-1.4</url>
@@ -66,7 +66,7 @@
       <author><first>Andrew</first><last>Shen</last></author>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>35–41</pages>
       <abstract>We propose a novel unconstrained bottom-up approach for rhetorical discourse parsing based on sequence labelling of adjacent pairs of discourse units (DUs), based on the framework of Koto et al. (2021). We describe the unique training requirements of an unconstrained parser, and explore two different training procedures: (1) fixed left-to-right; and (2) random order in tree construction. Additionally, we introduce a novel dynamic oracle for unconstrained bottom-up parsing. Our proposed parser achieves competitive results for bottom-up rhetorical discourse parsing.</abstract>
       <url hash="22540c9a">2022.codi-1.5</url>
@@ -97,8 +97,8 @@
       <author><first>Zlata</first><last>Kikteva</last></author>
       <author><first>Kamila</first><last>Gorska</last></author>
       <author><first>Wassiliki</first><last>Siskou</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>54–63</pages>
       <abstract>Building on the recent results of a study into the roles that are played by questions in argumentative dialogue (Hautli-Janisz et al.,2022a), we expand the analysis to investigate a newly released corpus that constitutes the largest extant corpus of closely annotated debate. Questions play a critical role in driving dialogical discourse forward; in combative or critical discursive environments, they not only provide a range of discourse management techniques, they also scaffold the semantic structure of the positions that interlocutors develop. The boundaries, however, between providing substantive answers to questions, merely responding to questions, and evading questions entirely, are fuzzy and the way in which answers, responses and evasions affect the subsequent development of dialogue and argumentation structure are poorly understood. In this paper, we explore how questions have ramifications on the large-scale structure of a debate using as our substrate the BBC television programme Question Time, the foremost topical debate show in the UK. Analysis of the data demonstrates not only that questioning plays a particularly prominent role in such debate, but also that its repercussions can reverberate through a discourse.</abstract>
       <url hash="692654be">2022.codi-1.8</url>
@@ -107,7 +107,7 @@
     <paper id="9">
       <title>Shallow Discourse Parsing for Open Information Extraction and Text Simplification</title>
       <author><first>Christina</first><last>Niklaus</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>64–76</pages>
       <abstract>We present a discourse-aware text simplification (TS) approach that recursively splits and rephrases complex English sentences into a semantic hierarchy of simplified sentences. Using a set of linguistically principled transformation patterns, sentences are converted into a hierarchical representation in the form of core sentences and accompanying contexts that are linked via rhetorical relations. As opposed to previously proposed sentence splitting approaches, which commonly do not take into account discourse-level aspects, our TS approach preserves the semantic relationship of the decomposed constituents in the output. A comparative analysis with the annotations contained in RST-DT shows that we capture the contextual hierarchy between the split sentences with a precision of 89% and reach an average precision of 69% for the classification of the rhetorical relations that hold between them. Moreover, an integration into state-of-the-art Open Information Extraction (IE) systems reveals that when applying our TS approach as a pre-processing step, the generated relational tuples are enriched with additional meta information, resulting in a novel lightweight semantic representation for the task of Open IE.</abstract>
@@ -129,7 +129,7 @@
       <author><first>Mathilde</first><last>Veron</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Guillaume</first><last>Bernard</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>86–91</pages>
       <abstract>Dialog state tracking (DST) is a core step for task-oriented dialogue systems aiming to track the user’s current goal during a dialogue. Recently a special focus has been put on applying existing DST models to new domains, in other words performing zero-shot cross-domain transfer. While recent state-of-the-art models leverage large pre-trained language models, no work has been made on understanding and improving the results of first developed zero-shot models like SUMBT. In this paper, we thus propose to improve SUMBT zero-shot results on MultiWOZ by using attention modulation during inference. This method improves SUMBT zero-shot results significantly on two domains and does not worsen the initial performance with the great advantage of needing no additional training.</abstract>
       <url hash="55ca6f07">2022.codi-1.11</url>
@@ -156,12 +156,12 @@
       <booktitle>Proceedings of the CODI-CRAC 2022 Shared Task on Anaphora, Bridging, and Discourse Deixis in Dialogue</booktitle>
       <editor><first>Juntao</first><last>Yu</last></editor>
       <editor><first>Sopan</first><last>Khosla</last></editor>
-      <editor><first>Ramesh</first><last>Manuvinakurike</last></editor>
-      <editor><first>Lori</first><last>Levin</last></editor>
+      <editor id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
-      <editor><first>Carolyn</first><last>Rose</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn</first><last>Rose</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
       <month>October</month>
diff --git a/data/xml/2022.cogalex.xml b/data/xml/2022.cogalex.xml
index 71ffea65f6..fff8ad80b8 100644
--- a/data/xml/2022.cogalex.xml
+++ b/data/xml/2022.cogalex.xml
@@ -55,7 +55,7 @@
       <title><fixed-case>CAT</fixed-case> <fixed-case>M</fixed-case>any<fixed-case>N</fixed-case>ames: A New Dataset for Object Naming in <fixed-case>C</fixed-case>atalan</title>
       <author><first>Mar</first><last>Domínguez Orfila</last></author>
       <author><first>Maite</first><last>Melero Nogués</last></author>
-      <author><first>Gemma</first><last>Boleda Torrent</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda Torrent</last></author>
       <pages>31–36</pages>
       <abstract>Object Naming is an important task within the field of Language and Vision that consists of generating a correct and appropriate name for an object given an image. The ManyNames dataset uses real-world human annotated images with multiple labels, instead of just one. In this work, we describe the adaptation of this dataset (originally in English) to Catalan, by (i) machine-translating the English labels and (ii) collecting human annotations for a subset of the original corpus and comparing both resources. Analyses reveal divergences in the lexical variation of the two sets showing potential problems of directly translated resources, particularly when there is no resource to a proper context, which in this case is conveyed by the image. The analysis also points to the impact of cultural factors in the naming task, which should be accounted for in future cross-lingual naming tasks.</abstract>
       <url hash="02402ad8">2022.cogalex-1.4</url>
@@ -76,7 +76,7 @@
     <paper id="6">
       <title>Putting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et’s Dictionary Examples in the Context of Definition Modelling: An Empirical Analysis</title>
       <author><first>Fatemah</first><last>Almeman</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>42–48</pages>
       <abstract>Definition modeling is the task to generate a valid definition for a given input term. This relatively novel task has been approached either with no context (i.e., given a word embedding alone) and, more recently, as word-in-context modeling. Despite their success, most works make little to no distinction between resources and their specific features (e.g., type and style of definitions, or quality of examples) when used for training. Given the high diversity lexicographic resources exhibit in terms of topic coverage, style and formal structure, it is desirable for downstream definition modeling to better understand which of them are better suited for the task. In this paper, we propose an empirical evaluation of the well-known lexical database WordNet, and specifically, its dictionary examples. We evaluate them both directly, by matching them against criteria for good dictionary writing, and indirectly, in the task of definition modeling. Our results suggest that WordNet’s dictionary examples could be improved by extending them in length, and incorporating prototypicality.</abstract>
       <url hash="2ab3bc41">2022.cogalex-1.6</url>
diff --git a/data/xml/2022.coling.xml b/data/xml/2022.coling.xml
index ab0c9848ed..e3afd6ae0f 100644
--- a/data/xml/2022.coling.xml
+++ b/data/xml/2022.coling.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2022-10-17" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 29th International Conference on Computational Linguistics</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <editor><first>Hansaem</first><last>Kim</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <editor><first>Pum-Mo</first><last>Ryu</last></editor>
       <editor><first>Hsin-Hsi</first><last>Chen</last></editor>
       <editor><first>Lucia</first><last>Donatelli</last></editor>
@@ -17,7 +17,7 @@
       <editor><first>Patrizia</first><last>Paggio</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <editor><first>Seokhwan</first><last>Kim</last></editor>
-      <editor><first>Younggyun</first><last>Hahm</last></editor>
+      <editor id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></editor>
       <editor><first>Zhong</first><last>He</last></editor>
       <editor><first>Tony Kyungil</first><last>Lee</last></editor>
       <editor><first>Enrico</first><last>Santus</last></editor>
@@ -77,10 +77,10 @@
     <paper id="5">
       <title>Measuring Morphological Fusion Using Partial Information Decomposition</title>
       <author><first>Michaela</first><last>Socolof</last></author>
-      <author><first>Jacob Louis</first><last>Hoover</last></author>
+      <author id="jacob-hoover-vigly"><first>Jacob Louis</first><last>Hoover</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>44–54</pages>
       <abstract>Morphological systems across languages vary when it comes to the relation between form and meaning. In some languages, a single meaning feature corresponds to a single morpheme, whereas in other languages, multiple meaning features are bundled together into one morpheme. The two types of languages have been called agglutinative and fusional, respectively, but this distinction does not capture the graded nature of the phenomenon. We provide a mathematically precise way of characterizing morphological systems using partial information decomposition, a framework for decomposing mutual information into three components: unique, redundant, and synergistic information. We show that highly fusional languages are characterized by high levels of synergy.</abstract>
       <url hash="347c2e8b">2022.coling-1.5</url>
@@ -172,7 +172,7 @@
     <paper id="14">
       <title>Character Jacobian: Modeling <fixed-case>C</fixed-case>hinese Character Meanings with Deep Learning Model</title>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>152–162</pages>
       <abstract>Compounding, a prevalent word-formation process, presents an interesting challenge for computational models. Indeed, the relations between compounds and their constituents are often complicated. It is particularly so in Chinese morphology, where each character is almost simultaneously bound and free when treated as a morpheme. To model such word-formation process, we propose the Notch (NOnlinear Transformation of CHaracter embeddings) model and the character Jacobians. The Notch model first learns the non-linear relations between the constituents and words, and the character Jacobians further describes the character’s role in each word. In a series of experiments, we show that the Notch model predicts the embeddings of the real words from their constituents but helps account for the behavioral data of the pseudowords. Moreover, we also demonstrated that character Jacobians reflect the characters’ meanings. Taken together, the Notch model and character Jacobians may provide a new perspective on studying the word-formation process and morphology with modern deep learning.</abstract>
       <url hash="80c11d72">2022.coling-1.14</url>
@@ -193,10 +193,10 @@
     <paper id="16">
       <title>Exploring Semantic Spaces for Detecting Clustering and Switching in Verbal Fluency</title>
       <author><first>Özge</first><last>Alacam</last></author>
-      <author><first>Simeon</first><last>Schüz</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last></author>
       <author><first>Martin</first><last>Wegrzyn</last></author>
       <author><first>Johanna</first><last>Kißler</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>178–191</pages>
       <abstract>In this work, we explore the fitness of various word/concept representations in analyzing an experimental verbal fluency dataset providing human responses to 10 different category enumeration tasks. Based on human annotations of so-called clusters and switches between sub-categories in the verbal fluency sequences, we analyze whether lexical semantic knowledge represented in word embedding spaces (GloVe, fastText, ConceptNet, BERT) is suitable for detecting these conceptual clusters and switches within and across different categories. Our results indicate that ConceptNet embeddings, a distributional semantics method enriched with taxonomical relations, outperforms other semantic representations by a large margin. Moreover, category-specific analysis suggests that individual thresholds per category are more suited for the analysis of clustering and switching in particular embedding sub-space instead of a one-fits-all cross-category solution. The results point to interesting directions for future work on probing word embedding models on the verbal fluency task.</abstract>
       <url hash="f1025ead">2022.coling-1.16</url>
@@ -255,7 +255,7 @@
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Shutong</first><last>Feng</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>266–284</pages>
       <abstract>Continual learning is one of the key components of human learning and a necessary requirement of artificial intelligence. As dialogue can potentially span infinitely many topics and tasks, a task-oriented dialogue system must have the capability to continually learn, dynamically adapting to new challenges while preserving the knowledge it already acquired. Despite the importance, continual reinforcement learning of the dialogue policy has remained largely unaddressed. The lack of a framework with training protocols, baseline models and suitable metrics, has so far hindered research in this direction. In this work we fill precisely this gap, enabling research in dialogue policy optimisation to go from static to dynamic learning. We provide a continual learning algorithm, baseline architectures and metrics for assessing continual learning models. Moreover, we propose the dynamic dialogue policy transformer (DDPT), a novel dynamic architecture that can integrate new knowledge seamlessly, is capable of handling large state spaces and obtains significant zero-shot performance when being exposed to unseen domains, without any growth in network parameter size. We validate the strengths of DDPT in simulation with two user simulators as well as with humans.</abstract>
       <url hash="6b915850">2022.coling-1.21</url>
@@ -338,7 +338,7 @@
     <paper id="28">
       <title>Schema Encoding for Transferable Dialogue State Tracking</title>
       <author><first>Hyunmin</first><last>Jeon</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>355–366</pages>
       <abstract>Dialogue state tracking (DST) is an essential sub-task for task-oriented dialogue systems. Recent work has focused on deep neural models for DST. However, the neural models require a large dataset for training. Furthermore, applying them to another domain needs a new dataset because the neural models are generally trained to imitate the given dataset. In this paper, we propose Schema Encoding for Transferable Dialogue State Tracking (SET-DST), which is a neural DST method for effective transfer to new domains. Transferable DST could assist developments of dialogue systems even with few dataset on target domains. We use a schema encoder not just to imitate the dataset but to comprehend the schema of the dataset. We aim to transfer the model to new domains by encoding new schemas and using them for DST on multi-domain settings. As a result, SET-DST improved the joint accuracy by 1.46 points on MultiWOZ 2.1.</abstract>
       <url hash="860654c0">2022.coling-1.28</url>
@@ -403,7 +403,7 @@
       <author><first>Junyoung</first><last>Son</last></author>
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Jungwoo</first><last>Lim</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>412–423</pages>
       <abstract>The dialogue-based relation extraction (DialogRE) task aims to predict the relations between argument pairs that appear in dialogue. Most previous studies utilize fine-tuning pre-trained language models (PLMs) only with extensive features to supplement the low information density of the dialogue by multiple speakers. To effectively exploit inherent knowledge of PLMs without extra layers and consider scattered semantic cues on the relation between the arguments, we propose a Guiding model with RelAtional Semantics using Prompt (GRASP). We adopt a prompt-based fine-tuning approach and capture relational semantic clues of a given dialogue with 1) an argument-aware prompt marker strategy and 2) the relational clue detection task. In the experiments, GRASP achieves state-of-the-art performance in terms of both F1 and F1c scores on a DialogRE dataset even though our method only leverages PLMs without adding any extra layers.</abstract>
       <url hash="26394c3a">2022.coling-1.33</url>
@@ -464,7 +464,7 @@
       <author><first>Qixian</first><last>Zhou</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>471–484</pages>
       <abstract>Knowledge-grounded dialog systems need to incorporate smooth transitions among knowledge selected for generating responses, to ensure that dialog flows naturally. For document-grounded dialog systems, the inter- and intra-document knowledge relations can be used to model such conversational flows. We develop a novel Multi-Document Co-Referential Graph (Coref-MDG) to effectively capture the inter-document relationships based on commonsense and similarity and the intra-document co-referential structures of knowledge segments within the grounding documents. We propose CorefDiffs, a Co-referential and Differential flow management method, to linearize the static Coref-MDG into conversational sequence logic. CorefDiffs performs knowledge selection by accounting for contextual graph structures and the knowledge difference sequences. CorefDiffs significantly outperforms the state-of-the-art by 9.5%, 7.4% and 8.2% on three public benchmarks. This demonstrates that the effective modeling of co-reference and knowledge difference for dialog flows are critical for transitions in document-grounded conversation.</abstract>
       <url hash="a5d97616">2022.coling-1.38</url>
@@ -474,7 +474,7 @@
       <title><fixed-case>S</fixed-case>el<fixed-case>F</fixed-case>-Eval: Self-supervised Fine-grained Dialogue Evaluation</title>
       <author><first>Longxuan</first><last>Ma</last></author>
       <author><first>Ziyu</first><last>Zhuang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Mingda</first><last>Li</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>485–495</pages>
@@ -487,7 +487,7 @@
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>496–504</pages>
       <abstract>Automatic evaluation of open-domain dialogs remains an unsolved problem. Existing methods do not correlate strongly with human annotations. In this paper, we present a new automated evaluation method based on the use of follow-ups. We measure the probability that a language model will continue the conversation with a fixed set of follow-ups (e.g. not really relevant here, what are you trying to say?). When compared against twelve existing methods, our new evaluation achieves the highest correlation with human evaluations.</abstract>
       <url hash="315d70bf">2022.coling-1.40</url>
@@ -535,7 +535,7 @@
       <title>Using Multi-Encoder Fusion Strategies to Improve Personalized Response Selection</title>
       <author><first>Souvik</first><last>Das</last></author>
       <author><first>Sougata</first><last>Saha</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>532–541</pages>
       <abstract>Personalized response selection systems are generally grounded on persona. However, a correlation exists between persona and empathy, which these systems do not explore well. Also, when a contradictory or off-topic response is selected, faithfulness to the conversation context plunges. This paper attempts to address these issues by proposing a suite of fusion strategies that capture the interaction between persona, emotion, and entailment information of the utterances. Ablation studies on the Persona-Chat dataset show that incorporating emotion and entailment improves the accuracy of response selection. We combine our fusion strategies and concept-flow encoding to train a BERT-based model which outperforms the previous methods by margins larger than 2.3% on original personas and 1.9% on revised personas in terms of hits@1 (top-1 accuracy), achieving a new state-of-the-art performance on the Persona-Chat dataset</abstract>
       <url hash="9110fb39">2022.coling-1.44</url>
@@ -570,7 +570,7 @@
     <paper id="47">
       <title><fixed-case>ET</fixed-case>5: A Novel End-to-end Framework for Conversational Machine Reading Comprehension</title>
       <author><first>Xiao</first><last>Zhang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Zewen</first><last>Chi</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>570–579</pages>
@@ -580,12 +580,12 @@
     </paper>
     <paper id="48">
       <title><fixed-case>C</fixed-case>o<fixed-case>HS</fixed-case>-<fixed-case>CQG</fixed-case>: Context and History Selection for Conversational Question Generation</title>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Liangming</first><last>Pan</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <pages>580–591</pages>
       <abstract>Conversational question generation (CQG) serves as a vital task for machines to assist humans, such as interactive reading comprehension, through conversations. Compared to traditional single-turn question generation (SQG), CQG is more challenging in the sense that the generated question is required not only to be meaningful, but also to align with the provided conversation. Previous studies mainly focus on how to model the flow and alignment of the conversation, but do not thoroughly study which parts of the context and history are necessary for the model. We believe that shortening the context and history is crucial as it can help the model to optimise more on the conversational alignment property. To this end, we propose CoHS-CQG, a two-stage CQG framework, which adopts a novel CoHS module to shorten the context and history of the input. In particular, it selects the top-p sentences and history turns by calculating the relevance scores of them. Our model achieves state-of-the-art performances on CoQA in both the answer-aware and answer-unaware settings.</abstract>
       <url hash="6a8b0187">2022.coling-1.48</url>
@@ -631,9 +631,9 @@
       <title>Towards Multi-label Unknown Intent Detection</title>
       <author><first>Yawen</first><last>Ouyang</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>626–635</pages>
       <abstract>Multi-class unknown intent detection has made remarkable progress recently. However, it has a strong assumption that each utterance has only one intent, which does not conform to reality because utterances often have multiple intents. In this paper, we propose a more desirable task, multi-label unknown intent detection, to detect whether the utterance contains the unknown intent, in which each utterance may contain multiple intents. In this task, the unique utterances simultaneously containing known and unknown intents make existing multi-class methods easy to fail. To address this issue, we propose an intuitive and effective method to recognize whether All Intents contained in the utterance are Known (AIK). Our high-level idea is to predict the utterance’s intent number, then check whether the utterance contains the same number of known intents. If the number of known intents is less than the number of intents, it implies that the utterance also contains unknown intents. We benchmark AIK over existing methods, and empirical results suggest that our method obtains state-of-the-art performances. For example, on the MultiWOZ 2.3 dataset, AIK significantly reduces the FPR95 by 12.25% compared to the best baseline.</abstract>
       <url hash="e49a3f02">2022.coling-1.52</url>
@@ -792,7 +792,7 @@
     <paper id="65">
       <title>Investigating the Performance of Transformer-Based <fixed-case>NLI</fixed-case> Models on Presuppositional Inferences</title>
       <author><first>Jad</first><last>Kabbara</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>779–785</pages>
       <abstract>Presuppositions are assumptions that are taken for granted by an utterance, and identifying them is key to a pragmatic interpretation of language. In this paper, we investigate the capabilities of transformer models to perform NLI on cases involving presupposition. First, we present simple heuristics to create alternative “contrastive” test cases based on the ImpPres dataset and investigate the model performance on those test cases. Second, to better understand how the model is making its predictions, we analyze samples from sub-datasets of ImpPres and examine model performance on them. Overall, our findings suggest that NLI-trained transformer models seem to be exploiting specific structural and lexical cues as opposed to performing some kind of pragmatic reasoning.</abstract>
       <url hash="d5f360c7">2022.coling-1.65</url>
@@ -803,7 +803,7 @@
       <author><first>John</first><last>Murzaku</last></author>
       <author><first>Peter</first><last>Zeng</last></author>
       <author><first>Magdalena</first><last>Markowska</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>786–796</pages>
       <abstract>We present a corrected version of a subset of the FactBank data set. Previously published results on FactBank are no longer valid. We perform experiments on FactBank using multiple training paradigms, data smoothing techniques, and polarity classifiers. We argue that f-measure is an important alternative evaluation metric for factuality. We provide new state-of-the-art results for four corpora including FactBank. We perform an error analysis on Factbank combined with two similar corpora.</abstract>
       <url hash="ebddb68f">2022.coling-1.66</url>
@@ -883,7 +883,7 @@
       <title>Dialo-<fixed-case>AP</fixed-case>: A Dependency Parsing Based Argument Parser for Dialogues</title>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>887–901</pages>
       <abstract>While neural approaches to argument mining (AM) have advanced considerably, most of the recent work has been limited to parsing monologues. With an urgent interest in the use of conversational agents for broader societal applications, there is a need to advance the state-of-the-art in argument parsers for dialogues. This enables progress towards more purposeful conversations involving persuasion, debate and deliberation. This paper discusses Dialo-AP, an end-to-end argument parser that constructs argument graphs from dialogues. We formulate AM as dependency parsing of elementary and argumentative discourse units; the system is trained using extensive pre-training and curriculum learning comprising nine diverse corpora. Dialo-AP is capable of generating argument graphs from dialogues by performing all sub-tasks of AM. Compared to existing state-of-the-art baselines, Dialo-AP achieves significant improvements across all tasks, which is further validated through rigorous human evaluation.</abstract>
       <url hash="d9a67199">2022.coling-1.74</url>
@@ -905,7 +905,7 @@
       <author><first>Yaxin</first><last>Fan</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>912–921</pages>
       <abstract>Conversational discourse parsing aims to construct an implicit utterance dependency tree to reflect the turn-taking in a multi-party conversation. Existing works are generally divided into two lines: graph-based and transition-based paradigms, which perform well for short-distance and long-distance dependency links, respectively. However, there is no study to consider the advantages of both paradigms to facilitate conversational discourse parsing. As a result, we propose a distance-aware multi-task framework DAMT that incorporates the strengths of transition-based paradigm to facilitate the graph-based paradigm from the encoding and decoding process. To promote multi-task learning on two paradigms, we first introduce an Encoding Interactive Module (EIM) to enhance the flow of semantic information between both two paradigms during the encoding step. And then we apply a Distance-Aware Graph Convolutional Network (DAGCN) in the decoding process, which can incorporate the different-distance dependency links predicted by the transition-based paradigm to facilitate the decoding of the graph-based paradigm. The experimental results on the datasets STAC and Molweni show that our method can significantly improve the performance of the SOTA graph-based paradigm on long-distance dependency links.</abstract>
       <url hash="04f0fd8e">2022.coling-1.76</url>
@@ -916,7 +916,7 @@
       <author><first>Arman</first><last>Kazmi</last></author>
       <author><first>Sidharth</first><last>Ranjan</last></author>
       <author><first>Arpit</first><last>Sharma</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>922–937</pages>
       <abstract>This work deploys linguistically motivated features to classify paragraph-level text into fiction and non-fiction genre using a logistic regression model and infers lexical and syntactic properties that distinguish the two genres. Previous works have focused on classifying document-level text into fiction and non-fiction genres, while in this work, we deal with shorter texts which are closer to real-world applications like sentiment analysis of tweets. Going beyond simple POS tag ratios proposed in Qureshi et al.(2019) for document-level classification, we extracted multiple linguistically motivated features belonging to four categories: Lexical features, POS ratio features, Syntactic features and Raw features. For the task of short-text classification, a model containing 28 best-features (selected via Recursive feature elimination with cross-validation; RFECV) confers an accuracy jump of 15.56 % over a baseline model consisting of 2 POS-ratio features found effective in previous work (cited above). The efficacy of the above model containing a linguistically motivated feature set also transfers over to another dataset viz, Baby BNC corpus. We also compared the classification accuracy of the logistic regression model with two deep-learning models. A 1D CNN model gives an increase of 2% accuracy over the logistic Regression classifier on both corpora. And the BERT-base-uncased model gives the best classification accuracy of 97% on Brown corpus and 98% on Baby BNC corpus. Although both the deep learning models give better results in terms of classification accuracy, the problem of interpreting these models remains unsolved. In contrast, regression model coefficients revealed that fiction texts tend to have more character-level diversity and have lower lexical density (quantified using content-function word ratios) compared to non-fiction texts. Moreover, subtle differences in word order exist between the two genres, i.e., in fiction texts Verbs precede Adverbs (inter-alia).</abstract>
       <url hash="ea6b6c2c">2022.coling-1.77</url>
@@ -935,7 +935,7 @@
     </paper>
     <paper id="79">
       <title>Hierarchical Information Matters: Text Classification via Tree Based Graph Neural Network</title>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>He</first><last>Zhu</last></author>
       <author><first>Xingyu</first><last>Peng</last></author>
       <author><first>Junran</first><last>Wu</last></author>
@@ -962,7 +962,7 @@
     <paper id="81">
       <title>Community Topic: Topic Model Inference by Consecutive Word Community Discovery</title>
       <author><first>Eric</first><last>Austin</last></author>
-      <author><first>Osmar R.</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar R.</first><last>Zaïane</last></author>
       <author><first>Christine</first><last>Largeron</last></author>
       <pages>971–983</pages>
       <abstract>We present our novel, hyperparameter-free topic modelling algorithm, Community Topic. Our algorithm is based on mining communities from term co-occurrence networks. We empirically evaluate and compare Community Topic with Latent Dirichlet Allocation and the recently developed top2vec algorithm. We find that Community Topic runs faster than the competitors and produces topics that achieve higher coherence scores. Community Topic can discover coherent topics at various scales. The network representation used by Community Topic results in a natural relationship between topics and a topic hierarchy. This allows sub- and super-topics to be found on demand. These features make Community Topic the ideal tool for downstream applications such as applied research and conversational agents.</abstract>
@@ -1016,7 +1016,7 @@
     <paper id="86">
       <title><fixed-case>CONCRETE</fixed-case>: Improving Cross-lingual Fact-checking with Cross-lingual Retrieval</title>
       <author><first>Kung-Hsiang</first><last>Huang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>1024–1035</pages>
       <abstract>Fact-checking has gained increasing attention due to the widespread of falsified information. Most fact-checking approaches focus on claims made in English only due to the data scarcity issue in other languages. The lack of fact-checking datasets in low-resource languages calls for an effective cross-lingual transfer technique for fact-checking. Additionally, trustworthy information in different languages can be complementary and helpful in verifying facts. To this end, we present the first fact-checking framework augmented with cross-lingual retrieval that aggregates evidence retrieved from multiple languages through a cross-lingual retriever. Given the absence of cross-lingual information retrieval datasets with claim-like queries, we train the retriever with our proposed Cross-lingual Inverse Cloze Task (X-ICT), a self-supervised algorithm that creates training instances by translating the title of a passage. The goal for X-ICT is to learn cross-lingual retrieval in which the model learns to identify the passage corresponding to a given translated title. On the X-Fact dataset, our approach achieves 2.23% absolute F1 improvement in the zero-shot cross-lingual setup over prior systems. The source code and data are publicly available at <url>https://github.com/khuangaf/CONCRETE</url>.</abstract>
@@ -1041,7 +1041,7 @@
     </paper>
     <paper id="88">
       <title>Attribute Injection for Pretrained Language Models: A New Benchmark and an Efficient Method</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Kang Min</first><last>Yoo</last></author>
       <author><first>Sang-Woo</first><last>Lee</last></author>
       <pages>1051–1064</pages>
@@ -1053,11 +1053,11 @@
       <title>Towards Robust Neural Retrieval with Source Domain Synthetic Pre-Finetuning</title>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Vikas</first><last>Yadav</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Martin</first><last>Franz</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <pages>1065–1070</pages>
       <abstract>Research on neural IR has so far been focused primarily on standard supervised learning settings, where it outperforms traditional term matching baselines. Many practical use cases of such models, however, may involve previously unseen target domains. In this paper, we propose to improve the out-of-domain generalization of Dense Passage Retrieval (DPR) - a popular choice for neural IR - through synthetic data augmentation only in the source domain. We empirically show that pre-finetuning DPR with additional synthetic data in its source domain (Wikipedia), which we generate using a fine-tuned sequence-to-sequence generator, can be a low-cost yet effective first step towards its generalization. Across five different test sets, our augmented model shows more robust performance than DPR in both in-domain and zero-shot out-of-domain evaluation.</abstract>
       <url hash="965a3a19">2022.coling-1.89</url>
@@ -1097,7 +1097,7 @@
       <author><first>Soumitra</first><last>Ghosh</last></author>
       <author><first>Dhirendra Kumar</first><last>Maurya</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1098–1105</pages>
       <abstract>The World Health Organization has emphasised the need of stepping up suicide prevention efforts to meet the United Nation’s Sustainable Development Goal target of 2030 (Goal 3: Good health and well-being). We address the challenging task of personality subtyping from suicide notes. Most research on personality subtyping has relied on statistical analysis and feature engineering. Moreover, state-of-the-art transformer models in the automated personality subtyping problem have received relatively less attention. We develop a novel EMotion-assisted PERSONAlity Detection Framework (EM-PERSONA). We annotate the benchmark CEASE-v2.0 suicide notes dataset with personality traits across four dichotomies: Introversion (I)-Extraversion (E), Intuition (N)-Sensing (S), Thinking (T)-Feeling (F), Judging (J)–Perceiving (P). Our proposed method outperforms all baselines on comprehensive evaluation using multiple state-of-the-art systems. Across the four dichotomies, EM-PERSONA improved accuracy by 2.04%, 3.69%, 4.52%, and 3.42%, respectively, over the highest-performing single-task systems.</abstract>
       <url hash="1c53d798">2022.coling-1.93</url>
@@ -1116,7 +1116,7 @@
     <paper id="95">
       <title>Exploring Label Hierarchy in a Generative Way for Hierarchical Text Classification</title>
       <author><first>Wei</first><last>Huang</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <author><first>Bo</first><last>Xiao</last></author>
       <author><first>Yihua</first><last>Zhao</last></author>
       <author><first>Zhaoming</first><last>Pan</last></author>
@@ -1132,7 +1132,7 @@
       <title><fixed-case>M</fixed-case>u<fixed-case>S</fixed-case>e<fixed-case>CLIR</fixed-case>: A Multiple Senses and Cross-lingual Information Retrieval Dataset</title>
       <author><first>Wing Yan</first><last>Li</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>1128–1135</pages>
       <abstract>This paper addresses a deficiency in existing cross-lingual information retrieval (CLIR) datasets and provides a robust evaluation of CLIR systems’ disambiguation ability. CLIR is commonly tackled by combining translation and traditional IR. Due to translation ambiguity, the problem of ambiguity is worse in CLIR than in monolingual IR. But existing auto-generated CLIR datasets are dominated by searches for named entity mentions, which does not provide a good measure for disambiguation performance, as named entity mentions can often be transliterated across languages and tend not to have multiple translations. Therefore, we introduce a new evaluation dataset (MuSeCLIR) to address this inadequacy. The dataset focusses on polysemous common nouns with multiple possible translations. MuSeCLIR is constructed from multilingual Wikipedia and supports searches on documents written in European (French, German, Italian) and Asian (Chinese, Japanese) languages. We provide baseline statistical and neural model results on MuSeCLIR which show that MuSeCLIR has a higher requirement on the ability of systems to disambiguate query terms.</abstract>
       <url hash="ab872241">2022.coling-1.96</url>
@@ -1183,8 +1183,8 @@
       <author><first>Yeon</first><last>Seonwoo</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1169–1178</pages>
       <abstract>Domain-specific documents cover terminologies and specialized knowledge. This has been the main challenge of domain-specific document retrieval systems. Previous approaches propose domain-adaptation and transfer learning methods to alleviate this problem. However, these approaches still follow the same document representation method in previous approaches; a document is embedded into a single vector. In this study, we propose VKGDR. VKGDR represents a given corpus into a graph of entities and their relations (known as a virtual knowledge graph) and computes the relevance between queries and documents based on the graph representation. We conduct three experiments 1) domain-specific document retrieval, 2) comparison of our virtual knowledge graph construction method with previous approaches, and 3) ablation study on each component of our virtual knowledge graph. From the results, we see that unsupervised VKGDR outperforms baselines in a zero-shot setting and even outperforms fully-supervised bi-encoder. We also verify that our virtual knowledge graph construction method results in better retrieval performance than previous approaches.</abstract>
       <url hash="4e54bef1">2022.coling-1.101</url>
@@ -1245,7 +1245,7 @@
     <paper id="107">
       <title>From Polarity to Intensity: Mining Morality from Semantic Space</title>
       <author><first>Chunxu</first><last>Zhao</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>1250–1262</pages>
       <abstract>Most works on computational morality focus on moral polarity recognition, i.e., distinguishing right from wrong. However, a discrete polarity label is not informative enough to reflect morality as it does not contain any degree or intensity information. Existing approaches to compute moral intensity are limited to word-level measurement and heavily rely on human labelling. In this paper, we propose MoralScore, a weakly-supervised framework that can automatically measure moral intensity from text. It only needs moral polarity labels, which are more robust and easier to acquire. Besides, the framework can capture latent moral information not only from words but also from sentence-level semantics which can provide a more comprehensive measurement. To evaluate the performance of our method, we introduce a set of evaluation metrics and conduct extensive experiments. Results show that our method achieves good performance on both automatic and human evaluations.</abstract>
@@ -1288,7 +1288,7 @@
       <title>Debiasing Isn’t Enough! – on the Effectiveness of Debiasing <fixed-case>MLM</fixed-case>s and Their Social Biases in Downstream Tasks</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>1299–1310</pages>
       <abstract>We study the relationship between task-agnostic intrinsic and task-specific extrinsic social bias evaluation measures for MLMs, and find that there exists only a weak correlation between these two types of evaluation measures. Moreover, we find that MLMs debiased using different methods still re-learn social biases during fine-tuning on downstream tasks. We identify the social biases in both training instances as well as their assigned labels as reasons for the discrepancy between intrinsic and extrinsic bias evaluation measurements. Overall, our findings highlight the limitations of existing MLM bias evaluation measures and raise concerns on the deployment of MLMs in downstream applications using those measures.</abstract>
       <url hash="b8a434fe">2022.coling-1.111</url>
@@ -1305,7 +1305,7 @@
     </paper>
     <paper id="113">
       <title>A Study of Implicit Bias in Pretrained Language Models against People with Disabilities</title>
-      <author><first>Pranav Narayanan</first><last>Venkit</last></author>
+      <author id="pranav-narayanan-venkit"><first>Pranav Narayanan</first><last>Venkit</last></author>
       <author><first>Mukund</first><last>Srinath</last></author>
       <author><first>Shomir</first><last>Wilson</last></author>
       <pages>1324–1332</pages>
@@ -1418,7 +1418,7 @@
     <paper id="123">
       <title>Self-Supervised Intermediate Fine-Tuning of Biomedical Language Models for Interpreting Patient Case Descriptions</title>
       <author><first>Israa</first><last>Alghanmi</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>1432–1441</pages>
       <abstract>Interpreting patient case descriptions has emerged as a challenging problem for biomedical NLP, where the aim is typically to predict diagnoses, to recommended treatments, or to answer questions about cases more generally. Previous work has found that biomedical language models often lack the knowledge that is needed for such tasks. In this paper, we aim to improve their performance through a self-supervised intermediate fine-tuning strategy based on PubMed abstracts. Our solution builds on the observation that many of these abstracts are case reports, and thus essentially patient case descriptions. As a general strategy, we propose to fine-tune biomedical language models on the task of predicting masked medical concepts from such abstracts. We find that the success of this strategy crucially depends on the selection of the medical concepts to be masked. By ensuring that these concepts are sufficiently salient, we can substantially boost the performance of biomedical language models, achieving state-of-the-art results on two benchmarks.</abstract>
@@ -1438,7 +1438,7 @@
     <paper id="125">
       <title>Can We Guide a Multi-Hop Reasoning Language Model to Incrementally Learn at Each Single-Hop?</title>
       <author><first>Jesus</first><last>Lovon-Melgarejo</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Lynda</first><last>Tamine</last></author>
@@ -1486,7 +1486,7 @@
       <author><first>Jiayi</first><last>Chen</last></author>
       <author><first>Xiao-Yu</first><last>Guo</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1502–1510</pages>
       <abstract>Answering complex questions that require multi-step multi-type reasoning over raw text is challenging, especially when conducting numerical reasoning. Neural Module Networks (NMNs), follow the programmer-interpreter framework and design trainable modules to learn different reasoning skills. However, NMNs only have limited reasoning abilities, and lack numerical reasoning capability. We upgrade NMNs by: (a) bridging the gap between its interpreter and the complex questions; (b) introducing addition and subtraction modules that perform numerical reasoning over numbers. On a subset of DROP, experimental results show that our proposed methods enhance NMNs’ numerical reasoning skills by 17.7% improvement of F1 score and significantly outperform previous state-of-the-art models.</abstract>
       <url hash="ec8d7a22">2022.coling-1.129</url>
@@ -1516,7 +1516,7 @@
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Filip</first><last>Ilievski</last></author>
       <author><first>Jonathan</first><last>Francis</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Alessandro</first><last>Oltramari</last></author>
       <pages>1534–1545</pages>
       <abstract>Procedural text understanding is a challenging language reasoning task that requires models to track entity states across the development of a narrative. We identify three core aspects required for modeling this task, namely the local and global view of the inputs, as well as the global view of outputs. Prior methods have considered a subset of these aspects, which leads to either low precision or low recall. In this paper, we propose a new model Coalescing Global and Local Information (CGLI), which builds entity- and timestep-aware input representations (local input) considering the whole context (global input), and we jointly model the entity states with a structured prediction objective (global output). Thus, CGLI simultaneously optimizes for both precision and recall. Moreover, we extend CGLI with additional output layers and integrate it into a story reasoning framework. Extensive experiments on a popular procedural text understanding dataset show that our model achieves state-of-the-art results, while experiments on a story reasoning benchmark show the positive impact of our model on downstream reasoning.</abstract>
@@ -1542,7 +1542,7 @@
       <title>Case-Based Abductive Natural Language Inference</title>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>1556–1568</pages>
       <abstract>Most of the contemporary approaches for multi-hop Natural Language Inference (NLI) construct explanations considering each test case in isolation. However, this paradigm is known to suffer from semantic drift, a phenomenon that causes the construction of spurious explanations leading to wrong conclusions. In contrast, this paper proposes an abductive framework for multi-hop NLI exploring the retrieve-reuse-refine paradigm in Case-Based Reasoning (CBR). Specifically, we present Case-Based Abductive Natural Language Inference (CB-ANLI), a model that addresses unseen inference problems by analogical transfer of prior explanations from similar examples. We empirically evaluate the abductive framework on commonsense and scientific question answering tasks, demonstrating that CB-ANLI can be effectively integrated with sparse and dense pre-trained encoders to improve multi-hop inference, or adopted as an evidence retriever for Transformers. Moreover, an empirical analysis of semantic drift reveals that the CBR paradigm boosts the quality of the most challenging explanations, a feature that has a direct impact on robustness and accuracy in downstream inference tasks.</abstract>
       <url hash="4a203bcf">2022.coling-1.134</url>
@@ -1605,7 +1605,7 @@
     <paper id="140">
       <title>Conversational <fixed-case>QA</fixed-case> Dataset Generation with Answer Revision</title>
       <author><first>Seonjeong</first><last>Hwang</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>1636–1644</pages>
       <abstract>Conversational question-answer generation is a task that automatically generates a large-scale conversational question answering dataset based on input passages. In this paper, we introduce a novel framework that extracts question-worthy phrases from a passage and then generates corresponding questions considering previous conversations. In particular, our framework revises the extracted answers after generating questions so that answers exactly match paired questions. Experimental results show that our simple answer revision approach leads to significant improvement in the quality of synthetic data. Moreover, we prove that our framework can be effectively utilized for domain adaptation of conversational question answering.</abstract>
       <url hash="471e338a">2022.coling-1.140</url>
@@ -1629,7 +1629,7 @@
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Zhihao</first><last>Fan</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1655–1665</pages>
       <abstract>Multi-hop reasoning requires aggregating multiple documents to answer a complex question. Existing methods usually decompose the multi-hop question into simpler single-hop questions to solve the problem for illustrating the explainable reasoning process. However, they ignore grounding on the supporting facts of each reasoning step, which tends to generate inaccurate decompositions. In this paper, we propose an interpretable stepwise reasoning framework to incorporate both single-hop supporting sentence identification and single-hop question generation at each intermediate step, and utilize the inference of the current hop for the next until reasoning out the final result. We employ a unified reader model for both intermediate hop reasoning and final hop inference and adopt joint optimization for more accurate and robust multi-hop reasoning. We conduct experiments on two benchmark datasets HotpotQA and 2WikiMultiHopQA. The results show that our method can effectively boost performance and also yields a better interpretable reasoning process without decomposition supervision.</abstract>
       <url hash="fa3cb015">2022.coling-1.142</url>
@@ -1708,7 +1708,7 @@
     <paper id="149">
       <title>Unsupervised Question Answering via Answer Diversifying</title>
       <author><first>Yuxiang</first><last>Nie</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Zewen</first><last>Chi</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>1732–1742</pages>
@@ -1764,7 +1764,7 @@
       <author><first>Yonghua</first><last>Zhu</last></author>
       <author><first>Yang</first><last>Chen</last></author>
       <author><first>Qianqian</first><last>Qi</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <author><first>Patricia</first><last>Riddle</last></author>
       <pages>1791–1800</pages>
       <abstract>Multi-hop question answering (QA) requires reasoning over multiple documents to answer a complex question and provide interpretable supporting evidence. However, providing supporting evidence is not enough to demonstrate that a model has performed the desired reasoning to reach the correct answer. Most existing multi-hop QA methods fail to answer a large fraction of sub-questions, even if their parent questions are answered correctly. In this paper, we propose the Prompt-based Conservation Learning (PCL) framework for multi-hop QA, which acquires new knowledge from multi-hop QA tasks while conserving old knowledge learned on single-hop QA tasks, mitigating forgetting. Specifically, we first train a model on existing single-hop QA tasks, and then freeze this model and expand it by allocating additional sub-networks for the multi-hop QA task. Moreover, to condition pre-trained language models to stimulate the kind of reasoning required for specific multi-hop questions, we learn soft prompts for the novel sub-networks to perform type-specific reasoning. Experimental results on the HotpotQA benchmark show that PCL is competitive for multi-hop QA and retains good performance on the corresponding single-hop sub-questions, demonstrating the efficacy of PCL in mitigating knowledge loss by forgetting.</abstract>
@@ -1912,7 +1912,7 @@
       <title>A Hybrid Model of Classification and Generation for Spatial Relation Extraction</title>
       <author><first>Feng</first><last>Wang</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1915–1924</pages>
       <abstract>Extracting spatial relations from texts is a fundamental task for natural language understanding and previous studies only regard it as a classification task, ignoring those spatial relations with null roles due to their poor information. To address the above issue, we first view spatial relation extraction as a generation task and propose a novel hybrid model HMCGR for this task. HMCGR contains a generation and a classification model, while the former can generate those null-role relations and the latter can extract those non-null-role relations to complement each other. Moreover, a reflexivity evaluation mechanism is applied to further improve the accuracy based on the reflexivity principle of spatial relation. Experimental results on SpaceEval show that HMCGR outperforms the SOTA baselines significantly.</abstract>
       <url hash="de2d3a5a">2022.coling-1.166</url>
@@ -1965,7 +1965,7 @@
       <author><first>Shengqiong</first><last>Wu</last></author>
       <author><first>Bobo</first><last>Li</last></author>
       <author><first>Liang</first><last>Zhao</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>1953–1964</pages>
       <abstract>Event extraction (EE) is an essential task of information extraction, which aims to extract structured event information from unstructured text. Most prior work focuses on extracting flat events while neglecting overlapped or nested ones. A few models for overlapped and nested EE includes several successive stages to extract event triggers and arguments,which suffer from error propagation. Therefore, we design a simple yet effective tagging scheme and model to formulate EE as word-word relation recognition, called OneEE. The relations between trigger or argument words are simultaneously recognized in one stage with parallel grid tagging, thus yielding a very fast event extraction speed. The model is equipped with an adaptive event fusion module to generate event-aware representations and a distance-aware predictor to integrate relative distance information for word-word relation recognition, which are empirically demonstrated to be effective mechanisms. Experiments on 3 overlapped and nested EE benchmarks, namely FewFC, Genia11, and Genia13, show that OneEE achieves the state-of-the-art (SOTA) results. Moreover, the inference speed of OneEE is faster than those of baselines in the same condition, and can be further substantially improved since it supports parallel inference.</abstract>
       <url hash="135bffa3">2022.coling-1.170</url>
@@ -1999,7 +1999,7 @@
       <author><first>Qin</first><last>Chen</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Liang</first><last>He</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1990–2000</pages>
       <abstract>Event argument extraction (EAE) aims to extract arguments with given roles from texts, which have been widely studied in natural language processing. Most previous works have achieved good performance in specific EAE datasets with dedicated neural architectures. Whereas, these architectures are usually difficult to adapt to new datasets/scenarios with various annotation schemas or formats. Furthermore, they rely on large-scale labeled data for training, which is unavailable due to the high labelling cost in most cases. In this paper, we propose a multi-format transfer learning model with variational information bottleneck, which makes use of the information especially the common knowledge in existing datasets for EAE in new datasets. Specifically, we introduce a shared-specific prompt framework to learn both format-shared and format-specific knowledge from datasets with different formats. In order to further absorb the common knowledge for EAE and eliminate the irrelevant noise, we integrate variational information bottleneck into our architecture to refine the shared representation. We conduct extensive experiments on three benchmark datasets, and obtain new state-of-the-art performance on EAE.</abstract>
       <url hash="36a21283">2022.coling-1.173</url>
@@ -2038,7 +2038,7 @@
       <author><first>Zhizheng</first><last>Wang</last></author>
       <author><first>Yuanyuan</first><last>Sun</last></author>
       <author><first>Hongfei</first><last>Lin</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <pages>2024–2033</pages>
       <abstract>Chinese Named Entity Recognition (NER) has continued to attract research attention. However, most existing studies only explore the internal features of the Chinese language but neglect other lingual modal features. Actually, as another modal knowledge of the Chinese language, English contains rich prompts about entities that can potentially be applied to improve the performance of Chinese NER. Therefore, in this study, we explore the bilingual enhancement for Chinese NER and propose a unified bilingual interaction module called the Adapted Cross-Transformers with Global Sparse Attention (ACT-S) to capture the interaction of bilingual information. We utilize a model built upon several different ACT-Ss to integrate the rich English information into the Chinese representation. Moreover, our model can learn the interaction of information between bilinguals (inter-features) and the dependency information within Chinese (intra-features). Compared with existing Chinese NER methods, our proposed model can better handle entities with complex structures. The English text that enhances the model is automatically generated by machine translation, avoiding high labour costs. Experimental results on four well-known benchmark datasets demonstrate the effectiveness and robustness of our proposed model.</abstract>
       <url hash="99a738ff">2022.coling-1.176</url>
@@ -2047,7 +2047,7 @@
     <paper id="177">
       <title>Read Extensively, Focus Smartly: A Cross-document Semantic Enhancement Method for Visual Documents <fixed-case>NER</fixed-case></title>
       <author><first>Jun</first><last>Zhao</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>WenYu</first><last>Zhan</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
@@ -2088,7 +2088,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>2065–2074</pages>
       <abstract>Due to the lack of labeled data in many realistic scenarios, a number of few-shot learning methods for text classification have been proposed, among which the meta learning based ones have recently attracted much attention. Such methods usually consist of a learner as the classifier and a meta learner for specializing the learner to different tasks. For the learner, learning rate is crucial to its performance. However, existing methods treat it as a hyper parameter and adjust it manually, which is time-consuming and laborious. Intuitively, for different tasks and neural network layers, the learning rates should be different and self-adaptive. For the meta learner, it requires a good generalization ability so as to quickly adapt to new tasks. Motivated by these issues, we propose a novel meta learning framework, called MetaSLRCL, for few-shot text classification. Specifically, we present a novel meta learning mechanism to obtain different learning rates for different tasks and neural network layers so as to enable the learner to quickly adapt to new training data. Moreover, we propose a task-oriented curriculum learning mechanism to help the meta learner achieve a better generalization ability by learning from different tasks with increasing difficulties. Extensive experiments on three benchmark datasets demonstrate the effectiveness of MetaSLRCL.</abstract>
       <url hash="c6f25945">2022.coling-1.180</url>
@@ -2097,11 +2097,11 @@
     <paper id="181">
       <title>A Simple Temporal Information Matching Mechanism for Entity Alignment between Temporal Knowledge Graphs</title>
       <author><first>Li</first><last>Cai</last></author>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Meirong</first><last>Ma</last></author>
       <author><first>Hao</first><last>Yuan</last></author>
       <author><first>Jianchao</first><last>Zhu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>2075–2086</pages>
       <abstract>Entity alignment (EA) aims to find entities in different knowledge graphs (KGs) that refer to the same object in the real world. Recent studies incorporate temporal information to augment the representations of KGs. The existing methods for EA between temporal KGs (TKGs) utilize a time-aware attention mechanisms to incorporate relational and temporal information into entity embeddings. The approaches outperform the previous methods by using temporal information. However, we believe that it is not necessary to learn the embeddings of temporal information in KGs since most TKGs have uniform temporal representations. Therefore, we propose a simple GNN model combined with a temporal information matching mechanism, which achieves better performance with less time and fewer parameters. Furthermore, since alignment seeds are difficult to label in real-world applications, we also propose a method to generate unsupervised alignment seeds via the temporal information of TKG. Extensive experiments on public datasets indicate that our supervised method significantly outperforms the previous methods and the unsupervised one has competitive performance.</abstract>
       <url hash="9ef22e4f">2022.coling-1.181</url>
@@ -2254,7 +2254,7 @@
       <title>Diverse Multi-Answer Retrieval with Determinantal Point Processes</title>
       <author><first>Poojitha</first><last>Nandigam</last></author>
       <author><first>Nikhil</first><last>Rayaprolu</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>2220–2225</pages>
       <abstract>Often questions provided to open-domain question answering systems are ambiguous. Traditional QA systems that provide a single answer are incapable of answering ambiguous questions since the question may be interpreted in several ways and may have multiple distinct answers. In this paper, we address multi-answer retrieval which entails retrieving passages that can capture majority of the diverse answers to the question. We propose a re-ranking based approach using Determinantal point processes utilizing BERT as kernels. Our method jointly considers query-passage relevance and passage-passage correlation to retrieve passages that are both query-relevant and diverse. Results demonstrate that our re-ranking technique outperforms state-of-the-art method on the AmbigQA dataset.</abstract>
       <url hash="a125df00">2022.coling-1.194</url>
@@ -2306,7 +2306,7 @@
       <author><first>Pasquale</first><last>Minervini</last></author>
       <author><first>David</first><last>Chang</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>2259–2277</pages>
       <abstract>Relation extraction in the biomedical domain is challenging due to the lack of labeled data and high annotation costs, needing domain experts. Distant supervision is commonly used to tackle the scarcity of annotated data by automatically pairing knowledge graph relationships with raw texts. Such a pipeline is prone to noise and has added challenges to scale for covering a large number of biomedical concepts. We investigated existing broad-coverage distantly supervised biomedical relation extraction benchmarks and found a significant overlap between training and test relationships ranging from 26% to 86%. Furthermore, we noticed several inconsistencies in the data construction process of these benchmarks, and where there is no train-test leakage, the focus is on interactions between narrower entity types. This work presents a more accurate benchmark MedDistant19 for broad-coverage distantly supervised biomedical relation extraction that addresses these shortcomings and is obtained by aligning the MEDLINE abstracts with the widely used SNOMED Clinical Terms knowledge base. Lacking thorough evaluation with domain-specific language models, we also conduct experiments validating general domain relation extraction findings to biomedical relation extraction.</abstract>
       <url hash="f6616744">2022.coling-1.198</url>
@@ -2321,7 +2321,7 @@
       <author><first>Junjie</first><last>Shan</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yueming</first><last>Wu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2278–2287</pages>
       <abstract>Natural language understanding (NLU) models tend to rely on spurious correlations (i.e., dataset bias) to achieve high performance on in-distribution datasets but poor performance on out-of-distribution ones. Most of the existing debiasing methods often identify and weaken these samples with biased features (i.e., superficial surface features that cause such spurious correlations). However, down-weighting these samples obstructs the model in learning from the non-biased parts of these samples. To tackle this challenge, in this paper, we propose to eliminate spurious correlations in a fine-grained manner from a feature space perspective. Specifically, we introduce Random Fourier Features and weighted re-sampling to decorrelate the dependencies between features to mitigate spurious correlations. After obtaining decorrelated features, we further design a mutual-information-based method to purify them, which forces the model to learn features that are more relevant to tasks. Extensive experiments on two well-studied NLU tasks demonstrate that our method is superior to other comparative approaches.</abstract>
       <url hash="a29b5f7c">2022.coling-1.199</url>
@@ -2356,7 +2356,7 @@
       <author><first>Shuzheng</first><last>Si</last></author>
       <author><first>Shuang</first><last>Zeng</last></author>
       <author><first>Jiaxing</first><last>Lin</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>2313–2318</pages>
       <abstract>Unlabeled Entity Problem (UEP) in Named Entity Recognition (NER) datasets seriously hinders the improvement of NER performance. This paper proposes SCL-RAI to cope with this problem. Firstly, we decrease the distance of span representations with the same label while increasing it for different ones via span-based contrastive learning, which relieves the ambiguity among entities and improves the robustness of the model over unlabeled entities. Then we propose retrieval augmented inference to mitigate the decision boundary shifting problem. Our method significantly outperforms the previous SOTA method by 4.21% and 8.64% F1-score on two real-world datasets.</abstract>
       <url hash="bda1ab56">2022.coling-1.202</url>
@@ -2366,7 +2366,7 @@
       <title>A Relation Extraction Dataset for Knowledge Extraction from Web Tables</title>
       <author><first>Siffi</first><last>Singh</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
-      <author><first>Gaurav</first><last>Singh</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav</first><last>Singh</last></author>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
       <pages>2319–2327</pages>
       <abstract>Relational web-tables are significant sources of structural information that are widely used for relation extraction and population of facts into knowledge graphs. To transform the web-table data into knowledge, we need to identify the relations that exist between column pairs. Currently, there are only a handful of publicly available datasets with relations annotated against natural web-tables. Most datasets are constructed using synthetic tables that lack valuable metadata information, or are limited in size to be considered as a challenging evaluation set. In this paper, we present REDTab, the largest natural-table relation extraction dataset. We have annotated ~9K tables and ~22K column pairs using crowd sourced annotators from MTurk, which has 50x larger number of column pairs than the existing human-annotated benchmark. Our test set is specially designed to be challenging as observed in our experiment results using TaBERT. We publicly release REDTab as a benchmark for the evaluation process in relation extraction.</abstract>
@@ -2409,7 +2409,7 @@
     <paper id="207">
       <title>Method Entity Extraction from Biomedical Texts</title>
       <author><first>Waqar Bin</first><last>Kalim</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>2357–2362</pages>
       <abstract>In the field of Natural Language Processing (NLP), extracting method entities from biomedical text has been a challenging task. Scientific research papers commonly consist of complex keywords and domain-specific terminologies, and new terminologies are continuously appearing. In this research, we find method terminologies in biomedical text using both rule-based and machine learning techniques. We first use linguistic features to extract method sentence candidates from a large corpus of biomedical text. Then, we construct a silver standard biomedical corpus composed of these sentences. With a rule-based method that makes use of the Stanza dependency parsing module, we label the method entities in these sentences. Using this silver standard corpus we train two machine learning algorithms to automatically extract method entities from biomedical text. Our results show that it is possible to develop machine learning models that can automatically extract method entities to a reasonable accuracy without the need for a gold standard dataset.</abstract>
       <url hash="670b6ac0">2022.coling-1.207</url>
@@ -2500,7 +2500,7 @@
     <paper id="214">
       <title>Improving Zero-Shot Entity Linking Candidate Generation with Ultra-Fine Entity Type Information</title>
       <author><first>Xuhui</first><last>Sui</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Baohang</first><last>Zhou</last></author>
       <author><first>Guoqing</first><last>Zhao</last></author>
@@ -2616,7 +2616,7 @@
       <author><first>Ziyin</first><last>Huang</last></author>
       <author><first>Yijun</first><last>Wang</last></author>
       <author><first>Changzhi</first><last>Sun</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Xiaofeng</first><last>Mou</last></author>
       <author><first>Ding</first><last>Wang</last></author>
@@ -2711,8 +2711,8 @@
       <author><first>Zhong</first><last>Qian</last></author>
       <author><first>Heng</first><last>Zhang</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2622–2632</pages>
       <abstract>Document-level Event Factuality Identification (DEFI) predicts the factuality of a specific event based on a document from which the event can be derived, which is a fundamental and crucial task in Natural Language Processing (NLP). However, most previous studies only considered sentence-level task and did not adopt document-level knowledge. Moreover, they modelled DEFI as a typical text classification task depending on annotated information heavily, and limited to the task-specific corpus only, which resulted in data scarcity. To tackle these issues, we propose a new framework formulating DEFI as Machine Reading Comprehension (MRC) tasks considering both Span-Extraction (Ext) and Multiple-Choice (Mch). Our model does not employ any other explicit annotated information, and utilizes Transfer Learning (TL) to extract knowledge from universal large-scale MRC corpora for cross-domain data augmentation. The empirical results on DLEFM corpus demonstrate that the proposed model outperforms several state-of-the-arts.</abstract>
       <url hash="6daa2c34">2022.coling-1.231</url>
@@ -2724,8 +2724,8 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Jie</first><last>Wang</last></author>
       <author><first>Shiming</first><last>He</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2633–2638</pages>
       <abstract>We leverage cross-language data expansion and retraining to enhance neural Event Detection (abbr., ED) on English ACE corpus. Machine translation is utilized for expanding English training set of ED from that of Chinese. However, experimental results illustrate that such strategy actually results in performance degradation. The survey of translations suggests that the mistakenly-aligned triggers in the expanded data negatively influences the retraining process. We refer this phenomenon to “trigger falsification”. To overcome the issue, we apply heuristic rules for regulating the expanded data, fixing the distracting samples that contain the falsified triggers. The supplementary experiments show that the rule-based regulation is beneficial, yielding the improvement of about 1.6% F1-score for ED. We additionally prove that, instead of transfer learning from the translated ED data, the straight data combination by random pouring surprisingly performs better.</abstract>
       <url hash="7727b95a">2022.coling-1.232</url>
@@ -2778,8 +2778,8 @@
       <author><first>Weijia</first><last>Xu</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2668–2680</pages>
       <abstract>Query-focused summaries of foreign-language, retrieved documents can help a user understand whether a document is actually relevant to the query term. A standard approach to this problem is to first translate the source documents and then perform extractive summarization to find relevant snippets. However, in a cross-lingual setting, the query term does not necessarily appear in the translations of relevant documents. In this work, we show that constrained machine translation and constrained post-editing can improve human relevance judgments by including a query term in a summary when its translation appears in the source document. We also present several strategies for selecting only certain documents for regeneration which yield further improvements</abstract>
       <url hash="be23e5a9">2022.coling-1.236</url>
@@ -2835,10 +2835,10 @@
       <author><first>Harpreet</first><last>Singh</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter W.</first><last>Chang</last></author>
       <author><first>Emilia</first><last>Farcas</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>2734–2747</pages>
       <abstract>Current medical question answering systems have difficulty processing long, detailed and informally worded questions submitted by patients, called Consumer Health Questions (CHQs). To address this issue, we introduce a medical question understanding and answering system with knowledge grounding and semantic self-supervision. Our system is a pipeline that first summarizes a long, medical, user-written question, using a supervised summarization loss. Then, our system performs a two-step retrieval to return answers. The system first matches the summarized user question with an FAQ from a trusted medical knowledge base, and then retrieves a fixed number of relevant sentences from the corresponding answer document. In the absence of labels for question matching or answer relevance, we design 3 novel, self-supervised and semantically-guided losses. We evaluate our model against two strong retrieval-based question answering baselines. Evaluators ask their own questions and rate the answers retrieved by our baselines and own system according to their relevance. They find that our system retrieves more relevant answers, while achieving speeds 20 times faster. Our self-supervised losses also help the summarizer achieve higher scores in ROUGE, as well as in human evaluation metrics.</abstract>
       <url hash="af222c2a">2022.coling-1.241</url>
@@ -2852,7 +2852,7 @@
       <author><first>Yang</first><last>Yang</last></author>
       <author><first>Baohua</first><last>Zhou</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2748–2758</pages>
       <abstract>Existing works on rumor resolution have shown great potential in recognizing word appearance and user participation. However, they ignore the intrinsic propagation mechanisms of rumors and present poor adaptive ability when unprecedented news emerges. To exploit the fine-grained rumor diffusion patterns and generalize rumor resolution methods, we formulate a predecessor task to identify triggering posts, and then exploit their characteristics to facilitate rumor verification. We design a tree-structured annotation interface and extend PHEME dataset with labels on the message level. Data analysis shows that triggers play a critical role in verifying rumors and present similar lingual patterns across irrelevant events. We propose a graph-based model considering the direction and interaction of information flow to implement role-aware rumor resolution. Experimental results demonstrate the effectiveness of our proposed model and progressive scheme.</abstract>
       <url hash="7122fd85">2022.coling-1.242</url>
@@ -2884,7 +2884,7 @@
     <paper id="245">
       <title><fixed-case>CL</fixed-case>o<fixed-case>SE</fixed-case>: Contrastive Learning of Subframe Embeddings for Political Bias Classification of News Media</title>
       <author><first>Michelle YoungJin</first><last>Kim</last></author>
-      <author><first>Kristen Marie</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen Marie</first><last>Johnson</last></author>
       <pages>2780–2793</pages>
       <abstract>Framing is a political strategy in which journalists and politicians emphasize certain aspects of a societal issue in order to influence and sway public opinion. Frameworks for detecting framing in news articles or social media posts are critical in understanding the spread of biased information in our society. In this paper, we propose CLoSE, a multi-task BERT-based model which uses contrastive learning to embed indicators of frames from news articles in order to predict political bias. We evaluate the performance of our proposed model on subframes and political bias classification tasks. We also demonstrate the model’s classification accuracy on zero-shot and few-shot learning tasks, providing a promising avenue for framing detection in unlabeled data.</abstract>
       <url hash="06871120">2022.coling-1.245</url>
@@ -2902,7 +2902,7 @@
     <paper id="247">
       <title><fixed-case>CXR</fixed-case> Data Annotation and Classification with Pre-trained Language Models</title>
       <author><first>Nina</first><last>Zhou</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Zhuo Han</first><last>Liu</last></author>
       <author><first>Cher heng</first><last>Tan</last></author>
       <author><first>Yonghan</first><last>Ting</last></author>
@@ -2952,7 +2952,7 @@
       <author><first>Xuanli</first><last>He</last></author>
       <author><first>Lingjuan</first><last>Lyu</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2849–2860</pages>
       <abstract>Machine-learning-as-a-service (MLaaS) has attracted millions of users to their splendid large-scale models. Although published as black-box APIs, the valuable models behind these services are still vulnerable to imitation attacks. Recently, a series of works have demonstrated that attackers manage to steal or extract the victim models. Nonetheless, none of the previous stolen models can outperform the original black-box APIs. In this work, we conduct unsupervised domain adaptation and multi-victim ensemble to showing that attackers could potentially surpass victims, which is beyond previous understanding of model extraction. Extensive experiments on both benchmark datasets and real-world APIs validate that the imitators can succeed in outperforming the original black-box models on transferred domains. We consider our work as a milestone in the research of imitation attack, especially on NLP APIs, as the superior performance could influence the defense or even publishing strategy of API providers.</abstract>
       <url hash="4271cbf9">2022.coling-1.251</url>
@@ -2977,7 +2977,7 @@
       <author><first>Qin</first><last>Liu</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Rui</first><last>Xie</last></author>
       <author><first>Wei</first><last>Wu</last></author>
       <pages>2873–2882</pages>
@@ -3106,7 +3106,7 @@
       <title>Summarizing Patients’ Problems from Hospital Progress Notes Using Pre-trained Sequence-to-Sequence Models</title>
       <author><first>Yanjun</first><last>Gao</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dongfang</first><last>Xu</last></author>
       <author><first>Matthew M. M.</first><last>Churpek</last></author>
       <author><first>Majid</first><last>Afshar</last></author>
@@ -3348,7 +3348,7 @@
     <paper id="285">
       <title>Can Transformers Process Recursive Nested Constructions, Like Humans?</title>
       <author><first>Yair</first><last>Lakretz</last></author>
-      <author><first>Théo</first><last>Desbordes</last></author>
+      <author id="theo-desbordes"><first>Théo</first><last>Desbordes</last></author>
       <author><first>Dieuwke</first><last>Hupkes</last></author>
       <author><first>Stanislas</first><last>Dehaene</last></author>
       <pages>3226–3232</pages>
@@ -3385,7 +3385,7 @@
       <title>Parameter-Efficient Mixture-of-Experts Architecture for Pre-trained Language Models</title>
       <author><first>Ze-Feng</first><last>Gao</last></author>
       <author><first>Peiyu</first><last>Liu</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Zhong-Yi</first><last>Lu</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>3263–3273</pages>
@@ -3397,7 +3397,7 @@
       <title>Pre-trained Token-replaced Detection Model as Few-shot Learner</title>
       <author><first>Zicheng</first><last>Li</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3274–3284</pages>
       <abstract>Pre-trained masked language models have demonstrated remarkable ability as few-shot learners. In this paper, as an alternative, we propose a novel approach to few-shot learning with pre-trained token-replaced detection models like ELECTRA. In this approach, we reformulate a classification or a regression task as a token-replaced detection problem. Specifically, we first define a template and label description words for each task and put them into the input to form a natural language prompt. Then, we employ the pre-trained token-replaced detection model to predict which label description word is the most original (i.e., least replaced) among all label description words in the prompt. A systematic evaluation on 16 datasets demonstrates that our approach outperforms few-shot learners with pre-trained masked language models in both one-sentence and two-sentence learning tasks.</abstract>
       <url hash="d4ab6f20">2022.coling-1.289</url>
@@ -3469,7 +3469,7 @@
       <title><fixed-case>R</fixed-case>eal<fixed-case>M</fixed-case>ed<fixed-case>D</fixed-case>ial: A Real Telemedical Dialogue Dataset Collected from Online <fixed-case>C</fixed-case>hinese Short-Video Clips</title>
       <author><first>Bo</first><last>Xu</last></author>
       <author><first>Hongtong</first><last>Zhang</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Xiaokun</first><last>Zhang</last></author>
       <author><first>Dezhi</first><last>Hao</last></author>
       <author><first>Linlin</first><last>Zong</last></author>
@@ -3487,10 +3487,10 @@
       <author><first>Areej Nasser</first><last>Muhajab</last></author>
       <author><first>Isabella A.</first><last>White</last></author>
       <author><first>Gabriel</first><last>Wong</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>3353–3359</pages>
       <abstract>Language evolves over time, and word meaning changes accordingly. This is especially true in social media, since its dynamic nature leads to faster semantic shifts, making it challenging for NLP models to deal with new content and trends. However, the number of datasets and models that specifically address the dynamic nature of these social platforms is scarce. To bridge this gap, we present TempoWiC, a new benchmark especially aimed at accelerating research in social media-based meaning shift. Our results show that TempoWiC is a challenging benchmark, even for recently-released language models specialized in social media.</abstract>
       <url hash="a5dc0dca">2022.coling-1.296</url>
@@ -3500,7 +3500,7 @@
       <title>Automatic Generation of Large-scale Multi-turn Dialogues from <fixed-case>R</fixed-case>eddit</title>
       <author><first>Daniil</first><last>Huryn</last></author>
       <author><first>William M.</first><last>Hutsell</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>3360–3373</pages>
       <abstract>This paper presents novel methods to automatically convert posts and their comments from discussion forums such as Reddit into multi-turn dialogues. Our methods are generalizable to any forums; thus, they allow us to generate a massive amount of dialogues for diverse topics that can be used to pretrain language models. Four methods are introduced, Greedy_Baseline, Greedy_Advanced, Beam Search and Threading, which are applied to posts from 10 subreddits and assessed. Each method makes a noticeable improvement over its predecessor such that the best method shows an improvement of 36.3% over the baseline for appropriateness. Our best method is applied to posts from those 10 subreddits for the creation of a corpus comprising 10,098 dialogues (3.3M tokens), 570 of which are compared against dialogues in three other datasets, Blended Skill Talk, Daily Dialogue, and Topical Chat. Our dialogues are found to be more engaging but slightly less natural than the ones in the other datasets, while it costs a fraction of human labor and money to generate our corpus compared to the others. To the best of our knowledge, it is the first work to create a large multi-turn dialogue corpus from Reddit that can advance neural dialogue systems.</abstract>
       <url hash="54fa2b84">2022.coling-1.297</url>
@@ -3523,7 +3523,7 @@
       <title><fixed-case>T</fixed-case>witter Topic Classification</title>
       <author><first>Dimosthenis</first><last>Antypas</last></author>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Vitor</first><last>Silva</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
       <author><first>Francesco</first><last>Barbieri</last></author>
@@ -3535,7 +3535,7 @@
     <paper id="300">
       <title>Layer or Representation Space: What Makes <fixed-case>BERT</fixed-case>-based Evaluation Metrics Robust?</title>
       <author><first>Doan Nam Long</first><last>Vu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Steffen</first><last>Eger</last></author>
       <pages>3401–3411</pages>
       <abstract>The evaluation of recent embedding-based evaluation metrics for text generation is primarily based on measuring their correlation with human evaluations on standard benchmarks. However, these benchmarks are mostly from similar domains to those used for pretraining word embeddings. This raises concerns about the (lack of) generalization of embedding-based metrics to new and noisy domains that contain a different vocabulary than the pretraining data. In this paper, we examine the robustness of BERTScore, one of the most popular embedding-based metrics for text generation. We show that (a) an embedding-based metric that has the highest correlation with human evaluations on a standard benchmark can have the lowest correlation if the amount of input noise or unknown tokens increases, (b) taking embeddings from the first layer of pretrained models improves the robustness of all metrics, and (c) the highest robustness is achieved when using character-level embeddings, instead of token-based embeddings, from the first layer of the pretrained model.</abstract>
@@ -3548,7 +3548,7 @@
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Qingyun</first><last>Yang</last></author>
       <author><first>Yujing</first><last>Huang</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>3412–3419</pages>
       <abstract>Difficulties with social aspects of language are among the hallmarks of autism spectrum disorder (ASD). These communication differences are thought to contribute to the challenges that adults with ASD experience when seeking employment, underscoring the need for interventions that focus on improving areas of weakness in pragmatic and social language. In this paper, we describe a transformer-based framework for identifying linguistic features associated with social aspects of communication using a corpus of conversations between adults with and without ASD and neurotypical conversational partners produced while engaging in collaborative tasks. While our framework yields strong accuracy overall, performance is significantly worse for the language of participants with ASD, suggesting that they use a more diverse set of strategies for some social linguistic functions. These results, while showing promise for the development of automated language analysis tools to support targeted language interventions for ASD, also reveal weaknesses in the ability of large contextualized language models to model neuroatypical language.</abstract>
       <url hash="7cc549a7">2022.coling-1.301</url>
@@ -3568,7 +3568,7 @@
     <paper id="303">
       <title><fixed-case>L</fixed-case>ip<fixed-case>K</fixed-case>ey: A Large-Scale News Dataset for Absent Keyphrases Generation and Abstractive Summarization</title>
       <author><first>Fajri</first><last>Koto</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>3427–3437</pages>
       <abstract>Summaries, keyphrases, and titles are different ways of concisely capturing the content of a document. While most previous work has released the datasets of keyphrases and summarization separately, in this work, we introduce LipKey, the largest news corpus with human-written abstractive summaries, absent keyphrases, and titles. We jointly use the three elements via multi-task training and training as joint structured inputs, in the context of document summarization. We find that including absent keyphrases and titles as additional context to the source document improves transformer-based summarization models.</abstract>
@@ -3590,7 +3590,7 @@
     <paper id="305">
       <title>Effective Data Augmentation for Sentence Classification Using One <fixed-case>VAE</fixed-case> per Class</title>
       <author><first>Frédéric</first><last>Piedboeuf</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>3454–3464</pages>
       <abstract>In recent years, data augmentation has become an important field of machine learning. While images can use simple techniques such as cropping or rotating, textual data augmentation needs more complex manipulations to ensure that the generated examples are useful. Variational auto-encoders (VAE) and its conditional variant the Conditional-VAE (CVAE) are often used to generate new textual data, both relying on a good enough training of the generator so that it doesn’t create examples of the wrong class. In this paper, we explore a simpler way to use VAE for data augmentation: the training of one VAE per class. We show on several dataset sizes, as well as on four different binary classification tasks, that it systematically outperforms other generative data augmentation techniques.</abstract>
       <url hash="c6ad77f1">2022.coling-1.305</url>
@@ -3628,7 +3628,7 @@
       <author><first>Gyeongmin</first><last>Kim</last></author>
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Junyoung</first><last>Son</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>3496–3505</pages>
       <abstract>As digitized traditional cultural heritage documents have rapidly increased, resulting in an increased need for preservation and management, practical recognition of entities and typification of their classes has become essential. To achieve this, we propose KoCHET - a Korean cultural heritage corpus for the typical entity-related tasks, i.e., named entity recognition (NER), relation extraction (RE), and entity typing (ET). Advised by cultural heritage experts based on the data construction guidelines of government-affiliated organizations, KoCHET consists of respectively 112,362, 38,765, 113,198 examples for NER, RE, and ET tasks, covering all entity types related to Korean cultural heritage. Moreover, unlike the existing public corpora, modified redistribution can be allowed both domestic and foreign researchers. Our experimental results make the practical usability of KoCHET more valuable in terms of cultural heritage. We also provide practical insights of KoCHET in terms of statistical and linguistic analysis. Our corpus is freely available at <url>https://github.com/Gyeongmin47/KoCHET</url>.</abstract>
       <url hash="b073eb6f">2022.coling-1.308</url>
@@ -3653,7 +3653,7 @@
       <author><first>Jie</first><last>Zhao</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Marcus</first><last>Collins</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
       <author><first>Eugene</first><last>Agichtein</last></author>
       <pages>3514–3529</pages>
@@ -3680,7 +3680,7 @@
       <author><first>Jeska</first><last>Buhmann</last></author>
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>3539–3549</pages>
       <abstract>FAQs are important resources to find information. However, especially if a FAQ concerns many question-answer pairs, it can be a difficult and time-consuming job to find the answer you are looking for. A FAQ chatbot can ease this process by automatically retrieving the relevant answer to a user’s question. We present VaccinChatNL, a Dutch FAQ corpus on the topic of COVID-19 vaccination. Starting with 50 question-answer pairs we built VaccinChat, a FAQ chatbot, which we used to gather more user questions that were also annotated with the appropriate or new answer classes. This iterative process of gathering user questions, annotating them, and retraining the model with the increased data set led to a corpus that now contains 12,883 user questions divided over 181 answers. We provide the first publicly available Dutch FAQ answering data set of this size with large groups of semantically equivalent human-paraphrased questions. Furthermore, our study shows that before fine-tuning a classifier, continued pre-training of Dutch language models with task- and/or domain-specific data improves classification results. In addition, we show that large groups of semantically similar questions are important for obtaining well-performing intent classification models.</abstract>
       <url hash="5e04761f">2022.coling-1.312</url>
@@ -3688,7 +3688,7 @@
     </paper>
     <paper id="313">
       <title>Benchmarking Automated Clinical Language Simplification: Dataset, Algorithm, and Evaluation</title>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last></author>
+      <author><first>Junyu</first><last>Luo</last></author>
       <author><first>Junxian</first><last>Lin</last></author>
       <author><first>Chi</first><last>Lin</last></author>
       <author><first>Cao</first><last>Xiao</last></author>
@@ -3704,7 +3704,7 @@
       <author><first>Kalvin</first><last>Chang</last></author>
       <author><first>Chenxuan</first><last>Cui</last></author>
       <author><first>Youngmin</first><last>Kim</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>3563–3569</pages>
       <abstract>Most comparative datasets of Chinese varieties are not digital; however, Wiktionary includes a wealth of transcriptions of words from these varieties. The usefulness of these data is limited by the fact that they use a wide range of variety-specific romanizations, making data difficult to compare. The current work collects this data into a single constituent (IPA, or International Phonetic Alphabet) and structured form (TSV) for use in comparative linguistics and Chinese NLP. At the time of writing, the dataset contains 67,943 entries across 8 varieties and Middle Chinese. The dataset is validated on a protoform reconstruction task using an encoder-decoder cross-attention architecture (Meloni et al 2021), achieving an accuracy of 54.11%, a PER (phoneme error rate) of 17.69%, and a FER (feature error rate) of 6.60%.</abstract>
       <url hash="7e75e1ff">2022.coling-1.314</url>
@@ -3728,7 +3728,7 @@
       <title><fixed-case>IMPARA</fixed-case>: Impact-Based Metric for <fixed-case>GEC</fixed-case> Using Parallel Data</title>
       <author><first>Koki</first><last>Maeda</last></author>
       <author><first>Masahiro</first><last>Kaneko</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3578–3588</pages>
       <abstract>Automatic evaluation of grammatical error correction (GEC) is essential in developing useful GEC systems. Existing methods for automatic evaluation require multiple reference sentences or manual scores. However, such resources are expensive, thereby hindering automatic evaluation for various domains and correction styles. This paper proposes an Impact-based Metric for GEC using PARAllel data, IMPARA, which utilizes correction impacts computed by parallel data comprising pairs of grammatical/ungrammatical sentences. As parallel data is cheaper than manually assessing evaluation scores, IMPARA can reduce the cost of data creation for automatic evaluation. Correlations between IMPARA and human scores indicate that IMPARA is comparable or better than existing evaluation methods. Furthermore, we find that IMPARA can perform evaluations that fit different domains and correction styles trained on various parallel data.</abstract>
       <url hash="79c01915">2022.coling-1.316</url>
@@ -3747,7 +3747,7 @@
     <paper id="318">
       <title>Are Pretrained Multilingual Models Equally Fair across Languages?</title>
       <author><first>Laura</first><last>Cabello Piqueras</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3597–3605</pages>
       <abstract>Pretrained multilingual language models can help bridge the digital language divide, enabling high-quality NLP models for lower-resourced languages. Studies of multilingual models have so far focused on performance, consistency, and cross-lingual generalisation. However, with their wide-spread application in the wild and downstream societal impact, it is important to put multilingual models under the same scrutiny as monolingual models. This work investigates the group fairness of multilingual models, asking whether these models are equally fair across languages. To this end, we create a new four-way multilingual dataset of parallel cloze test examples (MozArt), equipped with demographic information (balanced with regard to gender and native tongue) about the test participants. We evaluate three multilingual models on MozArt –mBERT, XLM-R, and mT5– and show that across the four target languages, the three models exhibit different levels of group disparity, e.g., exhibiting near-equal risk for Spanish, but high levels of disparity for German.</abstract>
       <url hash="d290d0ac">2022.coling-1.318</url>
@@ -3847,7 +3847,7 @@
     </paper>
     <paper id="327">
       <title>A Data-driven Approach to Named Entity Recognition for Early <fixed-case>M</fixed-case>odern <fixed-case>F</fixed-case>rench</title>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Simon</first><last>Gabay</last></author>
       <pages>3722–3730</pages>
       <abstract>Named entity recognition has become an increasingly useful tool for digital humanities research, specially when it comes to historical texts. However, historical texts pose a wide range of challenges to both named entity recognition and natural language processing in general that are still difficult to address even with modern neural methods. In this article we focus in named entity recognition for historical French, and in particular for Early Modern French (16th-18th c.), i.e. Ancien Régime French. However, instead of developing a specialised architecture to tackle the particularities of this state of language, we opt for a data-driven approach by developing a new corpus with fine-grained entity annotation, covering three centuries of literature corresponding to the early modern period; we try to annotate as much data as possible producing a corpus that is many times bigger than the most popular NER evaluation corpora for both Contemporary English and French. We then fine-tune existing state-of-the-art architectures for Early Modern and Contemporary French, obtaining results that are on par with those of the current state-of-the-art NER systems for Contemporary English. Both the corpus and the fine-tuned models are released.</abstract>
@@ -3928,7 +3928,7 @@
     </paper>
     <paper id="334">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>: A Large-scale Multilingual Dataset for Complex Named Entity Recognition</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Anjie</first><last>Fang</last></author>
       <author><first>Besnik</first><last>Fetahu</last></author>
       <author><first>Sudipta</first><last>Kar</last></author>
@@ -3960,8 +3960,8 @@
     </paper>
     <paper id="337">
       <title><fixed-case>QSTS</fixed-case>: A Question-Sensitive Text Similarity Measure for Question Generation</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>3835–3846</pages>
       <abstract>While question generation (QG) has received significant focus in conversation modeling and text generation research, the problems of comparing questions and evaluation of QG models have remained inadequately addressed. Indeed, QG models continue to be evaluated using traditional measures such as BLEU, METEOR, and ROUGE scores which were designed for other text generation problems. We propose QSTS, a novel Question-Sensitive Text Similarity measure for comparing two questions by characterizing their target intent based on question class, named-entity, and semantic similarity information from the two questions. We show that QSTS addresses several shortcomings of existing measures that depend on <tex-math>n</tex-math>-gram overlap scores and obtains superior results compared to traditional measures on publicly-available QG datasets. We also collect a novel dataset SimQG, for enabling question similarity research in QG contexts. SimQG contains questions generated by state-of-the-art QG models along with human judgements on their relevance with respect to the passage context they were generated for as well as when compared to the given reference question. Using SimQG, we showcase the key aspect of QSTS that differentiates it from all existing measures. QSTS is not only able to characterize similarity between two questions, but is also able to score questions with respect to passage contexts. Thus QSTS is, to our knowledge, the first metric that enables the measurement of QG performance in a reference-free manner.</abstract>
       <url hash="c54fd0f8">2022.coling-1.337</url>
@@ -3992,7 +3992,7 @@
     <paper id="340">
       <title><fixed-case>I</fixed-case>nfer<fixed-case>ES</fixed-case> : A Natural Language Inference Corpus for <fixed-case>S</fixed-case>panish Featuring Negation-Based Contrastive and Adversarial Examples</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <pages>3873–3884</pages>
       <abstract>In this paper we present InferES - an original corpus for Natural Language Inference (NLI) in European Spanish. We propose, implement, and analyze a variety of corpus-creating strategies utilizing expert linguists and crowd workers. The objectives behind InferES are to provide high-quality data, and at the same time to facilitate the systematic evaluation of automated systems. Specifically, we focus on measuring and improving the performance of machine learning systems on negation-based adversarial examples and their ability to generalize across out-of-distribution topics. We train two transformer models on InferES (8,055 gold examples) in a variety of scenarios. Our best model obtains 72.8% accuracy, leaving a lot of room for improvement. The “hypothesis-only” baseline performs only 2%-5% higher than majority, indicating much fewer annotation artifacts than prior work. We show that models trained on InferES generalize very well across topics (both in- and out-of-distribution) and perform moderately well on negation-based adversarial examples.</abstract>
       <url hash="e7454556">2022.coling-1.340</url>
@@ -4002,8 +4002,8 @@
       <title><fixed-case>P</fixed-case>ara<fixed-case>Z</fixed-case>h-22<fixed-case>M</fixed-case>: A Large-Scale <fixed-case>C</fixed-case>hinese Parabank via Machine Translation</title>
       <author><first>Wenjie</first><last>Hao</last></author>
       <author><first>Hongfei</first><last>Xu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Lingling</first><last>Mu</last></author>
       <pages>3885–3897</pages>
       <abstract>Paraphrasing, i.e., restating the same meaning in different ways, is an important data augmentation approach for natural language processing (NLP). Zhang et al. (2019b) propose to extract sentence-level paraphrases from multiple Chinese translations of the same source texts, and construct the PKU Paraphrase Bank of 0.5M sentence pairs. However, despite being the largest Chinese parabank to date, the size of PKU parabank is limited by the availability of one-to-many sentence translation data, and cannot well support the training of large Chinese paraphrasers. In this paper, we relieve the restriction with one-to-many sentence translation data, and construct ParaZh-22M, a larger Chinese parabank that is composed of 22M sentence pairs, based on one-to-one bilingual sentence translation data and machine translation (MT). In our data augmentation experiments, we show that paraphrasing based on ParaZh-22M can bring about consistent and significant improvements over several strong baselines on a wide range of Chinese NLP tasks, including a number of Chinese natural language understanding benchmarks (CLUE) and low-resource machine translation.</abstract>
@@ -4051,8 +4051,8 @@
       <title><fixed-case>S</fixed-case>inglish Message Paraphrasing: A Joint Task of Creole Translation and Text Normalization</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Shikang</first><last>Ni</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>3924–3936</pages>
       <abstract>Within the natural language processing community, English is by far the most resource-rich language. There is emerging interest in conducting translation via computational approaches to conform its dialects or creole languages back to standard English. This computational approach paves the way to leverage generic English language backbones, which are beneficial for various downstream tasks. However, in practical online communication scenarios, the use of language varieties is often accompanied by noisy user-generated content, making this translation task more challenging. In this work, we introduce a joint paraphrasing task of creole translation and text normalization of Singlish messages, which can shed light on how to process other language varieties and dialects. We formulate the task in three different linguistic dimensions: lexical level normalization, syntactic level editing, and semantic level rewriting. We build an annotated dataset of Singlish-to-Standard English messages, and report performance on a perturbation-resilient sequence-to-sequence model. Experimental results show that the model produces reasonable generation results, and can improve the performance of downstream tasks like stance detection.</abstract>
       <url hash="999980a1">2022.coling-1.345</url>
@@ -4074,7 +4074,7 @@
     </paper>
     <paper id="347">
       <title>One Word, Two Sides: Traces of Stance in Contextualized Word Representations</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Matthieu</first><last>Labeau</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
       <pages>3950–3959</pages>
@@ -4097,7 +4097,7 @@
     <paper id="349">
       <title>Modelling Commonsense Properties Using Pre-Trained Bi-Encoders</title>
       <author><first>Amit</first><last>Gajbhiye</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>3971–3983</pages>
       <abstract>Grasping the commonsense properties of everyday concepts is an important prerequisite to language understanding. While contextualised language models are reportedly capable of predicting such commonsense properties with human-level accuracy, we argue that such results have been inflated because of the high similarity between training and test concepts. This means that models which capture concept similarity can perform well, even if they do not capture any knowledge of the commonsense properties themselves. In settings where there is no overlap between the properties that are considered during training and testing, we find that the empirical performance of standard language models drops dramatically. To address this, we study the possibility of fine-tuning language models to explicitly model concepts and their properties. In particular, we train separate concept and property encoders on two types of readily available data: extracted hyponym-hypernym pairs and generic sentences. Our experimental results show that the resulting encoders allow us to predict commonsense properties with much higher accuracy than is possible by directly fine-tuning language models. We also present experimental results for the related task of unsupervised hypernym discovery.</abstract>
@@ -4210,7 +4210,7 @@
       <title>Testing Large Language Models on Compositionality and Inference with Phrase-Level Adjective-Noun Entailment</title>
       <author><first>Lorenzo</first><last>Bertolini</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>4084–4100</pages>
       <abstract>Previous work has demonstrated that pre-trained large language models (LLM) acquire knowledge during pre-training which enables reasoning over relationships between words (e.g, hyponymy) and more complex inferences over larger units of meaning such as sentences. Here, we investigate whether lexical entailment (LE, i.e. hyponymy or the is a relation between words) can be generalised in a compositional manner. Accordingly, we introduce PLANE (Phrase-Level Adjective-Noun Entailment), a new benchmark to test models on fine-grained compositional entailment using adjective-noun phrases. Our experiments show that knowledge extracted via In–Context and transfer learning is not enough to solve PLANE. However, a LLM trained on PLANE can generalise well to out–of–distribution sets, since the required knowledge can be stored in the representations of subwords (SW) tokens.</abstract>
       <url hash="5be0dd70">2022.coling-1.359</url>
@@ -4219,7 +4219,7 @@
     <paper id="360">
       <title>Does <fixed-case>BERT</fixed-case> Recognize an Agent? Modeling <fixed-case>D</fixed-case>owty’s Proto-Roles with Contextual Embeddings</title>
       <author><first>Mattia</first><last>Proietti</last></author>
-      <author><first>Gianluca</first><last>Lebani</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>4101–4112</pages>
       <abstract>Contextual embeddings build multidimensional representations of word tokens based on their context of occurrence. Such models have been shown to achieve a state-of-the-art performance on a wide variety of tasks. Yet, the community struggles in understanding what kind of semantic knowledge these representations encode. We report a series of experiments aimed at investigating to what extent one of such models, BERT, is able to infer the semantic relations that, according to Dowty’s Proto-Roles theory, a verbal argument receives by virtue of its role in the event described by the verb. This hypothesis were put to test by learning a linear mapping from the BERT’s verb embeddings to an interpretable space of semantic properties built from the linguistic dataset by White et al. (2016). In a first experiment we tested whether the semantic properties inferred from a typed version of the BERT embeddings would be more linguistically plausible than those produced by relying on static embeddings. We then move to evaluate the semantic properties inferred from the contextual embeddings both against those available in the original dataset, as well as by assessing their ability to model the semantic properties possessed by the agent of the verbs participating in the so-called causative alternation.</abstract>
@@ -4229,7 +4229,7 @@
     <paper id="361">
       <title>Towards Structure-aware Paraphrase Identification with Phrase Alignment Using Sentence Encoders</title>
       <author><first>Qiwei</first><last>Peng</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <pages>4113–4123</pages>
       <abstract>Previous works have demonstrated the effectiveness of utilising pre-trained sentence encoders based on their sentence representations for meaning comparison tasks. Though such representations are shown to capture hidden syntax structures, the direct similarity comparison between them exhibits weak sensitivity to word order and structural differences in given sentences. A single similarity score further makes the comparison process hard to interpret. Therefore, we here propose to combine sentence encoders with an alignment component by representing each sentence as a list of predicate-argument spans (where their span representations are derived from sentence encoders), and decomposing the sentence-level meaning comparison into the alignment between their spans for paraphrase identification tasks. Empirical results show that the alignment component brings in both improved performance and interpretability for various sentence encoders. After closer investigation, the proposed approach indicates increased sensitivity to structural difference and enhanced ability to distinguish non-paraphrases with high lexical overlap.</abstract>
@@ -4251,7 +4251,7 @@
       <title>Emotion Enriched Retrofitted Word Embeddings</title>
       <author><first>Sapan</first><last>Shah</last></author>
       <author><first>Sreedhar</first><last>Reddy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4136–4148</pages>
       <abstract>Word embeddings learned using the distributional hypothesis (e.g., GloVe, Word2vec) are good at encoding various lexical-semantic relations. However, they do not capture the emotion aspects of words. We present a novel retrofitting method for updating the vectors of emotion bearing words like fun, offence, angry, etc. The retrofitted embeddings achieve better inter-cluster and intra-cluster distance for words having the same emotions, e.g., the joy cluster containing words like fun, happiness, etc., and the anger cluster with words like offence, rage, etc., as evaluated through different cluster quality metrics. For the downstream tasks on sentiment analysis and sarcasm detection, simple classification models, such as SVM and Attention Net, learned using our retrofitted embeddings perform better than their pre-trained counterparts (about 1.5 % improvement in F1-score) as well as other benchmarks. Furthermore, the difference in performance is more pronounced in the limited data setting.</abstract>
       <url hash="5a46b762">2022.coling-1.363</url>
@@ -4282,8 +4282,8 @@
     <paper id="366">
       <title>Unsupervised Lexical Substitution with Decontextualised Embeddings</title>
       <author><first>Takashi</first><last>Wada</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>4172–4185</pages>
       <abstract>We propose a new unsupervised method for lexical substitution using pre-trained language models. Compared to previous approaches that use the generative capability of language models to predict substitutes, our method retrieves substitutes based on the similarity of contextualised and decontextualised word embeddings, i.e. the average contextual representation of a word in multiple contexts. We conduct experiments in English and Italian, and show that our method substantially outperforms strong baselines and establishes a new state-of-the-art without any explicit supervision or fine-tuning. We further show that our method performs particularly well at predicting low-frequency substitutes, and also generates a diverse list of substitute candidates, reducing morphophonetic or morphosyntactic biases induced by article-noun agreement.</abstract>
@@ -4338,8 +4338,8 @@
     <paper id="371">
       <title>Noisy Label Regularisation for Textual Regression</title>
       <author><first>Yuxia</first><last>Wang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>4228–4240</pages>
       <abstract>Training with noisy labelled data is known to be detrimental to model performance, especially for high-capacity neural network models in low-resource domains. Our experiments suggest that standard regularisation strategies, such as weight decay and dropout, are ineffective in the face of noisy labels. We propose a simple noisy label detection method that prevents error propagation from the input layer. The approach is based on the observation that the projection of noisy labels is learned through memorisation at advanced stages of learning, and that the Pearson correlation is sensitive to outliers. Extensive experiments over real-world human-disagreement annotations as well as randomly-corrupted and data-augmented labels, across various tasks and domains, demonstrate that our method is effective, regularising noisy labels and improving generalisation performance.</abstract>
       <url hash="30f5c6ea">2022.coling-1.371</url>
@@ -4374,7 +4374,7 @@
     </paper>
     <paper id="374">
       <title>Revisiting Syllables in Language Modelling and Their Application on Low-Resource Machine Translation</title>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Kervy Dante</first><last>Rivas Rojas</last></author>
       <author><first>Liz Karen</first><last>Chavez Sanchez</last></author>
       <author><first>Roberto</first><last>Zariquiey</last></author>
@@ -4396,7 +4396,7 @@
       <title>Fashioning Local Designs from Generic Speech Technologies in an <fixed-case>A</fixed-case>ustralian Aboriginal Community</title>
       <author><first>Éric</first><last>Le Ferrand</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>4274–4285</pages>
       <abstract>An increasing number of papers have been addressing issues related to low-resource languages and the transcription bottleneck paradigm. After several years spent in Northern Australia, where some of the strongest Aboriginal languages are spoken, we could observe a gap between the motivations depicted in research contributions in this space and the Northern Australian context. In this paper, we address this gap in research by exploring the potential of speech recognition in an Aboriginal community. We describe our work from training a spoken term detection system to its implementation in an activity with Aboriginal participants. We report here on one side how speech recognition technologies can find their place in an Aboriginal context and, on the other, methodological paths that allowed us to reach better comprehension and engagement from Aboriginal participants.</abstract>
       <url hash="e4972453">2022.coling-1.376</url>
@@ -4405,8 +4405,8 @@
     <paper id="377">
       <title>Few-Shot Pidgin Text Adaptation via Contrastive Fine-Tuning</title>
       <author><first>Ernie</first><last>Chang</last></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>4286–4291</pages>
       <abstract>The surging demand for multilingual dialogue systems often requires a costly labeling process for each language addition. For low resource languages, human annotators are continuously tasked with the adaptation of resource-rich language utterances for each new domain. However, this prohibitive and impractical process can often be a bottleneck for low resource languages that are still without proper translation systems nor parallel corpus. In particular, it is difficult to obtain task-specific low resource language annotations for the English-derived creoles (e.g. Nigerian and Cameroonian Pidgin). To address this issue, we utilize the pretrained language models i.e. BART which has shown great potential in language generation/understanding – we propose to finetune the BART model to generate utterances in Pidgin by leveraging the proximity of the source and target languages, and utilizing positive and negative examples in constrastive training objectives. We collected and released the first parallel Pidgin-English conversation corpus in two dialogue domains and showed that this simple and effective technique is suffice to yield impressive results for English-to-Pidgin generation, which are two closely-related languages.</abstract>
@@ -4417,7 +4417,7 @@
       <title>Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of <fixed-case>N</fixed-case>orth <fixed-case>A</fixed-case>merica</title>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Changbing</first><last>Yang</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>4292–4298</pages>
       <abstract>This paper explores a special case in multilingual machine translation: so called multi-parallel translation, where the target data for all language pairs are identical. While multi-parallelism offers benefits which are not available in a standard translation setting, translation models can easily overfit when training data are limited. We introduce a regularizer, the divergence penalty, which penalizes the translation model when it represents source sentences with identical target translations in divergent ways. Experiments on very low-resourced Indigenous North American languages show that an initially deficient multilingual translator can improve by 4.9 BLEU through mBART pre-training, and 5.5 BLEU points with the strategic addition of monolingual data, and that a divergence penalty leads to further increases of 0.4 BLEU. Further experiments on Germanic languages demonstrate a improvement of 0.5 BLEU when applying the divergence penalty. An investigation of the neural encoder representations learned by our translation models shows that the divergence penalty encourages models to learn a unified neural interlingua.</abstract>
       <url hash="c44a61a0">2022.coling-1.378</url>
@@ -4458,8 +4458,8 @@
     </paper>
     <paper id="382">
       <title>Adapting Pre-trained Language Models to <fixed-case>A</fixed-case>frican Languages via Multilingual Adaptive Fine-Tuning</title>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Marius</first><last>Mosbach</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>4336–4349</pages>
@@ -4479,7 +4479,7 @@
       <title>Improving Low-resource <fixed-case>RRG</fixed-case> Parsing with Cross-lingual Self-training</title>
       <author><first>Kilian</first><last>Evang</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Kilu</first><last>von Prince</last></author>
       <author><first>Tatiana</first><last>Bladier</last></author>
       <author><first>Simon</first><last>Petitjean</last></author>
@@ -4508,7 +4508,7 @@
       <title>Towards Multi-Sense Cross-Lingual Alignment of Contextual Embeddings</title>
       <author><first>Linlin</first><last>Liu</last></author>
       <author><first>Thien Hai</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <pages>4381–4396</pages>
@@ -4520,7 +4520,7 @@
       <title>How to Parse a Creole: When Martinican Creole Meets <fixed-case>F</fixed-case>rench</title>
       <author><first>Ludovic</first><last>Mompelat</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>4397–4406</pages>
       <abstract>We investigate methods to develop a parser for Martinican Creole, a highly under-resourced language, using a French treebank. We compare transfer learning and multi-task learning models and examine different input features and strategies to handle the massive size imbalance between the treebanks. Surprisingly, we find that a simple concatenated (French + Martinican Creole) baseline yields optimal results even though it has access to only 80 Martinican Creole sentences. POS embeddings work better than lexical ones, but they suffer from negative transfer.</abstract>
       <url hash="4e551c40">2022.coling-1.387</url>
@@ -4540,7 +4540,7 @@
       <author><first>Nanda Putri</first><last>Romadhona</last></author>
       <author><first>Sin-En</first><last>Lu</last></author>
       <author><first>Bo-Han</first><last>Lu</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>4418–4428</pages>
       <abstract>Code-mixing refers to the mixed use of multiple languages. It is prevalent in multilingual societies and is also one of the most challenging natural language processing tasks. In this paper, we study Bahasa Rojak, a dialect popular in Malaysia that consists of English, Malay, and Chinese. Aiming to establish a model to deal with the code-mixing phenomena of Bahasa Rojak, we use data augmentation to automatically construct the first Bahasa Rojak corpus for pre-training language models, which we name the Bahasa Rojak Crawled Corpus (BRCC). We also develop a new pre-trained model called “Mixed XLM”. The model can tag the language of the input token automatically to process code-mixing input. Finally, to test the effectiveness of the Mixed XLM model pre-trained on BRCC for social media scenarios where code-mixing is found frequently, we compile a new Bahasa Rojak sentiment analysis dataset, SentiBahasaRojak, with a Kappa value of 0.77.</abstract>
       <url hash="b8446522">2022.coling-1.389</url>
@@ -4570,7 +4570,7 @@
       <title>Zero-shot Disfluency Detection for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Rohit</first><last>Kundu</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4442–4454</pages>
       <abstract>Disfluencies that appear in the transcriptions from automatic speech recognition systems tend to impair the performance of downstream NLP tasks. Disfluency correction models can help alleviate this problem. However, the unavailability of labeled data in low-resource languages impairs progress. We propose using a pretrained multilingual model, finetuned only on English disfluencies, for zero-shot disfluency detection in Indian languages. We present a detailed pipeline to synthetically generate disfluent text and create evaluation datasets for four Indian languages: Bengali, Hindi, Malayalam, and Marathi. Even in the zero-shot setting, we obtain F1 scores of 75 and higher on five disfluency types across all four languages. We also show the utility of synthetically generated disfluencies by evaluating on real disfluent text in Bengali, Hindi, and Marathi. Finetuning the multilingual model on additional synthetic Hindi disfluent text nearly doubles the number of exact matches and yields a 20-point boost in F1 scores when evaluated on real Hindi disfluent text, compared to training with only English disfluent text.</abstract>
       <url hash="8e1933af">2022.coling-1.392</url>
@@ -4599,8 +4599,8 @@
       <author><first>Xiaolin</first><last>Xing</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Minhan</first><last>Xu</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>4481–4491</pages>
       <abstract>Training Neural Machine Translation (NMT) models suffers from sparse parallel data, in the infrequent translation scenarios towards low-resource source languages. The existing solutions primarily concentrate on the utilization of Parent-Child (PC) transfer learning. It transfers well-trained NMT models on high-resource languages (namely Parent NMT) to low-resource languages, so as to produce Child NMT models by fine-tuning. It has been carefully demonstrated that a variety of PC variants yield significant improvements for low-resource NMT. In this paper, we intend to enhance PC-based NMT by a bidirectionally-adaptive learning strategy. Specifically, we divide inner constituents (6 transformers) of Parent encoder into two “teams”, i.e., T1 and T2. During representation learning, T1 learns to encode low-resource languages conditioned on bilingual shareable latent space. Generative adversarial network and masked language modeling are used for space-shareable encoding. On the other hand, T2 is straightforwardly transferred to low-resource languages, and fine-tuned together with T1 for low-resource translation. Briefly, T1 and T2 take actions separately for different goals. The former aims to adapt to characteristics of low-resource languages during encoding, while the latter adapts to translation experiences learned from high-resource languages. We experiment on benchmark corpora SETIMES, conducting low-resource NMT for Albanian (Sq), Macedonian (Mk), Croatian (Hr) and Romanian (Ro). Experimental results show that our method yields substantial improvements, which allows the NMT performance to reach BLEU4-scores of 62.24%, 56.93%, 50.53% and 54.65% for Sq, Mk, Hr and Ro, respectively.</abstract>
       <url hash="47533c6f">2022.coling-1.395</url>
@@ -4656,7 +4656,7 @@
       <author><first>Arpita</first><last>Kundu</last></author>
       <author><first>Subhasish</first><last>Ghosh</last></author>
       <author><first>Pratik</first><last>Saini</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Indrajit</first><last>Bhattacharya</last></author>
       <pages>4537–4543</pages>
       <abstract>Predicting difficulty of questions is crucial for technical interviews. However, such questions are long-form and more open-ended than factoid and multiple choice questions explored so far for question difficulty prediction. Existing models also require large volumes of candidate response data for training. We study weak-supervision and use unsupervised algorithms for both question generation and difficulty prediction. We create a dataset of interview questions with difficulty scores for deep learning and use it to evaluate SOTA models for question difficulty prediction trained using weak supervision. Our analysis brings out the task’s difficulty as well as the promise of weak supervision for it.</abstract>
@@ -4715,7 +4715,7 @@
       <author><first>Yejin</first><last>Kim</last></author>
       <author><first>Hodong</first><last>Lee</last></author>
       <author><first>H. Howie</first><last>Huang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>4585–4592</pages>
       <abstract>Recent pre-trained language models (PLMs) achieved great success on many natural language processing tasks through learning linguistic features and contextualized sentence representation. Since attributes captured in stacked layers of PLMs are not clearly identified, straightforward approaches such as embedding the last layer are commonly preferred to derive sentence representations from PLMs. This paper introduces the attention-based pooling strategy, which enables the model to preserve layer-wise signals captured in each layer and learn digested linguistic features for downstream tasks. The contrastive learning objective can adapt the layer-wise attention pooling to both unsupervised and supervised manners. It results in regularizing the anisotropic space of pre-trained embeddings and being more uniform. We evaluate our model on standard semantic textual similarity (STS) and semantic search tasks. As a result, our method improved the performance of the base contrastive learned BERT<tex-math>_{base}</tex-math> and variants.</abstract>
       <url hash="6488dfdd">2022.coling-1.405</url>
@@ -4807,7 +4807,7 @@
       <title>Accelerating Inference for Pretrained Language Models by Unified Multi-Perspective Early Exiting</title>
       <author><first>Jun</first><last>Kong</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>4677–4686</pages>
       <abstract>Conditional computation algorithms, such as the early exiting (EE) algorithm, can be applied to accelerate the inference of pretrained language models (PLMs) while maintaining competitive performance on resource-constrained devices. However, this approach is only applied to the vertical architecture to decide which layers should be used for inference. Conversely, the operation of the horizontal perspective is ignored, and the determination of which tokens in each layer should participate in the computation fails, leading to a high redundancy for adaptive inference. To address this limitation, a unified horizontal and vertical multi-perspective early exiting (MPEE) framework is proposed in this study to accelerate the inference of transformer-based models. Specifically, the vertical architecture uses recycling EE classifier memory and weighted self-distillation to enhance the performance of the EE classifiers. Then, the horizontal perspective uses recycling class attention memory to emphasize the informative tokens. Conversely, the tokens with less information are truncated by weighted fusion and isolated from the following computation. Based on this, both horizontal and vertical EE are unified to obtain a better tradeoff between performance and efficiency. Extensive experimental results show that MPEE can achieve higher acceleration inference with competent performance than existing competitive methods.</abstract>
@@ -4841,7 +4841,7 @@
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Abbas</first><last>Ghaddar</last></author>
       <author><first>Khalil</first><last>Bibi</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Pascal</first><last>Poupart</last></author>
       <pages>4707–4713</pages>
       <abstract>Knowledge distillation (KD) is an efficient framework for compressing large-scale pre-trained language models. Recent years have seen a surge of research aiming to improve KD by leveraging Contrastive Learning, Intermediate Layer Distillation, Data Augmentation, and Adversarial Training. In this work, we propose a learning-based data augmentation technique tailored for knowledge distillation, called CILDA. To the best of our knowledge, this is the first time that intermediate layer representations of the main task are used in improving the quality of augmented samples. More precisely, we introduce an augmentation technique for KD based on intermediate layer matching using contrastive loss to improve masked adversarial data augmentation. CILDA outperforms existing state-of-the-art KD approaches on the GLUE benchmark, as well as in an out-of-domain evaluation.</abstract>
@@ -4925,7 +4925,7 @@
       <author><first>Hande</first><last>Celikkanat</last></author>
       <author><first>Vinit</first><last>Ravishankar</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>4788–4800</pages>
       <abstract>We analyze the learning dynamics of neural language and translation models using Loss Change Allocation (LCA), an indicator that enables a fine-grained analysis of parameter updates when optimizing for the loss function. In other words, we can observe the contributions of different network components at training time. In this article, we systematically study masked language modeling, causal language modeling, and machine translation. We show that the choice of training objective leads to distinctive optimization procedures, even when performed on comparable Transformer architectures. We demonstrate how the various Transformer parameters are used during training, supporting that the feed-forward components of each layer are the main contributors to the optimization procedure. Finally, we find that the learning dynamics are not affected by data size and distribution but rather determined by the learning objective.</abstract>
       <url hash="bcfcbc53">2022.coling-1.424</url>
@@ -5041,7 +5041,7 @@
       <title>Knowledge Distillation with Reptile Meta-Learning for Pretrained Language Model Compression</title>
       <author><first>Xinge</first><last>Ma</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>4907–4917</pages>
       <abstract>The billions, and sometimes even trillions, of parameters involved in pre-trained language models significantly hamper their deployment in resource-constrained devices and real-time applications. Knowledge distillation (KD) can transfer knowledge from the original model (i.e., teacher) into a compact model (i.e., student) to achieve model compression. However, previous KD methods have usually frozen the teacher and applied its immutable output feature maps as soft labels to guide the student’s training. Moreover, the goal of the teacher is to achieve the best performance on downstream tasks rather than knowledge transfer. Such a fixed architecture may limit the teacher’s teaching and student’s learning abilities. Herein, a knowledge distillation method with reptile meta-learning is proposed to facilitate the transfer of knowledge from the teacher to the student. The teacher can continuously meta-learn the student’s learning objective to adjust its parameters for maximizing the student’s performance throughout the distillation process. In this way, the teacher learns to teach, produces more suitable soft labels, and transfers more appropriate knowledge to the student, resulting in improved performance. Unlike previous KD using meta-learning, the proposed method only needs to calculate the first-order derivatives to update the teacher, leading to lower computational cost but better convergence. Extensive experiments on the GLUE benchmark show the competitive performance achieved by the proposed method. For reproducibility, the code for this paper is available at: <url>https://github.com/maxinge8698/ReptileDistil</url>.</abstract>
@@ -5148,7 +5148,7 @@
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>5014–5020</pages>
       <abstract>In this paper we present FeatureBART, a linguistically motivated sequence-to-sequence monolingual pre-training strategy in which syntactic features such as lemma, part-of-speech and dependency labels are incorporated into the span prediction based pre-training framework (BART). These automatically extracted features are incorporated via approaches such as concatenation and relevance mechanisms, among which the latter is known to be better than the former. When used for low-resource NMT as a downstream task, we show that these feature based models give large improvements in bilingual settings and modest ones in multilingual settings over their counterparts that do not use features.</abstract>
       <url hash="347e4d7e">2022.coling-1.443</url>
@@ -5157,8 +5157,8 @@
     <paper id="444">
       <title>Multi-level Community-awareness Graph Neural Networks for Neural Machine Translation</title>
       <author><first>Binh</first><last>Nguyen</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>5021–5028</pages>
       <abstract>Neural Machine Translation (NMT) aims to translate the source- to the target-language while preserving the original meaning. Linguistic information such as morphology, syntactic, and semantics shall be grasped in token embeddings to produce a high-quality translation. Recent works have leveraged the powerful Graph Neural Networks (GNNs) to encode such language knowledge into token embeddings. Specifically, they use a trained parser to construct semantic graphs given sentences and then apply GNNs. However, most semantic graphs are tree-shaped and too sparse for GNNs which cause the over-smoothing problem. To alleviate this problem, we propose a novel Multi-level Community-awareness Graph Neural Network (MC-GNN) layer to jointly model local and global relationships between words and their linguistic roles in multiple communities. Intuitively, the MC-GNN layer substitutes a self-attention layer at the encoder side of a transformer-based machine translation model. Extensive experiments on four language-pair datasets with common evaluation metrics show the remarkable improvements of our method while reducing the time complexity in very long sentences.</abstract>
       <url hash="d6b3e9b9">2022.coling-1.444</url>
@@ -5191,7 +5191,7 @@
     <paper id="447">
       <title>Language Branch Gated Multilingual Neural Machine Translation</title>
       <author><first>Haoran</first><last>Sun</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>5046–5053</pages>
       <abstract>Knowledge transfer across languages is crucial for multilingual neural machine translation. In this paper, we propose language branch (LB) gated multilingual neural machine translation that encourages knowledge transfer within the same language branch with a LB-gated module that is integrated into both the encoder and decoder. The LB-gated module distinguishes LB-specific parameters from global parameters shared by all languages and routes languages from the same LB to the corresponding LB-specific network. Comprehensive experiments on the OPUS-100 dataset show that the proposed approach substantially improves translation quality on both middle- and low-resource languages over previous methods. Further analysis demonstrates its ability in learning similarities between language branches.</abstract>
       <url hash="7ccead8e">2022.coling-1.447</url>
@@ -5203,7 +5203,7 @@
       <author><first>Hui</first><last>Huang</last></author>
       <author><first>Jiale</first><last>Gao</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jian</first><last>Liu</last></author>
       <pages>5054–5065</pages>
       <abstract>Back-translation has been proven to be effective in unsupervised domain adaptation of neural machine translation (NMT). However, the existing back-translation methods mainly improve domain adaptability by generating in-domain pseudo-parallel data that contains sentence-structural knowledge, paying less attention to the in-domain lexical knowledge, which may lead to poor translation of unseen in-domain words. In this paper, we propose an Iterative Constrained Back-Translation (ICBT) method to incorporate in-domain lexical knowledge on the basis of BT for unsupervised domain adaptation of NMT. Specifically, we apply lexical constraints into back-translation to generate pseudo-parallel data with in-domain lexical knowledge, and then perform round-trip iterations to incorporate more lexical knowledge. Based on this, we further explore sampling strategies of constrained words in ICBT to introduce more targeted lexical knowledge, via domain specificity and confidence estimation. Experimental results on four domains show that our approach achieves state-of-the-art results, improving the BLEU score by up to 3.08 compared to the strongest baseline, which demonstrates the effectiveness of our approach.</abstract>
@@ -5214,7 +5214,7 @@
       <title>Linguistically-Motivated <fixed-case>Y</fixed-case>orùbá-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Ife</first><last>Adebara</last></author>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>5066–5075</pages>
       <abstract>Translating between languages where certain features are marked morphologically in one but absent or marked contextually in the other is an important test case for machine translation. When translating into English which marks (in)definiteness morphologically, from Yorùbá which uses bare nouns but marks these features contextually, ambiguities arise. In this work, we perform fine-grained analysis on how an SMT system compares with two NMT systems (BiLSTM and Transformer) when translating bare nouns in Yorùbá into English. We investigate how the systems what extent they identify BNs, correctly translate them, and compare with human translation patterns. We also analyze the type of errors each model makes and provide a linguistic description of these errors. We glean insights for evaluating model performance in low-resource settings. In translating bare nouns, our results show the transformer model outperforms the SMT and BiLSTM models for 4 categories, the BiLSTM outperforms the SMT model for 3 categories while the SMT outperforms the NMT models for 1 category.</abstract>
       <url hash="2af74f32">2022.coling-1.449</url>
@@ -5236,7 +5236,7 @@
       <author><first>Jian</first><last>Yang</last></author>
       <author><first>Shuming</first><last>Ma</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
       <pages>5085–5097</pages>
@@ -5310,7 +5310,7 @@
     <paper id="458">
       <title>Informative Language Representation Learning for Massively Multilingual Neural Machine Translation</title>
       <author><first>Renren</first><last>Jin</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>5158–5174</pages>
       <abstract>In a multilingual neural machine translation model that fully shares parameters across all languages, an artificial language token is usually used to guide translation into the desired target language. However, recent studies show that prepending language tokens sometimes fails to navigate the multilingual neural machine translation models into right translation directions, especially on zero-shot translation. To mitigate this issue, we propose two methods, language embedding embodiment and language-aware multi-head attention, to learn informative language representations to channel translation into right directions. The former embodies language embeddings into different critical switching points along the information flow from the source to the target, aiming at amplifying translation direction guiding signals. The latter exploits a matrix, instead of a vector, to represent a language in the continuous space. The matrix is chunked into multiple heads so as to learn language representations in multiple subspaces. Experiment results on two datasets for massively multilingual neural machine translation demonstrate that language-aware multi-head attention benefits both supervised and zero-shot translation and significantly alleviates the off-target translation issue. Further linguistic typology prediction experiments show that matrix-based language representations learned by our methods are capable of capturing rich linguistic typology features.</abstract>
       <url hash="7fc45a52">2022.coling-1.458</url>
@@ -5334,7 +5334,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Gyeongmin</first><last>Kim</last></author>
       <author><first>Jungseob</first><last>Lee</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>5181–5190</pages>
       <abstract>With the recent advance in neural machine translation demonstrating its importance, research on quality estimation (QE) has been steadily progressing. QE aims to automatically predict the quality of machine translation (MT) output without reference sentences. Despite its high utility in the real world, there remain several limitations concerning manual QE data creation: inevitably incurred non-trivial costs due to the need for translation experts, and issues with data scaling and language expansion. To tackle these limitations, we present QUAK, a Korean-English synthetic QE dataset generated in a fully automatic manner. This consists of three sub-QUAK datasets QUAK-M, QUAK-P, and QUAK-H, produced through three strategies that are relatively free from language constraints. Since each strategy requires no human effort, which facilitates scalability, we scale our data up to 1.58M for QUAK-P, H and 6.58M for QUAK-M. As an experiment, we quantitatively analyze word-level QE results in various ways while performing statistical analysis. Moreover, we show that datasets scaled in an efficient way also contribute to performance improvements by observing meaningful performance gains in QUAK-M, P when adding data up to 1.58M.</abstract>
       <url hash="b049b7b8">2022.coling-1.460</url>
@@ -5344,7 +5344,7 @@
       <title>Improving Both Domain Robustness and Domain Adaptability in Machine Translation</title>
       <author><first>Wen</first><last>Lai</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>5191–5204</pages>
       <abstract>We consider two problems of NMT domain adaptation using meta-learning. First, we want to reach domain robustness, i.e., we want to reach high quality on both domains seen in the training data and unseen domains. Second, we want our systems to be adaptive, i.e., making it possible to finetune systems with just hundreds of in-domain parallel sentences. We study the domain adaptability of meta-learning when improving the domain robustness of the model. In this paper, we propose a novel approach, RMLNMT (Robust Meta-Learning Framework for Neural Machine Translation Domain Adaptation), which improves the robustness of existing meta-learning models. More specifically, we show how to use a domain classifier in curriculum learning and we integrate the word-level domain mixing model into the meta-learning framework with a balanced sampling strategy. Experiments on English-German and English-Chinese translation show that RMLNMT improves in terms of both domain robustness and domain adaptability in seen and unseen domains.</abstract>
       <url hash="d4aa75e8">2022.coling-1.461</url>
@@ -5354,7 +5354,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>D</fixed-case>o<fixed-case>NMT</fixed-case>: Modeling Cohesion Devices for Document-Level Neural Machine Translation</title>
       <author><first>Yikun</first><last>Lei</last></author>
       <author><first>Yuqi</first><last>Ren</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>5205–5216</pages>
       <abstract>Cohesion devices, e.g., reiteration, coreference, are crucial for building cohesion links across sentences. In this paper, we propose a document-level neural machine translation framework, CoDoNMT, which models cohesion devices from two perspectives: Cohesion Device Masking (CoDM) and Cohesion Attention Focusing (CoAF). In CoDM, we mask cohesion devices in the current sentence and force NMT to predict them with inter-sentential context information. A prediction task is also introduced to be jointly trained with NMT. In CoAF, we attempt to guide the model to pay exclusive attention to relevant cohesion devices in the context when translating cohesion devices in the current sentence. Such a cohesion attention focusing strategy is softly applied to the self-attention layer. Experiments on three benchmark datasets demonstrate that our approach outperforms state-of-the-art document-level neural machine translation baselines. Further linguistic evaluation validates the effectiveness of the proposed model in producing cohesive translations.</abstract>
       <url hash="6a2e39b4">2022.coling-1.462</url>
@@ -5396,8 +5396,8 @@
       <title>Alleviating the Inequality of Attention Heads for Neural Machine Translation</title>
       <author><first>Zewei</first><last>Sun</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>5246–5250</pages>
       <abstract>Recent studies show that the attention heads in Transformer are not equal. We relate this phenomenon to the imbalance training of multi-head attention and the model dependence on specific heads. To tackle this problem, we propose a simple masking method: HeadMask, in two specific ways. Experiments show that translation improvements are achieved on multiple language pairs. Subsequent empirical analyses also support our assumption and confirm the effectiveness of the method.</abstract>
       <url hash="aedf2846">2022.coling-1.466</url>
@@ -5432,7 +5432,7 @@
       <title>Cross-lingual Feature Extraction from Monolingual Corpora for Low-resource Unsupervised Bilingual Lexicon Induction</title>
       <author><first>Zihao</first><last>Feng</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Weixuan</first><last>Wang</last></author>
       <author><first>Wei</first><last>Peng</last></author>
       <pages>5278–5287</pages>
@@ -5470,7 +5470,7 @@
     <paper id="472">
       <title>Deciphering and Characterizing Out-of-Vocabulary Words for Morphologically Rich Languages</title>
       <author><first>Georgie</first><last>Botev</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>5309–5326</pages>
@@ -5590,9 +5590,9 @@
       <author><first>Yige</first><last>Chen</last></author>
       <author><first>Eunkyul Leah</first><last>Jo</last></author>
       <author><first>Yundong</first><last>Yao</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <pages>5432–5437</pages>
       <abstract>In this study, we propose a morpheme-based scheme for Korean dependency parsing and adopt the proposed scheme to Universal Dependencies. We present the linguistic rationale that illustrates the motivation and the necessity of adopting the morpheme-based format, and develop scripts that convert between the original format used by Universal Dependencies and the proposed morpheme-based format automatically. The effectiveness of the proposed format for Korean dependency parsing is then testified by both statistical and neural models, including UDPipe and Stanza, with our carefully constructed morpheme-based word embedding for Korean. morphUD outperforms parsing results for all Korean UD treebanks, and we also present detailed error analysis.</abstract>
@@ -5644,8 +5644,8 @@
       <title>Belief Revision Based Caption Re-ranker with Visual Semantic Information</title>
       <author><first>Ahmed</first><last>Sabir</last></author>
       <author><first>Francesc</first><last>Moreno-Noguer</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>5488–5506</pages>
       <abstract>In this work, we focus on improving the captions generated by image-caption generation systems. We propose a novel re-ranking approach that leverages visual-semantic measures to identify the ideal caption that maximally captures the visual information in the image. Our re-ranker utilizes the Belief Revision framework (Blok et al., 2003) to calibrate the original likelihood of the top-n captions by explicitly exploiting semantic relatedness between the depicted caption and the visual context. Our experiments demonstrate the utility of our approach, where we observe that our re-ranker can enhance the performance of a typical image-captioning system without necessity of any additional training or fine-tuning.</abstract>
       <url hash="e2382cec">2022.coling-1.487</url>
@@ -5655,7 +5655,7 @@
       <title>Towards Understanding the Relation between Gestures and Language</title>
       <author><first>Artem</first><last>Abzaliev</last></author>
       <author><first>Andrew</first><last>Owens</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>5507–5520</pages>
       <abstract>In this paper, we explore the relation between gestures and language. Using a multimodal dataset, consisting of Ted talks where the language is aligned with the gestures made by the speakers, we adapt a semi-supervised multimodal model to learn gesture embeddings. We show that gestures are predictive of the native language of the speaker, and that gesture embeddings further improve language prediction result. In addition, gesture embeddings might contain some linguistic information, as we show by probing embeddings for psycholinguistic categories. Finally, we analyze the words that lead to the most expressive gestures and find that function words drive the expressiveness of gestures.</abstract>
       <url hash="575be813">2022.coling-1.488</url>
@@ -5727,7 +5727,7 @@
       <author><first>Federico</first><last>Pedeni</last></author>
       <author><first>Alessandro</first><last>Suglia</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>5597–5612</pages>
       <abstract>Artificial agents are nowadays challenged to perform embodied AI tasks. To succeed, agents must understand the meaning of verbs and how their corresponding actions transform the surrounding world. In this work, we propose ACT-Thor, a novel controlled benchmark for embodied action understanding. We use the AI2-THOR simulated environment to produce a controlled setup in which an agent, given a before-image and an associated action command, has to determine what the correct after-image is among a set of possible candidates. First, we assess the feasibility of the task via a human evaluation that resulted in 81.4% accuracy, and very high inter-annotator agreement (84.9%). Second, we design both unimodal and multimodal baselines, using state-of-the-art visual feature extractors. Our evaluation and error analysis suggest that only models that have a very structured representation of the actions together with powerful visual features can perform well on the task. However, they still fall behind human performance in a zero-shot scenario where the model is exposed to unseen (action, object) pairs. This paves the way for a systematic way of evaluating embodied AI agents that understand grounded actions.</abstract>
       <url hash="b665b07e">2022.coling-1.495</url>
@@ -5739,7 +5739,7 @@
       <author><first>Naihao</first><last>Deng</last></author>
       <author><first>Pingxuan</first><last>Huang</last></author>
       <author><first>Mihai</first><last>Burzo</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>5613–5635</pages>
       <abstract>Existing video understanding datasets mostly focus on human interactions, with little attention being paid to the “in the wild” settings, where the videos are recorded outdoors. We propose WILDQA, a video understanding dataset of videos recorded in outside settings. In addition to video question answering (Video QA), we also introduce the new task of identifying visual support for a given question and answer (Video Evidence Selection). Through evaluations using a wide range of baseline models, we show that WILDQA poses new challenges to the vision and language research communities. The dataset is available at https: //lit.eecs.umich.edu/wildqa/.</abstract>
       <url hash="0d985dda">2022.coling-1.496</url>
@@ -5810,7 +5810,7 @@
       <author><first>Zhijiang</first><last>Guo</last></author>
       <author><first>Yu</first><last>Fu</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>5707–5720</pages>
       <abstract>A scene graph is a semantic representation that expresses the objects, attributes, and relationships between objects in a scene. Scene graphs play an important role in many cross modality tasks, as they are able to capture the interactions between images and texts. In this paper, we focus on scene graph modification (SGM), where the system is required to learn how to update an existing scene graph based on a natural language query. Unlike previous approaches that rebuilt the entire scene graph, we frame SGM as a graph expansion task by introducing the incremental structure expanding (ISE). ISE constructs the target graph by incrementally expanding the source graph without changing the unmodified structure. Based on ISE, we further propose a model that iterates between nodes prediction and edges prediction, inferring more accurate and harmonious expansion decisions progressively. In addition, we construct a challenging dataset that contains more complicated queries and larger scene graphs than existing datasets. Experiments on four benchmarks demonstrate the effectiveness of our approach, which surpasses the previous state-of-the-art model by large margins.</abstract>
       <url hash="4e98716e">2022.coling-1.502</url>
@@ -5821,7 +5821,7 @@
       <author><first>Yike</first><last>Wu</last></author>
       <author><first>Yu</first><last>Zhao</last></author>
       <author><first>Shiwan</first><last>Zhao</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Xiaojie</first><last>Yuan</last></author>
       <author><first>Guoqing</first><last>Zhao</last></author>
       <author><first>Ning</first><last>Jiang</last></author>
@@ -5833,7 +5833,7 @@
     <paper id="504">
       <title>Efficient Multilingual Multi-modal Pre-training through Triple Contrastive Loss</title>
       <author><first>Youhan</first><last>Lee</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Woonhyuk</first><last>Baek</last></author>
       <author><first>Byungseok</first><last>Roh</last></author>
       <author><first>Saehoon</first><last>Kim</last></author>
@@ -5855,7 +5855,7 @@
       <title><fixed-case>GAP</fixed-case>: A Graph-aware Language Model Framework for Knowledge Graph-to-Text Generation</title>
       <author><first>Anthony</first><last>Colas</last></author>
       <author><first>Mehrdad</first><last>Alvandipour</last></author>
-      <author><first>Daisy Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last></author>
       <pages>5755–5769</pages>
       <abstract>Recent improvements in KG-to-text generation are due to additional auxiliary pre-training tasks designed to give the fine-tune task a boost in performance. These tasks require extensive computational resources while only suggesting marginal improvements. Here, we demonstrate that by fusing graph-aware elements into existing pre-trained language models, we are able to outperform state-of-the-art models and close the gap imposed by additional pre-training tasks. We do so by proposing a mask structure to capture neighborhood information and a novel type encoder that adds a bias to the graph-attention weights depending on the connection type. Experiments on two KG-to-text benchmark datasets show our models are competitive while involving fewer parameters and no additional pre-training tasks. By formulating the problem as a framework, we can interchange the various proposed components and begin interpreting KG-to-text generative models based on the topological and type information found in a graph.</abstract>
       <url hash="39f67037">2022.coling-1.506</url>
@@ -5876,7 +5876,7 @@
       <author><first>Ming</first><last>Zhong</last></author>
       <author><first>Zhiyong</first><last>Wu</last></author>
       <author><first>Qin</first><last>Zhu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <pages>5783–5793</pages>
       <abstract>Traditional training paradigms for extractive and abstractive summarization systems always only use token-level or sentence-level training objectives. However, the output summary is always evaluated from summary-level which leads to the inconsistency in training and evaluation. In this paper, we propose a Contrastive Learning based re-ranking framework for one-stage summarization called CoLo. By modeling a contrastive objective, we show that the summarization model is able to directly generate summaries according to the summary-level score without additional modules and parameters. Extensive experiments demonstrate that CoLo boosts the extractive and abstractive results of one-stage systems on CNN/DailyMail benchmark to 44.58 and 46.33 ROUGE-1 score while preserving the parameter efficiency and inference efficiency. Compared with state-of-the-art multi-stage systems, we save more than 100 GPU training hours and obtaining 3x 8x speed-up ratio during inference while maintaining comparable results.</abstract>
@@ -5887,7 +5887,7 @@
       <title>Of Human Criteria and Automatic Metrics: A Benchmark of the Evaluation of Story Generation</title>
       <author><first>Cyril</first><last>Chhun</last></author>
       <author><first>Pierre</first><last>Colombo</last></author>
-      <author><first>Fabian M.</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
       <pages>5794–5836</pages>
       <abstract>Research on Automatic Story Generation (ASG) relies heavily on human and automatic evaluation. However, there is no consensus on which human evaluation criteria to use, and no analysis of how well automatic criteria correlate with them. In this paper, we propose to re-evaluate ASG evaluation. We introduce a set of 6 orthogonal and comprehensive human criteria, carefully motivated by the social sciences literature. We also present HANNA, an annotated dataset of 1,056 stories produced by 10 different ASG systems. HANNA allows us to quantitatively evaluate the correlations of 72 automatic metrics with human criteria. Our analysis highlights the weaknesses of current metrics for ASG and allows us to formulate practical recommendations for ASG evaluation.</abstract>
@@ -5910,7 +5910,7 @@
       <author><first>Dongyuan</first><last>Li</last></author>
       <author><first>Jingyi</first><last>You</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>5857–5869</pages>
       <abstract>Text infilling aims to restore incomplete texts by filling in blanks, which has attracted more attention recently because of its wide application in ancient text restoration and text rewriting. However, attribute- aware text infilling is yet to be explored, and existing methods seldom focus on the infilling length of each blank or the number/location of blanks. In this paper, we propose an Attribute-aware Text Infilling method via a Pre-trained language model (A-TIP), which contains a text infilling component and a plug- and-play discriminator. Specifically, we first design a unified text infilling component with modified attention mechanisms and intra- and inter-blank positional encoding to better perceive the number of blanks and the infilling length for each blank. Then, we propose a plug-and-play discriminator to guide generation towards the direction of improving attribute relevance without decreasing text fluency. Finally, automatic and human evaluations on three open-source datasets indicate that A-TIP achieves state-of- the-art performance compared with all baselines.</abstract>
       <url hash="ce725826">2022.coling-1.511</url>
@@ -5919,7 +5919,7 @@
     <paper id="512">
       <title>Multi Graph Neural Network for Extractive Long Document Summarization</title>
       <author><first>Xuan-Dung</first><last>Doan</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <author><first>Khac-Hoai Nam</first><last>Bui</last></author>
       <pages>5870–5875</pages>
       <abstract>Heterogeneous Graph Neural Networks (HeterGNN) have been recently introduced as an emergent approach for extracting document summarization (EDS) by exploiting the cross-relations between words and sentences. However, applying HeterGNN for long documents is still an open research issue. One of the main majors is the lacking of inter-sentence connections. In this regard, this paper exploits how to apply HeterGNN for long documents by building a graph on sentence-level nodes (homogeneous graph) and combine with HeterGNN for capturing the semantic information in terms of both inter and intra-sentence connections. Experiments on two benchmark datasets of long documents such as PubMed and ArXiv show that our method is able to achieve state-of-the-art results in this research field.</abstract>
@@ -5941,7 +5941,7 @@
       <author><first>Fei-Tzin</first><last>Lee</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Feng</first><last>Nan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>5882–5895</pages>
       <abstract>Large pretrained language models offer powerful generation capabilities, but cannot be reliably controlled at a sub-sentential level. We propose to make such fine-grained control possible in pretrained LMs by generating text directly from a semantic representation, Abstract Meaning Representation (AMR), which is augmented at the node level with syntactic control tags. We experiment with English-language generation of three modes of syntax relevant to the framing of a sentence - verb voice, verb tense, and realization of human entities - and demonstrate that they can be reliably controlled, even in settings that diverge drastically from the training distribution. These syntactic aspects contribute to how information is framed in text, something that is important for applications such as summarization which aim to highlight salient information.</abstract>
       <url hash="0dea31e8">2022.coling-1.514</url>
@@ -5952,7 +5952,7 @@
       <author><first>Ge</first><last>Luo</last></author>
       <author><first>Hebi</first><last>Li</last></author>
       <author><first>Youbiao</first><last>He</last></author>
-      <author><first>Forrest Sheng</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest Sheng</first><last>Bao</last></author>
       <pages>5896–5903</pages>
       <abstract>Evaluating machine-generated summaries without a human-written reference summary has been a need for a long time. Inspired by preference labeling in existing work of summarization evaluation, we propose to judge summary quality by learning the preference rank of summaries using the Bradley-Terry power ranking model from inferior summaries generated by corrupting base summaries. Extensive experiments on several datasets show that our weakly supervised scheme can produce scores highly correlated with human ratings.</abstract>
       <url hash="4d5ebcac">2022.coling-1.515</url>
@@ -5973,7 +5973,7 @@
     <paper id="517">
       <title>Coordination Generation via Synchronized Text-Infilling</title>
       <author><first>Hiroki</first><last>Teranishi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>5914–5924</pages>
       <abstract>Generating synthetic data for supervised learning from large-scale pre-trained language models has enhanced performances across several NLP tasks, especially in low-resource scenarios. In particular, many studies of data augmentation employ masked language models to replace words with other words in a sentence. However, most of them are evaluated on sentence classification tasks and cannot immediately be applied to tasks related to the sentence structure. In this paper, we propose a simple yet effective approach to generating sentences with a coordinate structure in which the boundaries of its conjuncts are explicitly specified. For a given span in a sentence, our method embeds a mask with a coordinating conjunction in two ways (”X and [mask]”, ”[mask] and X”) and forces masked language models to fill the two blanks with an identical text. To achieve this, we introduce decoding methods for BERT and T5 models with the constraint that predictions for different masks are synchronized. Furthermore, we develop a training framework that effectively selects synthetic examples for the supervised coordination disambiguation task. We demonstrate that our method produces promising coordination instances that provide gains for the task in low-resource settings.</abstract>
       <url hash="8c15cb23">2022.coling-1.517</url>
@@ -6036,7 +6036,7 @@
       <title><fixed-case>JPG</fixed-case> - Jointly Learn to Align: Automated Disease Prediction and Radiology Report Generation</title>
       <author><first>Jingyi</first><last>You</last></author>
       <author><first>Dongyuan</first><last>Li</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Kenji</first><last>Suzuki</last></author>
       <pages>5989–6001</pages>
       <abstract>Automated radiology report generation aims to generate paragraphs that describe fine-grained visual differences among cases, especially those between the normal and the diseased. Existing methods seldom consider the cross-modal alignment between textual and visual features and tend to ignore disease tags as an auxiliary for report generation. To bridge the gap between textual and visual information, in this study, we propose a “Jointly learning framework for automated disease Prediction and radiology report Generation (JPG)” to improve the quality of reports through the interaction between the main task (report generation) and two auxiliary tasks (feature alignment and disease prediction). The feature alignment and disease prediction help the model learn text-correlated visual features and record diseases as keywords so that it can output high-quality reports. Besides, the improved reports in turn provide additional harder samples for feature alignment and disease prediction to learn more precise visual and textual representations and improve prediction accuracy. All components are jointly trained in a manner that helps improve them iteratively and progressively. Experimental results demonstrate the effectiveness of JPG on the most commonly used IU X-RAY dataset, showing its superior performance over multiple state-of-the-art image captioning and medical report generation methods with regard to BLEU, METEOR, and ROUGE metrics.</abstract>
@@ -6045,7 +6045,7 @@
     </paper>
     <paper id="524">
       <title>Automatic Nominalization of Clauses through Textual Entailment</title>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <author><first>Ho Hung</first><last>Lim</last></author>
       <author><first>Carol</first><last>Webster</last></author>
       <author><first>Anton</first><last>Melser</last></author>
@@ -6073,7 +6073,7 @@
       <title>Source-summary Entity Aggregation in Abstractive Summarization</title>
       <author><first>José Ángel</first><last>González</last></author>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>6019–6034</pages>
       <abstract>In a text, entities mentioned earlier can be referred to in later discourse by a more general description. For example, <i>Celine Dion</i> and <i>Justin Bieber</i> can be referred to by <i>Canadian singers</i> or <i>celebrities</i>. In this work, we study this phenomenon in the context of summarization, where entities from a source text are generalized in the summary. We call such instances <i>source-summary entity aggregations</i>. We categorize these aggregations into two types and analyze them in the Cnn/Dailymail corpus, showing that they are reasonably frequent. We then examine how well three state-of-the-art summarization systems can generate such aggregations within summaries. We also develop techniques to encourage them to generate more aggregations. Our results show that there is significant room for improvement in producing semantically correct aggregations.</abstract>
       <url hash="ae43aed2">2022.coling-1.526</url>
@@ -6154,7 +6154,7 @@
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Xuhui</first><last>Jiang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>6105–6114</pages>
       <abstract>Complex question generation over knowledge bases (KB) aims to generate natural language questions involving multiple KB relations or functional constraints. Existing methods train one encoder-decoder-based model to fit all questions. However, such a one-size-fits-all strategy may not perform well since complex questions exhibit an uneven distribution in many dimensions, such as question types, involved KB relations, and query structures, resulting in insufficient learning for long-tailed samples under different dimensions. To address this problem, we propose a meta-learning framework for complex question generation. The meta-trained generator can acquire universal and transferable meta-knowledge and quickly adapt to long-tailed samples through a few most related training samples. To retrieve similar samples for each input query, we design a self-supervised graph retriever to learn distributed representations for samples, and contrastive learning is leveraged to improve the learned representations. We conduct experiments on both WebQuestionsSP and ComplexWebQuestion, and results on long-tailed samples of different dimensions have been significantly improved, which demonstrates the effectiveness of the proposed framework.</abstract>
       <url hash="84dc7ba0">2022.coling-1.533</url>
@@ -6198,7 +6198,7 @@
       <title>Phrase-Level Localization of Inconsistency Errors in Summarization by Weak Supervision</title>
       <author><first>Masato</first><last>Takatsuka</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>6151–6164</pages>
       <abstract>Although the fluency of automatically generated abstractive summaries has improved significantly with advanced methods, the inconsistency that remains in summarization is recognized as an issue to be addressed. In this study, we propose a methodology for localizing inconsistency errors in summarization. A synthetic dataset that contains a variety of factual errors likely to be produced by a common summarizer is created by applying sentence fusion, compression, and paraphrasing operations. In creating the dataset, we automatically label erroneous phrases and the dependency relations between them as “inconsistent,” which can contribute to detecting errors more adequately than existing models that rely only on dependency arc-level labels. Subsequently, this synthetic dataset is employed as weak supervision to train a model called SumPhrase, which jointly localizes errors in a summary and their corresponding sentences in the source document. The empirical results demonstrate that our SumPhrase model can detect factual errors in summarization more effectively than existing weakly supervised methods owing to the phrase-level labeling. Moreover, the joint identification of error-corresponding original sentences is proven to be effective in improving error detection accuracy.</abstract>
       <url hash="b9b05f87">2022.coling-1.537</url>
@@ -6208,7 +6208,7 @@
       <title><fixed-case>P</fixed-case>oli<fixed-case>S</fixed-case>e: Reinforcing Politeness Using User Sentiment for Customer Care Response Generation</title>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6165–6175</pages>
       <abstract>The interaction between a consumer and the customer service representative greatly contributes to the overall customer experience. Therefore, to ensure customers’ comfort and retention, it is important that customer service agents and chatbots connect with users on social, cordial, and empathetic planes. In the current work, we automatically identify the sentiment of the user and transform the neutral responses into polite responses conforming to the sentiment and the conversational history. Our technique is basically a reinforced multi-task network- the primary task being ‘polite response generation’ and the secondary task being ‘sentiment analysis’- that uses a Transformer based encoder-decoder. We use sentiment annotated conversations from Twitter as the training data. The detailed evaluation shows that our proposed approach attains superior performance compared to the baseline models.</abstract>
       <url hash="a5818205">2022.coling-1.538</url>
@@ -6228,7 +6228,7 @@
     <paper id="540">
       <title><fixed-case>A</fixed-case>rg<fixed-case>L</fixed-case>egal<fixed-case>S</fixed-case>umm: Improving Abstractive Summarization of Legal Documents with Argument Mining</title>
       <author><first>Mohamed</first><last>Elaraby</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>6187–6194</pages>
       <abstract>A challenging task when generating summaries of legal documents is the ability to address their argumentative nature. We introduce a simple technique to capture the argumentative structure of legal documents by integrating argument role labeling into the summarization process. Experiments with pretrained language models show that our proposed approach improves performance over strong baselines.</abstract>
       <url hash="8e52284a">2022.coling-1.540</url>
@@ -6315,7 +6315,7 @@
       <author><first>Steven Y.</first><last>Feng</last></author>
       <author><first>Harsh</first><last>Jhamtani</last></author>
       <author><first>Malihe</first><last>Alikhani</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>6270–6284</pages>
       <abstract>A personification is a figure of speech that endows inanimate entities with properties and actions typically seen as requiring animacy. In this paper, we explore the task of personification generation. To this end, we propose PINEAPPLE: Personifying INanimate Entities by Acquiring Parallel Personification data for Learning Enhanced generation. We curate a corpus of personifications called PersonifCorp, together with automatically generated de-personified literalizations of these personifications. We demonstrate the usefulness of this parallel corpus by training a seq2seq model to personify a given literal input. Both automatic and human evaluations show that fine-tuning with PersonifCorp leads to significant gains in personification-related qualities such as animacy and interestingness. A detailed qualitative analysis also highlights key strengths and imperfections of PINEAPPLE over baselines, demonstrating a strong ability to generate diverse and creative personifications that enhance the overall appeal of a sentence.</abstract>
       <url hash="52ba11a6">2022.coling-1.547</url>
@@ -6350,7 +6350,7 @@
       <author><first>Fangwei</first><last>Zhu</last></author>
       <author><first>Juanzi</first><last>Li</last></author>
       <author><first>Lei</first><last>Hou</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>6315–6326</pages>
       <abstract>Multi-Document Summarization (MDS) commonly employs the 2-stage extract-then-abstract paradigm, which first extracts a relatively short meta-document, then feeds it into the deep neural networks to generate an abstract. Previous work usually takes the ROUGE score as the label for training a scoring model to evaluate source documents. However, the trained scoring model is prone to under-fitting for low-resource settings, as it relies on the training data. To extract documents effectively, we construct prompting templates that invoke the underlying knowledge in Pre-trained Language Model (PLM) to calculate the document and keyword’s perplexity, which can assess the document’s semantic salience. Our unsupervised approach can be applied as a plug-in to boost other metrics for evaluating a document’s salience, thus improving the subsequent abstract generation. We get positive results on 2 MDS datasets, 2 data settings, and 2 abstractive backbone models, showing our method’s effectiveness. Our code is available at <url>https://github.com/THU-KEG/UPER</url></abstract>
       <url hash="f17816cc">2022.coling-1.550</url>
@@ -6361,8 +6361,8 @@
       <author><first>Tianyang</first><last>Cao</last></author>
       <author><first>Shuang</first><last>Zeng</last></author>
       <author><first>Xiaodan</first><last>Xu</last></author>
-      <author><first>Mairgup</first><last>Mansur</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="mairgup-mansur"><first>Mairgup</first><last>Mansur</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>6327–6339</pages>
       <abstract>A math word problem (MWP) is a coherent narrative which reflects the underlying logic of math equations. Successful MWP generation can automate the writing of mathematics questions. Previous methods mainly generate MWP text based on inflexible pre-defined templates. In this paper, we propose a neural model for generating MWP text from math equations. Firstly, we incorporate a matching model conditioned on the domain knowledge to retrieve a MWP instance which is most consistent with the ground-truth, where the domain is a latent variable extracted with a domain summarizer. Secondly, by constructing a Quantity Cell Graph (QCG) from the retrieved MWP instance and reasoning over it, we improve the model’s comprehension of real-world scenarios and derive a domain-constrained instance sketch to guide the generation. Besides, the QCG also interacts with the equation encoder to enhance the alignment between math tokens (e.g., quantities and variables) and MWP text. Experiments and empirical analysis on educational MWP set show that our model achieves impressive performance in both automatic evaluation metrics and human evaluation metrics.</abstract>
       <url hash="948f040b">2022.coling-1.551</url>
@@ -6372,7 +6372,7 @@
       <title>Context-Tuning: Learning Contextualized Prompts for Natural Language Generation</title>
       <author><first>Tianyi</first><last>Tang</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>6340–6354</pages>
       <abstract>Recently, pretrained language models (PLMs) have had exceptional success in language generation. To leverage the rich knowledge encoded by PLMs, a simple yet powerful paradigm is to use <i>prompts</i> in the form of either discrete tokens or continuous embeddings. In existing studies, these prompting methods are typically independent of the inputs, lacking sufficient consideration of input semantics. To address this issue, we propose a novel continuous prompting approach, called <i>context-tuning</i>, to fine-tuning PLMs for natural language generation. Firstly, the prompts are derived based on the input text to elicit useful knowledge from PLMs for generation. We refer to such prompts as <i>contextualized prompts</i>. Secondly, we use <i>continuous inverse prompting</i> to improve the process of natural language generation by modeling an inverse generation process from output to input, making the generated text more relevant to the inputs. Furthermore, we utilize a lightweight context-tuning method that fine-tunes only 0.12% of the parameters while maintaining good performance. Our code is publicly available at <url>https://github.com/RUCAIBox/Context-Tuning</url>.</abstract>
@@ -6386,7 +6386,7 @@
       <author><first>Yu</first><last>Bai</last></author>
       <author><first>Jiawei</first><last>Li</last></author>
       <author><first>Yinan</first><last>Hu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
       <pages>6355–6368</pages>
       <abstract>Few-shot abstractive summarization has become a challenging task in natural language generation. To support it, we developed a novel soft prompts architecture coupled with a prompt pre-training plus prompt fine-tuning paradigm, which is effective and tunes only extremely light parameters. To meet the structure of the generation models, the soft prompts comprise continuous input embeddings across an encoder and a decoder. Importantly, a new inner-prompt placed in the text is introduced to capture document-level information. The aim is to devote attention to understanding the document that better prompts the model to generate document-related content. In the training process, the prompt pre-training with self-supervised pseudo-data firstly teaches the model basic summarizing capability. Then, with few-shot examples, only the designed lightweight soft prompts are fine-tuned. Experimental results on the CNN/DailyMail and XSum datasets show that our method, with only 0.1% of the parameters, outperforms full-model tuning where all model parameters are tuned. It also surpasses Prompt Tuning by a large margin and delivers competitive results against Prefix-Tuning with 3% of the parameters.</abstract>
@@ -6460,7 +6460,7 @@
     </paper>
     <paper id="559">
       <title><fixed-case>CHAE</fixed-case>: Fine-Grained Controllable Story Generation with Characters, Actions and Emotions</title>
-      <author id="xinpeng-wang"><first>Xinpeng</first><last>Wang</last></author>
+      <author><first>Xinpeng</first><last>Wang</last></author>
       <author><first>Han</first><last>Jiang</last></author>
       <author><first>Zhihua</first><last>Wei</last></author>
       <author><first>Shanlin</first><last>Zhou</last></author>
@@ -6570,7 +6570,7 @@
       <author><first>Ming</first><last>Zhong</last></author>
       <author><first>Zhangyue</first><last>Yin</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6540–6546</pages>
       <abstract>Pre-trained models have brought remarkable success on the text summarization task. For dialogue summarization, the subdomain of text summarization, utterances are concatenated to flat text before being processed. As a result, existing summarization systems based on pre-trained models are unable to recognize the unique format of the speaker-utterance pair well in the dialogue. To investigate this issue, we conduct probing tests and manual analysis, and find that the powerful pre-trained model can not identify different speakers well in the conversation, which leads to various factual errors. Moreover, we propose three speaker-aware supervised contrastive learning (SCL) tasks: Token-level SCL, Turn-level SCL, and Global-level SCL. Comprehensive experiments demonstrate that our methods achieve significant performance improvement on two mainstream dialogue summarization datasets. According to detailed human evaluations, pre-trained models equipped with SCL tasks effectively generate summaries with better factual consistency.</abstract>
       <url hash="9704f443">2022.coling-1.569</url>
@@ -6603,7 +6603,7 @@
       <author><first>Xin</first><last>Zhou</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6575–6585</pages>
       <abstract>Question generation over knowledge bases (KBQG) aims at generating natural questions about a subgraph, which can be answered by a given answer entity. Existing KBQG models still face two main challenges: (1) Most models often focus on the most relevant part of the answer entity, while neglecting the rest of the subgraph. (2) There are a large number of out-of-vocabulary (OOV) predicates in real-world scenarios, which are hard to adapt for most KBQG models. To address these challenges, we propose LFKQG, a controlled generation framework for Question Generation over Knowledge Bases. (1) LFKQG employs a simple controlled generation method to generate the questions containing the critical entities in the subgraph, ensuring the question is relevant to the whole subgraph. (2) We propose an optimization strategy called local fine-tuning, which can make good use of the rich information hidden in the pre-trained model to improve the ability of the model to adapt the OOV predicates. Extensive experiments show that our method outperforms existing methods significantly on three widely-used benchmark datasets SimpleQuestion, PathQuestions, and WebQuestions.</abstract>
       <url hash="ef73ff96">2022.coling-1.572</url>
@@ -6633,7 +6633,7 @@
       <title>Offensive Content Detection via Synthetic Code-Switched Text</title>
       <author><first>Cesa</first><last>Salaam</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Danda</first><last>Rawat</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <pages>6617–6624</pages>
@@ -6651,7 +6651,7 @@
       <author><first>Giovanni Da San</first><last>Martino</last></author>
       <author><first>Shaden</first><last>Shaar</last></author>
       <author><first>Hamed</first><last>Firooz</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>6625–6643</pages>
       <abstract>Recent years have witnessed the proliferation of offensive content online such as fake news, propaganda, misinformation, and disinformation. While initially this was mostly about textual content, over time images and videos gained popularity, as they are much easier to consume, attract more attention, and spread further than text. As a result, researchers started leveraging different modalities and combinations thereof to tackle online multimodal offensive content. In this study, we offer a survey on the state-of-the-art on multimodal disinformation detection covering various combinations of modalities: text, images, speech, video, social media network structure, and temporal information. Moreover, while some studies focused on factuality, others investigated how harmful the content is. While these two components in the definition of disinformation – (i) factuality, and (ii) harmfulness –, are equally important, they are typically studied in isolation. Thus, we argue for the need to tackle disinformation detection by taking into account multiple modalities as well as both factuality and harmfulness, in the same framework. Finally, we discuss current challenges and future research directions.</abstract>
       <url hash="260c387b">2022.coling-1.576</url>
@@ -6695,7 +6695,7 @@
       <author><first>Xiaoyun</first><last>Han</last></author>
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Menglong</first><last>Lu</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>6680–6690</pages>
       <abstract>Early rumor detection is a key challenging task to prevent rumors from spreading widely. Sociological research shows that social bots’ behavior in the early stage has become the main reason for rumors’ wide spread. However, current models do not explicitly distinguish genuine users from social bots, and their failure in identifying rumors timely. Therefore, this paper aims at early rumor detection by accounting for social bots’ behavior, and presents a Social Bot-Aware Graph Neural Network, named SBAG. SBAG firstly pre-trains a multi-layer perception network to capture social bot features, and then constructs multiple graph neural networks by embedding the features to model the early propagation of posts, which is further used to detect rumors. Extensive experiments on three benchmark datasets show that SBAG achieves significant improvements against the baselines and also identifies rumors within 3 hours while maintaining more than 90% accuracy.</abstract>
       <url hash="825ef481">2022.coling-1.580</url>
@@ -6731,7 +6731,7 @@
       <title>Detecting Minority Arguments for Mutual Understanding: A Moderation Tool for the Online Climate Change Debate</title>
       <author><first>Cedric</first><last>Waterschoot</last></author>
       <author><first>Ernst</first><last>van den Hemel</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>6715–6725</pages>
       <abstract>Moderating user comments and promoting healthy understanding is a challenging task, especially in the context of polarized topics such as climate change. We propose a moderation tool to assist moderators in promoting mutual understanding in regard to this topic. The approach is twofold. First, we train classifiers to label incoming posts for the arguments they entail, with a specific focus on minority arguments. We apply active learning to further supplement the training data with rare arguments. Second, we dive deeper into singular arguments and extract the lexical patterns that distinguish each argument from the others. Our findings indicate that climate change arguments form clearly separable clusters in the embedding space. These classes are characterized by their own unique lexical patterns that provide a quick insight in an argument’s key concepts. Additionally, supplementing our training data was necessary for our classifiers to be able to adequately recognize rare arguments. We argue that this detailed rundown of each argument provides insight into where others are coming from. These computational approaches can be part of the toolkit for content moderators and researchers struggling with polarized topics.</abstract>
       <url hash="e3ab4729">2022.coling-1.583</url>
@@ -6750,7 +6750,7 @@
     </paper>
     <paper id="585">
       <title>Structural Bias for Aspect Sentiment Triplet Extraction</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Lei</first><last>Ren</last></author>
       <author><first>Fang</first><last>Ma</last></author>
       <author><first>Jingang</first><last>Wang</last></author>
@@ -6777,7 +6777,7 @@
       <author><first>Gopendra Vikram</first><last>Singh</last></author>
       <author><first>Aseem</first><last>Arora</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6752–6761</pages>
       <abstract>In this paper, we hypothesize that humor is closely related to sentiment and emotions. Also, due to the tremendous growth in multilingual content, there is a great demand for building models and systems that support multilingual information access. To end this, we first extend the recently released Multimodal Multiparty Hindi Humor (M2H2) dataset by adding parallel English utterances corresponding to Hindi utterances and then annotating each utterance with sentiment and emotion classes. We name it Sentiment, Humor, and Emotion aware Multilingual Multimodal Multiparty Dataset (SHEMuD). Therefore, we propose a multitask framework wherein the primary task is humor detection, and the auxiliary tasks are sentiment and emotion identification. We design a multitasking framework wherein we first propose a Context Transformer to capture the deep contextual relationships with the input utterances. We then propose a Sentiment and Emotion aware Embedding (SE-Embedding) to get the overall representation of a particular emotion and sentiment w.r.t. the specific humor situation. Experimental results on the SHEMuD show the efficacy of our approach and shows that multitask learning offers an improvement over the single-task framework for both monolingual (4.86 points in Hindi and 5.9 points in English in F1-score) and multilingual (5.17 points in F1-score) setting.</abstract>
       <url hash="9147a13b">2022.coling-1.587</url>
@@ -6799,7 +6799,7 @@
       <title>Entity-Level Sentiment Analysis (<fixed-case>ELSA</fixed-case>): An Exploratory Task Survey</title>
       <author><first>Egil</first><last>Rønningstad</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>6773–6783</pages>
       <abstract>This paper explores the task of identifying the overall sentiment expressed towards volitional entities (persons and organizations) in a document - what we refer to as Entity-Level Sentiment Analysis (ELSA). While identifying sentiment conveyed towards an entity is well researched for shorter texts like tweets, we find little to no research on this specific task for longer texts with multiple mentions and opinions towards the same entity. This lack of research would be understandable if ELSA can be derived from existing tasks and models. To assess this, we annotate a set of professional reviews for their overall sentiment towards each volitional entity in the text. We sample from data already annotated for document-level, sentence-level, and target-level sentiment in a multi-domain review corpus, and our results indicate that there is no single proxy task that provides this overall sentiment we seek for the entities at a satisfactory level of performance. We present a suite of experiments aiming to assess the contribution towards ELSA provided by document-, sentence-, and target-level sentiment analysis, and provide a discussion of their shortcomings. We show that sentiment in our dataset is expressed not only with an entity mention as target, but also towards targets with a sentiment-relevant relation to a volitional entity. In our data, these relations extend beyond anaphoric coreference resolution, and our findings call for further research of the topic. Finally, we also present a survey of previous relevant work.</abstract>
       <url hash="89f191de">2022.coling-1.589</url>
@@ -6810,9 +6810,9 @@
       <author><first>Fei</first><last>Zhao</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
       <author><first>Siyu</first><last>Long</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>6784–6794</pages>
       <abstract>Target-oriented multimodal sentiment classification (TMSC) is a new subtask of aspect-based sentiment analysis, which aims to determine the sentiment polarity of the opinion target mentioned in a (sentence, image) pair. Recently, dominant works employ the attention mechanism to capture the corresponding visual representations of the opinion target, and then aggregate them as evidence to make sentiment predictions. However, they still suffer from two problems: (1) The granularity of the opinion target in two modalities is inconsistent, which causes visual attention sometimes fail to capture the corresponding visual representations of the target; (2) Even though it is captured, there are still significant differences between the visual representations expressing the same mood, which brings great difficulty to sentiment prediction. To this end, we propose a novel Knowledge-enhanced Framework (KEF) in this paper, which can successfully exploit adjective-noun pairs extracted from the image to improve the visual attention capability and sentiment prediction capability of the TMSC task. Extensive experimental results show that our framework consistently outperforms state-of-the-art works on two public datasets.</abstract>
       <url hash="a964915b">2022.coling-1.590</url>
@@ -6844,7 +6844,7 @@
       <author><first>Hang</first><last>Jiang</last></author>
       <author><first>Doug</first><last>Beeferman</last></author>
       <author><first>Brandon</first><last>Roy</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>6818–6826</pages>
       <abstract>As political attitudes have diverged ideologically in the United States, political speech has diverged lingusitically. The ever-widening polarization between the US political parties is accelerated by an erosion of mutual understanding between them. We aim to make these communities more comprehensible to each other with a framework that probes community-specific responses to the same survey questions using community language models CommunityLM. In our framework we identify committed partisan members for each community on Twitter and fine-tune LMs on the tweets authored by them. We then assess the worldviews of the two groups using prompt-based probing of their corresponding LMs, with prompts that elicit opinions about public figures and groups surveyed by the American National Election Studies (ANES) 2020 Exploratory Testing Survey. We compare the responses generated by the LMs to the ANES survey results, and find a level of alignment that greatly exceeds several baseline methods. Our work aims to show that we can use community LMs to query the worldview of any group of people given a sufficiently large sample of their social media discussions or media diet.</abstract>
       <url hash="c123eb98">2022.coling-1.593</url>
@@ -6870,7 +6870,7 @@
       <author><first>Jens</first><last>Lemmens</last></author>
       <author><first>Jens</first><last>Van Nooten</last></author>
       <author><first>Tim</first><last>Kreutz</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>6837–6845</pages>
       <abstract>We present CoNTACT: a Dutch language model adapted to the domain of COVID-19 tweets. The model was developed by continuing the pre-training phase of RobBERT (Delobelle et al., 2020) by using 2.8M Dutch COVID-19 related tweets posted in 2021. In order to test the performance of the model and compare it to RobBERT, the two models were tested on two tasks: (1) binary vaccine hesitancy detection and (2) detection of arguments for vaccine hesitancy. For both tasks, not only Twitter but also Facebook data was used to show cross-genre performance. In our experiments, CoNTACT showed statistically significant gains over RobBERT in all experiments for task 1. For task 2, we observed substantial improvements in virtually all classes in all experiments. An error analysis indicated that the domain adaptation yielded better representations of domain-specific terminology, causing CoNTACT to make more accurate classification decisions. For task 2, we observed substantial improvements in virtually all classes in all experiments. An error analysis indicated that the domain adaptation yielded better representations of domain-specific terminology, causing CoNTACT to make more accurate classification decisions.</abstract>
       <url hash="b43d4377">2022.coling-1.595</url>
@@ -6890,7 +6890,7 @@
     <paper id="597">
       <title>Transferring Confluent Knowledge to Argument Mining</title>
       <author><first>João António</first><last>Rodrigues</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>6859–6874</pages>
       <abstract>Relevant to all application domains where it is important to get at the reasons underlying sentiments and decisions, argument mining seeks to obtain structured arguments from unstructured text and has been addressed by approaches typically involving some feature and/or neural architecture engineering. By adopting a transfer learning methodology, and by means of a systematic study with a wide range of knowledge sources promisingly suitable to leverage argument mining, the aim of this paper is to empirically assess the potential of transferring such knowledge learned with confluent tasks. By adopting a lean approach that dispenses with heavier feature and model engineering, this study permitted both to gain novel empirically based insights into the argument mining task and to establish new state of the art levels of performance for its three main sub-tasks, viz. identification of argument components, classification of the components, and determination of the relation among them.</abstract>
       <url hash="e3723b9f">2022.coling-1.597</url>
@@ -6900,7 +6900,7 @@
       <title>When to Laugh and How Hard? A Multimodal Approach to Detecting Humor and Its Intensity</title>
       <author><first>Khalid</first><last>Alnajjar</last></author>
       <author><first>Mika</first><last>Hämäläinen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Jorma</first><last>Laaksonen</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <pages>6875–6886</pages>
@@ -6924,7 +6924,7 @@
     <paper id="600">
       <title>Analyzing Persuasion Strategies of Debaters on Social Media</title>
       <author><first>Matti</first><last>Wiegmann</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Vishal</first><last>Khanna</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>6897–6905</pages>
@@ -6998,7 +6998,7 @@
       <author><first>Shengqiong</first><last>Wu</last></author>
       <author><first>Hao</first><last>Fei</last></author>
       <author><first>Fei</first><last>Li</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>6955–6965</pages>
       <abstract>Emotion cause pair extraction (ECPE), as one of the derived subtasks of emotion cause analysis (ECA), shares rich inter-related features with emotion extraction (EE) and cause extraction (CE). Therefore EE and CE are frequently utilized as auxiliary tasks for better feature learning, modeled via multi-task learning (MTL) framework by prior works to achieve state-of-the-art (SoTA) ECPE results. However, existing MTL-based methods either fail to simultaneously model the specific features and the interactive feature in between, or suffer from the inconsistency of label prediction. In this work, we consider addressing the above challenges for improving ECPE by performing two alignment mechanisms with a novel Aˆ2Net model. We first propose a feature-task alignment to explicitly model the specific emotion-&amp;cause-specific features and the shared interactive feature. Besides, an inter-task alignment is implemented, in which the label distance between the ECPE and the combinations of EE&amp;CE are learned to be narrowed for better label consistency. Evaluations of benchmarks show that our methods outperform current best-performing systems on all ECA subtasks. Further analysis proves the importance of our proposed alignment mechanisms for the task.</abstract>
       <url hash="87498026">2022.coling-1.606</url>
@@ -7012,7 +7012,7 @@
       <author><first>Junjie</first><last>Ye</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6966–6977</pages>
       <abstract>Despite having achieved great success for sentiment analysis, existing neural models struggle with implicit sentiment analysis. It is because they may latch onto spurious correlations (“shortcuts”, e.g., focusing only on explicit sentiment words), resulting in undermining the effectiveness and robustness of the learned model. In this work, we propose a CausaL intervention model for implicit sEntiment ANalysis using instrumental variable (CLEAN). We first review sentiment analysis from a causal perspective and analyze the confounders existing in this task. Then, we introduce instrumental variable to eliminate the confounding causal effects, thus extracting the pure causal effect between sentence and sentiment. We compare the proposed CLEAN with several strong baselines on both the general implicit sentiment analysis and aspect-based implicit sentiment analysis tasks. The results indicate the great advantages of our model and the efficacy of implicit sentiment reasoning.</abstract>
       <url hash="63dfa16c">2022.coling-1.607</url>
@@ -7023,7 +7023,7 @@
       <author><first>Soumitra</first><last>Ghosh</last></author>
       <author><first>Gopendra Vikram</first><last>Singh</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6978–6990</pages>
       <abstract>Mental health is a critical component of the United Nations’ Sustainable Development Goals (SDGs), particularly Goal 3, which aims to provide “good health and well-being”. The present mental health treatment gap is exacerbated by stigma, lack of human resources, and lack of research capability for implementation and policy reform. We present and discuss a novel task of detecting emotional reasoning (ER) and accompanying emotions in conversations. In particular, we create a first-of-its-kind multimodal mental health conversational corpus that is manually annotated at the utterance level with emotional reasoning and related emotion. We develop a multimodal multitask framework with a novel multimodal feature fusion technique and a contextuality learning module to handle the two tasks. Leveraging multimodal sources of information, commonsense reasoning, and through a multitask framework, our proposed model produces strong results. We achieve performance gains of 6% accuracy and 4.62% F1 on the emotion detection task and 3.56% accuracy and 3.31% F1 on the ER detection task, when compared to the existing state-of-the-art model.</abstract>
       <url hash="74779873">2022.coling-1.608</url>
@@ -7096,10 +7096,10 @@
     <paper id="614">
       <title>One-Teacher and Multiple-Student Knowledge Distillation on Sentiment Classification</title>
       <author><first>Xiaoqin</first><last>Chang</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Suyang</first><last>Zhu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>7042–7052</pages>
       <abstract>Knowledge distillation is an effective method to transfer knowledge from a large pre-trained teacher model to a compacted student model. However, in previous studies, the distilled student models are still large and remain impractical in highly speed-sensitive systems (e.g., an IR system). In this study, we aim to distill a deep pre-trained model into an extremely compacted shallow model like CNN. Specifically, we propose a novel one-teacher and multiple-student knowledge distillation approach to distill a deep pre-trained teacher model into multiple shallow student models with ensemble learning. Moreover, we leverage large-scale unlabeled data to improve the performance of students. Empirical studies on three sentiment classification tasks demonstrate that our approach achieves better results with much fewer parameters (0.9%-18%) and extremely high speedup ratios (100X-1000X).</abstract>
       <url hash="54ba8474">2022.coling-1.614</url>
@@ -7113,7 +7113,7 @@
       <author><first>Xuanting</first><last>Chen</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Rui</first><last>Xie</last></author>
       <author><first>Wei</first><last>Wu</last></author>
       <pages>7053–7064</pages>
@@ -7142,7 +7142,7 @@
       <author><first>Zhen</first><last>Wu</last></author>
       <author><first>Jindong</first><last>Wang</last></author>
       <author><first>Takahiro</first><last>Shinozaki</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <pages>7075–7085</pages>
       <abstract>Target-oriented Opinion Words Extraction (TOWE) is a fine-grained sentiment analysis task that aims to extract the corresponding opinion words of a given opinion target from the sentence. Recently, deep learning approaches have made remarkable progress on this task. Nevertheless, the TOWE task still suffers from the scarcity of training data due to the expensive data annotation process. Limited labeled data increase the risk of distribution shift between test data and training data. In this paper, we propose exploiting massive unlabeled data to reduce the risk by increasing the exposure of the model to varying distribution shifts. Specifically, we propose a novel Multi-Grained Consistency Regularization (MGCR) method to make use of unlabeled data and design two filters specifically for TOWE to filter noisy data at different granularity. Extensive experimental results on four TOWE benchmark datasets indicate the superiority of MGCR compared with current state-of-the-art methods. The in-depth analysis also demonstrates the effectiveness of the different-granularity filters.</abstract>
@@ -7167,7 +7167,7 @@
       <author><first>Chujun</first><last>Wang</last></author>
       <author><first>Siyuan</first><last>Wang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Libo</first><last>Wu</last></author>
       <pages>7093–7098</pages>
       <abstract>Existing research for argument representation learning mainly treats tokens in the sentence equally and ignores the implied structure information of argumentative context. In this paper, we propose to separate tokens into two groups, namely framing tokens and topic ones, to capture structural information of arguments. In addition, we consider high-level structure by incorporating paragraph-level position information. A novel structure-aware argument encoder is proposed for literature discourse analysis. Experimental results on both a self-constructed corpus and a public corpus show the effectiveness of our model. Resources are available at <url>https://github.com/lemuria-wchen/SAE</url>.</abstract>
@@ -7228,12 +7228,12 @@
     <paper id="624">
       <title>Keyphrase Prediction from Video Transcripts: New Dataset and Directions</title>
       <author><first>Amir Pouran Ben</first><last>Veyseh</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Hanieh</first><last>Deilamsalehy</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter W.</first><last>Chang</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
@@ -7304,7 +7304,7 @@
       <author><first>Zhiyong</first><last>Wu</last></author>
       <author><first>Dong</first><last>Xu</last></author>
       <author><first>Weifeng</first><last>Zhao</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>7193–7202</pages>
       <abstract>Naturalness and expressiveness are crucial for audiobook speech synthesis, but now are limited by the averaged global-scale speaking style representation. In this paper, we propose an unsupervised multi-scale context-sensitive text-to-speech model for audiobooks. A multi-scale hierarchical context encoder is specially designed to predict both global-scale context style embedding and local-scale context style embedding from a wider context of input text in a hierarchical manner. Likewise, a multi-scale reference encoder is introduced to extract reference style embeddings at both global and local scales from the reference speech, which is used to guide the prediction of speaking styles. On top of these, a bi-reference attention mechanism is used to align both local-scale reference style embedding sequence and local-scale context style embedding sequence with corresponding phoneme embedding sequence. Both objective and subjective experiment results on a real-world multi-speaker Mandarin novel audio dataset demonstrate the excellent performance of our proposed method over all baselines in terms of naturalness and expressiveness of the synthesized speech.</abstract>
       <url hash="876ac841">2022.coling-1.630</url>
diff --git a/data/xml/2022.computel.xml b/data/xml/2022.computel.xml
index c1e521f20a..4591221ead 100644
--- a/data/xml/2022.computel.xml
+++ b/data/xml/2022.computel.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-05-15" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages</booktitle>
-      <editor><first>Sarah</first><last>Moeller</last></editor>
+      <editor id="sarah-moeller"><first>Sarah</first><last>Moeller</last></editor>
       <editor><first>Antonios</first><last>Anastasopoulos</last></editor>
       <editor><first>Antti</first><last>Arppe</last></editor>
       <editor><first>Aditi</first><last>Chaudhary</last></editor>
@@ -64,7 +64,7 @@
     <paper id="4">
       <title><fixed-case>CLD</fixed-case>² Language Documentation Meets Natural Language Processing for Revitalising Endangered Languages</title>
       <author><first>Roberto</first><last>Zariquiey</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Javier</first><last>Vera</last></author>
       <pages>20-30</pages>
       <abstract>Language revitalisation should not be understood as a direct outcome of language documentation, which is mainly focused on the creation of language repositories. Natural language processing (NLP) offers the potential to complement and exploit these repositories through the development of language technologies that may contribute to improving the vitality status of endangered languages. In this paper, we discuss the current state of the interaction between language documentation and computational linguistics, present a diagnosis of how the outputs of recent documentation projects for endangered languages are underutilised for the NLP community, and discuss how the situation could change from both the documentary linguistics and NLP perspectives. All this is introduced as a bridging paradigm dubbed as Computational Language Documentation and Development (CLD²). CLD² calls for (1) the inclusion of NLP-friendly annotated data as a deliverable of future language documentation projects; and (2) the exploitation of language documentation databases by the NLP community to promote the computerization of endangered languages, as one way to contribute to their revitalization.</abstract>
@@ -75,7 +75,7 @@
     <paper id="5">
       <title>One Wug, Two Wug+s Transformer Inflection Models Hallucinate Affixes</title>
       <author><first>Farhan</first><last>Samir</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>31-40</pages>
       <abstract>Data augmentation strategies are increasingly important in NLP pipelines for low-resourced and endangered languages, and in neural morphological inflection, augmentation by so called data hallucination is a popular technique. This paper presents a detailed analysis of inflection models trained with and without data hallucination for the low-resourced Canadian Indigenous language Gitksan. Our analysis reveals evidence for a concatenative inductive bias in augmented models—in contrast to models trained without hallucination, they strongly prefer affixing inflection patterns over suppletive ones. We find that preference for affixation in general improves inflection performance in “wug test” like settings, where the model is asked to inflect lexemes missing from the training set. However, data hallucination dramatically reduces prediction accuracy for reduplicative forms due to a misanalysis of reduplication as affixation. While the overall impact of data hallucination for unseen lexemes remains positive, our findings call for greater qualitative analysis and more varied evaluation conditions in testing automatic inflection systems. Our results indicate that further innovations in data augmentation for computational morphology are desirable.</abstract>
       <url hash="cd3f32a8">2022.computel-1.5</url>
@@ -93,7 +93,7 @@
       <author><first>Michael</first><last>Higgins</last></author>
       <author><first>Roy</first><last>Barker</last></author>
       <author><first>Jane</first><last>Simpson</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>41-51</pages>
       <abstract>Many archival recordings of speech from endangered languages remain unannotated and inaccessible to community members and language learning programs. One bottleneck is the time-intensive nature of annotation. An even narrower bottleneck occurs for recordings with access constraints, such as language that must be vetted or filtered by authorised community members before annotation can begin. We propose a privacy-preserving workflow to widen both bottlenecks for recordings where speech in the endangered language is intermixed with a more widely-used language such as English for meta-linguistic commentary and questions (e.g.What is the word for ‘tree’?). We integrate voice activity detection (VAD), spoken language identification (SLI), and automatic speech recognition (ASR) to transcribe the metalinguistic content, which an authorised person can quickly scan to triage recordings that can be annotated by people with lower levels of access. We report work-in-progress processing 136 hours archival audio containing a mix of English and Muruwari. Our collaborative work with the Muruwari custodian of the archival materials show that this workflow reduces metalanguage transcription time by 20% even given only minimal amounts of annotated training data, 10 utterances per language for SLI and for ASR at most 39 minutes, and possibly as little as 39 seconds.</abstract>
       <url hash="3030d588">2022.computel-1.6</url>
@@ -109,7 +109,7 @@
       <author><first>David</first><last>Huggins-Daines</last></author>
       <author><first>Christopher</first><last>Cox</last></author>
       <author><first>Fineen</first><last>Davis</last></author>
-      <author><first>Eddie</first><last>Antonio Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie</first><last>Antonio Santos</last></author>
       <author><first>Shankhalika</first><last>Srikanth</last></author>
       <author><first>Delasie</first><last>Torkornoo</last></author>
       <author><first>Sabrina</first><last>Yu</last></author>
@@ -139,7 +139,7 @@
       <author><first>Nedelina</first><last>Ivanova</last></author>
       <author><first>Christèle</first><last>Maizonniaux</last></author>
       <author><first>Neasa</first><last>Ní Chiaráin</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>John</first><last>Sloan</last></author>
       <author><first>Ghil’ad</first><last>Zuckermann</last></author>
       <pages>68-77</pages>
@@ -235,7 +235,7 @@
     <paper id="17">
       <title>Faoi Gheasa an adaptive game for <fixed-case>I</fixed-case>rish language learning</title>
       <author><first>Liang</first><last>Xu</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Monica</first><last>Ward</last></author>
       <pages>133-138</pages>
       <abstract>In this paper, we present a game with a purpose (GWAP) (Von Ahn 2006). The aim of the game is to promote language learning and ‘noticing’ (Skehan, 2013). The game has been designed for Irish, but the framework could be used for other languages. Irish is a minority language which means that L2 learners have limited opportunities for exposure to the language, and additionally, there are also limited (digital) learning resources available. This research incorporates game development, language pedagogy and ICALL language materials development. This paper will focus on the language materials development as this is a bottleneck in the teaching and learning of minority and endangered languages.</abstract>
@@ -312,7 +312,7 @@
       <author><first>Nikolaos</first><last>Constantinides</last></author>
       <author><first>Nikolaos</first><last>Kokkas</last></author>
       <author><first>George</first><last>Pavlidis</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <pages>179-186</pages>
       <abstract>The project XXXX is developing a platform to enable researchers of living languages to easily create and make available state-of-the-art spoken and textual annotated resources. As a case study we use Greek and Pomak, the latter being an endangered oral Slavic language of the Balkans (including Thrace/Greece). The linguistic documentation of Pomak is an ongoing work by an interdisciplinary team in close cooperation with the Pomak community of Greece. We describe our experience in the development of a Latin-based orthography and morphologically annotated text corpora of Pomak with state-of-the-art NLP technology. These resources will be made openly available on the XXXX site and the gold annotated corpora of Pomak will be made available on the Universal Dependencies treebank repository.</abstract>
       <url hash="d0591ac5">2022.computel-1.22</url>
@@ -324,7 +324,7 @@
       <title>Enhancing Documentation of <fixed-case>H</fixed-case>upa with Automatic Speech Recognition</title>
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Justin</first><last>Spence</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>187-192</pages>
       <abstract>This study investigates applications of automatic speech recognition (ASR) techniques to Hupa, a critically endangered Native American language from the Dene (Athabaskan) language family. Using around 9h12m of spoken data produced by one elder who is a first-language Hupa speaker, we experimented with different evaluation schemes and training settings. On average a fully connected deep neural network reached a word error rate of 35.26%. Our overall results illustrate the utility of ASR for making Hupa language documentation more accessible and usable. In addition, we found that when training acoustic models, using recordings with transcripts that were not carefully verified did not necessarily have a negative effect on model performance. This shows promise for speech corpora of indigenous languages that commonly include transcriptions produced by second-language speakers or linguists who have advanced knowledge in the language of interest.</abstract>
       <url hash="a25cfed9">2022.computel-1.23</url>
diff --git a/data/xml/2022.conll.xml b/data/xml/2022.conll.xml
index 16681ff096..f52528e358 100644
--- a/data/xml/2022.conll.xml
+++ b/data/xml/2022.conll.xml
@@ -44,7 +44,7 @@
       <author><first>Michael</first><last>Yoder</last></author>
       <author><first>Lynnette</first><last>Ng</last></author>
       <author><first>David West</first><last>Brown</last></author>
-      <author><first>Kathleen</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last></author>
       <pages>27-39</pages>
       <abstract>This paper investigates how hate speech varies in systematic ways according to the identities it targets. Across multiple hate speech datasets annotated for targeted identities, we find that classifiers trained on hate speech targeting specific identity groups struggle to generalize to other targeted identities. This provides empirical evidence for differences in hate speech by target identity; we then investigate which patterns structure this variation. We find that the targeted demographic category (e.g. gender/sexuality or race/ethnicity) appears to have a greater effect on the language of hate speech than does the relative social power of the targeted identity group. We also find that words associated with hate speech targeting specific identities often relate to stereotypes, histories of oppression, current social movements, and other social contexts specific to identities. These experiments suggest the importance of considering targeted identity, as well as the social contexts associated with these identities, in automated hate speech classification</abstract>
       <url hash="9eddbf11">2022.conll-1.3</url>
@@ -118,9 +118,9 @@
     <paper id="9">
       <title>Combining Noisy Semantic Signals with Orthographic Cues: Cognate Induction for the <fixed-case>I</fixed-case>ndic Dialect Continuum</title>
       <author><first>Niyati</first><last>Bafna</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>110-131</pages>
       <abstract>We present a novel method for unsupervised cognate/borrowing identification from monolingual corpora designed for low and extremely low resource scenarios, based on combining noisy semantic signals from joint bilingual spaces with orthographic cues modelling sound change. We apply our method to the North Indian dialect continuum, containing several dozens of dialects and languages spoken by more than 100 million people. Many of these languages are zero-resource and therefore natural language processing for them is non-existent. We first collect monolingual data for 26 Indic languages, 16 of which were previously zero-resource, and perform exploratory character, lexical and subword cross-lingual alignment experiments for the first time at this scale on this dialect continuum. We create bilingual evaluation lexicons against Hindi for 20 of the languages. We then apply our cognate identification method on the data, and show that our method outperforms both traditional orthography baselines as well as EM-style learnt edit distance matrices. To the best of our knowledge, this is the first work to combine traditional orthographic cues with noisy bilingual embeddings to tackle unsupervised cognate detection in a (truly) low-resource setup, showing that even noisy bilingual embeddings can act as good guides for this task. We release our multilingual dialect corpus, called HinDialect, as well as our scripts for evaluation data collection and cognate induction.</abstract>
       <url hash="919e14d0">2022.conll-1.9</url>
@@ -131,7 +131,7 @@
       <title>Detecting Unintended Social Bias in Toxic Language Datasets</title>
       <author><first>Nihar</first><last>Sahoo</last></author>
       <author><first>Himanshu</first><last>Gupta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>132-143</pages>
       <abstract>With the rise of online hate speech, automatic detection of Hate Speech, Offensive texts as a natural language processing task is getting popular. However, very little research has been done to detect unintended social bias from these toxic language datasets. This paper introduces a new dataset ToxicBias curated from the existing dataset of Kaggle competition named “Jigsaw Unintended Bias in Toxicity Classification”. We aim to detect social biases, their categories, and targeted groups. The dataset contains instances annotated for five different bias categories, viz., gender, race/ethnicity, religion, political, and LGBTQ. We train transformer-based models using our curated datasets and report baseline performance for bias identification, target generation, and bias implications. Model biases and their mitigation are also discussed in detail. Our study motivates a systematic extraction of social bias data from toxic language datasets.</abstract>
       <url hash="6075330c">2022.conll-1.10</url>
@@ -199,7 +199,7 @@
     <paper id="16">
       <title>Leveraging a New <fixed-case>S</fixed-case>panish Corpus for Multilingual and Cross-lingual Metaphor Detection</title>
       <author><first>Elisa</first><last>Sanchez-Bayona</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>228-240</pages>
       <abstract>The lack of wide coverage datasets annotated with everyday metaphorical expressions for languages other than English is striking. This means that most research on supervised metaphor detection has been published only for that language. In order to address this issue, this work presents the first corpus annotated with naturally occurring metaphors in Spanish large enough to develop systems to perform metaphor detection. The presented dataset, CoMeta, includes texts from various domains, namely, news, political discourse, Wikipedia and reviews. In order to label CoMeta, we apply the MIPVU method, the guidelines most commonly used to systematically annotate metaphor on real data. We use our newly created dataset to provide competitive baselines by fine-tuning several multilingual and monolingual state-of-the-art large language models. Furthermore, by leveraging the existing VUAM English data in addition to CoMeta, we present the, to the best of our knowledge, first cross-lingual experiments on supervised metaphor detection. Finally, we perform a detailed error analysis that explores the seemingly high transfer of everyday metaphor across these two languages and datasets.</abstract>
       <url hash="4634a364">2022.conll-1.16</url>
@@ -221,12 +221,12 @@
     </paper>
     <paper id="18">
       <title>On Language Spaces, Scales and Cross-Lingual Transfer of <fixed-case>UD</fixed-case> Parsers</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Olga</first><last>Pelloni</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>266-281</pages>
       <abstract>Cross-lingual transfer of parsing models has been shown to work well for several closely-related languages, but predicting the success in other cases remains hard. Our study is a comprehensive analysis of the impact of linguistic distance on the transfer of UD parsers. As an alternative to syntactic typological distances extracted from URIEL, we propose three text-based feature spaces and show that they can be more precise predictors, especially on a more local scale, when only shorter distances are taken into account. Our analyses also reveal that the good coverage in typological databases is not among the factors that explain good transfer.</abstract>
       <url hash="87468549">2022.conll-1.18</url>
@@ -253,7 +253,7 @@
     <paper id="20">
       <title>Syntactic Surprisal From Neural Models Predicts, But Underestimates, Human Processing Difficulty From Syntactic Ambiguities</title>
       <author><first>Suhas</first><last>Arehalli</last></author>
-      <author><first>Brian</first><last>Dillon</last></author>
+      <author id="brian-w-dillon"><first>Brian</first><last>Dillon</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>301-313</pages>
       <abstract>Humans exhibit garden path effects: When reading sentences that are temporarily structurally ambiguous, they slow down when the structure is disambiguated in favor of the less preferred alternative. Surprisal theory (Hale, 2001; Levy, 2008), a prominent explanation of this finding, proposes that these slowdowns are due to the unpredictability of each of the words that occur in these sentences. Challenging this hypothesis, van Schijndel and Linzen (2021) find that estimates of the cost of word predictability derived from language models severely underestimate the magnitude of human garden path effects. In this work, we consider whether this underestimation is due to the fact that humans weight syntactic factors in their predictions more highly than language models do. We propose a method for estimating syntactic predictability from a language model, allowing us to weigh the cost of lexical and syntactic predictability independently. We find that treating syntactic predictability independently from lexical predictability indeed results in larger estimates of garden path. At the same time, even when syntactic predictability is independently weighted, surprisal still greatly underestimate the magnitude of human garden path effects. Our results support the hypothesis that predictability is not the only factor responsible for the processing cost associated with garden path sentences.</abstract>
@@ -277,7 +277,7 @@
       <title>Optimizing text representations to capture (dis)similarity between political parties</title>
       <author><first>Tanise</first><last>Ceron</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>325-338</pages>
       <abstract>Even though fine-tuned neural language models have been pivotal in enabling “deep” automatic text analysis, optimizing text representations for specific applications remains a crucial bottleneck. In this study, we look at this problem in the context of a task from computational social science, namely modeling pairwise similarities between political parties. Our research question is what level of structural information is necessary to create robust text representation, contrasting a strongly informed approach (which uses both claim span and claim category annotations) with approaches that forgo one or both types of annotation with document structure-based heuristics. Evaluating our models on the manifestos of German parties for the 2021 federal election. We find that heuristics that maximize within-party over between-party similarity along with a normalization step lead to reliable party similarity prediction, without the need for manual annotation.</abstract>
       <url hash="43ef9b56">2022.conll-1.22</url>
diff --git a/data/xml/2022.constraint.xml b/data/xml/2022.constraint.xml
index 16286d84aa..22d63d1b93 100644
--- a/data/xml/2022.constraint.xml
+++ b/data/xml/2022.constraint.xml
@@ -8,7 +8,7 @@
       <editor><first>Kai</first><last>Shu</last></editor>
       <editor><first>H. Russell</first><last>Bernard</last></editor>
       <editor><first>Maria</first><last>Liakata</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Aseem</first><last>Srivastava</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -106,7 +106,7 @@
       <author><first>Syrielle</first><last>Montariol</last></author>
       <author><first>Étienne</first><last>Simon</last></author>
       <author><first>Arij</first><last>Riabi</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>55-65</pages>
       <abstract>We propose our solution to the multimodal semantic role labeling task from the CONSTRAINT’22 workshop. The task aims at classifying entities in memes into classes such as “hero” and “villain”. We use several pre-trained multi-modal models to jointly encode the text and image of the memes, and implement three systems to classify the role of the entities. We propose dynamic sampling strategies to tackle the issue of class imbalance. Finally, we perform qualitative analysis on the representations of the entities.</abstract>
       <url hash="bbb7091a">2022.constraint-1.7</url>
diff --git a/data/xml/2022.crac.xml b/data/xml/2022.crac.xml
index 1cc490a570..1a4b3be14b 100644
--- a/data/xml/2022.crac.xml
+++ b/data/xml/2022.crac.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on Computational Models of Reference, Anaphora and Coreference</booktitle>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Anna</first><last>Nedoluzhko</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
       <month>October</month>
@@ -23,7 +23,7 @@
       <title>Quantifying Discourse Support for Omitted Pronouns</title>
       <author><first>Shulin</first><last>Zhang</last></author>
       <author><first>Jixing</first><last>Li</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>1–12</pages>
       <abstract>Pro-drop is commonly seen in many languages, but its discourse motivations have not been well characterized. Inspired by the topic chain theory in Chinese, this study shows how character-verb usage continuity distinguishes dropped pronouns from overt references to story characters. We model the choice to drop vs. not drop as a function of character-verb continuity. The results show that omitted subjects have higher character history-current verb continuity salience than non-omitted subjects. This is consistent with the idea that discourse coherence with a particular topic, such as a story character, indeed facilitates the omission of pronouns in languages and contexts where they are optional.</abstract>
       <url hash="0de42ba0">2022.crac-1.1</url>
@@ -76,7 +76,7 @@
       <author><first>Egil</first><last>Rønningstad</last></author>
       <author><first>Per Erik</first><last>Solberg</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>48–60</pages>
       <abstract>We present the Norwegian Anaphora Resolution Corpus (NARC), the first publicly available corpus annotated with anaphoric relations between noun phrases for Norwegian. The paper describes the annotated data for 326 documents in Norwegian Bokmål, together with inter-annotator agreement and discussions of relevant statistics. We also present preliminary modelling results which are comparable to existing corpora for other languages, and discuss relevant problems in relation to both modelling and the annotations themselves.</abstract>
       <url hash="95f09c2d">2022.crac-1.6</url>
@@ -85,7 +85,7 @@
     <paper id="7">
       <title>Evaluating Coreference Resolvers on Community-based Question Answering: From Rule-based to State of the Art</title>
       <author><first>Haixia</first><last>Chai</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>61–73</pages>
@@ -105,8 +105,8 @@
     <paper id="9">
       <title>Investigating Cross-Document Event Coreference for <fixed-case>D</fixed-case>utch</title>
       <author><first>Loic</first><last>De Langhe</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>88–98</pages>
       <abstract>In this paper we present baseline results for Event Coreference Resolution (ECR) in Dutch using gold-standard (i.e non-predicted) event mentions. A newly developed benchmark dataset allows us to properly investigate the possibility of creating ECR systems for both within and cross-document coreference. We give an overview of the state of the art for ECR in other languages, as well as a detailed overview of existing ECR resources. Afterwards, we provide a comparative report on our own dataset. We apply a significant number of approaches that have been shown to attain good results for English ECR including feature-based models, monolingual transformer language models and multilingual language models. The best results were obtained using the monolingual BERTje model. Finally, results for all models are thoroughly analysed and visualised, as to provide insight into the inner workings of ECR and long-distance semantic NLP tasks in general.</abstract>
       <url hash="cc0c215f">2022.crac-1.9</url>
@@ -125,7 +125,7 @@
   <volume id="mcr" ingest-date="2022-10-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the CRAC 2022 Shared Task on Multilingual Coreference Resolution</booktitle>
-      <editor><first>Zdeněk</first><last>Žabokrtský</last></editor>
+      <editor id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></editor>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
@@ -148,7 +148,7 @@
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Ondřej</first><last>Pražák</last></author>
       <author><first>Jakub</first><last>Sido</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Yilun</first><last>Zhu</last></author>
       <pages>1–17</pages>
       <abstract>This paper presents an overview of the shared task on multilingual coreference resolution associated with the CRAC 2022 workshop. Shared task participants were supposed to develop trainable systems capable of identifying mentions and clustering them according to identity coreference. The public edition of CorefUD 1.0, which contains 13 datasets for 10 languages, was used as the source of training and evaluation data. The CoNLL score used in previous coreference-oriented shared tasks was used as the main evaluation metric. There were 8 coreference prediction systems submitted by 5 participating teams; in addition, there was a competitive Transformer-based baseline system provided by the organizers at the beginning of the shared task. The winner system outperformed the baseline by 12 percentage points (in terms of the CoNLL scores averaged across all datasets for individual languages).</abstract>
diff --git a/data/xml/2022.creativesumm.xml b/data/xml/2022.creativesumm.xml
index 0cd1a9652f..37e6181a2e 100644
--- a/data/xml/2022.creativesumm.xml
+++ b/data/xml/2022.creativesumm.xml
@@ -27,7 +27,7 @@
     </paper>
     <paper id="2">
       <title>Summarization of Long Input Texts Using Multi-Layer Neural Network</title>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <author><first>Aadyant</first><last>Khatri</last></author>
       <author><first>Raksha</first><last>Agarwal</last></author>
       <pages>13–18</pages>
@@ -112,13 +112,13 @@
     <paper id="10">
       <title><fixed-case>CREATIVESUMM</fixed-case>: Shared Task on Automatic Summarization for Creative Writing</title>
       <author><first>Divyansh</first><last>Agarwal</last></author>
-      <author><first>Alexander R.</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander R.</first><last>Fabbri</last></author>
       <author><first>Simeng</first><last>Han</last></author>
       <author><first>Wojciech</first><last>Kryscinski</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Bryan</first><last>Li</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Tianyi</first><last>Zhang</last></author>
       <author><first>Sam</first><last>Wiseman</last></author>
       <pages>67–73</pages>
diff --git a/data/xml/2022.csrnlp.xml b/data/xml/2022.csrnlp.xml
index 4619ff03f3..9a04985a2d 100644
--- a/data/xml/2022.csrnlp.xml
+++ b/data/xml/2022.csrnlp.xml
@@ -23,7 +23,7 @@
       <author><first>Francesco Paolo</first><last>Lagrasta</last></author>
       <author><first>Sergio</first><last>Caputo</last></author>
       <author><first>Pierpaolo</first><last>Pontrandolfo</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>1–8</pages>
       <abstract>Sustainability reporting has become an annual requirement in many countries and for certain types of companies. Sustainability reports inform stakeholders about companies’ commitment to sustainable development and their economic, social, and environmental sustainability practices. However, the fact that norms and standards allow a certain discretion to be adopted by drafting organizations makes such reports hardly comparable in terms of layout, disclosures, key performance indicators (KPIs), and so on. In this work, we present a system based on natural language processing and information extraction techniques to retrieve relevant information from sustainability reports, compliant with the Global Reporting Initiative Standards, written in Italian and English language. Specifically, the system is able to identify references to the various sustainability topics discussed by the reports: on which page of the document those references have been found, the context of each reference, and if it is mentioned positively or negatively. The output of the system has been then evaluated against a ground truth obtained through a manual annotation process on 134 reports. Experimental outcomes highlight the affordability of the approach for improving sustainability disclosures, accessibility, and transparency, thus empowering stakeholders to conduct further analysis and considerations.</abstract>
       <url hash="275ce7f8">2022.csrnlp-1.1</url>
@@ -98,13 +98,13 @@
       <author><first>Tapan</first><last>Auti</last></author>
       <author><first>Rajdeep</first><last>Sarkar</last></author>
       <author><first>Bernardo</first><last>Stearns</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Arindam</first><last>Paul</last></author>
       <author><first>Michaela</first><last>Comerford</last></author>
       <author><first>Jay</first><last>Megaro</last></author>
       <author><first>John</first><last>Mariano</last></author>
       <author><first>Vall</first><last>Herard</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>52–57</pages>
       <abstract>Pharmaceutical text classification is an important area of research for commercial and research institutions working in the pharmaceutical domain. Addressing this task is challenging due to the need of expert verified labelled data which can be expensive and time consuming to obtain. Towards this end, we leverage predictive coding methods for the task as they have been shown to generalise well for sentence classification. Specifically, we utilise GAN-BERT architecture to classify pharmaceutical texts. To capture the domain specificity, we propose to utilise the BioBERT model as our BERT model in the GAN-BERT framework. We conduct extensive evaluation to show the efficacy of our approach over baselines on multiple metrics.</abstract>
       <url hash="9d86cb4e">2022.csrnlp-1.8</url>
diff --git a/data/xml/2022.csrr.xml b/data/xml/2022.csrr.xml
index 0139c32bdf..7741a46b1b 100644
--- a/data/xml/2022.csrr.xml
+++ b/data/xml/2022.csrr.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022)</booktitle>
       <editor><first>Antoine</first><last>Bosselut</last></editor>
       <editor><first>Xiang</first><last>Li</last></editor>
-      <editor><first>Bill Yuchen</first><last>Lin</last></editor>
+      <editor id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></editor>
       <editor><first>Vered</first><last>Shwartz</last></editor>
       <editor><first>Bodhisattwa Prasad</first><last>Majumder</last></editor>
       <editor><first>Yash Kumar</first><last>Lal</last></editor>
@@ -38,7 +38,7 @@
     <paper id="2">
       <title>Cloze Evaluation for Deeper Understanding of Commonsense Stories in <fixed-case>I</fixed-case>ndonesian</title>
       <author><first>Fajri</first><last>Koto</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>8-16</pages>
       <abstract>Story comprehension that involves complex causal and temporal relations is a critical task in NLP, but previous studies have focused predominantly on English, leaving open the question of how the findings generalize to other languages, such as Indonesian. In this paper, we follow the Story Cloze Test framework of Mostafazadeh et al. (2016) in evaluating story understanding in Indonesian, by constructing a four-sentence story with one correct ending and one incorrect ending. To investigate commonsense knowledge acquisition in language models, we experimented with: (1) a classification task to predict the correct ending; and (2) a generation task to complete the story with a single sentence. We investigate these tasks in two settings: (i) monolingual training and ii) zero-shot cross-lingual transfer between Indonesian and English.</abstract>
@@ -63,7 +63,7 @@
       <author><first>Yueen</first><last>Ma</last></author>
       <author><first>Haoxuan</first><last>You</last></author>
       <author><first>Zhecan</first><last>Wang</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>23-35</pages>
       <abstract>Large-scale visual-linguistic pre-training aims to capture the generic representations from multimodal features, which are essential for downstream vision-language tasks. Existing methods mostly focus on learning the semantic connections between visual objects and linguistic content, which tend to be recognitionlevel information and may not be sufficient for commonsensical reasoning tasks like VCR. In this paper, we propose a novel commonsensical vision-language pre-training framework to bridge the gap. We first augment the conventional image-caption pre-training datasets with commonsense inferences from a visuallinguistic GPT-2. To pre-train models on image, caption and commonsense inferences together, we propose two new tasks: masked commonsense modeling (MCM) and commonsense type prediction (CTP). To reduce the shortcut effect between captions and commonsense inferences, we further introduce the domain-wise adaptive masking that dynamically adjusts the masking ratio. Experimental results on downstream tasks, VCR and VQA, show the improvement of our pre-training strategy over previous methods. Human evaluation also validates the relevance, informativeness, and diversity of the generated commonsense inferences. Overall, we demonstrate the potential of incorporating commonsense knowledge into the conventional recognition-level visual-linguistic pre-training.</abstract>
       <url hash="342c8f2d">2022.csrr-1.4</url>
@@ -86,7 +86,7 @@
       <title>Knowledge-Augmented Language Models for Cause-Effect Relation Classification</title>
       <author><first>Pedram</first><last>Hosseini</last></author>
       <author><first>David A.</first><last>Broniatowski</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>43-48</pages>
       <abstract>Previous studies have shown the efficacy of knowledge augmentation methods in pretrained language models. However, these methods behave differently across domains and downstream tasks. In this work, we investigate the augmentation of pretrained language models with knowledge graph data in the cause-effect relation classification and commonsense causal reasoning tasks. After automatically verbalizing triples in ATOMIC2020, a wide coverage commonsense reasoning knowledge graph, we continually pretrain BERT and evaluate the resulting model on cause-effect pair classification and answering commonsense causal reasoning questions. Our results show that a continually pretrained language model augmented with commonsense reasoning knowledge outperforms our baselines on two commonsense causal reasoning benchmarks, COPA and BCOPA-CE, and a Temporal and Causal Reasoning (TCR) dataset, without additional improvement in model architecture or using quality-enhanced data for fine-tuning.</abstract>
       <url hash="2ea72cde">2022.csrr-1.6</url>
diff --git a/data/xml/2022.dadc.xml b/data/xml/2022.dadc.xml
index a796573f54..73c296e4a1 100644
--- a/data/xml/2022.dadc.xml
+++ b/data/xml/2022.dadc.xml
@@ -101,7 +101,7 @@
       <title>Generalized Quantifiers as a Source of Error in Multilingual <fixed-case>NLU</fixed-case> Benchmarks</title>
       <author><first>Ruixiang</first><last>Cui</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>61-61</pages>
       <abstract>Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today’s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that quantifiers are pervasive in NLU benchmarks, and their occurrence at test time is associated with performance drops. Multilingual models also exhibit unsatisfying quantifier reasoning abilities, but not necessarily worse for non-English languages. To facilitate directly-targeted probing, we present an adversarial generalized quantifier NLI task (GQNLI) and show that pre-trained language models have a clear lack of robustness in generalized quantifier reasoning.</abstract>
       <url hash="e516bbdc">2022.dadc-1.7</url>
@@ -114,7 +114,7 @@
       <author><first>Jason</first><last>Phang</last></author>
       <author><first>Angelica</first><last>Chen</last></author>
       <author><first>William</first><last>Huang</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>62-62</pages>
       <abstract>Large language models increasingly saturate existing task benchmarks, in some cases outperforming humans, leaving little headroom with which to measure further progress. Adversarial dataset creation, which builds datasets using examples that a target system outputs incorrect predictions for, has been proposed as a strategy to construct more challenging datasets, avoiding the more serious challenge of building more precise benchmarks by conventional means. In this work, we study the impact of applying three common approaches for adversarial dataset creation: (1) filtering out easy examples (AFLite), (2) perturbing examples (TextFooler), and (3) model-in-the-loop data collection (ANLI and AdversarialQA), across 18 different adversary models. We find that all three methods can produce more challenging datasets, with stronger adversary models lowering the performance of evaluated models more. However, the resulting ranking of the evaluated models can also be unstable and highly sensitive to the choice of adversary model. Moreover, we find that AFLite oversamples examples with low annotator agreement, meaning that model comparisons hinge on the examples that are most contentious for humans. We recommend that researchers tread carefully when using adversarial methods for building evaluation datasets.</abstract>
       <url hash="8ab06218">2022.dadc-1.8</url>
diff --git a/data/xml/2022.dash.xml b/data/xml/2022.dash.xml
index 7baba477e0..06615e83d0 100644
--- a/data/xml/2022.dash.xml
+++ b/data/xml/2022.dash.xml
@@ -21,7 +21,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>MEGA</fixed-case>nno: Exploratory Labeling for <fixed-case>NLP</fixed-case> in Computational Notebooks</title>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Hannah</first><last>Kim</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Rafael</first><last>Li Chen</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Eser</first><last>Kandogan</last><affiliation>Megagon Labs</affiliation></author>
@@ -85,7 +85,7 @@
     <paper id="6">
       <title>A Gamified Approach to Frame Semantic Role Labeling</title>
       <author><first>Emily</first><last>Amspoker</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Miriam R L</first><last>Petruck</last><affiliation>International Computer Science Institute</affiliation></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last><affiliation>International Computer Science Institute</affiliation></author>
       <pages>37-42</pages>
       <abstract>Much research has investigated the possibility of creating games with a purpose (GWAPs), i.e., online games whose purpose is gathering information to address the insufficient amount of data for training and testing of large language models (Von Ahn and Dabbish, 2008). Based on such work, this paper reports on the development of a game for frame semantic role labeling, where players have fun while using semantic frames as prompts for short story writing. This game will generate more annotations for FrameNet and original content for annotation, supporting FrameNet’s goal of characterizing the English language in terms of Frame Semantics.</abstract>
       <url hash="9fd20115">2022.dash-1.6</url>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="9">
       <title>Partially Humanizing Weak Supervision: Towards a Better Low Resource Pipeline for Spoken Language Understanding</title>
-      <author><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
       <author><first>Rishabh</first><last>Tripathi</last><affiliation>Observe.AI</affiliation></author>
       <author><first>Jithendra</first><last>Vepa</last><affiliation>Observe AI</affiliation></author>
       <pages>64-73</pages>
@@ -172,9 +172,9 @@
     </paper>
     <paper id="13">
       <title>Interactively Uncovering Latent Arguments in Social Media Platforms: A Case Study on the Covid-19 Vaccine Debate</title>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
       <author><first>Tunazzina</first><last>Islam</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Ming</first><last>Yin</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University</affiliation></author>
       <pages>94-111</pages>
diff --git a/data/xml/2022.dclrl.xml b/data/xml/2022.dclrl.xml
index 180e288ce3..e629d109ab 100644
--- a/data/xml/2022.dclrl.xml
+++ b/data/xml/2022.dclrl.xml
@@ -20,7 +20,7 @@
       <title><fixed-case>S</fixed-case>ynt<fixed-case>A</fixed-case>ct: A Synthesized Database of Basic Emotions</title>
       <author><first>Felix</first><last>Burkhardt</last></author>
       <author><first>Florian</first><last>Eyben</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>1–9</pages>
       <abstract>Speech emotion recognition is in the focus of research since several decades and has many applications. One problem is sparse data for supervised learning. One way to tackle this problem is the synthesis of data with emotion simulating speech synthesis approaches. We present a synthesized database of five basic emotions and neutral expression based on rule based manipulation for a diphone synthesizer which we release to the public. The database has been validated in several machine learning experiments as a training set to detect emotional expression from natural speech data. The scripts to generate such a database have been made open source and could be used to aid speech emotion recognition for a low resourced language, as MBROLA supports 35 languages</abstract>
       <url hash="2578f423">2022.dclrl-1.1</url>
diff --git a/data/xml/2022.deelio.xml b/data/xml/2022.deelio.xml
index 483aceba28..d27215476a 100644
--- a/data/xml/2022.deelio.xml
+++ b/data/xml/2022.deelio.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-05-15" type="proceedings">
     <meta>
       <booktitle>Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</booktitle>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -45,7 +45,7 @@
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Soyeong</first><last>Jeong</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>22-32</pages>
       <abstract>Dense retrieval aims at searching for the most relevant documents to the given query by encoding texts in the embedding space, requiring a large amount of query-document pairs to train. Since manually constructing such training data is challenging, recent work has proposed to generate synthetic queries from documents and use them to train a dense retriever. However, compared to the manually composed queries, synthetic queries do not generally ask for implicit information, therefore leading to a degraded retrieval performance. In this work, we propose Query Generation with External Knowledge (QGEK), a novel method for generating queries with external information related to the corresponding document. Specifically, we convert a query into a triplet-based template form to accommodate external information and transmit it to a pre-trained language model (PLM). We validate QGEK on both in-domain and out-domain dense retrieval settings. The dense retriever with the queries requiring implicit information is found to make good performance improvement. Also, such queries are similar to manually composed queries, confirmed by both human evaluation and unique &amp; non-unique words distribution.</abstract>
       <url hash="f5a67690">2022.deelio-1.3</url>
@@ -72,7 +72,7 @@
       <title>Jointly Identifying and Fixing Inconsistent Readings from Information Extraction Systems</title>
       <author><first>Ankur</first><last>Padia</last></author>
       <author><first>Francis</first><last>Ferraro</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>42-52</pages>
       <abstract>Moral values as commonsense norms shape our everyday individual and community behavior. The possibility to extract moral attitude rapidly from natural language is an appealing perspective that would enable a deeper understanding of social interaction dynamics and the individual cognitive and behavioral dimension. In this work we focus on detecting moral content from natural language and we test our methods on a corpus of tweets previously labeled as containing moral values or violations, according to Moral Foundation Theory. We develop and compare two different approaches: (i) a frame-based symbolic value detector based on knowledge graphs and (ii) a zero-shot machine learning model fine-tuned on a task of Natural Language Inference (NLI) and a task of emotion detection. The final outcome from our work consists in two approaches meant to perform without the need for prior training process on a moral value detection task.</abstract>
       <url hash="38e81448">2022.deelio-1.5</url>
@@ -133,7 +133,7 @@
       <author><first>Jiachang</first><last>Liu</last></author>
       <author><first>Dinghan</first><last>Shen</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Lawrence</first><last>Carin</last></author>
       <author><first>Weizhu</first><last>Chen</last></author>
       <pages>100-114</pages>
diff --git a/data/xml/2022.deeplo.xml b/data/xml/2022.deeplo.xml
index f6f0fb2a6b..2425e72d6a 100644
--- a/data/xml/2022.deeplo.xml
+++ b/data/xml/2022.deeplo.xml
@@ -29,7 +29,7 @@
       <author><first>John</first><last>Ortega</last></author>
       <author><first>William</first><last>Chen</last></author>
       <author><first>Richard</first><last>Castro</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Cesar</first><last>Yoshikawa</last></author>
       <author><first>Renzo</first><last>Venturas</last></author>
       <author><first>Hilario</first><last>Aradiel</last></author>
@@ -68,7 +68,7 @@
       <title>Generating unlabelled data for a tri-training approach in a low resourced <fixed-case>NER</fixed-case> task</title>
       <author><first>Hugo</first><last>Boulanger</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>30-37</pages>
       <abstract>Training a tagger for Named Entity Recognition (NER) requires a substantial amount of labeled data in the task domain. Manual labeling is a tedious and complicated task. Semisupervised learning methods can reduce the quantity of labeled data necessary to train a model. However, these methods require large quantities of unlabeled data, which remains an issue in many cases.
 
@@ -138,7 +138,7 @@ We address this problem by generating unlabeled data. Large language models have
       <author><first>Shayna</first><last>Gardiner</last></author>
       <author><first>David</first><last>Rossouw</last></author>
       <author><first>Tere</first><last>Roldán</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>80-89</pages>
       <abstract>Automatic Speech Recognition (ASR) systems typically produce unpunctuated transcripts that have poor readability. In addition, building a punctuation restoration system is challenging for low-resource languages, especially for domain-specific applications. In this paper, we propose a Spanish punctuation restoration system designed for a real-time customer support transcription service. To address the data sparsity of Spanish transcripts in the customer support domain, we introduce two transferlearning-based strategies: 1) domain adaptation using out-of-domain Spanish text data; 2) crosslingual transfer learning leveraging in-domain English transcript data. Our experiment results show that these strategies improve the accuracy of the Spanish punctuation restoration system.</abstract>
       <url hash="e2e79003">2022.deeplo-1.9</url>
@@ -151,7 +151,7 @@ We address this problem by generating unlabeled data. Large language models have
       <author><first>Kurt</first><last>Micallef</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Marc</first><last>Tanti</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
       <pages>90-101</pages>
       <abstract>Multilingual language models such as mBERT have seen impressive cross-lingual transfer to a variety of languages, but many languages remain excluded from these models. In this paper, we analyse the effect of pre-training with monolingual data for a low-resource language that is not included in mBERT – Maltese – with a range of pre-training set ups. We conduct evaluations with the newly pre-trained models on three morphosyntactic tasks – dependency parsing, part-of-speech tagging, and named-entity recognition – and one semantic classification task – sentiment analysis. We also present a newly created corpus for Maltese, and determine the effect that the pre-training data size and domain have on the downstream performance. Our results show that using a mixture of pre-training domains is often superior to using Wikipedia text only. We also find that a fraction of this corpus is enough to make significant leaps in performance over Wikipedia-trained models. We pre-train and compare two models on the new corpus: a monolingual BERT model trained from scratch (BERTu), and a further pretrained multilingual BERT (mBERTu). The models achieve state-of-the-art performance on these tasks, despite the new corpus being considerably smaller than typically used corpora for high-resourced languages. On average, BERTu outperforms or performs competitively with mBERTu, and the largest gains are observed for higher-level tasks.</abstract>
@@ -164,7 +164,7 @@ We address this problem by generating unlabeled data. Large language models have
       <title>Building an Event Extractor with Only a Few Examples</title>
       <author><first>Pengfei</first><last>Yu</last></author>
       <author><first>Zixuan</first><last>Zhang</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>102-109</pages>
@@ -181,7 +181,7 @@ We address this problem by generating unlabeled data. Large language models have
       <author><first>David</first><last>Shimshoni</last></author>
       <author><first>Aditya</first><last>Singhal</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <pages>110-116</pages>
       <abstract>Pretrained language models have shown success in various areas of natural language processing, including reading comprehension tasks. However, when applying machine learning methods to new domains, labeled data may not always be available. To address this, we use supervised pretraining on source-domain data to reduce sample complexity on domainspecific downstream tasks. We evaluate zeroshot performance on domain-specific reading comprehension tasks by combining task transfer with domain adaptation to fine-tune a pretrained model with no labelled data from the target task. Our approach outperforms DomainAdaptive Pretraining on downstream domainspecific reading comprehension tasks in 3 out of 4 domains.</abstract>
       <url hash="6e47dbf2">2022.deeplo-1.12</url>
@@ -243,7 +243,7 @@ We address this problem by generating unlabeled data. Large language models have
       <title>Clean or Annotate: How to Spend a Limited Data Collection Budget</title>
       <author><first>Derek</first><last>Chen</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>152-168</pages>
       <abstract>Crowdsourcing platforms are often used to collect datasets for training machine learning models, despite higher levels of inaccurate labeling compared to expert labeling. There are two common strategies to manage the impact of such noise: The first involves aggregating redundant annotations, but comes at the expense of labeling substantially fewer examples. Secondly, prior works have also considered using the entire annotation budget to label as many examples as possible and subsequently apply denoising algorithms to implicitly clean the dataset. We find a middle ground and propose an approach which reserves a fraction of annotations to explicitly clean up highly probable error samples to optimize the annotation process. In particular, we allocate a large portion of the labeling budget to form an initial dataset used to train a model. This model is then used to identify specific examples that appear most likely to be incorrect, which we spend the remaining budget to relabel. Experiments across three model variations and four natural language processing tasks show our approach outperforms or matches both label aggregation and advanced denoising methods designed to handle noisy labels when allocated the same finite annotation budget.</abstract>
       <url hash="37b1db06">2022.deeplo-1.17</url>
diff --git a/data/xml/2022.dialdoc.xml b/data/xml/2022.dialdoc.xml
index df31405be3..a8c6f9117e 100644
--- a/data/xml/2022.dialdoc.xml
+++ b/data/xml/2022.dialdoc.xml
@@ -77,7 +77,7 @@
       <title>Parameter-Efficient Abstractive Question Answering over Tables or Text</title>
       <author><first>Vaishali</first><last>Pal</last></author>
       <author><first>Evangelos</first><last>Kanoulas</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>41-53</pages>
       <abstract>A long-term ambition of information seeking QA systems is to reason over multi-modal contexts and generate natural answers to user queries. Today, memory intensive pre-trained language models are adapted to downstream tasks such as QA by fine-tuning the model on QA data in a specific modality like unstructured text or structured tables. To avoid training such memory-hungry models while utilizing a uniform architecture for each modality, parameter-efficient adapters add and train small task-specific bottle-neck layers between transformer layers. In this work, we study parameter-efficient abstractive QA in encoder-decoder models over structured tabular data and unstructured textual data using only 1.5% additional parameters for each modality. We also ablate over adapter layers in both encoder and decoder modules to study the efficiency-performance trade-off and demonstrate that reducing additional trainable parameters down to 0.7%-1.0% leads to comparable results. Our models out-perform current state-of-the-art models on tabular QA datasets such as Tablesum and FeTaQA, and achieve comparable performance on a textual QA dataset such as NarrativeQA using significantly less trainable parameters than fine-tuning.</abstract>
       <url hash="fd968a0b">2022.dialdoc-1.5</url>
@@ -140,7 +140,7 @@
       <author><first>Etsuko</first><last>Ishii</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Dan</first><last>Su</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -154,7 +154,7 @@
     <paper id="11">
       <title>G4: Grounding-guided Goal-oriented Dialogues Generation with Multiple Documents</title>
       <author><first>Shiwei</first><last>Zhang</last></author>
-      <author id="yiyang-du"><first>Yiyang</first><last>Du</last></author>
+      <author><first>Yiyang</first><last>Du</last></author>
       <author><first>Guanzhong</first><last>Liu</last></author>
       <author><first>Zhao</first><last>Yan</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
@@ -186,7 +186,7 @@
       <author><first>Junan</first><last>Li</last></author>
       <author><first>Hongyuan</first><last>Lu</last></author>
       <author><first>Xixin</first><last>Wu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>123-129</pages>
       <abstract>MultiDoc2Dial presents an important challenge on modeling dialogues grounded with multiple documents. This paper proposes a pipeline system of “retrieve, re-rank, and generate”, where each component is individually optimized. This enables the passage re-ranker and response generator to fully exploit training with ground-truth data. Furthermore, we use a deep cross-encoder trained with localized hard negative passages from the retriever. For the response generator, we use grounding span prediction as an auxiliary task to be jointly trained with the main task of response generation. We also adopt a passage dropout and regularization technique to improve response generation performance. Experimental results indicate that the system clearly surpasses the competitive baseline and our team CPII-NLP ranked 1st among the public submissions on ALL four leaderboards based on the sum of F1, SacreBLEU, METEOR and RougeL scores.</abstract>
       <url hash="804578aa">2022.dialdoc-1.13</url>
@@ -226,7 +226,7 @@
       <author><first>Ali</first><last>Satvaty</last></author>
       <author><first>Sadra</first><last>Sabouri</last></author>
       <author><first>Ehsaneddin</first><last>Asgari</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <pages>142-147</pages>
       <abstract>Information-seeking dialogue systems, including knowledge identification and response generation, aim to respond to users with fluent, coherent, and informative answers based on users’ needs. This paper discusses our proposed approach, <tex-math>Docalog</tex-math>, for the DialDoc-22 (MultiDoc2Dial) shared task. <tex-math>Docalog</tex-math> identifies the most relevant knowledge in the associated document, in a multi-document setting. <tex-math>Docalog</tex-math>, is a three-stage pipeline consisting of <i>(1) a document retriever model (DR. TEIT)</i>, <i>(2) an answer span prediction model</i>, and <i>(3) an ultimate span picker</i> deciding on the most likely answer span, out of all predicted spans. In the test phase of MultiDoc2Dial 2022, <tex-math>Docalog</tex-math> achieved f1-scores of 36.07% and 28.44% and SacreBLEU scores of 23.70% and 20.52%, respectively on the <i>MDD-SEEN</i> and <i>MDD-UNSEEN</i> folds.</abstract>
       <url hash="29fe31f8">2022.dialdoc-1.16</url>
@@ -243,7 +243,7 @@
       <author><first>Aditya Srikanth</first><last>Veerubhotla</last></author>
       <author><first>Ritam</first><last>Dutt</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>148-154</pages>
       <abstract>In this paper, we present our submission to the DialDoc shared task based on the MultiDoc2Dial dataset. MultiDoc2Dial is a conversational question answering dataset that grounds dialogues in multiple documents. The task involves grounding a user’s query in a document followed by generating an appropriate response. We propose several improvements over the baseline’s retriever-reader architecture to aid in modeling goal-oriented dialogues grounded in multiple documents. Our proposed approach employs sparse representations for passage retrieval, a passage re-ranker, the fusion-in-decoder architecture for generation, and a curriculum learning training paradigm. Our approach shows a 12 point improvement in BLEU score compared to the baseline RAG model.</abstract>
       <url hash="608448d9">2022.dialdoc-1.17</url>
diff --git a/data/xml/2022.digitam.xml b/data/xml/2022.digitam.xml
index 9cf8fe85cd..2ffbdc810e 100644
--- a/data/xml/2022.digitam.xml
+++ b/data/xml/2022.digitam.xml
@@ -24,8 +24,8 @@
     <paper id="1">
       <title>A Free/Open-Source Morphological Transducer for <fixed-case>W</fixed-case>estern <fixed-case>A</fixed-case>rmenian</title>
       <author><first>Hossep</first><last>Dolatian</last></author>
-      <author><first>Daniel</first><last>Swanson</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <pages>1–7</pages>
       <abstract>We present a free/open-source morphological transducer for Western Armenian, an endangered and low-resource Indo-European language. The transducer has virtually complete coverage of the language’s inflectional morphology. We built the lexicon by scraping online dictionaries. As of submission, the transducer has a lexicon of 75K words. It has over 90% naive coverage on different Western Armenian corpora, and high precision.</abstract>
       <url hash="513528e1">2022.digitam-1.1</url>
diff --git a/data/xml/2022.distcurate.xml b/data/xml/2022.distcurate.xml
index 620c98ff54..3ebd907249 100644
--- a/data/xml/2022.distcurate.xml
+++ b/data/xml/2022.distcurate.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-06-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Dimensions of Meaning: Distributional and Curated Semantics (DistCurate 2022)</booktitle>
-      <editor><first>Collin F.</first><last>Baker</last></editor>
+      <editor id="collin-f-baker"><first>Collin F.</first><last>Baker</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington</address>
       <month>July</month>
@@ -39,7 +39,7 @@
     <paper id="3">
       <title>Logical Story Representations via <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et + Semantic Parsing</title>
       <author><first>Lane</first><last>Lawley</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>19-23</pages>
       <abstract>We propose a means of augmenting FrameNet parsers with a formal logic parser to obtain rich semantic representations of events. These schematic representations of the frame events, which we call Episodic Logic (EL) schemas, abstract constants to variables, preserving their types and relationships to other individuals in the same text. Due to the temporal semantics of the chosen logical formalism, all identified schemas in a text are also assigned temporally bound “episodes” and related to one another in time. The semantic role information from the FrameNet frames is also incorporated into the schema’s type constraints. We describe an implementation of this method using a neural FrameNet parser, and discuss the approach’s possible applications to question answering and open-domain event schema learning.</abstract>
       <url hash="a3d33f8a">2022.distcurate-1.3</url>
@@ -49,8 +49,8 @@
     <paper id="4">
       <title>Comparing Distributional and Curated Approaches for Cross-lingual Frame Alignment</title>
       <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <author><first>Arthur</first><last>Lorenzi</last></author>
       <pages>24-30</pages>
       <abstract>Despite advances in statistical approaches to the modeling of meaning, many ques- tions about the ideal way of exploiting both knowledge-based (e.g., FrameNet, WordNet) and data-based methods (e.g., BERT) remain unresolved. This workshop focuses on these questions with three session papers that run the gamut from highly distributional methods (Lekkas et al., 2022), to highly curated methods (Gamonal, 2022), and techniques with statistical methods producing structured semantics (Lawley and Schubert, 2022). In addition, we begin the workshop with a small comparison of cross-lingual techniques for frame semantic alignment for one language pair (Spanish and English). None of the distributional techniques consistently aligns the 1-best frame match from English to Spanish, all failing in at least one case. Predicting which techniques will align which frames cross-linguistically is not possible from any known characteristic of the alignment technique or the frames. Although distributional techniques are a rich source of semantic information for many tasks, at present curated, knowledge-based semantics remains the only technique that can consistently align frames across languages.</abstract>
diff --git a/data/xml/2022.dlg4nlp.xml b/data/xml/2022.dlg4nlp.xml
index 86e1250f41..a72c8b1e2b 100644
--- a/data/xml/2022.dlg4nlp.xml
+++ b/data/xml/2022.dlg4nlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2nd Workshop on Deep Learning on Graphs for Natural Language Processing (DLG4NLP 2022)</booktitle>
       <editor><first>Lingfei</first><last>Wu</last></editor>
       <editor><first>Bang</first><last>Liu</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <editor><first>Jian</first><last>Pei</last></editor>
       <editor><first>Yue</first><last>Zhang</last></editor>
       <editor><first>Yunyao</first><last>Li</last></editor>
@@ -25,7 +25,7 @@
       <author><first>Wenhao</first><last>Yu</last></author>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Lianhui</first><last>Qin</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Tong</first><last>Zhao</last></author>
       <author><first>Meng</first><last>Jiang</last></author>
       <pages>1-11</pages>
@@ -74,7 +74,7 @@
       <title>Graph Neural Networks for Adapting Off-the-shelf General Domain Language Models to Low-Resource Specialised Domains</title>
       <author><first>Merieme</first><last>Bouhandi</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>36-42</pages>
       <abstract>Language models encode linguistic proprieties and are used as input for more specific models. Using their word representations as-is for specialised and low-resource domains might be less efficient. Methods of adapting them exist, but these models often overlook global information about how words, terms, and concepts relate to each other in a corpus due to their strong reliance on attention. We consider that global information can influence the results of the downstream tasks, and combination with contextual information is performed using graph convolution networks or GCN built on vocabulary graphs. By outperforming baselines, we show that this architecture is profitable for domain-specific tasks.</abstract>
       <url hash="7747fea7">2022.dlg4nlp-1.5</url>
@@ -110,7 +110,7 @@
       <author><first>Zhenyun</first><last>Deng</last></author>
       <author><first>Yonghua</first><last>Zhu</last></author>
       <author><first>Qianqian</first><last>Qi</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <author><first>Patricia</first><last>Riddle</last></author>
       <pages>71-80</pages>
       <abstract>Current graph-neural-network-based (GNN-based) approaches to multi-hop questions integrate clues from scattered paragraphs in an entity graph, achieving implicit reasoning by synchronous update of graph node representations using information from neighbours; this is poorly suited for explaining how clues are passed through the graph in hops. In this paper, we describe a structured Knowledge and contextual Information Fusion GNN (KIFGraph) whose explicit multi-hop graph reasoning mimics human step by step reasoning. Specifically, we first integrate clues at multiple levels of granularity (question, paragraph, sentence, entity) as nodes in the graph, connected by edges derived using structured semantic knowledge, then use a contextual encoder to obtain the initial node representations, followed by step-by-step two-stage graph reasoning that asynchronously updates node representations. Each node can be related to its neighbour nodes through fused structured knowledge and contextual information, reliably integrating their answer clues. Moreover, a masked attention mechanism (MAM) filters out noisy or redundant nodes and edges, to avoid ineffective clue propagation in graph reasoning. Experimental results show performance competitive with published models on the HotpotQA dataset.</abstract>
diff --git a/data/xml/2022.dravidianlangtech.xml b/data/xml/2022.dravidianlangtech.xml
index da0dee1c12..fb3d97624a 100644
--- a/data/xml/2022.dravidianlangtech.xml
+++ b/data/xml/2022.dravidianlangtech.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages</booktitle>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
       <editor><first>Ruba</first><last>Priyadharshini</last></editor>
-      <editor><first>Anand Kumar</first><last>Madasamy</last></editor>
+      <editor id="anand-kumar-m"><first>Anand Kumar</first><last>Madasamy</last></editor>
       <editor><first>Parameswari</first><last>Krishnamurthy</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <editor><first>Sinnathamby</first><last>Mahesan</last></editor>
@@ -171,7 +171,7 @@
       <title><fixed-case>T</fixed-case>eam<fixed-case>X</fixed-case>@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech-<fixed-case>ACL</fixed-case>2022: A Comparative Analysis for Troll-Based Meme Classification</title>
       <author><first>Rabindra Nath</first><last>Nandi</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>79-85</pages>
       <abstract>The spread of fake news, propaganda, misinformation, disinformation, and harmful content online raised concerns among social mediaplatforms, government agencies, policymakers, and society as a whole. This is because such harmful or abusive content leads to several consequences to people such as physical, emotional, relational, and financial. Among different harmful content trolling-based online content is one of them, where the idea is to post a message that is provocative, offensive, or menacing with an intent to mislead the audience. The content can be textual, visual, a combination of both, or a meme. In this study, we provide a comparative analysis of troll-based memes classification using the textual, visual, and multimodal content. We report several interesting findings in terms of code-mixed text, multimodal setting, and combining an additional dataset, which shows improvements over the majority baseline.</abstract>
       <url hash="141f0715">2022.dravidianlangtech-1.13</url>
@@ -333,7 +333,7 @@
       <author><first>Angel</first><last>S</last></author>
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>165-169</pages>
       <abstract>In this paper, we present our system for the task of Emotion analysis in Tamil. Over 3.96 million people use these platforms to send messages formed using texts, images, videos, audio or combinations of these to express their thoughts and feelings. Text communication on social media platforms is quite overwhelming due to its enormous quantity and simplicity. The data must be processed to understand the general feeling felt by the author. We present a lexicon-based approach for the extraction emotion in Tamil texts. We use dictionaries of words labelled with their respective emotions. The process of assigning an emotional label to each text, and then capture the main emotion expressed in it. Finally, the F1-score in the official test set is 0.0300 and our method ranks 5th.</abstract>
       <url hash="40bd322b">2022.dravidianlangtech-1.26</url>
@@ -541,7 +541,7 @@
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
       <author><first>Hosahalli</first><last>Shashirekha</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <pages>271-278</pages>
       <abstract>This paper presents an outline of the shared task on translation of under-resourced Dravidian languages at DravidianLangTech-2022 workshop to be held jointly with ACL 2022. A description of the datasets used, approach taken for analysis of submissions and the results have been illustrated in this paper. Five sub-tasks organized as a part of the shared task include the following translation pairs: Kannada to Tamil, Kannada to Telugu, Kannada to Sanskrit, Kannada to Malayalam and Kannada to Tulu. Training, development and test datasets were provided to all participants and results were evaluated on the gold standard datasets. A total of 16 research groups participated in the shared task and a total of 12 submission runs were made for evaluation. Bilingual Evaluation Understudy (BLEU) score was used for evaluation of the translations.</abstract>
       <url hash="686c25fe">2022.dravidianlangtech-1.41</url>
diff --git a/data/xml/2022.eamt.xml b/data/xml/2022.eamt.xml
index ca6dd15310..66a4c34e22 100644
--- a/data/xml/2022.eamt.xml
+++ b/data/xml/2022.eamt.xml
@@ -7,13 +7,13 @@
       <editor><first>Lieve</first><last>Macken</last></editor>
       <editor><first>Andrew</first><last>Rufener</last></editor>
       <editor><first>Loïc</first><last>Barrault</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Christophe</first><last>Declercq</last></editor>
       <editor><first>Maarit</first><last>Koponen</last></editor>
       <editor><first>Ellie</first><last>Kemp</last></editor>
       <editor><first>Spyridon</first><last>Pilos</last></editor>
-      <editor><first>Mikel L.</first><last>Forcada</last></editor>
-      <editor><first>Carolina</first><last>Scarton</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></editor>
+      <editor id="carolina-scarton"><first>Carolina</first><last>Scarton</last></editor>
       <editor><first>Joachim</first><last>Van den Bogaert</last></editor>
       <editor><first>Joke</first><last>Daems</last></editor>
       <editor><first>Arda</first><last>Tezcan</last></editor>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="2">
       <title>Neural Speech Translation: From Neural Machine Translation to Direct Speech Translation</title>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
       <pages>7–8</pages>
       <url hash="e7f82669">2022.eamt-1.2</url>
       <bibkey>gangi-2022-neural</bibkey>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>Multi-Domain Adaptation in Neural Machine Translation with Dynamic Sampling Strategies</title>
       <author><first>Minh-Quang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>13–22</pages>
       <abstract>Building effective Neural Machine Translation models often implies accommodating diverse sets of heterogeneous data so as to optimize performance for the domain(s) of interest. Such multi-source / multi-domain adaptation problems are typically approached through instance selection or reweighting strategies, based on a static assessment of the relevance of training instances with respect to the task at hand. In this paper, we study dynamic data selection strategies that are able to automatically re-evaluate the usefulness of data samples and to evolve a data selection policy in the course of training. Based on the results of multiple experiments, we show that such methods constitute a generic framework to automatically and effectively handle a variety of real-world situations, from multi-source domain adaptation to multi-domain learning and unsupervised domain adaptation.</abstract>
@@ -75,8 +75,8 @@
       <title>Comparing and combining tagging with different decoding algorithms for back-translation in <fixed-case>NMT</fixed-case>: learnings from a low resource scenario</title>
       <author><first>Xabier</first><last>Soto</last></author>
       <author><first>Olatz</first><last>Perez-De-Viñaspre</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>31–40</pages>
       <abstract>Back-translation is a well established approach to improve the performance of Neural Machine Translation (NMT) systems when large monolingual corpora of the target language and domain are available. Recently, diverse approaches have been proposed to get better automatic evaluation results of NMT models using back-translation, including the use of sampling instead of beam search as decoding algorithm for creating the synthetic corpus. Alternatively, it has been proposed to append a tag to the back-translated corpus for helping the NMT system to distinguish the synthetic bilingual corpus from the authentic one. However, not all the combinations of the previous approaches have been tested, and thus it is not clear which is the best approach for developing a given NMT system. In this work, we empirically compare and combine existing techniques for back-translation in a real low resource setting: the translation of clinical notes from Basque into Spanish. Apart from automatically evaluating the MT systems, we ask bilingual healthcare workers to perform a human evaluation, and analyze the different synthetic corpora by measuring their lexical diversity (LD). For reproducibility and generalizability, we repeat our experiments for German to English translation using public data. The results suggest that in lower resource scenarios tagging only helps when using sampling for decoding, in contradiction with the previous literature using bigger corpora from the news domain. When fine-tuning with a few thousand bilingual in-domain sentences, one of our proposed method (tagged restricted sampling) obtains the best results both in terms of automatic and human evaluation. We will publish the code upon acceptance.</abstract>
       <url hash="d1ebe8c0">2022.eamt-1.6</url>
@@ -85,7 +85,7 @@
     <paper id="7">
       <title>Passing Parser Uncertainty to the Transformer: Labeled Dependency Distributions for Neural Machine Translation</title>
       <author><first>Dongqi</first><last>Liu</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>41–50</pages>
       <abstract>Existing syntax-enriched neural machine translation (NMT) models work either with the single most-likely unlabeled parse or the set of n-best unlabeled parses coming out of an external parser. Passing a single or n-best parses to the NMT model risks propagating parse errors. Furthermore, unlabeled parses represent only syntactic groupings without their linguistically relevant categories. In this paper we explore the question: Does passing both parser uncertainty and labeled syntactic knowledge to the Transformer improve its translation performance? This paper contributes a novel method for infusing the whole labeled dependency distributions (LDD) of the source sentence’s dependency forest into the self-attention mechanism of the encoder of the Transformer. A range of experimental results on three language pairs demonstrate that the proposed approach outperforms both the vanilla Transformer as well as the single best-parse Transformer model across several evaluation metrics.</abstract>
       <url hash="f5310b32">2022.eamt-1.7</url>
@@ -103,11 +103,11 @@
       <title>Searching for <fixed-case>COMETINHO</fixed-case>: The Little Metric That Could</title>
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Ana C</first><last>Farinha</last></author>
-      <author><first>José G.C.</first><last>de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.C.</first><last>de Souza</last></author>
       <author><first>Pedro G.</first><last>Ramos</last></author>
       <author><first>André F.T.</first><last>Martins</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>61–70</pages>
       <abstract>In recent years, several neural fine-tuned machine translation evaluation metrics such as COMET and BLEURT have been proposed. These metrics achieve much higher correlations with human judgments than lexical overlap metrics at the cost of computational efficiency and simplicity, limiting their applications to scenarios in which one has to score thousands of translation hypothesis (e.g. scoring multiple systems or Minimum Bayes Risk decoding). In this paper, we explore optimization techniques, pruning, and knowledge distillation to create more compact and faster COMET versions. Our results show that just by optimizing the code through the use of caching and length batching we can reduce inference time between 39% and 65% when scoring multiple systems. Also, we show that pruning COMET can lead to a 21% model reduction without affecting the model’s accuracy beyond 0.01 Kendall tau correlation. Furthermore, we present DISTIL-COMET a lightweight distilled version that is 80% smaller and 2.128x faster while attaining a performance close to the original model and above strong baselines such as BERTSCORE and PRISM.</abstract>
       <url hash="8db20ea1">2022.eamt-1.9</url>
@@ -116,7 +116,7 @@
     <paper id="10">
       <title>Studying Post-Editese in a Professional Context: A Pilot Study</title>
       <author><first>Lise</first><last>Volkart</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>71–79</pages>
       <abstract>The past few years have seen the multiplication of studies on post-editese, following the massive adoption of post-editing in professional translation workflows. These studies mainly rely on the comparison of post-edited machine translation and human translation on artificial parallel corpora. By contrast, we investigate here post-editese on comparable corpora of authentic translation jobs for the language direction English into French. We explore commonly used scores and also proposes the use of a novel metric. Our analysis shows that post-edited machine translation is not only lexically poorer than human translation, but also less dense and less varied in terms of translation solutions. It also tends to be more prolific than human translation for our language direction. Finally, our study highlights some of the challenges of working with comparable corpora in post-editese research.</abstract>
       <url hash="38b55f5c">2022.eamt-1.10</url>
@@ -162,8 +162,8 @@
     </paper>
     <paper id="14">
       <title>On the Interaction of Regularization Factors in Low-resource Neural Machine Translation</title>
-      <author><first>Àlex R.</first><last>Atrio</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="alex-r-atrio"><first>Àlex R.</first><last>Atrio</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>111–120</pages>
       <abstract>We explore the roles and interactions of the hyper-parameters governing regularization, and propose a range of values applicable to low-resource neural machine translation. We demonstrate that default or recommended values for high-resource settings are not optimal for low-resource ones, and that more aggressive regularization is needed when resources are scarce, in proportion to their scarcity. We explain our observations by the generalization abilities of sharp vs. flat basins in the loss landscape of a neural network. Results for four regularization factors corroborate our claim: batch size, learning rate, dropout rate, and gradient clipping. Moreover, we show that optimal results are obtained when using several of these factors, and that our findings generalize across datasets of different sizes and languages.</abstract>
       <url hash="1ace8bb6">2022.eamt-1.14</url>
@@ -243,7 +243,7 @@
     </paper>
     <paper id="22">
       <title>“Hi, how can <fixed-case>I</fixed-case> help you?” Improving Machine Translation of Conversational Content in a Business Context</title>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Jennifer</first><last>Mell</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
@@ -258,7 +258,7 @@
       <author><first>Marianna</first><last>Buchicchio</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>201–210</pages>
       <abstract>This paper illustrates a new evaluation framework developed at Unbabel for measuring the quality of source language text and its effect on both Machine Translation (MT) and Human Post-Edition (PE) performed by non-professional post-editors. We examine both agent and user-generated content from the Customer Support domain and propose that differentiating the two is crucial to obtaining high quality translation output. Furthermore, we present results of initial experimentation with a new evaluation typology based on the Multidimensional Quality Metrics (MQM) Framework Lommel et al., 2014), specifically tailored toward the evaluation of source language text. We show how the MQM Framework Lommel et al., 2014) can be adapted to assess errors of monolingual source texts and demonstrate how very specific source errors propagate to the MT and PE targets. Finally, we illustrate how MT systems are not robust enough to handle very specific source noise in the context of Customer Support data.</abstract>
       <url hash="a2d0c15f">2022.eamt-1.23</url>
@@ -270,7 +270,7 @@
       <author><first>Vera</first><last>Cabarrão</last></author>
       <author><first>Pedro</first><last>Mota</last></author>
       <author><first/><last>Helena Moniz</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>211–219</pages>
       <abstract>This paper describes the research developed at Unbabel, a Portuguese Machine-translation start-up, that combines MT with human post-edition and focuses strictly on customer service content. We aim to contribute to furthering MT quality and good-practices by exposing the importance of having a continuously-in-development robust Named Entity Recognition system compliant with General Data Protection Regulation (GDPR). Moreover, we have tested semiautomatic strategies that support and enhance the creation of Named Entities gold standards to allow a more seamless implementation of Multilingual Named Entities Recognition Systems. The project described in this paper is the result of a shared work between Unbabel ́s linguists and Unbabel ́s AI engineering team, matured over a year. The project should, also, be taken as a statement of multidisciplinary, proving and validating the much-needed articulation between the different scientific fields that compose and characterize the area of Natural Language Processing (NLP).</abstract>
       <url hash="2aeacb76">2022.eamt-1.24</url>
@@ -280,7 +280,7 @@
       <title>Investigating automatic and manual filtering methods to produce <fixed-case>MT</fixed-case>-ready glossaries from existing ones</title>
       <author><first>Maria</first><last>Afara</last></author>
       <author><first>Randy</first><last>Scansani</last></author>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <pages>221–230</pages>
       <abstract>Commercial Machine Translation (MT) providers offer functionalities that allow users to leverage bilingual glossaries. This poses the question of how to turn glossaries that were intended to be used by a human translator into MT-ready ones, removing entries that could harm the MT output. We present two automatic filtering approaches - one based on rules and the second one relying on a translation memory - and a manual filtering procedure carried out by a linguist. The resulting glossaries are added to the MT model. The outputs are compared against a baseline where no glossary is used and an output produced using the original glossary. The present work aims at investigating if any of these filtering methods can bring a higher terminology accuracy without negative effects on the overall quality. Results are measured with terminology accuracy and Translation Edit Rate. We test our filters on two language pairs, En-Fr and De-En. Results show that some of the automatically filtered glossaries improve the output when compared to the baseline, and they may help reach a better balance between accuracy and overall quality, replacing the costly manual process without quality loss.</abstract>
       <url hash="db552fdb">2022.eamt-1.25</url>
@@ -323,7 +323,7 @@
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>261–270</pages>
       <abstract>Recent developments in machine translation and speech translation are opening up opportunities for computer-assisted translation tools with extended automation functions. Subtitling tools are recently being adapted for post-editing by providing automatically generated subtitles, and featuring not only machine translation, but also automatic segmentation and synchronisation. But what do professional subtitlers think of post-editing automatically generated subtitles? In this work, we conduct a survey to collect subtitlers’ impressions and feedback on the use of automatic subtitling in their workflows. Our findings show that, despite current limitations stemming mainly from speech processing errors, automatic subtitling is seen rather positively and has potential for the future.</abstract>
@@ -423,7 +423,7 @@
     <paper id="39">
       <title>Europeana Translate: Providing multilingual access to digital cultural heritage</title>
       <author><first>Eirini</first><last>Kaldeli</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Antoine</first><last>Isaac</last></author>
       <author><first>Paolo Sebastiano</first><last>Scalia</last></author>
       <author><first>Arne</first><last>Stabenau</last></author>
@@ -439,7 +439,7 @@
     </paper>
     <paper id="40">
       <title>The <fixed-case>PASSAGE</fixed-case> project : <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Subtitling of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> content</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
@@ -451,16 +451,16 @@
     <paper id="41">
       <title><fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>u: Massive collection and curation of monolingual and bilingual data: focus on under-resourced languages</title>
       <author><first>Marta</first><last>Bañón</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Cristian</first><last>García-Romero</last></author>
       <author><first>Taja</first><last>Kuzman</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Rik</first><last>van Noord</last></author>
       <author><first>Leopoldo Pla</first><last>Sempere</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Peter</first><last>Rupnik</last></author>
-      <author><first>Vít</first><last>Suchomel</last></author>
+      <author id="vit-suchomel"><first>Vít</first><last>Suchomel</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Tobias</first><last>van der Werff</last></author>
       <author><first>Jaume</first><last>Zaragoza</last></author>
@@ -491,7 +491,7 @@
     <paper id="44">
       <title><fixed-case>MT</fixed-case>ee: Open Machine Translation Platform for <fixed-case>E</fixed-case>stonian Government</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Marcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <author><first>Jānis</first><last>Šlapiņš</last></author>
       <author><first>Valters</first><last>Šics</last></author>
@@ -518,7 +518,7 @@
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Niki A.</first><last>Loppi</last></author>
       <author><first>Stig-Arne</first><last>Grönroos</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>311–312</pages>
       <abstract>We describe the enhancement of a multilingual NMT toolkit developed as part of the FoTran project. We devise our modular attention-bridge model, which connects language-specific components through a shared network layer. The system now supports distributed training over many nodes and GPUs in order to substantially scale up the number of languages that can be included in a modern neural translation architecture. The model enables the study of emerging language-agnostic representations and also provides a modular toolkit for efficient machine translation.</abstract>
       <url hash="19fe733f">2022.eamt-1.45</url>
@@ -535,11 +535,11 @@
     </paper>
     <paper id="47">
       <title><fixed-case>QUARTZ</fixed-case>: Quality-Aware Machine Translation</title>
-      <author><first>José G.C.</first><last>de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.C.</first><last>de Souza</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Ana C.</first><last>Farinha</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>315–316</pages>
       <abstract>This paper presents QUARTZ, QUality-AwaRe machine Translation, a project led by Unbabel which aims at developing machine translation systems that are more robust and produce fewer critical errors. With QUARTZ we want to enable machine translation for user-generated conversational content types that do not tolerate critical errors in automatic translations.</abstract>
       <url hash="d330b565">2022.eamt-1.47</url>
@@ -606,7 +606,7 @@
     </paper>
     <paper id="53">
       <title><fixed-case>D</fixed-case>eep<fixed-case>SPIN</fixed-case>: Deep Structured Prediction for Natural Language Processing</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Ben</first><last>Peters</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Chunchuan</first><last>Lyu</last></author>
@@ -656,7 +656,7 @@
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>335–336</pages>
       <abstract>In response to the increasing interest towards automatic subtitling, this EAMT-funded project aimed at collecting subtitle post-editing data in a real use case scenario where professional subtitlers edit automatically generated subtitles. The post-editing setting includes, for the first time, automatic generation of timestamps and segmentation, and focuses on the effect of timing and segmentation edits on the post-editing process. The collected data will serve as the basis for investigating how subtitlers interact with automatic subtitling and for devising evaluation methods geared to the multimodal nature and formal requirements of subtitling.</abstract>
@@ -666,7 +666,7 @@
     <paper id="58">
       <title><fixed-case>D</fixed-case>i<fixed-case>H</fixed-case>u<fixed-case>T</fixed-case>ra: a Parallel Corpus to Analyse Differences between Human Translations</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Maarit</first><last>Koponen</last></author>
       <pages>337–338</pages>
       <abstract>This project aimed to design a corpus of parallel human translations (HTs) of the same source texts by professionals and students. The resulting corpus consists of English news and reviews source texts, their translations into Russian and Croatian, and translations of the reviews into Finnish. The corpus will be valuable for both studying variation in translation and evaluating machine translation (MT) systems.</abstract>
@@ -687,12 +687,12 @@
     </paper>
     <paper id="60">
       <title>Curated Multilingual Language Resources for <fixed-case>CEF</fixed-case> <fixed-case>AT</fixed-case> (<fixed-case>CURLICAT</fixed-case>): overall view</title>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Andraž</first><last>Repar</last></author>
       <pages>341–342</pages>
@@ -745,7 +745,7 @@
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Irene</first><last>Russo</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <author><first>Marcin</first><last>Paprzycki</last></author>
       <author><first>François</first><last>Portet</last></author>
@@ -778,7 +778,7 @@
     </paper>
     <paper id="65">
       <title>Automatic Video Dubbing at <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek</title>
-      <author><first>Mattia</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia</first><last>Di Gangi</last></author>
       <author><first>Nick</first><last>Rossenbach</last></author>
       <author><first>Alejandro</first><last>Pérez</last></author>
       <author><first>Parnia</first><last>Bahar</last></author>
@@ -798,13 +798,13 @@
       <author><first>Owen</first><last>Gallagher</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Jens Peter</first><last>Kückens</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Katrin</first><last>Marheinecke</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Natalia</first><last>Resende</last></author>
       <author><first>Tea</first><last>Vojtěchová</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -817,7 +817,7 @@
       <title><fixed-case>LITHME</fixed-case>: Language in the Human-Machine Era</title>
       <author><first>Maarit</first><last>Koponen</last></author>
       <author><first>Kais</first><last>Allkivi-Metsoja</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <author><first>Dave</first><last>Sayers</last></author>
       <author><first>Márta</first><last>Seresi</last></author>
       <pages>355–356</pages>
@@ -850,7 +850,7 @@
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Alina</first><last>Karakanta</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>361–362</pages>
       <abstract>This project aimed at extending the test sets of the MuST-C speech translation (ST) corpus with new reference translations. The new references were collected from professional post-editors working on the output of different ST systems for three language pairs: English-German/Italian/Spanish. In this paper, we shortly describe how the data were collected and how they are distributed. As an evidence of their usefulness, we also summarise the findings of the first comparative evaluation of cascade and direct ST approaches, which was carried out relying on the collected data. The project was partially funded by the European Association for Machine Translation (EAMT) through its 2020 Sponsorship of Activities programme.</abstract>
diff --git a/data/xml/2022.ecnlp.xml b/data/xml/2022.ecnlp.xml
index 7a6d7f7310..9386b9b614 100644
--- a/data/xml/2022.ecnlp.xml
+++ b/data/xml/2022.ecnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-05-15" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)</booktitle>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Oleg</first><last>Rokhlenko</last></editor>
       <editor><first>Nicola</first><last>Ueffing</last></editor>
       <editor><first>Ido</first><last>Guy</last></editor>
@@ -64,9 +64,9 @@
     </paper>
     <paper id="4">
       <title>Data Quality Estimation Framework for Faster Tax Code Classification</title>
-      <author><first>Ravi</first><last>Kondadadi</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravi</first><last>Kondadadi</last></author>
       <author><first>Allen</first><last>Williams</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <pages>29-34</pages>
       <abstract>This paper describes a novel framework to estimate the data quality of a collection of product descriptions to identify required relevant information for accurate product listing classification for tax-code assignment. Our Data Quality Estimation (DQE) framework consists of a Question Answering (QA) based attribute value extraction model to identify missing attributes and a classification model to identify bad quality records. We show that our framework can accurately predict the quality of product descriptions. In addition to identifying low-quality product listings, our framework can also generate a detailed report at a category level showing missing product information resulting in a better customer experience.</abstract>
       <url hash="3256ac65">2022.ecnlp-1.4</url>
@@ -192,7 +192,7 @@
       <author><first>Marco</first><last>Del Tredici</last></author>
       <author><first>Weiwei</first><last>Cheng</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
-      <author><first>Adrià</first><last>Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>Gispert</last></author>
       <pages>99-110</pages>
       <abstract>It is of great value to answer product questions based on heterogeneous information sources available on web product pages, e.g., semi-structured attributes, text descriptions, user-provided contents, etc. However, these sources have different structures and writing styles, which poses challenges for (1) evidence ranking, (2) source selection, and (3) answer generation. In this paper, we build a benchmark with annotations for both evidence selection and answer generation covering 6 information sources. Based on this benchmark, we conduct a comprehensive study and present a set of best practices. We show that all sources are important and contribute to answering questions. Handling all sources within one single model can produce comparable confidence scores across sources and combining multiple sources for training always helps, even for sources with totally different structures. We further propose a novel data augmentation method to iteratively create training samples for answer generation, which achieves close-to-human performance with only a few thousandannotations. Finally, we perform an in-depth error analysis of model predictions and highlight the challenges for future research.</abstract>
       <url hash="b611f2ee">2022.ecnlp-1.13</url>
@@ -206,7 +206,7 @@
       <author><first>Gianni</first><last>Barlacchi</last></author>
       <author><first>Marco</first><last>Del Tredici</last></author>
       <author><first>Weiwei</first><last>Cheng</last></author>
-      <author><first>Adrià</first><last>Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>Gispert</last></author>
       <pages>111-120</pages>
       <abstract>Product question answering (PQA) aims to automatically address customer questions to improve their online shopping experience. Current research mainly focuses on finding answers from either unstructured text, like product descriptions and user reviews, or structured knowledge bases with pre-defined schemas. Apart from the above two sources, a lot of product information is represented in a semi-structured way, e.g., key-value pairs, lists, tables, json and xml files, etc. These semi-structured data can be a valuable answer source since they are better organized than free text, while being easier to construct than structured knowledge bases. However, little attention has been paid to them. To fill in this blank, here we study how to effectively incorporate semi-structured answer sources for PQA and focus on presenting answers in a natural, fluent sentence. To this end, we present semiPQA: a dataset to benchmark PQA over semi-structured data. It contains 11,243 written questions about json-formatted data covering 320 unique attribute types. Each data point is paired with manually-annotated text that describes its contents, so that we can train a neural answer presenter to present the data in a natural way. We provide baseline results and a deep analysis on the successes and challenges of leveraging semi-structured data for PQA. In general, state-of-the-art neural models can perform remarkably well when dealing with seen attribute types. For unseen attribute types, however, a noticeable drop is observed for both answer presentation and attribute ranking.</abstract>
       <url hash="829bab91">2022.ecnlp-1.14</url>
@@ -253,13 +253,13 @@
     <paper id="18">
       <title>Domain-specific knowledge distillation yields smaller and better models for conversational commerce</title>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Akshay</first><last>Hazare</last></author>
       <author><first>Joseph</first><last>Bradley</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <author><first>Xi</first><last>Chen</last></author>
       <author><first>Matthew</first><last>Dunn</last></author>
-      <author><first>Beth</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth</first><last>Hockey</last></author>
       <author><first>Andrew</first><last>Maurer</last></author>
       <author><first>Dominic</first><last>Widdows</last></author>
       <pages>151-160</pages>
@@ -333,7 +333,7 @@
       <title>Investigating the Generative Approach for Question Answering in <fixed-case>E</fixed-case>-Commerce</title>
       <author><first>Kalyani</first><last>Roy</last></author>
       <author><first>Vineeth</first><last>Balapanuru</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>210-216</pages>
       <abstract>Many e-commerce websites provide Product-related Question Answering (PQA) platform where potential customers can ask questions related to a product, and other consumers can post an answer to that question based on their experience. Recently, there has been a growing interest in providing automated responses to product questions. In this paper, we investigate the suitability of the generative approach for PQA. We use state-of-the-art generative models proposed by Deng et al.(2020) and Lu et al.(2020) for this purpose. On closer examination, we find several drawbacks in this approach: (1) input reviews are not always utilized significantly for answer generation, (2) the performance of the models is abysmal while answering the numerical questions, (3) many of the generated answers contain phrases like “I do not know” which are taken from the reference answer in training data, and these answers do not convey any information to the customer. Although these approaches achieve a high ROUGE score, it does not reflect upon these shortcomings of the generated answers. We hope that our analysis will lead to more rigorous PQA approaches, and future research will focus on addressing these shortcomings in PQA.</abstract>
@@ -372,7 +372,7 @@
       <title>Can Pretrained Language Models Generate Persuasive, Faithful, and Informative Ad Text for Product Descriptions?</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>234-243</pages>
       <abstract>For any e-commerce service, persuasive, faithful, and informative product descriptions can attract shoppers and improve sales. While not all sellers are capable of providing such interesting descriptions, a language generation system can be a source of such descriptions at scale, and potentially assist sellers to improve their product descriptions. Most previous work has addressed this task based on statistical approaches (Wang et al., 2017), limited attributes such as titles (Chen et al., 2019; Chan et al., 2020), and focused on only one product type (Wang et al., 2017; Munigala et al., 2018; Hong et al., 2021). In this paper, we jointly train image features and 10 text attributes across 23 diverse product types, with two different target text types with different writing styles: bullet points and paragraph descriptions. Our findings suggest that multimodal training with modern pretrained language models can generate fluent and persuasive advertisements, but are less faithful and informative, especially out of domain.</abstract>
       <url hash="9bb72f8c">2022.ecnlp-1.27</url>
diff --git a/data/xml/2022.emnlp.xml b/data/xml/2022.emnlp.xml
index 9872cbaa16..86eebb6e8c 100644
--- a/data/xml/2022.emnlp.xml
+++ b/data/xml/2022.emnlp.xml
@@ -38,7 +38,7 @@
       <author><first>Libiao</first><last>Peng</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zhen</first><last>Guo</last><affiliation>Baidu</affiliation></author>
       <author><first>Wenquan</first><last>Wu</last><affiliation>Baidu</affiliation></author>
-      <author><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Hua</first><last>Wu</last><affiliation>Baidu</affiliation></author>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
       <pages>18-29</pages>
@@ -117,7 +117,7 @@
       <author><first>Sangwoo</first><last>Cho</last><affiliation>Tecent AI Lab</affiliation></author>
       <author><first>Kaiqiang</first><last>Song</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xiaoyang</first><last>Wang</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
       <pages>106-118</pages>
       <abstract>Text segmentation is important for signaling a document’s structure. Without segmenting a long document into topically coherent sections, it is difficult for readers to comprehend the text, let alone find important information. The problem is only exacerbated by a lack of segmentation in transcripts of audio/video recordings. In this paper, we explore the role that section segmentation plays in extractive summarization of written and spoken documents. Our approach learns robust sentence representations by performing summarization and segmentation simultaneously, which is further enhanced by an optimization-based regularizer to promote selection of diverse summary sentences. We conduct experiments on multiple datasets ranging from scientific articles to spoken transcripts to evaluate the model’s performance. Our findings suggest that the model can not only achieve state-of-the-art performance on publicly available benchmarks, but demonstrate better cross-genre transferability when equipped with text segmentation. We perform a series of analyses to quantify the impact of section segmentation on summarizing written and spoken documents of substantial length and complexity.</abstract>
@@ -156,7 +156,7 @@
       <author><first>Do June</first><last>Min</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Kenneth</first><last>Resnicow</last><affiliation>School of Public Health, University of Michigan</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>148-158</pages>
       <abstract>Counselor reflection is a core verbal skill used by mental health counselors to express understanding and affirmation of the client’s experience and concerns. In this paper, we propose a system for the analysis of counselor reflections. Specifically, our system takes as input one dialog turn containing a client prompt and a counselor response, and outputs a score indicating the level of reflection in the counselor response. We compile a dataset consisting of different levels of reflective listening skills, and propose the Prompt-Aware margIn Ranking (PAIR) framework that contrasts positive and negative prompt and response pairs using specially designed multi-gap and prompt-aware margin ranking losses. Through empirical evaluations and deployment of our system in a real-life educational environment, we show that our analysis model outperforms several baselines on different metrics, and can be used to provide useful feedback to counseling trainees.</abstract>
       <url hash="897ccce5">2022.emnlp-main.11</url>
@@ -262,7 +262,7 @@
       <title>Multi-<fixed-case>VQG</fixed-case>: Generating Engaging Questions for Multiple Images</title>
       <author><first>Min-Hsuan</first><last>Yeh</last><affiliation>Academia Sinica</affiliation></author>
       <author><first>Vincent</first><last>Chen</last><affiliation>University of Illinois at Urbana Champaign</affiliation></author>
-      <author><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Lun-Wei</first><last>Ku</last><affiliation>Academia Sinica</affiliation></author>
       <pages>277-290</pages>
       <abstract>Generating engaging content has drawn much recent attention in the NLP community. Asking questions is a natural way to respond to photos and promote awareness. However, most answers to questions in traditional question-answering (QA) datasets are factoids, which reduce individuals’ willingness to answer. Furthermore, traditional visual question generation (VQG) confines the source data for question generation to single images, resulting in a limited ability to comprehend time-series information of the underlying event. In this paper, we propose generating engaging questions from multiple images. We present MVQG, a new dataset, and establish a series of baselines, including both end-to-end and dual-stage architectures. Results show that building stories behind the image sequence enables models togenerate engaging questions, which confirms our assumption that people typically construct a picture of the event in their minds before asking questions. These results open up an exciting challenge for visual-and-language models to implicitly construct a story behind a series of photos to allow for creativity and experience sharing and hence draw attention to downstream applications.</abstract>
@@ -276,7 +276,7 @@
       <author><first>Jannis</first><last>Bulian</last><affiliation>Google</affiliation></author>
       <author><first>Christian</first><last>Buck</last><affiliation>Google Research</affiliation></author>
       <author><first>Wojciech</first><last>Gajewski</last><affiliation>Google Research</affiliation></author>
-      <author><first>Benjamin</first><last>Börschinger</last><affiliation>Google</affiliation></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last><affiliation>Google</affiliation></author>
       <author><first>Tal</first><last>Schuster</last><affiliation>Google</affiliation></author>
       <pages>291-305</pages>
       <abstract>The predictions of question answering (QA) systems are typically evaluated against manually annotated finite sets of one or more answers. This leads to a coverage limitation that results in underestimating the true performance of systems, and is typically addressed by extending over exact match (EM) with predefined rules or with the token-level F1 measure.In this paper, we present the first systematic conceptual and data-driven analysis to examine the shortcomings of token-level equivalence measures.To this end, we define the asymmetric notion of answer equivalence (AE), accepting answers that are equivalent to or improve over the reference, and publish over 23k human judgements for candidates produced by multiple QA systems on SQuAD.Through a careful analysis of this data, we reveal and quantify several concrete limitations of the F1 measure, such as a false impression of graduality, or missing dependence on the question.Since collecting AE annotations for each evaluated model is expensive, we learn a BERT matching (BEM) measure to approximate this task. Being a simpler task than QA, we find BEM to provide significantly better AE approximations than F1, and to more accurately reflect the performance of systems.Finally, we demonstrate the practical utility of AE and BEM on the concrete application of minimal accurate prediction sets, reducing the number of required answers by up to X2.6.</abstract>
@@ -354,7 +354,7 @@
     <paper id="26">
       <title>Translation between Molecules and Natural Language</title>
       <author><first>Carl</first><last>Edwards</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
-      <author><first>Tuan</first><last>Lai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Kevin</first><last>Ros</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Garrett</first><last>Honke</last><affiliation>X, the Moonshot Factory</affiliation></author>
       <author><first>Kyunghyun</first><last>Cho</last><affiliation>New York University</affiliation></author>
@@ -382,8 +382,8 @@
     <paper id="28">
       <title>Sentence-Incremental Neural Coreference Resolution</title>
       <author><first>Matt</first><last>Grenander</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>427-443</pages>
       <abstract>We propose a sentence-incremental neural coreference resolution system which incrementally builds clusters after marking mention boundaries in a shift-reduce method. The system is aimed at bridging two recent approaches at coreference resolution: (1) state-of-the-art non-incremental models that incur quadratic complexity in document length with high computational cost, and (2) memory network-based models which operate incrementally but do not generalize beyond pronouns. For comparison, we simulate an incremental setting by constraining non-incremental systems to form partial coreference chains before observing new sentences. In this setting, our system outperforms comparable state-of-the-art methods by 2 F1 on OntoNotes and 6.8 F1 on the CODI-CRAC 2021 corpus. In a conventional coreference setup, our system achieves 76.3 F1 on OntoNotes and 45.5 F1 on CODI-CRAC 2021, which is comparable to state-of-the-art baselines. We also analyze variations of our system and show that the degree of incrementality in the encoder has a surprisingly large effect on the resulting performance.</abstract>
       <url hash="7cb72175">2022.emnlp-main.28</url>
@@ -453,7 +453,7 @@
       <author><first>Yi-Ting</first><last>Yeh</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shikib</first><last>Mehri</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Maxine</first><last>Eskenazi</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Jeffrey</first><last>Bigham</last><affiliation>CMU/Apple</affiliation></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last><affiliation>CMU/Apple</affiliation></author>
       <pages>505-525</pages>
       <abstract>Instruction tuning is an emergent paradigm in NLP wherein natural language instructions are leveraged with language models to induce zero-shot performance on unseen tasks. Dialogue is an especially interesting area in which to explore instruction tuning because dialogue systems perform multiple kinds of tasks related to language (e.g., natural language understanding and generation, domain-specific interaction), yet instruction tuning has not been systematically explored for dialogue-related tasks. We introduce InstructDial, an instruction tuning framework for dialogue, which consists of a repository of 48 diverse dialogue tasks in a unified text-to-text format created from 59 openly available dialogue datasets. We explore cross-task generalization ability on models tuned on InstructDial across diverse dialogue tasks. Our analysis reveals that InstructDial enables good zero-shot performance on unseen datasets and tasks such as dialogue evaluation and intent detection, and even better performance in a few-shot setting. To ensure that models adhere to instructions, we introduce novel meta-tasks. We establish benchmark zero-shot and few-shot performance of models trained using the proposed framework on multiple dialogue tasks.</abstract>
       <url hash="11578ac8">2022.emnlp-main.33</url>
@@ -547,19 +547,19 @@
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <author><first>Ming</first><last>Zhong</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Pengcheng</first><last>Yin</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Victor</first><last>Zhong</last><affiliation>University of Washington</affiliation></author>
       <author><first>Bailin</first><last>Wang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Chengzu</first><last>Li</last><affiliation>Shanghai AI Lab</affiliation></author>
       <author><first>Connor</first><last>Boyle</last><affiliation>University of Washington</affiliation></author>
       <author><first>Ansong</first><last>Ni</last><affiliation>Yale University</affiliation></author>
       <author><first>Ziyu</first><last>Yao</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce</affiliation></author>
       <author><first>Lingpeng</first><last>Kong</last><affiliation>The University of Hong Kong</affiliation></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Penn State University</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Tao</first><last>Yu</last><affiliation>University of Washington</affiliation></author>
       <pages>602-631</pages>
       <abstract>Structured knowledge grounding (SKG) leverages structured knowledge to complete user requests, such as semantic parsing over databases and question answering over knowledge bases. Since the inputs and outputs of SKG tasks are heterogeneous, they have been studied separately by different communities, which limits systematic and compatible research on SKG. In this paper, we overcome this limitation by proposing the UnifiedSKG framework, which unifies 21 SKG tasks into a text-to-text format, aiming to promote systematic SKG research, instead of being exclusive to a single task, domain, or dataset. We use UnifiedSKG to benchmark T5 with different sizes and show that T5, with simple modifications when necessary, achieves state-of-the-art performance on almost all of the 21 tasks. We further demonstrate that multi-task prefix-tuning improves the performance on most tasks, largely improving the overall performance. UnifiedSKG also facilitates the investigation of zero-shot and few-shot learning, and we show that T0, GPT-3, and Codex struggle in zero-shot and few-shot learning for SKG. We also use UnifiedSKG to conduct a series of controlled experiments on structured knowledge encoding variants across SKG tasks. UnifiedSKG is easily extensible to more tasks, and it is open-sourced at https://github.com/hkunlp/unifiedskg.</abstract>
@@ -570,9 +570,9 @@
     </paper>
     <paper id="40">
       <title>Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in <fixed-case>NLP</fixed-case> Models</title>
-      <author><first>Hannah</first><last>Chen</last><affiliation>University of Virginia</affiliation></author>
+      <author id="hannah-cyberey"><first>Hannah</first><last>Chen</last><affiliation>University of Virginia</affiliation></author>
       <author><first>Yangfeng</first><last>Ji</last><affiliation>University of Virginia</affiliation></author>
-      <author><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
       <pages>632-647</pages>
       <abstract>Traditional (fickle) adversarial examples involve finding a small perturbation that does not change an input’s true label but confuses the classifier into outputting a different prediction. Conversely, obstinate adversarial examples occur when an adversary finds a small perturbation that preserves the classifier’s prediction but changes the true label of an input.Adversarial training and certified robust training have shown some effectiveness in improving the robustness of machine learnt models to fickle adversarial examples. We show that standard adversarial training methods focused on reducing vulnerability to fickle adversarial examples may make a model more vulnerable to obstinate adversarial examples, with experiments for both natural language inference and paraphrase identification tasks. To counter this phenomenon, we introduce Balanced Adversarial Training, which incorporates contrastive learning to increase robustness against both fickle and obstinate adversarial examples.</abstract>
       <url hash="84d84db6">2022.emnlp-main.40</url>
@@ -597,7 +597,7 @@
       <title>Generative Language Models for Paragraph-Level Question Generation</title>
       <author><first>Asahi</first><last>Ushio</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Fernando</first><last>Alva-Manchego</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>670-688</pages>
       <abstract>Powerful generative models have led to recent progress in question generation (QG). However, it is difficult to measure advances in QG research since there are no standardized resources that allow a uniform comparison among approaches. In this paper, we introduce QG-Bench, a multilingual and multidomain benchmark for QG that unifies existing question answering datasets by converting them to a standard QG setting. It includes general-purpose datasets such as SQuAD for English, datasets from ten domains and two styles, as well as datasets in eight different languages. Using QG-Bench as a reference, we perform an extensive analysis of the capabilities of language models for the task. First, we propose robust QG baselines based on fine-tuning generative language models. Then, we complement automatic evaluation based on standard metrics with an extensive manual evaluation, which in turn sheds light on the difficulty of evaluating QG models. Finally, we analyse both the domain adaptability of these models as well as the effectiveness of multilingual models in languages other than English.QG-Bench is released along with the fine-tuned models presented in the paper (https://github.com/asahi417/lm-question-generation), which are also available as a demo (https://autoqg.net/).</abstract>
       <url hash="3f32331f">2022.emnlp-main.42</url>
@@ -607,7 +607,7 @@
     </paper>
     <paper id="43">
       <title>A Unified Encoder-Decoder Framework with Entity Memory</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
+      <author><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
@@ -651,7 +651,7 @@
       <author><first>Yingjun</first><last>Mou</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Xiang</first><last>Chen</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Le</first><last>Song</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <pages>730-744</pages>
       <abstract>We study the problem of extracting N-ary relation tuples from scientific articles. This task is challenging because the target knowledge tuples can reside in multiple parts and modalities of the document. Our proposed method ReSel decomposes this task into a two-stage procedure that first retrieves the most relevant paragraph/table and then selects the target entity from the retrieved component. For the high-level retrieval stage, ReSel designs a simple and effective feature set, which captures multi-level lexical and semantic similarities between the query and components. For the low-level selection stage, ReSel designs a cross-modal entity correlation graph along with a multi-view architecture, which models both semantic and document-structural relations between entities. Our experiments on three scientific information extraction datasets show that ReSel outperforms state-of-the-art baselines significantly.</abstract>
       <url hash="ace5e963">2022.emnlp-main.46</url>
@@ -661,7 +661,7 @@
     </paper>
     <paper id="47">
       <title><fixed-case>G</fixed-case>amma<fixed-case>E</fixed-case>: Gamma Embeddings for Logical Queries on Knowledge Graphs</title>
-      <author><first>Dong</first><last>Yang</last><affiliation>OPPO Guangdong Mobile Telecommunications Co., Ltd.</affiliation></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last><affiliation>OPPO Guangdong Mobile Telecommunications Co., Ltd.</affiliation></author>
       <author><first>Peijun</first><last>Qing</last><affiliation>Xidian University</affiliation></author>
       <author><first>Yang</first><last>Li</last><affiliation>The Hong Kong Polytechnic University</affiliation></author>
       <author><first>Haonan</first><last>Lu</last><affiliation>OPPO Guangdong Mobile Telecommunications Co., Ltd.</affiliation></author>
@@ -709,9 +709,9 @@
       <author><first>Yifan</first><last>Chen</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Devamanyu</first><last>Hazarika</last><affiliation>Amazon</affiliation></author>
       <author><first>Mahdi</first><last>Namazifar</last><affiliation>Amazon</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Di</first><last>Jin</last><affiliation>Amazon</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>793-808</pages>
       <abstract>Prefix-tuning, or more generally continuous prompt tuning, has become an essential paradigm of parameter-efficient transfer learning. Using a large pre-trained language model (PLM), prefix-tuning can obtain strong performance by training only a small portion of parameters. In this paper, we propose to understand and further develop prefix-tuning through the kernel lens. Specifically, we make an analogy between <i>prefixes</i> and <i>inducing variables</i> in kernel methods and hypothesize that <i>prefixes</i> serving as <i>inducing variables</i> would improve their overall mechanism. From the kernel estimator perspective, we suggest a new variant of prefix-tuning—<i>inducer-tuning</i>, which shares the exact mechanism as prefix-tuning while leveraging the residual form found in adapter-tuning. This mitigates the initialization issue in prefix-tuning. Through comprehensive empirical experiments on natural language understanding and generation tasks, we demonstrate that inducer-tuning can close the performance gap between prefix-tuning and fine-tuning.</abstract>
       <url hash="3ac7e3a6">2022.emnlp-main.50</url>
@@ -724,7 +724,7 @@
       <author><first>Puneet</first><last>Mathur</last><affiliation>University of Maryland College Park</affiliation></author>
       <author><first>Gautam</first><last>Kunapuli</last><affiliation>Verisk Analytics</affiliation></author>
       <author><first>Riyaz</first><last>Bhat</last><affiliation>IBM IRL</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Dinesh</first><last>Manocha</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Maneesh</first><last>Singh</last><affiliation>Motive Technologies</affiliation></author>
       <pages>809-824</pages>
@@ -736,10 +736,10 @@
     </paper>
     <paper id="52">
       <title><fixed-case>L</fixed-case>ight<fixed-case>EA</fixed-case>: A Scalable, Robust, and Interpretable Entity Alignment Framework via Three-view Label Propagation</title>
-      <author><first>Xin</first><last>Mao</last><affiliation>ECNU</affiliation></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last><affiliation>ECNU</affiliation></author>
       <author><first>Wenting</first><last>Wang</last><affiliation>Bytedance Group</affiliation></author>
       <author><first>Yuanbin</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
       <pages>825-838</pages>
       <abstract>Entity Alignment (EA) aims to find equivalent entity pairs between KGs, which is the core step to bridging and integrating multi-source KGs. In this paper, we argue that existing complex EA methods inevitably inherit the inborn defects from their neural network lineage: poor interpretability and weak scalability. Inspired by recent studies, we reinvent the classical Label Propagation algorithm to effectively run on KGs and propose a neural-free EA framework — LightEA, consisting of three efficient components: (i) Random Orthogonal Label Generation, (ii) Three-view Label Propagation, and (iii) Sparse Sinkhorn Operation.According to the extensive experiments on public datasets, LightEA has impressive scalability, robustness, and interpretability. With a mere tenth of time consumption, LightEA achieves comparable results to state-of-the-art methods across all datasets and even surpasses them on many. Besides, due to the computational process of LightEA being entirely linear, we could trace the propagation process at each step and clearly explain how the entities are aligned.</abstract>
       <url hash="33aabf75">2022.emnlp-main.52</url>
@@ -908,7 +908,7 @@
       <author><first>Machel</first><last>Reid</last><affiliation>Google</affiliation></author>
       <author><first>Victor</first><last>Zhong</last><affiliation>University of Washington</affiliation></author>
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Paul G. Allen School of Computer Science; Meta AI</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>964-975</pages>
       <abstract>We present M2D2, a fine-grained, massively multi-domain corpus for studying domain adaptation in language models (LMs). M2D2 consists of 8.5B tokens and spans 145 domains extracted from Wikipedia and Semantic Scholar. Using ontologies derived from Wikipedia and ArXiv categories, we organize the domains in each data source into 22 groups. This two-level hierarchy enables the study of relationships between domains and their effects on in- and out-of-domain performance after adaptation. We also present a number of insights into the nature of effective domain adaptation in LMs, as examples of the new types of studies M2D2 enables. To improve in-domain performance, we show the benefits of adapting the LM along a domain hierarchy; adapting to smaller amounts of fine-grained domain-specific data can lead to larger in-domain performance gains than larger amounts of weakly relevant data. We further demonstrate a trade-off between in-domain specialization and out-of-domain generalization within and across ontologies, as well as a strong correlation between out-of-domain performance and lexical overlap between domains.</abstract>
       <url hash="d4326df9">2022.emnlp-main.63</url>
@@ -975,8 +975,8 @@
       <title><fixed-case>ELMER</fixed-case>: A Non-Autoregressive Pre-trained Language Model for Efficient and Effective Text Generation</title>
       <author><first>Junyi</first><last>Li</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
       <author><first>Tianyi</first><last>Tang</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>1044-1058</pages>
       <abstract>We study the text generation task under the approach of pre-trained language models (PLMs). Typically, an auto-regressive (AR) method is adopted for generating texts in a token-by-token manner. Despite many advantages of AR generation, it usually suffers from inefficient inference. Therefore, non-autoregressive (NAR) models are proposed to generate all target tokens simultaneously. However, NAR models usually generate texts of lower quality due to the absence of token dependency in the output text. In this paper, we propose ELMER: an efficient and effective PLM for NAR text generation to explicitly model the token dependency during NAR generation. By leveraging the early exit technique, ELMER enables the token generations at different layers, according to their prediction confidence (a more confident token will exit at a lower layer). Besides, we propose a novel pre-training objective, Layer Permutation Language Modeling, to pre-train ELMER by permuting the exit layer for each token in sequences. Experiments on three text generation tasks show that ELMER significantly outperforms NAR models and further narrows the performance gap with AR PLMs (ELMER (29.92) vs BART (30.61) ROUGE-L in XSUM) while achieving over 10 times inference speedup.</abstract>
@@ -1042,7 +1042,7 @@
       <author><first>Alon</first><last>Halfon</last><affiliation>IBM Research</affiliation></author>
       <author><first>Eyal</first><last>Shnarch</last><affiliation>IBM Research</affiliation></author>
       <author><first>Yotam</first><last>Perlitz</last><affiliation>IBM</affiliation></author>
-      <author><first>Liat</first><last>Ein-Dor</last><affiliation>IBM Research</affiliation></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last><affiliation>IBM Research</affiliation></author>
       <author><first>Noam</first><last>Slonim</last><affiliation>IBM Research</affiliation></author>
       <pages>1107-1119</pages>
       <abstract>Recent advances in large pretrained language models have increased attention to zero-shot text classification. In particular, models finetuned on natural language inference datasets have been widely adopted as zero-shot classifiers due to their promising results and off-the-shelf availability. However, the fact that such models are unfamiliar with the target task can lead to instability and performance issues. We propose a plug-and-play method to bridge this gap using a simple self-training approach, requiring only the class names along with an unlabeled dataset, and without the need for domain expertise or trial and error. We show that fine-tuning the zero-shot classifier on its most confident predictions leads to significant performance gains across a wide range of text classification tasks, presumably since self-training adapts the zero-shot model to the task at hand.</abstract>
@@ -1070,7 +1070,7 @@
       <author><first>Richard Yuanzhe</first><last>Pang</last><affiliation>New York University</affiliation></author>
       <author><first>Angelica</first><last>Chen</last><affiliation>New York University</affiliation></author>
       <author><first>Jason</first><last>Phang</last><affiliation>New York University</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <pages>1139-1156</pages>
       <abstract>Summarization datasets are often assembled either by scraping naturally occurring public-domain summaries—which are nearly always in difficult-to-work-with technical domains—or by using approximate heuristics to extract them from everyday text—which frequently yields unfaithful summaries. In this work, we turn to a slower but more straightforward approach to developing summarization benchmark data: We hire highly-qualified contractors to read stories and write original summaries from scratch. To amortize reading time, we collect five summaries per document, with the first giving an overview and the subsequent four addressing specific questions. We use this protocol to collect SQuALITY, a dataset of question-focused summaries built on the same public-domain short stories as the multiple-choice dataset QuALITY (Pang et al., 2021). Experiments with state-of-the-art summarization systems show that our dataset is challenging and that existing automatic evaluation metrics are weak indicators of quality.</abstract>
       <url hash="918d1bec">2022.emnlp-main.75</url>
@@ -1129,8 +1129,8 @@
       <author><first>Niket</first><last>Tandon</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Tanvi</first><last>Aggarwal</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Horace</first><last>Liu</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>Stony Brook University</affiliation></author>
       <pages>1204-1219</pages>
       <abstract>Answering questions in narratives about why events happened often requires commonsense knowledge external to the text. What aspects of this knowledge are available in large language models? What aspects can be made accessible via external commonsense resources? We study these questions in the context of answering questions in the TellMeWhy dataset using COMET as a source of relevant commonsense relations. We analyze the effects of model size (T5 and GPT3) along with methods of injecting knowledge (COMET) into these models. Results show that the largest models, as expected, yield substantial improvements over base models. Injecting external knowledge helps models of various sizes, but the amount of improvement decreases with larger model size. We also find that the format in which knowledge is provided is critical, and that smaller models benefit more from larger amounts of knowledge. Finally, we develop an ontology of knowledge types and analyze the relative coverage of the models across these categories.</abstract>
@@ -1193,7 +1193,7 @@
       <author><first>Ziqiao</first><last>Ma</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Keunwoo</first><last>Yu</last><affiliation>University Of Michigan</affiliation></author>
       <author><first>Yuwei</first><last>Bao</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>1280-1298</pages>
       <abstract>Recent years have seen an increasing amount of work on embodied AI agents that can perform tasks by following human language instructions. However, most of these agents are reactive, meaning that they simply learn and imitate behaviors encountered in the training data. These reactive agents are insufficient for long-horizon complex tasks. To address this limitation, we propose a neuro-symbolic deliberative agent that, while following language instructions, proactively applies reasoning and planning based on its neural and symbolic representations acquired from past experience (e.g., natural language and egocentric vision). We show that our deliberative agent achieves greater than 70% improvement over reactive baselines on the challenging TEACh benchmark. Moreover, the underlying reasoning and planning processes, together with our modular framework, offer impressive transparency and explainability to the behaviors of the agent. This enables an in-depth understanding of the agent’s capabilities, which shed light on challenges and opportunities for future embodied agents for instruction following. The code is available at https://github.com/sled-group/DANLI.</abstract>
       <url hash="932681ae">2022.emnlp-main.83</url>
@@ -1277,7 +1277,7 @@
       <title>Geographic Citation Gaps in <fixed-case>NLP</fixed-case> Research</title>
       <author><first>Mukund</first><last>Rungta</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Janvijay</first><last>Singh</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
       <pages>1371-1383</pages>
       <abstract>In a fair world, people have equitable opportunities to education, to conduct scientific research, to publish, and to get credit for their work, regardless of where they live. However, it is common knowledge among researchers that a vast number of papers accepted at top NLP venues come from a handful of western countries and (lately) China; whereas, very few papers from Africa and South America get published. Similar disparities are also believed to exist for paper citation counts. In the spirit of “what we do not measure, we cannot improve”, this work asks a series of questions on the relationship between geographical location and publication success (acceptance in top NLP venues and citation impact). We first created a dataset of 70,000 papers from the ACL Anthology, extracted their meta-information, andgenerated their citation network. We then show that not only are there substantial geographical disparities in paper acceptance and citation but also that these disparities persist even when controlling for a number of variables such as venue of publication and sub-field of NLP. Further, despite some steps taken by the NLP community to improve geographical diversity, we show that the disparity in publication metrics across locations is still on an increasing trend since the early 2000s. We release our code and dataset here: https://github.com/iamjanvijay/acl-cite-net</abstract>
@@ -1362,7 +1362,7 @@
       <author><first>Yue</first><last>Yu</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Chenyan</first><last>Xiong</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Si</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <author><first>Arnold</first><last>Overwijk</last><affiliation>Microsoft</affiliation></author>
       <pages>1462-1479</pages>
       <abstract>We present a new zero-shot dense retrieval (ZeroDR) method, COCO-DR, to improve the generalization ability of dense retrieval by combating the distribution shifts between source training tasks and target scenarios. To mitigate the impact of document differences, COCO-DR continues pretraining the language model on the target corpora to adapt the model to target distributions via COtinuous COtrastive learning. To prepare for unseen target queries, COCO-DR leverages implicit Distributionally Robust Optimization (iDRO) to reweight samples from different source query clusters for improving model robustness over rare queries during fine-tuning. COCO-DR achieves superior average performance on BEIR, the zero-shot retrieval benchmark. At BERT_Base scale, COCO-DR Base outperforms other ZeroDR models with 60x larger size. At BERT_Large scale, COCO-DR Large outperforms the giant GPT-3 embedding model which has 500x more parameters. Our analysis shows the correlation between COCO-DR’s effectiveness in combating distribution shifts and improving zero-shot accuracy. Our code and model can be found at <url>https://github.com/OpenMatch/COCO-DR</url>.</abstract>
@@ -1376,8 +1376,8 @@
       <author><first>Liliang</first><last>Ren</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Zixuan</first><last>Zhang</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Han</first><last>Wang</last><affiliation>Amazon</affiliation></author>
-      <author><first>Clare</first><last>Voss</last><affiliation>Army Research Laboratory</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last><affiliation>Army Research Laboratory</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>1480-1494</pages>
       <abstract>Modern large-scale Pre-trained Language Models (PLMs) have achieved tremendous success on a wide range of downstream tasks. However, most of the LM pre-training objectives only focus on text reconstruction, but have not sought to learn latent-level interpretable representations of sentences. In this paper, we manage to push the language models to obtain a deeper understanding of sentences by proposing a new pre-training objective, Sparse Latent Typing, which enables the model to sparsely extract sentence-level keywords with diverse latent types. Experimental results show that our model is able to learn interpretable latent type categories in a self-supervised manner without using any external knowledge. Besides, the language model pre-trained with such an objective also significantly improves Information Extraction related downstream tasks in both supervised and few-shot settings. Our code is publicly available at https://github.com/renll/SparseLT.</abstract>
@@ -1434,7 +1434,7 @@
       <author><first>Emmanuelle</first><last>Salin</last><affiliation>LIS, Aix Marseille Université</affiliation></author>
       <author><first>Stephane</first><last>Ayache</last><affiliation>Aix-Marseille University</affiliation></author>
       <author><first>Abdellah</first><last>Fourtassi</last><affiliation>Aix-Marseille University</affiliation></author>
-      <author><first>Benoit</first><last>Favre</last><affiliation>Aix-Marseille University LIS/CNRS</affiliation></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last><affiliation>Aix-Marseille University LIS/CNRS</affiliation></author>
       <pages>1538-1555</pages>
       <abstract>Recent advances in vision-and-language modeling have seen the development of Transformer architectures that achieve remarkable performance on multimodal reasoning tasks.Yet, the exact capabilities of these black-box models are still poorly understood. While much of previous work has focused on studying their ability to learn meaning at the word-level, their ability to track syntactic dependencies between words has received less attention.We take a first step in closing this gap by creating a new multimodal task targeted at evaluating understanding of predicate-noun dependencies in a controlled setup.We evaluate a range of state-of-the-art models and find that their performance on the task varies considerably, with some models performing relatively well and others at chance level. In an effort to explain this variability, our analyses indicate that the quality (and not only sheer quantity) of pretraining data is essential. Additionally, the best performing models leverage fine-grained multimodal pretraining objectives in addition to the standard image-text matching objectives.This study highlights that targeted and controlled evaluations are a crucial step for a precise and rigorous test of the multimodal knowledge of vision-and-language models.</abstract>
       <url hash="71702b8e">2022.emnlp-main.100</url>
@@ -1459,7 +1459,7 @@
       <author><first>Silvia</first><last>Severini</last><affiliation>Ludwig-Maximilians-Universität</affiliation></author>
       <author><first>Masoud</first><last>Jalili Sabet</last><affiliation>Center for Information and Speech Processing, Ludwig Maximilian University of Munich</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>LISN CNRS &amp; Univ. Paris Saclay</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>1577-1589</pages>
       <abstract>Part-of-Speech (POS) tagging is an important component of the NLP pipeline, but many low-resource languages lack labeled data for training. An established method for training a POS tagger in such a scenario is to create a labeled training set by transferring from high-resource languages. In this paper, we propose a novel method for transferring labels from multiple high-resource source to low-resource target languages. We formalize POS tag projection as graph-based label propagation. Given translations of a sentence in multiple languages, we create a graph with words as nodes and alignment links as edges by aligning words for all language pairs. We then propagate node labels from source to target using a Graph Neural Network augmented with transformer layers. We show that our propagation creates training sets that allow us to train POS taggers for a diverse set of languages. When combined with enhanced contextualized embeddings, our method achieves a new state-of-the-art for unsupervised POS tagging of low-resource languages.</abstract>
       <url hash="7a70221e">2022.emnlp-main.102</url>
@@ -1559,7 +1559,7 @@
       <title>Learning Label Modular Prompts for Text Classification in the Wild</title>
       <author><first>Hailin</first><last>Chen</last><affiliation>NTU</affiliation></author>
       <author><first>Amrita</first><last>Saha</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Steven C.H.</first><last>Hoi</last><affiliation>Salesforce</affiliation></author>
       <pages>1677-1690</pages>
       <abstract>Machine learning models usually assume i.i.d data during training and testing, but data and tasks in real world often change over time. To emulate the transient nature of real world, we propose a challenging but practical task: text classification in-the-wild, which introduces different non-stationary training/testing stages. Decomposing a complex task into modular components can enable robust generalisation under such non-stationary environment. However, current modular approaches in NLP do not take advantage of recent advances in parameter efficient tuning of pretrained language models. To close this gap, we propose ModularPrompt, a label-modular prompt tuning framework for text classification tasks. In ModularPrompt, the input prompt consists of a sequence of soft label prompts, each encoding modular knowledge related to the corresponding class label. In two of most formidable settings, ModularPrompt outperforms relevant baselines by a large margin demonstrating strong generalisation ability. We also conduct comprehensive analysis to validate whether the learned prompts satisfy properties of a modular representation.</abstract>
@@ -1645,7 +1645,7 @@
       <author><first>Ananth</first><last>Agarwal</last><affiliation>Stanford University</affiliation></author>
       <author><first>Patrick</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Chelsea</first><last>Finn</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>1754-1768</pages>
       <abstract>While large pre-trained language models are powerful, their predictions often lack logical consistency across test inputs. For example, a state-of-the-art Macaw question-answering (QA) model answers &lt;i&gt;Yes&lt;/i&gt; to &lt;i&gt;Is a sparrow a bird?&lt;/i&gt; and &lt;i&gt;Does a bird have feet?&lt;/i&gt; but answers &lt;i&gt;No&lt;/i&gt; to &lt;i&gt;Does a sparrow have feet?&lt;/i&gt;. To address this failure mode, we propose a framework, Consistency Correction through Relation Detection, or &lt;b&gt;ConCoRD&lt;/b&gt;, for boosting the consistency and accuracy of pre-trained NLP models using pre-trained natural language inference (NLI) models without fine-tuning or re-training. Given a batch of test inputs, ConCoRD samples several candidate outputs for each input and instantiates a factor graph that accounts for both the model’s belief about the likelihood of each answer choice in isolation and the NLI model’s beliefs about pair-wise answer choice compatibility. We show that a weighted MaxSAT solver can efficiently compute high-quality answer choices under this factor graph, improving over the raw model’s predictions. Our experiments demonstrate that ConCoRD consistently boosts accuracy and consistency of off-the-shelf closed-book QA and VQA models using off-the-shelf NLI models, notably increasing accuracy of LXMERT on ConVQA by 5% absolute. See the project website (https://ericmitchell.ai/emnlp-2022-concord/) for code and data.</abstract>
       <url hash="b1d50d7a">2022.emnlp-main.115</url>
@@ -1697,7 +1697,7 @@
       <author><first>Fatemehsadat</first><last>Mireshghallah</last><affiliation>UC San Diego</affiliation></author>
       <author><first>Archit</first><last>Uniyal</last><affiliation>Panjab University</affiliation></author>
       <author><first>Tianhao</first><last>Wang</last><affiliation>University of Virginia</affiliation></author>
-      <author><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last><affiliation>University of California San Diego</affiliation></author>
       <pages>1816-1826</pages>
       <abstract>Large language models are shown to present privacy risks through memorization of training data, andseveral recent works have studied such risks for the pre-training phase. Little attention, however, has been given to the fine-tuning phase and it is not well understood how different fine-tuning methods (such as fine-tuning the full model, the model head, and adapter) compare in terms of memorization risk. This presents increasing concern as the “pre-train and fine-tune” paradigm proliferates. In this paper, we empirically study memorization of fine-tuning methods using membership inference and extraction attacks, and show that their susceptibility to attacks is very different. We observe that fine-tuning the head of the model has the highest susceptibility to attacks, whereas fine-tuning smaller adapters appears to be less vulnerable to known extraction attacks.</abstract>
@@ -1766,8 +1766,8 @@
       <title>Stop Measuring Calibration When Humans Disagree</title>
       <author><first>Joris</first><last>Baan</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Wilker</first><last>Aziz</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
       <pages>1892-1915</pages>
       <abstract>Calibration is a popular framework to evaluate whether a classifier knows when it does not know - i.e., its predictive probabilities are a good indication of how likely a prediction is to be correct. Correctness is commonly estimated against the human majority class. Recently, calibration to human majority has been measured on tasks where humans inherently disagree about which class applies. We show that measuring calibration to human majority given inherent disagreements is theoretically problematic, demonstrate this empirically on the ChaosNLI dataset, and derive several instance-level measures of calibration that capture key statistical properties of human judgements - including class frequency, ranking and entropy.</abstract>
       <url hash="8f8c6924">2022.emnlp-main.124</url>
@@ -1780,7 +1780,7 @@
       <author><first>Armineh</first><last>Nourbakhsh</last><affiliation>CMU, JP Morgan Chase</affiliation></author>
       <author><first>Cathy</first><last>Jiao</last><affiliation>CMU</affiliation></author>
       <author><first>Sameena</first><last>Shah</last><affiliation>JP Morgan</affiliation></author>
-      <author><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1916-1932</pages>
       <abstract>Quantitative reasoning is an important aspect of question answering, especially when numeric and verbal cues interact to indicate sophisticated, multi-step programs. In this paper, we demonstrate how modeling the compositional nature of quantitative text can enhance the performance and robustness of QA models, allowing them to capture arithmetic logic that is expressed verbally. Borrowing from the literature on semantic parsing, we propose a method that encourages the QA models to adjust their attention patterns and capture input/output alignments that are meaningful to the reasoning task. We show how this strategy improves program accuracy and renders the models more robust against overfitting as the number of reasoning steps grows. Our approach is designed as a standalone module which can be prepended to many existing models and trained in an end-to-end fashion without the need for additional supervisory signal. As part of this exercise, we also create a unified dataset building on four previously released numerical QA datasets over tabular data.</abstract>
       <url hash="a264bde6">2022.emnlp-main.125</url>
@@ -1792,7 +1792,7 @@
       <title>A Comprehensive Comparison of Neural Networks as Cognitive Models of Inflection</title>
       <author><first>Adam</first><last>Wiemerslage</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Shiran</first><last>Dudy</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>1933-1945</pages>
       <abstract>Neural networks have long been at the center of a debate around the cognitive mechanism by which humans process inflectional morphology. This debate has gravitated into NLP by way of the question: Are neural networks a feasible account for human behavior in morphological inflection?We address that question by measuring the correlation between human judgments and neural network probabilities for unknown word inflections. We test a larger range of architectures than previously studied on two important tasks for the cognitive processing debate: English past tense, and German number inflection. We find evidence that the Transformer may be a better account of human behavior than LSTMs on these datasets, and that LSTM features known to increase inflection accuracy do not always result in more human-like behavior.</abstract>
       <url hash="f779b22f">2022.emnlp-main.126</url>
@@ -1829,7 +1829,7 @@
       <author><first>Evangelia</first><last>Spiliopoulou</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Artidoro</first><last>Pagnoni</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yonatan</first><last>Bisk</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>1982-1997</pages>
       <abstract>This paper investigates models of event implications. Specifically, how well models predict entity state-changes, by targeting their understanding of physical attributes. Nominally, Large Language models (LLM) have been exposed to procedural knowledge about how objects interact, yet our benchmarking shows they fail to reason about the world. Conversely, we also demonstrate that existing approaches often misrepresent the surprising abilities of LLMs via improper task encodings and that proper model prompting can dramatically improve performance of reported baseline results across multiple tasks. In particular, our results indicate that our prompting technique is especially useful for unseen attributes (out-of-domain) or when only limited data is available.</abstract>
       <url hash="9e374e11">2022.emnlp-main.129</url>
@@ -1854,7 +1854,7 @@
     <paper id="131">
       <title>Towards a Unified Multi-Dimensional Evaluator for Text Generation</title>
       <author><first>Ming</first><last>Zhong</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Da</first><last>Yin</last><affiliation>University of California, Los Angeles (UCLA)</affiliation></author>
       <author><first>Yuning</first><last>Mao</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <author><first>Yizhu</first><last>Jiao</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
@@ -1897,7 +1897,7 @@
     <paper id="134">
       <title>Entailer: Answering Questions with Faithful and Truthful Chains of Reasoning</title>
       <author><first>Oyvind</first><last>Tafjord</last><affiliation>AI2</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>2078-2093</pages>
       <abstract>Our goal is a question-answering (QA) system that can show how its answers are implied by its own internal beliefs via a systematic chain of reasoning. Such a capability would allow better understanding of why a model produced the answer it did. Our approach is to recursively combine a trained backward-chainingmodel, capable of generating a set of premises entailing an answer hypothesis, with a verifier that checks that the model itself believes those premises (and the entailment itself) through self-querying. To our knowledge, this is the first system to generate multistep chains that are both faithful (the answer follows from the reasoning) and truthful (the chain reflects the system’s own internal beliefs). In evaluation using two different datasets, users judge that a majority (70%+) of generated chains clearly show how an answer follows from a set of facts - substantially better than a high-performance baseline - while preserving answer accuracy. By materializing model beliefs that systematically support an answer, new opportunities arise for understanding the model’s system of belief, and diagnosing and correcting its misunderstandings when an answer is wrong.</abstract>
@@ -1930,7 +1930,7 @@
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Lambert</first><last>Mathias</last><affiliation>Facebook</affiliation></author>
       <author><first>Asli</first><last>Celikyilmaz</last><affiliation>FAIR @ Meta</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <pages>2109-2120</pages>
       <abstract>Hate speech detection is complex; it relies on commonsense reasoning, knowledge of stereotypes, and an understanding of social nuance that differs from one culture to the next. It is also difficult to collect a large-scale hate speech annotated dataset. In this work, we frame this problem as a few-shot learning task, and show significant gains with decomposing the task into its “constituent” parts. In addition, we see that infusing knowledge from reasoning datasets (e.g. ATOMIC2020) improves the performance even further. Moreover, we observe that the trained models generalize to out-of-distribution datasets, showing the superiority of task decomposition and knowledge infusion compared to previously used methods. Concretely, our method outperforms the baseline by 17.83% absolute gain in the 16-shot case.</abstract>
       <url hash="d97f8856">2022.emnlp-main.136</url>
@@ -2000,7 +2000,7 @@
       <title>Prompt-and-Rerank: A Method for Zero-Shot and Few-Shot Arbitrary Textual Style Transfer with Small Language Models</title>
       <author><first>Mirac</first><last>Suzgun</last><affiliation>Stanford University</affiliation></author>
       <author><first>Luke</first><last>Melas-Kyriazi</last><affiliation>Oxford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>2195-2222</pages>
       <abstract>We propose a method for arbitrary textual style transfer (TST)—the task of transforming a text into any given style—utilizing general-purpose pre-trained language models. Our method, Prompt-and-Rerank, is based on a mathematical formulation of the TST task, decomposing it into three constituent components: textual similarity, target style strength, and fluency. Our method uses zero-shot or few-shot prompting to obtain a set of candidate generations in the target style, and then re-ranks them according to the three components. Our method enables small pre-trained language models to perform on par with state-of-the-art large-scale models while using two orders of magnitude less compute and memory. We also investigate the effect of model size and prompt design (e.g., prompt paraphrasing and delimiter-pair choice) on style transfer quality across seven diverse textual style transfer datasets, finding, among other things, that delimiter-pair choice has a large impact on performance, and that models have biases on the direction of style transfer.</abstract>
       <url hash="f6b34817">2022.emnlp-main.141</url>
@@ -2169,7 +2169,7 @@
       <author><first>Malik</first><last>Altakrori</last><affiliation>McGill University /Mila</affiliation></author>
       <author><first>Thomas</first><last>Scialom</last><affiliation>Meta AI</affiliation></author>
       <author><first>Benjamin C. M.</first><last>Fung</last><affiliation>McGill University</affiliation></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
       <pages>2391-2406</pages>
       <abstract>Authorship obfuscation techniques have commonly been evaluated based on their ability to hide the author’s identity (evasion) while preserving the content of the original text. However, to avoid overstating the systems’ effectiveness, evasion detection must be evaluated using competitive identification techniques in settings that mimic real-life scenarios, and the outcomes of the content-preservation evaluation have to be interpretable by potential users of these obfuscation tools. Motivated by recent work on cross-topic authorship identification and content preservation in summarization, we re-evaluate different authorship obfuscation techniques on detection evasion and content preservation. Furthermore, we propose a new information-theoretic measure to characterize the misattribution harm that can be caused by detection evasion. Our results reveal key weaknesses in state-of-the-art obfuscation techniques and a surprisingly competitive effectiveness from a back-translation baseline in all evaluation aspects.</abstract>
       <url hash="99f627e3">2022.emnlp-main.153</url>
@@ -2184,7 +2184,7 @@
       <author><first>Melanie</first><last>Subbiah</last><affiliation>Columbia University</affiliation></author>
       <author><first>Lydia</first><last>Chilton</last><affiliation>Columbia University</affiliation></author>
       <author><first>Desmond</first><last>Patton</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>Unversity of California, Santa Barbara</affiliation></author>
       <pages>2407-2421</pages>
       <abstract>Understanding what constitutes safe text is an important issue in natural language processing and can often prevent the deployment of models deemed harmful and unsafe. One such type of safety that has been scarcely studied is commonsense physical safety, i.e. text that is not explicitly violent and requires additional commonsense knowledge to comprehend that it leads to physical harm. We create the first benchmark dataset, SafeText, comprising real-life scenarios with paired safe and physically unsafe pieces of advice. We utilize SafeText to empirically study commonsense physical safety across various models designed for text generation and commonsense reasoning tasks. We find that state-of-the-art large language models are susceptible to the generation of unsafe text and have difficulty rejecting unsafe advice. As a result, we argue for further studies of safety and the assessment of commonsense physical safety in models before release.</abstract>
@@ -2197,7 +2197,7 @@
       <title>Ground-Truth Labels Matter: A Deeper Look into Input-Label Demonstrations</title>
       <author><first>Kang Min</first><last>Yoo</last><affiliation>NAVER AI Lab</affiliation></author>
       <author><first>Junyeob</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Hyunsoo</first><last>Cho</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Hwiyeol</first><last>Jo</last><affiliation>Clova AI, Naver</affiliation></author>
       <author><first>Sang-Woo</first><last>Lee</last><affiliation>NAVER Clova</affiliation></author>
@@ -2345,8 +2345,8 @@
       <author><first>Emily</first><last>Gade</last><affiliation>Assistant Professor of Political Science</affiliation></author>
       <author><first>Leroy</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zeyu</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>2562-2580</pages>
       <abstract>Language models increasingly rely on massive web crawls for diverse text data. However, these sources are rife with undesirable content. As such, resources like Wikipedia, books, and news often serve as anchors for automatically selecting web text most suitable for language modeling, a process typically referred to as quality filtering. Using a new dataset of U.S. high school newspaper articles—written by students from across the country—we investigate whose language is preferred by the quality filter used for GPT-3. We find that newspapers from larger schools, located in wealthier, educated, and urban zones (ZIP codes) are more likely to be classified as high quality. We also show that this quality measurement is unaligned with other sensible metrics, such as factuality or literary acclaim. We argue that privileging any corpus as high quality entails a language ideology, and more care is needed to construct training corpora for language models, with better transparency and justification for the inclusion or exclusion of various texts.</abstract>
       <url hash="e792df3d">2022.emnlp-main.165</url>
@@ -2360,7 +2360,7 @@
       <author><first>Yang</first><last>Deng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Wenlong</first><last>Zhao</last><affiliation>cuhk</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Wai</first><last>Lam</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>2581-2594</pages>
       <abstract>We study automatic Contract Clause Extraction (CCE) by modeling implicit relations in legal contracts. Existing CCE methods mostly treat contracts as plain text, creating a substantial barrier to understanding contracts of high complexity. In this work, we first comprehensively analyze the complexity issues of contracts and distill out three implicit relations commonly found in contracts, namely, 1) Long-range Context Relation that captures the correlations of distant clauses; 2) Term-Definition Relation that captures the relation between important terms with their corresponding definitions, and 3) Similar Clause Relation that captures the similarities between clauses of the same type. Then we propose a novel framework ConReader to exploit the above three relations for better contract understanding and improving CCE. Experimental results show that ConReader makes the prediction more interpretable and achieves new state-of-the-art on two CCE tasks in both conventional and zero-shot settings.</abstract>
@@ -2398,7 +2398,7 @@
       <title>Transfer Learning from Semantic Role Labeling to Event Argument Extraction with Template-based Slot Querying</title>
       <author><first>Zhisong</first><last>Zhang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Emma</first><last>Strubell</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>2627-2647</pages>
       <abstract>In this work, we investigate transfer learning from semantic role labeling (SRL) to event argument extraction (EAE), considering their similar argument structures. We view the extraction task as a role querying problem, unifying various methods into a single framework. There are key discrepancies on role labels and distant arguments between semantic role and event argument annotations. To mitigate these discrepancies, we specify natural language-like queries to tackle the label mismatch problem and devise argument augmentation to recover distant arguments. We show that SRL annotations can serve as a valuable resource for EAE, and a template-based slot querying strategy is especially effective for facilitating the transfer. In extensive evaluations on two English EAE benchmarks, our proposed model obtains impressive zero-shot results by leveraging SRL annotations, reaching nearly 80% of the fullysupervised scores. It further provides benefits in low-resource cases, where few EAE annotations are available. Moreover, we show that our approach generalizes to cross-domain and multilingual scenarios.</abstract>
       <url hash="be5053f1">2022.emnlp-main.169</url>
@@ -2408,7 +2408,7 @@
     </paper>
     <paper id="170">
       <title>Calibrating Zero-shot Cross-lingual (Un-)structured Predictions</title>
-      <author><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Anqi</first><last>Liu</last><affiliation>JHU</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University / Microsoft</affiliation></author>
       <pages>2648-2674</pages>
@@ -2451,7 +2451,7 @@
     <paper id="173">
       <title>Measuring Context-Word Biases in Lexical Semantic Datasets</title>
       <author><first>Qianchu</first><last>Liu</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Diana</first><last>McCarthy</last><affiliation>University of Cambridge (DTAL)</affiliation></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last><affiliation>University of Cambridge (DTAL)</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
       <pages>2699-2713</pages>
       <abstract>State-of-the-art pretrained contextualized models (PCM) eg. BERT use tasks such as WiC and WSD to evaluate their word-in-context representations. This inherently assumes that performance in these tasks reflect how well a model represents the coupled word and context semantics. We question this assumption by presenting the first quantitative analysis on the context-word interaction being tested in major contextual lexical semantic tasks. To achieve this, we run probing baselines on masked input, and propose measures to calculate and visualize the degree of context or word biases in existing datasets. The analysis was performed on both models and humans. Our findings demonstrate that models are usually not being tested for word-in-context semantics in the same way as humans are in these tasks, which helps us better understand the model-human gap. Specifically, to PCMs, most existing datasets fall into the extreme ends (the retrieval-based tasks exhibit strong target word bias while WiC-style tasks and WSD show strong context bias); In comparison, humans are less biased and achieve much better performance when both word and context are available than with masked input. We recommend our framework for understanding and controlling these biases for model interpretation and future task design.</abstract>
@@ -2488,7 +2488,7 @@
     <paper id="176">
       <title>Mitigating Data Sparsity for Short Text Topic Modeling by Topic-Semantic Contrastive Learning</title>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>Xinshuai</first><last>Dong</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>2748-2760</pages>
       <abstract>To overcome the data sparsity issue in short text topic modeling, existing methods commonly rely on data augmentation or the data characteristic of short texts to introduce more word co-occurrence information. However, most of them do not make full use of the augmented data or the data characteristic: they insufficiently learn the relations among samples in data, leading to dissimilar topic distributions of semantically similar text pairs. To better address data sparsity, in this paper we propose a novel short text topic modeling framework, Topic-Semantic Contrastive Topic Model (TSCTM). To sufficiently model the relations among samples, we employ a new contrastive learning method with efficient positive and negative sampling strategies based on topic semantics. This contrastive learning method refines the representations, enriches the learning signals, and thus mitigates the sparsity issue. Extensive experimental results show that our TSCTM outperforms state-of-the-art baselines regardless of the data augmentation availability, producing high-quality topics and topic distributions.</abstract>
@@ -2745,7 +2745,7 @@
     </paper>
     <paper id="196">
       <title>Conformal Predictor for Improving Zero-Shot Text Classification Efficiency</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Yu</first><last>Bai</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <author><first>Wenhao</first><last>Liu</last><affiliation>Salesforce Research</affiliation></author>
@@ -2777,7 +2777,7 @@
       <author><first>Akhil</first><last>Kedia</last><affiliation>Samsung Electronics</affiliation></author>
       <author><first>Jongwon</first><last>Lee</last><affiliation>Samsung Research</affiliation></author>
       <author><first>Ashwin</first><last>Paranjape</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kyoung-Gu</first><last>Woo</last><affiliation>Growdle Corporation</affiliation></author>
       <pages>3047-3060</pages>
       <abstract>Recent approaches to Open-domain Question Answering refer to an external knowledge base using a retriever model, optionally rerank passages with a separate reranker model and generate an answer using another reader model. Despite performing related tasks, the models have separate parameters and are weakly-coupled during training. We propose casting the retriever and the reranker as internal passage-wise attention mechanisms applied sequentially within the transformer architecture and feeding computed representations to the reader, with the hidden representations progressively refined at each stage. This allows us to use a single question answering model trained end-to-end, which is a more efficient use of model capacity and also leads to better gradient flow. We present a pre-training method to effectively train this architecture and evaluate our model on the Natural Questions and TriviaQA open datasets. For a fixed parameter budget, our model outperforms the previous state-of-the-art model by 1.0 and 0.7 exact match scores.</abstract>
@@ -2814,7 +2814,7 @@
       <title>Opinion Summarization by Weak-Supervision from Mix-structured Data</title>
       <author><first>Yizhu</first><last>Liu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Qi</first><last>Jia</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>3086-3096</pages>
       <abstract>Opinion summarization of multiple reviews suffers from the lack of reference summaries for training.Most previous approaches construct multiple reviews and their summary based on textual similarities between reviews,resulting in information mismatch between the review input and the summary. In this paper, we convert each review into a mixof structured and unstructured data, which we call opinion-aspect pairs (OAs) and implicit sentences (ISs).We propose a new method to synthesize training pairs of such mix-structured data as input and the textual summary as output,and design a summarization model with OA encoder and IS encoder.Experiments show that our approach outperforms previous methods on Yelp, Amazon and RottenTomatos datasets.</abstract>
       <url hash="a46b0575">2022.emnlp-main.201</url>
@@ -2825,7 +2825,7 @@
       <title>Multi-level Distillation of Semantic Knowledge for Pre-training Multilingual Language Model</title>
       <author><first>Mingqi</first><last>Li</last><affiliation>Clemson University</affiliation></author>
       <author><first>Fei</first><last>Ding</last><affiliation>Clemson University</affiliation></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Clemson University</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Clemson University</affiliation></author>
       <author><first>Long</first><last>Cheng</last><affiliation>Clemson University</affiliation></author>
       <author><first>Hongxin</first><last>Hu</last><affiliation>University at Buffalo, SUNY</affiliation></author>
       <author><first>Feng</first><last>Luo</last><affiliation>Clemson University</affiliation></author>
@@ -2878,7 +2878,7 @@
       <author><first>Baoxing</first><last>Huai</last><affiliation>Huawei Technologies</affiliation></author>
       <author><first>Xin</first><last>Jiang</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>3135-3151</pages>
       <abstract>There is a growing body of work in recent years to develop pre-trained language models (PLMs) for the Arabic language. This work addresses two major problems in existing Arabic PLMs that limit the progress of the Arabic NLU and NLG fields. First, existing Arabic PLMs are not well-explored and their pre-training can be improved significantly using a more methodical approach. Second, there is a lack of systematic and reproducible evaluation of these models in the literature. We revisit both the pre-training and evaluation of Arabic PLMs. In terms of pre-training, we explore the impact of the quality of the pretraining data, the size of the model, and the incorporation of character-level information on Arabic PLM. As a result, we release three new Arabic BERT-style models ( JABER, Char-JABER, and SABER), and two T5-style models (AT5S and AT5B). In terms of evaluation, we conduct a comprehensive empirical study to systematically evaluate the performance of existing state-of-the-art models on ALUE, a leaderboard-powered benchmark for Arabic NLU tasks, and on a subset of the Arabic generative tasks. We show that our models significantly outperform existing Arabic PLMs and achieve a new state-of-the-art performance on discriminative and generative Arabic NLU and NLG tasks. Our models and source code to reproduce results will be made available upon acceptance.</abstract>
       <url hash="909ad4cd">2022.emnlp-main.205</url>
@@ -3002,7 +3002,7 @@
       <author><first>Weijia</first><last>Shi</last><affiliation>ucla.edu</affiliation></author>
       <author><first>Julian</first><last>Michael</last><affiliation>University of Washington</affiliation></author>
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Paul G. Allen School of Computer Science; Meta AI</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
       <pages>3254-3265</pages>
       <abstract>Retrieval-augmented language models (LMs) use non-parametric memory to substantially outperform their non-retrieval counterparts on perplexity-based evaluations, but it is an open question whether they achieve similar gains in few- and zero-shot end-task accuracy. We extensively study one such model, the k-nearest neighbor LM (kNN-LM), showing that the gains marginally transfer. The main challenge is to achieve coverage of the verbalizer tokens that define the different end-task class labels. To address this challenge, we also introduce kNN-Prompt, a simple and effective kNN-LM with automatically expanded fuzzy verbalizers (e.g. to expand “terrible” to also include “silly” and other task-specific synonyms for sentiment classification). Across nine diverse end-tasks, using kNN-Prompt with GPT-2 large yields significant performance boosts over strong zeroshot baselines (13.4% absolute improvement over the base LM on average). We also show that other advantages of non-parametric augmentation hold for end tasks; kNN-Prompt is effective for domain adaptation with no further training, and gains increase with the size of the retrieval model.</abstract>
       <url hash="fc5f4b9e">2022.emnlp-main.214</url>
@@ -3037,7 +3037,7 @@
     </paper>
     <paper id="217">
       <title>Making Pretrained Language Models Good Long-tailed Learners</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Lei</first><last>Ren</last><affiliation>Meituan-Dianping Group</affiliation></author>
       <author><first>Jingang</first><last>Wang</last><affiliation>Meituan</affiliation></author>
       <author><first>Wei</first><last>Wu</last><affiliation>meituan</affiliation></author>
@@ -3077,8 +3077,8 @@
     </paper>
     <paper id="220">
       <title><fixed-case>F</fixed-case>ine<fixed-case>D</fixed-case>-Eval: Fine-grained Automatic Dialogue-Level Evaluation</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>ECE, National University of SIngapore</affiliation></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last><affiliation>Speech Technology and Machine Learning Group, ETSI de Telecomunicación, Universidad Politécnica de Madrid</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>ECE, National University of SIngapore</affiliation></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last><affiliation>Speech Technology and Machine Learning Group, ETSI de Telecomunicación, Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Qiquan</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Thomas</first><last>Friedrichs</last><affiliation>Robert Bosch (SEA) Pte Ltd</affiliation></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
@@ -3108,7 +3108,7 @@
       <author><first>Han</first><last>Guo</last><affiliation>CMU</affiliation></author>
       <author><first>Tianmin</first><last>Shu</last><affiliation>MIT</affiliation></author>
       <author><first>Meng</first><last>Song</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>UC San Diego</affiliation></author>
       <pages>3369-3391</pages>
       <abstract>Prompting has shown impressive success in enabling large pre-trained language models (LMs) to perform diverse NLP tasks, especially with only few downstream data. Automatically finding the optimal prompt for each task, however, is challenging. Most existing work resorts to tuning *soft* prompts (e.g., embeddings) which fall short of interpretability, reusability across LMs, and applicability when gradients are not accessible. *Discrete* prompts, on the other hand, are difficult to optimize, and are often created by “enumeration (e.g., paraphrasing)-then-selection” heuristics that do not explore the prompt space systematically. This paper proposes RLPrompt, an efficient discrete prompt optimization approach with reinforcement learning (RL). RLPrompt formulates a parameter-efficient policy network that generates the optimized discrete prompt after training with reward. To harness the complex and stochastic reward signals from the large LM environment, we incorporate effective reward stabilization that substantially enhances training efficiency. RLPrompt is flexibly applicable to different types of LMs, such as masked (e.g., BERT) and left-to-right models (e.g., GPTs), for both classification and generation tasks. Experiments on few-shot classification and unsupervised text style transfer show superior performance over a wide range of existing fine-tuning or prompting methods. Interestingly, the resulting optimized prompts are often ungrammatical gibberish text; and surprisingly, those gibberish prompts are transferrable between different LMs to retain significant performance, indicating that LM prompting may not follow human language patterns.</abstract>
@@ -3197,7 +3197,7 @@
     </paper>
     <paper id="228">
       <title>Discovering Differences in the Representation of People using Contextualized Semantic Axes</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>University of California, Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>Divya</first><last>Tadimeti</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>David</first><last>Bamman</last><affiliation>University of California, Berkeley</affiliation></author>
       <pages>3477-3494</pages>
@@ -3233,11 +3233,11 @@
     </paper>
     <paper id="231">
       <title>Natural Language to Code Translation with Execution</title>
-      <author><first>Freda</first><last>Shi</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Marjan</first><last>Ghazvininejad</last><affiliation>FAIR</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
-      <author><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
       <pages>3533-3546</pages>
       <abstract>Generative models of code, pretrained on large corpora of programs, have shown great success in translating natural language to code (Chen et al., 2021; Austin et al., 2021; Li et al., 2022, inter alia). While these models do not explicitly incorporate program semantics (i.e., execution results) during training, they are able to generate correct solutions for many problems. However, choosing a single correct program from a generated set for each problem remains challenging. In this work, we introduce execution result–based minimum Bayes risk decoding (MBR-EXEC) for program selection and show that it improves the few-shot performance of pretrained code models on natural-language-to-code tasks. We select output programs from a generated candidate set by marginalizing over program implementations that share the same semantics. Because exact equivalence is intractable, we execute each program on a small number of test inputs to approximate semantic equivalence. Across datasets, execution or simulated execution significantly outperforms the methods that do not involve program semantics. We find that MBR-EXEC consistently improves over all execution-unaware selection methods, suggesting it as an effective approach for natural language to code translation.</abstract>
       <url hash="2fec569a">2022.emnlp-main.231</url>
@@ -3258,7 +3258,7 @@
     <paper id="233">
       <title>Language Contamination Helps Explains the Cross-lingual Capabilities of <fixed-case>E</fixed-case>nglish Pretrained Models</title>
       <author><first>Terra</first><last>Blevins</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>3563-3574</pages>
       <abstract>English pretrained language models, which make up the backbone of many modern NLP systems, require huge amounts of unlabeled training data. These models are generally presented as being trained only on English text but have been found to transfer surprisingly well to other languages. We investigate this phenomenon and find that common English pretraining corpora actually contain significant amounts of non-English text: even when less than 1% of data is not English (well within the error rate of strong language classifiers), this leads to hundreds of millions of foreign language tokens in large-scale datasets. We then demonstrate that even these small percentages of non-English data facilitate cross-lingual transfer for models trained on them, with target language performance strongly correlated to the amount of in-language data seen during pretraining. In light of these findings, we argue that no model is truly monolingual when pretrained at scale, which should be considered when evaluating cross-lingual transfer.</abstract>
       <url hash="d9e35c9c">2022.emnlp-main.233</url>
@@ -3269,7 +3269,7 @@
       <title>Analyzing the Mono- and Cross-Lingual Pretraining Dynamics of Multilingual Language Models</title>
       <author><first>Terra</first><last>Blevins</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hila</first><last>Gonen</last><affiliation>UW and FAIR</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>3575-3590</pages>
       <abstract>The emergent cross-lingual transfer seen in multilingual pretrained models has sparked significant interest in studying their behavior. However, because these analyses have focused on fully trained multilingual models, little is known about the dynamics of the multilingual pretraining process. We investigate when these models acquire their in-language and cross-lingual abilities by probing checkpoints taken from throughout XLM-R pretraining, using a suite of linguistic tasks. Our analysis shows that the model achieves high in-language performance early on, with lower-level linguistic skills acquired before more complex ones. In contrast, the point in pretraining when the model learns to transfer cross-lingually differs across language pairs. Interestingly, we also observe that, across many languages and tasks, the final model layer exhibits significant performance degradation over time, while linguistic knowledge propagates to lower layers of the network. Taken together, these insights highlight the complexity of multilingual pretraining and the resulting varied behavior for different languages over time.</abstract>
       <url hash="f914cd7c">2022.emnlp-main.234</url>
@@ -3320,7 +3320,7 @@
       <author><first>Jered</first><last>McInerney</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Geoffrey</first><last>Young</last><affiliation>Brigham and Women’s Hospital</affiliation></author>
       <author><first>Jan-Willem</first><last>van de Meent</last><affiliation>Northeastern University, University of Amsterdam</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>3626-3648</pages>
       <abstract>Pretraining multimodal models on Electronic Health Records (EHRs) provides a means of learning representations that can transfer to downstream tasks with minimal supervision. Recent multimodal models induce soft local alignments between image regions and sentences. This is of particular interest in the medical domain, where alignments might highlight regions in an image relevant to specific phenomena described in free-text. While past work has suggested that attention “heatmaps” can be interpreted in this manner, there has been little evaluation of such alignments. We compare alignments from a state-of-the-art multimodal (image and text) model for EHR with human annotations that link image regions to sentences. Our main finding is that the text has an often weak or unintuitive influence on attention; alignments do not consistently reflect basic anatomical information. Moreover, synthetic modifications — such as substituting “left” for “right” — do not substantially influence highlights. Simple techniques such as allowing the model to opt out of attending to the image and few-shot finetuning show promise in terms of their ability to improve alignments with very little or no supervision. We make our code and checkpoints open-source.</abstract>
       <url hash="96ea0ff4">2022.emnlp-main.238</url>
@@ -3346,7 +3346,7 @@
       <author><first>Zhixing</first><last>Tan</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zhaopeng</first><last>Tu</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>3665-3679</pages>
       <abstract>Machine translation systems are expected to cope with various types of constraints in many practical scenarios. While neural machine translation (NMT) has achieved strong performance in unconstrained cases, it is non-trivial to impose pre-specified constraints into the translation process of NMT models. Although many approaches have been proposed to address this issue, most existing methods can not satisfy the following three desiderata at the same time: (1) high translation quality, (2) high match accuracy, and (3) low latency. In this work, we propose a template-based method that can yield results with high translation quality and match accuracy and the inference speed of our method is comparable with unconstrained NMT models. Our basic idea is to rearrange the generation of constrained and unconstrained tokens through a template. Our method does not require any changes in the model architecture and the decoding algorithm. Experimental results show that the proposed template-based approach can outperform several representative baselines in both lexically and structurally constrained translation tasks.</abstract>
       <url hash="68aaa586">2022.emnlp-main.240</url>
@@ -3396,7 +3396,7 @@
       <author><first>Zhen</first><last>Yang</last><affiliation>tencent.com</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
       <pages>3715-3725</pages>
       <abstract>Word alignment which aims to extract lexicon translation equivalents between source and target sentences, serves as a fundamental tool for natural language processing. Recent studies in this area have yielded substantial improvements by generating alignments from contextualized embeddings of the pre-trained multilingual language models. However, we find that the existing approaches capture few interactions between the input sentence pairs, which degrades the word alignment quality severely, especially for the ambiguous words in the monolingual context. To remedy this problem, we propose Cross-Align to model deep interactions between the input sentence pairs, in which the source and target sentences are encoded separately with the shared self-attention modules in the shallow layers, while cross-lingual interactions are explicitly constructed by the cross-attention modules in the upper layers. Besides, to train our model effectively, we propose a two-stage training framework, where the model is trained with a simple Translation Language Modeling (TLM) objective in the first stage and then finetuned with a self-supervised alignment objective in the second stage. Experiments show that the proposed Cross-Align achieves the state-of-the-art (SOTA) performance on four out of five language pairs.</abstract>
@@ -3409,7 +3409,7 @@
       <author><first>Tianxiang</first><last>Sun</last><affiliation>Fudan University</affiliation></author>
       <author><first>Junliang</first><last>He</last><affiliation>Chongqing University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>3726-3739</pages>
       <abstract>Automatic evaluation metrics are crucial to the development of generative systems. In recent years, pre-trained language model (PLM) based metrics, such as BERTScore, have been commonly adopted in various generation tasks. However, it has been demonstrated that PLMs encode a range of stereotypical societal biases, leading to a concern about the fairness of PLMs as metrics. To that end, this work presents the first systematic study on the social bias in PLM-based metrics. We demonstrate that popular PLM-based metrics exhibit significantly higher social bias than traditional metrics on 6 sensitive attributes, namely race, gender, religion, physical appearance, age, and socioeconomic status. In-depth analysis suggests that choosing paradigms (matching, regression, or generation) of the metric has a greater impact on fairness than choosing PLMs. In addition, we develop debiasing adapters that are injected into PLM layers, mitigating bias in PLM-based metrics while retaining high performance for evaluating text generation.</abstract>
       <url hash="cde90676">2022.emnlp-main.245</url>
@@ -3434,9 +3434,9 @@
     </paper>
     <paper id="247">
       <title>Not to Overfit or Underfit the Source Domains? An Empirical Study of Domain Generalization in Question Answering</title>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
       <pages>3752-3761</pages>
       <abstract>Machine learning models are prone to overfitting their training (source) domains, which is commonly believed to be the reason why they falter in novel target domains. Here we examine the contrasting view that multi-source domain generalization (DG) is first and foremost a problem of mitigating source domain underfitting: models not adequately learning the signal already present in their multi-domain training data. Experiments on a reading comprehension DG benchmark show that as a model learns its source domains better—using familiar methods such as knowledge distillation (KD) from a bigger model—its zero-shot out-of-domain utility improves at an even faster pace. Improved source domain learning also demonstrates superior out-of-domain generalization over three popular existing DG approaches that aim to limit overfitting. Our implementation of KD-based domain generalization is available via PrimeQA at: https://ibm.biz/domain-generalization-with-kd.</abstract>
       <url hash="802d8f33">2022.emnlp-main.247</url>
@@ -3463,9 +3463,9 @@
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Mandar</first><last>Joshi</last><affiliation>Google</affiliation></author>
       <author><first>Armen</first><last>Aghajanyan</last><affiliation>Facebook</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Facebook AI Research</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Joelle</first><last>Pineau</last><affiliation>McGill University</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>3781-3797</pages>
       <abstract>We propose a simple and effective re-ranking method for improving passage retrieval in open question answering. The re-ranker re-scores retrieved passages with a zero-shot question generation model, which uses a pre-trained language model to compute the probability of the input question conditioned on a retrieved passage. This approach can be applied on top of any retrieval method (e.g. neural or keyword-based), does not require any domain- or task-specific training (and therefore is expected to generalize better to data distribution shifts), and provides rich cross-attention between query and passage (i.e. it must explain every token in the question). When evaluated on a number of open-domain retrieval datasets, our re-ranker improves strong unsupervised retrieval models by 6%-18% absolute and strong supervised models by up to 12% in terms of top-20 passage retrieval accuracy. We also obtain new state-of-the-art results on full open-domain question answering by simply adding the new re-ranker to existing models with no further changes.</abstract>
       <url hash="5a7e06ba">2022.emnlp-main.249</url>
@@ -3475,7 +3475,7 @@
     <paper id="250">
       <title>Summarizing Community-based Question-Answer Pairs</title>
       <author><first>Ting-Yao</first><last>Hsu</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Yoshi</first><last>Suhara</last><affiliation>Grammarly</affiliation></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last><affiliation>Grammarly</affiliation></author>
       <author><first>Xiaolan</first><last>Wang</last><affiliation>Megagon Labs</affiliation></author>
       <pages>3798-3808</pages>
       <abstract>Community-based Question Answering (CQA), which allows users to acquire their desired information, has increasingly become an essential component of online services in various domains such as E-commerce, travel, and dining. However, an overwhelming number of CQA pairs makes it difficult for users without particular intent to find useful information spread over CQA pairs. To help users quickly digest the key information, we propose the novel CQA summarization task that aims to create a concise summary from CQA pairs. To this end, we first design a multi-stage data annotation process and create a benchmark dataset, COQASUM, based on the Amazon QA corpus. We then compare a collection of extractive and abstractive summarization methods and establish a strong baseline approach DedupLED for the CQA summarization task. Our experiment further confirms two key challenges, sentence-type transfer and deduplication removal, towards the CQA summarization task. Our data and code are publicly available.</abstract>
@@ -3510,7 +3510,7 @@
     <paper id="253">
       <title>Chapter Ordering in Novels</title>
       <author><first>Allen</first><last>Kim</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Steve</first><last>Skiena</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last><affiliation>Stony Brook University</affiliation></author>
       <pages>3838-3848</pages>
       <abstract>Understanding narrative flow and text coherence in long-form documents (novels) remains an open problem in NLP.To gain insight, we explore the task of chapter ordering, reconstructing the original order of chapters in novel given a random permutation of the text. This can be seen as extending the well-known sentence ordering task to vastly larger documents: our task deals with over 9,000 novels with an average of twenty chapters each, versus standard sentence ordering datasets averaging only 5-8 sentences. We formulate the task of reconstructing order as a constraint solving problem, using minimum feedback arc set and traveling salesman problem optimization criteria, where the weights of the graph are generated based on models for character occurrences and chapter boundary detection, using relational chapter scores derived from RoBERTa. Our best methods yield a Spearman correlation of 0.59 on this novel and challenging task, substantially above baseline.</abstract>
       <url hash="53375fa3">2022.emnlp-main.253</url>
@@ -3535,7 +3535,7 @@
       <author><first>Breno William</first><last>Carvalho</last><affiliation>IBM Research</affiliation></author>
       <author><first>Ibrahim</first><last>Abdelaziz</last><affiliation>IBM Research</affiliation></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Alexander</first><last>Gray</last><affiliation>IBM Research</affiliation></author>
       <pages>3863-3875</pages>
       <abstract>Knowledge base completion (KBC) has benefitted greatly by learning explainable rules in an human-interpretable dialect such as first-order logic. Rule-based KBC has so far, mainly focussed on learning one of two types of rules: conjunction-of-disjunctions and disjunction-of-conjunctions. We qualitatively show, via examples, that one of these has an advantage over the other when it comes to achieving high quality KBC. To the best of our knowledge, we are the first to propose learning both kinds of rules within a common framework. To this end, we propose to utilize logical neural networks (LNN), a powerful neuro-symbolic AI framework that can express both kinds of rules and learn these end-to-end using gradient-based optimization. Our in-depth experiments show that our LNN-based approach to learning rules for KBC leads to roughly 10% relative improvements, if not more, over SotA rule-based KBC methods. Moreover, by showing how to combine our proposed methods with knowledge graph embeddings we further achieve an additional 7.5% relative improvement.</abstract>
@@ -3570,7 +3570,7 @@
     <paper id="258">
       <title>Sparse Teachers Can Be Dense with Knowledge</title>
       <author><first>Yi</first><last>Yang</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Dawei</first><last>Song</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>3904-3915</pages>
       <abstract>Recent advances in distilling pretrained language models have discovered that, besides the expressiveness of knowledge, the student-friendliness should be taken into consideration to realize a truly knowledgeable teacher. Based on a pilot study, we find that over-parameterized teachers can produce expressive yet student-unfriendly knowledge and are thus limited in overall knowledgeableness. To remove the parameters that result in student-unfriendliness, we propose a sparse teacher trick under the guidance of an overall knowledgeable score for each teacher parameter. The knowledgeable score is essentially an interpolation of the expressiveness and student-friendliness scores. The aim is to ensure that the expressive parameters are retained while the student-unfriendly ones are removed. Extensive experiments on the GLUE benchmark show that the proposed sparse teachers can be dense with knowledge and lead to students with compelling performance in comparison with a series of competitive baselines.</abstract>
@@ -3584,7 +3584,7 @@
       <author><first>Zhengfu</first><last>He</last><affiliation>Fudan University</affiliation></author>
       <author><first>Hong</first><last>Qian</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yunhua</first><last>Zhou</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>3916-3930</pages>
       <abstract>Most downstream adaptation methods tune all or part of the parameters of pre-trained models (PTMs) through gradient descent, where the tuning cost increases linearly with the growth of the model size.By contrast, gradient-free methods only require the forward computation of the PTM to tune the prompt, retaining the benefits of efficient tuning and deployment.Though, past work on gradient-free tuning often introduces gradient descent to seek a good initialization of prompt and lacks versatility across tasks and PTMs.In this paper, we present BBTv2, an improved version of Black-Box Tuning, to drive PTMs for few-shot learning.We prepend continuous prompts to every layer of the PTM and propose a divide-and-conquer gradient-free algorithm to optimize the prompts at different layers alternately.Extensive experiments across various tasks and PTMs show that BBTv2 can achieve comparable performance to full model tuning and state-of-the-art parameter-efficient methods (e.g., Adapter, LoRA, BitFit, etc.) under few-shot settings while maintaining much fewer tunable parameters.</abstract>
@@ -3606,7 +3606,7 @@
     <paper id="261">
       <title>Mixed-effects transformers for hierarchical adaptation</title>
       <author><first>Julia</first><last>White</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
       <author><first>Robert</first><last>Hawkins</last><affiliation>Princeton University</affiliation></author>
       <pages>3944-3954</pages>
       <abstract>Language differs dramatically from context to context. To some degree, large language models like GPT-3 account for such variation by conditioning on strings of initial input text, or prompts. However, prompting can be ineffective when contexts are sparse, out-of-sample, or extra-textual. In this paper, we introduce the mixed-effects transformer (MET), a novel approach for learning hierarchically-structured prefixes— lightweight modules prepended to an input sequence— to account for structured variation in language use. Specifically, we show how the popular class of mixed-effects regression models may be extended to transformer-based architectures using a regularized prefix-tuning procedure with dropout. We evaluate this approach on several domain-adaptation benchmarks, finding that it learns contextual variation from minimal data while generalizing well to unseen contexts.</abstract>
@@ -3619,7 +3619,7 @@
       <title>On Measuring the Intrinsic Few-Shot Hardness of Datasets</title>
       <author><first>Xinran</first><last>Zhao</last><affiliation>Stanford University</affiliation></author>
       <author><first>Shikhar</first><last>Murty</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>3955-3963</pages>
       <abstract>While advances in pre-training have led to dramatic improvements in few-shot learning of NLP tasks, there is limited understanding of what drives successful few-shot adaptation in datasets. In particular, given a new dataset and a pre-trained model, what properties of the dataset make it few-shot learnable, and are these properties independent of the specific adaptation techniques used? We consider an extensive set of recent few-shot learning methods and show that their performance across a large number of datasets is highly correlated, showing that few-shot hardness may be intrinsic to datasets, for a given pre-trained model. To estimate intrinsic few-shot hardness, we then propose a simple and lightweight metric called Spread that captures the intuition that few-shot learning is made possible by exploiting feature-space invariances between training and test samples. Our metric better accounts for few-shot hardness compared to existing notions of hardness and is ~8-100x faster to compute.</abstract>
       <url hash="46932c59">2022.emnlp-main.262</url>
@@ -3753,7 +3753,7 @@
       <title>Syntactic Multi-view Learning for Open Information Extraction</title>
       <author><first>Kuicai</first><last>Dong</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Aixin</first><last>Sun</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Jung-Jae</first><last>Kim</last><affiliation>Institute for Infocomm Research</affiliation></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <author><first>Xiaoli</first><last>Li</last><affiliation>Institute for Infocomm Research/Nanyang Technological University</affiliation></author>
       <pages>4072-4083</pages>
       <abstract>Open Information Extraction (OpenIE) aims to extract relational tuples from open-domain sentences. Traditional rule-based or statistical models were developed based on syntactic structure of sentence, identified by syntactic parsers. However, previous neural OpenIE models under-explored the useful syntactic information. In this paper, we model both constituency and dependency trees into word-level graphs, and enable neural OpenIE to learn from the syntactic structures. To better fuse heterogeneous information from the two graphs, we adopt multi-view learning to capture multiple relationships from them. Finally, the finetuned constituency and dependency representations are aggregated with sentential semantic representations for tuple generation. Experiments show that both constituency and dependency information, and the multi-view learning are effective.</abstract>
@@ -3799,7 +3799,7 @@
       <author><first>SongYang</first><last>Gao</last><affiliation>Fudan University</affiliation></author>
       <author><first>Shihan</first><last>Dou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4112-4122</pages>
       <abstract>Dataset bias has attracted increasing attention recently for its detrimental effect on the generalization ability of fine-tuned models. The current mainstream solution is designing an additional shallow model to pre-identify biased instances. However, such two-stage methods scale up the computational complexity of training process and obstruct valid feature information while mitigating bias.To address this issue, we utilize the representation normalization method which aims at disentangling the correlations between features of encoded sentences. We find it also promising in eliminating the bias problem by providing isotropic data distribution. We further propose Kernel-Whitening, a Nystrom kernel approximation method to achieve more thorough debiasing on nonlinear spurious correlations. Our framework is end-to-end with similar time consumption to fine-tuning. Experiments show that Kernel-Whitening significantly improves the performance of BERT on out-of-distribution datasets while maintaining in-distribution accuracy.</abstract>
       <url hash="63da8a39">2022.emnlp-main.275</url>
@@ -3909,7 +3909,7 @@
       <author><first>Elisa</first><last>Bassignana</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Max</first><last>Müller-Eberstein</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>4218-4227</pages>
       <abstract>With the increase in availability of large pre-trained language models (LMs) in Natural Language Processing (NLP), it becomes critical to assess their fit for a specific target task a priori—as fine-tuning the entire space of available LMs is computationally prohibitive and unsustainable. However, encoder transferability estimation has received little to no attention in NLP. In this paper, we propose to generate quantitative evidence to predict which LM, out of a pool of models, will perform best on a target task without having to fine-tune all candidates. We provide a comprehensive study on LM ranking for 10 NLP tasks spanning the two fundamental problem types of classification and structured prediction. We adopt the state-of-the-art Logarithm of Maximum Evidence (LogME) measure from Computer Vision (CV) and find that it positively correlates with final LM performance in 94% of the setups.In the first study of its kind, we further compare transferability measures with the de facto standard of human practitioner ranking, finding that evidence from quantitative metrics is more robust than pure intuition and can help identify unexpected LM candidates.</abstract>
       <url hash="492379af">2022.emnlp-main.283</url>
@@ -3921,7 +3921,7 @@
       <title>Chunk-based Nearest Neighbor Machine Translation</title>
       <author><first>Pedro Henrique</first><last>Martins</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico</affiliation></author>
       <author><first>Zita</first><last>Marinho</last><affiliation>Deepmind</affiliation></author>
-      <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>4228-4245</pages>
       <abstract>Semi-parametric models, which augment generation with retrieval, have led to impressive results in language modeling and machine translation, due to their ability to retrieve fine-grained information from a datastore of examples. One of the most prominent approaches, kNN-MT, exhibits strong domain adaptation capabilities by retrieving tokens from domain-specific datastores (Khandelwal et al., 2021). However, kNN-MT requires an expensive retrieval operation for every single generated token, leading to a very low decoding speed (around 8 times slower than a parametric model). In this paper, we introduce a chunk-based kNN-MT model which retrieves chunks of tokens from the datastore, instead of a single token. We propose several strategies for incorporating the retrieved chunks into the generation process, and for selecting the steps at which the model needs to search for neighbors in the datastore. Experiments on machine translation in two settings, static and “on-the-fly” domain adaptation, show that the chunk-based kNN-MT model leads to significant speed-ups (up to 4 times) with only a small drop in translation quality.</abstract>
       <url hash="052b7d2d">2022.emnlp-main.284</url>
@@ -3974,13 +3974,13 @@
     <paper id="288">
       <title><fixed-case>MT</fixed-case>-<fixed-case>G</fixed-case>en<fixed-case>E</fixed-case>val: A Counterfactual and Contextual Dataset for Evaluating Gender Accuracy in Machine Translation</title>
       <author><first>Anna</first><last>Currey</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Maria</first><last>Nadejde</last><affiliation>Amazon AWS AI</affiliation></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last><affiliation>Amazon AWS AI</affiliation></author>
       <author><first>Raghavendra Reddy</first><last>Pappagari</last><affiliation>Amazon Web Services</affiliation></author>
       <author><first>Mia</first><last>Mayer</last><affiliation>AWS</affiliation></author>
       <author><first>Stanislas</first><last>Lauly</last><affiliation>New York University</affiliation></author>
       <author><first>Xing</first><last>Niu</last><affiliation>Amazon AI</affiliation></author>
       <author><first>Benjamin</first><last>Hsu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Georgiana</first><last>Dinu</last><affiliation>Amazon AWS</affiliation></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last><affiliation>Amazon AWS</affiliation></author>
       <pages>4287-4299</pages>
       <abstract>As generic machine translation (MT) quality has improved, the need for targeted benchmarks that explore fine-grained aspects of quality has increased. In particular, gender accuracy in translation can have implications in terms of output fluency, translation accuracy, and ethics. In this paper, we introduce MT-GenEval, a benchmark for evaluating gender accuracy in translation from English into eight widely-spoken languages. MT-GenEval complements existing benchmarks by providing realistic, gender-balanced, counterfactual data in eight language pairs where the gender of individuals is unambiguous in the input segment, including multi-sentence segments requiring inter-sentential gender agreement. Our data and code is publicly available under a CC BY SA 3.0 license.</abstract>
       <url hash="e4a596ea">2022.emnlp-main.288</url>
@@ -4005,7 +4005,7 @@
       <title>On the Calibration of Massively Multilingual Language Models</title>
       <author><first>Kabir</first><last>Ahuja</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Sunayana</first><last>Sitaram</last><affiliation>Microsoft Research India</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Microsoft Research</affiliation></author>
       <pages>4310-4323</pages>
       <abstract>Massively Multilingual Language Models (MMLMs) have recently gained popularity due to their surprising effectiveness in cross-lingual transfer. While there has been much work in evaluating these models for their performance on a variety of tasks and languages, little attention has been paid on how well calibrated these models are with respect to the confidence in their predictions. We first investigate the calibration of MMLMs in the zero-shot setting and observe a clear case of miscalibration in low-resource languages or those which are typologically diverse from English. Next, we empirically show that calibration methods like temperature scaling and label smoothing do reasonably well in improving calibration in the zero-shot scenario. We also find that few-shot examples in the language can further help reduce calibration errors, often substantially. Overall, our work contributes towards building more reliable multilingual models by highlighting the issue of their miscalibration, understanding what language and model-specific factors influence it, and pointing out the strategies to improve the same.</abstract>
@@ -4057,7 +4057,7 @@
       <title>Retrieval Augmentation for Commonsense Reasoning: A Unified Approach</title>
       <author><first>Wenhao</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
+      <author><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Shuohang</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Zhuosheng</first><last>Zhang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yuwei</first><last>Fang</last><affiliation>Microsoft</affiliation></author>
@@ -4074,7 +4074,7 @@
       <author><first>Guoyin</first><last>Wang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Jiwei</first><last>Li</last><affiliation>Shannon.AI</affiliation></author>
       <author><first>Sunghyun</first><last>Park</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Puyang</first><last>Xu</last><affiliation>Mobvoi</affiliation></author>
       <author><first>Ricardo</first><last>Henao</last><affiliation>Duke University</affiliation></author>
       <author><first>Lawrence</first><last>Carin</last><affiliation>Duke University</affiliation></author>
@@ -4104,7 +4104,7 @@
       <author><first>David</first><last>Thulke</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Yingbo</first><last>Gao</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Christian</first><last>Herold</last><affiliation>RWTH Aachen University</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <pages>4480-4487</pages>
       <abstract>Currently, in speech translation, the straightforward approach - cascading a recognition system with a translation system - delivers state-of-the-art results.However, fundamental challenges such as error propagation from the automatic speech recognition system still remain.To mitigate these problems, recently, people turn their attention to direct data and propose various joint training methods.In this work, we seek to answer the question of whether joint training really helps cascaded speech translation.We review recent papers on the topic and also investigate a joint training criterion by marginalizing the transcription posterior probabilities.Our findings show that a strong cascaded baseline can diminish any improvements obtained using joint training, and we suggest alternatives to joint training.We hope this work can serve as a refresher of the current speech translation landscape, and motivate research in finding more efficient and creative ways to utilize the direct data for speech translation.</abstract>
       <url hash="a8073e32">2022.emnlp-main.297</url>
@@ -4114,17 +4114,17 @@
     </paper>
     <paper id="298">
       <title><fixed-case>M</fixed-case>asakha<fixed-case>NER</fixed-case> 2.0: <fixed-case>A</fixed-case>frica-centric Transfer Learning for Named Entity Recognition</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
       <author><first>Shruti</first><last>Rijhwani</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Michael</first><last>Beukman</last><affiliation>University of the Witwatersrand</affiliation></author>
       <author><first>Chester</first><last>Palen-Michel</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Constantine</first><last>Lignos</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>Shamsuddeen H.</first><last>Muhammad</last><affiliation>Bayero University, Kano</affiliation></author>
       <author><first>Peter</first><last>Nabende</last><affiliation>Makerere University</affiliation></author>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last><affiliation>University of Bergen</affiliation></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last><affiliation>University of Bergen</affiliation></author>
       <author><first>Andiswa</first><last>Bukula</last><affiliation>SADiLaR</affiliation></author>
       <author><first>Rooweither</first><last>Mabuya</last><affiliation>South African Centre for Digital Language Resources</affiliation></author>
       <author><first>Bonaventure F. P.</first><last>Dossou</last><affiliation>Mila</affiliation></author>
@@ -4236,8 +4236,8 @@
       <author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Alessandra</first><last>Cervone</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>4590-4605</pages>
       <abstract>The tasks of humor understanding and generation are challenging and subjective even for humans, requiring commonsense and real-world knowledge to master. Puns, in particular, add the challenge of fusing that knowledge with the ability to interpret lexical-semantic ambiguity. In this paper, we present the ExPUNations (ExPUN) dataset, in which we augment an existing dataset of puns with detailed crowdsourced annotations of keywords denoting the most distinctive words that make the text funny, pun explanations describing why the text is funny, and fine-grained funniness ratings. This is the first humor dataset with such extensive and fine-grained annotations specifically for puns. Based on these annotations, we propose two tasks: explanation generation to aid with pun classification and keyword-conditioned pun generation, to challenge the current state-of-the-art natural language understanding and generation models’ ability to understand and generate humor. We showcase that the annotated keywords we collect are helpful for generating better novel humorous texts in human evaluation, and that our natural language explanations can be leveraged to improve both the accuracy and robustness of humor classifiers.</abstract>
@@ -4249,7 +4249,7 @@
       <title><fixed-case>SLING</fixed-case>: <fixed-case>S</fixed-case>ino Linguistic Evaluation of Large Language Models</title>
       <author><first>Yixiao</first><last>Song</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Kalpesh</first><last>Krishna</last><affiliation>University of Massachusetts Amherst</affiliation></author>
-      <author><first>Rajesh</first><last>Bhatt</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <pages>4606-4634</pages>
       <abstract>To understand what kinds of linguistic knowledge are encoded by pretrained Chinese language models (LMs), we introduce the benchmark of Sino LINGuistics (SLING), which consists of 38K minimal sentence pairs in Mandarin Chinese grouped into 9 high-level linguistic phenomena. Each pair demonstrates the acceptability contrast of a specific syntactic or semantic phenomenon (e.g., The keys are lost vs. The keys is lost), and an LM should assign lower perplexity to the acceptable sentence. In contrast to the CLiMP dataset (Xiang et al., 2021), which also contains Chinese minimal pairs and was created by translating the vocabulary of the English BLiMP dataset, the minimal pairs in SLING are derived primarily by applying syntactic and lexical transformations to naturally-occurring, linguist-annotated sentences from the Chinese Treebank 9.0, thus addressing severe issues in CLiMP’s data generation process. We test 18 publicly available pretrained monolingual (e.g., BERT-base-zh, CPM) and multi-lingual (e.g., mT5, XLM) language models on SLING. Our experiments show that the average accuracy for LMs is far below human performance (69.7% vs. 97.1%), while BERT-base-zh achieves the highest accuracy (84.8%) of all tested LMs, even much larger ones. Additionally, we find that most LMs have a strong gender and number (singular/plural) bias, and they perform better on local phenomena than hierarchical ones.</abstract>
@@ -4264,8 +4264,8 @@
       <author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Shuyang</first><last>Gao</last><affiliation>Amazon.com, Inc.</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>4635-4648</pages>
       <abstract>Previous work on pun generation commonly begins with a given pun word (a pair of homophones for heterographic pun generation and a polyseme for homographic pun generation) and seeks to generate an appropriate pun. While this may enable efficient pun generation, we believe that a pun is most entertaining if it fits appropriately within a given context, e.g., a given situation or dialogue. In this work, we propose a new task, context-situated pun generation, where a specific context represented by a set of keywords is provided, and the task is to first identify suitable pun words that are appropriate for the context, then generate puns based on the context keywords and the identified pun words. We collect a new dataset, CUP (Context-sitUated Pun), containing 4.5k tuples of context words and pun pairs. Based on the new data and setup, we propose a pipeline system for context-situated pun generation, including a pun word retrieval module that identifies suitable pun words for a given context, and a pun generation module that generates puns from context keywords and pun words. Human evaluation shows that 69% of our top retrieved pun words can be used to generate context-situated puns, and our generation module yields successful puns 31% of the time given a plausible tuple of context words and pun pair, almost tripling the yield of a state-of-the-art pun generation model. With an end-to-end evaluation, our pipeline system with the top-1 retrieved pun pair for a given context can generate successful puns 40% of the time, better than all other modeling variations but 32% lower than the human success rate. This highlights the difficulty of the task, and encourages more research in this direction.</abstract>
@@ -4287,7 +4287,7 @@
       <title>Concadia: Towards Image-Based Text Generation with a Purpose</title>
       <author><first>Elisa</first><last>Kreiss</last><affiliation>Stanford University</affiliation></author>
       <author><first>Fei</first><last>Fang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>4667-4684</pages>
       <abstract>Current deep learning models often achieve excellent results on benchmark image-to-text datasets but fail to generate texts that are useful in practice. We argue that to close this gap, it is vital to distinguish descriptions from captions based on their distinct communicative roles. Descriptions focus on visual features and are meant to replace an image (often to increase accessibility), whereas captions appear alongside an image to supply additional information. To motivate this distinction and help people put it into practice, we introduce the publicly available Wikipedia-based dataset Concadia consisting of 96,918 images with corresponding English-language descriptions, captions, and surrounding context. Using insights from Concadia, models trained on it, and a preregistered human-subjects experiment with human- and model-generated texts, we characterize the commonalities and differences between descriptions and captions. In addition, we show that, for generating both descriptions and captions, it is useful to augment image-to-text models with representations of the textual context in which the image appeared.</abstract>
@@ -4516,7 +4516,7 @@
     <paper id="325">
       <title>Analyzing and Evaluating Faithfulness in Dialogue Summarization</title>
       <author><first>Bin</first><last>Wang</last><affiliation>National University of Singapore</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>ECE, National University of SIngapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>ECE, National University of SIngapore</affiliation></author>
       <author><first>Yan</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yiming</first><last>Chen</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
@@ -4533,9 +4533,9 @@
       <author><first>Ronan</first><last>Le Bras</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Hao</first><last>Peng</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Ximing</first><last>Lu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>4909-4923</pages>
       <abstract>Many language generation models are now available for a wide range of generation tasks, including machine translation and summarization. Combining such diverse models may lead to further progress, but ensembling generation models is challenging during inference: conventional ensembling methods (e.g., shallow fusion) require that the models share vocabulary/tokenization schemes. We introduce Twist decoding, a simple and general text generation algorithm that benefits from diverse models at inference time. Our method does not assume the vocabulary, tokenization or even generation order is shared. Our extensive evaluations on machine translation and scientific paper summarization demonstrate that Twist decoding substantially outperforms each model decoded in isolation over various scenarios, including cases where domain-specific and general-purpose models are both available. Twist decoding also consistently outperforms the popular reranking heuristic where output candidates from one model are rescored by another. We hope that our work will encourage researchers and practitioners to examine generation models collectively, not just independently, and to seek out models with complementary strengths to the currently available models.</abstract>
       <url hash="c8ee3310">2022.emnlp-main.326</url>
@@ -4580,7 +4580,7 @@
       <author><first>Asli</first><last>Celikyilmaz</last><affiliation>FAIR @ Meta</affiliation></author>
       <author><first>Haoran</first><last>Li</last><affiliation>Facebook</affiliation></author>
       <author><first>Yashar</first><last>Mehdad</last><affiliation>Facebook AI</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>4949-4958</pages>
       <abstract>Abstractive dialogue summarization has long been viewed as an important standalone task in natural language processing, but no previous work has explored the possibility of whether abstractive dialogue summarization can also be used as a means to boost an NLP system’s performance on other important dialogue comprehension tasks. In this paper, we propose a novel type of dialogue summarization task - STRUctured DiaLoguE Summarization (STRUDEL) - that can help pre-trained language models to better understand dialogues and improve their performance on important dialogue comprehension tasks. In contrast to the holistic approach taken by the traditional free-form abstractive summarization task for dialogues, STRUDEL aims to decompose and imitate the hierarchical, systematic and structured mental process that we human beings usually go through when understanding and analyzing dialogues, and thus has the advantage of being more focused, specific and instructive for dialogue comprehension models to learn from. We further introduce a new STRUDEL dialogue comprehension modeling framework that integrates STRUDEL into a dialogue reasoning module over transformer encoder language models to improve their dialogue comprehension ability. In our empirical experiments on two important downstream dialogue comprehension tasks - dialogue question answering and dialogue response prediction - we demonstrate that our STRUDEL dialogue comprehension models can significantly improve the dialogue comprehension performance of transformer encoder language models.</abstract>
       <url hash="d1056f72">2022.emnlp-main.329</url>
@@ -4609,7 +4609,7 @@
       <author><first>Zihui</first><last>Gu</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ju</first><last>Fan</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Nan</first><last>Tang</last><affiliation>Qatar Computing Research Institute, HBKU</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Xiaoman</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Xiaoyong</first><last>Du</last><affiliation>Renmin University of China</affiliation></author>
       <pages>4971-4983</pages>
@@ -4677,7 +4677,7 @@
     <paper id="336">
       <title>Capturing Global Structural Information in Long Document Question Answering with Compressive Graph Selector Network</title>
       <author><first>Yuxiang</first><last>Nie</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Wei</first><last>Wei</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>5036-5047</pages>
@@ -4909,7 +4909,7 @@
     </paper>
     <paper id="351">
       <title>Should We Ban <fixed-case>E</fixed-case>nglish <fixed-case>NLP</fixed-case> for a Year?</title>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>5254-5260</pages>
       <abstract>Around two thirds of NLP research at top venues is devoted exclusively to developing technology for speakers of English, most speech data comes from young urban speakers, and most texts used to train language models come from male writers. These biases feed into consumer technologies to widen existing inequality gaps, not only within, but also across, societies. Many have argued that it is almost impossible to mitigate inequality amplification. I argue that, on the contrary, it is quite simple to do so, and that counter-measures would have little-to-no negative impact, except for, perhaps, in the very short term.</abstract>
       <url hash="53a7fc5c">2022.emnlp-main.351</url>
@@ -4948,7 +4948,7 @@
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of Automation Chinese Academy of Sciences</affiliation></author>
       <author><first>Wei</first><last>Luo</last><affiliation>Alibaba</affiliation></author>
       <author><first>Zhongqiang</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>5291-5302</pages>
       <abstract>End-to-end Speech Translation (ST) aims at translating the source language speech into target language text without generating the intermediate transcriptions. However, the training of end-to-end methods relies on parallel ST data, which are difficult and expensive to obtain. Fortunately, the supervised data for automatic speech recognition (ASR) and machine translation (MT) are usually more accessible, making zero-shot speech translation a potential direction. Existing zero-shot methods fail to align the two modalities of speech and text into a shared semantic space, resulting in much worse performance compared to the supervised ST methods. In order to enable zero-shot ST, we propose a novel Discrete Cross-Modal Alignment (DCMA) method that employs a shared discrete vocabulary space to accommodate and match both modalities of speech and text. Specifically, we introduce a vector quantization module to discretize the continuous representations of speech and text into a finite set of virtual tokens, and use ASR data to map corresponding speech and text to the same virtual token in a shared codebook. This way, source language speech can be embedded in the same semantic space as the source language text, which can be then transformed into target language text with an MT module. Experiments on multiple language pairs demonstrate that our zero-shot ST method significantly improves the SOTA, and even performers on par with the strong supervised ST baselines.</abstract>
       <url hash="dcd1d17a">2022.emnlp-main.354</url>
@@ -4960,7 +4960,7 @@
     <paper id="355">
       <title>Abstractive Summarization Guided by Latent Hierarchical Document Structure</title>
       <author><first>Yifu</first><last>Qiu</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>5303-5317</pages>
       <abstract>Sequential abstractive neural summarizers often do not use the underlying structure in the input article or dependencies between the input sentences. This structure is essential to integrate and consolidate information from different parts of the text. To address this shortcoming, we propose a hierarchy-aware graph neural network (HierGNN) which captures such dependencies through three main steps: 1) learning a hierarchical document structure through a latent structure tree learned by a sparse matrix-tree computation; 2) propagating sentence information over this structure using a novel message-passing node propagation mechanism to identify salient information; 3) using graph-level attention to concentrate the decoder on salient information. Experiments confirm HierGNN improves strong sequence models such as BART, with a 0.55 and 0.75 margin in average ROUGE-1/2/L for CNN/DM and XSum. Further human evaluation demonstrates that summaries produced by our model are more relevant and less redundant than the baselines, into which HierGNN is incorporated. We also find HierGNN synthesizes summaries by fusing multiple source sentences more, rather than compressing a single source sentence, and that it processes long inputs more effectively.</abstract>
       <url hash="a2e7f114">2022.emnlp-main.355</url>
@@ -5033,7 +5033,7 @@
       <author><first>Raj</first><last>Dabre</last><affiliation>NICT</affiliation></author>
       <author><first>Ratish</first><last>Puduppully</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft AI and Research</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>IIT Madras</affiliation></author>
       <pages>5363-5394</pages>
       <abstract>Natural Language Generation (NLG) for non-English languages is hampered by the scarcity of datasets in these languages. We present the IndicNLG Benchmark, a collection of datasets for benchmarking NLG for 11 Indic languages. We focus on five diverse tasks, namely, biography generation using Wikipedia infoboxes, news headline generation, sentence summarization, paraphrase generation and, question generation. We describe the created datasets and use them to benchmark the performance of several monolingual and multilingual baselines that leverage pre-trained sequence-to-sequence models. Our results exhibit the strong performance of multilingual language-specific pre-trained models, and the utility of models trained on our dataset for other related NLG tasks. Our dataset creation methods can be easily applied to modest-resource languages as they involve simple steps such as scraping news articles and Wikipedia infoboxes, light cleaning, and pivoting through machine translation data. To the best of our knowledge, the IndicNLG Benchmark is the first NLG benchmark for Indic languages and the most diverse multilingual NLG dataset, with approximately 8M examples across 5 tasks and 11 languages. The datasets and models will be publicly available.</abstract>
@@ -5045,7 +5045,7 @@
     <paper id="361">
       <title>Improving Machine Translation with Phrase Pair Injection and Corpus Filtering</title>
       <author><first>Akshay</first><last>Batheja</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>5395-5400</pages>
       <abstract>In this paper, we show that the combination of Phrase Pair Injection and Corpus Filtering boosts the performance of Neural Machine Translation (NMT) systems. We extract parallel phrases and sentences from the pseudo-parallel corpus and augment it with the parallel corpus to train the NMT models. With the proposed approach, we observe an improvement in the Machine Translation (MT) system for 3 low-resource language pairs, Hindi-Marathi, English-Marathi, and English-Pashto, and 6 translation directions by up to 2.7 BLEU points, on the FLORES test data. These BLEU score improvements are over the models trained using the whole pseudo-parallel corpus augmented with the parallel corpus.</abstract>
       <url hash="fc62714b">2022.emnlp-main.361</url>
@@ -5083,7 +5083,7 @@
       <author><first>Peiling</first><last>Lu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xu</first><last>Tan</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Rui</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Zhejiang University</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Songruoyao</first><last>Wu</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Kejun</first><last>Zhang</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Xiang-Yang</first><last>Li</last><affiliation>University of Science and Technology of China</affiliation></author>
@@ -5133,7 +5133,7 @@
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent</affiliation></author>
       <author><first>Chulun</first><last>Zhou</last><affiliation>Tencent</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen university</affiliation></author>
       <pages>5468-5477</pages>
       <abstract>k-Nearest-Neighbor Machine Translation (kNN-MT) becomes an important research direction of NMT in recent years. Its main idea is to retrieve useful key-value pairs from an additional datastore to modify translations without updating the NMT model. However, the underlying retrieved noisy pairs will dramatically deteriorate the model performance. In this paper, we conduct a preliminary study and find that this problem results from not fully exploiting the prediction of the NMT model. To alleviate the impact of noise, we propose a confidence-enhanced kNN-MT model with robust training. Concretely, we introduce the NMT confidence to refine the modeling of two important components of kNN-MT: kNN distribution and the interpolation weight. Meanwhile we inject two types of perturbations into the retrieved pairs for robust training. Experimental results on four benchmark datasets demonstrate that our model not only achieves significant improvements over current kNN-MT models, but also exhibits better robustness. Our code is available at https://github.com/DeepLearnXMU/Robust-knn-mt.</abstract>
@@ -5211,7 +5211,7 @@
       <title><fixed-case>PLOG</fixed-case>: Table-to-Logic Pretraining for Logical Table-to-Text Generation</title>
       <author><first>Ao</first><last>Liu</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Haoyu</first><last>Dong</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Shi</first><last>Han</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Dongmei</first><last>Zhang</last><affiliation>Microsoft Research</affiliation></author>
       <pages>5531-5546</pages>
@@ -5237,7 +5237,7 @@
       <author><first>Hexiang</first><last>Hu</last><affiliation>Google</affiliation></author>
       <author><first>Xi</first><last>Chen</last><affiliation>Google</affiliation></author>
       <author><first>Pat</first><last>Verga</last><affiliation>Google</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <pages>5558-5570</pages>
       <abstract>While language Models store a massive amount of world knowledge implicitly in their parameters, even very large models often fail to encode information about rare entities and events, while incurring huge computational costs. Recently, retrieval-augmented models, such as REALM, RAG, and RETRO, have incorporated world knowledge into language generation by leveraging an external non-parametric index and have demonstrated impressive performance with constrained model sizes. However, these methods are restricted to retrieving only textual knowledge, neglecting the ubiquitous amount of knowledge in other modalities like images – much of which contains information not covered by any text. To address this limitation, we propose the first Multimodal Retrieval-Augmented Transformer (MuRAG), which accesses an external non-parametric multimodal memory to augment language generation. MuRAG is pre-trained with a mixture of large-scale image-text and text-only corpora using a joint contrastive and generative loss. We perform experiments on two different datasets that require retrieving and reasoning over both images and text to answer a given query: WebQA, and MultimodalQA. Our results show that MuRAG achieves state-of-the-art accuracy, outperforming existing models by 10-20% absolute on both datasets and under both distractor and full-wiki settings.</abstract>
       <url hash="29221926">2022.emnlp-main.375</url>
@@ -5250,7 +5250,7 @@
       <author><first>Zhaoyue</first><last>Sun</last><affiliation>University of Warwick</affiliation></author>
       <author><first>Jiazheng</first><last>Li</last><affiliation>University of Warwick</affiliation></author>
       <author><first>Gabriele</first><last>Pergola</last><affiliation>University of Warwick</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Bino</first><last>John</last><affiliation>AstraZeneca</affiliation></author>
       <author><first>Nigel</first><last>Greene</last><affiliation>AstraZeneca</affiliation></author>
       <author><first>Joseph</first><last>Kim</last><affiliation>AstraZeneca</affiliation></author>
@@ -5273,7 +5273,7 @@
     </paper>
     <paper id="378">
       <title><fixed-case>S</fixed-case>im<fixed-case>QA</fixed-case>: Detecting Simultaneous <fixed-case>MT</fixed-case> Errors through Word-by-Word Question Answering</title>
-      <author><first>HyoJung</first><last>Han</last><affiliation>University of Maryland, College Park</affiliation></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Marine</first><last>Carpuat</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Jordan</first><last>Boyd-Graber</last><affiliation>University of Maryland</affiliation></author>
       <pages>5598-5616</pages>
@@ -5407,7 +5407,7 @@
       <author><first>Subhabrata</first><last>Mukherjee</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Xiaodong</first><last>Liu</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Jing</first><last>Gao</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>5744-5760</pages>
       <abstract>Standard fine-tuning of large pre-trained language models (PLMs) for downstream tasks requires updating hundreds of millions to billions of parameters, and storing a large copy of the PLM weights for every task resulting in increased cost for storing, sharing and serving the models. To address this, parameter-efficient fine-tuning (PEFT) techniques were introduced where small trainable components are injected in the PLM and updated during fine-tuning. We propose AdaMix as a general PEFT method that tunes a mixture of adaptation modules – given the underlying PEFT method of choice – introduced in each Transformer layer while keeping most of the PLM weights frozen. For instance, AdaMix can leverage a mixture of adapters like Houlsby or a mixture of low rank decomposition matrices like LoRA to improve downstream task performance over the corresponding PEFT methods for fully supervised and few-shot NLU and NLG tasks. Further, we design AdaMix such that it matches the same computational cost and the number of tunable parameters as the underlying PEFT method. By only tuning 0.1-0.2% of PLM parameters, we show that AdaMix outperforms SOTA parameter-efficient fine-tuning and full model fine-tuning for both NLU and NLG tasks.</abstract>
@@ -5445,7 +5445,7 @@
       <title><fixed-case>T</fixed-case>-Modules: Translation Modules for Zero-Shot Cross-Modal Machine Translation</title>
       <author><first>Paul-Ambroise</first><last>Duquenne</last><affiliation>Meta AI</affiliation></author>
       <author><first>Hongyu</first><last>Gong</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Holger</first><last>Schwenk</last><affiliation>Meta AI Research</affiliation></author>
       <pages>5794-5806</pages>
       <abstract>We present a new approach to perform zero-shot cross-modal transfer between speech and text for translation tasks. Multilingual speech and text are encoded in a joint fixed-size representation space. Then, we compare different approaches to decode these multimodal and multilingual fixed-size representations, enabling zero-shot translation between languages and modalities. All our models are trained without the need of cross-modal labeled translation data.Despite a fixed-size representation, we achieve very competitive results on several text and speech translation tasks. In particular, we significantly improve the state-of-the-art for zero-shot speech translation on Must-C. Incorporating a speech decoder in our framework, we introduce the first results for zero-shot direct speech-to-speech and text-to-speech translation.</abstract>
@@ -5538,7 +5538,7 @@
     <paper id="398">
       <title>A Framework for Adapting Pre-Trained Language Models to Knowledge Graph Completion</title>
       <author><first>Justin</first><last>Lovelace</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>5937-5955</pages>
       <abstract>Recent work has demonstrated that entity representations can be extracted from pre-trained language models to develop knowledge graph completion models that are more robust to the naturally occurring sparsity found in knowledge graphs. In this work, we conduct a comprehensive exploration of how to best extract and incorporate those embeddings into knowledge graph completion models. We explore the suitability of the extracted embeddings for direct use in entity ranking and introduce both unsupervised and supervised processing methods that can lead to improved downstream performance. We then introduce supervised embedding extraction methods that can extract more informative representations. We then synthesize our findings and develop a knowledge graph completion model that significantly outperforms recent neural models.</abstract>
       <url hash="f8825568">2022.emnlp-main.398</url>
@@ -5598,9 +5598,9 @@
       <author><first>Yi</first><last>Fung</last><affiliation>University of Illinois at Urbana Champaign</affiliation></author>
       <author><first>Kathryn</first><last>Conger</last><affiliation>Universitiy of Colorado, Boulder</affiliation></author>
       <author><first>Ahmed</first><last>ELsayed</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Kevin</first><last>Small</last><affiliation>Amazon</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>6002-6018</pages>
@@ -5637,7 +5637,7 @@
     <paper id="406">
       <title>The Authenticity Gap in Human Evaluation</title>
       <author><first>Kawin</first><last>Ethayarajh</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>6056-6070</pages>
       <abstract>Human ratings are the gold standard in NLG evaluation. The standard protocol is to collect ratings of generated text, average across annotators, and rank NLG systems by their average scores. However, little consideration has been given as to whether this approach faithfully captures human preferences. Analyzing this standard protocol through the lens of utility theory in economics, we identify the implicit assumptions it makes about annotators. These assumptions are often violated in practice, in which case annotator ratings cease to reflect their preferences. The most egregious violations come from using Likert scales, which provably reverse the direction of the true preference in certain cases. We suggest improvements to the standard protocol to make it more theoretically sound, but even in its improved form, it cannot be used to evaluate open-ended tasks like story generation. For the latter, we propose a new human evaluation protocol called system-level probabilistic assessment (SPA). When human evaluation of stories is done with SPA, we can recover the ordering of GPT-3 models by size, with statistically significant results. However, when human evaluation is done with the standard protocol, less than half of the expected preferences can be recovered (e.g., there is no significant difference between curie and davinci, despite using a highly powered test).</abstract>
       <url hash="48bbdd29">2022.emnlp-main.406</url>
@@ -5664,8 +5664,8 @@
       <author><first>Linyong</first><last>Nan</last><affiliation>Yale University</affiliation></author>
       <author><first>Budhaditya</first><last>Deb</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>6081-6093</pages>
       <abstract>Neural attention models have achieved significant improvements on many natural language processing tasks. However, the quadratic memory complexity of the self-attention module with respect to the input length hinders their applications in long text summarization. Instead of designing more efficient attention modules, we approach this problem by investigating if models with a restricted context can have competitive performance compared with the memory-efficient attention models that maintain a global context by treating the input as a single sequence. Our model is applied to individual pages, which contain parts of inputs grouped by the principle of locality, during both the encoding and decoding stages. We empirically investigated three kinds of locality in text summarization at different levels of granularity, ranging from sentences to documents. Our experimental results show that our model has a better performance compared with strong baseline models with efficient attention modules, and our analysis provides further insights into our locality-aware modeling strategy.</abstract>
       <url hash="c61c5aec">2022.emnlp-main.408</url>
@@ -5738,7 +5738,7 @@
       <title>A Survey of Active Learning for Natural Language Processing</title>
       <author><first>Zhisong</first><last>Zhang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Emma</first><last>Strubell</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>6166-6190</pages>
       <abstract>In this work, we provide a literature review of active learning (AL) for its applications in natural language processing (NLP). In addition to a fine-grained categorization of query strategies, we also investigate several other important aspects of applying AL to NLP problems. These include AL for structured prediction tasks, annotation cost, model learning (especially with deep neural models), and starting and stopping AL. Finally, we conclude with a discussion of related topics and future directions.</abstract>
       <url hash="0d58097f">2022.emnlp-main.414</url>
@@ -5828,7 +5828,7 @@
     </paper>
     <paper id="421">
       <title><fixed-case>C</fixed-case>onv<fixed-case>F</fixed-case>in<fixed-case>QA</fixed-case>: Exploring the Chain of Numerical Reasoning in Conversational Finance Question Answering</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last><affiliation>Meta</affiliation></author>
+      <author><first>Zhiyu</first><last>Chen</last><affiliation>Meta</affiliation></author>
       <author><first>Shiyang</first><last>Li</last><affiliation>UC Santa Barbara</affiliation></author>
       <author><first>Charese</first><last>Smiley</last><affiliation>JPMorgan AI Research</affiliation></author>
       <author><first>Zhiqiang</first><last>Ma</last><affiliation>JPMorgan Chase</affiliation></author>
@@ -5843,7 +5843,7 @@
     <paper id="422">
       <title>A Span-based Multimodal Variational Autoencoder for Semi-supervised Multimodal Named Entity Recognition</title>
       <author><first>Baohang</first><last>Zhou</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Kehui</first><last>Song</last><affiliation>Nankai University</affiliation></author>
       <author><first>Wenya</first><last>Guo</last><affiliation>Nankai University</affiliation></author>
       <author><first>Guoqing</first><last>Zhao</last><affiliation>Mashang Consumer Finance Co.,Ltd.</affiliation></author>
@@ -5868,7 +5868,7 @@
     <paper id="424">
       <title>Modeling Consistency Preference via Lexical Chains for Document-level Neural Machine Translation</title>
       <author><first>Xinglin</first><last>Lyu</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, Suzhou</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, Suzhou</affiliation></author>
       <author><first>Shimin</first><last>Tao</last><affiliation>huawei</affiliation></author>
       <author><first>Hao</first><last>Yang</last><affiliation>Huawei Co. Ltd</affiliation></author>
       <author><first>Ying</first><last>Qin</last><affiliation>Huawei Technologies</affiliation></author>
@@ -5898,7 +5898,7 @@
       <title>Factorizing Content and Budget Decisions in Abstractive Summarization of Long Documents</title>
       <author><first>Marcio</first><last>Fonseca</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Yftah</first><last>Ziser</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>6341-6364</pages>
       <abstract>We argue that disentangling content selection from the budget used to cover salient content improves the performance and applicability of abstractive summarizers. Our method, FactorSum, does this disentanglement by factorizing summarization into two steps through an energy function: (1) generation of abstractive summary views covering salient information in subsets of the input document (document views); (2) combination of these views into a final summary, following a budget and content guidance. This guidance may come from different sources, including from an advisor model such as BART or BigBird, or in oracle mode – from the reference. This factorization achieves significantly higher ROUGE scores on multiple benchmarks for long document summarization, namely PubMed, arXiv, and GovReport. Most notably, our model is effective for domain adaptation. When trained only on PubMed samples, it achieves a 46.29 ROUGE-1 score on arXiv, outperforming PEGASUS trained in domain by a large margin. Our experimental results indicate that the performance gains are due to more flexible budget adaptation and processing of shorter contexts provided by partial document views.</abstract>
       <url hash="1d693f1c">2022.emnlp-main.426</url>
@@ -5981,7 +5981,7 @@
       <author><first>Junbin</first><last>Xiao</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yicong</first><last>Li</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Weihong</first><last>Deng</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>6439-6455</pages>
       <abstract>This survey aims to sort out the recent advances in video question answering (VideoQA) and point towards future directions. We firstly categorize the datasets into 1) normal VideoQA, multi-modal VideoQA and knowledge-based VideoQA, according to the modalities invoked in the question-answer pairs, or 2) factoid VideoQA and inference VideoQA, according to the technical challenges in comprehending the questions and deriving the correct answers. We then summarize the VideoQA techniques, including those mainly designed for Factoid QA (e.g., the early spatio-temporal attention-based methods and the recently Transformer-based ones) and those targeted at explicit relation and logic inference (e.g., neural modular networks, neural symbolic methods, and graph-structured methods). Aside from the backbone techniques, we delve into the specific models and find out some common and useful insights either for video modeling, question answering, or for cross-modal correspondence learning. Finally, we point out the research trend of studying beyond factoid VideoQA to inference VideoQA, as well as towards the robustness and interpretability. Additionally, we maintain a repository, https://github.com/VRU-NExT/VideoQA, to keep trace of the latest VideoQA papers, datasets, and their open-source implementations if available. With these efforts, we strongly hope this survey could shed light on the follow-up VideoQA research.</abstract>
       <url hash="4bd7aa86">2022.emnlp-main.432</url>
@@ -6138,7 +6138,7 @@
     <paper id="443">
       <title>Textual Manifold-based Defense Against Natural Language Adversarial Examples</title>
       <author><first>Dang</first><last>Nguyen Minh</last><affiliation>VinAI Research</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <pages>6612-6625</pages>
       <abstract>Despite the recent success of large pretrained language models in NLP, they are susceptible to adversarial examples. Concurrently, several studies on adversarial images have observed an intriguing property: the adversarial images tend to leave the low-dimensional natural data manifold. In this study, we find a similar phenomenon occurs in the contextualized embedding space of natural sentences induced by pretrained language models in which textual adversarial examples tend to have their embeddings diverge off the manifold of natural sentence embeddings. Based on this finding, we propose Textual Manifold-based Defense (TMD), a defense mechanism that learns the embedding space manifold of the underlying language model and projects novel inputs back to the approximated structure before classification. Through extensive experiments, we find that our method consistently and significantly outperforms previous defenses under various attack settings while remaining unaffected to the clean accuracy. To the best of our knowledge, this is the first kind of manifold-based defense adapted to the NLP domain.</abstract>
       <url hash="1bc8a1e2">2022.emnlp-main.443</url>
@@ -6176,7 +6176,7 @@
       <title><fixed-case>ATTEMPT</fixed-case>: Parameter-Efficient Multi-task Tuning via Attentional Mixtures of Soft Prompts</title>
       <author><first>Akari</first><last>Asai</last><affiliation>University of Washington</affiliation></author>
       <author><first>Mohammadreza</first><last>Salehi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>6655-6672</pages>
       <abstract>This work introduces a new multi-task, parameter-efficient language model (LM) tuning method that learns to transfer knowledge across different tasks via a mixture of soft prompts—small prefix embedding vectors pre-trained for different tasks. Our method, called ATTEMPT (ATTEntional Mixtures of Prompt Tuning), obtains source prompts as encodings of large-scale source tasks into a small number of parameters and trains an attention module to interpolate the source prompts and a newly initialized target prompt for every instance in the target task. During training, only the target task prompt and the attention weights, which are shared between tasks in multi-task training, are updated, while the original LM and source prompts are intact. ATTEMPT is highly parameter-efficient (e.g., updates 2,300 times fewer parameters than full fine-tuning), while it overcomes instability of prompt tuning and achieves high task performance using learned knowledge from high-resource tasks. Moreover, it is modular using pre-trained soft prompts, and can flexibly add or remove source prompts for effective knowledge transfer. Our experimental results across 21 diverse NLP datasets show that ATTEMPT significantly outperforms prompt tuning and outperforms or matches fully fine-tuned or other parameter-efficient tuning approaches that use 10 times more parameters. Finally, ATTEMPT outperforms previous work in few-shot learning settings.</abstract>
@@ -6282,7 +6282,7 @@
       <title>Improving Event Coreference Resolution Using Document-level and Topic-level Information</title>
       <author><first>Sheng</first><last>Xu</last><affiliation>Soochow University</affiliation></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>6765-6775</pages>
       <abstract>Event coreference resolution (ECR) aims to cluster event mentions that refer to the same real-world events. Deep learning methods have achieved SOTA results on the ECR task. However, due to the encoding length limitation, previous methods either adopt classical pairwise models based on sentence-level context or split each document into multiple chunks and encode them separately. They failed to capture the interactions and contextual cues among those long-distance event mentions. Besides, high-level information, such as event topics, is rarely considered to enhance representation learning for ECR. To address the above two issues, we first apply a Longformer-based encoder to obtain the document-level embeddings and an encoder with a trigger-mask mechanism to learn sentence-level embeddings based on local context. In addition, we propose an event topic generator to infer the latent topic-level representations. Finally, using the above event embeddings, we employ a multiple tensor matching method to capture their interactions at the document, sentence, and topic levels. Experimental results on the KBP 2017 dataset show that our model outperforms the SOTA baselines.</abstract>
       <url hash="9906cc97">2022.emnlp-main.454</url>
@@ -6305,7 +6305,7 @@
     <paper id="456">
       <title>Boosting Natural Language Generation from Instructions with Meta-Learning</title>
       <author><first>Budhaditya</first><last>Deb</last><affiliation>Microsoft Corporation</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Guoqing</first><last>Zheng</last><affiliation>Microsoft Research</affiliation></author>
       <pages>6792-6808</pages>
       <abstract>Recent work has shown that language models (LMs) trained with multi-task <i>instructional learning</i> (MTIL) can solve diverse NLP tasks in zero- and few-shot settings with improved performance compared to prompt tuning. MTIL illustrates that LMs can extract and use information about the task from instructions beyond the surface patterns of the inputs and outputs. This suggests that meta-learning may further enhance the utilization of instructions for effective task transfer. In this paper we investigate whether meta-learning applied to MTIL can further improve generalization to unseen tasks in a zero-shot setting. Specifically, we propose to adapt meta-learning to MTIL in three directions: 1) Model Agnostic Meta Learning (MAML), 2) Hyper-Network (HNet) based adaptation to generate task specific parameters conditioned on instructions, and 3) an approach combining HNet and MAML. Through extensive experiments on the large scale Natural Instructions V2 dataset, we show that our proposed approaches significantly improve over strong baselines in zero-shot settings. In particular, meta-learning improves the effectiveness of instructions and is most impactful when the test tasks are strictly zero-shot (i.e. no similar tasks in the training set) and are “hard” for LMs, illustrating the potential of meta-learning for MTIL for out-of-distribution tasks.</abstract>
@@ -6380,7 +6380,7 @@
       <author><first>Xin</first><last>Li</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Ruidan</first><last>He</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Luo</first><last>Si</last><affiliation>Alibaba Group Inc</affiliation></author>
       <pages>6878-6890</pages>
       <abstract>Knowledge-enhanced language representation learning has shown promising results across various knowledge-intensive NLP tasks. However, prior methods are limited in efficient utilization of multilingual knowledge graph (KG) data for language model (LM) pretraining. They often train LMs with KGs in indirect ways, relying on extra entity/relation embeddings to facilitate knowledge injection. In this work, we explore methods to make better use of the multilingual annotation and language agnostic property of KG triples, and present novel knowledge based multilingual language models (KMLMs) trained directly on the knowledge triples. We first generate a large amount of multilingual synthetic sentences using the Wikidata KG triples. Then based on the intra- and inter-sentence structures of the generated data, we design pretraining tasks to enable the LMs to not only memorize the factual knowledge but also learn useful logical patterns. Our pretrained KMLMs demonstrate significant performance improvements on a wide range of knowledge-intensive cross-lingual tasks, including named entity recognition (NER), factual knowledge retrieval, relation classification, and a newly designed logical reasoning task.</abstract>
@@ -6392,7 +6392,7 @@
       <title>Revisiting Grammatical Error Correction Evaluation and Beyond</title>
       <author><first>Peiyuan</first><last>Gong</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xuebo</first><last>Liu</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Suda</affiliation></author>
       <pages>6891-6902</pages>
       <abstract>Pretraining-based (PT-based) automatic evaluation metrics (e.g., BERTScore and BARTScore) have been widely used in several sentence generation tasks (e.g., machine translation and text summarization) due to their better correlation with human judgments over traditional overlap-based methods. Although PT-based methods have become the de facto standard for training grammatical error correction (GEC) systems, GEC evaluation still does not benefit from pretrained knowledge. This paper takes the first step towards understanding and improving GEC evaluation with pretraining. We first find that arbitrarily applying PT-based metrics to GEC evaluation brings unsatisfactory correlation results because of the excessive attention to inessential systems outputs (e.g., unchanged parts). To alleviate the limitation, we propose a novel GEC evaluation metric to achieve the best of both worlds, namely PT-M2 which only uses PT-based metrics to score those corrected parts. Experimental results on the CoNLL14 evaluation task show that PT-M2 significantly outperforms existing methods, achieving a new state-of-the-art result of 0.949 Pearson correlation. Further analysis reveals that PT-M2 is robust to evaluate competitive GEC systems. Source code and scripts are freely available at https://github.com/pygongnlp/PT-M2.</abstract>
@@ -6411,7 +6411,7 @@
       <author><first>Yixin</first><last>Liu</last><affiliation>Yale University</affiliation></author>
       <author><first>Luke</first><last>Benson</last><affiliation>Yale University</affiliation></author>
       <author><first>Weijin</first><last>Zou</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>6903-6917</pages>
       <abstract>Unfaithful text generation is a common problem for text generation systems. In the case of Data-to-Text (D2T) systems, the factuality of the generated text is particularly crucial for any real-world applications. We introduce R2D2, a training framework that addresses unfaithful Data-to-Text generation by training a system both as a generator and a faithfulness discriminator with additional replacement detection and unlikelihood learning tasks. To facilitate such training, we propose two methods for sampling unfaithful sentences. We argue that the poor entity retrieval capability of D2T systems is one of the primary sources of unfaithfulness, so in addition to the existing metrics, we further propose named entity based metrics to evaluate the fidelity of D2T generations. Our experimental results show that R2D2 systems could effectively mitigate the unfaithful text generation, and they achieve new state-of-theart results on FeTaQA, LogicNLG, and ToTTo, all with significant improvements.</abstract>
       <url hash="64b24dd2">2022.emnlp-main.464</url>
@@ -6421,7 +6421,7 @@
     <paper id="465">
       <title><fixed-case>IDK</fixed-case>-<fixed-case>MRC</fixed-case>: Unanswerable Questions for <fixed-case>I</fixed-case>ndonesian Machine Reading Comprehension</title>
       <author><first>Rifki Afina</first><last>Putri</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>6918-6933</pages>
       <abstract>Machine Reading Comprehension (MRC) has become one of the essential tasks in Natural Language Understanding (NLU) as it is often included in several NLU benchmarks (Liang et al., 2020; Wilie et al., 2020). However, most MRC datasets only have answerable question type, overlooking the importance of unanswerable questions. MRC models trained only on answerable questions will select the span that is most likely to be the answer, even when the answer does not actually exist in the given passage (Rajpurkar et al., 2018). This problem especially remains in medium- to low-resource languages like Indonesian. Existing Indonesian MRC datasets (Purwarianti et al., 2007; Clark et al., 2020) are still inadequate because of the small size and limited question types, i.e., they only cover answerable questions. To fill this gap, we build a new Indonesian MRC dataset called I(n)don’tKnow- MRC (IDK-MRC) by combining the automatic and manual unanswerable question generation to minimize the cost of manual dataset construction while maintaining the dataset quality. Combined with the existing answerable questions, IDK-MRC consists of more than 10K questions in total. Our analysis shows that our dataset significantly improves the performance of Indonesian MRC models, showing a large improvement for unanswerable questions.</abstract>
       <url hash="be29873a">2022.emnlp-main.465</url>
@@ -6478,7 +6478,7 @@
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>DAMO Academy, Alibaba Group</affiliation></author>
       <author><first>Wai</first><last>Lam</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>6970-6984</pages>
       <abstract>To facilitate conversational question answering (CQA) over hybrid contexts in finance, we present a new dataset, named PACIFIC. Compared with existing CQA datasets, PACIFIC exhibits three key features: (i) proactivity, (ii) numerical reasoning, and (iii) hybrid context of tables and text. A new task is defined accordingly to study Proactive Conversational Question Answering (PCQA), which combines clarification question generation and CQA. In addition, we propose a novel method, namely UniPCQA, to adapt a hybrid format of input and output content in PCQA into the Seq2Seq problem, including the reformulation of the numerical reasoning process as code generation. UniPCQA performs multi-task learning over all sub-tasks in PCQA and incorporates a simple ensemble strategy to alleviate the error propagation issue in the multi-task learning by cross-validating top-<tex-math>k</tex-math> sampled Seq2Seq outputs. We benchmark the PACIFIC dataset with extensive baselines and provide comprehensive evaluations on each sub-task of PCQA.</abstract>
       <url hash="e0d0b8b6">2022.emnlp-main.469</url>
@@ -6606,7 +6606,7 @@
       <author><first>Srinivas</first><last>Ravishankar</last><affiliation>IBM Research</affiliation></author>
       <author><first>Daiki</first><last>Kimura</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Keerthiram</first><last>Murugesan</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Ramón</first><last>Fernandez Astudillo</last><affiliation>IBM Research</affiliation></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Fernandez Astudillo</last><affiliation>IBM Research</affiliation></author>
       <author><first>Tahira</first><last>Naseem</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Alexander</first><last>Gray</last><affiliation>IBM Research</affiliation></author>
@@ -6620,7 +6620,7 @@
       <title><fixed-case>P</fixed-case>ara<fixed-case>T</fixed-case>ag: A Dataset of Paraphrase Tagging for Fine-Grained Labels, <fixed-case>NLG</fixed-case> Evaluation, and Data Augmentation</title>
       <author><first>Shuohang</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Ruochen</first><last>Xu</last><affiliation>Microsoft</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
       <author><first>Michael</first><last>Zeng</last><affiliation>Microsoft Corp</affiliation></author>
       <pages>7111-7122</pages>
@@ -6734,7 +6734,7 @@
       <title>m<fixed-case>PLUG</fixed-case>: Effective and Efficient Vision-Language Learning by Cross-modal Skip-connections</title>
       <author><first>Chenliang</first><last>Li</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Haiyang</first><last>Xu</last><affiliation>Alibaba Damo Academy</affiliation></author>
-      <author><first>Junfeng</first><last>Tian</last><affiliation>Alibaba Group</affiliation></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Wei</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Ming</first><last>Yan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Bin</first><last>Bi</last><affiliation>Alibaba</affiliation></author>
@@ -6896,9 +6896,9 @@
       <title>Does Corpus Quality Really Matter for Low-Resource Languages?</title>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Itziar</first><last>Aldabe</last><affiliation>HiTZ Center - Ixa, University of the Basque Country (UPV/EHU)</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>7383-7390</pages>
       <abstract>The vast majority of non-English corpora are derived from automatically filtered versions of CommonCrawl. While prior work has identified major issues on the quality of these datasets (Kreutzer et al., 2021), it is not clear how this impacts downstream performance. Taking representation learning in Basque as a case study, we explore tailored crawling (manually identifying and scraping websites with high-quality content) as an alternative to filtering CommonCrawl. Our new corpus, called EusCrawl, is similar in size to the Basque portion of popular multilingual corpora like CC100 and mC4, yet it has a much higher quality according to native annotators. For instance, 66% of documents are rated as high-quality for EusCrawl, in contrast with &lt;33% for both mC4 and CC100. Nevertheless, we obtain similar results on downstream NLU tasks regardless of the corpus used for pre-training. Our work suggests that NLU performance in low-resource languages is not primarily constrained by the quality of the data, and other factors like corpus size and domain coverage can play a more important role.</abstract>
       <url hash="d254f872">2022.emnlp-main.499</url>
@@ -6921,7 +6921,7 @@
     <paper id="501">
       <title>Does Self-Rationalization Improve Robustness to Spurious Correlations?</title>
       <author><first>Alexis</first><last>Ross</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Ana</first><last>Marasovic</last><affiliation>University of Utah</affiliation></author>
       <pages>7403-7416</pages>
       <abstract>Rationalization is fundamental to human reasoning and learning. NLP models trained to produce rationales along with predictions, called self-rationalization models, have been investigated for their interpretability and utility to end-users. However, the extent to which training with human-written rationales facilitates learning remains an under-explored question. We ask whether training models to self-rationalize can aid in their learning to solve tasks for the right reasons. Specifically, we evaluate how training self-rationalization models with free-text rationales affects robustness to spurious correlations in fine-tuned encoder-decoder and decoder-only models of six different sizes. We evaluate robustness to spurious correlations by measuring performance on 1) manually annotated challenge datasets and 2) subsets of original test sets where reliance on spurious correlations would fail to produce correct answers. We find that while self-rationalization can improve robustness to spurious correlations in low-resource settings, it tends to hurt robustness in higher-resource settings. Furthermore, these effects depend on model family and size, as well as on rationale content. Together, our results suggest that explainability can come at the cost of robustness; thus, appropriate care should be taken when training self-rationalizing models with the goal of creating more trustworthy models.</abstract>
@@ -6948,7 +6948,7 @@
       <title>Subword Evenness (<fixed-case>S</fixed-case>u<fixed-case>E</fixed-case>) as a Predictor of Cross-lingual Transfer to Low-resource Languages</title>
       <author><first>Olga</first><last>Pelloni</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Anastassia</first><last>Shaitarova</last><affiliation>University of Zurich</affiliation></author>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <pages>7428-7445</pages>
       <abstract>Pre-trained multilingual models, such as mBERT, XLM-R and mT5, are used to improve the performance on various tasks in low-resource languages via cross-lingual transfer. In this framework, English is usually seen as the most natural choice for a transfer language (for fine-tuning or continued training of a multilingual pre-trained model), but it has been revealed recently that this is often not the best choice. The success of cross-lingual transfer seems to depend on some properties of languages, which are currently hard to explain. Successful transfer often happens between unrelated languages and it often cannot be explained by data-dependent factors.In this study, we show that languages written in non-Latin and non-alphabetic scripts (mostly Asian languages) are the best choices for improving performance on the task of Masked Language Modelling (MLM) in a diverse set of 30 low-resource languages and that the success of the transfer is well predicted by our novel measure of Subword Evenness (SuE). Transferring language models over the languages that score low on our measure results in the lowest average perplexity over target low-resource languages. Our correlation coefficients obtained with three different pre-trained multilingual models are consistently higher than all the other predictors, including text-based measures (type-token ratio, entropy) and linguistically motivated choice (genealogical and typological proximity).</abstract>
       <url hash="086a0545">2022.emnlp-main.503</url>
@@ -6984,7 +6984,7 @@
       <author><first>Utsav</first><last>Shukla</last><affiliation>Thapar Institute of Engineering Technology</affiliation></author>
       <author><first>Husrev Taha</first><last>Sencar</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <author><first>Mohamed</first><last>Nabeel</last><affiliation>Qatar Computing Research Institute, HBKU</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>7470-7480</pages>
       <abstract>We study the problem of profiling news media on the Web with respect to their factuality of reporting and bias. This is an important but under-studied problem related to disinformation and “fake news” detection, but it addresses the issue at a coarser granularity compared to looking at an individual article or an individual claim. This is useful as it allows to profile entire media outlets in advance. Unlike previous work, which has focused primarily on text (e.g., on the text of the articles published by the target website, or on the textual description in their social media profiles or in Wikipedia), here our main focus is on modeling the similarity between media outlets based on the overlap of their audience. This is motivated by homophily considerations, i.e., the tendency of people to have connections to people with similar interests, which we extend to media, hypothesizing that similar types of media would be read by similar kinds of users. In particular, we propose GREENER (GRaph nEural nEtwork for News mEdia pRofiling), a model that builds a graph of inter-media connections based on their audience overlap, and then uses graph neural networks to represent each medium. We find that such representations are quite useful for predicting the factuality and the bias of news media outlets, yielding improvements over state-of-the-art results reported on two datasets. When augmented with conventionally used representations obtained from news articles, Twitter, YouTube, Facebook, and Wikipedia, prediction accuracy is found to improve by 2.5-27 macro-F1 points for the two tasks.</abstract>
       <url hash="5a552d02">2022.emnlp-main.506</url>
@@ -7013,7 +7013,7 @@
       <author><first>Chaoqun</first><last>Duan</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Youzheng</first><last>Wu</last><affiliation>JD AI Research</affiliation></author>
       <author><first>Xiaodong</first><last>He</last><affiliation>JD AI Research</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>7494-7507</pages>
       <abstract>Question answering requiring discrete reasoning, e.g., arithmetic computing, comparison, and counting, over knowledge is a challenging task.In this paper, we propose UniRPG, a semantic-parsing-based approach advanced in interpretability and scalability, to perform Unified discrete Reasoning over heterogeneous knowledge resources, i.e., table and text, as Program Generation. Concretely, UniRPG consists of a neural programmer and a symbolic program executor,where a program is the composition of a set of pre-defined general atomic and higher-order operations and arguments extracted from table and text.First, the programmer parses a question into a program by generating operations and copying arguments, and then, the executor derives answers from table and text based on the program.To alleviate the costly program annotation issue, we design a distant supervision approach for programmer learning, where pseudo programs are automatically constructed without annotated derivations.Extensive experiments on the TAT-QA dataset show that UniRPG achieves tremendous improvements and enhances interpretability and scalability compared with previous state-of-the-art methods, even without derivation annotation.Moreover, it achieves promising performance on the textual dataset DROP without derivation annotation.</abstract>
       <url hash="9ebe1ada">2022.emnlp-main.508</url>
@@ -7048,8 +7048,8 @@
     </paper>
     <paper id="511">
       <title>Cross-lingual neural fuzzy matching for exploiting target-language monolingual corpora in computer-aided translation</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universitat d’Alacant</affiliation></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universitat d’Alacant</affiliation></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last><affiliation>Departament de Llenguatges i Sistemes Informàtics, Universitat d’Alacant</affiliation></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last><affiliation>Universitat d’Alacant</affiliation></author>
       <pages>7532-7543</pages>
@@ -7117,7 +7117,7 @@
       <title>Cross-Modal Similarity-Based Curriculum Learning for Image Captioning</title>
       <author><first>Hongkuan</first><last>Zhang</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Saku</first><last>Sugawara</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <author><first>Lei</first><last>Zhou</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Ryohei</first><last>Sasano</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Koichi</first><last>Takeda</last><affiliation>Nagoya University</affiliation></author>
@@ -7132,7 +7132,7 @@
       <title>Debiasing Masks: A New Framework for Shortcut Mitigation in <fixed-case>NLU</fixed-case></title>
       <author><first>Johannes Mario</first><last>Meissner</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Saku</first><last>Sugawara</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>7607-7613</pages>
       <abstract>Debiasing language models from unwanted behaviors in Natural Language Understanding (NLU) tasks is a topic with rapidly increasing interest in the NLP community. Spurious statistical correlations in the data allow models to perform shortcuts and avoid uncovering more advanced and desirable linguistic features.A multitude of effective debiasing approaches has been proposed, but flexibility remains a major issue. For the most part, models must be retrained to find a new set of weights with debiased behavior.We propose a new debiasing method in which we identify debiased pruning masks that can be applied to a finetuned model. This enables the selective and conditional application of debiasing behaviors.We assume that bias is caused by a certain subset of weights in the network; our method is, in essence, a mask search to identify and remove biased weights.Our masks show equivalent or superior performance to the standard counterparts, while offering important benefits.Pruning masks can be stored with high efficiency in memory, and it becomes possible to switch among several debiasing behaviors (or revert back to the original biased model) at inference time. Finally, it opens the doors to further research on how biases are acquired by studying the generated masks. For example, we observed that the early layers and attention heads were pruned more aggressively, possibly hinting towards the location in which biases may be encoded.</abstract>
       <url hash="8b6783dd">2022.emnlp-main.517</url>
@@ -7192,7 +7192,7 @@
       <author><first>Honghai</first><last>Yu</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Xuming</first><last>Hu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Shu’ang</first><last>Li</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author id="li-lin"><first>Li</first><last>Lin</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Li</first><last>Lin</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Fukun</first><last>Ma</last><affiliation>School of Software,Tsinghua University</affiliation></author>
       <author><first>Yawen</first><last>Yang</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
@@ -7263,7 +7263,7 @@
       <title>Spectral Probing</title>
       <author><first>Max</first><last>Müller-Eberstein</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last><affiliation>IT University of Copenhagen</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>7730-7741</pages>
       <abstract>Linguistic information is encoded at varying timescales (subwords, phrases, etc.) and communicative levels, such as syntax and semantics. Contextualized embeddings have analogously been found to capture these phenomena at distinctive layers and frequencies. Leveraging these findings, we develop a fully learnable frequency filter to identify spectral profiles for any given task. It enables vastly more granular analyses than prior handcrafted filters, and improves on efficiency. After demonstrating the informativeness of spectral probing over manual filters in a monolingual setting, we investigate its multilingual characteristics across seven diverse NLP tasks in six languages. Our analyses identify distinctive spectral profiles which quantify cross-task similarity in a linguistically intuitive manner, while remaining consistent across languages—highlighting their potential as robust, lightweight task descriptors.</abstract>
       <url hash="53d62d9a">2022.emnlp-main.527</url>
@@ -7374,7 +7374,7 @@
       <author><first>Xiaohan</first><last>Zhang</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Shaonan</first><last>Wang</last><affiliation>National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Nan</first><last>Lin</last><affiliation>Institute of Psychology of the Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>7852-7861</pages>
       <abstract>Evidence from psycholinguistic studies suggests that the human brain builds a hierarchical syntactic structure during language comprehension. However, it is still unknown whether the neural basis of such structures is universal across languages. In this paper, we first analyze the differences in language structure between two diverse languages: Chinese and English. By computing the working memory requirements when applying parsing strategies to different language structures, we find that top-down parsing generates less memory load for the right-branching English and bottom-up parsing is less memory-demanding for Chinese.Then we use functional magnetic resonance imaging (fMRI) to investigate whether the brain has different syntactic adaptation strategies in processing Chinese and English. Specifically, for both Chinese and English, we extract predictors from the implementations of different parsing strategies, i.e., bottom-up and top-down. Then, these predictors are separately associated with fMRI signals. Results show that for Chinese and English, the brain utilizes bottom-up and top-down parsing strategies separately. These results reveal that the brain adopts parsing strategies with less memory processing load according to different language structures.</abstract>
       <url hash="4272cdd1">2022.emnlp-main.535</url>
@@ -7423,7 +7423,7 @@
       <title><fixed-case>S</fixed-case>ocio<fixed-case>P</fixed-case>robe: What, When, and Where Language Models Learn about Sociodemographics</title>
       <author><first>Anne</first><last>Lauscher</last><affiliation>University of Hamburg</affiliation></author>
       <author><first>Federico</first><last>Bianchi</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>7901-7918</pages>
       <abstract>Pre-trained language models (PLMs) have outperformed other NLP models on a wide range of tasks. Opting for a more thorough understanding of their capabilities and inner workings, researchers have established the extend to which they capture lower-level knowledge like grammaticality, and mid-level semantic knowledge like factual understanding. However, there is still little understanding of their knowledge of higher-level aspects of language. In particular, despite the importance of sociodemographic aspects in shaping our language, the questions of whether, where, and how PLMs encode these aspects, e.g., gender or age, is still unexplored. We address this research gap by probing the sociodemographic knowledge of different single-GPU PLMs on multiple English data sets via traditional classifier probing and information-theoretic minimum description length probing. Our results show that PLMs do encode these sociodemographics, and that this knowledge is sometimes spread across the layers of some of the tested PLMs. We further conduct a multilingual analysis and investigate the effect of supplementary training to further explore to what extent, where, and with what amount of pre-training data the knowledge is encoded. Our overall results indicate that sociodemographic knowledge is still a major challenge for NLP. PLMs require large amounts of pre-training data to acquire the knowledge and models that excel in general language understanding do not seem to own more knowledge about these aspects.</abstract>
@@ -7446,7 +7446,7 @@
       <author><first>Ahmet</first><last>Üstün</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Arianna</first><last>Bisazza</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Gosse</first><last>Bouma</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Gertjan</first><last>van Noord</last><affiliation>University of Groningen</affiliation></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
       <pages>7934-7949</pages>
       <abstract>Massively multilingual models are promising for transfer learning across tasks and languages. However, existing methods are unable to fully leverage training data when it is available in different task-language combinations. To exploit such heterogeneous supervision, we propose Hyper-X, a single hypernetwork that unifies multi-task and multilingual learning with efficient adaptation. It generates weights for adapter modules conditioned on both tasks and language embeddings. By learning to combine task and language-specific knowledge, our model enables zero-shot transfer for unseen languages and task-language combinations. Our experiments on a diverse set of languages demonstrate that Hyper-X achieves the best or competitive gain when a mixture of multiple resources is available, while on par with strong baseline in the standard scenario. Hyper-X is also considerably more efficient in terms of parameters and resources compared to methods that train separate adapters. Finally, Hyper-X consistently produces strong results in few-shot scenarios for new languages, showing the versatility of our approach beyond zero-shot transfer.</abstract>
@@ -7513,8 +7513,8 @@
       <author><first>Roberto</first><last>Dessì</last><affiliation>Facebook AI Research / Universitat Pompeu Fabra</affiliation></author>
       <author><first>Eleonora</first><last>Gualdoni</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <author><first>Francesca</first><last>Franzon</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
-      <author><first>Gemma</first><last>Boleda</last><affiliation>Universitat Pompeu Fabra / ICREA</affiliation></author>
-      <author><first>Marco</first><last>Baroni</last><affiliation>ICREA</affiliation></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last><affiliation>Universitat Pompeu Fabra / ICREA</affiliation></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last><affiliation>ICREA</affiliation></author>
       <pages>7998-8007</pages>
       <abstract>We compare the 0-shot performance of a neural caption-based image retriever when given as input either human-produced captions or captions generated by a neural captioner. We conduct this comparison on the recently introduced ImageCoDe data-set (Krojer et al. 2022), which contains hard distractors nearly identical to the images to be retrieved. We find that the neural retriever has much higher performance when fed neural rather than human captions, despite the fact that the former, unlike the latter, were generated without awareness of the distractors that make the task hard. Even more remarkably, when the same neural captions are given to human subjects, their retrieval performance is almost at chance level. Our results thus add to the growing body of evidence that, even when the “language” of neural models resembles English, this superficial resemblance might be deeply misleading.</abstract>
       <url hash="489549d5">2022.emnlp-main.546</url>
@@ -7536,7 +7536,7 @@
     <paper id="548">
       <title>Bilingual Synchronization: Restoring Translational Relationships with Editing Operations</title>
       <author><first>Jitao</first><last>Xu</last><affiliation>LISN, CNRS, Paris-Saclay University</affiliation></author>
-      <author><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>LISN CNRS &amp; Univ. Paris Saclay</affiliation></author>
       <pages>8016-8030</pages>
       <abstract>Machine Translation (MT) is usually viewed as a one-shot process that generates the target language equivalent of some source text from scratch. We consider here a more general setting which assumes an initial target sequence, that must be transformed into a valid translation of the source, thereby restoring parallelism between source and target. For this bilingual synchronization task, we consider several architectures (both autoregressive and non-autoregressive) and training regimes, and experiment with multiple practical settings such as simulated interactive MT, translating with Translation Memory (TM) and TM cleaning. Our results suggest that one single generic edit-based system, once fine-tuned, can compare with, or even outperform, dedicated systems specifically trained for these tasks.</abstract>
@@ -7572,7 +7572,7 @@
     <paper id="551">
       <title>Entity-Focused Dense Passage Retrieval for Outside-Knowledge Visual Question Answering</title>
       <author><first>Jialin</first><last>Wu</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <pages>8061-8072</pages>
       <abstract>Most Outside-Knowledge Visual Question Answering (OK-VQA) systems employ a two-stage framework that first retrieves external knowledge given the visual question and then predicts the answer based on the retrieved content. However, the retrieved knowledge is often inadequate. Retrievals are frequently too general and fail to cover specific knowledge needed to answer the question. Also, the naturally available supervision (whether the passage contains the correct answer) is weak and does not guarantee question relevancy. To address these issues, we propose an Entity-Focused Retrieval (EnFoRe) model that provides stronger supervision during training and recognizes question-relevant entities to help retrieve more specific knowledge. Experiments show that our EnFoRe model achieves superior retrieval performance on OK-VQA, the currently largest outside-knowledge VQA dataset. We also combine the retrieved knowledge with state-of-the-art VQA models, and achieve a new state-of-the-art performance on OK-VQA.</abstract>
       <url hash="104aeb6c">2022.emnlp-main.551</url>
@@ -7588,7 +7588,7 @@
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jingting</first><last>Ye</last><affiliation>Fudan University</affiliation></author>
       <author><first>Menghan</first><last>Zhang</last><affiliation>Institute of Modern Languages and Linguistics, Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>8073-8092</pages>
       <abstract>Multilingual BERT (mBERT) has demonstrated considerable cross-lingual syntactic ability, whereby it enables effective zero-shot cross-lingual transfer of syntactic knowledge. The transfer is more successful between some languages, but it is not well understood what leads to this variation and whether it fairly reflects difference between languages. In this work, we investigate the distributions of grammatical relations induced from mBERT in the context of 24 typologically different languages. We demonstrate that the distance between the distributions of different languages is highly consistent with the syntactic difference in terms of linguistic formalisms. Such difference learnt via self-supervision plays a crucial role in the zero-shot transfer performance and can be predicted by variation in morphosyntactic properties between languages. These results suggest that mBERT properly encodes languages in a way consistent with linguistic diversity and provide insights into the mechanism of cross-lingual transfer.</abstract>
       <url hash="7a299dd8">2022.emnlp-main.552</url>
@@ -7615,10 +7615,10 @@
       <title>Long Text Generation with Topic-aware Discrete Latent Variable Model</title>
       <author><first>Erguang</first><last>Yang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Mingtong</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Yujie</first><last>Zhang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>8100-8107</pages>
       <abstract>Generating coherent long texts is an important yet challenging task, particularly forthe open-ended generation. Prior work based on discrete latent codes focuses on the modeling of discourse relation, resulting in discrete codes only learning shallow semantics (Ji and Huang, 2021). A natural text always revolves around several related topics and the transition across them is natural and smooth.In this work, we investigate whether discrete latent codes can learn information of topics. To this end, we build a topic-aware latent code-guided text generation model. To encourage discrete codes to model information about topics, we propose a span-level bag-of-words training objective for the model. Automatic and manual evaluation experiments show that our method can generate more topic-relevant and coherent texts.</abstract>
       <url hash="bd710ed9">2022.emnlp-main.554</url>
@@ -7630,10 +7630,10 @@
       <author><first>Yiheng</first><last>Shu</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Zhiwei</first><last>Yu</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Yuhan</first><last>Li</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Microsoft Research Asia</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Tingting</first><last>Ma</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yuzhong</first><last>Qu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
       <pages>8108-8121</pages>
       <abstract>Pre-trained language models (PLMs) have shown their effectiveness in multiple scenarios. However, KBQA remains challenging, especially regarding coverage and generalization settings. This is due to two main factors: i) understanding the semantics of both questions and relevant knowledge from the KB; ii) generating executable logical forms with both semantic and syntactic correctness. In this paper, we present a new KBQA model, TIARA, which addresses those issues by applying multi-grained retrieval to help the PLM focus on the most relevant KB context, viz., entities, exemplary logical forms, and schema items. Moreover, constrained decoding is used to control the output space and reduce generation errors. Experiments over important benchmarks demonstrate the effectiveness of our approach. TIARA outperforms previous SOTA, including those using PLMs or oracle entity annotations, by at least 4.1 and 1.1 F1 points on GrailQA and WebQuestionsSP, respectively. Specifically on GrailQA, TIARA outperforms previous models in all categories, with an improvement of 4.7 F1 points in zero-shot generalization.</abstract>
       <url hash="e36b12e0">2022.emnlp-main.555</url>
@@ -7646,9 +7646,9 @@
       <author><first>Bin</first><last>Wang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jiangzhou</first><last>Ju</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Yang</first><last>Fan</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>8122-8132</pages>
       <abstract>As one of the challenging NLP tasks, designing math word problem (MWP) solvers has attracted increasing research attention for the past few years. In previous work, models designed by taking into account the properties of the binary tree structure of mathematical expressions at the output side have achieved better performance. However, the expressions corresponding to a MWP are often diverse (e.g., <tex-math>n_1+n_2 \times n_3-n_4</tex-math>, <tex-math>n_3\times n_2-n_4+n_1</tex-math>, etc.), and so are the corresponding binary trees, which creates difficulties in model learning due to the non-deterministic output space. In this paper, we propose the Structure-Unified M-Tree Coding Solver (SUMC-Solver), which applies a tree with any M branches (M-tree) to unify the output structures. To learn the M-tree, we use a mapping to convert the M-tree into the M-tree codes, where codes store the information of the paths from tree root to leaf nodes and the information of leaf nodes themselves, and then devise a Sequence-to-Code (seq2code) model to generate the codes. Experimental results on the widely used MAWPS and Math23K datasets have demonstrated that SUMC-Solver not only outperforms several state-of-the-art models under similar experimental settings but also performs much better under low-resource conditions.</abstract>
       <url hash="0b188dce">2022.emnlp-main.556</url>
@@ -7799,7 +7799,7 @@
       <author><first>Benjamin</first><last>Dayan</last><affiliation>ETH Zurich</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>ETH Zürich</affiliation></author>
       <author><first>Tim</first><last>Vieira</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>8289-8305</pages>
       <abstract>Weighted finite-state automata (WSFAs) arecommonly used in NLP. Failure transitions area useful extension for compactly representingbackoffs or interpolation in n-gram modelsand CRFs, which are special cases of WFSAs.Unfortunately, applying standard algorithmsfor computing the pathsum requires expand-ing these compact failure transitions. As aresult, na ̈ıve computation of the pathsum inacyclic WFSAs with failure transitions runs inO(|Q|2|Σ|) (O(|Q||Σ|) for deterministic WF-SAs) while the equivalent algorithm in normalWFSAs runs in O(|E|), where E representsthe set of transitions, Q the set of states, andΣ the alphabet. In this work, we present moreefficient algorithms for computing the pathsumin sparse acyclic WFSAs, i.e., WFSAs with av-erage out symbol fraction s ≪ 1. In those,backward runs in O(s|Q||Σ|). We proposean algorithm for semiring-weighted automatawhich runs in O(|E| + s|Σ||Q||Tmax| log |Σ|),where |Tmax| is the size of the largest con-nected component of failure transitions. Ad-ditionally, we propose faster algorithms fortwo specific cases. For ring-weighted WF-SAs we propose an algorithm with complex-ity O(|E| + s|Σ||Q||πmax|), where |πmax| de-notes the longest path length of failure transi-tions stemming from q and Σ(q) the set of sym-bols on the outgoing transitions from q. Forsemiring-weighted WFSAs whose failure tran-sition topology satisfies a condition exemplifiedby CRFs, we propose an algorithm with com-plexity O(|E| + s|Σ||Q| log |Σ|).</abstract>
       <url hash="69d938c6">2022.emnlp-main.567</url>
@@ -7829,7 +7829,7 @@
       <author><first>Rui</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>8318-8331</pages>
       <abstract>Adversarial training is one of the most powerful methods to improve the robustness of pre-trained language models (PLMs). However, this approach is typically more expensive than traditional fine-tuning because of the necessity to generate adversarial examples via gradient descent. Delving into the optimization process of adversarial training, we find that robust connectivity patterns emerge in the early training phase (typically 0.15~0.3 epochs), far before parameters converge. Inspired by this finding, we dig out robust early-bird tickets (i.e., subnetworks) to develop an efficient adversarial training method: (1) searching for robust tickets with structured sparsity in the early stage; (2) fine-tuning robust tickets in the remaining time. To extract the robust tickets as early as possible, we design a ticket convergence metric to automatically terminate the searching process. Experiments show that the proposed efficient adversarial training method can achieve up to <tex-math>7\times \sim 13 \times</tex-math> training speedups while maintaining comparable or even better robustness compared to the most competitive state-of-the-art adversarial training methods.</abstract>
       <url hash="3a66fa05">2022.emnlp-main.569</url>
@@ -7857,8 +7857,8 @@
       <author><first>Vassilina</first><last>Nikoulina</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Alexandre</first><last>Berard</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Caroline</first><last>Brun</last><affiliation>Naver Labs Europe</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>8348-8359</pages>
       <abstract>In recent years, multilingual machine translation models have achieved promising performance on low-resource language pairs by sharing information between similar languages, thus enabling zero-shot translation. To overcome the “curse of multilinguality”, these models often opt for scaling up the number of parameters, which makes their use in resource-constrained environments challenging. We introduce SMaLL-100, a distilled version of the M2M-100(12B) model, a massively multilingual machine translation model covering 100 languages. We train SMaLL-100 with uniform sampling across all language pairs and therefore focus on preserving the performance of low-resource languages. We evaluate SMaLL-100 on different low-resource benchmarks: FLORES-101, Tatoeba, and TICO-19 and demonstrate that it outperforms previous massively multilingual models of comparable sizes (200-600M) while improving inference latency and memory usage. Additionally, our model achieves comparable results to M2M-100 (1.2B), while being 3.6x smaller and 4.3x faster at inference.</abstract>
       <url hash="0b5f490b">2022.emnlp-main.571</url>
@@ -7877,7 +7877,7 @@
       <author><first>Yong</first><last>Ding</last><affiliation>Honor Device Co., Ltd</affiliation></author>
       <author><first>Yibo</first><last>Cheung</last><affiliation>Honor Device Co., Ltd</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>8360-8371</pages>
       <abstract>Recently, more and more pre-trained language models are released as a cloud service. It allows users who lack computing resources to perform inference with a powerful model by uploading data to the cloud. The plain text may contain private information, as the result, users prefer to do partial computations locally and upload intermediate representations to the cloud for subsequent inference.However, recent studies have shown that intermediate representations can also be recovered to plain text with reasonable accuracy, thus the risk of privacy leakage still exists. To address this issue, we propose TextFusion, a novel method for preserving inference privacy.Specifically, we train a Fusion Predictor to dynamically fuse token representations, which hides multiple private token representations behind an unrecognizable one.Furthermore, an adversarial training regime is employed to privatize these representations. In this way, the cloud only receives incomplete and perturbed representations, making it difficult to accurately recover the complete plain text.The experimental results on diverse classification tasks show that our approach can effectively preserve inference privacy without significantly sacrificing performance in different scenarios.</abstract>
       <url hash="a18b1ee0">2022.emnlp-main.572</url>
@@ -7978,7 +7978,7 @@
     <paper id="580">
       <title>Revisiting <fixed-case>D</fixed-case>oc<fixed-case>RED</fixed-case> - Addressing the False Negative Problem in Relation Extraction</title>
       <author><first>Qingyu</first><last>Tan</last><affiliation>National University of Singapore</affiliation></author>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last><affiliation>Singapore University of Technology and Design</affiliation></author>
+      <author><first>Lu</first><last>Xu</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Hwee Tou</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Sharifah Mahani</first><last>Aljunied</last><affiliation>Alibaba</affiliation></author>
@@ -7992,8 +7992,8 @@
     <paper id="581">
       <title>Towards Summary Candidates Fusion</title>
       <author><first>Mathieu</first><last>Ravaut</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>8488-8504</pages>
       <abstract>Sequence-to-sequence deep neural models fine-tuned for abstractive summarization can achieve great performance on datasets with enough human annotations. Yet, it has been shown that they have not reached their full potential, with a wide gap between the top beam search output and the oracle beam. Recently, re-ranking methods have been proposed, to learn to select a better summary candidate. However, such methods are limited by the summary quality aspects captured by the first-stage candidates. To bypass this limitation, we propose a new paradigm in second-stage abstractive summarization called SummaFusion that fuses several summary candidates to produce a novel abstractive second-stage summary. Our method works well on several summarization datasets, improving both the ROUGE scores and qualitative properties of fused summaries. It is especially good when the candidates to fuse are worse, such as in the few-shot setup where we set a new state-of-the art. We will make our code and checkpoints available at https://github.com/ntunlp/SummaFusion/.</abstract>
       <url hash="e6aa9f05">2022.emnlp-main.581</url>
@@ -8014,7 +8014,7 @@
     <paper id="583">
       <title><fixed-case>T</fixed-case>ran<fixed-case>SHER</fixed-case>: Translating Knowledge Graph Embedding with Hyper-Ellipsoidal Restriction</title>
       <author><first>Yizhi</first><last>Li</last><affiliation>University of Sheffield; Pingan Technology</affiliation></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last><affiliation>University of Central Florida</affiliation></author>
+      <author><first>Wei</first><last>Fan</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Chao</first><last>Liu</last><affiliation>Pingan Technology</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>Department of Computer Science, University of Sheffield</affiliation></author>
       <author><first>Jiang</first><last>Qian</last><affiliation>Pingan Technology</affiliation></author>
@@ -8122,7 +8122,7 @@
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
       <author><first>Taisiya</first><last>Glushkova</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
-      <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>8622-8641</pages>
       <abstract>Trainable evaluation metrics for machine translation (MT) exhibit strong correlation with human judgements, but they are often hard to interpret and might produce unreliable scores under noisy or out-of-domain data. Recent work has attempted to mitigate this with simple uncertainty quantification techniques (Monte Carlo dropout and deep ensembles), however these techniques (as we show) are limited in several ways – for example, they are unable to distinguish between different kinds of uncertainty, and they are time and memory consuming. In this paper, we propose more powerful and efficient uncertainty predictors for MT evaluation, and we assess their ability to target different sources of aleatoric and epistemic uncertainty. To this end, we develop and compare training objectives for the COMET metric to enhance it with an uncertainty prediction output, including heteroscedastic regression, divergence minimization, and direct uncertainty prediction.Our experiments show improved results on uncertainty prediction for the WMT metrics task datasets, with a substantial reduction in computational costs. Moreover, they demonstrate the ability of these predictors to address specific uncertainty causes in MT evaluation, such as low quality references and out-of-domain data.</abstract>
       <url hash="a4f22107">2022.emnlp-main.591</url>
@@ -8174,7 +8174,7 @@
       <title>Measuring the Mixing of Contextual Information in the Transformer</title>
       <author><first>Javier</first><last>Ferrando</last><affiliation>UPC</affiliation></author>
       <author><first>Gerard I.</first><last>Gállego</last><affiliation>Universitat Politècnica de Catalunya</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
       <pages>8698-8714</pages>
       <abstract>The Transformer architecture aggregates input information through the self-attention mechanism, but there is no clear understanding of how this information is mixed across the entire model. Additionally, recent works have demonstrated that attention weights alone are not enough to describe the flow of information. In this paper, we consider the whole attention block –multi-head attention, residual connection, and layer normalization– and define a metric to measure token-to-token interactions within each layer. Then, we aggregate layer-wise interpretations to provide input attribution scores for model predictions. Experimentally, we show that our method, ALTI (Aggregation of Layer-wise Token-to-token Interactions), provides more faithful explanations and increased robustness than gradient-based methods.</abstract>
       <url hash="0a070eb9">2022.emnlp-main.595</url>
@@ -8185,7 +8185,7 @@
       <title>Dealing with Abbreviations in the <fixed-case>S</fixed-case>lovenian Biographical Lexicon</title>
       <author><first>Angel</first><last>Daza</last><affiliation>Vrije Universiteit Amsterdam - Computational Linguistics &amp; Text Mining Lab</affiliation></author>
       <author><first>Antske</first><last>Fokkens</last><affiliation>VU Amsterdam</affiliation></author>
-      <author><first>Tomaž</first><last>Erjavec</last><affiliation>Dept. of Knowledge Technologies, Jožef Stefan Institute</affiliation></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last><affiliation>Dept. of Knowledge Technologies, Jožef Stefan Institute</affiliation></author>
       <pages>8715-8720</pages>
       <abstract>Abbreviations present a significant challenge for NLP systems because they cause tokenization and out-of-vocabulary errors. They can also make the text less readable, especially in reference printed books, where they are extensively used. Abbreviations are especially problematic in low-resource settings, where systems are less robust to begin with. In this paper, we propose a new method for addressing the problems caused by a high density of domain-specific abbreviations in a text. We apply this method to the case of a Slovenian biographical lexicon and evaluate it on a newly developed gold-standard dataset of 51 Slovenian biographies. Our abbreviation identification method performs significantly better than commonly used ad-hoc solutions, especially at identifying unseen abbreviations. We also propose and present the results of a method for expanding the identified abbreviations in context.</abstract>
       <url hash="2224b671">2022.emnlp-main.596</url>
@@ -8223,7 +8223,7 @@
       <author><first>Gerard I.</first><last>Gállego</last><affiliation>Universitat Politècnica de Catalunya</affiliation></author>
       <author><first>Belen</first><last>Alastruey</last><affiliation>Universitat Politècnica de Catalunya</affiliation></author>
       <author><first>Carlos</first><last>Escolano</last><affiliation>Universitat Politècnica de Catalunya</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
       <pages>8756-8769</pages>
       <abstract>In Neural Machine Translation (NMT), each token prediction is conditioned on the source sentence and the target prefix (what has been previously translated at a decoding step). However, previous work on interpretability in NMT has mainly focused solely on source sentence tokens’ attributions. Therefore, we lack a full understanding of the influences of every input token (source sentence and target prefix) in the model predictions. In this work, we propose an interpretability method that tracks input tokens’ attributions for both contexts. Our method, which can be extended to any encoder-decoder Transformer-based model, allows us to better comprehend the inner workings of current NMT models. We apply the proposed method to both bilingual and multilingual Transformers and present insights into their behaviour.</abstract>
       <url hash="016c1ee1">2022.emnlp-main.599</url>
@@ -8237,7 +8237,7 @@
       <author><first>Shyma</first><last>Alhuwaider</last><affiliation>King Abdullah University of Science and Technology</affiliation></author>
       <author><first>Feifan</first><last>Li</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Xiangliang</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
-      <author><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Mohamed</first><last>Elhoseiny</last><affiliation>KAUST</affiliation></author>
       <pages>8770-8785</pages>
       <abstract>This paper introduces ArtELingo, a new benchmark and dataset, designed to encourage work on diversity across languages and cultures. Following ArtEmis, a collection of 80k artworks from WikiArt with 0.45M emotion labels and English-only captions, ArtELingo adds another 0.79M annotations in Arabic and Chinese, plus 4.8K in Spanish to evaluate “cultural-transfer” performance. 51K artworks have 5 annotations or more in 3 languages. This diversity makes it possible to study similarities and differences across languages and cultures. Further, we investigate captioning tasks, and find diversity improves the performance of baseline models. ArtELingo is publicly available at ‘www.artelingo.org‘ with standard splits and baseline models. We hope our work will help ease future research on multilinguality and culturally-aware AI.</abstract>
@@ -8311,7 +8311,7 @@
       <author><first>Ye</first><last>Liu</last><affiliation>Salesforce</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Rui</first><last>Meng</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Metamind</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
       <pages>8858-8869</pages>
@@ -8404,9 +8404,9 @@
     </paper>
     <paper id="612">
       <title>A Major Obstacle for <fixed-case>NLP</fixed-case> Research: Let’s Talk about Time Allocation!</title>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Shiran</first><last>Dudy</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>8959-8969</pages>
       <abstract>The field of natural language processing (NLP) has grown over the last few years: conferences have become larger, we have published an incredible amount of papers, and state-of-the-art research has been implemented in a large variety of customer-facing products. However, this paper argues that we have been less successful than we *should* have been and reflects on where and how the field fails to tap its full potential. Specifically, we demonstrate that, in recent years, **subpar time allocation has been a major obstacle for NLP research**. We outline multiple concrete problems together with their negative consequences and, importantly, suggest remedies to improve the status quo. We hope that this paper will be a starting point for discussions around which common practices are – or are *not* – beneficial for NLP research.</abstract>
       <url hash="06e6f2fb">2022.emnlp-main.612</url>
@@ -8442,7 +8442,7 @@
       <author><first>Linyong</first><last>Nan</last><affiliation>Yale University</affiliation></author>
       <author><first>Zhenting</first><last>Qi</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Penn State University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>9006-9018</pages>
       <abstract>Reasoning over tabular data requires both table structure understanding and a broad set of table reasoning skills. Current models with table-specific architectures and pre-training methods perform well on understanding table structures, but they still struggle with tasks that require various table reasoning skills. In this work, we develop ReasTAP to show that high-level table reasoning skills can be injected into models during pre-training without a complex table-specific architecture design. We define 7 table reasoning skills, such as numerical operation, temporal comparison, and conjunction. Each reasoning skill is associated with one example generator, which synthesizes questions over semi-structured tables according to the sampled templates. We model the table pre-training task as a sequence generation task and pre-train ReasTAP to generate precise answers of the synthetic examples. ReasTAP is evaluated on four benchmarks covering three downstream tasks including 1) WikiSQL-Weak and WikiTQ for Table Question Answering, 2) TabFact for Table Fact Verification, and 3) LogicNLG for Faithful Table-to-Text Generation. Experimental results demonstrate that ReasTAP achieves new state-of-the-art results on all of them and delivers a significant improvement under low-resource setting. Our code is publicly available at https://github.com/Yale-LILY/ReasTAP.</abstract>
       <url hash="9397aad9">2022.emnlp-main.615</url>
@@ -8451,7 +8451,7 @@
     </paper>
     <paper id="616">
       <title>Few-shot Learning with Multilingual Generative Language Models</title>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
       <author><first>Todor</first><last>Mihaylov</last><affiliation>Meta AI</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Tianlu</first><last>Wang</last><affiliation>Meta</affiliation></author>
@@ -8467,9 +8467,9 @@
       <author><first>Vishrav</first><last>Chaudhary</last><affiliation>Microsoft</affiliation></author>
       <author><first>Brian</first><last>O’Horo</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jeff</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Zornitsa</first><last>Kozareva</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Veselin</first><last>Stoyanov</last><affiliation>Facebook</affiliation></author>
       <author><first>Xian</first><last>Li</last><affiliation>Meta AI</affiliation></author>
       <pages>9019-9052</pages>
@@ -8493,7 +8493,7 @@
       <title>Detecting Label Errors by Using Pre-Trained Language Models</title>
       <author><first>Derek</first><last>Chong</last><affiliation>Stanford University</affiliation></author>
       <author><first>Jenny</first><last>Hong</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>9074-9091</pages>
       <abstract>We show that large pre-trained language models are inherently highly capable of identifying label errors in natural language datasets: simply examining out-of-sample data points in descending order of fine-tuned task loss significantly outperforms more complex error-detection mechanisms proposed in previous work. To this end, we contribute a novel method for introducing realistic, human-originated label noise into existing crowdsourced datasets such as SNLI and TweetNLP. We show that this noise has similar properties to real, hand-verified label errors, and is harder to detect than existing synthetic noise, creating challenges for model robustness.We argue that human-originated noise is a better standard for evaluation than synthetic noise. Finally, we use crowdsourced verification to evaluate the detection of real errors on IMDB, Amazon Reviews, and Recon, and confirm that pre-trained models perform at a 9–36% higher absolute Area Under the Precision-Recall Curve than existing models.</abstract>
       <url hash="a2bd5f60">2022.emnlp-main.618</url>
@@ -8555,7 +8555,7 @@
     <paper id="623">
       <title>Improving Factual Consistency in Summarization with Compression-Based Post-Editing</title>
       <author><first>Alex</first><last>Fabbri</last><affiliation>Salesforce AI Research</affiliation></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Jesse</first><last>Vig</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Metamind</affiliation></author>
@@ -8602,7 +8602,7 @@
       <author><first>Yicheng</first><last>He</last><affiliation>Columbia University</affiliation></author>
       <author><first>Wenhao</first><last>Li</last><affiliation>Columbia University</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>9212-9224</pages>
       <abstract>Visual commonsense understanding requires Vision Language (VL) models to not only understand image and text but also cross-reference in-between to fully integrate and achieve comprehension of the visual scene described. Recently, various approaches have been developed and have achieved high performance on visual commonsense benchmarks. However, it is unclear whether the models really understand the visual scene and underlying commonsense knowledge due to limited evaluation data resources. To provide an in-depth analysis, we present a Multimodal Evaluation (ME) pipeline to automatically generate question-answer pairs to test models’ understanding of the visual scene, text, and related knowledge. We then take a step further to show that training with the ME data boosts the model’s performance in standard VCR evaluation. Lastly, our in-depth analysis and comparison reveal interesting findings: (1) semantically low-level information can assist the learning of high-level information but not the opposite; (2) visual information is generally under utilization compared with text.</abstract>
       <url hash="9f439fef">2022.emnlp-main.626</url>
@@ -8649,7 +8649,7 @@
     </paper>
     <paper id="630">
       <title>Overcoming Catastrophic Forgetting in Zero-Shot Cross-Lingual Generation</title>
-      <author><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Aditya</first><last>Barua</last><affiliation>Google</affiliation></author>
       <author><first>Brian</first><last>Lester</last><affiliation>Google</affiliation></author>
       <author><first>Daniel</first><last>Cer</last><affiliation>Google Research; University of California at Berkeley</affiliation></author>
@@ -8728,7 +8728,7 @@
       <author><first>Mahdi</first><last>Namazifar</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Jesse</first><last>Thomason</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>9369-9378</pages>
       <abstract>Embodied Vision and Language Task Completion requires an embodied agent to interpret natural language instructions and egocentric visual observations to navigate through and interact with environments. In this work, we examine ALFRED, a challenging benchmark for embodied task completion, with the goal of gaining insight into how effectively models utilize language. We find evidence that sequence-to-sequence and transformer-based models trained on this benchmark are not sufficiently sensitive to changes in input language instructions. Next, we construct a new test split – ALFRED-L to test whether ALFRED models can generalize to task structures not seen during training that intuitively require the same types of language understanding required in ALFRED. Evaluation of existing models on ALFRED-L suggests that (a) models are overly reliant on the sequence in which objects are visited in typical ALFRED trajectories and fail to adapt to modifications of this sequence and (b) models trained with additional augmented trajectories are able to adapt relatively better to such changes in input language instructions.</abstract>
       <url hash="e70a11f9">2022.emnlp-main.636</url>
@@ -8740,7 +8740,7 @@
     <paper id="637">
       <title>Dungeons and Dragons as a Dialog Challenge for Artificial Intelligence</title>
       <author><first>Chris</first><last>Callison-Burch</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google Research</affiliation></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google Research</affiliation></author>
       <author><first>Lara J.</first><last>Martin</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Daphne</first><last>Ippolito</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Suma</first><last>Bailis</last><affiliation>Google Research</affiliation></author>
@@ -8775,7 +8775,7 @@
       <author id="chris-thomas"><first>Christopher</first><last>Thomas</last><affiliation>Columbia University</affiliation></author>
       <author><first>Hammad</first><last>Ayyubi</last><affiliation>Columbia University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>9402-9413</pages>
       <abstract>Given a long untrimmed video and natural language queries, video grounding (VG) aims to temporally localize the semantically-aligned video segments. Almost all existing VG work holds two simple but unrealistic assumptions: 1) All query sentences can be grounded in the corresponding video. 2) All query sentences for the same video are always at the same semantic scale. Unfortunately, both assumptions make today’s VG models fail to work in practice. For example, in real-world multimodal assets (eg, news articles), most of the sentences in the article can not be grounded in their affiliated videos, and they typically have rich hierarchical relations (ie, at different semantic scales). To this end, we propose a new challenging grounding task: Weakly-Supervised temporal Article Grounding (WSAG). Specifically, given an article and a relevant video, WSAG aims to localize all “groundable” sentences to the video, and these sentences are possibly at different semantic scales. Accordingly, we collect the first WSAG dataset to facilitate this task: YouwikiHow, which borrows the inherent multi-scale descriptions in wikiHow articles and plentiful YouTube videos. In addition, we propose a simple but effective method DualMIL for WSAG, which consists of a two-level MIL loss and a single-/cross- sentence constraint loss. These training objectives are carefully designed for these relaxed assumptions. Extensive ablations have verified the effectiveness of DualMIL.</abstract>
       <url hash="6c1b70af">2022.emnlp-main.639</url>
@@ -8786,7 +8786,7 @@
       <title>Exploring Dual Encoder Architectures for Question Answering</title>
       <author><first>Zhe</first><last>Dong</last><affiliation>Google Inc</affiliation></author>
       <author><first>Jianmo</first><last>Ni</last><affiliation>Google</affiliation></author>
-      <author><first>Dan</first><last>Bikel</last><affiliation>Meta</affiliation></author>
+      <author id="daniel-m-bikel"><first>Dan</first><last>Bikel</last><affiliation>Meta</affiliation></author>
       <author><first>Enrique</first><last>Alfonseca</last><affiliation>Google</affiliation></author>
       <author><first>Yuan</first><last>Wang</last><affiliation>Google</affiliation></author>
       <author><first>Chen</first><last>Qu</last><affiliation>Google</affiliation></author>
@@ -8836,7 +8836,7 @@
     </paper>
     <paper id="644">
       <title>Towards Teachable Reasoning Systems: Using a Dynamic Memory of User Feedback for Continual System Improvement</title>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Oyvind</first><last>Tafjord</last><affiliation>AI2</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>9465-9480</pages>
@@ -8849,7 +8849,7 @@
     <paper id="645">
       <title>Knowledge Transfer from Answer Ranking to Answer Generation</title>
       <author><first>Matteo</first><last>Gabburo</last><affiliation>University of Trento</affiliation></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Amazon</affiliation></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Amazon</affiliation></author>
       <author><first>Siddhant</first><last>Garg</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Alessandro</first><last>Moschitti</last><affiliation>Amazon</affiliation></author>
@@ -9055,7 +9055,7 @@
     <paper id="660">
       <title>Leveraging <fixed-case>QA</fixed-case> Datasets to Improve Generative Data Augmentation</title>
       <author><first>Dheeraj</first><last>Mekala</last><affiliation>University of California San Diego</affiliation></author>
-      <author><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Timo</first><last>Schick</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jingbo</first><last>Shang</last><affiliation>University of California, San Diego</affiliation></author>
       <pages>9737-9750</pages>
@@ -9095,7 +9095,7 @@
       <author><first>Meng</first><last>Cao</last><affiliation>McGill University</affiliation></author>
       <author><first>Yue</first><last>Dong</last><affiliation>McGill University</affiliation></author>
       <author><first>Jingyi</first><last>He</last><affiliation>McGill University</affiliation></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
       <pages>9768-9780</pages>
       <abstract>State-of-the-art abstractive summarization systems frequently hallucinate content that is not supported by the source document, mainly due to noise in the training dataset.Existing methods opt to drop the noisy samples or tokens from the training set entirely, reducing the effective training set size and creating an artificial propensity to copy words from the source. In this work, we propose a training objective for abstractive summarization based on rejection learning, in which the model learns whether or not to reject potentially noisy tokens. We further propose a regularized decoding objective that penalizes non-factual candidate summaries during inference by using the rejection probability learned during training.We show that our method considerably improves the factuality of generated summaries in automatic and human evaluations when compared to five baseline models, and that it does so while increasing the abstractiveness of the generated summaries.</abstract>
       <url hash="5996ae97">2022.emnlp-main.663</url>
@@ -9141,7 +9141,7 @@
       <title>Correcting Diverse Factual Errors in Abstractive Summarization via Post-Editing and Language Model Infilling</title>
       <author><first>Vidhisha</first><last>Balachandran</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>University of Washington</affiliation></author>
       <pages>9818-9830</pages>
       <abstract>Abstractive summarization models often generate inconsistent summaries containing factual errors or hallucinated content. Recent works focus on correcting factual errors in generated summaries via post-editing. Such correction models are trained using adversarial non-factual summaries constructed using heuristic rules for injecting errors. However, generating non-factual summaries using heuristics often does not generalize well to actual model errors. In this work, we propose to generate hard, representative synthetic examples of non-factual summaries through infilling language models. With this data, we train a more robust fact-correction model to post-edit the summaries to improve factual consistency. Through quantitative and qualitative experiments on two popular summarization datasets— CNN/DM and XSum—we show that our approach vastly outperforms prior methods in correcting erroneous summaries. Our model—FactEdit—improves factuality scores by over ~11 points on CNN/DM and over ~31 points on XSum on average across multiple summarization models, producing more factual summaries while maintaining competitive summarization quality.</abstract>
@@ -9171,7 +9171,7 @@
       <author><first>Ji</first><last>Ma</last><affiliation>Google</affiliation></author>
       <author><first>Vincent</first><last>Zhao</last><affiliation>Google</affiliation></author>
       <author><first>Yi</first><last>Luan</last><affiliation>Google</affiliation></author>
-      <author><first>Keith</first><last>Hall</last><affiliation>Google Research</affiliation></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last><affiliation>Google Research</affiliation></author>
       <author><first>Ming-Wei</first><last>Chang</last><affiliation>Google Research</affiliation></author>
       <author><first>Yinfei</first><last>Yang</last><affiliation>Google</affiliation></author>
       <pages>9844-9855</pages>
@@ -9202,7 +9202,7 @@
       <author><first>Shengqiong</first><last>Wu</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Fangfang</first><last>Su</last><affiliation>School of National Cybersecurity, Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education,School of Computer Science, Wuhan University</affiliation></author>
       <author><first>Wenxuan</first><last>Shi</last><affiliation>Wuhan University</affiliation></author>
-      <author><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Bo</first><last>Cai</last><affiliation>Wuhan University</affiliation></author>
       <pages>9871-9881</pages>
       <abstract>Relation Extraction (RE) is a fundamental task of information extraction, which has attracted a large amount of research attention. Previous studies focus on extracting the relations within a sentence or document, while currently researchers begin to explore cross-document RE. However, current cross-document RE methods directly utilize text snippets surrounding target entities in multiple given documents, which brings considerable noisy and non-relevant sentences. Moreover, they utilize all the text paths in a document bag in a coarse-grained way, without considering the connections between these text paths.In this paper, we aim to address both of these shortages and push the state-of-the-art for cross-document RE. First, we focus on input construction for our RE model and propose an entity-based document-context filter to retain useful information in the given documents by using the bridge entities in the text paths. Second, we propose a cross-document RE model based on cross-path entity relation attention, which allow the entity relations across text paths to interact with each other. We compare our cross-document RE method with the state-of-the-art methods in the dataset CodRED. Our method outperforms them by at least 10% in F1, thus demonstrating its effectiveness.</abstract>
@@ -9244,7 +9244,7 @@
     <paper id="674">
       <title>Polyglot Prompt: Multilingual Multitask Prompt Training</title>
       <author><first>Jinlan</first><last>Fu</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Pengfei</first><last>Liu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>9919-9935</pages>
       <abstract>This paper aims for a potential architectural improvement for multilingual learning and asks: Can different tasks from different languages be modeled in a monolithic framework, i.e. without any task/language-specific module? The benefit of achieving this could open new doors for future multilingual research, including allowing systems trained on low resources to be further assisted by other languages as well as other tasks. We approach this goal by developing a learning framework named Polyglot Prompting to exploit prompting methods for learning a unified semantic space for different languages and tasks with multilingual prompt engineering. We performed a comprehensive evaluation of 6 tasks, namely topic classification, sentiment classification, named entity recognition, question answering, natural language inference, and summarization, covering 24 datasets and 49 languages. The experimental results demonstrated the efficacy of multilingual multitask prompt-based learning and led to inspiring observations. We also present an interpretable multilingual evaluation methodology and show how the proposed framework, multilingual multitask prompt training, works. We release all datasets prompted in the best setting and code.</abstract>
@@ -9282,7 +9282,7 @@
       <author><first>Zhiling</first><last>Zhang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Siyuan</first><last>Chen</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>9970-9985</pages>
       <abstract>Mental disease detection (MDD) from social media has suffered from poor generalizability and interpretability, due to lack of symptom modeling. This paper introduces PsySym, the first annotated symptom identification corpus of multiple psychiatric disorders, to facilitate further research progress. PsySym is annotated according to a knowledge graph of the 38 symptom classes related to 7 mental diseases complied from established clinical manuals and scales, and a novel annotation framework for diversity and quality. Experiments show that symptom-assisted MDD enabled by PsySym can outperform strong pure-text baselines. We also exhibit the convincing MDD explanations provided by symptom predictions with case studies, and point to their further potential applications.</abstract>
       <url hash="ae64f7b0">2022.emnlp-main.677</url>
@@ -9310,8 +9310,8 @@
       <author><first>Hannah</first><last>Rashkin</last><affiliation>Google Research</affiliation></author>
       <author><first>David</first><last>Reitter</last><affiliation>Google Research</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google Research</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google Research</affiliation></author>
       <pages>10000-10014</pages>
       <abstract>Compared to standard retrieval tasks, passage retrieval for conversational question answering (CQA) poses new challenges in understanding the current user question, as each question needs to be interpreted within the dialogue context. Moreover, it can be expensive to re-train well-established retrievers such as search engines that are originally developed for non-conversational queries. To facilitate their use, we develop a query rewriting model CONQRR that rewrites a conversational question in the context into a standalone question. It is trained with a novel reward function to directly optimize towards retrieval using reinforcement learning and can be adapted to any off-the-shelf retriever. CONQRR achieves state-of-the-art results on a recent open-domain CQA dataset containing conversations from three different sources, and is effective for two different off-the-shelf retrievers. Our extensive analysis also shows the robustness of CONQRR to out-of-domain dialogues as well as to zero query rewriting supervision.</abstract>
       <url hash="42547e1e">2022.emnlp-main.679</url>
@@ -9403,7 +9403,7 @@
       <title>Adaptive Contrastive Learning on Multimodal Transformer for Review Helpfulness Prediction</title>
       <author><first>Thong</first><last>Nguyen</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>Zhen</first><last>Hai</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <pages>10085-10096</pages>
@@ -9420,7 +9420,7 @@
       <author><first>Jiuyi</first><last>Li</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Huan</first><last>Liu</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen university</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>10097-10113</pages>
       <abstract>Multilingual neural machine translation aims to translate multiple language pairs in a single model and has shown great success thanks to the knowledge transfer across languages with the shared parameters. Despite promising, this share-all paradigm suffers from insufficient ability to capture language-specific features. Currently, the common practice is to insert or search language-specific networks to balance the shared and specific features. However, those two types of features are not sufficient enough to model the complex commonality and divergence across languages, such as the locally shared features among similar languages, which leads to sub-optimal transfer, especially in massively multilingual translation. In this paper, we propose a novel token-level feature mixing method that enables the model to capture different features and dynamically determine the feature sharing across languages. Based on the observation that the tokens in the multilingual model are usually shared by different languages, we we insert a feature mixing layer into each Transformer sublayer and model each token representation as a mix of different features, with a proportion indicating its feature preference. In this way, we can perform fine-grained feature sharing and achieve better multilingual transfer. Experimental results on multilingual datasets show that our method outperforms various strong baselines and can be extended to zero-shot translation. Further analyses reveal that our method can capture different linguistic features and bridge the representation gap across languages.</abstract>
       <url hash="50f2a608">2022.emnlp-main.687</url>
@@ -9467,7 +9467,7 @@
       <title>Two is Better than Many? Binary Classification as an Effective Approach to Multi-Choice Question Answering</title>
       <author><first>Deepanway</first><last>Ghosal</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Navonil</first><last>Majumder</last><affiliation>Singapore University of Technology and Design</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <pages>10158-10166</pages>
       <abstract>We propose a simple refactoring of multi-choice question answering (MCQA) tasks as a series of binary classifications. The MCQA task is generally performed by scoring each (question, answer) pair normalized over all the pairs, and then selecting the answer from the pair that yield the highest score. For n answer choices, this is equivalent to an n-class classification setup where only one class (true answer) is correct. We instead show that classifying (question, true answer) as positive instances and (question, false answer) as negative instances is significantly more effective across various models and datasets. We show the efficacy of our proposed approach in different tasks – abductive reasoning, commonsense question answering, science question answering, and sentence completion. Our DeBERTa binary classification model reaches the top or close to the top performance on public leaderboards for these tasks. The source code of the proposed approach is available at https://github.com/declare-lab/TEAM.</abstract>
@@ -9557,7 +9557,7 @@
       <author><first>Suraj</first><last>Tripathi</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Sumit</first><last>Agarwal</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Teruko</first><last>Mitamura</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>10243-10255</pages>
       <abstract>Code-switched (CS) data is ubiquitous in today’s globalized world, but the dearth of annotated datasets in code-switching poses a significant challenge for learning diverse tasks across different language pairs. Parameter-efficient prompt-tuning approaches conditioned on frozen language models have shown promise for transfer learning in limited-resource setups. In this paper, we propose a novel instance-based prompt composition technique, PRO-CS, for CS tasks that combine language and task knowledge. We compare our approach with prompt-tuning and fine-tuning for code-switched tasks on 10 datasets across 4 language pairs. Our model outperforms the prompt-tuning approach by significant margins across all datasets and outperforms or remains at par with fine-tuning by using just 0.18% of total parameters. We also achieve competitive results when compared with the fine-tuned model in the low-resource cross-lingual and cross-task setting, indicating the effectiveness of our approach to incorporate new code-switched tasks.</abstract>
       <url hash="f5d5059d">2022.emnlp-main.698</url>
@@ -9610,7 +9610,7 @@
       <author><first>Giwon</first><last>Hong</last><affiliation>KAIST School of Computing</affiliation></author>
       <author><first>Jeonghwan</first><last>Kim</last><affiliation>Korea Advanced Institute of Science and Technology (KAIST)</affiliation></author>
       <author><first>Junmo</first><last>Kang</last><affiliation>KAIST</affiliation></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last><affiliation>School of Computing, KAIST</affiliation></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last><affiliation>School of Computing, KAIST</affiliation></author>
       <pages>10288-10294</pages>
       <abstract>A graph is a suitable data structure to represent the structural information of text. Recently, multi-hop question answering (MHQA) tasks, which require inter-paragraph/sentence linkages, have come to exploit such properties of a graph. Previous approaches to MHQA relied on leveraging the graph information along with the pre-trained language model (PLM) encoders. However, this trend exhibits the following drawbacks: (i) sample inefficiency while training in a low-resource setting; (ii) lack of reusability due to changes in the model structure or input. Our work proposes the Graph-Induced Transformer (GIT) that applies graph-derived attention patterns directly into a PLM, without the need to employ external graph modules. GIT can leverage the useful inductive bias of graphs while retaining the unperturbed Transformer structure and parameters. Our experiments on HotpotQA successfully demonstrate both the sample efficient characteristic of GIT and its capacity to replace the graph modules while preserving model performance.</abstract>
       <url hash="38aaa162">2022.emnlp-main.702</url>
@@ -9659,7 +9659,7 @@
       <author><first>Qiongkai</first><last>Xu</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Tongtong</first><last>Wu</last><affiliation>Southeast University</affiliation></author>
       <author><first>Tianyang</first><last>Zhan</last><affiliation>Bytedance Inc.</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>10335-10356</pages>
       <abstract>In this paper, we propose a variational autoencoder with disentanglement priors, VAE-Dprior, for task-specific natural language generation with none or a handful of task-specific labeled examples. In order to tackle compositional generalization across tasks, our model performs disentangled representation learning by introducing a conditional prior for the latent content space and another conditional prior for the latent label space. Both types of priors satisfy a novel property called <tex-math>\epsilon</tex-math>-disentangled. We show both empirically and theoretically that the novel priors can disentangle representations even without specific regularizations as in the prior work. The content prior enables directly sampling diverse content representations from the content space learned from the seen tasks, and fuse them with the representations of novel tasks for generating semantically diverse texts in the low-resource settings. Our extensive experiments demonstrate the superior performance of our model over competitive baselines in terms of i) data augmentation in continuous zero/few-shot learning, and ii) text style transfer in the few-shot setting.</abstract>
       <url hash="64298f77">2022.emnlp-main.706</url>
@@ -9710,9 +9710,9 @@
     <paper id="710">
       <title>Discourse Context Predictability Effects in <fixed-case>H</fixed-case>indi Word Order</title>
       <author><first>Sidharth</first><last>Ranjan</last><affiliation>Indian Institute of Technology Delhi (IIT Delhi)</affiliation></author>
-      <author><first>Marten</first><last>van Schijndel</last><affiliation>Cornell University</affiliation></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last><affiliation>Cornell University</affiliation></author>
       <author><first>Sumeet</first><last>Agarwal</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last><affiliation>Assistant Professor, Department of Humanities and Social Sciences, IISER Bhopal</affiliation></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last><affiliation>Assistant Professor, Department of Humanities and Social Sciences, IISER Bhopal</affiliation></author>
       <pages>10390-10406</pages>
       <abstract>We test the hypothesis that discourse predictability influences Hindi syntactic choice. While prior work has shown that a number of factors (e.g., information status, dependency length, and syntactic surprisal) influence Hindi word order preferences, the role of discourse predictability is underexplored in the literature. Inspired by prior work on syntactic priming, we investigate how the words and syntactic structures in a sentence influence the word order of the following sentences. Specifically, we extract sentences from the Hindi-Urdu Treebank corpus (HUTB), permute the preverbal constituents of those sentences, and build a classifier to predict which sentences actually occurred in the corpus against artificially generated distractors. The classifier uses a number of discourse-based features and cognitive features to make its predictions, including dependency length, surprisal, and information status. We find that information status and LSTM-based discourse predictability influence word order choices, especially for non-canonical object-fronted orders. We conclude by situating our results within the broader syntactic priming literature.</abstract>
       <url hash="0213df26">2022.emnlp-main.710</url>
@@ -9769,7 +9769,7 @@
       <author><first>Hyundong</first><last>Cho</last><affiliation>USC, Information Sciences Institute</affiliation></author>
       <author><first>Pegah</first><last>Jandaghi</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Dong-Ho</first><last>Lee</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>University of Southern California</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Jay</first><last>Pujara</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Xiang</first><last>Ren</last><affiliation>University of Southern California</affiliation></author>
       <pages>10450-10468</pages>
@@ -9847,7 +9847,7 @@
       <author><first>Xiangrui</first><last>Cai</last><affiliation>Nankai University</affiliation></author>
       <author><first>Yike</first><last>Wu</last><affiliation>Nankai University</affiliation></author>
       <author><first>Haiwei</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Guoqing</first><last>Zhao</last><affiliation>Mashang Consumer Finance Co.,Ltd.</affiliation></author>
       <author><first>Ning</first><last>Jiang</last><affiliation>Mashang Consumer Finance Co.,Ltd.</affiliation></author>
       <pages>10527-10536</pages>
@@ -9861,7 +9861,7 @@
       <author><first>Kaiyu</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Jin</first><last>Ma</last><affiliation>ustc</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>10537-10550</pages>
       <abstract>In a practical real-world scenario, the longstanding goal is that a universal multilingual translation model can be incrementally updated when new language pairs arrive. Specifically, the initial vocabulary only covers some of the words in new languages, which hurts the translation quality for incremental learning. Although existing approaches attempt to address this issue by replacing the original vocabulary with a rebuilt vocabulary or constructing independent language-specific vocabularies, these methods can not meet the following three demands simultaneously: (1) High translation quality for original and incremental languages, (2) low cost for model training, (3) low time overhead for preprocessing. In this work, we propose an entropy-based vocabulary substitution (EVS) method that just needs to walk through new language pairs for incremental learning in a large-scale multilingual data updating while remaining the size of the vocabulary. Our method has access to learn new knowledge from updated training samples incrementally while keeping high translation quality for original language pairs, alleviating the issue of catastrophic forgetting. Results of experiments show that EVS can achieve better performance and save excess overhead for incremental learning in the multilingual machine translation task.</abstract>
       <url hash="ce4c24b8">2022.emnlp-main.720</url>
@@ -9908,9 +9908,9 @@
     <paper id="724">
       <title>Making Science Simple: Corpora for the Lay Summarisation of Scientific Literature</title>
       <author><first>Tomas</first><last>Goldsack</last><affiliation>University of Sheffield</affiliation></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
+      <author><first>Zhihao</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>Department of Computer Science, University of Sheffield</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <pages>10589-10604</pages>
       <abstract>Lay summarisation aims to jointly summarise and simplify a given text, thus making its content more comprehensible to non-experts.Automatic approaches for lay summarisation can provide significant value in broadening access to scientific literature, enabling a greater degree of both interdisciplinary knowledge sharing and public understanding when it comes to research findings. However, current corpora for this task are limited in their size and scope, hindering the development of broadly applicable data-driven approaches. Aiming to rectify these issues, we present two novel lay summarisation datasets, PLOS (large-scale) and eLife (medium-scale), each of which contains biomedical journal articles alongside expert-written lay summaries.We provide a thorough characterisation of our lay summaries, highlighting differing levels of readability and abstractivenessbetween datasets that can be leveraged to support the needs of different applications.Finally, we benchmark our datasets using mainstream summarisation approaches and perform a manual evaluation with domain experts, demonstrating their utility and casting light on the key challenges of this task.</abstract>
       <url hash="5520489c">2022.emnlp-main.724</url>
@@ -9985,7 +9985,7 @@
       <author><first>Tianhao</first><last>Shen</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Mingtong</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Ming</first><last>Zhou</last><affiliation>Langboat Technology</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>10659-10670</pages>
       <abstract>Negative samples have not been efficiently explored in multilingual dense passage retrieval. In this paper, we propose a novel multilingual dense passage retrieval framework, mHFN, to recover and utilize hard and false negative samples. mHFN consists of three key components: 1) a multilingual hard negative sample augmentation module that allows knowledge of indistinguishable passages to be shared across multiple languages and synthesizes new hard negative samples by interpolating representations of queries and existing hard negative samples, 2) a multilingual negative sample cache queue that stores negative samples from previous batches in each language to increase the number of multilingual negative samples used in training beyond the batch size limit, and 3) a lightweight adaptive false negative sample filter that uses generated pseudo labels to separate unlabeled false negative samples and converts them into positive passages in training. We evaluate mHFN on Mr. TyDi, a high-quality multilingual dense passage retrieval dataset covering eleven typologically diverse languages, and experimental results show that mHFN outperforms strong sparse, dense and hybrid baselines and achieves new state-of-the-art performance on all languages. Our source code is available at https://github.com/Magnetic2014/mHFN.</abstract>
       <url hash="7897e11c">2022.emnlp-main.730</url>
@@ -9994,7 +9994,7 @@
     </paper>
     <paper id="731">
       <title>The “Problem” of Human Label Variation: On Ground Truth in Data, Modeling and Evaluation</title>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>10671-10682</pages>
       <abstract>Human variation in labeling is often considered noise. Annotation projects for machine learning (ML) aim at minimizing human label variation, with the assumption to maximize data quality and in turn optimize and maximize machine learning metrics. However, thisconventional practice assumes that there exists a *ground truth*, and neglects that there exists genuine human variation in labeling due to disagreement, subjectivity in annotation or multiple plausible answers.In this position paper, we argue that this big open problem of <i>human label variation</i> persists and critically needs more attention to move our field forward. This is because human label variation impacts all stages of the ML pipeline: *data, modeling and evaluation*. However, few works consider all of these dimensions jointly; and existing research is fragmented. We reconcile different previously proposed notions of human label variation, provide a repository of publicly-available datasets with un-aggregated labels, depict approaches proposed so far, identify gaps and suggest ways forward. As datasets are becoming increasingly available, we hope that this synthesized view on the “problem” will lead to an open discussion on possible strategies to devise fundamentally new directions.</abstract>
       <url hash="bb4f83b2">2022.emnlp-main.731</url>
@@ -10030,7 +10030,7 @@
     <paper id="734">
       <title>Facilitating Contrastive Learning of Discourse Relational Senses by Exploiting the Hierarchy of Sense Relations</title>
       <author><first>Wanqiu</first><last>Long</last><affiliation>The University of Edinburgh</affiliation></author>
-      <author><first>Bonnie</first><last>Webber</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>10704-10716</pages>
       <abstract>Implicit discourse relation recognition is a challenging task that involves identifying the sense or senses that hold between two adjacent spans of text, in the absense of an explicit connective between them. In both PDTB-2 (prasad et al., 2008) and PDTB-3 (Webber et al., 2019), discourse relational senses are organized into a three-level hierarchy ranging from four broad top-level senses, to more specific senses below them. Most previous work on implicitf discourse relation recognition have used the sense hierarchy simply to indicate what sense labels were available. Here we do more — incorporating the sense hierarchy into the recognition process itself and using it to select the negative examples used in contrastive learning. With no additional effort, the approach achieves state-of-the-art performance on the task. Our code is released inhttps://github.com/wanqiulong 0923/Contrastive_IDRR.</abstract>
       <url hash="38ecda26">2022.emnlp-main.734</url>
@@ -10084,7 +10084,7 @@
       <author><first>Zhaoran</first><last>Liu</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Guilin</first><last>Qi</last><affiliation>Southeast University</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>10751-10762</pages>
       <abstract>Relation extraction typically aims to extract semantic relationships between entities from the unstructured text.One of the most essential data sources for relation extraction is the spoken language, such as interviews and dialogues.However, the error propagation introduced in automatic speech recognition (ASR) has been ignored in relation extraction, and the end-to-end speech-based relation extraction method has been rarely explored.In this paper, we propose a new listening information extraction task, i.e., speech relation extraction.We construct the training dataset for speech relation extraction via text-to-speech systems, and we construct the testing dataset via crowd-sourcing with native English speakers.We explore speech relation extraction via two approaches: the pipeline approach conducting text-based extraction with a pretrained ASR module, and the end2end approach via a new proposed encoder-decoder model, or what we called SpeechRE.We conduct comprehensive experiments to distinguish the challenges in speech relation extraction, which may shed light on future explorations. We share the code and data on https://github.com/wutong8023/SpeechRE.</abstract>
       <url hash="ae96517d">2022.emnlp-main.738</url>
@@ -10095,7 +10095,7 @@
       <title>Structural Constraints and Natural Language Inference for End-to-End Flowchart Grounded Dialog Response Generation</title>
       <author><first>Dinesh</first><last>Raghu</last><affiliation>IBM Research</affiliation></author>
       <author><first>Suraj</first><last>Joshi</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last><affiliation>IBM</affiliation></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last><affiliation>IBM</affiliation></author>
       <author><first>Mausam</first><last>-</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <pages>10763-10774</pages>
       <abstract>Flowchart grounded dialog systems converse with users by following a given flowchart and a corpus of FAQs. The existing state-of-the-art approach (Raghu et al, 2021) for learning such a dialog system, named FLONET, has two main limitations. (1) It uses a Retrieval Augmented Generation (RAG) framework which represents a flowchart as a bag of nodes. By doing so, it loses the connectivity structure between nodes that can aid in better response generation. (2) Typically dialogs progress with the agent asking polar (Y/N) questions, but users often respond indirectly without the explicit use of polar words. In such cases, it fails to understand the correct polarity of the answer. To overcome these issues, we propose Structure-Aware FLONET (SA-FLONET) which infuses structural constraints derived from the connectivity structure of flowcharts into the RAG framework. It uses natural language inference to better predict the polarity of indirect Y/N answers. We find that SA-FLONET outperforms FLONET, with a success rate improvement of 68% and 123% in flowchart grounded response generation and zero-shot flowchart grounded response generation tasks respectively.</abstract>
@@ -10132,7 +10132,7 @@
       <author><first>Chi</first><last>Chen</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>10799-10810</pages>
       <abstract>Recently there has been an emerging interest in unsupervised vision-and-language pre-training (VLP) that learns multimodal representations without parallel image-caption data. These pioneering works significantly reduce the cost of VLP on data collection and achieve promising results compared to supervised VLP. However, existing unsupervised VLP methods take as input pre-extracted region-based visual features from external object detectors, which both limits flexibility and reduces computational efficiency. In this paper, we explore end-to-end unsupervised VLP with a vision encoder to directly encode images. The vision encoder is pre-trained on image-only data and jointly optimized during multimodal pre-training. To further enhance the learned cross-modal features, we propose a novel pre-training task that predicts which patches contain an object referred to in natural language from the encoded visual features. Extensive experiments on four vision-and-language tasks show that our approach outperforms previous unsupervised VLP methods and obtains new state-of-the-art results.</abstract>
       <url hash="a6fb0b35">2022.emnlp-main.742</url>
@@ -10159,7 +10159,7 @@
       <author><first>Jaimeen</first><last>Ahn</last><affiliation>Independent Researcher</affiliation></author>
       <author><first>Jihyung</first><last>Moon</last><affiliation>SoftlyAI</affiliation></author>
       <author><first>Sungjoon</first><last>Park</last><affiliation>SoftlyAI</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>10818-10833</pages>
       <abstract>Recent directions for offensive language detection are hierarchical modeling, identifying the type and the target of offensive language, and interpretability with offensive span annotation and prediction. These improvements are focused on English and do not transfer well to other languages because of cultural and linguistic differences. In this paper, we present the Korean Offensive Language Dataset (KOLD) comprising 40,429 comments, which are annotated hierarchically with the type and the target of offensive language, accompanied by annotations of the corresponding text spans. We collect the comments from NAVER news and YouTube platform and provide the titles of the articles and videos as the context information for the annotation process. We use these annotated comments as training data for Korean BERT and RoBERTa models and find that they are effective at offensiveness detection, target classification, and target span detection while having room for improvement for target group classification and offensive span detection. We discover that the target group distribution differs drastically from the existing English datasets, and observe that providing the context information improves the model performance in offensiveness detection (+0.3), target classification (+1.5), and target group classification (+13.1). We publicly release the dataset and baseline models.</abstract>
       <url hash="22ea4287">2022.emnlp-main.744</url>
@@ -10188,7 +10188,7 @@
       <author><first>Leonie</first><last>Weissweiler</last><affiliation>CIS, LMU Munich</affiliation></author>
       <author><first>Valentin</first><last>Hofmann</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Abdullatif</first><last>Köksal</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>10859-10882</pages>
       <abstract>Construction Grammar (CxG) is a paradigm from cognitive linguistics emphasising the connection between syntax and semantics. Rather than rules that operate on lexical items, it posits constructions as the central building blocks of language, i.e., linguistic units of different granularity that combine syntax and semantics. As a first step towards assessing the compatibility of CxG with the syntactic and semantic knowledge demonstrated by state-of-the-art pretrained language models (PLMs), we present an investigation of their capability to classify and understand one of the most commonly studied constructions, the English comparative correlative (CC). We conduct experiments examining the classification accuracy of a syntactic probe on the one hand and the models’ behaviour in a semantic application task on the other, with BERT, RoBERTa, and DeBERTa as the example PLMs. Our results show that all three investigated PLMs are able to recognise the structure of the CC but fail to use its meaning. While human-like performance of PLMs on many NLP tasks has been alleged, this indicates that PLMs still suffer from substantial shortcomings in central domains of linguistic knowledge.</abstract>
       <url hash="ac7ead29">2022.emnlp-main.746</url>
@@ -10202,7 +10202,7 @@
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xin</first><last>Zhou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10883-10892</pages>
       <abstract>Proof generation focuses on deductive reasoning: given a hypothesis and a set of theories, including some supporting facts and logical rules expressed in natural language, the model generates a proof tree indicating how to deduce the hypothesis from given theories.Current models with state-of-the-art performance employ the stepwise method that adds an individual node to the proof step-by-step.However, these methods actually focus on generating several proof paths rather than a whole tree.During generation, they focus on the most relevant areas of the currently generated node while neglecting the rest of the proof tree. To address this problem, we propose ProofInfer, which generates the proof tree via iterative hierarchical inference.At each step, ProofInfer adds the entire layer to the proof, where all nodes in this layer are generated simultaneously. Since the conventional autoregressive generation architecture cannot simultaneously predict multiple nodes, ProofInfer employs text-to-text paradigm.To this end, we propose a divide-and-conquer algorithm to encode the proof tree as the plain text without losing structure information.Experimental results show that ProofInfer significantly improves performance on several widely-used datasets.In addition, ProofInfer still performs well with data-limited, achieving comparable performance to the state-of-the-art model with about 40% of the training data.</abstract>
       <url hash="3e584b83">2022.emnlp-main.747</url>
@@ -10220,7 +10220,7 @@
       <author><first>Shivani</first><last>Shrivastava</last><affiliation>Goldman Sachs</affiliation></author>
       <author><first>Koustuv</first><last>Dasgupta</last><affiliation>Goldman Sachs</affiliation></author>
       <author><first>Niloy</first><last>Ganguly</last><affiliation>IIT kharagpur</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>IIT Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>IIT Kharagpur</affiliation></author>
       <author><first>Pawan</first><last>Goyal</last><affiliation>IIT Kharagpur</affiliation></author>
       <pages>10893-10906</pages>
       <abstract>Despite tremendous progress in automatic summarization, state-of-the-art methods are predominantly trained to excel in summarizing short newswire articles, or documents with strong layout biases such as scientific articles or government reports. Efficient techniques to summarize financial documents, discussing facts and figures, have largely been unexplored, majorly due to the unavailability of suitable datasets. In this work, we present ECTSum, a new dataset with transcripts of earnings calls (ECTs), hosted by publicly traded companies, as documents, and experts-written short telegram-style bullet point summaries derived from corresponding Reuters articles. ECTs are long unstructured documents without any prescribed length limit or format. We benchmark our dataset with state-of-the-art summarization methods across various metrics evaluating the content quality and factual consistency of the generated summaries. Finally, we present a simple yet effective approach, ECT-BPS, to generate a set of bullet points that precisely capture the important facts discussed in the calls.</abstract>
@@ -10330,7 +10330,7 @@
       <title>Semantic Simplification for Sentiment Classification</title>
       <author><first>Xiaotong</first><last>Jiang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Zhongqing</first><last>Wang</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
       <pages>11022-11032</pages>
       <abstract>Recent work on document-level sentiment classification has shown that the sentiment in the original text is often hard to capture, since the sentiment is usually either expressed implicitly or shifted due to the occurrences of negation and rhetorical words. To this end, we enhance the original text with a sentiment-driven simplified clause to intensify its sentiment. The simplified clause shares the same opinion with the original text but expresses the opinion much more simply. Meanwhile, we employ Abstract Meaning Representation (AMR) for generating simplified clauses, since AMR explicitly provides core semantic knowledge, and potentially offers core concepts and explicit structures of original texts. Empirical studies show the effectiveness of our proposed model over several strong baselines. The results also indicate the importance of simplified clauses for sentiment classification.</abstract>
       <url hash="f8080267">2022.emnlp-main.757</url>
@@ -10341,7 +10341,7 @@
     <paper id="758">
       <title><fixed-case>XP</fixed-case>rompt: Exploring the Extreme of Prompt Tuning</title>
       <author><first>Fang</first><last>Ma</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Lei</first><last>Ren</last><affiliation>Meituan-Dianping Group</affiliation></author>
       <author><first>Jingang</first><last>Wang</last><affiliation>Meituan</affiliation></author>
       <author><first>Qifan</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
@@ -10362,7 +10362,7 @@
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>11048-11064</pages>
       <abstract>Large language models (LMs) are able to in-context learn—perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required—randomly replacing labels in the demonstrations barely hurts performance on a range of classification and multi-choce tasks, consistently over 12 different models including GPT-3. Instead, we find that other aspects of the demonstrations are the key drivers of endtask performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.</abstract>
       <url hash="0087a7b6">2022.emnlp-main.759</url>
@@ -10416,7 +10416,7 @@
       <author><first>Ao</first><last>Zhang</last><affiliation>NUS</affiliation></author>
       <author><first>Wei</first><last>Ji</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Zhiyuan</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
       <pages>11104-11117</pages>
       <abstract>Vision-language pre-training (VLP) has shown impressive performance on a wide range of cross-modal tasks, where VLP models without reliance on object detectors are becoming the mainstream due to their superior computation efficiency and competitive performance. However, the removal of object detectors also deprives the capability of VLP models in explicit object modeling, which is essential to various position-sensitive vision-language (VL) tasks, such as referring expression comprehension and visual commonsense reasoning. To address the challenge, we introduce PEVL that enhances the pre-training and prompt tuning of VLP models with explicit object position modeling. Specifically, PEVL reformulates discretized object positions and language in a unified language modeling framework, which facilitates explicit VL alignment during pre-training, and also enables flexible prompt tuning for various downstream tasks. We show that PEVL enables state-of-the-art performance of detector-free VLP models on position-sensitive tasks such as referring expression comprehension and phrase grounding, and also improves the performance on position-insensitive tasks with grounded inputs. We make the data and code for this paper publicly available at https://github.com/thunlp/PEVL.</abstract>
@@ -10430,8 +10430,8 @@
       <author><first>Shaobo</first><last>Li</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Xiaoguang</first><last>Li</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Lifeng</first><last>Shang</last><affiliation>Noah’s Ark Lab Huawei Technologies Co. Ltd. Sha Tin, Hong Kong</affiliation></author>
-      <author><first>Chengjie</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Bingquan</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Zhenzhou</first><last>Ji</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Xin</first><last>Jiang</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
@@ -10584,7 +10584,7 @@
     <paper id="775">
       <title><fixed-case>S</fixed-case>cience<fixed-case>W</fixed-case>orld: Is your Agent Smarter than a 5th Grader?</title>
       <author><first>Ruoyao</first><last>Wang</last><affiliation>University of Arizona</affiliation></author>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Marc-Alexandre</first><last>Côté</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Prithviraj</first><last>Ammanabrolu</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>11279-11298</pages>
@@ -10633,8 +10633,8 @@
     <paper id="779">
       <title>Balancing out Bias: Achieving Fairness Through Balanced Training</title>
       <author><first>Xudong</first><last>Han</last><affiliation>The university of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>11335-11350</pages>
       <abstract>Group bias in natural language processing tasks manifests as disparities in system error rates across texts authorized by different demographic groups, typically disadvantaging minority groups. Dataset balancing has been shown to be effective at mitigating bias, however existing approaches do not directly account for correlations between author demographics and linguistic variables, limiting their effectiveness. To achieve Equal Opportunity fairness, such as equal job opportunity without regard to demographics, this paper introduces a simple, but highly effective, objective for countering bias using balanced training.We extend the method in the form of a gated model, which incorporates protected attributes as input, and show that it is effective at reducing bias in predictions through demographic input perturbation, outperforming all other bias mitigation techniques when combined with balanced training.</abstract>
       <url hash="f7a043a1">2022.emnlp-main.779</url>
@@ -10661,7 +10661,7 @@
     <paper id="781">
       <title>Identifying Physical Object Use in Sentences</title>
       <author><first>Tianyu</first><last>Jiang</last><affiliation>University of Utah</affiliation></author>
-      <author><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
       <pages>11362-11372</pages>
       <abstract>Commonsense knowledge about the typicalfunctions of physical objects allows people tomake inferences during sentence understanding.For example, we infer that “Sam enjoyedthe book” means that Sam enjoyed reading thebook, even though the action is implicit. Priorresearch has focused on learning the prototypicalfunctions of physical objects in order toenable inferences about implicit actions. Butmany sentences refer to objects even when theyare not used (e.g., “The book fell”). We arguethat NLP systems need to recognize whether anobject is being used before inferring how theobject is used. We define a new task called ObjectUse Classification that determines whethera physical object mentioned in a sentence wasused or likely will be used. We introduce a newdataset for this task and present a classificationmodel that exploits data augmentation methodsand FrameNet when fine-tuning a pre-trainedlanguage model. We also show that object useclassification combined with knowledge aboutthe prototypical functions of objects has thepotential to yield very good inferences aboutimplicit and anticipated actions.</abstract>
       <url hash="5be2c78e">2022.emnlp-main.781</url>
@@ -10718,10 +10718,10 @@
       <title>Improving Tokenisation by Alternative Treatment of Spaces</title>
       <author><first>Edward</first><last>Gow-Smith</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last><affiliation>University of Bath</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Aline</first><last>Villavicencio</last><affiliation>University of Sheffield, UK</affiliation></author>
       <pages>11430-11443</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="2ac546e0">2022.emnlp-main.786</url>
       <bibkey>gow-smith-etal-2022-improving</bibkey>
       <doi>10.18653/v1/2022.emnlp-main.786</doi>
@@ -10734,8 +10734,8 @@
       <author><first>Nicholas</first><last>Lourie</last><affiliation>New York University</affiliation></author>
       <author><first>Jungo</first><last>Kasai</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Daniel</first><last>Weld</last><affiliation>University of Washington &amp; Allen Institute for Artificial Inelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last><affiliation>University of Washington &amp; Allen Institute for Artificial Inelligence</affiliation></author>
       <pages>11444-11458</pages>
       <abstract>While often assumed a gold standard, effective human evaluation of text generation remains an important, open area for research.We revisit this problem with a focus on producing consistent evaluations that are reproducible—over time and across different populations. We study this goal in different stages of the human evaluation pipeline. In particular, we consider design choices for the annotation interface used to elicit human judgments and their impact on reproducibility. Furthermore, we develop an automated mechanism for maintaining annotator quality via a probabilistic model that detects and excludes noisy annotators. Putting these lessons together, we introduce GENIE: a system for running standardized human evaluations across different generation tasks.We instantiate GENIE with datasets representing four core challenges in text generation: machine translation, summarization, commonsense reasoning, and machine comprehension.For each task, GENIE offers a leaderboard that automatically crowdsources annotations for submissions, evaluating them along axes such as correctness, conciseness, and fluency.We have made the GENIE leaderboards publicly available, and have already ranked 50 submissions from 10 different research groups. We hope GENIE encourages further progress toward effective, standardized evaluations for text generation.</abstract>
       <url hash="92ffa770">2022.emnlp-main.787</url>
@@ -10763,7 +10763,7 @@
       <author><first>Sam</first><last>Thomson</last><affiliation>Microsoft Semantic Machines</affiliation></author>
       <author><first>Hao</first><last>Fang</last><affiliation>Microsoft Semantic Machines</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University / Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>The Ohio State University</affiliation></author>
       <pages>11473-11487</pages>
       <abstract>In natural language understanding (NLU) production systems, users’ evolving needs necessitate the addition of new features over time, indexed by new symbols added to the meaning representation space. This requires additional training data and results in ever-growing datasets. We present the first systematic investigation into this incremental symbol learning scenario. Our analysis reveals a troubling quirk in building broad-coverage NLU systems: as the training dataset grows, performance on a small set of new symbols often decreases. We show that this trend holds for multiple mainstream models on two common NLU tasks: intent recognition and semantic parsing. Rejecting class imbalance as the sole culprit, we reveal that the trend is closely associated with an effect we call source signal dilution, where strong lexical cues for the new symbol become diluted as the training dataset grows. Selectively dropping training examples to prevent dilution often reverses the trend, showing the over-reliance of mainstream neural NLU models on simple lexical cues.</abstract>
@@ -10855,7 +10855,7 @@
       <author><first>Hao</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Chujie</first><last>Zheng</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Fei</first><last>Mi</last><affiliation>Huawei</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
       <pages>11580-11599</pages>
       <abstract>Offensive language detection is increasingly crucial for maintaining a civilized social media platform and deploying pre-trained language models. However, this task in Chinese is still under exploration due to the scarcity of reliable datasets. To this end, we propose a benchmark –COLD for Chinese offensive language analysis, including a Chinese Offensive Language Dataset –COLDATASET and a baseline detector –COLDETECTOR which is trained on the dataset. We show that the COLD benchmark contributes to Chinese offensive language detection which is challenging for existing resources. We then deploy the COLDETECTOR and conduct detailed analyses on popular Chinese pre-trained language models. We first analyze the offensiveness of existing generative models and show that these models inevitably expose varying degrees of offensive issues. Furthermore, we investigate the factors that influence the offensive generations, and we find that anti-bias contents and keywords referring to certain groups or revealing negative attitudes trigger offensive outputs easier.</abstract>
@@ -10866,9 +10866,9 @@
     <paper id="797">
       <title>Fixing Model Bugs with Natural Language Patches</title>
       <author><first>Shikhar</first><last>Murty</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <author><first>Scott</first><last>Lundberg</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Marco Tulio</first><last>Ribeiro</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last><affiliation>Microsoft Research</affiliation></author>
       <pages>11600-11613</pages>
       <abstract>Current approaches for fixing systematic problems in NLP models (e.g., regex patches, finetuning on more data) are either brittle, or labor-intensive and liable to shortcuts. In contrast, humans often provide corrections to each other through natural language. Taking inspiration from this, we explore natural language patches—declarative statements that allow developers to provide corrective feedback at the right level of abstraction, either overriding the model (“if a review gives 2 stars, the sentiment is negative”) or providing additional information the model may lack (“if something is described as the bomb, then it is good”). We model the task of determining if a patch applies separately from the task of integrating patch information, and show that with a small amount of synthetic data, we can teach models to effectively use real patches on real data—1 to 7 patches improve accuracy by ~1–4 accuracy points on different slices of a sentiment analysis dataset, and F1 by 7 points on a relation extraction dataset. Finally, we show that finetuning on as many as 100 labeled examples may be needed to match the performance of a small set of language patches.</abstract>
       <url hash="3b3432ea">2022.emnlp-main.797</url>
@@ -10968,7 +10968,7 @@
       <author><first>Todor</first><last>Mihaylov</last><affiliation>Meta AI</affiliation></author>
       <author><first>Myle</first><last>Ott</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Sam</first><last>Shleifer</last><affiliation>Hugging Face</affiliation></author>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jingfei</first><last>Du</last><affiliation>Facebook</affiliation></author>
       <author><first>Srinivasan</first><last>Iyer</last><affiliation>Facebook</affiliation></author>
       <author><first>Ramakanth</first><last>Pasunuru</last><affiliation>Meta</affiliation></author>
@@ -10982,8 +10982,8 @@
       <author><first>Punit Singh</first><last>Koura</last><affiliation>Facebook Inc.</affiliation></author>
       <author><first>Brian</first><last>O’Horo</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jeffrey</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Zornitsa</first><last>Kozareva</last><affiliation>Meta AI</affiliation></author>
       <author><first>Veselin</first><last>Stoyanov</last><affiliation>Facebook</affiliation></author>
       <pages>11699-11732</pages>
@@ -11048,7 +11048,7 @@
       <author><first>Paula</first><last>Fortuna</last><affiliation>TALN, Pompeu Fabra University</affiliation></author>
       <author><first>Monica</first><last>Dominguez</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <author><first>Leo</first><last>Wanner</last><affiliation>ICREA and Pompeu Fabra University</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
       <pages>11794-11805</pages>
       <abstract>Addressing hate speech in online spaces has been conceptualized as a classification task that uses Natural Language Processing (NLP) techniques. Through this conceptualization, the hate speech detection task has relied on common conventions and practices from NLP. For instance, inter-annotator agreement is conceptualized as a way to measure dataset quality and certain metrics and benchmarks are used to assure model generalization. However, hate speech is a deeply complex and situated concept that eludes such static and disembodied practices. In this position paper, we critically reflect on these methodologies for hate speech detection, we argue that many conventions in NLP are poorly suited for the problem and encourage researchers to develop methods that are more appropriate for the task.</abstract>
       <url hash="f92f8994">2022.emnlp-main.809</url>
@@ -11072,11 +11072,11 @@
     <paper id="811">
       <title><fixed-case>O</fixed-case>pen<fixed-case>CQA</fixed-case>: Open-ended Question Answering with Charts</title>
       <author><first>Shankar</first><last>Kantharaj</last><affiliation>York University</affiliation></author>
-      <author><first>Xuan Long</first><last>Do</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Rixie Tiffany</first><last>Leong</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>Jia Qing</first><last>Tan</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <pages>11817-11837</pages>
       <abstract>Charts are very popular to analyze data and convey important insights. People often analyze visualizations to answer open-ended questions that require explanatory answers. Answering such questions are often difficult and time-consuming as it requires a lot of cognitive and perceptual efforts. To address this challenge, we introduce a new task called OpenCQA, where the goal is to answer an open-ended question about a chart with descriptive texts. We present the annotation process and an in-depth analysis of our dataset. We implement and evaluate a set of baselines under three practical settings. In the first setting, a chart and the accompanying article is provided as input to the model. The second setting provides only the relevant paragraph(s) to the chart instead of the entire article, whereas the third setting requires the model to generate an answer solely based on the chart. Our analysis of the results show that the top performing models generally produce fluent and coherent text while they struggle to perform complex logical and arithmetic reasoning.</abstract>
       <url hash="9d335f0b">2022.emnlp-main.811</url>
@@ -11090,7 +11090,7 @@
       <author><first>Adhiguna</first><last>Kuncoro</last><affiliation>University of Oxford and DeepMind</affiliation></author>
       <author><first>Jordan</first><last>Hoffmann</last><affiliation>DeepMind</affiliation></author>
       <author><first>Cyprien</first><last>de Masson d’Autume</last><affiliation>DeepMind</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Aida</first><last>Nematzadeh</last><affiliation>DeepMind</affiliation></author>
       <pages>11838-11855</pages>
       <abstract>Language models (LMs) trained on large amounts of data have shown impressive performance on many NLP tasks under the zero-shot and few-shot setup. Here we aim to better understand the extent to which such models learn commonsense knowledge — a critical component of many NLP applications. We conduct a systematic and rigorous zero-shot and few-shot commonsense evaluation of large pre-trained LMs, where we: (i) carefully control for the LMs’ ability to exploit potential surface cues and annotation artefacts, and (ii) account for variations in performance that arise from factors that are not related to commonsense knowledge. Our findings highlight the limitations of pre-trained LMs in acquiring commonsense knowledge without task-specific supervision; furthermore, using larger models or few-shot evaluation is insufficient to achieve human-level commonsense performance.</abstract>
@@ -11121,7 +11121,7 @@
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Jie</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon Research</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>11875-11886</pages>
       <abstract>Users expect their queries to be answered by search systems, regardless of the query’s surface form, which include keyword queries and natural questions. Natural Language Understanding (NLU) components of Search and QA systems may fail to correctly interpret semantically equivalent inputs if this deviates from how the system was trained, leading to suboptimal understanding capabilities. We propose the keyword-question rewriting task to improve query understanding capabilities of NLU systems for all surface forms. To achieve this, we present CycleKQR, an unsupervised approach, enabling effective rewriting between keyword and question queries using non-parallel data.Empirically we show the impact on QA performance of unfamiliar query forms for open domain and Knowledge Base QA systems (trained on either keywords or natural language questions). We demonstrate how CycleKQR significantly improves QA performance by rewriting queries into the appropriate form, while at the same time retaining the original semantic meaning of input queries, allowing CycleKQR to improve performance by up to 3% over supervised baselines. Finally, we release a datasetof 66k keyword-question pairs.</abstract>
       <url hash="dceec899">2022.emnlp-main.814</url>
@@ -11132,7 +11132,7 @@
       <title>Model Criticism for Long-Form Text Generation</title>
       <author><first>Yuntian</first><last>Deng</last><affiliation>Harvard University</affiliation></author>
       <author><first>Volodymyr</first><last>Kuleshov</last><affiliation>Cornell Tech</affiliation></author>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
       <pages>11887-11912</pages>
       <abstract>Language models have demonstrated the ability to generate highly fluent text; however, it remains unclear whether their output retains coherent high-level structure (e.g., story progression). Here, we propose to apply a statistical tool, model criticism in latent space, to evaluate the high-level structure of the generated text. Model criticism compares the distributions between real and generated data in a latent space obtained according to an assumptive generative process. Different generative processes identify specific failure modes of the underlying model. We perform experiments on three representative aspects of high-level discourse—coherence, coreference, and topicality—and find that transformer-based language models are able to capture topical structures but have a harder time maintaining structural coherence or modeling coreference.</abstract>
       <url hash="93043fc8">2022.emnlp-main.815</url>
@@ -11184,7 +11184,7 @@
       <author><first>Nirali</first><last>Parekh</last><affiliation>Dwarkadas J. Sanghvi College of Engineering</affiliation></author>
       <author><first>Karan</first><last>Waghela</last><affiliation>Santa Clara University</affiliation></author>
       <author><first>Lynette</first><last>D’Mello</last><affiliation>Dwarkadas J. Sanghvi College of Engineering</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
       <pages>11951-11961</pages>
       <abstract>The use of emojis affords a visual modality to, often private, textual communication.The task of predicting emojis however provides a challenge for machine learning as emoji use tends to cluster into the frequently used and the rarely used emojis.Much of the machine learning research on emoji use has focused on high resource languages and has conceptualised the task of predicting emojis around traditional server-side machine learning approaches.However, traditional machine learning approaches for private communication can introduce privacy concerns, as these approaches require all data to be transmitted to a central storage.In this paper, we seek to address the dual concerns of emphasising high resource languages for emoji prediction and risking the privacy of people’s data.We introduce a new dataset of 118k tweets (augmented from 25k unique tweets) for emoji prediction in Hindi, and propose a modification to the federated learning algorithm, CausalFedGSD, which aims to strike a balance between model performance and user privacy. We show that our approach obtains comparative scores with more complex centralised models while reducing the amount of data required to optimise the models and minimising risks to user privacy.</abstract>
       <url hash="25467c02">2022.emnlp-main.819</url>
@@ -11223,7 +11223,7 @@
       <title>Improving Low-Resource Languages in Pre-Trained Multilingual Language Models</title>
       <author><first>Viktor</first><last>Hangya</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Hossain Shaikh</first><last>Saadi</last><affiliation>Technical University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>11993-12006</pages>
       <abstract>Pre-trained multilingual language models are the foundation of many NLP approaches, including cross-lingual transfer solutions. However, languages with small available monolingual corpora are often not well-supported by these models leading to poor performance. We propose an unsupervised approach to improve the cross-lingual representations of low-resource languages by bootstrapping word translation pairs from monolingual corpora and using them to improve language alignment in pre-trained language models. We perform experiments on nine languages, using contextual word retrieval and zero-shot named entity recognition to measure both intrinsic cross-lingual word representation quality and downstream task performance, showing improvements on both tasks. Our results show that it is possible to improve pre-trained multilingual language models by relying only on non-parallel resources.</abstract>
       <url hash="323d2baa">2022.emnlp-main.822</url>
@@ -11322,7 +11322,7 @@
   <volume id="tutorials" ingest-date="2022-12-12" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts</booktitle>
-      <editor><first>Samhaa R.</first><last>El-Beltagy</last></editor>
+      <editor id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></editor>
       <editor><first>Xipeng</first><last>Qiu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dubai, UAE</address>
@@ -11341,7 +11341,7 @@
       <author><first>Ishan</first><last>Jindal</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Tim</first><last>O’Gorman</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>1-8</pages>
       <abstract>This tutorial reviews the design of common meaning representations, SoTA models for predicting meaning representations, and the applications of meaning representations in a wide range of downstream NLP tasks and real-world applications. Reporting by a diverse team of NLP researchers from academia and industry with extensive experience in designing, building and using meaning representations, our tutorial has three components: (1) an introduction to common meaning representations, including basic concepts and design challenges; (2) a review of SoTA methods on building models for meaning representations; and (3) an overview of applications of meaning representations in downstream NLP tasks and real-world applications. We will also present qualitative comparisons of common meaning representations and a quantitative study on how their differences impact model performance. Finally, we will share best practices in choosing the right meaning representation for downstream tasks.</abstract>
@@ -11362,7 +11362,7 @@
     </paper>
     <paper id="3">
       <title>Emergent Language-Based Coordination In Deep Multi-Agent Systems</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Roberto</first><last>Dessi</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
       <pages>11-16</pages>
@@ -11376,7 +11376,7 @@
       <title><fixed-case>C</fixed-case>ausal<fixed-case>NLP</fixed-case> Tutorial: An Introduction to Causality for Natural Language Processing</title>
       <author><first>Zhijing</first><last>Jin</last></author>
       <author><first>Amir</first><last>Feder</last></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last></author>
+      <author><first>Kun</first><last>Zhang</last></author>
       <pages>17-22</pages>
       <abstract>Causal inference is becoming an increasingly important topic in deep learning, with the potential to help with critical deep learning problems such as model robustness, interpretability, and fairness. In addition, causality is naturally widely used in various disciplines of science, to discover causal relationships among variables and estimate causal effects of interest. In this tutorial, we introduce the fundamentals of causal discovery and causal effect estimation to the natural language processing (NLP) audience, provide an overview of causal perspectives to NLP problems, and aim to inspire novel approaches to NLP further. This tutorial is inclusive to a variety of audiences and is expected to facilitate the community’s developments in formulating and addressing new, important NLP problems in light of emerging causal principles and methodologies.</abstract>
       <url hash="ae117c13">2022.emnlp-tutorials.4</url>
@@ -11502,7 +11502,7 @@
       <author><first>Daniel</first><last>Loureiro</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Dimosthenis</first><last>Antypas</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Joanne</first><last>Boisson</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Luis</first><last>Espinosa Anke</last><affiliation>Cardiff University</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Fangyu</first><last>Liu</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Eugenio</first><last>Martínez Cámara</last><affiliation>University of Granada</affiliation></author>
       <pages>38-49</pages>
@@ -11530,8 +11530,8 @@
       <author><first>Aili</first><last>Shen</last><affiliation>Alexa AI, Amazon</affiliation></author>
       <author><first>Yitong</first><last>Li</last><affiliation>Huawei Technology Co. ltd</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>Melbourne University</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>60-71</pages>
       <abstract>This paper presents FairLib, an open-source python library for assessing and improving model fairness. It provides a systematic framework for quickly accessing benchmark datasets, reproducing existing debiasing baseline models, developing new methods, evaluating models with different metrics, and visualizing their results. Its modularity and extensibility enable the framework to be used for diverse types of inputs, including natural language, images, and audio. We implement 14 debiasing methods, including pre-processing,at-training-time, and post-processing approaches. The built-in metrics cover the most commonly acknowledged fairness criteria and can be further generalized and customized for fairness evaluation.</abstract>
       <url hash="5f4511f4">2022.emnlp-demos.7</url>
@@ -11636,7 +11636,7 @@
       <title><fixed-case>K</fixed-case>eyword<fixed-case>S</fixed-case>cape: Visual Document Exploration using Contextualized Keyword Embeddings</title>
       <author><first>Henrik</first><last>Voigt</last><affiliation>Friedrich-Schiller-University</affiliation></author>
       <author><first>Monique</first><last>Meuschke</last><affiliation>University of Magdeburg</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <author><first>Kai</first><last>Lawonn</last><affiliation>University of Jena</affiliation></author>
       <pages>137-147</pages>
       <abstract>Although contextualized word embeddings have led to great improvements in automatic language understanding, their potential for practical applications in document exploration and visualization has been little explored. Common visualization techniques used for, e.g., model analysis usually provide simple scatter plots of token-level embeddings that do not provide insight into their contextual use. In this work, we propose KeywordScape, a visual exploration tool that allows to overview, summarize, and explore the semantic content of documents based on their keywords. While existing keyword-based exploration tools assume that keywords have static meanings, our tool represents keywords in terms of their contextualized embeddings. Our application visualizes these embeddings in a semantic landscape that represents keywords as islands on a spherical map. This keeps keywords with similar context close to each other, allowing for a more precise search and comparison of documents.</abstract>
@@ -11675,7 +11675,7 @@
       <author><first>Zheng</first><last>Zhang</last></author>
       <author><first>Dakuo</first><last>Wang</last></author>
       <author><first>Lucy</first><last>Yip</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Lena</first><last>Dankin</last></author>
       <author><first>Ilya</first><last>Shnayderman</last></author>
       <author><first>Ranit</first><last>Aharonov</last></author>
@@ -11710,7 +11710,7 @@
       <title><fixed-case>B</fixed-case>ot<fixed-case>SIM</fixed-case>: An End-to-End Bot Simulation Framework for Commercial Task-Oriented Dialog Systems</title>
       <author><first>Guangsen</first><last>Wang</last><affiliation>Salesforce Research Asia</affiliation></author>
       <author><first>Samson</first><last>Tan</last><affiliation>AWS AI Research &amp; Education</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Gang</first><last>Wu</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Jimmy</first><last>Au</last><affiliation>Salesforce</affiliation></author>
       <author><first>Steven C.h.</first><last>Hoi</last><affiliation>Salesforce</affiliation></author>
@@ -11746,7 +11746,7 @@
       <author><first>Bailey</first><last>Kuehl</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Sophie</first><last>Johnson</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Jonathan</first><last>Borchardt</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Daniel</first><last>Weld</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Tom</first><last>Hope</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Doug</first><last>Downey</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>200-213</pages>
@@ -11812,7 +11812,7 @@
       <title><fixed-case>L</fixed-case>ogi<fixed-case>T</fixed-case>orch: A <fixed-case>P</fixed-case>y<fixed-case>T</fixed-case>orch-based library for logical reasoning on natural language</title>
       <author><first>Chadi</first><last>Helwe</last><affiliation>Telecom Paris, Institut Polytechnique de Paris</affiliation></author>
       <author><first>Chloé</first><last>Clavel</last><affiliation>LTCI, Telecom-Paris, Institut Polytechnique de Paris</affiliation></author>
-      <author><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>250-257</pages>
       <abstract>Logical reasoning on natural language is one of the most challenging tasks for deep learning models. There has been an increasing interest in developing new benchmarks to evaluate the reasoning capabilities of language models such as BERT. In parallel, new models based on transformers have emerged to achieve ever better performance on these datasets. However, there is currently no library for logical reasoning that includes such benchmarks and models. This paper introduces LogiTorch, a PyTorch-based library that includes different logical reasoning benchmarks, different models, as well as utility functions such as co-reference resolution. This makes it easy to directly use the preprocessed datasets, to run the models, or to finetune them with different hyperparameters. LogiTorch is open source and can be found on GitHub.</abstract>
       <url hash="d772ab2f">2022.emnlp-demos.25</url>
@@ -11845,7 +11845,7 @@
       <author><first>Abhik</first><last>Bhattacharjee</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Abinaya</first><last>Mahendiran</last><affiliation>Mphasis NEXT Labs</affiliation></author>
       <author><first>Alex</first><last>Wang</last><affiliation>New York University</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Aman</first><last>Madaan</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Angelina</first><last>Mcmillan-major</last><affiliation>University of Washington</affiliation></author>
       <author><first>Anna</first><last>Shvets</last><affiliation>FabLab by Inetum</affiliation></author>
@@ -11859,15 +11859,15 @@
       <author><first>Cristina</first><last>Garbacea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Dakuo</first><last>Wang</last><affiliation>MIT-IBM Watson AI Lab / Northeastern University</affiliation></author>
       <author><first>Daniel</first><last>Deutsch</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Di</first><last>Jin</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Dimitra</first><last>Gkatzia</last><affiliation>Edinburgh Napier University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Elizabeth</first><last>Clark</last><affiliation>Google Research</affiliation></author>
       <author><first>Esin</first><last>Durmus</last><affiliation>Stanford University</affiliation></author>
       <author><first>Faisal</first><last>Ladhak</last><affiliation>Columbia University</affiliation></author>
       <author><first>Filip</first><last>Ginter</last><affiliation>University of Turku</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Hendrik</first><last>Strobelt</last><affiliation>IBM Research / MIT-IBM Watson AI Lab</affiliation></author>
       <author><first>Hiroaki</first><last>Hayashi</last><affiliation>Carnegie Mellon University / Salesforce Research</affiliation></author>
       <author><first>Jekaterina</first><last>Novikova</last><affiliation>Winterlight Labs</affiliation></author>
@@ -11886,7 +11886,7 @@
       <author id="li-zhang-upenn"><first>Li</first><last>Zhang</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Mahim</first><last>Pushkarna</last><affiliation>Google Research</affiliation></author>
       <author><first>Mathias</first><last>Creutz</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Michael</first><last>White</last><affiliation>The Ohio State University</affiliation></author>
+      <author id="michael-white"><first>Michael</first><last>White</last><affiliation>The Ohio State University</affiliation></author>
       <author><first>Mihir Sanjay</first><last>Kale</last><affiliation>Google Research</affiliation></author>
       <author><first>Moussa Kamal</first><last>Eddine</last><affiliation>École Polytechnique</affiliation></author>
       <author><first>Nico</first><last>Daheim</last><affiliation>RWTH Aachen University</affiliation></author>
@@ -11902,7 +11902,7 @@
       <author><first>Saad</first><last>Mahamood</last><affiliation>trivago N.V</affiliation></author>
       <author><first>Salomey</first><last>Osei</last><affiliation>Masakhane</affiliation></author>
       <author><first>Samuel</first><last>Cahyawijaya</last><affiliation>HKUST</affiliation></author>
-      <author><first>Sanja</first><last>Štajner</last><affiliation>Pompeu Fabra University</affiliation></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last><affiliation>Pompeu Fabra University</affiliation></author>
       <author><first>Sebastien</first><last>Montella</last><affiliation>Orange Labs</affiliation></author>
       <author><first>Shailza</first><last>Jolly</last><affiliation>TU Kaiserslautern</affiliation></author>
       <author><first>Simon</first><last>Mille</last><affiliation>Pompeu Fabra University</affiliation></author>
@@ -11927,9 +11927,9 @@
     <paper id="28">
       <title><fixed-case>KGI</fixed-case>: An Integrated Framework for Knowledge Intensive Language Tasks</title>
       <author><first>Md Faisal Mahbub</first><last>Chowdhury</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Michael</first><last>Glass</last><affiliation>Ibm</affiliation></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last><affiliation>Ibm</affiliation></author>
       <author><first>Gaetano</first><last>Rossiello</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Alfio</first><last>Gliozzo</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last><affiliation>IBM Research AI</affiliation></author>
       <pages>282-288</pages>
       <abstract>In this paper, we present a system to showcase the capabilities of the latest state-of-the-art retrieval augmented generation models trained on knowledge-intensive language tasks, such as slot filling, open domain question answering, dialogue, and fact-checking. Moreover, given a user query, we show how the output from these different models can be combined to cross-examine the outputs of each other. Particularly, we show how accuracy in dialogue can be improved using the question answering model. We are also releasing all models used in the demo as a contribution of this paper. A short video demonstrating the system is available at <url>https://ibm.box.com/v/emnlp2022-demos</url>.</abstract>
@@ -11971,7 +11971,7 @@
       <author><first>Fan</first><last>Bai</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Alan</first><last>Ritter</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Peter</first><last>Madrid</last><affiliation>Biosciences Division, SRI International</affiliation></author>
-      <author><first>Dayne</first><last>Freitag</last><affiliation>SRI International</affiliation></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last><affiliation>SRI International</affiliation></author>
       <author><first>John</first><last>Niekrasz</last><affiliation/></author>
       <pages>311-318</pages>
       <abstract>In this paper we present SynKB, an open-source, automatically extracted knowledge base of chemical synthesis protocols. Similar to proprietary chemistry databases such as Reaxsys, SynKB allows chemists to retrieve structured knowledge about synthetic procedures. By taking advantage of recent advances in natural language processing for procedural texts, SynKB supports more flexible queries about reaction conditions, and thus has the potential to help chemists search the literature for conditions used in relevant reactions as they design new synthetic routes. Using customized Transformer models to automatically extract information from 6 million synthesis procedures described in U.S. and EU patents, we show that for many queries, SynKB has higher recall than Reaxsys, while maintaining high precision. We plan to make SynKB available as an open-source tool; in contrast, proprietary chemistry databases require costly subscriptions.</abstract>
@@ -12053,7 +12053,7 @@
     </paper>
     <paper id="37">
       <title>Hands-On Interactive Neuro-Symbolic <fixed-case>NLP</fixed-case> with <fixed-case>DR</fixed-case>ai<fixed-case>L</fixed-case></title>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
       <author><first>Shamik</first><last>Roy</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University</affiliation></author>
       <pages>371-378</pages>
@@ -12137,8 +12137,8 @@
       <author><first>Yiwen</first><last>Hu</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Zhuohao</first><last>Yu</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Wenxun</first><last>Dai</last><affiliation>Xidian university</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>Ruc</affiliation></author>
-      <author><first>Jian-yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>Ruc</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Ji-rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>435-444</pages>
       <abstract>To facilitate research on text generation, this paper presents a comprehensive and unified library, TextBox 2.0, focusing on the use of pre-trained language models (PLMs). To be comprehensive, our library covers 13 common text generation tasks and their corresponding 83 datasets and further incorporates 45 PLMs covering general, translation, Chinese, dialogue, controllable, distilled, prompting, and lightweight PLMs. We also implement 4 efficient training strategies and provide 4 generation objectives for pre-training new PLMs from scratch. To be unified, we design the interfaces to support the entire research pipeline (from data loading to training and evaluation), ensuring that each step can be fulfilled in a unified way. Despite the rich functionality, it is easy to use our library, either through the friendly Python API or command line. To validate the effectiveness of our library, we conduct extensive experiments and exemplify four types of research scenarios. The project is released at the link: <url>https://github.com/RUCAIBox/TextBox#2.0</url>.</abstract>
@@ -12181,7 +12181,7 @@
       <author><first>Xiaodi</first><last>Sun</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sunny</first><last>Rajagopalan</last><affiliation>Google</affiliation></author>
       <author><first>Priyanka</first><last>Nigam</last><affiliation>Amazon</affiliation></author>
-      <author><first>Weiyi</first><last>Lu</last><affiliation>Amazon</affiliation></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Lu</last><affiliation>Amazon</affiliation></author>
       <author><first>Yi</first><last>Xu</last><affiliation>Amazon</affiliation></author>
       <author><first>Iman</first><last>Keivanloo</last><affiliation>Amazon</affiliation></author>
       <author><first>Belinda</first><last>Zeng</last><affiliation>Amazon</affiliation></author>
@@ -12197,7 +12197,7 @@
       <title>A Hybrid Approach to Cross-lingual Product Review Summarization</title>
       <author><first>Saleh</first><last>Soltan</last><affiliation>Amazon Alexa</affiliation></author>
       <author><first>Victor</first><last>Soto</last><affiliation>Amazon Inc.</affiliation></author>
-      <author><first>Ke</first><last>Tran</last><affiliation>Amazon</affiliation></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last><affiliation>Amazon</affiliation></author>
       <author><first>Wael</first><last>Hamza</last><affiliation>Amazon</affiliation></author>
       <pages>18-28</pages>
       <abstract>We present a hybrid approach for product review summarization which consists of: (i) an unsupervised extractive step to extract the most important sentences out of all the reviews, and (ii) a supervised abstractive step to summarize the extracted sentences into a coherent short summary. This approach allows us to develop an efficient cross-lingual abstractive summarizer that can generate summaries in any language, given the extracted sentences out of thousands of reviews in a source language. In order to train and test the abstractive model, we create the Cross-lingual Amazon Reviews Summarization (CARS) dataset which provides English summaries for training, and English, French, Italian, Arabic, and Hindi summaries for testing based on selected English reviews. We show that the summaries generated by our model are as good as human written summaries in coherence, informativeness, non-redundancy, and fluency.</abstract>
@@ -12254,7 +12254,7 @@
       <author><first>Niranjan</first><last>Uma Naresh</last><affiliation>Amazon</affiliation></author>
       <author><first>Ziyan</first><last>Jiang</last><affiliation>Amazon</affiliation></author>
       <author><first>Ankit</first><last>Ankit</last><affiliation>Amazon</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
       <author><first>Jie</first><last>Hao</last><affiliation>Amazon</affiliation></author>
       <author><first>Xing</first><last>Fan</last><affiliation>Amazon</affiliation></author>
       <author><first>Chenlei</first><last>Guo</last><affiliation>Amazon</affiliation></author>
@@ -12297,7 +12297,7 @@
       <author><first>Francesco</first><last>Moramarco</last><affiliation>Babylon Health</affiliation></author>
       <author><first>Alex</first><last>Papadopoulos Korfiatis</last><affiliation>Babylon Health</affiliation></author>
       <author><first>Mark</first><last>Perera</last><affiliation>Babylon</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <author><first>Ehud</first><last>Reiter</last><affiliation>University of Aberdeen</affiliation></author>
       <pages>111-120</pages>
       <abstract>Evaluating automatically generated text is generally hard due to the inherently subjective nature of many aspects of the output quality. This difficulty is compounded in automatic consultation note generation by differing opinions between medical experts both about which patient statements should be included in generated notes and about their respective importance in arriving at a diagnosis. Previous real-world evaluations of note-generation systems saw substantial disagreement between expert evaluators. In this paper we propose a protocol that aims to increase objectivity by grounding evaluations in Consultation Checklists, which are created in a preliminary step and then used as a common point of reference during quality assessment. We observed good levels of inter-annotator agreement in a first evaluation study using the protocol; further, using Consultation Checklists produced in the study as reference for automatic metrics such as ROUGE or BERTScore improves their correlation with human judgements compared to using the original human note.</abstract>
@@ -12483,8 +12483,8 @@
     </paper>
     <paper id="23">
       <title><fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>ID</fixed-case>: Learning Contrastive Representations and Compact Clusters for Semi-Supervised Intent Discovery</title>
-      <author id="qian-cao"><first>Qian</first><last>Cao</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author><first>Qian</first><last>Cao</last><affiliation>Soochow University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Qinlong</first><last>Wang</last><affiliation>Leyantech AI</affiliation></author>
       <author><first>Xia</first><last>Peng</last><affiliation>Soochow University</affiliation></author>
       <pages>226-236</pages>
@@ -12496,8 +12496,8 @@
     </paper>
     <paper id="24">
       <title>Tractable &amp; Coherent Multi-Document Summarization: Discrete Optimization of Multiple Neural Modeling Streams via Integer Linear Programming</title>
-      <author><first>Litton</first><last>J Kurisinkel</last><affiliation>A-star</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last><affiliation>A-star</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>237-243</pages>
       <abstract>One key challenge in multi-document summarization is the generated summary is often less coherent compared to single document summarization due to the larger heterogeneity of the input source content. In this work, we propose a generic framework to jointly consider coherence and informativeness in multi-document summarization and offers provisions to replace individual components based on the domain of source text. In particular, the framework characterizes coherence through verb transitions and entity mentions and takes advantage of syntactic parse trees and neural modeling for intra-sentential noise pruning. The framework cast the entire problem as an integer linear programming optimization problem with neural and non-neural models as linear components. We evaluate our method in the news and legal domains. The proposed approach consistently performs better than competitive baselines for both objective metrics and human evaluation.</abstract>
       <url hash="089268e5">2022.emnlp-industry.24</url>
@@ -12573,7 +12573,7 @@
       <author><first>Vladislav</first><last>Belyaev</last><affiliation>Comcast</affiliation></author>
       <author><first>Madhuri</first><last>Emmadi</last><affiliation>Comcast</affiliation></author>
       <author><first>Craig</first><last>Murray</last><affiliation>Comcast</affiliation></author>
-      <author><first>Ferhan</first><last>Ture</last><affiliation>Comcast Applied AI Research</affiliation></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last><affiliation>Comcast Applied AI Research</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
       <pages>285-293</pages>
       <abstract>End-to-end automatic speech recognition systems represent the state of the art, but they rely on thousands of hours of manually annotated speech for training, as well as heavyweight computation for inference. Of course, this impedes commercialization since most companies lack vast human and computational resources. In this paper, we explore training and deploying an ASR system in the label-scarce, compute-limited setting. To reduce human labor, we use a third-party ASR system as a weak supervision source, supplemented with labeling functions derived from implicit user feedback. To accelerate inference, we propose to route production-time queries across a pool of CUDA graphs of varying input lengths, the distribution of which best matches the traffic’s. Compared to our third-party ASR, we achieve a relative improvement in word-error rate of 8% and a speedup of 600%. Our system, called SpeechNet, currently serves 12 million queries per day on our voice-enabled smart television. To our knowledge, this is the first time a large-scale, Wav2vec-based deployment has been described in the academic literature.</abstract>
@@ -12626,7 +12626,7 @@
     </paper>
     <paper id="33">
       <title>Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning</title>
-      <author><first/><last>Govind</last><affiliation>Amazon</affiliation></author>
+      <author id="govind-kothari"><first/><last>Govind</last><affiliation>Amazon</affiliation></author>
       <author><first>Saurabh</first><last>Sohoney</last><affiliation>Amazon</affiliation></author>
       <pages>322-331</pages>
       <abstract>With evergrowing digital adoption in the society and increasing demand for businesses to deliver to customers doorstep, the last mile hop of transportation planning poses unique challenges in emerging geographies with unstructured addresses. One of the crucial inputs to facilitate effective planning is the task of geolocating customer addresses. Existing systems operate by aggregating historical delivery locations or by resolving/matching addresses to known buildings and campuses to vend a high-precision geolocation. However, by design they fail to cater to a significant fraction of addresses which are new in the system and have inaccurate or missing building level information. We propose a framework to resolve these addresses (referred to as hard-to-resolve henceforth) to a shallower granularity termed as neighbourhood. Specifically, we propose a weakly supervised deep metric learning model to encode the geospatial semantics in address embeddings. We present empirical evaluation on India (IN) and the United Arab Emirates (UAE) hard-to-resolve addresses to show significant improvements in learning geolocations i.e., 22% (IN) &amp; 55% (UAE) reduction in delivery defects (where learnt geocode is Y meters away from actual location), and 43% (IN) &amp; 90% (UAE) reduction in 50th percentile (p50) distance between learnt and actual delivery locations over the existing production system.</abstract>
@@ -12670,7 +12670,7 @@
       <author><first>Anjie</first><last>Fang</last><affiliation>Amazon</affiliation></author>
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>357-370</pages>
       <abstract>Conversational Question Answering (CQA) aims to answer questions contained within dialogues, which are not easily interpretable without context. Developing a model to rewrite conversational questions into self-contained ones is an emerging solution in industry settings as it allows using existing single-turn QA systems to avoid training a CQA model from scratch. Previous work trains rewriting models using human rewrites as supervision. However, such objectives are disconnected with QA models and therefore more human-like rewrites do not guarantee better QA performance. In this paper we propose using QA feedback to supervise the rewriting model with reinforcement learning. Experiments show that our approach can effectively improve QA performance over baselines for both extractive and retrieval QA. Furthermore, human evaluation shows that our method can generate more accurate and detailed rewrites when compared to human annotations.</abstract>
       <url hash="276b1ab0">2022.emnlp-industry.36</url>
@@ -12775,7 +12775,7 @@
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Akash</first><last>Veeragouni</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>429-439</pages>
       <abstract>We describe an application of Knowledge Distillation used to distill and deploy multilingual Transformer models for voice assistants, enabling text classification for customers globally. Transformers have set new state-of-the-art results for tasks like intent classification, and multilingual models exploit cross-lingual transfer to allow serving requests across 100+ languages. However, their prohibitive inference time makes them impractical to deploy in real-world scenarios with low latency requirements, such as is the case of voice assistants. We address the problem of cross-architecture distillation of multilingual Transformers to simpler models, while maintaining multilinguality without performance degradation. Training multilingual student models has received little attention, and is our main focus. We show that a teacher-student framework, where the teacher’s unscaled activations (logits) on unlabelled data are used to supervise student model training, enables distillation of Transformers into efficient multilingual CNN models. Our student model achieves equivalent performance as the teacher, and outperforms a similar model trained on the labelled data used to train the teacher model. This approach has enabled us to accurately serve global customer requests at speed (18x improvement), scale, and low cost.</abstract>
       <url hash="543a0b2e">2022.emnlp-industry.43</url>
@@ -12822,7 +12822,7 @@
       <title>Zero-Shot Dynamic Quantization for Transformer Inference</title>
       <author><first>Yousef</first><last>El-kurdi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Jerry</first><last>Quinn</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
       <pages>451-457</pages>
       <abstract>We introduce a novel run-time method for significantly reducing the accuracy loss associated with quantizing BERT-like models to 8-bit integers. Existing methods for quantizing models either modify the training procedure, or they require an additional calibration step to adjust parameters that also requires a selected held-out dataset. Our method permits taking advantage of quantization without the need for these adjustments. We present results on several NLP tasks demonstrating the usefulness of this technique.</abstract>
       <url hash="1bc70fe8">2022.emnlp-industry.45</url>
@@ -12864,7 +12864,7 @@
       <author><first>Rakesh</first><last>Chada</last><affiliation>Amazon</affiliation></author>
       <author><first>Pradeep</first><last>Natarajan</last><affiliation>Amazon</affiliation></author>
       <author><first>Chenlei</first><last>Guo</last><affiliation>Amazon</affiliation></author>
-      <author><first>Gokhan</first><last>Tur</last><affiliation>Amazon</affiliation></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last><affiliation>Amazon</affiliation></author>
       <pages>475-483</pages>
       <abstract>In conversational AI agents, Query Rewriting (QR) plays a crucial role in reducing user frictions and satisfying their daily demands. User frictions are caused by various reasons, such as errors in the conversational AI system, users’ accent or their abridged language. In this work, we present a novel Constrained Generation Framework (CGF) for query rewriting at both global and personalized levels. It is based on the encoder-decoder framework, where the encoder takes the query and its previous dialogue turns as the input to form a context-enhanced representation, and the decoder uses constrained decoding to generate the rewrites based on the pre-defined global or personalized constrained decoding space. Extensive offline and online A/B experiments show that the proposed CGF significantly boosts the query rewriting performance.</abstract>
       <url hash="be932c34">2022.emnlp-industry.48</url>
@@ -12919,14 +12919,14 @@
     <paper id="52">
       <title><fixed-case>PLATO</fixed-case>-Ad: A Unified Advertisement Text Generation Framework with Multi-Task Prompt Learning</title>
       <author><first>Zeyang</first><last>Lei</last><affiliation>Baidu Inc.</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Baidu Inc.</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Xinchao</first><last>Xu</last><affiliation>Baidu</affiliation></author>
       <author><first>Wenquan</first><last>Wu</last><affiliation>Baidu</affiliation></author>
       <author><first>Zheng-yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Hua</first><last>Wu</last><affiliation>Baidu</affiliation></author>
       <author><first>Haifeng</first><last>Wang</last><affiliation>Baidu</affiliation></author>
       <author><first>Yi</first><last>Yang</last><affiliation>Baidu Inc.</affiliation></author>
-      <author><first>Shuanglong</first><last>Li</last><affiliation>Baidu Inc.</affiliation></author>
+      <author id="shuanglong-li"><first>Shuanglong</first><last>Li</last><affiliation>Baidu Inc.</affiliation></author>
       <pages>512-520</pages>
       <abstract>Online advertisement text generation aims at generating attractive and persuasive text ads to appeal to users clicking ads or purchasing products. While pretraining-based models have achieved remarkable success in generating high-quality text ads, some challenges still remain, such as ad generation in low-resource scenarios and training efficiency for multiple ad tasks. In this paper, we propose a novel unified text ad generation framework with multi-task prompt learning, called PLATO-Ad, totackle these problems. Specifically, we design a three-phase transfer learning mechanism to tackle the low-resource ad generation problem. Furthermore, we present a novel multi-task prompt learning mechanism to efficiently utilize a single lightweight model to solve multiple ad generation tasks without loss of performance compared to training a separate model for each task. Finally, we conduct offline and online evaluations and experiment results show that PLATO-Ad significantly outperforms the state-of-the-art on both offline and online metrics. PLATO-Ad has been deployed in a leading advertising platform with 3.5% CTR improvement on search ad descriptions and 10.4% CTR improvement on feed ad titles.</abstract>
       <url hash="ddd90b71">2022.emnlp-industry.52</url>
@@ -12978,7 +12978,7 @@
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yeyun</first><last>Gong</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Xiao</first><last>Liu</last><affiliation>Microsoft Research Asia</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>Ruc</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>Ruc</affiliation></author>
       <author><first>Yelong</first><last>Shen</last><affiliation>Microsoft</affiliation></author>
       <author><first>Anlei</first><last>Dong</last><affiliation>Microsoft</affiliation></author>
       <author><first>Jingwen</first><last>Lu</last><affiliation>Microsoft</affiliation></author>
diff --git a/data/xml/2022.emoji.xml b/data/xml/2022.emoji.xml
index 42c184dbef..a2d4111be5 100644
--- a/data/xml/2022.emoji.xml
+++ b/data/xml/2022.emoji.xml
@@ -88,7 +88,7 @@
     <paper id="7">
       <title>Semantic Congruency Facilitates Memory for Emojis</title>
       <author><first>Andriana L.</first><last>Christofalos</last></author>
-      <author><first>Laurie Beth</first><last>Feldman</last></author>
+      <author id="laurie-feldman"><first>Laurie Beth</first><last>Feldman</last></author>
       <author><first>Heather</first><last>Sheridan</last></author>
       <pages>63-68</pages>
       <abstract>Emojis can assume different relations with the sentence context in which they occur. While affective elaboration and emoji-word redundancy are frequently investigated in laboratory experiments, the role of emojis in inferential processes has received much less attention. Here, we used an online ratings task and a recognition memory task to investigate whether differences in emoji function within a sentence affect judgments of emoji-text coherence and subsequent recognition accuracy. Emojis that function as synonyms of a target word from the passages were rated as better fitting with the passage (more coherent) than emojis consistent with an inference from the passage, and both types of emojis were rated as more coherent than incongruent (unrelated) emojis. In a recognition test, emojis consistent with the semantic content of passages (synonym and inference emojis) were better recognized than incongruent emojis. Findings of the present study provide corroborating evidence that readers extract semantic information from emojis and then integrate it with surrounding passage content.</abstract>
diff --git a/data/xml/2022.eurali.xml b/data/xml/2022.eurali.xml
index 6f092b3387..a33753a926 100644
--- a/data/xml/2022.eurali.xml
+++ b/data/xml/2022.eurali.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2022-09-22" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Sina</first><last>Ahmadi</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
diff --git a/data/xml/2022.eval4nlp.xml b/data/xml/2022.eval4nlp.xml
index d50f7eeadf..30a86d7110 100644
--- a/data/xml/2022.eval4nlp.xml
+++ b/data/xml/2022.eval4nlp.xml
@@ -28,7 +28,7 @@
       <author><first>Masao</first><last>Ideuchi</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Yoshiaki</first><last>Oida</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1–10</pages>
       <url hash="728636c8">2022.eval4nlp-1.1</url>
       <doi>10.18653/v1/2022.eval4nlp-1.1</doi>
@@ -134,7 +134,7 @@
       <title>Assessing Neural Referential Form Selectors on a Realistic Multilingual Dataset</title>
       <author><first>Guanyi</first><last>Chen</last></author>
       <author><first>Fahime</first><last>Same</last></author>
-      <author><first>Kees</first><last>Van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last></author>
       <pages>103–114</pages>
       <url hash="66bf8f1e">2022.eval4nlp-1.11</url>
       <doi>10.18653/v1/2022.eval4nlp-1.11</doi>
diff --git a/data/xml/2022.evonlp.xml b/data/xml/2022.evonlp.xml
index b61c61a87f..7eac92cf15 100644
--- a/data/xml/2022.evonlp.xml
+++ b/data/xml/2022.evonlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP)</booktitle>
       <editor><first>Francesco</first><last>Barbieri</last></editor>
-      <editor><first>Jose</first><last>Camacho-Collados</last></editor>
+      <editor id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></editor>
       <editor><first>Bhuwan</first><last>Dhingra</last></editor>
-      <editor><first>Luis</first><last>Espinosa-Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></editor>
       <editor><first>Elena</first><last>Gribovskaya</last></editor>
       <editor><first>Angeliki</first><last>Lazaridou</last></editor>
       <editor><first>Daniel</first><last>Loureiro</last></editor>
diff --git a/data/xml/2022.fever.xml b/data/xml/2022.fever.xml
index 407267ebab..fc1ddc648c 100644
--- a/data/xml/2022.fever.xml
+++ b/data/xml/2022.fever.xml
@@ -8,7 +8,7 @@
       <editor><first>Oana</first><last>Cocarascu</last></editor>
       <editor><first>Zhijiang</first><last>Guo</last></editor>
       <editor><first>Arpit</first><last>Mittal</last></editor>
-      <editor><first>Michael</first><last>Schlichtkrull</last></editor>
+      <editor id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></editor>
       <editor><first>James</first><last>Thorne</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -52,7 +52,7 @@
       <author><first>Nikita</first><last>Bhutani</last></author>
       <author><first>Alexander</first><last>Whedon</last></author>
       <author><first>Estevam</first><last>Hruschka</last></author>
-      <author><first>Yoshi</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last></author>
       <pages>16-28</pages>
       <abstract>Many people read online reviews to learn about real-world entities of their interest. However, majority of reviews only describes general experiences and opinions of the customers, and may not reveal facts that are specific to the entity being reviewed. In this work, we focus on a novel task of mining from a review corpus sentences that are unique for each entity. We refer to this task as Salient Fact Extraction. Salient facts are extremely scarce due to their very nature. Consequently, collecting labeled examples for training supervised models is tedious and cost-prohibitive. To alleviate this scarcity problem, we develop an unsupervised method, ZL-Distiller, which leverages contextual language representations of the reviews and their distributional patterns to identify salient sentences about entities. Our experiments on multiple domains (hotels, products, and restaurants) show that ZL-Distiller achieves state-of-the-art performance and further boosts the performance of other supervised/unsupervised algorithms for the task. Furthermore, we show that salient sentences mined by ZL-Distiller provide unique and detailed information about entities, which benefit downstream NLP applications including question answering and summarization.</abstract>
       <url hash="cfd98f98">2022.fever-1.3</url>
@@ -78,7 +78,7 @@
       <title>A Semantics-Aware Approach to Automated Claim Verification</title>
       <author><first>Blanca</first><last>Calvo Figueras</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>37-48</pages>
       <abstract>The influence of fake news in the perception of reality has become a mainstream topic in the last years due to the fast propagation of misleading information. In order to help in the fight against misinformation, automated solutions to fact-checking are being actively developed within the research community. In this context, the task of Automated Claim Verification is defined as assessing the truthfulness of a claim by finding evidence about its veracity. In this work we empirically demonstrate that enriching a BERT model with explicit semantic information such as Semantic Role Labelling helps to improve results in claim verification as proposed by the FEVER benchmark. Furthermore, we perform a number of explainability tests that suggest that the semantically-enriched model is better at handling complex cases, such as those including passive forms or multiple propositions.</abstract>
       <url hash="5b6759c6">2022.fever-1.5</url>
diff --git a/data/xml/2022.fieldmatters.xml b/data/xml/2022.fieldmatters.xml
index 46e0addf43..a1245958a7 100644
--- a/data/xml/2022.fieldmatters.xml
+++ b/data/xml/2022.fieldmatters.xml
@@ -12,7 +12,7 @@
       <editor><first>Tatiana</first><last>Shavrina</last></editor>
       <editor><first>Eric Le</first><last>Ferrand</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
-      <editor><first>Francis</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last></editor>
       <editor><first>Timofey</first><last>Arkhangelskiy</last></editor>
       <editor><first>Vladislav</first><last>Mikhailov</last></editor>
       <editor><first>Alena</first><last>Fenogenova</last></editor>
@@ -49,7 +49,7 @@
     </paper>
     <paper id="3">
       <title>Machine Translation Between High-resource Languages in a Language Documentation Setting</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Abteen</first><last>Ebrahimi</last></author>
       <author><first>Kristine</first><last>Stenzel</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
@@ -70,7 +70,7 @@
     <paper id="5">
       <title>The interaction between cognitive ease and informativeness shapes the lexicons of natural languages</title>
       <author><first>Thomas</first><last>Brochhagen</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>42–44</pages>
       <abstract>It is common for languages to express multiple meanings with the same word, a phenomenon known as colexification. For instance, the meanings FINGER and TOE colexify in the word “dedo” in Spanish, while they do not colexify in English. Colexification has been suggested to follow universal constraints. In particular, previous work has shown that related meanings are more prone to colexify. This tendency has been explained in terms of the cognitive pressure for ease, since expressing related meanings with the same word makes lexicons easier to learn and use. The present study examines the interplay between this pressure and a competing universal constraint, the functional pressure for languages to maximize informativeness. We hypothesize that meanings are more likely to colexify if they are related (fostering ease), but not so related as to become confusable and cause misunderstandings (fostering informativeness). We find support for this principle in data from over 1200 languages and 1400 meanings. Our results thus suggest that universal principles shape the lexicons of natural languages. More broadly, they contribute to the growing body of evidence suggesting that languages evolve to strike a balance between competing functional and cognitive pressures.</abstract>
       <url hash="95beeafd">2022.fieldmatters-1.5</url>
diff --git a/data/xml/2022.findings.xml b/data/xml/2022.findings.xml
index 47e4c7e65b..e3ffdcae8a 100644
--- a/data/xml/2022.findings.xml
+++ b/data/xml/2022.findings.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: ACL 2022</booktitle>
       <editor><first>Smaranda</first><last>Muresan</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -68,7 +68,7 @@
       <author><first>Emil</first><last>Biju</last></author>
       <author><first>Anirudh</first><last>Sriram</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <pages>31-44</pages>
       <abstract>Self-attention heads are characteristic of Transformer models and have been well studied for interpretability and pruning. In this work, we demonstrate an altogether different utility of attention heads, namely for adversarial detection. Specifically, we propose a method to construct input-specific attention subnetworks (IAS) from which we extract three features to discriminate between authentic and adversarial inputs. The resultant detector significantly improves (by over 7.5%) the state-of-the-art adversarial detection accuracy for the BERT encoder on 10 NLU datasets with 11 different adversarial attack types. We also demonstrate that our method (a) is more accurate for larger models which are likely to have more spurious correlations and thus vulnerable to adversarial attack, and (b) performs well even with modest training sets of adversarial examples.</abstract>
       <url hash="5581daa2">2022.findings-acl.4</url>
@@ -95,9 +95,9 @@
       <author><first>Sarubi</first><last>Thillainathan</last></author>
       <author><first>Shravan</first><last>Nayak</last></author>
       <author><first>Surangika</first><last>Ranathunga</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Ruisi</first><last>Su</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <pages>58-67</pages>
       <abstract>What can pre-trained multilingual sequence-to-sequence models like mBART contribute to translating low-resource languages? We conduct a thorough empirical experiment in 10 languages to ascertain this, considering five factors: (1) the amount of fine-tuning data, (2) the noise in the fine-tuning data, (3) the amount of pre-training data in the model, (4) the impact of domain mismatch, and (5) language typology. In addition to yielding several heuristics, the experiments form a framework for evaluating the data sensitivities of machine translation systems. While mBART is robust to domain differences, its translations for unseen and typologically distant languages remain below 3.0 BLEU. In answer to our title’s question, mBART is not a low-resource panacea; we therefore encourage shifting the emphasis from new models to new data.</abstract>
       <url hash="57ee5031">2022.findings-acl.6</url>
@@ -279,7 +279,7 @@
       <author><first>Nathan</first><last>Young</last></author>
       <author><first>Qiming</first><last>Bao</last></author>
       <author><first>Joshua</first><last>Bensemann</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>218-227</pages>
       <abstract>Transformers have recently been shown to be capable of reliably performing logical reasoning over facts and rules expressed in natural language, but abductive reasoning - inference to the best explanation of an unexpected observation - has been underexplored despite significant applications to scientific discovery, common-sense reasoning, and model interpretability. This paper presents AbductionRules, a group of natural language datasets designed to train and test generalisable abduction over natural-language knowledge bases. We use these datasets to finetune pretrained Transformers and discuss their performance, finding that our models learned generalisable abductive techniques but also learned to exploit the structure of our data. Finally, we discuss the viability of this approach to abductive reasoning and ways in which it may be improved in future work.</abstract>
       <url hash="16eed3fd">2022.findings-acl.19</url>
@@ -398,7 +398,7 @@
       <author><first>Lingbo</first><last>Mo</last></author>
       <author><first>Ashley</first><last>Lewis</last></author>
       <author><first>Huan</first><last>Sun</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>322-342</pages>
       <abstract>Existing studies on semantic parsing focus on mapping a natural-language utterance to a logical form (LF) in one turn. However, because natural language may contain ambiguity and variability, this is a difficult challenge. In this work, we investigate an interactive semantic parsing framework that explains the predicted LF step by step in natural language and enables the user to make corrections through natural-language feedback for individual steps. We focus on question answering over knowledge bases (KBQA) as an instantiation of our framework, aiming to increase the transparency of the parsing process and help the user trust the final answer. We construct INSPIRED, a crowdsourced dialogue dataset derived from the ComplexWebQuestions dataset. Our experiments show that this framework has the potential to greatly improve overall parse accuracy. Furthermore, we develop a pipeline for dialogue simulation to evaluate our framework w.r.t. a variety of state-of-the-art KBQA models without further crowdsourcing effort. The results demonstrate that our framework promises to be effective across such models.</abstract>
       <url hash="e6aed9e5">2022.findings-acl.28</url>
@@ -437,7 +437,7 @@
     <paper id="31">
       <title>Towards Responsible Natural Language Annotation for the Varieties of <fixed-case>A</fixed-case>rabic</title>
       <author><first>A.</first><last>Bergman</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>364-371</pages>
       <abstract>When building NLP models, there is a tendency to aim for broader coverage, often overlooking cultural and (socio)linguistic nuance. In this position paper, we make the case for care and attention to such nuances, particularly in dataset annotation, as well as the inclusion of cultural and linguistic expertise in the process. We present a playbook for responsible dataset creation for polyglossic, multidialectal languages. This work is informed by a study on Arabic annotation of social media content.</abstract>
       <url hash="f48868e9">2022.findings-acl.31</url>
@@ -477,7 +477,7 @@
       <title><fixed-case>MDER</fixed-case>ank: A Masked Document Embedding Rank Approach for Unsupervised Keyphrase Extraction</title>
       <author><first>Linhan</first><last>Zhang</last></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Chong</first><last>Deng</last></author>
       <author><first>ShiLiang</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Li</last></author>
@@ -544,7 +544,7 @@
       <author><first>Zuchao</first><last>Li</last></author>
       <author><first>Yiran</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <pages>459-471</pages>
@@ -558,7 +558,7 @@
       <author><first>Prashant</first><last>Kodali</last></author>
       <author><first>Anmol</first><last>Goel</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>472-480</pages>
       <abstract>Code mixing is the linguistic phenomenon where bilingual speakers tend to switch between two or more languages in conversations. Recent work on code-mixing in computational settings has leveraged social media code mixed texts to train NLP models. For capturing the variety of code mixing in, and across corpus, Language ID (LID) tags based measures (CMI) have been proposed. Syntactical variety/patterns of code-mixing and their relationship vis-a-vis computational model’s performance is under explored. In this work, we investigate a collection of English(en)-Hindi(hi) code-mixed datasets from a syntactic lens to propose, <tex-math>SyMCoM</tex-math>, an indicator of syntactic variety in code-mixed text, with intuitive theoretical bounds. We train SoTA en-hi PoS tagger, accuracy of 93.4%, to reliably compute PoS tags on a corpus, and demonstrate the utility of <tex-math>SyMCoM</tex-math> by applying it on various syntactical categories on a collection of datasets, and compare datasets using the measure.</abstract>
@@ -621,7 +621,7 @@
       <title>Better Quality Estimation for Low Resource Corpus Mining</title>
       <author><first>Muhammed</first><last>Kocyigit</last></author>
       <author><first>Jiho</first><last>Lee</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>533-543</pages>
       <abstract>Quality Estimation (QE) models have the potential to change how we evaluate and maybe even train machine translation models. However, these models still lack the robustness to achieve general adoption. We show that Stateof-the-art QE models, when tested in a Parallel Corpus Mining (PCM) setting, perform unexpectedly bad due to a lack of robustness to out-of-domain examples. We propose a combination of multitask training, data augmentation and contrastive learning to achieve better and more robust QE performance. We show that our method improves QE performance significantly in the MLQE challenge and the robustness of QE models when tested in the Parallel Corpus Mining setup. We increase the accuracy in PCM by more than 0.80, making it on par with state-of-the-art PCM methods that use millions of sentence pairs to train their models. In comparison, we use a thousand times less data, 7K parallel sentences in total, and propose a novel low resource PCM method.</abstract>
       <url hash="168b2e37">2022.findings-acl.45</url>
@@ -660,7 +660,7 @@
       <title>Extracting Latent Steering Vectors from Pretrained Language Models</title>
       <author><first>Nishant</first><last>Subramani</last></author>
       <author><first>Nivedita</first><last>Suresh</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <pages>566-581</pages>
       <abstract>Prior work on controllable text generation has focused on learning how to control language models through trainable decoding, smart-prompt design, or fine-tuning based on a desired objective. We hypothesize that the information needed to steer the model to generate a target sentence is already encoded within the model. Accordingly, we explore a different approach altogether: extracting latent vectors directly from pretrained language model decoders without fine-tuning. Experiments show that there exist steering vectors, which, when added to the hidden states of the language model, generate a target sentence nearly perfectly (&gt; 99 BLEU) for English sentences from a variety of domains. We show that vector arithmetic can be used for unsupervised sentiment transfer on the Yelp sentiment benchmark, with performance comparable to models tailored to this task. We find that distances between steering vectors reflect sentence similarity when evaluated on a textual similarity benchmark (STS-B), outperforming pooled hidden states of models. Finally, we present an analysis of the intrinsic properties of the steering vectors. Taken together, our results suggest that frozen LMs can be effectively controlled through their latent steering space.</abstract>
       <url hash="4d0c3e8a">2022.findings-acl.48</url>
@@ -670,10 +670,10 @@
     </paper>
     <paper id="49">
       <title>Domain Generalisation of <fixed-case>NMT</fixed-case>: Fusing Adapters with Leave-One-Domain-Out Training</title>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <author><first>Dinh</first><last>Phung</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>582-588</pages>
       <abstract>Generalising to unseen domains is under-explored and remains a challenge in neural machine translation. Inspired by recent research in parameter-efficient transfer learning from pretrained models, this paper proposes a fusion-based generalisation method that learns to combine domain-specific parameters. We propose a leave-one-domain-out training strategy to avoid information leaking to address the challenge of not knowing the test domain during training time. Empirical results on three language pairs show that our proposed fusion method outperforms other baselines up to +0.8 BLEU score on average.</abstract>
       <url hash="af446f61">2022.findings-acl.49</url>
@@ -700,8 +700,8 @@
       <author><first>Chao</first><last>Zhao</last></author>
       <author><first>Tenghao</first><last>Huang</last></author>
       <author><first>Somnath</first><last>Basu Roy Chowdhury</last></author>
-      <author><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Snigdha</first><last>Chaturvedi</last></author>
       <pages>613-621</pages>
       <abstract>A common method for extractive multi-document news summarization is to re-formulate it as a single-document summarization problem by concatenating all documents as a single meta-document. However, this method neglects the relative importance of documents. We propose a simple approach to reorder the documents according to their relative importance before concatenating and summarizing them. The reordering makes the salient content easier to learn by the summarization model. Experiments show that our approach outperforms previous state-of-the-art methods with more complex architectures.</abstract>
@@ -714,7 +714,7 @@
       <author><first>Nikita</first><last>Soni</last></author>
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>622-636</pages>
       <abstract>Natural language is generated by people, yet traditional language modeling views words or documents as if generated independently. Here, we propose human language modeling (HuLM), a hierarchical extension to the language modeling problem where by a human- level exists to connect sequences of documents (e.g. social media messages) and capture the notion that human language is moderated by changing human states. We introduce, HaRT, a large-scale transformer model for solving HuLM, pre-trained on approximately 100,000 social media users, and demonstrate it’s effectiveness in terms of both language modeling (perplexity) for social media and fine-tuning for 4 downstream tasks spanning document- and user-levels. Results on all tasks meet or surpass the current state-of-the-art.</abstract>
       <url hash="7ec3f959">2022.findings-acl.52</url>
@@ -740,7 +740,7 @@
       <author><first>Shuxian</first><last>Zou</last></author>
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>648-657</pages>
       <abstract>Decoding language from non-invasive brain activity has attracted increasing attention from both researchers in neuroscience and natural language processing. Due to the noisy nature of brain recordings, existing work has simplified brain-to-word decoding as a binary classification task which is to discriminate a brain signal between its corresponding word and a wrong one. This pairwise classification task, however, cannot promote the development of practical neural decoders for two reasons. First, it has to enumerate all pairwise combinations in the test set, so it is inefficient to predict a word in a large vocabulary. Second, a perfect pairwise decoder cannot guarantee the performance on direct classification. To overcome these and go a step further to a realistic neural decoder, we propose a novel Cross-Modal Cloze (CMC) task which is to predict the target word encoded in the neural image with a context as prompt. Furthermore, to address this task, we propose a general approach that leverages the pre-trained language model to predict the target word. To validate our method, we perform experiments on more than 20 participants from two brain imaging datasets. Our method achieves 28.91% top-1 accuracy and 54.19% top-5 accuracy on average across all participants, significantly outperforming several baselines. This result indicates that our model can serve as a state-of-the-art baseline for the CMC task. More importantly, it demonstrates that it is feasible to decode a certain word within a large vocabulary from its neural brain activity.</abstract>
       <url hash="3ea873e7">2022.findings-acl.54</url>
@@ -772,7 +772,7 @@
       <author><first>Kevin</first><last>Chang</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Lucian</first><last>Popa</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>679-692</pages>
       <abstract>We propose a probabilistic approach to select a subset of a <i>target domain representative keywords</i> from a candidate set, contrasting with a context domain. Such a task is crucial for many downstream tasks in natural language processing. To contrast the target domain and the context domain, we adapt the <i>two-component mixture model</i> concept to generate a distribution of candidate keywords. It provides more importance to the <i>distinctive</i> keywords of the target domain than common keywords contrasting with the context domain. To support the <i>representativeness</i> of the selected keywords towards the target domain, we introduce an <i>optimization algorithm</i> for selecting the subset from the generated candidate distribution. We have shown that the optimization algorithm can be efficiently implemented with a near-optimal approximation guarantee. Finally, extensive experiments on multiple domains demonstrate the superiority of our approach over other baselines for the tasks of keyword summary generation and trending keywords selection.</abstract>
       <url hash="d905da42">2022.findings-acl.56</url>
@@ -796,7 +796,7 @@
       <author><first>Kushal</first><last>Arora</last></author>
       <author><first>Layla</first><last>El Asri</last></author>
       <author><first>Hareesh</first><last>Bahuleyan</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>700-710</pages>
       <abstract>Current language generation models suffer from issues such as repetition, incoherence, and hallucinations. An often-repeated hypothesis for this brittleness of generation models is that it is caused by the training and the generation procedure mismatch, also referred to as exposure bias. In this paper, we verify this hypothesis by analyzing exposure bias from an imitation learning perspective. We show that exposure bias leads to an accumulation of errors during generation, analyze why perplexity fails to capture this accumulation of errors, and empirically show that this accumulation results in poor generation quality.</abstract>
       <url hash="c1bd655f">2022.findings-acl.58</url>
@@ -808,7 +808,7 @@
       <title>Question Answering Infused Pre-training of General-Purpose Contextualized Representations</title>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>711-728</pages>
       <abstract>We propose a pre-training objective based on question answering (QA) for learning general-purpose contextual representations, motivated by the intuition that the representation of a phrase in a passage should encode all questions that the phrase can answer in context. To this end, we train a bi-encoder QA model, which independently encodes passages and questions, to match the predictions of a more accurate cross-encoder model on 80 million synthesized QA pairs. By encoding QA-relevant information, the bi-encoder’s token-level representations are useful for non-QA downstream tasks without extensive (or in some cases, any) fine-tuning. We show large improvements over both RoBERTa-large and previous state-of-the-art results on zero-shot and few-shot paraphrase detection on four datasets, few-shot named entity recognition on two datasets, and zero-shot sentiment analysis on three datasets.</abstract>
       <url hash="37aba859">2022.findings-acl.59</url>
@@ -818,7 +818,7 @@
     <paper id="60">
       <title>Automatic Song Translation for Tonal Languages</title>
       <author><first>Fenfei</first><last>Guo</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Zhirui</first><last>Zhang</last></author>
       <author><first>Qixin</first><last>He</last></author>
       <author><first>Kejun</first><last>Zhang</last></author>
@@ -988,7 +988,7 @@
       <title><fixed-case>DS</fixed-case>-<fixed-case>TOD</fixed-case>: Efficient Domain Specialization for Task-Oriented Dialog</title>
       <author><first>Chia-Chien</first><last>Hung</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>891-904</pages>
       <abstract>Recent work has shown that self-supervised dialog-specific pretraining on large conversational datasets yields substantial gains over traditional language modeling (LM) pretraining in downstream task-oriented dialog (TOD). These approaches, however, exploit general dialogic corpora (e.g., Reddit) and thus presumably fail to reliably embed domain-specific knowledge useful for concrete downstream TOD domains. In this work, we investigate the effects of domain specialization of pretrained language models (PLMs) for TOD. Within our DS-TOD framework, we first automatically extract salient domain-specific terms, and then use them to construct DomainCC and DomainReddit – resources that we leverage for domain-specific pretraining, based on (i) masked language modeling (MLM) and (ii) response selection (RS) objectives, respectively. We further propose a resource-efficient and modular domain specialization by means of domain adapters – additional parameter-light layers in which we encode the domain knowledge. Our experiments with prominent TOD tasks – dialog state tracking (DST) and response retrieval (RR) – encompassing five domains from the MultiWOZ benchmark demonstrate the effectiveness of DS-TOD. Moreover, we show that the light-weight adapter-based specialization (1) performs comparably to full fine-tuning in single domain setups and (2) is particularly suitable for multi-domain specialization, where besides advantageous computational footprint, it can offer better TOD performance.</abstract>
@@ -1033,7 +1033,7 @@
       <author><first>Zichao</first><last>Li</last></author>
       <author><first>Prakhar</first><last>Sharma</last></author>
       <author><first>Xing Han</first><last>Lu</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>926-937</pages>
       <abstract>Most research on question answering focuses on the pre-deployment stage; i.e., building an accurate model for deployment. In this paper, we ask the question: Can we improve QA systems further post-deployment based on user interactions? We focus on two kinds of improvements: 1) improving the QA system’s performance itself, and 2) providing the model with the ability to explain the correctness or incorrectness of an answer. We collect a retrieval-based QA dataset, FeedbackQA, which contains interactive feedback from users. We collect this dataset by deploying a base QA system to crowdworkers who then engage with the system and provide feedback on the quality of its answers. The feedback contains both structured ratings and unstructured natural language explanations. We train a neural model with this feedback data that can generate explanations and re-score answer candidates. We show that feedback data not only improves the accuracy of the deployed QA system but also other stronger non-deployed systems. The generated explanations also help users make informed decisions about the correctness of answers.</abstract>
@@ -1048,7 +1048,7 @@
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Julia</first><last>Rozanova</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
       <pages>938-948</pages>
       <abstract>The application of Natural Language Inference (NLI) methods over large textual corpora can facilitate scientific discovery, reducing the gap between current research and the available large-scale scientific knowledge. However, contemporary NLI models are still limited in interpreting mathematical knowledge written in Natural Language, even though mathematics is an integral part of scientific argumentation for many disciplines. One of the fundamental requirements towards mathematical language understanding, is the creation of models able to meaningfully represent variables. This problem is particularly challenging since the meaning of a variable should be assigned exclusively from its defining type, i.e., the representation of a variable should come from its context. Recent research has formalised the variable typing task, a benchmark for the understanding of abstract mathematical types and variables in a sentence. In this work, we propose VarSlot, a Variable Slot-based approach, which not only delivers state-of-the-art results in the task of variable typing, but is also able to create context-based representations for variables.</abstract>
       <url hash="e1f13195">2022.findings-acl.76</url>
@@ -1071,9 +1071,9 @@
     <paper id="78">
       <title><fixed-case>BPE</fixed-case> vs. Morphological Segmentation: A Case Study on Machine Translation of Four Polysynthetic Languages</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
-      <author><first>Elisabeth</first><last>Mager</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Mager</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Thang</first><last>Vu</last></author>
       <pages>961-971</pages>
       <abstract>Morphologically-rich polysynthetic languages present a challenge for NLP systems due to data sparsity, and a common strategy to handle this issue is to apply subword segmentation. We investigate a wide variety of supervised and unsupervised morphological segmentation methods for four polysynthetic languages: Nahuatl, Raramuri, Shipibo-Konibo, and Wixarika. Then, we compare the morphologically inspired segmentation methods against Byte-Pair Encodings (BPEs) as inputs for machine translation (MT) when translating to and from Spanish. We show that for all language pairs except for Nahuatl, an unsupervised morphological segmentation algorithm outperforms BPEs consistently and that, although supervised methods achieve better segmentation scores, they under-perform in MT challenges. Finally, we contribute two new morphological segmentation datasets for Raramuri and Shipibo-Konibo, and a parallel corpus for Raramuri–Spanish.</abstract>
@@ -1097,12 +1097,12 @@
     <paper id="80">
       <title>Morphological Processing of Low-Resource Languages: Where We Are and What’s Next</title>
       <author><first>Adam</first><last>Wiemerslage</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Changbing</first><last>Yang</last></author>
-      <author><first>Arya</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Eliana</first><last>Colunga</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>988-1007</pages>
       <abstract>Automatic morphological processing can aid downstream natural language processing applications, especially for low-resource languages, and assist language documentation efforts for endangered languages. Having long been multilingual, the field of computational morphology is increasingly moving towards approaches suitable for languages with minimal or no annotated resources. First, we survey recent developments in computational morphology with a focus on low-resource languages. Second, we argue that the field is ready to tackle the logical next challenge: understanding a language’s morphology from raw text alone. We perform an empirical study on a truly unsupervised version of the paradigm completion task and show that, while existing state-of-the-art models bridged by two newly proposed models we devise perform reasonably, there is still much room for improvement. The stakes are high: solving this task will increase the language coverage of morphological resources by a number of magnitudes.</abstract>
       <url hash="1d7407bb">2022.findings-acl.80</url>
@@ -1115,7 +1115,7 @@
       <author><first>Naoya</first><last>Inoue</last></author>
       <author><first>Charuta</first><last>Pethe</last></author>
       <author><first>Allen</first><last>Kim</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>1008-1019</pages>
       <abstract>We address the problem of learning fixed-length vector representations of characters in novels. Recent advances in word embeddings have proven successful in learning entity representations from short texts, but fall short on longer documents because they do not capture full book-level information. To overcome the weakness of such text-based embeddings, we propose two novel methods for representing characters: (i) graph neural network-based embeddings from a full corpus-based character network; and (ii) low-dimensional embeddings constructed from the occurrence pattern of characters in each novel. We test the quality of these character embeddings using a new benchmark suite to evaluate character representations, encompassing 12 different tasks. We show that our representation techniques combined with text-based embeddings lead to the best character representations, outperforming text-based embeddings in four tasks. Our dataset and evaluation script will be made publicly available to stimulate additional work in this area.</abstract>
       <url hash="7c807cab">2022.findings-acl.81</url>
@@ -1140,7 +1140,7 @@
       <author><first>Sebastian</first><last>Peralta</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <author><first>Garrick</first><last>Sherman</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>1035-1047</pages>
       <abstract>Being able to reliably estimate self-disclosure – a key component of friendship and intimacy – from language is important for many psychology studies. We build single-task models on five self-disclosure corpora, but find that these models generalize poorly; the within-domain accuracy of predicted message-level self-disclosure of the best-performing model (mean Pearson’s r=0.69) is much higher than the respective across data set accuracy (mean Pearson’s r=0.32), due to both variations in the corpora (e.g., medical vs. general topics) and labeling instructions (target variables: self-disclosure, emotional disclosure, intimacy). However, some lexical features, such as expression of negative emotions and use of first person personal pronouns such as ‘I’ reliably predict self-disclosure across corpora. We develop a multi-task model that yields better results, with an average Pearson’s r of 0.37 for out-of-corpora prediction.</abstract>
       <url hash="0acc04dc">2022.findings-acl.83</url>
@@ -1191,7 +1191,7 @@
       <author><first>Leah</first><last>Nann</last></author>
       <author><first>Haris</first><last>Jabbar</last></author>
       <author><first>Sahana</first><last>Udupa</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1089-1104</pages>
       <abstract>Building on current work on multilingual hate speech (e.g., Ousidhoum et al. (2019)) and hate speech reduction (e.g., Sap et al. (2020)), we present XTREMESPEECH, a new hate speech dataset containing 20,297 social media passages from Brazil, Germany, India and Kenya. The key novelty is that we directly involve the affected communities in collecting and annotating the data – as opposed to giving companies and governments control over defining and combatting hate speech. This inclusive approach results in datasets more representative of actually occurring online speech and is likely to facilitate the removal of the social media content that marginalized communities view as causing the most harm. Based on XTREMESPEECH, we establish novel tasks with accompanying baselines, provide evidence that cross-country training is generally not feasible due to cultural differences between countries and perform an interpretability analysis of BERT’s predictions.</abstract>
       <url hash="e8cf54ea">2022.findings-acl.87</url>
@@ -1255,7 +1255,7 @@
       <author><first>Oana</first><last>Ignat</last></author>
       <author><first>Jean</first><last>Maillard</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>1164-1174</pages>
       <abstract>We aim to investigate the performance of current OCR systems on low resource languages and low resource scripts. We introduce and make publicly available a novel benchmark, OCR4MT, consisting of real and synthetic data, enriched with noise, for 60 low-resource languages in low resource scripts. We evaluate state-of-the-art OCR systems on our benchmark and analyse most common errors. We show that OCR monolingual data is a valuable resource that can increase performance of Machine Translation models, when used in backtranslation. We then perform an ablation study to investigate how OCR errors impact Machine Translation performance and determine what is the minimum level of OCR quality needed for the monolingual data to be useful for Machine Translation.</abstract>
       <url hash="8d03ec3c">2022.findings-acl.92</url>
@@ -1309,7 +1309,7 @@
       <author><first>Sabine</first><last>Weber</last></author>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
       <author><first>Liane</first><last>Guillou</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1214-1233</pages>
       <abstract>Predicate entailment detection is a crucial task for question-answering from text, where previous work has explored unsupervised learning of entailment graphs from typed open relation triples. In this paper, we present the first pipeline for building Chinese entailment graphs, which involves a novel high-recall open relation extraction (ORE) method and the first Chinese fine-grained entity typing dataset under the FIGER type ontology. Through experiments on the Levy-Holt dataset, we verify the strength of our Chinese entailment graph, and reveal the cross-lingual complementarity: on the parallel Levy-Holt dataset, an ensemble of Chinese and English entailment graphs outperforms both monolingual graphs, and raises unsupervised SOTA by 4.7 AUC points.</abstract>
       <url hash="65f04b84">2022.findings-acl.96</url>
@@ -1373,7 +1373,7 @@
     <paper id="101">
       <title><fixed-case>C</fixed-case>o-training an <fixed-case>U</fixed-case>nsupervised <fixed-case>C</fixed-case>onstituency <fixed-case>P</fixed-case>arser with <fixed-case>W</fixed-case>eak <fixed-case>S</fixed-case>upervision</title>
       <author><first>Nickil</first><last>Maveli</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <pages>1274-1291</pages>
       <abstract>We introduce a method for unsupervised parsing that relies on bootstrapping classifiers to identify if a node dominates a specific span in a sentence. There are two types of classifiers, an inside classifier that acts on a span, and an outside classifier that acts on everything outside of a given span. Through self-training and co-training with the two classifiers, we show that the interplay between them helps improve the accuracy of both, and as a result, effectively parse. A seed bootstrapping technique prepares the data to train these classifiers. Our analyses further validate that such an approach in conjunction with weak supervision using prior branching knowledge of a known language (left/right-branching) and minimal heuristics injects strong inductive bias into the parser, achieving 63.1 F<tex-math>_1</tex-math> on the English (PTB) test set. In addition, we show the effectiveness of our architecture by evaluating on treebanks for Chinese (CTB) and Japanese (KTB) and achieve new state-of-the-art results.</abstract>
       <url hash="6960059b">2022.findings-acl.101</url>
@@ -1471,7 +1471,7 @@
       <author><first>Lütfi Kerem</first><last>Senel</last></author>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1384-1396</pages>
       <abstract>After a period of decrease, interest in word alignments is increasing again for their usefulness in domains such as typological research, cross-lingual annotation projection and machine translation. Generally, alignment algorithms only use bitext and do not make use of the fact that many parallel corpora are multiparallel. Here, we compute high-quality word alignments between multiple language pairs by considering all language pairs together. First, we create a multiparallel word alignment graph, joining all bilingual word alignment pairs in one graph. Next, we use graph neural networks (GNNs) to exploit the graph structure. Our GNN approach (i) utilizes information about the meaning, position and language of the input words, (ii) incorporates information from multiple parallel sentences, (iii) adds and removes edges from the initial alignments, and (iv) yields a prediction model that can generalize beyond the training sentences. We show that community detection algorithms can provide valuable information for multiparallel word alignment. Our method outperforms previous work on three word alignment datasets and on a downstream task.</abstract>
       <url hash="c8f2efd4">2022.findings-acl.108</url>
@@ -1599,7 +1599,7 @@
       <author><first>Sang-Woo</first><last>Lee</last></author>
       <author><first>Ji-Hoon</first><last>Kim</last></author>
       <author><first>Jung-Woo</first><last>Ha</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1487-1492</pages>
       <abstract>The retriever-reader pipeline has shown promising performance in open-domain QA but suffers from a very slow inference speed. Recently proposed question retrieval models tackle this problem by indexing question-answer pairs and searching for similar questions. These models have shown a significant increase in inference speed, but at the cost of lower QA performance compared to the retriever-reader models. This paper proposes a two-step question retrieval model, SQuID (Sequential Question-Indexed Dense retrieval) and distant supervision for training. SQuID uses two bi-encoders for question retrieval. The first-step retriever selects top-k similar questions, and the second-step retriever finds the most similar question from the top-k questions. We evaluate the performance and the computational efficiency of SQuID. The results show that SQuID significantly increases the performance of existing question retrieval models with a negligible loss on inference speed.</abstract>
       <url hash="aecb05fb">2022.findings-acl.117</url>
@@ -1698,8 +1698,8 @@
       <author><first>Tingting</first><last>Ma</last></author>
       <author><first>Huiqiang</first><last>Jiang</last></author>
       <author><first>Qianhui</first><last>Wu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>1584-1596</pages>
       <abstract>Few-shot named entity recognition (NER) systems aim at recognizing novel-class named entities based on only a few labeled examples. In this paper, we present a decomposed meta-learning approach which addresses the problem of few-shot NER by sequentially tackling few-shot span detection and few-shot entity typing using meta-learning. In particular, we take the few-shot span detection as a sequence labeling problem and train the span detector by introducing the model-agnostic meta-learning (MAML) algorithm to find a good model parameter initialization that could fast adapt to new entity classes. For few-shot entity typing, we propose MAML-ProtoNet, i.e., MAML-enhanced prototypical networks to find a good embedding space that can better distinguish text span representations from different entity classes. Extensive experiments on various benchmarks show that our approach achieves superior performance over prior methods.</abstract>
       <url hash="cc1f91ee">2022.findings-acl.124</url>
@@ -1757,8 +1757,8 @@
       <title>Transfer Learning and Prediction Consistency for Detecting Offensive Spans of Text</title>
       <author><first>Amir</first><last>Pouran Ben Veyseh</last></author>
       <author><first>Ning</first><last>Xu</last></author>
-      <author><first>Quan</first><last>Tran</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Thien</first><last>Nguyen</last></author>
       <pages>1630-1637</pages>
@@ -1798,7 +1798,7 @@
       <author><first>Taha</first><last>Aksu</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>1659-1671</pages>
       <abstract>Augmentation of task-oriented dialogues has followed standard methods used for plain-text such as back-translation, word-level manipulation, and paraphrasing despite its richly annotated structure. In this work, we introduce an augmentation framework that utilizes belief state annotations to match turns from various dialogues and form new synthetic dialogues in a bottom-up manner. Unlike other augmentation strategies, it operates with as few as five examples. Our augmentation strategy yields significant improvements when both adapting a DST model to a new domain, and when adapting a language model to the DST task, on evaluations with TRADE and TOD-BERT models. Further analysis shows that our model performs better on seen values during training, and it is also more robust to unseen values. We conclude that exploiting belief state annotations enhances dialogue augmentation and results in improved models in n-shot training scenarios.</abstract>
       <url hash="0e632aa9">2022.findings-acl.131</url>
@@ -1840,7 +1840,7 @@
       <author><first>Linyang</first><last>Li</last></author>
       <author><first>Cho-Jui</first><last>Hsieh</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1694-1707</pages>
       <abstract>Most of the existing defense methods improve the adversarial robustness by making the models adapt to the training set augmented with some adversarial examples. However, the augmented adversarial examples may not be natural, which might distort the training distribution, resulting in inferior performance both in clean accuracy and adversarial robustness. In this study, we explore the feasibility of introducing a reweighting mechanism to calibrate the training distribution to obtain robust models. We propose to train text classifiers by a sample reweighting method in which the example weights are learned to minimize the loss of a validation set mixed with the clean examples and their adversarial ones in an online learning manner. Through extensive experiments, we show that there exists a reweighting mechanism to make the models more robust against adversarial attacks without the need to craft the adversarial examples for the entire training set.</abstract>
       <url hash="e4b68c0a">2022.findings-acl.134</url>
@@ -1866,8 +1866,8 @@
       <author><first>Xiaoguang</first><last>Li</last></author>
       <author><first>Lifeng</first><last>Shang</last></author>
       <author><first>Zhenhua</first><last>Dong</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Zhenzhou</first><last>Ji</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
@@ -1955,7 +1955,7 @@
     <paper id="143">
       <title>Modality-specific Learning Rates for Effective Multimodal Additive Late-fusion</title>
       <author><first>Yiqun</first><last>Yao</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1824-1834</pages>
       <abstract>In multimodal machine learning, additive late-fusion is a straightforward approach to combine the feature representations from different modalities, in which the final prediction can be formulated as the sum of unimodal predictions. While it has been found that certain late-fusion models can achieve competitive performance with lower computational costs compared to complex multimodal interactive models, how to effectively search for a good late-fusion model is still an open question. Moreover, for different modalities, the best unimodal models may work under significantly different learning rates due to the nature of the modality and the computational flow of the model; thus, selecting a global learning rate for late-fusion models can result in a vanishing gradient for some modalities. To help address these issues, we propose a Modality-Specific Learning Rate (MSLR) method to effectively build late-fusion multimodal models from fine-tuned unimodal models. We investigate three different strategies to assign learning rates to different modalities. Our experiments show that MSLR outperforms global learning rates on multiple tasks and settings, and enables the models to effectively learn each modality.</abstract>
       <url hash="7b9dc305">2022.findings-acl.143</url>
@@ -1981,7 +1981,7 @@
       <author><first>Himani</first><last>Shrotriya</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Ratish</first><last>Puduppully</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
       <pages>1849-1863</pages>
       <abstract>In this paper, we study pre-trained sequence-to-sequence models for a group of related languages, with a focus on Indic languages. We present IndicBART, a multilingual, sequence-to-sequence pre-trained model focusing on 11 Indic languages and English. IndicBART utilizes the orthographic similarity between Indic scripts to improve transfer learning between similar Indic languages. We evaluate IndicBART on two NLG tasks: Neural Machine Translation (NMT) and extreme summarization. Our experiments on NMT and extreme summarization show that a model specific to related languages like IndicBART is competitive with large pre-trained models like mBART50 despite being significantly smaller. It also performs well on very low-resource translation scenarios where languages are not included in pre-training or fine-tuning. Script sharing, multilingual training, and better utilization of limited model capacity contribute to the good performance of the compact IndicBART model.</abstract>
@@ -1996,7 +1996,7 @@
       <author><first>Gustavo</first><last>Hernandez Abrego</last></author>
       <author><first>Noah</first><last>Constant</last></author>
       <author><first>Ji</first><last>Ma</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
       <pages>1864-1874</pages>
@@ -2036,7 +2036,7 @@
       <author><first>Wenhao</first><last>Yu</last></author>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Lianhui</first><last>Qin</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Tong</first><last>Zhao</last></author>
       <author><first>Meng</first><last>Jiang</last></author>
       <pages>1896-1906</pages>
@@ -2094,7 +2094,7 @@
       <author><first>Pouya</first><last>Pezeshkpour</last></author>
       <author><first>Sarthak</first><last>Jain</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>1934-1946</pages>
       <abstract>Training the deep neural networks that dominate NLP requires large datasets. These are often collected automatically or via crowdsourcing, and may exhibit systematic biases or annotation artifacts. By the latter we mean spurious correlations between inputs and outputs that do not represent a generally held causal relationship between features and classes; models that exploit such correlations may appear to perform a given task well, but fail on out of sample data. In this paper, we evaluate use of different attribution methods for aiding identification of training data artifacts. We propose new hybrid approaches that combine saliency maps (which highlight important input features) with instance attribution methods (which retrieve training samples influential to a given prediction). We show that this proposed training-feature attribution can be used to efficiently uncover artifacts in training data when a challenging validation set is available. We also carry out a small user study to evaluate whether these methods are useful to NLP researchers in practice, with promising results. We make code for all methods and experiments in this paper available.</abstract>
       <url hash="648a8d6a">2022.findings-acl.153</url>
@@ -2123,7 +2123,7 @@
       <author><first>Srikanth</first><last>Doss</last></author>
       <author><first>Rishita</first><last>Anubhai</last></author>
       <author><first>Sunil</first><last>Mallya</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>1956-1971</pages>
       <abstract>We study the problem of few shot learning for named entity recognition. Specifically, we leverage the semantic information in the names of the labels as a way of giving the model additional signal and enriched priors. We propose a neural architecture that consists of two BERT encoders, one to encode the document and its tokens and another one to encode each of the labels in natural language format. Our model learns to match the representations of named entities computed by the first encoder with label representations computed by the second encoder. The label semantics signal is shown to support improved state-of-the-art results in multiple few shot NER benchmarks and on-par performance in standard benchmarks. Our model is especially effective in low resource settings.</abstract>
@@ -2153,9 +2153,9 @@
       <author><first>Karthik</first><last>Gopalakrishnan</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Robinson</first><last>Piramuthu</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Devi</first><last>Parikh</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>1984-1994</pages>
       <abstract>Interactive robots navigating photo-realistic environments need to be trained to effectively leverage and handle the dynamic nature of dialogue in addition to the challenges underlying vision-and-language navigation (VLN). In this paper, we present VISITRON, a multi-modal Transformer-based navigator better suited to the interactive regime inherent to Cooperative Vision-and-Dialog Navigation (CVDN). VISITRON is trained to: i) identify and associate object-level concepts and semantics between the environment and dialogue history, ii) identify when to interact vs. navigate via imitation learning of a binary classification head. We perform extensive pre-training and fine-tuning ablations with VISITRON to gain empirical insights and improve performance on CVDN. VISITRON’s ability to identify when to interact leads to a natural generalization of the game-play mode introduced by Roman et al. (2020) for enabling the use of such models in different environments. VISITRON is competitive with models on the static CVDN leaderboard and attains state-of-the-art performance on the Success weighted by Path Length (SPL) metric.</abstract>
       <url hash="2fe39c00">2022.findings-acl.157</url>
@@ -2244,7 +2244,7 @@
       <title>Richer Countries and Richer Representations</title>
       <author><first>Kaitlyn</first><last>Zhou</last></author>
       <author><first>Kawin</first><last>Ethayarajh</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>2074-2085</pages>
       <abstract>We examine whether some countries are more richly represented in embedding space than others. We find that countries whose names occur with low frequency in training corpora are more likely to be tokenized into subwords, are less semantically distinct in embedding space, and are less likely to be correctly predicted: e.g., Ghana (the correct answer and in-vocabulary) is not predicted for, “The country producing the most cocoa is [MASK].”. Although these performance discrepancies and representational harms are due to frequency, we find that frequency is highly correlated with a country’s GDP; thus perpetuating historic power and wealth inequalities. We analyze the effectiveness of mitigation strategies; recommend that researchers report training word frequencies; and recommend future work for the community to define and design representational guarantees.</abstract>
       <url hash="e7c3f5c9">2022.findings-acl.164</url>
@@ -2260,7 +2260,7 @@
       <author><first>Jason</first><last>Phang</last></author>
       <author><first>Jana</first><last>Thompson</last></author>
       <author><first>Phu Mon</first><last>Htut</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>2086-2105</pages>
       <abstract>It is well documented that NLP models learn social biases, but little work has been done on how these biases manifest in model outputs for applied tasks like question answering (QA). We introduce the Bias Benchmark for QA (BBQ), a dataset of question-sets constructed by the authors that highlight attested social biases against people belonging to protected classes along nine social dimensions relevant for U.S. English-speaking contexts. Our task evaluate model responses at two levels: (i) given an under-informative context, we test how strongly responses reflect social biases, and (ii) given an adequately informative context, we test whether the model’s biases override a correct answer choice. We find that models often rely on stereotypes when the context is under-informative, meaning the model’s outputs consistently reproduce harmful biases in this setting. Though models are more accurate when the context provides an informative answer, they still rely on stereotypes and average up to 3.4 percentage points higher accuracy when the correct answer aligns with a social bias than when it conflicts, with this difference widening to over 5 points on examples targeting gender for most models tested.</abstract>
       <url hash="a354af36">2022.findings-acl.165</url>
@@ -2272,9 +2272,9 @@
       <title>Zero-shot Learning for Grapheme to Phoneme Conversion with Language Ensemble</title>
       <author><first>Xinjian</first><last>Li</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>2106-2115</pages>
       <abstract>Grapheme-to-Phoneme (G2P) has many applications in NLP and speech fields. Most existing work focuses heavily on languages with abundant training datasets, which limits the scope of target languages to less than 100 languages. This work attempts to apply zero-shot learning to approximate G2P models for all low-resource and endangered languages in Glottolog (about 8k languages). For any unseen target language, we first build the phylogenetic tree (i.e. language family tree) to identify top-<tex-math>k</tex-math> nearest languages for which we have training sets. Then we run models of those languages to obtain a hypothesis set, which we combine into a confusion network to propose a most likely hypothesis as an approximation to the target language. We test our approach on over 600 unseen languages and demonstrate it significantly outperforms baselines.</abstract>
       <url hash="1ce8da67">2022.findings-acl.166</url>
@@ -2290,7 +2290,7 @@
       <author><first>Changbing</first><last>Yang</last></author>
       <author><first>Edith</first><last>Coates</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>2116-2130</pages>
       <abstract>Recent progress in NLP is driven by pretrained models leveraging massive datasets and has predominantly benefited the world’s political and economic superpowers. Technologically underserved languages are left behind because they lack such resources. Hundreds of underserved languages, nevertheless, have available data sources in the form of interlinear glossed text (IGT) from language documentation efforts. IGT remains underutilized in NLP work, perhaps because its annotations are only semi-structured and often language-specific. With this paper, we make the case that IGT data can be leveraged successfully provided that target language expertise is available. We specifically advocate for collaboration with documentary linguists. Our paper provides a roadmap for successful projects utilizing IGT data: (1) It is essential to define which NLP tasks can be accomplished with the given IGT data and how these will benefit the speech community. (2) Great care and target language expertise is required when converting the data into structured formats commonly employed in NLP. (3) Task-specific and user-specific evaluation can help to ascertain that the tools which are created benefit the target language speech community. We illustrate each step through a case study on developing a morphological reinflection system for the Tsimchianic language Gitksan.</abstract>
       <url hash="32ecd05a">2022.findings-acl.167</url>
@@ -2364,7 +2364,7 @@
     <paper id="173">
       <title>The impact of lexical and grammatical processing on generating code from natural language</title>
       <author><first>Nathanaël</first><last>Beau</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>2204-2214</pages>
       <abstract>Considering the seq2seq architecture of Yin and Neubig (2018) for natural language to code translation, we identify four key components of importance: grammatical constraints, lexical preprocessing, input representations, and copy mechanisms. To study the impact of these components, we use a state-of-the-art architecture that relies on BERT encoder and a grammar-based decoder for which a formalization is provided. The paper highlights the importance of the lexical substitution component in the current natural language to code systems.</abstract>
       <url hash="28af5fc0">2022.findings-acl.173</url>
@@ -2417,9 +2417,9 @@
     <paper id="177">
       <title><fixed-case>C</fixed-case>hart<fixed-case>QA</fixed-case>: A Benchmark for Question Answering about Charts with Visual and Logical Reasoning</title>
       <author><first>Ahmed</first><last>Masry</last></author>
-      <author><first>Do Xuan</first><last>Long</last></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last></author>
       <author><first>Jia Qing</first><last>Tan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
       <pages>2263-2279</pages>
       <abstract>Charts are very popular for analyzing data. When exploring charts, people often ask a variety of complex reasoning questions that involve several logical and arithmetic operations. They also commonly refer to visual features of a chart in their questions. However, most existing datasets do not focus on such complex reasoning questions as their questions are template-based and answers come from a fixed-vocabulary. In this work, we present a large-scale benchmark covering 9.6K human-written questions as well as 23.1K questions generated from human-written chart summaries. To address the unique challenges in our benchmark involving visual and logical reasoning over charts, we present two transformer-based models that combine visual features and the data table of the chart in a unified way to answer questions. While our models achieve the state-of-the-art results on the previous datasets as well as on our benchmark, the evaluation also reveals several challenges in answering complex reasoning questions.</abstract>
@@ -2457,7 +2457,7 @@
     </paper>
     <paper id="180">
       <title>Phoneme transcription of endangered languages: an evaluation of recent <fixed-case>ASR</fixed-case> architectures in the single speaker scenario</title>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <pages>2301-2308</pages>
       <abstract>Transcription is often reported as the bottleneck in endangered language documentation, requiring large efforts from scarce speakers and transcribers. In general, automatic speech recognition (ASR) can be accurate enough to accelerate transcription only if trained on large amounts of transcribed data. However, when a single speaker is involved, several studies have reported encouraging results for phonetic transcription even with small amounts of training. Here we expand this body of work on speaker-dependent transcription by comparing four ASR approaches, notably recent transformer and pretrained multilingual models, on a common dataset of 11 languages. To automate data preparation, training and evaluation steps, we also developed a phoneme recognition setup which handles morphologically complex languages and writing systems for which no pronunciation dictionary exists. We find that fine-tuning a multilingual pretrained model yields an average phoneme error rate (PER) of 15% for 6 languages with 99 minutes or less of transcribed data for training. For the 5 languages with between 100 and 192 minutes of training, we achieved a PER of 8.4% or less. These results on a number of varied languages suggest that ASR can now significantly reduce transcription efforts in the speaker-dependent situation common in endangered language work.</abstract>
       <url hash="60fa5a43">2022.findings-acl.180</url>
@@ -2480,7 +2480,7 @@
       <title>Combining Static and Contextualised Multilingual Embeddings</title>
       <author><first>Katharina</first><last>Hämmerl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2316-2329</pages>
       <abstract>Static and contextual multilingual embeddings have complementary strengths. Static embeddings, while less expressive than contextual language models, can be more straightforwardly aligned across multiple languages. We combine the strengths of static and contextual models to improve multilingual representations. We extract static embeddings for 40 languages from XLM-R, validate those embeddings with cross-lingual word retrieval, and then align them using VecMap. This results in high-quality, highly multilingual static embeddings. Then we apply a novel continued pre-training approach to XLM-R, leveraging the high quality alignment of our static embeddings to better align the representation space of XLM-R. We show positive results for multiple complex semantic tasks. We release the static embeddings and the continued pre-training code. Unlike most previous work, our continued pre-training approach does not require parallel text.</abstract>
       <url hash="3bea225f">2022.findings-acl.182</url>
@@ -2504,7 +2504,7 @@
       <title>Square One Bias in <fixed-case>NLP</fixed-case>: Towards a Multi-Dimensional Exploration of the Research Manifold</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2340-2354</pages>
       <abstract>The prototypical NLP experiment trains a standard architecture on labeled English data and optimizes for accuracy, without accounting for other dimensions such as fairness, interpretability, or computational efficiency. We show through a manual classification of recent NLP research papers that this is indeed the case and refer to it as the square one experimental setup. We observe that NLP research often goes beyond the square one setup, e.g, focusing not only on accuracy, but also on fairness or interpretability, but typically only along a single dimension. Most work targeting multilinguality, for example, considers only accuracy; most work on fairness or interpretability considers only English; and so on. Such one-dimensionality of most research means we are only exploring a fraction of the NLP research search space. We provide historical and recent examples of how the square one bias has led researchers to draw false conclusions or make unwise choices, point to promising yet unexplored directions on the research manifold, and make practical recommendations to enable more multi-dimensional research. We open-source the results of our annotations to enable further analysis.</abstract>
       <url hash="291713f7">2022.findings-acl.184</url>
@@ -2516,7 +2516,7 @@
       <author><first>Edoardo</first><last>Manino</last></author>
       <author><first>Julia</first><last>Rozanova</last></author>
       <author><first>Danilo</first><last>Carvalho</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
       <author><first>Lucas</first><last>Cordeiro</last></author>
       <pages>2355-2366</pages>
       <abstract>Metamorphic testing has recently been used to check the safety of neural NLP models. Its main advantage is that it does not rely on a ground truth to generate test cases. However, existing studies are mostly concerned with robustness-like metamorphic relations, limiting the scope of linguistic properties they can test. We propose three new classes of metamorphic relations, which address the properties of systematicity, compositionality and transitivity. Unlike robustness, our relations are defined over multiple source inputs, thus increasing the number of test cases that we can produce by a polynomial factor. With them, we test the internal consistency of state-of-the-art NLP models, and show that they do not always behave according to their expected linguistic properties. Lastly, we introduce a novel graphical notation that efficiently summarises the inner structure of metamorphic relations.</abstract>
@@ -2529,12 +2529,12 @@
     <paper id="186">
       <title>Improving Neural Political Statement Classification with Class Hierarchical Information</title>
       <author><first>Erenay</first><last>Dayanik</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Sebastian</first><last>Haunss</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <pages>2367-2382</pages>
       <abstract>Many tasks in text-based computational social science (CSS) involve the classification of political statements into categories based on a domain-specific codebook. In order to be useful for CSS analysis, these categories must be fine-grained. The typically skewed distribution of fine-grained categories, however, results in a challenging classification problem on the NLP side. This paper proposes to make use of the hierarchical relations among categories typically present in such codebooks:e.g., markets and taxation are both subcategories of economy, while borders is a subcategory of security. We use these ontological relations as prior knowledge to establish additional constraints on the learned model, thusimproving performance overall and in particular for infrequent categories. We evaluate several lightweight variants of this intuition by extending state-of-the-art transformer-based textclassifiers on two datasets and multiple languages. We find the most consistent improvement for an approach based on regularization.</abstract>
       <url hash="04b06c85">2022.findings-acl.186</url>
@@ -2579,7 +2579,7 @@
       <author><first>Yilong</first><last>He</last></author>
       <author><first>Yuan</first><last>Ni</last></author>
       <author><first>Guotong</first><last>Xie</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <pages>2409-2421</pages>
       <abstract>Early exiting allows instances to exit at different layers according to the estimation of difficulty. Previous works usually adopt heuristic metrics such as the entropy of internal outputs to measure instance difficulty, which suffers from generalization and threshold-tuning. In contrast, learning to exit, or learning to predict instance difficulty is a more appealing way. Though some effort has been devoted to employing such “learn-to-exit” modules, it is still unknown whether and how well the instance difficulty can be learned. As a response, we first conduct experiments on the learnability of instance difficulty, which demonstrates that modern neural models perform poorly on predicting instance difficulty. Based on this observation, we propose a simple-yet-effective Hash-based Early Exiting approach HashEE) that replaces the learn-to-exit modules with hash functions to assign each token to a fixed exiting layer. Different from previous methods, HashEE requires no internal classifiers nor extra parameters, and therefore is more efficient. HashEE can be used in various tasks (including language understanding and generation) and model architectures such as seq2seq models. Experimental results on classification, regression, and generation tasks demonstrate that HashEE can achieve higher performance with fewer FLOPs and inference time compared with previous state-of-the-art early exiting methods.</abstract>
@@ -2589,7 +2589,7 @@
     </paper>
     <paper id="190">
       <title>Auxiliary tasks to boost Biaffine Semantic Dependency Parsing</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>2422-2429</pages>
       <abstract>The biaffine parser of (CITATION) was successfully extended to semantic dependency parsing (SDP) (CITATION). Its performance on graphs is surprisingly high given that, without the constraint of producing a tree, all arcs for a given sentence are predicted independently from each other (modulo a shared representation of tokens).To circumvent such an independence of decision, while retaining the <tex-math>O(n^2)</tex-math> complexity and highly parallelizable architecture, we propose to use simple auxiliary tasks that introduce some form of interdependence between arcs. Experiments on the three English acyclic datasets of SemEval-2015 task 18 (CITATION), and on French deep syntactic cyclic graphs (CITATION) show modest but systematic performance gains on a near-state-of-the-art baseline using transformer-based contextualized representations. This provides a simple and robust method to boost SDP performance.</abstract>
       <url hash="1c1bbd82">2022.findings-acl.190</url>
@@ -2612,7 +2612,7 @@
     <paper id="192">
       <title>Improved Multi-label Classification under Temporal Concept Drift: Rethinking Group-Robust Algorithms in a Label-Wise Setting</title>
       <author><first>Ilias</first><last>Chalkidis</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2441-2454</pages>
       <abstract>In document classification for, e.g., legal and biomedical text, we often deal with hundreds of classes, including very infrequent ones, as well as temporal concept drift caused by the influence of real world events, e.g., policy changes, conflicts, or pandemics. Class imbalance and drift can sometimes be mitigated by resampling the training data to simulate (or compensate for) a known target distribution, but what if the target distribution is determined by unknown future events? Instead of simply resampling uniformly to hedge our bets, we focus on the underlying optimization algorithms used to train such document classifiers and evaluate several group-robust optimization algorithms, initially proposed to mitigate group-level disparities. Reframing group-robust algorithms as adaptation algorithms under concept drift, we find that Invariant Risk Minimization and Spectral Decoupling outperform sampling-based approaches to class imbalance and concept drift, and lead to much better performance on minority classes. The effect is more pronounced the larger the label set.</abstract>
       <url hash="4bcf8dd4">2022.findings-acl.192</url>
@@ -2640,7 +2640,7 @@
       <title>Why don’t people use character-level machine translation?</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2470-2485</pages>
       <abstract>We present a literature and empirical survey that critically assesses the state of the art in character-level modeling for machine translation (MT). Despite evidence in the literature that character-level systems are comparable with subword systems, they are virtually never used in competitive setups in WMT competitions. We empirically show that even with recent modeling innovations in character-level natural language processing, character-level MT systems still struggle to match their subword-based counterparts. Character-level MT systems show neither better domain robustness, nor better morphological generalization, despite being often so motivated. However, we are able to show robustness towards source side noise and that translation quality does not degrade with increasing beam size at decoding time.</abstract>
       <url hash="cb530acb">2022.findings-acl.194</url>
@@ -2711,7 +2711,7 @@
       <title>Single Model Ensemble for Subword Regularized Models in Low-Resource Machine Translation</title>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Tatsuya</first><last>Hiraoka</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>2536-2541</pages>
       <abstract>Subword regularizations use multiple subword segmentations during training to improve the robustness of neural machine translation models. In previous subword regularizations, we use multiple segmentations in the training process but use only one segmentation in the inference. In this study, we propose an inference strategy to address this discrepancy. The proposed strategy approximates the marginalized likelihood by using multiple segmentations including the most plausible segmentation and several sampled segmentations. Because the proposed strategy aggregates predictions from several segmentations, we can regard it as a single model ensemble that does not require any additional cost for training. Experimental results show that the proposed strategy improves the performance of models trained with subword regularization in low-resource machine translation tasks.</abstract>
       <url hash="16e62edb">2022.findings-acl.199</url>
@@ -2723,7 +2723,7 @@
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Joris</first><last>Vanvinckenroye</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>2542-2551</pages>
       <abstract>The filtering and/or selection of training data is one of the core aspects to be considered when building a strong machine translation system. In their influential work, Khayrallah and Koehn (2018) investigated the impact of different types of noise on the performance of machine translation systems. In the same year the WMT introduced a shared task on parallel corpus filtering, which went on to be repeated in the following years, and resulted in many different filtering approaches being proposed. In this work we aim to combine the recent achievements in data filtering with the original analysis of Khayrallah and Koehn (2018) and investigate whether state-of-the-art filtering systems are capable of removing all the suggested noise types. We observe that most of these types of noise can be detected with an accuracy of over 90% by modern filtering systems when operating in a well studied high resource setting. However, we also find that when confronted with more refined noise categories or when working with a less common language pair, the performance of the filtering systems is far from optimal, showing that there is still room for improvement in this area of research.</abstract>
       <url hash="e5c38443">2022.findings-acl.200</url>
@@ -2774,7 +2774,7 @@
       <author><first>Hongyuan</first><last>Lu</last></author>
       <author><first>Wai</first><last>Lam</last></author>
       <author><first>Hong</first><last>Cheng</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>2591-2601</pages>
       <abstract>Dialogue agents can leverage external textual knowledge to generate responses of a higher quality. To our best knowledge, most existing works on knowledge grounded dialogue settings assume that the user intention is always answerable. Unfortunately, this is impractical as there is no guarantee that the knowledge retrievers could always retrieve the desired knowledge. Therefore, this is crucial to incorporate fallback responses to respond to unanswerable contexts appropriately while responding to the answerable contexts in an informative manner. We propose a novel framework that automatically generates a control token with the generator to bias the succeeding response towards informativeness for answerable contexts and fallback for unanswerable contexts in an end-to-end manner. Since no existing knowledge grounded dialogue dataset considers this aim, we augment the existing dataset with unanswerable contexts to conduct our experiments. Automatic and human evaluation results indicate that naively incorporating fallback responses with controlled text generation still hurts informativeness for answerable context. In contrast, our proposed framework effectively mitigates this problem while still appropriately presenting fallback responses to unanswerable contexts. Such a framework also reduces the extra burden of the additional classifier and the overheads introduced in the previous works, which operates in a pipeline manner.</abstract>
       <url hash="5c880298">2022.findings-acl.204</url>
@@ -2821,7 +2821,7 @@
       <author><first>Xinchao</first><last>Xu</last></author>
       <author><first>Zhibin</first><last>Gou</last></author>
       <author><first>Wenquan</first><last>Wu</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <author><first>Shihang</first><last>Wang</last></author>
@@ -2838,7 +2838,7 @@
       <author><first>Michele</first><last>Mastromattei</last></author>
       <author><first>Francesca</first><last>Fallucchi</last></author>
       <author><first>Noemi</first><last>Scarpato</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>2651-2662</pages>
       <abstract>Word embeddings are powerful dictionaries, which may easily capture language variations. However, these dictionaries fail to give sense to rare words, which are surprisingly often covered by traditional dictionaries. In this paper, we propose to use definitions retrieved in traditional dictionaries to produce word embeddings for rare words. For this purpose, we introduce two methods: Definition Neural Network (DefiNNet) and Define BERT (DefBERT). In our experiments, DefiNNet and DefBERT significantly outperform state-of-the-art as well as baseline methods devised for producing embeddings of unknown words. In fact, DefiNNet significantly outperforms FastText, which implements a method for the same task-based on n-grams, and DefBERT significantly outperforms the BERT method for OOV words. Then, definitions in traditional dictionaries are useful to build word embeddings for rare words.</abstract>
       <url hash="a0d5de34">2022.findings-acl.208</url>
@@ -2926,8 +2926,8 @@
     </paper>
     <paper id="215">
       <title>Graph Refinement for Coreference Resolution</title>
-      <author><first>Lesly</first><last>Miculicich</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>2732-2742</pages>
       <abstract>The state-of-the-art models for coreference resolution are based on independent mention pair-wise decisions. We propose a modelling approach that learns coreference at the document-level and takes global decisions. For this purpose, we model coreference links in a graph structure where the nodes are tokens in the text, and the edges represent the relationship between them. Our model predicts the graph in a non-autoregressive manner, then iteratively refines it based on previous predictions, allowing global dependencies between decisions. The experimental results show improvements over various baselines, reinforcing the hypothesis that document-level information improves conference resolution.</abstract>
       <url hash="df6ef574">2022.findings-acl.215</url>
@@ -3179,7 +3179,7 @@
       <title>Modular and Parameter-Efficient Multimodal Fusion with Prompting</title>
       <author><first>Sheng</first><last>Liang</last></author>
       <author><first>Mengjie</first><last>Zhao</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>2976-2985</pages>
       <abstract>Recent research has made impressive progress in large-scale multimodal pre-training. In the context of the rapid growth of model size, it is necessary to seek efficient and flexible methods other than finetuning. In this paper, we propose to use prompt vectors to align the modalities. Our method achieves comparable performance to several other multimodal fusion methods in low-resource settings. We further show that our method is modular and parameter-efficient for processing tasks involving two or more data modalities.</abstract>
       <url hash="5cd11a03">2022.findings-acl.234</url>
@@ -3191,7 +3191,7 @@
       <title>Synchronous Refinement for Neural Machine Translation</title>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>2986-2996</pages>
@@ -3254,7 +3254,7 @@
     <paper id="240">
       <title>Factual Consistency of Multilingual Pretrained Language Models</title>
       <author><first>Constanza</first><last>Fierro</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3046-3052</pages>
       <abstract>Pretrained language models can be queried for factual knowledge, with potential applications in knowledge base acquisition and tasks that require inference. However, for that, we need to know how reliable this knowledge is, and recent work has shown that monolingual English language models lack consistency when predicting factual knowledge, that is, they fill-in-the-blank differently for paraphrases describing the same fact. In this paper, we extend the analysis of consistency to a multilingual setting. We introduce a resource, mParaRel, and investigate (i) whether multilingual language models such as mBERT and XLM-R are more consistent than their monolingual counterparts;and (ii) if such models are equally consistent across languages. We find that mBERT is as inconsistent as English BERT in English paraphrases, but that both mBERT and XLM-R exhibit a high degree of inconsistency in English and even more so for all the other 45 languages.</abstract>
       <url hash="ca0c5ebb">2022.findings-acl.240</url>
@@ -3375,7 +3375,7 @@
     <paper id="249">
       <title>The Inefficiency of Language Models in Scholarly Retrieval: An Experimental Walk-through</title>
       <author><first>Shruti</first><last>Singh</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>3153-3173</pages>
       <abstract>Language models are increasingly becoming popular in AI-powered scientific IR systems. This paper evaluates popular scientific language models in handling (i) short-query texts and (ii) textual neighbors. Our experiments showcase the inability to retrieve relevant documents for a short-query text even under the most relaxed conditions. Additionally, we leverage textual neighbors, generated by small perturbations to the original text, to demonstrate that not all perturbations lead to close neighbors in the embedding space. Further, an exhaustive categorization yields several classes of orthographically and semantically related, partially related and completely unrelated neighbors. Retrieval performance turns out to be more influenced by the surface form rather than the semantics of the text.</abstract>
       <url hash="7e85d001">2022.findings-acl.249</url>
@@ -3384,7 +3384,7 @@
     </paper>
     <paper id="250">
       <title>Fusing Heterogeneous Factors with Triaffine Mechanism for Nested Named Entity Recognition</title>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Chuanqi</first><last>Tan</last></author>
       <author><first>Songfang</first><last>Huang</last></author>
       <author><first>Fei</first><last>Huang</last></author>
@@ -3493,11 +3493,11 @@
     <paper id="257">
       <title>One Agent To Rule Them All: Towards Multi-agent Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Christopher</first><last>Clarke</last></author>
-      <author><first>Joseph</first><last>Peper</last></author>
+      <author id="joseph-j-peper"><first>Joseph</first><last>Peper</last></author>
       <author><first>Karthik</first><last>Krishnamurthy</last></author>
       <author><first>Walter</first><last>Talamonti</last></author>
       <author><first>Kevin</first><last>Leach</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <author><first>Yiping</first><last>Kang</last></author>
       <author><first>Lingjia</first><last>Tang</last></author>
       <author><first>Jason</first><last>Mars</last></author>
@@ -3514,7 +3514,7 @@
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Kei</first><last>Uchiumi</last></author>
       <author><first>Atsushi</first><last>Keyaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3268-3275</pages>
       <abstract>We present two simple modifications for word-level perturbation: Word Replacement considering Length (WR-L) and Compositional Word Replacement (CWR).In conventional word replacement, a word in an input is replaced with a word sampled from the entire vocabulary, regardless of the length and context of the target word.WR-L considers the length of a target word by sampling words from the Poisson distribution.CWR considers the compositional candidates by restricting the source of sampling to related words that appear in subword regularization. Experimental results showed that the combination of WR-L and CWR improved the performance of text classification and machine translation.</abstract>
       <url hash="eb299be3">2022.findings-acl.258</url>
@@ -3538,7 +3538,7 @@
       <title>Controlling the Focus of Pretrained Language Generation Models</title>
       <author><first>Jiabao</first><last>Ji</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Tianxing</first><last>He</last></author>
       <pages>3291-3306</pages>
       <abstract>The finetuning of pretrained transformer-based language generation models are typically conducted in an end-to-end manner, where the model learns to attend to relevant parts of the input by itself. However, there does not exist a mechanism to directly control the model’s focus. This work aims to develop a control mechanism by which a user can select spans of context as “highlights” for the model to focus on, and generate relevant output. To achieve this goal, we augment a pretrained model with trainable “focus vectors” that are directly applied to the model’s embeddings, while the model itself is kept fixed. These vectors, trained on automatic annotations derived from attribution methods, act as indicators for context importance. We test our approach on two core generation tasks: dialogue response generation and abstractive summarization. We also collect evaluation data where the highlight-generation pairs are annotated by humans. Our experiments show that the trained focus vectors are effective in steering the model to generate outputs that are relevant to user-selected highlights.</abstract>
@@ -3551,7 +3551,7 @@
       <author><first>Hayate</first><last>Iso</last></author>
       <author><first>Xiaolan</first><last>Wang</last></author>
       <author><first>Stefanos</first><last>Angelidis</last></author>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <pages>3307-3324</pages>
       <abstract>Opinion summarization focuses on generating summaries that reflect popular subjective information expressed in multiple online reviews. While generated summaries offer general and concise information about a particular hotel or product, the information may be insufficient to help the user compare multiple different choices. Thus, the user may still struggle with the question “Which one should I pick?” In this paper, we propose the comparative opinion summarization task, which aims at generating two contrastive summaries and one common summary from two different candidate sets of reviews. We develop a comparative summarization framework CoCoSum, which consists of two base summarization models that jointly generate contrastive and common summaries. Experimental results on a newly created benchmark CoCoTrip show that CoCoSum can produce higher-quality contrastive and common summaries than state-of-the-art opinion summarization models. The dataset and code are available at <url>https://github.com/megagonlabs/cocosum</url></abstract>
       <url hash="547a496b">2022.findings-acl.261</url>
@@ -3588,10 +3588,10 @@
     <paper id="264">
       <title>From Stance to Concern: Adaptation of Propositional Analysis to New Tasks and Domains</title>
       <author><first>Brodie</first><last>Mather</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>William</first><last>de Beaumont</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="william-de-beaumont"><first>William</first><last>de Beaumont</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Sonja</first><last>Schmer-Galunder</last></author>
       <pages>3354-3367</pages>
       <abstract>We present a generalized paradigm for adaptation of propositional analysis (predicate-argument pairs) to new tasks and domains. We leverage an analogy between stances (belief-driven sentiment) and concerns (topical issues with moral dimensions/endorsements) to produce an explanatory representation. A key contribution is the combination of semi-automatic resource building for extraction of domain-dependent concern types (with 2-4 hours of human labor per domain) and an entirely automatic procedure for extraction of domain-independent moral dimensions and endorsement values. Prudent (automatic) selection of terms from propositional structures for lexical expansion (via semantic similarity) produces new moral dimension lexicons at three levels of granularity beyond a strong baseline lexicon. We develop a ground truth (GT) based on expert annotators and compare our concern detection output to GT, to yield 231% improvement in recall over baseline, with only a 10% loss in precision. F1 yields 66% improvement over baseline and 97.8% of human performance. Our lexically based approach yields large savings over approaches that employ costly human labor and model building. We provide to the community a newly expanded moral dimension/value lexicon, annotation guidelines, and GT.</abstract>
@@ -3604,7 +3604,7 @@
       <author><first>Scott</first><last>Novotney</last></author>
       <author><first>Sreeparna</first><last>Mukherjee</last></author>
       <author><first>Zeeshan</first><last>Ahmed</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>3368-3379</pages>
       <abstract>We propose a framework to modularize the training of neural language models that use diverse forms of context by eliminating the need to jointly train context and within-sentence encoders. Our approach, contextual universal embeddings (CUE), trains LMs on one type of contextual data and adapts to novel context types. The model consists of a pretrained neural sentence LM, a BERT-based contextual encoder, and a masked transfomer decoder that estimates LM probabilities using sentence-internal and contextual evidence. When contextually annotated data is unavailable, our model learns to combine contextual and sentence-internal information using noisy oracle unigram embeddings as a proxy. Real context data can be introduced later and used to adapt a small number of parameters that map contextual data into the decoder’s embedding space. We validate the CUE framework on a NYTimes text corpus with multiple metadata types, for which the LM perplexity can be lowered from 36.6 to 27.4 by conditioning on context. Bootstrapping a contextual LM with only a subset of the metadata during training retains 85% of the achievable gain. Training the model initially with proxy context retains 67% of the perplexity gain after adapting to real context. Furthermore, we can swap one type of pretrained sentence LM for another without retraining the context encoders, by only adapting the decoder model. Overall, we obtain a modular framework that allows incremental, scalable training of context-enhanced LMs.</abstract>
       <url hash="23c93066">2022.findings-acl.265</url>
@@ -3735,8 +3735,8 @@
     <paper id="275">
       <title>What does it take to bake a cake? The <fixed-case>R</fixed-case>ecipe<fixed-case>R</fixed-case>ef corpus and anaphora resolution in procedural text</title>
       <author><first>Biaoyan</first><last>Fang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>3481-3495</pages>
       <abstract>Procedural text contains rich anaphoric phenomena, yet has not received much attention in NLP. To fill this gap, we investigate the textual properties of two types of procedural text, recipes and chemical patents, and generalize an anaphora annotation framework developed for the chemical domain for modeling anaphoric phenomena in recipes. We apply this framework to annotate the RecipeRef corpus with both bridging and coreference relations. Through comparison to chemical patents, we show the complexity of anaphora resolution in recipes. We demonstrate empirically that transfer learning from the chemical domain improves resolution of anaphora in recipes, suggesting transferability of general procedural knowledge.</abstract>
       <url hash="74bab014">2022.findings-acl.275</url>
@@ -3802,7 +3802,7 @@
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Chengqi</first><last>Zhao</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>3537-3548</pages>
       <abstract>This paper does not aim at introducing a novel model for document-level neural machine translation. Instead, we head back to the original Transformer model and hope to answer the following question: Is the capacity of current models strong enough for document-level translation? Interestingly, we observe that the original Transformer with appropriate training techniques can achieve strong results for document translation, even with a length of 2000 words. We evaluate this model and several recent approaches on nine document-level datasets and two sentence-level datasets across six languages. Experiments show that document-level Transformer models outperforms sentence-level ones and many previous methods in a comprehensive set of metrics, including BLEU, four lexical indices, three newly proposed assistant linguistic indicators, and human evaluation.</abstract>
@@ -3879,7 +3879,7 @@
     <paper id="285">
       <title>Incorporating Dynamic Semantics into Pre-Trained Language Model for Aspect-based Sentiment Analysis</title>
       <author><first>Kai</first><last>Zhang</last></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last></author>
+      <author><first>Kun</first><last>Zhang</last></author>
       <author><first>Mengdi</first><last>Zhang</last></author>
       <author><first>Hongke</first><last>Zhao</last></author>
       <author><first>Qi</first><last>Liu</last></author>
@@ -3921,7 +3921,7 @@
       <title>Modular Domain Adaptation</title>
       <author><first>Junshen</first><last>Chen</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>3633-3655</pages>
       <abstract>Off-the-shelf models are widely used by computational social science researchers to measure properties of text, such as sentiment. However, without access to source data it is difficult to account for domain shift, which represents a threat to validity. Here, we treat domain adaptation as a modular process that involves separate model producers and model consumers, and show how they can independently cooperate to facilitate more accurate measurements of text. We introduce two lightweight techniques for this scenario, and demonstrate that they reliably increase out-of-domain accuracy on four multi-domain text classification datasets when used with linear and contextual embedding models. We conclude with recommendations for model producers and consumers, and release models and replication code to accompany this paper.</abstract>
       <url hash="5a6e17ee">2022.findings-acl.288</url>
@@ -3957,7 +3957,7 @@
       <title>Addressing Resource and Privacy Constraints in Semantic Parsing Through Data Augmentation</title>
       <author><first>Kevin</first><last>Yang</last></author>
       <author><first>Olivia</first><last>Deng</last></author>
-      <author><first>Charles</first><last>Chen</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
       <author><first>Richard</first><last>Shin</last></author>
       <author><first>Subhro</first><last>Roy</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
@@ -3970,9 +3970,9 @@
     </paper>
     <paper id="292">
       <title>Improving Candidate Retrieval with Entity Profile Generation for <fixed-case>W</fixed-case>ikidata Entity Linking</title>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>3696-3711</pages>
       <abstract>Entity linking (EL) is the task of linking entity mentions in a document to referent entities in a knowledge base (KB). Many previous studies focus on Wikipedia-derived KBs. There is little work on EL over Wikidata, even though it is the most extensive crowdsourced KB. The scale of Wikidata can open up many new real-world applications, but its massive number of entities also makes EL challenging. To effectively narrow down the search space, we propose a novel candidate retrieval paradigm based on entity profiling. Wikidata entities and their textual fields are first indexed into a text search engine (e.g., Elasticsearch). During inference, given a mention and its context, we use a sequence-to-sequence (seq2seq) model to generate the profile of the target entity, which consists of its title and description. We use the profile to query the indexed search engine to retrieve candidate entities. Our approach complements the traditional approach of using a Wikipedia anchor-text dictionary, enabling us to further design a highly effective hybrid method for candidate retrieval. Combined with a simple cross-attention reranker, our complete EL framework achieves state-of-the-art results on three Wikidata-based datasets and strong performance on TACKBP-2010.</abstract>
       <url hash="4b80b6e3">2022.findings-acl.292</url>
@@ -4016,7 +4016,7 @@
       <author><first>Dara</first><last>Bahri</last></author>
       <author><first>Ji</first><last>Ma</last></author>
       <author><first>Jai</first><last>Gupta</last></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Yi</first><last>Tay</last></author>
       <author><first>Donald</first><last>Metzler</last></author>
       <pages>3747-3758</pages>
@@ -4062,7 +4062,7 @@
     <paper id="299">
       <title>Probing Multilingual Cognate Prediction Models</title>
       <author><first>Clémentine</first><last>Fourrier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>3786-3801</pages>
       <abstract>Character-based neural machine translation models have become the reference models for cognate prediction, a historical linguistics task. So far, all linguistic interpretations about latent information captured by such models have been based on external analysis (accuracy, raw results, errors). In this paper, we investigate what probing can tell us about both models and previous interpretations, and learn that though our models store linguistic and diachronic information, they do not achieve it in previously assumed ways.</abstract>
       <url hash="c1060f20">2022.findings-acl.299</url>
@@ -4271,7 +4271,7 @@
     <paper id="314">
       <title>Probing <fixed-case>BERT</fixed-case>’s priors with serial reproduction chains</title>
       <author><first>Takateru</first><last>Yamakoshi</last></author>
-      <author><first>Thomas</first><last>Griffiths</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas</first><last>Griffiths</last></author>
       <author><first>Robert</first><last>Hawkins</last></author>
       <pages>3977-3992</pages>
       <abstract>Sampling is a promising bottom-up method for exposing what generative models have learned about language, but it remains unclear how to generate representative samples from popular masked language models (MLMs) like BERT. The MLM objective yields a dependency network with no guarantee of consistent conditional distributions, posing a problem for naive approaches. Drawing from theories of iterated learning in cognitive science, we explore the use of <i>serial reproduction chains</i> to sample from BERT’s priors. In particular, we observe that a unique and consistent estimator of the ground-truth joint distribution is given by a Generative Stochastic Network (GSN) sampler, which randomly selects which token to mask and reconstruct on each step. We show that the lexical and syntactic statistics of sentences from GSN chains closely match the ground-truth corpus distribution and perform better than other methods in a large corpus of naturalness judgments. Our findings establish a firmer theoretical foundation for bottom-up probing and highlight richer deviations from human priors.</abstract>
@@ -4298,7 +4298,7 @@
       <author><first>Ashwin</first><last>Srinivasan</last></author>
       <author><first>Ankita</first><last>Sharma</last></author>
       <author><first>Damien</first><last>Jose</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>4008-4020</pages>
       <abstract>Dense retrieval (DR) methods conduct text retrieval by first encoding texts in the embedding space and then matching them by nearest neighbor search. This requires strong locality properties from the representation space, e.g., close allocations of each small group of relevant texts, which are hard to generalize to domains without sufficient training data. In this paper, we aim to improve the generalization ability of DR models from source training domains with rich supervision signals to target domains without any relevance label, in the zero-shot setting. To achieve that, we propose Momentum adversarial Domain Invariant Representation learning (MoDIR), which introduces a momentum method to train a domain classifier that distinguishes source versus target domains, and then adversarially updates the DR encoder to learn domain invariant representations. Our experiments show that MoDIR robustly outperforms its baselines on 10+ ranking datasets collected in the BEIR benchmark in the zero-shot setup, with more than 10% relative gains on datasets with enough sensitivity for DR models’ evaluation. Source code is available at <url>https://github.com/ji-xin/modir</url>.</abstract>
       <url hash="51ee267a">2022.findings-acl.316</url>
@@ -4479,9 +4479,9 @@
       <title>What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation</title>
       <author><first>Sarik</first><last>Ghazarian</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>4194-4204</pages>
       <abstract>Accurate automatic evaluation metrics for open-domain dialogs are in high demand. Existing model-based metrics for system response evaluation are trained on human annotated data, which is cumbersome to collect. In this work, we propose to use information that can be automatically extracted from the next user utterance, such as its sentiment or whether the user explicitly ends the conversation, as a proxy to measure the quality of the previous system response. This allows us to train on a massive set of dialogs with weak supervision, without requiring manual system turn quality annotations. Experiments show that our model is comparable to models trained on human annotated data. Furthermore, our model generalizes across both spoken and written open-domain dialog corpora collected from real and paid users.</abstract>
       <url hash="766b74d5">2022.findings-acl.331</url>
@@ -4494,8 +4494,8 @@
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: NAACL 2022</booktitle>
       <editor><first>Marine</first><last>Carpuat</last></editor>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
-      <editor><first>Ivan Vladimir</first><last>Meza Ruiz</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, United States</address>
       <month>July</month>
@@ -4579,7 +4579,7 @@
       <title><fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>er<fixed-case>S</fixed-case>: Improving scientific claim verification with weak supervision and full-document context</title>
       <author><first>David</first><last>Wadden</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
@@ -4668,7 +4668,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>F</fixed-case>ed<fixed-case>NLP</fixed-case>: Benchmarking Federated Learning Methods for Natural Language Processing Tasks</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Chaoyang</first><last>He</last></author>
       <author><first>Zihang</first><last>Ze</last></author>
       <author><first>Hulin</first><last>Wang</last></author>
@@ -4701,7 +4701,7 @@
     <paper id="15">
       <title>Lacuna Reconstruction: Self-Supervised Pre-Training for Low-Resource Historical Document Transcription</title>
       <author><first>Nikolai</first><last>Vogler</last></author>
-      <author><first>Jonathan</first><last>Allen</last></author>
+      <author id="jonathan-allen"><first>Jonathan</first><last>Allen</last></author>
       <author><first>Matthew</first><last>Miller</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>206-216</pages>
@@ -4755,7 +4755,7 @@
       <author><first>Kasturi</first><last>Bhattacharjee</last></author>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>253-268</pages>
       <abstract>Previous studies on question answering over knowledge graphs have typically operated over a single knowledge graph (KG). This KG is assumed to be known a priori and is lever- aged similarly for all users’ queries during inference. However, such an assumption is not applicable to real-world settings, such as health- care, where one needs to handle queries of new users over unseen KGs during inference. Furthermore, privacy concerns and high computational costs render it infeasible to query the single KG that has information about all users while answering a specific user’s query. The above concerns motivate our question answer- ing setting over personalized knowledge graphs (PERKGQA) where each user has restricted access to their KG. We observe that current state-of-the-art KGQA methods that require learning prior node representations fare poorly. We propose two complementary approaches, PATHCBR and PATHRGCN for PERKGQA. The former is a simple non-parametric technique that employs case-based reasoning, while the latter is a parametric approach using graph neural networks. Our proposed methods circumvent learning prior representations, can generalize to unseen KGs, and outperform strong baselines on an academic and an internal dataset by 6.5% and 10.5%.</abstract>
       <url hash="4f3df8bd">2022.findings-naacl.19</url>
@@ -4809,7 +4809,7 @@
     <paper id="23">
       <title>Exploring the Value of Multi-View Learning for Session-Aware Query Representation</title>
       <author><first>Diego</first><last>Ortiz</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Gilles</first><last>Hubert</last></author>
       <author><first>Karen</first><last>Pinel-Sauvagnat</last></author>
       <author><first>Lynda</first><last>Tamine</last></author>
@@ -4911,7 +4911,7 @@
       <author><first>Ana</first><last>Marasovic</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Doug</first><last>Downey</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <pages>410-424</pages>
       <abstract>Self-rationalization models that predict task labels and generate free-text elaborations for their predictions could enable more intuitive interaction with NLP systems. These models are, however, currently trained with a large amount of human-written free-text explanations for each task which hinders their broader usage. We propose to study a more realistic setting of self-rationalization using few training examples. We present FEB—a standardized collection of four existing English-language datasets and associated metrics. We identify the right prompting approach by extensively exploring natural language prompts on FEB. Then, by using this prompt and scaling the model size, we demonstrate that making progress on few-shot self-rationalization is possible. We show there is still ample room for improvement in this task: the average plausibility of generated explanations assessed by human annotators is at most 51% (with GPT-3), while plausibility of human explanations is 76%. We hope that FEB and our proposed approach will spur the community to take on the few-shot self-rationalization challenge.</abstract>
       <url hash="536d2a81">2022.findings-naacl.31</url>
@@ -4937,7 +4937,7 @@
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Sravanthi</first><last>Parasa</last></author>
       <author><first>Sergey</first><last>Feldman</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Tom</first><last>Hope</last></author>
       <pages>438-453</pages>
       <abstract>We present BEEP (Biomedical Evidence-Enhanced Predictions), a novel approach for clinical outcome prediction that retrieves patient-specific medical literature and incorporates it into predictive models. Based on each individual patient’s clinical notes, we train language models (LMs) to find relevant papers and fuse them with information from notes to predict outcomes such as in-hospital mortality. We develop methods to retrieve literature based on noisy, information-dense patient notes, and to augment existing outcome prediction models with retrieved papers in a manner that maximizes predictive accuracy. Our approach boosts predictive performance on three important clinical tasks in comparison to strong recent LM baselines, increasing F1 by up to 5 points and precision@Top-K by a large margin of over 25%.</abstract>
@@ -4950,8 +4950,8 @@
       <title>Improving Few-Shot Relation Classification by Prototypical Representation Learning with Definition Text</title>
       <author><first>Li</first><last>Zhenzhen</last></author>
       <author><first>Yuyang</first><last>Zhang</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>454-464</pages>
       <abstract>Few-shot relation classification is difficult because the few instances available may not represent well the relation patterns. Some existing approaches explored extra information such as relation definition, in addition to the instances, to learn a better relation representation. However, the encoding of the extra information has been performed independently from the labeled instances. In this paper, we propose to learn a prototype encoder from relation definition in a way that is useful for relation instance classification. To this end, we use a joint training approach to train both a prototype encoder from definition and an instance encoder. Extensive experiments on several datasets demonstrate the effectiveness and usefulness of our prototype encoder from definition text, enabling us to outperform state-of-the-art approaches.</abstract>
       <url hash="fd8ed972">2022.findings-naacl.34</url>
@@ -4982,10 +4982,10 @@
     <paper id="36">
       <title>Multimodal Intent Discovery from Livestream Videos</title>
       <author><first>Adyasha</first><last>Maharana</last></author>
-      <author><first>Quan</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>476-489</pages>
@@ -5034,7 +5034,7 @@
       <author><first>Seunghyun</first><last>Yoon</last></author>
       <author><first>Ajinkya</first><last>Kale</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>517-527</pages>
       <abstract>Modern image captioning models are usually trained with text similarity objectives. However, since reference captions in public datasets often describe the most salient common objects, models trained with the text similarity objectives tend to ignore specific and detailed aspects of an image that distinguish it from others. Towards more descriptive and distinctive caption generation, we propose to use CLIP, a multimodal encoder trained on huge image-text pairs from the web, to calculate multi-modal similarity and use it as a reward function. We also propose a simple finetuning strategy of CLIP text encoder to improve grammar that does not require extra text annotation. This completely eliminates the need for reference captions during the reward computation. To comprehensively evaluate descriptive captions, we introduce FineCapEval, a new dataset for caption evaluation with fine-grained criteria: overall, background, object, relations. In our experiments on text-to-image retrieval and FineCapEval, the proposed CLIP-guided model generates more distinctive captions than the CIDEroptimized model. We also show that our unsupervised grammar finetuning of the CLIP text encoder alleviates the degeneration problem of the naive CLIP reward. Lastly, we show human analysis where the annotators strongly prefer CLIP reward to CIDEr and MLE objectives on diverse criteria.</abstract>
@@ -5060,8 +5060,8 @@
       <title>Modeling Ideological Salience and Framing in Polarized Online Groups with Graph Neural Networks and Structured Sparsity</title>
       <author><first>Valentin</first><last>Hofmann</last></author>
       <author><first>Xiaowen</first><last>Dong</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>536-550</pages>
       <abstract>The increasing polarization of online political discourse calls for computational tools that automatically detect and monitor ideological divides in social media. We introduce a minimally supervised method that leverages the network structure of online discussion forums, specifically Reddit, to detect polarized concepts. We model polarization along the dimensions of salience and framing, drawing upon insights from moral psychology. Our architecture combines graph neural networks with structured sparsity learning and results in representations for concepts and subreddits that capture temporal ideological dynamics such as right-wing and left-wing radicalization.</abstract>
       <url hash="b10e6a8a">2022.findings-naacl.41</url>
@@ -5077,7 +5077,7 @@
       <author><first>Muhammed</first><last>Kocyigit</last></author>
       <author><first>Seda</first><last>Akbiyik</last></author>
       <author><first>Serife Leman</first><last>Runyun</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>551-564</pages>
       <abstract>Large language models trained on a mixture of NLP tasks that are converted into a text-to-text format using prompts, can generalize into novel forms of language and handle novel tasks. A large body of work within prompt engineering attempts to understand the effects of input forms and prompts in achieving superior performance. We consider an alternative measure and inquire whether the way in which an input is encoded affects social biases promoted in outputs. In this paper, we study T0, a large-scale multi-task text-to-text language model trained using prompt-based learning. We consider two different forms of semantically equivalent inputs: question-answer format and premise-hypothesis format. We use an existing bias benchmark for the former BBQ and create the first bias benchmark in natural language inference BBNLI with hand-written hypotheses while also converting each benchmark into the other form. The results on two benchmarks suggest that given two different formulations of essentially the same input, T0 conspicuously acts more biased in question answering form, which is seen during training, compared to premise-hypothesis form which is unlike its training examples. Code and data are released under <url>https://github.com/feyzaakyurek/bbnli</url>.</abstract>
       <url hash="1ac22f65">2022.findings-naacl.42</url>
@@ -5089,7 +5089,7 @@
       <title>Anti-Overestimation Dialogue Policy Learning for Task-Completion Dialogue System</title>
       <author><first>Chang</first><last>Tian</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>565-577</pages>
       <abstract>A dialogue policy module is an essential part of task-completion dialogue systems. Recently, increasing interest has focused on reinforcement learning (RL)-based dialogue policy. Its favorable performance and wise action decisions rely on an accurate estimation of action values. The overestimation problem is a widely known issue of RL since its estimate of the maximum action value is larger than the ground truth, which results in an unstable learning process and suboptimal policy. This problem is detrimental to RL-based dialogue policy learning. To mitigate this problem, this paper proposes a dynamic partial average estimator (DPAV) of the ground truth maximum action value. DPAV calculates the partial average between the predicted maximum action value and minimum action value, where the weights are dynamically adaptive and problem-dependent. We incorporate DPAV into a deep Q-network as the dialogue policy and show that our method can achieve better or comparable results compared to top baselines on three dialogue datasets of different domains with a lower computational load. In addition, we also theoretically prove the convergence and derive the upper and lower bounds of the bias compared with those of other methods.</abstract>
       <url hash="6e89e096">2022.findings-naacl.43</url>
@@ -5101,7 +5101,7 @@
       <title><fixed-case>P</fixed-case>enn-<fixed-case>H</fixed-case>elsinki Parsed Corpus of Early <fixed-case>M</fixed-case>odern <fixed-case>E</fixed-case>nglish: First Parsing Results and Analysis</title>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <pages>578-593</pages>
       <abstract>The Penn-Helsinki Parsed Corpus of Early Modern English (PPCEME), a 1.7-million-word treebank that is an important resource for research in syntactic change, has several properties that present potential challenges for NLP technologies. We describe these key features of PPCEME that make it challenging for parsing, including a larger and more varied set of function tags than in the Penn Treebank, and present results for this corpus using a modified version of the Berkeley Neural Parser and the approach to function tag recovery of Gabbard et al. (2006). While this approach to function tag recovery gives reasonable results, it is in some ways inappropriate for span-based parsers. We also present further evidence of the importance of in-domain pretraining for contextualized word representations. The resulting parser will be used to parse Early English Books Online, a 1.5 billion word corpus whose utility for the study of syntactic change will be greatly increased with the addition of accurate parse trees.</abstract>
       <url hash="3131ccc8">2022.findings-naacl.44</url>
@@ -5142,12 +5142,12 @@
     </paper>
     <paper id="47">
       <title><fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>A</fixed-case>-<fixed-case>MT</fixed-case>: A Dataset and Benchmark for Contrastive Controlled <fixed-case>MT</fixed-case> with Application to Formality</title>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Anna</first><last>Currey</last></author>
       <author><first>Benjamin</first><last>Hsu</last></author>
       <author><first>Xing</first><last>Niu</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <pages>616-632</pages>
       <abstract>The machine translation (MT) task is typically formulated as that of returning a single translation for an input segment. However, in many cases, multiple different translations are valid and the appropriate translation may depend on the intended target audience, characteristics of the speaker, or even the relationship between speakers. Specific problems arise when dealing with honorifics, particularly translating from English into languages with formality markers. For example, the sentence “Are you sure?” can be translated in German as “Sind Sie sich sicher?” (formal register) or “Bist du dir sicher?” (informal). Using wrong or inconsistent tone may be perceived as inappropriate or jarring for users of certain cultures and demographics. This work addresses the problem of learning to control target language attributes, in this case formality, from a small amount of labeled contrastive data. We introduce an annotated dataset (CoCoA-MT) and an associated evaluation metric for training and evaluating formality-controlled MT models for six diverse target languages. We show that we can train formality-controlled models by fine-tuning on labeled contrastive data, achieving high accuracy (82% in-domain and 73% out-of-domain) while maintaining overall quality.</abstract>
       <url hash="769bc50a">2022.findings-naacl.47</url>
@@ -5173,7 +5173,7 @@
       <author><first>Jesin</first><last>James</last></author>
       <author><first>Vithya</first><last>Yogarajan</last></author>
       <author><first>Isabella</first><last>Shields</last></author>
-      <author><first>Catherine</first><last>Watson</last></author>
+      <author id="catherine-i-watson"><first>Catherine</first><last>Watson</last></author>
       <author><first>Peter</first><last>Keegan</last></author>
       <author><first>Keoni</first><last>Mahelona</last></author>
       <author><first>Peter-Lucas</first><last>Jones</last></author>
@@ -5205,7 +5205,7 @@
       <author><first>Minglei</first><last>Li</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>675-692</pages>
       <abstract>Vast efforts have been devoted to creating high-performance few-shot learners, i.e., large-scale pretrained language models (PLMs) that perform well with little downstream task training data. Training PLMs has incurred significant cost, but utilizing the few-shot learners is still challenging due to their enormous size. This work focuses on a crucial question: How to make effective use of these few-shot learners? We propose LMTurk, a novel approach that treats few-shotlearners as crowdsourcing workers. The rationale is that crowdsourcing workers are in fact few-shot learners: They are shown a few illustrative examples to learn about a task and then start annotating. LMTurk employs few-shot learners built upon PLMs as workers. We show that the resulting annotations can be utilized to train models that solve the task well and are small enough to be deployable in practical scenarios. Active learning is integrated into LMTurk to reduce the amount of queries made to PLMs, minimizing the computational cost of running PLM inference passes. Altogether, LMTurk is an important step towards making effective use of current PLMs.</abstract>
       <url hash="b95005d7">2022.findings-naacl.51</url>
@@ -5256,7 +5256,7 @@
       <title><fixed-case>L</fixed-case>ong<fixed-case>T</fixed-case>5: <fixed-case>E</fixed-case>fficient Text-To-Text Transformer for Long Sequences</title>
       <author><first>Mandy</first><last>Guo</last></author>
       <author><first>Joshua</first><last>Ainslie</last></author>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <author><first>Santiago</first><last>Ontanon</last></author>
       <author><first>Jianmo</first><last>Ni</last></author>
       <author><first>Yun-Hsuan</first><last>Sung</last></author>
@@ -5271,7 +5271,7 @@
     <paper id="56">
       <title>Challenging <fixed-case>A</fixed-case>merica: Modeling language in longer time scales</title>
       <author><first>Jakub</first><last>Pokrywka</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Krzysztof</first><last>Jassem</last></author>
       <author><first>Karol</first><last>Kaczmarek</last></author>
       <author><first>Krzysztof</first><last>Jurkiewicz</last></author>
@@ -5314,7 +5314,7 @@
       <author><first>Sarthak</first><last>Dash</last></author>
       <author><first>Sugato</first><last>Bagchi</last></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>788-800</pages>
       <abstract>Representing text in tables is essential for many business intelligence tasks such as semantic retrieval, data exploration and visualization, and question answering. Existing methods that leverage pretrained Transformer encoders range from a simple construction of pseudo-sentences by concatenating text across rows or columns to complex parameter-intensive models that encode table structure and require additional pretraining. In this work, we introduce a novel encoding strategy for Transformer encoders that preserves the critical property of permutation invariance across rows or columns. Unlike existing state-of-the-art methods for Table Understanding, our proposed approach does not require any additional pretraining and still substantially outperforms existing methods in almost all instances. We demonstrate the effectiveness of our proposed approach on three table interpretation tasks: column type annotation, relation extraction, and entity linking through extensive experiments on existing tabular datasets.</abstract>
       <url hash="2aa7f298">2022.findings-naacl.59</url>
@@ -5451,7 +5451,7 @@
       <author><first>Victor</first><last>Steinborn</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Haris</first><last>Jabbar</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>921-932</pages>
       <abstract>Bias research in NLP is a rapidly growing and developing field. Similar to CrowS-Pairs (Nangia et al., 2020), we assess gender bias in masked-language models (MLMs) by studying pairs of sentences with gender swapped person references. Most bias research focuses on and often is specific to English.Using a novel methodology for creating sentence pairs that is applicable across languages, we create, based on CrowS-Pairs, a multilingual dataset for English, Finnish, German, Indonesian and Thai.Additionally, we propose <tex-math>S_{JSD}</tex-math>, a new bias measure based on Jensen–Shannon divergence, which we argue retains more information from the model output probabilities than other previously proposed bias measures for MLMs.Using multilingual MLMs, we find that <tex-math>S_{JSD}</tex-math> diagnoses the same systematic biased behavior for non-English that previous studies have found for monolingual English pre-trained MLMs. <tex-math>S_{JSD}</tex-math> outperforms the CrowS-Pairs measure, which struggles to find such biases for smaller non-English datasets.</abstract>
       <url hash="f0aabad2">2022.findings-naacl.69</url>
@@ -5467,7 +5467,7 @@
       <author><first>Chen</first><last>Liang</last></author>
       <author><first>Haoming</first><last>Jiang</last></author>
       <author><first>Siawpeng</first><last>Er</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Tuo</first><last>Zhao</last></author>
       <author><first>Hongyuan</first><last>Zha</last></author>
       <pages>933-949</pages>
@@ -5509,7 +5509,7 @@
     <paper id="73">
       <title><fixed-case>QLEVR</fixed-case>: A Diagnostic Dataset for Quantificational Language and Elementary Visual Reasoning</title>
       <author><first>Zechen</first><last>Li</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>980-996</pages>
       <abstract>Synthetic datasets have successfully been used to probe visual question-answering datasets for their reasoning abilities. CLEVR (John- son et al., 2017), for example, tests a range of visual reasoning abilities. The questions in CLEVR focus on comparisons of shapes, colors, and sizes, numerical reasoning, and existence claims. This paper introduces a minimally biased, diagnostic visual question-answering dataset, QLEVR, that goes beyond existential and numerical quantification and focus on more complex quantifiers and their combinations, e.g., asking whether there are more than two red balls that are smaller than at least three blue balls in an image. We describe how the dataset was created and present a first evaluation of state-of-the-art visual question-answering models, showing that QLEVR presents a formidable challenge to our current models. Code and Dataset are available at <url>https://github.com/zechenli03/QLEVR</url></abstract>
       <url hash="6ce0846b">2022.findings-naacl.73</url>
@@ -5573,7 +5573,7 @@
     <paper id="78">
       <title>”Diversity and Uncertainty in Moderation” are the Key to Data Selection for Multilingual Few-shot Transfer</title>
       <author><first>Shanu</first><last>Kumar</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>1042-1055</pages>
       <abstract>Few-shot transfer often shows substantial gain over zero-shot transfer (CITATION), which is a practically useful trade-off between fully supervised and unsupervised learning approaches for multilingual pretained model-based systems. This paper explores various strategies for selecting data for annotation that can result in a better few-shot transfer. The proposed approaches rely on multiple measures such as data entropy using <tex-math>n</tex-math>-gram language model, predictive entropy, and gradient embedding. We propose a loss embedding method for sequence labeling tasks, which induces diversity and uncertainty sampling similar to gradient embedding. The proposed data selection strategies are evaluated and compared for POS tagging, NER, and NLI tasks for up to 20 languages. Our experiments show that the gradient and loss embedding-based strategies consistently outperform random data selection baselines, with gains varying with the initial performance of the zero-shot transfer. Furthermore, the proposed method shows similar trends in improvement even when the model is fine-tuned using a lower proportion of the original task-specific labeled training data for zero-shot transfer.</abstract>
@@ -5704,9 +5704,9 @@
     <paper id="87">
       <title>Improving Code-Switching Dependency Parsing with Semi-Supervised Auxiliary Tasks</title>
       <author><first>Şaziye Betül</first><last>Özateş</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Tunga</first><last>Gungor</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>1159-1171</pages>
       <abstract>Code-switching dependency parsing stands as a challenging task due to both the scarcity of necessary resources and the structural difficulties embedded in code-switched languages. In this study, we introduce novel sequence labeling models to be used as auxiliary tasks for dependency parsing of code-switched text in a semi-supervised scheme. We show that using auxiliary tasks enhances the performance of an LSTM-based dependency parsing model and leads to better results compared to an XLM-R-based model with significantly less computational and time complexity. As the first study that focuses on multiple code-switching language pairs for dependency parsing, we acquire state-of-the-art scores on all of the studied languages. Our best models outperform the previous work by 7.4 LAS points on average.</abstract>
       <url hash="7a3b552e">2022.findings-naacl.87</url>
@@ -5811,7 +5811,7 @@
       <title>A Survey on Stance Detection for Mis- and Disinformation Identification</title>
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Arnav</first><last>Arora</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>1259-1277</pages>
       <abstract>Understanding attitudes expressed in texts, also known as stance detection, plays an important role in systems for detecting false information online, be it misinformation (unintentionally false) or disinformation (intentionally false information). Stance detection has been framed in different ways, including (a) as a component of fact-checking, rumour detection, and detecting previously fact-checked claims, or (b) as a task in its own right. While there have been prior efforts to contrast stance detection with other related tasks such as argumentation mining and sentiment analysis, there is no existing survey on examining the relationship between stance detection and mis- and disinformation detection. Here, we aim to bridge this gap by reviewing and analysing existing work in this area, with mis- and disinformation in focus, and discussing lessons learnt and future challenges.</abstract>
@@ -5836,7 +5836,7 @@
     <paper id="96">
       <title>To Answer or Not To Answer? Improving Machine Reading Comprehension Model with Span-based Contrastive Learning</title>
       <author><first>Yunjie</first><last>Ji</last></author>
-      <author><first>Liangyu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last></author>
       <author><first>Chenxiao</first><last>Dou</last></author>
       <author><first>Baochang</first><last>Ma</last></author>
       <author><first>Xiangang</first><last>Li</last></author>
@@ -5851,7 +5851,7 @@
       <title>Target-Guided Dialogue Response Generation Using Commonsense and Data Augmentation</title>
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Harsh</first><last>Jhamtani</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <pages>1301-1317</pages>
       <abstract>Target-guided response generation enables dialogue systems to smoothly transition a conversation from a dialogue context toward a target sentence. Such control is useful for designing dialogue systems that direct a conversation toward specific goals, such as creating non-obtrusive recommendations or introducing new topics in the conversation. In this paper, we introduce a new technique for target-guided response generation, which first finds a bridging path of commonsense knowledge concepts between the source and the target, and then uses the identified bridging path to generate transition responses. Additionally, we propose techniques to re-purpose existing dialogue datasets for target-guided generation. Experiments reveal that the proposed techniques outperform various baselines on this task. Finally, we observe that the existing automated metrics for this task correlate poorly with human judgement ratings. We propose a novel evaluation metric that we demonstrate is more reliable for target-guided response evaluation. Our work generally enables dialogue system designers to exercise more control over the conversations that their systems produce.</abstract>
       <url hash="74fd28e3">2022.findings-naacl.97</url>
@@ -5863,7 +5863,7 @@
       <title><fixed-case>B</fixed-case>angla<fixed-case>BERT</fixed-case>: Language Model Pretraining and Benchmarks for Low-Resource Language Understanding Evaluation in <fixed-case>B</fixed-case>angla</title>
       <author><first>Abhik</first><last>Bhattacharjee</last></author>
       <author><first>Tahmid</first><last>Hasan</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Kazi Samin</first><last>Mubasshir</last></author>
       <author><first>Md Saiful</first><last>Islam</last></author>
       <author><first>Anindya</first><last>Iqbal</last></author>
@@ -5925,7 +5925,7 @@
       <author><first>Mahdi</first><last>Namazifar</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Di</first><last>Jin</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>1375-1388</pages>
       <abstract>The massive amount of trainable parameters in the pre-trained language models (PLMs) makes them hard to be deployed to multiple downstream tasks. To address this issue, parameter-efficient transfer learning methods have been proposed to tune only a few parameters during fine-tuning while freezing the rest. This paper looks at existing methods along this line through the <i>kernel lens</i>. Motivated by the connection between self-attention in transformer-based PLMs and kernel learning, we propose <i>kernel-wise adapters</i>, namely <i>Kernel-mix</i>, that utilize the kernel structure in self-attention to guide the assignment of the tunable parameters. These adapters use guidelines found in classical kernel learning and enable separate parameter tuning for each attention head. Our empirical results, over a diverse set of natural language generation and understanding tasks, show that our proposed adapters can attain or improve the strong performance of existing baselines.</abstract>
       <url hash="178b23ca">2022.findings-naacl.102</url>
@@ -5939,7 +5939,7 @@
       <author><first>Nithin</first><last>Anchuri</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Pascal</first><last>Poupart</last></author>
       <pages>1389-1400</pages>
       <abstract>Intermediate layer knowledge distillation (KD) can improve the standard KD technique (which only targets the output of teacher and student models) especially over large pre-trained language models. However, intermediate layer distillation suffers from excessive computational burdens and engineering efforts required for setting up a proper layer mapping. To address these problems, we propose a RAndom Intermediate Layer Knowledge Distillation (RAIL-KD) approach in which, intermediate layers from the teacher model are selected randomly to be distilled into the intermediate layers of the student model. This randomized selection enforces that all teacher layers are taken into account in the training process, while reducing the computational cost of intermediate layer distillation. Also, we show that it acts as a regularizer for improving the generalizability of the student model. We perform extensive experiments on GLUE tasks as well as on out-of-domain test sets. We show that our proposed RAIL-KD approach outperforms other state-of-the-art intermediate layer KD methods considerably in both performance and training-time.</abstract>
@@ -6020,7 +6020,7 @@
     <paper id="109">
       <title>Exploring Neural Models for Query-Focused Summarization</title>
       <author><first>Jesse</first><last>Vig</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Wojciech</first><last>Kryscinski</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Wenhao</first><last>Liu</last></author>
@@ -6034,8 +6034,8 @@
     <paper id="110">
       <title><fixed-case>B</fixed-case>itext<fixed-case>E</fixed-case>dit: Automatic Bitext Editing for Improved Low-Resource Machine Translation</title>
       <author><first>Eleftheria</first><last>Briakou</last></author>
-      <author><first>Sida</first><last>Wang</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
       <pages>1469-1485</pages>
       <abstract>Mined bitexts can contain imperfect translations that yield unreliable training signals for Neural Machine Translation (NMT). While filtering such pairs out is known to improve final model quality, we argue that it is suboptimal in low-resource conditions where even mined data can be limited. In our work, we propose instead, to refine the mined bitexts via automatic editing: given a sentence in a language <tex-math>x_f</tex-math>, and a possibly imperfect translation of it <tex-math>\mathbf{x_e}</tex-math>, our model generates a revised version <tex-math>x_f'</tex-math> or <tex-math>x_e'</tex-math> that yields a more equivalent translation pair (i.e., &lt;<tex-math>x_f, x_e'</tex-math>&gt; or &lt;<tex-math>x_f', x_e</tex-math>&gt;). We use a simple editing strategy by (1) mining potentially imperfect translations for each sentence in a given bitext, (2) learning a model to reconstruct the original translations and translate, in a multi-task fashion. Experiments demonstrate that our approach successfully improves the quality of CCMatrix mined bitext for 5 low-resource language-pairs and 10 translation directions by up to 8 BLEU points, in most cases improving upon a competitive translation-based baseline.</abstract>
@@ -6111,7 +6111,7 @@
       <author><first>Vladimir</first><last>Karpukhin</last></author>
       <author><first>Stan</first><last>Peshterliev</last></author>
       <author><first>Dmytro</first><last>Okhonko</last></author>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
       <author><first>Sonal</first><last>Gupta</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Scott</first><last>Yih</last></author>
@@ -6140,7 +6140,7 @@
       <author><first>Piotr</first><last>Nawrot</last></author>
       <author><first>Szymon</first><last>Tworkowski</last></author>
       <author><first>Michał</first><last>Tyrolski</last></author>
-      <author><first>Lukasz</first><last>Kaiser</last></author>
+      <author id="lukasz-kaiser"><first>Lukasz</first><last>Kaiser</last></author>
       <author><first>Yuhuai</first><last>Wu</last></author>
       <author><first>Christian</first><last>Szegedy</last></author>
       <author><first>Henryk</first><last>Michalewski</last></author>
@@ -6156,7 +6156,7 @@
       <title><fixed-case>DISARM</fixed-case>: Detecting the Victims Targeted by Harmful Memes</title>
       <author><first>Shivam</first><last>Sharma</last></author>
       <author><first>Md Shad</first><last>Akhtar</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <pages>1572-1588</pages>
       <abstract>Internet memes have emerged as an increasingly popular means of communication on the web. Although memes are typically intended to elicit humour, they have been increasingly used to spread hatred, trolling, and cyberbullying, as well as to target specific individuals, communities, or society on political, socio-cultural, and psychological grounds. While previous work has focused on detecting harmful, hateful, and offensive memes in general, identifying whom these memes attack (i.e., the ‘victims’) remains a challenging and underexplored area. We attempt to address this problem in this paper. To this end, we create a dataset in which we annotate each meme with its victim(s) such as the name of the targeted person(s), organization(s), and community(ies). We then propose DISARM (Detecting vIctimS targeted by hARmful Memes), a framework that uses named-entity recognition and person identification to detect all entities a meme is referring to, and then, incorporates a novel contextualized multimodal deep neural network to classify whether the meme intends to harm these entities. We perform several systematic experiments on three different test sets, corresponding to entities that are (i) all seen while training, (ii) not seen as a harmful target while training, and (iii) not seen at all while training. The evaluation shows that DISARM significantly outperforms 10 unimodal and multimodal systems. Finally, we demonstrate that DISARM is interpretable and comparatively more generalizable and that it can reduce the relative error rate of harmful target identification by up to 9 % absolute over multimodal baseline systems.</abstract>
@@ -6216,7 +6216,7 @@
       <author><first>Shaden</first><last>Shaar</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1619-1631</pages>
       <abstract>Recent years have seen the proliferation of disinformation and fake news online. Traditional approaches to mitigate these issues is to use manual or automatic fact-checking. Recently, another approach has emerged: checking whether the input claim has previously been fact-checked, which can be done automatically, and thus fast, while also offering credibility and explainability, thanks to the human fact-checking and explanations in the associated fact-checking article. Here, we focus on claims made in a political debate and we study the impact of modeling the context of the claim: both on the source side, i.e., in the debate, as well as on the target side, i.e., in the fact-checking explanation document. We do this by modeling the local context, the global context, as well as by means of co-reference resolution, and multi-hop reasoning over the sentences of the document describing the fact-checked claim. The experimental results show that each of these represents a valuable information source, but that modeling the source-side context is most important, and can yield 10+ points of absolute improvement over a state-of-the-art model.</abstract>
       <url hash="bb855f41">2022.findings-naacl.122</url>
@@ -6258,7 +6258,7 @@
       <author><first>Qi</first><last>Jia</last></author>
       <author><first>Yizhu</first><last>Liu</last></author>
       <author><first>Haifeng</first><last>Tang</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>1660-1669</pages>
       <abstract>Previous dialogue summarization techniques adapt large language models pretrained on the narrative text by injecting dialogue-specific features into the models. These features either require additional knowledge to recognize or make the resulting models harder to tune. To bridge the format gap between dialogues and narrative summaries in dialogue summarization tasks, we propose to post-train pretrained language models (PLMs) to rephrase from dialogue to narratives. After that, the model is fine-tuned for dialogue summarization as usual. Comprehensive experiments show that our approach significantly improves vanilla PLMs on dialogue summarization and outperforms other SOTA models by the summary quality and implementation costs.</abstract>
       <url hash="6cf9704e">2022.findings-naacl.125</url>
@@ -6289,9 +6289,9 @@
       <author><first>Saswati</first><last>Dana</last></author>
       <author><first>Dinesh</first><last>Garg</last></author>
       <author><first>Pavan</first><last>Kapanipathi</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Alexander</first><last>Gray</last></author>
-      <author><first>L Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L Venkata</first><last>Subramaniam</last></author>
       <pages>1681-1697</pages>
       <abstract>Entity Linking (EL) maps an entity mention in a natural language sentence to an entity in a knowledge base (KB). The Zero-shot Entity Linking (ZEL) extends the scope of EL to unseen entities at the test time without requiring new labeled data. BLINK (BERT-based) is one of the SOTA models for ZEL. Interestingly, we discovered that BLINK exhibits diminishing returns, i.e., it reaches 98% of its performance with just 1% of the training data and the remaining 99% of the data yields only a marginal increase of 2% in the performance. While this extra 2% gain makes a huge difference for downstream tasks, training BLINK on large amounts of data is very resource-intensive and impractical. In this paper, we propose a neuro-symbolic, multi-task learning approach to bridge this gap. Our approach boosts the BLINK’s performance with much less data by exploiting an auxiliary information about entity types. Specifically, we train our model on two tasks simultaneously - entity linking (primary task) and hierarchical entity type prediction (auxiliary task). The auxiliary task exploits the hierarchical structure of entity types. Our approach achieves superior performance on ZEL task with significantly less training data. On four different benchmark datasets, we show that our approach achieves significantly higher performance than SOTA models when they are trained with just 0.01%, 0.1%, or 1% of the original training data. Our code is available at <url>https://github.com/IBM/NeSLET</url>.</abstract>
       <url hash="c5f87c80">2022.findings-naacl.127</url>
@@ -6346,7 +6346,7 @@
       <author><first>Jinhao</first><last>Jiang</last></author>
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <pages>1730-1741</pages>
       <abstract>Commonsense reasoning in natural language is a desired ability of artificial intelligent systems. For solving complex commonsense reasoning tasks, a typical solution is to enhance pre-trained language models (PTMs) with a knowledge-aware graph neural network (GNN) encoder that models a commonsense knowledge graph (CSKG).Despite the effectiveness, these approaches are built on heavy architectures, and can’t clearly explain how external knowledge resources improve the reasoning capacity of PTMs. Considering this issue, we conduct a deep empirical analysis, and find that it is indeed <i>relation features</i> from CSKGs (but not <i>node features</i>) that mainly contribute to the performance improvement of PTMs. Based on this finding, we design a simple MLP-based knowledge encoder that utilizes statistical relation paths as features. Extensive experiments conducted on five benchmarks demonstrate the effectiveness of our approach, which also largely reduces the parameters for encoding CSKGs.Our codes and data are publicly available at <url>https://github.com/RUCAIBox/SAFE</url>.</abstract>
       <url hash="932cb689">2022.findings-naacl.131</url>
@@ -6359,9 +6359,9 @@
       <author><first>Ming</first><last>Fang</last></author>
       <author><first>Shi</first><last>Zong</last></author>
       <author><first>Jing</first><last>Li</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1742-1754</pages>
       <abstract>Complaining is a speech act that expresses a negative inconsistency between reality and human’s expectations. While prior studies mostly focus on identifying the existence or the type of complaints, in this work, we present the first study in computational linguistics of measuring the intensity of complaints from text. Analyzing complaints from such perspective is particularly useful, as complaints of certain degrees may cause severe consequences for companies or organizations. We first collect 3,103 posts about complaints in education domain from Weibo, a popular Chinese social media platform. These posts are then annotated with complaints intensity scores using Best-Worst Scaling (BWS) method. We show that complaints intensity can be accurately estimated by computational models with best mean square error achieving 0.11. Furthermore, we conduct a comprehensive linguistic analysis around complaints, including the connections between complaints and sentiment, and a cross-lingual comparison for complaints expressions used by Chinese and English speakers. We finally show that our complaints intensity scores can be incorporated for better estimating the popularity of posts on social media.</abstract>
       <url hash="fceca6c0">2022.findings-naacl.132</url>
@@ -6442,7 +6442,7 @@
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Kyung-min</first><last>Kim</last></author>
       <author><first>Giwon</first><last>Hong</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>1811-1821</pages>
       <abstract>Numerical reasoning over text is a challenging subtask in question answering (QA) that requires both the understanding of texts and numbers. However, existing language models in these numerical reasoning QA models tend to overly rely on the pre-existing parametric knowledge at inference time, which commonly causes hallucination in interpreting numbers. Our work proposes a novel attention masked reasoning model, the NC-BERT, that learns to leverage the number-related contextual knowledge to alleviate the over-reliance on parametric knowledge and enhance the numerical reasoning capabilities of the QA model. The empirical results suggest that understanding of numbers in their context by reducing the parametric knowledge influence, and refining numerical information in the number embeddings lead to improved numerical reasoning accuracy and performance in DROP, a numerical QA dataset.</abstract>
       <url hash="0665b4bb">2022.findings-naacl.138</url>
@@ -6471,7 +6471,7 @@
       <author><first>Juhee</first><last>Son</last></author>
       <author><first>JinYeong</first><last>Bak</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1832-1844</pages>
       <abstract>Historical records in Korea before the 20th century were primarily written in Hanja, an extinct language based on Chinese characters and not understood by modern Korean or Chinese speakers. Historians with expertise in this time period have been analyzing the documents, but that process is very difficult and time-consuming, and language models would significantly speed up the process. Toward building and evaluating language models for Hanja, we release the Hanja Understanding Evaluation dataset consisting of chronological attribution, topic classification, named entity recognition, and summary retrieval tasks. We also present BERT-based models continued training on the two major corpora from the 14th to the 19th centuries: the Annals of the Joseon Dynasty and Diaries of the Royal Secretariats. We compare the models with several baselines on all tasks and show there are significant improvements gained by training on the two corpora. Additionally, we run zero-shot experiments on the Daily Records of the Royal Court and Important Officials (DRRI). The DRRI dataset has not been studied much by the historians, and not at all by the NLP community.</abstract>
       <url hash="511488ca">2022.findings-naacl.140</url>
@@ -6582,10 +6582,10 @@
       <author><first>Yuwei</first><last>Cao</last></author>
       <author><first>William</first><last>Groves</last></author>
       <author><first>Tanay Kumar</first><last>Saha</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1931-1942</pages>
       <abstract>Temporal Expression Extraction (TEE) is essential for understanding time in natural language. It has applications in Natural Language Processing (NLP) tasks such as question answering, information retrieval, and causal inference. To date, work in this area has mostly focused on English as there is a scarcity of labeled data for other languages. We propose XLTime, a novel framework for multilingual TEE. XLTime works on top of pre-trained language models and leverages multi-task learning to prompt cross-language knowledge transfer both from English and within the non-English languages. XLTime alleviates problems caused by a shortage of data in the target language. We apply XLTime with different language models and show that it outperforms the previous automatic SOTA methods on French, Spanish, Portuguese, and Basque, by large margins. XLTime also closes the gap considerably on the handcrafted HeidelTime method.</abstract>
       <url hash="1c95732f">2022.findings-naacl.148</url>
@@ -6714,7 +6714,7 @@
     </paper>
     <paper id="158">
       <title>Learning to Execute Actions or Ask Clarification Questions</title>
-      <author><first>Zhengxiang</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengxiang</first><last>Shi</last></author>
       <author><first>Yue</first><last>Feng</last></author>
       <author><first>Aldo</first><last>Lipani</last></author>
       <pages>2060-2070</pages>
@@ -6730,7 +6730,7 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Mengxing</first><last>Dong</last></author>
       <author><first>Xiao</first><last>Li</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <pages>2071-2078</pages>
       <abstract>Conversational Question Answering (ConvQA) is required to answer the current question, conditioned on the observable paragraph-level context and conversation history. Previous works have intensively studied history-dependent reasoning. They perceive and absorb topic-related information of prior utterances in the interactive encoding stage. It yielded significant improvement compared to history-independent reasoning. This paper further strengthens the ConvQA encoder by establishing long-distance dependency among global utterances in multi-turn conversation. We use multi-layer transformers to resolve long-distance relationships, which potentially contribute to the reweighting of attentive information in historical utterances. Experiments on QuAC show that our method obtains a substantial improvement (1%), yielding the F1 score of 73.7%. All source codes are available at <url>https://github.com/jaytsien/GHR</url>.</abstract>
@@ -6743,10 +6743,10 @@
       <title>Learning Structural Information for Syntax-Controlled Paraphrase Generation</title>
       <author><first>Erguang</first><last>Yang</last></author>
       <author><first>Chenglin</first><last>Bai</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Yao</first><last>Meng</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <pages>2079-2090</pages>
       <abstract>Syntax-controlled paraphrase generation aims to produce paraphrase conform to given syntactic patterns. To address this task, recent works have started to use parse trees (or syntactic templates) to guide generation.A constituency parse tree contains abundant structural information, such as parent-child relation, sibling relation, and the alignment relation between words and nodes. Previous works have only utilized parent-child and alignment relations, which may affect the generation quality. To address this limitation, we propose a Structural Information-augmented Syntax-Controlled Paraphrasing (SI-SCP) model. Particularly, we design a syntax encoder based on tree-transformer to capture parent-child and sibling relations. To model the alignment relation between words and nodes, we propose an attention regularization objective, which makes the decoder accurately select corresponding syntax nodes to guide the generation of words. Experiments show that SI-SCP achieves state-of-the-art performances in terms of semantic and syntactic quality on two popular benchmark datasets. Additionally, we propose a Syntactic Template Retriever (STR) to retrieve compatible syntactic structures. We validate that STR is capable of retrieving compatible syntactic structures. We further demonstrate the effectiveness of SI-SCP to generate diverse paraphrases with retrieved syntactic structures.</abstract>
@@ -6873,7 +6873,7 @@
     <paper id="169">
       <title>Specializing Pre-trained Language Models for Better Relational Reasoning via Network Pruning</title>
       <author><first>Siyu</first><last>Ren</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>2195-2207</pages>
       <abstract>Pretrained masked language models (PLMs) were shown to be inheriting a considerable amount of relational knowledge from the source corpora. In this paper, we present an in-depth and comprehensive study concerning specializing PLMs into relational models from the perspective of network pruning. We show that it is possible to find subnetworks capable of representing grounded commonsense relations at non-trivial sparsity while being more generalizable than original PLMs in scenarios requiring knowledge of single or multiple commonsense relations.</abstract>
       <url hash="79d46bff">2022.findings-naacl.169</url>
@@ -6919,7 +6919,7 @@
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
       <author><first>Seonmin</first><last>Koo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>2233-2249</pages>
       <abstract>Recent natural language understanding (NLU) research on the Korean language has been vigorously maturing with the advancements of pretrained language models and datasets. However, Korean pretrained language models still struggle to generate a short sentence with a given condition based on compositionality and commonsense reasoning (i.e., generative commonsense reasoning). The two major challenges are inadequate data resources to develop generative commonsense reasoning regarding Korean linguistic features and to evaluate language models which are necessary for natural language generation (NLG). To solve these problems, we propose a text-generation dataset for Korean generative commonsense reasoning and language model evaluation. In this work, a semi-automatic dataset construction approach filters out contents inexplicable to commonsense, ascertains quality, and reduces the cost of building the dataset. We also present an in-depth analysis of the generation results of language models with various evaluation metrics along with human-annotated scores. The whole dataset is publicly available at (<url>https://aihub.or.kr/opendata/korea-university</url>).</abstract>
       <url hash="1a666ddb">2022.findings-naacl.172</url>
@@ -6959,7 +6959,7 @@
       <author><first>Zhen</first><last>Li</last></author>
       <author><first>Bing</first><last>Xu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2282-2294</pages>
       <abstract>Compared with unimodal data, multimodal data can provide more features to help the model analyze the sentiment of data. Previous research works rarely consider token-level feature fusion, and few works explore learning the common features related to sentiment in multimodal data to help the model fuse multimodal features. In this paper, we propose a Contrastive Learning and Multi-Layer Fusion (CLMLF) method for multimodal sentiment detection. Specifically, we first encode text and image to obtain hidden representations, and then use a multi-layer fusion module to align and fuse the token-level features of text and image. In addition to the sentiment analysis task, we also designed two contrastive learning tasks, label based contrastive learning and data based contrastive learning tasks, which will help the model learn common features related to sentiment in multimodal data. Extensive experiments conducted on three publicly available multimodal datasets demonstrate the effectiveness of our approach for multimodal sentiment detection compared with existing methods. The codes are available for use at https: //github.com/Link-Li/CLMLF</abstract>
       <url hash="4a08350c">2022.findings-naacl.175</url>
@@ -7030,7 +7030,7 @@
     <paper id="180">
       <title>Jointly Learning Guidance Induction and Faithful Summary Generation via Conditional Variational Autoencoders</title>
       <author><first>Wang</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2340-2350</pages>
       <abstract>Abstractive summarization can generate high quality results with the development of the neural network. However, generating factual consistency summaries is a challenging task for abstractive summarization. Recent studies extract the additional information with off-the-shelf tools from the source document as a clue to guide the summary generation, which shows effectiveness to improve the faithfulness. Unlike these work, we present a novel framework based on conditional variational autoencoders, which induces the guidance information and generates the summary equipment with the guidance synchronously. Experiments on XSUM and CNNDM dataset show that our approach can generate relevant and fluent summaries which is more faithful than the existing state-of-the-art approaches, according to multiple factual consistency metrics.</abstract>
       <url hash="a5ce3be4">2022.findings-naacl.180</url>
@@ -7123,9 +7123,9 @@
       <title>Textual Entailment for Event Argument Extraction: Zero- and Few-Shot with Multi-Source Learning</title>
       <author><first>Oscar</first><last>Sainz</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>2439-2455</pages>
       <abstract>Recent work has shown that NLP tasks such as Relation Extraction (RE) can be recasted as a Textual Entailment tasks using verbalizations, with strong performance in zero-shot and few-shot settings thanks to pre-trained entailment models. The fact that relations in current RE datasets are easily verbalized casts doubts on whether entailment would be effective in more complex tasks. In this work we show that entailment is also effective in Event Argument Extraction (EAE), reducing the need of manual annotation to 50% and 20% in ACE and WikiEvents, respectively, while achieving the same performance as with full training. More importantly, we show that recasting EAE as entailment alleviates the dependency on schemas, which has been a roadblock for transferring annotations between domains. Thanks to entailment, the multi-source transfer between ACE and WikiEvents further reduces annotation down to 10% and 5% (respectively) of the full training without transfer. Our analysis shows that key to good results is the use of several entailment datasets to pre-train the entailment model. Similar to previous approaches, our method requires a small amount of effort for manual verbalization: only less than 15 minutes per event argument types is needed; comparable results can be achieved from users of different level of expertise.</abstract>
       <url hash="4e026391">2022.findings-naacl.187</url>
@@ -7151,7 +7151,7 @@
       <title>Latent Group Dropout for Multilingual and Multidomain Machine Translation</title>
       <author><first>Minh-Quang</first><last>Pham</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <pages>2469-2481</pages>
       <abstract>Multidomain and multilingual machine translation often rely on parameter sharing strategies, where large portions of the network are meant to capture the commonalities of the tasks at hand, while smaller parts are reserved to model the peculiarities of a language or a domain. In adapter-based approaches, these strategies are hardcoded in the network architecture, independent of the similarities between tasks. In this work, we propose a new method to better take advantage of these similarities, using a latent-variable model. We also develop new techniques to train this model end-to-end and report experimental results showing that the learned patterns are both meaningful and yield improved translation performance without any increase of the model size.</abstract>
       <url hash="275a004e">2022.findings-naacl.189</url>
@@ -7166,7 +7166,7 @@
       <author><first>Runxin</first><last>Xu</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>2482-2496</pages>
       <abstract>As Abstract Meaning Representation (AMR) implicitly involves compound semantic annotations, we hypothesize auxiliary tasks which are semantically or formally related can better enhance AMR parsing. We find that 1) Semantic role labeling (SRL) and dependency parsing (DP), would bring more performance gain than other tasks e.g. MT and summarization in the text-to-AMR transition even with much less data. 2) To make a better fit for AMR, data from auxiliary tasks should be properly “AMRized” to PseudoAMR before training. Knowledge from shallow level parsing tasks can be better transferred to AMR Parsing with structure transform. 3) Intermediate-task learning is a better paradigm to introduce auxiliary tasks to AMR parsing, compared to multitask learning. From an empirical perspective, we propose a principled method to involve auxiliary tasks to boost AMR parsing. Extensive experiments show that our method achieves new state-of-the-art performance on different benchmarks especially in topology-related scores. Code and models are released at <url>https://github.com/PKUnlp-icler/ATP</url>.</abstract>
       <url hash="b7717204">2022.findings-naacl.190</url>
@@ -7260,11 +7260,11 @@
     </paper>
     <paper id="197">
       <title><fixed-case>KETOD</fixed-case>: Knowledge-Enriched Task-Oriented Dialogue</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Chinnadhurai</first><last>Sankar</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>2581-2593</pages>
       <abstract>Existing studies in dialogue system research mostly treat task-oriented dialogue and chit-chat as separate domains. Towards building a human-like assistant that can converse naturally and seamlessly with users, it is important to build a dialogue system that conducts both types of conversations effectively. In this work, we investigate how task-oriented dialogue and knowledge-grounded chit-chat can be effectively integrated into a single model. To this end, we create a new dataset, KETOD (Knowledge-Enriched Task-Oriented Dialogue), where we naturally enrich task-oriented dialogues with chit-chat based on relevant entity knowledge. We also propose two new models, SimpleToDPlus and Combiner, for the proposed task. Experimental results on both automatic and human evaluations show that the proposed methods can significantly improve the performance in knowledge-enriched response generation while maintaining a competitive task-oriented dialog performance. We believe our new dataset will be a valuable resource for future studies. Our dataset and code are publicly available at <url>https://github.com/facebookresearch/ketod</url>.</abstract>
@@ -7320,7 +7320,7 @@
     <paper id="201">
       <title>Learning from Bootstrapping and Stepwise Reinforcement Reward: A Semi-Supervised Framework for Text Style Transfer</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>2633-2648</pages>
       <abstract>Text style transfer is an important task in controllable language generation. Supervised approaches have pushed performance improvement on style-oriented rewriting such as formality conversion. However, challenges remain due to the scarcity of large-scale parallel data in many domains. While unsupervised approaches do not rely on annotated sentence pairs for each style, they are often plagued with instability issues such as mode collapse or quality degradation. To take advantage of both supervised and unsupervised paradigms and tackle the challenges, in this work, we propose a semi-supervised framework for text style transfer. First, the learning process is bootstrapped with supervision guided by automatically constructed pseudo-parallel pairs using lexical and semantic-based methods. Then the model learns from unlabeled data via reinforcement rewards. Specifically, we propose to improve the sequence-to-sequence policy gradient via stepwise reward optimization, providing fine-grained learning signals and stabilizing the reinforced learning process. Experimental results show that the proposed approach achieves state-of-the-art performance on multiple datasets, and produces effective generation with as minimal as 10% of training data.</abstract>
       <url hash="e2c9ac34">2022.findings-naacl.201</url>
@@ -7361,7 +7361,7 @@
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Zejun</first><last>Li</last></author>
       <author><first>Siyuan</first><last>Wang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Jianqing</first><last>Fan</last></author>
       <pages>2667-2678</pages>
       <abstract>Matching model is essential for Image-Text Retrieval framework. Existing research usually train the model with a triplet loss and explore various strategy to retrieve hard negative sentences in the dataset. We argue that current retrieval-based negative sample construction approach is limited in the scale of the dataset thus fail to identify negative sample of high difficulty for every image. We propose our TAiloring neGative Sentences with Discrimination and Correction (TAGS-DC) to generate synthetic sentences automatically as negative samples. TAGS-DC is composed of masking and refilling to generate synthetic negative sentences with higher difficulty. To keep the difficulty during training, we mutually improve the retrieval and generation through parameter sharing. To further utilize fine-grained semantic of mismatch in the negative sentence, we propose two auxiliary tasks, namely word discrimination and word correction to improve the training. In experiments, we verify the effectiveness of our model on MS-COCO and Flickr30K compared with current state-of-the-art models and demonstrates its robustness and faithfulness in the further analysis.</abstract>
@@ -7466,7 +7466,7 @@
       <author><first>Yukun</first><last>Feng</last></author>
       <author><first>Amir</first><last>Fayazi</last></author>
       <author><first>Abhinav</first><last>Rastogi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1–10</pages>
       <abstract>Recent work has shown advantages of incorporating knowledge graphs (KGs) into BERT for various NLP tasks. One common way is to feed entity embeddings as an additional input during pre-training. There are two limitations to such a method. First, to train the entity embeddings to include rich information of factual knowledge, it typically requires access to the entire KG. This is challenging for KGs with daily changes (e.g., Wikidata). Second, it requires a large scale pre-training corpus with entity annotations and high computational cost during pre-training. In this work, we efficiently construct entity embeddings only from the type knowledge, that does not require access to the entire KG. Although the entity embeddings contain only local information, they perform very well when combined with context. Furthermore, we show that our entity embeddings, constructed from BERT’s input embeddings, can be directly incorporated into the fine-tuning phase without requiring any specialized pre-training. In addition, these entity embeddings can also be constructed on the fly without requiring a large memory footprint to store them. Finally, we propose task-specific models that incorporate our entity embeddings for entity linking, entity typing, and relation classification. Experiments show that our models have comparable or superior performance to existing models while being more resource efficient.</abstract>
       <url hash="5669a333">2022.findings-aacl.1</url>
@@ -7531,7 +7531,7 @@
     <paper id="7">
       <title>Understanding the Use of Quantifiers in <fixed-case>M</fixed-case>andarin</title>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>73–80</pages>
       <abstract>We introduce a corpus of short texts in Mandarin, in which quantified expressions figure prominently. We illustrate the significance of the corpus by examining the hypothesis (known as Huang’s “coolness” hypothesis) that speakers of East Asian Languages tend to speak more briefly but less informatively than, for example, speakers of West-European languages. The corpus results from an elicitation experiment in which participants were asked to describe abstract visual scenes. We compare the resulting corpus, called MQTUNA, with an English corpus that was collected using the same experimental paradigm. The comparison reveals that some, though not all, aspects of quantifier use support the above-mentioned hypothesis. Implications of these findings for the generation of quantified noun phrases are discussed.</abstract>
       <url hash="4d4cf3f8">2022.findings-aacl.7</url>
@@ -7542,8 +7542,8 @@
       <title>Does Representational Fairness Imply Empirical Fairness?</title>
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>81–95</pages>
       <abstract>NLP technologies can cause unintended harms if learned representations encode sensitive attributes of the author, or predictions systematically vary in quality across groups. Popular debiasing approaches, like adversarial training, remove sensitive information from representations in order to reduce disparate performance, however the relation between representational fairness and empirical (performance) fairness has not been systematically studied. This paper fills this gap, and proposes a novel debiasing method building on contrastive learning to encourage a latent space that separates instances based on target label, while mixing instances that share protected attributes. Our results show the effectiveness of our new method and, more importantly, show across a set of diverse debiasing methods that <i>representational fairness does not imply empirical fairness</i>. This work highlights the importance of aligning and understanding the relation of the optimization objective and final fairness target.</abstract>
@@ -7578,7 +7578,7 @@
       <author><first>Xin</first><last>Tian</last></author>
       <author><first>Xinchao</first><last>Xu</last></author>
       <author><first>Yingzhan</first><last>Lin</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <pages>107–118</pages>
       <abstract>To explore the limit of dialogue generation pre-training, we present the models of PLATO-XL with up to 11 billion parameters, trained on both Chinese and English social media conversations. To train such large models, we adopt the architecture of unified transformer with high computation and parameter efficiency. In addition, we carry out multi-party aware pre-training to better distinguish the characteristic information in social media conversations. With such designs, PLATO-XL successfully achieves superior performances as compared to other approaches in both Chinese and English chitchat. We further explore the capacity of PLATO-XL on other conversational tasks, such as knowledge grounded dialogue and task-oriented conversation. The experimental results indicate that PLATO-XL obtains state-of-the-art results across multiple conversational tasks, verifying its potential as a foundation model of conversational AI.</abstract>
       <url hash="f08866a7">2022.findings-aacl.10</url>
@@ -7661,7 +7661,7 @@
       <author><first>Zhenyun</first><last>Deng</last></author>
       <author><first>Yonghua</first><last>Zhu</last></author>
       <author><first>Lia Jisoo</first><last>Lee</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <author><first>Jiamou</first><last>Liu</last></author>
       <pages>176–187</pages>
       <abstract>We introduce TaKG, a new table-to-text generation dataset with the following highlights: (1) TaKG defines a long-text (paragraph-level) generation task as opposed to well-established short-text (sentence-level) generation datasets. (2) TaKG is the first large-scale dataset for this task, containing three application domains and ~750,000 samples. (3) To address the divergence phenomenon, TaKG enhances table input using external knowledge graphs, extracted by a new Wikidata-based method. We then propose a new Transformer-based multimodal sequence-to-sequence architecture for TaKG that integrates two pretrained language models RoBERTa and GPT-2. Our model shows reliable performance on long-text generation across a variety of metrics, and outperforms existing models for short-text generation tasks.</abstract>
@@ -7674,7 +7674,7 @@
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Zijian</first><last>Yang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>188–196</pages>
       <abstract>Checkpoint averaging is a simple and effective method to boost the performance of converged neural machine translation models. The calculation is cheap to perform and the fact that the translation improvement almost comes for free, makes it widely adopted in neural machine translation research. Despite the popularity, the method itself simply takes the mean of the model parameters from several checkpoints, the selection of which is mostly based on empirical recipes without many justifications. In this work, we revisit the concept of checkpoint averaging and consider several extensions. Specifically, we experiment with ideas such as using different checkpoint selection strategies, calculating weighted average instead of simple mean, making use of gradient information and fine-tuning the interpolation weights on development data. Our results confirm the necessity of applying checkpoint averaging for optimal performance, but also suggest that the landscape between the converged checkpoints is rather flat and not much further improvement compared to simple averaging is to be obtained.</abstract>
       <url hash="42289108">2022.findings-aacl.18</url>
@@ -7685,9 +7685,9 @@
       <title>Modeling Referential Gaze in Task-oriented Settings of Varying Referential Complexity</title>
       <author><first>Özge</first><last>Alacam</last></author>
       <author><first>Eugen</first><last>Ruppert</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Ganeshan</first><last>Malhotra</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Sina</first><last>Zarrieß</last></author>
       <pages>197–210</pages>
       <abstract>Referential gaze is a fundamental phenomenon for psycholinguistics and human-human communication. However, modeling referential gaze for real-world scenarios, e.g. for task-oriented communication, is lacking the well-deserved attention from the NLP community. In this paper, we address this challenging issue by proposing a novel multimodal NLP task; namely predicting when the gaze is referential. We further investigate how to model referential gaze and transfer gaze features to adapt to unseen situated settings that target different referential complexities than the training environment. We train (i) a sequential attention-based LSTM model and (ii) a multivariate transformer encoder architecture to predict whether the gaze is on a referent object. The models are evaluated on the three complexity datasets. The results indicate that the gaze features can be transferred not only among various similar tasks and scenes but also across various complexity levels. Taking the referential complexity of a scene into account is important for successful target prediction using gaze parameters especially when there is not much data for fine-tuning.</abstract>
@@ -7709,7 +7709,7 @@
     <paper id="21">
       <title><fixed-case>C</fixed-case>o<fixed-case>RAL</fixed-case>: a Context-aware <fixed-case>C</fixed-case>roatian Abusive Language Dataset</title>
       <author><first>Ravi</first><last>Shekhar</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>217–225</pages>
       <abstract>In light of unprecedented increases in the popularity of the internet and social media, comment moderation has never been a more relevant task. Semi-automated comment moderation systems greatly aid human moderators by either automatically classifying the examples or allowing the moderators to prioritize which comments to consider first. However, the concept of inappropriate content is often subjective, and such content can be conveyed in many subtle and indirect ways. In this work, we propose CoRAL – a language and culturally aware Croatian Abusive dataset covering phenomena of implicitness and reliance on local and global context. We show experimentally that current models degrade when comments are not explicit and further degrade when language skill and context knowledge are required to interpret the comment.</abstract>
@@ -7731,12 +7731,12 @@
     </paper>
     <paper id="23">
       <title>A Multilingual Multiway Evaluation Data Set for Structured Document Translation of <fixed-case>A</fixed-case>sian Languages</title>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Patrick</first><last>Huy</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>237–245</pages>
       <abstract>Translation of structured content is an important application of machine translation, but the scarcity of evaluation data sets, especially for Asian languages, limits progress. In this paper we present a novel multilingual multiway evaluation data set for the translation of structured documents of the Asian languages Japanese, Korean and Chinese. We describe the data set, its creation process and important characteristics, followed by establishing and evaluating baselines using the direct translation as well as detag-project approaches. Our data set is well suited for multilingual evaluation, and it contains richer annotation tag sets than existing data sets. Our results show that massively multilingual translation models like M2M-100 and mBART-50 perform surprisingly well despite not being explicitly trained to handle structured content. The data set described in this paper and used in our experiments is released publicly.</abstract>
@@ -7790,7 +7790,7 @@
       <author><first>Shaily</first><last>Bhatt</last></author>
       <author><first>Pankaj</first><last>Singh</last></author>
       <author><first>Somak</first><last>Aditya</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>282–295</pages>
@@ -7869,7 +7869,7 @@
       <title>Multilingual Auxiliary Tasks Training: Bridging the Gap between Languages for Zero-Shot Transfer of Hate Speech Detection Models</title>
       <author><first>Syrielle</first><last>Montariol</last></author>
       <author><first>Arij</first><last>Riabi</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>347–363</pages>
       <abstract>Zero-shot cross-lingual transfer learning has been shown to be highly challenging for tasks involving a lot of linguistic specificities or when a cultural gap is present between lan- guages, such as in hate speech detection. In this paper, we highlight this limitation for hate speech detection in several domains and languages using strict experimental settings. Then, we propose to train on multilingual auxiliary tasks – sentiment analysis, named entity recognition, and tasks relying on syntactic information – to improve zero-shot transfer of hate speech detection models across languages. We show how hate speech detection models benefit from a cross-lingual knowledge proxy brought by auxiliary tasks fine-tuning and highlight these tasks’ positive impact on bridging the hate speech linguistic and cultural gap between languages.</abstract>
       <url hash="7ab68fe7">2022.findings-aacl.33</url>
@@ -7879,10 +7879,10 @@
     <paper id="34">
       <title>Chop and Change: Anaphora Resolution in Instructional Cooking Videos</title>
       <author><first>Cennet</first><last>Oguz</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <author><first>Emmanuel</first><last>Vincent</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>364–374</pages>
       <abstract>Linguistic ambiguities arising from changes in entities in action flows are a key challenge in instructional cooking videos. In particular, temporally evolving entities present rich and to date understudied challenges for anaphora resolution. For example “oil” mixed with “salt” is later referred to as a “mixture”. In this paper we propose novel annotation guidelines to annotate recipes for the anaphora resolution task, reflecting change in entities. Moreover, we present experimental results for end-to-end multimodal anaphora resolution with the new annotation scheme and propose the use of temporal features for performance improvement.</abstract>
       <url hash="83df2f20">2022.findings-aacl.34</url>
@@ -7941,7 +7941,7 @@
     <paper id="39">
       <title>Differential Bias: On the Perceptibility of Stance Imbalance in Argumentation</title>
       <author><first>Alonso</first><last>Palomino</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>411–421</pages>
@@ -8178,7 +8178,7 @@
       <title>Does Simultaneous Speech Translation need Simultaneous Models?</title>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom Video Communications</affiliation></author>
       <pages>141-153</pages>
       <abstract>In simultaneous speech translation (SimulST), finding the best trade-off between high output quality and low latency is a challenging task. To meet the latency constraints posed by different application scenarios, multiple dedicated SimulST models are usually trained and maintained, generating high computational costs. In this paper, also motivated by the increased sensitivity towards sustainable AI, we investigate whether a single model trained offline can serve both offline and simultaneous applications under different latency regimes without additional training or adaptation. Experiments on en-&gt;de, es show that, aside from facilitating the adoption of well-established offline architectures and training strategies without affecting latency, offline training achieves similar or better quality compared to the standard SimulST training protocol, also being competitive with the state-of-the-art system.</abstract>
@@ -8295,7 +8295,7 @@
       <author><first>Stan</first><last>Peshterliev</last><affiliation>Meta</affiliation></author>
       <author><first>Yashar</first><last>Mehdad</last><affiliation>Facebook AI</affiliation></author>
       <author><first>Sonal</first><last>Gupta</last><affiliation>Facebook</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Facebook AI Research</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Facebook AI Research</affiliation></author>
       <pages>250-262</pages>
       <abstract>Despite their recent popularity and well-known advantages, dense retrievers still lag behind sparse methods such as BM25 in their ability to reliably match salient phrases and rare entities in the query and to generalize to out-of-domain data. It has been argued that this is an inherent limitation of dense models. We rebut this claim by introducing the Salient Phrase Aware Retriever (SPAR), a dense retriever with the lexical matching capacity of a sparse model. We show that a dense Lexical Model Λ can be trained to imitate a sparse one, and SPAR is built by augmenting a standard dense retriever with Λ. Empirically, SPAR shows superior performance on a range of tasks including five question answering datasets, MS MARCO passage retrieval, as well as the EntityQuestions and BEIR benchmarks for out-of-domain evaluation, exceeding the performance of state-of-the-art dense and sparse retrievers. The code and models of SPAR are available at: https://github.com/facebookresearch/dpr-scale/tree/main/spar</abstract>
       <url hash="6f90fa22">2022.findings-emnlp.19</url>
@@ -8340,7 +8340,7 @@
       <author><first>Shichen</first><last>Li</last><affiliation>Soochow University</affiliation></author>
       <author><first>Zhongqing</first><last>Wang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Xiaotong</first><last>Jiang</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
       <pages>289-299</pages>
       <abstract>Previous studies on cross-domain sentiment classification depend on the pivot features or utilize the target data for representation learning, which ignore the semantic relevance between different domains. To this end, we exploit Abstract Meaning Representation (AMR) to help with cross-domain sentiment classification. Compared with the textual input, AMR reduces data sparsity and explicitly provides core semantic knowledge and correlations between different domains. In particular, we develop an algorithm to construct a sentiment-driven semantic graph from sentence-level AMRs. We further design two strategies to linearize the semantic graph and propose a text-graph interaction model to fuse the text and semantic graph representations for cross-domain sentiment classification. Empirical studies show the effectiveness of our proposed model over several strong baselines. The results also indicate the importance of the proposed sentiment-driven semantic graph for cross-domain sentiment classification.</abstract>
       <url hash="cebabc67">2022.findings-emnlp.22</url>
@@ -8526,7 +8526,7 @@
       <author><first>Hannah</first><last>Kirk</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Abeba</first><last>Birhane</last><affiliation>Mozilla Foundation / University College Dublin</affiliation></author>
       <author><first>Bertie</first><last>Vidgen</last><affiliation>Alan Turing Institute</affiliation></author>
-      <author><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
       <pages>497-510</pages>
       <abstract>Text data can pose a risk of harm. However, the risks are not fully understood, and how to handle, present, and discuss harmful text in a safe way remains an unresolved issue in the NLP community. We provide an analytical framework categorising harms on three axes: (1) the harm type (e.g., misinformation, hate speech or racial stereotypes); (2) whether a harm is sought as a feature of the research design if explicitly studying harmful content (e.g., training a hate speech classifier), versus unsought if harmful content is encountered when working on unrelated problems (e.g., language generation or part-of-speech tagging); and (3) who it affects, from people (mis)represented in the data to those handling the data and those publishing on the data. We provide advice for practitioners, with concrete steps for mitigating harm in research and in publication. To assist implementation we introduce HarmCheck – a documentation standard for handling and presenting harmful text in research.</abstract>
       <url hash="01528908">2022.findings-emnlp.35</url>
@@ -8716,7 +8716,7 @@
       <title>Diving Deep into Modes of Fact Hallucinations in Dialogue Systems</title>
       <author><first>Souvik</first><last>Das</last><affiliation>University at Buffalo</affiliation></author>
       <author><first>Sougata</first><last>Saha</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
       <pages>684-699</pages>
       <abstract>Knowledge Graph(KG) grounded conversations often use large pre-trained models and usually suffer from fact hallucination. Frequently entities with no references in knowledge sources and conversation history are introduced into responses, thus hindering the flow of the conversation—existing work attempt to overcome this issue by tweaking the training procedure or using a multi-step refining method. However, minimal effort is put into constructing an entity-level hallucination detection system, which would provide fine-grained signals that control fallacious content while generating responses. As a first step to address this issue, we dive deep to identify various modes of hallucination in KG-grounded chatbots through human feedback analysis. Secondly, we propose a series of perturbation strategies to create a synthetic dataset named FADE (FActual Dialogue Hallucination DEtection Dataset). Finally, we conduct comprehensive data analyses and create multiple baseline models for hallucination detection to compare against human-verified data and already established benchmarks.</abstract>
       <url hash="f89ec09f">2022.findings-emnlp.48</url>
@@ -8727,7 +8727,7 @@
     <paper id="49">
       <title>Representation Learning for Resource-Constrained Keyphrase Generation</title>
       <author><first>Di</first><last>Wu</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Wasi</first><last>Ahmad</last><affiliation>University of California, Los Angeles</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Sunipa</first><last>Dev</last><affiliation>Google Research</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
       <pages>700-716</pages>
@@ -8853,8 +8853,8 @@
     <paper id="58">
       <title>Influence Functions for Sequence Tagging Models</title>
       <author><first>Sarthak</first><last>Jain</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Research</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Ani</first><last>Nenkova</last><affiliation>Adobe Research</affiliation></author>
       <pages>824-839</pages>
       <abstract>Many standard tasks in NLP (e.g., Named Entity Recognition, Part-of-Speech tagging, and Semantic Role Labeling) are naturally framed as sequence tagging problems. However, there has been comparatively little work on interpretability methods for sequence tagging models. In this paper, we extend influence functions — which aim to trace predictions back to the training points that informed them — to sequence tagging tasks. We define the influence of a training instance segment as the effect that perturbing the labels within this segment has on a test segment level prediction. We provide an efficient approximation to compute this, and show that it tracks with the “true” segment influence (measured empirically). We show the practical utility of segment influence by using the method to identify noisy annotations in NER corpora.</abstract>
@@ -8934,7 +8934,7 @@
       <author><first>Tianyi</first><last>Li</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Mohammad Javad</first><last>Hosseini</last><affiliation>Google Research</affiliation></author>
       <author><first>Sabine</first><last>Weber</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>903-921</pages>
       <abstract>We examine LMs’ competence of directional predicate entailments by supervised fine-tuning with prompts. Our analysis shows that contrary to their apparent success on standard NLI, LMs show limited ability to learn such directional inference; moreover, existing datasets fail to test directionality, and/or are infested by artefacts that can be learnt as proxy for entailments, yielding over-optimistic results. In response, we present BoOQA (Boolean Open QA), a robust multi-lingual evaluation benchmark for directional predicate entailments, extrinsic to existing training sets. On BoOQA, we establish baselines and show evidence of existing LM-prompting models being incompetent directional entailment learners, in contrast to entailment graphs, however limited by sparsity.</abstract>
       <url hash="1eb33699">2022.findings-emnlp.64</url>
@@ -8996,7 +8996,7 @@
       <title>Subword-Delimited Downsampling for Better Character-Level Translation</title>
       <author><first>Lukas</first><last>Edman</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Antonio</first><last>Toral</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Gertjan</first><last>van Noord</last><affiliation>University of Groningen</affiliation></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last><affiliation>University of Groningen</affiliation></author>
       <pages>981-992</pages>
       <abstract>Subword-level models have been the dominant paradigm in NLP. However, character-level models have the benefit of seeing each character individually, providing the model with more detailed information that ultimately could lead to better models. Recent works have shown character-level models to be competitive with subword models, but costly in terms of time and computation. Character-level models with a downsampling component alleviate this, but at the cost of quality, particularly for machine translation. This work analyzes the problems of previous downsampling methods and introduces a novel downsampling method which is informed by subwords.This new downsampling method not only outperforms existing downsampling methods, showing that downsampling characters can be done without sacrificing quality, but also leads to promising performance compared to subword models for translation.</abstract>
       <url hash="08f17a31">2022.findings-emnlp.69</url>
@@ -9076,7 +9076,7 @@
       <author><first>Hyesung</first><last>Ji</last></author>
       <author><first>Donghoon</first><last>Shin</last></author>
       <author><first>Seungryong</first><last>Kim</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>1053-1066</pages>
       <abstract>To build a conversational agent that interacts fluently with humans, previous studies blend knowledge or personal profile into the pre-trained language model. However, the model that considers knowledge and persona at the same time is still limited, leading to hallucination and a passive way of using personas. We propose an effective dialogue agent that grounds external knowledge and persona simultaneously. The agent selects the proper knowledge and persona to use for generating the answers with our candidate scoring implemented with a poly-encoder. Then, our model generates the utterance with lesser hallucination and more engagingness utilizing retrieval augmented generation with knowledge-persona enhanced query. We conduct experiments on the persona-knowledge chat and achieve state-of-the-art performance in grounding and generation tasks on the automatic metrics. Moreover, we validate the answers from the models regarding hallucination and engagingness through human evaluation and qualitative results. We show our retriever’s effectiveness in extracting relevant documents compared to the other previous retrievers, along with the comparison of multiple candidate scoring methods. Code is available at <url>https://github.com/dlawjddn803/INFO</url></abstract>
       <url hash="fd269df3">2022.findings-emnlp.75</url>
@@ -9291,7 +9291,7 @@
       <author><first>Haneul</first><last>Yoo</last><affiliation>KAIST</affiliation></author>
       <author><first>JinYeong</first><last>Bak</last><affiliation>Sungkyunkwan University</affiliation></author>
       <author><first>Kyunghyun</first><last>Cho</last><affiliation>New York University</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>1260-1272</pages>
       <abstract>The Annals of Joseon Dynasty (AJD) contain the daily records of the Kings of Joseon, the 500-year kingdom preceding the modern nation of Korea.The Annals were originally written in an archaic Korean writing system, ‘Hanja’, and were translated into Korean from 1968 to 1993.The resulting translation was however too literal and contained many archaic Korean words; thus, a new expert translation effort began in 2012. Since then, the records of only one king have been completed in a decade.In parallel, expert translators are working on English translation, also at a slow pace and produced only one king’s records in English so far.Thus, we propose H2KE, a neural machine translation model, that translates historical documents in Hanja to more easily understandable Korean and to English.Built on top of multilingual neural machine translation, H2KE learns to translate a historical document written in Hanja, from both a full dataset of outdated Korean translation and a small dataset of more recently translated contemporary Korean and English.We compare our method against two baselines:a recent model that simultaneously learns to restore and translate Hanja historical documentand a Transformer based model trained only on newly translated corpora.The experiments reveal that our method significantly outperforms the baselines in terms of BLEU scores for both contemporary Korean and English translations.We further conduct extensive human evaluation which shows that our translation is preferred over the original expert translations by both experts and non-expert Korean speakers.</abstract>
       <url hash="da5cc8c6">2022.findings-emnlp.91</url>
@@ -9342,7 +9342,7 @@
       <title>Late Prompt Tuning: A Late Prompt Could Be Better Than Many Prompts</title>
       <author><first>Xiangyang</first><last>Liu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tianxiang</first><last>Sun</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>1325-1338</pages>
       <abstract>Prompt tuning is a parameter-efficient tuning (PETuning) method for utilizing pre-trained models (PTMs) that simply prepends a soft prompt to the input and only optimizes the prompt to adapt PTMs to downstream tasks. Although it is parameter- and deployment-efficient, its performance still lags behind other state-of-the-art PETuning methods. Besides, the training cost of prompt tuning is not significantly reduced due to the back-propagation through the entire model. Through empirical analyses, we shed some light on the lagging performance of prompt tuning and recognize a trade-off between the propagation distance from label signals to the inserted prompt and the influence of the prompt on model outputs. Further, we present Late Prompt Tuning (LPT) that inserts a late prompt into an intermediate layer of the PTM instead of the input layer or all layers. The late prompt is obtained by a neural prompt generator conditioned on the hidden states before the prompt insertion layer and therefore is instance-dependent. Through extensive experimental results across various tasks and PTMs, we show that LPT can achieve competitive performance to full model tuning and other PETuning methods under both full-data and few-shot scenarios while possessing faster training speed and lower memory cost.</abstract>
@@ -9384,7 +9384,7 @@
       <author><first>Nico</first><last>Daheim</last><affiliation>TU Darmstadt</affiliation></author>
       <author><first>David</first><last>Thulke</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Christian</first><last>Dugast</last><affiliation>RWTH, University of Aachen</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <pages>1365-1381</pages>
       <abstract>In this work, we present a model for document-grounded response generation in dialog that is decomposed into two components according to Bayes’ theorem.One component is a traditional ungrounded response generation model and the other component models the reconstruction of the grounding document based on the dialog context and generated response.We propose different approximate decoding schemes and evaluate our approach on multiple open-domain and task-oriented document-grounded dialog datasets.Our experiments show that the model is more factual in terms of automatic factuality metrics than the baseline model.Furthermore, we outline how introducing scaling factors between the components allows for controlling the tradeoff between factuality and fluency in the model output.Finally, we compare our approach to a recently proposed method to control factuality in grounded dialog, CTRL (Rashkin et al., 2021), and show that both approaches can be combined to achieve additional improvements.</abstract>
       <url hash="6f2de341">2022.findings-emnlp.98</url>
@@ -9424,7 +9424,7 @@
       <author><first>Daniel</first><last>Rotem</last><affiliation>Hebrew University of Jerusalem</affiliation></author>
       <author><first>Jungo</first><last>Kasai</last><affiliation>University of Washington</affiliation></author>
       <author><first>Ivan</first><last>Montero</last><affiliation>Paul G. Allen School of Computer Science &amp; Engineering, University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Roy</first><last>Schwartz</last><affiliation>The Hebrew University of Jerusalem</affiliation></author>
       <pages>1403-1416</pages>
       <abstract>The attention mechanism is considered the backbone of the widely-used Transformer architecture. It contextualizes the input by computing input-specific attention matrices. We find that this mechanism, while powerful and elegant, is not as important as typically thought for pretrained language models. We introduce PAPA, a new probing method that replaces the input-dependent attention matrices with constant ones—the average attention weights over multiple inputs. We use PAPA to analyze several established pretrained Transformers on six downstream tasks. We find that without any input-dependent attention, all models achieve competitive performance—an average relative drop of only 8% from the probing baseline. Further, little or no performance drop is observed when replacing half of the input-dependent attention matrices with constant (input-independent) ones. Interestingly, we show that better-performing models lose more from applying our method than weaker models, suggesting that the utilization of the input-dependent attention mechanism might be a factor in their success. Our results motivate research on simpler alternatives to input-dependent attention, as well as on methods for better utilization of this mechanism in the Transformer architecture.</abstract>
@@ -9489,7 +9489,7 @@
       <author><first>Baohao</first><last>Liao</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>David</first><last>Thulke</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Sanjika</first><last>Hewavitharana</last><affiliation>eBay</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Christof</first><last>Monz</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>1478-1492</pages>
       <abstract>The pre-training of masked language models (MLMs) consumes massive computation to achieve good results on downstream NLP tasks, resulting in a large carbon footprint. In the vanilla MLM, the virtual tokens, [MASK]s, act as placeholders and gather the contextualized information from unmasked tokens to restore the corrupted information. It raises the question of whether we can append [MASK]s at a later layer, to reduce the sequence length for earlier layers and make the pre-training more efficient. We show: (1) [MASK]s can indeed be appended at a later layer, being disentangled from the word embedding; (2) The gathering of contextualized information from unmasked tokens can be conducted with a few layers. By further increasing the masking rate from 15% to 50%, we can pre-train RoBERTa-base and RoBERTa-large from scratch with only 78% and 68% of the original computational budget without any degradation on the GLUE benchmark. When pre-training with the original budget, our method outperforms RoBERTa for 6 out of 8 GLUE tasks, on average by 0.4%.</abstract>
@@ -9515,7 +9515,7 @@
     <paper id="108">
       <title>On the Effectiveness of Automated Metrics for Text Generation Systems</title>
       <author><first>Pius</first><last>von Däniken</last><affiliation>Zurich University of Applied Sciences ZHAW</affiliation></author>
-      <author><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Don</first><last>Tuggener</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <pages>1503-1522</pages>
@@ -9580,12 +9580,12 @@
     </paper>
     <paper id="113">
       <title>Data Selection Curriculum for Neural Machine Translation</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Philipp</first><last>Koehn</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Vishrav</first><last>Chaudhary</last><affiliation>Microsoft</affiliation></author>
       <author><first>James</first><last>Cross</last><affiliation>Facebook</affiliation></author>
       <author><first>Shruti</first><last>Bhosale</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <pages>1569-1582</pages>
       <abstract>Neural Machine Translation (NMT) models are typically trained on heterogeneous data that are concatenated and randomly shuffled. However, not all of the training data are equally useful to the model. Curriculum training aims to present the data to the NMT models in a meaningful order. In this work, we introduce a two-stage training framework for NMT where we fine-tune a base NMT model on subsets of data, selected by both deterministic scoring using pre-trained methods and online scoring that considers prediction scores of the emerging NMT model. Through comprehensive experiments on six language pairs comprising low- and high-resource languages from WMT’21, we have shown that our curriculum strategies consistently demonstrate better quality (up to +2.2 BLEU improvement) and faster convergence (approximately 50% fewer updates).</abstract>
       <url hash="7accd5da">2022.findings-emnlp.113</url>
@@ -9614,7 +9614,7 @@
       <author><first>Amith</first><last>Ananthram</last><affiliation>Columbia University</affiliation></author>
       <author><first>Emily</first><last>Allaway</last><affiliation>Columbia University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>1595-1609</pages>
       <abstract>Practitioners from many disciplines (e.g., political science) use expert-crafted taxonomies to make sense of large, unlabeled corpora. In this work, we study Seeded Hierarchical Clustering (SHC): the task of automatically fitting unlabeled data to such taxonomies using a small set of labeled examples. We propose HierSeed, a novel weakly supervised algorithm for this task that uses only a small set of labeled seed examples in a computation and data efficient manner. HierSeed assigns documents to topics by weighing document density against topic hierarchical structure. It outperforms unsupervised and supervised baselines for the SHC task on three real-world datasets.</abstract>
       <url hash="02f3a1b9">2022.findings-emnlp.115</url>
@@ -9725,7 +9725,7 @@
     <paper id="124">
       <title>Hyperdecoders: Instance-specific decoders for multi-task <fixed-case>NLP</fixed-case></title>
       <author><first>Hamish</first><last>Ivison</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>1715-1730</pages>
       <abstract>We investigate input-conditioned hypernetworks for multi-tasking in NLP, generating parameter-efficient adaptations for a decoder using a hypernetwork conditioned on the output of an encoder. This approach produces a unique decoder adaptation for every input instance, allowing the network a larger degree of flexibility than prior work that only produces one decoder adaptation per task. We apply our method to sequence classification tasks, extractive QA, and summarisation and find that it surpasses previous parameter efficient fine-tuning methods and often outperforms fully finetuning the underlying model. An analysis of the embeddings used by our hypernetwork shows that they are sensitive to output label and type, suggesting that our approach better maps from encoder representations to output labels. Our code is publicly available at https://github.com/allenai/hyperdecoders.</abstract>
       <url hash="76a93837">2022.findings-emnlp.124</url>
@@ -9886,7 +9886,7 @@
       <author><first>Jiannan</first><last>Xiang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhengzhong</first><last>Liu</last><affiliation>Carnegie Mellon University; Petuum INC.</affiliation></author>
       <author><first>Yucheng</first><last>Zhou</last><affiliation>UC San Diego</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University; MBZUAI; Petuum Inc.</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University; MBZUAI; Petuum Inc.</affiliation></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>UC San Diego</affiliation></author>
       <pages>1886-1899</pages>
       <abstract>Data-to-text generation is challenging due to the great variety of the input data in terms of domains (e.g., finance vs sports) or schemata (e.g., diverse predicates). Recent end-to-end neural methods thus require substantial training examples to learn to disambiguate and describe the data. Yet, real-world data-to-text problems often suffer from various data-scarce issues: one may have access to only a handful of or no training examples, and/or have to rely on examples in a different domain or schema. To fill this gap, we propose Any-Shot Data-to-Text (ASDOT), a new approach flexibly applicable to diverse settings by making efficient use of any given (or no) examples. ASDOT consists of two steps, data disambiguation and sentence fusion, both of which are amenable to be solved with off-the-shelf pretrained language models (LMs) with optional finetuning. In the data disambiguation stage, we employ the prompted GPT-3 model to understand possibly ambiguous triples from the input data and convert each into a short sentence with reduced ambiguity. The sentence fusion stage then uses an LM like T5 to fuse all the resulting sentences into a coherent paragraph as the final description. We evaluate extensively on various datasets in different scenarios, including the zero-/few-/full-shot settings, and generalization to unseen predicates and out-of-domain data. Experimental results show that ASDOT consistently achieves significant improvement over baselines, e.g., a 30.81 BLEU gain on the DART dataset under the zero-shot setting.</abstract>
@@ -9931,7 +9931,7 @@
       <author><first>Mihir</first><last>Goyal</last><affiliation>Indraprastha Institute of Information Technology Delhi</affiliation></author>
       <author><first>Ramit</first><last>Sawhney</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Ritik</first><last>Mathur</last><affiliation>Indian Institute of Technology Roorkee</affiliation></author>
-      <author><first>Jochen</first><last>Leidner</last><affiliation>Coburg University of Applied Sciences / University of Sheffield</affiliation></author>
+      <author id="jochen-l-leidner"><first>Jochen</first><last>Leidner</last><affiliation>Coburg University of Applied Sciences / University of Sheffield</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Dinesh</first><last>Manocha</last><affiliation>University of Maryland</affiliation></author>
       <pages>1933-1940</pages>
@@ -9960,7 +9960,7 @@
 Towards Faster Fine-tuning with Less Labels in Speech Processing</title>
       <author><first>Hao</first><last>Yang</last><affiliation>Monash University</affiliation></author>
       <author><first>Jinming</first><last>Zhao</last><affiliation>Dept of Data Science and AI, Faculty of IT, Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
       <pages>1952-1959</pages>
       <abstract>Pre-trained speech Transformers have facilitated great success across various speech processing tasks. However, fine-tuning these encoders for downstream tasks require sufficiently large training data to converge or to achieve state-of-the-art. In text domain this has been partly attributed to sub-optimality of the representation space in pre-trained Transformers. In this work, we take a sober look into pre-trained speech encoders and rewire their representation space without requiring any task-specific labels. Our method utilises neutrally synthesised version of audio inputs along with frame masking to construct positive pairs for contrastive self-supervised learning. When used for augmenting the wav2vec 2 encoder, we observe consistent improvement of isotropy in the representation space. Our experiments on 6 speech processing tasks, exhibit a significant convergence speedup during task fine-tuning as well as consistent task improvement, specially in low-resource settings.</abstract>
@@ -9975,7 +9975,7 @@ Towards Faster Fine-tuning with Less Labels in Speech Processing</title>
 Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Jinming</first><last>Zhao</last><affiliation>Dept of Data Science and AI, Faculty of IT, Monash University</affiliation></author>
       <author><first>Hao</first><last>Yang</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
       <pages>1960-1967</pages>
       <abstract>Pre-trained speech Transformers in speech translation (ST) have facilitated state-of-the-art (SotA) results; yet, using such encoders is computationally expensive. To improve this, we present a novel Reducer Adaptor block, RedApt, that could be seamlessly integrated within any Transformer-based speech encoding architecture. Integrating the pretrained wav2vec 2 speech encoder with RedAptbrings 41% speedup, 33% memory reduction with 24% fewer FLOPs at inference. To our positive surprise, our ST model with RedApt outperforms the SotA architecture by an average of 0.68 BLEU score on 8 language pairs from Must-C.</abstract>
@@ -9998,7 +9998,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     </paper>
     <paper id="144">
       <title>P<tex-math>\text{M}^2\text{F}^2</tex-math><fixed-case>N</fixed-case>: Patient Multi-view Multi-modal Feature Fusion Networks for Clinical Outcome Prediction</title>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Baohang</first><last>Zhou</last><affiliation>Nankai University</affiliation></author>
       <author><first>Kehui</first><last>Song</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xuhui</first><last>Sui</last><affiliation>Nankai University</affiliation></author>
@@ -10101,7 +10101,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Firoj</first><last>Alam</last><affiliation>Qatar Computing Research Institute, HBKU</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
       <author><first>Aisha</first><last>Mohamed</last><affiliation>University of Wisconsin-Madison</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2069-2080</pages>
       <abstract>Given the recent proliferation of false claims online, there has been a lot of manual fact-checking effort. As this is very time-consuming, human fact-checkers can benefit from tools that can support them and make them more efficient. Here, we focus on building a system that could provide such support. Given an input document, it aims to detect all sentences that contain a claim that can be verified by some previously fact-checked claims (from a given database). The output is a re-ranked list of the document sentences, so that those that can be verified are ranked as high as possible, together with corresponding evidence. Unlike previous work, which has looked into claim retrieval, here we take a document-level perspective. We create a new manually annotated dataset for the task, and we propose suitable evaluation measures. We further experiment with a learning-to-rank approach, achieving sizable performance gains over several strong baselines. Our analysis demonstrates the importance of modeling text similarity and stance, while also taking into account the veracity of the retrieved previously fact-checked claims. We believe that this research would be of interest to fact-checkers, journalists, media, and regulatory authorities.</abstract>
       <url hash="4d9798cf">2022.findings-emnlp.151</url>
@@ -10319,7 +10319,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="168">
       <title>Alleviating Sparsity of Open Knowledge Graphs with Ternary Contrastive Learning</title>
       <author><first>Qian</first><last>Li</last><affiliation>Northeastern University and Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Daling</first><last>Wang</last></author>
       <author><first>Shi</first><last>Feng</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Yifei</first><last>Zhang</last><affiliation>Northeastern University</affiliation></author>
@@ -10335,7 +10335,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Sheena</first><last>Panthaplackel</last><affiliation>The University of Texas at Austin</affiliation></author>
       <author><first>Milos</first><last>Gligoric</last><affiliation>The University of Texas at Austin</affiliation></author>
       <author><first>Junyi Jessy</first><last>Li</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <pages>2292-2301</pages>
       <abstract>Automatically fixing software bugs is a challenging task. While recent work showed that natural language context is useful in guiding bug-fixing models, the approach required prompting developers to provide this context, which was simulated through commit messages written after the bug-fixing code changes were made. We instead propose using bug report discussions, which are available before the task is performed and are also naturally occurring, avoiding the need for any additional information from developers. For this, we augment standard bug-fixing datasets with bug report discussions. Using these newly compiled datasets, we demonstrate that various forms of natural language context derived from such discussions can aid bug-fixing, even leading to improved performance over using commit messages corresponding to the oracle bug-fixing commits.</abstract>
       <url hash="1537491d">2022.findings-emnlp.169</url>
@@ -10447,7 +10447,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Fei</first><last>Zhao</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Yuchen</first><last>Shen</last><affiliation>UESTC</affiliation></author>
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <pages>2390-2402</pages>
       <abstract>Multi-Label Few-Shot Aspect Category Detection (FS-ACD) is a new sub-task of aspect-based sentiment analysis, which aims to detect aspect categories accurately with limited training instances. Recently, dominant works use the prototypical network to accomplish this task, and employ the attention mechanism to extract keywords of aspect category from the sentences to produce the prototype for each aspect. However, they still suffer from serious noise problems: (1) due to lack of sufficient supervised data, the previous methods easily catch noisy words irrelevant to the current aspect category, which largely affects the quality of the generated prototype; (2) the semantically-close aspect categories usually generate similar prototypes, which are mutually noisy and confuse the classifier seriously. In this paper, we resort to the label information of each aspect to tackle the above problems, along with proposing a novel Label-Driven Denoising Framework (LDF). Extensive experimental results show that our framework achieves better performance than other state-of-the-art methods.</abstract>
       <url hash="7ce4483d">2022.findings-emnlp.177</url>
@@ -10460,7 +10460,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yixing</first><last>Fan</last><affiliation>Institute of Computing Technology, CAS.</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <author><first>Ruqing</first><last>Zhang</last><affiliation>CAS Key Lab of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>2403-2415</pages>
       <abstract>Visual Entity Linking (VEL) is a task to link regions of images with their corresponding entities in Knowledge Bases (KBs), which is beneficial for many computer vision tasks such as image retrieval, image caption, and visual question answering. While existing tasks in VEL either rely on textual data to complement a multi-modal linking or only link objects with general entities, which fails to perform named entity linking on large amounts of image data. In this paper, we consider a purely Visual-based Named Entity Linking (VNEL) task, where the input only consists of an image. The task is to identify objects of interest (i.e., visual entity mentions) in images and link them to corresponding named entities in KBs. Since each entity often contains rich visual and textual information in KBs, we thus propose three different sub-tasks, i.e., visual to visual entity linking (V2VEL), visual to textual entity linking (V2TEL), and visual to visual-textual entity linking (V2VTEL). In addition, we present a high-quality human-annotated visual person linking dataset, named WIKIPerson. Based on WIKIPerson, we establish a series of baseline algorithms for the solution of each sub-task, and conduct experiments to verify the quality of the proposed datasets and the effectiveness of baseline methods. We envision this work to be helpful for soliciting more works regarding VNEL in the future. The codes and datasets are publicly available at https: //github.com/ict-bigdatalab/VNEL.</abstract>
       <url hash="859f84a6">2022.findings-emnlp.178</url>
@@ -10660,8 +10660,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Chia-Hsuan</first><last>Lee</last><affiliation>University of Washington</affiliation></author>
       <author><first>Tianbao</first><last>Xie</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Tao</first><last>Yu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
       <pages>2627-2643</pages>
       <abstract>Collecting and annotating task-oriented dialogues is time-consuming and costly. Thus, zero and few shot learning for dialogue tasks presents an exciting opportunity. In this work, we propose an in-context (IC) learning framework for zero-shot and few-shot learning dialogue state tracking (DST), where a large pretrained language model (LM) takes a test instance and a few exemplars as input, and directly decodes the dialogue state without any parameter updates. This approach is more flexible and scalable than prior DST work when adapting to new domains and scenarios. To better leverage a tabular domain description in the LM prompt, we reformulate DST into a text-to-SQL problem. We also propose a novel approach to retrieve annotated dialogues as exemplars. Empirical results on MultiWOZ show that our method IC-DST substantially outperforms previous fine-tuned state-of-the-art models in few-shot settings. In addition, we test IC-DST in zero-shot settings, in which the model only takes a fixed task instruction as input, finding that it outperforms previous zero-shot methods by a large margin.</abstract>
       <url hash="2859245d">2022.findings-emnlp.193</url>
@@ -10674,7 +10674,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Akshita</first><last>Bhagia</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Yonatan</first><last>Bisk</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Florian</first><last>Metze</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Ana</first><last>Marasovic</last><affiliation>University of Utah</affiliation></author>
       <pages>2644-2657</pages>
       <abstract>Combining the visual modality with pretrained language models has been surprisingly effective for simple descriptive tasks such as image captioning. More general text generation however remains elusive. We take a step back and ask: How do these models work for more complex generative tasks, i.e. conditioning on both text and images? Are multimodal models simply visually adapted language models, or do they combine they reason jointly over modalities?We investigate these questions in the context of self-rationalization (jointly generating task labels/answers and free-text explanations) of three tasks: (i) visual question answering in VQA-X, (ii) visual commonsense reasoning in VCR, and (iii) visual-textual entailment in E-SNLI-VE. We show that recent unimodal advances, CLIP image representations and scaling of language models, do not consistently improveself-rationalization in multimodal tasks. We find that no single model type works universally best across tasks, datasets, and finetuning data sizes. Our findings motivate the need for novel general backbones that move text generation from images and text beyond image captioning.</abstract>
@@ -10705,7 +10705,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Christian</first><last>Hardmeier</last><affiliation>IT University of Copenhagen/Uppsala University</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>2673-2692</pages>
       <abstract>The field of Deep Learning (DL) has undergone explosive growth during the last decade, with a substantial impact on Natural Language Processing (NLP) as well. Yet, compared to more established disciplines, a lack of common experimental standards remains an open challenge to the field at large. Starting from fundamental scientific principles, we distill ongoing discussions on experimental standards in NLP into a single, widely-applicable methodology. Following these best practices is crucial to strengthen experimental evidence, improve reproducibility and enable scientific progress. These standards are further collected in a public repository to help them transparently adapt to future needs.</abstract>
       <url hash="bb3dd3a6">2022.findings-emnlp.196</url>
@@ -10788,7 +10788,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Probing for Incremental Parse States in Autoregressive Language Models</title>
       <author><first>Tiwalayo</first><last>Eisape</last><affiliation>MIT</affiliation></author>
       <author><first>Vineet</first><last>Gangireddy</last><affiliation>Harvard University</affiliation></author>
-      <author><first>Roger</first><last>Levy</last><affiliation>MIT</affiliation></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last><affiliation>MIT</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>MIT</affiliation></author>
       <pages>2801-2813</pages>
       <abstract>Next-word predictions from autoregressive neural language models show remarkable sensitivity to syntax. This work evaluates the extent to which this behavior arises as a result of a learned ability to maintain implicit representations of incremental syntactic structures. We extend work in syntactic probing to the incremental setting and present several probes for extracting incomplete syntactic structure (operationalized through parse states from a stack-based parser) from autoregressive language models. We find that our probes can be used to predict model preferences on ambiguous sentence prefixes and causally intervene on model representations and steer model behavior. This suggests implicit incremental syntactic inferences underlie next-word predictions in autoregressive neural language models.</abstract>
@@ -10836,8 +10836,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title><fixed-case>MANT</fixed-case>a: Efficient Gradient-Based Tokenization for End-to-End Robust Language Modeling</title>
       <author><first>Nathan</first><last>Godey</last><affiliation>Inria / ALMAnaCH</affiliation></author>
       <author><first>Roman</first><last>Castagné</last><affiliation>Inria</affiliation></author>
-      <author><first>Éric</first><last>de la Clergerie</last><affiliation>INRIA</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last><affiliation>INRIA</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <pages>2859-2870</pages>
       <abstract>Static subword tokenization algorithms have been an essential component of recent works on language modeling. However, their static nature results in important flaws that degrade the models’ downstream performance and robustness. In this work, we propose MANTa, a Module for Adaptive Neural TokenizAtion. MANTa is a differentiable tokenizer trained end-to-end with the language model. The resulting system offers a trade-off between the expressiveness of byte-level models and the speed of models trained using subword tokenization. In addition, our tokenizer is highly explainable since it produces an explicit segmentation of sequences into blocks. We evaluate our pre-trained model on several English datasets from different domains as well as on synthetic noise. We find that MANTa improves robustness to character perturbations and out-of-domain data. We then show that MANTa performs comparably to other models on the general-domain GLUE benchmark. Finally, we show that it is considerably faster than strictly byte-level models.</abstract>
       <url hash="de89054b">2022.findings-emnlp.207</url>
@@ -10897,7 +10897,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>John</first><last>Judge</last><affiliation>University of California, Santa Barbara</affiliation></author>
       <author><first>Desmond</first><last>Patton</last><affiliation>Columbia University</affiliation></author>
       <author><first>Bruce</first><last>Bimber</last><affiliation>University of California, Santa Barbara</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>Unversity of California, Santa Barbara</affiliation></author>
       <pages>2914-2926</pages>
       <abstract>An increasingly prevalent problem for intelligent technologies is text safety, as uncontrolled systems may generate recommendations to their users that lead to injury or life-threatening consequences. However, the degree of explicitness of a generated statement that can cause physical harm varies. In this paper, we distinguish types of text that can lead to physical harm and establish one particularly underexplored category: covertly unsafe text. Then, we further break down this category with respect to the system’s information and discuss solutions to mitigate the generation of text in each of these subcategories. Ultimately, our work defines the problem of covertly unsafe language that causes physical harm and argues that this subtle yet dangerous issue needs to be prioritized by stakeholders and regulators. We highlight mitigation strategies to inspire future researchers to tackle this challenging problem and help improve safety within smart systems.</abstract>
@@ -10913,7 +10913,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Zezhong</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Binyang</first><last>Li</last><affiliation>University of International Relations</affiliation></author>
       <author><first>Fangchun</first><last>Yang</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>2927-2932</pages>
       <abstract>Humor plays an important role in our daily life, as it is an essential and fascinating element in the communication between persons. Therefore, how to recognize punchlines from the dialogue, i.e. conversational humor recognition, has attracted much interest of computational linguistics communities. However, most existing work attempted to understand the conversational humor by analyzing the contextual information of the dialogue, but neglected the character of the interlocutor, such as age, gender, occupation, and so on. For instance, the same utterance could bring out humorous from a serious person, but may be a plain expression from a naive person. To this end, this paper proposes a Character Fusion Conversational Humor Recognition model (CFCHR) to explore character information to recognize conversational humor. CFCHR utilizes a multi-task learning framework that unifies two highly pertinent tasks, i.e., character extraction and punchline identification. Based on deep neural networks, we trained both tasks jointly by sharing weight to extract the common and task-invariant features while each task could still learn its task-specific features. Experiments were conducted on Chinese sitcoms corpus, which consisted of 12,677 utterances from 22 characters. The experimental results demonstrated that CFCHR could achieve 33.08% improvements in terms of F1-score over some strong baselines, and proved the effectiveness of the character information to identify the punchlines.</abstract>
       <url hash="da3a825f">2022.findings-emnlp.212</url>
@@ -10964,7 +10964,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Tanay</first><last>Dixit</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Bhargavi</first><last>Paranjape</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>2964-2984</pages>
       <abstract>Counterfactual data augmentation (CDA) – i.e., adding minimally perturbed inputs during training – helps reduce model reliance on spurious correlations and improves generalization to out-of-distribution (OOD) data. Prior work on generating counterfactuals only considered restricted classes of perturbations, limiting their effectiveness. We present Counterfactual Generation via Retrieval and Editing (CORE), a retrieval-augmented generation framework for creating diverse counterfactual perturbations for CDA. For each training example, CORE first performs a dense retrieval over a task-related unlabeled text corpus using a learned bi-encoder and extracts relevant counterfactual excerpts. CORE then incorporates these into prompts to a large language model with few-shot learning capabilities, for counterfactual editing. Conditioning language model edits on naturally occurring data results in more diverse perturbations. Experiments on natural language inference and sentiment analysis benchmarks show that CORE counterfactuals are more effective at improving generalization to OOD data compared to other DA approaches. We also show that the CORE retrieval framework can be used to encourage diversity in manually authored perturbations.</abstract>
       <url hash="612f6817">2022.findings-emnlp.216</url>
@@ -11083,7 +11083,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Learning When and What to Quote: A Quotation Recommender System with Mutual Promotion of Recommendation and Generation</title>
       <author><first>Lingzhi</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xingshan</first><last>Zeng</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>3094-3105</pages>
       <abstract>This work extends the current quotation recommendation task to a more realistic quotation recommender system that learns to predict when to quote and what to quote jointly. The system consists of three modules (tasks), a prediction module to predict whether to quote given conversation contexts, a recommendation module to recommend suitable quotations and a generation module generating quotations or sentences in ordinary language to continue the conversation. We benchmark several competitive models for the two newly introduced tasks (i.e., when-to-quote and what-to-continue). For quotation recommendation, compared with previous work that is either generation-based or ranking-based recommendation, we propose a novel framework with mutual promotion of generation module and ranking-based recommendation module. Experiments show that our framework achieves significantly better performance than baselines on two datasets. Further experiments and analyses validate the effectiveness of the proposed mechanisms and get a better understanding of the quotation recommendation task.</abstract>
       <url hash="f2a1fb8b">2022.findings-emnlp.225</url>
@@ -11106,7 +11106,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Gender Bias in Meta-Embeddings</title>
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Danushka</first><last>Bollegala</last><affiliation>University of Liverpool/Amazon</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>3118-3133</pages>
       <abstract>Different methods have been proposed to develop meta-embeddings from a given set of source embeddings. However, the source embeddings can contain unfair gender-related biases, and how these influence the meta-embeddings has not been studied yet.We study the gender bias in meta-embeddings created under three different settings:(1) meta-embedding multiple sources without performing any debiasing (Multi-Source No-Debiasing),(2) meta-embedding multiple sources debiased by a single method (Multi-Source Single-Debiasing), and(3) meta-embedding a single source debiased by different methods (Single-Source Multi-Debiasing).Our experimental results show that meta-embedding amplifies the gender biases compared to input source embeddings.We find that debiasing not only the sources but also their meta-embedding is needed to mitigate those biases.Moreover, we propose a novel debiasing method based on meta-embedding learning where we use multiple debiasing methods on a single source embedding and then create a single unbiased meta-embedding.</abstract>
       <url hash="6189dc2a">2022.findings-emnlp.227</url>
@@ -11212,7 +11212,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="235">
       <title>Low-resource Interactive Active Labeling for Fine-tuning Language Models</title>
       <author><first>Seiji</first><last>Maekawa</last><affiliation>Osaka University</affiliation></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Hannah</first><last>Kim</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Sajjadur</first><last>Rahman</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Estevam</first><last>Hruschka</last><affiliation>Megagon Labs - https://megagon.ai/</affiliation></author>
@@ -11266,7 +11266,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Mix-and-Match: Scalable Dialog Response Retrieval using <fixed-case>G</fixed-case>aussian Mixture Embeddings</title>
       <author><first>Gaurav</first><last>Pandey</last><affiliation>IBM Research</affiliation></author>
       <author><first>Danish</first><last>Contractor</last><affiliation>IBM Research &amp; Indian Institute of Technology, New Delhi</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last><affiliation>IBM</affiliation></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last><affiliation>IBM</affiliation></author>
       <pages>3273-3287</pages>
       <abstract>Embedding-based approaches for dialog response retrieval embed the context-response pairs as points in the embedding space. These approaches are scalable, but fail to account for the complex, many-to-many relationships that exist between context-response pairs. On the other end of the spectrum, there are approaches that feed the context-response pairs jointly through multiple layers of neural networks. These approaches can model the complex relationships between context-response pairs, but fail to scale when the set of responses is moderately large (&gt;1000). In this paper, we propose a scalable model that can learn complex relationships between context-response pairs. Specifically, the model maps the contexts as well as responses to probability distributions over the embedding space. We train the models by optimizing the Kullback-Leibler divergence between the distributions induced by context-response pairs in the training data. We show that the resultant model achieves better performance as compared to other embedding-based approaches on publicly available conversation data.</abstract>
       <url hash="787e9970">2022.findings-emnlp.239</url>
@@ -11409,7 +11409,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="249">
       <title>Truncation Sampling as Language Model Desmoothing</title>
       <author><first>John</first><last>Hewitt</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <author><first>Percy</first><last>Liang</last><affiliation>Stanford University</affiliation></author>
       <pages>3414-3427</pages>
       <abstract>Long samples of text from neural language models can be of poor quality. Truncation sampling algorithms–like top-p or top-k—address this by setting some words’ probabilities to zero at each step. This work investigates why these methods are important, and how to improve them. We propose thinking of a neural language model as a mixture of a true distribution and a smoothing distribution that avoids infinite perplexity. In this light, truncation algorithms aim to perform desmoothing, estimating a subset of the support of the true distribution. Finding a good subset is crucial: we show that top-p unnecessarily truncates high-probability words, for example causing it to truncate all words but Trump for a document that starts with Donald. We introduce eta-sampling, which truncates words below an entropy-dependent probability threshold. Compared to previous algorithms, our eta-sampling generates more plausible long documents according to humans, is better at breaking out of repetition, and behaves more reasonably on a battery of test distributions.</abstract>
@@ -11451,7 +11451,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="252">
       <title>Simple but Challenging: Natural Language Inference Models Fail on Simple Sentences</title>
       <author><first>Cheng</first><last>Luo</last><affiliation>Zhejiang Lab</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Zhejiang university</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Zhejiang university</affiliation></author>
       <author><first>Jieyu</first><last>Lin</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Jiajie</first><last>Zou</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Ming</first><last>Xiang</last><affiliation>the University of Chicago</affiliation></author>
@@ -11514,7 +11514,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Rong</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Hui</first><last>Xue</last><affiliation>alibaba</affiliation></author>
       <author><first>Donghong</first><last>Sun</last><affiliation>China</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <pages>3502-3516</pages>
       <abstract>Despite of the superb performance on a wide range of tasks, pre-trained language models (e.g., BERT) have been proved vulnerable to adversarial texts. In this paper, we present RoChBERT, a framework to build more Robust BERT-based models by utilizing a more comprehensive adversarial graph to fuse Chinese phonetic and glyph features into pre-trained representations during fine-tuning. Inspired by curriculum learning, we further propose to augment the training dataset with adversarial texts in combination with intermediate samples. Extensive experiments demonstrate that RoChBERT outperforms previous methods in significant ways: (i) robust – RoChBERT greatly improves the model robustness without sacrificing accuracy on benign texts. Specifically, the defense lowers the success rates of unlimited and limited attacks by 59.43% and 39.33% respectively, while remaining accuracy of 93.30%; (ii) flexible – RoChBERT can easily extend to various language models to solve different downstream tasks with excellent performance; and (iii) efficient – RoChBERT can be directly applied to the fine-tuning stage without pre-training language model from scratch, and the proposed data augmentation method is also low-cost.</abstract>
       <url hash="4c797f40">2022.findings-emnlp.256</url>
@@ -11595,7 +11595,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Xin</first><last>Jiang</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>3576-3591</pages>
       <abstract>Among all the safety concerns that hinder the deployment of open-domain dialog systems (e.g., offensive languages, biases, and toxic behaviors), social bias presents an insidious challenge. Addressing this challenge requires rigorous analyses and normative reasoning. In this paper, we focus our investigation on social bias measurement to facilitate the development of unbiased dialog systems. We first propose a novel Dial-Bias Framework for analyzing the social bias in conversations using a holistic method beyond bias lexicons or dichotomous annotations. Leveraging the proposed framework, we further introduce the CDial-Bias Dataset which is, to the best of our knowledge, the first annotated Chinese social bias dialog dataset. We also establish a fine-grained dialog bias measurement benchmark and conduct in-depth ablation studies to shed light on the utility of the detailed annotations in the proposed dataset. Finally, we evaluate representative Chinese generative models with our classifiers to unveil the presence of social bias in these systems.</abstract>
       <url hash="eb0768ee">2022.findings-emnlp.262</url>
@@ -11606,7 +11606,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="263">
       <title><fixed-case>C</fixed-case>ross<fixed-case>RE</fixed-case>: A Cross-Domain Dataset for Relation Extraction</title>
       <author><first>Elisa</first><last>Bassignana</last><affiliation>IT University of Copenhagen</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>3592-3604</pages>
       <abstract>Relation Extraction (RE) has attracted increasing attention, but current RE evaluation is limited to in-domain evaluation setups. Little is known on how well a RE system fares in challenging, but realistic out-of-distribution evaluation setups. To address this gap, we propose CrossRE, a new, freely-available cross-domain benchmark for RE, which comprises six distinct text domains and includes multi-label annotations. An additional innovation is that we release meta-data collected during annotation, to include explanations and flags of difficult instances. We provide an empirical evaluation with a state-of-the-art model for relation classification. As the meta-data enables us to shed new light on the state-of-the-art model, we provide a comprehensive analysis on the impact of difficult cases and find correlations between model and human annotations. Overall, our empirical investigation highlights the difficulty of cross-domain RE. We release our dataset, to spur more research in this direction.</abstract>
       <url hash="c5cf4754">2022.findings-emnlp.263</url>
@@ -11672,8 +11672,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Aitor</first><last>Ormazabal</last><affiliation>University of the Basque Country</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Manex</first><last>Agirrezabal</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <pages>3655-3670</pages>
       <abstract>Formal verse poetry imposes strict constraints on the meter and rhyme scheme of poems. Most prior work on generating this type of poetry uses existing poems for supervision, which are difficult to obtain for most languages and poetic forms. In this work, we propose an unsupervised approach to generate poems that follow any given meter and rhyme scheme, without requiring any poetic text for training. Our method works by splitting a regular, non-poetic corpus into phrases, prepending control codes that describe the length and end rhyme of each phrase, and training a transformer language model in the augmented corpus. The transformer learns to link the structure descriptor with the control codes to the number of lines, their length and their end rhyme. During inference, we build control codes for the desired meter and rhyme scheme, and condition our language model on them to generate formal verse poetry. Experiments in Spanish and Basque show that our approach is able to generate valid poems, which are often comparable in quality to those written by humans.</abstract>
       <url hash="adab43ba">2022.findings-emnlp.268</url>
@@ -11880,7 +11880,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="282">
       <title>Prompt-based Connective Prediction Method for Fine-grained Implicit Discourse Relation Recognition</title>
       <author><first>Hao</first><last>Zhou</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yuanbin</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yuefeng</first><last>Chen</last><affiliation>Transsion</affiliation></author>
       <author><first>Meirong</first><last>Ma</last><affiliation>Transsion</affiliation></author>
@@ -11915,14 +11915,14 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Cezar</first><last>Pendus</last><affiliation>IBM</affiliation></author>
       <author><first>Saswati</first><last>Dana</last><affiliation>IBM Research</affiliation></author>
       <author><first>Dinesh</first><last>Garg</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
       <author><first>G P Shrivatsa</first><last>Bhargav</last><affiliation>IBM</affiliation></author>
       <author><first>Dinesh</first><last>Khandelwal</last><affiliation>IBM Research</affiliation></author>
       <author><first>Srinivas</first><last>Ravishankar</last><affiliation>IBM Research</affiliation></author>
       <author><first>Sairam</first><last>Gurajada</last><affiliation>IBM Research - Almaden</affiliation></author>
       <author><first>Maria</first><last>Chang</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Rosario</first><last>Uceda-Sosa</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Alexander</first><last>Gray</last><affiliation>IBM Research</affiliation></author>
       <author><first>Guilherme</first><last>Lima</last><affiliation>IBM Research Brazil</affiliation></author>
       <author><first>Ryan</first><last>Riegel</last><affiliation>IBM</affiliation></author>
@@ -11953,10 +11953,10 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     </paper>
     <paper id="286">
       <title><fixed-case>M</fixed-case>3: Multi-level dataset for Multi-document summarisation of Medical studies</title>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last><affiliation>RMIT University</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last><affiliation>RMIT University</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>3887-3901</pages>
       <abstract>We present M3 (Multi-level dataset for Multi-document summarisation of Medical studies), a benchmark dataset for evaluating the quality of summarisation systems in the biomedical domain. The dataset contains sets of multiple input documents and target summaries of three levels of complexity: documents, sentences, and propositions. The dataset also includes several levels of annotation, including biomedical entities, direction, and strength of relations between them, and the discourse relationships between the input documents (“contradiction” or “agreement”). We showcase usage scenarios of the dataset by testing 10 generic and domain-specific summarisation models in a zero-shot setting, and introduce a probing task based on counterfactuals to test if models are aware of the direction and strength of the conclusions generated from input studies.</abstract>
@@ -11993,7 +11993,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="289">
       <title>Probing Relational Knowledge in Language Models via Word Analogies</title>
       <author><first>Kiamehr</first><last>Rezaee</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>3930-3936</pages>
       <abstract>Understanding relational knowledge plays an integral part in natural language comprehension. When it comes to pre-trained language models (PLM), prior work has been focusing on probing relational knowledge this by filling the blanks in pre-defined prompts such as “The capital of France is —". However, these probes may be affected by the co-occurrence of target relation words and entities (e.g. “capital”, “France” and “Paris”) in the pre-training corpus. In this work, we extend these probing methodologies leveraging analogical proportions as a proxy to probe relational knowledge in transformer-based PLMs without directly presenting the desired relation. In particular, we analysed the ability of PLMs to understand (1) the directionality of a given relation (e.g. Paris-France is not the same as France-Paris); (2) the ability to distinguish types on a given relation (both France and Japan are countries); and (3) the relation itself (Paris is the capital of France, but not Rome). Our results show how PLMs are extremely accurate at (1) and (2), but have clear room for improvement for (3). To better understand the reasons behind this behaviour and mistakes made by PLMs, we provide an extended quantitative analysis based on relevant factors such as frequency.</abstract>
       <url hash="1170b7b5">2022.findings-emnlp.289</url>
@@ -12051,7 +12051,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jingfei</first><last>Du</last><affiliation>Facebook</affiliation></author>
       <author><first>Naman</first><last>Goyal</last><affiliation>Facebook</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Veselin</first><last>Stoyanov</last><affiliation>Facebook</affiliation></author>
       <pages>3973-3985</pages>
       <abstract>Prior work on language model pre-training has explored different architectures and learning objectives, but differences in data, hyperparameters and evaluation make a principled comparison difficult. In this work, we focus on bidirectionality as a key factor that differentiates existing approaches, and present a comprehensive study of its role in next token prediction, text infilling, zero-shot priming and fine-tuning. We propose a new framework that generalizes prior approaches, including fully unidirectional models like GPT, fully bidirectional models like BERT, and hybrid models like CM3 and prefix LM. Our framework distinguishes between two notions of bidirectionality (bidirectional context and bidirectional attention) and allows us to control each of them separately. We find that the optimal configuration is largely application-dependent (e.g., bidirectional attention is beneficial for fine-tuning and infilling, but harmful for next token prediction and zero-shot priming). We train models with up to 6.7B parameters, and find differences to remain consistent at scale. While prior work on scaling has focused on left-to-right autoregressive models, our results suggest that this approach comes with some trade-offs, and it might be worthwhile to develop very large bidirectional models.</abstract>
@@ -12091,8 +12091,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Han-Chin</first><last>Shing</last><affiliation>Amazon</affiliation></author>
       <author><first>Qing</first><last>Sun</last><affiliation>Amazon</affiliation></author>
       <author><first>Christopher</first><last>Winestock</last><affiliation>Amazon</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
       <pages>4009-4027</pages>
       <abstract>In real-world scenarios with naturally occurring datasets, reference summaries are noisy and may contain information that cannot be inferred from the source text. On large news corpora, removing low quality samples has been shown to reduce model hallucinations. Yet, for smaller, and/or noisier corpora, filtering is detrimental to performance. To improve reference quality while retaining all data, we propose a new approach: to selectively re-write unsupported reference sentences to better reflect source data. We automatically generate a synthetic dataset of positive and negative revisions by corrupting supported sentences and learn to revise reference sentences with contrastive learning. The intensity of revisions is treated as a controllable attribute so that, at inference, diverse candidates can be over-generated-then-rescored to balance faithfulness and abstraction. To test our methods, we extract noisy references from publicly available MIMIC-III discharge summaries for the task of hospital-course summarization, and vary the data on which models are trained. According to metrics and human evaluation, models trained on revised clinical references are much more faithful, informative, and fluent than models trained on original or filtered data.</abstract>
       <url hash="7e11f6cb">2022.findings-emnlp.296</url>
@@ -12113,7 +12113,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>On the Impact of Temporal Concept Drift on Model Explanations</title>
       <author><first>Zhixue</first><last>Zhao</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>George</first><last>Chrysostomou</last><affiliation>The University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Nikolaos</first><last>Aletras</last><affiliation>University of Sheffield</affiliation></author>
       <pages>4039-4054</pages>
       <abstract>Explanation faithfulness of model predictions in natural language processing is typically evaluated on held-out data from the same temporal distribution as the training data (i.e. synchronous settings). While model performance often deteriorates due to temporal variation (i.e. temporal concept drift), it is currently unknown how explanation faithfulness is impacted when the time span of the target data is different from the data used to train the model (i.e. asynchronous settings). For this purpose, we examine the impact of temporal variation on model explanations extracted by eight feature attribution methods and three select-then-predict models across six text classification tasks. Our experiments show that (i) faithfulness is not consistent under temporal variations across feature attribution methods (e.g. it decreases or increases depending on the method), with an attention-based method demonstrating the most robust faithfulness scores across datasets; and (ii) select-then-predict models are mostly robust in asynchronous settings with only small degradation in predictive performance. Finally, feature attribution methods show conflicting behavior when used in FRESH (i.e. a select-and-predict model) and for measuring sufficiency/comprehensiveness (i.e. as post-hoc methods), suggesting that we need more robust metrics to evaluate post-hoc explanation faithfulness. Code will be made publicly available.</abstract>
@@ -12153,7 +12153,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Chadi</first><last>Helwe</last><affiliation>Telecom Paris, Institut Polytechnique de Paris</affiliation></author>
       <author><first>Simon</first><last>Coumes</last><affiliation>École Normale Supérieure de Rennes</affiliation></author>
       <author><first>Chloé</first><last>Clavel</last><affiliation>LTCI, Telecom-Paris, Institut Polytechnique de Paris</affiliation></author>
-      <author><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris, Institut Polytechnique de Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris, Institut Polytechnique de Paris</affiliation></author>
       <pages>4086-4099</pages>
       <abstract>Transformer-based language models achieve state-of-the-art results on several natural language processing tasks. One of these is textual entailment, i.e., the task of determining whether a premise logically entails a hypothesis. However, the models perform poorly on this task when the examples contain negations. In this paper, we propose a new definition of textual entailment that captures also negation. This allows us to develop TINA (Textual Inference with Negation Augmentation), a principled technique for negated data augmentation that can be combined with the unlikelihood loss function.Our experiments with different transformer-based models show that our method can significantly improve the performance of the models on textual entailment datasets with negation – without sacrificing performance on datasets without negation.</abstract>
       <url hash="753a5ec4">2022.findings-emnlp.301</url>
@@ -12336,7 +12336,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>m<tex-math>^4</tex-math> Adapter: Multilingual Multi-Domain Adaptation for Machine Translation with a Meta-Adapter</title>
       <author><first>Wen</first><last>Lai</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Alexandra</first><last>Chronopoulou</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>4282-4296</pages>
       <abstract>Multilingual neural machine translation models (MNMT) yield state-of-the-art performance when evaluated on data from a domain and language pair seen at training time. However, when a MNMT model is used to translate under domain shift or to a new language pair, performance drops dramatically. We consider a very challenging scenario: adapting the MNMT model both to a new domain and to a new language pair at the same time. In this paper, we propose m^4Adapter (Multilingual Multi-Domain Adaptation for Machine Translation with a Meta-Adapter), which combines domain and language knowledge using meta-learning with adapters. We present results showing that our approach is a parameter-efficient solution which effectively adapts a model to both a new language pair and a new domain, while outperforming other adapter methods. An ablation study also shows that our approach more effectively transfers domain knowledge across different languages and language information across different domains.</abstract>
       <url hash="5d54c592">2022.findings-emnlp.315</url>
@@ -12363,8 +12363,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Vassilina</first><last>Nikoulina</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Alexandre</first><last>Berard</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Caroline</first><last>Brun</last><affiliation>Naver Labs Europe</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>4308-4329</pages>
       <abstract>Recently, very large pre-trained models achieve state-of-the-art results in various natural language processing (NLP) tasks, but their size makes it more challenging to apply them in resource-constrained environments. Compression techniques allow to drastically reduce the size of the models and therefore their inference time with negligible impact on top-tier metrics. However, the general performance averaged across multiple tasks and/or languages may hide a drastic performance drop on under-represented features, which could result in the amplification of biases encoded by the models. In this work, we assess the impact of compression methods on Multilingual Neural Machine Translation models (MNMT) for various language groups, gender, and semantic biases by extensive analysis of compressed models on different machine translation benchmarks, i.e. FLORES-101, MT-Gender, and DiBiMT. We show that the performance of under-represented languages drops significantly, while the average BLEU metric only slightly decreases. Interestingly, the removal of noisy memorization with compression leads to a significant improvement for some medium-resource languages. Finally, we demonstrate that compression amplifies intrinsic gender and semantic biases, even in high-resource languages.</abstract>
       <url hash="e96e0e12">2022.findings-emnlp.317</url>
@@ -12473,7 +12473,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Zetian</first><last>Wu</last><affiliation>Oregon State University</affiliation></author>
       <author><first>Roshan</first><last>Santhosh</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Tejas</first><last>Srivastava</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
       <pages>4430-4448</pages>
       <abstract>Lexica – words and associated scores – are widely used as simple, interpretable, generalizable language features to predict sentiment, emotions, mental health, and personality. They also provide insight into the psychological features behind those moods and traits. Such lexica, historically created by human experts, are valuable to linguists, psychologists, and social scientists, but they take years of refinement and have limited coverage. In this paper, we investigate how the lexica that provide psycholinguistic insights could be computationally induced and how they should be assessed. We identify generalizability and interpretability as two essential properties of such lexica. We induce lexica using both context-oblivious and context-aware approaches, compare their predictive performance both within the training corpus and across various corpora, and evaluate their quality using crowd-worker assessment. We find that lexica induced from context-oblivious models are more generalizable and interpretable than those from more accurate context-aware transformer models. In addition, lexicon scores can identify explanatory words more reliably than a high performing transformer with feature-importance measures like SHAP.</abstract>
@@ -12513,7 +12513,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Leveraging Data Recasting to Enhance Tabular Reasoning</title>
       <author><first>Aashna</first><last>Jena</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Vivek</first><last>Gupta</last><affiliation>School of Computing, University of Utah</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Julian</first><last>Eisenschlos</last><affiliation>Google</affiliation></author>
       <pages>4483-4496</pages>
       <abstract>Creating challenging tabular inference data is essential for learning complex reasoning. Prior work has mostly relied on two data generation strategies. The first is human annotation, which yields linguistically diverse data but is difficult to scale. The second category for creation is synthetic generation, which is scalable and cost effective but lacks inventiveness. In this research, we present a framework for semi-automatically recasting existing tabular data to make use of the benefits of both approaches. We utilize our framework to build tabular NLI instances from five datasets that were initially intended for tasks like table2text creation, tabular Q/A, and semantic parsing. We demonstrate that recasted data could be used as evaluation benchmarks as well as augmentation data to enhance performance on tabular NLI tasks. Furthermore, we investigate the effectiveness of models trained on recasted data in the zero-shot scenario, and analyse trends in performance across different recasted datasets types.</abstract>
@@ -12592,7 +12592,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Jinshan</first><last>Zeng</last><affiliation>Jiangxi Normal University</affiliation></author>
       <author><first>Yudong</first><last>Xie</last><affiliation>Jiangxi Normal University</affiliation></author>
       <author><first>Xianglong</first><last>Yu</last><affiliation>Jiangxi Normal University</affiliation></author>
-      <author><first>John</first><last>Lee</last><affiliation>Department of Linguistics and Translation, City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>Department of Linguistics and Translation, City University of Hong Kong</affiliation></author>
       <author><first>Ding-Xuan</first><last>Zhou</last><affiliation>School of Data Science and Department of Mathematics, City University of Hong Kong</affiliation></author>
       <pages>4557-4568</pages>
       <abstract>The readability assessment task aims to assign a difficulty grade to a text. While neural models have recently demonstrated impressive performance, most do not exploit the ordinal nature of the difficulty grades, and make little effort for model initialization to facilitate fine-tuning. We address these limitations with soft labels for ordinal regression, and with model pre-training through prediction of pairwise relative text difficulty. We incorporate these two components into a model based on hierarchical attention networks, and evaluate its performance on both English and Chinese datasets. Experimental results show that our proposed model outperforms competitive neural models and statistical classifiers on most datasets.</abstract>
@@ -12700,7 +12700,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Merve</first><last>Ünlü Menevşe</last><affiliation>Boğaziçi University</affiliation></author>
       <author><first>Yusufcan</first><last>Manav</last><affiliation>Bogazici University</affiliation></author>
       <author><first>Ebru</first><last>Arisoy</last><affiliation>MEF University</affiliation></author>
-      <author><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
       <pages>4659-4666</pages>
       <abstract>This paper describes a framework to automatically generate a spoken question answering (QA) dataset. The framework consists of a question generation (QG) module to generate questions automatically from given text documents, a text-to-speech (TTS) module to convert the text documents into spoken form and an automatic speech recognition (ASR) module to transcribe the spoken content. The final dataset contains question-answer pairs for both the reference text and ASR transcriptions as well as the audio files corresponding to each reference text. For QG and ASR systems we used pre-trained multilingual encoder-decoder transformer models and fine-tuned these models using a limited amount of manually generated QA data and TTS-based speech data, respectively. As a proof of concept, we investigated the proposed framework for Turkish and generated the Turkish Question Answering (TurQuAse) dataset using Wikipedia articles. Manual evaluation of the automatically generated question- answer pairs and QA performance evaluation with state of-the-art models on TurQuAse show that the proposed framework is efficient for automatically generating spoken QA datasets. To the best of our knowledge, TurQuAse is the first publicly available spoken question answering dataset for Turkish. The proposed framework can be easily extended to other languages where a limited amount of QA data is available.</abstract>
       <url hash="1f854117">2022.findings-emnlp.342</url>
@@ -12761,7 +12761,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Bailey</first><last>Kuehl</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Iz</first><last>Beltagy</last><affiliation>Allen Institute for AI (AI2)</affiliation></author>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>4719-4734</pages>
       <abstract>While research on scientific claim verification has led to the development of powerful systems that appear to approach human performance, these approaches have yet to be tested in a realistic setting against large corpora of scientific literature. Moving to this open-domain evaluation setting, however, poses unique challenges; in particular, it is infeasible to exhaustively annotate all evidence documents. In this work, we present SciFact-Open, a new test collection designed to evaluate the performance of scientific claim verification systems on a corpus of 500K research abstracts. Drawing upon pooling techniques from information retrieval, we collect evidence for scientific claims by pooling and annotating the top predictions of four state-of-the-art scientific claim verification models. We find that systems developed on smaller corpora struggle to generalize to SciFact-Open, exhibiting performance drops of at least 15 F1. In addition, analysis of the evidence in SciFact-Open reveals interesting phenomena likely to appear when claim verification systems are deployed in practice, e.g., cases where the evidence supports only a special case of the claim. Our dataset is available at https://github.com/dwadden/scifact-open.</abstract>
@@ -12816,7 +12816,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>John</first><last>Morris</last><affiliation>Cornell Tech</affiliation></author>
       <author><first>Justin</first><last>Chiu</last><affiliation>Cornell Tech</affiliation></author>
       <author><first>Ramin</first><last>Zabih</last><affiliation>Cornell Tech</affiliation></author>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
       <pages>4777-4788</pages>
       <abstract>Deidentification seeks to anonymize textual data prior to distribution. Automatic deidentification primarily uses supervised named entity recognition from human-labeled data points. We propose an unsupervised deidentification method that masks words that leak personally-identifying information. The approach utilizes a specially trained reidentification model to identify individuals from redacted personal documents. Motivated by K-anonymity based privacy, we generate redactions that ensure a minimum reidentification rank for the correct profile of the document. To evaluate this approach, we consider the task of deidentifying Wikipedia Biographies, and evaluate using an adversarial reidentification metric. Compared to a set of unsupervised baselines, our approach deidentifies documents more completely while removing fewer words. Qualitatively, we see that the approach eliminates many identifying aspects that would fall outside of the common named entity based approach.</abstract>
       <url hash="e5df6274">2022.findings-emnlp.352</url>
@@ -12829,7 +12829,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yatin</first><last>Chaudhary</last><affiliation>DRIMCo GmbH</affiliation></author>
       <author><first>Pranav</first><last>Rai</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Matthias</first><last>Schubert</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <author><first>Pankaj</first><last>Gupta</last><affiliation>DRIMCO GmbH</affiliation></author>
       <pages>4789-4799</pages>
       <abstract>In this work, we combine the two paradigms: Federated Learning (FL) and Continual Learning (CL) for text classification task in cloud-edge continuum. The objective of Federated Continual Learning (FCL) is to improve deep learning models over life time at each client by (relevant and efficient) knowledge transfer without sharing data. Here, we address challenges in minimizing inter-client interference while knowledge sharing due to heterogeneous tasks across clients in FCL setup. In doing so, we propose a novel framework, Federated Selective Inter-client Transfer (FedSeIT) which selectively combines model parameters of foreign clients. To further maximize knowledge transfer, we assess domain overlap and select informative tasks from the sequence of historical tasks at each foreign client while preserving privacy. Evaluating against the baselines, we show improved performance, a gain of (average) 12.4% in text classification over a sequence of tasks using five datasets from diverse domains. To the best of our knowledge, this is the first work that applies FCL to NLP.</abstract>
@@ -12848,7 +12848,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Eui-In</first><last>Kim</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Felix</first><last>Gervits</last><affiliation>US Army Research Laboratory</affiliation></author>
       <author><first>Matthew</first><last>Marge</last><affiliation>Army Research Laboratory</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>4800-4822</pages>
       <abstract>In the real world, autonomous driving agents navigate in highly dynamic environments full of unexpected situations where pre-trained models are unreliable. In these situations, what is immediately available to vehicles is often only human operators. Empowering autonomous driving agents with the ability to navigate in a continuous and dynamic environment and to communicate with humans through sensorimotor-grounded dialogue becomes critical. To this end, we introduce Dialogue On the ROad To Handle Irregular Events (DOROTHIE), a novel interactive simulation platform that enables the creation of unexpected situations on the fly to support empirical studies on situated communication with autonomous driving agents. Based on this platform, we created the Situated Dialogue Navigation (SDN), a navigation benchmark of 183 trials with a total of 8415 utterances, around 18.7 hours of control streams, and 2.9 hours of trimmed audio. SDN is developed to evaluate the agent’s ability to predict dialogue moves from humans as well as generate its own dialogue moves and physical navigation actions. We further developed a transformer-based baseline model for these SDN tasks. Our empirical results indicate that language guided-navigation in a highly dynamic environment is an extremely difficult task for end-to-end models. These results will provide insight towards future work on robust autonomous driving agents</abstract>
       <url hash="a310e035">2022.findings-emnlp.354</url>
@@ -12860,7 +12860,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>He Said, She Said: Style Transfer for Shifting the Perspective of Dialogues</title>
       <author><first>Amanda</first><last>Bertsch</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Matthew R.</first><last>Gormley</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>4823-4840</pages>
       <abstract>In this work, we define a new style transfer task: perspective shift, which reframes a dialouge from informal first person to a formal third person rephrasing of the text. This task requires challenging coreference resolution, emotion attribution, and interpretation of informal text. We explore several baseline approaches and discuss further directions on this task when applied to short dialogues. As a sample application, we demonstrate that applying perspective shifting to a dialogue summarization dataset (SAMSum) substantially improves the zero-shot performance of extractive news summarization models on this data. Additionally, supervised extractive models perform better when trained on perspective shifted data than on the original dialogues. We release our code publicly.</abstract>
       <url hash="8d2ec6fa">2022.findings-emnlp.355</url>
@@ -12932,7 +12932,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="361">
       <title>Train Flat, Then Compress: Sharpness-Aware Minimization Learns More Compressible Models</title>
       <author><first>Clara</first><last>Na</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Emma</first><last>Strubell</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>4909-4936</pages>
       <abstract>Model compression by way of parameter pruning, quantization, or distillation has recently gained popularity as an approach for reducing the computational requirements of modern deep neural network models for NLP. Inspired by prior works suggesting a connection between simpler, more generalizable models and those that lie within wider loss basins, we hypothesize that optimizing for flat minima should lead to simpler parameterizations and thus more compressible models. We propose to combine sharpness-aware minimization (SAM) with various task-specific model compression methods, including iterative magnitude pruning (IMP), structured pruning with a distillation objective, and post-training dynamic quantization. Empirically, we show that optimizing for flatter minima consistently leads to greater compressibility of parameters compared to vanilla Adam when fine-tuning BERT models, with little to no loss in accuracy on the GLUE text classification and SQuAD question answering benchmarks. Moreover, SAM finds superior winning tickets during IMP that 1) are amenable to vanilla Adam optimization, and 2) transfer more effectively across tasks.</abstract>
@@ -12960,7 +12960,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Mehdi</first><last>Rezagholizadeh</last><affiliation>Noah’s Ark Lab Huawei</affiliation></author>
       <author><first>Ahmad</first><last>Rashid</last><affiliation>University of Waterloo; Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Ali</first><last>Ghodsi</last><affiliation>University of Waterloo</affiliation></author>
-      <author><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>4948-4954</pages>
       <abstract>Knowledge Distillation (KD) is a commonly used technique for improving the generalization of compact Pre-trained Language Models (PLMs) on downstream tasks. However, such methods impose the additional burden of training a separate teacher model for every new dataset.Alternatively, one may directly work on the improvement of the optimization procedure of the compact model towards better generalization. Recent works observe that the flatness of the local minimum correlates well with better generalization.In this work, we adapt Stochastic Weight Averaging (SWA), a method encouraging convergence to a flatter minimum, to fine-tuning PLMs. We conduct extensive experiments on various NLP tasks (text classification, question answering, and generation) and different model architectures and demonstrate that our adaptation improves the generalization without extra computation cost. Moreover, we observe that this simple optimization technique is able to outperform the state-of-the-art KD methods for compact models.</abstract>
       <url hash="9d9ac05d">2022.findings-emnlp.363</url>
@@ -12994,7 +12994,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="366">
       <title>Unsupervised Multi-Granularity Summarization</title>
       <author><first>Ming</first><last>Zhong</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Suyu</first><last>Ge</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Yuning</first><last>Mao</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <author><first>Yizhu</first><last>Jiao</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
@@ -13086,7 +13086,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>The Undesirable Dependence on Frequency of Gender Bias Metrics Based on Word Embeddings</title>
       <author><first>Francisco</first><last>Valentini</last><affiliation>ICC (UBA - CONICET); Maestría en Data Mining (UBA)</affiliation></author>
       <author><first>Germán</first><last>Rosati</last><affiliation>CONICET / UNSAM</affiliation></author>
-      <author><first>Diego</first><last>Fernandez Slezak</last><affiliation>Universidad de Buenos Aires</affiliation></author>
+      <author id="diego-fernandez-slezak"><first>Diego</first><last>Fernandez Slezak</last><affiliation>Universidad de Buenos Aires</affiliation></author>
       <author><first>Edgar</first><last>Altszyler</last><affiliation>Departamento de Computación, Universidad de Buenos Aires; Instituto de Investigación en Ciencias de La Computación (ICC), CONICET-Universidad de Buenos Aires</affiliation></author>
       <pages>5086-5092</pages>
       <abstract>Numerous works use word embedding-based metrics to quantify societal biases and stereotypes in texts. Recent studies have found that word embeddings can capture semantic similarity but may be affected by word frequency. In this work we study the effect of frequency when measuring female vs. male gender bias with word embedding-based bias quantification methods. We find that Skip-gram with negative sampling and GloVe tend to detect male bias in high frequency words, while GloVe tends to return female bias in low frequency words. We show these behaviors still exist when words are randomly shuffled. This proves that the frequency-based effect observed in unshuffled corpora stems from properties of the metric rather than from word associations. The effect is spurious and problematic since bias metrics should depend exclusively on word co-occurrences and not individual word frequencies. Finally, we compare these results with the ones obtained with an alternative metric based on Pointwise Mutual Information. We find that this metric does not show a clear dependence on frequency, even though it is slightly skewed towards male bias across all frequencies.</abstract>
@@ -13198,9 +13198,9 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Alessandro</first><last>Moschitti</last><affiliation>Amazon</affiliation></author>
       <author><first>Marco</first><last>Del Tredici</last><affiliation>Amazon</affiliation></author>
       <author><first>Xiaoyu</first><last>Shen</last><affiliation>Amazon</affiliation></author>
-      <author><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
       <pages>5195-5208</pages>
       <abstract>We introduce question answering with a cotext in focus, a task that simulates a free interaction with a QA system. The user reads on a screen some information about a topic, and they can follow-up with questions that can be either related or not to the topic; and the answer can be found in the document containing the screen content or from other pages. We call such information context. To study the task, we construct FocusQA, a dataset for answer sentence selection (AS2) with 12,165011unique question/context pairs, and a total of 109,940 answers. To build the dataset, we developed a novel methodology that takes existing questions and pairs them with relevant contexts. To show the benefits of this approach, we present a comparative analysis with a set of questions written by humans after reading the context, showing that our approach greatly helps in eliciting more realistic question/context pairs. Finally, we show that the task poses several challenges for incorporating contextual information. In this respect, we introduce strong baselines for answer sentence selection that outperform the precision of state-of-the-art models for AS2 up to 21.3% absolute points.</abstract>
       <url hash="2cbf69d8">2022.findings-emnlp.381</url>
@@ -13265,7 +13265,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yuan</first><last>Gong</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Sameer</first><last>Khurana</last><affiliation>MIT</affiliation></author>
       <author><first>Rhoda</first><last>Au</last><affiliation>Boston University</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>5270-5283</pages>
       <abstract>Neuropsychological exams are commonly used to diagnose various kinds of cognitive impairment. They typically involve a trained examiner who conducts a series of cognitive tests with a subject. In recent years, there has been growing interest in developing machine learning methods to extract speech and language biomarkers from exam recordings to provide automated input for cognitive assessment. Inspired by recent findings suggesting that the examiner’s language can influence cognitive impairment classifications, in this paper, we study the influence of the examiner on automatic dementia identification decisions in real-world neuropsychological exams. To mitigate the influence of the examiner, we propose a systematic three-stage pipeline for detecting dementia from exam recordings. In the first stage, we perform audio-based speaker diarization (i.e., estimating who spoke when?) by incorporating speaker discriminative features. In the second stage, we employ text-based language models to identify the role of the speaker (i.e., examiner or subject). Finally, in the third stage, we employ text- and audio-based models to detect cognitive impairment from hypothesized subject segments. Our studies suggest that incorporating audio-based diarization followed by text-based role identification helps mitigate the influences from the examiner’s segments. Further, we found that the text and audio modalities complement each other, and the performance improves when we use both modalities. We also perform several carefully designed experimental studies to assess the performance of each stage.</abstract>
       <url hash="222aa737">2022.findings-emnlp.386</url>
@@ -13305,7 +13305,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title><fixed-case>J</fixed-case>am<fixed-case>P</fixed-case>atois<fixed-case>NLI</fixed-case>: A Jamaican Patois Natural Language Inference Dataset</title>
       <author><first>Ruth-Ann</first><last>Armstrong</last><affiliation>Stanford University</affiliation></author>
       <author><first>John</first><last>Hewitt</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>5307-5320</pages>
       <abstract>JamPatoisNLI provides the first dataset for natural language inference in a creole language, Jamaican Patois.Many of the most-spoken low-resource languages are creoles. These languages commonly have a lexicon derived from a major world language and a distinctive grammar reflecting the languages of the original speakers and the process of language birth by creolization. This gives them a distinctive place in exploring the effectiveness of transfer from large monolingual or multilingual pretrained models. While our work, along with previous work, shows that transfer from these models to low-resource languages that are unrelated to languages in their training set is not very effective, we would expect stronger results from transfer to creoles. Indeed, our experiments show considerably better results from few-shot learning of JamPatoisNLI than for such unrelated languages, and help us begin to understand how the unique relationship between creoles and their high-resource base languages affect cross-lingual transfer. JamPatoisNLI, which consists of naturally-occurring premises and expert-written hypotheses, is a step towards steering research into a traditionally underserved language and a useful benchmark for understanding cross-lingual NLP.</abstract>
       <url hash="d8e6b96f">2022.findings-emnlp.389</url>
@@ -13349,7 +13349,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Kaixin</first><last>Ma</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Hao</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Xiaodong</first><last>Liu</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>5360-5374</pages>
       <abstract>We propose a novel open-domain question answering (ODQA) framework for answering single/multi-hop questions across heterogeneous knowledge sources.The key novelty of our method is the introduction of the intermediary modules into the current retriever-reader pipeline.Unlike previous methods that solely rely on the retriever for gathering all evidence in isolation,our intermediary performs a chain of reasoning over the retrieved set.Specifically, our method links the retrieved evidence with its related global context into graphs and organizes them into a candidate list of evidence chains.Built upon pretrained language models, our system achieves competitive performance on two ODQA datasets, OTT-QA and NQ, against tables and passages from Wikipedia.In particular, our model substantially outperforms the previous state-of-the-art on OTT-QA with an exact match score of 47.3 (45% relative gain).</abstract>
@@ -13402,7 +13402,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Siddharth</first><last>Dalmia</last><affiliation>Google</affiliation></author>
       <author><first>Brian</first><last>Yan</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Florian</first><last>Metze</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>5419-5429</pages>
       <abstract>End-to-end spoken language understanding (SLU) systems are gaining popularity over cascaded approaches due to their simplicity and ability to avoid error propagation. However, these systems model sequence labeling as a sequence prediction task causing a divergence from its well-established token-level tagging formulation. We build compositional end-to-end SLU systems that explicitly separate the added complexity of recognizing spoken mentions in SLU from the NLU task of sequence labeling. By relying on intermediate decoders trained for ASR, our end-to-end systems transform the input modality from speech to token-level representations that can be used in the traditional sequence labeling framework. This composition of ASR and NLU formulations in our end-to-end SLU system offers direct compatibility with pre-trained ASR and NLU systems, allows performance monitoring of individual components and enables the use of globally normalized losses like CRF, making them attractive in practical scenarios. Our models outperform both cascaded and direct end-to-end models on a labeling task of named entity recognition across SLU benchmarks.</abstract>
@@ -13442,7 +13442,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Rui</first><last>Sun</last><affiliation>Columbia University</affiliation></author>
       <author><first>Zhecan</first><last>Wang</last><affiliation>columbia university</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>5444-5454</pages>
       <abstract>From a visual scene containing multiple people, human is able to distinguish each individual given the context descriptions about what happened before, their mental/physical states or intentions, etc. Above ability heavily relies on human-centric commonsense knowledge and reasoning. For example, if asked to identify the “person who needs healing” in an image, we need to first know that they usually have injuries or suffering expressions, then find the corresponding visual clues before finally grounding the person. We present a new commonsense task, Human-centric Commonsense Grounding, that tests the models’ ability to ground individuals given the context descriptions about what happened before, and their mental/physical states or intentions. We further create a benchmark, HumanCog, a dataset with 130k grounded commonsensical descriptions annotated on 67k images, covering diverse types of commonsense and visual scenes. We set up a context-object-aware method as a strong baseline that outperforms previous pre-trained and non-pretrained models. Further analysis demonstrates that rich visual commonsense and powerful integration of multi-modal commonsense are essential, which sheds light on future works. Data and code will be available at https://github.com/Hxyou/HumanCog.</abstract>
       <url hash="c3475cf5">2022.findings-emnlp.399</url>
@@ -13455,7 +13455,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Bashar</first><last>Alhafni</last><affiliation>New York University</affiliation></author>
       <author><first>Ke</first><last>Zhang</last><affiliation>Dataminr, inc</affiliation></author>
       <author><first>Shihao</first><last>Ran</last><affiliation>Dataminr</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>5455-5477</pages>
       <abstract>Social media has increasingly played a key role in emergency response: first responders can use public posts to better react to ongoing crisis events and deploy the necessary resources where they are most needed. Timeline extraction and abstractive summarization are critical technical tasks to leverage large numbers of social media posts about events. Unfortunately, there are few datasets for benchmarking technical approaches for those tasks. This paper presents , the largest dataset of local crisis event timelines available to date. contains 1,000 crisis event timelines across four domains: wildfires, local fires, traffic, and storms. We built using a semi-automated cluster-then-refine approach to collect data from the public Twitter stream. Our initial experiments indicate a significant gap between the performance of strong baselines compared to the human performance on both tasks.Our dataset, code, and models are publicly available (https://github.com/CrisisLTLSum/CrisisTimelines).</abstract>
@@ -13496,7 +13496,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Chenghua</first><last>Lin</last><affiliation>Department of Computer Science, University of Sheffield</affiliation></author>
       <author><first>Henglin</first><last>Huang</last><affiliation>Department of Computer Science, University of Surrey</affiliation></author>
       <author><first>Frank</first><last>Guerin</last><affiliation>University of Surrey</affiliation></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
+      <author><first>Zhihao</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
       <pages>5504-5518</pages>
       <abstract>One of the key challenges of automatic story generation is how to generate a long narrative that can maintain fluency, relevance, and coherence. Despite recent progress, current story generation systems still face the challenge of how to effectively capture contextual and event features, which has a profound impact on a model’s generation performance. To address these challenges, we present EtriCA, a novel neural generation model, which improves the relevance and coherence of the generated stories through residually mapping context features to event sequences with a cross-attention mechanism. Such a feature capturing mechanism allows our model to better exploit the logical relatedness between events when generating stories. Extensive experiments based on both automatic and human evaluations show that our model significantly outperforms state-of-the-art baselines, demonstrating the effectiveness of our model in leveraging context and event features.</abstract>
       <url hash="d9580b3f">2022.findings-emnlp.403</url>
@@ -13564,7 +13564,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Tahira</first><last>Naseem</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Gaetano</first><last>Rossiello</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
       <pages>5571-5580</pages>
       <abstract>Most existing approaches for Knowledge Base Question Answering (KBQA) focus on a specific underlying knowledge base either because of inherent assumptions in the approach, or because evaluating it on a different knowledge base requires non-trivial changes. However, many popular knowledge bases share similarities in their underlying schemas that can be leveraged to facilitate generalization across knowledge bases. To achieve this generalization, we introduce a KBQA framework based on a 2-stage architecture that explicitly separates semantic parsing from the knowledge base interaction, facilitating transfer learning across datasets and knowledge graphs. We show that pretraining on datasets with a different underlying knowledge base can nevertheless provide significant performance gains and reduce sample complexity. Our approach achieves comparable or state-of-the-art performance for LC-QuAD (DBpedia), WebQSP (Freebase), SimpleQuestions (Wikidata) and MetaQA (Wikimovies-KG).</abstract>
       <url hash="342df4a7">2022.findings-emnlp.408</url>
@@ -13598,7 +13598,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Zihuiwen</first><last>Ye</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Tao</first><last>Yu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Linfeng</first><last>Song</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
       <pages>5608-5620</pages>
       <abstract>The task of context-dependent text-to-SQL aims to convert multi-turn user utterances to formal SQL queries. This is a challenging task due to both the scarcity of training data from which to learn complex contextual dependencies and to generalize to unseen databases. In this paper we explore augmenting the training datasets using self-play, which leverages contextual information to synthesize new interactions to adapt the model to new databases. We first design a SQL-to-text model conditioned on a sampled goal query, which represents a user’s intent, that then converses with a text-to-SQL semantic parser to generate new interactions. We then filter the synthesized interactions and retrain the models with the augmented data. We find that self-play improves the accuracy of a strong baseline on SParC and CoSQL, two widely used cross-domain text-to-SQL datasets. Our analysis shows that self-play simulates various conversational thematic relations, enhances cross-domain generalization and improves beam-search.</abstract>
       <url hash="a7e88e90">2022.findings-emnlp.411</url>
@@ -13648,8 +13648,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Bo-Ru</first><last>Lu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yushi</first><last>Hu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hao</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
       <pages>5657-5670</pages>
       <abstract>Human conversations can evolve in many different ways, creating challenges for automatic understanding and summarization. Goal-oriented conversations often have meaningful sub-dialogue structure, but it can be highly domain-dependent. This work introduces an unsupervised approach to learning hierarchical conversation structure, including turn and sub-dialogue segment labels, corresponding roughly to dialogue acts and sub-tasks, respectively. The decoded structure is shown to be useful in enhancing neural models of language for three conversation-level understanding tasks. Further, the learned finite-state sub-dialogue network is made interpretable through automatic summarization.</abstract>
       <url hash="b0ccaa61">2022.findings-emnlp.415</url>
@@ -13663,7 +13663,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yichong</first><last>Xu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yuwei</first><last>Fang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Hai</first><last>Zhao</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
       <author><first>Michael</first><last>Zeng</last><affiliation>Microsoft Corp</affiliation></author>
@@ -13731,8 +13731,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Mianzhi</first><last>Pan</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>5739-5749</pages>
       <abstract>In recent years, vision and language pre-training (VLP) models have advanced the state-of-the-art results in a variety of cross-modal downstream tasks. Aligning cross-modal semantics is claimed to be one of the essential capabilities of VLP models. However, it still remains unclear about the inner working mechanism of alignment in VLP models. In this paper, we propose a new probing method that is based on image captioning to first empirically study the cross-modal semantics alignment of VLP models. Our probing method is built upon the fact that given an image-caption pair, the VLP models will give a score, indicating how well two modalities are aligned; maximizing such scores will generate sentences that VLP models believe are of good alignment. Analyzing these sentences thus will reveal in what way different modalities are aligned and how well these alignments are in VLP models. We apply our probing method to five popular VLP models, including UNITER, ROSITA, ViLBERT, CLIP, and LXMERT, and provide a comprehensive analysis of the generated captions guided by these models. Our results show that VLP models (1) focus more on just aligning objects with visual words, while neglecting global semantics; (2) prefer fixed sentence patterns, thus ignoring more important textual information including fluency and grammar; and (3) deem the captions with more visual words are better aligned with images. These findings indicate that VLP models still have weaknesses in cross-modal semantics alignment and we hope this work will draw researchers’ attention to such problems when designing a new VLP model.</abstract>
       <url hash="52f65a23">2022.findings-emnlp.421</url>
@@ -13778,7 +13778,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Nikolaos</first><last>Flemotomos</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Zac</first><last>Imel</last><affiliation>University of Utah</affiliation></author>
       <author><first>David</first><last>Atkins</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
       <pages>5787-5795</pages>
       <abstract>In psychotherapy interactions, the quality of a session is assessed by codifying the communicative behaviors of participants during the conversation through manual observation and annotation. Developing computational approaches for automated behavioral coding can reduce the burden on human coders and facilitate the objective evaluation of the intervention. In the real world, however, implementing such algorithms is associated with data sparsity challenges since privacy concerns lead to limited available in-domain data. In this paper, we leverage a publicly available conversation-based dataset and transfer knowledge to the low-resource behavioral coding task by performing an intermediate language model training via meta-learning. We introduce a task augmentation method to produce a large number of “analogy tasks” — tasks similar to the target one — and demonstrate that the proposed framework predicts target behaviors more accurately than all the other baseline models.</abstract>
       <url hash="1a43f642">2022.findings-emnlp.425</url>
@@ -13787,7 +13787,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     </paper>
     <paper id="426">
       <title>Learning to Detect Noisy Labels Using Model-Based Features</title>
-      <author id="zhihao-wang"><first>Zhihao</first><last>Wang</last><affiliation>Meta</affiliation></author>
+      <author><first>Zhihao</first><last>Wang</last><affiliation>Meta</affiliation></author>
       <author><first>Zongyu</first><last>Lin</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Junjie</first><last>Wen</last><affiliation>China Merchant Bank</affiliation></author>
       <author><first>Xianxin</first><last>Chen</last><affiliation>Recurrent AI</affiliation></author>
@@ -13905,7 +13905,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Jun</first><last>Wang</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>Monash University</affiliation></author>
       <author><first>Benjamin</first><last>Rubinstein</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>5906-5913</pages>
       <abstract>Neural machine translation (NMT) systems are vulnerable to backdoor attacks, whereby an attacker injects poisoned samples into training such that a trained model produces malicious translations. Nevertheless, there is little research on defending against such backdoor attacks in NMT. In this paper, we first show that backdoor attacks that have been successful in text classification are also effective against machine translation tasks. We then present a novel defence method that exploits a key property of most backdoor attacks: namely the asymmetry between the source and target language sentences, which is used to facilitate malicious text insertions, substitutions and suchlike. Our technique uses word alignment coupled with language model scoring to detect outlier tokens, and thus can find and filter out training instances which may contain backdoors. Experimental results demonstrate that our technique can significantly reduce the success of various attacks by up to 89.0%, while not affecting predictive accuracy.</abstract>
       <url hash="b0fa6aec">2022.findings-emnlp.435</url>
@@ -14081,7 +14081,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="448">
       <title><fixed-case>A</fixed-case>da<fixed-case>P</fixed-case>rompt: Adaptive Model Training for Prompt-based <fixed-case>NLP</fixed-case></title>
       <author><first>Yulong</first><last>Chen</last><affiliation>Zhejiang University, Westlake University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Li</first><last>Dong</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Shuohang</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
@@ -14123,7 +14123,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     </paper>
     <paper id="451">
       <title>Generative Aspect-Based Sentiment Analysis with Contrastive Learning and Expressive Structure</title>
-      <author><first>Joseph</first><last>Peper</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joseph-j-peper"><first>Joseph</first><last>Peper</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Lu</first><last>Wang</last><affiliation>University of Michigan</affiliation></author>
       <pages>6089-6095</pages>
       <abstract>Generative models have demonstrated impressive results on Aspect-based Sentiment Analysis (ABSA) tasks, particularly for the emerging task of extracting Aspect-Category-Opinion-Sentiment (ACOS) quadruples. However, these models struggle with implicit sentiment expressions, which are commonly observed in opinionated content such as online reviews. In this work, we introduce GEN-SCL-NAT, which consists of two techniques for improved structured generation for ACOS quadruple extraction. First, we propose GEN-SCL, a supervised contrastive learning objective that aids quadruple prediction by encouraging the model to produce input representations that are discriminable across key input attributes, such as sentiment polarity and the existence of implicit opinions and aspects. Second, we introduce GEN-NAT, a new structured generation format that better adapts pre-trained autoregressive encoder-decoder models to extract quadruples in a generative fashion. Experimental results show that GEN-SCL-NAT achieves top performance across three ACOS datasets, averaging 1.48% F1 improvement, with a maximum 1.73% increase on the LAPTOP-L1 dataset. Additionally, we see significant gains on implicit aspect and opinion splits that have been shown as challenging for existing ACOS approaches.</abstract>
@@ -14258,7 +14258,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yuxia</first><last>Wu</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Xueming</first><last>Qian</last><affiliation>Xi’an Jiaotong University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>6207-6218</pages>
       <abstract>Discovering new slots is critical to the success of dialogue systems. Most existing methods rely on automatic slot induction in unsupervised fashion or perform domain adaptation across zero or few-shot scenarios. They have difficulties in providing high-quality supervised signals to learn clustering-friendly features, and are limited in effectively transferring the prior knowledge from known slots to new slots. In this work, we propose a Semi-supervised Incremental Clustering method (SIC), to discover new slots with the aid of existing linguistic annotation models and limited known slot data. Specifically, we harvest slot value candidates with NLP model cues and innovatively formulate the slot discovery task under an incremental clustering framework. The model gradually calibrate slot representations under the supervision of generated pseudo-labels, and automatically learns to terminate when no more salient slot remains. Our thorough evaluation on five public datasets demonstrates that it significantly outperforms state-of-the-art models.</abstract>
       <url hash="4b188b34">2022.findings-emnlp.462</url>
@@ -14323,7 +14323,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Xiaolong</first><last>Jin</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Long</first><last>Bai</last><affiliation>School of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>6266-6275</pages>
       <abstract>Prototypical network based joint methods have attracted much attention in few-shot event detection, which carry out event detection in a unified sequence tagging framework. However, these methods suffer from the inaccurate prototype representation problem, due to two main reasons: the number of instances for calculating prototypes is limited; And, they do not well capture the relationships among event prototypes. To deal with this problem, we propose a Knowledge-Enhanced self-supervised Prototypical Network, called KE-PN, for few-shot event detection. KE-PN adopts hybrid rules, which can automatically align event types to an external knowledge base, i.e., FrameNet, to obtain more instances.It proposes a self-supervised learning method to filter out noisy data from enhanced instances. KE-PN is further equipped with an auxiliary event type relationship classification module, which injects the relationship information into representations of event prototypes. Extensive experiments on three benchmark datasets, i.e., FewEvent, MAVEN, and ACE2005 demonstrate the state-of-the-art performance of KE-PN.</abstract>
       <url hash="3a11d9b0">2022.findings-emnlp.467</url>
@@ -14352,7 +14352,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Sin-En</first><last>Lu</last><affiliation>Department of Computer Science and Information Engineering, National Central University</affiliation></author>
       <author><first>Bo-Han</first><last>Lu</last><affiliation>National Central University, Taiwan</affiliation></author>
       <author><first>Chao-Yi</first><last>Lu</last><affiliation>Purdue Unversity</affiliation></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>Academia Sinica</affiliation></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>Academia Sinica</affiliation></author>
       <pages>6287-6305</pages>
       <abstract>In natural language processing (NLP), code-mixing (CM) is a challenging task, especially when the mixed languages include dialects. In Southeast Asian countries such as Singapore, Indonesia, and Malaysia, Hokkien-Mandarin is the most widespread code-mixed language pair among Chinese immigrants, and it is also common in Taiwan. However, dialects such as Hokkien often have a scarcity of resources and the lack of an official writing system, limiting the development of dialect CM research. In this paper, we propose a method to construct a Hokkien-Mandarin CM dataset to mitigate the limitation, overcome the morphological issue under the Sino-Tibetan language family, and offer an efficient Hokkien word segmentation method through a linguistics-based toolkit. Furthermore, we use our proposed dataset and employ transfer learning to train the XLM (cross-lingual language model) for translation tasks. To fit the code-mixing scenario, we adapt XLM slightly. We found that by using linguistic knowledge, rules, and language tags, the model produces good results on CM data translation while maintaining monolingual translation quality.</abstract>
       <url hash="b2a9f574">2022.findings-emnlp.469</url>
@@ -14433,7 +14433,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Chi</first><last>Zhang</last><affiliation>HKUST</affiliation></author>
       <author><first>Dongkyu</first><last>Lee</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Yingxiu</first><last>Zhao</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Nevin L.</first><last>Zhang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <pages>6364-6376</pages>
       <abstract>Emotional conversation systems generate responses for the input queries considering the speaker’s emotions in a conversation. Existing emotional conversation systems output emotional responses according to either a given emotion or the user’s emotion reflected in the input queries. Following a given emotion may lead to an emotional drift between the given emotion and the conversation state, and following only the user’s emotion may aggravate the user’s negative feelings if users suffer from a negative mood. In this paper, we propose to generate empathetic responses catering to the user’s emotions while leading the conversation to be emotionally positive. Particularly, by abstracting the conversation corpus, we extract and store the different responding strategies for different users’ emotions and conversational topics into a memory. We encourage positive emotions in conversation via a sentiment evaluator. We model the memory outputs with a Gaussian mixture distribution and sample a final responding strategy from the distribution. The strategy acts as a condition to a transformer model to generate responses. The experiments verify our model surpasses the baseline methods in appropriateness, diversity, and generating emotionally positive responses.</abstract>
@@ -14469,8 +14469,8 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="478">
       <title>Model and Data Transfer for Cross-Lingual Sequence Labelling in Zero-Resource Settings</title>
       <author><first>Iker</first><last>García-Ferrero</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>German</first><last>Rigau</last><affiliation>UPV/EHU</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last><affiliation>UPV/EHU</affiliation></author>
       <pages>6403-6416</pages>
       <abstract>Zero-resource cross-lingual transfer approaches aim to apply supervised modelsfrom a source language to unlabelled target languages. In this paper we performan in-depth study of the two main techniques employed so far for cross-lingualzero-resource sequence labelling, based either on data or model transfer.Although previous research has proposed translation and annotation projection(data-based cross-lingual transfer) as an effective technique for cross-lingualsequence labelling, in this paper we experimentally demonstrate that highcapacity multilingual language models applied in a zero-shot (model-basedcross-lingual transfer) setting consistently outperform data-basedcross-lingual transfer approaches. A detailed analysis of our results suggeststhat this might be due to important differences in language use. Morespecifically, machine translation often generates a textual signal which isdifferent to what the models are exposed to when using gold standard data,which affects both the fine-tuning and evaluation processes. Our results alsoindicate that data-based cross-lingual transfer approaches remain a competitiveoption when high-capacity multilingual language models are not available.</abstract>
       <url hash="c0879c69">2022.findings-emnlp.478</url>
@@ -14481,7 +14481,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     <paper id="479">
       <title>Early Guessing for Dialect Identification</title>
       <author><first>Vani</first><last>Kanjirangat</last><affiliation>IDSIA</affiliation></author>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Fabio</first><last>Rinaldi</last><affiliation>IDSIA, Swiss AI Institute</affiliation></author>
       <author><first>Ljiljana</first><last>Dolamic</last><affiliation>armasuisse S&amp;T</affiliation></author>
       <pages>6417-6426</pages>
@@ -14546,7 +14546,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Large-Scale Differentially Private <fixed-case>BERT</fixed-case></title>
       <author><first>Rohan</first><last>Anil</last><affiliation>Google</affiliation></author>
       <author><first>Badih</first><last>Ghazi</last><affiliation>Google</affiliation></author>
-      <author><first>Vineet</first><last>Gupta</last><affiliation>Google</affiliation></author>
+      <author id="vineet-gupta"><first>Vineet</first><last>Gupta</last><affiliation>Google</affiliation></author>
       <author><first>Ravi</first><last>Kumar</last><affiliation>Google</affiliation></author>
       <author><first>Pasin</first><last>Manurangsi</last><affiliation>Google</affiliation></author>
       <pages>6481-6491</pages>
@@ -14586,7 +14586,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>Weakly Supervised Headline Dependency Parsing</title>
       <author><first>Adrian</first><last>Benton</last><affiliation>Google</affiliation></author>
       <author><first>Tianze</first><last>Shi</last><affiliation>Google</affiliation></author>
-      <author><first>Ozan</first><last>İrsoy</last><affiliation>Bloomberg LP</affiliation></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last><affiliation>Bloomberg LP</affiliation></author>
       <author><first>Igor</first><last>Malioutov</last><affiliation>Bloomberg L.P.</affiliation></author>
       <pages>6520-6535</pages>
       <abstract>English news headlines form a register with unique syntactic properties that have been documented in linguistics literature since the 1930s. However, headlines have received surprisingly little attention from the NLP syntactic parsing community. We aim to bridge this gap by providing the first news headline corpus of Universal Dependencies annotated syntactic dependency trees, which enables us to evaluate existing state-of-the-art dependency parsers on news headlines. To improve English news headline parsing accuracies, we develop a projection method to bootstrap silver training data from unlabeled news headline-article lead sentence pairs. Models trained on silver headline parses demonstrate significant improvements in performance over models trained solely on gold-annotated long-form texts. Ultimately, we find that, although projected silver training data improves parser performance across different news outlets, the improvement is moderated by constructions idiosyncratic to outlet.</abstract>
@@ -14600,7 +14600,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Nazneen</first><last>Rajani</last><affiliation>Hugging Face</affiliation></author>
       <author><first>Divyansh</first><last>Agarwal</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>6536-6558</pages>
       <abstract>The majority of existing text summarization datasets include short-form source documents that lack long-range causal and temporal dependencies, and often contain strong layout and stylistic biases. While relevant, such datasets will offer limited challenges for future text summarization systems. We address these issues by introducing BOOKSUM, a collection of datasets for long-form narrative summarization. Our dataset covers documents from the literature domain, such as novels, plays and stories, and includes highly abstractive, human written summaries on three levels of granularity of increasing difficulty: paragraph-, chapter-, and book-level. The domain and structure of our dataset poses a unique set of challenges for summarization systems, which include: processing very long documents, non-trivial causal and temporal dependencies, and rich discourse structures. To facilitate future work, we trained and evaluated multiple extractive and abstractive summarization models as baselines for our dataset.</abstract>
       <url hash="9ffd5121">2022.findings-emnlp.488</url>
@@ -14642,7 +14642,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Jian</first><last>Li</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xingshan</first><last>Zeng</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>6595-6607</pages>
       <abstract>News recommendation (NR) is essential for online news services. Existing NR methods typically adopt a news-user representation learning framework, facing two potential limitations. First, in news encoder, single candidate news encoding suffers from an insufficient semantic information problem. Second, existing graph-based NR methods are promising but lack effective news-user feature interaction, rendering the graph-based recommendation suboptimal. To overcome these limitations, we propose dual-interactive graph attention networks (DIGAT) consisting of news- and user-graph channels. In the news-graph channel, we enrich the semantics of single candidate news by incorporating the semantically relevant news information with a semantic-augmented graph (SAG). In the user-graph channel, multi-level user interests are represented with a news-topic graph. Most notably, we design a dual-graph interaction process to perform effective feature interaction between the news and user graphs, which facilitates accurate news-user representation matching. Experiment results on the benchmark dataset MIND show that DIGAT outperforms existing news recommendation methods. Further ablation studies and analyses validate the effectiveness of (1) semantic-augmented news graph modeling and (2) dual-graph interaction.</abstract>
       <url hash="af89a7c9">2022.findings-emnlp.491</url>
@@ -14737,7 +14737,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Chaoqun</first><last>Duan</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Youzheng</first><last>Wu</last><affiliation>JD AI Research</affiliation></author>
       <author><first>Xiaodong</first><last>He</last><affiliation>JD AI Research</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>tjzhao@hit.edu.cn</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>tjzhao@hit.edu.cn</affiliation></author>
       <pages>6687-6697</pages>
       <abstract>Hybrid question answering (HQA) aims to answer questions over heterogeneous data, including tables and passages linked to table cells. The heterogeneous data can provide different granularity evidence to HQA models, e.t., column, row, cell, and link. Conventional HQA models usually retrieve coarse- or fine-grained evidence to reason the answer. Through comparison, we find that coarse-grained evidence is easier to retrieve but contributes less to the reasoner, while fine-grained evidence is the opposite. To preserve the advantage and eliminate the disadvantage of different granularity evidence, we propose MuGER2, a Multi-Granularity Evidence Retrieval and Reasoning approach. In evidence retrieval, a unified retriever is designed to learn the multi-granularity evidence from the heterogeneous data. In answer reasoning, an evidence selector is proposed to navigate the fine-grained evidence for the answer reader based on the learned multi-granularity evidence. Experiment results on the HybridQA dataset show that MuGER2 significantly boosts the HQA performance. Further ablation analysis verifies the effectiveness of both the retrieval and reasoning designs.</abstract>
       <url hash="c2da6696">2022.findings-emnlp.498</url>
@@ -14774,7 +14774,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Naoki</first><last>Kobayashi</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Tsutomu</first><last>Hirao</last><affiliation>NTT Communication Science Labs.</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Masaaki</first><last>Nagata</last><affiliation>NTT Corporation</affiliation></author>
       <pages>6725-6737</pages>
       <abstract>To promote and further develop RST-style discourse parsing models, we need a strong baseline that can be regarded as a reference for reporting reliable experimental results. This paper explores a strong baseline by integrating existing simple parsing strategies, top-down and bottom-up, with various transformer-based pre-trained language models.The experimental results obtained from two benchmark datasets demonstrate that the parsing performance strongly relies on the pre-trained language models rather than the parsing strategies.In particular, the bottom-up parser achieves large performance gains compared to the current best parser when employing DeBERTa.We further reveal that language models with a span-masking scheme especially boost the parsing performance through our analysis within intra- and multi-sentential parsing, and nuclearity prediction.</abstract>
@@ -14814,7 +14814,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title>A <fixed-case>POMDP</fixed-case> Dialogue Policy with 3-way Grounding and Adaptive <fixed-case>S</fixed-case>ensing for Learning through Communication</title>
       <author><first>Maryam</first><last>Zare</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Alan</first><last>Wagner</last><affiliation>psu.edu</affiliation></author>
-      <author><first>Rebecca</first><last>Passonneau</last><affiliation>psu.edu</affiliation></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last><affiliation>psu.edu</affiliation></author>
       <pages>6767-6780</pages>
       <abstract>Agents to assist with rescue, surgery, and similar activities could collaborate better with humans if they could learn new strategic behaviors through communication. We introduce a novel POMDP dialogue policy for learning from people. The policy has 3-way grounding of language in the shared physical context, the dialogue context, and persistent knowledge. It can learn distinct but related games, and can continue learning across dialogues for complex games. A novel sensing component supports adaptation to information-sharing differences across people. The single policy performs better than oracle policies customized to specific games and information behavior.</abstract>
       <url hash="eb9c2a75">2022.findings-emnlp.504</url>
@@ -14863,7 +14863,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title><fixed-case>WANLI</fixed-case>: Worker and <fixed-case>AI</fixed-case> Collaboration for Natural Language Inference Dataset Creation</title>
       <author><first>Alisa</first><last>Liu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Swabha</first><last>Swayamdipta</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>University of Washington</affiliation></author>
       <pages>6826-6847</pages>
       <abstract>A recurring challenge of crowdsourcing NLP datasets at scale is that human writers often rely on repetitive patterns when crafting examples, leading to a lack of linguistic diversity. We introduce a novel approach for dataset creation based on worker and AI collaboration, which brings together the generative strength of language models and the evaluative strength of humans. Starting with an existing dataset, MultiNLI for natural language inference (NLI), our approach uses dataset cartography to automatically identify examples that demonstrate challenging reasoning patterns, and instructs GPT-3 to compose new examples with similar patterns. Machine generated examples are then automatically filtered, and finally revised and labeled by human crowdworkers. The resulting dataset, WANLI, consists of 107,885 NLI examples and presents unique empirical strengths over existing NLI datasets. Remarkably, training a model on WANLI improves performance on eight out-of-domain test sets we consider, including by 11% on HANS and 9% on Adversarial NLI, compared to training on the 4x larger MultiNLI. Moreover, it continues to be more effective than MultiNLI augmented with other NLI datasets. Our results demonstrate the promise of leveraging natural language generation techniques and re-imagining the role of humans in the dataset creation process.</abstract>
@@ -14965,7 +14965,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Zhaofeng</first><last>Wu</last><affiliation>The Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Hao</first><last>Peng</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Nikolaos</first><last>Pappas</last><affiliation>Amazon Web Services (AWS AI)</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>6931-6939</pages>
       <abstract>Document-level machine translation leverages inter-sentence dependencies to produce more coherent and consistent translations. However, these models, predominantly based on transformers, are difficult to scale to long documents as their attention layers have quadratic complexity in the sequence length. Recent efforts on efficient attention improve scalability, but their effect on document translation remains unexplored. In this work, we investigate the efficacy of a recent linear attention model by Peng et al. (2021) on document translation and augment it with a sentential gate to promote a recency inductive bias. We evaluate the model on IWSLT 2015 and OpenSubtitles 2018 against the transformer, demonstrating substantially increased decoding speed on long sequences with similar or better BLEU scores. We show that sentential gating further improves translation quality on IWSLT.</abstract>
       <url hash="10c62f40">2022.findings-emnlp.515</url>
@@ -14998,7 +14998,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Han</first><last>Guo</last><affiliation>CMU</affiliation></author>
       <author><first>Bowen</first><last>Tan</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhengzhong</first><last>Liu</last><affiliation>Carnegie Mellon University; Petuum INC.</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>UC San Diego</affiliation></author>
       <pages>6969-6991</pages>
       <abstract>Maximum likelihood estimation (MLE) is the predominant algorithm for training text generation models. This paradigm relies on direct supervision examples, which is not applicable to many emerging applications, such as generating adversarial attacks or generating prompts to control language models. Reinforcement learning (RL) on the other hand offers a more flexible solution by allowing users to plug in arbitrary task metrics as reward. Yet previous RL algorithms for text generation, such as policy gradient (on-policy RL) and Q-learning (off-policy RL), are often notoriously inefficient or unstable to train due to the large sequence space and the sparse reward received only at the end of sequences. In this paper, we introduce a new RL formulation for text generation from the soft Q-learning (SQL) perspective. It enables us to draw from the latest RL advances, such as path consistency learning, to combine the best of on-/off-policy updates, and learn effectively from sparse reward. We apply the approach to a wide range of novel text generation tasks, including learning from noisy/negative examples, adversarial attacks, and prompt generation. Experiments show our approach consistently outperforms both task-specialized algorithms and the previous RL methods.</abstract>
@@ -15063,7 +15063,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Xiaoqing</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
       <author><first>Cho-Jui</first><last>Hsieh</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>7054-7063</pages>
       <abstract>The existence and pervasiveness of textual adversarial examples have raised serious concerns to security-critical applications. Many methods have been developed to defend against adversarial attacks for neural natural language processing (NLP) models.Adversarial training is one of the most successful defense methods by adding some random or intentional perturbations to the original input texts and making the models robust to the perturbed examples.In this study, we explore the feasibility of improving the adversarial robustness of NLP models by performing perturbations in the parameter space rather than the input feature space.The weight perturbation helps to find a better solution (i.e., the values of weights) that minimizes the adversarial loss among other feasible solutions.We found that the weight perturbation can significantly improve the robustness of NLP models when it is combined with the perturbation in the input embedding space, yielding the highest accuracy on both clean and adversarial examples across different datasets.</abstract>
       <url hash="789b8016">2022.findings-emnlp.523</url>
@@ -15190,7 +15190,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yiwen</first><last>Ding</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Zhiheng</first><last>Lyu</last><affiliation>The University of Hong Kong</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>ETH Zurich</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Bernhard</first><last>Schoelkopf</last><affiliation>Max-Planck Institute for Intelligent Systems</affiliation></author>
       <pages>7180-7198</pages>
       <abstract>Reasoning is central to human intelligence. However, fallacious arguments are common, and some exacerbate problems such as spreading misinformation about climate change. In this paper, we propose the task of logical fallacy detection, and provide a new dataset (Logic) of logical fallacies generally found in text, together with an additional challenge set for detecting logical fallacies in climate change claims (LogicClimate). Detecting logical fallacies is a hard problem as the model must understand the underlying logical structure of the argument. We find that existing pretrained large language models perform poorly on this task. In contrast, we show that a simple structure-aware classifier outperforms the best language model by 5.46% F1 scores on Logic and 4.51% on LogicClimate. We encourage future work to explore this task since (a) it can serve as a new reasoning challenge for language models, and (b) it can have potential applications in tackling the spread of misinformation. Our dataset and code are available at https://github.com/causalNLP/logical-fallacy</abstract>
@@ -15212,7 +15212,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
     </paper>
     <paper id="534">
       <title>Revisiting Transformer-based Models for Long Document Classification</title>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
       <author><first>Ilias</first><last>Chalkidis</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Sune</first><last>Darkner</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Desmond</first><last>Elliott</last><affiliation>University of Copenhagen</affiliation></author>
@@ -15289,7 +15289,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <title><fixed-case>CHIA</fixed-case>: <fixed-case>CH</fixed-case>oosing Instances to Annotate for Machine Translation</title>
       <author><first>Rajat</first><last>Bhatnagar</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Ananya</first><last>Ganesh</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>7299-7315</pages>
       <abstract>Neural machine translation (MT) systems have been shown to perform poorly on low-resource language pairs, for which large-scale parallel data is unavailable. Making the data annotation process faster and cheaper is therefore important to ensure equitable access to MT systems. To make optimal use of a limited annotation budget, we present CHIA (choosing instances to annotate), a method for selecting instances to annotate for machine translation. Using an existing multi-way parallel dataset of high-resource languages, we first identify instances, based on model training dynamics, that are most informative for training MT models for high-resource languages. We find that there are cross-lingual commonalities in instances that are useful for MT model training, which we use to identify instances that will be useful to train models on a new target language. Evaluating on 20 languages from two corpora, we show that training on instances selected using our method provides an average performance improvement of 1.59 BLEU over training on randomly selected instances of the same size.</abstract>
       <url hash="c7a97844">2022.findings-emnlp.540</url>
@@ -15323,7 +15323,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Yajuan</first><last>Lyu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Wei</first><last>Li</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>7328-7338</pages>
       <abstract>A Temporal Knowledge Graph (TKG) is a sequence of KGs with respective timestamps, which adopts quadruples in the form of (<i>subject</i>, <i>relation</i>, <i>object</i>, <i>timestamp</i>) to describe dynamic facts. TKG reasoning has facilitated many real-world applications via answering such queries as (<i>query entity</i>, <i>query relation</i>, <i>?</i>, <i>future timestamp</i>) about future. This is actually a matching task between a query and candidate entities based on their historical structures, which reflect behavioral trends of the entities at different timestamps. In addition, recent KGs provide background knowledge of all the entities, which is also helpful for the matching. Thus, in this paper, we propose the <b>Hi</b>storical <b>S</b>tructure <b>Match</b>ing (<b>HiSMatch</b>) model. It applies two structure encoders to capture the semantic information contained in the historical structures of the query and candidate entities. Besides, it adopts another encoder to integrate the background knowledge into the model. TKG reasoning experiments on six benchmark datasets demonstrate the significant improvement of the proposed HiSMatch model, with up to 5.6% performance improvement in MRR, compared to the state-of-the-art baselines.</abstract>
       <url hash="cb8e651a">2022.findings-emnlp.542</url>
@@ -15364,9 +15364,9 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Lisa</first><last>Bauer</last><affiliation>University of North Carolina-Chapel Hill</affiliation></author>
       <author><first>Karthik</first><last>Gopalakrishnan</last><affiliation>Amazon</affiliation></author>
       <author><first>Spandana</first><last>Gella</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>7372-7386</pages>
       <abstract>Prompting inputs with natural language task descriptions has emerged as a popular mechanism to elicit reasonably accurate outputs from large-scale generative language models with little to no in-context supervision. This also helps gain insight into how well language models capture the semantics of a wide range of downstream tasks purely from self-supervised pre-training on massive corpora of unlabeled text. Such models have naturally also been exposed to a lot of undesirable content like racist and sexist language and there is only some work on awareness of models along these dimensions. In this paper, we define and comprehensively evaluate how well such language models capture the semantics of four tasks for bias: diagnosis, identification, extraction and rephrasing. We define three broad classes of task descriptions for these tasks: statement, question, and completion, with numerous lexical variants within each class. We study the efficacy of prompting for each task using these classes and the null task description across several decoding methods and few-shot examples. Our analyses indicate that language models are capable of performing these tasks to widely varying degrees across different bias dimensions, such as gender and political affiliation. We believe our work is an important step towards unbiased language models by quantifying the limits of current self-supervision objectives at accomplishing such sociologically challenging tasks.</abstract>
       <url hash="96779a8c">2022.findings-emnlp.545</url>
@@ -15395,7 +15395,7 @@ Faster and Smaller Speech Translation without Quality Compromise</title>
       <author><first>Baolin</first><last>Peng</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Michel</first><last>Galley</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Sudha</first><last>Rao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
-      <author><first>Bill</first><last>Dolan</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Snigdha</first><last>Chaturvedi</last><affiliation>University of North Carolina, Chapel Hill</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>7397-7413</pages>
diff --git a/data/xml/2022.finnlp.xml b/data/xml/2022.finnlp.xml
index c24985a70e..7f61c5db86 100644
--- a/data/xml/2022.finnlp.xml
+++ b/data/xml/2022.finnlp.xml
@@ -22,7 +22,7 @@
       <title>Contextualizing Emerging Trends in Financial News Articles</title>
       <author><first>Nhu Khoa</first><last>Nguyen</last><affiliation>L3i Laboratory, La Rochelle University</affiliation></author>
       <author><first>Thierry</first><last>Delahaut</last><affiliation>La Banque Postale - Asset Management</affiliation></author>
-      <author><first>Emanuela</first><last>Boros</last><affiliation>University of La Rochelle</affiliation></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last><affiliation>University of La Rochelle</affiliation></author>
       <author><first>Antoine</first><last>Doucet</last><affiliation>University of La Rochelle</affiliation></author>
       <author><first>Gaël</first><last>Lejeune</last><affiliation>STIH, Sorbonne Université</affiliation></author>
       <pages>1-9</pages>
@@ -173,7 +173,7 @@
     <paper id="13">
       <title><fixed-case>LIPI</fixed-case> at the <fixed-case>F</fixed-case>in<fixed-case>NLP</fixed-case>-2022 <fixed-case>ERAI</fixed-case> Task: Ensembling Sentence Transformers for Assessing Maximum Possible Profit and Loss from Online Financial Posts</title>
       <author><first>Sohom</first><last>Ghosh</last><affiliation>Fidelity Investments</affiliation></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last><affiliation>Jadavpur University</affiliation></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last><affiliation>Jadavpur University</affiliation></author>
       <pages>111-115</pages>
       <abstract>Using insights from social media for making investment decisions has become mainstream. However, in the current era of information ex- plosion, it is essential to mine high-quality so- cial media posts. The FinNLP-2022 ERAI task deals with assessing Maximum Possible Profit (MPP) and Maximum Loss (ML) from social me- dia posts relating to finance. In this paper, we present our team LIPI’s approach. We ensem- bled a range of Sentence Transformers to quan- tify these posts. Unlike other teams with vary- ing performances across different metrics, our system performs consistently well. Our code is available here <url>https://github.com/sohomghosh/LIPI_ERAI_</url> FinNLP_EMNLP- 2022/</abstract>
       <url hash="bb490c8d">2022.finnlp-1.13</url>
@@ -298,7 +298,7 @@
     <paper id="23">
       <title>How Can a Teacher Make Learning From Sparse Data Softer? Application to Business Relation Extraction</title>
       <author><first>Hadjer</first><last>Khaldi</last><affiliation>IRIT - University of Paul Sabatier/ Geotrend</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>University of Toulouse</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>University of Toulouse</affiliation></author>
       <author><first>Camille</first><last>Pradel</last><affiliation>Geotrend</affiliation></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last><affiliation>Cnrs - Irit</affiliation></author>
       <pages>170-177</pages>
@@ -361,7 +361,7 @@
     <paper id="28">
       <title><fixed-case>F</fixed-case>in<fixed-case>S</fixed-case>im4-<fixed-case>ESG</fixed-case> Shared Task: Learning Semantic Similarities for the Financial Domain. Extended edition to <fixed-case>ESG</fixed-case> insights</title>
       <author><first>Juyeon</first><last>Kang</last><affiliation>Fortia Financial Solutions</affiliation></author>
-      <author><first>Ismail</first><last>El Maarouf</last><affiliation>Imprevicible</affiliation></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last><affiliation>Imprevicible</affiliation></author>
       <pages>211-217</pages>
       <abstract>This paper describes FinSim4-ESG 1 shared task organized in the 4th FinNLP workshopwhich is held in conjunction with the IJCAI-ECAI-2022 confer- enceThis year, the FinSim4 is extended to the Environment, Social and Government (ESG) insights and proposes two subtasks, one for ESG Taxonomy Enrichment and the other for Sustainable Sentence Prediction. Among the 28 teams registered to the shared task, a total of 8 teams submitted their systems results and 6 teams also submitted a paper to describe their method. The winner of each subtask shows good performance results of 0.85% and 0.95% in terms of accuracy, respectively.</abstract>
       <url hash="082c15d1">2022.finnlp-1.28</url>
@@ -372,7 +372,7 @@
       <title>Using Contextual Sentence Analysis Models to Recognize <fixed-case>ESG</fixed-case> Concepts</title>
       <author><first>Elvys</first><last>Linhares Pontes</last><affiliation>University of La Rochelle</affiliation></author>
       <author><first>Mohamed</first><last>Ben Jannet</last><affiliation>Laboratoire d’Informatique pour la Mécanique et les Sciences de l’Ingénieur</affiliation></author>
-      <author><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
       <author><first>Antoine</first><last>Doucet</last><affiliation>University of La Rochelle</affiliation></author>
       <pages>218-223</pages>
       <abstract>This paper summarizes the joint participation of the Trading Central Labs and the L3i laboratory of the University of La Rochelle on both sub-tasks of the <i>Shared Task FinSim-4</i> evaluation campaign. The first sub-task aims to enrich the ‘Fortia ESG taxonomy’ with new lexicon entries while the second one aims to classify sentences to either ‘sustainable’ or ‘unsustainable’ with respect to ESG (Environment, Social and Governance) related factors. For the first sub-task, we proposed a model based on pre-trained Sentence-BERT models to project sentences and concepts in a common space in order to better represent ESG concepts. The official task results show that our system yields a significant performance improvement compared to the baseline and outperforms all other submissions on the first sub-task. For the second sub-task, we combine the RoBERTa model with a feed-forward multi-layer perceptron in order to extract the context of sentences and classify them. Our model achieved high accuracy scores (over 92%) and was ranked among the top 5 systems.</abstract>
@@ -420,7 +420,7 @@
     <paper id="33">
       <title>Ranking Environment, Social And Governance Related Concepts And Assessing Sustainability Aspect of Financial Texts</title>
       <author><first>Sohom</first><last>Ghosh</last><affiliation>Fidelity Investments</affiliation></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last><affiliation>Jadavpur University</affiliation></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last><affiliation>Jadavpur University</affiliation></author>
       <pages>243-249</pages>
       <abstract>Understanding Environmental, Social, and Governance (ESG) factors related to financial products has become extremely important for investors. However, manually screening through the corporate policies and reports to understand their sustainability aspect is extremely tedious. In this paper, we propose solutions to two such problems which were released as shared tasks of the FinNLP workshop of the IJCAI-2022 conference. Firstly, we train a Sentence Transformers based model which automatically ranks ESG related concepts for a given unknown term. Secondly, we fine-tune a RoBERTa model to classify financial texts as sustainable or not. Out of 26 registered teams, our team ranked 4th in sub-task 1 and 3rd in sub-task 2. The source code can be accessed from <url>https://github.com/sohomghosh/Finsim4_ESG</url></abstract>
       <url hash="7e7d25c0">2022.finnlp-1.33</url>
diff --git a/data/xml/2022.fl4nlp.xml b/data/xml/2022.fl4nlp.xml
index a159fd18c4..85e1425392 100644
--- a/data/xml/2022.fl4nlp.xml
+++ b/data/xml/2022.fl4nlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-05-15" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022)</booktitle>
-      <editor><first>Bill Yuchen</first><last>Lin</last></editor>
+      <editor id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></editor>
       <editor><first>Chaoyang</first><last>He</last></editor>
       <editor><first>Chulin</first><last>Xie</last></editor>
       <editor><first>Fatemehsadat</first><last>Mireshghallah</last></editor>
@@ -56,7 +56,7 @@
       <title>Adaptive Differential Privacy for Language Model Training</title>
       <author><first>Xinwei</first><last>Wu</last></author>
       <author><first>Li</first><last>Gong</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>21-26</pages>
       <abstract>Although differential privacy (DP) can protect language models from leaking privacy, its indiscriminative protection on all data points reduces its practical utility. Previous works improve DP training by discriminating privacy and non-privacy data. But these works rely on datasets with prior privacy information, which is not available in real-world scenarios. In this paper, we propose an Adaptive Differential Privacy (ADP) framework for language modeling without resorting to prior privacy information. We estimate the probability that a linguistic item contains privacy based on a language model. We further propose a new Adam algorithm that adjusts the degree of differential privacy noise injected to the language model according to the estimated privacy probabilities. Experiments demonstrate that our ADP improves differentially private language modeling to achieve good protection from canary attackers.</abstract>
       <url hash="6247f4b7">2022.fl4nlp-1.3</url>
diff --git a/data/xml/2022.flp.xml b/data/xml/2022.flp.xml
index 538ba681d3..58ac3744c6 100644
--- a/data/xml/2022.flp.xml
+++ b/data/xml/2022.flp.xml
@@ -43,7 +43,7 @@
     </paper>
     <paper id="3">
       <title>Transfer Learning Parallel Metaphor using Bilingual Embeddings</title>
-      <author><first>Maria</first><last>Berger</last><affiliation>Ruhr University Bochum</affiliation></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last><affiliation>Ruhr University Bochum</affiliation></author>
       <pages>13-23</pages>
       <abstract>Automated metaphor detection in languages other than English is highly restricted as training corpora are comparably rare. One way to overcome this problem is transfer learning. This paper gives an overview on transfer learning techniques applied to NLP. We first introduce types of transfer learning, then we present work focusing on: i) transfer learning with cross-lingual embeddings; ii) transfer learning in machine translation; and iii) transfer learning using pre-trained transformer models. The paper is complemented by first experiments that make use of bilingual embeddings generated from different sources of parallel data: We i) present the preparation of a parallel Gold corpus; ii) examine the embeddings spaces to search for metaphoric words cross-lingually; iii) run first experiments in transfer learning German metaphor from English labeled data only. Results show that finding data sources for bilingual embeddings training and the vocabulary covered by these embeddings is critical for learning metaphor cross-lingually.</abstract>
       <url hash="5edf8c7d">2022.flp-1.3</url>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="10">
       <title>Distribution-Based Measures of Surprise for Creative Language: Experiments with Humor and Metaphor</title>
-      <author><first>Razvan C.</first><last>Bunescu</last><affiliation>Department of Computer Science, University of North Carolina at Charlotte</affiliation></author>
+      <author id="razvan-bunescu"><first>Razvan C.</first><last>Bunescu</last><affiliation>Department of Computer Science, University of North Carolina at Charlotte</affiliation></author>
       <author><first>Oseremen O.</first><last>Uduehi</last><affiliation>School of EECS, Ohio University</affiliation></author>
       <pages>68-78</pages>
       <abstract>Novelty or surprise is a fundamental attribute of creative output. As such, we postulate that a writer’s creative use of language leads to word choices and, more importantly, corresponding semantic structures that are unexpected for the reader. In this paper we investigate measures of surprise that rely solely on word distributions computed by language models and show empirically that creative language such as humor and metaphor is strongly correlated with surprise. Surprisingly at first, information content is observed to be at least as good a predictor of creative language as any of the surprise measures investigated. However, the best prediction performance is obtained when information and surprise measures are combined, showing that surprise measures capture an aspect of creative language that goes beyond information content.</abstract>
@@ -155,7 +155,7 @@
       <author><first>Yao</first><last>Fu</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>Valentina</first><last>Pyatkin</last><affiliation>Bar-Ilan University</affiliation></author>
       <author><first>Ian</first><last>Magnusson</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>84-93</pages>
       <abstract>Figurative language (e.g., “he flew like the wind”) is challenging to understand, as it is hard to tell what implicit information is being conveyed from the surface form alone. We hypothesize that to perform this task well, the reader needs to mentally elaborate the scene being described to identify a sensible meaning of the language. We present DREAM-FLUTE, a figurative language understanding system that does this, first forming a “mental model” of situations described in a premise and hypothesis before making an entailment/contradiction decision and generating an explanation. DREAM-FLUTE uses an existing scene elaboration model, DREAM, for constructing its “mental model.” In the FigLang2022 Shared Task evaluation, DREAM-FLUTE achieved (joint) first place (Acc@60=63.3%), and can perform even better with ensemble techniques, demonstrating the effectiveness of this approach. More generally, this work suggests that adding a reflective component to pretrained language models can improve their performance beyond standard fine-tuning (3.3% improvement in Acc@60).</abstract>
@@ -182,7 +182,7 @@
       <author><first>Giacomo</first><last>Anerdi</last><affiliation>Department of Advanced Computing Sciences, Maastricht University</affiliation></author>
       <author><first>Pedro</first><last>Jeuris</last><affiliation>Department of Advanced Computing Sciences, Maastricht University</affiliation></author>
       <author><first>Marijn</first><last>ten Thij</last><affiliation>Department of Advanced Computing Sciences, Maastricht University</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
       <pages>100-110</pages>
       <abstract>Idiomatic expressions (or idioms) are phrases where the meaning of the phrase cannot be determined from the meaning of the individual words in the expression. Translating idioms between languages is therefore a challenging task. Transformer models based on contextual embeddings have advanced the state-of-the-art across many domains in the field of natural language processing. While research using transformers has advanced both idiom detection as well as idiom disambiguation, idiom translation has not seen a similar advancement. In this work, we investigate two approaches to fine-tuning a pretrained Text-to-Text Transfer Transformer (T5) model to perform idiom translation from English to German. The first approach directly translates English idiom-containing sentences to German, while the second is underpinned by idiom paraphrasing, firstly paraphrasing English idiomatic expressions to their simplified English versions before translating them to German. Results of our evaluation show that each of the approaches is able to generate adequate translations.</abstract>
       <url hash="ceba0ec5">2022.flp-1.14</url>
diff --git a/data/xml/2022.fnp.xml b/data/xml/2022.fnp.xml
index db0c3fd3b9..bbb5110c9f 100644
--- a/data/xml/2022.fnp.xml
+++ b/data/xml/2022.fnp.xml
@@ -21,7 +21,7 @@
       <title><fixed-case>F</fixed-case>in<fixed-case>RAD</fixed-case>: Financial Readability Assessment Dataset - 13,000+ Definitions of Financial Terms for Measuring Readability</title>
       <author><first>Sohom</first><last>Ghosh</last></author>
       <author><first>Shovon</first><last>Sengupta</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <author><first>Sunny Kumar</first><last>Singh</last></author>
       <pages>1–9</pages>
       <abstract>In today’s world, the advancement and spread of the Internet and digitalization have resulted in most information being openly accessible. This holds true for financial services as well. Investors make data driven decisions by analysing publicly available information like annual reports of listed companies, details regarding asset allocation of mutual funds, etc. Many a time these financial documents contain unknown financial terms. In such cases, it becomes important to look at their definitions. However, not all definitions are equally readable. Readability largely depends on the structure, complexity and constituent terms that make up a definition. This brings in the need for automatically evaluating the readability of definitions of financial terms. This paper presents a dataset, FinRAD consisting of financial terms, their definitions and embeddings. In addition to standard readability scores (like “Flesch Reading Index (FRI)”, “Automated Readability Index (ARI)”, “SMOG Index Score (SIS)”,“Dale-Chall formula (DCF)”, etc.), it also contains the readability scores (AR) assigned based on sources from which the terms have been collected. We manually inspect a sample from it to ensure the quality of the assignment. Subsequently, we prove that the rule-based standard readability scores (like “Flesch Reading Index (FRI)”, “Automated Readability Index (ARI)”, “SMOG Index Score (SIS)”,“Dale-Chall formula (DCF)”, etc.) do not correlate well with the manually assigned binary readability scores of definitions of financial terms. Finally, we present a few neural baselines using transformer based architecture to automatically classify these definitions as readable or not. Pre-trained FinBERT model fine-tuned on FinRAD corpus performs the best (AU-ROC = 0.9927, F1 = 0.9610). This corpus can be downloaded from <url>https://github.com/sohomghosh/FinRAD_Financial_Readability_Assessment_Dataset</url>.</abstract>
@@ -76,13 +76,13 @@
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>Nadhem</first><last>Zmandar</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Marina</first><last>Litvak</last></author>
       <author><first>Nikiforos</first><last>Pittaras</last></author>
       <author><first>George</first><last>Giannakopoulos</last></author>
       <author><first>Aris</first><last>Kosmopoulos</last></author>
       <author><first>Blanca</first><last>Carbajo-Coronado</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
       <pages>43–52</pages>
       <abstract>This paper presents the results and findings of the Financial Narrative Summarisation Shared Task on summarising UK, Greek and Spanish annual reports. The shared task was organised as part of the Financial Narrative Processing 2022 Workshop (FNP 2022 Workshop). The Financial Narrative summarisation Shared Task (FNS-2022) has been running since 2020 as part of the Financial Narrative Processing (FNP) workshop series (El-Haj et al., 2022; El-Haj et al., 2021; El-Haj et al., 2020b; El-Haj et al., 2019c; El-Haj et al., 2018). The shared task included one main task which is the use of either abstractive or extractive automatic summarisers to summarise long documents in terms of UK, Greek and Spanish financial annual reports. This shared task is the third to target financial documents. The data for the shared task was created and collected from publicly available annual reports published by firms listed on the Stock Exchanges of UK, Greece and Spain. A total number of 14 systems from 7 different teams participated in the shared task.</abstract>
       <url hash="35121406">2022.fnp-1.6</url>
@@ -150,7 +150,7 @@
       <author><first>Sandra</first><last>Bellato</last></author>
       <author><first>Blanca</first><last>Carbajo Coronado</last></author>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <author><first>Mei</first><last>Gan</last></author>
       <author><first>Ana</first><last>Gisbert</last></author>
       <author><first>Antonio</first><last>Moreno Sandoval</last></author>
@@ -176,7 +176,7 @@
       <author><first>Cataldo</first><last>Musto</last></author>
       <author><first>Marco</first><last>DeGemmis</last></author>
       <author><first>Georgios</first><last>Lekkas</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>95–99</pages>
       <abstract>In this paper, we introduce the results of our submitted system to the FinTOC 2022 task. We address the task using a two-stage process: first, we detect titles using Document Image Analysis, then we train a supervised model for the hierarchical level prediction. We perform Document Image Analysis using a pre-trained Faster R-CNN on the PublyaNet dataset. We fine-tuned the model on the FinTOC 2022 training set. We extract orthographic and layout features from detected titles and use them to train a Random Forest model to predict the title level. The proposed system ranked #1 on both Title Detection and the Table of Content extraction tasks for Spanish. The system ranked #3 on both the two subtasks for English and French.</abstract>
       <url hash="99a30289">2022.fnp-1.14</url>
@@ -238,7 +238,7 @@
     <paper id="20">
       <title><fixed-case>LIPI</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2022: Mining Causes and Effects from Financial Texts</title>
       <author><first>Sohom</first><last>Ghosh</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>121–123</pages>
       <abstract>While reading financial documents, investors need to know the causes and their effects. This empowers them to make data-driven decisions. Thus, there is a need to develop an automated system for extracting causes and their effects from financial texts using Natural Language Processing. In this paper, we present the approach our team LIPI followed while participating in the FinCausal 2022 shared task. This approach is based on the winning solution of the first edition of FinCausal held in the year 2020.</abstract>
       <url hash="dc079a5f">2022.fnp-1.20</url>
@@ -281,7 +281,7 @@
       <title><fixed-case>MNLP</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal2022: Nested <fixed-case>NER</fixed-case> with a Generative Model</title>
       <author><first>Jooyeon</first><last>Lee</last></author>
       <author><first>Luan Huy</first><last>Pham</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>135–138</pages>
       <abstract>This paper describes work performed for the FinCasual 2022 Shared Task “Financial Document Causality Detection” (FinCausal 2022). As the name implies, the task involves extraction of casual and consequential elements from financial text. Our approach focuses employing Nested NER using the Text-to-Text Transformer (T5) generative transformer models while applying different combinations of datasets and tagging methods. Our system reports accuracy of 79% in Exact Match comparison and F-measure score of 92% token level measurement.</abstract>
       <url hash="98db4658">2022.fnp-1.24</url>
diff --git a/data/xml/2022.games.xml b/data/xml/2022.games.xml
index d265fb6e71..3226a24183 100644
--- a/data/xml/2022.games.xml
+++ b/data/xml/2022.games.xml
@@ -40,7 +40,7 @@
       <title>Less Text, More Visuals: Evaluating the Onboarding Phase in a <fixed-case>GWAP</fixed-case> for <fixed-case>NLP</fixed-case></title>
       <author><first>Fatima</first><last>Althani</last></author>
       <author><first>Chris</first><last>Madge</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>17–27</pages>
       <abstract>Games-with-a-purpose find attracting players a challenge. To improve player recruitment, we explored two game design elements that can increase player engagement during the onboarding phase; a narrative and a tutorial. In a qualitative study with 12 players of linguistic and language learning games, we examined the effect of presentation format on players’ engagement. Our reflexive thematic analysis found that in the onboarding phase of a GWAP for NLP, presenting players with visuals is expected and pre- senting too much text overwhelms them. Furthermore, players found that the instructions they were presented with lacked linguistic context. Additionally, the tutorial and game interface required refinement as the feedback is unsupportive and the graphics were not clear.</abstract>
       <url hash="22d564d4">2022.games-1.3</url>
diff --git a/data/xml/2022.gebnlp.xml b/data/xml/2022.gebnlp.xml
index 33813fe299..b21dbf05b2 100644
--- a/data/xml/2022.gebnlp.xml
+++ b/data/xml/2022.gebnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</booktitle>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
       <editor><first>Christine</first><last>Basta</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Gabriel</first><last>Stanovsky</last></editor>
       <editor><first>Hila</first><last>Gonen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,7 +23,7 @@
       <title>Analyzing Hate Speech Data along Racial, Gender and Intersectional Axes</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Philip</first><last>Baader</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1-7</pages>
       <abstract>To tackle the rising phenomenon of hate speech, efforts have been made towards data curation and analysis. When it comes to analysis of bias, previous work has focused predominantly on race. In our work, we further investigate bias in hate speech datasets along racial, gender and intersectional axes. We identify strong bias against African American English (AAE), masculine and AAE+Masculine tweets, which are annotated as disproportionately more hateful and offensive than from other demographics. We provide evidence that BERT-based models propagate this bias and show that balancing the training data for these protected attributes can lead to fairer models with regards to gender, but not race.</abstract>
       <url hash="9e71cf77">2022.gebnlp-1.1</url>
@@ -35,7 +35,7 @@
       <author><first>Jiali</first><last>Li</last></author>
       <author><first>Shucheng</first><last>Zhu</last></author>
       <author><first>Ying</first><last>Liu</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>8-16</pages>
       <abstract>Gender is a construction in line with social perception and judgment. An important means of this construction is through languages. When natural language processing tools, such as word embeddings, associate gender with the relevant categories of social perception and judgment, it is likely to cause bias and harm to those groups that do not conform to the mainstream social perception and judgment. Using 12,251 Chinese word embeddings as intermedium, this paper studies the relationship between social perception and judgment categories and gender. The results reveal that these grammatical gender-neutral Chinese word embeddings show a certain gender bias, which is consistent with the mainstream society’s perception and judgment of gender. Men are judged by their actions and perceived as bad, easily-disgusted, bad-tempered and rational roles while women are judged by their appearances and perceived as perfect, either happy or sad, and emotional roles.</abstract>
       <url hash="378e2cb9">2022.gebnlp-1.2</url>
@@ -58,7 +58,7 @@
       <author><first>Lucy</first><last>Havens</last></author>
       <author><first>Melissa</first><last>Terras</last></author>
       <author><first>Benjamin</first><last>Bach</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>30-57</pages>
       <abstract>Mitigating harms from gender biased language in Natural Language Processing (NLP) systems remains a challenge, and the situated nature of language means bias is inescapable in NLP data. Though efforts to mitigate gender bias in NLP are numerous, they often vaguely define gender and bias, only consider two genders, and do not incorporate uncertainty into models. To address these limitations, in this paper we present a taxonomy of gender biased language and apply it to create annotated datasets. We created the taxonomy and annotated data with the aim of making gender bias in language transparent. If biases are communicated clearly, varieties of biased language can be better identified and measured. Our taxonomy contains eleven types of gender biases inclusive of people whose gender expressions do not fit into the binary conceptions of woman and man, and whose gender differs from that they were assigned at birth, while also allowing annotators to document unknown gender information. The taxonomy and annotated data will, in future work, underpin analysis and more equitable language model development.</abstract>
       <url hash="e3f1ba8b">2022.gebnlp-1.4</url>
@@ -85,7 +85,7 @@
     <paper id="6">
       <title>Gender Biases and Where to Find Them: Exploring Gender Bias in Pre-Trained Transformer-based Language Models Using Movement Pruning</title>
       <author><first>Przemyslaw</first><last>Joniak</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>67-73</pages>
       <abstract>Language model debiasing has emerged as an important field of study in the NLP community. Numerous debiasing techniques were proposed, but bias ablation remains an unaddressed issue. We demonstrate a novel framework for inspecting bias in pre-trained transformer-based language models via movement pruning. Given a model and a debiasing objective, our framework finds a subset of the model containing less bias than the original model. We implement our framework by pruning the model while fine-tuning it on the debasing objective. Optimized are only the pruning scores – parameters coupled with the model’s weights that act as gates. We experiment with pruning attention heads, an important building block of transformers: we prune square blocks, as well as establish a new way of pruning the entire heads. Lastly, we demonstrate the usage of our framework using gender bias, and based on our findings, we propose an improvement to an existing debiasing method. Additionally, we re-discover a bias-performance trade-off: the better the model performs, the more bias it contains.</abstract>
       <url hash="e89343e3">2022.gebnlp-1.6</url>
@@ -120,7 +120,7 @@
       <author><first>Afra Feyza</first><last>Akyürek</last></author>
       <author><first>Muhammed Yusuf</first><last>Kocyigit</last></author>
       <author><first>Sejin</first><last>Paik</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>76-76</pages>
       <abstract>Researchers have devised numerous ways to quantify social biases vested in pretrained language models. As some language models are capable of generating coherent completions given a set of textual prompts, several prompting datasets have been proposed to measure biases between social groups—posing language generation as a way of identifying biases. In this opinion paper, we analyze how specific choices of prompt sets, metrics, automatic tools and sampling strategies affect bias results. We find out that the practice of measuring biases through text completion is prone to yielding contradicting results under different experiment settings. We additionally provide recommendations for reporting biases in open-ended language generation for a more complete outlook of biases exhibited by a given language model. Code to reproduce the results is released under <url>https://github.com/feyzaakyurek/bias-textgen</url>.</abstract>
       <url hash="61f2116c">2022.gebnlp-1.9</url>
@@ -145,7 +145,7 @@
       <author><first>Michael Henry</first><last>Tessler</last></author>
       <author><first>Nicole</first><last>Dubosh</last></author>
       <author><first>Katherine</first><last>Hiller</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>86-93</pages>
       <abstract>Though approximately 50% of medical school graduates today are women, female physicians tend to be underrepresented in senior positions, make less money than their male counterparts and receive fewer promotions. There is a growing body of literature demonstrating gender bias in various forms of evaluation in medicine, but this work was mainly conducted by looking for specific words using fixed dictionaries such as LIWC and focused on global assessments of performance such as recommendation letters. We use a dataset of written and quantitative assessments of medical student performance on individual shifts of work, collected across multiple institutions, to investigate the extent to which gender bias exists in a day-to-day context for medical students. We investigate differences in the narrative comments given to male and female students by both male or female faculty assessors, using a fine-tuned BERT model. This allows us to examine whether groups are written about in systematically different ways, without relying on hand-crafted wordlists or topic models. We compare these results to results from the traditional LIWC method and find that, although we find no evidence of group-level gender bias in this dataset, terms related to family and children are used more in feedback given to women.</abstract>
       <url hash="35e03906">2022.gebnlp-1.11</url>
@@ -157,7 +157,7 @@
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>94-111</pages>
       <abstract>Due to the complexity of bias and the opaque nature of current neural approaches, there is a rising interest in auditing language technologies. In this work, we contribute to such a line of inquiry by exploring the emergence of gender bias in Speech Translation (ST). As a new perspective, rather than focusing on the final systems only, we examine their evolution over the course of training. In this way, we are able to account for different variables related to the learning dynamics of gender translation, and investigate when and how gender divides emerge in ST. Accordingly, for three language pairs (en ? es, fr, it) we compare how ST systems behave for masculine and feminine translation at several levels of granularity. We find that masculine and feminine curves are dissimilar, with the feminine one being characterized by more erratic behaviour and late improvements over the course of training. Also, depending on the considered phenomena, their learning trends can be either antiphase or parallel. Overall, we show how such a progressive analysis can inform on the reliability and time-wise acquisition of gender, which is concealed by static evaluations and standard metrics.</abstract>
@@ -258,7 +258,7 @@
     <paper id="21">
       <title>Occupational Biases in <fixed-case>N</fixed-case>orwegian and Multilingual Language Models</title>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>200-211</pages>
       <abstract>In this paper we explore how a demographic distribution of occupations, along gender dimensions, is reflected in pre-trained language models. We give a descriptive assessment of the distribution of occupations, and investigate to what extent these are reflected in four Norwegian and two multilingual models. To this end, we introduce a set of simple bias probes, and perform five different tasks combining gendered pronouns, first names, and a set of occupations from the Norwegian statistics bureau. We show that language specific models obtain more accurate results, and are much closer to the real-world distribution of clearly gendered occupations. However, we see that none of the models have correct representations of the occupations that are demographically balanced between genders. We also discuss the importance of the training data on which the models were trained on, and argue that template-based bias probes can sometimes be fragile, and a simple alteration in a template can change a model’s behavior.</abstract>
@@ -285,7 +285,7 @@
     <paper id="23">
       <title><fixed-case>H</fixed-case>etero<fixed-case>C</fixed-case>orpus: A Corpus for Heteronormative Language Detection</title>
       <author><first>Juan</first><last>Vásquez</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Scott Thomas</first><last>Andersen</last></author>
       <author><first>Sergio-Luis</first><last>Ojeda-Trueba</last></author>
       <pages>225-234</pages>
@@ -302,7 +302,7 @@
       <author><first>Ashley</first><last>Oh</last></author>
       <author><first>Sanika</first><last>Natu</last></author>
       <author><first>Swetha</first><last>Gangu</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
       <pages>235-243</pages>
       <abstract>Films are a rich source of data for natural language processing. OpenSubtitles (Lison and Tiedemann, 2016) is a popular movie script dataset, used for training models for tasks such as machine translation and dialogue generation. However, movies often contain biases that reflect society at the time, and these biases may be introduced during pre-training and influence downstream models. We perform sentiment analysis on template infilling (Kurita et al., 2019) and the Sentence Embedding Association Test (May et al., 2019) to measure how BERT-based language models change after continued pre-training on OpenSubtitles. We consider gender bias as a primary motivating case for this analysis, while also measuring other social biases such as disability. We show that sentiment analysis on template infilling is not an effective measure of bias due to the rarity of disability and gender identifying tokens in the movie dialogue. We extend our analysis to a longitudinal study of bias in film dialogue over the last 110 years and find that continued pre-training on OpenSubtitles encodes additional bias into BERT. We show that BERT learns associations that reflect the biases and representation of each film era, suggesting that additional care must be taken when using historical data.</abstract>
@@ -337,7 +337,7 @@
       <author><first>Jaimeen</first><last>Ahn</last></author>
       <author><first>Hwaran</first><last>Lee</last></author>
       <author><first>Jinhwa</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>266-272</pages>
       <abstract>Knowledge distillation is widely used to transfer the language understanding of a large model to a smaller model. However, after knowledge distillation, it was found that the smaller model is more biased by gender compared to the source large model. This paper studies what causes gender bias to increase after the knowledge distillation process. Moreover, we suggest applying a variant of the mixup on knowledge distillation, which is used to increase generalizability during the distillation process, not for augmentation. By doing so, we can significantly reduce the gender bias amplification after knowledge distillation. We also conduct an experiment on the GLUE benchmark to demonstrate that even if the mixup is applied, it does not have a significant adverse effect on the model’s performance.</abstract>
       <url hash="3933cfd4">2022.gebnlp-1.27</url>
diff --git a/data/xml/2022.gem.xml b/data/xml/2022.gem.xml
index aad765c423..a4a30eaf7f 100644
--- a/data/xml/2022.gem.xml
+++ b/data/xml/2022.gem.xml
@@ -25,8 +25,8 @@
     <paper id="1">
       <title>Improving abstractive summarization with energy-based re-ranking</title>
       <author><first>Diogo</first><last>Pernes</last></author>
-      <author><first>Afonso</first><last>Mendes</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>1-17</pages>
       <abstract>Current abstractive summarization systems present important weaknesses which prevent their deployment in real-world applications, such as the omission of relevant information and the generation of factual inconsistencies (also known as hallucinations). At the same time, automatic evaluation metrics such as CTC scores (Deng et al., 2021) have been recently proposed that exhibit a higher correlation with human judgments than traditional lexical-overlap metrics such as ROUGE. In this work, we intend to close the loop by leveraging the recent advances in summarization metrics to create quality-aware abstractive summarizers. Namely, we propose an energy-based model that learns to re-rank summaries according to one or a combination of these metrics. We experiment using several metrics to train our energy-based re-ranker and show that it consistently improves the scores achieved by the predicted summaries. Nonetheless, human evaluation results show that the re-ranking approach should be used with care for highly abstractive summaries, as the available metrics are not yet sufficiently reliable for this purpose.</abstract>
       <url hash="aae3c275">2022.gem-1.1</url>
@@ -67,8 +67,8 @@
       <author><first>Kaamraan</first><last>Khan</last></author>
       <author><first>Avinash Kumar</first><last>Singh</last></author>
       <author><first>Subhasish</first><last>Ghosh</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Indrajit</first><last>Bhattacharya</last></author>
       <pages>43-53</pages>
       <abstract>We explore the task of automated generation of technical interview questions from a given textbook. Such questions are different from those for reading comprehension studied in question generation literature. We curate a context based interview questions data set for Machine Learning and Deep Learning from two popular textbooks. We first explore the possibility of using a large generative language model (GPT-3) for this task in a zero shot setting. We then evaluate the performance of smaller generative models such as BART fine-tuned on weakly supervised data obtained using GPT-3 and hand-crafted templates. We deploy an automatic question importance assignment technique to figure out suitability of a question in a technical interview. It improves the evaluation results in many dimensions. We dissect the performance of these models for this task and also scrutinize the suitability of questions generated by them for use in technical interviews.</abstract>
@@ -107,7 +107,7 @@
       <author><first>John</first><last>Glover</last></author>
       <author><first>Federico</first><last>Fancellu</last></author>
       <author><first>Vasudevan</first><last>Jagannathan</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Thomas</first><last>Schaaf</last></author>
       <pages>97-105</pages>
       <abstract>Scoring the factuality of a generated summary involves measuring the degree to which a target text contains factual information using the input document as support. Given the similarities in the problem formulation, previous work has shown that Natural Language Inference models can be effectively repurposed to perform this task. As these models are trained to score entailment at a sentence level, several recent studies have shown that decomposing either the input document or the summary into sentences helps with factuality scoring. But is fine-grained decomposition always a winning strategy? In this paper we systematically compare different granularities of decomposition - from document to sub-sentence level, and we show that the answer is no. Our results show that incorporating additional context can yield improvement, but that this does not necessarily apply to all datasets. We also show that small changes to previously proposed entailment-based scoring methods can result in better performance, highlighting the need for caution in model and methodology selection for downstream tasks.</abstract>
@@ -170,7 +170,7 @@
       <author><first>Eduardo</first><last>Calò</last></author>
       <author><first>Elze</first><last>van der Werf</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>148-171</pages>
       <abstract>Logic-to-text generation is an important yet underrepresented area of natural language generation (NLG). In particular, most previous works on this topic lack sound evaluation. We address this limitation by building and evaluating a system that generates high-quality English text given a first-order logic (FOL) formula as input. We start by analyzing the performance of Ranta (2011)’s system. Based on this analysis, we develop an extended version of the system, which we name LoLa, that performs formula simplification based on logical equivalences and syntactic transformations. We carry out an extensive evaluation of LoLa using standard automatic metrics and human evaluation. We compare the results against a baseline and Ranta (2011)’s system. The results show that LoLa outperforms the other two systems in most aspects.</abstract>
       <url hash="361c3334">2022.gem-1.13</url>
@@ -184,7 +184,7 @@
       <author><first>Štěpán Lars</first><last>Laichter</last></author>
       <author><first>Arabella</first><last>Sinclair</last></author>
       <author><first>Margot</first><last>van der Goot</last></author>
-      <author><first>Raquel</first><last>Fernandez</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <pages>172-188</pages>
       <abstract>To be trusted and perceived as natural and coherent, conversational systems must adapt to the language of their users. While personalized dialogue is a promising direction, controlling generation for fine-grained language features remains a challenge in this approach. A recent line of research showed the effectiveness of leveraging pre-trained language models toward adapting to a text’s topic or sentiment. In this study, we build on these approaches and focus on a higher-level dimension of language variation: speakers’ age. We frame the task as a dialogue response generation, and test methods based on bag-of-words (BoW) and neural discriminators (Disc) to condition the output of GPT-2 and DialoGPT without altering the parameters of the language models. We show that Disc models achieve a higher degree of detectable control than BoW models based on automatic evaluation. In contrast, humans can partially detect age differences in BoW but not Disc responses. Since BoW responses are deemed better than Disc ones by humans, simple controllable methods thus appear to be a better tradeoff between adaptation and language quality. Our work confirms the challenges of adapting to higher-level dimensions of language variation. Moreover, it highlights the need to evaluate natural language generation thoroughly.</abstract>
@@ -260,7 +260,7 @@
     <paper id="21">
       <title>Unsupervised Token-level Hallucination Detection from Summary Generation By-products</title>
       <author><first>Andreas</first><last>Marfurt</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>248-261</pages>
       <abstract>Hallucinations in abstractive summarization are model generations that are unfaithful to the source document. Current methods for detecting hallucinations operate mostly on noun phrases and named entities, and restrict themselves to the XSum dataset, which is known to have hallucinations in 3 out of 4 training examples (Maynez et al., 2020). We instead consider the CNN/DailyMail dataset where the summarization model has not seen abnormally many hallucinations during training. We automatically detect candidate hallucinations at the token level, irrespective of its part of speech. Our detection comes essentially for free, as we only use information the model already produces during generation of the summary. This enables practitioners to jointly generate a summary and identify possible hallucinations, with minimal overhead. We repurpose an existing factuality dataset and create our own token-level annotations. The evaluation on these two datasets shows that our model achieves better precision-recall tradeoffs than its competitors, which additionally require a model forward pass.</abstract>
       <url hash="bd53fc74">2022.gem-1.21</url>
@@ -272,8 +272,8 @@
       <author><first>Andreas</first><last>Marfurt</last></author>
       <author><first>Ashley</first><last>Thornton</last></author>
       <author><first>David</first><last>Sylvan</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>262-275</pages>
       <abstract>A wide variety of tasks have been framed as text-to-text tasks to allow processing by sequence-to-sequence models. We propose a new task of generating a semi-structured interpretation of a source document. The interpretation is semi-structured in that it contains mandatory and optional fields with free-text information. This structure is surfaced by human annotations, which we standardize and convert to text format. We then propose an evaluation technique that is generally applicable to any such semi-structured annotation, called equivalence classes evaluation. The evaluation technique is efficient and scalable; it creates a large number of evaluation instances from a comparably cheap clustering of the free-text information by domain experts. For our task, we release a dataset about the monetary policy of the Federal Reserve. On this corpus, our evaluation shows larger differences between pretrained models than standard text generation metrics.</abstract>
       <url hash="1b1be9ac">2022.gem-1.22</url>
@@ -320,8 +320,8 @@
     </paper>
     <paper id="26">
       <title>On reporting scores and agreement for error annotation tasks</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>306-315</pages>
       <abstract>This work examines different ways of aggregating scores for error annotation in MT outputs: raw error counts, error counts normalised over total number of words (word percentage’), and error counts normalised over total number of errors (error percentage’). We use each of these three scores to calculate inter-annotator agreement in the form of Krippendorff’s <tex-math>alpha</tex-math> and Pearson’s <tex-math>r</tex-math> and compare the obtained numbers, overall and separately for different types of errors. While each score has its advantages depending on the goal of the evaluation, we argue that the best way of estimating inter-annotator agreement using such numbers are raw counts. If the annotation process ensures that the total number of words cannot differ among the annotators (for example, due to adding omission symbols), normalising over number of words will lead to the same conclusions. In contrast, total number of errors is very subjective because different annotators often perceive different amount of errors in the same text, therefore normalising over this number can indicate lower agreements.</abstract>
       <url hash="9d157754">2022.gem-1.26</url>
@@ -360,7 +360,7 @@
     </paper>
     <paper id="29">
       <title>Most <fixed-case>NLG</fixed-case> is Low-Resource: here’s what we can do about it</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <pages>336-350</pages>
       <abstract>Many domains and tasks in natural language generation (NLG) are inherently ‘low-resource’, where training data, tools and linguistic analyses are scarce. This poses a particular challenge to researchers and system developers in the era of machine-learning-driven NLG. In this position paper, we initially present the challenges researchers &amp; developers often encounter when dealing with low-resource settings in NLG. We then argue that it is unsustainable to collect large aligned datasets or build large language models from scratch for every possible domain due to cost, labour, and time constraints, so researching and developing methods and resources for low-resource settings is vital. We then discuss current approaches to low-resource NLG, followed by proposed solutions and promising avenues for future work in NLG for low-resource settings.</abstract>
@@ -397,7 +397,7 @@
     <paper id="33">
       <title>A Survey of Recent Error Annotation Schemes for Automatically Generated Text</title>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>383-398</pages>
       <abstract>While automatically computing numerical scores remains the dominant paradigm in NLP system evaluation, error analysis is receiving increasing attention, with numerous error annotation schemes being proposed for automatically generated text. However, there is little agreement about what error annotation schemes should look like, how many different types of errors should be distinguished and at what level of granularity. In this paper, our aim is to map out recent work on annotating errors in automatically generated text, with a particular focus on error taxonomies. We describe our systematic paper selection process, and survey the error annotation schemes reported in the papers, drawing out similarities and differences between them. Finally, we characterise the issues that would make it difficult to move from the current situation to a standardised error taxonomy for annotating errors in automatically generated text.</abstract>
       <url hash="0d028163">2022.gem-1.33</url>
@@ -408,7 +408,7 @@
     <paper id="34">
       <title>What’s in a (dataset’s) name? The case of <fixed-case>B</fixed-case>ig<fixed-case>P</fixed-case>atent</title>
       <author><first>Silvia</first><last>Casola</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>399-404</pages>
       <abstract>Sharing datasets and benchmarks has been crucial for rapidly improving Natural Language Processing models and systems. Documenting datasets’ characteristics (and any modification introduced over time) is equally important to avoid confusion and make comparisons reliable. Here, we describe the case of BigPatent, a dataset for patent summarization that exists in at least two rather different versions under the same name. While previous literature has not clearly distinguished among versions, their differences do not only lay on a surface level but also modify the dataset’s core nature and, thus, the complexity of the summarization task. While this paper describes a specific case, we aim to shed light on new challenges that might emerge in resource sharing and advocate for comprehensive documentation of datasets and models.</abstract>
@@ -458,7 +458,7 @@
       <author><first>Hwanhee</first><last>Lee</last></author>
       <author><first>Cheoneum</first><last>Park</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Juae</first><last>Kim</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
@@ -485,7 +485,7 @@
     <paper id="43">
       <title>Error Analysis of <fixed-case>T</fixed-case>o<fixed-case>TT</fixed-case>o Table-to-Text Neural <fixed-case>NLG</fixed-case> Models</title>
       <author><first>Barkavi</first><last>Sundararajan</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>456-470</pages>
       <abstract>We report error analysis of outputs from seven Table-to-Text generation models fine-tuned on ToTTo, an open-domain English language dataset. A manual error annotation of a subset of outputs (a total of 5,278 sentences) belonging to the topic of Politics generated by these seven models has been carried out. Our error annotation focused on eight categories of errors. The error analysis shows that more than 45% of sentences from each of the seven models have been error-free. It uncovered some of the specific classes of errors such as WORD errors that are the dominant errors in all the seven models, NAME and NUMBER errors are more committed by two of the GeM benchmark models, whereas DATE-DIMENSION and OTHER category of errors are more common in our Table-to-Text models.</abstract>
@@ -524,7 +524,7 @@
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>494-508</pages>
       <abstract>What do language models know about our world? This question is hard to answer but important to get right. To this end, we introduce 20Q, a novel benchmark using the Twenty Questions game to evaluate world knowledge and common sense of language models. Thanks to our overlap-free benchmark, language models learn the game of Twenty Questions without learning relevant knowledge for the test set. We uncover two intuitive factors influencing the world knowledge of language models: the size of the model and the topic frequency in the pre-training data. Moreover, we show that in-context learning is inefficient for evaluating language models’ world knowledge — fine-tuning is necessary to show their true capabilities. Lastly, our results show room for improvement to enhance the world knowledge and common sense of large language models. A potential solution would be to up-sample unfrequent topics in the pre-training of language models.</abstract>
       <url hash="93a83cfe">2022.gem-1.46</url>
@@ -536,7 +536,7 @@
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Maxime</first><last>De Bruyn</last></author>
       <author><first>Jeska</first><last>Buhmann</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>509-519</pages>
       <abstract>Generative conversational agents are known to suffer from problems like inconsistency and hallucination, and a big challenge in studying these issues remains evaluation: they are not properly reflected in common text generation metrics like perplexity or BLEU, and alternative implicit methods like semantic similarity or NLI labels can be misguided when few specific tokens are decisive. In this work we propose ConsisTest; a factual consistency benchmark including both WH and Y/N questions based on PersonaChat, along with a hybrid evaluation pipeline which aims to get the best of symbolic and sub-symbolic methods. Using these and focusing on pretrained generative models like BART, we provide detailed statistics and analysis on how the model’s consistency is affected by variations in question and context.</abstract>
       <url hash="9e3158cd">2022.gem-1.47</url>
@@ -557,7 +557,7 @@
     </paper>
     <paper id="49">
       <title>Exploring a <fixed-case>POS</fixed-case>-based Two-stage Approach for Improving Low-Resource <fixed-case>AMR</fixed-case>-to-Text Generation</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>531-538</pages>
       <abstract>This work presents a two-stage approach for tackling low-resource AMR-to-text generation for Brazilian Portuguese. Our approach consists of (1) generating a masked surface realization in which some tokens are masked according to its Part-of-Speech class and (2) infilling the masked tokens according to the AMR graph and the previous masked surface realization. Results show a slight improvement over the baseline, mainly in BLEU (1.63) and METEOR (0.02) scores. Moreover, we evaluate the pipeline components separately, showing that the bottleneck of the pipeline is the masked surface realization. Finally, the human evaluation suggests that models still suffer from hallucinations, and some strategies to deal with the problems found are proposed.</abstract>
@@ -581,7 +581,7 @@
       <author><first>Daniel</first><last>King</last></author>
       <author><first>Zejiang</first><last>Shen</last></author>
       <author><first>Nishant</first><last>Subramani</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Doug</first><last>Downey</last></author>
       <pages>555-571</pages>
diff --git a/data/xml/2022.gwll.xml b/data/xml/2022.gwll.xml
index f92a1ce49b..d278d6b30d 100644
--- a/data/xml/2022.gwll.xml
+++ b/data/xml/2022.gwll.xml
@@ -40,7 +40,7 @@
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Maxim</first><last>Ionov</last></author>
       <author><first>Besim</first><last>Kabashi</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Ciprian-Octavian</first><last>Truică</last></author>
       <pages>10–18</pages>
       <abstract>Following presentations of frequency and attestations, and embeddings and distributional similarity, this paper introduces the third cornerstone of the emerging OntoLex module for Frequency, Attestation and Corpus-based Information, OntoLex-FrAC. We provide an RDF vocabulary for collocations, established as a consensus over contributions from five different institutions and numerous data sets, with the goal of eliciting feedback from reviewers, workshop audience and the scientific community in preparation of the final consolidation of the OntoLex-FrAC module, whose publication as a W3C community report is foreseen for the end of this year. The novel collocation component of OntoLex-FrAC is described in application to a lexicographic resource and corpus-based collocation scores available from the web, and finally, we demonstrate the capability and genericity of the model by showing how to retrieve and aggregate collocation information by means of SPARQL, and its export to a tabular format, so that it can be easily processed in downstream applications.</abstract>
diff --git a/data/xml/2022.hcinlp.xml b/data/xml/2022.hcinlp.xml
index 1d36e34f2e..8d7eeb97ef 100644
--- a/data/xml/2022.hcinlp.xml
+++ b/data/xml/2022.hcinlp.xml
@@ -4,11 +4,11 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Bridging Human--Computer Interaction and Natural Language Processing</booktitle>
       <editor><first>Su Lin</first><last>Blodgett</last></editor>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <editor><first>Michael</first><last>Madaio</last></editor>
       <editor><first>Ani</first><last>Nenkova</last></editor>
       <editor><first>Brendan</first><last>O'Connor</last></editor>
-      <editor><first>Hanna</first><last>Wallach</last></editor>
+      <editor id="hanna-wallach"><first>Hanna</first><last>Wallach</last></editor>
       <editor><first>Qian</first><last>Yang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington</address>
@@ -63,7 +63,7 @@
     </paper>
     <paper id="3">
       <title>Design Considerations for an <fixed-case>NLP</fixed-case>-Driven Empathy and Emotion Interface for Clinician Training via Telemedicine</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Marina</first><last>Girju</last></author>
       <pages>21-27</pages>
       <abstract>As digital social platforms and mobile technologies become more prevalent and robust, the use of Artificial Intelligence (AI) in facilitating human communication will grow. This, in turn, will encourage development of intuitive, adaptive, and effective empathic AI interfaces that better address the needs of socially and culturally diverse communities. In this paper, we present several design considerations of an intelligent digital interface intended to guide the clinicians toward more empathetic communication. This approach allows various communities of practice to investigate how AI, on one side, and human communication and healthcare needs, on the other, can contribute to each other’s development.</abstract>
@@ -87,7 +87,7 @@
       <author><first>Erin</first><last>Pacquetet</last></author>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>34-39</pages>
       <abstract>This paper analyzes data from the 2021 Amazon Alexa Prize Socialbot Grand Challenge 4, in order to better understand the differences between human-computer interactions (HCI) in a socialbot setting and conventional human-to-human interactions. We find that because socialbots are a new genre of HCI, we are still negotiating norms to guide interactions in this setting. We present several notable patterns in user behavior toward socialbots, which have important implications for guiding future work in the development of conversational agents.</abstract>
       <url hash="10a4db4d">2022.hcinlp-1.5</url>
diff --git a/data/xml/2022.humeval.xml b/data/xml/2022.humeval.xml
index 4c8a06cd8e..b141a1929f 100644
--- a/data/xml/2022.humeval.xml
+++ b/data/xml/2022.humeval.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2022-05-15" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)</booktitle>
-      <editor><first>Anya</first><last>Belz</last></editor>
-      <editor><first>Maja</first><last>Popović</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popović</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <editor><first>Anastasia</first><last>Shimorina</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -57,10 +57,10 @@
     </paper>
     <paper id="4">
       <title>Human evaluation of web-crawled parallel corpora for machine translation</title>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
       <pages>32-41</pages>
       <abstract>Quality assessment has been an ongoing activity of the series of ParaCrawl efforts to crawl massive amounts of parallel data from multilingual websites for 29 languages. The goal of ParaCrawl is to get parallel data that is good for machine translation. To prove so, both, automatic (extrinsic) and human (intrinsic and extrinsic) evaluation tasks have been included as part of the quality assessment activity of the project. We sum up the various methods followed to address these evaluation tasks for the web-crawled corpora produced and their results. We review their advantages and disadvantages for the final goal of the ParaCrawl project and the related ongoing project MaCoCu.</abstract>
       <url hash="141f5f3f">2022.humeval-1.4</url>
diff --git a/data/xml/2022.icnlsp.xml b/data/xml/2022.icnlsp.xml
index a72072566a..2c004717ba 100644
--- a/data/xml/2022.icnlsp.xml
+++ b/data/xml/2022.icnlsp.xml
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Error correction and extraction in request dialogs</title>
       <author><first>Stefan</first><last>Constantin</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>2–11</pages>
       <url hash="97519347">2022.icnlsp-1.1</url>
       <bibkey>constantin-waibel-2022-error</bibkey>
@@ -37,7 +37,7 @@
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Mingyu</first><last>Cui</last></author>
       <author><first>Zimo</first><last>Zhou</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>19–29</pages>
       <url hash="eb35e388">2022.icnlsp-1.3</url>
       <bibkey>wang-etal-2022-topicrefine</bibkey>
@@ -47,7 +47,7 @@
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Wai Chung</first><last>Kwan</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>30–39</pages>
       <url hash="98e3f9ab">2022.icnlsp-1.4</url>
       <bibkey>wang-etal-2022-prior</bibkey>
@@ -75,8 +75,8 @@
     <paper id="7">
       <title>Improving <fixed-case>NL</fixed-case>-to-Query Systems through Re-ranking of Semantic Hypothesis</title>
       <author><first>Pius</first><last>von Däniken</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Ursin</first><last>Brunner</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <author><first>Kurt</first><last>Stockinger</last></author>
@@ -103,7 +103,7 @@
     <paper id="10">
       <title>Performance of two <fixed-case>F</fixed-case>rench <fixed-case>BERT</fixed-case> models for <fixed-case>F</fixed-case>rench language on verbatim transcripts and online posts</title>
       <author><first>Emmanuelle</first><last>Kelodjoue</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>88–94</pages>
       <url hash="aea1e260">2022.icnlsp-1.10</url>
@@ -181,7 +181,7 @@
     <paper id="19">
       <title>Comparison of Token- and Character-Level Approaches to Restoration of Spaces, Punctuation, and Capitalization in Various Languages</title>
       <author><first>Laurence</first><last>Dyer</last></author>
-      <author><first>Anthony</first><last>Hughes</last></author>
+      <author id="anthony-hughes"><first>Anthony</first><last>Hughes</last></author>
       <author><first>Dhwani</first><last>Shah</last></author>
       <author><first>Burcu</first><last>Can</last></author>
       <pages>168–178</pages>
@@ -243,7 +243,7 @@
       <title>A deep sentiment analysis of <fixed-case>T</fixed-case>unisian dialect comments on multi-domain posts in different social media platforms</title>
       <author><first>Emna</first><last>Fsih</last></author>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>226–233</pages>
       <url hash="95b00b9f">2022.icnlsp-1.26</url>
       <bibkey>fsih-etal-2022-deep</bibkey>
diff --git a/data/xml/2022.icon.xml b/data/xml/2022.icon.xml
index dc075c3ad9..f9579c5849 100644
--- a/data/xml/2022.icon.xml
+++ b/data/xml/2022.icon.xml
@@ -50,7 +50,7 @@
     <paper id="4">
       <title>Knowledge Enhanced Deep Learning Model for Radiology Text Generation</title>
       <author><first>Kaveri</first><last>Kale</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last><affiliation>IIT Bombay</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Aditya</first><last>Shetty</last><affiliation>Candy Breach Hospital, Mumbai</affiliation></author>
       <author><first>Milind</first><last>Gune</last><affiliation>Consultant Radiologist, Thane</affiliation></author>
       <author><first>Kush</first><last>Shrivastava</last><affiliation>Augnito India Pvt Ltd</affiliation></author>
@@ -65,7 +65,7 @@
       <title>Named Entity Recognition for Code-Mixed <fixed-case>K</fixed-case>annada-<fixed-case>E</fixed-case>nglish Social Media Data</title>
       <author><first>Poojitha</first><last>Nandigam</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Abhinav</first><last>Appidi</last><affiliation>IIIT Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>IIIT Hyderabad</affiliation></author>
       <pages>43-49</pages>
       <abstract>Named Entity Recognition (NER) is a critical task in the field of Natural Language Processing (NLP) and is also a sub-task of Information Extraction. There has been a significant amount of work done in entity extraction and Named Entity Recognition for resource-rich languages. Entity extraction from code-mixed social media data like tweets from twitter complicates the problem due to its unstructured, informal, and incomplete information available in tweets. Here, we present work on NER in Kannada-English code-mixed social media corpus with corresponding named entity tags referring to Organisation (Org), Person (Pers), and Location (Loc). We experimented with machine learning classification models like Conditional Random Fields (CRF), Bi-LSTM, and Bi-LSTM-CRF models on our corpus.</abstract>
       <url hash="d67bf065">2022.icon-main.5</url>
@@ -160,7 +160,7 @@
       <author><first>Sandhya</first><last>Singh</last><affiliation>Banasthali Vidyapith</affiliation></author>
       <author><first>Kushagra</first><last>Shree</last><affiliation>IIT Patna</affiliation></author>
       <author><first>Sriparna</first><last>Saha</last><affiliation>IIT Patna</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>IIT Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>IIT Patna</affiliation></author>
       <author><first>Gladvin</first><last>Chinnadurai</last><affiliation>Lgsi</affiliation></author>
       <author><first>Manish</first><last>Vatsa</last><affiliation>Lgsi</affiliation></author>
       <pages>92-98</pages>
@@ -257,7 +257,7 @@
     <paper id="22">
       <title><fixed-case>SC</fixed-case>on<fixed-case>E</fixed-case>:Contextual Relevance based <fixed-case>S</fixed-case>ignificant <fixed-case>C</fixed-case>ompo<fixed-case>N</fixed-case>ent <fixed-case>E</fixed-case>xtraction from Contracts</title>
       <author><first>Hiranmai</first><last>Adibhatla</last><affiliation>IIIT Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>IIIT Hyderabad</affiliation></author>
       <pages>161-171</pages>
       <abstract>Automatic extraction of “significant” components of a legal contract, has the potential to simplify the end user’s comprehension. In essence, “significant” pieces of information have 1) information pertaining to material/practical details about a specific contract and 2) information that is novel or comes as a “surprise” for a specific type of contract. It indicates that the significance of a component may be defined at an individual contract level and at a contract-type level. A component, sentence, or paragraph, may be considered significant at a contract level if it contains contract-specific information (CSI), like names, dates, or currency terms. At a contract-type level, components that deviate significantly from the norm for the type may be considered significant (type-specific information (TSI)). In this paper, we present approaches to extract “significant” components from a contract at both these levels. We attempt to do this by identifying patterns in a pool of documents of the same kind. Consequently, in our approach, the solution is formulated in two parts: identifying CSI using a BERT-based contract-specific information extractor and identifying TSI by scoring sentences in a contract for their likelihood. In this paper, we even describe the annotated corpus of contract documents that we created as a first step toward the development of such a language-processing system. We also release a dataset of contract samples containing sentences belonging to CSI and TSI.</abstract>
       <url hash="6e26fa08">2022.icon-main.22</url>
@@ -300,7 +300,7 @@
       <author><first>Ankush</first><last>Agarwal</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Sakharam</first><last>Gawade</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Sachin</first><last>Channabasavarajendra</last><affiliation>Honeywell Technology Solutions Pvt Ltd</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last><affiliation>IIT Bombay</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last><affiliation>IIT Bombay</affiliation></author>
       <pages>204-211</pages>
       <abstract>The integration of knowledge graphs with deep learning is thriving in improving the performance of various natural language processing (NLP) tasks. In this paper, we focus on knowledge-infused link prediction and question answering using language models, T5, and BLOOM across three domains:Aviation, Movie, and Web. In this context, we infuse knowledge in large and small language models and study their performance, and find the performance to be similar. For the link prediction task on the Aviation Knowledge Graph, we obtain a 0.2 hits@1 score using T5-small, T5-base, T5-large, and BLOOM. Using template-based scripts, we create a set of 1 million synthetic factoid QA pairs in the aviation domain from National Transportation Safety Board (NTSB) reports. On our curated QA pairs, the three models of T5 achieve a 0.7 hits@1 score. We validate our findings with the paired student t test and Cohen’s kappa scores. For link prediction on Aviation Knowledge Graph using T5-small and T5-large, we obtain a Cohen’s kappa score of 0.76, showing substantial agreement between the models. Thus, we infer that small language models perform similar to large language models with the infusion of knowledge.</abstract>
       <url hash="5417e464">2022.icon-main.26</url>
@@ -321,7 +321,7 @@
     <paper id="28">
       <title>Genre Transfer in <fixed-case>NMT</fixed-case>:Creating Synthetic Spoken Parallel Sentences using Written Parallel Data</title>
       <author><first>Nalin</first><last>Kumar</last><affiliation>Charles University</affiliation></author>
-      <author><first>Ondrej</first><last>Bojar</last><affiliation>Charles University</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last><affiliation>Charles University</affiliation></author>
       <pages>224-233</pages>
       <abstract>Text style transfer (TST) aims to control attributes in a given text without changing the content. The matter gets complicated when the boundary separating two styles gets blurred. We can notice similar difficulties in the case of parallel datasets in spoken and written genres. Genuine spoken features like filler words and repetitions in the existing spoken genre parallel datasets are often cleaned during transcription and translation, making the texts closer to written datasets. This poses several problems for spoken genre-specific tasks like simultaneous speech translation. This paper seeks to address the challenge of improving spoken language translations. We start by creating a genre classifier for individual sentences and then try two approaches for data augmentation using written examples:(1) a novel method that involves assembling and disassembling spoken and written neural machine translation (NMT) models, and (2) a rule-based method to inject spoken features. Though the observed results for (1) are not promising, we get some interesting insights into the solution. The model proposed in (1) fine-tuned on the synthesized data from (2) produces naturally looking spoken translations for written-to-spoken genre transfer in En-Hi translation systems. We use this system to produce a second-stage En-Hi synthetic corpus, which however lacks appropriate alignments of explicit spoken features across the languages. For the final evaluation, we fine-tune Hi-En spoken translation systems on the synthesized parallel corpora. We observe that the parallel corpus synthesized using our rule-based method produces the best results.</abstract>
       <url hash="9e531975">2022.icon-main.28</url>
@@ -355,7 +355,7 @@
       <title>Similarity Based Label Smoothing For Dialogue Generation</title>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>253-259</pages>
       <abstract>Generative neural conversational systems are typically trained by minimizing the entropy loss between the training “hard” targets and the predicted logits. Performance gains and improved generalization are often achieved by employing regularization techniques like label smoothing, which converts the training “hard” targets to soft targets. However, label smoothing enforces a data independent uniform distribution on the incorrect training targets, leading to a false assumption of equiprobability. In this paper, we propose and experiment with incorporating data-dependent word similarity-based weighing methods to transform the uniform distribution of the incorrect target probabilities in label smoothing to a more realistic distribution based on semantics. We introduce hyperparameters to control the incorrect target distribution and report significant performance gains over networks trained using standard label smoothing-based loss on two standard open-domain dialogue corpora.</abstract>
       <url hash="8612aa80">2022.icon-main.31</url>
@@ -367,7 +367,7 @@
       <author><first>Subhrajit</first><last>Dey</last></author>
       <author><first>Md</first><last>Akhtar</last></author>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>260-268</pages>
       <abstract>Sentiment analysis with deep learning in resource-constrained languages is a challenging task. In this paper, we introduce a novel approach for sentiment analysis in resource-constrained scenarios using character embedding and cross-lingual sentiment analysis with transliteration. We use this method to introduce the novel task of inducing sentiment polarity of words and sentences and aspect term sentiment analysis in the no-resource scenario. We formulate this task by taking a metalingual approach whereby we transliterate data from closely related languages and transform it into a meta language. We also demonstrated the efficacy of using character-level embedding for sentence representation. We experimented with 4 Indian languages – Bengali, Hindi, Tamil, and Telugu, and obtained encouraging results. We also presented new state-of-the-art results on the Hindi sentiment analysis dataset leveraging our metalingual character embeddings.</abstract>
       <url hash="d49c28f3">2022.icon-main.32</url>
@@ -406,7 +406,7 @@
       <title><fixed-case>T</fixed-case>e<fixed-case>Q</fixed-case>u<fixed-case>AD</fixed-case>:<fixed-case>T</fixed-case>elugu Question Answering Dataset</title>
       <author><first>Rakesh</first><last>Vemula</last></author>
       <author><first>Mani</first><last>Nuthi</last></author>
-      <author><first>Manish</first><last>Srivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Srivastava</last></author>
       <pages>300-307</pages>
       <abstract>Recent state of the art models and new datasets have advanced many Natural Language Processing areas, especially, Machine Reading Comprehension tasks have improved with the help of datasets like SQuAD (Stanford Question Answering Dataset). But, large high quality datasets are still not a reality for low resource languages like Telugu to record progress in MRC. In this paper, we present a Telugu Question Answering Dataset - TeQuAD with the size of 82k parallel triples created by translating triples from the SQuAD. We also introduce a few methods to create similar Question Answering datasets for the low resource languages. Then, we present the performance of our models which outperform baseline models on Monolingual and Cross Lingual Machine Reading Comprehension (CLMRC) setups, the best of them resulting in an F1 score of 83 % and Exact Match (EM) score of 61 %.</abstract>
       <url hash="83f33ed7">2022.icon-main.36</url>
@@ -512,7 +512,7 @@
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Moein</first><last>Shahiki Tash</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>18-24</pages>
       <abstract>Language Identification at the Word Level in Kannada-English Texts. This paper describes the system paper of CoLI-Kanglish 2022 shared task. The goal of this task is to identify the different languages used in CoLI-Kanglish 2022. This dataset is distributed into different categories including Kannada, English, Mixed-Language, Location, Name, and Others. This Code-Mix was compiled by CoLI-Kanglish 2022 organizers from posts on social media. We use two classification techniques, KNN and SVM, and achieve an F1-score of 0.58 and place third out of nine competitors.</abstract>
       <url hash="b6036322">2022.icon-wlli.4</url>
@@ -538,7 +538,7 @@
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Moein</first><last>Shahiki Tash</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>29-33</pages>
       <abstract>The goal of code-mixed language identification (LID) is to determine which language is spoken or written in a given segment of a speech, word, sentence, or document. Our task is to identify English, Kannada, and mixed language from the provided data. To train a model we used the CoLI-Kenglish dataset, which contains English, Kannada, and mixed-language words. In our work, we conducted several experiments in order to obtain the best performing model. Then, we implemented the best model by using Bidirectional Long Short Term Memory (Bi-LSTM), which outperformed the other trained models with an F1-score of 0.61%.</abstract>
       <url hash="f118707c">2022.icon-wlli.6</url>
@@ -562,7 +562,7 @@
       <author><first>N.</first><last>Ashraf</last></author>
       <author><first>H.l.</first><last>Shashirekha</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>38-45</pages>
       <abstract>The task of Language Identification (LI) in text processing refers to automatically identifying the languages used in a text document. LI task is usually been studied at the document level and often in high-resource languages while giving less importance to low-resource languages. However, with the recent advance- ment in technologies, in a multilingual country like India, many low-resource language users post their comments using English and one or more language(s) in the form of code-mixed texts. Combination of Kannada and English is one such code-mixed text of mixing Kannada and English languages at various levels. To address the word level LI in code-mixed text, in CoLI-Kanglish shared task, we have focused on open-sourcing a Kannada-English code-mixed dataset for word level LI of Kannada, English and mixed-language words written in Roman script. The task includes classifying each word in the given text into one of six predefined categories, namely: Kannada (kn), English (en), Kannada-English (kn-en), Name (name), Lo-cation (location), and Other (other). Among the models submitted by all the participants, the best performing model obtained averaged-weighted and averaged-macro F1 scores of 0.86 and 0.62 respectively.</abstract>
       <url hash="79f62482">2022.icon-wlli.8</url>
diff --git a/data/xml/2022.ijclclp.xml b/data/xml/2022.ijclclp.xml
index 73e3343803..2f888d4016 100644
--- a/data/xml/2022.ijclclp.xml
+++ b/data/xml/2022.ijclclp.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2024-06-05" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 27, Number 1, June 2022</booktitle>
-      <editor><first>Siaw-Fong</first><last>Chung</last></editor>
+      <editor id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></editor>
       <editor><first>Rafal</first><last>Rzepka</last></editor>
-      <editor><first>Shih-ping</first><last>Wang</last></editor>
+      <editor id="shih-ping-wang"><first>Shih-ping</first><last>Wang</last></editor>
       <publisher>Association for Computational Linguistics and Chinese Language Processing</publisher>
       <address>Taipei, Taiwan</address>
       <month>June</month>
@@ -18,8 +18,8 @@
     </frontmatter>
     <paper id="1">
       <title>The Uniqueness in Speech: Prosodic Highlights-prompted Information Content Projection in Continuous Speech Speech</title>
-      <author><first>Helen Kai-yun</first><last>Chen</last></author>
-      <author><first>Chiu-yu</first><last>Tseng</last></author>
+      <author id="helen-kaiyun-chen"><first>Helen Kai-yun</first><last>Chen</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-yu</first><last>Tseng</last></author>
       <url hash="30c44413">2022.ijclclp-1.1</url>
       <bibkey>chen-tseng-2022-uniqueness</bibkey>
     </paper>
@@ -79,8 +79,8 @@
     <paper id="1">
       <title>Aligning Sentences in a Paragraph-Paraphrased Corpus with New Embedding-based Similarity Measures</title>
       <author><first>Aleksandra Smolka</first><last>Smolka</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="72067a2e">2022.ijclclp-2.1</url>
       <bibkey>smolka-etal-2022-aligning</bibkey>
@@ -112,7 +112,7 @@
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
       <author><first>Chi-Wei</first><last>Wang</last></author>
       <author><first>Fang-Chi</first><last>Yeh</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <url hash="0a9d2728">2022.ijclclp-2.4</url>
       <bibkey>chen-etal-2022-analyzing-discourse-functions</bibkey>
     </paper>
diff --git a/data/xml/2022.in2writing.xml b/data/xml/2022.in2writing.xml
index 89852bd163..5221861d5b 100644
--- a/data/xml/2022.in2writing.xml
+++ b/data/xml/2022.in2writing.xml
@@ -147,7 +147,7 @@
       <author><first>Nikos</first><last>Voskarides</last></author>
       <author><first>Edgar</first><last>Meij</last></author>
       <author><first>Sabrina</first><last>Sauer</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>72-73</pages>
       <abstract>Writers such as journalists often use automatic tools to find relevant content to include in their narratives. In this paper, we focus on supporting writers in the news domain to develop event-centric narratives. Given an incomplete narrative that specifies a main event and a context, we aim to retrieve news articles that discuss relevant events that would enable the continuation of the narrative. We formally define this task and propose a retrieval dataset construction procedure that relies on existing news articles to simulate incomplete narratives and relevant articles. Experiments on two datasets derived from this procedure show that state-of-the-art lexical and semantic rankers are not sufficient for this task. We show that combining those with a ranker that ranks articles by reverse chronological order outperforms those rankers alone. We also perform an in-depth quantitative and qualitative analysis of the results that sheds light on the characteristics of this task.</abstract>
       <url hash="f34aaaa3">2022.in2writing-1.10</url>
diff --git a/data/xml/2022.inlg.xml b/data/xml/2022.inlg.xml
index 28c34bfe1d..8e3e485ce2 100644
--- a/data/xml/2022.inlg.xml
+++ b/data/xml/2022.inlg.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 15th International Conference on Natural Language Generation</booktitle>
       <editor><first>Samira</first><last>Shaikh</last></editor>
-      <editor><first>Thiago</first><last>Ferreira</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="thiago-castro-ferreira"><first>Thiago</first><last>Ferreira</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Waterville, Maine, USA and virtual meeting</address>
       <month>July</month>
@@ -122,9 +122,9 @@
       <author><first>David</first><last>Schlangen</last></author>
       <author><first>Martin</first><last>Heckmann</last></author>
       <author><first>Heiko</first><last>Wersing</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>110-120</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="3d6ba171">2022.inlg-main.9</url>
       <attachment type="software" hash="a7ede148">2022.inlg-main.9.software.zip</attachment>
       <bibkey>attari-etal-2022-generating</bibkey>
@@ -184,9 +184,9 @@
       <author><first>Kevin</first><last>Ros</last></author>
       <author><first>Maxwell</first><last>Jong</last></author>
       <author><first>Chak Ho</first><last>Chan</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>186-195</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="89d436f2">2022.inlg-main.14</url>
       <attachment type="software" hash="469e960a">2022.inlg-main.14.software.zip</attachment>
       <bibkey>ros-etal-2022-generation</bibkey>
@@ -206,12 +206,12 @@
     </paper>
     <paper id="16">
       <title>Generating Landmark-based Manipulation Instructions from Image Pairs</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Henrik</first><last>Voigt</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <author><first>Philipp</first><last>Sadler</last></author>
       <pages>203-211</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="7ab68662">2022.inlg-main.16</url>
       <attachment type="software" hash="3d1d44bb">2022.inlg-main.16.software.zip</attachment>
       <bibkey>zarriess-etal-2022-generating</bibkey>
@@ -226,10 +226,10 @@
       <author><first>Mert</first><last>Inan</last></author>
       <author><first>Elizabeth</first><last>Nielsen</last></author>
       <author><first>Shahab</first><last>Raji</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
       <pages>212-224</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="fba33e46">2022.inlg-main.17</url>
       <attachment type="software" hash="2f487f07">2022.inlg-main.17.software.zip</attachment>
       <bibkey>alikhani-etal-2022-zero</bibkey>
@@ -266,9 +266,9 @@
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Juri</first><last>Opitz</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>246-259</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="08fbab70">2022.inlg-main.20</url>
       <attachment type="software" hash="047dd236">2022.inlg-main.20.software.zip</attachment>
       <bibkey>heinisch-etal-2022-strategies</bibkey>
@@ -328,9 +328,9 @@
       <title>Analogy Generation by Prompting Large Language Models: A Case Study of <fixed-case>I</fixed-case>nstruct<fixed-case>GPT</fixed-case></title>
       <author><first>Bhavya</first><last>Bhavya</last></author>
       <author><first>Jinjun</first><last>Xiong</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>298-312</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="1ead88db">2022.inlg-main.25</url>
       <bibkey>bhavya-etal-2022-analogy</bibkey>
       <doi>10.18653/v1/2022.inlg-main.25</doi>
@@ -340,8 +340,8 @@
     <meta>
       <booktitle>Proceedings of the 15th International Conference on Natural Language Generation: System Demonstrations</booktitle>
       <editor><first>Samira</first><last>Shaikh</last></editor>
-      <editor><first>Thiago</first><last>Ferreira</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="thiago-castro-ferreira"><first>Thiago</first><last>Ferreira</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Waterville, Maine, USA and virtual meeting</address>
       <month>July</month>
@@ -368,7 +368,7 @@
       <title>Generating Quizzes to Support Training on Quality Management and Assurance in Space Science and Engineering</title>
       <author><first>Andres</first><last>Garcia-Silva</last></author>
       <author><first>Cristian</first><last>Berrio Aroca</last></author>
-      <author><first>Jose Manuel</first><last>Gomez-Perez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>Jose Manuel</first><last>Gomez-Perez</last></author>
       <author><first>Jose</first><last>Martinez</last></author>
       <author><first>Patrick</first><last>Fleith</last></author>
       <author><first>Stefano</first><last>Scaglioni</last></author>
@@ -413,8 +413,8 @@
     <meta>
       <booktitle>Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges</booktitle>
       <editor><first>Samira</first><last>Shaikh</last></editor>
-      <editor><first>Thiago</first><last>Ferreira</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="thiago-castro-ferreira"><first>Thiago</first><last>Ferreira</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Waterville, Maine, USA and virtual meeting</address>
       <month>July</month>
@@ -432,7 +432,7 @@
       <author><first>Marie</first><last>Hledíková</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>1-11</pages>
       <abstract>We would host the AutoMin generation chal- lenge at INLG 2023 as a follow-up of the first AutoMin shared task at Interspeech 2021. Our shared task primarily concerns the automated generation of meeting minutes from multi-party meeting transcripts. In our first venture, we ob- served the difficulty of the task and highlighted a number of open problems for the community to discuss, attempt, and solve. Hence, we invite the Natural Language Generation (NLG) com- munity to take part in the second iteration of AutoMin. Like the first, the second AutoMin will feature both English and Czech meetings and the core task of summarizing the manually- revised transcripts into bulleted minutes. A new challenge we are introducing this year is to devise efficient metrics for evaluating the quality of minutes. We will also host an optional track to generate minutes for European parliamentary sessions. We carefully curated the datasets for the above tasks. Our ELITR Minuting Corpus has been recently accepted to LREC 2022 and publicly released. We are already preparing a new test set for evaluating the new shared tasks. We hope to carry forward the learning from the first AutoMin and instigate more community attention and interest in this timely yet chal- lenging problem. INLG, the premier forum for the NLG community, would be an appropriate venue to discuss the challenges and future of Automatic Minuting. The main objective of the AutoMin GenChal at INLG 2023 would be to come up with efficient methods to auto- matically generate meeting minutes and design evaluation metrics to measure the quality of the minutes.</abstract>
       <url hash="460f2228">2022.inlg-genchal.1</url>
@@ -455,7 +455,7 @@
     <paper id="3">
       <title><fixed-case>H</fixed-case>inglish<fixed-case>E</fixed-case>val Generation Challenge on Quality Estimation of Synthetic Code-Mixed Text: Overview and Results</title>
       <author><first>Vivek</first><last>Srivastava</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>19-25</pages>
       <abstract>We hosted a shared task to investigate the factors influencing the quality of the code- mixed text generation systems. The teams experimented with two systems that gener- ate synthetic code-mixed Hinglish sentences. They also experimented with human ratings that evaluate the generation quality of the two systems. The first-of-its-kind, proposed sub- tasks, (i) quality rating prediction and (ii) an- notators’ disagreement prediction of the syn- thetic Hinglish dataset made the shared task quite popular among the multilingual research community. A total of 46 participants com- prising 23 teams from 18 institutions reg- istered for this shared task. The detailed description of the task and the leaderboard is available at <url>https://codalab.lisn.upsaclay.fr/competitions/1688</url>.</abstract>
       <url hash="3023a5bf">2022.inlg-genchal.3</url>
@@ -468,7 +468,7 @@
       <author><first>Akshay</first><last>Goindani</last></author>
       <author><first>Anmol</first><last>Goel</last></author>
       <author><first>Naman</first><last>Ahuja</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>26-30</pages>
       <abstract>Code-Mixing is a phenomenon of mixing two or more languages in a speech event and is prevalent in multilingual societies. Given the low-resource nature of Code-Mixing, machine generation of code-mixed text is a prevalent approach for data augmentation. However, evaluating the quality of such machine gen- erated code-mixed text is an open problem. In our submission to HinglishEval, a shared- task collocated with INLG2022, we attempt to build models factors that impact the quality of synthetically generated code-mix text by pre- dicting ratings for code-mix quality. Hingli- shEval Shared Task consists of two sub-tasks - a) Quality rating prediction); b) Disagree- ment prediction. We leverage popular code- mixed metrics and embeddings of multilin- gual large language models (MLLMs) as fea- tures, and train task specific MLP regression models. Our approach could not beat the baseline results. However, for Subtask-A our team ranked a close second on F-1 and Co- hen’s Kappa Score measures and first for Mean Squared Error measure. For Subtask-B our ap- proach ranked third for F1 score, and first for Mean Squared Error measure. Code of our submission can be accessed here.</abstract>
@@ -507,9 +507,9 @@
     </paper>
     <paper id="8">
       <title>The 2022 <fixed-case>R</fixed-case>epro<fixed-case>G</fixed-case>en Shared Task on Reproducibility of Evaluations in <fixed-case>NLG</fixed-case>: Overview and Results</title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Anastasia</first><last>Shimorina</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>43-51</pages>
       <abstract>Against a background of growing interest in reproducibility in NLP and ML, and as part of an ongoing research programme designed to develop theory and practice of reproducibility assessment in NLP, we organised the second shared task on reproducibility of evaluations in NLG, ReproGen 2022. This paper describes the shared task, summarises results from the reproduction studies submitted, and provides further comparative analysis of the results. Out of six initial team registrations, we received submissions from five teams. Meta-analysis of the five reproduction studies revealed varying degrees of reproducibility, and allowed further tentative conclusions about what types of evaluation tend to have better reproducibility.</abstract>
@@ -522,7 +522,7 @@
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Zdeněk</first><last>Kasner</last></author>
       <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>52-61</pages>
       <abstract>In this paper, we present the results of two reproduction studies for the human evaluation originally reported by Dušek and Kasner (2020) in which the authors comparatively evaluated outputs produced by a semantic error detection system for data-to-text generation against reference outputs. In the first reproduction, the original evaluators repeat the evaluation, in a test of the repeatability of the original evaluation. In the second study, two new evaluators carry out the evaluation task, in a test of the reproducibility of the original evaluation under otherwise identical conditions. We describe our approach to reproduction, and present and analyse results, finding different degrees of reproducibility depending on result type, data and labelling task. Our resources are available and open-sourced.</abstract>
       <url hash="e58575d6">2022.inlg-genchal.9</url>
@@ -549,10 +549,10 @@
     </paper>
     <paper id="12">
       <title>Reproducing a Manual Evaluation of the Simplicity of Text Simplification System Outputs</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>80-85</pages>
       <abstract>In this paper we describe our reproduction study of the human evaluation of text simplic- ity reported by Nisioi et al. (2017). The work was carried out as part of the ReproGen Shared Task 2022 on Reproducibility of Evaluations in NLG. Our aim was to repeat the evaluation of simplicity for nine automatic text simplification systems with a different set of evaluators. We describe our experimental design together with the known aspects of the original experimental design and present the results from both studies. Pearson correlation between the original and reproduction scores is moderate to high (0.776). Inter-annotator agreement in the reproduction study is lower (0.40) than in the original study (0.66). We discuss challenges arising from the unavailability of certain aspects of the origi- nal set-up, and make several suggestions as to how reproduction of similar evaluations can be made easier in future.</abstract>
       <url hash="bc07bde7">2022.inlg-genchal.12</url>
@@ -568,7 +568,7 @@
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Martijn</first><last>Goudbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>86-93</pages>
       <abstract>In this paper, we describe our reproduction ef- fort of the paper: Towards Best Experiment Design for Evaluating Dialogue System Output by Santhanam and Shaikh (2019) for the 2022 ReproGen shared task. We aim to produce the same results, using different human evaluators, and a different implementation of the automatic metrics used in the original paper. Although overall the study posed some challenges to re- produce (e.g. difficulties with reproduction of automatic metrics and statistics), in the end we did find that the results generally replicate the findings of Santhanam and Shaikh (2019) and seem to follow similar trends.</abstract>
       <url hash="84e92f32">2022.inlg-genchal.13</url>
diff --git a/data/xml/2022.insights.xml b/data/xml/2022.insights.xml
index 8f63dd09b1..01b0680f65 100644
--- a/data/xml/2022.insights.xml
+++ b/data/xml/2022.insights.xml
@@ -83,7 +83,7 @@
     <paper id="6">
       <title>How Much Do Modifications to Transformer Language Models Affect Their Ability to Learn Linguistic Knowledge?</title>
       <author><first>Simeng</first><last>Sun</last></author>
-      <author><first>Brian</first><last>Dillon</last></author>
+      <author id="brian-w-dillon"><first>Brian</first><last>Dillon</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <pages>46-53</pages>
       <abstract>Recent progress in large pretrained language models (LMs) has led to a growth of analyses examining what kinds of linguistic knowledge are encoded by these models. Due to computational constraints, existing analyses are mostly conducted on publicly-released LM checkpoints, which makes it difficult to study how various factors during <i>training</i> affect the models’ acquisition of linguistic knowledge. In this paper, we train a suite of small-scale Transformer LMs that differ from each other with respect to architectural decisions (e.g., self-attention configuration) or training objectives (e.g., multi-tasking, focal loss). We evaluate these LMs on BLiMP, a targeted evaluation benchmark of multiple English linguistic phenomena. Our experiments show that while none of these modifications yields significant improvements on aggregate, changes to the loss function result in promising improvements on several subcategories (e.g., detecting adjunct islands, correctly scoping negative polarity items). We hope our work offers useful insights for future research into designing Transformer LMs that more effectively learn linguistic knowledge.</abstract>
@@ -109,7 +109,7 @@
       <author><first>Dawei</first><last>Zhu</last></author>
       <author><first>Michael A.</first><last>Hedderich</last></author>
       <author><first>Fangzhou</first><last>Zhai</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>62-67</pages>
       <abstract>Incorrect labels in training data occur when human annotators make mistakes or when the data is generated via weak or distant supervision. It has been shown that complex noise-handling techniques - by modeling, cleaning or filtering the noisy instances - are required to prevent models from fitting this label noise. However, we show in this work that, for text classification tasks with modern NLP models like BERT, over a variety of noise types, existing noise-handling methods do not always improve its performance, and may even deteriorate it, suggesting the need for further investigation. We also back our observations with a comprehensive analysis.</abstract>
@@ -122,7 +122,7 @@
       <title>Ancestor-to-Creole Transfer is Not a Walk in the Park</title>
       <author><first>Heather</first><last>Lent</last></author>
       <author><first>Emanuele</first><last>Bugliarello</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>68-74</pages>
       <abstract>We aim to learn language models for Creole languages for which large volumes of data are not readily available, and therefore explore the potential transfer from ancestor languages (the ‘Ancestry Transfer Hypothesis’). We find that standard transfer methods do not facilitate ancestry transfer. Surprisingly, different from other non-Creole languages, a very distinct two-phase pattern emerges for Creoles: As our training losses plateau, and language models begin to overfit on their source languages, perplexity on the Creoles drop. We explore if this compression phase can lead to practically useful language models (the ‘Ancestry Bottleneck Hypothesis’), but also falsify this. Moreover, we show that Creoles even exhibit this two-phase pattern even when training on random, unrelated languages. Thus Creoles seem to be typological outliers and we speculate whether there is a link between the two observations.</abstract>
       <url hash="3f99dae5">2022.insights-1.9</url>
@@ -191,7 +191,7 @@
       <title>Clustering Examples in Multi-Dataset Benchmarks with Item Response Theory</title>
       <author><first>Pedro</first><last>Rodriguez</last></author>
       <author><first>Phu Mon</first><last>Htut</last></author>
-      <author><first>John</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John</first><last>Lalor</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>100-112</pages>
       <abstract>In natural language processing, multi-dataset benchmarks for common tasks (e.g., SuperGLUE for natural language inference and MRQA for question answering) have risen in importance. Invariably, tasks and individual examples vary in difficulty. Recent analysis methods infer properties of examples such as difficulty. In particular, Item Response Theory (IRT) jointly infers example and model properties from the output of benchmark tasks (i.e., scores for each model-example pair). Therefore, it seems sensible that methods like IRT should be able to detect differences between datasets in a task. This work shows that current IRT models are not as good at identifying differences as we would expect, explain why this is difficult, and outline future directions that incorporate more (textual) signal from examples.</abstract>
@@ -206,7 +206,7 @@
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>113-118</pages>
       <abstract>Natural language guided embodied task completion is a challenging problem since it requires understanding natural language instructions, aligning them with egocentric visual observations, and choosing appropriate actions to execute in the environment to produce desired changes. We experiment with augmenting a transformer model for this task with modules that effectively utilize a wider field of view and learn to choose whether the next step requires a navigation or manipulation action. We observed that the proposed modules resulted in improved, and in fact state-of-the-art performance on an unseen validation set of a popular benchmark dataset, ALFRED. However, our best model selected using the unseen validation set underperforms on the unseen test split of ALFRED, indicating that performance on the unseen validation set may not in itself be a sufficient indicator of whether model improvements generalize to unseen test sets. We highlight this result as we believe it may be a wider phenomenon in machine learning tasks but primarily noticeable only in benchmarks that limit evaluations on test splits, and highlights the need to modify benchmark design to better account for variance in model performance.</abstract>
       <url hash="d37bb9a1">2022.insights-1.15</url>
@@ -229,7 +229,7 @@
     <paper id="17">
       <title>The Document Vectors Using Cosine Similarity Revisited</title>
       <author><first>Zhang</first><last>Bingyu</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>129-133</pages>
       <abstract>The current state-of-the-art test accuracy (97.42%) on the IMDB movie reviews dataset was reported by Thongtan and Phienthrakul (2019) and achieved by the logistic regression classifier trained on the Document Vectors using Cosine Similarity (DV-ngrams-cosine) proposed in their paper and the Bag-of-N-grams (BON) vectors scaled by Naïve Bayesian weights. While large pre-trained Transformer-based models have shown SOTA results across many datasets and tasks, the aforementioned model has not been surpassed by them, despite being much simpler and pre-trained on the IMDB dataset only. In this paper, we describe an error in the evaluation procedure of this model, which was found when we were trying to analyze its excellent performance on the IMDB dataset. We further show that the previously reported test accuracy of 97.42% is invalid and should be corrected to 93.68%. We also analyze the model performance with different amounts of training data (subsets of the IMDB dataset) and compare it to the Transformer-based RoBERTa model. The results show that while RoBERTa has a clear advantage for larger training sets, the DV-ngrams-cosine performs better than RoBERTa when the labeled training set is very small (10 or 20 documents). Finally, we introduce a sub-sampling scheme based on Naïve Bayesian weights for the training process of the DV-ngrams-cosine, which leads to faster training and better quality.</abstract>
       <url hash="98b28aca">2022.insights-1.17</url>
@@ -239,9 +239,9 @@
     </paper>
     <paper id="18">
       <title>Challenges in including extra-linguistic context in pre-trained language models</title>
-      <author><first>Ionut</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut</first><last>Sorodoc</last></author>
       <author><first>Laura</first><last>Aina</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>134-138</pages>
       <abstract>To successfully account for language, computational models need to take into account both the linguistic context (the content of the utterances) and the extra-linguistic context (for instance, the participants in a dialogue). We focus on a referential task that asks models to link entity mentions in a TV show to the corresponding characters, and design an architecture that attempts to account for both kinds of context. In particular, our architecture combines a previously proposed specialized module (an “entity library”) for character representation with transfer learning from a pre-trained language model. We find that, although the model does improve linguistic contextualization, it fails to successfully integrate extra-linguistic information about the participants in the dialogue. Our work shows that it is very challenging to incorporate extra-linguistic information into pre-trained language models.</abstract>
       <url hash="9362e98a">2022.insights-1.18</url>
@@ -279,7 +279,7 @@
       <author><first>Vinayshekhar</first><last>Kumar</last></author>
       <author><first>Vaibhav</first><last>Kumar</last></author>
       <author><first>Mukul</first><last>Bhutani</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>154-158</pages>
       <abstract>In this work, we examine the problems associated with neural dialog models under the common theme of compositionality. Specifically, we investigate three manifestations of compositionality: (1) Productivity, (2) Substitutivity, and (3) Systematicity. These manifestations shed light on the generalization, syntactic robustness, and semantic capabilities of neural dialog models. We design probing experiments by perturbing the training data to study the above phenomenon. We make informative observations based on automated metrics and hope that this work increases research interest in understanding the capacity of these models.</abstract>
       <url hash="47556e99">2022.insights-1.21</url>
diff --git a/data/xml/2022.isa.xml b/data/xml/2022.isa.xml
index 3d79de6d02..825240429e 100644
--- a/data/xml/2022.isa.xml
+++ b/data/xml/2022.isa.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-09-23" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -36,7 +36,7 @@
     </paper>
     <paper id="3">
       <title>Guidelines and a Corpus for Extracting Biographical Events</title>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <author><first>Enrico</first><last>Mensa</last></author>
       <author><first>Rossana</first><last>Damiano</last></author>
       <author><first>Daniele</first><last>Radicioni</last></author>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="5">
       <title>Event Sequencing Annotation with <fixed-case>TIE</fixed-case>-<fixed-case>ML</fixed-case></title>
-      <author><first>Damir</first><last>Cavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last></author>
       <author><first>Ali</first><last>Aljubailan</last></author>
       <author><first>Ludovic</first><last>Mompelat</last></author>
       <author><first>Yuna</first><last>Won</last></author>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="6">
       <title>Measuring Similarity by Linguistic Features rather than Frequency</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Nicolò</first><last>Busetto</last></author>
       <pages>42–52</pages>
       <abstract>In the use and creation of current Deep Learning Models the only number that is used for the overall computation is the frequency value associated with the current word form in the corpus, which is used to substitute it. Frequency values come in two forms: absolute and relative. Absolute frequency is used indirectly when selecting the vocabulary against which the word embeddings are created: the cutoff threshold is usually fixed at 30/50K entries of the most frequent words. Relative frequency comes in directly when computing word embeddings based on co-occurrence values of the tokens included in a window size 2/5 adjacent tokens. The latter values are then used to compute similarity, mostly based on cosine distance. In this paper we will evaluate the impact of these two frequency parameters on a small corpus of Italian sentences whose main features are two: presence of very rare words and of non-canonical structures. Rather than basing our evaluation on cosine measure alone, we propose a graded scale of scores which are linguistically motivated. The results computed on the basis of a perusal of BERT’s raw embeddings shows that the two parameters conspire to decide the level of predictability.</abstract>
@@ -86,7 +86,7 @@
       <title>Testing the Annotation Consistency of Hallidayan Transitivity Processes: A Multi-variable Structural Approach</title>
       <author><first>Min</first><last>Dong</last></author>
       <author><first>Xiaoyan</first><last>Liu</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>53–60</pages>
       <abstract>SFL seeks to explain identifiable, observable phenomena of language use in context through the application of a theoretical framework which models language as a functional, meaning making system (Halliday &amp; Matthiessen 2004). Due to the lack of explicit annotation criteria and the divide between conceptual vs. syntactic criteria in practice, it has been a tough job to achieve consistency in the annotation of Hallidayn transitivity processes. The present study proposed that explicit structural and syntactic criteria should be adopted as a basis. Drawing on syntactic and grammatical features as judgement cues, we applied structurally oriented criteria for the annotation of the process categories and participant roles combining a set of interrelated syntactic variables and established the annotation criteria for contextualised circumstantial categories in structural as well as semantic terms. An experiment was carried out to test the usefulness of these annotation criteria, applying percent agreement and Cohen’s kappa as measurements of interrater reliability between the two annotators in each of the five pairs. The results verified our assumptions, albeit rather mildly, and, more significantly, offered some first empirical indications about the practical consistency of transitivity analysis in SFL. In the future work, the research team expect to draw on the insights and experience from some of the ISO standards devoted to semantic annotation such as dialogue acts (Bunt et al. 2012) and semantic roles (ISO-24617-4, 2014).</abstract>
       <url hash="94088abf">2022.isa-1.7</url>
@@ -170,11 +170,11 @@
     </paper>
     <paper id="16">
       <title>Towards Practical Semantic Interoperability in <fixed-case>NLP</fixed-case> Platforms</title>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Rémi</first><last>Calizzano</last></author>
       <author><first>Florian</first><last>Kintzel</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
       <pages>118–126</pages>
       <abstract>Interoperability is a necessity for the resolution of complex tasks that require the interconnection of several NLP services. This article presents the approaches that were adopted in three scenarios to address the respective interoperability issues. The first scenario describes the creation of a common REST API for a specific platform, the second scenario presents the interconnection of several platforms via mapping of different representation formats and the third scenario shows the complexities of interoperability through semantic schema mapping or automatic translation.</abstract>
diff --git a/data/xml/2022.iwslt.xml b/data/xml/2022.iwslt.xml
index 07610a69ea..ef16719676 100644
--- a/data/xml/2022.iwslt.xml
+++ b/data/xml/2022.iwslt.xml
@@ -54,7 +54,7 @@
       <author><first>Frithjof</first><last>Petrick</last></author>
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Christian</first><last>Herold</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>32-42</pages>
       <abstract>After its introduction the Transformer architecture quickly became the gold standard for the task of neural machine translation. A major advantage of the Transformer compared to previous architectures is the faster training speed achieved by complete parallelization across timesteps due to the use of attention over recurrent layers. However, this also leads to one of the biggest problems of the Transformer, namely the quadratic time and memory complexity with respect to the input length. In this work we adapt the locality-sensitive hashing approach of Kitaev et al. (2020) to self-attention in the Transformer, we extended it to cross-attention and apply this memory efficient framework to sentence- and document-level machine translation. Our experiments show that the LSH attention scheme for sentence-level comes at the cost of slightly reduced translation quality. For document-level NMT we are able to include much bigger context sizes than what is possible with the baseline Transformer. However, more context does neither improve translation quality nor improve scores on targeted test suites.</abstract>
       <url hash="5eec2b82">2022.iwslt-1.4</url>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Who Are We Talking About? Handling Person Names in Speech Translation</title>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>62-73</pages>
       <abstract>Recent work has shown that systems for speech translation (ST) – similarly to automatic speech recognition (ASR) – poorly handle person names. This shortcoming does not only lead to errors that can seriously distort the meaning of the input, but also hinders the adoption of such systems in application scenarios (like computer-assisted interpreting) where the translation of named entities, like person names, is crucial. In this paper, we first analyse the outputs of ASR/ST systems to identify the reasons of failures in person name transcription/translation. Besides the frequency in the training data, we pinpoint the nationality of the referred person as a key factor. We then mitigate the problem by creating multilingual models, and further improve our ST systems by forcing them to jointly generate transcripts and translations, prioritising the former over the latter. Overall, our solutions result in a relative improvement in token-level person name accuracy by 47.8% on average for three language pairs (en-&gt;es,fr,it).</abstract>
@@ -87,7 +87,7 @@
       <title>Joint Generation of Captions and Subtitles with Dual Decoding</title>
       <author><first>Jitao</first><last>Xu</last></author>
       <author><first>François</first><last>Buet</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>74-82</pages>
@@ -127,10 +127,10 @@
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marcely</first><last>Zanon Boito</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Anna</first><last>Currey</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Maha</first><last>Elbayad</last></author>
       <author><first>Clara</first><last>Emmanuel</last></author>
@@ -151,7 +151,7 @@
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>Maria</first><last>Nǎdejde</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Xing</first><last>Niu</last></author>
       <author><first>John</first><last>Ortega</last></author>
@@ -159,11 +159,11 @@
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Jiatong</first><last>Shi</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Katsuhito</first><last>Sudoh</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Yogesh</first><last>Virkar</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <author><first>Changhan</first><last>Wang</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
       <pages>98-157</pages>
@@ -188,7 +188,7 @@
       <title><fixed-case>A</fixed-case>mazon <fixed-case>A</fixed-case>lexa <fixed-case>AI</fixed-case>’s System for <fixed-case>IWSLT</fixed-case> 2022 Offline Speech Translation Shared Task</title>
       <author><first>Akshaya Vishnu Kudlu</first><last>Shanbhogue</last></author>
       <author><first>Ran</first><last>Xue</last></author>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Sarah</first><last>Campbell</last></author>
       <pages>169-176</pages>
       <abstract>This paper describes Amazon Alexa AI’s submission to the IWSLT 2022 Offline Speech Translation Task. Our system is an end-to-end speech translation model that leverages pretrained models and cross modality transfer learning. We detail two improvements to the knowledge transfer schema. First, we implemented a new loss function that reduces knowledge gap between audio and text modalities in translation task effectively. Second, we investigate multiple finetuning strategies including sampling loss, language grouping and domain adaption. These strategies aims to bridge the gaps between speech and text translation tasks. We also implement a multi-stage segmentation and merging strategy that yields improvements on the unsegmented development datasets. Results show that the proposed loss function consistently improves BLEU scores on the development datasets for both English-German and multilingual models. Additionally, certain language pairs see BLEU score improvements with specific finetuning strategies.</abstract>
@@ -202,7 +202,7 @@
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Dennis</first><last>Fucci</last></author>
       <author><first>Giuseppe</first><last>Fiameni</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>177-189</pages>
       <abstract>The primary goal of this FBK’s systems submission to the IWSLT 2022 offline and simultaneous speech translation tasks is to reduce model training costs without sacrificing translation quality. As such, we first question the need of ASR pre-training, showing that it is not essential to achieve competitive results. Second, we focus on data filtering, showing that a simple method that looks at the ratio between source and target characters yields a quality improvement of 1 BLEU. Third, we compare different methods to reduce the detrimental effect of the audio segmentation mismatch between training data manually segmented at sentence level and inference data that is automatically segmented. Towards the same goal of training cost reduction, we participate in the simultaneous task with the same model trained for offline ST. The effectiveness of our lightweight training strategy is shown by the high score obtained on the MuST-C en-de corpus (26.7 BLEU) and is confirmed in high-resource data conditions by a 1.6 BLEU improvement on the IWSLT2020 test set over last year’s winning system.</abstract>
@@ -212,13 +212,13 @@
     </paper>
     <paper id="14">
       <title>Effective combination of pretrained models - <fixed-case>KIT</fixed-case>@<fixed-case>IWSLT</fixed-case>2022</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Tuan Nam</first><last>Nguyen</last></author>
       <author><first>Thai-Binh</first><last>Nguyen</last></author>
       <author><first>Danni</first><last>Liu</last></author>
       <author><first>Carlos</first><last>Mullov</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>190-197</pages>
       <abstract>Pretrained models in acoustic and textual modalities can potentially improve speech translation for both Cascade and End-to-end approaches. In this evaluation, we aim at empirically looking for the answer by using the wav2vec, mBART50 and DeltaLM models to improve text and speech translation models. The experiments showed that the presence of these models together with an advanced audio segmentation method results in an improvement over the previous end-to-end system by up to 7 BLEU points. More importantly, the experiments showed that given enough data and modeling capacity to overcome the training difficulty, we can outperform even very competitive Cascade systems. In our experiments, this gap can be as large as 2.0 BLEU points, the same gap that the Cascade often led over the years.</abstract>
       <url hash="a0eed6a0">2022.iwslt-1.14</url>
@@ -357,12 +357,12 @@
       <author><first>Javier</first><last>Jorge Cano</last></author>
       <author><first>Alejandro</first><last>Pérez-González-de-Martos</last></author>
       <author><first>Adrián</first><last>Giménez Pastor</last></author>
-      <author><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
+      <author id="goncal-v-garces-diaz-munio"><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
       <author><first>Pau</first><last>Baquero-Arnal</last></author>
       <author><first>Joan Albert</first><last>Silvestre-Cerdà</last></author>
       <author><first>Jorge</first><last>Civera Saiz</last></author>
       <author><first>Albert</first><last>Sanchis</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>255-264</pages>
       <abstract>This work describes the participation of the MLLP-VRAIN research group in the two shared tasks of the IWSLT 2022 conference: Simultaneous Speech Translation and Speech-to-Speech Translation. We present our streaming-ready ASR, MT and TTS systems for Speech Translation and Synthesis from English into German. Our submission combines these systems by means of a cascade approach paying special attention to data preparation and decoding for streaming inference.</abstract>
       <url hash="ddc85806">2022.iwslt-1.22</url>
@@ -375,7 +375,7 @@
       <author><first>Gerard I.</first><last>Gállego</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
       <author><first>José</first><last>Fonollosa</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>265-276</pages>
       <abstract>This paper describes the submissions of the UPC Machine Translation group to the IWSLT 2022 Offline Speech Translation and Speech-to-Speech Translation tracks. The offline task involves translating English speech to German, Japanese and Chinese text. Our Speech Translation systems are trained end-to-end and are based on large pretrained speech and text models. We use an efficient fine-tuning technique that trains only specific layers of our system, and explore the use of adapter modules for the non-trainable layers. We further investigate the suitability of different speech encoders (wav2vec 2.0, HuBERT) for our models and the impact of knowledge distillation from the Machine Translation model that we use for the decoder (mBART). For segmenting the IWSLT test sets we fine-tune a pretrained audio segmentation model and achieve improvements of 5 BLEU compared to the given segmentation. Our best single model uses HuBERT and parallel adapters and achieves 29.42 BLEU at English-German MuST-C tst-COMMON and 26.77 at IWSLT 2020 test. By ensembling many models, we further increase translation quality to 30.83 BLEU and 27.78 accordingly. Furthermore, our submission for English-Japanese achieves 15.85 and English-Chinese obtains 25.63 BLEU on the MuST-C tst-COMMON sets. Finally, we extend our system to perform English-German Speech-to-Speech Translation with a pretrained Text-to-Speech model.</abstract>
       <url hash="020821cf">2022.iwslt-1.23</url>
@@ -385,13 +385,13 @@
     <paper id="24">
       <title><fixed-case>CUNI</fixed-case>-<fixed-case>KIT</fixed-case> System for Simultaneous Speech Translation Task at <fixed-case>IWSLT</fixed-case> 2022</title>
       <author><first>Peter</first><last>Polák</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Tuan Nam</first><last>Nguyen</last></author>
       <author><first>Danni</first><last>Liu</last></author>
       <author><first>Carlos</first><last>Mullov</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>277-285</pages>
       <abstract>In this paper, we describe our submission to the Simultaneous Speech Translation at IWSLT 2022. We explore strategies to utilize an offline model in a simultaneous setting without the need to modify the original model. In our experiments, we show that our onlinization algorithm is almost on par with the offline setting while being 3x faster than offline in terms of latency on the test set. We also show that the onlinized offline model outperforms the best IWSLT2021 simultaneous system in medium and high latency regimes and is almost on par in the low latency regime. We make our system publicly available.</abstract>
       <url hash="e028d5d1">2022.iwslt-1.24</url>
@@ -475,7 +475,7 @@
       <author><first>Jinyi</first><last>Yang</last></author>
       <author><first>Amir</first><last>Hussein</last></author>
       <author><first>Matthew</first><last>Wiesner</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>319-326</pages>
       <abstract>This paper details the Johns Hopkins speech translation (ST) system used in the IWLST2022 dialect speech translation task. Our system uses a cascade of automatic speech recognition (ASR) and machine translation (MT). We use a Conformer model for ASR systems and a Transformer model for machine translation. Surprisingly, we found that while using additional ASR training data resulted in only a negligible change in performance as measured by BLEU or word error rate (WER), aggressive text normalization improved BLEU more significantly. We also describe an approach, similar to back-translation, for improving performance using synthetic dialectal source text produced from source sentences in mismatched dialects.</abstract>
       <url hash="da06169c">2022.iwslt-1.29</url>
@@ -497,7 +497,7 @@
       <title>Controlling Formality in Low-Resource <fixed-case>NMT</fixed-case> with Domain Adaptation and Re-Ranking: <fixed-case>SLT</fixed-case>-<fixed-case>CDT</fixed-case>-<fixed-case>U</fixed-case>o<fixed-case>S</fixed-case> at <fixed-case>IWSLT</fixed-case>2022</title>
       <author><first>Sebastian</first><last>Vincent</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>341-350</pages>
       <abstract>This paper describes the SLT-CDT-UoS group’s submission to the first Special Task on Formality Control for Spoken Language Translation, part of the IWSLT 2022 Evaluation Campaign. Our efforts were split between two fronts: data engineering and altering the objective function for best hypothesis selection. We used language-independent methods to extract formal and informal sentence pairs from the provided corpora; using English as a pivot language, we propagated formality annotations to languages treated as zero-shot in the task; we also further improved formality controlling with a hypothesis re-ranking approach. On the test sets for English-to-German and English-to-Spanish, we achieved an average accuracy of .935 within the constrained setting and .995 within unconstrained setting. In a zero-shot setting for English-to-Russian and English-to-Italian, we scored average accuracy of .590 for constrained setting and .659 for unconstrained.</abstract>
       <url hash="b99f5632">2022.iwslt-1.31</url>
diff --git a/data/xml/2022.jeptalnrecital.xml b/data/xml/2022.jeptalnrecital.xml
index bd55d125aa..7b1927655e 100644
--- a/data/xml/2022.jeptalnrecital.xml
+++ b/data/xml/2022.jeptalnrecital.xml
@@ -20,8 +20,8 @@
     <paper id="1">
       <title>Abstraction ou hallucination ? État des lieux et évaluation du risque pour les modèles de génération de résumés automatiques de type séquence-à-séquence (Abstraction or Hallucination ? Status and Risk assessment for sequence-to-sequence Automatic)</title>
       <author><first>Eunice</first><last>Akani</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>2–11</pages>
       <abstract>La génération de texte a récemment connu un très fort intérêt au vu des avancées notables dans le domaine des modèles de langage neuronaux. Malgré ces avancées, cette tâche reste difficile quand il s’agit d’un résumé automatique de texte par abstraction. Certains systèmes de résumés génèrent des textes qui ne sont pas forcément fidèles au document source. C’est sur cette thématique que porte notre étude. Nous présentons une typologie d’erreurs pour les résumés automatique et ainsi qu’une caractérisation du phénomène de l’abstraction pour les résumés de référence afin de mieux comprendre l’ampleur de ces différents phénomènes sur les entités nommées. Nous proposons également une mesure d’évaluation du risque d’erreur lorsqu’un système tente de faire des abstractions sur les entités nommées d’un document.</abstract>
       <url hash="13faac4d">2022.jeptalnrecital-taln.1</url>
@@ -59,7 +59,7 @@
       <author><first>Yizhou</first><last>Xu</last></author>
       <author><first>Kata</first><last>Gábor</last></author>
       <author><first>Leila</first><last>Khouas</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <pages>42–53</pages>
       <abstract>La détection d’anomalies textuelles est une tâche importante de la fouille de textes. Plusieurs approches générales, visant l’identification de points de données aberrants, ont été appliqués dans ce domaine. Néanmoins, ces approches exploitent peu les nouvelles avancées du traitement automatique des langues naturelles (TALN). L’avènement des modèles de langage pré-entraînés comme BERT et GPT-2 a donné naissance à un nouveau paradigme de l’apprentissage automatique appelé ingénierie d’invite (prompt engineering) qui a montré de bonnes performances sur plusieurs tâches du TALN. Cet article présente un travail exploratoire visant à examiner la possibilité de détecter des anomalies textuelles à l’aide de l’ingénierie d’invite. Dans nos expérimentations, nous avons examiné la performance de différents modèles d’invite. Les résultats ont montré que l’ingénierie d’invite est une méthode prometteuse pour la détection d’anomalies textuelles.</abstract>
       <url hash="c6b52880">2022.jeptalnrecital-taln.4</url>
@@ -71,7 +71,7 @@
       <author><first>Kevin</first><last>Deturck</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
       <author><first>Namrata</first><last>Patel</last></author>
-      <author><first>Frederique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
       <pages>54–63</pages>
       <abstract>L’influence sociale est un phénomène important dans divers domaines, tels que l’économie et la politique, qui a gagné en résonnance avec la popularité des médias sociaux, notamment les réseaux sociaux et les forums. La majorité des travaux sur ce sujet propose des approches fondées sur des théories en sciences humaines (sociologie, linguistique), et des techniques d’analyse de réseau (mesures de propagation et de centralité) ou de TAL. Dans cet article, nous présentons un modèle d’influence inspiré de travaux en psychologie sociale, sur lequel nous construisons un système combinant un module de TAL pour détecter les messages reflétant les processus d’influence, associé à une analyse par centralité de la transmission de ces messages. Nos expériences sur le forum de débats Change My View montrent que l’approche par hybridation, comparée à la centralité seule, aide à mieux détecter les influenceurs.</abstract>
       <url hash="02ac19eb">2022.jeptalnrecital-taln.5</url>
@@ -81,9 +81,9 @@
     <paper id="6">
       <title>Étiquetage ou génération de séquences pour la compréhension automatique du langage en contexte d’interaction? (Sequence tagging or sequence generation for Natural Language Understanding ?)</title>
       <author><first>Rim</first><last>Abrougui</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>64–73</pages>
       <abstract>La tâche de compréhension automatique du langage en contexte d’interaction (NLU pour Natural Language Understanding) est souvent réduite à la détection d’intentions et de concepts sur des corpus mono-domaines annotés avec une seule intention par énoncé. Afin de dépasser ce paradigme, nous cherchons à aborder des référentiels plus complexes en visant des représentations sémantiques structurées au-delà du simple modèle intention/concept. Nous nous intéressons au corpus MultiWOZ, couramment utilisé pour le suivi de l’état du dialogue. Nous questionnons la projection de ces annotations sémantiques complexes pour le NLU, en comparant plusieurs approches d’étiquetage de séquence, puis en proposant un nouveau formalisme inspiré des méthodes de génération de graphe pour la modélisation sémantique AMR. Nous discutons enfin le potentiel des approches génératives.</abstract>
       <url hash="59ee5a09">2022.jeptalnrecital-taln.6</url>
@@ -117,7 +117,7 @@
     <paper id="9">
       <title>Filtrage et régularisation pour améliorer la plausibilité des poids d’attention dans la tâche d’inférence en langue naturelle (Filtering and regularization to improve the plausibility of attention weights in <fixed-case>NLI</fixed-case>)</title>
       <author><first>Duc</first><last>Hau Nguyen</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>95–103</pages>
       <abstract>Nous étudions la plausibilité d’un mécanisme d’attention pour une tâche d’inférence de phrases (entailment), c’est-à-dire sa capacité à fournir une explication plausible pour un humain de la relation entre deux phrases. En s’appuyant sur le corpus Explanation-Augmented Standford Natural Language Inference, il a été montré que les poids d’attention sont peu plausibles en pratique et tendent à ne pas se concentrer sur les tokens importants. Nous étudions ici différentes approches pour rendre les poids d’attention plus plausibles, en nous appuyant sur des masques issus d’une analyse morphosyntaxique ou sur une régularisation pour forcer la parcimonie. Nous montrons que ces stratégies permettent d’améliorer sensiblement la plausibilité des poids d’attention et s’avèrent plus performantes que les approches par carte de saillance.</abstract>
@@ -129,8 +129,8 @@
       <title>Génération de question à partir d’analyse sémantique pour l’adaptation non supervisée de modèles de compréhension de documents (Question generation from semantic analysis for unsupervised adaptation of document understanding models)</title>
       <author><first>Elie</first><last>Antoine</last></author>
       <author><first>Jeremy</first><last>Auguste</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>104–115</pages>
       <abstract>La génération automatique de questions à partir de textes peut permettre d’obtenir des corpus d’apprentissage pour des modèles de compréhension de documents de type question/réponse sur des textes. Si cette tâche de génération est désormais appréhendée par des modèles de type séquence-àséquence basés sur de grands modèles de langage pré-entraînés, le choix des segments réponses à partir desquels seront générées les questions est l’un des principaux aspects différenciant les méthodes de génération de corpus de question/réponse. Nous proposons dans cette étude d’exploiter l’analyse sémantique de textes pour sélectionner des réponses plausibles et enrichir le processus de génération par des traits sémantiques génériques. Les questions générées sont évaluées dans leur capacité à être utilisées pour entraîner un modèle de question-réponse sur un nouveau corpus d’archives numérisées.</abstract>
       <url hash="2242862a">2022.jeptalnrecital-taln.10</url>
@@ -182,11 +182,11 @@
     <paper id="15">
       <title>Le projet <fixed-case>FREEM</fixed-case> : ressources, outils et enjeux pour l’étude du français d’Ancien Régime (The <fixed-case>F</fixed-case> <fixed-case>RE</fixed-case> <fixed-case>EM</fixed-case> project: Resources, tools and challenges for the study of Ancien Régime <fixed-case>F</fixed-case>rench)</title>
       <author><first>Simon</first><last>Gabay</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Alexandre</first><last>Bartz</last></author>
       <author><first>Philippe</first><last>Gambette</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>154–165</pages>
       <abstract>En dépit de leur qualité certaine, les ressources et outils disponibles pour l’analyse du français d’Ancien Régime ne sont plus à même de répondre aux enjeux de la recherche en linguistique et en littérature pour cette période. Après avoir précisément défini le cadre chronologique retenu, nous présentons les corpus mis à disposition et les résultats obtenus avec eux pour plusieurs tâches de TAL fondamentales à l’étude de la langue et de la littérature.</abstract>
       <url hash="e9c2881b">2022.jeptalnrecital-taln.15</url>
@@ -290,7 +290,7 @@
       <author><first>Loïc</first><last>Fosse</last></author>
       <author><first>Duc-Hau</first><last>Nguyen</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <pages>247–256</pages>
       <abstract>Nous étudions les propriétés statistiques des plongements dans les modèles transformers pour le français. Nous nous appuyons sur une analyse de la variance, des similarités cosinus intra-phrase et du rang effectif des plongements aux différents niveaux d’un transformer, pour des modèles pré-entraînés et des modèles adaptés à la classification de textes. Nous montrons que les modèles FlauBERT et CamemBERT pré-entraînés ont des comportements très différents même si les deux ont une tendance à générer des représentations anisotropiques, c’est-à-dire se concentrant dans un cône au sein de l’espace des plongements, comme observé pour l’anglais. L’adaptation à la classification de textes modifie le comportement des modèles, notamment dans les dernières couches, et procure une tendance forte à l’alignement des plongements, réduisant également la dimension effective de l’espace au final. Nous mettons également en évidence un lien entre convergence des plongements au sein d’une phrase et classification de texte, lien dont la nature reste difficile à appréhender.</abstract>
       <url hash="a8d67bcc">2022.jeptalnrecital-taln.24</url>
@@ -312,7 +312,7 @@
       <title>Adaptation au domaine de modèles de langue à l’aide de réseaux à base de graphes (Graph Neural Networks for Adapting General Domain Language Modèles Specialised Corpora)</title>
       <author><first>Merieme</first><last>Bouhandi</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>270–279</pages>
       <abstract>Les modèles de langue prodonds encodent les propriétés linguistiques et sont utilisés comme entrée pour des modèles plus spécifiques. Utiliser leurs représentations de mots telles quelles pour des domaines peu dotés se révèle être moins efficace. De plus, ces modèles négligent souvent les informations globales sur le vocabulaire au profit d’une plus forte dépendance à l’attention. Nous considérons que ces informations influent sur les résultats des tâches en aval. Leur combinaison avec les représentations contextuelles est effectuée à l’aide de réseaux de neurones à base de graphes. Nous montrons que l’utilité de cette combinaison qui surpassent les performances de baselines.</abstract>
       <url hash="1ccb57f3">2022.jeptalnrecital-taln.26</url>
@@ -326,7 +326,7 @@
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Iskander</first><last>Keskes</last></author>
       <author><first>Jean</first><last>Yves Antoine</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>280–286</pages>
       <abstract>Cet article décrit nos efforts pour étendre le projet PARSEME à l’arabe standard moderne. L’applicabilité du guide d’annotation de PARSEME a été testée en mesurant l’accord inter-annotateurs dès la première phase d’annotation. Un sous-ensemble de 1062 phrases du Prague Arabic Dependency Treebank (PADT) a été sélectionné et annoté indépendamment par deux locutrices natives arabes. Suite à leurs annotations, un nouveau corpus arabe avec plus de 1250 expressions polylexicales verbales (EPV) annotées a été construit.</abstract>
       <url hash="dba9360c">2022.jeptalnrecital-taln.27</url>
@@ -337,7 +337,7 @@
       <title><fixed-case>CLISTER</fixed-case> : Un corpus pour la similarité sémantique textuelle dans des cas cliniques en français (<fixed-case>CLISTER</fixed-case> : A Corpus for Semantic Textual Similarity in <fixed-case>F</fixed-case>rench Clinical Narratives)</title>
       <author><first>Nicolas</first><last>Hiebel</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <pages>287–296</pages>
       <abstract>Le TAL repose sur la disponibilité de corpus annotés pour l’entraînement et l’évaluation de modèles. Il existe très peu de ressources pour la similarité sémantique dans le domaine clinique en français. Dans cette étude, nous proposons une définition de la similarité guidée par l’analyse clinique et l’appliquons au développement d’un nouveau corpus partagé de 1 000 paires de phrases annotées manuellement en scores de similarité. Nous évaluons ensuite le corpus par des expériences de mesure automatique de similarité. Nous montrons ainsi qu’un modèle de plongements de phrases peut capturer la similarité avec des performances à l’état de l’art sur le corpus DEFT STS (Spearman=0,8343). Nous montrons également que le contenu du corpus CLISTER est complémentaire de celui de DEFT STS.</abstract>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="30">
       <title>Classification automatique de questions spontanées vs. préparées dans des transcriptions de l’oral (Automatic Classification of Spontaneous vs)</title>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Angèle</first><last>Barbedette</last></author>
       <author><first>Xingyu</first><last>Liu</last></author>
       <author><first>Valentin-Gabriel</first><last>Soumah</last></author>
@@ -408,7 +408,7 @@
     <paper id="34">
       <title>Fine-tuning de modèles de langues pour la veille épidémiologique multilingue avec peu de ressources (Fine-tuning Language Models for Low-resource Multilingual Epidemic Surveillance)</title>
       <author><first>Stephen</first><last>Mutuvi</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Adam</first><last>Jatowt</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
@@ -421,7 +421,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs: Extension à une langue autre que l’anglais d’un corpus de mesure des biais sociétaux dans les modèles de langue masqués (<fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs : Extending a challenge dataset for measuring social bias in masked language models to a language other than <fixed-case>E</fixed-case>nglish)</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
       <author><first>Julien</first><last>Bezançon</last></author>
       <author><first>Karën</first><last>Fort</last></author>
@@ -445,8 +445,8 @@
     </paper>
     <paper id="37">
       <title>L’importance des entités pour la tâche de détection d’événements en tant que système de question-réponse (Exploring Entities in Event Detection as Question Answering)</title>
-      <author><first>Emanuela</first><last>Boros</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>374–383</pages>
       <abstract>Dans cet article, nous abordons un paradigme récent et peu étudié pour la tâche de détection d’événements en la présentant comme un problème de question-réponse avec possibilité de réponses multiples et le support d’entités. La tâche d’extraction des déclencheurs d’événements est ainsi transformée en une tâche d’identification des intervalles de réponse à partir d’un contexte, tout en se concentrant également sur les entités environnantes. L’architecture est basée sur un modèle de langage pré-entraîné et finement ajusté, où le contexte d’entrée est augmenté d’entités marquées à différents niveaux, de leurs positions, de leurs types et, enfin, de leurs rôles d’arguments. Nos expériences sur le corpus ACE 2005 démontrent que le modèle proposé exploite correctement les informations sur les entités dans le cadre de la détection des événements et qu’il constitue une solution viable pour cette tâche. De plus, nous démontrons que notre méthode, avec différents marqueurs d’entités, est particulièrement capable d’extraire des types d’événements non vus dans des contextes d’apprentissage en peu de coups.</abstract>
@@ -458,7 +458,7 @@
       <title>Les représentations distribuées sont-elles vraiment distribuées ? Observations sur la localisation de l’information syntaxique dans les tâches d’accord du verbe en français (How Distributed are Distributed Representations ? An Observation on the Locality of Syntactic)</title>
       <author><first>Bingzhi</first><last>Li</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>384–391</pages>
       <abstract>Ce travail aborde la question de la localisation de l’information syntaxique qui est encodée dans les représentations de transformers. En considérant la tâche d’accord objet-participe passé en français, les résultats de nos sondes linguistiques montrent que les informations nécessaires pour accomplir la tâche sont encodées d’une manière locale dans les représentations de mots entre l’antécédent du pronom relatif objet et le participe passé cible. En plus, notre analyse causale montre que le modèle s’appuie essentiellement sur les éléments linguistiquement motivés (i.e. antécédent et pronom relatif) pour prédire le nombre du participe passé.</abstract>
       <url hash="8d0199b0">2022.jeptalnrecital-taln.38</url>
@@ -499,7 +499,7 @@
       <title>Tâches Auxiliaires Multilingues pour le Transfert de Modèles de Détection de Discours Haineux (Multilingual Auxiliary Tasks for Zero-Shot Cross-Lingual Transfer of Hate Speech Detection)</title>
       <author><first>Arij</first><last>Riabi</last></author>
       <author><first>Syrielle</first><last>Montariol</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>413–423</pages>
       <abstract>La tâche de détection de contenus haineux est ardue, car elle nécessite des connaissances culturelles et contextuelles approfondies ; les connaissances nécessaires varient, entre autres, selon la langue du locateur ou la cible du contenu. Or, des données annotées pour des domaines et des langues spécifiques sont souvent absentes ou limitées. C’est là que les données dans d’autres langues peuvent être exploitées ; mais du fait de ces variations, le transfert cross-lingue est souvent difficile. Dans cet article, nous mettons en évidence cette limitation pour plusieurs domaines et langues et montrons l’impact positif de l’apprentissage de tâches auxiliaires multilingues - analyse de sentiments, reconnaissance des entités nommées et tâches reposant sur des informations morpho-syntaxiques - sur le transfert cross-lingue zéro-shot des modèles de détection de discours haineux, afin de combler ce fossé culturel.</abstract>
       <url hash="d1f7039b">2022.jeptalnrecital-taln.41</url>
@@ -508,7 +508,7 @@
     </paper>
     <paper id="42">
       <title>Tâches auxiliaires pour l’analyse biaffine en graphes de dépendances (Auxiliary tasks to boost Biaffine Semantic Dependency Parsing)</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>424–433</pages>
       <abstract>L’analyseur biaffine de Dozat &amp; Manning (2017), qui produit des arbres de dépendances syntaxiques, a été étendu avec succès aux graphes de dépendances syntaxico-sémantiques (Dozat &amp; Manning, 2018). Ses performances sur les graphes sont étonnamment hautes étant donné que, sans la contrainte de devoir produire un arbre, les arcs pour une phrase donnée sont prédits indépendamment les uns des autres. Pour y remédier partiellement, tout en conservant la complexité O(n2 ) et l’architecture hautement parallélisable, nous proposons d’utiliser des tâches auxiliaires qui introduisent une forme d’interdépendance entre les arcs. Les expérimentations sur les trois jeux de données anglaises de la tâche 18 SemEval-2015 (Oepen et al., 2015), et sur des graphes syntaxiques profonds en français (Ribeyre et al., 2014) montrent une amélioration modeste mais systématique, par rapport à un système de base performant, utilisant un modèle de langue pré-entraîné. Notre méthode s’avère ainsi un moyen simple et robuste d’améliorer l’analyse vers graphes de dépendances.</abstract>
       <url hash="d2385731">2022.jeptalnrecital-taln.42</url>
@@ -522,7 +522,7 @@
       <author><first>Camille</first><last>Guinaudeau</last></author>
       <author><first>Hervé</first><last>Le Borgne</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Jesús</first><last>Lovón-Melgarejo</last></author>
       <pages>434–444</pages>
       <abstract>Dans le contexte général des traitements multimodaux, nous nous intéressons à la tâche de réponse à des questions visuelles à propos d’entités nommées en utilisant des bases de connaissances (KVQAE). Nous mettons à disposition ViQuAE, un nouveau jeu de données de 3 700 questions associées à des images, annoté à l’aide d’une méthode semi-automatique. C’est le premier jeu de données de KVQAE comprenant des types d’entités variés associé à une base de connaissances composée d’1,5 million d’articles Wikipédia, incluant textes et images. Nous proposons également un modèle de référence de KVQAE en deux étapes : recherche d’information puis extraction des réponses. Les résultats de nos expériences démontrent empiriquement la difficulté de la tâche et ouvrent la voie à une meilleure représentation multimodale des entités nommées.</abstract>
@@ -559,8 +559,8 @@
       <title>Quand être absent de m<fixed-case>BERT</fixed-case> n’est que le commencement : Gérer de nouvelles langues à l’aide de modèles de langues multilingues (When Being Unseen from m<fixed-case>BERT</fixed-case> is just the Beginning : Handling New Languages With Multilingual Language Models)</title>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>450–451</pages>
       <abstract>L’apprentissage par transfert basé sur le pré-entraînement de modèles de langue sur une grande quantité de données brutes est devenu la norme pour obtenir des performances état de l’art en TAL. Cependant, la façon dont cette approche devrait être appliquée pour des langues inconnues, qui ne sont couvertes par aucun modèle de langue multilingue à grande échelle et pour lesquelles seule une petite quantité de données brutes est le plus souvent disponible, n’est pas claire. Dans ce travail, en comparant des modèles multilingues et monolingues, nous montrons que de tels modèles se comportent de multiples façons sur des langues inconnues. Certaines langues bénéficient grandement de l’apprentissage par transfert et se comportent de manière similaire à des langues proches riches en ressource, alors que ce n’est manifestement pas le cas pour d’autres. En nous concentrant sur ces dernières, nous montrons dans ce travail que cet échec du transfert est largement lié à l’impact du script que ces langues utilisent. Nous montrons que la translittération de ces langues améliore considérablement le potentiel des larges modèles de langue neuronaux multilingues pour des tâches en aval. Ce résultat indique une piste prometteuse pour rendre ces modèles massivement multilingues utiles pour de nouveaux ensembles de langues absentes des données d’entraînement.</abstract>
       <url hash="7fc5a439">2022.jeptalnrecital-taln.46</url>
@@ -746,7 +746,7 @@
       <author><first>Ha</first><last>Quang Le</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>15–17</pages>
       <abstract>Dans cette démonstration, nous présenterons les travaux en cours pour l’annotation d’un nouveau corpus de questions-réponses en langue Française. Contrairement aux corpus existant comme “FQuad” ou “Piaf”, nous nous intéressons à l’annotation de questions-réponses “non factuelles”. En effet, si dans la littérature, de nombreux corpus et modèles de questions-réponses pré-entraînés sont disponibles, ceux-ci ne privilégient que rarement les annotations s’appuyant sur un schéma de raisonnement issue de l’agrégation de différentes sources ou contextes. L’objectif du projet associé est de parvenir à la création d’un assistant virtuel pour l’éducation, ainsi des réponses explicatives, de raisonnement et/ou d’agrégation de l’information sont à privilégier. Notons enfin, que la volumétrie des données doit être conséquente, en particulier par la considération d’approches neuronales génératives ou extractives. Actuellement, nous disposons de 262 questions et réponses obtenues durant l’étape de validation de la campagne d’annotation. Une deuxième phase d’annotation avec une volumétrie plus importante débutera fin mai 2022 (environ 8000 questions).</abstract>
       <url hash="ddfbda5d">2022.jeptalnrecital-demo.5</url>
@@ -895,7 +895,7 @@
     <paper id="5">
       <title>Reconnaissance automatique des appellations d’œuvres visuelles antiques (Recognition of classical visual works appellations)</title>
       <author><first>Aurore</first><last>Lessieux</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Anne-Violaine</first><last>Szabados</last></author>
       <author><first>Marlène</first><last>Nazarian</last></author>
       <pages>36–44</pages>
@@ -941,7 +941,7 @@
     <paper id="9">
       <title>Simulation d’erreurs d’<fixed-case>OCR</fixed-case> dans les systèmes de <fixed-case>TAL</fixed-case> pour le traitement de données anachroniques (Simulation of <fixed-case>OCR</fixed-case> errors in <fixed-case>NLP</fixed-case> systems for processing anachronistic data)</title>
       <author><first>Baptiste</first><last>Blouin</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Jeremy</first><last>Auguste</last></author>
       <pages>78–87</pages>
       <abstract>L’extraction d’information offre de nouvelles perspectives au sein des recherches historiques. Cependant, la majorité des recherches liées à ce domaine s’effectue sur des données contemporaines. Malgré l’évolution constante des systèmes d’OCR, les textes historiques résultant de ce procédé contiennent toujours de multiples erreurs. Du fait d’un manque de ressources historiques dédiées au TAL, le traitement de ce domaine reste dépendant de l’utilisation de ressources contemporaines. De nombreuses études ont démontré l’impact négatif que pouvaient avoir les erreurs d’OCR sur les systèmes prêts à l’emploi contemporains. Mais l’évaluation des nouvelles architectures, proposant des résultats prometteurs sur des données récentes, face à ce problème reste encore très minime. Dans cette étude, nous quantifions l’impact des erreurs d’OCR sur trois tâches d’extraction d’information en utilisant plusieurs architectures de type Transformers. Au vu de ces résultats, nous proposons une approche permettant de réduire de plus de 50% cet impact sans avoir recours à des ressources historiques spécialisées.</abstract>
diff --git a/data/xml/2022.jlcl.xml b/data/xml/2022.jlcl.xml
index a38956db9d..b29708a463 100644
--- a/data/xml/2022.jlcl.xml
+++ b/data/xml/2022.jlcl.xml
@@ -34,7 +34,7 @@
       <editor><first>Ines</first><last>Rehbein</last></editor>
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
       <editor><first>Goran</first><last>Glavaš</last></editor>
-      <editor><first>Simone Paolo</first><last>Ponzetto</last></editor>
+      <editor id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></editor>
       <publisher>German Society for Computational Lingustics and Language Technology</publisher>
       <address>Germany</address>
       <month>Jul.</month>
diff --git a/data/xml/2022.konvens.xml b/data/xml/2022.konvens.xml
index 1963413ea4..26982948e5 100644
--- a/data/xml/2022.konvens.xml
+++ b/data/xml/2022.konvens.xml
@@ -40,7 +40,7 @@
     </paper>
     <paper id="3">
       <title>Lemma Hunting: Automatic Spelling Normalization for <fixed-case>G</fixed-case>erman <fixed-case>CMC</fixed-case> Corpora</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>16–20</pages>
       <url hash="4c0f3c82">2022.konvens-1.3</url>
       <bibkey>bick-2022-lemma</bibkey>
@@ -66,8 +66,8 @@
     <paper id="6">
       <title>Adapting <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Semantic Web</title>
       <author><first>Claus</first><last>Zinn</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>41–47</pages>
       <url hash="132b5ae4">2022.konvens-1.6</url>
       <bibkey>zinn-etal-2022-adapting</bibkey>
@@ -87,7 +87,7 @@
       <title>Measuring Faithfulness of Abstractive Summaries</title>
       <author><first>Tim</first><last>Fischer</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>63–73</pages>
       <url hash="ed01389a">2022.konvens-1.8</url>
       <bibkey>fischer-etal-2022-measuring</bibkey>
@@ -107,7 +107,7 @@
     <paper id="10">
       <title>Do gender neutral affixes naturally reduce gender bias in static word embeddings?</title>
       <author><first>Jonas</first><last>Wagner</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>88–97</pages>
       <url hash="39b14327">2022.konvens-1.10</url>
       <bibkey>wagner-zarriess-2022-gender</bibkey>
@@ -116,7 +116,7 @@
       <title>Improved Open Source Automatic Subtitling for Lecture Videos</title>
       <author><first>Robert</first><last>Geislinger</last></author>
       <author><first>Benjamin</first><last>Milde</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>98–103</pages>
       <url hash="355a7fc1">2022.konvens-1.11</url>
       <bibkey>geislinger-etal-2022-improved</bibkey>
@@ -132,7 +132,7 @@
       <title>Improved Opinion Role Labelling in Parliamentary Debates</title>
       <author><first>Laura</first><last>Bamberg</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
       <pages>110–120</pages>
       <url hash="619f3dce">2022.konvens-1.13</url>
       <bibkey>bamberg-etal-2022-improved</bibkey>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="15">
       <title>This isn’t the bias you’re looking for: Implicit causality, names and gender in <fixed-case>G</fixed-case>erman language models</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Hannes</first><last>Groener</last></author>
       <author><first>Torgrim</first><last>Solstad</last></author>
       <author><first>Oliver</first><last>Bott</last></author>
@@ -167,7 +167,7 @@
     <paper id="17">
       <title>Semantic Role Labeling for Sentiment Inference: A Case Study</title>
       <author><first>Manfred</first><last>Klenner</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <pages>144–149</pages>
       <url hash="e1a2dad8">2022.konvens-1.17</url>
       <bibkey>klenner-gohring-2022-semantic</bibkey>
@@ -187,7 +187,7 @@
       <author><first>Saba</first><last>Anwar</last></author>
       <author><first>Fynn</first><last>Petersen-Frey</last></author>
       <author><first>Seid</first><last>Muhie Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>156–166</pages>
       <url hash="2dc5a739">2022.konvens-1.19</url>
       <bibkey>remus-etal-2022-like</bibkey>
diff --git a/data/xml/2022.latechclfl.xml b/data/xml/2022.latechclfl.xml
index dfd11ed1ec..bb7b394238 100644
--- a/data/xml/2022.latechclfl.xml
+++ b/data/xml/2022.latechclfl.xml
@@ -6,7 +6,7 @@
       <editor><first>Stefania</first><last>Degaetano</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>International Conference on Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
       <month>October</month>
@@ -105,7 +105,7 @@
     </paper>
     <paper id="9">
       <title>The Distribution of Deontic Modals in Jane Austen’s Mature Novels</title>
-      <author><first>Lauren</first><last>Levine</last></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last></author>
       <pages>70–74</pages>
       <abstract>Deontic modals are auxiliary verbs which express some kind of necessity, obligation, or moral recommendation. This paper investigates the collocation and distribution within Jane Austen’s six mature novels of the following deontic modals: must, should, ought, and need. We also examine the co-occurrences of these modals with name mentions of the heroines in the six novels, categorizing each occurrence with a category of obligation if applicable. The paper offers a brief explanation of the categories of obligation chosen for this investigation. In order to examine the types of obligations associated with each heroine, we then investigate the distribution of these categories in relation to mentions of each heroine. The patterns observed show a general concurrence with the thematic characterizations of Austen’s heroines which are found in literary analysis.</abstract>
       <url hash="4298e127">2022.latechclfl-1.9</url>
@@ -167,7 +167,7 @@
     <paper id="15">
       <title>Measuring Presence of Women and Men as Information Sources in News</title>
       <author><first>Muitze</first><last>Zulaika</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
       <pages>126–134</pages>
       <abstract>In the news, statements from information sources are often quoted, made by individuals who interact in the news. Detecting those quotes and the gender of their sources is a key task when it comes to media analysis from a gender perspective. It is a challenging task: the structure of the quotes is variable, gender marks are not present in many languages, and quote authors are often omitted due to frequent use of coreferences. This paper proposes a strategy to measure the presence of women and men as information sources in news. We approach the problem of detecting sentences including quotes and the gender of the speaker as a joint task, by means of a supervised multiclass classifier of sentences. We have created the first datasets for Spanish and Basque by manually annotating quotes and the gender of the associated sources in news items. The results obtained show that BERT based approaches are significantly better than bag-of-words based classical ones, achieving accuracies close to 90%. We also analyse a bilingual learning strategy and generating additional training examples synthetically; both provide improvements up to 3.4% and 5.6%, respectively.</abstract>
diff --git a/data/xml/2022.lateraisse.xml b/data/xml/2022.lateraisse.xml
index 14a910edf2..5239774ec8 100644
--- a/data/xml/2022.lateraisse.xml
+++ b/data/xml/2022.lateraisse.xml
@@ -32,7 +32,7 @@
     <paper id="2">
       <title>Objectifying Women? A Syntactic Bias in <fixed-case>F</fixed-case>rench and <fixed-case>E</fixed-case>nglish Corpora.</title>
       <author><first>Yanis</first><last>da Cunha</last></author>
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <pages>8–16</pages>
       <abstract>Gender biases in syntax have been documented for languages with grammatical gender for cases where mixed-gender coordination structures take masculine agreement, or with male-first preference in the ordering of pairs (Adam and Eve). On the basis of various annotated corpora spanning different genres (fiction, newspapers, speech and web), we show another syntactic gender bias: masculine pronouns are more often subjects than feminine pronouns, in both English and French. We find the same bias towards masculine subjects for French human nouns, which then refer to males and females. Comparing the subject of passive verbs and the object of active verbs, we show that this syntactic function bias is not reducible to a bias in semantic role assignment since it is also found with non-agentive subjects. For French fiction, we also found that the masculine syntactic function bias is larger in text written by male authors – female authors seem to be unbiased. We finally discuss two principles as possible explanations, ‘Like Me’ and ‘Easy first’, and examine the effect of the discourse tendency for men being agents and topics. We conclude by addressing the impact of such biases in language technologies.</abstract>
       <url hash="4adecc86">2022.lateraisse-1.2</url>
@@ -63,7 +63,7 @@
       <title>Identifying Hate Speech Using Neural Networks and Discourse Analysis Techniques</title>
       <author><first>Zehra Melce</first><last>Hüsünbeyi</last></author>
       <author><first>Didar</first><last>Akar</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>32–41</pages>
       <abstract>Discriminatory language, in particular hate speech, is a global problem posing a grave threat to democracy and human rights. Yet, it is not always easy to identify, as it is rarely explicit. In order to detect hate speech, we developed Hierarchical Attention Network (HAN) based and Bidirectional Encoder Representations from Transformer (BERT) based deep learning models to capture the changing discursive cues and understand the context around the discourse. In addition, we designed linguistic features using critical discourse analysis techniques and integrated them into these neural network models. We studied the compatibility of our model with the hate speech detection problem by comparing it with traditional machine learning models, as well as a Convolution Neural Network (CNN) based model, a Convolutional Neural Network-Gated Recurrent Unit (CNN-GRU) based model which reached significant performance results for hate speech detection. Our results on a manually annotated corpus of print media in Turkish show that the proposed approach is effective for hate speech detection. We believe that the feature sets created for the Turkish language will encourage new studies in the quantitative analysis of hate speech.</abstract>
       <url hash="63d4e5a3">2022.lateraisse-1.5</url>
diff --git a/data/xml/2022.law.xml b/data/xml/2022.law.xml
index bc5a0342ae..0a42bd16a0 100644
--- a/data/xml/2022.law.xml
+++ b/data/xml/2022.law.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2022-09-23" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022</booktitle>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
-      <editor><first>Sandra</first><last>Kuebler</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -46,7 +46,7 @@
       <title>Converting the <fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Yueh-Yin</first><last>Shih</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>23–30</pages>
       <abstract>This paper describes the conversion of the Sinica Treebank, one of the major Mandarin Chinese treebanks, to Universal Dependencies. The conversion is rule-based and the process involves POS tag mapping, head adjusting in line with the UD scheme and the dependency conversion. Linguistic insights into Mandarin Chinese alongwith the conversion are also discussed. The resulting corpus is the UD Chinese Sinica Treebank which contains more than fifty thousand tree structures according to the UD scheme. The dataset can be downloaded at <url>https://github.com/ckiplab/ud</url>.</abstract>
       <url hash="fa8cea66">2022.law-1.4</url>
@@ -64,7 +64,7 @@
       <title>The Sensitivity of Annotator Bias to Task Definitions in Argument Mining</title>
       <author><first>Terne Sasha</first><last>Thorn Jakobsen</last></author>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>David</first><last>Lassen</last></author>
       <pages>44–61</pages>
       <abstract>NLP models are dependent on the data they are trained on, including how this data is annotated. NLP research increasingly examines the social biases of models, but often in the light of their training data and specific social biases that can be identified in the text itself. In this paper, we present an annotation experiment that is the first to examine the extent to which social bias is sensitive to how data is annotated. We do so by collecting annotations of arguments in the same documents following four different guidelines and from four different demographic annotator backgrounds. We show that annotations exhibit widely different levels of group disparity depending on which guidelines annotators follow. The differences are not explained by task complexity, but rather by characteristics of these demographic groups, as previously identified by sociological studies. We release a dataset that is small in the number of instances but large in the number of annotations with demographic information, and our results encourage an increased awareness of annotator bias.</abstract>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="8">
       <title>Advantages of a Complex Multilayer Annotation Scheme: The Case of the <fixed-case>P</fixed-case>rague Dependency Treebank</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <author><first>Marie</first><last>Mikulová</last></author>
       <author><first>Barbora</first><last>Štěpánková</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
@@ -109,7 +109,7 @@
       <author><first>Kevin</first><last>Deturck</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
       <author><first>Namrata</first><last>Patel</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <pages>85–90</pages>
       <abstract>To develop an influencer detection system, we designed an influence model based on the analysis of conversations in the “Change My View” debate forum. This led us to identify enunciative features (argumentation, emotion expression, view change, ...) related to influence between participants. In this paper, we present the annotation campaign we conducted to build up a reference corpus on these enunciative features. The annotation task was to identify in social media posts the text segments that corresponded to each enunciative feature. The posts to be annotated were extracted from two social media: the “Change My View” debate forum, with discussions on various topics, and Twitter, with posts from users identified as supporters of ISIS (Islamic State of Iraq and Syria). Over a thousand posts have been double or triple annotated throughout five annotation sessions gathering a total of 27 annotators. Some of the sessions involved the same annotators, which allowed us to analyse the evolution of their annotation work. Most of the sessions resulted in a reconciliation phase between the annotators, allowing for discussion and iterative improvement of the guidelines. We measured and analysed inter-annotator agreements over the course of the sessions, which allowed us to validate our iterative approach.</abstract>
       <url hash="883667e4">2022.law-1.10</url>
@@ -119,7 +119,7 @@
       <title>Charon: A <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Annotation Tool for Multimodal Corpora</title>
       <author><first>Frederico</first><last>Belcavello</last></author>
       <author><first>Marcelo</first><last>Viridiano</last></author>
-      <author><first>Ely</first><last>Matos</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely</first><last>Matos</last></author>
       <author><first>Tiago</first><last>Timponi Torrent</last></author>
       <pages>91–96</pages>
       <abstract>This paper presents Charon, a web tool for annotating multimodal corpora with FrameNet categories. Annotation can be made for corpora containing both static images and video sequences paired – or not – with text sequences. The pipeline features, besides the annotation interface, corpus import and pre-processing tools.</abstract>
@@ -140,7 +140,7 @@
     <paper id="13">
       <title><fixed-case>M</fixed-case>idas Loop: A Prioritized Human-in-the-Loop Annotation for Large Scale Multilayer Data</title>
       <author><first>Luke</first><last>Gessler</last></author>
-      <author><first>Lauren</first><last>Levine</last></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>103–110</pages>
       <abstract>Large scale annotation of rich multilayer corpus data is expensive and time consuming, motivating approaches that integrate high quality automatic tools with active learning in order to prioritize human labeling of hard cases. A related challenge in such scenarios is the concurrent management of automatically annotated data and human annotated data, particularly where different subsets of the data have been corrected for different types of annotation and with different levels of confidence. In this paper we present [REDACTED], a collaborative, version-controlled online annotation environment for multilayer corpus data which includes integrated provenance and confidence metadata for each piece of information at the document, sentence, token and annotation level. We present a case study on improving annotation quality in an existing multilayer parse bank of English called AMALGUM, focusing on active learning in corpus preprocessing, at the surprisingly challenging level of sentence segmentation. Our results show improvements to state-of-the-art sentence segmentation and a promising workflow for getting “silver” data to approach gold standard quality.</abstract>
@@ -163,7 +163,7 @@
     </paper>
     <paper id="15">
       <title>Putting Context in <fixed-case>SNACS</fixed-case>: A 5-Way Classification of Adpositional Pragmatic Markers</title>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
@@ -175,7 +175,7 @@
     <paper id="16">
       <title>Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically</title>
       <author><first>Nicholas</first><last>Elder</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Sudipta</first><last>Singha Roy</last></author>
       <pages>129–138</pages>
       <abstract>This paper presents a method for semi-automatically building a corpus of full-text English-language biomedical articles annotated with part-of-speech tags. The outcomes are a semi-automatic procedure to create a large silver standard corpus of 5 million sentences drawn from a large corpus of full-text biomedical articles annotated for part-of-speech, and a robust, easy-to-use software tool that assists the investigation of differences in two tagged datasets. The method to build the corpus uses two part-of-speech taggers designed to tag biomedical abstracts followed by a human dispute settlement when the two taggers differ on the tagging of a token. The dispute resolution aspect is facilitated by the software tool which organizes and presents the disputed tags. The corpus and all of the software that has been implemented for this study are made publicly available.</abstract>
@@ -198,7 +198,7 @@
       <title>A Cognitive Approach to Annotating Causal Constructions in a Cross-Genre Corpus</title>
       <author><first>Angela</first><last>Cao</last></author>
       <author><first>Gregor</first><last>Williamson</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>151–159</pages>
       <abstract>We present a scheme for annotating causal language in various genres of text. Our annotation scheme is built on the popular categories of cause, enable, and prevent. These vague categories have many edge cases in natural language, and as such can prove difficult for annotators to consistently identify in practice. We introduce a decision based annotation method for handling these edge cases. We demonstrate that, by utilizing this method, annotators are able to achieve inter-annotator agreement which is comparable to that of previous studies. Furthermore, our method performs equally well across genres, highlighting the robustness of our annotation scheme. Finally, we observe notable variation in usage and frequency of causal language across different genres.</abstract>
       <url hash="ade3a81d">2022.law-1.18</url>
@@ -208,7 +208,7 @@
       <title>Automatic Enrichment of <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations</title>
       <author><first>Yuxin</first><last>Ji</last></author>
       <author><first>Gregor</first><last>Williamson</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>160–169</pages>
       <abstract>Abstract Meaning Representation (AMR) is a semantic graph framework which inadequately represent a number of important semantic features including number, (in)definiteness, quantifiers, and intensional contexts. Several proposals have been made to improve the representational adequacy of AMR by enriching its graph structure. However, these modifications are rarely added to existing AMR corpora due to the labor costs associated with manual annotation. In this paper, we develop an automated annotation tool which algorithmically enriches AMR graphs to better represent number, (in)definite articles, quantificational determiners, and intensional arguments. We compare our automatically produced annotations to gold-standard manual annotations and show that our automatic annotator achieves impressive results. All code for this paper, including our automatic annotation tool, is made publicly available.</abstract>
       <url hash="82862689">2022.law-1.19</url>
@@ -217,7 +217,7 @@
     <paper id="20">
       <title><fixed-case>GRAIL</fixed-case>—<fixed-case>G</fixed-case>eneralized Representation and Aggregation of Information Layers</title>
       <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>170–181</pages>
       <abstract>This paper identifies novel characteristics necessary to successfully represent multiple streams of natural language information from speech and text simultaneously, and proposes a multi-tiered system that implements these characteristics centered around a declarative configuration. The system facilitates easy incremental extension by allowing the creation of composable workflows of loosely coupled extensions, or plugins, allowing simple intial systems to be extended to accomodate rich representations while maintaining high data integrity. Key to this is leveraging established tools and technologies. We demonstrate using a small example.</abstract>
       <url hash="e5f69c30">2022.law-1.20</url>
diff --git a/data/xml/2022.lchange.xml b/data/xml/2022.lchange.xml
index 52d503c12a..ed7f48dbfb 100644
--- a/data/xml/2022.lchange.xml
+++ b/data/xml/2022.lchange.xml
@@ -28,9 +28,9 @@
       <author><first>Marieke</first><last>Van Erp</last></author>
       <author><first>Inger</first><last>Leemans</last></author>
       <author><first>Pasquale</first><last>Lisena</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <author><first>William</first><last>Tullett</last></author>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <author><first>Ger</first><last>Dijkstra</last></author>
       <author><first>Femke</first><last>Gordijn</last></author>
       <author><first>Elias</first><last>Jürgens</last></author>
@@ -234,7 +234,7 @@
     <paper id="17">
       <title><fixed-case>BOS</fixed-case> at <fixed-case>LSCD</fixed-case>iscovery: Lexical Substitution for Interpretable Lexical Semantic Change Detection</title>
       <author><first>Artem</first><last>Kudisov</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>165-172</pages>
       <abstract>We propose a solution for the LSCDiscovery shared task on Lexical Semantic Change Detection in Spanish. Our approach is based on generating lexical substitutes that describe old and new senses of a given word. This approach achieves the second best result in sense loss and sense gain detection subtasks. By observing those substitutes that are specific for only one time period, one can understand which senses were obtained or lost. This allows providing more detailed information about semantic change to the user and makes our method interpretable.</abstract>
       <url hash="b76ade55">2022.lchange-1.17</url>
@@ -245,7 +245,7 @@
     <paper id="18">
       <title><fixed-case>D</fixed-case>eep<fixed-case>M</fixed-case>istake at <fixed-case>LSCD</fixed-case>iscovery: Can a Multilingual Word-in-Context Model Replace Human Annotators?</title>
       <author><first>Daniil</first><last>Homskiy</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>173-179</pages>
       <abstract>In this paper we describe our solution of the LSCDiscovery shared task on Lexical Semantic Change Discovery (LSCD) in Spanish. Our solution employs a Word-in-Context (WiC) model, which is trained to determine if a particular word has the same meaning in two given contexts. We basically try to replicate the annotation of the dataset for the shared task, but replacing human annotators with a neural network. In the graded change discovery subtask, our solution has achieved the 2nd best result according to all metrics. In the main binary change detection subtask, our F1-score is 0.655 compared to 0.716 of the best submission, corresponding to the 5th place. However, in the optional sense gain detection subtask we have outperformed all other participants. During the post-evaluation experiments we compared different ways to prepare WiC data in Spanish for fine-tuning. We have found that it helps leaving only examples annotated as 1 (unrelated senses) and 4 (identical senses) rather than using 2x more examples including intermediate annotations.</abstract>
       <url hash="a81f99f9">2022.lchange-1.18</url>
@@ -295,7 +295,7 @@
     <paper id="22">
       <title><fixed-case>G</fixed-case>loss<fixed-case>R</fixed-case>eader at <fixed-case>LSCD</fixed-case>iscovery: Train to Select a Proper Gloss in <fixed-case>E</fixed-case>nglish – Discover Lexical Semantic Change in <fixed-case>S</fixed-case>panish</title>
       <author><first>Maxim</first><last>Rachinskiy</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>198-203</pages>
       <abstract>The contextualized embeddings obtained from neural networks pre-trained as Language Models (LM) or Masked Language Models (MLM) are not well suitable for solving the Lexical Semantic Change Detection (LSCD) task because they are more sensitive to changes in word forms rather than word meaning, a property previously known as the word form bias or orthographic bias. Unlike many other NLP tasks, it is also not obvious how to fine-tune such models for LSCD. In order to conclude if there are any differences between senses of a particular word in two corpora, a human annotator or a system shall analyze many examples containing this word from both corpora. This makes annotation of LSCD datasets very labour-consuming. The existing LSCD datasets contain up to 100 words that are labeled according to their semantic change, which is hardly enough for fine-tuning. To solve these problems we fine-tune the XLM-R MLM as part of a gloss-based WSD system on a large WSD dataset in English. Then we employ zero-shot cross-lingual transferability of XLM-R to build the contextualized embeddings for examples in Spanish. In order to obtain the graded change score for each word, we calculate the average distance between our improved contextualized embeddings of its old and new occurrences. For the binary change detection subtask, we apply thresholding to the same scores. Our solution has shown the best results among all other participants in all subtasks except for the optional sense gain detection subtask.</abstract>
       <url hash="ff0c8fb2">2022.lchange-1.22</url>
diff --git a/data/xml/2022.ldl.xml b/data/xml/2022.ldl.xml
index fbdf779af0..2ac285f771 100644
--- a/data/xml/2022.ldl.xml
+++ b/data/xml/2022.ldl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Linked Data in Linguistics within the 13th Language Resources and Evaluation Conference</booktitle>
       <editor><first>Thierry</first><last>Declerck</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <editor><first>Elena</first><last>Montiel</last></editor>
       <editor><first>Christian</first><last>Chiarcos</last></editor>
       <editor><first>Maxim</first><last>Ionov</last></editor>
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>From <fixed-case>ELT</fixed-case>e<fixed-case>C</fixed-case> Text Collection Metadata and Named Entities to Linked-data (and Back)</title>
       <author><first>Milica</first><last>Ikonić Nešić</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Christof</first><last>Schöch</last></author>
       <author><first>Mihailo</first><last>Skoric</last></author>
       <pages>7–16</pages>
@@ -61,7 +61,7 @@
     </paper>
     <paper id="5">
       <title>Use Case: <fixed-case>R</fixed-case>omanian Language Resources in the <fixed-case>LOD</fixed-case> Paradigm</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Vasile</first><last>Pais</last></author>
       <author><first>Andrei-Marius</first><last>Avram</last></author>
@@ -85,7 +85,7 @@
     <paper id="7">
       <title>A Cheap and Dirty Cross-Lingual Linking Service in the Cloud</title>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>52–60</pages>
       <abstract>In this paper, we describe the application of Linguistic Linked Open Data (LLOD) technology for dynamic cross-lingual querying on demand. Whereas most related research is focusing on providing a static linking, i.e., cross-lingual inference, and then storing the resulting links, we demonstrate the application of the federation capabilities of SPARQL to perform lexical linking on the fly. In the end, we provide a baseline functionality that uses the connection of two web services – a SPARQL end point for multilingual lexical data and another SPARQL end point for querying an English language knowledge graph – in order to perform querying an English language knowledge graph using foreign language labels. We argue that, for low-resource languages where substantial native knowledge graphs are lacking, this functionality can be used to lower the language barrier by allowing to formulate cross-linguistically applicable queries mediated by a multilingual dictionary.</abstract>
       <url hash="c04b8e32">2022.ldl-1.7</url>
@@ -102,14 +102,14 @@
     </paper>
     <paper id="9">
       <title>A Survey of Guidelines and Best Practices for the Generation, Interlinking, Publication, and Validation of Linguistic Linked Data</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Maria Pia</first><last>Di Buono</last></author>
       <author><first>Milan</first><last>Dojchinovski</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Giedre Valunaite</first><last>Oleskeviciene</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <pages>69–77</pages>
       <abstract>This article discusses a survey carried out within the NexusLinguarum COST Action which aimed to give an overview of existing guidelines (GLs) and best practices (BPs) in linguistic linked data. In particular it focused on four core tasks in the production/publication of linked data: generation, interlinking, publication, and validation. We discuss the importance of GLs and BPs for LLD before describing the survey and its results in full. Finally we offer a number of directions for future work in order to address the findings of the survey.</abstract>
       <url hash="f5259adc">2022.ldl-1.9</url>
@@ -119,8 +119,8 @@
       <title>Computational Morphology with <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Morph</title>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <author><first>Matteo</first><last>Pellegrini</last></author>
       <pages>78–86</pages>
diff --git a/data/xml/2022.legal.xml b/data/xml/2022.legal.xml
index c8e152f071..d5148e1c71 100644
--- a/data/xml/2022.legal.xml
+++ b/data/xml/2022.legal.xml
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Data Protection, Privacy and <fixed-case>US</fixed-case> Regulation</title>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <pages>9–16</pages>
       <abstract>This paper examines the state of data protection and privacy in the United States. There is no comprehensive federal data protection or data privacy law despite bipartisan and popular support. There are several data protection bills pending in the 2022 session of the US Congress, five of which are examined in Section 2 below. Although it is not likely that any will be enacted, the growing number reflects the concerns of citizens and lawmakers about the power of big data. Recent actions against data abuses, including data breaches, litigation and settlements, are reviewed in Section 3 of this paper. These reflect the real harm caused when personal data is misused. Section 4 contains a brief US copyright law update on the fair use exemption, highlighting a recent court decision and indications of a re-thinking of the fair use analysis. In Section 5, some observations are made on the role of privacy in data protection regulation. It is argued that privacy should be considered from the start of the data collection and technology development process. Enhanced awareness of ethical issues, including privacy, through university-level data science programs will also lay the groundwork for best practices throughout the data and development cycles.</abstract>
       <url hash="fa7eacb2">2022.legal-1.3</url>
@@ -56,9 +56,9 @@
       <title>Categorizing legal features in a metadata-oriented task: defining the conditions of use</title>
       <author><first>Mickaël</first><last>Rigault</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>22–26</pages>
       <abstract>In recent times, more attention has been brought by the Human Language Technology (HLT) community to the legal framework for making available and reusing Language Resources (LR) and tools. Licensing is now an issue that is foreseen in most research projects and that is essential to provide legal certainty for repositories when distributing resources. Some repositories such as Zenodo or Quantum Stat do not offer the possibility to search for resources by licenses which can turn the searching for relevant resources a very complex task. Other repositories such as Hugging Face propose a search feature by license which may make it difficult to figure out what use can be made of such resources. During the European Language Grid (ELG) project, we moved a step forward to link metadata with the terms and conditions of use. In this paper, we document the process we undertook to categorize legal features of licenses listed in the SPDX license list and widely used in the HLT community as well as those licenses used within the ELG platform</abstract>
       <url hash="f19eead8">2022.legal-1.5</url>
@@ -126,14 +126,14 @@
     <paper id="12">
       <title><fixed-case>MAPA</fixed-case> Project: Ready-to-Go Open-Source Datasets and Deep Learning Technology to Remove Identifying Information from Text Documents</title>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Aitor</first><last>García Pablos</last></author>
       <author><first>Lucie</first><last>Gianola</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>64–72</pages>
       <abstract>This paper presents the outcomes of the MAPA project, a set of annotated corpora for 24 languages of the European Union and an open-source customisable toolkit able to detect and substitute sensitive information in text documents from any domain, using state-of-the art, deep learning-based named entity recognition techniques. In the context of the project, the toolkit has been developed and tested on administrative, legal and medical documents, obtaining state-of-the-art results. As a result of the project, 24 dataset packages have been released and the de-identification toolkit is available as open source.</abstract>
       <url hash="921a51db">2022.legal-1.12</url>
@@ -157,7 +157,7 @@
       <title>Legal and Ethical Challenges in Recording Air Traffic Control Speech</title>
       <author><first>Mickaël</first><last>Rigault</last></author>
       <author><first>Claudia</first><last>Cevenini</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Martin</first><last>Kocour</last></author>
       <author><first>Karel</first><last>Veselý</last></author>
       <author><first>Igor</first><last>Szoke</last></author>
diff --git a/data/xml/2022.lnls.xml b/data/xml/2022.lnls.xml
index 4207f33de1..3634ab520b 100644
--- a/data/xml/2022.lnls.xml
+++ b/data/xml/2022.lnls.xml
@@ -52,7 +52,7 @@
       <author><first>Angelica</first><last>Chen</last></author>
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Jason</first><last>Phang</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>17-28</pages>
       <abstract>Current QA systems can generate reasonable-sounding yet false answers without explanation or evidence for the generated answer, which is especially problematic when humans cannot readily check the model’s answers. This presents a challenge for building trust in machine learning systems. We take inspiration from real-world situations where difficult questions are answered by considering opposing sides (see Irving et al., 2018). For multiple-choice QA examples, we build a dataset of single arguments for both a correct and incorrect answer option in a debate-style set-up as an initial step in training models to produce explanations for two candidate answers. We use long contexts—humans familiar with the context write convincing explanations for pre-selected correct and incorrect answers, and we test if those explanations allow humans who have not read the full context to more accurately determine the correct answer. We do not find that explanations in our set-up improve human accuracy, but a baseline condition shows that providing human-selected text snippets does improve accuracy. We use these findings to suggest ways of improving the debate set up for future data collection efforts.</abstract>
       <url hash="47246998">2022.lnls-1.3</url>
diff --git a/data/xml/2022.loresmt.xml b/data/xml/2022.loresmt.xml
index 40c39a5ae5..3f786e4b75 100644
--- a/data/xml/2022.loresmt.xml
+++ b/data/xml/2022.loresmt.xml
@@ -3,13 +3,13 @@
   <volume id="1" ingest-date="2022-10-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Jade</first><last>Abbott</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
-      <editor><first>Tommi A</first><last>Pirinen</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
       <editor><first>Xiaobing</first><last>Zhao</last></editor>
@@ -36,7 +36,7 @@
     <paper id="2">
       <title>Multiple Pivot Languages and Strategic Decoder Initialization Helps Neural Machine Translation</title>
       <author><first>Shivam</first><last>Mhaskar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>9–14</pages>
       <abstract>In machine translation, a pivot language can be used to assist the source to target translation model. In pivot-based transfer learning, the source to pivot and the pivot to target models are used to improve the performance of the source to target model. This technique works best when both source-pivot and pivot-target are high resource language pairs and the source-target is a low resource language pair. But in some cases, such as Indic languages, the pivot to target language pair is not a high resource one. To overcome this limitation, we use multiple related languages as pivot languages to assist the source to target model. We show that using multiple pivot languages gives 2.03 BLEU and 3.05 chrF score improvement over the baseline model. We show that strategic decoder initialization while performing pivot-based transfer learning with multiple pivot languages gives a 3.67 BLEU and 5.94 chrF score improvement over the baseline model.</abstract>
       <url hash="dd56a049">2022.loresmt-1.2</url>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>The Only Chance to Understand: Machine Translation of the Severely Endangered Low-resource Languages of Eurasia</title>
       <author><first>Anna</first><last>Mosolova</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>23–34</pages>
       <abstract>Numerous machine translation systems have been proposed since the appearance of this task. Nowadays, new large language model-based algorithms show results that sometimes overcome human ones on the rich-resource languages. Nevertheless, it is still not the case for the low-resource languages, for which all these algorithms did not show equally impressive results. In this work, we want to compare 3 generations of machine translation models on 7 low-resource languages and make a step further by proposing a new way of automatic parallel data augmentation using the state-of-the-art generative model.</abstract>
       <url hash="bee778c2">2022.loresmt-1.4</url>
@@ -65,7 +65,7 @@
       <author><first>Nathaniel</first><last>Robinson</last></author>
       <author><first>Cameron</first><last>Hogan</last></author>
       <author><first>Nancy</first><last>Fulda</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>35–42</pages>
       <abstract>Multilingual transfer techniques often improve low-resource machine translation (MT). Many of these techniques are applied without considering data characteristics. We show in the context of Haitian-to-English translation that transfer effectiveness is correlated with amount of training data and relationships between knowledge-sharing languages. Our experiments suggest that for some languages beyond a threshold of authentic data, back-translation augmentation methods are counterproductive, while cross-lingual transfer from a sufficiently related language is preferred. We complement this finding by contributing a rule-based French-Haitian orthographic and syntactic engine and a novel method for phonological embedding. When used with multilingual techniques, orthographic transformation makes statistically significant improvements over conventional methods. And in very low-resource Jamaican MT, code-switching with a transfer language for orthographic resemblance yields a 6.63 BLEU point advantage.</abstract>
       <url hash="1138bc6b">2022.loresmt-1.5</url>
@@ -95,7 +95,7 @@
     <paper id="8">
       <title><fixed-case>HFT</fixed-case>: High Frequency Tokens for Low-Resource <fixed-case>NMT</fixed-case></title>
       <author><first>Edoardo</first><last>Signoroni</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <pages>56–63</pages>
       <abstract>Tokenization has been shown to impact the quality of downstream tasks, such as Neural Machine Translation (NMT), which is susceptible to out-of-vocabulary words and low frequency training data. Current state-of-the-art algorithms have been helpful in addressing the issues of out-of-vocabulary words, bigger vocabulary sizes and token frequency by implementing subword segmentation. We argue, however, that there is still room for improvement, in particular regarding low-frequency tokens in the training data. In this paper, we present “High Frequency Tokenizer”, or HFT, a new language-independent subword segmentation algorithm that addresses this issue. We also propose a new metric to measure the frequency coverage of a tokenizer’s vocabulary, based on a frequency rank weighted average of the frequency values of its items. We experiment with a diverse set of language corpora, vocabulary sizes, and writing systems and report improvements on both frequency statistics and on the average length of the output. We also observe a positive impact on downstream NMT.</abstract>
       <url hash="8ffc3343">2022.loresmt-1.8</url>
diff --git a/data/xml/2022.louhi.xml b/data/xml/2022.louhi.xml
index e1a5f04dea..711ed670ff 100644
--- a/data/xml/2022.louhi.xml
+++ b/data/xml/2022.louhi.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2022-12-13" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</booktitle>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <editor><first>Eben</first><last>Holderness</last></editor>
-      <editor><first>Antonio</first><last>Jimeno Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Fabio</first><last>Rinaldi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, United Arab Emirates (Hybrid)</address>
@@ -37,7 +37,7 @@
       <title>Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in <fixed-case>S</fixed-case>panish Clinical Narratives</title>
       <author><first>Matias</first><last>Rojas</last><affiliation>University of Chile</affiliation></author>
       <author><first>Casimiro Pio</first><last>Carrino</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <author><first>Jocelyn</first><last>Dunstan</last><affiliation>University of Chile</affiliation></author>
       <author><first>Marta</first><last>Villegas</last><affiliation>Barcelona Supercomputing Center, Spain</affiliation></author>
       <pages>14-25</pages>
@@ -51,7 +51,7 @@
       <title>Can Current Explainability Help Provide References in Clinical Notes to Support Humans Annotate Medical Codes?</title>
       <author><first>Byung-Hak</first><last>Kim</last><affiliation>AKASA, Inc.</affiliation></author>
       <author><first>Zhongfen</first><last>Deng</last><affiliation>University of Illinois at Chicago, USA</affiliation></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>University of Illinois at Chicago, USA</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>University of Illinois at Chicago, USA</affiliation></author>
       <author><first>Varun</first><last>Ganapathi</last><affiliation>AKASA, Inc.</affiliation></author>
       <pages>26-34</pages>
       <abstract>The medical codes prediction problem from clinical notes has received substantial interest in the NLP community, and several recent studies have shown the state-of-the-art (SOTA) code prediction results of full-fledged deep learning-based methods. However, most previous SOTA works based on deep learning are still in early stages in terms of providing textual references and explanations of the predicted codes, despite the fact that this level of explainability of the prediction outcomes is critical to gaining trust from professional medical coders. This raises the important question of how well current explainability methods apply to advanced neural network models such as transformers to predict correct codes and present references in clinical notes that support code prediction. First, we present an explainable Read, Attend, and Code (xRAC) framework and assess two approaches, attention score-based xRAC-ATTN and model-agnostic knowledge-distillation-based xRAC-KD, through simplified but thorough human-grounded evaluations with SOTA transformer-based model, RAC. We find that the supporting evidence text highlighted by xRAC-ATTN is of higher quality than xRAC-KD whereas xRAC-KD has potential advantages in production deployment scenarios. More importantly, we show for the first time that, given the current state of explainability methodologies, using the SOTA medical codes prediction system still requires the expertise and competencies of professional coders, even though its prediction accuracy is superior to that of human coders. This, we believe, is a very meaningful step toward developing explainable and accurate machine learning systems for fully autonomous medical code prediction from clinical notes.</abstract>
@@ -63,7 +63,7 @@
     <paper id="4">
       <title>Distinguishing between focus and background entities in biomedical corpora using discourse structure and transformers</title>
       <author><first>Antonio</first><last>Jimeno Yepes</last><affiliation>RMIT University, Australia &amp; University of Melbourne, Australia</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>RMIT University, Australia &amp; University of Melbourne, Australia</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>RMIT University, Australia &amp; University of Melbourne, Australia</affiliation></author>
       <pages>35-40</pages>
       <abstract>Scientific documents typically contain numerous entity mentions, while only a subset are directly relevant to the key contributions of the paper. Distinguishing these focus entities from background ones effectively could improve the recovery of relevant documents and the extraction of information from documents. To study the identification of focus entities, we developed two large datasets of disease-causing biological pathogens using MEDLINE, the largest collection of biomedical citations, and PubMed Central, a collection of full text articles. The focus entities were identified using human-curated indexing on these collections. Experiments with machine learning methods to identify focus entities show that transformer methods achieve high precision and recall and that document discourse information is relevant. The work lays the foundation for more targeted retrieval/summarisation of entity-relevant documents.</abstract>
       <url hash="2ea8b949">2022.louhi-1.4</url>
@@ -75,7 +75,7 @@
       <author><first>Yanis</first><last>Labrak</last><affiliation>Avignon University, France</affiliation></author>
       <author><first>Adrien</first><last>Bazoge</last><affiliation>Nantes University, France</affiliation></author>
       <author><first>Richard</first><last>Dufour</last><affiliation>Nantes University, France</affiliation></author>
-      <author><first>Beatrice</first><last>Daille</last><affiliation>Nantes University, France</affiliation></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last><affiliation>Nantes University, France</affiliation></author>
       <author><first>Pierre-Antoine</first><last>Gourraud</last><affiliation>Nantes University, France</affiliation></author>
       <author><first>Emmanuel</first><last>Morin</last><affiliation>Nantes University, France</affiliation></author>
       <author><first>Mickael</first><last>Rouvier</last><affiliation>Avignon University, France</affiliation></author>
@@ -89,7 +89,7 @@
       <title>A Large-Scale Dataset for Biomedical Keyphrase Generation</title>
       <author><first>Maël</first><last>Houbre</last><affiliation>Nantes University, France</affiliation></author>
       <author><first>Florian</first><last>Boudin</last><affiliation>Nantes University, France</affiliation></author>
-      <author><first>Beatrice</first><last>Daille</last><affiliation>Nantes University, France</affiliation></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last><affiliation>Nantes University, France</affiliation></author>
       <pages>47-53</pages>
       <abstract>Keyphrase generation is the task consisting in generating a set of words or phrases that highlight the main topics of a document. There are few datasets for keyphrase generation in the biomedical domain and they do not meet the expectations in terms of size for training generative models. In this paper, we introduce kp-biomed, the first large-scale biomedical keyphrase generation dataset collected from PubMed abstracts. We train and release several generative models and conduct a series of experiments showing that using large scale datasets improves significantly the performances for present and absent keyphrase generation. The dataset and models are available online.</abstract>
       <url hash="db004c50">2022.louhi-1.6</url>
@@ -140,7 +140,7 @@
       <author><first>Hicham</first><last>El Boukkouri</last><affiliation>UniversitÃ© Paris-Saclay, CNRS, France</affiliation></author>
       <author><first>Olivier</first><last>Ferret</last><affiliation>UniversitÃ© Paris-Saclay, CEA, France</affiliation></author>
       <author><first>Thomas</first><last>Lavergne</last><affiliation>UniversitÃ© Paris-Saclay, CNRS, France</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>UniversitÃ© Paris-Saclay, CNRS, France</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>UniversitÃ© Paris-Saclay, CNRS, France</affiliation></author>
       <pages>69-80</pages>
       <abstract>Domain adaptation of word embeddings has mainly been explored in the context of retraining general models on large specialized corpora. While this usually yields good results, we argue that knowledge graphs, which are used less frequently, could also be utilized to enhance existing representations with specialized knowledge. In this work, we aim to shed some light on whether such knowledge injection could be achieved using a basic set of tools: graph-level embeddings and concatenation. To that end, we adopt an incremental approach where we first demonstrate that static embeddings can indeed be improved through concatenation with in-domain node2vec representations. Then, we validate this approach on contextual models and generalize it further by proposing a variant of BERT that incorporates knowledge embeddings within its hidden states through the same process of concatenation. We show that this variant outperforms plain retraining on several specialized tasks, then discuss how this simple approach could be improved further. Both our code and pre-trained models are open-sourced for future research. In this work, we conduct experiments that target the medical domain and the English language.</abstract>
       <url hash="67493637">2022.louhi-1.9</url>
@@ -293,7 +293,7 @@
       <author><first>Efsun Sarioglu</first><last>Kayi</last><affiliation>George Washington University, USA</affiliation></author>
       <author><first>Sardar</first><last>Hamidian</last><affiliation>George Washington University, USA</affiliation></author>
       <author><first>Michael</first><last>Compton</last><affiliation>Columbia University, USA</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>George Washington University, USA</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>George Washington University, USA</affiliation></author>
       <pages>173-183</pages>
       <abstract>Schizophrenia is one of the most disabling mental health conditions to live with. Approximately one percent of the population has schizophrenia which makes it fairly common, and it affects many people and their families. Patients with schizophrenia suffer different symptoms: formal thought disorder (FTD), delusions, and emotional flatness. In this paper, we quantitatively and qualitatively analyze the language of patients with schizophrenia measuring various linguistic features in two modalities: speech and written text. We examine the following features: coherence and cohesion of thoughts, emotions, specificity, level of commit- ted belief (LCB), and personality traits. Our results show that patients with schizophrenia score high in fear and neuroticism compared to healthy controls. In addition, they are more committed to their beliefs, and their writing lacks details. They score lower in most of the linguistic features of cohesion with significant p-values.</abstract>
       <url hash="04be3126">2022.louhi-1.20</url>
@@ -328,7 +328,7 @@
     <paper id="23">
       <title>Enriching Deep Learning with Frame Semantics for Empathy Classification in Medical Narrative Essays</title>
       <author><first>Priyanka</first><last>Dey</last><affiliation>University of Illinois at Urbana-Champaign, USA</affiliation></author>
-      <author><first>Roxana</first><last>Girju</last><affiliation>University of Illinois at Urbana-Champaign, USA</affiliation></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last><affiliation>University of Illinois at Urbana-Champaign, USA</affiliation></author>
       <pages>207-217</pages>
       <abstract>Empathy is a vital component of health care and plays a key role in the training of future doctors. Paying attention to medical students’ self-reflective stories of their interactions with patients can encourage empathy and the formation of professional identities that embody desirable values such as integrity and respect. We present a computational approach and linguistic analysis of empathic language in a large corpus of 440 essays written by pre-med students as narrated simulated patient – doctor interactions. We analyze the discourse of three kinds of empathy: cognitive, affective, and prosocial as highlighted by expert annotators. We also present various experiments with state-of-the-art recurrent neural networks and transformer models for classifying these forms of empathy. To further improve over these results, we develop a novel system architecture that makes use of frame semantics to enrich our state-of-the-art models. We show that this novel framework leads to significant improvement on the empathy classification task for this dataset.</abstract>
       <url hash="9d38aa52">2022.louhi-1.23</url>
@@ -340,7 +340,7 @@
       <title>Condition-Treatment Relation Extraction on Disease-related Social Media Data</title>
       <author><first>Sichang</first><last>Tu</last><affiliation>Emory University, USA</affiliation></author>
       <author><first>Stephen</first><last>Doogan</last><affiliation>Real Life Sciences</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University, USA</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University, USA</affiliation></author>
       <pages>218-228</pages>
       <abstract>Social media has become a popular platform where people share information about personal healthcare conditions, diagnostic histories, and medical plans. Analyzing posts on social media depicting such realistic information can help improve quality and clinical decision-making; however, the lack of structured resources in this genre limits us to build robust NLP models for meaningful analysis. This paper presents a new corpus annotating relations among many types of conditions, treatments, and their attributes illustrated in social media posts by patients and caregivers. For experiments, a transformer encoder is pretrained on 1M raw posts and used to train several document-level relation extraction models using our corpus. Our best-performing model achieves the F1 scores of 70.9 and 51.7 for Entity Recognition and Relation Extraction, respectively. These results are encouraging as it is the first neural model extracting complex relations of this kind on social media data.</abstract>
       <url hash="dade48b2">2022.louhi-1.24</url>
diff --git a/data/xml/2022.lrec.xml b/data/xml/2022.lrec.xml
index 3554d6ea03..1e92501a69 100644
--- a/data/xml/2022.lrec.xml
+++ b/data/xml/2022.lrec.xml
@@ -3,19 +3,19 @@
   <volume id="1" ingest-date="2022-09-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Thirteenth Language Resources and Evaluation Conference</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
       <editor><first>Philippe</first><last>Blache</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Christopher</first><last>Cieri</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="christopher-cieri"><first>Christopher</first><last>Cieri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Sara</first><last>Goggi</last></editor>
       <editor><first>Hitoshi</first><last>Isahara</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
       <editor><first>Hélène</first><last>Mazo</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -33,7 +33,7 @@
       <title>Domain Adaptation in Neural Machine Translation using a Qualia-Enriched <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Alexandre Diniz da</first><last>Costa</last></author>
       <author><first>Mateus</first><last>Coutinho Marim</last></author>
-      <author><first>Ely</first><last>Matos</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely</first><last>Matos</last></author>
       <author><first>Tiago</first><last>Timponi Torrent</last></author>
       <pages>1–12</pages>
       <abstract>In this paper we present Scylla, a methodology for domain adaptation of Neural Machine Translation (NMT) systems that make use of a multilingual FrameNet enriched with qualia relations as an external knowledge base. Domain adaptation techniques used in NMT usually require fine-tuning and in-domain training data, which may pose difficulties for those working with lesser-resourced languages and may also lead to performance decay of the NMT system for out-of-domain sentences. Scylla does not require fine-tuning of the NMT model, avoiding the risk of model over-fitting and consequent decrease in performance for out-of-domain translations. Two versions of Scylla are presented: one using the source sentence as input, and another one using the target sentence. We evaluate Scylla in comparison to a state-of-the-art commercial NMT system in an experiment in which 50 sentences from the Sports domain are translated from Brazilian Portuguese to English. The two versions of Scylla significantly outperform the baseline commercial system in HTER.</abstract>
@@ -60,7 +60,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>22–28</pages>
       <abstract>In recent years, there has been an increasing need for the restoration and translation of historical languages. In this study, we attempt to translate historical records in ancient Korean language based on neural machine translation (NMT). Inspired by priming, a cognitive science theory that two different stimuli influence each other, we propose novel priming ancient-Korean NMT (AKNMT) using bilingual subword embedding initialization with structural property awareness in the ancient documents. Finally, we obtain state-of-the-art results in the AKNMT task. To the best of our knowledge, we confirm the possibility of developing a human-centric model that incorporates the concepts of cognitive science and analyzes the result from the perspective of interference and cognitive dissonance theory for the first time.</abstract>
       <url hash="b2e20ab3">2022.lrec-1.3</url>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="6">
       <title>Compiling a Suitable Level of Sense Granularity in a Lexicon for <fixed-case>AI</fixed-case> Purposes: The Open Source <fixed-case>COR</fixed-case> Lexicon</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Nathalie Carmen Hau</first><last>Sørensen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Ida</first><last>Flørke</last></author>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="11">
       <title>Introducing the <fixed-case>CURLICAT</fixed-case> Corpora: Seven-language Domain Specific Annotated Corpora from Curated Sources</title>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Bence</first><last>Nyéki</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
@@ -148,11 +148,11 @@
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
       <author><first>Bartłomiej</first><last>Nitoń</last></author>
       <author><first>Piotr</first><last>Pęzik</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Andraž</first><last>Repar</last></author>
       <pages>100–108</pages>
@@ -178,7 +178,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>QAR</fixed-case>: Question Rewriting on <fixed-case>C</fixed-case>o<fixed-case>QA</fixed-case></title>
       <author><first>Quentin</first><last>Brabant</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <pages>119–126</pages>
       <abstract>Questions asked by humans during a conversation often contain contextual dependencies, i.e., explicit or implicit references to previous dialogue turns. These dependencies take the form of coreferences (e.g., via pronoun use) or ellipses, and can make the understanding difficult for automated systems. One way to facilitate the understanding and subsequent treatments of a question is to rewrite it into an out-of-context form, i.e., a form that can be understood without the conversational context. We propose CoQAR, a corpus containing 4.5K conversations from the Conversational Question-Answering dataset CoQA, for a total of 53K follow-up question-answer pairs. Each original question was manually annotated with at least 2 at most 3 out-of-context rewritings. CoQA originally contains 8k conversations, which sum up to 127k question-answer pairs. CoQAR can be used in the supervised learning of three tasks: question paraphrasing, question rewriting and conversational question answering. In order to assess the quality of CoQAR’s rewritings, we conduct several experiments consisting in training and evaluating models for these three tasks. Our results support the idea that question rewriting can be used as a preprocessing step for (conversational and non-conversational) question answering models, thereby increasing their performances.</abstract>
       <url hash="ebdf9a31">2022.lrec-1.13</url>
@@ -203,7 +203,7 @@
       <author><first>Cristina</first><last>Giannone</last></author>
       <author><first>Andrea</first><last>Favalli</last></author>
       <author><first>Raniero</first><last>Romagnoli</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>137–145</pages>
       <abstract>Incorporating handwritten domain scripts into neural-based task-oriented dialogue systems may be an effective way to reduce the need for large sets of annotated dialogues. In this paper, we investigate how the use of domain scripts written by conversational designers affects the performance of neural-based dialogue systems. To support this investigation, we propose the Conversational-Logic-Injection-in-Neural-Network system (CLINN) where domain scripts are coded in semi-logical rules. By using CLINN, we evaluated semi-logical rules produced by a team of differently-skilled conversational designers. We experimented with the Restaurant domain of the MultiWOZ dataset. Results show that external knowledge is extremely important for reducing the need for annotated examples for conversational systems. In fact, rules from conversational designers used in CLINN significantly outperform a state-of-the-art neural-based dialogue system when trained with smaller sets of annotated dialogues.</abstract>
       <url hash="c1fe25c5">2022.lrec-1.15</url>
@@ -223,7 +223,7 @@
       <title>Language Technologies for the Creation of Multilingual Terminologies. Lessons Learned from the <fixed-case>SSHOC</fixed-case> Project</title>
       <author><first>Federica</first><last>Gamba</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <pages>154–163</pages>
       <abstract>This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow.</abstract>
@@ -253,7 +253,7 @@
     </paper>
     <paper id="20">
       <title>Cross-Lingual Link Discovery for Under-Resourced Languages</title>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Sina</first><last>Ahmadi</last></author>
       <author><first>Elena-Simona</first><last>Apostol</last></author>
       <author><first>Julia</first><last>Bosque-Gil</last></author>
@@ -264,7 +264,7 @@
       <author><first>Dagmar</first><last>Gromann</last></author>
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Giedrė</first><last>Valūnaitė Oleškevičienė</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Ciprian-Octavian</first><last>Truică</last></author>
       <pages>181–192</pages>
       <abstract>In this paper, we provide an overview of current technologies for cross-lingual link discovery, and we discuss challenges, experiences and prospects of their application to under-resourced languages. We rst introduce the goals of cross-lingual linking and associated technologies, and in particular, the role that the Linked Data paradigm (Bizer et al., 2011) applied to language data can play in this context. We de ne under-resourced languages with a speci c focus on languages actively used on the internet, i.e., languages with a digitally versatile speaker community, but limited support in terms of language technology. We argue that languages for which considerable amounts of textual data and (at least) a bilingual word list are available, techniques for cross-lingual linking can be readily applied, and that these enable the implementation of downstream applications for under-resourced languages via the localisation and adaptation of existing technologies and resources.</abstract>
@@ -313,8 +313,8 @@
     </paper>
     <paper id="25">
       <title>Automatic Detection of Stigmatizing Uses of Psychiatric Terms on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Abdelmoumene</first><last>Boumadane</last></author>
       <pages>237–243</pages>
       <abstract>Psychiatry and people suffering from mental disorders have often been given a pejorative label that induces social rejection. Many studies have addressed discourse content about psychiatry on social media, suggesting that they convey stigmatizingrepresentations of mental health disorders. In this paper, we focus for the first time on the use of psychiatric terms in tweetsin French. We first describe the annotated dataset that we use. Then we propose several deep learning models to detectautomatically (1) the different types of use of psychiatric terms (medical use, misuse or irrelevant use), and (2) the polarityof the tweet. We show that polarity detection can be improved when done in a multitask framework in combination with typeof use detection. This confirms the observations made manually on several datasets, namely that the polarity of a tweet iscorrelated to the type of term use (misuses are mostly negative whereas medical uses are neutral). The results are interesting forboth tasks and it allows to consider the possibility for performant automatic approaches in order to conduct real-time surveyson social media, larger and less expensive than existing manual ones</abstract>
@@ -324,7 +324,7 @@
     <paper id="26">
       <title><fixed-case>C</fixed-case>o<fixed-case>VERT</fixed-case>: A Corpus of Fact-checked Biomedical <fixed-case>COVID</fixed-case>-19 Tweets</title>
       <author><first>Isabelle</first><last>Mohr</last></author>
-      <author><first>Amelie</first><last>Wührl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>244–257</pages>
       <abstract>During the first two years of the COVID-19 pandemic, large volumes of biomedical information concerning this new disease have been published on social media. Some of this information can pose a real danger, particularly when false information is shared, for instance recommendations how to treat diseases without professional medical advice. Therefore, automatic fact-checking resources and systems developed specifically for medical domain are crucial. While existing fact-checking resources cover COVID-19 related information in news or quantify the amount of misinformation in tweets, there is no dataset providing fact-checked COVID-19 related Twitter posts with detailed annotations for biomedical entities, relations and relevant evidence. We contribute CoVERT, a fact-checked corpus of tweets with a focus on the domain of biomedicine and COVID-19 related (mis)information. The corpus consists of 300 tweets, each annotated with named entities and relations. We employ a novel crowdsourcing methodology to annotate all tweets with fact-checking labels and supporting evidence, which crowdworkers search for online. This methodology results in substantial inter-annotator agreement. Furthermore, we use the retrieved evidence extracts as part of a fact-checking pipeline, finding that the real-world evidence is more useful than the knowledge directly available in pretrained language models.</abstract>
@@ -334,8 +334,8 @@
     <paper id="27">
       <title><fixed-case>XLM</fixed-case>-<fixed-case>T</fixed-case>: Multilingual Language Models in <fixed-case>T</fixed-case>witter for Sentiment Analysis and Beyond</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>258–266</pages>
       <abstract>Language models are ubiquitous in current NLP, and their multilingual capacity has recently attracted considerable attention. However, current analyses have almost exclusively focused on (multilingual variants of) standard benchmarks, and have relied on clean pre-training and task-specific corpora as multilingual signals. In this paper, we introduce XLM-T, a model to train and evaluate multilingual language models in Twitter. In this paper we provide: (1) a new strong multilingual baseline consisting of an XLM-R (Conneau et al. 2020) model pre-trained on millions of tweets in over thirty languages, alongside starter code to subsequently fine-tune on a target task; and (2) a set of unified sentiment analysis Twitter datasets in eight different languages and a XLM-T model trained on this dataset.</abstract>
       <url hash="5a05483a">2022.lrec-1.27</url>
@@ -354,7 +354,7 @@
     <paper id="29">
       <title>Generating Questions from <fixed-case>W</fixed-case>ikidata Triples</title>
       <author><first>Kelvin</first><last>Han</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>277–290</pages>
       <abstract>Question generation from knowledge bases (or knowledge base question generation, KBQG) is the task of generating questions from structured database information, typically in the form of triples representing facts. To handle rare entities and generalize to unseen properties, previous work on KBQG resorted to extensive, often ad-hoc pre- and post-processing of the input triple. We revisit KBQG – using pre training, a new (triple, question) dataset and taking question type into account – and show that our approach outperforms previous work both in a standard and in a zero-shot setting. We also show that the extended KBQG dataset (also helpful for knowledge base question answering) we provide allows not only for better coverage in terms of knowledge base (KB) properties but also for increased output variability in that it permits the generation of multiple questions from the same KB triple.</abstract>
@@ -375,7 +375,7 @@
       <title>Evaluating the Effects of Embedding with Speaker Identity Information in Dialogue Summarization</title>
       <author><first>Yuji</first><last>Naraki</last></author>
       <author><first>Tetsuya</first><last>Sakai</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>298–304</pages>
       <abstract>Automatic dialogue summarization is a task used to succinctly summarize a dialogue transcript while correctly linking the speakers and their speech, which distinguishes this task from a conventional document summarization. To address this issue and reduce the “who said what”-related errors in a summary, we propose embedding the speaker identity information in the input embedding into the dialogue transcript encoder. Unlike the speaker embedding proposed by Gu et al. (2020), our proposal takes into account the informativeness of position embedding. By experimentally comparing several embedding methods, we confirmed that the scores of ROUGE and a human evaluation of the generated summaries were substantially increased by embedding speaker information at the less informative part of the fixed position embedding with sinusoidal functions.</abstract>
       <url hash="ca0bc2c3">2022.lrec-1.31</url>
@@ -469,7 +469,7 @@
       <author><first>Mihai</first><last>Dascalu</last></author>
       <author><first>Traian</first><last>Rebedea</last></author>
       <author><first>Vasile</first><last>Pais</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <pages>374–384</pages>
       <abstract>Running large-scale pre-trained language models in computationally constrained environments remains a challenging problem yet to be addressed, while transfer learning from these models has become prevalent in Natural Language Processing tasks. Several solutions, including knowledge distillation, network quantization, or network pruning have been previously proposed; however, these approaches focus mostly on the English language, thus widening the gap when considering low-resource languages. In this work, we introduce three light and fast versions of distilled BERT models for the Romanian language: Distil-BERT-base-ro, Distil-RoBERT-base, and DistilMulti-BERT-base-ro. The first two models resulted from the individual distillation of knowledge from two base versions of Romanian BERTs available in literature, while the last one was obtained by distilling their ensemble. To our knowledge, this is the first attempt to create publicly available Romanian distilled BERT models, which were thoroughly evaluated on five tasks: part-of-speech tagging, named entity recognition, sentiment analysis, semantic textual similarity, and dialect identification. Our experimental results argue that the three distilled models offer performance comparable to their teachers, while being twice as fast on a GPU and ~35% smaller. In addition, we further test the similarity between the predictions of our students versus their teachers by measuring their label and probability loyalty, together with regression loyalty - a new metric introduced in this work.</abstract>
       <url hash="881bcd6d">2022.lrec-1.39</url>
@@ -540,7 +540,7 @@
       <title>Kompetencer: Fine-grained Skill Classification in <fixed-case>D</fixed-case>anish Job Postings via Distant Supervision and Transfer Learning</title>
       <author><first>Mike</first><last>Zhang</last></author>
       <author><first>Kristian Nørgaard</first><last>Jensen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>436–447</pages>
       <abstract>Skill Classification (SC) is the task of classifying job competences from job postings. This work is the first in SC applied to Danish job vacancy data. We release the first Danish job posting dataset: *Kompetencer* (_en_: competences), annotated for nested spans of competences. To improve upon coarse-grained annotations, we make use of The European Skills, Competences, Qualifications and Occupations (ESCO; le Vrang et al., (2014)) taxonomy API to obtain fine-grained labels via distant supervision. We study two setups: The zero-shot and few-shot classification setting. We fine-tune English-based models and RemBERT (Chung et al., 2020) and compare them to in-language Danish models. Our results show RemBERT significantly outperforms all other models in both the zero-shot and the few-shot setting.</abstract>
       <url hash="3bf5879a">2022.lrec-1.46</url>
@@ -592,7 +592,7 @@
       <title>Constructing A Dataset of Support and Attack Relations in Legal Arguments in Court Judgements using Linguistic Rules</title>
       <author><first>Basit</first><last>Ali</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Rituraj</first><last>Singh</last></author>
       <pages>491–500</pages>
       <abstract>Argumentation mining is a growing area of research and has several interesting practical applications of mining legal arguments. Support and Attack relations are the backbone of any legal argument. However, there is no publicly available dataset of these relations in the context of legal arguments expressed in court judgements. In this paper, we focus on automatically constructing such a dataset of Support and Attack relations between sentences in a court judgment with reasonable accuracy. We propose three sets of rules based on linguistic knowledge and distant supervision to identify such relations from Indian Supreme Court judgments. The first rule set is based on multiple discourse connectors, the second rule set is based on common semantic structures between argumentative sentences in a close neighbourhood, and the third rule set uses the information about the source of the argument. We also explore a BERT-based sentence pair classification model which is trained on this dataset. We release the dataset of 20506 sentence pairs - 10746 Support (precision 77.3%) and 9760 Attack (precision 65.8%). We believe that this dataset and the ideas explored in designing the linguistic rules and will boost the argumentation mining research for legal arguments.</abstract>
@@ -628,7 +628,7 @@
     </paper>
     <paper id="55">
       <title>Valet: Rule-Based Information Extraction for Rapid Deployment</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>John</first><last>Cadigan</last></author>
       <author><first>Robert</first><last>Sasseen</last></author>
       <author><first>Paul</first><last>Kalmar</last></author>
@@ -641,7 +641,7 @@
       <title>Negation Detection in <fixed-case>D</fixed-case>utch Spoken Human-Computer Conversations</title>
       <author><first>Tom</first><last>Sweers</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>534–542</pages>
       <abstract>Proper recognition and interpretation of negation signals in text or communication is crucial for any form of full natural language understanding. It is also essential for computational approaches to natural language processing. In this study we focus on negation detection in Dutch spoken human-computer conversations. Since there exists no Dutch (dialogue) corpus annotated for negation we have annotated a Dutch corpus sample to evaluate our method for automatic negation detection. We use transfer learning and trained NegBERT (an existing BERT implementation used for negation detection) on English data with multilingual BERT to detect negation in Dutch dialogues. Our results show that adding in-domain training material improves the results. We show that we can detect both negation cues and scope in Dutch dialogues with high precision and recall. We provide a detailed error analysis and discuss the effects of cross-lingual and cross-domain transfer learning on automatic negation detection.</abstract>
       <url hash="a5f37ba0">2022.lrec-1.56</url>
@@ -650,9 +650,9 @@
     <paper id="57">
       <title>Reflections on 30 Years of Language Resource Development and Sharing</title>
       <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Sunghye</first><last>Cho</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>James</first><last>Fiumara</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
       <pages>543–550</pages>
@@ -662,7 +662,7 @@
     </paper>
     <paper id="58">
       <title>Language Resources to Support Language Diversity – the <fixed-case>ELRA</fixed-case> Achievements</title>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
@@ -695,9 +695,9 @@
       <title>Aspect-Based Emotion Analysis and Multimodal Coreference: A Case Study of Customer Comments on Adidas <fixed-case>I</fixed-case>nstagram Posts</title>
       <author><first>Luna</first><last>De Bruyne</last></author>
       <author><first>Akbar</first><last>Karimi</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
       <author><first>Andrea</first><last>Prati</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>574–580</pages>
       <abstract>While aspect-based sentiment analysis of user-generated content has received a lot of attention in the past years, emotion detection at the aspect level has been relatively unexplored. Moreover, given the rise of more visual content on social media platforms, we want to meet the ever-growing share of multimodal content. In this paper, we present a multimodal dataset for Aspect-Based Emotion Analysis (ABEA). Additionally, we take the first steps in investigating the utility of multimodal coreference resolution in an ABEA framework. The presented dataset consists of 4,900 comments on 175 images and is annotated with aspect and emotion categories and the emotional dimensions of valence and arousal. Our preliminary experiments suggest that ABEA does not benefit from multimodal coreference resolution, and that aspect and emotion classification only requires textual information. However, when more specific information about the aspects is desired, image recognition could be essential.</abstract>
       <url hash="ec4cfb29">2022.lrec-1.61</url>
@@ -716,7 +716,7 @@
     <paper id="63">
       <title><fixed-case>N</fixed-case>aija<fixed-case>S</fixed-case>enti: A <fixed-case>N</fixed-case>igerian <fixed-case>T</fixed-case>witter Sentiment Corpus for Multilingual Sentiment Analysis</title>
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Ibrahim Sa’id</first><last>Ahmad</last></author>
       <author><first>Idris</first><last>Abdulmumin</last></author>
@@ -725,7 +725,7 @@
       <author><first>Chris Chinenye</first><last>Emezue</last></author>
       <author><first>Saheed Salahudeen</first><last>Abdullahi</last></author>
       <author><first>Anuoluwapo</first><last>Aremu</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <author><first>Pavel</first><last>Brazdil</last></author>
       <pages>590–602</pages>
       <abstract>Sentiment analysis is one of the most widely studied applications in NLP, but most work focuses on languages with large amounts of data. We introduce the first large-scale human-annotated Twitter sentiment dataset for the four most widely spoken languages in Nigeria—Hausa, Igbo, Nigerian-Pidgin, and Yorùbá—consisting of around 30,000 annotated tweets per language, including a significant fraction of code-mixed tweets. We propose text collection, filtering, processing and labeling methods that enable us to create datasets for these low-resource languages. We evaluate a range of pre-trained models and transfer strategies on the dataset. We find that language-specific models and language-adaptive fine-tuning generally perform best. We release the datasets, trained models, sentiment lexicons, and code to incentivize research on sentiment analysis in under-represented languages.</abstract>
@@ -764,7 +764,7 @@
     <paper id="67">
       <title>Analysis and Prediction of <fixed-case>NLP</fixed-case> Models via Task Embeddings</title>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>633–647</pages>
       <abstract>Task embeddings are low-dimensional representations that are trained to capture task properties. In this paper, we propose MetaEval, a collection of 101 NLP tasks. We fit a single transformer to all MetaEval tasks jointly while conditioning it on learned embeddings. The resulting task embeddings enable a novel analysis of the space of tasks. We then show that task aspects can be mapped to task embeddings for new tasks without using any annotated examples. Predicted embeddings can modulate the encoder for zero-shot inference and outperform a zero-shot baseline on GLUE tasks. The provided multitask setup can function as a benchmark for future transfer learning research.</abstract>
       <url hash="92bd5846">2022.lrec-1.67</url>
@@ -775,7 +775,7 @@
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Merieme</first><last>Bouhandi</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <pages>648–662</pages>
       <abstract>Automatic Term Extraction (ATE) is a key component for domain knowledge understanding and an important basis for further natural language processing applications. Even with persistent improvements, ATE still exhibits weak results exacerbated by small training data inherent to specialized domain corpora. Recently, transformers-based deep neural models, such as BERT, have proven to be efficient in many downstream NLP tasks. However, no systematic evaluation of ATE has been conducted so far. In this paper, we run an extensive study on fine-tuning pre-trained BERT models for ATE. We propose strategies that empirically show BERT’s effectiveness using cross-lingual and cross-domain transfer learning to extract single and multi-word terms. Experiments have been conducted on four specialized domains in three languages. The obtained results suggest that BERT can capture cross-domain and cross-lingual terminologically-marked contexts shared by terms, opening a new design-pattern for ATE.</abstract>
       <url hash="17683fcd">2022.lrec-1.68</url>
@@ -809,7 +809,7 @@
       <author><first>Hadeel</first><last>Saadany</last></author>
       <author><first>Prashant</first><last>Sharma</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>680–688</pages>
       <abstract>The detection and extraction of abbreviations from unstructured texts can help to improve the performance of Natural Language Processing tasks, such as machine translation and information retrieval. However, in terms of publicly available datasets, there is not enough data for training deep-neural-networks-based models to the point of generalising well over data. This paper presents PLOD, a large-scale dataset for abbreviation detection and extraction that contains 160k+ segments automatically annotated with abbreviations and their long forms. We performed manual validation over a set of instances and a complete automatic validation for this dataset. We then used it to generate several baseline models for detecting abbreviations and long forms. The best models achieved an F1-score of 0.92 for abbreviations and 0.89 for detecting their corresponding long forms. We release this dataset along with our code and all the models publicly at <url>https://github.com/surrey-nlp/PLOD-AbbreviationDetection</url></abstract>
       <url hash="43c499d1">2022.lrec-1.71</url>
@@ -882,7 +882,7 @@
       <author><first>Roberts</first><last>Darģis</last></author>
       <author><first>Ilze</first><last>Auziņa</last></author>
       <author><first>Inga</first><last>Kaija</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
       <author><first>Kristīne</first><last>Pokratniece</last></author>
       <pages>727–731</pages>
       <abstract>This paper presents the Latvian Language Learner Corpus (LaVA) developed at the Institute of Mathematics and Computer Science, University of Latvia. LaVA corpus contains 1015 essays (190k tokens and 790k characters excluding whitespaces) from foreigners studying at Latvian higher education institutions and who are learning Latvian as a foreign language in the first or second semester, reaching the A1 (possibly A2) Latvian language proficiency level. The corpus has morphological and error annotations. Error analysis and the statistics of the LaVA corpus are also provided in the paper. The corpus is publicly available at: <url>http://www.korpuss.lv/id/LaVA</url>.</abstract>
@@ -894,7 +894,7 @@
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
       <author><first>Jelmer</first><last>van der Linde</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Dion</first><last>Wiggins</last></author>
       <pages>732–740</pages>
       <abstract>We present the EuroPat corpus of patent-specific parallel data for 6 official European languages paired with English: German, Spanish, French, Croatian, Norwegian, and Polish. The filtered parallel corpora range in size from 51 million sentences (Spanish-English) to 154k sentences (Croatian-English), with the unfiltered (raw) corpora being up to 2 times larger. Access to clean, high quality, parallel data in technical domains such as science, engineering, and medicine is needed for training neural machine translation systems for tasks like online dispute resolution and eProcurement. Our evaluation found that the addition of EuroPat data to a generic baseline improved the performance of machine translation systems on in-domain test data in German, Spanish, French, and Polish; and in translating patent data from Croatian to English. The corpus has been released under Creative Commons Zero, and is expected to be widely useful for training high-quality machine translation systems, and particularly for those targeting technical documents such as patents and contracts.</abstract>
@@ -916,7 +916,7 @@
       <title>Criteria for the Annotation of Implicit Stereotypes</title>
       <author><first>Wolfgang</first><last>Schmeisser-Nieto</last></author>
       <author><first>Montserrat</first><last>Nofre</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <pages>753–762</pages>
       <abstract>The growth of social media has brought with it a massive channel for spreading and reinforcing stereotypes. This issue becomes critical when the affected targets are minority groups such as women, the LGBT+ community and immigrants. Although from the perspective of computational linguistics, the detection of this kind of stereotypes is steadily improving, most stereotypes are expressed implicitly and identifying them automatically remains a challenge. One of the problems we found for tackling this issue is the lack of an operationalised definition of implicit stereotypes that would allow us to annotate consistently new corpora by characterising the different forms in which stereotypes appear. In this paper, we present thirteen criteria for annotating implicitness which were elaborated to facilitate the subjective task of identifying the presence of stereotypes. We also present NewsCom-Implicitness, a corpus of 1,911 sentences, of which 426 comprise explicit and implicit racial stereotypes. An experiment was carried out to evaluate the applicability of these criteria. The results indicate that different criteria obtain different inter-annotator agreement values and that there is a greater agreement when more criteria can be identified in one sentence.</abstract>
       <url hash="e04d5f16">2022.lrec-1.80</url>
@@ -998,9 +998,9 @@
     <paper id="87">
       <title>Bicleaner <fixed-case>AI</fixed-case>: Bicleaner Goes Neural</title>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
       <pages>824–831</pages>
       <abstract>This paper describes the experiments carried out during the development of the latest version of Bicleaner, named Bicleaner AI, a tool that aims at detecting noisy sentences in parallel corpora. The tool, which now implements a new neural classifier, uses state-of-the-art techniques based on pre-trained transformer-based language models fine-tuned on a binary classification task. After that, parallel corpus filtering is performed, discarding the sentences that have lower probability of being mutual translations. Our experiments, based on the training of neural machine translation (NMT) with corpora filtered using Bicleaner AI for two different scenarios, show significant improvements in translation quality compared to the previous version of the tool which implemented a classifier based on Extremely Randomized Trees.</abstract>
       <url hash="177c50a3">2022.lrec-1.87</url>
@@ -1030,7 +1030,7 @@
       <author><first>Kyle</first><last>Gorman</last></author>
       <author><first>Yustinus Ghanggo</first><last>Ate</last></author>
       <author><first>Maria</first><last>Ryskina</last></author>
-      <author><first>Sabrina</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina</first><last>Mielke</last></author>
       <author><first>Elena</first><last>Budianskaya</last></author>
       <author><first>Charbel</first><last>El-Khaissi</last></author>
       <author><first>Tiago</first><last>Pimentel</last></author>
@@ -1042,7 +1042,7 @@
       <author><first>Delio Siticonatzi</first><last>Camaiteri</last></author>
       <author><first>Esaú Zumaeta</first><last>Rojas</last></author>
       <author><first>Didier</first><last>López Francis</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Juan</first><last>López Bautista</last></author>
       <author><first>Gema Celeste Silva</first><last>Villegas</last></author>
       <author><first>Lucas Torroba</first><last>Hennigen</last></author>
@@ -1058,7 +1058,7 @@
       <author><first>Sofya</first><last>Ganieva</last></author>
       <author><first>Hilaria</first><last>Cruz</last></author>
       <author><first>Ritván</first><last>Karahóǧa</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>George</first><last>Pavlidis</last></author>
       <author><first>Matvey</first><last>Plugaryov</last></author>
       <author><first>Elena</first><last>Klyachko</last></author>
@@ -1080,7 +1080,7 @@
       <author><first>Brijesh</first><last>Bhatt</last></author>
       <author><first>Christopher</first><last>Straughn</last></author>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Jonathan North</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan North</first><last>Washington</last></author>
       <author><first>Yuval</first><last>Pinter</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Marcin</first><last>Wolinski</last></author>
@@ -1090,7 +1090,7 @@
       <author><first>Hossep</first><last>Dolatian</last></author>
       <author><first>Zahroh</first><last>Nuriah</last></author>
       <author><first>Shyam</first><last>Ratan</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Edoardo M.</first><last>Ponti</last></author>
       <author><first>Grant</first><last>Aiton</last></author>
       <author><first>Aryaman</first><last>Arora</last></author>
@@ -1103,13 +1103,13 @@
       <author><first>Igor</first><last>Marchenko</last></author>
       <author><first>Polina</first><last>Mashkovtseva</last></author>
       <author><first>Alexandra</first><last>Serova</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Maria</first><last>Nepomniashchaya</last></author>
       <author><first>Fausto</first><last>Giunchiglia</last></author>
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
@@ -1157,7 +1157,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Jungseob</first><last>Lee</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>883–891</pages>
       <abstract>Automatic post-editing (APE) refers to a research field that aims to automatically correct errors included in the translation sentences derived by the machine translation system. This study has several limitations, considering the data acquisition, because there is no official dataset for most language pairs. Moreover, the amount of data is restricted even for language pairs in which official data has been released, such as WMT. To solve this problem and promote universal APE research regardless of APE data existence, this study proposes a method for automatically generating APE data based on a noising scheme from a parallel corpus. Particularly, we propose a human mimicking errors-based noising scheme that considers a practical correction process at the human level. We propose a precise inspection to attain high performance, and we derived the optimal noising schemes that show substantial effectiveness. Through these, we also demonstrate that depending on the type of noise, the noising scheme-based APE data generation may lead to inferior performance. In addition, we propose a dynamic noise injection strategy that enables the acquisition of a robust error correction capability and demonstrated its effectiveness by comparative analysis. This study enables obtaining a high performance APE model without human-generated data and can promote universal APE research for all language pairs targeting English.</abstract>
       <url hash="f1231d93">2022.lrec-1.93</url>
@@ -1167,7 +1167,7 @@
       <title>Domain Mismatch Doesn’t Always Prevent Cross-lingual Transfer Learning</title>
       <author><first>Daniel</first><last>Edmiston</last></author>
       <author><first>Phillip</first><last>Keung</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>892–899</pages>
       <abstract>Cross-lingual transfer learning without labeled target language data or parallel text has been surprisingly effective in zero-shot cross-lingual classification, question answering, unsupervised machine translation, etc. However, some recent publications have claimed that domain mismatch prevents cross-lingual transfer, and their results show that unsupervised bilingual lexicon induction (UBLI) and unsupervised neural machine translation (UNMT) do not work well when the underlying monolingual corpora come from different domains (e.g., French text from Wikipedia but English text from UN proceedings). In this work, we show how a simple initialization regimen can overcome much of the effect of domain mismatch in cross-lingual transfer. We pre-train word and contextual embeddings on the concatenated domain-mismatched corpora, and use these as initializations for three tasks: MUSE UBLI, UN Parallel UNMT, and the SemEval 2017 cross-lingual word similarity task. In all cases, our results challenge the conclusions of prior work by showing that proper initialization can recover a large portion of the losses incurred by domain mismatch.</abstract>
       <url hash="8c772d20">2022.lrec-1.94</url>
@@ -1250,7 +1250,7 @@
       <author><first>Louis</first><last>Kobras</last></author>
       <author><first>Melf</first><last>Johannsen</last></author>
       <author><first>Peter</first><last>Kling</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>956–962</pages>
       <abstract>We present a dataset containing source code solutions to algorithmic programming exercises solved by hundreds of Bachelor-level students at the University of Hamburg. These solutions were collected during the winter semesters 2019/2020, 2020/2021 and 2021/2022. The dataset contains a set of solutions to a total of 21 tasks written in Java as well as Python and a total of over 1500 individual solutions. All solutions were submitted through Moodle and the Coderunner plugin and passed a number of test cases (including randomized tests), such that they can be considered as working correctly. All students whose solutions are included in the dataset gave their consent into publishing their solutions. The solutions are pseudonymized with a random solution ID. Included in this paper is a short analysis of the dataset containing statistical data and highlighting a few anomalies (e.g. the number of solutions per task decreases for the last few tasks due to grading rules). We plan to extend the dataset with tasks and solutions from upcoming courses.</abstract>
       <url hash="b322b01c">2022.lrec-1.101</url>
@@ -1277,7 +1277,7 @@
       <author><first>Patrick D.</first><last>Watson</last></author>
       <author><first>Tiago</first><last>Timponi Torrent</last></author>
       <author><first>Oliver</first><last>Czulo</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
       <pages>976–986</pages>
       <abstract>Frame shift is a cross-linguistic phenomenon in translation which results in corresponding pairs of linguistic material evoking different frames. The ability to predict frame shifts would enable (semi-)automatic creation of multilingual frame annotations and thus speeding up FrameNet creation through annotation projection. Here, we first characterize how frame shifts result from other linguistic divergences such as translational divergences and construal differences. Our analysis also shows that many pairs of frames in frame shifts are multi-hop away from each other in Berkeley FrameNet’s net-like configuration. Then, we propose the Frame Shift Prediction task and demonstrate that our graph attention networks, combined with auxiliary training, can learn cross-linguistic frame-to-frame correspondence and predict frame shifts.</abstract>
       <url hash="5004d13c">2022.lrec-1.103</url>
@@ -1317,7 +1317,7 @@
       <title>A Speech Recognizer for <fixed-case>F</fixed-case>risian/<fixed-case>D</fixed-case>utch Council Meetings</title>
       <author><first>Martijn</first><last>Bentum</last></author>
       <author><first>Louis</first><last>ten Bosch</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Simone</first><last>Wills</last></author>
       <author><first>Domenique</first><last>van der Niet</last></author>
       <author><first>Jelske</first><last>Dijkstra</last></author>
@@ -1345,7 +1345,7 @@
       <author><first>Ali Can</first><last>Kocabiyikoglu</last></author>
       <author><first>François</first><last>Portet</last></author>
       <author><first>Prudence</first><last>Gibert</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <author><first>Jean-Marc</first><last>Babouchkine</last></author>
       <author><first>Gaëtan</first><last>Gavazzi</last></author>
       <pages>1023–1031</pages>
@@ -1357,7 +1357,7 @@
       <title>Towards an Open-Source <fixed-case>D</fixed-case>utch Speech Recognition System for the Healthcare Domain</title>
       <author><first>Cristian</first><last>Tejedor-García</last></author>
       <author><first>Berrie</first><last>van der Molen</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Arjan</first><last>van Hessen</last></author>
       <author><first>Toine</first><last>Pieters</last></author>
       <pages>1032–1039</pages>
@@ -1398,10 +1398,10 @@
     <paper id="113">
       <title>Using a Knowledge Base to Automatically Annotate Speech Corpora and to Identify Sociolinguistic Variation</title>
       <author><first>Yaru</first><last>Wu</last></author>
-      <author><first>Fabian</first><last>Suchanek</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>1054–1060</pages>
       <abstract>Speech characteristics vary from speaker to speaker. While some variation phenomena are due to the overall communication setting, others are due to diastratic factors such as gender, provenance, age, and social background. The analysis of these factors, although relevant for both linguistic and speech technology communities, is hampered by the need to annotate existing corpora or to recruit, categorise, and record volunteers as a function of targeted profiles. This paper presents a methodology that uses a knowledge base to provide speaker-specific information. This can facilitate the enrichment of existing corpora with new annotations extracted from the knowledge base. The method also helps the large scale analysis by automatically extracting instances of speech variation to correlate with diastratic features. We apply our method to an over 120-hour corpus of broadcast speech in French and investigate variation patterns linked to reduction phenomena and/or specific to connected speech such as disfluencies. We find significant differences in speech rate, the use of filler words, and the rate of non-canonical realisations of frequent segments as a function of different professional categories and age groups.</abstract>
       <url hash="362c16f0">2022.lrec-1.113</url>
@@ -1411,8 +1411,8 @@
       <title>Phone Inventories and Recognition for Every Language</title>
       <author><first>Xinjian</first><last>Li</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
       <pages>1061–1067</pages>
       <abstract>Identifying phone inventories is a crucial component in language documentation and the preservation of endangered languages. However, even the largest collection of phone inventory only covers about 2000 languages, which is only 1/4 of the total number of languages in the world. A majority of the remaining languages are endangered. In this work, we attempt to solve this problem by estimating the phone inventory for any language listed in Glottolog, which contains phylogenetic information regarding 8000 languages. In particular, we propose one probabilistic model and one non-probabilistic model, both using phylogenetic trees (“language family trees”) to measure the distance between languages. We show that our best model outperforms baseline models by 6.5 F1. Furthermore, we demonstrate that, with the proposed inventories, the phone recognition model can be customized for every language in the set, which improved the PER (phone error rate) in phone recognition by 25%.</abstract>
@@ -1447,7 +1447,7 @@
       <author><first>Jayetri</first><last>Bardhan</last></author>
       <author><first>Anthony</first><last>Colas</last></author>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Daisy Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last></author>
       <pages>1083–1097</pages>
       <abstract>This paper develops the first question answering dataset (DrugEHRQA) containing question-answer pairs from both structured tables and unstructured notes from a publicly available Electronic Health Record (EHR). EHRs contain patient records, stored in structured tables and unstructured clinical notes. The information in structured and unstructured EHRs is not strictly disjoint: information may be duplicated, contradictory, or provide additional context between these sources. Our dataset has medication-related queries, containing over 70,000 question-answer pairs. To provide a baseline model and help analyze the dataset, we have used a simple model (MultimodalEHRQA) which uses the predictions of a modality selection network to choose between EHR tables and clinical notes to answer the questions. This is used to direct the questions to the table-based or text-based state-of-the-art QA model. In order to address the problem arising from complex, nested queries, this is the first time Relation-Aware Schema Encoding and Linking for Text-to-SQL Parsers (RAT-SQL) has been used to test the structure of query templates in EHR data. Our goal is to provide a benchmark dataset for multi-modal QA systems, and to open up new avenues of research in improving question answering over EHR structured data by using context from unstructured clinical data.</abstract>
       <url hash="f5cc6612">2022.lrec-1.117</url>
@@ -1466,10 +1466,10 @@
       <title><fixed-case>BERT</fixed-case>rade: Using Contextual Embeddings to Parse <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench</title>
       <author><first>Loïc</first><last>Grobol</last></author>
       <author><first>Mathilde</first><last>Regnault</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>1104–1113</pages>
       <abstract>The successes of contextual word embeddings learned by training large-scale language models, while remarkable, have mostly occurred for languages where significant amounts of raw texts are available and where annotated data in downstream tasks have a relatively regular spelling. Conversely, it is not yet completely clear if these models are also well suited for lesser-resourced and more irregular languages. We study the case of Old French, which is in the interesting position of having relatively limited amount of available raw text, but enough annotated resources to assess the relevance of contextual word embedding models for downstream NLP tasks. In particular, we use POS-tagging and dependency parsing to evaluate the quality of such models in a large array of configurations, including models trained from scratch from small amounts of raw text and models pre-trained on other languages but fine-tuned on Medieval French data.</abstract>
       <url hash="abcb4ba8">2022.lrec-1.119</url>
@@ -1495,7 +1495,7 @@
     </paper>
     <paper id="122">
       <title>Towards Universal Segmentations: <fixed-case>U</fixed-case>ni<fixed-case>S</fixed-case>egments 1.0</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Niyati</first><last>Bafna</last></author>
       <author><first>Jan</first><last>Bodnár</last></author>
       <author><first>Lukáš</first><last>Kyjánek</last></author>
@@ -1509,11 +1509,11 @@
     </paper>
     <paper id="123">
       <title><fixed-case>T</fixed-case>e<fixed-case>DD</fixed-case>i Sample: Text Data Diversity Sample for Language Comparison and Multilingual <fixed-case>NLP</fixed-case></title>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Christian</first><last>Bentz</last></author>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
       <author><first>Olga</first><last>Pelloni</last></author>
-      <author><first>Tanja</first><last>Samardzic</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last></author>
       <pages>1150–1158</pages>
       <abstract>We present the TeDDi sample, a diversity sample of text data for language comparison and multilingual Natural Language Processing. The TeDDi sample currently features 89 languages based on the typological diversity sample in the World Atlas of Language Structures. It consists of more than 20k texts and is accompanied by open-source corpus processing tools. The aim of TeDDi is to facilitate text-based quantitative analysis of linguistic diversity. We describe in detail the TeDDi sample, how it was created, data availability, and its added value through for NLP and linguistic research.</abstract>
       <url hash="0551f185">2022.lrec-1.123</url>
@@ -1533,7 +1533,7 @@
       <author><first>Linda</first><last>Wiechetek</last></author>
       <author><first>Katri</first><last>Hiovain-Asikainen</last></author>
       <author><first>Inga Lill Sigga</first><last>Mikkelsen</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <author><first>Flammie</first><last>Pirinen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <author><first>Børre</first><last>Gaup</last></author>
@@ -1578,7 +1578,7 @@
     <paper id="129">
       <title><fixed-case>CAMIO</fixed-case>: A Corpus for <fixed-case>OCR</fixed-case> in Multiple Languages</title>
       <author><first>Michael</first><last>Arrigo</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Nolan</first><last>King</last></author>
       <author><first>Thao</first><last>Tran</last></author>
       <author><first>Lisa</first><last>Mason</last></author>
@@ -1619,7 +1619,7 @@
       <author><first>Yoonna</first><last>Jang</last></author>
       <author><first>Seolhwa</first><last>Lee</last></author>
       <author><first>Sungjin</first><last>Park</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>1242–1248</pages>
       <abstract>We propose a deep learning-based foreign language learning platform, named FreeTalky, for people who experience anxiety dealing with foreign languages, by employing a humanoid robot NAO and various deep learning models. A persona-based dialogue system that is embedded in NAO provides an interesting and consistent multi-turn dialogue for users. Also, an grammar error correction system promotes improvement in grammar skills of the users. Thus, our system enables personalized learning based on persona dialogue and facilitates grammar learning of a user using grammar error feedback. Furthermore, we verified whether FreeTalky provides practical help in alleviating xenoglossophobia by replacing the real human in the conversation with a NAO robot, through human evaluation.</abstract>
       <url hash="6d1a241a">2022.lrec-1.132</url>
@@ -1637,7 +1637,7 @@
       <title><fixed-case>D</fixed-case>ial<fixed-case>C</fixed-case>rowd 2.0: A Quality-Focused Dialog System Crowdsourcing Toolkit</title>
       <author><first>Jessica</first><last>Huynh</last></author>
       <author><first>Ting-Rui</first><last>Chiang</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>1256–1263</pages>
       <abstract>Dialog system developers need high-quality data to train, fine-tune and assess their systems. They often use crowdsourcing for this since it provides large quantities of data from many workers. However, the data may not be of sufficiently good quality. This can be due to the way that the requester presents a task and how they interact with the workers. This paper introduces DialCrowd 2.0 to help requesters obtain higher quality data by, for example, presenting tasks more clearly and facilitating effective communication with workers. DialCrowd 2.0 guides developers in creating improved Human Intelligence Tasks (HITs) and is directly applicable to the workflows used currently by developers and researchers.</abstract>
@@ -1646,7 +1646,7 @@
     </paper>
     <paper id="135">
       <title>A Brief Survey of Textual Dialogue Corpora</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Patrícia</first><last>Ferreira</last></author>
       <author><first>Daniel</first><last>Martins</last></author>
       <author><first>Catarina</first><last>Silva</last></author>
@@ -1673,7 +1673,7 @@
       <author><first>Vojtěch</first><last>Hudeček</last></author>
       <author><first>Léon-Paul</first><last>Schaub</last></author>
       <author><first>Daniel</first><last>Stancl</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <pages>1286–1296</pages>
       <abstract>Every model is only as strong as the data that it is trained on. In this paper, we present a new dataset, obtained by merging four publicly available annotated corpora for task-oriented dialogues in several domains (MultiWOZ 2.2, CamRest676, DSTC2 and Schema-Guided Dialogue Dataset). This way, we assess the feasibility of providing a unified ontology and annotation schema covering several domains with a relatively limited effort. We analyze the characteristics of the resulting dataset along three main dimensions: language, information content and performance. We focus on aspects likely to be pertinent for improving dialogue success, e.g. dialogue consistency. Furthermore, to assess the usability of this new corpus, we thoroughly evaluate dialogue generation performance under various conditions with the help of two prominent recent end-to-end dialogue models: MarCo and GPT-2. These models were selected as popular open implementations representative of the two main dimensions of dialogue modelling. While we did not observe a significant gain for dialogue state tracking performance, we show that using more training data from different sources can improve language modelling capabilities and positively impact dialogue flow (consistency). In addition, we provide the community with one of the largest open dataset for machine learning experiments.</abstract>
@@ -1722,12 +1722,12 @@
     </paper>
     <paper id="142">
       <title>Making a Semantic Event-type Ontology Multilingual</title>
-      <author><first>Zdenka</first><last>Uresova</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
       <author><first>Karolina</first><last>Zaczynska</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>1332–1343</pages>
       <abstract>We present an extension of the SynSemClass Event-type Ontology, originally conceived as a bilingual Czech-English resource. We added German entries to the classes representing the concepts of the ontology. Having a different starting point than the original work (unannotated parallel corpus without links to a valency lexicon and, of course, different existing lexical resources), it was a challenge to adapt the annotation guidelines, the data model and the tools used for the original version. We describe the process and results of working in such a setup. We also show the next steps to adapt the annotation process, data structures and formats and tools necessary to make the addition of a new language in the future more smooth and efficient, and possibly to allow for various teams to work on SynSemClass extensions to many languages concurrently. We also present the latest release which contains the results of adding German, freely available for download as well as for online access.</abstract>
       <url hash="f94da44f">2022.lrec-1.142</url>
@@ -1744,8 +1744,8 @@
     </paper>
     <paper id="144">
       <title><fixed-case>TZOS</fixed-case>: an Online Terminology Database Aimed at Working on <fixed-case>B</fixed-case>asque Academic Terminology Collaboratively</title>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
-      <author><first>Jose Mari</first><last>Arriola</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="jose-mari-arriola"><first>Jose Mari</first><last>Arriola</last></author>
       <author><first>Arantxa</first><last>Otegi</last></author>
       <pages>1353–1359</pages>
       <abstract>Terminology databases are highly useful for the dissemination of specialized knowledge. In this paper we present TZOS, an online terminology database to work on Basque academic terminology collaboratively. We show how this resource integrates the Communicative Theory of Terminology, together with the methodological matters, how it is connected with real corpus GARATERM, which terminology issues arise when terms are collected and future perspectives. The main objectives of this work are to develop basic tools to research academic registers and make the terminology collected by expert users available to the community. Even though TZOS has been designed for an educational context, its flexible structure makes possible to extend it also to the professional area. In this way, we have built IZIBI-TZOS which is a Civil Engineering oriented version of TZOS. These resources are already publicly available, and the ongoing work is towards the interlinking with other lexical resources by applying linking data principles.</abstract>
@@ -1755,7 +1755,7 @@
     <paper id="145">
       <title><fixed-case>A</fixed-case>nimacy Denoting <fixed-case>G</fixed-case>erman Nouns: Annotation and Classification</title>
       <author><first>Manfred</first><last>Klenner</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <pages>1360–1364</pages>
       <abstract>In this paper, we introduce a gold standard for animacy detection comprising almost 14,500 German nouns that might be used to denote either animate entities or non-animate entities. We present inter-annotator agreement of our crowd-sourced seed annotations (9,000 nouns) and discuss the results of machine learning models applied to this data.</abstract>
       <url hash="26be35db">2022.lrec-1.145</url>
@@ -1774,7 +1774,7 @@
     </paper>
     <paper id="147">
       <title>Polar Quantification of Actor Noun Phrases for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Manfred</first><last>Klenner</last></author>
       <pages>1376–1380</pages>
       <abstract>In this paper, we discuss work that strives to measure the degree of negativity - the negative polar load - of noun phrases, especially those denoting actors. Since no gold standard data is available for German for this quantification task, we generated a silver standard and used it to fine-tune a BERT-based intensity regressor. We evaluated the quality of the silver standard empirically and found that our lexicon-based quantification metric showed a strong correlation with human annotators.</abstract>
@@ -1794,7 +1794,7 @@
       <title><fixed-case>RED</fixed-case> v2: Enhancing <fixed-case>RED</fixed-case> Dataset for Multi-Label Emotion Detection</title>
       <author><first>Alexandra</first><last>Ciobotaru</last></author>
       <author><first>Mihai Vlad</first><last>Constantinescu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Stefan</first><last>Dumitrescu</last></author>
       <pages>1392–1399</pages>
       <abstract>RED (Romanian Emotion Dataset) is a machine learning-based resource developed for the automatic detection of emotions in Romanian texts, containing single-label annotated tweets with one of the following emotions: joy, fear, sadness, anger and neutral. In this work, we propose REDv2, an open-source extension of RED by adding two more emotions, trust and surprise, and by widening the annotation schema so that the resulted novel dataset is multi-label. We show the overall reliability of our dataset by computing inter-annotator agreements per tweet using a formula suitable for our annotation setup and we aggregate all annotators’ opinions into two variants of ground truth, one suitable for multi-label classification and the other suitable for text regression. We propose strong baselines with two transformer models, the Romanian BERT and the multilingual XLM-Roberta model, in both categorical and regression settings.</abstract>
@@ -1822,7 +1822,7 @@
       <title>Frustratingly Easy Performance Improvements for Low-resource Setups: A Tale on <fixed-case>BERT</fixed-case> and Segment Embeddings</title>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1418–1427</pages>
       <abstract>As input representation for each sub-word, the original BERT architecture proposes the sum of the sub-word embedding, position embedding and a segment embedding. Sub-word and position embeddings are well-known and studied, and encode lexical information and word position, respectively. In contrast, segment embeddings are less known and have so far received no attention, despite being ubiquitous in large pre-trained language models. The key idea of segment embeddings is to encode to which of the two sentences (segments) a word belongs to — the intuition is to inform the model about the separation of sentences for the next sentence prediction pre-training task. However, little is known on whether the choice of segment impacts performance. In this work, we try to fill this gap and empirically study the impact of the segment embedding during inference time for a variety of pre-trained embeddings and target tasks. We hypothesize that for single-sentence prediction tasks performance is not affected — neither in mono- nor multilingual setups — while it matters when swapping segment IDs in paired-sentence tasks. To our surprise, this is not the case. Although for classification tasks and monolingual BERT models no large differences are observed, particularly word-level multilingual prediction tasks are heavily impacted. For low-resource syntactic tasks, we observe impacts of segment embedding and multilingual BERT choice. We find that the default setting for the most used multilingual BERT model underperforms heavily, and a simple swap of the segment embeddings yields an average improvement of 2.5 points absolute LAS score for dependency parsing over 9 different treebanks.</abstract>
       <url hash="fac87db3">2022.lrec-1.152</url>
@@ -1851,7 +1851,7 @@
       <author><first>Mustafa</first><last>Ocal</last></author>
       <author><first>Adrian</first><last>Perez</last></author>
       <author><first>Antonela</first><last>Radas</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>1444–1453</pages>
       <abstract>TimeML is a scheme for representing temporal information (times, events, &amp; temporal relations) in texts. Although automatic TimeML annotation is challenging, there has been notable progress, with F1s of 0.8–0.9 for events and time detection subtasks, and F1s of 0.5–0.7 for relation extraction. Individually, these subtask results are reasonable, even good, but when combined to generate a full TimeML graph, is overall performance still acceptable? We present a novel suite of eight metrics, combined with a new graph-transformation experimental design, for holistic evaluation of TimeML graphs. We apply these metrics to four automatic TimeML annotation systems (CAEVO, TARSQI, CATENA, and ClearTK). We show that on average 1/3 of the TimeML graphs produced using these systems are inconsistent, and there is on average 1/5 more temporal indeterminacy than the gold-standard. We also show that the automatically generated graphs are on average 109 edits from the gold-standard, which is 1/3 toward complete replacement. Finally, we show that the relationship individual subtask performance and graph quality is non-linear: small errors in TimeML subtasks result in rapid degradation of final graph quality. These results suggest current automatic TimeML annotators are far from optimal and significant further improvement would be useful.</abstract>
       <url hash="496a7c29">2022.lrec-1.155</url>
@@ -1871,7 +1871,7 @@
     <paper id="157">
       <title>Challenging the Transformer-based models with a Classical <fixed-case>A</fixed-case>rabic dataset: <fixed-case>Q</fixed-case>uran and <fixed-case>H</fixed-case>adith</title>
       <author><first>Shatha</first><last>Altammami</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>1462–1471</pages>
       <abstract>Transformer-based models showed near-perfect results on several downstream tasks. However, their performance on classical Arabic texts is largely unexplored. To fill this gap, we evaluate monolingual, bilingual, and multilingual state-of-the-art models to detect relatedness between the Quran (Muslim holy book) and the Hadith (Prophet Muhammed teachings), which are complex classical Arabic texts with underlying meanings that require deep human understanding. To do this, we carefully built a dataset of Quran-verse and Hadith-teaching pairs by consulting sources of reputable religious experts. This study presents the methodology of creating the dataset, which we make available on our repository, and discusses the models’ performance that calls for the imminent need to explore avenues for improving the quality of these models to capture the semantics in such complex, low-resource texts.</abstract>
       <url hash="366b8f80">2022.lrec-1.157</url>
@@ -1910,7 +1910,7 @@
     <paper id="161">
       <title>Fine-tuning vs From Scratch: Do Vision &amp; Language Models Have Similar Capabilities on Out-of-Distribution Visual Question Answering?</title>
       <author><first>Kristian Nørgaard</first><last>Jensen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1496–1508</pages>
       <abstract>Fine-tuning general-purpose pre-trained models has become a de-facto standard, also for Vision and Language tasks such as Visual Question Answering (VQA). In this paper, we take a step back and ask whether a fine-tuned model has superior linguistic and reasoning capabilities than a prior state-of-the-art architecture trained from scratch on the training data alone. We perform a fine-grained evaluation on out-of-distribution data, including an analysis on robustness due to linguistic variation (rephrasings). Our empirical results confirm the benefit of pre-training on overall performance and rephrasing in particular. But our results also uncover surprising limitations, particularly for answering questions involving boolean operations. To complement the empirical evaluation, this paper also surveys relevant earlier work on 1) available VQA data sets, 2) models developed for VQA, 3) pre-trained Vision+Language models, and 4) earlier fine-grained evaluation of pre-trained Vision+Language models.</abstract>
       <url hash="f5d9970b">2022.lrec-1.161</url>
@@ -1932,7 +1932,7 @@
       <author><first>Eui Jun</first><last>Hwang</last></author>
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Du Hui</first><last>Lee</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>1519–1528</pages>
       <abstract>Sign language production (SLP) is the process of generating sign language videos from spoken language expressions. Since sign languages are highly under-resourced, existing vision-based SLP approaches suffer from out-of-vocabulary (OOV) and test-time generalization problems and thus generate low-quality translations. To address these problems, we introduce an avatar-based SLP system composed of a sign language translation (SLT) model and an avatar animation generation module. Our Transformer-based SLT model utilizes two additional strategies to resolve these problems: named entity transformation to reduce OOV tokens and context vector generation using a pretrained language model (e.g., BERT) to reliably train the decoder. Our system is validated on a new Korean-Korean Sign Language (KSL) dataset of weather forecasts and emergency announcements. Our SLT model achieves an 8.77 higher BLEU-4 score and a 4.57 higher ROUGE-L score over those of our baseline model. In a user evaluation, 93.48% of named entities were successfully identified by participants, demonstrating marked improvement on OOV issues.</abstract>
       <url hash="e617265b">2022.lrec-1.163</url>
@@ -1944,7 +1944,7 @@
       <author><first>William</first><last>Pickard</last></author>
       <author><first>Brittany</first><last>Cates</last></author>
       <author><first>Nathaniel</first><last>Blanchard</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1529–1541</pages>
       <abstract>We present a five-year retrospective on the development of the VoxWorld platform, first introduced as a multimodal platform for modeling motion language, that has evolved into a platform for rapidly building and deploying embodied agents with contextual and situational awareness, capable of interacting with humans in multiple modalities, and exploring their environments. In particular, we discuss the evolution from the theoretical underpinnings of the VoxML modeling language to a platform that accommodates both neural and symbolic inputs to build agents capable of multimodal interaction and hybrid reasoning. We focus on three distinct agent implementations and the functionality needed to accommodate all of them: Diana, a virtual collaborative agent; Kirby, a mobile robot; and BabyBAW, an agent who self-guides its own exploration of the world.</abstract>
       <url hash="6085ed69">2022.lrec-1.164</url>
@@ -1998,7 +1998,7 @@
       <author><first>Richard</first><last>Brutti</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Kenneth</first><last>Lai</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1576–1583</pages>
       <abstract>This paper presents Gesture AMR, an extension to Abstract Meaning Representation (AMR), that captures the meaning of gesture. In developing Gesture AMR, we consider how gesture form and meaning relate; how gesture packages meaning both independently and in interaction with speech; and how the meaning of gesture is temporally and contextually determined. Our case study for developing Gesture AMR is a focused human-human shared task to build block structures. We develop an initial taxonomy of gesture act relations that adheres to AMR’s existing focus on predicate-argument structure while integrating meaningful elements unique to gesture. Pilot annotation shows Gesture AMR to be more challenging than standard AMR, and illustrates the need for more work on representation of dialogue and multimodal meaning. We discuss challenges of adapting an existing meaning representation to non-speech-based modalities and outline several avenues for expanding Gesture AMR.</abstract>
       <url hash="f7155346">2022.lrec-1.169</url>
@@ -2033,9 +2033,9 @@
       <title><fixed-case>B</fixed-case>asque<fixed-case>GLUE</fixed-case>: A Natural Language Understanding Benchmark for <fixed-case>B</fixed-case>asque</title>
       <author><first>Gorka</first><last>Urbizu</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>1603–1612</pages>
       <abstract>Natural Language Understanding (NLU) technology has improved significantly over the last few years and multitask benchmarks such as GLUE are key to evaluate this improvement in a robust and general way. These benchmarks take into account a wide and diverse set of NLU tasks that require some form of language understanding, beyond the detection of superficial, textual clues. However, they are costly to develop and language-dependent, and therefore they are only available for a small number of languages. In this paper, we present BasqueGLUE, the first NLU benchmark for Basque, a less-resourced language, which has been elaborated from previously existing datasets and following similar criteria to those used for the construction of GLUE and SuperGLUE. We also report the evaluation of two state-of-the-art language models for Basque on BasqueGLUE, thus providing a strong baseline to compare upon. BasqueGLUE is freely available under an open license.</abstract>
       <url hash="e6478608">2022.lrec-1.172</url>
@@ -2078,9 +2078,9 @@
       <title><fixed-case>MUSS</fixed-case>: Multilingual Unsupervised Sentence Simplification by Mining Paraphrases</title>
       <author><first>Louis</first><last>Martin</last></author>
       <author><first>Angela</first><last>Fan</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>1651–1664</pages>
       <abstract>Progress in sentence simplification has been hindered by a lack of labeled parallel simplification data, particularly in languages other than English. We introduce MUSS, a Multilingual Unsupervised Sentence Simplification system that does not require labeled simplification data. MUSS uses a novel approach to sentence simplification that trains strong models using sentence-level paraphrase data instead of proper simplification data. These models leverage unsupervised pretraining and controllable generation mechanisms to flexibly adjust attributes such as length and lexical complexity at inference time. We further present a method to mine such paraphrase data in any language from Common Crawl using semantic sentence embeddings, thus removing the need for labeled data. We evaluate our approach on English, French, and Spanish simplification benchmarks and closely match or outperform the previous best supervised results, despite not using any labeled simplification data. We push the state of the art further by incorporating labeled simplification data.</abstract>
       <url hash="c35475fd">2022.lrec-1.176</url>
@@ -2102,7 +2102,7 @@
     </paper>
     <paper id="178">
       <title>Combining <fixed-case>ELECTRA</fixed-case> and Adaptive Graph Encoding for Frame Identification</title>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>1671–1679</pages>
       <abstract>This paper presents contributions in two directions: first we propose a new system for Frame Identification (FI), based on pre-trained text encoders trained discriminatively and graphs embedding, producing state of the art performance and, second, we take in consideration all the extremely different procedures used to evaluate systems for this task performing a complete evaluation over two benchmarks and all possible splits and cleaning procedures used in the FI literature.</abstract>
       <url hash="16fdf9c1">2022.lrec-1.178</url>
@@ -2110,7 +2110,7 @@
     </paper>
     <paper id="179">
       <title>Polysemy in Spoken Conversations and Written Texts</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Matthieu</first><last>Labeau</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
       <pages>1680–1690</pages>
@@ -2188,7 +2188,7 @@
     <paper id="186">
       <title><fixed-case>D</fixed-case>i<fixed-case>H</fixed-case>u<fixed-case>T</fixed-case>ra: a Parallel Corpus to Analyse Differences between Human Translations</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Maarit</first><last>Koponen</last></author>
       <pages>1751–1760</pages>
       <abstract>This paper describes a new corpus of human translations which contains both professional and students translations. The data consists of English sources – texts from news and reviews – and their translations into Russian and Croatian, as well as of the subcorpus containing translations of the review texts into Finnish. All target languages represent mid-resourced and less or mid-investigated ones. The corpus will be valuable for studying variation in translation as it allows a direct comparison between human translations of the same source texts. The corpus will also be a valuable resource for evaluating machine translation systems. We believe that this resource will facilitate understanding and improvement of the quality issues in both human and machine translation. In the paper, we describe how the data was collected, provide information on translator groups and summarise the differences between the human translations at hand based on our preliminary results with shallow features.</abstract>
@@ -2211,7 +2211,7 @@
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>1771–1779</pages>
       <abstract>Summarization is a challenging problem, and even more challenging is to manually create, correct, and evaluate the summaries. The severity of the problem grows when the inputs are multi-party dialogues in a meeting setup. To facilitate the research in this area, we present ALIGNMEET, a comprehensive tool for meeting annotation, alignment, and evaluation. The tool aims to provide an efficient and clear interface for fast annotation while mitigating the risk of introducing errors. Moreover, we add an evaluation mode that enables a comprehensive quality evaluation of meeting minutes. To the best of our knowledge, there is no such tool available. We release the tool as open source. It is also directly installable from PyPI.</abstract>
       <url hash="1875667b">2022.lrec-1.188</url>
@@ -2259,7 +2259,7 @@
     </paper>
     <paper id="193">
       <title>Annotating Attribution in <fixed-case>C</fixed-case>zech News Server Articles</title>
-      <author><first>Barbora</first><last>Hladka</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladka</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Matyáš</first><last>Kopp</last></author>
       <author><first>Václav</first><last>Moravec</last></author>
@@ -2285,7 +2285,7 @@
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Jeremy</first><last>Getman</last></author>
       <author><first>Kira</first><last>Griffitt</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>1831–1838</pages>
       <abstract>This paper describes data resources created for Phase 1 of the DARPA Active Interpretation of Disparate Alternatives (AIDA) program, which aims to develop language technology that can help humans manage large volumes of sometimes conflicting information to develop a comprehensive understanding of events around the world, even when such events are described in multiple media and languages. Especially important is the need for the technology to be capable of building multiple hypotheses to account for alternative interpretations of data imbued with informational conflict. The corpus described here is designed to support these goals. It focuses on the domain of Russia-Ukraine relations and contains multimedia source data in English, Russian and Ukrainian, annotated to support development and evaluation of systems that perform extraction of entities, events, and relations from individual multimedia documents, aggregate the information across documents and languages, and produce multiple “hypotheses” about what has happened. This paper describes source data collection, annotation, and assessment.</abstract>
       <url hash="e342b731">2022.lrec-1.195</url>
@@ -2298,7 +2298,7 @@
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Iskandar</first><last>Keskes</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last></author>
       <pages>1839–1848</pages>
       <abstract>This paper describes our efforts to extend the PARSEME framework to Modern Standard Arabic. Theapplicability of the PARSEME guidelines was tested by measuring the inter-annotator agreement in theearly annotation stage. A subset of 1,062 sentences from the Prague Arabic Dependency Treebank PADTwas selected and annotated by two Arabic native speakers independently. Following their annotations, anew Arabic corpus with over 1,250 annotated VMWEs has been built. This corpus already exceeds thesmallest corpora of the PARSEME suite, and enables first observations. We discuss our annotation guide-line schema that shows full MWE annotation is realizable in Arabic where we get good inter-annotator agreement.</abstract>
       <url hash="842e2d04">2022.lrec-1.196</url>
@@ -2339,7 +2339,7 @@
       <author><first>Daniel</first><last>Cheng</last></author>
       <author><first>Kyle</first><last>Yan</last></author>
       <author><first>Phillip</first><last>Keung</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1885–1889</pages>
       <abstract>Social media platforms play an increasingly important role as forums for public discourse. Many platforms use recommendation algorithms that funnel users to online groups with the goal of maximizing user engagement, which many commentators have pointed to as a source of polarization and misinformation. Understanding the role of NLP in recommender systems is an interesting research area, given the role that social media has played in world events. However, there are few standardized resources which researchers can use to build models that predict engagement with online groups on social media; each research group constructs datasets from scratch without releasing their version for reuse. In this work, we present a dataset drawn from posts and comments on the online message board Reddit. We develop baseline models for recommending subreddits to users, given the user’s post and comment history. We also study the behavior of our recommender models on subreddits that were banned in June 2020 as part of Reddit’s efforts to stop the dissemination of hate speech.</abstract>
       <url hash="d59fa8fc">2022.lrec-1.200</url>
@@ -2383,10 +2383,10 @@
       <title>A Comparative Cross Language View On Acted Databases Portraying Basic Emotions Utilising Machine Learning</title>
       <author><first>Felix</first><last>Burkhardt</last></author>
       <author><first>Anabell</first><last>Hacker</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Hagen</first><last>Wierstorf</last></author>
       <author><first>Florian</first><last>Eyben</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>1917–1924</pages>
       <abstract>Since several decades emotional databases have been recorded by various laboratories. Many of them contain acted portrays of Darwin’s famous “big four” basic emotions. In this paper, we investigate in how far a selection of them are comparable by two approaches: on the one hand modeling similarity as performance in cross database machine learning experiments and on the other by analyzing a manually picked set of four acoustic features that represent different phonetic areas. It is interesting to see in how far specific databases (we added a synthetic one) perform well as a training set for others while some do not. Generally speaking, we found indications for both similarity as well as specificiality across languages.</abstract>
       <url hash="6ddbdf60">2022.lrec-1.204</url>
@@ -2398,7 +2398,7 @@
       <author><first>Johannes</first><last>Wagner</last></author>
       <author><first>Hagen</first><last>Wierstorf</last></author>
       <author><first>Florian</first><last>Eyben</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>1925–1932</pages>
       <abstract>We present advancements with a software tool called Nkululeko, that lets users perform (semi-) supervised machine learning experiments in the speaker characteristics domain. It is based on audformat, a format for speech database metadata description. Due to an interface based on configurable templates, it supports best practise and very fast setup of experiments without the need to be proficient in the underlying language: Python. The paper explains the handling of Nkululeko and presents two typical experiments: comparing the expert acoustic features with artificial neural net embeddings for emotion classification and speaker age regression.</abstract>
       <url hash="b86c39a1">2022.lrec-1.205</url>
@@ -2420,7 +2420,7 @@
       <title><fixed-case>PATATRA</fixed-case> and <fixed-case>PATAF</fixed-case>req: two <fixed-case>F</fixed-case>rench databases for the documentation of within-speaker variability in speech</title>
       <author><first>Cécile</first><last>Fougeron</last></author>
       <author><first>Nicolas</first><last>Audibert</last></author>
-      <author><first>Cedric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cedric</first><last>Gendrot</last></author>
       <author><first>Estelle</first><last>Chardenon</last></author>
       <author><first>Louise</first><last>Wohmann</last></author>
       <pages>1939–1944</pages>
@@ -2476,9 +2476,9 @@
       <title><fixed-case>SN</fixed-case>u<fixed-case>C</fixed-case>: The <fixed-case>S</fixed-case>heffield Numbers Spoken Language Corpus</title>
       <author><first>Emma</first><last>Barker</last></author>
       <author><first>Jon</first><last>Barker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Ning</first><last>Ma</last></author>
-      <author><first>Monica Lestari</first><last>Paramita</last></author>
+      <author id="monica-lestari-paramita"><first>Monica Lestari</first><last>Paramita</last></author>
       <pages>1978–1984</pages>
       <abstract>We present SNuC, the first published corpus of spoken alphanumeric identifiers of the sort typically used as serial and part numbers in the manufacturing sector. The dataset contains recordings and transcriptions of over 50 native British English speakers, speaking over 13,000 multi-character alphanumeric sequences and totalling almost 20 hours of recorded speech. We describe requirements taken into account in the designing the corpus and the methodology used to construct it. We present summary statistics describing the corpus contents, as well as a preliminary investigation into errors in spoken alphanumeric identifiers. We validate the corpus by showing how it can be used to adapt a deep learning neural network based ASR system, resulting in improved recognition accuracy on the task of spoken alphanumeric identifier recognition. Finally, we discuss further potential uses for the corpus and for the tools developed to construct it.</abstract>
       <url hash="f9e1c122">2022.lrec-1.212</url>
@@ -2541,7 +2541,7 @@
       <author><first>Aswinkumar</first><last>Vijayananth</last></author>
       <author><first>Duc Bach</first><last>Ha</last></author>
       <author><first>Sven</first><last>Behnke</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <pages>2022–2031</pages>
       <abstract>For research in audiovisual interview archives often it is not only of interest what is said but also how. Sentiment analysis and emotion recognition can help capture, categorize and make these different facets searchable. In particular, for oral history archives, such indexing technologies can be of great interest. These technologies can help understand the role of emotions in historical remembering. However, humans often perceive sentiments and emotions ambiguously and subjectively. Moreover, oral history interviews have multi-layered levels of complex, sometimes contradictory, sometimes very subtle facets of emotions. Therefore, the question arises of the chance machines and humans have capturing and assigning these into predefined categories. This paper investigates the ambiguity in human perception of emotions and sentiment in German oral history interviews and the impact on machine learning systems. Our experiments reveal substantial differences in human perception for different emotions. Furthermore, we report from ongoing machine learning experiments with different modalities. We show that the human perceptual ambiguity and other challenges, such as class imbalance and lack of training data, currently limit the opportunities of these technologies for oral history archives. Nonetheless, our work uncovers promising observations and possibilities for further research.</abstract>
       <url hash="ccd0ade3">2022.lrec-1.217</url>
@@ -2553,7 +2553,7 @@
       <author><first>Ioan-Bogdan</first><last>Iordache</last></author>
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Dragoș</first><last>Iliescu</last></author>
       <pages>2032–2041</pages>
       <abstract>Finding the polarity of feelings in texts is a far-reaching task. Whilst the field of natural language processing has established sentiment analysis as an alluring problem, many feelings are left uncharted. In this study, we analyze the optimism and pessimism concepts from Twitter posts to effectively understand the broader dimension of psychological phenomenon. Towards this, we carried a systematic study by first exploring the linguistic peculiarities of optimism and pessimism in user-generated content. Later, we devised a multi-task knowledge distillation framework to simultaneously learn the target task of optimism detection with the help of the auxiliary task of sentiment analysis and hate speech detection. We evaluated the performance of our proposed approach on the benchmark Optimism/Pessimism Twitter dataset. Our extensive experiments show the superior- ity of our approach in correctly differentiating between optimistic and pessimistic users. Our human and automatic evaluation shows that sentiment analysis and hate speech detection are beneficial for optimism/pessimism detection.</abstract>
@@ -2611,7 +2611,7 @@
     <paper id="223">
       <title><fixed-case>A</fixed-case>esop’s fable “The North Wind and the Sun” Used as a Rosetta Stone to Extract and Map Spoken Words in Under-resourced Languages</title>
       <author><first>Elena</first><last>Knyazeva</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Frédéric</first><last>Vernier</last></author>
       <pages>2072–2079</pages>
       <abstract>This paper describes a method of semi-automatic word spotting in minority languages, from one and the same Aesop fable “The North Wind and the Sun” translated in Romance languages/dialects from Hexagonal (i.e. Metropolitan) France and languages from French Polynesia. The first task consisted of finding out how a dozen words such as “wind” and “sun” were translated in over 200 versions collected in the field — taking advantage of orthographic similarity, word position and context. Occurrences of the translations were then extracted from the phone-aligned recordings. The results were judged accurate in 96–97% of cases, both on the development corpus and a test set of unseen data. Corrected alignments were then mapped and basemaps were drawn to make various linguistic phenomena immediately visible. The paper exemplifies how regular expressions may be used for this purpose. The final result, which takes the form of an online speaking atlas (enriching the <url>https://atlas.limsi.fr</url> website), enables us to illustrate lexical, morphological or phonetic variation.</abstract>
@@ -2654,9 +2654,9 @@
     <paper id="227">
       <title>Assessing Multilinguality of Publicly Accessible Websites</title>
       <author><first>Rinalds</first><last>Vīksna</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <pages>2108–2116</pages>
       <abstract>Although information on the Internet can be shared in many languages, the language presence on the World Wide Web is very disproportionate. The problem of multilingualism on the Web, in particular access, availability and quality of information in the world’s languages, has been the subject of UNESCO focus for several decades. Making European websites more multilingual is also one of the focal targets of the Connecting Europe Facility Automated Translation (CEF AT) digital service infrastructure. In order to monitor this goal, alongside other possible solutions, CEF AT needs a methodology and easy to use tool to assess the degree of multilingualism of a given website. In this paper we investigate methods and tools that automatically analyse the language diversity of the Web and propose indicators and methodology on how to measure the multilingualism of European websites. We also introduce a prototype tool based on open-source software that helps to assess multilingualism of the Web and can be independently run at set intervals. We also present initial results obtained with our tool that allows us to conclude that multilingualism on the Web is still a problem not only at the world level, but also at the European and regional level.</abstract>
@@ -2666,7 +2666,7 @@
     <paper id="228">
       <title>A Methodology for Building a Diachronic Dataset of Semantic Shifts and its Application to <fixed-case>QC</fixed-case>-<fixed-case>FR</fixed-case>-Diac-V1.0, a Free Reference for <fixed-case>F</fixed-case>rench</title>
       <author><first>David</first><last>Kletz</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>François</first><last>Lareau</last></author>
       <author><first>Patrick</first><last>Drouin</last></author>
       <pages>2117–2125</pages>
@@ -2685,7 +2685,7 @@
     </paper>
     <paper id="230">
       <title>Evaluating Gender Bias in Speech Translation</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Christine</first><last>Basta</last></author>
       <author><first>Gerard I.</first><last>Gállego</last></author>
       <pages>2141–2147</pages>
@@ -2745,7 +2745,7 @@
       <author><first>Nishtha</first><last>Jain</last></author>
       <author><first>Declan</first><last>Groves</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>2188–2195</pages>
       <abstract>Studying and mitigating gender and other biases in natural language have become important areas of research from both algorithmic and data perspectives. This paper explores the idea of reducing gender bias in a language generation context by generating gender variants of sentences. Previous work in this field has either been rule-based or required large amounts of gender balanced training data. These approaches are however not scalable across multiple languages, as creating data or rules for each language is costly and time-consuming. This work explores a light-weight method to generate gender variants for a given text using pre-trained language models as the resource, without any task-specific labelled data. The approach is designed to work on multiple languages with minimal changes in the form of heuristics. To showcase that, we have tested it on a high-resourced language, namely Spanish, and a low-resourced language from a different family, namely Serbian. The approach proved to work very well on Spanish, and while the results were less positive for Serbian, it showed potential even for languages where pre-trained models are less effective.</abstract>
       <url hash="ce132ed7">2022.lrec-1.235</url>
@@ -2801,8 +2801,8 @@
       <title><fixed-case>O</fixed-case>pen<fixed-case>EL</fixed-case>: An Annotated Corpus for Entity Linking and Discourse in Open Domain Dialogue</title>
       <author><first>Wen</first><last>Cui</last></author>
       <author><first>Leanne</first><last>Rolston</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <pages>2245–2256</pages>
       <abstract>Entity linking in dialogue is the task of mapping entity mentions in utterances to a target knowledge base. Prior work on entity linking has mainly focused on well-written articles such as Wikipedia, annotated newswire, or domain-specific datasets. We extend the study of entity linking to open domain dialogue by presenting the OpenEL corpus: an annotated multi-domain corpus for linking entities in natural conversation to Wikidata. Each dialogic utterance in 179 dialogues over 12 topics from the EDINA dataset has been annotated for entities realized by definite referring expressions as well as anaphoric forms such as he, she, it and they. This dataset supports training and evaluation of entity linking in open-domain dialogue, as well as analysis of the effect of using dialogue context and anaphora resolution in model training. It could also be used for fine-tuning a coreference resolution algorithm. To the best of our knowledge, this is the first substantial entity linking corpus publicly available for open-domain dialogue. We also establish baselines for this task using several existing entity linking systems. We found that the Transformer-based system Flair + BLINK has the best performance with a 0.65 F1 score. Our results show that dialogue context is extremely beneficial for entity linking in conversations, with Flair + Blink achieving an F1 of 0.61 without discourse context. These results also demonstrate the remaining performance gap between the baselines and human performance, highlighting the challenges of entity linking in open-domain dialogue, and suggesting many avenues for future research using OpenEL.</abstract>
       <url hash="838ebd42">2022.lrec-1.241</url>
@@ -2831,7 +2831,7 @@
     <paper id="244">
       <title><fixed-case>A</fixed-case>r<fixed-case>MIS</fixed-case> - The <fixed-case>A</fixed-case>rabic Misogyny and Sexism Corpus with Annotator Subjective Disagreements</title>
       <author><first>Dina</first><last>Almanea</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>2282–2291</pages>
       <abstract>The use of misogynistic and sexist language has increased in recent years in social media, and is increasing in the Arabic world in reaction to reforms attempting to remove restrictions on women lives. However, there are few benchmarks for Arabic misogyny and sexism detection, and in those the annotations are in aggregated form even though misogyny and sexism judgments are found to be highly subjective. In this paper we introduce an Arabic misogyny and sexism dataset (ArMIS) characterized by providing annotations from annotators with different degree of religious beliefs, and provide evidence that such differences do result in disagreements. To the best of our knowledge, this is the first dataset to study in detail the effect of beliefs on misogyny and sexism annotation. We also discuss proof-of-concept experiments showing that a dataset in which disagreements have not been reconciled can be used to train state-of-the-art models for misogyny and sexism detection; and consider different ways in which such models could be evaluated.</abstract>
       <url hash="c8a4321d">2022.lrec-1.244</url>
@@ -2850,7 +2850,7 @@
     <paper id="246">
       <title>The Causal News Corpus: Annotating Causal Relations in Event Sentences from News</title>
       <author><first>Fiona Anting</first><last>Tan</last></author>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Tadashi</first><last>Nomoto</last></author>
@@ -2932,8 +2932,8 @@
     </paper>
     <paper id="252">
       <title>A <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank of <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew</title>
-      <author><first>Daniel G.</first><last>Swanson</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="daniel-g-swanson"><first>Daniel G.</first><last>Swanson</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>2353–2361</pages>
       <abstract>In this paper we present the initial construction of a Universal Dependencies treebank with morphological annotations of Ancient Hebrew containing portions of the Hebrew Scriptures (1579 sentences, 27K tokens) for use in comparative study with ancient translations and for analysis of the development of Hebrew syntax. We construct this treebank by applying a rule-based parser (300 rules) to an existing morphologically-annotated corpus with minimal constituency structure and manually verifying the output and present the results of this semi-automated annotation process and some of the annotation decisions made in the process of applying the UD guidelines to a new language.</abstract>
       <url hash="c9a4140a">2022.lrec-1.252</url>
@@ -2945,7 +2945,7 @@
       <author><first>Bernardo</first><last>Cunha</last></author>
       <author><first>Raquel</first><last>Santos</last></author>
       <author><first>Fernando</first><last>Batista</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
       <pages>2362–2370</pages>
       <abstract>This paper introduces FIGHT, a dataset containing 63,450 tweets, posted before and after the official declaration of Covid-19 as a pandemic by online users in Portugal. This resource aims at contributing to the analysis of online hate speech targeting the most representative minorities in Portugal, namely the African descent and the Roma communities, and the LGBTQI community, the most commonly reported target of hate speech in social media at the European context. We present the methods for collecting the data, and provide insightful statistics on the distribution of tweets included in FIGHT, considering both the temporal and spatial dimensions. We also analyze the availability over time of tweets targeting the above-mentioned communities, distinguishing public, private and deleted tweets. We believe this study will contribute to better understand the dynamics of online hate speech in Portugal, particularly in adverse contexts, such as a pandemic outbreak, allowing the development of more informed and accurate hate speech resources for Portuguese.</abstract>
       <url hash="37d1136b">2022.lrec-1.253</url>
@@ -2965,7 +2965,7 @@
       <title>A Pragmatics-Centered Evaluation Framework for Natural Language Understanding</title>
       <author><first>Damien</first><last>Sileo</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <pages>2382–2394</pages>
       <abstract>New models for natural language understanding have recently made an unparalleled amount of progress, which has led some researchers to suggest that the models induce universal text representations. However, current benchmarks are predominantly targeting semantic phenomena; we make the case that pragmatics needs to take center stage in the evaluation of natural language understanding. We introduce PragmEval, a new benchmark for the evaluation of natural language understanding, that unites 11 pragmatics-focused evaluation datasets for English. PragmEval can be used as supplementary training data in a multi-task learning setup, and is publicly available, alongside the code for gathering and preprocessing the datasets. Using our evaluation suite, we show that natural language inference, a widely used pretraining task, does not result in genuinely universal representations, which presents a new challenge for multi-task learning.</abstract>
@@ -3040,18 +3040,18 @@
     <paper id="262">
       <title><fixed-case>B</fixed-case>e<fixed-case>S</fixed-case>t: The Belief and Sentiment Corpus</title>
       <author><first>Jennifer</first><last>Tracey</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Magdalena</first><last>Markowska</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>2460–2467</pages>
       <abstract>We present the BeSt corpus, which records cognitive state: who believes what (i.e., factuality), and who has what sentiment towards what. This corpus is inspired by similar source-and-target corpora, specifically MPQA and FactBank. The corpus comprises two genres, newswire and discussion forums, in three languages, Chinese (Mandarin), English, and Spanish. The corpus is distributed through the LDC.</abstract>
       <url hash="55a6b229">2022.lrec-1.262</url>
@@ -3063,7 +3063,7 @@
       <author><first>Florian</first><last>Schneider</last></author>
       <author><first>Özge</first><last>Alacam</last></author>
       <author><first>Prateek</first><last>Chaudhury</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>2468–2477</pages>
       <abstract>MOTIF (MultimOdal ConTextualized Images For Language Learners) is a multimodal dataset that consists of 1125 comprehension texts retrieved from Wikipedia Simple Corpus. Allowing multimodal processing or enriching the context with multimodal information has proven imperative for many learning tasks, specifically for second language (L2) learning. In this respect, several traditional NLP approaches can assist L2 readers in text comprehension processes, such as simplifying text or giving dictionary descriptions for complex words. As nicely stated in the well-known proverb, sometimes “a picture is worth a thousand words” and an image can successfully complement the verbal message by enriching the representation, like in Pictionary books. This multimodal support can also assist on-the-fly text reading experience by providing a multimodal tool that chooses and displays the most relevant images for the difficult words, given the text context. This study mainly focuses on one of the key components to achieving this goal; collecting a multimodal dataset enriched with complex word annotation and validated image match.</abstract>
       <url hash="6dc0077d">2022.lrec-1.263</url>
@@ -3099,7 +3099,7 @@
       <author><first>Marc</first><last>Verhagen</last></author>
       <author><first>Kelley</first><last>Lynch</last></author>
       <author><first>Kyeongmin</first><last>Rim</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>2498–2506</pages>
       <abstract>The Computational Linguistics Applications for Multimedia Services (CLAMS) platform provides access to computational content analysis tools for multimedia material. The version we present here is a robust update of an initial prototype implementation from 2019. The platform now sports a variety of image, video, audio and text processing tools that interact via a common multi-modal representation language named MMIF (Multi-Media Interchange Format). We describe the overall architecture, the MMIF format, some of the tools included in the platform, the process to set up and run a workflow, visualizations included in CLAMS, and evaluate aspects of the platform on data from the American Archive of Public Broadcasting, showing how CLAMS can add metadata to mass-digitized multimedia collections, metadata that are typically only available implicitly in now largely unsearchable digitized media in archives and libraries.</abstract>
       <url hash="362fb887">2022.lrec-1.266</url>
@@ -3114,7 +3114,7 @@
       <author><first>Yanling</first><last>Zhao</last></author>
       <author><first>Lei</first><last>Guo</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>2507–2516</pages>
       <abstract>Given our society’s increased exposure to multimedia formats on social media platforms, efforts to understand how digital content impacts people’s emotions are burgeoning. As such, we introduce a U.S. gun violence news dataset that contains news headline and image pairings from 840 news articles with 15K high-quality, crowdsourced annotations on emotional responses to the news pairings. We created three experimental conditions for the annotation process: two with a single modality (headline or image only), and one multimodal (headline and image together). In contrast to prior works on affectively-annotated data, our dataset includes annotations on the dominant emotion experienced with the content, the intensity of the selected emotion and an open-ended, written component. By collecting annotations on different modalities of the same news content pairings, we explore the relationship between image and text influence on human emotional response. We offer initial analysis on our dataset, showing the nuanced affective differences that appear due to modality and individual factors such as political leaning and media consumption habits. Our dataset is made publicly available to facilitate future research in affective computing.</abstract>
       <url hash="e721d4ed">2022.lrec-1.267</url>
@@ -3196,7 +3196,7 @@
     </paper>
     <paper id="275">
       <title>Exploring Transformers for Ranking <fixed-case>P</fixed-case>ortuguese Semantic Relations</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <pages>2573–2582</pages>
       <abstract>We explored transformer-based language models for ranking instances of Portuguese lexico-semantic relations. Weights were based on the likelihood of natural language sequences that transmitted the relation instances, and expectations were that they would be useful for filtering out noisier instances. However, after analysing the weights, no strong conclusions were taken. They are not correlated with redundancy, but are lower for instances with longer and more specific arguments, which may nevertheless be a consequence of their sensitivity to the frequency of such arguments. They did also not reveal to be useful when computing word similarity with network embeddings. Despite the negative results, we see the reported experiments and insights as another contribution for better understanding transformer language models like BERT and GPT, and we make the weighted instances publicly available for further research.</abstract>
       <url hash="bb879d3e">2022.lrec-1.275</url>
@@ -3214,7 +3214,7 @@
       <title>Sentence Selection Strategies for Distilling Word Embeddings from <fixed-case>BERT</fixed-case></title>
       <author><first>Yixiao</first><last>Wang</last></author>
       <author><first>Zied</first><last>Bouraoui</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>2591–2600</pages>
       <abstract>Many applications crucially rely on the availability of high-quality word vectors. To learn such representations, several strategies based on language models have been proposed in recent years. While effective, these methods typically rely on a large number of contextualised vectors for each word, which makes them impractical. In this paper, we investigate whether similar results can be obtained when only a few contextualised representations of each word can be used. To this end, we analyse a range of strategies for selecting the most informative sentences. Our results show that with a careful selection strategy, high-quality word vectors can be learned from as few as 5 to 10 sentences.</abstract>
@@ -3225,7 +3225,7 @@
       <title><fixed-case>D</fixed-case>ia<fixed-case>WUG</fixed-case>: A Dataset for Diatopic Lexical Semantic Variation in <fixed-case>S</fixed-case>panish</title>
       <author><first>Gioia</first><last>Baldissin</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>2601–2609</pages>
       <abstract>We provide a novel dataset – DiaWUG – with judgements on diatopic lexical semantic variation for six Spanish variants in Europe and Latin America. In contrast to most previous meaning-based resources and studies on semantic diatopic variation, we collect annotations on semantic relatedness for Spanish target words in their contexts from both a semasiological perspective (i.e., exploring the meanings of a word given its form, thus including polysemy) and an onomasiological perspective (i.e., exploring identical meanings of words with different forms, thus including synonymy). In addition, our novel dataset exploits and extends the existing framework DURel for annotating word senses in context (Erk et al., 2013; Schlechtweg et al., 2018) and the framework-embedded Word Usage Graphs (WUGs) – which up to now have mainly be used for semasiological tasks and resources – in order to distinguish, visualize and interpret lexical semantic variation of contextualized words in Spanish from these two perspectives, i.e., semasiological and onomasiological language variation.</abstract>
       <url hash="bf8f3b2a">2022.lrec-1.278</url>
@@ -3255,7 +3255,7 @@
       <author><first>Hicham</first><last>El Boukkouri</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>2626–2633</pages>
       <abstract>BERT models used in specialized domains all seem to be the result of a simple strategy: initializing with the original BERT and then resuming pre-training on a specialized corpus. This method yields rather good performance (e.g. BioBERT (Lee et al., 2020), SciBERT (Beltagy et al., 2019), BlueBERT (Peng et al., 2019)). However, it seems reasonable to think that training directly on a specialized corpus, using a specialized vocabulary, could result in more tailored embeddings and thus help performance. To test this hypothesis, we train BERT models from scratch using many configurations involving general and medical corpora. Based on evaluations using four different tasks, we find that the initial corpus only has a weak influence on the performance of BERT models when these are further pre-trained on a medical corpus.</abstract>
       <url hash="5944884c">2022.lrec-1.281</url>
@@ -3276,7 +3276,7 @@
       <title>D3: A Massive Dataset of Scholarly Metadata for Analyzing the State of Computer Science Research</title>
       <author><first>Jan Philip</first><last>Wahle</last></author>
       <author><first>Terry</first><last>Ruas</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Bela</first><last>Gipp</last></author>
       <pages>2642–2651</pages>
       <abstract>DBLP is the largest open-access repository of scientific articles on computer science and provides metadata associated with publications, authors, and venues. We retrieved more than 6 million publications from DBLP and extracted pertinent metadata (e.g., abstracts, author affiliations, citations) from the publication texts to create the DBLP Discovery Dataset (D3). D3 can be used to identify trends in research activity, productivity, focus, bias, accessibility, and impact of computer science research. We present an initial analysis focused on the volume of computer science research (e.g., number of papers, authors, research activity), trends in topics of interest, and citation patterns. Our findings show that computer science is a growing research field (15% annually), with an active and collaborative researcher community. While papers in recent years present more bibliographical entries in comparison to previous decades, the average number of citations has been declining. Investigating papers’ abstracts reveals that recent topic trends are clearly reflected in D3. Finally, we list further applications of D3 and pose supplemental research questions. The D3 dataset, our findings, and source code are publicly available for research purposes.</abstract>
@@ -3341,7 +3341,7 @@
     <paper id="289">
       <title>Applying Automatic Text Summarization for Fake News Detection</title>
       <author><first>Philipp</first><last>Hartl</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>2702–2713</pages>
       <abstract>The distribution of fake news is not a new but a rapidly growing problem. The shift to news consumption via social media has been one of the drivers for the spread of misleading and deliberately wrong information, as in addition to its ease of use there is rarely any veracity monitoring. Due to the harmful effects of such fake news on society, the detection of these has become increasingly important. We present an approach to the problem that combines the power of transformer-based language models while simultaneously addressing one of their inherent problems. Our framework, CMTR-BERT, combines multiple text representations, with the goal of circumventing sequential limits and related loss of information the underlying transformer architecture typically suffers from. Additionally, it enables the incorporation of contextual information. Extensive experiments on two very different, publicly available datasets demonstrates that our approach is able to set new state-of-the-art performance benchmarks. Apart from the benefit of using automatic text summarization techniques we also find that the incorporation of contextual information contributes to performance gains.</abstract>
       <url hash="88a1bc7c">2022.lrec-1.289</url>
@@ -3393,7 +3393,7 @@
     <paper id="294">
       <title><fixed-case>LIP</fixed-case>-<fixed-case>RTVE</fixed-case>: An Audiovisual Database for Continuous <fixed-case>S</fixed-case>panish in the Wild</title>
       <author><first>David</first><last>Gimeno-Gómez</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>2750–2758</pages>
       <abstract>Speech is considered as a multi-modal process where hearing and vision are two fundamentals pillars. In fact, several studies have demonstrated that the robustness of Automatic Speech Recognition systems can be improved when audio and visual cues are combined to represent the nature of speech. In addition, Visual Speech Recognition, an open research problem whose purpose is to interpret speech by reading the lips of the speaker, has been a focus of interest in the last decades. Nevertheless, in order to estimate these systems in the currently Deep Learning era, large-scale databases are required. On the other hand, while most of these databases are dedicated to English, other languages lack sufficient resources. Thus, this paper presents a semi-automatically annotated audiovisual database to deal with unconstrained natural Spanish, providing 13 hours of data extracted from Spanish television. Furthermore, baseline results for both speaker-dependent and speaker-independent scenarios are reported using Hidden Markov Models, a traditional paradigm that has been widely used in the field of Speech Technologies.</abstract>
       <url hash="af859548">2022.lrec-1.294</url>
@@ -3438,7 +3438,7 @@
       <author><first>Olga</first><last>Lyashevskaya</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Daniil</first><last>Vodolazsky</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>2788–2797</pages>
       <abstract>Words of any language are to some extent related thought the ways they are formed. For instance, the verb ‘exempl-ify’ and the noun ‘example-s’ are both based on the word ‘example’, but the verb is derived from it, while the noun is inflected. In Natural Language Processing of Russian, the inflection is satisfactorily processed; however, there are only a few machine-trackable resources that capture derivations even though Russian has both of these morphological processes very rich. Therefore, we devote this paper to improving one of the methods of constructing such resources and to the application of the method to a Russian lexicon, which results in the creation of the largest lexical resource of Russian derivational relations. The resulting database dubbed DeriNet.RU includes more than 300 thousand lexemes connected with more than 164 thousand binary derivational relations. To create such data, we combined the existing machine-learning methods that we improved to manage this goal. The whole approach is evaluated on our newly created data set of manual, parallel annotation. The resulting DeriNet.RU is freely available under an open license agreement.</abstract>
       <url hash="4d3bf6e9">2022.lrec-1.298</url>
@@ -3462,11 +3462,11 @@
     </paper>
     <paper id="300">
       <title>Towards <fixed-case>L</fixed-case>atvian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Peteris</first><last>Paikens</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
       <author><first>Mikus</first><last>Grasmanis</last></author>
       <author><first>Agute</first><last>Klints</last></author>
       <author><first>Ilze</first><last>Lokmane</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <author><first>Madara</first><last>Stāde</last></author>
       <author><first>Laine</first><last>Strankale</last></author>
@@ -3478,7 +3478,7 @@
     <paper id="301">
       <title>Building Sentiment Lexicons for <fixed-case>M</fixed-case>ainland <fixed-case>S</fixed-case>candinavian Languages Using Machine Translation and Sentence Embeddings</title>
       <author><first>Peng</first><last>Liu</last></author>
-      <author><first>Cristina</first><last>Marco</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Marco</last></author>
       <author><first>Jon Atle</first><last>Gulla</last></author>
       <pages>2816–2825</pages>
       <abstract>This paper presents a simple but effective method to build sentiment lexicons for the three Mainland Scandinavian languages: Danish, Norwegian and Swedish. This method benefits from the English Sentiwordnet and a thesaurus in one of the target languages. Sentiment information from the English resource is mapped to the target languages by using machine translation and similarity measures based on sentence embeddings. A number of experiments with Scandinavian languages are performed in order to determine the best working sentence embedding algorithm for this task. A careful extrinsic evaluation on several datasets yields state-of-the-art results using a simple rule-based sentiment analysis algorithm. The resources are made freely available under an MIT License.</abstract>
@@ -3489,7 +3489,7 @@
       <title>A Thesaurus-based Sentiment Lexicon for <fixed-case>D</fixed-case>anish: The <fixed-case>D</fixed-case>anish Sentiment Lexicon</title>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Thomas</first><last>Troelsgård</last></author>
       <pages>2826–2832</pages>
       <abstract>This paper describes how a newly published Danish sentiment lexicon with a high lexical coverage was compiled by use of lexicographic methods and based on the links between groups of words listed in semantic order in a thesaurus and the corresponding word sense descriptions in a comprehensive monolingual dictionary. The overall idea was to identify negative and positive sections in a thesaurus, extract the words from these sections and combine them with the dictionary information via the links. The annotation task of the dataset included several steps, and was based on the comparison of synonyms and near synonyms within a semantic field. In the cases where one of the words were included in the smaller Danish sentiment lexicon AFINN, its value there was used as inspiration and expanded to the synonyms when appropriate. In order to obtain a more practical lexicon with overall polarity values at lemma level, all the senses of the lemma were afterwards compared, taking into consideration dictionary information such as usage, style and frequency. The final lexicon contains 13,859 Danish polarity lemmas and includes morphological information. It is freely available at <url>https://github.com/dsldk/danish-sentiment-lexicon</url> (licence CC-BY-SA 4.0 International).</abstract>
@@ -3536,7 +3536,7 @@
     </paper>
     <paper id="306">
       <title>Placing multi-modal, and multi-lingual Data in the Humanities Domain on the Map: the Mythotopia Geo-tagged Corpus</title>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Anna</first><last>Vacalopoulou</last></author>
       <author><first>Nikolaos</first><last>Sidiropoulos</last></author>
       <author><first>Christina</first><last>Flouda</last></author>
@@ -3582,7 +3582,7 @@
     </paper>
     <paper id="310">
       <title><fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong: Longitudinal and Synchronic Characterisations of Protest News between 1998 and 2020</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Giovanna Maria Dora</first><last>Dore</last></author>
       <pages>2891–2900</pages>
       <abstract>This paper showcases the utility and timeliness of the Hong Kong Protest News Dataset, a highly curated collection of news articles from diverse news sources, to investigate longitudinal and synchronic news characterisations of protests in Hong Kong between 1998 and 2020. The properties of the dataset enable us to apply natural language processing to its 4522 articles and thereby study patterns of journalistic practice across newspapers. This paper sheds light on whether depth and/or manner of reporting changed over time, and if so, in what ways, or in response to what. In its focus and methodology, this paper helps bridge the gap between “validity-focused methodological debates” and the use of computational methods of analysis in the social sciences.</abstract>
@@ -3609,7 +3609,7 @@
       <author><first>Milan</first><last>Straka</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
       <author><first>Barbora</first><last>Štěpánková</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>2909–2918</pages>
       <abstract>This paper presents an analysis of annotation using an automatic pre-annotation for a mid-level annotation complexity task - dependency syntax annotation. It compares the annotation efforts made by annotators using a pre-annotated version (with a high-accuracy parser) and those made by fully manual annotation. The aim of the experiment is to judge the final annotation quality when pre-annotation is used. In addition, it evaluates the effect of automatic linguistically-based (rule-formulated) checks and another annotation on the same data available to the annotators, and their influence on annotation quality and efficiency. The experiment confirmed that the pre-annotation is an efficient tool for faster manual syntactic annotation which increases the consistency of the resulting annotation without reducing its quality.</abstract>
       <url hash="f0741319">2022.lrec-1.312</url>
@@ -3621,7 +3621,7 @@
       <author><first>Antonela</first><last>Radas</last></author>
       <author><first>Jared</first><last>Hummer</last></author>
       <author><first>Karine</first><last>Megerdoomian</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>2919–2927</pages>
       <abstract>TimeML is an annotation scheme for capturing temporal information in text. The developers of TimeML built the TimeBank corpus to both validate the scheme and provide a rich dataset of events, temporal expressions, and temporal relationships for training and testing temporal analysis systems. In our own work we have been developing methods aimed at TimeML graphs for detecting (and eventually automatically correcting) temporal inconsistencies, extracting timelines, and assessing temporal indeterminacy. In the course of this investigation we identified numerous previously unrecognized issues in the TimeBank corpus, including multiple violations of TimeML annotation guide rules, incorrectly disconnected temporal graphs, as well as inconsistent, redundant, missing, or otherwise incorrect annotations. We describe our methods for detecting and correcting these problems, which include: (a) automatic guideline checking (109 violations); (b) automatic inconsistency checking (65 inconsistent files); (c) automatic disconnectivity checking (625 incorrect breakpoints); and (d) manual comparison with the output of state-of-the-art automatic annotators to identify missing annotations (317 events, 52 temporal expressions). We provide our code as well as a set of patch files that can be applied to the TimeBank corpus to produce a corrected version for use by other researchers in the field.</abstract>
       <url hash="d59c52a8">2022.lrec-1.313</url>
@@ -3690,7 +3690,7 @@
       <author><first>Christèle</first><last>Maizonniaux</last></author>
       <author><first>Neasa</first><last>Ní Chiaráin</last></author>
       <author><first>Chadi</first><last>Raheb</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>John</first><last>Sloan</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Chunlin</first><last>Yao</last></author>
@@ -3703,8 +3703,8 @@
       <title>Cyberbullying Classifiers are Sensitive to Model-Agnostic Perturbations</title>
       <author><first>Chris</first><last>Emmery</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>2976–2988</pages>
       <abstract>A limited amount of studies investigates the role of model-agnostic adversarial behavior in toxic content classification. As toxicity classifiers predominantly rely on lexical cues, (deliberately) creative and evolving language-use can be detrimental to the utility of current corpora and state-of-the-art models when they are deployed for content moderation. The less training data is available, the more vulnerable models might become. This study is, to our knowledge, the first to investigate the effect of adversarial behavior and augmentation for cyberbullying detection. We demonstrate that model-agnostic lexical substitutions significantly hurt classifier performance. Moreover, when these perturbed samples are used for augmentation, we show models become robust against word-level perturbations at a slight trade-off in overall task performance. Augmentations proposed in prior work on toxicity prove to be less effective. Our results underline the need for such evaluations in online harm areas with small corpora.</abstract>
       <url hash="185bdad4">2022.lrec-1.319</url>
@@ -3712,7 +3712,7 @@
     </paper>
     <paper id="320">
       <title>Constructing Distributions of Variation in Referring Expression Type from Corpora for Model Evaluation</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <author><first>Fahime</first><last>Same</last></author>
       <pages>2989–2997</pages>
       <abstract>The generation of referring expressions (REs) is a non-deterministic task. However, the algorithms for the generation of REs are standardly evaluated against corpora of written texts which include only one RE per each reference. Our goal in this work is firstly to reproduce one of the few studies taking the distributional nature of the RE generation into account. We add to this work, by introducing a method for exploring variation in human RE choice on the basis of longitudinal corpora - substantial corpora with a single human judgement (in the process of composition) per RE. We focus on the prediction of RE types, proper name, description and pronoun. We compare evaluations made against distributions over these types with evaluations made against parallel human judgements. Our results show agreement in the evaluation of learning algorithms against distributions constructed from parallel human evaluations and from longitudinal data.</abstract>
@@ -3735,7 +3735,7 @@
     <paper id="322">
       <title>Multi-Task Learning for Cross-Lingual Abstractive Summarization</title>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3008–3016</pages>
       <abstract>We present a multi-task learning framework for cross-lingual abstractive summarization to augment training data. Recent studies constructed pseudo cross-lingual abstractive summarization data to train their neural encoder-decoders. Meanwhile, we introduce existing genuine data such as translation pairs and monolingual abstractive summarization data into training. Our proposed method, Transum, attaches a special token to the beginning of the input sentence to indicate the target task. The special token enables us to incorporate the genuine data into the training data easily. The experimental results show that Transum achieves better performance than the model trained with only pseudo cross-lingual summarization data. In addition, we achieve the top ROUGE score on Chinese-English and Arabic-English abstractive summarization. Moreover, Transum also has a positive effect on machine translation. Experimental results indicate that Transum improves the performance from the strong baseline, Transformer, in Chinese-English, Arabic-English, and English-Japanese translation datasets.</abstract>
       <url hash="2af34ced">2022.lrec-1.322</url>
@@ -3754,12 +3754,12 @@
       <author><first>Harritxu</first><last>Gete</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>David</first><last>Ponce</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
       <author><first>Ander</first><last>Corral</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Igor</first><last>Ellakuria</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <pages>3026–3037</pages>
       <abstract>Document-level Neural Machine Translation aims to increase the quality of neural translation models by taking into account contextual information. Properly modelling information beyond the sentence level can result in improved machine translation output in terms of coherence, cohesion and consistency. Suitable corpora for context-level modelling are necessary to both train and evaluate context-aware systems, but are still relatively scarce. In this work we describe TANDO, a document-level corpus for the under-resourced Basque-Spanish language pair, which we share with the scientific community. The corpus is composed of parallel data from three different domains and has been prepared with context-level information. Additionally, the corpus includes contrastive test sets for fine-grained evaluations of gender and register contextual phenomena on both source and target language sides. To establish the usefulness of the corpus, we trained and evaluated baseline Transformer models and context-aware variants based on context concatenation. Our results indicate that the corpus is suitable for fine-grained evaluation of document-level machine translation systems.</abstract>
       <url hash="4cc550ac">2022.lrec-1.324</url>
@@ -3767,14 +3767,14 @@
     </paper>
     <paper id="325">
       <title>Unsupervised Machine Translation in Real-World Scenarios</title>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Iakes</first><last>Goenaga</last></author>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Carla</first><last>Parra</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra</last></author>
       <author><first>Marina</first><last>Sánchez-Torrón</last></author>
-      <author><first>Marcis</first><last>Pinnis</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <pages>3038–3047</pages>
       <abstract>In this work, we present the work that has been carried on in the MT4All CEF project and the resources that it has generated by leveraging recent research carried out in the field of unsupervised learning. In the course of the project 18 monolingual corpora for specific domains and languages have been collected, and 12 bilingual dictionaries and translation models have been generated. As part of the research, the unsupervised MT methodology based only on monolingual corpora (Artetxe et al., 2017) has been tested on a variety of languages and domains. Results show that in specialised domains, when there is enough monolingual in-domain data, unsupervised results are comparable to those of general domain supervised translation, and that, at any rate, unsupervised techniques can be used to boost results whenever very little data is available.</abstract>
@@ -3794,7 +3794,7 @@
     <paper id="327">
       <title>On the Multilingual Capabilities of Very Large-Scale <fixed-case>E</fixed-case>nglish Language Models</title>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
-      <author><first>Ona</first><last>de Gibert Bonet</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert Bonet</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <pages>3056–3068</pages>
       <abstract>Generative Pre-trained Transformers (GPTs) have recently been scaled to unprecedented sizes in the history of machine learning. These models, solely trained on the language modeling objective, have been shown to exhibit outstanding zero, one, and few-shot learning capabilities in a number of different tasks. Nevertheless, aside from anecdotal experiences, little is known regarding their multilingual capabilities, given the fact that the pre-training corpus is almost entirely composed of English text. In this work, we investigate its potential and limits in three tasks: extractive question-answering, text summarization and natural language generation for five different languages, as well as the effect of scale in terms of model size. Our results show that GPT-3 can be almost as useful for many languages as it is for English, with room for improvement if optimization of the tokenization is addressed.</abstract>
@@ -3845,7 +3845,7 @@
       <title>Building Comparable Corpora for Assessing Multi-Word Term Alignment</title>
       <author><first>Omar</first><last>Adjali</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>3103–3112</pages>
       <abstract>Recent work has demonstrated the importance of dealing with Multi-Word Terms (MWTs) in several Natural Language Processing applications. In particular, MWTs pose serious challenges for alignment and machine translation systems because of their syntactic and semantic properties. Thus, developing algorithms that handle MWTs is becoming essential for many NLP tasks. However, the availability of bilingual and more generally multi-lingual resources is limited, especially for low-resourced languages and in specialized domains. In this paper, we propose an approach for building comparable corpora and bilingual term dictionaries that help evaluate bilingual term alignment in comparable corpora. To that aim, we exploit parallel corpora to perform automatic bilingual MWT extraction and comparable corpus construction. Parallel information helps to align bilingual MWTs and makes it easier to build comparable specialized sub-corpora. Experimental validation on an existing dataset and on manually annotated data shows the interest of the proposed methodology.</abstract>
       <url hash="d8fcc8fc">2022.lrec-1.332</url>
@@ -3932,7 +3932,7 @@
       <author><first>Muskaan</first><last>Singh</last></author>
       <author><first>Marie</first><last>Hledíková</last></author>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>3174–3182</pages>
       <abstract>Taking minutes is an essential component of every meeting, although the goals, style, and procedure of this activity (“minuting” for short) can vary. Minuting is a rather unstructured writing activity and is affected by who is taking the minutes and for whom the intended minutes are. With the rise of online meetings, automatic minuting would be an important benefit for the meeting participants as well as for those who might have missed the meeting. However, automatically generating meeting minutes is a challenging problem due to a variety of factors including the quality of automatic speech recorders (ASRs), availability of public meeting data, subjective knowledge of the minuter, etc. In this work, we present the first of its kind dataset on <i>Automatic Minuting</i>. We develop a dataset of English and Czech technical project meetings which consists of transcripts generated from ASRs, manually corrected, and minuted by several annotators. Our dataset, AutoMin, consists of 113 (English) and 53 (Czech) meetings, covering more than 160 hours of meeting content. Upon acceptance, we will publicly release (aaa.bbb.ccc) the dataset as a set of meeting transcripts and minutes, excluding the recordings for privacy reasons. A unique feature of our dataset is that most meetings are equipped with more than one minute, each created independently. Our corpus thus allows studying differences in what people find important while taking the minutes. We also provide baseline experiments for the community to explore this novel problem further. To the best of our knowledge <b>AutoMin</b> is probably the first resource on minuting in English and also in a language other than English (Czech).</abstract>
       <url hash="32f0bbf5">2022.lrec-1.340</url>
@@ -3940,7 +3940,7 @@
     </paper>
     <paper id="341">
       <title>Extracting Age-Related Stereotypes from Social Media Texts</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Isar</first><last>Nejadgholi</last></author>
       <pages>3183–3194</pages>
@@ -4005,7 +4005,7 @@
     <paper id="347">
       <title><fixed-case>SDS</fixed-case>-200: A <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Speech to <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Text Corpus</title>
       <author><first>Michel</first><last>Plüss</last></author>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Marc</first><last>Cuny</last></author>
       <author><first>Alla</first><last>Stöckli</last></author>
       <author><first>Nikolaos</first><last>Kapotis</last></author>
@@ -4014,7 +4014,7 @@
       <author><first>Christian</first><last>Scheller</last></author>
       <author><first>Yanick</first><last>Schraner</last></author>
       <author><first>Amit</first><last>Jain</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <author><first>Manfred</first><last>Vogel</last></author>
       <pages>3250–3256</pages>
@@ -4026,9 +4026,9 @@
       <title>Extracting Linguistic Knowledge from Speech: A Study of Stop Realization in 5 <fixed-case>R</fixed-case>omance Languages</title>
       <author><first>Yaru</first><last>Wu</last></author>
       <author><first>Mathilde</first><last>Hutin</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>3257–3263</pages>
       <abstract>This paper builds upon recent work in leveraging the corpora and tools originally used to develop speech technologies for corpus-based linguistic studies. We address the non-canonical realization of consonants in connected speech and we focus on voicing alternation phenomena of stops in 5 standard varieties of Romance languages (French, Italian, Spanish, Portuguese, Romanian). For these languages, both large scale corpora and speech recognition systems were available for the study. We use forced alignment with pronunciation variants and machine learning techniques to examine to what extent such frequent phenomena characterize languages and what are the most triggering factors. The results confirm that voicing alternations occur in all Romance languages. Automatic classification underlines that surrounding contexts and segment duration are recurring contributing factors for modeling voicing alternation. The results of this study also demonstrate the new role that machine learning techniques such as classification algorithms can play in helping to extract linguistic knowledge from speech and to suggest interesting research directions.</abstract>
       <url hash="21069df6">2022.lrec-1.348</url>
@@ -4073,12 +4073,12 @@
     </paper>
     <paper id="352">
       <title><fixed-case>QT</fixed-case>30: A Corpus of Argument and Conflict in Broadcast Debate</title>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Zlata</first><last>Kikteva</last></author>
       <author><first>Wassiliki</first><last>Siskou</last></author>
       <author><first>Kamila</first><last>Gorska</last></author>
       <author><first>Ray</first><last>Becker</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>3291–3300</pages>
       <abstract>Broadcast political debate is a core pillar of democracy: it is the public’s easiest access to opinions that shape policies and enables the general public to make informed choices. With QT30, we present the largest corpus of analysed dialogical argumentation ever created (19,842 utterances, 280,000 words) and also the largest corpus of analysed broadcast political debate to date, using 30 episodes of BBC’s ‘Question Time’ from 2020 and 2021. Question Time is the prime institution in UK broadcast political debate and features questions from the public on current political issues, which are responded to by a weekly panel of five figures of UK politics and society. QT30 is highly argumentative and combines language of well-versed political rhetoric with direct, often combative, justification-seeking of the general public. QT30 is annotated with Inference Anchoring Theory, a framework well-known in argument mining, which encodes the way arguments and conflicts are created and reacted to in dialogical settings. The resource is freely available at <url>http://corpora.aifdb.org/qt30</url>.</abstract>
       <url hash="f97abaf7">2022.lrec-1.352</url>
@@ -4117,7 +4117,7 @@
     </paper>
     <paper id="356">
       <title>Distant Reading in Digital Humanities: Case Study on the <fixed-case>S</fixed-case>erbian Part of the <fixed-case>ELT</fixed-case>e<fixed-case>C</fixed-case> Collection</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
       <author><first>Branislava</first><last>Šandrih Todorović</last></author>
       <author><first>Dusko</first><last>Vitas</last></author>
@@ -4134,7 +4134,7 @@
       <author><first>Judith</first><last>Sieker</last></author>
       <author><first>Svenja</first><last>Guhr</last></author>
       <author><first>Evelyn</first><last>Gius</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>3346–3353</pages>
       <abstract>Automatizing the process of understanding the global narrative structure of long texts and stories is still a major challenge for state-of-the-art natural language understanding systems, particularly because annotated data is scarce and existing annotation workflows do not scale well to the annotation of complex narrative phenomena. In this work, we focus on the identification of narrative levels in texts corresponding to stories that are embedded in stories. Lacking sufficient pre-annotated training data, we explore a solution to deal with data scarcity that is common in machine learning: the automatic augmentation of an existing small data set of annotated samples with the help of data synthesis. We present a workflow for narrative level detection, that includes the operationalization of the task, a model, and a data augmentation protocol for automatically generating narrative texts annotated with breaks between narrative levels. Our experiments suggest that narrative levels in long text constitute a challenging phenomenon for state-of-the-art NLP models, but generating training data synthetically does improve the prediction results considerably.</abstract>
       <url hash="6a09902d">2022.lrec-1.357</url>
@@ -4146,7 +4146,7 @@
       <author><first>Jonathan</first><last>Poinhos</last></author>
       <author><first>Eleni</first><last>Kogkitsidou</last></author>
       <author><first>Philippe</first><last>Gambette</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Simon</first><last>Gabay</last></author>
       <pages>3354–3366</pages>
       <abstract>Spelling normalisation is a useful step in the study and analysis of historical language texts, whether it is manual analysis by experts or automatic analysis using downstream natural language processing (NLP) tools. Not only does it help to homogenise the variable spelling that often exists in historical texts, but it also facilitates the use of off-the-shelf contemporary NLP tools, if contemporary spelling conventions are used for normalisation. We present FREEMnorm, a new benchmark for the normalisation of Early Modern French (from the 17th century) into contemporary French and provide a thorough comparison of three different normalisation methods: ABA, an alignment-based approach and MT-approaches, (both statistical and neural), including extensive parameter searching, which is often missing in the normalisation literature.</abstract>
@@ -4156,12 +4156,12 @@
     <paper id="359">
       <title>From <fixed-case>F</fixed-case>re<fixed-case>EM</fixed-case> to D’<fixed-case>A</fixed-case>lem<fixed-case>BERT</fixed-case>: a Large Corpus and a Language Model for Early <fixed-case>M</fixed-case>odern <fixed-case>F</fixed-case>rench</title>
       <author><first>Simon</first><last>Gabay</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Alexandre</first><last>Bartz</last></author>
       <author><first>Alix</first><last>Chagué</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Philippe</first><last>Gambette</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>3367–3374</pages>
       <abstract>anguage models for historical states of language are becoming increasingly important to allow the optimal digitisation and analysis of old textual sources. Because these historical states are at the same time more complex to process and more scarce in the corpora available, this paper presents recent efforts to overcome this difficult situation. These efforts include producing a corpus, creating the model, and evaluating it with an NLP task currently used by scholars in other ongoing projects.</abstract>
       <url hash="97d82b10">2022.lrec-1.359</url>
@@ -4170,7 +4170,7 @@
     <paper id="360">
       <title>Detecting Multiple Transitions in Literary Texts</title>
       <author><first>Nuette</first><last>Heyns</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <pages>3375–3381</pages>
       <abstract>Identifying the high level structure of texts provides important information when performing distant reading analysis. The structure of texts is not necessarily linear, as transitions, such as changes in the scenery or flashbacks, can be present. As a first step in identifying this structure, we aim to identify transitions in texts. Previous work (Heyns and van Zaanen, 2021) proposed a system that can successfully identify one transition in literary texts. The text is split in snippets and LDA is applied, resulting in a sequence of topics. A transition is introduced at the point that separates the topics (before and after the point) best. In this article, we extend the existing system such that it can detect multiple transitions. Additionally, we introduce a new system that inherently handles multiple transitions in texts. The new system also relies on LDA information, but is more robust than the previous system. We apply these systems to texts with known transitions (as they are constructed by concatenating text snippets stemming from different source texts) and evaluation both systems on texts with one transition and texts with two transitions. As both systems rely on LDA to identify transitions between snippets, we also show the impact of varying the number of LDA topics on the results as well. The new system consistently outperforms the previous system, not only on texts with multiple transitions, but also on single boundary texts.</abstract>
       <url hash="cc379d64">2022.lrec-1.360</url>
@@ -4184,7 +4184,7 @@
       <author><first>Ainara</first><last>Larrondo-Ureta</last></author>
       <author><first>Simón</first><last>Peña-Fernández</last></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>3382–3390</pages>
       <abstract>Parliamentary transcripts provide a valuable resource to understand the reality and know about the most important facts that occur over time in our societies. Furthermore, the political debates captured in these transcripts facilitate research on political discourse from a computational social science perspective. In this paper we release the first version of a newly compiled corpus from Basque parliamentary transcripts. The corpus is characterized by heavy Basque-Spanish code-switching, and represents an interesting resource to study political discourse in contrasting languages such as Basque and Spanish. We enrich the corpus with metadata related to relevant attributes of the speakers and speeches (language, gender, party...) and process the text to obtain named entities and lemmas. The obtained metadata is then used to perform a detailed corpus analysis which provides interesting insights about the language use of the Basque political representatives across time, parties and gender.</abstract>
       <url hash="042982b0">2022.lrec-1.361</url>
@@ -4215,7 +4215,7 @@
     </paper>
     <paper id="364">
       <title>Quantification Annotation in <fixed-case>ISO</fixed-case> 24617-12, Second Draft</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Maxime</first><last>Amblard</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Karën</first><last>Fort</last></author>
@@ -4235,7 +4235,7 @@
     <paper id="365">
       <title>The <fixed-case>LTRC</fixed-case> <fixed-case>H</fixed-case>indi-<fixed-case>T</fixed-case>elugu Parallel Corpus</title>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>3417–3424</pages>
       <abstract>We present the Hindi-Telugu Parallel Corpus of different technical domains such as Natural Science, Computer Science, Law and Healthcare along with the General domain. The qualitative corpus consists of 700K parallel sentences of which 535K sentences were created using multiple methods such as extract, align and review of Hindi-Telugu corpora, end-to-end human translation, iterative back-translation driven post-editing and around 165K parallel sentences were collected from available sources in the public domain. We present the comparative assessment of created parallel corpora for representativeness and diversity. The corpus has been pre-processed for machine translation, and we trained a neural machine translation system using it and report state-of-the-art baseline results on the developed development set over multiple domains and on available benchmarks. With this, we define a new task on Domain Machine Translation for low resource language pairs such as Hindi and Telugu. The developed corpus (535K) is freely available for non-commercial research and to the best of our knowledge, this is the well curated, largest, publicly available domain parallel corpus for Hindi-Telugu.</abstract>
       <url hash="fb03375a">2022.lrec-1.365</url>
@@ -4244,7 +4244,7 @@
     <paper id="366">
       <title><fixed-case>MHE</fixed-case>: Code-Mixed Corpora for Similar Language Identification</title>
       <author><first>Priya</first><last>Rani</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
       <pages>3425–3433</pages>
       <abstract>This paper introduces a new Magahi-Hindi-English (MHE) code-mixed data-set for similar language identification (SMLID), where Magahi is a less-resourced minority language. This corpus provides a language id at two levels: word and sentence. This data-set is the first Magahi-Hindi-English code-mixed data-set for similar language identification task. Furthermore, we will discuss the complexity of the data-set and provide a few baselines for the language identification task.</abstract>
@@ -4263,7 +4263,7 @@
       <author><first>Aman</first><last>Berhe</last></author>
       <author><first>Léo</first><last>Galmant</last></author>
       <author><first>Ruiqing</first><last>Yin</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <pages>3434–3441</pages>
       <abstract>We introduce a dataset built around a large collection of TV (and movie) series. Those are filled with challenging multi-party dialogues. Moreover, TV series come with a very active fan base that allows the collection of metadata and accelerates annotation. With 16 TV and movie series, Bazinga! amounts to 400+ hours of speech and 8M+ tokens, including 500K+ tokens annotated with the speaker, addressee, and entity linking information. Along with the dataset, we also provide a baseline for speaker diarization, punctuation restoration, and person entity recognition. The results demonstrate the difficulty of the tasks and of transfer learning from models trained on mono-speaker audio or written text, which is more widely available. This work is a step towards better multi-party dialogue structuring and understanding. Bazinga! is available at hf.co/bazinga. Because (a large) part of Bazinga! is only partially annotated, we also expect this dataset to foster research towards self- or weakly-supervised learning methods.</abstract>
       <url hash="f4aaac3f">2022.lrec-1.367</url>
@@ -4282,11 +4282,11 @@
     </paper>
     <paper id="369">
       <title><fixed-case>W</fixed-case>e<fixed-case>C</fixed-case>an<fixed-case>T</fixed-case>alk: A New Multi-language, Multi-modal Resource for Speaker Recognition</title>
-      <author><first>Karen</first><last>Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Jones</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Christopher</first><last>Caruso</last></author>
+      <author id="christopher-caruso"><first>Christopher</first><last>Caruso</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>3451–3456</pages>
       <abstract>The WeCanTalk (WCT) Corpus is a new multi-language, multi-modal resource for speaker recognition. The corpus contains Cantonese, Mandarin and English telephony and video speech data from over 200 multilingual speakers located in Hong Kong. Each speaker contributed at least 10 telephone conversations of 8-10 minutes’ duration collected via a custom telephone platform based in Hong Kong. Speakers also uploaded at least 3 videos in which they were both speaking and visible, along with one selfie image. At least half of the calls and videos for each speaker were in Cantonese, while their remaining recordings featured one or more different languages. Both calls and videos were made in a variety of noise conditions. All speech and video recordings were audited by experienced multilingual annotators for quality including presence of the expected language and for speaker identity. The WeCanTalk Corpus has been used to support the NIST 2021 Speaker Recognition Evaluation and will be published in the LDC catalog.</abstract>
       <url hash="7bc330c3">2022.lrec-1.369</url>
@@ -4333,7 +4333,7 @@
       <title><fixed-case>HAWP</fixed-case>: a Dataset for <fixed-case>H</fixed-case>indi Arithmetic Word Problem Solving</title>
       <author><first>Harshita</first><last>Sharma</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>3479–3490</pages>
       <abstract>Word Problem Solving remains a challenging and interesting task in NLP. A lot of research has been carried out to solve different genres of word problems with various complexity levels in recent years. However, most of the publicly available datasets and work has been carried out for English. Recently there has been a surge in this area of word problem solving in Chinese with the creation of large benchmark datastes. Apart from these two languages, labeled benchmark datasets for low resource languages are very scarce. This is the first attempt to address this issue for any Indian Language, especially Hindi. In this paper, we present HAWP (Hindi Arithmetic Word Problems), a dataset consisting of 2336 arithmetic word problems in Hindi. We also developed baseline systems for solving these word problems. We also propose a new evaluation technique for word problem solvers taking equation equivalence into account.</abstract>
       <url hash="b38abc81">2022.lrec-1.373</url>
@@ -4342,7 +4342,7 @@
     <paper id="374">
       <title>The <fixed-case>B</fixed-case>ulgarian Event Corpus: Overview and Initial <fixed-case>NER</fixed-case> Experiments</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Iva</first><last>Marinova</last></author>
       <author><first>Melania</first><last>Berbatova</last></author>
       <pages>3491–3499</pages>
@@ -4378,8 +4378,8 @@
     </paper>
     <paper id="377">
       <title>Constrained Language Models for Interactive Poem Generation</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Àlex</first><last>Atrio</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="alex-r-atrio"><first>Àlex</first><last>Atrio</last></author>
       <author><first>Valentin</first><last>Minder</last></author>
       <author><first>Aris</first><last>Xanthos</last></author>
       <author><first>Gabriel</first><last>Luthier</last></author>
@@ -4396,7 +4396,7 @@
       <author><first>Young Ju</first><last>Na</last></author>
       <author><first>Hoyun</first><last>Song</last></author>
       <author><first>Jisu</first><last>Shin</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>3530–3541</pages>
       <abstract>Online trolls increase social costs and cause psychological damage to individuals. With the proliferation of automated accounts making use of bots for trolling, it is difficult for targeted individual users to handle the situation both quantitatively and qualitatively. To address this issue, we focus on automating the method to counter trolls, as counter responses to combat trolls encourage community users to maintain ongoing discussion without compromising freedom of expression. For this purpose, we propose a novel dataset for automatic counter response generation. In particular, we constructed a pair-wise dataset that includes troll comments and counter responses with labeled response strategies, which enables models fine-tuned on our dataset to generate responses by varying counter responses according to the specified strategy. We conducted three tasks to assess the effectiveness of our dataset and evaluated the results through both automatic and human evaluation. In human evaluation, we demonstrate that the model fine-tuned with our dataset shows a significantly improved performance in strategy-controlled sentence generation.</abstract>
       <url hash="fe551ba4">2022.lrec-1.378</url>
@@ -4446,7 +4446,7 @@
     </paper>
     <paper id="383">
       <title><fixed-case>ALEXSIS</fixed-case>: A Dataset for Lexical Simplification in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Daniel</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>3582–3594</pages>
       <abstract>Lexical Simplification is the process of reducing the lexical complexity of a text by replacing difficult words with easier to read (or understand) expressions while preserving the original information and meaning. In this paper we introduce ALEXSIS, a new dataset for this task, and we use ALEXSIS to benchmark Lexical Simplification systems in Spanish. The paper describes the evaluation of three kind of approaches to Lexical Simplification, a thesaurus-based approach, a single transformers-based approach, and a combination of transformers. We also report state of the art results on a previous Lexical Simplification dataset for Spanish.</abstract>
@@ -4476,7 +4476,7 @@
       <title><fixed-case>R</fixed-case>a<fixed-case>F</fixed-case>o<fixed-case>L</fixed-case>a: A Rationale-Annotated Corpus for Detecting Indicators of Forced Labour</title>
       <author><first>Erick</first><last>Mendez Guzman</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>3610–3625</pages>
       <abstract>Forced labour is the most common type of modern slavery, and it is increasingly gaining the attention of the research and social community. Recent studies suggest that artificial intelligence (AI) holds immense potential for augmenting anti-slavery action. However, AI tools need to be developed transparently in cooperation with different stakeholders. Such tools are contingent on the availability and access to domain-specific data, which are scarce due to the near-invisible nature of forced labour. To the best of our knowledge, this paper presents the first openly accessible English corpus annotated for multi-class and multi-label forced labour detection. The corpus consists of 989 news articles retrieved from specialised data sources and annotated according to risk indicators defined by the International Labour Organization (ILO). Each news article was annotated for two aspects: (1) indicators of forced labour as classification labels and (2) snippets of the text that justify labelling decisions. We hope that our data set can help promote research on explainability for multi-class and multi-label text classification. In this work, we explain our process for collecting the data underpinning the proposed corpus, describe our annotation guidelines and present some statistical analysis of its content. Finally, we summarise the results of baseline experiments based on different variants of the Bidirectional Encoder Representation from Transformer (BERT) model.</abstract>
       <url hash="f750b998">2022.lrec-1.386</url>
@@ -4499,7 +4499,7 @@
       <author><first>Roland</first><last>Roller</last></author>
       <author><first>Oliver</first><last>Sapina</last></author>
       <author><first>Sebastian</first><last>Möller</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>3637–3649</pages>
       <abstract>In this work, we present the first corpus for German Adverse Drug Reaction (ADR) detection in patient-generated content. The data consists of 4,169 binary annotated documents from a German patient forum, where users talk about health issues and get advice from medical doctors. As is common in social media data in this domain, the class labels of the corpus are very imbalanced. This and a high topic imbalance make it a very challenging dataset, since often, the same symptom can have several causes and is not always related to a medication intake. We aim to encourage further multi-lingual efforts in the domain of ADR detection and provide preliminary experiments for binary classification using different methods of zero- and few-shot learning based on a multi-lingual model. When fine-tuning XLM-RoBERTa first on English patient forum data and then on the new German data, we achieve an F1-score of 37.52 for the positive class. We make the dataset and models publicly available for the community.</abstract>
       <url hash="34de7366">2022.lrec-1.388</url>
@@ -4526,7 +4526,7 @@
       <title><fixed-case>C</fixed-case>lin<fixed-case>IDM</fixed-case>ap: Towards a Clinical <fixed-case>ID</fixed-case>s Mapping for Data Interoperability</title>
       <author><first>Elena</first><last>Zotova</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>3661–3669</pages>
       <abstract>This paper presents ClinIDMap, a tool for mapping identifiers between clinical ontologies and lexical resources. ClinIDMap interlinks identifiers from UMLS, SMOMED-CT, ICD-10 and the corresponding Wikipedia articles for concepts from the UMLS Metathesaurus. Our main goal is to provide semantic interoperability across the clinical concepts from various knowledge bases. As a side effect, the mapping enriches already annotated corpora in multiple languages with new labels. For instance, spans manually annotated with IDs from UMLS can be annotated with Semantic Types and Groups, and its corresponding SNOMED CT and ICD-10 IDs. We also experiment with sequence labelling models for detecting Diagnosis and Procedures concepts and for detecting UMLS Semantic Groups trained on Spanish, English, and bilingual corpora obtained with the new mapping procedure. The ClinIDMap tool is publicly available.</abstract>
       <url hash="1e203c3c">2022.lrec-1.390</url>
@@ -4565,7 +4565,7 @@
     <paper id="394">
       <title>How’s Business Going Worldwide ? A Multilingual Annotated Corpus for Business Relation Extraction</title>
       <author><first>Hadjer</first><last>Khaldi</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <author><first>Grégoire</first><last>Sigel</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
@@ -4611,7 +4611,7 @@
       <title>Enhanced Entity Annotations for Multilingual Corpora</title>
       <author><first>Michael</first><last>Strobl</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <pages>3732–3740</pages>
       <abstract>Modern approaches in Natural Language Processing (NLP) require, ideally, large amounts of labelled data for model training. However, new language resources, for example, for Named Entity Recognition (NER), Co-reference Resolution (CR), Entity Linking (EL) and Relation Extraction (RE), naming a few of the most popular tasks in NLP, have always been challenging to create since manual text annotations can be very time-consuming to acquire. While there may be an acceptable amount of labelled data available for some of these tasks in one language, there may be a lack of datasets in another. WEXEA is a tool to exhaustively annotate entities in the English Wikipedia. Guidelines for editors of Wikipedia articles result, on the one hand, in only a few annotations through hyperlinks, but on the other hand, make it easier to exhaustively annotate the rest of these articles with entities than starting from scratch. We propose the following main improvements to WEXEA: Creating multi-lingual corpora, improved entity annotations using a proven NER system, annotating dates and times. A brief evaluation of the annotation quality of WEXEA is added.</abstract>
       <url hash="8d9c9927">2022.lrec-1.398</url>
@@ -4630,7 +4630,7 @@
     </paper>
     <paper id="400">
       <title><fixed-case>S</fixed-case>panish Datasets for Sensitive Entity Detection in the Legal Domain</title>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Maite</first><last>Melero</last></author>
@@ -4654,7 +4654,7 @@
       <author><first>Meriem</first><last>Beloucif</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Steffen</first><last>Stahlhacke</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>3771–3779</pages>
       <abstract>Comparative Question Answering (cQA) is the task of providing concrete and accurate responses to queries such as: “Is Lyft cheaper than a regular taxi?” or “What makes a mortgage different from a regular loan?”. In this paper, we propose two new open-domain real-world datasets for identifying and labeling comparative questions. While the first dataset contains instances of English questions labeled as comparative vs. non-comparative, the second dataset provides additional labels including the objects and the aspects of comparison. We conduct several experiments that evaluate the soundness of our datasets. The evaluation of our datasets using various classifiers show promising results that reach close-to-human results on a binary classification task with a neural model using ALBERT embeddings. When approaching the unsupervised sequence labeling task, some headroom remains.</abstract>
       <url hash="b1e9156c">2022.lrec-1.402</url>
@@ -4664,7 +4664,7 @@
       <title>Decorate the Examples: A Simple Method of Prompt Design for Biomedical Relation Extraction</title>
       <author><first>Hui-Syuan</first><last>Yeh</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>3780–3787</pages>
       <abstract>Relation extraction is a core problem for natural language processing in the biomedical domain. Recent research on relation extraction showed that prompt-based learning improves the performance on both fine-tuning on full training set and few-shot training. However, less effort has been made on domain-specific tasks where good prompt design can be even harder. In this paper, we investigate prompting for biomedical relation extraction, with experiments on the ChemProt dataset. We present a simple yet effective method to systematically generate comprehensive prompts that reformulate the relation extraction task as a cloze-test task under a simple prompt formulation. In particular, we experiment with different ranking scores for prompt selection. With BioMed-RoBERTa-base, our results show that prompting-based fine-tuning obtains gains by 14.21 F1 over its regular fine-tuning baseline, and 1.14 F1 over SciFive-Large, the current state-of-the-art on ChemProt. Besides, we find prompt-based learning requires fewer training examples to make reasonable predictions. The results demonstrate the potential of our methods in such a domain-specific relation extraction task.</abstract>
       <url hash="95785d0a">2022.lrec-1.403</url>
@@ -4695,7 +4695,7 @@
     </paper>
     <paper id="406">
       <title><fixed-case>APPR</fixed-case>eddit: a Corpus of <fixed-case>R</fixed-case>eddit Posts Annotated for Appraisal</title>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <author><first>Simona</first><last>Frenda</last></author>
       <author><first>Eleonora</first><last>Ceccaldi</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
@@ -4737,7 +4737,7 @@
       <author><first>Mariana</first><last>Illescas</last></author>
       <author><first>Sabina</first><last>Oporto</last></author>
       <author><first>Frederic</first><last>Blum</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Javier</first><last>Vera</last></author>
       <pages>3840–3851</pages>
       <abstract>In this paper, we launch a new Universal Dependencies treebank for an endangered language from Amazonia: Kakataibo, a Panoan language spoken in Peru. We first discuss the collaborative methodology implemented, which proved effective to create a treebank in the context of a Computational Linguistic course for undergraduates. Then, we describe the general details of the treebank and the language-specific considerations implemented for the proposed annotation. We finally conduct some experiments on part-of-speech tagging and syntactic dependency parsing. We focus on monolingual and transfer learning settings, where we study the impact of a Shipibo-Konibo treebank, another Panoan language resource.</abstract>
@@ -4806,7 +4806,7 @@
     <paper id="415">
       <title>Pre-Training Language Models for Identifying Patronizing and Condescending Language: An Analysis</title>
       <author><first>Carla</first><last>Perez Almendros</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>3902–3911</pages>
       <abstract>Patronizing and Condescending Language (PCL) is a subtle but harmful type of discourse, yet the task of recognizing PCL remains under-studied by the NLP community. Recognizing PCL is challenging because of its subtle nature, because available datasets are limited in size, and because this task often relies on some form of commonsense knowledge. In this paper, we study to what extent PCL detection models can be improved by pre-training them on other, more established NLP tasks. We find that performance gains are indeed possible in this way, in particular when pre-training on tasks focusing on sentiment, harmful language and commonsense morality. In contrast, for tasks focusing on political speech and social justice, no or only very small improvements were witnessed. These findings improve our understanding of the nature of PCL.</abstract>
@@ -4817,7 +4817,7 @@
       <title><fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-<fixed-case>OTS</fixed-case>, Off-the-shelf Language Identifier for Text</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>3912–3922</pages>
       <abstract>This paper introduces HeLI-OTS, an off-the-shelf text language identification tool using the HeLI language identification method. The HeLI-OTS language identifier is equipped with language models for 200 languages and licensed for academic as well as commercial use. We present the HeLI method and its use in our previous research. Then we compare the performance of the HeLI-OTS language identifier with that of fastText on two different data sets, showing that fastText favors the recall of common languages, whereas HeLI-OTS reaches both high recall and high precision for all languages. While introducing existing off-the-shelf language identification tools, we also give a picture of digital humanities-related research that uses such tools. The validity of the results of such research depends on the results given by the language identifier used, and especially for research focusing on the less common languages, the tendency to favor widely used languages might be very detrimental, which Heli-OTS is now able to remedy.</abstract>
       <url hash="f84316ec">2022.lrec-1.416</url>
@@ -4828,7 +4828,7 @@
       <author><first>Silvia</first><last>Severini</last></author>
       <author><first>Ayyoob</first><last>Imani</last></author>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3923–3933</pages>
       <abstract>Parallel corpora are ideal for extracting a multilingual named entity (MNE) resource, i.e., a dataset of names translated into multiple languages. Prior work on extracting MNE datasets from parallel corpora required resources such as large monolingual corpora or word aligners that are unavailable or perform poorly for underresourced languages. We present CLC-BN, a new method for creating an MNE resource, and apply it to the Parallel Bible Corpus, a corpus of more than 1000 languages. CLC-BN learns a neural transliteration model from parallel-corpus statistics, without requiring any other bilingual resources, word aligners, or seed data. Experimental results show that CLC-BN clearly outperforms prior work. We release an MNE resource for 1340 languages and demonstrate its effectiveness in two downstream tasks: knowledge graph augmentation and bilingual lexicon induction.</abstract>
       <url hash="43de353d">2022.lrec-1.417</url>
@@ -4836,12 +4836,12 @@
     </paper>
     <paper id="418">
       <title>Towards the Construction of a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>O</fixed-case>ld <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Francisco J.</first><last>Minaya Gómez</last></author>
       <author><first>Rafael</first><last>Cruz González</last></author>
       <author><first>Harry</first><last>Diakoff</last></author>
       <author><first>Javier E.</first><last>Diaz Vera</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Ciara</first><last>O’Loughlin</last></author>
       <author><first>William Michael</first><last>Short</last></author>
       <author><first>Sander</first><last>Stolk</last></author>
@@ -4852,7 +4852,7 @@
     </paper>
     <paper id="419">
       <title>A Framenet and Frame Annotator for <fixed-case>G</fixed-case>erman Social Media</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>3942–3949</pages>
       <abstract>This paper presents PFN-DE, a new, parsing- and annotation-oriented framenet for German, with almost 15,000 frames, covering 11,300 verb lemmas. The resource was developed in the context of a Danish/German social-media study on hate speech and has a strong focus on coverage, robustness and cross-language comparability. A simple annotation scheme for argument roles meshes directly with the output of a syntactic parser, facilitating frame disambiguation through slot-filler conditions based on valency, syntactic function and semantic noun class. We discuss design principles for the framenet and the frame tagger using it, and present statistics for frame and role distribution at both the lexicon (type) and corpus (token) levels. In an evaluation run on Twitter data, the parser-based frame annotator achieved an overall F-score for frame senses of 93.6%.</abstract>
       <url hash="b8df8dfd">2022.lrec-1.419</url>
@@ -4862,7 +4862,7 @@
       <title>The Robotic Surgery Procedural Framebank</title>
       <author><first>Marco</first><last>Bombieri</last></author>
       <author><first>Marco</first><last>Rospocher</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Paolo</first><last>Fiorini</last></author>
       <pages>3950–3959</pages>
       <abstract>Robot-Assisted minimally invasive robotic surgery is the gold standard for the surgical treatment of many pathological conditions, and several manuals and academic papers describe how to perform these interventions. These high-quality, often peer-reviewed texts are the main study resource for medical personnel and consequently contain essential procedural domain-specific knowledge. The procedural knowledge therein described could be extracted, e.g., on the basis of semantic parsing models, and used to develop clinical decision support systems or even automation methods for some procedure’s steps. However, natural language understanding algorithms such as, for instance, semantic role labelers have lower efficacy and coverage issues when applied to domain others than those they are typically trained on (i.e., newswire text). To overcome this problem, starting from PropBank frames, we propose a new linguistic resource specific to the robotic-surgery domain, named Robotic Surgery Procedural Framebank (RSPF). We extract from robotic-surgical texts verbs and nouns that describe surgical actions and extend PropBank frames by adding any of new lemmas, frames or role sets required to cover missing lemmas, specific frames describing the surgical significance, or new semantic roles used in procedural surgical language. Our resource is publicly available and can be used to annotate corpora in the surgical domain to train and evaluate Semantic Role Labeling (SRL) systems in a challenging fine-grained domain setting.</abstract>
@@ -4883,7 +4883,7 @@
       <author><first>Nyoman</first><last>Juniarta</last></author>
       <author><first>Olivier</first><last>Bonami</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Yannick</first><last>Toussaint</last></author>
       <pages>3969–3976</pages>
       <abstract>We apply Formal Concept Analysis (FCA) to organize and to improve the quality of Démonette2, a French derivational database, through a detection of both missing and spurious derivations in the database. We represent each derivational family as a graph. Given that the subgraph relation exists among derivational families, FCA can group families and represent them in a partially ordered set (poset). This poset is also useful for improving the database. A family is regarded as a possible anomaly (meaning that it may have missing and/or spurious derivations) if its derivational graph is almost, but not completely identical to a large number of other families.</abstract>
@@ -4900,7 +4900,7 @@
     </paper>
     <paper id="424">
       <title>Towards the Detection of a Semantic Gap in the Chain of Commonsense Knowledge Triples</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>3984–3993</pages>
       <abstract>A commonsense knowledge resource organizes common sense that is not necessarily correct all the time, but most people are expected to know or believe. Such knowledge resources have recently been actively built and utilized in artificial intelligence, particularly natural language processing. In this paper, we discuss an important but not significantly discussed the issue of semantic gaps potentially existing in a commonsense knowledge graph and propose a machine learning-based approach to detect a semantic gap that may inhibit the proper chaining of knowledge triples. In order to establish this line of research, we created a pilot dataset from ConceptNet, in which chains consisting of two adjacent triples are sampled, and the validity of each chain is human-annotated. We also devised a few baseline methods for detecting the semantic gaps and compared them in small-scale experiments. Although the experimental results suggest that the detection of semantic gaps may not be a trivial task, we achieved several insights to further push this research direction, including the potential efficacy of sense embeddings and contextualized word representations enabled by a pre-trained language model.</abstract>
       <url hash="a78330d1">2022.lrec-1.424</url>
@@ -4991,7 +4991,7 @@
     </paper>
     <paper id="433">
       <title>The slurk Interaction Server Framework: Better Data for Better Dialog Models</title>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Maike</first><last>Paetzel-Prüsmann</last></author>
       <author><first>Wencke</first><last>Liermann</last></author>
       <author><first>Tim</first><last>Diekmann</last></author>
@@ -5006,9 +5006,9 @@
       <author><first>Natalia</first><last>Kalashnikova</last></author>
       <author><first>Serge</first><last>Pajak</last></author>
       <author><first>Fabrice</first><last>Le Guel</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <author><first>Gemma</first><last>Serrano</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <pages>4079–4087</pages>
       <abstract>In this paper, we present the methodology of corpus design that will be used to study the comparison of influence between linguistic nudges with positive or negative influences and three conversational agents: robot, smart speaker, and human. We recruited forty-nine participants to form six groups. The conversational agents first asked the participants about their willingness to adopt five ecological habits and invest time and money in ecological problems. The participants were then asked the same questions but preceded by one linguistic nudge with positive or negative influence. The comparison of standard deviation and mean metrics of differences between these two notes (before the nudge and after) showed that participants were mainly affected by nudges with positive influence, even though several nudges with negative influence decreased the average note. In addition, participants from all groups were willing to spend more money than time on ecological problems. In general, our experiment’s early results suggest that a machine agent can influence participants to the same degree as a human agent. A better understanding of the power of influence of different conversational machines and the potential of influence of nudges of different polarities will lead to the development of ethical norms of human-computer interactions.</abstract>
       <url hash="c7c60e29">2022.lrec-1.434</url>
@@ -5035,7 +5035,7 @@
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>4096–4113</pages>
       <abstract>The ability to recognise emotions lends a conversational artificial intelligence a human touch. While emotions in chit-chat dialogues have received substantial attention, emotions in task-oriented dialogues remain largely unaddressed. This is despite emotions and dialogue success having equally important roles in a natural system. Existing emotion-annotated task-oriented corpora are limited in size, label richness, and public availability, creating a bottleneck for downstream tasks. To lay a foundation for studies on emotions in task-oriented dialogues, we introduce EmoWOZ, a large-scale manually emotion-annotated corpus of task-oriented dialogues. EmoWOZ is based on MultiWOZ, a multi-domain task-oriented dialogue dataset. It contains more than 11K dialogues with more than 83K emotion annotations of user utterances. In addition to Wizard-of-Oz dialogues from MultiWOZ, we collect human-machine dialogues within the same set of domains to sufficiently cover the space of various emotions that can happen during the lifetime of a data-driven dialogue system. To the best of our knowledge, this is the first large-scale open-source corpus of its kind. We propose a novel emotion labelling scheme, which is tailored to task-oriented dialogues. We report a set of experimental results to show the usability of this corpus for emotion recognition and state tracking in task-oriented dialogues.</abstract>
       <url hash="ebb2fadd">2022.lrec-1.436</url>
@@ -5091,7 +5091,7 @@
       <author><first>Yogesh</first><last>Dawer</last></author>
       <author><first>Bornini</first><last>Lahiri</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <pages>4149–4161</pages>
       <abstract>In this paper, we discuss the development of a multilingual dataset annotated with a hierarchical, fine-grained tagset marking different types of aggression and the “context” in which they occur. The context, here, is defined by the conversational thread in which a specific comment occurs and also the “type” of discursive role that the comment is performing with respect to the previous comment. The initial dataset, being discussed here consists of a total 59,152 annotated comments in four languages - Meitei, Bangla, Hindi, and Indian English - collected from various social media platforms such as YouTube, Facebook, Twitter and Telegram. As is usual on social media websites, a large number of these comments are multilingual, mostly code-mixed with English. The paper gives a detailed description of the tagset being used for annotation and also the process of developing a multi-label, fine-grained tagset that has been used for marking comments with aggression and bias of various kinds including sexism (called gender bias in the tagset), religious intolerance (called communal bias in the tagset), class/caste bias and ethnic/racial bias. We also define and discuss the tags that have been used for marking the different discursive role being performed through the comments, such as attack, defend, etc. Finally, we present a basic statistical analysis of the dataset. The dataset is being incrementally made publicly available on the project website.</abstract>
       <url hash="73b9d979">2022.lrec-1.441</url>
@@ -5100,7 +5100,7 @@
     <paper id="442">
       <title><fixed-case>Tweet Emotion Dynamics</fixed-case>: Emotion Word Usage in Tweets from <fixed-case>US</fixed-case> and <fixed-case>C</fixed-case>anada</title>
       <author><first>Krishnapriya</first><last>Vishnubhotla</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>4162–4176</pages>
       <abstract>Over the last decade, Twitter has emerged as one of the most influential forums for social, political, and health discourse. In this paper, we introduce a massive dataset of more than 45 million geo-located tweets posted between 2015 and 2021 from US and Canada (TUSC), especially curated for natural language analysis. We also introduce Tweet Emotion Dynamics (TED) — metrics to capture patterns of emotions associated with tweets over time. We use TED and TUSC to explore the use of emotion-associated words across US and Canada; across 2019 (pre-pandemic), 2020 (the year the pandemic hit), and 2021 (the second year of the pandemic); and across individual tweeters. We show that Canadian tweets tend to have higher valence, lower arousal, and higher dominance than the US tweets. Further, we show that the COVID-19 pandemic had a marked impact on the emotional signature of tweets posted in 2020, when compared to the adjoining years. Finally, we determine metrics of TED for 170,000 tweeters to benchmark characteristics of TED metrics at an aggregate level. TUSC and the metrics for TED will enable a wide variety of research on studying how we use language to express ourselves, persuade, communicate, and influence, with particularly promising applications in public health, affective science, social science, and psychology.</abstract>
       <url hash="ceaa5915">2022.lrec-1.442</url>
@@ -5123,7 +5123,7 @@
       <title>Life is not Always Depressing: Exploring the Happy Moments of People Diagnosed with Depression</title>
       <author><first>Ana-Maria</first><last>Bucur</last></author>
       <author><first>Adrian</first><last>Cosma</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>4186–4192</pages>
       <abstract>In this work, we explore the relationship between depression and manifestations of happiness in social media. While the majority of works surrounding depression focus on symptoms, psychological research shows that there is a strong link between seeking happiness and being diagnosed with depression. We make use of Positive-Unlabeled learning paradigm to automatically extract happy moments from social media posts of both controls and users diagnosed with depression, and qualitatively analyze them with linguistic tools such as LIWC and keyness information. We show that the life of depressed individuals is not always bleak, with positive events related to friends and family being more noteworthy to their lives compared to the more mundane happy events reported by control users.</abstract>
       <url hash="c52d46cd">2022.lrec-1.444</url>
@@ -5172,7 +5172,7 @@
       <author><first>Shreyas</first><last>Sharma</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
       <author><first>Lucas</first><last>Pavanelli</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Kamer Ali</first><last>Yuksel</last></author>
       <author><first>Hassan</first><last>Sawaf</last></author>
@@ -5196,7 +5196,7 @@
       <title>Transfer Learning Methods for Domain Adaptation in Technical Logbook Datasets</title>
       <author><first>Farhad</first><last>Akhbardeh</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Travis</first><last>Desell</last></author>
       <pages>4235–4244</pages>
       <abstract>Event identification in technical logbooks poses challenges given the limited logbook data available in specific technical domains, the large set of possible classes, and logbook entries typically being in short form and non-standard technical language. Technical logbook data typically has both a domain, the field it comes from (e.g., automotive), and an application, what it is used for (e.g., maintenance). In order to better handle the problem of data scarcity, using a variety of technical logbook datasets, this paper investigates the benefits of using transfer learning from sources within the same domain (but different applications), from within the same application (but different domains) and from all available data. Results show that performing transfer learning within a domain provides statistically significant improvements, and in all cases but one the best performance. Interestingly, transfer learning from within the application or across the global dataset degrades results in all cases but one, which benefited from adding as much data as possible. A further analysis of the dataset similarities shows that the datasets with higher similarity scores performed better in transfer learning tasks, suggesting that this can be utilized to determine the effectiveness of adding a dataset in a transfer learning task for technical logbooks.</abstract>
@@ -5250,7 +5250,7 @@
       <author><first>Amélie</first><last>Chatelain</last></author>
       <author><first>Alessandro</first><last>Cappelli</last></author>
       <author><first>Iacopo</first><last>Poli</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>4275–4284</pages>
       <abstract>Access to large pre-trained models of varied architectures, in many different languages, is central to the democratization of NLP. We introduce PAGnol, a collection of French GPT models. Using scaling laws, we efficiently train PAGnol-XL (1.5B parameters) with the same computational budget as CamemBERT, a model 13 times smaller. PAGnol-XL is the largest model trained from scratch for the French language. We plan to train increasingly large and performing versions of PAGnol, exploring the capabilities of French extreme-scale models. For this first release, we focus on the pre-training and scaling calculations underlining PAGnol. We fit a scaling law for compute for the French language, and compare it with its English counterpart. We find the pre-training dataset significantly conditions the quality of the outputs, with common datasets such as OSCAR leading to low-quality offensive text. We evaluate our models on discriminative and generative tasks in French, comparing to other state-of-the-art French and multilingual models, and reaching the state of the art in the abstract summarization task. Our research was conducted on the public GENCI Jean Zay supercomputer, and our models up to the Large are made publicly available.</abstract>
       <url hash="9f18bca3">2022.lrec-1.455</url>
@@ -5260,7 +5260,7 @@
       <title><fixed-case>CEPOC</fixed-case>: The <fixed-case>C</fixed-case>ambridge Exams Publishing Open Cloze dataset</title>
       <author><first>Mariano</first><last>Felice</last></author>
       <author><first>Shiva</first><last>Taslimipoor</last></author>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
       <author><first>Paula</first><last>Buttery</last></author>
       <pages>4285–4290</pages>
       <abstract>Open cloze tests are a standard type of exercise where examinees must complete a text by filling in the gaps without any given options to choose from. This paper presents the Cambridge Exams Publishing Open Cloze (CEPOC) dataset, a collection of open cloze tests from world-renowned English language proficiency examinations. The tests in CEPOC have been expertly designed and validated using standard principles in language research and assessment. They are prepared for language learners at different proficiency levels and hence classified into different CEFR levels (A2, B1, B2, C1, C2). This resource can be a valuable testbed for various NLP tasks. We perform a complete set of experiments on three tasks: gap filling, gap prediction, and CEFR text classification. We implement transformer-based systems based on pre-trained language models to model each task and use our dataset as a test set, providing promising benchmark results.</abstract>
@@ -5293,7 +5293,7 @@
       <author><first>Nicolas</first><last>Hiebel</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>4306–4315</pages>
       <abstract>Modern Natural Language Processing relies on the availability of annotated corpora for training and evaluating models. Such resources are scarce, especially for specialized domains in languages other than English. In particular, there are very few resources for semantic similarity in the clinical domain in French. This can be useful for many biomedical natural language processing applications, including text generation. We introduce a definition of similarity that is guided by clinical facts and apply it to the development of a new French corpus of 1,000 sentence pairs manually annotated according to similarity scores. This new sentence similarity corpus is made freely available to the community. We further evaluate the corpus through experiments of automatic similarity measurement. We show that a model of sentence embeddings can capture similarity with state-of-the-art performance on the DEFT STS shared task evaluation data set (Spearman=0.8343). We also show that the corpus is complementary to DEFT STS.</abstract>
       <url hash="ef7948d8">2022.lrec-1.459</url>
@@ -5312,7 +5312,7 @@
       <title>Modeling Noise in Paraphrase Detection</title>
       <author><first>Teemu</first><last>Vahtola</last></author>
       <author><first>Eetu</first><last>Sjöblom</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
       <pages>4324–4332</pages>
       <abstract>Noisy labels in training data present a challenging issue in classification tasks, misleading a model towards incorrect decisions during training. In this paper, we propose the use of a linear noise model to augment pre-trained language models to account for label noise in fine-tuning. We test our approach in a paraphrase detection task with various levels of noise and five different languages. Our experiments demonstrate the effectiveness of the additional noise model in making the training procedures more robust and stable. Furthermore, we show that this model can be applied without further knowledge about annotation confidence and reliability of individual training examples and we analyse our results in light of data selection and sampling strategies.</abstract>
@@ -5323,9 +5323,9 @@
       <title>Give me your Intentions, <fixed-case>I</fixed-case>’ll Predict our Actions: A Two-level Classification of Speech Acts for Crisis Management in Social Media</title>
       <author><first>Enzo</first><last>Laurenti</last></author>
       <author><first>Nils</first><last>Bourgon</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Alda</first><last>Mari</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Camille</first><last>Courgeon</last></author>
       <pages>4333–4343</pages>
       <abstract>Discovered by (Austin,1962) and extensively promoted by (Searle, 1975), speech acts (SA) have been the object of extensive discussion in the philosophical and the linguistic literature, as well as in computational linguistics where the detection of SA have shown to be an important step in many down stream NLP applications. In this paper, we attempt to measure for the first time the role of SA on urgency detection in tweets, focusing on natural disasters. Indeed, SA are particularly relevant to identify intentions, desires, plans and preferences towards action, providing therefore actionable information that will help to set priorities for the human teams and decide appropriate rescue actions. To this end, we come up here with four main contributions: (1) A two-layer annotation scheme of SA both at the tweet and subtweet levels, (2) A new French dataset of 6,669 tweets annotated for both urgency and SA, (3) An in-depth analysis of the annotation campaign, highlighting the correlation between SA and urgency categories, and (4) A set of deep learning experiments to detect SA in a crisis corpus. Our results show that SA are correlated with urgency which is a first important step towards SA-aware NLP-based crisis management on social media.</abstract>
@@ -5335,9 +5335,9 @@
     <paper id="463">
       <title>Towards a Cleaner Document-Oriented Multilingual Crawled Corpus</title>
       <author><first>Julien</first><last>Abadji</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>4344–4355</pages>
       <abstract>The need for large corpora raw corpora has dramatically increased in recent years with the introduction of transfer learning and semi-supervised learning methods to Natural Language Processing. And while there have been some recent attempts to manually curate the amount of data necessary to train large language models, the main way to obtain this data is still through automatic web crawling. In this paper we take the existing multilingual web corpus OSCAR and its pipeline Ungoliant that extracts and classifies data from Common Crawl at the line level, and propose a set of improvements and automatic annotations in order to produce a new document-oriented version of OSCAR that could prove more suitable to pre-train large generative language models as well as hopefully other applications in Natural Language Processing and Digital Humanities.</abstract>
       <url hash="4d69e739">2022.lrec-1.463</url>
@@ -5399,7 +5399,7 @@
       <title>A Semi-Automated Live Interlingual Communication Workflow Featuring Intralingual Respeaking: Evaluation and Benchmarking</title>
       <author><first>Tomasz</first><last>Korybski</last></author>
       <author><first>Elena</first><last>Davitti</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Sabine</first><last>Braun</last></author>
       <pages>4405–4413</pages>
       <abstract>In this paper, we present a semi-automated workflow for live interlingual speech-to-text communication which seeks to reduce the shortcomings of existing ASR systems: a human respeaker works with a speaker-dependent speech recognition software (e.g., Dragon Naturally Speaking) to deliver punctuated same-language output of superior quality than obtained using out-of-the-box automatic speech recognition of the original speech. This is fed into a machine translation engine (the EU’s eTranslation) to produce live-caption ready text. We benchmark the quality of the output against the output of best-in-class (human) simultaneous interpreters working with the same source speeches from plenary sessions of the European Parliament. To evaluate the accuracy and facilitate the comparison between the two types of output, we use a tailored annotation approach based on the NTR model (Romero-Fresco and Pöchhacker, 2017). We find that the semi-automated workflow combining intralingual respeaking and machine translation is capable of generating outputs that are similar in terms of accuracy and completeness to the outputs produced in the benchmarking workflow, although the small scale of our experiment requires caution in interpreting this result.</abstract>
@@ -5433,14 +5433,14 @@
     </paper>
     <paper id="471">
       <title>The Search for Agreement on Logical Fallacy Annotation of an Infodemic</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Austin</first><last>Blodgett</last></author>
       <author><first>Taylor</first><last>Hudson</last></author>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
       <author><first>Douglas</first><last>Summers-Stay</last></author>
       <author><first>Peter</first><last>Sutor</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>4430–4438</pages>
       <abstract>We evaluate an annotation schema for labeling logical fallacy types, originally developed for a crowd-sourcing annotation paradigm, now using an annotation paradigm of two trained linguist annotators. We apply the schema to a variety of different genres of text relating to the COVID-19 pandemic. Our linguist (as opposed to crowd-sourced) annotation of logical fallacies allows us to evaluate whether the annotation schema category labels are sufficiently clear and non-overlapping for both manual and, later, system assignment. We report inter-annotator agreement results over two annotation phases as well as a preliminary assessment of the corpus for training and testing a machine learning algorithm (Pattern-Exploiting Training) for fallacy detection and recognition. The agreement results and system performance underscore the challenging nature of this annotation task and suggest that the annotation schema and paradigm must be iteratively evaluated and refined in order to arrive at a set of annotation labels that can be reproduced by human annotators and, in turn, provide reliable training data for automatic detection and recognition systems.</abstract>
       <url hash="9dd4d5f5">2022.lrec-1.471</url>
@@ -5448,7 +5448,7 @@
     </paper>
     <paper id="472">
       <title>Recovering Patient Journeys: A Corpus of Biomedical Entities and Relations on <fixed-case>T</fixed-case>witter (<fixed-case>BEAR</fixed-case>)</title>
-      <author><first>Amelie</first><last>Wührl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>4439–4450</pages>
       <abstract>Text mining and information extraction for the medical domain has focused on scientific text generated by researchers. However, their access to individual patient experiences or patient-doctor interactions is limited. On social media, doctors, patients and their relatives also discuss medical information. Individual information provided by laypeople complements the knowledge available in scientific text. It reflects the patient’s journey making the value of this type of data twofold: It offers direct access to people’s perspectives, and it might cover information that is not available elsewhere, including self-treatment or self-diagnose. Named entity recognition and relation extraction are methods to structure information that is available in unstructured text. However, existing medical social media corpora focused on a comparably small set of entities and relations. In contrast, we provide rich annotation layers to model patients’ experiences in detail. The corpus consists of medical tweets annotated with a fine-grained set of medical entities and relations between them, namely 14 entity (incl. environmental factors, diagnostics, biochemical processes, patients’ quality-of-life descriptions, pathogens, medical conditions, and treatments) and 20 relation classes (incl. prevents, influences, interactions, causes). The dataset consists of 2,100 tweets with approx. 6,000 entities and 2,200 relations.</abstract>
@@ -5467,7 +5467,7 @@
     </paper>
     <paper id="474">
       <title>Entity Linking over Nested Named Entities for <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Pavel</first><last>Braslavski</last></author>
       <author><first>Vladimir</first><last>Ivanov</last></author>
       <author><first>Tatiana</first><last>Batura</last></author>
@@ -5486,7 +5486,7 @@
       <author><first>Rahul</first><last>Sharnagat</last></author>
       <author><first>Jyotsana</first><last>Khatri</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4467–4476</pages>
       <abstract>Named Entity Recognition (NER) is a foundational NLP task that aims to provide class labels like Person, Location, Organisation, Time, and Number to words in free text. Named Entities can also be multi-word expressions where the additional I-O-B annotation information helps label them during the NER annotation process. While English and European languages have considerable annotated data for the NER task, Indian languages lack on that front- both in terms of quantity and following annotation standards. This paper releases a significantly sized standard-abiding Hindi NER dataset containing 109,146 sentences and 2,220,856 tokens, annotated with 11 tags. We discuss the dataset statistics in all their essential detail and provide an in-depth analysis of the NER tag-set used with our data. The statistics of tag-set in our dataset shows a healthy per-tag distribution especially for prominent classes like Person, Location and Organisation. Since the proof of resource-effectiveness is in building models with the resource and testing the model on benchmark data and against the leader-board entries in shared tasks, we do the same with the aforesaid data. We use different language models to perform the sequence labelling task for NER and show the efficacy of our data by performing a comparative evaluation with models trained on another dataset available for the Hindi NER task. Our dataset helps achieve a weighted F1 score of 88.78 with all the tags and 92.22 when we collapse the tag-set, as discussed in the paper. To the best of our knowledge, no available dataset meets the standards of volume (amount) and variability (diversity), as far as Hindi NER is concerned. We fill this gap through this work, which we hope will significantly help NLP for Hindi. We release this dataset with our code and models for further research at <url>https://github.com/cfiltnlp/HiNER</url></abstract>
       <url hash="39dc5311">2022.lrec-1.475</url>
@@ -5496,7 +5496,7 @@
       <title>Bootstrapping Text Anonymization Models with Distant Supervision</title>
       <author><first>Anthi</first><last>Papadopoulou</last></author>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Ildikó</first><last>Pilán</last></author>
       <pages>4477–4487</pages>
       <abstract>We propose a novel method to bootstrap text anonymization models based on distant supervision. Instead of requiring manually labeled training data, the approach relies on a knowledge graph expressing the background information assumed to be publicly available about various individuals. This knowledge graph is employed to automatically annotate text documents including personal data about a subset of those individuals. More precisely, the method determines which text spans ought to be masked in order to guarantee k-anonymity, assuming an adversary with access to both the text documents and the background information expressed in the knowledge graph. The resulting collection of labeled documents is then used as training data to fine-tune a pre-trained language model for text anonymization. We illustrate this approach using a knowledge graph extracted from Wikidata and short biographical texts from Wikipedia. Evaluation results with a RoBERTa-based model and a manually annotated collection of 553 summaries showcase the potential of the approach, but also unveil a number of issues that may arise if the knowledge graph is noisy or incomplete. The results also illustrate that, contrary to most sequence labeling problems, the text anonymization task may admit several alternative solutions.</abstract>
@@ -5518,7 +5518,7 @@
       <author><first>Kaushik</first><last>Gedela</last></author>
       <author><first>Alex</first><last>Marr</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <author><first>Chunxiao</first><last>Zhou</last></author>
       <pages>4497–4503</pages>
       <abstract>Quality assurance (QA) is an essential though underdeveloped part of the data annotation process. Although QA is supported to some extent in existing annotation tools, comprehensive support for QA is not standardly provided. In this paper we contribute QA4IE, a comprehensive QA tool for information extraction, which can (1) detect potential problems in text annotations in a timely manner, (2) accurately assess the quality of annotations, (3) visually display and summarize annotation discrepancies among annotation team members, (4) provide a comprehensive statistics report, and (5) support viewing of annotated documents interactively. This paper offers a competitive analysis comparing QA4IE and other popular annotation tools and demonstrates its features, usage, and effectiveness through a case study. The Python code, documentation, and demonstration video are available publicly at <url>https://github.com/CC-RMD-EpiBio/QA4IE</url>.</abstract>
@@ -5528,7 +5528,7 @@
     <paper id="479">
       <title>A New Dataset for Topic-Based Paragraph Classification in Genocide-Related Court Transcripts</title>
       <author><first>Miriam</first><last>Schirmer</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Gregor</first><last>Donabauer</last></author>
       <pages>4504–4512</pages>
       <abstract>Recent progress in natural language processing has been impressive in many different areas with transformer-based approaches setting new benchmarks for a wide range of applications. This development has also lowered the barriers for people outside the NLP community to tap into the tools and resources applied to a variety of domain-specific applications. The bottleneck however still remains the lack of annotated gold-standard collections as soon as one’s research or professional interest falls outside the scope of what is readily available. One such area is genocide-related research (also including the work of experts who have a professional interest in accessing, exploring and searching large-scale document collections on the topic, such as lawyers). We present GTC (Genocide Transcript Corpus), the first annotated corpus of genocide-related court transcripts which serves three purposes: (1) to provide a first reference corpus for the community, (2) to establish benchmark performances (using state-of-the-art transformer-based approaches) for the new classification task of paragraph identification of violence-related witness statements, (3) to explore first steps towards transfer learning within the domain. We consider our contribution to be addressing in particular this year’s hot topic on Language Technology for All.</abstract>
@@ -5541,7 +5541,7 @@
       <author><first>Rinaldo</first><last>Lima</last></author>
       <author><first>Adrian-Gabriel</first><last>Chifu</last></author>
       <author><first>Bernard</first><last>Espinasse</last></author>
-      <author><first>Sébastien</first><last>Fournier</last></author>
+      <author id="sebastien-fournier"><first>Sébastien</first><last>Fournier</last></author>
       <pages>4513–4522</pages>
       <abstract>The Relation Extraction (RE) is an important basic Natural Language Processing (NLP) for many applications, such as search engines, recommender systems, question-answering systems and others. There are many studies in this subarea of NLP that continue to be explored, such as SemEval campaigns (2010 to 2018), or DDI Extraction (2013).For more than ten years, different RE systems using mainly statistical models have been proposed as well as the frameworks to develop them. This paper focuses on frameworks allowing to develop such RE systems using deep learning models. Such frameworks should make it possible to reproduce experiments of various deep learning models and pre-processing techniques proposed in various publications. Currently, there are very few frameworks of this type, and we propose a new open and optimizable framework, called DeepREF, which is inspired by the OpenNRE and REflex existing frameworks. DeepREF allows the employment of various deep learning models, to optimize their use, to identify the best inputs and to get better results with each data set for RE and compare with other experiments, making ablation studies possible. The DeepREF Framework is evaluated on several reference corpora from various application domains.</abstract>
       <url hash="aec10555">2022.lrec-1.480</url>
@@ -5570,7 +5570,7 @@
       <title>Using Sentence-level Classification Helps Entity Extraction from Material Science Literature</title>
       <author><first>Ankan</first><last>Mullick</last></author>
       <author><first>Shubhraneel</first><last>Pal</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Seung-Cheol</first><last>Lee</last></author>
       <author><first>Satadeep</first><last>Bhattacharjee</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -5602,7 +5602,7 @@
       <author><first>Frederic</first><last>Bechet</last></author>
       <author><first>Elie</first><last>Antoine</last></author>
       <author><first>Jérémy</first><last>Auguste</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>4561–4568</pages>
       <abstract>This paper introduces the question answering paradigm as a way to explore digitized archive collections for Social Science studies. In particular, we are interested in evaluating largely studied question generation and question answering approaches on a new type of documents, as a step forward beyond traditional benchmark evaluations. Question generation can be used as a way to provide enhanced training material for Machine Reading Question Answering algorithms but also has its own purpose in this paradigm, where relevant questions can be used as a way to create explainable links between documents. To this end, generating large amounts of question is not the only motivation, but we need to include qualitative and semantic control to the generation process. We propose a new approach for question generation, relying on a BART Transformer based generative model, for which input data are enriched by semantic constraints. Question generation and answering are evaluated on several French corpora, and the whole approach is validated on a new corpus of digitized archive collection of a French Social Science journal.</abstract>
       <url hash="e41459c2">2022.lrec-1.486</url>
@@ -5610,7 +5610,7 @@
     </paper>
     <paper id="487">
       <title>Evaluating Retrieval for Multi-domain Scientific Publications</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Jingxuan</first><last>Tu</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
@@ -5618,7 +5618,7 @@
       <author><first>Ian</first><last>Ross</last></author>
       <author><first>John</first><last>Lawson</last></author>
       <author><first>Andrew</first><last>Borg</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>4569–4576</pages>
       <abstract>This paper provides an overview of the xDD/LAPPS Grid framework and provides results of evaluating the AskMe retrievalengine using the BEIR benchmark datasets. Our primary goal is to determine a solid baseline of performance to guide furtherdevelopment of our retrieval capabilities. Beyond this, we aim to dig deeper to determine when and why certain approachesperform well (or badly) on both in-domain and out-of-domain data, an issue that has to date received relatively little attention.</abstract>
       <url hash="8d9a5944">2022.lrec-1.487</url>
@@ -5674,7 +5674,7 @@
       <title><fixed-case>P</fixed-case>hys<fixed-case>NLU</fixed-case>: A Language Resource for Evaluating Natural Language Understanding and Explanation Coherence in Physics</title>
       <author><first>Jordan</first><last>Meadows</last></author>
       <author><first>Zili</first><last>Zhou</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>4611–4619</pages>
       <abstract>In order for language models to aid physics research, they must first encode representations of mathematical and natural language discourse which lead to coherent explanations, with correct ordering and relevance of statements. We present a collection of datasets developed to evaluate the performance of language models in this regard, which measure capabilities with respect to sentence ordering, position, section prediction, and discourse coherence. Analysis of the data reveals the classes of arguments and sub-disciplines which are most common in physics discourse, as well as the sentence-level frequency of equations and expressions. We present baselines that demonstrate how contemporary language models are challenged by coherence related tasks in physics, even when trained on mathematical natural language objectives.</abstract>
       <url hash="4cbeff7a">2022.lrec-1.492</url>
@@ -5682,12 +5682,12 @@
     </paper>
     <paper id="493">
       <title><fixed-case>HECTOR</fixed-case>: A Hybrid <fixed-case>TE</fixed-case>xt <fixed-case>S</fixed-case>implifi<fixed-case>C</fixed-case>ation <fixed-case>TO</fixed-case>ol for Raw Texts in <fixed-case>F</fixed-case>rench</title>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
       <author><first>Eva</first><last>Rolin</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Delphine</first><last>Bernhard</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>4620–4630</pages>
       <abstract>Reducing the complexity of texts by applying an Automatic Text Simplification (ATS) system has been sparking interest inthe area of Natural Language Processing (NLP) for several years and a number of methods and evaluation campaigns haveemerged targeting lexical and syntactic transformations. In recent years, several studies exploit deep learning techniques basedon very large comparable corpora. Yet the lack of large amounts of corpora (original-simplified) for French has been hinderingthe development of an ATS tool for this language. In this paper, we present our system, which is based on a combination ofmethods relying on word embeddings for lexical simplification and rule-based strategies for syntax and discourse adaptations. We present an evaluation of the lexical, syntactic and discourse-level simplifications according to automatic and humanevaluations. We discuss the performances of our system at the lexical, syntactic, and discourse levels</abstract>
       <url hash="507355f0">2022.lrec-1.493</url>
@@ -5695,7 +5695,7 @@
     </paper>
     <paper id="494">
       <title><fixed-case>A</fixed-case>i<fixed-case>RO</fixed-case> - an Interactive Learning Tool for Children at Risk of Dyslexia</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <author><first>Stine</first><last>Fuglsang Engmose</last></author>
       <pages>4631–4636</pages>
       <abstract>This paper presents the AiRO learning tool, which is designed for use in classrooms and homes by children at risk of developing dyslexia. The tool is based on the client-server architecture with a graphical and auditive front end (providing the interaction with the learner) and all NLP-related components located at the back end (analysing the pupil’s input, deciding on the system’s response, preparing speech synthesis and other feedback, logging the pupil’s performance etc). AiRO software consists of independent modules for easy maintenance, e.g., upgrading the didactics or preparing AiROs for other languages. This paper also reports on our first tests ‘in vivo’ (November 2021) with 49 pupils (aged 6). The subjects completed 16 AiRO sessions over a four-week period. The subjects were pre- and post-tested on spelling and reading. The experimental group significantly out-performed the control group, suggesting that a new IT-supported teaching strategy may be within reach. A collection of AiRO resources (language materials, software, synthetic voice) are available as open source. At LREC, we shall present a demo of the AiRO learning tool.</abstract>
@@ -5707,7 +5707,7 @@
       <author><first>Annika</first><last>Simonsen</last></author>
       <author><first>Sandra Saxov</first><last>Lamhauge</last></author>
       <author><first>Iben Nyholm</first><last>Debess</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>4637–4643</pages>
       <abstract>The biggest challenges we face in developing LR and LT for Faroese is the lack of existing resources. A few resources already exist for Faroese, but many of them are either of insufficient size and quality or are not easily accessible. Therefore, the Faroese ASR project, Ravnur, set out to make a BLARK for Faroese. The BLARK is still in the making, but many of its resources have already been produced or collected. The LR status is framed by mentioning existing LR of relevant size and quality. The specific components of the BLARK are presented as well as the working principles behind the BLARK. The BLARK will be a pillar in Faroese LR, being relatively substantial in both size, quality, and diversity. It will be open-source, inviting other small languages to use it as an inspiration to create their own BLARK. We comment on the faulty yet sprouting LT situation in the Faroe Islands. The LR and LT challenges are not solved with just a BLARK. Some initiatives are therefore proposed to better the prospects of Faroese LT. The open-source principle of the project should facilitate further development.</abstract>
       <url hash="f30693c4">2022.lrec-1.495</url>
@@ -5730,7 +5730,7 @@
       <author><first>Jennifer</first><last>Jacobs</last></author>
       <author><first>Charis</first><last>Harty</last></author>
       <author><first>Margaret</first><last>Perkoff</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>4654–4662</pages>
       <abstract>Transcripts of teaching episodes can be effective tools to understand discourse patterns in classroom instruction. According to most educational experts, sustained classroom discourse is a critical component of equitable, engaging, and rich learning environments for students. This paper describes the TalkMoves dataset, composed of 567 human-annotated K-12 mathematics lesson transcripts (including entire lessons or portions of lessons) derived from video recordings. The set of transcripts primarily includes in-person lessons with whole-class discussions and/or small group work, as well as some online lessons. All of the transcripts are human-transcribed, segmented by the speaker (teacher or student), and annotated at the sentence level for ten discursive moves based on accountable talk theory. In addition, the transcripts include utterance-level information in the form of dialogue act labels based on the Switchboard Dialog Act Corpus. The dataset can be used by educators, policymakers, and researchers to understand the nature of teacher and student discourse in K-12 math classrooms. Portions of this dataset have been used to develop the TalkMoves application, which provides teachers with automated, immediate, and actionable feedback about their mathematics instruction.</abstract>
@@ -5776,7 +5776,7 @@
       <title>A Benchmark Corpus for the Detection of Automatically Generated Text in Academic Publications</title>
       <author><first>Vijini</first><last>Liyanage</last></author>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>4692–4700</pages>
       <abstract>Automatic text generation based on neural language models has achieved performance levels that make the generated text almost indistinguishable from those written by humans. Despite the value that text generation can have in various applications, it can also be employed for malicious tasks. The diffusion of such practices represent a threat to the quality of academic publishing. To address these problems, we propose in this paper two datasets comprised of artificially generated research content: a completely synthetic dataset and a partial text substitution dataset. In the first case, the content is completely generated by the GPT-2 model after a short prompt extracted from original papers. The partial or hybrid dataset is created by replacing several sentences of abstracts with sentences that are generated by the Arxiv-NLP model. We evaluate the quality of the datasets comparing the generated texts to aligned original texts using fluency metrics such as BLEU and ROUGE. The more natural the artificial texts seem, the more difficult they are to detect and the better is the benchmark. We also evaluate the difficulty of the task of distinguishing original from generated text by using state-of-the-art classification models.</abstract>
       <url hash="c1c26430">2022.lrec-1.501</url>
@@ -5809,7 +5809,7 @@
       <title>Text Classification and Prediction in the Legal Domain</title>
       <author><first>Minh-Quoc</first><last>Nghiem</last></author>
       <author><first>Paul</first><last>Baylis</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>4717–4722</pages>
       <abstract>We present a case study on the application of text classification and legal judgment prediction for flight compensation. We combine transformer-based classification models to classify responses from airlines and incorporate text data with other data types to predict a legal claim being successful. Our experimental evaluations show that our models achieve consistent and significant improvements over baselines and even outperformed human prediction when predicting a claim being successful. These models were integrated into an existing claim management system, providing substantial productivity gains for handling the case lifecycle, currently supporting several thousands of monthly processes.</abstract>
@@ -5874,7 +5874,7 @@
       <author><first>Matthieu</first><last>Allain</last></author>
       <author><first>Urszula</first><last>Czerwinska</last></author>
       <author><first>Amaury</first><last>Fouret</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <pages>4754–4766</pages>
       <abstract>Detecting divergences in the applications of the law (where the same legal text is applied differently by two rulings) is an important task. It is the mission of the French Cour de Cassation. The first step in the detection of divergences is to detect similar cases, which is currently done manually by experts. They rely on summarised versions of the rulings (syntheses and keyword sequences), which are currently produced manually and are not available for all rulings. There is also a high degree of variability in the keyword choices and the level of granularity used. In this article, we therefore aim to provide automatic tools to facilitate the search for similar rulings. We do this by (i) providing automatic keyword sequence generation models, which can be used to improve the coverage of the analysis, and (ii) providing measures of similarity based on the available texts and augmented with predicted keyword sequences. Our experiments show that the predictions improve correlations of automatically obtained similarities against our specially colelcted human judgments of similarity.</abstract>
@@ -5994,7 +5994,7 @@
       <title>Building Dataset for Grounding of Formulae — Annotating Coreference Relations Among Math Identifiers</title>
       <author><first>Takuto</first><last>Asakura</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>4851–4858</pages>
       <abstract>Grounding the meaning of each symbol in math formulae is important for automated understanding of scientific documents. Generally speaking, the meanings of math symbols are not necessarily constant, and the same symbol is used in multiple meanings. Therefore, coreference relations between symbols need to be identified for grounding, and the task has aspects of both description alignment and coreference analysis. In this study, we annotated 15 papers selected from arXiv.org with the grounding information. In total, 12,352 occurrences of math identifiers in these papers were annotated, and all coreference relations between them were made explicit in each paper. The constructed dataset shows that regardless of the ambiguity of symbols in math formulae, coreference relations can be labeled with a high inter-annotator agreement. The constructed dataset enables us to achieve automation of formula grounding, and in turn, make deeper use of the knowledge in scientific documents using techniques such as math information extraction. The built grounding dataset is available at <url>https://sigmathling.kwarc.info/resources/grounding-</url> dataset/.</abstract>
       <url hash="64b4ca4a">2022.lrec-1.519</url>
@@ -6005,9 +6005,9 @@
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>4859–4872</pages>
       <abstract>Recent advances in standardization for annotated language resources have led to successful large scale efforts, such as the Universal Dependencies (UD) project for multilingual syntactically annotated data. By comparison, the important task of coreference resolution, which clusters multiple mentions of entities in a text, has yet to be standardized in terms of data formats or annotation guidelines. In this paper we present CorefUD, a multilingual collection of corpora and a standardized format for coreference resolution, compatible with morphosyntactic annotations in the UD framework and including facilities for related tasks such as named entity recognition, which forms a first step in the direction of convergence for coreference resolution across languages.</abstract>
       <url hash="20c31b8c">2022.lrec-1.520</url>
@@ -6017,10 +6017,10 @@
       <title>The Universal Anaphora Scorer</title>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Sopan</first><last>Khosla</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>4873–4883</pages>
       <abstract>The aim of the Universal Anaphora initiative is to push forward the state of the art in anaphora and anaphora resolution by expanding the aspects of anaphoric interpretation which are or can be reliably annotated in anaphoric corpora, producing unified standards to annotate and encode these annotations, deliver datasets encoded according to these standards, and developing methods for evaluating models carrying out this type of interpretation. Such expansion of the scope of anaphora resolution requires a comparable expansion of the scope of the scorers used to evaluate this work. In this paper, we introduce an extended version of the Reference Coreference Scorer (Pradhan et al., 2014) that can be used to evaluate the extended range of anaphoric interpretation included in the current Universal Anaphora proposal. The UA scorer supports the evaluation of identity anaphora resolution and of bridging reference resolution, for which scorers already existed but not integrated in a single package. It also supports the evaluation of split antecedent anaphora and discourse deixis, for which no tools existed. The proposed approach to the evaluation of split antecedent anaphora is entirely novel; the proposed approach to the evaluation of discourse deixis leverages the encoding of discourse deixis proposed in Universal Anaphora to enable the use for discourse deixis of the same metrics already used for identity anaphora. The scorer was tested in the recent CODI-CRAC 2021 Shared Task on Anaphora Resolution in Dialogues.</abstract>
       <url hash="3c72f0f5">2022.lrec-1.521</url>
@@ -6080,7 +6080,7 @@
       <title>Evaluating Pre-training Objectives for Low-Resource Translation into Morphologically Rich Languages</title>
       <author><first>Prajit</first><last>Dhar</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>4933–4943</pages>
       <abstract>The scarcity of parallel data is a major limitation for Neural Machine Translation (NMT) systems, in particular for translation into morphologically rich languages (MRLs). An important way to overcome the lack of parallel data is to leverage target monolingual data, which is typically more abundant and easier to collect. We evaluate a number of techniques to achieve this, ranging from back-translation to random token masking, on the challenging task of translating English into four typologically diverse MRLs, under low-resource settings. Additionally, we introduce Inflection Pre-Training (or PT-Inflect), a novel pre-training objective whereby the NMT system is pre-trained on the task of re-inflecting lemmatized target sentences before being trained on standard source-to-target language translation. We conduct our evaluation on four typologically diverse target MRLs, and find that PT-Inflect surpasses NMT systems trained only on parallel data. While PT-Inflect is outperformed by back-translation overall, combining the two techniques leads to gains in some of the evaluated language pairs.</abstract>
       <url hash="1a654a35">2022.lrec-1.527</url>
@@ -6090,7 +6090,7 @@
       <title>Aligning Images and Text with Semantic Role Labels for Fine-Grained Cross-Modal Understanding</title>
       <author><first>Abhidip</first><last>Bhattacharyya</last></author>
       <author><first>Cecilia</first><last>Mauceri</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Christoffer</first><last>Heckman</last></author>
       <pages>4944–4954</pages>
       <abstract>As vision processing and natural language processing continue to advance, there is increasing interest in multimodal applications, such as image retrieval, caption generation, and human-robot interaction. These tasks require close alignment between the information in the images and text. In this paper, we present a new multimodal dataset that combines state of the art semantic annotation for language with the bounding boxes of corresponding images. This richer multimodal labeling supports cross-modal inference for applications in which such alignment is useful. Our semantic representations, developed in the natural language processing community, abstract away from the surface structure of the sentence, focusing on specific actions and the roles of their participants, a level that is equally relevant to images. We then utilize these representations in the form of semantic role labels in the captions and the images and demonstrate improvements in standard tasks such as image retrieval. The potential contributions of these additional labels is evaluated using a role-aware retrieval system based on graph convolutional and recurrent neural networks. The addition of semantic roles into this system provides a significant increase in capability and greater flexibility for these tasks, and could be extended to state-of-the-art techniques relying on transformers with larger amounts of annotated data.</abstract>
@@ -6100,13 +6100,13 @@
     <paper id="529">
       <title>Rosetta-<fixed-case>LSF</fixed-case>: an Aligned Corpus of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage and <fixed-case>F</fixed-case>rench for Text-to-Sign Translation</title>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Camille</first><last>Challant</last></author>
       <author><first>Claire</first><last>Danet</last></author>
       <author><first>Boris</first><last>Dauriac</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
       <author><first>Emmanuella</first><last>Martinod</last></author>
-      <author><first>Jérémie</first><last>Segouat</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
       <pages>4955–4962</pages>
       <abstract>This article presents a new French Sign Language (LSF) corpus called “Rosetta-LSF”. It was created to support future studies on the automatic translation of written French into LSF, rendered through the animation of a virtual signer. An overview of the field highlights the importance of a quality representation of LSF. In order to obtain quality animations understandable by signers, it must surpass the simple “gloss transcription” of the LSF lexical units to use in the discourse. To achieve this, we designed a corpus composed of four types of aligned data, and evaluated its usability. These are: news headlines in French, translations of these headlines into LSF in the form of videos showing animations of a virtual signer, gloss annotations of the “traditional” type—although including additional information on the context in which each gestural unit is performed as well as their potential for adaptation to another context—and AZee representations of the videos, i.e. formal expressions capturing the necessary and sufficient linguistic information. This article describes this data, exhibiting an example from the corpus. It is available online for public research.</abstract>
       <url hash="33a12bb6">2022.lrec-1.529</url>
@@ -6116,14 +6116,14 @@
       <title><fixed-case>MLQE</fixed-case>-<fixed-case>PE</fixed-case>: A Multilingual Quality Estimation and Post-Editing Dataset</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Shuo</first><last>Sun</last></author>
-      <author><first>Erick</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Nina</first><last>Lopatina</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>4963–4974</pages>
       <abstract>We present MLQE-PE, a new dataset for Machine Translation (MT) Quality Estimation (QE) and Automatic Post-Editing (APE). The dataset contains annotations for eleven language pairs, including both high- and low-resource languages. Specifically, it is annotated for translation quality with human labels for up to 10,000 translations per language pair in the following formats: sentence-level direct assessments and post-editing effort, and word-level binary good/bad labels. Apart from the quality-related scores, each source-translation sentence pair is accompanied by the corresponding post-edited sentence, as well as titles of the articles where the sentences were extracted from, and information on the neural MT models used to translate the text. We provide a thorough description of the data collection and annotation process as well as an analysis of the annotation distribution for each language pair. We also report the performance of baseline systems trained on the MLQE-PE dataset. The dataset is freely available and has already been used for several WMT shared tasks.</abstract>
       <url hash="c3959ea4">2022.lrec-1.530</url>
@@ -6134,7 +6134,7 @@
       <author><first>Sangwhan</first><last>Moon</last></author>
       <author><first>Won Ik</first><last>Cho</last></author>
       <author><first>Hye Joo</first><last>Han</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Nam Soo</first><last>Kim</last></author>
       <pages>4975–4983</pages>
       <abstract>Korean is a language with complex morphology that uses spaces at larger-than-word boundaries, unlike other East-Asian languages. While morpheme-based text generation can provide significant semantic advantages compared to commonly used character-level approaches, Korean morphological analyzers only provide a sequence of morpheme-level tokens, losing information in the tokenization process. Two crucial issues are the loss of spacing information and subcharacter level morpheme normalization, both of which make the tokenization result challenging to reconstruct the original input string, deterring the application to generative tasks. As this problem originates from the conventional scheme used when creating a POS tagging corpus, we propose an improvement to the existing scheme, which makes it friendlier to generative tasks. On top of that, we suggest a fully-automatic annotation of a corpus by leveraging public analyzers. We vote the surface and POS from the outcome and fill the sequence with the selected morphemes, yielding tokenization with a decent quality that incorporates space information. Our scheme is verified via an evaluation done on an external corpus, and subsequently, it is adapted to Korean Wikipedia to construct an open, permissive resource. We compare morphological analyzer performance trained on our corpus with existing methods, then perform an extrinsic evaluation on a downstream task.</abstract>
@@ -6152,7 +6152,7 @@
     </paper>
     <paper id="533">
       <title>A <fixed-case>H</fixed-case>mong Corpus with Elaborate Expression Annotations</title>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Xinyu</first><last>Zhang</last></author>
       <author><first>Chenxuan</first><last>Cui</last></author>
       <author><first>Katherine J.</first><last>Zhang</last></author>
@@ -6164,7 +6164,7 @@
     <paper id="534">
       <title><fixed-case>ELAL</fixed-case>: An Emotion Lexicon for the Analysis of <fixed-case>A</fixed-case>lsatian Theatre Plays</title>
       <author><first>Delphine</first><last>Bernhard</last></author>
-      <author><first>Pablo</first><last>Ruiz Fabo</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz Fabo</last></author>
       <pages>5001–5010</pages>
       <abstract>In this work, we present a novel and manually corrected emotion lexicon for the Alsatian dialects, including graphical variants of Alsatian lexical items. These High German dialects are spoken in the North-East of France. They are used mainly orally, and thus lack a stable and consensual spelling convention. There has nevertheless been a continuous literary production since the middle of the 17th century and, in particular, theatre plays. A large sample of Alsatian theatre plays is currently being encoded according to the Text Encoding Initiative (TEI) Guidelines. The emotion lexicon will be used to perform automatic emotion analysis in this corpus of theatre plays. We used a graph-based approach to deriving emotion scores and translations, relying only on bilingual lexicons, cognates and spelling variants. The source lexicons for emotion scores are the NRC Valence Arousal and Dominance and NRC Emotion Intensity lexicons.</abstract>
       <url hash="b798e6f0">2022.lrec-1.534</url>
@@ -6175,7 +6175,7 @@
       <author><first>Robert</first><last>Pugh</last></author>
       <author><first>Marivel</first><last>Huerta Mendez</last></author>
       <author><first>Mitsuya</first><last>Sasaki</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>5011–5020</pages>
       <abstract>We present a morpho-syntactically-annotated corpus of Western Sierra Puebla Nahuatl that conforms to the annotation guidelines of the Universal Dependencies project. We describe the sources of the texts that make up the corpus, the annotation process, and important annotation decisions made throughout the development of the corpus. As the first indigenous language of Mexico to be added to the Universal Dependencies project, this corpus offers a good opportunity to test and more clearly define annotation guidelines for the Meso-american linguistic area, spontaneous and elicited spoken data, and code-switching.</abstract>
       <url hash="c2cb3a13">2022.lrec-1.535</url>
@@ -6184,7 +6184,7 @@
     <paper id="536">
       <title>The Construction and Evaluation of the <fixed-case>LEAFTOP</fixed-case> Dataset of Automatically Extracted Nouns in 1480 Languages</title>
       <author><first>Gregory</first><last>Baker</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>5021–5028</pages>
       <abstract>The LEAFTOP (language extracted automatically from thousands of passages) dataset consists of nouns that appear in multiple places in the four gospels of the New Testament. We use a naive approach — probabilistic inference — to identify likely translations in 1480 other languages. We evaluate this process and find that it provides lexiconaries with accuracy from 42% (Korafe) to 99% (Runyankole), averaging 72% correct across evaluated languages. The process translates up to 161 distinct lemmas from Koine Greek (average 159). We identify nouns which appear to be easy and hard to translate, language families where this technique works, and future possible improvements and extensions. The claims to novelty are: the use of a Koine Greek New Testament as the source language; using a fully-annotated manually-created grammatically parse of the source text; a custom scraper for texts in the target languages; a new metric for language similarity; a novel strategy for evaluation on low-resource languages.</abstract>
       <url hash="80bb0600">2022.lrec-1.536</url>
@@ -6244,7 +6244,7 @@
     <paper id="541">
       <title>Standard <fixed-case>G</fixed-case>erman Subtitling of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> content: the <fixed-case>PASSAGE</fixed-case> Project</title>
       <author><first>Jonathan David</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Veronika</first><last>Haberkorn</last></author>
       <pages>5063–5070</pages>
@@ -6322,20 +6322,20 @@
     </paper>
     <paper id="548">
       <title><fixed-case>L</fixed-case>atvian National Corpora Collection – Korpuss.lv</title>
-      <author><first>Baiba</first><last>Saulite</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulite</last></author>
       <author><first>Roberts</first><last>Darģis</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Ilze</first><last>Auzina</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
-      <author><first>Arturs</first><last>Znotins</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
+      <author id="arturs-znotins"><first>Arturs</first><last>Znotins</last></author>
       <author><first>Laine</first><last>Strankale</last></author>
       <author><first>Kristīne</first><last>Pokratniece</last></author>
       <author><first>Ilmārs</first><last>Poikāns</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Anda</first><last>Baklāne</last></author>
       <author><first>Valdis</first><last>Saulespurēns</last></author>
       <author><first>Jānis</first><last>Ziediņš</last></author>
@@ -6349,7 +6349,7 @@
       <author><first>Ioan-Bogdan</first><last>Iordache</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Catalin</first><last>Stoean</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>5130–5136</pages>
       <abstract>A new data set is gathered from a Romanian financial news website for the duration of four years. It is further refined to extract only information related to one company by selecting only paragraphs and even sentences that referred to it. The relation between the extracted sentiment scores of the texts and the stock prices from the corresponding dates is investigated using various approaches like the lexicon-based Vader tool, Financial BERT, as well as Transformer-based models. Automated translation is used, since some models could be only applied for texts in English. It is encouraging that all models, be that they are applied to Romanian or English texts, indicate a correlation between the sentiment scores and the increase or decrease of the stock closing prices.</abstract>
       <url hash="211987c5">2022.lrec-1.549</url>
@@ -6358,8 +6358,8 @@
     <paper id="550">
       <title>A Free/Open-Source Morphological Analyser and Generator for Sakha</title>
       <author><first>Sardana</first><last>Ivanova</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>5137–5142</pages>
       <abstract>We present, to our knowledge, the first ever published morphological analyser and generator for Sakha, a marginalised language of Siberia. The transducer, developed using HFST, has coverage of solidly above 90%, and high precision. In the development of the analyser, we have expanded linguistic knowledge about Sakha, and developed strategies for complex grammatical patterns. The transducer is already being used in downstream tasks, including computer assisted language learning applications for linguistic maintenance and computational linguistic shared tasks.</abstract>
       <url hash="595d8dac">2022.lrec-1.550</url>
@@ -6428,7 +6428,7 @@
     <paper id="556">
       <title>Thematic Fit Bits: Annotation Quality and Quantity Interplay for Event Participant Representation</title>
       <author><first>Yuval</first><last>Marton</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>5188–5197</pages>
       <abstract>Modeling thematic fit (a verb-argument compositional semantics task) currently requires a very large burden of labeled data. We take a linguistically machine-annotated large corpus and replace corpus layers with output from higher-quality, more modern taggers. We compare the old and new corpus versions’ impact on a verb-argument fit modeling task, using a high-performing neural approach. We discover that higher annotation quality dramatically reduces our data requirement while demonstrating better supervised predicate-argument classification. But in applying the model to psycholinguistic tasks outside the training objective, we see clear gains at scale, but only in one of two thematic fit estimation tasks, and no clear gains on the other. We also see that quality improves with training size, but perhaps plateauing or even declining in one task. Last, we tested the effect of role set size. All this suggests that the quality/quantity interplay is not all you need. We replicate previous studies while modifying certain role representation details and set a new state-of-the-art in event modeling, using a fraction of the data. We make the new corpus version public.</abstract>
       <url hash="7f98c0eb">2022.lrec-1.556</url>
@@ -6477,7 +6477,7 @@
     <paper id="561">
       <title>The Automatic Extraction of Linguistic Biomarkers as a Viable Solution for the Early Diagnosis of Mental Disorders</title>
       <author><first>Gloria</first><last>Gagliardi</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>5234–5242</pages>
       <abstract>Digital Linguistic Biomarkers extracted from spontaneous language productions proved to be very useful for the early detection of various mental disorders. This paper presents a computational pipeline for the automatic processing of oral and written texts: the tool enables the computation of a rich set of linguistic features at the acoustic, rhythmic, lexical, and morphosyntactic levels. Several applications of the instrument - for the detection of Mild Cognitive Impairments, Anorexia Nervosa, and Developmental Language Disorders - are also briefly discussed.</abstract>
       <url hash="a558bb62">2022.lrec-1.561</url>
@@ -6494,7 +6494,7 @@
     </paper>
     <paper id="563">
       <title><fixed-case>COSMOS</fixed-case>: Experimental and Comparative Studies of Concept Representations in Schoolchildren</title>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Farida</first><last>Said</last></author>
       <pages>5251–5260</pages>
       <abstract>COSMOS is a multidisciplinary research project investigating schoolchildren’s beliefs and representations of specific concepts under control variables (age, gender, language spoken at home). Seven concepts are studied: <i>friend, father, mother, villain, work, television</i> and <i>dog</i>. We first present the protocol used and the data collected from a survey of 184 children in two age groups (6-7 and 9-11 years) in four schools in Brittany (France). A word-level lexical study shows that children’s linguistic proficiency and lexical diversity increase with age, and we observe an interaction effect between gender and age on lexical diversity as measured with MLR (Measure of Lexical Richness). In contrast, none of the control variables affects lexical density. We also present the lemmas that schoolchildren most often associate with each concept. Generalized linear mixed-effects models reveal significant effects of age, gender, and home language on some concept-lemma associations and specific interactions between age and gender. Most of the identified effects are documented in the child development literature. To better understand the process of semantic construction in children, additional lexical analyses at the n-gram, chunk, and clause levels would be helpful. We briefly present ongoing and planned work in this direction. The COSMOS data will soon be made freely available to the scientific community.</abstract>
@@ -6504,7 +6504,7 @@
     <paper id="564">
       <title>Features of Perceived Metaphoricity on the Discourse Level: Abstractness and Emotionality</title>
       <author><first>Prisca</first><last>Piccirilli</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>5261–5273</pages>
       <abstract>Research on metaphorical language has shown ties between abstractness and emotionality with regard to metaphoricity; prior work is however limited to the word and sentence levels, and up to date there is no empirical study establishing the extent to which this is also true on the discourse level. This paper explores which textual and perceptual features human annotators perceive as important for the metaphoricity of discourses and expressions, and addresses two research questions more specifically. First, is a metaphorically-perceived discourse more abstract and more emotional in comparison to a literally- perceived discourse? Second, is a metaphorical expression preceded by a more metaphorical/abstract/emotional context than a synonymous literal alternative? We used a dataset of 1,000 corpus-extracted discourses for which crowdsourced annotators (1) provided judgements on whether they perceived the discourses as more metaphorical or more literal, and (2) systematically listed lexical terms which triggered their decisions in (1). Our results indicate that metaphorical discourses are more emotional and to a certain extent more abstract than literal discourses. However, neither the metaphoricity nor the abstractness and emotionality of the preceding discourse seem to play a role in triggering the choice between synonymous metaphorical vs. literal expressions. Our dataset is available at <url>https://www.ims.uni-stuttgart.de/data/discourse-met-lit</url>.</abstract>
       <url hash="6da1c3ad">2022.lrec-1.564</url>
@@ -6517,7 +6517,7 @@
       <author><first>Nihar</first><last>Sahoo</last></author>
       <author><first>Niteesh</first><last>Mallela</last></author>
       <author><first>Himanshu</first><last>Gupta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Milind</first><last>Savagaonkar</last></author>
       <author><first>Nidhi</first><last>Sultan</last></author>
       <author><first>Roshni</first><last>Ramnani</last></author>
@@ -6562,7 +6562,7 @@
       <title>Investigating Independence vs. Control: Agenda-Setting in <fixed-case>R</fixed-case>ussian News Coverage on Social Media</title>
       <author><first>Annerose</first><last>Eichel</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>5314–5323</pages>
       <abstract>Agenda-setting is a widely explored phenomenon in political science: powerful stakeholders (governments or their financial supporters) have control over the media and set their agenda: political and economical powers determine which news should be salient. This is a clear case of targeted manipulation to divert the public attention from serious issues affecting internal politics (such as economic downturns and scandals) by flooding the media with potentially distracting information. We investigate agenda-setting in the Russian social media landscape, exploring the relation between economic indicators and mentions of foreign geopolitical entities, as well as of Russia itself. Our contributions are at three levels: at the level of the domain of the investigation, our study is the first to substructure the Russian media landscape in state-controlled vs. independent outlets in the context of strategic distraction from negative economic trends; at the level of the scope of the investigation, we involve a large set of geopolitical entities (while previous work has focused on the U.S.); at the qualitative level, our analysis of posts on Ukraine, whose relationship with Russia is of high geopolitical relevance, provides further insights into the contrast between state-controlled and independent outlets.</abstract>
       <url hash="962662b6">2022.lrec-1.569</url>
@@ -6595,7 +6595,7 @@
       <title>»textklang« – Towards a Multi-Modal Exploration Platform for <fixed-case>G</fixed-case>erman Poetry</title>
       <author><first>Nadja</first><last>Schauffler</last></author>
       <author><first>Toni</first><last>Bernhart</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Gunilla</first><last>Eschenbach</last></author>
       <author><first>Markus</first><last>Gärtner</last></author>
       <author><first>Kerstin</first><last>Jung</last></author>
@@ -6650,7 +6650,7 @@
       <author><first>Chang</first><last>Shen</last></author>
       <author><first>Sally</first><last>Ma</last></author>
       <author><first>Tomoe</first><last>Mizutani</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>5388–5392</pages>
       <abstract>Fast-developing fields such as Artificial Intelligence (AI) often outpace the efforts of encyclopedic sources such as Wikipedia, which either do not completely cover recently-introduced topics or lack such content entirely. As a result, methods for automatically producing content are valuable tools to address this information overload. We show that recent advances in pretrained language modeling can be combined for a two-stage extractive and abstractive approach for Wikipedia lead paragraph generation. We extend this approach to generate longer Wikipedia-style summaries with sections and examine how such methods struggle in this application through detailed studies with 100 reference human-collected surveys. This is the first study on utilizing web resources for long Wikipedia-style summaries to the best of our knowledge.</abstract>
       <url hash="153a8657">2022.lrec-1.576</url>
@@ -6661,7 +6661,7 @@
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
       <author><first>Nirupama</first><last>Chandrasekaran</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Ryen</first><last>White</last></author>
+      <author id="ryen-white"><first>Ryen</first><last>White</last></author>
       <pages>5393–5403</pages>
       <abstract>Tasks are a fundamental unit of work in the daily lives of people, who are increasingly using digital means to keep track of, organize, triage, and act on them. These digital tools – such as task management applications – provide a unique opportunity to study and understand tasks and their connection to the real world, and through intelligent assistance, help people be more productive. By logging signals such as text, timestamp information, and social connectivity graphs, an increasingly rich and detailed picture of how tasks are created and organized, what makes them important, and who acts on them, can be progressively developed. Yet the context around actual task completion remains fuzzy, due to the basic disconnect between actions taken in the real world and telemetry recorded in the digital world. Thus, in this paper we compile and release a novel, real-life, large-scale dataset called MS-LaTTE that captures two core aspects of the context surrounding task completion: location and time. We describe our annotation framework and conduct a number of analyses on the data that were collected, demonstrating that it captures intuitive contextual properties for common tasks. Finally, we test the dataset on the two problems of predicting spatial and temporal task co-occurrence, concluding that predictors for co-location and co-time are both learnable, with a BERT fine-tuned model outperforming several other baselines. The MS-LaTTE dataset provides an opportunity to tackle many new modeling challenges in contextual task understanding and we hope that its release will spur future research in task intelligence more broadly.</abstract>
       <url hash="f45ac3f8">2022.lrec-1.577</url>
@@ -6708,7 +6708,7 @@
       <author><first>Marina</first><last>Santini</last></author>
       <author><first>Peter</first><last>Lundberg</last></author>
       <author><first>Yosef</first><last>Al-Abasse</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <author><first>Emma</first><last>Eneling</last></author>
       <author><first>Magnus</first><last>Stridsman</last></author>
       <pages>5428–5435</pages>
@@ -6721,7 +6721,7 @@
       <author><first>Saméh</first><last>Kchaou</last></author>
       <author><first>Rahma</first><last>Boujelbane</last></author>
       <author><first>Emna</first><last>Fsih</last></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last></author>
       <pages>5436–5443</pages>
       <abstract>With the growing access to the internet, the spoken Arabic dialect language becomes informal languages written in social media. Most users post comments using their own dialect. This linguistic situation inhibits mutual understanding between internet users and makes difficult to use computational approaches since most Arabic resources are intended for the formal language: Modern Standard Arabic (MSA). In this paper, we present a pipeline to standardize the written texts in social networks by translating them to the standard language MSA. We fine-tun at first an identification bert-based model to select Tunisian Dialect (TD) from MSA and other dialects. Then, we learned transformer model to translate TD to MSA. The final system includes the translated TD text and the originally text written in MSA. Each of these steps was evaluated on the same test corpus. In order to test the effectiveness of the approach, we compared two opinion analysis models, the first intended for the Sentiment Analysis (SA) of dialect texts and the second for the MSA texts. We concluded that through standardization we obtain the best score.</abstract>
       <url hash="33a23f01">2022.lrec-1.582</url>
@@ -6739,7 +6739,7 @@
     <paper id="584">
       <title>Preliminary Results on the Evaluation of Computational Tools for the Analysis of <fixed-case>Q</fixed-case>uechua and <fixed-case>A</fixed-case>ymara</title>
       <author><first>Marcelo Yuji</first><last>Himoro</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <pages>5450–5459</pages>
       <abstract>This research has focused on evaluating the existing open-source morphological analyzers for two of the most widely spoken indigenous macrolanguages in South America, namely Quechua and Aymara. Firstly, we have evaluated their performance (precision, recall and F1 score) for the individual languages for which they were developed (Cuzco Quechua and Aymara). Secondly, in order to assess how these tools handle other individual languages of the macrolanguage, we have extracted some sample text from school textbooks and educational resources. This sample text was edited in the different countries where these macrolanguages are spoken (Colombia, Ecuador, Peru, Bolivia, Chile and Argentina for Quechua; and Bolivia, Peru and Chile for Aymara), and it includes their different standardized forms (10 individual languages of Quechua and 3 of Aymara). Processing this text by means of the tools, we have (i) calculated their coverage (number of words recognized and analyzed) and (ii) studied in detail the cases for which each tool was unable to generate any output. Finally, we discuss different ways in which these tools could be optimized, either to improve their performances or, in the specific case of Quechua, to cover more individual languages of this macrolanguage in future works as well.</abstract>
       <url hash="72fe1608">2022.lrec-1.584</url>
@@ -6769,7 +6769,7 @@
     <paper id="586">
       <title><fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>up: Corpus for Full Text Biomedical Document Indexing</title>
       <author><first>Xindi</first><last>Wang</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
       <pages>5473–5483</pages>
       <abstract>Medical Subject Heading (MeSH) indexing refers to the problem of assigning a given biomedical document with the most relevant labels from an extremely large set of MeSH terms. Currently, the vast number of biomedical articles in the PubMed database are manually annotated by human curators, which is time consuming and costly; therefore, a computational system that can assist the indexing is highly valuable. When developing supervised MeSH indexing systems, the availability of a large-scale annotated text corpus is desirable. A publicly available, large corpus that permits robust evaluation and comparison of various systems is important to the research community. We release a large scale annotated MeSH indexing corpus, MeSHup, which contains 1,342,667 full text articles, together with the associated MeSH labels and metadata, authors and publication venues that are collected from the MEDLINE database. We train an end-to-end model that combines features from documents and their associated labels on our corpus and report the new baseline.</abstract>
@@ -6780,7 +6780,7 @@
       <title>Hierarchical Annotation for Building A Suite of Clinical Natural Language Processing Tasks: Progress Note Understanding</title>
       <author><first>Yanjun</first><last>Gao</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Samuel</first><last>Tesch</last></author>
       <author><first>Ryan</first><last>Laffin</last></author>
       <author><first>Matthew M.</first><last>Churpek</last></author>
@@ -6792,7 +6792,7 @@
     </paper>
     <paper id="588">
       <title><fixed-case>KC</fixed-case>4<fixed-case>MT</fixed-case>: A High-Quality Corpus for Multilingual Machine Translation</title>
-      <author><first>Vinh Van</first><last>Nguyen</last></author>
+      <author id="vinh-van-nguyen"><first>Vinh Van</first><last>Nguyen</last></author>
       <author><first>Ha</first><last>Nguyen</last></author>
       <author><first>Huong Thanh</first><last>Le</last></author>
       <author><first>Thai Phuong</first><last>Nguyen</last></author>
@@ -6800,7 +6800,7 @@
       <author><first>Luan Nghia</first><last>Pham</last></author>
       <author><first>Anh Tuan</first><last>Phan</last></author>
       <author><first>Cong Hoang-Minh</first><last>Nguyen</last></author>
-      <author><first>Viet Hong</first><last>Tran</last></author>
+      <author id="viet-hong-tran"><first>Viet Hong</first><last>Tran</last></author>
       <author><first>Anh Huu</first><last>Tran</last></author>
       <pages>5494–5502</pages>
       <abstract>The multilingual parallel corpus is an important resource for many applications of natural language processing (NLP). For machine translation, the size and quality of the training corpus mainly affects the quality of the translation models. In this work, we present the method for building high-quality multilingual parallel corpus in the news domain and for some low-resource languages, including Vietnamese, Laos, and Khmer, to improve the quality of multilingual machine translation in these areas. We also publicized this one that includes 500.000 Vietnamese-Chinese bilingual sentence pairs; 150.000 Vietnamese-Laos bilingual sentence pairs, and 150.000 Vietnamese-Khmer bilingual sentence pairs.</abstract>
@@ -6830,7 +6830,7 @@
       <author><first>Michael</first><last>Gamon</last></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5517–5524</pages>
       <abstract>Document authoring involves a lengthy revision process, marked by individual edits that are frequently linked to comments. Modeling the relationship between edits and comments leads to a better understanding of document evolution, potentially benefiting applications such as content summarization, and task triaging. Prior work on understanding revisions has primarily focused on classifying edit intents, but falling short of a deeper understanding of the nature of these edits. In this paper, we present explore the challenge of describing an edit at two levels: identifying the edit intent, and describing the edit using free-form text. We begin by defining a taxonomy of general edit intents and introduce a new dataset of full revision histories of Wikipedia pages, annotated with each revision’s edit intent. Using this dataset, we train a classifier that achieves a 90% accuracy in identifying edit intent. We use this classifier to train a distantly-supervised model that generates a high-level description of a revision in free-form text. Our experimental results show that incorporating edit intent information aids in generating better edit descriptions. We establish a set of baselines for the edit description task, achieving a best score of 28 ROUGE, thus demonstrating the effectiveness of our layered approach to edit understanding.</abstract>
       <url hash="f7ace795">2022.lrec-1.591</url>
@@ -6865,7 +6865,7 @@
       <title><fixed-case>CLGC</fixed-case>: A Corpus for <fixed-case>C</fixed-case>hinese Literary Grace Evaluation</title>
       <author><first>Yi</first><last>Li</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>5548–5556</pages>
       <abstract>In this paper, we construct a Chinese literary grace corpus, CLGC, with 10,000 texts and more than 1.85 million tokens. Multi-level annotations are provided for each text in our corpus, including literary grace level, sentence category, and figure-of-speech type. Based on the corpus, we dig deep into the correlation between fine-grained features (semantic information, part-of-speech and figure-of-speech, etc.) and literary grace level. We also propose a new Literary Grace Evaluation (LGE) task, which aims at making a comprehensive assessment of the literary grace level according to the text. In the end, we build some classification models with machine learning algorithms (such as SVM, TextCNN) to prove the effectiveness of our features and corpus for LGE. The results of our preliminary classification experiments have achieved 79.71% on the weighted average F1-score.</abstract>
       <url hash="ea1f36fa">2022.lrec-1.594</url>
@@ -6873,7 +6873,7 @@
     </paper>
     <paper id="595">
       <title>Anonymising the <fixed-case>SAGT</fixed-case> Speech Corpus and Treebank</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Antje</first><last>Schweitzer</last></author>
       <pages>5557–5564</pages>
       <abstract>Anonymisation, that is identifying and neutralising sensitive references, is a crucial part of dataset creation. In this paper, we describe the anonymisation process of a Turkish-German code-switching corpus, namely SAGT, which consists of speech data and a treebank that is built on its transcripts. We employed a selective pseudonymisation approach where we manually identified sensitive references to anonymise and replaced them with surrogate values on the treebank side. In addition to maintaining data privacy, our primary concerns in surrogate selection were keeping the integrity of code-switching properties, morphosyntactic annotation layers, and semantics. After the treebank anonymisation, we anonymised the speech data by mapping between the treebank sentences and audio transcripts with the help of Praat scripts. The treebank is publicly available for research purposes and the audio files can be obtained via an individual licence agreement.</abstract>
@@ -6918,7 +6918,7 @@
     </paper>
     <paper id="599">
       <title>Design and Evaluation of the Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation</title>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Haruka</first><last>Amatani</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <author><first>Yuriko</first><last>Iseki</last></author>
@@ -6958,7 +6958,7 @@
       <author><first>Bernardo</first><last>Consoli</last></author>
       <author><first>Henrique D. P.</first><last>dos Santos</last></author>
       <author><first>Ana Helena D. P. S.</first><last>Ulbrich</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Rafael H.</first><last>Bordini</last></author>
       <pages>5609–5616</pages>
       <abstract>Computational medicine research requires clinical data for training and testing purposes, so the development of datasets composed of real hospital data is of utmost importance in this field. Most such data collections are in the English language, were collected in anglophone countries, and do not reflect other clinical realities, which increases the importance of national datasets for projects that hope to positively impact public health. This paper presents a new Brazilian Clinical Dataset containing over 70,000 admissions from 10 hospitals in two Brazilian states, composed of a sum total of over 2.5 million free-text clinical notes alongside data pertaining to patient information, prescription information, and exam results. This data was collected, organized, deidentified, and is being distributed via credentialed access for the use of the research community. In the course of presenting the new dataset, this paper will explore the new dataset’s structure, population, and potential benefits of using this dataset in clinical AI tasks.</abstract>
@@ -6967,10 +6967,10 @@
     </paper>
     <paper id="603">
       <title>Universal Grammatical Dependencies for <fixed-case>P</fixed-case>ortuguese with <fixed-case>CINTIL</fixed-case> Data, <fixed-case>LX</fixed-case> Processing and <fixed-case>CLARIN</fixed-case> support</title>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <pages>5617–5626</pages>
       <abstract>The grammatical framework for the mapping between linguistic form and meaning representation known as Universal Dependencies relies on a non-constituency syntactic analysis that is centered on the notion of grammatical relation (e.g. Subject, Object, etc.). Given its core goal of providing a common set of analysis primitives suitable to every natural language, and its practical objective of fostering their computational grammatical processing, it keeps being an active domain of research in science and technology of language. This paper presents a new collection of quality language resources for the computational processing of the Portuguese language under the Universal Dependencies framework (UD). This is an all-encompassing, publicly available open collection of mutually consistent and inter-operable scientific resources that includes reliably annotated corpora, top-performing processing tools and expert support services: a new UPOS-annotated corpus, CINTIL-UPos, with 675K tokens and a new UD treebank, CINTIL-UDep Treebank, with nearly 38K sentences; a UPOS tagger, LX-UTagger, and a UD parser, LX-UDParser, trained on these corpora, available both as local stand-alone tools and as remote web-based services; and helpdesk support ensured by the Knowledge Center for the Science and Technology of Portuguese of the CLARIN research infrastructure.</abstract>
       <url hash="1f391d7a">2022.lrec-1.603</url>
@@ -7016,7 +7016,7 @@
     <paper id="608">
       <title>Building a Synthetic Biomedical Research Article Citation Linkage Corpus</title>
       <author><first>Sudipta</first><last>Singha Roy</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>5665–5672</pages>
       <abstract>Citations are frequently used in publications to support the presented results and to demonstrate the previous discoveries while also assisting the reader in following the chronological progression of information through publications. In scientific publications, a citation refers to the referenced document, but it makes no mention of the exact span of text that is being referred to. Connecting the citation to this span of text is called citation linkage. In this paper, to find these citation linkages in biomedical research publications using deep learning, we provide a synthetic silver standard corpus as well as the method to build this corpus. The motivation for building this corpus is to provide a training set for deep learning models that will locate the text spans in a reference article, given a citing statement, based on semantic similarity. This corpus is composed of sentence pairs, where one sentence in each pair is the citing statement and the other one is a candidate cited statement from the referenced paper. The corpus is annotated using an unsupervised sentence embedding method. The effectiveness of this silver standard corpus for training citation linkage models is validated against a human-annotated gold standard corpus.</abstract>
       <url hash="9d746d51">2022.lrec-1.608</url>
@@ -7080,7 +7080,7 @@
       <author><first>Nirmal</first><last>Surange</last></author>
       <author><first>Pavan</first><last>Baswani</last></author>
       <author><first>Priyanka</first><last>Ravva</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>5712–5722</pages>
       <abstract>Expert human annotation for summarization is definitely an expensive task, and can not be done on huge scales. But with this work, we show that even with a crowd sourced summary generation approach, quality can be controlled by aggressive expert informed filtering and sampling-based human evaluation. We propose a pipeline that crowd-sources summarization data and then aggressively filters the content via: automatic and partial expert evaluation. Using this pipeline we create a high-quality Telugu Abstractive Summarization dataset (TeSum) which we validate with sampling-based human evaluation. We also provide baseline numbers for various models commonly used for summarization. A number of recently released datasets for summarization, scraped the web-content relying on the assumption that summary is made available with the article by the publishers. While this assumption holds for multiple resources (or news-sites) in English, it should not be generalised across languages without thorough analysis and verification. Our analysis clearly shows that this assumption does not hold true for most Indian language news resources. We show that our proposed filtration pipeline can even be applied to these large-scale scraped datasets to extract better quality article-summary pairs.</abstract>
       <url hash="9a04438b">2022.lrec-1.614</url>
@@ -7088,7 +7088,7 @@
     </paper>
     <paper id="615">
       <title>A Corpus of Simulated Counselling Sessions with Dialog Act Annotation</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Haley</first><last>Fong</last></author>
       <author><first>Lai Shuen Judy</first><last>Wong</last></author>
       <author><first>Chun Chung</first><last>Mak</last></author>
@@ -7105,7 +7105,7 @@
       <author><first>Yulan</first><last>Feng</last></author>
       <author><first>Carla</first><last>Gordon</last></author>
       <author><first>Seyed Hossein</first><last>Alavi</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>5731–5738</pages>
       <abstract>The ultimate goal of dialog research is to develop systems that can be effectively used in interactive settings by real users. To this end, we introduced the Interactive Evaluation of Dialog Track at the 9th Dialog System Technology Challenge. This track consisted of two sub-tasks. The first sub-task involved building knowledge-grounded response generation models. The second sub-task aimed to extend dialog models beyond static datasets by assessing them in an interactive setting with real users. Our track challenges participants to develop strong response generation models and explore strategies that extend them to back-and-forth interactions with real users. The progression from static corpora to interactive evaluation introduces unique challenges and facilitates a more thorough assessment of open-domain dialog systems. This paper provides an overview of the track, including the methodology and results. Furthermore, it provides insights into how to best evaluate open-domain dialog models.</abstract>
@@ -7115,7 +7115,7 @@
     <paper id="617">
       <title><fixed-case>HADREB</fixed-case>: Human Appraisals and (<fixed-case>E</fixed-case>nglish) Descriptions of Robot Emotional Behaviors</title>
       <author><first>Josue</first><last>Torres-Fonseca</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>5739–5748</pages>
       <abstract>Humans sometimes anthropomorphize everyday objects, but especially robots that have human-like qualities and that are often able to interact with and respond to humans in ways that other objects cannot. Humans especially attribute emotion to robot behaviors, partly because humans often use and interpret emotions when interacting with other humans, and they apply that capability when interacting with robots. Moreover, emotions are a fundamental part of the human language system and emotions are used as scaffolding for language learning, making them an integral part of language learning and meaning. However, there are very few datasets that explore how humans perceive the emotional states of robots and how emotional behaviors relate to human language. To address this gap we have collected HADREB, a dataset of human appraisals and English descriptions of robot emotional behaviors collected from over 30 participants. These descriptions and human emotion appraisals are collected using the Mistyrobotics Misty II and the Digital Dream Labs Cozmo (formerly Anki) robots. The dataset contains English descriptions and emotion appraisals of more than 500 descriptions and graded valence labels of 8 emotion pairs for each behavior and each robot. In this paper we describe the process of collecting and cleaning the data, give a general analysis of the data, and evaluate the usefulness of the dataset in two experiments, one using a language model to map descriptions to emotions, the other maps robot behaviors to emotions.</abstract>
       <url hash="19fce756">2022.lrec-1.617</url>
@@ -7148,7 +7148,7 @@
     <paper id="620">
       <title>Strategy-level Entrainment of Dialogue System Users in a Creative Visual Reference Resolution Task</title>
       <author><first>Deepthi</first><last>Karkada</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Maike</first><last>Paetzel-Prüsmann</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>5768–5777</pages>
@@ -7212,7 +7212,7 @@
     <paper id="625">
       <title>Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis</title>
       <author><first>Ada</first><last>Tur</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>5813–5820</pages>
       <abstract>In this paper, we compare two different approaches to language understanding for a human-robot interaction domain in which a human commander gives navigation instructions to a robot. We contrast a relevance-based classifier with a GPT-2 model, using about 2000 input-output examples as training data. With this level of training data, the relevance-based model outperforms the GPT-2 based model 79% to 8%. We also present a taxonomy of types of errors made by each model, indicating that they have somewhat different strengths and weaknesses, so we also examine the potential for a combined model.</abstract>
       <url hash="644b29c2">2022.lrec-1.625</url>
@@ -7234,7 +7234,7 @@
       <author><first>Priyanshu</first><last>Priya</last></author>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5829–5837</pages>
       <abstract>The long-standing goal of Artificial Intelligence (AI) has been to create human-like conversational systems. Such systems should have the ability to develop an emotional connection with the users, consequently, emotion recognition in dialogues has gained popularity. Emotion detection in dialogues is a challenging task because humans usually convey multiple emotions with varying degrees of intensities in a single utterance. Moreover, emotion in an utterance of a dialogue may be dependent on previous utterances making the task more complex. Recently, emotion recognition in low-resource languages like Hindi has been in great demand. However, most of the existing datasets for multi-label emotion and intensity detection in conversations are in English. To this end, we propose a large conversational dataset in Hindi named EmoInHindi for multi-label emotion and intensity recognition in conversations containing 1,814 dialogues with a total of 44,247 utterances. We prepare our dataset in a Wizard-of-Oz manner for mental health and legal counselling of crime victims. Each utterance of dialogue is annotated with one or more emotion categories from 16 emotion labels including neutral and their corresponding intensity. We further propose strong contextual baselines that can detect the emotion(s) and corresponding emotional intensity of an utterance given the conversational context.</abstract>
       <url hash="0db44bbe">2022.lrec-1.627</url>
@@ -7263,7 +7263,7 @@
       <title>A Language Modelling Approach to Quality Assessment of <fixed-case>OCR</fixed-case>’ed Historical Text</title>
       <author><first>Callum</first><last>Booth</last></author>
       <author><first>Robert</first><last>Shoemaker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>5859–5864</pages>
       <abstract>We hypothesise and evaluate a language model-based approach for scoring the quality of OCR transcriptions in the British Library Newspapers (BLN) corpus parts 1 and 2, to identify the best quality OCR for use in further natural language processing tasks, with a wider view to link individual newspaper reports of crime in nineteenth-century London to the Digital Panopticon—a structured repository of criminal lives. We mitigate the absence of gold standard transcriptions of the BLN corpus by utilising a corpus of genre-adjacent texts that capture the common and legal parlance of nineteenth-century London—the Proceedings of the Old Bailey Online—with a view to rank the BLN transcriptions by their OCR quality.</abstract>
       <url hash="60d7ad8a">2022.lrec-1.630</url>
@@ -7428,7 +7428,7 @@
       <title>Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain</title>
       <author><first>Divya</first><last>Tadimeti</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>6001–6008</pages>
       <abstract>We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems on dialogue agent-directed English speech from speakers with General American vs. non-American accents. Our results show that the performance of the ASR systems for non-American accents is considerably worse than for General American accents. Depending on the recognizer, the absolute difference in performance between General American accents and all non-American accents combined can vary approximately from 2% to 12%, with relative differences varying approximately between 16% and 49%. This drop in performance becomes even larger when we consider specific categories of non-American accents indicating a need for more diligent collection of and training on non-native English speaker data in order to narrow this performance gap. There are performance differences across ASR systems, and while the same general pattern holds, with more errors for non-American accents, there are some accents for which the best recognizer is different than in the overall case. We expect these results to be useful for dialogue system designers in developing more robust inclusive dialogue systems, and for ASR providers in taking into account performance requirements for different accents.</abstract>
       <url hash="0da7430c">2022.lrec-1.645</url>
@@ -7458,9 +7458,9 @@
       <author><first>Souvik</first><last>Kundu</last></author>
       <author><first>José</first><last>Cañete</last></author>
       <author><first>Marcelo</first><last>Mendoza</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Felipe</first><last>Bravo-Marquez</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Alvaro</first><last>Soto</last></author>
       <pages>6024–6034</pages>
       <abstract>Due to the success of pre-trained language models, versions of languages other than English have been released in recent years. This fact implies the need for resources to evaluate these models. In the case of Spanish, there are few ways to systematically assess the models’ quality. In this paper, we narrow the gap by building two evaluation benchmarks. Inspired by previous work (Conneau and Kiela, 2018; Chen et al., 2019), we introduce Spanish SentEval and Spanish DiscoEval, aiming to assess the capabilities of stand-alone and discourse-aware sentence representations, respectively. Our benchmarks include considerable pre-existing and newly constructed datasets that address different tasks from various domains. In addition, we evaluate and analyze the most recent pre-trained Spanish language models to exhibit their capabilities and limitations. As an example, we discover that for the case of discourse evaluation tasks, mBERT, a language model trained on multiple languages, usually provides a richer latent representation than models trained only with documents in Spanish. We hope our contribution will motivate a fairer, more comparable, and less cumbersome way to evaluate future Spanish language models.</abstract>
@@ -7514,7 +7514,7 @@
       <author><first>Hoang</first><last>Van</last></author>
       <author><first>Moriba</first><last>Jah</last></author>
       <author><first>Roberto</first><last>Furfaro</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>6077–6082</pages>
       <abstract>Space situational awareness typically makes use of physical measurements from radar, telescopes, and other assets to monitor satellites and other spacecraft for operational, navigational, and defense purposes. In this work we explore using textual input for the space situational awareness task. We construct a corpus of 48.5k news articles spanning all known active satellites between 2009 and 2020. Using a dependency-rule-based extraction system designed to target three high-impact events – spacecraft launches, failures, and decommissionings, we identify 1,787 space-event sentences that are then annotated by humans with 15.9k labels for event slots. We empirically demonstrate a state-of-the-art neural extraction system achieves an overall F1 between 53 and 91 per slot for event extraction in this low-resource, high-impact domain.</abstract>
       <url hash="e3006f8f">2022.lrec-1.653</url>
@@ -7615,7 +7615,7 @@
       <title>Incorporating Zoning Information into Argument Mining from Biomedical Literature</title>
       <author><first>Boyang</first><last>Liu</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>6162–6169</pages>
       <abstract>The goal of text zoning is to segment a text into zones (i.e., Background, Conclusion) that serve distinct functions. Argumentative zoning, a specific text zoning scheme for the scientific domain, is considered as the antecedent for argument mining by many researchers. Surprisingly, however, little work is concerned with exploiting zoning information to improve the performance of argument mining models, despite the relatedness of the two tasks. In this paper, we propose two transformer-based models to incorporate zoning information into argumentative component identification and classification tasks. One model is for the sentence-level argument mining task and the other is for the token-level task. In particular, we add the zoning labels predicted by an off-the-shelf model to the beginning of each sentence, inspired by the convention commonly used biomedical abstracts. Moreover, we employ multi-head attention to transfer the sentence-level zoning information to each token in a sentence. Based on experiment results, we find a significant improvement in F1-scores for both sentence- and token-level tasks. It is worth mentioning that these zoning labels can be obtained with high accuracy by utilising readily available automated methods. Thus, existing argument mining models can be improved by incorporating zoning information without any additional annotation cost.</abstract>
@@ -7637,10 +7637,10 @@
     <paper id="665">
       <title>From Examples to Rules: Neural Guided Rule Synthesis for Information Extraction</title>
       <author><first>Robert</first><last>Vacareanu</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>George Caique</first><last>Gouveia Barbosa</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
-      <author><first>Gustave</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gustave</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>6180–6189</pages>
       <abstract>While deep learning approaches to information extraction have had many successes, they can be difficult to augment or maintain as needs shift. Rule-based methods, on the other hand, can be more easily modified. However, crafting rules requires expertise in linguistics and the domain of interest, making it infeasible for most users. Here we attempt to combine the advantages of these two directions while mitigating their drawbacks. We adapt recent advances from the adjacent field of program synthesis to information extraction, synthesizing rules from provided examples. We use a transformer-based architecture to guide an enumerative search, and show that this reduces the number of steps that need to be explored before a rule is found. Further, we show that without training the synthesis algorithm on the specific domain, our synthesized rules achieve state-of-the-art performance on the 1-shot scenario of a task that focuses on few-shot learning for relation classification, and competitive performance in the 5-shot scenario.</abstract>
@@ -7729,7 +7729,7 @@
       <author><first>Ankush</first><last>Agarwal</last></author>
       <author><first>Raj</first><last>Gite</last></author>
       <author><first>Shreya</first><last>Laddha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Satyanarayan</first><last>Kar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Prabhjit</first><last>Thind</last></author>
@@ -7754,7 +7754,7 @@
       <title>A Large Interlinked Knowledge Graph of the <fixed-case>I</fixed-case>talian Cultural Heritage</title>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Andrea</first><last>Lenzi</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>6280–6289</pages>
       <abstract>Knowledge is the lifeblood for a plethora of applications such as search, recommender systems and natural language understanding. Thanks to the efforts in the fields of Semantic Web and Linked Open Data a growing number of interlinked knowledge bases are supporting the development of advanced knowledge-based applications. Unfortunately, for a large number of domain-specific applications, these knowledge bases are unavailable. In this paper, we present a resource consisting of a large knowledge graph linking the Italian cultural heritage entities (defined in the ArCo ontology) with the concepts defined on well-known knowledge bases (i.e., DBpedia and the Getty GVP ontology). We describe the methodologies adopted for the semi-automatic resource creation and provide an in-depth analysis of the resulting interlinked graph.</abstract>
       <url hash="2e074f87">2022.lrec-1.675</url>
@@ -7762,7 +7762,7 @@
     </paper>
     <paper id="676">
       <title>Training on Lexical Resources</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Xingyu</first><last>Cai</last></author>
       <author><first>Yuchen</first><last>Bian</last></author>
       <pages>6290–6299</pages>
@@ -7786,9 +7786,9 @@
       <author><first>Andis</first><last>Lagzdiņš</last></author>
       <author><first>Uldis</first><last>Siliņš</last></author>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <pages>6310–6316</pages>
       <abstract>Consolidated access to current and reliable terms from different subject fields and languages is necessary for content creators and translators. Terminology is also needed in AI applications such as machine translation, speech recognition, information extraction, and other natural language processing tools. In this work, we facilitate standards-based sharing and management of terminology resources by providing an open terminology management solution - the EuroTermBank Toolkit. It allows organisations to manage and search their terms, create term collections, and share them within and outside the organisation by participating in the network of federated databases. The data curated in the federated databases are automatically shared with EuroTermBank, the largest multilingual terminology resource in Europe, allowing translators and language service providers as well as researchers and students to access terminology resources in their most current version.</abstract>
       <url hash="632e91b0">2022.lrec-1.678</url>
@@ -7829,7 +7829,7 @@
       <author><first>Cécile</first><last>Robin</last></author>
       <author><first>Gautham Vadakkekara</first><last>Suresh</last></author>
       <author><first>Víctor</first><last>Rodriguez-Doncel</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>6352–6360</pages>
       <abstract>Language resources are a key component of natural language processing and related research and applications. Users of language resources have different needs in terms of format, language, topics, etc. for the data they need to use. Linghub (McCrae and Cimiano, 2015) was first developed for this purpose, using the capabilities of linked data to represent metadata, and tackling the heterogeneous metadata issue. Linghub aimed at helping language resources and technology users to easily find and retrieve relevant data, and identify important information on access, topics, etc. This work describes a rejuvenation and modernisation of the 2015 platform into using a popular open source data management system, DSpace, as foundation. The new platform, Linghub2, contains updated and extended resources, more languages offered, and continues the work towards homogenisation of metadata through conversions, through linkage to standardisation strategies and community groups, such as the Open Digital Rights Language (ODRL) community group.</abstract>
@@ -7843,7 +7843,7 @@
       <author><first>Pin-Er</first><last>Chen</last></author>
       <author><first>Hsin-Yu</first><last>Chou</last></author>
       <author><first>Mao-Chang</first><last>Ku</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>6361–6369</pages>
       <abstract>Constructions are direct form-meaning pairs with possible schematic slots. These slots are simultaneously constrained by the embedded construction itself and the sentential context. We propose that the constraint could be described by a conditional probability distribution. However, as this conditional probability is inevitably complex, we utilize language models to capture this distribution. Therefore, we build CxLM, a deep learning-based masked language model explicitly tuned to constructions’ schematic slots. We first compile a construction dataset consisting of over ten thousand constructions in Taiwan Mandarin. Next, an experiment is conducted on the dataset to examine to what extent a pretrained masked language model is aware of the constructions. We then fine-tune the model specifically to perform a cloze task on the opening slots. We find that the fine-tuned model predicts masked slots more accurately than baselines and generates both structurally and semantically plausible word samples. Finally, we release CxLM and its dataset as publicly available resources and hope to serve as new quantitative tools in studying construction grammar.</abstract>
       <url hash="fd8dac99">2022.lrec-1.683</url>
@@ -7888,7 +7888,7 @@
       <title>How Does the Experimental Setting Affect the Conclusions of Neural Encoding Models?</title>
       <author><first>Xiaohan</first><last>Zhang</last></author>
       <author><first>Shaonan</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>6397–6404</pages>
       <abstract>Recent years have witnessed the tendency of neural encoding models on exploring brain language processing using naturalistic stimuli. Neural encoding models are data-driven methods that require an encoding model to investigate the mystery of brain mechanisms hidden in the data. As a data-driven method, the performance of encoding models is very sensitive to the experimental setting. However, it is unknown how the experimental setting further affects the conclusions of neural encoding models. This paper systematically investigated this problem and evaluated the influence of three experimental settings, i.e., the data size, the cross-validation training method, and the statistical testing method. Results demonstrate that inappropriate cross-validation training and small data size can substantially decrease the performance of encoding models, especially in the temporal lobe and the frontal lobe. And different null hypotheses in significance testing lead to highly different significant brain regions. Based on these results, we suggest a block-wise cross-validation training method and an adequate data size for increasing the performance of linear encoding models. We also propose two strict null hypotheses to control false positive discovery rates.</abstract>
       <url hash="8a5f401a">2022.lrec-1.687</url>
@@ -7908,7 +7908,7 @@
     <paper id="689">
       <title>Progress in Multilingual Speech Recognition for Low Resource Languages <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish, <fixed-case>C</fixed-case>ree and Inuktut</title>
       <author><first>Vishwa</first><last>Gupta</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <pages>6420–6428</pages>
       <abstract>This contribution presents our efforts to develop the automatic speech recognition (ASR) systems for three low resource languages: Kurmanji Kurdish, Cree and Inuktut. As a first step, we generate multilingual models from acoustic training data from 12 different languages in the hybrid DNN/HMM framework. We explore different strategies for combining the phones from different languages: either keep the phone labels separate for each language or merge the common phones. For Kurmanji Kurdish and Inuktut, keeping the phones separate gives much lower word error rate (WER), while merging phones gives lower WER for Cree. These WER are lower than training the acoustic models separately for each language. We also compare two different DNN architectures: factored time delay neural network (TDNN-F), and bidirectional long short-term memory (BLSTM) acoustic models. The TDNN-F acoustic models give significantly lower WER for Kurmanji Kurdish and Cree, while BLSTM acoustic models give significantly lower WER for Inuktut. We also show that for each language, training multilingual acoustic models by one more epoch with acoustic data from that language reduces the WER significantly. We also added 512-dimensional embedding features from cross-lingual pre-trained wav2vec2.0 XLSR-53 models, but they lead to only a small reduction in WER.</abstract>
       <url hash="4a307ec4">2022.lrec-1.689</url>
@@ -7916,7 +7916,7 @@
     </paper>
     <paper id="690">
       <title>Efficient Entity Candidate Generation for Low-Resource Languages</title>
-      <author><first>Alberto</first><last>Garcia-Duran</last></author>
+      <author id="alberto-garcia-duran"><first>Alberto</first><last>Garcia-Duran</last></author>
       <author><first>Akhil</first><last>Arora</last></author>
       <author><first>Robert</first><last>West</last></author>
       <pages>6429–6438</pages>
@@ -7930,7 +7930,7 @@
       <author><first>Kelechi</first><last>Ogueji</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Orevaoghene</first><last>Ahia</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>6439–6449</pages>
       <abstract>In recent years, the natural language processing (NLP) community has given increased attention to the disparity of efforts directed towards high-resource languages over low-resource ones. Efforts to remedy this delta often begin with translations of existing English datasets into other languages. However, this approach ignores that different language communities have different needs. We consider a group of low-resource languages, creole languages. Creoles are both largely absent from the NLP literature, and also often ignored by society at large due to stigma, despite these languages having sizable and vibrant communities. We demonstrate, through conversations with creole experts and surveys of creole-speaking communities, how the things needed from language technology can change dramatically from one language to another, even when the languages are considered to be very similar to each other, as with creoles. We discuss the prominent themes arising from these conversations, and ultimately demonstrate that useful language technology cannot be built without involving the relevant community.</abstract>
       <url hash="e1e6ac5e">2022.lrec-1.691</url>
@@ -7967,7 +7967,7 @@
       <author><first>Shamsuddeen</first><last>Muhammad</last></author>
       <author><first>Ibrahim Sa’id</first><last>Ahmad</last></author>
       <author><first>Subhadarshi</first><last>Panda</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Bashir Shehu</first><last>Galadanci</last></author>
       <author><first>Bello Shehu</first><last>Bello</last></author>
       <pages>6471–6479</pages>
@@ -8007,7 +8007,7 @@
       <title>Survey on <fixed-case>T</fixed-case>hai <fixed-case>NLP</fixed-case> Language Resources and Tools</title>
       <author><first>Ratchakrit</first><last>Arreerard</last></author>
       <author><first>Stephen</first><last>Mander</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <pages>6495–6505</pages>
       <abstract>Over the past decades, Natural Language Processing (NLP) research has been expanding to cover more languages. Recently particularly, NLP community has paid increasing attention to under-resourced languages. However, there are still many languages for which NLP research is limited in terms of both language resources and software tools. Thai language is one of the under-resourced languages in the NLP domain, although it is spoken by nearly 70 million people globally. In this paper, we report on our survey on the past development of Thai NLP research to help understand its current state and future research directions. Our survey shows that, although Thai NLP community has achieved a significant achievement over the past three decades, particularly on NLP upstream tasks such as tokenisation, research on downstream tasks such as syntactic parsing and semantic analysis is still limited. But we foresee that Thai NLP research will advance rapidly as richer Thai language resources and more robust NLP techniques become available.</abstract>
       <url hash="ae5e04d1">2022.lrec-1.697</url>
@@ -8115,7 +8115,7 @@
       <title><fixed-case>G</fixed-case>eez<fixed-case>S</fixed-case>witch: Language Identification in Typologically Related Low-resourced <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Fitsum</first><last>Gaim</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>6578–6584</pages>
       <abstract>Language identification is one of the fundamental tasks in natural language processing that is a prerequisite to data processing and numerous applications. Low-resourced languages with similar typologies are generally confused with each other in real-world applications such as machine translation, affecting the user’s experience. In this work, we present a language identification dataset for five typologically and phylogenetically related low-resourced East African languages that use the Ge’ez script as a writing system; namely Amharic, Blin, Ge’ez, Tigre, and Tigrinya. The dataset is built automatically from selected data sources, but we also performed a manual evaluation to assess its quality. Our approach to constructing the dataset is cost-effective and applicable to other low-resource languages. We integrated the dataset into an existing language-identification tool and also fine-tuned several Transformer based language models, achieving very strong results in all cases. While the task of language identification is easy for the informed person, such datasets can make a difference in real-world deployments and also serve as part of a benchmark for language understanding in the target languages. The data and models are made available at <url>https://github.com/fgaim/geezswitch</url>.</abstract>
       <url hash="e59de139">2022.lrec-1.707</url>
@@ -8149,7 +8149,7 @@
       <author><first>Farhan</first><last>Samir</last></author>
       <author><first>Edith</first><last>Coates</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>6597–6606</pages>
       <abstract>This paper presents a new inflectional resource for Gitksan, a low-resource Indigenous language of Canada. We use Gitksan data in interlinear glossed format, stemming from language documentation efforts, to build a database of partial inflection tables. We then enrich this morphological resource by filling in blank slots in the partial inflection tables using neural transformer reinflection models. We extend the training data for our transformer reinflection models using two data augmentation techniques: data hallucination and back-translation. Experimental results demonstrate substantial improvements from data augmentation, with data hallucination delivering particularly impressive gains. We also release reinflection models for Gitksan.</abstract>
       <url hash="8f989933">2022.lrec-1.710</url>
@@ -8187,8 +8187,8 @@
     <paper id="714">
       <title>Aligning the <fixed-case>R</fixed-case>omanian Reference Treebank and the Valence Lexicon of <fixed-case>R</fixed-case>omanian Verbs</title>
       <author><first>Ana-Maria</first><last>Barbu</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
-      <author><first>Cătălin</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="catalin-mititelu"><first>Cătălin</first><last>Mititelu</last></author>
       <pages>6626–6634</pages>
       <abstract>We present here the efforts of aligning two language resources for Romanian: the Romanian Reference Treebank and the Valence Lexicon of Romanian Verbs: for each occurrence of those verbs in the treebank that were included as entries in the lexicon, a set of valence frames is automatically assigned, then manually validated by two linguists and, when necessary, corrected. Validating a valence frame also means semantically disambiguating the verb in the respective context. The validation is done by two linguists, on complementary datasets. However, a subset of verbs were validated by both annotators and Cohen’s κ is 0.87 for this subset. The alignment we have made also serves as a method of enhancing the quality of the two resources, as in the process we identify morpho-syntactic annotation mistakes, incomplete valence frames or missing ones. Information from each resource complements the information from the other, thus their value increases. The treebank and the lexicon are freely available, while the links discovered between them are also made available on GitHub.</abstract>
       <url hash="0d2ed605">2022.lrec-1.714</url>
@@ -8197,7 +8197,7 @@
     <paper id="715">
       <title><fixed-case>P</fixed-case>orti<fixed-case>L</fixed-case>exicon-<fixed-case>UD</fixed-case>: a <fixed-case>P</fixed-case>ortuguese Lexical Resource according to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Model</title>
       <author><first>Lucelene</first><last>Lopes</last></author>
-      <author><first>Magali</first><last>Duran</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Duran</last></author>
       <author><first>Paulo</first><last>Fernandes</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>6635–6643</pages>
@@ -8217,7 +8217,7 @@
     </paper>
     <paper id="717">
       <title>Low-resource Neural Machine Translation: Benchmarking State-of-the-art Transformer for <fixed-case>W</fixed-case>olof&lt;-&gt;<fixed-case>F</fixed-case>rench</title>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <author><first>Alla</first><last>Lo</last></author>
       <author><first>Elhadji Mamadou</first><last>Nguer</last></author>
       <author><first>Sileye</first><last>Ba</last></author>
@@ -8228,7 +8228,7 @@
     </paper>
     <paper id="718">
       <title>Criteria for Useful Automatic <fixed-case>R</fixed-case>omanization in <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages</title>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <author><first>Cibu</first><last>Johny</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <author><first>Brian</first><last>Roark</last></author>
@@ -8273,7 +8273,7 @@
       <title>Learning How to Translate <fixed-case>N</fixed-case>orth <fixed-case>K</fixed-case>orean through <fixed-case>S</fixed-case>outh <fixed-case>K</fixed-case>orean</title>
       <author><first>Hwichan</first><last>Kim</last></author>
       <author><first>Sangwhan</first><last>Moon</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <pages>6711–6718</pages>
       <abstract>South and North Korea both use the Korean language. However, Korean NLP research has focused on South Korean only, and existing NLP systems of the Korean language, such as neural machine translation (NMT) models, cannot properly handle North Korean inputs. Training a model using North Korean data is the most straightforward approach to solving this problem, but there is insufficient data to train NMT models. In this study, we create data for North Korean NMT models using a comparable corpus. First, we manually create evaluation data for automatic alignment and machine translation, and then, investigate automatic alignment methods suitable for North Korean. Finally, we show that a model trained by North Korean bilingual data without human annotation significantly boosts North Korean translation accuracy compared to existing South Korean models in zero-shot settings.</abstract>
@@ -8290,7 +8290,7 @@
       <author><first>Jian</first><last>Yu</last></author>
       <author><first>Wei</first><last>Chen</last></author>
       <author><first>Yanfeng</first><last>Wang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>6719–6727</pages>
       <abstract>Previous research for adapting a general neural machine translation (NMT) model into a specific domain usually neglects the diversity in translation within the same domain, which is a core problem for domain adaptation in real-world scenarios. One representative of such challenging scenarios is to deploy a translation system for a conference with a specific topic, e.g., global warming or coronavirus, where there are usually extremely less resources due to the limited schedule. To motivate wider investigation in such a scenario, we present a real-world fine-grained domain adaptation task in machine translation (FGraDA). The FGraDA dataset consists of Chinese-English translation task for four sub-domains of information technology: autonomous vehicles, AI education, real-time networks, and smart phone. Each sub-domain is equipped with a development set and test set for evaluation purposes. To be closer to reality, FGraDA does not employ any in-domain bilingual training data but provides bilingual dictionaries and wiki knowledge base, which can be easier obtained within a short time. We benchmark the fine-grained domain adaptation task and present in-depth analyses showing that there are still challenging problems to further improve the performance with heterogeneous resources.</abstract>
       <url hash="9e77e125">2022.lrec-1.723</url>
@@ -8381,7 +8381,7 @@
       <author><first>Cheuk Tung</first><last>Yiu</last></author>
       <author><first>Rita</first><last>Frieske</last></author>
       <author><first>Holy</first><last>Lovenia</last></author>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Qifeng</first><last>Chen</last></author>
       <author><first>Xiaojuan</first><last>Ma</last></author>
       <author><first>Bertram</first><last>Shi</last></author>
@@ -8407,7 +8407,7 @@
       <author><first>Shuo</first><last>Xu</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
       <author><first>Changyong</first><last>Niu</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>6802–6807</pages>
       <abstract>Emotion recognition in conversation is important for an empathetic dialogue system to understand the user’s emotion and then generate appropriate emotional responses. However, most previous researches focus on modeling conversational contexts primarily based on the textual modality or simply utilizing multimodal information through feature concatenation. In order to exploit multimodal information and contextual information more effectively, we propose a multimodal directed acyclic graph (MMDAG) network by injecting information flows inside modality and across modalities into the DAG architecture. Experiments on IEMOCAP and MELD show that our model outperforms other state-of-the-art models. Comparative studies validate the effectiveness of the proposed modality fusion method.</abstract>
       <url hash="2317d59d">2022.lrec-1.733</url>
@@ -8417,7 +8417,7 @@
       <title>Automatic Gloss-level Data Augmentation for Sign Language Translation</title>
       <author><first>Jin Yea</first><last>Jang</last></author>
       <author><first>Han-Mu</first><last>Park</last></author>
-      <author><first>Saim</first><last>Shin</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
       <author><first>Suna</first><last>Shin</last></author>
       <author><first>Byungcheon</first><last>Yoon</last></author>
       <author><first>Gahgene</first><last>Gweon</last></author>
@@ -8433,7 +8433,7 @@
       <author><first>Hiroaki</first><last>Nanjo</last></author>
       <author><first>Keisuke</first><last>Shirai</last></author>
       <author><first>Hirotaka</first><last>Kameko</last></author>
-      <author><first>Masatake</first><last>Dantsuji</last></author>
+      <author id="masatake-dantsuji"><first>Masatake</first><last>Dantsuji</last></author>
       <pages>6814–6821</pages>
       <abstract>We focus on image description and a corresponding assessment system for language learners. To achieve automatic assessment of image description, we construct a novel dataset, the Language Learner Image Description (LLID) dataset, which consists of images, their descriptions, and assessment annotations. Then, we propose a novel task of automatic error correction for image description, and we develop a baseline model that encodes multimodal information from a learner sentence with an image and accurately decodes a corrected sentence. Our experimental results show that the developed model can revise errors that cannot be revised without an image.</abstract>
       <url hash="eb3cab96">2022.lrec-1.735</url>
@@ -8465,7 +8465,7 @@
       <author><first>Muskan</first><last>Garg</last></author>
       <author><first>Seema</first><last>Wazarkar</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>6837–6847</pages>
       <abstract>With the development of multimodal systems and natural language generation techniques, the resurgence of multimodal datasets has attracted significant research interests, which aims to provide new information to enrich the representation of textual data. However, there remains a lack of a comprehensive survey for this task. To this end, we take the first step and present a thorough review of this research field. This paper provides an overview of a publicly available dataset with different modalities according to the applications. Furthermore, we discuss the new frontier and give our thoughts. We hope this survey of multimodal datasets can provide the community with quick access and a general picture of the multimodal dataset for specific Natural Language Processing (NLP) applications and motivates future researches. In this context, we release the collection of all multimodal datasets easily accessible here: <url>https://github.com/drmuskangarg/Multimodal-datasets</url></abstract>
       <url hash="55bce828">2022.lrec-1.738</url>
@@ -8509,7 +8509,7 @@
       <author><first>Hiroshi</first><last>Kanayama</last></author>
       <author><first>Issei</first><last>Yoshida</last></author>
       <author><first>Masayasu</first><last>Muraoka</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>6874–6883</pages>
       <abstract>Deletion-based sentence compression in the English language has made significant progress over the past few decades. However, there is a lack of large-scale and high-quality parallel corpus (i.e., (sentence, compression) pairs) for the Chinese language to train an efficient compression system. To remedy this shortcoming, we present a dependency-tree-based method to construct a Chinese corpus with 151k pairs of sentences and compression based on Chinese language-specific characteristics. Subsequently, we trained both extractive and generative neural compression models using the constructed corpus. The experimental results show that our compression model can generate high-quality compressed sentences on both automatic and human evaluation metrics compared with the baselines. The results of the faithfulness evaluation also indicated that the Chinese compression model trained on our constructed corpus can produce more faithful compressed sentences. Furthermore, a dataset with 1,000 pairs of sentences and ground truth compression was manually created for automatic evaluation, which, we believe, will benefit future research on Chinese sentence compression.</abstract>
       <url hash="138a2632">2022.lrec-1.742</url>
@@ -8553,12 +8553,12 @@
     </paper>
     <paper id="746">
       <title>The Bull and the Bear: Summarizing Stock Market Discussions</title>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Dhyey</first><last>Jani</last></author>
       <author><first>Jay</first><last>Shah</last></author>
       <author><first>Devanshu</first><last>Thakar</last></author>
       <author><first>Varun</first><last>Jain</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>6909–6913</pages>
       <abstract>Stock market investors debate and heavily discuss stock ideas, investing strategies, news and market movements on social media platforms. The discussions are significantly longer in length and require extensive domain expertise for understanding. In this paper, we curate such discussions and construct a first-of-its-kind of abstractive summarization dataset. Our curated dataset consists of 7888 Reddit posts and manually constructed summaries for 400 posts. We robustly evaluate the summaries and conduct experiments on SOTA summarization tools to showcase their limitations. We plan to make the dataset publicly available. The sample dataset is available here: <url>https://dhyeyjani.github.io/RSMC</url></abstract>
       <url hash="50bc194e">2022.lrec-1.746</url>
@@ -8568,7 +8568,7 @@
       <title>Combination of Contextualized and Non-Contextualized Layers for Lexical Substitution in <fixed-case>F</fixed-case>rench</title>
       <author><first>Kévin</first><last>Espasa</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <pages>6914–6921</pages>
       <abstract>Lexical substitution task requires to substitute a target word by candidates in a given context. Candidates must keep meaning and grammatically of the sentence. The task, introduced in the SemEval 2007, has two objectives. The first objective is to find a list of substitutes for a target word. This list of substitutes can be obtained with lexical resources like WordNet or generated with a pre-trained language model. The second objective is to rank these substitutes using the context of the sentence. Most of the methods use vector space models or more recently embeddings to rank substitutes. Embedding methods use high contextualized representation. This representation can be over contextualized and in this way overlook good substitute candidates which are more similar on non-contextualized layers. SemDis 2014 introduced the lexical substitution task in French. We propose an application of the state-of-the-art method based on BERT in French and a novel method using contextualized and non-contextualized layers to increase the suggestion of words having a lower probability in a given context but that are more semantically similar. Experiments show our method increases the BERT based system on the OOT measure but decreases on the BEST measure in the SemDis 2014 benchmark.</abstract>
       <url hash="e8db0d83">2022.lrec-1.747</url>
@@ -8597,7 +8597,7 @@
     <paper id="750">
       <title>Emotion analysis and detection during <fixed-case>COVID</fixed-case>-19</title>
       <author><first>Tiberiu</first><last>Sosea</last></author>
-      <author><first>Chau</first><last>Pham</last></author>
+      <author id="chau-minh-pham"><first>Chau</first><last>Pham</last></author>
       <author><first>Alexander</first><last>Tekle</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
@@ -8627,8 +8627,8 @@
     </paper>
     <paper id="753">
       <title><fixed-case>V</fixed-case>accine<fixed-case>L</fixed-case>ies: A Natural Language Resource for Learning to Recognize Misinformation about the <fixed-case>COVID</fixed-case>-19 and <fixed-case>HPV</fixed-case> Vaccines</title>
-      <author><first>Maxwell</first><last>Weinzierl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="maxwell-weinzierl"><first>Maxwell</first><last>Weinzierl</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>6967–6975</pages>
       <abstract>Billions of COVID-19 vaccines have been administered, but many remain hesitant. Misinformation about the COVID-19 vaccines and other vaccines, propagating on social media, is believed to drive hesitancy towards vaccination. The ability to automatically recognize misinformation targeting vaccines on Twitter depends on the availability of data resources. In this paper we present VaccineLies, a large collection of tweets propagating misinformation about two vaccines: the COVID-19 vaccines and the Human Papillomavirus (HPV) vaccines. Misinformation targets are organized in vaccine-specific taxonomies, which reveal the misinformation themes and concerns. The ontological commitments of the misinformation taxonomies provide an understanding of which misinformation themes and concerns dominate the discourse about the two vaccines covered in VaccineLies. The organization into training, testing and development sets of VaccineLies invites the development of novel supervised methods for detecting misinformation on Twitter and identifying the stance towards it. Furthermore, VaccineLies can be a stepping stone for the development of datasets focusing on misinformation targeting additional vaccines.</abstract>
       <url hash="0ca4ba72">2022.lrec-1.753</url>
@@ -8637,7 +8637,7 @@
     <paper id="754">
       <title>Tackling Irony Detection using Ensemble Classifiers</title>
       <author><first>Christoph</first><last>Turban</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>6976–6984</pages>
       <abstract>Automatic approaches to irony detection have been of interest to the NLP community for a long time, yet, state-of-the-art approaches still fall way short of what one would consider a desirable performance. In part this is due to the inherent difficulty of the problem. However, in recent years ensembles of transformer-based approaches have emerged as a promising direction to push the state of the art forward in a wide range of NLP applications. A different, more recent, development is the automatic augmentation of training data. In this paper we will explore both these directions for the task of irony detection in social media. Using the common SemEval 2018 Task 3 benchmark collection we demonstrate that transformer models are well suited in ensemble classifiers for the task at hand. In the multi-class classification task we observe statistically significant improvements over strong baselines. For binary classification we achieve performance that is on par with state-of-the-art alternatives. The examined data augmentation strategies showed an effect, but are not decisive for good results.</abstract>
       <url hash="f78b0f83">2022.lrec-1.754</url>
@@ -8657,7 +8657,7 @@
       <author><first>Anupama</first><last>Ray</last></author>
       <author><first>Shubham</first><last>Mishra</last></author>
       <author><first>Apoorva</first><last>Nunna</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6992–7003</pages>
       <abstract>While sentiment and emotion analysis have been studied extensively, the relationship between sarcasm and emotion has largely remained unexplored. A sarcastic expression may have a variety of underlying emotions. For example, “I love being ignored” belies sadness, while “my mobile is fabulous with a battery backup of only 15 minutes!” expresses frustration. Detecting the emotion behind a sarcastic expression is non-trivial yet an important task. We undertake the task of detecting the emotion in a sarcastic statement, which to the best of our knowledge, is hitherto unexplored. We start with the recently released multimodal sarcasm detection dataset (MUStARD) pre-annotated with 9 emotions. We identify and correct 343 incorrect emotion labels (out of 690). We double the size of the dataset, label it with emotions along with valence and arousal which are important indicators of emotional intensity. Finally, we label each sarcastic utterance with one of the four sarcasm types-Propositional, Embedded, Likeprefixed and Illocutionary, with the goal of advancing sarcasm detection research. Exhaustive experimentation with multimodal (text, audio, and video) fusion models establishes a benchmark for exact emotion recognition in sarcasm and outperforms the state-of-art sarcasm detection. We release the dataset enriched with various annotations and the code for research purposes: <url>https://github.com/apoorva-nunna/MUStARD_Plus_Plus</url></abstract>
       <url hash="4d7bc8f2">2022.lrec-1.756</url>
@@ -8738,7 +8738,7 @@
       <author><first>Taha</first><last>Shangipour ataei</last></author>
       <author><first>Kamyar</first><last>Darvishi</last></author>
       <author><first>Soroush</first><last>Javdan</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <author><first>Sauleh</first><last>Eetemadi</last></author>
       <pages>7056–7060</pages>
       <abstract>Due to the increased availability of online reviews, sentiment analysis witnessed a thriving interest from researchers. Sentiment analysis is a computational treatment of sentiment used to extract and understand the opinions of authors. While many systems were built to predict the sentiment of a document or a sentence, many others provide the necessary detail on various aspects of the entity (i.e., aspect-based sentiment analysis). Most of the available data resources were tailored to English and the other popular European languages. Although Farsi is a language with more than 110 million speakers, to the best of our knowledge, there is a lack of proper public datasets on aspect-based sentiment analysis for Farsi. This paper provides a manually annotated Farsi dataset, Pars-ABSA, annotated and verified by three native Farsi speakers. The dataset consists of 5,114 positive, 3,061 negative and 1,827 neutral data samples from 5,602 unique reviews. Moreover, as a baseline, this paper reports the performance of some aspect-based sentiment analysis methods focusing on transfer learning on Pars-ABSA.</abstract>
@@ -8749,7 +8749,7 @@
       <title><fixed-case>H</fixed-case>indi<fixed-case>MD</fixed-case>: A Multi-domain Corpora for Low-resource Sentiment Analysis</title>
       <author><first/><last>Mamta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Tista</first><last>Saha</last></author>
       <author><first>Alka</first><last>Kumar</last></author>
       <author><first>Shikha</first><last>Srivastava</last></author>
@@ -8920,7 +8920,7 @@
       <author><first>Hang</first><last>Jiang</last></author>
       <author><first>Yining</first><last>Hua</last></author>
       <author><first>Doug</first><last>Beeferman</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>7199–7208</pages>
       <abstract>Social media data such as Twitter messages (“tweets”) pose a particular challenge to NLP systems because of their short, noisy, and colloquial nature. Tasks such as Named Entity Recognition (NER) and syntactic parsing require highly domain-matched training data for good performance. To date, there is no complete training corpus for both NER and syntactic analysis (e.g., part of speech tagging, dependency parsing) of tweets. While there are some publicly available annotated NLP datasets of tweets, they are only designed for individual tasks. In this study, we aim to create Tweebank-NER, an English NER corpus based on Tweebank V2 (TB2), train state-of-the-art (SOTA) Tweet NLP models on TB2, and release an NLP pipeline called Twitter-Stanza. We annotate named entities in TB2 using Amazon Mechanical Turk and measure the quality of our annotations. We train the Stanza pipeline on TB2 and compare with alternative NLP frameworks (e.g., FLAIR, spaCy) and transformer-based models. The Stanza tokenizer and lemmatizer achieve SOTA performance on TB2, while the Stanza NER tagger, part-of-speech (POS) tagger, and dependency parser achieve competitive performance against non-transformer models. The transformer-based models establish a strong baseline in Tweebank-NER and achieve the new SOTA performance in POS tagging and dependency parsing on TB2. We release the dataset and make both the Stanza pipeline and BERTweet-based models available “off-the-shelf” for use in future Tweet NLP research. Our source code, data, and pre-trained models are available at: <url>https://github.com/social-machines/TweebankNLP</url>.</abstract>
       <url hash="a090d0bd">2022.lrec-1.780</url>
@@ -8930,8 +8930,8 @@
       <title>Did that happen? Predicting Social Media Posts that are Indicative of what happened in a scene: A case study of a <fixed-case>TV</fixed-case> show</title>
       <author><first>Anietie</first><last>Andy</last></author>
       <author><first>Reno</first><last>Kriz</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>7209–7214</pages>
       <abstract>While popular Television (TV) shows are airing, some users interested in these shows publish social media posts about the show. Analyzing social media posts related to a TV show can be beneficial for gaining insights about what happened during scenes of the show. This is a challenging task partly because a significant number of social media posts associated with a TV show or event may not clearly describe what happened during the event. In this work, we propose a method to predict social media posts (associated with scenes of a TV show) that are indicative of what transpired during the scenes of the show. We evaluate our method on social media (Twitter) posts associated with an episode of a popular TV show, Game of Thrones. We show that for each of the identified scenes, with high AUC’s, our method can predict posts that are indicative of what happened in a scene from those that are not-indicative. Based on Twitters policy, we will make the Tweeter ID’s of the Twitter posts used for this work publicly available.</abstract>
@@ -8943,7 +8943,7 @@
       <author><first>Prashant</first><last>Kodali</last></author>
       <author><first>Akshala</first><last>Bhatnagar</last></author>
       <author><first>Naman</first><last>Ahuja</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>7215–7219</pages>
       <abstract>Hashtag segmentation is the task of breaking a hashtag into its constituent tokens. Hashtags often encode the essence of user-generated posts, along with information like topic and sentiment, which are useful in downstream tasks. Hashtags prioritize brevity and are written in unique ways - transliterating and mixing languages, spelling variations, creative named entities. Benchmark datasets used for the hashtag segmentation task - STAN, BOUN - are small and extracted from a single set of tweets. However, datasets should reflect the variations in writing styles of hashtags and account for domain and language specificity, failing which the results will misrepresent model performance. We argue that model performance should be assessed on a wider variety of hashtags, and datasets should be carefully curated. To this end, we propose HashSet, a dataset comprising of: a) 1.9k manually annotated dataset; b) 3.3M loosely supervised dataset. HashSet dataset is sampled from a different set of tweets when compared to existing datasets and provides an alternate distribution of hashtags to build and validate hashtag segmentation models. We analyze the performance of SOTA models for Hashtag Segmentation, and show that the proposed dataset provides an alternate set of hashtags to train and assess models.</abstract>
@@ -8976,7 +8976,7 @@
       <title><fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>uito: a pre-trained language model for social media text in <fixed-case>S</fixed-case>panish</title>
       <author><first>Juan Manuel</first><last>Pérez</last></author>
       <author><first>Damián Ariel</first><last>Furman</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Franco M.</first><last>Luque</last></author>
       <pages>7235–7243</pages>
       <abstract>Since BERT appeared, Transformer language models and transfer learning have become state-of-the-art for natural language processing tasks. Recently, some works geared towards pre-training specially-crafted models for particular domains, such as scientific papers, medical documents, user-generated texts, among others. These domain-specific models have been shown to improve performance significantly in most tasks; however, for languages other than English, such models are not widely available. In this work, we present RoBERTuito, a pre-trained language model for user-generated text in Spanish, trained on over 500 million tweets. Experiments on a benchmark of tasks involving user-generated text showed that RoBERTuito outperformed other pre-trained language models in Spanish. In addition to this, our model has some cross-lingual abilities, achieving top results for English-Spanish tasks of the Linguistic Code-Switching Evaluation benchmark (LinCE) and also competitive performance against monolingual models in English Twitter tasks. To facilitate further research, we make RoBERTuito publicly available at the HuggingFace model hub together with the dataset used to pre-train it.</abstract>
@@ -8999,7 +8999,7 @@
       <author><first>Christopher</first><last>Song</last></author>
       <author><first>David</first><last>Harwath</last></author>
       <author><first>Tuka</first><last>Alhanai</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>7253–7258</pages>
       <abstract>We present Speak, a toolkit that allows researchers to crowdsource speech audio recordings using Amazon Mechanical Turk (MTurk). Speak allows MTurk workers to submit speech recordings in response to a task prompt and stimulus (e.g. image, text excerpt, audio file) defined by researchers, a functionality that is not natively offered by MTurk at the time of writing this paper. Importantly, the toolkit employs numerous measures to ensure that speech recordings collected are of adequate quality, in order to avoid accepting unusable data and prevent abuse/fraud. Speak has demonstrated utility, having collected over 600,000 recordings to date. The toolkit is open-source and available for download.</abstract>
       <url hash="f0e24f52">2022.lrec-1.787</url>
@@ -9009,7 +9009,7 @@
       <title><fixed-case>ASCEND</fixed-case>: A Spontaneous <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Dataset for Code-switching in Multi-turn Conversation</title>
       <author><first>Holy</first><last>Lovenia</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Yan</first><last>Xu</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
@@ -9098,8 +9098,8 @@
       <title>Multilingual Transfer Learning for Children Automatic Speech Recognition</title>
       <author><first>Thomas</first><last>Rolland</last></author>
       <author><first>Alberto</first><last>Abad</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>7314–7320</pages>
       <abstract>Despite recent advances in automatic speech recognition (ASR), the recognition of children’s speech still remains a significant challenge. This is mainly due to the high acoustic variability and the limited amount of available training data. The latter problem is particularly evident in languages other than English, which are usually less-resourced. In the current paper, we address children ASR in a number of less-resourced languages by combining several small-sized children speech corpora from these languages. In particular, we address the following research question: Does a novel two-step training strategy in which multilingual learning is followed by language-specific transfer learning outperform conventional single language/task training for children speech, as well as multilingual and transfer learning alone? Based on previous experimental results with English, we hypothesize that multilingual learning provides a better generalization of the underlying characteristics of children’s speech. Our results provide a positive answer to our research question, by showing that using transfer learning on top of a multilingual model for an unseen language outperforms conventional single language-specific learning.</abstract>
       <url hash="7ba284c3">2022.lrec-1.795</url>
@@ -9169,7 +9169,7 @@
       <author><first>Irina</first><last>Stenger</last></author>
       <author><first>Philip</first><last>Georgis</last></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>7368–7376</pages>
       <abstract>We focus on the syntactic variation and measure syntactic distances between nine Slavic languages (Belarusian, Bulgarian, Croatian, Czech, Polish, Slovak, Slovene, Russian, and Ukrainian) using symmetric measures of insertion, deletion and movement of syntactic units in the parallel sentences of the fable “The North Wind and the Sun”. Additionally, we investigate phonetic and orthographic asymmetries between selected languages by means of the information theoretical notion of surprisal. Syntactic distance and surprisal are, thus, considered as potential predictors of mutual intelligibility between related languages. In spoken and written cloze test experiments for Slavic native speakers, the presented predictors will be validated as to whether variations in syntax lead to a slower or impeded intercomprehension of Slavic texts.</abstract>
diff --git a/data/xml/2022.lt4hala.xml b/data/xml/2022.lt4hala.xml
index a7a24bfbbb..db178db554 100644
--- a/data/xml/2022.lt4hala.xml
+++ b/data/xml/2022.lt4hala.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-09-23" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</booktitle>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <editor><first>Marco</first><last>Passarotti</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Identifying Cleartext in Historical Ciphers</title>
       <author><first>Maria-Elena</first><last>Gambardella</last></author>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <author><first>Eva</first><last>Pettersson</last></author>
       <pages>1–9</pages>
       <abstract>In historical encrypted sources we can find encrypted text sequences, also called ciphertext, as well as non-encrypted cleartexts written in a known language. While most of the cryptanalysis focuses on the decryption of ciphertext, cleartext is often overlooked although it can give us important clues about the historical interpretation and contextualisation of the manuscript. In this paper, we investigate to what extent we can automatically distinguish cleartext from ciphertext in historical ciphers and to what extent we are able to identify its language. The problem is challenging as cleartext sequences in ciphers are often short, up to a few words, in different languages due to historical code-switching. To identify the sequences and the language(s), we chose a rule-based approach and run 7 different models using historical language models on various ciphertexts.</abstract>
@@ -89,7 +89,7 @@
     </paper>
     <paper id="8">
       <title>A Treebank-based Approach to the Supprema Constructio in Dante’s <fixed-case>L</fixed-case>atin Works</title>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Giulia</first><last>Pedonese</last></author>
       <pages>51–58</pages>
       <abstract>This paper aims to apply a corpus-driven approach to Dante Alighieri’s Latin works using UDante, a treebank based on Dante Search and part of the Universal Dependencies project. We present a method based on the notion of barycentre applied to a dependency tree as a way to calculate the “syntactic balance” of a sentence. Its application to Dante’s Latin works shows its potential in analysing the style of an author, and contributes to the interpretation of the supprema constructio mentioned in DVE II vi 7 as a well balanced syntactic pattern modeled on Latin literary writing.</abstract>
@@ -100,7 +100,7 @@
       <title>From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of <fixed-case>A</fixed-case>ncient <fixed-case>I</fixed-case>taly</title>
       <author><first>Valeria</first><last>Quochi</last></author>
       <author><first>Andrea</first><last>Bellandi</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Michele</first><last>Mallia</last></author>
       <author><first>Francesca</first><last>Murano</last></author>
       <author><first>Silvia</first><last>Piccini</last></author>
@@ -134,7 +134,7 @@
     <paper id="12">
       <title>Contextual Unsupervised Clustering of Signs for Ancient Writing Systems</title>
       <author><first>Michele</first><last>Corazza</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <author><first>Miguel</first><last>Valério</last></author>
       <author><first>Silvia</first><last>Ferrara</last></author>
       <pages>84–93</pages>
@@ -148,7 +148,7 @@
       <author><first>Elisa</first><last>Guadagnini</last></author>
       <author><first>Eva</first><last>Sassolini</last></author>
       <author><first>Marco</first><last>Biffi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>94–100</pages>
       <abstract>In this paper we describe some experiments related to a corpus derived from an authoritative historical Italian dictionary, namely the Grande dizionario della lingua italiana (‘Great Dictionary of Italian Language’, in short GDLI). Thanks to the digitization and structuring of this dictionary, we have been able to set up the first nucleus of a diachronic annotated corpus that selects—according to specific criteria, and distinguishing between prose and poetry—some of the quotations that within the entries illustrate the different definitions and sub-definitions. In fact, the GDLI presents a huge collection of quotations covering the entire history of the Italian language and thus ranging from the Middle Ages to the present day. The corpus was enriched with linguistic annotation and used to train and evaluate NLP models for POS tagging and lemmatization, with promising results.</abstract>
       <url hash="5959f22f">2022.lt4hala-1.13</url>
@@ -167,8 +167,8 @@
     </paper>
     <paper id="15">
       <title>Handling Stress in Finite-State Morphological Analyzers for <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek and <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew</title>
-      <author><first>Daniel G.</first><last>Swanson</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="daniel-g-swanson"><first>Daniel G.</first><last>Swanson</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>108–113</pages>
       <abstract>Modeling stress placement has historically been a challenge for computational morphological analysis, especially in finite-state systems because lexically conditioned stress cannot be modeled using only rewrite rules on the phonological form of a word. However, these phenomena can be modeled fairly easily if the lexicon’s internal representation is allowed to contain more information than the pure phonological form. In this paper we describe the stress systems of Ancient Greek and Ancient Hebrew and we present two prototype finite-state morphological analyzers, one for each language, which successfully implement these stress systems by inserting a small number of control characters into the phonological form, thus conclusively refuting the claim that finite-state systems are not powerful enough to model such stress systems and arguing in favor of the continued relevance of finite-state systems as an appropriate tool for modeling the morphology of historical languages.</abstract>
       <url hash="6d11f9e2">2022.lt4hala-1.15</url>
@@ -312,7 +312,7 @@
       <title>Overview of the <fixed-case>E</fixed-case>va<fixed-case>L</fixed-case>atin 2022 Evaluation Campaign</title>
       <author><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Margherita</first><last>Fantoli</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
       <pages>183–188</pages>
diff --git a/data/xml/2022.ltedi.xml b/data/xml/2022.ltedi.xml
index cf7156c29a..97b0058e3a 100644
--- a/data/xml/2022.ltedi.xml
+++ b/data/xml/2022.ltedi.xml
@@ -118,7 +118,7 @@
       <title>Disambiguation of morpho-syntactic features of <fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish – the case of habitual be</title>
       <author><first>Harrison</first><last>Santiago</last></author>
       <author><first>Joshua</first><last>Martin</last></author>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Kevin</first><last>Tang</last></author>
       <pages>70-75</pages>
       <abstract>Recent research has highlighted that natural language processing (NLP) systems exhibit a bias againstAfrican American speakers. These errors are often caused by poor representation of linguistic features unique to African American English (AAE), which is due to the relatively low probability of occurrence for many such features. We present a workflow to overcome this issue in the case of habitual “be”. Habitual “be” is isomorphic, and therefore ambiguous, with other forms of uninflected “be” found in both AAE and General American English (GAE). This creates a clear challenge for bias in NLP technologies. To overcome the scarcity, we employ a combination of rule-based filters and data augmentation that generate a corpus balanced between habitual and non-habitual instances. This balanced corpus trains unbiased machine learning classifiers, as demonstrated on a corpus of AAE transcribed texts, achieving .65 F<tex-math>_1</tex-math> score at classifying habitual “be”.</abstract>
@@ -263,7 +263,7 @@
     <paper id="21">
       <title><fixed-case>D</fixed-case>eep<fixed-case>B</fixed-case>lues@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-<fixed-case>ACL</fixed-case>2022: Depression level detection modelling through domain specific <fixed-case>BERT</fixed-case> and short text Depression classifiers</title>
       <author><first>Nawshad</first><last>Farruque</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <author><first>Randy</first><last>Goebel</last></author>
       <author><first>Sudhakar</first><last>Sivapalan</last></author>
       <pages>167-171</pages>
@@ -281,7 +281,7 @@
       <author><first>Angel</first><last>S</last></author>
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>172-176</pages>
       <abstract>In recent years social media has become one of the major forums for expressing human views and emotions. With the help of smartphones and high-speed internet, anyone can express their views on Social media. However, this can also lead to the spread of hatred and violence in society. Therefore it is necessary to build a method to find and support helpful social media content. In this paper, we studied Natural Language Processing approach for detecting Hope speech in a given sentence. The task was to classify the sentences into ‘Hope speech’ and ‘Non-hope speech’. The dataset was provided by LT-EDI organizers with text from Youtube comments. Based on the task description, we developed a system using the pre-trained language model BERT to complete this task. Our model achieved 1st rank in the Kannada language with a weighted average F1 score of 0.750, 2nd rank in the Malayalam language with a weighted average F1 score of 0.740, 3rd rank in the Tamil language with a weighted average F1 score of 0.390 and 6th rank in the English language with a weighted average F1 score of 0.880.</abstract>
       <url hash="950617e9">2022.ltedi-1.22</url>
@@ -329,7 +329,7 @@
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Angel</first><last>S</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>196-199</pages>
       <abstract>Depression is a common mental illness that involves sadness and lack of interest in all day-to-day activities. The task is to classify the social media text as signs of depression into three labels namely “not depressed”, “moderately depressed”, and “severely depressed”. We have build a system using Deep Learning Model “Transformers”. Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio. The multi-class classification model used in our system is based on the ALBERT model. In the shared task ACL 2022, Our team SSN_MLRG3 obtained a Macro F1 score of 0.473.</abstract>
       <url hash="1aa67cf7">2022.ltedi-1.26</url>
@@ -356,7 +356,7 @@
       <author><first>Fazlourrahman</first><last>Balouchzahi</last></author>
       <author><first>Sabur</first><last>Butt</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>206-211</pages>
       <abstract>Hope is an inherent part of human life and essential for improving the quality of life. Hope increases happiness and reduces stress and feelings of helplessness. Hope speech is the desired outcome for better and can be studied using text from various online sources where people express their desires and outcomes. In this paper, we address a deep-learning approach with a combination of linguistic and psycho-linguistic features for hope-speech detection. We report our best results submitted to LT-EDI-2022 which ranked 2nd and 3rd in English and Spanish respectively.</abstract>
       <url hash="af9c8baf">2022.ltedi-1.28</url>
@@ -700,7 +700,7 @@
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
       <author><first>Thenmozhi</first><last>Durairaj</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <author><first>Prasanna</first><last>Kumaresan</last></author>
       <author><first>Rahul</first><last>Ponnusamy</last></author>
@@ -717,9 +717,9 @@
       <author><first>Vigneshwaran</first><last>Muralidaran</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
       <author><first>Subalalitha</first><last>Cn</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Miguel Ángel</first><last>García</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Rafael</first><last>Valencia-García</last></author>
       <author><first>Prasanna</first><last>Kumaresan</last></author>
       <author><first>Rahul</first><last>Ponnusamy</last></author>
diff --git a/data/xml/2022.mathnlp.xml b/data/xml/2022.mathnlp.xml
index cad62c3363..f49c88128b 100644
--- a/data/xml/2022.mathnlp.xml
+++ b/data/xml/2022.mathnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 1st Workshop on Mathematical Natural Language Processing (MathNLP)</booktitle>
       <editor><first>Deborah</first><last>Ferreira</last></editor>
       <editor><first>Marco</first><last>Valentino</last></editor>
-      <editor><first>Andre</first><last>Freitas</last></editor>
+      <editor id="andre-freitas"><first>Andre</first><last>Freitas</last></editor>
       <editor><first>Sean</first><last>Welleck</last></editor>
       <editor><first>Moritz</first><last>Schubotz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -62,7 +62,7 @@
     <paper id="4">
       <title>Towards Autoformalization of Mathematics and Code Correctness: Experiments with Elementary Proofs</title>
       <author><first>Garett</first><last>Cunningham</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <author><first>David</first><last>Juedes</last></author>
       <pages>25-32</pages>
       <abstract>The ever-growing complexity of mathematical proofs makes their manual verification by mathematicians very cognitively demanding. Autoformalization seeks to address this by translating proofs written in natural language into a formal representation that is computer-verifiable via interactive theorem provers. In this paper, we introduce a semantic parsing approach, based on the Universal Transformer architecture, that translates elementary mathematical proofs into an equivalent formalization in the language of the Coq interactive theorem prover. The same architecture is also trained to translate simple imperative code decorated with Hoare triples into formally verifiable proofs of correctness in Coq. Experiments on a limited domain of artificial and human-written proofs show that the models generalize well to intermediate lengths not seen during training and variations in natural language.</abstract>
diff --git a/data/xml/2022.mia.xml b/data/xml/2022.mia.xml
index fa836c1779..eaf8060da2 100644
--- a/data/xml/2022.mia.xml
+++ b/data/xml/2022.mia.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Workshop on Multilingual Information Access (MIA)</booktitle>
       <editor><first>Akari</first><last>Asai</last></editor>
       <editor><first>Eunsol</first><last>Choi</last></editor>
-      <editor><first>Jonathan H.</first><last>Clark</last></editor>
+      <editor id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></editor>
       <editor><first>Junjie</first><last>Hu</last></editor>
       <editor><first>Chia-Hsuan</first><last>Lee</last></editor>
       <editor><first>Jungo</first><last>Kasai</last></editor>
@@ -111,7 +111,7 @@
       <author><first>Sotaro</first><last>Takeshita</last></author>
       <author><first>Marco</first><last>Bombieri</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>77-90</pages>
       <abstract>This paper introduces our proposed system for the MIA Shared Task on Cross-lingual Openretrieval Question Answering (COQA). In this challenging scenario, given an input question the system has to gather evidence documents from a multilingual pool and generate from them an answer in the language of the question. We devised several approaches combining different model variants for three main components: Data Augmentation, Passage Retrieval, and Answer Generation. For passage retrieval, we evaluated the monolingual BM25 ranker against the ensemble of re-rankers based on multilingual pretrained language models (PLMs) and also variants of the shared task baseline, re-training it from scratch using a recently introduced contrastive loss that maintains a strong gradient signal throughout training by means of mixed negative samples. For answer generation, we focused on languageand domain-specialization by means of continued language model (LM) pretraining of existing multilingual encoders. Additionally, for both passage retrieval and answer generation, we augmented the training data provided by the task organizers with automatically generated question-answer pairs created from Wikipedia passages to mitigate the issue of data scarcity, particularly for the low-resource languages for which no training data were provided. Our results show that language- and domain-specialization as well as data augmentation help, especially for low-resource languages.</abstract>
       <url hash="2a21c67b">2022.mia-1.8</url>
@@ -124,7 +124,7 @@
       <author><first>Sumit</first><last>Agarwal</last></author>
       <author><first>Suraj</first><last>Tripathi</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
       <pages>91-99</pages>
       <abstract>People speaking different kinds of languages search for information in a cross-lingual manner. They tend to ask questions in their language and expect the answer to be in the same language, despite the evidence lying in another language. In this paper, we present our approach for this task of cross-lingual open-domain question-answering. Our proposed method employs a passage reranker, the fusion-in-decoder technique for generation, and a wiki data entity-based post-processing system to tackle the inability to generate entities across all languages. Our end-2-end pipeline shows an improvement of 3 and 4.6 points on F1 and EM metrics respectively, when compared with the baseline CORA model on the XOR-TyDi dataset. We also evaluate the effectiveness of our proposed techniques in the zero-shot setting using the MKQA dataset and show an improvement of 5 points in F1 for high-resource and 3 points improvement for low-resource zero-shot languages. Our team, CMUmQA’s submission in the MIA-Shared task ranked 1st in the constrained setup for the dev and 2nd in the test setting.</abstract>
       <url hash="572ac35a">2022.mia-1.9</url>
diff --git a/data/xml/2022.mmlow.xml b/data/xml/2022.mmlow.xml
index 9b87c55ecc..ec8e0cc65a 100644
--- a/data/xml/2022.mmlow.xml
+++ b/data/xml/2022.mmlow.xml
@@ -35,7 +35,7 @@
       <author><first>Faerie</first><last>Mattins R</last></author>
       <author><first>Srivarshan</first><last>Selvaraj</last></author>
       <author><first>Antonette</first><last>Shibani</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Bharathi</first><last>Raja Chakravarthi</last></author>
       <pages>9-17</pages>
       <abstract>of expressing relevant idea through social media platforms and forums. At the same time, these memes are trolled by a person who tries to get identified from the other internet users like social media users, chat rooms and blogs. The memes contain both textual and visual information. Based on the content of memes, they are trolled in online community. There is no restriction for language usage in online media. The present work focuses on whether memes are trolled or not trolled. The proposed multi modal approach achieved considerably better weighted average F1 score of 0.5437 compared to Unimodal approaches. The other performance metrics like precision, recall, accuracy and macro average have also been studied to observe the proposed system.</abstract>
diff --git a/data/xml/2022.mmmpie.xml b/data/xml/2022.mmmpie.xml
index 8b647f7234..28f0812eb7 100644
--- a/data/xml/2022.mmmpie.xml
+++ b/data/xml/2022.mmmpie.xml
@@ -43,7 +43,7 @@
       <author><first>Guillaume</first><last>Bernard</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>15–25</pages>
       <abstract>Knowledge transfer between neural language models is a widely used technique that has proven to improve performance in a multitude of natural language tasks, in particular with the recent rise of large pre-trained language models like BERT. Similarly, high cross-lingual transfer has been shown to occur in multilingual language models. Hence, it is of great importance to better understand this phenomenon as well as its limits. While most studies about cross-lingual transfer focus on training on independent and identically distributed (i.e. i.i.d.) samples, in this paper we study cross-lingual transfer in a continual learning setting on two sequence labeling tasks: slot-filling and named entity recognition. We investigate this by training multilingual BERT on sequences of 9 languages, one language at a time, on the MultiATIS++ and MultiCoNER corpora. Our first findings are that forward transfer between languages is retained although forgetting is present. Additional experiments show that lost performance can be recovered with as little as a single training epoch even if forgetting was high, which can be explained by a progressive shift of model parameters towards a better multilingual initialization. We also find that commonly used metrics might be insufficient to assess continual learning performance.</abstract>
       <url hash="dfad7dc9">2022.mmmpie-1.3</url>
@@ -65,8 +65,8 @@
     <paper id="5">
       <title>Cost-Effective Language Driven Image Editing with <fixed-case>LX</fixed-case>-<fixed-case>DRIM</fixed-case></title>
       <author><first>Rodrigo</first><last>Santos</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <pages>31–43</pages>
       <abstract>Cross-modal language and image processing is envisaged as a way to improve language understanding by resorting to visual grounding, but only recently, with the emergence of neural architectures specifically tailored to cope with both modalities, has it attracted increased attention and obtained promising results. In this paper we address a cross-modal task of language-driven image design, in particular the task of altering a given image on the basis of language instructions. We also avoid the need for a specifically tailored architecture and resort instead to a general purpose model in the Transformer family. Experiments with the resulting tool, LX-DRIM, show very encouraging results, confirming the viability of the approach for language-driven image design while keeping it affordable in terms of compute and data.</abstract>
       <url hash="a705cba7">2022.mmmpie-1.5</url>
diff --git a/data/xml/2022.mmnlu.xml b/data/xml/2022.mmnlu.xml
index 1f5a479439..030d3c6113 100644
--- a/data/xml/2022.mmnlu.xml
+++ b/data/xml/2022.mmnlu.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Massively Multilingual Natural Language Understanding Workshop (MMNLU-22)</booktitle>
       <editor><first>Jack</first><last>FitzGerald</last></editor>
       <editor><first>Kay</first><last>Rottmann</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <editor><first>Mohit</first><last>Bansal</last></editor>
       <editor><first>Anna</first><last>Rumshisky</last></editor>
       <editor><first>Charith</first><last>Peris</last></editor>
@@ -123,7 +123,7 @@
       <author><first>Maxime</first><last>De bruyn</last><affiliation>University of Antwerp</affiliation></author>
       <author><first>Ehsan</first><last>Lotfi</last><affiliation>University of Antwerp</affiliation></author>
       <author><first>Jeska</first><last>Buhmann</last><affiliation/></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
       <pages>69-82</pages>
       <abstract>We expect to interact with home assistants irrespective of our language. However, scaling the Natural Language Understanding pipeline to multiple languages while keeping the same level of accuracy remains a challenge. In this work, we leverage the inherent multilingual aspect of translation models for the task of multilingual intent classification and slot filling. Our experiments reveal that they work equally well with general-purpose multilingual text-to-text models. Furthermore, their accuracy can be further improved by artificially increasing the size of the training set. Unfortunately, increasing the training set also increases the overlap with the test set, leading to overestimating their true capabilities. As a result, we propose two new evaluation methods capable of accounting for an overlap between the training and test set.</abstract>
       <url hash="7f83a80b">2022.mmnlu-1.8</url>
diff --git a/data/xml/2022.mrl.xml b/data/xml/2022.mrl.xml
index 8f5dec0c97..341a8d5ace 100644
--- a/data/xml/2022.mrl.xml
+++ b/data/xml/2022.mrl.xml
@@ -79,7 +79,7 @@
       <author><first>Hossain Shaikh</first><last>Saadi</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Viktor</first><last>Hangya</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Tobias</first><last>Eder</last><affiliation>Technical University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>64-75</pages>
       <abstract>Contextualized word embeddings have emerged as the most important tool for performing NLP tasks in a large variety of languages. In order to improve the cross- lingual representation and transfer learning quality, contextualized embedding alignment techniques, such as mapping and model fine-tuning, are employed. Existing techniques however are time-, data- and computational resource-intensive. In this paper we analyze these techniques by utilizing three tasks: bilingual lexicon induction (BLI), word retrieval and cross-lingual natural language inference (XNLI) for a high resource (German-English) and a low resource (Bengali-English) language pair. In contrast to previous works which focus only on a few popular models, we compare five multilingual and seven monolingual language models and investigate the effect of various aspects on their performance, such as vocabulary size, number of languages used for training and number of parameters. Additionally, we propose a parameter-, data- and runtime-efficient technique which can be trained with 10% of the data, less than 10% of the time and have less than 5% of the trainable parameters compared to model fine-tuning. We show that our proposed method is competitive with resource heavy models, even outperforming them in some cases, even though it relies on less resource</abstract>
       <url hash="8a3f969e">2022.mrl-1.6</url>
@@ -90,9 +90,9 @@
       <title>How Language-Dependent is Emotion Detection? Evidence from Multilingual <fixed-case>BERT</fixed-case></title>
       <author><first>Luna</first><last>De Bruyne</last><affiliation>LT3, University of Ghent</affiliation></author>
       <author><first>Pranaydeep</first><last>Singh</last><affiliation>LT3, University of Ghent</affiliation></author>
-      <author><first>Orphee</first><last>De Clercq</last><affiliation>LT3, University of Ghent</affiliation></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last><affiliation>LT3, University of Ghent</affiliation></author>
       <author><first>Els</first><last>Lefever</last><affiliation>LT3, University of Ghent</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, University of Ghent</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, University of Ghent</affiliation></author>
       <pages>76-85</pages>
       <abstract>As emotion analysis in text has gained a lot of attention in the field of natural language processing, differences in emotion expression across languages could have consequences for how emotion detection models work. We evaluate the language-dependence of an mBERT-based emotion detection model by comparing language identification performance before and after fine-tuning on emotion detection, and performing (adjusted) zero-shot experiments to assess whether emotion detection models rely on language-specific information. When dealing with typologically dissimilar languages, we found evidence for the language-dependence of emotion detection.</abstract>
       <url hash="cb2d8843">2022.mrl-1.7</url>
@@ -116,7 +116,7 @@
       <author><first>Emre Can</first><last>Acikgoz</last><affiliation>Koc University</affiliation></author>
       <author><first>Tilek</first><last>Chubakov</last><affiliation>University of California Berkeley</affiliation></author>
       <author><first>Muge</first><last>Kural</last><affiliation>Koc University</affiliation></author>
-      <author><first>Gözde</first><last>Şahin</last><affiliation>Koc University</affiliation></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last><affiliation>Koc University</affiliation></author>
       <author><first>Deniz</first><last>Yuret</last><affiliation>Koc University</affiliation></author>
       <pages>100-105</pages>
       <abstract>This paper describes the KUIS-AI NLP team’s submission for the 1st Shared Task on Multilingual Clause-level Morphology (MRL2022). We present our work on all three parts of the shared task: inflection, reinflection, and analysis. We mainly explore two approaches: Trans- former models in combination with data augmentation, and exploiting the state-of-the-art language modeling techniques for morphological analysis. Data augmentation leads to a remarkable performance improvement for most of the languages in the inflection task. Prefix-tuning on pretrained mGPT model helps us to adapt reinflection and analysis tasks in a low-data setting. Additionally, we used pipeline architectures using publicly available open-source lemmatization tools and monolingual BERT- based morphological feature classifiers for rein- flection and analysis tasks, respectively. While Transformer architectures with data augmentation and pipeline architectures achieved the best results for inflection and reinflection tasks, pipelines and prefix-tuning on mGPT received the highest results for the analysis task. Our methods achieved first place in each of the three tasks and outperforms mT5-baseline with 89% for inflection, 80% for reflection, and 12% for analysis. Our code 1 is publicly available.</abstract>
@@ -131,7 +131,7 @@
       <author><first>Utkarsh</first><last>Saboo</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Xihan</first><last>Wu</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Garrett</first><last>Nicolai</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <pages>106-114</pages>
       <abstract>We present the University of British Columbia’s submission to the MRL shared task on multilingual clause-level morphology. Our submission extends word-level inflectional models to the clause-level in two ways: first, by evaluating the role that BPE has on the learning of inflectional morphology, and second, by evaluating the importance of a copy bias obtained through data hallucination. Experiments demonstrate a strong preference for language-tuned BPE and a copy bias over a vanilla transformer. The methods are complementary for inflection and analysis tasks – combined models see error reductions of 38% for inflection and 15.6% for analysis; However, this synergy does not hold for reinflection, which performs best under a BPE-only setting. A deeper analysis of the errors generated by our models illustrates that the copy bias may be too strong - the combined model produces predictions more similar to the copy-influenced system, despite the success of the BPE-model.</abstract>
       <url hash="459a46d6">2022.mrl-1.11</url>
@@ -170,8 +170,8 @@
       <author><first>Victoria</first><last>Basmov</last><affiliation>Bar Ilan University</affiliation></author>
       <author><first>Shadrack</first><last>Kirimi</last><affiliation>Chuka University</affiliation></author>
       <author><first>Lydia</first><last>Nishimwe</last><affiliation>Inria, Paris</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria, Paris</affiliation></author>
-      <author><first>Djamé</first><last>Seddah</last><affiliation>Inria, Paris</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria, Paris</affiliation></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last><affiliation>Inria, Paris</affiliation></author>
       <author><first>Reut</first><last>Tsarfaty</last><affiliation>Bar Ilan University</affiliation></author>
       <author><first>Duygu</first><last>Ataman</last><affiliation>New York University</affiliation></author>
       <pages>134-146</pages>
diff --git a/data/xml/2022.mwe.xml b/data/xml/2022.mwe.xml
index cc962fa4fb..be608cfb07 100644
--- a/data/xml/2022.mwe.xml
+++ b/data/xml/2022.mwe.xml
@@ -21,7 +21,7 @@
     </frontmatter>
     <paper id="1">
       <title>Figurative Language in Noun Compound Models across Target Properties, Domains and Time</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>1</pages>
       <abstract>A variety of distributional and multi-modal computational approaches has been suggested for modelling the degrees of compositionality across types of multiword expressions and languages. As the starting point of my talk, I will present standard variants of computational models that have been proven successful in predicting the compositionality of German and English noun compounds. The main part of the talk will then be concerned with investigating the general reliability of these standard models and discussing implications for gold-standard datasets: I will demonstrate how prediction results vary (i) across representations, (ii) across empirical target properties, (iii) across compound types, (iv) across levels of abstractness, and (v) for general- vs. domain-specific language. Finally, I will present a preliminary quantitative study on diachronic changes of noun compound meanings and compositionality over time.</abstract>
       <url hash="58c9ac36">2022.mwe-1.1</url>
@@ -39,7 +39,7 @@
       <title>A General Framework for Detecting Metaphorical Collocations</title>
       <author><first>Marija</first><last>Brkić Bakarić</last></author>
       <author><first>Lucia</first><last>Načinović Prskalo</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>3–8</pages>
       <abstract>This paper aims at identifying a specific set of collocations known under the term metaphorical collocations. In this type of collocations, a semantic shift has taken place in one of the components. Since the appropriate gold standard needs to be compiled prior to any serious endeavour to extract metaphorical collocations automatically, this paper first presents the steps taken to compile it, and then establishes appropriate evaluation framework. The process of compiling the gold standard is illustrated on one of the most frequent Croatian nouns, which resulted in the preliminary relation significance set. With the aim to investigate the possibility of facilitating the process, frequency, logDice, relation, and pretrained word embeddings are used as features in the classification task conducted on the logDice-based word sketch relation lists. Preliminary results are presented.</abstract>
       <url hash="f854dc51">2022.mwe-1.3</url>
@@ -69,7 +69,7 @@
     <paper id="6">
       <title>Support Verb Constructions across the Ocean Sea</title>
       <author><first>Jorge</first><last>Baptista</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Sónia</first><last>Reis</last></author>
       <pages>26–36</pages>
       <abstract>This paper analyses the support (or light) verb constructions (SVC) in a publicly available, manually annotated corpus of multiword expressions (MWE) in Brazilian Portuguese. The paper highlights several issues in the linguistic definitions therein adopted for these types of MWE, and reports the results from applying STRING, a rule-based parsing system, originally developed for European Portuguese, to this corpus from Brazilian Portuguese. The goal is two-fold: to improve the linguistic definition of SVC in the annotation task, as well as to gauge the major difficulties found when transposing linguistic resources between these two varieties of the same language.</abstract>
@@ -165,7 +165,7 @@
       <author><first>Xuan-Rui</first><last>Fan</last></author>
       <author><first>Edward</first><last>Gow-Smith</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>105–111</pages>
       <abstract>Deep neural models, in particular Transformer-based pre-trained language models, require a significant amount of data to train. This need for data tends to lead to problems when dealing with idiomatic multiword expressions (MWEs), which are inherently less frequent in natural text. As such, this work explores sample efficient methods of idiomaticity detection. In particular we study the impact of Pattern Exploit Training (PET), a few-shot method of classification, and BERTRAM, an efficient method of creating contextual embeddings, on the task of idiomaticity detection. In addition, to further explore generalisability, we focus on the identification of MWEs not present in the training data. Our experiments show that while these methods improve performance on English, they are much less effective on Portuguese and Galician, leading to an overall performance about on par with vanilla mBERT. Regardless, we believe sample efficient methods for both identifying and representing potentially idiomatic MWEs are very encouraging and hold significant potential for future exploration.</abstract>
diff --git a/data/xml/2022.naacl.xml b/data/xml/2022.naacl.xml
index 488f4bf545..b9fb59f4d8 100644
--- a/data/xml/2022.naacl.xml
+++ b/data/xml/2022.naacl.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <editor><first>Marine</first><last>Carpuat</last></editor>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
-      <editor><first>Ivan Vladimir</first><last>Meza Ruiz</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, United States</address>
       <month>July</month>
@@ -46,7 +46,7 @@
     </paper>
     <paper id="3">
       <title>Language Model Augmented Monotonic Attention for Simultaneous Translation</title>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Mohd Abbas</first><last>Zaidi</last></author>
       <author><first>Beomseok</first><last>Lee</last></author>
       <author><first>Nikhil Kumar</first><last>Lakumarapu</last></author>
@@ -62,7 +62,7 @@
       <title>What Makes a Good and Useful Summary? <fixed-case>I</fixed-case>ncorporating Users in Automatic Summarization Research</title>
       <author><first>Maartje</first><last>Ter Hoeve</last></author>
       <author><first>Julia</first><last>Kiseleva</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>46-75</pages>
       <abstract>Automatic text summarization has enjoyed great progress over the years and is used in numerous applications, impacting the lives of many. Despite this development, there is little research that meaningfully investigates how the current research focus in automatic summarization aligns with users’ needs. To bridge this gap, we propose a survey methodology that can be used to investigate the needs of users of automatically generated summaries. Importantly, these needs are dependent on the target group. Hence, we design our survey in such a way that it can be easily adjusted to investigate different user groups. In this work we focus on university students, who make extensive use of summaries during their studies. We find that the current research directions of the automatic summarization community do not fully align with students’ needs. Motivated by our findings, we present ways to mitigate this mismatch in future research on automatic summarization: we propose research directions that impact the design, the development and the evaluation of automatically generated summaries.</abstract>
       <url hash="a5360123">2022.naacl-main.4</url>
@@ -86,7 +86,7 @@
     <paper id="6">
       <title>Semantic Diversity in Dialogue with Natural Language Inference</title>
       <author><first>Katherine</first><last>Stasaski</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>85-98</pages>
       <abstract>Generating diverse, interesting responses to chitchat conversations is a problem for neural conversational agents. This paper makes two substantial contributions to improving diversity in dialogue generation. First, we propose a novel metric which uses Natural Language Inference (NLI) to measure the semantic diversity of a set of model responses for a conversation. We evaluate this metric using an established framework (Tevet and Berant, 2021) and find strong evidence indicating NLI Diversity is correlated with semantic diversity. Specifically, we show that the contradiction relation is more useful than the neutral relation for measuring this diversity and that incorporating the NLI model’s confidence achieves state-of-the-art results. Second, we demonstrate how to iteratively improve the semantic diversity of a sampled set of responses via a new generation procedure called Diversity Threshold Generation, which results in an average 137% increase in NLI Diversity compared to standard generation procedures.</abstract>
       <url hash="a6ae4c7e">2022.naacl-main.6</url>
@@ -133,7 +133,7 @@
       <author><first>Anton</first><last>Chernyavskiy</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
       <author><first>Pavel</first><last>Kalinin</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>116-126</pages>
       <abstract>The use of contrastive loss for representation learning has become prominent in computer vision, and it is now getting attention in Natural Language Processing (NLP).Here, we explore the idea of using a batch-softmax contrastive loss when fine-tuning large-scale pre-trained transformer models to learn better task-specific sentence embeddings for pairwise sentence scoring tasks. We introduce and study a number of variations in the calculation of the loss as well as in the overall training procedure; in particular, we find that a special data shuffling can be quite important. Our experimental results show sizable improvements on a number of datasets and pairwise sentence scoring tasks including classification, ranking, and regression. Finally, we offer detailed analysis and discussion, which should be useful for researchers aiming to explore the utility of contrastive loss in NLP.</abstract>
       <url hash="93a09c01">2022.naacl-main.9</url>
@@ -184,7 +184,7 @@
       <author><first>Paul</first><last>Röttger</last></author>
       <author><first>Bertie</first><last>Vidgen</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>175-190</pages>
       <abstract>Labelled data is the foundation of most natural language processing tasks. However, labelling data is difficult and there often are diverse valid beliefs about what the correct data labels should be. So far, dataset creators have acknowledged annotator subjectivity, but rarely actively managed it in the annotation process. This has led to partly-subjective datasets that fail to serve a clear downstream use. To address this issue, we propose two contrasting paradigms for data annotation. The descriptive paradigm encourages annotator subjectivity, whereas the prescriptive paradigm discourages it. Descriptive annotation allows for the surveying and modelling of different beliefs, whereas prescriptive annotation enables the training of models that consistently apply one belief. We discuss benefits and challenges in implementing both paradigms, and argue that dataset creators should explicitly aim for one or the other to facilitate the intended use of their dataset. Lastly, we conduct an annotation experiment using hate speech data that illustrates the contrast between the two paradigms.</abstract>
       <url hash="623b6ac1">2022.naacl-main.13</url>
@@ -229,7 +229,7 @@
       <author><first>Qingyu</first><last>Yin</last></author>
       <author><first>Bing</first><last>Yin</last></author>
       <author><first>Tuo</first><last>Zhao</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>219-230</pages>
       <abstract>User sessions empower many search and recommendation tasks on a daily basis. Such session data are semi-structured, which encode heterogeneous relations between queries and products, and each item is described by the unstructured text. Despite recent advances in self-supervised learning for text or graphs, there lack of self-supervised learning models that can effectively capture both intra-item semantics and inter-item interactions for semi-structured sessions. To fill this gap, we propose CERES, a graph-based transformer model for semi-structured session data. CERES learns representations that capture both inter- and intra-item semantics with (1) a graph-conditioned masked language pretraining task that jointly learns from item text and item-item relations; and (2) a graph-conditioned transformer architecture that propagates inter-item contexts to item-level representations. We pretrained CERES using ~468 million Amazon sessions and find that CERES outperforms strong pretraining baselines by up to 9% in three session search and entity linking tasks.</abstract>
       <url hash="511bf60b">2022.naacl-main.16</url>
@@ -256,7 +256,7 @@
       <author><first>Shang-Wen</first><last>Li</last></author>
       <author><first>Mingye</first><last>Gao</last></author>
       <author><first>Seunghak</first><last>Yu</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>244-257</pages>
       <abstract>Pretrained language models have significantly improved the performance of downstream language understanding tasks, including extractive question answering, by providing high-quality contextualized word embeddings. However, training question answering models still requires large amounts of annotated data for specific domains. In this work, we propose a cooperative self-training framework, RGX, for automatically generating more non-trivial question-answer pairs to improve model performance. RGX is built upon a masked answer extraction task with an interactive learning environment containing an answer entity Recognizer, a question Generator, and an answer eXtractor. Given a passage with a masked entity, the generator generates a question around the entity, and the extractor is trained to extract the masked entity with the generated question and raw texts. The framework allows the training of question generation and answering models on any text corpora without annotation. We further leverage a self-training technique to improve the performance of both question generation and answer extraction models. Experiment results show that RGX outperforms the state-of-the-art (SOTA) pretrained language models and transfer learning approaches on standard question-answering benchmarks, and yields the new SOTA performance under given model size and transfer learning settings.</abstract>
       <url hash="f1a4f8b4">2022.naacl-main.18</url>
@@ -336,7 +336,7 @@
       <author><first>Kaitlyn</first><last>Zhou</last></author>
       <author><first>Su Lin</first><last>Blodgett</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Alexandra</first><last>Olteanu</last></author>
       <pages>314-324</pages>
@@ -377,7 +377,7 @@
       <author><first>Ozge</first><last>Alacam</last></author>
       <author><first>Monique</first><last>Meuschke</last></author>
       <author><first>Kai</first><last>Lawonn</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>348-374</pages>
       <abstract>Natural language as a modality of interaction is becoming increasingly popular in the field of visualization. In addition to the popular query interfaces, other language-based interactions such as annotations, recommendations, explanations, or documentation experience growing interest. In this survey, we provide an overview of natural language-based interaction in the research area of visualization. We discuss a renowned taxonomy of visualization tasks and classify 119 related works to illustrate the state-of-the-art of how current natural language interfaces support their performance. We examine applied NLP methods and discuss human-machine dialogue structures with a focus on initiative, duration, and communicative functions in recent visualization-oriented dialogue interfaces. Based on this overview, we point out interesting areas for the future application of NLP methods in the field of visualization.</abstract>
       <url hash="c61417a2">2022.naacl-main.27</url>
@@ -409,7 +409,7 @@
       <author><first>Mark</first><last>Perera</last></author>
       <author><first>Christian</first><last>Perstl</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Aleksandar</first><last>Savkov</last></author>
       <pages>385-394</pages>
       <abstract>A growing body of work uses Natural Language Processing (NLP) methods to automatically generate medical notes from audio recordings of doctor-patient consultations. However, there are very few studies on how such systems could be used in clinical practice, how clinicians would adjust to using them, or how system design should be influenced by such considerations. In this paper, we present three rounds of user studies, carried out in the context of developing a medical note generation system. We present, analyse and discuss the participating clinicians’ impressions and views of how the system ought to be adapted to be of value to them. Next, we describe a three-week test run of the system in a live telehealth clinical practice. Major findings include (i) the emergence of five different note-taking behaviours; (ii) the importance of the system generating notes in real time during the consultation; and (iii) the identification of a number of clinical use cases that could prove challenging for automatic note generation systems.</abstract>
@@ -465,7 +465,7 @@
       <author><first>Ruijia</first><last>Cheng</last></author>
       <author><first>Alison</first><last>Smith-Renner</last></author>
       <author><first>Ke</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes-Larrarte</last></author>
       <pages>431-455</pages>
       <abstract>Automatic text summarization systems commonly involve humans for preparing data or evaluating model performance, yet, there lacks a systematic understanding of humans’ roles, experience, and needs when interacting with or being assisted by AI. From a human-centered perspective, we map the design opportunities and considerations for human-AI interaction in text summarization and broader text generation tasks. We first conducted a systematic literature review of 70 papers, developing a taxonomy of five interactions in AI-assisted text generation and relevant design dimensions. We designed text summarization prototypes for each interaction. We then interviewed 16 users, aided by the prototypes, to understand their expectations, experience, and needs regarding efficiency, control, and trust with AI in text summarization and propose design considerations accordingly.</abstract>
@@ -493,7 +493,7 @@
       <author><first>Ke</first><last>Zhang</last></author>
       <author><first>Ruijia</first><last>Cheng</last></author>
       <author><first>Wenjuan</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes-Larrarte</last></author>
       <pages>475-493</pages>
       <abstract>Automatic summarization methods are efficient but can suffer from low quality. In comparison, manual summarization is expensive but produces higher quality. Can humans and AI collaborate to improve summarization performance? In similar text generation tasks (e.g., machine translation), human-AI collaboration in the form of “post-editing” AI-generated text reduces human workload and improves the quality of AI output. Therefore, we explored whether post-editing offers advantages in text summarization. Specifically, we conducted an experiment with 72 participants, comparing post-editing provided summaries with manual summarization for summary quality, human efficiency, and user experience on formal (XSum news) and informal (Reddit posts) text. This study sheds valuable insights on when post-editing is useful for text summarization: it helped in some cases (e.g., when participants lacked domain knowledge) but not in others (e.g., when provided summaries include inaccurate information). Participants’ different editing strategies and needs for assistance offer implications for future human-AI summarization systems.</abstract>
@@ -522,7 +522,7 @@
       <author><first>Jun</first><last>Yan</last></author>
       <author><first>Yang</first><last>Xiao</last></author>
       <author><first>Sagnik</first><last>Mukherjee</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Robin</first><last>Jia</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>508-520</pages>
@@ -629,7 +629,7 @@
       <author><first>Akiko</first><last>Eriguchi</last></author>
       <author><first>Shufang</first><last>Xie</last></author>
       <author><first>Tao</first><last>Qin</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>600-606</pages>
       <abstract>Multilingual Neural Machine Translation (MNMT) enables one system to translate sentences from multiple source languages to multiple target languages, greatly reducing deployment costs compared with conventional bilingual systems. The MNMT training benefit, however, is often limited to many-to-one directions. The model suffers from poor performance in one-to-many and many-to-many with zero-shot setup. To address this issue, this paper discusses how to practically build MNMT systems that serve arbitrary X-Y translation directions while leveraging multilinguality with a two-stage training strategy of pretraining and finetuning. Experimenting with the WMT’21 multilingual translation task, we demonstrate that our systems outperform the conventional baselines of direct bilingual models and pivot translation models for most directions, averagely giving +6.0 and +4.1 BLEU, without the need for architecture change or extra data collection. Moreover, we also examine our proposed approach in an extremely large-scale data setting to accommodate practical deployment scenarios.</abstract>
       <url hash="190af786">2022.naacl-main.44</url>
@@ -791,7 +791,7 @@
     </paper>
     <paper id="56">
       <title>On the Machine Learning of Ethical Judgments from Natural Language</title>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <author><first>Hagen</first><last>Blix</last></author>
       <author><first>Josef</first><last>Valvoda</last></author>
       <author><first>Maya Indira</first><last>Ganesh</last></author>
@@ -817,7 +817,7 @@
       <author><first>Lianhui</first><last>Qin</last></author>
       <author><first>Youngjae</first><last>Yu</last></author>
       <author><first>Rowan</first><last>Zellers</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>780-799</pages>
       <abstract>The dominant paradigm for neural text generation is left-to-right decoding from autoregressive language models. Constrained or controllable generation under complex lexical constraints, however, requires foresight to plan ahead feasible future paths. Drawing inspiration from the <tex-math>A^*</tex-math> search algorithm, we propose NeuroLogic A*esque, a decoding algorithm that incorporates heuristic estimates of future cost. We develop lookahead heuristics that are efficient for large-scale language models, making our method a drop-in replacement for common techniques such as beam search and top-<tex-math>k</tex-math> sampling. To enable constrained generation, we build on NeuroLogic decoding (Lu et al., 2021), combining its flexibility in incorporating logical constraints with A*esque estimates of future constraint satisfaction. Our approach outperforms competitive baselines on five generation tasks, and achieves new state-of-the-art performance on table-to-text generation, constrained machine translation, and keyword-constrained generation. The improvements are particularly notable on tasks that require complex constraint satisfaction or in few-shot or zero-shot settings. NeuroLogic A*esque illustrates the power of decoding for improving and enabling new capabilities of large-scale language models.</abstract>
@@ -878,7 +878,7 @@
       <author><first>Toshiki</first><last>Kawamoto</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>852-859</pages>
       <abstract>A repetition is a response that repeats words in the previous speaker’s utterance in a dialogue. Repetitions are essential in communication to build trust with others, as investigated in linguistic studies. In this work, we focus on repetition generation. To the best of our knowledge, this is the first neural approach to address repetition generation. We propose Weighted Label Smoothing, a smoothing method for explicitly learning which words to repeat during fine-tuning, and a repetition scoring method that can output more appropriate repetitions during decoding. We conducted automatic and human evaluations involving applying these methods to the pre-trained language model T5 for generating repetitions. The experimental results indicate that our methods outperformed baselines in both evaluations.</abstract>
       <url hash="f007b031">2022.naacl-main.62</url>
@@ -987,7 +987,7 @@
       <author><first>Liangke</first><last>Gui</last></author>
       <author><first>Borui</first><last>Wang</last></author>
       <author><first>Qiuyuan</first><last>Huang</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <author><first>Yonatan</first><last>Bisk</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>956-968</pages>
@@ -1027,7 +1027,7 @@
       <author><first>Verena</first><last>Kaynig-Fittkau</last></author>
       <author><first>Jiuxiang</first><last>Gu</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Quan</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <author><first>Dinesh</first><last>Manocha</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
@@ -1117,11 +1117,11 @@
       <title>Inducing and Using Alignments for Transition-based <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Andrew</first><last>Drozdov</last></author>
       <author><first>Jiawei</first><last>Zhou</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
       <pages>1086-1098</pages>
       <abstract>Transition-based parsers for Abstract Meaning Representation (AMR) rely on node-to-word alignments. These alignments are learned separately from parser training and require a complex pipeline of rule-based components, pre-processing, and post-processing to satisfy domain-specific constraints. Parsers also train on a point-estimate of the alignment pipeline, neglecting the uncertainty due to the inherent ambiguity of alignment. In this work we explore two avenues for overcoming these limitations. First, we propose a neural aligner for AMR that learns node-to-word alignments without relying on complex pipelines. We subsequently explore a tighter integration of aligner and parser training by considering a distribution over oracle action sequences arising from aligner uncertainty. Empirical results show this approach leads to more accurate alignments and generalization better from the AMR2.0 to AMR3.0 corpora. We attain a new state-of-the art for gold-only trained models, matching silver-trained performance without the need for beam search on AMR3.0.</abstract>
       <url hash="2c37b181">2022.naacl-main.80</url>
@@ -1144,7 +1144,7 @@
     <paper id="82">
       <title><fixed-case>DREAM</fixed-case>: Improving Situational <fixed-case>QA</fixed-case> by First Elaborating the Situation</title>
       <author><first>Yuling</first><last>Gu</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>1115-1127</pages>
       <abstract>When people answer questions about a specific situation, e.g., “I cheated on my mid-term exam last week. Was that wrong?”, cognitive science suggests that they form a mental picture of that situation before answering. While we do not know how language models (LMs) answer such questions, we conjecture that they may answer more accurately if they are also provided with additional details about the question situation, elaborating the “scene”. To test this conjecture, we train a new model, DREAM, to answer questions that elaborate the scenes that situated questions are about, and then provide those elaborations as additional context to a question-answering (QA) model. We find that DREAM is able to create better scene elaborations (more accurate, useful, and consistent) than a representative state-of-the-art, zero-shot model (Macaw). We also find that using the scene elaborations as additional context improves the answer accuracy of a downstream QA system, including beyond that obtainable by simply further fine-tuning the QA system on DREAM’s training data. These results suggest that adding focused elaborations about a situation can improve a system’s reasoning about it, and may serve as an effective way of injecting new scenario-based knowledge into QA models. Finally, our approach is dataset-neutral; we observe improved QA performance across different models, with even bigger gains on models with fewer parameters.</abstract>
@@ -1185,7 +1185,7 @@
       <author><first>Satwik</first><last>Kottur</last></author>
       <author><first>Ahmad</first><last>Beirami</last></author>
       <author><first>Shahin</first><last>Shayandeh</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Alborz</first><last>Geramifard</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Chinnadhurai</first><last>Sankar</last></author>
@@ -1213,7 +1213,7 @@
     <paper id="87">
       <title>Towards a Progression-Aware Autonomous Dialogue Agent</title>
       <author><first>Abraham</first><last>Sanders</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Mei</first><last>Si</last></author>
       <author><first>Albert</first><last>Chang</last></author>
       <author><first>Deepanshu</first><last>Dey</last></author>
@@ -1263,7 +1263,7 @@
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Martin</first><last>Tomko</last></author>
       <author><first>Maria</first><last>Vasardani</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1250-1260</pages>
       <abstract>Most existing reading comprehension datasets focus on single-span answers, which can be extracted as a single contiguous span from a given text passage. Multi-span questions, i.e., questions whose answer is a series of multiple discontiguous spans in the text, are common real life but are less studied. In this paper, we present MultiSpanQA, a new dataset that focuses on multi-span questions. Raw questions and contexts are extracted from the Natural Questions dataset. After multi-span re-annotation, MultiSpanQA consists of over a total of 6,000 multi-span questions in the basic version, and over 19,000 examples with unanswerable questions, and questions with single-, and multi-span answers in the expanded version. We introduce new metrics for the purposes of multi-span question answering evaluation, and establish several baselines using advanced models. Finally, we propose a new model which beats all baselines and achieves state-of-the-art on our dataset.</abstract>
       <url hash="e1a6fc51">2022.naacl-main.90</url>
@@ -1290,7 +1290,7 @@
       <title>Theory-Grounded Measurement of <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Social Stereotypes in <fixed-case>E</fixed-case>nglish Language Models</title>
       <author><first>Yang Trista</first><last>Cao</last></author>
       <author><first>Anna</first><last>Sotnikova</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Rachel</first><last>Rudinger</last></author>
       <author><first>Linda</first><last>Zou</last></author>
       <pages>1276-1295</pages>
@@ -1304,7 +1304,7 @@
       <title>Sort by Structure: Language Model Ranking as Dependency Probing</title>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1296-1307</pages>
       <abstract>Making an informed choice of pre-trained language model (LM) is critical for performance, yet environmentally costly, and as such widely underexplored. The field of Computer Vision has begun to tackle encoder ranking, with promising forays into Natural Language Processing, however they lack coverage of linguistic tasks such as structured prediction. We propose probing to rank LMs, specifically for parsing dependencies in a given language, by measuring the degree to which labeled trees are recoverable from an LM’s contextualized embeddings. Across 46 typologically and architecturally diverse LM-language pairs, our probing approach predicts the best LM choice 79% of the time using orders of magnitude less compute than training a full parser. Within this study, we identify and analyze one recently proposed decoupled LM—RemBERT—and find it strikingly contains less inherent dependency information, but often yields the best parser after full fine-tuning. Without this outlier our approach identifies the best LM in 89% of cases.</abstract>
       <url hash="798ce526">2022.naacl-main.93</url>
@@ -1314,7 +1314,7 @@
     </paper>
     <paper id="94">
       <title>Quantifying Synthesis and Fusion and their Impact on Machine Translation</title>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Niels</first><last>Van Berkel</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
@@ -1343,7 +1343,7 @@
     <paper id="96">
       <title>Efficient Hierarchical Domain Adaptation for Pretrained Language Models</title>
       <author><first>Alexandra</first><last>Chronopoulou</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Jesse</first><last>Dodge</last></author>
       <pages>1336-1351</pages>
       <abstract>The remarkable success of large language models has been driven by dense models trained on massive unlabeled, unstructured corpora. These corpora typically contain text from diverse, heterogeneous sources, but information about the source of the text is rarely used during training. Transferring their knowledge to a target domain is typically done by continuing training in-domain. In this paper, we introduce a method to permit domain adaptation to many diverse domains using a computationally efficient adapter approach. Our method is based on the observation that textual domains are partially overlapping, and we represent domains as a hierarchical tree structure where each node in the tree is associated with a set of adapter weights. When combined with a frozen pretrained language model, this approach enables parameter sharing among related domains, while avoiding negative interference between unrelated ones. Experimental results with GPT-2 and a large fraction of the 100 most represented websites in C4 show across-the-board improvements in-domain. We additionally provide an inference time algorithm for a held-out domain and show that averaging over multiple paths through the tree enables further gains in generalization, while adding only a marginal cost to inference.</abstract>
@@ -1358,7 +1358,7 @@
       <author><first>Bertie</first><last>Vidgen</last></author>
       <author><first>Paul</first><last>Rottger</last></author>
       <author><first>Tristan</first><last>Thrush</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <pages>1352-1368</pages>
       <abstract>Detecting online hate is a complex task, and low-performing models have harmful consequences when used for sensitive applications such as content moderation. Emoji-based hate is an emerging challenge for automated detection. We present HatemojiCheck, a test suite of 3,930 short-form statements that allows us to evaluate performance on hateful language expressed with emoji. Using the test suite, we expose weaknesses in existing hate detection models. To address these weaknesses, we create the HatemojiBuild dataset using a human-and-model-in-the-loop approach. Models built with these 5,912 adversarial examples perform substantially better at detecting emoji-based hate, while retaining strong performance on text-only hate. Both HatemojiCheck and HatemojiBuild are made publicly available.</abstract>
       <url hash="9484df21">2022.naacl-main.97</url>
@@ -1370,7 +1370,7 @@
       <title>On the Economics of Multilingual Few-shot Learning: Modeling the Cost-Performance Trade-offs of Machine Translated and Manual Data</title>
       <author><first>Kabir</first><last>Ahuja</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <pages>1369-1384</pages>
       <abstract>Borrowing ideas from Production functions in micro-economics, in this paper we introduce a framework to systematically evaluate the performance and cost trade-offs between machine-translated and manually-created labelled data for task-specific fine-tuning of massively multilingual language models. We illustrate the effectiveness of our framework through a case-study on the TyDIQA-GoldP dataset. One of the interesting conclusion of the study is that if the cost of machine translation is greater than zero, the optimal performance at least cost is always achieved with at least some or only manually-created data. To our knowledge, this is the first attempt towards extending the concept of production functions to study data collection strategies for training multilingual models, and can serve as a valuable tool for other similar cost vs data trade-offs in NLP.</abstract>
       <url hash="67b74b01">2022.naacl-main.98</url>
@@ -1395,10 +1395,10 @@
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>António</first><last>Farinhas</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Perez</first><last>Ogayo</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Andre</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last></author>
       <pages>1396-1412</pages>
       <abstract>Despite the progress in machine translation quality estimation and evaluation in the last years, decoding in neural machine translation (NMT) is mostly oblivious to this and centers around finding the most probable translation according to the model (MAP decoding), approximated with beam search. In this paper, we bring together these two lines of research and propose <i>quality-aware decoding</i> for NMT, by leveraging recent breakthroughs in reference-free and reference-based MT evaluation through various inference methods like <tex-math>N</tex-math>-best reranking and minimum Bayes risk decoding. We perform an extensive comparison of various possible candidate generation and ranking methods across four datasets and two model classes and find that quality-aware decoding consistently outperforms MAP-based decoding according both to state-of-the-art automatic metrics (COMET and BLEURT) and to human assessments.</abstract>
       <url hash="08b1237d">2022.naacl-main.100</url>
@@ -1426,7 +1426,7 @@
       <author><first>Lingkai</first><last>Kong</last></author>
       <author><first>Jieyu</first><last>Zhang</last></author>
       <author><first>Rongzhi</first><last>Zhang</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>1422-1436</pages>
       <abstract>Although fine-tuning pre-trained language models (PLMs) renders strong performance in many NLP tasks, it relies on excessive labeled data. Recently, researchers have resorted to active fine-tuning for enhancing the label efficiency of PLM fine-tuning, but existing methods of this type usually ignore the potential of unlabeled data. We develop AcTune, a new framework that improves the label efficiency of active PLM fine-tuning by unleashing the power of unlabeled data via self-training. AcTune switches between data annotation and model self-training based on uncertainty: the unlabeled samples of high-uncertainty are selected for annotation, while the ones from low-uncertainty regions are used for model self-training. Additionally, we design (1) a region-aware sampling strategy to avoid redundant samples when querying annotations and (2) a momentum-based memory bank to dynamically aggregate the model’s pseudo labels to suppress label noise in self-training. Experiments on 6 text classification datasets show that AcTune outperforms the strongest active learning and self-training baselines and improves the label efficiency of PLM fine-tuning by 56.2% on average. Our implementation is available at <url>https://github.com/yueyu1030/actune</url>.</abstract>
       <url hash="49cd916b">2022.naacl-main.102</url>
@@ -1465,7 +1465,7 @@
       <title>Forecasting <fixed-case>COVID</fixed-case>-19 Caseloads Using Unsupervised Embedding Clusters of Social Media Posts</title>
       <author><first>Felix</first><last>Drinkall</last></author>
       <author><first>Stefan</first><last>Zohren</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>1471-1484</pages>
       <abstract>We present a novel approach incorporating transformer-based language models into infectious disease modelling. Text-derived features are quantified by tracking high-density clusters of sentence-level representations of Reddit posts within specific US states’ COVID-19 subreddits. We benchmark these clustered embedding features against features extracted from other high-quality datasets. In a threshold-classification task, we show that they outperform all other feature types at predicting upward trend signals, a significant result for infectious disease modelling in areas where epidemiological data is unreliable. Subsequently, in a time-series forecasting task, we fully utilise the predictive power of the caseload and compare the relative strengths of using different supplementary datasets as covariate feature sets in a transformer-based time-series model.</abstract>
       <url hash="82ccf3e0">2022.naacl-main.105</url>
@@ -1478,7 +1478,7 @@
       <author><first>Rahul</first><last>Kumar</last></author>
       <author><first>Sandeep</first><last>Mathias</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1485-1495</pages>
       <abstract>Most research in the area of automatic essay grading (AEG) is geared towards scoring the essay <i>holistically</i> while there has also been little work done on scoring individual essay traits. In this paper, we describe a way to score essays using a multi-task learning (MTL) approach, where scoring the essay holistically is the primary task, and scoring the essay traits is the auxiliary task. We compare our results with a single-task learning (STL) approach, using both LSTMs and BiLSTMs. To find out which traits work best for different types of essays, we conduct ablation tests for each of the essay traits. We also report the runtime and number of training parameters for each system. We find that MTL-based BiLSTM system gives the best results for scoring the essay holistically, as well as performing well on scoring the essay traits. The MTL systems also give a speed-up of between <b>2.30</b> to <b>3.70</b> times the speed of the STL system, when it comes to scoring the essay and all the traits.</abstract>
       <url hash="10a6d923">2022.naacl-main.106</url>
@@ -1667,7 +1667,7 @@
       <author><first>Jing</first><last>Zhao</last></author>
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1655-1666</pages>
       <abstract>Machine reading comprehension (MRC) that requires discrete reasoning involving symbolic operations, e.g., addition, sorting, and counting, is a challenging task. According to this nature, semantic parsing-based methods predict interpretable but complex logical forms. However, logical form generation is nontrivial and even a little perturbation in a logical form will lead to wrong answers. To alleviate this issue, multi-predictor -based methods are proposed to directly predict different types of answers and achieve improvements. However, they ignore the utilization of symbolic operations and encounter a lack of reasoning ability and interpretability. To inherit the advantages of these two types of methods, we propose OPERA, an operation-pivoted discrete reasoning framework, where lightweight symbolic operations (compared with logical forms) as neural modules are utilized to facilitate the reasoning ability and interpretability. Specifically, operations are first selected and then softly executed to simulate the answer reasoning procedure. Extensive experiments on both DROP and RACENum datasets show the reasoning ability of OPERA. Moreover, further analysis verifies its interpretability.</abstract>
       <url hash="d21c4d72">2022.naacl-main.119</url>
@@ -1809,7 +1809,7 @@
     </paper>
     <paper id="129">
       <title>Non-Autoregressive Machine Translation: It’s Not as Fast as it Seems</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>1780-1790</pages>
@@ -1836,7 +1836,7 @@
       <title>Combining Humor and Sarcasm for Improving Political Parody Detection</title>
       <author><first>Xiao</first><last>Ao</last></author>
       <author><first>Danae</first><last>Sanchez Villegas</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>1800-1807</pages>
       <abstract>Parody is a figurative device used for mimicking entities for comedic or critical purposes. Parody is intentionally humorous and often involves sarcasm. This paper explores jointly modelling these figurative tropes with the goal of improving performance of political parody detection in tweets. To this end, we present a multi-encoder model that combines three parallel encoders to enrich parody-specific representations with humor and sarcasm information. Experiments on a publicly available data set of political parody tweets demonstrate that our approach outperforms previous state-of-the-art methods.</abstract>
@@ -1903,7 +1903,7 @@
       <title>The Devil is in the Details: On the Pitfalls of Vocabulary Selection in Neural Machine Translation</title>
       <author><first>Tobias</first><last>Domhan</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Sony</first><last>Trenous</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <author><first>Felix</first><last>Hieber</last></author>
@@ -1936,7 +1936,7 @@
       <author><first>Kuan-Hao</first><last>Huang</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>1890-1908</pages>
@@ -2081,7 +2081,7 @@
       <title><fixed-case>CS</fixed-case>1<fixed-case>QA</fixed-case>: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course</title>
       <author><first>Changyoon</first><last>Lee</last></author>
       <author><first>Yeon</first><last>Seonwoo</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>2026-2040</pages>
       <abstract>We introduce CS1QA, a dataset for code-based question answering in the programming education domain. CS1QA consists of 9,237 question-answer pairs gathered from chat logs in an introductory programming class using Python, and 17,698 unannotated chat data with code. Each question is accompanied with the student’s code, and the portion of the code relevant to answering the question. We carefully design the annotation process to construct CS1QA, and analyze the collected dataset in detail. The tasks for CS1QA are to predict the question type, the relevant code snippet given the question and the code and retrieving an answer from the annotated corpus. Results for the experiments on several baseline models are reported and thoroughly analyzed. The tasks for CS1QA challenge models to understand both the code and natural language. This unique dataset can be used as a benchmark for source code comprehension and question answering in the educational setting.</abstract>
       <url hash="ded73ba9">2022.naacl-main.148</url>
@@ -2095,7 +2095,7 @@
       <author><first>Kemal</first><last>Kurniawan</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <author><first>Philip</first><last>Schulz</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2041-2054</pages>
       <abstract>Providing technologies to communities or domains where training data is scarce or protected e.g., for privacy reasons, is becoming increasingly important. To that end, we generalise methods for unsupervised transfer from multiple input models for structured prediction. We show that the means of aggregating over the input models is critical, and that multiplying marginal probabilities of substructures to obtain high-probability structures for distant supervision is substantially better than taking the union of such structures over the input models, as done in prior work. Testing on 18 languages, we demonstrate that the method works in a cross-lingual setting, considering both dependency parsing and part-of-speech structured prediction problems. Our analyses show that the proposed method produces less noisy labels for the distant supervision.</abstract>
       <url hash="12355c86">2022.naacl-main.149</url>
@@ -2150,7 +2150,7 @@
       <title>Reference-free Summarization Evaluation via Semantic Correlation and Compression Ratio</title>
       <author><first>Yizhu</first><last>Liu</last></author>
       <author><first>Qi</first><last>Jia</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>2109-2115</pages>
       <abstract>A document can be summarized in a number of ways. Reference-based evaluation of summarization has been criticized for its inflexibility. The more sufficient the number of abstracts, the more accurate the evaluation results. However, it is difficult to collect sufficient reference summaries. In this paper, we propose a new automatic reference-free evaluation metric that compares semantic distribution between source document and summary by pretrained language models and considers summary compression ratio. The experiments show that this metric is more consistent with human evaluation in terms of coherence, consistency, relevance and fluency.</abstract>
       <url hash="f81bda6f">2022.naacl-main.153</url>
@@ -2274,7 +2274,7 @@
       <author><first>Yu Jin</first><last>Kim</last></author>
       <author><first>Beong-woo</first><last>Kwak</last></author>
       <author><first>Youngwook</first><last>Kim</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <author><first>Jinyoung</first><last>Yeo</last></author>
       <pages>2244-2257</pages>
@@ -2424,7 +2424,7 @@
       <author><first>Saichethan</first><last>Reddy</last></author>
       <author><first>Anindya</first><last>Das</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2436-2449</pages>
       <abstract>Mental Health Disorders continue plaguing humans worldwide. Aggravating this situation is the severe shortage of qualified and competent mental health professionals (MHPs), which underlines the need for developing Virtual Assistants (VAs) that can <i>assist</i> MHPs. The data+ML for automation can come from platforms that allow visiting and posting messages in peer-to-peer anonymous manner for sharing their experiences (frequently stigmatized) and seeking support. In this paper, we propose a VA that can act as the first point of contact and comfort for mental health patients. We curate a dataset, Motivational VA: MotiVAte comprising of 7k dyadic conversations collected from a peer-to-peer support platform. The system employs two mechanisms: (i) Mental Illness Classification: an attention based BERT classifier that outputs the mental disorder category out of the 4 categories, viz., Major Depressive Disorder (MDD), Anxiety, Obsessive Compulsive Disorder (OCD) and Post-traumatic Stress Disorder (PTSD), based on the input ongoing dialog between the support seeker and the VA; and (ii) Mental Illness Conditioned Motivational Dialogue Generation (MI-MDG): a sentiment driven Reinforcement Learning (RL) based motivational response generator. The empirical evaluation demonstrates the system capability by way of outperforming several baselines.</abstract>
       <url hash="87513f45">2022.naacl-main.174</url>
@@ -2434,7 +2434,7 @@
     </paper>
     <paper id="175">
       <title><fixed-case>S</fixed-case>ue<fixed-case>N</fixed-case>es: A Weakly Supervised Approach to Evaluating Single-Document Summarization via Negative Sampling</title>
-      <author><first>Forrest</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last></author>
       <author><first>Ge</first><last>Luo</last></author>
       <author><first>Hebi</first><last>Li</last></author>
       <author><first>Minghui</first><last>Qiu</last></author>
@@ -2493,11 +2493,11 @@
     </paper>
     <paper id="180">
       <title><fixed-case>A</fixed-case>nswer<fixed-case>S</fixed-case>umm: A Manually-Curated Dataset and Pipeline for Answer Summarization</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Xiaojian</first><last>Wu</last></author>
       <author><first>Srini</first><last>Iyer</last></author>
       <author><first>Haoran</first><last>Li</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>2508-2520</pages>
       <abstract>Community Question Answering (CQA) fora such as Stack Overflow and Yahoo! Answers contain a rich resource of answers to a wide range of community-based questions. Each question thread can receive a large number of answers with different perspectives. One goal of answer summarization is to produce a summary that reflects the range of answer perspectives. A major obstacle for this task is the absence of a dataset to provide supervision for producing such summaries. Recent works propose heuristics to create such data, but these are often noisy and do not cover all answer perspectives present. This work introduces a novel dataset of 4,631 CQA threads for answer summarization curated by professional linguists. Our pipeline gathers annotations for all subtasks of answer summarization, including relevant answer sentence selection, grouping these sentences based on perspectives, summarizing each perspective, and producing an overall summary. We analyze and benchmark state-of-the-art models on these subtasks and introduce a novel unsupervised approach for multi-perspective data augmentation that boosts summarization performance according to automatic evaluation. Finally, we propose reinforcement learning rewards to improve factual consistency and answer coverage and analyze areas for improvement.</abstract>
       <url hash="fca315ec">2022.naacl-main.180</url>
@@ -2584,7 +2584,7 @@
     </paper>
     <paper id="187">
       <title><fixed-case>QAF</fixed-case>act<fixed-case>E</fixed-case>val: Improved <fixed-case>QA</fixed-case>-Based Factual Consistency Evaluation for Summarization</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Wenhao</first><last>Liu</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
@@ -2649,7 +2649,7 @@
       <title>Necessity and Sufficiency for Explaining Text Classifiers: A Case Study in Hate Speech Detection</title>
       <author><first>Esma</first><last>Balkir</last></author>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>2672-2686</pages>
       <abstract>We present a novel feature attribution method for explaining text classifiers, and analyze it in the context of hate speech detection. Although feature attribution models usually provide a single importance score for each token, we instead provide two complementary and theoretically-grounded scores – necessity and sufficiency – resulting in more informative explanations. We propose a transparent method that calculates these values by generating explicit perturbations of the input text, allowing the importance scores themselves to be explainable. We employ our method to explain the predictions of different hate speech detection models on the same set of curated examples from a test suite, and show that different values of necessity and sufficiency for identity terms correspond to different kinds of false positive errors, exposing sources of classifier bias against marginalized groups.</abstract>
@@ -2676,12 +2676,12 @@
     </paper>
     <paper id="194">
       <title><fixed-case>R</fixed-case>e2<fixed-case>G</fixed-case>: Retrieve, Rerank, Generate</title>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Gaetano</first><last>Rossiello</last></author>
       <author><first>Md Faisal Mahbub</first><last>Chowdhury</last></author>
       <author><first>Ankita</first><last>Naik</last></author>
       <author><first>Pengshan</first><last>Cai</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>2701-2715</pages>
       <abstract>As demonstrated by GPT-3 and T5, transformers grow in capability as parameter spaces become larger and larger. However, for tasks that require a large amount of knowledge, non-parametric memory allows models to grow dramatically with a sub-linear increase in computational cost and GPU memory requirements. Recent models such as RAG and REALM have introduced retrieval into conditional generation. These models incorporate neural initial retrieval from a corpus of passages. We build on this line of research, proposing Re2G, which combines both neural initial retrieval and reranking into a BART-based sequence-to-sequence generation. Our reranking approach also permits merging retrieval results from sources with incomparable scores, enabling an ensemble of BM25 and neural initial retrieval. To train our system end-to-end, we introduce a novel variation of knowledge distillation to train the initial retrieval, reranker and generation using only ground truth on the target sequence output. We find large gains in four diverse tasks: zero-shot slot filling, question answering, fact checking and dialog, with relative gains of 9% to 34% over the previous state-of-the-art on the KILT leaderboard. We make our code available as open source.</abstract>
       <url hash="b2321d67">2022.naacl-main.194</url>
@@ -2723,7 +2723,7 @@
       <author><first>Masahiro</first><last>Kaneko</last></author>
       <author><first>Aizhan</first><last>Imankulova</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>2740-2750</pages>
       <abstract>Masked Language Models (MLMs) pre-trained by predicting masked tokens on large corpora have been used successfully in natural language processing tasks for a variety of languages. Unfortunately, it was reported that MLMs also learn discriminative biases regarding attributes such as gender and race. Because most studies have focused on MLMs in English, the bias of MLMs in other languages has rarely been investigated. Manual annotation of evaluation data for languages other than English has been challenging due to the cost and difficulty in recruiting annotators. Moreover, the existing bias evaluation methods require the stereotypical sentence pairs consisting of the same context with attribute words (e.g. He/She is a nurse).We propose Multilingual Bias Evaluation (MBE) score, to evaluate bias in various languages using only English attribute word lists and parallel corpora between the target language and English without requiring manually annotated data. We evaluated MLMs in eight languages using the MBE and confirmed that gender-related biases are encoded in MLMs for all those languages. We manually created datasets for gender bias in Japanese and Russian to evaluate the validity of the MBE.The results show that the bias scores reported by the MBE significantly correlates with that computed from the above manually created datasets and the existing English datasets for gender bias.</abstract>
       <url hash="2c3d20cc">2022.naacl-main.197</url>
@@ -2748,7 +2748,7 @@
       <title>Falsesum: Generating Document-level <fixed-case>NLI</fixed-case> Examples for Recognizing Factual Inconsistency in Summarization</title>
       <author><first>Prasetya</first><last>Utama</last></author>
       <author><first>Joshua</first><last>Bambrick</last></author>
-      <author><first>Nafise</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise</first><last>Moosavi</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>2763-2776</pages>
       <abstract>Neural abstractive summarization models are prone to generate summaries that are factually inconsistent with their source documents. Previous work has introduced the task of recognizing such factual inconsistency as a downstream application of natural language inference (NLI). However, state-of-the-art NLI models perform poorly in this context due to their inability to generalize to the target task. In this work, we show that NLI models can be effective for this task when the training data is augmented with high-quality task-oriented examples. We introduce Falsesum, a data generation pipeline leveraging a controllable text generation model to perturb human-annotated summaries, introducing varying types of factual inconsistencies. Unlike previously introduced document-level NLI datasets, our generated dataset contains examples that are diverse and inconsistent yet plausible. We show that models trained on a Falsesum-augmented NLI dataset improve the state-of-the-art performance across four benchmarks for detecting factual inconsistency in summarization.</abstract>
@@ -2762,7 +2762,7 @@
       <author><first>Besnik</first><last>Fetahu</last></author>
       <author><first>Anjie</first><last>Fang</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>2777-2790</pages>
       <abstract>Named entity recognition (NER) in a real-world setting remains challenging and is impacted by factors like text genre, corpus quality, and data availability. NER models trained on CoNLL do not transfer well to other domains, even within the same language. This is especially the case for multi-lingual models when applied to low-resource languages, and is mainly due to missing entity information. We propose an approach that with limited effort and data, addresses the NER knowledge gap across languages and domains. Our novel approach uses a token-level gating layer to augment pre-trained multilingual transformers with gazetteers containing named entities (NE) from a target language or domain. This approach provides the flexibility to jointly integrate both textual and gazetteer information dynamically: entity knowledge from gazetteers is used only when a token’s textual representation is insufficient for the NER task. Evaluation on several languages and domains demonstrates: (i) a high mismatch of reported NER performance on CoNLL vs. domain specific datasets, (ii) gazetteers significantly improve NER performance across languages and domains, and (iii) gazetteers can be flexibly incorporated to guide knowledge transfer. On cross-lingual transfer we achieve an improvement over the baseline with F1=+17.6%, and with F1=+21.3% for cross-domain transfer.</abstract>
       <url hash="c7f56069">2022.naacl-main.200</url>
@@ -2774,7 +2774,7 @@
       <title><fixed-case>M</fixed-case>eta<fixed-case>ICL</fixed-case>: Learning to Learn In Context</title>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>2791-2809</pages>
       <abstract>We introduce MetaICL (Meta-training for In-Context Learning), a new meta-training framework for few-shot learning where a pretrained language model is tuned to do in-context learning on a large set of training tasks. This meta-training enables the model to more effectively learn a new task in context at test time, by simply conditioning on a few training examples with no parameter updates or task-specific templates. We experiment on a large, diverse collection of tasks consisting of 142 NLP datasets including classification, question answering, natural language inference, paraphrase detection and more, across seven different meta-training/target splits. MetaICL outperforms a range of baselines including in-context learning without meta-training and multi-task learning followed by zero-shot transfer. We find that the gains are particularly significant for target tasks that have domain shifts from the meta-training tasks, and that using a diverse set of the meta-training tasks is key to improvements. We also show that MetaICL approaches (and sometimes beats) the performance of models fully finetuned on the target task training data, and outperforms much bigger models with nearly 8x parameters.</abstract>
@@ -2791,7 +2791,7 @@
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>2810-2823</pages>
       <abstract>Providing conversation models with background knowledge has been shown to make open-domain dialogues more informative and engaging. Existing models treat knowledge selection as a sentence ranking or classification problem where each sentence is handled individually, ignoring the internal semantic connection between sentences. In this work, we propose to automatically convert the background knowledge documents into document semantic graphs and then perform knowledge selection over such graphs. Our document semantic graphs preserve sentence-level information through the use of sentence nodes and provide concept connections between sentences. We apply multi-task learning to perform sentence-level knowledge selection and concept-level knowledge selection, showing that it improves sentence-level selection. Our experiments show that our semantic graph-based knowledge selection improves over sentence selection baselines for both the knowledge selection task and the end-to-end response generation task on HollE and improves generalization on unseen topics in WoW.</abstract>
       <url hash="c72a62f4">2022.naacl-main.202</url>
@@ -2883,7 +2883,7 @@
       <title>Learning to Borrow– Relation Representation for Without-Mention Entity-Pairs for Knowledge Graph Completion</title>
       <author><first>Huda</first><last>Hakami</last></author>
       <author><first>Mona</first><last>Hakami</last></author>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
       <pages>2887-2898</pages>
       <abstract>Prior work on integrating text corpora with knowledge graphs (KGs) to improve Knowledge Graph Embedding (KGE) have obtained good performance for entities that co-occur in sentences in text corpora. Such sentences (textual mentions of entity-pairs) are represented as Lexicalised Dependency Paths (LDPs) between two entities. However, it is not possible to represent relations between entities that do not co-occur in a single sentence using LDPs. In this paper, we propose and evaluate several methods to address this problem, where we <i>borrow</i> LDPs from the entity pairs that co-occur in sentences in the corpus (i.e. <i>with mentions</i> entity pairs) to represent entity pairs that do <i>not</i> co-occur in any sentence in the corpus (i.e. <i>without mention</i> entity pairs). We propose a supervised borrowing method, <i>SuperBorrow</i>, that learns to score the suitability of an LDP to represent a without-mentions entity pair using pre-trained entity embeddings and contextualised LDP representations. Experimental results show that SuperBorrow improves the link prediction performance of multiple widely-used prior KGE methods such as TransE, DistMult, ComplEx and RotatE.</abstract>
@@ -2921,7 +2921,7 @@
       <author><first>Wang</first><last>Xu</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Lili</first><last>Mou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2920-2929</pages>
       <abstract>Document-level relation extraction (DocRE) aims to determine the relation between two entities from a document of multiple sentences. Recent studies typically represent the entire document by sequence- or graph-based models to predict the relations of all entity pairs. However, we find that such a model is not robust and exhibits bizarre behaviors: it predicts correctly when an entire test document is fed as input, but errs when non-evidence sentences are removed. To this end, we propose a Sentence Importance Estimation and Focusing (SIEF) framework for DocRE, where we design a sentence importance score and a sentence focusing loss, encouraging DocRE models to focus on evidence sentences. Experimental results on two domains show that our SIEF not only improves overall performance, but also makes DocRE models more robust. Moreover, SIEF is a general framework, shown to be effective when combined with a variety of base DocRE models.</abstract>
       <url hash="416be969">2022.naacl-main.212</url>
@@ -2933,7 +2933,7 @@
       <title>Are All the Datasets in Benchmark Necessary? A Pilot Study of Dataset Evaluation for Text Classification</title>
       <author><first>Yang</first><last>Xiao</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <pages>2930-2941</pages>
       <abstract>In this paper, we ask the research question of whether all the datasets in the benchmark are necessary. We approach this by first characterizing the distinguishability of datasets when comparing different systems. Experiments on 9 datasets and 36 systems show that several existing benchmark datasets contribute little to discriminating top-scoring systems, while those less used datasets exhibit impressive discriminative power. We further, taking the text classification task as a case study, investigate the possibility of predicting dataset discrimination based on its properties (e.g., average sentence length). Our preliminary experiments promisingly show that given a sufficient number of training experimental records, a meaningful predictor can be learned to estimate dataset discrimination over unseen datasets. We released all datasets with features explored in this work on DataLab.</abstract>
@@ -3060,8 +3060,8 @@
     </paper>
     <paper id="223">
       <title>A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for <fixed-case>A</fixed-case>frican News Translation</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
-      <author><first>Jesujoba Oluwadara</first><last>Alabi</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba Oluwadara</first><last>Alabi</last></author>
       <author><first>Angela</first><last>Fan</last></author>
       <author><first>Julia</first><last>Kreutzer</last></author>
       <author><first>Xiaoyu</first><last>Shen</last></author>
@@ -3324,7 +3324,7 @@
       <author><first>Ikuya</first><last>Yamada</last></author>
       <author><first>Koki</first><last>Washio</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>3264-3271</pages>
       <abstract>We propose a global entity disambiguation (ED) model based on BERT. To capture global contextual information for ED, our model treats not only words but also entities as input tokens, and solves the task by sequentially resolving mentions to their referent entities and using resolved entities as inputs at each step. We train the model using a large entity-annotated corpus obtained from Wikipedia. We achieve new state-of-the-art results on five standard ED datasets: AIDA-CoNLL, MSNBC, AQUAINT, ACE2004, and WNED-WIKI. The source code and model checkpoint are available at <url>https://github.com/studio-ousia/luke</url>.</abstract>
       <url hash="4e2e4869">2022.naacl-main.238</url>
@@ -3357,7 +3357,7 @@
       <author><first>Xinyu</first><last>Zhang</last></author>
       <author><first>Hao</first><last>Jiang</last></author>
       <author><first>Zhao</first><last>Cao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <pages>3288-3303</pages>
       <abstract>Supersized pre-trained language models have pushed the accuracy of various natural language processing (NLP) tasks to a new state-of-the-art (SOTA). Rather than pursuing the reachless SOTA accuracy, more and more researchers start paying attention to model efficiency and usability. Different from accuracy, the metric for efficiency varies across different studies, making them hard to be fairly compared. To that end, this work presents ELUE (Efficient Language Understanding Evaluation), a standard evaluation, and a public leaderboard for efficient NLP models. ELUE is dedicated to depicting the Pareto Frontier for various language understanding tasks, such that it can tell whether and how much a method achieves Pareto improvement. Along with the benchmark, we also release a strong baseline, ElasticBERT, which allows BERT to exit at any layer in both static and dynamic ways. We demonstrate the ElasticBERT, despite its simplicity, outperforms or performs on par with SOTA compressed and early exiting models. With ElasticBERT, the proposed ELUE has a strong Pareto Frontier and makes a better evaluation for efficient NLP models.</abstract>
@@ -3449,7 +3449,7 @@
       <author><first>GuanYu</first><last>Wu</last></author>
       <author><first>Aiwei</first><last>Liu</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>3362-3376</pages>
       <abstract>The explosion of misinformation spreading in the media ecosystem urges for automated fact-checking. While misinformation spans both geographic and linguistic boundaries, most work in the field has focused on English. Datasets and tools available in other languages, such as Chinese, are limited. In order to bridge this gap, we construct CHEF, the first CHinese Evidence-based Fact-checking dataset of 10K real-world claims. The dataset covers multiple domains, ranging from politics to public health, and provides annotated evidence retrieved from the Internet. Further, we develop established baselines and a novel approach that is able to model the evidence retrieval as a latent variable, allowing jointly training with the veracity prediction model in an end-to-end fashion. Extensive experiments show that CHEF will provide a challenging testbed for the development of fact-checking systems designed to retrieve and reason over non-English claims.</abstract>
       <url hash="5d11e91a">2022.naacl-main.246</url>
@@ -3460,7 +3460,7 @@
     <paper id="247">
       <title><fixed-case>VGNMN</fixed-case>: Video-grounded Neural Module Networks for Video-Grounded Dialogue Systems</title>
       <author><first>Hung</first><last>Le</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven</first><last>Hoi</last></author>
       <pages>3377-3393</pages>
       <abstract>Neural module networks (NMN) have achieved success in image-grounded tasks such as Visual Question Answering (VQA) on synthetic images. However, very limited work on NMN has been studied in the video-grounded dialogue tasks. These tasks extend the complexity of traditional visual tasks with the additional visual temporal variance and language cross-turn dependencies. Motivated by recent NMN approaches on image-grounded tasks, we introduce Video-grounded Neural Module Network (VGNMN) to model the information retrieval process in video-grounded language tasks as a pipeline of neural modules. VGNMN first decomposes all language components in dialogues to explicitly resolve any entity references and detect corresponding action-based inputs from the question. The detected entities and actions are used as parameters to instantiate neural module networks and extract visual cues from the video. Our experiments show that VGNMN can achieve promising performance on a challenging video-grounded dialogue benchmark as well as a video QA benchmark.</abstract>
@@ -3472,7 +3472,7 @@
     <paper id="248">
       <title>Multimodal Dialogue State Tracking</title>
       <author><first>Hung</first><last>Le</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven</first><last>Hoi</last></author>
       <pages>3394-3415</pages>
       <abstract>Designed for tracking user goals in dialogues, a dialogue state tracker is an essential component in a dialogue system. However, the research of dialogue state tracking has largely been limited to unimodality, in which slots and slot values are limited by knowledge domains (e.g. restaurant domain with slots of restaurant name and price range) and are defined by specific database schema. In this paper, we propose to extend the definition of dialogue state tracking to multimodality. Specifically, we introduce a novel dialogue state tracking task to track the information of visual objects that are mentioned in video-grounded dialogues. Each new dialogue utterance may introduce a new video segment, new visual objects, or new object attributes and a state tracker is required to update these information slots accordingly. We created a new synthetic benchmark and designed a novel baseline, Video-Dialogue Transformer Network (VDTN), for this task. VDTN combines both object-level features and segment-level features and learns contextual dependencies between videos and dialogues to generate multimodal dialogue states. We optimized VDTN for a state generation task as well as a self-supervised video understanding task which recovers video segment or object representations. Finally, we trained VDTN to use the decoded states in a response prediction task. Together with comprehensive ablation and qualitative analysis, we discovered interesting insights towards building more capable multimodal dialogue systems.</abstract>
@@ -3551,7 +3551,7 @@
       <author><first>Jacob</first><last>Morrison</last></author>
       <author><first>Ronan</first><last>Le Bras</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3464-3478</pages>
       <abstract>We establish THumB, a rubric-based human evaluation protocol for image captioning models. Our scoring rubrics and their definitions are carefully developed based on machine- and human-generated captions on the MSCOCO dataset. Each caption is evaluated along two main dimensions in a tradeoff (precision and recall) as well as other aspects that measure the text quality (fluency, conciseness, and inclusive language). Our evaluations demonstrate several critical problems of the current evaluation practice. Human-generated captions show substantially higher quality than machine-generated ones, especially in coverage of salient information (i.e., recall), while most automatic metrics say the opposite. Our rubric-based results reveal that CLIPScore, a recent metric that uses image features, better correlates with human judgments than conventional text-only metrics because it is more sensitive to recall. We hope that this work will promote a more transparent evaluation protocol for image captioning and its automatic metrics.</abstract>
       <url hash="d248880d">2022.naacl-main.254</url>
@@ -3583,9 +3583,9 @@
       <author><first>Tim</first><last>O’Gorman</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <pages>3496-3505</pages>
       <abstract>Despite extensive research on parsing of English sentences into Abstract Meaning Representation (AMR) graphs, which are compared to gold graphs via the Smatch metric, full-document parsing into a unified graph representation lacks well-defined representation and evaluation. Taking advantage of a super-sentential level of coreference annotation from previous work, we introduce a simple algorithm for deriving a unified graph representation, avoiding the pitfalls of information loss from over-merging and lack of coherence from under merging. Next, we describe improvements to the Smatch metric to make it tractable for comparing document-level graphs and use it to re-evaluate the best published document-level AMR parser. We also present a pipeline approach combining the top-performing AMR parser and coreference resolution systems, providing a strong baseline for future research.</abstract>
@@ -3598,9 +3598,9 @@
       <title>Learning to Transfer Prompts for Text Generation</title>
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Tianyi</first><last>Tang</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <pages>3506-3518</pages>
       <abstract>Pretrained language models (PLMs) have made remarkable progress in text generation tasks via fine-tuning. While, it is challenging to fine-tune PLMs in a data-scarce situation. Therefore, it is non-trivial to develop a general and lightweight model that can adapt to various text generation tasks based on PLMs. To fulfill this purpose, the recent prompt-based learning offers a potential solution. In this paper, we improve this technique and propose a novel prompt-based method (PTG) for text generation in a transferable setting. First, PTG learns a set of source prompts for various source generation tasks and then transfers these prompts as target prompts to perform target generation tasks. To consider both task- and instance-level information, we design an adaptive attention mechanism to derive the target prompts. For each data instance, PTG learns a specific target prompt by attending to highly relevant source prompts. In extensive experiments, PTG yields competitive or better results than fine-tuning methods. We release our source prompts as an open resource, where users can add or reuse them to improve new text generation tasks for future research. Code and data can be available at <url>https://github.com/RUCAIBox/Transfer-Prompts-for-Text-Generation</url>.</abstract>
       <url hash="e3609a45">2022.naacl-main.257</url>
@@ -3617,7 +3617,7 @@
       <author><first>Zhuohao</first><last>Yu</last></author>
       <author><first>Zhipeng</first><last>Chen</last></author>
       <author><first>Jingyuan</first><last>Wang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>3519-3539</pages>
       <abstract>Nowadays, pretrained language models (PLMs) have dominated the majority of NLP tasks. While, little research has been conducted on systematically evaluating the language abilities of PLMs. In this paper, we present a large-scale empirical study on general language ability evaluation of PLMs (ElitePLM). In our study, we design four evaluation dimensions, memory, comprehension, reasoning, and composition, to measure ten widely-used PLMs within five categories. Our empirical results demonstrate that: (1) PLMs with varying training objectives and strategies are good at different ability tests; (2) fine-tuning PLMs in downstream tasks is usually sensitive to the data size and distribution; (3) PLMs have excellent transferability between similar tasks. Moreover, the prediction results of PLMs in our experiments are released as an open resource for more deep and detailed analysis on the language abilities of PLMs. This paper can guide the future work to select, apply, and design PLMs for specific tasks. We have made all the details of experiments publicly available at <url>https://github.com/RUCAIBox/ElitePLM</url>.</abstract>
@@ -3633,9 +3633,9 @@
       <author><first>Ronan</first><last>Le Bras</last></author>
       <author><first>Lavinia</first><last>Dunagan</last></author>
       <author><first>Jacob</first><last>Morrison</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3540-3557</pages>
       <abstract>Natural language processing researchers have identified limitations of evaluation methodology for generation tasks, with new questions raised about the validity of automatic metrics and of crowdworker judgments. Meanwhile, efforts to improve generation models tend to depend on simple n-gram overlap metrics (e.g., BLEU, ROUGE). We argue that new advances on models and metrics should each more directly benefit and inform the other. We therefore propose a generalization of leaderboards, bidimensional leaderboards (Billboards), that simultaneously tracks progress in language generation models and metrics for their evaluation. Unlike conventional unidimensional leaderboards that sort submitted systems by predetermined metrics, a Billboard accepts both generators and evaluation metrics as competing entries. A Billboard automatically creates an ensemble metric that selects and linearly combines a few metrics based on a global analysis across generators. Further, metrics are ranked based on their correlation with human judgments. We release four Billboards for machine translation, summarization, and image captioning. We demonstrate that a linear ensemble of a few diverse metrics sometimes substantially outperforms existing metrics in isolation. Our mixed-effects model analysis shows that most automatic metrics, especially the reference-based ones, overrate machine over human generation, demonstrating the importance of updating metrics as generation models become stronger (and perhaps more similar to humans) in the future.</abstract>
       <url hash="07e02193">2022.naacl-main.259</url>
@@ -3687,7 +3687,7 @@
     </paper>
     <paper id="263">
       <title>Benchmarking Intersectional Biases in <fixed-case>NLP</fixed-case></title>
-      <author><first>John</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John</first><last>Lalor</last></author>
       <author><first>Yi</first><last>Yang</last></author>
       <author><first>Kendall</first><last>Smith</last></author>
       <author><first>Nicole</first><last>Forsgren</last></author>
@@ -3703,7 +3703,7 @@
     <paper id="264">
       <title>When is <fixed-case>BERT</fixed-case> Multilingual? Isolating Crucial Ingredients for Cross-lingual Transfer</title>
       <author><first>Ameet</first><last>Deshpande</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Karthik</first><last>Narasimhan</last></author>
       <pages>3610-3623</pages>
       <abstract>While recent work on multilingual language models has demonstrated their capacity for cross-lingual zero-shot transfer on downstream tasks, there is a lack of consensus in the community as to what shared properties between languages enable such transfer. Analyses involving pairs of natural languages are often inconclusive and contradictory since languages simultaneously differ in many linguistic aspects. In this paper, we perform a large-scale empirical study to isolate the effects of various linguistic properties by measuring zero-shot transfer between four diverse natural languages and their counterparts constructed by modifying aspects such as the script, word order, and syntax. Among other things, our experiments show that the absence of sub-word overlap significantly affects zero-shot transfer when languages differ in their word order, and there is a strong correlation between transfer performance and word embedding alignment between languages (e.g., <tex-math>\rho_s=0.94</tex-math> on the task of NLI). Our results call for focus in multilingual models on explicitly improving word embedding alignment between languages rather than relying on its implicit emergence.</abstract>
@@ -3716,7 +3716,7 @@
       <title>How Conservative are Language Models? Adapting to the Introduction of Gender-Neutral Pronouns</title>
       <author><first>Stephanie</first><last>Brandl</last></author>
       <author><first>Ruixiang</first><last>Cui</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3624-3630</pages>
       <abstract>Gender-neutral pronouns have recently been introduced in many languages to a) include non-binary people and b) as a generic singular. Recent results from psycholinguistics suggest that gender-neutral pronouns (in Swedish) are not associated with human processing difficulties. This, we show, is in sharp contrast with automated processing. We show that gender-neutral pronouns in Danish, English, and Swedish are associated with higher perplexity, more dispersed attention patterns, and worse downstream performance. We argue that such conservativity in language models may limit widespread adoption of gender-neutral pronouns and must therefore be resolved.</abstract>
       <url hash="277075d4">2022.naacl-main.265</url>
@@ -3759,7 +3759,7 @@
       <title>Learning the Ordering of Coordinate Compounds and Elaborate Expressions in <fixed-case>H</fixed-case>mong, <fixed-case>L</fixed-case>ahu, and <fixed-case>C</fixed-case>hinese</title>
       <author><first>Chenxuan</first><last>Cui</last></author>
       <author><first>Katherine J.</first><last>Zhang</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <pages>3656-3669</pages>
       <abstract>Coordinate compounds (CCs) and elaborate expressions (EEs) are coordinate constructions common in languages of East and Southeast Asia. Mortensen (2006) claims that (1) the linear ordering of EEs and CCs in Hmong, Lahu, and Chinese can be predicted via phonological hierarchies and (2) that these phonological hierarchies lack a clear phonetic rationale. These claims are significant because morphosyntax has often been seen as in a feed-forward relationship with phonology, and phonological generalizations have often been assumed to be phonetically “natural”. We investigate whether the ordering of CCs and EEs can be learned empirically and whether computational models (classifiers and sequence-labeling models) learn unnatural hierarchies similar to those posited by Mortensen (2006). We find that decision trees and SVMs learn to predict the order of CCs/EEs on the basis of phonology, beating strong baselines for all three languages, with DTs learning hierarchies strikingly similar to those proposed by Mortensen. However, we also find that a neural sequence labeling model is able to learn the ordering of elaborate expressions in Hmong very effectively without using any phonological information. We argue that EE ordering can be learned through two independent routes: phonology and lexical distribution, presenting a more nuanced picture than previous work.</abstract>
       <url hash="c8b09308">2022.naacl-main.268</url>
@@ -3786,7 +3786,7 @@
       <author><first>Chia-Chien</first><last>Hung</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>3687-3703</pages>
       <abstract>Research on (multi-domain) task-oriented dialog (TOD) has predominantly focused on the English language, primarily due to the shortage of robust TOD datasets in other languages, preventing the systematic investigation of cross-lingual transfer for this crucial NLP application area. In this work, we introduce Multi2WOZ, a new multilingual multi-domain TOD dataset, derived from the well-established English dataset MultiWOZ, that spans four typologically diverse languages: Chinese, German, Arabic, and Russian. In contrast to concurrent efforts, Multi2WOZ contains gold-standard dialogs in target languages that are directly comparable with development and test portions of the English dataset, enabling reliable and comparative estimates of cross-lingual transfer performance for TOD. We then introduce a new framework for multilingual conversational specialization of pretrained language models (PrLMs) that aims to facilitate cross-lingual transfer for arbitrary downstream TOD tasks. Using such conversational PrLMs specialized for concrete target languages, we systematically benchmark a number of zero-shot and few-shot cross-lingual transfer approaches on two standard TOD tasks: Dialog State Tracking and Response Retrieval. Our experiments show that, in most setups, the best performance entails the combination of (i) conversational specialization in the target language and (ii) few-shot transfer for the concrete TOD task. Most importantly, we show that our conversational specialization in the target language allows for an exceptionally sample-efficient few-shot transfer for downstream TOD tasks.</abstract>
@@ -3834,7 +3834,7 @@
     </paper>
     <paper id="274">
       <title>Adaptable Adapters</title>
-      <author><first>Nafise</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise</first><last>Moosavi</last></author>
       <author><first>Quentin</first><last>Delfosse</last></author>
       <author><first>Kristian</first><last>Kersting</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -3908,8 +3908,8 @@
       <author><first>Tingting</first><last>Ma</last></author>
       <author><first>Qianhui</first><last>Wu</last></author>
       <author><first>Zhiwei</first><last>Yu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3806-3818</pages>
       <abstract>Recent studies on few-shot intent detection have attempted to formulate the task as a meta-learning problem, where a meta-learning model is trained with a certain capability to quickly adapt to newly specified few-shot tasks with potentially unseen intent categories. Prototypical networks have been commonly used in this setting, with the hope that good prototypical representations could be learned to capture the semantic similarity between the query and a few labeled instances. This intuition naturally leaves a question of whether or not a good sentence representation scheme could suffice for the task without further domain-specific adaptation. In this paper, we conduct empirical studies on a number of general-purpose sentence embedding schemes, showing that good sentence embeddings without any fine-tuning on intent detection data could produce a non-trivially strong performance. Inspired by the results from our qualitative analysis, we propose a frustratingly easy modification, which leads to consistent improvements over all sentence encoding schemes, including those from the state-of-the-art prototypical network variants with task-specific fine-tuning.</abstract>
       <url hash="f4cab2c4">2022.naacl-main.279</url>
@@ -4107,7 +4107,7 @@
     </paper>
     <paper id="292">
       <title>Towards Debiasing Translation Artifacts</title>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Rricha</first><last>Jalota</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Josef</first><last>Genabith</last></author>
@@ -4146,7 +4146,7 @@
       <author><first>Chaitanya</first><last>Agarwal</last></author>
       <author><first>Vivek</first><last>Gupta</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>4018-4037</pages>
       <abstract>Existing research on Tabular Natural Language Inference (TNLI) exclusively examines the task in a monolingual setting where the tabular premise and hypothesis are in the same language. However, due to the uneven distribution of text resources on the web across languages, it is common to have the tabular premise in a high resource language and the hypothesis in a low resource language. As a result, we present the challenging task of bilingual Tabular Natural Language Inference (bTNLI), in which the tabular premise and a hypothesis over it are in two separate languages. We construct EI-InfoTabS: an English-Indic bTNLI dataset by translating the textual hypotheses of the English TNLI dataset InfoTabS into eleven major Indian languages. We thoroughly investigate how pre-trained multilingual models learn and perform on EI-InfoTabS. Our study shows that the performance on bTNLI can be close to its monolingual counterpart, with translate-train, translate-test and unified-train being strongly competitive baselines.</abstract>
       <url hash="fcec7995">2022.naacl-main.295</url>
@@ -4157,7 +4157,7 @@
     <paper id="296">
       <title>Generative Biomedical Entity Linking via Knowledge Base-Guided Pre-training and Synonyms-Aware Fine-tuning</title>
       <author><first>Hongyi</first><last>Yuan</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Sheng</first><last>Yu</last></author>
       <pages>4038-4048</pages>
       <abstract>Entities lie in the heart of biomedical natural language understanding, and the biomedical entity linking (EL) task remains challenging due to the fine-grained and diversiform concept names. Generative methods achieve remarkable performances in general domain EL with less memory usage while requiring expensive pre-training. Previous biomedical EL methods leverage synonyms from knowledge bases (KB) which is not trivial to inject into a generative method. In this work, we use a generative approach to model biomedical EL and propose to inject synonyms knowledge in it. We propose KB-guided pre-training by constructing synthetic samples with synonyms and definitions from KB and require the model to recover concept names. We also propose synonyms-aware fine-tuning to select concept names for training, and propose decoder prompt and multi-synonyms constrained prefix tree for inference. Our method achieves state-of-the-art results on several biomedical EL tasks without candidate selection which displays the effectiveness of proposed pre-training and fine-tuning strategies. The source code is available at <url>https://github.com/Yuanhy1997/GenBioEL</url>.</abstract>
@@ -4188,7 +4188,7 @@
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Cass</first><last>Lowry</last></author>
       <author><first>Sujay</first><last>Khandagale</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Maria</first><last>Polinsky</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>4061-4072</pages>
@@ -4202,8 +4202,8 @@
       <title>Optimising Equal Opportunity Fairness in Model Training</title>
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>4073-4084</pages>
       <abstract>Real-world datasets often encode stereotypes and societal biases. Such biases can be implicitly captured by trained models, leading to biased predictions and exacerbating existing societal preconceptions. Existing debiasing methods, such as adversarial training and removing protected information from representations, have been shown to reduce bias. However, a disconnect between fairness criteria and training objectives makes it difficult to reason theoretically about the effectiveness of different techniques. In this work, we propose two novel training objectives which directly optimise for the widely-used criterion of <i>equal opportunity</i>, and show that they are effective in reducing bias while maintaining high performance over two classification tasks.</abstract>
@@ -4215,7 +4215,7 @@
     <paper id="300">
       <title>Leaner and Faster: Two-Stage Model Compression for Lightweight Text-Image Retrieval</title>
       <author><first>Siyu</first><last>Ren</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>4085-4090</pages>
       <abstract>Current text-image approaches (e.g., CLIP) typically adopt dual-encoder architecture using pre-trained vision-language representation. However, these models still pose non-trivial memory requirements and substantial incremental indexing time, which makes them less practical on mobile devices. In this paper, we present an effective two-stage framework to compress large pre-trained dual-encoder for lightweight text-image retrieval. The resulting model is smaller (39% of the original), faster (1.6x/2.9x for processing image/text respectively), yet performs on par with or better than the original full model on Flickr30K and MSCOCO benchmarks. We also open-source an accompanying realistic mobile image search application.</abstract>
       <url hash="730804a4">2022.naacl-main.300</url>
@@ -4230,7 +4230,7 @@
       <author><first>Dongyuan</first><last>Li</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>4091-4104</pages>
       <abstract>Previous studies on the timeline summarization (TLS) task ignored the information interaction between sentences and dates, and adopted pre-defined unlearnable representations for them. They also considered date selection and event detection as two independent tasks, which makes it impossible to integrate their advantages and obtain a globally optimal summary. In this paper, we present a <i>joint learning-based heterogeneous graph attention network for TLS</i> (HeterTls), in which date selection and event detection are combined into a unified framework to improve the extraction accuracy and remove redundant sentences simultaneously. Our heterogeneous graph involves multiple types of nodes, the representations of which are iteratively learned across the heterogeneous graph attention layer. We evaluated our model on four datasets, and found that it significantly outperformed the current state-of-the-art baselines with regard to ROUGE scores and date selection metrics.</abstract>
       <url hash="c6a7b2dd">2022.naacl-main.301</url>
@@ -4340,9 +4340,9 @@
     <paper id="309">
       <title>Improving negation detection with negation-focused pre-training</title>
       <author><first>Thinh</first><last>Truong</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>4188-4193</pages>
       <abstract>Negation is a common linguistic feature that is crucial in many language understanding tasks, yet it remains a hard problem due to diversity in its expression in different types of text. Recent works show that state-of-the-art NLP models underperform on samples containing negation in various tasks, and that negation detection models do not transfer well across domains. We propose a new negation-focused pre-training strategy, involving targeted data augmentation and negation masking, to better incorporate negation information into language models. Extensive experiments on common benchmarks show that our proposed approach improves negation detection performance and generalizability over the strong baseline NegBERT (Khandelwal and Sawant, 2020).</abstract>
       <url hash="59410d4d">2022.naacl-main.309</url>
@@ -4368,13 +4368,13 @@
       <author><first>Yung-Sung</first><last>Chuang</last></author>
       <author><first>Rumen</first><last>Dangovski</last></author>
       <author><first>Hongyin</first><last>Luo</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Shiyu</first><last>Chang</last></author>
       <author><first>Marin</first><last>Soljacic</last></author>
       <author><first>Shang-Wen</first><last>Li</last></author>
       <author><first>Scott</first><last>Yih</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>4207-4218</pages>
       <abstract>We propose DiffCSE, an unsupervised contrastive learning framework for learning sentence embeddings. DiffCSE learns sentence embeddings that are sensitive to the difference between the original sentence and an edited sentence, where the edited sentence is obtained by stochastically masking out the original sentence and then sampling from a masked language model. We show that DiffSCE is an instance of equivariant contrastive learning, which generalizes contrastive learning and learns representations that are insensitive to certain types of augmentations and sensitive to other “harmful” types of augmentations. Our experiments show that DiffCSE achieves state-of-the-art results among unsupervised sentence representation learning methods, outperforming unsupervised SimCSE by 2.3 absolute points on semantic textual similarity tasks.</abstract>
       <url hash="6b2e04d4">2022.naacl-main.311</url>
@@ -4442,7 +4442,7 @@
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Xue</first><last>Zhang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>4256-4266</pages>
       <abstract>Generating adversarial examples for Neural Machine Translation (NMT) with single Round-Trip Translation (RTT) has achieved promising results by releasing the meaning-preserving restriction. However, a potential pitfall for this approach is that we cannot decide whether the generated examples are adversarial to the target NMT model or the auxiliary backward one, as the reconstruction error through the RTT can be related to either. To remedy this problem, we propose a new definition for NMT adversarial examples based on the Doubly Round-Trip Translation (DRTT). Specifically, apart from the source-target-source RTT, we also consider the target-source-target one, which is utilized to pick out the authentic adversarial examples for the target NMT model. Additionally, to enhance the robustness of the NMT model, we introduce the masked language models to construct bilingual adversarial pairs based on DRTT, which are used to train the NMT model directly. Extensive experiments on both the clean and noisy test sets (including the artificial and natural noise) show that our approach substantially improves the robustness of NMT models.</abstract>
@@ -4475,7 +4475,7 @@
       <author><first>Hanson</first><last>Lu</last></author>
       <author><first>Thomas</first><last>Icard</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>4288-4295</pages>
       <abstract>Distillation efforts have led to language models that are more compact and efficient without serious drops in performance. The standard approach to distillation trains a student model against two objectives: a task-specific objective (e.g., language modeling) and an imitation objective that encourages the hidden states of the student model to be similar to those of the larger teacher model. In this paper, we show that it is beneficial to augment distillation with a third objective that encourages the student to imitate the <i>causal</i> dynamics of the teacher through a distillation interchange intervention training objective (DIITO). DIITO pushes the student model to become a <i>causal abstraction</i> of the teacher model – a faithful model with simpler causal structure. DIITO is fully differentiable, easily implemented, and combines flexibly with other objectives. Compared against standard distillation with the same setting, DIITO results in lower perplexity on the WikiText-103M corpus (masked language modeling) and marked improvements on the GLUE benchmark (natural language understanding), SQuAD (question answering), and CoNLL-2003 (named entity recognition).</abstract>
       <url hash="693b594a">2022.naacl-main.318</url>
@@ -4603,7 +4603,7 @@
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Konstantine</first><last>Arkoudas</last></author>
       <author><first>Weiqi</first><last>Sun</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>4418-4427</pages>
       <abstract>Task-oriented parsing (TOP) aims to convert natural language into machine-readable representations of specific tasks, such as setting an alarm. A popular approach to TOP is to apply seq2seq models to generate linearized parse trees. A more recent line of work argues that pretrained seq2seq2 models are better at generating outputs that are themselves natural language, so they replace linearized parse trees with canonical natural-language paraphrases that can then be easily translated into parse trees, resulting in so-called naturalized parsers. In this work we continue to explore naturalized semantic parsing by presenting a general reduction of TOP to abstractive question answering that overcomes some limitations of canonical paraphrasing. Experimental results show that our QA-based technique outperforms state-of-the-art methods in full-data settings while achieving dramatic improvements in few-shot settings.</abstract>
       <url hash="7504204f">2022.naacl-main.328</url>
@@ -4615,10 +4615,10 @@
       <title>Learning Cross-Lingual <fixed-case>IR</fixed-case> from an <fixed-case>E</fixed-case>nglish Retriever</title>
       <author><first>Yulong</first><last>Li</last></author>
       <author><first>Martin</first><last>Franz</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Bhavani</first><last>Iyer</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <pages>4428-4436</pages>
       <abstract>We present DR.DECR (Dense Retrieval with Distillation-Enhanced Cross-Lingual Representation), a new cross-lingual information retrieval (CLIR) system trained using multi-stage knowledge distillation (KD). The teacher of DR.DECR relies on a highly effective but computationally expensive two-stage inference process consisting of query translation and monolingual IR, while the student, DR.DECR, executes a single CLIR step. We teach DR.DECR powerful multilingual representations as well as CLIR by optimizing two corresponding KD objectives. Learning useful representations of non-English text from an English-only retriever is accomplished through a cross-lingual token alignment algorithm that relies on the representation capabilities of the underlying multilingual encoders. In both in-domain and zero-shot out-of-domain evaluation, DR.DECR demonstrates far superior accuracy over direct fine-tuning with labeled CLIR data. It is also the best single-model retriever on the XOR-TyDi benchmark at the time of this writing.</abstract>
       <url hash="8286a81f">2022.naacl-main.329</url>
@@ -4721,7 +4721,7 @@
       <title>Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge</title>
       <author><first>Ian</first><last>Porada</last></author>
       <author><first>Alessandro</first><last>Sordoni</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>4550-4557</pages>
       <abstract>Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning.</abstract>
       <url hash="13095938">2022.naacl-main.337</url>
@@ -4732,9 +4732,9 @@
     </paper>
     <paper id="338">
       <title>Using Paraphrases to Study Properties of Contextual Embeddings</title>
-      <author><first>Laura</first><last>Burdick</last></author>
+      <author id="laura-burdick"><first>Laura</first><last>Burdick</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>4558-4568</pages>
       <abstract>We use paraphrases as a unique source of data to analyze contextualized embeddings, with a particular focus on BERT. Because paraphrases naturally encode consistent word and phrase semantics, they provide a unique lens for investigating properties of embeddings. Using the Paraphrase Database’s alignments, we study words within paraphrases as well as phrase representations. We find that contextual embeddings effectively handle polysemous words, but give synonyms surprisingly different representations in many cases. We confirm previous findings that BERT is sensitive to word order, but find slightly different patterns than prior work in terms of the level of contextualization across BERT’s layers.</abstract>
       <url hash="7d067c53">2022.naacl-main.338</url>
@@ -4759,7 +4759,7 @@
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>4587-4601</pages>
       <abstract>Even if recent Transformer-based architectures, such as BERT, achieved impressive results in semantic processing tasks, their fine-tuning stage still requires large scale training resources. Usually, Data Augmentation (DA) techniques can help to deal with low resource settings. In Text Classification tasks, the objective of DA is the generation of well-formed sentences that i) represent the desired task category and ii) are novel with respect to existing sentences. In this paper, we propose a neural approach to automatically learn to generate new examples using a pre-trained sequence-to-sequence model. We first learn a task-oriented similarity function that we use to pair similar examples. Then, we use these example pairs to train a model to generate examples. Experiments in low resource settings show that augmenting the training material with the proposed strategy systematically improves the results on text classification and natural language inference tasks by up to 10% accuracy, outperforming existing DA approaches.</abstract>
       <url hash="064b6820">2022.naacl-main.340</url>
@@ -4843,10 +4843,10 @@
     </paper>
     <paper id="346">
       <title>Quantifying Adaptability in Pre-trained Language Models with 500 Tasks</title>
-      <author><first>Belinda</first><last>Li</last></author>
+      <author id="belinda-z-li"><first>Belinda</first><last>Li</last></author>
       <author><first>Jane</first><last>Yu</last></author>
       <author><first>Madian</first><last>Khabsa</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Alon</first><last>Halevy</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <pages>4696-4715</pages>
@@ -4934,7 +4934,7 @@
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Hong</first><last>Yu</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>4781-4796</pages>
       <abstract>We propose novel AI-empowered chat bots for learning as conversation where a user does not read a passage but gains information and knowledge through conversation with a teacher bot. Our information acquisition-oriented dialogue system employs a novel adaptation of reinforced self-play so that the system can be transferred to various domains without in-domain dialogue data, and can carry out conversations both informative and attentive to users.</abstract>
       <url hash="45dc1055">2022.naacl-main.352</url>
@@ -4945,7 +4945,7 @@
     <paper id="353">
       <title>Dynamic Programming in Rank Space: Scaling Structured Inference with Low-Rank <fixed-case>HMM</fixed-case>s and <fixed-case>PCFG</fixed-case>s</title>
       <author><first>Songlin</first><last>Yang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Kewei</first><last>Tu</last></author>
       <pages>4797-4809</pages>
       <abstract>Hidden Markov Models (HMMs) and Probabilistic Context-Free Grammars (PCFGs) are widely used structured models, both of which can be represented as factor graph grammars (FGGs), a powerful formalism capable of describing a wide range of models. Recent research found it beneficial to use large state spaces for HMMs and PCFGs. However, inference with large state spaces is computationally demanding, especially for PCFGs. To tackle this challenge, we leverage tensor rank decomposition (aka. CPD) to decrease inference computational complexities for a subset of FGGs subsuming HMMs and PCFGs. We apply CPD on the factors of an FGG and then construct a new FGG defined in the rank space. Inference with the new FGG produces the same result but has a lower time complexity when the rank size is smaller than the state size. We conduct experiments on HMM language modeling and unsupervised PCFG parsing, showing better performance than previous work. Our code is publicly available at <url>https://github.com/VPeterV/RankSpace-Models</url>.</abstract>
@@ -4983,7 +4983,7 @@
       <title>Mining Clues from Incomplete Utterance: A Query-enhanced Network for Incomplete Utterance Rewriting</title>
       <author><first>Shuzheng</first><last>Si</last></author>
       <author><first>Shuang</first><last>Zeng</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>4839-4847</pages>
       <abstract>Incomplete utterance rewriting has recently raised wide attention. However, previous works do not consider the semantic structural information between incomplete utterance and rewritten utterance or model the semantic structure implicitly and insufficiently. To address this problem, we propose a QUEry-Enhanced Network(QUEEN) to solve this problem. Firstly, our proposed query template explicitly brings guided semantic structural knowledge between the incomplete utterance and the rewritten utterance making model perceive where to refer back to or recover omitted tokens. Then, we adopt a fast and effective edit operation scoring network to model the relation between two tokens. Benefiting from extra information and the well-designed network, QUEEN achieves state-of-the-art performance on several public datasets.</abstract>
       <url hash="75037155">2022.naacl-main.356</url>
@@ -5024,7 +5024,7 @@
       <title>Generalized Quantifiers as a Source of Error in Multilingual <fixed-case>NLU</fixed-case> Benchmarks</title>
       <author><first>Ruixiang</first><last>Cui</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4875-4893</pages>
       <abstract>Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today’s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that quantifiers are pervasive in NLU benchmarks, and their occurrence at test time is associated with performance drops. Multilingual models also exhibit unsatisfying quantifier reasoning abilities, but not necessarily worse for non-English languages. To facilitate directly-targeted probing, we present an adversarial generalized quantifier NLI task (GQNLI) and show that pre-trained language models have a clear lack of robustness in generalized quantifier reasoning.</abstract>
       <url hash="981b5c06">2022.naacl-main.359</url>
@@ -5083,7 +5083,7 @@
     <paper id="364">
       <title><fixed-case>DUCK</fixed-case>: Rumour Detection on Social Media by Modelling User and Comment Propagation Networks</title>
       <author><first>Lin</first><last>Tian</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>4939-4949</pages>
       <abstract>Social media rumours, a form of misinformation, can mislead the public and cause significant economic and social disruption. Motivated by the observation that the user network — which captures <tex-math>\textit{who}</tex-math> engage with a story — and the comment network — which captures <tex-math>\textit{how}</tex-math> they react to it — provide complementary signals for rumour detection, in this paper, we propose DUCK (rumour <tex-math>\underline{d}</tex-math>etection with <tex-math>\underline{u}</tex-math>ser and <tex-math>\underline{c}</tex-math>omment networ<tex-math>\underline{k}</tex-math>s) for rumour detection on social media. We study how to leverage transformers and graph attention networks to jointly model the contents and structure of social media conversations, as well as the network of users who engaged in these conversations. Over four widely used benchmark rumour datasets in English and Chinese, we show that DUCK produces superior performance for detecting rumours, creating a new state-of-the-art. Source code for DUCK is available at: <url>https://github.com/ltian678/DUCK-code</url>.</abstract>
@@ -5109,7 +5109,7 @@
       <author><first>Mike</first><last>Zhang</last></author>
       <author><first>Kristian</first><last>Jensen</last></author>
       <author><first>Sif</first><last>Sonniks</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>4962-4984</pages>
       <abstract>Skill Extraction (SE) is an important and widely-studied task useful to gain insights into labor market dynamics. However, there is a lacuna of datasets and annotation guidelines; available datasets are few and contain crowd-sourced labels on the span-level or labels from a predefined skill inventory. To address this gap, we introduce SKILLSPAN, a novel SE dataset consisting of 14.5K sentences and over 12.5K annotated spans. We release its respective guidelines created over three different sources annotated for hard and soft skills by domain experts. We introduce a BERT baseline (Devlin et al., 2019). To improve upon this baseline, we experiment with language models that are optimized for long spans (Joshi et al., 2020; Beltagy et al., 2020), continuous pre-training on the job posting domain (Han and Eisenstein, 2019; Gururangan et al., 2020), and multi-task learning (Caruana, 1997). Our results show that the domain-adapted models significantly outperform their non-adapted counterparts, and single-task outperforms multi-task learning.</abstract>
       <url hash="e80151ba">2022.naacl-main.366</url>
@@ -5135,7 +5135,7 @@
       <author><first>Ce</first><last>Zheng</last></author>
       <author><first>Xudong</first><last>Chen</last></author>
       <author><first>Runxin</first><last>Xu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>4998-5011</pages>
       <abstract>Frame semantic parsing is a fundamental NLP task, which consists of three subtasks: frame identification, argument identification and role classification. Most previous studies tend to neglect relations between different subtasks and arguments and pay little attention to ontological frame knowledge defined in FrameNet. In this paper, we propose a Knowledge-guided Incremental semantic parser with Double-graph (KID). We first introduce Frame Knowledge Graph (FKG), a heterogeneous graph containing both frames and FEs (Frame Elements) built on the frame knowledge so that we can derive knowledge-enhanced representations for frames and FEs. Besides, we propose Frame Semantic Graph (FSG) to represent frame semantic structures extracted from the text with graph structures. In this way, we can transform frame semantic parsing into an incremental graph construction problem to strengthen interactions between subtasks and relations between arguments. Our experiments show that KID outperforms the previous state-of-the-art method by up to 1.7 F1-score on two FrameNet datasets. Our code is availavle at <url>https://github.com/PKUnlp-icler/KID</url>.</abstract>
       <url hash="7cb68d7d">2022.naacl-main.368</url>
@@ -5150,7 +5150,7 @@
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Qingyu</first><last>Zhou</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>5012-5024</pages>
       <abstract>Few-Shot Sequence Labeling (FSSL) is a canonical paradigm for the tagging models, e.g., named entity recognition and slot filling, to generalize on an emerging, resource-scarce domain. Recently, the metric-based meta-learning framework has been recognized as a promising approach for FSSL. However, most prior works assign a label to each token based on the token-level similarities, which ignores the integrality of named entities or slots. To this end, in this paper, we propose ESD, an Enhanced Span-based Decomposition method for FSSL. ESD formulates FSSL as a span-level matching problem between test query and supporting instances. Specifically, ESD decomposes the span matching problem into a series of span-level procedures, mainly including enhanced span representation, class prototype aggregation and span conflicts resolution. Extensive experiments show that ESD achieves the new state-of-the-art results on two popular FSSL benchmarks, FewNERD and SNIPS, and is proven to be more robust in the noisy and nested tagging scenarios.</abstract>
@@ -5166,7 +5166,7 @@
       <author><first>Peiyi</first><last>Wang</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Shuang</first><last>Zeng</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>5025-5036</pages>
       <abstract>Most previous studies aim at extracting events from a single sentence, while document-level event extraction still remains under-explored. In this paper, we focus on extracting event arguments from an entire document, which mainly faces two critical problems: a) the long-distance dependency between trigger and arguments over sentences; b) the distracting context towards an event in the document. To address these issues, we propose a <b>T</b>wo-<b>S</b>tream <b>A</b>bstract meaning <b>R</b>epresentation enhanced extraction model (TSAR). TSAR encodes the document from different perspectives by a two-stream encoding module, to utilize local and global information and lower the impact of distracting context. Besides, TSAR introduces an AMR-guided interaction module to capture both intra-sentential and inter-sentential features, based on the locally and globally constructed AMR semantic graphs. An auxiliary boundary loss is introduced to enhance the boundary information for text spans explicitly. Extensive experiments illustrate that TSAR outperforms previous state-of-the-art by a large margin, with 2.54 F1 and 5.13 F1 performance gain on the public RAMS and WikiEvents datasets respectively, showing the superiority in the cross-sentence arguments extraction. We release our code in <url>https://github.com/PKUnlp-icler/TSAR</url>.</abstract>
@@ -5318,7 +5318,7 @@
       <title>Sketching as a Tool for Understanding and Accelerating Self-attention for Long Sequences</title>
       <author><first>Yifan</first><last>Chen</last></author>
       <author><first>Qi</first><last>Zeng</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Yun</first><last>Yang</last></author>
@@ -5335,7 +5335,7 @@
       <author><first>Hongyuan</first><last>Lu</last></author>
       <author><first>Wai</first><last>Lam</last></author>
       <author><first>Hong</first><last>Cheng</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>5200-5212</pages>
       <abstract>Incorporating personas information allows diverse and engaging responses in dialogue response generation. Unfortunately, prior works have primarily focused on self personas and have overlooked the value of partner personas. Moreover, in practical applications, the availability of the gold partner personas is often not the case. This paper attempts to tackle these issues by offering a novel framework that leverages automatic partner personas generation to enhance the succeeding dialogue response generation. Our framework employs reinforcement learning with a dedicatedly designed critic network for reward judgement. Experimental results from automatic and human evaluations indicate that our framework is capable of generating relevant, interesting, coherent and informative partner personas, even compared to the ground truth partner personas. This enhances the succeeding dialogue response generation, which surpasses our competitive baselines that condition on the ground truth partner personas.</abstract>
       <url hash="9c133761">2022.naacl-main.382</url>
@@ -5385,7 +5385,7 @@
     <paper id="386">
       <title><fixed-case>S</fixed-case>yn2<fixed-case>V</fixed-case>ec: Synset Colexification Graphs for Lexical Semantic Similarity</title>
       <author><first>John</first><last>Harvill</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Mark</first><last>Hasegawa-Johnson</last></author>
       <pages>5259-5270</pages>
       <abstract>In this paper we focus on patterns of colexification (co-expressions of form-meaning mapping in the lexicon) as an aspect of lexical-semantic organization, and use them to build large scale synset graphs across BabelNet’s typologically diverse set of 499 world languages. We introduce and compare several approaches: monolingual and cross-lingual colexification graphs, popular distributional models, and fusion approaches. The models are evaluated against human judgments on a semantic similarity task for nine languages. Our strong empirical findings also point to the importance of universality of our graph synset embedding representations with no need for any language-specific adaptation when evaluated on the lexical similarity task. The insights of our exploratory investigation of large-scale colexification graphs could inspire significant advances in NLP across languages, especially for tasks involving languages which lack dedicated lexical resources, and can benefit from language transfer from large shared cross-lingual semantic spaces.</abstract>
@@ -5400,7 +5400,7 @@
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Sivan</first><last>Milton</last></author>
       <author><first>Mo</first><last>Yu</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>5271-5285</pages>
       <abstract>Knowledge-grounded conversational models are known to suffer from producing factually invalid statements, a phenomenon commonly called hallucination. In this work, we investigate the underlying causes of this phenomenon: is hallucination due to the training data, or to the models? We conduct a comprehensive human study on both existing knowledge-grounded conversational benchmarks and several state-of-the-art models. Our study reveals that the standard benchmarks consist of &gt; 60% hallucinated responses, leading to models that not only hallucinate but even amplify hallucinations. Our findings raise important questions on the quality of existing datasets and models trained using them. We make our annotations publicly available for future research.</abstract>
@@ -5463,7 +5463,7 @@
       <author><first>Johnny</first><last>Ma</last></author>
       <author><first>Jana</first><last>Thompson</last></author>
       <author><first>He</first><last>He</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>5336-5358</pages>
       <abstract>To enable building and testing models on long-document comprehension, we introduce QuALITY, a multiple-choice QA dataset with context passages in English that have an average length of about 5,000 tokens, much longer than typical current models can process. Unlike in prior work with passages, our questions are written and validated by contributors who have read the entire passage, rather than relying on summaries or excerpts. In addition, only half of the questions are answerable by annotators working under tight time constraints, indicating that skimming and simple search are not enough to consistently perform well. Our baseline models perform poorly on this task (55.4%) and significantly lag behind human performance (93.5%).</abstract>
       <url hash="12ffbf57">2022.naacl-main.391</url>
@@ -5474,7 +5474,7 @@
     <paper id="392">
       <title><fixed-case>ExSum</fixed-case>: <fixed-case>F</fixed-case>rom Local Explanations to Model Understanding</title>
       <author><first>Yilun</first><last>Zhou</last></author>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Julie</first><last>Shah</last></author>
       <pages>5359-5378</pages>
       <abstract>Interpretability methods are developed to understand the working mechanisms of black-box models, which is crucial to their responsible deployment. Fulfilling this goal requires both that the explanations generated by these methods are correct and that people can easily and reliably understand them. While the former has been addressed in prior work, the latter is often overlooked, resulting in informal model understanding derived from a handful of local explanations. In this paper, we introduce explanation summary (ExSum), a mathematical framework for quantifying model understanding, and propose metrics for its quality assessment. On two domains, ExSum highlights various limitations in the current practice, helps develop accurate model understanding, and reveals easily overlooked properties of the model. We also connect understandability to other properties of explanations such as human alignment, robustness, and counterfactual similarity and plausibility.</abstract>
@@ -5486,11 +5486,11 @@
     <paper id="393">
       <title>Maximum <fixed-case>B</fixed-case>ayes <fixed-case>S</fixed-case>match Ensemble Distillation for <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
       <author><first>Hoang</first><last>Thanh Lam</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>5379-5392</pages>
       <abstract>AMR parsing has experienced an unprecendented increase in performance in the last three years, due to a mixture of effects including architecture improvements and transfer learning. Self-learning techniques have also played a role in pushing performance forward. However, for most recent high performant parsers, the effect of self-learning and silver data augmentation seems to be fading. In this paper we propose to overcome this diminishing returns of silver data by combining Smatch-based ensembling techniques with ensemble distillation. In an extensive experimental setup, we push single model English parser performance to a new state-of-the-art, 85.9 (AMR2.0) and 84.3 (AMR3.0), and return to substantial gains from silver data augmentation. We also attain a new state-of-the-art for cross-lingual AMR parsing for Chinese, German, Italian and Spanish. Finally we explore the impact of the proposed technique on domain adaptation, and show that it can produce gains rivaling those of human annotated data for QALD-9 and achieve a new state-of-the-art for BioAMR.</abstract>
       <url hash="861a2b39">2022.naacl-main.393</url>
@@ -5503,7 +5503,7 @@
       <author><first>Mycal</first><last>Tucker</last></author>
       <author><first>Tiwalayo</first><last>Eisape</last></author>
       <author><first>Peng</first><last>Qian</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Julie</first><last>Shah</last></author>
       <pages>5393-5408</pages>
       <abstract>Recent causal probing literature reveals when language models and syntactic probes use similar representations. Such techniques may yield “false negative” causality results: models may use representations of syntax, but probes may have learned to use redundant encodings of the same syntactic information. We demonstrate that models do encode syntactic information redundantly and introduce a new probe design that guides probes to consider all syntactic information present in embeddings. Using these probes, we find evidence for the use of syntax in models where prior methods did not, allowing us to boost model performance by injecting syntactic information into representations.</abstract>
@@ -5515,7 +5515,7 @@
     <paper id="395">
       <title>Modeling Task Interactions in Document-Level Joint Entity and Relation Extraction</title>
       <author><first>Liyan</first><last>Xu</last></author>
-      <author><first>Jinho</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
       <pages>5409-5416</pages>
       <abstract>We target on the document-level relation extraction in an end-to-end setting, where the model needs to jointly perform mention extraction, coreference resolution (COREF) and relation extraction (RE) at once, and gets evaluated in an entity-centric way. Especially, we address the two-way interaction between COREF and RE that has not been the focus by previous work, and propose to introduce explicit interaction namely Graph Compatibility (GC) that is specifically designed to leverage task characteristics, bridging decisions of two tasks for direct task interference. Our experiments are conducted on DocRED and DWIE; in addition to GC, we implement and compare different multi-task settings commonly adopted in previous work, including pipeline, shared encoders, graph propagation, to examine the effectiveness of different interactions. The result shows that GC achieves the best performance by up to 2.3/5.1 F1 improvement over the baseline.</abstract>
       <url hash="2879c972">2022.naacl-main.395</url>
@@ -5681,8 +5681,8 @@
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
       <author><first>Ari</first><last>Holtzman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>5557-5576</pages>
       <abstract>We introduce a new domain expert mixture (DEMix) layer that enables conditioning a language model (LM) on the domain of the input text. A DEMix layer includes a collection of expert feedforward networks, each specialized to a domain, that makes the LM modular: experts can be mixed, added, or removed after initial training. Extensive experiments with autoregressive transformer LMs (up to 1.3B parameters) show that DEMix layers reduce test-time perplexity (especially for out-of-domain data), increase training efficiency, and enable rapid adaptation. Mixing experts during inference, using a parameter-free weighted ensemble, enables better generalization to heterogeneous or unseen domains. We also show it is possible to add experts to adapt to new domains without forgetting older ones, and remove experts to restrict access to unwanted domains. Overall, these results demonstrate benefits of domain modularity in language models.</abstract>
       <url hash="2de807a8">2022.naacl-main.407</url>
@@ -5789,7 +5789,7 @@
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>5657-5668</pages>
       <abstract>Factual inconsistencies in generated summaries severely limit the practical applications of abstractive dialogue summarization. Although significant progress has been achieved by using pre-trained neural language models, substantial amounts of hallucinated content are found during the human evaluation. In this work, we first devised a typology of factual errors to better understand the types of hallucinations generated by current models and conducted human evaluation on popular dialog summarization dataset. We further propose a training strategy that improves the factual consistency and overall quality of summaries via a novel contrastive fine-tuning, called CONFIT. To tackle top factual errors from our annotation, we introduce additional contrastive loss with carefully designed hard negative samples and self-supervised dialogue-specific loss to capture the key information between speakers. We show that our model significantly reduces all kinds of factual errors on both SAMSum dialogue summarization and AMI meeting summarization. On both datasets, we achieve significant improvements over state-of-the-art baselines using both automatic metrics, ROUGE and BARTScore, and human evaluation.</abstract>
       <url hash="ceabb3a3">2022.naacl-main.415</url>
@@ -5811,14 +5811,14 @@
     <paper id="417">
       <title>Investigating Crowdsourcing Protocols for Evaluating the Factual Consistency of Summaries</title>
       <author><first>Xiangru</first><last>Tang</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Ziming</first><last>Mao</last></author>
       <author><first>Griffin</first><last>Adams</last></author>
       <author><first>Borui</first><last>Wang</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>5680-5692</pages>
       <abstract>Current pre-trained models applied for summarization are prone to factual inconsistencies that misrepresent the source text. Evaluating the factual consistency of summaries is thus necessary to develop better models. However, the human evaluation setup for evaluating factual consistency has not been standardized. To determine the factors that affect the reliability of the human evaluation, we crowdsource evaluations for factual consistency across state-of-the-art models on two news summarization datasets using the rating-based Likert Scale and ranking-based Best-Worst Scaling. Our analysis reveals that the ranking-based Best-Worst Scaling offers a more reliable measure of summary quality across datasets and that the reliability of Likert ratings highly depends on the target dataset and the evaluation design. To improve crowdsourcing reliability, we extend the scale of the Likert rating and present a scoring algorithm for Best-Worst Scaling that we call value learning. Our crowdsourcing guidelines will be publicly available to facilitate future work on factual consistency in summarization.</abstract>
       <url hash="45cd2eeb">2022.naacl-main.417</url>
@@ -5856,7 +5856,7 @@
       <author><first>Yiding</first><last>Tan</last></author>
       <author><first>Linyang</first><last>Li</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5721-5732</pages>
       <abstract>Prompt-based methods have been successfully applied in sentence-level few-shot learning tasks, mostly owing to the sophisticated design of templates and label words. However, when applied to token-level labeling tasks such as NER, it would be time-consuming to enumerate the template queries over all potential entity spans. In this work, we propose a more elegant method to reformulate NER tasks as LM problems without any templates. Specifically, we discard the template construction process while maintaining the word prediction paradigm of pre-training models to predict a class-related pivot word (or label word) at the entity position. Meanwhile, we also explore principled ways to automatically search for appropriate label words that the pre-trained models can easily adapt to. While avoiding the complicated template-based process, the proposed LM objective also reduces the gap between different objectives used in pre-training and fine-tuning, thus it can better benefit the few-shot performance. Experimental results demonstrate the effectiveness of the proposed method over bert-tagger and template-based method under few-shot settings. Moreover, the decoding speed of the proposed method is up to 1930.12 times faster than the template-based method.</abstract>
       <url hash="6ef766f4">2022.naacl-main.420</url>
@@ -5893,8 +5893,8 @@
     <paper id="423">
       <title>Exploiting Inductive Bias in Transformers for Unsupervised Disentanglement of Syntax and Semantics with <fixed-case>VAE</fixed-case>s</title>
       <author><first>Ghazi</first><last>Felhi</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>5763-5776</pages>
       <abstract>We propose a generative model for text generation, which exhibits disentangled latent representations of syntax and semantics. Contrary to previous work, this model does not need syntactic information such as constituency parses, or semantic information such as paraphrase pairs. Our model relies solely on the inductive bias found in attention-based architectures such as Transformers. In the attention of Transformers, <tex-math>keys</tex-math> handle information selection while <tex-math>values</tex-math> specify what information is conveyed. Our model, dubbed QKVAE, uses Attention in its decoder to read latent variables where one latent variable infers keys while another infers values. We run experiments on latent representations and experiments on syntax/semantics transfer which show that QKVAE displays clear signs of disentangled syntax and semantics. We also show that our model displays competitive syntax transfer capabilities when compared to supervised models and that comparable supervised models need a fairly large amount of data (more than 50K samples) to outperform it on both syntactic and semantic transfer. The code for our experiments is publicly available.</abstract>
       <url hash="048551e7">2022.naacl-main.423</url>
@@ -5946,12 +5946,12 @@
     </paper>
     <paper id="427">
       <title>A Holistic Framework for Analyzing the <fixed-case>COVID</fixed-case>-19 Vaccine Debate</title>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Tunazzina</first><last>Islam</last></author>
       <author><first>Monal</first><last>Mahajan</last></author>
       <author><first>Andrey</first><last>Shor</last></author>
       <author><first>Ming</first><last>Yin</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>5821-5839</pages>
       <abstract>The Covid-19 pandemic has led to infodemic of low quality information leading to poor health decisions. Combating the outcomes of this infodemic is not only a question of identifying false claims, but also reasoning about the decisions individuals make. In this work we propose a holistic analysis framework connecting stance and reason analysis, and fine-grained entity level moral sentiment analysis. We study how to model the dependencies between the different level of analysis and incorporate human insights into the learning process. Experiments show that our framework provides reliable predictions even in the low-supervision settings.</abstract>
@@ -5992,7 +5992,7 @@
     <paper id="430">
       <title>Explaining Dialogue Evaluation Metrics using Adversarial Behavioral Analysis</title>
       <author><first>Baber</first><last>Khalid</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>5871-5883</pages>
       <abstract>There is an increasing trend in using neural methods for dialogue model evaluation. Lack of a framework to investigate these metrics can cause dialogue models to reflect their biases and cause unforeseen problems during interactions. In this work, we propose an adversarial test-suite which generates problematic variations of various dialogue aspects, e.g. logical entailment, using automatic heuristics. We show that dialogue metrics for both open-domain and task-oriented settings are biased in their assessments of different conversation behaviors and fail to properly penalize problematic conversations, by analyzing their assessments of these problematic examples. We conclude that variability in training methodologies and data-induced biases are some of the main causes of these problems. We also conduct an investigation into the metric behaviors using a black-box interpretability model which corroborates our findings and provides evidence that metrics pay attention to the problematic conversational constructs signaling a misunderstanding of different conversation semantics.</abstract>
       <url hash="cb828148">2022.naacl-main.430</url>
@@ -6007,7 +6007,7 @@
       <author><first>Laura</first><last>Vianna</last></author>
       <author><first>Xuhui</first><last>Zhou</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>5884-5906</pages>
       <abstract>The perceived toxicity of language can vary based on someone’s identity and beliefs, but this variation is often ignored when collecting toxic language datasets, resulting in dataset and model biases. We seek to understand the *who*, *why*, and *what* behind biases in toxicity annotations. In two online studies with demographically and politically diverse participants, we investigate the effect of annotator identities (*who*) and beliefs (*why*), drawing from social psychology research about hate speech, free speech, racist beliefs, political leaning, and more. We disentangle *what* is annotated as toxic by considering posts with three characteristics: anti-Black language, African American English (AAE) dialect, and vulgarity. Our results show strong associations between annotator identity and beliefs and their ratings of toxicity. Notably, more conservative annotators and those who scored highly on our scale for racist beliefs were less likely to rate anti-Black language as toxic, but more likely to rate AAE as toxic. We additionally present a case study illustrating how a popular toxicity detection system’s ratings inherently reflect only specific beliefs and perspectives. Our findings call for contextualizing toxicity labels in social variables, which raises immense implications for toxic language annotation and detection.</abstract>
       <url hash="413b20ff">2022.naacl-main.431</url>
@@ -6046,7 +6046,7 @@
       <title><fixed-case>DACSA</fixed-case>: A large-scale Dataset for Automatic summarization of <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish newspaper Articles</title>
       <author><first>Encarnación</first><last>Segarra Soriano</last></author>
       <author><first>Vicent</first><last>Ahuir</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>José</first><last>González</last></author>
       <pages>5931-5943</pages>
       <abstract>The application of supervised methods to automatic summarization requires the availability of adequate corpora consisting of a set of document-summary pairs. As in most Natural Language Processing tasks, the great majority of available datasets for summarization are in English, making it difficult to develop automatic summarization models for other languages. Although Spanish is gradually forming part of some recent summarization corpora, it is not the same for minority languages such as Catalan. In this work, we describe the construction of a corpus of Catalan and Spanish newspapers, the Dataset for Automatic summarization of Catalan and Spanish newspaper Articles (DACSA) corpus. It is a high-quality large-scale corpus that can be used to train summarization models for Catalan and Spanish.We have carried out an analysis of the corpus, both in terms of the style of the summaries and the difficulty of the summarization task. In particular, we have used a set of well-known metrics in the summarization field in order to characterize the corpus. Additionally, for benchmarking purposes, we have evaluated the performances of some extractive and abstractive summarization systems on the DACSA corpus.</abstract>
@@ -6061,7 +6061,7 @@
       <author><first>Daniel</first><last>Khashabi</last></author>
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Karishma</first><last>Mandyam</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>5944-5958</pages>
       <abstract>When an NLP model is trained on text data from one time period and tested or deployed on data from another, the resulting temporal misalignment can degrade end-task performance. In this work, we establish a suite of eight diverse tasks across different domains (social media, science papers, news, and reviews) and periods of time (spanning five years or more) to quantify the effects of temporal misalignment. Our study is focused on the ubiquitous setting where a pretrained model is optionally adapted through continued domain-specific pretraining, followed by task-specific finetuning. We establish a suite of tasks across multiple domains to study temporal misalignment in modern NLP systems. We find stronger effects of temporal misalignment on task performance than have been previously reported. We also find that, while temporal adaptation through continued pretraining can help, these gains are small compared to task-specific finetuning on data from the target time period. Our findings motivate continued research to improve temporal robustness of NLP models.</abstract>
       <url hash="bfe7bbf4">2022.naacl-main.435</url>
@@ -6073,7 +6073,7 @@
       <title><fixed-case>MCSE</fixed-case>: <fixed-case>M</fixed-case>ultimodal Contrastive Learning of Sentence Embeddings</title>
       <author><first>Miaoran</first><last>Zhang</last></author>
       <author><first>Marius</first><last>Mosbach</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Michael A.</first><last>Hedderich</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>5959-5969</pages>
@@ -6090,7 +6090,7 @@
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Shu’ang</first><last>Li</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>5970-5980</pages>
       <abstract>Unsupervised relation extraction aims to extract the relationship between entities from natural language sentences without prior information on relational scope or distribution. Existing works either utilize self-supervised schemes to refine relational feature signals by iteratively leveraging adaptive clustering and classification that provoke gradual drift problems, or adopt instance-wise contrastive learning which unreasonably pushes apart those sentence pairs that are semantically similar. To overcome these defects, we propose a novel contrastive learning framework named HiURE, which has the capability to derive hierarchical signals from relational feature space using cross hierarchy attention and effectively optimize relation representation of sentences under exemplar-wise contrastive learning. Experimental results on two public datasets demonstrate the advanced effectiveness and robustness of HiURE on unsupervised relation extraction when compared with state-of-the-art models.</abstract>
       <url hash="f6ac9275">2022.naacl-main.437</url>
@@ -6172,7 +6172,7 @@
       <booktitle>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop</booktitle>
       <editor><first>Daphne</first><last>Ippolito</last></editor>
       <editor><first>Liunian Harold</first><last>Li</last></editor>
-      <editor><first>Maria Leonor</first><last>Pacheco</last></editor>
+      <editor id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></editor>
       <editor><first>Danqi</first><last>Chen</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6214,7 +6214,7 @@
       <author><first>Mengsay</first><last>Loem</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Masahiro</first><last>Kaneko</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>16-24</pages>
       <abstract>Neural models trained with large amount of parallel data have achieved impressive performance in abstractive summarization tasks. However, large-scale parallel corpora are expensive and challenging to construct. In this work, we introduce a low-cost and effective strategy, ExtraPhrase, to augment training data for abstractive summarization tasks. ExtraPhrase constructs pseudo training data in two steps: extractive summarization and paraphrasing. We extract major parts of an input text in the extractive summarization step and obtain its diverse expressions with the paraphrasing step. Through experiments, we show that ExtraPhrase improves the performance of abstractive summarization tasks by more than 0.50 points in ROUGE scores compared to the setting without data augmentation. ExtraPhrase also outperforms existing methods such as back-translation and self-training. We also show that ExtraPhrase is significantly effective when the amount of genuine training data is remarkably small, i.e., a low-resource setting. Moreover, ExtraPhrase is more cost-efficient than the existing approaches</abstract>
       <url hash="2b495afb">2022.naacl-srw.3</url>
@@ -6280,7 +6280,7 @@
       <author><first>Xiruo</first><last>Ding</last></author>
       <author><first>Kevin</first><last>Lybarger</last></author>
       <author><first>Justin</first><last>Tauscher</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <pages>68-75</pages>
       <abstract>Cognitive distortions are counterproductive patterns of thinking that are one of the targets of cognitive behavioral therapy (CBT). These can be challenging for clinicians to detect, especially those without extensive CBT training or supervision. Text classification methods can approximate expert clinician judgment in the detection of frequently occurring cognitive distortions in text-based therapy messages. However, performance with infrequent distortions is relatively poor. In this study, we address this sparsity problem with two approaches: Data Augmentation and Domain-Specific Model. The first approach includes Easy Data Augmentation, back translation, and mixup techniques. The second approach utilizes a domain-specific pretrained language model, MentalBERT. To examine the viability of different data augmentation methods, we utilized a real-world dataset of texts between therapists and clients diagnosed with serious mental illness that was annotated for distorted thinking. We found that with optimized parameter settings, mixup was helpful for rare classes. Performance improvements with an augmented model, MentalBERT, exceed those obtained with data augmentation.</abstract>
       <url hash="0b5fac5c">2022.naacl-srw.9</url>
@@ -6315,7 +6315,7 @@
       <title>Analysing the Correlation between Lexical Ambiguity and Translation Quality in a Multimodal Setting using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Ali</first><last>Hatami</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <pages>89-95</pages>
       <abstract>Multimodal Neural Machine Translation is focusing on using visual information to translate sentences in the source language into the target language. The main idea is to utilise information from visual modalities to promote the output quality of the text-based translation model. Although the recent multimodal strategies extract the most relevant visual information in images, the effectiveness of using visual information on translation quality changes based on the text dataset. Due to this, this work studies the impact of leveraging visual information in multimodal translation models of ambiguous sentences. Our experiments analyse the Multi30k evaluation dataset and calculate ambiguity scores of sentences based on the WordNet hierarchical structure. To calculate the ambiguity of a sentence, we extract the ambiguity scores for all nouns based on the number of senses in WordNet. The main goal is to find in which sentences, visual content can improve the text-based translation model. We report the correlation between the ambiguity scores and translation quality extracted for all sentences in the English-German dataset.</abstract>
       <url hash="af9ad37b">2022.naacl-srw.12</url>
@@ -6566,7 +6566,7 @@
     <paper id="33">
       <title>Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations</title>
       <author><first>Antoine</first><last>Simoulin</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>267-276</pages>
       <abstract>We introduce a novel tree-based model that learns its composition function together with its structure. The architecture produces sentence embeddings by composing words according to an induced syntactic tree. The parsing and the composition functions are explicitly connected and, therefore, learned jointly. As a result, the sentence embedding is computed according to an interpretable linguistic pattern and may be used on any downstream task. We evaluate our encoder on downstream tasks, and we observe that it outperforms tree-based models relying on external parsers. In some configurations, it is even competitive with Bert base model. Our model is capable of supporting multiple parser architectures. We exploit this property to conduct an ablation study by comparing different parser initializations. We explore to which extent the trees produced by our model compare with linguistic structures and how this initialization impacts downstream performances. We empirically observe that downstream supervision troubles producing stable parses and preserving linguistically relevant structures.</abstract>
       <url hash="14a0d134">2022.naacl-srw.33</url>
@@ -6579,7 +6579,7 @@
       <author><first>Gerard</first><last>Sant</last></author>
       <author><first>Gerard I.</first><last>Gállego</last></author>
       <author><first>Belen</first><last>Alastruey</last></author>
-      <author><first>Marta Ruiz</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta Ruiz</first><last>Costa-jussà</last></author>
       <pages>277-284</pages>
       <abstract>Transformer-based models have been achieving state-of-the-art results in several fields of Natural Language Processing. However, its direct application to speech tasks is not trivial. The nature of this sequences carries problems such as long sequence lengths and redundancy between adjacent tokens. Therefore, we believe that regular self-attention mechanism might not be well suited for it. Different approaches have been proposed to overcome these problems, such as the use of efficient attention mechanisms. However, the use of these methods usually comes with a cost, which is a performance reduction caused by information loss. In this study, we present the Multiformer, a Transformer-based model which allows the use of different attention mechanisms on each head. By doing this, the model is able to bias the self-attention towards the extraction of more diverse token interactions, and the information loss is reduced. Finally, we perform an analysis of the head contributions, and we observe that those architectures where all heads relevance is uniformly distributed obtain better results. Our results show that mixing attention patterns along the different heads and layers outperforms our baseline by up to 0.7 BLEU.</abstract>
       <url hash="094b9633">2022.naacl-srw.34</url>
@@ -6616,7 +6616,7 @@
       <booktitle>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations</booktitle>
       <editor><first>Hannaneh</first><last>Hajishirzi</last></editor>
       <editor><first>Qiang</first><last>Ning</last></editor>
-      <editor><first>Avi</first><last>Sil</last></editor>
+      <editor id="avirup-sil"><first>Avi</first><last>Sil</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hybrid: Seattle, Washington + Online</address>
       <month>July</month>
@@ -6674,8 +6674,8 @@
       <title><fixed-case>ZS</fixed-case>4<fixed-case>IE</fixed-case>: A toolkit for Zero-Shot Information Extraction with simple Verbalizations</title>
       <author><first>Oscar</first><last>Sainz</last></author>
       <author><first>Haoling</first><last>Qiu</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Bonan</first><last>Min</last></author>
       <pages>27-38</pages>
       <abstract>The current workflow for Information Extraction (IE) analysts involves the definition of the entities/relations of interest and a training corpus with annotated examples. In this demonstration we introduce a new workflow where the analyst directly verbalizes the entities/relations, which are then used by a Textual Entailment model to perform zero-shot IE. We present the design and implementation of a toolkit with a user interface, as well as experiments on four IE tasks that show that the system achieves very good performance at zero-shot learning using only 5–15 minutes per type of a user’s effort. Our demonstration system is open-sourced at <url>https://github.com/BBN-E/ZS4IE</url>. A demonstration video is available at <url>https://vimeo.com/676138340</url>.</abstract>
@@ -6720,7 +6720,7 @@
       <author><first>Sha</first><last>Li</last></author>
       <author><first>Pengfei</first><last>Yu</last></author>
       <author><first>Hongwei</first><last>Wang</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
       <author><first>Xudong</first><last>Lin</last></author>
       <author><first>Ziqi</first><last>Wang</last></author>
       <author><first>Iris</first><last>Liu</last></author>
@@ -6746,8 +6746,8 @@
       <author><first>Carl</first><last>Vondrick</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>54-63</pages>
       <abstract>We introduce RESIN-11, a new schema-guided event extraction&amp;prediction framework that can be applied to a large variety of newsworthy scenarios. The framework consists of two parts: (1) an open-domain end-to-end multimedia multilingual information extraction system with weak-supervision and zero-shot learningbased techniques. (2) schema matching and schema-guided event prediction based on our curated schema library. We build a demo website based on our dockerized system and schema library publicly available for installation (<url>https://github.com/RESIN-KAIROS/RESIN-11</url>). We also include a video demonstrating the system.</abstract>
@@ -6760,9 +6760,9 @@
       <author><first>Robert</first><last>Vacareanu</last></author>
       <author><first>George C.G.</first><last>Barbosa</last></author>
       <author><first>Enrique</first><last>Noriega-Atala</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>64-70</pages>
       <abstract>We propose a system that assists a user in constructing transparent information extraction models, consisting of patterns (or rules) written in a declarative language, through program synthesis. Users of our system can specify their requirements through the use of examples,which are collected with a search interface. The rule-synthesis system proposes rule candidates and the results of applying them on a textual corpus; the user has the option to accept the candidate, request another option, or adjust the examples provided to the system. Through an interactive evaluation, we show that our approach generates high-precision rules even in a 1-shot setting. On a second evaluation on a widely-used relation extraction dataset (TACRED), our method generates rules that outperform considerably manually written patterns. Our code, demo, and documentation is available at <url>https://clulab.github.io/odinsynth</url>.</abstract>
@@ -6868,7 +6868,7 @@
       <booktitle>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorial Abstracts</booktitle>
       <editor><first>Miguel</first><last>Ballesteros</last></editor>
       <editor><first>Yulia</first><last>Tsvetkov</last></editor>
-      <editor><first>Cecilia O.</first><last>Alm</last></editor>
+      <editor id="cecilia-ovesdotter-alm"><first>Cecilia O.</first><last>Alm</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, United States</address>
       <month>July</month>
@@ -6970,7 +6970,7 @@
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>39-47</pages>
       <abstract>Current NLP models heavily rely on effective representation learning algorithms. Contrastive learning is one such technique to learn an embedding space such that similar data sample pairs have close representations while dissimilar samples stay far apart from each other. It can be used in supervised or unsupervised settings using different loss functions to produce task-specific or general-purpose representations. While it has originally enabled the success for vision tasks, recent years have seen a growing number of publications in contrastive NLP. This first line of works not only delivers promising performance improvements in various NLP tasks, but also provides desired characteristics such as task-agnostic sentence representation, faithful text generation, data-efficient learning in zero-shot and few-shot settings, interpretability and explainability. In this tutorial, we aim to provide a gentle introduction to the fundamentals of contrastive learning approaches and the theory behind them. We then survey the benefits and the best practices of contrastive learning for various downstream NLP applications including Text Classification, Question Answering, Summarization, Text Generation, Interpretability and Explainability, Commonsense Knowledge and Reasoning, Vision-and-Language.This tutorial intends to help researchers in the NLP and computational linguistics community to understand this emerging topic and promote future research directions of using contrastive learning for NLP applications.</abstract>
       <url hash="46b089ea">2022.naacl-tutorials.6</url>
@@ -7002,7 +7002,7 @@
       <author><first>Jinseok</first><last>Nam</last></author>
       <author><first>Sarthak</first><last>Ahuja</last></author>
       <author><first>Jin-Myung</first><last>Won</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>1-8</pages>
       <abstract>Skill routing is an important component in large-scale conversational systems. In contrast to traditional rule-based skill routing, state-of-the-art systems use a model-based approach to enable natural conversations. To provide supervision signal required to train such models, ideas such as human annotation, replication of a rule-based system, relabeling based on user paraphrases, and bandit-based learning were suggested. However, these approaches: (a) do not scale in terms of the number of skills and skill on-boarding, (b) require a very costly expert annotation/rule-design, (c) introduce risks in the user experience with each model update. In this paper, we present a scalable self-learning approach to explore routing alternatives without causing abrupt policy changes that break the user experience, learn from the user interaction, and incrementally improve the routing via frequent model refreshes. To enable such robust frequent model updates, we suggest a simple and effective approach that ensures controlled policy updates for individual domains, followed by an off-policy evaluation for making deployment decisions without any need for lengthy A/B experimentation. We conduct various offline and online A/B experiments on a commercial large-scale conversational system to demonstrate the effectiveness of the proposed method in real-world production settings.</abstract>
       <url hash="98471458">2022.naacl-industry.1</url>
@@ -7026,7 +7026,7 @@
     </paper>
     <paper id="3">
       <title>Augmenting Poetry Composition with <fixed-case>V</fixed-case>erse by <fixed-case>V</fixed-case>erse</title>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <author><first>Maria</first><last>Voitovich</last></author>
       <author><first>R.J.</first><last>Mical</last></author>
       <pages>18-26</pages>
@@ -7119,7 +7119,7 @@
       <author><first>Sho</first><last>Hoshino</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>69-78</pages>
       <abstract>Writing an ad text that attracts people and persuades them to click or act is essential for the success of search engine advertising. Therefore, ad creators must consider various aspects of advertising appeals (A<tex-math>^3</tex-math>) such as the price, product features, and quality. However, products and services exhibit unique effective A<tex-math>^3</tex-math> for different industries. In this work, we focus on exploring the effective A<tex-math>^3</tex-math> for different industries with the aim of assisting the ad creation process. To this end, we created a dataset of advertising appeals and used an existing model that detects various aspects for ad texts. Our experiments demonstrated %through correlation analysis that different industries have their own effective A<tex-math>^3</tex-math> and that the identification of the A<tex-math>^3</tex-math> contributes to the estimation of advertising performance.</abstract>
       <url hash="a2ec38f9">2022.naacl-industry.9</url>
@@ -7158,7 +7158,7 @@
       <title>Distantly Supervised Aspect Clustering And Naming For <fixed-case>E</fixed-case>-Commerce Reviews</title>
       <author><first>Prateek</first><last>Sircar</last></author>
       <author><first>Aniket</first><last>Chakrabarti</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Anirban</first><last>Majumdar</last></author>
       <pages>94-102</pages>
       <abstract>Product aspect extraction from reviews is a critical task for e-commerce services to understand customer preferences and pain points. While aspect phrases extraction and sentiment analysis have received a lot of attention, clustering of aspect phrases and assigning human readable names to clusters in e-commerce reviews is an extremely important and challenging problem due to the scale of the reviews that makes human review infeasible. In this paper, we propose fully automated methods for clustering aspect words and generating human readable names for the clusters without any manually labeled data. We train transformer based sentence embeddings that are aware of unique e-commerce language characteristics (eg. incomplete sentences, spelling and grammar errors, vernacular etc.). We also train transformer based sequence to sequence models to generate human readable aspect names from clusters. Both the models are trained using heuristic based distant supervision. Additionally, the models are used to improve each other. Extensive empirical testing showed that the clustering model improves the Silhouette Score by 64% when compared to the state-of-the-art baseline and the aspect naming model achieves a high ROUGE-L score of 0.79.</abstract>
@@ -7185,7 +7185,7 @@
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Jian</first><last>Jiao</last></author>
       <author><first>Ruofei</first><last>Zhang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <pages>112-120</pages>
       <abstract>Pre-trained language models (PLMs) have dramatically improved performance for many natural language processing (NLP) tasks in domains such as finance and healthcare. However, the application of PLMs in the domain of commerce, especially marketing and advertising, remains less studied. In this work, we adapt pre-training methods to the domain of commerce, by proposing CULG, a large-scale commercial universal language generation model which is pre-trained on a corpus drawn from 10 markets across 7 languages. We propose 4 commercial generation tasks and a two-stage training strategy for pre-training, and demonstrate that the proposed strategy yields performance improvements on three generation tasks as compared to single-stage pre-training. Extensive experiments show that our model outperforms other models by a large margin on commercial generation tasks, and we conclude with a discussion on additional applications over other markets, languages, and tasks.</abstract>
@@ -7240,7 +7240,7 @@
     </paper>
     <paper id="18">
       <title>Asynchronous Convergence in Multi-Task Learning via Knowledge Distillation from Converged Tasks</title>
-      <author><first>Weiyi</first><last>Lu</last></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Lu</last></author>
       <author><first>Sunny</first><last>Rajagopalan</last></author>
       <author><first>Priyanka</first><last>Nigam</last></author>
       <author><first>Jaspreet</first><last>Singh</last></author>
@@ -7315,7 +7315,7 @@
       <title>Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining</title>
       <author><first>Minhua</first><last>Chen</last></author>
       <author><first>Badrinath</first><last>Jayakumar</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>S. Eman</first><last>Mahmoodi</last></author>
       <author><first>Daniel</first><last>Pressel</last></author>
       <pages>197-208</pages>
@@ -7343,7 +7343,7 @@
       <title>Lightweight Transformers for Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Daniel</first><last>Pressel</last></author>
       <author><first>Wenshuo</first><last>Liu</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>Minhua</first><last>Chen</last></author>
       <pages>221-229</pages>
       <abstract>To understand how training on conversational language impacts performance of pre-trained models on downstream dialogue tasks, we build compact Transformer-based Language Models from scratch on several large corpora of conversational data. We compare the performance and characteristics of these models against BERT and other strong baselines on dialogue probing tasks. Commercial dialogue systems typically require a small footprint and fast execution time, but recent trends are in the other direction, with an ever-increasing number of parameters, resulting in difficulties in model deployment. We focus instead on training fast, lightweight models that excel at natural language understanding (NLU) and can replace existing lower-capacity conversational AI models with similar size and speed. In the process, we develop a simple but unique curriculum-based approach that moves from general-purpose to dialogue-targeted both in terms of data and objective. Our resultant models have around 1/3 the number of parameters of BERT-base and produce better representations for a wide array of intent detection datasets using linear and Mutual-Information probing techniques. Additionally, the models can be easily fine-tuned on a single consumer GPU card and deployed in near real-time production environments.</abstract>
@@ -7369,7 +7369,7 @@
       <title>What Do Users Care About? Detecting Actionable Insights from User Feedback</title>
       <author><first>Kasturi</first><last>Bhattacharjee</last></author>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>239-246</pages>
       <abstract>Users often leave feedback on a myriad of aspects of a product which, if leveraged successfully, can help yield useful insights that can lead to further improvements down the line. Detecting actionable insights can be challenging owing to large amounts of data as well as the absence of labels in real-world scenarios. In this work, we present an aggregation and graph-based ranking strategy for unsupervised detection of these insights from real-world, noisy, user-generated feedback. Our proposed approach significantly outperforms strong baselines on two real-world user feedback datasets and one academic dataset.</abstract>
@@ -7396,7 +7396,7 @@
       <author><first>Pooja</first><last>Hiranandani</last></author>
       <author><first>Shayna</first><last>Gardiner</last></author>
       <author><first>Cheng</first><last>Chen</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Xue-Yong</first><last>Fu</last></author>
       <pages>259-267</pages>
       <abstract>For agents at a contact centre receiving calls, the most important piece of information is the reason for a given call. An agent cannot provide support on a call if they do not know why a customer is calling. In this paper we describe our implementation of a commercial system to detect Purpose of Call statements in English business call transcripts in real time. We present a detailed analysis of types of Purpose of Call statements and language patterns related to them, discuss an approach to collect rich training data by bootstrapping from a set of rules to a neural model, and describe a hybrid model which consists of a transformer-based classifier and a set of rules by leveraging insights from the analysis of call transcripts. The model achieved 88.6 F1 on average in various types of business calls when tested on real life data and has low inference time. We reflect on the challenges and design decisions when developing and deploying the system.</abstract>
@@ -7462,8 +7462,8 @@
       <author><first>Vishwajeet</first><last>Kumar</last></author>
       <author><first>Samarth</first><last>Bharadwaj</last></author>
       <author><first>Mustafa</first><last>Canim</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Feifei</first><last>Pan</last></author>
       <author><first>Jaydeep</first><last>Sen</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
@@ -7514,7 +7514,7 @@
       <author><first>Siva Sankalp</first><last>Patel</last></author>
       <author><first>J William</first><last>Murdock</last></author>
       <author><first>Saloni</first><last>Potdar</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>334-343</pages>
       <abstract>Dialogue systems can benefit from being able to search through a corpus of text to find information relevant to user requests, especially when encountering a request for which no manually curated response is available. The state-of-the-art technology for neural dense retrieval or re-ranking involves deep learning models with hundreds of millions of parameters. However, it is difficult and expensive to get such models to operate at an industrial scale, especially for cloud services that often need to support a big number of individually customized dialogue systems, each with its own text corpus. We report our work on enabling advanced neural dense retrieval systems to operate effectively at scale on relatively inexpensive hardware. We compare with leading alternative industrial solutions and show that we can provide a solution that is effective, fast, and cost-efficient.</abstract>
       <url hash="e150f233">2022.naacl-industry.37</url>
@@ -7530,7 +7530,7 @@
       <author><first>Jonathan</first><last>Johnston</last></author>
       <author><first>Xue-Yong</first><last>Fu</last></author>
       <author><first>Shashi Bhushan</first><last>Tn</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>344-352</pages>
       <abstract>An Entity Linking system aligns the textual mentions of entities in a text to their corresponding entries in a knowledge base. However, deploying a neural entity linking system for efficient real-time inference in production environments is a challenging task. In this work, we present a neural entity linking system that connects the product and organization type entities in business conversations to their corresponding Wikipedia and Wikidata entries. The proposed system leverages Elasticsearch to ensure inference efficiency when deployed in a resource limited cloud machine, and obtains significant improvements in terms of inference speed and memory consumption while retaining high accuracy.</abstract>
       <url hash="90745fc9">2022.naacl-industry.38</url>
diff --git a/data/xml/2022.nejlt.xml b/data/xml/2022.nejlt.xml
index fff99a1b9b..5f591b03ec 100644
--- a/data/xml/2022.nejlt.xml
+++ b/data/xml/2022.nejlt.xml
@@ -14,7 +14,7 @@
     </meta>
     <paper id="1">
       <title>Foreword to <fixed-case>NEJLT</fixed-case> Volume 8, 2022</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <abstract>An introduction to the Northern European Journal of Language Technology in 2022</abstract>
       <url hash="e8b71a2c">2022.nejlt-1.1</url>
       <doi>10.3384/nejlt.2000-1533.2022.4617</doi>
@@ -24,7 +24,7 @@
       <title>Task-dependent Optimal Weight Combinations for Static Embeddings</title>
       <author><first>Nathaniel</first><last>Robinson</last></author>
       <author><first>Nathaniel</first><last>Carlson</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <author><first>Elizabeth</first><last>Vargas</last></author>
       <author><first>Thomas</first><last>Fackrell</last></author>
       <author><first>Nancy</first><last>Fulda</last></author>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Building Analyses from Syntactic Inference in Local Languages: An <fixed-case>HPSG</fixed-case> Grammar Inference System</title>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <abstract>We present a grammar inference system that leverages linguistic knowledge recorded in the form of annotations in interlinear glossed text (IGT) and in a meta-grammar engineering system (the LinGO Grammar Matrix customization system) to automatically produce machine-readable HPSG grammars. Building on prior work to handle the inference of lexical classes, stems, affixes and position classes, and preliminary work on inferring case systems and word order, we introduce an integrated grammar inference system that covers a wide range of fundamental linguistic phenomena. System development was guided by 27 geneologically and geographically diverse languages, and we test the system’s cross-linguistic generalizability on an additional 5 held-out languages, using datasets provided by field linguists. Our system out-performs three baseline systems in increasing coverage while limiting ambiguity and producing richer semantic representations, while also producing richer representations than previous work in grammar inference.</abstract>
       <url hash="da808536">2022.nejlt-1.3</url>
       <doi>10.3384/nejlt.2000-1533.2022.4017</doi>
@@ -46,7 +46,7 @@
       <title>Bias Identification and Attribution in <fixed-case>NLP</fixed-case> Models With Regression and Effect Sizes</title>
       <author><first>Erenay</first><last>Dayanik</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <abstract>In recent years, there has been an increasing awareness that many NLP systems incorporate biases of various types (e.g., regarding gender or race) which can have significant negative consequences. At the same time, the techniques used to statistically analyze such biases are still relatively simple. Typically, studies test for the presence of a significant difference between two levels of a single bias variable (e.g., male vs. female) without attention to potential confounders, and do not quantify the importance of the bias variable. This article proposes to analyze bias in the output of NLP systems using multivariate regression models. They provide a robust and more informative alternative which (a) generalizes to multiple bias variables, (b) can take covariates into account, (c) can be combined with measures of effect size to quantify the size of bias. Jointly, these effects contribute to a more robust statistical analysis of bias that can be used to diagnose system behavior and extract informative examples. We demonstrate the benefits of our method by analyzing a range of current NLP models on one regression and one classification tasks (emotion intensity prediction and coreference resolution, respectively).</abstract>
       <url hash="d70ddadb">2022.nejlt-1.4</url>
       <doi>10.3384/nejlt.2000-1533.2022.3505</doi>
@@ -55,7 +55,7 @@
     <paper id="5">
       <title>Policy-focused Stance Detection in Parliamentary Debate Speeches</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <abstract>Legislative debate transcripts provide citizens with information about the activities of their elected representatives, but are difficult for people to process. We propose the novel task of policy-focused stance detection, in which both the policy proposals under debate and the position of the speakers towards those proposals are identified. We adapt a previously existing dataset to include manual annotations of policy preferences, an established schema from political science. We evaluate a range of approaches to the automatic classification of policy preferences and speech sentiment polarity, including transformer-based text representations and a multi-task learning paradigm. We find that it is possible to identify the policies under discussion using features derived from the speeches, and that incorporating motion-dependent debate modelling, previously used to classify speech sentiment, also improves performance in the classification of policy preferences. We analyse the output of the best performing system, finding that discriminating features for the task are highly domain-specific, and that speeches that address policy preferences proposed by members of the same party can be among the most difficult to predict.</abstract>
       <url hash="45e6c84c">2022.nejlt-1.5</url>
       <doi>10.3384/nejlt.2000-1533.2022.3454</doi>
@@ -98,7 +98,7 @@
       <title>Contextualized embeddings for semantic change detection: Lessons learned</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <abstract>We present a qualitative analysis of the (potentially erroneous) outputs of contextualized embedding-based methods for detecting diachronic semantic change. First, we introduce an ensemble method outperforming previously described contextualized approaches. This method is used as a basis for an in-depth analysis of the degrees of semantic change predicted for English words across 5 decades. Our findings show that contextualized methods can often predict high change scores for words which are not undergoing any real diachronic semantic shift in the lexicographic sense of the term (or at least the status of these shifts is questionable). Such challenging cases are discussed in detail with examples, and their linguistic categorization is proposed. Our conclusion is that pre-trained contextualized language models are prone to confound changes in lexicographic senses and changes in contextual variance, which naturally stem from their distributional nature, but is different from the types of issues observed in methods based on static embeddings. Additionally, they often merge together syntactic and semantic aspects of lexical entities. We propose a range of possible future solutions to these issues.</abstract>
       <url hash="db015b96">2022.nejlt-1.9</url>
       <doi>10.3384/nejlt.2000-1533.2022.3478</doi>
diff --git a/data/xml/2022.nidcp.xml b/data/xml/2022.nidcp.xml
index ea377c2b4b..82febc67f1 100644
--- a/data/xml/2022.nidcp.xml
+++ b/data/xml/2022.nidcp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Novel Incentives in Data Collection from People: models, implementations, challenges and results within LREC 2022</booktitle>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
-      <editor><first>Christopher</first><last>Cieri</last></editor>
+      <editor id="christopher-cieri"><first>Christopher</first><last>Cieri</last></editor>
       <editor><first>James</first><last>Fiumara</last></editor>
-      <editor><first>Mark</first><last>Liberman</last></editor>
+      <editor id="mark-liberman"><first>Mark</first><last>Liberman</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title>Use of a Citizen Science Platform for the Creation of a Language Resource to Study Bias in Language Models for <fixed-case>F</fixed-case>rench: A Case Study</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
       <author><first>Julien</first><last>Bezançon</last></author>
       <pages>8–13</pages>
@@ -57,7 +57,7 @@
     <paper id="4">
       <title>Creating <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>panish Language Resources through the Social Service Program</title>
       <author><first>Carlos Daniel</first><last>Hernandez Mena</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <pages>20–24</pages>
       <abstract>This work presents the path toward the creation of eight Spoken Language Resources under the umbrella of the Mexican Social Service national program. This program asks undergraduate students to donate time and work for the benefit of their society as a requirement to receive their degree. The program has thousands of options for the students who enroll. We show how we created a program which has resulted in the creation of open language resources which now are freely available in different repositories. We estimate that this exercise is equivalent to a budget of more than half a million US dollars. However, since the program is based on retribution from the students to their communities there has not been a necessity of a financial budget.</abstract>
       <url hash="faffdc10">2022.nidcp-1.4</url>
diff --git a/data/xml/2022.nlg4health.xml b/data/xml/2022.nlg4health.xml
index 087188a499..eca239c5c2 100644
--- a/data/xml/2022.nlg4health.xml
+++ b/data/xml/2022.nlg4health.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2022-11-08" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Natural Language Generation in Healthcare</booktitle>
-      <editor><first>Emiel</first><last>Krahmer</last></editor>
+      <editor id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></editor>
       <editor><first>Kathy</first><last>McCoy</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -52,7 +52,7 @@
       <title>Towards Development of an Automated Health Coach</title>
       <author><first>Leighanne</first><last>Hsu</last></author>
       <author><first>Rommy</first><last>Marquez Hernandez</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Keith</first><last>Decker</last></author>
       <author><first>Ajith</first><last>Vemuri</last></author>
       <author><first>Greg</first><last>Dominick</last></author>
@@ -66,7 +66,7 @@
       <title>Personalizing Weekly Diet Reports</title>
       <author><first>Elena</first><last>Monfroglio</last></author>
       <author><first>Lucas</first><last>Anselma</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>40-45</pages>
       <abstract>In this paper we present the main components of a weekly diet report generator (DRG) in natural language. The idea is to produce a text that contains information on the adherence of the dishes eaten during a week to the Mediterranean diet. The system is based on a user model, a database of the dishes eaten during the week and on the automatic computation of the Mediterranean Diet Score. All these sources of information are exploited to produce a highly personalized text. The system has two main goals, related to two different kinds of users: on the one hand, when used by dietitians, the main goal is to highlight the most salient medical information of the patient diet and, on the other hand, when used by final users, the main goal is to educate them toward a Mediterranean style of eating.</abstract>
       <url hash="be64e857">2022.nlg4health-1.5</url>
diff --git a/data/xml/2022.nllp.xml b/data/xml/2022.nllp.xml
index d9e87d6e33..f79945a830 100644
--- a/data/xml/2022.nllp.xml
+++ b/data/xml/2022.nllp.xml
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>On Breadth Alone: Improving the Precision of Terminology Extraction Systems on Patent Corpora</title>
       <author><first>Sean</first><last>Nordquist</last><affiliation>New York University</affiliation></author>
-      <author><first>Adam</first><last>Meyers</last><affiliation>New York University</affiliation></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last><affiliation>New York University</affiliation></author>
       <pages>1-11</pages>
       <abstract>Automatic Terminology Extraction (ATE) methods are a class of linguistic, statistical, machine learning or hybrid techniques for identifying terminology in a set of documents. Most modern ATE methods use a statistical measure of how important or characteristic a potential term is to a foreground corpus by using a second background corpus as a baseline. While many variables with ATE methods have been carefully evaluated and tuned in the literature, the effects of choosing a particular background corpus over another are not obvious. In this paper, we propose a methodology that allows us to adjust the relative breadth of the foreground and background corpora in patent documents by taking advantage of the Cooperative Patent Classification (CPC) scheme. Our results show that for every foreground corpus, the broadest background corpus gave the worst performance, in the worst case that difference is 17%. Similarly, the least broad background corpus gave suboptimal performance in all three experiments. We also demonstrate qualitative differences between background corpora – narrower background corpora tend towards more technical output. We expect our results to generalize to terminology extraction for other legal and technical documents and, generally, to the foreground/background approach to ATE.</abstract>
       <url hash="b5def077">2022.nllp-1.1</url>
@@ -285,7 +285,7 @@
       <title>Detecting Relevant Differences Between Similar Legal Texts</title>
       <author><first>Xiang</first><last>Li</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Jiaxun</first><last>Gao</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Wolfgang</first><last>Alschner</last><affiliation>University of Ottawa</affiliation></author>
       <pages>256-264</pages>
       <abstract>Given two similar legal texts, is it useful to be able to focus only on the parts that contain relevant differences. However, because of variation in linguistic structure and terminology, it is not easy to identify true semantic differences. An accurate difference detection model between similar legal texts is therefore in demand, in order to increase the efficiency of legal research and document analysis. In this paper, we automatically label a training dataset of sentence pairs using an existing legal resource of international investment treaties that were already manually annotated with metadata. Then we propose models based on state-of-the-art deep learning techniques for the novel task of detecting relevant differences. In addition to providing solutions for this task, we include models for automatically producing metadata for the treaties that do not have it.</abstract>
@@ -336,7 +336,7 @@
       <author><first>Vinay</first><last>Aggarwal</last><affiliation>Adobe</affiliation></author>
       <author><first>Ananya</first><last>Ganesh</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Niyati</first><last>Chhaya</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Nandakishore</first><last>Kambhatla</last><affiliation>Adobe Research</affiliation></author>
+      <author id="nanda-kambhatla"><first>Nandakishore</first><last>Kambhatla</last><affiliation>Adobe Research</affiliation></author>
       <pages>296-304</pages>
       <url hash="9d0e3193">2022.nllp-1.28</url>
       <bibkey>garimella-etal-2022-text</bibkey>
@@ -362,7 +362,7 @@
     <paper id="30">
       <title>Computing and Exploiting Document Structure to Improve Unsupervised Extractive Summarization of Legal Case Decisions</title>
       <author><first>Yang</first><last>Zhong</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>322-337</pages>
       <abstract>Though many algorithms can be used to automatically summarize legal case decisions, most fail to incorporate domain knowledge about how important sentences in a legal decision relate to a representation of its document structure. For example, analysis of a legal case sum- marization dataset demonstrates that sentences serving different types of argumentative roles in the decision appear in different sections of the document. In this work, we propose an unsupervised graph-based ranking model that uses a reweighting algorithm to exploit properties of the document structure of legal case decisions. We also explore the impact of using different methods to compute the document structure. Results on the Canadian Legal Case Law dataset show that our proposed method outperforms several strong baselines.</abstract>
       <url hash="59c098e2">2022.nllp-1.30</url>
diff --git a/data/xml/2022.nlp4call.xml b/data/xml/2022.nlp4call.xml
index 6a069f117d..b0a0f4040e 100644
--- a/data/xml/2022.nlp4call.xml
+++ b/data/xml/2022.nlp4call.xml
@@ -8,7 +8,7 @@
       <editor><first>Thomas</first><last>François</last></editor>
       <editor><first>Piet</first><last>Desmet</last></editor>
       <editor><first>Frederik</first><last>Cornillie</last></editor>
-      <editor><first>Arne</first><last>Jönsson</last></editor>
+      <editor id="arne-jonsson"><first>Arne</first><last>Jönsson</last></editor>
       <editor><first>Evelina</first><last>Rennes</last></editor>
       <publisher>LiU Electronic Press</publisher>
       <address>Louvain-la-Neuve, Belgium</address>
@@ -73,7 +73,7 @@
     <paper id="7">
       <title>Generating and authoring high-variability exercises from authentic texts</title>
       <author><first>Tanja</first><last>Heck</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>61–71</pages>
       <url hash="9a1ddd49">2022.nlp4call-1.7</url>
       <bibkey>heck-meurers-2022-generating</bibkey>
@@ -120,7 +120,7 @@
     <paper id="12">
       <title>A Transformer for <fixed-case>SAG</fixed-case>: What Does it Grade?</title>
       <author><first>Nico</first><last>Willms</last></author>
-      <author><first>Ulrike</first><last>Pado</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Pado</last></author>
       <pages>114–122</pages>
       <url hash="ae315f8a">2022.nlp4call-1.12</url>
       <bibkey>willms-pado-2022-transformer</bibkey>
diff --git a/data/xml/2022.nlp4convai.xml b/data/xml/2022.nlp4convai.xml
index 2673f7f4a3..469acff260 100644
--- a/data/xml/2022.nlp4convai.xml
+++ b/data/xml/2022.nlp4convai.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 4th Workshop on NLP for Conversational AI</booktitle>
       <editor><first>Bing</first><last>Liu</last></editor>
-      <editor><first>Alexandros</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></editor>
       <editor><first>Stefan</first><last>Ultes</last></editor>
       <editor><first>Abhinav</first><last>Rastogi</last></editor>
       <editor><first>Yun-Nung</first><last>Chen</last></editor>
@@ -42,7 +42,7 @@
       <author><first>Zhiwei</first><last>Liu</last></author>
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>12-20</pages>
       <abstract>Pre-trained Transformer-based models were reported to be robust in intent classification. In this work, we first point out the importance of in-domain out-of-scope detection in few-shot intent recognition tasks and then illustrate the vulnerability of pre-trained Transformer-based models against samples that are in-domain but out-of-scope (ID-OOS). We construct two new datasets, and empirically show that pre-trained models do not perform well on both ID-OOS examples and general out-of-scope examples, especially on fine-grained few-shot intent detection tasks.</abstract>
       <url hash="e08ad7d3">2022.nlp4convai-1.2</url>
@@ -95,7 +95,7 @@
       <title>Extracting and Inferring Personal Attributes from Dialogue</title>
       <author><first>Zhilin</first><last>Wang</last></author>
       <author><first>Xuhui</first><last>Zhou</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Alex</first><last>Marin</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>58-69</pages>
@@ -111,7 +111,7 @@
       <author><first>Xiaoyu</first><last>Shen</last></author>
       <author><first>Gianni</first><last>Barlacchi</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>70-76</pages>
       <abstract>In conversational QA, models have to leverage information in previous turns to answer upcoming questions. Current approaches, such as Question Rewriting, struggle to extract relevant information as the conversation unwinds. We introduce the Common Ground (CG), an approach to accumulate conversational information as it emerges and select the relevant information at every turn. We show that CG offers a more efficient and human-like way to exploit conversational information compared to existing approaches, leading to improvements on Open Domain Conversational QA.</abstract>
       <url hash="3a0d4a15">2022.nlp4convai-1.7</url>
@@ -137,8 +137,8 @@
     <paper id="9">
       <title><fixed-case>KG</fixed-case>-<fixed-case>CR</fixed-case>u<fixed-case>SE</fixed-case>: Recurrent Walks over Knowledge Graph for Explainable Conversation Reasoning using Semantic Embeddings</title>
       <author><first>Rajdeep</first><last>Sarkar</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <pages>98-107</pages>
       <abstract>Knowledge-grounded dialogue systems utilise external knowledge such as knowledge graphs to generate informative and appropriate responses. A crucial challenge of such systems is to select facts from a knowledge graph pertinent to the dialogue context for response generation. This fact selection can be formulated as path traversal over a knowledge graph conditioned on the dialogue context. Such paths can originate from facts mentioned in the dialogue history and terminate at the facts to be mentioned in the response. These walks, in turn, provide an explanation of the flow of the conversation. This work proposes KG-CRuSE, a simple, yet effective LSTM based decoder that utilises the semantic information in the dialogue history and the knowledge graph elements to generate such paths for effective conversation explanation. Extensive evaluations showed that our model outperforms the state-of-the-art models on the OpenDialKG dataset on multiple metrics.</abstract>
       <url hash="f84cf690">2022.nlp4convai-1.9</url>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="12">
       <title>Multimodal Conversational <fixed-case>AI</fixed-case>: A Survey of Datasets and Approaches</title>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <author><first>Larry</first><last>Heck</last></author>
       <pages>131-147</pages>
       <abstract>As humans, we experience the world with all our senses or modalities (sound, sight, touch, smell, and taste). We use these modalities, particularly sight and touch, to convey and interpret specific meanings. Multimodal expressions are central to conversations; a rich set of modalities amplify and often compensate for each other. A multimodal conversational AI system answers questions, fulfills tasks, and emulates human conversations by understanding and expressing itself via multiple modalities. This paper motivates, defines, and mathematically formulates the multimodal conversational research objective. We provide a taxonomy of research required to solve the objective: multimodal representation, fusion, alignment, translation, and co-learning. We survey state-of-the-art datasets and approaches for each research area and highlight their limiting assumptions. Finally, we identify multimodal co-learning as a promising direction for multimodal conversational AI research.</abstract>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="13">
       <title>Open-domain Dialogue Generation: What We Can Do, Cannot Do, And Should Do Next</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Abteen</first><last>Ebrahimi</last></author>
       <author><first>Joewie</first><last>Koh</last></author>
       <author><first>Shiran</first><last>Dudy</last></author>
@@ -226,7 +226,7 @@
       <title>Stylistic Response Generation by Controlling Personality Traits and Intent</title>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>197-211</pages>
       <abstract>Personality traits influence human actions and thoughts, which is manifested in day to day conversations. Although glimpses of personality traits are observable in existing open domain conversation corpora, leveraging generic language modelling for response generation overlooks the interlocutor idiosyncrasies, resulting in non-customizable personality agnostic responses. With the motivation of enabling stylistically configurable response generators, in this paper we experiment with end-to-end mechanisms to ground neural response generators based on both (i) interlocutor Big-5 personality traits, and (ii) discourse intent as stylistic control codes. Since most of the existing large scale open domain chat corpora do not include Big-5 personality traits and discourse intent, we employ automatic annotation schemes to enrich the corpora with noisy estimates of personality and intent annotations, and further assess the impact of using such features as control codes for response generation using automatic evaluation metrics, ablation studies and human judgement. Our experiments illustrate the effectiveness of this strategy resulting in improvements to existing benchmarks. Additionally, we yield two silver standard annotated corpora with intents and personality traits annotated, which can be of use to the research community.</abstract>
       <url hash="3f404b53">2022.nlp4convai-1.16</url>
@@ -240,7 +240,7 @@
       <author><first>Yong</first><last>Liu</last></author>
       <author><first>Boyang</first><last>Li</last></author>
       <author><first>Peixiang</first><last>Zhong</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Hao</first><last>Wang</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>212-217</pages>
diff --git a/data/xml/2022.nlp4dh.xml b/data/xml/2022.nlp4dh.xml
index 97d49e5244..f8ab9e7fd9 100644
--- a/data/xml/2022.nlp4dh.xml
+++ b/data/xml/2022.nlp4dh.xml
@@ -169,7 +169,7 @@
     </paper>
     <paper id="15">
       <title>Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Rory</first><last>O’Halloran</last></author>
       <author><first>Cécile</first><last>Robin</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -181,7 +181,7 @@
     </paper>
     <paper id="16">
       <title>Non-Parametric Word Sense Disambiguation for Historical Languages</title>
-      <author><first>Enrique</first><last>Manjavacas Arevalo</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas Arevalo</last></author>
       <author><first>Lauren</first><last>Fonteyn</last></author>
       <pages>123–134</pages>
       <abstract>Recent approaches to Word Sense Disambiguation (WSD) have profited from the enhanced contextualized word representations coming from contemporary Large Language Models (LLMs). This advancement is accompanied by a renewed interest in WSD applications in Humanities research, where the lack of suitable, specific WSD-annotated resources is a hurdle in developing ad-hoc WSD systems. Because they can exploit sentential context, LLMs are particularly suited for disambiguation tasks. Still, the application of LLMs is often limited to linear classifiers trained on top of the LLM architecture. In this paper, we follow recent developments in non-parametric learning and show how LLMs can be efficiently fine-tuned to achieve strong few-shot performance on WSD for historical languages (English and Dutch, date range: 1450-1950). We test our hypothesis using (i) a large, general evaluation set taken from large lexical databases, and (ii) a small real-world scenario involving an ad-hoc WSD task. Moreover, this paper marks the release of GysBERT, a LLM for historical Dutch.</abstract>
diff --git a/data/xml/2022.nlp4pi.xml b/data/xml/2022.nlp4pi.xml
index d87588dd5a..a396ee7677 100644
--- a/data/xml/2022.nlp4pi.xml
+++ b/data/xml/2022.nlp4pi.xml
@@ -7,7 +7,7 @@
       <editor><first>Dorottya</first><last>Demszky</last></editor>
       <editor><first>Zhijing</first><last>Jin</last></editor>
       <editor><first>Mrinmaya</first><last>Sachan</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Steven</first><last>Wilson</last></editor>
       <editor><first>Lu</first><last>Xiao</last></editor>
       <editor><first>Jieyu</first><last>Zhao</last></editor>
@@ -109,7 +109,7 @@
     <paper id="11">
       <title>Modelling Persuasion through Misuse of Rhetorical Appeals</title>
       <author><first>Amalie</first><last>Pauli</last><affiliation>Computer Science, Aarhus University</affiliation></author>
-      <author><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Ira</first><last>Assent</last><affiliation>Department of Computer Science, Aarhus University</affiliation></author>
       <pages>89-100</pages>
       <abstract>It is important to understand how people use words to persuade each other. This helps understand debate, and detect persuasive narratives in regard to e.g. misinformation. While computational modelling of some aspects of persuasion has received some attention, a way to unify and describe the overall phenomenon of when persuasion becomes undesired and problematic, is missing. In this paper, we attempt to address this by proposing a taxonomy of computational persuasion. Drawing upon existing research and resources, this paper shows how to re-frame and re-organise current work into a coherent framework targeting the misuse of rhetorical appeals. As a study to validate these re-framings, we then train and evaluate models of persuasion adapted to our taxonomy. Our results show an application of our taxonomy, and we are able to detecting misuse of rhetorical appeals, finding that these are more often used in misinformative contexts than in true ones.</abstract>
diff --git a/data/xml/2022.nlpcss.xml b/data/xml/2022.nlpcss.xml
index 43c7be20f7..8d5f0efbe2 100644
--- a/data/xml/2022.nlpcss.xml
+++ b/data/xml/2022.nlpcss.xml
@@ -63,7 +63,7 @@
       <title>Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models</title>
       <author><first>Xiao</first><last>Xu</last><affiliation>NIDI-KNAW / University of Groningen</affiliation></author>
       <author><first>Gert</first><last>Stulp</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Antal</first><last>Van Den Bosch</last><affiliation>Utrecht University</affiliation></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>Van Den Bosch</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Anne</first><last>Gauthier</last><affiliation>Nidi-knaw</affiliation></author>
       <pages>33-38</pages>
       <abstract>Fertility intentions as verbalized in surveys are a poor predictor of actual fertility outcomes, the number of children people have. This can partly be explained by the uncertainty people have in their intentions. Such uncertainties are hard to capture through traditional survey questions, although open-ended questions can be used to get insight into people’s subjective narratives of the future that determine their intentions. Analyzing such answers to open-ended questions can be done through Natural Language Processing techniques. Traditional topic models (e.g., LSA and LDA), however, often fail to do since they rely on co-occurrences, which are often rare in short survey responses. The aim of this study was to apply and evaluate topic models on demographic survey data. In this study, we applied neural topic models (e.g. BERTopic, CombinedTM) based on language models to responses from Dutch women on their fertility plans, and compared the topics and their coherence scores from each model to expert judgments. Our results show that neural models produce topics more in line with human interpretation compared to LDA. However, the coherence score could only partly reflect on this, depending on the corpus used for calculation. This research is important because, first, it helps us develop more informed strategies on model selection and evaluation for topic modeling on survey data; and second, it shows that the field of demography has much to gain from adopting NLP methods.</abstract>
@@ -181,7 +181,7 @@
       <author><first>Nikita</first><last>Soni</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Weixi</first><last>Wang</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Christian</first><last>Luhmann</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Naoya</first><last>Inoue</last><affiliation>Japan Advanced Institute of Science and Technology</affiliation></author>
       <pages>151-156</pages>
       <abstract>We address dissonant stance detection, classifying conflicting stance between two input statements. Computational models for traditional stance detection have typically been trained to indicate pro/con for a given target topic (e.g. gun control) and thus do not generalize well to new topics. In this paper, we systematically evaluate the generalizability of dissonant stance detection to situations where examples of the topic have not been seen at all or have only been seen a few times. We show that dissonant stance detection models trained on only 8 topics, none of which are the target topic, can perform as well as those trained only on a target topic. Further, adding non-target topics boosts performance further up to approximately 32 topics where accuracies start to plateau. Taken together, our experiments suggest dissonant stance detection models can generalize to new unanticipated topics, an important attribute for the social scientific study of social media where new topics emerge daily.</abstract>
diff --git a/data/xml/2022.nlperspectives.xml b/data/xml/2022.nlperspectives.xml
index 3b7640d030..1c5f656248 100644
--- a/data/xml/2022.nlperspectives.xml
+++ b/data/xml/2022.nlperspectives.xml
@@ -21,9 +21,9 @@
     </frontmatter>
     <paper id="1">
       <title>Disagreement Space in Argument Analysis</title>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Ella</first><last>Schad</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>1–9</pages>
       <abstract>For a highly subjective task such as recognising speaker intention and argumentation, the traditional way of generating gold standards is to aggregate a number of labels into a single one. However, this seriously neglects the underlying richness that characterises discourse and argumentation and is also, in some cases, straightforwardly impossible. In this paper, we present QT30nonaggr, the first corpus of non-aggregated argument annotation, which will be openly available upon publication. QT30nonaggr encompasses 10% of QT30, the largest corpus of dialogical argumentation and analysed broadcast political debate currently available with 30 episodes of BBC’s ‘Question Time’ from 2020 and 2021. Based on a systematic and detailed investigation of annotation judgements across all steps of the annotation process, we structure the disagreement space with a taxonomy of the types of label disagreements in argument annotation, identifying the categories of annotation errors, fuzziness and ambiguity.</abstract>
       <url hash="b44050a5">2022.nlperspectives-1.1</url>
@@ -36,7 +36,7 @@
       <author><first>Ashkan</first><last>Kazemi</last></author>
       <author><first>Naihao</first><last>Deng</last></author>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>10–19</pages>
       <abstract>Recent studies have shown that for subjective annotation tasks, the demographics, lived experiences, and identity of annotators can have a large impact on how items are labeled. We expand on this work, hypothesizing that gender may correlate with differences in annotations for a number of NLP benchmarks, including those that are fairly subjective (e.g., affect in text) and those that are typically considered to be objective (e.g., natural language inference). We develop a robust framework to test for differences in annotation across genders for four benchmark datasets. While our results largely show a lack of statistically significant differences in annotation by males and females for these tasks, the framework can be used to analyze differences in annotation between various other demographic groups in future work. Finally, we note that most datasets are collected without annotator demographics and released only in aggregate form; we call on the community to consider annotator demographics as data is collected, and to release dis-aggregated data to allow for further work analyzing variability among annotators.</abstract>
       <url hash="7c40bef9">2022.nlperspectives-1.2</url>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias</title>
       <author><first>Parker</first><last>Glenn</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Marvin</first><last>Thielk</last></author>
       <author><first>Yi</first><last>Chu</last></author>
       <pages>32–36</pages>
@@ -107,7 +107,7 @@
       <author><first>Christopher</first><last>Homan</last></author>
       <author><first>Tharindu Cyril</first><last>Weerasooriya</last></author>
       <author><first>Lora</first><last>Aroyo</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>56–65</pages>
       <abstract>Annotator disagreement is often dismissed as noise or the result of poor annotation process quality. Others have argued that it can be meaningful. But lacking a rigorous statistical foundation, the analysis of disagreement patterns can resemble a high-tech form of tea-leaf-reading. We contribute a framework for analyzing the variation of per-item annotator response distributions to data for humans-in-the-loop machine learning. We provide visualizations for, and use the framework to analyze the variance in, a crowdsourced dataset of hard-to-classify examples from the OpenImages archive.</abstract>
       <url hash="c66bf947">2022.nlperspectives-1.8</url>
@@ -118,7 +118,7 @@
       <author><first>Sofie</first><last>Labat</last></author>
       <author><first>Naomi</first><last>Ackaert</last></author>
       <author><first>Thomas</first><last>Demeester</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>66–72</pages>
       <abstract>This pilot study employs the Wizard of Oz technique to collect a corpus of written human-computer conversations in the domain of customer service. The resulting dataset contains 192 conversations and is used to test three hypotheses related to the expression and annotation of emotions. First, we hypothesize that there is a discrepancy between the emotion annotations of the participant (the experiencer) and the annotations of our external annotator (the observer). Furthermore, we hypothesize that the personality of the participants has an influence on the emotions they expressed, and on the way they evaluated (annotated) these emotions. We found that for an external, trained annotator, not all emotion labels were equally easy to work with. We also noticed that the trained annotator had a tendency to opt for emotion labels that were more centered in the valence-arousal space, while participants made more ‘extreme’ annotations. For the second hypothesis, we discovered a positive correlation between the personality trait extraversion and the emotion dimensions valence and dominance in our sample. Finally, for the third premise, we observed a positive correlation between the internal-external agreement on emotion labels and the personality traits conscientiousness and extraversion. Our insights and findings will be used in future research to conduct a larger Wizard of Oz experiment.</abstract>
       <url hash="19e3f60a">2022.nlperspectives-1.9</url>
@@ -129,7 +129,7 @@
       <author><first>Lucy</first><last>Havens</last></author>
       <author><first>Benjamin</first><last>Bach</last></author>
       <author><first>Melissa</first><last>Terras</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>73–82</pages>
       <abstract>This paper presents an overview of text visualization techniques relevant for data perspectivism, aiming to facilitate analysis of annotated datasets for the datasets’ creators and stakeholders. Data perspectivism advocates for publishing non-aggregated, annotated text data, recognizing that for highly subjective tasks, such as bias detection and hate speech detection, disagreements among annotators may indicate conflicting yet equally valid interpretations of a text. While the publication of non-aggregated, annotated data makes different interpretations of text corpora available, barriers still exist to investigating patterns and outliers in annotations of the text. Techniques from text visualization can overcome these barriers, facilitating intuitive data analysis for NLP researchers and practitioners, as well as stakeholders in NLP systems, who may not have data science or computing skills. In this paper we discuss challenges with current dataset creation practices and annotation platforms, followed by a discussion of text visualization techniques that enable open-ended, multi-faceted, and iterative analysis of annotated data.</abstract>
       <url hash="c1563fbf">2022.nlperspectives-1.10</url>
@@ -177,7 +177,7 @@
       <author><first>Tiago</first><last>Timponi Torrent</last></author>
       <author><first>Oliver</first><last>Czulo</last></author>
       <author><first>Arthur</first><last>Lorenzi</last></author>
-      <author><first>Ely</first><last>Matos</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely</first><last>Matos</last></author>
       <author><first>Frederico</first><last>Belcavello</last></author>
       <pages>108–116</pages>
       <abstract>This paper argues in favor of the adoption of annotation practices for multimodal datasets that recognize and represent the inherently perspectivized nature of multimodal communication. To support our claim, we present a set of annotation experiments in which FrameNet annotation is applied to the Multi30k and the Flickr 30k Entities datasets. We assess the cosine similarity between the semantic representations derived from the annotation of both pictures and captions for frames. Our findings indicate that: (i) frame semantic similarity between captions of the same picture produced in different languages is sensitive to whether the caption is a translation of another caption or not, and (ii) picture annotation for semantic frames is sensitive to whether the image is annotated in presence of a caption or not.</abstract>
@@ -188,7 +188,7 @@
       <title>Change My Mind: How Syntax-based Hate Speech Recognizer Can Uncover Hidden Motivations Based on Different Viewpoints</title>
       <author><first>Michele</first><last>Mastromattei</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>117–125</pages>
       <abstract>Hate speech recognizers may mislabel sentences by not considering the different opinions that society has on selected topics. In this paper, we show how explainable machine learning models based on syntax can help to understand the motivations that induce a sentence to be offensive to a certain demographic group. By comparing and contrasting the results, we show the key points that make a sentence labeled as hate speech and how this varies across different ethnic groups.</abstract>
       <url hash="540eba4f">2022.nlperspectives-1.15</url>
diff --git a/data/xml/2022.nlppower.xml b/data/xml/2022.nlppower.xml
index 7340667054..b25b83e5c6 100644
--- a/data/xml/2022.nlppower.xml
+++ b/data/xml/2022.nlppower.xml
@@ -75,7 +75,7 @@
       <author><first>Matthias</first><last>Lindemann</last></author>
       <author><first>Danyang</first><last>Liu</last></author>
       <author><first>Wanqiu</first><last>Long</last></author>
-      <author><first>Bonnie L.</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie L.</first><last>Webber</last></author>
       <pages>42-51</pages>
       <abstract>Recent improvements in automatic news summarization fundamentally rely on large corpora of news articles and their summaries. These corpora are often constructed by scraping news websites, which results in including not only summaries but also other kinds of texts. Apart from more generic noise, we identify straplines as a form of text scraped from news websites that commonly turn out not to be summaries. The presence of these non-summaries threatens the validity of scraped corpora as benchmarks for news summarization. We have annotated extracts from two news sources that form part of the Newsroom corpus (Grusky et al., 2018), labeling those which were straplines, those which were summaries, and those which were both. We present a rule-based strapline detection method that achieves good performance on a manually annotated test set. Automatic evaluation indicates that removing straplines and noise from the training data of a news summarizer results in higher quality summaries, with improvements as high as 7 points ROUGE score.</abstract>
       <url hash="692b2c17">2022.nlppower-1.5</url>
@@ -100,7 +100,7 @@
     <paper id="7">
       <title>Beyond Static models and test sets: Benchmarking the potential of pre-trained models across tasks and languages</title>
       <author><first>Kabir</first><last>Ahuja</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>64-74</pages>
diff --git a/data/xml/2022.osact.xml b/data/xml/2022.osact.xml
index bb3bd68e9f..7ad65f52c9 100644
--- a/data/xml/2022.osact.xml
+++ b/data/xml/2022.osact.xml
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>A Context-free <fixed-case>A</fixed-case>rabic Emoji Sentiment Lexicon (<fixed-case>CF</fixed-case>-<fixed-case>A</fixed-case>rab-<fixed-case>ESL</fixed-case>)</title>
       <author><first>Shatha Ali A.</first><last>Hakami</last></author>
-      <author><first>Robert</first><last>Hendley</last></author>
+      <author id="robert-j-hendley"><first>Robert</first><last>Hendley</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
       <pages>51–59</pages>
       <abstract>Emoji can be valuable features in textual sentiment analysis. One of the key elements of the use of emoji in sentiment analysis is the emoji sentiment lexicon. However, constructing such a lexicon is a challenging task. This is because interpreting the sentiment conveyed by these pictographic symbols is highly subjective, and differs depending upon how each person perceives them. Cultural background is considered to be one of the main factors that affects emoji sentiment interpretation. Thus, we focus in this work on targeting people from Arab cultures. This is done by constructing a context-free Arabic emoji sentiment lexicon annotated by native Arabic speakers from seven different regions (Gulf, Egypt, Levant, Sudan, North Africa, Iraq, and Yemen) to see how these Arabic users label the sentiment of these symbols without a textual context. We recruited 53 annotators (males and females) to annotate 1,069 unique emoji. Then we evaluated the reliability of the annotation for each participant by applying sensitivity (Recall) and consistency (Krippendorff’s Alpha) tests. For the analysis, we investigated the resulting emoji sentiment annotations to explore the impact of the Arabic cultural context. We analyzed this cultural reflection from different perspectives, including national affiliation, use of colour indications, animal indications, weather indications and religious impact.</abstract>
@@ -99,7 +99,7 @@
     <paper id="8">
       <title>Classifying <fixed-case>A</fixed-case>rabic Crisis Tweets using Data Selection and Pre-trained Language Models</title>
       <author><first>Alaa</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>71–78</pages>
       <abstract>User-generated Social Media (SM) content has been explored as a valuable and accessible source of data about crises to enhance situational awareness and support humanitarian response efforts. However, the timely extraction of crisis-related SM messages is challenging as it involves processing large quantities of noisy data in real-time. Supervised machine learning methods have been successfully applied to this task but such approaches require human-labelled data, which are unlikely to be available from novel and emerging crises. Supervised machine learning algorithms trained on labelled data from past events did not usually perform well when classifying a new disaster due to data variations across events. Using the BERT embeddings, we propose and investigate an instance distance-based data selection approach for adaptation to improve classifiers’ performance under a domain shift. The K-nearest neighbours algorithm selects a subset of multi-event training data that is most similar to the target event. Results show that fine-tuning a BERT model on a selected subset of data to classify crisis tweets outperforms a model that has been fine-tuned on all available source data. We demonstrated that our approach generally works better than the self-training adaptation method. Combing the self-training with our proposed classifier does not enhance the performance.</abstract>
       <url hash="600bd715">2022.osact-1.8</url>
@@ -120,7 +120,7 @@
       <author><first>Damith</first><last>Premasiri</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>88–95</pages>
       <abstract>The task of machine reading comprehension (MRC) is a useful benchmark to evaluate the natural language understanding of machines. It has gained popularity in the natural language processing (NLP) field mainly due to the large number of datasets released for many languages. However, the research in MRC has been understudied in several domains, including religious texts. The goal of the Qur’an QA 2022 shared task is to fill this gap by producing state-of-the-art question answering and reading comprehension research on Qur’an. This paper describes the DTW entry to the Quran QA 2022 shared task. Our methodology uses transfer learning to take advantage of available Arabic MRC data. We further improve the results using various ensemble learning strategies. Our approach provided a partial Reciprocal Rank (pRR) score of 0.49 on the test set, proving its strong performance on the task.</abstract>
       <url hash="0cf1458e">2022.osact-1.10</url>
@@ -165,7 +165,7 @@
       <author><first>Sarah</first><last>Alnefaie</last></author>
       <author><first>Sanaa</first><last>Alowaidi</last></author>
       <author><first>Alaa</first><last>Alsaqer</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Abdulrahman</first><last>Altahhan</last></author>
       <author><first>Mohammad</first><last>Alsalka</last></author>
       <pages>120–125</pages>
@@ -265,7 +265,7 @@
     <paper id="24">
       <title>Meta <fixed-case>AI</fixed-case> at <fixed-case>A</fixed-case>rabic Hate Speech 2022: <fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>ask Learning with Self-Correction for Hate Speech Classification</title>
       <author><first>Badr</first><last>AlKhamissi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>186–193</pages>
       <abstract>In this paper, we tackle the Arabic Fine-Grained Hate Speech Detection shared task and demonstrate significant improvements over reported baselines for its three subtasks. The tasks are to predict if a tweet contains (1) Offensive language; and whether it is considered (2) Hate Speech or not and if so, then predict the (3) Fine-Grained Hate Speech label from one of six categories. Our final solution is an ensemble of models that employs multitask learning and a self-consistency correction method yielding 82.7% on the hate speech subtask—reflecting a 3.4% relative improvement compared to previous work.</abstract>
       <url hash="6abdb939">2022.osact-1.24</url>
diff --git a/data/xml/2022.paclic.xml b/data/xml/2022.paclic.xml
index f36c283bb3..fa59199149 100644
--- a/data/xml/2022.paclic.xml
+++ b/data/xml/2022.paclic.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-04-23" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 36th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Shirley</first><last>Dita</last></editor>
+      <editor id="shirley-dita"><first>Shirley</first><last>Dita</last></editor>
       <editor><first>Arlene</first><last>Trillanes</last></editor>
       <editor><first>Rochelle Irene</first><last>Lucas</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,7 +23,7 @@
       <author><first>Phu-Thinh</first><last>Pham</last></author>
       <author><first>Duc</first><last>Do</last></author>
       <author><first>An-Vinh</first><last>Luong</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>1–9</pages>
       <url hash="8a43e844">2022.paclic-1.1</url>
       <bibkey>vu-tran-etal-2022-integrating</bibkey>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="5">
       <title>The Information Packaging of the Do-Constructions in <fixed-case>C</fixed-case>hinese, <fixed-case>R</fixed-case>ussian, and <fixed-case>C</fixed-case>zech</title>
-      <author><first>Kawai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Kawai</first><last>Chui</last></author>
       <author><first>Hsiang-Lin</first><last>Yeh</last></author>
       <author><first>Shih-Hui</first><last>Lin</last></author>
       <pages>35–44</pages>
@@ -127,7 +127,7 @@
     <paper id="13">
       <title>Improving Automatic Evaluation of Acceptability Based on Language Models with a Coarse Sentence Representation</title>
       <author><first>Vijay</first><last>Daultani</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>109–118</pages>
       <url hash="5ed74006">2022.paclic-1.13</url>
       <bibkey>daultani-okazaki-2022-improving</bibkey>
@@ -184,7 +184,7 @@
       <author><first>Dushyant Singh</first><last>Chauhan</last></author>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>166–174</pages>
       <url hash="010bf465">2022.paclic-1.19</url>
       <bibkey>singh-etal-2022-emoji</bibkey>
@@ -384,7 +384,7 @@
     <paper id="43">
       <title>A comparison of the validity of measurement methods of the general <fixed-case>E</fixed-case>nglish proficiency by dictation and read-aloud performance</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>388–395</pages>
       <url hash="add4ce7f">2022.paclic-1.43</url>
       <bibkey>kotani-yoshimi-2022-comparison</bibkey>
@@ -426,7 +426,7 @@
     <paper id="48">
       <title>A Model-Theoretic Formalization of Natural Language Inference Using Neural Network and Tableau Method</title>
       <author><first>Ayahito</first><last>Saji</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>430–437</pages>
       <url hash="f107893c">2022.paclic-1.48</url>
@@ -523,7 +523,7 @@
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Saprativa</first><last>Bhattacharjee</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>525–532</pages>
       <url hash="0aede248">2022.paclic-1.58</url>
       <bibkey>ghosal-etal-2022-novelty-detection</bibkey>
@@ -697,7 +697,7 @@
       <author><first>Kartik</first><last>Shinde</last></author>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
       <author><first>Muskaan</first><last>Singh</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>691–702</pages>
       <url hash="da30df55">2022.paclic-1.76</url>
       <bibkey>shinde-etal-2022-automatic</bibkey>
@@ -783,7 +783,7 @@
       <author><first>Duy</first><last>Vu-Tran</last></author>
       <author><first>Duc</first><last>Do</last></author>
       <author><first>An-Vinh</first><last>Luong</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>777–782</pages>
       <url hash="ce254279">2022.paclic-1.85</url>
       <bibkey>pham-etal-2022-intent</bibkey>
@@ -792,7 +792,7 @@
       <title>Annotating Entity and Causal Relationships on <fixed-case>J</fixed-case>apanese Vehicle Recall Information</title>
       <author><first>Hsuan-Yu</first><last>Kuo</last></author>
       <author><first>Youmi</first><last>Ma</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>783–791</pages>
       <url hash="a2ff57a3">2022.paclic-1.86</url>
       <bibkey>kuo-etal-2022-annotating</bibkey>
@@ -802,7 +802,7 @@
       <author><first>Santosh Kumar</first><last>Mishra</last></author>
       <author><first>Sushant</first><last>Sinha</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>792–800</pages>
       <url hash="8bf136b8">2022.paclic-1.87</url>
       <bibkey>mishra-etal-2022-deep</bibkey>
@@ -844,8 +844,8 @@
       <title>Bi-directional Cross-Attention Network on <fixed-case>V</fixed-case>ietnamese Visual Question Answering</title>
       <author><first>Duy-Minh</first><last>Nguyen-Tran</last></author>
       <author><first>Tung</first><last>Le</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
-      <author><first>Huy Tien</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="huy-tien-nguyen"><first>Huy Tien</first><last>Nguyen</last></author>
       <pages>834–841</pages>
       <url hash="5978779f">2022.paclic-1.92</url>
       <bibkey>nguyen-tran-etal-2022-bi</bibkey>
diff --git a/data/xml/2022.pandl.xml b/data/xml/2022.pandl.xml
index 009b0e9759..28d6a1e077 100644
--- a/data/xml/2022.pandl.xml
+++ b/data/xml/2022.pandl.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning</booktitle>
       <editor><first>Laura</first><last>Chiticariu</last></editor>
       <editor><first>Yoav</first><last>Goldberg</last></editor>
-      <editor><first>Gus</first><last>Hahn-Powell</last></editor>
+      <editor id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></editor>
       <editor><first>Clayton T.</first><last>Morrison</last></editor>
       <editor><first>Aakanksha</first><last>Naik</last></editor>
       <editor><first>Rebecca</first><last>Sharp</last></editor>
@@ -51,7 +51,7 @@
       <author><first>Subhasish</first><last>Ghosh</last></author>
       <author><first>Arpita</first><last>Kundu</last></author>
       <author><first>Pratik</first><last>Saini</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <pages>21–28</pages>
       <abstract>We explore the task of generating long-form technical questions from textbooks. Semi-structured metadata of a textbook — the table of contents and the index — provide rich cues for technical question generation. Existing literature for long-form question generation focuses mostly on reading comprehension assessment, and does not use semi-structured metadata for question generation. We design unsupervised template based algorithms for generating questions based on structural and contextual patterns in the index and ToC. We evaluate our approach on textbooks on diverse subjects and show that our approach generates high quality questions of diverse types. We show that, in comparison, zero-shot question generation using pre-trained LLMs on the same meta-data has much poorer quality.</abstract>
       <url hash="24dfe152">2022.pandl-1.3</url>
@@ -63,7 +63,7 @@
       <author><first>Onyu</first><last>Park</last></author>
       <author><first>Changhoe</first><last>Hwang</last></author>
       <author><first>Gwanghoon</first><last>Yoo</last></author>
-      <author><first>Eric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Eric</first><last>Laporte</last></author>
       <author><first>Jeesun</first><last>Nam</last></author>
       <pages>29–37</pages>
       <abstract>Natural language understanding (NLU) is integral to task-oriented dialog systems, but demands a considerable amount of annotated training data to increase the coverage of diverse utterances. In this study, we report the construction of a linguistic resource named FIAD (Financial Annotated Dataset) and its use to generate a Korean annotated training data for NLU in the banking customer service (CS) domain. By an empirical examination of a corpus of banking app reviews, we identified three linguistic patterns occurring in Korean request utterances: TOPIC (ENTITY, FEATURE), EVENT, and DISCOURSE MARKER. We represented them in LGGs (Local Grammar Graphs) to generate annotated data covering diverse intents and entities. To assess the practicality of the resource, we evaluate the performances of DIET-only (Intent: 0.91 /Topic [entity+feature]: 0.83), DIET+ HANBERT (I:0.94/T:0.85), DIET+ KoBERT (I:0.94/T:0.86), and DIET+ KorBERT (I:0.95/T:0.84) models trained on FIAD-generated data to extract various types of semantic items.</abstract>
@@ -76,7 +76,7 @@
       <author><first>Shinwoo</first><last>Kim</last></author>
       <author><first>Changhoe</first><last>Hwang</last></author>
       <author><first>Gwanghoon</first><last>Yoo</last></author>
-      <author><first>Eric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Eric</first><last>Laporte</last></author>
       <author><first>Jeesun</first><last>Nam</last></author>
       <pages>38–44</pages>
       <abstract>We report the construction of a Korean evaluation-annotated corpus, hereafter called ‘Evaluation Annotated Dataset (EVAD)’, and its use in Aspect-Based Sentiment Analysis (ABSA) extended in order to cover e-commerce reviews containing sentiment and non-sentiment linguistic patterns. The annotation process uses Semi-Automatic Symbolic Propagation (SSP). We built extensive linguistic resources formalized as a Finite-State Transducer (FST) to annotate corpora with detailed ABSA components in the fashion e-commerce domain. The ABSA approach is extended, in order to analyze user opinions more accurately and extract more detailed features of targets, by including aspect values in addition to topics and aspects, and by classifying aspect-value pairs depending whether values are unary, binary, or multiple. For evaluation, the KoBERT and KcBERT models are trained on the annotated dataset, showing robust performances of F1 0.88 and F1 0.90, respectively, on recognition of aspect-value pairs.</abstract>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="6">
       <title>Accelerating Human Authorship of Information Extraction Rules</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>John</first><last>Cadigan</last></author>
       <author><first>John</first><last>Niekrasz</last></author>
       <author><first>Robert</first><last>Sasseen</last></author>
@@ -131,7 +131,7 @@
       <author><first>Enrique</first><last>Noriega-Atala</last></author>
       <author><first>Robert</first><last>Vacareanu</last></author>
       <author><first>Gus</first><last>Hahn-Powell</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <pages>85–93</pages>
       <abstract>We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision and maintainability exhibited by rule-based information extraction systems with the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system that leverages a self-supervised approach for pre-training a large-scale language model complemented by an analysis of different loss functions and aggregation mechanisms for variable length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method.</abstract>
       <url hash="48b65a94">2022.pandl-1.10</url>
diff --git a/data/xml/2022.parlaclarin.xml b/data/xml/2022.parlaclarin.xml
index f84768f337..f8f328057f 100644
--- a/data/xml/2022.parlaclarin.xml
+++ b/data/xml/2022.parlaclarin.xml
@@ -22,7 +22,7 @@
       <title><fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int <fixed-case>II</fixed-case>: The Show Must Go On</title>
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
@@ -149,7 +149,7 @@
       <title><fixed-case>F</fixed-case>rame<fixed-case>AS</fixed-case>t: A Framework for Second-level Agenda Setting in Parliamentary Debates through the Lense of Comparative Agenda Topics</title>
       <author><first>Christopher</first><last>Klamm</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>92–100</pages>
       <abstract>This paper presents a framework for studying second-level political agenda setting in parliamentary debates, based on the selection of policy topics used by political actors to discuss a specific issue on the parliamentary agenda. For example, the COVID-19 pandemic as an agenda item can be contextualised as a health issue or as a civil rights issue, as a matter of macroeconomics or can be discussed in the context of social welfare. Our framework allows us to observe differences regarding how different parties discuss the same agenda item by emphasizing different topical aspects of the item. We apply and evaluate our framework on data from the German Bundestag and discuss the merits and limitations of our approach. In addition, we present a new annotated data set of parliamentary debates, following the coding schema of policy topics developed in the Comparative Agendas Project (CAP), and release models for topic classification in parliamentary debates.</abstract>
       <url hash="503285df">2022.parlaclarin-1.13</url>
@@ -188,7 +188,7 @@
       <author><first>Tommaso</first><last>Agnoloni</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Carlo</first><last>Marchetti</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
       <author><first>Manuela</first><last>Ruisi</last></author>
@@ -202,7 +202,7 @@
       <title><fixed-case>P</fixed-case>arlament<fixed-case>P</fixed-case>arla: A Speech Corpus of <fixed-case>C</fixed-case>atalan Parliamentary Sessions</title>
       <author><first>Baybars</first><last>Kulebi</last></author>
       <author><first>Carme</first><last>Armentano-Oller</last></author>
-      <author><first>Carlos</first><last>Rodriguez-Penagos</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodriguez-Penagos</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>125–130</pages>
       <abstract>Recently, various end-to-end architectures of Automatic Speech Recognition (ASR) are being showcased as an important step towards providing language technologies to all languages instead of a select few such as English. However many languages are still suffering due to the “digital gap,” lacking thousands of hours of transcribed speech data openly accessible that is necessary to train modern ASR architectures. Although Catalan already has access to various open speech corpora, these corpora lack diversity and are limited in total volume. In order to address this lack of resources for Catalan language, in this work we present ParlamentParla, a corpus of more than 600 hours of speech from Catalan Parliament sessions. This corpus has already been used in training of state-of-the-art ASR systems, and proof-of-concept text-to-speech (TTS) models. In this work we explain in detail the pipeline that allows the information publicly available on the parliamentary website to be converted to a speech corpus compatible with training of ASR and possibly TTS models.</abstract>
diff --git a/data/xml/2022.politicalnlp.xml b/data/xml/2022.politicalnlp.xml
index edd48e23a3..9d4c5f163d 100644
--- a/data/xml/2022.politicalnlp.xml
+++ b/data/xml/2022.politicalnlp.xml
@@ -32,7 +32,7 @@
     <paper id="2">
       <title>Correlating Political Party Names in Tweets, Newspapers and Election Results</title>
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>8–15</pages>
       <abstract>Twitter has been used as a textual resource to attempt to predict the outcome of elections for over a decade. A body of literature suggests that this is not consistently possible. In this paper we test the hypothesis that mentions of political parties in tweets are better correlated with the appearance of party names in newspapers than to the intention of the tweeter to vote for that party. Five Dutch national elections are used in this study. We find only a small positive, negligible difference in Pearson’s correlation coefficient as well as in the absolute error of the relation between tweets and news, and between tweets and elections. However, we find a larger correlation and a smaller absolute error between party mentions in newspapers and the outcome of the elections in four of the five elections. This suggests that newspapers are a better starting point for predicting the election outcome than tweets.</abstract>
       <url hash="493ba6f8">2022.politicalnlp-1.2</url>
@@ -41,7 +41,7 @@
     <paper id="3">
       <title>Debating <fixed-case>E</fixed-case>urope: A Multilingual Multi-Target Stance Classification Dataset of Online Debates</title>
       <author><first>Valentin</first><last>Barriere</last></author>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Brian</first><last>Ravenet</last></author>
       <pages>16–21</pages>
       <abstract>We present a new dataset of online debates in English, annotated with stance. The dataset was scraped from the “<i>Debating Europe</i>” platform, where users exchange opinions over different subjects related to the European Union. The dataset is composed of 2600 comments pertaining to 18 debates related to the “<i>European Green Deal</i>”, in a conversational setting. After presenting the dataset and the annotated sub-part, we pre-train a model for a multilingual stance classification over the X-stance dataset before fine-tuning it over our dataset, and vice-versa. The fine-tuned models are shown to improve stance classification performance on each of the datasets, even though they have different languages, topics and targets. Subsequently, we propose to enhance the performances over “<i>Debating Europe</i>” with an interaction-aware model, taking advantage of the online debate structure of the platform. We also propose a semi-supervised self-training method to take advantage of the imbalanced and unlabeled data from the whole website, leading to a final improvement of accuracy by 3.4% over a Vanilla XLM-R model.</abstract>
@@ -55,7 +55,7 @@
       <author><first>Lei</first><last>Guo</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
       <author><first>Prakash</first><last>Ishwar</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>22–31</pages>
       <abstract>Media framing refers to highlighting certain aspect of an issue in the news to promote a particular interpretation to the audience. Supervised learning has often been used to recognize frames in news articles, requiring a known pool of frames for a particular issue, which must be identified by communication researchers through thorough manual content analysis. In this work, we devise an unsupervised learning approach to discover the frames in news articles automatically. Given a set of news articles for a given issue, e.g., gun violence, our method first extracts frame elements from these articles using related Wikipedia articles and the Wikipedia category system. It then uses a community detection approach to identify frames from these frame elements. We discuss the effectiveness of our approach by comparing the frames it generates in an unsupervised manner to the domain-expert-derived frames for the issue of gun violence, for which a supervised learning model for frame recognition exists.</abstract>
       <url hash="1f46d32f">2022.politicalnlp-1.4</url>
@@ -146,7 +146,7 @@
       <author><first>Joanna</first><last>Szwoch</last></author>
       <author><first>Mateusz</first><last>Staszkow</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>86–90</pages>
       <abstract>In this paper we describe a Polish news corpus as an attempt to create a filtered, organized and representative set of texts coming from contemporary online press articles from two major Polish TV news providers: commercial TVN24 and state-owned TVP Info. The process consists of web scraping, data cleaning and formatting. A random sample was selected from prepared data to perform a classification task. The random forest achieved the best prediction results out of all considered models. We believe that this dataset is a valuable contribution to existing Polish language corpora as online news are considered to be formal and relatively mistake-free, therefore, a reliable source of correct written language, unlike other online platforms such as blogs or social media. Furthermore, to our knowledge, such corpus from this period of time has not been created before. In the future we would like to expand this dataset with articles coming from other online news providers, repeat the classification task on a bigger scale, utilizing other algorithms. Our data analysis outcomes might be a relevant basis to improve research on a political polarization and propaganda techniques in media.</abstract>
       <url hash="0fc0bc3c">2022.politicalnlp-1.12</url>
diff --git a/data/xml/2022.privatenlp.xml b/data/xml/2022.privatenlp.xml
index f32a71744a..ddacd9b6b9 100644
--- a/data/xml/2022.privatenlp.xml
+++ b/data/xml/2022.privatenlp.xml
@@ -36,7 +36,7 @@
       <author><first>Atula</first><last>Tejaswi Neerkaje</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Lucie</first><last>Flek</last></author>
-      <author><first>Anders</first><last>Sogaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Sogaard</last></author>
       <pages>12-12</pages>
       <abstract>The performance cost of differential privacy has, for some applications, been shown to be higher for minority groups fairness, conversely, has been shown to disproportionally compromise the privacy of members of such groups. Most work in this area has been restricted to computer vision and risk assessment. In this paper, we evaluate the impact of differential privacy on fairness across four tasks, focusing on how attempts to mitigate privacy violations and between-group performance differences interact Does privacy inhibit attempts to ensure fairness? To this end, we train epsilon, delta-differentially private models with empirical risk minimization and group distributionally robust training objectives. Consistent with previous findings, we find that differential privacy increases between-group performance differences in the baseline setting but more interestingly, differential privacy reduces between-group performance differences in the robust setting. We explain this by reinterpreting differential privacy as regularization.</abstract>
       <url hash="540319e8">2022.privatenlp-1.2</url>
diff --git a/data/xml/2022.pvlam.xml b/data/xml/2022.pvlam.xml
index 6fd0ca996f..8e8cf4d3ee 100644
--- a/data/xml/2022.pvlam.xml
+++ b/data/xml/2022.pvlam.xml
@@ -53,7 +53,7 @@
     <paper id="4">
       <title>Cognitive States and Types of Nods</title>
       <author><first>Taiga</first><last>Mori</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <pages>17–25</pages>
       <abstract>In this paper we will study how different types of nods are related to the cognitive states of the listener. The distinction is made between nods with movement starting upwards (up-nods) and nods with movement starting downwards (down-nods) as well as between single or repetitive nods. The data is from Japanese multiparty conversations, and the results accord with the previous findings indicating that up-nods are related to the change in the listener’s cognitive state after hearing the partner’s contribution, while down-nods convey the meaning that the listener’s cognitive state is not changed.</abstract>
diff --git a/data/xml/2022.rapid.xml b/data/xml/2022.rapid.xml
index 5602a2c4f6..ecdc8e62b7 100644
--- a/data/xml/2022.rapid.xml
+++ b/data/xml/2022.rapid.xml
@@ -7,7 +7,7 @@
       <editor><first>Charalambos K.</first><last>Themistocleous</last></editor>
       <editor><first>Kristina Lundholm</first><last>Fors</last></editor>
       <editor><first>Athanasios</first><last>Tsanas</last></editor>
-      <editor><first>Kathleen C.</first><last>Fraser</last></editor>
+      <editor id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -24,7 +24,7 @@
       <author><first>Chiara</first><last>Pesenti</last></author>
       <author><first>Loes</first><last>Van Bemmel</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>1–8</pages>
       <abstract>In the current study on dysarthric speech, we investigate the effect of web-based treatment, and whether there is a difference between content and function words. Since the goal of the treatment is to speak louder, without raising pitch, we focus on acoustic-phonetic features related to loudness, intensity, and pitch. We analyse dysarthric read speech from eight speakers at word level. We also investigate whether there are differences between content words and function words, and whether the treatment has a different impact on these two classes of words. Linear Mixed-Effects models show that there are differences before and after treatment, that for some speakers the treatment has the desired effect, but not for all speakers, and that the effect of the treatment on words for the two categories does not seem to be different. To a large extent, our results are in line with the results of a previous study in which the same data were analyzed in a different way, i.e. by studying intelligibility scores.</abstract>
       <url hash="74323495">2022.rapid-1.1</url>
@@ -66,7 +66,7 @@
     <paper id="5">
       <title>Classification of <fixed-case>G</fixed-case>erman Jungian Extraversion and Introversion Texts with Assessment of Changes During the <fixed-case>COVID</fixed-case>-19 Pandemic</title>
       <author><first>Dirk</first><last>Johannßen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>David</first><last>Scheffer</last></author>
       <pages>31–40</pages>
       <abstract>The corona pandemic and countermeasures such as social distancing and lockdowns have confronted individuals with new challenges for their mental health and well-being. It can be assumed that the Jungian psychology types of extraverts and introverts react differently to these challenges. We propose a Bi-LSTM model with an attention mechanism for classifying introversion and extraversion from German tweets, which is trained on hand-labeled data created by 335 participants. With this work, we provide this novel dataset for free use and validation. The proposed model achieves solid performance with F1 = .72. Furthermore, we created a feature engineered logistic model tree (LMT) trained on hand-labeled tweets, to which the data is also made available with this work. With this second model, German tweets before and during the pandemic have been investigated. Extraverts display more positive emotions, whilst introverts show more insight and higher rates of anxiety. Even though such a model can not replace proper psychological diagnostics, it can help shed light on linguistic markers and to help understand introversion and extraversion better for a variety of applications and investigations.</abstract>
@@ -110,7 +110,7 @@
       <title>Data Augmentation for the Post-Stroke Speech Transcription (<fixed-case>PSST</fixed-case>) Challenge: Sometimes Less Is More</title>
       <author><first>Jiahong</first><last>Yuan</last></author>
       <author><first>Xingyu</first><last>Cai</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>71–79</pages>
       <abstract>We employ the method of fine-tuning wav2vec2.0 for recognition of phonemes in aphasic speech. Our effort focuses on data augmentation, by supplementing data from both in-domain and out-of-domain datasets for training. We found that although a modest amount of out-of-domain data may be helpful, the performance of the model degrades significantly when the amount of out-of-domain data is much larger than in-domain data. Our hypothesis is that fine-tuning wav2vec2.0 with a CTC loss not only learns bottom-up acoustic properties but also top-down constraints. Therefore, out-of-domain data augmentation is likely to degrade performance if there is a language model mismatch between “in” and “out” domains. For in-domain audio without ground truth labels, we found that it is beneficial to exclude samples with less confident pseudo labels. Our final model achieves 16.7% PER (phoneme error rate) on the validation set, without using a language model for decoding. The result represents a relative error reduction of 14% over the baseline model trained without data augmentation. Finally, we found that “canonicalized” phonemes are much easier to recognize than manually transcribed phonemes.</abstract>
       <url hash="9db6398d">2022.rapid-1.9</url>
diff --git a/data/xml/2022.readi.xml b/data/xml/2022.readi.xml
index b145cbcf93..0be01e7ff9 100644
--- a/data/xml/2022.readi.xml
+++ b/data/xml/2022.readi.xml
@@ -6,7 +6,7 @@
       <editor><first>Rodrigo</first><last>Wilkens</last></editor>
       <editor><first>David</first><last>Alfter</last></editor>
       <editor><first>Rémi</first><last>Cardon</last></editor>
-      <editor><first>Núria</first><last>Gala</last></editor>
+      <editor id="nuria-gala"><first>Núria</first><last>Gala</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -26,12 +26,12 @@
       <author><first>Hakeem</first><last>Beedar</last></author>
       <author><first>Harald</first><last>Berthelsen</last></author>
       <author><first>Cathy</first><last>Chua</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Brynjarr</first><last>Eyjólfsson</last></author>
       <author><first>Nedelina</first><last>Ivanova</last></author>
       <author><first>Christèle</first><last>Maizonniaux</last></author>
       <author><first>Neasa</first><last>Ní Chiaráin</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>John</first><last>Sloan</last></author>
       <author><first>Sigurður</first><last>Vigfússon</last></author>
       <author><first>Ghil’ad</first><last>Zuckermann</last></author>
@@ -73,7 +73,7 @@
       <title>The <fixed-case>S</fixed-case>wedish Simplification Toolkit: – Designed with Target Audiences in Mind</title>
       <author><first>Evelina</first><last>Rennes</last></author>
       <author><first>Marina</first><last>Santini</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <pages>31–38</pages>
       <abstract>In this paper, we present the current version of The Swedish Simplification Toolkit. The toolkit includes computational and empirical tools that have been developed along the years to explore a still neglected area of NLP, namely the simplification of “standard” texts to meet the needs of target audiences. Target audiences, such as people affected by dyslexia, aphasia, autism, but also children and second language learners, require different types of text simplification and adaptation. For example, while individual with aphasia have difficulties in reading compounds (such as arbetsmarknadsdepartement, eng. ministry of employment), second language learners struggle with cultural-specific vocabulary (e.g. konflikträdd, eng. afraid of conflicts). The toolkit allows user to selectively decide the types of simplification that meet the specific needs of the target audience they belong to. The Swedish Simplification Toolkit is one of the first attempts to overcome the one-fits-all approach that is still dominant in Automatic Text Simplification, and proposes a set of computational methods that, used individually or in combination, may help individuals reduce reading (and writing) difficulties.</abstract>
       <url hash="94b73508">2022.readi-1.5</url>
diff --git a/data/xml/2022.repl4nlp.xml b/data/xml/2022.repl4nlp.xml
index cdbc87d361..1aee839f9e 100644
--- a/data/xml/2022.repl4nlp.xml
+++ b/data/xml/2022.repl4nlp.xml
@@ -15,9 +15,9 @@
       <editor><first>Isabelle</first><last>Augenstein</last></editor>
       <editor><first>Anna</first><last>Rogers</last></editor>
       <editor><first>Kyunghyun</first><last>Cho</last></editor>
-      <editor><first>Edward</first><last>Grefenstette</last></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></editor>
       <editor><first>Laura</first><last>Rimell</last></editor>
-      <editor><first>Chris</first><last>Dyer</last></editor>
+      <editor id="chris-dyer"><first>Chris</first><last>Dyer</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>May</month>
@@ -57,7 +57,7 @@
       <title>When does <fixed-case>CLIP</fixed-case> generalize better than unimodal models? When judging human-centric concepts</title>
       <author><first>Romain</first><last>Bielawski</last></author>
       <author><first>Benjamin</first><last>Devillers</last></author>
-      <author><first>Tim</first><last>Van De Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van De Cruys</last></author>
       <author><first>Rufin</first><last>Vanrullen</last></author>
       <pages>29-38</pages>
       <abstract>CLIP, a vision-language network trained with a multimodal contrastive learning objective on a large dataset of images and captions, has demonstrated impressive zero-shot ability in various tasks. However, recent work showed that in comparison to unimodal (visual) networks, CLIP’s multimodal training does not benefit generalization (e.g. few-shot or transfer learning) for standard visual classification tasks such as object, street numbers or animal recognition. Here, we hypothesize that CLIP’s improved unimodal generalization abilities may be most prominent in domains that involve human-centric concepts (cultural, social, aesthetic, affective...); this is because CLIP’s training dataset is mainly composed of image annotations made by humans for other humans. To evaluate this, we use 3 tasks that require judging human-centric concepts”:” sentiment analysis on tweets, genre classification on books or movies. We introduce and publicly release a new multimodal dataset for movie genre classification. We compare CLIP’s visual stream against two visually trained networks and CLIP’s textual stream against two linguistically trained networks, as well as multimodal combinations of these networks. We show that CLIP generally outperforms other networks, whether using one or two modalities. We conclude that CLIP’s multimodal training is beneficial for both unimodal and multimodal tasks that require classification of human-centric concepts.</abstract>
@@ -164,7 +164,7 @@
       <title>Temporal Knowledge Graph Reasoning with Low-rank and Model-agnostic Representations</title>
       <author><first>Ioannis</first><last>Dikeoulias</last></author>
       <author><first>Saadullah</first><last>Amin</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>111-120</pages>
       <abstract>Temporal knowledge graph completion (TKGC) has become a popular approach for reasoning over the event and temporal knowledge graphs, targeting the completion of knowledge with accurate but missing information. In this context, tensor decomposition has successfully modeled interactions between entities and relations. Their effectiveness in static knowledge graph completion motivates us to introduce Time-LowFER, a family of parameter-efficient and time-aware extensions of the low-rank tensor factorization model LowFER. Noting several limitations in current approaches to represent time, we propose a cycle-aware time-encoding scheme for time features, which is model-agnostic and offers a more generalized representation of time. We implement our methods in a unified temporal knowledge graph embedding framework, focusing on time-sensitive data processing. The experiments show that our proposed methods perform on par or better than the state-of-the-art semantic matching models on two benchmarks.</abstract>
       <url hash="584f9f8d">2022.repl4nlp-1.12</url>
diff --git a/data/xml/2022.rocling.xml b/data/xml/2022.rocling.xml
index 817352dd84..f4b80504f9 100644
--- a/data/xml/2022.rocling.xml
+++ b/data/xml/2022.rocling.xml
@@ -33,7 +33,7 @@
       <author><first>Cheng-Chung</first><last>Fan</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>7–14</pages>
       <abstract>This paper constructs a Chinese dialogue-based information-seeking question answering dataset CMDQA, which is mainly applied to the scenario of getting Chinese movie related information. It contains 10K QA dialogs (40K turns in total). All questions and background documents are compiled from the Wikipedia via an Internet crawler. The answers to the questions are obtained via extracting the corresponding answer spans within the related text passage. In CMDQA, in addition to searching related documents, pronouns are also added to the question to better mimic the real dialog scenario. This dataset can test the individual performance of the information retrieval, the question answering and the question re-writing modules. This paper also provides a baseline system and shows its performance on this dataset. The experiments elucidate that it still has a big gap to catch the human performance. This dataset thus provides enough challenge for the researcher to conduct related research.</abstract>
@@ -92,8 +92,8 @@
     <paper id="7">
       <title>Is Character Trigram Overlapping Ratio Still the Best Similarity Measure for Aligning Sentences in a Paraphrased Corpus?</title>
       <author><first>Aleksandra</first><last>Smolka</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>49–60</pages>
       <abstract>Sentence alignment is an essential step in studying the mapping among different language expressions, and the character trigram overlapping ratio was reported to be the most effective similarity measure in aligning sentences in the text simplification dataset. However, the appropriateness of each similarity measure depends on the characteristics of the corpus to be aligned. This paper studies if the character trigram is still a suitable similarity measure for the task of aligning sentences in a paragraph paraphrasing corpus. We compare several embedding-based and non-embeddings model-agnostic similarity measures, including those that have not been studied previously. The evaluation is conducted on parallel paragraphs sampled from the Webis-CPC-11 corpus, which is a paragraph paraphrasing dataset. Our results show that modern BERT-based measures such as Sentence-BERT or BERTScore can lead to significant improvement in this task.</abstract>
@@ -152,7 +152,7 @@
       <title>A Preliminary Study of the Application of Discrete Wavelet Transform Features in Conv-<fixed-case>T</fixed-case>as<fixed-case>N</fixed-case>et Speech Enhancement Model</title>
       <author><first>Yan-Tong</first><last>Chen</last></author>
       <author><first>Zong-Tai</first><last>Wu</last></author>
-      <author><first>Jeih-Weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></author>
       <pages>92–99</pages>
       <abstract>Nowadays, time-domain features have been widely used in speech enhancement (SE) networks like frequency-domain features to achieve excellent performance in eliminating noise from input utterances. This study primarily investigates how to extract information from time-domain utterances to create more effective features in speech enhancement. We present employing sub-signals dwelled in multiple acoustic frequency bands in time domain and integrating them into a unified feature set. We propose using the discrete wavelet transform (DWT) to decompose each input frame signal to obtain sub-band signals, and a projection fusion process is performed on these signals to create the ultimate features. The corresponding fusion strategy is the bi-projection fusion (BPF). In short, BPF exploits the sigmoid function to create ratio masks for two feature sources. The concatenation of fused DWT features and time features serves as the encoder output of a celebrated SE framework, fully-convolutional time-domain audio separation network (Conv-TasNet), to estimate the mask and then produce the enhanced time-domain utterances. The evaluation experiments are conducted on the VoiceBank-DEMAND and VoiceBank-QUT tasks. The experimental results reveal that the proposed method achieves higher speech quality and intelligibility than the original Conv-TasNet that uses time features only, indicating that the fusion of DWT features created from the input utterances can benefit time features to learn a superior Conv-TasNet in speech enhancement.</abstract>
       <url hash="2ef61ba6">2022.rocling-1.12</url>
@@ -163,7 +163,7 @@
       <title>Exploiting the compressed spectral loss for the learning of the <fixed-case>DEMUCS</fixed-case> speech enhancement network</title>
       <author><first>Chi-En</first><last>Dai</last></author>
       <author><first>Qi-Wei</first><last>Hong</last></author>
-      <author><first>Jeih-Weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></author>
       <pages>100–106</pages>
       <abstract>This study aims to improve a highly effective speech enhancement technique, DEMUCS, by revising the respective loss function in learning. DEMUCS, developed by Facebook Team, is built on the Wave-UNet and consists of convolutional layer encoding and decoding blocks with an LSTM layer in between. Although DEMUCS processes the input speech utterance purely in the time (wave) domain, the applied loss function consists of wave-domain L1 distance and multi-scale shorttime-Fourier-transform (STFT) loss. That is, both time- and frequency-domain features are taken into consideration in the learning of DEMUCS. In this study, we present revising the STFT loss in DEMUCS by employing the compressed magnitude spectrogram. The compression is done by either the power-law operation with a positive exponent less than one, or the logarithmic operation. We evaluate the presented novel framework on the VoiceBank-DEMAND database and task. The preliminary experimental results suggest that DEMUCS containing the power-law compressed magnitude spectral loss outperforms the original DEMUCS by providing the test utterances with higher objective quality and intelligibility scores (PESQ and STOI). Relatively, the logarithm compressed magnitude spectral loss does not benefit DEMUCS. Therefore, we reveal that DEMUCS can be further improved by properly revising the STFT terms of its loss function.</abstract>
       <url hash="16c624fa">2022.rocling-1.13</url>
@@ -229,7 +229,7 @@
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
       <author><first>Chi-Wei</first><last>Wang</last></author>
       <author><first>Fang-Chi</first><last>Yeh</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>136–146</pages>
       <abstract>Non-lexical items are expressive devices used in conversations that are not words but are nevertheless meaningful. These items play crucial roles, such as signaling turn-taking or marking stances in interactions. However, as the non-lexical items do not stably correspond to written or phonological forms, past studies tend to focus on studying their acoustic properties, such as pitches and durations. In this paper, we investigate the discourse functions of non-lexical items through their acoustic properties and the phone embeddings extracted from a deep learning model. Firstly, we create a non-lexical item dataset based on the interpellation video clips from Taiwan’s Legislative Yuan. Then, we manually identify the non-lexical items and their discourse functions in the videos. Next, we analyze the acoustic properties of those items through statistical modeling and building classifiers based on phone embeddings extracted from a phone recognition model. We show that (1) the discourse functions have significant effects on the acoustic features; and (2) the classifiers built on phone embeddings perform better than the ones on conventional acoustic properties. These results suggest that phone embeddings may reflect the phonetic variations crucial in differentiating the discourse functions of non-lexical items.</abstract>
       <url hash="0a47bea4">2022.rocling-1.18</url>
@@ -265,7 +265,7 @@
     <paper id="21">
       <title><fixed-case>H</fixed-case>an<fixed-case>T</fixed-case>rans: An Empirical Study on Cross-Era Transferability of <fixed-case>C</fixed-case>hinese Pre-trained Language Model</title>
       <author><first>Chin-Tung</first><last>Lin</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>164–173</pages>
       <abstract>The pre-trained language model has recently dominated most downstream tasks in the NLP area. Particularly, bidirectional Encoder Representations from Transformers (BERT) is the most iconic pre-trained language model among the NLP tasks. Their proposed masked-language modeling (MLM) is an indispensable part of the existing pre-trained language models. Those outperformed models for downstream tasks benefited directly from the large training corpus in the pre-training stage. However, their training corpus for modern traditional Chinese was light. Most of all, the ancient Chinese corpus is still disappearance in the pre-training stage. Therefore, we aim to address this problem by transforming the annotation data of ancient Chinese into BERT style training corpus. Then we propose a pre-trained Oldhan Chinese BERT model for the NLP community. Our proposed model outperforms the original BERT model by significantly reducing perplexity scores in masked-language modeling (MLM). Also, our fine-tuning models improve F1 scores on word segmentation and part-of-speech tasks. Then we comprehensively study zero-shot cross-eras ability in the BERT model. Finally, we visualize and investigate personal pronouns in the embedding space of ancient Chinese records from four eras. We have released our code at <url>https://github.com/ckiplab/han-transformers</url>.</abstract>
       <url hash="4b02b5f3">2022.rocling-1.21</url>
@@ -277,7 +277,7 @@
       <author><first>Tzu-I</first><last>Wu</last></author>
       <author><first>Tien-Hong</first><last>Lo</last></author>
       <author><first>Fu-An</first><last>Chao</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>174–183</pages>
       <abstract>Due to the surge in global demand for English as a second language (ESL), developments of automated methods for grading speaking proficiency have gained considerable attention. This paper aims to present a computerized regime of grading the spontaneous spoken language for ESL learners. Based on the speech corpus of ESL learners recently collected in Taiwan, we first extract multi-view features (e.g., pronunciation, fluency, and prosody features) from either automatic speech recognition (ASR) transcription or audio signals. These extracted features are, in turn, fed into a tree-based classifier to produce a new set of indicative features as the input of the automated assessment system, viz. the grader. Finally, we use different machine learning models to predict ESL learners’ respective speaking proficiency and map the result into the corresponding CEFR level. The experimental results and analysis conducted on the speech corpus of ESL learners in Taiwan show that our approach holds great potential for use in automated speaking assessment, meanwhile offering more reliable predictive results than the human experts.</abstract>
@@ -301,7 +301,7 @@
       <author><first>Wen-Chao</first><last>Yeh</last></author>
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>193–199</pages>
       <abstract>This study aims to evaluate three most popular word segmentation tool for a large Traditional Chinese corpus in terms of their efficiency, resource consumption, and cost. Specifically, we compare the performances of Jieba, CKIP, and MONPA on word segmentation, part-of-speech tagging and named entity recognition through extensive experiments. Experimental results show that MONPA using GPU for batch segmentation can greatly reduce the processing time of massive datasets. In addition, its features such as word segmentation, part-of-speech tagging, and named entity recognition are beneficial to downstream applications.</abstract>
       <url hash="bd999b0c">2022.rocling-1.24</url>
@@ -334,7 +334,7 @@
     <paper id="27">
       <title>Early Speech Production in Infants and Toddlers Later Diagnosed with Cerebral Palsy: A Retrospective Study</title>
       <author><first>Chien Ju</first><last>Chan</last></author>
-      <author><first>Li-Mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-Mei</first><last>Chen</last></author>
       <author><first>Li-Wen</first><last>Chen</last></author>
       <pages>214–220</pages>
       <abstract>In this retrospective study, we compared the early speech development between infants with cerebral palsy (CP) and typically developing (TD) infants. The recordings of utterances were collected from two CP infants and two typically-developing (TD) infants at approximately 8 and 24 months old. The data was analyzed by volubility, consonant emergence, canonical babbling ratio (CBR), mean babbling level (MBL). The major findings show that comparing with TD group, CP group has the characteristics of: 1) lower volubility 2) CBRutter below 0.15 at 2 years old 3) MBL score below 2 at the age of 2 with a feature of above 95% in level 1 4) using consonants mainly at two oral places (bilabials and velars) and three manners of articulation (nasal, fricative, and stop) at 2 years old.</abstract>
@@ -353,7 +353,7 @@
     <paper id="29">
       <title>Speech Timing in Typically Developing <fixed-case>M</fixed-case>andarin-Speaking Children From Ages 3 To 4</title>
       <author><first>Jeng Man</first><last>Lew</last></author>
-      <author><first>Li-Mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-Mei</first><last>Chen</last></author>
       <author><first>Yu Ching</first><last>Lin</last></author>
       <pages>230–235</pages>
       <abstract>This study aims to develop a better understanding of the speech timing development in Mandarin-speaking children from 3 to 4 years of age. Data were selected from two typically developing children. Four 50-min recordings were collected during 3 and 4 years old based on natural conversation among the observers, participants, and the parents, and the picture-naming task. Speech timing were measured by Praat, including speaking rate, articulation rate, mean length of utterance (MLUs), mean utterance duration, mean word duration, pause ratio, and volubility. Major findings of the current study are: 1) Five measurements (speaking rate, mean length of utterance(MLUs), mean utterance length, mean word duration and volubility) decreased with age in both children; 2) Articulation rate of both children increased with age; 3) Comparing with the findings from previous studies, pause ratio of both slightly increased with age. These findings not only contribute to a more comprehensive data for assessment, it also can be a reference in speech intervention.</abstract>
@@ -385,7 +385,7 @@
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
       <author><first>Li-Yun</first><last>Chang</last></author>
       <author><first>Hsueh-Chih</first><last>Chen</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <pages>256–262</pages>
       <abstract>Feature analysis of Chinese characters plays a prominent role in “character-based” education. However, there is an urgent need for a text analysis system for processing the difficulty of composing components for characters, primarily based on Chinese learners’ performance. To meet this need, the purpose of this research was to provide such a system by adapting a data-driven approach. Based on Chen et al.’s (2011) Chinese Orthography Database, this research has designed and developed an system: Character Difficulty - Research on Multi-features (CD-ROM). This system provides three functions: (1) analyzing a text and providing its difficulty regarding Chinese characters; (2) decomposing characters into components and calculating the frequency of components based on the analyzed text; and (3) affording component-deriving characters based on the analyzed text and downloadable images as teaching materials. With these functions highlighting multi-level features of characters, this system has the potential to benefit the fields of Chinese character instruction, Chinese orthographic learning, and Chinese natural language processing.</abstract>
       <url hash="d260bc2d">2022.rocling-1.32</url>
@@ -399,7 +399,7 @@
       <author><first>Pankaj</first><last>Dadure</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
       <author><first>Riyanka</first><last>Manna</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>263–272</pages>
       <abstract>Image captioning is a prominent Artificial Intelligence (AI) research area that deals with visual recognition and a linguistic description of the image. It is an interdisciplinary field concerning how computers can see and understand digital images
 
@@ -474,7 +474,7 @@
       <author><first>Qiu-Xia</first><last>Zhang</last></author>
       <author><first>Te-Yu</first><last>Chi</last></author>
       <author><first>Te-Lun</first><last>Yang</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <pages>321–328</pages>
       <abstract>This study uses training and validation data from the “ROCLING 2022 Chinese Health Care Named Entity Recognition Task” for modeling. The modeling process adopts technologies such as data augmentation and data post-processing, and uses the MacBERT pre-training model to build a dedicated Chinese medical field NER recognizer. During the fine-tuning process, we also added adversarial training methods, such as FGM and PGD, and the results of the final tuned model were close to the best team for task evaluation. In addition, by introducing mixed-precision training, we also greatly reduce the time cost of training.</abstract>
       <url hash="626fff6e">2022.rocling-1.40</url>
@@ -544,8 +544,8 @@
       <title>Overview of the <fixed-case>ROCLING</fixed-case> 2022 Shared Task for <fixed-case>C</fixed-case>hinese Healthcare Named Entity Recognition</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Chao-Yi</first><last>Chen</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>363–368</pages>
       <abstract>This paper describes the ROCLING-2022 shared task for Chinese healthcare named entity recognition, including task description, data preparation, performance metrics, and evaluation results. Among ten registered teams, seven participating teams submitted a total of 20 runs. This shared task reveals present NLP techniques for dealing with Chinese named entity recognition in the healthcare domain. All data sets with gold standards and evaluation scripts used in this shared task are publicly available for future research.</abstract>
       <url hash="c919fca7">2022.rocling-1.46</url>
diff --git a/data/xml/2022.salld.xml b/data/xml/2022.salld.xml
index 76c6d1f459..1ae9fd6326 100644
--- a/data/xml/2022.salld.xml
+++ b/data/xml/2022.salld.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 2nd Workshop on Sentiment Analysis and Linguistic Linked Data</booktitle>
       <editor><first>Ilan</first><last>Kernerman</last></editor>
       <editor><first>Sara</first><last>Carvalho</last></editor>
-      <editor><first>Carlos A.</first><last>Iglesias</last></editor>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="carlos-a-iglesias"><first>Carlos A.</first><last>Iglesias</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>O</fixed-case>-Dang! The Ontology of Dangerous Speech Messages</title>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <author><first>Simona</first><last>Frenda</last></author>
       <author><first>Mirko</first><last>Lai</last></author>
       <author><first>Oscar</first><last>Araque</last></author>
@@ -45,7 +45,7 @@
       <title>Movie Rating Prediction using Sentiment Features</title>
       <author><first>João</first><last>Ramos</last></author>
       <author><first>Diogo</first><last>Apóstolo</last></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <pages>9–18</pages>
       <abstract>We analyze the impact of using sentiment features in the prediction of movie review scores. The effort included the creation of a new lexicon, Expanded OntoSenticNet (EON), by merging OntoSenticNet and SentiWordNet, and experiments were made on the “IMDB movie review” dataset, with the three main approaches for sentiment analysis: lexicon-based, supervised machine learning and hybrids of the previous. Hybrid approaches performed the best, demonstrating the potential of merging knowledge bases and machine learning, but supervised approaches based on review embeddings were not far.</abstract>
       <url hash="b79a1509">2022.salld-1.3</url>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>Evaluating a New <fixed-case>D</fixed-case>anish Sentiment Resource: the <fixed-case>D</fixed-case>anish Sentiment Lexicon, <fixed-case>DSL</fixed-case></title>
       <author><first>Nina</first><last>Schneidermann</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <pages>19–24</pages>
       <abstract>In this paper, we evaluate a new sentiment lexicon for Danish, the Danish Sentiment Lexicon (DSL), to gain input regarding how to carry out the final adjustments of the lexicon. A feature of the lexicon that differentiates it from other sentiment resources for Danish is that it is linked to a large number of other Danish lexical resources via the DDO lemma and sense inventory and the LLOD via the Danish wordnet, DanNet. We perform our evaluation on four datasets labeled with sentiments. In addition, we compare the lexicon against two existing benchmarks for Danish: the Afinn and the Sentida resources. We observe that DSL performs mostly comparably to the existing resources, but that more fine-grained explorations need to be done in order to fully exploit its possibilities given its linking properties.</abstract>
       <url hash="08c377f2">2022.salld-1.4</url>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="6">
       <title>Sentiment Analysis of <fixed-case>S</fixed-case>erbian Old Novels</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Miloš</first><last>Košprdić</last></author>
       <author><first>Milica</first><last>Ikonić Nešić</last></author>
       <author><first>Tijana</first><last>Radović</last></author>
diff --git a/data/xml/2022.scil.xml b/data/xml/2022.scil.xml
index eea9bf6f79..68bbbfb5be 100644
--- a/data/xml/2022.scil.xml
+++ b/data/xml/2022.scil.xml
@@ -59,7 +59,7 @@
       <title>How well do <fixed-case>LSTM</fixed-case> language models learn filler-gap dependencies?</title>
       <author><first>Satoru</first><last>Ozaki</last></author>
       <author><first>Dan</first><last>Yurovsky</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>76–88</pages>
       <url hash="0e5d19eb">2022.scil-1.6</url>
       <bibkey>ozaki-etal-2022-well</bibkey>
@@ -90,7 +90,7 @@
     <paper id="10">
       <title>Linguistic Complexity and Planning Effects on Word Duration in <fixed-case>H</fixed-case>indi Read Aloud Speech</title>
       <author><first>Sidharth</first><last>Ranjan</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
       <pages>119–132</pages>
       <url hash="81a0e306">2022.scil-1.10</url>
@@ -107,7 +107,7 @@
       <title>Parsing Early <fixed-case>M</fixed-case>odern <fixed-case>E</fixed-case>nglish for Linguistic Search</title>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <pages>143–157</pages>
       <url hash="4bd9a125">2022.scil-1.12</url>
       <bibkey>kulick-etal-2022-parsing</bibkey>
@@ -172,7 +172,7 @@
     <paper id="20">
       <title>The interaction between cognitive ease and informativeness shapes the lexicons of natural languages</title>
       <author><first>Thomas</first><last>Brochhagen</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>217–219</pages>
       <url hash="621cb86e">2022.scil-1.20</url>
       <bibkey>brochhagen-boleda-2022-interaction</bibkey>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="22">
       <title>Masked language models directly encode linguistic uncertainty</title>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <author><first>Ryan J.</first><last>Hubbard</last></author>
       <author><first>Kara D.</first><last>Federmeier</last></author>
       <pages>225–228</pages>
@@ -208,8 +208,8 @@
       <author><first>Omri</first><last>Abend</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Samuel</first><last>Gibbon</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>235–240</pages>
       <url hash="ed2e00ac">2022.scil-1.24</url>
       <bibkey>szubert-etal-2022-universal</bibkey>
@@ -219,7 +219,7 @@
       <author><first>Eleonora</first><last>Gualdoni</last></author>
       <author><first>Andreas</first><last>Madebach</last></author>
       <author><first>Thomas</first><last>Brochhagen</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>241–243</pages>
       <url hash="c4d0c547">2022.scil-1.25</url>
       <bibkey>gualdoni-etal-2022-horse</bibkey>
diff --git a/data/xml/2022.sdp.xml b/data/xml/2022.sdp.xml
index 40f939a070..9c0837f1c7 100644
--- a/data/xml/2022.sdp.xml
+++ b/data/xml/2022.sdp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third Workshop on Scholarly Document Processing</booktitle>
       <editor><first>Arman</first><last>Cohan</last></editor>
       <editor><first>Guy</first><last>Feigenblat</last></editor>
-      <editor><first>Dayne</first><last>Freitag</last></editor>
+      <editor id="dayne-freitag"><first>Dayne</first><last>Freitag</last></editor>
       <editor><first>Tirthankar</first><last>Ghosal</last></editor>
       <editor><first>Drahomira</first><last>Herrmannova</last></editor>
       <editor><first>Petr</first><last>Knoth</last></editor>
@@ -13,7 +13,7 @@
       <editor><first>Philipp</first><last>Mayr</last></editor>
       <editor><first>Michal</first><last>Shmueli-Scheuer</last></editor>
       <editor><first>Anita</first><last>de Waard</last></editor>
-      <editor><first>Lucy Lu</first><last>Wang</last></editor>
+      <editor id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
       <month>October</month>
@@ -99,7 +99,7 @@
     <paper id="7">
       <title>Incorporating the Rhetoric of Scientific Language into Sentence Embeddings using Phrase-guided Distant Supervision and Metric Learning</title>
       <author><first>Kaito</first><last>Sugimoto</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>54–68</pages>
       <abstract>Communicative functions are an important rhetorical feature of scientific writing. Sentence embeddings that contain such features are highly valuable for the argumentative analysis of scientific documents, with applications in document alignment, recommendation, and academic writing assistance. Moreover, embeddings can provide a possible solution to the open-set problem, where models need to generalize to new communicative functions unseen at training time. However, existing sentence representation models are not suited for detecting functional similarity since they only consider lexical or semantic similarities. To remedy this, we propose a combined approach of distant supervision and metric learning to make a representation model more aware of the functional part of a sentence. We first leverage an existing academic phrase database to label sentences automatically with their functions. Then, we train an embedding model to capture similarities and dissimilarities from a rhetorical perspective. The experimental results demonstrate that the embeddings obtained from our model are more advantageous than existing models when retrieving functionally similar sentences. We also provide an extensive analysis of the performance differences between five metric learning objectives, revealing that traditional methods (e.g., softmax cross-entropy loss and triplet loss) outperform state-of-the-art techniques.</abstract>
       <url hash="556766b5">2022.sdp-1.7</url>
@@ -165,7 +165,7 @@
     <paper id="14">
       <title>Exploiting Unary Relations with Stacked Learning for Relation Extraction</title>
       <author><first>Yuan</first><last>Zhuang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Kiri L.</first><last>Wagstaff</last></author>
       <author><first>Raymond</first><last>Francis</last></author>
       <author><first>Matthew P.</first><last>Golombek</last></author>
@@ -233,7 +233,7 @@
       <title>Overview of <fixed-case>MSLR</fixed-case>2022: A Shared Task on Multi-document Summarization for Literature Reviews</title>
       <author><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Jay</first><last>DeYoung</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>175–180</pages>
       <abstract>We provide an overview of the MSLR2022 shared task on multi-document summarization for literature reviews. The shared task was hosted at the Third Scholarly Document Processing (SDP) Workshop at COLING 2022. For this task, we provided data consisting of gold summaries extracted from review papers along with the groups of input abstracts that were synthesized into these summaries, split into two subtasks. In total, six teams participated, making 10 public submissions, 6 to the Cochrane subtask and 4 to the MSˆ2 subtask. The top scoring systems reported over 2 points ROUGE-L improvement on the Cochrane subtask, though performance improvements are not consistently reported across all automated evaluation metrics; qualitative examination of the results also suggests the inadequacy of current evaluation metrics for capturing factuality and consistency on this task. Significant work is needed to improve system performance, and more importantly, to develop better methods for automatically evaluating performance on this task.</abstract>
       <url hash="59399dbf">2022.sdp-1.20</url>
@@ -241,11 +241,11 @@
     </paper>
     <paper id="21">
       <title><fixed-case>LED</fixed-case> down the rabbit hole: exploring the potential of global attention for biomedical multi-document summarisation</title>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
       <author><first>Thinh Hung</first><last>Truong</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>181–187</pages>
       <abstract>In this paper we report the experiments performed for the submission to the Multidocument summarisation for Literature Review (MSLR) Shared Task. In particular, we adopt Primera model to the biomedical domain by placing global attention on important biomedical entities in several ways. We analyse the outputs of 23 resulting models and report some patterns related to the presence of additional global attention, number of training steps and the inputs configuration.</abstract>
@@ -326,7 +326,7 @@
       <title>Overview of the <fixed-case>SV</fixed-case>-Ident 2022 Shared Task on Survey Variable Identification in Social Science Publications</title>
       <author><first>Tornike</first><last>Tsereteli</last></author>
       <author><first>Yavuz Selim</first><last>Kartal</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Andrea</first><last>Zielinski</last></author>
       <author><first>Kai</first><last>Eckert</last></author>
       <author><first>Philipp</first><last>Mayr</last></author>
@@ -393,7 +393,7 @@
       <title><fixed-case>LTRC</fixed-case> @<fixed-case>M</fixed-case>u<fixed-case>P</fixed-case> 2022: Multi-Perspective Scientific Document Summarization Using Pre-trained Generation Models</title>
       <author><first>Ashok</first><last>Urlana</last></author>
       <author><first>Nirmal</first><last>Surange</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>279–284</pages>
       <abstract>The MuP-2022 shared task focuses on multiperspective scientific document summarization. Given a scientific document, with multiple reference summaries, our goal was to develop a model that can produce a generic summary covering as many aspects of the document as covered by all of its reference summaries. This paper describes our best official model, a finetuned BART-large, along with a discussion on the challenges of this task and some of our unofficial models including SOTA generation models. Our submitted model out performedthe given, MuP 2022 shared task, baselines on ROUGE-2, ROUGE-L and average ROUGE F1-scores. Code of our submission can be ac- cessed here.</abstract>
       <url hash="1d0b794c">2022.sdp-1.35</url>
diff --git a/data/xml/2022.semeval.xml b/data/xml/2022.semeval.xml
index 6c38744249..9840477b00 100644
--- a/data/xml/2022.semeval.xml
+++ b/data/xml/2022.semeval.xml
@@ -25,8 +25,8 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>emeval-2022 Task 1: <fixed-case>CODWOE</fixed-case> – Comparing Dictionaries and Word Embeddings</title>
       <author><first>Timothee</first><last>Mickus</last></author>
-      <author><first>Kees</first><last>Van Deemter</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
       <pages>1-14</pages>
       <abstract>Word embeddings have advanced the state of the art in NLP across numerous tasks. Understanding the contents of dense neural representations is of utmost interest to the computational semantics community. We propose to focus on relating these opaque word vectors with human-readable definitions, as found in dictionaries This problem naturally divides into two subtasks: converting definitions into embeddings, and converting embeddings into definitions. This task was conducted in a multilingual setting, using comparable sets of embeddings trained homogeneously.</abstract>
@@ -129,7 +129,7 @@
       <title><fixed-case>RIGA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 1: Scaling Recurrent Neural Networks for <fixed-case>CODWOE</fixed-case> Dictionary Modeling</title>
       <author><first>Eduards</first><last>Mukans</last></author>
       <author><first>Gus</first><last>Strazds</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <pages>82-87</pages>
       <abstract>Described are our two entries “emukans” and “guntis” for the definition modeling track of CODWOE SemEval-2022 Task 1. Our approach is based on careful scaling of a GRU recurrent neural network, which exhibits double descent of errors, corresponding to significant improvements also per human judgement. Our results are in the middle of the ranking table per official automatic metrics.</abstract>
       <url hash="7dcb707d">2022.semeval-1.9</url>
@@ -180,7 +180,7 @@
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
       <author><first>Edward</first><last>Gow-Smith</last></author>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>107-121</pages>
@@ -193,7 +193,7 @@
     <paper id="14">
       <title><fixed-case>H</fixed-case>elsinki-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 2: A Feature-Based Approach to Multilingual Idiomaticity Detection</title>
       <author><first>Sami</first><last>Itkonen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
       <pages>122-134</pages>
       <abstract>This paper describes the University of Helsinki submission to the SemEval 2022 task on multilingual idiomaticity detection. Our system utilizes several models made available by HuggingFace, along with the baseline BERT model for the task. We focus on feature engineering based on properties that typically characterize idiomatic expressions. The additional features lead to improvements over the baseline and the final submission achieves 15th place out of 20 submissions. The paper provides error analysis of our model including visualisations of the contributions of individual features.</abstract>
@@ -265,8 +265,8 @@
     <paper id="20">
       <title><fixed-case>C</fixed-case>ardiff<fixed-case>NLP</fixed-case>-Metaphor at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 2: Targeted Fine-tuning of Transformer-based Language Models for Idiomaticity Detection</title>
       <author><first>Joanne</first><last>Boisson</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <pages>169-177</pages>
       <abstract>This paper describes the experiments ran for SemEval-2022 Task 2, subtask A, zero-shot and one-shot settings for idiomaticity detection. Our main approach is based on fine-tuning transformer-based language models as a baseline to perform binary classification. Our system, CardiffNLP-Metaphor, ranked 8th and 7th (respectively on zero- and one-shot settings on this task. Our main contribution lies in the extensive evaluation of transformer-based language models and various configurations, showing, among others, the potential of large multilingual models over base monolingual models. Moreover, we analyse the impact of various input parameters, which offer interesting insights on how language models work in practice.</abstract>
       <url hash="1f04d7e2">2022.semeval-1.20</url>
@@ -423,7 +423,7 @@
     <paper id="33">
       <title><fixed-case>U</fixed-case>o<fixed-case>R</fixed-case>-<fixed-case>NCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 3: Fine-Tuning the <fixed-case>BERT</fixed-case>-Based Models for Validating Taxonomic Relations</title>
       <author><first>Thanet</first><last>Markchom</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <author><first>Jiaoyan</first><last>Chen</last></author>
       <pages>260-265</pages>
       <abstract>In human languages, there are many presuppositional constructions that impose a constrain on the taxonomic relations between two nouns depending on their order. These constructions create a challenge in validating taxonomic relations in real-world contexts. In SemEval2022-Task3 Presupposed Taxonomies: Evaluating Neural Network Semantics (PreTENS), the organizers introduced a task regarding validating the taxonomic relations within a variety of presuppositional constructions. This task is divided into two subtasks: classification and regression. Each subtask contains three datasets in multiple languages, i.e., English, Italian and French. To tackle this task, this work proposes to fine-tune different BERT-based models pre-trained on different languages. According to the experimental results, the fine-tuned BERT-based models are effective compared to the baselines in classification. For regression, the fine-tuned models show promising performance with the possibility of improvement.</abstract>
@@ -483,7 +483,7 @@
     <paper id="38">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronizing and Condescending Language Detection</title>
       <author><first>Carla</first><last>Perez-Almendros</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>298-307</pages>
       <abstract>This paper presents an overview of Task 4 at SemEval-2022, which was focused on detecting Patronizing and Condescending Language (PCL) towards vulnerable communities. Two sub-tasks were considered: a binary classification task, where participants needed to classify a given paragraph as containing PCL or not, and a multi-label classification task, where participants needed to identify which types of PCL are present (if any). The task attracted more than 300 participants, 77 teams and 229 valid submissions. We provide an overview of how the task was organized, discuss the techniques that were employed by the different participants, and summarize the main resulting insights about PCL detection and categorization.</abstract>
@@ -522,7 +522,7 @@
       <title><fixed-case>BEIKE</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Prompt-Based Paragraph Classification for Patronizing and Condescending Language Detection</title>
       <author><first>Yong</first><last>Deng</last></author>
       <author><first>Chenxiao</first><last>Dou</last></author>
-      <author><first>Liangyu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last></author>
       <author><first>Deqiang</first><last>Miao</last></author>
       <author><first>Xianghui</first><last>Sun</last></author>
       <author><first>Baochang</first><last>Ma</last></author>
@@ -595,7 +595,7 @@
       <title><fixed-case>MS</fixed-case>@<fixed-case>IW</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronising and Condescending Language Detection with Synthetically Generated Data</title>
       <author><first>Selina</first><last>Meyer</last></author>
       <author><first>Maximilian</first><last>Schmidhuber</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>363-368</pages>
       <abstract>In this description paper we outline the system architecture submitted to Task 4, Subtask 1 at SemEval-2022. We leverage the generative power of state of the art generative pretrained transformer models to increase training set size and remedy class imbalance issues. Our best submitted system is trained on a synthetically enhanced dataset with 10.3 times as many positive samples as the original dataset and reaches an F1 score of 50.62%, which is 10 percentage points higher than our initial system trained on an undersampled version of the original dataset. We explore possible reasons for the comparably low score in the overall task ranking and report on experiments conducted during the post-evaluation phase.</abstract>
       <url hash="9854c9e0">2022.semeval-1.47</url>
@@ -648,7 +648,7 @@
     <paper id="52">
       <title>Tesla at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronizing and Condescending Language Detection using Transformer-based Models with Data Augmentation</title>
       <author><first>Sahil</first><last>Bhatt</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>394-399</pages>
       <abstract>This paper describes our system for Task 4 of SemEval 2022: Patronizing and Condescending Language (PCL) Detection. For sub-task 1, where the objective is to classify a text as PCL or non-PCL, we use a T5 Model fine-tuned on the dataset. For sub-task 2, which is a multi-label classification problem, we use a RoBERTa model fine-tuned on the dataset. Given that the key challenge in this task is classification on an imbalanced dataset, our models rely on an augmented dataset that we generate using paraphrasing. We found that these two models yield the best results out of all the other approaches we tried.</abstract>
       <url hash="326039b1">2022.semeval-1.52</url>
@@ -703,7 +703,7 @@
     <paper id="57">
       <title>Team <fixed-case>LRL</fixed-case>_<fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Binary and Multi-label Classification of <fixed-case>PCL</fixed-case> using Fine-tuned Transformer-based Models</title>
       <author><first>Kushagri</first><last>Tandon</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>421-431</pages>
       <abstract>Patronizing and condescending language (PCL) can find its way into many mediums of public discourse. Presence of PCL in text can produce negative effects in the society. The challenge presented by the task emerges from the subtleties of PCL and various data dependent constraints. Hence, developing techniques to detect PCL in text, before it is propagated is vital. The aim of this paper is twofold, a) to present systems that can be used to classify a text as containing PCL or not, and b) to present systems that assign the different categories of PCL present in text. The proposed systems are primarily rooted in transformer-based pre-trained language models. Among the models submitted for Subtask 1, the best F1-Score of 0.5436 was achieved by a deep learning based ensemble model. This system secured the rank 29 in the official task ranking. For Subtask 2, the best macro-average F1-Score of 0.339 was achieved by an ensemble model combining transformer-based neural architecture with gradient boosting label-balanced classifiers. This system secured the rank 21 in the official task ranking. Among subsequently carried out experiments a variation in architecture of a system for Subtask 2 achieved a macro-average F1-Score of 0.3527.</abstract>
       <url hash="41de6cac">2022.semeval-1.57</url>
@@ -875,7 +875,7 @@
     </paper>
     <paper id="72">
       <title><fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4-A: Patronism Detection in Posts Written in <fixed-case>E</fixed-case>nglish using Preprocessing Methods and various Machine Leaerning Methods</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Ilan</first><last>Meyrowitsch</last></author>
       <author><first>Matan</first><last>Fchima</last></author>
       <pages>519-524</pages>
@@ -903,7 +903,7 @@
       <author><first>Berta</first><last>Chulvi</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Alyssa</first><last>Lees</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <pages>533-549</pages>
       <abstract>The paper describes the SemEval-2022 Task 5: Multimedia Automatic Misogyny Identification (MAMI),which explores the detection of misogynous memes on the web by taking advantage of available texts and images. The task has been organised in two related sub-tasks: the first one is focused on recognising whether a meme is misogynous or not (Sub-task A), while the second one is devoted to recognising types of misogyny (Sub-task B). MAMI has been one of the most popular tasks at SemEval-2022 with more than 400 participants, 65 teams involved in Sub-task A and 41 in Sub-task B from 13 countries. The MAMI challenge received 4214 submitted runs (of which 166 uploaded on the leader-board), denoting an enthusiastic participation for the proposed problem. The collection and annotation is described for the task dataset. The paper provides an overview of the systems proposed for the challenge, reports the results achieved in both sub-tasks and outlines a description of the main errors for a comprehension of the systems capabilities and for detailing future research perspectives.</abstract>
       <url hash="2e5a32b6">2022.semeval-1.74</url>
@@ -961,7 +961,7 @@
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Angel</first><last>S</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>571-574</pages>
       <abstract>Research is progressing in a fast manner in the field of offensive, hate speech, abusive and sarcastic data. Tackling hate speech against women is urgent and really needed to give respect to the lady of our life. This paper describes the system used for identifying misogynous content using images and text. The system developed by the team TECHSSN uses transformer models to detect the misogynous content from text and Convolutional Neural Network model for image data. Various models like BERT, ALBERT, XLNET and CNN are explored and the combination of ALBERT and CNN as an ensemble model provides better results than the rest. This system was developed for the task 5 of the competition, SemEval 2022.</abstract>
       <url hash="d5b07ea7">2022.semeval-1.78</url>
@@ -1081,7 +1081,7 @@
     <paper id="89">
       <title>taochen at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 5: Multimodal Multitask Learning and Ensemble Learning</title>
       <author><first>Chen</first><last>Tao</last></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
       <pages>648-653</pages>
       <abstract>We present a multi-modal deep learning system for the Multimedia Automatic Misogyny Identification (MAMI) challenge, a SemEval task of identifying and classifying misogynistic messages in online memes. We adapt multi-task learning for the multimodal subtasks of the MAMI challenge to transfer knowledge among the correlated subtasks. We also leverage on ensemble learning for synergistic integration of models individually trained for the subtasks. We finally discuss about errors of the system to provide useful insights for future work.</abstract>
       <url hash="1845e726">2022.semeval-1.89</url>
@@ -1422,7 +1422,7 @@
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Angel</first><last>S</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>851-855</pages>
       <abstract>Irony detection in the social media is an upcoming research which places a main role in sentiment analysis and offensive language identification. Sarcasm is one form of irony that is used to provide intended comments against realism. This paper describes a method to detect intended sarcasm in text (SemEval-2022 Task 6). The TECHSSN team used Bidirectional Encoder Representations from Transformers (BERT) models and its variants to classify the text as sarcastic or non-sarcastic in English and Arabic languages. The data is preprocessed and fed to the model for training. The transformer models learn the weights during the training phase from the given dataset and predicts the output class labels for the unseen test data.</abstract>
       <url hash="1264358f">2022.semeval-1.118</url>
@@ -1457,7 +1457,7 @@
     <paper id="121">
       <title><fixed-case>U</fixed-case>o<fixed-case>R</fixed-case>-<fixed-case>NCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Using ensemble loss with <fixed-case>BERT</fixed-case> for intended sarcasm detection</title>
       <author><first>Emmanuel</first><last>Osei-Brefo</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <pages>871-876</pages>
       <abstract>Sarcasm has gained notoriety for being difficult to detect by machine learning systems due to its figurative nature. In this paper, Bidirectional Encoder Representations from Transformers (BERT) model has been used with ensemble loss made of cross-entropy loss and negative log-likelihood loss to classify whether a given sentence is in English and Arabic tweets are sarcastic or not. From the results obtained in the experiments, our proposed BERT with ensemble loss achieved superior performance when applied to English and Arabic test datasets. For the validation dataset, our model performed better on the Arabic dataset but failed to outperform the baseline method (made of BERT with only a single loss function) when applied on the English validation set.</abstract>
       <url hash="d3e455b9">2022.semeval-1.121</url>
@@ -1504,7 +1504,7 @@
       <title><fixed-case>A</fixed-case>lex<fixed-case>U</fixed-case>-<fixed-case>AL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Detecting Sarcasm in <fixed-case>A</fixed-case>rabic Text Using Deep Learning Techniques</title>
       <author><first>Aya</first><last>Lotfy</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>891-895</pages>
       <abstract>Sarcasm detection is an important task in Natural Language Understanding. Sarcasm is a form of verbal irony that occurs when there is a discrepancy between the literal and intended meanings of an expression. In this paper, we use the tweets of the Arabic dataset provided by SemEval-2022 task 6 to train deep learning classifiers to solve the sub-tasks A and C associated with the dataset. Sub-task A is to determine if the tweet is sarcastic or not. For sub-task C, given a sarcastic text and its non-sarcastic rephrase, i.e. two texts that convey the same meaning, determine which is the sarcastic one. In our solution, we utilize fine-tuned MARBERT (Abdul-Mageed et al., 2021) model with an added single linear layer on top for classification. The proposed solution achieved 0.5076 F1-sarcastic in Arabic sub-task A, accuracy of 0.7450 and F-score of 0.7442 in Arabic sub-task C. We achieved the <tex-math>2^{nd}</tex-math> and the <tex-math>9^{th}</tex-math> places for Arabic sub-tasks A and C respectively.</abstract>
       <url hash="3aa87ac4">2022.semeval-1.125</url>
@@ -1593,7 +1593,7 @@
       <title><fixed-case>TUG</fixed-case>-<fixed-case>CIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 6: Two-stage Fine-tuning for Intended Sarcasm Detection</title>
       <author><first>Jason</first><last>Angel</last></author>
       <author><first>Segun</first><last>Aroyehun</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>951-955</pages>
       <abstract>We present our systems and findings for the iSarcasmEval: Intended Sarcasm Detection In English and Arabic at SEMEVAL 2022. Specifically we take part in Subtask A for the English language. The task aims to determine whether a text from social media (a tweet) is sarcastic or not. We model the problem using knowledge sources, a pre-trained language model on sentiment/emotion data and a dataset focused on intended sarcasm. Our submission ranked third place among 43 teams. In addition, we show a brief error analysis of our best model to investigate challenging examples for detecting sarcasm.</abstract>
       <url hash="69b1f615">2022.semeval-1.133</url>
@@ -1627,7 +1627,7 @@
     <paper id="136">
       <title><fixed-case>FII</fixed-case> <fixed-case>UAIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: i<fixed-case>S</fixed-case>arcasm<fixed-case>E</fixed-case>val - Intended Sarcasm Detection in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic</title>
       <author><first>Tudor</first><last>Manoleasa</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <author><first>Iustin</first><last>Sandu</last></author>
       <pages>970-977</pages>
       <abstract>The “iSarcasmEval - Intended Sarcasm Detection in English and Arabic” task at the SemEval 2022 competition focuses on detectingand rating the distinction between intendedand perceived sarcasm in the context of textual sarcasm detection, as well as the level ofirony contained in these texts. In the contextof SemEval, we present a binary classificationmethod which classifies the text as sarcasticor non-sarcastic (task A, for English) based onfive classical machine learning approaches bytrying to train the models based on this datasetsolely (i.e., no other datasets have been used).This process indicates low performance compared to previously studied datasets, which in2dicates that the previous ones might be biased.</abstract>
@@ -1650,7 +1650,7 @@
       <title><fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Fuzzy-Rough Nearest Neighbor Classification for Sarcasm Detection</title>
       <author><first>Olha</first><last>Kaminska</last></author>
       <author><first>Chris</first><last>Cornelis</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>987-992</pages>
       <abstract>This paper describes the approach developed by the LT3 team in the Intended Sarcasm Detection task at SemEval-2022 Task 6. We considered the binary classification subtask A for English data. The presented system is based on the fuzzy-rough nearest neighbor classification method using various text embedding techniques. Our solution reached 9th place in the official leader-board for English subtask A.</abstract>
       <url hash="52148f3b">2022.semeval-1.138</url>
@@ -1734,7 +1734,7 @@
     </paper>
     <paper id="145">
       <title><fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6-A: Sarcasm Detection in Tweets Written in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic using Preprocessing Methods and Word N-grams</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Matan</first><last>Fchima</last></author>
       <author><first>Ilan</first><last>Meyrowitsch</last></author>
       <pages>1031-1038</pages>
@@ -1870,7 +1870,7 @@
       <author><first>Fabian</first><last>Flöck</last></author>
       <author><first>Devin</first><last>Gaffney</last></author>
       <author><first>Przemyslaw</first><last>Grabowicz</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>David</first><last>Jurgens</last></author>
       <author><first>Mattia</first><last>Samory</last></author>
       <pages>1094-1106</pages>
@@ -1883,7 +1883,7 @@
     <paper id="156">
       <title><fixed-case>EMBEDDIA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 8: Investigating Sentence, Image, and Knowledge Graph Representations for Multilingual News Article Similarity</title>
       <author><first>Elaine</first><last>Zosa</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Boshko</first><last>Koloski</last></author>
       <author><first>Lidia</first><last>Pivovarova</last></author>
       <pages>1107-1113</pages>
@@ -1909,9 +1909,9 @@
     <paper id="158">
       <title><fixed-case>G</fixed-case>ate<fixed-case>NLP</fixed-case>-<fixed-case>US</fixed-case>hef at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 8: Entity-Enriched <fixed-case>S</fixed-case>iamese Transformer for Multilingual News Article Similarity</title>
       <author><first>Iknoor</first><last>Singh</last></author>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
+      <author><first>Yue</first><last>Li</last></author>
       <author><first>Melissa</first><last>Thong</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>1121-1128</pages>
       <abstract>This paper describes the second-placed system on the leaderboard of SemEval-2022 Task 8: Multilingual News Article Similarity. We propose an entity-enriched Siamese Transformer which computes news article similarity based on different sub-dimensions, such as the shared narrative, entities, location and time of the event discussed in the news article. Our system exploits a Siamese network architecture using a Transformer encoder to learn document-level representations for the purpose of capturing the narrative together with the auxiliary entity-based features extracted from the news articles. The intuition behind using all these features together is to capture the similarity between news articles at different granularity levels and to assess the extent to which different news outlets write about “the same events”. Our experimental results and detailed ablation study demonstrate the effectiveness and the validity of our proposed method.</abstract>
       <url hash="71abd584">2022.semeval-1.158</url>
@@ -2022,7 +2022,7 @@
       <author><first>Hongqing</first><last>Xu</last></author>
       <author><first>Shuzhe</first><last>Zhou</last></author>
       <author><first>Bohan</first><last>Chen</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Yuanchao</first><last>Liu</last></author>
       <pages>1184-1189</pages>
       <abstract>This article introduces a system to solve the SemEval 2022 Task 8: Multilingual News Article Similarity. The task focuses on the consistency of events reported in two news articles. The system consists of a pre-trained model(e.g., INFOXLM and XLM-RoBERTa) to extract multilingual news features, following fully-connected networks to measure the similarity. In addition, data augmentation and Ten Fold Voting are used to enhance the model. Our final submitted model is an ensemble of three base models, with a Pearson value of 0.784 on the test dataset.</abstract>
@@ -2140,7 +2140,7 @@
       <author><first>Kelley</first><last>Lynch</last></author>
       <author><first>Richard</first><last>Brutti</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1244-1255</pages>
       <abstract>In this task, we identify a challenge that is reflective of linguistic and cognitive competencies that humans have when speaking and reasoning. Particularly, given the intuition that textual and visual information mutually inform each other for semantic reasoning, we formulate a Competence-based Question Answering challenge, designed to involve rich semantic annotation and aligned text-video objects. The task is to answer questions from a collection of cooking recipes and videos, where each question belongs to a “question family” reflecting a specific reasoning competence. The data and task result is publicly available.</abstract>
       <url hash="fef3fa2c">2022.semeval-1.176</url>
@@ -2153,7 +2153,7 @@
       <author><first>Weihe</first><last>Zhai</last></author>
       <author><first>Mingqiang</first><last>Feng</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>1256-1262</pages>
       <abstract>This paper presents the second place system for the R2VQ: competence-based multimodal question answering shared task. The purpose of this task is to involve semantic&amp;cooking roles and text-images objects when querying how well a system understands the procedure of a recipe. This task is approached with text-to-text generative model based on transformer architecture. As a result, the model can well generalise to soft constrained and other competence-based question answering problem. We propose label enclosed input method which help the model achieve significant improvement from 65.34 (baseline) to 91.3. In addition to describing the submitted system, the impact of model architecture and label selection are investigated along with remarks regarding error analysis. Finally, future works are presented.</abstract>
       <url hash="feb5f7ac">2022.semeval-1.177</url>
@@ -2202,8 +2202,8 @@
       <author><first>Enrica</first><last>Troiano</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Jan</first><last>Buchmann</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>1280-1295</pages>
       <abstract>In this paper, we introduce the first SemEval shared task on Structured Sentiment Analysis, for which participants are required to predict all sentiment graphs in a text, where a single sentiment graph is composed of a sentiment holder, target, expression and polarity. This new shared task includes two subtracks (monolingual and cross-lingual) with seven datasets available in five languages, namely Norwegian, Catalan, Basque, Spanish and English. Participants submitted their predictions on a held-out test set and were evaluated on Sentiment Graph F1 . Overall, the task received over 200 submissions from 32 participating teams. We present the results of the 15 teams that provided system descriptions and our own expanded analysis of the test predictions.</abstract>
@@ -2255,7 +2255,7 @@
       <author><first>Angel</first><last>S</last></author>
       <author><first>Rajalakshmi</first><last>Sivanaiah</last></author>
       <author><first>Sakaya Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>1324-1328</pages>
       <abstract>Task 10 in SemEval 2022 is a composite task which entails analysis of opinion tuples, and recognition and demarcation of their nature. In this paper, we will elaborate on how such a methodology is implemented, how it is undertaken for a Structured Sentiment Analysis, and the results obtained thereof. To achieve this objective, we have adopted a bi-layered BiLSTM approach. In our research, a variation on the norm has been effected towards enhancement of accuracy, by basing the categorization meted out to an individual member as a by-product of its adjacent members, using specialized algorithms to ensure the veracity of the output, which has been modelled to be the holistically most accurate label for the entire sequence. Such a strategy is superior in terms of its parsing accuracy and requires less time. This manner of action has yielded an SF1 of 0.33 in the highest-performing configuration.</abstract>
       <url hash="0e841f0d">2022.semeval-1.184</url>
@@ -2399,7 +2399,7 @@
     </paper>
     <paper id="196">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Multilingual Complex Named Entity Recognition (<fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>)</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Anjie</first><last>Fang</last></author>
       <author><first>Besnik</first><last>Fetahu</last></author>
       <author><first>Sudipta</first><last>Kar</last></author>
@@ -2442,7 +2442,7 @@
       <author><first>Renzo M.</first><last>Rivera-Zavala</last></author>
       <author><first>Paloma</first><last>Martinez</last></author>
       <author><first>Claudia</first><last>Moro</last></author>
-      <author><first>Emerson</first><last>Paraiso</last></author>
+      <author id="emerson-cabrera-paraiso"><first>Emerson</first><last>Paraiso</last></author>
       <pages>1448-1456</pages>
       <abstract>This study introduces the system submitted to the SemEval 2022 Task 11: MultiCoNER (Multilingual Complex Named Entity Recognition) by the UC3M-PUCPR team. We proposed an ensemble of transformer-based models for entity recognition in cross-domain texts. Our deep learning method benefits from the transformer architecture, which adopts the attention mechanism to handle the long-range dependencies of the input text. Also, the ensemble approach for named entity recognition (NER) improved the results over baselines based on individual models on two of the three tracks we participated in. The ensemble model for the code-mixed task achieves an overall performance of 76.36% F1-score, a 2.85 percentage point increase upon our individually best model for this task, XLM-RoBERTa-large (73.51%), outperforming the baseline provided for the shared task by 18.26 points. Our preliminary results suggest that contextualized language models ensembles can, even if modestly, improve the results in extracting information from unstructured data.</abstract>
       <url hash="8f3067f6">2022.semeval-1.199</url>
@@ -2520,7 +2520,7 @@
     </paper>
     <paper id="205">
       <title>Sliced at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Bigger, Better? Massively Multilingual <fixed-case>LM</fixed-case>s for Multilingual Complex <fixed-case>NER</fixed-case> on an Academic <fixed-case>GPU</fixed-case> Budget</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1494-1500</pages>
       <abstract>Massively multilingual language models (MMLMs) have become a widely-used representation method, and multiple large MMLMs were proposed in recent years. A trend is to train MMLMs on larger text corpora or with more layers. In this paper we set out to test recent popular MMLMs on detecting semantically ambiguous and complex named entities with an academic GPU budget. Our submission of a single model for 11 languages on the SemEval Task 11 MultiCoNER shows that a vanilla transformer-CRF with XLM-R<tex-math>_{large}</tex-math> outperforms the more recent RemBERT, ranking 9th from 26 submissions in the multilingual track. Compared to RemBERT, the XLM-R model has the additional advantage to fit on a slice of a multi-instance GPU. As contrary to expectations and recent findings, we found RemBERT to not be the best MMLM, we further set out to investigate this discrepancy with additional experiments on multilingual Wikipedia NER data. While we expected RemBERT to have an edge on that dataset as it is closer to its pre-training data, surprisingly, our results show that this is not the case, suggesting that text domain match does not explain the discrepancy.</abstract>
       <url hash="ab2c8d56">2022.semeval-1.205</url>
@@ -2779,9 +2779,9 @@
     </paper>
     <paper id="225">
       <title>L3i at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Straightforward Additional Context for Multilingual Named Entity Recognition</title>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Carlos-Emiliano</first><last>González-Gallardo</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>1630-1638</pages>
       <abstract>This paper summarizes the participation of the L3i laboratory of the University of La Rochelle in the SemEval-2022 Task 11, Multilingual Complex Named Entity Recognition (MultiCoNER). The task focuses on detecting semantically ambiguous and complex entities in short and low-context monolingual and multilingual settings. We argue that using a language-specific and a multilingual language model could improve the performance of multilingual and mixed NER. Also, we consider that using additional contexts from the training set could improve the performance of a NER on short texts. Thus, we propose a straightforward technique for generating additional contexts with and without the presence of entities. Our findings suggest that, in our internal experimental setup, this approach is promising. However, we ranked above average for the high-resource languages and lower than average for low-resource and multilingual models.</abstract>
diff --git a/data/xml/2022.sigdial.xml b/data/xml/2022.sigdial.xml
index 51f2371347..f5a388b253 100644
--- a/data/xml/2022.sigdial.xml
+++ b/data/xml/2022.sigdial.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue</booktitle>
       <editor><first>Oliver</first><last>Lemon</last></editor>
-      <editor><first>Dilek</first><last>Hakkani-Tur</last></editor>
+      <editor id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></editor>
       <editor><first>Junyi Jessy</first><last>Li</last></editor>
       <editor><first>Arash</first><last>Ashrafzadeh</last></editor>
       <editor><first>Daniel Hernández</first><last>Garcia</last></editor>
@@ -48,7 +48,7 @@
     <paper id="3">
       <title>Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks</title>
       <author><first>Yen Ting</first><last>Lin</last></author>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>26–38</pages>
@@ -62,7 +62,7 @@
       <title>Guiding the Release of Safer <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> Conversational <fixed-case>AI</fixed-case> through Value Sensitive Design</title>
       <author><first>A. Stevie</first><last>Bergman</last></author>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Shannon</first><last>Spruit</last></author>
+      <author id="shannon-l-spruit"><first>Shannon</first><last>Spruit</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Emily</first><last>Dinan</last></author>
       <author><first>Y-Lan</first><last>Boureau</last></author>
@@ -122,7 +122,7 @@
     <paper id="9">
       <title><fixed-case>Q</fixed-case>uality<fixed-case>A</fixed-case>dapt: an Automatic Dialogue Quality Estimation Framework</title>
       <author><first>John</first><last>Mendonca</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>83–90</pages>
       <abstract>Despite considerable advances in open-domain neural dialogue systems, their evaluation remains a bottleneck. Several automated metrics have been proposed to evaluate these systems, however, they mostly focus on a single notion of quality, or, when they do combine several sub-metrics, they are computationally expensive. This paper attempts to solve the latter: QualityAdapt leverages the Adapter framework for the task of Dialogue Quality Estimation. Using well defined semi-supervised tasks, we train adapters for different subqualities and score generated responses with AdapterFusion. This compositionality provides an easy to adapt metric to the task at hand that incorporates multiple subqualities. It also reduces computational costs as individual predictions of all subqualities are obtained in a single forward pass. This approach achieves comparable results to state-of-the-art metrics on several datasets, whilst keeping the previously mentioned advantages.</abstract>
@@ -134,8 +134,8 @@
       <title>Graph Neural Network Policies and Imitation Learning for Multi-Domain Task-Oriented Dialogues</title>
       <author><first>Thibault</first><last>Cordier</last></author>
       <author><first>Tanguy</first><last>Urvoy</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <pages>91–100</pages>
       <abstract>Task-oriented dialogue systems are designed to achieve specific goals while conversing with humans. In practice, they may have to handle simultaneously several domains and tasks. The dialogue manager must therefore be able to take into account domain changes and plan over different domains/tasks in order to deal with multi-domain dialogues. However, learning with reinforcement in such context becomes difficult because the state-action dimension is larger while the reward signal remains scarce. Our experimental results suggest that structured policies based on graph neural networks combined with different degrees of imitation learning can effectively handle multi-domain dialogues. The reported experiments underline the benefit of structured policies over standard policies.</abstract>
       <url hash="6976778a">2022.sigdial-1.10</url>
@@ -172,7 +172,7 @@
       <title>Dialog Acts for Task Driven Embodied Agents</title>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
       <author><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>111–123</pages>
       <abstract>Embodied agents need to be able to interact in natural language – understanding task descriptions and asking appropriate follow up questions to obtain necessary information to be effective at successfully accomplishing tasks for a wide range of users. In this work, we propose a set of dialog acts for modelling such dialogs and annotate the TEACh dataset that includes over 3,000 situated, task oriented conversations (consisting of 39.5k utterances in total) with dialog acts. To our knowledge,TEACh-DA is the first large scale dataset of dialog act annotations for embodied task completion. Furthermore, we demonstrate the use of this annotated dataset in training models for tagging the dialog acts of a given utterance, predicting the dialog act of the next response given a dialog history, and use the dialog acts to guide agent’s non-dialog behaviour. In particular, our experiments on the TEACh Execution from Dialog History task where the model predicts the sequence of low level actions to be executed in the environment for embodied task completion, demonstrate that dialog acts can improve end performance by up to 2 points compared to the system without dialog acts.</abstract>
@@ -185,7 +185,7 @@
       <title>Symbol and Communicative Grounding through Object Permanence with a Mobile Robot</title>
       <author><first>Josue</first><last>Torres-Fonseca</last></author>
       <author><first>Catherine</first><last>Henry</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>124–134</pages>
       <abstract>Object permanence is the ability to form and recall mental representations of objects even when they are not in view. Despite being a crucial developmental step for children, object permanence has had only some exploration as it relates to symbol and communicative grounding in spoken dialogue systems. In this paper, we leverage SLAM as a module for tracking object permanence and use a robot platform to move around a scene where it discovers objects and learns how they are denoted. We evaluated by comparing our system’s effectiveness at learning words from human dialogue partners both with and without object permanence. We found that with object permanence, human dialogue partners spoke with the robot and the robot correctly identified objects it had learned about significantly more than without object permanence, which suggests that object permanence helped facilitate communicative and symbol grounding.</abstract>
       <url hash="59987826">2022.sigdial-1.14</url>
@@ -215,7 +215,7 @@
       <author><first>Pan</first><last>Lu</last></author>
       <author><first>Weiyan</first><last>Shi</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <pages>146–158</pages>
       <abstract>Building a socially intelligent agent involves many challenges. One of which is to track the agent’s mental state transition and teach the agent to make decisions guided by its value like a human. Towards this end, we propose to incorporate mental state simulation and value modeling into dialogue agents. First, we build a hybrid mental state parser that extracts information from both the dialogue and event observations and maintains a graphical representation of the agent’s mind; Meanwhile, the transformer-based value model learns human preferences from the human value dataset, ValueNet. Empirical results show that the proposed model attains state-of-the-art performance on the dialogue/action/emotion prediction task in the fantasy text-adventure game dataset, LIGHT. We also show example cases to demonstrate: (i) how the proposed mental state parser can assist the agent’s decision by grounding on the context like locations and objects, and (ii) how the value model can help the agent make decisions based on its personal priorities.</abstract>
       <url hash="94ce9ca6">2022.sigdial-1.16</url>
@@ -227,7 +227,7 @@
       <title>Automatic Verbal Depiction of a Brick Assembly for a Robot Instructing Humans</title>
       <author><first>Rami</first><last>Younes</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Frederic</first><last>Elisei</last></author>
+      <author id="frederic-elisei"><first>Frederic</first><last>Elisei</last></author>
       <author><first>Damien</first><last>Pellier</last></author>
       <pages>159–171</pages>
       <abstract>Verbal and nonverbal communication skills are essential for human-robot interaction, in particular when the agents are involved in a shared task. We address the specific situation when the robot is the only agent knowing about the plan and the goal of the task and has to instruct the human partner. The case study is a brick assembly. We here describe a multi-layered verbal depictor whose semantic, syntactic and lexical settings have been collected and evaluated via crowdsourcing. One crowdsourced experiment involves a robot instructed pick-and-place task. We show that implicitly referring to achieved subgoals (stairs, pillows, etc) increases performance of human partners.</abstract>
@@ -251,7 +251,7 @@
       <title><fixed-case>EDU</fixed-case>-<fixed-case>AP</fixed-case>: Elementary Discourse Unit based Argument Parser</title>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>183–192</pages>
       <abstract>Neural approaches to end-to-end argument mining (AM) are often formulated as dependency parsing (DP), which relies on token-level sequence labeling and intricate post-processing for extracting argumentative structures from text. Although such methods yield reasonable results, operating solely with tokens increases the possibility of discontinuous and overly segmented structures due to minor inconsistencies in token level predictions. In this paper, we propose EDU-AP, an end-to-end argument parser, that alleviates such problems in dependency-based methods by exploiting the intrinsic relationship between elementary discourse units (EDUs) and argumentative discourse units (ADUs) and operates at both token and EDU level granularity. Further, appropriately using contextual information, along with optimizing a novel objective function during training, EDU-AP achieves significant improvements across all four tasks of AM compared to existing dependency-based methods.</abstract>
       <url hash="be3f24cb">2022.sigdial-1.19</url>
@@ -276,7 +276,7 @@
       <author><first>Qingyang</first><last>Wu</last></author>
       <author><first>Song</first><last>Feng</last></author>
       <author><first>Derek</first><last>Chen</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
       <pages>204–216</pages>
@@ -290,7 +290,7 @@
       <title>When can <fixed-case>I</fixed-case> Speak? Predicting initiation points for spoken dialogue agents</title>
       <author><first>Siyan</first><last>Li</last></author>
       <author><first>Ashwin</first><last>Paranjape</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>217–224</pages>
       <abstract>Current spoken dialogue systems initiate their turns after a long period of silence (700-1000ms), which leads to little real-time feedback, sluggish responses, and an overall stilted conversational flow. Humans typically respond within 200ms and successfully predicting initiation points in advance would allow spoken dialogue agents to do the same. In this work, we predict the lead-time to initiation using prosodic features from a pre-trained speech representation model (wav2vec 1.0) operating on user audio and word features from a pre-trained language model (GPT-2) operating on incremental transcriptions. To evaluate errors, we propose two metrics w.r.t. predicted and true lead times. We train and evaluate the models on the Switchboard Corpus and find that our method outperforms features from prior work on both metrics and vastly outperforms the common approach of waiting for 700ms of silence.</abstract>
       <url hash="892c5350">2022.sigdial-1.22</url>
@@ -300,7 +300,7 @@
     </paper>
     <paper id="23">
       <title>Using Interaction Style Dimensions to Characterize Spoken Dialog Corpora</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <pages>225–230</pages>
       <abstract>The construction of spoken dialog systems today relies heavily on appropriate corpora, but corpus selection is more an art than a science. As interaction style properties govern many aspects of dialog, they have the potential to be useful for relating and comparing corpora. This paper overviews a recently-developed model of interaction styles and shows how it can be used to identify relevant corpus differences, estimate corpus similarity, and flag likely outlier dialogs.</abstract>
       <url hash="08bd9c4b">2022.sigdial-1.23</url>
@@ -349,7 +349,7 @@
     <paper id="27">
       <title>How Well Do You Know Your Audience? Toward Socially-aware Question Generation</title>
       <author><first>Ian</first><last>Stewart</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>255–269</pages>
       <abstract>When writing, a person may need to anticipate questions from their audience, but different social groups may ask very different types of questions. If someone is writing about a problem they want to resolve, what kind of follow-up question will a domain expert ask, and could the writer better address the expert’s information needs by rewriting their original post? In this paper, we explore the task of socially-aware question generation. We collect a data set of questions and posts from social media, including background information about the question-askers’ social groups. We find that different social groups, such as experts and novices, consistently ask different types of questions. We train several text-generation models that incorporate social information, and we find that a discrete social-representation model outperforms the text-only model when different social groups ask highly different questions from one another. Our work provides a framework for developing text generation models that can help writers anticipate the information expectations of highly different social groups.</abstract>
       <url hash="70ee50d7">2022.sigdial-1.27</url>
@@ -365,7 +365,7 @@
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Michael</first><last>Heck</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>270–282</pages>
       <abstract>User simulators (USs) are commonly used to train task-oriented dialogue systems via reinforcement learning. The interactions often take place on semantic level for efficiency, but there is still a gap from semantic actions to natural language, which causes a mismatch between training and deployment environment. Incorporating a natural language generation (NLG) module with USs during training can partly deal with this problem. However, since the policy and NLG of USs are optimised separately, these simulated user utterances may not be natural enough in a given context. In this work, we propose a generative transformer-based user simulator (GenTUS). GenTUS consists of an encoder-decoder structure, which means it can optimise both the user policy and natural language generation jointly. GenTUS generates both semantic actions and natural language utterances, preserving interpretability and enhancing language variation. In addition, by representing the inputs and outputs as word sequences and by using a large pre-trained language model we can achieve generalisability in feature representation. We evaluate GenTUS with automatic metrics and human evaluation. Our results show that GenTUS generates more natural language and is able to transfer to an unseen ontology in a zero-shot fashion. In addition, its behaviour can be further shaped with reinforcement learning opening the door to training specialised user simulators.</abstract>
       <url hash="2ddcf18e">2022.sigdial-1.28</url>
@@ -411,7 +411,7 @@
     <paper id="32">
       <title>Structured Dialogue Discourse Parsing</title>
       <author><first>Ta-Chung</first><last>Chi</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>325–335</pages>
       <abstract>Dialogue discourse parsing aims to uncover the internal structure of a multi-participant conversation by finding all the discourse <i>links</i> and corresponding <i>relations</i>. Previous work either treats this task as a series of independent multiple-choice problems, in which the link existence and relations are decoded separately, or the encoding is restricted to only local interaction, ignoring the holistic structural information. In contrast, we propose a principled method that improves upon previous work from two perspectives: encoding and decoding. From the encoding side, we perform structured encoding on the adjacency matrix followed by the matrix-tree learning algorithm, where all discourse links and relations in the dialogue are jointly optimized based on latent tree-level distribution. From the decoding side, we perform structured inference using the modified Chiu-Liu-Edmonds algorithm, which explicitly generates the labeled multi-root non-projective spanning tree that best captures the discourse structure. In addition, unlike in previous work, we do not rely on hand-crafted features; this improves the model’s robustness. Experiments show that our method achieves new state-of-the-art, surpassing the previous model by 2.3 on STAC and 1.5 on Molweni (F1 scores).</abstract>
       <url hash="110e709a">2022.sigdial-1.32</url>
@@ -422,8 +422,8 @@
     <paper id="33">
       <title>“Do you follow me?”: A Survey of Recent Approaches in Dialogue State Tracking</title>
       <author><first>Léo</first><last>Jacqmin</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>336–350</pages>
       <abstract>While communicating with a user, a task-oriented dialogue system has to track the user’s needs at each turn according to the conversation history. This process called dialogue state tracking (DST) is crucial because it directly informs the downstream dialogue policy. DST has received a lot of interest in recent years with the text-to-text paradigm emerging as the favored approach. In this review paper, we first present the task and its associated datasets. Then, considering a large number of recent publications, we identify highlights and advances of research in 2021-2022. Although neural approaches have enabled significant progress, we argue that some critical aspects of dialogue systems such as generalizability are still underexplored. To motivate future studies, we propose several research avenues.</abstract>
       <url hash="2b44ee2d">2022.sigdial-1.33</url>
@@ -457,7 +457,7 @@
     <paper id="36">
       <title>Getting Better Dialogue Context for Knowledge Identification by Leveraging Document-level Topic Shift</title>
       <author><first>Nhat</first><last>Tran</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>368–375</pages>
       <abstract>To build a goal-oriented dialogue system that can generate responses given a knowledge base, identifying the relevant pieces of information to be grounded in is vital. When the number of documents in the knowledge base is large, retrieval approaches are typically used to identify the top relevant documents. However, most prior work simply uses an entire dialogue history to guide retrieval, rather than exploiting a dialogue’s topical structure. In this work, we examine the importance of building the proper contextualized dialogue history when document-level topic shifts are present. Our results suggest that excluding irrelevant turns from the dialogue history (e.g., excluding turns not grounded in the same document as the current turn) leads to better retrieval results. We also propose a cascading approach utilizing the topical nature of a knowledge-grounded conversation to further manipulate the dialogue history used as input to the retrieval models.</abstract>
       <url hash="223b0f0c">2022.sigdial-1.36</url>
@@ -487,7 +487,7 @@
       <author><first>Jillian</first><last>Tang</last></author>
       <author><first>Avanika</first><last>Narayan</last></author>
       <author><first>Giovanni</first><last>Campagna</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>376–395</pages>
       <abstract>We present Chirpy Cardinal, an open-domain social chatbot. Aiming to be both informative and conversational, our bot chats with users in an authentic, emotionally intelligent way. By integrating controlled neural generation with scaffolded, hand-written dialogue, we let both the user and bot take turns driving the conversation, producing an engaging and socially fluent experience. Deployed in the fourth iteration of the Alexa Prize Socialbot Grand Challenge, Chirpy Cardinal handled thousands of conversations per day, placing second out of nine bots with an average user rating of 3.58/5.</abstract>
       <url hash="6883106a">2022.sigdial-1.37</url>
@@ -523,7 +523,7 @@
     <paper id="40">
       <title>Entity-based De-noising Modeling for Controllable Dialogue Summarization</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>407–418</pages>
       <abstract>Although fine-tuning pre-trained backbones produces fluent and grammatically-correct text in various language generation tasks, factual consistency in abstractive summarization remains challenging. This challenge is especially thorny for dialogue summarization, where neural models often make inaccurate associations between personal named entities and their respective actions. To tackle this type of hallucination, we present an entity-based de-noising model via text perturbation on reference summaries. We then apply this proposed approach in beam search validation, conditional training augmentation, and inference post-editing. Experimental results on the SAMSum corpus show that state-of-the-art models equipped with our proposed method achieve generation quality improvement in both automatic evaluation and human assessment.</abstract>
       <url hash="6c4f785f">2022.sigdial-1.40</url>
@@ -605,7 +605,7 @@
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Shutong</first><last>Feng</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>478–489</pages>
       <abstract>Task-oriented dialogue systems aim to fulfill user goals through natural language interactions. They are ideally evaluated with human users, which however is unattainable to do at every iteration of the development phase. Simulated users could be an alternative, however their development is nontrivial. Therefore, researchers resort to offline metrics on existing human-human corpora, which are more practical and easily reproducible. They are unfortunately limited in reflecting real performance of dialogue systems. BLEU for instance is poorly correlated with human judgment, and existing corpus-based metrics such as success rate overlook dialogue context mismatches. There is still a need for a reliable metric for task-oriented systems with good generalization and strong correlation with human judgements. In this paper, we propose the use of offline reinforcement learning for dialogue evaluation based on static data. Such an evaluator is typically called a critic and utilized for policy optimization. We go one step further and show that offline RL critics can be trained for any dialogue system as external evaluators, allowing dialogue performance comparisons across various types of systems. This approach has the benefit of being corpus- and model-independent, while attaining strong correlation with human judgements, which we confirm via an interactive user trial.</abstract>
       <url hash="9a60ee03">2022.sigdial-1.46</url>
@@ -621,7 +621,7 @@
       <author><first>Bradford</first><last>Mott</last></author>
       <author><first>Krista</first><last>Glazewski</last></author>
       <author><first>Cindy E.</first><last>Hmelo-Silver</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>490–499</pages>
       <abstract>Accurate detection and appropriate handling of disruptive talk in multi-party dialogue is essential for users to achieve shared goals. In collaborative game-based learning environments, detecting and attending to disruptive talk holds significant potential since it can cause distraction and produce negative learning experiences for students. We present a novel attention-based user-aware neural architecture for disruptive talk detection that uses a sequence dropout-based regularization mechanism. The disruptive talk detection models are evaluated with multi-party dialogue collected from 72 middle school students who interacted with a collaborative game-based learning environment. Our proposed disruptive talk detection model significantly outperforms competitive baseline approaches and shows significant potential for helping to support effective collaborative learning experiences.</abstract>
       <url hash="f9769efc">2022.sigdial-1.47</url>
@@ -634,7 +634,7 @@
       <author><first>Symon</first><last>Stevens-Guille</last></author>
       <author><first>Aleksandre</first><last>Maskharashvili</last></author>
       <author><first>Xintong</first><last>Li</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>500–515</pages>
       <abstract>We report results of experiments using BART (Lewis et al., 2019) and the Penn Discourse Tree Bank (Webber et al., 2019) (PDTB) to generate texts with correctly realized discourse relations. We address a question left open by previous research (Yung et al., 2021; Ko and Li, 2020) concerning whether conditioning the model on the intended discourse relation—which corresponds to adding explicit discourse relation information into the input to the model—improves its performance. Our results suggest that including discourse relation information in the input of the model significantly improves the consistency with which it produces a correctly realized discourse relation in the output. We compare our models’ performance to known results concerning the discourse structures found in written text and their possible explanations in terms of discourse interpretation strategies hypothesized in the psycholinguistics literature. Our findings suggest that natural language generation models based on current pre-trained Transformers will benefit from infusion with discourse level information if they aim to construct discourses with the intended relations.</abstract>
       <url hash="6ecde446">2022.sigdial-1.48</url>
@@ -647,7 +647,7 @@
       <author><first>Xiaoying</first><last>Zhang</last></author>
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>516–530</pages>
       <abstract>End-to-end task bots are typically learned over a static and usually limited-size corpus. However, when deployed in dynamic, changing, and open environments to interact with users, task bots tend to fail when confronted with data that deviate from the training corpus, i.e., out-of-distribution samples. In this paper, we study the problem of automatically adapting task bots to changing environments by learning from human-bot interactions with minimum or zero human annotations. We propose SL-Agent, a novel self-learning framework for building end-to-end task bots. SL-Agent consists of a dialog model and a pre-trained reward model to predict the quality of an agent response. It enables task bots to automatically adapt to changing environments by learning from the unlabeled human-bot dialog logs accumulated after deployment via reinforcement learning with the incorporated reward model. Experimental results on four well-studied dialog tasks show the effectiveness of SL-Agent to automatically adapt to changing environments, using both automatic and human evaluations. We will release code and data for further research.</abstract>
       <url hash="35fba05f">2022.sigdial-1.49</url>
@@ -696,10 +696,10 @@
       <title>Dialogue Term Extraction using Transfer Learning and Topological Data Analysis</title>
       <author><first>Renato</first><last>Vukovic</last></author>
       <author><first>Michael</first><last>Heck</last></author>
-      <author><first>Benjamin</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin</first><last>Ruppik</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Marcus</first><last>Zibrowius</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>564–581</pages>
       <abstract>Goal oriented dialogue systems were originally designed as a natural language interface to a fixed data-set of entities that users might inquire about, further described by domain, slots and values. As we move towards adaptable dialogue systems where knowledge about domains, slots and values may change, there is an increasing need to automatically extract these terms from raw dialogues or related non-dialogue data on a large scale. In this paper, we take an important step in this direction by exploring different features that can enable systems to discover realisations of domains, slots and values in dialogues in a purely data-driven fashion. The features that we examine stem from word embeddings, language modelling features, as well as topological features of the word embedding space. To examine the utility of each feature set, we train a seed model based on the widely used MultiWOZ data-set. Then, we apply this model to a different corpus, the Schema-guided dialogue data-set. Our method outperforms the previously proposed approach that relies solely on word embeddings. We also demonstrate that each of the features is responsible for discovering different kinds of content. We believe our results warrant further research towards ontology induction, and continued harnessing of topological data analysis for dialogue and natural language processing research.</abstract>
       <url hash="151e734f">2022.sigdial-1.53</url>
@@ -747,7 +747,7 @@
     <paper id="57">
       <title>Comparison of Lexical Alignment with a Teachable Robot in Human-Robot and Human-Human-Robot Interactions</title>
       <author><first>Yuya</first><last>Asano</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Mingzhi</first><last>Yu</last></author>
       <author><first>Nikki</first><last>Lobczowski</last></author>
       <author><first>Timothy</first><last>Nokes-Malach</last></author>
diff --git a/data/xml/2022.sigmorphon.xml b/data/xml/2022.sigmorphon.xml
index edaa4b5e39..404ddacfc3 100644
--- a/data/xml/2022.sigmorphon.xml
+++ b/data/xml/2022.sigmorphon.xml
@@ -30,7 +30,7 @@
       <author><first>Simon</first><last>Todd</last></author>
       <author><first>Annie</first><last>Huang</last></author>
       <author><first>Jeremy</first><last>Needle</last></author>
-      <author><first>Jennifer</first><last>Hay</last></author>
+      <author id="jennifer-hay"><first>Jennifer</first><last>Hay</last></author>
       <author><first>Jeanette</first><last>King</last></author>
       <pages>12-22</pages>
       <abstract>We present an extension of the Morfessor Baseline model of unsupervised morphological segmentation (Creutz and Lagus, 2007) that incorporates abstract templates for reduplication, a typologically common but computationally underaddressed process. Through a detailed investigation that applies the model to Maori, the ̄ Indigenous language of Aotearoa New Zealand, we show that incorporating templates improves Morfessor’s ability to identify instances of reduplication, and does so most when there are multiple minimally-overlapping templates. We present an error analysis that reveals important factors to consider when applying the extended model and suggests useful future directions.</abstract>
@@ -64,7 +64,7 @@
       <title>A Masked Segmental Language Model for Unsupervised Natural Language Segmentation</title>
       <author><first>C.m.</first><last>Downey</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <author><first>Shane</first><last>Steinert-Threlkeld</last></author>
       <pages>39-50</pages>
       <abstract>We introduce a Masked Segmental Language Model (MSLM) for joint language modeling and unsupervised segmentation. While near-perfect supervised methods have been developed for segmenting human-like linguistic units in resource-rich languages such as Chinese, many of the world’s languages are both morphologically complex, and have no large dataset of “gold” segmentations for supervised training. Segmental Language Models offer a unique approach by conducting unsupervised segmentation as the byproduct of a neural language modeling objective. However, current SLMs are limited in their scalability due to their recurrent architecture. We propose a new type of SLM for use in both unsupervised and lightly supervised segmentation tasks. The MSLM is built on a span-masking transformer architecture, harnessing a masked bidirectional modeling context and attention, as well as adding the potential for model scalability. In a series of experiments, our model outperforms the segmentation quality of recurrent SLMs on Chinese, and performs similarly to the recurrent model on English.</abstract>
@@ -86,7 +86,7 @@
     <paper id="7">
       <title>Subword-based Cross-lingual Transfer of Embeddings from <fixed-case>H</fixed-case>indi to <fixed-case>M</fixed-case>arathi and <fixed-case>N</fixed-case>epali</title>
       <author><first>Niyati</first><last>Bafna</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>61-71</pages>
       <abstract>Word embeddings are growing to be a crucial resource in the field of NLP for any language. This work introduces a novel technique for static subword embeddings transfer for Indic languages from a relatively higher resource language to a genealogically related low resource language. We primarily work with HindiMarathi, simulating a low-resource scenario for Marathi, and confirm observed trends on Nepali. We demonstrate the consistent benefits of unsupervised morphemic segmentation on both source and target sides over the treatment performed by fastText. Our best-performing approach uses an EM-style approach to learning bilingual subword embeddings; we also show, for the first time, that a trivial “copyand-paste” embeddings transfer based on even perfect bilingual lexicons is inadequate in capturing language-specific relationships. We find that our approach substantially outperforms the fastText baselines for both Marathi and Nepali on the Word Similarity task as well as WordNetBased Synonymy Tests; on the former task, its performance for Marathi is close to that of pretrained fastText embeddings that use three orders of magnitude more Marathi data.</abstract>
       <url hash="11fa1971">2022.sigmorphon-1.7</url>
@@ -108,8 +108,8 @@
     <paper id="9">
       <title>Domain-Informed Probing of wav2vec 2.0 Embeddings for Phonetic Features</title>
       <author><first>Patrick</first><last>Cormac English</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <pages>83-91</pages>
       <abstract>In recent years large transformer model architectures have become available which provide a novel means of generating high-quality vector representations of speech audio. These transformers make use of an attention mechanism to generate representations enhanced with contextual and positional information from the input sequence. Previous works have explored the capabilities of these models with regard to performance in tasks such as speech recognition and speaker verification, but there has not been a significant inquiry as to the manner in which the contextual information provided by the transformer architecture impacts the representation of phonetic information within these models. In this paper, we report the results of a number of probing experiments on the representations generated by the wav2vec 2.0 model’s transformer component, with regard to the encoding of phonetic categorization information within the generated embeddings. We find that the contextual information generated by the transformer’s operation results in enhanced capture of phonetic detail by the model, and allows for distinctions to emerge in acoustic data that are otherwise difficult to separate.</abstract>
       <url hash="4f36b2d3">2022.sigmorphon-1.9</url>
@@ -136,7 +136,7 @@
       <author><first>Aryaman</first><last>Arora</last></author>
       <author><first>Viktor</first><last>Martinovic</last></author>
       <author><first>Kyle</first><last>Gorman</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Amarsanaa</first><last>Ganbold</last></author>
       <author><first>Šárka</first><last>Dohnalová</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
@@ -152,7 +152,7 @@
     </paper>
     <paper id="12">
       <title>Sharing Data by Language Family: Data Augmentation for <fixed-case>R</fixed-case>omance Language Morpheme Segmentation</title>
-      <author><first>Lauren</first><last>Levine</last></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last></author>
       <pages>117-123</pages>
       <abstract>This paper presents a basic character level sequence-to-sequence approach to morpheme segmentation for the following Romance languages: French, Italian, and Spanish. We experiment with adding a small set of additional linguistic features, as well as with sharing training data between sister languages for morphological categories with low performance in single language base models. We find that while the additional linguistic features were generally not helpful in this instance, data augmentation between sister languages did help to raise the scores of some individual morphological categories, but did not consistently result in an overall improvement when considering the aggregate of the categories.</abstract>
       <url hash="8f2271d6">2022.sigmorphon-1.12</url>
@@ -298,7 +298,7 @@
       <author><first>Changbing</first><last>Yang</last></author>
       <author><first>Ruixin (Ray)</first><last>Yang</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>226-235</pages>
       <abstract>This paper presents experiments on morphological inflection using data from the SIGMORPHON-UniMorph 2022 Shared Task 0: Generalization and Typologically Diverse Morphological Inflection. We present a transformer inflection system, which enriches the standard transformer architecture with reverse positional encoding and type embeddings. We further apply data hallucination and lemma copying to augment training data. We train models using a two-stage procedure: (1) We first train on the augmented training data using standard backpropagation and teacher forcing. (2) We then continue training with a variant of the scheduled sampling algorithm dubbed student forcing. Our system delivers competitive performance under the small and large data conditions on the shared task datasets.</abstract>
       <url hash="9ffc6aa7">2022.sigmorphon-1.23</url>
diff --git a/data/xml/2022.signlang.xml b/data/xml/2022.signlang.xml
index 0759a3596c..1d519a4b74 100644
--- a/data/xml/2022.signlang.xml
+++ b/data/xml/2022.signlang.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources</booktitle>
       <editor><first>Eleni</first><last>Efthimiou</last></editor>
-      <editor><first>Stavroula-Evita</first><last>Fotinea</last></editor>
+      <editor id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></editor>
       <editor><first>Thomas</first><last>Hanke</last></editor>
       <editor><first>Julie A.</first><last>Hochgesang</last></editor>
       <editor><first>Jette</first><last>Kristoffersen</last></editor>
diff --git a/data/xml/2022.sigtyp.xml b/data/xml/2022.sigtyp.xml
index 4d0f71cca9..09b4ab9a78 100644
--- a/data/xml/2022.sigtyp.xml
+++ b/data/xml/2022.sigtyp.xml
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Word-order Typology in Multilingual <fixed-case>BERT</fixed-case>: A Case Study in Subordinate-Clause Detection</title>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <pages>11-21</pages>
       <abstract>The capabilities and limitations of BERT and similar models are still unclear when it comes to learning syntactic abstractions, in particular across languages. In this paper, we use the task of subordinate-clause detection within and across languages to probe these properties. We show that this task is deceptively simple, with easy gains offset by a long tail of harder cases, and that BERT’s zero-shot performance is dominated by word-order effects, mirroring the SVO/VSO/SOV typology.</abstract>
       <url hash="e3b120cb">2022.sigtyp-1.2</url>
@@ -53,8 +53,8 @@
     </paper>
     <paper id="4">
       <title>Cross-linguistic Comparison of Linguistic Feature Encoding in <fixed-case>BERT</fixed-case> Models for Typologically Different Languages</title>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>27-35</pages>
       <abstract>Though recently there have been an increased interest in how pre-trained language models encode different linguistic features, there is still a lack of systematic comparison between languages with different morphology and syntax. In this paper, using BERT as an example of a pre-trained model, we compare how three typologically different languages (English, Korean, and Russian) encode morphology and syntax features across different layers. In particular, we contrast languages which differ in a particular aspect, such as flexibility of word order, head directionality, morphological type, presence of grammatical gender, and morphological richness, across four different tasks.</abstract>
@@ -111,7 +111,7 @@
     <paper id="9">
       <title>Mockingbird at the <fixed-case>SIGTYP</fixed-case> 2022 Shared Task: Two Types of Models for the Prediction of Cognate Reflexes</title>
       <author><first>Christo</first><last>Kirov</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <pages>70-79</pages>
       <abstract>The SIGTYP 2022 shared task concerns the problem of word reflex generation in a target language, given cognate words from a subset of related languages. We present two systems to tackle this problem, covering two very different modeling approaches. The first model extends transformer-based encoder-decoder sequence-to-sequence modeling, by encoding all available input cognates in parallel, and having the decoder attend to the resulting joint representation during inference. The second approach takes inspiration from the field of image restoration, where models are tasked with recovering pixels in an image that have been masked out. For reflex generation, the missing reflexes are treated as “masked pixels” in an “image” which is a representation of an entire cognate set across a language family. As in the image restoration case, cognate restoration is performed with a convolutional network.</abstract>
diff --git a/data/xml/2022.sigul.xml b/data/xml/2022.sigul.xml
index 0b0f9a89f4..460a79ce67 100644
--- a/data/xml/2022.sigul.xml
+++ b/data/xml/2022.sigul.xml
@@ -23,7 +23,7 @@
       <author><first>Bolaji</first><last>Yusuf</last></author>
       <author><first>Lucas</first><last>Ondel</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>1–9</pages>
       <abstract>Documenting languages helps to prevent the extinction of endangered dialects - many of which are otherwise expected to disappear by the end of the century. When documenting oral languages, unsupervised word segmentation (UWS) from speech is a useful, yet challenging, task. It consists in producing time-stamps for slicing utterances into smaller segments corresponding to words, being performed from phonetic transcriptions, or in the absence of these, from the output of unsupervised speech discretization models. These discretization models are trained using raw speech only, producing discrete speech units that can be applied for downstream (text-based) tasks. In this paper we compare five of these models: three Bayesian and two neural approaches, with regards to the exploitability of the produced units for UWS. For the UWS task, we experiment with two models, using as our target language the Mboshi (Bantu C25), an unwritten language from Congo-Brazzaville. Additionally, we report results for Finnish, Hungarian, Romanian and Russian in equally low-resource settings, using only 4 hours of speech. Our results suggest that neural models for speech discretization are difficult to exploit in our setting, and that it might be necessary to adapt them to limit sequence length. We obtain our best UWS results by using Bayesian models that produce high quality, yet compressed, discrete representations of the input speech signal.</abstract>
       <url hash="0da6a6e9">2022.sigul-1.1</url>
@@ -46,7 +46,7 @@
       <author><first>Phat</first><last>Do</last></author>
       <author><first>Matt</first><last>Coler</last></author>
       <author><first>Jelske</first><last>Dijkstra</last></author>
-      <author><first>Esther</first><last>Klabbers</last></author>
+      <author id="esther-klabbers"><first>Esther</first><last>Klabbers</last></author>
       <pages>16–22</pages>
       <abstract>We propose a new approach for phoneme mapping in cross-lingual transfer learning for text-to-speech (TTS) in under-resourced languages (URLs), using phonological features from the PHOIBLE database and a language-independent mapping rule. This approach was validated through our experiment, in which we pre-trained acoustic models in Dutch, Finnish, French, Japanese, and Spanish, and fine-tuned them with 30 minutes of Frisian training data. The experiment showed an improvement in both naturalness and pronunciation accuracy in the synthesized Frisian speech when our mapping approach was used. Since this improvement also depended on the source language, we then experimented on finding a good criterion for selecting source languages. As an alternative to the traditionally used language family criterion, we tested a novel idea of using Angular Similarity of Phoneme Frequencies (ASPF), which measures the similarity between the phoneme systems of two languages. ASPF was empirically confirmed to be more effective than language family as a criterion for source language selection, and also to affect the phoneme mapping’s effectiveness. Thus, a combination of our phoneme mapping approach and the ASPF measure can be beneficially adopted by other studies involving multilingual or cross-lingual TTS for URLs.</abstract>
       <url hash="0ddc6797">2022.sigul-1.3</url>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="8">
       <title>Quality versus Quantity: Building <fixed-case>C</fixed-case>atalan-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> Resources</title>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Ksenia</first><last>Kharitonova</last></author>
       <author><first>Blanca</first><last>Calvo Figueras</last></author>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
@@ -123,7 +123,7 @@
     <paper id="10">
       <title><fixed-case>CUNI</fixed-case> Submission to <fixed-case>MT</fixed-case>4<fixed-case>A</fixed-case>ll Shared Task</title>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>78–82</pages>
       <abstract>This paper describes our submission to the MT4All Shared Task in unsupervised machine translation from English to Ukrainian, Kazakh and Georgian in the legal domain. In addition to the standard pipeline for unsupervised training (pretraining followed by denoising and back-translation), we used supervised training on a pseudo-parallel corpus retrieved from the provided mono-lingual corpora. Our system scored significantly higher than the baseline hybrid unsupervised MT system.</abstract>
       <url hash="7083c6d1">2022.sigul-1.10</url>
@@ -189,7 +189,7 @@
       <title>Machine Translation from <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman to Alemannic Dialects</title>
       <author><first>Louisa</first><last>Lambrecht</last></author>
       <author><first>Felix</first><last>Schneider</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>129–136</pages>
       <abstract>Machine translation has been researched using deep neural networks in recent years. These networks require lots of data to learn abstract representations of the input stored in continuous vectors. Dialect translation has become more important since the advent of social media. In particular, when dialect speakers and standard language speakers no longer understand each other, machine translation is of rising concern. Usually, dialect translation is a typical low-resourced language setting facing data scarcity problems. Additionally, spelling inconsistencies due to varying pronunciations and the lack of spelling rules complicate translation. This paper presents the best-performing approaches to handle these problems for Alemannic dialects. The results show that back-translation and conditioning on dialectal manifestations achieve the most remarkable enhancement over the baseline. Using back-translation, a significant gain of +4.5 over the strong transformer baseline of 37.3 BLEU points is accomplished. Differentiating between several Alemannic dialects instead of treating Alemannic as one dialect leads to substantial improvements: Multi-dialectal translation surpasses the baseline on the dialectal test sets. However, training individual models outperforms the multi-dialectal approach. There, improvements range from 7.5 to 10.6 BLEU points over the baseline depending on the dialect.</abstract>
       <url hash="ad9f2341">2022.sigul-1.17</url>
@@ -200,7 +200,7 @@
       <author><first>Tadesse Destaw</first><last>Belay</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Abinew</first><last>Ayele</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>137–145</pages>
       <abstract>In this work, we build a Question Answering (QA) classification dataset from a social media platform, namely the Telegram public channel called @AskAnythingEthiopia. The channel has more than 78k subscribers and has existed since May 31, 2019. The platform allows asking questions that belong to various domains, like politics, economics, health, education, and so on. Since the questions are posed in a mixed-code, we apply different strategies to pre-process the dataset. Questions are posted in Amharic, English, or Amharic but in a Latin script. As part of the pre-processing tools, we build a Latin to Ethiopic Script transliteration tool. We collect 8k Amharic and 24K transliterated questions and develop deep learning-based questions answering classifiers that attain as high as an F-score of 57.29 in 20 different question classes or categories. The datasets and pre-processing scripts are open-sourced to facilitate further research on the Amharic community-based question answering.</abstract>
       <url hash="85f7fb09">2022.sigul-1.18</url>
@@ -236,7 +236,7 @@
     <paper id="22">
       <title>Building Open-source Speech Technology for Low-resource Minority Languages with <fixed-case>S</fixed-case>á<fixed-case>M</fixed-case>i as an Example – Tools, Methods and Experiments</title>
       <author><first>Katri</first><last>Hiovain-Asikainen</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <pages>169–175</pages>
       <abstract>This paper presents a work-in-progress report of an open-source speech technology project for indigenous Sami languages. A less detailed description of this work has been presented in a more general paper about the whole GiellaLT language infrastructure, submitted to the LREC 2022 main conference. At this stage, we have designed and collected a text corpus specifically for developing speech technology applications, namely Text-to-speech (TTS) and Automatic speech recognition (ASR) for the Lule and North Sami languages. We have also piloted and experimented with different speech synthesis technologies using a miniature speech corpus as well as developed tools for effective processing of large spoken corpora. Additionally, we discuss effective and mindful use of the speech corpus and also possibilities to use found/archive materials for training an ASR model for these languages.</abstract>
       <url hash="c4536b2b">2022.sigul-1.22</url>
@@ -245,7 +245,7 @@
     <paper id="23">
       <title>Investigating the Quality of Static Anchor Embeddings from Transformers for Under-Resourced Languages</title>
       <author><first>Pranaydeep</first><last>Singh</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>176–184</pages>
       <abstract>This paper reports on experiments for cross-lingual transfer using the anchor-based approach of Schuster et al. (2019) for English and a low-resourced language, namely Hindi. For the sake of comparison, we also evaluate the approach on three very different higher-resourced languages, viz. Dutch, Russian and Chinese. Initially designed for ELMo embeddings, we analyze the approach for the more recent BERT family of transformers for a variety of tasks, both mono and cross-lingual. The results largely prove that like most other cross-lingual transfer approaches, the static anchor approach is underwhelming for the low-resource language, while performing adequately for the higher resourced ones. We attempt to provide insights into both the quality of the anchors, and the performance for low-shot cross-lingual transfer to better understand this performance gap. We make the extracted anchors and the modified train and test sets available for future research at <url>https://github.com/pranaydeeps/Vyaapak</url></abstract>
diff --git a/data/xml/2022.slpat.xml b/data/xml/2022.slpat.xml
index 985bf76e4b..bd11aafb1f 100644
--- a/data/xml/2022.slpat.xml
+++ b/data/xml/2022.slpat.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Ninth Workshop on Speech and Language Processing for Assistive Technologies (SLPAT-2022)</booktitle>
       <editor><first>Sarah</first><last>Ebling</last></editor>
-      <editor><first>Emily</first><last>Prud’hommeaux</last></editor>
+      <editor id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></editor>
       <editor><first>Preethi</first><last>Vaidyanathan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
@@ -75,7 +75,7 @@
       <title>Producing <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Subtitles for <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> Content</title>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>37-43</pages>
       <abstract>In this study we compare two approaches (neural machine translation and edit-based) and the use of synthetic data for the task of translating normalised Swiss German ASR output into correct written Standard German for subtitles, with a special focus on syntactic differences. Results suggest that NMT is better suited to this task and that relatively simple rule-based generation of training data could be a valuable approach for cases where little training data is available and transformations are simple.</abstract>
       <url hash="b49f1258">2022.slpat-1.5</url>
@@ -88,7 +88,7 @@
       <author><first>Magali</first><last>Norré</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>44-49</pages>
       <abstract>Communication between physician and patients can lead to misunderstandings, especially for disabled people. An automatic system that translates natural language into a pictographic language is one of the solutions that could help to overcome this issue. In this preliminary study, we present the French version of a translation system using the Arasaac pictographs and we investigate the strategies used by speech therapists to translate into pictographs. We also evaluate the medical coverage of this tool for translating physician questions and patient instructions.</abstract>
       <url hash="e8cd017e">2022.slpat-1.6</url>
@@ -120,9 +120,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>C</fixed-case>ue<fixed-case>B</fixed-case>ot: Cue-Controlled Response Generation for Assistive Interaction Usages</title>
-      <author><first>Shachi</first><last>H. Kumar</last></author>
+      <author id="shachi-h-kumar"><first>Shachi</first><last>H. Kumar</last></author>
       <author><first>Hsuan</first><last>Su</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Max</first><last>Pinaroc</last></author>
       <author><first>Sai</first><last>Prasad</last></author>
       <author><first>Saurav</first><last>Sahay</last></author>
diff --git a/data/xml/2022.sltat.xml b/data/xml/2022.sltat.xml
index ca2034186a..247b732162 100644
--- a/data/xml/2022.sltat.xml
+++ b/data/xml/2022.sltat.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and Perspectives</booktitle>
       <editor><first>Eleni</first><last>Efthimiou</last></editor>
-      <editor><first>Stavroula-Evita</first><last>Fotinea</last></editor>
+      <editor id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></editor>
       <editor><first>Thomas</first><last>Hanke</last></editor>
       <editor><first>John C.</first><last>McDonald</last></editor>
       <editor><first>Dimitar</first><last>Shterionov</last></editor>
@@ -24,7 +24,7 @@
       <title>Synthesis for the Kinematic Control of Identity in Sign Language</title>
       <author><first>Félix</first><last>Bigand</last></author>
       <author><first>Elise</first><last>Prigent</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <pages>1–6</pages>
       <abstract>Sign Language (SL) animations generated from motion capture (mocap) of real signers convey critical information about their identity. It has been suggested that this information is mostly carried by statistics of the movements kinematics. Manipulating these statistics in the generation of SL movements could allow controlling the identity of the signer, notably to preserve anonymity. This paper tests this hypothesis by presenting a novel synthesis algorithm that manipulates the identity-specific statistics of mocap recordings. The algorithm produced convincing new versions of French Sign Language discourses, which accurately modulated the identity prediction of a machine learning model. These results open up promising perspectives toward the automatic control of identity in the motion animation of virtual signers.</abstract>
       <url hash="cf2584a4">2022.sltat-1.1</url>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>Example-based Multilinear Sign Language Generation from a Hierarchical Representation</title>
       <author><first>Boris</first><last>Dauriac</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
       <pages>21–28</pages>
       <abstract>This article presents an original method for automatic generation of sign language (SL) content by means of the animation of an avatar, with the aim of creating animations that respect as much as possible linguistic constraints while keeping bio-realistic properties. This method is based on the use of a domain-specific bilingual corpus richly annotated with timed alignments between SL motion capture data, text and hierarchical expressions from the framework called AZee at subsentential level. Animations representing new SL content are built from blocks of animations present in the corpus and adapted to the context if necessary. A smart blending approach has been designed that allows the concatenation, replacement and adaptation of original animation blocks. This approach has been tested on a tailored testset to show as a proof of concept its potential in comprehensibility and fluidity of the animation, as well as its current limits.</abstract>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>K</fixed-case>o<fixed-case>S</fixed-case>ign Sign Language Translation Project: Introducing The <fixed-case>NIASL</fixed-case>2021 Dataset</title>
-      <author><first>Mathew</first><last>Huerta-Enochian</last></author>
+      <author id="mathew-huerta-enochian"><first>Mathew</first><last>Huerta-Enochian</last></author>
       <author><first>Du Hui</first><last>Lee</last></author>
       <author><first>Hye Jin</first><last>Myung</last></author>
       <author><first>Kang Suk</first><last>Byun</last></author>
diff --git a/data/xml/2022.smila.xml b/data/xml/2022.smila.xml
index b9dba6c777..2cb76182c8 100644
--- a/data/xml/2022.smila.xml
+++ b/data/xml/2022.smila.xml
@@ -104,7 +104,7 @@
       <title>Inhalation Noises as Endings of Laughs in Conversational Speech</title>
       <author><first>Jürgen</first><last>Trouvain</last></author>
       <author><first>Raphael</first><last>Werner</last></author>
-      <author><first>Khiet</first><last>Truong</last></author>
+      <author id="khiet-p-truong"><first>Khiet</first><last>Truong</last></author>
       <pages>28–29</pages>
       <abstract>In this study we investigate the role of inhalation noises at the end of laughter events in two conversational corpora that provide relevant annotations. A re-annotation of the categories for laughter, silence and inbreath noises enabled us to see that inhalation noises terminate laughter events in the majority of all inspected laughs with a duration comparable to inbreath noises initiating speech phases. This type of corpus analysis helps to understand the mechanisms of audible respiratory activities in speaking vs. laughing in conversations.</abstract>
       <url hash="c0611dc6">2022.smila-1.8</url>
diff --git a/data/xml/2022.smm4h.xml b/data/xml/2022.smm4h.xml
index 5cc7462c2f..0a871875fc 100644
--- a/data/xml/2022.smm4h.xml
+++ b/data/xml/2022.smm4h.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2022-10-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh Workshop on Social Media Mining for Health Applications, Workshop &amp; Shared Task</booktitle>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
-      <editor><first>Davy</first><last>Weissenbacher</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
       <month>October</month>
@@ -76,8 +76,8 @@
     <paper id="6">
       <title><fixed-case>NLP</fixed-case>-<fixed-case>CIC</fixed-case>-<fixed-case>WFU</fixed-case> at <fixed-case>S</fixed-case>ocial<fixed-case>D</fixed-case>is<fixed-case>NER</fixed-case>: Disease Mention Extraction in <fixed-case>S</fixed-case>panish Tweets Using Transfer Learning and Search by Propagation</title>
       <author><first>Antonio</first><last>Tamayo</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
-      <author><first>Diego</first><last>Burgos</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="diego-a-burgos"><first>Diego</first><last>Burgos</last></author>
       <pages>19–22</pages>
       <abstract>Named entity recognition (e.g., disease mention extraction) is one of the most relevant tasks for data mining in the medical field. Although it is a well-known challenge, the bulk of the efforts to tackle this task have been made using clinical texts commonly written in English. In this work, we present our contribution to the SocialDisNER competition, which consists of a transfer learning approach to extracting disease mentions in a corpus from Twitter written in Spanish. We fine-tuned a model based on mBERT and applied post-processing using regular expressions to propagate the entities identified by the model and enhance disease mention extraction. Our system achieved a competitive strict F1 of 0.851 on the testing data set.</abstract>
       <url hash="995d4339">2022.smm4h-1.6</url>
@@ -98,8 +98,8 @@
       <author><first>Mariia</first><last>Chizhikova</last></author>
       <author><first>Pilar</first><last>López-Úbeda</last></author>
       <author><first>Manuel C.</first><last>Díaz-Galiano</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
       <pages>27–30</pages>
       <abstract>This paper covers participation of the SINAI team in Tasks 5 and 10 of the Social Media Mining for Health (#SSM4H) workshop at COLING-2022. These tasks focus on leveraging Twitter posts written in Spanish for healthcare research. The objective of Task 5 was to classify tweets reporting COVID-19 symptoms, while Task 10 required identifying disease mentions in Twitter posts. The presented systems explore large RoBERTa language models pre-trained on Twitter data in the case of tweet classification task and general-domain data for the disease recognition task. We also present a text pre-processing methodology implemented in both systems and describe an initial weakly-supervised fine-tuning phase alongside with a submission post-processing procedure designed for Task 10. The systems obtained 0.84 F1-score on the Task 5 and 0.77 F1-score on Task 10.</abstract>
       <url hash="b978ef2e">2022.smm4h-1.8</url>
@@ -162,8 +162,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>READ</fixed-case>-<fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>ed@<fixed-case>S</fixed-case>ocial<fixed-case>D</fixed-case>is<fixed-case>NER</fixed-case>: Adaptation of an Annotation System to <fixed-case>S</fixed-case>panish Tweets</title>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>48–51</pages>
       <abstract>We describe the work of the READ-BioMed team for the preparation of a submission to the SocialDisNER Disease Named Entity Recognition (NER) Task (Task 10) in 2022. We had developed a system for named entity recognition for identifying biomedical concepts in English MEDLINE citations and Spanish clinical text for the LivingNER 2022 challenge. Minimal adaptation of our system was required to perform named entity recognition in the Spanish tweets in the SocialDisNER task, given the availability of Spanish pre-trained language models and the SocialDisNER training data. Minor additions included treatment of emojis and entities in hashtags and Twitter account names.</abstract>
       <url hash="dd08e1c3">2022.smm4h-1.14</url>
@@ -195,11 +195,11 @@
       <title><fixed-case>CIC</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2022: a <fixed-case>BERT</fixed-case>-based approach for classification of social media forum posts</title>
       <author><first>Atnafu Lambebo</first><last>Tonja</last></author>
       <author><first>Olumide Ebenezer</first><last>Ojo</last></author>
-      <author><first>Mohammed Arif</first><last>Khan</last></author>
+      <author id="mohammed-arif-khan"><first>Mohammed Arif</first><last>Khan</last></author>
       <author><first>Abdul Gafar Manuel</first><last>Meque</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>58–61</pages>
       <abstract>This paper describes our submissions for the Social Media Mining for Health (SMM4H) 2022 shared tasks. We participated in 2 tasks: a) Task 4: Classification of Tweets self-reporting exact age and b) Task 9: Classification of Reddit posts self-reporting exact age. We evaluated the two( BERT and RoBERTa) transformer based models for both tasks. For Task 4 RoBERTa-Large achieved an F1 score of 0.846 on the test set and BERT-Large achieved an F1 score of 0.865 on the test set for Task 9.</abstract>
       <url hash="7fe7d8ef">2022.smm4h-1.17</url>
@@ -287,7 +287,7 @@
       <author><first>Aman</first><last>Sinha</last></author>
       <author><first>Cristina Garcia</first><last>Holgado</last></author>
       <author><first>Marianne</first><last>Clausel</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>85–89</pages>
       <abstract>Biomedical NER is an active research area today. Despite the availability of state-of-the-art models for standard NER tasks, their performance degrades on biomedical data due to OOV entities and the challenges encountered in specialized domains. We use Flair-NER framework to investigate the effectiveness of various contextual and static embeddings for NER on Spanish tweets, in particular, to capture complex disease mentions.</abstract>
       <url hash="d7daccb8">2022.smm4h-1.25</url>
@@ -356,7 +356,7 @@
       <author><first>Roshan</first><last>Khatri</last></author>
       <author><first>Sougata</first><last>Saha</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>114–117</pages>
       <abstract>Here we discuss our implementation of two tasks in the Social Media Mining for Health Applications (SMM4H) 2022 shared tasks – classification, detection, and normalization of Adverse Events (AE) mentioned in English tweets (Task 1) and classification of English tweets self-reporting exact age (Task 4). We have explored different methods and models for binary classification, multi-class classification and named entity recognition (NER) for these tasks. We have also processed the provided dataset for noise, imbalance, and creative language expression from data. Using diverse NLP methods we classified tweets for mentions of adverse drug effects (ADEs) and self-reporting the exact age in the tweets. Further, extracted reactions from the tweets and normalized these adverse effects to a standard concept ID in the MedDRA vocabulary.</abstract>
       <url hash="43003b98">2022.smm4h-1.32</url>
@@ -449,7 +449,7 @@
       <author><first>Jinge</first><last>Wu</last></author>
       <author><first>Honghan</first><last>Wu</last></author>
       <author><first>Tony</first><last>Sun</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>148–152</pages>
       <abstract>This paper reports on the performance of Edinburgh_UCL_Health’s models in the Social Media Mining for Health (SMM4H) 2022 shared tasks. Our team participated in the tasks related to the Identification of Adverse Drug Events (ADEs), the classification of change in medication (change-med) and the classification of self-report of vaccination (self-vaccine). Our best performing models are based on DeepADEMiner (with respective F1= 0.64, 0.62 and 0.39 for ADE identification), on a GloVe model trained on Twitter (with F1=0.11 for the change-med) and finally on a stack embedding including a layer of Glove embedding and two layers of Flair embedding (with F1= 0.77 for self-report).</abstract>
       <url hash="1eb83d5c">2022.smm4h-1.40</url>
@@ -458,7 +458,7 @@
     <paper id="41">
       <title><fixed-case>KUL</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>’22: Template Augmented Adaptive Pre-training for Tweet Classification</title>
       <author><first>Sumam</first><last>Francis</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>153–155</pages>
       <abstract>This paper describes models developed for the Social Media Mining for Health (SMM4H) 2022 shared tasks. Our team participated in the first subtask that classifies tweets with Adverse Drug Effect (ADE) mentions. Our best-performing model comprises of a template augmented task adaptive pre-training and further fine-tuning on target task data. Augmentation with random prompt templates increases the amount of task-specific data to generalize the LM to the target task domain. We explore 2 pre-training strategies: Masked language modeling (MLM) and Simple contrastive pre-training (SimSCE) and the impact of adding template augmentations with these pre-training strategies. Our system achieves an F1 score of 0.433 on the test set without using supplementary resources and medical dictionaries.</abstract>
       <url hash="92b64c93">2022.smm4h-1.41</url>
@@ -537,7 +537,7 @@
       <author><first>Eulàlia</first><last>Farré-Maduell</last></author>
       <author><first>Salvador</first><last>Lima-López</last></author>
       <author><first>Antonio</first><last>Miranda-Escalada</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <pages>182–189</pages>
       <abstract>There is a pressing need to exploit health-related content from social media, a global source of data where key health information is posted directly by citizens, patients and other healthcare stakeholders. Use cases of disease related social media mining include disease outbreak/surveillance, mental health and pharmacovigilance. Current efforts address the exploitation of social media beyond English. The SocialDisNER task, organized as part of the SMM4H 2022 initiative, has applied the LINKAGE methodology to select and annotate a Gold Standard corpus of 9,500 tweets in Spanish enriched with disease mentions generated by patients and medical professionals. As a complementary resource for teams participating in the SocialDisNER track, we have also created a large-scale corpus of 85,000 tweets, where in addition to disease mentions, other medical entities of relevance (e.g., medications, symptoms and procedures, among others) have been automatically labelled. Using these large-scale datasets, co-mention networks or knowledge graphs were released for each entity pair type. Out of the 47 teams registered for the task, 17 teams uploaded a total of 32 runs. The top-performing team achieved a very competitive 0.891 f-score, with a system trained following a continue pre-training strategy. We anticipate that the corpus and systems resulting from the SocialDisNER track might further foster health related text mining of social media content in Spanish and inspire disease detection strategies in other languages.</abstract>
       <url hash="cd65124c">2022.smm4h-1.48</url>
@@ -546,7 +546,7 @@
     <paper id="49">
       <title><fixed-case>R</fixed-case>omanian micro-blogging named entity recognition including health-related entities</title>
       <author><first>Vasile</first><last>Pais</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Maria</first><last>Mitrofan</last></author>
       <author><first>Carol Luca</first><last>Gasan</last></author>
@@ -613,7 +613,7 @@
       <author><first>Yao</first><last>Ge</last></author>
       <author><first>Yuting</first><last>Guo</last></author>
       <author><first>Ari</first><last>Klein</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <author><first>Mathias</first><last>Leddin</last></author>
       <author><first>Arjun</first><last>Magge</last></author>
       <author><first>Raul</first><last>Rodriguez-Esteban</last></author>
diff --git a/data/xml/2022.socialnlp.xml b/data/xml/2022.socialnlp.xml
index 440ea7da2d..1aaeb7d64d 100644
--- a/data/xml/2022.socialnlp.xml
+++ b/data/xml/2022.socialnlp.xml
@@ -33,7 +33,7 @@
       <author><first>Dana</first><last>Ruiter</last></author>
       <author><first>Thomas</first><last>Kleinbauer</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>11-34</pages>
       <abstract>Recent research on style transfer takes inspiration from unsupervised neural machine translation (UNMT), learning from large amounts of non-parallel data by exploiting cycle consistency loss, back-translation, and denoising autoencoders. By contrast, the use of selfsupervised NMT (SSNMT), which leverages (near) parallel instances hidden in non-parallel data more efficiently than UNMT, has not yet been explored for style transfer. In this paper we present a novel Self-Supervised Style Transfer (3ST) model, which augments SSNMT with UNMT methods in order to identify and efficiently exploit supervisory signals in non-parallel social media posts. We compare 3ST with state-of-the-art (SOTA) style transfer models across civil rephrasing, formality and polarity tasks. We show that 3ST is able to balance the three major objectives (fluency, content preservation, attribute transfer accuracy) the best, outperforming SOTA models on averaged performance across their tested tasks in automatic and human evaluation.</abstract>
diff --git a/data/xml/2022.spanlp.xml b/data/xml/2022.spanlp.xml
index e6a8992e1c..9435ac2535 100644
--- a/data/xml/2022.spanlp.xml
+++ b/data/xml/2022.spanlp.xml
@@ -24,7 +24,7 @@
       <author><first>Van-Hien</first><last>Tran</last></author>
       <author><first>Hiroki</first><last>Ouchi</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1-6</pages>
       <abstract>Zero-shot relation extraction (ZSRE) aims to predict target relations that cannot be observed during training. While most previous studies have focused on fully supervised relation extraction and achieved considerably high performance, less effort has been made towards ZSRE. This study proposes a new model incorporating discriminative embedding learning for both sentences and semantic relations. In addition, a self-adaptive comparator network is used to judge whether the relationship between a sentence and a relation is consistent. Experimental results on two benchmark datasets showed that the proposed method significantly outperforms the state-of-the-art methods.</abstract>
       <url hash="742950af">2022.spanlp-1.1</url>
@@ -51,7 +51,7 @@
       <title>Efficient Machine Translation Domain Adaptation</title>
       <author><first>Pedro</first><last>Martins</last></author>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>Andre</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last></author>
       <pages>23-29</pages>
       <abstract>Machine translation models struggle when translating out-of-domain text, which makes domain adaptation a topic of critical importance. However, most domain adaptation methods focus on fine-tuning or training the entire or part of the model on every new domain, which can be costly. On the other hand, semi-parametric models have been shown to successfully perform domain adaptation by retrieving examples from an in-domain datastore (Khandelwal et al., 2021). A drawback of these retrieval-augmented models, however, is that they tend to be substantially slower. In this paper, we explore several approaches to speed up nearest neighbors machine translation. We adapt the methods recently proposed by He et al. (2021) for language modeling, and introduce a simple but effective caching strategy that avoids performing retrieval when similar contexts have been seen before. Translation quality and runtimes for several domains show the effectiveness of the proposed solutions.</abstract>
       <url hash="5884847a">2022.spanlp-1.3</url>
diff --git a/data/xml/2022.spnlp.xml b/data/xml/2022.spnlp.xml
index 66429da1c7..efa5c8b341 100644
--- a/data/xml/2022.spnlp.xml
+++ b/data/xml/2022.spnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Sixth Workshop on Structured Prediction for NLP</booktitle>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <editor><first>Priyanka</first><last>Agrawal</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Gerasimos</first><last>Lampouras</last></editor>
       <editor><first>Chunchuan</first><last>Lyu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -34,7 +34,7 @@
       <title>Joint Entity and Relation Extraction Based on Table Labeling Using Convolutional Neural Networks</title>
       <author><first>Youmi</first><last>Ma</last></author>
       <author><first>Tatsuya</first><last>Hiraoka</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>11-21</pages>
       <abstract>This study introduces a novel approach to the joint extraction of entities and relations by stacking convolutional neural networks (CNNs) on pretrained language models. We adopt table representations to model the entities and relations, casting the entity and relation extraction as a table-labeling problem. Regarding each table as an image and each cell in a table as an image pixel, we apply two-dimensional CNNs to the tables to capture local dependencies and predict the cell labels. The experimental results showed that the performance of the proposed method is comparable to those of current state-of-art systems on the CoNLL04, ACE05, and ADE datasets. Even when freezing pretrained language model parameters, the proposed method showed a stable performance, whereas the compared methods suffered from significant decreases in performance. This observation indicates that the parameters of the pretrained encoder may incorporate dependencies among the entity and relation labels during fine-tuning.</abstract>
       <url hash="6949900b">2022.spnlp-1.2</url>
@@ -85,7 +85,7 @@
     <paper id="6">
       <title>Neural String Edit Distance</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>52-66</pages>
       <abstract>We propose the neural string edit distance model for string-pair matching and string transduction based on learnable string edit distance. We modify the original expectation-maximization learned edit distance algorithm into a differentiable loss function, allowing us to integrate it into a neural network providing a contextual representation of the input. We evaluate on cognate detection, transliteration, and grapheme-to-phoneme conversion, and show that we can trade off between performance and interpretability in a single framework. Using contextual representations, which are difficult to interpret, we match the performance of state-of-the-art string-pair matching models. Using static embeddings and a slightly different loss function, we force interpretability, at the expense of an accuracy drop.</abstract>
       <url hash="d2a70555">2022.spnlp-1.6</url>
@@ -97,7 +97,7 @@
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>António</first><last>Góis</last></author>
       <author><first>Patrick</first><last>Fernandes</last></author>
-      <author><first>Erick</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
       <author><first>Andre</first><last>Martins</last></author>
       <pages>67-81</pages>
       <abstract>Transformers’ quadratic complexity with respect to the input sequence length has motivated a body of work on efficient sparse approximations to softmax. An alternative path, used by entmax transformers, consists of having built-in exact sparse attention; however this approach still requires quadratic computation. In this paper, we propose Sparsefinder, a simple model trained to identify the sparsity pattern of entmax attention before computing it. We experiment with three variants of our method, based on distances, quantization, and clustering, on two tasks: machine translation (attention in the decoder) and masked language modeling (encoder-only). Our work provides a new angle to study model efficiency by doing extensive analysis of the tradeoff between the sparsity and recall of the predicted attention graph. This allows for detailed comparison between different models along their Pareto curves, important to guide future benchmarks for sparse attention models.</abstract>
diff --git a/data/xml/2022.starsem.xml b/data/xml/2022.starsem.xml
index 0d50a9be88..66ccd64d26 100644
--- a/data/xml/2022.starsem.xml
+++ b/data/xml/2022.starsem.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2022-06-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 11th Joint Conference on Lexical and Computational Semantics</booktitle>
-      <editor><first>Vivi</first><last>Nastase</last></editor>
+      <editor id="vivi-nastase"><first>Vivi</first><last>Nastase</last></editor>
       <editor><first>Ellie</first><last>Pavlick</last></editor>
       <editor><first>Mohammad Taher</first><last>Pilehvar</last></editor>
-      <editor><first>Jose</first><last>Camacho-Collados</last></editor>
+      <editor id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></editor>
       <editor><first>Alessandro</first><last>Raganato</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington</address>
@@ -66,7 +66,7 @@
       <author><first>Samuel</first><last>Ryb</last></author>
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Arabella</first><last>Sinclair</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>55-68</pages>
       <abstract>We investigate the extent to which pre-trained language models acquire analytical and deductive logical reasoning capabilities as a side effect of learning word prediction. We present AnaLog, a natural language inference task designed to probe models for these capabilities, controlling for different invalid heuristics the models may adopt instead of learning the desired generalisations. We test four languagemodels on AnaLog, finding that they have all learned, to a different extent, to encode information that is predictive of entailment beyond shallow heuristics such as lexical overlap and grammaticality. We closely analyse the best performing language model and show that while it performs more consistently than other language models across logical connectives and reasoning domains, it still is sensitive to lexical and syntactic variations in the realisation of logical statements.</abstract>
       <url hash="4e50b5e3">2022.starsem-1.5</url>
@@ -98,10 +98,10 @@
     </paper>
     <paper id="8">
       <title>Multilingual Extraction and Categorization of Lexical Collocations with Graph-aware Transformers</title>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Alexander</first><last>Shvets</last></author>
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>89-100</pages>
       <abstract>Recognizing and categorizing lexical collocations in context is useful for language learning, dictionary compilation and downstream NLP. However, it is a challenging task due to the varying degrees of frozenness lexical collocations exhibit. In this paper, we put forward a sequence tagging BERT-based model enhanced with a graph-aware transformer architecture, which we evaluate on the task of collocation recognition in context. Our results suggest that explicitly encoding syntactic dependencies in the model architecture is helpful, and provide insights on differences in collocation typification in English, Spanish and French.</abstract>
@@ -115,7 +115,7 @@
       <author><first>Kyle</first><last>Richardson</last></author>
       <author><first>Noam</first><last>Kahlon</last></author>
       <author><first>Aviad</first><last>Sar-shalom</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <author><first>Dafna</first><last>Shahaf</last></author>
       <pages>101-122</pages>
@@ -158,7 +158,7 @@
     <paper id="13">
       <title>Distilling Hypernymy Relations from Language Models: On the Effectiveness of Zero-Shot Taxonomy Induction</title>
       <author><first>Devansh</first><last>Jain</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>151-156</pages>
       <abstract>In this paper, we analyze zero-shot taxonomy learning methods which are based on distilling knowledge from language models via prompting and sentence scoring. We show that, despite their simplicity, these methods outperform some supervised strategies and are competitive with the current state-of-the-art under adequate conditions. We also show that statistical and linguistic properties of prompts dictate downstream performance.</abstract>
       <url hash="24e40786">2022.starsem-1.13</url>
@@ -232,7 +232,7 @@
       <author><first>Aarne</first><last>Talman</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Stergios</first><last>Chatzikyriakidis</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>226-233</pages>
       <abstract>A central question in natural language understanding (NLU) research is whether high performance demonstrates the models’ strong reasoning capabilities. We present an extensive series of controlled experiments where pre-trained language models are exposed to data that have undergone specific corruption transformations. These involve removing instances of specific word classes and often lead to non-sensical sentences. Our results show that performance remains high on most GLUE tasks when the models are fine-tuned or tested on corrupted data, suggesting that they leverage other cues for prediction even in non-sensical contexts. Our proposed data transformations can be used to assess the extent to which a specific dataset constitutes a proper testbed for evaluating models’ language understanding capabilities.</abstract>
       <url hash="ae26a0aa">2022.starsem-1.20</url>
@@ -241,7 +241,7 @@
     </paper>
     <paper id="21">
       <title>Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification</title>
-      <author id="ryosuke-takahashi"><first>Ryosuke</first><last>Takahashi</last></author>
+      <author><first>Ryosuke</first><last>Takahashi</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Koichi</first><last>Takeda</last></author>
       <pages>234-239</pages>
@@ -275,14 +275,14 @@
     </paper>
     <paper id="24">
       <title><fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Comes of <fixed-case>A</fixed-case>ge—<fixed-case>L</fixed-case>arger, Smarter, and more Diverse</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Skatje</first><last>Myers</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Tim</first><last>O’gorman</last></author>
       <author><first>James</first><last>Gung</last></author>
       <author><first>Kristin</first><last>Wright-bettner</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>278-288</pages>
       <abstract>This paper describes the evolution of the PropBank approach to semantic role labeling over the last two decades. During this time the PropBank frame files have been expanded to include non-verbal predicates such as adjectives, prepositions and multi-word expressions. The number of domains, genres and languages that have been PropBanked has also expanded greatly, creating an opportunity for much more challenging and robust testing of the generalization capabilities of PropBank semantic role labeling systems. We also describe the substantial effort that has gone into ensuring the consistency and reliability of the various annotated datasets and resources, to better support the training and evaluation of such systems</abstract>
       <url hash="96b4e43e">2022.starsem-1.24</url>
@@ -293,9 +293,9 @@
       <title>Speech acts and Communicative Intentions for Urgency Detection</title>
       <author><first>Laurenti</first><last>Enzo</last></author>
       <author><first>Bourgon</first><last>Nils</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Mari</first><last>Alda</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Courgeon</first><last>Camille</last></author>
       <pages>289-298</pages>
       <abstract>Recognizing speech acts (SA) is crucial for capturing meaning beyond what is said, making communicative intentions particularly relevant to identify urgent messages. This paper attempts to measure for the first time the impact of SA on urgency detection during crises,006in tweets. We propose a new dataset annotated for both urgency and SA, and develop several deep learning architectures to inject SA into urgency detection while ensuring models generalisability. Our results show that taking speech acts into account in tweet analysis improves information type detection in an out-of-type configuration where models are evaluated in unseen event types during training. These results are encouraging and constitute a first step towards SA-aware disaster management in social media.</abstract>
@@ -306,7 +306,7 @@
     <paper id="26">
       <title>What Drives the Use of Metaphorical Language? Negative Insights from Abstractness, Affect, Discourse Coherence and Contextualized Word Representations</title>
       <author><first>Prisca</first><last>Piccirilli</last></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last></author>
       <pages>299-310</pages>
       <abstract>Given a specific discourse, which discourse properties trigger the use of metaphorical language, rather than using literal alternatives? For example, what drives people to say grasp the meaning rather than understand the meaning within a specific context? Many NLP approaches to metaphorical language rely on cognitive and (psycho-)linguistic insights and have successfully defined models of discourse coherence, abstractness and affect. In this work, we build five simple models relying on established cognitive and linguistic properties ? frequency, abstractness, affect, discourse coherence and contextualized word representations ? to predict the use of a metaphorical vs. synonymous literal expression in context. By comparing the models? outputs to human judgments, our study indicates that our selected properties are not sufficient to systematically explain metaphorical vs. literal language choices.</abstract>
       <url hash="ed0a3409">2022.starsem-1.26</url>
@@ -350,7 +350,7 @@
     <paper id="30">
       <title>Online Coreference Resolution for Dialogue Processing: Improving Mention-Linking on Real-Time Conversations</title>
       <author><first>Liyan</first><last>Xu</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>341-347</pages>
       <abstract>This paper suggests a direction of coreference resolution for online decoding on actively generated input such as dialogue, where the model accepts an utterance and its past context, then finds mentions in the current utterance as well as their referents, upon each dialogue turn. A baseline and four incremental updated models adapted from the mention linking paradigm are proposed for this new setting, which address different aspects including the singletons, speaker-grounded encoding and cross-turn mention contextualization. Our approach is assessed on three datasets: Friends, OntoNotes, and BOLT. Results show that each aspect brings out steady improvement, and our best models outperform the baseline by over 10%, presenting an effective system for this setting. Further analysis highlights the task characteristics, such as the significance of addressing the mention recall.</abstract>
       <url hash="41db02c6">2022.starsem-1.30</url>
diff --git a/data/xml/2022.suki.xml b/data/xml/2022.suki.xml
index 3955d4e0bd..a6469ac619 100644
--- a/data/xml/2022.suki.xml
+++ b/data/xml/2022.suki.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Workshop on Structured and Unstructured Knowledge Integration (SUKI)</booktitle>
       <editor><first>Wenhu</first><last>Chen</last></editor>
       <editor><first>Xinyun</first><last>Chen</last></editor>
-      <editor id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></editor>
+      <editor><first>Zhiyu</first><last>Chen</last></editor>
       <editor><first>Ziyu</first><last>Yao</last></editor>
       <editor><first>Michihiro</first><last>Yasunaga</last></editor>
       <editor><first>Tao</first><last>Yu</last></editor>
@@ -82,7 +82,7 @@
       <title>Table Retrieval May Not Necessitate Table-specific Model Design</title>
       <author><first>Zhiruo</first><last>Wang</last></author>
       <author><first>Zhengbao</first><last>Jiang</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>36-46</pages>
       <abstract>Tables are an important form of structured data for both human and machine readers alike, providing answers to questions that cannot, or cannot easily, be found in texts. Recent work has designed special models and training paradigms for table-related tasks such as table-based question answering and table retrieval. Though effective, they add complexity in both modeling and data acquisition compared to generic text solutions and obscure which elements are truly beneficial. In this work, we focus on the task of table retrieval, and ask: “is table-specific model design necessary for table retrieval, or can a simpler text-based model be effectively used to achieve a similar result?’’ First, we perform an analysis on a table-based portion of the Natural Questions dataset (NQ-table), and find that structure plays a negligible role in more than 70% of the cases. Based on this, we experiment with a general Dense Passage Retriever (DPR) based on text and a specialized Dense Table Retriever (DTR) that uses table-specific model designs. We find that DPR performs well without any table-specific design and training, and even achieves superior results compared to DTR when fine-tuned on properly linearized tables. We then experiment with three modules to explicitly encode table structures, namely auxiliary row/column embeddings, hard attention masks, and soft relation-based attention biases. However, none of these yielded significant improvements, suggesting that table-specific model design may not be necessary for table retrieval.</abstract>
@@ -94,8 +94,8 @@
     <paper id="6">
       <title>Transfer Learning and Masked Generation for Answer Verbalization</title>
       <author><first>Sebastien</first><last>Montella</last></author>
-      <author><first>Lina</first><last>Rojas-Barahona</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas-Barahona</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>47-54</pages>
diff --git a/data/xml/2022.sumeval.xml b/data/xml/2022.sumeval.xml
index 3c512c391c..a4eb003af5 100644
--- a/data/xml/2022.sumeval.xml
+++ b/data/xml/2022.sumeval.xml
@@ -8,7 +8,7 @@
       <editor><first>Barun</first><last>Patra</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Monojit</first><last>Choudhury</last></editor>
-      <editor><first>Sandipan</first><last>Dandapat</last></editor>
+      <editor id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></editor>
       <editor><first>Sunayana</first><last>Sitaram</last></editor>
       <editor><first>Vishrav</first><last>Chaudhary</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -74,7 +74,7 @@
       <title><fixed-case>I</fixed-case>ndo<fixed-case>R</fixed-case>obusta: Towards Robustness Against Diverse Code-Mixed <fixed-case>I</fixed-case>ndonesian Local Languages</title>
       <author><first>Muhammad Farid</first><last>Adilazuarda</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <pages>25–34</pages>
diff --git a/data/xml/2022.sustainlp.xml b/data/xml/2022.sustainlp.xml
index 3cd454d1d0..1445680922 100644
--- a/data/xml/2022.sustainlp.xml
+++ b/data/xml/2022.sustainlp.xml
@@ -45,7 +45,7 @@
       <title><fixed-case>KGR</fixed-case>efiner: Knowledge Graph Refinement for Improving Accuracy of Translational Link Prediction Methods</title>
       <author><first>Mohammad Javad</first><last>Saeedizade</last></author>
       <author><first>Najmeh</first><last>Torabian</last><affiliation>Na</affiliation></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <pages>10-16</pages>
       <abstract>Link Prediction is the task of predicting missing relations between knowledge graph entities (KG). Recent work in link prediction mainly attempted to adapt a model to increase link prediction accuracy by using more layers in neural network architecture, which heavily rely on computational resources. This paper proposes the refinement of knowledge graphs to perform link prediction operations more accurately using relatively fast translational models. Translational link prediction models have significantly less complexity than deep learning approaches; this motivated us to improve their accuracy. Our method uses the ontologies of knowledge graphs to add information as auxiliary nodes to the graph. Then, these auxiliary nodes are connected to ordinary nodes of the KG that contain auxiliary information in their hierarchy. Our experiments show that our method can significantly increase the performance of translational link prediction methods in Hit@10, Mean Rank, and Mean Reciprocal Rank.</abstract>
       <url hash="9a2d4973">2022.sustainlp-1.3</url>
@@ -57,7 +57,7 @@
       <title>Algorithmic Diversity and Tiny Models: Comparing Binary Networks and the Fruit Fly Algorithm on Document Representation Tasks</title>
       <author><first>Tanise</first><last>Ceron</last></author>
       <author><first>Nhut</first><last>Truong</last></author>
-      <author><first>Aurelie</first><last>Herbelot</last><affiliation>University of Trento</affiliation></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last><affiliation>University of Trento</affiliation></author>
       <pages>17-28</pages>
       <abstract>Neural language models have seen a dramatic increase in size in the last years. While many still advocate that ‘bigger is better’, work in model distillation has shown that the number of parameters used by very large networks is actually more than what is required for state-of-the-art performance. This prompts an obvious question: can we build smaller models from scratch, rather than going through the inefficient process of training at scale and subsequently reducing model size. In this paper, we investigate the behaviour of a biologically inspired algorithm, based on the fruit fly’s olfactory system. This algorithm has shown good performance in the past on the task of learning word embeddings. We now put it to the test on the task of semantic hashing. Specifically, we compare the fruit fly to a standard binary network on the task of generating locality-sensitive hashes for text documents, measuring both task performance and energy consumption. Our results indicate that the two algorithms have complementary strengths while showing similar electricity usage.</abstract>
       <url hash="d9df9cb5">2022.sustainlp-1.4</url>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>Look Ma, Only 400 Samples! Revisiting the Effectiveness of Automatic N-Gram Rule Generation for Spelling Normalization in <fixed-case>F</fixed-case>ilipino</title>
       <author><first>Lorenzo Jaime</first><last>Flores</last></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>29-35</pages>
       <abstract>With 84.75 million Filipinos online, the ability for models to process online text is crucial for developing Filipino NLP applications. To this end, spelling correction is a crucial preprocessing step for downstream processing. However, the lack of data prevents the use of language models for this task. In this paper, we propose an N-Gram + Damerau-Levenshtein distance model with automatic rule extraction. We train the model on 300 samples, and show that despite limited training data, it achieves good performance and outperforms other deep learning approaches in terms of accuracy and edit distance. Moreover, the model (1) requires little compute power, (2) trains in little time, thus allowing for retraining, and (3) is easily interpretable, allowing for direct troubleshooting, highlighting the success of traditional approaches over more complex deep learning models in settings where data is unavailable.</abstract>
       <url hash="84f2ebbb">2022.sustainlp-1.5</url>
@@ -81,7 +81,7 @@
       <author><first>Young Jin</first><last>Kim</last><affiliation>Microsoft</affiliation></author>
       <author><first>Rawn</first><last>Henry</last></author>
       <author><first>Raffy</first><last>Fahim</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
       <pages>36-43</pages>
       <abstract>Mixture of Experts (MoE) models with conditional execution of sparsely activated layers has enabled training models with a much larger number of parameters. As a result, these models have achieved significantly better quality on various natural language processing tasks including machine translation. However, it remains challenging to deploy such models in real-life scenarios due to the large memory requirements and inefficient inference. In this work, we introduce a highly efficient inference framework with several optimization approaches to accelerate the computation of sparse models and cut down the memory consumption significantly. While we achieve up to 26x speed-up in terms of throughput, we also reduce the model size almost to one eighth of the original 32-bit float model by quantizing expert weights into 4-bit integers. As a result, we are able to deploy 136x larger models with 27% less cost and significantly better quality with large scale MoE model deployment compared to the existing solutions. This enables a paradigm shift in deploying large scale multilingual MoE transformers models instead of distilling into dozens of smaller models per language or task.</abstract>
       <url hash="0e007543">2022.sustainlp-1.6</url>
@@ -124,8 +124,8 @@
       <author><first>Aili</first><last>Shen</last><affiliation>Amazon</affiliation></author>
       <author><first>Yitong</first><last>Li</last><affiliation>Huawei Technologies Co., Ltd.</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>65-72</pages>
       <abstract>With the growing prevalence of large-scale language models, their energy footprint and potential to learn and amplify historical biases are two pressing challenges. Dataset distillation (DD) — a method for reducing the dataset size by learning a small number of synthetic samples which encode the information in the original dataset — is a method for reducing the cost of model training, however its impact on fairness has not been studied. We investigate how DD impacts on group bias, with experiments over two language classification tasks, concluding that vanilla DD preserves the bias of the dataset. We then show how existing debiasing methods can be combined with DD to produce models that are fair and accurate, at reduced training cost.</abstract>
       <url hash="aadccd18">2022.sustainlp-1.13</url>
diff --git a/data/xml/2022.tacl.xml b/data/xml/2022.tacl.xml
index 12b0b5382f..b7ec0f6c2e 100644
--- a/data/xml/2022.tacl.xml
+++ b/data/xml/2022.tacl.xml
@@ -38,7 +38,7 @@
       <author><first>Linyong</first><last>Nan</last></author>
       <author><first>Chiachun</first><last>Hsieh</last></author>
       <author><first>Ziming</first><last>Mao</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Neha</first><last>Verma</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Wojciech</first><last>Kryściński</last></author>
@@ -51,7 +51,7 @@
       <author><first>Renusree</first><last>Bandaru</last></author>
       <author><first>Jacob</first><last>Cunningham</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Dragomir</first><last>Radev</last></author>
       <doi>10.1162/tacl_a_00446</doi>
       <abstract>Existing table question answering datasets contain abundant factual questions that primarily evaluate a QA system’s comprehension of query and tabular data. However, restricted by their short-form answers, these datasets fail to include question–answer interactions that represent more advanced and naturally occurring information needs: questions that ask for reasoning and integration of information pieces retrieved from a structured knowledge source. To complement the existing datasets and to reveal the challenging nature of the table-based question answering task, we introduce FeTaQA, a new dataset with 10K Wikipedia-based table, question, free-form answer, supporting table cells pairs. FeTaQA is collected from noteworthy descriptions of Wikipedia tables that contain information people tend to seek; generation of these descriptions requires advanced processing that humans perform on a daily basis: Understand the question and table, retrieve, integrate, infer, and conduct text planning and surface realization to generate an answer. We provide two benchmark methods for the proposed task: a pipeline method based on semantic parsing-based QA systems and an end-to-end method based on large pretrained text generation models, and show that FeTaQA poses a challenge for both methods.</abstract>
@@ -69,21 +69,21 @@
       <author><first>Nasanbayar</first><last>Ulzii-Orshikh</last></author>
       <author><first>Allahsera</first><last>Tapo</last></author>
       <author><first>Nishant</first><last>Subramani</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Claytone</first><last>Sikasote</last></author>
       <author><first>Monang</first><last>Setyawan</last></author>
       <author><first>Supheakmungkol</first><last>Sarin</last></author>
       <author><first>Sokhar</first><last>Samb</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Clara</first><last>Rivera</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Isabel</first><last>Papadimitriou</last></author>
       <author><first>Salomey</first><last>Osei</last></author>
       <author><first>Pedro Ortiz</first><last>Suarez</last></author>
       <author><first>Iroro</first><last>Orife</last></author>
       <author><first>Kelechi</first><last>Ogueji</last></author>
       <author><first>Andre Niyongabo</first><last>Rubungo</last></author>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
       <author><first>André</first><last>Müller</last></author>
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last></author>
@@ -122,7 +122,7 @@
     </paper>
     <paper id="5">
       <title>Canine: Pre-training an Efficient Tokenization-Free Encoder for Language Representation</title>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Dan</first><last>Garrette</last></author>
       <author><first>Iulia</first><last>Turc</last></author>
       <author><first>John</first><last>Wieting</last></author>
@@ -160,7 +160,7 @@
     <paper id="8">
       <title>Out-of-Domain Discourse Dependency Parsing via Bootstrapping: An Empirical Analysis on Its Effectiveness and Limitation</title>
       <author><first>Noriki</first><last>Nishida</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.1162/tacl_a_00451</doi>
       <abstract>Discourse parsing has been studied for decades. However, it still remains challenging to utilize discourse parsing for real-world applications because the parsing accuracy degrades significantly on out-of-domain text. In this paper, we report and discuss the effectiveness and limitations of bootstrapping methods for adapting modern BERT-based discourse dependency parsers to out-of-domain text without relying on additional human supervision. Specifically, we investigate self-training, co-training, tri-training, and asymmetric tri-training of graph-based and transition-based discourse dependency parsing models, as well as confidence measures and sample selection criteria in two adaptation scenarios: monologue adaptation between scientific disciplines and dialogue genre adaptation. We also release COVID-19 Discourse Dependency Treebank (COVID19-DTB), a new manually annotated resource for discourse dependency parsing of biomedical paper abstracts. The experimental results show that bootstrapping is significantly and consistently effective for unsupervised domain adaptation of discourse dependency parsing, but the low coverage of accurately predicted pseudo labels is a bottleneck for further improvement. We show that active learning can mitigate this limitation.</abstract>
       <pages>127–144</pages>
@@ -187,7 +187,7 @@
       <author><first>Vivek</first><last>Raghavan</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
-      <author><first>Mitesh Shantadevi</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh Shantadevi</first><last>Khapra</last></author>
       <doi>10.1162/tacl_a_00452</doi>
       <abstract>We present Samanantar, the largest publicly available parallel corpora collection for Indic languages. The collection contains a total of 49.7 million sentence pairs between English and 11 Indic languages (from two language families). Specifically, we compile 12.4 million sentence pairs from existing, publicly available parallel corpora, and additionally mine 37.4 million sentence pairs from the Web, resulting in a 4× increase. We mine the parallel sentences from the Web by combining many corpora, tools, and methods: (a) Web-crawled monolingual corpora, (b) document OCR for extracting sentences from scanned documents, (c) multilingual representation models for aligning sentences, and (d) approximate nearest neighbor search for searching in a large collection of sentences. Human evaluation of samples from the newly mined corpora validate the high quality of the parallel sentences across 11 languages. Further, we extract 83.4 million sentence pairs between all 55 Indic language pairs from the English-centric parallel corpus using English as the pivot language. We trained multilingual NMT models spanning all these languages on Samanantar which outperform existing models and baselines on publicly available benchmarks, such as FLORES, establishing the utility of Samanantar. Our data and models are available publicly at Samanantar and we hope they will help advance research in NMT and multilingual NLP for Indic languages.</abstract>
       <pages>145–162</pages>
@@ -199,8 +199,8 @@
       <title><fixed-case>S</fixed-case>umma<fixed-case>C</fixed-case>: Re-Visiting <fixed-case>NLI</fixed-case>-based Models for Inconsistency Detection in Summarization</title>
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Tobias</first><last>Schnabel</last></author>
-      <author><first>Paul N.</first><last>Bennett</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="paul-bennett"><first>Paul N.</first><last>Bennett</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <doi>10.1162/tacl_a_00453</doi>
       <abstract>In the summarization domain, a key requirement for summaries is to be factually consistent with the input document. Previous work has found that natural language inference (NLI) models do not perform competitively when applied to inconsistency detection. In this work, we revisit the use of NLI for inconsistency detection, finding that past work suffered from a mismatch in input granularity between NLI datasets (sentence-level), and inconsistency detection (document level). We provide a highly effective and light-weight method called SummaCConv that enables NLI models to be successfully used for this task by segmenting documents into sentence units and aggregating scores between pairs of sentences. We furthermore introduce a new benchmark called SummaC (Summary Consistency) which consists of six large inconsistency detection datasets. On this dataset, SummaCConv obtains state-of-the-art results with a balanced accuracy of 74.4%, a 5% improvement compared with prior work.</abstract>
       <pages>163–177</pages>
@@ -211,7 +211,7 @@
     <paper id="11">
       <title>A Survey on Automated Fact-Checking</title>
       <author><first>Zhijiang</first><last>Guo</last></author>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
       <doi>10.1162/tacl_a_00454</doi>
       <abstract>Fact-checking has become increasingly important due to the speed with which both information and misinformation can spread in the modern media ecosystem. Therefore, researchers have been exploring how fact-checking can be automated, using techniques based on natural language processing, machine learning, knowledge representation, and databases to automatically predict the veracity of claims. In this paper, we survey automated fact-checking stemming from natural language processing, and discuss its connections to related tasks and disciplines. In this process, we present an overview of existing datasets and models, aiming to unify the various definitions given and identify common concepts. Finally, we highlight challenges for future research.</abstract>
@@ -263,9 +263,9 @@
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Jeremy R.</first><last>Cole</last></author>
       <author><first>Julian Martin</first><last>Eisenschlos</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00459</doi>
       <abstract>Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. However, most language models (LMs) are trained on snapshots of data collected at a specific moment in time. This can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic dataset aimed at probing LMs for factual knowledge that changes over time and highlight problems with LMs at either end of the spectrum—those trained on specific slices of temporal data, as well as those trained on a wide range of temporal data. To mitigate these problems, we propose a simple technique for jointly modeling text with its timestamp. This improves memorization of seen facts from the training time period, as well as calibration on predictions about unseen facts from future time periods. We also show that models trained with temporal context can be efficiently “refreshed” as new data arrives, without the need for retraining from scratch.</abstract>
       <pages>257–273</pages>
@@ -281,7 +281,7 @@
       <author><first>Mikel</first><last>Artetxe</last></author>
       <author><first>Naman</first><last>Goyal</last></author>
       <author><first>Mikhail</first><last>Plekhanov</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Nicola</first><last>Cancedda</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <author><first>Fabio</first><last>Petroni</last></author>
@@ -326,7 +326,7 @@
     <paper id="19">
       <title>Towards General Natural Language Understanding with Probabilistic Worldbuilding</title>
       <author><first>Abulhair</first><last>Saparov</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <doi>10.1162/tacl_a_00463</doi>
       <abstract>We introduce the Probabilistic Worldbuilding Model (PWM), a new fully symbolic Bayesian model of semantic parsing and reasoning, as a first step in a research program toward more domain- and task-general NLU and AI. Humans create internal mental models of their observations that greatly aid in their ability to understand and reason about a large variety of problems. In PWM, the meanings of sentences, acquired facts about the world, and intermediate steps in reasoning are all expressed in a human-readable formal language, with the design goal of interpretability. PWM is Bayesian, designed specifically to be able to generalize to new domains and new tasks. We derive and implement an inference algorithm that reads sentences by parsing and abducing updates to its latent world model that capture the semantics of those sentences, and evaluate it on two out-of-domain question-answering datasets: (1) ProofWriter and (2) a new dataset we call FictionalGeoQA, designed to be more representative of real language but still simple enough to focus on evaluating reasoning ability, while being robust against heuristics. Our method outperforms baselines on both, thereby demonstrating its value as a proof-of-concept.</abstract>
       <pages>325–342</pages>
@@ -352,10 +352,10 @@
       <author><first>Rachit</first><last>Bansal</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Livio</first><last>Baldini Soares</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00465</doi>
       <abstract>While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared with prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies.1</abstract>
       <pages>359–375</pages>
@@ -367,9 +367,9 @@
       <title><fixed-case>VILA</fixed-case>: Improving Structured Content Extraction from Scientific <fixed-case>PDF</fixed-case>s Using Visual Layout Groups</title>
       <author><first>Zejiang</first><last>Shen</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Bailey</first><last>Kuehl</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <author><first>Doug</first><last>Downey</last></author>
       <doi>10.1162/tacl_a_00466</doi>
       <abstract>Accurately extracting structured content from PDFs is a critical first step for NLP over scientific papers. Recent work has improved extraction accuracy by incorporating elementary layout information, for example, each token’s 2D position on the page, into language model pretraining. We introduce new methods that explicitly model VIsual LAyout (VILA) groups, that is, text lines or text blocks, to further improve performance. In our I-VILA approach, we show that simply inserting special tokens denoting layout group boundaries into model inputs can lead to a 1.9% Macro F1 improvement in token classification. In the H-VILA approach, we show that hierarchical encoding of layout-groups can result in up to 47% inference time reduction with less than 0.8% Macro F1 loss. Unlike prior layout-aware approaches, our methods do not require expensive additional pretraining, only fine-tuning, which we show can reduce training cost by up to 95%. Experiments are conducted on a newly curated evaluation suite, S2-VLUE, that unifies existing automatically labeled datasets and includes a new dataset of manual annotations covering diverse papers from 19 scientific disciplines. Pre-trained weights, benchmark datasets, and source code are available at <url>https://github.com/allenai/VILA</url>.</abstract>
@@ -381,7 +381,7 @@
     <paper id="23">
       <title>Data-driven Model Generalizability in Crosslinguistic Low-resource Morphological Segmentation</title>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <doi>10.1162/tacl_a_00467</doi>
       <abstract>Common designs of model evaluation typically focus on monolingual settings, where different models are compared according to their performance on a single data set that is assumed to be representative of all possible data for the task at hand. While this may be reasonable for a large data set, this assumption is difficult to maintain in low-resource scenarios, where artifacts of the data collection can yield data sets that are outliers, potentially making conclusions about model performance coincidental. To address these concerns, we investigate model generalizability in crosslinguistic low-resource scenarios. Using morphological segmentation as the test case, we compare three broad classes of models with different parameterizations, taking data from 11 languages across 6 language families. In each experimental setting, we evaluate all models on a first data set, then examine their performance consistency when introducing new randomly sampled data sets with the same size and when applying the trained models to unseen test sets of varying sizes. The results demonstrate that the extent of model generalization depends on the characteristics of the data set, and does not necessarily rely heavily on the data set size. Among the characteristics that we studied, the ratio of morpheme overlap and that of the average number of morphemes per word between the training and test sets are the two most prominent factors. Our findings suggest that future work should adopt random sampling to construct data sets with different sizes in order to make more responsible claims about model evaluation.</abstract>
       <pages>393–413</pages>
@@ -450,7 +450,7 @@
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Yoan</first><last>Dinkov</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <doi>10.1162/tacl_a_00472</doi>
       <abstract>We propose a novel framework for cross- lingual content flagging with limited target- language data, which significantly outperforms prior work in terms of predictive performance. The framework is based on a nearest-neighbor architecture. It is a modern instantiation of the vanilla k-nearest neighbor model, as we use Transformer representations in all its components. Our framework can adapt to new source- language instances, without the need to be retrained from scratch. Unlike prior work on neighborhood-based approaches, we encode the neighborhood information based on query– neighbor interactions. We propose two encoding schemes and we show their effectiveness using both qualitative and quantitative analysis. Our evaluation results on eight languages from two different datasets for abusive language detection show sizable improvements of up to 9.5 F1 points absolute (for Italian) over strong baselines. On average, we achieve 3.6 absolute F1 points of improvement for the three languages in the Jigsaw Multilingual dataset and 2.14 points for the WUL dataset.</abstract>
       <pages>484–502</pages>
@@ -482,7 +482,7 @@
       <author><first>Da</first><last>Ju</last></author>
       <author><first>Sanjana</first><last>Krishnan</last></author>
       <author><first>Marc’Aurelio</first><last>Ranzato</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Angela</first><last>Fan</last></author>
       <doi>10.1162/tacl_a_00474</doi>
       <abstract>One of the biggest challenges hindering progress in low-resource and multilingual machine translation is the lack of good evaluation benchmarks. Current evaluation benchmarks either lack good coverage of low-resource languages, consider only restricted domains, or are low quality because they are constructed using semi-automatic procedures. In this work, we introduce the Flores-101 evaluation benchmark, consisting of 3001 sentences extracted from English Wikipedia and covering a variety of different topics and domains. These sentences have been translated in 101 languages by professional translators through a carefully controlled process. The resulting dataset enables better assessment of model quality on the long tail of low-resource languages, including the evaluation of many-to-many multilingual translation systems, as all translations are fully aligned. By publicly releasing such a high-quality and high-coverage dataset, we hope to foster progress in the machine translation community and beyond.</abstract>
@@ -508,7 +508,7 @@
       <title>Relational Memory-Augmented Language Models</title>
       <author><first>Qi</first><last>Liu</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <doi>10.1162/tacl_a_00476</doi>
       <abstract>We present a memory-augmented approach to condition an autoregressive language model on a knowledge graph. We represent the graph as a collection of relation triples and retrieve relevant relations for a given context to improve text generation. Experiments on WikiText-103, WMT19, and enwik8 English datasets demonstrate that our approach produces a better language model in terms of perplexity and bits per character. We also show that relational memory improves coherence, is complementary to token-based memory, and enables causal interventions. Our model provides a simple yet effective way to combine an autoregressive language model and a knowledge graph for more coherent and logical generation.</abstract>
       <pages>555–572</pages>
@@ -581,9 +581,9 @@
     <paper id="38">
       <title>Is My Model Using the Right Evidence? Systematic Probes for Examining Evidence-Based Tabular Reasoning</title>
       <author><first>Vivek</first><last>Gupta</last></author>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
       <author><first>Atreya</first><last>Ghosal</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Maneesh</first><last>Singh</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
       <doi>10.1162/tacl_a_00482</doi>
@@ -596,9 +596,9 @@
     <paper id="39">
       <title>Uncertainty Estimation and Reduction of Pre-trained Models for Text Regression</title>
       <author><first>Yuxia</first><last>Wang</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <doi>10.1162/tacl_a_00483</doi>
       <abstract>State-of-the-art classification and regression models are often not well calibrated, and cannot reliably provide uncertainty estimates, limiting their utility in safety-critical applications such as clinical decision-making. While recent work has focused on calibration of classifiers, there is almost no work in NLP on calibration in a regression setting. In this paper, we quantify the calibration of pre- trained language models for text regression, both intrinsically and extrinsically. We further apply uncertainty estimates to augment training data in low-resource domains. Our experiments on three regression tasks in both self-training and active-learning settings show that uncertainty estimation can be used to increase overall performance and enhance model generalization.</abstract>
       <pages>680–696</pages>
@@ -620,7 +620,7 @@
     <paper id="41">
       <title>True Few-Shot Learning with <fixed-case>P</fixed-case>rompts—<fixed-case>A</fixed-case> Real-World Perspective</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00485</doi>
       <abstract>Prompt-based approaches excel at few-shot learning. However, Perez et al. (2021) recently cast doubt on their performance as they had difficulty getting good results in a “true” few-shot setting in which prompts and hyperparameters cannot be tuned on a dev set. In view of this, we conduct an extensive study of Pet, a method that combines textual instructions with example-based finetuning. We show that, if correctly configured, Pet performs strongly in true few-shot settings without a dev set. Crucial for this strong performance is a number of design choices, including Pet’s ability to intelligently handle multiple prompts. We put our findings to a real-world test by running Pet on RAFT, a benchmark of tasks taken from realistic NLP applications for which no labeled dev or test sets are available. Pet achieves a new state of the art on RAFT and performs close to non-expert humans for 7 out of 11 tasks. These results demonstrate that prompt-based learners can successfully be applied in true few-shot settings and underpin our belief that learning from instructions will play an important role on the path towards human-like few-shot learning capabilities.</abstract>
       <pages>716–731</pages>
@@ -631,8 +631,8 @@
     <paper id="42">
       <title>Heterogeneous Supervised Topic Models</title>
       <author><first>Dhanya</first><last>Sridhar</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <doi>10.1162/tacl_a_00487</doi>
       <abstract>Researchers in the social sciences are often interested in the relationship between text and an outcome of interest, where the goal is to both uncover latent patterns in the text and predict outcomes for unseen texts. To this end, this paper develops the heterogeneous supervised topic model (HSTM), a probabilistic approach to text analysis and prediction. HSTMs posit a joint model of text and outcomes to find heterogeneous patterns that help with both text analysis and prediction. The main benefit of HSTMs is that they capture heterogeneity in the relationship between text and the outcome across latent topics. To fit HSTMs, we develop a variational inference algorithm based on the auto-encoding variational Bayes framework. We study the performance of HSTMs on eight datasets and find that they consistently outperform related methods, including fine-tuned black-box models. Finally, we apply HSTMs to analyze news articles labeled with pro- or anti-tone. We find evidence of differing language used to signal a pro- and anti-tone.</abstract>
       <pages>732–745</pages>
@@ -707,8 +707,8 @@
       <title>Generate, Annotate, and Learn: <fixed-case>NLP</fixed-case> with Synthetic Text</title>
       <author><first>Xuanli</first><last>He</last></author>
       <author><first>Islam</first><last>Nassar</last></author>
-      <author><first>Jamie</first><last>Kiros</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="jamie-kiros"><first>Jamie</first><last>Kiros</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Mohammad</first><last>Norouzi</last></author>
       <doi>10.1162/tacl_a_00492</doi>
       <abstract>This paper studies the use of language models as a source of synthetic unlabeled text for NLP. We formulate a general framework called “generate, annotate, and learn (GAL)” to take advantage of synthetic text within knowledge distillation, self-training, and few-shot learning applications. To generate high-quality task-specific text, we either fine-tune LMs on inputs from the task of interest, or prompt large LMs with few examples. We use the best available classifier to annotate synthetic text with soft pseudo labels for knowledge distillation and self-training, and use LMs to obtain hard labels for few-shot learning. We train new supervised models on the combination of labeled and pseudo-labeled data, which results in significant gains across several applications. We investigate key components of GAL and present theoretical and empirical arguments against the use of class-conditional LMs to generate synthetic labeled text instead of unlabeled text. GAL achieves new state-of-the-art knowledge distillation results for 6-layer transformers on the GLUE leaderboard.</abstract>
@@ -721,7 +721,7 @@
       <title>Saturated Transformers are Constant-Depth Threshold Circuits</title>
       <author><first>William</first><last>Merrill</last></author>
       <author><first>Ashish</first><last>Sabharwal</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00493</doi>
       <abstract>Transformers have become a standard neural network architecture for many NLP problems, motivating theoretical analysis of their power in terms of formal languages. Recent work has shown that transformers with hard attention are quite limited in power (Hahn, 2020), as they can be simulated by constant-depth AND/OR circuits (Hao et al., 2022). However, hard attention is a strong assumption, which may complicate the relevance of these results in practice. In this work, we analyze the circuit complexity of transformers with saturated attention: a generalization of hard attention that more closely captures the attention patterns learnable in practical transformers. We first show that saturated transformers transcend the known limitations of hard-attention transformers. We then prove saturated transformers with floating-point values can be simulated by constant-depth threshold circuits, giving the class TC0 as an upper bound on the formal languages they recognize.</abstract>
       <pages>843–856</pages>
@@ -731,7 +731,7 @@
     </paper>
     <paper id="50">
       <title>Reducing Conversational Agents’ Overconfidence Through Linguistic Calibration</title>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Arthur</first><last>Szlam</last></author>
       <author><first>Emily</first><last>Dinan</last></author>
       <author><first>Y-Lan</first><last>Boureau</last></author>
@@ -746,7 +746,7 @@
       <title>A Survey of Text Games for Reinforcement Learning Informed by Natural Language</title>
       <author><first>Philip</first><last>Osborne</last></author>
       <author><first>Heido</first><last>Nõmm</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <doi>10.1162/tacl_a_00495</doi>
       <abstract>Reinforcement Learning has shown success in a number of complex virtual environments. However, many challenges still exist towards solving problems with natural language as a core component. Interactive Fiction Games (or Text Games) are one such problem type that offer a set of safe, partially observable environments where natural language is required as part of the Reinforcement Learning solution. Therefore, this survey’s aim is to assist in the development of new Text Game problem settings and solutions for Reinforcement Learning informed by natural language. Specifically, this survey: 1) introduces the challenges in Text Game Reinforcement Learning problems, 2) outlines the generation tools for rendering Text Games and the subsequent environments generated, and 3) compares the agent architectures currently applied to provide a systematic review of benchmark methodologies and opportunities for future researchers.</abstract>
       <pages>873–887</pages>
@@ -779,7 +779,7 @@
       <title>Learning <fixed-case>E</fixed-case>nglish with <fixed-case>P</fixed-case>eppa <fixed-case>P</fixed-case>ig</title>
       <author><first>Mitja</first><last>Nikolaus</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <doi>10.1162/tacl_a_00498</doi>
       <abstract>Recent computational models of the acquisition of spoken language via grounding in perception exploit associations between spoken and visual modalities and learn to represent speech and visual data in a joint vector space. A major unresolved issue from the point of ecological validity is the training data, typically consisting of images or videos paired with spoken descriptions of what is depicted. Such a setup guarantees an unrealistically strong correlation between speech and the visual data. In the real world the coupling between the linguistic and the visual modality is loose, and often confounded by correlations with non-semantic aspects of the speech signal. Here we address this shortcoming by using a dataset based on the children’s cartoon Peppa Pig. We train a simple bi-modal architecture on the portion of the data consisting of dialog between characters, and evaluate on segments containing descriptive narrations. Despite the weak and confounded signal in this training data, our model succeeds at learning aspects of the visual semantics of spoken language.</abstract>
       <pages>922–936</pages>
@@ -804,7 +804,7 @@
       <title>Adapting to the Long Tail: A Meta-Analysis of Transfer Learning Research for Language Understanding Tasks</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Jill</first><last>Lehman</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <doi>10.1162/tacl_a_00500</doi>
       <abstract>Natural language understanding (NLU) has made massive progress driven by large benchmarks, but benchmarks often leave a long tail of infrequent phenomena underrepresented. We reflect on the question: Have transfer learning methods sufficiently addressed the poor performance of benchmark-trained models on the long tail? We conceptualize the long tail using macro-level dimensions (underrepresented genres, topics, etc.), and perform a qualitative meta-analysis of 100 representative papers on transfer learning research for NLU. Our analysis asks three questions: (i) Which long tail dimensions do transfer learning studies target? (ii) Which properties of adaptation methods help improve performance on the long tail? (iii) Which methodological gaps have greatest negative impact on long tail performance? Our answers highlight major avenues for future research in transfer learning for the long tail. Lastly, using our meta-analysis framework, we perform a case study comparing the performance of various adaptation methods on clinical narratives, which provides interesting insights that may enable us to make progress along these future avenues.</abstract>
       <pages>956–980</pages>
@@ -815,7 +815,7 @@
       <title>How to Dissect a <fixed-case>M</fixed-case>uppet: The Structure of Transformer Embedding Spaces</title>
       <author><first>Timothee</first><last>Mickus</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <doi>10.1162/tacl_a_00501</doi>
       <abstract>Pretrained embeddings based on the Transformer architecture have taken the NLP community by storm. We show that they can mathematically be reframed as a sum of vector factors and showcase how to use this reframing to study the impact of each component. We provide evidence that multi-head attentions and feed-forwards are not equally useful in all downstream applications, as well as a quantitative overview of the effects of finetuning on the overall embedding space. This approach allows us to draw connections to a wide range of previous studies, from vector space anisotropy to attention weights.</abstract>
       <pages>981–996</pages>
@@ -851,7 +851,7 @@
       <author><first>Arabella</first><last>Sinclair</last></author>
       <author><first>Jaap</first><last>Jumelet</last></author>
       <author><first>Willem</first><last>Zuidema</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <doi>10.1162/tacl_a_00504</doi>
       <abstract>We investigate the extent to which modern neural language models are susceptible to structural priming, the phenomenon whereby the structure of a sentence makes the same structure more probable in a follow-up sentence. We explore how priming can be used to study the potential of these models to learn abstract structural information, which is a prerequisite for good performance on tasks that require natural language understanding skills. We introduce a novel metric and release Prime-LM, a large corpus where we control for various linguistic factors that interact with priming strength. We find that Transformer models indeed show evidence of structural priming, but also that the generalizations they learned are to some extent modulated by semantic information. Our experiments also show that the representations acquired by the models may not only encode abstract sequential structure but involve certain level of hierarchical syntactic information. More generally, our study shows that the priming paradigm is a useful, additional tool for gaining insights into the capacities of language models and opens the door to future priming-based investigations that probe the model’s internal states.1</abstract>
       <pages>1031–1050</pages>
@@ -867,7 +867,7 @@
       <author><first>Hugo</first><last>Laurençon</last></author>
       <author><first>Salah</first><last>Zaiem</last></author>
       <author><first>Abdelrahman</first><last>Mohamed</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <doi>10.1162/tacl_a_00505</doi>
       <abstract>Finding word boundaries in continuous speech is challenging as there is little or no equivalent of a ‘space’ delimiter between words. Popular Bayesian non-parametric models for text segmentation (Goldwater et al., 2006, 2009) use a Dirichlet process to jointly segment sentences and build a lexicon of word types. We introduce DP-Parse, which uses similar principles but only relies on an instance lexicon of word tokens, avoiding the clustering errors that arise with a lexicon of word types. On the Zero Resource Speech Benchmark 2017, our model sets a new speech segmentation state-of-the-art in 5 languages. The algorithm monotonically improves with better input representations, achieving yet higher scores when fed with weakly supervised inputs. Despite lacking a type lexicon, DP-Parse can be pipelined to a language model and learn semantic and syntactic representations as assessed by a new spoken word embedding benchmark. 1</abstract>
@@ -908,7 +908,7 @@
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Deborah</first><last>Ferreira</last></author>
       <author><first>Julia</first><last>Rozanova</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <doi>10.1162/tacl_a_00508</doi>
       <abstract>This paper presents Diff-Explainer, the first hybrid framework for explainable multi-hop inference that integrates explicit constraints with neural architectures through differentiable convex optimization. Specifically, Diff- Explainer allows for the fine-tuning of neural representations within a constrained optimization framework to answer and explain multi-hop questions in natural language. To demonstrate the efficacy of the hybrid framework, we combine existing ILP-based solvers for multi-hop Question Answering (QA) with Transformer-based representations. An extensive empirical evaluation on scientific and commonsense QA tasks demonstrates that the integration of explicit constraints in a end-to-end differentiable framework can significantly improve the performance of non- differentiable ILP solvers (8.91%–13.3%). Moreover, additional analysis reveals that Diff-Explainer is able to achieve strong performance when compared to standalone Transformers and previous multi-hop approaches while still providing structured explanations in support of its predictions.</abstract>
       <pages>1103–1119</pages>
@@ -938,7 +938,7 @@
       <author><first>Justin</first><last>Grimmer</last></author>
       <author><first>Roi</first><last>Reichart</last></author>
       <author><first>Margaret E.</first><last>Roberts</last></author>
-      <author><first>Brandon M.</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon M.</first><last>Stewart</last></author>
       <author><first>Victor</first><last>Veitch</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <doi>10.1162/tacl_a_00511</doi>
@@ -967,7 +967,7 @@
       <author><first>Shutong</first><last>Feng</last></author>
       <author><first>Christian</first><last>Geishauser</last></author>
       <author><first>Hsien-Chin</first><last>Lin</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <doi>10.1162/tacl_a_00513</doi>
       <abstract>Generalizing dialogue state tracking (DST) to new data is especially challenging due to the strong reliance on abundant and fine-grained supervision during training. Sample sparsity, distributional shift, and the occurrence of new concepts and topics frequently lead to severe performance degradation during inference. In this paper we propose a training strategy to build extractive DST models without the need for fine-grained manual span labels. Two novel input-level dropout methods mitigate the negative impact of sample sparsity. We propose a new model architecture with a unified encoder that supports value as well as slot independence by leveraging the attention mechanism. We combine the strengths of triple copy strategy DST and value matching to benefit from complementary predictions without violating the principle of ontology independence. Our experiments demonstrate that an extractive DST model can be trained without manual span labels. Our architecture and training strategies improve robustness towards sample sparsity, new concepts, and topics, leading to state-of-the-art performance on a range of benchmarks. We further highlight our model’s ability to effectively learn from non-dialogue data.</abstract>
       <pages>1175–1192</pages>
@@ -1087,7 +1087,7 @@
     <paper id="78">
       <title>Investigating Reasons for Disagreement in Natural Language Inference</title>
       <author><first>Nan-Jiang</first><last>Jiang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <doi>10.1162/tacl_a_00523</doi>
       <abstract>We investigate how disagreement in natural language inference (NLI) annotation arises. We developed a taxonomy of disagreement sources with 10 categories spanning 3 high- level classes. We found that some disagreements are due to uncertainty in the sentence meaning, others to annotator biases and task artifacts, leading to different interpretations of the label distribution. We explore two modeling approaches for detecting items with potential disagreement: a 4-way classification with a “Complicated” label in addition to the three standard NLI labels, and a multilabel classification approach. We found that the multilabel classification is more expressive and gives better recall of the possible interpretations in the data.</abstract>
       <pages>1357–1374</pages>
@@ -1123,8 +1123,8 @@
       <author><first>Samuel</first><last>Barrett</last></author>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <doi>10.1162/tacl_a_00526</doi>
       <abstract>We introduce Transformer Grammars (TGs), a novel class of Transformer language models that combine (i) the expressive power, scalability, and strong performance of Transformers and (ii) recursive syntactic compositions, which here are implemented through a special attention mask and deterministic transformation of the linearized tree. We find that TGs outperform various strong baselines on sentence-level language modeling perplexity, as well as on multiple syntax-sensitive language modeling evaluation metrics. Additionally, we find that the recursive syntactic composition bottleneck which represents each sentence as a single vector harms perplexity on document-level language modeling, providing evidence that a different kind of memory mechanism—one that is independent of composed syntactic representations—plays an important role in current successful models of long text.</abstract>
       <pages>1423–1439</pages>
@@ -1159,7 +1159,7 @@
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Ehsan</first><last>Kamalloo</last></author>
       <author><first>Sivan</first><last>Milton</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Edoardo M.</first><last>Ponti</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
diff --git a/data/xml/2022.tal.xml b/data/xml/2022.tal.xml
index 98e71cae41..2bb394a9f4 100644
--- a/data/xml/2022.tal.xml
+++ b/data/xml/2022.tal.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-07-25" type="proceedings">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 63, Numéro 1 : Varia [Varia]</booktitle>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -40,7 +40,7 @@
       <title>Survey on Narrative Structure: from Linguistic Theories to Automatic Extraction Approaches</title>
       <author><first>Aman</first><last>Berhe</last></author>
       <author><first>Camille</first><last>Guinaudeau</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <pages>63–87</pages>
       <url hash="95212c9e">2022.tal-1.3</url>
       <bibkey>berhe-etal-2022-survey</bibkey>
@@ -50,7 +50,7 @@
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 63, Numéro 2 : Traitement automatique des langues intermodal et multimodal [Cross-modal and multimodal natural language processing]</booktitle>
       <editor><first>Gwénolé</first><last>Lecorvé</last></editor>
-      <editor><first>John D.</first><last>Kelleher</last></editor>
+      <editor id="john-kelleher"><first>John D.</first><last>Kelleher</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
       <year>2022</year>
@@ -76,7 +76,7 @@
       <author><first>Camille</first><last>Guinaudeau</last></author>
       <author><first>Hervé</first><last>Le Borgne</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Jesús</first><last>Lovón Melgarejo</last></author>
       <pages>15–39</pages>
       <url hash="67446ff2">2022.tal-2.2</url>
@@ -112,9 +112,9 @@
   <volume id="3" ingest-date="2023-07-25" type="proceedings">
     <meta>
       <booktitle>Traitement Automatique des Langues, Volume 63, Numéro 3 : Etats de l'art en TAL [Review articles in NLP]</booktitle>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
-      <editor><first>Sophie</first><last>Rosset</last></editor>
+      <editor id="sophie-rosset"><first>Sophie</first><last>Rosset</last></editor>
       <editor><first>Pascale</first><last>Sébillot</last></editor>
       <publisher>ATALA (Association pour le Traitement Automatique des Langues)</publisher>
       <address>France</address>
@@ -150,7 +150,7 @@
       <title>Fillers in Spoken Language Understanding: Computational and Psycholinguistic Perspectives</title>
       <author><first>Tanvi</first><last>Dinkar</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <pages>37–62</pages>
       <url hash="4f40768e">2022.tal-3.3</url>
       <bibkey>dinkar-etal-2022-fillers</bibkey>
diff --git a/data/xml/2022.tdle.xml b/data/xml/2022.tdle.xml
index 4401b68594..bad865cd6b 100644
--- a/data/xml/2022.tdle.xml
+++ b/data/xml/2022.tdle.xml
@@ -6,7 +6,7 @@
       <editor><first>Itziar</first><last>Aldabe</last></editor>
       <editor><first>Begoña</first><last>Altuna</last></editor>
       <editor><first>Aritz</first><last>Farwell</last></editor>
-      <editor><first>German</first><last>Rigau</last></editor>
+      <editor id="german-rigau"><first>German</first><last>Rigau</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -24,7 +24,7 @@
       <author><first>Owen</first><last>Gallagher</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Jane</first><last>Dunne</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>1–12</pages>
@@ -44,8 +44,8 @@
     <paper id="3">
       <title>Collaborative Metadata Aggregation and Curation in Support of Digital Language Equality Monitoring</title>
       <author><first>Maria</first><last>Giagkou</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
       <author><first>Athanasia</first><last>Kolovou</last></author>
       <author><first>Leon</first><last>Voukoutis</last></author>
@@ -69,7 +69,7 @@
       <author><first>Daša</first><last>Farkaš</last></author>
       <author><first>Matea</first><last>Filko</last></author>
       <author><first>Artūrs</first><last>Vasiļevskis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Jānis</first><last>Ziediņš</last></author>
       <author><first>Željka</first><last>Motika</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
@@ -90,7 +90,7 @@
       <author><first>Carmen</first><last>Magariños</last></author>
       <author><first>Adina Ioana</first><last>Vladu</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
-      <author><first>José Ramom</first><last>Pichel</last></author>
+      <author id="jose-ramom-pichel-campos"><first>José Ramom</first><last>Pichel</last></author>
       <author><first>Marcos</first><last>García</last></author>
       <author><first>Pablo</first><last>Gamallo</last></author>
       <author><first>Elisa</first><last>Fernández Rei</last></author>
diff --git a/data/xml/2022.term.xml b/data/xml/2022.term.xml
index 703d8b5b29..17781d15e1 100644
--- a/data/xml/2022.term.xml
+++ b/data/xml/2022.term.xml
@@ -6,7 +6,7 @@
       <editor><first>Rute</first><last>Costa</last></editor>
       <editor><first>Sara</first><last>Carvalho</last></editor>
       <editor><first>Ana Ostroški</first><last>Anić</last></editor>
-      <editor><first>Anas Fahad</first><last>Khan</last></editor>
+      <editor id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -30,7 +30,7 @@
       <title>Knowledge Representation and Language Simplification of Human Rights</title>
       <author><first>Sara</first><last>Silecchia</last></author>
       <author><first>Federica</first><last>Vezzani</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <pages>8–12</pages>
       <abstract>In this paper, we propose the description of a very recent interdisciplinary project aiming at analysing both the conceptual and linguistic dimensions of humanitarian rights terminology. This analysis will result in the form of a new knowledge-based multilingual terminological resource which is designed in order to meet the FAIR principles for Open Science and will serve, in the future, as a prototype for the development of a new software for the simplified rewriting of international legal texts relating to human rights. Given the early stage of the project, we will focus on the description of its rationale, the planned workflow, and the theoretical approach which will be adopted to achieve the main goal of this ambitious research project.</abstract>
       <url hash="18bf9f03">2022.term-1.2</url>
@@ -51,7 +51,7 @@
       <title>A Dataset for Term Extraction in <fixed-case>H</fixed-case>indi</title>
       <author><first>Shubhanker</first><last>Banerjee</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>19–25</pages>
       <abstract>Automatic Term Extraction (ATE) is one of the core problems in natural language processing and forms a key component of text mining pipelines of domain specific corpora. Complex low-level tasks such as machine translation and summarization for domain specific texts necessitate the use of term extraction systems. However, the development of these systems requires the use of large annotated datasets and thus there has been little progress made on this front for under-resourced languages. As a part of ongoing research, we present a dataset for term extraction from Hindi texts in this paper. To the best of our knowledge, this is the first dataset that provides term annotated documents for Hindi. Furthermore, we have evaluated this dataset on statistical term extraction methods and the results obtained indicate the problems associated with development of term extractors for under-resourced languages.</abstract>
       <url hash="65690975">2022.term-1.4</url>
@@ -73,7 +73,7 @@
       <author><first>Peter</first><last>Lundberg</last></author>
       <author><first>Tomas</first><last>Bjerner</last></author>
       <author><first>Yosef</first><last>Al-Abasse</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <author><first>Thomas</first><last>Vakili</last></author>
       <pages>30–32</pages>
       <abstract>In the experiments briefly presented in this abstract, we compare the performance of a generalist Swedish pre-trained language model with a domain-specific Swedish pre-trained model on the downstream task of focussed terminology extraction of implant terms, which are terms that indicate the presence of implants in the body of patients. The fine-tuning is identical for both models. For the search strategy we rely on KD-Tree that we feed with two different lists of term seeds, one with noise and one without noise. Results shows that the use of a domain-specific pre-trained language model has a positive impact on focussed terminology extraction only when using term seeds without noise.</abstract>
@@ -83,7 +83,7 @@
     <paper id="7">
       <title><fixed-case>D</fixed-case>-Terminer: Online Demo for Monolingual and Bilingual Automatic Term Extraction</title>
       <author><first>Ayla</first><last>Rigouts Terryn</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>33–40</pages>
       <abstract>This contribution presents D-Terminer: an open access, online demo for monolingual and multilingual automatic term extraction from parallel corpora. The monolingual term extraction is based on a recurrent neural network, with a supervised methodology that relies on pretrained embeddings. Candidate terms can be tagged in their original context and there is no need for a large corpus, as the methodology will work even for single sentences. With the bilingual term extraction from parallel corpora, potentially equivalent candidate term pairs are extracted from translation memories and manual annotation of the results shows that good equivalents are found for most candidate terms. Accompanying the release of the demo is an updated version of the ACTER Annotated Corpora for Term Extraction Research (version 1.5).</abstract>
diff --git a/data/xml/2022.textgraphs.xml b/data/xml/2022.textgraphs.xml
index 0cd99ae4d7..6b262972b5 100644
--- a/data/xml/2022.textgraphs.xml
+++ b/data/xml/2022.textgraphs.xml
@@ -69,7 +69,7 @@
       <title><fixed-case>GUSUM</fixed-case>: Graph-based Unsupervised Summarization Using Sentence Features Scoring and Sentence-<fixed-case>BERT</fixed-case></title>
       <author><first>Tuba</first><last>Gokhan</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>44–53</pages>
       <abstract>Unsupervised extractive document summarization aims to extract salient sentences from a document without requiring a labelled corpus. In existing graph-based methods, vertex and edge weights are usually created by calculating sentence similarities. In this paper, we develop a Graph-Based Unsupervised Summarization(GUSUM) method for extractive text summarization based on the principle of including the most important sentences while excluding sentences with similar meanings in the summary. We modify traditional graph ranking algorithms with recent sentence embedding models and sentence features and modify how sentence centrality is computed. We first define the sentence feature scores represented at the vertices, indicating the importance of each sentence in the document. After this stage, we use Sentence-BERT for obtaining sentence embeddings to better capture the sentence meaning. In this way, we define the edges of a graph where semantic similarities are represented. Next we create an undirected graph that includes sentence significance and similarities between sentences. In the last stage, we determine the most important sentences in the document with the ranking method we suggested on the graph created. Experiments on CNN/Daily Mail, New York Times, arXiv, and PubMed datasets show our approach achieves high performance on unsupervised graph-based summarization when evaluated both automatically and by humans.</abstract>
       <url hash="a92de595">2022.textgraphs-1.5</url>
@@ -87,7 +87,7 @@
       <title>Text-Aware Graph Embeddings for Donation Behavior Prediction</title>
       <author><first>MeiXing</first><last>Dong</last></author>
       <author><first>Xueming</first><last>Xu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>60–69</pages>
       <abstract>Predicting user behavior is essential for a large number of applications including recommender and dialog systems, and more broadly in domains such as healthcare, education, and economics. In this paper, we show that we can effectively predict donation behavior by using text-aware graph models, building upon graphs that connect user behaviors and their interests. Using a university donation dataset, we show that the graph representation significantly improves over learning from textual representations. Moreover, we show how incorporating implicit information inferred from text associated with the graph entities brings additional improvements. Our results demonstrate the role played by text-aware graph representations in predicting donation behavior.</abstract>
       <url hash="6301d0bc">2022.textgraphs-1.7</url>
@@ -97,7 +97,7 @@
       <title>Word Sense Disambiguation of <fixed-case>F</fixed-case>rench Lexicographical Examples Using Lexical Networks</title>
       <author><first>Aman</first><last>Sinha</last></author>
       <author><first>Sandrine</first><last>Ollinger</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>70–76</pages>
       <abstract>This paper focuses on the task of word sense disambiguation (WSD) on lexicographic examples relying on the French Lexical Network (fr-LN). For this purpose, we exploit the lexical and relational properties of the network, that we integrated in a feedforward neural WSD model on top of pretrained French BERT embeddings. We provide a comparative study with various models and further show the impact of our approach regarding polysemic units.</abstract>
       <url hash="463bcec4">2022.textgraphs-1.8</url>
@@ -130,7 +130,7 @@
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Deborah</first><last>Ferreira</last></author>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <pages>105–113</pages>
       <abstract>The Shared Task on Natural Language Premise Selection (NLPS) asks participants to retrieve the set of premises that are most likely to be useful for proving a given mathematical statement from a supporting knowledge base. While previous editions of the TextGraphs shared tasks series targeted multi-hop inference for explanation regeneration in the context of science questions (Thayaparan et al., 2021; Jansen and Ustalov, 2020, 2019), NLPS aims to assess the ability of state-of-the-art approaches to operate on a mixture of natural and mathematical language and model complex multi-hop reasoning dependencies between statements. To this end, this edition of the shared task makes use of a large set of approximately 21k mathematical statements extracted from the PS-ProofWiki dataset (Ferreira and Freitas, 2020a). In this summary paper, we present the results of the 1st edition of the NLPS task, providing a description of the evaluation data, and the participating systems. Additionally, we perform a detailed analysis of the results, evaluating various aspects involved in mathematical language processing and multi-hop inference. The best-performing system achieved a MAP of 15.39, improving the performance of a TF-IDF baseline by approximately 3.0 MAP.</abstract>
diff --git a/data/xml/2022.trac.xml b/data/xml/2022.trac.xml
index 55a5b29012..ec7afaa2d0 100644
--- a/data/xml/2022.trac.xml
+++ b/data/xml/2022.trac.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022)</booktitle>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Daniel</first><last>Kadar</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
@@ -62,7 +62,7 @@
     <paper id="5">
       <title>The Role of Context in Detecting the Target of Hate Speech</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>37–42</pages>
       <abstract>Online hate speech detection is an inherently challenging task that has recently received much attention from the natural language processing community. Despite a substantial increase in performance, considerable challenges remain and include encoding contextual information into automated hate speech detection systems. In this paper, we focus on detecting the target of hate speech in Dutch social media: whether a hateful Facebook comment is directed against migrants or not (i.e., against someone else). We manually annotate the relevant conversational context and investigate the effect of different aspects of context on performance when adding it to a Dutch transformer-based pre-trained language model, BERTje. We show that performance of the model can be significantly improved by integrating relevant contextual information.</abstract>
       <url hash="6e6c87ec">2022.trac-1.5</url>
@@ -82,7 +82,7 @@
       <title>Is More Data Better? Re-thinking the Importance of Efficiency in Abusive Language Detection with Transformers-Based Active Learning</title>
       <author><first>Hannah</first><last>Kirk</last></author>
       <author><first>Bertie</first><last>Vidgen</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <pages>52–61</pages>
       <abstract>Annotating abusive language is expensive, logistically complex and creates a risk of psychological harm. However, most machine learning research has prioritized maximizing effectiveness (i.e., F1 or accuracy score) rather than data efficiency (i.e., minimizing the amount of data that is annotated). In this paper, we use simulated experiments over two datasets at varying percentages of abuse to demonstrate that transformers-based active learning is a promising approach to substantially raise efficiency whilst still maintaining high effectiveness, especially when abusive content is a smaller percentage of the dataset. This approach requires a fraction of labeled data to reach performance equivalent to training over the full dataset.</abstract>
       <url hash="e8b53d2c">2022.trac-1.7</url>
diff --git a/data/xml/2022.trustnlp.xml b/data/xml/2022.trustnlp.xml
index 4c1dbe3aab..3e86400815 100644
--- a/data/xml/2022.trustnlp.xml
+++ b/data/xml/2022.trustnlp.xml
@@ -48,7 +48,7 @@
     </paper>
     <paper id="3">
       <title>Does Moral Code have a Moral Code? Probing Delphi’s Moral Philosophy</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Esma</first><last>Balkir</last></author>
       <pages>26-42</pages>
@@ -108,7 +108,7 @@
       <author><first>Esma</first><last>Balkir</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <pages>80-92</pages>
       <abstract>Motivations for methods in explainable artificial intelligence (XAI) often include detecting, quantifying and mitigating bias, and contributing to making machine learning models fairer. However, exactly how an XAI method can help in combating biases is often left unspecified. In this paper, we briefly review trends in explainability and fairness in NLP research, identify the current practices in which explainability methods are applied to detect and mitigate bias, and investigate the barriers preventing XAI methods from being used more widely in tackling fairness issues.</abstract>
       <url hash="ceddb74d">2022.trustnlp-1.8</url>
diff --git a/data/xml/2022.tsar.xml b/data/xml/2022.tsar.xml
index b4c24c44f3..ff43d9ed38 100644
--- a/data/xml/2022.tsar.xml
+++ b/data/xml/2022.tsar.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-01-17" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Text Simplification, Accessibility, and Readability (TSAR-2022)</booktitle>
-      <editor><first>Sanja</first><last>Štajner</last></editor>
+      <editor id="sanja-stajner"><first>Sanja</first><last>Štajner</last></editor>
       <editor><first>Horacio</first><last>Saggion</last></editor>
-      <editor><first>Daniel</first><last>Ferrés</last></editor>
+      <editor id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></editor>
       <editor><first>Matthew</first><last>Shardlow</last></editor>
       <editor><first>Kim Cheng</first><last>Sheang</last></editor>
       <editor><first>Kai</first><last>North</last></editor>
@@ -108,7 +108,7 @@
       <author><first>Itziar</first><last>Gonzalez-Dios</last><affiliation>HiTZ Basque Center for Language Technologies - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <author><first>Iker</first><last>Gutiérrez-Fandiño</last><affiliation>Universität Leipzig</affiliation></author>
       <author><first>Oscar m.</first><last>Cumbicus-Pineda</last><affiliation>Ixa group and Carrera de Computación, UPV/EHU and UNL</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>86-97</pages>
       <abstract>Automatic Text simplification (ATS) seeks to reduce the complexity of a text for a general public or a target audience. In the last years, deep learning methods have become the most used systems in ATS research, but these systems need large and good quality datasets to be evaluated. Moreover, these data are available on a large scale only for English and in some cases with restrictive licenses. In this paper, we present IrekiaLF_es, an open-license benchmark for Spanish text simplification. It consists of a document-level corpus and a sentence-level test set that has been manually aligned. We also conduct a neurolinguistically-based evaluation of the corpus in order to reveal its suitability for text simplification. This evaluation follows the Lexicon-Unification-Linearity (LeULi) model of neurolinguistic complexity assessment. Finally, we present a set of experiments and baselines of ATS systems in a zero-shot scenario.</abstract>
       <url hash="6ddc311a">2022.tsar-1.8</url>
@@ -182,7 +182,7 @@
     </paper>
     <paper id="14">
       <title>An Investigation into the Effect of Control Tokens on Text Simplification</title>
-      <author id="zihao-li"><first>Zihao</first><last>Li</last><affiliation>Manchester Metropolitan University</affiliation></author>
+      <author><first>Zihao</first><last>Li</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Matthew</first><last>Shardlow</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Saeed</first><last>Hassan</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <pages>154-165</pages>
diff --git a/data/xml/2022.tu.xml b/data/xml/2022.tu.xml
index 538fc58701..93c19d7fb1 100644
--- a/data/xml/2022.tu.xml
+++ b/data/xml/2022.tu.xml
@@ -7,7 +7,7 @@
       <editor><first>Thien Huu</first><last>Nguyen</last></editor>
       <editor><first>Viet Dac</first><last>Lai</last></editor>
       <editor><first>Amir Pouran Ben</first><last>Veyseh</last></editor>
-      <editor><first>Trung H.</first><last>Bui</last></editor>
+      <editor id="trung-bui"><first>Trung H.</first><last>Bui</last></editor>
       <editor><first>David Seunghyun</first><last>Yoon</last></editor>
       <publisher>International Conference on Computational Linguistics</publisher>
       <address>Gyeongju, South Korea</address>
@@ -47,7 +47,7 @@
       <title>Model Transfer for Event tracking as Transcript Understanding for Videos of Small Group Interaction</title>
       <author><first>Sumit</first><last>Agarwal</last></author>
       <author><first>Rosanna</first><last>Vitiello</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>20–29</pages>
       <abstract>Videos of group interactions contain a wealth of information beyond the information directly communicated in a transcript of the discussion. Tracking who has participated throughout an extended interaction and what each of their trajectories has been in relation to one another is the foundation for joint activity understanding, though it comes with some unique challenges in videos of tightly coupled group work. Motivated by insights into the properties of such scenarios, including group composition and the properties of task-oriented, goal directed tasks, we present a successful proof-of-concept. In particular, we present a transfer experiment to a dyadic robot construction task, an ablation study, and a qualitative analysis.</abstract>
       <url hash="055919ca">2022.tu-1.3</url>
diff --git a/data/xml/2022.udfestbr.xml b/data/xml/2022.udfestbr.xml
index 4a91706e4b..b622dcdb2c 100644
--- a/data/xml/2022.udfestbr.xml
+++ b/data/xml/2022.udfestbr.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>B</fixed-case>razilian Festival</booktitle>
       <editor><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></editor>
-      <editor><first>Ariani</first><last>Di-Felippo</last></editor>
-      <editor><first>Norton Trevisan</first><last>Roman</last></editor>
+      <editor id="ariani-di-felippo"><first>Ariani</first><last>Di-Felippo</last></editor>
+      <editor id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Fortaleza, Brazil</address>
       <month>March</month>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Polishing the gold – how much revision do we need in treebanks?</title>
       <author><first>Elvis</first><last>Souza</last></author>
-      <author><first>Claudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Claudia</first><last>Freitas</last></author>
       <pages>1–11</pages>
       <url hash="c4709f4a">2022.udfestbr-1.2</url>
       <bibkey>souza-freitas-2022-polishing</bibkey>
@@ -58,7 +58,7 @@
     <paper id="5">
       <title>Still on arguments and adjuncts: the status of the indirect object and the adverbial adjunct relations in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Elvis</first><last>Souza</last></author>
-      <author><first>Claudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Claudia</first><last>Freitas</last></author>
       <pages>1–10</pages>
       <url hash="86c474fe">2022.udfestbr-1.5</url>
       <bibkey>souza-freitas-2022-still</bibkey>
diff --git a/data/xml/2022.umios.xml b/data/xml/2022.umios.xml
index 9e4b0da0fb..e6357dc86a 100644
--- a/data/xml/2022.umios.xml
+++ b/data/xml/2022.umios.xml
@@ -52,7 +52,7 @@
       <author><first>Wafaa</first><last>Mohammed</last><affiliation>University of Tübingen</affiliation></author>
       <author><first>Hassan</first><last>Shahmohammadi</last><affiliation>University of Tübingen</affiliation></author>
       <author><first>Hendrik P. A.</first><last>Lensch</last><affiliation>University of Tübingen</affiliation></author>
-      <author><first>R. Harald</first><last>Baayen</last><affiliation>University of Tübingen</affiliation></author>
+      <author id="harald-baayen"><first>R. Harald</first><last>Baayen</last><affiliation>University of Tübingen</affiliation></author>
       <pages>18-28</pages>
       <abstract>Visual grounding of Language aims at enriching textual representations of language with multiple sources of visual knowledge such as images and videos. Although visual grounding is an area of intense research, inter-lingual aspects of visual grounding have not received much attention. The present study investigates the inter-lingual visual grounding of word embeddings. We propose an implicit alignment technique between the two spaces of vision and language in which inter-lingual textual information interacts in order to enrich pre-trained textual word embeddings. We focus on three languages in our experiments, namely, English, Arabic, and German. We obtained visually grounded vector representations for these languages and studied whether visual grounding on one or multiple languages improved the performance of embeddings on word similarity and categorization benchmarks. Our experiments suggest that inter-lingual knowledge improves the performance of grounded embeddings in similar languages such as German and English. However, inter-lingual grounding of German or English with Arabic led to a slight degradation in performance on word similarity benchmarks. On the other hand, we observed an opposite trend on categorization benchmarks where Arabic had the most improvement on English. In the discussion section, several reasons for those findings are laid out. We hope that our experiments provide a baseline for further research on inter lingual visual grounding.</abstract>
       <url hash="0dbcdd3a">2022.umios-1.3</url>
@@ -77,7 +77,7 @@
       <title>Discourse Relation Embeddings: Representing the Relations between Discourse Segments in Social Media</title>
       <author><first>Youngseo</first><last>Son</last><affiliation>Department of Computer Science, Stony Brook University</affiliation></author>
       <author><first>Vasudha</first><last>Varadarajan</last><affiliation>Department of Computer Science, Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Department of Computer Science, Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Department of Computer Science, Stony Brook University</affiliation></author>
       <pages>45-55</pages>
       <abstract>Discourse relations are typically modeled as a discrete class that characterizes the relation between segments of text (e.g. causal explanations, expansions). However, such predefined discrete classes limit the universe of potential relationships and their nuanced differences. Adding higher-level semantic structure to contextual word embeddings, we propose representing discourse relations as points in high dimensional continuous space. However, unlike words, discourse relations often have no surface form (relations are in between two segments, often with no word or phrase in that gap) which presents a challenge for existing embedding techniques. We present a novel method for automatically creating discourse relation embeddings (DiscRE), addressing the embedding challenge through a weakly supervised, multitask approach to learn diverse and nuanced relations in social media. Results show DiscRE representations obtain the best performance on Twitter discourse relation classification (macro F1=0.76), social media causality prediction (from F1=0.79 to 0.81), and perform beyond modern sentence and word transformers at traditional discourse relation classification, capturing novel nuanced relations (e.g. relations at the intersection of causal explanations and counterfactuals).</abstract>
       <url hash="7a07a905">2022.umios-1.5</url>
@@ -87,7 +87,7 @@
     <paper id="6">
       <title>Understanding Cross-modal Interactions in <fixed-case>V</fixed-case>&amp;<fixed-case>L</fixed-case> Models that Generate Scene Descriptions</title>
       <author><first>Michele</first><last>Cafagna</last><affiliation>University of Malta, Institute of Linguistics and Language Technology</affiliation></author>
-      <author><first>Kees</first><last>van Deemter</last><affiliation>Universiteit Utrecht, Information and Computing Sciences</affiliation></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last><affiliation>Universiteit Utrecht, Information and Computing Sciences</affiliation></author>
       <author><first>Albert</first><last>Gatt</last><affiliation>University of Malta, Institute of Linguistics and Language Technology</affiliation></author>
       <pages>56-72</pages>
       <abstract>Image captioning models tend to describe images in an object-centric way, emphasising visible objects. But image descriptions can also abstract away from objects and describe the type of scene depicted. In this paper, we explore the potential of a state of the art Vision and Language model, VinVL, to caption images at the scene level using (1) a novel dataset which pairs images with both object-centric and scene descriptions. Through (2) an in-depth analysis of the effect of the fine-tuning, we show (3) that a small amount of curated data suffices to generate scene descriptions without losing the capability to identify object-level concepts in the scene; the model acquires a more holistic view of the image compared to when object-centric descriptions are generated. We discuss the parallels between these results and insights from computational and cognitive science research on scene perception.</abstract>
diff --git a/data/xml/2022.vardial.xml b/data/xml/2022.vardial.xml
index 44bc3d5c47..fe432a9cb8 100644
--- a/data/xml/2022.vardial.xml
+++ b/data/xml/2022.vardial.xml
@@ -6,8 +6,8 @@
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <editor><first>Tommi</first><last>Jauhiainen</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gyeongju, Republic of Korea</address>
@@ -132,7 +132,7 @@
       <title>Transfer Learning Improves <fixed-case>F</fixed-case>rench Cross-Domain Dialect Identification: <fixed-case>NRC</fixed-case> @ <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2022</title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
       <author><first>Serge</first><last>Leger</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>109–118</pages>
       <abstract>We describe the systems developed by the National Research Council Canada for the French Cross-Domain Dialect Identification shared task at the 2022 VarDial evaluation campaign. We evaluated two different approaches to this task: SVM and probabilistic classifiers exploiting n-grams as features, and trained from scratch on the data provided; and a pre-trained French language model, CamemBERT, that we fine-tuned on the dialect identification task. The latter method turned out to improve the macro-F1 score on the test set from 0.344 to 0.430 (25% increase), which indicates that transfer learning can be helpful for dialect identification.</abstract>
       <url hash="aba71459">2022.vardial-1.12</url>
@@ -142,7 +142,7 @@
       <title><fixed-case>I</fixed-case>talian Language and Dialect Identification and Regional <fixed-case>F</fixed-case>rench Variety Detection using Adaptive Naive <fixed-case>B</fixed-case>ayes</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>119–129</pages>
       <abstract>This article describes the language identification approach used by the SUKI team in the Identification of Languages and Dialects of Italy and the French Cross-Domain Dialect Identification shared tasks organized as part of the VarDial workshop 2022. We describe some experiments and the preprocessing techniques we used for the training data in preparation for the shared task submissions, which are also discussed. Our Naive Bayes-based adaptive system reached the first position in Italian language identification and came second in the French variety identification task.</abstract>
       <url hash="4c2113ff">2022.vardial-1.13</url>
diff --git a/data/xml/2022.wanlp.xml b/data/xml/2022.wanlp.xml
index 903b2abfda..6d827889f7 100644
--- a/data/xml/2022.wanlp.xml
+++ b/data/xml/2022.wanlp.xml
@@ -6,7 +6,7 @@
       <editor><first>Houda</first><last>Bouamor</last></editor>
       <editor><first>Hend</first><last>Al-Khalifa</last></editor>
       <editor><first>Kareem</first><last>Darwish</last></editor>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <editor><first>Fethi</first><last>Bougares</last></editor>
       <editor><first>Ahmed</first><last>Abdelali</last></editor>
       <editor><first>Nadi</first><last>Tomeh</last></editor>
@@ -38,8 +38,8 @@
     <paper id="2">
       <title>Joint Coreference Resolution for Zeros and non-Zeros in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Abdulrahman</first><last>Aloraini</last><affiliation>Queen Mary University of London</affiliation></author>
-      <author><first>Sameer</first><last>Pradhan</last><affiliation>University of Pennsylvania and cemantix.org</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last><affiliation>University of Pennsylvania and cemantix.org</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>11-21</pages>
       <abstract>Most existing proposals about anaphoric zero pronoun (AZP) resolution regard full mention coreference and AZP resolution as two independent tasks, even though the two tasks are clearly related. The main issues that need tackling to develop a joint model for zero and non-zero mentions are the difference between the two types of arguments (zero pronouns, being null, provide no nominal information) and the lack of annotated datasets of a suitable size in which both types of arguments are annotated for languages other than Chinese and Japanese. In this paper, we introduce two architectures for jointly resolving AZPs and non-AZPs, and evaluate them on Arabic, a language for which, as far as we know, there has been no prior work on joint resolution. Doing this also required creating a new version of the Arabic subset of the standard coreference resolution dataset used for the CoNLL-2012 shared task (Pradhan et al.,2012) in which both zeros and non-zeros are included in a single dataset.</abstract>
       <url hash="2955c206">2022.wanlp-1.2</url>
@@ -62,7 +62,7 @@
       <author><first>Moussa</first><last>Kamal Eddine</last><affiliation>École polytechnique</affiliation></author>
       <author><first>Nadi</first><last>Tomeh</last><affiliation>LIPN, Université Sorbonne Paris Nord</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
-      <author><first>Joseph</first><last>Le Roux</last><affiliation>Université Sorbonne Paris Nord</affiliation></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last><affiliation>Université Sorbonne Paris Nord</affiliation></author>
       <author><first>Michalis</first><last>Vazirgiannis</last><affiliation>Ecole Polytechnique</affiliation></author>
       <pages>31-42</pages>
       <abstract>Like most natural language understanding and generation tasks, state-of-the-art models for summarization are transformer-based sequence-to-sequence architectures that are pretrained on large corpora. While most existing models focus on English, Arabic remains understudied. In this paper we propose AraBART, the first Arabic model in which the encoder and the decoder are pretrained end-to-end, based on BART. We show that AraBART achieves the best performance on multiple abstractive summarization datasets, outperforming strong baselines including a pretrained Arabic BERT-based model, multilingual BART, Arabic T5, and a multilingual T5 model. AraBART is publicly available.</abstract>
@@ -139,11 +139,11 @@
       <author><first>Daliyah</first><last>AlZeer</last><affiliation>Taif University</affiliation></author>
       <author><first>Kawla Mohmad</first><last>Shnqiti</last><affiliation>Clangu</affiliation></author>
       <author><first>Ahmed</first><last>Elbakry</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
       <author><first>Mohamed</first><last>Gabr</last><affiliation>Microsoft</affiliation></author>
       <author><first>Abderrahmane</first><last>Issam</last><affiliation>Archipel Cognitive</affiliation></author>
       <author><first>Abdelrahim</first><last>Qaddoumi</last><affiliation>Nyu</affiliation></author>
-      <author><first>Vijay</first><last>Shanker</last><affiliation>University of Delaware</affiliation></author>
+      <author id="k-vijay-shanker"><first>Vijay</first><last>Shanker</last><affiliation>University of Delaware</affiliation></author>
       <author><first>Mahmoud</first><last>Zyate</last><affiliation>Leyton</affiliation></author>
       <pages>98-107</pages>
       <abstract>In this paper, we present the results and findings of the Shared Task on Gender Rewriting, which was organized as part of the Seventh Arabic Natural Language Processing Workshop. The task of gender rewriting refers to generating alternatives of a given sentence to match different target user gender contexts (e.g., a female speaker with a male listener, a male speaker with a male listener, etc.). This requires changing the grammatical gender (masculine or feminine) of certain words referring to the users. In this task, we focus on Arabic, a gender-marking morphologically rich language. A total of five teams from four countries participated in the shared task.</abstract>
@@ -157,7 +157,7 @@
       <author><first>Hamdy</first><last>Mubarak</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <author><first>Wajdi</first><last>Zaghouani</last><affiliation>Hamad Bin Khalifa University</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>108-118</pages>
       <abstract>Propaganda is defined as an expression of opinion or action by individuals or groups deliberately designed to influence opinions or actions of other individuals or groups with reference to predetermined ends and this is achieved by means of well-defined rhetorical and psychological devices. Currently, propaganda (or persuasion) techniques have been commonly used on social media to manipulate or mislead social media users. Automatic detection of propaganda techniques from textual, visual, or multimodal content has been studied recently, however, major of such efforts are focused on English language content. In this paper, we propose a shared task on detecting propaganda techniques for Arabic textual content. We have done a pilot annotation of 200 Arabic tweets, which we plan to extend to 2,000 tweets, covering diverse topics. We hope that the shared task will help in building a community for Arabic propaganda detection. The dataset will be made publicly available, which can help in future studies.</abstract>
       <url hash="57309b91">2022.wanlp-1.11</url>
@@ -262,7 +262,7 @@
     <paper id="20">
       <title>A Semi-supervised Approach for a Better Translation of Sentiment in Dialectical <fixed-case>A</fixed-case>rabic <fixed-case>UGT</fixed-case></title>
       <author><first>Hadeel</first><last>Saadany</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Emad</first><last>Mohamed</last><affiliation>RGCL, Wolverhampton</affiliation></author>
       <author><first>Ashraf</first><last>Tantawy</last><affiliation>De Montfort University</affiliation></author>
       <pages>214-224</pages>
@@ -286,7 +286,7 @@
       <title>Improving <fixed-case>POS</fixed-case> Tagging for <fixed-case>A</fixed-case>rabic Dialects on Out-of-Domain Texts</title>
       <author><first>Noor</first><last>Abo Mokh</last><affiliation>Indiana University</affiliation></author>
       <author><first>Daniel</first><last>Dakota</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <pages>238-248</pages>
       <abstract>We investigate part of speech tagging for four Arabic dialects (Gulf, Levantine, Egyptian, and Maghrebi), in an out-of-domain setting. More specifically, we look at the effectiveness of 1) upsampling the target dialect in the training data of a joint model, 2) increasing the consistency of the annotations, and 3) using word embeddings pre-trained on a large corpus of dialectal Arabic. We increase the accuracy on average by about 20 percentage points.</abstract>
       <url hash="8801dac4">2022.wanlp-1.22</url>
@@ -392,7 +392,7 @@
     <paper id="32">
       <title>Emoji Sentiment Roles for Sentiment Analysis: A Case Study in <fixed-case>A</fixed-case>rabic Texts</title>
       <author><first>Shatha Ali A.</first><last>Hakami</last><affiliation>University of Birmingham</affiliation></author>
-      <author><first>Robert</first><last>Hendley</last><affiliation>University of Birmingham</affiliation></author>
+      <author id="robert-j-hendley"><first>Robert</first><last>Hendley</last><affiliation>University of Birmingham</affiliation></author>
       <author><first>Phillip</first><last>Smith</last><affiliation>University of Birmingham</affiliation></author>
       <pages>346-355</pages>
       <abstract>Emoji (digital pictograms) are crucial features for textual sentiment analysis. However, analysing the sentiment roles of emoji is very complex. This is due to its dependency on different factors, such as textual context, cultural perspective, interlocutor’s personal traits, interlocutors’ relationships or a platforms’ functional features. This work introduces an approach to analysing the sentiment effects of emoji as textual features. Using an Arabic dataset as a benchmark, our results confirm the borrowed argument that each emoji has three different norms of sentiment role (negative, neutral or positive). Therefore, an emoji can play different sentiment roles depending upon the context. It can behave as an emphasizer, an indicator, a mitigator, a reverser or a trigger of either negative or positive sentiment within a text. In addition, an emoji may have a neutral effect (i.e., no effect) on the sentiment of the text.</abstract>
@@ -444,7 +444,7 @@
     <paper id="35">
       <title>A Pilot Study on the Collection and Computational Analysis of Linguistic Differences Amongst Men and Women in a Kuwaiti <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Dataset</title>
       <author><first>Hesah</first><last>Aldihan</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Robert</first><last>Gaizauskas</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Susan</first><last>Fitzmaurice</last><affiliation>University of Sheffield</affiliation></author>
       <pages>372-380</pages>
       <abstract>This study focuses on the collection and computational analysis of Kuwaiti Arabic (KA), which is considered a low resource dialect, to test different sociolinguistic hypotheses related to gendered language use. In this paper, we describe the collection and analysis of a corpus of WhatsApp Group chats with mixed gender Kuwaiti participants. This corpus, which we are making publicly available, is the first corpus of KA conversational data. We analyse different interactional and linguistic features to get insights about features that may be indicative of gender to inform the development of a gender classification system for KA in an upcoming study. Statistical analysis of our data shows that there is insufficient evidence to claim that there are significant differences amongst men and women with respect to number of turns, length of turns and number of emojis. However, qualitative analysis shows that men and women differ substantially in the types of emojis they use and in their use of lengthened words.</abstract>
@@ -458,7 +458,7 @@
       <author><first>Cibu</first><last>Johny</last><affiliation>Google</affiliation></author>
       <author><first>Raiomond</first><last>Doctor</last><affiliation>Google</affiliation></author>
       <author><first>Brian</first><last>Roark</last><affiliation>Google Inc.</affiliation></author>
-      <author><first>Richard</first><last>Sproat</last><affiliation>Google, Japan</affiliation></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last><affiliation>Google, Japan</affiliation></author>
       <pages>381-387</pages>
       <abstract>This paper presents an open-source software library that provides a set of finite-state transducer (FST) components and corresponding utilities for manipulating the writing systems of languages that use the Perso-Arabic script. The operations include various levels of script normalization, including visual invariance-preserving operations that subsume and go beyond the standard Unicode normalization forms, as well as transformations that modify the visual appearance of characters in accordance with the regional orthographies for eleven contemporary languages from diverse language families. The library also provides simple FST-based romanization and transliteration. We additionally attempt to formalize the typology of Perso-Arabic characters by providing one-to-many mappings from Unicode code points to the languages that use them. While our work focuses on the Arabic script diaspora rather than Arabic itself, this approach could be adopted for any language that uses the Arabic script, thus providing a unified framework for treating a script family used by close to a billion people.</abstract>
       <url hash="b010b8b8">2022.wanlp-1.36</url>
@@ -473,7 +473,7 @@
       <author><first>Christopher</first><last>Madge</last><affiliation>Queen Mary University of London,United Kingdom</affiliation></author>
       <author><first>Juntao</first><last>Yu</last><affiliation>University of Essex,United Kingdom</affiliation></author>
       <author><first>Richard</first><last>Bartle</last><affiliation>University of Essex,United Kingdom</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London,United Kingdom</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London,United Kingdom</affiliation></author>
       <pages>388-393</pages>
       <abstract>Coreference resolution is a key aspect of text comprehension, but the size of the available coreference corpora for Arabic is limited in comparison to the size of the corpora for other languages. In this paper we present a Game-With-A-Purpose called Stroll with a Scroll created to collect from players coreference annotations for Arabic. The key contribution of this work is the embedding of the annotation task in a virtual world setting, as opposed to the puzzle-type games used in previously proposed Games-With-A-Purpose for coreference.</abstract>
       <url hash="b1f53051">2022.wanlp-1.37</url>
@@ -508,7 +508,7 @@
       <title>Optimizing Naive <fixed-case>B</fixed-case>ayes for <fixed-case>A</fixed-case>rabic Dialect Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Heidi</first><last>Jauhiainen</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Krister</first><last>Lindén</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last><affiliation>University of Helsinki</affiliation></author>
       <pages>409-414</pages>
       <abstract>This article describes the language identification system used by the SUKI team in the 2022 Nuanced Arabic Dialect Identification (NADI) shared task. In addition to the system description, we give some details of the dialect identification experiments we conducted while preparing our submissions. In the end, we submitted only one official run. We used a Naive Bayes-based language identifier with character n-grams from one to four, of which we implemented a new version, which automatically optimizes its parameters. We also experimented with clustering the training data according to different topics. With the macro F1 score of 0.1963 on test set A and 0.1058 on test set B, we achieved the 18th position out of the 19 competing teams.</abstract>
       <url hash="c95eed26">2022.wanlp-1.40</url>
@@ -555,7 +555,7 @@
       <author><first>Emna</first><last>Fsih</last><affiliation>ANLP Research Group / Sfax, Tunisia</affiliation></author>
       <author><first>Sameh</first><last>Kchaou</last><affiliation>ANLP Research Group / Sfax, Tunisia</affiliation></author>
       <author><first>Rahma</first><last>Boujelbane</last><affiliation>ANLP Research Group / Sfax, Tunisia</affiliation></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last><affiliation>ANLP Research Group, MIRACL Lab, FSEGS, Sfax University</affiliation></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last><affiliation>ANLP Research Group, MIRACL Lab, FSEGS, Sfax University</affiliation></author>
       <pages>431-435</pages>
       <abstract>Arabic has a widely varying collection of dialects. With the explosion of the use of social networks, the volume of written texts has remarkably increased. Most users express themselves using their own dialect. Unfortunately, many of these dialects remain under-studied due to the scarcity of resources. Researchers and industry practitioners are increasingly interested in analyzing users’ sentiments. In this context, several approaches have been proposed, namely: traditional machine learning, deep learning transfer learning and more recently few-shot learning approaches. In this work, we compare their efficiency as part of the NADI competition to develop a country-level sentiment analysis model. Three models were beneficial for this sub-task: The first based on Sentence Transformer (ST) and achieve 43.23% on DEV set and 42.33% on TEST set, the second based on CAMeLBERT and achieve 47.85% on DEV set and 41.72% on TEST set and the third based on multi-dialect BERT model and achieve 66.72% on DEV set and 39.69% on TEST set.</abstract>
       <url hash="6c4ebdb0">2022.wanlp-1.44</url>
@@ -626,7 +626,7 @@
     <paper id="51">
       <title><fixed-case>NLP</fixed-case> <fixed-case>DI</fixed-case> at <fixed-case>NADI</fixed-case> Shared Task Subtask-1: Sub-word Level Convolutional Neural Models and Pre-trained Binary Classifiers for Dialect Identification</title>
       <author><first>Vani</first><last>Kanjirangat</last><affiliation>Idsia</affiliation></author>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Ljiljana</first><last>Dolamic</last><affiliation>armasuisse S&amp;T</affiliation></author>
       <author><first>Fabio</first><last>Rinaldi</last><affiliation>IDSIA, Swiss AI Institute</affiliation></author>
       <pages>468-473</pages>
@@ -651,7 +651,7 @@
       <title>Building an Ensemble of Transformer Models for <fixed-case>A</fixed-case>rabic Dialect Classification and Sentiment Analysis</title>
       <author><first>Abdullah</first><last>Khered</last></author>
       <author><first>Ingy Abdelhalim</first><last>Abdelhalim</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>479-484</pages>
       <abstract>In this paper, we describe the approaches we developed for the Nuanced Arabic Dialect Identification (NADI) 2022 shared task, which consists of two subtasks: the identification of country-level Arabic dialects and sentiment analysis. Our team, UniManc, developed approaches to the two subtasks which are underpinned by the same model: a pre-trained MARBERT language model. For Subtask 1, we applied undersampling to create versions of the training data with a balanced distribution across classes. For Subtask 2, we further trained the original MARBERT model for the masked language modelling objective using a NADI-provided dataset of unlabelled Arabic tweets. For each of the subtasks, a MARBERT model was fine-tuned for sequence classification, using different values for hyperparameters such as seed and learning rate. This resulted in multiple model variants, which formed the basis of an ensemble model for each subtask. Based on the official NADI evaluation, our ensemble model obtained a macro-F1-score of 26.863, ranking second overall in the first subtask. In the second subtask, our ensemble model also ranked second, obtaining a macro-F1-PN score (macro-averaged F1-score over the Positive and Negative classes) of 73.544.</abstract>
       <url hash="caaa9131">2022.wanlp-1.53</url>
@@ -671,7 +671,7 @@
     <paper id="55">
       <title>Generative Approach for Gender-Rewriting Task with <fixed-case>A</fixed-case>rabic<fixed-case>T</fixed-case>5</title>
       <author><first>Sultan</first><last>Alrowili</last><affiliation>University of Delaware</affiliation></author>
-      <author><first>Vijay</first><last>Shanker</last><affiliation>University of Delaware</affiliation></author>
+      <author id="k-vijay-shanker"><first>Vijay</first><last>Shanker</last><affiliation>University of Delaware</affiliation></author>
       <pages>491-495</pages>
       <abstract>Addressing the correct gender in generative tasks (e.g., Machine Translation) has been an overlooked issue in the Arabic NLP. However, the recent introduction of the Arabic Parallel Gender Corpus (APGC) dataset has established new baselines for the Arabic Gender Rewriting task. To address the Gender Rewriting task, we first pre-train our new Seq2Seq ArabicT5 model on a 17GB of Arabic Corpora. Then, we continue pre-training our ArabicT5 model on the APGC dataset using a newly proposed method. Our evaluation shows that our ArabicT5 model, when trained on the APGC dataset, achieved competitive results against existing state-of-the-art methods. In addition, our ArabicT5 model shows better results on the APGC dataset compared to other Arabic and multilingual T5 models.</abstract>
       <url hash="15d55100">2022.wanlp-1.55</url>
@@ -680,7 +680,7 @@
     </paper>
     <paper id="56">
       <title><fixed-case>A</fixed-case>ra<fixed-case>P</fixed-case>rop at <fixed-case>WANLP</fixed-case> 2022 Shared Task: Leveraging Pre-Trained Language Models for <fixed-case>A</fixed-case>rabic Propaganda Detection</title>
-      <author><first>Gaurav</first><last>Singh</last><affiliation>Independent Research</affiliation></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav</first><last>Singh</last><affiliation>Independent Research</affiliation></author>
       <pages>496-500</pages>
       <abstract>This paper presents the approach taken for the shared task on Propaganda Detection in Arabic at the Seventh Arabic Natural Language Processing Workshop (WANLP 2022). We participated in Sub-task 1 where the text of a tweet is provided, and the goal is to identify the different propaganda techniques used in it. This problem belongs to multi-label classification. For our solution, we approached leveraging different transformer based pre-trained language models with fine-tuning to solve this problem. We found that MARBERTv2 outperforms in terms of performance where F1-macro is 0.08175 and F1-micro is 0.61116 compared to other language models that we considered. Our method achieved rank 4 in the testing phase of the challenge.</abstract>
       <url hash="f2c4d243">2022.wanlp-1.56</url>
@@ -758,7 +758,7 @@
     <paper id="63">
       <title><fixed-case>IITD</fixed-case> at <fixed-case>WANLP</fixed-case> 2022 Shared Task: Multilingual Multi-Granularity Network for Propaganda Detection</title>
       <author><first>Shubham</first><last>Mittal</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>529-533</pages>
       <abstract>We present our system for the two subtasks of the shared task on propaganda detection in Arabic, part of WANLP’2022. Subtask 1 is a multi-label classification problem to find the propaganda techniques used in a given tweet. Our system for this task uses XLM-R to predict probabilities for the target tweet to use each of the techniques. In addition to finding the techniques, subtask 2 further asks to identify the textual span for each instance of each technique that is present in the tweet; the task can be modelled as a sequence tagging problem. We use a multi-granularity network with mBERT encoder for subtask 2. Overall, our system ranks second for both subtasks (out of 14 and 3 participants, respectively). Our experimental results and analysis show that it does not help to use a much larger English corpus annotated with propaganda techniques, regardless of whether used in English or after translation to Arabic.</abstract>
       <url hash="a6f51780">2022.wanlp-1.63</url>
@@ -782,7 +782,7 @@
       <author><first>Abdullah Faiz Ur Rahman</first><last>Khilji</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <author><first>Riyanka</first><last>Manna</last><affiliation>Adamas University, Kolkata</affiliation></author>
       <author><first>Partha</first><last>Pakray</last><affiliation>National Institute of Technology Silchar</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>Jadavpur University, Nit Silchar</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>Jadavpur University, Nit Silchar</affiliation></author>
       <pages>541-544</pages>
       <abstract>In today’s time, online users are regularly exposed to media posts that are propagandistic. Several strategies have been developed to promote safer media consumption in Arabic to combat this. However, there is a limited available multilabel annotated social media dataset. In this work, we have used a pre-trained AraBERT twitter-base model on an expanded train data via data augmentation. Our team CNLP-NITS-PP, has achieved the third rank in subtask 1 at WANLP-2022, for propaganda detection in Arabic (shared task) in terms of micro-F1 score of 0.602.</abstract>
       <url hash="2f8ae4d3">2022.wanlp-1.65</url>
@@ -795,7 +795,7 @@
       <author><first>Abu Bakr Soliman</first><last>Mohammad</last><affiliation>Nu</affiliation></author>
       <author><first>Mohamed</first><last>Ibrahim</last><affiliation>New Giza University</affiliation></author>
       <author><first>Laila Hesham</first><last>Afify</last><affiliation>NewGiza University, School of IT</affiliation></author>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last><affiliation>Newgiza University/Optomatica</affiliation></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last><affiliation>Newgiza University/Optomatica</affiliation></author>
       <pages>545-550</pages>
       <abstract>This paper presents the system developed by the NGU_CNLP team for addressing the shared task on Propaganda Detection in Arabic at WANLP 2022. The team participated in the shared tasks’ two sub-tasks which are: 1) Propaganda technique identification in text and 2) Propaganda technique span identification. In the first sub-task, the goal is to detect all employed propaganda techniques in some given piece of text out of a possible 17 different techniques or to detect that no propaganda technique is being used in that piece of text. As such, this first sub-task is a multi-label classification problem with a pool of 18 possible labels. Subtask 2 extends sub-task 1, by requiring the identification of the exact text span in which a propaganda technique was employed, making it a sequence labeling problem. For task 1, a combination of a data augmentation strategy coupled with an enabled transformer-based model comprised our classification model. This classification model ranked first amongst the 14 systems participating in this subtask. For sub-task two, a transfer learning model was adopted. The system ranked third among the 3 different models that participated in this subtask.</abstract>
       <url hash="6d134a64">2022.wanlp-1.66</url>
diff --git a/data/xml/2022.wassa.xml b/data/xml/2022.wassa.xml
index 0e1d54257a..cb39223946 100644
--- a/data/xml/2022.wassa.xml
+++ b/data/xml/2022.wassa.xml
@@ -4,13 +4,13 @@
     <meta>
       <booktitle>Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment &amp; Social Media Analysis</booktitle>
       <editor><first>Jeremy</first><last>Barnes</last></editor>
-      <editor><first>Orphée</first><last>De Clercq</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></editor>
       <editor><first>Valentin</first><last>Barriere</last></editor>
       <editor><first>Shabnam</first><last>Tafreshi</last></editor>
       <editor><first>Sawsan</first><last>Alqahtani</last></editor>
       <editor><first>João</first><last>Sedoc</last></editor>
       <editor><first>Roman</first><last>Klinger</last></editor>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>May</month>
@@ -52,7 +52,7 @@
       <title>Domain-Aware Contrastive Knowledge Transfer for Multi-domain Imbalanced Data</title>
       <author><first>Zixuan</first><last>Ke</last></author>
       <author><first>Mohammad</first><last>Kachuee</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>25-36</pages>
       <abstract>In many real-world machine learning applications, samples belong to a set of domains e.g., for product reviews each review belongs to a product category. In this paper, we study multi-domain imbalanced learning (MIL), the scenario that there is imbalance not only in classes but also in domains. In the MIL setting, different domains exhibit different patterns and there is a varying degree of similarity and divergence among domains posing opportunities and challenges for transfer learning especially when faced with limited or insufficient training data. We propose a novel domain-aware contrastive knowledge transfer method called DCMI to (1) identify the shared domain knowledge to encourage positive transfer among similar domains (in particular from head domains to tail domains); (2) isolate the domain-specific knowledge to minimize the negative transfer from dissimilar domains. We evaluated the performance of DCMI on three different datasets showing significant improvements in different MIL scenarios.</abstract>
       <url hash="0b4574c0">2022.wassa-1.3</url>
@@ -81,7 +81,7 @@
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Pranaydeep</first><last>Singh</last></author>
       <author><first>Olivier</first><last>Parent</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>51-61</pages>
       <abstract>In this paper, we present the SentEMO platform, a tool that provides aspect-based sentiment analysis and emotion detection of unstructured text data such as reviews, emails and customer care conversations. Currently, models have been trained for five domains and one general domain and are implemented in a pipeline approach, where the output of one model serves as the input for the next. The results are presented in three dashboards, allowing companies to gain more insights into what stakeholders think of their products and services. The SentEMO platform is available at <url>https://sentemo.ugent.be</url></abstract>
       <url hash="c3d79441">2022.wassa-1.5</url>
@@ -131,7 +131,7 @@
       <title>Evaluating Contextual Embeddings and their Extraction Layers for Depression Assessment</title>
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Albert</first><last>Hung</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>89-94</pages>
       <abstract>Many recent works in natural language processing have demonstrated ability to assess aspects of mental health from personal discourse. At the same time, pre-trained contextual word embedding models have grown to dominate much of NLP but little is known empirically on how to best apply them for mental health assessment. Using degree of depression as a case study, we do an empirical analysis on which off-the-shelf language model, individual layers, and combinations of layers seem most promising when applied to human-level NLP tasks. Notably, we find RoBERTa most effective and, despite the standard in past work suggesting the second-to-last or concatenation of the last 4 layers, we find layer 19 (sixth-to last) is at least as good as layer 23 when using 1 layer. Further, when using multiple layers, distributing them across the second half (i.e. Layers 12+), rather than last 4, of the 24 layers yielded the most accurate results.</abstract>
       <url hash="831966c8">2022.wassa-1.9</url>
@@ -226,7 +226,7 @@
       <author><first>Aaron</first><last>Maladry</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Cynthia</first><last>Van Hee</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>172-181</pages>
       <abstract>This paper presents the results of a replication experiment for automatic irony detection in Dutch social media text, investigating both a feature-based SVM classifier, as was done by Van Hee et al. (2017) and and a transformer-based approach. In addition to building a baseline model, an important goal of this research is to explore the implementation of common-sense knowledge in the form of implicit sentiment, as we strongly believe that common-sense and connotative knowledge are essential to the identification of irony and implicit meaning in tweets. We show promising results and the presented approach can provide a solid baseline and serve as a staging ground to build on in future experiments for irony detection in Dutch.</abstract>
       <url hash="2a4f1d1c">2022.wassa-1.16</url>
@@ -286,7 +286,7 @@
       <title><fixed-case>IUCL</fixed-case> at <fixed-case>WASSA</fixed-case> 2022 Shared Task: A Text-only Approach to Empathy and Emotion Detection</title>
       <author><first>Yue</first><last>Chen</last></author>
       <author><first>Yingnan</first><last>Ju</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>228-232</pages>
       <abstract>Our system, IUCL, participated in the WASSA 2022 Shared Task on Empathy Detection and Emotion Classification. Our main goal in building this system is to investigate how the use of demographic attributes influences performance. Our (official) results show that our text-only systems perform very competitively, ranking first in the empathy detection task, reaching an average Pearson correlation of 0.54, and second in the emotion classification task, reaching a Macro-F of 0.572. Our systems that use both text and demographic data are less competitive.</abstract>
       <url hash="3be6f5b0">2022.wassa-1.21</url>
@@ -353,7 +353,7 @@
       <author><first>Soumitra</first><last>Ghosh</last></author>
       <author><first>Dhirendra</first><last>Maurya</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>255-260</pages>
       <abstract>Computational comprehension and identifying emotional components in language have been critical in enhancing human-computer connection in recent years. The WASSA 2022 Shared Task introduced four tracks and released a dataset of news stories: Track-1 for Empathy and Distress Prediction, Track-2 for Emotion classification, Track-3 for Personality prediction, and Track-4 for Interpersonal Reactivity Index prediction at the essay level. This paper describes our participation in the WASSA 2022 shared task on the tasks mentioned above. We developed multi-task deep learning methods to address Tracks 1 and 2 and machine learning models for Track 3 and 4. Our developed systems achieved average Pearson scores of 0.483, 0.05, and 0.08 for Track 1, 3, and 4, respectively, and a macro F1 score of 0.524 for Track 2 on the test set. We ranked 8th, 11th, 2nd and 2nd for tracks 1, 2, 3, and 4 respectively.</abstract>
       <url hash="d44145b1">2022.wassa-1.26</url>
@@ -392,7 +392,7 @@
     <paper id="29">
       <title><fixed-case>SURREY</fixed-case>-<fixed-case>CTS</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>WASSA</fixed-case>2022: An Experiment of Discourse and Sentiment Analysis for the Prediction of Empathy, Distress and Emotion</title>
       <author><first>Shenbin</first><last>Qian</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Hadeel</first><last>Saadany</last></author>
       <author><first>Félix</first><last>Do Carmo</last></author>
diff --git a/data/xml/2022.wat.xml b/data/xml/2022.wat.xml
index c87fb6904e..9ae673b547 100644
--- a/data/xml/2022.wat.xml
+++ b/data/xml/2022.wat.xml
@@ -24,7 +24,7 @@
       <author><first>Shantipriya</first><last>Parida</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Makoto</first><last>Morishita</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
       <author><first>Akiko</first><last>Eriguchi</last></author>
       <author><first>Kaori</first><last>Abe</last></author>
@@ -73,7 +73,7 @@
       <author><first>Yilun</first><last>Liu</last></author>
       <author><first>Zhen</first><last>Zhang</last></author>
       <author><first>Shimin</first><last>Tao</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Hao</first><last>Yang</last></author>
       <pages>59–63</pages>
       <abstract>In this paper we describe our submission to the shared tasks of the 9th Workshop on Asian Translation (WAT 2022) on NICT–SAP under the team name ”HwTscSU”. The tasks involve translation from 5 languages into English and vice-versa in two domains: IT domain and Wikinews domain. The purpose is to determine the feasibility of multilingualism, domain adaptation or document-level knowledge given very little to none clean parallel corpora for training. Our approach for all translation tasks mainly focused on pre-training NMT models on general datasets and fine-tuning them on domain-specific datasets. Due to the small amount of parallel corpora, we collected and cleaned the OPUS dataset including three IT domain corpora, i.e., GNOME, KDE4, and Ubuntu. We then trained Transformer models on the collected dataset and fine-tuned on corresponding dev set. The BLEU scores greatly improved in comparison with other systems. Our submission ranked 1st in all IT-domain tasks and in one out of eight ALT domain tasks.</abstract>
@@ -117,7 +117,7 @@
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Riyanka</first><last>Manna</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>78–81</pages>
       <abstract>In the domain of natural language processing, machine translation is a well-defined task where one natural language is automatically translated to another natural language. The deep learning-based approach of machine translation, known as neural machine translation attains remarkable translational performance. However, it requires a sufficient amount of training data which is a critical issue for low-resource pair translation. To handle the data scarcity problem, the multilingual concept has been investigated in neural machine translation in different settings like many-to-one and one-to-many translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) Indic tasks: English-to-Indic and Indic-to-English translation tasks where we have participated as a team named CNLP-NITS-PP. Herein, we have investigated a transliteration-based approach, where Indic languages are transliterated into English script and shared sub-word level vocabulary during the training phase. We have attained BLEU scores of 2.0 (English-to-Bengali), 1.10 (English-to-Assamese), 4.50 (Bengali-to-English), and 3.50 (Assamese-to-English) translation, respectively.</abstract>
       <url hash="ab47c30f">2022.wat-1.9</url>
@@ -170,7 +170,7 @@
       <author><first>Pankaj</first><last>Dadure</last></author>
       <author><first>Riyanka</first><last>Manna</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>111–116</pages>
       <abstract>Automatic translation of one natural language to another is a popular task of natural language processing. Although the deep learning-based technique known as neural machine translation (NMT) is a widely accepted machine translation approach, it needs an adequate amount of training data, which is a challenging issue for low-resource pair translation. Moreover, the multimodal concept utilizes text and visual features to improve low-resource pair translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) English to Bengali multimodal translation task where we have participated as a team named CNLP-NITS-PP in two tracks: 1) text-only and 2) multimodal translation. Herein, we have proposed a transliteration-based phrase pairs augmentation approach which shows improvement in the multimodal translation task and achieved benchmark results on Bengali Visual Genome 1.0 dataset. We have attained the best results on the challenge and evaluation test set for English to Bengali multimodal translation with BLEU scores of 28.70, 43.90 and RIBES scores of 0.688931, 0.780669, respectively.</abstract>
       <url hash="d580eca8">2022.wat-1.14</url>
@@ -183,7 +183,7 @@
       <author><first>Md Faizal</first><last>Karim</last></author>
       <author><first>Riyanka</first><last>Manna</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>117–122</pages>
       <abstract>Machine translation translates one natural language to another, a well-defined natural language processing task. Neural machine translation (NMT) is a widely accepted machine translation approach, but it requires a sufficient amount of training data, which is a challenging issue for low-resource pair translation. Moreover, the multimodal concept utilizes text and visual features to improve low-resource pair translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) English to Hindi multimodal translation task where we have participated as a team named CNLP-NITS-PP in two tracks: 1) text-only and 2) multimodal translation. Herein, we have proposed a transliteration-based phrase pairs augmentation approach, which shows improvement in the multimodal translation task. We have attained the second best results on the challenge test set for English to Hindi multimodal translation with BLEU score of 39.30, and a RIBES score of 0.791468.</abstract>
       <url hash="5c67feb4">2022.wat-1.15</url>
diff --git a/data/xml/2022.wiesp.xml b/data/xml/2022.wiesp.xml
index 44855c20be..f5f83f757b 100644
--- a/data/xml/2022.wiesp.xml
+++ b/data/xml/2022.wiesp.xml
@@ -72,7 +72,7 @@
       <title>Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The <fixed-case>INAS</fixed-case> Dataset</title>
       <author><first>Marc</first><last>Brinner</last></author>
       <author><first>Tina</first><last>Heger</last></author>
-      <author><first>Sina</first><last>Zarriess</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarriess</last></author>
       <pages>32–42</pages>
       <abstract>We investigate the problem of identifying the major hypothesis that is addressed in a scientific paper. To this end, we present a dataset from the domain of invasion biology that organizes a set of 954 papers into a network of fine-grained domain-specific categories of hypotheses. We carry out experiments on classifying abstracts according to these categories and present a pilot study on annotating hypothesis statements within the text. We find that hypothesis statements in our dataset are complex, varied and more or less explicit, and, importantly, spread over the whole abstract. Experiments with BERT-based classifiers show that these models are able to classify complex hypothesis statements to some extent, without being trained on sentence-level text span annotations.</abstract>
       <url hash="c285ccf9">2022.wiesp-1.5</url>
@@ -119,7 +119,7 @@
     </paper>
     <paper id="9">
       <title>Detecting Entities in the Astrophysics Literature: A Comparison of Word-based and Span-based Entity Recognition Methods</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <pages>78–83</pages>
       <abstract>Information Extraction from scientific literature can be challenging due to the highly specialised nature of such text. We describe our entity recognition methods developed as part of the DEAL (Detecting Entities in the Astrophysics Literature) shared task. The aim of the task is to build a system that can identify Named Entities in a dataset composed by scholarly articles from astrophysics literature. We planned our participation such that it enables us to conduct an empirical comparison between word-based tagging and span-based classification methods. When evaluated on two hidden test sets provided by the organizer, our best-performing submission achieved F1 scores of 0.8307 (validation phase) and 0.7990 (testing phase).</abstract>
@@ -184,7 +184,7 @@
       <author><first>Atilla Kaan</first><last>Alkan</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Fabian</first><last>Schussler</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>131–139</pages>
       <abstract>The increased interest in time-domain astronomy over the last decades has resulted in a substantial increase in observation reports publication leading to a saturation of how astrophysicists read, analyze and classify information. Due to the short life span of the detected astronomical events, the information related to the characterization of new phenomena has to be communicated and analyzed very rapidly to allow other observatories to react and conduct their follow-up observations. This paper introduces TDAC: the first Corpus in Time-Domain Astrophysics, based on observation reports. We also present the NLP experiments we made for named entity recognition based on annotations we made and annotations from the WIESP NLP Challenge.</abstract>
       <url hash="7f959f7d">2022.wiesp-1.15</url>
@@ -207,7 +207,7 @@
       <author><first>Atilla Kaan</first><last>Alkan</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Fabian</first><last>Schussler</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>145–150</pages>
       <abstract>Detecting Entities in the Astrophysics Literature (DEAL) is a proposed shared task in the scope of the first Workshop on Information Extraction from Scientific Publications (WIESP) at AACL-IJCNLP 2022. It aims to propose systems identifying astrophysical named entities. This article presents our system based on a majority voting strategy of an ensemble composed of multiple SciBERT models. The system we propose is ranked second and outperforms the baseline provided by the organisers by achieving an F1 score of 0.7993 and a Matthews Correlation Coefficient (MCC) score of 0.8978 in the testing phase.</abstract>
       <url hash="c78ed364">2022.wiesp-1.17</url>
diff --git a/data/xml/2022.wildre.xml b/data/xml/2022.wildre.xml
index a351ee93bd..da84e7ba3d 100644
--- a/data/xml/2022.wildre.xml
+++ b/data/xml/2022.wildre.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2022-09-28" type="proceedings">
     <meta>
       <booktitle>Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference</booktitle>
-      <editor><first>Girish Nath</first><last>Jha</last></editor>
+      <editor id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></editor>
       <editor><first>Sobha</first><last>L.</last></editor>
       <editor><first>Kalika</first><last>Bali</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <publisher>European Language Resources Association</publisher>
       <address>Marseille, France</address>
       <month>June</month>
@@ -38,7 +38,7 @@
     </paper>
     <paper id="3">
       <title>Leveraging Sub Label Dependencies in Code Mixed <fixed-case>I</fixed-case>ndian Languages for Part-Of-Speech Tagging using Conditional Random Fields.</title>
-      <author><first>Akash Kumar</first><last>Gautam</last></author>
+      <author id="akash-kumar-gautam"><first>Akash Kumar</first><last>Gautam</last></author>
       <pages>13–17</pages>
       <abstract>Code-mixed text sequences often lead to challenges in the task of correct identification of Part-Of-Speech tags. However, lexical dependencies created while alternating between multiple languages can be leveraged to improve the performance of such tasks. Indian languages with rich morphological structure and highly inflected nature provide such an opportunity. In this work, we exploit these sub-label dependencies using conditional random fields (CRFs) by defining feature extraction functions on three distinct language pairs (Hindi-English, Bengali-English, and Telugu-English). Our results demonstrate a significant increase in the tagging performance if the feature extraction functions employ the rich inner structure of such languages.</abstract>
       <url hash="617fb3d8">2022.wildre-1.3</url>
@@ -111,7 +111,7 @@
     <paper id="10">
       <title>Classification of Multiword Expressions in <fixed-case>M</fixed-case>alayalam</title>
       <author><first>Treesa</first><last>Cyriac</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>55–59</pages>
       <abstract>Multiword expression is an interesting concept in languages and the MWEs of a language are not easy for a non-native speaker to understand. It includes lexicalized phrases, idioms, collocations etc. Data on multiwords are helpful in language processing. ‘Multiword expressions in Malayalam’ is a less studied area. In this paper, we are trying to explore multiwords in Malayalam and to classify them as per the three idiosyncrasies: semantic idiosyncrasy, syntactic idiosyncrasy, and statistic idiosyncrasy. Though these are already identified, they are not being studied in Malayalam. The classification and features are given and are studied using Malayalam multiwords. Through this study, we identified how the linguistic features of Malayalam such as agglutination influence its multiword expressions in terms of pronunciation and spelling. Malayalam has a set of code-mixed multiword expressions which is also addressed in this study.</abstract>
       <url hash="69128ef4">2022.wildre-1.10</url>
@@ -123,7 +123,7 @@
       <author><first>Deepak</first><last>Alok</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Atul Kr.</first><last>Ojha</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>60–67</pages>
       <abstract>This paper presents the development of the Parallel Universal Dependency (PUD) Treebank for two Indo-Aryan languages: Bengali and Magahi. A treebank of 1,000 sentences has been created using a parallel corpus of English and the UD framework. A preliminary set of sentences was annotated manually - 600 for Bengali and 200 for Magahi. The rest of the sentences were built using the Bengali and Magahi parser. The sentences have been translated and annotated manually by the authors, some of whom are also native speakers of the languages. The objective behind this work is to build a syntactically-annotated linguistic repository for the aforementioned languages, that can prove to be a useful resource for building further NLP tools. Additionally, Bengali and Magahi parsers were also created which is built on machine learning approach. The accuracy of the Bengali parser is 78.13% in the case of UPOS; 76.99% in the case of XPOS, 56.12% in the case of UAS; and 47.19% in the case of LAS. The accuracy of Magahi parser is 71.53% in the case of UPOS; 66.44% in the case of XPOS, 58.05% in the case of UAS; and 33.07% in the case of LAS. This paper also includes an illustration of the annotation schema followed, the findings of the Parallel Universal Dependency (PUD) treebank, and it’s resulting linguistic analysis</abstract>
       <url hash="3c5c0e46">2022.wildre-1.11</url>
@@ -141,7 +141,7 @@
     <paper id="13">
       <title>Automatic Identification of Explicit Connectives in <fixed-case>M</fixed-case>alayalam</title>
       <author><first>Kumari</first><last>Sheeja S</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>74–79</pages>
       <abstract>This work presents an automatic identification of explicit connectives and its arguments using supervised method, Conditional Random Fields (CRFs). In this work, we focus on the identification of connectives and their arguments in the corpus. We consider explicit connectives and its arguments for the present study. The corpus we have considered has 4,000 sentences from Malayalam documents and manually annotated the corpus for POS, chunk, clause, discourse connectives and its arguments. The corpus thus annotated is used for building the base engine. The percentage of the performance of the system is evaluated based on the precision, recall and F-score and obtained encouraging results. We have analysed the errors generated by the system and used the features obtained from the anlaysis to improve the performance of the system</abstract>
       <url hash="e39b3178">2022.wildre-1.13</url>
diff --git a/data/xml/2022.winlp.xml b/data/xml/2022.winlp.xml
index 7ed45a2e67..c917b6ad52 100644
--- a/data/xml/2022.winlp.xml
+++ b/data/xml/2022.winlp.xml
@@ -8,7 +8,7 @@
       <editor><first>Bonaventure</first><last>Dossou</last></editor>
       <editor><first>Tirthankar</first><last>Ghosal</last></editor>
       <editor><first>Hatem</first><last>Haddad</last></editor>
-      <editor><first>Haley M.</first><last>Lepp</last></editor>
+      <editor id="haley-lepp"><first>Haley M.</first><last>Lepp</last></editor>
       <editor><first>Fatemehsadat</first><last>Mireshghallah</last></editor>
       <editor><first>Surangika</first><last>Ranathunga</last></editor>
       <editor><first>Xanda</first><last>Schofield</last></editor>
diff --git a/data/xml/2022.wit.xml b/data/xml/2022.wit.xml
index c48f3cbe21..9700dcc75e 100644
--- a/data/xml/2022.wit.xml
+++ b/data/xml/2022.wit.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Deriving Insights from User-Generated Text</booktitle>
       <editor><first>Estevam</first><last>Hruschka</last></editor>
-      <editor><first>Tom</first><last>Mitchell</last></editor>
+      <editor id="tom-mitchell"><first>Tom</first><last>Mitchell</last></editor>
       <editor><first>Dunja</first><last>Mladenic</last></editor>
       <editor><first>Marko</first><last>Grobelnik</last></editor>
       <editor><first>Nikita</first><last>Bhutani</last></editor>
diff --git a/data/xml/2022.wmt.xml b/data/xml/2022.wmt.xml
index 71b9f26060..41eb55084c 100644
--- a/data/xml/2022.wmt.xml
+++ b/data/xml/2022.wmt.xml
@@ -5,28 +5,28 @@
       <booktitle>Proceedings of the Seventh Conference on Machine Translation (WMT)</booktitle>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Loïc</first><last>Barrault</last></editor>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Fethi</first><last>Bougares</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
-      <editor><first>Alexander</first><last>Fraser</last></editor>
+      <editor id="alexander-fraser"><first>Alexander</first><last>Fraser</last></editor>
       <editor><first>Markus</first><last>Freitag</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Roman</first><last>Grundkiewicz</last></editor>
       <editor><first>Paco</first><last>Guzman</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio</first><last>Jimeno Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></editor>
       <editor><first>Tom</first><last>Kocmi</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Makoto</first><last>Morishita</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Masaaki</first><last>Nagata</last></editor>
       <editor><first>Toshiaki</first><last>Nakazawa</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Martin</first><last>Popel</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
@@ -62,7 +62,7 @@
       <author><first>Toshiaki</first><last>Nakazawa</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Michal</first><last>Novák</last><affiliation>Charles University, Faculty of Mathematics and Physics</affiliation></author>
       <author><first>Martin</first><last>Popel</last><affiliation>Charles University, Faculty of Mathematics and Physics, UFAL</affiliation></author>
-      <author><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
       <pages>1-45</pages>
       <abstract>This paper presents the results of the General Machine Translation Task organised as part of the Conference on Machine Translation (WMT) 2022. In the general MT task, participants were asked to build machine translation systems for any of 11 language pairs, to be evaluated on test sets consisting of four different domains. We evaluate system outputs with human annotators using two different techniques: reference-based direct assessment and (DA) and a combination of DA and scalar quality metric (DA+SQM).</abstract>
       <url hash="3fb27977">2022.wmt-1.1</url>
@@ -78,7 +78,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Tom</first><last>Kocmi</last><affiliation>Microsoft</affiliation></author>
       <author><first>George</first><last>Foster</last><affiliation>Google</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>46-68</pages>
       <abstract>This paper presents the results of the WMT22 Metrics Shared Task. Participants submitting automatic MT evaluation metrics were asked to score the outputs of the translation systems competing in the WMT22 News Translation Task on four different domains: news, social, ecommerce, and chat. All metrics were evaluated on how well they correlate with human ratings at the system and segment level. Similar to last year, we acquired our own human ratings based on expert-based human evaluation via Multidimensional Quality Metrics (MQM). This setup had several advantages, among other things: (i) expert-based evaluation is more reliable, (ii) we extended the pool of translations by 5 additional translations based on MBR decoding or rescoring which are challenging for current metrics. In addition, we initiated a challenge set subtask, where participants had to create contrastive test suites for evaluating metrics’ ability to capture and penalise specific types of translation errors. Finally, we present an extensive analysis on how well metrics perform on three language pairs: English to German, English to Russian and Chinese to English. The results demonstrate the superiority of neural-based learned metrics and demonstrate again that overlap metrics like Bleu, spBleu or chrf correlate poorly with human ratings. The results also reveal that neural-based metrics are remarkably robust across different domains and challenges.</abstract>
@@ -88,14 +88,14 @@
     <paper id="3">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Quality Estimation</title>
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
-      <author><first>Frédéric</first><last>Blain</last><affiliation>University of Wolverhampton</affiliation></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last><affiliation>University of Wolverhampton</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
       <author><first>Piyawat</first><last>Lertvittayakumjorn</last><affiliation>Google</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>Steffen</first><last>Eger</last><affiliation>NLLG Lab, Bielefeld University</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Duarte</first><last>Alves</last><affiliation>Instituto Superior Técnico / Unbabel</affiliation></author>
-      <author><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Marina</first><last>Fomicheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <author><first>Lucia</first><last>Specia</last><affiliation>Imperial College London</affiliation></author>
@@ -118,7 +118,7 @@
     </paper>
     <paper id="5">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Automatic Post-Editing</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>IIT Bombay</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Rajen</first><last>Chatterjee</last><affiliation>Apple Inc.</affiliation></author>
       <author><first>Markus</first><last>Freitag</last><affiliation>Google Research</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
@@ -176,7 +176,7 @@
     <paper id="10">
       <title>Gender Bias Mitigation for <fixed-case>NMT</fixed-case> Involving Genderless Languages</title>
       <author><first>Ander</first><last>Corral</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP technologies</affiliation></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP technologies</affiliation></author>
       <pages>165-176</pages>
       <abstract>It has been found that NMT systems have a strong preference towards social defaults and biases when translating certain occupations, which due to their widespread use, can unintentionally contribute to amplifying and perpetuating these patterns. In that sense, this work focuses on sentence-level gender agreement between gendered entities and occupations when translating from genderless languages to languages with grammatical gender. Specifically, we address the Basque to Spanish translation direction for which bias mitigation has not been addressed. Gender information in Basque is explicit in neither the grammar nor the morphology. It is only present in a limited number of gender specific common nouns and person proper names. We propose a template-based fine-tuning strategy with explicit gender tags to provide a stronger gender signal for the proper inflection of occupations. This strategy is compared against systems fine-tuned on real data extracted from Wikipedia biographies. We provide a detailed gender bias assessment analysis and perform a template ablation study to determine the optimal set of templates. We report a substantial gender bias mitigation (up to 50% on gender bias scores) while keeping the original translation quality.</abstract>
       <url hash="bd305e46">2022.wmt-1.10</url>
@@ -226,11 +226,11 @@
     </paper>
     <paper id="15">
       <title>Inria-<fixed-case>ALMA</fixed-case>na<fixed-case>CH</fixed-case> at <fixed-case>WMT</fixed-case> 2022: Does Transcription Help Cross-Script Machine Translation?</title>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>Lydia</first><last>Nishimwe</last><affiliation>Inria</affiliation></author>
       <author><first>Benjamin</first><last>Muller</last><affiliation>Inria</affiliation></author>
       <author><first>Camille</first><last>Rey</last><affiliation>Inria</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
       <pages>233-243</pages>
       <abstract>This paper describes the Inria ALMAnaCH team submission to the WMT 2022 general translation shared task. Participating in the language directions cs,ru,uk→en and cs↔uk, we experiment with the use of a dedicated Latin-script transcription convention aimed at representing all Slavic languages involved in a way that maximises character- and word-level correspondences between them as well as with the English language. Our hypothesis was that bringing the source and target language closer could have a positive impact on machine translation results. We provide multiple comparisons, including bilingual and multilingual baselines, with and without transcription. Initial results indicate that the transcription strategy was not successful, resulting in lower results than baselines. We nevertheless submitted our multilingual, transcribed models as our primary systems, and in this paper provide some indications as to why we got these negative results.</abstract>
@@ -394,7 +394,7 @@
     <paper id="29">
       <title>e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case>22 General Machine Translation Task</title>
       <author><first>Csaba</first><last>Oravecz</last><affiliation>European Commission, Directorate-General for Translation</affiliation></author>
-      <author><first>Katina</first><last>Bontcheva</last><affiliation>Sogeti</affiliation></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last><affiliation>Sogeti</affiliation></author>
       <author><first>David</first><last>Kolovratnìk</last><affiliation>Fujitsu</affiliation></author>
       <author><first>Bogomil</first><last>Kovachev</last><affiliation>European Commission, Directorate-General for Translation</affiliation></author>
       <author><first>Christopher</first><last>Scott</last><affiliation>European Commission, Directorate-General for Translation</affiliation></author>
@@ -407,7 +407,7 @@
       <title><fixed-case>CUNI</fixed-case> Systems for the <fixed-case>WMT</fixed-case> 22 <fixed-case>C</fixed-case>zech-<fixed-case>U</fixed-case>krainian Translation Task</title>
       <author><first>Martin</first><last>Popel</last><affiliation>Charles University, Faculty of Mathematics and Physics, UFAL</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles Univeristy</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
       <pages>352-357</pages>
       <abstract>We present Charles University submissions to the WMT 22 GeneralTranslation Shared Task on Czech-Ukrainian and Ukrainian-Czech machine translation. We present two constrained submissions based on block back-translation and tagged back-translation and experiment with rule-basedromanization of Ukrainian. Our results show that the romanization onlyhas a minor effect on the translation quality. Further, we describe Charles Translator,a system that was developed in March 2022 as a response to the migrationfrom Ukraine to the Czech Republic. Compared to our constrained systems,it did not use the romanization and used some proprietary data sources.</abstract>
       <url hash="ff87212d">2022.wmt-1.30</url>
@@ -459,7 +459,7 @@
       <author><first>Shahram</first><last>Khadivi</last><affiliation>eBay</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>Monash University</affiliation></author>
       <author><first>Dinh</first><last>Phung</last><affiliation>Monash University, Australia</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>381-396</pages>
       <abstract>Previous works mostly focus on either multilingual or multi-domain aspects of neural machine translation (NMT). This paper investigates whether the domain information can be transferred across languages on the composition of multi-domain and multilingual NMT, particularly for the incomplete data condition where in-domain bitext is missing for some language pairs. Our results in the curated leave-one-domain-out experiments show that multi-domain multilingual (MDML) NMT can boost zero-shot translation performance up to +10 gains on BLEU, as well as aid the generalisation of multi-domain NMT to the missing domain. We also explore strategies for effective integration of multilingual and multi-domain NMT, including language and domain tag combination and auxiliary task training. We find that learning domain-aware representations and adding target-language tags to the encoder leads to effective MDML-NMT.</abstract>
       <url hash="a7050285">2022.wmt-1.34</url>
@@ -471,7 +471,7 @@
       <author><first>Ting</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Huan</first><last>Liu</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Junpeng</first><last>Liu</last><affiliation>Dalian University of Technology</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>397-402</pages>
       <abstract>This paper describes DUTNLP Lab’s submission to the WMT22 General MT Task on four translation directions: English to/from Chinese and English to/from Japanese under the constrained condition. Our primary system are built on several Transformer variants which employ wider FFN layer or deeper encoder layer. The bilingual data are filtered by detailed data pre-processing strategies and four data augmentation methods are combined to enlarge the training data with the provided monolingual data. Several common methods are also employed to further improve the model performance, such as fine-tuning, model ensemble and post-editing. As a result, our constrained systems achieve 29.01, 63.87, 41.84, and 24.82 BLEU scores on Chinese-to-English, English-to-Chinese, English-to-Japanese, and Japanese-to-English, respectively.</abstract>
       <url hash="48891ad5">2022.wmt-1.35</url>
@@ -562,7 +562,7 @@
     </paper>
     <paper id="42">
       <title>Test Suite Evaluation: Morphological Challenges and Pronoun Translation</title>
-      <author><first>Marion</first><last>Weller-Di Marco</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>458-468</pages>
       <abstract>This paper summarizes the results of our test suite evaluation with a main focus on morphology for the language pairs English to/from German. We look at the translation of morphologically complex words (DE–EN), and evaluatewhether English noun phrases are translated as compounds vs. phrases into German. Furthermore, we investigate the preservation of morphological features (gender in EN–DE pronoun translation and number in morpho-syntacticallycomplex structures for DE–EN). Our results indicate that systems are able to interpret linguistic structures to obtain relevant information, but also that translation becomes more challenging with increasing complexity, as seen, for example, when translating words with negation or non-concatenative properties, and for the morecomplex cases of the pronoun translation task.</abstract>
@@ -574,7 +574,7 @@
       <author><first>Duarte</first><last>Alves</last><affiliation>Instituto Superior Técnico / Unbabel</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
       <author><first>Ana C</first><last>Farinha</last><affiliation>Unbabel</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>469-478</pages>
       <abstract>Automatic translations with critical errors may lead to misinterpretations and pose several risks for the user. As such, it is important that Machine Translation (MT) Evaluation systems are robust to these errors in order to increase the reliability and safety of Machine Translation systems. Here we introduce SMAUG a novel Sentence-level Multilingual AUGmentation approach for generating translations with critical errors and apply this approach to create a test set to evaluate the robustness of MT metrics to these errors. We show that current State-of-the-Art metrics are improving their capability to distinguish translations with and without critical errors and to penalize the first accordingly. We also show that metrics tend to struggle with errors related to named entities and numbers and that there is a high variance in the robustness of current methods to translations with critical errors.</abstract>
@@ -658,7 +658,7 @@
     <paper id="49">
       <title>Unsupervised Embedding-based Metric for <fixed-case>MT</fixed-case> Evaluation with Improved Human Correlation</title>
       <author><first>Ananya</first><last>Mukherjee</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>558-563</pages>
       <abstract>In this paper, we describe our submission to the WMT22 metrics shared task. Our metric focuses on computing contextual and syntactic equivalences along with lexical, morphological, and semantic similarity. The intent is to capture the fluency and context of the MT outputs along with their adequacy. Fluency is captured using syntactic similarity and context is captured using sentence similarity leveraging sentence embeddings. The final sentence translation score is the weighted combination of three similarity scores: a) Syntactic Similarity b) Lexical, Morphological and Semantic Similarity, and c) Contextual Similarity. This paper outlines two improved versions of MEE i.e., MEE2 and MEE4. Additionally, we report our experiments on language pairs of en-de, en-ru and zh-en from WMT17-19 testset and further depict the correlation with human assessments.</abstract>
       <url hash="717c4735">2022.wmt-1.49</url>
@@ -667,7 +667,7 @@
     <paper id="50">
       <title><fixed-case>REUSE</fixed-case>: <fixed-case>RE</fixed-case>ference-free <fixed-case>U</fixed-case>n<fixed-case>S</fixed-case>upervised Quality Estimation Metric</title>
       <author><first>Ananya</first><last>Mukherjee</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>564-568</pages>
       <abstract>This paper describes our submission to the WMT2022 shared metrics task. Our unsupervised metric estimates the translation quality at chunk-level and sentence-level. Source and target sentence chunks are retrieved by using a multi-lingual chunker. The chunk-level similarity is computed by leveraging BERT contextual word embeddings and sentence similarity scores are calculated by leveraging sentence embeddings of Language-Agnostic BERT models. The final quality estimation score is obtained by mean pooling the chunk-level and sentence-level similarity scores. This paper outlines our experiments and also reports the correlation with human judgements for en-de, en-ru and zh-en language pairs of WMT17, WMT18 and WMT19 test sets.</abstract>
       <url hash="7613cdd8">2022.wmt-1.50</url>
@@ -688,13 +688,13 @@
     <paper id="52">
       <title><fixed-case>COMET</fixed-case>-22: Unbabel-<fixed-case>IST</fixed-case> 2022 Submission for the Metrics Shared Task</title>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>Duarte</first><last>Alves</last><affiliation>Instituto Superior Técnico / Unbabel</affiliation></author>
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
       <author><first>Ana C</first><last>Farinha</last><affiliation>Unbabel</affiliation></author>
       <author><first>Taisiya</first><last>Glushkova</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
-      <author><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Técnico</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Técnico</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>578-585</pages>
       <abstract>In this paper, we present the joint contribution of Unbabel and IST to the WMT 2022 Metrics Shared Task. Our primary submission – dubbed COMET-22 – is an ensemble between a COMET estimator model trained with Direct Assessments and a newly proposed multitask model trained to predict sentence-level scores along with OK/BAD word-level tags derived from Multidimensional Quality Metrics error annotations. These models are ensembled together using a hyper-parameter search that weights different features extracted from both evaluation models and combines them into a single score. For the reference-free evaluation, we present CometKiwi. Similarly to our primary submission, CometKiwi is an ensemble between two models. A traditional predictor-estimator model inspired by OpenKiwi and our new multitask model trained on Multidimensional Quality Metrics which can also be used without references. Both our submissions show improved correlations compared to state-of-the-art metrics from last year as well as increased robustness to critical errors.</abstract>
@@ -750,7 +750,7 @@
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Jaehyung</first><last>Seo</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>606-614</pages>
       <abstract>This paper presents KU X Upstage’s submission to the quality estimation (QE): critical error detection (CED) shared task in WMT22. We leverage the XLM-RoBERTa large model without utilizing any additional parallel data. To the best of our knowledge, we apply prompt-based fine-tuning to the QE task for the first time. To maximize the model’s language understanding capability, we reformulate the CED task to be similar to the masked language model objective, which is a pre-training strategy of the language model. We design intuitive templates and label words, and include auxiliary descriptions such as demonstration or Google Translate results in the input sequence. We further improve the performance through the template ensemble, and as a result of the shared task, our approach achieve the best performance for both English-German and Portuguese-English language pairs in an unconstrained setting.</abstract>
       <url hash="3599d093">2022.wmt-1.56</url>
@@ -763,7 +763,7 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Shimin</first><last>Tao</last><affiliation>Huawei</affiliation></author>
       <author><first>Hao</first><last>Yang</last><affiliation>Huawei Co. Ltd</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>615-620</pages>
       <abstract>This paper presents submissions of the NJUNLP team in WMT 2022Quality Estimation shared task 1, where the goal is to predict the sentence-level and word-level quality for target machine translations. Our system explores pseudo data and multi-task learning. We propose several novel methods to generate pseudo data for different annotations using the conditional masked language model and the neural machine translation model. The proposed methods control the decoding process to generate more real pseudo translations. We pre-train the XLMR-large model with pseudo data and then fine-tune this model with real data both in the way of multi-task learning. We jointly learn sentence-level scores (with regression and rank tasks) and word-level tags (with a sequence tagging task). Our system obtains competitive results on different language pairs and ranks first place on both sentence- and word-level sub-tasks of the English-German language pair.</abstract>
       <url hash="62c2c8f3">2022.wmt-1.57</url>
@@ -778,7 +778,7 @@
       <author><first>Kazushige</first><last>Ouchi</last><affiliation>Toshiba (China) Co., Ltd.</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jian</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>621-626</pages>
       <abstract>This paper presents the BJTU-Toshiba joint submission for WMT 2022 quality estimation shared task. We only participate in Task 1 (quality prediction) of the shared task, focusing on the sentence-level MQM prediction. The techniques we experimented with include the integration of monolingual language models and the pre-finetuning of pre-trained representations. We tried two styles of pre-finetuning, namely Translation Language Modeling and Replaced Token Detection. We demonstrate the competitiveness of our system compared to the widely adopted XLM-RoBERTa baseline. Our system is also the top-ranking system on the Sentence-level MQM Prediction for the English-German language pairs.</abstract>
       <url hash="2d91c90b">2022.wmt-1.58</url>
@@ -801,11 +801,11 @@
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
       <author><first>Ana C</first><last>Farinha</last><affiliation>Unbabel</affiliation></author>
       <author><first>Christine</first><last>Maroti</last><affiliation>Unbabel</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>Taisiya</first><last>Glushkova</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
       <author><first>Duarte</first><last>Alves</last><affiliation>Instituto Superior Técnico / Unbabel</affiliation></author>
-      <author><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Técnico</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Técnico</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>634-645</pages>
       <abstract>We present the joint contribution of IST and Unbabel to the WMT 2022 Shared Task on Quality Estimation (QE). Our team participated in all three subtasks: (i) Sentence and Word-level Quality Prediction; (ii) Explainable QE; and (iii) Critical Error Detection. For all tasks we build on top of the COMET framework, connecting it with the predictor-estimator architecture of OpenKiwi, and equipping it with a word-level sequence tagger and an explanation extractor. Our results suggest that incorporating references during pretraining improves performance across several language pairs on downstream tasks, and that jointly training with sentence and word-level objectives yields a further boost. Furthermore, combining attention and gradient information proved to be the top strategy for extracting good explanations of sentence-level QE models. Overall, our submissions achieved the best results for all three tasks for almost all language pairs by a considerable margin.</abstract>
@@ -854,7 +854,7 @@
     </paper>
     <paper id="64">
       <title><fixed-case>CUNI</fixed-case> Non-Autoregressive System for the <fixed-case>WMT</fixed-case> 22 Efficient Translation Shared Task</title>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
       <pages>668-670</pages>
       <abstract>We present a non-autoregressive system submission to the WMT 22 Efficient Translation Shared Task. Our system was used by Helcl et al. (2022) in an attempt to provide fair comparison between non-autoregressive and autoregressive models. This submission is an effort to establish solid baselines along with sound evaluation methodology, particularly in terms of measuring the decoding speed. The model itself is a 12-layer Transformer model trained with connectionist temporal classification on knowledge-distilled dataset by a strong autoregressive teacher model.</abstract>
       <url hash="bf091683">2022.wmt-1.64</url>
@@ -898,7 +898,7 @@
     <paper id="67">
       <title><fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay’s <fixed-case>WMT</fixed-case>22 Automatic Post-Editing Shared Task Submission</title>
       <author><first>Sourabh</first><last>Deoghare</last><affiliation>IIT Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>682-688</pages>
       <url hash="86c1ea62">2022.wmt-1.67</url>
       <bibkey>deoghare-bhattacharyya-2022-iit</bibkey>
@@ -925,14 +925,14 @@
       <author><first>Maika</first><last>Vicente Navarro</last><affiliation>Leica Biosystems, Australia</affiliation></author>
       <author><first>Lana</first><last>Yeganova</last><affiliation>NCBI/NLM/NIH, Bethesda, USA</affiliation></author>
       <author><first>Dina</first><last>Wiemann</last><affiliation>Novartis AG, Basel, Switzerland</affiliation></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last><affiliation>University of Padua, Italy</affiliation></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last><affiliation>University of Padua, Italy</affiliation></author>
       <author><first>Federica</first><last>Vezzani</last><affiliation>University of Padua, Italy</affiliation></author>
       <author><first>Christel</first><last>Gerardin</last><affiliation>Sorbonne Universit�, Franc</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria, Paris, France</affiliation></author>
       <author><first>Darryl Johan</first><last>Estrada</last><affiliation>Barcelona Supercomputing Center, Spain</affiliation></author>
       <author><first>Salvador</first><last>Lima-lopez</last><affiliation>Barcelona Supercomputing Center, Spain</affiliation></author>
       <author><first>Eulalia</first><last>Farre-maduel</last><affiliation>Barcelona Supercomputing Center, Spain</affiliation></author>
-      <author><first>Martin</first><last>Krallinger</last><affiliation>Barcelona Supercomputing Center, Spai</affiliation></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last><affiliation>Barcelona Supercomputing Center, Spai</affiliation></author>
       <author><first>Cristian</first><last>Grozea</last><affiliation>Fraunhofer Institute FOKUS, Berlin, Germany</affiliation></author>
       <author><first>Aurelie</first><last>Neveol</last><affiliation>Universit� Paris-Saclay, CNRS, LISN, Orsay, France</affiliation></author>
       <pages>694-723</pages>
@@ -943,10 +943,10 @@
     <paper id="70">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Chat Translation</title>
       <author><first>Ana C</first><last>Farinha</last><affiliation>Unbabel</affiliation></author>
-      <author><first>M. Amin</first><last>Farajian</last><affiliation>Unbabel</affiliation></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last><affiliation>Unbabel</affiliation></author>
       <author><first>Marianna</first><last>Buchicchio</last><affiliation>Unbabel</affiliation></author>
       <author><first>Patrick</first><last>Fernandes</last><affiliation>Carnegie Mellon University, Instituto de Telecomunicações</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>Helena</first><last>Moniz</last><affiliation>INESC-ID, University of Lisbon</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>724-743</pages>
@@ -962,7 +962,7 @@
       <author><first>Alessia</first><last>Battisti</last><affiliation>University of Zurich, Switzerland</affiliation></author>
       <author><first>Michèle</first><last>Berger</last><affiliation>HfH</affiliation></author>
       <author><first>Richard</first><last>Bowden</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Annelies</first><last>Braffort</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <author><first>Necati</first><last>Cihan Camgöz</last><affiliation>Meta</affiliation></author>
       <author><first>Cristina</first><last>España-bonet</last><affiliation>DFKI GmbH</affiliation></author>
       <author><first>Roman</first><last>Grundkiewicz</last><affiliation>Microsoft Research</affiliation></author>
@@ -971,7 +971,7 @@
       <author><first>Amit</first><last>Moryossef</last><affiliation>Bar-Ilan university, University of Zurich, ETH Zurich</affiliation></author>
       <author><first>Regula</first><last>Perrollaz</last><affiliation>Hochschule fuer Heilpaedogik</affiliation></author>
       <author><first>Sabine</first><last>Reinhard</last><affiliation>Hochschule für Heilpädagogik HfH</affiliation></author>
-      <author><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Dimitar</first><last>Shterionov</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Sandra</first><last>Sidler-miserez</last><affiliation>University of Teacher Education in Special Needs (HfH)</affiliation></author>
       <author><first>Katja</first><last>Tissi</last><affiliation>Hochschule fuer Heilpaedagogik</affiliation></author>
@@ -982,7 +982,7 @@
     </paper>
     <paper id="72">
       <title>Findings of the <fixed-case>WMT</fixed-case>’22 Shared Task on Large-Scale Machine Translation Evaluation for <fixed-case>A</fixed-case>frican Languages</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Md Mahfuz Ibn</first><last>Alam</last><affiliation>George Mason University</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>George Mason University</affiliation></author>
       <author><first>Akshita</first><last>Bhagia</last><affiliation>Ai2</affiliation></author>
@@ -991,7 +991,7 @@
       <author><first>Fahim</first><last>Faisal</last><affiliation>George Mason University</affiliation></author>
       <author><first>Christian</first><last>Federmann</last><affiliation>Microsoft</affiliation></author>
       <author><first>Natalia</first><last>Fedorova</last><affiliation>Toloka</affiliation></author>
-      <author><first>Francisco</first><last>Guzmán</last><affiliation>Meta AI</affiliation></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last><affiliation>Meta AI</affiliation></author>
       <author><first>Sergey</first><last>Koshelev</last><affiliation>Toloka</affiliation></author>
       <author><first>Jean</first><last>Maillard</last><affiliation>Meta AI</affiliation></author>
       <author><first>Vukosi</first><last>Marivate</last><affiliation>Department of Computer Science, University of Pretoria</affiliation></author>
@@ -1007,7 +1007,7 @@
     </paper>
     <paper id="73">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Tasks in Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case></title>
-      <author><first>Marion</first><last>Weller-Di Marco</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>801-805</pages>
       <abstract>We present the findings of the WMT2022Shared Tasks in Unsupervised MT and VeryLow Resource Supervised MT with experiments on the language pairs German to/fromUpper Sorbian, German to/from Lower Sorbian and Lower Sorbian to/from Upper Sorbian. Upper and Lower Sorbian are minoritylanguages spoken in the Eastern parts of Germany. There are active language communitiesworking on the preservation of the languageswho also made the data used in this Shared Taskavailable.In total, four teams participated on this SharedTask, with submissions from three teams for theunsupervised sub task, and submissions fromall four teams for the supervised sub task. Inthis overview paper, we present and discuss theresults.</abstract>
@@ -1017,7 +1017,7 @@
     <paper id="74">
       <title>Overview and Results of <fixed-case>M</fixed-case>ix<fixed-case>MT</fixed-case> Shared-Task at <fixed-case>WMT</fixed-case> 2022</title>
       <author><first>Vivek</first><last>Srivastava</last><affiliation>TCS Research</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
       <pages>806-811</pages>
       <abstract>In this paper, we present an overview of the WMT 2022 shared task on code-mixed machine translation (MixMT). In this shared task, we hosted two code-mixed machine translation subtasks in the following settings: (i) monolingual to code-mixed translation and (ii) code-mixed to monolingual translation. In both the subtasks, we received registration and participation from teams across the globe showing an interest and need to immediately address the challenges with machine translation involving code-mixed and low-resource languages.</abstract>
       <url hash="6f858631">2022.wmt-1.74</url>
@@ -1025,7 +1025,7 @@
     </paper>
     <paper id="75">
       <title>Findings of the Word-Level <fixed-case>A</fixed-case>uto<fixed-case>C</fixed-case>ompletion Shared Task in <fixed-case>WMT</fixed-case> 2022</title>
-      <author><first>Francisco</first><last>Casacuberta</last><affiliation>Universitat Politècnica de València</affiliation></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last><affiliation>Universitat Politècnica de València</affiliation></author>
       <author><first>George</first><last>Foster</last><affiliation>Google</affiliation></author>
       <author><first>Guoping</first><last>Huang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Philipp</first><last>Koehn</last><affiliation>Johns Hopkins University</affiliation></author>
@@ -1033,7 +1033,7 @@
       <author><first>Lemao</first><last>Liu</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Shuming</first><last>Shi</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>812-820</pages>
       <abstract>Recent years have witnessed rapid advancements in machine translation, but the state-of-the-art machine translation system still can not satisfy the high requirements in some rigorous translation scenarios. Computer-aided translation (CAT) provides a promising solution to yield a high-quality translation with a guarantee. Unfortunately, due to the lack of popular benchmarks, the research on CAT is not well developed compared with machine translation. In this year, we hold a new shared task called Word-level AutoCompletion (WLAC) for CAT in WMT. Specifically, we introduce some resources to train a WLAC model, and particularly we collect data from CAT systems as a part of test data for this shared task. In addition, we employ both automatic and human evaluations to measure the performance of the submitted systems, and our final evaluation results reveal some findings for the WLAC task.</abstract>
       <url hash="c59f8005">2022.wmt-1.75</url>
@@ -1055,7 +1055,7 @@
       <title>Focused Concatenation for Context-Aware Neural Machine Translation</title>
       <author><first>Lorenzo</first><last>Lupo</last><affiliation>Lig</affiliation></author>
       <author><first>Marco</first><last>Dinarelli</last><affiliation>Lig</affiliation></author>
-      <author><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>830-842</pages>
       <abstract>A straightforward approach to context-aware neural machine translation consists in feeding the standard encoder-decoder architecture with a window of consecutive sentences, formed by the current sentence and a number of sentences from its context concatenated to it. In this work, we propose an improved concatenation approach that encourages the model to focus on the translation of the current sentence, discounting the loss generated by target context. We also propose an additional improvement that strengthen the notion of sentence boundaries and of relative sentence distance, facilitating model compliance to the context-discounted objective. We evaluate our approach with both average-translation quality metrics and contrastive test sets for the translation of inter-sentential discourse phenomena, proving its superiority to the vanilla concatenation approach and other sophisticated context-aware systems.</abstract>
       <url hash="31941209">2022.wmt-1.77</url>
@@ -1084,7 +1084,7 @@
     <paper id="80">
       <title>Too Brittle to Touch: Comparing the Stability of Quantization and Distillation towards Developing Low-Resource <fixed-case>MT</fixed-case> Models</title>
       <author><first>Harshita</first><last>Diddee</last><affiliation>Microsoft Research India</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Tanuja</first><last>Ganu</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Kalika</first><last>Bali</last><affiliation>Microsoft Research Labs</affiliation></author>
@@ -1132,7 +1132,7 @@
       <author><first>Gleb</first><last>Erofeev</last><affiliation>Logrus Global</affiliation></author>
       <author><first>Irina</first><last>Sorokina</last><affiliation>Logrus Global</affiliation></author>
       <author><first>Serge</first><last>Gladkoff</last><affiliation>Logrus Global</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>908-919</pages>
       <abstract>Pre-trained language models (PLMs) often take advantage of the monolingual and multilingual dataset that is freely available online to acquire general or mixed domain knowledge before deployment into specific tasks. Extra-large PLMs (xLPLMs) are proposed very recently to claim supreme performances over smaller-sized PLMs such as in machine translation (MT) tasks. These xLPLMs include Meta-AI’s wmt21-dense-24-wide-en-X (2021) and NLLB (2022). In this work, we examine if xLPLMs are absolutely superior to smaller-sized PLMs in fine-tuning toward domain-specific MTs. We use two different in-domain data of different sizes: commercial automotive in-house data and clinical shared task data from the ClinSpEn2022 challenge at WMT2022. We choose the popular Marian Helsinki as smaller sized PLM and two massive-sized Mega-Transformers from Meta-AI as xLPLMs.Our experimental investigation shows that 1) on smaller-sized in-domain commercial automotive data, xLPLM wmt21-dense-24-wide-en-X indeed shows much better evaluation scores using SacreBLEU and hLEPOR metrics than smaller-sized Marian, even though its score increase rate is lower than Marian after fine-tuning; 2) on relatively larger-size well prepared clinical data fine-tuning, the xLPLM NLLB tends to lose its advantage over smaller-sized Marian on two sub-tasks (clinical terms and ontology concepts) using ClinSpEn offered metrics METEOR, COMET, and ROUGE-L, and totally lost to Marian on Task-1 (clinical cases) on all official metrics including SacreBLEU and BLEU; 3) metrics do not always agree with each other on the same tasks using the same model outputs; 4) clinic-Marian ranked No.2 on Task- 1 (via SacreBLEU/BLEU) and Task-3 (via METEOR and ROUGE) among all submissions.</abstract>
       <url hash="c951d3dd">2022.wmt-1.84</url>
@@ -1199,8 +1199,8 @@
       <title>Unbabel-<fixed-case>IST</fixed-case> at the <fixed-case>WMT</fixed-case> Chat Translation Shared Task</title>
       <author><first>João</first><last>Alves</last><affiliation>Unbabel</affiliation></author>
       <author><first>Pedro Henrique</first><last>Martins</last><affiliation>Instituto de Telecomunicações, Instituto Superior Técnico</affiliation></author>
-      <author><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
-      <author><first>M. Amin</first><last>Farajian</last><affiliation>Unbabel</affiliation></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last><affiliation>Unbabel</affiliation></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last><affiliation>Unbabel</affiliation></author>
       <author><first>André F. T.</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>943-948</pages>
       <abstract>We present the joint contribution of IST and Unbabel to the WMT 2022 Chat Translation Shared Task. We participated in all six language directions (English ↔ German, English ↔ French, English ↔ Brazilian Portuguese). Due to the lack of domain-specific data, we use mBART50, a large pretrained language model trained on millions of sentence-pairs, as our base model. We fine-tune it using a two step fine-tuning process. In the first step, we fine-tune the model on publicly available data. In the second step, we use the validation set. After having a domain specific model, we explore the use of kNN-MT as a way of incorporating domain-specific data at decoding time.</abstract>
@@ -1226,7 +1226,7 @@
       <title><fixed-case>BJTU</fixed-case>-<fixed-case>W</fixed-case>e<fixed-case>C</fixed-case>hat’s Systems for the <fixed-case>WMT</fixed-case>22 Chat Translation Task</title>
       <author><first>Yunlong</first><last>Liang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
       <pages>955-961</pages>
@@ -1271,7 +1271,7 @@
     <paper id="94">
       <title>Spatio-temporal Sign Language Representation and Translation</title>
       <author><first>Yasser</first><last>Hamidullah</last><affiliation>Dfki</affiliation></author>
-      <author><first>Josef</first><last>Van Genabith</last><affiliation>Dfki</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last><affiliation>Dfki</affiliation></author>
       <author><first>Cristina</first><last>España-bonet</last><affiliation>DFKI GmbH</affiliation></author>
       <pages>977-982</pages>
       <abstract>This paper describes the DFKI-MLT submission to the WMT-SLT 2022 sign language translation (SLT) task from Swiss German Sign Language (video) into German (text). State-of-the-art techniques for SLT use a generic seq2seq architecture with customized input embeddings. Instead of word embeddings as used in textual machine translation, SLT systems use features extracted from video frames. Standard approaches often do not benefit from temporal features. In our participation, we present a system that learns spatio-temporal feature representations and translation in a single model, resulting in a real end-to-end architecture expected to better generalize to new data sets. Our best system achieved <tex-math>5 \pm 1</tex-math> BLEU points on the development set, but the performance on the test dropped to <tex-math>0.11 \pm 0.06</tex-math> BLEU points.</abstract>
@@ -1313,7 +1313,7 @@
       <title>Separating Grains from the Chaff: Using Data Filtering to Improve Multilingual Translation for Low-Resourced <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Idris</first><last>Abdulmumin</last></author>
       <author><first>Michael</first><last>Beukman</last></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
       <author><first>Chris</first><last>Emezue</last></author>
       <author><first>Everlyn</first><last>Asiko</last></author>
       <author><first>Tosin</first><last>Adewumi</last></author>
@@ -1455,7 +1455,7 @@
     <paper id="109">
       <title><fixed-case>MUNI</fixed-case>-<fixed-case>NLP</fixed-case> Systems for <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian-<fixed-case>G</fixed-case>erman and <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian-<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian Machine Translation @ <fixed-case>WMT</fixed-case>22</title>
       <author><first>Edoardo</first><last>Signoroni</last><affiliation>Faculty of Informatics, Masaryk University</affiliation></author>
-      <author><first>Pavel</first><last>Rychlý</last><affiliation>NLP Centre, Faculty of Informatics, Masaryk University</affiliation></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last><affiliation>NLP Centre, Faculty of Informatics, Masaryk University</affiliation></author>
       <pages>1111-1116</pages>
       <abstract>We describe our neural machine translation systems for the WMT22 shared task on unsupervised MT and very low resource supervised MT. We submit supervised NMT systems for Lower Sorbian-German and Lower Sorbian-Upper Sorbian translation in both directions. By using a novel tokenization algorithm, data augmentation techniques, such as Data Diversification (DD), and parameter optimization we improve on our baselines by 10.5-10.77 BLEU for Lower Sorbian-German and by 1.52-1.88 BLEU for Lower Sorbian-Upper Sorbian.</abstract>
       <url hash="4222a912">2022.wmt-1.109</url>
@@ -1490,7 +1490,7 @@
       <author><first>Shivam</first><last>Mangale</last><affiliation>International Institute of Information Technology - Hyderabad</affiliation></author>
       <author><first>Saransh</first><last>Rajput</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
       <author><first>Tanvi</first><last>Kamble</last><affiliation>International Institute of Information Technology - Hyderabad</affiliation></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>International Institute of Information Technology - Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>International Institute of Information Technology - Hyderabad</affiliation></author>
       <author><first>Vasudev</first><last>Varma</last><affiliation>International Institute of Information Technology - Hyderabad</affiliation></author>
       <pages>1126-1130</pages>
       <abstract>Code-mixed machine translation has become an important task in multilingual communities and extending the task of machine translation to code mixed data has become a common task for these languages. In the shared tasks of EMNLP 2022, we try to tackle the same for both English + Hindi to Hinglish and Hinglish to English. The first task dealt with both Roman and Devanagari script as we had monolingual data in both English and Hindi whereas the second task only had data in Roman script. To our knowledge, we achieved one of the top ROUGE-L and WER scores for the first task of Monolingual to Code-Mixed machine translation. In this paper, we discuss the use of mBART with some special pre-processing and post-processing (transliteration from Devanagari to Roman) for the first task in detail and the experiments that we performed for the second task of translating code-mixed Hinglish to monolingual English.</abstract>
@@ -1536,7 +1536,7 @@
       <author><first>Shyambabu</first><last>Pandey</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <author><first>Riyanka</first><last>Manna</last><affiliation>Adamas University</affiliation></author>
       <author><first>Partha</first><last>Pakray</last><affiliation>National Institute of Technology Silchar</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>National Institute of Technology Silchar</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <pages>1158-1161</pages>
       <abstract>The mixing of two or more languages in speech or text is known as code-mixing. In this form of communication, users mix words and phrases from multiple languages. Code-mixing is very common in the context of Indian languages due to the presence of multilingual societies. The probability of the existence of code-mixed sentences in almost all Indian languages since in India English is the dominant language for social media textual communication platforms. We have participated in the WMT22 shared task of code-mixed machine translation with the team name: CNLP-NITS-PP. In this task, we have prepared a synthetic Hinglish–English parallel corpus using transliteration of original Hindi sentences to tackle the limitation of the parallel corpus, where, we mainly considered sentences that have named-entity (proper noun) from the available English-Hindi parallel corpus. With the addition of synthetic bi-text data to the original parallel corpus (train set), our transformer-based neural machine translation models have attained recall-oriented understudy for gisting evaluation (ROUGE-L) scores of 0.23815, 0.33729, and word error rate (WER) scores of 0.95458, 0.88451 at Sub-Task-1 (English-to-Hinglish) and Sub-Task-2 (Hinglish-to-English) for test set results respectively.</abstract>
       <url hash="712aa364">2022.wmt-1.116</url>
@@ -1578,7 +1578,7 @@
       <title><fixed-case>PRHLT</fixed-case>’s Submission to <fixed-case>WLAC</fixed-case> 2022</title>
       <author><first>Angel</first><last>Navarro</last><affiliation>Prhlt</affiliation></author>
       <author><first>Miguel</first><last>Domingo</last><affiliation>Universitat Politècnica de València</affiliation></author>
-      <author><first>Francisco</first><last>Casacuberta</last><affiliation>Universitat Politècnica de València</affiliation></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last><affiliation>Universitat Politècnica de València</affiliation></author>
       <pages>1182-1186</pages>
       <abstract>This paper describes our submission to the Word-Level AutoCompletion shared task of WMT22. We participated in the English–German and German–English categories. We proposed a segment-based interactive machine translation approach whose central core is a machine translation (MT) model which generates a complete translation from the context provided by the task. From there, we obtain the word which corresponds to the autocompletion. With this approach, we aim to show that it is possible to use the MT models in the autocompletion task by simply performing minor changes at the decoding step, obtaining satisfactory results.</abstract>
       <url hash="f59fd596">2022.wmt-1.120</url>
@@ -1648,7 +1648,7 @@
       <author><first>Songming</first><last>Zhang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Hui</first><last>Huang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jian</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>1211-1216</pages>
       <abstract>Translation suggestion (TS) models are used to automatically provide alternative suggestions for incorrect spans in sentences generated by machine translation. This paper introduces the system used in our submission to the WMT’22 Translation Suggestion shared task. Our system is based on the ensemble of different translation architectures, including Transformer, SA-Transformer, and DynamicConv. We use three strategies to construct synthetic data from parallel corpora to compensate for the lack of supervised data. In addition, we introduce a multi-phase pre-training strategy, adding an additional pre-training phase with in-domain data. We rank second and third on the English-German and English-Chinese bidirectional tasks, respectively.</abstract>
diff --git a/data/xml/2022.wnu.xml b/data/xml/2022.wnu.xml
index c52e65834b..a3aeda10ab 100644
--- a/data/xml/2022.wnu.xml
+++ b/data/xml/2022.wnu.xml
@@ -74,7 +74,7 @@
       <title><fixed-case>G</fixed-case>is<fixed-case>P</fixed-case>y: A Tool for Measuring Gist Inference Score in Text</title>
       <author><first>Pedram</first><last>Hosseini</last></author>
       <author><first>Christopher</first><last>Wolfe</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>David</first><last>Broniatowski</last></author>
       <pages>38-46</pages>
       <abstract>Decision making theories such as Fuzzy-Trace Theory (FTT) suggest that individuals tend to rely on gist, or bottom-line meaning, in the text when making decisions. In this work, we delineate the process of developing GisPy, an opensource tool in Python for measuring the Gist Inference Score (GIS) in text. Evaluation of GisPy on documents in three benchmarks from the news and scientific text domains demonstrates that scores generated by our tool significantly distinguish low vs. high gist documents. Our tool is publicly available to use at: https: //github.com/phosseini/GisPy.</abstract>
diff --git a/data/xml/2022.wnut.xml b/data/xml/2022.wnut.xml
index 7c181ce89d..12b032a563 100644
--- a/data/xml/2022.wnut.xml
+++ b/data/xml/2022.wnut.xml
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Extracting Mathematical Concepts from Text</title>
       <author><first>Jacob</first><last>Collard</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Brendan</first><last>Fong</last></author>
       <author><first>Eswaran</first><last>Subrahmanian</last></author>
       <pages>15–23</pages>
@@ -113,7 +113,7 @@
       <author><first>Cheng</first><last>Chen</last></author>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last></author>
       <author><first>Shashi Bhushan</first><last>Tn</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>96–100</pages>
       <abstract>We present a simple yet effective method to train a named entity recognition (NER) model that operates on business telephone conversation transcripts that contain noise due to the nature of spoken conversation and artifacts of automatic speech recognition. We first fine-tune LUKE, a state-of-the-art Named Entity Recognition (NER) model, on a limited amount of transcripts, then use it as the teacher model to teach a smaller DistilBERT-based student model using a large amount of weakly labeled data and a small amount of human-annotated data. The model achieves high accuracy while also satisfying the practical constraints for inclusion in a commercial telephony product: realtime performance when deployed on cost-effective CPUs rather than GPUs. In this paper, we introduce the fine-tune-then-distill method for entity recognition on real world noisy data to deploy our NER model in a limited budget production environment. By generating pseudo-labels using a large teacher model pre-trained on typed text while fine-tuned on noisy speech text to train a smaller student model, we make the student model 75x times faster while reserving 99.09% of its accuracy. These findings demonstrate that our proposed approach is very effective in limited budget scenarios to alleviate the need of human labeling of a large amount of noisy data.</abstract>
       <url hash="8045a749">2022.wnut-1.10</url>
@@ -135,7 +135,7 @@
       <author><first>Sofie</first><last>Labat</last></author>
       <author><first>Amir</first><last>Hadifar</last></author>
       <author><first>Thomas</first><last>Demeester</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>106–112</pages>
       <abstract>The ability to track fine-grained emotions in customer service dialogues has many real-world applications, but has not been studied extensively. This paper measures the potential of prediction models on that task, based on a real-world dataset of Dutch Twitter conversations in the domain of customer service. We find that modeling emotion trajectories has a small, but measurable benefit compared to predictions based on isolated turns. The models used in our study are shown to generalize well to different companies and economic sectors.</abstract>
       <url hash="c49de881">2022.wnut-1.12</url>
@@ -183,7 +183,7 @@
     <paper id="17">
       <title>“Kanglish alli names!” Named Entity Recognition for <fixed-case>K</fixed-case>annada-<fixed-case>E</fixed-case>nglish Code-Mixed Social Media Data</title>
       <author><first>Sumukh</first><last>S</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>154–161</pages>
       <abstract>Code-mixing (CM) is a frequently observed phenomenon on social media platforms in multilingual societies such as India. While the increase in code-mixed content on these platforms provides good amount of data for studying various aspects of code-mixing, the lack of automated text analysis tools makes such studies difficult. To overcome the same, tools such as language identifiers and parts of-speech (POS) taggers for analysing code-mixed data have been developed. One such tool is Named Entity Recognition (NER), an important Natural Language Processing (NLP) task, which is not only a subtask of Information Extraction, but is also needed for downstream NLP tasks such as semantic role labeling. While entity extraction from social media data is generally difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. In this work, we present the first ever corpus for Kannada-English code-mixed social media data with the corresponding named entity tags for NER. We provide strong baselines with machine learning classification models such as CRF, Bi-LSTM, and Bi-LSTM-CRF on our corpus with word, character, and lexical features.</abstract>
       <url hash="1da1a02f">2022.wnut-1.17</url>
diff --git a/data/xml/2022.woah.xml b/data/xml/2022.woah.xml
index ad04ff9669..6aef647bbc 100644
--- a/data/xml/2022.woah.xml
+++ b/data/xml/2022.woah.xml
@@ -7,7 +7,7 @@
       <editor><first>Aida</first><last>Mostafazadeh Davani</last></editor>
       <editor><first>Lambert</first><last>Mathias</last></editor>
       <editor><first>Bertie</first><last>Vidgen</last></editor>
-      <editor><first>Zeerak</first><last>Talat</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Talat</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington (Hybrid)</address>
       <month>July</month>
@@ -23,7 +23,7 @@
       <title>Separating Hate Speech and Offensive Language Classes via Adversarial Debiasing</title>
       <author><first>Shuzhou</first><last>Yuan</last></author>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1-10</pages>
       <abstract>Research to tackle hate speech plaguing online media has made strides in providing solutions, analyzing bias and curating data. A challenging problem is ambiguity between hate speech and offensive language, causing low performance both overall and specifically for the hate speech class. It can be argued that misclassifying actual hate speech content as merely offensive can lead to further harm against targeted groups. In our work, we mitigate this potentially harmful phenomenon by proposing an adversarial debiasing method to separate the two classes. We show that our method works for English, Arabic German and Hindi, plus in a multilingual setting, improving performance over baselines.</abstract>
       <url hash="a7ac6df7">2022.woah-1.1</url>
@@ -121,7 +121,7 @@
       <title>Lost in Distillation: A Case Study in Toxicity Modeling</title>
       <author><first>Alyssa</first><last>Chvasta</last></author>
       <author><first>Alyssa</first><last>Lees</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Lucy</first><last>Vasserman</last></author>
       <author><first>Nitesh</first><last>Goyal</last></author>
       <pages>92-101</pages>
@@ -138,7 +138,7 @@
       <author><first>Antigone</first><last>Klimi</last></author>
       <author><first>Eleftheria</first><last>Molou</last></author>
       <author><first>Alexandra</first><last>Saivanidou</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <pages>102-108</pages>
       <abstract>We present a cleansed version of the multilingual lexicon HURTLEX-(EL) comprising 737 offensive words of Modern Greek. We worked bottom-up in two annotation rounds and developed detailed guidelines by cross-classifying words on three dimensions: context, reference, and thematic domain. Our classification reveals a wider spectrum of thematic domains concerning the study of offensive language than previously thought Efthymiou et al. (2014) and reveals social and cultural aspects that are not included in the HURTLEX categories.</abstract>
       <url hash="2d96e06e">2022.woah-1.10</url>
@@ -220,7 +220,7 @@
       <author><first>Björn</first><last>Rönnerstrand</last></author>
       <author><first>Gregor</first><last>Rettenegger</last></author>
       <author><first>Ellen</first><last>Breitholtz</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>170-175</pages>
       <abstract>“Dogwhistles” are expressions intended by the speaker have two messages: a socially-unacceptable “in-group” message understood by a subset of listeners, and a benign message intended for the out-group. We take the result of a word-replacement survey of the Swedish population intended to reveal how dogwhistles are understood, and we show that the difficulty of annotating dogwhistles is reflected in the separability in the space of a sentence-transformer Swedish BERT trained on general data.</abstract>
       <url hash="27dee78f">2022.woah-1.16</url>
diff --git a/data/xml/2022.wordplay.xml b/data/xml/2022.wordplay.xml
index 3aae4aa089..0db3d5285e 100644
--- a/data/xml/2022.wordplay.xml
+++ b/data/xml/2022.wordplay.xml
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Systematic Survey of Text Worlds as Embodied Natural Language Environments</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>1-15</pages>
       <abstract>Text Worlds are virtual environments for embodied agents that, unlike 2D or 3D environments, are rendered exclusively using textual descriptions. These environments offer an alternative to higher-fidelity 3D environments due to their low barrier to entry, providing the ability to study semantics, compositional inference, and other high-level tasks with rich action spaces while controlling for perceptual input. This systematic survey outlines recent developments in tooling, environments, and agent modeling for Text Worlds, while examining recent trends in knowledge graphs, common sense reasoning, transfer learning of Text World performance to higher-fidelity environments, as well as near-term development targets that, once achieved, make Text Worlds an attractive general research paradigm for natural language processing.</abstract>
       <url hash="e696184c">2022.wordplay-1.1</url>
@@ -46,7 +46,7 @@
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Olivia</first><last>Deng</last></author>
       <author><first>Akanksha</first><last>Malhotra</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>25-43</pages>
       <abstract>Non-Player Characters (NPCs) significantly enhance the player experience in many games. Historically, players’ interactions with NPCs have tended to be highly scripted, to be limited to natural language responses to be selected by the player, and to not involve dynamic change in game state. In this work, we demonstrate that use of a few example conversational prompts can power a conversational agent to generate both natural language and novel code. This approach can permit development of NPCs with which players can have grounded conversations that are free-form and less repetitive. We demonstrate our approach using OpenAI Codex (GPT-3 finetuned on GitHub), with Minecraft game development as our test bed. We show that with a few example prompts, a Codex-based agent can generate novel code, hold multi-turn conversations and answer questions about structured data. We evaluate this application using experienced gamers in a Minecraft realm and provide analysis of failure cases and suggest possible directions for solutions.</abstract>
       <url hash="d0564204">2022.wordplay-1.3</url>
diff --git a/data/xml/2023.acl.xml b/data/xml/2023.acl.xml
index 3c7753e1cd..c6a0fa2b8a 100644
--- a/data/xml/2023.acl.xml
+++ b/data/xml/2023.acl.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <editor><first>Anna</first><last>Rogers</last></editor>
       <editor><first>Jordan</first><last>Boyd-Graber</last></editor>
-      <editor><first>Naoaki</first><last>Okazaki</last></editor>
+      <editor id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
       <month>July</month>
@@ -64,7 +64,7 @@
       <author><first>David</first><last>Dale</last><affiliation>Meta AI</affiliation></author>
       <author><first>Elena</first><last>Voita</last><affiliation>Meta AI</affiliation></author>
       <author><first>Loic</first><last>Barrault</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
       <pages>36-50</pages>
       <abstract>While the problem of hallucinations in neural machine translation has long been recognized, so far the progress on its alleviation is very little. Indeed, recently it turned out that without artificially encouraging models to hallucinate, previously existing methods fall short and even the standard sequence log-probability is more informative. It means that internal characteristics of the model can give much more information than we expect, and before using external models and measures, we first need to ask: how far can we go if we use nothing but the translation model itself ? We propose to use a method that evaluates the percentage of the source contribution to a generated translation. Intuitively, hallucinations are translations “detached” from the source, hence they can be identified by low source contribution. This method improves detection accuracy for the most severe hallucinations by a factor of 2 and is able to alleviate hallucinations at test time on par with the previous best approach that relies on external models. Next, if we move away from internal model characteristics and allow external tools, we show that using sentence similarity from cross-lingual embeddings further improves these results. We release the code of our experiments.</abstract>
       <url hash="d9f9390a">2023.acl-long.3</url>
@@ -167,7 +167,7 @@
       <author><first>Yusuke</first><last>Matsui</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Masao</first><last>Utiyama</last><affiliation>NICT</affiliation></author>
       <author><first>Hideki</first><last>Tanaka</last><affiliation>NICT</affiliation></author>
-      <author><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
       <pages>174-189</pages>
       <abstract>k-nearest-neighbor machine translation (kNN-MT) (Khandelwal et al., 2021) boosts the translation performance of trained neural machine translation (NMT) models by incorporating example-search into the decoding algorithm. However, decoding is seriously time-consuming, i.e., roughly 100 to 1,000 times slower than standard NMT, because neighbor tokens are retrieved from all target tokens of parallel data in each timestep. In this paper, we propose “Subset kNN-MT”, which improves the decoding speed of kNN-MT by two methods: (1) retrieving neighbor target tokens from a subset that is the set of neighbor sentences of the input sentence, not from all sentences, and (2) efficient distance computation technique that is suitable for subset neighbor search using a look-up table. Our proposed method achieved a speed-up of up to 132.2 times and an improvement in BLEU score of up to 1.6 compared with kNN-MT in the WMT’19 De-En translation task and the domain adaptation tasks in De-En and En-Ja.</abstract>
       <url hash="f6a46cf5">2023.acl-long.10</url>
@@ -288,7 +288,7 @@
       <author><first>Aiwei</first><last>Liu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Yawen</first><last>Yang</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Shuang</first><last>Li</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <pages>322-337</pages>
       <abstract>Aspect-based sentiment analysis (ABSA) is a fine-grained sentiment classification task. Many recent works have used dependency trees to extract the relation between aspects and contexts and have achieved significant improvements. However, further improvement is limited due to the potential mismatch between the dependency tree as a syntactic structure and the sentiment classification as a semantic task. To alleviate this gap, we replace the syntactic dependency tree with the semantic structure named Abstract Meaning Representation (AMR) and propose a model called AMR-based Path Aggregation Relational Network (APARN) to take full advantage of semantic structures. In particular, we design the path aggregator and the relation-enhanced self-attention mechanism that complement each other. The path aggregator extracts semantic features from AMRs under the guidance of sentence information, while the relation-enhanced self-attention mechanism in turn improves sentence features with refined semantic information. Experimental results on four public datasets demonstrate 1.13% average F1 improvement of APARN in ABSA when compared with state-of-the-art baselines.</abstract>
@@ -425,7 +425,7 @@
     <paper id="29">
       <title>Revealing Single Frame Bias for Video-and-Language Learning</title>
       <author><first>Jie</first><last>Lei</last><affiliation>Meta Platforms, Inc</affiliation></author>
-      <author><first>Tamara</first><last>Berg</last><affiliation>University of North Carolina Chapel Hill</affiliation></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last><affiliation>University of North Carolina Chapel Hill</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <pages>487-507</pages>
       <abstract>Training an effective video-and-language model intuitively requires multiple frames as model inputs. However, it is unclear whether using multiple frames is beneficial to downstream tasks, and if yes, whether the performance gain is worth the drastically-increased computation and memory costs resulting from using more frames. In this work, we explore single-frame models for video-and-language learning. On a diverse set of video-and-language tasks (including text-to-video retrieval and video question answering), we show the surprising result that, with large-scale pre-training and a proper frame ensemble strategy at inference time, a single-frame trained model that does not consider temporal information can achieve better performance than existing methods that use multiple frames for training. This result reveals the existence of a strong “static appearance bias” in popular video-and-language datasets. Therefore, to allow for a more comprehensive evaluation of video-and-language models, we propose two new retrieval tasks based on existing fine-grained action recognition datasets that encourage temporal modeling. Our code is available at <url>https://github.com/jayleicn/singularity</url>.</abstract>
@@ -452,7 +452,7 @@
       <title>World-to-Words: Grounded Open Vocabulary Acquisition through Fast Mapping in Vision-Language Models</title>
       <author><first>Ziqiao</first><last>Ma</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jiayi</first><last>Pan</last><affiliation>University of Michigan at Ann Arbor</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>524-544</pages>
       <abstract>The ability to connect language units to their referents in the physical world, referred to as grounding, is crucial to learning and understanding grounded meanings of words. While humans demonstrate fast mapping in new word learning, it remains unclear whether modern vision-language models can truly represent language with their grounded meanings, and how grounding may further bootstrap new word learning. To this end, we introduce Grounded Open Vocabulary Acquisition (GOVA) to examine grounding and bootstrapping in open-world language learning. As an initial attempt, we propose World-to-Words (W2W), a novel visually-grounded language model by pre-training on image-text pairs highlighting grounding as an objective. Through extensive experiments and analysis, we demonstrate that W2W is a more coherent and fast grounded word learner, and that the grounding ability acquired during pre-training helps the model to learn unseen words more rapidly and robustly.</abstract>
       <url hash="7bc58364">2023.acl-long.31</url>
@@ -521,7 +521,7 @@
       <author><first>Patrick</first><last>Fernandes</last><affiliation>Carnegie Mellon University, Instituto de Telecomunicações</affiliation></author>
       <author><first>Kayo</first><last>Yin</last><affiliation>UC Berkeley</affiliation></author>
       <author><first>Emmy</first><last>Liu</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>606-626</pages>
       <abstract>Although proper handling of discourse significantly contributes to the quality of machine translation (MT), these improvements are not adequately measured in common translation quality metrics. Recent works in context-aware MT attempt to target a small set of discourse phenomena during evaluation, however not in a fully systematic way. In this paper, we develop the Multilingual Discourse-Aware (MuDA) benchmark, a series of taggers that identify and evaluate model performance on discourse phenomena in any given dataset. The choice of phenomena is inspired by a novel methodology to systematically identify translations that require context. This methodology confirms the difficulty of previously studied phenomena while uncovering others which were not previously addressed. We find that commonly studied context-aware MT models make only marginal improvements over context-agnostic models, which suggests these models do not handle these ambiguities effectively. We release code and data for 14 language pairs to encourage the MT community to focus on accurately capturing discourse phenomena. Code available at <url>https://github.com/neulab/contextual-mt</url></abstract>
@@ -612,7 +612,7 @@
       <author><first>Martijn</first><last>Bartelds</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Nay</first><last>San</last><affiliation>Stanford University</affiliation></author>
       <author><first>Bradley</first><last>McDonnell</last><affiliation>University of Hawai‘i at Mānoa</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <author><first>Martijn</first><last>Wieling</last><affiliation>University of Groningen</affiliation></author>
       <pages>715-729</pages>
       <abstract>The performance of automatic speech recognition (ASR) systems has advanced substantially in recent years, particularly for languages for which a large amount of transcribed speech is available. Unfortunately, for low-resource languages, such as minority languages, regional languages or dialects, ASR performance generally remains much lower. In this study, we investigate whether data augmentation techniques could help improve low-resource ASR performance, focusing on four typologically diverse minority languages or language variants (West Germanic: Gronings, West-Frisian; Malayo-Polynesian: Besemah, Nasal). For all four languages, we examine the use of self-training, where an ASR system trained with the available human-transcribed data is used to generate transcriptions, which are then combined with the original data to train a new ASR system. For Gronings, for which there was a pre-existing text-to-speech (TTS) system available, we also examined the use of TTS to generate ASR training data from text-only sources. We find that using a self-training approach consistently yields improved performance (a relative WER reduction up to 20.5% compared to using an ASR system trained on 24 minutes of manually transcribed speech). The performance gain from TTS augmentation for Gronings was even stronger (up to 25.5% relative reduction in WER compared to a system based on 24 minutes of manually transcribed speech). In sum, our results show the benefit of using self-training or (if possible) TTS-generated data as an efficient solution to overcome the limitations of data availability for resource-scarce languages in order to improve ASR performance.</abstract>
@@ -763,7 +763,7 @@
       <author><first>Kevin</first><last>Pei</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Ishan</first><last>Jindal</last><affiliation>IBM Research</affiliation></author>
       <author><first>Kevin Chen-Chuan</first><last>Chang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Yunyao</first><last>Li</last><affiliation>Apple</affiliation></author>
       <pages>929-949</pages>
       <abstract>Open Information Extraction (OpenIE) has been used in the pipelines of various NLP tasks. Unfortunately, there is no clear consensus on which models to use in which tasks. Muddying things further is the lack of comparisons that take differing training sets into account. In this paper, we present an application-focused empirical survey of neural OpenIE models, training sets, and benchmarks in an effort to help users choose the most suitable OpenIE systems for their applications. We find that the different assumptions made by different models and datasets have a statistically significant effect on performance, making it important to choose the most appropriate model for one’s applications. We demonstrate the applicability of our recommendations on a downstream Complex QA application.</abstract>
@@ -845,7 +845,7 @@
       <title>Being Right for Whose Right Reasons?</title>
       <author><first>Terne Sasha</first><last>Thorn Jakobsen</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Laura</first><last>Cabello</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>1033-1054</pages>
       <abstract>Explainability methods are used to benchmark the extent to which model predictions align with human rationales i.e., are ‘right for the right reasons’. Previous work has failed to acknowledge, however, that what counts as a rationale is sometimes subjective. This paper presents what we think is a first of its kind, a collection of human rationale annotations augmented with the annotators demographic information. We cover three datasets spanning sentiment analysis and common-sense reasoning, and six demographic groups (balanced across age and ethnicity). Such data enables us to ask both what demographics our predictions align with and whose reasoning patterns our models’ rationales align with. We find systematic inter-group annotator disagreement and show how 16 Transformer-based models align better with rationales provided by certain demographic groups: We find that models are biased towards aligning best with older and/or white annotators. We zoom in on the effects of model size and model distillation, finding –contrary to our expectations– negative correlations between model size and rationale agreement as well as no evidence that either model size or model distillation improves fairness.</abstract>
       <url hash="c60f8667">2023.acl-long.59</url>
@@ -862,7 +862,7 @@
       <author><first>Siddharth</first><last>Verma</last><affiliation>Square</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Max Planck Institute &amp; ETH Zurich</affiliation></author>
       <author><first>Gargi</first><last>Ghosh</last><affiliation>Facebook</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Asli</first><last>Celikyilmaz</last><affiliation>FAIR @ Meta</affiliation></author>
       <pages>1055-1081</pages>
       <abstract>Recent advancements in large language models have enabled them to perform well on complex tasks that require step-by-step reasoning with few-shot learning. However, it is unclear whether these models are applying reasoning skills they have learnt during pre-training , or if they are simply memorizing their training corpus at finer granularity and have learnt to better understand their context. To address this question, we introduce {pasted macro ‘OUR’}model, a benchmark and suite of analyses for evaluating reasoning skills of language models. {pasted macro ‘OUR’}model enables comparing pre-trained and finetuned models on complex tasks that require reasoning skills to solve. Our benchmark provides a test bed to asses any language model on fine-grained reasoning skills, which spans over 20 datasets and covers 10 different reasoning skills. By using {pasted macro ‘OUR’}model we further investigate <i>the role of finetuning</i>. Our extensive empirical analysis shows that language models learn more reasoning skills such as textual entailment, abductive reasoning, and analogical reasoning during the finetuning stage compared to pretraining stage. However, we also find that when language models are finetuned they tend to overfit to the prompt template, which hurts the robustness of models causing generalization problems.</abstract>
@@ -880,9 +880,9 @@
       <author><first>Nora</first><last>Kassner</last><affiliation>Meta AI</affiliation></author>
       <author><first>Chunlan</first><last>Ma</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>CIS, Ludwig-Maximilians-Universitaet</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR CNRS &amp; Sorbonne Université</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>1082-1117</pages>
       <abstract>The NLP community has mainly focused on scaling Large Language Models (LLMs) vertically, i.e., making them better for about 100 languages. We instead scale LLMs horizontally: we create, through continued pretraining, Glot500-m, an LLM that covers 511 predominantly low-resource languages. An important part of this effort is to collect and clean Glot500-c, a corpus that covers these 511 languages and allows us to train Glot500-m. We evaluate Glot500-m on five diverse tasks across these languages. We observe large improvements for both high-resource and low-resource languages compared to an XLM-R baseline. Our analysis shows that no single factor explains the quality of multilingual LLM representations. Rather, a combination of factors determines quality including corpus size, script, “help” from related languages and the total capacity of the model. Our work addresses an important goal of NLP research: we should notlimit NLP to a small fraction of the world’s languages and instead strive to support as many languages as possible to bring the benefits of NLP technology to all languages and cultures. Code, data and models are available at <url>https://github.com/cisnlp/Glot500</url>.</abstract>
       <url hash="d76ea3ad">2023.acl-long.61</url>
@@ -1209,7 +1209,7 @@
       <title>Marked Personas: Using Natural Language Prompts to Measure Stereotypes in Language Models</title>
       <author><first>Myra</first><last>Cheng</last><affiliation>Stanford University</affiliation></author>
       <author><first>Esin</first><last>Durmus</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>1504-1532</pages>
       <abstract>To recognize and mitigate harms from large language models (LLMs), we need to understand the prevalence and nuances of stereotypes in LLM outputs. Toward this end, we present Marked Personas, a prompt-based method to measure stereotypes in LLMs for intersectional demographic groups without any lexicon or data labeling. Grounded in the sociolinguistic concept of markedness (which characterizes explicitly linguistically marked categories versus unmarked defaults), our proposed method is twofold: 1) prompting an LLM to generate personas, i.e., natural language descriptions, of the target demographic group alongside personas of unmarked, default groups; 2) identifying the words that significantly distinguish personas of the target group from corresponding unmarked ones. We find that the portrayals generated by GPT-3.5 and GPT-4 contain higher rates of racial stereotypes than human-written portrayals using the same prompts. The words distinguishing personas of marked (non-white, non-male) groups reflect patterns of othering and exoticizing these demographics. An intersectional lens further reveals tropes that dominate portrayals of marginalized groups, such as tropicalism and the hypersexualization of minoritized women. These representational harms have concerning implications for downstream applications like story generation.</abstract>
       <url hash="3e15cf28">2023.acl-long.84</url>
@@ -1225,7 +1225,7 @@
       <author><first>Yuan</first><last>Gao</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <pages>1533-1545</pages>
       <abstract>Out-of-distribution (OOD) detection, a fundamental task vexing real-world applications, has attracted growing attention in the NLP community. Recently fine-tuning based methods have made promising progress. However, it could be costly to store fine-tuned models for each scenario. In this paper, we depart from the classic fine-tuning based OOD detection toward a parameter-efficient alternative, and propose an unsupervised prefix-tuning based OOD detection framework termed PTO. Additionally, to take advantage of optional training data labels and targeted OOD data, two practical extensions of PTO are further proposed. Overall, PTO and its extensions offer several key advantages of being lightweight, easy-to-reproduce, and theoretically justified. Experimental results show that our methods perform comparably to, even better than, existing fine-tuning based OOD detection approaches under a wide range of metrics, detection settings, and OOD types.</abstract>
       <url hash="f48268a2">2023.acl-long.85</url>
@@ -1281,7 +1281,7 @@
       <author><first>Hao</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Yu</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Xiaodong</first><last>Liu</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>1599-1618</pages>
       <abstract>The retrieval model is an indispensable component for real-world knowledge-intensive tasks, e.g., open-domain question answering (ODQA). As separate retrieval skills are annotated for different datasets, recent work focuses on customized methods, limiting the model transfer- ability and scalability. In this work, we propose a modular retriever where individual modules correspond to key skills that can be reused across datasets. Our approach supports flexible skill configurations based on the target domain to boost performance. To mitigate task interference, we design a novel modularization parameterization inspired by sparse Transformer. We demonstrate that our model can benefit from self-supervised pretraining on Wikipedia and fine-tuning using multiple ODQA datasets, both in a multi-task fashion. Our approach outperforms recent self-supervised retrievers in zero-shot evaluations and achieves state-of-the-art fine-tuned retrieval performance on NQ, HotpotQA and OTT-QA.</abstract>
@@ -1295,7 +1295,7 @@
       <author><first>Wenya</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Vivek</first><last>Srikumar</last><affiliation>University of Utah</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>1619-1635</pages>
       <abstract>In question answering requiring common sense, language models (e.g., GPT-3) have been used to generate text expressing background knowledge that helps improve performance. Yet the cost of working with such models is very high; in this work, we finetune smaller language models to generate useful intermediate context, referred to here as elaborations. Our framework alternates between updating two language models—an elaboration generator and an answer predictor—allowing each to influence the other. Using less than 0.5% of the parameters of GPT-3, our model outperforms alternatives with similar sizes and closes the gap with GPT-3 on four commonsense question answering benchmarks. Human evaluations show that the quality of the generated elaborations is high.</abstract>
       <url hash="9a072c4b">2023.acl-long.90</url>
@@ -1321,8 +1321,8 @@
       <author><first>Zhen</first><last>Huang</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Yunxiang</first><last>Zhao</last><affiliation>Beijing Institute of Biotechnology</affiliation></author>
       <author><first>Zhiliang</first><last>Tian</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>1650-1668</pages>
       <abstract>Self-training emerges as an important research line on domain adaptation. By taking the model’s prediction as the pseudo labels of the unlabeled data, self-training bootstraps the model with pseudo instances in the target domain. However, the prediction errors of pseudo labels (label noise) challenge the performance of self-training. To address this problem, previous approaches only use reliable pseudo instances, i.e., pseudo instances with high prediction confidence, to retrain the model. Although these strategies effectively reduce the label noise, they are prone to miss the hard examples. In this paper, we propose a new self-training framework for domain adaptation, namely Domain adversarial learning enhanced Self-Training Framework (DaMSTF). Firstly, DaMSTF involves meta-learning to estimate the importance of each pseudo instance, so as to simultaneously reduce the label noise and preserve hard examples. Secondly, we design a meta constructor for constructing the meta-validation set, which guarantees the effectiveness of the meta-learning module by improving the quality of the meta-validation set. Thirdly, we find that the meta-learning module suffers from the training guidance vanish- ment and tends to converge to an inferior optimal. To this end, we employ domain adversarial learning as a heuristic neural network initialization method, which can help the meta-learning module converge to a better optimal. Theoretically and experimentally, we demonstrate the effectiveness of the proposed DaMSTF. On the cross-domain sentiment classification task, DaMSTF improves the performance of BERT with an average of nearly 4%.</abstract>
       <url hash="cedf5e65">2023.acl-long.92</url>
@@ -1442,7 +1442,7 @@
       <author><first>Sarah</first><last>Payne</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Jordan</first><last>Kodner</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Ellen</first><last>Broselow</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>1793-1805</pages>
       <abstract>Explicit linguistic knowledge, encoded by resources such as rule-based morphological analyzers, continues to prove useful in downstream NLP tasks, especially for low-resource languages and dialects. Rules are an important asset in descriptive linguistic grammars. However, creating such resources is usually expensive and non-trivial, especially for spoken varieties with no written standard. In this work, we present a novel approach for automatically learning morphophonological rules of Arabic from a corpus. Motivated by classic cognitive models for rule learning, rules are generalized cautiously. Rules that are memorized for individual items are only allowed to generalize to unseen forms if they are sufficiently reliable in the training data. The learned rules are further examined to ensure that they capture true linguistic phenomena described by domain experts. We also investigate the learnability of rules in low-resource settings across different experimental setups and dialects.</abstract>
       <url hash="88b308e4">2023.acl-long.101</url>
@@ -1505,7 +1505,7 @@
     <paper id="106">
       <title>Do language models have coherent mental models of everyday things?</title>
       <author><first>Yuling</first><last>Gu</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>1892-1913</pages>
       <abstract>When people think of everyday things like an egg, they typically have a mental image associated with it. This allows them to correctly judge, for example, that “the yolk surrounds the shell” is a false statement. Do language models similarly have a coherent picture of such everyday things? To investigate this, we propose a benchmark dataset consisting of 100 everyday things, their parts, and the relationships between these parts, expressed as 11,720 “X relation Y?” true/false questions. Using these questions as probes, we observe that state-of-the-art pre-trained language models (LMs) like GPT-3 and Macaw have fragments of knowledge about these everyday things, but do not have fully coherent “parts mental models” (54-59% accurate, 19-43% conditional constraint violation). We propose an extension where we add a constraint satisfaction layer on top of the LM’s raw predictions to apply commonsense constraints. As well as removing inconsistencies, we find that this also significantly improves accuracy (by 16-20%), suggesting how the incoherence of the LM’s pictures of everyday things can be significantly reduced.</abstract>
@@ -1528,7 +1528,7 @@
       <title>Instruction Induction: From Few Examples to Natural Language Task Descriptions</title>
       <author><first>Or</first><last>Honovich</last><affiliation>Tel Aviv University</affiliation></author>
       <author><first>Uri</first><last>Shaham</last><affiliation>Tel Aviv University</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <author><first>Omer</first><last>Levy</last><affiliation>Meta AI / Tel Aviv University</affiliation></author>
       <pages>1935-1952</pages>
       <abstract>Large language models are able to perform a task by conditioning on a few input-output demonstrations - a paradigm known as in-context learning. We show that language models can explicitly infer an underlying task from a few demonstrations by prompting them to generate a natural language instruction that fits the examples. To explore this ability, we introduce the instruction induction challenge, compile a dataset consisting of 24 tasks, and define a novel evaluation metric based on executing the generated instruction. We discover that, to a large extent, the ability to generate instructions does indeed emerge when using a model that is both large enough and aligned to follow instructions; InstructGPT achieves 65.7% of human performance in our execution-based metric, while the original GPT-3 model reaches only 9.8% of human performance. This surprising result suggests that instruction induction might be a viable learning paradigm in and of itself, where instead of fitting a set of latent continuous parameters to the data, one searches for the best description in the natural language hypothesis space.</abstract>
@@ -1541,8 +1541,8 @@
       <title>In-Context Analogical Reasoning with Pre-Trained Language Models</title>
       <author><first>Xiaoyang</first><last>Hu</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Shane</first><last>Storks</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Richard</first><last>Lewis</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="richard-l-lewis"><first>Richard</first><last>Lewis</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>1953-1969</pages>
       <abstract>Analogical reasoning is a fundamental capacity of human cognition that allows us to reason abstractly about novel situations by relating them to past experiences. While it is thought to be essential for robust reasoning in AI systems, conventional approaches require significant training and/or hard-coding of domain knowledge to be applied to benchmark tasks. Inspired by cognitive science research that has found connections between human language and analogy-making, we explore the use of intuitive language-based abstractions to support analogy in AI systems. Specifically, we apply large pre-trained language models (PLMs) to visual Raven’s Progressive Matrices (RPM), a common relational reasoning test. By simply encoding the perceptual features of the problem into language form, we find that PLMs exhibit a striking capacity for zero-shot relational reasoning, exceeding human performance and nearing supervised vision-based methods. We explore different encodings that vary the level of abstraction over task features, finding that higher-level abstractions further strengthen PLMs’ analogical reasoning. Our detailed analysis reveals insights on the role of model complexity, in-context learning, and prior knowledge in solving RPM tasks.</abstract>
       <url hash="e4e83f54">2023.acl-long.109</url>
@@ -1553,7 +1553,7 @@
     <paper id="110">
       <title>Peek Across: Improving Multi-Document Modeling via Cross-Document Question-Answering</title>
       <author><first>Avi</first><last>Caciularu</last><affiliation>Bar-Ilan University</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Jacob</first><last>Goldberger</last><affiliation>Bar-Ilan University</affiliation></author>
       <author><first>Ido</first><last>Dagan</last><affiliation>Bar-Ilan University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Allen Institute for AI</affiliation></author>
@@ -1709,8 +1709,8 @@
       <author><first>Shuangzhi</first><last>Wu</last><affiliation>Bytedance</affiliation></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>School of Computer Science and Technology, Harbin Institute of Technology</affiliation></author>
       <author><first>Hui</first><last>Di</last><affiliation>Toshiba (China) Co., Ltd.</affiliation></author>
-      <author><first>Muyun</first><last>Yang</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>2175-2190</pages>
       <abstract>State-of-the-art translation Quality Estimation (QE) models are proven to be biased. More specifically, they over-rely on monolingual features while ignoring the bilingual semantic alignment. In this work, we propose a novel method to mitigate the bias of the QE model and improve estimation performance. Our method is based on the contrastive learning between clean and noisy sentence pairs. We first introduce noise to the target side of the parallel sentence pair, forming the negative samples. With the original parallel pairs as the positive sample, the QE model is contrastively trained to distinguish the positive samples from the negative ones. This objective is jointly trained with the regression-style quality estimation, so as to prevent the QE model from overfitting to monolingual features. Experiments on WMT QE evaluation datasets demonstrate that our method improves the estimation performance by a large margin while mitigating the bias.</abstract>
       <url hash="1d098aff">2023.acl-long.121</url>
@@ -1721,7 +1721,7 @@
     <paper id="122">
       <title>Breeding Machine Translations: Evolutionary approach to survive and thrive in the world of automated evaluation</title>
       <author><first>Josef</first><last>Jon</last><affiliation>Charles University</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <pages>2191-2212</pages>
       <abstract>We propose a genetic algorithm (GA) based method for modifying <tex-math>n</tex-math>-best lists produced by a machine translation (MT) system. Our method offers an innovative approach to improving MT quality and identifying weaknesses in evaluation metrics. Using common GA operations (mutation and crossover) on a list of hypotheses in combination with a fitness function (an arbitrary MT metric), we obtain novel and diverse outputs with high metric scores. With a combination of multiple MT metrics as the fitness function, the proposed method leads to an increase in translation quality as measured by other held-out automatic metrics.With a single metric (including popular ones such as COMET) as the fitness function, we find blind spots and flaws in the metric. This allows for an automated search for adversarial examples in an arbitrary metric, without prior assumptions on the form of such example. As a demonstration of the method, we create datasets of adversarial examples and use them to show that reference-free COMET is substantially less robust than the reference-based version.</abstract>
       <url hash="4622225f">2023.acl-long.122</url>
@@ -1735,7 +1735,7 @@
       <author><first>Zhexin</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Fei</first><last>Mi</last><affiliation>Huawei</affiliation></author>
       <author><first>Yasheng</first><last>Wang</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jianwei</first><last>Cui</last><affiliation>Xiaomi AI Lab</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>Xiaomi AI Lab</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
@@ -1822,7 +1822,7 @@
       <author><first>Xinxi</first><last>Lyu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Sewon</first><last>Min</last><affiliation>University of Washington</affiliation></author>
       <author><first>Iz</first><last>Beltagy</last><affiliation>Allen Institute for AI (AI2)</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>2304-2317</pages>
       <abstract>Although large language models can be prompted for both zero- and few-shot learning, performance drops significantly when no demonstrations are available. In this paper, we introduce Z-ICL, a new zero-shot method that closes the gap by constructing pseudo-demonstrations for a given test input using a raw text corpus. Concretely, pseudo-demonstrations are constructed by (1) finding the nearest neighbors to the test input from the corpus and pairing them with random task labels, and (2) applying a set of techniques to reduce the amount of direct copying the model does from the resulting demonstrations. Evaluation on nine classification datasets shows that Z-ICL outperforms previous zero-shot methods by a significant margin, and is on par with in-context learning with labeled training data in the few-shot setting. Overall, Z-ICL provides a significantly higher estimate of the zero-shot performance levels of a model, and supports future efforts to develop better pseudo-demonstrations that further improve zero-shot results.</abstract>
@@ -1934,7 +1934,7 @@
       <author><first>Chengsong</first><last>Huang</last><affiliation>Fudan university</affiliation></author>
       <author><first>Xiaoqing</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jinshu</first><last>Lin</last><affiliation>Hundsun</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>2437-2449</pages>
       <abstract>Tables are widely used in research and business, which are suitable for human consumption, but not easily machine-processable, particularly when tables are present in images. One of the main challenges to extracting data from images of tables is accurately recognizing table structures, especially for complex tables with cross rows and columns. In this study, we propose a novel multi-modal pre-training model for table structure recognition, named TableVLM.With a two-stream multi-modal transformer-based encoder-decoder architecture, TableVLM learns to capture rich table structure-related features by multiple carefully-designed unsupervised objectives inspired by the notion of masked visual-language modeling. To pre-train this model, we also created a dataset, called ComplexTable, which consists of 1,000K samples to be released publicly. Experiment results show that the model built on pre-trained TableVLM can improve the performance up to 1.97% in tree-editing-distance-score on ComplexTable.</abstract>
       <url hash="115222cc">2023.acl-long.137</url>
@@ -1989,7 +1989,7 @@
       <author><first>Ran</first><last>Xu</last><affiliation>Emory University</affiliation></author>
       <author><first>Jieyu</first><last>Zhang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jiaming</first><last>Shen</last><affiliation>Google Research</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <pages>2499-2521</pages>
       <abstract>We present PATRON, a prompt-based data selection method for pre-trained language model fine-tuning under cold-start scenarios, i.e., no initial labeled data are available. In PATRON, we design (1) a prompt-based uncertainty propagation approach to estimate the importance of data points and (2) a partition-then-rewrite (PTR) strategy to promote sample diversity when querying for annotations. Experiments on six text classification datasets show that PATRON outperforms the strongest cold-start data selection baselines by up to 6.9%. Besides, with 128 labels only, PATRON achieves 91.0% and 92.1% of the fully supervised performance based on vanilla fine-tuning and prompt-based learning respectively. Our implementation of PATRON will be published upon acceptance.</abstract>
       <url hash="27a6c559">2023.acl-long.141</url>
@@ -2000,7 +2000,7 @@
     <paper id="142">
       <title>Training-free Neural Architecture Search for <fixed-case>RNN</fixed-case>s and Transformers</title>
       <author><first>Aaron</first><last>Serianni</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
       <pages>2522-2540</pages>
       <abstract>Neural architecture search (NAS) has allowed for the automatic creation of new and effective neural network architectures, offering an alternative to the laborious process of manually designing complex architectures. However, traditional NAS algorithms are slow and require immense amounts of computing power. Recent research has investigated training-free NAS metrics for image classification architectures, drastically speeding up search algorithms. In this paper, we investigate training-free NAS metrics for recurrent neural network (RNN) and BERT-based transformer architectures, targeted towards language modeling tasks. First, we develop a new training-free metric, named hidden covariance, that predicts the trained performance of an RNN architecture and significantly outperforms existing training-free metrics. We experimentally evaluate the effectiveness of the hidden covariance metric on the NAS-Bench-NLP benchmark. Second, we find that the current search space paradigm for transformer architectures is not optimized for training-free neural architecture search. Instead, a simple qualitative analysis can effectively shrink the search space to the best performing architectures. This conclusion is based on our investigation of existing training-free metrics and new metrics developed from recent transformer pruning literature, evaluated on our own benchmark of trained BERT architectures. Ultimately, our analysis shows that the architecture search space and the training-free metric must be developed together in order to achieve effective results. Our source code is available at <url>https://github.com/aaronserianni/training-free-nas</url>.</abstract>
       <url hash="6728d4f4">2023.acl-long.142</url>
@@ -2012,7 +2012,7 @@
       <title><fixed-case>C</fixed-case>ross<fixed-case>S</fixed-case>um: Beyond <fixed-case>E</fixed-case>nglish-Centric Cross-Lingual Summarization for 1,500+ Language Pairs</title>
       <author><first>Abhik</first><last>Bhattacharjee</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Tahmid</first><last>Hasan</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University</affiliation></author>
       <author><first>Yong-Bin</first><last>Kang</last><affiliation>Swinburne University of Technology</affiliation></author>
       <author><first>Rifat</first><last>Shahriyar</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
@@ -2042,7 +2042,7 @@
       <title>Bi-Phone: Modeling Inter Language Phonetic Influences in Text</title>
       <author><first>Abhirut</first><last>Gupta</last><affiliation>Google Research</affiliation></author>
       <author><first>Ananya B.</first><last>Sai</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
-      <author><first>Richard</first><last>Sproat</last><affiliation>Google, Japan</affiliation></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last><affiliation>Google, Japan</affiliation></author>
       <author><first>Yuri</first><last>Vasilevski</last><affiliation>Google LLC</affiliation></author>
       <author><first>James</first><last>Ren</last><affiliation>Google</affiliation></author>
       <author><first>Ambarish</first><last>Jash</last><affiliation>Google LLC</affiliation></author>
@@ -2060,7 +2060,7 @@
       <author><first>Shengqiong</first><last>Wu</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Hao</first><last>Fei</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Wei</first><last>Ji</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>2593-2608</pages>
       <abstract>Unpaired cross-lingual image captioning has long suffered from irrelevancy and disfluency issues, due to the inconsistencies of the semantic scene and syntax attributes during transfer. In this work, we propose to address the above problems by incorporating the scene graph (SG) structures and the syntactic constituency (SC) trees. Our captioner contains the semantic structure-guided image-to-pivot captioning and the syntactic structure-guided pivot-to-target translation, two of which are joined via pivot language. We then take the SG and SC structures as pivoting, performing cross-modal semantic structure alignment and cross-lingual syntactic structure alignment learning. We further introduce cross-lingual&amp;cross-modal back-translation training to fully align the captioning and translation stages. Experiments on English-Chinese transfers show that our model shows great superiority in improving captioning relevancy and fluency.</abstract>
       <url hash="87d7051a">2023.acl-long.146</url>
@@ -2130,8 +2130,8 @@
       <author><first>Griffin</first><last>Adams</last><affiliation>Columbia University</affiliation></author>
       <author><first>Alex</first><last>Fabbri</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Faisal</first><last>Ladhak</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>2680-2697</pages>
       <abstract>Two-step approaches, in which summary candidates are generated-then-reranked to return a single summary, can improve ROUGE scores over the standard single-step approach. Yet, standard decoding methods (i.e., beam search, nucleus sampling, and diverse beam search) produce candidates with redundant, and often low quality, content. In this paper, we design a novel method to generate candidates for re-ranking that addresses these issues. We ground each candidate abstract on its own unique content plan and generate distinct plan-guided abstracts using a model’s top beam. More concretely, a standard language model (a BART LM) auto-regressively generates elemental discourse unit (EDU) content plans with an extractive copy mechanism. The top K beams from the content plan generator are then used to guide a separate LM, which produces a single abstractive candidate for each distinct plan. We apply an existing re-ranker (BRIO) to abstractive candidates generated from our method, as well as baseline decoding methods. We show large relevance improvements over previously published methods on widely used single document news article corpora, with ROUGE-2 F1 gains of 0.88, 2.01, and 0.38 on CNN / Dailymail, NYT, and Xsum, respectively. A human evaluation on CNN / DM validates these results. Similarly, on 1k samples from CNN / DM, we show that prompting GPT-3 to follow EDU plans outperforms sampling-based methods by by 1.05 ROUGE-2 F1 points. Code to generate and realize plans is available at <url>https://github.com/griff4692/edu-sum</url>.</abstract>
       <url hash="20a5649e">2023.acl-long.151</url>
@@ -2160,7 +2160,7 @@
       <author><first>Xiang</first><last>Deng</last><affiliation>The Ohio State University</affiliation></author>
       <author><first>Jiaming</first><last>Shen</last><affiliation>Google Research</affiliation></author>
       <author><first>You</first><last>Wu</last><affiliation>Google LLC</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Huan</first><last>Sun</last><affiliation>The Ohio State University</affiliation></author>
       <pages>2717-2739</pages>
       <abstract>Chain-of-Thought (CoT) prompting can dramatically improve the multi-step reasoning abilities of large language models (LLMs). CoT explicitly encourages the LLM to generate intermediate rationales for solving a problem, by providing a series of reasoning steps in the demonstrations. Despite its success, there is still little understanding of what makes CoT prompting effective and which aspects of the demonstrated reasoning steps contribute to its performance. In this paper, we show that CoT reasoning is possible even with invalid demonstrations - prompting with invalid reasoning steps can achieve over 80-90% of the performance obtained using CoT under various metrics, while still generating coherent lines of reasoning during inference. Further experiments show that other aspects of the rationales, such as being relevant to the query and correctly ordering the reasoning steps, are much more important for effective CoT reasoning. Overall, these findings both deepen our understanding of CoT prompting, and open up new questions regarding LLMs’ capability to learn to reason in context.</abstract>
@@ -2178,7 +2178,7 @@
       <author><first>Vedanuj</first><last>Goswami</last><affiliation>Meta AI</affiliation></author>
       <author><first>Philipp</first><last>Koehn</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Angela</first><last>Fan</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>Francisco</first><last>Guzman</last><affiliation>Meta AI</affiliation></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last><affiliation>Meta AI</affiliation></author>
       <pages>2740-2756</pages>
       <abstract>For many languages, machine translation progress is hindered by the lack of reliable training data. Models are trained on whatever pre-existing datasets may be available and then augmented with synthetic data, because it is often not economical to pay for the creation of large-scale datasets. But for the case of low-resource languages, would the creation of a few thousand professionally translated sentence pairs give any benefit? In this paper, we show that it does. We describe a broad data collection effort involving around 6k professionally translated sentence pairs for each of 39 low-resource languages, which we make publicly available. We analyse the gains of models trained on this small but high-quality data, showing that it has significant impact even when larger but lower quality pre-existing corpora are used, or when data is augmented with millions of sentences through backtranslation.</abstract>
       <url hash="a5df1427">2023.acl-long.154</url>
@@ -2262,7 +2262,7 @@
       <author><first>Marcus</first><last>Collins</last><affiliation>Amazon, Inc.</affiliation></author>
       <author><first>Nikhita</first><last>Vedula</last><affiliation>Amazon</affiliation></author>
       <author><first>Simone</first><last>Filice</last><affiliation>amazon.com</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon Research</affiliation></author>
       <pages>2847-2867</pages>
       <abstract>Methods to generate text from structured data have advanced significantly in recent years, primarily due to fine-tuning of pre-trained language models on large datasets. However, such models can fail to produce output faithful to the input data, particularly on out-of-domain data. Sufficient annotated data is often not available for specific domains, leading us to seek an unsupervised approach to improve the faithfulness of output text. Since the problem is fundamentally one of consistency between the representations of the structured data and text, we evaluate the effectiveness of cycle training in this work. Cycle training uses two models which are inverses of each other: one that generates text from structured data, and one which generates the structured data from natural language text. We show that cycle training, when initialized with a small amount of supervised data (100 samples in our case), achieves nearly the same performance as fully supervised approaches for the data-to-text generation task on the WebNLG, E2E, WTQ, and WSQL datasets. We perform extensive empirical analysis with automated evaluation metrics and a newly designed human evaluation schema to reveal different cycle training strategies’ effectiveness of reducing various types of generation errors. Our code is publicly available at <url>https://github.com/Edillower/CycleNLG</url>.</abstract>
@@ -2328,7 +2328,7 @@
       <title>Summary-Oriented Vision Modeling for Multimodal Abstractive Summarization</title>
       <author><first>Yunlong</first><last>Liang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jiaan</first><last>Wang</last><affiliation>School of Computer Science and Technology, Soochow University, Suzhou, China</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
@@ -2393,7 +2393,7 @@
       <author><first>Nicolas</first><last>Stefanovitch</last><affiliation>Joint Research Centre</affiliation></author>
       <author><first>Nikolaos</first><last>Nikolaidis</last><affiliation>Athens University of Economics and Business</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>3001-3022</pages>
       <abstract>We present a new multilingual multifacet dataset of news articles, each annotated for genre (objective news reporting vs. opinion vs. satire), framing (what key aspects are highlighted), and persuasion techniques (logical fallacies, emotional appeals, ad hominem attacks, etc.). The persuasion techniques are annotated at the span level, using a taxonomy of 23 fine-grained techniques grouped into 6 coarse categories. The dataset contains 1,612 news articles covering recent news on current topics of public interest in six European languages (English, French, German, Italian, Polish, and Russian), with more than 37k annotated spans of persuasion techniques. We describe the dataset and the annotation process, and we report the evaluation results of multilabel classification experiments using state-of-the-art multilingual transformers at different levels of granularity: token-level, sentence-level, paragraph-level, and document-level.</abstract>
       <url hash="cf4440d5">2023.acl-long.169</url>
@@ -2431,7 +2431,7 @@
       <author><first>Fan</first><last>Yin</last><affiliation>UCLA</affiliation></author>
       <author><first>Jesse</first><last>Vig</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Philippe</first><last>Laban</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <pages>3063-3079</pages>
@@ -2487,7 +2487,7 @@
       <title>Interpretable Word Sense Representations via Definition Generation: The Case of Semantic Change Analysis</title>
       <author><first>Mario</first><last>Giulianelli</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Iris</first><last>Luden</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
       <author><first>Andrey</first><last>Kutuzov</last><affiliation>University of Oslo</affiliation></author>
       <pages>3130-3148</pages>
       <abstract>We propose using automatically generated natural language definitions of contextualised word usages as interpretable word and word sense representations. Given a collection of usage examples for a target word, and the corresponding data-driven usage clusters (i.e., word senses), a definition is generated for each usage with a specialised Flan-T5 language model, and the most prototypical definition in a usage cluster is chosen as the sense label. We demonstrate how the resulting sense labels can make existing approaches to semantic change analysis more interpretable, and how they can allow users — historical linguists, lexicographers, or social scientists — to explore and intuitively explain diachronic trajectories of word meaning. Semantic change analysis is only one of many possible applications of the ‘definitions as representations’ paradigm. Beyond being human-readable, contextualised definitions also outperform token or usage sentence embeddings in word-in-context semantic similarity judgements, making them a new promising type of lexical representation for NLP.</abstract>
@@ -2501,8 +2501,8 @@
       <author><first>Hao</first><last>Yan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Saurabh</first><last>Srivastava</last><affiliation>George Mason University</affiliation></author>
       <author><first>Yintao</first><last>Tai</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last><affiliation>Facebook AI Research</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
       <author><first>Ziyu</first><last>Yao</last><affiliation>George Mason University</affiliation></author>
       <pages>3149-3170</pages>
       <abstract>Interactive semantic parsing based on natural language (NL) feedback, where users provide feedback to correct the parser mistakes, has emerged as a more practical scenario than the traditional one-shot semantic parsing. However, prior work has heavily relied on human-annotated feedback data to train the interactive semantic parser, which is prohibitively expensive and not scalable. In this work, we propose a new task of simulating NL feedback for interactive semantic parsing. We accompany the task with a novel feedback evaluator. The evaluator is specifically designed to assess the quality of the simulated feedback, based on which we decide the best feedback simulator from our proposed variants. On a text-to-SQL dataset, we show that our feedback simulator can generate high-quality NL feedback to boost the error correction ability of a specific parser. In low-data settings, our feedback simulator can help achieve comparable error correction performance as trained using the costly, full set of human annotations.</abstract>
@@ -2530,7 +2530,7 @@
       <author><first>Claudia</first><last>Shi</last><affiliation>Columbia University</affiliation></author>
       <author><first>Keyon</first><last>Vafa</last><affiliation>Columbia University</affiliation></author>
       <author><first>Amir</first><last>Feder</last><affiliation>Columbia University</affiliation></author>
-      <author><first>David</first><last>Blei</last><affiliation>Columbia University</affiliation></author>
+      <author id="david-blei"><first>David</first><last>Blei</last><affiliation>Columbia University</affiliation></author>
       <pages>3186-3206</pages>
       <abstract>Controlled generation refers to the problem of creating text that contains stylistic or semantic attributes of interest. Many approaches reduce this problem to training a predictor of the desired attribute. For example, researchers hoping to deploy a large language model to produce non-toxic content may use a toxicity classifier to filter generated text. In practice, the generated text to classify, which is determined by user prompts, may come from a wide range of distributions. In this paper, we show that the performance of controlled generation may be poor if the distributions of text in response to user prompts differ from the distribution the predictor was trained on. To address this problem, we cast controlled generation under distribution shift as an invariant learning problem: the most effective predictor should be invariant across multiple text environments. We then discuss a natural solution that arises from this characterization and propose heuristics for selecting natural environments. We study this characterization and the proposed method empirically using both synthetic and real data. Experiments demonstrate both the challenge of distribution shift in controlled generation and the potential of invariance methods in this setting.</abstract>
       <url hash="c53d6906">2023.acl-long.179</url>
@@ -2567,7 +2567,7 @@
     <paper id="182">
       <title><fixed-case>H</fixed-case>y<fixed-case>P</fixed-case>e: Better Pre-trained Language Model Fine-tuning with Hidden Representation Perturbation</title>
       <author><first>Hongyi</first><last>Yuan</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chuanqi</first><last>Tan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Songfang</first><last>Huang</last><affiliation>Alibaba DAMO Academy</affiliation></author>
@@ -2585,7 +2585,7 @@
       <author><first>Hongwei</first><last>Wang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xiaoyang</first><last>Wang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Hong</first><last>Yu</last><affiliation>University of Massachusetts, Lowell</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
       <pages>3265-3280</pages>
       <abstract>The potential choices for news article headlines are enormous, and finding the right balance between conveying the essential message and capturing the reader’s attention is key to effective headlining. However, presenting the same news headline to all readers is a suboptimal strategy, because it does not take into account the different preferences and interests of diverse readers, who may be confused about why a particular article has been recommended to them and do not see a clear connection between their interests and the recommended article. In this paper, we present a novel framework that addresses these challenges by incorporating user profiling to generate personalized headlines, and a combination of automated and human evaluation methods to determine user preference for personalized headlines. Our framework utilizes a learnable relevance function to assign personalized signature phrases to users based on their reading histories, which are then used to personalize headline generation. Through extensive evaluation, we demonstrate the effectiveness of our proposed framework in generating personalized headlines that meet the needs of a diverse audience. Our framework has the potential to improve the efficacy of news recommendations and facilitate creation of personalized content.</abstract>
@@ -2611,7 +2611,7 @@
       <author><first>Shi</first><last>Feng</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Daling</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Yifei</first><last>Zhang</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>3295-3310</pages>
       <abstract>We investigate response generation for multi-turn dialogue in generative chatbots. Existing generative modelsbased on RNNs (Recurrent Neural Networks) usually employ the last hidden state to summarize the history, which makesmodels unable to capture the subtle variability observed in different dialogues and cannot distinguish the differencesbetween dialogues that are similar in composition. In this paper, we propose Pseudo-Variational Gated Recurrent Unit (PVGRU). The key novelty of PVGRU is a recurrent summarizing variable thataggregates the accumulated distribution variations of subsequences. We train PVGRU without relying on posterior knowledge, thus avoiding the training-inference inconsistency problem. PVGRU can perceive subtle semantic variability through summarizing variables that are optimized by two objectives we employ for training: distribution consistency and reconstruction. In addition, we build a Pseudo-Variational Hierarchical Dialogue(PVHD) model based on PVGRU. Experimental results demonstrate that PVGRU can broadly improve the diversity andrelevance of responses on two benchmark datasets.</abstract>
       <url hash="a63bcf0e">2023.acl-long.185</url>
@@ -2689,7 +2689,7 @@
       <author><first>Ziheng</first><last>Li</last><affiliation>Peking University</affiliation></author>
       <author><first>Shaohan</first><last>Huang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Zihan</first><last>Zhang</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Zhi-Hong</first><last>Deng</last><affiliation>Peking University</affiliation></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last><affiliation>Peking University</affiliation></author>
       <author><first>Qiang</first><last>Lou</last><affiliation>Microsoft</affiliation></author>
       <author><first>Haizhen</first><last>Huang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Jian</first><last>Jiao</last><affiliation>Microsoft</affiliation></author>
@@ -2711,7 +2711,7 @@
       <author><first>Wen</first><last>Zhang</last><affiliation>Xiaomi AI Lab</affiliation></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi AI Lab</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>Xiaomi AI Lab</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen university</affiliation></author>
       <pages>3479-3491</pages>
       <abstract>Text image translation (TIT) aims to translate the source texts embedded in the image to target translations, which has a wide range of applications and thus has important research value. However, current studies on TIT are confronted with two main bottlenecks: 1) this task lacks a publicly available TIT dataset, 2) dominant models are constructed in a cascaded manner, which tends to suffer from the error propagation of optical character recognition (OCR). In this work, we first annotate a Chinese-English TIT dataset named OCRMT30K, providing convenience for subsequent studies. Then, we propose a TIT model with a multimodal codebook, which is able to associate the image with relevant texts, providing useful supplementary information for translation. Moreover, we present a multi-stage training framework involving text machine translation, image-text alignment, and TIT tasks, which fully exploits additional bilingual texts, OCR dataset and our OCRMT30K dataset to train our model. Extensive experiments and in-depth analyses strongly demonstrate the effectiveness of our proposed model and training framework.</abstract>
@@ -2814,7 +2814,7 @@
       <title><fixed-case>DAMP</fixed-case>: Doubly Aligned Multilingual Parser for Task-Oriented Dialogue</title>
       <author><first>William</first><last>Held</last><affiliation>Georgia Tech</affiliation></author>
       <author><first>Christopher</first><last>Hidey</last><affiliation>Google</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Google</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Google</affiliation></author>
       <author><first>Eric</first><last>Zhu</last><affiliation>Google</affiliation></author>
       <author><first>Rahul</first><last>Goel</last><affiliation>Google</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
@@ -2884,7 +2884,7 @@
       <author><first>Ran</first><last>Zmigrod</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Tim</first><last>Vieira</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>ETH Zürich</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <pages>3687-3713</pages>
       <abstract>We present Earley’s (1970) context-free parsing algorithm as a deduction system, incorporating various known and new speed-ups. In particular, our presentation supports a known worst-case runtime improvement from Earley’s (1970) <tex-math>O(N^3|G||R|)</tex-math>, which is unworkable for the large grammars that arise in natural language processing, to <tex-math>O(N^3|G|)</tex-math>, which matches the complexity of CKY on a binarized version of the grammar G. Here N is the length of the sentence, |R| is the number of productions in G, and |G| is the total length of those productions. We also provide a version that achieves runtime of <tex-math>O(N^3|M|)</tex-math> with <tex-math>|M| \leq |G|</tex-math> when the grammar is represented compactly as a single finite-state automaton M (this is partly novel). We carefully treat the generalization to semiring-weighted deduction, preprocessing the grammar like Stolcke (1995) to eliminate the possibility of deduction cycles, and further generalize Stolcke’s method to compute the weights of sentence prefixes. We also provide implementation details for efficient execution, ensuring that on a preprocessed grammar, the semiring-weighted versions of our methods have the same asymptotic runtime and space requirements as the unweighted methods, including sub-cubic runtime on some grammars.</abstract>
       <url hash="64bf7d60">2023.acl-long.204</url>
@@ -2983,7 +2983,7 @@
       <author><first>Ze-Feng</first><last>Gao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Peiyu</first><last>Liu</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>3819-3834</pages>
       <abstract>By scaling the model size, large pre-trained language models (PLMs) have shown remarkable performance in various natural language processing tasks, mostly outperforming small PLMs by a large margin. However, due to the high computational cost, the huge number of parameters also restricts the applicability of large PLMs in real-world systems. In this paper, we focus on scaling up the parameters of PLMs <i>only during</i> fine-tuning, to benefit from the over-parameterization, while without increasing <i>the inference latency</i>. Given a relatively small PLM, we over-parameterize it by employing a matrix product operator, an efficient and almost lossless decomposition method to factorize its contained parameter matrices into a set of higher-dimensional tensors.Considering the efficiency, we further propose both static and dynamic strategies to select the most important parameter matrices for over-parameterization.Extensive experiments have demonstrated that our approach can significantly boost the fine-tuning performance of small PLMs and even help small PLMs outperform <tex-math>3\times</tex-math> parameterized larger ones.Our code is publicly available at <url>https://github.com/zfgao66/OPF</url>.</abstract>
@@ -3009,7 +3009,7 @@
       <author><first>Naoki</first><last>Otani</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jun</first><last>Araki</last><affiliation>Bosch Research</affiliation></author>
       <author><first>HyeongSik</first><last>Kim</last><affiliation>Robert Bosch LLC</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>3856-3874</pages>
       <abstract>Recent data-driven conversational models are able to return fluent, consistent, and informative responses to many kinds of requests and utterances in task-oriented scenarios. However, these responses are typically limited to just the immediate local topic instead of being wider-ranging and proactively taking the conversation further, for example making suggestions to help customers achieve their goals. This inadequacy reflects a lack of understanding of the interlocutor’s situation and implicit goal. To address the problem, we introduce a task of proactive response selection based on situational information. We present a manually-curated dataset of 1.7k English conversation examples that include situational background information plus for each conversation a set of responses, only some of which are acceptable in the situation. A responsive and informed conversation system should select the appropriate responses and avoid inappropriate ones; doing so demonstrates the ability to adequately understand the initiating request and situation. Our benchmark experiments show that this is not an easy task even for strong neural models, offering opportunities for future research.</abstract>
       <url hash="f5d1d096">2023.acl-long.214</url>
@@ -3087,7 +3087,7 @@
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <author><first>Yong Keong</first><last>Yap</last><affiliation>DSO National Laboratories</affiliation></author>
       <author><first>Hai Leong</first><last>Chieu</last><affiliation>DSO National Laboratories</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>3987-4001</pages>
       <abstract>Stance detection determines whether the author of a piece of text is in favor of, against, or neutral towards a specified target, and can be used to gain valuable insights into social media. The ubiquitous indirect referral of targets makes this task challenging, as it requires computational solutions to model semantic features and infer the corresponding implications from a literal statement. Moreover, the limited amount of available training data leads to subpar performance in out-of-domain and cross-target scenarios, as data-driven approaches are prone to rely on superficial and domain-specific features. In this work, we decompose the stance detection task from a linguistic perspective, and investigate key components and inference paths in this task. The stance triangle is a generic linguistic framework previously proposed to describe the fundamental ways people express their stance. We further expand it by characterizing the relationship between explicit and implicit objects. We then use the framework to extend one single training corpus with additional annotation. Experimental results show that strategically-enriched data can significantly improve the performance on out-of-domain and cross-target evaluation.</abstract>
       <url hash="03b94bd0">2023.acl-long.220</url>
@@ -3169,7 +3169,7 @@
       <author><first>Yu</first><last>Sun</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiaonan</first><last>Li</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yunhua</first><last>Zhou</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>4096-4122</pages>
       <abstract>Information Extraction (IE) spans several tasks with different output structures, such as named entity recognition, relation extraction and event extraction. Previously, those tasks were solved with different models because of diverse task output structures. Through re-examining IE tasks, we find that all of them can be interpreted as extracting spans and span relations. They can further be decomposed into token-pair classification tasks by using the start and end token of a span to pinpoint the span, and using the start-to-start and end-to-end token pairs of two spans to determine the relation. Based on the reformulation, we propose a Unified Token-pair Classification architecture for Information Extraction (UTC-IE), where we introduce Plusformer on top of the token-pair feature matrix. Specifically, it models axis-aware interaction with plus-shaped self-attention and local interaction with Convolutional Neural Network over token pairs. Experiments show that our approach outperforms task-specific and unified models on all tasks in 10 datasets, and achieves better or comparable results on 2 joint IE datasets. Moreover, UTC-IE speeds up over state-of-the-art models on IE tasks significantly in most datasets, which verifies the effectiveness of our architecture.</abstract>
@@ -3204,10 +3204,10 @@
       <author><first>Linyong</first><last>Nan</last><affiliation>Yale University</affiliation></author>
       <author><first>Ruilin</first><last>Han</last><affiliation>Yale University</affiliation></author>
       <author><first>Simeng</first><last>Han</last><affiliation>Yale University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>4140-4170</pages>
       <abstract>Human evaluation is the foundation upon which the evaluation of both summarization systems and automatic metrics rests. However, existing human evaluation studies for summarization either exhibit a low inter-annotator agreement or have insufficient scale, and an in-depth analysis of human evaluation is lacking. Therefore, we address the shortcomings of existing summarization evaluation along the following axes: (1) We propose a modified summarization salience protocol, Atomic Content Units (ACUs), which is based on fine-grained semantic units and allows for a high inter-annotator agreement. (2) We curate the Robust Summarization Evaluation (RoSE) benchmark, a large human evaluation dataset consisting of 22,000 summary-level annotations over 28 top-performing systems on three datasets. (3) We conduct a comparative study of four human evaluation protocols, underscoring potential confounding factors in evaluation setups. (4) We evaluate 50 automatic metrics and their variants using the collected human annotations across evaluation protocols and demonstrate how our benchmark leads to more statistically stable and significant results. The metrics we benchmarked include recent methods based on large language models (LLMs), GPTScore and G-Eval. Furthermore, our findings have important implications for evaluating LLMs, as we show that LLMs adjusted by human feedback (e.g., GPT-3.5) may overfit unconstrained human evaluation, which is affected by the annotators’ prior, input-agnostic preferences, calling for more robust, targeted evaluation methods.</abstract>
       <url hash="2ce93b0c">2023.acl-long.228</url>
@@ -3313,8 +3313,8 @@
       <author><first>Laurie</first><last>Crist</last><affiliation>Tripadvisor</affiliation></author>
       <author><first>Misha</first><last>Britan</last><affiliation>Amazon</affiliation></author>
       <author><first>Wouter</first><last>Leeuwis</last><affiliation>Amazon</affiliation></author>
-      <author><first>Gokhan</first><last>Tur</last><affiliation>Amazon</affiliation></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Capital One</affiliation></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last><affiliation>Amazon</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Capital One</affiliation></author>
       <pages>4277-4302</pages>
       <abstract>We present the MASSIVE dataset–Multilingual Amazon Slu resource package (SLURP) for Slot-filling, Intent classification, and Virtual assistant Evaluation. MASSIVE contains 1M realistic, parallel, labeled virtual assistant utterances spanning 51 languages, 18 domains, 60 intents, and 55 slots. MASSIVE was created by tasking professional translators to localize the English-only SLURP dataset into 50 typologically diverse languages from 29 genera. We also present modeling results on XLM-R and mT5, including exact match accuracy, intent classification accuracy, and slot-filling F1 score. We have released our dataset, modeling code, and models publicly.</abstract>
       <url hash="2ec71121">2023.acl-long.235</url>
@@ -3499,7 +3499,7 @@
       <author><first>Tianxiang</first><last>Sun</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qiong</first><last>Tang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Kuanning</first><last>Wang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>4521-4534</pages>
       <abstract>We present DiffusionBERT, a new generative masked language model based on discrete dif- fusion models. Diffusion models and many pre- trained language models have a shared training objective, i.e., denoising, making it possible to combine the two powerful models and enjoy the best of both worlds. On the one hand, dif- fusion models offer a promising training strat- egy that helps improve the generation quality. On the other hand, pre-trained denoising lan- guage models (e.g., BERT) can be used as a good initialization that accelerates convergence. We explore training BERT to learn the reverse process of a discrete diffusion process with an absorbing state and elucidate several designs to improve it. First, we propose a new noise schedule for the forward diffusion process that controls the degree of noise added at each step based on the information of each token. Sec- ond, we investigate several designs of incorpo- rating the time step into BERT. Experiments on unconditional text generation demonstrate that DiffusionBERT achieves significant improve- ment over existing diffusion models for text (e.g., D3PM and Diffusion-LM) and previous generative masked language models in terms of perplexity and BLEU score. Promising re- sults in conditional generation tasks show that DiffusionBERT can generate texts of compa- rable quality and more diverse than a series of established baselines.</abstract>
@@ -3510,7 +3510,7 @@
     </paper>
     <paper id="249">
       <title>Lifting the Curse of Capacity Gap in Distilling Language Models</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yang</first><last>Yang</last><affiliation>Meituan</affiliation></author>
       <author><first>Jiahao</first><last>Liu</last><affiliation>Meituan</affiliation></author>
       <author><first>Jingang</first><last>Wang</last><affiliation>Meituan</affiliation></author>
@@ -3528,7 +3528,7 @@
       <title>Towards Faithful Dialogues via Focus Learning</title>
       <author><first>Yifan</first><last>Deng</last><affiliation>University of Chinese Academy of Sciences</affiliation></author>
       <author><first>Xingsheng</first><last>Zhang</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yue</first><last>Hu</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
       <pages>4554-4566</pages>
       <abstract>Maintaining faithfulness between responses and knowledge is an important research topic for building reliable knowledge-grounded dialogue systems. Existing models heavily rely on elaborate data engineering or increasing the model’s parameters ignoring to track the tokens that significantly influence losses, which is decisive for the optimization direction of the model in each iteration. To address this issue, we propose Focus Learning (FocusL), a novel learning approach that adjusts the contribution of each token to the optimization direction by directly scaling the corresponding objective loss. Specifically, we first introduce a positioning method by utilizing similarity distributions between knowledge and each response token to locate knowledge-aware tokens. Then, we further design a similarity-to-weight transformation to provide dynamic token-level weights for the cross-entropy loss. Finally, we use the weighted loss to encourage the model to pay special attention to the knowledge utilization. Experimental results demonstrate that our method achieves the new state-of-the-art results and generates more reliable responses while maintaining training stability.</abstract>
@@ -3552,7 +3552,7 @@
       <title>Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation</title>
       <author><first>Taha</first><last>Aksu</last><affiliation>Institute for Infocomm Research, A*STAR / School of Computing, National University of Singapore</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>4588-4603</pages>
       <abstract>A challenge in the Dialogue State Tracking (DST) field is adapting models to new domains without using any supervised data — zero-shot domain adaptation. Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem due to its robustness. However, it has yet to be applied to the zero-shot scenarios, as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions of target domain slots to generate dynamic prefixes that are concatenated to the key and values at each layer’s self-attention mechanism. This allows for the use of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter not only utilizes the semantics of slot descriptions but also how often the slots appear together in conversation. Moreover, Prompter’s gains are due to its improved ability to distinguish ”none”-valued dialogue slots, compared against baselines.</abstract>
       <url hash="6c3bd2bf">2023.acl-long.252</url>
@@ -3662,7 +3662,7 @@
       <author><first>Zhihao</first><last>Fan</last><affiliation>Fudan University</affiliation></author>
       <author><first>Haijun</first><last>Shan</last><affiliation>CEC GienTech Technology Co.,Ltd.</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4706-4718</pages>
       <abstract>Logical reasoning over incomplete knowledge graphs to answer complex logical queries is a challenging task. With the emergence of new entities and relations in constantly evolving KGs, inductive logical reasoning over KGs has become a crucial problem. However, previous PLMs-based methods struggle to model the logical structures of complex queries, which limits their ability to generalize within the same structure. In this paper, we propose a structure-modeled textual encoding framework for inductive logical reasoning over KGs. It encodes linearized query structures and entities using pre-trained language models to find answers. For structure modeling of complex queries, we design stepwise instructions that implicitly prompt PLMs on the execution order of geometric operations in each query. We further separately model different geometric operations (i.e., projection, intersection, and union) on the representation space using a pre-trained encoder with additional attention and maxout layers to enhance structured modeling. We conduct experiments on two inductive logical reasoning datasets and three transductive datasets. The results demonstrate the effectiveness of our method on logical reasoning over KGs in both inductive and transductive settings.</abstract>
       <url hash="2d5a2b45">2023.acl-long.259</url>
@@ -3699,7 +3699,7 @@
       <author><first>Richard Yuanzhe</first><last>Pang</last><affiliation>New York University</affiliation></author>
       <author><first>Vishakh</first><last>Padmakumar</last><affiliation>New York University</affiliation></author>
       <author><first>Thibault</first><last>Sellam</last><affiliation>Google</affiliation></author>
-      <author><first>Ankur</first><last>Parikh</last><affiliation>Google</affiliation></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last><affiliation>Google</affiliation></author>
       <author><first>He</first><last>He</last><affiliation>New York University</affiliation></author>
       <pages>4746-4763</pages>
       <abstract>To align conditional text generation model outputs with desired behaviors, there has been an increasing focus on training the model using reinforcement learning (RL) with reward functions learned from human annotations. Under this framework, we identify three common cases where high rewards are incorrectly assigned to undesirable patterns: noise-induced spurious correlation, naturally occurring spurious correlation, and covariate shift. We show that even though learned metrics achieve high performance on the distribution of the data used to train the reward function, the undesirable patterns may be amplified during RL training of the text generation model. While there has been discussion about reward gaming in the RL or safety community, in this discussion piece, we would like to highlight reward gaming in the natural language generation (NLG) community using concrete conditional text generation examples and discuss potential fixes and areas for future work.</abstract>
@@ -3728,7 +3728,7 @@
       <author><first>Xingxuan</first><last>Li</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Megh</first><last>Thakkar</last><affiliation>BITS Pilani</affiliation></author>
       <author><first>Xin</first><last>Li</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Luo</first><last>Si</last><affiliation>Alibaba Group Inc</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <pages>4799-4816</pages>
@@ -3780,8 +3780,8 @@
     <paper id="268">
       <title>Ethical Considerations for Machine Translation of Indigenous Languages: Giving a Voice to the Speakers</title>
       <author><first>Manuel</first><last>Mager</last><affiliation>Amazon AWS</affiliation></author>
-      <author><first>Elisabeth</first><last>Mager</last><affiliation>Universidad Nacional Autonoma de Mexico</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Mager</last><affiliation>Universidad Nacional Autonoma de Mexico</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Ngoc Thang</first><last>Vu</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>4871-4897</pages>
       <abstract>In recent years machine translation has become very successful for high-resource language pairs. This has also sparked new interest in research on the automatic translation of low-resource languages, including Indigenous languages. However, the latter are deeply related to the ethnic and cultural groups that speak (or used to speak) them. The data collection, modeling and deploying machine translation systems thus result in new ethical questions that must be addressed. Motivated by this, we first survey the existing literature on ethical considerations for the documentation, translation, and general natural language processing for Indigenous languages. Afterward, we conduct and analyze an interview study to shed light on the positions of community leaders, teachers, and language activists regarding ethical concerns for the automatic translation of their languages. Our results show that the inclusion, at different degrees, of native speakers and community members is vital to performing better and more ethical research on Indigenous languages.</abstract>
@@ -3819,8 +3819,8 @@
       <title>Privacy-Preserving Domain Adaptation of Semantic Parsers</title>
       <author><first>Fatemehsadat</first><last>Mireshghallah</last><affiliation>UC San Diego</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>The Ohio State University</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <author><first>Richard</first><last>Shin</last><affiliation>Microsoft Semantic Machines</affiliation></author>
       <pages>4950-4970</pages>
       <abstract>Task-oriented dialogue systems often assist users with personal or confidential matters. For this reason, the developers of such a system are generally prohibited from observing actual usage. So how can they know where the system is failing and needs more training data or new functionality? In this work, we study ways in which realistic user utterances can be generated synthetically, to help increase the linguistic and functional coverage of the system, without compromising the privacy of actual users. To this end, we propose a two-stage Differentially Private (DP) generation method which first generates latent semantic parses, and then generates utterances based on the parses. Our proposed approach improves MAUVE by 2.5X and parse tree function-type overlap by 1.3X relative to current approaches for private synthetic data generation, improving both on fluency and semantic coverage. We further validate our approach on a realistic domain adaptation task of adding new functionality from private user data to a semantic parser, and show overall gains of 8.5% points on its accuracy with the new feature.</abstract>
@@ -3871,7 +3871,7 @@
       <author><first>Yutao</first><last>Zhu</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Yihong</first><last>Wu</last><affiliation>Universite de Montreal</affiliation></author>
       <author><first>Kaiyu</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>4998-5012</pages>
       <abstract>In conversational search, the user’s real search intent for the current conversation turn is dependent on the previous conversation history. It is challenging to determine a good search query from the whole conversation context. To avoid the expensive re-training of the query encoder, most existing methods try to learn a rewriting model to de-contextualize the current query by mimicking the manual query rewriting. However, manually rewritten queries are not always the best search queries. Thus, training a rewriting model on them would lead to sub-optimal queries. Another useful information to enhance the search query is the potential answer to the question. In this paper, we propose ConvGQR, a new framework to reformulate conversational queries based on generative pre-trained language models (PLMs), one for query rewriting and another for generating potential answers. By combining both, ConvGQR can produce better search queries. In addition, to relate query reformulation to the retrieval task, we propose a knowledge infusion mechanism to optimize both query reformulation and retrieval. Extensive experiments on four conversational search datasets demonstrate the effectiveness of ConvGQR.</abstract>
       <url hash="fafe4c95">2023.acl-long.274</url>
@@ -3885,8 +3885,8 @@
       <author><first>Mahdi</first><last>Namazifar</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Devamanyu</first><last>Hazarika</last><affiliation>Amazon</affiliation></author>
       <author><first>Aishwarya</first><last>Padmakumar</last><affiliation>Amazon</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>5013-5035</pages>
       <abstract>Large pre-trained language models (PLMs) have been shown to retain implicit knowledge within their parameters. To enhance this implicit knowledge, we propose Knowledge Injection into Language Models (KILM), a novel approach that injects entity-related knowledge into encoder-decoder PLMs, via a generative knowledge infilling objective through continued pre-training. This is done without architectural modifications to the PLMs or adding additional parameters. Experimental results over a suite of knowledge-intensive tasks spanning numerous datasets show that KILM enables models to retain more knowledge and hallucinate less while preserving their original performance on general NLU and NLG tasks. KILM also demonstrates improved zero-shot performances on tasks such as entity disambiguation, outperforming state-of-the-art models having 30x more parameters.</abstract>
       <url hash="22e6b918">2023.acl-long.275</url>
@@ -3944,13 +3944,13 @@
       <author><first>Song</first><last>Wang</last><affiliation>Microsoft Azure AI</affiliation></author>
       <author id="yang-liu-edinburgh"><first>Yang</first><last>Liu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Ruochen</first><last>Xu</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yu</first><last>Shi</last><affiliation>Microsoft</affiliation></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Microsoft Cognitive Services Research Group</affiliation></author>
       <author><first>Wayne</first><last>Xiong</last><affiliation>Microsoft Corp.</affiliation></author>
       <author><first>Michael</first><last>Zeng</last><affiliation>Microsoft Corp</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
-      <author><first>Xuedong</first><last>Huang</last><affiliation>Microsoft Cloud and AI</affiliation></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last><affiliation>Microsoft Cloud and AI</affiliation></author>
       <pages>5095-5112</pages>
       <abstract>This paper presents Z-Code++, a new pre-trained language model optimized for abstractive text summarization. The model extends the state-of-the-art encoder-decoder model using three techniques. First, we use a two-phase pre-training to improve the model’s performance on low-resource summarization tasks. The model is first pre-trained using text corpora for language understanding, then is continually pre-trained on summarization corpora for grounded text generation. Second, we replace self-attention layers in the encoder with disentangled attention layers, where each word is represented using two vectors that encode its content and position, respectively. Third, we use fusion-in-encoder, a simple yet effective method of encoding long sequences in a hierarchical manner. Z-Code++ createsa new state-of-the-art on 9 of 13 text summarization tasks across 5 languages. Our model is parameter-efficient in that it outperforms the 600x larger PaLM540B on XSum, and the finetuned 200x larger GPT3175B on SAMSum. In zero-shot and few-shot settings, our model substantially outperforms the competing models.</abstract>
       <url hash="1ca14579">2023.acl-long.279</url>
@@ -3991,7 +3991,7 @@
     <paper id="282">
       <title>Randomized Smoothing with Masked Inference for Adversarially Robust Text Classifications</title>
       <author><first>Han Cheol</first><last>Moon</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Ruochen</first><last>Zhao</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Megh</first><last>Thakkar</last><affiliation>BITS Pilani</affiliation></author>
       <author><first>Chi</first><last>Xu</last><affiliation>National University of Defense Technology</affiliation></author>
@@ -4046,7 +4046,7 @@
       <title><fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>dapt: Domain Adaptive Few-Shot Misinformation Detection via Meta Learning</title>
       <author><first>Zhenrui</first><last>Yue</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Huimin</first><last>Zeng</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Lanyu</first><last>Shang</last><affiliation>University of Illinois at Urbana Champaign</affiliation></author>
       <author><first>Dong</first><last>Wang</last><affiliation>University of Illinois at Urbana Champaign</affiliation></author>
       <pages>5223-5239</pages>
@@ -4182,7 +4182,7 @@
       <author><first>Matthieu</first><last>Futeral</last><affiliation>Inria; Departement d’informatique de l’ENS, CNRS, PSL Research University</affiliation></author>
       <author><first>Cordelia</first><last>Schmid</last><affiliation>Inria</affiliation></author>
       <author><first>Ivan</first><last>Laptev</last><affiliation>INRIA</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
       <pages>5394-5413</pages>
       <abstract>One of the major challenges of machine translation (MT) is ambiguity, which can in some cases be resolved by accompanying context such as images. However, recent work in multimodal MT (MMT) has shown that obtaining improvements from images is challenging, limited not only by the difficulty of building effective cross-modal representations, but also by the lack of specific evaluation and training data. We present a new MMT approach based on a strong text-only MT model, which uses neural adapters, a novel guided self-attention mechanism and which is jointly trained on both visually-conditioned masking and MMT. We also introduce CoMMuTE, a Contrastive Multilingual Multimodal Translation Evaluation set of ambiguous sentences and their possible translations, accompanied by disambiguating images corresponding to each translation. Our approach obtains competitive results compared to strong text-only models on standard English→French, English→German and English→Czech benchmarks and outperforms baselines and state-of-the-art MMT systems by a large margin on our contrastive test set. Our code and CoMMuTE are freely available.</abstract>
@@ -4208,7 +4208,7 @@
       <author><first>Tao</first><last>Wang</last><affiliation>ByteDance AI Lab</affiliation></author>
       <author><first>Chengqi</first><last>Zhao</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Mingxuan</first><last>Wang</last><affiliation>Bytedance AI Lab</affiliation></author>
       <pages>5428-5443</pages>
       <abstract>Automatic metrics play a crucial role in machine translation. Despite the widespread use of n-gram-based metrics, there has been a recent surge in the development of pre-trained model-based metrics that focus on measuring sentence semantics. However, these neural metrics, while achieving higher correlations with human evaluations, are often considered to be black boxes with potential biases that are difficult to detect. In this study, we systematically analyze and compare various mainstream and cutting-edge automatic metrics from the perspective of their guidance for training machine translation systems. Through Minimum Risk Training (MRT), we find that certain metrics exhibit robustness defects, such as the presence of universal adversarial translations in BLEURT and BARTScore. In-depth analysis suggests two main causes of these robustness deficits: distribution biases in the training datasets, and the tendency of the metric paradigm. By incorporating token-level constraints, we enhance the robustness of evaluation metrics, which in turn leads to an improvement in the performance of machine translation systems. Codes are available at <url>https://github.com/powerpuffpomelo/fairseq_mrt</url>.</abstract>
@@ -4266,7 +4266,7 @@
       <author><first>Javier</first><last>Ferrando</last><affiliation>UPC</affiliation></author>
       <author><first>Gerard I.</first><last>Gállego</last><affiliation>Universitat Politècnica de Catalunya</affiliation></author>
       <author><first>Ioannis</first><last>Tsiamas</last><affiliation>Polytechnic University of Catalonia (UPC)</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
       <pages>5486-5513</pages>
       <abstract>Language Generation Models produce words based on the previous context. Although existing methods offer input attributions as explanations for a model’s prediction, it is still unclear how prior words affect the model’s decision throughout the layers. In this work, we leverage recent advances in explainability of the Transformer and present a procedure to analyze models for language generation. Using contrastive examples, we compare the alignment of our explanations with evidence of the linguistic phenomena, and show that our method consistently aligns better than gradient-based and perturbation-based baselines. Then, we investigate the role of MLPs inside the Transformer and show that they learn features that help the model predict words that are grammatically acceptable. Lastly, we apply our method to Neural Machine Translation models, and demonstrate that they generate human-like source-target alignments for building predictions.</abstract>
       <url hash="1ba58b1e">2023.acl-long.301</url>
@@ -4294,7 +4294,7 @@
       <author><first>Sha</first><last>Li</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Manling</first><last>Li</last><affiliation>UIUC</affiliation></author>
       <author><first>Xudong</first><last>Lin</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>5529-5545</pages>
@@ -4398,7 +4398,7 @@
       <author><first>Karun</first><last>Kumar</last><affiliation>Comcast</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
-      <author><first>Ferhan</first><last>Ture</last><affiliation>Comcast Applied AI Research</affiliation></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last><affiliation>Comcast Applied AI Research</affiliation></author>
       <pages>5644-5659</pages>
       <abstract>Diffusion models are a milestone in text-to-image generation, but they remain poorly understood, lacking interpretability analyses. In this paper, we perform a text-image attribution analysis on Stable Diffusion, a recently open-sourced model. To produce attribution maps, we upscale and aggregate cross-attention maps in the denoising module, naming our method DAAM. We validate it by testing its segmentation ability on nouns, as well as its generalized attribution quality on all parts of speech, rated by humans. On two generated datasets, we attain a competitive 58.8-64.8 mIoU on noun segmentation and fair to good mean opinion scores (3.4-4.2) on generalized attribution. Then, we apply DAAM to study the role of syntax in the pixel space across head–dependent heat map interaction patterns for ten common dependency relations. We show that, for some relations, the head map consistently subsumes the dependent, while the opposite is true for others. Finally, we study several semantic phenomena, focusing on feature entanglement; we find that the presence of cohyponyms worsens generation quality by 9%, and descriptive adjectives attend too broadly. We are the first to interpret large diffusion models from a visuolinguistic perspective, which enables future research. Our code is at <url>https://github.com/castorini/daam</url>.</abstract>
       <url hash="09284af8">2023.acl-long.310</url>
@@ -4474,7 +4474,7 @@
     <paper id="316">
       <title>Unsupervised Discontinuous Constituency Parsing with Mildly Context-Sensitive Grammars</title>
       <author><first>Songlin</first><last>Yang</last><affiliation>ShanghaiTech University</affiliation></author>
-      <author><first>Roger</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>MIT</affiliation></author>
       <pages>5747-5766</pages>
       <abstract>We study grammar induction with mildly context-sensitive grammars for unsupervised discontinuous parsing. Using the probabilistic linear context-free rewriting system (LCFRS) formalism, our approach fixes the rule structure in advance and focuses on parameter learning with maximum likelihood. To reduce the computational complexity of both parsing and parameter estimation, we restrict the grammar formalism to LCFRS-2 (i.e., binary LCFRS with fan-out two) and further discard rules that require <tex-math>O(l^6)</tex-math> time to parse, reducing inference to <tex-math>O(l^5)</tex-math>. We find that using a large number of nonterminals is beneficial and thus make use of tensor decomposition-based rank-space dynamic programming with an embedding-based parameterization of rule probabilities to scale up the number of nonterminals. Experiments on German and Dutch show that our approach is able to induce linguistically meaningful trees with continuous and discontinuous structures.</abstract>
@@ -4488,7 +4488,7 @@
       <author><first>Satwik</first><last>Bhattamishra</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Arkil</first><last>Patel</last><affiliation>Mila and McGill University</affiliation></author>
       <author><first>Varun</first><last>Kanade</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
       <pages>5767-5791</pages>
       <abstract>Despite the widespread success of Transformers on NLP tasks, recent works have found that they struggle to model several formal languages when compared to recurrent models. This raises the question of why Transformers perform well in practice and whether they have any properties that enable them to generalize better than recurrent models. In this work, we conduct an extensive empirical study on Boolean functions to demonstrate the following: (i) Random Transformers are relatively more biased towards functions of low sensitivity. (ii) When trained on Boolean functions, both Transformers and LSTMs prioritize learning functions of low sensitivity, with Transformers ultimately converging to functions of lower sensitivity. (iii) On sparse Boolean functions which have low sensitivity, we find that Transformers generalize near perfectly even in the presence of noisy labels whereas LSTMs overfit and achieve poor generalization accuracy. Overall, our results provide strong quantifiable evidence that suggests differences in the inductive biases of Transformers and recurrent models which may help explain Transformer’s effective generalization performance despite relatively limited expressiveness.</abstract>
       <url hash="f1c3f9b8">2023.acl-long.317</url>
@@ -4530,7 +4530,7 @@
       <title>Verify-and-Edit: A Knowledge-Enhanced Chain-of-Thought Framework</title>
       <author><first>Ruochen</first><last>Zhao</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Xingxuan</first><last>Li</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Chengwei</first><last>Qin</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <pages>5823-5840</pages>
@@ -4562,7 +4562,7 @@
       <title>Node Placement in Argument Maps: Modeling Unidirectional Relations in High &amp; Low-Resource Scenarios</title>
       <author><first>Iman</first><last>Jundi</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Neele</first><last>Falk</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Eva Maria</first><last>Vecchi</last><affiliation>Universitat Stuttgart, Institut fur Maschinelle Sprachverarbeitung</affiliation></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last><affiliation>Universitat Stuttgart, Institut fur Maschinelle Sprachverarbeitung</affiliation></author>
       <author><first>Gabriella</first><last>Lapesa</last><affiliation>Universität Stuttgart, Institut für Maschinelle Sprachverarbeitung</affiliation></author>
       <pages>5854-5876</pages>
       <abstract>Argument maps structure discourse into nodes in a tree with each node being an argument that supports or opposes its parent argument. This format is more comprehensible and less redundant compared to an unstructured one. Exploring those maps and maintaining their structure by placing new arguments under suitable parents is more challenging for users with huge maps that are typical in online discussions. To support those users, we introduce the task of node placement: suggesting candidate nodes as parents for a new contribution. We establish an upper-bound of human performance, and conduct experiments with models of various sizes and training strategies. We experiment with a selection of maps from Kialo, drawn from a heterogeneous set of domains. Based on an annotation study, we highlight the ambiguity of the task that makes it challenging for both humans and models. We examine the unidirectional relation between tree nodes and show that encoding a node into different embeddings for each of the parent and child cases improves performance. We further show the few-shot effectiveness of our approach.</abstract>
@@ -4602,7 +4602,7 @@
       <title>Connective Prediction for Implicit Discourse Relation Recognition via Knowledge Distillation</title>
       <author><first>Hongyi</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Hao</first><last>Zhou</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yuanbin</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yadong</first><last>Zhang</last><affiliation>East China Normal University</affiliation></author>
       <pages>5908-5923</pages>
@@ -4628,7 +4628,7 @@
       <author><first>Zhihao</first><last>Fan</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jingjing</first><last>Chen</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <pages>5939-5958</pages>
       <abstract>Multilingual Vision-Language Pre-training (VLP) is a promising but challenging topic due to the lack of large-scale multilingual image-text pairs. Existing works address the problem by translating English data into other languages, which is intuitive and the generated data is usually limited in form and scale. In this paper, we explore a more practical and scalable setting: weakly supervised multilingual VLP with only English image-text pairs and multilingual text corpora. We argue that the universal multilingual representation learned from texts allows the cross-modal interaction learned in English to be transferable to other languages. To this end, we propose a framework to effectively unify cross-lingual and cross-modal pre-training. For unified modeling on different data, we design an architecture with flexible modules to learn different interactions. Moreover, two unified tasks are introduced to efficiently guide the unified cross-lingual cross-modal learning. Extensive experiments demonstrate that our pre-trained model learns universal multilingual multimodal representations, allowing effective cross-lingual transfer on multimodal tasks. Code and models are available at <url>https://github.com/FudanDISC/weakly-supervised-mVLP</url>.</abstract>
@@ -4661,7 +4661,7 @@
       <author><first>Qian</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Meishan</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen), China</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>5980-5994</pages>
       <abstract>In this work, we investigate a more realistic unsupervised multimodal machine translation (UMMT) setup, inference-time image-free UMMT, where the model is trained with source-text image pairs, and tested with only source-text inputs. First, we represent the input images and texts with the visual and language scene graphs (SG), where such fine-grained vision-language features ensure a holistic understanding of the semantics. To enable pure-text input during inference, we devise a visual scene hallucination mechanism that dynamically generates pseudo visual SG from the given textual SG. Several SG-pivoting based learning objectives are introduced for unsupervised translation training. On the benchmark Multi30K data, our SG-based method outperforms the best-performing baseline by significant BLEU scores on the task and setup, helping yield translations with better completeness, relevance and fluency without relying on paired images. Further in-depth analyses reveal how our model advances in the task setting.</abstract>
       <url hash="b848b3a8">2023.acl-long.329</url>
@@ -4674,9 +4674,9 @@
       <author><first>Tingting</first><last>Ma</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Qianhui</first><last>Wu</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Huiqiang</first><last>Jiang</last><affiliation>Microsoft Research Asia</affiliation></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
       <pages>5995-6009</pages>
       <abstract>Cross-lingual named entity recognition (NER) aims to train an NER system that generalizes well to a target language by leveraging labeled data in a given source language. Previous work alleviates the data scarcity problem by translating source-language labeled data or performing knowledge distillation on target-language unlabeled data. However, these methods may suffer from label noise due to the automatic labeling process. In this paper, we propose CoLaDa, a Collaborative Label Denoising Framework, to address this problem. Specifically, we first explore a model-collaboration-based denoising scheme that enables models trained on different data sources to collaboratively denoise pseudo labels used by each other. We then present an instance-collaboration-based strategy that considers the label consistency of each token’s neighborhood in the representation space for denoising. Experiments on different benchmark datasets show that the proposed CoLaDa achieves superior results compared to previous methods, especially when generalizing to distant languages.</abstract>
       <url hash="2ca50221">2023.acl-long.330</url>
@@ -4689,7 +4689,7 @@
       <author><first>Jiao</first><last>Sun</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Thibault</first><last>Sellam</last><affiliation>Google</affiliation></author>
       <author><first>Elizabeth</first><last>Clark</last><affiliation>Google Research</affiliation></author>
-      <author><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Timothy</first><last>Dozat</last><affiliation>Google</affiliation></author>
       <author><first>Dan</first><last>Garrette</last><affiliation>Google Research</affiliation></author>
       <author><first>Aditya</first><last>Siddhant</last><affiliation>Google</affiliation></author>
@@ -4723,7 +4723,7 @@
       <author><first>Aaron</first><last>Mueller</last><affiliation>The Johns Hopkins University</affiliation></author>
       <author><first>Kanishka</first><last>Misra</last><affiliation>Purdue University</affiliation></author>
       <author><first>Keren</first><last>Fuentes</last><affiliation>Meta</affiliation></author>
-      <author><first>Roger</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Adina</first><last>Williams</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <pages>6043-6063</pages>
       <abstract>Targeted syntactic evaluations of language models ask whether models show stable preferences for syntactically acceptable content over minimal-pair unacceptable inputs. Our best syntactic evaluation datasets, however, provide substantially less linguistic context than models receive during pretraining. This mismatch raises an important question: how robust are models’ syntactic judgements across different contexts? In this paper, we vary the input contexts based on: length, the types of syntactic phenomena it contains, and whether or not there are grammatical violations. We find that model judgements are generally robust when placed in randomly sampled linguistic contexts, but are unstable when contexts match the test stimuli in syntactic structure. Among all tested models (GPT-2 and five variants of OPT), we find that model performance is affected when we provided contexts with matching syntactic structure: performance significantly improves when contexts are acceptable, and it significantly declines when they are unacceptable. This effect is amplified by the length of the context, except for unrelated inputs. We show that these changes in model performance are not explainable by acceptability-preserving syntactic perturbations. This sensitivity to highly specific syntactic features of the context can only be explained by the models’ implicit in-context learning abilities.</abstract>
@@ -4742,7 +4742,7 @@
       <author><first>Wenlin</first><last>Zhang</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Xiangru</first><last>Tang</last><affiliation>Yale University</affiliation></author>
       <author><first>Boyu</first><last>Mi</last><affiliation>Zhejiang University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>6064-6081</pages>
       <abstract>Despite significant progress having been made in question answering on tabular data (Table QA), it’s unclear whether, and to what extent existing Table QA models are robust to task-specific perturbations, e.g., replacing key question entities or shuffling table columns. To systematically study the robustness of Table QA models, we propose a benchmark called RobuT, which builds upon existing Table QA datasets (WTQ, WikiSQL-Weak, and SQA) and includes human-annotated adversarial perturbations in terms of table header, table content, and question. Our results indicate that both state-of-the-art Table QA models and large language models (e.g., GPT-3) with few-shot learning falter in these adversarial sets. We propose to address this problem by using large language models to generate adversarial examples to enhance training, which significantly improves the robustness of Table QA models.</abstract>
       <url hash="248c07b6">2023.acl-long.334</url>
@@ -4766,7 +4766,7 @@
     <paper id="336">
       <title><fixed-case>TOME</fixed-case>: A Two-stage Approach for Model-based Retrieval</title>
       <author><first>Ruiyang</first><last>Ren</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Jing</first><last>Liu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Hua</first><last>Wu</last><affiliation>Baidu</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
@@ -4796,7 +4796,7 @@
       <author><first>Moritz</first><last>Plenz</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Juri</first><last>Opitz</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Philipp</first><last>Heinisch</last><affiliation>Bielefeld University</affiliation></author>
-      <author><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
       <author><first>Anette</first><last>Frank</last><affiliation>Heidelberg University</affiliation></author>
       <pages>6130-6158</pages>
       <abstract>Arguments often do not make explicit how a conclusion follows from its premises. To compensate for this lack, we enrich arguments with structured background knowledge to support knowledge-intense argumentation tasks. We present a new unsupervised method for constructing Contextualized Commonsense Knowledge Graphs (CCKGs) that selects contextually relevant knowledge from large knowledge graphs (KGs) efficiently and at high quality. Our work goes beyond context-insensitive knowledge extraction heuristics by computing semantic similarity between KG triplets and textual arguments. Using these triplet similarities as weights, we extract contextualized knowledge paths that connect a conclusion to its premise, while maximizing similarity to the argument. We combine multiple paths into a CCKG that we optionally prune to reduce noise and raise precision. Intrinsic evaluation of the quality of our graphs shows that our method is effective for (re)constructing human explanation graphs. Manual evaluations in a large-scale knowledge selection setup verify high recall and precision of implicit CSK in the CCKGs. Finally, we demonstrate the effectiveness of CCKGs in a knowledge-insensitive argument quality rating task, outperforming strong baselines and rivaling a GPT-3 based system.</abstract>
@@ -4833,7 +4833,7 @@
       <author><first>Janvijay</first><last>Singh</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Mukund</first><last>Rungta</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>6192-6208</pages>
       <abstract>Citing papers is the primary method through which modern scientific writing discusses and builds on past work. Collectively, citing a diverse set of papers (in time and area of study) is an indicator of how widely the community is reading. Yet, there is little work looking at broad temporal patterns of citation. This work systematically and empirically examines: How far back in time do we tend to go to cite papers? How has that changed over time, and what factors correlate with this citational attention/amnesia? We chose NLP as our domain of interest and analyzed approximately 71.5K papers to show and quantify several key trends in citation. Notably, around 62% of cited papers are from the immediate five years prior to publication, whereas only about 17% are more than ten years old. Furthermore, we show that the median age and age diversity of cited papers were steadily increasing from 1990 to 2014, but since then, the trend has reversed, and current NLP papers have an all-time low temporal citation diversity. Finally, we show that unlike the 1990s, the highly cited papers in the last decade were also papers with the least citation diversity, likely contributing to the intense (and arguably harmful) recency focus. Code, data, and a demo are available on the project homepage.</abstract>
       <url hash="342d65cb">2023.acl-long.341</url>
@@ -4860,11 +4860,11 @@
       <author><first>Aubrie</first><last>Amstutz</last><affiliation>TikTok</affiliation></author>
       <author><first>Chad</first><last>Atalla</last><affiliation>Microsoft</affiliation></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Emily</first><last>Sheng</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Dan</first><last>Vann</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hanna</first><last>Wallach</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last><affiliation>Microsoft Research</affiliation></author>
       <pages>6231-6251</pages>
       <abstract>It is critical to measure and mitigate fairness-related harms caused by AI text generation systems, including stereotyping and demeaning harms. To that end, we introduce FairPrism, a dataset of 5,000 examples of AI-generated English text with detailed human annotations covering a diverse set of harms relating to gender and sexuality. FairPrism aims to address several limitations of existing datasets for measuring and mitigating fairness-related harms, including improved transparency, clearer specification of dataset coverage, and accounting for annotator disagreement and harms that are context-dependent. FairPrism’s annotations include the extent of stereotyping and demeaning harms, the demographic groups targeted, and appropriateness for different applications. The annotations also include specific harms that occur in interactive contexts and harms that raise normative concerns when the “speaker” is an AI system. Due to its precision and granularity, FairPrism can be used to diagnose (1) the types of fairness-related harms that AI text generation systems cause, and (2) the potential limitations of mitigation methods, both of which we illustrate through case studies. Finally, the process we followed to develop FairPrism offers a recipe for building improved datasets for measuring and mitigating harms caused by AI systems.</abstract>
       <url hash="0a395f09">2023.acl-long.343</url>
@@ -4953,7 +4953,7 @@
       <author><first>Vaishali</first><last>Pal</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Andrew</first><last>Yates</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Evangelos</first><last>Kanoulas</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>6322-6334</pages>
       <abstract>Recent advances in tabular question answering (QA) with large language models are constrained in their coverage and only answer questions over a single table. However, real-world queries are complex in nature, often over multiple tables in a relational database or web page. Single table questions do not involve common table operations such as set operations, Cartesian products (joins), or nested queries. Furthermore, multi-table operations often result in a tabular output, which necessitates table generation capabilities of tabular QA models. To fill this gap, we propose a new task of answering questions over multiple tables. Our model, MultiTabQA, not only answers questions over multiple tables, but also generalizes to generate tabular answers. To enable effective training, we build a pre-training dataset comprising of 132,645 SQL queries and tabular answers. Further, we evaluate the generated tables by introducing table-specific metrics of varying strictness assessing various levels of granularity of the table structure. MultiTabQA outperforms state-of-the-art single table QA models adapted to a multi-table QA setting by finetuning on three datasets: Spider, Atis and GeoQuery.</abstract>
       <url hash="6490bafa">2023.acl-long.348</url>
@@ -4985,7 +4985,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>AD</fixed-case>: Automatic Diagnosis through Symptom and Disease Collaborative Generation</title>
       <author><first>Huimin</first><last>Wang</last><affiliation>Tencent</affiliation></author>
       <author><first>Wai Chung</first><last>Kwan</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <author><first>Yefeng</first><last>Zheng</last><affiliation>Tencent</affiliation></author>
       <pages>6348-6361</pages>
       <abstract>Automatic diagnosis (AD), a critical application of AI in healthcare, employs machine learning techniques to assist doctors in gathering patient symptom information for precise disease diagnosis. The Transformer-based method utilizes an input symptom sequence, predicts itself through auto-regression, and employs the hidden state of the final symptom to determine the disease. Despite its simplicity and superior performance demonstrated, a decline in disease diagnosis accuracy is observed caused by 1) a mismatch between symptoms observed during training and generation, and 2) the effect of different symptom orders on disease prediction. To address the above obstacles, we introduce the CoAD, a novel disease and symptom collaborative generation framework, which incorporates several key innovations to improve AD: 1) aligning sentence-level disease labels with multiple possible symptom inquiry steps to bridge the gap between training and generation; 2) expanding symptom labels for each sub-sequence of symptoms to enhance annotation and eliminate the effect of symptom order; 3) developing a repeated symptom input schema to effectively and efficiently learn the expanded disease and symptom labels. We evaluate the CoAD framework using four datasets, including three public and one private, and demonstrate that it achieves an average 2.3% improvement over previous state-of-the-art results in automatic disease diagnosis. For reproducibility, we release the code and data at <url>https://github.com/KwanWaiChung/coad</url>.</abstract>
@@ -5056,7 +5056,7 @@
       <title><fixed-case>C</fixed-case>ontra<fixed-case>CLM</fixed-case>: Contrastive Learning For Causal Language Model</title>
       <author><first>Nihal</first><last>Jain</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Dejiao</first><last>Zhang</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Zijian</first><last>Wang</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Feng</first><last>Nan</last><affiliation>AWS AI</affiliation></author>
       <author><first>Xiaopeng</first><last>Li</last><affiliation>AWS AI Labs</affiliation></author>
@@ -5149,7 +5149,7 @@
       <author><first>Mehran</first><last>Kazemi</last><affiliation>Google Research</affiliation></author>
       <author><first>Najoung</first><last>Kim</last><affiliation>Boston University</affiliation></author>
       <author><first>Deepti</first><last>Bhatia</last><affiliation>Google</affiliation></author>
-      <author id="xin-xu"><first>Xin</first><last>Xu</last><affiliation>Google</affiliation></author>
+      <author><first>Xin</first><last>Xu</last><affiliation>Google</affiliation></author>
       <author><first>Deepak</first><last>Ramachandran</last><affiliation>Google Research</affiliation></author>
       <pages>6547-6568</pages>
       <abstract>Remarkable progress has been made on automated reasoning with natural text, by using Large Language Models (LLMs) and methods such as Chain-of-Thought prompting and Selection-Inference. These techniques search for proofs in the forward direction from axioms to the conclusion, which suffers from a combinatorial explosion of the search space, and thus high failure rates for problems requiring longer chains of reasoning. The classical automated reasoning literature has shown that reasoning in the backward direction (i.e. from intended conclusion to supporting axioms) is significantly more efficient at proof-finding. Importing this intuition into the LM setting, we develop a Backward Chaining algorithm, called LAMBADA, that decomposes reasoning into four sub-modules, that are simply implemented by few-shot prompted LLM inference. We show that LAMBADA achieves sizable accuracy boosts over state-of-the-art forward reasoning methods on two challenging logical reasoning datasets, particularly when deep and accurate proof chains are required.</abstract>
@@ -5201,7 +5201,7 @@
       <author><first>Yasheng</first><last>Wang</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Yitong</first><last>Li</last><affiliation>Huawei Technology Co. ltd</affiliation></author>
       <author><first>Lifeng</first><last>Shang</last><affiliation>Noah’s Ark Lab Huawei Technologies Co. Ltd. Sha Tin, Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>6608-6619</pages>
       <abstract>Dialogue models are often enriched with extensive external knowledge to provide informative responses through a retrieval-augmented pipeline. Nevertheless, retrieval-augmented approaches rely on finely annotated retrieval training data and knowledge-grounded response generation data, making it costly to transfer. To tackle this challenge, this paper proposed a retrieval-free approach, KiDG, by automatically turning knowledge documents into simulated multi-turn dialogues through a Multi-Document Traversal algorithm. The simulated knowledge-intensive dialogues constructed by KiDG in one domain can be easily used to train and enhance pre-trained dialogue models’ knowledge w.r.t. this domain without costly annotation. We conduct extensive experiments comparing retrieval-augmented models and a variety of retrieval-free models. We found that dialogue models enhanced with data simulated with KiDG largely outperform state-of-the-art retrieval-free methods, and it achieves comparable performance compared to retrieval-augmented methods while being better, and cheaper at domain transfer.</abstract>
@@ -5217,7 +5217,7 @@
       <author><first>Shicheng</first><last>Xu</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Liang</first><last>Pang</last><affiliation>Institute of Computing Technology of Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>6620-6635</pages>
       <abstract>Dense retrieval has shown promise in the first-stage retrieval process when trained on in-domain labeled datasets. However, previous studies have found that dense retrieval is hard to generalize to unseen domains due to its weak modeling of domain-invariant and interpretable feature (i.e., matching signal between two texts, which is the essence of information retrieval). In this paper, we propose a novel method to improve the generalization of dense retrieval via capturing matching signal called BERM. Fully fine-grained expression and query-oriented saliency are two properties of the matching signal. Thus, in BERM, a single passage is segmented into multiple units and two unit-level requirements are proposed for representation as the constraint in training to obtain the effective matching signal. One is semantic unit balance and the other is essential matching unit extractability. Unit-level view and balanced semantics make representation express the text in a fine-grained manner. Essential matching unit extractability makes passage representation sensitive to the given query to extract the pure matching information from the passage containing complex context. Experiments on BEIR show that our method can be effectively combined with different dense retrieval training methods (vanilla, hard negatives mining and knowledge distillation) to improve its generalization ability without any additional inference overhead and target domain data.</abstract>
       <url hash="3422652a">2023.acl-long.365</url>
@@ -5243,7 +5243,7 @@
       <title>Prompting Language Models for Linguistic Structure</title>
       <author><first>Terra</first><last>Blevins</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hila</first><last>Gonen</last><affiliation>UW and FAIR</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>6649-6663</pages>
       <abstract>Although pretrained language models (PLMs) can be prompted to perform a wide range of language tasks, it remains an open question how much this ability comes from generalizable linguistic understanding versus surface-level lexical patterns. To test this, we present a structured prompting approach for linguistic structured prediction tasks, allowing us to perform zero- and few-shot sequence tagging with autoregressive PLMs. We evaluate this approach on part-of-speech tagging, named entity recognition, and sentence chunking, demonstrating strong few-shot performance in all cases. We also find that while PLMs contain significant prior knowledge of task labels due to task leakage into the pretraining corpus, structured prompting can also retrieve linguistic structure with arbitrary labels. These findings indicate that the in-context learning ability and linguistic knowledge of PLMs generalizes beyond memorization of their training data.</abstract>
       <url hash="e2434da0">2023.acl-long.367</url>
@@ -5267,7 +5267,7 @@
       <title><fixed-case>RE</fixed-case>-Matching: A Fine-Grained Semantic Matching Method for Zero-Shot Relation Extraction</title>
       <author><first>Jun</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
       <author><first>WenYu</first><last>Zhan</last><affiliation>FuDan University</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
@@ -5293,7 +5293,7 @@
       <author><first>Gunhee</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Eun-Ju</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Yong</first><last>Lim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <author><first>Sangchul</first><last>Park</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Jung-Woo</first><last>Ha</last><affiliation>NAVER Cloud AI Lab</affiliation></author>
       <pages>6692-6712</pages>
@@ -5312,7 +5312,7 @@
       <author><first>Kihyo</first><last>Park</last><affiliation>Cornell University</affiliation></author>
       <author><first>Gyu Tae</first><last>Kim</last><affiliation>SoftlyAI</affiliation></author>
       <author><first>Minjoon</first><last>Seo</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>6713-6742</pages>
       <abstract>Research on Korean grammatical error correction (GEC) is limited, compared to other major languages such as English. We attribute this problematic circumstance to the lack of a carefully designed evaluation benchmark for Korean GEC. In this work, we collect three datasets from different sources (Kor-Lang8, Kor-Native, and Kor-Learner) that covers a wide range of Korean grammatical errors. Considering the nature of Korean grammar, We then define 14 error types for Korean and provide KAGAS (Korean Automatic Grammatical error Annotation System), which can automatically annotate error types from parallel corpora. We use KAGAS on our datasets to make an evaluation benchmark for Korean, and present baseline models trained from our datasets. We show that the model trained with our datasets significantly outperforms the currently used statistical Korean GEC system (Hanspell) on a wider range of error types, demonstrating the diversity and usefulness of the datasets. The implementations and datasets are open-sourced.</abstract>
       <url hash="b640e697">2023.acl-long.371</url>
@@ -5341,7 +5341,7 @@
       <author><first>Keerthiram</first><last>Murugesan</last><affiliation>IBM Research</affiliation></author>
       <author><first>Rosario</first><last>Uceda-Sosa</last><affiliation>IBM Research</affiliation></author>
       <author><first>Michiaki</first><last>Tatsubori</last><affiliation>IBM Research - Tokyo</affiliation></author>
-      <author><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Asim</first><last>Munawar</last><affiliation>IBM Research</affiliation></author>
       <author><first>Alexander</first><last>Gray</last><affiliation>IBM Research</affiliation></author>
@@ -5440,7 +5440,7 @@
       <author><first>Peiyuan</first><last>Gong</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Derek F.</first><last>Wong</last><affiliation>University of Macau</affiliation></author>
       <author><first>Yang</first><last>Gao</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <pages>6878-6892</pages>
       <abstract>Grammatical error correction (GEC) can be divided into sequence-to-edit (Seq2Edit) and sequence-to-sequence (Seq2Seq) frameworks, both of which have their pros and cons. To utilize the strengths and make up for the shortcomings of these frameworks, this paper proposes a novel method, TemplateGEC, which capitalizes on the capabilities of both Seq2Edit and Seq2Seq frameworks in error detection and correction respectively. TemplateGEC utilizes the detection labels from a Seq2Edit model, to construct the template as the input. A Seq2Seq model is employed to enforce consistency between the predictions of different templates by utilizing consistency learning. Experimental results on the Chinese NLPCC18, English BEA19 and CoNLL14 benchmarks show the effectiveness and robustness of TemplateGEC.Further analysis reveals the potential of our method in performing human-in-the-loop GEC. Source code and scripts are available at <url>https://github.com/li-aolong/TemplateGEC</url>.</abstract>
@@ -5452,7 +5452,7 @@
     <paper id="381">
       <title>Deep Model Compression Also Helps Models Capture Ambiguity</title>
       <author><first>Hancheol</first><last>Park</last><affiliation>School of Computing, KAIST</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
       <pages>6893-6905</pages>
       <abstract>Natural language understanding (NLU) tasks face a non-trivial amount of ambiguous samples where veracity of their labels is debatable among annotators. NLU models should thus account for such ambiguity, but they approximate the human opinion distributions quite poorly and tend to produce over-confident predictions. To address this problem, we must consider how to exactly capture the degree of relationship between each sample and its candidate classes. In this work, we propose a novel method with deep model compression and show how such relationship can be accounted for. We see that more reasonably represented relationships can be discovered in the lower layers and that validation accuracies are converging at these layers, which naturally leads to layer pruning. We also see that distilling the relationship knowledge from a lower layer helps models produce better distribution. Experimental results demonstrate that our method makes substantial improvement on quantifying ambiguity without gold distribution labels. As positive side-effects, our method is found to reduce the model size significantly and improve latency, both attractive aspects of NLU products.</abstract>
       <url hash="f701ee15">2023.acl-long.381</url>
@@ -5520,10 +5520,10 @@
       <author><first>Liangming</first><last>Pan</last><affiliation>University of California, Santa Barbara (UCSB)</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Xinyuan</first><last>Lu</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>Unversity of California, Santa Barbara</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>6981-7004</pages>
       <abstract>Fact-checking real-world claims often requires collecting multiple pieces of evidence and applying complex multi-step reasoning. In this paper, we present Program-Guided Fact-Checking (ProgramFC), a novel fact-checking model that decomposes complex claims into simpler sub-tasks that can be solved using a shared library of specialized functions. We first leverage the in-context learning ability of large language models to generate reasoning programs to guide the verification process. Afterward, we execute the program by delegating each sub-task to the corresponding sub-task handler. This process makes our model both explanatory and data-efficient, providing clear explanations of its reasoning process and requiring minimal training data. We evaluate ProgramFC on two challenging fact-checking datasets and show that it outperforms seven fact-checking baselines across different settings of evidence availability, with explicit output programs that benefit human debugging. Our codes and data are publicly available at <url>https://github.com/mbzuai-nlp/ProgramFC</url>.</abstract>
       <url hash="7f102326">2023.acl-long.386</url>
@@ -5709,7 +5709,7 @@
       <author><first>Kangjie</first><last>Chen</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Xiaofei</first><last>Xie</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Tianwei</first><last>Zhang</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>7236-7254</pages>
       <abstract>Backdoor attacks for neural code models have gained considerable attention due to the advancement of code intelligence. However, most existing works insert triggers into task-specific data for code-related downstream tasks, thereby limiting the scope of attacks. Moreover, the majority of attacks for pre-trained models are designed for understanding tasks. In this paper, we propose task-agnostic backdoor attacks for code pre-trained models. Our backdoored model is pre-trained with two learning strategies (i.e., Poisoned Seq2Seq learning and token representation learning) to support the multi-target attack of downstream code understanding and generation tasks. During the deployment phase, the implanted backdoors in the victim models can be activated by the designed triggers to achieve the targeted attack. We evaluate our approach on two code understanding tasks and three code generation tasks over seven datasets. Extensive experimental results demonstrate that our approach effectively and stealthily attacks code-related downstream tasks.</abstract>
       <url hash="d8fbef61">2023.acl-long.399</url>
@@ -5762,8 +5762,8 @@
       <author><first>Qianhui</first><last>Wu</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Huiqiang</first><last>Jiang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Haonan</first><last>Yin</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
       <pages>7317-7332</pages>
       <abstract>Self-supervised representation learning has proved to be a valuable component for out-of-distribution (OoD) detection with only the texts of in-distribution (ID) examples. These approaches either train a language model from scratch or fine-tune a pre-trained language model using ID examples, and then take the perplexity output by the language model as OoD scores. In this paper, we analyze the complementary characteristic of both methods and propose a multi-level knowledge distillation approach that integrates their strengths while mitigating their limitations. Specifically, we use a fine-tuned model as the teacher to teach a randomly initialized student model on the ID examples. Besides the prediction layer distillation, we present a similarity-based intermediate layer distillation method to thoroughly explore the representation space of the teacher model. In this way, the learned student can better represent the ID data manifold while gaining a stronger ability to map OoD examples outside the ID data manifold with the regularization inherited from pre-training. Besides, the student model sees only ID examples during parameter learning, further promoting more distinguishable features for OoD detection. We conduct extensive experiments over multiple benchmark datasets, i.e., CLINC150, SST, ROSTD, 20 NewsGroups, and AG News; showing that the proposed method yields new state-of-the-art performance. We also explore its application as an AIGC detector to distinguish answers generated by ChatGPT and human experts. It is observed that our model exceeds human evaluators in the pair-expert task on the Human ChatGPT Comparison Corpus.</abstract>
       <url hash="43da1725">2023.acl-long.403</url>
@@ -5789,7 +5789,7 @@
       <author><first>Jiazhan</first><last>Feng</last><affiliation>Peking University</affiliation></author>
       <author><first>Qingfeng</first><last>Sun</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Can</first><last>Xu</last><affiliation>STCA NLP Group, Microsoft</affiliation></author>
-      <author id="pu-zhao"><first>Pu</first><last>Zhao</last><affiliation>Microsoft</affiliation></author>
+      <author><first>Pu</first><last>Zhao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yaming</first><last>Yang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Chongyang</first><last>Tao</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Dongyan</first><last>Zhao</last><affiliation>pku.edu.cn</affiliation></author>
@@ -5915,7 +5915,7 @@
       <author><first>Sireesh</first><last>Gururaja</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Ritam</first><last>Dutt</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Tinglong</first><last>Liao</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>7502-7514</pages>
       <abstract>Recent work has demonstrated the positive impact of incorporating linguistic representations as additional context and scaffolds on the in-domain performance of several NLP tasks. We extend this work by exploring the impact of linguistic representations on cross-domain performance in a few-shot transfer setting. An important question is whether linguistic representations enhance generalizability by providing features that function as cross-domain pivots. We focus on the task of relation extraction on three datasets of procedural text in two domains, cooking and materials science. Our approach augments a popular transformer-based architecture by alternately incorporating syntactic and semantic graphs constructed by freely available off-the-shelf tools. We examine their utility for enhancing generalization, and investigate whether earlier findings, e.g. that semantic representations can be more helpful than syntactic ones, extend to relation extraction in multiple domains. We find that while the inclusion of these graphs results in significantly higher performance in few-shot transfer, both types of graph exhibit roughly equivalent utility.</abstract>
       <url hash="0704ffc7">2023.acl-long.414</url>
@@ -5946,7 +5946,7 @@
       <author><first>Russell</first><last>Klopfer</last><affiliation>3M | MModal</affiliation></author>
       <author><first>Edmond</first><last>Lu</last><affiliation>3M Health Information Systems</affiliation></author>
       <author><first>Benjamin</first><last>Striner</last><affiliation>3M</affiliation></author>
-      <author><first>Matthew</first><last>Gormley</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>7534-7550</pages>
       <abstract>We introduce a dataset for evidence/rationale extraction on an extreme multi-label classification task over long medical documents. One such task is Computer-Assisted Coding (CAC) which has improved significantly in recent years, thanks to advances in machine learning technologies. Yet simply predicting a set of final codes for a patient encounter is insufficient as CAC systems are required to provide supporting textual evidence to justify the billing codes. A model able to produce accurate and reliable supporting evidence for each code would be a tremendous benefit. However, a human annotated code evidence corpus is extremely difficult to create because it requires specialized knowledge. In this paper, we introduce MDACE, the first publicly available code evidence dataset, which is built on a subset of the MIMIC-III clinical records. The dataset – annotated by professional medical coders – consists of 302 Inpatient charts with 3,934 evidence spans and 52 Profee charts with 5,563 evidence spans. We implemented several evidence extraction methods based on the EffectiveCAN model (Liu et al., 2021) to establish baseline performance on this dataset. MDACE can be used to evaluate code evidence extraction methods for CAC systems, as well as the accuracy and interpretability of deep learning models for multi-label classification. We believe that the release of MDACE will greatly improve the understanding and application of deep learning technologies for medical coding and document classification.</abstract>
       <url hash="3f266b3d">2023.acl-long.416</url>
@@ -6065,7 +6065,7 @@
       <author><first>Hongshen</first><last>Chen</last><affiliation>JD.com</affiliation></author>
       <author><first>Pengjie</first><last>Ren</last><affiliation>Shandong University</affiliation></author>
       <author><first>Zhumin</first><last>Chen</last><affiliation>Shandong University</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Zhaochun</first><last>Ren</last><affiliation>Shandong University</affiliation></author>
       <pages>7669-7683</pages>
       <abstract>In open-domain question answering, due to the ambiguity of questions, multiple plausible answers may exist. To provide feasible answers to an ambiguous question,one approach is to directly predict all valid answers, but this can struggle with balancing relevance and diversity. An alternative is to gather candidate answers and aggregate them, but this method can be computationally costly and may neglect dependencies among answers. In this paper, we present AmbigPrompt to address the imperfections of existing approaches to answering ambiguous questions. Specifically, we integrate an answering model with a prompting model in an iterative manner. The prompting model adaptively tracks the reading process and progressively triggers the answering model to compose distinct and relevant answers. Additionally, we develop a task-specific post-pretraining approach for both the answering model and the prompting model, which greatly improves the performance of our framework. Empirical studies on two commonly-used open benchmarks show that AmbigPrompt achieves state-of-the-art or competitive results while using less memory and having a lower inference latency than competing approaches. Additionally, AmbigPrompt also performs well in low-resource settings.</abstract>
@@ -6089,7 +6089,7 @@
     <paper id="426">
       <title>Massively Multilingual Lexical Specialization of Multilingual Transformers</title>
       <author><first>Tommaso</first><last>Green</last><affiliation>University of Mannheim</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>University of Würzburg</affiliation></author>
       <pages>7700-7715</pages>
       <abstract>While pretrained language models (PLMs) primarily serve as general-purpose text encoders that can be fine-tuned for a wide variety of downstream tasks, recent work has shown that they can also be rewired to produce high-quality word representations (i.e., static word embeddings) and yield good performance in type-level lexical tasks. While existing work primarily focused on the lexical specialization of monolingual PLMs with immense quantities of monolingual constraints, in this work we expose massively multilingual transformers (MMTs, e.g., mBERT or XLM-R) to multilingual lexical knowledge at scale, leveraging BabelNet as the readily available rich source of multilingual and cross-lingual type-level lexical knowledge. Concretely, we use BabelNet’s multilingual synsets to create synonym pairs (or synonym-gloss pairs) across 50 languages and then subject the MMTs (mBERT and XLM-R) to a lexical specialization procedure guided by a contrastive objective. We show that such massively multilingual lexical specialization brings substantial gains in two standard cross-lingual lexical tasks, bilingual lexicon induction and cross-lingual word similarity, as well as in cross-lingual sentence retrieval. Crucially, we observe gains for languages unseen in specialization, indicating that multilingual lexical specialization enables generalization to languages with no lexical constraints. In a series of subsequent controlled experiments, we show that the number of specialization constraints plays a much greater role than the set of languages from which they originate.</abstract>
@@ -6104,7 +6104,7 @@
       <author><first>Ekin</first><last>Akyurek</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Ashwin</first><last>Kalyan</last><affiliation>Allen Institute for Artificial Intelligence (AI2)</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
       <author><first>Niket</first><last>Tandon</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>7716-7733</pages>
       <abstract>Despite their unprecedented success, even the largest language models make mistakes. Similar to how humans learn and improve using feedback, previous work proposed providing language models with natural language feedback to guide them in repairing their outputs. Because human-generated critiques are expensive to obtain, researchers have devised learned critique generators in lieu of human critics while assuming one can train downstream models to utilize generated feedback. However, this approach does not apply to black-box or limited access models such as ChatGPT, as they cannot be fine-tuned. Moreover, in the era of large general-purpose language agents, fine-tuning is neither computationally nor spatially efficient as it results in multiple copies of the network. In this work, we introduce RL4F (Reinforcement Learning for Feedback), a multi-agent collaborative framework where the critique generator is trained to maximize end-task performance of GPT-3, a fixed model more than 200 times its size. RL4F produces critiques that help GPT-3 revise its outputs. We study three datasets for action planning, summarization and alphabetization and show relative improvements up to 10% in multiple text similarity metrics over other learned, retrieval-augmented or prompting-based critique generators.</abstract>
@@ -6155,7 +6155,7 @@
     </paper>
     <paper id="431">
       <title>Modeling Structural Similarities between Documents for Coherence Assessment with Graph Convolutional Networks</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <author><first>Xiyan</first><last>Fu</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Michael</first><last>Strube</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <pages>7792-7808</pages>
@@ -6168,7 +6168,7 @@
     <paper id="432">
       <title><fixed-case>H</fixed-case>i<fixed-case>TIN</fixed-case>: Hierarchy-aware Tree Isomorphism Network for Hierarchical Text Classification</title>
       <author><first>He</first><last>Zhu</last><affiliation>Beihang University</affiliation></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
+      <author><first>Chong</first><last>Zhang</last><affiliation>Beihang University</affiliation></author>
       <author><first>Junjie</first><last>Huang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Junran</first><last>Wu</last><affiliation>State Key Lab of Software Development Environment, Beihang University</affiliation></author>
       <author><first>Ke</first><last>Xu</last><affiliation>Beihang University</affiliation></author>
@@ -6315,7 +6315,7 @@
       <author><first>Jianguo</first><last>Wei</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Meishan</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen), China</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>7960-7977</pages>
       <abstract>Visual spatial description (VSD) aims to generate texts that describe the spatial relations of the given objects within images. Existing VSD work merely models the 2D geometrical vision features, thus inevitably falling prey to the problem of skewed spatial understanding of target objects. In this work, we investigate the incorporation of 3D scene features for VSD. With an external 3D scene extractor, we obtain the 3D objects and scene features for input images, based on which we construct a target object-centered 3D spatial scene graph (Go3D-S2G), such that we model the spatial semantics of target objects within the holistic 3D scenes. Besides, we propose a scene subgraph selecting mechanism, sampling topologically-diverse subgraphs from Go3D-S2G, where the diverse local structure features are navigated to yield spatially-diversified text generation. Experimental results on two VSD datasets demonstrate that our framework outperforms the baselines significantly, especially improving on the cases with complex visual spatial relations. Meanwhile, our method can produce more spatially-diversified generation.</abstract>
       <url hash="07deb82b">2023.acl-long.442</url>
@@ -6327,7 +6327,7 @@
       <author><first>Yuanchi</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>7978-7996</pages>
       <abstract>While many parallel corpora are not publicly accessible for data copyright, data privacy and competitive differentiation reasons, trained translation models are increasingly available on open platforms. In this work, we propose a method called continual knowledge distillation to take advantage of existing translation models to improve one model of interest. The basic idea is to sequentially transfer knowledge from each trained model to the distilled model. Extensive experiments on Chinese-English and German-English datasets show that our method achieves significant and consistent improvements over strong baselines under both homogeneous and heterogeneous trained model settings and is robust to malicious models.</abstract>
       <url hash="387c3cde">2023.acl-long.443</url>
@@ -6337,9 +6337,9 @@
     </paper>
     <paper id="444">
       <title>Query Refinement Prompts for Closed-Book Long-Form <fixed-case>QA</fixed-case></title>
-      <author><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
       <author><first>Kellie</first><last>Webster</last><affiliation>Google</affiliation></author>
-      <author><first>Michael</first><last>Collins</last><affiliation>Columbia University/Google</affiliation></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last><affiliation>Columbia University/Google</affiliation></author>
       <author><first>Dipanjan</first><last>Das</last><affiliation>Google Research</affiliation></author>
       <author><first>Shashi</first><last>Narayan</last><affiliation>Google</affiliation></author>
       <pages>7997-8012</pages>
@@ -6403,7 +6403,7 @@
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Wenjuan</first><last>Han</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jian</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>8062-8079</pages>
       <abstract>Knowledge distillation (KD) is a promising technique for model compression in neural machine translation. However, where the knowledge hides in KD is still not clear, which may hinder the development of KD. In this work, we first unravel this mystery from an empirical perspective and show that the knowledge comes from the top-1 predictions of teachers, which also helps us build a potential connection between word- and sequence-level KD. Further, we point out two inherent issues in vanilla word-level KD based on this finding. Firstly, the current objective of KD spreads its focus to whole distributions to learn the knowledge, yet lacks special treatment on the most crucial top-1 information. Secondly, the knowledge is largely covered by the golden information due to the fact that most top-1 predictions of teachers overlap with ground-truth tokens, which further restricts the potential of KD. To address these issues, we propose a new method named Top-1 Information Enhanced Knowledge Distillation (TIE-KD). Specifically, we design a hierarchical ranking loss to enforce the learning of the top-1 information from the teacher. Additionally, we develop an iterative KD procedure to infuse more additional knowledge by distilling on the data without ground-truth targets. Experiments on WMT’14 English-German, WMT’14 English-French and WMT’16 English-Romanian demonstrate that our method can respectively boost Transformer<tex-math>_{base}</tex-math> students by +1.04, +0.60 and +1.11 BLEU scores and significantly outperforms the vanilla word-level KD baseline. Besides, our method shows higher generalizability on different teacher-student capacity gaps than existing KD techniques.</abstract>
       <url hash="1817602f">2023.acl-long.448</url>
@@ -6491,7 +6491,7 @@
       <title><fixed-case>F</fixed-case>i<fixed-case>D</fixed-case>-<fixed-case>ICL</fixed-case>: A Fusion-in-Decoder Approach for Efficient In-Context Learning</title>
       <author><first>Qinyuan</first><last>Ye</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Iz</first><last>Beltagy</last><affiliation>Allen Institute for AI (AI2)</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Xiang</first><last>Ren</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>8158-8185</pages>
@@ -6521,7 +6521,7 @@
       <author><first>Xuxi</first><last>Chen</last><affiliation>UT Austin</affiliation></author>
       <author><first>Tianlong</first><last>Chen</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Weizhu</first><last>Chen</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Zhangyang</first><last>Wang</last><affiliation>UT Austin</affiliation></author>
       <author><first>Yu</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
       <pages>8208-8222</pages>
@@ -6614,7 +6614,7 @@
       <title>Factual or Contextual? Disentangling Error Types in Entity Description Generation</title>
       <author><first>Navita</first><last>Goyal</last><affiliation>University of Maryland College Park</affiliation></author>
       <author><first>Ani</first><last>Nenkova</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
       <pages>8322-8340</pages>
       <abstract>In the task of entity description generation, given a context and a specified entity, a model must describe that entity correctly and in a contextually-relevant way. In this task, as well as broader language generation tasks, the generation of a nonfactual description (factual error) versus an incongruous description (contextual error) is fundamentally different, yet often conflated. We develop an evaluation paradigm that enables us to disentangle these two types of errors in naturally occurring textual contexts. We find that factuality and congruity are often at odds, and that models specifically struggle with accurate descriptions of entities that are less familiar to people. This shortcoming of language models raises concerns around the trustworthiness of such models, since factual errors on less well-known entities are exactly those that a human reader will not recognize.</abstract>
       <url hash="cd8a1a72">2023.acl-long.463</url>
@@ -6627,7 +6627,7 @@
       <author><first>Chi</first><last>Chen</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>8341-8355</pages>
       <abstract>Weakly supervised vision-and-language pre-training (WVLP), which learns cross-modal representations with limited cross-modal supervision, has been shown to effectively reduce the data cost of pre-training while maintaining decent performance on downstream tasks. However, current WVLP methods use only local descriptions of images, i.e., object tags, as cross-modal anchors to construct weakly-aligned image-text pairs for pre-training. This affects the data quality and thus the effectiveness of pre-training. In this paper, we propose to directly take a small number of aligned image-text pairs as anchors, and represent each unaligned image and text by its similarities to these anchors, i.e., relative representations. We build a WVLP framework based on the relative representations, namely RELIT, which collects high-quality weakly-aligned image-text pairs from large-scale image-only and text-only data for pre-training through relative representation-based retrieval and generation. Experiments on four downstream tasks show that RELIT achieves new state-of-the-art results under the weakly supervised setting.</abstract>
       <url hash="53b8ef4e">2023.acl-long.464</url>
@@ -6657,7 +6657,7 @@
     <paper id="466">
       <title><fixed-case>A</fixed-case>rg<fixed-case>U</fixed-case>: A Controllable Factual Argument Generator</title>
       <author><first>Sougata</first><last>Saha</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
       <pages>8373-8388</pages>
       <abstract>Effective argumentation is essential towards a purposeful conversation with a satisfactory outcome. For example, persuading someone to reconsider smoking might involve empathetic, well founded arguments based on facts and expert opinions about its ill-effects and the consequences on one’s family. However, the automatic generation of high-quality factual arguments can be challenging. Addressing existing controllability issues can make the recent advances in computational models for argument generation a potential solution. In this paper, we introduce ArgU: a neural argument generator capable of producing factual arguments from input facts and real-world concepts that can be explicitly controlled for stance and argument structure using Walton’s argument scheme-based control codes. Unfortunately, computational argument generation is a relatively new field and lacks datasets conducive to training. Hence, we have compiled and released an annotated corpora of 69,428 arguments spanning six topics and six argument schemes, making it the largest publicly available corpus for identifying argument schemes; the paper details our annotation and dataset creation framework. We further experiment with an argument generation strategy that establishes an inference strategy by generating an “argument template” before actual argument generation. Our results demonstrate that it is possible to automatically generate diverse arguments exhibiting different inference patterns for the same set of facts by using control codes based on argument schemes and stance.</abstract>
       <url hash="9e23434e">2023.acl-long.466</url>
@@ -6669,7 +6669,7 @@
       <title>Learning Answer Generation using Supervision from Automatic Question Answering Evaluators</title>
       <author><first>Matteo</first><last>Gabburo</last><affiliation>University of Trento</affiliation></author>
       <author><first>Siddhant</first><last>Garg</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Kensho Technologies</affiliation></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Kensho Technologies</affiliation></author>
       <author><first>Alessandro</first><last>Moschitti</last><affiliation>Amazon</affiliation></author>
       <pages>8389-8403</pages>
       <abstract>Recent studies show that sentence-level extractive QA, i.e., based on Answer Sentence Selection (AS2), is outperformed by Generation-based QA (GenQA) models, which generate answers using the top-k answer sentences ranked by AS2 models (a la retrieval-augmented generation style). In this paper, we propose a novel training paradigm for GenQA using supervision from automatic QA evaluation models (GAVA). Specifically, we propose three strategies to transfer knowledge from these QA evaluation models to a GenQA model: (i) augmenting training data with answers generated by the GenQA model and labelled by GAVA (either statically, before training, or (ii) dynamically, at every training epoch); and (iii) using the GAVA score for weighting the generator loss during the learning of the GenQA model. We evaluate our proposed methods on two academic and one industrial dataset, obtaining a significant improvement in answering accuracy over the previous state of the art.</abstract>
@@ -6710,7 +6710,7 @@
       <author><first>Subhajit</first><last>Chaudhury</last><affiliation>IBM Research</affiliation></author>
       <author><first>Tahira</first><last>Naseem</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Ramon</first><last>Fernandez Astudillo</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
       <author><first>Tim</first><last>Klinger</last><affiliation>IBM Research AI</affiliation></author>
       <pages>8434-8448</pages>
       <abstract>Nearly all general-purpose neural semantic parsers generate logical forms in a strictly top-down autoregressive fashion. Though such systems have achieved impressive results across a variety of datasets and domains, recent works have called into question whether they are ultimately limited in their ability to compositionally generalize. In this work, we approach semantic parsing from, quite literally, the opposite direction; that is, we introduce a neural semantic parsing generation method that constructs logical forms from the bottom up, beginning from the logical form’s leaves. The system we introduce is lazy in that it incrementally builds up a set of potential semantic parses, but only expands and processes the most promising candidate parses at each generation step. Such a parsimonious expansion scheme allows the system to maintain an arbitrarily large set of parse hypotheses that are never realized and thus incur minimal computational overhead. We evaluate our approach on compositional generalization; specifically, on the challenging CFQ dataset and two other Text-to-SQL datasets where we show that our novel, bottom-up semantic parsing technique outperforms general-purpose semantic parsers while also being competitive with semantic parsers that have been tailored to each task.</abstract>
@@ -6737,7 +6737,7 @@
       <title>(<fixed-case>QA</fixed-case>)<tex-math>^2</tex-math>: Question Answering with Questionable Assumptions</title>
       <author><first>Najoung</first><last>Kim</last><affiliation>Boston University</affiliation></author>
       <author><first>Phu Mon</first><last>Htut</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <author><first>Jackson</first><last>Petty</last><affiliation>New York University</affiliation></author>
       <pages>8466-8487</pages>
       <abstract>Naturally occurring information-seeking questions often contain questionable assumptions—assumptions that are false or unverifiable. Questions containing questionable assumptions are challenging because they require a distinct answer strategy that deviates from typical answers for information-seeking questions. For instance, the question “When did Marie Curie discover Uranium?” cannot be answered as a typical “when” question without addressing the false assumption “Marie Curie discovered Uranium”. In this work, we propose (QA)2 (Question Answering with Questionable Assumptions), an open-domain evaluation dataset consisting of naturally occurring search engine queries that may or may not contain questionable assumptions. To be successful on (QA)2, systems must be able to detect questionable assumptions and also be able to produce adequate responses for both typical information-seeking questions and ones with questionable assumptions. Through human rater acceptability on end-to-end QA with (QA)2, we find that current models do struggle with handling questionable assumptions, leaving substantial headroom for progress.</abstract>
@@ -6761,7 +6761,7 @@
     <paper id="474">
       <title>Targeted Data Generation: Finding and Fixing Model Weaknesses</title>
       <author><first>Zexue</first><last>He</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Marco Tulio</first><last>Ribeiro</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Fereshte</first><last>Khani</last><affiliation>Microsoft</affiliation></author>
       <pages>8506-8520</pages>
       <abstract>Even when aggregate accuracy is high, state-of-the-art NLP models often fail systematically on specific subgroups of data, resulting in unfair outcomes and eroding user trust. Additional data collection may not help in addressing these weaknesses, as such challenging subgroups may be unknown to users, and underrepresented in the existing and new data. We propose Targeted Data Generation (TDG), a framework that automatically identifies challenging subgroups, and generates new data for those subgroups using large language models (LLMs) with a human in the loop. TDG estimates the expected benefit and potential harm of data augmentation for each subgroup, and selects the ones most likely to improve within-group performance without hurting overall performance. In our experiments, TDG significantly improves the accuracy on challenging subgroups for state-of-the-art sentiment analysis and natural language inference models, while also improving overall test accuracy.</abstract>
@@ -6787,7 +6787,7 @@
       <author><first>Junnan</first><last>Zhu</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Haitao</first><last>Lin</last><affiliation>National Laboratory of Pattern Recognition, Institute of Automation, CAS</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>CASIA</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>8538-8553</pages>
       <abstract>Multimodal summarization usually suffers from the problem that the contribution of the visual modality is unclear. Existing multimodal summarization approaches focus on designing the fusion methods of different modalities, while ignoring the adaptive conditions under which visual modalities are useful. Therefore, we propose a novel Coarse-to-Fine contribution network for multimodal Summarization (CFSum) to consider different contributions of images for summarization. First, to eliminate the interference of useless images, we propose a pre-filter module to abandon useless images. Second, to make accurate use of useful images, we propose two levels of visual complement modules, word level and phrase level. Specifically, image contributions are calculated and are adopted to guide the attention of both textual and visual modalities. Experimental results have shown that CFSum significantly outperforms multiple strong baselines on the standard benchmark. Furthermore, the analysis verifies that useful images can even help generate non-visual words which are implicitly represented in the image.</abstract>
       <url hash="fa2b2b33">2023.acl-long.476</url>
@@ -6800,9 +6800,9 @@
       <author><first>Made Nindyatama</first><last>Nityasya</last><affiliation>Independent</affiliation></author>
       <author><first>Haryo</first><last>Wibowo</last><affiliation>Independent</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Radityo Eko</first><last>Prasojo</last><affiliation>Pitik.id</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Adhiguna</first><last>Kuncoro</last><affiliation>University of Oxford and DeepMind</affiliation></author>
       <pages>8554-8572</pages>
       <abstract>This evidence-based position paper critiques current research practices within the language model pre-training literature. Despite rapid recent progress afforded by increasingly better pre-trained language models (PLMs), current PLM research practices often conflate different possible sources of model improvement, without conducting proper ablation studies and principled comparisons between different models under comparable conditions. These practices (i) leave us ill-equipped to understand which pre-training approaches should be used under what circumstances; (ii) impede reproducibility and credit assignment; and (iii) render it difficult to understand: “How exactly does each factor contribute to the progress that we have today?” We provide a case in point by revisiting the success of BERT over its baselines, ELMo and GPT-1, and demonstrate how — under comparable conditions where the baselines are tuned to a similar extent — these baselines (and even-simpler variants thereof) can, in fact, achieve competitive or better performance than BERT. These findings demonstrate how disentangling different factors of model improvements can lead to valuable new insights. We conclude with recommendations for how to encourage and incentivize this line of work, and accelerate progress towards a better and more systematic understanding of what factors drive the progress of our foundation models today.</abstract>
@@ -6943,12 +6943,12 @@
       <author><first>Pepa</first><last>Atanasova</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Todor</first><last>Mihaylov</last><affiliation>Meta AI</affiliation></author>
       <author><first>Galia</first><last>Angelova</last><affiliation>Institute of Information and communication Technologies, Bulgarian Academy of Sciences</affiliation></author>
-      <author><first>Kiril</first><last>Simov</last><affiliation>Artificial Intelligence and Language Technologies Department, IICT, Bulgarian Academy of Sciences</affiliation></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last><affiliation>Artificial Intelligence and Language Technologies Department, IICT, Bulgarian Academy of Sciences</affiliation></author>
       <author><first>Petya</first><last>Osenova</last><affiliation>Sofia University “St. Kl. Ohridski” and IICT-BAS</affiliation></author>
       <author><first>Veselin</first><last>Stoyanov</last><affiliation>Facebook</affiliation></author>
       <author><first>Ivan</first><last>Koychev</last><affiliation>Sofia University “St. Kliment Ohridski”</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>8733-8759</pages>
       <abstract>We present bgGLUE (Bulgarian General Language Understanding Evaluation), a benchmark for evaluating language models on Natural Language Understanding (NLU) tasks in Bulgarian. Our benchmark includes NLU tasks targeting a variety of NLP problems (e.g., natural language inference, fact-checking, named entity recognition, sentiment analysis, question answering, etc.) and machine learning tasks (sequence labeling, document-level classification, and regression). We run the first systematic evaluation of pre-trained language models for Bulgarian, comparing and contrasting results across the nine tasks in the benchmark. The evaluation results show strong performance on sequence labeling tasks, but there is a lot of room for improvement for tasks that require more complex reasoning. We make bgGLUE publicly available together with the fine-tuning and the evaluation code, as well as a public leaderboard at <url>https://bgglue.github.io</url>, and we hope that it will enable further advancements in developing NLU models for Bulgarian.</abstract>
       <url hash="579d5e09">2023.acl-long.487</url>
@@ -6975,7 +6975,7 @@
       <author><first>Ruixiang</first><last>Cui</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Seolhwa</first><last>Lee</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>8786-8800</pages>
       <abstract>Humans can effortlessly understand the coordinate structure of sentences such as “Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, *respectively*”. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of “respectively”. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions.</abstract>
       <url hash="41b93975">2023.acl-long.489</url>
@@ -7174,7 +7174,7 @@
       <title><fixed-case>M</fixed-case>ix<fixed-case>CE</fixed-case>: Training Autoregressive Language Models by Mixing Forward and Reverse Cross-Entropies</title>
       <author><first>Shiyue</first><last>Zhang</last><affiliation>The University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Shijie</first><last>Wu</last><affiliation>Bloomberg L.P.</affiliation></author>
-      <author><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Steven</first><last>Lu</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Mark</first><last>Dredze</last><affiliation>Johns Hopkins University</affiliation></author>
@@ -7208,7 +7208,7 @@
       <author><first>Justine</first><last>Kao</last><affiliation>Meta AI</affiliation></author>
       <author><first>Alexandre</first><last>Mourachko</last><affiliation>Meta AI</affiliation></author>
       <author><first>Holger</first><last>Schwenk</last><affiliation>Meta AI Research</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta AI</affiliation></author>
       <pages>9064-9079</pages>
       <abstract>End-to-End speech-to-speech translation (S2ST) is generally evaluated with text-based metrics. This means that generated speech has to be automatically transcribed, making the evaluation dependent on the availability and quality of automatic speech recognition (ASR) systems. In this paper, we propose a text-free evaluation metric for end-to-end S2ST, named BLASER, to avoid the dependency on ASR systems. BLASER leverages a multilingual multimodal encoder to directly encode the speech segments for source input, translation output and reference into a shared embedding space and computes a score of the translation quality that can be used as a proxy to human evaluation. To evaluate our approach, we construct training and evaluation sets from more than 40k human annotations covering seven language directions. The best results of BLASER are achieved by training with supervision from human rating scores. We show that when evaluated at the sentence level, BLASER correlates significantly better with human judgment compared to ASR dependent metrics including ASR-SENTBLEU in all translation directions and ASR-COMET in five of them. Our analysis shows combining speech and text as inputs to BLASER does not increase the correlation with human scores, but best correlations are achieved when using speech, which motivates the goal of our research. Moreover, we show that using ASR for references is detrimental for text-based metrics.</abstract>
       <url hash="8634d6b8">2023.acl-long.504</url>
@@ -7237,7 +7237,7 @@
       <title>Backpack Language Models</title>
       <author><first>John</first><last>Hewitt</last><affiliation>Stanford University</affiliation></author>
       <author><first>John</first><last>Thickstun</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <author><first>Percy</first><last>Liang</last><affiliation>Stanford University</affiliation></author>
       <pages>9103-9125</pages>
       <abstract>We present Backpacks: a new neural architecture that marries strong modeling performancewith an interface for interpretability and control. Backpacks learn multiple non-contextual sense vectors for each word in a vocabulary, and represent a word in a sequence as a context-dependent, non-negative linear combination ofsense vectors in this sequence. We find that, after training, sense vectors specialize, each encoding a different aspect of a word. We can interpret a sense vector by inspecting its (non-contextual, linear) projection onto the output space, and intervene on these interpretable hooks to change the model’s behavior in predictable ways. We train a 170M-parameter Backpack language model on OpenWebText, matching the loss of a GPT-2 small (124Mparameter) Transformer. On lexical similarity evaluations, we find that Backpack sense vectors outperform even a 6B-parameter Transformer LM’s word embeddings. Finally, we present simple algorithms that intervene on sense vectors to perform controllable text generation and debiasing. For example, we can edit the sense vocabulary to tend more towards a topic, or localize a source of gender bias to a sense vector and globally suppress that sense.</abstract>
@@ -7294,7 +7294,7 @@
     <paper id="510">
       <title>Translation-Enhanced Multilingual Text-to-Image Generation</title>
       <author><first>Yaoyiran</first><last>Li</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Ching-Yun</first><last>Chang</last><affiliation>Amazon.com</affiliation></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Stephen</first><last>Rawls</last><affiliation>Amazon</affiliation></author>
       <author><first>Ivan</first><last>Vulić</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
@@ -7341,7 +7341,7 @@
       <author><first>Wenbo</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
       <author><first>Yiwen</first><last>Chen</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>9235-9254</pages>
       <abstract>Automatic melody-to-lyric generation is a task in which song lyrics are generated to go with a given melody. It is of significant practical interest and more challenging than unconstrained lyric generation as the music imposes additional constraints onto the lyrics. The training data is limited as most songs are copyrighted, resulting in models that underfit the complicated cross-modal relationship between melody and lyrics. In this work, we propose a method for generating high-quality lyrics without training on any aligned melody-lyric data. Specifically, we design a hierarchical lyric generation framework that first generates a song outline and second the complete lyrics. The framework enables disentanglement of training (based purely on text) from inference (melody-guided text generation) to circumvent the shortage of parallel data. We leverage the segmentation and rhythm alignment between melody and lyrics to compile the given melody into decoding constraints as guidance during inference. The two-step hierarchical design also enables content control via the lyric outline, a much-desired feature for democratizing collaborative song creation. Experimental results show that our model can generate high-quality lyrics that are more on-topic, singable, intelligible, and coherent than strong baselines, for example SongMASS, a SOTA model trained on a parallel dataset, with a 24% relative overall quality improvement based on human ratings. Our code is available at <url>https://github.com/amazon-science/unsupervised-melody-to-lyrics-generation</url>.</abstract>
@@ -7468,7 +7468,7 @@
       <author><first>Aditya</first><last>Yedetore</last><affiliation>Boston University</affiliation></author>
       <author><first>Tal</first><last>Linzen</last><affiliation>New York University</affiliation></author>
       <author><first>Robert</first><last>Frank</last><affiliation>Yale University</affiliation></author>
-      <author><first>R. Thomas</first><last>McCoy</last><affiliation>Princeton University</affiliation></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last><affiliation>Princeton University</affiliation></author>
       <pages>9370-9393</pages>
       <abstract>When acquiring syntax, children consistently choose hierarchical rules over competing non-hierarchical possibilities. Is this preference due to a learning bias for hierarchical structure, or due to more general biases that interact with hierarchical cues in children’s linguistic input? We explore these possibilities by training LSTMs and Transformers - two types of neural networks without a hierarchical bias - on data similar in quantity and content to children’s linguistic input: text from the CHILDES corpus. We then evaluate what these models have learned about English yes/no questions, a phenomenon for which hierarchical structure is crucial. We find that, though they perform well at capturing the surface statistics of child-directed speech (as measured by perplexity), both model types generalize in a way more consistent with an incorrect linear rule than the correct hierarchical rule. These results suggest that human-like generalization from text alone requires stronger biases than the general sequence-processing biases of standard neural network architectures.</abstract>
       <url hash="e74da490">2023.acl-long.521</url>
@@ -7524,14 +7524,14 @@
     <paper id="525">
       <title>Open Set Relation Extraction via Unknown-Aware Training</title>
       <author><first>Jun</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Fudan University</affiliation></author>
       <author><first>WenYu</first><last>Zhan</last><affiliation>FuDan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <author><first>Yun Wen</first><last>Chen</last><affiliation>DataGrand Inc.</affiliation></author>
       <author><first>Xiang</first><last>Gao</last><affiliation>DataGrand Inc.</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>9453-9467</pages>
       <abstract>The existing supervised relation extraction methods have achieved impressive performance in a closed-set setting, in which the relations remain the same during both training and testing. In a more realistic open-set setting, unknown relations may appear in the test set. Due to the lack of supervision signals from unknown relations, a well-performing closed-set relation extractor can still confidently misclassify them into known relations. In this paper, we propose an unknown-aware training method, regularizing the model by dynamically synthesizing negative instances that can provide the missing supervision signals. Inspired by text adversarial attack, We adaptively apply small but critical perturbations to original training data,synthesizing <b>difficult enough</b> negative instances that are mistaken by the model as known relations, thus facilitating a compact decision boundary. Experimental results show that our method achieves SOTA unknown relation detection without compromising the classification of known relations.</abstract>
       <url hash="a882cd8e">2023.acl-long.525</url>
@@ -7545,7 +7545,7 @@
       <author><first>Yushuo</first><last>Chen</last><affiliation>RENMIN UNIVERSITY of CHINA</affiliation></author>
       <author><first>Yifan</first><last>Du</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
       <author><first>Junyi</first><last>Li</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>9468-9481</pages>
       <abstract>People often imagine relevant scenes to aid in the writing process. In this work, we aim to utilize visual information for composition in the same manner as humans. We propose a method, LIVE, that makes pre-trained language models (PLMs) Learn to Imagine for Visually-augmented natural language gEneration. First, we imagine the scene based on the text: we use a diffusion model to synthesize high-quality images conditioned on the input texts. Second, we use CLIP to determine whether the text can evoke the imagination in a posterior way. Finally, our imagination is dynamic, and we conduct synthesis for each sentence rather than generate only one image for an entire paragraph. Technically, we propose a novel plug-and-play fusion layer to obtain visually-augmented representations for each text. Our vision-text fusion layer is compatible with Transformer-based architecture. We have conducted extensive experiments on four generation tasks using BART and T5, and the automatic results and human evaluation demonstrate the effectiveness of our proposed method. We will release the code, model, and data at the link: <url>https://github.com/RUCAIBox/LIVE</url>.</abstract>
@@ -7588,9 +7588,9 @@
       <title>The Best of Both Worlds: Combining Human and Machine Translations for Multilingual Semantic Parsing with Active Learning</title>
       <author><first>Zhuang</first><last>Li</last><affiliation>Monash University</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Philip</first><last>Cohen</last><affiliation>Openstream, Inc.</affiliation></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last><affiliation>Openstream, Inc.</affiliation></author>
       <author><first>Raj</first><last>Tumuluri</last><affiliation>Openstream.ai</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>9511-9528</pages>
       <abstract>Multilingual semantic parsing aims to leverage the knowledge from the high-resource languages to improve low-resource semantic parsing, yet commonly suffers from the data imbalance problem. Prior works propose to utilize the translations by either humans or machines to alleviate such issues. However, human translations are expensive, while machine translations are cheap but prone to error and bias. In this work, we propose an active learning approach that exploits the strengths of both human and machine translations by iteratively adding small batches of human translations into the machine-translated training set. Besides, we propose novel aggregated acquisition criteria that help our active learning method select utterances to be manually translated. Our experiments demonstrate that an ideal utterance selection can significantly reduce the error and bias in the translated data, resulting in higher parser accuracies than the parsers merely trained on the machine-translated data.</abstract>
       <url hash="4cb32b7c">2023.acl-long.529</url>
@@ -7626,7 +7626,7 @@
       <title>Document-Level Event Argument Extraction With a Chain Reasoning Paradigm</title>
       <author><first>Jian</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Chen</first><last>Liang</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Haoyan</first><last>Liu</last><affiliation>Beihang University</affiliation></author>
       <author><first>Zhe</first><last>Zhao</last><affiliation>Tencent</affiliation></author>
       <pages>9570-9583</pages>
@@ -7794,7 +7794,7 @@
       <author><first>Lucas</first><last>Torroba Hennigen</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Tiago</first><last>Pimentel</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Clara</first><last>Meister</last><affiliation>ETH Zurich</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>ETH Zürich</affiliation></author>
       <pages>9744-9770</pages>
       <abstract>Language modeling, a central task in natural language processing, involves estimating a probability distribution over strings. In most cases, the estimated distribution sums to 1 over all finite strings. However, in some pathological cases, probability mass can “leak” onto the set of infinite sequences. In order to characterize the notion of leakage more precisely, this paper offers a measure-theoretic treatment of language modeling. We prove that many popular language model families are in fact tight, meaning that they will not leak in this sense. We also generalize characterizations of tightness proposed in previous works.</abstract>
@@ -7879,13 +7879,13 @@
     </paper>
     <paper id="549">
       <title>Automated Metrics for Medical Multi-Document Summarization Disagree with Human Evaluations</title>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Jay</first><last>DeYoung</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Thinh Hung</first><last>Truong</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Bailey</first><last>Kuehl</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Erin</first><last>Bransom</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>9871-9889</pages>
       <abstract>Evaluating multi-document summarization (MDS) quality is difficult. This is especially true in the case of MDS for biomedical literature reviews, where models must synthesize contradicting evidence reported across different documents. Prior work has shown that rather than performing the task, models may exploit shortcuts that are difficult to detect using standard n-gram similarity metrics such as ROUGE. Better automated evaluation metrics are needed, but few resources exist to assess metrics when they are proposed. Therefore, we introduce a dataset of human-assessed summary quality facets and pairwise preferences to encourage and support the development of better automated evaluation methods for literature review MDS. We take advantage of community submissions to the Multi-document Summarization for Literature Review (MSLR) shared task to compile a diverse and representative sample of generated summaries. We analyze how automated summarization evaluation metrics correlate with lexical features of generated summaries, to other automated metrics including several we propose in this work, and to aspects of human-assessed summary quality. We find that not only do automated metrics fail to capture aspects of quality as assessed by humans, in many cases the system rankings produced by these metrics are anti-correlated with rankings according to human annotators.</abstract>
       <url hash="4738e41b">2023.acl-long.549</url>
@@ -7941,7 +7941,7 @@
       <author><first>Junkai</first><last>Zhou</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Liang</first><last>Pang</last><affiliation>Institute of Computing Technology of Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>9945-9959</pages>
       <abstract>Language models trained on large-scale corpora can generate remarkably fluent results in open-domain dialogue. However, for the persona-based dialogue generation task, consistency and coherence are also key factors, which are great challenges for language models. Existing works mainly focus on valuable data filtering, model structure modifying, or objective function designing, while their improvements are limited and hard to generalize to all types of pre-trained language models. However, we find that language models can produce consistent and coherent responses if we consider enough generations. Thus, the problems lay in large-scale response generation and target response selection. In this work, a simple but effective two-stage SimOAP strategy is proposed, i.e., over-sampling and post-evaluation. The over-sampling stage takes large-scale responses from existing trained models efficiently via off-the-shelf distilling and compressing methods, and the post-evaluation stage selects a good response based on multiple well-designed evaluation metrics from large-scale candidates. Experimental results show that the proposed plug-in SimOAP strategy improves the backbone models and outperforms the baseline strategies in both automatic and human evaluations.</abstract>
       <url hash="b910c78b">2023.acl-long.553</url>
@@ -8139,7 +8139,7 @@
       <author><first>Shane</first><last>Storks</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Keunwoo</first><last>Yu</last><affiliation>University Of Michigan</affiliation></author>
       <author><first>Ziqiao</first><last>Ma</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>10199-10219</pages>
       <abstract>As natural language processing (NLP) has recently seen an unprecedented level of excitement, and more people are eager to enter the field, it is unclear whether current research reproducibility efforts are sufficient for this group of beginners to apply the latest developments. To understand their needs, we conducted a study with 93 students in an introductory NLP course, where students reproduced the results of recent NLP papers. Surprisingly, we find that their programming skill and comprehension of research papers have a limited impact on their effort spent completing the exercise. Instead, we find accessibility efforts by research authors to be the key to success, including complete documentation, better coding practice, and easier access to data files. Going forward, we recommend that NLP researchers pay close attention to these simple aspects of open-sourcing their work, and use insights from beginners’ feedback to provide actionable ideas on how to better support them.</abstract>
       <url hash="c5466adb">2023.acl-long.568</url>
@@ -8194,8 +8194,8 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Wei</first><last>Zou</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>10270-10287</pages>
       <abstract>In recent years, deep neural networks (DNNs) have achieved state-of-the-art performance on a wide range of tasks. However, limitations in interpretability have hindered their applications in the real world. This work proposes to interpret neural networks by linear decomposition and finds that the ReLU-activated Transformer can be considered as a linear model on a single input. We further leverage the linearity of the model and propose a linear decomposition of the model output to generate local explanations. Our evaluation of sentiment classification and machine translation shows that our method achieves competitive performance in efficiency and fidelity of explanation. In addition, we demonstrate the potential of our approach in applications with examples of error analysis on multiple tasks.</abstract>
       <url hash="172899b8">2023.acl-long.572</url>
@@ -8344,7 +8344,7 @@
       <author><first>Arnav</first><last>Mhaske</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Harshit</first><last>Kedia</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Sumanth</first><last>Doddapaneni</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>IIT Madras</affiliation></author>
       <author><first>Rudra</first><last>Murthy</last><affiliation>IBM India Research Limited</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft AI and Research</affiliation></author>
@@ -8359,7 +8359,7 @@
       <title><fixed-case>CREPE</fixed-case>: Open-Domain Question Answering with False Presuppositions</title>
       <author><first>Xinyan</first><last>Yu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Sewon</first><last>Min</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>10457-10480</pages>
       <abstract>When asking about unfamiliar topics, information seeking users often pose questions with false presuppositions. Most existing question answering (QA) datasets, in contrast, assume all questions have well defined answers. We introduce CREPE, a QA dataset containing a natural distribution of presupposition failures from online information-seeking forums. We find that 25% of questions contain false presuppositions, and provide annotations for these presuppositions and their corrections. Through extensive baseline experiments, we show that adaptations of existing open-domain QA models can find presuppositions moderately well, but struggle when predicting whether a presupposition is factually correct. This is in large part due to difficulty in retrieving relevant evidence passages from a large text corpus. CREPE provides a benchmark to study question answering in the wild, and our analyses provide avenues for future work in better modeling and further studying the task.</abstract>
@@ -8403,7 +8403,7 @@
       <title>Multilingual Knowledge Graph Completion with Language-Sensitive Multi-Graph Attention</title>
       <author><first>Rongchuan</first><last>Tang</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>CASIA</affiliation></author>
       <pages>10508-10519</pages>
       <abstract>Multilingual Knowledge Graph Completion (KGC) aims to predict missing links with multilingual knowledge graphs. However, existing approaches suffer from two main drawbacks: (a) alignment dependency: the multilingual KGC is always realized with joint entity or relation alignment, which introduces additional alignment models and increases the complexity of the whole framework; (b) training inefficiency: the trained model will only be used for the completion of one target KG, although the data from all KGs are used simultaneously. To address these drawbacks, we propose a novel multilingual KGC framework with language-sensitive multi-graph attention such that the missing links on all given KGs can be inferred by a universal knowledge completion model. Specifically, we first build a relational graph neural network by sharing the embeddings of aligned nodes to transfer language-independent knowledge. Meanwhile, a language-sensitive multi-graph attention (LSMGA) is proposed to deal with the information inconsistency among different KGs. Experimental results show that our model achieves significant improvements on the DBP-5L and E-PKG datasets.</abstract>
@@ -8423,7 +8423,7 @@
       <author><first>Budhaditya</first><last>Deb</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Yuan-Jyue</first><last>Chen</last><affiliation>Microsoft</affiliation></author>
       <author><first>Tristan</first><last>Naumann</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
       <pages>10520-10542</pages>
       <abstract>Summarization models often generate text that is poorly calibrated to quality metrics because they are trained to maximize the likelihood of a single reference (MLE). To address this, recent work has added a calibration step, which exposes a model to its own ranked outputs to improve relevance or, in a separate line of work, contrasts positive and negative sets to improve faithfulness. While effective, much of this work has focused on <i>how</i> to generate and optimize these sets. Less is known about <i>why</i> one setup is more effective than another. In this work, we uncover the underlying characteristics of effective sets. For each training instance, we form a large, diverse pool of candidates and systematically vary the subsets used for calibration fine-tuning. Each selection strategy targets distinct aspects of the sets, such as lexical diversity or the size of the gap between positive and negatives. On three diverse scientific long-form summarization datasets (spanning biomedical, clinical, and chemical domains), we find, among others, that faithfulness calibration is optimal when the negative sets are extractive and more likely to be generated, whereas for relevance calibration, the metric margin between candidates should be maximized and surprise–the disagreement between model and metric defined candidate rankings–minimized.</abstract>
       <url hash="b3bde37e">2023.acl-long.587</url>
@@ -8548,7 +8548,7 @@
       <author><first>Philippe</first><last>Laban</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Jesse</first><last>Vig</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Wojciech</first><last>Kryscinski</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
       <pages>10674-10695</pages>
@@ -8643,13 +8643,13 @@
     </paper>
     <paper id="603">
       <title>Modeling What-to-ask and How-to-ask for Answer-unaware Conversational Question Generation</title>
-      <author><first>Xuan Long</first><last>Do</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Bowei</first><last>Zou</last><affiliation>Institute for Infocomm Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Tran</first><last>Tai</last><affiliation>Mr</affiliation></author>
       <author><first>Liangming</first><last>Pan</last><affiliation>University of California, Santa Barbara (UCSB)</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <pages>10785-10803</pages>
       <abstract>Conversational Question Generation (CQG) is a critical task for machines to assist humans in fulfilling their information needs through conversations. The task is generally cast into two different settings: answer-aware and answer-unaware. While the former facilitates the models by exposing the expected answer, the latter is more realistic and receiving growing attentions recently. What-to-ask and how-to-ask are the two main challenges in the answer-unaware setting. To address the first challenge, existing methods mainly select sequential sentences in context as the rationales. We argue that the conversation generated using such naive heuristics may not be natural enough as in reality, the interlocutors often talk about the relevant contents that are not necessarily sequential in context. Additionally, previous methods decide the type of question to be generated (boolean/span-based) implicitly. Modeling the question type explicitly is crucial as the answer, which hints the models to generate a boolean or span-based question, is unavailable. To this end, we present SG-CQG, a two-stage CQG framework. For the what-to-ask stage, a sentence is selected as the rationale from a semantic graph that we construct, and extract the answer span from it. For the how-to-ask stage, a classifier determines the target answer type of the question via two explicit control signals before generating and filtering. In addition, we propose Conv-Distinct, a novel evaluation metric for CQG, to evaluate the diversity of the generated conversation from a context. Compared with the existing answer-unaware CQG models, the proposed SG-CQG achieves state-of-the-art performance.</abstract>
       <url hash="c7f78062">2023.acl-long.603</url>
@@ -8729,10 +8729,10 @@
     </paper>
     <paper id="609">
       <title><fixed-case>M</fixed-case>asakha<fixed-case>POS</fixed-case>: Part-of-Speech Tagging for Typologically Diverse <fixed-case>A</fixed-case>frican languages</title>
-      <author><first>Cheikh M. Bamba</first><last>Dione</last><affiliation>Gaston Berger University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last><affiliation>Gaston Berger University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Peter</first><last>Nabende</last><affiliation>Makerere University</affiliation></author>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>Thapelo</first><last>Sindane</last><affiliation>University of Pretoria, Masakhane</affiliation></author>
       <author><first>Happy</first><last>Buzaaba</last><affiliation>RIKEN</affiliation></author>
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last><affiliation>Bayero University, Kano</affiliation></author>
@@ -8788,7 +8788,7 @@
       <author><first>Long</first><last>Bai</last><affiliation>School of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Saiping</first><last>Guan</last><affiliation>School of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>10901-10913</pages>
       <abstract>Event Causality Identification (ECI) aims to identify causal relations between events in unstructured texts. This is a very challenging task, because causal relations are usually expressed by implicit associations between events. Existing methods usually capture such associations by directly modeling the texts with pre-trained language models, which underestimate two kinds of semantic structures vital to the ECI task, namely, event-centric structure and event-associated structure. The former includes important semantic elements related to the events to describe them more precisely, while the latter contains semantic paths between two events to provide possible supports for ECI. In this paper, we study the implicit associations between events by modeling the above explicit semantic structures, and propose a Semantic Structure Integration model (SemSIn).It utilizes a GNN-based event aggregator to integrate the event-centric structure information, and employs an LSTM-based path aggregator to capture the event-associated structure information between two events. Experimental results on three widely used datasets show that SemSIn achieves significant improvements over baseline methods.</abstract>
       <url hash="bb3933c5">2023.acl-long.610</url>
@@ -8864,7 +8864,7 @@
       <author><first>I-Hung</first><last>Hsu</last><affiliation>USC Information Sciences Institute</affiliation></author>
       <author><first>Zhiyu</first><last>Xie</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Kuan-Hao</first><last>Huang</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Amazon / Alexa</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Amazon / Alexa</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>10976-10993</pages>
       <abstract>Event argument extraction (EAE) identifies event arguments and their specific roles for a given event. Recent advancement in generation-based EAE models has shown great performance and generalizability over classification-based models. However, existing generation-based EAE models mostly focus on problem re-formulation and prompt design, without incorporating additional information that has been shown to be effective for classification-based models, such as the abstract meaning representation (AMR) of the input passages. Incorporating such information into generation-based models is challenging due to the heterogeneous nature of the natural language form prevalently used in generation-based models and the structured form of AMRs. In this work, we study strategies to incorporate AMR into generation-based EAE models. We propose AMPERE, which generates AMR-aware prefixes for every layer of the generation model. Thus, the prefix introduces AMR information to the generation-based EAE model and then improves the generation. We also introduce an adjusted copy mechanism to AMPERE to help overcome potential noises brought by the AMR graph. Comprehensive experiments and analyses on ACE2005 and ERE datasets show that AMPERE can get 4% - 10% absolute F1 score improvements with reduced training data and it is in general powerful across different training sizes.</abstract>
@@ -9005,7 +9005,7 @@
       <author><first>Zhengfu</first><last>He</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qin</first><last>Zhu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>11156-11172</pages>
       <abstract>Prompt tuning is a parameter-efficient approach to adapting pre-trained language models to downstream tasks. Although prompt tuning has been shown to match the performance of full model tuning when training data is sufficient, it tends to struggle in few-shot learning settings. In this paper, we present Multi-task Pre-trained Modular Prompt (MP2) to boost prompt tuning for few-shot learning. MP2 is a set of combinable prompts pre-trained on 38 Chinese tasks. On downstream tasks, the pre-trained prompts are selectively activated and combined, leading to strong compositional generalization to unseen tasks. To bridge the gap between pre-training and fine-tuning, we formulate upstream and downstream tasks into a unified machine reading comprehension task. Extensive experiments under two learning paradigms, i.e., gradient descent and black-box tuning, show that MP2 significantly outperforms prompt tuning, full model tuning, and prior prompt pre-training methods in few-shot settings. In addition, we demonstrate that MP2 can achieve surprisingly fast and strong adaptation to downstream tasks by merely learning 8 parameters to combine the pre-trained modular prompts.</abstract>
       <url hash="f4a30928">2023.acl-long.625</url>
@@ -9020,7 +9020,7 @@
       <author><first>Linlin</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yew Ken</first><last>Chia</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Boyang</first><last>Li</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <pages>11173-11195</pages>
       <abstract>Data annotation is the process of labeling data that could be used to train machine learning models. Having high quality annotation is crucial, as it allows the model to learn the relationship between the input data and the desired output. GPT-3, a large-scale language model developed by OpenAI, has demonstrated im- impressive zero- and few-shot performance on a wide range of NLP tasks. It is therefore natural to wonder whether it can be used to effectively annotate data for NLP tasks. In this paper, we evaluate the performance of GPT-3 as a data annotator by comparing it with traditional data annotation methods and analyzing its output on a range of tasks. Through this analysis, we aim to provide insight into the potential of GPT-3 as a general-purpose data annotator in NLP.</abstract>
@@ -9089,7 +9089,7 @@
       <author><first>Akshita</first><last>Bhagia</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Yizhong</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>11272-11288</pages>
       <abstract>Recent NLP models have shown the remarkable ability to effectively generalise ‘zero-shot’ to new tasks using only natural language instructions as guidance. However, many of these approaches suffer from high computational costs due to their reliance on concatenating lengthy instructions with every input example, resulting in costly reprocessing of the instruction. To avoid this, we introduce Hypernetworks for INstruction Tuning (HINT), which convert task instructions and examples into parameter-efficient modules inserted into an underlying model using a pretrained text encoder, eliminating the need to include instructions in the model input. The hypernetwork in HINT also produces an encoded instruction, which we concatenate with encoded inputs during decoding to further improve performance. HINT models outperform strong state-of-the-art baselines by over 10% when controlling for compute (measured in FLOPs). By converting instructions into modules, HINT models can effectively disregard the length of instructions and few-shot example inputs in terms of compute usage. As a result, HINT can enhance its performance by up to 25% by incorporating additional few-shot data, while utilizing only up to 5% more compute. This combines the strengths of parameter-efficient fine-tuning and in-context learning.</abstract>
       <url hash="395b41a9">2023.acl-long.631</url>
@@ -9256,7 +9256,7 @@
       <title>Contrastive Error Attribution for Finetuned Language Models</title>
       <author><first>Faisal</first><last>Ladhak</last><affiliation>Columbia University</affiliation></author>
       <author><first>Esin</first><last>Durmus</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
       <pages>11482-11498</pages>
       <abstract>Recent work has identified noisy and misannotated data as a core cause of hallucinations and unfaithful outputs in Natural Language Generation (NLG) tasks. Consequently, identifying and removing these examples is a key open challenge in creating reliable NLG systems. In this work, we introduce a framework to identify and remove low-quality training instances that lead to undesirable outputs, such as faithfulness errors in text summarization. We show that existing approaches for error tracing, such as gradient-based influence measures, do not perform reliably for detecting faithfulness errors in NLG datasets. We overcome the drawbacks of existing error tracing methods through a new, contrast-based estimate that compares undesired generations to human-corrected outputs. Our proposed method can achieve a mean average precision of 0.93 at detecting known data errors across synthetic tasks with known ground truth, substantially outperforming existing approaches. Using this approach and re-training models on cleaned data leads to a 70% reduction in entity hallucinations on the NYT dataset and a 55% reduction in semantic errors on the E2E dataset.</abstract>
       <url hash="6028baef">2023.acl-long.643</url>
@@ -9400,16 +9400,16 @@
       <author><first>Hailey</first><last>Schoelkopf</last><affiliation>EleutherAI</affiliation></author>
       <author><first>Niklas</first><last>Muennighoff</last><affiliation>Hugging Face</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Khalid</first><last>Almubarak</last><affiliation>Prince Sattam bin Abdulaziz University</affiliation></author>
       <author><first>M Saiful</first><last>Bari</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lintang</first><last>Sutawika</last><affiliation>Datasaur.ai</affiliation></author>
       <author><first>Jungo</first><last>Kasai</last><affiliation>University of Washington</affiliation></author>
       <author><first>Ahmed</first><last>Baruwa</last><affiliation>University of Oregon</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Stella</first><last>Biderman</last><affiliation>EleutherAI</affiliation></author>
       <author><first>Edward</first><last>Raff</last><affiliation>Booz Allen Hamilton</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Vassilina</first><last>Nikoulina</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>11682-11703</pages>
       <abstract>The BLOOM model is a large publicly available multilingual language model, but its pretraining was limited to 46 languages. To extend the benefits of BLOOM to other languages without incurring prohibitively large costs, it is desirable to adapt BLOOM to new languages not seen during pretraining. In this work, we apply existing language adaptation strategies to BLOOM and benchmark its zero-shot prompting performance on eight new languages in a resource-constrained setting. We find language adaptation to be effective at improving zero-shot performance in new languages. Surprisingly, we find that adapter-based finetuning is more effective than continued pretraining for large models. In addition, we discover that prompting performance is not significantly affected by language specifics, such as the writing system. It is primarily determined by the size of the language adaptation data. We also add new languages to BLOOMZ, which is a multitask finetuned version of BLOOM capable of following task instructions zero-shot. We find including a new language in the multitask fine-tuning mixture to be the most effective method to teach BLOOMZ a new language. We conclude that with sufficient training data language adaptation can generalize well to diverse languages. Our code is available at <url>https://github.com/bigscience-workshop/multilingual-modeling</url>.</abstract>
@@ -9489,7 +9489,7 @@
     <paper id="659">
       <title>Learning to Initialize: Can Meta Learning Improve Cross-task Generalization in Prompt Tuning?</title>
       <author><first>Chengwei</first><last>Qin</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Qian</first><last>Li</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Ruochen</first><last>Zhao</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>11802-11832</pages>
@@ -9519,7 +9519,7 @@
       <author><first>Fitsum</first><last>Gaim</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Wonsuk</first><last>Yang</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Hancheol</first><last>Park</last><affiliation>School of Computing, KAIST</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
       <pages>11857-11870</pages>
       <abstract>Question-Answering (QA) has seen significant advances recently, achieving near human-level performance over some benchmarks. However, these advances focus on high-resourced languages such as English, while the task remains unexplored for most other languages, mainly due to the lack of annotated datasets. This work presents a native QA dataset for an East African language, Tigrinya. The dataset contains 10.6K question-answer pairs spanning 572 paragraphs extracted from 290 news articles on various topics. The dataset construction method is discussed, which is applicable to constructing similar resources for related languages. We present comprehensive experiments and analyses of several resource-efficient approaches to QA, including monolingual, cross-lingual, and multilingual setups, along with comparisons against machine-translated silver data. Our strong baseline models reach 76% in the F1 score, while the estimated human performance is 92%, indicating that the benchmark presents a good challenge for future work. We make the dataset, models, and leaderboard publicly available.</abstract>
       <url hash="73bc8885">2023.acl-long.661</url>
@@ -9532,7 +9532,7 @@
       <title><fixed-case>ESCOXLM</fixed-case>-<fixed-case>R</fixed-case>: Multilingual Taxonomy-driven Pre-training for the Job Market Domain</title>
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last><affiliation>IT University of Copenhagen</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>11871-11890</pages>
       <abstract>The increasing number of benchmarks for Natural Language Processing (NLP) tasks in the computational job market domain highlights the demand for methods that can handle job-related tasks such as skill extraction, skill classification, job title classification, and de-identification. While some approaches have been developed that are specific to the job market domain, there is a lack of generalized, multilingual models and benchmarks for these tasks. In this study, we introduce a language model called ESCOXLM-R, based on XLM-R-large, which uses domain-adaptive pre-training on the European Skills, Competences, Qualifications and Occupations (ESCO) taxonomy, covering 27 languages. The pre-training objectives for ESCOXLM-R include dynamic masked language modeling and a novel additional objective for inducing multilingual taxonomical ESCO relations. We comprehensively evaluate the performance of ESCOXLM-R on 6 sequence labeling and 3 classification tasks in 4 languages and find that it achieves state-of-the-art results on 6 out of 9 datasets. Our analysis reveals that ESCOXLM-R performs better on short spans and outperforms XLM-R-large on entity-level and surface-level span-F1, likely due to ESCO containing short skill and occupation titles, and encoding information on the entity-level.</abstract>
       <url hash="44f77bae">2023.acl-long.662</url>
@@ -9548,7 +9548,7 @@
       <author><first>Asish</first><last>Ghoshal</last><affiliation>Facebook AI</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Yashar</first><last>Mehdad</last><affiliation>Facebook AI</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
       <author><first>Xilun</first><last>Chen</last><affiliation>Meta AI</affiliation></author>
       <pages>11891-11907</pages>
       <abstract>Multi-vector retrieval methods combine the merits of sparse (e.g. BM25) and dense (e.g. DPR) retrievers and have achieved state-of-the-art performance on various retrieval tasks. These methods, however, are orders of magnitude slower and need much more space to store their indices compared to their single-vector counterparts. In this paper, we unify different multi-vector retrieval models from a token routing viewpoint and propose conditional token interaction via dynamic lexical routing, namely CITADEL, for efficient and effective multi-vector retrieval.CITADEL learns to route different token vectors to the predicted lexical keys such that a query token vector only interacts with document token vectors routed to the same key. This design significantly reduces the computation cost while maintaining high accuracy. Notably, CITADEL achieves the same or slightly better performance than the previous state of the art, ColBERT-v2, on both in-domain (MS MARCO) and out-of-domain (BEIR) evaluations, while being nearly 40 times faster. Source code and data are available at <url>https://github.com/facebookresearch/dpr-scale/tree/citadel</url>.</abstract>
@@ -9580,7 +9580,7 @@
       <author><first>Xiaoran</first><last>Liu</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Jonah</first><last>Luby</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Christian</first><last>Luhmann</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <pages>11923-11936</pages>
       <abstract>While transformer-based systems have enabled greater accuracies with fewer training examples, data acquisition obstacles still persist for rare-class tasks – when the class label is very infrequent (e.g. &lt; 5% of samples). Active learning has in general been proposed to alleviate such challenges, but choice of selection strategy, the criteria by which rare-class examples are chosen, has not been systematically evaluated. Further, transformers enable iterative transfer-learning approaches. We propose and investigate transfer- and active learning solutions to the rare class problem of dissonance detection through utilizing models trained on closely related tasks and the evaluation of acquisition strategies, including a proposed probability-of-rare-class (PRC) approach. We perform these experiments for a specific rare-class problem: collecting language samples of cognitive dissonance from social media. We find that PRC is a simple and effective strategy to guide annotations and ultimately improve model accuracy while transfer-learning in a specific order can improve the cold-start performance of the learner but does not benefit iterations of active learning.</abstract>
       <url hash="bd022a18">2023.acl-long.665</url>
@@ -9594,7 +9594,7 @@
       <author><first>Qi</first><last>Jia</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yizhu</first><last>Liu</last><affiliation>Meituan</affiliation></author>
       <author><first>Haifeng</first><last>Tang</last><affiliation>China Merchants Bank Credit Card Center</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>11937-11950</pages>
       <abstract>Curriculum learning has shown promising improvements in multiple domains by training machine learning models from easy samples to hard ones. Previous works which either design rules or train models for scoring the difficulty highly rely on task-specific expertise, and cannot generalize. Inspired by the “easy-to-hard” intuition, we propose to do in-sample curriculum learning for natural language generation tasks. Our learning strategy starts training the model to generate the last few words, i.e., do sequence completion, and gradually extends to generate the whole output sequence. Comprehensive experiments show that it generalizes well to different tasks and achieves significant improvements over strong baselines.</abstract>
       <url hash="a90dfccd">2023.acl-long.666</url>
@@ -9646,7 +9646,7 @@
       <author><first>Xinyi</first><last>Mou</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>11996-12012</pages>
       <abstract>Modeling political actors is at the core of quantitative political science. Existing works have incorporated contextual information to better learn the representation of political actors for specific tasks through graph models. However, they are limited to the structure and objective of training settings and can not be generalized to all politicians and other tasks. In this paper, we propose a Unified Pre-training Architecture for Political Actor Modeling based on language (UPPAM). In UPPAM, we aggregate statements to represent political actors and learn the mapping from languages to representation, instead of learning the representation of particular persons. We further design structure-aware contrastive learning and behavior-driven contrastive learning tasks, to inject multidimensional information in the political context into the mapping. In this framework, we can profile political actors from different aspects and solve various downstream tasks. Experimental results demonstrate the effectiveness and capability of generalization of our method.</abstract>
       <url hash="d31ce545">2023.acl-long.670</url>
@@ -9682,8 +9682,8 @@
     <paper id="673">
       <title>Beyond Contrastive Learning: A Variational Generative Model for Multilingual Retrieval</title>
       <author><first>John</first><last>Wieting</last><affiliation>University of Illinois; TTI-Chicago; CMU; Google</affiliation></author>
-      <author><first>Jonathan</first><last>Clark</last><affiliation>Google</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last><affiliation>Google</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last><affiliation>University of California San Diego</affiliation></author>
       <pages>12044-12066</pages>
@@ -9700,7 +9700,7 @@
       <author><first>Tianle</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Sachin</first><last>Kumar</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Kyunghyun</first><last>Cho</last><affiliation>New York University</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>University of Washington</affiliation></author>
       <pages>12067-12097</pages>
       <abstract>In this work, we explore a useful but often neglected methodology for robustness analysis of text generation evaluation metrics: stress tests with synthetic data. Basically, we design and synthesize a wide range of potential errors and check whether they result in a commensurate drop in the metric scores. We examine a range of recently proposed evaluation metrics based on pretrained language models, for the tasks of open-ended generation, translation, and summarization. Our experiments reveal interesting insensitivities, biases, or even loopholes in existing metrics. For example, we find that BERTScore is confused by truncation errors in summarization, and MAUVE (built on top of GPT-2) is insensitive to errors at the beginning or middle of generations. Further, we investigate the reasons behind these blind spots and suggest practical workarounds for a more reliable evaluation of text generation. We have released our code and data at <url>https://github.com/cloudygoose/blindspot_nlg</url>.</abstract>
@@ -9798,7 +9798,7 @@
       <author><first>Hoyun</first><last>Song</last><affiliation>KAIST</affiliation></author>
       <author><first>Jisu</first><last>Shin</last><affiliation>KAIST</affiliation></author>
       <author><first>Huije</first><last>Lee</last><affiliation>Korea Advanced Institute of Science and Technology (KAIST)</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
       <pages>12190-12206</pages>
       <abstract>Social media is one of the most highly sought resources for analyzing characteristics of the language by its users. In particular, many researchers utilized various linguistic features of mental health problems from social media. However, existing approaches to detecting mental disorders face critical challenges, such as the scarcity of high-quality data or the trade-off between addressing the complexity of models and presenting interpretable results grounded in expert domain knowledge. To address these challenges, we design a simple but flexible model that preserves domain-based interpretability. We propose a novel approach that captures the semantic meanings directly from the text and compares them to symptom-related descriptions. Experimental results demonstrate that our model outperforms relevant baselines on various mental disorder detection tasks. Our detailed analysis shows that the proposed model is effective at leveraging domain knowledge, transferable to other mental disorders, and providing interpretable detection results.</abstract>
       <url hash="7c808d70">2023.acl-long.681</url>
@@ -9810,8 +9810,8 @@
       <title>Downstream Datasets Make Surprisingly Good Pretraining Corpora</title>
       <author><first>Kundan</first><last>Krishna</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Saurabh</first><last>Garg</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Jeffrey</first><last>Bigham</last><affiliation>CMU/Apple</affiliation></author>
-      <author><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last><affiliation>CMU/Apple</affiliation></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>12207-12222</pages>
       <abstract>For most natural language processing tasks, the dominant practice is to finetune large pretrained transformer models (e.g., BERT) using smaller downstream datasets. Despite the success of this approach, it remains unclear to what extent these gainsare attributable to the massive background corpora employed for pretraining versus to the pretraining objectives themselves. This paper introduces a large-scale study of self-pretraining, where the same (downstream) training data is used for both pretraining and finetuning.In experiments addressing both ELECTRA and RoBERTa models and 10 distinct downstream classification datasets, we observe that self-pretraining rivals standard pretraining on the BookWiki corpus (despite using around 10x–500x less data), outperforming the latter on 7 and 5 datasets, respectively. Surprisingly, these task-specific pretrained models often perform well on other tasks,including the GLUE benchmark. Besides classification tasks, self-pretraining also provides benefits on structured output prediction tasks such as span based question answering and commonsense inference, often providing more than 50% of the performance boosts provided by pretraining on the BookWiki corpus. Our results hint that in many scenarios, performance gains attributable to pretraining are driven primarily by the pretraining objective itself and are not always attributable to the use of external pretraining data in massive amounts. These findings are especially relevant in light of concerns about intellectual property and offensive content in web-scale pretraining data.</abstract>
       <url hash="e3904434">2023.acl-long.682</url>
@@ -9825,7 +9825,7 @@
       <author><first>Chenwei</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
       <author><first>Xian</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Jingbo</first><last>Shang</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>12223-12239</pages>
       <abstract>We present a new task setting for attribute mining on e-commerce products, serving as a practical solution to extract open-world attributes without extensive human intervention. Our supervision comes from a high-quality seed attribute set bootstrapped from existing resources, and we aim to expand the attribute vocabulary of existing seed types, and also to discover any new attribute types automatically. A new dataset is created to support our setting, and our approach Amacer is proposed specifically to tackle the limited supervision. Especially, given that no direct supervision is available for those unseen new attributes, our novel formulation exploits self-supervised heuristic and unsupervised latent attributes, which attains implicit semantic signals as additional supervision by leveraging product context. Experiments suggest that our approach surpasses various baselines by 12 F1, expanding attributes of existing types significantly by up to 12 times, and discovering values from 39% new types.</abstract>
       <url hash="82ec47fa">2023.acl-long.683</url>
@@ -9839,7 +9839,7 @@
       <author><first>Ping</first><last>Nie</last><affiliation>Peking University</affiliation></author>
       <author><first>Jie</first><last>Cai</last><affiliation>Peking University</affiliation></author>
       <author><first>Haifeng</first><last>Wang</last><affiliation>Baidu</affiliation></author>
-      <author><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Peng</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>ETH Zurich</affiliation></author>
       <author><first>Kaiping</first><last>Peng</last><affiliation>Tsinghua University</affiliation></author>
@@ -9881,9 +9881,9 @@
       <author><first>Ari</first><last>Holtzman</last><affiliation>University of Washington</affiliation></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Percy</first><last>Liang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <pages>12286-12312</pages>
       <abstract>Given a language model (LM), maximum probability is a poor decoding objective for open-ended generation, because it produces short and repetitive text. On the other hand, sampling can often produce incoherent text that drifts from the original topics. We propose contrastive decoding (CD), a reliable decoding approach that optimizes a contrastive objective subject to a plausibility constraint. The contrastive objective returns the difference between the likelihood under a large LM (called the expert, e.g. OPT-13B) and a small LM (called the amateur, e.g. OPT-125M), and the constraint ensures that the outputs are plausible. CD is inspired by the fact that the failures of larger LMs (e.g., repetition, inco- herence) are even more prevalent in smaller LMs, and that this difference signals which texts should be preferred. CD requires zero additional training, and produces higher quality text than decoding from the larger LM alone. It also works across model scales (OPT-13B and GPT2-1.5B) and significantly outperforms four strong decoding algorithms (e.g., nucleus, top-k) in automatic and human evaluations across wikipedia, news and story domains.</abstract>
@@ -9938,7 +9938,7 @@
     </paper>
     <paper id="691">
       <title><fixed-case>W</fixed-case>iki<fixed-case>B</fixed-case>io: a Semantic Resource for the Intersectional Analysis of Biographical Events</title>
-      <author><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
       <author><first>Rossana</first><last>Damiano</last><affiliation>Università di Torino</affiliation></author>
       <author><first>Enrico</first><last>Mensa</last><affiliation>University of Turin - Department of Computer Science</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
@@ -9971,7 +9971,7 @@
       <author><first>Rahul</first><last>Aralikatte</last><affiliation>Mila</affiliation></author>
       <author><first>Gowtham</first><last>Ramesh</last><affiliation>University of Wisconsin Madison</affiliation></author>
       <author><first>Shreya</first><last>Goyal</last><affiliation>American Express</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft AI and Research</affiliation></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>IIT Madras</affiliation></author>
       <pages>12402-12426</pages>
@@ -10038,9 +10038,9 @@
       <author><first>Simone</first><last>Tedeschi</last><affiliation>Babelscape</affiliation></author>
       <author><first>Johan</first><last>Bos</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Thierry</first><last>Declerck</last><affiliation>DFKI GmbH</affiliation></author>
-      <author><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Alexander</first><last>Koller</last><affiliation>Saarland University</affiliation></author>
       <author><first>Simon</first><last>Krek</last><affiliation>Jožef Stefan Institute</affiliation></author>
       <author><first>Steven</first><last>Schockaert</last><affiliation>Cardiff University</affiliation></author>
@@ -10075,7 +10075,7 @@
     <paper id="699">
       <title>Hints on the data for language modeling of synthetic languages with transformers</title>
       <author><first>Rodolfo</first><last>Zevallos</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
-      <author><first>Nuria</first><last>Bel</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <pages>12508-12522</pages>
       <abstract>Language Models (LM) are becoming more and more useful for providing representations upon which to train Natural Language Processing applications. However, there is now clear evidence that attention-based transformers require a critical amount of language data to produce good enough LMs. The question we have addressed in this paper is to what extent the critical amount of data varies for languages of different morphological typology, in particular those that have a rich inflectional morphology, and whether the tokenization method to preprocess the data can make a difference. These details can be important for low-resourced languages that need to plan the production of datasets. We evaluated intrinsically and extrinsically the differences of five different languages with different pretraining dataset sizes and three different tokenization methods for each. The results confirm that the size of the vocabulary due to morphological characteristics is directly correlated with both the LM perplexity and the performance of two typical downstream tasks such as NER identification and POS labeling. The experiments also provide new evidence that a canonical tokenizer can reduce perplexity by more than a half for a polysynthetic language like Quechua as well as raising F1 from 0.8 to more than 0.9 in both downstream tasks with a LM trained with only 6M tokens.</abstract>
       <url hash="c1b98605">2023.acl-long.699</url>
@@ -10127,7 +10127,7 @@
       <title>Large-scale Lifelong Learning of In-context Instructions and How to Tackle It</title>
       <author><first>Jisoo</first><last>Mok</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Jaeyoung</first><last>Do</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Tara</first><last>Taghavi</last><affiliation>Amazon</affiliation></author>
       <author><first>Seunghak</first><last>Yu</last><affiliation>Naver Search US</affiliation></author>
       <author><first>Sungroh</first><last>Yoon</last><affiliation>Seoul National University</affiliation></author>
@@ -10194,7 +10194,7 @@
       <title>Unsupervised Selective Rationalization with Noise Injection</title>
       <author><first>Adam</first><last>Storek</last><affiliation>Columbia University</affiliation></author>
       <author><first>Melanie</first><last>Subbiah</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>12647-12659</pages>
       <abstract>A major issue with using deep learning models in sensitive applications is that they provide no explanation for their output. To address this problem, unsupervised selective rationalization produces rationales alongside predictions by chaining two jointly-trained components, a rationale generator and a predictor. Although this architecture guarantees that the prediction relies solely on the rationale, it does not ensure that the rationale contains a plausible explanation for the prediction. We introduce a novel training technique that effectively limits generation of implausible rationales by injecting noise between the generator and the predictor. Furthermore, we propose a new benchmark for evaluating unsupervised selective rationalization models using movie reviews from existing datasets. We achieve sizeable improvements in rationale plausibility and task accuracy over the state-of-the-art across a variety of tasks, including our new benchmark, while maintaining or improving model faithfulness.</abstract>
       <url hash="ba11e270">2023.acl-long.707</url>
@@ -10300,7 +10300,7 @@
     </paper>
     <paper id="715">
       <title><fixed-case>MGR</fixed-case>: Multi-generator Based Rationalization</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Haozhao</first><last>Wang</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Jun</first><last>Wang</last><affiliation>iWudao Tech</affiliation></author>
       <author><first>Ruixuan</first><last>Li</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
@@ -10322,7 +10322,7 @@
       <author><first>Di</first><last>Lu</last><affiliation>Dataminr</affiliation></author>
       <author><first>Shihao</first><last>Ran</last><affiliation>Dataminr</affiliation></author>
       <author><first>Ke</first><last>Zhang</last><affiliation>Dataminr, inc</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>12788-12812</pages>
       <abstract>The proliferation of automatic faithfulness metrics for summarization has produced a need for benchmarks to evaluate them. While existing benchmarks measure the correlation with human judgements of faithfulness on model-generated summaries, they are insufficient for diagnosing whether metrics are: 1) consistent, i.e., indicate lower faithfulness as errors are introduced into a summary, 2) effective on human-written texts, and 3) sensitive to different error types (as summaries can contain multiple errors). To address these needs, we present a benchmark of unfaithful minimal pairs (BUMP), a dataset of 889 human-written, minimally different summary pairs, where a single error is introduced to a summary from the CNN/DailyMail dataset to produce an unfaithful summary. We find BUMP complements existing benchmarks in a number of ways: 1) the summaries in BUMP are harder to discriminate and less probable under SOTA summarization models, 2) unlike non-pair-based datasets, BUMP can be used to measure the consistency of metrics, and reveals that the most discriminative metrics tend not to be the most consistent, and 3) unlike datasets containing generated summaries with multiple errors, BUMP enables the measurement of metrics’ performance on individual error types.</abstract>
@@ -10422,7 +10422,7 @@
       <author><first>Kuan-Hao</first><last>Huang</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Shuning</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Wenxin</first><last>Cheng</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Amazon / Alexa</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Amazon / Alexa</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>12917-12932</pages>
@@ -10469,7 +10469,7 @@
       <author><first>Philipp</first><last>Wicke</last><affiliation>Institute for Information and Language Processing, LMU</affiliation></author>
       <author><first>Renhao</first><last>Pei</last><affiliation>Ludwig Maximilian University of Munich</affiliation></author>
       <author><first>Robert</first><last>Zangenfeind</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>12969-13000</pages>
       <abstract>Languages differ in how they divide up the world into concepts and words; e.g., in contrast to English, Swahili has a single concept for ‘belly’ and ‘womb’. We investigate these differences in conceptualization across 1,335 languages by aligning concepts in a parallel corpus. To this end, we propose Conceptualizer, a method that creates a bipartite directed alignment graph between source language concepts and sets of target language strings. In a detailed linguistic analysis across all languages for one concept (‘bird’) and an evaluation on gold standard data for 32 Swadesh concepts, we show that Conceptualizer has good alignment accuracy. We demonstrate the potential of research on conceptualization in NLP with two experiments. (1) We define crosslingual stability of a concept as the degree to which it has 1-1 correspondences across languages, and show that concreteness predicts stability. (2) We represent each language by its conceptualization pattern for 83 concepts, and define a similarity measure on these representations. The resulting measure for the conceptual similarity between two languages is complementary to standard genealogical, typological, and surface similarity measures. For four out of six language families, we can assign languages to their correct family based on conceptual similarity with accuracies between 54% and 87%</abstract>
       <url hash="dfa43822">2023.acl-long.726</url>
@@ -10529,7 +10529,7 @@
       <title>Extrinsic Evaluation of Machine Translation Metrics</title>
       <author><first>Nikita</first><last>Moghe</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Tom</first><last>Sherborne</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Alexandra</first><last>Birch</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>13060-13078</pages>
       <abstract>Automatic machine translation (MT) metrics are widely used to distinguish the quality of machine translation systems across relatively large test sets (system-level evaluation). However, it is unclear if automatic metrics are reliable at distinguishing good translations from bad translations at the sentence level (segment-level evaluation). In this paper, we investigate how useful MT metrics are at detecting the segment-level quality by correlating metrics with how useful the translations are for downstream task. We evaluate the segment-level performance of the most widely used MT metrics (chrF, COMET, BERTScore, etc.) on three downstream cross-lingual tasks (dialogue state tracking, question answering, and semantic parsing). For each task, we only have access to a monolingual task-specific model and a translation model. We calculate the correlation between the metric’s ability to predict a good/bad translation with the success/failure on the final task for the machine translated test sentences. Our experiments demonstrate that all metrics exhibit negligible correlation with the extrinsic evaluation of the downstream outcomes. We also find that the scores provided by neural metrics are not interpretable, in large part due to having undefined ranges. We synthesise our analysis into recommendations for future MT metrics to produce labels rather than scores for more informative interaction between machine translation and multilingual language understanding.</abstract>
@@ -10582,9 +10582,9 @@
       <author><first>Mohamed</first><last>Abdalla</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Jan Philip</first><last>Wahle</last><affiliation>University of Göttingen</affiliation></author>
       <author><first>Terry</first><last>Ruas</last><affiliation>University of Göttingen</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris Saclay, CNRS, LISN</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris Saclay, CNRS, LISN</affiliation></author>
       <author><first>Fanny</first><last>Ducel</last><affiliation>Sorbonne Universite, LORIA</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>NRC</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>NRC</affiliation></author>
       <author><first>Karen</first><last>Fort</last><affiliation>Sorbonne Universite and LORIA</affiliation></author>
       <pages>13141-13160</pages>
       <abstract>Recent advances in deep learning methods for natural language processing (NLP) have created new business opportunities and made NLP research critical for industry development. As one of the big players in the field of NLP, together with governments and universities, it is important to track the influence of industry on research. In this study, we seek to quantify and characterize industry presence in the NLP community over time. Using a corpus with comprehensive metadata of 78,187 NLP publications and 701 resumes of NLP publication authors, we explore the industry presence in the field since the early 90s. We find that industry presence among NLP authors has been steady before a steep increase over the past five years (180% growth from 2017 to 2022). A few companies account for most of the publications and provide funding to academic researchers through grants and internships. Our study shows that the presence and impact of the industry on natural language processing research are significant and fast-growing. This work calls for increased transparency of industry influence in the field.</abstract>
@@ -10611,7 +10611,7 @@
     </paper>
     <paper id="736">
       <title>Do Question Answering Modeling Improvements Hold Across Benchmarks?</title>
-      <author><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Tony</first><last>Lee</last><affiliation>Stanford University</affiliation></author>
       <author><first>Robin</first><last>Jia</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Percy</first><last>Liang</last><affiliation>Stanford University</affiliation></author>
@@ -10637,7 +10637,7 @@
       <author><first>Qinhong</first><last>Zhou</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zonghan</first><last>Yang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>13234-13248</pages>
       <abstract>Conventional knowledge distillation (KD) methods require access to the internal information of teachers, e.g., logits. However, such information may not always be accessible for large pre-trained language models (PLMs). In this work, we focus on decision-based KD for PLMs, where only teacher decisions (i.e., top-1 labels) are accessible. Considering the information gap between logits and decisions, we propose a novel method to estimate logits from the decision distributions. Specifically, decision distributions can be both derived as a function of logits theoretically and estimated with test-time data augmentation empirically. By combining the theoretical and empirical estimations of the decision distributions together, the estimation of logits can be successfully reduced to a simple root-finding problem. Extensive experiments show that our method significantly outperforms strong baselines on both natural language understanding and machine reading comprehension datasets.</abstract>
       <url hash="b1c395bf">2023.acl-long.738</url>
@@ -10665,7 +10665,7 @@
       <author><first>Tong</first><last>Chen</last><affiliation>The University of Queensland</affiliation></author>
       <author><first>Wei</first><last>Yuan</last><affiliation>The University of Queensland</affiliation></author>
       <author><first>Xingshan</first><last>Zeng</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <author><first>Hongzhi</first><last>Yin</last><affiliation>The University of Queensland</affiliation></author>
       <pages>13264-13276</pages>
       <abstract>Recent legislation of the “right to be forgotten” has led to the interest in machine unlearning, where the learned models are endowed with the function to forget information about specific training instances as if they have never existed in the training set. Previous work mainly focuses on computer vision scenarios and largely ignores the essentials of unlearning in NLP field, where text data contains more explicit and sensitive personal information than images. In this paper, we propose a general unlearning framework called KGA to induce forgetfulness. Different from previous work that tries to recover gradients or forces models to perform close to one specific distribution, KGA maintains distribution differences (i.e., knowledge gap). This relaxes the distribution assumption. Furthermore, we first apply the unlearning method to various NLP tasks (i.e., classification, translation, response generation) and propose several unlearning evaluation metrics with pertinence. Experiments on large-scale datasets show that KGA yields comprehensive improvements over baselines, where extensive analyses further validate the effectiveness of KGA and provide insight into unlearning for NLP tasks.</abstract>
@@ -10739,7 +10739,7 @@
     <paper id="745">
       <title>Attention as a Guide for Simultaneous Speech Translation</title>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom Video Communications</affiliation></author>
       <pages>13340-13356</pages>
       <abstract>In simultaneous speech translation (SimulST), effective policies that determine when to write partial translations are crucial to reach high output quality with low latency. Towards this objective, we propose EDAtt (Encoder-Decoder Attention), an adaptive policy that exploits the attention patterns between audio source and target textual translation to guide an offline-trained ST model during simultaneous inference. EDAtt exploits the attention scores modeling the audio-translation relation to decide whether to emit a partial hypothesis or wait for more audio input. This is done under the assumption that, if attention is focused towards the most recently received speech segments, the information they provide can be insufficient to generate the hypothesis (indicating that the system has to wait for additional audio input). Results on en-&gt;de, es show that EDAtt yields better results compared to the SimulST state of the art, with gains respectively up to 7 and 4 BLEU points for the two languages, and with a reduction in computational-aware latency up to 1.4s and 0.7s compared to existing SimulST policies applied to offline-trained models.</abstract>
@@ -10832,7 +10832,7 @@
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin university</affiliation></author>
       <author><first>Shangjie</first><last>Li</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Yikun</first><last>Lei</last><affiliation>Tianjin university</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>13433-13447</pages>
       <abstract>Image translation is a task that translates an image containing text in the source language to the target language. One major challenge with image translation is the modality gap between visual text inputs and textual inputs/outputs of machine translation (MT). In this paper, we propose PEIT, an end-to-end image translation framework that bridges the modality gap with pre-trained models. It is composed of four essential components: a visual encoder, a shared encoder-decoder backbone network, a vision-text representation aligner equipped with the shared encoder and a cross-modal regularizer stacked over the shared decoder. Both the aligner and regularizer aim at reducing the modality gap. To train PEIT, we employ a two-stage pre-training strategy with an auxiliary MT task: (1) pre-training the MT model on the MT training data to initialize the shared encoder-decoder backbone network; and (2) pre-training PEIT with the aligner and regularizer on a synthesized dataset with rendered images containing text from the MT training data. In order to facilitate the evaluation of PEIT and promote research on image translation, we create a large-scale image translation corpus ECOIT containing 480K image-translation pairs via crowd-sourcing and manual post-editing from real-world images in the e-commerce domain. Experiments on the curated ECOIT benchmark dataset demonstrate that PEIT substantially outperforms both cascaded image translation systems (OCR+MT) and previous strong end-to-end image translation model, with fewer parameters and faster decoding speed.</abstract>
       <url hash="861b613e">2023.acl-long.751</url>
@@ -10873,7 +10873,7 @@
       <author><first>Yeganeh</first><last>Kordi</last><affiliation>Tehran Polytechnic</affiliation></author>
       <author><first>Swaroop</first><last>Mishra</last><affiliation>Arizona State University</affiliation></author>
       <author><first>Alisa</first><last>Liu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Daniel</first><last>Khashabi</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>13484-13508</pages>
@@ -10898,7 +10898,7 @@
       <title>Dissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis</title>
       <author><first>Ta-Chung</first><last>Chi</last><affiliation>carnegie mellon university</affiliation></author>
       <author><first>Ting-Han</first><last>Fan</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Peter</first><last>Ramadge</last><affiliation>Princeton University</affiliation></author>
       <pages>13522-13537</pages>
       <abstract>Length extrapolation permits training a transformer language model on short sequences that preserves perplexities when tested on substantially longer sequences.A relative positional embedding design, ALiBi, has had the widest usage to date. We dissect ALiBi via the lens of receptive field analysis empowered by a novel cumulative normalized gradient tool. The concept of receptive field further allows us to modify the vanilla Sinusoidal positional embedding to create <b>Sandwich</b>, the first parameter-free relative positional embedding design that truly length information uses longer than the training sequence. Sandwich shares with KERPLE and T5 the same logarithmic decaying temporal bias pattern with learnable relative positional embeddings; these elucidate future extrapolatable positional embedding design.</abstract>
@@ -11055,10 +11055,10 @@
       <author><first>Mengzhou</first><last>Xia</last><affiliation>Princeton University</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Reka AI</affiliation></author>
       <author><first>Chunting</first><last>Zhou</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
       <author><first>Ramakanth</first><last>Pasunuru</last><affiliation>Meta</affiliation></author>
       <author><first>Danqi</first><last>Chen</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Veselin</first><last>Stoyanov</last><affiliation>Facebook</affiliation></author>
       <pages>13711-13738</pages>
       <abstract>Scaling up language models has led to unprecedented performance gains, but little is understood about how the training dynamics change as models get larger. How do language models of different sizes learn during pre-training? Why do larger language models demonstrate more desirable behaviors? In this paper, we analyze the intermediate training checkpoints of differently sized OPT models (Zhang et al., 2022)—from 125M to 175B parameters—on next-token prediction, sequence-level generation and downstream tasks. We find that 1) at a given perplexity and independent of model sizes, a similar subset of training tokens see the most significant reduction in loss, with the rest stagnating or showing double-descent behavior (Nakkiran et al., 2020); 2) early in training, all models learn to reduce the perplexity of grammatical sequences that contain hallucinations, with small models halting at this suboptimal distribution and larger ones eventually learning to assign these sequences lower probabilities; and 3) perplexity is a strong predictor of in-context learning performance on 74 multiple-choice tasks from BIG-Bench, and this holds independent of the model size. Together, these results show that perplexity is more predictive of model behaviors than model size or training computation.</abstract>
@@ -11094,7 +11094,7 @@
       <author><first>Nuno M.</first><last>Guerreiro</last><affiliation>Instituto de Telecomunicacoes, University of Lisbon</affiliation></author>
       <author><first>Pierre</first><last>Colombo</last><affiliation>L2S CentraleSupelec</affiliation></author>
       <author><first>Pablo</first><last>Piantanida</last><affiliation>CNRS, CentraleSupelec</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>13766-13784</pages>
       <abstract>Neural machine translation (NMT) has become the de-facto standard in real-world machine translation applications. However, NMT models can unpredictably produce severely pathological translations, known as hallucinations, that seriously undermine user trust. It becomes thus crucial to implement effective preventive strategies to guarantee their proper functioning. In this paper, we address the problem of hallucination detection in NMT by following a simple intuition: as hallucinations are detached from the source content, they exhibit encoder-decoder attention patterns that are statistically different from those of good quality translations. We frame this problem with an optimal transport formulation and propose a fully unsupervised, plug-in detector that can be used with any attention-based NMT model. Experimental results show that our detector not only outperforms all previous model-based detectors, but is also competitive with detectors that employ external models trained on millions of samples for related tasks such as quality estimation and cross-lingual sentence similarity.</abstract>
       <url hash="12c865b4">2023.acl-long.770</url>
@@ -11125,7 +11125,7 @@
       <author><first>Jiaxin</first><last>Ge</last><affiliation>Peking University</affiliation></author>
       <author><first>Hongyin</first><last>Luo</last><affiliation>MIT</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>MIT</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>13803-13817</pages>
       <abstract>Entailment has been recognized as an important metric for evaluating natural language understanding (NLU) models, and recent studies have found that entailment pretraining benefits weakly supervised fine-tuning. In this work, we design a prompting strategy that formulates a number of different NLU tasks as contextual entailment. This approach improves the zero-shot adaptation of pretrained entailment models. Secondly, we notice that self-training entailment-based models with unlabeled data can significantly improve the adaptation performance on downstream tasks. To achieve more stable improvement, we propose the Simple Pseudo-Label Editing (SimPLE) algorithm for better pseudo-labeling quality in self-training. We also found that both pretrained entailment-based models and the self-trained models are robust against adversarial evaluation data. Experiments on binary and multi-class classification tasks show that SimPLE leads to more robust self-training results, indicating that the self-trained entailment models are more efficient and trustworthy than large language models on language understanding tasks.</abstract>
       <url hash="3c687eee">2023.acl-long.772</url>
@@ -11164,7 +11164,7 @@
       <author><first>Soda Marem</first><last>Lo</last><affiliation>University of Turin</affiliation></author>
       <author><first>Alessandra Teresa</first><last>Cignarella</last><affiliation>Computer Science Department - University of Turin</affiliation></author>
       <author><first>Raffaella</first><last>Panizzon</last><affiliation>University of Padua</affiliation></author>
-      <author><first>Cristina</first><last>Marco</last><affiliation>Alexa AI, Amazon</affiliation></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Marco</last><affiliation>Alexa AI, Amazon</affiliation></author>
       <author><first>Bianca</first><last>Scarlini</last><affiliation>Amazon</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
       <author><first>Cristina</first><last>Bosco</last><affiliation>Dipartimento di Informatica - Università di Torino</affiliation></author>
@@ -11333,8 +11333,8 @@
       <title>Do You Hear The People Sing? Key Point Analysis via Iterative Clustering and Abstractive Summarisation</title>
       <author><first>Hao</first><last>Li</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>ASUS AICS</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>14064-14080</pages>
       <abstract>Argument summarisation is a promising but currently under-explored field. Recent work has aimed to provide textual summaries in the form of concise and salient short texts, i.e., key points (KPs), in a task known as Key Point Analysis (KPA). One of the main challenges in KPA is finding high-quality key point candidates from dozens of arguments even in a small corpus. Furthermore, evaluating key points is crucial in ensuring that the automatically generated summaries are useful. Although automatic methods for evaluating summarisation have considerably advanced over the years, they mainly focus on sentence-level comparison, making it difficult to measure the quality of a summary (a set of KPs) as a whole. Aggravating this problem is the fact that human evaluation is costly and unreproducible. To address the above issues, we propose a two-step abstractive summarisation framework based on neural topic modelling with an iterative clustering procedure, to generate key points which are aligned with how humans identify key points. Our experiments show that our framework advances the state of the art in KPA, with performance improvement of up to 14 (absolute) percentage points, in terms of both ROUGE and our own proposed evaluation metrics. Furthermore, we evaluate the generated summaries using a novel set-based evaluation toolkit. Our quantitative analysis demonstrates the effectiveness of our proposed evaluation metrics in assessing the quality of generated KPs. Human evaluation further demonstrates the advantages of our approach and validates that our proposed evaluation metric is more consistent with human judgment than ROUGE scores.</abstract>
       <url hash="cc72b149">2023.acl-long.786</url>
@@ -11420,7 +11420,7 @@
       <title><fixed-case>LLM</fixed-case>-Blender: Ensembling Large Language Models with Pairwise Ranking and Generative Fusion</title>
       <author><first>Dongfu</first><last>Jiang</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Xiang</first><last>Ren</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>14165-14178</pages>
       <abstract>We present LLM-Blender, an ensembling framework designed to attain consistently superior performance by leveraging the diverse strengths of multiple open-source large language models (LLMs). Our framework consists of two modules: PairRanker and GenFuser, addressing the observation that optimal LLMs for different examples can significantly vary. PairRanker employs a specialized pairwise comparison method to distinguish subtle differences between candidate outputs. It jointly encodes the input text and a pair of candidates, using cross-attention encoders to determine the superior one. Our results demonstrate that PairRanker exhibits the highest correlation with ChatGPT-based ranking. Then, GenFuser aims to merge the top-ranked candidates, generating an improved output by capitalizing on their strengths and mitigating their weaknesses. To facilitate large-scale evaluation, we introduce a benchmark dataset, MixInstruct, which is a mixture of multiple instruction datasets featuring oracle pairwise comparisons. Our LLM-Blender significantly outperform individual LLMs and baseline methods across various metrics, establishing a substantial performance gap.</abstract>
       <url hash="2b7fe068">2023.acl-long.792</url>
@@ -11450,7 +11450,7 @@
       <author><first>Shaogang</first><last>Gong</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Hailin</first><last>Jin</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Yuxin</first><last>Peng</last><affiliation>Peking University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
       <pages>14197-14209</pages>
       <abstract>Video sentence localization aims to locate moments in an unstructured video according to a given natural language query. A main challenge is the expensive annotation costs and the annotation bias. In this work, we study video sentence localization in a zero-shot setting, which learns with only video data without any annotation. Existing zero-shot pipelines usually generate event proposals and then generate a pseudo query for each event proposal. However, their event proposals are obtained via visual feature clustering, which is query-independent and inaccurate; and the pseudo-queries are short or less interpretable. Moreover, existing approaches ignores the risk of pseudo-label noise when leveraging them in training. To address the above problems, we propose a Structure-based Pseudo Label generation (SPL), which first generate free-form interpretable pseudo queries before constructing query-dependent event proposals by modeling the event temporal structure. To mitigate the effect of pseudo-label noise, we propose a noise-resistant iterative method that repeatedly re-weight the training sample based on noise estimation to train a grounding model and correct pseudo labels. Experiments on the ActivityNet Captions and Charades-STA datasets demonstrate the advantages of our approach. Code can be found at <url>https://github.com/minghangz/SPL</url>.</abstract>
       <url hash="adc6ddea">2023.acl-long.794</url>
@@ -11465,7 +11465,7 @@
       <author><first>Vignesh</first><last>Nagarajan</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft AI and Research</affiliation></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>IIT Madras</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>NICT</affiliation></author>
       <pages>14210-14228</pages>
       <abstract>The rapid growth of machine translation (MT) systems necessitates meta-evaluations of evaluation metrics to enable selection of those that best reflect MT quality. Unfortunately, most meta-evaluation studies focus on European languages, the observations for which may not always apply to other languages. Indian languages, having over a billion speakers, are linguistically different from them, and to date, there are no such systematic studies focused solely on English to Indian language MT. This paper fills this gap through a Multidimensional Quality Metric (MQM) dataset consisting of 7000 fine-grained annotations, spanning 5 Indian languages and 7 MT systems. We evaluate 16 metrics and show that, pre-trained metrics like COMET have the highest correlations with annotator scores as opposed to n-gram metrics like BLEU. We further leverage our MQM annotations to develop an Indic-COMET metric and show that it outperforms COMET counterparts in both human scores correlations and robustness scores in Indian languages. Additionally, we show that the Indic-COMET can outperform COMET on some unseen Indian languages. We hope that our dataset and analysis will facilitate further research in Indic MT evaluation.</abstract>
@@ -11523,7 +11523,7 @@
       <title>Python Code Generation by Asking Clarification Questions</title>
       <author><first>Haau-Sing (Xiaocheng)</first><last>Li</last><affiliation>UKP Lab, Technical University of Darmstadt</affiliation></author>
       <author><first>Mohsen</first><last>Mesgar</last><affiliation>UKP Lab, Technical University of Darmstadt</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>UKP Lab, Technische Universität Darmstadt</affiliation></author>
       <pages>14287-14306</pages>
       <abstract>Code generation from text requires understanding the user’s intent from a natural languagedescription and generating an executable code snippet that satisfies this intent. While recent pretrained language models demonstrate remarkable performance for this task, these models fail when the given natural language description is under-specified. In this work, we introduce a novel and more realistic setup for this task. We hypothesize that the under-specification of a natural language description can be resolved by asking clarification questions. Therefore, we collect and introduce a new dataset named CodeClarQA containing pairs of natural language descriptions and code with created synthetic clarification questions and answers. The empirical results of our evaluation of pretrained language model performance on code generation show that clarifications result in more precisely generated code, as shown by the substantial improvement of model performance in all evaluation metrics. Alongside this, our task and dataset introduce new challenges to the community, including when and what clarification questions should be asked. Our code and dataset are available on GitHub.</abstract>
@@ -11652,7 +11652,7 @@
       <author><first>Dongkuan</first><last>Xu</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Qingqing</first><last>Cao</last><affiliation>University of Washington</affiliation></author>
       <author><first>Xiaojun</first><last>Chen</last><affiliation>Shenzhen University</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Meng</first><last>Fang</last><affiliation>University of Liverpool</affiliation></author>
       <pages>14447-14465</pages>
       <abstract>Open domain question answering (ODQA) is a longstanding task aimed at answering factual questions from a large knowledge corpus without any explicit evidence in natural language processing (NLP). Recent works have predominantly focused on improving the answering accuracy and have achieved promising progress. However, higher accuracy often requires more memory consumption and inference latency, which might not necessarily be efficient enough for direct deployment in the real world. Thus, a trade-off between accuracy, memory consumption and processing speed is pursued. In this paper, we will survey recent advancements in the efficiency of ODQA models and conclude core techniques for achieving efficiency. Additionally, we will provide a quantitative analysis of memory cost, query speed, accuracy, and overall performance comparison. Our goal is to keep scholars informed of the latest advancements and open challenges in ODQA efficiency research and contribute to the further development of ODQA efficiency.</abstract>
@@ -11721,7 +11721,7 @@
       <title>Jointprop: Joint Semi-supervised Learning for Entity and Relation Extraction with Heterogeneous Graph-based Propagation</title>
       <author><first>Yandan</first><last>Zheng</last><affiliation>Nanyang University of Technology</affiliation></author>
       <author><first>Anran</first><last>Hao</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <pages>14541-14555</pages>
       <abstract>Semi-supervised learning has been an important approach to address challenges in extracting entities and relations from limited data. However, current semi-supervised works handle the two tasks (i.e., Named Entity Recognition and Relation Extraction) separately and ignore the cross-correlation of entity and relation instances as well as the existence of similar instances across unlabeled data. To alleviate the issues, we propose Jointprop, a Heterogeneous Graph-based Propagation framework for joint semi-supervised entity and relation extraction, which captures the global structure information between individual tasks and exploits interactions within unlabeled data. Specifically, we construct a unified span-based heterogeneous graph from entity and relation candidates and propagate class labels based on confidence scores. We then employ a propagation learning scheme to leverage the affinities between labelled and unlabeled samples. Experiments on benchmark datasets show that our framework outperforms the state-of-the-art semi-supervised approaches on NER and RE tasks. We show that the joint semi-supervised learning of the two tasks benefits from their codependency and validates the importance of utilizing the shared information between unlabeled data.</abstract>
       <url hash="9d27844e">2023.acl-long.813</url>
@@ -11749,8 +11749,8 @@
     <paper id="815">
       <title>Faking Fake News for Real Fake News Detection: Propaganda-Loaded Training Data Generation</title>
       <author><first>Kung-Hsiang</first><last>Huang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>University of Washington</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>14571-14589</pages>
@@ -11841,7 +11841,7 @@
       <author><first>Bingsheng</first><last>Yao</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Prithviraj</first><last>Sen</last><affiliation>Amazon</affiliation></author>
       <author><first>Lucian</first><last>Popa</last><affiliation>IBM Research - Almaden</affiliation></author>
-      <author><first>James</first><last>Hendler</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
+      <author id="james-hendler"><first>James</first><last>Hendler</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Dakuo</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <pages>14698-14713</pages>
       <abstract>Human-annotated labels and explanations are critical for training explainable NLP models. However, unlike human-annotated labels whose quality is easier to calibrate (e.g., with a majority vote), human-crafted free-form explanations can be quite subjective. Before blindly using them as ground truth to train ML models, a vital question needs to be asked: How do we evaluate a human-annotated explanation’s quality? In this paper, we build on the view that the quality of a human-annotated explanation can be measured based on its helpfulness (or impairment) to the ML models’ performance for the desired NLP tasks for which the annotations were collected. In comparison to the commonly used Simulatability score, we define a new metric that can take into consideration the helpfulness of an explanation for model performance at both fine-tuning and inference. With the help of a unified dataset format, we evaluated the proposed metric on five datasets (e.g., e-SNLI) against two model architectures (T5 and BART), and the results show that our proposed metric can objectively evaluate the quality of human-annotated explanations, while Simulatability falls short.</abstract>
@@ -11858,7 +11858,7 @@
       <author><first>Youngin</first><last>Lee</last><affiliation>KAIST</affiliation></author>
       <author><first>So-Yeon</first><last>Ahn</last><affiliation>Korea Advanced Institute of Science and Technology (KAIST)</affiliation></author>
       <author><first>Dongyeop</first><last>Kang</last><affiliation>University of Minnesota</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>14714-14733</pages>
       <abstract>Researchers have traditionally recruited native speakers to provide annotations for the widely used benchmark datasets. But there are languages for which recruiting native speakers is difficult, and it would help to get learners of those languages to annotate the data. In this paper, we investigate whether language learners can contribute annotations to the benchmark datasets. In a carefully controlled annotation experiment, we recruit 36 language learners, provide two types of additional resources (dictionaries and machine-translated sentences), and perform mini-tests to measure their language proficiency. We target three languages, English, Korean, and Indonesian, and four NLP tasks, sentiment analysis, natural language inference, named entity recognition, and machine reading comprehension. We find that language learners, especially those with intermediate or advanced language proficiency, are able to provide fairly accurate labels with the help of additional resources. Moreover, we show that data annotation improves learners’ language proficiency in terms of vocabulary and grammar. The implication of our findings is that broadening the annotation task to include language learners can open up the opportunity to build benchmark datasets for languages for which it is difficult to recruit native speakers.</abstract>
       <url hash="5691e465">2023.acl-long.822</url>
@@ -11872,7 +11872,7 @@
       <author><first>Hao</first><last>Fei</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yixin</first><last>Cao</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>14734-14751</pages>
       <abstract>Existing research on multimodal relation extraction (MRE) faces two co-existing challenges, internal-information over-utilization and external-information under-exploitation. To combat that, we propose a novel framework that simultaneously implements the idea of internal-information screening and external-information exploiting. First, we represent the fine-grained semantic structures of the input image and text with the visual and textual scene graphs, which are further fused into a unified cross-modal graph (CMG). Based on CMG, we perform structure refinement with the guidance of the graph information bottleneck principle, actively denoising the less-informative features. Next, we perform topic modeling over the input image and text, incorporating latent multimodal topic features to enrich the contexts. On the benchmark MRE dataset, our system outperforms the current best model significantly. With further in-depth analyses, we reveal the great potential of our method for the MRE task.</abstract>
       <url hash="f522bcb3">2023.acl-long.823</url>
@@ -11975,8 +11975,8 @@
       <title>Abductive Commonsense Reasoning Exploiting Mutually Exclusive Explanations</title>
       <author><first>Wenting</first><last>Zhao</last><affiliation>Cornell University</affiliation></author>
       <author><first>Justin</first><last>Chiu</last><affiliation>Cornell Tech</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
       <pages>14883-14896</pages>
       <abstract>Abductive reasoning aims to find plausible explanations for an event. This style of reasoning is critical for commonsense tasks where there are often multiple plausible explanations. Existing approaches for abductive reasoning in natural language processing (NLP) often rely on manually generated annotations for supervision; however, such annotations can be subjective and biased. Instead of using direct supervision, this work proposes an approach for abductive commonsense reasoning that exploits the fact that only a subset of explanations is correct for a given context. The method uses posterior regularization to enforce a mutual exclusion constraint, encouraging the model to learn the distinction between fluent explanations and plausible ones. We evaluate our approach on a diverse set of abductive reasoning datasets; experimental results show that our approach outperforms or is comparable to directly applying pretrained language models in a zero-shot manner and other knowledge-augmented zero-shot methods.</abstract>
       <url hash="32b201b1">2023.acl-long.831</url>
@@ -12001,7 +12001,7 @@
       <title>Visually-augmented pretrained language models for <fixed-case>NLP</fixed-case> tasks without images</title>
       <author><first>Hangyu</first><last>Guo</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Qinyu</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>14912-14929</pages>
@@ -12015,7 +12015,7 @@
       <title>Using counterfactual contrast to improve compositional generalization for multi-step quantitative reasoning</title>
       <author><first>Armineh</first><last>Nourbakhsh</last><affiliation>CMU, JP Morgan Chase</affiliation></author>
       <author><first>Sameena</first><last>Shah</last><affiliation>JP Morgan</affiliation></author>
-      <author><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>14930-14943</pages>
       <abstract>In quantitative question answering, compositional generalization is one of the main challenges of state of the art models, especially when longer sequences of reasoning steps are required. In this paper we propose CounterComp, a method that uses counterfactual scenarios to generate samples with compositional contrast. Instead of a data augmentation approach, CounterComp is based on metric learning, which allows for direct sampling from the training set and circumvents the need for additional human labels. Our proposed auxiliary metric learning loss improves the performance of three state of the art models on four recently released datasets. We also show how the approach can improve OOD performance on unseen domains, as well as unseen compositions. Lastly, we demonstrate how the method can lead to better compositional attention patterns during training.</abstract>
       <url hash="e4e23ee2">2023.acl-long.834</url>
@@ -12033,7 +12033,7 @@
       <author><first>Yixin</first><last>Liu</last><affiliation>Yale University</affiliation></author>
       <author><first>Saad</first><last>Mahamood</last><affiliation>trivago N.V</affiliation></author>
       <author><first>Sebastian</first><last>Gehrmann</last><affiliation>Bloomberg LP</affiliation></author>
-      <author><first>Miruna</first><last>Clinciu</last><affiliation>Edinburgh Centre for Robotics</affiliation></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last><affiliation>Edinburgh Centre for Robotics</affiliation></author>
       <author><first>Khyathi Raghavi</first><last>Chandu</last><affiliation>Allen Institute of AI</affiliation></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
       <pages>14944-14982</pages>
@@ -12062,7 +12062,7 @@
     <paper id="837">
       <title><fixed-case>M</fixed-case>eeting<fixed-case>QA</fixed-case>: Extractive Question-Answering on Meeting Transcripts</title>
       <author><first>Archiki</first><last>Prasad</last><affiliation>UNC Chapel Hill</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Hanieh</first><last>Deilamsalehy</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Research</affiliation></author>
@@ -12077,7 +12077,7 @@
     <paper id="838">
       <title><fixed-case>FERMAT</fixed-case>: An Alternative to Accuracy for Numerical Reasoning</title>
       <author><first>Jasivan</first><last>Sivakumar</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
       <pages>15026-15043</pages>
       <abstract>While pre-trained language models achieve impressive performance on various NLP benchmarks, they still struggle with tasks that require numerical reasoning. Recent advances in improving numerical reasoning are mostly achieved using very large language models that contain billions of parameters and are not accessible to everyone. In addition, numerical reasoning is measured using a single score on existing datasets. As a result, we do not have a clear understanding of the strengths and shortcomings of existing models on different numerical reasoning aspects and therefore, potential ways to improve them apart from scaling them up. Inspired by CheckList (Ribeiro et al., 2020), we introduce a multi-view evaluation set for numerical reasoning in English, called FERMAT. Instead of reporting a single score on a whole dataset, FERMAT evaluates models on various key numerical reasoning aspects such as number understanding, mathematical operations, and training dependency. Apart from providing a comprehensive evaluation of models on different numerical reasoning aspects, FERMAT enables a systematic and automated generation of an arbitrarily large training or evaluation set for each aspect. The datasets and codes are publicly available to generate further multi-view data for ulterior tasks and languages.</abstract>
       <url hash="1f4691a4">2023.acl-long.838</url>
@@ -12089,7 +12089,7 @@
       <title>Don’t Forget Your <fixed-case>ABC</fixed-case>’s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems</title>
       <author><first>Sarah E.</first><last>Finch</last><affiliation>Emory University</affiliation></author>
       <author><first>James D.</first><last>Finch</last><affiliation>Emory University</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>15044-15071</pages>
       <abstract>Despite tremendous advancements in dialogue systems, stable evaluation still requires human judgments producing notoriously high-variance metrics due to their inherent subjectivity. Moreover, methods and labels in dialogue evaluation are not fully standardized, especially for open-domain chats, with a lack of work to compare and assess the validity of those approaches. The use of inconsistent evaluation can misinform the performance of a dialogue system, which becomes a major hurdle to enhance it. Thus, a dimensional evaluation of chat-oriented open-domain dialogue systems that reliably measures several aspects of dialogue capabilities is desired. This paper presents a novel human evaluation method to estimate the rates of many{pasted macro ‘LN’} dialogue system behaviors. Our method is used to evaluate four state-of-the-art open-domain dialogue systems and compared with existing approaches. The analysis demonstrates that our behavior method is more suitable than alternative Likert-style or comparative approaches for dimensional evaluation of these systems.</abstract>
       <url hash="471fd74f">2023.acl-long.839</url>
@@ -12119,7 +12119,7 @@
       <author><first>Kaheer</first><last>Suleman</last><affiliation>Microsoft Research Montreal</affiliation></author>
       <author><first>Adam</first><last>Trischler</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
       <pages>15088-15108</pages>
       <abstract>Many state-of-the-art natural language understanding (NLU) models are based on pretrained neural language models. These models often make inferences using information from multiple sources. An important class of such inferences are those that require both background knowledge, presumably contained in a model’s pretrained parameters, and instance-specific information that is supplied at inference time. However, the integration and reasoning abilities of NLU models in the presence of multiple knowledge sources have been largely understudied. In this work, we propose a test suite of coreference resolution subtasks that require reasoning over multiple facts. These subtasks differ in terms of which knowledge sources contain the relevant facts. We also introduce subtasks where knowledge is present only at inference time using fictional knowledge. We evaluate state-of-the-art coreference resolution models on our dataset. Our results indicate that several models struggle to reason on-the-fly over knowledge observed both at pretrain time and at inference time. However, with task-specific training, a subset of models demonstrates the ability to integrate certain knowledge types from multiple sources. Still, even the best performing models seem to have difficulties with reliably integrating knowledge presented only at inference time.</abstract>
       <url hash="0bfe5c66">2023.acl-long.841</url>
@@ -12132,7 +12132,7 @@
       <author><first>Marcos</first><last>Treviso</last><affiliation>Instituto de Telecomunicacoes</affiliation></author>
       <author><first>Alexis</first><last>Ross</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Nuno M.</first><last>Guerreiro</last><affiliation>Instituto de Telecomunicacoes, University of Lisbon</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>15109-15126</pages>
       <abstract>Selective rationales and counterfactual examples have emerged as two effective, complementary classes of interpretability methods for analyzing and training NLP models. However, prior work has not explored how these methods can be integrated to combine their complementary advantages. We overcome this limitation by introducing CREST (ContRastive Edits with Sparse raTionalization), a joint framework for selective rationalization and counterfactual text generation, and show that this framework leads to improvements in counterfactual quality, model robustness, and interpretability. First, CREST generates valid counterfactuals that are more natural than those produced by previous methods, and subsequently can be used for data augmentation at scale, reducing the need for human-generated examples. Second, we introduce a new loss function that leverages CREST counterfactuals to regularize selective rationales and show that this regularization improves both model robustness and rationale quality, compared to methods that do not leverage CREST counterfactuals. Our results demonstrate that CREST successfully bridges the gap between selective rationales and counterfactual examples, addressing the limitations of existing methods and providing a more comprehensive view of a model’s predictions.</abstract>
       <url hash="6de43829">2023.acl-long.842</url>
@@ -12162,8 +12162,8 @@
       <author><first>Budhaditya</first><last>Deb</last><affiliation>Microsoft Corporation</affiliation></author>
       <author><first>Milagro</first><last>Teruel</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Aaron</first><last>Halfaker</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <pages>15144-15161</pages>
       <abstract>Despite the recent progress in language generation models, their outputs may not always meet user expectations. In this work, we study whether informational feedback in natural language can be leveraged to improve generation quality and user preference alignment. To this end, we consider factual consistency in summarization, the quality that the summary should only contain information supported by the input documents, as the user-expected preference. We collect a high-quality dataset, DeFacto, containing human demonstrations and informational natural language feedback consisting of corrective instructions, edited summaries, and explanations with respect to the factual consistency of the summary. Using our dataset, we study three natural language generation tasks: (1) editing a summary by following the human feedback, (2) generating human feedback for editing the original summary, and (3) revising the initial summary to correct factual errors by generating both the human feedback and edited summary. We show that DeFacto can provide factually consistent human-edited summaries and further insights into summarization factual consistency thanks to its informational natural language feedback. We further demonstrate that fine-tuned language models can leverage our dataset to improve the summary factual consistency, while large language models lack the zero-shot learning ability in our proposed tasks that require controllable text generation.</abstract>
       <url hash="3a8db1fe">2023.acl-long.844</url>
@@ -12231,7 +12231,7 @@
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Tao</first><last>Li</last><affiliation>Meituan Inc.</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>15233-15256</pages>
       <abstract>Recently, multi-aspect controllable text generation that controls the generated text in multiple aspects (e.g., sentiment, topic, and keywords) has attracted increasing attention. Although methods based on parameter efficient tuning like prefix-tuning could achieve multi-aspect controlling in a plug-and-play way, the mutual interference of multiple prefixes leads to significant degeneration of constraints and limits their extensibility to training-time unseen aspect combinations. In this work, we provide a theoretical lower bound for the interference and empirically found that the interference grows with the number of layers where prefixes are inserted. Based on these analyses, we propose using trainable gates to normalize the intervention of prefixes to restrain the growing interference. As a result, controlling training-time unseen combinations of aspects can be realized by simply concatenating corresponding plugins such that new constraints can be extended at a lower cost. In addition, we propose a unified way to process both categorical and free-form constraints. Experiments on text generation and machine translation demonstrate the superiority of our approach over baselines on constraint accuracy, text quality, and extensibility.</abstract>
       <url hash="5ff5f4cb">2023.acl-long.849</url>
@@ -12271,7 +12271,7 @@
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
       <author><first>Jin</first><last>Ma</last><affiliation>ustc</affiliation></author>
       <author><first>Ting</first><last>Yao</last><affiliation>Tencent</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>15286-15304</pages>
       <abstract>In the real-world scenario, a longstanding goal of multilingual neural machine translation (MNMT) is that a single model can incrementally adapt to new language pairs without accessing previous training data. In this scenario, previous studies concentrate on overcoming catastrophic forgetting while lacking encouragement to learn new knowledge from incremental language pairs, especially when the incremental language is not related to the set of original languages. To better acquire new knowledge, we propose a knowledge transfer method that can efficiently adapt original MNMT models to diverse incremental language pairs. The method flexibly introduces the knowledge from an external model into original models, which encourages the models to learn new language pairs, completing the procedure of knowledge transfer. Moreover, all original parameters are frozen to ensure that translation qualities on original language pairs are not degraded. Experimental results show that our method can learn new knowledge from diverse language pairs incrementally meanwhile maintaining performance on original language pairs, outperforming various strong baselines in incremental learning for MNMT.</abstract>
       <url hash="66705449">2023.acl-long.852</url>
@@ -12286,7 +12286,7 @@
       <author><first>A. Pastor</first><last>López-Monroy</last></author>
       <author><first>Luis C.</first><last>González</last></author>
       <author><first>David E.</first><last>Losada</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>15305-15318</pages>
       <abstract>Mental disorders affect millions of people worldwide and cause interference with their thinking and behavior. Through the past years, awareness created by health campaigns and other sources motivated the study of these disorders using information extracted from social media platforms. In this work, we aim to contribute to the study of these disorders and to the understanding of how mental problems reflect on social media. To achieve this goal, we propose a double-domain adaptation of a language model. First, we adapted the model to social media language, and then, we adapted it to the mental health domain. In both steps, we incorporated a lexical resource to guide the masking process of the language model and, therefore, to help it in paying more attention to words related to mental disorders. We have evaluated our model in the detection of signs of three major mental disorders: Anorexia, Self-harm, and Depression. Results are encouraging as they show that the proposed adaptation enhances the classification performance and yields competitive results against state-of-the-art methods.</abstract>
       <url hash="9b592d00">2023.acl-long.853</url>
@@ -12296,8 +12296,8 @@
     </paper>
     <paper id="854">
       <title>Toward Interactive Dictation</title>
-      <author><first>Belinda Z.</first><last>Li</last><affiliation>MIT</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last><affiliation>MIT</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <author><first>Adam</first><last>Pauls</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sam</first><last>Thomson</last><affiliation>Microsoft Semantic Machines</affiliation></author>
       <pages>15319-15338</pages>
@@ -12314,7 +12314,7 @@
       <author><first>Qiong</first><last>Tang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Hang</first><last>Yan</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yuanbin</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>15339-15353</pages>
       <abstract>Large language models (LLMs) pre-trained on massive corpora have demonstrated impressive few-shot learning ability on many NLP tasks. A common practice is to recast the task into a text-to-text format such that generative LLMs of natural language (NL-LLMs) like GPT-3 can be prompted to solve it. However, it is nontrivial to perform information extraction (IE) tasks with NL-LLMs since the output of the IE task is usually structured and therefore is hard to be converted into plain text. In this paper, we propose to recast the structured output in the form of code instead of natural language and utilize generative LLMs of code (Code-LLMs) such as Codex to perform IE tasks, in particular, named entity recognition and relation extraction. In contrast to NL-LLMs, we show that Code-LLMs can be well-aligned with these IE tasks by designing code-style prompts and formulating these IE tasks as code generation tasks. Experiment results on seven benchmarks show that our method consistently outperforms fine-tuning moderate-size pre-trained models specially designed for IE tasks (e.g., UIE) and prompting NL-LLMs under few-shot settings. We further conduct a series of in-depth analyses to demonstrate the merits of leveraging Code-LLMs for IE tasks.</abstract>
@@ -12342,7 +12342,7 @@
     <paper id="857">
       <title>Bridging The Gap: Entailment Fused-T5 for Open-retrieval Conversational Machine Reading Comprehension</title>
       <author><first>Xiao</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Zewen</first><last>Chi</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>15374-15386</pages>
@@ -12426,7 +12426,7 @@
       <title>Human Inspired Progressive Alignment and Comparative Learning for Grounded Word Acquisition</title>
       <author><first>Yuwei</first><last>Bao</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Barrett</first><last>Lattimer</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>15475-15493</pages>
       <abstract>Human language acquisition is an efficient, supervised, and continual process. In this work, we took inspiration from how human babies acquire their first language, and developed a computational process for word acquisition through comparative learning. Motivated by cognitive findings, we generated a small dataset that enables the computation models to compare the similarities and differences of various attributes, learn to filter out and extract the common information for each shared linguistic label. We frame the acquisition of words as not only the information filtration process, but also as representation-symbol mapping. This procedure does not involve a fixed vocabulary size, nor a discriminative objective, and allows the models to continually learn more concepts efficiently. Our results in controlled experiments have shown the potential of this approach for efficient continual learning of grounded words.</abstract>
       <url hash="83fce636">2023.acl-long.863</url>
@@ -12451,7 +12451,7 @@
       <author><first>Nicolas</first><last>Garneau</last><affiliation>Universite Laval</affiliation></author>
       <author><first>Catalina</first><last>Goanta</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Daniel</first><last>Katz</last><affiliation>Illinois Tech - Chicago Kent College of Law</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>15513-15535</pages>
       <abstract>In this work, we conduct a detailed analysis on the performance of legal-oriented pre-trained language models (PLMs). We examine the interplay between their original objective, acquired knowledge, and legal language understanding capacities which we define as the upstream, probing, and downstream performance, respectively. We consider not only the models’ size but also the pre-training corpora used as important dimensions in our study. To this end, we release a multinational English legal corpus (LeXFiles) and a legal knowledge probing benchmark (LegalLAMA) to facilitate training and detailed analysis of legal-oriented PLMs. We release two new legal PLMs trained on LeXFiles and evaluate them alongside others on LegalLAMA and LexGLUE. We find that probing performance strongly correlates with upstream performance in related legal topics. On the other hand, downstream performance is mainly driven by the model’s size and prior legal knowledge which can be estimated by upstream and probing performance. Based on these findings, we can conclude that both dimensions are important for those seeking the development of domain-specific PLMs.</abstract>
       <url hash="daaae81c">2023.acl-long.865</url>
@@ -12479,7 +12479,7 @@
       <author><first>Kai</first><last>Mei</last><affiliation>Rutgers University</affiliation></author>
       <author><first>Zheng</first><last>Li</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Zhenting</first><last>Wang</last><affiliation>Rutgers University</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Shiqing</first><last>Ma</last><affiliation>Rutgers University</affiliation></author>
       <pages>15551-15565</pages>
       <abstract>Prompt-based learning is vulnerable to backdoor attacks. Existing backdoor attacks against prompt-based models consider injecting backdoors into the entire embedding layers or word embedding vectors. Such attacks can be easily affected by retraining on downstream tasks and with different prompting strategies, limiting the transferability of backdoor attacks. In this work, we propose transferable backdoor attacks against prompt-based models, called NOTABLE, which is independent of downstream tasks and prompting strategies. Specifically, NOTABLE injects backdoors into the encoders of PLMs by utilizing an adaptive verbalizer to bind triggers to specific words (i.e., anchors). It activates the backdoor by pasting input with triggers to reach adversary-desired anchors, achieving independence from downstream tasks and prompting strategies. We conduct experiments on six NLP tasks, three popular models, and three prompting strategies. Empirical results show that NOTABLE achieves superior attack performance (i.e., attack success rate over 90% on all the datasets), and outperforms two state-of-the-art baselines. Evaluations on three defenses show the robustness of NOTABLE. Our code can be found at <url>https://github.com/RU-System-Software-and-Security/Notable</url>.</abstract>
@@ -12492,7 +12492,7 @@
       <title>Revisiting Relation Extraction in the era of Large Language Models</title>
       <author><first>Somin</first><last>Wadhwa</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Silvio</first><last>Amir</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>15566-15589</pages>
       <abstract>Relation extraction (RE) is the core NLP task of inferring semantic relationships between entities from text. Standard supervised RE techniques entail training modules to tag tokens comprising entity spans and then predict the relationship between them. Recent work has instead treated the problem as a sequence-to-sequence task, linearizing relations between entities as target strings to be generated conditioned on the input. Here we push the limits of this approach, using larger language models (GPT-3 and Flan-T5 large) than considered in prior work and evaluating their performance on standard RE tasks under varying levels of supervision. We address issues inherent to evaluating generative approaches to RE by doing human evaluations, in lieu of relying on exact matching. Under this refined evaluation, we find that: (1) Few-shot prompting with GPT-3 achieves near SOTA performance, i.e., roughly equivalent to existing fully supervised models; (2) Flan-T5 is not as capable in the few-shot setting, but supervising and fine-tuning it with Chain-of-Thought (CoT) style explanations (generated via GPT-3) yields SOTA results. We release this model as a new baseline for RE tasks.</abstract>
       <url hash="8e3cb198">2023.acl-long.868</url>
@@ -12532,7 +12532,7 @@
       <author><first>Haolin</first><last>Chen</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Francois</first><last>Marelli</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Francois</first><last>Fleuret</last><affiliation>University of Geneva</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <pages>15632-15654</pages>
       <abstract>Transformer-based architectures are the model of choice for natural language understanding, but they come at a significant cost, as they have quadratic complexity in the input length, require a lot of training data, and can be difficult to tune. In the pursuit of lower costs, we investigate simple MLP-based architectures. We find that existing architectures such as MLPMixer, which achieves token mixing through a static MLP applied to each feature independently, are too detached from the inductive biases required for natural language understanding. In this paper, we propose a simple variant, HyperMixer, which forms the token mixing MLP dynamically using hypernetworks. Empirically, we demonstrate that our model performs better than alternative MLP-based models, and on par with Transformers. In contrast to Transformers, HyperMixer achieves these results at substantially lower costs in terms of processing time, training data, and hyperparameter tuning.</abstract>
       <url hash="9b175299">2023.acl-long.871</url>
@@ -12572,7 +12572,7 @@
     </paper>
     <paper id="874">
       <title>Annotation-Inspired Implicit Discourse Relation Classification with Auxiliary Discourse Connective Generation</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <author><first>Michael</first><last>Strube</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <pages>15696-15712</pages>
       <abstract>Implicit discourse relation classification is a challenging task due to the absence of discourse connectives. To overcome this issue, we design an end-to-end neural model to explicitly generate discourse connectives for the task, inspired by the annotation process of PDTB. Specifically, our model jointly learns to generate discourse connectives between arguments and predict discourse relations based on the arguments and the generated connectives. To prevent our relation classifier from being misled by poor connectives generated at the early stage of training while alleviating the discrepancy between training and inference, we adopt Scheduled Sampling to the joint learning. We evaluate our method on three benchmarks, PDTB 2.0, PDTB 3.0, and PCC. Results show that our joint model significantly outperforms various baselines on three datasets, demonstrating its superiority for the task.</abstract>
@@ -12615,7 +12615,7 @@
       <title>Two-Stage Fine-Tuning for Improved Bias and Variance for Large Pretrained Language Models</title>
       <author><first>Lijing</first><last>Wang</last><affiliation>New Jersey Institute of Technology</affiliation></author>
       <author><first>Yingya</first><last>Li</last><affiliation>Harvard Medical School and Boston Children’s Hospital</affiliation></author>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
       <author><first>Steven</first><last>Bethard</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Guergana</first><last>Savova</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
       <pages>15746-15761</pages>
@@ -12648,7 +12648,7 @@
       <author><first>Xiang</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Puyang</first><last>Xu</last><affiliation>Mobvoi</affiliation></author>
       <author><first>Sunghyun</first><last>Park</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <pages>15783-15798</pages>
       <abstract>Unsupervised sentence representation learning has progressed through contrastive learning and data augmentation methods such as dropout masking. Despite this progress, sentence encoders are still limited to using only an input sentence when predicting its semantic vector. In this work, we show that the semantic meaning of a sentence is also determined by nearest-neighbor sentences that are similar to the input sentence. Based on this finding, we propose a novel unsupervised sentence encoder, RankEncoder. RankEncoder predicts the semantic vector of an input sentence by leveraging its relationship with other sentences in an external corpus, as well as the input sentence itself. We evaluate RankEncoder on semantic textual benchmark datasets. From the experimental results, we verify that 1) RankEncoder achieves 80.07% Spearman’s correlation, a 1.1% absolute improvement compared to the previous state-of-the-art performance, 2) RankEncoder is universally applicable to existing unsupervised sentence embedding methods, and 3) RankEncoder is specifically effective for predicting the similarity scores of similar sentence pairs.</abstract>
       <url hash="07099e18">2023.acl-long.879</url>
@@ -12763,7 +12763,7 @@
       <author><first>Jingjing</first><last>Xu</last><affiliation>Shanghai AI Lab</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Lingpeng</first><last>Kong</last><affiliation>The University of Hong Kong</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>15948-15959</pages>
       <abstract>Neural machine translation has achieved promising results on many translation tasks. However, previous studies have shown that neural models induce a non-smooth representation space, which harms its generalization results. Recently, kNN-MT has provided an effective paradigm to smooth the prediction based on neighbor representations during inference. Despite promising results, kNN-MT usually requires large inference overhead. We propose an effective training framework INK to directly smooth the representation space via adjusting representations of kNN neighbors with a small number of new parameters. The new parameters are then used to refresh the whole representation datastore to get new kNN knowledge asynchronously. This loop keeps running until convergence. Experiments on four benchmark datasets show that INK achieves average gains of 1.99 COMET and 1.0 BLEU, outperforming the state-of-the-art kNN-MT system with 0.02x memory space and 1.9x inference speedup.</abstract>
       <url hash="a9ba9817">2023.acl-long.888</url>
@@ -12777,7 +12777,7 @@
       <author><first>Kun</first><last>Huang</last><affiliation>Nanjing University of Science and Technology</affiliation></author>
       <author><first>Xiaocui</first><last>Yang</last><affiliation>School of Computer Science and Engineering, Northeastern University,</affiliation></author>
       <author><first>Pengfei</first><last>Hong</last><affiliation>Singapore University of Technology and Design</affiliation></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last><affiliation>Nanjing University of Science and Technology</affiliation></author>
+      <author><first>Kun</first><last>Zhang</last><affiliation>Nanjing University of Science and Technology</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <pages>15960-15973</pages>
       <abstract>Document-level relation extraction (DocRE) aims to infer complex semantic relations among entities in a document. Distant supervision (DS) is able to generate massive auto-labeled data, which can improve DocRE performance. Recent works leverage pseudo labels generated by the pre-denoising model to reduce noise in DS data. However, unreliable pseudo labels bring new noise, e.g., adding false pseudo labels and losing correct DS labels. Therefore, how to select effective pseudo labels to denoise DS data is still a challenge in document-level distant relation extraction. To tackle this issue, we introduce uncertainty estimation technology to determine whether pseudo labels can be trusted. In this work, we propose a Document-level distant Relation Extraction framework with Uncertainty Guided label denoising, UGDRE. Specifically, we propose a novel instance-level uncertainty estimation method, which measures the reliability of the pseudo labels with overlapping relations. By further considering the long-tail problem, we design dynamic uncertainty thresholds for different types of relations to filter high-uncertainty pseudo labels. We conduct experiments on two public datasets. Our framework outperforms strong baselines by 1.91 F1 and 2.28 Ign F1 on the RE-DocRED dataset.</abstract>
@@ -12811,7 +12811,7 @@
       <author><first>Zheng Xin</first><last>Yong</last><affiliation>Brown University</affiliation></author>
       <author><first>Hailey</first><last>Schoelkopf</last><affiliation>EleutherAI</affiliation></author>
       <author><first>Xiangru</first><last>Tang</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Khalid</first><last>Almubarak</last><affiliation>Prince Sattam bin Abdulaziz University</affiliation></author>
       <author><first>Samuel</first><last>Albanie</last><affiliation>University of Cambridge</affiliation></author>
@@ -12859,7 +12859,7 @@
       <author><first>Zhiguo</first><last>Wang</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Bonan</first><last>Min</last><affiliation>Amazon AWS AI Labs</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>Amazon AWS AI Labs</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <author><first>Vittorio</first><last>Castelli</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Dan</first><last>Roth</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Bing</first><last>Xiang</last><affiliation>Amazon</affiliation></author>
@@ -12891,7 +12891,7 @@
       <author><first>Richard</first><last>Dufour</last><affiliation>LS2N - Nantes University</affiliation></author>
       <author><first>Mickael</first><last>Rouvier</last><affiliation>LIA - Avignon University</affiliation></author>
       <author><first>Emmanuel</first><last>Morin</last><affiliation>LS2N UMR CNRS 6004</affiliation></author>
-      <author><first>Béatrice</first><last>Daille</last><affiliation>Nantes Université- LS2N</affiliation></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last><affiliation>Nantes Université- LS2N</affiliation></author>
       <author><first>Pierre-Antoine</first><last>Gourraud</last><affiliation>Nantes Universite</affiliation></author>
       <pages>16207-16221</pages>
       <abstract>In recent years, pre-trained language models (PLMs) achieve the best performance on a wide range of natural language processing (NLP) tasks. While the first models were trained on general domain data, specialized ones have emerged to more effectively treat specific domains. In this paper, we propose an original study of PLMs in the medical domain on French language. We compare, for the first time, the performance of PLMs trained on both public data from the web and private data from healthcare establishments. We also evaluate different learning strategies on a set of biomedical tasks. In particular, we show that we can take advantage of already existing biomedical PLMs in a foreign language by further pre-train it on our targeted data. Finally, we release the first specialized PLMs for the biomedical field in French, called DrBERT, as well as the largest corpus of medical data under free license on which these models are trained.</abstract>
@@ -12903,7 +12903,7 @@
     <paper id="897">
       <title>Discriminative Reasoning with Sparse Event Representation for Document-level Event-Event Relation Extraction</title>
       <author><first>Changsen</first><last>Yuan</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yixin</first><last>Cao</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Yonggang</first><last>Wen</last><affiliation>NTU Singapore</affiliation></author>
       <pages>16222-16234</pages>
@@ -12938,7 +12938,7 @@
       <author><first>Vedanuj</first><last>Goswami</last><affiliation>Meta AI</affiliation></author>
       <author><first>Changhan</first><last>Wang</last><affiliation>Meta - Fundamental AI Research (FAIR)</affiliation></author>
       <author><first>Juan</first><last>Pino</last><affiliation>Facebook</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Holger</first><last>Schwenk</last><affiliation>Meta AI Research</affiliation></author>
       <pages>16251-16269</pages>
       <abstract>We present SpeechMatrix, a large-scale multilingual corpus of speech-to-speech translations mined from real speech of European Parliament recordings. It contains speech alignments in 136 language pairs with a total of 418 thousand hours of speech. To evaluate the quality of this parallel speech, we train bilingual speech-to-speech translation models on mined data only and establish extensive baseline results on EuroParl-ST, VoxPopuli and FLEURS test sets. Enabled by the multilinguality of SpeechMatrix, we also explore multilingual speech-to-speech translation, a topic which was addressed by few other works. We also demonstrate that model pre-training and sparse scaling using Mixture-of-Experts bring large gains to translation performance. The mined data and models will be publicly released</abstract>
@@ -13004,7 +13004,7 @@
       <author><first>Nikita</first><last>Nangia</last><affiliation>New York University</affiliation></author>
       <author><first>Richard Yuanzhe</first><last>Pang</last><affiliation>New York University</affiliation></author>
       <author><first>Jason</first><last>Phang</last><affiliation>New York University</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <pages>16334-16368</pages>
       <abstract>We present the results of the NLP Community Metasurvey. Run from May to June 2022, it elicited opinions on controversial issues, including industry influence in the field, concerns about AGI, and ethics. Our results put concrete numbers to several controversies: For example, respondents are split in half on the importance of artificial general intelligence, whether language models understand language, and the necessity of linguistic structure and inductive bias for solving NLP problems. In addition, the survey posed meta-questions, asking respondents to predict the distribution of survey responses. This allows us to uncover false sociological beliefs where the community’s predictions don’t match reality. Among other results, we find that the community greatly overestimates its own belief in the usefulness of benchmarks and the potential for scaling to solve real-world problems, while underestimating its belief in the importance of linguistic structure, inductive bias, and interdisciplinary science.</abstract>
       <url hash="3275b74d">2023.acl-long.903</url>
@@ -13045,7 +13045,7 @@
       <author><first>Hanieh</first><last>Deilamsalehy</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Hassan</first><last>Foroosh</last><affiliation>University of Central Florida</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>16409-16423</pages>
       <abstract>As the number of recorded meetings increases, it becomes increasingly important to utilize summarization technology to create useful summaries of these recordings. However, there is a crucial lack of annotated meeting corpora for developing this technology, as it can be hard to collect meetings, especially when the topics discussed are confidential. Furthermore, meeting summaries written by experienced writers are scarce, making it hard for abstractive summarizers to produce sensible output without a reliable reference. This lack of annotated corpora has hindered the development of meeting summarization technology. In this paper, we present MeetingBank, a new benchmark dataset of city council meetings over the past decade. MeetingBank is unique among other meeting corpora due to its divide-and-conquer approach, which involves dividing professionally written meeting minutes into shorter passages and aligning them with specific segments of the meeting. This breaks down the process of summarizing a lengthy meeting into smaller, more manageable tasks. The dataset provides a new testbed of various meeting summarization systems and also allows the public to gain insight into how council decisions are made. We make the collection, including meeting video links, transcripts, reference summaries, agenda, and other metadata, publicly available to facilitate the development of better meeting summarization techniques.</abstract>
       <url hash="adf7783e">2023.acl-long.906</url>
@@ -13121,7 +13121,7 @@
       <booktitle>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <editor><first>Anna</first><last>Rogers</last></editor>
       <editor><first>Jordan</first><last>Boyd-Graber</last></editor>
-      <editor><first>Naoaki</first><last>Okazaki</last></editor>
+      <editor id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
       <month>July</month>
@@ -13135,7 +13135,7 @@
     <paper id="1">
       <title>Should you marginalize over possible tokenizations?</title>
       <author><first>Nadezhda</first><last>Chirkova</last><affiliation>Naver Labs Europe</affiliation></author>
-      <author><first>Germán</first><last>Kruszewski</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Jos</first><last>Rozen</last><affiliation>NAVER LABS Europe</affiliation></author>
       <author><first>Marc</first><last>Dymetman</last><affiliation>Independent researcher</affiliation></author>
       <pages>1-12</pages>
@@ -13160,7 +13160,7 @@
       <author><first>Young Min</first><last>Kim</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Kalvin</first><last>Chang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Chenxuan</first><last>Cui</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
       <pages>24-38</pages>
       <abstract>Protoform reconstruction is the task of inferring what morphemes or words appeared like in the ancestral languages of a set of daughter languages. Meloni et al (2021) achieved the state-of-the-art on Latin protoform reconstruction with an RNN-based encoder-decoder with attention model. We update their model with the state-of-the-art seq2seq model: the Transformer. Our model outperforms their model on a suite of different metrics on two different datasets: their Romance data of 8,000 cognates spanning 5 languages and a Chinese dataset (Hou 2004) of 800+ cognates spanning 39 varieties. We also probe our model for potential phylogenetic signal contained in the model. Our code is publicly available at <url>https://github.com/cmu-llab/acl-2023</url>.</abstract>
       <url hash="d873386b">2023.acl-short.3</url>
@@ -13219,12 +13219,12 @@
       <title>Tracing Linguistic Markers of Influence in a Large Online Organisation</title>
       <author><first>Prashant</first><last>Khare</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Ravi</first><last>Shekhar</last><affiliation>University of Essex</affiliation></author>
-      <author><first>Vanja Mladen</first><last>Karan</last><affiliation>Queen Mary University</affiliation></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last><affiliation>Queen Mary University</affiliation></author>
       <author><first>Stephen</first><last>McQuistin</last><affiliation>University of Glasgow</affiliation></author>
       <author><first>Colin</first><last>Perkins</last><affiliation>University of Glasgow</affiliation></author>
       <author><first>Ignacio</first><last>Castro</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Gareth</first><last>Tyson</last><affiliation>QMUL</affiliation></author>
-      <author><first>Patrick</first><last>Healey</last><affiliation>Queen Mary, University of London</affiliation></author>
+      <author id="patrick-healey"><first>Patrick</first><last>Healey</last><affiliation>Queen Mary, University of London</affiliation></author>
       <author><first>Matthew</first><last>Purver</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>82-90</pages>
       <abstract>Social science and psycholinguistic research have shown that power and status affect how people use language in a range of domains. Here, we investigate a similar question in a large, distributed, consensus-driven community with little traditional power hierarchy – the Internet Engineering Task Force (IETF), a collaborative organisation that designs internet standards. Our analysis based on lexical categories (LIWC) and BERT, shows that participants’ levels of influence can be predicted from their email text, and identify key linguistic differences (e.g., certain LIWC categories, such as “WE” are positively correlated with high-influence). We also identify the differences in language use for the same person before and after becoming influential.</abstract>
@@ -13280,7 +13280,7 @@
       <author><first>Aru</first><last>Maekawa</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Naoki</first><last>Kobayashi</last><affiliation>LegalOnTechnologies</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>119-127</pages>
       <abstract>Dataset distillation aims to create a small dataset of informative synthetic samples to rapidly train neural networks that retain the performance of the original dataset. In this paper, we focus on constructing distilled few-shot datasets for natural language processing (NLP) tasks to fine-tune pre-trained transformers. Specifically, we propose to introduce attention labels, which can efficiently distill the knowledge from the original dataset and transfer it to the transformer models via attention probabilities. We evaluated our dataset distillation methods in four various NLP tasks and demonstrated that it is possible to create distilled few-shot datasets with the attention labels, yielding impressive performances for fine-tuning BERT. Specifically, in AGNews, a four-class news classification task, our distilled few-shot dataset achieved up to 93.2% accuracy, which is 98.5% performance of the original dataset even with only one sample per class and only one gradient step.</abstract>
       <url hash="54e354ee">2023.acl-short.12</url>
@@ -13292,8 +13292,8 @@
       <title>Multi-Document Summarization with Centroid-Based Pretraining</title>
       <author><first>Ratish Surendran</first><last>Puduppully</last><affiliation>A-Star Research Entities</affiliation></author>
       <author><first>Parag</first><last>Jain</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>128-138</pages>
       <abstract>In Multi-Document Summarization (MDS), the input can be modeled as a set of documents, and the output is its summary. In this paper, we focus on pretraining objectives for MDS. Specifically, we introduce a novel pretraining objective, which involves selecting the ROUGE-based centroid of each document cluster as a proxy for its summary. Our objective thus does not require human written summaries and can be utilized for pretraining on a dataset consisting solely of document sets. Through zero-shot, few-shot, and fully supervised experiments on multiple MDS datasets, we show that our model <i>Centrum</i> is better or comparable to a state-of-the-art model. We make the pretrained and fine-tuned models freely available to the research community <url>https://github.com/ratishsp/centrum</url>.</abstract>
       <url hash="68e6b11d">2023.acl-short.13</url>
@@ -13330,7 +13330,7 @@
       <title><fixed-case>H</fixed-case>i<fixed-case>P</fixed-case>ool: Modeling Long Documents Using Graph Neural Networks</title>
       <author><first>Irene</first><last>Li</last><affiliation>University of Tokyo</affiliation></author>
       <author><first>Aosong</first><last>Feng</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Rex</first><last>Ying</last><affiliation>Yale University</affiliation></author>
       <pages>161-171</pages>
       <abstract>Encoding long sequences in Natural Language Processing (NLP) is a challenging problem. Though recent pretraining language models achieve satisfying performances in many NLP tasks, they are still restricted by a pre-defined maximum length, making them challenging to be extended to longer sequences. So some recent works utilize hierarchies to model long sequences. However, most of them apply sequential models for upper hierarchies, suffering from long dependency issues. In this paper, we alleviate these issues through a graph-based method. We first chunk the sequence with a fixed length to model the sentence-level information. We then leverage graphs to model intra- and cross-sentence correlations with a new attention mechanism. Additionally, due to limited standard benchmarks for long document classification (LDC), we propose a new challenging benchmark, totaling six datasets with up to 53k samples and 4034 average tokens’ length. Evaluation shows our model surpasses competitive baselines by 2.6% in F1 score, and 4.8% on the longest sequence dataset. Our method is shown to outperform hierarchical sequential models with better performance and scalability, especially for longer sequences.</abstract>
@@ -13344,7 +13344,7 @@
       <author><first>Michael</first><last>Yoder</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Ahmad</first><last>Diab</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>David</first><last>Brown</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Kathleen</first><last>Carley</last><affiliation>Carnegie Mellon University, Netanomics</affiliation></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last><affiliation>Carnegie Mellon University, Netanomics</affiliation></author>
       <pages>172-185</pages>
       <abstract>We present a dataset and classifier for detecting the language of white supremacist extremism, a growing issue in online hate speech. Our weakly supervised classifier is trained on large datasets of text from explicitly white supremacist domains paired with neutral and anti-racist data from similar domains. We demonstrate that this approach improves generalization performance to new domains. Incorporating anti-racist texts as counterexamples to white supremacist language mitigates bias.</abstract>
       <url hash="a4798bcf">2023.acl-short.17</url>
@@ -13483,7 +13483,7 @@
       <author><first>Zhijiang</first><last>Guo</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Zhiyang</first><last>Teng</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Irwin</first><last>King</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <pages>303-311</pages>
       <abstract>Multimodal relation extraction (MRE) is the task of identifying the semantic relationships between two entities based on the context of the sentence image pair. Existing retrieval-augmented approaches mainly focused on modeling the retrieved textual knowledge, but this may not be able to accurately identify complex relations. To improve the prediction, this research proposes to retrieve textual and visual evidence based on the object, sentence, and whole image. We further develop a novel approach to synthesize the object-level, image-level, and sentence-level information for better reasoning between the same and different modalities. Extensive experiments and analyses show that the proposed method is able to effectively select and compare evidence across modalities and significantly outperforms state-of-the-art models.</abstract>
       <url hash="c2170393">2023.acl-short.27</url>
@@ -13535,7 +13535,7 @@
     <paper id="31">
       <title><fixed-case>PLUE</fixed-case>: Language Understanding Evaluation Benchmark for Privacy Policies in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Jianfeng</first><last>Chi</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Yuan</first><last>Tian</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
       <pages>352-365</pages>
@@ -13612,7 +13612,7 @@
     <paper id="37">
       <title>Credible without Credit: Domain Experts Assess Generative Language Models</title>
       <author><first>Denis</first><last>Peskoff</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Brandon</first><last>Stewart</last><affiliation>Princeton University</affiliation></author>
+      <author id="brandon-m-stewart"><first>Brandon</first><last>Stewart</last><affiliation>Princeton University</affiliation></author>
       <pages>427-438</pages>
       <abstract>Language models have recently broken into the public consciousness with the release of the wildly popular ChatGPT. Commentators have argued that language models could replace search engines, make college essays obsolete, or even write academic research papers. All of these tasks rely on accuracy of specialized information which can be difficult to assess for non-experts. Using 10 domain experts across science and culture, we provide an initial assessment of the coherence, conciseness, accuracy, and sourcing of two language models across 100 expert-written questions. While we find the results are consistently cohesive and concise, we find that they are mixed in their accuracy. These results raise questions of the role language models should play in general-purpose and expert knowledge seeking.</abstract>
       <url hash="725041ea">2023.acl-short.37</url>
@@ -13625,7 +13625,7 @@
       <author><first>Shikhar</first><last>Murty</last><affiliation>Stanford University</affiliation></author>
       <author><first>Pratyusha</first><last>Sharma</last><affiliation>MIT</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>MIT</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>439-448</pages>
       <abstract>For humans, language production and comprehension is sensitive to the hierarchical structure of sentences. In natural language processing, past work has questioned how effectively neural sequence models like transformers capture this hierarchical structure when generalizing to structurally novel inputs. We show that transformer language models can learn to generalize hierarchically after training for extremely long periods—far beyond the point when in-domain accuracy has saturated. We call this phenomenon structural grokking. On multiple datasets, structural grokking exhibits inverted U-shaped scaling in model depth: intermediate-depth models generalize better than both very deep and very shallow transformers. When analyzing the relationship between model-internal properties and grokking, we find that optimal depth for grokking can be identified using the tree-structuredness metric of CITATION. Overall, our work provides strong evidence that, with extended training, vanilla transformers discover and use hierarchical structure.</abstract>
       <url hash="84378561">2023.acl-short.38</url>
@@ -13704,7 +13704,7 @@
       <author><first>Francisco</first><last>Valentini</last><affiliation>ICC (UBA - CONICET); Maestría en Data Mining (UBA)</affiliation></author>
       <author><first>Germán</first><last>Rosati</last><affiliation>CONICET / UNSAM</affiliation></author>
       <author><first>Damián</first><last>Blasi</last><affiliation>Harvard University and Max Planck Institute for the Science of Human History</affiliation></author>
-      <author><first>Diego</first><last>Fernandez Slezak</last><affiliation>Universidad de Buenos Aires</affiliation></author>
+      <author id="diego-fernandez-slezak"><first>Diego</first><last>Fernandez Slezak</last><affiliation>Universidad de Buenos Aires</affiliation></author>
       <author><first>Edgar</first><last>Altszyler</last><affiliation>Instituto de Investigación en Ciencias de La Computación (UBA-CONICET); GetGloby</affiliation></author>
       <pages>509-520</pages>
       <abstract>In recent years, word embeddings have been widely used to measure biases in texts. Even if they have proven to be effective in detecting a wide variety of biases, metrics based on word embeddings lack transparency and interpretability. We analyze an alternative PMI-based metric to quantify biases in texts. It can be expressed as a function of conditional probabilities, which provides a simple interpretation in terms of word co-occurrences. We also prove that it can be approximated by an odds ratio, which allows estimating confidence intervals and statistical significance of textual biases. This approach produces similar results to metrics based on word embeddings when capturing gender gaps of the real world embedded in large corpora.</abstract>
@@ -13758,7 +13758,7 @@
       <author><first>Chenkai</first><last>Sun</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Jinning</first><last>Li</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Hou Pong</first><last>Chan</last><affiliation>University of Macau</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>554-562</pages>
       <abstract>Predicting how a user responds to news events enables important applications such as allowing intelligent agents or content producers to estimate the effect on different communities and revise unreleased messages to prevent unexpected bad outcomes such as social conflict and moral injury. We present a new task, Response Forecasting on Personas for News Media, to estimate the response a persona (characterizing an individual or a group) might have upon seeing a news message. Compared to the previous efforts which only predict generic comments to news, the proposed task not only introduces personalization in the modeling but also predicts the sentiment polarity and intensity of each response. This enables more accurate and comprehensive inference on the mental state of the persona. Meanwhile, the generated sentiment dimensions make the evaluation and application more reliable. We create the first benchmark dataset, which consists of 13,357 responses to 3,847 news headlines from Twitter. We further evaluate the SOTA neural language models with our dataset. The empirical results suggest that the included persona attributes are helpful for the performance of all response dimensions. Our analysis shows that the best-performing models are capable of predicting responses that are consistent with the personas, and as a byproduct, the task formulation also enables many interesting applications in the analysis of social network groups and their opinions, such as the discovery of extreme opinion groups.</abstract>
@@ -13818,7 +13818,7 @@
       <title>Probing Physical Reasoning with Counter-Commonsense Context</title>
       <author><first>Kazushi</first><last>Kondo</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Saku</first><last>Sugawara</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>603-612</pages>
       <abstract>In this study, we create a CConS (Counter-commonsense Contextual Size comparison) dataset to investigate how physical commonsense affects the contextualized size comparison task; the proposed dataset consists of both contexts that fit physical commonsense and those that do not. This dataset tests the ability of language models to predict the size relationship between objects under various contexts generated from our curated noun list and templates. We measure the ability of several masked language models and encoder-decoder models. The results show that while large language models can use prepositions such as “in” and “into” in the provided context to infer size relationships, they fail to use verbs and thus make incorrect judgments led by their prior physical commonsense.</abstract>
       <url hash="1872ab1e">2023.acl-short.53</url>
@@ -13844,7 +13844,7 @@
       <author><first>Wenjuan</first><last>Han</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Hui</first><last>Di</last><affiliation>Toshiba (China) Co., Ltd.</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>623-636</pages>
       <abstract>Traditional machine translation evaluation relies on reference written by humans. While reference-free evaluation gets rid of the constraints of labor-intensive annotations, which can pivot easily to new domains and is more scalable. In this paper, we propose a reference-free evaluation approach that characterizes evaluation as two aspects: (1) fluency: how well the translated text conforms to normal human language usage; (2) faithfulness: how well the translated text reflects the source data. We further split the faithfulness into word-level and sentence-level. Extensive experiments spanning WMT18/19/21 Metrics segment-level daRR and MQM datasets demonstrate that our proposed reference-free approach, ReFreeEval, outperforms SOTA reference-fee metrics like YiSi-2.</abstract>
       <url hash="59238d51">2023.acl-short.55</url>
@@ -13909,7 +13909,7 @@
       <author><first>Mathias</first><last>Müller</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Zifan</first><last>Jiang</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Amit</first><last>Moryossef</last><affiliation>Bar-Ilan university, University of Zurich</affiliation></author>
-      <author><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Sarah</first><last>Ebling</last><affiliation>University of Zurich</affiliation></author>
       <pages>682-693</pages>
       <abstract>Automatic sign language processing is gaining popularity in Natural Language Processing (NLP) research (Yin et al., 2021). In machine translation (MT) in particular, sign language translation based on glosses is a prominent approach. In this paper, we review recent works on neural gloss translation. We find that limitations of glosses in general and limitations of specific datasets are not discussed in a transparent manner and that there is no common standard for evaluation. To address these issues, we put forward concrete recommendations for future research on gloss translation. Our suggestions advocate awareness of the inherent limitations of gloss-based approaches, realistic datasets, stronger baselines and convincing evaluation.</abstract>
@@ -13996,7 +13996,7 @@
       <author><first>Austin</first><last>Simmmons</last><affiliation>RIT</affiliation></author>
       <author><first>Paridhi</first><last>Khandelwal</last><affiliation>RIT</affiliation></author>
       <author><first>Sara</first><last>Rosenthal</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>762-770</pages>
       <abstract>We present TBO, a new dataset for Target-based Offensive language identification. TBO contains post-level annotations regarding the harmfulness of an offensive post and token-level annotations comprising of the target and the offensive argument expression. Popular offensive language identification datasets for social media focus on annotation taxonomies only at the post level and more recently, some datasets have been released that feature only token-level annotations. TBO is an important resource that bridges the gap between post-level and token-level annotation datasets by introducing a single comprehensive unified annotation taxonomy. We use the TBO taxonomy to annotate post-level and token-level offensive language on English Twitter posts. We release an initial dataset of over 4,500 instances collected from Twitter and we carry out multiple experiments to compare the performance of different models trained and tested on TBO.</abstract>
       <url hash="a16115d8">2023.acl-short.66</url>
@@ -14042,7 +14042,7 @@
       <author><first>Yasaman</first><last>Boreshban</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Salam</first><last>Khalifa</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>SeyedAbolghasem</first><last>Mirroshandel</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>793-803</pages>
       <abstract>Building a system for morphological processing is a challenging task in morphologically complex languages like Arabic. Although there are some deep learning based models that achieve successful results, these models rely on a large amount of annotated data. Building such datasets, specially for some of the lower-resource Arabic dialects, is very difficult, time-consuming, and expensive. In addition, some parts of the annotated data do not contain useful information for training machine learning models. Active learning strategies allow the learner algorithm to select the most informative samples for annotation. There has been little research that focuses on applying active learning for morphological inflection and morphophonological processing. In this paper, we have proposed a deep active learning method for this task. Our experiments on Egyptian Arabic show that with only about 30% of annotated data, we achieve the same results as does the state-of-the-art model on the whole dataset.</abstract>
       <url hash="7aab0ef9">2023.acl-short.69</url>
@@ -14065,7 +14065,7 @@
     <paper id="71">
       <title>Bhasa-<fixed-case>A</fixed-case>bhijnaanam: Native-script and romanized Language Identification for 22 <fixed-case>I</fixed-case>ndic languages</title>
       <author><first>Yash</first><last>Madhani</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft AI and Research</affiliation></author>
       <pages>816-826</pages>
       <abstract>We create publicly available language identification (LID) datasets and models in all 22 Indian languages listed in the Indian constitution in both native-script and romanized text. First, we create Bhasha-Abhijnaanam, a language identification test set for native-script as well as romanized text which spans all 22 Indic languages. We also train IndicLID, a language identifier for all the above-mentioned languages in both native and romanized script. For native-script text, it has better language coverage than existing LIDs and is competitive or better than other LIDs. IndicLID is the first LID for romanized text in Indian languages. Two major challenges for romanized text LID are the lack of training data and low-LID performance when languages are similar. We provide simple and effective solutions to these problems. In general, there has been limited work on romanized text in any language, and our findings are relevant to other languages that need romanized language identification. Our models are publicly available at <url>https://github.com/AI4Bharat/IndicLID</url> under open-source licenses. Our training and test sets are also publicly available at <url>https://huggingface.co/datasets/ai4bharat/Bhasha-Abhijnaanam</url> under open-source licenses.</abstract>
@@ -14193,13 +14193,13 @@
       <title><fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> for Zero-shot Dialogue State Tracking: A Solution or an Opportunity?</title>
       <author><first>Michael</first><last>Heck</last><affiliation>Heinrich Heine University</affiliation></author>
       <author><first>Nurul</first><last>Lubis</last><affiliation>Heinrich Heine University</affiliation></author>
-      <author><first>Benjamin</first><last>Ruppik</last><affiliation>Heinrich Heine University Düsseldorf</affiliation></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin</first><last>Ruppik</last><affiliation>Heinrich Heine University Düsseldorf</affiliation></author>
       <author><first>Renato</first><last>Vukovic</last><affiliation>Heinrich Heine University Düsseldorf</affiliation></author>
       <author><first>Shutong</first><last>Feng</last><affiliation>Heinrich-Heine-Universität Düsseldorf</affiliation></author>
       <author><first>Christian</first><last>Geishauser</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
       <author><first>Hsien-chin</first><last>Lin</last><affiliation>Heinrich Heine University</affiliation></author>
       <author><first>Carel</first><last>van Niekerk</last><affiliation>Heinrich Heine University</affiliation></author>
-      <author><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
       <pages>936-950</pages>
       <abstract>Recent research on dialog state tracking (DST) focuses on methods that allow few- and zero-shot transfer to new domains or schemas. However, performance gains heavily depend on aggressive data augmentation and fine-tuning of ever larger language model based architectures. In contrast, general purpose language models, trained on large amounts of diverse data, hold the promise of solving any kind of task without task-specific training. We present preliminary experimental results on the ChatGPT research preview, showing that ChatGPT achieves state-of-the-art performance in zero-shot DST. Despite our findings, we argue that properties inherent to general purpose models limit their ability to replace specialized systems. We further theorize that the in-context learning capabilities of such models will likely become powerful tools to support the development of dedicated dialog state trackers and enable dynamic methods.</abstract>
       <url hash="bef4c638">2023.acl-short.81</url>
@@ -14248,7 +14248,7 @@
     <paper id="85">
       <title><fixed-case>N</fixed-case>olly<fixed-case>S</fixed-case>enti: Leveraging Transfer Learning and Machine Translation for <fixed-case>N</fixed-case>igerian Movie Sentiment Classification</title>
       <author><first>Iyanuoluwa</first><last>Shode</last><affiliation>Montclair State University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>JIng</first><last>Peng</last><affiliation>Montclair State University</affiliation></author>
       <author><first>Anna</first><last>Feldman</last><affiliation>Montclair State University</affiliation></author>
       <pages>986-998</pages>
@@ -14274,7 +14274,7 @@
       <title>An (unhelpful) guide to selecting the best <fixed-case>ASR</fixed-case> architecture for your under-resourced language</title>
       <author><first>Robert</first><last>Jimerson</last><affiliation>Rochester institute of Technology</affiliation></author>
       <author><first>Zoey</first><last>Liu</last><affiliation>Department of Linguistics, University of Florida</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>1008-1016</pages>
       <abstract>Advances in deep neural models for automatic speech recognition (ASR) have yielded dramatic improvements in ASR quality for resource-rich languages, with English ASR now achieving word error rates comparable to that of human transcribers. The vast majority of the world’s languages, however, lack the quantity of data necessary to approach this level of accuracy. In this paper we use four of the most popular ASR toolkits to train ASR models for eleven languages with limited ASR training resources: eleven widely spoken languages of Africa, Asia, and South America, one endangered language of Central America, and three critically endangered languages of North America. We find that no single architecture consistently outperforms any other. These differences in performance so far do not appear to be related to any particular feature of the datasets or characteristics of the languages. These findings have important implications for future research in ASR for under-resourced languages. ASR systems for languages with abundant existing media and available speakers may derive the most benefit simply by collecting large amounts of additional acoustic and textual training data. Communities using ASR to support endangered language documentation efforts, who cannot easily collect more data, might instead focus on exploring multiple architectures and hyperparameterizations to optimize performance within the constraints of their available data and resources.</abstract>
       <url hash="623eccfe">2023.acl-short.87</url>
@@ -14286,7 +14286,7 @@
       <title>The Ecological Fallacy in Annotation: Modeling Human Label Variation goes beyond Sociodemographics</title>
       <author><first>Matthias</first><last>Orlikowski</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Paul</first><last>Röttger</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>1017-1029</pages>
       <abstract>Many NLP tasks exhibit human label variation, where different annotators give different labels to the same texts. This variation is known to depend, at least in part, on the sociodemographics of annotators. Recent research aims to model individual annotator behaviour rather than predicting aggregated labels, and we would expect that sociodemographic information is useful for these models. On the other hand, the ecological fallacy states that aggregate group behaviour, such as the behaviour of the average female annotator, does not necessarily explain individual behaviour. To account for sociodemographics in models of individual annotator behaviour, we introduce group-specific layers to multi-annotator models. In a series of experiments for toxic content detection, we find that explicitly accounting for sociodemographic attributes in this way does not significantly improve model performance. This result shows that individual annotation behaviour depends on much more than just sociodemographics.</abstract>
@@ -14311,7 +14311,7 @@
       <author><first>Vikas</first><last>Raunak</last><affiliation>Microsoft</affiliation></author>
       <author><first>Arul</first><last>Menezes</last><affiliation>Microsoft Translator</affiliation></author>
       <author><first>Matt</first><last>Post</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last><affiliation>Microsoft</affiliation></author>
       <pages>1041-1050</pages>
       <abstract>Large Language Models (LLMs) such as GPT-3 have emerged as general-purpose language models capable of addressing many natural language generation or understanding tasks. On the task of Machine Translation (MT), multiple works have investigated few-shot prompting mechanisms to elicit better translations from LLMs. However, there has been relatively little investigation on how such translations differ qualitatively from the translations generated by standard Neural Machine Translation (NMT) models. In this work, we investigate these differences in terms of the literalness of translations produced by the two systems. Using literalness measures involving word alignment and monotonicity, we find that translations out of English (E-X) from GPTs tend to be less literal, while exhibiting similar or better scores on MT quality metrics. We demonstrate that this finding is borne out in human evaluations as well. We then show that these differences are especially pronounced when translating sentences that contain idiomatic expressions.</abstract>
       <url hash="19f05b53">2023.acl-short.90</url>
@@ -14363,9 +14363,9 @@
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
       <author><first>Nuno M.</first><last>Guerreiro</last><affiliation>Instituto de Telecomunicacoes, University of Lisbon</affiliation></author>
       <author><first>Marcos</first><last>Treviso</last><affiliation>Instituto de Telecomunicacoes</affiliation></author>
-      <author><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Tecnico</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last><affiliation>INESC-ID/Instituto Superior Tecnico</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>1089-1105</pages>
       <abstract>Neural metrics for machine translation evaluation, such as COMET, exhibit significant improvements in their correlation with human judgments, as compared to traditional metrics based on lexical overlap, such as BLEU. Yet, neural metrics are, to a great extent, “black boxes” returning a single sentence-level score without transparency about the decision-making process. In this work, we develop and compare several neural explainability methods and demonstrate their effectiveness for interpreting state-of-the-art fine-tuned neural metrics. Our study reveals that these metrics leverage token-level information that can be directly attributed to translation errors, as assessed through comparison of token-level neural saliency maps with Multidimensional Quality Metrics (MQM) annotations and with synthetically-generated critical translation errors. To ease future research, we release our code at: <url>https://github.com/Unbabel/COMET/tree/explainable-metrics</url></abstract>
       <url hash="1d7c88ce">2023.acl-short.94</url>
@@ -14394,7 +14394,7 @@
       <author><first>Tianyu</first><last>Zhao</last><affiliation>rinna Co., Ltd.</affiliation></author>
       <author><first>Makoto</first><last>Shing</last><affiliation>rinna Co., Ltd.</affiliation></author>
       <author><first>Kei</first><last>Sawada</last><affiliation>rinna Co., Ltd.</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>1116-1127</pages>
       <abstract>In a controllable text generation dataset, there exist unannotated attributes that could provide irrelevant learning signals to models that use it for training and thus degrade their performance. We propose focused prefix tuning (FPT) to mitigate the problem and to enable the control to focus on the desired attribute. Experimental results show that FPT can achieve better control accuracy and text fluency than baseline models in single-attribute control tasks. In multi-attribute control tasks, FPT achieves comparable control accuracy with the state-of-the-art approach while keeping the flexibility to control new attributes without retraining existing models.</abstract>
       <url hash="0e94771c">2023.acl-short.96</url>
@@ -14451,7 +14451,7 @@
       <author><first>Zhiming</first><last>Mao</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Huimin</first><last>Wang</last><affiliation>Tencent</affiliation></author>
       <author><first>Yiming</first><last>Du</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>1160-1170</pages>
       <abstract>Prior study has shown that pretrained language models (PLM) can boost the performance of text-based recommendation. In contrast to previous works that either use PLM to encode user history as a whole input text, or impose an additional aggregation network to fuse multi-turn history representations, we propose a unified local- and global-attention Transformer encoder to better model two-level contexts of user history. Moreover, conditioned on user history encoded by Transformer encoders, our framework leverages Transformer decoders to estimate the language perplexity of candidate text items, which can serve as a straightforward yet significant contrastive signal for user-item text matching. Based on this, our framework, UniTRec, unifies the contrastive objectives of discriminative matching scores and candidate text perplexity to jointly enhance text-based recommendation. Extensive evaluation shows that UniTRec delivers SOTA performance on three text-based recommendation tasks.</abstract>
       <url hash="f68e059d">2023.acl-short.100</url>
@@ -14466,7 +14466,7 @@
       <author><first>Qian</first><last>Liu</last><affiliation>Sea AI Lab</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>1171-1182</pages>
       <abstract>While sentiment analysis systems try to determine the sentiment polarities of given targets based on the key opinion expressions in input texts, in implicit sentiment analysis (ISA) the opinion cues come in an implicit and obscure manner. Thus detecting implicit sentiment requires the common-sense and multi-hop reasoning ability to infer the latent intent of opinion. Inspired by the recent chain-of-thought (CoT) idea, in this work we introduce a Three-hop Reasoning (THOR) CoT framework to mimic the human-like reasoning process for ISA. We design a three-step prompting principle for THOR to step-by-step induce the implicit aspect, opinion, and finally the sentiment polarity. Our THOR+Flan-T5 (11B) pushes the state-of-the-art (SoTA) by over 6% F1 on supervised setup. More strikingly, THOR+GPT3 (175B) boosts the SoTA by over 50% F1 on zero-shot setting.</abstract>
       <url hash="9071b9a1">2023.acl-short.101</url>
@@ -14479,7 +14479,7 @@
       <author><first>Ta-Chung</first><last>Chi</last><affiliation>carnegie mellon university</affiliation></author>
       <author><first>Ting-Han</first><last>Fan</last><affiliation>Princeton University</affiliation></author>
       <author><first>Li-Wei</first><last>Chen</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Peter</first><last>Ramadge</last><affiliation>Princeton University</affiliation></author>
       <pages>1183-1193</pages>
       <abstract>The use of positional embeddings in transformer language models is widely accepted. However, recent research has called into question the necessity of such embeddings. We further extend this inquiry by demonstrating that a randomly initialized and frozen transformer language model, devoid of positional embeddings, inherently encodes strong positional information through the shrinkage of self-attention variance. To quantify this variance, we derive the underlying distribution of each step within a transformer layer. Through empirical validation using a fully pretrained model, we show that the variance shrinkage effect still persists after extensive gradient updates. Our findings serve to justify the decision to discard positional embeddings and thus facilitate more efficient pretraining of transformer language models.</abstract>
@@ -14502,7 +14502,7 @@
       <title>Class based Influence Functions for Error Detection</title>
       <author><first>Thang</first><last>Nguyen-Duc</last><affiliation>FPT Software AI Center</affiliation></author>
       <author><first>Hoang</first><last>Thanh-Tung</last><affiliation>FPT Software AI Center</affiliation></author>
-      <author><first>Quan Hung</first><last>Tran</last><affiliation>Adobe Research</affiliation></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Dang</first><last>Huu-Tien</last><affiliation>FPT Software AI Center</affiliation></author>
       <author><first>Hieu</first><last>Nguyen</last><affiliation>FPT Software AI Center</affiliation></author>
       <author><first>Anh</first><last>T. V. Dau</last><affiliation>FPT Software AI Center</affiliation></author>
@@ -14675,8 +14675,8 @@
       <title>Text-to-<fixed-case>SQL</fixed-case> Error Correction with Language Models of Code</title>
       <author><first>Ziru</first><last>Chen</last><affiliation>Ohio State University</affiliation></author>
       <author><first>Shijie</first><last>Chen</last><affiliation>The Ohio State University</affiliation></author>
-      <author><first>Michael</first><last>White</last><affiliation>The Ohio State University</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="michael-white"><first>Michael</first><last>White</last><affiliation>The Ohio State University</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Ali</first><last>Payani</last><affiliation>Cisco</affiliation></author>
       <author><first>Jayanth</first><last>Srinivasa</last><affiliation>Cisco Systems Inc</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>The Ohio State University</affiliation></author>
@@ -14708,9 +14708,9 @@
       <author><first>Chantal</first><last>Shaib</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Millicent</first><last>Li</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Sebastian</first><last>Joseph</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Iain</first><last>Marshall</last><affiliation>King’s College London</affiliation></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last><affiliation>King’s College London</affiliation></author>
       <author><first>Junyi Jessy</first><last>Li</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>1387-1407</pages>
       <abstract>Large language models, particularly GPT-3, are able to produce high quality summaries ofgeneral domain news articles in few- and zero-shot settings. However, it is unclear if such models are similarly capable in more specialized domains such as biomedicine. In this paper we enlist domain experts (individuals with medical training) to evaluate summaries of biomedical articles generated by GPT-3, given no supervision. We consider bothsingle- and multi-document settings. In the former, GPT-3 is tasked with generating regular and plain-language summaries of articles describing randomized controlled trials; in thelatter, we assess the degree to which GPT-3 is able to synthesize evidence reported acrossa collection of articles. We design an annotation scheme for evaluating model outputs, withan emphasis on assessing the factual accuracy of generated summaries. We find that whileGPT-3 is able to summarize and simplify single biomedical articles faithfully, it strugglesto provide accurate aggregations of findings over multiple documents. We release all data,code, and annotations used in this work.</abstract>
       <url hash="4e80bbd6">2023.acl-short.119</url>
@@ -14798,8 +14798,8 @@
       <author><first>Xing</first><last>Niu</last><affiliation>Amazon AI</affiliation></author>
       <author><first>Benjamin</first><last>Hsu</last><affiliation>Amazon</affiliation></author>
       <author><first>Anna</first><last>Currey</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Georgiana</first><last>Dinu</last><affiliation>Amazon AWS</affiliation></author>
-      <author><first>Maria</first><last>Nadejde</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last><affiliation>Amazon AWS</affiliation></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last><affiliation>AWS AI Labs</affiliation></author>
       <pages>1476-1490</pages>
       <abstract>Attribute-controlled translation (ACT) is a subtask of machine translation that involves controlling stylistic or linguistic attributes (like formality and gender) of translation outputs. While ACT has garnered attention in recent years due to its usefulness in real-world applications, progress in the task is currently limited by dataset availability, since most prior approaches rely on supervised methods. To address this limitation, we propose Retrieval and Attribute-Marking enhanced Prompting (RAMP), which leverages large multilingual language models to perform ACT in few-shot and zero-shot settings. RAMP improves generation accuracy over the standard prompting approach by (1) incorporating a semantic similarity retrieval component for selecting similar in-context examples, and (2) marking in-context examples with attribute annotations. Our comprehensive experiments show that RAMP is a viable approach in both zero-shot and few-shot settings.</abstract>
       <url hash="e77ba23b">2023.acl-short.126</url>
@@ -14810,7 +14810,7 @@
     <paper id="127">
       <title>Zero-Shot and Few-Shot Stance Detection on Varied Topics via Conditional Generation</title>
       <author><first>Haoyang</first><last>Wen</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Alexander</first><last>Hauptmann</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1491-1499</pages>
       <abstract>Zero-shot and few-shot stance detection identify the polarity of text with regard to a certain target when we have only limited or no training resources for the target. Previous work generally formulates the problem into a classification setting, ignoring the potential use of label text. In this paper, we instead utilize a conditional generation framework and formulate the problem as denoising from partially-filled templates, which can better utilize the semantics among input, label, and target texts. We further propose to jointly train an auxiliary task, target prediction, and to incorporate manually constructed incorrect samples with unlikelihood training to improve the representations for both target and label texts. We also verify the effectiveness of target-related Wikipedia knowledge with the generation framework. Experiments show that our proposed method significantly outperforms several strong baselines on VAST, and achieves new state-of-the-art performance.</abstract>
       <url hash="7540a836">2023.acl-short.127</url>
@@ -14825,7 +14825,7 @@
       <author><first>Vasudha</first><last>Varadarajan</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Johannes</first><last>Eichstaedt</last><affiliation>Stanford University</affiliation></author>
       <author><first>Adithya</first><last>V Ganesan</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <pages>1500-1511</pages>
       <abstract>Anxiety disorders are the most common of mental illnesses, but relatively little is known about how to detect them from language. The primary clinical manifestation of anxiety is worry associated cognitive distortions, which are likely expressed at the discourse-level of semantics. Here, we investigate the development of a modern linguistic assessment for degree of anxiety, specifically evaluating the utility of discourse-level information in addition to lexical-level large language model embeddings. We find that a combined lexico-discourse model outperforms models based solely on state-of-the-art contextual embeddings (RoBERTa), with discourse-level representations derived from Sentence-BERT and DiscRE both providing additional predictive power not captured by lexical-level representations. Interpreting the model, we find that discourse patterns of causal explanations, among others, were used significantly more by those scoring high in anxiety, dovetailing with psychological literature.</abstract>
       <url hash="b282273d">2023.acl-short.128</url>
@@ -14922,7 +14922,7 @@
       <author><first>Pierluigi</first><last>Cassotti</last><affiliation>Università degli studi di Bari</affiliation></author>
       <author><first>Lucia</first><last>Siciliani</last><affiliation>University of Bari Aldo Moro</affiliation></author>
       <author><first>Marco</first><last>DeGemmis</last><affiliation>University of Bari</affiliation></author>
-      <author><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
       <author><first>Pierpaolo</first><last>Basile</last><affiliation>Department of Computer Science, University of Bari Aldo Moro</affiliation></author>
       <pages>1577-1585</pages>
       <abstract>The recent introduction of large-scale datasets for the WiC (Word in Context) task enables the creation of more reliable and meaningful contextualized word embeddings.However, most of the approaches to the WiC task use cross-encoders, which prevent the possibility of deriving comparable word embeddings.In this work, we introduce XL-LEXEME, a Lexical Semantic Change Detection model.XL-LEXEME extends SBERT, highlighting the target word in the sentence. We evaluate XL-LEXEME on the multilingual benchmarks for SemEval-2020 Task 1 - Lexical Semantic Change (LSC) Detection and the RuShiftEval shared task involving five languages: English, German, Swedish, Latin, and Russian.XL-LEXEME outperforms the state-of-the-art in English, German and Swedish with statistically significant differences from the baseline results and obtains state-of-the-art performance in the RuShiftEval shared task.</abstract>
@@ -14933,7 +14933,7 @@
     </paper>
     <paper id="136">
       <title>Theory-Grounded Computational Text Analysis</title>
-      <author><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Giovanna Maria Dora</first><last>Dore</last><affiliation>JHU</affiliation></author>
       <pages>1586-1594</pages>
       <abstract>In this position paper, we argue that computational text analysis lacks and requires organizing principles. A broad space separates its two constituent disciplines—natural language processing and social science—which has to date been sidestepped rather than filled by applying increasingly complex computational models to problems in social science research. We contrast descriptive and integrative findings, and our review of approximately 60 papers on computational text analysis reveals that those from *ACL venues are typically descriptive. The lack of theory began at the area’s inception and has over the decades, grown more important and challenging. A return to theoretically grounded research questions will propel the area from both theoretical and methodological points of view.</abstract>
@@ -15030,7 +15030,7 @@
       <title>Event Extraction as Question Generation and Answering</title>
       <author><first>Di</first><last>Lu</last><affiliation>Dataminr</affiliation></author>
       <author><first>Shihao</first><last>Ran</last><affiliation>Dataminr</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>1666-1688</pages>
       <abstract>Recent work on Event Extraction has reframed the task as Question Answering (QA), with promising results. The advantage of this approach is that it addresses the error propagation issue found in traditional token-based classification approaches by directly predicting event arguments without extracting candidates first. However, the questions are typically based on fixed templates and they rarely leverage contextual information such as relevant arguments. In addition, prior QA-based approaches have difficulty handling cases where there are multiple arguments for the same role. In this paper, we propose QGA-EE, which enables a Question Generation (QG) model to generate questions that incorporate rich contextual information instead of using fixed templates. We also propose dynamic templates to assist the training of QG model. Experiments show that QGA-EE outperforms all prior single-task-based models on the ACE05 English dataset.</abstract>
@@ -15041,7 +15041,7 @@
     </paper>
     <paper id="144">
       <title>Are Sample-Efficient <fixed-case>NLP</fixed-case> Models More Robust?</title>
-      <author><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Ananya</first><last>Kumar</last><affiliation>Stanford University</affiliation></author>
       <author><first>Percy</first><last>Liang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Robin</first><last>Jia</last><affiliation>University of Southern California</affiliation></author>
@@ -15071,7 +15071,7 @@
       <author><first>Phillip</first><last>Keung</last><affiliation>University of Washington</affiliation></author>
       <author><first>Daniel</first><last>Cheng</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jungo</first><last>Kasai</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>1723-1730</pages>
       <abstract>Large-scale language model pretraining is a very successful form of self-supervised learning in natural language processing, but it is increasingly expensive to perform as the models and pretraining corpora have become larger over time. We propose NarrowBERT, a modified transformer encoder that increases the throughput for masked language model pretraining by more than 2x. NarrowBERT sparsifies the transformer model such that the self-attention queries and feedforward layers only operate on the masked tokens of each sentence during pretraining, rather than all of the tokens as with the usual transformer encoder. We also show that NarrowBERT increases the throughput at inference time by as much as 3.5x with minimal (or no) performance degradation on sentence encoding tasks like MNLI. Finally, we examine the performance of NarrowBERT on the IMDB and Amazon reviews classification and CoNLL NER tasks and show that it is also comparable to standard BERT performance.</abstract>
       <url hash="0a682928">2023.acl-short.146</url>
@@ -15131,14 +15131,14 @@
     <paper id="150">
       <title><fixed-case>STT</fixed-case>4<fixed-case>SG</fixed-case>-350: A Speech Corpus for All <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Dialect Regions</title>
       <author><first>Michel</first><last>Plüss</last><affiliation>University of Applied Sciences and Arts Northwestern Switzerland</affiliation></author>
-      <author><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Yanick</first><last>Schraner</last><affiliation>University of Applied Sciences Northwestern Switzerland</affiliation></author>
       <author><first>Claudio</first><last>Paonessa</last><affiliation>University of Applied Sciences and Arts Northwestern Switzerland</affiliation></author>
       <author><first>Julia</first><last>Hartmann</last><affiliation>FHNW</affiliation></author>
       <author><first>Larissa</first><last>Schmidt</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Christian</first><last>Scheller</last><affiliation>University of Applied Sciences Northwestern Switzerland</affiliation></author>
-      <author><first>Manuela</first><last>Hürlimann</last><affiliation>Zurich University of Applied Sciences (ZHAW)</affiliation></author>
-      <author><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last><affiliation>Zurich University of Applied Sciences (ZHAW)</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Manfred</first><last>Vogel</last><affiliation>University of Applied Sciences Northwestern Switzerland</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <pages>1763-1772</pages>
@@ -15191,7 +15191,7 @@
       <title><fixed-case>S</fixed-case>co<fixed-case>N</fixed-case>e: Benchmarking Negation Reasoning in Language Models With Fine-Tuning and In-Context Learning</title>
       <author><first>Jingyuan S.</first><last>She</last><affiliation>Haverford College</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <author><first>Atticus</first><last>Geiger</last><affiliation>Stanford University</affiliation></author>
       <pages>1803-1821</pages>
       <abstract>A number of recent benchmarks seek to assess how well models handle natural language negation. However, these benchmarks lack the controlled example paradigms that would allow us to infer whether a model had truly learned how negation morphemes semantically scope. To fill these analytical gaps, we present the Scoped Negation NLI (ScoNe-NLI) benchmark, which contains contrast sets of six examples with up to two negations where either zero, one, or both negative morphemes affect the NLI label. We use ScoNe-NLI to assess fine-tuning and in-context learning strategies. We find that RoBERTa and DeBERTa models solve ScoNe-NLI after many shot fine-tuning. For in-context learning, we test the latest InstructGPT models and find that most prompt strategies are not successful, including those using step-by-step reasoning. To better understand this result, we extend ScoNe with ScoNe-NLG, a sentence completion test set that embeds negation reasoning in short narratives. Here, InstructGPT is successful, which reveals the model can correctly reason about negation, but struggles to do so on NLI examples outside of its core pretraining regime.</abstract>
@@ -15220,8 +15220,8 @@
       <author><first>Jie</first><last>Cao</last><affiliation>University of Colorado</affiliation></author>
       <author><first>E. Margaret</first><last>Perkoff</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Rosy</first><last>Southwell</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>1833-1842</pages>
       <abstract>Recent advances in NLP have led to a rise in inter-disciplinary and application-oriented research. While this demonstrates the growing real-world impact of the field, research papers frequently feature experiments that do not account for the complexities of realistic data and environments. To explore the extent of this gap, we investigate the relationship between the real-world motivations described in NLP papers and the models and evaluation which comprise the proposed solution. We first survey papers from the NLP Applications track from ACL 2020 and EMNLP 2020, asking which papers have differences between their stated motivation and their experimental setting, and if so, mention them. We find that many papers fall short of considering real-world input and output conditions due to adopting simplified modeling or evaluation settings. As a case study, we then empirically show that the performance of an educational dialog understanding system deteriorates when used in a realistic classroom environment.</abstract>
       <url hash="2b7e4d7c">2023.acl-short.156</url>
@@ -15233,8 +15233,8 @@
       <title>How to Distill your <fixed-case>BERT</fixed-case>: An Empirical Study on the Impact of Weight Initialisation and Distillation Objectives</title>
       <author id="xinpeng-wang-lmu"><first>Xinpeng</first><last>Wang</last><affiliation>Ludwig-Maximilians-Universitaet Muenchen</affiliation></author>
       <author><first>Leonie</first><last>Weissweiler</last><affiliation>CIS, LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>1843-1852</pages>
       <abstract>Recently, various intermediate layer distillation (ILD) objectives have been shown to improve compression of BERT models via Knowledge Distillation (KD). However, a comprehensive evaluation of the objectives in both task-specific and task-agnostic settings is lacking. To the best of our knowledge, this is the first work comprehensively evaluating distillation objectives in both settings. We show that attention transfer gives the best performance overall. We also study the impact of layer choice when initializing the student from the teacher layers, finding a significant impact on the performance in task-specific distillation. For vanilla KD and hidden states transfer, initialisation with lower layers of the teacher gives a considerable improvement over higher layers, especially on the task of QNLI (up to an absolute percentage change of 17.8 in accuracy). Attention transfer behaves consistently under different initialisation settings. We release our code as an efficient transformer-based model distillation framework for further studies.</abstract>
       <url hash="dd1be2db">2023.acl-short.157</url>
@@ -15366,8 +15366,8 @@
       <author><first>Rotem</first><last>Dror</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Sha</first><last>Li</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Reece</first><last>Suchocki</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Chris</first><last>Callison-Burch</last><affiliation>University of Pennsylvania</affiliation></author>
       <pages>1-10</pages>
@@ -15434,22 +15434,22 @@
     </paper>
     <paper id="5">
       <title><fixed-case>P</fixed-case>rime<fixed-case>QA</fixed-case>: The Prime Repository for State-of-the-Art Multilingual Question Answering Research and Development</title>
-      <author><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Jaydeep</first><last>Sen</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Bhavani</first><last>Iyer</last><affiliation>IBM</affiliation></author>
       <author><first>Martin</first><last>Franz</last><affiliation>IBM T.J. Watson Research Center</affiliation></author>
       <author><first>Kshitij</first><last>Fadnis</last><affiliation>IBM Research</affiliation></author>
       <author><first>Mihaela</first><last>Bornea</last><affiliation>IBM Research</affiliation></author>
       <author><first>Sara</first><last>Rosenthal</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Scott</first><last>McCarley</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="j-scott-mccarley"><first>Scott</first><last>McCarley</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Rong</first><last>Zhang</last><affiliation>IBM.com</affiliation></author>
       <author><first>Vishwajeet</first><last>Kumar</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Yulong</first><last>Li</last><affiliation>IBM research</affiliation></author>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Riyaz</first><last>Bhat</last><affiliation>IBM IRL</affiliation></author>
       <author><first>Juergen</first><last>Bross</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
       <pages>51-62</pages>
       <abstract>The field of Question Answering (QA) has made remarkable progress in recent years, thanks to the advent of large pre-trained language models, newer realistic benchmark datasets with leaderboards, and novel algorithms for key components such as retrievers and readers. In this paper, we introduce PrimeQA: a one-stop and open-source QA repository with an aim to democratize QA research and facilitate easy replication of state-of-the-art (SOTA) QA methods. PrimeQA supports core QA functionalities like retrieval and reading comprehension as well as auxiliary capabilities such as question generation. It has been designed as an end-to-end toolkit for various use cases: building front-end applications, replicating SOTA methods on public benchmarks, and expanding pre-existing methods. PrimeQA is available at: <url>https://github.com/primeqa</url>.</abstract>
       <url hash="71b03e03">2023.acl-demo.5</url>
@@ -15489,7 +15489,7 @@
       <title>A Practical Toolkit for Multilingual Question and Answer Generation</title>
       <author><first>Asahi</first><last>Ushio</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Fernando</first><last>Alva-Manchego</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>86-94</pages>
       <abstract>Generating questions along with associated answers from a text has applications in several domains, such as creating reading comprehension tests for students, or improving document search by providing auxiliary questions and answers based on the query. Training models for question and answer generation (QAG) is not straightforward due to the expected structured output (i.e. a list of question and answer pairs), as it requires more than generating a single sentence. This results in a small number of publicly accessible QAG models. In this paper, we introduce AutoQG, an online service for multilingual QAG along with lmqg, an all-in-one python package for model fine-tuning, generation, and evaluation. We also release QAG models in eight languages fine-tuned on a few variants of pre-trained encoder-decoder language models, which can be used online via AutoQG or locally via lmqg. With these resources, practitioners of any level can benefit from a toolkit that includes a web interface for end users, and easy-to-use code for developers who require custom models or fine-grained controls for generation.</abstract>
       <url hash="cc883ba1">2023.acl-demo.8</url>
@@ -15563,7 +15563,7 @@
     </paper>
     <paper id="14">
       <title>disco: a toolkit for Distributional Control of Generative Models</title>
-      <author><first>Germán</first><last>Kruszewski</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Jos</first><last>Rozen</last><affiliation>NAVER LABS Europe</affiliation></author>
       <author><first>Marc</first><last>Dymetman</last><affiliation>Independent researcher</affiliation></author>
       <pages>144-160</pages>
@@ -15590,7 +15590,7 @@
       <title><fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Simultaneous Dubbing Prototype</title>
       <author><first>Xiaolin</first><last>Wang</last><affiliation>NICT</affiliation></author>
       <author><first>Masao</first><last>Utiyama</last><affiliation>NICT</affiliation></author>
-      <author><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
       <pages>169-178</pages>
       <abstract>Live video streaming has become an important form of communication such as virtual conferences. However, for cross-language communication in live video streaming, reading subtitles degrades the viewing experience. To address this problem, our simultaneous dubbing prototype translates and replaces the original speech of a live video stream in a simultaneous manner. Tests on a collection of 90 public videos show that our system achieves a low average latency of 11.90 seconds for smooth playback. Our method is general and can be extended to other language pairs.</abstract>
       <url hash="4affb8ef">2023.acl-demo.16</url>
@@ -15630,7 +15630,7 @@
       <author><first>Sugyeong</first><last>Eo</last><affiliation>Korea University</affiliation></author>
       <author><first>Seounghoon</first><last>Lee</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <author><first>Bernardo</first><last>Yahya</last><affiliation>Hankuk University of Foreign Studies</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>190-207</pages>
       <abstract>English is acknowledged worldwide as a mode of communication. However, due to the absence of realistic practicing scenarios, students learning English as a foreign language (EFL) typically have limited chances to converse and share feedback with others. In this paper, we propose PEEP-Talk, a real-world situational dialogue-based chatbot designed for English education. It also naturally switches to a new topic or situation in response to out-of-topic utterances, which are common among English beginners. Furthermore, PEEP-Talk provides feedback score on conversation and grammar error correction. We performed automatic and user evaluations to validate performance and education efficiency of our system. The results show that PEEP-Talk generates appropriate responses in various real-life situations while providing accurate feedback to learners. Moreover, we demonstrate a positive impact on English-speaking, grammar, and English learning anxiety, implying that PEEP-Talk can lower the barrier to learning natural conversation in effective ways.</abstract>
       <url hash="cb4689e3">2023.acl-demo.18</url>
@@ -15727,7 +15727,7 @@
       <author><first>Raj</first><last>Dabre</last><affiliation>NICT</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Chinmay</first><last>Sawant</last><affiliation>Surrey University</affiliation></author>
-      <author><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last><affiliation>NICT</affiliation></author>
       <pages>257-263</pages>
       <abstract>In this paper, we present our open-source neural machine translation (NMT) toolkit called “Yet Another Neural Machine Translation Toolkit” abbreviated as YANMTT - <url>https://github.com/prajdabre/yanmtt</url>, which is built on top of the HuggingFace Transformers library. YANMTT focuses on transfer learning and enables easy pre-training and fine-tuning of sequence-to-sequence models at scale. It can be used for training parameter-heavy models with minimal parameter sharing and efficient, lightweight models via heavy parameter sharing. Additionally, it supports parameter-efficient fine-tuning (PEFT) through adapters and prompts. Our toolkit also comes with a user interface that can be used to demonstrate these models and visualize various parts of the model. Apart from these core features, our toolkit also provides other advanced functionalities such as but not limited to document/multi-source NMT, simultaneous NMT, mixtures-of-experts, model compression and continual learning.</abstract>
       <url hash="088584f0">2023.acl-demo.24</url>
@@ -15816,8 +15816,8 @@
     </paper>
     <paper id="30">
       <title>The <fixed-case>OPUS</fixed-case>-<fixed-case>MT</fixed-case> Dashboard – A Toolkit for a Systematic Evaluation of Open Machine Translation Models</title>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Ona</first><last>de Gibert</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last><affiliation>University of Helsinki</affiliation></author>
       <pages>315-327</pages>
       <abstract>The OPUS-MT dashboard is a web-based platform that provides a comprehensive overview of open translation models. We focus on a systematic collection of benchmark results with verifiable translation performance and large coverage in terms of languages and domains. We provide results for in-house OPUS-MT and Tatoeba models as well as external models from the Huggingface repository and user-contributed translations. The functionalities of the evaluation tool include summaries of benchmarks for over 2,300 models covering 4,560 language directions and 294 languages, as well as the inspection of predicted translations against their human reference. We focus on centralization, reproducibility and coverage of MT evaluation combined with scalability. The dashboard can be accessed live at <url>https://opus.nlpl.eu/dashboard/</url>.</abstract>
       <url hash="1ce1bc89">2023.acl-demo.30</url>
@@ -15832,7 +15832,7 @@
       <author><first>Fynn</first><last>Petersen-Frey</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Isabel</first><last>Eiser</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Gertraud</first><last>Koch</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>328-335</pages>
       <abstract>This work introduces the D-WISE Tool Suite (DWTS), a novel working environment for digital qualitative discourse analysis in the Digital Humanities (DH). The DWTS addresses limitations of current DH tools induced by the ever-increasing amount of heterogeneous, unstructured, and multi-modal data in which the discourses of contemporary societies are encoded. To provide meaningful insights from such data, our system leverages and combines state-of-the-art machine learning technologies from Natural Language Processing and Com-puter Vision. Further, the DWTS is conceived and developed by an interdisciplinary team ofcultural anthropologists and computer scientists to ensure the tool’s usability for modernDH research. Central features of the DWTS are: a) import of multi-modal data like text, image, audio, and video b) preprocessing pipelines for automatic annotations c) lexical and semantic search of documents d) manual span, bounding box, time-span, and frame annotations e) documentation of the research process.</abstract>
       <url hash="15af865b">2023.acl-demo.31</url>
@@ -15848,7 +15848,7 @@
       <author><first>Linyong</first><last>Nan</last><affiliation>Yale University</affiliation></author>
       <author><first>Minghao</first><last>Guo</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>336-347</pages>
       <abstract>There are a growing number of table pre-training methods proposed for reasoning over tabular data (e.g., question answering, fact checking, and faithful text generation). However, most existing methods are benchmarked solely on a limited number of datasets, varying in configuration, which leads to a lack of unified, standardized, fair, and comprehensive comparison between methods. This paper presents OpenRT, the first open-source framework for reasoning over tabular data, to reproduce existing table pre-training models for performance comparison and develop new models quickly. We implemented and compared six table pre-training models on four question answering, one fact checking, and one faithful text generation datasets. Moreover, to enable the community to easily construct new table reasoning datasets, we developed TaRAT, an annotation tool which supports multi-person collaborative annotations for various kinds of table reasoning tasks. The researchers are able to deploy the newly-constructed dataset to OpenRT and compare the performances of different baseline systems.</abstract>
       <url hash="16f39c3c">2023.acl-demo.32</url>
@@ -15887,7 +15887,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>B</fixed-case>i<fixed-case>S</fixed-case>ync: A Bilingual Editor for Synchronized Monolingual Texts</title>
-      <author><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
       <author><first>Jitao</first><last>Xu</last><affiliation>NetEase YouDao</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR CNRS &amp; Sorbonne Université</affiliation></author>
       <pages>369-376</pages>
@@ -16476,7 +16476,7 @@
       <title><fixed-case>M</fixed-case>ed<fixed-case>T</fixed-case>em2.0: Prompt-based Temporal Classification of Treatment Events from Discharge Summaries</title>
       <author><first>Yang</first><last>Cui</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Lifeng</first><last>Han</last><affiliation>The University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>160-183</pages>
       <abstract>Discharge summaries are comprehensive medical records that encompass vital information about a patient’s hospital stay. A crucial aspect of discharge summaries is the temporal information of treatments administered throughout the patient’s illness. With an extensive volume of clinical documents, manually extracting and compiling a patient’s medication list can be laborious, time-consuming, and susceptible to errors. The objective of this paper is to build upon the recent development on clinical NLP by temporally classifying treatments in clinical texts, specifically determining whether a treatment was administered between the time of admission and discharge from the hospital. State-of-the-art NLP methods including prompt-based learning on Generative Pre-trained Transformers (GPTs) models and fine-tuning on pre-trained language models (PLMs) such as BERT were employed to classify temporal relations between treatments and hospitalisation periods in discharge summaries. Fine-tuning with the BERT model achieved an F1 score of 92.45% and a balanced accuracy of 77.56%, while prompt learning using the T5 model and mixed templates resulted in an F1 score of 90.89% and a balanced accuracy of 72.07%.Our codes and data are available at <url>https://github.com/HECTA-UoM/MedTem</url>.</abstract>
       <url hash="5a9b129f">2023.acl-srw.27</url>
@@ -16513,7 +16513,7 @@
       <title>Probing for Hyperbole in Pre-Trained Language Models</title>
       <author><first>Nina</first><last>Schneidermann</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Bolette</first><last>Pedersen</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>200-211</pages>
       <abstract>Hyperbole is a common figure of speech, which is under-explored in NLP research. In this study, we conduct edge and minimal description length (MDL) probing experiments on three pre-trained language models (PLMs) in an attempt to explore the extent to which hyperbolic information is encoded in these models. We use both word-in-context and sentence-level representations as model inputs as a basis for comparison. We also annotate 63 hyperbole sentences from the HYPO dataset according to an operational taxonomy to conduct an error analysis to explore the encoding of different hyperbole categories. Our results show that hyperbole is to a limited extent encoded in PLMs, and mostly in the final layers. They also indicate that hyperbolic information may be better encoded by the sentence-level representations, which, due to the pragmatic nature of hyperbole, may therefore provide a more accurate and informative representation in PLMs. Finally, the inter-annotator agreement for our annotations, a Cohen’s Kappa of 0.339, suggest that the taxonomy categories may not be intuitive and need revision or simplification.</abstract>
       <url hash="0d17e5fb">2023.acl-srw.30</url>
@@ -16628,10 +16628,10 @@
       <title>Authorship Attribution of Late 19th Century Novels using <fixed-case>GAN</fixed-case>-<fixed-case>BERT</fixed-case></title>
       <author><first>Kanishka</first><last>Silva</last><affiliation>University of Wolverhampton</affiliation></author>
       <author><first>Burcu</first><last>Can</last><affiliation>University of Stirling</affiliation></author>
-      <author><first>Frédéric</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Raheem</first><last>Sarwar</last><affiliation>OTEHM, Manchester Metropolitan University</affiliation></author>
       <author><first>Laura</first><last>Ugolini</last><affiliation>University of Wolverhampton</affiliation></author>
-      <author><first>Ruslan</first><last>Mitkov</last><affiliation>University of Wolverhampton</affiliation></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last><affiliation>University of Wolverhampton</affiliation></author>
       <pages>310-320</pages>
       <abstract>Authorship attribution aims to identify the author of an anonymous text. The task becomes even more worthwhile when it comes to literary works. For example, pen names were commonly used by female authors in the 19th century resulting in some literary works being incorrectly attributed or claimed. With this motivation, we collated a dataset of late 19th century novels in English. Due to the imbalance in the dataset and the unavailability of enough data per author, we employed the GANBERT model along with data sampling strategies to fine-tune a transformer-based model for authorship attribution. Differently from the earlier studies on the GAN-BERT model, we conducted transfer learning on comparatively smaller author subsets to train more focused author-specific models yielding performance over 0.88 accuracy and F1 scores. Furthermore, we observed that increasing the sample size has a negative impact on the model’s performance. Our research mainly contributes to the ongoing authorship attribution research using GAN-BERT architecture, especially in attributing disputed novelists in the late 19th century.</abstract>
       <url hash="b34b173e">2023.acl-srw.44</url>
@@ -16641,7 +16641,7 @@
     <paper id="46">
       <title>How-to Guides for Specific Audiences: A Corpus and Initial Findings</title>
       <author><first>Nicola</first><last>Fanton</last><affiliation>Universität Stuttgart</affiliation></author>
-      <author><first>Agnieszka</first><last>Falenska</last><affiliation>IMS, University of Stuttgart</affiliation></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last><affiliation>IMS, University of Stuttgart</affiliation></author>
       <author><first>Michael</first><last>Roth</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>321-333</pages>
       <abstract>Instructional texts for specific target groups should ideally take into account the prior knowledge and needs of the readers in order to guide them efficiently to their desired goals. However, targeting specific groups also carries the risk of reflecting disparate social norms and subtle stereotypes. In this paper, we investigate the extent to which how-to guides from one particular platform, wikiHow, differ in practice depending on the intended audience. We conduct two case studies in which we examine qualitative features of texts written for specific audiences. In a generalization study, we investigate which differences can also be systematically demonstrated using computational methods. The results of our studies show that guides from wikiHow, like other text genres, are subject to subtle biases. We aim to raise awareness of these inequalities as a first step to addressing them in future work.</abstract>
@@ -16722,7 +16722,7 @@
     <paper id="2">
       <title>“Knowledge is Power”: Constructing Knowledge Graph of Abdominal Organs and Using Them for Automatic Radiology Report Generation</title>
       <author><first>Kaveri</first><last>Kale</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <author><first>Aditya</first><last>Shetty</last><affiliation>Consultant Radiologist, Breach Candy Hospital, Mumbai</affiliation></author>
       <author><first>Milind</first><last>Gune</last><affiliation>Consultant Radiologist, Mumbai, Thane</affiliation></author>
       <author><first>Kush</first><last>Shrivastava</last><affiliation>Augnito India Pvt Ltd</affiliation></author>
@@ -16763,7 +16763,7 @@
     <paper id="5">
       <title>Constrained Policy Optimization for Controlled Self-Learning in Conversational <fixed-case>AI</fixed-case> Systems</title>
       <author><first>Mohammad</first><last>Kachuee</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>43-52</pages>
       <abstract>Recently, self-learning methods based on user satisfaction metrics and contextual bandits have shown promising results to enable consistent improvements in conversational AI systems. However, directly targeting such metrics by off-policy bandit learning objectives often increases the risk of making abrupt policy changes that break the current user experience. In this study, we introduce a scalable framework for supporting fine-grained exploration targets for individual domains via user-defined constraints. For example, we may want to ensure fewer policy deviations in business-critical domains such as shopping, while allocating more exploration budget to domains such as music. We present a novel meta-gradient learning approach that is scalable and practical to address this problem. The proposed method adjusts constraint violation penalty terms adaptively through a meta objective that encourages balanced constraint satisfaction across domains. We conducted extensive experiments on a real-world conversational AI and using a set of realistic constraint benchmarks. The proposed approach has been deployed in production for a large-scale commercial assistant, enabling the best balance between the policy value and constraint satisfaction rate.</abstract>
       <url hash="a0427af5">2023.acl-industry.5</url>
@@ -16808,7 +16808,7 @@
     <paper id="9">
       <title><fixed-case>KG</fixed-case>-<fixed-case>FLIP</fixed-case>: Knowledge-guided Fashion-domain Language-Image Pre-training for <fixed-case>E</fixed-case>-commerce</title>
       <author><first>Qinjin</first><last>Jia</last><affiliation>North Carolina State University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Daoping</first><last>Wu</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Shaoyuan</first><last>Xu</last><affiliation>Amazon</affiliation></author>
       <author><first>Huidong</first><last>Liu</last><affiliation>Amazon</affiliation></author>
@@ -16935,7 +16935,7 @@
       <author><first>Chenwei</first><last>Zhang</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Binxuan</first><last>Huang</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Yifan Ethan</first><last>Xu</last><affiliation>Meta</affiliation></author>
-      <author><first>Xin Luna</first><last>Dong</last><affiliation>Meta</affiliation></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last><affiliation>Meta</affiliation></author>
       <author><first>Yizhou</first><last>Sun</last><affiliation>UCLA</affiliation></author>
       <pages>172-185</pages>
       <url hash="1a7afb42">2023.acl-industry.18</url>
@@ -16962,7 +16962,7 @@
       <title>Tab-<fixed-case>CQA</fixed-case>: A Tabular Conversational Question Answering Dataset on Financial Reports</title>
       <author><first>Chuang</first><last>Liu</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Junzhuo</first><last>Li</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>196-207</pages>
       <abstract>Existing conversational question answering (CQA) datasets have been usually constructed from unstructured texts in English. In this paper, we propose Tab-CQA, a tabular CQA dataset created from Chinese financial reports that are extracted from listed companies in a wide range of different sectors in the past 30 years. From these reports, we select 2,463 tables, and manually generate 2,463 conversations with 35,494 QA pairs. Additionally, we select 4,578 tables, from which 4,578 conversations with 73,595 QA pairs are automatically created via a template-based method. With the manually- and automatically-generated conversations, Tab-CQA contains answerable and unanswerable questions. For the answerable questions, we further diversify them to cover a wide range of skills, e.g., table retrieval, fact checking, numerical reasoning, so as to accommodate real-world scenarios. We further propose two different tabular CQA models, a text-based model and an operation-based model, and evaluate them on Tab-CQA. Experiment results show that Tab-CQA is a very challenging dataset, where a huge performance gap exists between human and neural models. We will publicly release Tab-CQA as a benchmark testbed to promote further research on Chinese tabular CQA.</abstract>
       <url hash="2f9d69f6">2023.acl-industry.20</url>
@@ -17022,7 +17022,7 @@
       <author><first>Leanne</first><last>Rolston</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jadin</first><last>Tredup</last><affiliation>LivePerson</affiliation></author>
       <author><first>Ilana</first><last>Zimmerman</last><affiliation>Liveperson</affiliation></author>
-      <author><first>Ethan</first><last>Selfridge</last><affiliation>LivePerson</affiliation></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last><affiliation>LivePerson</affiliation></author>
       <author><first>Joseph</first><last>Bradley</last><affiliation>LivePerson</affiliation></author>
       <pages>248-267</pages>
       <abstract>Contacting customer service via chat is a common practice. Because employing customer service agents is expensive, many companies are turning to NLP that assists human agents by auto-generating responses that can be used directly or with modifications. With their ability to handle large context windows, Large Language Models (LLMs) are a natural fit for this use case. However, their efficacy must be balanced with the cost of training and serving them. This paper assesses the practical cost and impact of LLMs for the enterprise as a function of the usefulness of the responses that they generate. We present a cost framework for evaluating an NLP model’s utility for this use case and apply it to a single brand as a case study in the context of an existing agent assistance product. We compare three strategies for specializing an LLM — prompt engineering, fine-tuning, and knowledge distillation — using feedback from the brand’s customer service agents. We find that the usability of a model’s responses can make up for a large difference in inference cost for our case study brand, and we extrapolate our findings to the broader enterprise space.</abstract>
@@ -17091,7 +17091,7 @@
       <author><first>Anant</first><last>Khandelwal</last><affiliation>Amazon</affiliation></author>
       <author><first>Happy</first><last>Mittal</last><affiliation>Amazon</affiliation></author>
       <author><first>Shreyas</first><last>Kulkarni</last><affiliation>Amazon</affiliation></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
       <pages>305-312</pages>
       <url hash="688e90a5">2023.acl-industry.29</url>
       <bibkey>khandelwal-etal-2023-large</bibkey>
@@ -17244,7 +17244,7 @@
       <title>Semantic Ambiguity Detection in Sentence Classification using Task-Specific Embeddings</title>
       <author><first>Jong Myoung</first><last>Kim</last><affiliation>SK Telecom</affiliation></author>
       <author><first>Young-jun</first><last>Lee</last><affiliation>KAIST</affiliation></author>
-      <author><first>Sangkeun</first><last>Jung</last><affiliation>Chungnam National University</affiliation></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Ho-jin</first><last>Choi</last><affiliation>KAIST</affiliation></author>
       <pages>425-437</pages>
       <abstract>Ambiguity is a major obstacle to providing services based on sentence classification. However, because of the structural limitations of the service, there may not be sufficient contextual information to resolve the ambiguity. In this situation, we focus on ambiguity detection so that service design considering ambiguity is possible. We utilize similarity in a semantic space to detect ambiguity in service scenarios and training data. In addition, we apply task-specific embedding to improve performance. Our results demonstrate that ambiguities and resulting labeling errors in training data or scenarios can be detected. Additionally, we confirm that it can be used to debug services</abstract>
@@ -17322,7 +17322,7 @@
       <author><first>Xiujie</first><last>Song</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Xuezhi</first><last>Cao</last><affiliation>Meituan</affiliation></author>
       <author><first>Yunsen</first><last>Xian</last><affiliation>Meituan</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <pages>476-486</pages>
       <abstract>As e-commerce platforms develop different business lines, a special but challenging product categorization scenario emerges, where there are multiple domain-specific category taxonomies and each of them evolves dynamically over time. In order to unify the categorization process and ensure efficiency, we propose a two-stage taxonomy-agnostic framework that relies solely on calculating the semantic relatedness between product titles and category names in the vector space. To further enhance domain transferability and better exploit cross-domain data, we design two plug-in modules: a heuristic mapping scorer and a pretrained contrastive ranking module with the help of meta concepts, which represent keyword knowledge shared across domains. Comprehensive offline experiments show that our method outperforms strong baselineson three dynamic multi-domain product categorization (DMPC) tasks,and online experiments reconfirm its efficacy with a5% increase on seasonal purchase revenue. Related datasets will be released.</abstract>
       <url hash="c653f98e">2023.acl-industry.46</url>
@@ -17333,8 +17333,8 @@
     <paper id="47">
       <title><fixed-case>DISCOSQA</fixed-case>: A Knowledge Base Question Answering System for Space Debris based on Program Induction</title>
       <author><first>Paul</first><last>Darm</last><affiliation>University of Strathclyde</affiliation></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Annalisa</first><last>Riccardi</last><affiliation>University of Strathclyde</affiliation></author>
       <pages>487-499</pages>
       <abstract>Space program agencies execute complex satellite operations that need to be supported by the technical knowledge contained in their extensive information systems. Knowledge Base (KB) databases are an effective way of storing and accessing such information to scale. In this work we present a system, developed for the European Space Agency, that can answer complex natural language queries, to support engineers in accessing the information contained in a KB that models the orbital space debris environment. Our system is based on a pipeline which first generates a program sketch from a natural language question, then specializes the sketch into a concrete query program with mentions of entities, attributes and relations, and finally executes the program against the database. This pipeline decomposition approach enables us to train the system by leveraging out-of-domain data and semi-synthetic data generated by GPT-3, thus reducing overfitting and shortcut learning even with limited amount of in-domain training data.</abstract>
@@ -17483,7 +17483,7 @@
       <author><first>Yingxue</first><last>Zhou</last><affiliation>Amazon</affiliation></author>
       <author><first>Jie</first><last>Hao</last><affiliation>Amazon</affiliation></author>
       <author><first>Mukund</first><last>Rungta</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Eunah</first><last>Cho</last><affiliation>Amazon, Alexa AI</affiliation></author>
       <author><first>Xing</first><last>Fan</last><affiliation>Amazon Corporation</affiliation></author>
       <author><first>Yanbin</first><last>Lu</last><affiliation>Amazon</affiliation></author>
@@ -17503,7 +17503,7 @@
       <author><first>Haoming</first><last>Jiang</last><affiliation>Amazon Search</affiliation></author>
       <author><first>Shaohui</first><last>Xi</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Bing</first><last>Yin</last><affiliation>Amazon.com</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <author><first>Tuo</first><last>Zhao</last><affiliation>Georgia Tech</affiliation></author>
       <pages>616-628</pages>
       <abstract>E-commerce queries are often short and ambiguous. Consequently, query understanding often uses query rewriting to disambiguate user-input queries. While using e-commerce search tools, users tend to enter multiple searches, which we call context, before purchasing. These history searches contain contextual insights about users’ true shopping intents. Therefore, modeling such contextual information is critical to a better query rewriting model. However, existing query rewriting models ignore users’ history behaviors and consider only the instant search query, which is often a short string offering limited information about the true shopping intent. We propose an end-to-end context-aware query rewriting model to bridge this gap, which takes the search context into account. Specifically, our model builds a session graph using the history search queries and their contained words. We then employ a graph attention mechanism that models cross-query relations and computes contextual information of the session. The model subsequently calculates session representations by combining the contextual information with the instant search query using an aggregation network. The session representations are then decoded to generate rewritten queries. Empirically, we demonstrate the superiority of our method to state-of-the-art approaches under various metrics.</abstract>
@@ -17603,7 +17603,7 @@
     <paper id="67">
       <title>“Let’s not Quote out of Context”: Unified Vision-Language Pretraining for Context Assisted Image Captioning</title>
       <author><first>Abisek</first><last>Rajakumar Kalarani</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <author><first>Niyati</first><last>Chhaya</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Sumit</first><last>Shekhar</last><affiliation>Adobe Systems</affiliation></author>
       <pages>695-706</pages>
@@ -17647,7 +17647,7 @@
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last><affiliation>Amazon</affiliation></author>
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon Research</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>729-743</pages>
       <abstract>Spoken Question Answering (QA) is a key feature of voice assistants, usually backed by multiple QA systems. Users ask questions via spontaneous speech that can contain disfluencies, errors, and informal syntax or phrasing. This is a major challenge in QA, causing unanswered questions or irrelevant answers, leading to bad user experiences. We analyze failed QA requests to identify core challenges: lexical gaps, proposition types, complex syntactic structure, and high specificity. We propose a Semantic Question Reformulation (SURF) model offering three linguistically-grounded operations (repair, syntactic reshaping, generalization) to rewrite questions to facilitate answering. Offline evaluation on 1M unanswered questions from a leading voice assistant shows that SURF significantly improves answer rates: up to 24% of previously unanswered questions obtain relevant answers (75%). Live deployment shows positive impact for millions of customers with unanswered questions; explicit relevance feedback shows high user satisfaction.</abstract>
       <url hash="84d6c3af">2023.acl-industry.70</url>
@@ -17688,7 +17688,7 @@
       <author><first>Jason</first><last>Choi</last><affiliation>Amazon</affiliation></author>
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon Research</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>763-771</pages>
       <abstract>Frequently Asked Question (FAQ) retrieval aims at retrieving question-answer pairs for a given a user query. Integrating FAQ retrieval with product search can not only empower users to make more informed purchase decisions, but also enhance user retention through efficient post-purchase support. Providing FAQ content without disrupting user’s shopping experience poses challenges on deciding when and how to show FAQ results. Our proposed intent-aware FAQ retrieval consists of (1) an intent classifier that predicts whether the query is looking for an FAQ; (2) a reformulation model that rewrites query into a natural question. Offline evaluation demonstrates that our approach improves 12% in Hit@1 on retrieving ground-truth FAQs, while reducing latency by 95% compared to baseline systems. These improvements are further validated by real user feedback, where more than 99% of users consider FAQs displayed on top of product search results is helpful. Overall, our findings show promising directions for integrating FAQ retrieval into product search at scale.</abstract>
       <url hash="982a518c">2023.acl-industry.73</url>
@@ -17759,7 +17759,7 @@
       <author><first>Yang</first><last>Deng</last></author>
       <author><first>Wenqiang</first><last>Lei</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1-10</pages>
       <abstract>Conversational systems are envisioned to provide social support or functional service to human users via natural language interactions. Conventional conversation researches mainly focus on the responseability of the system, such as dialogue context understanding and response generation, but overlooks the design of an essential property in intelligent conversations, i.e., goal awareness. The awareness of goals means the state of not only being responsive to the users but also aware of the target conversational goal and capable of leading the conversation towards the goal, which is a significant step towards higher-level intelligence and artificial consciousness. It can not only largely improve user engagement and service efficiency in the conversation, but also empower the system to handle more complicated conversation tasks that involve strategical and motivational interactions. In this tutorial, we will introduce the recent advances on the design of agent’s awareness of goals in a wide range of conversational systems.</abstract>
       <url hash="da3ac192">2023.acl-tutorials.1</url>
@@ -17770,7 +17770,7 @@
       <title>Complex Reasoning in Natural Language</title>
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Mor</first><last>Geva</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Michihiro</first><last>Yasunaga</last></author>
       <author><first>Aman</first><last>Madaan</last></author>
       <author><first>Tao</first><last>Yu</last></author>
diff --git a/data/xml/2023.alp.xml b/data/xml/2023.alp.xml
index 67c25c8513..ec6368ac93 100644
--- a/data/xml/2023.alp.xml
+++ b/data/xml/2023.alp.xml
@@ -24,7 +24,7 @@
       <title>Training and Evaluation of Named Entity Recognition Models for Classical <fixed-case>L</fixed-case>atin</title>
       <author><first>Marijke</first><last>Beersmans</last></author>
       <author><first>Evelien</first><last>de Graaf</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Margherita</first><last>Fantoli</last></author>
       <pages>1–12</pages>
       <abstract>We evaluate the performance of various models on the task of named entity recognition (NER) for classical Latin. Using an existing dataset, we train two transformer-based LatinBERT models and one shallow conditional random field (CRF) model. The performance is assessed using both standard metrics and a detailed manual error analysis, and compared to the results obtained by different already released Latin NER tools. Both analyses demonstrate that the BERT models achieve a better f1-score than the other models. Furthermore, we annotate new, unseen data for further evaluation of the models, and we discuss the impact of annotation choices on the results.</abstract>
@@ -88,7 +88,7 @@
     <paper id="7">
       <title><fixed-case>L</fixed-case>atin Morphology through the Centuries: Ensuring Consistency for Better Language Processing</title>
       <author><first>Federica</first><last>Gamba</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>59–67</pages>
       <abstract>This paper focuses on the process of harmonising the five Latin treebanks available in Universal Dependencies with respect to morphological annotation. We propose a workflow that allows to first spot inconsistencies and missing information, in order to detect to what extent the annotations differ, and then correct the retrieved bugs, with the goal of equalising the annotation of morphological features in the treebanks and producing more consistent linguistic data. Subsequently, we present some experiments carried out with UDPipe and Stanza in order to assess the impact of such harmonisation on parsing accuracy.</abstract>
       <url hash="e728ac73">2023.alp-1.7</url>
@@ -98,7 +98,7 @@
       <title>Cross-Lingual Constituency Parsing for <fixed-case>M</fixed-case>iddle <fixed-case>H</fixed-case>igh <fixed-case>G</fixed-case>erman: A Delexicalized Approach</title>
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>68–79</pages>
       <abstract>Constituency parsing plays a fundamental role in advancing natural language processing (NLP) tasks. However, training an automatic syntactic analysis system for ancient languages solely relying on annotated parse data is a formidable task due to the inherent challenges in building treebanks for such languages. It demands extensive linguistic expertise, leading to a scarcity of available resources. To overcome this hurdle, cross-lingual transfer techniques which require minimal or even no annotated data for low-resource target languages offer a promising solution. In this study, we focus on building a constituency parser for Middle High German (MHG) under realistic conditions, where no annotated MHG treebank is available for training. In our approach, we leverage the linguistic continuity and structural similarity between MHG and Modern German (MG), along with the abundance of MG treebank resources. Specifically, by employing the delexicalization method, we train a constituency parser on MG parse datasets and perform cross-lingual transfer to MHG parsing. Our delexicalized constituency parser demonstrates remarkable performance on the MHG test set, achieving an F1-score of 67.3%. It outperforms the best zero-shot cross-lingual baseline by a margin of 28.6% points. The encouraging results underscore the practicality and potential for automatic syntactic analysis in other ancient languages that face similar challenges as MHG.</abstract>
       <url hash="074b832b">2023.alp-1.8</url>
@@ -251,7 +251,7 @@
     <paper id="23">
       <title>A Neural Pipeline for <fixed-case>POS</fixed-case>-tagging and Lemmatizing Cuneiform Languages</title>
       <author><first>Aleksi</first><last>Sahala</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>203–212</pages>
       <abstract>We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98% and a lemmatization accuracy of 94-96% depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91%, and lemmata for 68-84% of the input words. Compared with the earlier version, the current one has about 10% higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages.</abstract>
       <url hash="6aca81f5">2023.alp-1.23</url>
diff --git a/data/xml/2023.alta.xml b/data/xml/2023.alta.xml
index ce7fad45c3..034c7ed29e 100644
--- a/data/xml/2023.alta.xml
+++ b/data/xml/2023.alta.xml
@@ -48,7 +48,7 @@
       <author><first>Belinda</first><last>Chiera</last></author>
       <author><first>Cathy</first><last>Chua</last></author>
       <author><first>Chadi</first><last>Raheb</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Annika</first><last>Simonsen</last></author>
       <author><first>Zhengkang</first><last>Xiang</last></author>
       <author><first>Rina</first><last>Zviel-Girshin</last></author>
@@ -60,7 +60,7 @@
     <paper id="4">
       <title>Exploring Causal Directions through Word Occurrences: Semi-supervised <fixed-case>B</fixed-case>ayesian Classification Framework</title>
       <author><first>King Tao Jason</first><last>Ng</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>30-39</pages>
       <abstract>Determining causal directions in sentences plays a critical role into understanding a cause-and-effect relationship between entities. In this paper, we show empirically that word occurrences from several Internet domains resemble the characteristics of causal directions. Our research contributes to the knowledge of the underlying data generation process behind causal directions. We propose a two-phase method: 1. Bayesian framework, which generates synthetic data from posteriors by incorporating word occurrences from the Internet domains. 2. Pre-trained BERT, which utilises semantics of words based on the context to perform classification. The proposed method achieves an improvement in performance for the Cause-Effect relations of the SemEval-2010 dataset, when compared with random guessing.</abstract>
       <url hash="5b775845">2023.alta-1.4</url>
@@ -87,7 +87,7 @@
     <paper id="7">
       <title><fixed-case>MCASP</fixed-case>: Multi-Modal Cross Attention Network for Stock Market Prediction</title>
       <author><first>Kamaladdin</first><last>Fataliyev</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <pages>67-77</pages>
       <abstract>Stock market prediction is considered a complex task due to the non-stationary and volatile nature of the stock markets. With the increasing amount of online data, various information sources have been analyzed to understand the underlying patterns of the price movements. However, most existing works in the literature mostly focus on either the intra-modality information within each input data type, or the inter-modal relationships among the input modalities. Different from these, in this research, we propose a novel Multi-Modal Cross Attention Network for Stock Market Prediction (MCASP) by capturing both modality-specific features and the joint influence of each modality in a unified framework. We utilize financial news, historical market data and technical indicators to predict the movement direction of the market prices. After processing the input modalities with three separate deep networks, we first construct a self-attention network that utilizes multiple Transformer models to capture the intra-modal information. Then we design a novel cross-attention network that processes the inputs in pairs to exploit the cross-modal and joint information of the modalities. Experiments with real world datasets for S&amp;P500 index forecast and the prediction of five individual stocks, demonstrate the effectiveness of the proposed multi-modal design over several state-of-the-art baseline models.</abstract>
       <url hash="c2d5576b">2023.alta-1.7</url>
@@ -113,7 +113,7 @@
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Yufei</first><last>Wang</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>88-99</pages>
       <abstract>Flowchart-grounded troubleshooting dialogue (FTD) systems, which follow the instructions of a flowchart to diagnose users’ problems in specific domains (e.g., vehicle, laptop), have been gaining research interest in recent years. However, collecting sufficient dialogues that are naturally grounded on flowcharts is costly, thus FTD systems are impeded by scarce training data. To mitigate the data sparsity issue, we propose a plan-based synthetic data generation (PlanSDG) approach that generates diverse synthetic dialog data at scale by transforming concise flowchart into dialogues. Specifically, its generative model employs a variational-base framework with a hierarchical planning strategy that includes global and local latent planning variables. Experiments on the FloDial dataset show that synthetic dialogue produced by PlanSDG improves the performance of downstream tasks, including flowchart path retrieval and response generation, in particular on the Out-of-Flowchart settings. In addition, further analysis demonstrate the quality of synthetic data generated by PlanSDG in paths that are covered by current sample dialogues and paths that are not covered.</abstract>
       <url hash="2f37d5fb">2023.alta-1.9</url>
@@ -161,7 +161,7 @@
     <paper id="14">
       <title>Predicting Empathic Accuracy from User-Designer Interviews</title>
       <author><first>Steven</first><last>Nguyen</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Katja</first><last>Holtta-Otto</last></author>
       <pages>125-129</pages>
       <abstract>Measuring empathy as a natural language processing task has often been limited to a subjective measure of how well individuals respond to each other in emotive situations. Cognitive empathy, or an individual’s ability to accurately assess another individual’s thoughts, remains a more novel task. In this paper, we explore natural language processing techniques to measure cognitive empathy using paired sentence data from design interviews. Our findings show that an unsupervised approach based on similarity of vectors from a Large Language Model is surprisingly promising, while adding supervision does not necessarily improve the performance. An analysis of the results highlights potential reasons for this behaviour and gives directions for future work in this space.</abstract>
@@ -172,7 +172,7 @@
       <title><fixed-case>CRF</fixed-case>-based recognition of invasive fungal infection concepts in <fixed-case>CHIFIR</fixed-case> clinical reports</title>
       <author><first>Yang</first><last>Meng</last></author>
       <author><first>Vlada</first><last>Rozova</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>130-135</pages>
       <abstract>Named entity recognition (NER) in clinical documentation is often hindered by the use of highly specialised terminology, variation in language used to express medical findings and general scarcity of high-quality data available for training. This short paper compares a Conditional Random Fields model to the previously established dictionary-based approach and evaluates its ability to extract information from a small corpus of annotated pathology reports. The results suggest that including token descriptors as well as contextual features significantly improves precision on several concept categories while maintaining the same level of recall.</abstract>
       <url hash="e7e97213">2023.alta-1.15</url>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="17">
       <title>Overview of the 2023 <fixed-case>ALTA</fixed-case> Shared Task: Discriminate between Human-Written and Machine-Generated Text</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Haolan</first><last>Zhan</last></author>
       <author><first>Xuanli</first><last>He</last></author>
       <author><first>Qiongkai</first><last>Xu</last></author>
diff --git a/data/xml/2023.americasnlp.xml b/data/xml/2023.americasnlp.xml
index 2a7fc31a22..f60cbed3ab 100644
--- a/data/xml/2023.americasnlp.xml
+++ b/data/xml/2023.americasnlp.xml
@@ -5,11 +5,11 @@
       <booktitle>Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)</booktitle>
       <editor><first>Manuel</first><last>Mager</last></editor>
       <editor><first>Abteen</first><last>Ebrahimi</last></editor>
-      <editor><first>Arturo</first><last>Oncevay</last></editor>
+      <editor id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></editor>
       <editor><first>Enora</first><last>Rice</last></editor>
       <editor><first>Shruti</first><last>Rijhwani</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
-      <editor><first>Katharina</first><last>Kann</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
       <month>July</month>
@@ -56,7 +56,7 @@
     </paper>
     <paper id="4">
       <title>Codex to corpus: Exploring annotation and processing for an open and extensible machine-readable edition of the Florentine Codex</title>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
       <author><first>Valery</first><last>Berthoud F.</last><affiliation>Humboldt-Universitt zu Berlin</affiliation></author>
       <pages>19-29</pages>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>Developing finite-state language technology for <fixed-case>M</fixed-case>aya</title>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <author><first>Quetzil</first><last>Castañeda</last><affiliation>Indiana University</affiliation></author>
       <pages>30-39</pages>
       <abstract>We describe a suite of finite-state language technologies for Maya, a Mayan language spoken in Mexico. At the core is a computational model of Maya morphology and phonology using a finite-state transducer. This model results in a morphological analyzer and a morphologically-informed spell-checker. All of these technologies are designed for use as both a pedagogical reading/writing aid for L2 learners and as a general language processing tool capable of supporting much of the natural variation in written Maya. We discuss the relevant features of Maya morphosyntax and orthography, and then outline the implementation details of the analyzer. To conclude, we present a longer-term vision for these tools and their use by both native speakers and learners.</abstract>
@@ -111,7 +111,7 @@
     <paper id="9">
       <title>Enriching <fixed-case>W</fixed-case>ayúunaiki-<fixed-case>S</fixed-case>panish Neural Machine Translation with Linguistic Information</title>
       <author><first>Nora</first><last>Graichen</last><affiliation>UdS</affiliation></author>
-      <author><first>Josef</first><last>Van Genabith</last><affiliation>DFKI</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last><affiliation>DFKI</affiliation></author>
       <author><first>Cristina</first><last>España-bonet</last><affiliation>DFKI GmbH</affiliation></author>
       <pages>67-83</pages>
       <abstract>We present the first neural machine translation system for the low-resource language pair Wayúunaiki–Spanish and explore strategies to inject linguistic knowledge into the model to improve translation quality. We explore a wide range of methods and combine complementary approaches. Results indicate that incorporating linguistic information through linguistically motivated subword segmentation, factored models, and pretrained embeddings helps the system to generate improved translations, with the segmentation contributing most. In order to evaluate translation quality in a general domain and go beyond the available religious domain data, we gather and make publicly available a new test set and supplementary material. Although translation quality as measured with automatic metrics is low, we hope these resources will facilitate and support further research on Wayúunaiki.</abstract>
@@ -138,7 +138,7 @@
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politecnico Nacional</affiliation></author>
       <author><first>Noé</first><last>Castro-Sánchez</last><affiliation>TecNM/Cenidet</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>CIC-IPN</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politcnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politcnico Nacional</affiliation></author>
       <pages>94-102</pages>
       <abstract>In this paper, we present a parallel Spanish- Mazatec and Spanish-Mixtec corpus for machine translation (MT) tasks, where Mazatec and Mixtec are two indigenous Mexican languages. We evaluated the usability of the collected corpus using three different approaches: transformer, transfer learning, and fine-tuning pre-trained multilingual MT models. Fine-tuning the Facebook m2m100-48 model outperformed the other approaches, with BLEU scores of 12.09 and 22.25 for Mazatec-Spanish and Spanish-Mazatec translations, respectively, and 16.75 and 22.15 for Mixtec-Spanish and Spanish-Mixtec translations, respectively. The results indicate that translation performance is influenced by the dataset size (9,799 sentences in Mazatec and 13,235 sentences in Mixtec) and is more effective when indigenous languages are used as target languages. The findings emphasize the importance of creating parallel corpora for indigenous languages and fine-tuning models for low-resource translation tasks. Future research will investigate zero-shot and few-shot learning approaches to further improve translation performance in low-resource settings.</abstract>
       <url hash="23179653">2023.americasnlp-1.11</url>
@@ -148,7 +148,7 @@
     <paper id="12">
       <title>A finite-state morphological analyser for <fixed-case>H</fixed-case>ighland <fixed-case>P</fixed-case>uebla <fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <pages>103-108</pages>
       <abstract>This paper describes the development of a free/open-source finite-state morphologicaltransducer for Highland Puebla Nahuatl, a Uto-Aztecan language spoken in and around the stateof Puebla in Mexico. The finite-state toolkit used for the work is the Helsinki Finite-StateToolkit (HFST); we use the lexc formalism for modelling the morphotactics and twol formal-ism for modelling morphophonological alternations. An evaluation is presented which showsthat the transducer has a reasonable coveragearound 90%on freely-available corpora of the language, and high precisionover 95%on a manually verified test set</abstract>
       <url hash="7b34f300">2023.americasnlp-1.12</url>
@@ -244,7 +244,7 @@
       <author><first>Mikko</first><last>Aulamo</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Yves</first><last>Scherrer</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Sami</first><last>Virpioja</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <pages>177-191</pages>
       <abstract>The Helsinki-NLP team participated in the AmericasNLP 2023 Shared Task with 6 submissions for all 11 language pairs arising from 4 different multilingual systems. We provide a detailed look at the work that went into collecting and preprocessing the data that led to our submissions. We explore various setups for multilingual Neural Machine Translation (NMT), namely knowledge distillation and transfer learning, multilingual NMT including a high-resource language (English), language-specific fine-tuning, and multilingual NMT exclusively using low-resource data. Our multilingual Model B ranks first in 4 out of the 11 language pairs.</abstract>
       <url hash="0400493b">2023.americasnlp-1.20</url>
@@ -267,8 +267,8 @@
       <author><first>Hellina Hailu</first><last>Nigatu</last><affiliation>UC Berkeley</affiliation></author>
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politecnico Nacional</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>CIC-IPN</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politcnico Nacional</affiliation></author>
-      <author><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politcnico Nacional</affiliation></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
       <pages>200-205</pages>
       <abstract>This paper describes CIC NLP’s submission to the AmericasNLP 2023 Shared Task on machine translation systems for indigenous languages of the Americas. We present the system descriptions for three methods. We used two multilingual models, namely M2M-100 and mBART50, and one bilingual (one-to-one) — Helsinki NLP Spanish-English translation model, and experimented with different transfer learning setups. We experimented with 11 languages from America and report the setups we used as well as the results we achieved. Overall, the mBART setup was able to improve upon the baseline for three out of the eleven languages.</abstract>
       <url hash="43f2bd8d">2023.americasnlp-1.22</url>
diff --git a/data/xml/2023.arabicnlp.xml b/data/xml/2023.arabicnlp.xml
index 72f872c783..8701849a34 100644
--- a/data/xml/2023.arabicnlp.xml
+++ b/data/xml/2023.arabicnlp.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of ArabicNLP 2023</booktitle>
       <editor><first>Hassan</first><last>Sawaf</last></editor>
-      <editor><first>Samhaa</first><last>El-Beltagy</last></editor>
+      <editor id="samhaa-r-el-beltagy"><first>Samhaa</first><last>El-Beltagy</last></editor>
       <editor><first>Wajdi</first><last>Zaghouani</last></editor>
       <editor><first>Walid</first><last>Magdy</last></editor>
       <editor><first>Ahmed</first><last>Abdelali</last></editor>
@@ -230,7 +230,7 @@
     <paper id="17">
       <title><fixed-case>A</fixed-case>r<fixed-case>T</fixed-case>rivia: Harvesting <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia to Build A New <fixed-case>A</fixed-case>rabic Question Answering Dataset</title>
       <author><first>Sultan</first><last>Alrowili</last></author>
-      <author><first>K</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K</first><last>Vijay-Shanker</last></author>
       <pages>191-207</pages>
       <abstract>We present ArTrivia, a new Arabic question-answering dataset consisting of more than 10,000 question-answer pairs along with relevant passages, covering a wide range of 18 diverse topics in Arabic. We created our dataset using a newly proposed pipeline that leverages diverse structured data sources from Arabic Wikipedia. Moreover, we conducted a comprehensive statistical analysis of ArTrivia and assessed the performance of each component in our pipeline. Additionally, we compared the performance of ArTrivia against the existing TyDi QA dataset using various experimental setups. Our analysis highlights the significance of often overlooked aspects in dataset creation, such as answer normalization, in enhancing the quality of QA datasets. Our evaluation also shows that ArTrivia presents more challenging and out-of-distribution questions to TyDi, raising questions about the feasibility of using ArTrivia as a complementary dataset to TyDi.</abstract>
       <url hash="0bb8cd9b">2023.arabicnlp-1.17</url>
@@ -241,7 +241,7 @@
     <paper id="18">
       <title><fixed-case>A</fixed-case>r<fixed-case>S</fixed-case>arcas<fixed-case>M</fixed-case>oji Dataset: The Emoji Sentiment Roles in <fixed-case>A</fixed-case>rabic Ironic Contexts</title>
       <author><first>Shatha Ali A.</first><last>Hakami</last></author>
-      <author><first>Robert</first><last>Hendley</last></author>
+      <author id="robert-j-hendley"><first>Robert</first><last>Hendley</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
       <pages>208-217</pages>
       <abstract>In digital communication, emoji are essential in decoding nuances such as irony, sarcasm, and humour. However, their incorporation in Arabic natural language processing (NLP) has been cautious because of the perceived complexities of the Arabic language. This paper introduces ArSarcasMoji, a dataset of 24,630 emoji-augmented texts, with 17. 5% that shows irony. Through our analysis, we highlight specific emoji patterns paired with sentiment roles that denote irony in Arabic texts. The research counters prevailing notions, emphasising the importance of emoji’s role in understanding Arabic textual irony, and addresses their potential for accurate irony detection in Arabic digital content.</abstract>
@@ -556,7 +556,7 @@
       <title>Rosetta Stone at <fixed-case>KSAA</fixed-case>-<fixed-case>RD</fixed-case> Shared Task: A Hop From Language Modeling To Word–Definition Alignment</title>
       <author><first>Ahmed</first><last>Elbakry</last></author>
       <author><first>Mohamed</first><last>Gabr</last></author>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Badr</first><last>AlKhamissi</last></author>
       <pages>477-482</pages>
       <abstract>A Reverse Dictionary is a tool enabling users to discover a word based on its provided definition, meaning, or description. Such a technique proves valuable in various scenarios, aiding language learners who possess a description of a word without its identity, and benefiting writers seeking precise terminology. These scenarios often encapsulate what is referred to as the “Tip-of-the-Tongue” (TOT) phenomena. In this work, we present our winning solution for the Arabic Reverse Dictionary shared task. This task focuses on deriving a vector representation of an Arabic word from its accompanying description. The shared task encompasses two distinct subtasks: the first involves an Arabic definition as input, while the second employs an English definition. For the first subtask, our approach relies on an ensemble of finetuned Arabic BERT-based models, predicting the word embedding for a given definition. The final representation is obtained through averaging the output embeddings from each model within the ensemble. In contrast, the most effective solution for the second subtask involves translating the English test definitions into Arabic and applying them to the finetuned models originally trained for the first subtask. This straightforward method achieves the highest score across both subtasks.</abstract>
@@ -572,7 +572,7 @@
       <author><first>Hamdy</first><last>Mubarak</last></author>
       <author><first>Samir</first><last>Abdaljalil</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Abed</first><last>Freihat</last></author>
       <pages>483-493</pages>
@@ -855,7 +855,7 @@
     </paper>
     <paper id="68">
       <title><fixed-case>NLP</fixed-case>eople at <fixed-case>NADI</fixed-case> 2023 Shared Task: <fixed-case>A</fixed-case>rabic Dialect Identification with Augmented Context and Multi-Stage Tuning</title>
-      <author><first>Mohab</first><last>Elkaref</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last></author>
       <author><first>Movina</first><last>Moses</last></author>
       <author><first>Shinnosuke</first><last>Tanaka</last></author>
       <author><first>James</first><last>Barry</last></author>
@@ -894,7 +894,7 @@
       <author><first>Ingy</first><last>Abdelhalim</last></author>
       <author><first>Nadine</first><last>Abdelhalim</last></author>
       <author><first>Ahmed</first><last>Soliman</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>658-664</pages>
       <abstract>This paper presents the methods we developed for the Nuanced Arabic Dialect Identification (NADI) 2023 shared task, specifically targeting the two subtasks focussed on sentence-level machine translation (MT) of text written in any of four Arabic dialects (Egyptian, Emirati, Jordanian and Palestinian) to Modern Standard Arabic (MSA). Our team, UniManc, employed models based on T5: multilingual T5 (mT5), multi-task fine-tuned mT5 (mT0) and AraT5. These models were trained based on two configurations: joint model training for all regional dialects (J-R) and independent model training for every regional dialect (I-R). Based on the results of the official NADI 2023 evaluation, our I-R AraT5 model obtained an overall BLEU score of 14.76, ranking first in the Closed Dialect-to-MSA MT subtask. Moreover, in the Open Dialect-to-MSA MT subtask, our J-R AraT5 model also ranked first, obtaining an overall BLEU score of 21.10.</abstract>
       <url hash="14ee4c4a">2023.arabicnlp-1.71</url>
@@ -1001,7 +1001,7 @@
       <title><fixed-case>LKAU</fixed-case>23 at Qur’an <fixed-case>QA</fixed-case> 2023: Using Transformer Models for Retrieving Passages and Finding Answers to Questions from the Qur’an</title>
       <author><first>Sarah</first><last>Alnefaie</last></author>
       <author><first>Abdullah</first><last>Alsaleh</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad</first><last>Alsalka</last></author>
       <author><first>Abdulrahman</first><last>Altahhan</last></author>
       <pages>720-727</pages>
@@ -1125,7 +1125,7 @@
       <author><first>Mariam</first><last>Hussein</last></author>
       <author><first>Sarah</first><last>Khaled</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>797-802</pages>
       <abstract>Named Entity Recognition (NER) is a crucial task in natural language processing that facilitates the extraction of vital information from text. However, NER for Arabic presents a significant challenge due to the language’s unique characteristics. In this paper, we introduce AraBINDER, our submission to the Wojood NER Shared Task 2023 (ArabicNLP 2023). The shared task comprises two sub-tasks: sub-task 1 focuses on Flat NER, while sub-task 2 centers on Nested NER. We have participated in both sub-tasks. The Bi-Encoder has proven its efficiency for NER in English. We employ AraBINDER (Arabic Bi-Encoder for Named Entity Recognition), which uses the power of two transformer encoders and employs contrastive learning to map candidate text spans and entity types into the same vector representation space. This approach frames NER as a representation learning problem that maximizes the similarity between the vector representations of an entity mention and its type. Our experiments reveal that AraBINDER achieves a micro F-1 score of 0.918 for Flat NER and 0.9 for Nested NER on the Wojood dataset.</abstract>
       <url hash="3af7de80">2023.arabicnlp-1.90</url>
@@ -1136,7 +1136,7 @@
     <paper id="91">
       <title>El-Kawaref at <fixed-case>W</fixed-case>ojood<fixed-case>NER</fixed-case> shared task: <fixed-case>S</fixed-case>taged<fixed-case>NER</fixed-case> for <fixed-case>A</fixed-case>rabic Named Entity Recognition</title>
       <author><first>Nehal</first><last>Elkaref</last></author>
-      <author><first>Mohab</first><last>Elkaref</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last></author>
       <pages>803-808</pages>
       <abstract>Named Entity Recognition (NER) is the task of identifying word-units that correspond to mentions as location, organization, person, or currency. In this shared task we tackle flat-entity classification for Arabic, where for each word-unit a single entity should be identified. To resolve the classification problem we propose StagedNER a novel technique to fine-tuning NER downstream tasks that divides the learning process of a transformer-model into two phases, where a model is tasked to learn sequence tags and then entity tags rather than learn both together simultaneously for an input sequence. We create an ensemble of two base models using this method that yield a score of on the development set and an F1 performance of 90.03% on the validation set and 91.95% on the test set.</abstract>
       <url hash="70e13ab4">2023.arabicnlp-1.91</url>
diff --git a/data/xml/2023.argmining.xml b/data/xml/2023.argmining.xml
index b6bc578c89..f20e83c51f 100644
--- a/data/xml/2023.argmining.xml
+++ b/data/xml/2023.argmining.xml
@@ -88,7 +88,7 @@
       <title>Legal Argument Extraction from Court Judgements using Integer Linear Programming</title>
       <author><first>Basit</first><last>Ali</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Anindita</first><last>Sinha Banerjee</last></author>
       <author><first>Dhirendra</first><last>Singh</last></author>
       <pages>52–63</pages>
@@ -138,7 +138,7 @@
       <author><first>Patrick</first><last>Katzer</last></author>
       <author><first>Mirko</first><last>Oest</last></author>
       <author><first>Steffen</first><last>Herbold</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <pages>100–106</pages>
       <abstract>Debate naturalness ranges on a scale from small, highly structured, and topically focused settings to larger, more spontaneous and less constrained environments. The more unconstrained a debate, the more spontaneous speakers act: they build on contextual knowledge and use anaphora or ellipses to construct their arguments. They also use rhetorical devices such as questions and imperatives to support or attack claims. In this paper, we study how the reconstruction of the actual debate contributions, i.e., utterances which contain pronouns, ellipses and fuzzy language, into full-fledged propositions which are interpretable without context impacts the prediction of argument relations and investigate the effect of incorporating contextual information for the task. We work with highly complex spontaneous debates with more than 10 speakers on a wide variety of topics. We find that in contrast to our initial hypothesis, reconstruction does not improve predictions and context only improves them when used in combination with propositions.</abstract>
       <url hash="b40c3ea2">2023.argmining-1.10</url>
@@ -149,7 +149,7 @@
       <title>Unsupervised argument reframing with a counterfactual-based approach</title>
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Dimitry</first><last>Mindlin</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>107–119</pages>
       <abstract>Framing is an important mechanism in argumentation, as participants in a debate tend to emphasize those aspects or dimensions of the issue under debate that support their standpoint. The task of reframing an argument, that is changing the underlying framing, has received increasing attention recently. We propose a novel unsupervised approach to argument reframing that takes inspiration from counterfactual explanation generation approaches in the field of eXplainable AI (XAI). We formalize the task as a mask-and-replace approach in which an LLM is tasked to replace masked tokens associated with a set of frames to be eliminated by other tokens related to a set of target frames to be added. Our method relies on two key mechanisms: framed decoding and reranking based on a number of metrics similar to those used in XAI to search for a suitable counterfactual. We evaluate our approach on three topics using the dataset by Ruckdeschel and Wiedemann (2022). We show that our two key mechanisms outperform an unguided LLM as a baseline by increasing the ratio of successfully reframed arguments by almost an order of magnitude.</abstract>
       <url hash="39833e08">2023.argmining-1.11</url>
@@ -161,7 +161,7 @@
       <author><first>Zhexiong</first><last>Liu</last></author>
       <author><first>Mohamed</first><last>Elaraby</last></author>
       <author><first>Yang</first><last>Zhong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>120–132</pages>
       <abstract>This paper presents an overview of the ImageArg shared task, the first multimodal Argument Mining shared task co-located with the 10th Workshop on Argument Mining at EMNLP 2023. The shared task comprises two classification subtasks - (1) Subtask-A: Argument Stance Classification; (2) Subtask-B: Image Persuasiveness Classification. The former determines the stance of a tweet containing an image and a piece of text toward a controversial topic (e.g., gun control and abortion). The latter determines whether the image makes the tweet text more persuasive. The shared task received 31 submissions for Subtask-A and 21 submissions for Subtask-B from 9 different teams across 6 countries. The top submission in Subtask-A achieved an F1-score of 0.8647 while the best submission in Subtask-B achieved an F1-score of 0.5561.</abstract>
       <url hash="01e9c480">2023.argmining-1.12</url>
@@ -266,7 +266,7 @@
       <author><first>Hariram</first><last>Veeramani</last></author>
       <author><first>Saravanan</first><last>Rajamanickam</last></author>
       <author><first>Adam Maciej</first><last>Westerski</last></author>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
       <pages>181–186</pages>
       <abstract>In this paper, we describe our system for ImageArg-2023 Shared Task that aims to identify an image’s stance towards a tweet and determine its persuasiveness score concerning a specific topic. In particular, the Shared Task proposes two subtasks viz. subtask (A) Multimodal Argument Stance (AS) Classification, and subtask (B) Multimodal Image Persuasiveness (IP) Classification, using a dataset composed of tweets (images and text) from controversial topics, namely gun control and abortion. For subtask A, we employ multiple transformer models using a text based approach to classify the argumentative stance of the tweet. For sub task B we adopted text based as well as multimodal learning methods to classify image persuasiveness of the tweet. Surprisingly, the text-based approach of the tweet overall performed better than the multimodal approaches considered. In summary, our best system achieved a F1 score of 0.85 for sub task (A) and 0.50 for subtask (B), and ranked 2nd in subtask (A) and 4th in subtask (B), among all teams submissions.</abstract>
       <url hash="116f57e8">2023.argmining-1.20</url>
@@ -321,9 +321,9 @@
     </paper>
     <paper id="25">
       <title><fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>P</fixed-case>rag<fixed-case>T</fixed-case>ag-2023: Improving Pragmatic Tagging of Peer Reviews through Unlabeled Data</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
       <author><first>Yixin</first><last>Huang</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>212–217</pages>
       <abstract>We describe our models for the Pragmatic Tagging of Peer Reviews Shared Task at the 10th Workshop on Argument Mining at EMNLP-2023. We trained multiple sentence classification models for the above competition task by employing various state-of-the-art transformer models that can be fine-tuned either in the traditional way or through instruction-based fine-tuning. Multiple model predictions on unlabeled data are combined to tentatively label unlabeled instances and augment the dataset to further improve performance on the prediction task. In particular, on the F1000RD corpus, we perform on-par with models trained on 100% of the training data while using only 10% of the data. Overall, on the competition datasets, we rank among the top-2 performers for the different data conditions.</abstract>
       <url hash="cf0cb459">2023.argmining-1.25</url>
diff --git a/data/xml/2023.at4ssl.xml b/data/xml/2023.at4ssl.xml
index 03c2791190..6ff7f60d57 100644
--- a/data/xml/2023.at4ssl.xml
+++ b/data/xml/2023.at4ssl.xml
@@ -9,7 +9,7 @@
       <editor><first>Davy Van</first><last>Landuyt</last></editor>
       <editor><first>Rehana</first><last>Omardeen</last></editor>
       <editor><first>Shaun</first><last>Oboyle</last></editor>
-      <editor><first>Annelies</first><last>Braffort</last></editor>
+      <editor id="annelies-braffort"><first>Annelies</first><last>Braffort</last></editor>
       <editor><first>Floris</first><last>Roelofsen</last></editor>
       <editor><first>Fred</first><last>Blain</last></editor>
       <editor><first>Bram</first><last>Vanroy</last></editor>
@@ -39,10 +39,10 @@
       <title>A Linked Data Approach for linking and aligning Sign Language and Spoken Language Data</title>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Sam</first><last>Bigeard</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Irene</first><last>Murtagh</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -55,7 +55,7 @@
       <title>An Open-Source Gloss-Based Baseline for Spoken to Signed Language Translation</title>
       <author><first>Amit</first><last>Moryossef</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Zifan</first><last>Jiang</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
diff --git a/data/xml/2023.banglalp.xml b/data/xml/2023.banglalp.xml
index e766290ff1..084c48527c 100644
--- a/data/xml/2023.banglalp.xml
+++ b/data/xml/2023.banglalp.xml
@@ -21,7 +21,7 @@
     </frontmatter>
     <paper id="1">
       <title>Offensive Language Identification in Transliterated and Code-Mixed <fixed-case>B</fixed-case>angla</title>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Umma</first><last>Tanmoy</last></author>
       <author><first>Anika Binte</first><last>Islam</last></author>
       <author><first>Kai</first><last>North</last></author>
@@ -80,7 +80,7 @@
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Pritha</first><last>Majumdar</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
       <author><first>Ondřej</first><last>Dušek</last><affiliation>Charles University, Prague</affiliation></author>
       <pages>34-47</pages>
       <abstract>Text style transfer (TST) involves modifying the linguistic style of a given text while retaining its core content. This paper addresses the challenging task of text style transfer in the Bangla language, which is low-resourced in this area. We present a novel Bangla dataset that facilitates text sentiment transfer, a subtask of TST, enabling the transformation of positive sentiment sentences to negative and vice versa. To establish a high-quality base for further research, we refined and corrected an existing English dataset of 1,000 sentences for sentiment transfer based on Yelp reviews, and we introduce a new human-translated Bangla dataset that parallels its English counterpart. Furthermore, we offer multiple benchmark models that serve as a validation of the dataset and baseline for further research.</abstract>
@@ -277,7 +277,7 @@
     </paper>
     <paper id="20">
       <title>nlp<fixed-case>BD</fixed-case>patriots at <fixed-case>BLP</fixed-case>-2023 Task 1: Two-Step Classification for Violence Inciting Text Detection in <fixed-case>B</fixed-case>angla - Leveraging Back-Translation and Multilinguality</title>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
@@ -370,7 +370,7 @@
     <paper id="27">
       <title><fixed-case>UFAL</fixed-case>-<fixed-case>ULD</fixed-case> at <fixed-case>BLP</fixed-case>-2023 Task 1: Violence Detection in <fixed-case>B</fixed-case>angla Text</title>
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
       <author><first>Ondřej</first><last>Dušek</last><affiliation>Charles University, Prague</affiliation></author>
       <pages>220-224</pages>
       <abstract>In this paper, we present UFAL-ULD team’s system, desinged as a part of the BLP Shared Task 1: Violence Inciting Text Detection (VITD). This task aims to classify text, with a particular challenge of identifying incitement to violence into Direct, Indirect or Non-violence levels. We experimented with several pre-trained sequence classification models, including XLM-RoBERTa, BanglaBERT, Bangla BERT Base, and Multilingual BERT. Our best-performing model was based on the XLM-RoBERTa-base architecture, which outperformed the baseline models. Our system was ranked 20th among the 27 teams that participated in the task.</abstract>
@@ -478,7 +478,7 @@
     <paper id="36">
       <title>M1437 at <fixed-case>BLP</fixed-case>-2023 Task 2: Harnessing <fixed-case>B</fixed-case>angla Text for Sentiment Analysis: A Transformer-based Approach</title>
       <author><first>Majidur</first><last>Rahman</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
       <pages>279-285</pages>
       <abstract>Analyzing public sentiment on social media is helpful in understanding the public’s emotions about any given topic. While numerous studies have been conducted in this field, there has been limited research on Bangla social media data. Team M1437 from George Mason University participated in the Sentiment Analysis shared task of the Bangla Language Processing (BLP) Workshop at EMNLP-2023. The team fine-tuned various BERT-based Transformer architectures to solve the task. This article shows that <tex-math>BanglaBERT_{large}</tex-math>, a language model pre-trained on Bangla text, outperformed other BERT-based models. This model achieved an F1 score of 73.15% and top position in the development phase, was further tuned with external training data, and achieved an F1 score of 70.36% in the evaluation phase, securing the fourteenth place on the leaderboard. The F1 score on the test set, when <tex-math>BanglaBERT_{large}</tex-math> was trained without external training data, was 71.54%.</abstract>
       <url hash="a16b82ca">2023.banglalp-1.36</url>
@@ -489,7 +489,7 @@
     <paper id="37">
       <title>nlp<fixed-case>BD</fixed-case>patriots at <fixed-case>BLP</fixed-case>-2023 Task 2: A Transfer Learning Approach towards <fixed-case>B</fixed-case>angla Sentiment Analysis</title>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <pages>286-292</pages>
@@ -590,7 +590,7 @@
     <paper id="45">
       <title><fixed-case>UFAL</fixed-case>-<fixed-case>ULD</fixed-case> at <fixed-case>BLP</fixed-case>-2023 Task 2 Sentiment Classification in <fixed-case>B</fixed-case>angla Text</title>
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
       <author><first>Ondřej</first><last>Dušek</last><affiliation>Charles University, Prague</affiliation></author>
       <pages>336-339</pages>
       <abstract>In this paper, we present the UFAL-ULD team’s system for the BLP Shared Task 2: Sentiment Analysis of Bangla Social Media Posts. The Task 2 involves classifying text into Positive, Negative, or Neutral sentiments. As a part of this task, we conducted a series of experiments with several pre-trained sequence classification models – XLM-RoBERTa, BanglaBERT, Bangla BERT Base and Multilingual BERT. Among these, our best-performing model was based on the XLM-RoBERTa-base architecture, which outperforms baseline models. Our system was ranked 19th among the 30 teams that participated in the task.</abstract>
diff --git a/data/xml/2023.bea.xml b/data/xml/2023.bea.xml
index 0f2bb5c54e..ab2275018b 100644
--- a/data/xml/2023.bea.xml
+++ b/data/xml/2023.bea.xml
@@ -55,7 +55,7 @@
       <author><first>Richard</first><last>Zhou</last><affiliation>Yale</affiliation></author>
       <author><first>Vanessa</first><last>Yan</last><affiliation>Yale</affiliation></author>
       <author><first>Swapnil</first><last>Hingmire</last><affiliation>Na</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>29-43</pages>
       <abstract>Effective human learning depends on a wide selection of educational materials that align with the learner’s current understanding of the topic. While the Internet has revolutionized human learning or education, a substantial resource accessibility barrier still exists. Namely, the excess of online information can make it challenging to navigate and discover high-quality learning materials in a given subject area. In this paper, we propose an automatic pipeline for building an educational resource discovery system for new domains. The pipeline consists of three main steps: resource searching, feature extraction, and resource classification. We first collect frequent queries from a set of seed documents, and search the web with these queries to obtain candidate resources such as lecture slides and introductory blog posts. Then, we process these resources for BERT-based features and meta-features. Next, we train a tree-based classifier to decide whether they are suitable learning materials. The pipeline achieves F1 scores of 0.94 and 0.82 when evaluated on two similar but novel domains. Finally, we demonstrate how this pipeline can benefit two applications: prerequisite chain learning and leading paragraph generation for surveys. We also release a corpus of 39,728 manually labeled web resources and 659 queries from NLP, Computer Vision (CV), and Statistics (STATS).</abstract>
       <url hash="b44e48f2">2023.bea-1.3</url>
@@ -66,7 +66,7 @@
     <paper id="4">
       <title>Using Learning Analytics for Adaptive Exercise Generation</title>
       <author><first>Tanja</first><last>Heck</last><affiliation>University of Tbingen</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Universitt Tbingen</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Universitt Tbingen</affiliation></author>
       <pages>44-56</pages>
       <abstract>Single Choice exercises constitute a central exercise type for language learning in a learner’s progression from mere implicit exposure through input enhancement to productive language use in open exercises. Distractors that support learning in the individual zone of proximal development should not be derived from static analyses of learner corpora, but rely on dynamic learning analytics based on half-open exercises. We demonstrate how a system’s error diagnosis module can be re-used for automatic and dynamic generation and adaptation of distractors, as well as to inform exercise generation in terms of relevant learning goals and reasonable chunking in Jumbled Sentences exercises.</abstract>
       <url hash="207dd266">2023.bea-1.4</url>
@@ -233,7 +233,7 @@
       <author><first>Yuki</first><last>Okano</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Ryo</first><last>Nagata</last><affiliation>Konan University</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>184-194</pages>
       <abstract>This paper proposes a new second language learning task of generating a response including specified grammatical items. We consider two approaches: 1) fine-tuning a pre-trained language model (DialoGPT) by reinforcement learning and 2) providing a few-shot prompt to a large language model (GPT-3). For reinforcement learning, we examine combinations of three reward functions that consider grammatical items, diversity, and fluency. Our experiments confirm that both approaches can generate responses including the specified grammatical items and that it is crucial to consider fluency rather than diversity as the reward function.</abstract>
       <url hash="8f8d4018">2023.bea-1.16</url>
@@ -257,7 +257,7 @@
       <author><first>Mengsay</first><last>Loem</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Sho</first><last>Takase</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>205-219</pages>
       <abstract>Large-scale pre-trained language models such as GPT-3 have shown remarkable performance across various natural language processing tasks. However, applying prompt-based methods with GPT-3 for Grammatical Error Correction (GEC) tasks and their controllability remains underexplored. Controllability in GEC is crucial for real-world applications, particularly in educational settings, where the ability to tailor feedback according to learner levels and specific error types can significantly enhance the learning process. This paper investigates the performance and controllability of prompt-based methods with GPT-3 for GEC tasks using zero-shot and few-shot setting. We explore the impact of task instructions and examples on GPT-3’s output, focusing on controlling aspects such as minimal edits, fluency edits, and learner levels. Our findings demonstrate that GPT-3 could effectively perform GEC tasks, outperforming existing supervised and unsupervised approaches. We also showed that GPT-3 could achieve controllability when appropriate task instructions and examples are given.</abstract>
       <url hash="428356ca">2023.bea-1.18</url>
@@ -280,7 +280,7 @@
       <title>Towards Extracting and Understanding the Implicit Rubrics of Transformer Based Automatic Essay Scoring Models</title>
       <author><first>James</first><last>Fiacco</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>David</first><last>Adamson</last><affiliation>Turnitin</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>232-241</pages>
       <abstract>By aligning the functional components derived from the activations of transformer models trained for AES with external knowledge such as human-understandable feature groups, the proposed method improves the interpretability of a Longformer Automatic Essay Scoring (AES) system and provides tools for performing such analyses on further neural AES systems. The analysis focuses on models trained to score essays based on organization, main idea, support, and language. The findings provide insights into the models’ decision-making processes, biases, and limitations, contributing to the development of more transparent and reliable AES systems.</abstract>
       <url hash="0cc60850">2023.bea-1.20</url>
@@ -329,7 +329,7 @@
     <paper id="24">
       <title>Predicting the Quality of Revisions in Argumentative Writing</title>
       <author><first>Zhexiong</first><last>Liu</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Elaine</first><last>Wang</last><affiliation>RAND Corporation</affiliation></author>
       <author><first>Lindsay</first><last>Matsumura</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Richard</first><last>Correnti</last><affiliation>University of Pittsburgh</affiliation></author>
@@ -344,7 +344,7 @@
       <title>Reconciling Adaptivity and Task Orientation in the Student Dashboard of an Intelligent Language Tutoring System</title>
       <author><first>Leona</first><last>Colling</last><affiliation>University of Tbingen</affiliation></author>
       <author><first>Tanja</first><last>Heck</last><affiliation>University of Tbingen</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Universitt Tbingen</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Universitt Tbingen</affiliation></author>
       <pages>288-299</pages>
       <abstract>In intelligent language tutoring systems, student dashboards should display the learning progress and performance and support the navigation through the learning content. Designing an interface that transparently offers information on students’ learning in relation to specific learning targets while linking to the overarching functional goal, that motivates and organizes the practice in current foreign language teaching, is challenging. This becomes even more difficult in systems that adaptively expose students to different learning material and individualize system interactions. If such a system is used in an ecologically valid setting of blended learning, this generates additional requirements to incorporate the needs of students and teachers for control and customizability.We present the conceptual design of a student dashboard for a task-based, user-adaptive intelligent language tutoring system intended for use in real-life English classes in secondary schools. We highlight the key challenges and spell out open questions for future research.</abstract>
       <url hash="cfe1075f">2023.bea-1.25</url>
@@ -368,7 +368,7 @@
       <title><fixed-case>SIGHT</fixed-case>: A Large Annotated Dataset on Student Insights Gathered from Higher Education Transcripts</title>
       <author><first>Rose</first><last>Wang</last><affiliation>Stanford</affiliation></author>
       <author><first>Pawan</first><last>Wirawarn</last><affiliation>Stanford</affiliation></author>
-      <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
       <author><first>Dorottya</first><last>Demszky</last><affiliation>Stanford University</affiliation></author>
       <pages>315-351</pages>
       <abstract>Lectures are a learning experience for both students and teachers. Students learn from teachers about the subject material, while teachers learn from students about how to refine their instruction. Unfortunately, online student feedback is unstructured and abundant, making it challenging for teachers to learn and improve. We take a step towards tackling this challenge. First, we contribute a dataset for studying this problem: SIGHT is a large dataset of 288 math lecture transcripts and 15,784 comments collected from the Massachusetts Institute of Technology OpenCourseWare (MIT OCW) YouTube channel. Second, we develop a rubric for categorizing feedback types using qualitative analysis. Qualitative analysis methods are powerful in uncovering domain-specific insights, however they are costly to apply to large data sources. To overcome this challenge, we propose a set of best practices for using large language models (LLMs) to cheaply classify the comments at scale. We observe a striking correlation between the model’s and humans’ annotation: Categories with consistent human annotations (0.9 inter-rater reliability, IRR) also display higher human-model agreement (0.7), while categories with less consistent human annotations (0.7-0.8 IRR) correspondingly demonstrate lower human-model agreement (0.3-0.5). These techniques uncover useful student feedback from thousands of comments, costing around $0.002 per comment. We conclude by discussing exciting future directions on using online student feedback and improving automated annotation techniques for qualitative research.</abstract>
@@ -492,7 +492,7 @@
     <paper id="37">
       <title>Hybrid Models for Sentence Readability Assessment</title>
       <author><first>Fengkai</first><last>Liu</last><affiliation>City University of Hong Kong</affiliation></author>
-      <author><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
       <pages>448-454</pages>
       <abstract>Automatic readability assessment (ARA) predicts how difficult it is for the reader to understand a text. While ARA has traditionally been performed at the passage level, there has been increasing interest in ARA at the sentence level, given its applications in downstream tasks such as text simplification and language exercise generation. Recent research has suggested the effectiveness of hybrid approaches for ARA, but they have yet to be applied on the sentence level. We present the first study that compares neural and hybrid models for sentence-level ARA. We conducted experiments on graded sentences from the Wall Street Journal (WSJ) and a dataset derived from the OneStopEnglish corpus. Experimental results show that both neural and hybrid models outperform traditional classifiers trained on linguistic features. Hybrid models obtained the best accuracy on both datasets, surpassing the previous best result reported on the WSJ dataset by almost 13% absolute.</abstract>
       <url hash="178dcaa2">2023.bea-1.37</url>
@@ -736,7 +736,7 @@
     <paper id="57">
       <title>Socratic Questioning of Novice Debuggers: A Benchmark Dataset and Preliminary Evaluations</title>
       <author><first>Erfan</first><last>Al-Hossami</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
-      <author><first>Razvan</first><last>Bunescu</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
       <author><first>Ryan</first><last>Teehan</last><affiliation>New York University</affiliation></author>
       <author><first>Laurel</first><last>Powell</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
       <author><first>Khyati</first><last>Mahajan</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
@@ -834,7 +834,7 @@
     <paper id="65">
       <title>The <fixed-case>ADAIO</fixed-case> System at the <fixed-case>BEA</fixed-case>-2023 Shared Task: Shared Task Generating <fixed-case>AI</fixed-case> Teacher Responses in Educational Dialogues</title>
       <author><first>Adaeze</first><last>Adigwe</last><affiliation>University of Edinburgh</affiliation></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Istituto Italiano di Tecnologia, Universit di Ferrara, Italy</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Istituto Italiano di Tecnologia, Universit di Ferrara, Italy</affiliation></author>
       <pages>796-804</pages>
       <abstract>This paper presents the ADAIO team’s system entry in the Building Educational Applications (BEA) 2023 Shared Task on Generating AI Teacher Responses in Educational Dialogues. The task aims to assess the performance of state-of-the-art generative models as AI teachers in producing suitable responses within a student-teacher dialogue. Our system comprises evaluating various baseline models using OpenAI GPT-3 and designing diverse prompts to prompt the OpenAI models for teacher response generation. After the challenge, our system achieved second place by employing a few-shot prompt-based approach with the OpenAI text-davinci-003 model. The results highlight the few-shot learning capabilities of large-language models, particularly OpenAI’s GPT-3, in the role of AI teachers.</abstract>
       <url hash="52bfbf16">2023.bea-1.65</url>
diff --git a/data/xml/2023.bigpicture.xml b/data/xml/2023.bigpicture.xml
index 5425efd52c..1ac74db87f 100644
--- a/data/xml/2023.bigpicture.xml
+++ b/data/xml/2023.bigpicture.xml
@@ -88,10 +88,10 @@
     </paper>
     <paper id="8">
       <title>Transformers as Graph-to-Graph Models</title>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
       <author><first>Andrei</first><last>Coman</last></author>
-      <author><first>Lesly</first><last>Miculicich</last><affiliation>Google</affiliation></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last><affiliation>Google</affiliation></author>
       <pages>93-107</pages>
       <abstract>We argue that Transformers are essentially graph-to-graph models, with sequences just being a special case. Attention weights are functionally equivalent to graph edges. Our Graph-to-Graph Transformer architecture makes this ability explicit, by inputting graph edges into the attention weight computations and predicting graph edges with attention-like functions, thereby integrating explicit graphs into the latent graphs learned by pretrained Transformers. Adding iterative graph refinement provides a joint embedding of input, output, and latent graphs, allowing non-autoregressive graph prediction to optimise the complete graph without any bespoke pipeline or decoding strategy. Empirical results show that this architecture achieves state-of-the-art accuracies for modelling a variety of linguistic structures, integrating very effectively with the latent linguistic representations learned by pretraining.</abstract>
       <url hash="ecc64868">2023.bigpicture-1.8</url>
@@ -103,7 +103,7 @@
       <author><first>Amanda</first><last>Bertsch</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Alex</first><last>Xie</last></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Matthew</first><last>Gormley</last><affiliation>School of Computer Science, Carnegie Mellon University and 3M</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last><affiliation>School of Computer Science, Carnegie Mellon University and 3M</affiliation></author>
       <pages>108-122</pages>
       <abstract>Minimum Bayes Risk (MBR) decoding is a method for choosing the outputs of a machine learning system based not on the output with the highest probability, but the output with the lowest risk (expected error) among multiple candidates. It is a simple but powerful method: for an additional cost at inference time, MBR provides reliable several-point improvements across metrics for a wide variety of tasks without any additional data or training. Despite this, MBR is not frequently applied in NLP works, and knowledge of the method itself is limited. We first provide an introduction to the method and the recent literature. We show that several recent methods that do not reference MBR can be written as special cases of MBR; this reformulation provides additional theoretical justification for the performance of these methods, explaining some results that were previously only empirical. We provide theoretical and empirical results about the effectiveness of various MBR variants and make concrete recommendations for the application of MBR in NLP models, including future directions in this area.</abstract>
       <url hash="da8e5cdb">2023.bigpicture-1.9</url>
diff --git a/data/xml/2023.bionlp.xml b/data/xml/2023.bionlp.xml
index ebad61b449..0676930fae 100644
--- a/data/xml/2023.bionlp.xml
+++ b/data/xml/2023.bionlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</booktitle>
       <editor><first>Dina</first><last>Demner-fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Kevin</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
       <month>July</month>
@@ -21,7 +21,7 @@
       <title>Multi-Source (Pre-)Training for Cross-Domain Measurement, Unit and Context Extraction</title>
       <author><first>Yueling</first><last>Li</last><affiliation>Basf Se</affiliation></author>
       <author><first>Sebastian</first><last>Martschat</last><affiliation>Basf Se</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <pages>1-25</pages>
       <abstract>We present a cross-domain approach for automated measurement and context extraction based on pre-trained language models. We construct a multi-source, multi-domain corpus and train an end-to-end extraction pipeline. We then apply multi-source task-adaptive pre-training and fine-tuning to benchmark the cross-domain generalization capability of our model. Further, we conceptualize and apply a task-specific error analysis and derive insights for future work. Our results suggest that multi-source training leads to the best overall results, while single-source training yields the best results for the respective individual domain. While our setup is successful at extracting quantity values and units, more research is needed to improve the extraction of contextual entities. We make the cross-domain corpus used in this work available online.</abstract>
       <url hash="134cfc72">2023.bionlp-1.1</url>
@@ -44,7 +44,7 @@
       <title>Exploring Partial Knowledge Base Inference in Biomedical Entity Linking</title>
       <author><first>Hongyi</first><last>Yuan</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Keming</first><last>Lu</last><affiliation>University of Southern California</affiliation></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <pages>37-49</pages>
       <abstract>Biomedical entity linking (EL) consists of named entity recognition (NER) and named entity disambiguation (NED). EL models are trained on corpora labeled by a predefined KB. However, it is a common scenario that only entities within a subset of the KB are precious to stakeholders. We name this scenario partial knowledge base inference; training an EL model with one KB and inferring on the part of it without further training. In this work, we give a detailed definition and evaluation procedures for this practically valuable but significantly understudied scenario and evaluate methods from three representative EL paradigms. We construct partial KB inference benchmarks and witness a catastrophic degradation in EL performance due to dramatically precision drop. Our findings reveal these EL paradigms can not correctly handle unlinkable mentions (NIL), so they are not robust to partial KB inference. We also propose two simple-and-effective redemption methods to combat the NIL issue with little computational overhead.</abstract>
       <url hash="0c955b58">2023.bionlp-1.3</url>
@@ -87,7 +87,7 @@
     <paper id="6">
       <title>Evaluating and Improving Automatic Speech Recognition using Severity</title>
       <author><first>Ryan</first><last>Whetten</last><affiliation>Boise State University</affiliation></author>
-      <author><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
       <pages>79-91</pages>
       <abstract>A common metric for evaluating Automatic Speech Recognition (ASR) is Word Error Rate (WER) which solely takes into account discrepancies at the word-level. Although useful, WER is not guaranteed to correlate well with human judgment or performance on downstream tasks that use ASR. Meaningful assessment of ASR mistakes becomes even more important in high-stake scenarios such as health-care. We propose 2 general measures to evaluate the severity of mistakes made by ASR systems, one based on sentiment analysis and another based on text embeddings. We evaluate these measures on simulated patient-doctor conversations using 5 ASR systems. Results show that these measures capture characteristics of ASR errors that WER does not. Furthermore, we train an ASR system incorporating severity and demonstrate the potential for using severity not only in the evaluation, but in the development of ASR. Advantages and limitations of this methodology are analyzed and discussed.</abstract>
       <url hash="468c4914">2023.bionlp-1.6</url>
@@ -109,7 +109,7 @@
       <title>Good Data, Large Data, or No Data? Comparing Three Approaches in Developing Research Aspect Classifiers for Biomedical Papers</title>
       <author><first>Shreya</first><last>Chandrasekhar</last><affiliation>Penn State University</affiliation></author>
       <author><first>Chieh-Yang</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>103-113</pages>
       <abstract>The rapid growth of scientific publications, particularly during the COVID-19 pandemic, emphasizes the need for tools to help researchers efficiently comprehend the latest advancements. One essential part of understanding scientific literature is research aspect classification, which categorizes sentences in abstracts to Background, Purpose, Method, and Finding. In this study, we investigate the impact of different datasets on model performance for the crowd-annotated CODA-19 research aspect classification task. Specifically, we explore the potential benefits of using the large, automatically curated PubMed 200K RCT dataset and evaluate the effectiveness of large language models (LLMs), such as LLaMA, GPT-3, ChatGPT, and GPT-4. Our results indicate that using the PubMed 200K RCT dataset does not improve performance for the CODA-19 task. We also observe that while GPT-4 performs well, it does not outperform the SciBERT model fine-tuned on the CODA-19 dataset, emphasizing the importance of a dedicated and task-aligned datasets dataset for the target task.</abstract>
       <url hash="ca62769c">2023.bionlp-1.8</url>
@@ -200,7 +200,7 @@
       <author><first>Nesrine</first><last>Bannour</last><affiliation>Universite Paris-Saclay, CNRS, LISN</affiliation></author>
       <author><first>Bastien</first><last>Rance</last><affiliation>INSERM, Universite Paris Cit, Sorbonne Paris Cit, AP-HP, HEGP, HeKa, Inria Paris</affiliation></author>
       <author><first>Xavier</first><last>Tannier</last><affiliation>Sorbonne Universite, Inserm, LIMICS</affiliation></author>
-      <author><first>Aurelie</first><last>Neveol</last><affiliation>Universite Paris Saclay, CNRS, LISN</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurelie</first><last>Neveol</last><affiliation>Universite Paris Saclay, CNRS, LISN</affiliation></author>
       <pages>191-205</pages>
       <abstract>Extracting temporal relations usually entails identifying and classifying the relation between two mentions. However, the definition of temporal mentions strongly depends on the text type and the application domain. Clinical text in particular is complex. It may describe events that occurred at different times, contain redundant information and a variety of domain-specific temporal expressions. In this paper, we propose a novel event-independent representation of temporal relations that is task-independent and, therefore, domain-independent. We are interested in identifying homogeneous text portions from a temporal standpoint and classifying the relation between each text portion and the document creation time. Temporal relation extraction is cast as a sequence labeling task and evaluated on oncology notes. We further evaluate our temporal representation by the temporal positioning of toxicity events of chemotherapy administrated to colon and lung cancer patients described in French clinical reports. An overall macro F-measure of 0.86 is obtained for temporal relation extraction by a neural token classification model trained on clinical texts written in French. Our results suggest that the toxicity event extraction task can be performed successfully by automatically identifying toxicity events and placing them within the patient timeline (F-measure .62). The proposed system has the potential to assist clinicians in the preparation of tumor board meetings.</abstract>
       <url hash="1c2e442a">2023.bionlp-1.16</url>
@@ -250,7 +250,7 @@
       <title>Multiple Evidence Combination for Fact-Checking of Health-Related Information</title>
       <author><first>Pritam</first><last>Deka</last><affiliation>Queen’s University Belfast</affiliation></author>
       <author><first>Anna</first><last>Jurek-Loughrey</last><affiliation>Queen’s University Belfast</affiliation></author>
-      <author><first>Deepak</first><last>P</last><affiliation>Queen’s University Belfast</affiliation></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last><affiliation>Queen’s University Belfast</affiliation></author>
       <pages>237-247</pages>
       <abstract>Fact-checking of health-related claims has become necessary in this digital age, where any information posted online is easily available to everyone. The most effective way to verify such claims is by using evidences obtained from reliable sources of medical knowledge, such as PubMed. Recent advances in the field of NLP have helped automate such fact-checking tasks. In this work, we propose a domain-specific BERT-based model using a transfer learning approach for the task of predicting the veracity of claim-evidence pairs for the verification of health-related facts. We also improvise on a method to combine multiple evidences retrieved for a single claim, taking into consideration conflicting evidences as well. We also show how our model can be exploited when labelled data is available and how back-translation can be used to augment data when there is data scarcity.</abstract>
       <url hash="325c7f20">2023.bionlp-1.20</url>
@@ -308,7 +308,7 @@
     <paper id="25">
       <title>Extracting Drug-Drug and Protein-Protein Interactions from Text using a Continuous Update of Tree-Transformers</title>
       <author><first>Sudipta</first><last>Singha Roy</last><affiliation>The University of Western Ontario</affiliation></author>
-      <author><first>Robert E.</first><last>Mercer</last><affiliation>The University of Western Ontario</affiliation></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last><affiliation>The University of Western Ontario</affiliation></author>
       <pages>280-291</pages>
       <abstract>Understanding biological mechanisms requires determining mutual protein-protein interactions (PPI). Obtaining drug-drug interactions (DDI) from scientific articles provides important information about drugs. Extracting such medical entity interactions from biomedical articles is challenging due to complex sentence structures. To address this issue, our proposed model utilizes tree-transformers to generate the sentence representation first, and then a sentence-to-word update step to fine-tune the word embeddings which are again used by the tree-transformers to generate enriched sentence representations. Using the tree-transformers helps the model preserve syntactical information and provide semantic information. The fine-tuning provided by the continuous update step adds improved semantics to the representation of each sentence. Our model outperforms other prominent models with a significant performance boost on the five standard PPI corpora and a performance boost on the one benchmark DDI corpus that are used in our experiments.</abstract>
       <url hash="2fb10305">2023.bionlp-1.25</url>
@@ -342,7 +342,7 @@
     </paper>
     <paper id="28">
       <title>End-to-end clinical temporal information extraction with multi-head attention</title>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
       <author><first>Steven</first><last>Bethard</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Dmitriy</first><last>Dligach</last><affiliation>Loyola University Chicago</affiliation></author>
       <author><first>Guergana</first><last>Savova</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
@@ -467,7 +467,7 @@
     <paper id="38">
       <title>Can Social Media Inform Dietary Approaches for Health Management? A Dataset and Benchmark for Low-Carb Diet</title>
       <author><first>Skyler</first><last>Zou</last><affiliation>Csiro</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>Csiro</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>Csiro</affiliation></author>
       <author><first>Grant</first><last>Brinkworth</last><affiliation>Csiro</affiliation></author>
       <author><first>Pennie</first><last>Taylor</last><affiliation>Csiro</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>Csiro</affiliation></author>
@@ -479,9 +479,9 @@
     </paper>
     <paper id="39">
       <title>Promoting Fairness in Classification of Quality of Medical Evidence</title>
-      <author><first>Simon</first><last>Suster</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
       <pages>413-426</pages>
       <abstract>Automatically rating the quality of published research is a critical step in medical evidence synthesis. While several methods have been proposed, their algorithmic fairness has been overlooked even though significant risks may follow when such systems are deployed in biomedical contexts. In this work, we study fairness on two systems along two sensitive attributes, participant sex and medical area. In some cases, we find important inequalities, leading us to apply various debiasing methods. Upon examining an interplay of systems’ predictive performance, fairness, as well as medically critical selective classification capabilities and calibration performance, we find that fairness can sometimes improve through debiasing, but at a cost in other performance measures.</abstract>
       <url hash="04ef0842">2023.bionlp-1.39</url>
@@ -542,7 +542,7 @@
       <title>Overview of the Problem List Summarization (<fixed-case>P</fixed-case>rob<fixed-case>S</fixed-case>um) 2023 Shared Task on Summarizing Patients’ Active Diagnoses and Problems from Electronic Health Record Progress Notes</title>
       <author><first>Yanjun</first><last>Gao</last><affiliation>University of Wisconsin Madison</affiliation></author>
       <author><first>Dmitriy</first><last>Dligach</last><affiliation>Loyola University Chicago</affiliation></author>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital and Harvard Medical School</affiliation></author>
       <author><first>Majid</first><last>Afshar</last><affiliation>University of Wisconsin</affiliation></author>
       <pages>461-467</pages>
       <abstract>The BioNLP Workshop 2023 initiated the launch of a shared task on Problem List Summarization (ProbSum) in January 2023. The aim of this shared task is to attract future research efforts in building NLP models for real-world diagnostic decision support applications, where a system generating relevant and accurate diagnoses will augment the healthcare providers’ decision-making process and improve the quality of care for patients. The goal for participants is to develop models that generated a list of diagnoses and problems using input from the daily care notes collected from the hospitalization of critically ill patients. Eight teams submitted their final systems to the shared task leaderboard. In this paper, we describe the tasks, datasets, evaluation metrics, and baseline systems. Additionally, the techniques and results of the evaluation of the different approaches tried by the participating teams are summarized.</abstract>
@@ -555,7 +555,7 @@
       <author><first>Tomas</first><last>Goldsack</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Zheheng</first><last>Luo</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Qianqian</first><last>Xie</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Matthew</first><last>Shardlow</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Sophia</first><last>Ananiadou</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>Department of Computer Science, University of Sheffield</affiliation></author>
@@ -593,7 +593,7 @@
     <paper id="47">
       <title><fixed-case>D</fixed-case>eakin<fixed-case>NLP</fixed-case> at <fixed-case>P</fixed-case>rob<fixed-case>S</fixed-case>um 2023: Clinical Progress Note Summarization with Rules and Language <fixed-case>M</fixed-case>odels<fixed-case>C</fixed-case>linical Progress Note Summarization with Rules and Languague Models</title>
       <author><first>Ming</first><last>Liu</last><affiliation>Deakin University</affiliation></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Deakin University</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Deakin University</affiliation></author>
       <author><first>Weicong</first><last>Tan</last><affiliation>Monash University</affiliation></author>
       <author><first>He</first><last>Zhang</last><affiliation>Cnpiec Kexin Ltd</affiliation></author>
       <pages>491-496</pages>
@@ -618,13 +618,13 @@
       <author><first>Hao</first><last>Li</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Yuping</first><last>Wu</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>Asus Aics</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
       <author><first>Thanh-Tung</first><last>Nguyen</last><affiliation>Asus</affiliation></author>
       <author><first>Abhinav</first><last>Ramesh Kashyap</last><affiliation>Asus Aics</affiliation></author>
       <author><first>Xiao-Jun</first><last>Zeng</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Daniel</first><last>Beck</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Stefan</first><last>Winkler</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>503-509</pages>
       <abstract>Medical progress notes play a crucial role in documenting a patient’s hospital journey, including his or her condition, treatment plan, and any updates for healthcare providers. Automatic summarisation of a patient’s problems in the form of a “problem list” can aid stakeholders in understanding a patient’s condition, reducing workload and cognitive bias. BioNLP 2023 Shared Task 1A focusses on generating a list of diagnoses and problems from the provider’s progress notes during hospitalisation. In this paper, we introduce our proposed approach to this task, which integrates two complementary components. One component employs large language models (LLMs) for data augmentation; the other is an abstractive summarisation LLM with a novel pre-training objective for generating the patients’ problems summarised as a list. Our approach was ranked second among all submissions to the shared task. The performance of our model on the development and test datasets shows that our approach is more robust on unknown data, with an improvement of up to 3.1 points over the same size of the larger model.</abstract>
       <url hash="9463c73b">2023.bionlp-1.49</url>
@@ -852,7 +852,7 @@
     <paper id="68">
       <title><fixed-case>CSIRO</fixed-case> <fixed-case>D</fixed-case>ata61 Team at <fixed-case>B</fixed-case>io<fixed-case>L</fixed-case>ay<fixed-case>S</fixed-case>umm Task 1: Lay Summarisation of Biomedical Research Articles Using Generative Models</title>
       <author><first>Mong Yuan</first><last>Sim</last><affiliation>The University of Adelaide</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
       <author><first>Maciej</first><last>Rybinski</last><affiliation>Csiro</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>Csiro</affiliation></author>
       <pages>629-635</pages>
diff --git a/data/xml/2023.blackboxnlp.xml b/data/xml/2023.blackboxnlp.xml
index 23a4f61420..31d1eb5af3 100644
--- a/data/xml/2023.blackboxnlp.xml
+++ b/data/xml/2023.blackboxnlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Sophie</first><last>Hao</last></editor>
       <editor><first>Jaap</first><last>Jumelet</last></editor>
       <editor><first>Najoung</first><last>Kim</last></editor>
-      <editor><first>Arya</first><last>McCarthy</last></editor>
+      <editor id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></editor>
       <editor><first>Hosein</first><last>Mohebbi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
@@ -52,7 +52,7 @@
       <author><first>Chandan</first><last>Singh</last></author>
       <author><first>John X.</first><last>Morris</last></author>
       <author><first>Jyoti</first><last>Aneja</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>31–55</pages>
       <abstract>Large language models (LLMs) have displayed an impressive ability to harness natural language to perform complex tasks. We explore whether we can leverage this ability to find and explain patterns in data. Specifically, given a pre-trained LLM and data examples, we apply interpretable autoprompting (iPrompt) to generate a natural language string explaining the data. iPrompt iteratively generates explanations with an LLM and reranks them based on their performance when used as a prompt. Experiments on a wide range of datasets, from synthetic mathematics to natural language understanding, show that iPrompt can yield meaningful insights by accurately finding dataset explanations that are human-interpretable. Moreover, iPrompt is reasonably efficient, as it does not require access to model gradients and works with relatively small models (e.g. ~6 billion parameters rather than &gt;=100 billion). Finally, experiments with scientific datasets show the potential for iPrompt to aid in scientific discovery.</abstract>
@@ -116,7 +116,7 @@
     <paper id="9">
       <title>Unveiling Multilinguality in Transformer Models: Exploring Language Specificity in Feed-Forward Networks</title>
       <author><first>Sunit</first><last>Bhattacharya</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>120–126</pages>
       <abstract>Recent research suggests that the feed-forward module within Transformers can be viewed as a collection of key-value memories, where the keys learn to capture specific patterns from the input based on the training examples. The values then combine the output from the ‘memories’ of the keys to generate predictions about the next token. This leads to an incremental process of prediction that gradually converges towards the final token choice near the output layers. This interesting perspective raises questions about how multilingual models might leverage this mechanism. Specifically, for autoregressive models trained on two or more languages, do all neurons (across layers) respond equally to all languages? No! Our hypothesis centers around the notion that during pre-training, certain model parameters learn strong language-specific features, while others learn more language-agnostic (shared across languages) features. To validate this, we conduct experiments utilizing parallel corpora of two languages that the model was initially pre-trained on. Our findings reveal that the layers closest to the network’s input or output tend to exhibit more language-specific behaviour compared to the layers in the middle.</abstract>
       <url hash="1ebb287d">2023.blackboxnlp-1.9</url>
@@ -136,7 +136,7 @@
     <paper id="11">
       <title>Investigating Semantic Subspaces of Transformer Sentence Embeddings through Linear Structural Probing</title>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>142–154</pages>
       <abstract>The question of what kinds of linguistic information are encoded in different layers of Transformer-based language models is of considerable interest for the NLP community. Existing work, however, has overwhelmingly focused on word-level representations and encoder-only language models with the masked-token training objective. In this paper, we present experiments with semantic structural probing, a method for studying sentence-level representations via finding a subspace of the embedding space that provides suitable task-specific pairwise distances between data-points. We apply our method to language models from different families (encoder-only, decoder-only, encoder-decoder) and of different sizes in the context of two tasks, semantic textual similarity and natural-language inference. We find that model families differ substantially in their performance and layer dynamics, but that the results are largely model-size invariant.</abstract>
       <url hash="3813f932">2023.blackboxnlp-1.11</url>
@@ -157,7 +157,7 @@
       <title>Enhancing Interpretability Using Human Similarity Judgements to Prune Word Embeddings</title>
       <author><first>Natalia</first><last>Flechas Manrique</last></author>
       <author><first>Wanqian</first><last>Bao</last></author>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <author><first>Uri</first><last>Hasson</last></author>
       <pages>169–179</pages>
       <abstract>Interpretability methods in NLP aim to provide insights into the semantics underlying specific system architectures. Focusing on word embeddings, we present a supervised-learning method that, for a given domain (e.g., sports, professions), identifies a subset of model features that strongly improve prediction of human similarity judgments. We show this method keeps only 20-40% of the original embeddings, for 8 independent semantic domains, and that it retains different feature sets across domains. We then present two approaches for interpreting the semantics of the retained features. The first obtains the scores of the domain words (co-hyponyms) on the first principal component of the retained embeddings, and extracts terms whose co-occurrence with the co-hyponyms tracks these scores’ profile. This analysis reveals that humans differentiate e.g. sports based on how gender-inclusive and international they are. The second approach uses the retained sets as variables in a probing task that predicts values along 65 semantically annotated dimensions for a dataset of 535 words. The features retained for professions are best at predicting cognitive, emotional and social dimensions, whereas features retained for fruits or vegetables best predict the gustation (taste) dimension. We discuss implications for alignment between AI systems and human knowledge.</abstract>
@@ -168,7 +168,7 @@
     <paper id="14">
       <title>When Your Language Model Cannot <fixed-case>E</fixed-case>ven Do Determiners Right: Probing for Anti-Presuppositions and the Maximize Presupposition! Principle</title>
       <author><first>Judith</first><last>Sieker</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>180–198</pages>
       <abstract>The increasing interest in probing the linguistic capabilities of large language models (LLMs) has long reached the area of semantics and pragmatics, including the phenomenon of presuppositions. In this study, we investigate a phenomenon that, however, has not yet been investigated, i.e., the phenomenon of anti-presupposition and the principle that accounts for it, the Maximize Presupposition! principle (MP!). Through an experimental investigation using psycholinguistic data and four open-source BERT model variants, we explore how language models handle different anti-presuppositions and whether they apply the MP! principle in their predictions. Further, we examine whether fine-tuning with Natural Language Inference data impacts adherence to the MP! principle. Our findings reveal that LLMs tend to replicate context-based n-grams rather than follow the MP! principle, with fine-tuning not enhancing their adherence. Notably, our results further indicate a striking difficulty of LLMs to correctly predict determiners, in relatively simple linguistic contexts.</abstract>
       <url hash="7e7c4e80">2023.blackboxnlp-1.14</url>
@@ -188,7 +188,7 @@
       <title>The Self-Contained Negation Test Set</title>
       <author><first>David</first><last>Kletz</last></author>
       <author><first>Pascal</first><last>Amsili</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>212–221</pages>
       <abstract>Several methodologies have recently been proposed to evaluate the ability of Pretrained Language Models (PLMs) to interpret negation. In this article, we build on Gubelmann and Handschuh (2022), which studies the modification of PLMs’ predictions as a function of the polarity of inputs, in English. Crucially, this test uses “self-contained” inputs ending with a masked position: depending on the polarity of a verb in the input, a particular token is either semantically ruled out or allowed at the masked position. By replicating Gubelmann and Handschuh (2022) experiments, we have uncovered flaws that weaken the conclusions that can be drawn from this test. We thus propose an improved version, the Self-Contained Neg Test, which is more controlled, more systematic, and entirely based on examples forming minimal pairs varying only in the presence or absence of verbal negation in English. When applying our test to the roberta and bert base and large models, we show that only roberta-large shows trends that match the expectations, while bert-base is mostly insensitive to negation. For all the tested models though, in a significant number of test instances the top-1 prediction remains the token that is semantically forbidden by the context, which shows how much room for improvement remains for a proper treatment of the negation phenomenon.</abstract>
       <url hash="cd4cd42d">2023.blackboxnlp-1.16</url>
@@ -231,8 +231,8 @@
       <title>Investigating the Encoding of Words in <fixed-case>BERT</fixed-case>’s Neurons Using Feature Textualization</title>
       <author><first>Tanja</first><last>Baeumel</last></author>
       <author><first>Soniya</first><last>Vijayakumar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <author><first>Simon</first><last>Ostermann</last></author>
       <pages>261–270</pages>
       <abstract>Pretrained language models (PLMs) form the basis of most state-of-the-art NLP technologies. Nevertheless, they are essentially black boxes: Humans do not have a clear understanding of what knowledge is encoded in different parts of the models, especially in individual neurons. A contrast is in computer vision, where feature visualization provides a decompositional interpretability technique for neurons of vision models. Activation maximization is used to synthesize inherently interpretable visual representations of the information encoded in individual neurons. Our work is inspired by this but presents a cautionary tale on the interpretability of single neurons, based on the first large-scale attempt to adapt activation maximization to NLP, and, more specifically, large PLMs. We propose feature textualization, a technique to produce dense representations of neurons in the PLM word embedding space. We apply feature textualization to the BERT model to investigate whether the knowledge encoded in individual neurons can be interpreted and symbolized. We find that the produced representations can provide insights about the knowledge encoded in individual neurons, but that individual neurons do not represent clear-cut symbolic units of language such as words. Additionally, we use feature textualization to investigate how many neurons are needed to encode words in BERT.</abstract>
@@ -263,7 +263,7 @@
     <paper id="23">
       <title>Not Wacky vs. Definitely Wacky: A Study of Scalar Adverbs in Pretrained Language Models</title>
       <author><first>Isabelle</first><last>Lorge</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <pages>296–316</pages>
       <abstract>Vector-space models of word meaning all assume that words occurring in similar contexts have similar meanings. Words that are similar in their topical associations but differ in their logical force tend to emerge as semantically close – creating well-known challenges for NLP applications that involve logical reasoning. Pretrained language models such as BERT, RoBERTa, GPT-2, and GPT-3 hold the promise of performing better on logical tasks than classic static word embeddings. However, reports are mixed about their success. Here, we advance this discussion through a systematic study of scalar adverbs, an under-explored class of words with strong logical force. Using three different tasks involving both naturalistic social media data and constructed examples, we investigate the extent to which BERT, RoBERTa, GPT-2 and GPT-3 exhibit knowledge of these common words. We ask: 1) Do the models distinguish amongst the three semantic categories of MODALITY, FREQUENCY and DEGREE? 2) Do they have implicit representations of full scales from maximally negative to maximally positive? 3) How do word frequency and contextual factors impact model performance? We find that despite capturing some aspects of logical meaning, the models still have obvious shortfalls.</abstract>
       <url hash="1f5216bc">2023.blackboxnlp-1.23</url>
@@ -272,7 +272,7 @@
     </paper>
     <paper id="24">
       <title>Rigorously Assessing Natural Language Explanations of Neurons</title>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Atticus</first><last>Geiger</last></author>
       <author><first>Karel</first><last>D’Oosterlinck</last></author>
       <author><first>Zhengxuan</first><last>Wu</last></author>
@@ -318,7 +318,7 @@
       <title>Systematic Generalization by Finetuning? Analyzing Pretrained Language Models Using Constituency Tests</title>
       <author><first>Aishik</first><last>Chakraborty</last></author>
       <author><first>Jackie CK</first><last>Cheung</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>357–366</pages>
       <abstract>Constituents are groups of words that behave as a syntactic unit. Many linguistic phenomena (e.g., question formation, diathesis alternations) require the manipulation and rearrangement of constituents in a sentence. In this paper, we investigate how different finetuning setups affect the ability of pretrained sequence-to-sequence language models such as BART and T5 to replicate constituency tests — transformations that involve manipulating constituents in a sentence. We design multiple evaluation settings by varying the combinations of constituency tests and sentence types that a model is exposed to during finetuning. We show that models can replicate a linguistic transformation on a specific type of sentence that they saw during finetuning, but performance degrades substantially in other settings, showing a lack of systematic generalization. These results suggest that models often learn to manipulate sentences at a surface level unrelated to the constituent-level syntactic structure, for example by copying the first word of a sentence. These results may partially explain the brittleness of pretrained language models in downstream tasks.</abstract>
       <url hash="3a168b5b">2023.blackboxnlp-1.27</url>
diff --git a/data/xml/2023.bsnlp.xml b/data/xml/2023.bsnlp.xml
index 4f951d7165..2352748f29 100644
--- a/data/xml/2023.bsnlp.xml
+++ b/data/xml/2023.bsnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)</booktitle>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Michał</first><last>Marcińczuk</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <editor><first>Senja</first><last>Pollak</last></editor>
       <editor><first>Pavel</first><last>Přibáň</last></editor>
@@ -258,7 +258,7 @@
     <paper id="20">
       <title>Large Language Models for Multilingual <fixed-case>S</fixed-case>lavic Named Entity Linking</title>
       <author><first>Rinalds</first><last>Vīksna</last><affiliation>University of Latvia</affiliation></author>
-      <author><first>Inguna</first><last>Skadiņa</last><affiliation>Tilde/ Institute of Mathematics and Computer Science, University of Latvia</affiliation></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last><affiliation>Tilde/ Institute of Mathematics and Computer Science, University of Latvia</affiliation></author>
       <author><first>Daiga</first><last>Deksne</last><affiliation>Tilde; University of Latvia</affiliation></author>
       <author><first>Roberts</first><last>Rozis</last><affiliation>Tilde</affiliation></author>
       <pages>172-178</pages>
diff --git a/data/xml/2023.c3nlp.xml b/data/xml/2023.c3nlp.xml
index a5e085c6e9..2ce7c8cfcd 100644
--- a/data/xml/2023.c3nlp.xml
+++ b/data/xml/2023.c3nlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the First Workshop on Cross-Cultural Considerations in NLP (C3NLP)</booktitle>
       <editor><first>Sunipa</first><last>Dev</last></editor>
       <editor><first>Vinodkumar</first><last>Prabhakaran</last></editor>
-      <editor><first>David Ifeoluwa</first><last>Adelani</last></editor>
+      <editor id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></editor>
       <editor><first>Dirk</first><last>Hovy</last></editor>
       <editor><first>Luciana</first><last>Benotti</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>Varepsilon kú mask: Integrating <fixed-case>Y</fixed-case>orùbá cultural greetings into machine translation</title>
       <author><first>Idris</first><last>Akinade</last><affiliation>University of Ibadan</affiliation></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Clement</first><last>Odoje</last><affiliation>University of Ibadan</affiliation></author>
       <author><first>Dietrich</first><last>Klakow</last><affiliation>Saarland University</affiliation></author>
@@ -73,7 +73,7 @@
       <title>Hate Speech Classifiers are Culturally Insensitive</title>
       <author><first>Nayeon</first><last>Lee</last><affiliation>Kaist</affiliation></author>
       <author><first>Chani</first><last>Jung</last><affiliation>School of Computing, KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Kaist</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Kaist</affiliation></author>
       <pages>35-46</pages>
       <abstract>Increasingly, language models and machine translation are becoming valuable tools to help people communicate with others from diverse cultural backgrounds. However, current language models lack cultural awareness because they are trained on data representing only the culture within the dataset. This presents a problem in the context of hate speech classification, where cultural awareness is especially critical. This study aims to quantify the cultural insensitivity of three monolingual (Korean, English, Arabic) hate speech classifiers by evaluating their performance on translated datasets from the other two languages. Our research has revealed that hate speech classifiers evaluated on datasets from other cultures yield significantly lower F1 scores, up to almost 50%. In addition, they produce considerably higher false negative rates, with a magnitude up to five times greater, demonstrating the extent of the cultural gap. The study highlights the severity of cultural insensitivity of language models in hate speech classification.</abstract>
       <url hash="18a3d63c">2023.c3nlp-1.5</url>
@@ -85,7 +85,7 @@
       <title><fixed-case>MMT</fixed-case>: A Multilingual and Multi-Topic <fixed-case>I</fixed-case>ndian Social Media Dataset</title>
       <author><first>Dwip</first><last>Dalal</last><affiliation>Indian Institute Of Technology Gandhinagar</affiliation></author>
       <author><first>Vivek</first><last>Srivastava</last><affiliation>TCS research</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
       <pages>47-52</pages>
       <abstract>Social media plays a significant role in cross-cultural communication. A vast amount of this occurs in code-mixed and multilingual form, posing a significant challenge to Natural Language Processing (NLP) tools for processing such information, like language identification, topic modeling, and named-entity recognition. To address this, we introduce a large-scale multilingual and multi-topic dataset MMT collected from Twitter (1.7 million Tweets), encompassing 13 coarse-grained and 63 fine-grained topics in the Indian context. We further annotate a subset of 5,346 tweets from the MMT dataset with various Indian languages and their code-mixed counterparts. Also, we demonstrate that the currently existing tools fail to capture the linguistic diversity in MMT on two downstream tasks, i.e., topic modeling and language identification. To facilitate future research, we will make the anonymized and annotated dataset available in the public domain.</abstract>
       <url hash="65fe525e">2023.c3nlp-1.6</url>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="10">
       <title>Bias assessment for experts in discrimination, not in computer science</title>
-      <author><first>Laura</first><last>Alonso Alemany</last><affiliation>Universidad Nacional de Cordoba</affiliation></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last><affiliation>Universidad Nacional de Cordoba</affiliation></author>
       <author><first>Luciana</first><last>Benotti</last><affiliation>Universidad Nacional de Cordoba</affiliation></author>
       <author><first>Hernán</first><last>Maina</last><affiliation>Facultad de Matemática, Astronomía, Física y Computación - Universidad Nacional de Córdoba - CONICET</affiliation></author>
       <author><first>Lucía</first><last>Gonzalez</last><affiliation>Facultad de Matemática, Astronomía, Física y Computación - Universidad Nacional de Córdoba</affiliation></author>
diff --git a/data/xml/2023.calcs.xml b/data/xml/2023.calcs.xml
index b961388c43..09f21e1bfd 100644
--- a/data/xml/2023.calcs.xml
+++ b/data/xml/2023.calcs.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2023-12-07" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Computational Approaches to Linguistic Code-Switching</booktitle>
-      <editor><first>Genta</first><last>Winata</last></editor>
+      <editor id="genta-indra-winata"><first>Genta</first><last>Winata</last></editor>
       <editor><first>Sudipta</first><last>Kar</last></editor>
       <editor><first>Marina</first><last>Zhukova</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Sunayana</first><last>Sitaram</last></editor>
       <editor><first>Monojit</first><last>Choudhury</last></editor>
       <editor><first>Kalika</first><last>Bali</last></editor>
@@ -113,8 +113,8 @@
     <paper id="8">
       <title>Multilingual self-supervised speech representations improve the speech recognition of low-resource <fixed-case>A</fixed-case>frican languages with codeswitching</title>
       <author><first>Tolulope</first><last>Ogunremi</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>83-88</pages>
       <abstract>While many speakers of low-resource languages regularly code-switch between their languages and other regional languages or English, datasets of codeswitched speech are too small to train bespoke acoustic models from scratch or do language model rescoring. Here we propose finetuning self-supervised speech representations such as wav2vec 2.0 XLSR to recognize code-switched data. We find that finetuning self-supervised multilingual representations and augmenting them with n-gram language models trained from transcripts reduces absolute word error rates by up to 20% compared to baselines of hybrid models trained from scratch on code-switched data. Our findings suggest that in circumstances with limited training data finetuning self-supervised representations is a better performing and viable solution.</abstract>
       <url hash="6860faac">2023.calcs-1.8</url>
diff --git a/data/xml/2023.case.xml b/data/xml/2023.case.xml
index d0157bece7..b16f8d6a64 100644
--- a/data/xml/2023.case.xml
+++ b/data/xml/2023.case.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2023-10-31" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text</booktitle>
-      <editor><first>Ali</first><last>Hürriyetoğlu</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <editor><first>Vanni</first><last>Zavarella</last></editor>
       <editor><first>Reyyan</first><last>Yeniterzi</last></editor>
       <editor><first>Erdem</first><last>Yörük</last></editor>
@@ -87,7 +87,7 @@
       <author><first>Jesus</first><last>Armenta-Segura</last></author>
       <author><first>César Jesús</first><last>Núñez-Prado</last></author>
       <author><first>Grigori Olegovich</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <author><first>Rodrigo Francisco</first><last>Román-Godínez</last></author>
       <pages>53–59</pages>
       <abstract>Hate speech detection during times of war has become crucial in recent years, as evident with the recent Russo-Ukrainian war. In this paper, we present our submissions for both subtasks from the Multimodal Hate Speech Event Detec- tion contest at CASE 2023, RANLP 2023. We used pre-trained BERT models in both submis- sion, achieving a F1 score of 0.809 in subtask A, and F1 score of 0.567 in subtask B. In the first subtask, our result was not far from the first place, which led us to realize the lower impact of images in real-life memes about feel- ings, when compared with the impact of text. However, we observed a higher importance of images when targeting hateful feelings towards a specific entity. The source code to reproduce our results can be found at the github repository https://github.com/JesusASmx/OmeteotlAtCASE2023</abstract>
@@ -239,7 +239,7 @@
       <author><first>Vanni</first><last>Zavarella</last></author>
       <author><first>Ali</first><last>Hurriyetoglu</last></author>
       <author><first>Bertrand</first><last>De Longueville</last></author>
-      <author><first>Leonida</first><last>Della Rocca</last></author>
+      <author id="leonida-della-rocca"><first>Leonida</first><last>Della Rocca</last></author>
       <pages>160–166</pages>
       <abstract>The purpose of the shared task 2 at the Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE) 2023 workshop was to test the abilities of the participating models and systems to detect and geocode armed conflicts events in social media messages from Telegram channels reporting on the Russo Ukrainian war. The evaluation followed an approach which was introduced in CASE 2021 (Giorgi et al., 2021): For each system we consider the correlation of the spatio-temporal distribution of its detected events and the events identified for the same period in the ACLED (Armed Conflict Location and Event Data Project) database (Raleigh et al., 2010). We use ACLED for the ground truth, since it is a well established standard in the field of event extraction and political trend analysis, which relies on human annotators for the encoding of security events using a fine grained taxonomy. Two systems participated in this shared task, we report in this paper on both the shared task and the participating systems.</abstract>
       <url hash="6d7e5b74">2023.case-1.21</url>
diff --git a/data/xml/2023.cawl.xml b/data/xml/2023.cawl.xml
index c7930115dc..f38c0025aa 100644
--- a/data/xml/2023.cawl.xml
+++ b/data/xml/2023.cawl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Computation and Written Language (CAWL 2023)</booktitle>
       <editor><first>Kyle</first><last>Gorman</last></editor>
-      <editor><first>Richard</first><last>Sproat</last></editor>
+      <editor id="richard-sproat"><first>Richard</first><last>Sproat</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="10">
       <title>Decipherment of Lost Ancient Scripts as Combinatorial Optimisation Using Coupled Simulated Annealing</title>
-      <author><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
       <pages>82-91</pages>
       <abstract>This paper presents a new approach to the ancient scripts decipherment problem based on combinatorial optimisation and coupled simulated annealing, an advanced non-convex optimisation procedure. Solutions are encoded by using k-permutations allowing for null, oneto-many, and many-to-one mappings between signs. The proposed system is able to produce enhanced results in cognate identification when compared to the state-of-the-art systems on standard evaluation benchmarks used in literature.</abstract>
       <url hash="740b9cbf">2023.cawl-1.10</url>
diff --git a/data/xml/2023.ccl.xml b/data/xml/2023.ccl.xml
index a98e3b6cc8..cfe3f0a626 100644
--- a/data/xml/2023.ccl.xml
+++ b/data/xml/2023.ccl.xml
@@ -21,7 +21,7 @@
       <author><first>Siyuan</first><last>Wang</last><variant script="hani"><first>思远</first><last>王</last></variant></author>
       <author><first>Zhongyu</first><last>Wei</last><variant script="hani"><first>忠钰</first><last>魏</last></variant></author>
       <author><first>Qin</first><last>Chen</last><variant script="hani"><first>琴</first><last>陈</last></variant></author>
-      <author><first>Xuanjing</first><last>Huang</last><variant script="hani"><first>萱菁</first><last>黄</last></variant></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><variant script="hani"><first>萱菁</first><last>黄</last></variant></author>
       <pages>1–16</pages>
       <abstract>“本文提出了一种基于多跳推理链的对抗攻击方法,通过向输入文本中加入对抗性的攻击文本,并测试问答模型在干扰数据下生成答案的准确性,以检测问答模型真正执行多跳推理的能力和可解释性。该方法首先从输入文本中抽取从问题实体到答案实体的推理链,并基于推理链的特征把多跳问题分为了不同的推理类型,提出了一个模型来自动化实现问题拆解和推理类型预测,然后根据推理类型对原问题进行修改来构造攻击干扰句。实验对多个多跳问答模型进行了对抗攻击测试,所有模型的性能都显著下降,验证了该攻击方法的有效性以及目前问答模型存在的不足;向原训练集中加入对抗样本进行增强训练后,模型性能均有所回升,证明了本对抗增强训练方法可以提升模型的鲁棒性。”</abstract>
       <url hash="8347a9e0">2023.ccl-1.1</url>
@@ -269,7 +269,7 @@
       <author><first>Jiali</first><last>Zuo</last><variant script="hani"><first>家莉</first><last>左</last></variant></author>
       <author><first>Anquan</first><last>Jie</last><variant script="hani"><first>安全</first><last>揭</last></variant></author>
       <author><first>Wenbin</first><last>Luo</last><variant script="hani"><first>文兵</first><last>罗</last></variant></author>
-      <author><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
       <pages>229–240</pages>
       <abstract>“古籍命名实体识别对于古籍实体知识库与语料库的建设具有显著的现实意义。目前古籍命名实体识别的研究较少,主要原因是缺乏足够的训练语料。本文从《资治通鉴》入手,人工构建了一份古籍命名实体识别数据集,以此展开对古籍命名实体识别任务的研究。针对古籍文本多以单字表意且存在大量省略的语言特点,本文采用预训练词向量作为词典信息,充分利用其中蕴涵的词汇信息。实验表明,这种方法可以有效处理古籍文本中人名实体识别的问题。”</abstract>
       <url hash="85bbb1ea">2023.ccl-1.21</url>
@@ -282,7 +282,7 @@
       <author><first>Jiali</first><last>Zuo</last><variant script="hani"><first>家莉</first><last>左</last></variant></author>
       <author><first>Xueqiang</first><last>Ceng</last><variant script="hani"><first>雪强</first><last>曾</last></variant></author>
       <author><first>Zhongying</first><last>Wan</last><variant script="hani"><first>中英</first><last>万</last></variant></author>
-      <author><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last><variant script="hani"><first>明文</first><last>王</last></variant></author>
       <pages>241–252</pages>
       <abstract>“实体关系抽取是信息抽取领域中一项重要任务,目前实体关系抽取任务主要聚焦于英文和现代汉语领域,关于古汉语领域的数据集构建和方法的研究目前却较少。针对这一问题,本文在研究了开源的《资治通鉴》语料后,人工构建了一个古汉语实体关系数据集,并设计了一种结合全局对应矩阵和相对位置信息的实体关系联合抽取方法。最后通过在本文构建的数据集上进行实验,证明了该方法在古汉语实体关系抽取任务上的有效性。”</abstract>
       <url hash="5c7e03d8">2023.ccl-1.22</url>
@@ -292,7 +292,7 @@
     <paper id="23">
       <title>数字人文视域下的青藏高原文旅知识图谱构建研究——以塔尔寺为例(Research on the Construction of Cultural and Tourism Knowledge Atlas on the Qinghai-Tibet Plateau from the Perspective of Digital <fixed-case>H</fixed-case>umanity——<fixed-case>A</fixed-case> case study of Kumbum Monastery)</title>
       <author><first>Xinhao</first><last>Li</last><variant script="hani"><first>鑫豪</first><last>李</last></variant></author>
-      <author><first>Weina</first><last>Zhao</last><variant script="hani"><first>维纳</first><last>赵</last></variant></author>
+      <author id="weina-zhao"><first>Weina</first><last>Zhao</last><variant script="hani"><first>维纳</first><last>赵</last></variant></author>
       <author><first>Wanyi</first><last>Zhao</last><variant script="hani"><first>婉亦</first><last>赵</last></variant></author>
       <author><first>Chaoqun</first><last>Li</last><variant script="hani"><first>超群</first><last>李</last></variant></author>
       <pages>253–263</pages>
@@ -331,7 +331,7 @@
       <title>中国社会道德变化模型与发展动因探究——基于70年《人民日报》的计量与分析 (The Model of Moral Change and Motivation in <fixed-case>C</fixed-case>hinese Society ——<fixed-case>T</fixed-case>he Vocabulary Analysis of the 70-year ”People’s Daily”)</title>
       <author><first>Hongrui</first><last>Wang</last><variant script="hani"><first>弘睿</first><last>王</last></variant></author>
       <author><first>Dong</first><last>Yu</last><variant script="hani"><first>东</first><last>于</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <author><first>Liying</first><last>Ceng</last><variant script="hani"><first>立英</first><last>曾</last></variant></author>
       <pages>289–299</pages>
       <abstract>“社会道德的历时变迁研究具有重要意义。通过观察语言使用与道德变迁的历时联系,能够帮助描绘社会道德的变化趋势和发展规律、把握社会道德动态、推进道德建设。目前缺少从词汇角度、利用计算手段对大规模历时语料进行系统、全面的社会道德变迁研究。基于此,该文提出道德主题词历时计量模型,通过计量指标对1946-2015共70年的《人民日报》语料进行了历时计算与分析,观察了70年社会道德主题词的使用选择与变化。研究结果发现,道德词汇的历时使用与社会道德之间存在互动关系,反映出70年中国社会道德的历时变革与发展情况。”</abstract>
@@ -343,7 +343,7 @@
       <title>动词视角下的汉语性别表征研究——基于多语体语料库与依存分析(Gendered Representation in <fixed-case>C</fixed-case>hinese via Verbal Analysis —<fixed-case>B</fixed-case>ased on a Multi-register Corpus and Dependency Parsing)</title>
       <author><first>Yingshi</first><last>Chen</last><variant script="hani"><first>颖诗</first><last>陈</last></variant></author>
       <author><first>Dong</first><last>Yu</last><variant script="hani"><first>东</first><last>于</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>301–314</pages>
       <abstract>“动作是反映性别社会化的重要形式,研究汉语中动词的性别表征,可以找到语言构建不同性别身份的路径,即所采用的方式、形式。本文以依存句法关系为抓手,在四种语体的语料中抽取出和不同性别词构成依存结构的动词,统计出有显著性别差异的动词,并根据性别词充当的句子成分,结合语义进行了定量和定性分析。总体来看,大部分汉语动词表征是中性的,能体现性别的动词是少数,汉语作为一种承载着中华智慧且具有深厚文化底蕴的语言,对性别的表征是中立且平等的,这也体现出了我国的性别平等观念。而在表征性别的动词中,能看到构建男性和女性身份的两种不同路径。显著表征女性的动词在不同语体的语料中均多于显著表征男性的,但是表征男性的动词的语义分布则更为均衡,体现了“男性默认-女性专门”。在司法动词上,女性常常作为暴力行为的受害者,同时施害者男性却隐身了,体现了筜男性主宰笭女性顺从笢。不同语体的动词在构建性别时体现了不同的功能,新闻塑造了较为传统的性别规范,传统和网络文学以不同的形式打破了固有的性别规范。”</abstract>
       <url hash="e0100242">2023.ccl-1.27</url>
@@ -412,7 +412,7 @@
       <author><first>Siyi</first><last>Tang</last><variant script="hani"><first>思怡</first><last>汤</last></variant></author>
       <author><first>Shike</first><last>Wang</last><variant script="hani"><first>诗可</first><last>王</last></variant></author>
       <author><first>Dong</first><last>Yu</last><variant script="hani"><first>东</first><last>于</last></variant></author>
-      <author><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><variant script="hani"><first>鹏远</first><last>刘</last></variant></author>
       <pages>364–376</pages>
       <abstract>“现有的文本分级阅读研究往往从文本可读性的角度出发,以离散的文本难度等级的形式为读者推荐阅读书目。目前,仍缺少一种研究读者在阅读过程中产生的多方面、深层次阅读体验的体系结构。对此,我们调研了读者在阅读中文篇章过程中产生的不同阅读体验,提出了中文篇章多维度阅读体验的量化体系。我们将阅读过程中呈现的连续性的阅读体验归纳为多种类别,并在此基础上构建了中文篇章多维度阅读体验数据集。同时,我们探究了以大规模语言模型为基础的ChatGPT对阅读体验的量化能力,发现其虽具备强大的信息抽取和语义理解能力,在阅读体验的量化上却表现不佳。但我们发现大规模语言模型所蕴含的能力能够以知识蒸馏的方式协助深层属性的量化,基于此,我们实现了大规模语言模型增强的中文篇章多维阅读体验量化模型。模型在各维度阅读体验上的平均F1值达到0.72,高于ChatGPT的Fewshot结果0.48。”</abstract>
       <url hash="b8ad3ca0">2023.ccl-1.32</url>
@@ -426,7 +426,7 @@
       <author><first>Wenqi</first><last>Ding</last><variant script="hani"><first>文琪</first><last>丁</last></variant></author>
       <author><first>Yumeng</first><last>Fu</last><variant script="hani"><first>雨濛</first><last>付</last></variant></author>
       <author><first>Lili</first><last>Shan</last><variant script="hani"><first>丽莉</first><last>单</last></variant></author>
-      <author><first>Bingquan</first><last>Liu</last><variant script="hani"><first>秉权</first><last>刘</last></variant></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last><variant script="hani"><first>秉权</first><last>刘</last></variant></author>
       <pages>377–387</pages>
       <abstract>“推特机器人检测任务的目标是判断一个推特账号是真人账号还是自动化机器人账号。随着自动化账号拟人算法的快速迭代,检测最新类别的自动化账号变得越来越困难。最近,预训练语言模型在自然语言生成任务和其他任务上表现出了出色的水平,当这些预训练语言模型被用于推特文本自动生成时,会为推特机器人检测任务带来很大挑战。本文研究发现,困惑度偏低和相似度偏高的现象始终出现在不同时代自动化账号的历史推文中,且此现象不受预训练语言模型的影响。针对这些发现,本文提出了一种抽取历史推文困惑度特征和相似度特征的方法,并设计了一种特征融合策略,以更好地将这些新特征应用于已有的算法模型。本文方法在选定数据集上的性能超越了已有的基准方法,并在人民网主办、传播内容认知全国重点实验室承办的社交机器人识别大赛上取得了冠军。”</abstract>
       <url hash="fabdfbbc">2023.ccl-1.33</url>
@@ -1384,7 +1384,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>CCL</fixed-case>23-Eval 任务6总结报告:电信网络诈骗案件分类(Overview of <fixed-case>CCL</fixed-case>23-Eval Task 6: Telecom Network Fraud Case Classification)</title>
-      <author><first>Chengjie</first><last>Sun</last><variant script="hani"><first>承杰</first><last>孙</last></variant></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last><variant script="hani"><first>承杰</first><last>孙</last></variant></author>
       <author><first>Jie</first><last>Ji</last><variant script="hani"><first>杰</first><last>纪</last></variant></author>
       <author><first>Boyue</first><last>Shang</last><variant script="hani"><first>伯乐</first><last>尚</last></variant></author>
       <author><first>Binguan</first><last>Liu</last><variant script="hani"><first>秉权</first><last>刘</last></variant></author>
@@ -1465,7 +1465,7 @@
     <paper id="27">
       <title><fixed-case>CCL</fixed-case>23-Eval 任务7总结报告: 汉语学习者文本纠错(Overview of <fixed-case>CCL</fixed-case>23-Eval Task: <fixed-case>C</fixed-case>hinese Learner Text Correction)</title>
       <author><first>Hongxiang</first><last>Chang</last></author>
-      <author id="yang-liu-blcu"><first>Yang</first><last>Liu</last></author>
+      <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Meng</first><last>Xu</last></author>
       <author><first>Yingying</first><last>Wang</last></author>
       <author><first>Cunliang</first><last>Kong</last></author>
diff --git a/data/xml/2023.cl.xml b/data/xml/2023.cl.xml
index 99e222aec8..6c073d938b 100644
--- a/data/xml/2023.cl.xml
+++ b/data/xml/2023.cl.xml
@@ -31,7 +31,7 @@
       <author><first>Deborah</first><last>Ferreira</last></author>
       <author><first>Magdalena</first><last>Wysocka</last></author>
       <author><first>Dónal</first><last>Landers</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <doi>10.1162/coli_a_00462</doi>
       <abstract>Specialized transformers-based models (such as BioBERT and BioMegatron) are adapted for the biomedical domain based on publicly available biomedical corpora. As such, they have the potential to encode large-scale biological knowledge. We investigate the encoding and representation of biological knowledge in these models, and its potential utility to support inference in cancer precision medicine—namely, the interpretation of the clinical significance of genomic alterations. We compare the performance of different transformer baselines; we use probing to determine the consistency of encodings for distinct entities; and we use clustering methods to compare and contrast the internal properties of the embeddings for genes, variants, drugs, and diseases. We show that these models do indeed encode biological knowledge, although some of this is lost in fine-tuning for specific tasks. Finally, we analyze how the models behave with regard to biases and imbalances in the dataset.</abstract>
       <pages>73–115</pages>
@@ -53,7 +53,7 @@
     <paper id="4">
       <title>Annotation Error Detection: Analyzing the Past and Present for a More Coherent Future</title>
       <author><first>Jan-Christoph</first><last>Klie</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <doi>10.1162/coli_a_00464</doi>
       <abstract>Annotated data is an essential ingredient in natural language processing for training and evaluating machine learning models. It is therefore very desirable for the annotations to be of high quality. Recent work, however, has shown that several popular datasets contain a surprising number of annotation errors or inconsistencies. To alleviate this issue, many methods for annotation error detection have been devised over the years. While researchers show that their approaches work well on their newly introduced datasets, they rarely compare their methods to previous work or on the same datasets. This raises strong concerns on methods’ general performance and makes it difficult to assess their strengths and weaknesses. We therefore reimplement 18 methods for detecting potential annotation errors and evaluate them on 9 English datasets for text classification as well as token and span labeling. In addition, we define a uniform evaluation setup including a new formalization of the annotation error detection task, evaluation protocol, and general best practices. To facilitate future research and reproducibility, we release our datasets and implementations in an easy-to-use and open source software package.1</abstract>
@@ -67,8 +67,8 @@
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Hai</first><last>Hu</last></author>
       <author><first>Alexander F.</first><last>Webb</last></author>
-      <author><first>Lawrence S.</first><last>Moss</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <doi>10.1162/coli_a_00465</doi>
       <abstract>Against the backdrop of the ever-improving Natural Language Inference (NLI) models, recent efforts have focused on the suitability of the current NLI datasets and on the feasibility of the NLI task as it is currently approached. Many of the recent studies have exposed the inherent human disagreements of the inference task and have proposed a shift from categorical labels to human subjective probability assessments, capturing human uncertainty. In this work, we show how neither the current task formulation nor the proposed uncertainty gradient are entirely suitable for solving the NLI challenges. Instead, we propose an ordered sense space annotation, which distinguishes between logical and common-sense inference. One end of the space captures non-sensical inferences, while the other end represents strictly logical scenarios. In the middle of the space, we find a continuum of common-sense, namely, the subjective and graded opinion of a “person on the street.” To arrive at the proposed annotation scheme, we perform a careful investigation of the SICK corpus and we create a taxonomy of annotation issues and guidelines. We re-annotate the corpus with the proposed annotation scheme, utilizing four symbolic inference systems, and then perform a thorough evaluation of the scheme by fine-tuning and testing commonly used pre-trained language models on the re-annotated SICK within various settings. We also pioneer a crowd annotation of a small portion of the MultiNLI corpus, showcasing that it is possible to adapt our scheme for annotation by non-experts on another NLI corpus. Our work shows the efficiency and benefits of the proposed mechanism and opens the way for a careful NLI task refinement.</abstract>
       <pages>199–243</pages>
@@ -146,7 +146,7 @@
       <title>Onception: Active Learning with Expert Advice for Real World Machine Translation</title>
       <author><first>Vânia</first><last>Mendonça</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Alberto</first><last>Sardinha</last></author>
       <doi>10.1162/coli_a_00473</doi>
       <abstract>Active learning can play an important role in low-resource settings (i.e., where annotated data is scarce), by selecting which instances may be more worthy to annotate. Most active learning approaches for Machine Translation assume the existence of a pool of sentences in a source language, and rely on human annotators to provide translations or post-edits, which can still be costly. In this article, we apply active learning to a real-world human-in-the-loop scenario in which we assume that: (1) the source sentences may not be readily available, but instead arrive in a stream; (2) the automatic translations receive feedback in the form of a rating, instead of a correct/edited translation, since the human-in-the-loop might be a user looking for a translation, but not be able to provide one. To tackle the challenge of deciding whether each incoming pair source–translations is worthy to query for human feedback, we resort to a number of stream-based active learning query strategies. Moreover, because we do not know in advance which query strategy will be the most adequate for a certain language pair and set of Machine Translation models, we propose to dynamically combine multiple strategies using prediction with expert advice. Our experiments on different language pairs and feedback settings show that using active learning allows us to converge on the best Machine Translation systems with fewer human interactions. Furthermore, combining multiple strategies using prediction with expert advice outperforms several individual active learning strategies with even fewer interactions, particularly in partial feedback settings.</abstract>
@@ -158,7 +158,7 @@
       <title>Reflection of Demographic Background on Word Usage</title>
       <author><first>Aparna</first><last>Garimella</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <doi>10.1162/coli_a_00475</doi>
       <abstract>The availability of personal writings in electronic format provides researchers in the fields of linguistics, psychology, and computational linguistics with an unprecedented chance to study, on a large scale, the relationship between language use and the demographic background of writers, allowing us to better understand people across different demographics. In this article, we analyze the relation between language and demographics by developing cross-demographic word models to identify words with usage bias, or words that are used in significantly different ways by speakers of different demographics. Focusing on three demographic categories, namely, location, gender, and industry, we identify words with significant usage differences in each category and investigate various approaches of encoding a word’s usage, allowing us to identify language aspects that contribute to the differences. Our word models using topic-based features achieve at least 20% improvement in accuracy over the baseline for all demographic categories, even for scenarios with classification into 15 categories, illustrating the usefulness of topic-based features in identifying word usage differences. Further, we note that for location and industry, topics extracted from immediate context are the best predictors of word usages, hinting at the importance of word meaning and its grammatical function for these demographics, while for gender, topics obtained from longer contexts are better predictors for word usage.</abstract>
       <pages>373–394</pages>
@@ -170,7 +170,7 @@
       <author><first>Jiehang</first><last>Zeng</last></author>
       <author><first>Jianhan</first><last>Xu</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <doi>10.1162/coli_a_00476</doi>
       <abstract>Very recently, few certified defense methods have been developed to provably guarantee the robustness of a text classifier to adversarial synonym substitutions. However, all the existing certified defense methods assume that the defenders have been informed of how the adversaries generate synonyms, which is not a realistic scenario. In this study, we propose a certifiably robust defense method by randomly masking a certain proportion of the words in an input text, in which the above unrealistic assumption is no longer necessary. The proposed method can defend against not only word substitution-based attacks, but also character-level perturbations. We can certify the classifications of over 50% of texts to be robust to any perturbation of five words on AGNEWS, and two words on SST2 dataset. The experimental results show that our randomized smoothing method significantly outperforms recently proposed defense methods across multiple datasets under different attack algorithms.</abstract>
       <pages>395–427</pages>
@@ -224,10 +224,10 @@
     <paper id="2">
       <title>Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model</title>
       <author><first>Chris</first><last>van der Lee</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Chris</first><last>Emmery</last></author>
       <author><first>Travis J.</first><last>Wiltshire</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <doi>10.1162/coli_a_00484</doi>
       <abstract>This study discusses the effect of semi-supervised learning in combination with pretrained language models for data-to-text generation. It is not known whether semi-supervised learning is still helpful when a large-scale language model is also supplemented. This study aims to answer this question by comparing a data-to-text system only supplemented with a language model, to two data-to-text systems that are additionally enriched by a data augmentation or a pseudo-labeling semi-supervised learning approach. Results show that semi-supervised learning results in higher scores on diversity metrics. In terms of output quality, extending the training set of a data-to-text system with a language model using the pseudo-labeling approach did increase text quality scores, but the data augmentation approach yielded similar scores to the system without training set extension. These results indicate that semi-supervised learning approaches can bolster output quality and diversity, even when a language model is also present.</abstract>
       <pages>555–611</pages>
@@ -254,7 +254,7 @@
       <author><first>Muhammad Reza</first><last>Qorib</last></author>
       <author><first>Hannan</first><last>Cao</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <doi>10.1162/coli_a_00478</doi>
       <abstract>Grammatical Error Correction (GEC) is the task of automatically detecting and correcting errors in text. The task not only includes the correction of grammatical errors, such as missing prepositions and mismatched subject–verb agreement, but also orthographic and semantic errors, such as misspellings and word choice errors, respectively. The field has seen significant progress in the last decade, motivated in part by a series of five shared tasks, which drove the development of rule-based methods, statistical classifiers, statistical machine translation, and finally neural machine translation systems, which represent the current dominant state of the art. In this survey paper, we condense the field into a single article and first outline some of the linguistic challenges of the task, introduce the most popular datasets that are available to researchers (for both English and other languages), and summarize the various methods and techniques that have been developed with a particular focus on artificial error generation. We next describe the many different approaches to evaluation as well as concerns surrounding metric reliability, especially in relation to subjective human judgments, before concluding with an overview of recent progress and suggestions for future work and remaining challenges. We hope that this survey will serve as a comprehensive resource for researchers who are new to the field or who want to be kept apprised of recent developments.</abstract>
       <pages>643–701</pages>
@@ -268,7 +268,7 @@
       <author><first>John</first><last>Pavlopoulos</last></author>
       <author><first>Vanessa</first><last>Stefanak</last></author>
       <author><first>Andrew</first><last>Senior</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>John</first><last>Bodel</last></author>
       <author><first>Jonathan</first><last>Prag</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
@@ -281,7 +281,7 @@
     </paper>
     <paper id="6">
       <title>Dimensions of Explanatory Value in <fixed-case>NLP</fixed-case> Models</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <doi>10.1162/coli_a_00480</doi>
       <abstract>Performance on a dataset is often regarded as the key criterion for assessing NLP models. I argue for a broader perspective, which emphasizes scientific explanation. I draw on a long tradition in the philosophy of science, and on the Bayesian approach to assessing scientific theories, to argue for a plurality of criteria for assessing NLP models. To illustrate these ideas, I compare some recent models of language production with each other. I conclude by asking what it would mean for institutional policies if the NLP community took these ideas onboard.</abstract>
       <pages>749–761</pages>
@@ -298,9 +298,9 @@
     </paper>
     <paper id="8">
       <title>Obituary: <fixed-case>Y</fixed-case>orick <fixed-case>W</fixed-case>ilks</title>
-      <author><first>John</first><last>Tait</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="john-tait"><first>John</first><last>Tait</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <doi>10.1162/coli_a_00485</doi>
       <pages>767–772</pages>
       <url hash="1eb3e896">2023.cl-3.8</url>
@@ -333,10 +333,10 @@
       <author><first>Vitaly</first><last>Nikolaev</last></author>
       <author><first>Matthew</first><last>Lamm</last></author>
       <author><first>Lora</first><last>Aroyo</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Gaurav Singh</first><last>Tomar</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last></author>
       <author><first>Iulia</first><last>Turc</last></author>
       <author><first>David</first><last>Reitter</last></author>
       <doi>10.1162/coli_a_00486</doi>
@@ -373,7 +373,7 @@
       <title>Languages Through the Looking Glass of <fixed-case>BPE</fixed-case> Compression</title>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
       <author><first>Christian</first><last>Bentz</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <doi>10.1162/coli_a_00489</doi>
       <abstract>Byte-pair encoding (BPE) is widely used in NLP for performing subword tokenization. It uncovers redundant patterns for compressing the data, and hence alleviates the sparsity problem in downstream applications. Subwords discovered during the first merge operations tend to have the most substantial impact on the compression of texts. However, the structural underpinnings of this effect have not been analyzed cross-linguistically. We conduct in-depth analyses across 47 typologically diverse languages and three parallel corpora, and thereby show that the types of recurrent patterns that have the strongest impact on compression are an indicator of morphological typology. For languages with richer inflectional morphology there is a preference for highly productive subwords on the early merges, while for languages with less inflectional morphology, idiosyncratic subwords are more prominent. Both types of patterns contribute to efficient compression. Counter to the common perception that BPE subwords are not linguistically relevant, we find patterns across languages that resemble those described in traditional typology. We thus propose a novel way to characterize languages according to their BPE subword properties, inspired by the notion of morphological productivity in linguistics. This allows us to have language vectors that encode typological knowledge induced from raw text. Our approach is easily applicable to a wider range of languages and texts, as it does not require annotated data or any external linguistic knowledge. We discuss its potential contributions to quantitative typology and multilingual NLP.</abstract>
       <pages>943–1001</pages>
diff --git a/data/xml/2023.clasp.xml b/data/xml/2023.clasp.xml
index e971cc250f..ad9c2eea74 100644
--- a/data/xml/2023.clasp.xml
+++ b/data/xml/2023.clasp.xml
@@ -23,7 +23,7 @@
       <title>Improving Few-Shot Learning with Multilingual Transfer and <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo Training Set Selection</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Paul</first><last>O’Grady</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Matti</first><last>Lyra</last></author>
       <pages>1–10</pages>
       <abstract>In industry settings, machine learning is an attractive tool to automatize processes. Unfortunately, annotated and high-quality data is expensive to source. This problem is exacerbated in settings spanning multiple markets and languages. Thus, developing solutions for multilingual tasks with little available data is challenging. Few-shot learning is a compelling approach when building solutions in multilingual and low-resource settings, since the method not only requires just a few training examples to achieve high performance, but is also a technique agnostic to language. Even though the technique can be applied to multilingual settings, optimizing performance is an open question. In our work we show that leveraging higher-resource, task-specific language data can boost overall performance and we propose a method to select training examples per their average performance in a Monte Carlo simulation, resulting in a training set more conducive to learning. We demonstrate the effectiveness of our methods in fashion text reviews moderation, classifying reviews as related or unrelated to the given product. We show that our methodology boosts performance in multilingual (English, French, German) settings, increasing F1 score and significantly decreasing false positives.</abstract>
@@ -44,7 +44,7 @@
     <paper id="3">
       <title>Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models</title>
       <author><first>Bastian</first><last>Bunzeck</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>25–37</pages>
       <abstract>The success of large language models (LMs) has also prompted a push towards smaller models, but the differences in functionality and encodings between these two types of models are not yet well understood. In this paper, we employ a perturbed masking approach to investigate differences in token influence patterns on the sequence embeddings of larger and smaller RoBERTa models. Specifically, we explore how token properties like position, length or part of speech influence their sequence embeddings. We find that there is a general tendency for sequence-final tokens to exert a higher influence. Among part-of-speech tags, nouns, numerals and punctuation marks are the most influential, with smaller deviations for individual models. These findings also align with usage-based linguistic evidence on the effect of entrenchment. Finally, we show that the relationship between data size and model size influences the variability and brittleness of these effects, hinting towards a need for holistically balanced models.</abstract>
       <url hash="86df8b33">2023.clasp-1.3</url>
@@ -142,7 +142,7 @@
     <paper id="13">
       <title>Geometry-Aware Supertagging with Heterogeneous Dynamic Convolutions</title>
       <author><first>Konstantinos</first><last>Kogkalidis</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <pages>107–119</pages>
       <abstract>The syntactic categories of categorial grammar formalisms are structured units made of smaller, indivisible primitives, bound together by the underlying grammar’s category formation rules. In the trending approach of constructive supertagging, neural models are increasingly made aware of the internal category structure. In turn, this enables them to more reliably predict rare and out-of-vocabulary categories. with significant implications for grammars previously deemed too complex to find practical use. In this work, we revisit constructive supertagging from a graph-theoretic perspective, and propose a framework based on heterogeneous dynamic graph convolutions, aimed at exploiting the distinctive structure of a supertagger’s output space. We test our approach on a number of categorial grammar datasets spanning different languages and grammar formalisms, achieving substantial improvements over previous state of the art scores.</abstract>
       <url hash="b7a8076d">2023.clasp-1.13</url>
@@ -198,7 +198,7 @@
       <author><first>Georgios</first><last>Tziafas</last></author>
       <author><first>Konstantinos</first><last>Kogkalidis</last></author>
       <author><first>Gijs</first><last>Wijnholds</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <pages>176–184</pages>
       <abstract>Bidirectional masked Transformers have become the core theme in the current NLP landscape. Despite their impressive benchmarks, a recurring theme in recent research has been to question such models’ capacity for syntactic generalization. In this work, we seek to address this question by adding a supervised, token-level supertagging objective to standard unsupervised pretraining, enabling the explicit incorporation of syntactic biases into the network’s training dynamics. Our approach is straightforward to implement, induces a marginal computational overhead and is general enough to adapt to a variety of settings. We apply our methodology on Lassy Large, an automatically annotated corpus of written Dutch. Our experiments suggest that our syntax-aware model performs on par with established baselines, despite Lassy Large being one order of magnitude smaller than commonly used corpora.</abstract>
       <url hash="a0c6830e">2023.clasp-1.18</url>
@@ -220,7 +220,7 @@
     <paper id="20">
       <title>On the role of resources in the age of large language models</title>
       <author><first>Simon</first><last>Dobnik</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>191–197</pages>
       <abstract>We evaluate the role of expert-based domain knowledge and resources in relation to training large language models by referring to our work on training and evaluating neural models, also in under-resourced scenarios which we believe also informs training models for “well-resourced” languages and domains. We argue that our community needs both large-scale datasets and small but high-quality data based on expert knowledge and that both activities should work hand-in-hand.</abstract>
       <url hash="97a91907">2023.clasp-1.20</url>
diff --git a/data/xml/2023.clicit.xml b/data/xml/2023.clicit.xml
index bcc6fd9df2..a126550dbb 100644
--- a/data/xml/2023.clicit.xml
+++ b/data/xml/2023.clicit.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 9th Italian Conference on Computational Linguistics (CLiC-it 2023)</booktitle>
       <editor><first>Federico</first><last>Boschetti</last></editor>
-      <editor><first>Gianluca E.</first><last>Lebani</last></editor>
-      <editor><first>Bernardo</first><last>Magnini</last></editor>
+      <editor id="gianluca-e-lebani"><first>Gianluca E.</first><last>Lebani</last></editor>
+      <editor id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></editor>
       <editor><first>Nicole</first><last>Novielli</last></editor>
       <publisher>CEUR Workshop Proceedings</publisher>
       <address>Venice, Italy</address>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="2">
       <title>When the Lab of <fixed-case>C</fixed-case>omp<fixed-case>L</fixed-case>ing Was Started at the University of Venice - Preface to the Proceedings of the First Workshop Held in 1982</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>4-9</pages>
       <url hash="137f202f">2023.clicit-1.2</url>
       <bibkey>delmonte-2023-lab</bibkey>
@@ -43,7 +43,7 @@
       <author><first>Davide</first><last>Venditti</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Cristina</first><last>Giannone</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Andrea</first><last>Favalli</last></author>
       <author><first>Raniero</first><last>Romagnoli</last></author>
       <pages>10-16</pages>
@@ -114,7 +114,7 @@
       <author><first>Pier Felice</first><last>Balestrucci</last></author>
       <author><first>Luca</first><last>Anselma</last></author>
       <author><first>Cristian</first><last>Bernareggi</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>70-77</pages>
       <url hash="a14d98f7">2023.clicit-1.10</url>
       <bibkey>balestrucci-etal-2023-building</bibkey>
@@ -164,7 +164,7 @@
       <title>Testing <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> for Stability and Reasoning: A Case Study Using <fixed-case>I</fixed-case>talian Medical Specialty Tests</title>
       <author><first>Silvia</first><last>Casola</last></author>
       <author><first>Tiziano</first><last>Labruna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Bernardo</first><last>Magnini</last></author>
       <pages>113-119</pages>
       <url hash="067ab99d">2023.clicit-1.15</url>
@@ -213,7 +213,7 @@
       <title>Highway to Hell. Towards a <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank for Dante Alighieri’s Comedy</title>
       <author><first>Claudia</first><last>Corbetta</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
       <pages>154-161</pages>
       <url hash="8d8967b0">2023.clicit-1.20</url>
@@ -263,7 +263,7 @@
       <title>How To Build Competitive Multi-gender Speech Translation Models For Controlling Speaker Gender Translation</title>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Dennis</first><last>Fucci</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>203-210</pages>
       <url hash="43637244">2023.clicit-1.26</url>
@@ -301,7 +301,7 @@
       <title>End-to-end Dependency Parsing via Auto-regressive Large Language Model</title>
       <author><first>Claudiu Daniel</first><last>Hromei</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>236-242</pages>
       <url hash="3068c09d">2023.clicit-1.30</url>
       <bibkey>hromei-etal-2023-end</bibkey>
@@ -329,7 +329,7 @@
     <paper id="33">
       <title>Introducing Deep Learning with Data Augmentation and Corpus Construction for <fixed-case>LIS</fixed-case></title>
       <author><first>Manuela</first><last>Marchisio</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Dario</first><last>Sammaruga</last></author>
       <pages>259-271</pages>
       <url hash="f36144f9">2023.clicit-1.33</url>
@@ -375,8 +375,8 @@
       <title>Building Structured Synthetic Datasets: The Case of Blackbird Language Matrices (<fixed-case>BLM</fixed-case>s)</title>
       <author><first>Paola</first><last>Merlo</last></author>
       <author><first>Giuseppe</first><last>Samo</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author><first>Chunyang</first><last>Jiang</last></author>
       <pages>292-302</pages>
       <url hash="5b7dc29c">2023.clicit-1.36</url>
       <bibkey>merlo-etal-2023-building</bibkey>
@@ -411,7 +411,7 @@
     <paper id="40">
       <title>Unraveling Text Coherence from the Human Perspective: a Novel Dataset for <fixed-case>I</fixed-case>talian</title>
       <author><first>Federica</first><last>Papa</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Dominique</first><last>Brunato</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <pages>334-341</pages>
@@ -431,7 +431,7 @@
       <title>Are All Languages Equal? Curriculum Learning over Different Languages</title>
       <author><first>Giulia</first><last>Pucci</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>351-360</pages>
       <url hash="f1738f0f">2023.clicit-1.42</url>
       <bibkey>pucci-etal-2023-languages</bibkey>
@@ -445,7 +445,7 @@
       <author><first>Cristina</first><last>Giannone</last></author>
       <author><first>Andrea</first><last>Favalli</last></author>
       <author><first>Raniero</first><last>Romagnoli</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>361-368</pages>
       <url hash="5848a5f9">2023.clicit-1.43</url>
       <bibkey>ranaldi-etal-2023-prompting</bibkey>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="50">
       <title>“That branch of the <fixed-case>L</fixed-case>ake of <fixed-case>C</fixed-case>omo...”: Developing a New Resource for the Analysis of <fixed-case>I</fixed-case> Promessi Sposi and its Historical Translations</title>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Marco</first><last>Sartor</last></author>
       <pages>420-426</pages>
       <url hash="bcf0ada7">2023.clicit-1.50</url>
@@ -598,7 +598,7 @@
       <author><first>Pierluigi</first><last>Cassotti</last></author>
       <author><first>Marco</first><last>Polignano</last></author>
       <author><first>Lucia</first><last>Siciliani</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>480-484</pages>
       <url hash="454aa0dc">2023.clicit-1.59</url>
       <bibkey>basile-etal-2023-impact</bibkey>
@@ -627,7 +627,7 @@
       <author><first>Alessandra Teresa</first><last>Cignarella</last></author>
       <author><first>Simona</first><last>Frenda</last></author>
       <author><first>Mirko</first><last>Lai</last></author>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <author><first>Alessandra</first><last>Urbinati</last></author>
       <pages>494-498</pages>
       <url hash="b7c6f02c">2023.clicit-1.62</url>
@@ -697,7 +697,7 @@
       <title>Exploring Sentiments in Summarization: <fixed-case>S</fixed-case>enti<fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank, an Emotional Variant of <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank</title>
       <author><first>Md. Murad</first><last>Hossain</last></author>
       <author><first>Luca</first><last>Anselma</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>535-539</pages>
       <url hash="fee070b9">2023.clicit-1.70</url>
       <bibkey>hossain-etal-2023-exploring</bibkey>
@@ -714,7 +714,7 @@
     <paper id="72">
       <title>The Inherence of Telicity: Unveiling Temporal Reasoning in Video Question Answering</title>
       <author><first>Olga</first><last>Loginova</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>546-550</pages>
       <url hash="3f5b9af4">2023.clicit-1.72</url>
       <bibkey>loginova-bernardi-2023-inherence</bibkey>
@@ -732,8 +732,8 @@
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Giulia</first><last>Pucci</last></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>557-561</pages>
       <url hash="33826ff1">2023.clicit-1.74</url>
       <bibkey>ranaldi-etal-2023-teasing</bibkey>
@@ -744,7 +744,7 @@
       <author><first>Dario</first><last>Onorati</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Davide</first><last>Venditti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>562-569</pages>
       <url hash="ea13b4d3">2023.clicit-1.75</url>
       <bibkey>ruzzetti-etal-2023-investigating</bibkey>
@@ -753,7 +753,7 @@
       <title>Towards a New Computational Lexicon for <fixed-case>I</fixed-case>talian: Building the Morphological Layer by Harmonizing and Merging Existing Resources</title>
       <author><first>Flavia</first><last>Sciolette</last></author>
       <author><first>Simone</first><last>Marchi</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <pages>570-574</pages>
       <url hash="32a274f6">2023.clicit-1.76</url>
       <bibkey>sciolette-etal-2023-towards</bibkey>
diff --git a/data/xml/2023.clinicalnlp.xml b/data/xml/2023.clinicalnlp.xml
index aa673f4203..0e2f146ec9 100644
--- a/data/xml/2023.clinicalnlp.xml
+++ b/data/xml/2023.clinicalnlp.xml
@@ -77,7 +77,7 @@
       <author><first>Gleb</first><last>Erofeev</last></author>
       <author><first>Irina</first><last>Sorokina</last></author>
       <author><first>Serge</first><last>Gladkoff</last><affiliation>Logrus Global AI Lab</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>31-40</pages>
       <abstract>Massively multilingual pre-trained language models (MMPLMs) are developed in recent years demonstrating superpowers and the pre-knowledge they acquire for downstream tasks. This work investigates whether MMPLMs can be applied to clinical domain machine translation (MT) towards entirely unseen languages via transfer learning. We carry out an experimental investigation using Meta-AI’s MMPLMs “wmt21-dense-24-wide-en-X and X-en (WMT21fb)” which were pre-trained on 7 language pairs and 14 translation directions including English to Czech, German, Hausa, Icelandic, Japanese, Russian, and Chinese, and the opposite direction. We fine-tune these MMPLMs towards English-<i>Spanish</i> language pair which <i>did not exist at all</i> in their original pre-trained corpora both implicitly and explicitly.We prepare carefully aligned <i>clinical</i> domain data for this fine-tuning, which is different from their original mixed domain knowledge.Our experimental result shows that the fine-tuning is very successful using just 250k well-aligned in-domain EN-ES segments for three sub-task translation testings: clinical cases, clinical terms, and ontology concepts. It achieves very close evaluation scores to another MMPLM NLLB from Meta-AI, which included Spanish as a high-resource setting in the pre-training.To the best of our knowledge, this is the first work on using MMPLMs towards <i>clinical domain transfer-learning NMT</i> successfully for totally unseen languages during pre-training.</abstract>
       <url hash="73fa3c4a">2023.clinicalnlp-1.5</url>
@@ -110,7 +110,7 @@
       <title>Navigating Data Scarcity: Pretraining for Medical Utterance Classification</title>
       <author><first>Do June</first><last>Min</last></author>
       <author><first>Veronica</first><last>Perez-Rosas</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>59-68</pages>
       <abstract>Pretrained language models leverage self-supervised learning to use large amounts of unlabeled text for learning contextual representations of sequences. However, in the domain of medical conversations, the availability of large, public datasets is limited due to issues of privacy and data management. In this paper, we study the effectiveness of dialog-aware pretraining objectives and multiphase training in using unlabeled data to improve LMs training for medical utterance classification. The objectives of pretraining for dialog awareness involve tasks that take into account the structure of conversations, including features such as turn-taking and the roles of speakers. The multiphase training process uses unannotated data in a sequence that prioritizes similarities and connections between different domains. We empirically evaluate these methods on conversational dialog classification tasks in the medical and counseling domains, and find that multiphase training can help achieve higher performance than standard pretraining or finetuning.</abstract>
       <url hash="5471d623">2023.clinicalnlp-1.8</url>
@@ -124,7 +124,7 @@
       <author><first>Simranjeet</first><last>Singh</last></author>
       <author><first>Jasmeet</first><last>Kaur</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
       <author><first>Pushpendra</first><last>Singh</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <pages>69-77</pages>
       <abstract>In developing countries like India, doctors and healthcare professionals working in public health spend significant time answering health queries that are fact-based and repetitive. Therefore, we propose an automated way to answer maternal and child health-related queries. A database of Frequently Asked Questions (FAQs) and their corresponding answers generated by experts is curated from rural health workers and young mothers. We develop a Hindi chatbot that identifies k relevant Question and Answer (QnA) pairs from the database in response to a healthcare query (q) written in Devnagri script or Hindi-English (Hinglish) code-mixed script. The curated database covers 80% of all the queries that a user of our study is likely to ask. We experimented with (i) rule-based methods, (ii) sentence embeddings, and (iii) a paraphrasing classifier, to calculate the q-Q similarity. We observed that paraphrasing classifier gives the best result when trained first on an open-domain text and then on the healthcare domain. Our chatbot uses an ensemble of all three approaches. We observed that if a given q can be answered using the database, then our chatbot can provide at least one relevant QnA pair among its top three suggestions for up to 70% of the queries.</abstract>
       <url hash="567e350f">2023.clinicalnlp-1.9</url>
@@ -136,7 +136,7 @@
       <title>Multi-Task Training with In-Domain Language Models for Diagnostic Reasoning</title>
       <author><first>Brihat</first><last>Sharma</last><affiliation>University of Wisconsin - Madison</affiliation></author>
       <author><first>Yanjun</first><last>Gao</last></author>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Harvard University</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Harvard University</affiliation></author>
       <author><first>Matthew</first><last>Churpek</last><affiliation>University of Wisconsin - Madison</affiliation></author>
       <author><first>Majid</first><last>Afshar</last><affiliation>University of Wisconsin - Madison</affiliation></author>
       <author><first>Dmitriy</first><last>Dligach</last><affiliation>Loyola University Chicago</affiliation></author>
@@ -199,7 +199,7 @@
       <title>Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection</title>
       <author><first>Nadine</first><last>Abdelhalim</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Ingy</first><last>Abdelhalim</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>118-124</pages>
       <abstract>This work introduces a novel three-class annotation scheme for text-based dementia classification in patients, based on their recorded visit interactions. Multiple models were developed utilising BERT, RoBERTa and DistilBERT. Two approaches were employed to improve the representation of dementia samples: oversampling the underrepresented data points in the original Pitt dataset and combining the Pitt with the Holland and Kempler datasets. The DistilBERT models trained on either an oversampled Pitt dataset or the combined dataset performed best in classifying the dementia class. Specifically, the model trained on the oversampled Pitt dataset and the one trained on the combined dataset obtained state-of-the-art performance with 98.8% overall accuracy and 98.6% macro-averaged F1-score, respectively. The models’ outputs were manually inspected through saliency highlighting, using Local Interpretable Model-agnostic Explanations (LIME), to provide a better understanding of its predictions.</abstract>
       <url hash="c95c3a58">2023.clinicalnlp-1.15</url>
@@ -212,7 +212,7 @@
       <author><first>Majid</first><last>Afshar</last><affiliation>University of Wisconsin - Madison</affiliation></author>
       <author><first>Dmitriy</first><last>Dligach</last><affiliation>Loyola University Chicago</affiliation></author>
       <author><first>Yanjun</first><last>Gao</last></author>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Harvard University</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Harvard University</affiliation></author>
       <pages>125-130</pages>
       <abstract>Text in electronic health records is organized into sections, and classifying those sections into section categories is useful for downstream tasks. In this work, we attempt to improve the transferability of section classification models by combining the dataset-specific knowledge in supervised learning models with the world knowledge inside large language models (LLMs). Surprisingly, we find that zero-shot LLMs out-perform supervised BERT-based models applied to out-of-domain data. We also find that their strengths are synergistic, so that a simple ensemble technique leads to additional performance gains.</abstract>
       <url hash="a4d99d09">2023.clinicalnlp-1.16</url>
@@ -392,7 +392,7 @@
       <author><first>Xihui</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yuwen</first><last>Sun</last></author>
       <author><first>Zihan</first><last>Qian</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Tristan</first><last>Naumann</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Tianxi</first><last>Cai</last><affiliation>Harvard T.H. Chan School of Public Health</affiliation></author>
       <author><first>Junwei</first><last>Lu</last><affiliation>Harvard University</affiliation></author>
@@ -495,8 +495,8 @@
     <paper id="38">
       <title>Building blocks for complex tasks: Robust generative event extraction for radiology reports under domain shifts</title>
       <author><first>Sitong</first><last>Zhou</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
       <pages>344-357</pages>
       <abstract>This paper explores methods for extracting information from radiology reports that generalize across exam modalities to reduce requirements for annotated data. We demonstrate that multi-pass T5-based text-to-text generative models exhibit better generalization across exam modalities compared to approaches that employ BERT-based task-specific classification layers. We then develop methods that reduce the inference cost of the model, making large-scale corpus processing more feasible for clinical applications. Specifically, we introduce a generative technique that decomposes complex tasks into smaller subtask blocks, which improves a single-pass model when combined with multitask training. In addition, we leverage target-domain contexts during inference to enhance domain adaptation, enabling use of smaller models. Analyses offer insights into the benefits of different cost reduction strategies.</abstract>
       <url hash="775ec7e1">2023.clinicalnlp-1.38</url>
@@ -539,8 +539,8 @@
       <author><first>Bin</first><last>Han</last><affiliation>University of Washington</affiliation></author>
       <author><first>Kevin</first><last>Lybarger</last><affiliation>George Mason University</affiliation></author>
       <author><first>Nic</first><last>Dobbins</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
       <pages>385-393</pages>
       <abstract>Social determinants of health (SDOH) documented in the electronic health record through unstructured text are increasingly being studied to understand how SDOH impacts patient health outcomes. In this work, we utilize the Social History Annotation Corpus (SHAC), a multi-institutional corpus of de-identified social history sections annotated for SDOH, including substance use, employment, and living status information. We explore the automatic extraction of SDOH information with SHAC in both standoff and inline annotation formats using GPT-4 in a one-shot prompting setting. We compare GPT-4 extraction performance with a high-performing supervised approach and perform thorough error analyses. Our prompt-based GPT-4 method achieved an overall 0.652 F1 on the SHAC test set, similar to the 7th best-performing system among all teams in the n2c2 challenge with SHAC.</abstract>
       <url hash="857d81fb">2023.clinicalnlp-1.41</url>
@@ -554,9 +554,9 @@
       <author><first>Kyung Min</first><last>Chae</last><affiliation>Konyang University</affiliation></author>
       <author><first>Yousang</first><last>Cho</last></author>
       <author><first>Hyunbin</first><last>Seo</last><affiliation>teddysum</affiliation></author>
-      <author><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
-      <author><first>Key-Sun</first><last>Choi</last><affiliation>Korea Advanced Institute of Science &amp; Technology and Konyang University</affiliation></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last><affiliation>Korea Advanced Institute of Science &amp; Technology and Konyang University</affiliation></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <pages>394-402</pages>
       <abstract>In this paper, we introduce the design and various attempts for TaskB of MEDIQA-Chat 2023. The goal of TaskB in MEDIQA-Chat 2023 is to generate full clinical note from doctor-patient consultation dialogues. This task has several challenging issues, such as lack of training data, handling long dialogue inputs, and generating semi-structured clinical note which have section heads. To address these issues, we conducted various experiments and analyzed their results. We utilized the DialogLED model pre-trained on long dialogue data to handle long inputs, and we pre-trained on other dialogue datasets to address the lack of training data. We also attempted methods such as using prompts and contrastive learning for handling sections. This paper provides insights into clinical note generation through analyzing experimental methods and results, and it suggests future research directions.</abstract>
       <url hash="558d0c52">2023.clinicalnlp-1.42</url>
@@ -668,7 +668,7 @@
       <author><first>Raghav</first><last>Kapoor</last></author>
       <author><first>Medha</first><last>Palavalli</last></author>
       <author><first>Amanda</first><last>Bertsch</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Matthew</first><last>Gormley</last><affiliation>School of Computer Science, Carnegie Mellon University and 3M</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last><affiliation>School of Computer Science, Carnegie Mellon University and 3M</affiliation></author>
       <pages>490-502</pages>
       <abstract>Medical dialogue summarization is challenging due to the unstructured nature of medical conversations, the use of medical terminologyin gold summaries, and the need to identify key information across multiple symptom sets. We present a novel system for the Dialogue2Note Medical Summarization tasks in the MEDIQA 2023 Shared Task. Our approach for sectionwise summarization (Task A) is a two-stage process of selecting semantically similar dialogues and using the top-k similar dialogues as in-context examples for GPT-4. For full-note summarization (Task B), we use a similar solution with k=1. We achieved 3rd place in Task A (2nd among all teams), 4th place in Task B Division Wise Summarization (2nd among all teams), 15th place in Task A Section Header Classification (9th among all teams), and 8th place among all teams in Task B. Our results highlight the effectiveness of few-shot prompting for this task, though we also identify several weaknesses of prompting-based approaches. We compare GPT-4 performance with several finetuned baselines. We find that GPT-4 summaries are more abstractive and shorter. We make our code publicly available.</abstract>
       <url hash="57cf3fe4">2023.clinicalnlp-1.51</url>
@@ -681,7 +681,7 @@
       <author><first>Wen-wai</first><last>Yim</last></author>
       <author><first>Griffin</first><last>Adams</last></author>
       <author><first>Neal</first><last>Snider</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
       <pages>503-513</pages>
       <abstract>Automatic generation of clinical notes from doctor-patient conversations can play a key role in reducing daily doctors’ workload and improving their interactions with the patients. MEDIQA-Chat 2023 aims to advance and promote research on effective solutions through shared tasks on the automatic summarization of doctor-patient conversations and on the generation of synthetic dialogues from clinical notes for data augmentation. Seventeen teams participated in the challenge and experimented with a broad range of approaches and models. In this paper, we describe the three MEDIQA-Chat 2023 tasks, the datasets, and the participants’ results and methods. We hope that these shared tasks will lead to additional research efforts and insights on the automatic generation and evaluation of clinical notes.</abstract>
       <url hash="a6af9e7a">2023.clinicalnlp-1.52</url>
@@ -711,7 +711,7 @@
       <title><fixed-case>C</fixed-case>are4<fixed-case>L</fixed-case>ang at <fixed-case>MEDIQA</fixed-case>-Chat 2023: Fine-tuning Language Models for Classifying and Summarizing Clinical Dialogues</title>
       <author><first>Amal</first><last>Alqahtani</last><affiliation>George Washington University</affiliation></author>
       <author><first>Rana</first><last>Salama</last><affiliation>George Washington University</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>George Washington University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>George Washington University</affiliation></author>
       <author><first>Abdou</first><last>Youssef</last><affiliation>George Washington University</affiliation></author>
       <pages>524-528</pages>
       <abstract>Summarizing medical conversations is one of the tasks proposed by MEDIQA-Chat to promote research on automatic clinical note generation from doctor-patient conversations. In this paper, we present our submission to this task using fine-tuned language models, including T5, BART and BioGPT models. The fine-tuned models are evaluated using ensemble metrics including ROUGE, BERTScore andBLEURT. Among the fine-tuned models, Flan-T5 achieved the highest aggregated score for dialogue summarization.</abstract>
diff --git a/data/xml/2023.codi.xml b/data/xml/2023.codi.xml
index 992ebdcf37..cfb4361dfc 100644
--- a/data/xml/2023.codi.xml
+++ b/data/xml/2023.codi.xml
@@ -51,7 +51,7 @@
     </paper>
     <paper id="3">
       <title>Ensemble Transfer Learning for Multilingual Coreference Resolution</title>
-      <author><first>Tuan</first><last>Lai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>24-36</pages>
       <abstract>Entity coreference resolution is an important research problem with many applications, including information extraction and question answering. Coreference resolution for English has been studied extensively. However, there is relatively little work for other languages. A problem that frequently occurs when working with a non-English language is the scarcity of annotated training data. To overcome this challenge, we design a simple but effective ensemble-based framework that combines various transfer learning (TL) techniques. We first train several models using different TL methods. Then, during inference, we compute the unweighted average scores of the models’ predictions to extract the final set of predicted clusters. Furthermore, we also propose a low-cost TL method that bootstraps coreference resolution models by utilizing Wikipedia anchor texts. Leveraging the idea that the coreferential links naturally exist between anchor texts pointing to the same article, our method builds a sizeable distantly-supervised dataset for the target language that consists of tens of thousands of documents. We can pre-train a model on the pseudo-labeled dataset before finetuning it on the final target dataset. Experimental results on two benchmark datasets, OntoNotes and SemEval, confirm the effectiveness of our methods. Our best ensembles consistently outperform the baseline approach of simple training by up to 7.68% in the F1 score. These ensembles also achieve new state-of-the-art results for three languages: Arabic, Dutch, and Spanish.</abstract>
@@ -73,8 +73,8 @@
     <paper id="5">
       <title>Leveraging Structural Discourse Information for Event Coreference Resolution in <fixed-case>D</fixed-case>utch</title>
       <author><first>Loic</first><last>De Langhe</last><affiliation>Ghent University</affiliation></author>
-      <author><first>Orphee</first><last>De Clercq</last><affiliation>LT3, Ghent University</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
       <pages>48-53</pages>
       <url hash="fa0e9374">2023.codi-1.5</url>
       <bibkey>de-langhe-etal-2023-leveraging</bibkey>
@@ -165,7 +165,7 @@
     <paper id="15">
       <title>Improving Long Context Document-Level Machine Translation</title>
       <author><first>Christian</first><last>Herold</last><affiliation>RWTH Aachen University</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <pages>112-125</pages>
       <abstract>Document-level context for neural machine translation (NMT) is crucial to improve the translation consistency and cohesion, the translation of ambiguous inputs, as well as several other linguistic phenomena. Many works have been published on the topic of document-level NMT, but most restrict the system to only local context, typically including just the one or two preceding sentences as additional information. This might be enough to resolve some ambiguous inputs, but it is probably not sufficient to capture some document-level information like the topic or style of a conversation. When increasing the context size beyond just the local context, there are two challenges: (i) the memory usage increases exponentially (ii) the translation performance starts to degrade. We argue that the widely-used attention mechanism is responsible for both issues. Therefore, we propose a constrained attention variant that focuses the attention on the most relevant parts of the sequence, while simultaneously reducing the memory consumption. For evaluation, we utilize targeted test sets in combination with novel evaluation techniques to analyze the translations in regards to specific discourse-related phenomena. We find that our approach is a good compromise between sentence-level NMT vs attending to the full context, especially in low resource scenarios.</abstract>
       <url hash="01bd6e9c">2023.codi-1.15</url>
@@ -197,7 +197,7 @@
     <paper id="21">
       <title>The distribution of discourse relations within and across turns in spontaneous conversation</title>
       <author><first>S. Magalí</first><last>López Cortez</last><affiliation>University at Buffalo</affiliation></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last><affiliation>University at Buffalo</affiliation></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last><affiliation>University at Buffalo</affiliation></author>
       <pages>156-162</pages>
       <abstract>Time pressure and topic negotiation may impose constraints on how people leverage discourse relations (DRs) in spontaneous conversational contexts. In this work, we adapt a system of DRs for written language to spontaneous dialogue using crowdsourced annotations from novice annotators. We then test whether discourse relations are used differently across several types of multi-utterance contexts. We compare the patterns of DR annotation within and across speakers and within and across turns. Ultimately, we find that different discourse contexts produce distinct distributions of discourse relations, with single-turn annotations creating the most uncertainty for annotators. Additionally, we find that the discourse relation annotations are of sufficient quality to predict from embeddings of discourse units.</abstract>
       <url hash="9216eeca">2023.codi-1.21</url>
@@ -208,7 +208,7 @@
     <paper id="22">
       <title>Embedding Mental Health Discourse for Community Recommendation</title>
       <author><first>Hy</first><last>Dang</last><affiliation>University of Notre Dame</affiliation></author>
-      <author><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="bang-nguyen"><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Noah</first><last>Ziems</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>163-172</pages>
diff --git a/data/xml/2023.computel.xml b/data/xml/2023.computel.xml
index c49b3010b8..c825a6535d 100644
--- a/data/xml/2023.computel.xml
+++ b/data/xml/2023.computel.xml
@@ -6,7 +6,7 @@
       <editor><first>Atticus</first><last>Harrigan</last></editor>
       <editor><first>Aditi</first><last>Chaudhary</last></editor>
       <editor><first>Shruti</first><last>Rijhwani</last></editor>
-      <editor><first>Sarah</first><last>Moeller</last></editor>
+      <editor id="sarah-moeller"><first>Sarah</first><last>Moeller</last></editor>
       <editor><first>Antti</first><last>Arppe</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
       <editor><first>Ryan</first><last>Henke</last></editor>
@@ -32,7 +32,7 @@
       <author><first>Wawan</first><last>Sahrozi</last></author>
       <author><first>Ben</first><last>Foley</last></author>
       <author><first>Bradley</first><last>McDonnell</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1–6</pages>
       <url hash="62f13f41">2023.computel-1.1</url>
       <bibkey>san-etal-2023-leveraging</bibkey>
@@ -46,7 +46,7 @@
     </paper>
     <paper id="3">
       <title>Using <fixed-case>LARA</fixed-case> to rescue a legacy <fixed-case>P</fixed-case>itjantjatjara course</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Sasha</first><last>Wilmoth</last></author>
       <pages>13–18</pages>
       <url hash="f4afa75d">2023.computel-1.3</url>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="5">
       <title>Towards a finite-state morphological analyser for San Mateo Huave</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Samuel Herrera</first><last>Castro</last></author>
       <pages>30–37</pages>
       <url hash="3ef3acb8">2023.computel-1.5</url>
@@ -73,7 +73,7 @@
     </paper>
     <paper id="6">
       <title>Investigating Speaker Diarization of Endangered Language Data</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>38–43</pages>
       <url hash="891f4096">2023.computel-1.6</url>
       <bibkey>levow-2023-investigating</bibkey>
@@ -117,7 +117,7 @@
       <title>Studying the impact of language model size for low-resource <fixed-case>ASR</fixed-case></title>
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Justin</first><last>Spence</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>77–83</pages>
       <url hash="6f3c3035">2023.computel-1.11</url>
       <bibkey>liu-etal-2023-studying</bibkey>
diff --git a/data/xml/2023.conll.xml b/data/xml/2023.conll.xml
index cee9961180..0cf8075b64 100644
--- a/data/xml/2023.conll.xml
+++ b/data/xml/2023.conll.xml
@@ -81,7 +81,7 @@
       <title>Investigating the Nature of Disagreements on Mid-Scale Ratings: A Case Study on the Abstractness-Concreteness Continuum</title>
       <author><first>Urban</first><last>Knupleš</last></author>
       <author><first>Diego</first><last>Frassinelli</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>70–86</pages>
       <abstract>Humans tend to strongly agree on ratings on a scale for extreme cases (e.g., a CAT is judged as very concrete), but judgements on mid-scale words exhibit more disagreement. Yet, collected rating norms are heavily exploited across disciplines. Our study focuses on concreteness ratings and (i) implements correlations and supervised classification to identify salient multi-modal characteristics of mid-scale words, and (ii) applies a hard clustering to identify patterns of systematic disagreement across raters. Our results suggest to either fine-tune or filter mid-scale target words before utilising them.</abstract>
       <url hash="4993d99d">2023.conll-1.6</url>
@@ -126,7 +126,7 @@
       <title>A Minimal Approach for Natural Language Action Space in Text-based Games</title>
       <author><first>Dongwon</first><last>Ryu</last></author>
       <author><first>Meng</first><last>Fang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Shirui</first><last>Pan</last></author>
       <author><first>Ehsan</first><last>Shareghi</last></author>
       <pages>138–154</pages>
@@ -138,7 +138,7 @@
     <paper id="11">
       <title>Structural Ambiguity and its Disambiguation in Language Model Based Parsers: the Case of <fixed-case>D</fixed-case>utch Clause Relativization</title>
       <author><first>Gijs</first><last>Wijnholds</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <pages>155–164</pages>
       <abstract>This paper addresses structural ambiguity in Dutch relative clauses. By investigating the task of disambiguation by grounding, we study how the presence of a prior sentence can resolve relative clause ambiguities. We apply this method to two parsing architectures in an attempt to demystify the parsing and language model components of two present-day neural parsers. Results show that a neurosymbolic parser, based on proof nets, is more open to data bias correction than an approach based on universal dependencies, although both set-ups suffer from a comparable initial data bias.</abstract>
       <url hash="bb584b1a">2023.conll-1.11</url>
@@ -151,7 +151,7 @@
       <author><first>Atif</first><last>Mahmud</last></author>
       <author><first>Abbas</first><last>Ghaddar</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Prasanna</first><last>Parthasarathi</last></author>
       <pages>165–182</pages>
       <abstract>Self-supervised Language Modelling (LM) objectives —like BERT masked LM— have become the default choice for pretraining language models. TOken Reordering (TOR) pretraining objectives, beyond token prediction, have not been extensively studied yet. In this work, we explore challenges that underlie the development and usefulness of such objectives on downstream language tasks. In particular, we design a novel TOR pretraining objective which predicts whether two tokens are adjacent or not given a partial bag-of-tokens input. In addition, we investigate the usefulness of Graph Isomorphism Network (GIN), when placed on top of the BERT encoder, in order to enhance the overall model ability to leverage topological signal from the encoded representations. We compare language understanding abilities of TOR to the one of MLM on word-order sensitive (e.g. Dependency Parsing) and insensitive (e.g. text classification) tasks in both full training and few-shot settings. Our results indicate that TOR is competitive to MLM on the GLUE language understanding benchmark, and slightly superior on syntax-dependent datasets, especially in the few-shot setting.</abstract>
@@ -369,7 +369,7 @@
       <author><first>Yunke</first><last>He</last></author>
       <author><first>Xixian</first><last>Liao</last></author>
       <author><first>Jialing</first><last>Liang</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>456–475</pages>
       <abstract>Different speakers often produce different names for the same object or entity (e.g., “woman” vs. “tourist” for a female tourist). The reasons behind variation in naming are not well understood. We create a Language and Vision dataset for Mandarin Chinese that provides an average of 20 names for 1319 naturalistic images, and investigate how familiarity with a given kind of object relates to the degree of naming variation it triggers across subjects. We propose that familiarity influences naming variation in two competing ways: increasing familiarity can either expand vocabulary, leading to higher variation, or promote convergence on conventional names, thereby reducing variation. We find evidence for both factors being at play. Our study illustrates how computational resources can be used to address research questions in Cognitive Science.</abstract>
       <url hash="7bd3e354">2023.conll-1.30</url>
@@ -426,7 +426,7 @@
     <paper id="35">
       <title>Exploring Transformers as Compact, Data-efficient Language Models</title>
       <author><first>Clayton</first><last>Fields</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>521–531</pages>
       <abstract>Large scale transformer models, trained with massive datasets have become the standard in natural language processing. The huge size of most transformers make research with these models impossible for those with limited computational resources. Additionally, the enormous pretraining data requirements of transformers exclude pretraining them with many smaller datasets that might provide enlightening results. In this study, we show that transformers can be significantly reduced in size, with as few as 5.7 million parameters, and still retain most of their downstream capability. Further we show that transformer models can retain comparable results when trained on human-scale datasets, as few as 5 million words of pretraining data. Overall, the results of our study suggest transformers function well as compact, data efficient language models and that complex model compression methods, such as model distillation are not necessarily superior to pretraining reduced size transformer models from scratch.</abstract>
       <url hash="fd949d1d">2023.conll-1.35</url>
@@ -448,7 +448,7 @@
       <author><first>Koyena</first><last>Pal</last></author>
       <author><first>Jiuding</first><last>Sun</last></author>
       <author><first>Andrew</first><last>Yuan</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>David</first><last>Bau</last></author>
       <pages>548–560</pages>
       <abstract>We conjecture that hidden state vectors corresponding to individual input tokens encode information sufficient to accurately predict several tokens ahead. More concretely, in this paper we ask: Given a hidden (internal) representation of a single token at position t in an input, can we reliably anticipate the tokens that will appear at positions ≥ t + 2? To test this, we measure linear approximation and causal intervention methods in GPT-J-6B to evaluate the degree to which individual hidden states in the network contain signal rich enough to predict future hidden states and, ultimately, token outputs. We find that, at some layers, we can approximate a model’s output with more than 48% accuracy with respect to its prediction of subsequent tokens through a single hidden state. Finally we present a “Future Lens” visualization that uses these methods to create a new view of transformer states.</abstract>
@@ -538,7 +538,7 @@
     <paper id="2">
       <title><fixed-case>GPT</fixed-case>-wee: How Small Can a Small Language Model Really Get?</title>
       <author><first>Bastian</first><last>Bunzeck</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>35-46</pages>
       <url hash="18256e8d">2023.conll-babylm.2</url>
       <bibkey>bunzeck-zarriess-2023-gpt</bibkey>
@@ -550,7 +550,7 @@
       <author><first>Osama</first><last>Natouf</last><affiliation>Boise State University</affiliation></author>
       <author><first>Andrew</first><last>McMains</last><affiliation>Boise State University</affiliation></author>
       <author><first>Catherine</first><last>Henry</last><affiliation>Boise State University</affiliation></author>
-      <author><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
       <pages>47-57</pages>
       <url hash="b53e5300">2023.conll-babylm.3</url>
       <bibkey>fields-etal-2023-tiny</bibkey>
@@ -726,7 +726,7 @@
     </paper>
     <paper id="20">
       <title>Not all layers are equally as important: Every Layer Counts <fixed-case>BERT</fixed-case></title>
-      <author><first>Lucas</first><last>Georges Gabriel Charpentier</last><affiliation>University of Oslo</affiliation></author>
+      <author id="lucas-georges-gabriel-charpentier"><first>Lucas</first><last>Georges Gabriel Charpentier</last><affiliation>University of Oslo</affiliation></author>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo</affiliation></author>
       <pages>238-252</pages>
       <url hash="9da3e3cd">2023.conll-babylm.20</url>
@@ -751,7 +751,7 @@
       <title>A surprisal oracle for active curriculum language modeling</title>
       <author><first>Xudong</first><last>Hong</last><affiliation>Saarland University</affiliation></author>
       <author><first>Sharid</first><last>Loáiciga</last><affiliation>University of Gothenburg</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <pages>259-268</pages>
       <url hash="558af208">2023.conll-babylm.22</url>
       <bibkey>hong-etal-2023-surprisal</bibkey>
diff --git a/data/xml/2023.contents.xml b/data/xml/2023.contents.xml
index 9ea03de7ec..150133da72 100644
--- a/data/xml/2023.contents.xml
+++ b/data/xml/2023.contents.xml
@@ -5,9 +5,9 @@
       <booktitle>Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)</booktitle>
       <editor><first>Amal Haddad</first><last>Haddad</last></editor>
       <editor><first>Ayla Rigouts</first><last>Terryn</last></editor>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
       <address>Varna, Bulgaria</address>
diff --git a/data/xml/2023.cpss.xml b/data/xml/2023.cpss.xml
index 8445fee16f..3fc539d991 100644
--- a/data/xml/2023.cpss.xml
+++ b/data/xml/2023.cpss.xml
@@ -7,7 +7,7 @@
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
       <editor><first>Valentin</first><last>Gold</last></editor>
       <editor><first>Theresa</first><last>Gessler</last></editor>
-      <editor><first>Simone Paolo</first><last>Ponzetto</last></editor>
+      <editor id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></editor>
       <publisher>Association for Computational Lingustics</publisher>
       <address>Ingolstadt, Germany</address>
       <month>September</month>
@@ -63,7 +63,7 @@
     <paper id="6">
       <title>According to <fixed-case>BERT</fixed-case>opic, what do <fixed-case>D</fixed-case>anish Parties Debate on when they Address Energy and Environment?</title>
       <author><first>Costanza</first><last>Navarretta</last></author>
-      <author><first>Dorte H.</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte H.</first><last>Hansen</last></author>
       <pages>59–68</pages>
       <url hash="9f6b731d">2023.cpss-1.6</url>
       <bibkey>navarretta-hansen-2023-according</bibkey>
diff --git a/data/xml/2023.crac.xml b/data/xml/2023.crac.xml
index 9aa23cb6ba..0c8dc8c8e1 100644
--- a/data/xml/2023.crac.xml
+++ b/data/xml/2023.crac.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the Sixth Workshop on Computational Models of Reference, Anaphora and Coreference (CRAC 2023)</booktitle>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
       <month>December</month>
@@ -22,8 +22,8 @@
     <paper id="1">
       <title>Filling in the Gaps: Efficient Event Coreference Resolution using Graph Autoencoder Networks</title>
       <author><first>Loic</first><last>De Langhe</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>1–7</pages>
       <url hash="2d278a0a">2023.crac-main.1</url>
       <bibkey>de-langhe-etal-2023-filling</bibkey>
@@ -45,7 +45,7 @@
     <paper id="3">
       <title>Towards Transparency in Coreference Resolution: A Quantum-Inspired Approach</title>
       <author><first>Hadi</first><last>Wazni</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>15–27</pages>
       <url hash="4da5af23">2023.crac-main.3</url>
       <bibkey>wazni-sadrzadeh-2023-towards</bibkey>
@@ -55,7 +55,7 @@
       <title>Scalar Anaphora: Annotating Degrees of Coreference in Text</title>
       <author><first>Bingyang</first><last>Ye</last></author>
       <author><first>Jingxuan</first><last>Tu</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>28–38</pages>
       <url hash="49f1d399">2023.crac-main.4</url>
       <bibkey>ye-etal-2023-scalar</bibkey>
@@ -119,7 +119,7 @@
     <paper id="9">
       <title>Integrated Annotation of Event Structure, Object States, and Entity Coreference</title>
       <author><first>Kyeongmin</first><last>Rim</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>71–77</pages>
       <url hash="501377da">2023.crac-main.9</url>
       <bibkey>rim-pustejovsky-2023-integrated</bibkey>
@@ -129,7 +129,7 @@
   <volume id="sharedtask" ingest-date="2023-12-06" type="proceedings">
     <meta>
       <booktitle>Proceedings of the CRAC 2023 Shared Task on Multilingual Coreference Resolution</booktitle>
-      <editor><first>Zdeněk</first><last>Žabokrtský</last></editor>
+      <editor id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></editor>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
@@ -153,7 +153,7 @@
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Ondrej</first><last>Prazak</last></author>
       <author><first>Jakub</first><last>Sido</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>1–18</pages>
       <abstract>This paper summarizes the second edition of the shared task on multilingual coreference resolution, held with the CRAC 2023 workshop. Just like last year, participants of the shared task were to create trainable systems that detect mentions and group them based on identity coreference; however, this year’s edition uses a slightly different primary evaluation score, and is also broader in terms of covered languages: version 1.1 of the multilingual collection of harmonized coreference resources CorefUD was used as the source of training and evaluation data this time, with 17 datasets for 12 languages. 7 systems competed in this shared task.</abstract>
       <url hash="1913e31c">2023.crac-sharedtask.1</url>
@@ -177,7 +177,7 @@
       <title>Neural End-to-End Coreference Resolution using Morphological Information</title>
       <author><first>Tuğba</first><last>Pamay Arslan</last></author>
       <author><first>Kutay</first><last>Acar</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>34–40</pages>
       <abstract>In morphologically rich languages, words consist of morphemes containing deeper information in morphology, and thus such languages may necessitate the use of morpheme-level representations as well as word representations. This study introduces a neural multilingual end-to-end coreference resolution system by incorporating morphological information in transformer-based word embeddings on the baseline model. This proposed model participated in the Sixth Workshop on Computational Models of Reference, Anaphora and Coreference (CRAC 2023). Including morphological information explicitly into the coreference resolution improves the performance, especially in morphologically rich languages (e.g., Catalan, Hungarian, and Turkish). The introduced model outperforms the baseline system by 2.57 percentage points on average by obtaining 59.53% CoNLL F-score.</abstract>
       <url hash="aaf6fb69">2023.crac-sharedtask.3</url>
@@ -197,7 +197,7 @@
     <paper id="5">
       <title><fixed-case>M</fixed-case>c<fixed-case>G</fixed-case>ill at <fixed-case>CRAC</fixed-case> 2023: Multilingual Generalization of Entity-Ranking Coreference Resolution Models</title>
       <author><first>Ian</first><last>Porada</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>52–57</pages>
       <abstract>Our submission to the CRAC 2023 shared task, described herein, is an adapted entity-ranking model jointly trained on all 17 datasets spanning 12 languages. Our model outperforms the shared task baselines by a difference in F1 score of +8.47, achieving an ultimate F1 score of 65.43 and fourth place in the shared task. We explore design decisions related to data preprocessing, the pretrained encoder, and data mixing.</abstract>
       <url hash="50ce88ff">2023.crac-sharedtask.5</url>
diff --git a/data/xml/2023.crowdmt.xml b/data/xml/2023.crowdmt.xml
index 39e574df96..25bca81f86 100644
--- a/data/xml/2023.crowdmt.xml
+++ b/data/xml/2023.crowdmt.xml
@@ -3,13 +3,13 @@
   <volume id="1" ingest-date="2023-08-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Open Community-Driven Machine Translation</booktitle>
-      <editor><first>Miquel</first><last>Esplà-Gomis</last></editor>
-      <editor><first>Mikel L.</first><last>Forcada</last></editor>
+      <editor id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></editor>
       <editor><first>Taja</first><last>Kuzman</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
       <editor><first>Rik</first><last>van Noord</last></editor>
-      <editor><first>Gema</first><last>Ramírez-Sánchez</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Antonio</first><last>Toral</last></editor>
       <publisher>European Association for Machine Translation</publisher>
       <address>Tampere, Finland</address>
diff --git a/data/xml/2023.cs4oa.xml b/data/xml/2023.cs4oa.xml
index 1941cae0cd..9b5e7f8a03 100644
--- a/data/xml/2023.cs4oa.xml
+++ b/data/xml/2023.cs4oa.xml
@@ -48,7 +48,7 @@
       <title>Just Collect, Don’t Filter: Noisy Labels Do Not Improve Counterspeech Collection for Languages Without Annotated Resources</title>
       <author><first>Pauline</first><last>Möhle</last></author>
       <author><first>Matthias</first><last>Orlikowski</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>44-61</pages>
       <abstract>Counterspeech on social media is rare. Consequently, it is difficult to collect naturally occurring examples, in particular for languages without annotated datasets. In this work, we study methods to increase the relevance of social media samples for counterspeech annotation when we lack annotated resources. We use the example of sourcing German data for counterspeech annotations from Twitter. We monitor tweets from German politicians and activists to collect replies. To select relevant replies we a) find replies that match German abusive keywords or b) label replies for counterspeech using a multilingual classifier fine-tuned on English data. For both approaches and a baseline setting, we annotate a random sample and use bootstrap sampling to estimate the amount of counterspeech. We find that neither the multilingual model nor the keyword approach achieve significantly higher counts of true counterspeech than the baseline. Thus, keyword lists or multi-lingual classifiers are likely not worth the added complexity beyond purposive data collection: Already without additional filtering, we gather a meaningful sample with 7,4% true counterspeech.</abstract>
       <url hash="037a72fa">2023.cs4oa-1.4</url>
diff --git a/data/xml/2023.cxgsnlp.xml b/data/xml/2023.cxgsnlp.xml
index a49bba20d5..d8eeb1c24c 100644
--- a/data/xml/2023.cxgsnlp.xml
+++ b/data/xml/2023.cxgsnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-02-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First International Workshop on Construction Grammars and NLP (CxGs+NLP, GURT/SyntaxFest 2023)</booktitle>
-      <editor><first>Claire</first><last>Bonial</last></editor>
+      <editor id="claire-bonial"><first>Claire</first><last>Bonial</last></editor>
       <editor><first>Harish</first><last>Tayyar Madabushi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Washington, D.C.</address>
@@ -37,7 +37,7 @@
     <paper id="3">
       <title><fixed-case>CAL</fixed-case>a<fixed-case>M</fixed-case>o: a Constructionist Assessment of Language Models</title>
       <author><first>Ludovica</first><last>Pannitto</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>21-30</pages>
       <abstract>This paper presents a novel framework for evaluating Neural Language Models’ linguistic abilities using a constructionist approach. Not only is the usage-based model in line with the un- derlying stochastic philosophy of neural architectures, but it also allows the linguist to keep meaning as a determinant factor in the analysis. We outline the framework and present two possible scenarios for its application.</abstract>
       <url hash="e3507152">2023.cxgsnlp-1.3</url>
@@ -56,7 +56,7 @@
     <paper id="5">
       <title>Constructivist Tokenization for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Allison</first><last>Fan</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>36-40</pages>
       <abstract>This paper revisits tokenization from a theoretical perspective, and argues for the necessity of a constructivist approach to tokenization for semantic parsing and modeling language acquisition. We consider two problems: (1) (semi-) automatically converting existing lexicalist annotations, e.g. those of the Penn TreeBank, into constructivist annotations, and (2) automatic tokenization of raw texts. We demonstrate that (1) a heuristic rule-based constructivist tokenizer is able to yield relatively satisfactory accuracy when gold standard Penn TreeBank part-of-speech tags are available, but that some manual annotations are still necessary to obtain gold standard results, and (2) a neural tokenizer is able to provide accurate automatic constructivist tokenization results from raw character sequences. Our research output also includes a set of high-quality morpheme-tokenized corpora, which enable the training of computational models that more closely align with language comprehension and acquisition.</abstract>
       <url hash="0963dbef">2023.cxgsnlp-1.5</url>
@@ -87,7 +87,7 @@
     <paper id="8">
       <title>Investigating Stylistic Profiles for the Task of Empathy Classification in Medical Narrative Essays</title>
       <author><first>Priyanka</first><last>Dey</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>63-74</pages>
       <abstract>One important aspect of language is how speakers generate utterances and texts to convey their intended meanings. In this paper, we bring various aspects of the Construction Grammar (CxG) and the Systemic Functional Grammar (SFG) theories in a deep learning computational framework to model empathic language. Our corpus consists of 440 essays written by premed students as narrated simulated patient–doctor interactions. We start with baseline classifiers (state-of-the-art recurrent neural networks and transformer models). Then, we enrich these models with a set of linguistic constructions proving the importance of this novel approach to the task of empathy classification for this dataset. Our results indicate the potential of such constructions to contribute to the overall empathy profile of first-person narrative essays.</abstract>
       <url hash="fc18c12d">2023.cxgsnlp-1.8</url>
@@ -111,8 +111,8 @@
       <author><first>Taiqi</first><last>He</last></author>
       <author><first>Naoki</first><last>Otani</last></author>
       <author><first>David</first><last>R. Mortensen</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>85-95</pages>
       <abstract>Construction Grammar (CxG) has recently been used as the basis for probing studies that have investigated the performance of large pretrained language models (PLMs) with respect to the structure and meaning of constructions. In this position paper, we make suggestions for the continuation and augmentation of this line of research. We look at probing methodology that was not designed with CxG in mind, as well as probing methodology that was designed for specific constructions. We analyse selected previous work in detail, and provide our view of the most important challenges and research questions that this promising new field faces.</abstract>
       <url hash="58154862">2023.cxgsnlp-1.10</url>
diff --git a/data/xml/2023.depling.xml b/data/xml/2023.depling.xml
index 7036d3a3a9..60a1c348d3 100644
--- a/data/xml/2023.depling.xml
+++ b/data/xml/2023.depling.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-02-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh International Conference on Dependency Linguistics (Depling, GURT/SyntaxFest 2023)</booktitle>
-      <editor><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></editor>
       <editor><first>François</first><last>Lareau</last><affiliation>Université de Montréal</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Washington, D.C.</address>
@@ -100,7 +100,7 @@
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Josep</first><last>Ricci</last></author>
       <author><first>Alexander</first><last>Shvets</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>91-101</pages>
       <abstract>We present work in progress that aims to address the coverage issue faced by rule-based text generators. We propose a pipeline for extracting abstract dependency template (predicate-argument structures) from Wikipedia text to be used as input for generating text from structured data with the FORGe system. The pipeline comprises three main components: (i) candidate sentence retrieval, (ii) clause extraction, ranking and selection, and (iii) conversion to predicate-argument form. We present an approach and preliminary evaluation for the ranking and selection module.</abstract>
       <url hash="1b323973">2023.depling-1.9</url>
diff --git a/data/xml/2023.dialdoc.xml b/data/xml/2023.dialdoc.xml
index 1ebca44b30..9172f98612 100644
--- a/data/xml/2023.dialdoc.xml
+++ b/data/xml/2023.dialdoc.xml
@@ -59,10 +59,10 @@
     <paper id="4">
       <title>Position Matters! Empirical Study of Order Effect in Knowledge-grounded Dialogue</title>
       <author><first>Hsuan</first><last>Su</last></author>
-      <author><first>Shachi</first><last>H. Kumar</last><affiliation>Intel Labs</affiliation></author>
+      <author id="shachi-h-kumar"><first>Shachi</first><last>H. Kumar</last><affiliation>Intel Labs</affiliation></author>
       <author><first>Sahisnu</first><last>Mazumder</last><affiliation>Intel Labs, USA</affiliation></author>
       <author><first>Wenda</first><last>Chen</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Eda</first><last>Okur</last><affiliation>Intel Labs</affiliation></author>
       <author><first>Saurav</first><last>Sahay</last><affiliation>Intel</affiliation></author>
       <author><first>Lama</first><last>Nachman</last></author>
@@ -116,7 +116,7 @@
       <author><first>Michael</first><last>Lucke</last></author>
       <author><first>Sheena</first><last>Dufresne</last></author>
       <author><first>Maria</first><last>Gini</last><affiliation>University of Minnesota , Twin Ciities</affiliation></author>
-      <author><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
       <pages>68-79</pages>
       <abstract>In healthcare, the ability to care for oneself is reflected in the “Activities of Daily Living (ADL),” which serve as a measure of functional ability (functioning). A lack of functioning may lead to poor living conditions requiring personal care and assistance. To accurately identify those in need of support, assistance programs continuously evaluate participants’ functioning across various domains. However, the assessment process may encounter consistency issues when multiple assessors with varying levels of expertise are involved. Novice assessors, in particular, may lack the necessary preparation for real-world interactions with participants. To address this issue, we developed a dialogue system that simulates interactions between assessors and individuals of varying functioning in a natural and reproducible way. The dialogue system consists of two major modules, one for natural language understanding (NLU) and one for natural language generation (NLG), respectively. In order to generate responses consistent with the underlying knowledge base, the dialogue system requires both an understanding of the user’s query and of biographical details of an individual being simulated. To fulfill this requirement, we experimented with query classification and generated responses based on those biographical details using some recently released InstructGPT-like models.</abstract>
       <url hash="0bbc9bd3">2023.dialdoc-1.8</url>
@@ -130,7 +130,7 @@
       <author><first>Qi</first><last>Zeng</last></author>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>80-85</pages>
       <abstract>Existing reference-free turn-level evaluation metrics for chatbots inadequately capture the interaction between the user and the system. Consequently, they often correlate poorly with human evaluations. To address this issue, we propose a novel model-agnostic approach that leverages Conditional Pointwise Mutual Information (C-PMI) to measure the turn-level interaction between the system and the user based on a given evaluation dimension. Experimental results on the widely used FED dialogue evaluation dataset demonstrate that our approach significantly improves the correlation with human judgment compared with existing evaluation systems. By replacing the negative log-likelihood-based scorer with our proposed C-PMI scorer, we achieve a relative 60.5% higher Spearman correlation on average for the FED evaluation metric. Our code is publicly available at <url>https://github.com/renll/C-PMI</url>.</abstract>
       <url hash="6cc760c3">2023.dialdoc-1.9</url>
@@ -144,8 +144,8 @@
       <author><first>Wei</first><last>Fang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Helen</first><last>Meng</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>86-100</pages>
       <abstract>Collecting and constructing human-annotated corpora for training conversational question-answering (CQA) models has recently been shown to be inefficient and costly. To solve this problem, previous works have proposed training QA models with automatically generated QA data. In this work, we extend earlier studies on QA synthesis, and propose an efficient QA data generation algorithm under conversational settings. Our model recognizes potential dialogue topics, generates corresponding questions, and extracts answers from grounding passages. To improve the quality of generated QAs and downstream self-training of CQA models, we propose dropout and agreement-based QA selection methods. We conduct experiments on both data augmentation and domain adaptation settings. Experiments on the QuAC and Doc2Dial tasks show that the proposed method can significantly improve the quality of generated QA data, and also improves the accuracy of self-trained CQA models based on the constructed training corpora.</abstract>
       <url hash="93e99020">2023.dialdoc-1.10</url>
@@ -159,7 +159,7 @@
       <author><first>Mitali</first><last>Potnis</last></author>
       <author><first>Srijan</first><last>Bansal</last></author>
       <author><first>Teruko</first><last>Mitamura</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>101-108</pages>
       <abstract>The DialDoc 2023 shared task has expanded the document-grounded dialogue task to encompass multiple languages, despite having limited annotated data. This paper assesses the effectiveness of both language-agnostic and language-aware paradigms for multilingual pre-trained transformer models in a bi-encoder-based dense passage retriever (DPR), concluding that the language-agnostic approach is superior. Additionally, the study investigates the impact of query rewriting techniques using large language models, such as ChatGPT, on multilingual, document-grounded question-answering systems. The experiments conducted demonstrate that, for the examples examined, query rewriting does not enhance performance compared to the original queries. This failure is due to topic switching in final dialogue turns and irrelevant topics being considered for query rewriting.</abstract>
       <url hash="6f28e1a4">2023.dialdoc-1.11</url>
@@ -171,7 +171,7 @@
       <author><first>Ehsan</first><last>Lotfi</last><affiliation>Universiteit Antwerpen</affiliation></author>
       <author><first>Maxime</first><last>De Bruyn</last><affiliation>Antwerp University</affiliation></author>
       <author><first>Jeska.buhmann@uantwerpen.be</first><last>Jeska.buhmann@uantwerpen.be</last><affiliation>NA</affiliation></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
       <pages>109-121</pages>
       <abstract>Crowd-sourcing has been one of the primary ways to curate conversational data, specially for certain scenarios like grounding in knowledge. In this setting, using online platforms like AMT, non-expert participants are hired to converse with each other, following instructions which try to guide the outcome towards the desired format. The resulting data then is used for different parts of dialog modelling like knowledge selection and response selection/generation. In this work, we take a closer look into two of the most popular knowledge grounded dialog (KGD) datasets. Investigating potential biases and artefacts in knowledge selection labels, we observe that in many cases the ‘knowledge selection flow’ simply follows the order of presented knowledge pieces. In Wizard of Wikipedia (the most popular KGD dataset) we use simple content-agnostic models based on this bias to get significant knowledge selection performance. In Topical-Chat we see a similar correlation between the knowledge selection sequence and the order of entities and their segments, as provided to crowd-source workers. We believe that the observed results, question the significance and origin of the presumed dialog-level attributes like ‘knowledge flow’ in these crowd-sourced datasets.</abstract>
       <url hash="6653752c">2023.dialdoc-1.12</url>
diff --git a/data/xml/2023.disrpt.xml b/data/xml/2023.disrpt.xml
index d72d6098e9..f5411385c1 100644
--- a/data/xml/2023.disrpt.xml
+++ b/data/xml/2023.disrpt.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 3rd Shared Task on Discourse Relation Parsing and Treebanking (DISRPT 2023)</booktitle>
       <editor><first>Chloé</first><last>Braud</last><affiliation>Irit, Cnrs</affiliation></editor>
-      <editor><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></editor>
+      <editor id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></editor>
       <editor><first>Eleni</first><last>Metheniti</last><affiliation>IRIT, University of Toulouse</affiliation></editor>
       <editor><first>Philippe</first><last>Muller</last><affiliation>IRIT, University of Toulouse</affiliation></editor>
       <editor><first>Laura</first><last>Rivière</last><affiliation>Irit</affiliation></editor>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>HITS</fixed-case> at <fixed-case>DISRPT</fixed-case> 2023: Discourse Segmentation, Connective Detection, and Relation Classification</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <author><first>Yi</first><last>Fan</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <author><first>Michael</first><last>Strube</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <pages>43-49</pages>
diff --git a/data/xml/2023.dmr.xml b/data/xml/2023.dmr.xml
index f23c746c71..b204670ab8 100644
--- a/data/xml/2023.dmr.xml
+++ b/data/xml/2023.dmr.xml
@@ -50,10 +50,10 @@
     </paper>
     <paper id="4">
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Grounded Human-Robot Communication</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Julie</first><last>Foresta</last></author>
       <author><first>Nicholas C.</first><last>Fung</last></author>
-      <author><first>Cory J.</first><last>Hayes</last></author>
+      <author id="cory-hayes"><first>Cory J.</first><last>Hayes</last></author>
       <author><first>Philip</first><last>Osteen</last></author>
       <author><first>Jacob</first><last>Arkin</last></author>
       <author><first>Benned</first><last>Hedegaard</last></author>
@@ -68,7 +68,7 @@
       <author><first>Christopher</first><last>Tam</last></author>
       <author><first>Richard</first><last>Brutti</last></author>
       <author><first>Kenneth</first><last>Lai</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>45–51</pages>
       <abstract>Actions are critical for interpreting dialogue: they provide context for demonstratives and definite descriptions in discourse, and they continually update the common ground. This paper describes how Abstract Meaning Representation (AMR) can be used to annotate actions in multimodal human-human and human-object interactions. We conduct initial annotations of shared task and first-person point-of-view videos. We show that AMRs can be interpreted by a proxy language, such as VoxML, as executable annotation structures in order to recreate and simulate a series of annotated events.</abstract>
       <url hash="0185db45">2023.dmr-1.5</url>
@@ -118,12 +118,12 @@
       <title><fixed-case>UMR</fixed-case> Annotation of Multiword Expressions</title>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Andrew</first><last>Cowell</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Haibo</first><last>Sun</last></author>
-      <author><first>Zdenka</first><last>Uresova</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
       <author><first>Shira</first><last>Wein</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Jin</first><last>Zhao</last></author>
@@ -157,7 +157,7 @@
       <author><first>Damián Ariel</first><last>Furman</last></author>
       <author><first>Pablo</first><last>Torres</last></author>
       <author><first>José A.</first><last>Rodríguez</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Diego</first><last>Letzen</last></author>
       <author><first>Vanina</first><last>Martínez</last></author>
       <pages>136–153</pages>
diff --git a/data/xml/2023.dravidianlangtech.xml b/data/xml/2023.dravidianlangtech.xml
index 80d8038862..38cc34e476 100644
--- a/data/xml/2023.dravidianlangtech.xml
+++ b/data/xml/2023.dravidianlangtech.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages</booktitle>
       <editor><first>Bharathi R.</first><last>Chakravarthi</last></editor>
       <editor><first>Ruba</first><last>Priyadharshini</last></editor>
-      <editor><first>Anand Kumar</first><last>M</last></editor>
+      <editor id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></editor>
       <editor><first>Sajeetha</first><last>Thavareesan</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
@@ -306,7 +306,7 @@
       <author><first>Zahra</first><last>Ahani</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>180–185</pages>
       <abstract>With the prevalence of code-mixing among speakers of Dravidian languages, DravidianLangTech proposed the shared task on Sentiment Analysis in Tamil and Tulu at RANLP 2023. This paper presents the submission of LIDOMA, which proposes a methodology that combines lexical features and Convolutional Neural Networks (CNNs) to address the challenge. A fine-tuned 6-layered CNN model is employed, achieving macro F1 scores of 0.542 and 0.199 for Tulu and Tamil, respectively</abstract>
       <url hash="7642bba3">2023.dravidianlangtech-1.25</url>
@@ -408,7 +408,7 @@
       <author><first>Tadesse</first><last>Kebede</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>239–243</pages>
       <abstract>This research paper focuses on sentiment analysis of Tamil and Tulu texts using a BERT model and an RNN model. The BERT model, which was pretrained, achieved satisfactory performance for the Tulu language, with a Macro F1 score of 0.352. On the other hand, the RNN model showed good performance for Tamil language sentiment analysis, obtaining a Macro F1 score of 0.208. As future work, the researchers aim to fine-tune the models to further improve their results after the training process.</abstract>
       <url hash="092d2c47">2023.dravidianlangtech-1.35</url>
@@ -420,7 +420,7 @@
       <author><first>Selam</first><last>Kanta</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>244–249</pages>
       <abstract>This research focuses on identifying abusive language in comments. The study utilizes deep learning models, including Long Short-Term Memory (LSTM) and Recurrent Neural Networks (RNNs), to analyze linguistic patterns. Specifically, the LSTM model, a type of RNN, is used to understand the context by capturing long-term dependencies and intricate patterns in the input sequences. The LSTM model achieves better accuracy and is enhanced through the addition of a dropout layer and early stopping. For detecting abusive language in Telugu and Tamil-English, an LSTM model is employed, while in Tamil abusive language detection, a word-level RNN is developed to identify abusive words. These models process text sequentially, considering overall content and capturing contextual dependencies.</abstract>
       <url hash="1967ec5a">2023.dravidianlangtech-1.36</url>
diff --git a/data/xml/2023.dstc.xml b/data/xml/2023.dstc.xml
index 7e5f65f0f5..ef24365d8c 100644
--- a/data/xml/2023.dstc.xml
+++ b/data/xml/2023.dstc.xml
@@ -12,7 +12,7 @@
       <editor><first>Behnam</first><last>Hedayatnia</last></editor>
       <editor><first>Satwik</first><last>Kottur</last></editor>
       <editor><first>Seungwhan</first><last>Moon</last></editor>
-      <editor id="chen-zhang"><first>Chen</first><last>Zhang</last></editor>
+      <editor><first>Chen</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>September</month>
@@ -23,7 +23,7 @@
     <paper id="1">
       <title>Exploring Prompt-based Multi-task Learning for Multimodal Dialog State Tracking and Immersive Multimodal Conversation</title>
       <author><first>Yirong</first><last>Chen</last><affiliation>South China University of Technology</affiliation></author>
-      <author id="ya-li"><first>Ya</first><last>Li</last><affiliation>IFLYTEK Research</affiliation></author>
+      <author><first>Ya</first><last>Li</last><affiliation>IFLYTEK Research</affiliation></author>
       <author><first>Tao</first><last>Wang</last><affiliation>iFLYTEK Research</affiliation></author>
       <author><first>Xiaofen</first><last>Xing</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Xiangmin</first><last>Xu</last><affiliation>South China University of Technology</affiliation></author>
@@ -78,7 +78,7 @@
     <paper id="5">
       <title>Multi-Stage Coarse-to-Fine Contrastive Learning for Conversation Intent Induction</title>
       <author><first>Caiyuan</first><last>Chu</last><affiliation>Chongqing University</affiliation></author>
-      <author id="ya-li"><first>Ya</first><last>Li</last><affiliation>IFLYTEK Research</affiliation></author>
+      <author><first>Ya</first><last>Li</last><affiliation>IFLYTEK Research</affiliation></author>
       <author><first>Yifan</first><last>Liu</last><affiliation>IFLYTEK Research</affiliation></author>
       <author><first>Jia-Chen</first><last>Gu</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Quan</first><last>Liu</last><affiliation>iFLYTEK Research</affiliation></author>
@@ -116,7 +116,7 @@
       <author><first>Jeiyoon</first><last>Park</last><affiliation>LLSOLLU</affiliation></author>
       <author><first>Yoonna</first><last>Jang</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanhee</first><last>Lee</last><affiliation>Naver</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>57-66</pages>
       <abstract>The focus of this work is to investigate unsupervised approaches to overcome quintessential challenges in designing task-oriented dialog schema: assigning intent labels to each dialog turn (intent clustering) and generating a set of intents based on the intent clustering methods (intent induction). We postulate there are two salient factors for automatic induction of intents: (1) clustering algorithm for intent labeling and (2) user utterance embedding space. We compare existing off-the-shelf clustering models and embeddings based on DSTC11 evaluation. Our extensive experiments demonstrate that the combined selection of utterance embedding and clustering method in the intent induction task should be carefully considered. We also present that pretrained MiniLM with Agglomerative clustering shows significant improvement in NMI, ARI, F1, accuracy and example coverage in intent induction tasks. The source codes are available at https://github.com/Jeiyoon/dstc11-track2.</abstract>
       <url hash="f790544d">2023.dstc-1.8</url>
@@ -163,7 +163,7 @@
       <author><first>Léo</first><last>Jacqmin</last><affiliation>Orange &amp; Aix-Marseille University</affiliation></author>
       <author><first>Lucas</first><last>Druart</last><affiliation>Orange &amp; Avignon University</affiliation></author>
       <author><first>Yannick</first><last>Estève</last><affiliation>LIA - Avignon University</affiliation></author>
-      <author><first>Benoît</first><last>Favre</last><affiliation>Lab. Informatique et Systèmes / Aix-Marseille University / CNRS</affiliation></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last><affiliation>Lab. Informatique et Systèmes / Aix-Marseille University / CNRS</affiliation></author>
       <author><first>Lina</first><last>M Rojas</last><affiliation>Orange</affiliation></author>
       <author><first>Valentin</first><last>Vielzeuf</last><affiliation>Orange Labs</affiliation></author>
       <pages>95-104</pages>
@@ -176,7 +176,7 @@
       <author><first>Ridong</first><last>Jiang</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <author><first>Wei</first><last>Shi</last><affiliation>I2R</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>National University of Singapore</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yan</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Chunlei</first><last>Pan</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Jung</first><last>Jae Kim</last><affiliation>I2R</affiliation></author>
@@ -215,7 +215,7 @@
       <author><first>Patrícia</first><last>Pereira</last><affiliation>Instituto Superior Técnico / INESC-ID</affiliation></author>
       <author><first>Helena</first><last>Moniz</last><affiliation>INESC-ID</affiliation></author>
       <author><first>Joao</first><last>Paulo Carvalho</last><affiliation>INESC-ID / Instituto Superior Técnico, University of Lisbon, Portugal</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel</affiliation></author>
       <author><first>Isabel</first><last>Trancoso</last><affiliation>IST / INESC-ID</affiliation></author>
       <pages>133-143</pages>
       <abstract>Despite significant research effort in the development of automatic dialogue evaluation metrics, little thought is given to evaluating dialogues other than in English. At the same time, ensuring metrics are invariant to semantically similar responses is also an overlooked topic. In order to achieve the desired properties of robustness and multilinguality for dialogue evaluation metrics, we propose a novel framework that takes advantage of the strengths of current evaluation models with the newly-established paradigm of prompting Large Language Models (LLMs). Empirical results show our framework achieves state of the art results in terms of mean Spearman correlation scores across several benchmarks and ranks first place on both the Robust and Multilingual tasks of the DSTC11 Track 4 “Automatic Evaluation Metrics for Open-Domain Dialogue Systems”, proving the evaluation capabilities of prompted LLMs.</abstract>
@@ -308,7 +308,7 @@
       <author><first>Changxin</first><last>Ke</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Churui</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Longxuan</first><last>Ma</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Wei-Nan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Ting</first><last>Liu</last><affiliation>哈尔滨工业大学</affiliation></author>
       <pages>216-225</pages>
       <abstract>We participate in the 11th Dialog System Technology Challenges (DSTC) track-5 called Task-oriented Conversational Modeling with Subjective Knowledge. Introducing subjective knowledge into task-oriented dialogue (TOD) can help the DS to understand variables of subjective user needs and to suit more dialogue scenarios. Track-5 includes several sub-tasks: 1) knowledge-seeking turn detection; 2) knowledge entity tracking; 3) knowledge entry selection; and 4) use of the selected knowledge entries for response generation. Besides the challenges of each sub-tasks own, there are two challenges across different sub-tasks. The first is that there are multiple valid knowledge entries for each knowledge-seeking turn, the accuracy of the knowledge entry selection is important for the quality of response generation. The second challenge is how to address the unseen dialogue/entities/entries in the validation and the test set. In this paper, we propose a difference-aware ensemble method to address these sub-tasks and the two challenges mentioned above. Our method helps to obtain more robust results and performs well on unseen instances. Among all the submissions for the test set, our method ranks 1st on the knowledge-seeking turn detection task and achieves 3rd on the overall automatic evaluation score. Our code and data will be released on GitHub.</abstract>
@@ -358,13 +358,13 @@
 
 for Open-Domain Dialogue Systems at <fixed-case>DSTC</fixed-case> 11 Track 4</title>
       <author><first>Mario</first><last>Rodríguez-Cantelar</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Chengguang</first><last>Tang</last><affiliation>Tencent</affiliation></author>
       <author><first>Ke</first><last>Shi</last><affiliation>Tencent</affiliation></author>
       <author><first>Sarik</first><last>Ghazarian</last><affiliation>ISI USC</affiliation></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
       <author><first>Luis</first><last>Fernando D’Haro</last><affiliation>Speech Technology and Machine Learning Group - Universidad Politécnica de Madrid</affiliation></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>260-273</pages>
       <abstract>The advent and fast development of neural networks have revolutionized the research on dialogue systems and subsequently have triggered various challenges regarding their automatic evaluation. Automatic evaluation of open-domain dialogue systems as an open challenge has been the center of the attention of many researchers. Despite the consistent efforts to improve automatic metrics’ correlations with human evaluation, there have been very few attempts to assess their robustness over multiple domains and dimensions. Also, their focus is mainly on the English language. All of these challenges prompt the development of automatic evaluation metrics that are reliable in various domains, dimensions, and languages. This track in the 11th Dialogue System Technology Challenge (DSTC11) is part of the ongoing effort to promote robust and multilingual automatic evaluation metrics. This article describes the datasets and baselines provided to participants and discusses the submission and result details of the two proposed subtasks.</abstract>
       <url hash="d203a908">2023.dstc-1.28</url>
@@ -376,9 +376,9 @@ for Open-Domain Dialogue Systems at <fixed-case>DSTC</fixed-case> 11 Track 4</ti
       <author><first>Spandana</first><last>Gella</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Chao</first><last>Zhao</last><affiliation>UNC Chapel Hill</affiliation></author>
       <author><first>Di</first><last>Jin</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Behnam</first><last>Hedayatnia</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon, Alexa AI</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon, Alexa AI</affiliation></author>
       <author><first>Dilek</first><last>Z Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>274-281</pages>
       <abstract>Conventional Task-oriented Dialogue (TOD) Systems rely on domain-specific APIs/DBs or external factual knowledge to create responses. In DSTC11 track 5, we aims to provide a new challenging task to accommodate subjective user requests (e.g.,”Is the WIFI reliable?” or “Does the restaurant have a good atmosphere?” into TOD. We release a benchmark dataset, which contains subjective knowledge-seeking dialogue contexts and manually annotated responses that are grounded in subjective knowledge sources. The challenge track received a total of 48 entries from 14 participating teams.</abstract>
diff --git a/data/xml/2023.eacl.xml b/data/xml/2023.eacl.xml
index c6cb24c424..523184e74a 100644
--- a/data/xml/2023.eacl.xml
+++ b/data/xml/2023.eacl.xml
@@ -20,7 +20,7 @@
       <title><fixed-case>P</fixed-case>i<fixed-case>C</fixed-case>: A Phrase-in-Context Dataset for Phrase Understanding and Semantic Search</title>
       <author><first>Thang</first><last>Pham</last><affiliation>Auburn University</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Anh</first><last>Nguyen</last><affiliation>Auburn University</affiliation></author>
       <pages>1-26</pages>
       <abstract>While contextualized word embeddings have been a de-facto standard, learning contextualized phrase embeddings is less explored and being hindered by the lack of a human-annotated benchmark that tests machine understanding of phrase semantics given a context sentence or paragraph (instead of phrases alone). To fill this gap, we propose PiC—a dataset of ∼28K of noun phrases accompanied by their contextual Wikipedia pages and a suite of three tasks for training and evaluating phrase embeddings. Training on PiC improves ranking-models’ accuracy and remarkably pushes span selection (SS) models (i.e., predicting the start and end index of the target phrase) near human accuracy, which is 95% Exact Match (EM) on semantic search given a query phrase and a passage. Interestingly, we find evidence that such impressive performance is because the SS models learn to better capture the common meaning of a phrase regardless of its actual context. SotA models perform poorly in distinguishing two senses of the same phrase in two contexts (∼60% EM) and in estimating the similarity between two different phrases in the same context (∼70% EM).</abstract>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="6">
       <title>A Two-Sided Discussion of Preregistration of <fixed-case>NLP</fixed-case> Research</title>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Miryam</first><last>de Lhoneux</last><affiliation>KU Leuven</affiliation></author>
       <pages>83-93</pages>
@@ -100,7 +100,7 @@
       <author><first>Julian Martin</first><last>Eisenschlos</last><affiliation>Google</affiliation></author>
       <author><first>Jeremy R.</first><last>Cole</last><affiliation>Google Research</affiliation></author>
       <author><first>Fangyu</first><last>Liu</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>William W.</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <pages>94-102</pages>
       <abstract>We introduce a new in-context learning paradigm to measure Large Language Models’ (LLMs) ability to learn novel words during inference. In particular, we rewrite Winograd-style co-reference resolution problems by replacing the key concept word with a synthetic but plausible word that the model must understand to complete the task. Solving this task requires the model to make use of the dictionary definition of the new word given in the prompt. This benchmark addresses word acquisition, one important aspect of the diachronic degradation known to afflict LLMs. As LLMs are frozen in time at the moment they are trained, they are normally unable to reflect the way language changes over time. We show that the accuracy of LLMs compared to the original Winograd tasks decreases radically in our benchmark, thus identifying a limitation of current models and providing a benchmark to measure future improvements in LLMs ability to do in-context learning.</abstract>
       <url hash="a733db8e">2023.eacl-main.7</url>
@@ -125,7 +125,7 @@
       <author><first>Pranav</first><last>Narayanan Venkit</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Sanjana</first><last>Gautam</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Ruchi</first><last>Panchanadikar</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Shomir</first><last>Wilson</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>116-122</pages>
       <abstract>Little attention is placed on analyzing nationality bias in language models, especially when nationality is highly used as a factor in increasing the performance of social NLP models. This paper examines how a text generation model, GPT-2, accentuates pre-existing societal biases about country-based demonyms. We generate stories using GPT-2 for various nationalities and use sensitivity analysis to explore how the number of internet users and the country’s economic status impacts the sentiment of the stories. To reduce the propagation of biases through large language models (LLM), we explore the debiasing method of adversarial triggering. Our results show that GPT-2 demonstrates significant bias against countries with lower internet users, and adversarial triggering effectively reduces the same.</abstract>
@@ -138,7 +138,7 @@
       <title>Investigating data partitioning strategies for crosslinguistic low-resource <fixed-case>ASR</fixed-case> evaluation</title>
       <author><first>Zoey</first><last>Liu</last><affiliation>Department of Linguistics, University of Florida</affiliation></author>
       <author><first>Justin</first><last>Spence</last><affiliation>University of California, Davis</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>123-131</pages>
       <abstract>Many automatic speech recognition (ASR) data sets include a single pre-defined test set consisting of one or more speakers whose speech never appears in the training set. This “hold-speaker(s)-out” data partitioning strategy, however, may not be ideal for data sets in which the number of speakers is very small. This study investigates ten different data split methods for five languages with minimal ASR training resources. We find that (1) model performance varies greatly depending on which speaker is selected for testing; (2) the average word error rate (WER) across all held-out speakers is comparable not only to the average WER over multiple random splits but also to any given individual random split; (3) WER is also generally comparable when the data is split heuristically or adversarially; (4) utterance duration and intensity are comparatively more predictive factors of variability regardless of the data split. These results suggest that the widely used hold-speakers-out approach to ASR data partitioning can yield results that do not reflect model performance on unseen data or speakers. Random splits can yield more reliable and generalizable estimates when facing data sparsity.</abstract>
       <url hash="b4ef08e8">2023.eacl-main.10</url>
@@ -161,8 +161,8 @@
     <paper id="12">
       <title>Socratic Question Generation: A Novel Dataset, Models, and Evaluation</title>
       <author><first>Beng Heng</first><last>Ang</last><affiliation>Integrative Sciences and Engineering Programme, NUS</affiliation></author>
-      <author><first>Sujatha Das</first><last>Gollapalli</last><affiliation>Institute of Data Science, NUS</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last><affiliation>Institute of Data Science, NUS</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>147-165</pages>
       <abstract>Socratic questioning is a form of reflective inquiry often employed in education to encourage critical thinking in students, and to elicit awareness of beliefs and perspectives in a subject during therapeutic counseling. Specific types of Socratic questions are employed for enabling reasoning and alternate views against the context of individual personal opinions on a topic. Socratic contexts are different from traditional question generation contexts where “answer-seeking” questions are generated against a given formal passage on a topic, narrative stories or conversations. We present SocratiQ, the first large dataset of 110K (question, context) pairs for enabling studies on Socratic Question Generation (SoQG). We provide an in-depth study on the various types of Socratic questions and present models for generating Socratic questions against a given context through prompt tuning. Our automated and human evaluation results demonstrate that our SoQG models can produce realistic, type-sensitive, human-like Socratic questions enabling potential applications in counseling and coaching.</abstract>
       <url hash="03963913">2023.eacl-main.12</url>
@@ -212,9 +212,9 @@
     </paper>
     <paper id="16">
       <title>Retrieval Enhanced Data Augmentation for Question Answering on Privacy Policies</title>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>University of California Los Angeles</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>University of California Los Angeles</affiliation></author>
       <author><first>Jianfeng</first><last>Chi</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Yuan</first><last>Tian</last><affiliation>University of California Los Angeles</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>Ucla</affiliation></author>
       <pages>201-210</pages>
@@ -296,7 +296,7 @@
       <author><first>Francesco</first><last>Multari</last><affiliation>SIRIS Lab, Research Division of SIRIS Academic</affiliation></author>
       <author><first>Nicolau</first><last>Duran-Silva</last><affiliation>SIRIS Lab, Research Division of SIRIS Academic &amp; Universitat Pompeu Fabra</affiliation></author>
       <author><first>César</first><last>Parra-Rojas</last><affiliation>SIRIS Lab, Research Division of SIRIS Academic</affiliation></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center (BSC)</affiliation></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center (BSC)</affiliation></author>
       <author><first>Francesco Alessandro</first><last>Massucci</last><affiliation>SIRIS Lab, Research Division of SIRIS Academic</affiliation></author>
       <author><first>Marta</first><last>Villegas</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <pages>286-296</pages>
@@ -309,8 +309,8 @@
     <paper id="23">
       <title>Fair Enough: Standardizing Evaluation and Model Selection for Fairness Research in <fixed-case>NLP</fixed-case></title>
       <author><first>Xudong</first><last>Han</last><affiliation>The university of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>297-312</pages>
       <abstract>Modern NLP systems exhibit a range of biases, which a growing literature on model debiasing attempts to correct. However, current progress is hampered by a plurality of definitions of bias, means of quantification, and oftentimes vague relation between debiasing algorithms and theoretical measures of bias. This paper seeks to clarify the current situation and plot a course for meaningful progress in fair learning, with two key contributions: (1) making clear inter-relations among the current gamut of methods, and their relation to fairness theory; and (2) addressing the practical problem of model selection, which involves a trade-off between fairness and accuracy and has led to systemic issues in fairness research. Putting them together, we make several recommendations to help shape future work.</abstract>
       <url hash="aa046196">2023.eacl-main.23</url>
@@ -324,7 +324,7 @@
       <author><first>Vivek</first><last>Khetan</last><affiliation>Accenture Labs</affiliation></author>
       <author><first>Bogdan</first><last>Sacaleanu</last><affiliation>Accenture</affiliation></author>
       <author><first>Anatole</first><last>Gershman</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>313-327</pages>
       <abstract>We motivate and introduce CHARD: Clinical Health-Aware Reasoning across Dimensions, to investigate the capability of text generation models to act as implicit clinical knowledge bases and generate free-flow textual explanations about various health-related conditions across several dimensions. We collect and present an associated dataset, CHARDat, consisting of explanations about 52 health conditions across three clinical dimensions. We conduct extensive experiments using BART and T5 along with data augmentation, and perform automatic, human, and qualitative analyses. We show that while our models can perform decently, CHARD is very challenging with strong potential for further exploration.</abstract>
       <url hash="626fec2c">2023.eacl-main.24</url>
@@ -338,7 +338,7 @@
       <author><first>Ruqing</first><last>Zhang</last><affiliation>CAS Key Lab of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yixing</first><last>Fan</last><affiliation>Institute of Computing Technology, CAS.</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, CAS</affiliation></author>
       <pages>328-339</pages>
       <abstract>Recently, prompt tuning has achieved promising results in a variety of natural language processing (NLP) tasks. The typical approach is to insert text pieces (i.e. templates) into the input and transform downstream tasks into the same form as pre-training. In essence, a high-quality template is the foundation of prompt tuning to support the performance of the converted cloze-style task. However, for sarcasm recognition, it is time-consuming and requires increasingly sophisticated domain knowledge to determine the appropriate templates and label words due to its highly figurative nature. In this work, we propose SarcPrompt, to incorporate the prior knowledge about contradictory intentions into prompt tuning for sarcasm recognition. SarcPrompt is inspired by that the speaker usually says the opposite of what they actually mean in the sarcastic text. Based on this idea, we explicitly mimic the actual intention by prompt construction and indicate whether the actual intention is contradictory to the literal content by verbalizer engineering. Experiments on three public datasets with standard and low-resource settings demonstrate the effectiveness of our SarcPrompt for sarcasm recognition.</abstract>
       <url hash="867781af">2023.eacl-main.25</url>
@@ -391,7 +391,7 @@
       <title><fixed-case>D</fixed-case>i<fixed-case>TTO</fixed-case>: A Feature Representation Imitation Approach for Improving Cross-Lingual Transfer</title>
       <author><first>Shanu</first><last>Kumar</last><affiliation>Microsoft</affiliation></author>
       <author><first>Soujanya</first><last>Abbaraju</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
       <author><first>Sunayana</first><last>Sitaram</last><affiliation>Microsoft Research India</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Microsoft</affiliation></author>
       <pages>385-406</pages>
@@ -452,7 +452,7 @@
       <author><first>Minghao</first><last>Wu</last><affiliation>Monash University</affiliation></author>
       <author><first>George</first><last>Foster</last><affiliation>Google</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>448-462</pages>
       <abstract>Existing work in document-level neural machine translation commonly concatenates several consecutive sentences as a pseudo-document, and then learns inter-sentential dependencies. This strategy limits the model’s ability to leverage information from distant context. We overcome this limitation with a novel Document Flattening (DocFlat) technique that integrates Flat-Batch Attention (FBA) and Neural Context Gate (NCG) into Transformer model to utilizes information beyond the pseudo-document boundaries. FBA allows the model to attend to all the positions in the batch and model the relationships between positions explicitly and NCG identifies the useful information from the distant context. We conduct comprehensive experiments and analyses on three benchmark datasets for English-German translation, and validate the effectiveness of two variants of DocFlat. Empirical results show that our approach outperforms strong baselines with statistical significance on BLEU, COMET and accuracy on the contrastive test set. The analyses highlight that DocFlat is highly effective in capturing the long-range information.</abstract>
       <url hash="67e287f1">2023.eacl-main.33</url>
@@ -478,7 +478,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Jinheon</first><last>Baek</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Sung Ju</first><last>Hwang</last><affiliation>Kaist</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>Kaist</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>Kaist</affiliation></author>
       <pages>477-490</pages>
       <abstract>Conversational Question Answering (ConvQA) models aim at answering a question with its relevant paragraph and previous question-answer pairs that occurred during conversation multiple times. To apply such models to a real-world scenario, some existing work uses predicted answers, instead of unavailable ground-truth answers, as the conversation history for inference. However, since these models usually predict wrong answers, using all the predictions without filtering significantly hampers the model performance. To address this problem, we propose to filter out inaccurate answers in the conversation history based on their estimated confidences and uncertainties from the ConvQA model, without making any architectural changes. Moreover, to make the confidence and uncertainty values more reliable, we propose to further calibrate them, thereby smoothing the model predictions. We validate our models, Answer Selection-based realistic Conversation Question Answering, on two standard ConvQA datasets, and the results show that our models significantly outperform relevant baselines. Code is available at: <url>https://github.com/starsuzi/AS-ConvQA</url>.</abstract>
       <url hash="f27d42b5">2023.eacl-main.35</url>
@@ -492,7 +492,7 @@
       <author><first>Steven Y.</first><last>Feng</last><affiliation>Stanford University</affiliation></author>
       <author><first>Varun</first><last>Gangal</last><affiliation>Asapp</affiliation></author>
       <author><first>Malihe</first><last>Alikhani</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>491-504</pages>
       <abstract>Tongue twisters are meaningful sentences that are difficult to pronounce. The process of automatically generating tongue twisters is challenging since the generated utterance must satisfy two conditions at once: phonetic difficulty and semantic meaning. Furthermore, phonetic difficulty is itself hard to characterize and is expressed in natural tongue twisters through a heterogeneous mix of phenomena such as alliteration and homophony. In this paper, we propose PANCETTA: Phoneme Aware Neural Completion to Elicit Tongue Twisters Automatically. We leverage phoneme representations to capture the notion of phonetic difficulty, and we train language models to generate original tongue twisters on two proposed task settings. To do this, we curate a dataset called TT-Corp, consisting of existing English tongue twisters. Through automatic and human evaluation, as well as qualitative analysis, we show that PANCETTA generates novel, phonetically difficult, fluent, and semantically meaningful tongue twisters.</abstract>
       <url hash="bb9ea908">2023.eacl-main.36</url>
@@ -518,7 +518,7 @@
       <title>A Survey of Methods for Addressing Class Imbalance in Deep-Learning Based Natural Language Processing</title>
       <author><first>Sophie</first><last>Henning</last><affiliation>Bosch Center for Artificial Intelligence; Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>William</first><last>Beluch</last><affiliation>Bosch Center for Artificial Intelligence</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Annemarie</first><last>Friedrich</last><affiliation>Bosch Center for Artificial Intelligence</affiliation></author>
       <pages>523-540</pages>
       <abstract>Many natural language processing (NLP) tasks are naturally imbalanced, as some target categories occur much more frequently than others in the real world. In such scenarios, current NLP models tend to perform poorly on less frequent classes. Addressing class imbalance in NLP is an active research topic, yet, finding a good approach for a particular task and imbalance scenario is difficult. In this survey, the first overview on class imbalance in deep-learning based NLP, we first discuss various types of controlled and real-world class imbalance. Our survey then covers approaches that have been explicitly proposed for class-imbalanced NLP tasks or, originating in the computer vision community, have been evaluated on them. We organize the methods by whether they are based on sampling, data augmentation, choice of loss function, staged learning, or model design. Finally, we discuss open problems and how to move forward.</abstract>
@@ -548,7 +548,7 @@
       <author><first>Zhenting</first><last>Qi</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Linyong</first><last>Nan</last><affiliation>Yale University</affiliation></author>
       <author><first>Lorenzo Jaime</first><last>Flores</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>554-561</pages>
       <abstract>Logical Table-to-Text (LT2T) generation is tasked with generating logically faithful sentences from tables. There currently exists two challenges in the field: 1) Faithfulness: how to generate sentences that are factually correct given the table content; 2) Diversity: how to generate multiple sentences that offer different perspectives on the table. This work proposes LoFT, which utilizes logic forms as fact verifiers and content planners to control LT2T generation. Experimental results on the LogicNLG dataset demonstrate that LoFT is the first model that addresses unfaithfulness and lack of diversity issues simultaneously. Our code is publicly available at <url>https://github.com/Yale-LILY/LoFT</url>.</abstract>
       <url hash="2e845ac9">2023.eacl-main.40</url>
@@ -691,8 +691,8 @@
       <author><first>Sebastien</first><last>Montella</last><affiliation>Orange Labs</affiliation></author>
       <author><first>Alexis</first><last>Nasr</last><affiliation>Aix Marseille University</affiliation></author>
       <author><first>Johannes</first><last>Heinecke</last><affiliation>Orange Labs</affiliation></author>
-      <author><first>Frederic</first><last>Bechet</last><affiliation>Aix Marseille Universite - LIS/CNRS</affiliation></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last><affiliation>Orange Innovation Research</affiliation></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last><affiliation>Aix Marseille Universite - LIS/CNRS</affiliation></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last><affiliation>Orange Innovation Research</affiliation></author>
       <pages>727-736</pages>
       <abstract>Text generation from Abstract Meaning Representation (AMR) has substantially benefited from the popularized Pretrained Language Models (PLMs). Myriad approaches have linearized the input graph as a sequence of tokens to fit the PLM tokenization requirements. Nevertheless, this transformation jeopardizes the structural integrity of the graph and is therefore detrimental to its resulting representation. To overcome this issue, Ribeiro et al. (2021b) have recently proposed StructAdapt, a structure-aware adapter which injects the input graph connectivity within PLMs using Graph Neural Networks (GNNs). In this paper, we investigate the influence of Relative Position Embeddings (RPE) on AMR-to-Text, and, in parallel, we examine the robustness of StructAdapt. Through ablation studies, graph attack and link prediction, we reveal that RPE might be partially encoding input graphs. We suggest further research regarding the role of RPE will provide valuable insights for Graph-to-Text generation.</abstract>
       <url hash="3a7bb331">2023.eacl-main.51</url>
@@ -706,7 +706,7 @@
       <author><first>Andreas</first><last>Opedal</last><affiliation>ETH Zurich</affiliation></author>
       <author><first>Tiago</first><last>Pimentel</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Tim</first><last>Vieira</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>ETH Zürich</affiliation></author>
       <pages>737-749</pages>
       <abstract>The Bar-Hillel construction is a classic result in formal language theory. It shows, by a simple construction, that the intersection of a context-free language and a regular language is itself context-free. In the construction, the regular language is specified by a finite-state automaton. However, neither the original construction (Bar-Hillel et al., 1961) nor its weighted extension (Nederhof and Satta, 2003) can handle finite-state automata with ε-arcs. While it is possible to remove ε-arcs from a finite-state automaton efficiently without modifying the language, such an operation modifies the automaton’s set of paths. We give a construction that generalizes the Bar- Hillel in the case the desired automaton has ε-arcs, and further prove that our generalized construction leads to a grammar that encodes the structure of both the input automaton and grammar while retaining the asymptotic size of the original construction.</abstract>
@@ -737,8 +737,8 @@
       <author><first>Maris</first><last>Camilleri</last><affiliation>University of Essex</affiliation></author>
       <author><first>Paloma</first><last>Garcia</last><affiliation>University of Essex</affiliation></author>
       <author><first>Jon</first><last>Chamberlain</last><affiliation>University of Essex</affiliation></author>
-      <author><first>Udo</first><last>Kruschwitz</last><affiliation>University of Regensburg</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last><affiliation>University of Regensburg</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>767-781</pages>
       <abstract>Although several datasets annotated for anaphoric reference / coreference exist, even the largest such datasets have limitations in term of size, range of domains, coverage of anaphoric phenomena, and size of documents included. Yet, the approaches proposed to scale up anaphoric annotation haven’t so far resulted in datasets overcoming these limitations. In this paper, we introduce a new release of a corpus for anaphoric reference labelled via a game-with-a-purpose. This new release is comparable in size to the largest existing corpora for anaphoric reference due in part to substantial activity by the players, in part thanks to the use of a new resolve-and-aggregate paradigm to ‘complete’ markable annotations through the combination of an anaphoric resolver and an aggregation method for anaphoric reference. The proposed method could be adopted to greatly speed up annotation time in other projects involving games-with-a-purpose. In addition, the corpus covers genres for which no comparable size datasets exist (Fiction and Wikipedia); it covers singletons and non-referring expressions; and it includes a substantial number of long documents ( 2K in length).</abstract>
       <url hash="494ca2ea">2023.eacl-main.54</url>
@@ -750,7 +750,7 @@
       <title>What Makes Sentences Semantically Related? A Textual Relatedness Dataset and Empirical Study</title>
       <author><first>Mohamed</first><last>Abdalla</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Krishnapriya</first><last>Vishnubhotla</last><affiliation>University of Toronto</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>782-796</pages>
       <abstract>The degree of semantic relatedness of two units of language has long been considered fundamental to understanding meaning. Additionally, automatically determining relatedness has many applications such as question answering and summarization. However, prior NLP work has largely focused on semantic similarity, a subset of relatedness, because of a lack of relatedness datasets. In this paper, we introduce a dataset for Semantic Textual Relatedness, STR-2022, that has 5,500 English sentence pairs manually annotated using a comparative annotation framework, resulting in fine-grained scores. We show that human intuition regarding relatedness of sentence pairs is highly reliable, with a repeat annotation correlation of 0.84. We use the dataset to explore questions on what makes sentences semantically related. We also show the utility of STR-2022 for evaluating automatic methods of sentence representation and for various downstream NLP tasks. Our dataset, data statement, and annotation questionnaire can be found at: <url>https://doi.org/10.5281/zenodo.7599667</url>.</abstract>
       <url hash="5fb47b4c">2023.eacl-main.55</url>
@@ -773,7 +773,7 @@
     <paper id="57">
       <title><fixed-case>N</fixed-case>usa<fixed-case>X</fixed-case>: Multilingual Parallel Sentiment Dataset for 10 <fixed-case>I</fixed-case>ndonesian Local Languages</title>
       <award>EACL Outstanding Paper</award>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mbzuai</affiliation></author>
       <author><first>Samuel</first><last>Cahyawijaya</last><affiliation>Hkust</affiliation></author>
       <author><first>Rahmad</first><last>Mahendra</last><affiliation>Universitas Indonesia</affiliation></author>
@@ -783,7 +783,7 @@
       <author><first>David</first><last>Moeljadi</last><affiliation>Kanda University of International Studies</affiliation></author>
       <author><first>Radityo Eko</first><last>Prasojo</last><affiliation>Pitik.id</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mbzuai</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Rico</first><last>Sennrich</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
@@ -836,7 +836,7 @@
     <paper id="61">
       <title>Probing Power by Prompting: Harnessing Pre-trained Language Models for Power Connotation Framing</title>
       <author><first>Shima</first><last>Khanehzar</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Gosia</first><last>Mikolajczak</last><affiliation>Australian National University</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>Melbourne University</affiliation></author>
       <pages>873-885</pages>
@@ -891,7 +891,7 @@
       <author><first>Aru</first><last>Maekawa</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>930-942</pages>
       <abstract>Continual learning aims to accumulate knowledge to solve new tasks without catastrophic forgetting for previously learned tasks. Research on continual learning has led to the development of generative replay, which prevents catastrophic forgetting by generating pseudo-samples for previous tasks and learning them together with new tasks. Inspired by the biological brain, we propose the hippocampal memory indexing to enhance the generative replay by controlling sample generation using compressed features of previous training samples. It enables the generation of a specific training sample from previous tasks, thus improving the balance and quality of generated replay samples. Experimental results indicate that our method effectively controls the sample generation and consistently outperforms the performance of current generative replay methods.</abstract>
       <url hash="38b97f99">2023.eacl-main.65</url>
@@ -901,7 +901,7 @@
     </paper>
     <paper id="66">
       <title>A Survey of Multi-task Learning in Natural Language Processing: Regarding Task Relatedness and Training Methods</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
+      <author><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Mengxia</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Zhichun</first><last>Guo</last><affiliation>University of Notre Dame</affiliation></author>
@@ -927,7 +927,7 @@
     <paper id="68">
       <title>Question-Answer Sentence Graph for Joint Modeling Answer Selection</title>
       <author><first>Roshni</first><last>Iyer</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
       <author><first>Alessandro</first><last>Moschitti</last><affiliation>Amazon</affiliation></author>
       <author><first>Yizhou</first><last>Sun</last><affiliation>Ucla</affiliation></author>
       <pages>968-979</pages>
@@ -1022,7 +1022,7 @@
       <title>Looking for a Needle in a Haystack: A Comprehensive Study of Hallucinations in Neural Machine Translation</title>
       <author><first>Nuno M.</first><last>Guerreiro</last><affiliation>Instituto de Telecomunicacoes, University of Lisbon</affiliation></author>
       <author><first>Elena</first><last>Voita</last><affiliation>Meta AI</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>1059-1075</pages>
       <abstract>Although the problem of hallucinations in neural machine translation (NMT) has received some attention, research on this highly pathological phenomenon lacks solid ground. Previous work has been limited in several ways: it often resorts to artificial settings where the problem is amplified, it disregards some (common) types of hallucinations, and it does not validate adequacy of detection heuristics. In this paper, we set foundations for the study of NMT hallucinations. First, we work in a natural setting, i.e., in-domain data without artificial noise neither in training nor in inference. Next, we annotate a dataset of over 3.4k sentences indicating different kinds of critical errors and hallucinations. Then, we turn to detection methods and both revisit methods used previously and propose using glass-box uncertainty-based detectors. Overall, we show that for preventive settings, (i) previously used methods are largely inadequate, (ii) sequence log-probability works best and performs on par with reference-based methods. Finally, we propose DeHallucinator, a simple method for alleviating hallucinations at test time that significantly reduces the hallucinatory rate.</abstract>
       <url hash="fe0d15a5">2023.eacl-main.75</url>
@@ -1048,7 +1048,7 @@
       <author><first>Yujin</first><last>Huang</last><affiliation>Monash University</affiliation></author>
       <author><first>Fatemeh</first><last>Shiri</last><affiliation>Faculty of Information Technology, Monash University</affiliation></author>
       <author><first>Weiqing</first><last>Wang</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University</affiliation></author>
       <pages>1090-1102</pages>
       <abstract>Semantic parsing is a technique aimed at constructing a structured representation of the meaning of a natural-language question. Recent advances in language models trained on code have shown superior performance in generating these representations compared to language models trained solely on natural language text. The existing fine-tuned neural semantic parsers are vulnerable to adversarial attacks on natural-language inputs. While it has been established that the robustness of smaller semantic parsers can be enhanced through adversarial training, this approach is not feasible for large language models in real-world scenarios, as it requires both substantial computational resources and expensive human annotation on in-domain semantic parsing data. This paper presents the first empirical study on the adversarial robustness of a prompt-based semantic parser based on CODEX, a stateof-the-art (SOTA) language model trained on code. Our results demonstrate that the large language model of code is vulnerable to carefully crafted adversarial examples. To overcome this challenge, we propose methods for enhancing robustness without requiring substantial amounts of labelled data or intensive computational resources.</abstract>
@@ -1212,7 +1212,7 @@
     <paper id="89">
       <title>Logic Against Bias: Textual Entailment Mitigates Stereotypical Sentence Reasoning</title>
       <author><first>Hongyin</first><last>Luo</last><affiliation>Mit</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>1243-1254</pages>
       <abstract>Due to their similarity-based learning objectives, pretrained sentence encoders often internalize stereotypical assumptions that reflect the social biases that exist within their training corpora. In this paper, we describe several kinds of stereotypes concerning different communities that are present in popular sentence representation models, including pretrained next sentence prediction and contrastive sentence representation models. We compare such models to textual entailment models that learn language logic for a variety of downstream language understanding tasks. By comparing strong pretrained models based on text similarity with textual entailment learning, we conclude that the explicit logic learning with textual entailment can significantly reduce bias and improve the recognition of social communities, without an explicit de-biasing process.</abstract>
       <url hash="d2b3003f">2023.eacl-main.89</url>
@@ -1273,7 +1273,7 @@
       <title>Exploring Paracrawl for Document-level Neural Machine Translation</title>
       <author><first>Yusser</first><last>Al Ghussin</last><affiliation>DFKI, Saarland University</affiliation></author>
       <author><first>Jingyi</first><last>Zhang</last><affiliation>Hpi</affiliation></author>
-      <author><first>Josef</first><last>van Genabith</last><affiliation>Dfki</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last><affiliation>Dfki</affiliation></author>
       <pages>1304-1310</pages>
       <abstract>Document-level neural machine translation (NMT) has outperformed sentence-level NMT on a number of datasets. However, document-level NMT is still not widely adopted in realworld translation systems mainly due to the lack of large-scale general-domain training data for document-level NMT. We examine the effectiveness of using Paracrawl for learning document-level translation. Paracrawl is a large-scale parallel corpus crawled from the Internet and contains data from various domains. The official Paracrawl corpus was released as parallel sentences (extracted from parallel webpages) and therefore previous works only used Paracrawl for learning sentence-level translation. In this work, we extract parallel paragraphs from Paracrawl parallel webpages using automatic sentence alignments and we use the extracted parallel paragraphs as parallel documents for training document-level translation models. We show that document-level NMT models trained with only parallel paragraphs from Paracrawl can be used to translate real documents from TED, News and Europarl, outperforming sentence-level NMT models. We also perform a targeted pronoun evaluation and show that document-level models trained with Paracrawl data can help context-aware pronoun translation.</abstract>
       <url hash="448258fa">2023.eacl-main.94</url>
@@ -1301,7 +1301,7 @@
     <paper id="96">
       <title>Integrating Translation Memories into Non-Autoregressive Machine Translation</title>
       <author><first>Jitao</first><last>Xu</last><affiliation>LISN, CNRS, Paris-Saclay University</affiliation></author>
-      <author><first>Josep</first><last>Crego</last><affiliation>Systran</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>Systran</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR CNRS &amp; Sorbonne Université</affiliation></author>
       <pages>1326-1338</pages>
       <abstract>Non-autoregressive machine translation (NAT) has recently made great progress. However, most works to date have focused on standard translation tasks, even though some edit-based NAT models, such as the Levenshtein Transformer (LevT), seem well suited to translate with a Translation Memory (TM). This is the scenario considered here. We first analyze the vanilla LevT model and explain why it does not do well in this setting. We then propose a new variant, TM-LevT, and show how to effectively train this model. By modifying the data presentation and introducing an extra deletion operation, we obtain performance that are on par with an autoregressive approach, while reducing the decoding load. We also show that incorporating TMs during training dispenses to use knowledge distillation, a well-known trick used to mitigate the multimodality issue.</abstract>
@@ -1340,9 +1340,9 @@
     <paper id="99">
       <title><fixed-case>BLM</fixed-case>-<fixed-case>A</fixed-case>gr<fixed-case>F</fixed-case>: A New <fixed-case>F</fixed-case>rench Benchmark to Investigate Generalization of Agreement in Neural Networks</title>
       <author><first>Aixiu</first><last>An</last><affiliation>Université de Paris</affiliation></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
+      <author><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Maria</first><last>A. Rodriguez</last><affiliation>University of Geneva</affiliation></author>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>University of Geneva</affiliation></author>
       <pages>1363-1374</pages>
       <abstract>Successful machine learning systems currently rely on massive amounts of data, which are very effective in hiding some of the shallowness of the learned models. To help train models with more complex and compositional skills, we need challenging data, on which a system is successful only if it detects structure and regularities, that will allow it to generalize. In this paper, we describe a French dataset (BLM-AgrF) for learning the underlying rules of subject-verb agreement in sentences, developed in the BLM framework, a new task inspired by visual IQ tests known as Raven’s Progressive Matrices. In this task, an instance consists of sequences of sentences with specific attributes. To predict the correct answer as the next element of the sequence, a model must correctly detect the generative model used to produce the dataset. We provide details and share a dataset built following this methodology. Two exploratory baselines based on commonly used architectures show that despite the simplicity of the phenomenon, it is a complex problem for deep learning systems.</abstract>
@@ -1410,7 +1410,7 @@
       <title>Made of Steel? Learning Plausible Materials for Components in the Vehicle Repair Domain</title>
       <author><first>Annerose</first><last>Eichel</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Helena</first><last>Schlipf</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>1420-1435</pages>
       <abstract>We propose a novel approach to learn domain-specific plausible materials for components in the vehicle repair domain by probing Pretrained Language Models (PLMs) in a cloze task style setting to overcome the lack of annotated datasets. We devise a new method to aggregate salient predictions from a set of cloze query templates and show that domain-adaptation using either a small, high-quality or a customized Wikipedia corpus boosts performance. When exploring resource-lean alternatives, we find a distilled PLM clearly outperforming a classic pattern-based algorithm. Further, given that 98% of our domain-specific components are multiword expressions, we successfully exploit the compositionality assumption as a way to address data sparsity.</abstract>
       <url hash="8b6e3403">2023.eacl-main.104</url>
@@ -1448,14 +1448,14 @@
     <paper id="107">
       <title>Selective In-Context Data Augmentation for Intent Detection using Pointwise <fixed-case>V</fixed-case>-Information</title>
       <author><first>Yen-Ting</first><last>Lin</last><affiliation>National Taiwan University</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Seokhwan</first><last>Kim</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Devamanyu</first><last>Hazarika</last><affiliation>Amazon</affiliation></author>
       <author><first>Mahdi</first><last>Namazifar</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Di</first><last>Jin</last><affiliation>Amazon</affiliation></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>1463-1476</pages>
       <abstract>This work focuses on in-context data augmentation for intent detection. Having found that augmentation via in-context prompting of large pre-trained language models (PLMs) alone does not improve performance, we introduce a novel approach based on PLMs and pointwise V-information (PVI), a metric that can measure the usefulness of a datapoint for training a model. Our method first fine-tunes a PLM on a small seed of training data and then synthesizes new datapoints - utterances that correspond to given intents. It then employs intent-aware filtering, based on PVI, to remove datapoints that are not helpful to the downstream intent classifier. Our method is thus able to leverage the expressive power of large language models to produce diverse training data. Empirical results demonstrate that our method can produce synthetic training data that achieve state-of-the-art performance on three challenging intent detection datasets under few-shot settings (1.28% absolute improvement in 5-shot and 1.18% absolute in 10-shot, on average) and perform on par with the state-of-the-art in full-shot settings (within 0.01% absolute, on average).</abstract>
       <url hash="2a115fa3">2023.eacl-main.107</url>
@@ -1489,7 +1489,7 @@
     <paper id="110">
       <title>A Systematic Search for Compound Semantics in Pretrained <fixed-case>BERT</fixed-case> Architectures</title>
       <author><first>Filip</first><last>Miletic</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>1499-1512</pages>
       <abstract>To date, transformer-based models such as BERT have been less successful in predicting compositionality of noun compounds than static word embeddings. This is likely related to a suboptimal use of the encoded information, reflecting an incomplete grasp of how the models represent the meanings of complex linguistic structures. This paper investigates variants of semantic knowledge derived from pretrained BERT when predicting the degrees of compositionality for 280 English noun compounds associated with human compositionality ratings. Our performance strongly improves on earlier unsupervised implementations of pretrained BERT and highlights beneficial decisions in data preprocessing, embedding computation, and compositionality estimation. The distinct linguistic roles of heads and modifiers are reflected by differences in BERT-derived representations, with empirical properties such as frequency, productivity, and ambiguity affecting model performance. The most relevant representational information is concentrated in the initial layers of the model architecture.</abstract>
       <url hash="5bc613c2">2023.eacl-main.110</url>
@@ -1512,7 +1512,7 @@
     </paper>
     <paper id="112">
       <title>Summarize and Generate to Back-translate: Unsupervised Translation of Programming Languages</title>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Saikat</first><last>Chakraborty</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Baishakhi</first><last>Ray</last><affiliation>Columbia University</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>Ucla</affiliation></author>
@@ -1554,7 +1554,7 @@
       <title>Towards More Efficient Insertion Transformer with Fractional Positional Encoding</title>
       <author><first>Zhisong</first><last>Zhang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Yizhe</first><last>Zhang</last><affiliation>Apple</affiliation></author>
-      <author><first>Bill</first><last>Dolan</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last><affiliation>Microsoft Research</affiliation></author>
       <pages>1564-1572</pages>
       <abstract>Auto-regressive neural sequence models have been shown to be effective across text generation tasks. However, their left-to-right decoding order prevents generation from being parallelized. Insertion Transformer (Stern et al., 2019) is an attractive alternative that allows outputting multiple tokens in a single generation step. Nevertheless, due to the incompatibility between absolute positional encoding and insertion-based generation schemes, it needs to refresh the encoding of every token in the generated partial hypothesis at each step, which could be costly. We design a novel reusable positional encoding scheme for Insertion Transformers called Fractional Positional Encoding (FPE), which allows reusing representations calculated in previous steps. Empirical studies on various text generation tasks demonstrate the effectiveness of FPE, which leads to floating-point operation reduction and latency improvements on batched decoding.</abstract>
       <url hash="26d9671d">2023.eacl-main.115</url>
@@ -1581,7 +1581,7 @@
       <author><first>Pat</first><last>Verga</last><affiliation>Google</affiliation></author>
       <author><first>Michiel</first><last>de Jong</last><affiliation>University of Southern California</affiliation></author>
       <author><first>John</first><last>Wieting</last><affiliation>University of Illinois; TTI-Chicago; CMU; Google</affiliation></author>
-      <author><first>William W.</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <pages>1597-1610</pages>
       <abstract>Existing state-of-the-art methods for open-domain question-answering (ODQA) use an open book approach in which information is first retrieved from a large text corpus or knowledge base (KB) and then reasoned over to produce an answer. A recent alternative is to retrieve from a collection of previously-generated question-answer pairs; this has several practical advantages including being more memory and compute-efficient. Question-answer pairs are also appealing in that they can be viewed as an intermediate between text and KB triples: like KB triples, they often concisely express a single relationship, but like text, have much higher coverage than traditional KBs. In this work, we describe a new QA system that augments a text-to-text model with a large memory of question-answer pairs, and a new pre-training task for the latent step of question retrieval. The pre-training task substantially simplifies training and greatly improves performance on smaller QA benchmarks. Unlike prior systems of this sort, our QA system can also answer multi-hop questions that do not explicitly appear in the collection of stored question-answer pairs.</abstract>
       <url hash="9e821db1">2023.eacl-main.117</url>
@@ -1593,7 +1593,7 @@
       <title>Gold Doesn’t Always Glitter: Spectral Removal of Linear and Nonlinear Guarded Attribute Information</title>
       <author><first>Shun</first><last>Shao</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Yftah</first><last>Ziser</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>1611-1622</pages>
       <abstract>We describe a simple and effective method (Spectral Attribute removaL; SAL) to remove private or guarded information from neural representations. Our method uses matrix decomposition to project the input representations into directions with reduced covariance with the guarded information rather than maximal covariance as factorization methods normally use. We begin with linear information removal and proceed to generalize our algorithm to the case of nonlinear information removal using kernels. Our experiments demonstrate that our algorithm retains better main task performance after removing the guarded information compared to previous work. In addition, our experiments demonstrate that we need a relatively small amount of guarded attribute data to remove information about these attributes, which lowers the exposure to sensitive data and is more suitable for low-resource scenarios.</abstract>
       <url hash="4db457b5">2023.eacl-main.118</url>
@@ -1610,7 +1610,7 @@
       <author><first>Yosuke</first><last>Higuchi</last><affiliation>Waseda University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Florian</first><last>Metze</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1623-1639</pages>
       <abstract>Connectionist Temporal Classification (CTC) is a widely used approach for automatic speech recognition (ASR) that performs conditionally independent monotonic alignment. However for translation, CTC exhibits clear limitations due to the contextual and non-monotonic nature of the task and thus lags behind attentional decoder approaches in terms of translation quality. In this work, we argue that CTC does in fact make sense for translation if applied in a joint CTC/attention framework wherein CTC’s core properties can counteract several key weaknesses of pure-attention models during training and decoding. To validate this conjecture, we modify the Hybrid CTC/Attention model originally proposed for ASR to support text-to-text translation (MT) and speech-to-text translation (ST). Our proposed joint CTC/attention models outperform pure-attention baselines across six benchmark translation tasks.</abstract>
@@ -1653,7 +1653,7 @@
       <title>Cluster-Guided Label Generation in Extreme Multi-Label Classification</title>
       <author><first>Taehee</first><last>Jung</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Joo-kyung</first><last>Kim</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Dongyeop</first><last>Kang</last><affiliation>University of Minnesota</affiliation></author>
       <pages>1670-1685</pages>
       <abstract>For extreme multi-label classification (XMC), existing classification-based models poorly per- form for tail labels and often ignore the semantic relations among labels, like treating”Wikipedia” and “Wiki” as independent and separate labels. In this paper, we cast XMC as a generation task (XLGen), where we benefit from pre-trained text-to-text models. However, generating labels from the extremely large label space is challenging without any constraints or guidance. We, therefore, propose to guide label generation using label cluster information to hierarchically generate lower-level labels. We also find that frequency-based label ordering and using decoding ensemble methods are critical factors for the improvements in XLGen. XLGen with cluster guidance significantly outperforms the classification and generation baselines on tail labels, and also generally improves the overall performance in four popular XMC benchmarks. In human evaluation, we also find XLGen generates unseen but plausible labels. Our code is now available at <url>https://github.com/alexa/xlgen-eacl-2023</url>.</abstract>
@@ -1667,7 +1667,7 @@
       <author><first>Andrew</first><last>Lee</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last><affiliation>University of Sydney</affiliation></author>
       <author><first>Larry</first><last>An</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>1686-1695</pages>
       <abstract>Understanding empathy in text dialogue data is a difficult, yet critical, skill for effective human-machine interaction. In this work, we ask whether systems are making meaningful progress on this challenge. We consider a simple model that checks if an input utterance is similar to a small set of empathetic examples. Crucially, the model does not look at what the utterance is a response to, i.e., the dialogue context. This model performs comparably to other work on standard benchmarks and even outperforms state-of-the-art models for empathetic rationale extraction by 16.7 points on T-F1 and 4.3 on IOU-F1. This indicates that current systems rely on the surface form of the response, rather than whether it is suitable in context. To confirm this, we create examples with dialogue contexts that change the interpretation of the response and show that current systems continue to label utterances as empathetic. We discuss the implications of our findings, including improvements for empathetic benchmarks and how our model can be an informative baseline.</abstract>
       <url hash="bbd2c0ea">2023.eacl-main.123</url>
@@ -1696,7 +1696,7 @@
       <title>What happens before and after: Multi-Event Commonsense in Event Coreference Resolution</title>
       <author><first>Sahithya</first><last>Ravi</last><affiliation>The University of British Columbia, Vancouver</affiliation></author>
       <author><first>Chris</first><last>Tanner</last><affiliation>MIT and Kensho Technologies</affiliation></author>
-      <author><first>Raymond</first><last>Ng</last><affiliation>Univ British Columbia</affiliation></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last><affiliation>Univ British Columbia</affiliation></author>
       <author><first>Vered</first><last>Shwartz</last><affiliation>University of British Columbia</affiliation></author>
       <pages>1708-1724</pages>
       <abstract>Event coreference models cluster event mentions pertaining to the same real-world event. Recent models rely on contextualized representations to recognize coreference among lexically or contextually similar mentions. However, models typically fail to leverage commonsense inferences, which is particularly limiting for resolving lexically-divergent mentions. We propose a model that extends event mentions with temporal commonsense inferences. Given a complex sentence with multiple events, e.g., “the man killed his wife and got arrested”, with the target event “arrested”, our model generates plausible events that happen before the target event – such as “the police arrived”, and after it, such as “he was sentenced”. We show that incorporating such inferences into an existing event coreference model improves its performance, and we analyze the coreferences in which such temporal knowledge is required.</abstract>
@@ -1720,7 +1720,7 @@
       <title><fixed-case>C</fixed-case>yl<fixed-case>E</fixed-case>: Cylinder Embeddings for Multi-hop Reasoning over Knowledge Graphs</title>
       <author><first>Chau Duc Minh</first><last>Nguyen</last><affiliation>The University of Western Australia</affiliation></author>
       <author><first>Tim</first><last>French</last><affiliation>The University of Western Australia</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>The University of Western Austarlia</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>The University of Western Austarlia</affiliation></author>
       <author><first>Michael</first><last>Stewart</last><affiliation>The University of Western Australia</affiliation></author>
       <pages>1736-1751</pages>
       <abstract>Recent geometric-based approaches have been shown to efficiently model complex logical queries (including the intersection operation) over Knowledge Graphs based on the natural representation of Venn diagram. Existing geometric-based models (using points, boxes embeddings), however, cannot handle the logical negation operation. Further, those using cones embeddings are limited to representing queries by two-dimensional shapes, which reduced their effectiveness in capturing entities query relations for correct answers. To overcome this challenge, we propose unbounded cylinder embeddings (namely CylE), which is a novel geometric-based model based on three-dimensional shapes. Our approach can handle a complete set of basic first-order logic operations (conjunctions, disjunctions and negations). CylE considers queries as Cartesian products of unbounded sector-cylinders and consider a set of nearest boxes corresponds to the set of answer entities. Precisely, the conjunctions can be represented via the intersections of unbounded sector-cylinders. Transforming queries to Disjunctive Normal Form can handle queries with disjunctions. The negations can be represented by considering the closure of complement for an arbitrary unbounded sector-cylinder. Empirical results show that the performance of multi-hop reasoning task using CylE significantly increases over state-of-the-art geometric-based query embedding models for queries without negation. For queries with negation operations, though the performance is on a par with the best performing geometric-based model, CylE significantly outperforms a recent distribution-based model.</abstract>
@@ -1750,7 +1750,7 @@
       <author><first>Yu</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Milad</first><last>Shokouhi</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xia</first><last>Hu</last><affiliation>Rice University</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <pages>1766-1778</pages>
       <abstract>Recent work has focused on compressing pre-trained language models (PLMs) like BERT where the major focus has been to improve the in-distribution performance for downstream tasks. However, very few of these studies have analyzed the impact of compression on the generalizability and robustness of compressed models for out-of-distribution (OOD) data. Towards this end, we study two popular model compression techniques including knowledge distillation and pruning and show that the compressed models are significantly less robust than their PLM counterparts on OOD test sets although they obtain similar performance on in-distribution development sets for a task. Further analysis indicates that the compressed models overfit on the shortcut samples and generalize poorly on the hard ones. We further leverage this observation to develop a regularization strategy for robust model compression based on sample uncertainty.</abstract>
       <url hash="cc08d7a4">2023.eacl-main.129</url>
@@ -1775,8 +1775,8 @@
     <paper id="131">
       <title>Performance Prediction via <fixed-case>B</fixed-case>ayesian Matrix Factorisation for Multilingual Natural Language Processing Tasks</title>
       <author><first>Viktoria</first><last>Schram</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Daniel</first><last>Beck</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>1790-1801</pages>
       <abstract>Performance prediction for Natural Language Processing (NLP) seeks to reduce the experimental burden resulting from the myriad of different evaluation scenarios, e.g., the combination of languages used in multilingual transfer. In this work, we explore the framework ofBayesian matrix factorisation for performance prediction, as many experimental settings in NLP can be naturally represented in matrix format. Our approach outperforms the state-of-the-art in several NLP benchmarks, including machine translation and cross-lingual entity linking. Furthermore, it also avoids hyperparameter tuning and is able to provide uncertainty estimates over predictions.</abstract>
       <url hash="592b1d67">2023.eacl-main.131</url>
@@ -1802,7 +1802,7 @@
       <title>Don’t Mess with Mister-in-Between: Improved Negative Search for Knowledge Graph Completion</title>
       <author><first>Fan</first><last>Jiang</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Tom</first><last>Drummond</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>1818-1832</pages>
       <abstract>The best methods for knowledge graph completion use a ‘dual-encoding’ framework, a form of neural model with a bottleneck that facilitates fast approximate search over a vast collection of candidates. These approaches are trained using contrastive learning to differentiate between known positive examples and sampled negative instances. The mechanism for sampling negatives to date has been very simple, driven by pragmatic engineering considerations (e.g., using mismatched instances from the same batch). We propose several novel means of finding more informative negatives, based on searching for candidates with high lexical overlaps, from the dual-encoder model and according to knowledge graph structures. Experimental results on four benchmarks show that our best single model improves consistently over previous methods and obtains new state-of-the-art performance, including the challenging large-scale Wikidata5M dataset. Combing different kinds of strategies through model ensembling results in a further performance boost.</abstract>
       <url hash="a60fbe98">2023.eacl-main.133</url>
@@ -1946,7 +1946,7 @@
     <paper id="144">
       <title>Task and Sentiment Adaptation for Appraisal Tagging</title>
       <author><first>Lin</first><last>Tian</last><affiliation>RMIT University</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
       <author><first>Myung Hee</first><last>Kim</last><affiliation>Defence Science Technology Group</affiliation></author>
       <author><first>Jennifer</first><last>Biggs</last><affiliation>Defence Science and Technology Group</affiliation></author>
       <pages>1960-1970</pages>
@@ -1960,7 +1960,7 @@
       <title><fixed-case>DREEAM</fixed-case>: Guiding Attention with Evidence for Improving Document-Level Relation Extraction</title>
       <author><first>Youmi</first><last>Ma</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>An</first><last>Wang</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>1971-1983</pages>
       <abstract>Document-level relation extraction (DocRE) is the task of identifying all relations between each entity pair in a document. Evidence, defined as sentences containing clues for the relationship between an entity pair, has been shown to help DocRE systems focus on relevant texts, thus improving relation extraction. However, evidence retrieval (ER) in DocRE faces two major issues: high memory consumption and limited availability of annotations. This work aims at addressing these issues to improve the usage of ER in DocRE. First, we propose DREEAM, a memory-efficient approach that adopts evidence information as the supervisory signal, thereby guiding the attention modules of the DocRE system to assign high weights to evidence. Second, we propose a self-training strategy for DREEAM to learn ER from automatically-generated evidence on massive data without evidence annotations. Experimental results reveal that our approach exhibits state-of-the-art performance on the DocRED benchmark for both DocRE and ER. To the best of our knowledge, DREEAM is the first approach to employ ER self-training.</abstract>
       <url hash="71e2be7c">2023.eacl-main.145</url>
@@ -2026,7 +2026,7 @@
       <author><first>Apoorv</first><last>Saxena</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Chitrank</first><last>Gupta</last><affiliation>IIT Bombay, UT Austin</affiliation></author>
       <author><first>Mehran</first><last>Kazemi</last><affiliation>Google Research</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <author><first>Soumen</first><last>Chakrabarti</last><affiliation>IIT Bombay</affiliation></author>
       <pages>2049-2060</pages>
       <abstract>Recent years have witnessed interest in Temporal Question Answering over Knowledge Graphs (TKGQA), resulting in the development of multiple methods. However, these are highly engineered, thereby limiting their generalizability, and they do not automatically discover relevant parts of the KG during multi-hop reasoning. Relational graph convolutional networks (RGCN) provide an opportunity to address both of these challenges – we explore this direction in the paper. Specifically, we propose a novel, intuitive and interpretable scheme to modulate the messages passed through a KG edge during convolution based on the relevance of its associated period to the question. We also introduce a gating device to predict if the answer to a complex temporal question is likely to be a KG entity or time and use this prediction to guide our scoring mechanism. We evaluate the resulting system, which we call TwiRGCN, on a recent challenging dataset for multi-hop complex temporal QA called TimeQuestions. We show that TwiRGCN significantly outperforms state-of-the-art models on this dataset across diverse question types. Interestingly, TwiRGCN improves accuracy by 9–10 percentage points for the most difficult ordinal and implicit question types.</abstract>
@@ -2052,7 +2052,7 @@
       <title><fixed-case>GLADIS</fixed-case>: A General and Large Acronym Disambiguation Benchmark</title>
       <author><first>Lihu</first><last>Chen</last><affiliation>Telecom Paris &amp; Institut Polytechnique de Paris</affiliation></author>
       <author><first>Gael</first><last>Varoquaux</last><affiliation>Inria</affiliation></author>
-      <author><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>2073-2088</pages>
       <url hash="9311c626">2023.eacl-main.152</url>
       <bibkey>chen-etal-2023-gladis</bibkey>
@@ -2118,7 +2118,7 @@
       <author><first>Atharva</first><last>Kulkarni</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Tharun</first><last>Suresh</last><affiliation>Indraprastha Institute of Information Technology - Delhi</affiliation></author>
       <author><first>Himanshi</first><last>Mathur</last><affiliation>Iiitd</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Md. Shad</first><last>Akhtar</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
       <author><first>Tanmoy</first><last>Chakraborty</last><affiliation>IIT Delhi</affiliation></author>
       <pages>2149-2163</pages>
@@ -2170,9 +2170,9 @@
     <paper id="161">
       <title>Towards a Unified Multi-Domain Multilingual Named Entity Recognition Model</title>
       <author><first>Mayank</first><last>Kulkarni</last><affiliation>Amazon</affiliation></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Karthik</first><last>Radhakrishnan</last><affiliation>Bloomberg LP</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Shijie</first><last>Wu</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Lingjue</first><last>Xie</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Shaohua</first><last>Yang</last><affiliation>Bloomberg</affiliation></author>
@@ -2209,7 +2209,7 @@
     <paper id="164">
       <title>Measuring Normative and Descriptive Biases in Language Models Using Census Data</title>
       <author><first>Samia</first><last>Touileb</last><affiliation>University of Bergen</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
       <pages>2242-2248</pages>
       <abstract>We investigate in this paper how distributions of occupations with respect to gender is reflected in pre-trained language models. Such distributions are not always aligned to normative ideals, nor do they necessarily reflect a descriptive assessment of reality. In this paper, we introduce an approach for measuring to what degree pre-trained language models are aligned to normative and descriptive occupational distributions. To this end, we use official demographic information about gender–occupation distributions provided by the national statistics agencies of France, Norway, United Kingdom, and the United States. We manually generate template-based sentences combining gendered pronouns and nouns with occupations, and subsequently probe a selection of ten language models covering the English, French, and Norwegian languages. The scoring system we introduce in this work is language independent, and can be used on any combination of template-based sentences, occupations, and languages. The approach could also be extended to other dimensions of national census data and other demographic variables.</abstract>
@@ -2287,7 +2287,7 @@
       <author><first>Nicolas</first><last>Hiebel</last><affiliation>Université Paris Saclay, CNRS, LISN</affiliation></author>
       <author><first>Olivier</first><last>Ferret</last><affiliation>CEA List</affiliation></author>
       <author><first>Karen</first><last>Fort</last><affiliation>Sorbonne Universite and LORIA</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris Saclay, CNRS, LISN</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris Saclay, CNRS, LISN</affiliation></author>
       <pages>2320-2338</pages>
       <abstract>In sensitive domains, the sharing of corpora is restricted due to confidentiality, copyrights or trade secrets. Automatic text generation can help alleviate these issues by producing synthetic texts that mimic the linguistic properties of real documents while preserving confidentiality. In this study, we assess the usability of synthetic corpus as a substitute training corpus for clinical information extraction. Our goal is to automatically produce a clinical case corpus annotated with clinical entities and to evaluate it for a named entity recognition (NER) task. We use two auto-regressive neural models partially or fully trained on generic French texts and fine-tuned on clinical cases to produce a corpus of synthetic clinical cases. We study variants of the generation process: (i) fine-tuning on annotated vs. plain text (in that case, annotations are obtained a posteriori) and (ii) selection of generated texts based on models parameters and filtering criteria. We then train NER models with the resulting synthetic text and evaluate them on a gold standard clinical corpus. Our experiments suggest that synthetic text is useful for clinical NER.</abstract>
       <url hash="7c3d990e">2023.eacl-main.170</url>
@@ -2314,7 +2314,7 @@
       <author><first>Marco</first><last>Cognetta</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Sangwhan</first><last>Moon</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Lawrence</first><last>Wolf-sonkin</last><affiliation>Google Research</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>2350-2356</pages>
       <abstract>Character-level language modeling has been shown empirically to perform well on highly agglutinative or morphologically rich languages while using only a small fraction of the parameters required by (sub)word models. Korean fits nicely into this framework, except that, like other CJK languages, it has a very large character vocabulary of 11,172 unique syllables. However, unlike Japanese Kanji and Chinese Hanzi, each Korean syllable can be uniquely factored into a small set of subcharacters, called jamo. We explore a “three-hot” scheme, where we exploit the decomposability of Korean characters to model at the syllable level but using only jamo-level representations. We find that our three-hot embedding and decoding scheme alleviates the two major issues with prior syllable- and jamo-level models. Namely, it requires fewer than 1% of the embedding parameters of a syllable model, and it does not require tripling the sequence length, as with jamo models. In addition, it addresses a theoretical flaw in a prior three-hot modeling scheme. Our experiments show that, even when reducing the number of embedding parameters by 99.6% (from 11.4M to just 36k), our model suffers no loss in translation quality compared to the baseline syllable model.</abstract>
       <url hash="8386f16e">2023.eacl-main.172</url>
@@ -2420,7 +2420,7 @@
       <author><first>Bidisha</first><last>Samanta</last><affiliation>Google</affiliation></author>
       <author><first>Shachi</first><last>Dave</last><affiliation>Google Research</affiliation></author>
       <author><first>Sunita</first><last>Sarawagi</last><affiliation>IIT Bombay</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <pages>2455-2467</pages>
       <abstract>Despite cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains to be a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs.</abstract>
       <url hash="0e6d56f8">2023.eacl-main.180</url>
@@ -2432,7 +2432,7 @@
       <title>Modeling Complex Event Scenarios via Simple Entity-focused Questions</title>
       <author><first>Mahnaz</first><last>Koupaee</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Greg</first><last>Durrett</last><affiliation>UT Austin</affiliation></author>
-      <author><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>Stony Brook University</affiliation></author>
       <pages>2468-2483</pages>
       <abstract>Event scenarios are often complex and involve multiple event sequences connected through different entity participants. Exploring such complex scenarios requires an ability to branch through different sequences, something that is difficult to achieve with standard event language modeling. To address this, we propose a question-guided generation framework that models events in complex scenarios as answers to questions about participants. At any step in the generation process, the framework uses the previously-generated events as context, but generates the next event as an answer to one of three questions: what else a participant did, what else happened to a participant, or what else happened. The participants and the questions themselves can be sampled or be provided as input from a user, allowing for controllable exploration. Our empirical evaluation shows that this question-guided generation provides better coverage of participants, diverse events within a domain, comparable perplexities for modeling event sequences, and more effective control for interactive schema generation.</abstract>
@@ -2513,7 +2513,7 @@
     <paper id="187">
       <title>Towards preserving word order importance through Forced Invalidation</title>
       <author><first>Hadeel</first><last>Al-Negheimish</last><affiliation>Imperial College London</affiliation></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <author><first>Alessandra</first><last>Russo</last><affiliation>Imperial College London</affiliation></author>
       <pages>2563-2570</pages>
       <abstract>Large pre-trained language models such as BERT have been widely used as a framework for natural language understanding (NLU) tasks. However, recent findings have revealed that pre-trained language models are insensitive to word order. The performance on NLU tasks remains unchanged even after randomly permuting the word of a sentence, where crucial syntactic information is destroyed. To help preserve the importance of word order, we propose a simple approach called Forced Invalidation (FI): forcing the model to identify permuted sequences as invalid samples. We perform an extensive evaluation of our approach on various English NLU and QA based tasks over BERT-based and attention-based models over word embeddings. Our experiments demonstrate that FI significantly improves the sensitivity of the models to word order.</abstract>
@@ -2526,7 +2526,7 @@
       <title>How Many and Which Training Points Would Need to be Removed to Flip this Prediction?</title>
       <author><first>Jinghan</first><last>Yang</last><affiliation>The University of Hong Kong</affiliation></author>
       <author><first>Sarthak</first><last>Jain</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Byron C.</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>2571-2584</pages>
       <url hash="e948dfa3">2023.eacl-main.188</url>
       <attachment type="dataset" hash="8370b001">2023.eacl-main.188.dataset.zip</attachment>
@@ -2548,7 +2548,7 @@
     </paper>
     <paper id="190">
       <title>Detecting Lexical Borrowings from Dominant Languages in Multilingual Wordlists</title>
-      <author><first>John E.</first><last>Miller</last><affiliation>PUCP: Pontificia Universidad Catolica del Peru</affiliation></author>
+      <author id="john-miller"><first>John E.</first><last>Miller</last><affiliation>PUCP: Pontificia Universidad Catolica del Peru</affiliation></author>
       <author><first>Johann-Mattis</first><last>List</last><affiliation>Max Planck Institute for Evolutionary Anthropology</affiliation></author>
       <pages>2599-2605</pages>
       <abstract>Language contact is a pervasive phenomenon reflected in the borrowing of words from donor to recipient languages. Most computational approaches to borrowing detection treat all languages under study as equally important, even though dominant languages have a stronger impact on heritage languages than vice versa. We test new methods for lexical borrowing detection in contact situations where dominant languages play an important role, applying two classical sequence comparison methods and one machine learning method to a sample of seven Latin American languages which have all borrowed extensively from Spanish. All systems perform well, with the supervised machine learning system outperforming the classical systems. A review of detection errors shows that borrowing detection could be substantially improved by taking into account donor words with divergent meanings from recipient words.</abstract>
@@ -2574,7 +2574,7 @@
       <author><first>Emily</first><last>Allaway</last><affiliation>Columbia University</affiliation></author>
       <author><first>Jena D.</first><last>Hwang</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Chandra</first><last>Bhagavatula</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <author><first>Doug</first><last>Downey</last><affiliation>Allen Institute for AI, Northwestern University</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>University of Washington</affiliation></author>
       <pages>2618-2635</pages>
@@ -2606,7 +2606,7 @@
       <author><first>Qiming</first><last>Bao</last><affiliation>The University of Auckland</affiliation></author>
       <author><first>Yang</first><last>Chen</last><affiliation>University of Auckland</affiliation></author>
       <author><first>Mark</first><last>Gahegan</last><affiliation>University of Auckland</affiliation></author>
-      <author><first>Michael</first><last>Witbrock</last><affiliation>University of Auckland</affiliation></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last><affiliation>University of Auckland</affiliation></author>
       <pages>2652-2664</pages>
       <abstract>Training machine learning models to successfully perform scientific fact-checking tasks is challenging due to the expertise bottleneck that limits the availability of appropriate training datasets. In this task, models use textual evidence to confirm scientific claims, which requires data that contains extensive domain-expert annotation. Consequently, the number of existing scientific-fact-checking datasets and the sizes of those datasets are limited. However, these limitations do not apply to multiple-choice question datasets because of the necessity of domain exams in the modern education system. As one of the first steps towards addressing the fact-checking dataset scarcity problem in scientific domains, we propose a pipeline for automatically converting multiple-choice questions into fact-checking data, which we call Multi2Claim. By applying the proposed pipeline, we generated two large-scale datasets for scientific-fact-checking tasks: Med-Fact and Gsci-Fact for the medical and general science domains, respectively. These two datasets are among the first examples of large-scale scientific-fact-checking datasets. We developed baseline models for the verdict prediction task using each dataset. Additionally, we demonstrated that the datasets could be used to improve performance with respect to the F 1 weighted metric on existing fact-checking datasets such as SciFact, HEALTHVER, COVID-Fact, and CLIMATE-FEVER. In some cases, the improvement in performance was up to a 26% increase.</abstract>
       <url hash="272c5acb">2023.eacl-main.194</url>
@@ -2668,7 +2668,7 @@
     <paper id="199">
       <title>Methods for Measuring, Updating, and Visualizing Factual Beliefs in Language Models</title>
       <author><first>Peter</first><last>Hase</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Asli</first><last>Celikyilmaz</last><affiliation>FAIR @ Meta</affiliation></author>
       <author><first>Xian</first><last>Li</last><affiliation>Meta AI</affiliation></author>
       <author><first>Zornitsa</first><last>Kozareva</last><affiliation>Meta AI</affiliation></author>
@@ -2737,7 +2737,7 @@
     <paper id="204">
       <title>Behavior Cloned Transformers are Neurosymbolic Reasoners</title>
       <author><first>Ruoyao</first><last>Wang</last><affiliation>University of Arizona</affiliation></author>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Marc-Alexandre</first><last>Côté</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Prithviraj</first><last>Ammanabrolu</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>2777-2788</pages>
@@ -2792,7 +2792,7 @@
       <author><first>Shirley Anugrah</first><last>Hayati</last><affiliation>University of Minnesota</affiliation></author>
       <author><first>Kyumin</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Dheeraj</first><last>Rajagopal</last><affiliation>Google Inc</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Dongyeop</first><last>Kang</last><affiliation>University of Minnesota</affiliation></author>
       <pages>2843-2856</pages>
       <abstract>Large pre-trained language models have achieved impressive results on various style classification tasks, but they often learn spurious domain-specific words to make predictions (Hayati et al., 2021). While human explanation highlights stylistic tokens as important features for this task, we observe that model explanations often do not align with them. To tackle this issue, we introduce StyLEx, a model that learns from human annotated explanations of stylistic features and jointly learns to perform the task and predict these features as model explanations. Our experiments show that StyLEx can provide human like stylistic lexical explanations without sacrificing the performance of sentence-level style prediction on both in-domain and out-of-domain datasets. Explanations from StyLEx show significant improvements in explanation metrics (sufficiency, plausibility) and when evaluated with human annotations. They are also more understandable by human judges compared to the widely-used saliency-based explanation baseline.</abstract>
@@ -2805,7 +2805,7 @@
       <title>Comparing Intrinsic Gender Bias Evaluation Measures without using Human Annotated Examples</title>
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Danushka</first><last>Bollegala</last><affiliation>University of Liverpool/Amazon</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>2857-2863</pages>
       <abstract>Numerous types of social biases have been identified in pre-trained language models (PLMs), and various intrinsic bias evaluation measures have been proposed for quantifying those social biases. Prior works have relied on human annotated examples to compare existing intrinsic bias evaluation measures. However, this approach is not easily adaptable to different languages nor amenable to large scale evaluations due to the costs and difficulties when recruiting human annotators. To overcome this limitation, we propose a method to compare intrinsic gender bias evaluation measures without relying on human-annotated examples. Specifically, we create multiple bias-controlled versions of PLMs using varying amounts of male vs. female gendered sentences, mined automatically from an unannotated corpus using gender-related word lists. Next, each bias-controlled PLM is evaluated using an intrinsic bias evaluation measure, and the rank correlation between the computed bias scores and the gender proportions used to fine-tune the PLMs is computed. Experiments on multiple corpora and PLMs repeatedly show that the correlations reported by our proposed method that does not require human annotated examples are comparable to those computed using human annotated examples in prior work.</abstract>
       <url hash="64439158">2023.eacl-main.209</url>
@@ -2817,7 +2817,7 @@
       <title>Faithfulness-Aware Decoding Strategies for Abstractive Summarization</title>
       <author><first>David</first><last>Wan</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Mengwen</first><last>Liu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <author><first>Markus</first><last>Dreyer</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <pages>2864-2880</pages>
@@ -2943,11 +2943,11 @@
       <author><first>Dong-Ho</first><last>Lee</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Ravi Kiran</first><last>Selvam</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Sheikh Muhammad</first><last>Sarwar</last><affiliation>Amazon.com</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Fred</first><last>Morstatter</last><affiliation>USC Information Sciences Institute</affiliation></author>
       <author><first>Jay</first><last>Pujara</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Elizabeth</first><last>Boschee</last><affiliation>Information Sciences Institute</affiliation></author>
-      <author><first>James</first><last>Allan</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="james-allan"><first>James</first><last>Allan</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Xiang</first><last>Ren</last><affiliation>University of Southern California</affiliation></author>
       <pages>3011-3025</pages>
       <abstract>Deep neural models for named entity recognition (NER) have shown impressive results in overcoming label scarcity and generalizing to unseen entities by leveraging distant supervision and auxiliary information such as explanations. However, the costs of acquiring such additional information are generally prohibitive. In this paper, we present a novel two-stage framework (AutoTriggER) to improve NER performance by automatically generating and leveraging “entity triggers” which are human-readable cues in the text that help guide the model to make better decisions. Our framework leverages post-hoc explanation to generate rationales and strengthens a model’s prior knowledge using an embedding interpolation technique. This approach allows models to exploit triggers to infer entity boundaries and types instead of solely memorizing the entity words themselves. Through experiments on three well-studied NER datasets, AutoTriggER shows strong label-efficiency, is capable of generalizing to unseen entities, and outperforms the RoBERTa-CRF baseline by nearly 0.5 F1 points on average.</abstract>
@@ -2960,7 +2960,7 @@
       <title>Incorporating Task-Specific Concept Knowledge into Script Learning</title>
       <author><first>Chenkai</first><last>Sun</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Tie</first><last>Xu</last><affiliation>Alibaba</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>3026-3040</pages>
       <abstract>In this paper, we present Tetris, a new task of Goal-Oriented Script Completion. Unlike previous work, it considers a more realistic and general setting, where the input includes not only the goal but also additional user context, including preferences and history. To address this problem, we propose a novel approach, which uses two techniques to improve performance: (1) concept prompting, and (2) script-oriented contrastive learning that addresses step repetition and hallucination problems. On our WikiHow-based dataset, we find that both methods improve performance.</abstract>
@@ -2977,7 +2977,7 @@
       <author><first>Ali</first><last>Kebarighotbi</last><affiliation>Amazon</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Amazon</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Amazon</affiliation></author>
       <pages>3041-3051</pages>
       <abstract>Long video content understanding poses a challenging set of research questions as it involves long-distance, cross-media reasoning and knowledge awareness. In this paper, we present a new benchmark for this problem domain, targeting the task of deep movie/TV question answering (QA) beyond previous work’s focus on simple plot summary and short video moment settings. We define several baselines based on direct retrieval of relevant context for long-distance movie QA. Observing that real-world QAs may require higher-order multi-hop inferences, we further propose a novel framework, called the DeepMaven, which extracts events, entities, and relations from the rich multimedia content in long videos to pre-construct movie knowledge graphs (movieKGs), and at the time of QA inference, complements general semantics with structured knowledge for more effective information retrieval and knowledge reasoning. We also introduce our recently collected DeepMovieQA dataset, including 1,000 long-form QA pairs from 41 hours of videos, to serve as a new and useful resource for future work. Empirical results show the DeepMaven performs competitively for both the new DeepMovieQA and the pre-existing MovieQA dataset.</abstract>
       <url hash="e1ecc9f6">2023.eacl-main.221</url>
@@ -2990,7 +2990,7 @@
       <author><first>Jeremy R.</first><last>Cole</last><affiliation>Google Research</affiliation></author>
       <author><first>Aditi</first><last>Chaudhary</last><affiliation>Google Research</affiliation></author>
       <author><first>Bhuwan</first><last>Dhingra</last><affiliation>Duke University</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <pages>3052-3060</pages>
       <abstract>Salient Span Masking (SSM) has shown itself to be an effective strategy to improve closed-book question answering performance. SSM extends general masked language model pretraining by creating additional unsupervised training sentences that mask a single entity or date span, thus oversampling factual information. Despite the success of this paradigm, the span types and sampling strategies are relatively arbitrary and not widely studied for other tasks. Thus, we investigate SSM from the perspective of temporal tasks, where learning a good representation of various temporal expressions is important. To that end, we introduce Temporal Span Masking (TSM) intermediate training. First, we find that SSM alone improves the downstream performance on three temporal tasks by an avg. +5.8 points. Further, we are able to achieve additional improvements (avg. +0.29 points) by adding the TSM task. These comprise the new best reported results on the targeted tasks. Our analysis suggests that the effectiveness of SSM stems from the sentences chosen in the training data rather than the mask choice: sentences with entities frequently also contain temporal expressions. Nonetheless, the additional targeted spans of TSM can still improve performance, especially in a zero-shot context.</abstract>
       <url hash="17a13693">2023.eacl-main.222</url>
@@ -3052,7 +3052,7 @@
     </paper>
     <paper id="227">
       <title>Why Can’t Discourse Parsing Generalize? A Thorough Investigation of the Impact of Data Diversity</title>
-      <author><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
       <pages>3112-3130</pages>
       <abstract>Recent advances in discourse parsing performance create the impression that, as in other NLP tasks, performance for high-resource languages such as English is finally becoming reliable. In this paper we demonstrate that this is not the case, and thoroughly investigate the impact of data diversity on RST parsing stability. We show that state-of-the-art architectures trained on the standard English newswire benchmark do not generalize well, even within the news domain. Using the two largest RST corpora of English with text from multiple genres, we quantify the impact of genre diversity in training data for achieving generalization to text types unseen during training. Our results show that a heterogeneous training regime is critical for stable and generalizable models, across parser architectures. We also provide error analyses of model outputs and out-of-domain performance. To our knowledge, this study is the first to fully evaluate cross-corpus RST parsing generalizability on complete trees, examine between-genre degradation within an RST corpus, and investigate the impact of genre diversity in training data composition.</abstract>
@@ -3069,7 +3069,7 @@
       <author><first>Trieu H.</first><last>Trinh</last><affiliation>New York University</affiliation></author>
       <author><first>Vy</first><last>Phan</last><affiliation>University of Massachusetts - Amherst</affiliation></author>
       <author><first>Lam D.</first><last>Chau</last><affiliation>Department of Biochemistry, Case Western Reserve University</affiliation></author>
-      <author><first>Minh-Thang</first><last>Luong</last><affiliation>VietAI Research</affiliation></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last><affiliation>VietAI Research</affiliation></author>
       <pages>3131-3142</pages>
       <abstract>Biomedical data and benchmarks are highly valuable yet very limited in low-resource languages other than English, such as Vietnamese. In this paper, we use a state-of-the-art translation model in English-Vietnamese to translate and produce both pretrained and supervised data in the biomedical domains. Thanks to such large-scale translation, we introduce ViPubmedT5, a pretrained Encoder-Decoder Transformer model trained on 20 million translated abstracts from the high-quality public PubMed corpus. ViPubMedT5 demonstrates state-of-the-art results on two different biomedical benchmarks in summarization and acronym disambiguation. Further, we release ViMedNLI - a new NLP task in Vietnamese translated from MedNLI using the recently public En-vi translation model and carefully refined by human experts, with evaluations of existing methods against ViPubmedT5.</abstract>
       <url hash="1e38f9b9">2023.eacl-main.228</url>
@@ -3147,9 +3147,9 @@
       <author><first>Esin</first><last>Durmus</last><affiliation>Stanford University</affiliation></author>
       <author><first>Mirac</first><last>Suzgun</last><affiliation>Stanford University</affiliation></author>
       <author><first>Tianyi</first><last>Zhang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
       <pages>3206-3219</pages>
       <abstract>Large language models (LLMs) are subject to sociocultural and other biases previously identified using intrinsic evaluations. However, when and how these intrinsic biases in pre-trained LM representations propagate to downstream, fine-tuned NLP tasks like summarization is not well understood. In this work, we investigate one type of bias—name-nationality bias—and trace it from the pre-training stage to a downstream summarization task across multiple summarization modeling choices. We show that these biases manifest themselves as hallucinations in summarization, leading to factually incorrect summaries. We also find that this propagation of biases is algorithm-dependent: more abstractive models allow biases to propagate more directly to downstream tasks as hallucinated facts. Building on these observations, we further analyze how changes to the adaptation method and fine-tuning data set affect name nationality biases and show that while they can reduce the overall rate of hallucinations, they do not change the types of biases that do appear.</abstract>
       <url hash="4f9ad5e2">2023.eacl-main.234</url>
@@ -3187,7 +3187,7 @@
       <author><first>Jay</first><last>Gala</last><affiliation>AI4Bharat</affiliation></author>
       <author><first>Deep</first><last>Gandhi</last><affiliation>University of Alberta</affiliation></author>
       <author><first>Jash</first><last>Mehta</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Simon Fraser University</affiliation></author>
       <pages>3248-3259</pages>
       <abstract>Hate speech detection has been the subject of high research attention, due to the scale of content created on social media. In spite of the attention and the sensitive nature of the task, privacy preservation in hate speech detection has remained under-studied. The majority of research has focused on centralised machine learning infrastructures which risk leaking data. In this paper, we show that using federated machine learning can help address privacy the concerns that are inherent to hate speech detection while obtaining up to 6.81% improvement in terms of F1-score.</abstract>
       <url hash="fc486f66">2023.eacl-main.237</url>
@@ -3286,7 +3286,7 @@
       <title>Quantifying Context Mixing in Transformers</title>
       <author><first>Hosein</first><last>Mohebbi</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Willem</first><last>Zuidema</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Afra</first><last>Alishahi</last><affiliation>Tilburg University</affiliation></author>
       <pages>3378-3400</pages>
       <abstract>Self-attention weights and their transformed variants have been the main source of information for analyzing token-to-token interactions in Transformer-based models. But despite their ease of interpretation, these weights are not faithful to the models’ decisions as they are only one part of an encoder, and other components in the encoder layer can have considerable impact on information mixing in the output representations. In this work, by expanding the scope of analysis to the whole encoder block, we propose Value Zeroing, a novel context mixing score customized for Transformers that provides us with a deeper understanding of how information is mixed at each encoder layer. We demonstrate the superiority of our context mixing score over other analysis methods through a series of complementary evaluations with different viewpoints based on linguistically informed rationales, probing, and faithfulness analysis.</abstract>
@@ -3299,7 +3299,7 @@
     <paper id="246">
       <title><fixed-case>KGVL</fixed-case>-<fixed-case>BART</fixed-case>: Knowledge Graph Augmented Visual Language <fixed-case>BART</fixed-case> for Radiology Report Generation</title>
       <author><first>Kaveri</first><last>Kale</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <author><first>Milind</first><last>Gune</last><affiliation>Consultant Radiologist, Thane, India</affiliation></author>
       <author><first>Aditya</first><last>Shetty</last><affiliation>Consultant Radiologist, Breach Candy Hospital, Mumbai, India</affiliation></author>
       <author><first>Rustom</first><last>Lawyer</last><affiliation>Augnito India Pvt Ltd</affiliation></author>
@@ -3314,7 +3314,7 @@
       <title>A simple but effective model for attachment in discourse parsing with multi-task learning for relation labeling</title>
       <author><first>Zineb</first><last>Bennis</last><affiliation>Institut de Recheche en Informatique de Toulouse</affiliation></author>
       <author><first>Julie</first><last>Hunter</last><affiliation>Linagora</affiliation></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <pages>3412-3417</pages>
       <abstract>In this paper, we present a discourse parsing model for conversation trained on the STAC. We fine-tune a BERT-based model to encode pairs of discourse units and use a simple linear layer to predict discourse attachments. We then exploit a multi-task setting to predict relation labels. The multitask approach effectively aids in the difficult task of relation type prediction; our f1 score of 57 surpasses the state of the art with no loss in performance for attachment, confirming the intuitive interdependence of these two tasks. Our method also improves over previous discourse parsing models in allowing longer input sizes and in permitting attachments in which one node has multiple parents, an important feature of multiparty conversation.</abstract>
       <url hash="85bebb1b">2023.eacl-main.247</url>
@@ -3361,7 +3361,7 @@
     <paper id="251">
       <title>Semantic Specialization for Knowledge-based Word Sense Disambiguation</title>
       <author><first>Sakae</first><last>Mizuki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>3457-3470</pages>
       <abstract>A promising approach for knowledge-based Word Sense Disambiguation (WSD) is to select the sense whose contextualized embeddings computed for its definition sentence are closest to those computed for a target word in a given sentence. This approach relies on the similarity of the sense and context embeddings computed by a pre-trained language model. We propose a semantic specialization for WSD where contextualized embeddings are adapted to the WSD task using solely lexical knowledge. The key idea is, for a given sense, to bring semantically related senses and contexts closer and send different/unrelated senses farther away. We realize this idea as the joint optimization of the Attract-Repel objective for sense pairs and the self-training objective for context-sense pairs while controlling deviations from the original embeddings. The proposed method outperformed previous studies that adapt contextualized embeddings. It achieved state-of-the-art performance on knowledge-based WSD when combined with the reranking heuristic that uses the sense inventory. We found that the similarity characteristics of specialized embeddings conform to the key idea. We also found that the (dis)similarity of embeddings between the related/different/unrelated senses correlates well with the performance of WSD.</abstract>
       <url hash="c2a383a0">2023.eacl-main.251</url>
@@ -3390,7 +3390,7 @@
       <author><first>Zishan</first><last>Ahmad</last><affiliation>Indian Institute of Technology Patna</affiliation></author>
       <author><first>Kshitij</first><last>Mishra</last><affiliation>Indian Institute of Technology Patna</affiliation></author>
       <author><first>Asif</first><last>Ekbal</last><affiliation>Indian Institute of Technology Patna</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>3482-3494</pages>
       <abstract>Although there has been a plethora of work on open-domain conversational systems, most of the systems lack the mechanism of controlling the concept transitions in a dialogue. For activities like switching from casual chit-chat to task-oriented conversation, an agent with the ability to manage the flow of concepts in a conversation might be helpful. The user would find the dialogue more engaging and be more receptive to such transitions if these concept transitions were made while taking into account the user’s persona. Focusing on persona-aware concept transitions, we propose a Reinforced Persona-aware Topic-guiding Conversational System (RPTCS). Due to the lack of a persona-aware topic transition dataset, we propose a novel conversation dataset creation mechanism in which the conversational agent leads the discourse to drift to a set of target concepts depending on the persona of the speaker and the context of the conversation. To avoid scarcely available expensive human resource, the entire data-creation process is mostly automatic with human-in-loop only for quality checks. This created conversational dataset named PTCD is used to develop the RPTCS in two steps. First, a maximum likelihood estimation loss-based conversational model is trained on PTCD. Then this trained model is fine-tuned in a Reinforcement Learning (RL) framework by employing novel reward functions to assure persona, topic, and context consistency with non-repetitiveness in generated responses. Our experimental results demonstrate the strength of the proposed system with respect to strong baselines.</abstract>
       <url hash="8c63063f">2023.eacl-main.253</url>
@@ -3402,8 +3402,8 @@
       <title>What Did You Learn To Hate? A Topic-Oriented Analysis of Generalization in Hate Speech Detection</title>
       <author><first>Tom</first><last>Bourgeade</last><affiliation>IRIT, University of Toulouse</affiliation></author>
       <author><first>Patricia</first><last>Chiril</last><affiliation>University of Chicago</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>University of toulouse</affiliation></author>
-      <author><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, Université Toulouse 3</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>University of toulouse</affiliation></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, Université Toulouse 3</affiliation></author>
       <pages>3495-3508</pages>
       <abstract>Hate speech has unfortunately become a significant phenomenon on social media platforms, and it can cover various topics (misogyny, sexism, racism, xenophobia, etc.) and targets (e.g., black people, women). Various hate speech detection datasets have been proposed, some annotated for specific topics, and others for hateful speech in general. In either case, they often employ different annotation guidelines, which can lead to inconsistencies, even in datasets focusing on the same topics. This can cause issues in models trying to generalize across more data and more topics in order to improve detection accuracy. In this paper, we propose, for the first time, a topic-oriented approach to study generalization across popular hate speech datasets. We first perform a comparative analysis of the performances of Transformer-based models in capturing topic-generic and topic-specific knowledge when trained on different datasets. We then propose a novel, simple yet effective approach to study more precisely which topics are best captured in implicit manifestations of hate, showing that selecting combinations of datasets with better out-of-domain topical coverage improves the reliability of automatic hate speech detection.</abstract>
       <url hash="76d4709a">2023.eacl-main.254</url>
@@ -3416,7 +3416,7 @@
       <author><first>Zonglin</first><last>Yang</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Xinya</first><last>Du</last><affiliation>University of Texas at Dallas</affiliation></author>
       <author><first>Erik</first><last>Cambria</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
       <pages>3509-3522</pages>
       <abstract>Pretrained language models have been shown to store knowledge in their parameters and have achieved reasonable performance in commonsense knowledge base completion (CKBC) tasks. However, CKBC is knowledge-intensive and it is reported that pretrained language models’ performance in knowledge-intensive tasks are limited because of their incapability of accessing and manipulating knowledge. As a result, we hypothesize that providing retrieved passages that contain relevant knowledge as additional input to the CKBC task will improve performance. In particular, we draw insights from Case-Based Reasoning (CBR) – which aims to solve a new problem by reasoning with retrieved relevant cases, and investigate the direct application of it to CKBC. On two benchmark datasets, we demonstrate through automatic and human evaluations that our End-to-end Case-Based Reasoning Framework (ECBRF) generates more valid, informative, and novel knowledge than the state-of-the-art COMET model for CKBC in both the fully supervised and few-shot settings. We provide insights on why previous retrieval-based methods only achieve merely the same performance with COMET. From the perspective of CBR, our framework addresses a fundamental question on whether CBR methodology can be utilized to improve deep learning models.</abstract>
       <url hash="90e8802a">2023.eacl-main.255</url>
@@ -3467,7 +3467,7 @@
     <paper id="259">
       <title><fixed-case>M</fixed-case>eta<fixed-case>QA</fixed-case>: Combining Expert Agents for Multi-Skill Question Answering</title>
       <author><first>Haritz</first><last>Puerto</last><affiliation>UKP Lab, TU Darmstadt</affiliation></author>
-      <author><first>Gözde</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>UKP Lab, Technische Universität Darmstadt</affiliation></author>
       <pages>3566-3580</pages>
       <abstract>The recent explosion of question-answering (QA) datasets and models has increased the interest in the generalization of models across multiple domains and formats by either training on multiple datasets or combining multiple models. Despite the promising results of multi-dataset models, some domains or QA formats may require specific architectures, and thus the adaptability of these models might be limited. In addition, current approaches for combining models disregard cues such as question-answer compatibility. In this work, we propose to combine expert agents with a novel, flexible, and training-efficient architecture that considers questions, answer predictions, and answer-prediction confidence scores to select the best answer among a list of answer predictions. Through quantitative and qualitative experiments, we show that our model i) creates a collaboration between agents that outperforms previous multi-agent and multi-dataset approaches, ii) is highly data-efficient to train, and iii) can be adapted to any QA format. We release our code and a dataset of answer predictions from expert agents for 16 QA datasets to foster future research of multi-agent systems.</abstract>
@@ -3481,7 +3481,7 @@
       <author><first>Weixian Waylon</first><last>Li</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Yftah</first><last>Ziser</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Maximin</first><last>Coavoux</last><affiliation>CNRS, Université Grenoble Alpes</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>3581-3593</pages>
       <abstract>We introduce a task consisting in matching a proof to a given mathematical statement. The task fits well within current research on Mathematical Information Retrieval and, more generally, mathematical article analysis (Mathematical Sciences, 2014). We present a dataset for the task (the MATcH dataset) consisting of over 180k statement-proof pairs extracted from modern mathematical research articles. We find this dataset highly representative of our task, as it consists of relatively new findings useful to mathematicians. We propose a bilinear similarity model and two decoding methods to match statements to proofs effectively. While the first decoding method matches a proof to a statement without being aware of other statements or proofs, the second method treats the task as a global matching problem. Through a symbol replacement procedure, we analyze the “insights” that pre-trained language models have in such mathematical article analysis and show that while these models perform well on this task with the best performing mean reciprocal rank of 73.7, they follow a relatively shallow symbolic analysis and matching to achieve that performance.</abstract>
       <url hash="3504790c">2023.eacl-main.260</url>
@@ -3494,8 +3494,8 @@
       <author><first>Jan-Christoph</first><last>Klie</last><affiliation>UKP Lab, Technical University of Darmstadt</affiliation></author>
       <author><first>Ji-Ung</first><last>Lee</last><affiliation>UKP, TU Darmstadt</affiliation></author>
       <author><first>Kevin</first><last>Stowe</last><affiliation>Educational Testing Services (ETS)</affiliation></author>
-      <author><first>Gözde</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
       <author><first>Luke</first><last>Bates</last><affiliation>Technical University of Darmstadt</affiliation></author>
       <author><first>Dominic</first><last>Petrak</last><affiliation>TU Darmstadt</affiliation></author>
       <author><first>Richard</first><last>Eckart De Castilho</last><affiliation>UKP Lab, Technische Universität Darmstadt</affiliation></author>
@@ -3586,7 +3586,7 @@
     <paper id="268">
       <title>Representation biases in sentence transformers</title>
       <author><first>Dmitry</first><last>Nikolaev</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
       <pages>3701-3716</pages>
       <abstract>Variants of the BERT architecture specialised for producing full-sentence representations often achieve better performance on downstream tasks than sentence embeddings extracted from vanilla BERT. However, there is still little understanding of what properties of inputs determine the properties of such representations. In this study, we construct several sets of sentences with pre-defined lexical and syntactic structures and show that SOTA sentence transformers have a strong nominal-participant-set bias: cosine similarities between pairs of sentences are more strongly determined by the overlap in the set of their noun participants than by having the same predicates, lengthy nominal modifiers, or adjuncts. At the same time, the precise syntactic-thematic functions of the participants are largely irrelevant.</abstract>
       <url hash="bea99f74">2023.eacl-main.268</url>
@@ -3740,13 +3740,13 @@
     <paper id="280">
       <title>Meeting the Needs of Low-Resource Languages: The Value of Automatic Alignments via Pretrained Models</title>
       <author><first>Abteen</first><last>Ebrahimi</last><affiliation>University of Colorado, Boulder</affiliation></author>
-      <author><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Arturo</first><last>Oncevay</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>John E.</first><last>Ortega</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Luis</first><last>Chiruzzo</last><affiliation>Universidad de la Republica</affiliation></author>
       <author><first>Gustavo</first><last>Giménez-Lugo</last><affiliation>Universidade Tecnológica Federal do Paraná</affiliation></author>
       <author><first>Rolando</first><last>Coto-Solano</last><affiliation>Dartmouth College</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>3912-3926</pages>
       <abstract>Large multilingual models have inspired a new class of word alignment methods, which work well for the model’s pretraining languages. However, the languages most in need of automatic alignment are low-resource and, thus, not typically included in the pretraining data. In this work, we ask: How do modern aligners perform on unseen languages, and are they better than traditional methods? We contribute gold-standard alignments for Bribri–Spanish, Guarani–Spanish, Quechua–Spanish, and Shipibo-Konibo–Spanish. With these, we evaluate state-of-the-art aligners with and without model adaptation to the target language. Finally, we also evaluate the resulting alignments extrinsically through two downstream tasks: named entity recognition and part-of-speech tagging. We find that although transformer-based methods generally outperform traditional models, the two classes of approach remain competitive with each other.</abstract>
       <url hash="d4267dc1">2023.eacl-main.280</url>
@@ -3790,7 +3790,7 @@
       <author><first>Fynn</first><last>Petersen-frey</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Gerret</first><last>Von Nordheim</last><affiliation>Universit”ot Hamburg</affiliation></author>
       <author><first>Katharina</first><last>Kleinen-von K”onigsl”ow</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>11-17</pages>
       <abstract>WebAnno is one of the most popular annotation tools that supports generic annotation types and distributive annotation with multiple user roles. However, WebAnno focuses on annotating span-level mentions and relations among them, making document-level annotation complicated. When it comes to the annotation and analysis of social science materials, it usually involves the creation of codes to categorize a given document. The codes, which are known as codebooks, are typically hierarchical, which enables to code the document either with a general category or more fine-grained subcategories. CodeAnno is forked from WebAnno and designed to solve the coding problems faced by many social science researchers with the following main functionalities. 1) Creation of hierarchical codebooks, with functionality to move and sort categories in the hierarchy 2) an interactive UI for codebook annotation 3) import and export of annotations in CSV format, hence being compatible with existing annotations conducted using spreadsheet applications 4) integration of an external automation component to facilitate coding using machine learning 5) project templating that allows duplicating a project structure without copying the actual documents. We present different use-cases to demonstrate the capability of CodeAnno. A shot demonstration video of the system is available here: <url>https://www.youtube.com/watch?v=RmCdTghBe-s</url></abstract>
       <url hash="1fcec9c5">2023.eacl-demo.2</url>
@@ -3819,7 +3819,7 @@
       <author><first>Akul</first><last>Singh</last><affiliation>Florida International University</affiliation></author>
       <author><first>Jared</first><last>Hummer</last><affiliation>Florida International University</affiliation></author>
       <author><first>Antonela</first><last>Radas</last><affiliation>Florida International University</affiliation></author>
-      <author><first>Mark</first><last>Finlayson</last><affiliation>Fiu</affiliation></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last><affiliation>Fiu</affiliation></author>
       <pages>27-34</pages>
       <abstract>jTLEX is a programming library that provides a Java implementation of the TimeLine EXtraction algorithm (TLEX; Finlayson et al.,2021), along with utilities for programmatic manipulation of TimeML graphs. Timelines are useful for a number of natural language understanding tasks, such as question answering, cross-document event coreference, and summarization &amp; visualization. jTLEX provides functionality for (1) parsing TimeML annotations into Java objects, (2) construction of TimeML graphs from scratch, (3) partitioning of TimeML graphs into temporally connected subgraphs, (4) transforming temporally connected subgraphs into point algebra (PA) graphs, (5) extracting exact timeline of TimeML graphs, (6) detecting inconsistent subgraphs, and (7) calculating indeterminate sections of the timeline. The library has been tested on the entire TimeBank corpus, and comes with a suite of unit tests. We release the software as open source with a free license for non-commercial use.</abstract>
       <url hash="4f09cc1b">2023.eacl-demo.4</url>
@@ -3833,7 +3833,7 @@
       <author><first>Chau</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Vu</first><last>Tran</last><affiliation>The Institute of Statistical Mathematics, Japan</affiliation></author>
       <author><first>Ken</first><last>Satoh</last><affiliation>National Institute of Informatics, Japan</affiliation></author>
-      <author><first>Yuji</first><last>Matsumoto</last><affiliation>RIKEN Center for Advanced Intelligence Project (AIP), Japan</affiliation></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last><affiliation>RIKEN Center for Advanced Intelligence Project (AIP), Japan</affiliation></author>
       <author><first>Minh</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology</affiliation></author>
       <pages>35-42</pages>
       <abstract>In recent years, COVID-19 has impacted all aspects of human life. As a result, numerous publications relating to this disease have been issued. Due to the massive volume of publications, some retrieval systems have been developed to provide researchers with useful information. In these systems, lexical searching methods are widely used, which raises many issues related to acronyms, synonyms, and rare keywords. In this paper, we present a hybrid relation retrieval system, CovRelex-SE, based on embeddings to provide high-quality search results. Our system can be accessed through the following URL: <url>https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/covrelex-se/</url></abstract>
@@ -3946,7 +3946,7 @@
       <author><first>Fantine</first><last>Huot</last><affiliation>Google</affiliation></author>
       <author><first>Joshua</first><last>Maynez</last><affiliation>Google</affiliation></author>
       <author><first>Shashi</first><last>Narayan</last><affiliation>Google</affiliation></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
       <author><first>Kuzman</first><last>Ganchev</last><affiliation>Google</affiliation></author>
       <author><first>Annie Priyadarshini</first><last>Louis</last><affiliation>Google Research UK</affiliation></author>
       <author><first>Anders</first><last>Sandholm</last><affiliation>Google Research</affiliation></author>
@@ -3974,7 +3974,7 @@
     <paper id="15">
       <title><fixed-case>SPINDLE</fixed-case>: Spinning Raw Text into Lambda Terms with Graph Attention</title>
       <author><first>Konstantinos</first><last>Kogkalidis</last><affiliation>Utrecht University</affiliation></author>
-      <author><first>Michael</first><last>Moortgat</last><affiliation>Utrecht University</affiliation></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Richard</first><last>Moot</last><affiliation>Cnrs</affiliation></author>
       <pages>128-135</pages>
       <abstract>This paper describes SPINDLE, an open source Python module, providing an efficient and accurate parser for written Dutch that transforms raw text input to programs for meaning composition expressed as λ terms. The parser integrates a number of breakthrough advances made in recent years. Its output consists of hi-res derivations of a multimodal type-logical grammar, capturing two orthogonal axes of syntax, namely deep function-argument structures and dependency relations. These are produced by three interdependent systems: a static type-checker asserting the well-formedness of grammatical analyses, a state-of-the-art, structurally-aware supertagger based on heterogeneous graph convolutions, and a massively parallel proof search component based on Sinkhorn iterations. Packed in the software are also handy utilities and extras for proof visualization and inference, intended to facilitate end-user utilization.</abstract>
@@ -4002,7 +4002,7 @@
       <author><first>Twin</first><last>Karmakharm</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Ian</first><last>Roberts</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Xingyi</first><last>Song</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <pages>145-151</pages>
       <abstract>We present GATE Teamware 2: an open-source web-based platform for managing teams of annotators working on document classification tasks. GATE Teamware 2 is an entirely re-engineered successor to GATE Teamware, using contemporary web frameworks. The software allows the management of teams of multiple annotators, project managers and administrators - including the management of annotators - across multiple projects. Projects can be configured to control and monitor the annotation statistics and have a highly flexible JSON-configurable annotation display which can include arbitrary HTML. Optionally, documents can be uploaded with pre-existing annotations and documents are served to annotators in a random order by default to reduce bias. Crucially, annotators can be trained on applying the annotation guidelines correctly and then screened for quality assurance purposes, prior to being cleared for independent annotation. GATE Teamware 2 can be self-deployed, including in container orchestration environments, or provided as private, hosted cloud instances.GATE Teamware 2 is an open-source software and can be downloaded from <url>https://github.com/GATENLP/gate-teamware.A</url> demonstration video of the system has also been made available at <url>https://youtu.be/KoXkuhc4fmM</url>.</abstract>
       <url hash="2fb90c96">2023.eacl-demo.17</url>
@@ -4018,7 +4018,7 @@
       <author><first>Marta Kristin</first><last>Larusdottir</last><affiliation>Reykjavik University</affiliation></author>
       <author><first>Hafsteinn</first><last>Einarsson</last><affiliation>University of Iceland</affiliation></author>
       <author><first>Abuzar</first><last>Khan</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Hrafn</first><last>Loftsson</last><affiliation>Reykjavik University</affiliation></author>
       <pages>152-160</pages>
       <abstract>The methods used to create many of the well-known Question-Answering (QA) datasets are hard to replicate for low-resource languages. A commonality amongst these methods is hiring annotators to source answers from the internet by querying a single answer source, such as Wikipedia. Applying these methods for low-resource languages can be problematic since there is no single large answer source for these languages. Consequently, this can result in a high ratio of unanswered questions, since the amount of information in any single source is limited. To address this problem, we developed a novel crowd-sourcing platform to gather multiple-domain QA data for low-resource languages. Our platform, which consists of a mobile app and a web API, gamifies the data collection process. We successfully released the app for Icelandic (a low-resource language with about 350,000 native speakers) to build a dataset which rivals large QA datasets for high-resource languages both in terms of size and ratio of answered questions. We have made the platform open source with instructions on how to localize and deploy it to gather data for other low-resource languages.</abstract>
@@ -4030,7 +4030,7 @@
     <paper id="19">
       <title>Towards Speech to Speech Machine Translation focusing on <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Vandan</first><last>Mujadia</last><affiliation>Student</affiliation></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>IIIT, Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>IIIT, Hyderabad</affiliation></author>
       <pages>161-168</pages>
       <abstract>We introduce an SSMT (Speech to Speech Machine Translation, aka Speech to Speech Video Translation) Pipeline(<url>https://ssmt.iiit.ac.in/ssmtiiith</url>), as web application for translating videos from one language to another by cascading multiple language modules. Our speech translation system combines highly accurate speech to text (ASR) for Indian English, pre-possessing modules to bridge ASR-MT gaps such as spoken disfluency and punctuation, robust machine translation (MT) systems for multiple language pairs, SRT module for translated text, text to speech (TTS) module and a module to render translated synthesized audio on the original video. It is user-friendly, flexible, and easily accessible system. We aim to provide a complete configurable speech translation experience to users and researchers with this system. It also supports human intervention where users can edit outputs of different modules and the edited output can then be used for subsequent processing to improve overall output quality. By adopting a human-in-the-loop approach, the aim is to configure technology in such a way where it can assist humans and help to reduce the involved human efforts in speech translation involving English and Indian languages. As per our understanding, this is the first fully integrated system for English to Indian languages (Hindi, Telugu, Gujarati, Marathi and Punjabi) video translation. Our evaluation shows that one can get 3.5+ MOS score using the developed pipeline with human intervention for English to Hindi. A short video demonstrating our system is available at <url>https://youtu.be/MVftzoeRg48</url>.</abstract>
       <url hash="7a7c27c0">2023.eacl-demo.19</url>
@@ -4040,7 +4040,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>T</fixed-case>ext<fixed-case>W</fixed-case>orld<fixed-case>E</fixed-case>xpress: Simulating Text Games at One Million Steps Per Second</title>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Marc-alexandre</first><last>Cote</last><affiliation>Microsoft Research</affiliation></author>
       <pages>169-177</pages>
       <abstract>Text-based games offer a challenging test bed to evaluate virtual agents at language understanding, multi-step problem-solving, and common-sense reasoning. However, speed is a major limitation of current text-based games, capping at 300 steps per second, mainly due to the use of legacy tooling. In this work we present TextWorldExpress, a high-performance simulator that includes implementations of three common text game benchmarks that increases simulation throughput by approximately three orders of magnitude, reaching over one million steps per second on common desktop hardware. This significantly reduces experiment runtime, enabling billion-step-scale experiments in about one day.</abstract>
@@ -4051,7 +4051,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>T</fixed-case>ermo<fixed-case>UD</fixed-case> - a language-independent terminology extraction tool</title>
-      <author><first>Malgorzata</first><last>Marciniak</last><affiliation>Institute of Computer Science PAS</affiliation></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last><affiliation>Institute of Computer Science PAS</affiliation></author>
       <author><first>Piotr</first><last>Rychlik</last><affiliation>Institute of Computer Science, Polish Academy of Sciences</affiliation></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last><affiliation>Institute of Computer Science, Polish Academy of Sciences and Polish-Japanese Academy of Information Technology</affiliation></author>
       <pages>178-186</pages>
@@ -4107,7 +4107,7 @@
       <author><first>Furkan</first><last>Akkurt</last><affiliation>Boğaziçi University</affiliation></author>
       <author><first>Merve</first><last>Gürbüz</last><affiliation>Bogazici University</affiliation></author>
       <author><first>Onur</first><last>Gungor</last><affiliation>Bogazici University</affiliation></author>
-      <author><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
       <author><first>Tunga</first><last>Güngör</last><affiliation>Bogazici University</affiliation></author>
       <pages>219-227</pages>
       <abstract>Access to natural language processing resources is essential for their continuous improvement. This can be especially challenging in educational institutions where the software development effort required to package and release research outcomes may be overwhelming and under-recognized. Access towell-prepared and reliable research outcomes is important both for their developers as well as the greater research community. This paper presents an approach to address this concern with two main goals: (1) to create an open-source easily deployable platform where resources can be easily shared and explored, and (2) to use this platform to publish open-source Turkish NLP resources (datasets and tools) created by a research lab. The Turkish Natural Language Processing (TULAP) was designed and developed as an easy-to-use platform to share dataset and tool resources which supports interactive tool demos. Numerous open access Turkish NLP resources have been shared on TULAP. All tools are containerized to support portability for custom use. This paper describes the design, implementation, and deployment of TULAP with use cases (available at <url>https://tulap.cmpe.boun.edu.tr/</url>). A short video demonstrating our system is available at <url>https://figshare.com/articles/media/TULAP_Demo/22179047</url>.</abstract>
@@ -4133,8 +4133,8 @@
       <title>Automatically Summarizing Evidence from Clinical Trials: A Prototype Highlighting Current Challenges</title>
       <author><first>Sanjana</first><last>Ramprasad</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Jered</first><last>Mcinerney</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Iain</first><last>Marshall</last><affiliation>King’s College London</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last><affiliation>King’s College London</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>236-247</pages>
       <abstract>In this work we present TrialsSummarizer, a system that aims to automatically summarize evidence presented in the set of randomized controlled trials most relevant to a given query. Building on prior work, the system retrieves trial publications matching a query specifying a combination of condition, intervention(s), and outcome(s), and ranks these according to sample size and estimated study quality. The top-k such studies are passed through a neural multi-document summarization system, yielding a synopsis of these trials. We consider two architectures: A standard sequence-to-sequence model based on BART, and a multi-headed architecture intended to provide greater transparency and controllability to end-users. Both models produce fluent and relevant summaries of evidence retrieved for queries, but their tendency to introduce unsupported statements render them inappropriate for use in this domain at present. The proposed architecture may help users verify outputs allowing users to trace generated tokens back to inputs. The demonstration video can be found at <url>https://vimeo.com/735605060The</url> prototype, source code, and model weights are available at: <url>https://sanjanaramprasad.github.io/trials-summarizer/</url></abstract>
       <url hash="8caba601">2023.eacl-demo.27</url>
@@ -4252,7 +4252,7 @@
     <paper id="2">
       <title>Incorporating Dropped Pronouns into Coreference Resolution: The case for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Tuğba</first><last>Pamay Arslan</last><affiliation>Istanbul Technical University</affiliation></author>
-      <author><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
       <pages>14-25</pages>
       <abstract>Representation of coreferential relations is a challenging and actively studied topic for pro-drop and morphologically rich languages (PD-MRLs) due to dropped pronouns (e.g., null subjects and omitted possessive pronouns). These phenomena require a representation scheme at the morphology level and enhanced evaluation methods. In this paper, we propose a representation &amp; evaluation scheme to incorporate dropped pronouns into coreference resolution and validate it on the Turkish language. Using the scheme, we extend the annotations on the only existing Turkish coreference dataset, which originally did not contain annotations for dropped pronouns. We provide publicly available pre and post processors to enhance the prominent CoNLL coreference scorer also to cover coreferential relations arising from dropped pronouns. As a final step, the paper reports the first neural Turkish coreference resolution results in the literature. Although validated on Turkish, the proposed scheme is language-independent and may be used for other PD-MRLs.</abstract>
       <url hash="775db312">2023.eacl-srw.2</url>
@@ -4263,7 +4263,7 @@
     <paper id="3">
       <title>Towards Generation and Recognition of Humorous Texts in <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Marcio</first><last>Lima Inácio</last><affiliation>University of Coimbra</affiliation></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last><affiliation>CISUC, DEI, University of Coimbra</affiliation></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last><affiliation>CISUC, DEI, University of Coimbra</affiliation></author>
       <pages>26-36</pages>
       <abstract>Dealing with humor is an important step to develop Natural Language Processing tools capable of handling sophisticated semantic and pragmatic knowledge. In this context, this PhD thesis focuses on the automatic generation and recognition of verbal punning humor in Portuguese, which is still an underdeveloped language when compared to English. One of the main goals of this research is to conciliate Natural Language Generation computational models with existing theories of humor from the Humanities while avoiding mere generation by including contextual information into the generation process. Another point that is of utmost importance is the inclusion of the listener as an active part in the process of understanding and creating humor; we hope to achieve this by using concepts from Recommender Systems in our methods. Ultimately, we want to not only advance the current state-of-the-art in humor generation and recognition, but also to help the general Portuguese-speaking research community with methods, tools and resources that may aid in the development of further techniques for this language. We also expect our systems to provide insightful ideas about how humor is created and perceived by both humans and machines.</abstract>
       <url hash="b3c58e51">2023.eacl-srw.3</url>
@@ -4323,7 +4323,7 @@
       <title>Improving and Simplifying Template-Based Named Entity Recognition</title>
       <author><first>Murali</first><last>Kondragunta</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Maite</first><last>Oronoz</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>79-86</pages>
       <abstract>With the rise in larger language models, researchers started exploiting them by pivoting the downstream tasks as language modeling tasks using prompts. In this work, we convert the Named Entity Recognition task into a seq2seq task by generating the synthetic sentences using templates. Our main contribution is the conversion framework which provides faster inference. In addition, we test our method’s performance in resource-rich, low resource and domain transfer settings. Results show that our method achieves comparable results in the resource-rich setting and outperforms the current seq2seq paradigm state-of-the-art approach in few-shot settings. Through the experiments, we observed that the negative examples play an important role in model’s performance. We applied our approach over BART and T5-base models, and we notice that the T5 architecture aligns better with our task. The work is performed on the datasets in English language.</abstract>
       <url hash="c1b2e337">2023.eacl-srw.8</url>
@@ -4380,7 +4380,7 @@
       <author><first>Amir</first><last>Hadifar</last><affiliation>Ghent University - imec</affiliation></author>
       <author><first>Semere Kiros</first><last>Bitew</last><affiliation>Ghent University - imec, IDLab</affiliation></author>
       <author><first>Johannes</first><last>Deleu</last><affiliation>Ghent University - imec</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
       <author><first>Chris</first><last>Develder</last><affiliation>Ghent University</affiliation></author>
       <author><first>Thomas</first><last>Demeester</last><affiliation>Ghent University - imec</affiliation></author>
       <pages>123-133</pages>
@@ -4393,7 +4393,7 @@
     <paper id="14">
       <title>Towards Automatic Grammatical Error Type Classification for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Harun</first><last>Uz</last><affiliation>Istanbul Technical University</affiliation></author>
-      <author><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
       <pages>134-142</pages>
       <url hash="0d0f97fd">2023.eacl-srw.14</url>
       <bibkey>uz-eryigit-2023-towards</bibkey>
@@ -4404,7 +4404,7 @@
     <paper id="15">
       <title>Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks</title>
       <author><first>Nadine</first><last>El-Naggar</last><affiliation>City, University of London</affiliation></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <author><first>Tillman</first><last>Weyde</last><affiliation>City, University of London</affiliation></author>
       <pages>143-148</pages>
       <abstract>Previous work has established that RNNs with an unbounded activation function have the capacity to count exactly. However, it has also been shown that RNNs are challenging to train effectively and generally do not learn exact counting behaviour. In this paper, we focus on this problem by studying the simplest possible RNN, a linear single-cell network. We conduct a theoretical analysis of linear RNNs and identify conditions for the models to exhibit exact counting behaviour. We provide a formal proof that these conditions are necessary and sufficient. We also conduct an empirical analysis using tasks involving a Dyck-1-like Balanced Bracket language under two different settings. We observe that linear RNNs generally do not meet the necessary and sufficient conditions for counting behaviour when trained with the standard approach. We investigate how varying the length of training sequences and utilising different target classes impacts model behaviour during training and the ability of linear RNN models to effectively approximate the indicator conditions.</abstract>
@@ -4416,7 +4416,7 @@
     <paper id="16">
       <title>Addressing Domain Changes in Task-oriented Conversational Agents through Dialogue Adaptation</title>
       <author><first>Tiziano</first><last>Labruna</last><affiliation>Fondazione Bruno Kessler and Free University of Bozen-Bolzano</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>Fbk</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>Fbk</affiliation></author>
       <pages>149-158</pages>
       <url hash="5b2160e8">2023.eacl-srw.16</url>
       <bibkey>labruna-magnini-2023-addressing</bibkey>
@@ -4428,8 +4428,8 @@
   <volume id="tutorials" ingest-date="2023-04-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
-      <editor><first>Fabio Massimo</first><last>Zanzotto</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dubrovnik, Croatia</address>
       <month>May</month>
@@ -4444,7 +4444,7 @@
     <paper id="1">
       <title>Mining, Assessing, and Improving Arguments in <fixed-case>NLP</fixed-case> and the Social Sciences</title>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>1-6</pages>
@@ -4456,7 +4456,7 @@
     </paper>
     <paper id="2">
       <title>Emotion Analysis from Texts</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>7-12</pages>
       <abstract>Emotion analysis in text is an area of research that encompasses a set of various natural language processing (NLP) tasks, including classification and regression settings, as well as structured prediction tasks like role labelling or stimulus detection. In this tutorial, we provide an overview of research from emotion psychology which sets the ground for choosing adequate NLP methodology, and present existing resources and classification methods used for emotion analysis in texts. We further discuss appraisal theories and how events can be interpreted regarding their presumably caused emotion and briefly introduce emotion role labelling. In addition to these technical topics, we discuss the use cases of emotion analysis in text, their societal impact, ethical considerations, as well as the main challenges in the field.</abstract>
diff --git a/data/xml/2023.eamt.xml b/data/xml/2023.eamt.xml
index f411883c75..5f3fc5fbc6 100644
--- a/data/xml/2023.eamt.xml
+++ b/data/xml/2023.eamt.xml
@@ -14,10 +14,10 @@
       <editor><first>Sergi Alvarez</first><last>Vidal</last></editor>
       <editor><first>Nora</first><last>Aranberri</last></editor>
       <editor><first>Mara</first><last>Nunziatini</last></editor>
-      <editor><first>Carla Parra</first><last>Escartín</last></editor>
-      <editor><first>Mikel</first><last>Forcada</last></editor>
-      <editor><first>Maja</first><last>Popovic</last></editor>
-      <editor><first>Carolina</first><last>Scarton</last></editor>
+      <editor id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popovic</last></editor>
+      <editor id="carolina-scarton"><first>Carolina</first><last>Scarton</last></editor>
       <editor><first>Helena</first><last>Moniz</last></editor>
       <publisher>European Association for Machine Translation</publisher>
       <address>Tampere, Finland</address>
@@ -41,7 +41,7 @@
       <title>Tailoring Domain Adaptation for Machine Translation Quality Estimation</title>
       <author><first>Javad Pourmostafa Roshan</first><last>Sharami</last></author>
       <author><first>Dimitar</first><last>Shterionov</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Eva</first><last>Vanmassenhove</last></author>
       <author><first>Mirella De</first><last>Sisto</last></author>
       <author><first>Chris</first><last>Emmery</last></author>
@@ -54,7 +54,7 @@
     <paper id="3">
       <title>Example-Based Machine Translation from Textto a Hierarchical Representation of Sign Language</title>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Camille</first><last>Challant</last></author>
       <author><first>Claire</first><last>Danet</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
@@ -66,9 +66,9 @@
     <paper id="4">
       <title>Unsupervised Feature Selection for Effective Parallel Corpus Filtering</title>
       <author><first>Mikko</first><last>Aulamo</last></author>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>31–38</pages>
       <abstract>This work presents an unsupervised method of selecting filters and threshold values for the OpusFilter parallel corpus cleaning toolbox. The method clusters sentence pairs into noisy and clean categories and uses the features of the noisy cluster center as filtering parameters. Our approach utilizes feature importance analysis to disregard filters that do not differentiate between clean and noisy data. A randomly sampled subset of a given corpus is used for filter selection and ineffective filters are not run for the full corpus. We use a set of automatic evaluation metrics to assess the quality of translation models trained with data filtered by our method and data filtered with OpusFilter’s default parameters. The trained models cover English-German and English-Ukrainian in both directions. The proposed method outperforms the default parameters in all translation directions for almost all evaluation metrics.</abstract>
       <url hash="88e6b0d5">2023.eamt-1.4</url>
@@ -87,7 +87,7 @@
       <title><fixed-case>BLEU</fixed-case> Meets <fixed-case>COMET</fixed-case>: Combining Lexical and Neural Metrics Towards Robust Machine Translation Evaluation</title>
       <author><first>Taisiya</first><last>Glushkova</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>47–58</pages>
       <abstract>Although neural-based machine translation evaluation metrics, such as COMET or BLEURT, have achieved strong correlations with human judgements, they are sometimes unreliable in detecting certain phenomena that can be considered as critical errors, such as deviations in entities and numbers. In contrast, traditional evaluation metrics such as BLEU or chrF, which measure lexical or character overlap between translation hypotheses and human references, have lower correlations with human judgements but are sensitive to such deviations. In this paper, we investigate several ways of combining the two approaches in order to increase robustness of state-of-the-art evaluation methods to translations with critical errors. We show that by using additional information during training, such as sentence-level features and word-level tags, the trained metrics improve their capability to penalize translations with specific troublesome phenomena, which leads to gains in correlations with humans and on the recent DEMETR benchmark on several language pairs.</abstract>
       <url hash="2db75d15">2023.eamt-1.6</url>
@@ -97,7 +97,7 @@
       <title>Exploiting large pre-trained models for low-resource neural machine translation</title>
       <author><first>Aarón</first><last>Galiano-Jiménez</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>59–68</pages>
       <abstract>Pre-trained models have drastically changed the field of natural language processing by providing a way to leverage large-scale language representations to various tasks. Some pre-trained models offer general-purpose representations, while others are specialized in particular tasks, like neural machine translation (NMT). Multilingual NMT-targeted systems are often fine-tuned for specific language pairs, but there is a lack of evidence-based best-practice recommendations to guide this process. Moreover, the trend towards even larger pre-trained models has made it challenging to deploy them in the computationally restrictive environments typically found in developing regions where low-resource languages are usually spoken. We propose a pipeline to tune the mBART50 pre-trained model to 8 diverse low-resource language pairs, and then distil the resulting system to obtain lightweight and more sustainable models. Our pipeline conveniently exploits back-translation, synthetic corpus filtering, and knowledge distillation to deliver efficient, yet powerful bilingual translation models 13 times smaller than the original pre-trained ones, but with close performance in terms of BLEU.</abstract>
@@ -157,7 +157,7 @@
       <author><first>Beatriz</first><last>Silva</last></author>
       <author><first>Marianna</first><last>Buchicchio</last></author>
       <author><first>José G. C.</first><last>de Souza</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>115–124</pages>
       <abstract>This paper aims to investigate the effectiveness of the k-Nearest Neighbor Machine Translation model (kNN-MT) in real-world scenarios. kNN-MT is a retrieval-augmented framework that combines the advantages of parametric models with non-parametric datastores built using a set of parallel sentences. Previous studies have primarily focused on evaluating the model using only the BLEU metric and have not tested kNN-MT in real world scenarios. Our study aims to fill this gap by conducting a comprehensive analysis on various datasets comprising different language pairs and different domains, using multiple automatic metrics and expert evaluated Multidimensional Quality Metrics (MQM). We compare kNN-MT with two alternate strategies: fine-tuning all the model parameters and adapter-based finetuning. Finally, we analyze the effect of the datastore size on translation quality, and we examine the number of entries necessary to bootstrap and configure the index.</abstract>
       <url hash="60c208d3">2023.eamt-1.12</url>
@@ -166,7 +166,7 @@
     <paper id="13">
       <title>Evaluation of <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation of Emotion-Loaded Microblog Texts: A Human Annotated Dataset for the Quality Assessment of Emotion Translation</title>
       <author><first>Shenbin</first><last>Qian</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Felix Do</first><last>Carmo</last></author>
       <author><first>Qiuliang</first><last>Li</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
@@ -181,7 +181,7 @@
       <author><first>Romain</first><last>Silvestri</last></author>
       <author><first>Georgios</first><last>Vernikos</last></author>
       <author><first>Ljiljana</first><last>Dolamic</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>137–146</pages>
       <abstract>Subword tokenization is the de-facto standard for tokenization in neural language models and machine translation systems. Three advantages are frequently put forward in favor of subwords: shorter encoding of frequent tokens, compositionality of subwords, and ability to deal with unknown words. As their relative importance is not entirely clear yet, we propose a tokenization approach that enables us to separate frequency (the first advantage) from compositionality, thanks to the use of Huffman coding, which tokenizes words using a fixed amount of symbols. Experiments with CS-DE, EN-FR and EN-DE NMT show that frequency alone accounts for approximately 90% of the BLEU scores reached by BPE, hence compositionality has less importance than previously thought.</abstract>
       <url hash="0dafc96c">2023.eamt-1.14</url>
@@ -191,7 +191,7 @@
       <title>What Works When in Context-aware Neural Machine Translation?</title>
       <author><first>Harritxu</first><last>Gete</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>147–156</pages>
       <abstract>Document-level Machine Translation has emerged as a promising means to enhance automated translation quality, but it is currently unclear how effectively context-aware models use the available context during translation. This paper aims to provide insight into the current state of models based on input concatenation, with an in-depth evaluation on English–German and English–French standard datasets. We notably evaluate the impact of data bias, antecedent part-of-speech, context complexity, and the syntactic function of the elements involved in discursive phenomena. Our experimental results indicate that the selected models do improve the overall translation in context, with varying sensitivity to the different factors we examined. We notably show that the selected context-aware models operate markedly better on regular syntactic configurations involving subject antecedents and pronouns, with degraded performance as the configurations become more dissimilar.</abstract>
       <url hash="8ad9deb4">2023.eamt-1.15</url>
@@ -209,7 +209,7 @@
     <paper id="17">
       <title>The <fixed-case>MT</fixed-case>@<fixed-case>BZ</fixed-case> corpus: machine translation &amp; legal language</title>
       <author><first>Flavia</first><last>De Camillis</last></author>
-      <author><first>Egon W.</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon W.</first><last>Stemle</last></author>
       <author><first>Elena</first><last>Chiocchetti</last></author>
       <author><first>Francesco</first><last>Fernicola</last></author>
       <pages>171–180</pages>
@@ -260,7 +260,7 @@
       <title>Adaptive Machine Translation with Large Language Models</title>
       <author><first>Yasmin</first><last>Moslem</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>227–237</pages>
       <abstract>Consistency is a key requirement of high-quality translation. It is especially important to adhere to pre-approved terminology and adapt to corrected translations in domain-specific projects. Machine translation (MT) has achieved significant progress in the area of domain adaptation. However, real-time adaptation remains challenging. Large-scale language models (LLMs) have recently shown interesting capabilities of in-context learning, where they learn to replicate certain input-output text generation patterns, without further fine-tuning. By feeding an LLM at inference time with a prompt that consists of a list of translation pairs, it can then simulate the domain and style characteristics. This work aims to investigate how we can utilize in-context learning to improve real-time adaptive MT. Our extensive experiments show promising results at translation time. For example, GPT-3.5 can adapt to a set of in-domain sentence pairs and/or terminology while translating a new sentence. We observe that the translation quality with few-shot in-context learning can surpass that of strong encoder-decoder MT systems, especially for high-resource languages. Moreover, we investigate whether we can combine MT from strong encoder-decoder models with fuzzy matches, which can further improve translation quality, especially for less supported languages. We conduct our experiments across five diverse language pairs, namely English-to-Arabic (EN-AR), English-to-Chinese (EN-ZH), English-to-French (EN-FR), English-to-Kinyarwanda (EN-RW), and English-to-Spanish (EN-ES).</abstract>
@@ -271,7 +271,7 @@
       <title>Segment-based Interactive Machine Translation at a Character Level</title>
       <author><first>Angel</first><last>Navarro</last></author>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>239–248</pages>
       <abstract>To produce high quality translations, human translators need to review and correct machine translation hypothesis in what it is known as post-editing. In order to reduce the human effort of this process, interactive machine translation proposed a collaborative framework in which human and machine work together to generate the translations. Among the many protocols proposed throughout the years, the segment-based one established a paradigm in which the post-editor was allowed to validate correct word sequences from a translation hypothesis and introduced a word correction to help the system improve the next hypothesis. In this work we propose an extension to this protocol: instead of having to the type the complete word correction, the system will complete the user’s correction while they are typing. We evaluated our proposal under a simulated environment, achieving a significant reduction of the human effort.</abstract>
       <url hash="9966b096">2023.eamt-1.23</url>
@@ -306,7 +306,7 @@
     <paper id="27">
       <title>Analysing Mistranslation of Emotions in Multilingual Tweets by Online <fixed-case>MT</fixed-case> Tools</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Rocio Caro</first><last>Quintana</last></author>
       <author><first>Felix Do</first><last>Carmo</last></author>
       <author><first>Leonardo</first><last>Zilio</last></author>
@@ -419,7 +419,7 @@
       <title>How can machine translation help generate <fixed-case>A</fixed-case>rab melodic improvisation?</title>
       <author><first>Fadi</first><last>Al-Ghawanmeh</last></author>
       <author><first>Alexander Refsum</first><last>Jensenius</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>385–392</pages>
       <abstract>This article presents a system to generate Arab music improvisation using machine translation (MT). To reach this goal, we developed a MT model to translate a vocal improvisation into an automatic instrumental oud (Arab lute) response. Given the melodic and non-metric musical form, it was necessary to develop efficient textual representations in order for classical MT models to be as successful as in common NLP applications. We experimented with Statistical and Neural MT to train our parallel corpus (Vocal → Instrument) of 6991 sentences. The best model was then used to generate improvisation by iteratively translating the translations of the most common patterns of each maqam (n-grams), producing elaborated variations conditioned to listener feedback. We constructed a dataset of 717 instrumental improvisations to extract their n-grams. Objective evaluation of MT was conducted at two levels: a sentence-level evaluation using the BLEU metric, and a higher level evaluation using musically informed metrics. Objective measures were consistent with one another. Subjective evaluations by experts from the maqam music tradition were promising, and a useful reference for understanding objective results.</abstract>
       <url hash="1e2f1719">2023.eamt-1.38</url>
@@ -492,7 +492,7 @@
       <author><first>João</first><last>Godinho</last></author>
       <author><first>Pedro</first><last>Coelho</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>451–460</pages>
       <abstract>This paper illustrates a new methodology based on Test Suites (Avramidis et al., 2018) with focus on Business Critical Errors (BCEs) (Stewart et al., 2022) to evaluate the output of Machine Translation (MT) and Quality Estimation (QE) systems. We demonstrate the value of relying on semi-automatic evaluation done through scalable BCE-focused Test Suites to monitor both MT and QE systems’ performance for 8 language pairs (LPs) and a total of 4 error categories. This approach allows us to not only track the impact of new features and implementations in a real business environment, but also to identify strengths and weaknesses in models regarding different error types, and subsequently know what to improve henceforth.</abstract>
       <url hash="fc868183">2023.eamt-1.44</url>
@@ -505,7 +505,7 @@
       <author><first>Meegan</first><last>Gower</last></author>
       <author><first>Sneha</first><last>Rautmare</last></author>
       <author><first>Nishtha</first><last>Jain</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>461–470</pages>
       <abstract>In the context of an epidemiological study involving multilingual social media, this paper reports on the ability of machine translation systems to preserve content relevant for a document classification task designed to determine whether the social media text is related to covid. The results indicate that machine translation does provide a feasible basis for scaling epidemiological social media surveillance to multiple languages. Moreover, a qualitative error analysis revealed that the majority of classification errors are not caused by MT errors.</abstract>
       <url hash="6f4a12ab">2023.eamt-1.45</url>
@@ -613,7 +613,7 @@
       <author><first>Victor Ubieto</first><last>Nogales</last></author>
       <author><first>Santiago Egea</first><last>Gomez</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Adrián</first><last>Núnez-Marcos</last></author>
       <author><first>Irene</first><last>Murtagh</last></author>
       <author><first>Euan</first><last>McGill</last></author>
@@ -639,16 +639,16 @@
       <title><fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>u: Massive collection and curation of monolingual and bilingual data: focus on under-resourced languages</title>
       <author><first>Marta</first><last>Bañón</last></author>
       <author><first>Mălina</first><last>Chichirău</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Mikel</first><last>Forcada</last></author>
       <author><first>Aarón</first><last>Galiano-Jiménez</last></author>
       <author><first>Taja</first><last>Kuzman</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Rik</first><last>van Noord</last></author>
       <author><first>Leopoldo Pla</first><last>Sempere</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Peter</first><last>Rupnik</last></author>
-      <author><first>Vit</first><last>Suchomel</last></author>
+      <author id="vit-suchomel"><first>Vit</first><last>Suchomel</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
       <pages>505–506</pages>
@@ -664,7 +664,7 @@
       <author><first>Alessia</first><last>Battisti</last></author>
       <author><first>Michèle</first><last>Berger</last></author>
       <author><first>Richard</first><last>Bowden</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Necati Cihan</first><last>Camgoz</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
@@ -716,7 +716,7 @@
     <paper id="60">
       <title><fixed-case>PROPICTO</fixed-case>: Developing Speech-to-Pictograph Translation Systems to Enhance Communication Accessibility</title>
       <author><first>Lucía</first><last>Ormaechea</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Maximin</first><last>Coavoux</last></author>
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
@@ -738,8 +738,8 @@
       <author><first>Nikolay</first><last>Bogoychev</last></author>
       <author><first>Shaoxiong</first><last>Ji</last></author>
       <author><first>Graeme</first><last>Nail</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Jelmer</first><last>van der Linde</last></author>
       <author><first>Jaume</first><last>Zaragoza</last></author>
       <pages>517–518</pages>
diff --git a/data/xml/2023.emnlp.xml b/data/xml/2023.emnlp.xml
index 182766c9fc..f2c8dfb971 100644
--- a/data/xml/2023.emnlp.xml
+++ b/data/xml/2023.emnlp.xml
@@ -68,7 +68,7 @@
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Hou</first><last>Chan</last></author>
       <author><first>Tarek</first><last>Abdelzaher</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>43-57</pages>
       <abstract>Automatic response forecasting for news media plays a crucial role in enabling content producers to efficiently predict the impact of news releases and prevent unexpected negative outcomes such as social conflict and moral injury. To effectively forecast responses, it is essential to develop measures that leverage the social dynamics and contextual information surrounding individuals, especially in cases where explicit profiles or historical actions of the users are limited (referred to as lurkers). As shown in a previous study, 97% of all tweets are produced by only the most active 25% of users. However, existing approaches have limited exploration of how to best process and utilize these important features. To address this gap, we propose a novel framework, named SocialSense, that leverages a large language model to induce a belief-centered graph on top of an existent social network, along with graph-based propagation to capture social dynamics. We hypothesize that the induced graph that bridges the gap between distant users who share similar beliefs allows the model to effectively capture the response patterns. Our method surpasses existing state-of-the-art in experimental evaluations for both zero-shot and supervised settings, demonstrating its effectiveness in response forecasting. Moreover, the analysis reveals the framework’s capability to effectively handle unseen user and lurker scenarios, further highlighting its robustness and practical applicability.</abstract>
@@ -142,7 +142,7 @@
       <title>Evaluating and Modeling Attribution for Cross-Lingual Question Answering</title>
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>John</first><last>Wieting</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Livio</first><last>Soares</last></author>
@@ -163,7 +163,7 @@
       <author><first>Orevaoghene</first><last>Ahia</last></author>
       <author><first>Abraham Toluwalase</first><last>Owodunni</last></author>
       <author><first>Odunayo</first><last>Ogundepo</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>158-168</pages>
       <abstract>In this study, we highlight the importance of enhancing the quality of pretraining data in multilingual language models. Existing web crawls have demonstrated quality issues, particularly in the context of low-resource languages. Consequently, we introduce a new multilingual pretraining corpus for 16 African languages, designed by carefully auditing existing pretraining corpora to understand and rectify prevalent quality issues. To compile this dataset, we undertake a rigorous examination of current data sources for thirteen languages within one of the most extensive multilingual web crawls, mC4, and extract cleaner data through meticulous auditing and improved web crawling strategies. Subsequently, we pretrain a new T5-based model on this dataset and evaluate its performance on multiple downstream tasks. Our model demonstrates better downstream effectiveness over existing pretrained models across four NLP tasks, underscoring the critical role data quality plays in pretraining language models in low-resource scenarios. Specifically, on cross-lingual QA evaluation, our new model is more than twice as effective as multilingual T5. All code, data and models are publicly available at https://github.com/castorini/AfriTeVa-keji.</abstract>
@@ -189,7 +189,7 @@
       <author><first>Huao</first><last>Li</last></author>
       <author><first>Yu</first><last>Chong</last></author>
       <author><first>Simon</first><last>Stepputtis</last></author>
-      <author><first>Joseph</first><last>Campbell</last></author>
+      <author id="joseph-p-campbell"><first>Joseph</first><last>Campbell</last></author>
       <author><first>Dana</first><last>Hughes</last></author>
       <author><first>Charles</first><last>Lewis</last></author>
       <author><first>Katia</first><last>Sycara</last></author>
@@ -206,7 +206,7 @@
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Leon</first><last>Weber-Genzel</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>193-203</pages>
       <abstract>Language understanding is a multi-faceted cognitive capability, which the Natural Language Processing (NLP) community has striven to model computationally for decades. Traditionally, facets of linguistic intelligence have been compartmentalized into tasks with specialized model architectures and corresponding evaluation protocols. With the advent of large language models (LLMs) the community has witnessed a dramatic shift towards general purpose, task-agnostic approaches powered by generative models. As a consequence, the traditional compartmentalized notion of language tasks is breaking down, followed by an increasing challenge for evaluation and analysis. At the same time, LLMs are being deployed in more real-world scenarios, including previously unforeseen zero-shot setups, increasing the need for trustworthy and reliable systems. Therefore, we argue that it is time to rethink what constitutes tasks and model evaluation in NLP, and pursue a more holistic view on language, placing trustworthiness at the center. Towards this goal, we review existing compartmentalized approaches for understanding the origins of a model’s functional capacity, and provide recommendations for more multi-faceted evaluation protocols.</abstract>
       <url hash="f72a9601">2023.emnlp-main.14</url>
@@ -271,7 +271,7 @@
     <paper id="19">
       <title>Understanding Compositional Data Augmentation in Typologically Diverse Morphological Inflection</title>
       <author><first>Farhan</first><last>Samir</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>277-291</pages>
       <abstract>Data augmentation techniques are widely used in low-resource automatic morphological inflection to address the issue of data sparsity. However, the full implications of these techniques remain poorly understood. In this study, we aim to shed light on the theoretical aspects of the data augmentation strategy StemCorrupt, a method that generates synthetic examples by randomly substituting stem characters in existing gold standard training examples. Our analysis uncovers that StemCorrupt brings about fundamental changes in the underlying data distribution, revealing inherent compositional concatenative structure. To complement our theoretical analysis, we investigate the data-efficiency of StemCorrupt. Through evaluation across a diverse set of seven typologically distinct languages, we demonstrate that selecting a subset of datapoints with both high diversity <i>and</i> high predictive uncertainty significantly enhances the data-efficiency of compared to competitive baselines. Furthermore, we explore the impact of typological features on the choice of augmentation strategy and find that languages incorporating non-concatenativity, such as morphonological alternations, derive less benefit from synthetic examples with high predictive uncertainty. We attribute this effect to phonotactic violations induced by StemCorrupt, emphasizing the need for further research to ensure optimal performance across the entire spectrum of natural language morphology.</abstract>
       <url hash="564160d3">2023.emnlp-main.19</url>
@@ -285,7 +285,7 @@
       <author><first>Yifan</first><last>Du</last></author>
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>292-305</pages>
       <abstract>Inspired by the superior language abilities of large language models (LLM), large vision-language models (LVLM) have been recently proposed by integrating powerful LLMs for improving the performance on complex multimodal tasks. Despite the promising progress on LVLMs, we find that they suffer from object hallucinations, i.e., they tend to generate objects inconsistent with the target images in the descriptions. To investigate it, this work presents the first systematic study on object hallucination of LVLMs. We conduct the evaluation experiments on several representative LVLMs, and show that they mostly suffer from severe object hallucination issues. We further discuss that the visual instructions may influence the hallucination, and find that: objects that frequently appear in the visual instructions or co-occur with the image objects are obviously prone to be hallucinated by LVLMs. Besides, we further design a polling-based query method called POPE for better evaluation of object hallucination. Experiment results show that our POPE can evaluate object hallucination in a more stable and flexible way.</abstract>
@@ -315,7 +315,7 @@
       <title>Parameter-efficient Tuning for Large Language Model without Calculating Its Gradients</title>
       <author><first>Feihu</first><last>Jin</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>321-330</pages>
       <abstract>Fine-tuning all parameters of large language models (LLMs) requires significant computational resources and is time-consuming. Recent parameter-efficient tuning methods such as Adapter tuning, Prefix tuning, and LoRA allow for updating a small subset of parameters in large language models. However, they can only save approximately 30% of the training memory requirements, due to the problem that gradient computation and backpropagation are still necessary for these methods. This paper proposes a novel parameter-efficient tuning method for LLMs without calculating their gradients. Leveraging the discernible similarities between the parameter-efficient modules of the same task learned by both large and small language models, we put forward a strategy for transferring the parameter-efficient modules, originally derived from small language models to much larger ones. To ensure a smooth and effective adaptation process, we further introduce a Bridge model to guarantee dimensional consistency while also stimulating a dynamic interaction between the models. We demonstrate the effectiveness of our method using the T5 and GPT-2 series of language models on the SuperGLUE benchmark. Our method achieves comparable performance to both fine-tuning and parameter-efficient tuning on large language models without needing gradient-based optimization. Additionally, our method achieves up to 5.7x memory reduction compared to parameter-efficient tuning.</abstract>
       <url hash="54315f2a">2023.emnlp-main.22</url>
@@ -479,7 +479,7 @@
       <title>Selectively Answering Ambiguous Questions</title>
       <author><first>Jeremy</first><last>Cole</last></author>
       <author><first>Michael</first><last>Zhang</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Julian</first><last>Eisenschlos</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
@@ -520,7 +520,7 @@
     <paper id="38">
       <title>Pragmatic Reasoning Unlocks Quantifier Semantics for Foundation Models</title>
       <author><first>Yiyuan</first><last>Li</last></author>
-      <author><first>Rakesh</first><last>Menon</last></author>
+      <author id="rakesh-r-menon"><first>Rakesh</first><last>Menon</last></author>
       <author><first>Sayan</first><last>Ghosh</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <pages>573-591</pages>
@@ -705,7 +705,7 @@
       <author><first>Peter</first><last>West</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>790-807</pages>
       <abstract>Ambiguity is an intrinsic feature of natural language. Managing ambiguity is a key part of human language understanding, allowing us to anticipate misunderstanding as communicators and revise our interpretations as listeners. As language models are increasingly employed as dialogue interfaces and writing aids, handling ambiguous language is critical to their success. We capture ambiguity in a sentence through its effect on entailment relations with another sentence, and collect AmbiEnt, a linguist-annotated benchmark of 1,645 examples with diverse kinds of ambiguity. We design a suite of tests based on AmbiEnt, presenting the first evaluation of pretrained LMs to recognize ambiguity and disentangle possible meanings. We find that the task remains extremely challenging, including for GPT-4, whose generated disambiguations are considered correct only 32% of the time in crowdworker evaluation, compared to 90% for disambiguations in our dataset. Finally, to illustrate the value of ambiguity-sensitive tools, we show that a multilabel NLI model can flag political claims in the wild that are misleading due to ambiguity. We encourage the field to rediscover the importance of ambiguity for NLP.</abstract>
@@ -839,7 +839,7 @@
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Benjamin</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>953-967</pages>
       <abstract>Modern NLP models are often trained over large untrusted datasets, raising the potential for a malicious adversary to compromise model behaviour. For instance, backdoors can be implanted through crafting training instances with a specific textual trigger and a target label. This paper posits that backdoor poisoning attacks exhibit a spurious correlation between simple text features and classification labels, and accordingly, proposes methods for mitigating spurious correlation as means of defence. Our empirical study reveals that the malicious triggers are highly correlated to their target labels; therefore such correlations are extremely distinguishable compared to those scores of benign features, and can be used to filter out potentially problematic instances. Compared with several existing defences, our defence method significantly reduces attack success rates across backdoor attacks, and in the case of insertion-based attacks, our method provides a near-perfect defence.</abstract>
       <url hash="7a833a30">2023.emnlp-main.60</url>
@@ -859,7 +859,7 @@
       <author><first>Yifeng</first><last>Lu</last></author>
       <author><first>Denny</first><last>Zhou</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>968-979</pages>
       <abstract>We present symbol tuning - finetuning language models on in-context input-label pairs where natural language labels (e.g., “positive/negative sentiment”) are replaced with arbitrary symbols (e.g., “foo/bar”). Symbol tuning leverages the intuition that when a model cannot use instructions or natural language labels to figure out a task, it must instead do so by learning the input-label mappings. We experiment with symbol tuning across PaLM models up to 540B parameters and observe benefits across various settings. First, symbol tuning boosts performance on unseen in-context learning tasks and is much more robust to underspecified prompts, such as those without instructions or without natural language labels. Second, symbol-tuned models are much stronger at algorithmic reasoning tasks, with up to 18.2% better performance on the List Functions benchmark and up to 15.3% better performance on the Simple Turing Concepts benchmark. Finally, symbol-tuned models show large improvements in following flipped-labels presented in-context, meaning that they are more capable of using in-context information to override prior knowledge.</abstract>
       <url hash="cb5472a7">2023.emnlp-main.61</url>
@@ -870,7 +870,7 @@
     <paper id="62">
       <title>The neural dynamics of word recognition and integration</title>
       <author><first>Jon</first><last>Gauthier</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>980-995</pages>
       <abstract>Listeners recognize and integrate words in rapid and noisy everyday speech by combining expectations about upcoming content with incremental sensory evidence. We present a computational model of word recognition which formalizes this perceptual process in Bayesian decision theory. We fit this model to explain scalp EEG signals recorded as subjects passively listened to a fictional story, revealing both the dynamics of the online auditory word recognition process and the neural correlates of the recognition and integration of words. The model reveals distinct neural processing of words depending on whether or not they can be quickly recognized. While all words trigger a neural response characteristic of probabilistic integration — voltage modulations predicted by a word’s surprisal in context — these modulations are amplified for words which require more than roughly 150 ms of input to be recognized. We observe no difference in the latency of these neural responses according to words’ recognition times. Our results support a two-part model of speech comprehension, combining an eager and rapid process of word recognition with a temporally independent process of word integration. However, we also developed alternative models of the scalp EEG signal not incorporating word recognition dynamics which showed similar performance improvements. We discuss potential future modeling steps which may help to separate these hypotheses.</abstract>
       <url hash="1eaa19e3">2023.emnlp-main.62</url>
@@ -1040,7 +1040,7 @@
       <author><first>Ruizhe</first><last>Chen</last></author>
       <author><first>Xiangru</first><last>Tang</last></author>
       <author><first>Yumo</first><last>Xu</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <pages>1157-1172</pages>
       <abstract>People primarily consult tables to conduct data analysis or answer specific questions. Text generation systems that can provide accurate table summaries tailored to users’ information needs can facilitate more efficient access to relevant data insights. Motivated by this, we define a new query-focused table summarization task, where text generation models have to perform human-like reasoning and analysis over the given table to generate a tailored summary. We introduce a new benchmark named QTSumm for this task, which contains 7,111 human-annotated query-summary pairs over 2,934 tables covering diverse topics. We investigate a set of strong baselines on QTSumm, including text generation, table-to-text generation, and large language models. Experimental results and manual analysis reveal that the new task presents significant challenges in table-to-text generation for future research. Moreover, we propose a new approach named ReFactor, to retrieve and reason over query-relevant information from tabular data to generate several natural language facts. Experimental results demonstrate that ReFactor can bring effective improvements to baselines by concatenating the generated facts to the model input. Our data and code are publicly available at https://github.com/yale-nlp/QTSumm.</abstract>
@@ -1133,7 +1133,7 @@
       <author><first>Jiacheng</first><last>Liu</last></author>
       <author><first>Wenya</first><last>Wang</last></author>
       <author><first>Dianzhuo</first><last>Wang</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>1264-1287</pages>
@@ -1234,7 +1234,7 @@
       <author><first>Nicholas</first><last>Suwono</last></author>
       <author><first>Justin</first><last>Chen</last></author>
       <author><first>Tun</first><last>Hung</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>I-Bin</first><last>Liao</last></author>
       <author><first>Yung-Hui</first><last>Li</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
@@ -1287,7 +1287,7 @@
       <author><first>Donald</first><last>Metzler</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Neil</first><last>Houlsby</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Mostafa</first><last>Dehghani</last></author>
       <pages>1471-1486</pages>
       <abstract>Scaling language models improves performance but comes with significant computational costs. This paper proposes UL2R, a method that substantially improves existing language models and their scaling curves with a relatively tiny amount of extra compute. The key idea is to continue training a state-of-the-art large language model on a few more steps with UL2’s mixture-of-denoiser objective. We show that, with almost negligible extra computational costs and no new sources of data, we are able to substantially improve the scaling properties of large language models on downstream metrics. In this paper, we continue training a baseline language model, PaLM, with ULR2, introducing a new set of models at 8B, 62B, and 540B scale which we call U-PaLM. Impressively, at 540B scale, we show an approximately 2x computational savings rate where U-PaLM achieves the same performance as the final PaLM 540B model at around half its computational budget (i.e., saving ~4.4 million TPUv4 hours). We further show that this improved scaling curve leads to “emergent abilities” on challenging BIG-Bench tasks—for instance, U-PaLM does much better on some tasks or demonstrates better quality at much smaller scale (62B as opposed to 540B). Overall, we show that U-PaLM outperforms PaLM on many few-shot setups, including reasoning tasks with chain-of-thought (e.g., GSM8K), multilingual tasks (MGSM, TydiQA), MMLU and challenging BIG-Bench tasks.</abstract>
@@ -1302,7 +1302,7 @@
       <author><first>Taiwei</first><last>Shi</last></author>
       <author><first>Caleb</first><last>Ziems</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <pages>1487-1505</pages>
@@ -1347,7 +1347,7 @@
       <author><first>Fuli</first><last>Feng</last></author>
       <author><first>Yixin</first><last>Cao</last></author>
       <author><first>Jizhi</first><last>Zhang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1539-1554</pages>
       <abstract>Large Language Model (LLM) has demonstrated significant ability in various Natural Language Processing tasks. However, their effectiveness is highly dependent on the phrasing of the task prompt, leading to research on automatic prompt optimization using labeled task data. We reveal that these prompt optimization techniques are vulnerable to distribution shifts such as subpopulation shifts, which are common for LLMs in real-world scenarios such as customer reviews analysis. In this light, we propose a new problem of robust prompt optimization for LLMs against distribution shifts, which requires the prompt optimized over the labeled source group can simultaneously generalize to an unlabeled target group. To solve this problem, we propose Generalized Prompt Optimization framework , which incorporates the unlabeled data from the target group into prompt optimization. Extensive experimental results demonstrate the effectiveness of the proposed framework with significant performance improvement on the target group and comparable performance on the source group.</abstract>
       <url hash="332cd46d">2023.emnlp-main.95</url>
@@ -1509,7 +1509,7 @@
       <author><first>Jinheon</first><last>Baek</last></author>
       <author><first>Soyeong</first><last>Jeong</last></author>
       <author><first>Minki</first><last>Kang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <author><first>Sung</first><last>Hwang</last></author>
       <pages>1720-1736</pages>
       <abstract>Recent Language Models (LMs) have shown impressive capabilities in generating texts with the knowledge internalized in parameters. Yet, LMs often generate the factually incorrect responses to the given queries, since their knowledge may be inaccurate, incomplete, and outdated. To address this problem, previous works propose to augment LMs with the knowledge retrieved from an external knowledge source. However, such approaches often show suboptimal text generation performance due to two reasons: 1) the model may fail to retrieve the knowledge relevant to the given query, or 2) the model may not faithfully reflect the retrieved knowledge in the generated text. To overcome these, we propose to verify the output and the knowledge of the knowledge-augmented LMs with a separate verifier, which is a small LM that is trained to detect those two types of errors through instruction-finetuning. Then, when the verifier recognizes an error, we can rectify it by either retrieving new knowledge or generating new text. Further, we use an ensemble of the outputs from different instructions with a single verifier to enhance the reliability of the verification processes. We validate the effectiveness of the proposed verification steps on multiple question answering benchmarks, whose results show that the proposed verifier effectively identifies retrieval and generation errors, allowing LMs to provide more factually correct outputs. Our code is available at https://github.com/JinheonBaek/KALMV.</abstract>
@@ -1536,7 +1536,7 @@
       <title>Failures Pave the Way: Enhancing Large Language Models through Tuning-free Rule Accumulation</title>
       <author><first>Zeyuan</first><last>Yang</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>1751-1777</pages>
       <abstract>Large Language Models (LLMs) have showcased impressive performance. However, due to their inability to capture relationships among samples, these frozen LLMs inevitably keep repeating similar mistakes. In this work, we propose our Tuning-free Rule Accumulation (TRAN) framework, which guides LLMs in improving their performance by learning from previous mistakes. Considering data arrives sequentially, LLMs gradually accumulate rules from incorrect cases, forming a rule collection. These rules are then utilized by the LLMs to avoid making similar mistakes when processing subsequent inputs. Moreover, the rules remain independent of the primary prompts, seamlessly complementing prompt design strategies. Experimentally, we show that TRAN improves over recent baselines by a large margin.</abstract>
       <url hash="c2b3fe44">2023.emnlp-main.109</url>
@@ -1562,7 +1562,7 @@
       <author><first>Corby</first><last>Rosset</last></author>
       <author><first>Arnold</first><last>Overwijk</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>1796-1812</pages>
       <abstract>In this paper we improve the zero-shot generalization ability of language models via Mixture-Of-Memory Augmentation (MoMA), a mechanism that retrieves augmentation documents from multiple information corpora (external memories), with the option to “plug in” unseen memory at inference time. We develop a joint learning mechanism that trains the augmentation component with latent labels derived from the end retrieval task, paired with hard negatives from the memory mixture. We instantiate the model in a zero-shot dense retrieval setting by augmenting strong T5-based retrievers with MoMA. With only T5-base, our model obtains strong zero-shot retrieval accuracy on the eighteen tasks included in the standard BEIR benchmark, outperforming some systems with larger model sizes. As a plug-in-play model, our model can efficiently generalize to any unseen corpus, meanwhile achieving comparable or even better performance than methods relying on target-specific pretraining. Our analysis further illustrates the necessity of augmenting with mixture-of-memory for robust generalization, the benefits of augmentation learning, and how MoMA utilizes the plug-in memory at inference time without changing its parameters. Our code can be found at https://github.com/gesy17/MoMA.</abstract>
       <url hash="a7d3c10f">2023.emnlp-main.111</url>
@@ -1587,7 +1587,7 @@
     <paper id="113">
       <title>Towards Example-Based <fixed-case>NMT</fixed-case> with Multi-<fixed-case>L</fixed-case>evenshtein Transformers</title>
       <author><first>Maxime</first><last>Bouthors</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>1830-1846</pages>
       <abstract>Retrieval-Augmented Machine Translation (RAMT) is attracting growing attention. This is because RAMT not only improves translation metrics, but is also assumed to implement some form of domain adaptation. In this contribution, we study another salient trait of RAMT, its ability to make translation decisions more transparent by allowing users to go back to examples that contributed to these decisions. For this, we propose a novel architecture aiming to increase this transparency. This model adapts a retrieval-augmented version of the Levenshtein Transformer and makes it amenable to simultaneously edit multiple fuzzy matches found in memory. We discuss how to perform training and inference in this model, based on multi-way alignment algorithms and imitation learning. Our experiments show that editing several examples positively impacts translation scores, notably increasing the number of target spans that are copied from existing instances.</abstract>
@@ -1601,7 +1601,7 @@
       <author><first>Afra</first><last>Akyürek</last></author>
       <author><first>Eric</first><last>Pan</last></author>
       <author><first>Garry</first><last>Kuwanto</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>1847-1861</pages>
       <abstract>Even the most advanced language models remain susceptible to errors necessitating to modify these models without initiating a comprehensive retraining process. Model editing refers to the modification of a model’s knowledge or representations in a manner that produces the desired outcomes. Prior research primarily centered around editing factual data e.g. “Messi plays for Inter Miami” confining the definition of an edit to a knowledge triplet i.e. (subject, object, relation). However, as the applications of language models expand, so do the diverse ways in which we wish to edit and refine their outputs. In this study, we broaden the scope of the editing problem to include an array of editing cases such as debiasing and rectifying reasoning errors and define an edit as any natural language expression that solicits a change in the model’s outputs. We are introducing DUnE, an editing benchmark where edits are natural language sentences and propose that DUnE presents a challenging yet relevant task. To substantiate this claim, we conduct an extensive series of experiments testing various editing approaches to address DUnE, demonstrating their respective strengths and weaknesses. We argue that retrieval-augmented language modeling can outperform specialized editing techniques and neither set of approaches has fully solved the generalized editing problem covered by our benchmark.</abstract>
       <url hash="5f4feef2">2023.emnlp-main.114</url>
@@ -1755,7 +1755,7 @@
       <author><first>Sriram</first><last>Ganapathy</last></author>
       <author><first>Shikhar</first><last>Vashishth</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>2033-2045</pages>
       <abstract>Language Models (LMs) pre-trained with selfsupervision on large text corpora have become the default starting point for developing models for various NLP tasks. Once the pre-training corpus has been assembled, all data samples in the corpus are treated with equal importance during LM pre-training. However, due to varying levels of relevance and quality of data, equal importance to all the data samples may not be the optimal choice. While data reweighting has been explored in the context of task-specific supervised learning and LM fine-tuning, model-driven reweighting for pretraining data has not been explored. We fill this important gap and propose PRESENCE, a method for jointly reweighting samples by leveraging self-influence (SI) scores as an indicator of sample importance and pre-training. PRESENCE promotes novelty and stability for model pre-training. Through extensive analysis spanning multiple model sizes, datasets, and tasks, we present PRESENCE as an important first step in the research direction of sample reweighting for pre-training language models.</abstract>
       <url hash="07b0af50">2023.emnlp-main.125</url>
@@ -1766,7 +1766,7 @@
     <paper id="126">
       <title><fixed-case>ACTOR</fixed-case>: Active Learning with Annotator-specific Classification Heads to Embrace Human Label Variation</title>
       <author id="xinpeng-wang-lmu"><first>Xinpeng</first><last>Wang</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>2046-2052</pages>
       <abstract>Label aggregation such as majority voting is commonly used to resolve annotator disagreement in dataset creation. However, this may disregard minority values and opinions. Recent studies indicate that learning from individual annotations outperforms learning from aggregated labels, though they require a considerable amount of annotation. Active learning, as an annotation cost-saving strategy, has not been fully explored in the context of learning from disagreement. We show that in the active learning setting, a multi-head model performs significantly better than a single-head model in terms of uncertainty estimation. By designing and evaluating acquisition functions with annotator-specific heads on two datasets, we show that group-level entropy works generally well on both datasets. Importantly, it achieves performance in terms of both prediction and uncertainty estimation comparable to full-scale training from disagreement, while saving 70% of the annotation budget.</abstract>
       <url hash="f0c5e02b">2023.emnlp-main.126</url>
@@ -1986,7 +1986,7 @@
       <title>A Diachronic Analysis of Paradigm Shifts in <fixed-case>NLP</fixed-case> Research: When, How, and Why?</title>
       <author><first>Aniket</first><last>Pramanick</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>2312-2326</pages>
       <abstract>Understanding the fundamental concepts and trends in a scientific field is crucial for keeping abreast of its continuous advancement. In this study, we propose a systematic framework for analyzing the evolution of research topics in a scientific field using causal discovery and inference techniques. We define three variables to encompass diverse facets of the evolution of research topics within NLP and utilize a causal discovery algorithm to unveil the causal connections among these variables using observational data. Subsequently, we leverage this structure to measure the intensity of these relationships. By conducting extensive experiments on the ACL Anthology corpus, we demonstrate that our framework effectively uncovers evolutionary trends and the underlying causes for a wide range of NLP research topics. Specifically, we show that tasks and methods are primary drivers of research in NLP, with datasets following, while metrics have minimal impact.</abstract>
@@ -2074,7 +2074,7 @@
       <author><first>Andrew M.</first><last>Bean</last></author>
       <author><first>Bertie</first><last>Vidgen</last></author>
       <author><first>Paul</first><last>Röttger</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <pages>2409-2430</pages>
       <abstract>Human feedback is increasingly used to steer the behaviours of Large Language Models (LLMs). However, it is unclear how to collect and incorporate feedback in a way that is efficient, effective and unbiased, especially for highly subjective human preferences and values. In this paper, we survey existing approaches for learning from human feedback, drawing on 95 papers primarily from the ACL and arXiv repositories. First, we summarise the past, pre-LLM trends for integrating human feedback into language models. Second, we give an overview of present techniques and practices, as well as the motivations for using feedback; conceptual frameworks for defining values and preferences; and how feedback is collected and from whom. Finally, we encourage a better future of feedback learning in LLMs by raising five unresolved conceptual and practical challenges.</abstract>
       <url hash="c4e863b6">2023.emnlp-main.148</url>
@@ -2192,7 +2192,7 @@
     <paper id="156">
       <title><fixed-case>NAIL</fixed-case>: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders</title>
       <author><first>Livio</first><last>Soares</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Jeremy</first><last>Cole</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <pages>2574-2589</pages>
@@ -2335,7 +2335,7 @@
       <title>Indicative Summarization of Long Discussions</title>
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Dominik</first><last>Schwabe</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <pages>2752-2788</pages>
       <abstract>Online forums encourage the exchange and discussion of different stances on many topics. Not only do they provide an opportunity to present one’s own arguments, but may also gather a broad cross-section of others’ arguments. However, the resulting long discussions are difficult to overview. This paper presents a novel unsupervised approach using large language models (LLMs) to generating indicative summaries for long discussions that basically serve as tables of contents. Our approach first clusters argument sentences, generates cluster labels as abstractive summaries, and classifies the generated cluster labels into argumentation frames resulting in a two-level summary. Based on an extensively optimized prompt engineering approach, we evaluate 19 LLMs for generative cluster labeling and frame classification. To evaluate the usefulness of our indicative summaries, we conduct a purpose-driven user study via a new visual interface called **Discussion Explorer**: It shows that our proposed indicative summaries serve as a convenient navigation tool to explore long discussions.</abstract>
@@ -2396,7 +2396,7 @@
       <author><first>Sha</first><last>Li</last></author>
       <author><first>Qiusi</first><last>Zhan</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <pages>2823-2838</pages>
@@ -2409,7 +2409,7 @@
     <paper id="171">
       <title>Hierarchical Pretraining on Multimodal Electronic Health Records</title>
       <author><first>Xiaochen</first><last>Wang</last></author>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last></author>
+      <author><first>Junyu</first><last>Luo</last></author>
       <author><first>Jiaqi</first><last>Wang</last></author>
       <author><first>Ziyi</first><last>Yin</last></author>
       <author><first>Suhan</first><last>Cui</last></author>
@@ -2455,7 +2455,7 @@
       <author><first>Weilong</first><last>Dong</last></author>
       <author><first>Shuangzhi</first><last>Wu</last></author>
       <author><first>Chao</first><last>Bian</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2875-2886</pages>
       <abstract>Pretrained language models have learned a vast amount of human knowledge from large-scale corpora, but their powerful memorization capability also brings the risk of data leakage. Some risks may only be discovered after the model training is completed, such as the model memorizing a specific phone number and frequently outputting it. In such cases, model developers need to eliminate specific data influences from the model to mitigate legal and ethical penalties. To effectively mitigate these risks, people often have to spend a significant amount of time and computational costs to retrain new models instead of finding ways to cure the ‘sick’ models. Therefore, we propose a method to locate and erase risky neurons in order to eliminate the impact of privacy data in the model. We use a new method based on integrated gradients to locate neurons associated with privacy texts, and then erase these neurons by setting their activation values to zero.Furthermore, we propose a risky neuron aggregation method to eliminate the influence of privacy data in the model in batches. Experimental results show that our method can effectively and quickly eliminate the impact of privacy data without affecting the model’s performance. Additionally, we demonstrate the relationship between model memorization and neurons through experiments, further illustrating the robustness of our method.</abstract>
       <url hash="524cb29b">2023.emnlp-main.174</url>
@@ -2536,7 +2536,7 @@
       <title><fixed-case>C</fixed-case>omb<fixed-case>LM</fixed-case>: Adapting Black-Box Language Models through Small Fine-Tuned Models</title>
       <author><first>Aitor</first><last>Ormazabal</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>2961-2974</pages>
       <abstract>Methods for adapting language models (LMs) to new tasks and domains have traditionally assumed white-box access to the model, and work by modifying its parameters. However, this is incompatible with a recent trend in the field, where the highest quality models are only available as black-boxes through inference APIs. Even when the model weights are available, the computational cost of fine-tuning large LMs can be prohibitive for most practitioners. In this work, we present a lightweight method for adapting large LMs to new domains and tasks, assuming no access to their weights or intermediate activations. Our approach fine-tunes a small white-box LM and combines it with the large black-box LM at the probability level through a small network, learned on a small validation set. We validate our approach by adapting a large LM (OPT-30B) to several domains and a downstream task (machine translation), observing improved performance in all cases, of up to 9%, while using a domain expert 23x smaller.</abstract>
       <url hash="304a1475">2023.emnlp-main.180</url>
@@ -2570,7 +2570,7 @@
       <author><first>Tu</first><last>Nguyen</last></author>
       <author><first>Jade</first><last>Copet</last></author>
       <author><first>Gabriel</first><last>Synnaeve</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>3008-3028</pages>
       <abstract>In NLP, text language models based on words or subwords are known to outperform their character-based counterparts. Yet, in the speech community, the standard input of spoken LMs are 20ms or 40ms-long discrete units (shorter than a phoneme). Taking inspiration from word-based LM, we introduce a Generative Spoken Language Model (GSLM) based on word-size continuous-valued audio tokens that can generate diverse and expressive language output. This is obtained by replacing lookup table for lexical types with a Lexical Embedding function, the cross entropy loss by a contrastive loss, and multinomial sampling by k-NN sampling. The resulting model is the first generative language model based on word-size continuous tokens. Its performance is on par with discrete unit GSLMs regarding generation quality as measured by automatic metrics and subjective human judgements. Moreover, it is five times more memory efficient thanks to its large 200ms units. In addition, the embeddings before and after the Lexical Embedder are phonetically and semantically interpretable.</abstract>
@@ -2659,7 +2659,7 @@
       <author><first>Daniela</first><last>Teodorescu</last></author>
       <author><first>Tiffany</first><last>Cheng</last></author>
       <author><first>Alona</first><last>Fyshe</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>3117-3133</pages>
       <abstract>Research in psychopathology has shown that, at an aggregate level, the patterns of emotional change over time—emotion dynamics—are indicators of one’s mental health. One’s patterns of emotion change have traditionally been determined through self-reports of emotions; however, there are known issues with accuracy, bias, and convenience. Recent approaches to determining emotion dynamics from one’s everyday utterances, addresses many of these concerns, but it is not yet known whether these measures of utterance emotion dynamics (UED) correlate with mental health diagnoses. Here, for the first time, we study the relationship between tweet emotion dynamics and mental health disorders. We find that each of the UED metrics studied varied by the user’s self-disclosed diagnosis. For example: average valence was significantly higher (i.e., more positive text) in the control group compared to users with ADHD, MDD, and PTSD. Valence variability was significantly lower in the control group compared to ADHD, depression, bipolar disorder, MDD, PTSD, and OCD but not PPD. Rise and recovery rates of valence also exhibited significant differences from the control. This work provides important early evidence for how linguistic cues pertaining to emotion dynamics can play a crucial role as biosocial markers for mental illnesses and aid in the understanding, diagnosis, and management of mental health disorders.</abstract>
       <url hash="9ad38c6c">2023.emnlp-main.188</url>
@@ -2753,7 +2753,7 @@
       <author><first>Shikhar</first><last>Murty</last></author>
       <author><first>Pratyusha</first><last>Sharma</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>3233-3247</pages>
       <abstract>Recursion is a prominent feature of human language, and fundamentally challenging for self-attention due to the lack of an explicit recursive-state tracking mechanism. Consequently, Transformer language models poorly capture long-tail recursive structure and exhibit sample-inefficient syntactic generalization. This work introduces Pushdown Layers, a new self-attention layer that models recursive state via a stack tape that tracks estimated depths of every token in an incremental parse of the observed prefix. Transformer LMs with Pushdown Layers are syntactic language models that autoregressively and synchronously update this stack tape as they predict new tokens, in turn using the stack tape to softly modulate attention over tokens—for instance, learning to “skip” over closed constituents. When trained on a corpus of strings annotated with silver constituency parses, Transformers equipped with Pushdown Layers achieve dramatically better and 3-5x more sample-efficient syntactic generalization, while maintaining similar perplexities. Pushdown Layers are a drop-in replacement for standard self-attention. We illustrate this by finetuning GPT2-medium with Pushdown Layers on an automatically parsed WikiText-103, leading to improvements on several GLUE text classification tasks.</abstract>
       <url hash="bc048113">2023.emnlp-main.195</url>
@@ -2803,7 +2803,7 @@
       <author><first>Wenhao</first><last>Shi</last></author>
       <author><first>Lei</first><last>Wang</last></author>
       <author><first>Yang</first><last>Yang</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <author><first>Heng</first><last>Shen</last></author>
       <pages>3290-3301</pages>
       <abstract>Existing MWP solvers employ sequence or binary tree to present the solution expression and decode it from given problem description. However, such structures fail to handle the variants that can be derived via mathematical manipulation, e.g., <tex-math>(a_1+a_2)*a_3</tex-math> and <tex-math>a_1 * a_3+a_2 * a_3</tex-math> can both be possible valid solutions for a same problem but formulated as different expression sequences or trees. The multiple solution variants depicting different possible solving procedures for the same input problem would raise two issues: 1) making it hard for the model to learn the mapping function between the input and output spaces effectively, and 2) wrongly indicating <i>wrong</i> when evaluating a valid expression variant. To address these issues, we introduce a unified tree structure to present a solution expression, where the elements are permutable and identical for all the expression variants. We propose a novel non-autoregressive solver, named <i>MWP-NAS</i>, to parse the problem and deduce the solution expression based on the unified tree. For evaluating the possible expression variants, we design a path-based metric to evaluate the partial accuracy of expressions of a unified tree. The results from extensive experiments conducted on Math23K and MAWPS demonstrate the effectiveness of our proposed MWP-NAS. The codes and checkpoints are available at: <url>https://github.com/mengqunhan/MWP-NAS</url>.</abstract>
@@ -2832,11 +2832,11 @@
       <author><first>Eleftheria</first><last>Briakou</last></author>
       <author><first>Amanda</first><last>Liu</last></author>
       <author><first>Connor</first><last>Baumler</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>3313-3330</pages>
       <abstract>NLP systems have shown impressive performance at answering questions by retrieving relevant context. However, with the increasingly large models, it is impossible and often undesirable to constrain models’ knowledge or reasoning to only the retrieved context. This leads to a mismatch between the information that <i>the models</i> access to derive the answer and the information that is available to <i>the user</i> to assess the model predicted answer. In this work, we study how users interact with QA systems in the absence of sufficient information to assess their predictions. Further, we ask whether adding the requisite background helps mitigate users’ over-reliance on predictions. Our study reveals that users rely on model predictions even in the absence of sufficient information needed to assess the model’s correctness. Providing the relevant background, however, helps users better catch model errors, reducing over-reliance on incorrect predictions. On the flip side, background information also increases users’ confidence in their accurate as well as inaccurate judgments. Our work highlights that supporting users’ verification of QA predictions is an important, yet challenging, problem.</abstract>
       <url hash="c019cd1a">2023.emnlp-main.201</url>
@@ -2848,7 +2848,7 @@
       <title><fixed-case>GROOV</fixed-case>i<fixed-case>ST</fixed-case>: A Metric for Grounding Objects in Visual Storytelling</title>
       <author><first>Aditya K</first><last>Surikuchi</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>3331-3339</pages>
       <abstract>A proper evaluation of stories generated for a sequence of images—the task commonly referred to as visual storytelling—must consider multiple aspects, such as coherence, grammatical correctness, and visual grounding. In this work, we focus on evaluating the degree of grounding, that is, the extent to which a story is about the entities shown in the images. We analyze current metrics, both designed for this purpose and for general vision-text alignment. Given their observed shortcomings, we propose a novel evaluation tool, GROOViST, that accounts for cross-modal dependencies, <i>temporal misalignments</i> (the fact that the order in which entities appear in the story and the image sequence may not match), and human intuitions on visual grounding. An additional advantage of GROOViST is its modular design, where the contribution of each component can be assessed and interpreted individually.</abstract>
       <url hash="c30fd93d">2023.emnlp-main.202</url>
@@ -2944,7 +2944,7 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Yifan</first><last>Fan</last></author>
       <author><first>Yanling</first><last>Li</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <pages>3435-3446</pages>
       <abstract>Conversational Question Answering (CQA) aims to provide natural language answers to users in information-seeking dialogues. Existing CQA benchmarks often evaluate models using pre-collected human-human conversations. However, replacing the model-predicted dialogue history with ground truth compromises the naturalness and sustainability of CQA evaluation. While previous studies proposed using predicted history and rewriting techniques to address unresolved coreferences and incoherencies, this approach renders the question self-contained from the conversation. In this paper, we propose a novel automatic evaluation approach, interview evaluation. Specifically, ChatGPT acts as the interviewer (Q agent) with a set of carefully designed prompts, and the CQA model under test serves as the interviewee (A agent). During the interview evaluation, questions are dynamically generated by the Q agent to guide the A agent in predicting the correct answer through an interactive process. We evaluated four different models on QuAC and two models on CoQA in our experiments. The experiment results demonstrate that our interview evaluation has advantages over previous CQA evaluation approaches, particularly in terms of naturalness and coherence. The source code is made publicly available.</abstract>
@@ -2998,7 +2998,7 @@
     </paper>
     <paper id="213">
       <title><fixed-case>T</fixed-case>o<fixed-case>V</fixed-case>i<fixed-case>L</fixed-case>a<fixed-case>G</fixed-case>: Your Visual-Language Generative Model is Also An Evildoer</title>
-      <author id="xinpeng-wang"><first>Xinpeng</first><last>Wang</last></author>
+      <author><first>Xinpeng</first><last>Wang</last></author>
       <author><first>Xiaoyuan</first><last>Yi</last></author>
       <author><first>Han</first><last>Jiang</last></author>
       <author><first>Shanlin</first><last>Zhou</last></author>
@@ -3077,7 +3077,7 @@
       <author><first>Jesus</first><last>Salcido</last></author>
       <author><first>Téa</first><last>Wright</last></author>
       <author><first>Eliana</first><last>Colunga</last></author>
-      <author><first>Katharina</first><last>von der Wense</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last></author>
       <pages>3588-3598</pages>
       <abstract>With recent advances in large language models (LLMs), the concept of automatically generating children’s educational materials has become increasingly realistic. Working toward the goal of age-appropriate simplicity in generated educational texts, we first examine the ability of several popular LLMs to generate stories with properly adjusted lexical and readability levels. We find that, in spite of the growing capabilities of LLMs, they do not yet possess the ability to limit their vocabulary to levels appropriate for younger age groups. As a second experiment, we explore the ability of state-of-the-art lexical simplification models to generalize to the domain of children’s stories and, thus, create an efficient pipeline for their automatic generation. In order to test these models, we develop a dataset of child-directed lexical simplification instances, with examples taken from the LLM-generated stories in our first experiment. We find that, while the strongest-performing current lexical simplification models do not perform as well on material designed for children due to their reliance on large language models behind the scenes, some models that still achieve fairly strong results on general data can mimic or even improve their performance on children-directed data with proper fine-tuning, which we conduct using our newly created child-directed simplification dataset.</abstract>
       <url hash="49a4f35d">2023.emnlp-main.218</url>
@@ -3130,7 +3130,7 @@
       <title>Retrofitting Light-weight Language Models for Emotions using Supervised Contrastive Learning</title>
       <author><first>Sapan</first><last>Shah</last></author>
       <author><first>Sreedhar</first><last>Reddy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3640-3654</pages>
       <abstract>We present a novel retrofitting method to induce emotion aspects into pre-trained language models (PLMs) such as BERT and RoBERTa. Our method updates pre-trained network weights using contrastive learning so that the text fragments exhibiting similar emotions are encoded nearby in the representation space, and the fragments with different emotion content are pushed apart. While doing so, it also ensures that the linguistic knowledge already present in PLMs is not inadvertently perturbed. The language models retrofitted by our method, i.e., BERTEmo and RoBERTaEmo, produce emotion-aware text representations, as evaluated through different clustering and retrieval metrics. For the downstream tasks on sentiment analysis and sarcasm detection, they perform better than their pre-trained counterparts (about 1% improvement in F1-score) and other existing approaches. Additionally, a more significant boost in performance is observed for the retrofitted models over pre-trained ones in few-shot learning setting.</abstract>
       <url hash="1e94e765">2023.emnlp-main.222</url>
@@ -3182,7 +3182,7 @@
       <title>Language Representation Projection: Can We Transfer Factual Knowledge across Languages in Multilingual Language Models?</title>
       <author><first>Shaoyang</first><last>Xu</last></author>
       <author><first>Junzhuo</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>3692-3702</pages>
       <abstract>Multilingual pretrained language models serve as repositories of multilingual factual knowledge. Nevertheless, a substantial performance gap of factual knowledge probing exists between high-resource languages and low-resource languages, suggesting limited implicit factual knowledge transfer across languages in multilingual pretrained language models. This paper investigates the feasibility of explicitly transferring relatively rich factual knowledge from English to non-English languages. To accomplish this, we propose two parameter-free <tex-math>\textbf{L}</tex-math>anguage <tex-math>\textbf{R}</tex-math>epresentation <tex-math>\textbf{P}</tex-math>rojection modules (LRP2). The first module converts non-English representations into English-like equivalents, while the second module reverts English-like representations back into representations of the corresponding non-English language. Experimental results on the mLAMA dataset demonstrate that LRP2 significantly improves factual knowledge retrieval accuracy and facilitates knowledge transferability across diverse non-English languages. We further investigate the working mechanism of LRP2 from the perspectives of representation space and cross-lingual knowledge neuron.</abstract>
       <url hash="6932e14b">2023.emnlp-main.226</url>
@@ -3207,7 +3207,7 @@
       <title><fixed-case>R</fixed-case>easoning<fixed-case>LM</fixed-case>: Enabling Structural Subgraph Reasoning in Pre-trained Language Models for Question Answering over Knowledge Graph</title>
       <author><first>Jinhao</first><last>Jiang</last></author>
       <author><first>Kun</first><last>Zhou</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>3721-3735</pages>
@@ -3321,7 +3321,7 @@
       <title>Lost in Translation, Found in Spans: Identifying Claims in Multilingual Social Media</title>
       <author><first>Shubham</first><last>Mittal</last></author>
       <author><first>Megha</first><last>Sundriyal</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>3887-3902</pages>
       <abstract>Claim span identification (CSI) is an important step in fact-checking pipelines, aiming to identify text segments that contain a check-worthy claim or assertion in a social media post. Despite its importance to journalists and human fact-checkers, it remains a severely understudied problem, and the scarce research on this topic so far has only focused on English. Here we aim to bridge this gap by creating a novel dataset, X-CLAIM, consisting of 7K real-world claims collected from numerous social media platforms in five Indian languages and English. We report strong baselines with state-of-the-art encoder-only language models (e.g., XLM-R) and we demonstrate the benefits of training on multiple languages over alternative cross-lingual transfer methods such as zero-shot transfer, or training on translated data, from a high-resource language such as English. We evaluate generative large language models from the GPT series using prompting methods on the X-CLAIM dataset and we find that they underperform the smaller encoder-only language models for low-resource languages.</abstract>
       <url hash="332f7364">2023.emnlp-main.236</url>
@@ -3337,7 +3337,7 @@
       <author><first>Jiaxi</first><last>Wu</last></author>
       <author><first>Veronika</first><last>Wirtz</last></author>
       <author><first>Traci</first><last>Hong</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <pages>3903-3915</pages>
       <abstract>This paper introduces a multilingual dataset of COVID-19 vaccine misinformation, consisting of annotated tweets from three middle-income countries: Brazil, Indonesia, and Nigeria. The expertly curated dataset includes annotations for 5,952 tweets, assessing their relevance to COVID-19 vaccines, presence of misinformation, and the themes of the misinformation. To address challenges posed by domain specificity, the low-resource setting, and data imbalance, we adopt two approaches for developing COVID-19 vaccine misinformation detection models: domain-specific pre-training and text augmentation using a large language model. Our best misinformation detection models demonstrate improvements ranging from 2.7 to 15.9 percentage points in macro F1-score compared to the baseline models. Additionally, we apply our misinformation detection models in a large-scale study of 19 million unlabeled tweets from the three countries between 2020 and 2022, showcasing the practical application of our dataset and models for detecting and analyzing vaccine misinformation in multiple countries and languages. Our analysis indicates that percentage changes in the number of new COVID-19 cases are positively associated with COVID-19 vaccine misinformation rates in a staggered manner for Brazil and Indonesia, and there are significant positive associations between the misinformation rates across the three countries.</abstract>
       <url hash="da87d67f">2023.emnlp-main.237</url>
@@ -3362,7 +3362,7 @@
       <author><first>Sandra</first><last>Sandoval</last></author>
       <author><first>Jieyu</first><last>Zhao</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>3933-3945</pages>
       <abstract>We ask the question: Are there widespread disparities in machine translations of names across race/ethnicity, and gender? We hypothesize that the translation quality of names and surrounding context will be lower for names associated with US racial and ethnic minorities due to these systems’ tendencies to standardize language to predominant language patterns. We develop a dataset of names that are strongly demographically aligned and propose a translation evaluation procedure based on round-trip translation. We analyze the effect of name demographics on translation quality using generalized linear mixed effects models and find that the ability of translation systems to correctly translate female-associated names is significantly lower than male-associated names. This effect is particularly pronounced for female-associated names that are also associated with racial (Black) and ethnic (Hispanic) minorities. This disparity in translation quality between social groups for something as personal as someone’s name has significant implications for people’s professional, personal, and cultural identities, self-worth and ease of communication. Our findings suggest that more MT research is needed to improve the translation of names and to provide high-quality service for users regardless of gender, race, and ethnicity.</abstract>
       <url hash="c5fbcf5b">2023.emnlp-main.239</url>
@@ -3374,7 +3374,7 @@
       <title>Investigating Efficiently Extending Transformers for Long Input Summarization</title>
       <author><first>Jason</first><last>Phang</last></author>
       <author><first>Yao</first><last>Zhao</last></author>
-      <author><first>Peter</first><last>Liu</last></author>
+      <author id="peter-j-liu"><first>Peter</first><last>Liu</last></author>
       <pages>3946-3961</pages>
       <abstract>While large pretrained Transformer models have proven highly capable at tackling natural language tasks, handling long sequence inputs still poses a significant challenge. One such task is long input summarization, where inputs are longer than the maximum input context of most models. Through an extensive set of experiments, we investigate what model architectural changes and pretraining paradigms most efficiently adapt a pretrained Transformer for long input summarization. We find that a staggered, block-local Transformer with global encoder tokens strikes a good balance of performance and efficiency, and that an additional pretraining phase on long sequences meaningfully improves downstream summarization performance. Based on our findings, we introduce PEGASUS-X, an extension of the PEGASUS model with additional long input pretraining to handle inputs of up to 16K tokens, which achieves strong performance on long input summarization tasks comparable with much larger models.</abstract>
       <url hash="61123d60">2023.emnlp-main.240</url>
@@ -3388,7 +3388,7 @@
       <author><first>Linhao</first><last>Yu</last></author>
       <author><first>Minghui</first><last>Xu</last></author>
       <author><first>Renren</first><last>Jin</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>3962-3979</pages>
       <abstract>Spoken texts (either manual or automatic transcriptions from automatic speech recognition (ASR)) often contain disfluencies and grammatical errors, which pose tremendous challenges to downstream tasks. Converting spoken into written language is hence desirable. Unfortunately, the availability of datasets for this is limited. To address this issue, we present CS2W, a Chinese Spoken-to-Written style conversion dataset comprising 7,237 spoken sentences extracted from transcribed conversational texts. Four types of conversion problems are covered in CS2W: disfluencies, grammatical errors, ASR transcription errors, and colloquial words. Our annotation convention, data, and code are publicly available at https://github.com/guozishan/CS2W.</abstract>
       <url hash="93b821cb">2023.emnlp-main.241</url>
@@ -3486,7 +3486,7 @@
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Robert</first><last>Litschko</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
       <pages>4069-4085</pages>
       <abstract>Cross-lingual transfer learning from high-resource to medium and low-resource languages has shown encouraging results. However, the scarcity of resources in target languages remains a challenge. In this work, we resort to data augmentation and continual pre-training for domain adaptation to improve cross-lingual abusive language detection. For data augmentation, we analyze two existing techniques based on vicinal risk minimization and propose MIXAG, a novel data augmentation method which interpolates pairs of instances based on the angle of their representations. Our experiments involve seven languages typologically distinct from English and three different domains. The results reveal that the data augmentation strategies can enhance few-shot cross-lingual abusive language detection. Specifically, we observe that consistently in all target languages, MIXAG improves significantly in multidomain and multilingual environments. Finally, we show through an error analysis how the domain adaptation can favour the class of abusive texts (reducing false negatives), but at the same time, declines the precision of the abusive language detection model.</abstract>
       <url hash="bdfd373d">2023.emnlp-main.248</url>
@@ -3499,7 +3499,7 @@
       <author><first>Junfeng</first><last>Jiang</last></author>
       <author><first>Chengzhang</first><last>Dong</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>4086-4101</pages>
       <abstract>Dialogue segmentation is a crucial task for dialogue systems allowing a better understanding of conversational texts. Despite recent progress in unsupervised dialogue segmentation methods, their performances are limited by the lack of explicit supervised signals for training. Furthermore, the precise definition of segmentation points in conversations still remains as a challenging problem, increasing the difficulty of collecting manual annotations. In this paper, we provide a feasible definition of dialogue segmentation points with the help of document-grounded dialogues and release a large-scale supervised dataset called SuperDialseg, containing 9,478 dialogues based on two prevalent document-grounded dialogue corpora, and also inherit their useful dialogue-related annotations. Moreover, we provide a benchmark including 18 models across five categories for the dialogue segmentation task with several proper evaluation metrics. Empirical studies show that supervised learning is extremely effective in in-domain datasets and models trained on SuperDialseg can achieve good generalization ability on out-of-domain data. Additionally, we also conducted human verification on the test set and the Kappa score confirmed the quality of our automatically constructed dataset. We believe our work is an important step forward in the field of dialogue segmentation.</abstract>
       <url hash="8f66434e">2023.emnlp-main.249</url>
@@ -3772,7 +3772,7 @@
       <title>Meta-Learning Online Adaptation of Language Models</title>
       <author><first>Nathan</first><last>Hu</last></author>
       <author><first>Eric</first><last>Mitchell</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Chelsea</first><last>Finn</last></author>
       <pages>4418-4432</pages>
       <abstract>Large language models encode impressively broad world knowledge in their parameters. However, the knowledge in static language models falls out of date, limiting the model’s effective “shelf life.” While online fine-tuning can reduce this degradation, we find that naively fine-tuning on a stream of documents leads to a low level of information uptake. We hypothesize that online fine-tuning does not sufficiently attend to important information. That is, the gradient signal from important tokens representing factual information is drowned out by the gradient from inherently noisy tokens, suggesting that a dynamic, context-aware learning rate may be beneficial. We therefore propose learning which tokens to upweight. We meta-train a small, autoregressive model to reweight the language modeling loss for each token during online fine-tuning, with the objective of maximizing the out-of-date base question-answering model’s ability to answer questions about a document after a single weighted gradient step. We call this approach Context-aware Meta-learned Loss Scaling (CaMeLS). Across three different distributions of documents, our experiments find that CaMeLS provides substantially improved information uptake on streams of thousands of documents compared with standard fine-tuning and baseline heuristics for reweighting token losses.</abstract>
@@ -3804,7 +3804,7 @@
       <author><first>Weixin</first><last>Cai</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>4450-4468</pages>
       <abstract>Users interact with text, image, code, or other editors on a daily basis. However, machine learning models are rarely trained in the settings that reflect the interactivity between users and their editor. This is understandable as training AI models with real users is not only slow and costly, but what these models learn may be specific to user interface design choices. Unfortunately, this means most of the research on text, code, and image generation has focused on non-interactive settings, whereby the model is expected to get everything right without accounting for any input from a user who may be willing to help. We introduce a new Interactive Text Generation task that allows training generation models interactively without the costs of involving real users, by using user simulators that provide edits that guide the model towards a given target text. We train our interactive models using Imitation Learning, and our experiments against competitive non-interactive generation models show that models trained interactively are superior to their non-interactive counterparts, even when all models are given the same budget of user inputs or edits.</abstract>
       <url hash="ec50896c">2023.emnlp-main.270</url>
@@ -3916,8 +3916,8 @@
       <author><first>Ratish</first><last>Puduppully</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>4586-4602</pages>
       <abstract>This study investigates machine translation between related languages i.e., languages within the same family that share linguistic characteristics such as word order and lexical similarity. Machine translation through few-shot prompting leverages a small set of translation pair examples to generate translations for test sentences. This procedure requires the model to learn how to generate translations while simultaneously ensuring that token ordering is maintained to produce a fluent and accurate translation. We propose that for related languages, the task of machine translation can be simplified by leveraging the monotonic alignment characteristic of such languages. We introduce DecoMT, a novel approach of few-shot prompting that decomposes the translation process into a sequence of word chunk translations. Through automatic and human evaluation conducted on multiple related language pairs across various language families, we demonstrate that our proposed approach of decomposed prompting surpasses multiple established few-shot baseline approaches. For example, DecoMT outperforms the strong few-shot prompting BLOOM model with an average improvement of 8 chrF++ scores across the examined languages.</abstract>
       <url hash="2fa6d7b8">2023.emnlp-main.279</url>
@@ -3943,7 +3943,7 @@
       <author id="xiaolei-wang-fudan"><first>Xiaolei</first><last>Wang</last></author>
       <author><first>Xin</first><last>Zhou</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>4616-4630</pages>
       <abstract>Recently, many studies have illustrated the robustness problem of Named Entity Recognition (NER) systems: the NER models often rely on superficial entity patterns for predictions, without considering evidence from the context. Consequently, even state-of-the-art NER models generalize poorly to out-of-domain scenarios when out-of-distribution (OOD) entity patterns are introduced. Previous research attributes the robustness problem to the existence of NER dataset bias, where simpler and regular entity patterns induce shortcut learning. In this work, we bring new insights into this problem by comprehensively investigating the NER dataset bias from a dataset difficulty view. We quantify the entity-context difficulty distribution in existing datasets and explain their relationship with model robustness. Based on our findings, we explore three potential ways to de-bias the NER datasets by altering entity-context distribution, and we validate the feasibility with intensive experiments. Finally, we show that the de-biased datasets can transfer to different models and even benefit existing model-based robustness-improving methods, indicating that building more robust datasets is fundamental for building more robust NER systems.</abstract>
       <url hash="93191646">2023.emnlp-main.281</url>
@@ -3957,7 +3957,7 @@
       <author><first>Heike</first><last>Adel</last></author>
       <author><first>Lukas</first><last>Lange</last></author>
       <author><first>Jannik</first><last>Strötgen</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>4631-4646</pages>
       <abstract>Most languages of the world pose low-resource challenges to natural language processing models. With multilingual training, knowledge can be shared among languages. However, not all languages positively influence each other and it is an open research question how to select the most suitable set of languages for multilingual training and avoid negative interference among languages whose characteristics or data distributions are not compatible. In this paper, we propose GradSim, a language grouping method based on gradient similarity. Our experiments on three diverse multilingual benchmark datasets show that it leads to the largest performance gains compared to other similarity measures and it is better correlated with cross-lingual model performance. As a result, we set the new state of the art on AfriSenti, a benchmark dataset for sentiment analysis on low-resource African languages. In our extensive analysis, we further reveal that besides linguistic features, the topics of the datasets play an important role for language grouping and that lower layers of transformer models encode language-specific features while higher layers capture task-specific information.</abstract>
       <url hash="da1e898e">2023.emnlp-main.282</url>
@@ -4069,7 +4069,7 @@
       <author><first>Amanda</first><last>Cercas Curry</last></author>
       <author><first>Tanvi</first><last>Dinkar</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <pages>4776-4790</pages>
       <abstract>Automated dialogue or conversational systems are anthropomorphised by developers and personified by users. While a degree of anthropomorphism is inevitable, conscious and unconscious design choices can guide users to personify them to varying degrees. Encouraging users to relate to automated systems as if they were human can lead to transparency and trust issues, and high risk scenarios caused by over-reliance on their outputs. As a result, natural language processing researchers have investigated the factors that induce personification and develop resources to mitigate such effects. However, these efforts are fragmented, and many aspects of anthropomorphism have yet to be explored. In this paper, we discuss the linguistic factors that contribute to the anthropomorphism of dialogue systems and the harms that can arise thereof, including reinforcing gender stereotypes and conceptions of acceptable language. We recommend that future efforts towards developing dialogue systems take particular care in their design, development, release, and description; and attend to the many linguistic cues that can elicit personification by users.</abstract>
       <url hash="0b08f301">2023.emnlp-main.290</url>
@@ -4098,7 +4098,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>4798-4815</pages>
       <abstract>Automatic Speech Recognition (ASR) systems are instrumental across various applications, with their performance being critically tied to user satisfaction. Conventional evaluation metrics for ASR systems produce a singular aggregate score, which is insufficient for understanding specific system vulnerabilities. Therefore, we aim to address the limitations of the previous ASR evaluation methods by introducing the Korean Error Explainable Benchmark Dataset for ASR and Post-processing (KEBAP). KEBAP enables comprehensive analysis of ASR systems at both speech- and text levels, thereby facilitating a more balanced assessment encompassing speech recognition accuracy and user readability. KEBAP provides 37 newly defined speech-level resources incorporating diverse noise environments and speaker characteristics categories, also presenting 13 distinct text-level error types. This paper demonstrates detailed statistical analyses of colloquial noise categories and textual error types. Furthermore, we conduct extensive validation and analysis on commercially deployed ASR systems, providing valuable insights into their performance. As a more fine-grained and real-world-centric evaluation method, KEBAP contributes to identifying and mitigating potential weaknesses in ASR systems.</abstract>
       <url hash="0d154049">2023.emnlp-main.292</url>
@@ -4127,7 +4127,7 @@
       <author><first>Xinyu</first><last>Chen</last></author>
       <author><first>Sheng</first><last>Xu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>4833-4843</pages>
       <abstract>Cross-document event coreference resolution (CD-ECR) is a task of clustering event mentions across multiple documents that refer to the same real-world events. Previous studies usually model the CD-ECR task as a pairwise similarity comparison problem by using different event mention features, and consider the highly similar event mention pairs in the same cluster as coreferent. In general, most of them only consider the local context of event mentions and ignore their implicit global information, thus failing to capture the interactions of long-distance event mentions. To address the above issue, we regard discourse structure as global information to further improve CD-ECR. First, we use a discourse rhetorical structure constructor to construct tree structures to represent documents. Then, we obtain shortest dependency paths from the tree structures to represent interactions between event mention pairs. Finally, we feed the above information to a multi-layer perceptron to capture the similarities of event mention pairs for resolving coreferent events. Experimental results on the ECB+ dataset show that our proposed model outperforms several baselines and achieves the competitive performance with the start-of-the-art baselines.</abstract>
       <url hash="04a17ddc">2023.emnlp-main.294</url>
@@ -4145,7 +4145,7 @@
       <author><first>Jungwoo</first><last>Lim</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Kisu</first><last>Yang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>4844-4861</pages>
       <abstract>Despite the striking advances in recent language generation performance, model-generated responses have suffered from the chronic problem of hallucinations that are either untrue or unfaithful to a given source. Especially in the task of knowledge grounded conversation, the models are required to generate informative responses, but hallucinated utterances lead to miscommunication. In particular, entity-level hallucination that causes critical misinformation and undesirable conversation is one of the major concerns. To address this issue, we propose a post-hoc refinement method called REM. It aims to enhance the quality and faithfulness of hallucinated utterances by refining them based on the source knowledge. If the generated utterance has a low source-faithfulness score with the given knowledge, REM mines the key entities in the knowledge and implicitly uses them for refining the utterances. We verify that our method reduces entity hallucination in the utterance. Also, we show the adaptability and efficacy of REM with extensive experiments and generative results. Our code is available at https://github.com/YOONNAJANG/REM.</abstract>
       <url hash="08813ef1">2023.emnlp-main.295</url>
@@ -4161,7 +4161,7 @@
       <author><first>Yuxuan</first><last>Fan</last></author>
       <author><first>Zhiyong</first><last>Wu</last></author>
       <author><first>Jingjing</first><last>Xu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>4862-4876</pages>
       <abstract>Previous studies have shown that large language models (LLMs) like GPTs store massive factual knowledge in their parameters. However, the stored knowledge could be false or outdated. Traditional knowledge editing methods refine LLMs via fine-tuning on texts containing specific knowledge. However, with the increasing scales of LLMs, these gradient-based approaches bring large computation costs. The trend of model-as-a-service also makes it impossible to modify knowledge in black-box LMs. Inspired by in-context learning (ICL), a new paradigm based on demonstration contexts without parameter updating, we explore whether ICL can edit factual knowledge. To answer this question, we give a comprehensive empirical study of ICL strategies. Experiments show that in-context knowledge editing (IKE), without any gradient and parameter updating, achieves a competitive success rate compared to gradient-based methods on GPT-J (6B) but with much fewer side effects, including less over-editing on similar but unrelated facts and less knowledge forgetting on previously stored knowledge. We also apply the method to larger LMs with tens or hundreds of parameters like OPT-175B, which shows the scalability of our method. The code is available at <url>https://github.com/pkunlp-icler/IKE</url>.</abstract>
       <url hash="900b3492">2023.emnlp-main.296</url>
@@ -4267,7 +4267,7 @@
       <author><first>Shaolin</first><last>Zhu</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Baosong</first><last>Yang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>4978-4990</pages>
       <abstract>Mixture-of-Experts (MoE) based sparse architectures can significantly increase model capacity with sublinear computational overhead, which are hence widely used in massively multilingual neural machine translation (MNMT). However, they are prone to overfitting on low-resource language translation. In this paper, we propose a modularized MNMT framework that is able to flexibly assemble dense and MoE-based sparse modules to achieve the best of both worlds. The training strategy of the modularized MNMT framework consists of three stages: (1) Pre-training basic MNMT models with different training objectives or model structures, (2) Initializing modules of the framework with pre-trained couterparts (e.g., encoder, decoder and embedding layers) from the basic models and (3) Fine-tuning the modularized MNMT framework to fit modules from different models together. We pre-train three basic MNMT models from scratch: a dense model, an MoE-based sparse model and a new MoE model, termed as MoE-LGR that explores multiple Language-Group-specifc Routers to incorporate language group knowledge into MNMT. The strengths of these pre-trained models are either on low-resource language translation, high-resource language translation or zero-shot translation. Our modularized MNMT framework attempts to incorporate these advantages into a single model with reasonable initialization and fine-tuning. Experiments on widely-used benchmark datasets demonstrate that the proposed modularized MNMT framwork substantially outperforms both MoE and dense models on high- and low-resource language translation as well as zero-shot translation. Our framework facilitates the combination of different methods with their own strengths and recycling off-the-shelf models for multilingual neural machine translation. Codes are available at https://github.com/lishangjie1/MMNMT.</abstract>
       <url hash="f56aff54">2023.emnlp-main.303</url>
@@ -4304,7 +4304,7 @@
     <paper id="306">
       <title>Prompting is not a substitute for probability measurements in large language models</title>
       <author><first>Jennifer</first><last>Hu</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>5040-5060</pages>
       <abstract>Prompting is now a dominant method for evaluating the linguistic knowledge of large language models (LLMs). While other methods directly read out models’ probability distributions over strings, prompting requires models to access this internal information by processing linguistic input, thereby implicitly testing a new type of emergent ability: metalinguistic judgment. In this study, we compare metalinguistic prompting and direct probability measurements as ways of measuring models’ linguistic knowledge. Broadly, we find that LLMs’ metalinguistic judgments are inferior to quantities directly derived from representations. Furthermore, consistency gets worse as the prompt query diverges from direct measurements of next-word probabilities. Our findings suggest that negative results relying on metalinguistic prompts cannot be taken as conclusive evidence that an LLM lacks a particular linguistic generalization. Our results also highlight the value that is lost with the move to closed APIs where access to probability distributions is limited.</abstract>
       <url hash="ffefadde">2023.emnlp-main.306</url>
@@ -4344,7 +4344,7 @@
       <author><first>Santiago</first><last>Ontanon</last></author>
       <author><first>Siddhartha</first><last>Brahma</last></author>
       <author><first>Yury</first><last>Zemlyanskiy</last></author>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <author><first>Mandy</first><last>Guo</last></author>
       <author><first>James</first><last>Lee-Thorp</last></author>
       <author><first>Yi</first><last>Tay</last></author>
@@ -4373,7 +4373,7 @@
       <title>Cross-Cultural Analysis of Human Values, Morals, and Biases in Folk Tales</title>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>5113-5125</pages>
       <abstract>Folk tales are strong cultural and social influences in children’s lives, and they are known to teach morals and values. However, existing studies on folk tales are largely limited to European tales. In our study, we compile a large corpus of over 1,900 tales originating from 27 diverse cultures across six continents. Using a range of lexicons and correlation analyses, we examine how human values, morals, and gender biases are expressed in folk tales across cultures. We discover differences between cultures in prevalent values and morals, as well as cross-cultural trends in problematic gender biases. Furthermore, we find trends of reduced value expression when examining public-domain fiction stories, extrinsically validate our analyses against the multicultural Schwartz Survey of Cultural Values and the Global Gender Gap Report, and find traditional gender biases associated with values, morals, and agency. This large-scale cross-cultural study of folk tales paves the way towards future studies on how literature influences and reflects cultural norms.</abstract>
       <url hash="c156fa08">2023.emnlp-main.311</url>
@@ -4385,7 +4385,7 @@
       <author><first>Ruiqi</first><last>Zhong</last></author>
       <author><first>Charlie</first><last>Snell</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>5126-5152</pages>
       <abstract>Can non-programmers annotate natural language utterances with complex programs that represent their meaning? We introduce APEL, a framework in which non-programmers select among candidate programs generated by a seed semantic parser (e.g., Codex). Since they cannot understand the candidate programs, we ask them to select indirectly by examining the programs’ input-ouput examples. For each utterance, APEL actively searches for a simple input on which the candidate programs tend to produce different outputs. It then asks the non-programmers only to choose the appropriate output, thus allowing us to infer which program is correct and could be used to fine-tune the parser. As a first case study, we recruited human non-programmers to use APEL to re-annotate SPIDER, a text-to-SQL dataset. Our approach achieved the same annotation accuracy as the original expert annotators (75%) and exposed many subtle errors in the original annotations.</abstract>
       <url hash="59ee8001">2023.emnlp-main.312</url>
@@ -4401,7 +4401,7 @@
       <author><first>Cedegao</first><last>Zhang</last></author>
       <author><first>Armando</first><last>Solar-Lezama</last></author>
       <author><first>Joshua</first><last>Tenenbaum</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>5153-5176</pages>
       <abstract>Logical reasoning, i.e., deductively inferring the truth value of a conclusion from a set of premises, is an important task for artificial intelligence with wide potential impacts on science, mathematics, and society. While many prompting-based strategies have been proposed to enable Large Language Models (LLMs) to do such reasoning more effectively, they still appear unsatisfactory, often failing in subtle and unpredictable ways. In this work, we investigate the validity of instead reformulating such tasks as modular neurosymbolic programming, which we call LINC: Logical Inference via Neurosymbolic Computation. In LINC, the LLM acts as a semantic parser, translating premises and conclusions from natural language to expressions in first-order logic. These expressions are then offloaded to an external theorem prover, which symbolically performs deductive inference. Leveraging this approach, we observe significant performance gains on FOLIO and a balanced subset of ProofWriter for three different models in nearly all experimental conditions we evaluate. On ProofWriter, augmenting the comparatively small open-source StarCoder+ (15.5B parameters) with LINC even outperforms GPT-3.5 and GPT-4 with Chain-of-Thought (CoT) prompting by an absolute 38% and 10%, respectively. When used with GPT-4, LINC scores 26% higher than CoT on ProofWriter while performing comparatively on FOLIO. Further analysis reveals that although both methods on average succeed roughly equally often on this dataset, they exhibit distinct and complementary failure modes. We thus provide promising evidence for how logical reasoning over natural language can be tackled through jointly leveraging LLMs alongside symbolic provers. All corresponding code is publicly available.</abstract>
       <url hash="f54d1a42">2023.emnlp-main.313</url>
@@ -4517,7 +4517,7 @@
       <author><first>Debtanu</first><last>Datta</last></author>
       <author><first>Shubham</first><last>Soni</last></author>
       <author><first>Rajdeep</first><last>Mukherjee</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last></author>
+      <author><first>Saptarshi</first><last>Ghosh</last></author>
       <pages>5291-5302</pages>
       <abstract>Automatic summarization of legal case judgments is a practically important problem that has attracted substantial research efforts in many countries. In the context of the Indian judiciary, there is an additional complexity – Indian legal case judgments are mostly written in complex English, but a significant portion of India’s population lacks command of the English language. Hence, it is crucial to summarize the legal documents in Indian languages to ensure equitable access to justice. While prior research primarily focuses on summarizing legal case judgments in their source languages, this study presents a pioneering effort toward cross-lingual summarization of English legal documents into Hindi, the most frequently spoken Indian language. We construct the first high-quality legal corpus comprising of 3,122 case judgments from prominent Indian courts in English, along with their summaries in both English and Hindi, drafted by legal practitioners. We benchmark the performance of several diverse summarization approaches on our corpus and demonstrate the need for further research in cross-lingual summarization in the legal domain.</abstract>
       <url hash="f0c82173">2023.emnlp-main.321</url>
@@ -4640,7 +4640,7 @@
       <author><first>Rafael</first><last>Rafailov</last></author>
       <author><first>Huaxiu</first><last>Yao</last></author>
       <author><first>Chelsea</first><last>Finn</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>5433-5442</pages>
       <abstract>A trustworthy real-world prediction system should produce well-calibrated confidence scores; that is, its confidence in an answer should be indicative of the likelihood that the answer is correct, enabling deferral to an expert in cases of low-confidence predictions. Recent studies have shown that unsupervised pre-training produces large language models (LMs) whose conditional probabilities are remarkably well-calibrated. However, the most widely-used LMs are fine-tuned with reinforcement learning from human feedback (RLHF-LMs), and some studies have suggested that RLHF-LMs produce conditional probabilities that are very poorly calibrated. In light of this perceived weakness, we conduct a broad evaluation of methods for extracting confidence scores from RLHF-LMs. For RLHF-LMs such as ChatGPT, GPT-4, and Claude, we find that verbalized confidences emitted as output tokens are typically better-calibrated than the model’s conditional probabilities on the TriviaQA, SciQ, and TruthfulQA benchmarks, often reducing the expected calibration error by a relative 50%.</abstract>
       <url hash="ba5b28c4">2023.emnlp-main.330</url>
@@ -4704,8 +4704,8 @@
     <paper id="335">
       <title>Navigating the Grey Area: How Expressions of Uncertainty and Overconfidence Affect Language Models</title>
       <author><first>Kaitlyn</first><last>Zhou</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last></author>
       <pages>5506-5524</pages>
       <abstract>The increased deployment of LMs for real-world tasks involving knowledge and facts makes it important to understand model epistemology: what LMs think they know, and how their attitudes toward that knowledge are affected by language use in their inputs. Here, we study an aspect of model epistemology: how epistemic markers of certainty, uncertainty, or evidentiality like “I’m sure it’s”, “I think it’s”, or “Wikipedia says it’s” affect models, and whether they contribute to model failures. We develop a typology of epistemic markers and inject 50 markers into prompts for question answering. We find that LMs are highly sensitive to epistemic markers in prompts, with accuracies varying more than 80%. Surprisingly, we find that expressions of high certainty result in a 7% decrease in accuracy as compared to low certainty expressions; similarly, factive verbs hurt performance, while evidentials benefit performance. Our analysis of a popular pretraining dataset shows that these markers of uncertainty are associated with answers on question-answering websites, while markers of certainty are associated with questions. These associations may suggest that the behavior of LMs is based on mimicking observed language use, rather than truly reflecting epistemic uncertainty.</abstract>
       <url hash="2557d780">2023.emnlp-main.335</url>
@@ -4732,7 +4732,7 @@
       <author><first>Lingjue</first><last>Xie</last></author>
       <author><first>Ella</first><last>Hofmann-Coyle</last></author>
       <author><first>Mayank</first><last>Kulkarni</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>5538-5547</pages>
       <abstract>Entity-centric summarization is a form of controllable summarization that aims to generate a summary for a specific entity given a document. Concise summaries are valuable in various real-life applications, as they enable users to quickly grasp the main points of the document focusing on an entity of interest. This paper presents ENTSUMV2, a more abstractive version of the original entity-centric ENTSUM summarization dataset. In ENTSUMV2 the annotated summaries are intentionally made shorter to benefit more specific and useful entity-centric summaries for downstream users. We conduct extensive experiments on this dataset using multiple abstractive summarization approaches that employ supervised fine-tuning or large-scale instruction tuning. Additionally, we perform comprehensive human evaluation that incorporates metrics for measuring crucial facets. These metrics provide a more fine-grained interpretation of the current state-of-the-art systems and highlight areas for future improvement.</abstract>
       <url hash="8ed565aa">2023.emnlp-main.337</url>
@@ -4788,7 +4788,7 @@
       <author><first>Qinglin</first><last>Zhang</last></author>
       <author><first>Jiaqing</first><last>Liu</last></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>5592-5605</pages>
       <abstract>Topic segmentation is critical for obtaining structured documents and improving down- stream tasks such as information retrieval. Due to its ability of automatically exploring clues of topic shift from abundant labeled data, recent supervised neural models have greatly promoted the development of long document topic segmentation, but leaving the deeper relationship between coherence and topic segmentation underexplored. Therefore, this paper enhances the ability of supervised models to capture coherence from both logical structure and semantic similarity perspectives to further improve the topic segmentation performance, proposing Topic-aware Sentence Structure Prediction (TSSP) and Contrastive Semantic Similarity Learning (CSSL). Specifically, the TSSP task is proposed to force the model to comprehend structural information by learning the original relations between adjacent sentences in a disarrayed document, which is constructed by jointly disrupting the original document at topic and sentence levels. Moreover, we utilize inter- and intra-topic information to construct contrastive samples and design the CSSL objective to ensure that the sentences representations in the same topic have higher similarity, while those in different topics are less similar. Extensive experiments show that the Longformer with our approach significantly outperforms old state-of-the-art (SOTA) methods. Our approach improve <tex-math>F_{1}</tex-math> of old SOTA by 3.42 (73.74 <tex-math>\rightarrow</tex-math> 77.16) and reduces <tex-math>P_{k}</tex-math> by 1.11 points (15.0 <tex-math>\rightarrow</tex-math> 13.89) on WIKI-727K and achieves an average relative reduction of 4.3% on <tex-math>P_{k}</tex-math> on WikiSection. The average relative <tex-math>P_{k}</tex-math> drop of 8.38% on two out-of-domain datasets also demonstrates the robustness of our approach.</abstract>
       <url hash="b8b93e8a">2023.emnlp-main.341</url>
@@ -4818,7 +4818,7 @@
       <title>Information Value: Measuring Utterance Predictability as Distance from Plausible Alternatives</title>
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Sarenne</first><last>Wallbridge</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>5633-5653</pages>
       <abstract>We present information value, a measure which quantifies the predictability of an utterance relative to a set of plausible alternatives. We introduce a method to obtain interpretable estimates of information value using neural text generators, and exploit their psychometric predictive power to investigate the dimensions of predictability that drive human comprehension behaviour. Information value is a stronger predictor of utterance acceptability in written and spoken dialogue than aggregates of token-level surprisal and it is complementary to surprisal for predicting eye-tracked reading times.</abstract>
       <url hash="457befeb">2023.emnlp-main.343</url>
@@ -4887,7 +4887,7 @@
       <author><first>Jiayi</first><last>Pan</last></author>
       <author><first>Yuchen</first><last>Zhou</last></author>
       <author><first>Rui</first><last>Pan</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>5718-5728</pages>
       <abstract>Vision-Language Models (VLMs) are trained on vast amounts of data captured by humans emulating our understanding of the world. However, known as visual illusions, human’s perception of reality isn’t always faithful to the physical world. This raises a key question: do VLMs have the similar kind of illusions as humans do, or do they faithfully learn to represent reality? To investigate this question, we build a dataset containing five types of visual illusions and formulate four tasks to examine visual illusions in state-of-the-art VLMs. Our findings have shown that although the overall alignment is low, larger models are closer to human perception and more susceptible to visual illusions. Our dataset and initial findings will promote a better understanding of visual illusions in humans and machines and provide a stepping stone for future computational models that can better align humans and machines in perceiving and communicating about the shared visual world. The code and data are available at [github.com/vl-illusion/dataset](https://github.com/vl-illusion/dataset).</abstract>
       <url hash="cea8fdc6">2023.emnlp-main.348</url>
@@ -4898,8 +4898,8 @@
     <paper id="349">
       <title>Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study</title>
       <author><first>Freddy</first><last>Heppell</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>5729-5741</pages>
       <abstract>This paper analyses two hitherto unstudied sites sharing state-backed disinformation, Reliable Recent News (rrn.world) and WarOnFakes (waronfakes.com), which publish content in Arabic, Chinese, English, French, German, and Spanish. We describe our content acquisition methodology and perform cross-site unsupervised topic clustering on the resulting multilingual dataset. We also perform linguistic and temporal analysis of the web page translations and topics over time, and investigate articles with false publication dates. We make publicly available this new dataset of 14,053 articles, annotated with each language version, and additional metadata such as links and images. The main contribution of this paper for the NLP community is in the novel dataset which enables studies of disinformation networks, and the training of NLP tools for disinformation detection.</abstract>
       <url hash="2e60b118">2023.emnlp-main.349</url>
@@ -4996,7 +4996,7 @@
     <paper id="356">
       <title>The <fixed-case>BLA</fixed-case> Benchmark: Investigating Basic Language Abilities of Pre-Trained Multimodal Models</title>
       <author><first>Xinyi</first><last>Chen</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <pages>5817-5830</pages>
       <abstract>Despite the impressive performance achieved by pre-trained language-and-vision models in downstream tasks, it remains an open question whether this reflects a proper understanding of image-text interaction. In this work, we explore to what extent they handle basic linguistic constructions—active-passive voice, coordination, and relative clauses—that even preschool children can typically master. We present BLA, a novel, automatically constructed benchmark to evaluate multimodal models on these Basic Language Abilities. We show that different types of Transformer-based systems, such as CLIP, ViLBERT, and BLIP2, generally struggle with BLA in a zero-shot setting, in line with previous findings. Our experiments, in particular, show that most of the tested models only marginally benefit when fine-tuned or prompted with construction-specific samples. Yet, the generative BLIP2 shows promising trends, especially in an in-context learning setting. This opens the door to using BLA not only as an evaluation benchmark but also to improve models’ basic language abilities.</abstract>
@@ -5034,14 +5034,14 @@
     <paper id="359">
       <title>Ditto: A Simple and Efficient Approach to Improve Sentence Embeddings</title>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Qinglin</first><last>Zhang</last></author>
       <author><first>Siqi</first><last>Zheng</last></author>
       <author><first>Chong</first><last>Deng</last></author>
       <author><first>Hai</first><last>Yu</last></author>
       <author><first>Jiaqing</first><last>Liu</last></author>
       <author><first>Yukun</first><last>Ma</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <pages>5868-5875</pages>
       <abstract>Prior studies diagnose the anisotropy problem in sentence representations from pre-trained language models, e.g., BERT, without fine-tuning. Our analysis reveals that the sentence embeddings from BERT suffer from a bias towards uninformative words, limiting the performance in semantic textual similarity (STS) tasks. To address this bias, we propose a simple and efficient unsupervised approach, Diagonal Attention Pooling (Ditto), which weights words with model-based importance estimations and computes the weighted average of word representations from pre-trained models as sentence embeddings. Ditto can be easily applied to any pre-trained language model as a postprocessing operation. Compared to prior sentence embedding approaches, Ditto does not add parameters nor requires any learning. Empirical evaluations demonstrate that our proposed Ditto can alleviate the anisotropy problem and improve various pre-trained models on the STS benchmarks.</abstract>
       <url hash="93f75d28">2023.emnlp-main.359</url>
@@ -5160,7 +5160,7 @@
       <author><first>Jaewook</first><last>Lee</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
       <author><first>Chanjun</first><last>Park</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>6014-6029</pages>
       <abstract>Korean morphological variations present unique opportunities and challenges in natural language processing (NLP), necessitating an advanced understanding of morpheme-based sentence construction. The complexity of morphological variations allows for diverse sentence forms based on the syntactic-semantic integration of functional morphemes (i.e., affixes) to lexical morphemes (i.e., roots). With this in mind, we propose a method - CHEF, replicating the morphological transformations inherent in sentences based on lexical and functional morpheme combinations through generative data augmentation. CHEF operates using a morpheme blender and a label discriminator, thereby enhancing the diversity of Korean sentence forms by capturing the properties of agglutination while maintaining label consistency. We conduct experiments on Korean multiple classification datasets, improving model performance in full- and few-shot settings. Our proposed method boosts performance beyond the preceding data augmentation methods without incurring external data usage. We demonstrate that our approach achieves comparable results yielded by augmentation techniques that use large language models (LLMs).</abstract>
       <url hash="253dd702">2023.emnlp-main.367</url>
@@ -5248,8 +5248,8 @@
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
       <author><first>Mert</first><last>Inan</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>6114-6131</pages>
       <abstract>Embodied task completion is a challenge where an agent in a simulated environment must predict environment actions to complete tasks based on natural language instructions and ego-centric visual observations. We propose a variant of this problem where the agent predicts actions at a higher level of abstraction called a plan, which helps make agent actions more interpretable and can be obtained from the appropriate prompting of large language models. We show that multimodal transformer models can outperform language-only models for this problem but fall significantly short of oracle plans. Since collecting human-human dialogues for embodied environments is expensive and time-consuming, we propose a method to synthetically generate such dialogues, which we then use as training data for plan prediction. We demonstrate that multimodal transformer models can attain strong zero-shot performance from our synthetic data, outperforming language-only models trained on human-human data.</abstract>
       <url hash="aa7011d6">2023.emnlp-main.374</url>
@@ -5385,7 +5385,7 @@
       <title>Tree Prompting: Efficient Task Adaptation without Fine-Tuning</title>
       <author><first>Chandan</first><last>Singh</last></author>
       <author><first>John</first><last>Morris</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
       <pages>6253-6267</pages>
@@ -5466,8 +5466,8 @@
       <author><first>Yatao</first><last>Bian</last></author>
       <author><first>Zeyu</first><last>Qin</last></author>
       <author><first>Bingzhe</first><last>Wu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>6325-6341</pages>
       <abstract>Large language models (LLMs) outperform information retrieval techniques for downstream knowledge-intensive tasks when being prompted to generate world knowledge. However, community concerns abound regarding the factuality and potential implications of using this uncensored knowledge. In light of this, we introduce CONNER, a COmpreheNsive kNowledge Evaluation fRamework, designed to systematically and automatically evaluate generated knowledge from six important perspectives – Factuality, Relevance, Coherence, Informativeness, Helpfulness and Validity. We conduct an extensive empirical analysis of the generated knowledge from three different types of LLMs on two widely studied knowledge-intensive tasks, i.e., open-domain question answering and knowledge-grounded dialogue. Surprisingly, our study reveals that the factuality of generated knowledge, even if lower, does not significantly hinder downstream tasks. Instead, the relevance and coherence of the outputs are more important than small factual mistakes. Further, we show how to use CONNER to improve knowledge-intensive tasks by designing two strategies: Prompt Engineering and Knowledge Selection. Our evaluation code and LLM-generated knowledge with human annotations will be released to facilitate future research.</abstract>
       <url hash="b8fecd46">2023.emnlp-main.390</url>
@@ -5555,8 +5555,8 @@
       <title><fixed-case>H</fixed-case>alu<fixed-case>E</fixed-case>val: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models</title>
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Xiaoxue</first><last>Cheng</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>6449-6464</pages>
       <abstract>Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation for Large Language Models (HaluEval) benchmark, a large collection of generated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. Besides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about 19.5% user queries). Moreover, existing LLMs face great challenges in recognizing the hallucinations in texts. While, our experiments also prove that the hallucination recognition can be improved by providing external knowledge or adding reasoning steps.</abstract>
@@ -5584,7 +5584,7 @@
       <author><first>Vedanuj</first><last>Goswami</last></author>
       <author><first>Shruti</first><last>Bhosale</last></author>
       <author><first>Angela</first><last>Fan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>6489-6499</pages>
       <abstract>Machine Translation (MT) has been widely used for cross-lingual classification, either by translating the test set into English and running inference with a monolingual model (translate-test), or translating the training set into the target languages and finetuning a multilingual model (translate-train). However, most research in the area focuses on the multilingual models rather than the MT component. We show that, by using a stronger MT system and mitigating the mismatch between training on original text and running inference on machine translated text, translate-test can do substantially better than previously assumed. The optimal approach, however, is highly task dependent, as we identify various sources of cross-lingual transfer gap that affect different tasks and approaches differently. Our work calls into question the dominance of multilingual models for cross-lingual classification, and prompts to pay more attention to MT-based baselines.</abstract>
       <url hash="aaf6eae9">2023.emnlp-main.399</url>
@@ -5618,9 +5618,9 @@
       <author><first>Atharva</first><last>Kulkarni</last></author>
       <author><first>Abhishek</first><last>Vijayakumar</last></author>
       <author><first>Haofei</first><last>Yu</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <pages>6508-6524</pages>
       <abstract>Large language models (LLMs) have recently reached an impressive level of linguistic capability, prompting comparisons with human language skills. However, there have been relatively few systematic inquiries into the linguistic capabilities of the latest generation of LLMs, and those studies that do exist (i) ignore the remarkable ability of humans to generalize, (ii) focus only on English, and (iii) investigate syntax or semantics and overlook other capabilities that lie at the heart of human language, like morphology. Here, we close these gaps by conducting the first rigorous analysis of the morphological capabilities of ChatGPT in four typologically varied languages (specifically, English, German, Tamil, and Turkish). We apply a version of Berko’s (1958) wug test to ChatGPT, using novel, uncontaminated datasets for the four examined languages. We find that ChatGPT massively underperforms purpose-built systems, particularly in English. Overall, our results—through the lens of morphology—cast a new light on the linguistic capabilities of ChatGPT, suggesting that claims of human-like language skills are premature and misleading.</abstract>
       <url hash="7991e7c2">2023.emnlp-main.401</url>
@@ -5686,8 +5686,8 @@
     <paper id="406">
       <title>Construction Artifacts in Metaphor Identification Datasets</title>
       <author><first>Joanne</first><last>Boisson</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>6581-6590</pages>
       <abstract>Metaphor identification aims at understanding whether a given expression is used figuratively in context. However, in this paper we show how existing metaphor identification datasets can be gamed by fully ignoring the potential metaphorical expression or the context in which it occurs. We test this hypothesis in a variety of datasets and settings, and show that metaphor identification systems based on language models without complete information can be competitive with those using the full context. This is due to the construction procedures to build such datasets, which introduce unwanted biases for positive and negative classes. Finally, we test the same hypothesis on datasets that are carefully sampled from natural corpora and where this bias is not present, making these datasets more challenging and reliable.</abstract>
       <url hash="11471456">2023.emnlp-main.406</url>
@@ -5737,7 +5737,7 @@
     <paper id="410">
       <title>Rethinking Model Selection and Decoding for Keyphrase Generation with Pre-trained Sequence-to-Sequence Models</title>
       <author><first>Di</first><last>Wu</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <pages>6642-6658</pages>
       <abstract>Keyphrase Generation (KPG) is a longstanding task in NLP with widespread applications. The advent of sequence-to-sequence (seq2seq) pre-trained language models (PLMs) has ushered in a transformative era for KPG, yielding promising performance improvements. However, many design decisions remain unexplored and are often made arbitrarily. This paper undertakes a systematic analysis of the influence of model selection and decoding strategies on PLM-based KPG. We begin by elucidating why seq2seq PLMs are apt for KPG, anchored by an attention-driven hypothesis. We then establish that conventional wisdom for selecting seq2seq PLMs lacks depth: (1) merely increasing model size or performing task-specific adaptation is not parameter-efficient; (2) although combining in-domain pre-training with task adaptation benefits KPG, it does partially hinder generalization. Regarding decoding, we demonstrate that while greedy search achieves strong F1 scores, it lags in recall compared with sampling-based methods. Based on these insights, we propose DeSel, a likelihood-based decode-select algorithm for seq2seq PLMs. DeSel improves greedy search by an average of 4.7% semantic F1 across five datasets. Our collective findings pave the way for deeper future investigations into PLM-based KPG.</abstract>
@@ -5762,7 +5762,7 @@
       <title>A Multi-Task Dataset for Assessing Discourse Coherence in <fixed-case>C</fixed-case>hinese Essays: Structure, Theme, and Logic Analysis</title>
       <author><first>Hongyi</first><last>Wu</last></author>
       <author><first>Xinshu</first><last>Shen</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Shaoguang</first><last>Mao</last></author>
       <author><first>Xiaopeng</first><last>Bai</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
@@ -5790,7 +5790,7 @@
       <title>Lifelong Sequence Generation with Dynamic Module Expansion and Adaptation</title>
       <author><first>Chengwei</first><last>Qin</last></author>
       <author><first>Chen</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6701-6714</pages>
       <abstract>Lifelong sequence generation (LSG), a problem in continual learning, aims to continually train a model on a sequence of generation tasks to learn constantly emerging new generation patterns while avoiding the forgetting of previous knowledge. Existing LSG methods mainly focus on maintaining old knowledge while paying little attention to knowledge transfer across tasks. In contrast, humans can better learn new tasks by leveraging previously acquired knowledge from similar tasks. Inspired by the learning paradigm of humans, we propose Dynamic Module Expansion and Adaptation (DMEA), which enables the model to dynamically determine the architecture for acquiring new knowledge based on task correlation and select the most similar previous tasks to facilitate adaptation to new tasks. In addition, as the learning process can easily be biased towards the current task which might cause more severe forgetting of previously learned knowledge, we propose dynamic gradient scaling to balance the learning of the current task and replayed tasks. With extensive experiments, we demonstrate that DMEA can consistently outperform existing methods in different LSG settings.</abstract>
       <url hash="b4a480c8">2023.emnlp-main.414</url>
@@ -5829,7 +5829,7 @@
       <author><first>Hailin</first><last>Chen</last></author>
       <author><first>Amrita</first><last>Saha</last></author>
       <author><first>Steven</first><last>Hoi</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6737-6749</pages>
       <abstract>With the rise of powerful closed-sourced LLMs (ChatGPT, GPT-4), there are increasing interests in distilling the capabilies of close-sourced LLMs to smaller open-sourced LLMs. Previous distillation methods usually prompt ChatGPT to generate a set of instructions and answers, for the student model to learn. However, such standard distillation approach neglects the merits and conditions of the student model. Inspired by modern teaching principles, we design a personalised distillation process, in which the student attempts to solve a task first, then the teacher provides an adaptive refinement for the student to improve. Instead of feeding the student with teacher’s prior, personalised distillation enables personalised learning for the student model, as it only learns on examples it makes mistakes upon and learns to improve its own solution. On code generation, personalised distillation consistently outperforms standard distillation with only one third of the data. With only 2.5-3K personalised examples that incur a data-collection cost of 4-6$, we boost CodeGen-mono-16B by 7% to achieve 36.4% pass@1 and StarCoder by 12.2% to achieve 45.8% pass@1 on HumanEval.</abstract>
       <url hash="e20b1475">2023.emnlp-main.417</url>
@@ -5844,7 +5844,7 @@
       <author><first>Arkadeep</first><last>Acharya</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
       <author><first>Adam</first><last>Jatowt</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <pages>6750-6774</pages>
       <abstract>Temporal reasoning represents a vital component of human communication and understanding, yet remains an underexplored area within the context of Large Language Models (LLMs). Despite LLMs demonstrating significant proficiency in a range of tasks, a comprehensive, large-scale analysis of their temporal reasoning capabilities is missing. Our paper addresses this gap, presenting the first extensive benchmarking of LLMs on temporal reasoning tasks. We critically evaluate 8 different LLMs across 6 datasets using 3 distinct prompting strategies. Additionally, we broaden the scope of our evaluation by including in our analysis 2 Code Generation LMs. Beyond broad benchmarking of models and prompts, we also conduct a fine-grained investigation of performance across different categories of temporal tasks. We further analyze the LLMs on varying temporal aspects, offering insights into their proficiency in understanding and predicting the continuity, sequence, and progression of events over time. Our findings reveal a nuanced depiction of the capabilities and limitations of the models within temporal reasoning, offering a comprehensive reference for future research in this pivotal domain.</abstract>
       <url hash="b12633da">2023.emnlp-main.418</url>
@@ -5857,7 +5857,7 @@
       <author><first>Shreya</first><last>Havaldar</last></author>
       <author><first>Matthew</first><last>Pressimone</last></author>
       <author><first>Eric</first><last>Wong</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>6775-6791</pages>
       <abstract>Understanding how styles differ across languages is advantageous for training both humans and computers to generate culturally appropriate text. We introduce an explanation framework to extract stylistic differences from multilingual LMs and compare styles across languages. Our framework (1) generates comprehensive style lexica in any language and (2) consolidates feature importances from LMs into comparable lexical categories. We apply this framework to compare politeness, creating the first holistic multilingual politeness dataset and exploring how politeness varies across four languages. Our approach enables an effective evaluation of how distinct linguistic categories contribute to stylistic variations and provides interpretable insights into how people communicate differently around the world.</abstract>
       <url hash="af8ada89">2023.emnlp-main.419</url>
@@ -5888,7 +5888,7 @@
       <author><first>Shana</first><last>Kleiner</last></author>
       <author><first>Desmond</first><last>Patton</last></author>
       <author><first>Elsbeth</first><last>Turcan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>6805-6824</pages>
       <abstract>While biases disadvantaging African American Language (AAL) have been uncovered in models for tasks such as speech recognition and toxicity detection, there has been little investigation of these biases for language generation models like ChatGPT. We evaluate how well LLMs understand AAL in comparison to White Mainstream English (WME), the encouraged “standard” form of English taught in American classrooms. We measure large language model performance on two tasks: a counterpart generation task, where a model generates AAL given WME and vice versa, and a masked span prediction (MSP) task, where models predict a phrase hidden from their input. Using a novel dataset of AAL texts from a variety of regions and contexts, we present evidence of dialectal bias for six pre-trained LLMs through performance gaps on these tasks.</abstract>
       <url hash="cf5c54d6">2023.emnlp-main.421</url>
@@ -6028,7 +6028,7 @@
       <author><first>Weishi</first><last>Wang</last></author>
       <author><first>Yue</first><last>Wang</last></author>
       <author><first>Steven</first><last>Hoi</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6954-6968</pages>
       <abstract>Automatic program repair (APR) has gained increasing attention as an essential technique in software development to reduce manual debugging efforts and boost developers’ productivity. Recent advances in deep learning (DL) based models have demonstrated promising results by learning from large-scale bug-fix examples in a data-driven manner. However, in practical scenarios, software bugs have an imbalanced distribution, and the fixing knowledge learned by APR models often only capture the patterns of frequent error types, making it inapplicable to handle the rare error types. To address this limitation, we investigate a novel task of low-resource APR, and propose Meta-APR, a new meta-learning framework integrated with code pretrained language models to generate fixes for low-resource bugs with limited training samples. Our Meta-APR learns better error-specific knowledge from high-resource bugs through efficient first-order meta-learning optimization, which allows for a faster adaptation to the target low-resource bugs. Besides, while we adopt CodeT5, a pretrained code-aware encoder-decoder Transformer, as the backbone model for Meta-APR, it is a model-agnostic framework that can be integrated with any neural models. Extensive experimental results on three benchmarks in various programming languages verify the superiority of our method over existing DL-based APR approaches.</abstract>
       <url hash="0756606e">2023.emnlp-main.430</url>
@@ -6136,7 +6136,7 @@
       <author><first>Rricha</first><last>Jalota</last></author>
       <author><first>Koel</first><last>Chowdhury</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>7086-7100</pages>
       <abstract>Translated texts exhibit systematic linguistic differences compared to original texts in the same language, and these differences are referred to as translationese. Translationese has effects on various cross-lingual natural language processing tasks, potentially leading to biased results. In this paper, we explore a novel approach to reduce translationese in translated texts: translation-based style transfer. As there are no parallel human-translated and original data in the same language, we use a self-supervised approach that can learn from comparable (rather than parallel) mono-lingual original and translated data. However, even this self-supervised approach requires some parallel data for validation. We show how we can eliminate the need for parallel validation data by combining the self-supervised loss with an unsupervised loss. This unsupervised loss leverages the original language model loss over the style-transferred output and a semantic similarity loss between the input and style-transferred output. We evaluate our approach in terms of original vs. translationese binary classification in addition to measuring content preservation and target-style fluency. The results show that our approach is able to reduce translationese classifier accuracy to a level of a random classifier after style transfer while adequately preserving the content and fluency in the target original style.</abstract>
       <url hash="ed0569c3">2023.emnlp-main.438</url>
@@ -6352,9 +6352,9 @@
     </paper>
     <paper id="454">
       <title>A Study on Accessing Linguistic Information in Pre-Trained Language Models by Using Prompts</title>
-      <author><first>Marion</first><last>Di Marco</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Di Marco</last></author>
       <author><first>Katharina</first><last>Hämmerl</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>7328-7336</pages>
       <abstract>We study whether linguistic information in pre-trained multilingual language models can be accessed by human language: So far, there is no easy method to directly obtain linguistic information and gain insights into the linguistic principles encoded in such models. We use the technique of prompting and formulate linguistic tasks to test the LM’s access to explicit grammatical principles and study how effective this method is at providing access to linguistic features. Our experiments on German, Icelandic and Spanish show that some linguistic properties can in fact be accessed through prompting, whereas others are harder to capture.</abstract>
       <url hash="04837df6">2023.emnlp-main.454</url>
@@ -6383,7 +6383,7 @@
       <author><first>Sungryull</first><last>Sohn</last></author>
       <author><first>Moontae</first><last>Lee</last></author>
       <author><first>Honglak</first><last>Lee</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>7354-7379</pages>
       <abstract>Pre-trained language models (PLMs) have shown impressive performance in various language tasks. However, they are prone to spurious correlations, and often generate illusory information. In real-world applications, PLMs should justify decisions with formalized, coherent reasoning chains, but this challenge remains under-explored. Cognitive psychology theorizes that humans are capable of utilizing fast and intuitive *heuristic* thinking to make decisions based on past experience, then rationalizing the decisions through slower and deliberative *analytic* reasoning. We incorporate these interlinked dual processes in fine-tuning and in-context learning with PLMs, applying them to two language understanding tasks that require coherent physical commonsense reasoning. We show that our proposed Heuristic-Analytic Reasoning (HAR) strategies drastically improve the coherence of rationalizations for model decisions, yielding state-of-the-art results on Tiered Reasoning for Intuitive Physics (TRIP). We also find that this improved coherence is a direct result of more faithful attention to relevant language context in each step of reasoning. Our findings suggest that human-like reasoning strategies can effectively improve the coherence and reliability of PLM reasoning.</abstract>
       <url hash="06e0ee14">2023.emnlp-main.456</url>
@@ -6409,7 +6409,7 @@
       <author><first>Antonia</first><last>Karamolegkou</last></author>
       <author><first>Jiaang</first><last>Li</last></author>
       <author><first>Li</first><last>Zhou</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>7403-7412</pages>
       <abstract>Language models may memorize more than just facts, including entire chunks of texts seen during training. Fair use exemptions to copyright laws typically allow for limited use of copyrighted material without permission from the copyright holder, but typically for extraction of information from copyrighted materials, rather than <i>verbatim</i> reproduction. This work explores the issue of copyright violations and large language models through the lens of verbatim memorization, focusing on possible redistribution of copyrighted text. We present experiments with a range of language models over a collection of popular books and coding problems, providing a conservative characterization of the extent to which language models can redistribute these materials. Overall, this research highlights the need for further examination and the potential impact on future developments in natural language processing to ensure adherence to copyright regulations. Code is at https://github.com/coastalcph/CopyrightLLMs.</abstract>
       <url hash="bb894e52">2023.emnlp-main.458</url>
@@ -6436,7 +6436,7 @@
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Derek</first><last>Chen</last></author>
       <author><first>Saujas</first><last>Vaduguru</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Daniel</first><last>Fried</last></author>
       <pages>7426-7436</pages>
       <abstract>Large language models (LLMs) excel at processing and generating text and code. However, LLMs have had limited applicability in grounded task-oriented dialogue as they are difficult to steer toward task objectives and fail to handle novel grounding. We present a modular and interpretable grounded dialogue system that addresses these shortcomings by composing LLMs with a symbolic planner and grounded code execution. Our system, consists of a reader and planner: the reader leverages an LLM to convert partner utterances into executable code, calling functions that perform grounding. The translated code’s output is stored to track dialogue state, while a symbolic planner determines the next appropriate response. We evaluate our system’s performance on the demanding OneCommon dialogue task, involving collaborative reference resolution on abstract images of scattered dots. Our system substantially outperforms the previous state-of-the-art, including improving task success in human evaluations from 56% to 69% in the most challenging setting.</abstract>
@@ -6535,7 +6535,7 @@
       <title>Joint Entity and Relation Extraction with Span Pruning and Hypergraph Neural Networks</title>
       <author><first>Zhaohui</first><last>Yan</last></author>
       <author><first>Songlin</first><last>Yang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Kewei</first><last>Tu</last></author>
       <pages>7512-7526</pages>
       <abstract>Entity and Relation Extraction (ERE) is an important task in information extraction. Recent marker-based pipeline models achieve state-of-the-art performance, but still suffer from the error propagation issue. Also, most of current ERE models do not take into account higher-order interactions between multiple entities and relations, while higher-order modeling could be beneficial.In this work, we propose HyperGraph neural network for ERE (HGERE), which is built upon the PL-marker (a state-of-the-art marker-based pipleline model). To alleviate error propagation, we use a high-recall pruner mechanism to transfer the burden of entity identification and labeling from the NER module to the joint module of our model. For higher-order modeling, we build a hypergraph, where nodes are entities (provided by the span pruner) and relations thereof, and hyperedges encode interactions between two different relations or between a relation and its associated subject and object entities. We then run a hypergraph neural network for higher-order inference by applying message passing over the built hypergraph. Experiments on three widely used benchmarks (ACE2004, ACE2005 and SciERC) for ERE task show significant improvements over the previous state-of-the-art PL-marker.</abstract>
@@ -6609,7 +6609,7 @@
     </paper>
     <paper id="473">
       <title><fixed-case>R</fixed-case>o<fixed-case>B</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>P</fixed-case>: A Comprehensive <fixed-case>RO</fixed-case>mance <fixed-case>BO</fixed-case>rrowing <fixed-case>CO</fixed-case>gnate Package and Benchmark for Multilingual Cognate Identification</title>
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <author><first>Ana</first><last>Uban</last></author>
       <author><first>Alina</first><last>Cristea</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
@@ -6627,7 +6627,7 @@
       <title>Instructive Dialogue Summarization with Query Aggregations</title>
       <author><first>Bin</first><last>Wang</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>7630-7653</pages>
       <abstract>Conventional dialogue summarization methods directly generate summaries and do not consider user’s specific interests. This poses challenges in cases where the users are more focused on particular topics or aspects. With the advancement of instruction-finetuned language models, we introduce instruction-tuning to dialogues to expand the capability set of dialogue summarization models. To overcome the scarcity of instructive dialogue summarization data, we propose a three-step approach to synthesize high-quality query-based summarization triples. This process involves summary-anchored query generation, query filtering and query-based summary generation. By training a unified model called InstructDS (Instructive Dialogue Summarization) on three summarization datasets with multi-purpose instructive triples, we expand the capability of dialogue summarization models. We evaluate our method on four datasets, including dialogue summarization and dialogue reading comprehension. Experimental results show that our approach outperforms the state-of-the-art models and even models with larger sizes. Additionally, our model exhibits higher generalizability and faithfulness, as confirmed by human subjective evaluations.</abstract>
       <url hash="3d34a422">2023.emnlp-main.474</url>
@@ -6741,9 +6741,9 @@
       <author><first>Mohammad</first><last>Shoeybi</last></author>
       <author><first>Yi</first><last>Dong</last></author>
       <author><first>Oleksii</first><last>Kuchaiev</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Chaowei</first><last>Xiao</last></author>
-      <author><first>Anima</first><last>Anandkumar</last></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last></author>
       <author><first>Bryan</first><last>Catanzaro</last></author>
       <pages>7763-7786</pages>
       <abstract>Large decoder-only language models (LMs) can be largely improved in terms of perplexity by retrieval (e.g., RETRO), but its impact on text generation quality and downstream task accuracy is unclear. Thus, it is still an open question: shall we pretrain large autoregressive LMs with retrieval? To answer it, we perform a comprehensive study on a scalable pre-trained retrieval-augmented LM (i.e., RETRO) compared with standard GPT and retrieval-augmented GPT incorporated at fine-tuning or inference stages. We first provide the recipe to reproduce RETRO up to 9.5B parameters while retrieving a text corpus with 330B tokens. Based on that, we have the following novel findings: i) RETRO outperforms GPT on text generation with much less degeneration (i.e., repetition), moderately higher factual accuracy, and slightly lower toxicity with a nontoxic retrieval database. ii) On the LM Evaluation Harness benchmark, RETRO largely outperforms GPT on knowledge-intensive tasks, but is on par with GPT on other tasks. Furthermore, we introduce a simple variant of the model, RETRO++, which largely improves open-domain QA results of original RETRO (e.g., EM score +8.6 on Natural Question) and significantly outperforms retrieval-augmented GPT across different model sizes. Our findings highlight the promising direction of pretraining autoregressive LMs with retrieval as future foundation models. We release our implementation at: https://github.com/NVIDIA/Megatron-LM/tree/main/tools/retro.</abstract>
@@ -6757,7 +6757,7 @@
       <author><first>Xinyuan</first><last>Lu</last></author>
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Qian</first><last>Liu</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>7787-7813</pages>
       <abstract>Current scientific fact-checking benchmarks exhibit several shortcomings, such as biases arising from crowd-sourced claims and an over-reliance on text-based evidence. We present SCITAB, a challenging evaluation dataset consisting of 1.2K expert-verified scientific claims that 1) originate from authentic scientific publications and 2) require compositional reasoning for verification. The claims are paired with evidence-containing scientific tables annotated with labels. Through extensive evaluations, we demonstrate that SCITAB poses a significant challenge to state-of-the-art models, including table-based pretraining models and large language models. All models except GPT-4 achieved performance barely above random guessing. Popular prompting techniques, such as Chain-of-Thought, do not achieve much performance gains on SCITAB. Our analysis uncovers several unique challenges posed by SCITAB, including table grounding, claim ambiguity, and compositional reasoning. Our codes and data are publicly available at https://github.com/XinyuanLu00/SciTab.</abstract>
@@ -6899,7 +6899,7 @@
     <paper id="493">
       <title>Empirical Study of Zero-Shot <fixed-case>NER</fixed-case> with <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case></title>
       <author><first>Tingyu</first><last>Xie</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Jian</first><last>Zhang</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Zuozhu</first><last>Liu</last></author>
@@ -6928,7 +6928,7 @@
     <paper id="495">
       <title>Active Retrieval Augmented Generation</title>
       <author><first>Zhengbao</first><last>Jiang</last></author>
-      <author><first>Frank</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank</first><last>Xu</last></author>
       <author><first>Luyu</first><last>Gao</last></author>
       <author><first>Zhiqing</first><last>Sun</last></author>
       <author><first>Qian</first><last>Liu</last></author>
@@ -6975,9 +6975,9 @@
     <paper id="498">
       <title>Enhancing Biomedical Lay Summarisation with External Knowledge Graphs</title>
       <author><first>Tomas</first><last>Goldsack</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Chen</first><last>Tang</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>8016-8032</pages>
       <abstract>Previous approaches for automatic lay summarisation are exclusively reliant on the source article that, given it is written for a technical audience (e.g., researchers), is unlikely to explicitly define all technical concepts or state all of the background information that is relevant for a lay audience. We address this issue by augmenting eLife, an existing biomedical lay summarisation dataset, with article-specific knowledge graphs, each containing detailed information on relevant biomedical concepts. Using both automatic and human evaluations, we systematically investigate the effectiveness of three different approaches for incorporating knowledge graphs within lay summarisation models, with each method targeting a distinct area of the encoder-decoder model architecture. Our results confirm that integrating graph-based domain knowledge can significantly benefit lay summarisation by substantially increasing the readability of generated text and improving the explanation of technical concepts.</abstract>
@@ -7061,7 +7061,7 @@
       <author><first>Pascal</first><last>Denis</last></author>
       <author><first>Emmanuel</first><last>Vincent</last></author>
       <author><first>Simon</first><last>Ostermann</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>8099-8110</pages>
       <abstract>In multimodal understanding tasks, visual and linguistic ambiguities can arise. Visual ambiguity can occur when visual objects require a model to ground a referring expression in a video without strong supervision, while linguistic ambiguity can occur from changes in entities in action flows. As an example from the cooking domain, “oil” mixed with “salt” and “pepper” could later be referred to as a “mixture”. Without a clear visual-linguistic alignment, we cannot know which among several objects shown is referred to by the language expression “mixture”, and without resolved antecedents, we cannot pinpoint what the mixture is. We define this chicken-and-egg problem as Visual-linguistic Ambiguity. In this paper, we present Find2Find, a joint anaphora resolution and object localization dataset targeting the problem of <i>visual-linguistic ambiguity</i>, consisting of 500 anaphora-annotated recipes with corresponding videos. We present experimental results of a novel end-to-end joint multitask learning framework for Find2Find that fuses visual and textual information and shows improvements both for anaphora resolution and object localization with one joint model in multitask learning, as compared to a strong single-task baseline.</abstract>
       <url hash="559166b6">2023.emnlp-main.504</url>
@@ -7156,7 +7156,7 @@
     </paper>
     <paper id="510">
       <title><fixed-case>DSI</fixed-case>++: Updating Transformer Memory with New Documents</title>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last></author>
       <author><first>Jai</first><last>Gupta</last></author>
       <author><first>Yi</first><last>Tay</last></author>
       <author><first>Mostafa</first><last>Dehghani</last></author>
@@ -7205,7 +7205,7 @@
     <paper id="513">
       <title>Homophone Disambiguation Reveals Patterns of Context Mixing in Speech Transformers</title>
       <author><first>Hosein</first><last>Mohebbi</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Willem</first><last>Zuidema</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>8249-8260</pages>
@@ -7287,7 +7287,7 @@
       <author><first>Sangwoo</first><last>Cho</last></author>
       <author><first>Xiaoyang</first><last>Wang</last></author>
       <author><first>Hassan</first><last>Foroosh</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last></author>
+      <author><first>Fei</first><last>Liu</last></author>
       <pages>8344-8357</pages>
       <abstract>Human preference judgments are pivotal in guiding large language models (LLMs) to produce outputs that align with human values. Human evaluations are also used in summarization tasks to compare outputs from various systems, complementing existing automatic metrics. Despite their significance, however, there has been limited research probing these pairwise or <tex-math>k</tex-math>-wise comparisons. The collective impact and relative importance of factors such as output length, informativeness, fluency, and factual consistency are still not well understood. It is also unclear if there are other hidden factors influencing human judgments. In this paper, we conduct an in-depth examination of a collection of pairwise human judgments released by OpenAI. Utilizing the Bradley-Terry-Luce (BTL) model, we reveal the inherent preferences embedded in these human judgments. We find that the most favored factors vary across tasks and genres, whereas the least favored factors tend to be consistent, e.g., outputs are too brief, contain excessive off-focus content or hallucinated facts. Our findings have implications on the construction of balanced datasets in human preference evaluations, which is a crucial step in shaping the behaviors of future LLMs.</abstract>
       <url hash="78ef78bd">2023.emnlp-main.519</url>
@@ -7380,7 +7380,7 @@
       <author><first>Feng</first><last>Jiang</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>8484-8495</pages>
       <abstract>Dialogue discourse parsing aims to reflect the relation-based structure of dialogue by establishing discourse links according to discourse relations. To alleviate data sparsity, previous studies have adopted multitasking approaches to jointly learn dialogue discourse parsing with related tasks (e.g., reading comprehension) that require additional human annotation, thus limiting their generality. In this paper, we propose a multitasking framework that integrates dialogue discourse parsing with its neighboring task addressee recognition. Addressee recognition reveals the reply-to structure that partially overlaps with the relation-based structure, which can be exploited to facilitate relation-based structure learning. To this end, we first proposed a reinforcement learning agent to identify training examples from addressee recognition that are most helpful for dialog discourse parsing. Then, a task-aware structure transformer is designed to capture the shared and private dialogue structure of different tasks, thereby further promoting dialogue discourse parsing. Experimental results on both the Molweni and STAC datasets show that our proposed method can outperform the SOTA baselines. The code will be available at https://github.com/yxfanSuda/RLTST.</abstract>
       <url hash="cea58ba5">2023.emnlp-main.526</url>
@@ -7402,7 +7402,7 @@
     <paper id="528">
       <title><fixed-case>DALE</fixed-case>: Generative Data Augmentation for Low-Resource Legal <fixed-case>NLP</fixed-case></title>
       <author><first>Sreyan</first><last>Ghosh</last></author>
-      <author><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
       <author><first>Sonal</first><last>Kumar</last></author>
       <author><first>S</first><last>Ramaneswaran</last></author>
       <author><first>S</first><last>Sakshi</last></author>
@@ -7451,7 +7451,7 @@
       <author><first>Begoña</first><last>Altuna</last></author>
       <author><first>Javier</first><last>Alvez</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>8596-8615</pages>
       <abstract>Although large language models (LLMs) have apparently acquired a certain level of grammatical knowledge and the ability to make generalizations, they fail to interpret negation, a crucial step in Natural Language Processing. We try to clarify the reasons for the sub-optimal performance of LLMs understanding negation. We introduce a large semi-automatically generated dataset of circa 400,000 descriptive sentences about commonsense knowledge that can be true or false in which negation is present in about 2/3 of the corpus in different forms. We have used our dataset with the largest available open LLMs in a zero-shot approach to grasp their generalization and inference capability and we have also fine-tuned some of the models to assess whether the understanding of negation can be trained. Our findings show that, while LLMs are proficient at classifying affirmative sentences, they struggle with negative sentences and lack a deep understanding of negation, often relying on superficial cues. Although fine-tuning the models on negative sentences improves their performance, the lack of generalization in handling negation is persistent, highlighting the ongoing challenges of LLMs regarding negation understanding and generalization. The dataset and code are publicly available.</abstract>
       <url hash="45979d26">2023.emnlp-main.531</url>
@@ -7465,7 +7465,7 @@
       <author><first>Mingtong</first><last>Liu</last></author>
       <author><first>Hongxiao</first><last>Zhang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>8616-8627</pages>
       <abstract>Sentence-level translation, document-level translation, translation memory, and terminology constrained translation play an important role in machine translation. Most of the previous work uses separate models or methods to solve these tasks, which is not conducive to knowledge transfer of different tasks and increases the complexity of system construction. In this work, we explore the potential of pre-trained language model in machine translation tasks and propose a Multi-Task Machine Translation (MT2) model to integrate these translation tasks. We design a novel translation-specific In-Context Learning (ICL) paradigm for model training, in which all of the translation tasks can be modeled as context-learning tasks that integrate contextual information for performance improvement. Specifically, we propose a retrieval and alignment method to obtain a large scale context-enhancement training data, then we train the model in an in-context learning manner. Furthermore, we adopt two context-dependent training strategies to encourage the model to better understand and utilize contextual information for translation. Extensive experiments on translation memory, terminology constrained translation, document-level translation, and few-shot domain-adaptation tasks demonstrate the superior performance of our model, verifying the effectiveness of our proposed approach.</abstract>
@@ -7513,7 +7513,7 @@
       <author><first>Iqra</first><last>Zahid</last></author>
       <author><first>Hao</first><last>Li</last></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>8680-8692</pages>
       <abstract>How do different generalised quantifiers affect the behaviour of transformer-based language models (TLMs)? The recent popularity of TLMs and the central role generalised quantifiers have traditionally played in linguistics and logic bring this question into particular focus. The current research investigating this subject has not utilised a task defined purely in a logical sense, and thus, has not captured the underlying logical significance of generalised quantifiers. Consequently, they have not answered the aforementioned question faithfully or adequately. Therefore, we investigate how different generalised quantifiers affect TLMs by employing a textual entailment problem defined in a purely logical sense, namely, model-checking with natural language. Our approach permits the automatic construction of datasets with respect to which we can assess the ability of TLMs to learn the meanings of generalised quantifiers. Our investigation reveals that TLMs generally can comprehend the logical semantics of the most common generalised quantifiers, but that distinct quantifiers influence TLMs in varying ways.</abstract>
       <url hash="43c0d84c">2023.emnlp-main.536</url>
@@ -7569,7 +7569,7 @@
       <author><first>Eric</first><last>Chang</last></author>
       <author><first>Amilcare</first><last>Gentili</last></author>
       <author><first>Julian</first><last>McAuley</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>8725-8744</pages>
       <abstract>Curated datasets for healthcare are often limited due to the need of human annotations from experts. In this paper, we present MedEval, a multi-level, multi-task, and multi-domain medical benchmark to facilitate the development of language models for healthcare. MedEval is comprehensive and consists of data from several healthcare systems and spans 35 human body regions from 8 examination modalities. With 22,779 collected sentences and 21,228 reports, we provide expert annotations at multiple levels, offering a granular potential usage of the data and supporting a wide range of tasks. Moreover, we systematically evaluated 10 generic and domain-specific language models under zero-shot and finetuning settings, from domain-adapted baselines in healthcare to general-purposed state-of-the-art large language models (e.g., ChatGPT). Our evaluations reveal varying effectiveness of the two categories of language models across different tasks, from which we notice the importance of instruction tuning for few-shot usage of large language models. Our investigation paves the way toward benchmarking language models for healthcare and provides valuable insights into the strengths and limitations of adopting large language models in medical domains, informing their practical applications and future advancements.</abstract>
       <url hash="c2264e30">2023.emnlp-main.540</url>
@@ -7591,7 +7591,7 @@
     <paper id="542">
       <title>Are Embedded Potatoes Still Vegetables? On the Limitations of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings for Lexical Semantics</title>
       <author><first>Xuyou</first><last>Cheng</last></author>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
       <author><first>Guy</first><last>Emerson</last></author>
       <pages>8763-8775</pages>
       <abstract>Knowledge Base Embedding (KBE) models have been widely used to encode structured information from knowledge bases, including WordNet. However, the existing literature has predominantly focused on link prediction as the evaluation task, often neglecting exploration of the models’ semantic capabilities. In this paper, we investigate the potential disconnect between the performance of KBE models of WordNet on link prediction and their ability to encode semantic information, highlighting the limitations of current evaluation protocols. Our findings reveal that some top-performing KBE models on the WN18RR benchmark exhibit subpar results on two semantic tasks and two downstream tasks. These results demonstrate the inadequacy of link prediction benchmarks for evaluating the semantic capabilities of KBE models, suggesting the need for a more targeted assessment approach.</abstract>
@@ -7715,7 +7715,7 @@
       <author><first>Yftah</first><last>Ziser</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <author><first>Edoardo</first><last>Ponti</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <pages>8914-8932</pages>
       <abstract>Hallucinations pose a significant challenge to the reliability of neural models for abstractive summarisation. While automatically generated summaries may be fluent, they often lack faithfulness to the original document. This issue becomes even more pronounced in low-resource languages, where summarisation requires cross-lingual transfer. With the existing faithful metrics focusing on English, even measuring the extent of this phenomenon in cross-lingual settings is hard. To address this, we first develop a novel metric, mFACT, evaluating the faithfulness of non-English summaries, leveraging translation-based transfer from multiple English faithfulness metrics. Through extensive experiments in multiple languages, we demonstrate that mFACT is best suited to detect hallucinations compared to alternative metrics. With mFACT, we assess a broad range of multilingual large language models, and find that they all tend to hallucinate often in languages different from English. We then propose a simple but effective method to reduce hallucinations in cross-lingual transfer, which weighs the loss of each training example by its faithfulness score. This method drastically increases both performance and faithfulness according to both automatic and human evaluation when compared to strong baselines for cross-lingual transfer such as MAD-X. Our code and dataset are available at https://github.com/yfqiu-nlp/mfact-summ.</abstract>
       <url hash="f4ca325d">2023.emnlp-main.551</url>
@@ -7765,7 +7765,7 @@
       <author><first>Zhiliang</first><last>Tian</last></author>
       <author><first>Xin</first><last>Niu</last></author>
       <author><first>Changjian</first><last>Wang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <author><first>Dacheng</first><last>Tao</last></author>
       <pages>8964-8974</pages>
       <abstract>Text classification tasks often encounter few-shot scenarios with limited labeled data, and addressing data scarcity is crucial. Data augmentation with mixup merges sample pairs to generate new pseudos, which can relieve the data deficiency issue in text classification. However, the quality of pseudo-samples generated by mixup exhibits significant variations. Most of the mixup methods fail to consider the varying degree of learning difficulty in different stages of training. And mixup generates new samples with one-hot labels, which encourages the model to produce a high prediction score for the correct class that is much larger than other classes, resulting in the model’s over-confidence. In this paper, we propose a self-evolution learning (SE) based mixup approach for data augmentation in text classification, which can generate more adaptive and model-friendly pseudo samples for the model training. SE caters to the growth of the model learning ability and adapts to the ability when generating training samples. To alleviate the model over-confidence, we introduce an instance-specific label smoothing regularization approach, which linearly interpolates the model’s output and one-hot labels of the original samples to generate new soft labels for label mixing up. Through experimental analysis, experiments show that our SE brings consistent and significant improvements upon different mixup methods. In-depth analyses demonstrate that SE enhances the model’s generalization ability.</abstract>
@@ -7828,7 +7828,7 @@
     </paper>
     <paper id="560">
       <title><fixed-case>CLAD</fixed-case>-<fixed-case>ST</fixed-case>: Contrastive Learning with Adversarial Data for Robust Speech Translation</title>
-      <author><first>Sathish</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish</first><last>Indurthi</last></author>
       <author><first>Shamil</first><last>Chollampatt</last></author>
       <author><first>Ravi</first><last>Agrawal</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
@@ -7846,7 +7846,7 @@
       <author><first>Zhen</first><last>Wu</last></author>
       <author><first>Yawen</first><last>Ouyang</last></author>
       <author><first>Jianbing</first><last>Zhang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>9057-9070</pages>
       <abstract>Multimodal Aspect-based Sentiment Analysis (MABSA) is a fine-grained Sentiment Analysis task, which has attracted growing research interests recently. Existing work mainly utilizes image information to improve the performance of MABSA task. However, most of the studies overestimate the importance of images since there are many noise images unrelated to the text in the dataset, which will have a negative impact on model learning. Although some work attempts to filter low-quality noise images by setting thresholds, relying on thresholds will inevitably filter out a lot of useful image information. Therefore, in this work, we focus on whether the negative impact of noisy images can be reduced without modifying the data. To achieve this goal, we borrow the idea of Curriculum Learning and propose a Multi-grained Multi-curriculum Denoising Framework (M2DF), which can achieve denoising by adjusting the order of training data. Extensive experimental results show that our framework consistently outperforms state-of-the-art work on three sub-tasks of MABSA.</abstract>
       <url hash="16412d46">2023.emnlp-main.561</url>
@@ -7859,7 +7859,7 @@
       <author><first>Siyuan</first><last>Chen</last></author>
       <author><first>Zhiling</first><last>Zhang</last></author>
       <author><first>Mengyue</first><last>Wu</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>9071-9084</pages>
       <abstract>Existing Mental Disease Detection (MDD) research largely studies the detection of a single disorder, overlooking the fact that mental diseases might occur in tandem. Many approaches are not backed by domain knowledge (e.g., psychiatric symptoms) and thus fail to produce interpretable results. To tackle these issues, we propose an MDD framework that is capable of learning the shared clues of all diseases, while also capturing the specificity of each single disease. The two-stream architecture which simultaneously processes text and symptom features can combine the strength of both modalities and offer knowledge-based explainability. Experiments on the detection of 7 diseases show that our model can boost detection performance by more than 10%, especially in relatively rare classes.</abstract>
       <url hash="863049d4">2023.emnlp-main.562</url>
@@ -8009,7 +8009,7 @@
     <paper id="573">
       <title>Causal Reasoning through Two Cognition Layers for Improving Generalization in Visual Question Answering</title>
       <author><first>Trang</first><last>Nguyen</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>9221-9236</pages>
       <abstract>Generalization in Visual Question Answering (VQA) requires models to answer questions about images with contexts beyond the training distribution. Existing attempts primarily refine unimodal aspects, overlooking enhancements in multimodal aspects. Besides, diverse interpretations of the input lead to various modes of answer generation, highlighting the role of causal reasoning between interpreting and answering steps in VQA. Through this lens, we propose Cognitive pathways VQA (CopVQA) improving the multimodal predictions by emphasizing causal reasoning factors. CopVQA first operates a pool of pathways that capture diverse causal reasoning flows through interpreting and answering stages. Mirroring human cognition, we decompose the responsibility of each stage into distinct experts and a cognition-enabled component (CC). The two CCs strategically execute one expert for each stage at a time. Finally, we prioritize answer predictions governed by pathways involving both CCs while disregarding answers produced by either CC, thereby emphasizing causal reasoning and supporting generalization. Our experiments on real-life and medical data consistently verify that CopVQA improves VQA performance and generalization across baselines and domains. Notably, CopVQA achieves a new state-of-the-art (SOTA) on the PathVQA dataset and comparable accuracy to the current SOTA on VQA-CPv2, VQAv2, and VQA- RAD, with one-fourth of the model size.</abstract>
       <url hash="1c524687">2023.emnlp-main.573</url>
@@ -8023,7 +8023,7 @@
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Zican</first><last>Dong</last></author>
       <author><first>Keming</first><last>Ye</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>9237-9251</pages>
       <abstract>In this paper, we aim to improve the reasoning ability of large language models (LLMs) over structured data in a unified way. Inspired by the studies on tool augmentation for LLMs, we develop an Iterative Reading-then-Reasoning (IRR) framework to solve question answering tasks based on structured data, called StructGPT. In this framework, we construct the specialized interfaces to collect relevant evidence from structured data (i.e., reading), and let LLMs concentrate on the reasoning task based on the collected information (i.e., reasoning). Specially, we propose an invoking-linearization-generation procedure to support LLMs in reasoning on the structured data with the help of the interfaces. By iterating this procedure with provided interfaces, our approach can gradually approach the target answers to a given query. Experiments conducted on three types of structured data show that StructGPT greatly improves the performance of LLMs, under the few-shot and zero-shot settings.</abstract>
@@ -8050,7 +8050,7 @@
       <author><first>Mrigank</first><last>Raman</last></author>
       <author><first>Pratyush</first><last>Maini</last></author>
       <author><first>J</first><last>Kolter</last></author>
-      <author><first>Zachary</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last></author>
       <author><first>Danish</first><last>Pruthi</last></author>
       <pages>9266-9286</pages>
       <abstract>In recent years, NLP practitioners have converged on the following practice: (i) import an off-the-shelf pretrained (masked) language model; (ii) append a multilayer perceptron atop the CLS token’s hidden representation (with randomly initialized weights); and (iii) fine-tune the entire model on a downstream task (MLP-FT). This procedure has produced massive gains on standard NLP benchmarks, but these models remain brittle, even to mild adversarial perturbations. In this work, we demonstrate surprising gains in adversarial robustness enjoyed by Model-tuning Via Prompts (MVP), an alternative method of adapting to downstream tasks. Rather than appending an MLP head to make output prediction, MVP appends a prompt template to the input, and makes prediction via text infilling/completion. Across 5 NLP datasets, 4 adversarial attacks, and 3 different models, MVP improves performance against adversarial substitutions by an average of 8% over standard methods and even outperforms adversarial training-based state-of-art defenses by 3.5%. By combining MVP with adversarial training, we achieve further improvements in adversarial robustness while maintaining performance on unperturbed examples. Finally, we conduct ablations to investigate the mechanism underlying these gains. Notably, we find that the main causes of vulnerability of MLP-FT can be attributed to the misalignment between pre-training and fine-tuning tasks, and the randomly initialized MLP parameters.</abstract>
@@ -8167,7 +8167,7 @@
       <author><first>Thibault</first><last>Sellam</last></author>
       <author><first>Aditya</first><last>Siddhant</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>9397-9413</pages>
       <abstract>Reliable automatic evaluation of summarization systems is challenging due to the multifaceted and subjective nature of the task. This is especially the case for languages other than English, where human evaluations are scarce. In this work, we introduce SEAHORSE, a dataset for multilingual, multifaceted summarization evaluation. SEAHORSE consists of 96K summaries with human ratings along 6 dimensions of text quality: comprehensibility, repetition, grammar, attribution, main ideas, and conciseness, covering 6 languages, 9 systems, and 4 datasets. As a result of its size and scope, SEAHORSE can serve both as a benchmark to evaluate learnt metrics, as well as a large-scale resource for training such metrics. We show that metrics trained with SEAHORSE achieve strong performance on the out-of-domain meta-evaluation benchmarks TRUE (Honovich et al., 2022) and mFACE (Aharoni et al., 2022). We make the SEAHORSE dataset and metrics publicly available for future research on multilingual and multifaceted summarization evaluation.</abstract>
       <url hash="5c455923">2023.emnlp-main.584</url>
@@ -8259,7 +8259,7 @@
       <title>Multilingual estimation of political-party positioning: From label aggregation to long-input Transformers</title>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
       <author><first>Tanise</first><last>Ceron</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>9497-9511</pages>
       <abstract>Scaling analysis is a technique in computational political science that assigns a political actor (e.g. politician or party) a score on a predefined scale based on a (typically long) body of text (e.g. a parliamentary speech or an election manifesto). For example, political scientists have often used the left–right scale to systematically analyse political landscapes of different countries. NLP methods for automatic scaling analysis can find broad application provided they (i) are able to deal with long texts and (ii) work robustly across domains and languages. In this work, we implement and compare two approaches to automatic scaling analysis of political-party manifestos: label aggregation, a pipeline strategy relying on annotations of individual statements from the manifestos, and long-input-Transformer-based models, which compute scaling values directly from raw text. We carry out the analysis of the Comparative Manifestos Project dataset across 41 countries and 27 languages and find that the task can be efficiently solved by state-of-the-art models, with label aggregation producing the best results.</abstract>
       <url hash="6f7a6e38">2023.emnlp-main.591</url>
@@ -8307,7 +8307,7 @@
       <author><first>Santosh</first><last>T.y.s.s</last></author>
       <author><first>Oana</first><last>Ichim</last></author>
       <author><first>Isabella</first><last>Risini</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Matthias</first><last>Grabmair</last></author>
       <pages>9558-9576</pages>
       <abstract>In legal NLP, Case Outcome Classification (COC) must not only be accurate but also trustworthy and explainable. Existing work in explainable COC has been limited to annotations by a single expert. However, it is well-known that lawyers may disagree in their assessment of case facts. We hence collect a novel dataset RaVE: Rationale Variation in ECHR, which is obtained from two experts in the domain of international human rights law, for whom we observe weak agreement. We study their disagreements and build a two-level task-independent taxonomy, supplemented with COC-specific subcategories. To our knowledge, this is the first work in the legal NLP that focuses on human label variation. We quantitatively assess different taxonomy categories and find that disagreements mainly stem from underspecification of the legal context, which poses challenges given the typically limited granularity and noise in COC metadata. We further assess the explainablility of state-of-the-art COC models on RaVE and observe limited agreement between models and experts. Overall, our case study reveals hitherto underappreciated complexities in creating benchmark datasets in legal NLP that revolve around identifying aspects of a case’s facts supposedly relevant for its outcome.</abstract>
@@ -8331,7 +8331,7 @@
     <paper id="596">
       <title>Statistical Depth for Ranking and Characterizing Transformer-Based Text Embeddings</title>
       <author><first>Parker</first><last>Seegmiller</last></author>
-      <author><first>Sarah Masud</first><last>Preum</last></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last></author>
       <pages>9600-9611</pages>
       <abstract>The popularity of transformer-based text embeddings calls for better statistical tools for measuring distributions of such embeddings. One such tool would be a method for ranking texts within a corpus by centrality, i.e. assigning each text a number signifying how representative that text is of the corpus as a whole. However, an intrinsic center-outward ordering of high-dimensional text representations is not trivial. A <tex-math>\textit{statistical depth}</tex-math> is a function for ranking <tex-math>k</tex-math>-dimensional objects by measuring centrality with respect to some observed <tex-math>k</tex-math>-dimensional distribution. We adopt a statistical depth to measure distributions of transformer-based text embeddings, <tex-math>\textit{transformer-based text embedding (TTE) depth}</tex-math>, and introduce the practical use of this depth for both modeling and distributional inference in NLP pipelines. We first define TTE depth and an associated rank sum test for determining whether two corpora differ significantly in embedding space. We then use TTE depth for the task of in-context learning prompt selection, showing that this approach reliably improves performance over statistical baseline approaches across six text classification tasks. Finally, we use TTE depth and the associated rank sum test to characterize the distributions of synthesized and human-generated corpora, showing that five recent synthetic data augmentation processes cause a measurable distributional shift away from associated human-generated text.</abstract>
       <url hash="f795bdb5">2023.emnlp-main.596</url>
@@ -8386,9 +8386,9 @@
       <author><first>Philippe</first><last>Laban</last></author>
       <author><first>Wojciech</first><last>Kryscinski</last></author>
       <author><first>Divyansh</first><last>Agarwal</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <pages>9662-9676</pages>
       <abstract>With the recent appearance of LLMs in practical settings, having methods that can effectively detect factual inconsistencies is crucial to reduce the propagation of misinformation and improve trust in model outputs. When testing on existing factual consistency benchmarks, we find that a few large language models (LLMs) perform competitively on classification benchmarks for factual inconsistency detection compared to traditional non-LLM methods. However, a closer analysis reveals issues with existing evaluation benchmarks, affecting evaluation precision. To address this, we propose a new protocol for inconsistency detection benchmark creation and implement it in a 10-domain benchmark called SummEdits. This new benchmark is 20 times more cost-effective per sample than previous benchmarks and highly reproducible, as we estimate inter-annotator agreement at about 0.9. Most LLMs struggle on SummEdits, with performance close to random chance. The best-performing model, GPT-4, is still 8% below estimated human performance, highlighting the gaps in LLMs’ ability to reason about facts and detect inconsistencies when they occur.</abstract>
@@ -8416,7 +8416,7 @@
       <author><first>Tin</first><last>Nguyen</last></author>
       <author><first>Jiannan</first><last>Xu</last></author>
       <author><first>Aayushi</first><last>Roy</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>9696-9717</pages>
       <abstract>Recent research at the intersection of AI explainability and fairness has focused on how explanations can improve human-plus-AI task performance as assessed by fairness measures. We propose to characterize what constitutes an explanation that is itself “fair” – an explanation that does not adversely impact specific populations. We formulate a novel evaluation method of “fair explanations” using not just accuracy and label time, but also psychological impact of explanations on different user groups across many metrics (mental discomfort, stereotype activation, and perceived workload). We apply this method in the context of content moderation of potential hate speech, and its differential impact on Asian vs. non-Asian proxy moderators, across explanation approaches (saliency map and counterfactual explanation). We find that saliency maps generally perform better and show less evidence of disparate impact (group) and individual unfairness than counterfactual explanations. Content warning: This paper contains examples of hate speech and racially discriminatory language. The authors do not support such content. Please consider your risk of discomfort carefully before continuing reading!</abstract>
@@ -8427,7 +8427,7 @@
     </paper>
     <paper id="603">
       <title>Bridging Background Knowledge Gaps in Translation with Automatic Explicitation</title>
-      <author><first>HyoJung</first><last>Han</last></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>9718-9735</pages>
@@ -8445,7 +8445,7 @@
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jian</first><last>Liu</last></author>
       <author><first>Wenjuan</first><last>Han</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>9736-9748</pages>
       <abstract>Existing syntactically-controlled paraphrase generation (SPG) models perform promisingly with human-annotated or well-chosen syntactic templates. However, the difficulty of obtaining such templates actually hinders the practical application of SPG models. For one thing, the prohibitive cost makes it unfeasible to manually design decent templates for every source sentence. For another, the templates automatically retrieved by current heuristic methods are usually unreliable for SPG models to generate qualified paraphrases. To escape this dilemma, we propose a novel Quality-based Syntactic Template Retriever (QSTR) to retrieve templates based on the quality of the to-be-generated paraphrases. Furthermore, for situations requiring multiple paraphrases for each source sentence, we design a Diverse Templates Search (DTS) algorithm, which can enhance the diversity between paraphrases without sacrificing quality. Experiments demonstrate that QSTR can significantly surpass existing retrieval methods in generating high-quality paraphrases and even perform comparably with human-annotated templates in terms of reference-free metrics. Additionally, human evaluation and the performance on downstream tasks using our generated paraphrases for data augmentation showcase the potential of our QSTR and DTS algorithm in practical scenarios.</abstract>
       <url hash="13907f5c">2023.emnlp-main.604</url>
@@ -8499,7 +8499,7 @@
       <author><first>Aanisha</first><last>Bhattacharyya</last></author>
       <author><first>Yaman K</first><last>Singla</last></author>
       <author><first>Balaji</first><last>Krishnamurthy</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Changyou</first><last>Chen</last></author>
       <pages>9822-9839</pages>
       <abstract>Multimedia content, such as advertisements and story videos, exhibit a rich blend of creativity and multiple modalities. They incorporate elements like text, visuals, audio, and storytelling techniques, employing devices like emotions, symbolism, and slogans to convey meaning. There is a dearth of large annotated training datasets in the multimedia domain hindering the development of supervised learning models with satisfactory performance for real-world applications. On the other hand, the rise of large language models (LLMs) has witnessed remarkable zero-shot performance in various natural language processing (NLP) tasks, such as emotion classification, question answering, and topic classification. To leverage such advanced techniques to bridge this performance gap in multimedia understanding, we propose verbalizing long videos to generate their descriptions in natural language, followed by performing video-understanding tasks on the generated story as opposed to the original video. Through extensive experiments on fifteen video-understanding tasks, we demonstrate that our method, despite being zero-shot, achieves significantly better results than supervised baselines for video understanding. Furthermore, to alleviate a lack of story understanding benchmarks, we publicly release the first dataset on a crucial task in computational social science on persuasion strategy identification.</abstract>
@@ -8545,7 +8545,7 @@
       <author><first>Michal</first><last>Shmueli-Scheuer</last></author>
       <author><first>Dafna</first><last>Sheinwald</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <pages>9862-9877</pages>
       <abstract>The field of Natural Language Generation (NLG) suffers from a severe shortage of labeled data due to the extremely expensive and time-consuming process involved in manual annotation. A natural approach for coping with this problem is active learning (AL), a well-known machine learning technique for improving annotation efficiency by selectively choosing the most informative examples to label. However, while AL has been well-researched in the context of text classification, its application to NLG remains largely unexplored. In this paper, we present a first systematic study of active learning for NLG, considering a diverse set of tasks and multiple leading selection strategies, and harnessing a strong instruction-tuned model. Our results indicate that the performance of existing AL strategies is inconsistent, surpassing the baseline of random example selection in some cases but not in others. We highlight some notable differences between the classification and generation scenarios, and analyze the selection behaviors of existing AL strategies. Our findings motivate exploring novel approaches for applying AL to generation tasks.</abstract>
       <url hash="a11c243f">2023.emnlp-main.611</url>
@@ -8587,8 +8587,8 @@
       <author><first>Sachin</first><last>Kumar</last></author>
       <author><first>Hila</first><last>Gonen</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>9904-9923</pages>
       <abstract>Language models have graduated from being research prototypes to commercialized products offered as web APIs, and recent works have highlighted the multilingual capabilities of these products. The API vendors charge their users based on usage, more specifically on the number of “tokens” processed or generated by the underlying language models. What constitutes a token, however, is training data and model dependent with a large variance in the number of tokens required to convey the same information in different languages. In this work, we analyze the effect of this non-uniformity on the fairness of an API’s pricing policy across languages. We conduct a systematic analysis of the cost and utility of OpenAI’s language model API on multilingual benchmarks in 22 typologically diverse languages. We show evidence that speakers of a large number of the supported languages are overcharged while obtaining poorer results. These speakers tend to also come from regions where the APIs are less affordable, to begin with. Through these analyses, we aim to increase transparency around language model APIs’ pricing policies and encourage the vendors to make them more equitable.</abstract>
@@ -8646,7 +8646,7 @@
     <paper id="618">
       <title>Whispering <fixed-case>LL</fixed-case>a<fixed-case>MA</fixed-case>: A Cross-Modal Generative Error Correction Framework for Speech Recognition</title>
       <author><first>Srijith</first><last>Radhakrishnan</last></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last></author>
       <author><first>Sumeer Ahmad</first><last>Khan</last></author>
       <author><first>Rohit</first><last>Kumar</last></author>
       <author><first>Narsis A.</first><last>Kiani</last></author>
@@ -8662,7 +8662,7 @@
     <paper id="619">
       <title>Reducing Sequence Length by Predicting Edit Spans with Large Language Models</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>10017-10029</pages>
       <abstract>Large Language Models (LLMs) have demonstrated remarkable performance in various tasks and gained significant attention. LLMs are also used for local sequence transduction tasks, including grammatical error correction (GEC) and formality style transfer, where most tokens in a source text are kept unchanged. However, the models that generate all target tokens in such tasks have a tendency to simply copy the input text as is, without making needed changes, because the difference between input and output texts is minimal in the training data. This is also inefficient because the computational cost grows quadratically with the target sequence length with Transformer. This paper proposes predicting edit spans for the source text for local sequence transduction tasks. Representing an edit span with a position of the source text and corrected tokens, we can reduce the length of the target sequence and the computational cost for inference. We apply instruction tuning for LLMs on the supervision data of edit spans. Experiments show that the proposed method achieves comparable performance to the baseline in four tasks, paraphrasing, formality style transfer, GEC, and text simplification, despite reducing the length of the target text by as small as 21%. Furthermore, we report that the task-specific fine-tuning with the proposed method achieved state-of-the-art performance in the four tasks.</abstract>
       <url hash="6032a2e2">2023.emnlp-main.619</url>
@@ -8690,7 +8690,7 @@
       <title>Rethinking the Evaluation for Conversational Recommendation in the Era of Large Language Models</title>
       <author id="xiaolei-wang-renmin"><first>Xiaolei</first><last>Wang</last></author>
       <author><first>Xinyu</first><last>Tang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Jingyuan</first><last>Wang</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>10052-10065</pages>
@@ -8742,7 +8742,7 @@
     <paper id="625">
       <title>Make Every Example Count: On the Stability and Utility of Self-Influence for Learning from Noisy <fixed-case>NLP</fixed-case> Datasets</title>
       <author><first>Irina</first><last>Bejan</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Katja</first><last>Filippova</last></author>
       <pages>10107-10121</pages>
       <abstract>Increasingly larger datasets have become a standard ingredient to advancing the state-of-the-art in NLP. However, data quality might have already become the bottleneck to unlock further gains. Given the diversity and the sizes of modern datasets, standard data filtering is not straight-forward to apply, because of the multifacetedness of the harmful data and elusiveness of filtering rules that would generalize across multiple tasks. We study the fitness of task-agnostic self-influence scores of training examples for data cleaning, analyze their efficacy in capturing naturally occurring outliers, and investigate to what extent self-influence based data cleaning can improve downstream performance in machine translation, question answering and text classification, building up on recent approaches to self-influence calculation and automated curriculum learning.</abstract>
@@ -8754,9 +8754,9 @@
     <paper id="626">
       <title>Appraising the Potential Uses and Harms of <fixed-case>LLM</fixed-case>s for Medical Systematic Reviews</title>
       <author><first>Hye</first><last>Yun</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
       <author><first>Thomas</first><last>Trikalinos</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>10122-10139</pages>
       <abstract>Medical systematic reviews play a vital role in healthcare decision making and policy. However, their production is time-consuming, limiting the availability of high-quality and up-to-date evidence summaries. Recent advancements in LLMs offer the potential to automatically generate literature reviews on demand, addressing this issue. However, LLMs sometimes generate inaccurate (and potentially misleading) texts by hallucination or omission. In healthcare, this can make LLMs unusable at best and dangerous at worst. We conducted 16 interviews with international systematic review experts to characterize the perceived utility and risks of LLMs in the specific context of medical evidence reviews. Experts indicated that LLMs can assist in the writing process by drafting summaries, generating templates, distilling information, and crosschecking information. They also raised concerns regarding confidently composed but inaccurate LLM outputs and other potential downstream harms, including decreased accountability and proliferation of low-quality reviews. Informed by this qualitative analysis, we identify criteria for rigorous evaluation of biomedical LLMs aligned with domain expert views.</abstract>
       <url hash="320746ab">2023.emnlp-main.626</url>
@@ -8943,7 +8943,7 @@
       <author><first>Yatin</first><last>Nandwani</last></author>
       <author><first>Vineet</first><last>Kumar</last></author>
       <author><first>Dinesh</first><last>Raghu</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Luis</first><last>Lastras</last></author>
       <pages>10335-10347</pages>
       <abstract>A major concern in using deep learning based generative models for document-grounded dialogs is the potential generation of responses that are not faithful to the underlying document. Existing automated metrics used for evaluating the faithfulness of response with respect to the grounding document measure the degree of similarity between the generated response and the document’s content. However, these automated metrics are far from being well aligned with human judgments. Therefore, to improve the measurement of faithfulness, we propose a new metric that utilizes (Conditional) Point-wise Mutual Information (PMI) between the generated response and the source document, conditioned on the dialogue. PMI quantifies the extent to which the document influences the generated response – with a higher PMI indicating a more faithful response. We build upon this idea to create a new decoding technique that incorporates PMI into the response generation process to predict more faithful responses. Our experiments on the BEGIN benchmark demonstrate an improved correlation of our metric with human evaluation. We also show that our decoding technique is effective in generating more faithful responses when compared to standard decoding techniques on a set of publicly available document-grounded dialog datasets.</abstract>
@@ -9158,7 +9158,7 @@
       <author><first>Zied</first><last>Bouraoui</last></author>
       <author><first>Na</first><last>Li</last></author>
       <author><first>Usashi</first><last>Chatterjee</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>10587-10596</pages>
       <abstract>Concepts play a central role in many applications. This includes settings where concepts have to be modelled in the absence of sentence context. Previous work has therefore focused on distilling decontextualised concept embeddings from language models. But concepts can be modelled from different perspectives, whereas concept embeddings typically mostly capture taxonomic structure. To address this issue, we propose a strategy for identifying what different concepts, from a potentially large concept vocabulary, have in common with others. We then represent concepts in terms of the properties they share with the other concepts. To demonstrate the practical usefulness of this way of modelling concepts, we consider the task of ultra-fine entity typing, which is a challenging multi-label classification problem. We show that by augmenting the label set with shared properties, we can improve the performance of the state-of-the-art models for this task.</abstract>
@@ -9170,7 +9170,7 @@
     <paper id="655">
       <title><fixed-case>ALD</fixed-case>i: Quantifying the <fixed-case>A</fixed-case>rabic Level of Dialectness of Text</title>
       <author><first>Amr</first><last>Keleg</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Walid</first><last>Magdy</last></author>
       <pages>10597-10611</pages>
       <abstract>Transcribed speech and user-generated text in Arabic typically contain a mixture of Modern Standard Arabic (MSA), the standardized language taught in schools, and Dialectal Arabic (DA), used in daily communications. To handle this variation, previous work in Arabic NLP has focused on Dialect Identification (DI) on the sentence or the token level. However, DI treats the task as binary, whereas we argue that Arabic speakers perceive a spectrum of dialectness, which we operationalize at the sentence level as the Arabic Level of Dialectness (ALDi), a continuous linguistic variable. We introduce the AOC-ALDi dataset (derived from the AOC dataset), containing 127,835 sentences (17% from news articles and 83% from user comments on those articles) which are manually labeled with their level of dialectness. We provide a detailed analysis of AOC-ALDi and show that a model trained on it can effectively identify levels of dialectness on a range of other corpora (including dialects and genres not included in AOC-ALDi), providing a more nuanced picture than traditional DI systems. Through case studies, we illustrate how ALDi can reveal Arabic speakers’ stylistic choices in different situations, a useful property for sociolinguistic analyses.</abstract>
@@ -9211,7 +9211,7 @@
     <paper id="658">
       <title>Cross-Lingual Consistency of Factual Knowledge in Multilingual Language Models</title>
       <author><first>Jirui</first><last>Qi</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <pages>10650-10666</pages>
       <abstract>Multilingual large-scale Pretrained Language Models (PLMs) have been shown to store considerable amounts of factual knowledge, but large variations are observed across languages. With the ultimate goal of ensuring that users with different language backgrounds obtain consistent feedback from the same model, we study the cross-lingual consistency (CLC) of factual knowledge in various multilingual PLMs. To this end, we propose a Ranking-based Consistency (RankC) metric to evaluate knowledge consistency across languages independently from accuracy. Using this metric, we conduct an in-depth analysis of the determining factors for CLC, both at model level and at language-pair level. Among other results, we find that increasing model size leads to higher factual probing accuracy in most languages, but does not improve cross-lingual consistency. Finally, we conduct a case study on CLC when new factual associations are inserted in the PLMs via model editing. Results on a small sample of facts inserted in English reveal a clear pattern whereby the new piece of knowledge transfers only to languages with which English has a high RankC score. All code and data are released at https://github.com/Betswish/Cross-Lingual-Consistency.</abstract>
@@ -9235,7 +9235,7 @@
       <title>Bridging the Digital Divide: Performance Variation across Socio-Economic Factors in Vision-Language Models</title>
       <author><first>Joan</first><last>Nwatu</last></author>
       <author><first>Oana</first><last>Ignat</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>10686-10702</pages>
       <abstract>Despite the impressive performance of current AI models reported across various tasks, performance reports often do not include evaluations of how these models perform on the specific groups that will be impacted by these technologies. Among the minority groups under-represented in AI, data from low-income households are often overlooked in data collection and model evaluation. We evaluate the performance of a state-of-the-art vision-language model (CLIP) on a geo-diverse dataset containing household images associated with different income values (DollarStreet) and show that performance inequality exists among households of different income levels. Our results indicate that performance for the poorer groups is consistently lower than the wealthier groups across various topics and countries. We highlight insights that can help mitigate these issues and propose actionable steps for economic-level inclusive AI development.</abstract>
       <url hash="1e86ee82">2023.emnlp-main.660</url>
@@ -9246,7 +9246,7 @@
     <paper id="661">
       <title>Conceptor-Aided Debiasing of Large Language Models</title>
       <author><first>Li</first><last>Yifei</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>10703-10727</pages>
       <abstract>Pre-trained large language models (LLMs) reflect the inherent social biases of their training corpus. Many methods have been proposed to mitigate this issue, but they often fail to debias or they sacrifice model accuracy. We use *conceptors*–a soft projection method–to identify and remove the bias subspace in LLMs such as BERT and GPT. We propose two methods of applying conceptors (1) bias subspace projection by post-processing by the conceptor NOT operation; and (2) a new architecture, conceptor-intervened BERT (CI-BERT), which explicitly incorporates the conceptor projection into all layers during training. We find that conceptor post-processing achieves state-of-the-art (SoTA) debiasing results while maintaining LLMs’ performance on the GLUE benchmark. Further, it is robust in various scenarios and can mitigate intersectional bias efficiently by its AND operation on the existing bias subspaces. Although CI-BERT’s training takes all layers’ bias into account and can beat its post-processing counterpart in bias mitigation, CI-BERT reduces the language model accuracy. We also show the importance of carefully constructing the bias subspace. The best results are obtained by removing outliers from the list of biased words, combining them (via the OR operation), and computing their embeddings using the sentences from a cleaner corpus.</abstract>
@@ -9258,7 +9258,7 @@
     <paper id="662">
       <title><fixed-case>AMR</fixed-case> Parsing is Far from Solved: <fixed-case>G</fixed-case>r<fixed-case>APES</fixed-case>, the Granular <fixed-case>AMR</fixed-case> Parsing Evaluation Suite</title>
       <author><first>Jonas</first><last>Groschwitz</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Meaghan</first><last>Fowlie</last></author>
       <pages>10728-10752</pages>
@@ -9479,7 +9479,7 @@
     <paper id="678">
       <title>We Are What We Repeatedly Do: Inducing and Deploying Habitual Schemas in Persona-Based Responses</title>
       <author><first>Benjamin</first><last>Kane</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>10998-11016</pages>
       <abstract>Many practical applications of dialogue technology require the generation of responses according to a particular developer-specified persona. While a variety of personas can be elicited from recent large language models, the opaqueness and unpredictability of these models make it desirable to be able to specify personas in an explicit form. In previous work, personas have typically been represented as sets of one-off pieces of self-knowledge that are retrieved by the dialogue system for use in generation. However, in realistic human conversations, personas are often revealed through story-like narratives that involve rich habitual knowledge – knowledge about kinds of events that an agent often participates in (e.g., work activities, hobbies, sporting activities, favorite entertainments, etc.), including typical goals, sub-events, preconditions, and postconditions of those events. We capture such habitual knowledge using an explicit schema representation, and propose an approach to dialogue generation that retrieves relevant schemas to condition a large language model to generate persona-based responses. Furthermore, we demonstrate a method for bootstrapping the creation of such schemas by first generating generic passages from a set of simple facts, and then inducing schemas from the generated passages.</abstract>
       <url hash="14a6c867">2023.emnlp-main.678</url>
@@ -9492,7 +9492,7 @@
       <author><first>Qi</first><last>Jia</last></author>
       <author><first>Siyu</first><last>Ren</last></author>
       <author><first>Yizhu</first><last>Liu</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>11017-11031</pages>
       <abstract>Despite tremendous improvements in natural language generation, summarization models still suffer from the unfaithfulness issue. Previous work evaluates faithfulness either using models trained on the other tasks or in-domain synthetic data, or prompting a large model such as ChatGPT. This paper proposes to do zero-shot faithfulness evaluation simply with a moderately-sized foundation language model. We introduce a new metric FFLM, which is a combination of probability changes based on the intuition that prefixing a piece of text that is consistent with the output will increase the probability of predicting the output. Experiments show that FFLM performs competitively with or even outperforms ChatGPT on both inconsistency detection and faithfulness rating with 24x fewer parameters. FFLM also achieves improvements over other strong baselines.</abstract>
       <url hash="9188e3a7">2023.emnlp-main.679</url>
@@ -9544,7 +9544,7 @@
     <paper id="683">
       <title>A Predictive Factor Analysis of Social Biases and Task-Performance in Pretrained Masked Language Models</title>
       <author><first>Yi</first><last>Zhou</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
       <pages>11082-11100</pages>
       <abstract>Various types of social biases have been reported with pretrained Masked Language Models (MLMs) in prior work. However, multiple underlying factors are associated with an MLM such as its model size, size of the training data, training objectives, the domain from which pretraining data is sampled, tokenization, and languages present in the pretrained corpora, to name a few. It remains unclear as to which of those factors influence social biases that are learned by MLMs. To study the relationship between model factors and the social biases learned by an MLM, as well as the downstream task performance of the model, we conduct a comprehensive study over 39 pretrained MLMs covering different model sizes, training objectives, tokenization methods, training data domains and languages. Our results shed light on important factors often neglected in prior literature, such as tokenization or model objectives.</abstract>
@@ -9584,7 +9584,7 @@
     </paper>
     <paper id="686">
       <title><fixed-case>S</fixed-case>p<fixed-case>EL</fixed-case>: Structured Prediction for Entity Linking</title>
-      <author><first>Hassan</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>11123-11137</pages>
       <abstract>Entity linking is a prominent thread of research focused on structured data creation by linking spans of text to an ontology or knowledge source. We revisit the use of structured prediction for entity linking which classifies each individual input token as an entity, and aggregates the token predictions. Our system, called SpEL (Structured prediction for Entity Linking) is a state-of-the-art entity linking system that uses some new ideas to apply structured prediction to the task of entity linking including: two refined fine-tuning steps; a context sensitive prediction aggregation strategy; reduction of the size of the model’s output vocabulary, and; we address a common problem in entity-linking systems where there is a training vs. inference tokenization mismatch. Our experiments show that we can outperform the state-of-the-art on the commonly used AIDA benchmark dataset for entity linking to Wikipedia. Our method is also very compute efficient in terms of number of parameters and speed of inference.</abstract>
@@ -9598,7 +9598,7 @@
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Matthias</first><last>Orlikowski</last></author>
       <author><first>Julia</first><last>Romberg</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>11138-11154</pages>
       <abstract>Many annotation tasks in natural language processing are highly subjective in that there can be different valid and justified perspectives on what is a proper label for a given example. This also applies to the judgment of argument quality, where the assignment of a single ground truth is often questionable. At the same time, there are generally accepted concepts behind argumentation that form a common ground. To best represent the interplay of individual and shared perspectives, we consider a continuum of approaches ranging from models that fully aggregate perspectives into a majority label to “share nothing”-architectures in which each annotator is considered in isolation from all other annotators. In between these extremes, inspired by models used in the field of recommender systems, we investigate the extent to which architectures that predict labels for single annotators but include layers that model the relations between different annotators are beneficial. By means of two tasks of argument quality classification (argument concreteness and validity/novelty of conclusions), we show that recommender architectures increase the averaged annotator-individual F1-scores up to 43% over a majority-label model. Our findings indicate that approaches to subjectivity can benefit from relating individual perspectives.</abstract>
       <url hash="9d23d7b4">2023.emnlp-main.687</url>
@@ -9621,8 +9621,8 @@
     </paper>
     <paper id="689">
       <title>clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents</title>
-      <author><first>Kranti</first><last>Chalamalasetti</last></author>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="kranti-chalamalasetti"><first>Kranti</first><last>Chalamalasetti</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Sherzod</first><last>Hakimov</last></author>
       <author><first>Brielen</first><last>Madureira</last></author>
       <author><first>Philipp</first><last>Sadler</last></author>
@@ -9661,7 +9661,7 @@
     <paper id="692">
       <title>Anchoring Fine-tuning of Sentence Transformer with Semantic Label Information for Efficient Truly Few-shot Classification</title>
       <author><first>Amalie</first><last>Pauli</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>11254-11264</pages>
       <abstract>Few-shot classification is a powerful technique, but training requires substantial computing power and data. We propose an efficient method with small model sizes and less training data with only 2-8 training instances per class. Our proposed method, AncSetFit, targets low data scenarios by anchoring the task and label information through sentence embeddings in fine-tuning a Sentence Transformer model. It uses contrastive learning and a triplet loss to enforce training instances of a class to be closest to its own textual semantic label information in the embedding space - and thereby learning to embed different class instances more distinct. AncSetFit obtains strong performance in data-sparse scenarios compared to existing methods across SST-5, Emotion detection, and AG News data, even with just two examples per class.</abstract>
@@ -9675,10 +9675,10 @@
       <author><first>Jon</first><last>Saad-Falcon</last></author>
       <author><first>Omar</first><last>Khattab</last></author>
       <author><first>Keshav</first><last>Santhanam</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Martin</first><last>Franz</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Md</first><last>Sultan</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>11265-11279</pages>
@@ -9750,7 +9750,7 @@
       <title>An Integrative Survey on Mental Health Conversational Agents to Bridge Computer Science and Medical Perspectives</title>
       <author><first>Young Min</first><last>Cho</last></author>
       <author><first>Sunny</first><last>Rai</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <author><first>Sharath</first><last>Guntuku</last></author>
       <pages>11346-11369</pages>
@@ -9808,7 +9808,7 @@
       <author><first>Guoqing</first><last>Zheng</last></author>
       <author><first>Victor</first><last>Dibia</last></author>
       <author><first>Ahmed</first><last>Awadallah</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>11445-11475</pages>
       <abstract>The remarkable abilities of large language models (LLMs) like ChatGPT and GPT-4 partially stem from the post-training processes involving human preferences encoded within a reward model as part of a Reinforcement Learning from Human Feedback (RLHF) regimen. These reward models (RMs) often lack direct knowledge of why, or under what principles, the preferences annotations were made. In this study, we identify principles that guide RMs to better align with human preferences, and then develop an axiomatic framework to generate a rich variety of preference signals to uphold them. We use these axiomatic signals to train a model for the scoring answers to longform questions. Our approach yields a <b>Preference Model</b> with only about 220M parameters that agrees with gold human-annotated preference labels more often than GPT-4. The contributions of this work include: training a standalone preference model that can score human- and LLM-generated answers on the same scale; developing an axiomatic framework for generating training data pairs tailored to certain principles; and showing that a small amount of axiomatic signals can help small models outperform GPT-4 in preference scoring. We intend to release our axiomatic data and model.</abstract>
       <url hash="3bc18db6">2023.emnlp-main.702</url>
@@ -9846,7 +9846,7 @@
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>11505-11517</pages>
       <abstract>When translating words referring to the speaker, speech translation (ST) systems should not resort to default masculine generics nor rely on potentially misleading vocal traits. Rather, they should assign gender according to the speakers’ preference. The existing solutions to do so, though effective, are hardly feasible in practice as they involve dedicated model re-training on gender-labeled ST data. To overcome these limitations, we propose the first inference-time solution to control speaker-related gender inflections in ST. Our approach partially replaces the (biased) internal language model (LM) implicitly learned by the ST decoder with gender-specific external LMs. Experiments on en<tex-math>\rightarrow</tex-math>es/fr/it show that our solution outperforms the base models and the best training-time mitigation strategy by up to 31.0 and 1.6 points in gender accuracy, respectively, for feminine forms. The gains are even larger (up to 32.0 and 3.4) in the challenging condition where speakers’ vocal traits conflict with their gender.</abstract>
@@ -9920,7 +9920,7 @@
     <paper id="710">
       <title><fixed-case>B</fixed-case>io<fixed-case>FEG</fixed-case>: Generate Latent Features for Biomedical Entity Linking</title>
       <author><first>Xuhui</first><last>Sui</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Xiangrui</first><last>Cai</last></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Baohang</first><last>Zhou</last></author>
@@ -10034,8 +10034,8 @@
       <author><first>Devamanyu</first><last>Hazarika</last></author>
       <author><first>Shikib</first><last>Mehri</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Mahdi</first><last>Namazifar</last></author>
       <pages>11709-11737</pages>
       <abstract>Instruction-based multitasking has played a critical role in the success of large language models (LLMs) in multi-turn dialog applications. While publicly available LLMs have shown promising performance, when exposed to complex instructions with multiple constraints, they lag against state-of-the-art models like ChatGPT. In this work, we hypothesize that the availability of large-scale complex demonstrations is crucial in bridging this gap. Focusing on dialog applications, we propose a novel framework, CESAR, that unifies a large number of dialog tasks in the same format and allows programmatic induction of complex instructions without any manual effort. We apply CESAR on InstructDial, a benchmark for instruction-based dialog tasks. We further enhance InstructDial with new datasets and tasks and utilize CESAR to induce complex tasks with compositional instructions. This results in a new benchmark called InstructDial++, which includes 63 datasets with 86 basic tasks and 68 composite tasks. Through rigorous experiments, we demonstrate the scalability of CESAR in providing rich instructions. Models trained on InstructDial++ can follow compositional prompts, such as prompts that ask for multiple stylistic constraints.</abstract>
@@ -10067,7 +10067,7 @@
       <author><first>Yu</first><last>Hou</last></author>
       <author><first>Nischal</first><last>Chandra</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>11753-11770</pages>
       <abstract>Multimodal counterfactual reasoning is a vital yet challenging ability for AI systems. It involves predicting the outcomes of hypothetical circumstances based on vision and language inputs, which enables AI models to learn from failures and explore hypothetical scenarios. Despite its importance, there are only a few datasets targeting the counterfactual reasoning abilities of multimodal models. Among them, they only cover reasoning over synthetic environments or specific types of events (e.g. traffic collisions), making them hard to reliably benchmark the model generalization ability in diverse real-world scenarios and reasoning dimensions. To overcome these limitations, we develop a video question answering dataset, ACQUIRED: it consists of 3.9K annotated videos, encompassing a wide range of event types and incorporating both first and third-person viewpoints, which ensures a focus on real-world diversity. In addition, each video is annotated with questions that span three distinct dimensions of reasoning, including physical, social, and temporal, which can comprehensively evaluate the model counterfactual abilities along multiple aspects. We benchmark our dataset against several state-of-the-art language-only and multimodal models and experimental results demonstrate a significant performance gap (&gt;13%) between models and humans. The findings suggest that multimodal counterfactual reasoning remains an open challenge and ACQUIRED is a comprehensive and reliable benchmark for inspiring future research in this direction.</abstract>
@@ -10106,7 +10106,7 @@
       <author><first>Jochen</first><last>De Weerdt</last></author>
       <author><first>Kristof</first><last>Coussement</last></author>
       <author><first>Arno</first><last>De Caigny</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>11792-11806</pages>
       <abstract>We introduce CORE, a dataset for few-shot relation classification (RC) focused on company relations and business entities. CORE includes 4,708 instances of 12 relation types with corresponding textual evidence extracted from company Wikipedia pages. Company names and business entities pose a challenge for few-shot RC models due to the rich and diverse information associated with them. For example, a company name may represent the legal entity, products, people, or business divisions depending on the context. Therefore, deriving the relation type between entities is highly dependent on textual context. To evaluate the performance of state-of-the-art RC models on the CORE dataset, we conduct experiments in the few-shot domain adaptation setting. Our results reveal substantial performance gaps, confirming that models trained on different domains struggle to adapt to CORE. Interestingly, we find that models trained on CORE showcase improved out-of-domain performance, which highlights the importance of high-quality data for robust domain generalization. Specifically, the information richness embedded in business entities allows models to focus on contextual nuances, reducing their reliance on superficial clues such as relation-specific verbs. In addition to the dataset, we provide relevant code snippets to facilitate reproducibility and encourage further research in the field. The CORE dataset and code are publicly available at <url>https://github.com/pnborchert/CORE</url>.</abstract>
       <url hash="592f40c5">2023.emnlp-main.722</url>
@@ -10241,7 +10241,7 @@
       <title>An Empirical Study of Translation Hypothesis Ensembling with Large Language Models</title>
       <author><first>António</first><last>Farinhas</last></author>
       <author><first>José</first><last>de Souza</last></author>
-      <author><first>Andre</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last></author>
       <pages>11956-11970</pages>
       <abstract>Large language models (LLMs) are becoming a one-fits-many solution, but they sometimes hallucinate or produce unreliable output. In this paper, we investigate how hypothesis ensembling can improve the quality of the generated text for the specific problem of LLM-based machine translation. We experiment with several techniques for ensembling hypotheses produced by LLMs such as ChatGPT, LLaMA, and Alpaca. We provide a comprehensive study along multiple dimensions, including the method to generate hypotheses (multiple prompts, temperature-based sampling, and beam search) and the strategy to produce the final translation (instruction-based, quality-based reranking, and minimum Bayes risk (MBR) decoding). Our results show that MBR decoding is a very effective method, that translation quality can be improved using a small number of samples, and that instruction tuning has a strong impact on the relation between the diversity of the hypotheses and the sampling temperature.</abstract>
       <url hash="515c472f">2023.emnlp-main.733</url>
@@ -10256,7 +10256,7 @@
       <author><first>Seungjoo</first><last>Lee</last></author>
       <author><first>Sungjoon</first><last>Park</last></author>
       <author><first>Yunxin</first><last>Liu</last></author>
-      <author><first>Jinho</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
       <author><first>Sung-Ju</first><last>Lee</last></author>
       <pages>11971-11988</pages>
       <abstract>Psychiatrists diagnose mental disorders via the linguistic use of patients. Still, due to data privacy, existing passive mental health monitoring systems use alternative features such as activity, app usage, and location via mobile devices. We propose FedTherapist, a mobile mental health monitoring system that utilizes continuous speech and keyboard input in a privacy-preserving way via federated learning. We explore multiple model designs by comparing their performance and overhead for FedTherapist to overcome the complex nature of on-device language model training on smartphones. We further propose a Context-Aware Language Learning (CALL) methodology to effectively utilize smartphones’ large and noisy text for mental health signal sensing. Our IRB-approved evaluation of the prediction of self-reported depression, stress, anxiety, and mood from 46 participants shows higher accuracy of FedTherapist compared with the performance with non-language features, achieving 0.15 AUROC improvement and 8.21% MAE reduction.</abstract>
@@ -10291,7 +10291,7 @@
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Jiuyi</first><last>Li</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>12011-12027</pages>
       <abstract>A persistent goal of multilingual neural machine translation (MNMT) is to continually adapt the model to support new language pairs or improve some current language pairs without accessing the previous training data. To achieve this, the existing methods primarily focus on preventing catastrophic forgetting by making compromises between the original and new language pairs, leading to sub-optimal performance on both translation tasks. To mitigate this problem, we propose a dual importance-based model division method to divide the model parameters into two parts and separately model the translation of the original and new tasks. Specifically, we first remove the parameters that are negligible to the original tasks but essential to the new tasks to obtain a pruned model, which is responsible for the original translation tasks. Then we expand the pruned model with external parameters and fine-tune the newly added parameters with new training data. The whole fine-tuned model will be used for the new translation tasks. Experimental results show that our method can efficiently adapt the original model to various new translation tasks while retaining the performance of the original tasks. Further analyses demonstrate that our method consistently outperforms several strong baselines under different incremental translation scenarios.</abstract>
       <url hash="472dac2a">2023.emnlp-main.736</url>
@@ -10359,10 +10359,10 @@
       <author><first>Kalpesh</first><last>Krishna</last></author>
       <author><first>Xinxi</first><last>Lyu</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Pang</first><last>Koh</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>12076-12100</pages>
       <abstract>Evaluating the factuality of long-form text generated by large language models (LMs) is non-trivial because (1) generations often contain a mixture of supported and unsupported pieces of information, making binary judgments of quality inadequate, and (2) human evaluation is time-consuming and costly. In this paper, we introduce FACTSCORE, a new evaluation that breaks a generation into a series of atomic facts and computes the percentage of atomic facts supported by a reliable knowledge source. We conduct an extensive human evaluation to obtain FACTSCOREs of people biographies generated by several state-of-the-art commercial LMs—InstructGPT, ChatGPT, and the retrieval-augmented PerplexityAI—and report new analysis demonstrating the need for such a fine-grained score (e.g., ChatGPT only achieves 58%). Since human evaluation is costly, we also introduce an automated model that estimates FACTSCORE using retrieval and a strong language model, with less than a 2% error rate. Finally, we use this automated metric to evaluate 6,500 generations from a new set of 13 recent LMs that would have cost $26K if evaluated by humans, with various findings: GPT-4 and ChatGPT are more factual than public models, and Vicuna and Alpaca are some of the best public models. FACTSCORE is available for public use via ‘pip install factscore‘.</abstract>
@@ -10390,7 +10390,7 @@
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Zhang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>12109-12119</pages>
       <abstract>While Chain-of-Thought prompting is popular in reasoning tasks, its application to Large Language Models (LLMs) in Natural Language Understanding (NLU) is under-explored. Motivated by multi-step reasoning of LLMs, we propose Coarse-to-Fine Chain-of-Thought (CoF-CoT) approach that breaks down NLU tasks into multiple reasoning steps where LLMs can learn to acquire and leverage essential concepts to solve tasks from different granularities. Moreover, we propose leveraging semantic-based Abstract Meaning Representation (AMR) structured knowledge as an intermediate step to capture the nuances and diverse structures of utterances, and to understand connections between their varying levels of granularity. Our proposed approach is demonstrated effective in assisting the LLMs adapt to the multi-grained NLU tasks under both zero-shot and few-shot multi-domain settings.</abstract>
       <url hash="38542bb4">2023.emnlp-main.743</url>
@@ -10456,7 +10456,7 @@
       <author><first>Guangliang</first><last>Liu</last></author>
       <author><first>Zhiyu</first><last>Xue</last></author>
       <author><first>Xitong</first><last>Zhang</last></author>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Rongrong</first><last>Wang</last></author>
       <pages>12178-12189</pages>
       <abstract>Fine-tuning pretrained language models (PLMs) for downstream tasks is a large-scale optimization problem, in which the choice of the training algorithm critically determines how well the trained model can generalize to unseen test data, especially in the context of few-shot learning. To achieve good generalization performance and avoid overfitting, techniques such as data augmentation and pruning are often applied. However, adding these regularizations necessitates heavy tuning of the hyperparameters of optimization algorithms, such as the popular Adam optimizer. In this paper, we propose a two-stage fine-tuning method, PAC-tuning, to address this optimization challenge. First, based on PAC-Bayes training, PAC-tuning directly minimizes the PAC-Bayes generalization bound to learn proper parameter distribution. Second, PAC-tuning modifies the gradient by injecting noise with the variance learned in the first stage into the model parameters during training, resulting in a variant of perturbed gradient descent (PGD). In the past, the few-shot scenario posed difficulties for PAC-Bayes training because the PAC-Bayes bound, when applied to large models with limited training data, might not be stringent. Our experimental results across 5 GLUE benchmark tasks demonstrate that PAC-tuning successfully handles the challenges of fine-tuning tasks and outperforms strong baseline methods by a visible margin, further confirming the potential to apply PAC training for any other settings where the Adam optimizer is currently used for training.</abstract>
@@ -10520,7 +10520,7 @@
     <paper id="753">
       <title>Select, Prompt, Filter: Distilling Large Language Models for Summarizing Conversations</title>
       <author><first>Minh-Quang</first><last>Pham</last></author>
-      <author><first>Sathish</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish</first><last>Indurthi</last></author>
       <author><first>Shamil</first><last>Chollampatt</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>12257-12265</pages>
@@ -10572,7 +10572,7 @@
       <title>Prompt as Triggers for Backdoor Attack: Examining the Vulnerability in Language Models</title>
       <author><first>Shuai</first><last>Zhao</last></author>
       <author><first>Jinming</first><last>Wen</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <author><first>Junbo</first><last>Zhao</last></author>
       <author><first>Jie</first><last>Fu</last></author>
       <pages>12303-12317</pages>
@@ -10619,7 +10619,7 @@
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Nurul</first><last>Aisyah</last></author>
       <author><first>Haonan</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>12359-12374</pages>
       <abstract>Although large language models (LLMs) are often pre-trained on large-scale multilingual texts, their reasoning abilities and real-world knowledge are mainly evaluated based on English datasets. Assessing LLM capabilities beyond English is increasingly vital but hindered due to the lack of suitable datasets. In this work, we introduce IndoMMLU, the first multi-task language understanding benchmark for Indonesian culture and languages, which consists of questions from primary school to university entrance exams in Indonesia. By employing professional teachers, we obtain 14,981 questions across 64 tasks and education levels, with 46% of the questions focusing on assessing proficiency in the Indonesian language and knowledge of nine local languages and cultures in Indonesia. Our empirical evaluations show that GPT-3.5 only manages to pass the Indonesian primary school level, with limited knowledge of local Indonesian languages and culture. Other smaller models such as BLOOMZ and Falcon perform at even lower levels.</abstract>
       <url hash="013452cf">2023.emnlp-main.760</url>
@@ -10644,7 +10644,7 @@
       <title>Bridging Information-Theoretic and Geometric Compression in Language Models</title>
       <author><first>Emily</first><last>Cheng</last></author>
       <author><first>Corentin</first><last>Kervadec</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>12397-12420</pages>
       <abstract>For a language model (LM) to faithfully model human language, it must compress vast, potentially infinite information into relatively few dimensions. We propose analyzing compression in (pre-trained) LMs from two points of view: geometric and information-theoretic. We demonstrate that the two views are highly correlated, such that the intrinsic geometric dimension of linguistic data predicts their coding length under the LM. We then show that, in turn, high compression of a linguistic dataset predicts rapid adaptation to that dataset, confirming that being able to compress linguistic information is an important part of successful LM performance. As a practical byproduct of our analysis, we evaluate a battery of intrinsic dimension estimators for the first time on linguistic data, showing that only some encapsulate the relationship between information-theoretic compression, geometric compression, and ease-of-adaptation.</abstract>
       <url hash="5717814a">2023.emnlp-main.762</url>
@@ -10655,7 +10655,7 @@
     <paper id="763">
       <title>Pre-training Language Models for Comparative Reasoning</title>
       <author><first>Mengxia</first><last>Yu</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Wenhao</first><last>Yu</last></author>
       <author><first>Meng</first><last>Jiang</last></author>
       <pages>12421-12433</pages>
@@ -10674,7 +10674,7 @@
       <author><first>Wei</first><last>Zou</last></author>
       <author><first>Shimin</first><last>Tao</last></author>
       <author><first>Hao</first><last>Yang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <pages>12434-12447</pages>
       <abstract>Machine translation (MT) quality estimation (QE) is a crucial task to estimate the quality of MT outputs when reference translations are unavailable. Many studies focus on generating pseudo data using large parallel corpus and achieve remarkable success in the supervised setting. However, pseudo data solutions are less satisfying in unsupervised scenarios because the pseudo labels are inaccurate or the pseudo translations differ from the real ones. To address these problems, we propose to generate pseudo data using the MT model with constrained beam search (CBSQE). CBSQE preserves the reference parts with high MT probabilities as correct translations, while the rest parts as the wrong ones for MT generation. Therefore, CBSQE can reduce the false negative labels caused by synonyms. Overall, beam search will prefer a more real hypothesis with a higher MT generation likelihood. Extensive experiments demonstrate that CBSQE outperforms strong baselines in both supervised and unsupervised settings. Analyses further show the superiority of CBSQE. The code is available at https://github.com/NJUNLP/njuqe.</abstract>
@@ -10688,7 +10688,7 @@
       <author><first>John</first><last>Morris</last></author>
       <author><first>Volodymyr</first><last>Kuleshov</last></author>
       <author><first>Vitaly</first><last>Shmatikov</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>12448-12460</pages>
       <abstract>How much private information do text embeddings reveal about the original text? We investigate the problem of embedding <i>inversion</i>, reconstructing the full text represented in dense text embeddings. We frame the problem as controlled generation: generating text that, when reembedded, is close to a fixed point in latent space. We find that although a naive model conditioned on the embedding performs poorly, a multi-step method that iteratively corrects and re-embeds text is able to recover 92% of 32-token text inputs exactly. We train our model to decode text embeddings from two state-of-the-art embedding models, and also show that our model can recover important personal information (full names) from a dataset of clinical notes.</abstract>
       <url hash="0433e208">2023.emnlp-main.765</url>
@@ -10798,7 +10798,7 @@
       <author><first>Ruochen</first><last>Zhang</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Jan Christian Blaise</first><last>Cruz</last></author>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
       <pages>12567-12582</pages>
       <abstract>Multilingual Large Language Models (LLMs) have recently shown great capabilities in a wide range of tasks, exhibiting state-of-the-art performance through zero-shot or few-shot prompting methods. While there have been extensive studies on their abilities in monolingual tasks, the investigation of their potential in the context of code-switching (CSW), the practice of alternating languages within an utterance, remains relatively uncharted. In this paper, we provide a comprehensive empirical analysis of various multilingual LLMs, benchmarking their performance across four tasks: sentiment analysis, machine translation, summarization and word-level language identification. Our results indicate that despite multilingual LLMs exhibiting promising outcomes in certain tasks using zero or few-shot prompting, they still underperform in comparison to fine-tuned models of much smaller scales. We argue that current “multilingualism’ in LLMs does not inherently imply proficiency with code-switching texts, calling for future research to bridge this discrepancy.</abstract>
@@ -10822,8 +10822,8 @@
     </paper>
     <paper id="776">
       <title>Identification of Multimodal Stance Towards Frames of Communication</title>
-      <author><first>Maxwell</first><last>Weinzierl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="maxwell-weinzierl"><first>Maxwell</first><last>Weinzierl</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>12597-12609</pages>
       <abstract>Frames of communication are often evoked in multimedia documents. When an author decides to add an image to a text, one or both of the modalities may evoke a communication frame. Moreover, when evoking the frame, the author also conveys her/his stance towards the frame. Until now, determining if the author is in favor of, against or has no stance towards the frame was performed automatically only when processing texts. This is due to the absence of stance annotations on multimedia documents. In this paper we introduce MMVax-Stance, a dataset of 11,300 multimedia documents retrieved from social media, which have stance annotations towards 113 different frames of communication. This dataset allowed us to experiment with several models of multimedia stance detection, which revealed important interactions between texts and images in the inference of stance towards communication frames. When inferring the text/image relations, a set of 46,606 synthetic examples of multimodal documents with known stance was generated. This greatly impacted the quality of identifying multimedia stance, yielding an improvement of 20% in F1-score.</abstract>
       <url hash="420ab511">2023.emnlp-main.776</url>
@@ -10952,7 +10952,7 @@
       <title>Rethinking Negative Pairs in Code Search</title>
       <author><first>Haochen</first><last>Li</last></author>
       <author><first>Xin</first><last>Zhou</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>12760-12774</pages>
       <abstract>Recently, contrastive learning has become a key component in fine-tuning code search models for software development efficiency and effectiveness. It pulls together positive code snippets while pushing negative samples away given search queries. Among contrastive learning, InfoNCE is the most widely used loss function due to its better performance. However, the following problems in negative samples of InfoNCE may deteriorate its representation learning: 1) The existence of false negative samples in large code corpora due to duplications. 2). The failure to explicitly differentiate between the potential relevance of negative samples. As an example, a bubble sorting algorithm example is less “negative” than a file saving function for the quick sorting algorithm query. In this paper, we tackle the above problems by proposing a simple yet effective Soft-InfoNCE loss that inserts weight terms into InfoNCE. In our proposed loss function, we apply three methods to estimate the weights of negative pairs and show that the vanilla InfoNCE loss is a special case of Soft-InfoNCE. Theoretically, we analyze the effects of Soft-InfoNCE on controlling the distribution of learnt code representations and on deducing a more precise mutual information estimation. We furthermore discuss the superiority of proposed loss functions with other design alternatives. Extensive experiments demonstrate the effectiveness of Soft-InfoNCE and weights estimation methods under state-of-the-art code search models on a large-scale public dataset consisting of six programming languages.</abstract>
@@ -11055,7 +11055,7 @@
       <title>Context Compression for Auto-regressive Transformers with Sentinel Tokens</title>
       <author><first>Siyu</first><last>Ren</last></author>
       <author><first>Qi</first><last>Jia</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>12860-12867</pages>
       <abstract>The quadratic complexity of the attention module makes it gradually become the bulk of compute in Transformer-based LLMs during generation. Moreover, the excessive key-value cache that arises when dealing with long inputs also brings severe issues on memory footprint and inference latency. In this work, we propose a plug-and-play approach that is able to incrementally compress the intermediate activation of a specified span of tokens into compact ones, thereby reducing both memory and computational cost when processing subsequent context. Experiments on both in-domain language modeling and zero-shot open-ended document generation demonstrate the advantage of our approach over sparse attention baselines in terms of fluency, n-gram matching, and semantic similarity. At last, we comprehensively profile the benefit of context compression on improving the system throughout. Code is available at <url>https://github.com/DRSY/KV_Compression</url>.</abstract>
       <url hash="1bb561e3">2023.emnlp-main.794</url>
@@ -11097,7 +11097,7 @@
       <author><first>Terry</first><last>Ruas</last></author>
       <author><first>Mohamed</first><last>Abdalla</last></author>
       <author><first>Bela</first><last>Gipp</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>12896-12913</pages>
       <abstract>Natural Language Processing (NLP) is poised to substantially influence the world. However, significant progress comes hand-in-hand with substantial risks. Addressing them requires broad engagement with various fields of study. Yet, little empirical work examines the state of such engagement (past or current). In this paper, we quantify the degree of influence between 23 fields of study and NLP (on each other). We analyzed ~77k NLP papers, ~3.1m citations from NLP papers to other papers, and ~1.8m citations from other papers to NLP papers. We show that, unlike most fields, the cross-field engagement of NLP, measured by our proposed Citation Field Diversity Index (CFDI), has declined from 0.58 in 1980 to 0.31 in 2022 (an all-time low). In addition, we find that NLP has grown more insular—citing increasingly more NLP papers and having fewer papers that act as bridges between fields. NLP citations are dominated by computer science; Less than 8% of NLP citations are to linguistics, and less than 3% are to math and psychology. These findings underscore NLP’s urgent need to reflect on its engagement with various fields.</abstract>
       <url hash="d9e7133b">2023.emnlp-main.797</url>
@@ -11155,7 +11155,7 @@
     <paper id="801">
       <title><fixed-case>M</fixed-case>ail<fixed-case>E</fixed-case>x: Email Event and Argument Extraction</title>
       <author><first>Saurabh</first><last>Srivastava</last></author>
-      <author><first>Gaurav</first><last>Singh</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav</first><last>Singh</last></author>
       <author><first>Shou</first><last>Matsumoto</last></author>
       <author><first>Ali</first><last>Raz</last></author>
       <author><first>Paulo</first><last>Costa</last></author>
@@ -11220,7 +11220,7 @@
       <author><first>Jiangnan</first><last>Li</last></author>
       <author><first>Yice</first><last>Zhang</last></author>
       <author><first>Bin</first><last>Liang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>13043-13052</pages>
       <abstract>Recent efforts have endeavored to employ the sequence-to-sequence (Seq2Seq) model in Information Extraction (IE) due to its potential to tackle multiple IE tasks in a unified manner. Under this formalization, multiple structured objects are concatenated as the target sequence in a predefined order. However, structured objects, by their nature, constitute an unordered set. Consequently, this formalization introduces a potential order bias, which can impair model learning. Targeting this issue, this paper proposes a set learning approach that considers multiple permutations of structured objects to optimize set probability approximately. Notably, our approach does not require any modifications to model structures, making it easily integrated into existing generative IE frameworks. Experiments show that our method consistently improves existing frameworks on vast tasks and datasets.</abstract>
@@ -11324,7 +11324,7 @@
       <author><first>Rui</first><last>Hou</last></author>
       <author><first>Naman</first><last>Goyal</last></author>
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Madian</first><last>Khabsa</last></author>
       <pages>13142-13152</pages>
       <abstract>Large multilingual language models typically rely on a single vocabulary shared across 100+ languages. As these models have increased in parameter count and depth, vocabulary size has remained largely unchanged. This <i>vocabulary bottleneck</i> limits the representational capabilities of multilingual models like XLM-R. In this paper, we introduce a new approach for scaling to very large multilingual vocabularies by de-emphasizing token sharing between languages with little lexical overlap and assigning vocabulary capacity to achieve sufficient coverage for each individual language. Tokenizations using our vocabulary are typically more semantically meaningful and shorter compared to XLM-R. Leveraging this improved vocabulary, we train XLM-V, a multilingual language model with a one million token vocabulary. XLM-V outperforms XLM-R on every task we tested on ranging from natural language inference (XNLI), question answering (MLQA, XQuAD, TyDiQA), to named entity recognition (WikiAnn). XLM-V is particularly effective on low-resource language tasks and outperforms XLM-R by 11.2% and 5.8% absolute on MasakhaNER and Americas NLI, respectively.</abstract>
@@ -11378,7 +11378,7 @@
       <title>Semantic Space Grounded Weighted Decoding for Multi-Attribute Controllable Dialogue Generation</title>
       <author><first>Zhiling</first><last>Zhang</last></author>
       <author><first>Mengyue</first><last>Wu</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>13230-13243</pages>
       <abstract>Controlling chatbot utterance generation with multiple attributes such as personalities, emotions and dialogue acts is a practically useful but under-studied problem. We propose a novel framework called DASC that possesses strong controllability with a weighted decoding paradigm, while improving generation quality with the grounding in an attribute semantics space. Generation with multiple attributes is then intuitively implemented with an interpolation of multiple attribute embeddings, which results in substantial reduction in the model sizes. Experiments show that DASC can achieve high control accuracy in generation task with the simultaneous control of 3 aspects while also producing interesting and reasonably sensible responses, even in an out-of-distribution robustness test.</abstract>
       <url hash="fd51f18f">2023.emnlp-main.817</url>
@@ -11494,7 +11494,7 @@
       <title><fixed-case>LLML</fixed-case>ingua: Compressing Prompts for Accelerated Inference of Large Language Models</title>
       <author><first>Huiqiang</first><last>Jiang</last></author>
       <author><first>Qianhui</first><last>Wu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Yuqing</first><last>Yang</last></author>
       <author><first>Lili</first><last>Qiu</last></author>
       <pages>13358-13376</pages>
@@ -11535,7 +11535,7 @@
     <paper id="828">
       <title>Characterizing and Verifying Scientific Claims: Qualitative Causal Structure is All You Need</title>
       <author><first>Jinxuan</first><last>Wu</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <author><first>Xian</first><last>Zhou</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
       <pages>13428-13439</pages>
@@ -11563,7 +11563,7 @@
       <author><first>Xingdi</first><last>Yuan</last></author>
       <author><first>Ziang</first><last>Xiao</last></author>
       <author><first>Marc-Alexandre</first><last>Côté</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>13455-13471</pages>
       <abstract>In this work we investigate the capacity of language models to generate explicit, interpretable, and interactive world models of scientific and common-sense reasoning tasks. We operationalize this as a task of generating text games, expressed as hundreds of lines of Python code. To facilitate this task, we introduce ByteSized32, a corpus of 32 reasoning-focused text games totalling 20k lines of Python code. We empirically demonstrate that GPT-4 can use these games as templates for single-shot in-context learning, successfully producing runnable games on unseen topics in 28% of cases. When allowed to self-reflect on program errors, game runnability substantially increases to 58%. While evaluating simulation fidelity is labor intensive, we introduce a suite of automated metrics to assess game fidelity, technical validity, adherence to task specifications, and winnability, showing a high-degree of agreement with expert human ratings. We pose this as a challenge task to spur further development at the juncture of world modeling and code generation.</abstract>
       <url hash="5d123fd1">2023.emnlp-main.830</url>
@@ -11590,7 +11590,7 @@
     </paper>
     <paper id="832">
       <title><fixed-case>M</fixed-case>a<fixed-case>N</fixed-case>t<fixed-case>LE</fixed-case>: Model-agnostic Natural Language Explainer</title>
-      <author><first>Rakesh</first><last>Menon</last></author>
+      <author id="rakesh-r-menon"><first>Rakesh</first><last>Menon</last></author>
       <author><first>Kerem</first><last>Zaman</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <pages>13493-13511</pages>
@@ -11770,7 +11770,7 @@
     </paper>
     <paper id="846">
       <title>Reading Order Matters: Information Extraction from Visually-rich Documents by Token Path Prediction</title>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Ya</first><last>Guo</last></author>
       <author><first>Yi</first><last>Tu</last></author>
       <author><first>Huan</first><last>Chen</last></author>
@@ -11800,12 +11800,12 @@
     </paper>
     <paper id="848">
       <title>The Sentiment Problem: A Critical Survey towards Deconstructing Sentiment Analysis</title>
-      <author><first>Pranav</first><last>Venkit</last></author>
+      <author id="pranav-narayanan-venkit"><first>Pranav</first><last>Venkit</last></author>
       <author><first>Mukund</first><last>Srinath</last></author>
       <author><first>Sanjana</first><last>Gautam</last></author>
       <author><first>Saranya</first><last>Venkatraman</last></author>
       <author><first>Vipul</first><last>Gupta</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Shomir</first><last>Wilson</last></author>
       <pages>13743-13763</pages>
       <abstract>We conduct an inquiry into the sociotechnical aspects of sentiment analysis (SA) by critically examining 189 peer-reviewed papers on their applications, models, and datasets. Our investigation stems from the recognition that SA has become an integral component of diverse sociotechnical systems, exerting influence on both social and technical users. By delving into sociological and technological literature on sentiment, we unveil distinct conceptualizations of this term in domains such as finance, government, and medicine. Our study exposes a lack of explicit definitions and frameworks for characterizing sentiment, resulting in potential challenges and biases. To tackle this issue, we propose an ethics sheet encompassing critical inquiries to guide practitioners in ensuring equitable utilization of SA. Our findings underscore the significance of adopting an interdisciplinary approach to defining sentiment in SA and offer a pragmatic solution for its implementation.</abstract>
@@ -11899,7 +11899,7 @@
     <paper id="855">
       <title>Finding Authentic Counterhate Arguments: A Case Study with Public Figures</title>
       <author><first>Abdullah</first><last>Albanyan</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Eduardo</first><last>Blanco</last></author>
       <pages>13862-13876</pages>
       <abstract>We explore authentic counterhate arguments for online hateful content toward individuals. Previous efforts are limited to counterhate to fight against hateful content toward groups. Thus, we present a corpus of 54,816 hateful tweet-paragraph pairs, where the paragraphs are candidate counterhate arguments. The counterhate arguments are retrieved from 2,500 online articles from multiple sources. We propose a methodology that assures the authenticity of the counter argument and its specificity to the individual of interest. We show that finding arguments in online articles is an efficient alternative to counterhate generation approaches that may hallucinate unsupported arguments. We also present linguistic insights on the language used in counterhate arguments. Experimental results show promising results. It is more challenging, however, to identify counterhate arguments for hateful content toward individuals not included in the training set.</abstract>
@@ -11967,7 +11967,7 @@
       <author><first>Peng</first><last>Li</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>13938-13951</pages>
       <abstract>Although existing multilingual neural machine translation (MNMT) models have demonstrated remarkable performance to handle multiple translation directions in a single model and achieved zero-shot translation between language pairs unseen in training, they still suffer from relatively poor translation qualities for some language pairs. A practical scenario is that how to continually update MNMT models for both supervised and zero-shot translations when limited new data arrives. To this end, we propose a two-stage approach that encourages original models to acquire language-agnostic multilingual representations from new data, and preserves the model architecture without introducing parameters. Experimental results and further analysis demonstrate that our method can efficiently improve performance of existing MNMT models in translation directions where they are initially weak, and mitigates the degeneration in the original well-performing translation directions, offering flexibility in the real-world scenario.</abstract>
       <url hash="9dbd0e9f">2023.emnlp-main.860</url>
@@ -11994,15 +11994,15 @@
       <author><first>Idris</first><last>Abdulmumin</last></author>
       <author><first>Abinew Ali</first><last>Ayele</last></author>
       <author><first>Nedjma</first><last>Ousidhoum</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Ibrahim Sa'id</first><last>Ahmad</last></author>
       <author><first>Meriem</first><last>Beloucif</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Oumaima</first><last>Hourrane</last></author>
       <author><first>Pavel</first><last>Brazdil</last></author>
-      <author><first>Alipio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alipio</first><last>Jorge</last></author>
       <author><first>Felermino Dário Mário António</first><last>Ali</last></author>
       <author><first>Davis</first><last>David</last></author>
       <author><first>Salomey</first><last>Osei</last></author>
@@ -12159,7 +12159,7 @@
       <author><first>Jian</first><last>Liu</last></author>
       <author><first>Weichang</first><last>Liu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Zhe</first><last>Zhao</last></author>
       <pages>14112-14123</pages>
       <abstract>Real-world named entity recognition (NER) datasets are notorious for their noisy nature, attributed to annotation errors, inconsistencies, and subjective interpretations. Such noises present a substantial challenge for traditional supervised learning methods. In this paper, we present a new and unified approach to tackle annotation noises for NER. Our method considers NER as a constituency tree parsing problem, utilizing a tree-structured Conditional Random Fields (CRFs) with uncertainty evaluation for integration. Through extensive experiments conducted on four real-world datasets, we demonstrate the effectiveness of our model in addressing both partial and incorrect annotation errors. Remarkably, our model exhibits superb performance even in extreme scenarios with 90% annotation noise.</abstract>
@@ -12173,7 +12173,7 @@
       <author><first>Andrea</first><last>Piergentili</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Dennis</first><last>Fucci</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>14124-14140</pages>
       <abstract>Gender inequality is embedded in our communication practices and perpetuated in translation technologies. This becomes particularly apparent when translating into grammatical gender languages, where machine translation (MT) often defaults to masculine and stereotypical representations by making undue binary gender assumptions. Our work addresses the rising demand for inclusive language by focusing head-on on gender-neutral translation from English to Italian. We start from the essentials: proposing a dedicated benchmark and exploring automated evaluation methods. First, we introduce GeNTE, a natural, bilingual test set for gender-neutral translation, whose creation was informed by a survey on the perception and use of neutral language. Based on GeNTE, we then overview existing reference-based evaluation approaches, highlight their limits, and propose a reference-free method more suitable to assess gender-neutral translation.</abstract>
@@ -12208,7 +12208,7 @@
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Fahim</first><last>Faisal</last></author>
       <author><first>Alissa</first><last>Ostapenko</last></author>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
@@ -12247,7 +12247,7 @@
       <author><first>Oyvind</first><last>Tafjord</last></author>
       <author><first>Ashish</first><last>Sabharwal</last></author>
       <author><first>Kyle</first><last>Richardson</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>14190-14201</pages>
       <abstract>While large language models (LLMs) are proficient at question-answering (QA), it is not always clear how (or even if) an answer follows from their latent “beliefs”. This lack of interpretability is a growing impediment to widespread use of LLMs. To address this, our goals are to make model beliefs and their inferential relationships explicit, and to resolve inconsistencies that may exist, so that answers are supported by interpretable chains of reasoning drawn from a consistent network of beliefs. Our approach, which we call REFLEX, is to add a **rational, self-reflecting layer** on top of the LLM. First, given a question, we construct a **belief graph** using a backward-chaining process to materialize relevant model beliefs (including beliefs about answer candidates) and their inferential relationships. Second, we identify and minimize contradictions in that graph using a formal constraint reasoner. We find that REFLEX significantly improves consistency (by 8%-11% absolute) without harming overall answer accuracy, resulting in answers supported by faithful chains of reasoning drawn from a more consistent belief system. This suggests a new style of system architecture in which an LLM extended with a rational layer can provide an interpretable window into system beliefs, add a systematic reasoning capability, and repair latent inconsistencies present in the LLM.</abstract>
@@ -12378,8 +12378,8 @@
       <author><first>Mario</first><last>Giulianelli</last></author>
       <author><first>Joris</first><last>Baan</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>14349-14371</pages>
       <abstract>In Natural Language Generation (NLG) tasks, for any input, multiple communicative goals are plausible, and any goal can be put into words, or produced, in multiple ways. We characterise the extent to which human production varies lexically, syntactically, and semantically across four NLG tasks, connecting human production variability to aleatoric or data uncertainty. We then inspect the space of output strings shaped by a generation system’s predicted probability distribution and decoding algorithm to probe its uncertainty. For each test input, we measure the generator’s calibration to human production variability. Following this instance-level approach, we analyse NLG models and decoding strategies, demonstrating that probing a generator with multiple samples and, when possible, multiple references, provides the level of detail necessary to gain understanding of a model’s representation of uncertainty.</abstract>
       <url hash="76f6e8cd">2023.emnlp-main.887</url>
@@ -12606,7 +12606,7 @@
       <author><first>Ruofei</first><last>Lai</last></author>
       <author><first>Xinyu</first><last>Zhang</last></author>
       <author><first>Zhao</first><last>Cao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>14606-14620</pages>
       <abstract>The knowledge graph is a structure to store and represent knowledge, and recent studies have discussed its capability to assist language models for various applications. Some variations of knowledge graphs aim to record arguments and their relations for computational argumentation tasks. However, many must simplify semantic types to fit specific schemas, thus losing flexibility and expression ability. In this paper, we propose the **Hi**erarchical **Ar**gumentation **G**raph (Hi-ArG), a new structure to organize arguments. We also introduce two approaches to exploit Hi-ArG, including a text-graph multi-modal model GreaseArG and a new pre-training framework augmented with graph information. Experiments on two argumentation tasks have shown that after further pre-training and fine-tuning, GreaseArG supersedes same-scale language models on these tasks, while incorporating graph information during further pre-training can also improve the performance of vanilla language models. Code for this paper is available at &lt;https://github.com/ljcleo/Hi-ArG&gt;.</abstract>
@@ -12631,7 +12631,7 @@
     <paper id="904">
       <title><fixed-case>GNAT</fixed-case>: A General Narrative Alignment Tool</title>
       <author><first>Tanzir</first><last>Pial</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>14636-14652</pages>
       <abstract>Algorithmic sequence alignment identifies similar segments shared between pairs of documents, and is fundamental to many NLP tasks. But it is difficult to recognize similarities between distant versions of narratives such as translations and retellings, particularly for summaries and abridgements which are much shorter than the original novels. We develop a general approach to narrative alignment coupling the Smith-Waterman algorithm from bioinformatics with modern text similarity metrics. We show that the background of alignment scores fits a Gumbel distribution, enabling us to define rigorous p-values on the significance of any alignment. We apply and evaluate our general narrative alignment tool (GNAT) on four distinct problem domains differing greatly in both the relative and absolute length of documents, namely summary-to-book alignment, translated book alignment, short story alignment, and plagiarism detection—demonstrating the power and performance of our methods.</abstract>
       <url hash="6bb87fbf">2023.emnlp-main.904</url>
@@ -12655,9 +12655,9 @@
       <title><fixed-case>U</fixed-case>ni<fixed-case>C</fixed-case>hart: A Universal Vision-language Pretrained Model for Chart Comprehension and Reasoning</title>
       <author><first>Ahmed</first><last>Masry</last></author>
       <author><first>Parsa</first><last>Kavehzadeh</last></author>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>14662-14684</pages>
       <abstract>Charts are widely used for data analysis, providing visual representations and insights into complex data. To facilitate chart-based data analysis using natural language, several downstream tasks have been introduced recently such as chart question answering and chart summarization. However, existing methods for these tasks often rely on pretraining on language or vision-language tasks, neglecting the explicit modeling of chart structures (e.g., how chart elements are related to each other). To address this, we first build a large corpus of charts covering diverse topics and visual styles. We then present UniChart, a pretrained model for chart comprehension and reasoning. UniChart encodes the relevant text, data, and visual elements of charts and then uses a chart-grounded text decoder for text generation. We propose several chart-specific pretraining tasks that include: (i) low-level tasks to extract the visual elements (e.g., bars, lines) and data from charts, and (ii) high-level tasks to acquire chart understanding and reasoning skills. Our experiments demonstrate that pretraining UniChart on a large corpus with chart-specific objectives, followed by fine-tuning, yields state-of-the-art performance on four downstream tasks. Moreover, our model exhibits superior generalizability to unseen chart corpus, surpassing previous approaches that lack chart-specific objectives and utilize limited chart resources.</abstract>
       <url hash="00d72f6d">2023.emnlp-main.906</url>
@@ -12892,7 +12892,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Guoping</first><last>Huang</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <pages>14903-14917</pages>
       <abstract>We present IMTLab, an open-source end-to-end interactive machine translation (IMT) system platform that enables researchers to quickly build IMT systems with state-of-the-art models, perform an end-to-end evaluation, and diagnose the weakness of systems. IMTLab treats the whole interactive translation process as a task-oriented dialogue with a human-in-the-loop setting, in which human interventions can be explicitly incorporated to produce high-quality, error-free translations. To this end, a general communication interface is designed to support the flexible IMT architectures and user policies. Based on the proposed design, we construct a simulated and real interactive environment to achieve end-to-end evaluation and leverage the framework to systematically evaluate previous IMT systems. Our simulated and manual experiments show that the prefix-constrained decoding approach still gains the lowest editing cost in the end-to-end evaluation, while BiTIIMT achieves comparable editing cost with a better interactive experience.</abstract>
@@ -12988,7 +12988,7 @@
       <author><first>Yixiao</first><last>Song</last></author>
       <author><first>Andrew</first><last>Drozdov</last></author>
       <author><first>Aparna</first><last>Garimella</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <pages>15023-15037</pages>
       <abstract>In this paper, we study the generation quality of interpolation-based retrieval-augmented language models (LMs). These methods, best exemplified by the <tex-math>k</tex-math>NN-LM, interpolate the LM’s predicted distribution of the next word with a distribution formed from the most relevant retrievals for a given prefix. While the <tex-math>k</tex-math>NN-LM and related methods yield impressive decreases in perplexity, we discover that they do not exhibit corresponding improvements in open-ended generation quality, as measured by both automatic evaluation metrics (e.g., MAUVE) and human evaluations. Digging deeper, we find that interpolating with a retrieval distribution actually increases perplexity compared to a baseline LM for the majority of tokens in the WikiText-103 test set, even though the overall perplexity is lower due to a smaller number of tokens for which perplexity dramatically decreases after interpolation. However, when decoding a long sequence at inference time, significant improvements on this smaller subset of tokens are washed out by slightly worse predictions on most tokens. Furthermore, we discover that the entropy of the retrieval distribution increases faster than that of the base LM as the generated sequence becomes longer, which indicates that retrieval is less reliable when using model-generated text as queries (i.e., is subject to exposure bias). We hope that our analysis spurs future work on improved decoding algorithms and interpolation strategies for retrieval-augmented language models.</abstract>
@@ -13095,7 +13095,7 @@
       <author><first>Cheng</first><last>Chang</last></author>
       <author><first>Qipeng</first><last>Guo</last></author>
       <author><first>Junqi</first><last>Dai</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <pages>15135-15153</pages>
       <abstract>Large Language Models (LLMs) have recently made significant strides in complex reasoning tasks through the Chain-of-Thought technique. Despite this progress, their reasoning is often constrained by their intrinsic understanding, lacking external insights. To address this, we propose Exchange-of-Thought (EoT), a novel framework that enables cross-model communication during problem-solving. Drawing inspiration from network topology, EoT integrates four unique communication paradigms: Memory, Report, Relay, and Debate. This paper delves into the communication dynamics and volume associated with each paradigm. To counterbalance the risks of incorrect reasoning chains, we implement a robust confidence evaluation mechanism within these communications. Our experiments across diverse complex reasoning tasks demonstrate that EoT significantly surpasses established baselines, underscoring the value of external insights in enhancing LLM performance. Furthermore, we show that EoT achieves these superior results in a cost-effective manner, marking a promising advancement for efficient and collaborative AI problem-solving.</abstract>
@@ -13165,7 +13165,7 @@
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Tuhin</first><last>Chakrabarty</last></author>
       <author><first>Hao</first><last>Guo</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>15217-15230</pages>
@@ -13207,7 +13207,7 @@
       <author><first>Sahand</first><last>Sabour</last></author>
       <author><first>Yilin</first><last>Jia</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>15264-15281</pages>
       <abstract>We propose task-adaptive tokenization as a way to adapt the generation pipeline to the specifics of a downstream task and enhance long-form generation in mental health. Inspired by insights from cognitive science, our task-adaptive tokenizer samples variable segmentations from multiple outcomes, with sampling probabilities optimized based on task-specific data. We introduce a strategy for building a specialized vocabulary and introduce a vocabulary merging protocol that allows for the integration of task-specific tokens into the pre-trained model’s tokenization step. Through extensive experiments on psychological question-answering tasks in both Chinese and English, we find that our task-adaptive tokenization approach brings a significant improvement in generation performance while using up to 60% fewer tokens. Preliminary experiments point to promising results when using our tokenization approach with very large language models.</abstract>
       <url hash="7a3703c4">2023.emnlp-main.944</url>
@@ -13274,7 +13274,7 @@
       <author><first>Dong-Ho</first><last>Lee</last></author>
       <author><first>Jay</first><last>Pujara</last></author>
       <author><first>Mohit</first><last>Sewak</last></author>
-      <author><first>Ryen</first><last>White</last></author>
+      <author id="ryen-white"><first>Ryen</first><last>White</last></author>
       <author><first>Sujay</first><last>Jauhar</last></author>
       <pages>15349-15360</pages>
       <abstract>Although large language models (LLMs) have advanced the state-of-the-art in NLP significantly, deploying them for downstream applications is still challenging due to cost, responsiveness, control, or concerns around privacy and security. As such, trainable models are still the preferred option in some cases. However, these models still require human-labeled data for optimal performance, which is expensive and time-consuming to obtain. In order to address this issue, several techniques to reduce human effort involve labeling or generating data using LLMs. Although these methods are effective for certain applications, in practice they encounter difficulties in real-world scenarios. Labeling data requires careful data selection, while generating data necessitates task-specific prompt engineering. In this paper, we propose a unified data creation pipeline that requires only a single formatting example, and which is applicable to a broad range of tasks, including traditionally problematic ones with semantically devoid label spaces. In our experiments we demonstrate that instruction-following LLMs are highly cost-effective data creators, and that models trained with these data exhibit performance better than those trained with human-labeled data (by up to 17.5%) on out-of-distribution evaluation, while maintaining comparable performance on in-distribution tasks. These results have important implications for the robustness of NLP systems deployed in the real-world.</abstract>
@@ -13288,7 +13288,7 @@
       <author><first>Yuliang</first><last>Yan</last></author>
       <author><first>Longtao</first><last>Huang</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>15361-15371</pages>
       <abstract>Large Language Models (LLMs) have made remarkable advancements in the field of natural language generation. However, the propensity of LLMs to generate inaccurate or non-factual content, termed “hallucinations”, remains a significant challenge. Current hallucination detection methods often necessitate the retrieval of great numbers of relevant evidence, thereby increasing response times. We introduce a unique framework that leverages statistical decision theory and Bayesian sequential analysis to optimize the trade-off between costs and benefits during the hallucination detection process. This approach does not require a predetermined number of observations. Instead, the analysis proceeds in a sequential manner, enabling an expeditious decision towards “belief” or “disbelief” through a stop-or-continue strategy. Extensive experiments reveal that this novel framework surpasses existing methods in both efficiency and precision of hallucination detection. Furthermore, it requires fewer retrieval steps on average, thus decreasing response times.</abstract>
       <url hash="ffded038">2023.emnlp-main.949</url>
@@ -13313,7 +13313,7 @@
       <title>Open Information Extraction via Chunks</title>
       <author><first>Kuicai</first><last>Dong</last></author>
       <author><first>Aixin</first><last>Sun</last></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>15390-15404</pages>
       <abstract>Open Information Extraction (OIE) aims to extract relational tuples from open-domain sentences. Existing OIE systems split a sentence into tokens and recognize token spans as tuple relations and arguments. We instead propose Sentence as Chunk sequence (SaC) and recognize chunk spans as tuple relations and arguments. We argue that SaC has better properties for OIE than sentence as token sequence, and evaluate four choices of chunks (i.e., CoNLL chunks, OIA simple phrases, noun phrases, and spans from SpanOIE). Also, we propose a simple end-to-end BERT-based model, Chunk-OIE, for sentence chunking and tuple extraction on top of SaC. Chunk-OIE achieves state-of-the-art results on multiple OIE datasets, showing that SaC benefits the OIE task.</abstract>
@@ -13356,7 +13356,7 @@
       <title><fixed-case>C</fixed-case>oref<fixed-case>P</fixed-case>rompt: Prompt-based Event Coreference Resolution by Measuring Event Type and Argument Compatibilities</title>
       <author><first>Sheng</first><last>Xu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>15440-15452</pages>
       <abstract>Event coreference resolution (ECR) aims to group event mentions referring to the same real-world event into clusters. Most previous studies adopt the “encoding first, then scoring” framework, making the coreference judgment rely on event encoding. Furthermore, current methods struggle to leverage human-summarized ECR rules, e.g., coreferential events should have the same event type, to guide the model. To address these two issues, we propose a prompt-based approach, CorefPrompt, to transform ECR into a cloze-style MLM (masked language model) task. This allows for simultaneous event modeling and coreference discrimination within a single template, with a fully shared context. In addition, we introduce two auxiliary prompt tasks, event-type compatibility and argument compatibility, to explicitly demonstrate the reasoning process of ECR, which helps the model make final predictions. Experimental results show that our method CorefPrompt performs well in a state-of-the-art (SOTA) benchmark.</abstract>
       <url hash="152142cb">2023.emnlp-main.954</url>
@@ -13466,7 +13466,7 @@
       <author><first>Shahreen</first><last>Aunti</last></author>
       <author><first>Charuta</first><last>Pethe</last></author>
       <author><first>Allen</first><last>Kim</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>15560-15579</pages>
       <abstract>Novels are often adapted into feature films, but the differences between the two media usually require dropping sections of the source text from the movie script. Here we study this screen adaptation process by constructing narrative alignments using the Smith-Waterman local alignment algorithm coupled with SBERT embedding distance to quantify text similarity between scenes and book units. We use these alignments to perform an automated analysis of 40 adaptations, revealing insights into the screenwriting process concerning (i) faithfulness of adaptation, (ii) importance of dialog, (iii) preservation of narrative order, and (iv) gender representation issues reflective of the Bechdel test.</abstract>
       <url hash="7a469f04">2023.emnlp-main.962</url>
@@ -13479,7 +13479,7 @@
       <author><first>Jason</first><last>Wei</last></author>
       <author><first>Najoung</first><last>Kim</last></author>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>15580-15591</pages>
       <abstract>Scaling up language models has been empirically shown to improve performance on a wide range of downstream tasks. However, if we were to observe worse performance as a function of scale (inverse scaling) on certain tasks, this would indicate that scaling can also encourage behaviors that are misaligned with human preferences. The Inverse Scaling Prize (McKenzie et al. 2023) identified eleven such inverse scaling tasks, evaluated on models of up to 280B parameters and up to 500 zettaFLOPs of training compute. This paper takes a closer look at these inverse scaling tasks. In this paper, we evaluate models of up to 540B parameters, trained on five times more compute than those evaluated in the Inverse Scaling Prize. With this increased range of model sizes and compute, only four out of the eleven tasks remain inverse scaling. Six tasks exhibit U-shaped scaling, where performance decreases up to a certain size, and then increases again up to the largest model evaluated (the one remaining task displays positive scaling). In addition, 1-shot examples and chain-of-thought can help mitigate undesirable scaling patterns even further. U-shaped scaling suggests that the inverse scaling trend observed in McKenzie et al. (2023) may not continue to hold for larger models, which we attribute to the presence of distractor tasks that only sufficiently large models can avoid.</abstract>
       <url hash="24e8dc70">2023.emnlp-main.963</url>
@@ -13523,7 +13523,7 @@
       <author><first>Yixin</first><last>Cao</last></author>
       <author><first>Kenji</first><last>Kawaguchi</last></author>
       <author><first>Xiang</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>15623-15638</pages>
       <abstract>Language Models (LMs) have demonstrated impressive molecule understanding ability on various 1D text-related tasks. However, they inherently lack 2D graph perception — a critical ability of human professionals in comprehending molecules’ topological structures. To bridge this gap, we propose MolCA: Molecular Graph-Language Modeling with Cross-Modal Projector and Uni-Modal Adapter. MolCA enables an LM (i.e., Galactica) to understand both text- and graph-based molecular contents via the cross-modal projector. Specifically, the cross-modal projector is implemented as a Q-Former to connect a graph encoder’s representation space and an LM’s text space. Further, MolCA employs a uni-modal adapter (i.e., LoRA) for the LM’s efficient adaptation to downstream tasks. Unlike previous studies that couple an LM with a graph encoder via cross-modal contrastive learning, MolCA retains the LM’s ability of open-ended text generation and augments it with 2D graph information. To showcase its effectiveness, we extensively benchmark MolCA on tasks of molecule captioning, IUPAC name prediction, and molecule-text retrieval, on which MolCA significantly outperforms the baselines.</abstract>
       <url hash="a9ab3fe0">2023.emnlp-main.966</url>
@@ -13539,7 +13539,7 @@
       <author><first>Ran</first><last>Jing</last></author>
       <author><first>Bin</first><last>Liang</last></author>
       <author><first>Min</first><last>Yang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>15639-15650</pages>
       <abstract>Unintended dataset biases typically exist in existing Emotion Recognition in Conversations (ERC) datasets, including label bias, where models favor the majority class due to imbalanced training data, as well as the speaker and neutral word bias, where models make unfair predictions because of excessive correlations between specific neutral words or speakers and classes. However, previous studies in ERC generally focus on capturing context-sensitive and speaker-sensitive dependencies, ignoring the unintended dataset biases of data, which hampers the generalization and fairness in ERC. To address this issue, we propose a Training-Free Debiasing framework (TFD) that operates during prediction without additional training. To ensure compatibility with various ERC models, it does not balance data or modify the model structure. Instead, TFD extracts biases from the model by generating counterfactual utterances and contexts and mitigates them using simple yet empirically robust element-wise subtraction operations. Extensive experiments on three public datasets demonstrate that TFD effectively improves generalization ability and fairness across different ERC models.</abstract>
@@ -13594,7 +13594,7 @@
       <title><fixed-case>MQ</fixed-case>u<fixed-case>AKE</fixed-case>: Assessing Knowledge Editing in Language Models via Multi-Hop Questions</title>
       <author><first>Zexuan</first><last>Zhong</last></author>
       <author><first>Zhengxuan</first><last>Wu</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
       <pages>15686-15702</pages>
@@ -13677,7 +13677,7 @@
     <paper id="977">
       <title>Reinforcement Replaces Supervision: Query focused Summarization using Deep Reinforcement Learning</title>
       <author><first>Swaroop</first><last>Nath</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Harshad</first><last>Khadilkar</last></author>
       <pages>15770-15789</pages>
       <abstract>Query-focused Summarization (QfS) deals with systems that generate summaries from document(s) based on a query. Motivated by the insight that Reinforcement Learning (RL) provides a generalization to Supervised Learning (SL) for Natural Language Generation, and thereby performs better (empirically) than SL, we use an RL-based approach for this task of QfS. Additionally, we also resolve the conflict of employing RL in Transformers with Teacher Forcing. We develop multiple Policy Gradient networks, trained on various reward signals: ROUGE, BLEU, and Semantic Similarity, which lead to a <tex-math>\mathit{10}</tex-math>-point improvement over the State-of-the-Art approach on the ROUGE-L metric for a benchmark dataset (ELI5). We also show performance of our approach in zero-shot setting for another benchmark dataset (DebatePedia) – our approach leads to results comparable to baselines, which were specifically trained on DebatePedia. To aid the RL training, we propose a better semantic similarity reward, enabled by a novel Passage Embedding scheme developed using Cluster Hypothesis. Lastly, we contribute a gold-standard test dataset to further research in QfS and Long-form Question Answering (LfQA).</abstract>
@@ -13720,7 +13720,7 @@
       <title>An Attribution Method for <fixed-case>S</fixed-case>iamese Encoders</title>
       <author><first>Lucas</first><last>Moeller</last></author>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>15818-15827</pages>
       <abstract>Despite the success of Siamese encoder models such as sentence transformers (ST), little is known about the aspects of inputs they pay attention to. A barrier is that their predictions cannot be attributed to individual features, as they compare two inputs rather than processing a single one. This paper derives a local attribution method for Siamese encoders by generalizing the principle of integrated gradients to models with multiple inputs. The output takes the form of feature-pair attributions and in case of STs it can be reduced to a token–token matrix. Our method involves the introduction of integrated Jacobians and inherits the advantageous formal properties of integrated gradients: it accounts for the model’s full computation graph and is guaranteed to converge to the actual prediction. A pilot study shows that in case of STs few token pairs can dominate predictions and that STs preferentially focus on nouns and verbs. For accurate predictions, however, they need to attend to the majority of tokens and parts of speech.</abstract>
       <url hash="3826e2f0">2023.emnlp-main.980</url>
@@ -13744,7 +13744,7 @@
     <paper id="982">
       <title>Graph vs. Sequence: An Empirical Study on Knowledge Forms for Knowledge-Grounded Dialogue</title>
       <author><first>Yizhe</first><last>Yang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yuhang</first><last>Liu</last></author>
       <author><first>Yang</first><last>Gao</last></author>
       <pages>15846-15858</pages>
@@ -13781,7 +13781,7 @@
       <title><fixed-case>NL</fixed-case>2<fixed-case>TL</fixed-case>: Transforming Natural Languages to Temporal Logics using Large Language Models</title>
       <author><first>Yongchao</first><last>Chen</last></author>
       <author><first>Rujul</first><last>Gandhi</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Chuchu</first><last>Fan</last></author>
       <pages>15880-15903</pages>
       <abstract>Temporal Logic (TL) can be used to rigorously specify complex high-level specification for systems in many engineering applications. The translation between natural language (NL) and TL has been under-explored due to the lack of dataset and generalizable model across different application domains. In this paper, we propose an accurate and generalizable transformation framework of English instructions from NL to TL, exploring the use of Large Language Models (LLMs) at multiple stages. Our contributions are twofold. First, we develop a framework to create a dataset of NL-TL pairs combining LLMs and human annotation. We publish a dataset with 23K NL-TL pairs. Then, we finetune T5 models on the lifted versions (i.e., the specific Atomic Propositions (AP) are hidden) of the NL and TL. The enhanced generalizability originates from two aspects: 1) Usage of lifted NL-TL characterizes common logical structures, without constraints of specific domains. 2) Application of LLMs in dataset creation largely enhances corpus richness. We test the generalization of trained models on five varied domains. To achieve full NL-TL transformation, we either combine the lifted model with AP recognition task or do the further finetuning on each specific domain. During the further finetuning, our model achieves higher accuracy (&gt; 95%) using only &lt;10% training data, compared with the baseline sequence to sequence (Seq2Seq) model.</abstract>
@@ -13819,7 +13819,7 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Anupama</first><last>Ray</last></author>
       <author><first>Apoorva</first><last>Nunna</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>15933-15948</pages>
       <abstract>Sarcasm is a complex linguistic construct with incongruity at its very core. Detecting sarcasm depends on the actual content spoken and tonality, facial expressions, the context of an utterance, and personal traits like language proficiency and cognitive capabilities. In this paper, we propose the utilization of synthetic gaze data to improve the task performance for multimodal sarcasm detection in a conversational setting. We enrich an existing multimodal conversational dataset, i.e., MUStARD++ with gaze features. With the help of human participants, we collect gaze features for 20% of data instances, and we investigate various methods for gaze feature prediction for the rest of the dataset. We perform extrinsic and intrinsic evaluations to assess the quality of the predicted gaze features. We observe a performance gain of up to 6.6% points by adding a new modality, i.e., collected gaze features. When both collected and predicted data are used, we observe a performance gain of 2.3% points on the complete dataset. Interestingly, with only predicted gaze features, too, we observe a gain in performance (1.9% points). We retain and use the feature prediction model, which maximally correlates with collected gaze features. Our model trained on combining collected and synthetic gaze data achieves SoTA performance on the MUStARD++ dataset. To the best of our knowledge, ours is the first predict-and-use model for sarcasm detection. We publicly release the code, gaze data, and our best models for further research.</abstract>
       <url hash="d5d83549">2023.emnlp-main.988</url>
@@ -13906,7 +13906,7 @@
     </paper>
     <paper id="995">
       <title>Detecting Spoilers in Movie Reviews with External Movie Knowledge and User Networks</title>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last></author>
+      <author><first>Heng</first><last>Wang</last></author>
       <author><first>Wenqian</first><last>Zhang</last></author>
       <author><first>Yuyang</first><last>Bai</last></author>
       <author><first>Zhaoxuan</first><last>Tan</last></author>
@@ -13925,7 +13925,7 @@
       <author><first>Dongyuan</first><last>Li</last></author>
       <author><first>Yusong</first><last>Wang</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>16051-16069</pages>
       <abstract>Multimodal emotion recognition aims to recognize emotions for each utterance from multiple modalities, which has received increasing attention for its application in human-machine interaction. Current graph-based methods fail to simultaneously depict global contextual features and local diverse uni-modal features in a dialogue. Furthermore, with the number of graph layers increasing, they easily fall into over-smoothing. In this paper, we propose a method for joint modality fusion and graph contrastive learning for multimodal emotion recognition (Joyful), where multimodality fusion, contrastive learning, and emotion recognition are jointly optimized. Specifically, we first design a new multimodal fusion mechanism that can provide deep interaction and fusion between the global contextual and uni-modal specific features. Then, we introduce a graph contrastive learning framework with inter- and intra-view contrastive losses to learn more distinguishable representations for samples with different sentiments. Extensive experiments on three benchmark datasets indicate that Joyful achieved state-of-the-art (SOTA) performance compared with all baselines. Code is released on Github (https://anonymous.4open.science/r/MERC-7F88).</abstract>
       <url hash="b497a374">2023.emnlp-main.996</url>
@@ -13948,7 +13948,7 @@
     <paper id="998">
       <title>Assessing the influence of attractor-verb distance on grammatical agreement in humans and language models</title>
       <author><first>Christos</first><last>Zacharopoulos</last></author>
-      <author><first>Théo</first><last>Desbordes</last></author>
+      <author id="theo-desbordes"><first>Théo</first><last>Desbordes</last></author>
       <author><first>Mathias</first><last>Sablé-Meyer</last></author>
       <pages>16081-16090</pages>
       <abstract>Subject-verb agreement in the presence of an attractor noun located between the main noun and the verb elicits complex behavior: judgments of grammaticality are modulated by the grammatical features of the attractor. For example, in the sentence <tex-math>\textit{``The girl near the boys likes climbing''}</tex-math>, the attractor (<tex-math>\textit{boys}</tex-math>) disagrees in grammatical number with the verb (<tex-math>\textit{likes}</tex-math>), creating a locally implausible transition probability. Here, we parametrically modulate the distance between the attractor and the verb while keeping the length of the sentence equal. We evaluate the performance of both humans and two artificial neural network models: both make more mistakes when the attractor is closer to the verb, but neural networks get close to the chance level while humans are mostly able to overcome the attractor interference. Additionally, we report a linear effect of attractor distance on reaction times. We hypothesize that a possible reason for the proximity effect is the calculation of transition probabilities between adjacent words. Nevertheless, classical models of attraction such as the cue-based model might suffice to explain this phenomenon, thus paving the way for new research. Data and analyses available at https://osf.io/d4g6k</abstract>
@@ -13988,8 +13988,8 @@
       <title>Hop, Union, Generate: Explainable Multi-hop Reasoning without Rationale Supervision</title>
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Justin</first><last>Chiu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>16119-16130</pages>
       <abstract>Explainable multi-hop question answering (QA) not only predicts answers but also identifies rationales, i. e. subsets of input sentences used to derive the answers. Existing methods rely on supervision for both answers and rationales. This problem has been extensively studied under the supervised setting, where both answer and rationale annotations are given. Because rationale annotations are expensive to collect and not always available, recent efforts have been devoted to developing methods that do not rely on supervision for rationales. However, such methods have limited capacities in modeling interactions between sentences, let alone reasoning across multiple documents. This work proposes a principled, probabilistic approach for training explainable multi-hop QA systems without rationale supervision. Our approach performs multi-hop reasoning by explicitly modeling rationales as sets, enabling the model to capture interactions between documents and sentences within a document. Experimental results show that our approach is more accurate at selecting rationales than the previous methods, while maintaining similar accuracy in predicting answers.</abstract>
       <url hash="ec3ca5e1">2023.emnlp-main.1001</url>
@@ -14000,7 +14000,7 @@
       <title>To Split or Not to Split: Composing Compounds in Contextual Vector Spaces</title>
       <author><first>Chris</first><last>Jenkins</last></author>
       <author><first>Filip</first><last>Miletic</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>16131-16136</pages>
       <abstract>We investigate the effect of sub-word tokenization on representations of German noun compounds: single orthographic words which are composed of two or more constituents but often tokenized into units that are not morphologically motivated or meaningful. Using variants of BERT models and tokenization strategies on domain-specific restricted diachronic data, we introduce a suite of evaluations relying on the masked language modelling task and compositionality prediction. We obtain the most consistent improvements by pre-splitting compounds into constituents.</abstract>
       <url hash="5f0405f1">2023.emnlp-main.1002</url>
@@ -14125,7 +14125,7 @@
     <paper id="1011">
       <title>Learning From Free-Text Human Feedback – Collect New Datasets Or Extend Existing Ones?</title>
       <author><first>Dominic</first><last>Petrak</last></author>
-      <author><first>Nafise</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise</first><last>Moosavi</last></author>
       <author><first>Ye</first><last>Tian</last></author>
       <author><first>Nikolai</first><last>Rozanov</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -14178,7 +14178,7 @@
     <paper id="1015">
       <title>A Benchmark for Reasoning with Spatial Prepositions</title>
       <author><first>Iulia</first><last>Comsa</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <pages>16328-16335</pages>
       <abstract>Spatial reasoning is a fundamental building block of human cognition, used in representing, grounding, and reasoning about physical and abstract concepts. We propose a novel benchmark focused on assessing inferential properties of statements with spatial prepositions. The benchmark includes original datasets in English and Romanian and aims to probe the limits of reasoning about spatial relations in large language models. We use prompt engineering to study the performance of two families of large language models, PaLM and GPT-3, on our benchmark. Our results show considerable variability in the performance of smaller and larger models, as well as across prompts and languages. However, none of the models reaches human performance.</abstract>
       <url hash="1a736d88">2023.emnlp-main.1015</url>
@@ -14189,7 +14189,7 @@
     <paper id="1016">
       <title><fixed-case>TIMELINE</fixed-case>: Exhaustive Annotation of Temporal Relations Supporting the Automatic Ordering of Events in News Articles</title>
       <author><first>Sarah</first><last>Alsayyahi</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>16336-16348</pages>
       <abstract>Temporal relation extraction models have thus far been hindered by a number of issues in existing temporal relation-annotated news datasets, including: (1) low inter-annotator agreement due to the lack of specificity of their annotation guidelines in terms of what counts as a temporal relation; (2) the exclusion of long-distance relations within a given document (those spanning across different paragraphs); and (3) the exclusion of events that are not centred on verbs. This paper aims to alleviate these issues by presenting a new annotation scheme that clearly defines the criteria based on which temporal relations should be annotated. Additionally, the scheme includes events even if they are not expressed as verbs (e.g., nominalised events). Furthermore, we propose a method for annotating all temporal relations—including long-distance ones—which automates the process, hence reducing time and manual effort on the part of annotators. The result is a new dataset, the TIMELINE corpus, in which improved inter-annotator agreement was obtained, in comparison with previously reported temporal relation datasets. We report the results of training and evaluating two baseline temporal relation extraction models on the new corpus, and compare them with results obtained on the widely used MATRES corpus.</abstract>
       <url hash="fea7b131">2023.emnlp-main.1016</url>
@@ -14213,13 +14213,13 @@
     <paper id="1018">
       <title>Towards Interpretable and Efficient Automatic Reference-Based Summarization Evaluation</title>
       <author><first>Yixin</first><last>Liu</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Yilun</first><last>Zhao</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>16360-16368</pages>
       <abstract>Interpretability and efficiency are two important considerations for the adoption of neural automatic metrics. In this work, we develop strong-performing automatic metrics for reference-based summarization evaluation, based on a two-stage evaluation pipeline that first extracts basic information units from one text sequence and then checks the extracted units in another sequence. The metrics we developed include two-stage metrics that can provide high interpretability at both the fine-grained unit level and summary level, and one-stage metrics that achieve a balance between efficiency and interpretability. We make the developed tools publicly available at https://github.com/Yale-LILY/AutoACU.</abstract>
       <url hash="069763f9">2023.emnlp-main.1018</url>
@@ -14278,7 +14278,7 @@
     <paper id="1022">
       <title>Can language models learn analogical reasoning? Investigating training objectives and comparisons to human performance</title>
       <author><first>Molly</first><last>Petersen</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>16414-16425</pages>
       <abstract>While analogies are a common way to evaluate word embeddings in NLP, it is also of interest to investigate whether or not analogical reasoning is a task in itself that can be learned. In this paper, we test several ways to learn basic analogical reasoning, specifically focusing on analogies that are more typical of what is used to evaluate analogical reasoning in humans than those in commonly used NLP benchmarks. Our experiments find that models are able to learn analogical reasoning, even with a small amount of data. We additionally compare our models to a dataset with a human baseline, and find that after training models approach human performance.</abstract>
       <url hash="9f44ce5e">2023.emnlp-main.1022</url>
@@ -14328,7 +14328,7 @@
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Yunhao</first><last>Zhang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>16460-16476</pages>
       <abstract>Transformer-based models, even though achieving super-human performance on several downstream tasks, are often regarded as a black box and used as a whole. It is still unclear what mechanisms they have learned, especially their core module: multi-head attention. Inspired by functional specialization in the human brain, which helps to efficiently handle multiple tasks, this work attempts to figure out whether the multi-head attention module will evolve similar function separation under multi-tasking training. If it is, can this mechanism further improve the model performance? To investigate these questions, we introduce an interpreting method to quantify the degree of functional specialization in multi-head attention. We further propose a simple multi-task training method to increase functional specialization and mitigate negative information transfer in multi-task learning. Experimental results on seven pre-trained transformer models have demonstrated that multi-head attention does evolve functional specialization phenomenon after multi-task training which is affected by the similarity of tasks. Moreover, the multi-task training strategy based on functional specialization boosts performance in both multi-task learning and transfer learning without adding any parameters.</abstract>
       <url hash="f505c1e8">2023.emnlp-main.1026</url>
@@ -14465,7 +14465,7 @@
       <author><first>Raghav</first><last>Jain</last></author>
       <author><first>Prince</first><last>Jha</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>16632-16645</pages>
       <abstract>With the rise of social media and online communication, the issue of cyberbullying has gained significant prominence. While extensive research is being conducted to develop more effective models for detecting cyberbullying in monolingual languages, a significant gap exists in understanding code-mixed languages and the need for explainability in this context. To address this gap, we have introduced a novel benchmark dataset named BullyExplain for explainable cyberbullying detection in code-mixed language. In this dataset, each post is meticulously annotated with four labels: bully, sentiment, target, and rationales, indicating the specific phrases responsible for identifying the post as a bully. Our current research presents an innovative unified generative framework, GenEx, which reimagines the multitask problem as a text-to-text generation task. Our proposed approach demonstrates its superiority across various evaluation metrics when applied to the BullyExplain dataset, surpassing other baseline models and current state-of-the-art approaches.</abstract>
       <url hash="452525a8">2023.emnlp-main.1035</url>
@@ -14495,7 +14495,7 @@
       <author><first>Keziah</first><last>Reina</last></author>
       <author><first>Vishnesh</first><last>Ramanathan</last></author>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <pages>16662-16692</pages>
       <abstract>Automated text simplification aims to produce simple versions of complex texts. This task is especially useful in the medical domain, where the latest medical findings are typically communicated via complex and technical articles. This creates barriers for laypeople seeking access to up-to-date medical findings, consequently impeding progress on health literacy. Most existing work on medical text simplification has focused on monolingual settings, with the result that such evidence would be available only in just one language (most often, English). This work addresses this limitation via multilingual simplification, i.e., directly simplifying complex texts into simplified texts in multiple languages. We introduce MultiCochrane, the first sentence-aligned multilingual text simplification dataset for the medical domain in four languages: English, Spanish, French, and Farsi. We evaluate fine-tuned and zero-shot models across these languages with extensive human assessments and analyses. Although models can generate viable simplified texts, we identify several outstanding challenges that this dataset might be used to address.</abstract>
@@ -14524,7 +14524,7 @@
       <author><first>Ruofei</first><last>Lai</last></author>
       <author><first>Xinyu</first><last>Zhang</last></author>
       <author><first>Zhao</first><last>Cao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>16705-16720</pages>
       <abstract>Counter-argument generation—a captivating area in computational linguistics—seeks to craft statements that offer opposing views. While most research has ventured into paragraph-level generation, sentence-level counter-argument generation beckons with its unique constraints and brevity-focused challenges. Furthermore, the diverse nature of counter-arguments poses challenges for evaluating model performance solely based on n-gram-based metrics. In this paper, we present the ArgTersely benchmark for sentence-level counter-argument generation, drawing from a manually annotated dataset from the ChangeMyView debate forum. We also propose Arg-LlaMA for generating high-quality counter-argument. For better evaluation, we trained a BERT-based evaluator Arg-Judge with human preference data. We conducted comparative experiments involving various baselines such as LlaMA, Alpaca, GPT-3, and others. The results show the competitiveness of our proposed framework and evaluator in counter-argument generation tasks. Code and data are available at https://github.com/amazingljy1206/ArgTersely.</abstract>
@@ -14554,7 +14554,7 @@
       <author><first>Hannah</first><last>Frost</last></author>
       <author><first>Paul</first><last>O’Regan</last></author>
       <author><first>Dónal</first><last>Landers</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
       <pages>16745-16764</pages>
       <abstract>How can we interpret and retrieve medical evidence to support clinical decisions? Clinical trial reports (CTR) amassed over the years contain indispensable information for the development of personalized medicine. However, it is practically infeasible to manually inspect over 400,000+ clinical trial reports in order to find the best evidence for experimental treatments. Natural Language Inference (NLI) offers a potential solution to this problem, by allowing the scalable computation of textual entailment. However, existing NLI models perform poorly on biomedical corpora, and previously published datasets fail to capture the full complexity of inference over CTRs. In this work, we present a novel resource to advance research on NLI for reasoning on CTRs. The resource includes two main tasks. Firstly, to determine the inference relation between a natural language statement, and a CTR. Secondly, to retrieve supporting facts to justify the predicted relation. We provide NLI4CT, a corpus of 2400 statements and CTRs, annotated for these tasks. Baselines on this corpus expose the limitations of existing NLI approaches, with 6 state-of-the-art NLI models achieving a maximum F1 score of 0.627. To the best of our knowledge, we are the first to design a task that covers the interpretation of full CTRs. To encourage further work on this challenging dataset, we make the corpus, competition leaderboard, and website, available on CodaLab, and code to replicate the baseline experiments on GitHub.</abstract>
       <url hash="8ad1fee3">2023.emnlp-main.1041</url>
@@ -14568,7 +14568,7 @@
       <author><first>Zhen</first><last>Wu</last></author>
       <author><first>Jianbing</first><last>Zhang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>16765-16779</pages>
       <abstract>It has been well documented that a reviewer’s opinion of the nativeness of expression in an academic paper affects the likelihood of it being accepted for publication. Previous works have also shone a light on the stress and anxiety authors who are non-native English speakers experience when attempting to publish in international venues. We explore how this might be a concern in the field of Natural Language Processing (NLP) through conducting a comprehensive statistical analysis of NLP paper abstracts, identifying how authors of different linguistic backgrounds differ in the lexical, morphological, syntactic and cohesive aspects of their writing. Through our analysis, we identify that there are a number of characteristics that are highly variable across the different corpora examined in this paper. This indicates potential for the presence of linguistic bias. Therefore, we outline a set of recommendations to publishers of academic journals and conferences regarding their guidelines and resources for prospective authors in order to help enhance inclusivity and fairness.</abstract>
       <url hash="bd8d4c5f">2023.emnlp-main.1042</url>
@@ -14595,7 +14595,7 @@
       <author><first>Muhammad Umar</first><last>Salman</last></author>
       <author><first>Asif</first><last>Hanif</last></author>
       <author><first>Shady</first><last>Shehata</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>16794-16812</pages>
       <abstract>Propaganda is a form of communication intended to influence the opinions and the mindset of the public to promote a particular agenda. With the rise of social media, propaganda has spread rapidly, leading to the need for automatic propaganda detection systems. Most work on propaganda detection has focused on high-resource languages, such as English, and little effort has been made to detect propaganda for low-resource languages. Yet, it is common to find a mix of multiple languages in social media communication, a phenomenon known as code-switching. Code-switching combines different languages within the same text, which poses a challenge for automatic systems. Considering this premise, we propose a novel task of detecting propaganda techniques in code-switched text. To support this task, we create a corpus of 1,030 texts code-switching between English and Roman Urdu, annotated with 20 propaganda techniques at fragment-level. We perform a number of experiments contrasting different experimental setups, and we find that it is important to model the multilinguality directly rather than using translation as well as to use the right fine-tuning strategy. We plan to publicly release our code and dataset.</abstract>
       <url hash="ecba581f">2023.emnlp-main.1044</url>
@@ -14607,7 +14607,7 @@
       <title>Speech Recognition and Meaning Interpretation: Towards Disambiguation of Structurally Ambiguous Spoken Utterances in <fixed-case>I</fixed-case>ndonesian</title>
       <author><first>Ruhiyah</first><last>Widiaputri</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
-      <author><first>Dessi</first><last>Lestari</last></author>
+      <author id="dessi-puji-lestari"><first>Dessi</first><last>Lestari</last></author>
       <author><first>Kurniawati</first><last>Azizah</last></author>
       <author><first>Dipta</first><last>Tanaya</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
@@ -14665,7 +14665,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>NLP</fixed-case>+<fixed-case>V</fixed-case>is: <fixed-case>NLP</fixed-case> Meets Visualization</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
       <author><first>Jesse</first><last>Vig</last></author>
       <pages>1-6</pages>
@@ -14701,7 +14701,7 @@
       <author><first>Qinyuan</first><last>Ye</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>19-25</pages>
       <abstract>The progress of natural language processing (NLP) is primarily driven by machine learning that optimizes a system on a large-scale set of task-specific labeled examples. This learning paradigm limits the ability of machines to have the same capabilities as humans in handling new tasks since humans can often solve unseen tasks with a couple of examples accompanied by task instructions. In addition, we may not have a chance to prepare task-specific examples of large-volume for new tasks because we cannot foresee what task needs to be addressed next and how complex to annotate for it. Therefore, task instructions act as a novel and promising resource for supervision. This tutorial targets researchers and practitioners who are interested in AI and ML technologies for NLP generalization in a low-shot scenario. In particular, we will present a diverse thread of instruction-driven NLP studies that try to answer the following questions: (i) What is task instruction? (ii) How is the process of creating datasets and evaluating systems conducted? (iii) How to encode task instructions? (iv) When and why do some instructions work better? (v) What concerns remain in LLM-driven instruction following? We will discuss several lines of frontier research that tackle those challenges and will conclude the tutorial by outlining directions for further investigation.</abstract>
       <url hash="2d27f490">2023.emnlp-tutorial.4</url>
@@ -14753,7 +14753,7 @@
     <paper id="1">
       <title>Fabricator: An Open Source Toolkit for Generating Labeled Training Data with Teacher <fixed-case>LLM</fixed-case>s</title>
       <author><first>Jonas</first><last>Golde</last><affiliation>Humboldt-University of Berlin</affiliation></author>
-      <author><first>Patrick</first><last>Haller</last><affiliation>Machine Learning Group - Humboldt University of Berlin</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>Machine Learning Group - Humboldt University of Berlin</affiliation></author>
       <author><first>Felix</first><last>Hamborg</last><affiliation>University of Konstanz</affiliation></author>
       <author><first>Julian</first><last>Risch</last><affiliation>deepset</affiliation></author>
       <author><first>Alan</first><last>Akbik</last><affiliation>Humboldt University of Berlin</affiliation></author>
@@ -14769,7 +14769,7 @@
       <author><first>Christian</first><last>Huber</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <author><first>Tu Anh</first><last>Dinh</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Carlos</first><last>Mullov</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Thai Binh</first><last>Nguyen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Fabian</first><last>Retkowski</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <author><first>Stefan</first><last>Constantin</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
@@ -14778,7 +14778,7 @@
       <author><first>Zhaolin</first><last>Li</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Sai</first><last>Koneru</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <pages>12-20</pages>
       <abstract>The challenge of low-latency speech translation has recently draw significant interest in the research community as shown by several publications and shared tasks. Therefore, it is essential to evaluate these different approaches in realistic scenarios. However, currently only specific aspects of the systems are evaluated and often it is not possible to compare different approaches. In this work, we propose the first framework to perform and evaluate the various aspects of low-latency speech translation under realistic conditions. The evaluation is carried out in an end-to-end fashion. This includes the segmentation of the audio as well as the run-time of the different components. Secondly, we compare different approaches to low-latency speech translation using this framework. We evaluate models with the option to revise the output as well as methods with fixed output. Furthermore, we directly compare state-of-the-art cascaded as well as end-to-end systems. Finally, the framework allows to automatically evaluate the translation quality as well as latency and also provides a web interface to show the low-latency model outputs to the user.</abstract>
       <url hash="1208a807">2023.emnlp-demo.2</url>
@@ -14830,7 +14830,7 @@
       <author><first>Nuno</first><last>Carvalhais</last></author>
       <author><first>Monique</first><last>Meuschke</last></author>
       <author><first>Markus</first><last>Reichstein</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Kai</first><last>Lawonn</last></author>
       <pages>70-81</pages>
       <abstract>The advent of large language models has brought about new ways of interacting with data intuitively via natural language. In recent years, a variety of visualization systems have explored the use of natural language to create and modify visualizations through visualization-oriented dialog. However, the majority of these systems rely on tailored dialog agents to analyze domain-specific data and operate domain-specific visualization tools and libraries. This is a major challenge when trying to transfer functionalities between dialog interfaces of different visualization applications. To address this issue, we propose VIST5, a visualization-oriented dialog system that focuses on easy adaptability to an application domain as well as easy transferability of language-controllable visualization library functions between applications. Its architecture is based on a retrieval-augmented T5 language model that leverages few-shot learning capabilities to enable a rapid adaptation of the system.</abstract>
@@ -14857,9 +14857,9 @@
     </paper>
     <paper id="7">
       <title>Koala: An Index for Quantifying Overlaps with Pre-training Corpora</title>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>University College London</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
       <pages>90-98</pages>
       <abstract>In very recent years more attention has been placed on probing the role of pre-training data in Large Language Models (LLMs) downstream behaviour. Despite the importance, there is no public tool that supports such analysis of pre-training corpora at large scale. To help research in this space, we launch Koala, a searchable index over large pre-training corpora using lossless compressed suffix arrays with highly efficient compression rate and search support. In its first release we index the public proportion of OPT 175B, GPT-3, GPT-Neo, GPT-Neo, LLaMA, BERT, ELECTRA, RoBERTA, XLNet pre-training corpora. Koala provides a framework to do forensic analysis on the current and future benchmarks as well as to assess the degree of memorization in the output from the LLMs. Koala is available for public use at https://koala-index.erc.monash.edu/.</abstract>
@@ -14897,7 +14897,7 @@
       <author><first>Dazhen</first><last>Wan</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Xiaochen</first><last>Zhu</last><affiliation>University of Cambridge, Cambridge, England</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
-      <author><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University</affiliation></author>
       <pages>106-123</pages>
       <abstract>Task-oriented dialogue (TOD) systems function as digital assistants, guiding users through various tasks such as booking flights or finding restaurants. Existing toolkits for building TOD systems often fall short in delivering comprehensive arrays of data, model, and experimental environments with a user-friendly experience. We introduce ConvLab-3: a multifaceted dialogue system toolkit crafted to bridge this gap. Our unified data format simplifies the integration of diverse datasets and models, significantly reducing complexity and cost for studying generalization and transfer. Enhanced with robust reinforcement learning (RL) tools, featuring a streamlined training process, in-depth evaluation tools, and a selection of user simulators, ConvLab-3 supports the rapid development and evaluation of robust dialogue policies. Through an extensive study, we demonstrate the efficacy of transfer learning and RL and showcase that ConvLab-3 is not only a powerful tool for seasoned researchers but also an accessible platform for newcomers.</abstract>
@@ -15001,7 +15001,7 @@
     <paper id="16">
       <title><fixed-case>TP</fixed-case>-Detector: Detecting Turning Points in the Engineering Process of Large-scale Projects</title>
       <author><first>Qi</first><last>Wu</last><affiliation>Beihang University</affiliation></author>
-      <author><first>WenHan</first><last>Chao</last><affiliation>BeiHang University</affiliation></author>
+      <author id="wenhan-chao"><first>WenHan</first><last>Chao</last><affiliation>BeiHang University</affiliation></author>
       <author><first>Xian</first><last>Zhou</last><affiliation>Center for Information Research, Academy of Military Science</affiliation></author>
       <author><first>Zhunchen</first><last>Luo</last><affiliation>Center for Information Research, Academy of Military Science</affiliation></author>
       <pages>177-185</pages>
@@ -15044,12 +15044,12 @@
     </paper>
     <paper id="19">
       <title>Muted: Multilingual Targeted Offensive Speech Identification and Visualization</title>
-      <author><first>Christoph</first><last>Tillmann</last><affiliation>IBM Research</affiliation></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last><affiliation>IBM Research</affiliation></author>
       <author><first>Aashka</first><last>Trivedi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Sara</first><last>Rosenthal</last><affiliation>IBM Research</affiliation></author>
       <author><first>Santosh</first><last>Borse</last><affiliation>IBM Research</affiliation></author>
       <author><first>Rong</first><last>Zhang</last><affiliation>IBM.com</affiliation></author>
-      <author><first>Avirup</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Bishwaranjan</first><last>Bhattacharjee</last><affiliation>IBM T.J.Watson Researcg</affiliation></author>
       <pages>229-236</pages>
       <abstract>Offensive language such as hate, abuse, and profanity (HAP) occurs in various content on the web. While previous work has mostly dealt with sentence level annotations, there have been a few recent attempts to identify offensive spans as well. We build upon this work and introduce MUTED, a system to identify multilingual HAP content by displaying offensive arguments and their targets using heat maps to indicate their intensity. MUTED can leverage any transformer-based HAP-classification model and its attention mechanism out-of-the-box to identify toxic spans, without further fine-tuning. In addition, we use the spaCy library to identify the specific targets and arguments for the words predicted by the attention heatmaps. We present the model’s performance on identifying offensive spans and their targets in existing datasets and present new annotations on German text. Finally, we demonstrate our proposed visualization tool on multilingual inputs.</abstract>
@@ -15110,7 +15110,7 @@
       <author><first>Liangming</first><last>Pan</last><affiliation>University of California, Santa Barbara (UCSB)</affiliation></author>
       <author><first>Xinyuan</first><last>Lu</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>264-273</pages>
       <abstract>Fact-checking real-world claims often requires intricate, multi-step reasoning due to the absence of direct evidence to support or refute them. However, existing fact-checking systems often lack transparency in their decision-making, making it challenging for users to comprehend their reasoning process. To address this, we propose the Question-guided Multi-hop Fact-Checking (QACheck) system, which guides the model’s reasoning process by asking a series of questions critical for verifying a claim. QACheck has five key modules: a claim verifier, a question generator, a question-answering module, a QA validator, and a reasoner. Users can input a claim into QACheck, which then predicts its veracity and provides a comprehensive report detailing its reasoning process, guided by a sequence of (question, answer) pairs. QACheck also provides the source of evidence supporting each question, fostering a transparent, explainable, and user-friendly fact-checking process.</abstract>
       <url hash="adb2406e">2023.emnlp-demo.23</url>
@@ -15125,7 +15125,7 @@
       <author><first>Seyedeh Fatemeh</first><last>Ahmadi</last><affiliation>University of Guilan</affiliation></author>
       <author><first>Gita</first><last>Shojaee</last><affiliation>University of Guilan</affiliation></author>
       <author><first>Fatemeh</first><last>Kamani</last><affiliation>University of Guilan</affiliation></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last><affiliation>Stony Brook University</affiliation></author>
       <pages>274-285</pages>
       <abstract>Question answering (QA) systems have reached human-level accuracy; however, these systems are not robust enough and are vulnerable to adversarial examples. Recently, adversarial attacks have been widely investigated in text classification. However, there have been few research efforts on this topic in QA. In this article, we have modified the attack algorithms widely used in text classification to fit those algorithms for QA systems. We have evaluated the impact of various attack methods on QA systems at character, word, and sentence levels. Furthermore, we have developed a new framework, named RobustQA, as the first open-source toolkit for investigating textual adversarial attacks in QA systems. RobustQA consists of seven modules: Tokenizer, Victim Model, Goals, Metrics, Attacker, Attack Selector, and Evaluator. It currently supports six different attack algorithms. Furthermore, the framework simplifies the development of new attack algorithms in QA. The source code and documentation of RobustQA are available at https://github.com/mirbostani/RobustQA.</abstract>
@@ -15167,7 +15167,7 @@
     </paper>
     <paper id="27">
       <title><fixed-case>M</fixed-case>ini<fixed-case>C</fixed-case>hain: A Small Library for Coding with Large Language Models</title>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University</affiliation></author>
       <pages>311-317</pages>
       <abstract>Programming augmented by large language models (LLMs) opens up many new application areas, but also requires care. LLMs are accurate enough, on average, to replace core functionality, yet make basic mistakes that demonstrate a lack of robustness. An ecosystem of prompting tools, from intelligent agents to new programming languages, have emerged with different solutions for patching LLMs with other tools. In this work, we introduce MiniChain, an opinionated tool for LLM augmented programming, with the design goals of ease-of-use of prototyping, transparency through automatic visualization, and a minimalistic approach to advanced features. The MiniChain library provides core primitives for coding LLM calls, separating out prompt templates, and capturing program structure. The library includes demo implementations of the main applications papers in the area, including chat-bots, code generation, retrieval-based question answering, and complex information extraction. The library is open-source and available at https://github.com/srush/MiniChain, with code demos available at https://srush-minichain.hf.space/, and video demo at https://www.youtube.com/watch?v=VszZ1VnO7sk.</abstract>
       <url hash="53c997f8">2023.emnlp-demo.27</url>
@@ -15198,9 +15198,9 @@
       <author><first>Sai</first><last>Vallurupalli</last><affiliation>University of Maryland at Baltimore County</affiliation></author>
       <author><first>Yash Kumar</first><last>Lal</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Francis</first><last>Ferraro</last><affiliation>University of Maryland, Baltimore County</affiliation></author>
-      <author><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
       <author><first>Greg</first><last>Durrett</last><affiliation>UT Austin</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Katrin</first><last>Erk</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>Stony Brook University</affiliation></author>
       <pages>328-335</pages>
@@ -15253,8 +15253,8 @@
       <author><first>Zixuan</first><last>Zhang</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Reece</first><last>Suchocki</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Sha</first><last>Li</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Jiawei</first><last>Han</last><affiliation>UIUC</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
       <pages>365-372</pages>
@@ -15287,8 +15287,8 @@
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Julia</first><last>Bonn</last><affiliation>University of Colorado, Boulder</affiliation></author>
       <author><first>Kristin</first><last>Wright-Bettner</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>381-388</pages>
       <abstract>In this paper, we introduce CAMRA (Copilot for AMR Annotatations), a cutting-edge web-based tool designed for constructing Abstract Meaning Representation (AMR) from natural language text. CAMRA offers a novel approach to deep lexical semantics annotation such as AMR, treating AMR annotation akin to coding in programming languages. Leveraging the familiarity of programming paradigms, CAMRA encompasses all essential features of existing AMR editors, including example lookup, while going a step further by integrating Propbank roleset lookup as an autocomplete feature within the tool. Notably, CAMRA incorporates AMR parser models as coding co-pilots, greatly enhancing the efficiency and accuracy of AMR annotators.</abstract>
       <url hash="edee925f">2023.emnlp-demo.35</url>
@@ -15392,7 +15392,7 @@
       <author><first>Elizaveta</first><last>Goncharova</last><affiliation>NRU HSE</affiliation></author>
       <author><first>Alexander</first><last>Panchenko</last><affiliation>Skolkovo Institue of Science and Technology</affiliation></author>
       <author><first>Maxim</first><last>Panov</last><affiliation>Technology Innovation Institute</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Artem</first><last>Shelmanov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence: MBZUAI</affiliation></author>
       <pages>446-461</pages>
       <abstract>Recent advancements in the capabilities of large language models (LLMs) have paved the way for a myriad of groundbreaking applications in various fields. However, a significant challenge arises as these models often “hallucinate”, i.e., fabricate facts without providing users an apparent means to discern the veracity of their statements. Uncertainty estimation (UE) methods are one path to safer, more responsible, and more effective use of LLMs. However, to date, research on UE methods for LLMs has been focused primarily on theoretical rather than engineering contributions. In this work, we tackle this issue by introducing LM-Polygraph, a framework with implementations of a battery of state-of-the-art UE methods for LLMs in text generation tasks, with unified program interfaces in Python. Additionally, it introduces an extendable benchmark for consistent evaluation of UE techniques by researchers, and a demo web application that enriches the standard chat dialog with confidence scores, empowering end-users to discern unreliable responses. LM-Polygraph is compatible with the most recent LLMs, including BLOOMz, LLaMA-2, ChatGPT, and GPT-4, and is designed to support future releases of similarly-styled LMs.</abstract>
@@ -15450,7 +15450,7 @@
       <author><first>Kyle</first><last>Lo</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Zejiang</first><last>Shen</last><affiliation>MIT</affiliation></author>
       <author><first>Benjamin</first><last>Newman</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Joseph</first><last>Chang</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="joseph-z-chang"><first>Joseph</first><last>Chang</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Russell</first><last>Authur</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Erin</first><last>Bransom</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Stefan</first><last>Candra</last><affiliation>Allen Institute for AI</affiliation></author>
@@ -15460,8 +15460,8 @@
       <author><first>Amanpreet</first><last>Singh</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Chris</first><last>Wilhelm</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Angele</first><last>Zamarron</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Marti A.</first><last>Hearst</last><affiliation>UC Berkeley</affiliation></author>
-      <author><first>Daniel</first><last>Weld</last><affiliation>University of Washington &amp; Allen Institute for Artificial Inelligence</affiliation></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last><affiliation>UC Berkeley</affiliation></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last><affiliation>University of Washington &amp; Allen Institute for Artificial Inelligence</affiliation></author>
       <author><first>Doug</first><last>Downey</last><affiliation>Allen Institute for AI, Northwestern University</affiliation></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>495-507</pages>
@@ -15857,7 +15857,7 @@
       <author><first>Siti Umairah</first><last>Md Salleh</last></author>
       <author><first>Hong Choon</first><last>Oh</last></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>185-193</pages>
       <abstract>Utilizing natural language processing techniques in clinical conversations is effective to improve the efficiency of health management workflows for medical staff and patients. Dialogue segmentation and topic categorization are two fundamental steps for processing verbose spoken conversations and highlighting informative spans for downstream tasks. However, in practical use cases, due to the variety of segmentation granularity and topic definition, and the lack of diverse annotated corpora, no generic models are readily applicable for domain-specific applications. In this work, we introduce and adopt a joint model for dialogue segmentation and topic categorization, and conduct a case study on healthcare follow-up calls for diabetes management; we provide insights from both data and model perspectives toward performance and robustness.</abstract>
       <url hash="4a7c2ee2">2023.emnlp-industry.19</url>
@@ -15991,7 +15991,7 @@
     <paper id="29">
       <title>Empower Large Language Model to Perform Better on Industrial Domain-Specific Question Answering</title>
       <author><first>Fangkai</first><last>Yang</last></author>
-      <author id="pu-zhao"><first>Pu</first><last>Zhao</last></author>
+      <author><first>Pu</first><last>Zhao</last></author>
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Lu</first><last>Wang</last></author>
       <author><first>Bo</first><last>Qiao</last></author>
@@ -16265,7 +16265,7 @@
     <paper id="47">
       <title>Automatic Linking of Judgements to <fixed-case>UK</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Hearings</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>492-500</pages>
       <abstract>One the most important archived legal material in the UK is the Supreme Court published judgements and video recordings of court sittings for the decided cases. The impact of Supreme Court published material extends far beyond the parties involved in any given case as it provides landmark rulings on arguable points of law of the greatest public and constitutional importance. However, the recordings of a case are usually very long which makes it both time and effort consuming for legal professionals to study the critical arguments in the legal deliberations. In this research, we summarise the second part of a combined research-industrial project for building an automated tool designed specifically to link segments in the text judgement to semantically relevant timespans in the videos of the hearings. The tool is employed as a User-Interface (UI) platform that provides a better access to justice by bookmarking the timespans in the videos which contributed to the final judgement of the case. We explain how we employ AI generative technology to retrieve the relevant links and show that the customisation of the GPT text embeddings to our dataset achieves the best accuracy for our automatic linking system.</abstract>
       <url hash="539c3dca">2023.emnlp-industry.47</url>
@@ -16336,7 +16336,7 @@
     <paper id="52">
       <title>Investigating the Role and Impact of Disfluency on Summarization</title>
       <author><first>Varun</first><last>Nathan</last></author>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Jithendra</first><last>Vepa</last></author>
       <pages>541-551</pages>
       <abstract>Contact centers handle both chat and voice calls for the same domain. As part of their workflow, it is a standard practice to summarize the conversations once they conclude. A significant distinction between chat and voice communication lies in the presence of disfluencies in voice calls, such as repetitions, restarts, and replacements. These disfluencies are generally considered noise for downstream natural language understanding (NLU) tasks. While a separate summarization model for voice calls can be trained in addition to chat specific model for the same domain, it requires manual annotations for both the channels and adds complexity arising due to maintaining two models. Therefore, it’s crucial to investigate if a model trained on fluent data can handle disfluent data effectively. While previous research explored impact of disfluency on question-answering and intent detection, its influence on summarization is inadequately studied. Our experiments reveal up to 6.99-point degradation in Rouge-L score, along with reduced fluency, consistency, and relevance when a fluent-trained model handles disfluent data. Replacement disfluencies have the highest negative impact. To mitigate this, we examine Fused-Fine Tuning by training the model with a combination of fluent and disfluent data, resulting in improved performance on both public and real-life datasets. Our work highlights the significance of incorporating disfluency in training summarization models and its advantages in an industrial setting.</abstract>
@@ -16365,7 +16365,7 @@
       <title><fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> Spoken Entity Extraction for Virtual Agents</title>
       <author><first>Karan</first><last>Singla</last></author>
       <author><first>Yeon-Jun</first><last>Kim</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>567-574</pages>
       <abstract>In human-computer conversations, extracting entities such as names, street addresses and email addresses from speech is a challenging task. In this paper, we study the impact of fine-tuning pre-trained speech encoders on extracting spoken entities in human-readable form directly from speech without the need for text transcription. We illustrate that such a direct approach optimizes the encoder to transcribe only the entity relevant portions of speech ignoring the superfluous portions such as carrier phrases, or spell name entities. In the context of dialog from an enterprise virtual agent, we demonstrate that the 1-step approach outperforms the typical 2-step approach which first generates lexical transcriptions followed by text-based entity extraction for identifying spoken entities.</abstract>
       <url hash="7eb034ca">2023.emnlp-industry.54</url>
@@ -16500,7 +16500,7 @@
       <author><first>Besnik</first><last>Fetahu</last></author>
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>663-674</pages>
       <abstract>E-commerce product catalogs contain billions of items. Most products have lengthy titles, as sellers pack them with product attributes to improve retrieval, and highlight key product aspects. This results in a gap between such unnatural products titles, and how customers refer to them. It also limits how e-commerce stores can use these seller-provided titles for recommendation, QA, or review summarization. Inspired by recent work on instruction-tuned LLMs, we present InstructPTS, a controllable approach for the task of Product Title Summarization (PTS). Trained using a novel instruction fine-tuning strategy, our approach is able to summarize product titles according to various criteria (e.g. number of words in a summary, inclusion of specific phrases, etc.). Extensive evaluation on a real-world e-commerce catalog shows that compared to simple fine-tuning of LLMs, our proposed approach can generate more accurate product name summaries, with an improvement of over 14 and 8 BLEU and ROUGE points, respectively.</abstract>
       <url hash="8525bcfb">2023.emnlp-industry.63</url>
@@ -16547,7 +16547,7 @@
       <author><first>Jin</first><last>Miao</last></author>
       <author><first>Xiaoyu</first><last>Sun</last></author>
       <author><first>Jiayi</first><last>Chen</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <author><first>Hanjun</first><last>Dai</last></author>
       <author><first>Wei</first><last>Wei</last></author>
       <pages>707-722</pages>
@@ -16689,7 +16689,7 @@
       <author><first>Hadas</first><last>Kotek</last></author>
       <author><first>Christopher</first><last>Klein</last></author>
       <author><first>Zidi</first><last>Xiu</last></author>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>820-827</pages>
       <abstract>Controversy is a reflection of our zeitgeist, and an important aspect to any discourse. The rise of large language models (LLMs) as conversational systems has increased public reliance on these systems for answers to their various questions. Consequently, it is crucial to systematically examine how these models respond to questions that pertaining to ongoing debates. However, few such datasets exist in providing human-annotated labels reflecting the contemporary discussions. To foster research in this area, we propose a novel construction of a controversial questions dataset, expanding upon the publicly released Quora Question Pairs Dataset. This dataset presents challenges concerning knowledge recency, safety, fairness, and bias. We evaluate different LLMs using a subset of this dataset, illuminating how they handle controversial issues and the stances they adopt. This research ultimately contributes to our understanding of LLMs’ interaction with controversial issues, paving the way for improvements in their comprehension and handling of complex societal debates.</abstract>
       <url hash="da8fee6e">2023.emnlp-industry.76</url>
@@ -16701,7 +16701,7 @@
       <title>Angel: Enterprise Search System for the Non-Profit Industry</title>
       <author><first>Saiful</first><last>Haq</last></author>
       <author><first>Ashutosh</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>828-835</pages>
       <abstract>Non-profit industry need a system for accurately matching fund-seekers (e.g., AMERICAN NATIONAL RED CROSS) with fund-givers (e.g., BILL AND MELINDA GATES FOUNDATION) aligned in cause (e.g., cancer) and target beneficiary group (e.g., children). In this paper, we create an enterprise search system “ANGEL” for the non-profit industry that takes a fund-giver’s mission description as input and returns a ranked list of fund-seekers as output, and vice-versa. ANGEL employs ColBERT, a neural information retrieval model, which we enhance by exploiting the two techniques of (a) Syntax-aware local attention (SLA) to combine syntactic information in the mission description with multi-head self-attention and (b) Dense Pseudo Relevance Feedback (DPRF) for augmentation of short mission descriptions. We create a mapping dictionary “non-profit-dict” to curate a “non-profit-search database” containing information on 594K fund-givers and 194K fund-seekers from IRS-990 filings for the non-profit industry search engines . We also curate a “non-profit-evaluation” dataset containing scored matching between 463 fund-givers and 100 fund-seekers. The research is in collaboration with a philanthropic startup that identifies itself as an “AI matching platform, fundraising assistant, and philanthropy search base.” Domain experts at the philanthropic startup annotate the non-profit evaluation dataset and continuously evaluate the performance of ANGEL. ANGEL achieves an improvement of 0.14 MAP@10 and 0.16 MRR@10 over the state-of-the-art baseline on the non-profit evaluation dataset. To the best of our knowledge, ours is the first effort at building an enterprise search engine based on neural information retrieval for the non-profit industry.</abstract>
       <url hash="a59a79c9">2023.emnlp-industry.77</url>
diff --git a/data/xml/2023.eval4nlp.xml b/data/xml/2023.eval4nlp.xml
index 01c48bbd2b..b3533b82b2 100644
--- a/data/xml/2023.eval4nlp.xml
+++ b/data/xml/2023.eval4nlp.xml
@@ -71,7 +71,7 @@
       <title>Zero-shot Probing of Pretrained Language Models for Geography Knowledge</title>
       <author><first>Nitin</first><last>Ramrakhiyani</last><affiliation>International Institute of Information Technology, Hyderabad and Tata Consultancy Services Limited, India</affiliation></author>
       <author><first>Vasudeva</first><last>Varma</last><affiliation>International Institute of Information Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <pages>49-61</pages>
       <abstract>Gauging the knowledge of Pretrained Language Models (PLMs) about facts in niche domains is an important step towards making them better in those domains. In this paper, we aim at evaluating multiple PLMs for their knowledge about world Geography. We contribute (i) a sufficiently sized dataset of masked Geography sentences to probe PLMs on masked token prediction and generation tasks, (ii) benchmark the performance of multiple PLMs on the dataset. We also provide a detailed analysis of the performance of the PLMs on different Geography facts.</abstract>
@@ -95,7 +95,7 @@
       <author><first>Yu-Peng</first><last>Chen</last><affiliation>University of Florida</affiliation></author>
       <author><first>Abhilash</first><last>Budharapu</last></author>
       <author><first>Lisa</first><last>Anthony</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <pages>85-99</pages>
       <abstract>With the aim of improving work efficiency, we examine how Large Language Models (LLMs) can better support the handoff of information by summarizing user interactions in collaborative intelligence analysis communication. We experiment with interaction logs, or a record of user interactions with a system. Inspired by chain-of-thought prompting, we describe a technique to avoid API token limits with recursive summarization requests. We then apply ChatGPT over multiple iterations to extract named entities, topics, and summaries, combined with interaction sequence sentences, to generate summaries of critical events and results of analysis sessions. We quantitatively evaluate the generated summaries against human-generated ones using common accuracy metrics (e.g., ROUGE-L, BLEU, BLEURT, and TER). We also report qualitative trends and the factuality of the output. We find that manipulating the audience feature or providing single-shot examples minimally influences the model’s accuracy. While our methodology successfully summarizes interaction logs, the lack of significant results raises questions about prompt engineering and summarization effectiveness generally. We call on explainable artificial intelligence research to better understand how terms and their placement may change LLM outputs, striving for more consistent prompt engineering guidelines.</abstract>
       <url hash="da75115c">2023.eval4nlp-1.7</url>
@@ -145,8 +145,8 @@
       <author><first>Fuhai</first><last>Song</last></author>
       <author><first>Hui</first><last>Huang</last></author>
       <author><first>Jinghao</first><last>Yuan</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>139-148</pages>
       <abstract>Recently, Large Language Models (LLMs) have boosted the research in natural language processing and shown impressive capabilities across numerous domains, including machine translation evaluation. This paper presents our methods developed for the machine translation evaluation sub-task of the Eval4NLP 2023 Shared Task. Based on the provided LLMs, we propose a generation-based method as well as a probability-based method to perform evaluation, explore different strategies when selecting the demonstrations for in-context learning, and try different ensemble methods to further improve the evaluation accuracy. The experiment results on the development set and test set demonstrate the effectiveness of our proposed method.</abstract>
       <url hash="b245f666">2023.eval4nlp-1.11</url>
@@ -167,7 +167,7 @@
       <title><fixed-case>LTRC</fixed-case>_<fixed-case>IIITH</fixed-case>’s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task</title>
       <author><first>Pavan</first><last>Baswani</last></author>
       <author><first>Ananya</first><last>Mukherjee</last></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <pages>156-163</pages>
       <abstract>In this report, we share our contribution to the Eval4NLP Shared Task titled “Prompting Large Language Models as Explainable Metrics.” We build our prompts with a primary focus on effective prompting strategies, score-aggregation, and explainability for LLM-based metrics. We participated in the track for smaller models by submitting the scores along with their explanations. According to the Kendall correlation scores on the leaderboard, our MT evaluation submission ranks second-best, while our summarization evaluation submission ranks fourth, with only a 0.06 difference from the leading submission.</abstract>
       <url hash="2dc4f64a">2023.eval4nlp-1.13</url>
@@ -202,7 +202,7 @@
     <paper id="16">
       <title>Reference-Free Summarization Evaluation with Large Language Models</title>
       <author><first>Abbas</first><last>Akkasi</last><affiliation>Carleton University</affiliation></author>
-      <author><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Majid</first><last>Komeili</last><affiliation>Carleton University</affiliation></author>
       <pages>193-201</pages>
       <abstract>With the continuous advancement in unsupervised learning methodologies, text generation has become increasingly pervasive. However, the evaluation of the quality of the generated text remains challenging. Human annotations are expensive and often show high levels of disagreement, in particular for certain tasks characterized by inherent subjectivity, such as translation and summarization.Consequently, the demand for automated metrics that can reliably assess the quality of such generative systems and their outputs has grown more pronounced than ever. In 2023, Eval4NLP organized a shared task dedicated to the automatic evaluation of outputs from two specific categories of generative systems: machine translation and summarization. This evaluation was achieved through the utilization of prompts with Large Language Models. Participating in the summarization evaluation track, we propose an approach that involves prompting LLMs to evaluate six different latent dimensions of summarization quality. In contrast to many previous approaches to summarization assessments, which emphasize lexical overlap with reference text, this method surfaces the importance of correct syntax in summarization evaluation. Our method resulted in the second-highest performance in this shared task, demonstrating its effectiveness as a reference-free evaluation.</abstract>
@@ -214,7 +214,7 @@
       <title>Little Giants: Exploring the Potential of Small <fixed-case>LLM</fixed-case>s as Evaluation Metrics in Summarization in the <fixed-case>E</fixed-case>val4<fixed-case>NLP</fixed-case> 2023 Shared Task</title>
       <author><first>Neema</first><last>Kotonya</last></author>
       <author><first>Saran</first><last>Krishnasamy</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>202-218</pages>
       <abstract>This paper describes and analyzes our participation in the 2023 Eval4NLP shared task, which focuses on assessing the effectiveness of prompt-based techniques to empower Large Language Models to handle the task of quality estimation, particularly in the context of evaluating machine translations and summaries. We conducted systematic experiments with various prompting techniques, including standard prompting, prompts informed by annotator instructions, and innovative chain-of-thought prompting. In addition, we integrated these approaches with zero-shot and one-shot learning methods to maximize the efficacy of our evaluation procedures. Our work reveals that combining these approaches using a “small”, open source model (orca_mini_v3_7B) yields competitive results.</abstract>
diff --git a/data/xml/2023.fever.xml b/data/xml/2023.fever.xml
index da458eb542..8e3ed47e65 100644
--- a/data/xml/2023.fever.xml
+++ b/data/xml/2023.fever.xml
@@ -9,7 +9,7 @@
       <editor><first>Oana</first><last>Cocarascu</last></editor>
       <editor><first>Zhijiang</first><last>Guo</last></editor>
       <editor><first>Arpit</first><last>Mittal</last></editor>
-      <editor><first>Michael</first><last>Schlichtkrull</last></editor>
+      <editor id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></editor>
       <editor><first>James</first><last>Thorne</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="3">
       <title>An Entity-based Claim Extraction Pipeline for Real-world Biomedical Fact-checking</title>
-      <author><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Lara</first><last>Grimminger</last></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>29-37</pages>
diff --git a/data/xml/2023.fieldmatters.xml b/data/xml/2023.fieldmatters.xml
index 0474ae84f8..5e7f2fe49e 100644
--- a/data/xml/2023.fieldmatters.xml
+++ b/data/xml/2023.fieldmatters.xml
@@ -11,7 +11,7 @@
       <editor><first>Tatiana</first><last>Shavrina</last></editor>
       <editor><first>Eric</first><last>Le Ferrand</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
-      <editor><first>Francis</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last></editor>
       <editor><first>Timofey</first><last>Arkhangelskiy</last></editor>
       <editor><first>Vladislav</first><last>Mikhailov</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -81,7 +81,7 @@
       <author><first>Nikolaos</first><last>Kokkas</last><affiliation>Athena Research Center</affiliation></author>
       <author><first>Vasileios</first><last>Arampatzakis</last><affiliation>Athena Research Center</affiliation></author>
       <author><first>Vasileios</first><last>Sevetlidis</last><affiliation>Athena Research Center</affiliation></author>
-      <author><first>Stella</first><last>Markantonatou</last><affiliation>ILSP/R.C. “Athena”</affiliation></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last><affiliation>ILSP/R.C. “Athena”</affiliation></author>
       <author><first>George</first><last>Pavlidis</last><affiliation>Athena Research Center</affiliation></author>
       <pages>40-45</pages>
       <abstract>Automatic Speech Recognition (ASR) models can aid field linguists by facilitating the creation of text corpora from oral material. Training ASR systems for low-resource languages can be a challenging task not only due to lack of resources but also due to the work required for the preparation of a training dataset. We present a pipeline for data processing and ASR model training for low-resourced languages, based on the language family. As a case study, we collected recordings of Pomak, an endangered South East Slavic language variety spoken in Greece. Using the proposed pipeline, we trained the first Pomak ASR model.</abstract>
diff --git a/data/xml/2023.findings.xml b/data/xml/2023.findings.xml
index cbfcc8ca62..43bd9096a1 100644
--- a/data/xml/2023.findings.xml
+++ b/data/xml/2023.findings.xml
@@ -20,8 +20,8 @@
       <title>Using Punctuation as an Adversarial Attack on Deep Learning-Based <fixed-case>NLP</fixed-case> Systems: An Empirical Study</title>
       <author><first>Brian</first><last>Formento</last><affiliation>Nus</affiliation></author>
       <author><first>Chuan Sheng</first><last>Foo</last><affiliation>Institute for Infocomm Research</affiliation></author>
-      <author><first>Luu Anh</first><last>Tuan</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
-      <author><first>See Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Luu Anh</first><last>Tuan</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>1-34</pages>
       <abstract>This work empirically investigates punctuation insertions as adversarial attacks on NLP systems. Data from experiments on three tasks, five datasets, and six models with four attacks show that punctuation insertions, when limited to a few symbols (apostrophes and hyphens), are a superior attack vector compared to character insertions due to 1) a lower after-attack accuracy (<tex-math>A_{aft-atk}</tex-math>) than alphabetical character insertions; 2) higher semantic similarity between the resulting and original texts; and 3) a resulting text that is easier and faster to read as assessed with the Test of Word Reading Efficiency (TOWRE)). The tests also indicate that 4) grammar checking does not mitigate punctuation insertions and 5) punctuation insertions outperform word-level attacks in settings with a limited number of word synonyms and queries to the victim’s model. Our findings indicate that inserting a few punctuation types that result in easy-to-read samples is a general attack mechanism. In light of this threat, we assess the impact of punctuation insertions, potential mitigations, the mitigation’s tradeoffs, punctuation insertion’s worst-case scenarios and summarize our findings in a qualitative casual map, so that developers can design safer, more secure systems.</abstract>
       <url hash="61152bda">2023.findings-eacl.1</url>
@@ -186,7 +186,7 @@
       <author><first>Liesbeth</first><last>Allein</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Marlon</first><last>Saelens</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Ruben</first><last>Cartuyvels</last><affiliation>Catholic University of Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
       <pages>176-189</pages>
       <abstract>Leveraging contextual knowledge has become standard practice in automated claim verification, yet the impact of temporal reasoning has been largely overlooked. Our study demonstrates that time positively influences the claim verification process of evidence-based fact-checking. The temporal aspects and relations between claims and evidence are first established through grounding on shared timelines, which are constructed using publication dates and time expressions extracted from their text. Temporal information is then provided to RNN-based and Transformer-based classifiers before or after claim and evidence encoding. Our time-aware fact-checking models surpass base models by up to 9% Micro F1 (64.17%) and 15% Macro F1 (47.43%) on the MultiFC dataset. They also outperform prior methods that explicitly model temporal relations between evidence. Our findings show that the presence of temporal information and the manner in which timelines are constructed greatly influence how fact-checking models determine the relevance and supporting or refuting character of evidence documents.</abstract>
       <url hash="58d18fd4">2023.findings-eacl.13</url>
@@ -276,7 +276,7 @@
       <author><first>Zhiruo</first><last>Wang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Grace</first><last>Cuenca</last><affiliation>Princeton University</affiliation></author>
       <author><first>Shuyan</first><last>Zhou</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>265-273</pages>
       <abstract>While there has been a recent burgeoning of applications at the intersection of natural and programming languages, such as code generation and code summarization, these applications are usually English-centric. This creates a barrier for program developers who are not proficient in English. To mitigate this gap in technology development across languages, we propose a multilingual dataset, MCoNaLa, to benchmark code generation from natural language commands extending beyond English. Modeled off of the methodology from the English Code/Natural Language Challenge (CoNaLa) dataset, we annotated a total of 896 NL-Code pairs in three languages: Spanish, Japanese, and Russian. We present a systematic evaluation on MCoNaLa by testing state-of-the-art code generation systems. Although the difficulties vary across three languages, all systems lag significantly behind their English counterparts, revealing the challenges in adapting code generation to new languages.</abstract>
@@ -318,7 +318,7 @@
       <title><fixed-case>CALM</fixed-case>-Bench: A Multi-task Benchmark for Evaluating Causality-Aware Language Models</title>
       <author><first>Dhairya</first><last>Dalal</last><affiliation>University of Galway</affiliation></author>
       <author><first>Paul</first><last>Buitelaar</last><affiliation>University of Galway</affiliation></author>
-      <author><first>Mihael</first><last>Arcan</last><affiliation>University of Galway</affiliation></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last><affiliation>University of Galway</affiliation></author>
       <pages>296-311</pages>
       <abstract>Causal reasoning is a critical component of human cognition and is required across a range of question-answering (QA) tasks (such as abductive reasoning, commonsense QA, and procedural reasoning). Research on causal QA has been underdefined, task-specific, and limited in complexity. Recent advances in foundation language models (such as BERT, ERNIE, and T5) have shown the efficacy of pre-trained models across diverse QA tasks. However, there is limited research exploring the causal reasoning capabilities of those language models and no standard evaluation benchmark. To unify causal QA research, we propose CALM-Bench, a multi-task benchmark for evaluating causality-aware language models (CALM). We present a standardized definition of causal QA tasks and show empirically that causal reasoning can be generalized and transferred across different QA tasks. Additionally, we share a strong multi-task baseline model which outperforms single-task fine-tuned models on the CALM-Bench tasks.</abstract>
       <url hash="061c48e8">2023.findings-eacl.23</url>
@@ -349,7 +349,7 @@
       <author><first>Ali</first><last>Ahmadvand</last><affiliation>Emory University</affiliation></author>
       <author><first>Julia</first><last>Kiseleva</last><affiliation>Microsoft Research</affiliation></author>
       <author id="yang-liu-microsoft"><first>Yang</first><last>Liu</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Ming</first><last>Zhong</last><affiliation>University of Illinois</affiliation></author>
       <author><first>Milad</first><last>Shokouhi</last><affiliation>Microsoft</affiliation></author>
       <pages>331-342</pages>
@@ -376,7 +376,7 @@
       <author><first>Daichi</first><last>Yamaguchi</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Rei</first><last>Miyata</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Sayuka</first><last>Shimada</last><affiliation>Nagoya University</affiliation></author>
-      <author><first>Satoshi</first><last>Sato</last><affiliation>Nagoya University</affiliation></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last><affiliation>Nagoya University</affiliation></author>
       <pages>359-375</pages>
       <abstract>This study presents an analytical evaluation of neural text simplification (TS) systems. Because recent TS models are trained in an end-to-end fashion, it is difficult to grasp their abilities to perform particular simplification operations. For the advancement of TS research and development, we should understand in detail what current TS systems can and cannot perform in comparison with human performance. To that end, we first developed an analytical evaluation framework consisting of fine-grained taxonomies of simplification strategies (at both the surface and content levels) and errors. Using this framework, we annotated TS instances produced by professional human editors and multiple neural TS systems and compared the results. Our analyses concretely and quantitatively revealed a wide gap between humans and systems, specifically indicating that systems tend to perform deletions and local substitutions while excessively omitting important information, and that the systems can hardly perform information addition operations. Based on our analyses, we also provide detailed directions to address these limitations.</abstract>
       <url hash="4e918bac">2023.findings-eacl.27</url>
@@ -446,8 +446,8 @@
       <title>Few-Shot Structured Policy Learning for Multi-Domain and Multi-Task Dialogues</title>
       <author><first>Thibault</first><last>Cordier</last><affiliation>University of Avignon</affiliation></author>
       <author><first>Tanguy</first><last>Urvoy</last><affiliation>Orange</affiliation></author>
-      <author><first>Fabrice</first><last>Lefèvre</last><affiliation>Avignon Univ.</affiliation></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last><affiliation>Orange Innovation Research</affiliation></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last><affiliation>Avignon Univ.</affiliation></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last><affiliation>Orange Innovation Research</affiliation></author>
       <pages>432-441</pages>
       <abstract>Reinforcement learning has been widely adopted to model dialogue managers in task-oriented dialogues. However, the user simulator provided by state-of-the-art dialogue frameworks are only rough approximations of human behaviour. The ability to learn from a small number of human interactions is hence crucial, especially on multi-domain and multi-task environments where the action space is large. We therefore propose to use structured policies to improve sample efficiency when learning on these kinds of environments. We also evaluate the impact of learning from human vs simulated experts. Among the different levels of structure that we tested, the graph neural networks (GNNs) show a remarkable superiority by reaching a success rate above 80% with only 50 dialogues when learning from simulated experts. They also show superiority when learning from human experts, although a performance drop was observed. We therefore suggest to concentrate future research efforts on bridging the gap between human data, simulators and automatic evaluators in dialogue frameworks.</abstract>
       <url hash="24f3c5da">2023.findings-eacl.32</url>
@@ -488,7 +488,7 @@
       <title>Paper Bullets: Modeling Propaganda with the Help of Metaphor</title>
       <author><first>Daniel</first><last>Baleato Rodríguez</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Verna</first><last>Dankers</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Ekaterina</first><last>Shutova</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>472-489</pages>
       <abstract>Propaganda aims to persuade an audience by appealing to emotions and using faulty reasoning, with the purpose of promoting a particular point of view. Similarly, metaphor modifies the semantic frame, thus eliciting a response that can be used to tune up or down the emotional volume of the message. Given the close relationship between them, we hypothesize that, when modeling them computationally, it can be beneficial to do so jointly. In particular, we perform multi-task learning with propaganda identification as the main task and metaphor detection as an auxiliary task. To the best of our knowledge, this is the first work that models metaphor and propaganda together. We experiment with two datasets for identifying propaganda techniques in news articles and in memes shared on social media. We find that leveraging metaphor improves model performance, particularly for the two most common propaganda techniques: loaded language and name-calling.</abstract>
@@ -517,7 +517,7 @@
       <author><first>Feng</first><last>Nan</last><affiliation>Aws Ai</affiliation></author>
       <author><first>Nicholas</first><last>Dingwall</last><affiliation>Amazon AI Labs</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>Amazon AWS AI Labs</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>512-525</pages>
       <abstract>Missing information is a common issue of dialogue summarization where some information in the reference summaries is not covered in the generated summaries. To address this issue, we propose to utilize natural language inference (NLI) models to improve coverage while avoiding introducing factual inconsistencies. Specifically, we use NLI to compute fine-grained training signals to encourage the model to generate content in the reference summaries that have not been covered, as well as to distinguish between factually consistent and inconsistent generated sentences. Experiments on the DialogSum and SAMSum datasets confirm the effectiveness of the proposed approach in balancing coverage and faithfulness, validated with automatic metrics and human evaluations. Additionally, we compute the correlation between commonly used automatic metrics with human judgments in terms of three different dimensions regarding coverage and factual consistency to provide insight into the most suitable metric for evaluating dialogue summaries.</abstract>
       <url hash="27c9d270">2023.findings-eacl.37</url>
@@ -616,7 +616,7 @@
       <title>Abstractive Document Summarization with Summary-length Prediction</title>
       <author><first>Jingun</first><last>Kwon</last><affiliation>Tokyo Institute of Technology, Naver Corporation</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>618-624</pages>
       <abstract>Recently, we can obtain a practical abstractive document summarization model by fine-tuning a pre-trained language model (PLM). Since the pre-training for PLMs does not consider summarization-specific information such as the target summary length, there is a gap between the pre-training and fine-tuning for PLMs in summarization tasks. To fill the gap, we propose a method for enabling the model to understand the summarization-specific information by predicting the summary length in the encoder and generating a summary of the predicted length in the decoder in fine-tuning. Experimental results on the WikiHow, NYT, and CNN/DM datasets showed that our methods improve ROUGE scores from BART by generating summaries of appropriate lengths. Further, we observed about 3.0, 1,5, and 3.1 point improvements for ROUGE-1, -2, and -L, respectively, from GSum on the WikiHow dataset. Human evaluation results also showed that our methods improve the informativeness and conciseness of summaries.</abstract>
       <url hash="351039b8">2023.findings-eacl.45</url>
@@ -630,9 +630,9 @@
       <author><first>Jingun</first><last>Kwon</last><affiliation>Tokyo Institute of Technology, Naver Corporation</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Young-In</first><last>Song</last><affiliation>Naver</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>625-632</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="2b3aef86">2023.findings-eacl.46</url>
       <bibkey>kwon-etal-2023-hierarchical</bibkey>
       <video href="2023.findings-eacl.46.mp4"/>
@@ -641,7 +641,7 @@
     <paper id="47">
       <title>Active Learning for Multilingual Semantic Parser</title>
       <author><first>Zhuang</first><last>Li</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>633-639</pages>
       <abstract>Current multilingual semantic parsing (MSP) datasets are almost all collected by translating the utterances in the existing datasets from the resource-rich language to the target language. However, manual translation is costly. To reduce the translation effort, this paper proposes the first active learning procedure for MSP (AL-MSP). AL-MSP selects only a subset from the existing datasets to be translated. We also propose a novel selection method that prioritizes the examples diversifying the logical form structures with more lexical choices, and a novel hyperparameter tuning method that needs no extra annotation cost. Our experiments show that AL-MSP significantly reduces translation costs with ideal selection methods. Our selection method with proper hyperparameters yields better parsing performance than the other baselines on two multilingual datasets.</abstract>
       <url hash="451225bf">2023.findings-eacl.47</url>
@@ -694,11 +694,11 @@
       <author><first>Simona</first><last>Frenda</last><affiliation>Università degli Studi di Torino</affiliation></author>
       <author><first>Mario</first><last>Laurent</last><affiliation>Université Paul Sabatier</affiliation></author>
       <author><first>Wolfgang</first><last>Schmeisser-Nieto</last><affiliation>Universitat de Barcelona</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>University of toulouse</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>University of toulouse</affiliation></author>
       <author><first>Cristina</first><last>Bosco</last><affiliation>Dipartimento di Informatica - Università di Torino</affiliation></author>
-      <author><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, Université Toulouse 3</affiliation></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, Université Toulouse 3</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
-      <author><first>Mariona</first><last>Taulé</last><affiliation>University of Barcelona</affiliation></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last><affiliation>University of Barcelona</affiliation></author>
       <pages>686-696</pages>
       <abstract>In this paper, we focus on the topics of misinformation and racial hoaxes from a perspective derived from both social psychology and computational linguistics. In particular, we consider the specific case of anti-immigrant feeling as a first case study for addressing racial stereotypes. We describe the first corpus-based study for multilingual racial stereotype identification in social media conversational threads. Our contributions are: (i) a multilingual corpus of racial hoaxes, (ii) a set of common guidelines for the annotation of racial stereotypes in social media texts, and a multi-layered, fine-grained scheme, psychologically grounded on the work by Fiske, including not only stereotype presence, but also contextuality, implicitness, and forms of discredit, (iii) a multilingual dataset in Italian, Spanish, and French annotated following the aforementioned guidelines, and cross-lingual comparative analyses taking into account racial hoaxes and stereotypes in online discussions. The analysis and results show the usefulness of our methodology and resources, shedding light on how racial hoaxes are spread, and enable the identification of negative stereotypes that reinforce them.</abstract>
       <url hash="7037ea87">2023.findings-eacl.51</url>
@@ -736,7 +736,7 @@
       <title><fixed-case>B</fixed-case>angla<fixed-case>NLG</fixed-case> and <fixed-case>B</fixed-case>angla<fixed-case>T</fixed-case>5: Benchmarks and Resources for Evaluating Low-Resource Natural Language Generation in <fixed-case>B</fixed-case>angla</title>
       <author><first>Abhik</first><last>Bhattacharjee</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Tahmid</first><last>Hasan</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Rifat</first><last>Shahriyar</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <pages>726-735</pages>
       <abstract>This work presents ‘BanglaNLG,’ a comprehensive benchmark for evaluating natural language generation (NLG) models in Bangla, a widely spoken yet low-resource language. We aggregate six challenging conditional text generation tasks under the BanglaNLG benchmark, introducing a new dataset on dialogue generation in the process. Furthermore, using a clean corpus of 27.5 GB of Bangla data, we pretrain ‘BanglaT5’, a sequence-to-sequence Transformer language model for Bangla. BanglaT5 achieves state-of-the-art performance in all of these tasks, outperforming several multilingual models by up to 9% absolute gain and 32% relative gain. We are making the new dialogue dataset and the BanglaT5 model publicly available at <url>https://github.com/csebuetnlp/BanglaNLG</url> in the hope of advancing future research on Bangla NLG.</abstract>
@@ -748,7 +748,7 @@
     <paper id="55">
       <title>It’s about Time: Rethinking Evaluation on Rumor Detection Benchmarks using Chronological Splits</title>
       <author><first>Yida</first><last>Mu</last><affiliation>The University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Nikolaos</first><last>Aletras</last><affiliation>University of Sheffield</affiliation></author>
       <pages>736-743</pages>
       <abstract>New events emerge over time influencing the topics of rumors in social media. Current rumor detection benchmarks use random splits as training, development and test sets which typically results in topical overlaps. Consequently, models trained on random splits may not perform well on rumor classification on previously unseen topics due to the temporal concept drift. In this paper, we provide a re-evaluation of classification models on four popular rumor detection benchmarks considering chronological instead of random splits. Our experimental results show that the use of random splits can significantly overestimate predictive performance across all datasets and models. Therefore, we suggest that rumor detection models should always be evaluated using chronological splits for minimizing topical overlaps.</abstract>
@@ -761,7 +761,7 @@
       <title><fixed-case>MUTANT</fixed-case>: A Multi-sentential Code-mixed <fixed-case>H</fixed-case>inglish Dataset</title>
       <author><first>Rahul</first><last>Gupta</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <author><first>Vivek</first><last>Srivastava</last><affiliation>TCS Research</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
       <pages>744-753</pages>
       <abstract>The multi-sentential long sequence textual data unfolds several interesting research directions pertaining to natural language processing and generation. Though we observe several high-quality long-sequence datasets for English and other monolingual languages, there is no significant effort in building such resources for code-mixed languages such as Hinglish (code-mixing of Hindi-English). In this paper, we propose a novel task of identifying multi-sentential code-mixed text (MCT) from multilingual articles. As a use case, we leverage multilingual articles from two different data sources and build a first-of-its-kind multi-sentential code-mixed Hinglish dataset i.e., MUTANT. We propose a token-level language-aware pipeline and extend the existing metrics measuring the degree of code-mixing to a multi-sentential framework and automatically identify MCT in the multilingual articles. The MUTANT dataset comprises 67k articles with 85k identified Hinglish MCTs. To facilitate future research directions, we will make the dataset and the code publicly available upon publication.</abstract>
       <url hash="aea13fc3">2023.findings-eacl.56</url>
@@ -814,7 +814,7 @@
       <author><first>Ryan</first><last>Prenger</last><affiliation>Nvidia</affiliation></author>
       <author><first>Mohammad</first><last>Shoeybi</last><affiliation>Nvidia</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Anima</first><last>Anandkumar</last><affiliation>California Institute of Technology</affiliation></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last><affiliation>California Institute of Technology</affiliation></author>
       <author><first>Bryan</first><last>Catanzaro</last><affiliation>Nvidia</affiliation></author>
       <pages>793-808</pages>
       <abstract>Closed-book question answering (QA) requires a model to directly answer an open-domain question without access to any external knowledge. Prior work on closed-book QA either directly finetunes or prompts a pretrained language model (LM) to leverage the stored knowledge. However, they do not fully exploit the parameterized knowledge. To address this inefficiency, we propose a two-stage, closed-book QA framework which employs a coarse-to-fine approach to extract the relevant knowledge and answer a question. We first generate a related context for a given question by prompting a pretrained LM. We then prompt the same LM to generate an answer using the generated context and the question. Additionally, we marginalize over the generated contexts to improve the accuracies and reduce context uncertainty. Experimental results on three QA benchmarks show that our method significantly outperforms previous closed-book QA methods. For example on TriviaQA, our method improves exact match accuracy from 55.3% to 68.6%, and is on par with open-book QA methods (68.6% vs. 68.0%). Our results show that our new methodology is able to better exploit the stored knowledge in pretrained LMs without adding extra learnable parameters or needing finetuning, and paves the way for hybrid models that integrate pretrained LMs with external knowledge.</abstract>
@@ -828,7 +828,7 @@
       <author><first>Somin</first><last>Wadhwa</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Vivek</first><last>Khetan</last><affiliation>Accenture Labs</affiliation></author>
       <author><first>Silvio</first><last>Amir</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <pages>809-827</pages>
       <abstract>We present Reddit Health Online Talk (RedHOT), a corpus of 22,000 richly annotated social media posts from Reddit spanning 24 health conditions. Annotations include demarcations of spans corresponding to medical claims, personal experiences, and questions. We collect additional granular annotations on identified claims. Specifically, we mark snippets that describe patient Populations, Interventions, and Outcomes (PIO elements) within these. Using this corpus, we introduce the task of retrieving trustworthy evidence relevant to a given claim made on social media. We propose a new method to automatically derive (noisy) supervision for this task which we use to train a dense retrieval model; this outperforms baseline models. Manual evaluation of retrieval results performed by medical doctors indicate that while our system performance is promising, there is considerable room for improvement. We release all annotations collected (and scripts to assemble the dataset), and all code necessary to reproduce the results in this paper at: <url>https://sominw.com/redhot</url>.</abstract>
       <url hash="642a6f2e">2023.findings-eacl.61</url>
@@ -842,7 +842,7 @@
       <author><first>Jan</first><last>Hombeck</last><affiliation>University of Jena</affiliation></author>
       <author><first>Monique</first><last>Meuschke</last><affiliation>University of Jena</affiliation></author>
       <author><first>Kai</first><last>Lawonn</last><affiliation>University of Jena</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <pages>828-843</pages>
       <abstract>Existing language and vision models achieve impressive performance in image-text understanding. Yet, it is an open question to what extent they can be used for language understanding in 3D environments and whether they implicitly acquire 3D object knowledge, e.g. about different views of an object. In this paper, we investigate whether a state-of-the-art language and vision model, CLIP, is able to ground perspective descriptions of a 3D object and identify canonical views of common objects based on text queries. We present an evaluation framework that uses a circling camera around a 3D object to generate images from different viewpoints and evaluate them in terms of their similarity to natural language descriptions. We find that a pre-trained CLIP model performs poorly on most canonical views and that fine-tuning using hard negative sampling and random contrasting yields good results even under conditions with little available training data.</abstract>
       <url hash="a232eaf5">2023.findings-eacl.62</url>
@@ -853,13 +853,13 @@
     <paper id="63">
       <title><fixed-case>PLACES</fixed-case>: Prompting Language Models for Social Conversation Synthesis</title>
       <author><first>Maximillian</first><last>Chen</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Chenyang</first><last>Tao</last><affiliation>Amazon</affiliation></author>
       <author><first>Seokhwan</first><last>Kim</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Andy</first><last>Rosenbaum</last><affiliation>Amazon</affiliation></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>844-868</pages>
       <abstract>Collecting high quality conversational data can be very expensive for most applications and infeasible for others due to privacy, ethical, or similar concerns. A promising direction to tackle this problem is to generate synthetic dialogues by prompting large language models. In this work, we use a small set of expert-written conversations as in-context examples to synthesize a social conversation dataset using prompting. We perform several thorough evaluations of our synthetic conversations compared to human-collected conversations. This includes various dimensions of conversation quality with human evaluation directly on the synthesized conversations, and interactive human evaluation of chatbots fine-tuned on the synthetically generated dataset. We additionally demonstrate that this prompting approach is generalizable to multi-party conversations, providing potential to create new synthetic data for multi-party tasks. Our synthetic multi-party conversations were rated more favorably across all measured dimensions compared to conversation excerpts sampled from a human-collected multi-party dataset.</abstract>
       <url hash="2de5ef99">2023.findings-eacl.63</url>
@@ -900,7 +900,7 @@
       <author><first>Rasmus</first><last>Jørgensen</last><affiliation>University of Copenhagen, UCPH</affiliation></author>
       <author><first>Oliver</first><last>Brandt</last><affiliation>Independent researcher</affiliation></author>
       <author><first>Mareike</first><last>Hartmann</last><affiliation>Saarland University</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO Data61</affiliation></author>
       <author><first>Christian</first><last>Igel</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Desmond</first><last>Elliott</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>894-909</pages>
@@ -973,7 +973,7 @@
     <paper id="72">
       <title>Exploring Enhanced Code-Switched Noising for Pretraining in Neural Machine Translation</title>
       <author><first>Vivek</first><last>Iyer</last><affiliation>The University of Edinburgh</affiliation></author>
-      <author><first>Arturo</first><last>Oncevay</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>Alexandra</first><last>Birch</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>984-998</pages>
       <abstract>Multilingual pretraining approaches in Neural Machine Translation (NMT) have shown that training models to denoise synthetic code-switched data can yield impressive performance gains — owing to better multilingual semantic representations and transfer learning. However, they generated the synthetic code-switched data using non-contextual, one-to-one word translations obtained from lexicons - which can lead to significant noise in a variety of cases, including the poor handling of polysemes and multi-word expressions, violation of linguistic agreement and inability to scale to agglutinative languages. To overcome these limitations, we propose an approach called Contextual Code-Switching (CCS), where contextual, many-to-many word translations are generated using a ‘base’ NMT model. We conduct experiments on 3 different language families - Romance, Uralic, and Indo-Aryan - and show significant improvements (by up to 5.5 spBLEU points) over the previous lexicon-based SOTA approaches. We also observe that small CCS models can perform comparably or better than massive models like mBART50 and mRASP2, depending on the size of data provided. We empirically analyse several key factors responsible for these - including context, many-to-many substitutions, code-switching language count etc. - and prove that they all contribute to enhanced pretraining of multilingual NMT models.</abstract>
@@ -1068,7 +1068,7 @@
       <author><first>Nhut Huy</first><last>Pham</last><affiliation>FPT Software AI Center</affiliation></author>
       <author><first>Anh Tuan</first><last>Nguyen</last><affiliation>Microsoft</affiliation></author>
       <author><first>Truong Son</first><last>Hy</last><affiliation>University of California San Diego</affiliation></author>
-      <author><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <pages>1071-1078</pages>
       <abstract>This paper presents ViDeBERTa, a new pre-trained monolingual language model for Vietnamese, with three versions - ViDeBERTa_xsmall, ViDeBERTa_base, and ViDeBERTa_large, which are pre-trained on a large-scale corpus of high-quality and diverse Vietnamese texts using DeBERTa architecture. Although many successful pre-trained language models based on Transformer have been widely proposed for the English language, there are still few pre-trained models for Vietnamese, a low-resource language, that perform good results on downstream tasks, especially Question answering. We fine-tune and evaluate our model on three important natural language downstream tasks, Part-of-speech tagging, Named-entity recognition, and Question answering. The empirical results demonstrate that ViDeBERTa with far fewer parameters surpasses the previous state-of-the-art models on multiple Vietnamese-specific natural language understanding tasks. Notably, ViDeBERTa_base with 86M parameters, which is only about 23% of PhoBERT_large with 370M parameters, still performs the same or better results than the previous state-of-the-art model. Our ViDeBERTa models are available at: <url>https://github.com/HySonLab/ViDeBERTa</url>.</abstract>
       <url hash="8cf0f7f1">2023.findings-eacl.79</url>
@@ -1080,7 +1080,7 @@
       <title><fixed-case>N</fixed-case>ap<fixed-case>SS</fixed-case>: Paragraph-level Medical Text Simplification via Narrative Prompting and Sentence-matching Summarization</title>
       <author><first>Junru</first><last>Lu</last><affiliation>University of Warwick</affiliation></author>
       <author><first>Jiazheng</first><last>Li</last><affiliation>King’s College London</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Yulan</first><last>He</last><affiliation>King’s College London</affiliation></author>
       <author><first>Gabriele</first><last>Pergola</last><affiliation>University of Warwick</affiliation></author>
       <pages>1079-1091</pages>
@@ -1181,7 +1181,7 @@
       <author><first>Xanh</first><last>Ho</last><affiliation>National Institute of Informatics</affiliation></author>
       <author><first>Anh-Khoa</first><last>Duong Nguyen</last><affiliation>N/a</affiliation></author>
       <author><first>Saku</first><last>Sugawara</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>1163-1180</pages>
       <abstract>To explain the predicted answers and evaluate the reasoning abilities of models, several studies have utilized underlying reasoning (UR) tasks in multi-hop question answering (QA) datasets. However, it remains an open question as to how effective UR tasks are for the QA task when training models on both tasks in an end-to-end manner. In this study, we address this question by analyzing the effectiveness of UR tasks (including both sentence-level and entity-level tasks) in three aspects: (1) QA performance, (2) reasoning shortcuts, and (3) robustness. While the previous models have not been explicitly trained on an entity-level reasoning prediction task, we build a multi-task model that performs three tasks together: sentence-level supporting facts prediction, entity-level reasoning prediction, and answer prediction. Experimental results on 2WikiMultiHopQA and HotpotQA-small datasets reveal that (1) UR tasks can improve QA performance. Using four debiased datasets that are newly created, we demonstrate that (2) UR tasks are helpful in preventing reasoning shortcuts in the multi-hop QA task. However, we find that (3) UR tasks do not contribute to improving the robustness of the model on adversarial questions, such as sub-questions and inverted questions. We encourage future studies to investigate the effectiveness of entity-level reasoning in the form of natural language questions (e.g., sub-question forms).</abstract>
       <url hash="df42e10e">2023.findings-eacl.87</url>
@@ -1205,7 +1205,7 @@
       <title>Multilingual <fixed-case>BERT</fixed-case> has an accent: Evaluating <fixed-case>E</fixed-case>nglish influences on fluency in multilingual models</title>
       <author><first>Isabel</first><last>Papadimitriou</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kezia</first><last>Lopez</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>1194-1200</pages>
       <abstract>While multilingual language models can improve NLP performance on low-resource languages by leveraging higher-resource languages, they also reduce average performance on all languages (the ‘curse of multilinguality’). Here we show another problem with multilingual models: grammatical structures in higher-resource languages bleed into lower-resource languages, a phenomenon we call grammatical structure bias. We show this bias via a novel method for comparing the fluency of multilingual models to the fluency of monolingual Spanish and Greek models: testing their preference for two carefully-chosen variable grammatical structures (optional pronoun-drop in Spanish and optional Subject-Verb ordering in Greek). We find that multilingual BERT is biased toward the English-like setting (explicit pronouns and Subject-Verb-Object ordering) as compared to our monolingual control language model. With our case studies, we hope to bring to light the fine-grained ways in which multilingual models can be biased, and encourage more linguistically-aware fluency evaluation.</abstract>
       <url hash="4ce513fd">2023.findings-eacl.89</url>
@@ -1220,7 +1220,7 @@
       <author><first>Emanuele</first><last>Bugliarello</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Elnaz</first><last>Davoodi</last><affiliation>DeepMind</affiliation></author>
       <author><first>Anita</first><last>Gergely</last><affiliation>DeepMind</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Aida</first><last>Nematzadeh</last><affiliation>DeepMind</affiliation></author>
       <pages>1201-1226</pages>
       <abstract>Vision-and-language (V&amp;L) models pretrained on large-scale multimodal data have demonstrated strong performance on various tasks such as image captioning and visual question answering (VQA). The quality of such models is commonly assessed by measuring their performance on unseen data that typically comes from the same distribution as the training data. However, when evaluated under out-of-distribution (out-of-dataset) settings for VQA, we observe that these models exhibit poor generalization. We comprehensively evaluate two pretrained V&amp;L models under different settings (i.e. classification and open-ended text generation) by conducting cross-dataset evaluations. We find that these models tend to learn to solve the benchmark, rather than learning the high-level skills required by the VQA task. We also find that in most cases generative models are less susceptible to shifts in data distribution compared to discriminative ones, and that multimodal pretraining is generally helpful for OOD generalization. Finally, we revisit assumptions underlying the use of automatic VQA evaluation metrics, and empirically show that their stringent nature repeatedly penalizes models for correct responses.</abstract>
@@ -1233,7 +1233,7 @@
       <title>Our kind of people? Detecting populist references in political debates</title>
       <author><first>Christopher</first><last>Klamm</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Ines</first><last>Rehbein</last><affiliation>University of Mannheim</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <pages>1227-1243</pages>
       <abstract>This paper investigates the identification of populist rhetoric in text and presents a novel cross-lingual dataset for this task. Our work is based on the definition of populism as a “communication style of political actors that refers to the people” but also includes anti-elitism as another core feature of populism. Accordingly, we annotate references to The People and The Elite in German and English parliamentary debates with a hierarchical scheme. The paper describes our dataset and annotation procedure and reports inter-annotator agreement for this task. Next, we compare and evaluate different transformer-based model architectures on a German dataset and report results for zero-shot learning on a smaller English dataset. We then show that semi-supervised tri-training can improve results in the cross-lingual setting. Our dataset can be used to investigate how political actors talk about The Elite and The People and to study how populist rhetoric is used as a strategic device.</abstract>
       <url hash="6abc8df2">2023.findings-eacl.91</url>
@@ -1257,8 +1257,8 @@
     <paper id="93">
       <title>Mini But Mighty: Efficient Multilingual Pretraining with Linguistically-Informed Data Selection</title>
       <author><first>Tolulope</first><last>Ogunremi</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <pages>1251-1266</pages>
       <abstract>With the prominence of large pretrained language models, low-resource languages are rarely modelled monolingually and become victims of the “curse of multilinguality” in massively multilingual models. Recently, AfriBERTa showed that training transformer models from scratch on 1GB of data from many unrelated African languages outperforms massively multilingual models on downstream NLP tasks. Here we extend this direction, focusing on the use of related languages. We propose that training on smaller amounts of data but from related languages could match the performance of models trained on large, unrelated data. We test our hypothesis on the Niger-Congo family and its Bantu and Volta-Niger sub-families, pretraining models with data solely from Niger-Congo languages and finetuning on 4 downstream tasks: NER, part-of-speech tagging, sentiment analysis and text classification. We find that models trained on genetically related languages achieve equal performance on downstream tasks in low-resource languages despite using less training data. We recommend selecting training data based on language-relatedness when pretraining language models for low-resource languages.</abstract>
       <url hash="1229afc0">2023.findings-eacl.93</url>
@@ -1365,7 +1365,7 @@
       <author><first>Danilo</first><last>Silva De Carvalho</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Giangiacomo</first><last>Mercatali</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Yingji</first><last>Zhang</last><affiliation>The University of Manchester</affiliation></author>
-      <author><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>1371-1384</pages>
       <abstract>Disentangling the encodings of neural models is a fundamental aspect for improving interpretability, semantic control and downstream task performance in Natural Language Processing. Currently, most disentanglement methods are unsupervised or rely on synthetic datasets with known generative factors. We argue that recurrent syntactic and semantic regularities in textual data can be used to provide the models with both structural biases and generative factors. We leverage the semantic structures present in a representative and semantically dense category of sentence types, definitional sentences, for training a Variational Autoencoder to learn disentangled representations. Our experimental results show that the proposed model outperforms unsupervised baselines on several qualitative and quantitative benchmarks for disentanglement, and it also improves the results in the downstream task of definition modeling.</abstract>
       <url hash="04379dee">2023.findings-eacl.101</url>
@@ -1574,7 +1574,7 @@
       <author><first>Chia-Chien</first><last>Hung</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Anne</first><last>Lauscher</last><affiliation>University of Hamburg</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>University of Würzburg</affiliation></author>
       <pages>1565-1580</pages>
       <abstract>Demographic factors (e.g., gender or age) shape our language. Previous work showed that incorporating demographic factors can consistently improve performance for various NLP tasks with traditional NLP models. In this work, we investigate whether these previous findings still hold with state-of-the-art pretrained Transformer-based language models (PLMs). We use three common specialization methods proven effective for incorporating external knowledge into pretrained Transformers (e.g., domain-specific or geographic knowledge). We adapt the language representations for the demographic dimensions of gender and age, using continuous language modeling and dynamic multi-task learning for adaptation, where we couple language modeling objectives with the prediction of demographic classes. Our results, when employing a multilingual PLM, show substantial gains in task performance across four languages (English, German, French, and Danish), which is consistent with the results of previous work. However, controlling for confounding factors – primarily domain and language proficiency of Transformer-based PLMs – shows that downstream performance gains from our demographic adaptation do not actually stem from demographic knowledge. Our results indicate that demographic specialization of PLMs, while holding promise for positive societal impact, still represents an unsolved problem for (modern) NLP.</abstract>
@@ -1658,7 +1658,7 @@
       <author><first>Ching-Hsun</first><last>Tseng</last><affiliation>the University of Manchester</affiliation></author>
       <author><first>Jiayu</first><last>Shang</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Shengzhong</first><last>Mao</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Xiao-Jun</first><last>Zeng</last><affiliation>University of Manchester</affiliation></author>
       <pages>1655-1667</pages>
       <abstract>Extractive models usually formulate text summarization as extracting fixed top-k salient sentences from the document as a summary. Few works exploited extracting finer-grained Elementary Discourse Unit (EDU) with little analysis and justification for the extractive unit selection. Further, the selection strategy of the fixed top-k salient sentences fits the summarization need poorly, as the number of salient sentences in different documents varies and therefore a common or best k does not exist in reality. To fill these gaps, this paper first conducts the comparison analysis of oracle summaries based on EDUs and sentences, which provides evidence from both theoretical and experimental perspectives to justify and quantify that EDUs make summaries with higher automatic evaluation scores than sentences. Then, considering this merit of EDUs, this paper further proposes an EDU-level extractive model with Varying summary Lengths (EDU-VL) and develops the corresponding learning algorithm. EDU-VL learns to encode and predict probabilities of EDUs in the document, generate multiple candidate summaries with varying lengths based on various k values, and encode and score candidate summaries, in an end-to-end training manner. Finally, EDU-VL is experimented on single and multi-document benchmark datasets and shows improved performances on ROUGE scores in comparison with state-of-the-art extractive models, and further human evaluation suggests that EDU-constituent summaries maintain good grammaticality and readability.</abstract>
@@ -1671,7 +1671,7 @@
     <paper id="124">
       <title>“Chère maison” or “maison chère”? Transformer-based prediction of adjective placement in <fixed-case>F</fixed-case>rench</title>
       <author><first>Eleni</first><last>Metheniti</last><affiliation>CLLE-CNRS and IRIT-CNRS</affiliation></author>
-      <author><first>Tim</first><last>Van de Cruys</last><affiliation>University of Leuven</affiliation></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last><affiliation>University of Leuven</affiliation></author>
       <author><first>Wissam</first><last>Kerkri</last><affiliation>Clle</affiliation></author>
       <author><first>Juliette</first><last>Thuilier</last><affiliation>Université Toulouse Jean Jaurès &amp; CLLE-ERSS</affiliation></author>
       <author><first>Nabil</first><last>Hathout</last><affiliation>CLLE, CNRS &amp; Universite de Toulouse</affiliation></author>
@@ -1697,7 +1697,7 @@
       <title>Towards a Unified Model for Generating Answers and Explanations in Visual Question Answering</title>
       <author><first>Chenxi</first><last>Whitehouse</last><affiliation>City, University of London</affiliation></author>
       <author><first>Tillman</first><last>Weyde</last><affiliation>City, University of London</affiliation></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <pages>1693-1705</pages>
       <abstract>The field of visual question answering (VQA) has recently seen a surge in research focused on providing explanations for predicted answers. However, current systems mostly rely on separate models to predict answers and generate explanations, leading to less grounded and frequently inconsistent results. To address this, we propose a multitask learning approach towards a Unified Model for Answer and Explanation generation (UMAE). Our approach involves the addition of artificial prompt tokens to training data and fine-tuning a multimodal encoder-decoder model on a variety of VQA-related tasks. In our experiments, UMAE models surpass the prior state-of-the-art answer accuracy on A-OKVQA by 10 15%, show competitive results on OK-VQA, achieve new state-of-the-art explanation scores on A-OKVQA and VCR, and demonstrate promising out-of-domain performance on VQA-X.</abstract>
       <url hash="72fc7a80">2023.findings-eacl.126</url>
@@ -1749,7 +1749,7 @@
     <paper id="130">
       <title>Double Retrieval and Ranking for Accurate Question Answering</title>
       <author><first>Zeyu</first><last>Zhang</last><affiliation>School of Information, the University of Arizona</affiliation></author>
-      <author><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last><affiliation>Amazon</affiliation></author>
       <author><first>Alessandro</first><last>Moschitti</last><affiliation>Amazon</affiliation></author>
       <pages>1751-1762</pages>
       <abstract>Recent work has shown that an answer verification step introduced in Transformer-based answer selection models can significantly improve the state of the art in Question Answering. This step is performed by aggregating the embeddings of top <tex-math>k</tex-math> answer candidates to support the verification of a target answer. Although the approach is intuitive and sound, it still shows two limitations: (i) the supporting candidates are ranked only according to the relevancy with the question and not with the answer, and (ii) the support provided by the other answer candidates is suboptimal as these are retrieved independently of the target answer. In this paper, we address both drawbacks by proposing (i) a double reranking model, which, for each target answer, selects the best support; and (ii) a second neural retrieval stage designed to encode question and answer pair as the query, which finds more specific verification information. The results on well-known datasets for Answer Sentence Selection show significant improvement over the state of the art.</abstract>
@@ -1762,7 +1762,7 @@
       <title>Evaluating the Diversity, Equity, and Inclusion of <fixed-case>NLP</fixed-case> Technology: A Case Study for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Simran</first><last>Khanuja</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <pages>1763-1777</pages>
       <abstract>In order for NLP technology to be widely applicable, fair, and useful, it needs to serve a diverse set of speakers across the world’s languages, be equitable, i.e., not unduly biased towards any particular language, and be inclusive of all users, particularly in low-resource settings where compute constraints are common. In this paper, we propose an evaluation paradigm that assesses NLP technologies across all three dimensions. While diversity and inclusion have received attention in recent literature, equity is currently unexplored. We propose to address this gap using the Gini coefficient, a well-established metric used for estimating societal wealth inequality. Using our paradigm, we highlight the distressed state of current technologies for Indian (IN) languages (a linguistically large and diverse set, with a varied speaker population), across all three dimensions. To improve upon these metrics, we demonstrate the importance of region-specific choices in model building and dataset creation, and more importantly, propose a novel, generalisable approach to optimal resource allocation during fine-tuning. Finally, we discuss steps to mitigate these biases and encourage the community to employ multi-faceted evaluation when building linguistically diverse and equitable technologies.</abstract>
       <url hash="5b4910ba">2023.findings-eacl.131</url>
@@ -1827,7 +1827,7 @@
     </paper>
     <paper id="136">
       <title>Best Practices in the Creation and Use of Emotion Lexicons</title>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>Nrc</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>Nrc</affiliation></author>
       <pages>1825-1836</pages>
       <abstract>Words play a central role in how we express ourselves. Lexicons of word–emotion associations are widely used in research and real-world applications for sentiment analysis, tracking emotions associated with products and policies, studying health disorders, tracking emotional arcs of stories, and so on. However, inappropriate and incorrect use of these lexicons can lead to not just sub-optimal results, but also inferences that are directly harmful to people. This paper brings together ideas from Affective Computing and AI Ethics to present, some of the practical and ethical considerations involved in the creation and use of emotion lexicons – best practices. The goal is to provide a comprehensive set of relevant considerations, so that readers (especially those new to work with emotions) can find relevant information in one place. We hope this work will facilitate more thoughtfulness when one is deciding on what emotions to work on, how to create an emotion lexicon, how to use an emotion lexicon, how to draw meaningful inferences, and how to judge success.</abstract>
       <url hash="d5a03e51">2023.findings-eacl.136</url>
@@ -1840,7 +1840,7 @@
       <author><first>Hossein</first><last>Rajaby Faghihi</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Parisa</first><last>Kordjamshidi</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Choh Man</first><last>Teng</last><affiliation>Institute for Human and Machine Cognition</affiliation></author>
-      <author><first>James</first><last>Allen</last><affiliation>University of Rochester</affiliation></author>
+      <author id="james-allen"><first>James</first><last>Allen</last><affiliation>University of Rochester</affiliation></author>
       <pages>1837-1849</pages>
       <abstract>In this paper, we investigate whether symbolic semantic representations, extracted from deep semantic parsers, can help reasoning over the states of involved entities in a procedural text. We consider a deep semantic parser (TRIPS) and semantic role labeling as two sources of semantic parsing knowledge. First, we propose PROPOLIS, a symbolic parsing-based procedural reasoning framework. Second, we integrate semantic parsing information into state-of-the-art neural models to conduct procedural reasoning. Our experiments indicate that explicitly incorporating such semantic knowledge improves procedural understanding. This paper presents new metrics for evaluating procedural reasoning tasks that clarify the challenges and identify differences among neural, symbolic, and integrated models.</abstract>
       <url hash="c5d442f0">2023.findings-eacl.137</url>
@@ -1963,7 +1963,7 @@
       <title>Trained on 100 million words and still in shape: <fixed-case>BERT</fixed-case> meets <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus</title>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo, Language Technology Group</affiliation></author>
       <author><first>Andrey</first><last>Kutuzov</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
       <pages>1954-1974</pages>
       <abstract>While modern masked language models (LMs) are trained on ever larger corpora, we here explore the effects of down-scaling training to a modestly-sized but representative, well-balanced, and publicly available English text source – the British National Corpus. We show that pre-training on this carefully curated corpus can reach better performance than the original BERT model. We argue that this type of corpora has great potential as a language modeling benchmark. To showcase this potential, we present fair, reproducible and data-efficient comparative studies of LMs, in which we evaluate several training objectives and model architectures and replicate previous empirical results in a systematic way. We propose an optimized LM architecture called LTG-BERT.</abstract>
@@ -1975,7 +1975,7 @@
     <paper id="147">
       <title>Generating Synthetic Speech from <fixed-case>S</fixed-case>poken<fixed-case>V</fixed-case>ocab for Speech Translation</title>
       <author><first>Jinming</first><last>Zhao</last><affiliation>Dept of Data Science and AI, Faculty of IT, Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
       <pages>1975-1981</pages>
       <abstract>Training end-to-end speech translation (ST) systems requires sufficiently large-scale data, which is unavailable for most language pairs and domains. One practical solution to the data scarcity issue is to convert text-based machine translation (MT) data to ST data via text-to-speech (TTS) systems. Yet, using TTS systems can be tedious and slow. In this work, we propose SpokenVocab, a simple, scalable and effective data augmentation technique to convert MT data to ST data on-the-fly. The idea is to retrieve and stitch audio snippets, corresponding to words in an MT sentence, from a spoken vocabulary bank. Our experiments on multiple language pairs show that stitched speech helps to improve translation quality by an average of 1.83 BLEU score, while performing equally well as TTS-generated speech in improving translation quality. We also showcase how SpokenVocab can be applied in code-switching ST for which often no TTS systems exit.</abstract>
@@ -2017,7 +2017,7 @@
     <paper id="150">
       <title>Selective-<fixed-case>LAMA</fixed-case>: Selective Prediction for Confidence-Aware Evaluation of Language Models</title>
       <author><first>Hiyori</first><last>Yoshikawa</last><affiliation>Fujitsu Limited</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>2017-2028</pages>
       <abstract>Recent studies have suggested that neural language models learn and store a large amount of facts and commonsense knowledge from training data. The ability of language models to restore such knowledge is often evaluated via zero-shot cloze-style QA tasks. However, such evaluations rely only on prediction accuracy without punishing the systems for their mistakes, e.g., simply guessing or hallucinating likely answers. Selective prediction is a more informative evaluation framework that takes the confidence of predictions into account. Under the selective prediction setting, a model is evaluated not only by the number of correct predictions, but also by the ability to filter out dubious predictions by estimating the confidence of individual predictions. Such confidence-aware evaluation is crucial for determining whether to trust zero-shot predictions of language models. In this paper, we apply the selective prediction setting to an existing benchmark, LAMA probe, and conduct extensive experiments with recent neural language models and different confidence functions. We empirically show that our Selective-LAMA evaluation is more robust to the effect of simple guesses than the conventional accuracy-based evaluation. Our evaluation reveals the importance of the choice of confidence functions by showing that simply relying on token probabilities is not always the best choice. Further analysis shows that various confidence functions exhibit different preferences over predicted tokens for a given context.</abstract>
       <url hash="270b8411">2023.findings-eacl.150</url>
@@ -2032,7 +2032,7 @@
       <author><first>Liang</first><last>Ma</last><affiliation>Dataminr</affiliation></author>
       <author><first>Di</first><last>Lu</last><affiliation>Dataminr</affiliation></author>
       <author><first>Robert L</first><last>Logan IV</last><affiliation>Dataminr</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>2029-2047</pages>
       <abstract>In this paper, we present MuFaSSa (Multi-view Faithfulness Scoring via Source Ablation), a metric for evaluating faithfulness of abstractive summaries, and for guiding training of more faithful summarizers. For evaluation, MuFaSSa employs different strategies (e.g., masking entity mentions) to first remove information from the source document to form multiple ablated views. Then, the faithfulness level of each token in a generated summary is measured by the difference between the token generation probabilities when given the original document and the ablated document as inputs to trained summarizers. For training, MuFaSSa uses a novel word truncation objective that drops unfaithful tokens located by MuFaSSa in both the decoder input and output. Alignments with human-annotated faithfulness labels on AggreFact show that MuFaSSa is comparable to or better than existing metrics built on classifiers or QA models pre-trained on other tasks. In experiments on summarization with XSum and CNN/DailyMail, models trained with word truncation using MuFaSSa outperform competitive methods according to both automatic faithfulness metrics and human assessments.</abstract>
@@ -2056,8 +2056,8 @@
     <paper id="153">
       <title><fixed-case>A</fixed-case>dapter<fixed-case>S</fixed-case>oup: Weight Averaging to Improve Generalization of Pretrained Language Models</title>
       <author><first>Alexandra</first><last>Chronopoulou</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>2054-2063</pages>
       <abstract>Pretrained language models (PLMs) are trained on massive corpora, but often need to specialize to specific domains. A parameter-efficient adaptation method suggests training an adapter for each domain on the task of language modeling. This leads to good in-domain scores but can be impractical for domain- or resource-restricted settings. A solution is to use a related-domain adapter for the novel domain at test time. In this paper, we introduce AdapterSoup, an approach that performs weight-space averaging of adapters trained on different domains. Our approach is embarrassingly parallel: first, we train a set of domain-specific adapters; then, for each novel domain, we determine which adapters should be averaged at test time. We present extensive experiments showing that AdapterSoup consistently improves performance to new domains without extra training. We also explore weight averaging of adapters trained on the same domain with different hyper-parameters, and show that it preserves the performance of a PLM on new domains while obtaining strong in-domain results. We explore various approaches for choosing which adapters to combine, such as text clustering and semantic similarity. We find that using clustering leads to the most competitive results on novel domains.</abstract>
@@ -2212,7 +2212,7 @@
     <paper id="165">
       <title>Grammatical Error Correction through Round-Trip Machine Translation</title>
       <author><first>Yova</first><last>Kementchedjhieva</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>2208-2215</pages>
       <abstract>Machine translation (MT) operates on the premise of an interlingua which abstracts away from surface form while preserving meaning. A decade ago the idea of using round-trip MT to guide grammatical error correction was proposed as a way to abstract away from potential errors in surface forms (Madnani et al., 2012). At the time, it did not pan out due to the low quality of MT systems of the day. Today much stronger MT systems are available so we re-evaluate this idea across five languages and models of various sizes. We find that for extra large models input augmentation through round-trip MT has little to no effect. For more ‘workable’ model sizes, however, it yields consistent improvements, sometimes bringing the performance of a <i>base</i> or <i>large</i> model up to that of a <i>large</i> or <i>xl</i> model, respectively. The round-trip translation comes at a computational cost though, so one would have to determine whether to opt for a larger model or for input augmentation on a case-by-case basis.</abstract>
       <url hash="2d237fc0">2023.findings-eacl.165</url>
@@ -2240,7 +2240,7 @@
       <author><first>Payal</first><last>Bajaj</last><affiliation>Microsoft</affiliation></author>
       <author><first>Vishrav</first><last>Chaudhary</last><affiliation>Microsoft</affiliation></author>
       <author><first>Kate</first><last>Cook</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft India</affiliation></author>
       <author><first>Sunayana</first><last>Sitaram</last><affiliation>Microsoft Research India</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Microsoft</affiliation></author>
       <pages>2226-2242</pages>
@@ -2277,7 +2277,7 @@
       <author><first>Lingzhi</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>ETH Zurich</affiliation></author>
       <author><first>Xingshan</first><last>Zeng</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>2268-2274</pages>
       <abstract>Conversational tutoring systems (CTSs) aim to help students master educational material with natural language interaction in the form of a dialog. CTSs have become a key pillar in educational data mining research. A key challenge in CTSs is to engage the student in the conversation while exposing them to a diverse set of teaching strategies, akin to a human teacher, thereby, helping them learn in the process. Different from previous work that generates responses given the strategies as input, we propose to jointly predict teaching strategies and generate tutor responses accordingly, which fits a more realistic application scenario. We benchmark several competitive models on three dialog tutoring datasets and propose a unified framework that combines teaching response generation and pedagogical strategy prediction, where a self-distillation mechanism is adopted to guide the teaching strategy learning and facilitate tutor response generation. Our experiments and analyses shed light on how teaching strategies affect dialog tutoring.</abstract>
       <url hash="3f366ebe">2023.findings-eacl.170</url>
@@ -2330,7 +2330,7 @@
     <paper id="174">
       <title>Are the Best Multilingual Document Embeddings simply Based on Sentence Embeddings?</title>
       <author><first>Sonal</first><last>Sannigrahi</last><affiliation>Saarland University</affiliation></author>
-      <author><first>Josef</first><last>van Genabith</last><affiliation>Dfki</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last><affiliation>Dfki</affiliation></author>
       <author><first>Cristina</first><last>España-Bonet</last><affiliation>DFKI GmbH</affiliation></author>
       <pages>2306-2316</pages>
       <abstract>Dense vector representations for textual data are crucial in modern NLP. Word embeddings and sentence embeddings estimated from raw texts are key in achieving state-of-the-art resultsin various tasks requiring semantic understanding. However, obtaining embeddings at the document level is challenging due to computational requirements and lack of appropriate data. Instead, most approaches fall back on computing document embeddings based on sentence representations. Although there exist architectures and models to encode documents fully, they are in general limited to English and few other high-resourced languages. In this work, we provide a systematic comparison of methods to produce document-level representations from sentences based on LASER, LaBSE, and Sentence BERT pre-trained multilingual models. We compare input token number truncation, sentence averaging as well as some simple windowing and in some cases new augmented and learnable approaches, on 3 multi- and cross-lingual tasks in 8 languages belonging to 3 different language families. Our task-based extrinsic evaluations show that, independently of the language, a clever combination of sentence embeddings is usually better than encoding the full document as a single unit, even when this is possible. We demonstrate that while a simple sentence average results in a strong baseline for classification tasks, more complex combinations are necessary for semantic tasks</abstract>
@@ -2423,7 +2423,7 @@
       <title>Transformers with Learnable Activation Functions</title>
       <author><first>Haishuo</first><last>Fang</last><affiliation>UKP Lab, TU Darmstadt</affiliation></author>
       <author><first>Ji-Ung</first><last>Lee</last><affiliation>UKP, TU Darmstadt</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>UKP Lab, Technische Universität Darmstadt</affiliation></author>
       <pages>2382-2398</pages>
       <abstract>Activation functions can have a significant impact on reducing the topological complexity of input data and therefore, improving a model’s performance. However, the choice of activation functions is seldom discussed or explored in Transformer-based language models. As a common practice, commonly used activation functions like Gaussian Error Linear Unit (GELU) are chosen beforehand and then remain fixed from pre-training to fine-tuning. In this paper, we investigate the impact of activation functions on Transformer-based models by utilizing rational activation functions (RAFs). In contrast to fixed activation functions (FAF), RAFs are capable of learning the optimal activation functions from data. Our experiments show that the RAF-based Transformer model (RAFT) achieves a better performance than its FAF-based counterpart (). For instance, we find that RAFT outperforms on the GLUE benchmark by 5.71 points when using only 100 training examples and by 2.05 points on SQuAD with all available data. Analyzing the shapes of the learned RAFs further unveils that they vary across different layers and different tasks; opening a promising way to better analyze and understand large, pre-trained language models.</abstract>
@@ -2445,7 +2445,7 @@
     <paper id="183">
       <title>Reliable Gradient-free and Likelihood-free Prompt Tuning</title>
       <author><first>Maohao</first><last>Shen</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Soumya</first><last>Ghosh</last><affiliation>IBM Research</affiliation></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya</first><last>Ghosh</last><affiliation>IBM Research</affiliation></author>
       <author><first>Prasanna</first><last>Sattigeri</last><affiliation>IBM Research</affiliation></author>
       <author><first>Subhro</first><last>Das</last><affiliation>MIT-IBM Watson AI Lab</affiliation></author>
       <author><first>Yuheng</first><last>Bu</last><affiliation>University of Florida</affiliation></author>
@@ -2516,7 +2516,7 @@
       <author><first>Julia</first><last>Rozanova</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Lucas</first><last>Cordeiro</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>2489-2500</pages>
       <abstract>Probing strategies have been shown to detect the presence of various linguistic features in large language models; in particular, semantic features intermediate to the “natural logic” fragment of the Natural Language Inference task (NLI). In the case of natural logic, the relation between the intermediate features and the entailment label is explicitly known: as such, this provides a ripe setting for <i>interventional</i> studies on the NLI models’ representations, allowing for stronger causal conjectures and a deeper critical analysis of interventional probing methods. In this work, we carry out new and existing representation-level interventions to investigate the effect of these semantic features on NLI classification: we perform <i>amnesic</i> probing (which removes features as directed by learned linear probes) and introduce the <i>mnestic</i> probing variation (which forgets all dimensions <i>except</i> the probe-selected ones). Furthermore, we delve into the limitations of these methods and outline some pitfalls have been obscuring the effectivity of interventional probing studies.</abstract>
       <url hash="9ee97fe8">2023.findings-eacl.188</url>
@@ -2566,7 +2566,7 @@
       <author><first>Duccio</first><last>Pappadopulo</last><affiliation>Bloomberg LP</affiliation></author>
       <author><first>Anant</first><last>Gupta</last><affiliation>Bloomberg.net</affiliation></author>
       <author><first>Leslie</first><last>Huang</last><affiliation>Bloomberg LP</affiliation></author>
-      <author><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Thamar</first><last>Solorio</last><affiliation>University of Houston</affiliation></author>
       <pages>2539-2549</pages>
       <abstract>Driven by encouraging results on a wide range of tasks, the field of NLP is experiencing an accelerated race to develop bigger language models. This race for bigger models has also underscored the need to continue the pursuit of practical distillation approaches that can leverage the knowledge acquired by these big models in a compute-efficient manner. Having this goal in mind, we build on recent work to propose a hallucination-free framework for sequence tagging that is especially suited for distillation. We show empirical results of new state-of-the-art performance across multiple sequence labelling datasets and validate the usefulness of this framework for distilling a large model in a few-shot learning scenario.</abstract>
@@ -2578,7 +2578,7 @@
     <paper id="193">
       <title>Predicting Desirable Revisions of Evidence and Reasoning in Argumentative Writing</title>
       <author><first>Tazin</first><last>Afrin</last><affiliation>Educational Testing Service</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>2550-2561</pages>
       <abstract>We develop models to classify desirable evidence and desirable reasoning revisions in student argumentative writing. We explore two ways to improve classifier performance – using the essay context of the revision, and using the feedback students received before the revision. We perform both intrinsic and extrinsic evaluation for each of our models and report a qualitative analysis. Our results show that while a model using feedback information improves over a baseline model, models utilizing context - either alone or with feedback - are the most successful in identifying desirable revisions.</abstract>
       <url hash="7662042c">2023.findings-eacl.193</url>
@@ -2648,7 +2648,7 @@
       <booktitle>Findings of the Association for Computational Linguistics: ACL 2023</booktitle>
       <editor><first>Anna</first><last>Rogers</last></editor>
       <editor><first>Jordan</first><last>Boyd-Graber</last></editor>
-      <editor><first>Naoaki</first><last>Okazaki</last></editor>
+      <editor id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
       <month>July</month>
@@ -2737,7 +2737,7 @@
       <author><first>Khang</first><last>Lam</last><affiliation>Can Tho University</affiliation></author>
       <author><first>Thieu</first><last>Doan</last><affiliation>Can Tho University</affiliation></author>
       <author><first>Khang</first><last>Pham</last><affiliation>Duy Tan University</affiliation></author>
-      <author><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado</affiliation></author>
       <pages>92-99</pages>
       <abstract>Summary sentences produced by abstractive summarization models may be coherent and comprehensive, but they lack control and rely heavily on reference summaries. The BRIO training paradigm assumes a non-deterministic distribution to reduce the model’s dependence on reference summaries, and improve model performance during inference. This paper presents a straightforward but effective technique to improve abstractive summaries by fine-tuning pre-trained language models, and training them with the BRIO paradigm. We build a text summarization dataset for Vietnamese, called VieSum. We perform experiments with abstractive summarization models trained with the BRIO paradigm on the CNNDM and the VieSum datasets. The results show that the models, trained on basic hardware, outperform all existing abstractive summarization models, especially for Vietnamese.</abstract>
       <url hash="ffed3bbb">2023.findings-acl.7</url>
@@ -2750,7 +2750,7 @@
       <author><first>Rui</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>100-113</pages>
       <abstract>Models trained with empirical risk minimization (ERM) are revealed to easily rely on spurious correlations, resulting in poor generalization. Group distributionally robust optimization (group DRO) can alleviate this problem by minimizing the worst-case loss over pre-defined groups. While promising, in practice factors like expensive annotations and privacy preclude the availability of group labels. More crucially, when taking a closer look at the failure modes of out-of-distribution generalization, the typical procedure of reweighting in group DRO loses efficiency. Hinged on the limitations, in this work, we reformulate the group DRO framework by proposing Q-Diversity. Characterized by an interactive training mode, Q-Diversity relaxes the group identification from annotation into direct parameterization. Furthermore, a novel mixing strategy across groups is presented to diversify the under-represented groups. In a series of experiments on both synthetic and real-world text classification tasks, results demonstrate that Q-Diversity can consistently improve worst-case accuracy under different distributional shifts, outperforming state-of-the-art alternatives.</abstract>
       <url hash="7d02f983">2023.findings-acl.8</url>
@@ -2895,9 +2895,9 @@
     </paper>
     <paper id="19">
       <title>The Larger they are, the Harder they Fail: Language Models do not Recognize Identifier Swaps in Python</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>Fazl</first><last>Barez</last><affiliation>The University of Edinburgh</affiliation></author>
-      <author><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Ioannis</first><last>Konstas</last><affiliation>Heriot-Watt University</affiliation></author>
       <pages>272-292</pages>
       <abstract>Large Language Models (LLMs) have successfully been applied to code generation tasks, raising the question of how well these models understand programming. Typical programming languages have invariances and equivariances in their semantics that human programmers intuitively understand and exploit, such as the (near) invariance to the renaming of identifiers. We show that LLMs not only fail to properly generate correct Python code when default function names are swapped, but some of them even become more confident in their incorrect predictions as the model size increases, an instance of the recently discovered phenomenon of Inverse Scaling, which runs contrary to the commonly observed trend of increasing prediction quality with increasing model size. Our findings indicate that, despite their astonishing typical-case performance, LLMs still lack a deep, abstract understanding of the content they manipulate, making them unsuitable for tasks that statistically deviate from their training data, and that mere scaling is not enough to achieve such capability.</abstract>
@@ -2911,7 +2911,7 @@
       <author><first>Qingbin</first><last>Liu</last><affiliation>Tencent</affiliation></author>
       <author><first>Yanchao</first><last>Hao</last><affiliation>Tencent</affiliation></author>
       <author><first>Xiaolong</first><last>Liu</last><affiliation>Tencent</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Tencent</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Tencent</affiliation></author>
       <author><first>Dianbo</first><last>Sui</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Shizhu</first><last>He</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Kang</first><last>Liu</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
@@ -2941,7 +2941,7 @@
       <author><first>Terry Yue</first><last>Zhuo</last><affiliation>CSIRO’s Data61 and Monash University</affiliation></author>
       <author><first>Qiongkai</first><last>Xu</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>University College London</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>319-337</pages>
       <abstract>Automatic evaluation methods for translation often require model training, and thus the availability of parallel corpora limits their applicability to low-resource settings. Round-trip translation is a potential workaround, which can reframe bilingual evaluation into a much simpler monolingual task. Early results from the era of statistical machine translation (SMT) raised fundamental concerns about the utility of this approach, based on poor correlation with human translation quality judgments. In this paper, we revisit this technique with modern neural translation (NMT) and show that round-trip translation does allow for accurate automatic evaluation without the need for reference translations. These opposite findings can be explained through the copy mechanism in SMT that is absent in NMT. We demonstrate that round-trip translation benefits multiple machine translation evaluation tasks: i) predicting forward translation scores; ii) improving the performance of a quality estimation model; and iii) identifying adversarial competitors in shared tasks via cross-system verification.</abstract>
       <url hash="2f44d0e5">2023.findings-acl.22</url>
@@ -2990,7 +2990,7 @@
       <author><first>Naveen</first><last>Badathala</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
       <author><first>Abisek</first><last>Rajakumar Kalarani</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
       <author><first>Tejpalsingh</first><last>Siledar</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>388-401</pages>
       <abstract>Hyperbole and metaphor are common in day-to-day communication (e.g., “I am in deep trouble”: how does trouble have depth?), which makes their detection important, especially in a conversational AI setting. Existing approaches to automatically detect metaphor and hyperbole have studied these language phenomena independently, but their relationship has hardly, if ever, been explored computationally. In this paper, we propose a multi-task deep learning framework to detect hyperbole and metaphor simultaneously. We hypothesize that metaphors help in hyperbole detection, and vice-versa. To test this hypothesis, we annotate two hyperbole datasets- HYPO and HYPO-L- with metaphor labels. Simultaneously, we annotate two metaphor datasets- TroFi and LCC- with hyperbole labels. Experiments using these datasets give an improvement of the state of the art of hyperbole detection by 12%. Additionally, our multi-task learning (MTL) approach shows an improvement of up to 17% over single-task learning (STL) for both hyperbole and metaphor detection, supporting our hypothesis. To the best of our knowledge, ours is the first demonstration of computational leveraging of linguistic intimacy between metaphor and hyperbole, leading to showing the superiority of MTL over STL for hyperbole and metaphor detection.</abstract>
       <url hash="bb6513d6">2023.findings-acl.26</url>
@@ -3018,7 +3018,7 @@
       <author><first>Zhigang</first><last>Kan</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Yi</first><last>Han</last><affiliation>College of Meteorology and Oceanography, National University of Defense Technology</affiliation></author>
       <author><first>Linbo</first><last>Qiao</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>417-430</pages>
       <abstract>Temporal knowledge graph completion that predicts missing links for incomplete temporal knowledge graphs (TKG) is gaining increasing attention. Most existing works have achieved good results by incorporating time information into static knowledge graph embedding methods. However, they ignore the contextual nature of the TKG structure, i.e., query-specific subgraph contains both structural and temporal neighboring facts. This paper presents the SToKE, a novel method that employs the pre-trained language model (PLM) to learn joint Structural and Temporal Contextualized Knowledge Embeddings.Specifically, we first construct an event evolution tree (EET) for each query to enable PLMs to handle the TKG, which can be seen as a structured event sequence recording query-relevant structural and temporal contexts. We then propose a novel temporal embedding and structural matrix to learn the time information and structural dependencies of facts in EET.Finally, we formulate TKG completion as a mask prediction problem by masking the missing entity of the query to fine-tune pre-trained language models. Experimental results on three widely used datasets show the superiority of our model.</abstract>
       <url hash="59e7aafe">2023.findings-acl.28</url>
@@ -3032,7 +3032,7 @@
       <author><first>M Saiful</first><last>Bari</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Mizanur</first><last>Rahman</last><affiliation>York University</affiliation></author>
       <author><first>Md Amran Hossen</first><last>Bhuiyan</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Jimmy</first><last>Huang</last><affiliation>School of Information Technology, York University</affiliation></author>
       <pages>431-469</pages>
       <abstract>The development of large language models (LLMs) such as ChatGPT has brought a lot of attention recently. However, their evaluation in the benchmark academic datasets remains under-explored due to the difficulty of evaluating the generative outputs produced by this model against the ground truth. In this paper, we aim to present a thorough evaluation of ChatGPT’s performance on diverse academic datasets, covering tasks like question-answering, text summarization, code generation, commonsense reasoning, mathematical problem-solving, machine translation, bias detection, and ethical considerations. Specifically, we evaluate ChatGPT across 140 tasks and analyze 255K responses it generates in these datasets. This makes our work the largest evaluation of ChatGPT in NLP benchmarks. In short, our study aims to validate the strengths and weaknesses of ChatGPT in various tasks and provide insights for future research using LLMs. We also report a new emergent ability to follow multi-query instructions that we mostly found in ChatGPT and other instruction-tuned models. Our extensive evaluation shows that even though ChatGPT is capable of performing a wide variety of tasks, and may obtain impressive performance in several benchmark datasets, it is still far from achieving the ability to reliably solve many challenging tasks. By providing a thorough assessment of ChatGPT’s performance across diverse NLP tasks, this paper sets the stage for a targeted deployment of ChatGPT-like LLMs in real-world applications.</abstract>
@@ -3047,7 +3047,7 @@
       <author><first>Shiqi</first><last>Wang</last><affiliation>Sun Yat-sen University</affiliation></author>
       <author><first>Libin</first><last>Zheng</last><affiliation>Sun Yat-sen University</affiliation></author>
       <author><first>Qinliang</first><last>Su</last><affiliation>Sun Yat-sen University</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Sun Yat-sen University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Sun Yat-sen University</affiliation></author>
       <author><first>Baoquan</first><last>Zhao</last><affiliation>Sun Yat-sen University</affiliation></author>
       <author><first>Jian</first><last>Yin</last><affiliation>Sun Yat-Sen University</affiliation></author>
       <pages>470-486</pages>
@@ -3221,7 +3221,7 @@
     <paper id="43">
       <title>Exploring the Compositional Generalization in Context Dependent Text-to-<fixed-case>SQL</fixed-case> Parsing</title>
       <author><first>Aiwei</first><last>Liu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Xuming</first><last>Hu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Shuang</first><last>Li</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Fukun</first><last>Ma</last><affiliation>School of Software,Tsinghua University</affiliation></author>
@@ -3238,7 +3238,7 @@
       <author><first>John</first><last>Murzaku</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Tyler</first><last>Osborne</last><affiliation>Boston College</affiliation></author>
       <author><first>Amittai</first><last>Aviram</last><affiliation>Boston College</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>701-715</pages>
       <abstract>We present a novel end-to-end generative task and system for predicting event factuality holders, targets, and their associated factuality values. We perform the first experiments using all sources and targets of factuality statements from the FactBank corpus. We perform multi-task learning with other tasks and event-factuality corpora to improve on the FactBank source and target task. We argue that careful domain specific target text output format in generative systems is important and verify this with multiple experiments on target text output structure. We redo previous state-of-the-art author-only event factuality experiments and also offer insights towards a generative paradigm for the author-only event factuality prediction task.</abstract>
       <url hash="28b07017">2023.findings-acl.44</url>
@@ -3262,9 +3262,9 @@
       <title>The Web Can Be Your Oyster for Improving Language Models</title>
       <author><first>Junyi</first><last>Li</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
       <author><first>Tianyi</first><last>Tang</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Jingyuan</first><last>Wang</last><affiliation>Beihang University</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>728-746</pages>
       <abstract>Pretrained language models (PLMs) encode a large amount of world knowledge. However, as such knowledge is frozen at the time of model training, the models become static and limited by the training data at that time. In order to further improve the capacity of PLMs for knowledge-intensive tasks, we consider augmenting PLMs with the large-scale web using search engine. Unlike previous augmentation sources (e.g., Wikipedia data dump), the web provides broader, more comprehensive and constantly updated information. In this paper, we present a web-augmented PLM – UniWeb, which is trained over 16 knowledge-intensive tasks in a unified text-to-text format. Instead of simply using the retrieved contents from web, our approach has made two major improvements. Firstly, we propose an adaptive search engine assisted learning method that can self-evaluate the confidence level of PLM’s predictions, and adaptively determine when to refer to the web for more data, which can avoid useless or noisy augmentation from web. Secondly, we design a pretraining task, i.e., continual knowledge learning, based on salient spans prediction, to reduce the discrepancy between the encoded and retrieved knowledge. Experiments on a wide range of knowledge-intensive tasks show that our model significantly outperforms previous retrieval-augmented methods.</abstract>
@@ -3284,14 +3284,14 @@
     </paper>
     <paper id="48">
       <title>Overcoming Catastrophic Forgetting in Massively Multilingual Continual Learning</title>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Lingjue</first><last>Xie</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Karthik</first><last>Radhakrishnan</last><affiliation>Bloomberg LP</affiliation></author>
       <author><first>Shijie</first><last>Wu</last><affiliation>Bloomberg L.P.</affiliation></author>
       <author><first>Xisen</first><last>Jin</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Pengxiang</first><last>Cheng</last><affiliation>Bloomberg LP</affiliation></author>
       <author><first>Mayank</first><last>Kulkarni</last><affiliation>Amazon</affiliation></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <pages>768-777</pages>
       <abstract>Real-life multilingual systems should be able to efficiently incorporate new languages as data distributions fed to the system evolve and shift over time. To do this, systems need to handle the issue of catastrophic forgetting, where the model performance drops for languages or tasks seen further in its past. In this paper, we study catastrophic forgetting, as well as methods to minimize this, in a massively multilingual continual learning framework involving up to 51 languages and covering both classification and sequence labeling tasks. We present LR ADJUST, a learning rate scheduling method that is simple, yet effective in preserving new information without strongly overwriting past knowledge. Furthermore, we show that this method is effective across multiple continual learning approaches. Finally, we provide further insights into the dynamics of catastrophic forgetting in this massively multilingual setup.</abstract>
       <url hash="09d803b0">2023.findings-acl.48</url>
@@ -3306,7 +3306,7 @@
       <author><first>Haoxuan</first><last>You</last><affiliation>Columbia University</affiliation></author>
       <author><first>Noel</first><last>Codella</last><affiliation>Microsoft</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>778-793</pages>
       <abstract>Vision-language tasks, such as VQA, SNLI-VE, and VCR are challenging because they require the model’s reasoning ability to understand the semantics of the visual world and natural language. Supervised methods working for vision-language tasks have been well-studied. However, solving these tasks in a zero-shot setting is less explored. Since Contrastive Language-Image Pre-training (CLIP) has shown remarkable zero-shot performance on image-text matching, previous works utilized its strong zero-shot ability by converting vision-language tasks into an image-text matching problem, and they mainly consider global-level matching (e.g., the whole image or sentence). However, we find visual and textual fine-grained information, e.g., keywords in the sentence and objects in the image, can be fairly informative for semantics understanding. Inspired by this, we propose a unified framework to take advantage of the fine-grained information for zero-shot vision-language learning, covering multiple tasks such as VQA, SNLI-VE, and VCR. Our experiments show that our framework outperforms former zero-shot methods on VQA and achieves substantial improvement on SNLI-VE and VCR. Furthermore, our ablation studies confirm the effectiveness and generalizability of our proposed method.</abstract>
       <url hash="0431c4d6">2023.findings-acl.49</url>
@@ -3400,7 +3400,7 @@
       <author><first>Yutong</first><last>Hu</last><affiliation>Peking University</affiliation></author>
       <author><first>Yansong</first><last>Feng</last><affiliation>Peking University</affiliation></author>
       <author><first>Tianyi</first><last>Li</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Dongyan</first><last>Zhao</last><affiliation>pku.edu.cn</affiliation></author>
       <pages>881-894</pages>
       <abstract>Entailment graphs (EGs) with predicates as nodes and entailment relations as edges are typically incomplete, while EGs in different languages are often complementary to each other. In this paper, we propose a new task, multilingual entailment graph enhancement, which aims to utilize the entailment information from one EG to enhance another EG in a different language. The ultimate goal is to obtain an enhanced EG containing richer and more accurate entailment information. We present an align-then-enhance framework (ATE) to achieve accurate multilingual entailment graph enhancement, which first exploits a cross-graph guided interaction mechanism to automatically discover potential equivalent predicates between different EGs and then constructs more accurate enhanced entailment graphs based on soft predicate alignments. Extensive experiments show that ATE achieves better and more robust predicate alignment results between different EGs, and the enhanced entailment graphs generated by ATE outperform the original graphs for entailment detection.</abstract>
@@ -3425,7 +3425,7 @@
     </paper>
     <paper id="58">
       <title>Structured Mean-Field Variational Inference for Higher-Order Span-Based Semantic Role Labeling</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>ShanghaiTech University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>ShanghaiTech University</affiliation></author>
       <author><first>Songlin</first><last>Yang</last><affiliation>ShanghaiTech University</affiliation></author>
       <author><first>Kewei</first><last>Tu</last><affiliation>ShanghaiTech University</affiliation></author>
       <pages>918-931</pages>
@@ -3453,7 +3453,7 @@
       <title>The Dangers of trusting Stochastic Parrots: Faithfulness and Trust in Open-domain Conversational Question Answering</title>
       <author><first>Sabrina</first><last>Chiesurin</last><affiliation>Alana AI</affiliation></author>
       <author><first>Dimitris</first><last>Dimakopoulos</last><affiliation>Alana AI</affiliation></author>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last><affiliation>University of São Paulo</affiliation></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last><affiliation>University of São Paulo</affiliation></author>
       <author><first>Arash</first><last>Eshghi</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Ioannis</first><last>Papaioannou</last><affiliation>Alana AI</affiliation></author>
       <author><first>Verena</first><last>Rieser</last><affiliation>Heriot-Watt University</affiliation></author>
@@ -3469,7 +3469,7 @@
       <author><first>Sukmin</first><last>Cho</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Jeong yeon</first><last>Seo</last><affiliation>KAIST</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
       <pages>960-971</pages>
       <abstract>Re-rankers, which order retrieved documents with respect to the relevance score on the given query, have gained attention for the information retrieval (IR) task. Rather than fine-tuning the pre-trained language model (PLM), the large-scale language model (LLM) is utilized as a zero-shot re-ranker with excellent results. While LLM is highly dependent on the prompts, the impact and the optimization of the prompts for the zero-shot re-ranker are not explored yet. Along with highlighting the impact of optimization on the zero-shot re-ranker, we propose a novel discrete prompt optimization method, Constrained Prompt generation (Co-Prompt), with the metric estimating the optimum for re-ranking. Co-Prompt guides the generated texts from PLM toward optimal prompts based on the metric without parameter update. The experimental results demonstrate that Co-Prompt leads to outstanding re-ranking performance against the baselines. Also, Co-Prompt generates more interpretable prompts for humans against other prompt optimization methods.</abstract>
       <url hash="a3da31f9">2023.findings-acl.61</url>
@@ -3480,7 +3480,7 @@
     <paper id="62">
       <title>Triggering Multi-Hop Reasoning for Question Answering in Language Models using Soft Prompts and Random Walks</title>
       <author><first>Kanishka</first><last>Misra</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last><affiliation>Google Research</affiliation></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last><affiliation>Google Research</affiliation></author>
       <author><first>Siamak</first><last>Shakeri</last><affiliation>Google.com</affiliation></author>
       <pages>972-985</pages>
       <abstract>Despite readily memorizing world knowledge about entities, pre-trained language models (LMs) struggle to compose together two or more facts to perform multi-hop reasoning in question-answering tasks. In this work, we propose techniques that improve upon this limitation by relying on random-walks over structured knowledge graphs. Specifically, we use soft-prompts to guide LMs to chain together their encoded knowledge by learning to map multi-hop questions to random-walk paths that lead to the answer. Applying our methods on two T5 LMs shows substantial improvements over standard tuning approaches in answering questions that require multi-hop reasoning.</abstract>
@@ -3599,10 +3599,10 @@
       <author><first>Jungo</first><last>Kasai</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yizhong</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yushi</first><last>Hu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Tao</first><last>Yu</last><affiliation>University of Hong Kong</affiliation></author>
       <pages>1102-1121</pages>
       <abstract>We introduce INSTRUCTOR, a new method for computing text embeddings given task instructions: every text input is embedded together with instructions explaining the use case (e.g., task and domain descriptions). Unlike encoders from prior work that are more specialized, INSTRUCTOR is a single embedder that can generate text embeddings tailored to different downstream tasks and domains, without any further training. We first annotate instructions for 330 diverse tasks and train INSTRUCTOR on this multitask mixture with a contrastive loss. We evaluate INSTRUCTOR on 70 embedding evaluation tasks (66 of which are unseen during training), ranging from classification and information retrieval to semantic textual similarity and text generation evaluation. INSTRUCTOR, while having an order of magnitude fewer parameters than the previous best model, achieves state-of-the-art performance, with an average improvement of 3.4% compared to the previous best results on the 70 diverse datasets. Our analysis suggests that INSTRUCTOR is robust to changes in instructions, and that instruction finetuning mitigates the challenge of training a single model on diverse datasets. Our model, code, and data are available at <url>https://instructor-embedding.github.io</url>.</abstract>
@@ -3710,7 +3710,7 @@
       <author><first>Fuli</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Hanwang</first><last>Zhang</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Qifan</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>1220-1236</pages>
       <abstract>Machine Reading Comprehension (MRC) models easily learn spurious correlations from complex contexts such as tabular data. Counterfactual training—using the factual and counterfactual data by augmentation—has become a promising solution. However, it is costly to construct faithful counterfactual examples because it is tricky to maintain the consistency and dependency of the tabular data. In this paper, we take a more efficient fashion to ask <b>hypothetical questions</b> like <i>“in which year would the net profit be larger if the revenue in 2019 were $38,298?”</i>, whose effects on the answers are equivalent to those expensive counterfactual tables. We propose a hypothetical training framework that uses paired examples with different hypothetical questions to supervise the direction of model gradient towards the counterfactual answer change. The superior generalization results on tabular MRC datasets, including a newly constructed stress test and MultiHiertt, validate our effectiveness.</abstract>
       <url hash="d8b6bce9">2023.findings-acl.79</url>
@@ -3735,7 +3735,7 @@
       <title>Risks and <fixed-case>NLP</fixed-case> Design: A Case Study on Procedural Document <fixed-case>QA</fixed-case></title>
       <author><first>Nikita</first><last>Haduong</last><affiliation>University of Washington</affiliation></author>
       <author><first>Alice</first><last>Gao</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>1248-1269</pages>
       <abstract>As NLP systems are increasingly deployed at scale, concerns about their potential negative impacts have attracted the attention of the research community, yet discussions of risk have mostly been at an abstract level and focused on generic AI or NLP applications. We argue that clearer assessments of risks and harms to users—and concrete strategies to mitigate them—will be possible when we specialize the analysis to more concrete applications and their plausible users. As an illustration, this paper is grounded in cooking recipe procedural document question answering (ProcDocQA), where there are well-defined risks to users such as injuries or allergic reactions. Our case study shows that an existing language model, applied in “zero-shot” mode, quantitatively answers real-world questions about recipes as well or better than the humans who have answered the questions on the web. Using a novel questionnaire informed by theoretical work on AI risk, we conduct a risk-oriented error analysis that could then inform the design of a future system to be deployed with lower risk of harm and better performance.</abstract>
       <url hash="b9807d1c">2023.findings-acl.81</url>
@@ -3762,7 +3762,7 @@
       <author><first>Raymond</first><last>Zhang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Neha Nayak</first><last>Kennard</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Daniel</first><last>Smith</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Daniel</first><last>McFarland</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-mcfarland"><first>Daniel</first><last>McFarland</last><affiliation>Stanford University</affiliation></author>
       <author><first>Andrew</first><last>McCallum</last><affiliation>UMass Amherst</affiliation></author>
       <author><first>Katherine</first><last>Keith</last><affiliation>Williams College</affiliation></author>
       <pages>1284-1297</pages>
@@ -3790,7 +3790,7 @@
       <author><first>Long</first><last>Chen</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author id="chris-thomas"><first>Christopher</first><last>Thomas</last><affiliation>Columbia University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois at Urbana-Champaign and Amazon (Amazon Scholar)</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>1314-1326</pages>
       <abstract>Building cross-model intelligence that can understand charts and communicate the salient information hidden behind them is an appealing challenge in the vision and language (V+L) community. The capability to uncover the underlined table data of chart figures is a critical key to automatic chart understanding. We introduce ChartT5, a V+L model that learns how to interpret table information from chart images via cross-modal pre-training on plot table pairs. Specifically, we propose two novel pre-training objectives: Masked Header Prediction (MHP) and Masked Value Prediction (MVP) to facilitate the model with different skills to interpret the table information. We have conducted extensive experiments on chart question answering and chart summarization to verify the effectiveness of the proposed pre-training strategies. In particular, on the ChartQA benchmark, our ChartT5 outperforms the state-of-the-art non-pretraining methods by over 8% performance gains.</abstract>
       <url hash="c9e290d0">2023.findings-acl.85</url>
@@ -3847,7 +3847,7 @@
       <author><first>Aiwei</first><last>Liu</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Yawen</first><last>Yang</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <author><first>Fukun</first><last>Ma</last><affiliation>School of Software,Tsinghua University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
       <pages>1361-1374</pages>
       <abstract>Cross-lingual natural language inference is a fundamental problem in cross-lingual language understanding. Many recent works have used prompt learning to address the lack of annotated parallel corpora in XNLI.However, these methods adopt discrete prompting by simply translating the templates to the target language and need external expert knowledge to design the templates. Besides, discrete prompts of human-designed template words are not trainable vectors and can not be migrated to target languages in the inference stage flexibly. In this paper, we propose a novel Soft prompt learning framework with the Multilingual Verbalizer (SoftMV) for XNLI. SoftMV first constructs cloze-style question with soft prompts for the input sample. Then we leverage bilingual dictionaries to generate an augmented multilingual question for the original question. SoftMV adopts a multilingual verbalizer to align the representations of original and augmented multilingual questions into a unified semantic space with consistency regularization. Experimental results on XNLI demonstrate that SoftMV can achieve state-of-the-art performance and significantly outperform the previous methods under the few-shot and full-shot cross-lingual transfer settings.</abstract>
@@ -3864,7 +3864,7 @@
       <author><first>Yuanbin</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jin</first><last>Ma</last><affiliation>ustc</affiliation></author>
       <author><first>Ying</first><last>Shan</last><affiliation>Tencent</affiliation></author>
       <pages>1375-1386</pages>
@@ -3878,7 +3878,7 @@
       <author><first>Xinchao</first><last>Xu</last><affiliation>Baidu</affiliation></author>
       <author><first>Zeyang</first><last>Lei</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Wenquan</first><last>Wu</last><affiliation>Baidu</affiliation></author>
-      <author><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Hua</first><last>Wu</last><affiliation>Baidu</affiliation></author>
       <author><first>Haifeng</first><last>Wang</last><affiliation>Baidu</affiliation></author>
       <pages>1387-1398</pages>
@@ -3984,7 +3984,7 @@
       <title>Prompt- and Trait Relation-aware Cross-prompt Essay Trait Scoring</title>
       <author><first>Heejin</first><last>Do</last><affiliation>Pohang University of Science and Technology</affiliation></author>
       <author><first>Yunsu</first><last>Kim</last><affiliation>POSTECH</affiliation></author>
-      <author><first>Gary Geunbae</first><last>Lee</last><affiliation>Pohang University of Science and Technology (POSTECH)</affiliation></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last><affiliation>Pohang University of Science and Technology (POSTECH)</affiliation></author>
       <pages>1538-1551</pages>
       <abstract>Automated essay scoring (AES) aims to score essays written for a given prompt, which defines the writing topic. Most existing AES systems assume to grade essays of the same prompt as used in training and assign only a holistic score. However, such settings conflict with real-education situations; pre-graded essays for a particular prompt are lacking, and detailed trait scores of sub-rubrics are required. Thus, predicting various trait scores of unseen-prompt essays (called cross-prompt essay trait scoring) is a remaining challenge of AES. In this paper, we propose a robust model: prompt- and trait relation-aware cross-prompt essay trait scorer. We encode prompt-aware essay representation by essay-prompt attention and utilizing the topic-coherence feature extracted by the topic-modeling mechanism without access to labeled data; therefore, our model considers the prompt adherence of an essay, even in a cross-prompt setting. To facilitate multi-trait scoring, we design trait-similarity loss that encapsulates the correlations of traits. Experiments prove the efficacy of our model, showing state-of-the-art results for all prompts and traits. Significant improvements in low-resource-prompt and inferior traits further indicate our model’s strength.</abstract>
       <url hash="fda94d32">2023.findings-acl.98</url>
@@ -4009,7 +4009,7 @@
       <title><tex-math>2*n</tex-math> is better than <tex-math>n^2</tex-math>: Decomposing Event Coreference Resolution into Two Tractable Problems</title>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Abhijnan</first><last>Nath</last><affiliation>Colorado State University</affiliation></author>
-      <author><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last><affiliation>Colorado State University</affiliation></author>
       <pages>1569-1583</pages>
       <abstract>Event Coreference Resolution (ECR) is the task of linking mentions of the same event either within or across documents. Most mention pairs are not coreferent, yet many that are coreferent can be identified through simple techniques such as lemma matching of the event triggers or the sentences in which they appear. Existing methods for training coreference systems sample from a largely skewed distribution, making it difficult for the algorithm to learn coreference beyond surface matching. Additionally, these methods are intractable because of the quadratic operations needed. To address these challenges, we break the problem of ECR into two parts: a) a heuristic to efficiently filter out a large number of non-coreferent pairs, and b) a training approach on a balanced set of coreferent and non-coreferent mention pairs. By following this approach, we show that we get comparable results to the state of the art on two popular ECR datasets while significantly reducing compute requirements. We also analyze the mention pairs that are “hard” to accurately classify as coreferent or non-coreferentcode repo: <url>github.com/ahmeshaf/lemma_ce_coref</url>.</abstract>
@@ -4025,9 +4025,9 @@
       <author><first>Jiacheng</first><last>Zhu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Mengdi</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Zhaowen</first><last>Wang</last><affiliation>Adobe Inc.</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Ding</first><last>Zhao</last><affiliation>CMU</affiliation></author>
       <author><first>Hailin</first><last>Jin</last><affiliation>Adobe Research</affiliation></author>
       <pages>1584-1601</pages>
@@ -4096,7 +4096,7 @@
       <author><first>Cong-Duy</first><last>Nguyen</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Zhen</first><last>Hai</last><affiliation>Data Analytics Department, Institute for Infocomm Research (I2R), A*STAR, Singapore</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <pages>1670-1696</pages>
       <abstract>Multimodal Review Helpfulness Prediction (MRHP) aims to rank product reviews based on predicted helpfulness scores and has been widely applied in e-commerce via presenting customers with useful reviews. Previous studies commonly employ fully-connected neural networks (FCNNs) as the final score predictor and pairwise loss as the training objective. However, FCNNs have been shown to perform inefficient splitting for review features, making the model difficult to clearly differentiate helpful from unhelpful reviews. Furthermore, pairwise objective, which works on review pairs, may not completely capture the MRHP goal to produce the ranking for the entire review list, and possibly induces low generalization during testing. To address these issues, we propose a listwise attention network that clearly captures the MRHP ranking context and a listwise optimization objective that enhances model generalization. We further propose gradient-boosted decision tree as the score predictor to efficaciously partition product reviews’ representations. Extensive experiments demonstrate that our method achieves state-of-the-art results and polished generalization performance on two large-scale MRHP benchmark datasets.</abstract>
       <url hash="7f62b029">2023.findings-acl.106</url>
@@ -4147,7 +4147,7 @@
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>DAMO Academy, Alibaba Group</affiliation></author>
       <author><first>Guizhen</first><last>Chen</last><affiliation>Nanyang Technological University at Singapore, Alibaba DAMO Academy</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>Chip Hong</first><last>Chang</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <pages>1743-1761</pages>
@@ -4165,8 +4165,8 @@
       <author><first>Aaron</first><last>Halfaker</last><affiliation>Microsoft</affiliation></author>
       <author><first>Liqun</first><last>Shao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Daniel</first><last>McDuff</last><affiliation>Google</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>1762-1773</pages>
       <abstract>Natural language contains rich logical structures and logical information, and correctly detecting and accurately understanding these logical structures and information underlying natural language texts is very crucial for NLP models’ performance on many important NLU and NLG tasks. Existing pre-trained language models based on the transformer architecture mostly adopt a classical design for constructing their input embeddings that ignores the logical structures underlying natural language texts, thus limiting their ability to better capture and encode key logical information in the input sequences. To overcome such limitations, in this paper we first propose a novel approach to construct logic-aware input embeddings for transformer language models through a combination of logic detection, logic mapping and hierarchical logical projections, and then develop a corresponding new modeling paradigm that can upgrade existing transformer language models into logical transformers to boost their performance on different NLU and NLG tasks. Our empirical experiments on four important and challenging NLU and NLG tasks demonstrate that our proposed logical transformer language models can achieve superior performance over their baseline transformer models through a deeper understanding of the logical structures of texts.</abstract>
@@ -4255,7 +4255,7 @@
       <author><first>Venkatesh</first><last>E</last><affiliation>IIT Hyderabad</affiliation></author>
       <author><first>Kaushal</first><last>Maurya</last><affiliation>IIT Hyderabad</affiliation></author>
       <author><first>Deepak</first><last>Kumar</last><affiliation>SRIB</affiliation></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
       <pages>1879-1891</pages>
       <abstract>Diverse headline generation is an NLP task where given a news article, the goal is to generate multiple headlines that are true to the content of the article but are different among themselves. This task aims to exhibit and exploit semantically similar one-to-many relationships between a source news article and multiple target headlines. Toward this, we propose a novel model called DIVHSK. It has two components:KEYSELECT for selecting the important keywords, and SEQGEN, for finally generating the multiple diverse headlines. In KEYSELECT, we cluster the self-attention heads of the last layer of the pre-trained encoder and select the most-attentive theme and general keywords from the source article. Then, cluster-specific keyword sets guide the SEQGEN, a pre-trained encoder-decoder model, to generate diverse yet semantically similar headlines. The proposed model consistently outperformed existing literature and our strong baselines and emerged as a state-of-the-art model. We have also created a high-quality multi-reference headline dataset from news articles.</abstract>
       <url hash="97a036f6">2023.findings-acl.118</url>
@@ -4336,7 +4336,7 @@
       <author><first>Mina</first><last>Lee</last><affiliation>Stanford University</affiliation></author>
       <author><first>Xiang Lisa</first><last>Li</last><affiliation>Stanford University</affiliation></author>
       <author><first>Ende</first><last>Shen</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford</affiliation></author>
       <pages>1970-1994</pages>
       <abstract>While pretrained language models (PLMs) have greatly improved text generation, they have also been known to produce unfaithful or inappropriate content. In contrast, classic template-based systems provide strong guarantees of faithfulness at the cost of fluency. We propose TempLM, which achieves the best of both worlds by distilling a PLM into a template-based generator. On the E2E and SynthBio data-to-text datasets, we show that TempLM is more faithful than the original PLM and is more fluent than prior template systems. Notably, on an out-of-domain evaluation, TempLM reduces a finetuned BART model’s unfaithfulness rate from 83% to 0%. In a human study, we find that TempLM’s templates substantially improve upon human-written ones in BERTScore.</abstract>
       <url hash="982592d6">2023.findings-acl.124</url>
@@ -4402,7 +4402,7 @@
       <title>Reducing Sensitivity on Speaker Names for Text Generation from Dialogues</title>
       <author><first>Qi</first><last>Jia</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Haifeng</first><last>Tang</last><affiliation>China Merchants Bank Credit Card Center</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>2058-2073</pages>
       <abstract>Changing speaker names consistently throughout a dialogue should not affect its meaning and corresponding outputs for text generation from dialogues. However, pre-trained language models, serving as the backbone for dialogue-processing tasks, have shown to be sensitive to nuances. This may result in unfairness in real-world applications. No comprehensive analysis of this problem has been done in the past. In this work, we propose to quantitatively measure a model’s sensitivity on speaker names, and comprehensively evaluate a number of known methods for reducing speaker name sensitivity, including a novel approach of our own. Extensive experiments on multiple datasets provide a benchmark for this problem and show the favorable performance of our approach in sensitivity reduction and quality of generation.</abstract>
       <url hash="a60658bb">2023.findings-acl.129</url>
@@ -4443,9 +4443,9 @@
       <author><first>Weijia</first><last>Shi</last><affiliation>uw.edu</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Xilun</first><last>Chen</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <pages>2097-2118</pages>
       <abstract>Existing language models (LMs) predict tokens with a softmax over a finite vocabulary, which can make it difficult to predict rare tokens or phrases. We introduce NPM, the first nonparametric masked language model that replaces this softmax with a nonparametric distribution over every phrase in a reference corpus. NPM fills in the [MASK] solely from retrieving a token from a text corpus. We show that NPM can be efficiently trained with a contrastive objective and an in-batch approximation to full corpus retrieval. Zero-shot evaluation on 16 tasks including classification, fact probing and question answering demonstrates that NPM outperforms significantly larger parametric models, either with or without a retrieve-and-generate approach. It is particularly better at dealing with rare patterns (word senses or facts) and predicting rare or nearly unseen words (e.g., non-Latin script). We release the model and code at github.com/facebookresearch/NPM.</abstract>
       <url hash="b4cf15eb">2023.findings-acl.132</url>
@@ -4475,7 +4475,7 @@
       <author><first>Patrick</first><last>Schramowski</last><affiliation>TU Darmstadt</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles Univeristy</affiliation></author>
       <author><first>Constantin</first><last>Rothkopf</last><affiliation>Technical University of darmstadt</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Kristian</first><last>Kersting</last><affiliation>TU Darmstadt</affiliation></author>
       <pages>2137-2156</pages>
       <abstract>Pre-trained multilingual language models (PMLMs) are commonly used when dealing with data from multiple languages and cross-lingual transfer. However, PMLMs are trained on varying amounts of data for each language. In practice this means their performance is often much better on English than many other languages. We explore to what extent this also applies to moral norms. Do the models capture moral norms from English and impose them on other languages? Do the models exhibit random and thus potentially harmful beliefs in certain languages? Both these issues could negatively impact cross-lingual transfer and potentially lead to harmful outcomes. In this paper, we (1) apply the MORALDIRECTION framework to multilingual models, comparing results in German, Czech, Arabic, Chinese, and English, (2) analyse model behaviour on filtered parallel subtitles corpora, and (3) apply the models to a Moral Foundations Questionnaire, comparing with human responses from different countries. Our experiments demonstrate that, indeed, PMLMs encode differing moral biases, but these do not necessarily correspond to cultural differences or commonalities in human opinions. We release our code and models.</abstract>
@@ -4583,7 +4583,7 @@
     </paper>
     <paper id="143">
       <title><fixed-case>AVATAR</fixed-case>: A Parallel Corpus for <fixed-case>J</fixed-case>ava-Python Program Translation</title>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Md Golam Rahman</first><last>Tushar</last><affiliation>Individual Contributor</affiliation></author>
       <author><first>Saikat</first><last>Chakraborty</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
@@ -4626,7 +4626,7 @@
       <author><first>Wenbin</first><last>Lai</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jin</first><last>Ma</last><affiliation>ustc</affiliation></author>
       <author><first>Ying</first><last>Shan</last><affiliation>Tencent</affiliation></author>
       <author><first>Weifeng</first><last>Ge</last><affiliation>Fudan University</affiliation></author>
@@ -4823,7 +4823,7 @@
       <author><first>Jialu</first><last>Wang</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <author><first>Xinyue</first><last>Liu</last><affiliation>UC Santa Cruz</affiliation></author>
       <author><first>Zonglin</first><last>Di</last><affiliation>UCSC</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>UC Santa Cruz</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>UC Santa Cruz</affiliation></author>
       <author><first>Xin</first><last>Wang</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <pages>2560-2574</pages>
       <abstract>*Warning: This paper contains several contents that may be toxic, harmful, or offensive.*In the last few years, text-to-image generative models have gained remarkable success in generating images with unprecedented quality accompanied by a breakthrough of inference speed. Despite their rapid progress, human biases that manifest in the training examples, particularly with regard to common stereotypical biases, like gender and skin tone, still have been found in these generative models. In this work, we seek to measure more complex human biases exist in the task of text-to-image generations. Inspired by the well-known Implicit Association Test (IAT) from social psychology, we propose a novel Text-to-Image Association Test (T2IAT) framework that quantifies the implicit stereotypes between concepts and valence, and those in the images. We replicate the previously documented bias tests on generative models, including morally neutral tests on flowers and insects as well as demographic stereotypical tests on diverse social attributes. The results of these experiments demonstrate the presence of complex stereotypical behaviors in image generations.</abstract>
@@ -4915,7 +4915,7 @@
       <title>Large Language Models are Built-in Autoregressive Search Engines</title>
       <author><first>Noah</first><last>Ziems</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
+      <author><first>Zhihan</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>2666-2678</pages>
       <abstract>Document retrieval is a key stage of standard Web search engines. Existing dual-encoder dense retrievers obtain representations for questions and documents independently, allowing for only shallow interactions between them. To overcome this limitation, recent autoregressive search engines replace the dual-encoder architecture by directly generating identifiers for relevant documents in the candidate pool. However, the training cost of such autoregressive search engines rises sharply as the number of candidate documents increases. In this paper, we find that large language models (LLMs) can follow human instructions to directly generate URLs for document retrieval. Surprisingly, when providing a few Query-URL pairs as in-context demonstrations, LLMs can generate Web URLs where nearly 90% of the corresponding documents contain correct answers to open-domain questions. In this way, LLMs can be thought of as built-in search engines, since they have not been explicitly trained to map questions to document identifiers. Experiments demonstrate that our method can consistently achieve better retrieval performance than existing retrieval approaches by a significant margin on three open-domain question answering benchmarks, under both zero and few-shot settings. The code for this work can be found at <url>https://github.com/Ziems/llm-url</url>.</abstract>
@@ -4969,7 +4969,7 @@
       <author><first>Zitong</first><last>Li</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Jiawei</first><last>Li</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Haifeng</first><last>Tang</last><affiliation>China Merchants Bank Credit Card Center</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Ruolan</first><last>Yang</last><affiliation>University of California San Diego</affiliation></author>
       <pages>2731-2745</pages>
       <abstract>Rewriting incomplete and ambiguous utterances can improve dialogue models’ understanding of the context and help them generate better results. However, the existing end-to-end models will have the problem of too large search space, resulting in poor quality of rewriting results. We propose a 2-phase rewriting framework which first predicts the empty slots in the utterance that need to be completed, and then generate the part to be filled into each positions. Our framework is simple to implement, fast to run, and achieves the state-of-the-art results on several public rewriting datasets.</abstract>
@@ -4979,10 +4979,10 @@
     </paper>
     <paper id="172">
       <title>Exploring Variation of Results from Different Experimental Conditions</title>
-      <author><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
       <author><first>Mohammad</first><last>Arvan</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Natalie</first><last>Parde</last><affiliation>University of Illinois at Chicago</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <pages>2746-2757</pages>
       <abstract>It might reasonably be expected that running multiple experiments for the same task using the same data and model would yield very similar results. Recent research has, however, shown this not to be the case for many NLP experiments. In this paper, we report extensive coordinated work by two NLP groups to run the training and testing pipeline for three neural text simplification models under varying experimental conditions, including different random seeds, run-time environments, and dependency versions, yielding a large number of results for each of the three models using the same data and train/dev/test set splits. From one perspective, these results can be interpreted as shedding light on the reproducibility of evaluation results for the three NTS models, and we present an in-depth analysis of the variation observed for different combinations of experimental conditions. From another perspective, the results raise the question of whether the averaged score should be considered the ‘true’ result for each model.</abstract>
       <url hash="1f22cc52">2023.findings-acl.172</url>
@@ -5018,11 +5018,11 @@
       <author><first>Sina</first><last>Semnani</last><affiliation>Stanford University</affiliation></author>
       <author><first>Jiwon</first><last>Seo</last><affiliation>Hanyang University</affiliation></author>
       <author><first>Vivek</first><last>Seshadri</last><affiliation>Microsoft Research India / Karya Inc</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Michael</first><last>Sun</last><affiliation>Stanford University</affiliation></author>
       <author><first>Aditya</first><last>Yadavalli</last><affiliation>Karya Inc</affiliation></author>
       <author><first>Chaobin</first><last>You</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Monica</first><last>Lam</last><affiliation>Stanford University</affiliation></author>
       <pages>2773-2794</pages>
       <abstract>Task-oriented dialogue research has mainly focused on a few popular languages like English and Chinese, due to the high dataset creation cost for a new language. To reduce the cost, we apply manual editing to automatically translated data. We create a new multilingual benchmark, X-RiSAWOZ, by translating the Chinese RiSAWOZ to 4 languages: English, French, Hindi, Korean; and a code-mixed English-Hindi language.X-RiSAWOZ has more than 18,000 human-verified dialogue utterances for each language, and unlike most multilingual prior work, is an end-to-end dataset for building fully-functioning agents. The many difficulties we encountered in creating X-RiSAWOZ led us to develop a toolset to accelerate the post-editing of a new language dataset after translation. This toolset improves machine translation with a hybrid entity alignment technique that combines neural with dictionary-based methods, along with many automated and semi-automated validation checks. We establish strong baselines for X-RiSAWOZ by training dialogue agents in the zero- and few-shot settings where limited gold data is available in the target language. Our results suggest that our translation and post-editing methodology and toolset can be used to create new high-quality multilingual dialogue agents cost-effectively. Our dataset, code, and toolkit are released open-source.</abstract>
@@ -5062,7 +5062,7 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Yunzhe</first><last>Lv</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Xin</first><last>Zheng</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>2824-2836</pages>
       <abstract>kNN-MT presents a new paradigm for domain adaptation by building an external datastore, which usually saves all target language token occurrences in the parallel corpus. As a result, the constructed datastore is usually large and possibly redundant. In this paper, we investigate the interpretability issue of this approach: what knowledge does the NMT model need? We propose the notion of local correctness (LAC) as a new angle, which describes the potential translation correctness for a single entry and for a given neighborhood. Empirical study shows that our investigation successfully finds the conditions where the NMT model could easily fail and need related knowledge. Experiments on six diverse target domains and two language-pairs show that pruning according to local correctness brings a light and more explainable memory for kNN-MT domain adaptation.</abstract>
       <url hash="e0af3709">2023.findings-acl.177</url>
@@ -5077,7 +5077,7 @@
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jiaze</first><last>Chen</last><affiliation>Bytedance AI Lab</affiliation></author>
       <author><first>Fei</first><last>Zhao</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <pages>2837-2853</pages>
       <abstract>Aspect Sentiment Triplet Extraction (ASTE) is widely used in various applications. However, existing ASTE datasets are limited in their ability to represent real-world scenarios, hindering the advancement of research in this area. In this paper, we introduce a new dataset, named DMASTE, which is manually annotated to better fit real-world scenarios by providing more diverse and realistic reviews for the task. The dataset includes various lengths, diverse expressions, more aspect types, and more domains than existing datasets. We conduct extensive experiments on DMASTE in multiple settings to evaluate previous ASTE approaches. Empirical results demonstrate that DMASTE is a more challenging ASTE dataset. Further analyses of in-domain and cross-domain settings provide some promising directions for future research.</abstract>
       <url hash="5896ed5d">2023.findings-acl.178</url>
@@ -5165,7 +5165,7 @@
     </paper>
     <paper id="185">
       <title>The Decades Progress on Code-Switching Research in <fixed-case>NLP</fixed-case>: A Systematic Survey on Trends and Challenges</title>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Zheng Xin</first><last>Yong</last><affiliation>Brown University</affiliation></author>
       <author><first>Thamar</first><last>Solorio</last><affiliation>University of Houston</affiliation></author>
@@ -5247,7 +5247,7 @@
       <author><first>Jiani</first><last>Huang</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Ziyang</first><last>Li</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Mayur</first><last>Naik</last><affiliation>UPenn</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>3062-3077</pages>
       <abstract>Pre-trained large language models (LMs) struggle to perform logical reasoning reliably despite advances in scale and compositionality. In this work, we tackle this challenge through the lens of symbolic programming. We propose DSR-LM, a Differentiable Symbolic Reasoning framework where pre-trained LMs govern the perception of factual knowledge, and a symbolic module performs deductive reasoning. In contrast to works that rely on hand-crafted logic rules, our differentiable symbolic reasoning framework efficiently learns weighted rules and applies semantic loss to further improve LMs. DSR-LM is scalable, interpretable, and allows easy integration of prior knowledge, thereby supporting extensive symbolic programming to robustly derive a logical conclusion. The results of our experiments suggest that DSR-LM improves the logical reasoning abilities of pre-trained language models, resulting in a significant increase in accuracy of over 20% on deductive reasoning benchmarks. Furthermore, DSR-LM outperforms a variety of competitive baselines when faced with systematic changes in sequence length.</abstract>
       <url hash="3214caa0">2023.findings-acl.191</url>
@@ -5272,7 +5272,7 @@
       <title>Boosting Zero-shot Cross-lingual Retrieval by Training on Artificially Code-Switched Data</title>
       <author><first>Robert</first><last>Litschko</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Ekaterina</first><last>Artemova</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>3096-3108</pages>
       <abstract>Transferring information retrieval (IR) models from a high-resource language (typically English) to other languages in a zero-shot fashion has become a widely adopted approach. In this work, we show that the effectiveness of zero-shot rankers diminishes when queries and documents are present in different languages. Motivated by this, we propose to train ranking models on artificially code-switched data instead, which we generate by utilizing bilingual lexicons. To this end, we experiment with lexicons induced from (1) cross-lingual word embeddings and (2) parallel Wikipedia page titles. We use the mMARCO dataset to extensively evaluate reranking models on 36 language pairs spanning Monolingual IR (MoIR), Cross-lingual IR (CLIR), and Multilingual IR (MLIR). Our results show that code-switching can yield consistent and substantial gains of 5.1 MRR@10 in CLIR and 3.9 MRR@10 in MLIR, while maintaining stable performance in MoIR. Encouragingly, the gains are especially pronounced for distant languages (up to 2x absolute gain). We further show that our approach is robust towards the ratio of code-switched tokens and also extends to unseen languages. Our results demonstrate that training on code-switched data is a cheap and effective way of generalizing zero-shot rankers for cross-lingual and multilingual retrieval.</abstract>
       <url hash="99a1f915">2023.findings-acl.193</url>
@@ -5299,7 +5299,7 @@
       <author><first>Haoran</first><last>Sun</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin university</affiliation></author>
       <author><first>Xiaodong</first><last>Lin</last><affiliation>Rutgers University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>3123-3137</pages>
       <abstract>Distilling knowledge from a high-resource task, e.g., machine translation, is an effective way to alleviate the data scarcity problem of end-to-end speech translation. However, previous works simply use the classical knowledge distillation that does not allow for adequate transfer of knowledge from machine translation. In this paper, we propose a comprehensive knowledge distillation framework for speech translation, CKDST, which is capable of comprehensively and effectively distilling knowledge from machine translation to speech translation from two perspectives: cross-modal contrastive representation distillation and simultaneous decoupled knowledge distillation. In the former, we leverage a contrastive learning objective to optmize the mutual information between speech and text representations for representation distillation in the encoder. In the later, we decouple the non-target class knowledge from target class knowledge for logits distillation in the decoder. Experiments on the MuST-C benchmark dataset demonstrate that our CKDST substantially improves the baseline by 1.2 BLEU on average in all translation directions, and outperforms previous state-of-the-art end-to-end and cascaded speech translation models.</abstract>
       <url hash="ecd6f65d">2023.findings-acl.195</url>
@@ -5311,7 +5311,7 @@
       <author><first>Shira</first><last>Wein</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Christopher</first><last>Homan</last><affiliation>Rochester Institute of Technology</affiliation></author>
       <author><first>Lora</first><last>Aroyo</last><affiliation>Google</affiliation></author>
-      <author><first>Chris</first><last>Welty</last><affiliation/></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>3138-3161</pages>
       <abstract>Among the problems with leaderboard culture in NLP has been the widespread lack of confidence estimation in reported results. In this work, we present a framework and simulator for estimating p-values for comparisons between the results of two systems, in order to understand the confidence that one is actually better (i.e. ranked higher) than the other. What has made this difficult in the past is that each system must itself be evaluated by comparison to a gold standard. We define a null hypothesis that each system’s metric scores are drawn from the same distribution, using variance found naturally (though rarely reported) in test set items and individual labels on an item (responses) to produce the metric distributions. We create a test set that evenly mixes the responses of the two systems under the assumption the null hypothesis is true. Exploring how to best estimate the true p-value from a single test set under different metrics, tests, and sampling methods, we find that the presence of response variance (from multiple raters or multiple model versions) has a profound impact on p-value estimates for model comparison, and that choice of metric and sampling method is critical to providing statistical guarantees on model comparisons.</abstract>
       <url hash="ba8e9ecd">2023.findings-acl.196</url>
@@ -5332,8 +5332,8 @@
       <title>Towards Open-Domain <fixed-case>T</fixed-case>witter User Profile Inference</title>
       <author><first>Haoyang</first><last>Wen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhenxin</first><last>Xiao</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Alexander</first><last>Hauptmann</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>3172-3188</pages>
       <abstract>Twitter user profile inference utilizes information from Twitter to predict user attributes (e.g., occupation, location), which is controversial because of its usefulness for downstream applications and its potential to reveal users’ privacy. Therefore, it is important for researchers to determine the extent of profiling in a safe environment to facilitate proper use and make the public aware of the potential risks. Contrary to existing approaches on limited attributes, we explore open-domain Twitter user profile inference. We conduct a case study where we collect publicly available WikiData public figure profiles and use diverse WikiData predicates for profile inference. After removing sensitive attributes, our data contains over 150K public figure profiles from WikiData, over 50 different attribute predicates, and over 700K attribute values. We further propose a prompt-based generation method, which can infer values that are implicitly mentioned in the Twitter information. Experimental results show that the generation-based approach can infer more comprehensive user profiles than baseline extraction-based methods, but limitations still remain to be applied for real-world use. We also enclose a detailed ethical statement for our data, potential benefits and risks from this work, and our efforts to mitigate the risks.</abstract>
       <url hash="e660d2be">2023.findings-acl.198</url>
@@ -5343,7 +5343,7 @@
     <paper id="199">
       <title>Eliciting Affective Events from Language Models by Multiple View Co-prompting</title>
       <author><first>Yuan</first><last>Zhuang</last><affiliation>University of Utah</affiliation></author>
-      <author><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
       <pages>3189-3201</pages>
       <abstract>Prior research on affective event classification showed that exploiting weakly labeled data for training can improve model performance. In this work, we propose a simpler and more effective approach for generating training data by automatically acquiring and labeling affective events with Multiple View Co-prompting, which leverages two language model prompts that provide independent views of an event. The approach starts with a modest amount of gold data and prompts pre-trained language models to generate new events. Next, information about the probable affective polarity of each event is collected from two complementary language model prompts and jointly used to assign polarity labels. Experimental results on two datasets show that the newly acquired events improve a state-of-the-art affective event classifier. We also present analyses which show that using multiple views produces polarity labels of higher quality than either view on its own.</abstract>
       <url hash="3a88b3ce">2023.findings-acl.199</url>
@@ -5381,7 +5381,7 @@
       <title>It Takes Two to Tango: Navigating Conceptualizations of <fixed-case>NLP</fixed-case> Tasks and Measurements of Performance</title>
       <author><first>Arjun</first><last>Subramonian</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Xingdi</first><last>Yuan</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft Research</affiliation></author>
       <pages>3234-3279</pages>
       <abstract>Progress in NLP is increasingly measured through benchmarks; hence, contextualizing progress requires understanding when and why practitioners may disagree about the validity of benchmarks. We develop a taxonomy of disagreement, drawing on tools from measurement modeling, and distinguish between two types of disagreement: 1) how tasks are conceptualized and 2) how measurements of model performance are operationalized. To provide evidence for our taxonomy, we conduct a meta-analysis of relevant literature to understand how NLP tasks are conceptualized, as well as a survey of practitioners about their impressions of different factors that affect benchmark validity. Our meta-analysis and survey across eight tasks, ranging from coreference resolution to question answering, uncover that tasks are generally not clearly and consistently conceptualized and benchmarks suffer from operationalization disagreements. These findings support our proposed taxonomy of disagreement. Finally, based on our taxonomy, we present a framework for constructing benchmarks and documenting their limitations.</abstract>
@@ -5446,8 +5446,8 @@
       <author><first>Adam</first><last>Wiemerslage</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Changbing</first><last>Yang</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Garrett</first><last>Nicolai</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>3351-3365</pages>
       <abstract>With a growing focus on morphological inflection systems for languages where high-quality data is scarce, training data noise is a serious but so far largely ignored concern. We aim at closing this gap by investigating the types of noise encountered within a pipeline for truly unsupervised morphological paradigm completion and its impact on morphological inflection systems: First, we propose an error taxonomy and annotation pipeline for inflection training data. Then, we compare the effect of different types of noise on multiple state-of-the- art inflection models. Finally, we propose a novel character-level masked language modeling (CMLM) pretraining objective and explore its impact on the models’ resistance to noise. Our experiments show that various architectures are impacted differently by separate types of noise, but encoder-decoders tend to be more robust to noise than models trained with a copy bias. CMLM pretraining helps transformers, but has lower impact on LSTMs.</abstract>
       <url hash="3cfd59df">2023.findings-acl.207</url>
@@ -5462,7 +5462,7 @@
       <author><first>Qingyu</first><last>Yin</last><affiliation>Amazon</affiliation></author>
       <author><first>Zheng</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Xifeng</first><last>Yan</last><affiliation>University of California at Santa Barbara</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <author><first>Bing</first><last>Yin</last><affiliation>Amazon.com</affiliation></author>
       <pages>3366-3375</pages>
       <abstract>Answering complex questions often requires reasoning over knowledge graphs (KGs). State-of-the-art methods often utilize entities in questions to retrieve local subgraphs, which are then fed into KG encoder, e.g. graph neural networks (GNNs), to model their local structures and integrated into language models for question answering. However, this paradigm constrains retrieved knowledge in local subgraphs and discards more diverse triplets buried in KGs that are disconnected but useful for question answering. In this paper, we propose a simple yet effective method to first retrieve the most relevant triplets from KGs and then rerank them, which are then concatenated with questions to be fed into language models. Extensive results on both CommonsenseQA and OpenbookQA datasets show that our method can outperform state-of-the-art up to 4.6% absolute accuracy.</abstract>
@@ -5545,7 +5545,7 @@
       <author><first>Rahul</first><last>Aralikatte</last><affiliation>Mila</affiliation></author>
       <author><first>Ziling</first><last>Cheng</last><affiliation>McGill University, Mila</affiliation></author>
       <author><first>Sumanth</first><last>Doddapaneni</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
       <pages>3468-3492</pages>
       <abstract>We present Varta, a large-scale multilingual dataset for headline generation in Indic languages. This dataset includes more than 41 million pairs of headlines and articles in 14 different Indic languages (and English), which come from a variety of high-quality news sources. To the best of our knowledge, this is the largest collection of curated news articles for Indic languages currently available. We use the collected data in a series of experiments to answer important questions related to Indic NLP and multilinguality research in general. We show that the dataset is challenging even for state-of-the-art abstractive models and that they perform only slightly better than extractive baselines. Owing to its size, we also show that the dataset can be used to pre-train strong language models that outperform competitive baselines in both NLU and NLG benchmarks.</abstract>
       <url hash="3f5fcf45">2023.findings-acl.215</url>
@@ -5693,7 +5693,7 @@
       <author><first>Gautier</first><last>Izacard</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Sebastian</first><last>Riedel</last><affiliation>University College London / Facebook AI Research</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
       <pages>3650-3675</pages>
       <abstract>We study the problem of retrieval with instructions, where users provide explicit descriptions of their intent along with their queries to guide a retrieval system. Our solution is a general-purpose task-aware retrieval system, trained using multi-task instruction tuning and can follow human-written instructions to find relevant documents to a given query. We introduce the first large-scale collection of 37 retrieval datasets with instructions, BERRI, and present TART, a single multi-task retrieval system trained on BERRI with instructions that can adapt to a new task without any parameter updates. TART advances the state of the art on two zero-shot retrieval benchmarks, BEIR and LOTTE, outperforming models up to three times larger. We further introduce a new evaluation setup, X2-Retrieval, to better reflect real-world scenarios in which diverse domains and tasks are pooled. TART significantly outperforms competitive baselines in this setup, further highlighting the effectiveness of guiding retrieval with instructions.</abstract>
       <url hash="6674a452">2023.findings-acl.225</url>
@@ -5703,7 +5703,7 @@
     </paper>
     <paper id="226">
       <title>Non-Repeatable Experiments and Non-Reproducible Results: The Reproducibility Crisis in Human Evaluation in <fixed-case>NLP</fixed-case></title>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <author><first>Craig</first><last>Thomson</last><affiliation>University of Aberdeen</affiliation></author>
       <author><first>Ehud</first><last>Reiter</last><affiliation>University of Aberdeen</affiliation></author>
       <author><first>Simon</first><last>Mille</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
@@ -5717,7 +5717,7 @@
       <title>Define, Evaluate, and Improve Task-Oriented Cognitive Capabilities for Instruction Generation Models</title>
       <author><first>Lingjun</first><last>Zhao</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Khanh</first><last>Nguyen</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
       <pages>3688-3706</pages>
       <abstract>Recent work studies the cognitive capabilities of language models through psychological tests designed for humans. While these studies are helpful for understanding the general capabilities of these models, there is no guarantee that a model possessing sufficient capabilities to pass those tests would actually use those capabilities in performing real-life tasks. In this work, we formulate task-oriented cognitive capabilities, which are human-like cognitive capabilities that language models leverage to perform tasks. These capabilities are (i) the ability to quickly generate good candidate utterances (the search capability) (ii) the ability to predict how a listener interprets those utterances and choose the most appropriate one (the pragmatic capability). We design an evaluation scheme for comparing these capabilities of a language model with those of a human. Applying this scheme to examine various models in a navigation instruction generation problem, we find that their pragmatic capability is severely lacking. This insight leads us to augment them with better models of the listener and obtain a significant boost of 11% in success rate in guiding real humans. Our work advocates for having a principled procedure for aligning language models with humans that involves (i) formulating task-oriented capabilities, (ii) devising a method to quantify their deficiency, and (iii) iteratively improving them.</abstract>
       <url hash="006db232">2023.findings-acl.227</url>
@@ -5729,7 +5729,7 @@
       <title>Robustness of Multi-Source <fixed-case>MT</fixed-case> to Transcription Errors</title>
       <author><first>Dominik</first><last>Macháček</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Peter</first><last>Polák</last><affiliation>Charles University, MFF UFAL</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>NICT</affiliation></author>
       <pages>3707-3723</pages>
       <abstract>Automatic speech translation is sensitive to speech recognition errors, but in a multilingual scenario, the same content may be available in various languages via simultaneous interpreting, dubbing or subtitling. In this paper, we hypothesize that leveraging multiple sources will improve translation quality if the sources complement one another in terms of correct information they contain. To this end, we first show that on a 10-hour ESIC corpus, the ASR errors in the original English speech and its simultaneous interpreting into German and Czech are mutually independent. We then use two sources, English and German, in a multi-source setting for translation into Czech to establish its robustness to ASR errors. Furthermore, we observe this robustness when translating both noisy sources together in a simultaneous translation setting. Our results show that multi-source neural machine translation has the potential to be useful in a real-time simultaneous translation setting, thereby motivating further investigation in this area.</abstract>
@@ -5802,7 +5802,7 @@
       <author><first>Asterios</first><last>Katsifodimos</last><affiliation>Amazon Web Services</affiliation></author>
       <author><first>George</first><last>Karypis</last><affiliation>University of Minnesota</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <pages>3786-3800</pages>
       <abstract>Given a data lake of tabular data as well as a query table, how can we retrieve all the tables in the data lake that can be unioned with the query table? Table union search constitutes an essential task in data discovery and preparation as it enables data scientists to navigate massive open data repositories. Existing methods identify uniability based on column representations (word surface forms or token embeddings) and column relation represented by column representation similarity. However, the semantic similarity obtained between column representations is often insufficient to reveal latent relational features to describe the column relation between pair of columns and not robust to the table noise. To address these issues, in this paper, we propose a multi-stage self-supervised table union search framework called AutoTUS, which represents column relation as a vector– column relational representation and learn column relational representation in a multi-stage manner that can better describe column relation for unionability prediction. In particular, the large language model powered contextualized column relation encoder is updated by adaptive clustering and pseudo label classification iteratively so that the better column relational representation can be learned. Moreover, to improve the robustness of the model against table noises, we propose table noise generator to add table noise to the training table data. Experiments on real-world datasets as well as synthetic test set augmented with table noise show that AutoTUS achieves 5.2% performance gain over the SOTA baseline.</abstract>
       <url hash="fc2b0a80">2023.findings-acl.233</url>
@@ -5811,9 +5811,9 @@
     </paper>
     <paper id="234">
       <title>Bidirectional Transformer Reranker for Grammatical Error Correction</title>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>3801-3825</pages>
       <abstract>Pre-trained seq2seq models have achieved state-of-the-art results in the grammatical error correction task. However, these models still suffer from a prediction bias due to their unidirectional decoding. Thus, we propose a bidirectional Transformer reranker (BTR), that re-estimates the probability of each candidate sentence generated by the pre-trained seq2seq model. The BTR preserves the seq2seq-style Transformer architecture but utilizes a BERT-style self-attention mechanism in the decoder to compute the probability of each target token by using masked language modeling to capture bidirectional representations from the target context. For guiding the reranking, the BTR adopts negative sampling in the objective function to minimize the unlikelihood. During inference, the BTR gives final results after comparing the reranked top-1 results with the original ones by an acceptance threshold. Experimental results show that, in reranking candidates from a pre-trained seq2seq model, T5-base, the BTR on top of T5-base could yield 65.47 and 71.27 F0.5 scores on the CoNLL-14 and BEA test sets, respectively, and yield 59.52 GLEU score on the JFLEG corpus, with improvements of 0.36, 0.76 and 0.48 points compared with the original T5-base. Furthermore, when reranking candidates from T5-large, the BTR on top of T5-base improved the original T5-large by 0.26 points on the BEA test set.</abstract>
       <url hash="5149cca9">2023.findings-acl.234</url>
@@ -5825,7 +5825,7 @@
       <title>Not Enough Data to Pre-train Your Language Model? <fixed-case>MT</fixed-case> to the Rescue!</title>
       <author><first>Gorka</first><last>Urbizu</last><affiliation>Orai NLP Technologies</affiliation></author>
       <author><first>Iñaki</first><last>San Vicente</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP Technologies</affiliation></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP Technologies</affiliation></author>
       <author><first>Ander</first><last>Corral</last><affiliation>Orai NLP Technologies</affiliation></author>
       <pages>3826-3836</pages>
       <abstract>In recent years, pre-trained transformer-based language models (LM) have become a key resource for implementing most NLP tasks. However, pre-training such models demands large text collections not available in most languages. In this paper, we study the use of machine-translated corpora for pre-training LMs. We answer the following research questions: RQ1: Is MT-based data an alternative to real data for learning a LM?; RQ2: Can real data be complemented with translated data and improve the resulting LM? In order to validate these two questions, several BERT models for Basque have been trained, combining real data and synthetic data translated from Spanish.The evaluation carried out on 9 NLU tasks indicates that models trained exclusively on translated data offer competitive results. Furthermore, models trained with real data can be improved with synthetic data, although further research is needed on the matter.</abstract>
@@ -5866,7 +5866,7 @@
       <author><first>Jingyun</first><last>Xu</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Changmeng</first><last>Zheng</last><affiliation>The Hong Kong Polytechnic University</affiliation></author>
       <author><first>Yi</first><last>Cai</last><affiliation>South China University of Technology</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>3869-3882</pages>
       <abstract>Recent studies have shown remarkable success in cross-domain named entity recognition (cross-domain NER). Despite the promising results, existing methods mainly utilize pre-training language models like BERT to represent words. As such, the original chaotic representations may challenge them to distinguish entity types of entities, leading to entity type misclassification. To this end, we attempt to utilize contrastive learning to refine the original representations and propose a model-agnostic framework named MoCL for cross-domain NER. Additionally, we respectively combine MoCL with two distinctive cross-domain NER methods and two pre-training language models to explore its generalization ability. Empirical results on seven domains show the effectiveness and good generalization ability of MoCL.</abstract>
       <url hash="07bee3b2">2023.findings-acl.238</url>
@@ -5880,7 +5880,7 @@
       <author><first>Shuang</first><last>Zeng</last><affiliation>Institute of Computational Linguistics, Peking University</affiliation></author>
       <author><first>Guoqiang</first><last>Feng</last><affiliation>Peking University</affiliation></author>
       <author><first>Jiaxing</first><last>Lin</last><affiliation>Peking University</affiliation></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>3883-3896</pages>
       <abstract>Distantly-Supervised Named Entity Recognition effectively alleviates the burden of time-consuming and expensive annotation in the supervised setting. But the context-free matching process and the limited coverage of knowledge bases introduce inaccurate and incomplete annotation noise respectively. Previous studies either considered only incomplete one or indiscriminately handle two types of noise with the same strategy. In this paper, we argue that the different causes of two types of noise bring up the requirement of different strategies in model architecture. Therefore, we propose the SANTA to handle these two types of noise separately with (1) Memory-smoothed Focal Loss and Entity-aware KNN to relieve the entity ambiguity problem caused by inaccurate annotation, and (2) Boundary Mixup to alleviate decision boundary shifting problem caused by incomplete annotation and a noise-tolerant loss to improve the model’s robustness. Benefiting from our separate tailored strategies, we confirm in the experiment that the two types of noise are well mitigated.SANTA also achieves a new state-of-the-art on five public datasets.</abstract>
       <url hash="3b367de4">2023.findings-acl.239</url>
@@ -6136,7 +6136,7 @@
       <author><first>Nicolo’</first><last>Brandizzi</last><affiliation>Sapienza University of Rome</affiliation></author>
       <author><first>Mario</first><last>Giulianelli</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Sandro</first><last>Pezzelle</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last><affiliation>ILLC, University of Amsterdam</affiliation></author>
       <pages>4198-4217</pages>
       <abstract>Dialogue participants may have varying levels of knowledge about the topic under discussion. In such cases, it is essential for speakers to adapt their utterances by taking their audience into account. Yet, it is an open question how such adaptation can be modelled in computational agents. In this paper, we model a visually grounded referential game between a knowledgeable speaker and a listener with more limited visual and linguistic experience. Inspired by psycholinguistic theories, we endow our speaker with the ability to adapt its referring expressions via a simulation module that monitors the effectiveness of planned utterances from the listener’s perspective. We propose an adaptation mechanism building on plug-and-play approaches to controlled language generation, where utterance generation is steered on the fly by the simulator without finetuning the speaker’s underlying language model. Our results and analyses show that our approach is effective: the speaker’s utterances become closer to the listener’s domain of expertise, which leads to higher communicative success.</abstract>
       <url hash="d4f85d5e">2023.findings-acl.258</url>
@@ -6183,7 +6183,7 @@
       <title>Follow the Wisdom of the Crowd: Effective Text Generation via Minimum <fixed-case>B</fixed-case>ayes Risk Decoding</title>
       <author><first>Mirac</first><last>Suzgun</last><affiliation>Stanford University</affiliation></author>
       <author><first>Luke</first><last>Melas-Kyriazi</last><affiliation>Harvard University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>4265-4293</pages>
       <abstract>In open-ended natural-language generation, existing text decoding methods typically struggle to produce text which is both diverse and high-quality. Greedy and beam search are known to suffer from text degeneration and linguistic diversity issues, while temperature, top-k, and nucleus sampling yield diverse but often lower-quality outputs. In this work, we build upon Minimum Bayes Risk Decoding (MBRD), a family of decoding methods based on Bayesian risk minimization, to address this diversity-quality trade-off. Inspired by the principle of the wisdom of the crowd, MBRD seeks to select a candidate from a pool of candidates that has the least expected risk under a generative model according to a given utility function. The crowd of candidates serves as an approximation for the distribution over human-generated references. We show that MBRD generalizes numerous decoding methods, including majority voting, and can be used as a drop-in replacement for existing sampling methods. Across a wide range of tasks—such as summarization, data-to-text, translation, and textual style transfer—MBRD yields 3-7 ROUGE and BLEU point improvements, including state-of-the-art results on WebNLG and WMT’16.</abstract>
       <url hash="512c9f28">2023.findings-acl.262</url>
@@ -6337,9 +6337,9 @@
       <author><first>Maxwell</first><last>Crouse</last><affiliation>IBM Research</affiliation></author>
       <author><first>Diwakar</first><last>Mahajan</last><affiliation>IBM Research</affiliation></author>
       <author><first>Ibrahim</first><last>Abdelaziz</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last><affiliation>IBM Research</affiliation></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Alexander</first><last>Gray</last><affiliation>IBM Research</affiliation></author>
       <pages>4485-4503</pages>
       <abstract>With the growing interest in large language models, the need for evaluating the quality of machine text compared to reference (typically human-generated) text has become focal attention. Most recent works focus either on task-specific evaluation metrics or study the properties of machine-generated text captured by the existing metrics. In this work, we propose a new evaluation scheme to model human judgments in 7 NLP tasks, based on the fine-grained mismatches between a pair of texts. Inspired by the recent efforts in several NLP tasks for fine-grained evaluation, we introduce a set of 13 mismatch error types such as spatial/geographic errors, entity errors, etc, to guide the model for better prediction of human judgments. We propose a neural framework for evaluating machine texts that uses these mismatch error types as auxiliary tasks and re-purposes the existing single-number evaluation metrics as additional scalar features, in addition to textual features extracted from the machine and reference texts. Our experiments reveal key insights about the existing metrics via the mismatch errors. We show that the mismatch errors between the sentence pairs on the held-out datasets from 7 NLP tasks align well with the human evaluation.</abstract>
@@ -6548,8 +6548,8 @@
     <paper id="290">
       <title>Unsupervised Paraphrasing of Multiword Expressions</title>
       <author><first>Takashi</first><last>Wada</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Yuji</first><last>Matsumoto</last><affiliation>Riken Center for Advanced Intelligence Project</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last><affiliation>Riken Center for Advanced Intelligence Project</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>4732-4746</pages>
       <abstract>We propose an unsupervised approach to paraphrasing multiword expressions (MWEs) in context. Our model employs only monolingual corpus data and pre-trained language models (without fine-tuning), and does not make use of any external resources such as dictionaries. We evaluate our method on the SemEval 2022 idiomatic semantic text similarity task, and show that it outperforms all unsupervised systems and rivals supervised systems.</abstract>
@@ -6607,7 +6607,7 @@
     <paper id="295">
       <title>Predicting Numerals in Text Using Nearest Neighbor Language Models</title>
       <author><first>Taku</first><last>Sakamoto</last><affiliation>The University of Tokyo</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>4795-4809</pages>
       <abstract>Commonsense about quantitative properties is essential for a deep understanding of texts containing numerals. However, naive language models (LMs) treat numerals as string tokens; therefore, they lack an understanding of the magnitudes of numerals, resulting in a difficulty in acquiring the commonsense. In this study, we apply the <tex-math>k</tex-math>-nearest neighbor LM (<tex-math>k</tex-math>NN-LM) to the masked numeral prediction (MNP) task, which measures the quantitative commonsense of LMs.<tex-math>k</tex-math>NN-LM extends pre-trained neural LMs with the <tex-math>k</tex-math>-nearest neighbor (<tex-math>k</tex-math>NN) search.Since it can utilize patterns that appear in the datastore for prediction, we expect an improvement in numeral prediction accuracy, which is associated with a high rate of occurrence of out-of-vocabulary (OOV) words.Through experiments, we verified that the retrieval-based method is effective for fine-grained predictions of numerals from context, especially for the OOV numerals.We also compared two different context spans for context representations to improve the accuracy of <tex-math>k</tex-math>NN search by using only the words that are closely related to the masked numeral: the mask and its surrounding words, and the mask and its subsequent words.Our results reveal that using only the embeddings of mask tokens for numerals in <tex-math>k</tex-math>NN search is the most effective approach for realizing MNP tasks.</abstract>
       <url hash="355fb982">2023.findings-acl.295</url>
@@ -6669,7 +6669,7 @@
     </paper>
     <paper id="300">
       <title>Sampling Better Negatives for Distantly Supervised Named Entity Recognition</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last><affiliation>None</affiliation></author>
+      <author><first>Lu</first><last>Xu</last><affiliation>None</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Wei</first><last>Lu</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <pages>4874-4882</pages>
@@ -6805,7 +6805,7 @@
       <author><first>Bin</first><last>Ni</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Xiyan</first><last>Shao</last><affiliation>UCSD</affiliation></author>
       <author><first>Hengzhe</first><last>Zhang</last><affiliation>VUW</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>UC San Diego</affiliation></author>
       <pages>5000-5015</pages>
       <abstract>It is crucial to automatically construct knowledge graphs (KGs) of diverse new relations to support knowledge discovery and broad applications. Previous KG construction methods, based on either crowdsourcing or text mining, are often limited to a small predefined set of relations due to manual cost or restrictions in text corpus. Recent research proposed to use pretrained language models (LMs) as implicit knowledge bases that accept knowledge queries with prompts. Yet, the implicit knowledge lacks many desirable properties of a full-scale symbolic KG, such as easy access, navigation, editing, and quality assurance. In this paper, we propose a new approach of harvesting massive KGs of arbitrary relations from pretrained LMs. With minimal input of a relation definition (a prompt and a few shot of example entity pairs), the approach efficiently searches in the vast entity pair space to extract diverse accurate knowledge of the desired relation. We develop an effective search-and-rescore mechanism for improved efficiency and accuracy. We deploy the approach to harvest KGs of over 400 new relations, from LMs of varying capacities such as RoBERTaNet. Extensive human and automatic evaluations show our approach manages to extract diverse accurate knowledge, including tuples of complex relations (e.g., “A is capable of but not good at B”). The resulting KGs as a symbolic interpretation of the source LMs also reveal new insights into the LMs’ knowledge capacities.</abstract>
@@ -6835,7 +6835,7 @@
       <author><first>Yahan</first><last>Yu</last><affiliation>Tencent</affiliation></author>
       <author><first>Jian</first><last>Liu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>5032-5045</pages>
       <abstract>Recent studies have pointed out that many well-developed Visual Question Answering (VQA) systems suffer from bias problem. Despite the remarkable performance gained on In-Distribution (ID) datasets, the VQA model might merely capture the superficial correlation from question to answer rather than showing real reasoning abilities. Therefore, when switching to Out-of-Distribution (OOD) dataset, whose test distribution is unknown or even reversed with the training set, significant drops might be demonstrated. Although efforts have been devoted to easing the negative bias effect brought by language prior and analysing its inherent cause, they are still limited by the following two aspects. First, most current debiasing methods achieve promising OOD generalization ability with a major sacrifice of the ID performance. Second, existing researches are restricted by exploiting comprehensive biases, since weakening the language bias is mainly focused, while only a few works consider vision bias. In this paper, we investigate a straightforward way to mitigate bias problem for VQA task. Specifically, we reduce bias effect by subtracting bias score from standard VQA base score. Based on such a direct strategy, we design two bias learning branches to detect more bias information, which are combined with a dynamical constraint loss to alleviate the problem of over-correction and insufficient debiasing effect. We evaluate our method on the challenging VQA v2.0 and VQA-CP V2,0 datasets and the proposed method achievessignificant improvement.</abstract>
       <url hash="2198d2bb">2023.findings-acl.311</url>
@@ -6860,9 +6860,9 @@
     </paper>
     <paper id="313">
       <title>Interactive Concept Learning for Uncovering Latent Themes in Large Text Collections</title>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
       <author><first>Tunazzina</first><last>Islam</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Ming</first><last>Yin</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University</affiliation></author>
       <pages>5059-5080</pages>
@@ -6878,7 +6878,7 @@
       <author><first>Shilin</first><last>Qu</last><affiliation>Monash University</affiliation></author>
       <author><first>Tongtong</first><last>Wu</last><affiliation>Monash University</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>5081-5089</pages>
       <abstract>Norms, which are culturally accepted guidelines for behaviours, can be integrated into conversational models to generate utterances that are appropriate for the socio-cultural context. Existing methods for norm recognition tend to focus only on surface-level features of dialogues and do not take into account the interactions within a conversation. To address this issue, we propose NormMark, a probabilistic generative Markov model to carry the latent features throughout a dialogue. These features are captured by discrete and continuous latent variables conditioned on the conversation history, and improve the model’s ability in norm recognition. The model is trainable on weakly annotated data using the variational technique. On a dataset with limited norm annotations, we show that our approach achieves higher F1 score, outperforming current state-of-the-art methods, including GPT3.</abstract>
       <url hash="5a29f6c8">2023.findings-acl.314</url>
@@ -6922,7 +6922,7 @@
       <author><first>Thomas</first><last>Fel</last><affiliation>ANITI, Brown University</affiliation></author>
       <author><first>Laurent</first><last>Risser</last><affiliation>CNRS, Institut de Mathematiques de Toulouse</affiliation></author>
       <author><first>Jean-Michel</first><last>Loubes</last><affiliation>Université Toulouse 3, Institut de Mathématiques</affiliation></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <pages>5120-5136</pages>
       <abstract>Transformer architectures are complex and their use in NLP, while it has engendered many successes, makes their interpretability or explainability challenging. Recent debates have shown that attention maps and attribution methods are unreliable (Pruthi et al., 2019; Brunner et al., 2019). In this paper, we present some of their limitations and introduce COCKATIEL, which successfully addresses some of them. COCKATIEL is a novel, post-hoc, concept-based, model-agnostic XAI technique that generates meaningful explanations from the last layer of a neural net model trained on an NLP classification task by using Non-Negative Matrix Factorization (NMF) to discover the concepts the model leverages to make predictions and by exploiting a Sensitivity Analysis to estimate accurately the importance of each of these concepts for the model. It does so without compromising the accuracy of the underlying model or requiring a new one to be trained. We conduct experiments in single and multi-aspect sentiment analysis tasks and we show COCKATIEL’s superior ability to discover concepts that align with humans’ on Transformer models without any supervision, we objectively verify the faithfulness of its explanations through fidelity metrics, and we showcase its ability to provide meaningful explanations in two different datasets. Our code is freely available: <url>https://github.com/fanny-jourdan/cockatiel</url></abstract>
       <url hash="def49af5">2023.findings-acl.317</url>
@@ -6935,7 +6935,7 @@
       <author><first>Avik</first><last>Ray</last><affiliation>Amazon</affiliation></author>
       <author><first>Shubham</first><last>Garg</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <pages>5137-5151</pages>
       <abstract>Existing efforts on text synthesis for code-switching mostly require training on code-switched texts in the target language pairs, limiting the deployment of the models to cases lacking code-switched data. In this work, we study the problem of synthesizing code-switched texts for language pairs absent from the training data. We introduce GLOSS, a model built on top of a pre-trained multilingual machine translation model (PMMTM) with an additional code-switching module. This module, either an adapter or extra prefixes, learns code-switching patterns from code-switched data during training, while the primary component of GLOSS, i.e., the PMMTM, is frozen. The design of only adjusting the code-switching module prevents our model from overfitting to the constrained training data for code-switching. Hence, GLOSS exhibits the ability to generalize and synthesize code-switched texts across a broader spectrum of language pairs. Additionally, we develop a self-training algorithm on target language pairs further to enhance the reliability of GLOSS. Automatic evaluations on four language pairs show that GLOSS achieves at least 55% relative BLEU and METEOR scores improvements compared to strong baselines. Human evaluations on two language pairs further validate the success of GLOSS.</abstract>
       <url hash="d138bc12">2023.findings-acl.318</url>
@@ -6957,8 +6957,8 @@
     <paper id="320">
       <title>Data-Efficient <fixed-case>F</fixed-case>rench Language Modeling with <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>a</title>
       <author><first>Wissam</first><last>Antoun</last><affiliation>Inria</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
-      <author><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
       <pages>5174-5185</pages>
       <abstract>Recent advances in NLP have significantly improved the performance of language models on a variety of tasks. While these advances are largely driven by the availability of large amounts of data and computational power, they also benefit from the development of better training methods and architectures. In this paper, we introduce CamemBERTa, a French DeBERTa model that builds upon the DeBERTaV3 architecture and training objective. We evaluate our model’s performance on a variety of French downstream tasks and datasets, including question answering, part-of-speech tagging, dependency parsing, named entity recognition, and the FLUE benchmark, and compare against CamemBERT, the state-of-the-art monolingual model for French. Our results show that, given the same amount of training tokens, our model outperforms BERT-based models trained with MLM on most tasks. Furthermore, our new model reaches similar or superior performance on downstream tasks compared to CamemBERT, despite being trained on only 30% of its total number of input tokens. In addition to our experimental results, we also publicly release the weights and code implementation of CamemBERTa, making it the first publicly available DeBERTaV3 model outside of the original paper and the first openly available implementation of a DeBERTaV3 training objective.</abstract>
       <url hash="ca5aa7fd">2023.findings-acl.320</url>
@@ -7149,7 +7149,7 @@
       <author><first>Zifeng</first><last>Ding</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Yujia</first><last>Gu</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Heinz</first><last>Koeppl</last><affiliation>TU Darmstadt</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <author><first>Volker</first><last>Tresp</last><affiliation>LMU</affiliation></author>
       <pages>5433-5447</pages>
       <abstract>Since conventional knowledge embedding models cannot take full advantage of the abundant textual information, there have been extensive research efforts in enhancing knowledge embedding using texts. However, existing enhancement approaches cannot apply to <i>temporal knowledge graphs</i> (tKGs), which contain time-dependent event knowledge with complex temporal dynamics. Specifically, existing enhancement approaches often assume knowledge embedding is time-independent. In contrast, the entity embedding in tKG models usually evolves, which poses the challenge of aligning <i>temporally relevant</i> texts with entities. To this end, we propose to study enhancing temporal knowledge embedding with textual data in this paper. As an approach to this task, we propose Enhanced Temporal Knowledge Embeddings with Contextualized Language Representations (ECOLA), which takes the temporal aspect into account and injects textual information into temporal knowledge embedding. To evaluate ECOLA, we introduce three new datasets for training and evaluating ECOLA. Extensive experiments show that ECOLA significantly enhances temporal KG embedding models with up to 287% relative improvements regarding Hits@1 on the link prediction task. The code and models are publicly available on <url>https://github.com/mayhugotong/ECOLA</url>.</abstract>
@@ -7181,7 +7181,7 @@
       <author><first>Yong</first><last>Ding</last><affiliation>honor</affiliation></author>
       <author><first>Yibo</first><last>Zhang</last><affiliation>Honor Device Co., Ltd</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>5459-5473</pages>
       <abstract>In real-world applications, pre-trained language models are typically deployed on the cloud, allowing clients to upload data and perform compute-intensive inference remotely. To avoid sharing sensitive data directly with service providers, clients can upload numerical representations rather than plain text to the cloud. However, recent text reconstruction techniques have demonstrated that it is possible to transform representations into original words, suggesting that privacy risk remains. In this paper, we propose TextObfuscator, a novel framework for protecting inference privacy by applying random perturbations to clustered representations. The random perturbations make the representations indistinguishable from surrounding clustered representations, thus obscuring word information while retaining the original word functionality. To achieve this, we utilize prototypes to learn clustered representation, where tokens of similar functionality are encouraged to be closer to the same prototype during training. Additionally, we design different methods to find prototypes for token-level and sentence-level tasks, which can improve performance by incorporating semantic and task information. Experimental results on token and sentence classification tasks show that TextObfuscator achieves improvement over compared methods without increasing inference cost.</abstract>
       <url hash="542fad60">2023.findings-acl.337</url>
@@ -7304,7 +7304,7 @@
     </paper>
     <paper id="347">
       <title>Rethinking Semi-supervised Learning with Language Models</title>
-      <author><first>Zhengxiang</first><last>Shi</last><affiliation>University College London</affiliation></author>
+      <author id="zhengyan-shi"><first>Zhengxiang</first><last>Shi</last><affiliation>University College London</affiliation></author>
       <author><first>Francesco</first><last>Tonolini</last><affiliation>Amazon</affiliation></author>
       <author><first>Nikolaos</first><last>Aletras</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Emine</first><last>Yilmaz</last><affiliation>UCL &amp; Amazon</affiliation></author>
@@ -7318,11 +7318,11 @@
     </paper>
     <paper id="348">
       <title>Retrieval-Based Transformer for Table Augmentation</title>
-      <author><first>Michael</first><last>Glass</last><affiliation>IBM</affiliation></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last><affiliation>IBM</affiliation></author>
       <author><first>Xueqing</first><last>Wu</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Ankita Rajaram</first><last>Naik</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Gaetano</first><last>Rossiello</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Alfio</first><last>Gliozzo</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last><affiliation>IBM Research AI</affiliation></author>
       <pages>5635-5648</pages>
       <abstract>Data preparation, also called data wrangling, is considered one of the most expensive and time-consuming steps when performing analytics or building machine learning models. Preparing data typically involves collecting and merging data from complex heterogeneous, and often large-scale data sources, such as data lakes. In this paper, we introduce a novel approach toward automatic data wrangling in an attempt to alleviate the effort of end-users, e.g. data analysts, in structuring dynamic views from data lakes in the form of tabular data. Given a corpus of tables, we propose a retrieval augmented transformer model that is self-trained for the table augmentation tasks of row/column population and data imputation. Our self-learning strategy consists in randomly ablating tables from the corpus and training the retrieval-based model with the objective of reconstructing the partial tables given as input with the original values or headers. We adopt this strategy to first train the dense neural retrieval model encoding portions of tables to vectors, and then the end-to-end model trained to perform table augmentation tasks. We test on EntiTables, the standard benchmark for table augmentation, as well as introduce a new benchmark to advance further research: WebTables. Our model consistently and substantially outperforms both supervised statistical methods and the current state-of-the-art transformer-based models.</abstract>
       <url hash="da2a8c33">2023.findings-acl.348</url>
@@ -7361,7 +7361,7 @@
       <author><first>Jean</first><last>Crawford</last><affiliation>Microsoft</affiliation></author>
       <author><first>Jayant</first><last>Krishnamurthy</last><affiliation>Microsoft</affiliation></author>
       <author><first>Adam</first><last>Pauls</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Johns Hopkins University + Microsoft Corporation</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>MIT</affiliation></author>
       <author><first>Dan</first><last>Klein</last><affiliation>UC Berkeley / Microsoft</affiliation></author>
       <pages>5682-5700</pages>
@@ -7415,7 +7415,7 @@
       <author><first>Fengran</first><last>Mo</last><affiliation>Universite de Montreal</affiliation></author>
       <author><first>Yanhao</first><last>Wang</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Cen</first><last>Chen</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Chengyu</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Jamie</first><last>Cui</last><affiliation>Ant Group</affiliation></author>
       <pages>5747-5758</pages>
@@ -7430,7 +7430,7 @@
       <author><first>Ahmad</first><last>Rashid</last><affiliation>University of Waterloo; Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Ivan</first><last>Kobyzev</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last><affiliation>Noah’s Ark Lab Huawei</affiliation></author>
-      <author><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>5759-5774</pages>
       <abstract>Regularization techniques are crucial to improving the generalization performance and training efficiency of deep neural networks. Many deep learning algorithms rely on weight decay, dropout, batch/layer normalization to converge faster and generalize. Label Smoothing (LS) is another simple, versatile and efficient regularization which can be applied to various supervised classification tasks. Conventional LS, however, regardless of the training instance assumes that each non-target class is equally likely. In this work, we present a general framework for training with label regularization, which includes conventional LS but can also model instance-specific variants. Based on this formulation, we propose an efficient way of learning LAbel regularization by devising a Bi-level Optimization (LABO) problem. We derive a deterministic and interpretable solution of the inner loop as the optimal label smoothing without the need to store the parameters or the output of a trained model. Finally, we conduct extensive experiments and demonstrate our LABO consistently yields improvement over conventional label regularization on various fields, including seven machine translation and three image classification tasks across various neural network architectures while maintaining training efficiency.</abstract>
       <url hash="1620106e">2023.findings-acl.356</url>
@@ -7646,7 +7646,7 @@
       <author><first>Mengdi</first><last>Zhou</last><affiliation>Ant Group</affiliation></author>
       <author><first>Guilin</first><last>Qi</last><affiliation>Southeast University</affiliation></author>
       <author><first>Tianxing</first><last>Wu</last><affiliation>Southeast University</affiliation></author>
-      <author><first>Dong</first><last>Yang</last><affiliation>antfin</affiliation></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last><affiliation>antfin</affiliation></author>
       <author><first>Liubin</first><last>Wang</last><affiliation>Ant Group</affiliation></author>
       <author><first>Yong</first><last>Pan</last><affiliation>alipay</affiliation></author>
       <pages>6007-6018</pages>
@@ -7660,7 +7660,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Jinheon</first><last>Baek</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Sung Ju</first><last>Hwang</last><affiliation>KAIST</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>KAIST</affiliation></author>
       <pages>6019-6031</pages>
       <abstract>Open-Domain Conversational Question Answering (ODConvQA) aims at answering questions through a multi-turn conversation based on a retriever-reader pipeline, which retrieves passages and then predicts answers with them. However, such a pipeline approach not only makes the reader vulnerable to the errors propagated from the retriever, but also demands additional effort to develop both the retriever and the reader, which further makes it slower since they are not runnable in parallel. In this work, we propose a method to directly predict answers with a phrase retrieval scheme for a sequence of words, reducing the conventional two distinct subtasks into a single one. Also, for the first time, we study its capability for ODConvQA tasks. However, simply adopting it is largely problematic, due to the dependencies between previous and current turns in a conversation. To address this problem, we further introduce a novel contrastive learning strategy, making sure to reflect previous turns when retrieving the phrase for the current context, by maximizing representational similarities of consecutive turns in a conversation while minimizing irrelevant conversational contexts. We validate our model on two ODConvQA datasets, whose experimental results show that it substantially outperforms the relevant baselines with the retriever-reader. Code is available at: <url>https://github.com/starsuzi/PRO-ConvQA</url>.</abstract>
       <url hash="6f973d26">2023.findings-acl.374</url>
@@ -7711,14 +7711,14 @@
     </paper>
     <paper id="378">
       <title><fixed-case>LEDA</fixed-case>: a Large-Organization Email-Based Decision-Dialogue-Act Analysis Dataset</title>
-      <author><first>Vanja Mladen</first><last>Karan</last><affiliation>Queen Mary University</affiliation></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last><affiliation>Queen Mary University</affiliation></author>
       <author><first>Prashant</first><last>Khare</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Ravi</first><last>Shekhar</last><affiliation>University of Essex</affiliation></author>
       <author><first>Stephen</first><last>McQuistin</last><affiliation>University of Glasgow</affiliation></author>
       <author><first>Ignacio</first><last>Castro</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Gareth</first><last>Tyson</last><affiliation>QMUL</affiliation></author>
       <author><first>Colin</first><last>Perkins</last><affiliation>University of Glasgow</affiliation></author>
-      <author><first>Patrick</first><last>Healey</last><affiliation>Queen Mary, University of London</affiliation></author>
+      <author id="patrick-healey"><first>Patrick</first><last>Healey</last><affiliation>Queen Mary, University of London</affiliation></author>
       <author><first>Matthew</first><last>Purver</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>6080-6089</pages>
       <abstract>Collaboration increasingly happens online. This is especially true for large groups working on global tasks, with collaborators all around the globe. The size and distributed nature of such groups makes decision-making challenging. This paper proposes a set of dialog acts for the study of decision-making mechanisms in such groups, and provides a new annotated dataset based on real-world data from the public mail-archives of one such organisation – the Internet Engineering Task Force (IETF). We provide an initial data analysis showing that this dataset can be used to better understand decision-making in such organisations. Finally, we experiment with a preliminary transformer-based dialog act tagging model.</abstract>
@@ -7747,7 +7747,7 @@
       <author><first>SongEun</first><last>Lee</last><affiliation>Hyundai Motor Group</affiliation></author>
       <author><first>Changwoo</first><last>Chun</last><affiliation>Hyundai Motor Company</affiliation></author>
       <author><first>Sungsoo</first><last>Park</last><affiliation>Hyundai Motor Company</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>6100-6115</pages>
       <abstract>Recent advances in QA pair generation (QAG) have raised interest in applying this technique to the educational field. However, the diversity of QA types remains a challenge despite its contributions to comprehensive learning and assessment of children. In this paper, we propose a QAG framework that enhances QA type diversity by producing different interrogative sentences and implicit/explicit answers. Our framework comprises a QFS-based answer generator, an iterative QA generator, and a relevancy-aware ranker. The two generators aim to expand the number of candidates while covering various types. The ranker trained on the in-context negative samples clarifies the top-N outputs based on the ranking score. Extensive evaluations and detailed analyses demonstrate that our approach outperforms previous state-of-the-art results by significant margins, achieving improved diversity and quality. Our task-oriented processes are consistent with real-world demand, which highlights our system’s high applicability.</abstract>
       <url hash="46067c3b">2023.findings-acl.380</url>
@@ -7770,9 +7770,9 @@
       <author><first>Yuliang</first><last>Li</last><affiliation>Meta</affiliation></author>
       <author><first>Pedro</first><last>Rodriguez</last><affiliation>Meta FAIR</affiliation></author>
       <author><first>Richard</first><last>James</last><affiliation>Meta</affiliation></author>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta AI</affiliation></author>
       <author><first>Alon</first><last>Halevy</last><affiliation>Facebook AI</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
       <pages>6131-6146</pages>
       <abstract>We present a reality check on large language models and inspect the promise of retrieval-augmented language models in comparison. Such language models are semi-parametric, where models integrate model parameters and knowledge from external data sources to make their predictions, as opposed to the parametric nature of vanilla large language models. We give initial experimental findings that semi-parametric architectures can be enhanced with views, a query analyzer/planner, and provenance to make a significantly more powerful system for question answering in terms of accuracy and efficiency, and potentially for other NLP tasks.</abstract>
       <url hash="34c9c266">2023.findings-acl.382</url>
@@ -7810,8 +7810,8 @@
       <title><fixed-case>P</fixed-case>ragmati<fixed-case>CQA</fixed-case>: A Dataset for Pragmatic Question Answering in Conversations</title>
       <author><first>Peng</first><last>Qi</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Nina</first><last>Du</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <pages>6175-6191</pages>
       <abstract>Pragmatic reasoning about another speaker’s unspoken intent and state of mind is crucial to efficient and effective human communication. It is virtually omnipresent in conversations between humans, e.g., when someone asks “do you have a minute?”, instead of interpreting it literally as a query about your schedule, you understand that the speaker might have requests that take time, and respond accordingly. In this paper, we present PragmatiCQA, the first large-scale open-domain question answering (QA) dataset featuring 6873 QA pairs that explores pragmatic reasoning in conversations over a diverse set of topics. We designed innovative crowdsourcing mechanisms for interest-based and task-driven data collection to address the common issue of incentive misalignment between crowdworkers and potential users. To compare computational models’ capability at pragmatic reasoning, we also propose several quantitative metrics to evaluate question answering systems on PragmatiCQA. We find that state-of-the-art systems still struggle to perform human-like pragmatic reasoning, and highlight their limitations for future research.</abstract>
       <url hash="9bda0e38">2023.findings-acl.385</url>
@@ -7982,10 +7982,10 @@
       <author><first>Yuyang</first><last>Chai</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Terry Yue</first><last>Zhuo</last><affiliation>CSIRO’s Data61 and Monash University</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
-      <author><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
-      <author><first>Quan Hung</first><last>Tran</last><affiliation>Adobe Research</affiliation></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last><affiliation>Adobe Research</affiliation></author>
       <pages>6377-6390</pages>
       <abstract>Textual scene graph parsing has become increasingly important in various vision-language applications, including image caption evaluation and image retrieval. However, existing scene graph parsers that convert image captions into scene graphs often suffer from two types of errors. First, the generated scene graphs fail to capture the true semantics of the captions or the corresponding images, resulting in a lack of faithfulness. Second, the generated scene graphs have high inconsistency, with the same semantics represented by different annotations. To address these challenges, we propose a novel dataset, which involves re-annotating the captions in Visual Genome (VG) using a new intermediate representation called FACTUAL-MR. FACTUAL-MR can be directly converted into faithful and consistent scene graph annotations. Our experimental results clearly demonstrate that the parser trained on our dataset outperforms existing approaches in terms of faithfulness and consistency. This improvement leads to a significant performance boost in both image caption evaluation and zero-shot image retrieval tasks. Furthermore, we introduce a novel metric for measuring scene graph similarity, which, when combined with the improved scene graph parser, achieves state-of-the-art (SOTA) results on multiple benchmark datasets for the aforementioned tasks.</abstract>
       <url hash="341eae6a">2023.findings-acl.398</url>
@@ -8055,7 +8055,7 @@
     </paper>
     <paper id="404">
       <title>Correction of Errors in Preference Ratings from Automated Metrics for Text Generation</title>
-      <author><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Pius</first><last>von Däniken</last><affiliation>Zurich University of Applied Sciences ZHAW</affiliation></author>
       <author><first>Don</first><last>Tuggener</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
@@ -8125,8 +8125,8 @@
       <author><first>Yicheng</first><last>Xu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yan</first><last>Gao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Jian-Guang</first><last>Lou</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>6535-6549</pages>
       <abstract>Hybrid Question-Answering (HQA), which targets reasoning over tables and passages linked from table cells, has witnessed significant research in recent years. A common challenge in HQA and other passage-table QA datasets is that it is generally unrealistic to iterate over all table rows, columns, and linked passages to retrieve evidence. Such a challenge made it difficult for previous studies to show their reasoning ability in retrieving answers. To bridge this gap, we propose a novel Table-alignment-based Cell-selection and Reasoning model (TACR) for hybrid text and table QA, evaluated on the HybridQA and WikiTableQuestions datasets. In evidence retrieval, we design a table-question-alignment enhanced cell-selection method to retrieve fine-grained evidence. In answer reasoning, we incorporate a QA module that treats the row containing selected cells as context. Experimental results over the HybridQA and WikiTableQuestions (WTQ) datasets show that TACR achieves state-of-the-art results on cell selection and outperforms fine-grained evidence retrieval baselines on HybridQA, while achieving competitive performance on WTQ. We also conducted a detailed analysis to demonstrate that being able to align questions to tables in the cell-selection stage can result in important gains from experiments of over 90% table row and column selection accuracy, meanwhile also improving output explainability.</abstract>
       <url hash="d8bfe448">2023.findings-acl.409</url>
@@ -8192,7 +8192,7 @@
       <author><first>Eunkyul Leah</first><last>Jo</last></author>
       <author><first>Kyuwon</first><last>Kim</last></author>
       <author><first>Xihan</first><last>Wu</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <author><first>Chulwoo</first><last>Park</last></author>
       <pages>6613-6623</pages>
@@ -8382,7 +8382,7 @@
       <author><first>Majid</first><last>Yazdani</last><affiliation>BYJU’S Lab</affiliation></author>
       <author><first>Pouya</first><last>Yanki</last><affiliation>Meta</affiliation></author>
       <author><first>Angela</first><last>Fan</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Marzieh</first><last>Saeidi</last><affiliation>Shiftlab AI</affiliation></author>
       <pages>6845-6867</pages>
       <abstract>Existing metrics for evaluating the quality of automatically generated questions such as BLEU, ROUGE, BERTScore, and BLEURT compare the reference and predicted questions, providing a high score when there is a considerable lexical overlap or semantic similarity between the candidate and the reference questions. This approach has two major shortcomings. First, we need expensive human-provided reference questions. Second, it penalises valid questions that may not have high lexical or semantic similarity to the reference questions. In this paper, we propose a new metric, RQUGE, based on the answerability of the candidate question given the context. The metric consists of a question-answering and a span scorer modules, using pre-trained models from existing literature, thus it can be used without any further training. We demonstrate that RQUGE has a higher correlation with human judgment without relying on the reference question. Additionally, RQUGE is shown to be more robust to several adversarial corruptions. Furthermore, we illustrate that we can significantly improve the performance of QA models on out-of-domain datasets by fine-tuning on synthetic data generated by a question generation model and reranked by RQUGE.</abstract>
@@ -8440,7 +8440,7 @@
     </paper>
     <paper id="433">
       <title>Words as Gatekeepers: Measuring Discipline-specific Terms and Meanings in Scholarly Publications</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>University of California, Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>David</first><last>Bamman</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>Katherine</first><last>Keith</last><affiliation>Williams College</affiliation></author>
@@ -8484,7 +8484,7 @@
       <author><first>Filip</first><last>Ginter</last><affiliation>University of Turku</affiliation></author>
       <author><first>Sampo</first><last>Pyysalo</last><affiliation>University of Turku</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last><affiliation>IT University of Copenhagen</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>6984-6993</pages>
       <abstract>Relation Extraction (RE) remains a challenging task, especially when considering realistic out-of-domain evaluations. One of the main reasons for this is the limited training size of current RE datasets: obtaining high-quality (manually annotated) data is extremely expensive and cannot realistically be repeated for each new domain. An intermediate training step on data from related tasks has shown to be beneficial across many NLP tasks. However, this setup still requires supplementary annotated data, which is often not available. In this paper, we investigate intermediate pre-training specifically for RE. We exploit the affinity between syntactic structure and semantic RE, and identify the syntactic relations which are closely related to RE by being on the shortest dependency path between two entities. We then take advantage of the high accuracy of current syntactic parsers in order to automatically obtain large amounts of low-cost pre-training data. By pre-training our RE model on the relevant syntactic relations, we are able to outperform the baseline in five out of six cross-domain setups, without any additional annotated data.</abstract>
       <url hash="5aacb290">2023.findings-acl.436</url>
@@ -8521,7 +8521,7 @@
       <author><first>Katharina</first><last>Hämmerl</last><affiliation>Center for Information and Language Processing, LMU</affiliation></author>
       <author><first>Alina</first><last>Fastowski</last><affiliation>Center for Information and Language Processing, LMU Munich</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles Univeristy</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>7023-7037</pages>
       <abstract>Previous work has shown that the representations output by contextual language models are more anisotropic than static type embeddings, and typically display outlier dimensions. This seems to be true for both monolingual and multilingual models, although much less work has been done on the multilingual context. Why these outliers occur and how they affect the representations is still an active area of research. We investigate outlier dimensions and their relationship to anisotropy in multiple pre-trained multilingual language models. We focus on cross-lingual semantic similarity tasks, as these are natural tasks for evaluating multilingual representations. Specifically, we examine sentence representations. Sentence transformers which are fine-tuned on parallel resources (that are not always available) perform better on this task, and we show that their representations are more isotropic. However, we aim to improve multilingual representations in general. We investigate how much of the performance difference can be made up by only transforming the embedding space without fine-tuning, and visualise the resulting spaces. We test different operations: Removing individual outlier dimensions, cluster-based isotropy enhancement, and ZCA whitening. We publish our code for reproducibility.</abstract>
       <url hash="853e34e2">2023.findings-acl.439</url>
@@ -8573,7 +8573,7 @@
       <author><first>Shihao</first><last>Ran</last><affiliation>Dataminr</affiliation></author>
       <author><first>Di</first><last>Lu</last><affiliation>Dataminr</affiliation></author>
       <author><first>Aoife</first><last>Cahill</last><affiliation>Dataminr</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>7089-7113</pages>
       <abstract>The ability to conduct retrospective analyses of attacks on human rights defenders over time and by location is important for humanitarian organizations to better understand historical or ongoing human rights violations and thus better manage the global impact of such events. We hypothesize that NLP can support such efforts by quickly processing large collections of news articles to detect and summarize the characteristics of attacks on human rights defenders. To that end, we propose a new dataset for detecting Attacks on Human Rights Defenders (HRDsAttack) consisting of crowdsourced annotations on 500 online news articles. The annotations include fine-grained information about the type and location of the attacks, as well as information about the victim(s). We demonstrate the usefulness of the dataset by using it to train and evaluate baseline models on several sub-tasks to predict the annotated characteristics.</abstract>
@@ -8587,7 +8587,7 @@
       <author><first>Christian</first><last>Herold</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Yingbo</first><last>Gao</last><affiliation>RWTH Aachen University</affiliation></author>
       <author><first>Mohammad</first><last>Zeineldeen</last><affiliation>RWTH Aachen University / AppTek</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <pages>7114-7123</pages>
       <abstract>The integration of language models for neural machine translation has been extensively studied in the past. It has been shown that an external language model, trained on additional target-side monolingual data, can help improve translation quality. However, there has always been the assumption that the translation model also learns an implicit target-side language model during training, which interferes with the external language model at decoding time. Recently, some works on automatic speech recognition have demonstrated that, if the implicit language model is neutralized in decoding, further improvements can be gained when integrating an external language model. In this work, we transfer this concept to the task of machine translation and compare with the most prominent way of including additional monolingual data - namely back-translation. We find that accounting for the implicit language model significantly boosts the performance of language model fusion, although this approach is still outperformed by back-translation.</abstract>
       <url hash="9c65db32">2023.findings-acl.444</url>
@@ -8701,7 +8701,7 @@
     <paper id="453">
       <title><fixed-case>I</fixed-case> run as fast as a rabbit, can you? A Multilingual Simile Dialogues Datasets</title>
       <author><first>Longxuan</first><last>Ma</last><affiliation>Research Center for Social Computing and Information Retrieval, Harbin Institute of Technology</affiliation></author>
-      <author><first>Wei-Nan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Shuhan</first><last>Zhou</last><affiliation>Beijing Language and Culture University</affiliation></author>
       <author><first>Churui</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Changxin</first><last>Ke</last><affiliation>Harbin Institute of Technology</affiliation></author>
@@ -8746,7 +8746,7 @@
     <paper id="457">
       <title>Exploiting Commonsense Knowledge about Objects for Visual Activity Recognition</title>
       <author><first>Tianyu</first><last>Jiang</last><affiliation>University of Utah</affiliation></author>
-      <author><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last><affiliation>University of Utah</affiliation></author>
       <pages>7277-7285</pages>
       <abstract>Situation recognition is the task of recognizing the activity depictedin an image, including the people and objects involved. Previousmodels for this task typically train a classifier to identify theactivity using a backbone image feature extractor. We propose thatcommonsense knowledge about the objects depicted in an image can alsobe a valuable source of information for activity identification. Previous NLP research has argued that knowledge about the prototypicalfunctions of physical objects is important for language understanding,and NLP techniques have been developed to acquire this knowledge. Our work investigates whether this prototypical function knowledgecan also be beneficial for visual situation recognition. Webuild a framework that incorporates this type of commonsense knowledgein a transformer-based model that is trained to predict the actionverb for situation recognition. Our experimental results show thatadding prototypical function knowledge about physical objects doesimprove performance for the visual activity recognition task.</abstract>
       <url hash="e9c84803">2023.findings-acl.457</url>
@@ -8793,7 +8793,7 @@
       <title>Modeling Adversarial Attack on Pre-trained Language Models as Sequential Decision Making</title>
       <author><first>Xuanjie</first><last>Fang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Sijie</first><last>Cheng</last><affiliation>Fudan University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Wei</first><last>Wang</last><affiliation>fudan university</affiliation></author>
       <pages>7322-7336</pages>
       <abstract>Pre-trained language models (PLMs) have been widely used to underpin various downstream tasks. However, the adversarial attack task has found that PLMs are vulnerable to small perturbations. Mainstream methods adopt a detached two-stage framework to attack without considering the subsequent influence of substitution at each step. In this paper, we formally model the adversarial attack task on PLMs as a sequential decision-making problem, where the whole attack process is sequential with two decision-making problems, i.e., word finder and word substitution. Considering the attack process can only receive the final state without any direct intermediate signals, we propose to use reinforcement learning to find an appropriate sequential attack path to generate adversaries, named SDM-ATTACK. Our experimental results show that SDM-ATTACK achieves the highest attack success rate with a comparable modification rate and semantic similarity to attack fine-tuned BERT. Furthermore, our analyses demonstrate the generalization and transferability of SDM-ATTACK.Resources of this work will be released after this paper’s publication.</abstract>
@@ -8808,7 +8808,7 @@
       <author><first>Yang</first><last>Deng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Wai Chung</first><last>Kwan</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Zezhong</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong</affiliation></author>
       <pages>7337-7345</pages>
       <abstract>Generating persona consistent dialogue response is important for developing an intelligent conversational agent. Recent works typically fine-tune large-scale pre-trained models on this task by concatenating persona texts and dialogue history as a single input sequence to generate the target response. While simple and effective, our analysis shows that this popular practice is seriously affected by order sensitivity where different input orders of persona sentences significantly impact the quality and consistency of generated response, resulting in severe performance fluctuations (i.e., 29.4% on GPT2 and 83.2% on BART). To mitigate the order sensitivity problem, we propose a model-agnostic framework, ORder Insensitive Generation (ORIG), which enables dialogue models to learn robust representation under different persona orders and improve the consistency of response generation. Experiments on the Persona-Chat dataset justify the effectiveness and superiority of our method with two dominant pre-trained models (GPT2 and BART).</abstract>
       <url hash="afcfbcb4">2023.findings-acl.462</url>
@@ -8818,8 +8818,8 @@
     <paper id="463">
       <title>Cost-effective Distillation of Large Language Models</title>
       <author><first>Sayantan</first><last>Dasgupta</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <pages>7346-7354</pages>
       <abstract>Knowledge distillation (KD) involves training a small “student” model to replicate the strong performance of a high-capacity “teacher” model, enabling efficient deployment in resource-constrained settings. Top-performing methods tend to be task- or architecture-specific and lack generalizability. Several existing approaches require pretraining of the teacher on task-specific datasets, which can be costly for large and unstable for small datasets. Here we propose an approach for improving KD through a novel distillation loss agnostic to the task and model architecture. We successfully apply our method to the distillation of the BERT-base and achieve highly competitive results from the distilled student across a range of GLUE tasks, especially for tasks with smaller datasets.</abstract>
       <url hash="4248ce68">2023.findings-acl.463</url>
@@ -8967,7 +8967,7 @@
       <author><first>Yinghao</first><last>Li</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Colin</first><last>Lockard</last><affiliation>Amazon</affiliation></author>
       <author><first>Prashant</first><last>Shiralkar</last><affiliation>Amazon</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <pages>7509-7525</pages>
       <abstract>Recommending a diversity of product types (PTs) is important for a good shopping experience when customers are looking for products around their high-level shopping interests (SIs) such as hiking. However, the SI-PT connection is typically absent in e-commerce product catalogs and expensive to construct manually due to the volume of potential SIs, which prevents us from establishing a recommender with easily accessible knowledge systems. To establish such connections, we propose to extract PTs from the Web pages containing hand-crafted PT recommendations for SIs. The extraction task is formulated as binary HTML node classification given the general observation that an HTML node in our target Web pages can present one and only one PT phrase. Accordingly, we introduce TrENC, which stands for Tree-Transformer Encoders for Node Classification. It improves the inter-node dependency modeling with modified attention mechanisms that preserve the long-term sibling and ancestor-descendant relations. TrENC also injects SI into node features for better semantic representation. Trained on pages regarding limited SIs, TrEnc is ready to be applied to other unobserved interests. Experiments on our manually constructed dataset, WebPT, show that TrENC outperforms the best baseline model by 2.37 F1 points in the zero-shot setup. The performance indicates the feasibility of constructing SI-PT relations and using them to power downstream applications such as search and recommendation.</abstract>
       <url hash="81b8d7f6">2023.findings-acl.474</url>
@@ -9040,8 +9040,8 @@
       <author><first>Tingting</first><last>Ma</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Jiahai</first><last>Wang</last><affiliation>Sun Yat-sen University</affiliation></author>
       <author><first>Jian</first><last>Yin</last><affiliation>Sun Yat-Sen University</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Nan</first><last>Duan</last><affiliation>Microsoft Research Asia</affiliation></author>
       <pages>7587-7600</pages>
       <abstract>This paper presents ReasonFormer, a unified reasoning framework for mirroring the modular and compositional reasoning process of humans in complex decision-making. Inspired by dual-process theory in cognitive science, the representation module (automatic thinking) and reasoning modules (controlled thinking) are decoupled to capture different levels of cognition. Upon the top of the representation module, the pre-trained reasoning modules are modular and professional in specific and fundamental reasoning skills (e.g., logic, simple QA, etc). To mimic the controlled compositional thinking process, different reasoning modules are dynamically activated and composed in both parallel and cascaded manners to control what reasoning skills are activated and how deep the reasoning process will be reached to solve the current problems. The unified reasoning framework solves multiple tasks with a single model, and is trained and inferred in an end-to-end manner. Evaluated on 11 datasets requiring different reasoning skills and complexity, ReasonFormer demonstrates substantial performance boosts, revealing the compositional reasoning ability. Few-shot experiments exhibit better generalization ability by learning to compose pre-trained skills for new tasks with limited data, and decoupling the representation module and the reasoning modules. Further analysis shows the modularity of reasoning modules as different tasks activate distinct reasoning skills at different reasoning depths.</abstract>
@@ -9053,7 +9053,7 @@
       <title>Towards Argument-Aware Abstractive Summarization of Long Legal Opinions with Summary Reranking</title>
       <author><first>Mohamed</first><last>Elaraby</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Yang</first><last>Zhong</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>7601-7612</pages>
       <abstract>We propose a simple approach for the abstractive summarization of long legal opinions that takes into account the argument structure of the document. Legal opinions often contain complex and nuanced argumentation, making it challenging to generate a concise summary that accurately captures the main points of the legal opinion. Our approach involves using argument role information to generate multiple candidate summaries, then reranking these candidates based on alignment with the document’s argument structure. We demonstrate the effectiveness of our approach on a dataset of long legal opinions and show that it outperforms several strong baselines.</abstract>
       <url hash="d5c68eab">2023.findings-acl.481</url>
@@ -9189,9 +9189,9 @@
       <title>Scaling Laws for <fixed-case>BERT</fixed-case> in Low-Resource Settings</title>
       <author><first>Gorka</first><last>Urbizu</last><affiliation>Orai NLP Technologies</affiliation></author>
       <author><first>Iñaki</first><last>San Vicente</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last><affiliation>Orai NLP Technologies</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>7771-7789</pages>
       <abstract>Large language models are very resource intensive, both financially and environmentally, and require an amount of training data which is simply unobtainable for the majority of NLP practitioners. Previous work has researched the scaling laws of such models, but optimal ratios of model parameters, dataset size, and computation costs focused on the large scale. In contrast, we analyze the effect those variables have on the performance of language models in constrained settings, by building three lightweight BERT models (16M/51M/124M parameters) trained over a set of small corpora (5M/25M/125M words).We experiment on four languages of different linguistic characteristics (Basque, Spanish, Swahili and Finnish), and evaluate the models on MLM and several NLU tasks. We conclude that the power laws for parameters, data and compute for low-resource settings differ from the optimal scaling laws previously inferred, and data requirements should be higher. Our insights are consistent across all the languages we study, as well as across the MLM and downstream tasks. Furthermore, we experimentally establish when the cost of using a Transformer-based approach is worth taking, instead of favouring other computationally lighter solutions.</abstract>
       <url hash="ae7f3f17">2023.findings-acl.492</url>
@@ -9227,7 +9227,7 @@
     </paper>
     <paper id="495">
       <title>Putting Natural in Natural Language Processing</title>
-      <author><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
       <pages>7820-7827</pages>
       <abstract>Human language is firstly spoken and only secondarily written. Text, however, is a very convenient and efficient representation of language, and modern civilization has made it ubiquitous. Thus the field of NLP has overwhelmingly focused on processing written rather than spoken language. Work on spoken language, on the other hand, has been siloed off within the largely separate speech processing community which has been inordinately preoccupied with transcribing speech into text. Recent advances in deep learning have led to a fortuitous convergence in methods between speech processing and mainstream NLP. Arguably, the time is ripe for a unification of these two fields, and for starting to take spoken language seriously as the primary mode of human communication. Truly natural language processing could lead to better integration with the rest of language science and could lead to systems which are more data-efficient and more human-like, and which can communicate beyond the textual modality.</abstract>
       <url hash="db20822d">2023.findings-acl.495</url>
@@ -9288,7 +9288,7 @@
       <title>Additive manifesto decomposition: A policy domain aware method for understanding party positioning</title>
       <author><first>Tanise</first><last>Ceron</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Dmitry</first><last>Nikolaev</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
       <pages>7874-7890</pages>
       <abstract>Automatic extraction of party (dis)similarities from texts such as party election manifestos or parliamentary speeches plays an increasing role in computational political science. However, existing approaches are fundamentally limited to targeting only global party (dis)-similarity: they condense the relationship between a pair of parties into a single figure, their similarity. In aggregating over all policy domains (e.g., health or foreign policy), they do not provide any qualitative insights into which domains parties agree or disagree on. This paper proposes a workflow for estimating policy domain aware party similarity that overcomes this limitation. The workflow covers (a) definition of suitable policy domains; (b) automatic labeling of domains, if no manual labels are available; (c) computation of domain-level similarities and aggregation at a global level; (d) extraction of interpretable party positions on major policy axes via multidimensional scaling. We evaluate our workflow on manifestos from the German federal elections. We find that our method (a) yields high correlation when predicting party similarity at a global level and (b) provides accurate party-specific positions, even with automatically labelled policy domains.</abstract>
       <url hash="ce120275">2023.findings-acl.499</url>
@@ -9376,7 +9376,7 @@
       <author><first>Xiaotong</first><last>Jiang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Zhongqing</first><last>Wang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Yue</first><last>Zhang</last><affiliation>Westlake University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University</affiliation></author>
       <pages>7971-7984</pages>
       <abstract>Extracting sentiment elements using pre-trained generative models has recently led to large improvements in aspect-based sentiment analysis benchmarks. These models avoid explicit modeling of structure between sentiment elements, which are succinct yet lack desirable properties such as structure well-formedness guarantees or built-in elements alignments. In this study, we propose an opinion tree parsing model, aiming to parse all the sentiment elements from an opinion tree, which can explicitly reveal a more comprehensive and complete aspect-level sentiment structure. In particular, we first introduce a novel context-free opinion grammar to normalize the sentiment structure. We then employ a neural chart-based opinion tree parser to fully explore the correlations among sentiment elements and parse them in the opinion tree form. Extensive experiments show the superiority of our proposed model and the capacity of the opinion tree parser with the proposed context-free opinion grammar. More importantly, our model is much faster than previous models.</abstract>
       <url hash="0a294a63">2023.findings-acl.505</url>
@@ -9494,7 +9494,7 @@
       <title>Adversarial Training for Low-Resource Disfluency Correction</title>
       <author><first>Vineet</first><last>Bhat</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Preethi</first><last>Jyothi</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>8112-8122</pages>
       <abstract>Disfluencies commonly occur in conversational speech. Speech with disfluencies can result in noisy Automatic Speech Recognition (ASR) transcripts, which affects downstream tasks like machine translation. In this paper, we propose an adversarially-trained sequence-tagging model for Disfluency Correction (DC) that utilizes a small amount of labeled real disfluent data in conjunction with a large amount of unlabeled data. We show the benefit of our proposed technique, which crucially depends on synthetically generated disfluent data, by evaluating it for DC in three Indian languages- Bengali, Hindi, and Marathi (all from the Indo-Aryan family). Our technique also performs well in removing stuttering disfluencies in ASR transcripts introduced by speech impairments. We achieve an average 6.15 points improvement in F1-score over competitive baselines across all three languages mentioned. To the best of our knowledge, we are the first to utilize adversarial training for DC and use it to correct stuttering disfluencies in English, establishing a new benchmark for this task.</abstract>
       <url hash="8ea08f86">2023.findings-acl.514</url>
@@ -9516,7 +9516,7 @@
       <title>Stubborn Lexical Bias in Data and Models</title>
       <author><first>Sofia</first><last>Serrano</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>8131-8146</pages>
       <abstract>In NLP, recent work has seen increased focus on spurious correlations between various features and labels in training data, and how these influence model behavior. However, the presence and effect of such correlations are typically examined feature by feature. We investigate the cumulative impact on a model of many such intersecting features. Using a new statistical method, we examine whether such spurious patterns in data appear in models trained on the data. We select two tasks— natural language inference and duplicate-question detection— for which any unigram feature on its own should ideally be uninformative, which gives us a large pool of automatically extracted features with which to experiment. The large size of this pool allows us to investigate the intersection of features spuriously associated with (potentially different) labels. We then apply an optimization approach to *reweight* the training data, reducing thousands of spurious correlations, and examine how doing so affects models trained on the reweighted data. Surprisingly, though this method can successfully reduce lexical biases in the training data, we still find strong evidence of corresponding bias in the trained models, including worsened bias for slightly more complex features (bigrams). We close with discussion about the implications of our results on what it means to “debias” training data, and how issues of data quality can affect model bias.</abstract>
       <url hash="dcf0757f">2023.findings-acl.516</url>
@@ -9552,7 +9552,7 @@
       <author><first>Wenya</first><last>Guo</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xumeng</first><last>Liu</last><affiliation>Nankai University</affiliation></author>
       <author><first>Shenglong</first><last>Yu</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xiaojie</first><last>Yuan</last><affiliation>Nankai University</affiliation></author>
       <pages>8184-8196</pages>
       <abstract>Multimodal aspect-based sentiment analysis (MABSA) aims to extract aspects from text-image pairs and recognize their sentiments. Existing methods make great efforts to align the whole image to corresponding aspects. However, different regions of the image may relate to different aspects in the same sentence, and coarsely establishing image-aspect alignment will introduce noise to aspect-based sentiment analysis (i.e., visual noise). Besides, the sentiment of a specific aspect can also be interfered by descriptions of other aspects (i.e., textual noise). Considering the aforementioned noises, this paper proposes an Aspect-oriented Method (AoM) to detect aspect-relevant semantic and sentiment information. Specifically, an aspect-aware attention module is designed to simultaneously select textual tokens and image blocks that are semantically related to the aspects. To accurately aggregate sentiment information, we explicitly introduce sentiment embedding into AoM, and use a graph convolutional network to model the vision-text and text-text interaction. Extensive experiments demonstrate the superiority of AoM to existing methods.</abstract>
@@ -9575,7 +9575,7 @@
       <title><fixed-case>MTC</fixed-case>ue: Learning Zero-Shot Control of Extra-Textual Attributes by Leveraging Unstructured Context in Neural Machine Translation</title>
       <author><first>Sebastian</first><last>Vincent</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Robert</first><last>Flynn</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <pages>8210-8226</pages>
       <abstract>Efficient utilisation of both intra- and extra-textual context remains one of the critical gaps between machine and human translation. Existing research has primarily focused on providing individual, well-defined types of context in translation, such as the surrounding text or discrete external variables like the speaker’s gender. This work introduces MTCue, a novel neural machine translation (NMT) framework that interprets all context (including discrete variables) as text. MTCue learns an abstract representation of context, enabling transferability across different data settings and leveraging similar attributes in low-resource scenarios. With a focus on a dialogue domain with access to document and metadata context, we extensively evaluate MTCue in four language pairs in both translation directions. Our framework demonstrates significant improvements in translation quality over a parameter-matched non-contextual baseline, as measured by BLEU (+0.88) and Comet (+1.58). Moreover, MTCue significantly outperforms a “tagging” baseline at translating English text. Analysis reveals that the context encoder of MTCue learns a representation space that organises context based on specific attributes, such as formality, enabling effective zero-shot control. Pre-training on context embeddings also improves MTCue’s few-shot performance compared to the “tagging” baseline. Finally, an ablation study conducted on model components and contextual variables further supports the robustness of MTCue for context-based NMT.</abstract>
       <url hash="9198bba3">2023.findings-acl.521</url>
@@ -9620,7 +9620,7 @@
       <author><first>Emmy</first><last>Liu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Simran</first><last>Khanuja</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Samuel</first><last>Cahyawijaya</last><affiliation>HKUST</affiliation></author>
       <author><first>Anuoluwapo</first><last>Aremu</last><affiliation>Masakhane</affiliation></author>
       <author><first>Perez</first><last>Ogayo</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -9661,7 +9661,7 @@
       <author><first>Ercong</first><last>Nie</last><affiliation>Centre for Information and Language Processing, LMU Munich</affiliation></author>
       <author><first>Sheng</first><last>Liang</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>CIS, Ludwig-Maximilians-Universitaet</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>8320-8340</pages>
       <abstract>Multilingual Pretrained Language Models (MPLMs) perform strongly in cross-lingual transfer. We propose Prompts Augmented by Retrieval Crosslingually (PARC) to improve zero-shot performance on low-resource languages (LRLs) by augmenting the context with prompts consisting of semantically similar sentences retrieved from a high-resource language (HRL). PARC improves zero-shot performance on three downstream tasks (sentiment classification, topic categorization, natural language inference) with multilingual parallel test sets across 10 LRLs covering 6 language families in unlabeled (+5.1%) and labeled settings (+16.3%). PARC also outperforms finetuning by 3.7%. We find a significant positive correlation between cross-lingual transfer performance on one side, and the similarity between high- and low-resource languages as well as the amount of low-resource pretraining data on the other side. A robustness analysis suggests that PARC has the potential to achieve even stronger performance with more powerful MPLMs.</abstract>
       <url hash="8d254cc8">2023.findings-acl.528</url>
@@ -9672,8 +9672,8 @@
     <paper id="529">
       <title>Unsupervised Summarization Re-ranking</title>
       <author><first>Mathieu</first><last>Ravaut</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>8341-8376</pages>
       <abstract>With the rise of task-specific pre-training objectives, abstractive summarization models like PEGASUS offer appealing zero-shot performance on downstream summarization tasks. However, the performance of such unsupervised models still lags significantly behind their supervised counterparts. Similarly to the supervised setup, we notice a very high variance in quality among summary candidates from these models while only one candidate is kept as the summary output. In this paper, we propose to re-rank summary candidates in an unsupervised manner, aiming to close the performance gap between unsupervised and supervised models. Our approach improves the unsupervised PEGASUS by up to 7.27% and ChatGPT by up to 6.86% relative mean ROUGE across four widely-adopted summarization benchmarks ; and achieves relative gains of 7.51% (up to 23.73% from XSum to WikiHow) averaged over 30 zero-shot transfer setups (finetuning on a dataset, evaluating on another).</abstract>
       <url hash="c5772c14">2023.findings-acl.529</url>
@@ -9688,7 +9688,7 @@
       <author><first>Yuxin</first><last>Yang</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Zexin</first><last>Jian</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Changjian</first><last>Wang</last><affiliation>School of computer, National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>8377-8398</pages>
       <abstract>Attribute-based generation methods are of growing significance in controlling the generation of large pre-trained language models (PLMs). Existing studies control the generation by (1) finetuning the model with attributes or (2) guiding the inference processing toward control signals while freezing the PLM. However, finetuning approaches infuse domain bias into generation, making it hard to generate out-of-domain texts. Besides, many methods guide the inference in its word-by-word generation, pushing the word probability to the target attributes, resulting in less fluent sentences. We argue that distilling controlling information from natural texts can produce fluent sentences while maintaining high controllability. In this paper, we propose <b>GRA</b>dient-guided <b>C</b>ontrollable r<b>E</b>trieval (GRACE), a retrieval-augmented generation framework to facilitate the generation of fluent sentences with high attribute relevance. GRACE memorizes the semantic and attribute information from unlabeled corpora and applies a controllable retrieval to obtain desired information. For the generation, we design techniques to eliminate the domain bias from the retrieval results and integrate it into the generation model. Additionally, we propose a gradient-guided generation scheme that iteratively steers generation toward higher attribute relevance. Experimental results and quantities of examples verify the effectiveness of our method.</abstract>
       <url hash="88fc9bc7">2023.findings-acl.530</url>
@@ -9749,7 +9749,7 @@
       <author><first>Zewei</first><last>Sun</last><affiliation>ByteDance</affiliation></author>
       <author><first>Shanbo</first><last>Cheng</last><affiliation>Bytedance AI Lab</affiliation></author>
       <author><first>Mingxuan</first><last>Wang</last><affiliation>Bytedance AI Lab</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen university</affiliation></author>
       <pages>8456-8473</pages>
       <abstract>We present a large-scale video subtitle translation dataset, *BigVideo*, to facilitate the study of multi-modality machine translation. Compared with the widely used *How2* and *VaTeX* datasets, *BigVideo* is more than 10 times larger, consisting of 4.5 million sentence pairs and 9,981 hours of videos. We also introduce two deliberately designed test sets to verify the necessity of visual information: *Ambiguous* with the presence of ambiguous words, and *Unambiguous* in which the text context is self-contained for translation. To better model the common semantics shared across texts and videos, we introduce a contrastive learning method in the cross-modal encoder. Extensive experiments on the *BigVideo* shows that: a) Visual information consistently improves the NMT model in terms of BLEU, BLEURT and COMET on both Ambiguous and Unambiguous test sets. b) Visual information helps disambiguation, compared to the strong text baseline on terminology-targeted scores and human evaluation.</abstract>
@@ -9922,7 +9922,7 @@
     <paper id="549">
       <title>Class-Adaptive Self-Training for Relation Extraction with Incompletely Annotated Training Data</title>
       <author><first>Qingyu</first><last>Tan</last><affiliation>National University of Singapore</affiliation></author>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last><affiliation>None</affiliation></author>
+      <author><first>Lu</first><last>Xu</last><affiliation>None</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Hwee Tou</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>8630-8643</pages>
@@ -9951,7 +9951,7 @@
       <author><first>Qipeng</first><last>Guo</last><affiliation>Amazon Shanghai AI Lab</affiliation></author>
       <author><first>Jiawen</first><last>Wu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>8653-8665</pages>
       <abstract>Large language models (LLMs) have a wealth of knowledge that allows them to excel in various Natural Language Processing (NLP) tasks. Current research focuses on enhancing their performance within their existing knowledge. Despite their vast knowledge, LLMs are still limited by the amount of information they can accommodate and comprehend. Therefore, the ability to understand their own limitations on the unknows, referred to as self-knowledge, is of paramount importance. This study aims to evaluate LLMs’ self-knowledge by assessing their ability to identify unanswerable or unknowable questions. We introduce an automated methodology to detect uncertainty in the responses of these models, providing a novel measure of their self-knowledge. We further introduce a unique dataset, SelfAware, consisting of unanswerable questions from five diverse categories and their answerable counterparts. Our extensive analysis, involving 20 LLMs including GPT-3, InstructGPT, and LLaMA, discovering an intrinsic capacity for self-knowledge within these models. Moreover, we demonstrate that in-context learning and instruction tuning can further enhance this self-knowledge. Despite this promising insight, our findings also highlight a considerable gap between the capabilities of these models and human proficiency in recognizing the limits of their knowledge.</abstract>
       <url hash="12571066">2023.findings-acl.551</url>
@@ -10042,7 +10042,7 @@
       <title><fixed-case>MVP</fixed-case>: Multi-task Supervised Pre-training for Natural Language Generation</title>
       <author><first>Tianyi</first><last>Tang</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Junyi</first><last>Li</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>8758-8794</pages>
       <abstract>Pre-trained language models (PLMs) have achieved remarkable success in natural language generation (NLG) tasks. Up to now, most NLG-oriented PLMs are pre-trained in an unsupervised manner using the large-scale general corpus. In the meanwhile, an increasing number of models pre-trained with labeled data (i.e. “supervised pre-training”) showcase superior performance compared to unsupervised pre-trained models. Motivated by the success of supervised pre-training, we propose Multi-task superVised Pre-training (MVP) for natural language generation. We collect a large-scale natural language generation corpus, MVPCorpus, from 77 datasets over 11 diverse NLG tasks. Then we unify these examples into a general text-to-text format to pre-train the text generation model MVP in a supervised manner. For each task, we further pre-train specific soft prompts to stimulate the model’s capacity to perform a specific task. Our MVP model can be seen as a practice that utilizes recent instruction tuning on relatively small PLMs. Extensive experiments have demonstrated the effectiveness and generality of our MVP model in a number of NLG tasks, which achieves state-of-the-art performance on 13 out of 17 datasets, outperforming BART by 9.3% and Flan-T5 by 5.8%.</abstract>
@@ -10056,7 +10056,7 @@
       <author><first>Yu</first><last>Zhao</last><affiliation>Nankai University</affiliation></author>
       <author><first>Yike</first><last>Wu</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xiangrui</first><last>Cai</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Haiwei</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xiaojie</first><last>Yuan</last><affiliation>Nankai University</affiliation></author>
       <pages>8795-8806</pages>
@@ -10091,7 +10091,7 @@
     <paper id="562">
       <title><fixed-case>A</fixed-case>ctive<fixed-case>AED</fixed-case>: A Human in the Loop Improves Annotation Error Detection</title>
       <author><first>Leon</first><last>Weber</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>8834-8845</pages>
       <abstract>Manually annotated datasets are crucial for training and evaluating Natural Language Processing models. However, recent work has discovered that even widely-used benchmark datasets contain a substantial number of erroneous annotations. This problem has been addressed with Annotation Error Detection (AED) models, which can flag such errors for human re-annotation. However, even though many of these AED methods assume a final curation step in which a human annotator decides whether the annotation is erroneous, they have been developed as static models without any human-in-the-loop component. In this work, we propose ActiveAED, an AED method that can detect errors more accurately by repeatedly querying a human for error corrections in its prediction loop. We evaluate ActiveAED on eight datasets spanning five different tasks and find that it leads to improvements over the state of the art on seven of them, with gains of up to six percentage points in average precision.</abstract>
       <url hash="e575c46e">2023.findings-acl.562</url>
@@ -10101,7 +10101,7 @@
     <paper id="563">
       <title>Assessing Word Importance Using Models Trained for Semantic Tasks</title>
       <author><first>Dávid</first><last>Javorský</last><affiliation>Charles Univerzity, Faculty of Mathematics and Physics</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR CNRS &amp; Sorbonne Université</affiliation></author>
       <pages>8846-8856</pages>
       <abstract>Many NLP tasks require to automatically identify the most significant words in a text. In this work, we derive word significance from models trained to solve semantic task: Natural Language Inference and Paraphrase Identification. Using an attribution method aimed to explain the predictions of these models, we derive importance scores for each input token. We evaluate their relevance using a so-called cross-task evaluation: Analyzing the performance of one model on an input masked according to the other model’s weight, we show that our method is robust with respect to the choice of the initial task. Additionally, we investigate the scores from the syntax point of view and observe interesting patterns, e.g. words closer to the root of a syntactic tree receive higher importance scores. Altogether, these observations suggest that our method can be used to identify important words in sentences without any explicit word importance labeling in training.</abstract>
@@ -10114,7 +10114,7 @@
       <author><first>Sweta</first><last>Agrawal</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Chunting</first><last>Zhou</last><affiliation>Meta AI</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington; Meta</affiliation></author>
       <author><first>Marjan</first><last>Ghazvininejad</last><affiliation>FAIR</affiliation></author>
       <pages>8857-8873</pages>
       <abstract>Large-scale generative models show an impressive ability to perform a wide range of Natural Language Processing (NLP) tasks using in-context learning, where a few examples are used to describe a task to the model. For Machine Translation (MT), these examples are typically randomly sampled from the development dataset with a similar distribution as the evaluation set. However, it is unclear how the choice of these in context examples and their ordering impacts the output translation quality. In this work, we aim to understand the properties of good in-context examples for MT in both in-domain and out-of-domain settings. We show that the translation quality and the domain of the in-context examples matter and that 1-shot noisy unrelated examples can have a catastrophic impact on output quality. While concatenating multiple random examples reduces the effect of noise, a single good prompt optimized to maximize translation quality on the development dataset can elicit learned information from the pre-trained language model. Adding similar examples based on an n-gram overlap with the test source significantly and consistently improves the translation quality of the outputs, outperforming a strong kNN-MT baseline in 2 out of 4 out-of-domain datasets.</abstract>
@@ -10234,7 +10234,7 @@
     <paper id="573">
       <title>Pruning Pre-trained Language Models with Principled Importance and Self-regularization</title>
       <author><first>Siyu</first><last>Ren</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>8995-9008</pages>
       <abstract>Iterative pruning is one of the most effective compression methods for pre-trained language models. We discovered that finding the optimal pruning decision is an equality-constrained 0-1 Integer Linear Programming problem. The solution to this optimization problem leads to a principled importance criterion which we use to rank parameters during iterative model pruning. To mitigate the poor generalization at high sparsity levels, we propose a self-regularization scheme where model prediction is regularized by the latest checkpoint with increasing sparsity throughout pruning. Our experiments on natural language understanding, question answering, named entity recognition, and data-to-text generation with various Transformer-based PLMs show the effectiveness of the approach at various sparsity levels.</abstract>
       <url hash="23a5c802">2023.findings-acl.573</url>
@@ -10268,7 +10268,7 @@
     <paper id="576">
       <title>Data-Efficient Finetuning Using Cross-Task Nearest Neighbors</title>
       <author><first>Hamish</first><last>Ivison</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <author><first>Pradeep</first><last>Dasigi</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>9036-9061</pages>
@@ -10300,7 +10300,7 @@
       <author><first>Pengjun</first><last>Xie</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba DAMO Academy</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <pages>9072-9087</pages>
       <abstract>Data augmentation techniques have been used to alleviate the problem of scarce labeled data in various NER tasks (flat, nested, and discontinuous NER tasks). Existing augmentation techniques either manipulate the words in the original text that break the semantic coherence of the text, or exploit generative models that ignore preserving entities in the original text, which impedes the use of augmentation techniques on nested and discontinuous NER tasks. In this work, we propose a novel Entity-to-Text based data augmentation technique named EnTDA to add, delete, replace or swap entities in the entity list of the original texts, and adopt these augmented entity lists to generate semantically coherent and entity preserving texts for various NER tasks. Furthermore, we introduce a diversity beam search to increase the diversity during the text generation process. Experiments on thirteen NER datasets across three tasks (flat, nested, and discontinuous NER tasks) and two settings (full data and low resource settings) show that EnTDA could bring more performance improvements compared to the baseline augmentation techniques.</abstract>
       <url hash="8a2ce591">2023.findings-acl.578</url>
@@ -10328,7 +10328,7 @@
       <author><first>Young Jin</first><last>Kim</last><affiliation>Microsoft</affiliation></author>
       <author><first>Muhammad</first><last>Abdul-Mageed</last><affiliation>The University of British Columbia</affiliation></author>
       <author><first>Laks</first><last>Lakshmanan, V.S.</last><affiliation>UBC</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Sebastien</first><last>Bubeck</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Jianfeng</first><last>Gao</last><affiliation>Microsoft Research, Redmond</affiliation></author>
       <pages>9116-9132</pages>
@@ -10372,7 +10372,7 @@
       <author><first>Chenwei</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
       <author><first>Tao</first><last>Zhang</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Eugene</first><last>Rohrbaugh</last><affiliation>Harrisburg University of Science and Technology</affiliation></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>Universtiy of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>Universtiy of Illinois at Chicago</affiliation></author>
       <pages>9163-9175</pages>
       <abstract>Previous cross-lingual transfer methods are restricted to orthographic representation learning via textual scripts. This limitation hampers cross-lingual transfer and is biased towards languages sharing similar well-known scripts. To alleviate the gap between languages from different writing scripts, we propose PhoneXL, a framework incorporating phonemic transcriptions as an additional linguistic modality beyond the traditional orthographic transcriptions for cross-lingual transfer. Particularly, we propose unsupervised alignment objectives to capture (1) local one-to-one alignment between the two different modalities, (2) alignment via multi-modality contexts to leverage information from additional modalities, and (3) alignment via multilingual contexts where additional bilingual dictionaries are incorporated. We also release the first phonemic-orthographic alignment dataset on two token-level tasks (Named Entity Recognition and Part-of-Speech Tagging) among the understudied but interconnected Chinese-Japanese-Korean-Vietnamese (CJKV) languages. Our pilot study reveals phonemic transcription provides essential information beyond the orthography to enhance cross-lingual transfer and bridge the gap among CJKV languages, leading to consistent improvements on cross-lingual token-level tasks over orthographic-based multilingual PLMs.</abstract>
       <url hash="808afdad">2023.findings-acl.583</url>
@@ -10396,8 +10396,8 @@
       <author><first>Paramveer</first><last>Choudhary</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Tharindu</first><last>Ranasinghe</last><affiliation>Aston University</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
-      <author><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last><affiliation>University of Surrey</affiliation></author>
       <pages>9191-9205</pages>
       <abstract>Quality Estimation (QE) is the task of evaluating machine translation output in the absence of reference translation. Conventional approaches to QE involve training separate models at different levels of granularity viz., word-level, sentence-level, and document-level, which sometimes lead to inconsistent predictions for the same input. To overcome this limitation, we focus on jointly training a single model for sentence-level and word-level QE tasks in a multi-task learning framework. Using two multi-task learning-based QE approaches, we show that multi-task learning improves the performance of both tasks. We evaluate these approaches by performing experiments in different settings, viz., single-pair, multi-pair, and zero-shot. We compare the multi-task learning-based approach with baseline QE models trained on single tasks and observe an improvement of up to 4.28% in Pearson’s correlation (r) at sentence-level and 8.46% in F1-score at word-level, in the single-pair setting. In the multi-pair setting, we observe improvements of up to 3.04% at sentence-level and 13.74% at word-level; while in the zero-shot setting, we also observe improvements of up to 5.26% and 3.05%, respectively. We make the models proposed in this paper publically available.</abstract>
       <url hash="c9bf020e">2023.findings-acl.585</url>
@@ -10463,7 +10463,7 @@
       <author><first>Yifan</first><last>Du</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
       <author><first>Junyi</first><last>Li</last><affiliation>Gaoling School of Artificial Intelligence, Renmin University of China</affiliation></author>
       <author><first>Tianyi</first><last>Tang</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last><affiliation>RUC</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>9268-9281</pages>
       <abstract>In this paper, we propose a novel language model guided captioning approach, LAMOC, for knowledge-based visual question answering (VQA). Our approach employs the generated captions by a captioning model as the context of an answer prediction model, which is a Pre-Trained Language model (PLM). As the major contribution, we leverage the guidance and feedback of the prediction model to improve the capability of the captioning model. In this way, the captioning model can become aware of the task goal and information need from the PLM. To develop our approach, we design two specific training stages, where the first stage adapts the captioning model to the prediction model (selecting more suitable caption propositions for training) and the second stage tunes the captioning model according to the task goal (learning from feedback of the PLM). Extensive experiments demonstrate the effectiveness of the proposed approach on the knowledge-based VQA task. Specifically, on the challenging A-OKVQA dataset, LAMOC outperforms several competitive zero-shot methods and even achieves comparable results to a fine-tuned VLP model. Our code is publicly available at <url>https://github.com/RUCAIBox/LAMOC</url>.</abstract>
@@ -10497,7 +10497,7 @@
     </paper>
     <paper id="593">
       <title><fixed-case>GUMS</fixed-case>um: Multi-Genre Data and Evaluation for <fixed-case>E</fixed-case>nglish Abstractive Summarization</title>
-      <author><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
       <pages>9315-9327</pages>
       <abstract>Automatic summarization with pre-trained language models has led to impressively fluent results, but is prone to ‘hallucinations’, low performance on non-news genres, and outputs which are not exactly summaries. Targeting ACL 2023’s ‘Reality Check’ theme, we present GUMSum, a small but carefully crafted dataset of English summaries in 12 written and spoken genres for evaluation of abstractive summarization. Summaries are highly constrained, focusing on substitutive potential, factuality, and faithfulness. We present guidelines and evaluate human agreement as well as subjective judgments on recent system outputs, comparing general-domain untuned approaches, a fine-tuned one, and a prompt-based approach, to human performance. Results show that while GPT3 achieves impressive scores, it still underperforms humans, with varying quality across genres. Human judgments reveal different types of errors in supervised, prompted, and human-generated summaries, shedding light on the challenges of producing a good summary.</abstract>
@@ -10558,7 +10558,7 @@
       <author><first>Saleh</first><last>Soltan</last><affiliation>Amazon Alexa</affiliation></author>
       <author><first>Andy</first><last>Rosenbaum</last><affiliation>Amazon</affiliation></author>
       <author><first>Tobias</first><last>Falke</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Qin</first><last>Lu</last><affiliation>Amazon.com Services LLC.</affiliation></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last><affiliation>Amazon.com Services LLC.</affiliation></author>
       <author><first>Anna</first><last>Rumshisky</last><affiliation>University of Massachusetts Lowell</affiliation></author>
       <author><first>Wael</first><last>Hamza</last><affiliation>Amazon</affiliation></author>
       <pages>9380-9394</pages>
@@ -10573,7 +10573,7 @@
       <author><first>Hao</first><last>Fei</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Meishan</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen), China</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>9395-9408</pages>
       <abstract>Latest efforts on cross-lingual relation extraction (XRE) aggressively leverage the language-consistent structural features from the universal dependency (UD) resource, while they may largely suffer from biased transfer (e.g., either target-biased or source-biased) due to the inevitable linguistic disparity between languages. In this work, we investigate an unbiased UD- based XRE transfer by constructing a type of code-mixed UD forest. We first translate the sentence of the source language to the parallel target-side language, for both of which we parse the UD tree respectively. Then, we merge the source-/target-side UD structures as a unified code-mixed UD forest. With such forest features, the gaps of UD-based XRE between the training and predicting phases can be effectively closed. We conduct experiments on the ACE XRE benchmark datasets, where the results demonstrate that the proposed code-mixed UD forests help unbiased UD-based XRE transfer, with which we achieve significant XRE performance gains.</abstract>
       <url hash="bb64bddd">2023.findings-acl.599</url>
@@ -10620,7 +10620,7 @@
       <title>Visual Coherence Loss for Coherent and Visually Grounded Story Generation</title>
       <author><first>Xudong</first><last>Hong</last><affiliation>Saarland University / MPI Informatics</affiliation></author>
       <author><first>Vera</first><last>Demberg</last><affiliation>Saarland University</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Qiankun</first><last>Zheng</last><affiliation>Saarland University</affiliation></author>
       <author><first>Bernt</first><last>Schiele</last><affiliation>MPI Informatics</affiliation></author>
       <pages>9456-9470</pages>
@@ -10692,7 +10692,7 @@
       <author><first>Shuming</first><last>Ma</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Dongdong</first><last>Zhang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Institute of Computational Linguistic, Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Institute of Computational Linguistic, Peking University</affiliation></author>
       <pages>9542-9558</pages>
       <abstract>While multilingual neural machine translation has achieved great success, it suffers from the off-target issue, where the translation is in the wrong language. This problem is more pronounced on zero-shot translation tasks. In this work, we find that failing in encoding discriminative target language signal will lead to off-target and a closer lexical distance (i.e., KL-divergence) between two languages’ vocabularies is related with a higher off-target rate. We also find that solely isolating the vocab of different languages in the decoder can alleviate the problem. Motivated by the findings, we propose Language Aware Vocabulary Sharing (LAVS), a simple and effective algorithm to construct the multilingual vocabulary, that greatly alleviates the off-target problem of the translation model by increasing the KL-divergence between languages. We conduct experiments on a multilingual machine translation benchmark in 11 languages. Experiments show that the off-target rate for 90 translation tasks is reduced from 29% to 8%, while the overall BLEU score is improved by an average of 1.9 points without extra training cost or sacrificing the supervised directions’ performance. We release the code at <url>https://github.com/PKUnlp-icler/Off-Target-MNMT</url> for reproduction.</abstract>
       <url hash="7d1c14f3">2023.findings-acl.608</url>
@@ -10795,15 +10795,15 @@
       <author><first>Momchil</first><last>Hardalov</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Chao</first><last>Shang</last><affiliation>Amazon AWS AI</affiliation></author>
       <author><first>Yassine</first><last>Benajiba</last><affiliation>Amazon AWS AI</affiliation></author>
-      <author><first>Lluis</first><last>Marquez</last><affiliation>AWS AI Labs</affiliation></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last><affiliation>AWS AI Labs</affiliation></author>
       <pages>9697-9719</pages>
       <abstract>Sequence-to-sequence state-of-the-art systems for dialogue state tracking (DST) use the full dialogue history as input, represent the current state as a list with all the slots, and generate the entire state from scratch at each dialogue turn. This approach is inefficient, especially when the number of slots is large and the conversation is long. We propose Diable, a new task formalisation that simplifies the design and implementation of efficient DST systems and allows one to easily plug and play large language models. We represent the dialogue state as a table and formalise DST as a table manipulation task. At each turn, the system updates the previous state by generating table operations based on the dialogue context. Extensive experimentation on the MultiWoz datasets demonstrates that Diable (i) outperforms strong efficient DST baselines, (ii) is 2.4x more time efficient than current state-of-the-art methods while retaining competitive Joint Goal Accuracy, and (iii) is robust to noisy data annotations due to the table operations approach.</abstract>
       <url hash="54fb1ebb">2023.findings-acl.615</url>
       <bibkey>lesci-etal-2023-diable</bibkey>
       <revision id="1" href="2023.findings-acl.615v1" hash="cd2268c0"/>
       <revision id="2" href="2023.findings-acl.615v2" hash="1214c694" date="2023-08-02">Add link to code, optimise white space (i.e., promote paragraphs to subsections), and enhance table captions.</revision>
-      <doi>10.18653/v1/2023.findings-acl.615</doi>
       <revision id="3" href="2023.findings-acl.615v3" hash="54fb1ebb" date="2023-09-17">Added an author.</revision>
+      <doi>10.18653/v1/2023.findings-acl.615</doi>
     </paper>
     <paper id="616">
       <title>Neural Topic Modeling based on Cycle Adversarial Training and Contrastive Learning</title>
@@ -10834,7 +10834,7 @@
       <title>Mapping Brains with Language Models: A Survey</title>
       <author><first>Antonia</first><last>Karamolegkou</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Mostafa</first><last>Abdou</last><affiliation>Princeton University</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>9748-9762</pages>
       <abstract>Over the years, many researchers have seemingly made the same observation: Brain and language model activations exhibit some structural similarities, enabling linear partial mappings between features extracted from neural recordings and computational language models. In an attempt to evaluate how much evidence has been accumulated for this observation, we survey over 30 studies spanning 10 datasets and 8 metrics. How much evidence has been accumulated, and what, if anything, is missing before we can draw conclusions? Our analysis of the evaluation methods used in the literature reveals that some of the metrics are less conservative. We also find that the accumulated evidence, for now, remains ambiguous, but correlations with model size and quality provide grounds for cautious optimism.</abstract>
       <url hash="c5ffa820">2023.findings-acl.618</url>
@@ -10845,7 +10845,7 @@
       <title>Parameter-Efficient Finetuning for Robust Continual Multilingual Learning</title>
       <author><first>Kartikeya</first><last>Badola</last><affiliation>Google Research</affiliation></author>
       <author><first>Shachi</first><last>Dave</last><affiliation>Google Research</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <pages>9763-9780</pages>
       <abstract>We introduce and study the problem of Continual Multilingual Learning (CML) where a previously trained multilingual model is periodically updated using new data arriving in stages. If the new data is present only in a subset of languages, we find that the resulting model shows improved performance only on the languages included in the latest update (and a few closely related languages) while its performance on all the remaining languages degrade significantly. We address this challenge by proposing LAFT-URIEL, a parameter-efficient finetuning strategy which aims to increase the number of languages on which the model improves after an update, while reducing the magnitude of loss in performance for the remaining languages. LAFT-URIEL uses linguistic knowledge to balance overfitting and knowledge sharing across languages, allowing for an additional 25% of task languages to see an improvement in performance after an update, while also reducing the average magnitude of losses on the remaining languages by 78% relative.</abstract>
       <url hash="8089c100">2023.findings-acl.619</url>
@@ -11208,7 +11208,7 @@
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Bayero University Kano</affiliation></author>
       <author><first>Ketan</first><last>Kotwal</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Sayan</first><last>Deb Sarkar</last><affiliation>ETH Zurich</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Habeebah</first><last>Kakudi</last><affiliation>Bayero University, Kano</affiliation></author>
       <pages>10162-10183</pages>
       <abstract>This paper presents “HaVQA”, the first multimodal dataset for visual question answering (VQA) tasks in the Hausa language. The dataset was created by manually translating 6,022 English question-answer pairs, which are associated with 1,555 unique images from the Visual Genome dataset. As a result, the dataset provides 12,044 gold standard English-Hausa parallel sentences that were translated in a fashion that guarantees their semantic match with the corresponding visual information. We conducted several baseline experiments on the dataset, including visual question answering, visual question elicitation, text-only and multimodal machine translation.</abstract>
@@ -11221,7 +11221,7 @@
       <title>Claim-Dissector: An Interpretable Fact-Checking System with Joint Re-ranking and Veracity Prediction</title>
       <author><first>Martin</first><last>Fajcik</last><affiliation>Brno University of Technology</affiliation></author>
       <author><first>Petr</first><last>Motlicek</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
       <pages>10184-10205</pages>
       <abstract>We present Claim-Dissector: a novel latent variable model for fact-checking and analysis, which given a claim and a set of retrieved evidence jointly learns to identify: (i) the relevant evidences to the given claim (ii) the veracity of the claim. We propose to disentangle the per-evidence relevance probability and its contribution to the final veracity probability in an interpretable way — the final veracity probability is proportional to a linear ensemble of per-evidence relevance probabilities. In this way, the individual contributions of evidences towards the final predicted probability can be identified. In per-evidence relevance probability, our model can further distinguish whether each relevant evidence is supporting (S) or refuting (R) the claim. This allows to quantify how much the S/R probability contributes to final verdict or to detect disagreeing evidence. Despite its interpretable nature, our system achieves results competetive with state-of-the-art on the FEVER dataset, as compared to typical two-stage system pipelines, while using significantly fewer parameters. Furthermore, our analysis shows that our model can learn fine-grained relevance cues while using coarse-grained supervision and we demonstrate it in 2 ways. (i) We show that our model can achieve competitive sentence recall while using only paragraph-level relevance supervision. (ii) Traversing towards the finest granularity of relevance, we show that our model is capable of identifying relevance at the token level. To do this, we present a new benchmark TLR-FEVER focusing on token-level interpretability — humans annotate tokens in relevant evidences they considered essential when making their judgment. Then we measure how similar are these annotations to the tokens our model is focusing on.</abstract>
       <url hash="6470b876">2023.findings-acl.647</url>
@@ -11249,7 +11249,7 @@
       <author><first>Xin</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Chenwei</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
       <author><first>Irwin</first><last>King</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <pages>10221-10234</pages>
       <abstract>Relation extraction (RE) tasks show promising performance in extracting relations from two entities mentioned in sentences, given sufficient annotations available during training. Such annotations would be labor-intensive to obtain in practice. Existing work adopts data augmentation techniques to generate pseudo-annotated sentences beyond limited annotations. These techniques neither preserve the semantic consistency of the original sentences when rule-based augmentations are adopted, nor preserve the syntax structure of sentences when expressing relations using seq2seq models, resulting in less diverse augmentations. In this work, we propose a dedicated augmentation technique for relational texts, named GDA, which uses two complementary modules to preserve both semantic consistency and syntax structures. We adopt a generative formulation and design a multi-tasking solution to achieve synergies. Furthermore, GDA adopts entity hints as the prior knowledge of the generative model to augment diverse sentences. Experimental results in three datasets under a low-resource setting showed that GDA could bring <i>2.0%</i> F1 improvements compared with no augmentation technique.</abstract>
       <url hash="9956abbd">2023.findings-acl.649</url>
@@ -11285,7 +11285,7 @@
       <author><first>Wei</first><last>Chen</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <author><first>Shiqi</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10278-10286</pages>
       <abstract>Symptom diagnosis in medical conversations aims to correctly extract both symptom entities and their status from the doctor-patient dialogue. In this paper, we propose a novel framework called KNSE for symptom status recognition (SSR), where the SSR is formulated as a natural language inference (NLI) task. For each mentioned symptom in a dialogue window, we first generate knowledge about the symptom and hypothesis about status of the symptom, to form a (premise, knowledge, hypothesis) triplet. The BERT model is then used to encode the triplet, which is further processed by modules including utterance aggregation, self-attention, cross-attention, and GRU to predict the symptom status. Benefiting from the NLI formalization, the proposed framework can encode more informative prior knowledge to better localize and track symptom status, which can effectively improve the performance of symptom status recognition. Preliminary experiments on Chinese medical dialogue datasets show that KNSE outperforms previous competitive baselines and has advantages in cross-disease and cross-symptom scenarios.</abstract>
       <url hash="d8a7f327">2023.findings-acl.652</url>
@@ -11313,7 +11313,7 @@
     <paper id="654">
       <title>Character Coreference Resolution in Movie Screenplays</title>
       <author><first>Sabyasachee</first><last>Baruah</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
       <pages>10300-10313</pages>
       <abstract>Movie screenplays have a distinct narrative structure. It segments the story into scenes containing interleaving descriptions of actions, locations, and character dialogues.A typical screenplay spans several scenes and can include long-range dependencies between characters and events.A holistic document-level understanding of the screenplay requires several natural language processing capabilities, such as parsing, character identification, coreference resolution, action recognition, summarization, and attribute discovery. In this work, we develop scalable and robust methods to extract the structural information and character coreference clusters from full-length movie screenplays. We curate two datasets for screenplay parsing and character coreference — <i>MovieParse</i> and <i>MovieCoref</i>, respectively.We build a robust screenplay parser to handle inconsistencies in screenplay formatting and leverage the parsed output to link co-referring character mentions.Our coreference models can scale to long screenplay documents without drastically increasing their memory footprints.</abstract>
       <url hash="02845ffc">2023.findings-acl.654</url>
@@ -11365,7 +11365,7 @@
       <title>Which Examples Should be Multiply Annotated? Active Learning When Annotators May Disagree</title>
       <author><first>Connor</first><last>Baumler</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Anna</first><last>Sotnikova</last><affiliation>University of Maryland</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>UMD</affiliation></author>
       <pages>10352-10371</pages>
       <abstract>Linguistic annotations, especially for controversial topics like hate speech detection, are frequently contested due to annotator backgrounds and positionalities. In such situations, preserving this disagreement through the machine learning pipeline can be important for downstream use cases. However, capturing disagreement can increase annotation time and expense. Fortunately, for many tasks, not all examples are equally controversial; we develop an active learning approach, Disagreement Aware Active Learning (DAAL) that concentrates annotations on examples where model entropy and annotator entropy are the most different. Because we cannot know the true entropy of annotations on unlabeled examples, we estimate a model that predicts annotator entropy trained using very few multiply-labeled examples. We find that traditional uncertainty-based active learning underperforms simple passive learning on tasks with high levels of disagreement, but that our active learning approach is able to successfully improve on passive and active baselines, reducing the number of annotations required by at least 24% on average across several datasets.</abstract>
       <url hash="56fd0e85">2023.findings-acl.658</url>
@@ -11684,7 +11684,7 @@
     <paper id="683">
       <title>Replace and Report: <fixed-case>NLP</fixed-case> Assisted Radiology Report Generation</title>
       <author><first>Kaveri</first><last>Kale</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <author><first>Kshitij</first><last>Jadhav</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
       <pages>10731-10742</pages>
       <abstract>Clinical practice frequently uses medical imaging for diagnosis and treatment. A significant challenge for automatic radiology report generation is that the radiology reports are long narratives consisting of multiple sentences for both abnormal and normal findings. Therefore, applying conventional image captioning approaches to generate the whole report proves to be insufficient, as these are designed to briefly describe images with short sentences. We propose a template-based approach to generate radiology reports from radiographs. Our approach involves the following: i) using a multilabel image classifier, produce the tags for the input radiograph; ii) using a transformer-based model, generate pathological descriptions (a description of abnormal findings seen on radiographs) from the tags generated in step (i); iii) using a BERT-based multi-label text classifier, find the spans in the normal report template to replace with the generated pathological descriptions; and iv) using a rule-based system, replace the identified span with the generated pathological description. We performed experiments with the two most popular radiology report datasets, IU Chest X-ray and MIMIC-CXR and demonstrated that the BLEU-1, ROUGE-L, METEOR, and CIDEr scores are better than the State-of-the-Art models by 25%, 36%, 44% and 48% respectively, on the IU X-RAY dataset. To the best of our knowledge, this is the first attempt to generate chest X-ray radiology reports by first creating small sentences for abnormal findings and then replacing them in the normal report template.</abstract>
@@ -11708,7 +11708,7 @@
     </paper>
     <paper id="685">
       <title><fixed-case>C</fixed-case>a<fixed-case>PE</fixed-case>: Contrastive Parameter Ensembling for Reducing Hallucination in Abstractive Summarization</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Alex</first><last>Fabbri</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Jesse</first><last>Vig</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce</affiliation></author>
@@ -11732,7 +11732,7 @@
     </paper>
     <paper id="687">
       <title>A Call for Standardization and Validation of Text Style Transfer Evaluation</title>
-      <author><first>Phil</first><last>Ostheimer</last></author>
+      <author id="phil-sidney-ostheimer"><first>Phil</first><last>Ostheimer</last></author>
       <author><first>Mayank</first><last>Nagda</last></author>
       <author><first>Marius</first><last>Kloft</last></author>
       <author><first>Sophie</first><last>Fellenz</last></author>
@@ -11830,7 +11830,7 @@
       <author><first>Sijie</first><last>Cheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yile</first><last>Wang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Institute for AI Industry Research (AIR), Tsinghua University, China</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>10896-10912</pages>
       <abstract>Retrieval-augmented methods have received increasing attention to support downstream tasks by leveraging useful information from external resources. Recent studies mainly focus on exploring retrieval to solve knowledge-intensive (KI) tasks. However, the potential of retrieval for most non-knowledge-intensive (NKI) tasks remains under-explored. There are two main challenges to leveraging retrieval-augmented methods for NKI tasks: 1) the demand for diverse relevance score functions and 2) the dilemma between training cost and task performance. To address these challenges, we propose a two-stage framework for NKI tasks, named PGRA. In the first stage, we adopt a task-agnostic retriever to build a shared static index and select candidate evidence efficiently. In the second stage, we design a prompt-guided reranker to rerank the nearest evidence according to task-specific relevance for the reader. Experimental results show that PGRA outperforms other state-of-the-art retrieval-augmented methods. Our analyses further investigate the influence factors to model performance and demonstrate the generality of PGRA. The code and model will be released for further research.</abstract>
       <url hash="ae2c53c2">2023.findings-acl.693</url>
@@ -11891,7 +11891,7 @@
       <author><first>Jonathan</first><last>Rowe</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Bradford</first><last>Mott</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Snigdha</first><last>Chaturvedi</last><affiliation>University of North Carolina, Chapel Hill</affiliation></author>
-      <author><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
+      <author id="james-lester"><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
       <pages>10978-10992</pages>
       <abstract>Recognizing classroom dialogue acts has significant promise for yielding insight into teaching, student learning, and classroom dynamics. However, obtaining K-12 classroom dialogue data with labels is a significant challenge, and therefore, developing data-efficient methods for classroom dialogue act recognition is essential. This work addresses the challenge of classroom dialogue act recognition from limited labeled data using a contrastive learning-based self-supervised approach (SSCon). SSCon uses two independent models that iteratively improve each other’s performance by increasing the accuracy of dialogue act recognition and minimizing the embedding distance between the same dialogue acts. We evaluate the approach on three complementary dialogue act recognition datasets: the TalkMoves dataset (annotated K-12 mathematics lesson transcripts), the DailyDialog dataset (multi-turn daily conversation dialogues), and the Dialogue State Tracking Challenge 2 (DSTC2) dataset (restaurant reservation dialogues). Results indicate that our self-supervised contrastive learning-based model outperforms competitive baseline models when trained with limited examples per dialogue act. Furthermore, SSCon outperforms other few-shot models that require considerably more labeled data.</abstract>
       <url hash="36cc4945">2023.findings-acl.698</url>
@@ -11952,7 +11952,7 @@
     </paper>
     <paper id="703">
       <title>A Class-Rebalancing Self-Training Framework for Distantly-Supervised Named Entity Recognition</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Zhejiang University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Tingyu</first><last>Xie</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Peng</first><last>Peng</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Hongwei</first><last>Wang</last><affiliation>Zhejiang University-University of Illinois at Urbana-Champaign Institute</affiliation></author>
@@ -12134,7 +12134,7 @@
     <paper id="716">
       <title>Boosting Event Extraction with Denoised Structure-to-Text Augmentation</title>
       <author><first>Bo</first><last>Wang</last><affiliation>School of Computer Science and Technology, Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xiaochi</first><last>Wei</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Ge</first><last>Shi</last><affiliation>Beijing University of Technology</affiliation></author>
       <author><first>Xiao</first><last>Liu</last><affiliation>Microsoft Research Asia</affiliation></author>
@@ -12157,7 +12157,7 @@
       <author><first>Tao</first><last>Gui</last><affiliation>fudan university</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Menghan</first><last>Zhang</last><affiliation>Institute of Modern Languages and Linguistics, Fudan University</affiliation></author>
       <pages>11282-11298</pages>
       <abstract>Deep neural networks (DNNs) have been proven to be sensitive towards perturbations on input samples, and previous works highlight that adversarial samples are even more vulnerable than normal ones. In this work, this phenomenon is illustrated frWe first show that adversarial samples locate in steep and narrow local minima of the loss landscape (high sharpness) while normal samples, which differs distinctly from adversarial ones, reside in the loss surface that is more flatter (low sharpness).om the perspective of sharpness via visualizing the input loss landscape of models. Based on this, we propose a simple and effective sharpness-based detector to distinct adversarial samples by maximizing the loss increment within the region where the inference sample is located. Considering that the notion of sharpness of a loss landscape is relative, we further propose an adaptive optimization strategy in an attempt to fairly compare the relative sharpness among different samples. Experimental results show that our approach can outperform previous detection methods by large margins (average +6.6 F1 score) for four advanced attack strategies considered in this paper across three text classification tasks.</abstract>
@@ -12285,7 +12285,7 @@
       <author><first>Jinshu</first><last>Lin</last><affiliation>Hundsun</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>UCLA</affiliation></author>
       <author><first>Cho-Jui</first><last>Hsieh</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>11454-11465</pages>
       <abstract>We extend a non-parametric Bayesian model of (Titov and Klementiev, 2011) to deal with homonymy and polysemy by leveraging distributed contextual word and phrase representations pre-trained on a large collection of unlabelled texts. Then, unsupervised semantic parsing is performed by decomposing sentences into fragments, clustering the fragments to abstract away syntactic variations of the same meaning, and predicting predicate-argument relations between the fragments. To better model the statistical dependencies between predicates and their arguments, we further conduct a hierarchical Pitman-Yor process. An improved Metropolis-Hastings merge-split sampler is proposed to speed up the mixing and convergence of Markov chains by leveraging pre-trained distributed representations. The experimental results show that the models achieve better accuracy on both question-answering and relation extraction tasks.</abstract>
       <url hash="891ae8dc">2023.findings-acl.726</url>
@@ -12362,7 +12362,7 @@
       <author><first>Nicholas</first><last>FitzGerald</last><affiliation>Google</affiliation></author>
       <author><first>Sumit</first><last>Sanghai</last><affiliation>Google</affiliation></author>
       <author><first>Fei</first><last>Sha</last><affiliation>Google</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google AI</affiliation></author>
       <pages>11534-11547</pages>
       <abstract>Fusion-in-Decoder (FiD) is a powerful retrieval-augmented language model that sets the state-of-the-art on many knowledge-intensive NLP tasks. However, the architecture used for FiD was chosen by making minimal modifications to a standard T5 model, which our analysis shows to be highly suboptimal for a retrieval-augmented model. In particular, FiD allocates the bulk of FLOPs to the encoder, while the majority of inference time results from memory bandwidth constraints in the decoder. We propose two simple changes to the FiD architecture to alleviate memory bandwidth constraints, and speed up inference by 7x. This allows us to use a much larger decoder at modest cost. We denote FiD with the above modifications as FiDO, and show that it strongly improves performance over existing FiD models for a wide range of inference budgets. For example, FiDO-Large-XXL performs faster inference than FiD-Base and achieves better performance than FiD-Large.</abstract>
       <url hash="83a871a8">2023.findings-acl.732</url>
@@ -12417,7 +12417,7 @@
     <paper id="736">
       <title>Predicting Human Translation Difficulty Using Automatic Word Alignment</title>
       <author><first>Zheng Wei</first><last>Lim</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Charles</first><last>Kemp</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Ekaterina</first><last>Vylomova</last><affiliation>University of Melbourne</affiliation></author>
       <pages>11590-11601</pages>
@@ -12443,10 +12443,10 @@
       <author><first>Jon</first><last>Saad-Falcon</last><affiliation>Stanford University</affiliation></author>
       <author><first>Martin</first><last>Franz</last><affiliation>IBM T.J. Watson Research Center</affiliation></author>
       <author><first>Omar</first><last>Khattab</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last><affiliation>IBM Research</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Matei</first><last>Zaharia</last><affiliation>Stanford</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>11613-11628</pages>
@@ -12555,7 +12555,7 @@
       <author><first>Chulun</first><last>Zhou</last><affiliation>Tencent</affiliation></author>
       <author><first>Yunlong</first><last>Liang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen university</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
       <pages>11747-11762</pages>
@@ -12582,7 +12582,7 @@
       <author><first>Rongzhi</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Yu</first><last>Meng</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Jiaming</first><last>Shen</last><affiliation>Google Research</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Tech</affiliation></author>
       <pages>11782-11805</pages>
       <abstract>With the development of large language models (LLMs), zero-shot learning has attracted much attention for various NLP tasks. Different from prior works that generate training data with billion-scale natural language generation (NLG) models, we propose a retrieval-enhanced framework to create training data from a general-domain unlabeled corpus. To realize this, we first conduct contrastive pretraining to learn an unsupervised dense retriever for extracting the most relevant documents using class-descriptive verbalizers. We then further pro- pose two simple strategies, namely Verbalizer Augmentation with Demonstrations and Self- consistency Guided Filtering to improve the topic coverage of the dataset while removing noisy examples. Experiments on nine datasets demonstrate that ReGen achieves 4.3% gain over the strongest baselines and saves around 70% of the time when compared with baselines using large NLG models. Besides, REGEN can be naturally integrated with recently proposed large language models to boost performance.</abstract>
       <url hash="2c4cd992">2023.findings-acl.748</url>
@@ -12594,7 +12594,7 @@
       <title>Race, Gender, and Age Biases in Biomedical Masked Language Models</title>
       <author><first>Michelle</first><last>Kim</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Junghwan</first><last>Kim</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>11806-11815</pages>
       <abstract>Biases cause discrepancies in healthcare services. Race, gender, and age of a patient affect interactions with physicians and the medical treatments one receives. These biases in clinical practices can be amplified following the release of pre-trained language models trained on biomedical corpora. To bring awareness to such repercussions, we examine social biases present in the biomedical masked language models. We curate prompts based on evidence-based practice and compare generated diagnoses based on biases. For a case study, we measure bias in diagnosing coronary artery disease and using cardiovascular procedures based on bias. Our study demonstrates that biomedical models are less biased than BERT in gender, while the opposite is true for race and age.</abstract>
       <url hash="29a746da">2023.findings-acl.749</url>
@@ -12642,7 +12642,7 @@
       <author><first>Zhenhui</first><last>Ye</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Qian</first><last>Chen</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Siqi</first><last>Zheng</last><affiliation>Alibaba</affiliation></author>
-      <author><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Zhang</first><last>Qinglin</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Zhou</first><last>Zhao</last><affiliation>zhejiang university</affiliation></author>
       <pages>11905-11912</pages>
@@ -12668,7 +12668,7 @@
       <title><fixed-case>SC</fixed-case>on<fixed-case>E</fixed-case>: Simplified Cone Embeddings with Symbolic Operators for Complex Logical Queries</title>
       <author><first>Chau</first><last>Nguyen</last><affiliation>The University of Western Australia</affiliation></author>
       <author><first>Tim</first><last>French</last><affiliation>The University of Western Australia</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>The University of Western Austarlia</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>The University of Western Austarlia</affiliation></author>
       <author><first>Michael</first><last>Stewart</last><affiliation>The University of Western Australia</affiliation></author>
       <pages>11931-11946</pages>
       <abstract>Geometric representation of query embeddings (using points, particles, rectangles and cones) can effectively achieve the task of answering complex logical queries expressed in first-order logic (FOL) form over knowledge graphs, allowing intuitive encodings. However, current geometric-based methods depend on the neural approach to model FOL operators (conjunction, disjunction and negation), which are not easily explainable with considerable computation cost. We overcome this challenge by introducing a symbolic modeling approach for the FOL operators, emphasizing the direct calculation of the intersection between geometric shapes, particularly sector-cones in the embedding space, to model the conjunction operator. This approach reduces the computation cost as a non-neural approach is involved in the core logic operators. Moreover, we propose to accelerate the learning in the relation projection operator using the neural approach to emphasize the essential role of this operator in all query structures. Although empirical evidence for explainability is challenging, our approach demonstrates a significant improvement in answering complex logical queries (both non-negative and negative FOL forms) over previous geometric-based models.</abstract>
@@ -12683,7 +12683,7 @@
       <author><first>Yufeng</first><last>Shen</last><affiliation>University of Chinese Academy of Sciences</affiliation></author>
       <author><first>Wei</first><last>Ji</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Juan</first><last>Cao</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences; University of Chinese Academy of Sciences</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11947-11959</pages>
       <abstract>The prevalence of short video platforms has spawned a lot of fake news videos, which have stronger propagation ability than textual fake news. Thus, automatically detecting fake news videos has been an important countermeasure in practice. Previous works commonly verify each news video individually with multimodal information. Nevertheless, news videos from different perspectives regarding the same event are commonly posted together, which contain complementary or contradictory information and thus can be used to evaluate each other mutually. To this end, we introduce a new and practical paradigm, i.e., cross-sample fake news video detection, and propose a novel framework, Neighbor-Enhanced fakE news video Detection (NEED), which integrates the neighborhood relationship of new videos belonging to the same event. NEED can be readily combined with existing single-sample detectors and further enhance their performances with the proposed graph aggregation (GA) and debunking rectification (DR) modules. Specifically, given the feature representations obtained from single-sample detectors, GA aggregates the neighborhood information with the dynamic graph to enrich the features of independent samples. After that, DR explicitly leverages the relationship between debunking videos and fake news videos to refute the candidate videos via textual and visual consistency. Extensive experiments on the public benchmark demonstrate that NEED greatly improves the performance of both single-modal (up to 8.34% in accuracy) and multimodal (up to 4.97% in accuracy) base detectors.</abstract>
       <url hash="426b54df">2023.findings-acl.756</url>
@@ -12721,7 +12721,7 @@
       <author><first>Zhiheng</first><last>Xi</last><affiliation>Fudan University</affiliation></author>
       <author><first>Rui</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yuansen</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>School of Data Science, Fudan University</affiliation></author>
       <author><first>Minlong</first><last>Peng</last><affiliation>Baidu Inc.</affiliation></author>
       <author><first>Mingming</first><last>Sun</last><affiliation>Baidu Research</affiliation></author>
@@ -12790,7 +12790,7 @@
       <title>Improving Radiology Summarization with Radiograph and Anatomy Prompts</title>
       <author><first>Jinpeng</first><last>Hu</last><affiliation>CUHKSZ</affiliation></author>
       <author><first>Zhihong</first><last>Chen</last><affiliation>Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
       <author><first>Xiang</first><last>Wan</last><affiliation>Shenzhen Research Institute of Big Data</affiliation></author>
       <author><first>Tsung-Hui</first><last>Chang</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
       <pages>12066-12080</pages>
@@ -12845,8 +12845,8 @@
       <author><first>Yung-Sung</first><last>Chuang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Wei</first><last>Fang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Shang-Wen</first><last>Li</last><affiliation>Facebook AI Research (FAIR)</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta AI - FAIR</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>12131-12147</pages>
       <abstract>We propose EAR, a query Expansion And Reranking approach for improving passage retrieval, with the application to open-domain question answering. EAR first applies a query expansion model to generate a diverse set of queries, and then uses a query reranker to select the ones that could lead to better retrieval results. Motivated by the observation that the best query expansion often is not picked by greedy decoding, EAR trains its reranker to predict the rank orders of the gold passages when issuing the expanded queries to a given retriever. By connecting better the query expansion model and retriever, EAR significantly enhances a traditional sparse retrieval method, BM25. Empirically, EAR improves top-5/20 accuracy by 3-8 and 5-10 points in in-domain and out-of-domain settings, respectively, when compared to a vanilla query expansion model, GAR, and a dense retrieval model, DPR.</abstract>
       <url hash="9ecac413">2023.findings-acl.768</url>
@@ -12865,7 +12865,7 @@
     </paper>
     <paper id="770">
       <title>Inducing Character-level Structure in Subword-based Language Models with Type-level Interchange Intervention Training</title>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kyle</first><last>Mahowald</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
@@ -12920,7 +12920,7 @@
       <author><first>Debayan</first><last>Banerjee</last><affiliation>Language Technology Group, University of Hamburg</affiliation></author>
       <author><first>Pranav</first><last>Nair</last><affiliation>Indian Institute of Technology (BHU), Varanasi</affiliation></author>
       <author><first>Ricardo</first><last>Usbeck</last><affiliation>Hamburg University</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>12219-12228</pages>
       <abstract>In this work, we analyse the role of output vocabulary for text-to-text (T2T) models on the task of SPARQL semantic parsing. We perform experiments within the the context of knowledge graph question answering (KGQA), where the task is to convert questions in natural language to the SPARQL query language. We observe that the query vocabulary is distinct from human vocabulary. Language Models (LMs) are pre-dominantly trained for human language tasks, and hence, if the query vocabulary is replaced with a vocabulary more attuned to the LM tokenizer, the performance of models may improve. We carry out carefully selected vocabulary substitutions on the queries and find absolute gains in the range of 17% on the GrailQA dataset.</abstract>
       <url hash="afc1f9ec">2023.findings-acl.774</url>
@@ -13012,7 +13012,7 @@
     <paper id="781">
       <title><fixed-case>H</fixed-case>ua<fixed-case>SLIM</fixed-case>: Human Attention Motivated Shortcut Learning Identification and Mitigation for Large Language models</title>
       <author><first>Yuqi</first><last>Ren</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>12350-12365</pages>
       <abstract>Large language models have made remarkable progress on a variety of NLP tasks. However, it has been found that they tend to rely on shortcut features that spuriously correlate with labels for prediction, which weakens their generalization on out-of-distribution samples. In this paper, we propose a human attention guided approach to identifying and mitigating shortcut learning, which encourages the LLM-based target model to learn relevant features. We define an attention-based measurement to capture both model and data bias and identify shortcut tokens by exploring both human and neural attention. In a self-distillation framework, we mitigate shortcut learning by dynamically adjusting the distillation temperature according to the detected shortcut tokens and estimated shortcut degree. Additionally, we utilize human attention as a supervisory signal to constrain large language models to pay more attention to relevant tokens. Experimental results on multiple NLP tasks show that our proposed method can effectively identify shortcut tokens, and significantly improve the robustness of large language models on OOD samples, while not undermining the performance on IID data.</abstract>
       <url hash="3ab0bfaf">2023.findings-acl.781</url>
@@ -13046,7 +13046,7 @@
       <author><first>Yufan</first><last>Wang</last><affiliation>Central China Normal University,National Engineering Research Center for E-Learning</affiliation></author>
       <author><first>Bowei</first><last>Zou</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <author><first>Rui</first><last>Fan</last><affiliation>Faculty of Artificial Intelligence in Education, Central China Normal University</affiliation></author>
-      <author><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <author><first>Tingting</first><last>He</last><affiliation>School of Computer, Central China Normal University</affiliation></author>
       <pages>12389-12402</pages>
       <abstract>In few-shot settings, fully conveying the semantic information of the dialogue act is a crucial challenge for Natural Language Generation (NLG) in the task-oriented dialogue system. An interesting fact is that NLG and Spoken Language Understanding (SLU) are a natural dual problem pair. Suppose the response generated by the NLG module can be restored to the corresponding dialogue act by the SLU module, which reflects that the generated response fully conveys the semantic information of the dialogue act. Based on this idea, a novel Dual Supervised Pre-trained Model for a few-shot Natural Language Generation (DSPM-NLG) is proposed to regularize the pre-training process. We adopt a joint model with a dual supervised framework to learn the dual correlation between NLG and SLU from the perspective of probability. In addition, a slot-masked strategy is designed to enable the model to focus better on the key slot-value pairs. DSPM-NLG is continuously trained on existing public large-scale annotated data, which thoroughly learns the duality between two tasks to enhance the semantically controlling and generalization abilities of the pre-trained model. Experiments demonstrate that our proposed model performs outstandingly on the few-shot benchmark dataset and outperforms the previous SOTA results.</abstract>
@@ -13096,7 +13096,7 @@
       <author><first>Bingyang</first><last>Ye</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Marc</first><last>Verhagen</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Eben</first><last>Holderness</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <pages>12448-12460</pages>
       <abstract>We demonstrate that coreference resolution in procedural texts is significantly improved when performing transformation-based entity linking prior to coreference relation identification. When events in the text introduce changes to the state of participating entities, it is often impossible to accurately link entities in anaphoric and coreference relations without an understanding of the transformations those entities undergo. We show how adding event semantics helps to better model entity coreference. We argue that all transformation predicates, not just creation verbs, introduce a new entity into the discourse, as a kind of generalized Result Role, which is typically not textually mentioned. This allows us to model procedural texts as process graphs and to compute the coreference type for any two entities in the recipe. We present our annotation methodology and the corpus generated as well as describe experiments on coreference resolution of entity mentions under a process-oriented model of events.</abstract>
       <url hash="d5d0a684">2023.findings-acl.788</url>
@@ -13132,7 +13132,7 @@
     <paper id="791">
       <title>Lexical Translation Inconsistency-Aware Document-Level Translation Repair</title>
       <author><first>Zhen</first><last>Zhang</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, Suzhou</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, Suzhou</affiliation></author>
       <author><first>Shimin</first><last>Tao</last><affiliation>huawei</affiliation></author>
       <author><first>Hao</first><last>Yang</last><affiliation>Huawei Co. Ltd</affiliation></author>
       <pages>12492-12505</pages>
@@ -13173,7 +13173,7 @@
     <paper id="794">
       <title>Less Likely Brainstorming: Using Language Models to Generate Alternative Hypotheses</title>
       <author><first>Liyan</first><last>Tang</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Weill Cornell Medicine</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Weill Cornell Medicine</affiliation></author>
       <author><first>Yanshan</first><last>Wang</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Ying</first><last>Ding</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Greg</first><last>Durrett</last><affiliation>UT Austin</affiliation></author>
@@ -13186,7 +13186,7 @@
     </paper>
     <paper id="795">
       <title>Language Modeling with Latent Situations</title>
-      <author><first>Belinda Z.</first><last>Li</last><affiliation>MIT</affiliation></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last><affiliation>MIT</affiliation></author>
       <author><first>Maxwell</first><last>Nye</last><affiliation>MIT</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>MIT</affiliation></author>
       <pages>12556-12571</pages>
@@ -13199,7 +13199,7 @@
     <paper id="796">
       <title>Can Cross-Lingual Transferability of Multilingual Transformers Be Activated Without End-Task Data?</title>
       <author><first>Zewen</first><last>Chi</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>12572-12584</pages>
       <abstract>Pretrained multilingual Transformers have achieved great success in cross-lingual transfer learning. Current methods typically activate the cross-lingual transferability of multilingual Transformers by fine-tuning them on end-task data. However, the methods cannot perform cross-lingual transfer when end-task data are unavailable. In this work, we explore whether the cross-lingual transferability can be activated without end-task data. We propose a cross-lingual transfer method, named PlugIn-X. PlugIn-X disassembles monolingual and multilingual Transformers into sub-modules, and reassembles them to be the multilingual end-task model. After representation adaptation, PlugIn-X finally performs cross-lingual transfer in a plug-and-play style. Experimental results show that PlugIn-X successfully activates the cross-lingual transferability of multilingual Transformers without accessing end-task data. Moreover, we analyze how the cross-model representation alignment affects the cross-lingual transferability.</abstract>
@@ -13370,7 +13370,7 @@
     <paper id="809">
       <title>Reproducibility in <fixed-case>NLP</fixed-case>: What Have We Learned from the Checklist?</title>
       <author><first>Ian</first><last>Magnusson</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>12789-12811</pages>
       <abstract>Scientific progress in NLP rests on the reproducibility of researchers’ claims. The *CL conferences created the NLP Reproducibility Checklist in 2020 to be completed by authors at submission to remind them of key information to include. We provide the first analysis of the Checklist by examining 10,405 anonymous responses to it. First, we find evidence of an increase in reporting of information on efficiency, validation performance, summary statistics, and hyperparameters after the Checklist’s introduction. Further, we show acceptance rate grows for submissions with more Yes responses. We find that the 44% of submissions that gather new data are 5% less likely to be accepted than those that did not; the average reviewer-rated reproducibility of these submissions is also 2% lower relative to the rest. We find that only 46% of submissions claim to open-source their code, though submissions that do have 8% higher reproducibility score relative to those that do not, the most for any item. We discuss what can be inferred about the state of reproducibility in NLP, and provide a set of recommendations for future conferences, including: a) allowing submitting code and appendices one week after the deadline, and b) measuring dataset reproducibility by a checklist of data collection practices.</abstract>
@@ -13383,7 +13383,7 @@
       <title>Domain Generalization via Switch Knowledge Distillation for Robust Review Representation</title>
       <author><first>You</first><last>Zhang</last><affiliation>Yunnan University</affiliation></author>
       <author><first>Jin</first><last>Wang</last><affiliation>Yunnan University</affiliation></author>
-      <author><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
       <author><first>Dan</first><last>Xu</last><affiliation>Yunnan University</affiliation></author>
       <author><first>Xuejie</first><last>Zhang</last><affiliation>Yunnan University</affiliation></author>
       <pages>12812-12826</pages>
@@ -13396,7 +13396,7 @@
     <paper id="811">
       <title>On Search Strategies for Document-Level Neural Machine Translation</title>
       <author><first>Christian</first><last>Herold</last><affiliation>RWTH Aachen University</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>RWTH Aachen University</affiliation></author>
       <pages>12827-12836</pages>
       <abstract>Compared to sentence-level systems, document-level neural machine translation (NMT) models produce a more consistent output across a document and are able to better resolve ambiguities within the input. There are many works on document-level NMT, mostly focusing on modifying the model architecture or training strategy to better accommodate the additional context-input. On the other hand, in most works, the question on how to perform search with the trained model is scarcely discussed, sometimes not mentioned at all. In this work, we aim to answer the question how to best utilize a context-aware translation model in decoding. We start with the most popular document-level NMT approach and compare different decoding schemes, some from the literature and others proposed by us. In the comparison, we are using both, standard automatic metrics, as well as specific linguistic phenomena on three standard document-level translation benchmarks. We find that most commonly used decoding strategies perform similar to each other and that higher quality context information has the potential to further improve the translation.</abstract>
       <url hash="d104aefa">2023.findings-acl.811</url>
@@ -13557,7 +13557,7 @@
       <author><first>Yi</first><last>Tay</last><affiliation>Google Research</affiliation></author>
       <author><first>Hyung Won</first><last>Chung</last><affiliation>Google Research</affiliation></author>
       <author><first>Aakanksha</first><last>Chowdhery</last><affiliation>Google Research</affiliation></author>
-      <author><first>Quoc</first><last>Le</last><affiliation>Google Inc</affiliation></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last><affiliation>Google Inc</affiliation></author>
       <author><first>Ed</first><last>Chi</last><affiliation>Google Research</affiliation></author>
       <author><first>Denny</first><last>Zhou</last><affiliation>Google</affiliation></author>
       <author><first>Jason</first><last>Wei</last><affiliation>OpenAI</affiliation></author>
@@ -13634,7 +13634,7 @@
       <title>A Memory Model for Question Answering from Streaming Data Supported by Rehearsal and Anticipation of Coreference Information</title>
       <author><first>Vladimir</first><last>Araujo</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Alvaro</first><last>Soto</last><affiliation>PUC</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
       <pages>13124-13138</pages>
       <abstract>Existing question answering methods often assume that the input content (e.g., documents or videos) is always accessible to solve the task. Alternatively, memory networks were introduced to mimic the human process of incremental comprehension and compression of the information in a fixed-capacity memory. However, these models only learn how to maintain memory by backpropagating errors in the answers through the entire network. Instead, it has been suggested that humans have effective mechanisms to boost their memorization capacities, such as rehearsal and anticipation. Drawing inspiration from these, we propose a memory model that performs rehearsal and anticipation while processing inputs to memorize important information for solving question answering tasks from streaming data. The proposed mechanisms are applied self-supervised during training through masked modeling tasks focused on coreference information. We validate our model on a short-sequence (bAbI) dataset as well as large-sequence textual (NarrativeQA) and video (ActivityNet-QA) question answering datasets, where it achieves substantial improvements over previous memory network approaches. Furthermore, our ablation study confirms the proposed mechanisms’ importance for memory models.</abstract>
       <url hash="ffad9faf">2023.findings-acl.830</url>
@@ -13748,7 +13748,7 @@
       <title>Causal interventions expose implicit situation models for commonsense language understanding</title>
       <author><first>Takateru</first><last>Yamakoshi</last><affiliation>University of Tokyo</affiliation></author>
       <author><first>James</first><last>McClelland</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Adele</first><last>Goldberg</last><affiliation>Princeton University</affiliation></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last><affiliation>Princeton University</affiliation></author>
       <author><first>Robert</first><last>Hawkins</last><affiliation>Princeton University</affiliation></author>
       <pages>13265-13293</pages>
       <abstract>Accounts of human language processing have long appealed to implicit “situation models” that enrich comprehension with relevant but unstated world knowledge. Here, we apply causal intervention techniques to recent transformer models to analyze performance on the Winograd Schema Challenge (WSC), where a single context cue shifts interpretation of an ambiguous pronoun. We identify a relatively small circuit of attention heads that are responsible for propagating information from the context word that guides which of the candidate noun phrases the pronoun ultimately attends to. We then compare how this circuit behaves in a closely matched “syntactic” control where the situation model is not strictly necessary. These analyses suggest a distinct pathway through which implicit situation models may be constructed to guide pronoun resolution</abstract>
@@ -13763,8 +13763,8 @@
       <author><first>Shuangzhi</first><last>Wu</last><affiliation>Bytedance</affiliation></author>
       <author><first>Xinnian</first><last>Liang</last><affiliation>Beihang University</affiliation></author>
       <author><first>Zefan</first><last>Zhou</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Muyun</first><last>Yang</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>13294-13301</pages>
       <abstract>Unsupervised domain adaptation of machine translation, which adapts a pre-trained translation model to a specific domain without in-domain parallel data, has drawn extensive attention in recent years. However, most existing methods focus on the fine-tuning based techniques, which is non-extensible. In this paper, we propose a new method to perform unsupervised domain adaptation in a non-parametric manner. Our method only resorts to in-domain monolingual data, and we jointly perform nearest neighbour inference on both forward and backward translation directions. The forward translation model creates nearest neighbour datastore for the backward direction, and vice versa, strengthening each other in an iterative style. Experiments on multi-domain datasets demonstrate that our method significantly improves the in-domain translation performance and achieves state-of-the-art results among non-parametric methods.</abstract>
       <url hash="43f9dbe9">2023.findings-acl.840</url>
@@ -13788,7 +13788,7 @@
       <title>With Prejudice to None: A Few-Shot, Multilingual Transfer Learning Approach to Detect Social Bias in Low Resource Languages</title>
       <author><first>Nihar</first><last>Sahoo</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
       <author><first>Niteesh</first><last>Mallela</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>13316-13330</pages>
       <abstract>In this paper, we describe our work on social bias detection in a low-resource multilingual setting in which the languages are from two very divergent families- Indo-European (English, Hindi, and Italian) and Altaic (Korean). Currently, the majority of the social bias datasets available are in English and this inhibits progress on social bias detection in low-resource languages. To address this problem, we introduce a new dataset for social bias detection in Hindi and investigate multilingual transfer learning using publicly available English, Italian, and Korean datasets. The Hindi dataset contains 9k social media posts annotated for (i) binary bias labels (bias/neutral), (ii) binary labels for sentiment (positive/negative), (iii) target groups for each bias category, and (iv) rationale for annotated bias labels (a short piece of text). We benchmark our Hindi dataset using different multilingual models, with XLM-R achieving the best performance of 80.8 macro-F1 score. Our results show that the detection of social biases in resource-constrained languages such as Hindi and Korean may be improved with the use of a similar dataset in English. We also show that translating all datasets into English does not work effectively for detecting social bias, since the nuances of source language are lost in translation. All the scripts and datasets utilized in this study will be publicly available.</abstract>
       <url hash="247dd861">2023.findings-acl.842</url>
@@ -13842,7 +13842,7 @@
       <author><first>Rahul</first><last>Lokesh</last><affiliation>Samsung Research America</affiliation></author>
       <author><first>Alireza</first><last>Zareian</last><affiliation>Columbia University</affiliation></author>
       <author><first>Bo</first><last>Wu</last><affiliation>MIT-IBM Watson AI Lab</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University</affiliation></author>
       <pages>13378-13386</pages>
       <abstract>Image-caption pretraining has been quite successfully used for downstream vision tasks like zero-shot image classification and object detection. However, image-caption pretraining is still a hard problem – it requires multiple concepts (nouns) from captions to be aligned to several objects in images. To tackle this problem, we go to the roots – the best learner, children. We take inspiration from cognitive science studies dealing with children’s language learning to propose a curriculum learning framework. The learning begins with easy-to-align image caption pairs containing one concept per caption. The difficulty is progressively increased with each new phase by adding one more concept per caption. Correspondingly, the knowledge acquired in each learning phase is utilized in subsequent phases to effectively constrain the learning problem to aligning one new concept-object pair in each phase. We show that this learning strategy improves over vanilla image-caption training in various settings – pretraining from scratch, using a pretrained image or/and pretrained text encoder, low data regime etc.</abstract>
       <url hash="ce75e4ac">2023.findings-acl.846</url>
@@ -13907,7 +13907,7 @@
       <author><first>Yuntao</first><last>Bai</last><affiliation>Anthropic, PBC</affiliation></author>
       <author><first>Zac</first><last>Hatfield-Dodds</last><affiliation>Anthropic</affiliation></author>
       <author><first>Jack</first><last>Clark</last><affiliation>Anthropic</affiliation></author>
-      <author><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last><affiliation>New York University</affiliation></author>
       <author><first>Amanda</first><last>Askell</last><affiliation>Anthropic</affiliation></author>
       <author><first>Roger</first><last>Grosse</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Danny</first><last>Hernandez</last><affiliation>Anthropic</affiliation></author>
@@ -13944,8 +13944,8 @@
       <author><first>Jingye</first><last>Li</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Yijiang</first><last>Liu</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last><affiliation>Wuhan University</affiliation></author>
       <pages>13449-13467</pages>
       <abstract>The rapid development of aspect-based sentiment analysis (ABSA) within recent decades shows great potential for real-world society. The current ABSA works, however, are mostly limited to the scenario of a single text piece, leaving the study in dialogue contexts unexplored. To bridge the gap between fine-grained sentiment analysis and conversational opinion mining, in this work, we introduce a novel task of conversational aspect-based sentiment quadruple analysis, namely DiaASQ, aiming to detect the quadruple of target-aspect-opinion-sentiment in a dialogue. We manually construct a large-scale high-quality DiaASQ dataset in both Chinese and English languages. We deliberately develop a neural model to benchmark the task, which advances in effectively performing end-to-end quadruple prediction, and manages to incorporate rich dialogue-specific and discourse feature representations for better cross-utterance quadruple extraction. We hope the new benchmark will spur more advancements in the sentiment analysis community.</abstract>
       <url hash="579e6015">2023.findings-acl.849</url>
@@ -14005,7 +14005,7 @@
       <author><first>Bowei</first><last>Zou</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <author><first>Rui</first><last>Fan</last><affiliation>Faculty of Artificial Intelligence in Education, Central China Normal University</affiliation></author>
       <author><first>Tingting</first><last>He</last><affiliation>School of Computer, Central China Normal University</affiliation></author>
-      <author><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last><affiliation>Institute for Infocomm Research</affiliation></author>
       <pages>13508-13523</pages>
       <abstract>Most previous few-shot Spoken Language Understanding (SLU) models typically need to be trained on a set of data-rich source domains and adapt to the target domain with a few examples. In this paper, we explore a more practical scenario for few-shot SLU, in which we only assume access to a pre-trained language model and a few labeled examples without any other source domain data. We concentrate on understanding how far the few-shot SLU could be pushed in this setting. To this end, we develop a prompt-based intent detection model in few-shot settings, which leverages the BERT original pre-training next sentence prediction task and the prompt template to detect the user’s intent. For slot filling, we propose an approach of reconstructing slot labels, which reduces the training complexity by reducing the number of slot labels in few-shot settings. To evaluate the few-shot SLU for a more practical scenario, we present two benchmarks, FewShotATIS and FewShotSNIPS. And a dynamic sampling strategy is designed to construct the two datasets according to the learning difficulty of each intent and slot. Experiments on FewShotATIS and FewShotSNIPS demonstrate that our proposed model achieves state-of-the-art performance.</abstract>
       <url hash="317c3c01">2023.findings-acl.853</url>
@@ -14053,7 +14053,7 @@
       <author><first>SongYang</first><last>Gao</last><affiliation>Fudan University</affiliation></author>
       <author><first>Shihan</first><last>Dou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jin</first><last>Ma</last><affiliation>ustc</affiliation></author>
       <author><first>Ying</first><last>Shan</last><affiliation>Tencent</affiliation></author>
       <pages>13573-13581</pages>
@@ -14117,7 +14117,7 @@
     <paper id="862">
       <title><fixed-case>A</fixed-case>tten<fixed-case>W</fixed-case>alker: Unsupervised Long-Document Question Answering via Attention-based Graph Walking</title>
       <author><first>Yuxiang</first><last>Nie</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Wei</first><last>Wei</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>13650-13663</pages>
@@ -14157,7 +14157,7 @@
     </paper>
     <paper id="865">
       <title><fixed-case>RISE</fixed-case>: Leveraging Retrieval Techniques for Summarization Evaluation</title>
-      <author><first>David</first><last>Uthus</last><affiliation>Google Research</affiliation></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last><affiliation>Google Research</affiliation></author>
       <author><first>Jianmo</first><last>Ni</last><affiliation>Google</affiliation></author>
       <pages>13697-13709</pages>
       <abstract>Evaluating automatically-generated text summaries is a challenging task. While there have been many interesting approaches, they still fall short of human evaluations. We present RISE, a new approach for evaluating summaries by leveraging techniques from information retrieval. RISE is first trained as a retrieval task using a dual-encoder retrieval setup, and can then be subsequently utilized for evaluating a generated summary given an input document, without gold reference summaries. RISE is especially well suited when working on new datasets where one may not have reference summaries available for evaluation. We conduct comprehensive experiments on the SummEval benchmark (Fabbri et al., 2021) and a long document summarization benchmark. The results show that RISE consistently achieves higher correlation with human evaluations compared to many past approaches to summarization evaluation. Furthermore, RISE also demonstrates data-efficiency and generalizability across languages.</abstract>
@@ -14182,7 +14182,7 @@
     <paper id="867">
       <title>Model Interpretability and Rationale Extraction by Input Mask Optimization</title>
       <author><first>Marc</first><last>Brinner</last><affiliation>Bielefeld University</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <pages>13722-13744</pages>
       <abstract>Concurrent with the rapid progress in neural network-based models in NLP, the need for creating explanations for the predictions of these black-box models has risen steadily. Yet, especially for complex inputs like texts or images, existing interpretability methods still struggle with deriving easily interpretable explanations that also accurately represent the basis for the model’s decision. To this end, we propose a new, model-agnostic method to generate extractive explanations for predictions made by neural networks, that is based on masking parts of the input which the model does not consider to be indicative of the respective class. The masking is done using gradient-based optimization combined with a new regularization scheme that enforces sufficiency, comprehensiveness, and compactness of the generated explanation. Our method achieves state-of-the-art results in a challenging paragraph-level rationale extraction task, showing that this task can be performed without training a specialized model. We further apply our method to image inputs and obtain high-quality explanations for image classifications, which indicates that the objectives for optimizing explanation masks in text generalize to inputs of other modalities.</abstract>
       <url hash="f5e08bde">2023.findings-acl.867</url>
@@ -14195,7 +14195,7 @@
       <author><first>Samuel</first><last>Cahyawijaya</last><affiliation>HKUST</affiliation></author>
       <author><first>Holy</first><last>Lovenia</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Bryan</first><last>Wilie</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Rahmad</first><last>Mahendra</last><affiliation>Universitas Indonesia</affiliation></author>
@@ -14213,7 +14213,7 @@
       <author><first>Samsul</first><last>Rahmadani</last><affiliation>Bahasa.ai</affiliation></author>
       <author><first>Yulianti</first><last>Oenang</last><affiliation>ITB</affiliation></author>
       <author><first>Ali</first><last>Septiandri</last><affiliation>Nokia Bell Labs</affiliation></author>
-      <author><first>James</first><last>Jaya</last><affiliation/></author>
+      <author><first>James</first><last>Jaya</last></author>
       <author><first>Kaustubh</first><last>Dhole</last><affiliation>Emory University</affiliation></author>
       <author><first>Arie</first><last>Suryani</last><affiliation>STEI ITB</affiliation></author>
       <author><first>Rifki Afina</first><last>Putri</last><affiliation>KAIST</affiliation></author>
@@ -14234,7 +14234,7 @@
       <author><first>Tirana</first><last>Fatyanosa</last><affiliation>Kumamoto University</affiliation></author>
       <author><first>Ziwei</first><last>Ji</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Herry</first><last>Sujaini</last><affiliation>Tanjungpura University</affiliation></author>
@@ -14253,7 +14253,7 @@
       <author><first>Jieyi</first><last>Huang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Chunhao</first><last>Zhang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>13819-13832</pages>
       <abstract>How animals communicate and whether they have languages is a persistent curiosity of human beings. However, the study of animal communications has been largely restricted to data from field recordings or in a controlled environment, which is expensive and limited in scale and variety. In this paper, we take domestic Shiba Inu dogs as an example, and extract their vocal communications from large amount of YouTube videos of Shiba Inu dogs. We classify these clips into different scenarios and locations, and further transcribe the audio into phonetically symbolic scripts through a systematic process. We discover consistent phonetic symbols among their expressions, which indicates that Shiba Inu dogs can have systematic verbal communication patterns. This reusable framework produces the first-of-its-kind Shiba Inu vocal communication dataset that will be valuable to future research in both zoology and linguistics.</abstract>
       <url hash="5682b24a">2023.findings-acl.869</url>
@@ -14277,7 +14277,7 @@
       <title>Improving Long Dialogue Summarization with Semantic Graph Representation</title>
       <author><first>Yilun</first><last>Hua</last><affiliation>Columbia University</affiliation></author>
       <author><first>Zhaoyuan</first><last>Deng</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>13851-13883</pages>
       <abstract>Although Large Language Models (LLMs) are successful in abstractive summarization of short dialogues, summarization of long dialogues remains challenging. To address this challenge, we propose a novel algorithm that processes complete dialogues comprising thousands of tokens into topic-segment-level Abstract Meaning Representation (AMR) graphs, which explicitly capture the dialogue structure, highlight salient semantics, and preserve high-level information. We also develop a new text-graph attention to leverage both graph semantics and a pretrained LLM that exploits the text. Finally, we propose an AMR node selection loss used jointly with conventional cross-entropy loss, to create additional training signals that facilitate graph feature encoding and content selection. Experiments show that our system outperforms the state-of-the-art models on multiple long dialogue summarization datasets, especially in low-resource settings, and generalizes well to out-of-domain data.</abstract>
       <url hash="6025b100">2023.findings-acl.871</url>
@@ -14463,7 +14463,7 @@
       <title>Run Like a Girl! Sport-Related Gender Bias in Language and Vision</title>
       <author><first>Sophia</first><last>Harrison</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <author><first>Eleonora</first><last>Gualdoni</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
-      <author><first>Gemma</first><last>Boleda</last><affiliation>Universitat Pompeu Fabra / ICREA</affiliation></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last><affiliation>Universitat Pompeu Fabra / ICREA</affiliation></author>
       <pages>14093-14103</pages>
       <abstract>Gender bias in Language and Vision datasets and models has the potential to perpetuate harmful stereotypes and discrimination. We analyze gender bias in two Language and Vision datasets. Consistent with prior work, we find that both datasets underrepresent women, which promotes their invisibilization. Moreover, we hypothesize and find that a bias affects human naming choices for people playing sports: speakers produce names indicating the sport (e.g. “tennis player” or “surfer”) more often when it is a man or a boy participating in the sport than when it is a woman or a girl, with an average of 46% vs. 35% of sports-related names for each gender. A computational model trained on these naming data reproduces thebias. We argue that both the data and the model result in representational harm against women.</abstract>
       <url hash="0f7685b2">2023.findings-acl.886</url>
@@ -14490,7 +14490,7 @@
       <author><first>Lawrence</first><last>Chillrud</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Amith</first><last>Ananthram</last><affiliation>Columbia University</affiliation></author>
       <author><first>Melanie</first><last>Subbiah</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University and Amazon (Amazon Scholar)</affiliation></author>
       <pages>14114-14127</pages>
       <abstract>We present a new fact-checking benchmark, Check-COVID, that requires systems to verify claims about COVID-19 from news using evidence from scientific articles. This approach to fact-checking is particularly challenging as it requires checking internet text written in everyday language against evidence from journal articles written in formal academic language. Check-COVID contains 1, 504 expert-annotated news claims about the coronavirus paired with sentence-level evidence from scientific journal articles and veracity labels. It includes both extracted (journalist-written) and composed (annotator-written) claims. Experiments using both a fact-checking specific system and GPT-3.5, which respectively achieve F1 scores of 76.99 and 69.90 on this task, reveal the difficulty of automatically fact-checking both claim types and the importance of in-domain data for good performance. Our data and models are released publicly at <url>https://github.com/posuer/Check-COVID</url>.</abstract>
       <url hash="cd69710d">2023.findings-acl.888</url>
@@ -14514,7 +14514,7 @@
     <paper id="890">
       <title>Tokenization with Factorized Subword Encoding</title>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo, Language Technology Group</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
       <pages>14143-14161</pages>
       <abstract>In recent years, language models have become increasingly larger and more complex. However, the input representations for these models continue to rely on simple and greedy subword tokenization methods. In this paper, we propose a novel tokenization method that factorizes subwords onto discrete triplets using a VQ-VAE model. The effectiveness of the proposed tokenization method, referred to as the Factorizer, is evaluated on language modeling and morpho-syntactic tasks for 7 diverse languages. Results indicate that this method is more appropriate and robust for morphological tasks than the commonly used byte-pair encoding (BPE) tokenization algorithm.</abstract>
       <url hash="0b75f237">2023.findings-acl.890</url>
@@ -14534,7 +14534,7 @@
     <paper id="892">
       <title>“A Little is Enough”: Few-Shot Quality Estimation based Corpus Filtering improves Machine Translation</title>
       <author><first>Akshay</first><last>Batheja</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <pages>14175-14185</pages>
       <abstract>Quality Estimation (QE) is the task of evaluating the quality of a translation when reference translation is not available. The goal of QE aligns with the task of corpus filtering, where we assign the quality score to the sentence pairs present in the pseudo-parallel corpus. We propose a Quality Estimation based Filtering approach to extract high-quality parallel data from the pseudo-parallel corpus. To the best of our knowledge, this is a novel adaptation of QE framework to extracting quality parallel corpus from the pseudo-parallel corpus.. By training with this filtered corpus, we observe an improvement in the Machine Translation (MT) system’s performance by up to 1.8 BLEU points, for English-Marathi, Chinese-English, and Hindi-Bengali language pairs, over the baseline model. The baseline model is the one that is trained on the whole pseudo-parallel corpus. Our Few-shot QE model transfer learned from the English-Marathi QE model and fine-tuned on only 500 Hindi-Bengali training instances, shows an improvement of up to 0.6 BLEU points for Hindi-Bengali language pair, compared to the baseline model. This demonstrates the promise of transfer learning in the setting under discussion. QE systems typically require in the order of (7K-25K) of training data. Our Hindi-Bengali QE is trained on only 500 instances of training that is 1/40th of the normal requirement and achieves comparable performance. All the scripts and datasets utilized in this study will be publicly available.</abstract>
       <url hash="2991f306">2023.findings-acl.892</url>
@@ -14617,7 +14617,7 @@
       <title>An Empirical Comparison of <fixed-case>LM</fixed-case>-based Question and Answer Generation Methods</title>
       <author><first>Asahi</first><last>Ushio</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Fernando</first><last>Alva-Manchego</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>14262-14272</pages>
       <abstract>Question and answer generation (QAG) consists of generating a set of question-answer pairs given a context (e.g. a paragraph). This task has a variety of applications, such as data augmentation for question answering (QA) models, information retrieval and education. In this paper, we establish baselines with three different QAG methodologies that leverage sequence-to-sequence language model (LM) fine-tuning. Experiments show that an end-to-end QAG model, which is computationally light at both training and inference times, is generally robust and outperforms other more convoluted approaches. However, there are differences depending on the underlying generative LM. Finally, our analysis shows that QA models fine-tuned solely on generated question-answer pairs can be competitive when compared to supervised QA models trained on human-labeled data.</abstract>
       <url hash="65046546">2023.findings-acl.899</url>
@@ -14627,7 +14627,7 @@
     <paper id="900">
       <title>Contrastive Learning with Generated Representations for Inductive Knowledge Graph Embedding</title>
       <author><first>Qian</first><last>Li</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Daling</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Shi</first><last>Feng</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Yifei</first><last>Zhang</last><affiliation>Northeastern University</affiliation></author>
@@ -14687,7 +14687,7 @@
       <author><first>Liang</first><last>Chen</last></author>
       <author><first>Peiyi</first><last>Wang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>13-26</pages>
       <abstract>Abstract Meaning Representation (AMR) parsing aims to extract an abstract semantic graph from a given sentence. The sequence-to-sequence approaches, which linearize the semantic graph into a sequence of nodes and edges and generate the linearized graph directly, have achieved good performance. However, we observed that these approaches suffer from structure loss accumulation during the decoding process, leading to a much lower F1-score for nodes and edges decoded later compared to those decoded earlier. To address this issue, we propose a novel Reverse Graph Linearization (RGL) enhanced framework. RGL defines both default and reverse linearization orders of an AMR graph, where most structures at the back part of the default order appear at the front part of the reversed order and vice versa. RGL incorporates the reversed linearization to the original AMR parser through a two-pass self-distillation mechanism, which guides the model when generating the default linearizations. Our analysis shows that our proposed method significantly mitigates the problem of structure loss accumulation, outperforming the previously best AMR parsing model by 0.8 and 0.5 Smatch scores on the AMR 2.0 and AMR 3.0 dataset, respectively. The code are available at <url>https://github.com/pkunlp-icler/AMR_reverse_graph_linearization</url>.</abstract>
       <url hash="3420d7c4">2023.findings-emnlp.2</url>
@@ -14717,7 +14717,7 @@
       <author><first>Ruchi</first><last>Khapra</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Pratyush</first><last>Kumar</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <pages>40-57</pages>
       <abstract>Transliteration is very important in the Indian language context due to the usage of multiple scripts and the widespread use of romanized inputs. However, few training and evaluation sets are publicly available. We introduce Aksharantar, the largest publicly available transliteration dataset for Indian languages created by mining from monolingual and parallel corpora, as well as collecting data from human annotators. The dataset contains 26 million transliteration pairs for 21 Indic languages from 3 language families using 12 scripts. Aksharantar is 21 times larger than existing datasets and is the first publicly available dataset for 7 languages and 1 language family. We also introduce a test set of 103k word pairs for 19 languages that enables a fine-grained analysis of transliteration models on native origin words, foreign words, frequent words, and rare words. Using the training set, we trained IndicXlit, a multilingual transliteration model that improves accuracy by 15% on the Dakshina test set, and establishes strong baselines on the Aksharantar testset introduced in this work. The models, mining scripts, transliteration guidelines, and datasets are available at https://github.com/AI4Bharat/IndicXlit under open-source licenses.</abstract>
       <url hash="a2690146">2023.findings-emnlp.4</url>
@@ -14729,7 +14729,7 @@
       <author><first>Junxiong</first><last>Wang</last></author>
       <author><first>Jing Nathan</first><last>Yan</last></author>
       <author><first>Albert</first><last>Gu</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>58-69</pages>
       <abstract>Transformers have been essential to pretraining success in NLP. While other architectures have been used, downstream accuracy is either significantly worse, or requires attention layers to match standard benchmarks such as GLUE. This work explores pretraining without attention by using recent advances in sequence routing based on state-space models (SSMs). Our proposed model, Bidirectional Gated SSM (BiGS), combines SSM layers with a multiplicative gating architecture that has been effective in simplified sequence modeling architectures. The model learns static layers that do not consider pair-wise interactions. Even so, BiGS is able to match BERT pretraining accuracy on GLUE and can be extended to long-form pretraining of 4096 tokens without approximation. Analysis shows that while the models have similar average accuracy, the approach has different inductive biases than BERT and scales more efficiently to longer sequences.</abstract>
       <url hash="034771f9">2023.findings-emnlp.5</url>
@@ -14739,7 +14739,7 @@
     <paper id="6">
       <title>Time-Aware Representation Learning for Time-Sensitive Question Answering</title>
       <author><first>Jungbin</first><last>Son</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>70-77</pages>
       <abstract>Time is one of the crucial factors in real-world question answering (QA) problems. However, language models have difficulty understanding the relationships between time specifiers, such as ‘after’ and ‘before’, and numbers, since existing QA datasets do not include sufficient time expressions. To address this issue, we propose a Time-Context aware Question Answering (TCQA) framework. We suggest a Time-Context dependent Span Extraction (TCSE) task, and build a time-context dependent data generation framework for model training. Moreover, we present a metric to evaluate the time awareness of the QA model using TCSE. The TCSE task consists of a question and four sentence candidates classified as correct or incorrect based on time and context. The model is trained to extract the answer span from the sentence that is both correct in time and context. The model trained with TCQA outperforms baseline models up to 8.5 of the F1-score in the TimeQA dataset. Our dataset and code are available at https://github.com/sonjbin/TCQA</abstract>
       <url hash="59a16b66">2023.findings-emnlp.6</url>
@@ -14815,7 +14815,7 @@
       <author><first>Yanda</first><last>Chen</last></author>
       <author><first>Chen</first><last>Zhao</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>He</first><last>He</last></author>
       <pages>155-167</pages>
       <abstract>In-context learning (ICL) suffers from oversensitivity to the prompt, making it unreliable in real-world scenarios. We study the sensitivity of ICL with respect to multiple perturbation types. First, we find that label bias obscures the true sensitivity, and therefore prior work may have significantly underestimated ICL sensitivity. Second, we observe a strong negative correlation between ICL sensitivity and accuracy: predictions sensitive to perturbations are less likely to be correct. Motivated by these findings, we propose <tex-math>SenSel</tex-math>, a few-shot selective prediction method that abstains from sensitive predictions. Experiments on ten classification datasets show that <tex-math>SenSel</tex-math> consistently outperforms two commonly used confidence-based and entropy-based baselines on abstention decisions.</abstract>
@@ -14889,7 +14889,7 @@
       <title>Data Pruning for Efficient Model Pruning in Neural Machine Translation</title>
       <author><first>Abdul Hameed</first><last>Azeemi</last></author>
       <author><first>Ihsan</first><last>Qazi</last></author>
-      <author><first>Agha</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha</first><last>Raza</last></author>
       <pages>236-246</pages>
       <abstract>Model pruning methods reduce memory requirements and inference time of large-scale pre-trained language models after deployment. However, the actual pruning procedure is computationally intensive, involving repeated training and pruning until the required sparsity is achieved. This paper combines data pruning with movement pruning for Neural Machine Translation (NMT) to enable efficient fine-pruning. We design a dataset pruning strategy by leveraging cross-entropy scores of individual training instances. We conduct pruning experiments on the task of machine translation from Romanian-to-English and Turkish-to-English, and demonstrate that selecting hard-to-learn examples (top-k) based on training cross-entropy scores outperforms other dataset pruning methods. We empirically demonstrate that data pruning reduces the overall steps required for convergence and the training time of movement pruning. Finally, we perform a series of experiments to tease apart the role of training data during movement pruning and uncover new insights to understand the interplay between data and model pruning in the context of NMT.</abstract>
       <url hash="4d6ee223">2023.findings-emnlp.18</url>
@@ -14898,7 +14898,7 @@
     </paper>
     <paper id="19">
       <title>Long-Form Speech Translation through Segmentation with Finite-State Decoding Constraints on Large Language Models</title>
-      <author><first>Arya</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></author>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Shankar</first><last>Kumar</last></author>
       <author><first>Felix</first><last>Stahlberg</last></author>
@@ -14924,11 +14924,11 @@
       <title><fixed-case>R</fixed-case>ethinking<fixed-case>TMSC</fixed-case>: An Empirical Study for Target-Oriented Multimodal Sentiment Classification</title>
       <author><first>Junjie</first><last>Ye</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Gui</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>270-277</pages>
       <abstract>Recently, Target-oriented Multimodal Sentiment Classification (TMSC) has gained significant attention among scholars. However, current multimodal models have reached a performance bottleneck. To investigate the causes of this problem, we perform extensive empirical evaluation and in-depth analysis of the datasets to answer the following questions: **Q1**: Are the modalities equally important for TMSC? **Q2**: Which multimodal fusion modules are more effective? **Q3**: Do existing datasets adequately support the research? Our experiments and analyses reveal that the current TMSC systems primarily rely on the textual modality, as most of targets’ sentiments can be determined *solely* by text. Consequently, we point out several directions to work on for the TMSC task in terms of model design and dataset construction. The code and data can be found in https://github.com/Junjie-Ye/RethinkingTMSC.</abstract>
       <url hash="c083c536">2023.findings-emnlp.21</url>
@@ -14937,7 +14937,7 @@
     </paper>
     <paper id="22">
       <title>Lexical Entrainment for Conversational Systems</title>
-      <author><first>Zhengxiang</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengxiang</first><last>Shi</last></author>
       <author><first>Procheta</first><last>Sen</last></author>
       <author><first>Aldo</first><last>Lipani</last></author>
       <pages>278-293</pages>
@@ -14968,7 +14968,7 @@
       <author><first>Anjie</first><last>Fang</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>310-325</pages>
       <abstract>The adoption of voice assistants like Alexa or Siri has grown rapidly, allowing users to instantly access information via voice search. Query suggestion is a standard feature of screen-based search experiences, allowing users to explore additional topics. However, this is not trivial to implement in voice-based settings. To enable this, we tackle the novel task of suggesting questions with compact and natural voice hints to allow users to ask follow-up questions. We define the task, ground it in syntactic theory and outline linguistic desiderata for spoken hints. We propose baselines and an approach using sequence-to-sequence Transformers to generate spoken hints from a list of questions. Using a new dataset of 6681 input questions and human written hints, we evaluated the models with automatic metrics and human evaluation. Results show that a naive approach of concatenating suggested questions creates poor voice hints. Our approach, which applies a linguistically-motivated pretraining task was strongly preferred by humans for producing the most natural hints.</abstract>
       <url hash="a65de61c">2023.findings-emnlp.24</url>
@@ -15015,7 +15015,7 @@
     </paper>
     <paper id="28">
       <title>Automatic Unit Test Data Generation and Actor-Critic Reinforcement Learning for Code Synthesis</title>
-      <author><first>Philip</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip</first><last>Gorinski</last></author>
       <author><first>Matthieu</first><last>Zimmer</last></author>
       <author><first>Gerasimos</first><last>Lampouras</last></author>
       <author><first>Derrick Goh Xin</first><last>Deik</last></author>
@@ -15061,7 +15061,7 @@
       <author><first>Bernhard</first><last>Schölkopf</last></author>
       <author><first>Tom</first><last>Hope</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>415-438</pages>
       <abstract>With the recent advances in natural language processing (NLP), a vast number of applications have emerged across various use cases. Among the plethora of NLP applications, many academic researchers are motivated to do work that has a positive social impact, in line with the recent initiatives of NLP for Social Good (NLP4SG). However, it is not always obvious to researchers how their research efforts are tackling today’s big social problems. Thus, in this paper, we introduce NLP4SGPapers, a scientific dataset with three associated tasks that can help identify NLP4SG papers and characterize the NLP4SG landscape by: (1) identifying the papers that address a social problem, (2) mapping them to the corresponding UN Sustainable Development Goals (SDGs), and (3) identifying the task they are solving and the methods they are using. Using state-of-the-art NLP models, we address each of these tasks and use them on the entire ACL Anthology, resulting in a visualization workspace that gives researchers a comprehensive overview of the field of NLP4SG. Our website is available at https://nlp4sg.vercel.app . We released our data at https://huggingface.co/datasets/feradauto/NLP4SGPapers and code at https://github.com/feradauto/nlp4sg</abstract>
       <url hash="44571950">2023.findings-emnlp.31</url>
@@ -15118,7 +15118,7 @@
       <title><fixed-case>MEAL</fixed-case>: Stable and Active Learning for Few-Shot Prompting</title>
       <author><first>Abdullatif</first><last>Köksal</last></author>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>506-517</pages>
       <abstract>Few-shot classification has made great strides due to foundation models that, through priming and prompting, are highly effective few-shot learners. However, this approach has high variance both across different sets of few shots (*data selection*) and across different finetuning runs (*run variability*). This is problematic not only because it impedes the fair comparison of different approaches, but especially because it makes few-shot learning too unreliable for many real-world applications. To alleviate these issues, we make two contributions for more stable and effective few-shot learning: First, we propose novel ensembling methods and show that they substantially reduce *run variability*. Second, we introduce a new active learning (AL) criterion for *data selection* and present the first AL-based approach specifically tailored towards prompt-based learning. In our experiments, we show that our combined method, MEAL (**M**ultiprompt finetuning and prediction **E**nsembling with **A**ctive **L**earning), improves overall performance of prompt-based finetuning by 2.3 points on five diverse tasks. We publicly share our code and data splits in https://github.com/akoksal/MEAL.</abstract>
       <url hash="0986127d">2023.findings-emnlp.36</url>
@@ -15128,9 +15128,9 @@
     <paper id="37">
       <title>Structure and Label Constrained Data Augmentation for Cross-domain Few-shot <fixed-case>NER</fixed-case></title>
       <author><first>Jingyi</first><last>Zhang</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>518-530</pages>
       <abstract>Cross-domain few-shot named entity recognition (NER) is a challenging task that aims to recognize entities in target domains with limited labeled data by leveraging relevant knowledge from source domains. However, domain gaps limit the effect of knowledge transfer and harm the performance of NER models. In this paper, we analyze those domain gaps from two new perspectives, i.e., entity annotations and entity structures and leverage word-to-tag and word-to-word relations to model them, respectively. Moreover, we propose a novel method called Structure and Label Constrained Data Augmentation (SLC-DA) for Cross-domain Few-shot NER, which novelly design a label constrained pre-train task and a structure constrained optimization objectives in the data augmentation process to generate domain-specific augmented data to help NER models smoothly transition from source to target domains. We evaluate our approach on several standard datasets and achieve state-of-the-art or competitive results, demonstrating the effectiveness of our method in cross-domain few-shot NER.</abstract>
       <url hash="3db349ea">2023.findings-emnlp.37</url>
@@ -15142,7 +15142,7 @@
       <author><first>Koustava</first><last>Goswami</last></author>
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <pages>531-541</pages>
       <abstract>Exploiting cognates for transfer learning in under-resourced languages is an exciting opportunity for language understanding tasks, including unsupervised machine translation, named entity recognition and information retrieval. Previous approaches mainly focused on supervised cognate detection tasks based on orthographic, phonetic or state-of-the-art contextual language models, which under-perform for most under-resourced languages. This paper proposes a novel language-agnostic weakly-supervised deep cognate detection framework for under-resourced languages using morphological knowledge from closely related languages. We train an encoder to gain morphological knowledge of a language and transfer the knowledge to perform unsupervised and weakly-supervised cognate detection tasks with and without the pivot language for the closely-related languages. While unsupervised, it overcomes the need for hand-crafted annotation of cognates. We performed experiments on different published cognate detection datasets across language families and observed not only significant improvement over the state-of-the-art but also our method outperformed the state-of-the-art supervised and unsupervised methods. Our model can be extended to a wide range of languages from any language family as it overcomes the requirement of the annotation of the cognate pairs for training.</abstract>
       <url hash="8bc0e331">2023.findings-emnlp.38</url>
@@ -15297,7 +15297,7 @@
     <paper id="50">
       <title>Adapter-<fixed-case>TST</fixed-case>: A Parameter Efficient Method for Multiple-Attribute Text Style Transfer</title>
       <author><first>Zhiqiang</first><last>Hu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Roy</first><last>Lee</last></author>
       <pages>693-703</pages>
       <abstract>Adapting a large language model for multiple-attribute text style transfer via fine-tuning can be challenging due to the substantial amount of computational resources and labeled data required for the specific downstream task. In this paper, we address this challenge by introducing Adapter-TST, a framework that freezes the pre-trained model’s original parameters and enables the development of a multiple-attribute text style transfer model. Using BART as the backbone model, Adapter-TST utilizes different neural adapters to model different types of attribute information, similar to a plug-in connected to BART. Our method allows control over multiple attributes (e.g. sentiment, tense, active or passive voice) and configures the adapters’ architecture to generate multiple outputs in respect to attributes or compositional editing on the same sentence. We evaluate the proposed model on both traditional sentiment transfer and multiple-attribute transfer tasks. The experiment results demonstrate that Adapter-TST outperforms all the state-of-the-art baselines with significantly less computational resources. We have also empirically shown that each adapter is able to characterize specific stylistic attributes effectively and can be configured to perform compositional editing.</abstract>
@@ -15339,7 +15339,7 @@
       <author><first>Changhao</first><last>Guan</last></author>
       <author><first>Qingbin</first><last>Liu</last></author>
       <author><first>Jian</first><last>Liu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Zhe</first><last>Zhao</last></author>
       <pages>737-746</pages>
       <abstract>Current dialogue systems face diverse user requests and rapid change domains, making quickly adapt to scenarios with previous unseen slot types become a major challenge. Recently, researchers have introduced novel slot detection (NSD) to discover potential new types. However, dialogue system with NSD does not bring practical improvements due to the system still cannot handle novel slots in subsequent interactions. In this paper, we define incremental novel slot detection (INSD), which separates the dialogue system to deal with novel types as two major phrases: 1) model discovers unknown slots, 2) training model to possess the capability to handle new classes. We provide an effective model to extract novel slots with set prediction strategy and propose a query-enhanced approach to overcome catastrophic forgetting during the process of INSD. We construct two INSD datasets to evaluate our method and experimental results show that our approach exhibits superior performance.</abstract>
@@ -15415,7 +15415,7 @@
       <author><first>Jun</first><last>Araki</last></author>
       <author><first>Arsalan</first><last>Gundroo</last></author>
       <author><first>Bingqing</first><last>Wang</last></author>
-      <author><first>Rakesh</first><last>Menon</last></author>
+      <author id="rakesh-r-menon"><first>Rakesh</first><last>Menon</last></author>
       <author><first>Md</first><last>Parvez</last></author>
       <author><first>Zhe</first><last>Feng</last></author>
       <pages>822-835</pages>
@@ -15452,7 +15452,7 @@
       <author><first>Wentao</first><last>Yao</last></author>
       <author><first>Sharath Chandra Etagi</first><last>Suresh</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>873-885</pages>
       <abstract>Open-domain dialog involves generating search queries that help obtain relevant knowledge for holding informative conversations. However, it can be challenging to determine what information to retrieve when the user is passive and does not express a clear need or request. To tackle this issue, we present a novel approach that focuses on generating internet search queries that are guided by social commonsense. Specifically, we leverage a commonsense dialog system to establish connections related to the conversation topic, which subsequently guides our query generation. Our proposed framework addresses passive user interactions by integrating topic tracking, commonsense response generation and instruction-driven query generation. Through extensive evaluations, we show that our approach overcomes limitations of existing query generation techniques that rely solely on explicit dialog information, and produces search queries that are more relevant, specific, and compelling, ultimately resulting in more engaging responses.</abstract>
       <url hash="cbf92c09">2023.findings-emnlp.62</url>
@@ -15479,7 +15479,7 @@
       <author><first>Fan</first><last>Jiang</last></author>
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Tom</first><last>Drummond</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>912-931</pages>
       <abstract>Neural ‘dense’ retrieval models are state of the art for many datasets, however these models often exhibit limited domain transfer ability. Existing approaches to adaptation are unwieldy, such as requiring explicit supervision, complex model architectures, or massive external models. We present <tex-math>\texttt{ABEL}</tex-math>, a simple but effective unsupervised method to enhance passage retrieval in zero-shot settings. Our technique follows a straightforward loop: a dense retriever learns from supervision signals provided by a reranker, and subsequently, the reranker is updated based on feedback from the improved retriever. By iterating this loop, the two components mutually enhance one another’s performance. Experimental results demonstrate that our unsupervised <tex-math>\texttt{ABEL}</tex-math> model outperforms both leading supervised and unsupervised retrievers on the BEIR benchmark. Meanwhile, it exhibits strong adaptation abilities to tasks and domains that were unseen during training. By either fine-tuning <tex-math>\texttt{ABEL}</tex-math> on labelled data or integrating it with existing supervised dense retrievers, we achieve state-of-the-art results.</abstract>
       <url hash="cffa23d2">2023.findings-emnlp.65</url>
@@ -15514,7 +15514,7 @@
     <paper id="68">
       <title>The Internal State of an <fixed-case>LLM</fixed-case> Knows When It’s Lying</title>
       <author><first>Amos</first><last>Azaria</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>967-976</pages>
       <abstract>While Large Language Models (LLMs) have shown exceptional performance in various tasks, one of their most prominent drawbacks is generating inaccurate or false information with a confident tone. In this paper, we provide evidence that the LLM’s internal state can be used to reveal the truthfulness of statements. This includes both statements provided to the LLM, and statements that the LLM itself generates. Our approach is to train a classifier that outputs the probability that a statement is truthful, based on the hidden layer activations of the LLM as it reads or generates the statement. Experiments demonstrate that given a set of test sentences, of which half are true and half false, our trained classifier achieves an average of 71% to 83% accuracy labeling which sentences are true versus false, depending on the LLM base model. Furthermore, we explore the relationship between our classifier’s performance and approaches based on the probability assigned to the sentence by the LLM. We show that while LLM-assigned sentence probability is related to sentence truthfulness, this probability is also dependent on sentence length and the frequencies of words in the sentence, resulting in our trained classifier providing a more reliable approach to detecting truthfulness, highlighting its potential to enhance the reliability of LLM-generated content and its practical applicability in real-world scenarios.</abstract>
       <url hash="2a971ddc">2023.findings-emnlp.68</url>
@@ -15527,7 +15527,7 @@
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Feng</first><last>Jiang</last></author>
       <author><first>Xiaomin</first><last>Chu</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>977-986</pages>
       <abstract>Most neural abstractive summarization models are capable of producing high-quality summaries. However, they still frequently contain factual errors. Existing factuality-oriented abstractive summarization models only consider the integration of factual information and ignore the causes of factual errors. To address this issue, we propose a factuality-oriented abstractive summarization model DASum, which is based on a new task factual relation discrimination that is able to identify the causes of factual errors. First, we use data augmentation methods to construct counterfactual summaries (i. e., negative samples), and build a factual summarization dataset. Then, we propose the factual relation discrimination task, which determines the factuality of the dependency relations in summaries during summary generation and guides our DASum to generate factual relations, thereby improving the factuality of summaries. Experimental results on the CNN/DM and XSUM datasets show that our DASum outperforms several state-of-the-art benchmarks in terms of the factual metrics.</abstract>
       <url hash="bd9dc776">2023.findings-emnlp.69</url>
@@ -15563,7 +15563,7 @@
       <author><first>Ziqiao</first><last>Ma</last></author>
       <author><first>Jacob</first><last>Sansom</last></author>
       <author><first>Run</first><last>Peng</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1011-1031</pages>
       <abstract>Large Language Models (LLMs) have generated considerable interest and debate regarding their potential emergence of Theory of Mind (ToM). Several recent inquiries reveal a lack of robust ToM in these models and pose a pressing demand to develop new benchmarks, as current ones primarily focus on different aspects of ToM and are prone to shortcuts and data leakage. In this position paper, we seek to answer two road-blocking questions: (1) How can we taxonomize a holistic landscape of machine ToM? (2) What is a more effective evaluation protocol for machine ToM? Following psychological studies, we taxonomize machine ToM into 7 mental state categories and delineate existing benchmarks to identify under-explored aspects of ToM. We argue for a holistic and situated evaluation of ToM to break ToM into individual components and treat LLMs as an agent who is physically situated in environments and socially situated in interactions with humans. Such situated evaluation provides a more comprehensive assessment of mental states and potentially mitigates the risk of shortcuts and data leakage. We further present a pilot study in a grid world setup as a proof of concept. We hope this position paper can facilitate future research to integrate ToM with LLMs and offer an intuitive means for researchers to better position their work in the landscape of ToM.</abstract>
       <url hash="59156af8">2023.findings-emnlp.72</url>
@@ -15630,7 +15630,7 @@
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Yutao</first><last>Zeng</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>1090-1100</pages>
       <abstract>Reasoning over Temporal Knowledge Graphs (TKGs) that predicts temporal facts (e.g., events) in the future is crucial for many applications. The temporal facts in existing TKGs only contain their core entities (i.e., the entities playing core roles therein) and formulate them as quadruples, i.e., (subject entity, predicate, object entity, timestamp). This formulation oversimplifies temporal facts and inevitably causes information loss. Therefore, we propose to describe a temporal fact more accurately as an n-tuple, containing not only its predicate and core entities, but also its auxiliary entities, as well as the roles of all entities. By so doing, TKGs are augmented to N-tuple Temporal Knowledge Graphs (N-TKGs). To conduct reasoning over N-TKGs, we further propose N-tuple Evolutional Network (NE-Net). It recurrently learns the evolutional representations of entities and predicates in temporal facts at different timestamps in the history via modeling the relations among those entities and predicates. Based on the learned representations, reasoning tasks at future timestamps can be realized via task-specific decoders. Experiment results on two newly built datasets demonstrate the superiority of N-TKG and the effectiveness of NE-Net.</abstract>
       <url hash="76e07a62">2023.findings-emnlp.77</url>
@@ -15743,7 +15743,7 @@
       <author><first>Le</first><last>Zhang</last></author>
       <author><first>Yihong</first><last>Wu</last></author>
       <author><first>Fengran</first><last>Mo</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Aishwarya</first><last>Agrawal</last></author>
       <pages>1195-1210</pages>
       <abstract>Multi-modal open-domain question answering typically requires evidence retrieval from databases across diverse modalities, such as images, tables, passages, etc. Even Large Language Models (LLMs) like GPT-4 fall short in this task. To enable LLMs to tackle the task in a zero-shot manner, we introduce MoqaGPT, a straightforward and flexible framework. Using a divide-and-conquer strategy that bypasses intricate multi-modality ranking, our framework can accommodate new modalities and seamlessly transition to new models for the task. Built upon LLMs, MoqaGPT retrieves and extracts answers from each modality separately, then fuses this multi-modal information using LLMs to produce a final answer. Our methodology boosts performance on the MMCoQA dataset, improving F1 by +37.91 points and EM by +34.07 points over the supervised baseline. On the MultiModalQA dataset, MoqaGPT surpasses the zero-shot baseline, improving F1 by 9.5 points and EM by 10.1 points, and significantly closes the gap with supervised methods. Our codebase is available at https://github.com/lezhang7/MOQAGPT.</abstract>
@@ -15767,7 +15767,7 @@
     </paper>
     <paper id="87">
       <title><fixed-case>D</fixed-case>oc<fixed-case>A</fixed-case>s<fixed-case>R</fixed-case>ef: An Empirical Study on Repurposing Reference-based Summary Quality Metrics as Reference-free Metrics</title>
-      <author><first>Forrest</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last></author>
       <author><first>Ruixuan</first><last>Tu</last></author>
       <author><first>Ge</first><last>Luo</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
@@ -15822,7 +15822,7 @@
     <paper id="91">
       <title>Selecting Key Views for Zero-Shot Entity Linking</title>
       <author><first>Xuhui</first><last>Sui</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Baohang</first><last>Zhou</last></author>
       <author><first>Xiaojie</first><last>Yuan</last></author>
@@ -15853,7 +15853,7 @@
       <author><first>Balaji</first><last>Lakshminarayanan</last></author>
       <author><first>Jiaming</first><last>Luo</last></author>
       <author><first>Mohammad</first><last>Saleh</last></author>
-      <author><first>Peter</first><last>Liu</last></author>
+      <author id="peter-j-liu"><first>Peter</first><last>Liu</last></author>
       <pages>1324-1336</pages>
       <abstract>The evaluation of abstractive summarization models typically uses test data that is identically distributed as training data. In real-world practice, documents to be summarized may contain input noise caused by text extraction artifacts or data pipeline bugs. The robustness of model performance under distribution shift caused by such noise is relatively under studied. We present a large empirical study quantifying the sometimes severe loss in performance – up to 12 ROUGE-1 points – from different types of input noise for a range of datasets and model sizes. We then propose a light-weight method for detecting and removing such noise in the input during model inference without requiring any extra training, auxiliary models, or even prior knowledge of the type of noise. Our proposed approach effectively mitigates the loss in performance, recovering a large fraction of the performance drop, sometimes as large as 11 ROUGE-1 points.</abstract>
       <url hash="44a8a513">2023.findings-emnlp.93</url>
@@ -15892,7 +15892,7 @@
     <paper id="96">
       <title>Unveiling the Multi-Annotation Process: Examining the Influence of Annotation Quantity and Instance Difficulty on Model Performance</title>
       <author><first>Pritam</first><last>Kadasi</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>1371-1388</pages>
       <abstract>The NLP community has long advocated for the construction of multi-annotator datasets to better capture the nuances of language interpretation, subjectivity, and ambiguity. This paper conducts a retrospective study to show how performance scores can vary when a dataset expands from a single annotation per instance to multiple annotations. We propose a novel multi-annotator simulation process to generate datasets with varying annotation budgets. We show that similar datasets with the same annotation budget can lead to varying performance gains. Our findings challenge the popular belief that models trained on multi-annotation examples always lead to better performance than models trained on single or few-annotation examples.</abstract>
       <url hash="74f2cd91">2023.findings-emnlp.96</url>
@@ -15904,7 +15904,7 @@
       <author><first>Yikang</first><last>Pan</last></author>
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Wenhu</first><last>Chen</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>William</first><last>Wang</last></author>
       <pages>1389-1403</pages>
@@ -15984,7 +15984,7 @@
       <author><first>Musa</first><last>İhtiyar</last></author>
       <author><first>Ömer</first><last>Özdemir</last></author>
       <author><first>Mustafa</first><last>Erengül</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>1543-1549</pages>
       <abstract>Offensive language detection is crucial in natural language processing (NLP). We investigated the importance of context for detecting such language in reply tweets on Twitter, where the use of offensive language is widespread. We collected a Turkish tweet dataset where the target group was unvaccinated people during the Covid period. Tweets in the dataset were enriched with contextual information by adding the original tweet to which a particular tweet was posted as a reply. The dataset, which includes over 28,000 tweet-reply pairs, was manually labeled by human annotators and made publicly available. In addition, we compared the performance of different machine learning models with and without contextual information. Our results show that this type of contextual information was not very useful in improving the performance of the models in general, although it slightly increased the macro-averaged F1-score of certain models.</abstract>
       <url hash="f4ab687e">2023.findings-emnlp.103</url>
@@ -15993,12 +15993,12 @@
     </paper>
     <paper id="104">
       <title>Remember what you did so you know what to do next</title>
-      <author><first>Manuel</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel</first><last>Ciosici</last></author>
       <author><first>Alex</first><last>Hedges</last></author>
       <author><first>Yash</first><last>Kankanampati</last></author>
       <author><first>Justin</first><last>Martin</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>1550-1562</pages>
       <abstract>We explore using the 6B parameter GPT-J language model to create a plan for a simulated robot to achieve 30 classes of goals in ScienceWorld, a text game simulator for elementary science experiments and for which previously published empirical work has shown large language models (LLM)s to be a poor fit (Wang et al., 2022). Using the Markov assumption, the LLM outperforms the state-of-the-art based on reinforcement learning by a factor of 1.4. When we fill the LLM’s input buffer with as many prior steps as will fit, improvement rises to 3.3x. Even when training on only 6.5% of the training data, we observe a 2.3x improvement over the state-of-the-art. Our experiments show that performance varies widely across the 30 classes of actions, indicating that averaging over tasks can hide significant performance issues.</abstract>
       <url hash="93912b5c">2023.findings-emnlp.104</url>
@@ -16023,7 +16023,7 @@
       <title>Learning to Abstract with Nonparametric Variational Information Bottleneck</title>
       <author><first>Melika</first><last>Behjati</last></author>
       <author><first>Fabio</first><last>Fehr</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>1576-1586</pages>
       <abstract>Learned representations at the level of characters, sub-words, words, and sentences, have each contributed to advances in understanding different NLP tasks and linguistic phenomena. However, learning textual embeddings is costly as they are tokenization specific and require different models to be trained for each level of abstraction. We introduce a novel language representation model which can learn to compress to different levels of abstraction at different layers of the same model. We apply Nonparametric Variational Information Bottleneck (NVIB) to stacked Transformer self-attention layers in the encoder, which encourages an information-theoretic compression of the representations through the model. We find that the layers within the model correspond to increasing levels of abstraction and that their representations are more linguistically informed. Finally, we show that NVIB compression results in a model which is more robust to adversarial perturbations.</abstract>
       <url hash="fd84f6f2">2023.findings-emnlp.106</url>
@@ -16108,7 +16108,7 @@
     </paper>
     <paper id="113">
       <title>Simple Hardware-Efficient <fixed-case>PCFG</fixed-case>s with Independent Left and Right Productions</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Songlin</first><last>Yang</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
       <author><first>Kewei</first><last>Tu</last></author>
@@ -16137,7 +16137,7 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Fred</first><last>Blain</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1686-1698</pages>
       <abstract>Automatic Post-Editing (APE) systems are prone to over-correction of the Machine Translation (MT) outputs. While Word-level Quality Estimation (QE) system can provide a way to curtail the over-correction, a significant performance gain has not been observed thus far by utilizing existing APE and QE combination strategies. In this paper, we propose joint training of a model on APE and QE tasks to improve the APE. Our proposed approach utilizes a multi-task learning (MTL) methodology, which shows significant improvement while treating both tasks as a ‘bargaining game’ during training. Moreover, we investigate various existing combination strategies and show that our approach achieves state-of-the-art performance for a ‘distant’ language pair, viz., English-Marathi. We observe an improvement of 1.09 TER and 1.37 BLEU points over a baseline QE-Unassisted APE system for English-Marathi, while also observing 0.46 TER and 0.62 BLEU points for English-German. Further, we discuss the results qualitatively and show how our approach helps reduce over-correction, thereby improving the APE performance. We also observe that the degree of integration between QE and APE directly correlates with the APE performance gain. We release our code and models publicly.</abstract>
       <url hash="63fe8baa">2023.findings-emnlp.115</url>
@@ -16168,7 +16168,7 @@
       <author><first>Kyle</first><last>Erwin</last></author>
       <author><first>Ndivhuwo</first><last>Makondo</last></author>
       <author><first>Ibrahim</first><last>Abdelaziz</last></author>
-      <author><first>Achille</first><last>Fokoue</last></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last></author>
       <author><first>Alexander</first><last>Gray</last></author>
       <author><first>Maxwell</first><last>Crouse</last></author>
       <author><first>Subhajit</first><last>Chaudhury</last></author>
@@ -16187,7 +16187,7 @@
       <author><first>Vitaly</first><last>Nikolaev</last></author>
       <author><first>Jan</first><last>Botha</last></author>
       <author><first>Michael</first><last>Chavinda</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Clara</first><last>Rivera</last></author>
       <pages>1719-1740</pages>
       <abstract>Existing data-to-text generation datasets are mostly limited to English. To address this lack of data, we create Table-to-Text in African languages (TaTA), the first large multilingual table-to-text dataset with a focus on African languages. We created TaTA by transcribing figures and accompanying text in bilingual reports by the Demographic and Health Surveys Program, followed by professional translation to make the dataset fully parallel. TaTA includes 8,700 examples in nine languages including four African languages (Hausa, Igbo, Swahili, and Yorùbá) and a zero-shot test language (Russian). We additionally release screenshots of the original figures for future research on multilingual multi-modal approaches. Through an in-depth human evaluation, we show that TaTA is challenging for current models and that less than half the outputs from an mT5-XXL-based model are understandable and attributable to the source data. Our results highlight a) the need for validating metrics; and b) the importance of domain-specific metrics.</abstract>
@@ -16201,7 +16201,7 @@
       <author><first>Mayank</first><last>Agarwal</last></author>
       <author><first>Alexander</first><last>Shypula</last></author>
       <author><first>Bailin</first><last>Wang</last></author>
-      <author><first>Derry</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></author>
       <author><first>Jie</first><last>Chen</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
       <pages>1741-1788</pages>
@@ -16217,7 +16217,7 @@
       <author><first>Jiacheng</first><last>Zhu</last></author>
       <author><first>Mengdi</first><last>Xu</last></author>
       <author><first>Douglas</first><last>Weber</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Ding</first><last>Zhao</last></author>
       <pages>1789-1804</pages>
       <abstract>Brain Signals, such as Electroencephalography (EEG), and human languages have been widely explored independently for many downstream tasks, however, the connection between them has not been well explored. In this study, we explore the relationship and dependency between EEG and language. To study at the representation level, we introduced <b>MTAM</b>, a <b>M</b>ultimodal <b>T</b>ransformer <b>A</b>lignment <b>M</b>odel, to observe coordinated representations between the two modalities. We used various relationship alignment-seeking techniques, such as Canonical Correlation Analysis and Wasserstein Distance, as loss functions to transfigure features. On downstream applications, sentiment analysis and relation detection, we achieved new state-of-the-art results on two datasets, ZuCo and K-EmoCon. Our method achieved an F1-score improvement of 1.7% on K-EmoCon and 9.3% on Zuco datasets for sentiment analysis, and 7.4% on ZuCo for relation detection. In addition, we provide interpretations of the performance improvement: (1) feature distribution shows the effectiveness of the alignment module for discovering and encoding the relationship between EEG and language; (2) alignment weights show the influence of different language semantics as well as EEG frequency features; (3) brain topographical maps provide an intuitive demonstration of the connectivity in the brain regions. Our code is available at <url>https://github.com/Jason-Qiu/EEG_Language_Alignment</url>.</abstract>
@@ -16245,7 +16245,7 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Yifan</first><last>Fan</last></author>
       <author><first>Xibo</first><last>Li</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <pages>1817-1826</pages>
       <abstract>Graph reasoning contributes to the integration of discretely-distributed attentive information (clues) for Multi-party Dialogue Reading Comprehension (MDRC). This is attributed primarily to multi-hop reasoning over global conversational structures. However, existing approaches barely apply questions for anti-noise graph reasoning. More seriously, the local semantic structures in utterances are neglected, although they are beneficial for bridging across semantically-related clues. In this paper, we propose a question-aware global-to-local graph reasoning approach. It expands the canonical Interlocutor-Utterance graph by introducing a question node, enabling comprehensive global graph reasoning. More importantly, it constructs a semantic-role graph for each utterance, and accordingly performs local graph reasoning conditioned on the semantic relations. We design a two-stage encoder network to implement the progressive reasoning from the global graph to local. The experiments on the benchmark datasets Molweni and FriendsQA show that our approach yields significant improvements, compared to BERT and ELECTRA baselines. It achieves 73.6% and 77.2% F1-scores on Molweni and FriendsQA, respectively, outperforming state-of-the-art methods that employ different pretrained language models as backbones.</abstract>
@@ -16280,7 +16280,7 @@
     <paper id="125">
       <title><fixed-case>XTREME</fixed-case>-<fixed-case>UP</fixed-case>: A User-Centric Scarce-Data Benchmark for Under-Represented Languages</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <author><first>Mihir</first><last>Kale</last></author>
       <author><first>Min</first><last>Ma</last></author>
@@ -16297,7 +16297,7 @@
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Bidisha</first><last>Samanta</last></author>
       <author><first>Connie</first><last>Tao</last></author>
-      <author><first>David I.</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David I.</first><last>Adelani</last></author>
       <author><first>Vera</first><last>Axelrod</last></author>
       <author><first>Isaac</first><last>Caswell</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
@@ -16305,7 +16305,7 @@
       <author><first>Reeve</first><last>Ingle</last></author>
       <author><first>Melvin</first><last>Johnson</last></author>
       <author><first>Dmitry</first><last>Panteleev</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1856-1884</pages>
       <abstract>Data scarcity is a crucial issue for the development of highly multilingual NLP systems. Yet for many under-represented languages (ULs) — languages for which NLP research is particularly far behind in meeting user needs — it is feasible to annotate small amounts of data. Motivated by this, we propose XTREME-UP, a benchmark defined by: its focus on the scarce-data scenario rather than zero-shot; its focus on user-centric tasks — tasks with broad adoption by speakers of high-resource languages; and its focus on under-represented languages where this scarce-data scenario tends to be most realistic. XTREME-UP evaluates the capabilities of language models across 88 under-represented languages over 9 key user-centric technologies including ASR, OCR, MT, and information access tasks that are of general utility. We create new datasets for OCR, autocomplete, semantic parsing, and transliteration, and build on and refine existing datasets for other tasks. XTREME-UP provides methodology for evaluating many modeling scenarios including text only, multi-modal (vision, audio, and text), supervised parameter tuning, and in-context learning. We evaluate commonly used models on the benchmark. We release all code and scripts to train and evaluate models.</abstract>
       <url hash="f0c09eed">2023.findings-emnlp.125</url>
@@ -16415,7 +16415,7 @@
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Sudipta</first><last>Kar</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>2027-2051</pages>
       <abstract>We present MULTICONER V2, a dataset for fine-grained Named Entity Recognition covering 33 entity classes across 12 languages, in both monolingual and multilingual settings. This dataset aims to tackle the following practical challenges in NER: (i) effective handling of fine-grained classes that include complex entities like movie titles, and (ii) performance degradation due to noise generated from typing mistakes or OCR errors. The dataset is compiled from open resources like Wikipedia and Wikidata, and is publicly available. Evaluation based on the XLM-RoBERTa baseline highlights the unique challenges posed by MULTICONER V2: (i) the fine-grained taxonomy is challenging, where the scores are low with macro-F1=0.63 (across all languages), and (ii) the corruption strategy significantly impairs performance, with entity corruption resulting in 9% lower performance relative to non-entity corruptions across all languages. This highlights the greater impact of entity noise in contrast to context noise.</abstract>
       <url hash="7185cd60">2023.findings-emnlp.134</url>
@@ -16472,8 +16472,8 @@
       <author><first>Kangxi</first><last>Wu</last></author>
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>2113-2133</pages>
       <abstract>Generated texts from large language models (LLMs) are remarkably close to high-quality human-authored text, raising concerns about their potential misuse in spreading false information and academic misconduct. Consequently, there is an urgent need for a highly practical detection tool capable of accurately identifying the source of a given text. However, existing detection tools typically rely on access to LLMs and can only differentiate between machine-generated and human-authored text, failing to meet the requirements of fine-grained tracing, intermediary judgment, and rapid detection. Therefore, we propose LLMDet, a model-specific, secure, efficient, and extendable detection tool, that can source text from specific LLMs, such as GPT-2, OPT, LLaMA, and others. In LLMDet, we record the next-token probabilities of salient n-grams as features to calculate proxy perplexity for each LLM. By jointly analyzing the proxy perplexities of LLMs, we can determine the source of the generated text. Experimental results show that LLMDet yields impressive detection performance while ensuring speed and security, achieving 98.54% precision and about <tex-math>\times 5.0</tex-math> faster for recognizing human-authored text. Additionally, LLMDet can effortlessly extend its detection capabilities to a new open-source model. We will provide an open-source tool at <url>https://github.com/TrustedLLM/LLMDet</url>.</abstract>
       <url hash="2cd14485">2023.findings-emnlp.139</url>
@@ -16540,7 +16540,7 @@
       <author><first>Jiaxiang</first><last>Chen</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Qingting</first><last>Xu</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>2190-2197</pages>
       <abstract>We tackle Aspect Term Extraction (ATE), a task of automatically extracting aspect terms from sentences. The current Pretrained Language Model (PLM) based extractors have achieved significant improvements. They primarily benefit from context-aware encoding. However, a considerable number of sentences in ATE corpora contain uninformative or low-quality contexts. Such sentences frequently act as “troublemakers” during test. In this study, we explore the context-oriented quality improvement method. Specifically, we propose to automatically rewrite the sentences from the perspectives of virtual experts with different roles, such as a “chef” in the restaurant domain. On this basis, we perform ATE over the paraphrased sentences during test, using the well-trained extractors without any change. In the experiments, we leverage ChatGPT to determine virtual experts in the considered domains, and induce ChatGPT to generate paraphrases conditioned on the roles of virtual experts. We experiment on the benchmark SemEval datasets, including Laptop-domain L14 and Restaurant-domain R14-16. The experimental results show that our approach effectively recalls the inconspicuous aspect terms like “al di la”, although it reduces the precision. In addition, it is proven that our approach can be substantially improved by redundancy elimination and multi-role voting. More importantly, our approach can be used to expand the predictions obtained on the original sentences. This yields state-of-the-art performance (i.e., F1-scores of 86.2%, 89.3%, 77.7%, 82.7% on L14 and R14-16) without retraining or fine-tuning the baseline extractors.</abstract>
       <url hash="f2636c0a">2023.findings-emnlp.144</url>
@@ -16589,7 +16589,7 @@
       <author><first>Ziqi</first><last>Zhao</last></author>
       <author><first>Zhumin</first><last>Chen</last></author>
       <author><first>Pengjie</first><last>Ren</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Zhaochun</first><last>Ren</last></author>
       <pages>2228-2240</pages>
       <abstract>Few-shot named entity recognition (NER) has shown remarkable progress in identifying entities in low-resource domains. However, few-shot NER methods still struggle with out-of-domain (OOD) examples due to their reliance on manual labeling for the target domain. To address this limitation, recent studies enable generalization to an unseen target domain with only a few labeled examples using data augmentation techniques. Two important challenges remain: First, augmentation is limited to the training data, resulting in minimal overlap between the generated data and OOD examples. Second, knowledge transfer is implicit and insufficient, severely hindering model generalizability and the integration of knowledge from the source domain. In this paper, we propose a framework, prompt learning with type-related features (PLTR), to address these challenges. To identify useful knowledge in the source domain and enhance knowledge transfer, PLTR automatically extracts entity type-related features (TRFs) based on mutual information criteria. To bridge the gap between training and OOD data, PLTR generates a unique prompt for each unseen example by selecting relevant TRFs. We show that PLTR achieves significant performance improvements on in-domain and cross-domain datasets. The use of PLTR facilitates model adaptation and increases representation similarities between the source and unseen domains.</abstract>
@@ -16717,7 +16717,7 @@
       <author><first>Xingjian</first><last>Shi</last></author>
       <author><first>Kaixiang</first><last>Lin</last></author>
       <author><first>Aston</first><last>Zhang</last></author>
-      <author><first>Andrew</first><last>Wilson</last></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last></author>
       <pages>2414-2432</pages>
       <abstract>A particularly successful class of approaches for few-shot learning combines language models with prompts - hand-crafted task descriptions that complement data samples. However, designing prompts by hand for each task commonly requires domain knowledge and substantial guesswork. We observe, in the context of classification tasks, that instruction finetuned language models are remarkably robust towards some dimensions of a prompt’s design. We subsequently propose a simple method to eliminate the need for handcrafted prompts, named AuT-Few. This approach consists of (i) a prompt retrieval module that selects suitable task instructions from the instruction-tuning knowledge base, and (ii) the generation of two distinct, semantically meaningful, class descriptions and a selection mechanism via cross-validation. Over 12 datasets, spanning 8 classification tasks, we show that AuT-Few outperforms current state-of-the-art few-shot learning methods. Moreover, AuT-Few is the best ranking method across datasets on the RAFT few-shot benchmark. Notably, these results are achieved without task-specific handcrafted prompts on unseen tasks.</abstract>
       <url hash="c8667788">2023.findings-emnlp.158</url>
@@ -16792,7 +16792,7 @@
       <author><first>Jingcheng</first><last>Deng</last></author>
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>2500-2510</pages>
       <abstract>Retrieval-augmented language models show promise in addressing issues like outdated information and hallucinations in language models (LMs). However, current research faces two main problems: 1) determining what information to retrieve, and 2) effectively combining retrieved information during generation. We argue that valuable retrieved information should not only be related to the current source text but also consider the future target text, given the nature of LMs that model future tokens. Moreover, we propose that aggregation using latent variables derived from a compact latent space is more efficient than utilizing explicit raw text, which is limited by context length and susceptible to noise. Therefore, we introduce RegaVAE, a retrieval-augmented language model built upon the variational auto-encoder (VAE). It encodes the text corpus into a latent space, capturing current and future information from both source and target text. Additionally, we leverage the VAE to initialize the latent space and adopt the probabilistic form of the retrieval generation paradigm by expanding the Gaussian prior distribution into a Gaussian mixture distribution. Theoretical analysis provides an optimizable upper bound for RegaVAE. Experimental results on various datasets demonstrate significant improvements in text generation quality and hallucination removal.</abstract>
       <url hash="fc830c16">2023.findings-emnlp.164</url>
@@ -16916,7 +16916,7 @@
       <title>Multi-label and Multi-target Sampling of Machine Annotation for Computational Stance Detection</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Hai Leong</first><last>Chieu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>2641-2649</pages>
       <abstract>Data collection from manual labeling provides domain-specific and task-aligned supervision for data-driven approaches, and a critical mass of well-annotated resources is required to achieve reasonable performance in natural language processing tasks. However, manual annotations are often challenging to scale up in terms of time and budget, especially when domain knowledge, capturing subtle semantic features, and reasoning steps are needed. In this paper, we investigate the efficacy of leveraging large language models on automated labeling for computational stance detection. We empirically observe that while large language models show strong potential as an alternative to human annotators, their sensitivity to task-specific instructions and their intrinsic biases pose intriguing yet unique challenges in machine annotation. We introduce a multi-label and multi-target sampling strategy to optimize the annotation quality. Experimental results on the benchmark stance detection corpora show that our method can significantly improve performance and learning efficacy.</abstract>
       <url hash="2ed53329">2023.findings-emnlp.174</url>
@@ -17019,7 +17019,7 @@
       <author><first>Liang</first><last>Cheng</last></author>
       <author><first>Mohammad</first><last>Hosseini</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>2758-2774</pages>
       <abstract>Large Language Models (LLMs) are claimed to be capable of Natural Language Inference (NLI), necessary for applied tasks like question answering and summarization. We present a series of behavioral studies on several LLM families (LLaMA, GPT-3.5, and PaLM) which probe their behavior using controlled experiments. We establish two biases originating from pretraining which predict much of their behavior, and show that these are major sources of hallucination in generative LLMs. First, memorization at the level of sentences: we show that, regardless of the premise, models falsely label NLI test samples as entailing when the hypothesis is attested in training data, and that entities are used as “indices’ to access the memorized data. Second, statistical patterns of usage learned at the level of corpora: we further show a similar effect when the premise predicate is less frequent than that of the hypothesis in the training data, a bias following from previous studies. We demonstrate that LLMs perform significantly worse on NLI test samples which do not conform to these biases than those which do, and we offer these as valuable controls for future LLM evaluation.</abstract>
       <url hash="6acdf9ab">2023.findings-emnlp.182</url>
@@ -17090,7 +17090,7 @@
       <author><first>Ming</first><last>Yan</last></author>
       <author><first>Guohai</first><last>Xu</last></author>
       <author><first>Chenliang</first><last>Li</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Qi</first><last>Qian</last></author>
       <author><first>Ji</first><last>Zhang</last></author>
       <author><first>Qin</first><last>Jin</last></author>
@@ -17112,7 +17112,7 @@
       <author><first>Shihan</first><last>Dou</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2859-2873</pages>
       <abstract>Reinforcement learning from human feedback serves as a crucial bridge, aligning large language models with human and societal values. This alignment requires a vast corpus of human feedback to learn a reward model, which is subsequently used to finetune language models. However, we have identified that the reward model often finds shortcuts to bypass its intended objectives, misleadingly assuming that humans prefer longer responses. The emergence of length bias often induces the model to favor longer outputs, yet it doesn’t equate to an increase in helpful information within these outputs. In this paper, we propose an innovative solution, applying the Product-of-Experts (PoE) technique to separate reward modeling from the influence of sequence length. In our framework, the main expert concentrates on understanding human intents, while the biased expert targets the identification and capture of length bias. To further enhance the learning of bias, we introduce perturbations into the bias-focused expert, disrupting the flow of semantic information. Experimental results validate the effectiveness of our approach, indicating that language model performance is improved, irrespective of sequence length.</abstract>
       <url hash="bb09e889">2023.findings-emnlp.188</url>
@@ -17124,7 +17124,7 @@
       <author><first>Ziyue</first><last>Wang</last></author>
       <author><first>Chi</first><last>Chen</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>2874-2890</pages>
       <abstract>Large Language Models (LLMs) demonstrate impressive reasoning ability and the maintenance of world knowledge not only in natural language tasks, but also in some vision-language tasks such as open-domain knowledge-based visual question answering (OK-VQA). As images are invisible to LLMs, researchers convert images to text to engage LLMs into the visual question reasoning procedure. This leads to discrepancies between images and their textual representations presented to LLMs, which consequently impedes final reasoning performance. To fill the information gap and better leverage the reasoning capability, we design a framework that enables LLMs to proactively ask relevant questions to unveil more details in the image, along with filters for refining the generated information. We validate our idea on OK-VQA and A-OKVQA. Our method continuously boosts the performance of baselines methods by an average gain of 2.15% on OK-VQA, and achieves consistent improvements across different LLMs.</abstract>
       <url hash="f0158b42">2023.findings-emnlp.189</url>
@@ -17212,7 +17212,7 @@
       <author><first>Bin</first><last>Liang</last></author>
       <author><first>Bing</first><last>Qin</last></author>
       <author><first>Min</first><last>Yang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>2969-2979</pages>
       <abstract>Thanks in part to the availability of copious annotated resources for some entity categories, existing studies have achieved superior performance in multimodal named entity recognition (MNER). However, in the real-world scenario, it is infeasible to enumerate all entity categories in advance. Therefore, in this paper, we formulate a new few-shot multimodal named entity recognition (FewMNER) task, which aims to effectively locate and identify named entities for a text-image pair only using a small number of labeled examples. Further, we explore the merit of in-context learning (ICL) and propose a novel framework to deal with FewMNER, where three points are taken into account: i.e., converting visual modality, selecting useful examples, and designing an effective task demonstration. Specifically, we first employ an image caption model to convert images into textual descriptions, enabling large language models to absorb information from visual modality. Then, we use the ranking of the sum of similarity rankings from both text and image modalities to select k-nearest examples, which form a demonstration context. Finally, we utilize the MNER definition and the meaning of each entity category as effective instruction. Extensive experimental results demonstrate that our framework outperforms baselines under several few-shot settings.</abstract>
@@ -17354,7 +17354,7 @@
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Jeongyeon</first><last>Seo</last></author>
       <author><first>Soyeong</first><last>Jeong</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>3145-3157</pages>
       <abstract>Large language models (LLMs) enable zero-shot approaches in open-domain question answering (ODQA), yet with limited advancements as the reader is compared to the retriever. This study aims at the feasibility of a zero-shot reader that addresses the challenges of computational cost and the need for labeled data. We find that LLMs are distracted due to irrelevant documents in the retrieved set and the overconfidence of the generated answers when they are exploited as zero-shot readers. To tackle these problems, we mitigate the impact of such documents via Distraction-aware Answer Selection (DAS) with a negation-based instruction and score adjustment for proper answer selection. Experimental results show that our approach successfully handles distraction across diverse scenarios, enhancing the performance of zero-shot readers. Furthermore, unlike supervised readers struggling with unseen data, zero-shot readers demonstrate outstanding transferability without any training.</abstract>
       <url hash="af8f3281">2023.findings-emnlp.207</url>
@@ -17391,7 +17391,7 @@
       <title>Towards Informative Open-ended Text Generation with Dynamic Knowledge Triples</title>
       <author><first>Zixuan</first><last>Ren</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3189-3203</pages>
       <abstract>Pretrained language models (PLMs), especially large language models (LLMs) demonstrate impressive capabilities in open-ended text generation. While our statistical results show that LLMs often suffer from over-concentrated information, where the generated texts overly focus on the given prompt and fail to provide sufficient background and detailed information as humans do. To address this issue, we propose a dynamic knowledge-guided informative open-ended text generation approach, that utilizes a knowledge graph to help the model generate more contextually related entities and detailed facts. Specifically, we first employ a local knowledge filter to extract relevant knowledge from the comprehensive knowledge graph for a given topic sentence. Then we introduce a dynamic knowledge selector to predict the entity to be mentioned in the subsequent sentence. Finally, we utilize a knowledge-enhanced text generator to produce a more informative output. To evaluate the effectiveness of our approach, we evaluate the proposed approach in two scenarios: fine-tuning for small PLMs and prompt tuning for LLMs. Experimental results show that our approach could generate more informative texts than baselines.</abstract>
       <url hash="97192a55">2023.findings-emnlp.210</url>
@@ -17491,7 +17491,7 @@
       <author><first>Zijian</first><last>Ding</last></author>
       <author><first>Alison</first><last>Smith-Renner</last></author>
       <author><first>Wenjuan</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>3321-3339</pages>
       <abstract>To explore how humans can best leverage LLMs for writing and how interacting with these models affects feelings of ownership and trust in the writing process, we compared common human-AI interaction types (e.g., guiding system, selecting from system outputs, post-editing outputs) in the context of LLM-assisted news headline generation. While LLMs alone can generate satisfactory news headlines, on average, human control is needed to fix undesirable model outputs. Of the interaction methods, guiding and selecting model output added the most benefit with the lowest cost (in time and effort). Further, AI assistance did not harm participants’ perception of control compared to freeform editing.</abstract>
@@ -17657,7 +17657,7 @@
     <paper id="230">
       <title>Detecting Syntactic Change with Pre-trained Transformer Models</title>
       <author><first>Liwen</first><last>Hou</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>3564-3574</pages>
       <abstract>We investigate the ability of Transformer-based language models to find syntactic differences between the English of the early 1800s and that of the late 1900s. First, we show that a fine-tuned BERT model can distinguish between text from these two periods using syntactic information only; to show this, we employ a strategy to hide semantic information from the text. Second, we make further use of fine-tuned BERT models to identify specific instances of syntactic change and specific words for which a new part of speech was introduced. To do this, we employ an automatic part-of-speech (POS) tagger and use it to train corpora-specific taggers based only on BERT representations pretrained on different corpora. Notably, our methods of identifying specific candidates for syntactic change avoid using any automatic POS tagger on old text, where its performance may be unreliable; instead, our methods only use untagged old text together with tagged modern text. We examine samples and distributional properties of the model output to validate automatically identified cases of syntactic change. Finally, we use our techniques to confirm the historical rise of the progressive construction, a known example of syntactic change.</abstract>
       <url hash="83d35615">2023.findings-emnlp.230</url>
@@ -17706,7 +17706,7 @@
       <title>Exploring Graph Pre-training for Aspect-based Sentiment Analysis</title>
       <author><first>Xiaoyi</first><last>Bao</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3623-3634</pages>
       <abstract>Existing studies tend to extract the sentiment elements in a generative manner in order to avoid complex modeling. Despite their effectiveness, they ignore importance of the relationships between sentiment elements that could be crucial, making the large pre-trained generative models sub-optimal for modeling sentiment knowledge. Therefore, we introduce two pre-training paradigms to improve the generation model by exploring graph pre-training that targeting to strengthen the model in capturing the elements’ relationships. Specifically, We first employ an Element-level Graph Pre-training paradigm, which is designed to improve the structure awareness of the generative model. Then, we design a Task Decomposition Pre-training paradigm to make the generative model generalizable and robust against various irregular sentiment quadruples. Extensive experiments show the superiority of our proposed method, validate the correctness of our motivation.</abstract>
       <url hash="ed263e09">2023.findings-emnlp.234</url>
@@ -17719,8 +17719,8 @@
       <author><first>Xiaobao</first><last>Wu</last></author>
       <author><first>Xinshuai</first><last>Dong</last></author>
       <author><first>Cong-Duy</first><last>Nguyen</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <pages>3635-3649</pages>
       <abstract>Temporal Language Grounding seeks to localize video moments that semantically correspond to a natural language query. Recent advances employ the attention mechanism to learn the relations between video moments and the text query. However, naive attention might not be able to appropriately capture such relations, resulting in ineffective distributions where target video moments are difficult to separate from the remaining ones. To resolve the issue, we propose an energy-based model framework to explicitly learn moment-query distributions. Moreover, we propose DemaFormer, a novel Transformer-based architecture that utilizes exponential moving average with a learnable damping factor to effectively encode moment-query inputs. Comprehensive experiments on four public temporal language grounding datasets showcase the superiority of our methods over the state-of-the-art baselines.</abstract>
       <url hash="8a87d914">2023.findings-emnlp.235</url>
@@ -17774,7 +17774,7 @@
       <author><first>Cenyuan</first><last>Zhang</last></author>
       <author><first>Chengsong</first><last>Huang</last></author>
       <author><first>Hua</first><last>Cai</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3685-3694</pages>
       <abstract>Large pre-trained language models (PLMs) have achieved remarkable success, making them highly valuable intellectual property due to their expensive training costs. Consequently, model watermarking, a method developed to protect the intellectual property of neural models, has emerged as a crucial yet underexplored technique. The problem of watermarking PLMs has remained unsolved since the parameters of PLMs will be updated when fine-tuned on downstream datasets, and then embedded watermarks could be removed easily due to the catastrophic forgetting phenomenon. This study investigates the feasibility of watermarking PLMs by embedding backdoors that can be triggered by specific inputs. We employ contrastive learning during the watermarking phase, allowing the representations of specific inputs to be isolated from others and mapped to a particular label after fine-tuning. Moreover, we demonstrate that by combining weight perturbation with the proposed method, watermarks can be embedded in a flatter region of the loss landscape, thereby increasing their robustness to watermark removal. Extensive experiments on multiple datasets demonstrate that the embedded watermarks can be robustly extracted without any knowledge about downstream tasks, and with a high success rate.</abstract>
       <url hash="acde7072">2023.findings-emnlp.239</url>
@@ -17821,8 +17821,8 @@
       <author><first>Yuan</first><last>Gong</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
       <author><first>Xixin</first><last>Wu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>3717-3729</pages>
       <abstract>Large language models (LLMs) have been significantly improved by instruction fine-tuning, but still lack transparency and the ability to utilize up-to-date knowledge and information. In this work, we propose search-augmented instruction learning (SAIL), which grounds the language generation and instruction following abilities on complex search results generated by in-house and external search engines. With an instruction tuning corpus, we collect search results for each training case from different search APIs and domains, and construct a new search-grounded training set containing (instruction, grounding information, response) triplets. We then fine-tune the LLaMA-7B model on the constructed training set. Since the collected results contain unrelated and disputing languages, the model needs to learn to ground on trustworthy search results, filter out distracting passages, and generate the target response. The search result-denoising process entails explicit trustworthy information selection and multi-hop reasoning, since the retrieved passages might be informative but not contain the instruction-following answer. Experiments show that the fine-tuned SAIL-7B model has a strong instruction-following ability, and it performs significantly better on transparency-sensitive tasks, including open-ended question answering and fact checking.</abstract>
       <url hash="a42d9088">2023.findings-emnlp.242</url>
@@ -17851,7 +17851,7 @@
       <author><first>Ruotian</first><last>Ma</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3749-3762</pages>
       <abstract>Pre-trained language models (PLMs) are often deployed as cloud services, enabling users to upload textual data and perform inference remotely. However, users’ personal text often contains sensitive information, and sharing such data directly with the service providers can lead to serious privacy leakage. To address this problem, we introduce a novel privacy-preserving inference framework called <b>
           <i>MixPi</i>
@@ -17864,7 +17864,7 @@
       <title><fixed-case>F</fixed-case>ine<fixed-case>P</fixed-case>rompt: Unveiling the Role of Finetuned Inductive Bias on Compositional Reasoning in <fixed-case>GPT</fixed-case>-4</title>
       <author><first>Jeonghwan</first><last>Kim</last></author>
       <author><first>Giwon</first><last>Hong</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <author><first>Joyce</first><last>Whang</last></author>
       <pages>3763-3775</pages>
       <abstract>Compositional reasoning across texts has been a long-standing challenge in natural language processing. With large language models like GPT-4 taking over the field, prompting techniques such as chain-of-thought (CoT) were proposed to unlock compositional, multi-step reasoning capabilities of LLMs. Despite their success, the prompts demand significant human effort to discover and validate them. Our work draws attention to the idea of transferring task-specific inductive biases from finetuned models to prompts, as a way of improving GPT-4’s compositional reasoning capabilities. To leverage these inductive biases, we formulate prompt templates to ease the transfer of inductive biases. The experimental results on multi-hop question answering and numerical reasoning over text show that our proposed prompt scheme shows competitive zero-shot and few-shot performances compared to existing prompts on complicated reasoning tasks, highlighting the importance of adopting the validated biases of the previous paradigm.</abstract>
@@ -17981,7 +17981,7 @@
       <author><first>Zhaohui</first><last>Li</last></author>
       <author><first>Susan</first><last>Lloyd</last></author>
       <author><first>Matthew</first><last>Beckman</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>3879-3891</pages>
       <abstract>STEM educators must trade off the ease of assessing selected response (SR) questions, like multiple choice, with constructed response (CR) questions, where students articulate their own reasoning. Our work addresses a CR type new to NLP but common in college STEM, consisting of multiple questions per context. To relate the context, the questions, the reference responses, and students’ answers, we developed an Answer-state Recurrent Relational Network (AsRRN). In recurrent time-steps, relation vectors are learned for specific dependencies in a computational graph, where the nodes encode the distinct types of text input. AsRRN incorporates contrastive loss for better representation learning, which improves performance and supports student feedback. AsRRN was developed on a new dataset of 6,532 student responses to three, two-part CR questions. AsRRN outperforms classifiers based on LLMs, a previous relational network for CR questions, and few-shot learning with GPT-3.5. Ablation studies show the distinct contributions of AsRRN’s dependency structure, the number of time steps in the recurrence, and the contrastive loss.</abstract>
       <url hash="2278ebec">2023.findings-emnlp.254</url>
@@ -17996,7 +17996,7 @@
       <author><first>Kaisong</first><last>Song</last></author>
       <author><first>Yangyang</first><last>Kang</last></author>
       <author><first>Jiaxiang</first><last>Chen</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3892-3897</pages>
       <abstract>Comparative Opinion Quintuple Extraction (COQE) aims to predict comparative opinion quintuples from comparative sentences. These quintuples include subject, object, shareable aspect, comparative opinion, and preference. The existing pipeline-based COQE method fails in error propagation. In addition, the complexity and insufficient amounts of annotated data hinder the performance of COQE models. In this paper, we introduce a novel approach called low-resource comparative opinion quintuple extraction by Data Augmentation with Prompting (DAP). Firstly, we present an end-to-end model architecture better suited to the data augmentation method from triplets to quintuples and can effectively avoid error propagation. Additionally, we introduce a data-centric augmentation approach that leverages the robust generative abilities of ChatGPT and integrates transfer learning techniques. Experimental results over three datasets (Camera, Car, Ele) demonstrate that our approach yields substantial improvements and achieves state-of-the-art results. The source code and data are publicly released at: https://github.com/qtxu-nlp/COQE-DAP.</abstract>
       <url hash="9570c937">2023.findings-emnlp.255</url>
@@ -18090,7 +18090,7 @@
       <author><first>Yuxin</first><last>Yang</last></author>
       <author><first>Yanqi</first><last>Shi</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>3980-3998</pages>
       <abstract>Conditional story generation is significant in human-machine interaction, particularly in producing stories with complex plots. While Large language models (LLMs) perform well on multiple NLP tasks, including story generation, it is challenging to generate stories with both complex and creative plots. Existing methods often rely on detailed prompts to guide LLMs to meet target conditions, which inadvertently restrict the creative potential of the generated stories. We argue that leveraging information from exemplary human-written stories facilitates generating more diverse plotlines. Delving deeper into story details helps build complex and credible plots. In this paper, we propose a retrieval-auGmented stoRy generation framework with a fOrest of eVidEnce (GROVE) to enhance stories’ complexity. We build a retrieval repository for target conditions to produce few-shot examples to prompt LLMs. Additionally, we design an “asking-why” prompting scheme that extracts a forest of evidence, providing compensation for the ambiguities that may occur in the generated story. This iterative process uncovers underlying story backgrounds. Finally, we select the most fitting chains of evidence from the evidence forest and integrate them into the generated story, thereby enhancing the narrative’s complexity and credibility. Experimental results and numerous examples verify the effectiveness of our method.</abstract>
       <url hash="f8b9209a">2023.findings-emnlp.262</url>
@@ -18144,7 +18144,7 @@
       <title>Hallucination Detection for Grounded Instruction Generation</title>
       <author><first>Lingjun</first><last>Zhao</last></author>
       <author><first>Khanh</first><last>Nguyen</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>4044-4053</pages>
       <abstract>We investigate the problem of generating instructions to guide humans to navigate in simulated residential environments. A major issue with current models is hallucination: they generate references to actions or objects that are inconsistent with what a human follower would perform or encounter along the described path. We develop a model that detects these hallucinated references by adopting a model pre-trained on a large corpus of image-text pairs, and fine-tuning it with a contrastive loss that separates correct instructions from instructions containing synthesized hallucinations. Our final model outperforms several baselines, including using word probability estimated by the instruction-generation model, and supervised models based on LSTM and Transformer.</abstract>
       <url hash="9389f8e2">2023.findings-emnlp.266</url>
@@ -18158,7 +18158,7 @@
       <author><first>Damir</first><last>Korenčić</last></author>
       <author><first>Ivan</first><last>Grubisic</last></author>
       <author><first>Paolo</first><last>Papotti</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>4054-4063</pages>
       <abstract>Large language models have recently risen in popularity due to their ability to perform many natural language tasks without requiring any fine-tuning. In this work, we focus on two novel ideas: (1) generating definitions from examples and using them for zero-shot classification, and (2) investigating how an LLM makes use of the definitions. We thoroughly analyze the performance of GPT-3 model for fine-grained multi-label conspiracy theory classification of tweets using zero-shot labeling. In doing so, we asses how to improve the labeling by providing minimal but meaningful context in the form of the definitions of the labels. We compare descriptive noun phrases, human-crafted definitions, introduce a new method to help the model generate definitions from examples, and propose a method to evaluate GPT-3’s understanding of the definitions. We demonstrate that improving definitions of class labels has a direct consequence on the downstream classification results.</abstract>
@@ -18205,7 +18205,7 @@
     <paper id="271">
       <title>Evaluating Emotion Arcs Across Languages: Bridging the Global Divide in Sentiment Analysis</title>
       <author><first>Daniela</first><last>Teodorescu</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>4124-4137</pages>
       <abstract>Emotion arcs capture how an individual (or a population) feels over time. They are widely used in industry and research; however, there is little work on evaluating the automatically generated arcs. This is because of the difficulty of establishing the true (gold) emotion arc. Our work, for the first time, systematically and quantitatively evaluates automatically generated emotion arcs. We also compare two common ways of generating emotion arcs: Machine-Learning (ML) models and Lexicon-Only (LexO) methods. By running experiments on 18 diverse datasets in 9 languages, we show that despite being markedly poor at instance level emotion classification, LexO methods are highly accurate at generating emotion arcs when aggregating information from hundreds of instances. We also show, through experiments on six indigenous African languages, as well as Arabic, and Spanish, that automatic translations of English emotion lexicons can be used to generate high-quality emotion arcs in less-resource languages. This opens up avenues for work on emotions in languages from around the world; which is crucial for commerce, public policy, and health research in service of speakers often left behind. Code and resources: https://github.com/dteodore/EmotionArcs</abstract>
       <url hash="d293cb3a">2023.findings-emnlp.271</url>
@@ -18231,7 +18231,7 @@
       <title>Chain-of-Thought Embeddings for Stance Detection on Social Media</title>
       <author><first>Joseph</first><last>Gatto</last></author>
       <author><first>Omar</first><last>Sharif</last></author>
-      <author><first>Sarah M.</first><last>Preum</last></author>
+      <author id="sarah-masud-preum"><first>Sarah M.</first><last>Preum</last></author>
       <pages>4154-4161</pages>
       <abstract>Stance detection on social media is challenging for Large Language Models (LLMs), as emerging slang and colloquial language in online conversations often contain deeply implicit stance labels. Chain-of-Thought (COT) prompting has recently been shown to improve performance on stance detection tasks — alleviating some of these issues. However, COT prompting still struggles with implicit stance identification. This challenge arises because many samples are initially challenging to comprehend before a model becomes familiar with the slang and evolving knowledge related to different topics, all of which need to be acquired through the training data. In this study, we address this problem by introducing COT Embeddings which improve COT performance on stance detection tasks by embedding COT reasonings and integrating them into a traditional RoBERTa-based stance detection pipeline. Our analysis demonstrates that 1) text encoders can leverage COT reasonings with minor errors or hallucinations that would otherwise distort the COT output label. 2) Text encoders can overlook misleading COT reasoning when a sample’s prediction heavily depends on domain-specific patterns. Our model achieves SOTA performance on multiple stance detection datasets collected from social media.</abstract>
       <url hash="c713e584">2023.findings-emnlp.273</url>
@@ -18315,7 +18315,7 @@
     <paper id="279">
       <title>Women Wearing Lipstick: Measuring the Bias Between an Object and Its Related Gender</title>
       <author><first>Ahmed</first><last>Sabir</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>4234-4240</pages>
       <abstract>In this paper, we investigate the impact of objects on gender bias in image captioning systems. Our results show that only gender-specific objects have a strong gender bias (e.g., women-lipstick). In addition, we propose a visual semantic-based gender score that measures the degree of bias and can be used as a plug-in for any image captioning system. Our experiments demonstrate the utility of the gender score, since we observe that our score can measure the bias relation between a caption and its related gender; therefore, our score can be used as an additional metric to the existing Object Gender Co-Occ approach.</abstract>
       <url hash="d616408a">2023.findings-emnlp.279</url>
@@ -18378,7 +18378,7 @@
     </paper>
     <paper id="284">
       <title>Empowering Psychotherapy with Large Language Models: Cognitive Distortion Detection through Diagnosis of Thought Prompting</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Yujie</first><last>Lu</last></author>
       <author><first>William</first><last>Wang</last></author>
       <pages>4295-4304</pages>
@@ -18479,8 +18479,8 @@
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Zihao</first><last>Wei</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>4424-4436</pages>
       <abstract>Multi-aspect controllable text generation aims to generate fluent sentences that possess multiple desired attributes simultaneously. Traditional methods either require expensive iteration / searching within the discrete text space during the decoding stage, or train separate controllers for each aspect, resulting in a degradation of text quality due to the discrepancy between different aspects. To address these limitations, we introduce a novel approach for <tex-math>\textbf{M}</tex-math>ulti-<tex-math>\textbf{a}</tex-math>spect <tex-math>\textbf{c}</tex-math>ontrol, namely MacLaSa, that estimates compact <tex-math>\textbf{La}</tex-math>tent space for multiple aspects, and performs efficient <tex-math>\textbf{Sa}</tex-math>mpling with a fast sampler. To eliminate the domain discrepancies between different aspects, we first utilize a variational autoencoder (VAE) network to map text sequences from various data sources into close latent representations. The estimated latent space enables the formulation of joint energy-based models and the plugging in of arbitrary attribute discriminators to achieve multi-aspect control. Afterwards, we draw latent samples with a fast sampler based on ordinary differential equations and feed sampled examples to the VAE decoder to produce target text sequences. Experimental results demonstrate that MacLaSa outperforms strong baselines on both attribute relevance and textual quality while maintaining a high inference speed.</abstract>
       <url hash="7475a45d">2023.findings-emnlp.292</url>
@@ -18492,9 +18492,9 @@
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
       <author><first>Rui</first><last>Meng</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Yingbo</first><last>Zhou</last></author>
       <pages>4437-4451</pages>
       <abstract>The dominant paradigm of textual question answering systems is based on end-to-end neural networks, which excels at answering natural language questions but falls short on complex ones. This stands in contrast to the broad adaptation of semantic parsing approaches over structured data sources (e.g., relational database, knowledge graphs), that convert natural language questions to logical forms and execute them with query engines. Towards combining the strengths of neural and symbolic methods, we propose a framework of question parsing and execution on textual QA. It comprises two central pillars: (1) We parse the question of varying complexity into an intermediate representation, named H-expression, which is composed of simple questions as the primitives and symbolic operations representing the relationships among them; (2) To execute the resulting H-expressions, we design a hybrid executor, which integrates the deterministic rules to translate the symbolic operations with a drop-in neural reader network to answer each decomposed simple question. Hence, the proposed framework can be viewed as a top-down question parsing followed by a bottom-up answer backtracking. The resulting H-expressions closely guide the execution process, offering higher precision besides better interpretability while still preserving the advantages of the neural readers for resolving its primitive elements. Our extensive experiments on MuSiQue, 2WikiQA, HotpotQA, and NQ show that the proposed parsing and hybrid execution framework outperforms existing approaches in supervised, few-shot, and zero-shot settings, while also effectively exposing its underlying reasoning process.</abstract>
@@ -18568,7 +18568,7 @@
     <paper id="299">
       <title>Statistically Profiling Biases in Natural Language Reasoning Datasets and Models</title>
       <author><first>Shanshan</first><last>Huang</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>4521-4530</pages>
       <abstract>Recent studies have shown that many natural language understanding and reasoning datasets contain statistical cues that can be exploited by NLP models, resulting in an overestimation of their capabilities. Existing methods, such as “hypothesis-only” tests and CheckList, are limited in identifying these cues and evaluating model weaknesses. We introduce ICQ (I-See-Cue), a lightweight, general statistical profiling framework that automatically identifies potential biases in multiple-choice NLU datasets without requiring additional test cases. ICQ assesses the extent to which models exploit these biases through black-box testing, addressing the limitations of current methods. In this work, we conduct a comprehensive evaluation of statistical biases in 10 popular NLU datasets and 4 models, confirming prior findings, revealing new insights, and offering an online demonstration system to encourage users to assess their own datasets and models. Furthermore, we present a case study on investigating ChatGPT’s bias, providing valuable recommendations for practical applications.</abstract>
       <url hash="ff20ee3b">2023.findings-emnlp.299</url>
@@ -18606,7 +18606,7 @@
       <author><first>Alisa</first><last>Liu</last></author>
       <author><first>Orevaoghene</first><last>Ahia</last></author>
       <author><first>Hila</first><last>Gonen</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <pages>4555-4569</pages>
       <abstract>The translation of ambiguous text presents a challenge for translation systems, as it requires using the surrounding context to disambiguate the intended meaning as much as possible. While prior work has studied ambiguities that result from different grammatical features of the source and target language, we study semantic ambiguities that exist in the source (English in this work) itself. In particular, we focus on idioms that are open to both literal and figurative interpretations (e.g., goose egg), and collect TIDE, a dataset of 512 pairs of English sentences containing idioms with disambiguating context such that one is literal (it laid a goose egg) and another is figurative (they scored a goose egg, as in a score of zero). In experiments, we compare MT-specific models and language models for (i) their preference when given an ambiguous subsentence, (ii) their sensitivity to disambiguating context, and (iii) the performance disparity between figurative and literal source sentences. We find that current MT models consistently translate English idioms literally, even when the context suggests a figurative interpretation. On the other hand, LMs are far more context-aware, although there remain disparities across target languages. Our findings underline the potential of LMs as a strong backbone for context-aware translation.</abstract>
       <url hash="4ad0713c">2023.findings-emnlp.302</url>
@@ -18650,8 +18650,8 @@
     <paper id="306">
       <title>More than Votes? Voting and Language based Partisanship in the <fixed-case>US</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt</title>
       <author><first>Biaoyan</first><last>Fang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>4604-4614</pages>
       <abstract>Understanding the prevalence and dynamics of justice partisanship and ideology in the US Supreme Court is critical in studying jurisdiction. Most research quantifies partisanship based on voting behavior, and oral arguments in the courtroom — the last essential procedure before the final case outcome — have not been well studied for this purpose. To address this gap, we present a framework for analyzing the language of justices in the courtroom for partisan signals, and study how partisanship in speech aligns with voting patterns. Our results show that the affiliated party of justices can be predicted reliably from their oral contributions. We further show a strong correlation between language partisanship and voting ideology.</abstract>
@@ -18729,7 +18729,7 @@
       <title>Mind the Gap: Automated Corpus Creation for Enthymeme Detection and Reconstruction in Learner Arguments</title>
       <author><first>Maja</first><last>Stahl</last></author>
       <author><first>Nick</first><last>Düsterhus</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>4703-4717</pages>
       <abstract>Writing strong arguments can be challenging for learners. It requires to select and arrange multiple argumentative discourse units (ADUs) in a logical and coherent way as well as to decide which ADUs to leave implicit, so called enthymemes. However, when important ADUs are missing, readers might not be able to follow the reasoning or understand the argument’s main point. This paper introduces two new tasks for learner arguments: to identify gaps in arguments (enthymeme detection) and to fill such gaps (enthymeme reconstruction). Approaches to both tasks may help learners improve their argument quality. We study how corpora for these tasks can be created automatically by deleting ADUs from an argumentative text that are central to the argument and its quality, while maintaining the text’s naturalness. Based on the ICLEv3 corpus of argumentative learner essays, we create 40,089 argument instances for enthymeme detection and reconstruction. Through manual studies, we provide evidence that the proposed corpus creation process leads to the desired quality reduction, and results in arguments that are similarly natural to those written by learners. Finally, first baseline approaches to enthymeme detection and reconstruction demonstrate the corpus’ usefulness.</abstract>
@@ -18755,13 +18755,13 @@
       <author><first>Hailin</first><last>Chen</last></author>
       <author><first>Weishi</first><last>Wang</last></author>
       <author><first>Fangkai</first><last>Jiao</last></author>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Chengwei</first><last>Qin</last></author>
       <author><first>Bosheng</first><last>Ding</last></author>
       <author><first>Xiaobao</first><last>Guo</last></author>
       <author><first>Minzhi</first><last>Li</last></author>
       <author><first>Xingxuan</first><last>Li</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>4736-4756</pages>
       <abstract>As Large Language Models (LLMs) become popular, there emerged an important trend of using multimodality to augment the LLMs’ generation ability, which enables LLMs to better interact with the world. However, there lacks a unified perception of at which stage and how to incorporate different modalities. In this survey, we review methods that assist and augment generative models by retrieving multimodal knowledge, whose formats range from images, codes, tables, graphs, to audio. Such methods offer a promising solution to important concerns such as factuality, reasoning, interpretability, and robustness. By providing an in-depth review, this survey is expected to provide scholars with a deeper understanding of the methods’ applications and encourage them to adapt existing techniques to the fast-growing field of LLMs.</abstract>
       <url hash="3d34ec89">2023.findings-emnlp.314</url>
@@ -18946,7 +18946,7 @@
       <author><first>Jiarui</first><last>Li</last></author>
       <author><first>Xiaolei</first><last>Gu</last></author>
       <author><first>Shizhu</first><last>He</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Jianxin</first><last>Li</last></author>
       <pages>4929-4941</pages>
       <abstract>Pre-trained sentence representations are crucial for identifying significant sentences in unsupervised document extractive summarization. However, the traditional two-step paradigm of pre-training and sentence-ranking, creates a gap due to differing optimization objectives. To address this issue, we argue that utilizing pre-trained embeddings derived from a process specifically designed to optimize informative and distinctive sentence representations helps rank significant sentences. To do so, we propose a novel graph pre-training auto-encoder to obtain sentence embeddings by explicitly modelling intra-sentential distinctive features and inter-sentential cohesive features through sentence-word bipartite graphs. These fine-tuned sentence embeddings are then utilized in a graph-based ranking algorithm for unsupervised summarization. Our method is a plug-and-play pre-trained model that produces predominant performance for unsupervised summarization frameworks by providing summary-worthy sentence representations. It surpasses heavy BERT- or RoBERTa-based sentence representations in downstream tasks.</abstract>
@@ -18973,7 +18973,7 @@
       <author><first>Mei</first><last>Tu</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>4959-4965</pages>
       <abstract>Text image machine translation (TIMT) which translates source language text images into target language texts has attracted intensive attention in recent years. Although the end-to-end TIMT model directly generates target translation from encoded text image features with an efficient architecture, it lacks the recognized source language information resulting in a decrease in translation performance. In this paper, we propose a novel Cross-modal Cross-lingual Interactive Model (CCIM) to incorporate source language information by synchronously generating source language and target language results through an interactive attention mechanism between two language decoders. Extensive experimental results have shown the interactive decoder significantly outperforms end-to-end TIMT models and has faster decoding speed with smaller model size than cascade models.</abstract>
       <url hash="5e043aa8">2023.findings-emnlp.330</url>
@@ -19070,7 +19070,7 @@
       <author><first>Yuchen</first><last>Tian</last></author>
       <author><first>Yunzhe</first><last>Li</last></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>5067-5089</pages>
       <abstract>Recent code translation techniques exploit neural machine translation models to translate source code from one programming language to another to satisfy production compatibility or to improve efficiency of codebase maintenance. Most existing code translation datasets only focus on a single pair of popular programming languages. To advance research on code translation and meet diverse requirements of real-world applications, we construct **CodeTransOcean**, a large-scale comprehensive benchmark that supports the largest variety of programming languages for code translation. CodeTransOcean consists of three novel multilingual datasets, namely, **MultilingualTrans** supporting translations between multiple popular programming languages, **NicheTrans** for translating between niche programming languages and popular ones, and **LLMTrans** for evaluating executability of translated code by large language models (LLMs). CodeTransOcean also includes a novel cross-framework dataset, **DLTrans**, for translating deep learning code across different frameworks. We develop multilingual modeling approaches for code translation and demonstrate their great potential in improving the translation quality of both low-resource and high-resource language pairs and boosting the training efficiency. We also propose a novel evaluation metric **Debugging Success Rate@K** for program-level code translation. Last but not least, we evaluate LLM ChatGPT on our datasets and investigate its potential for fuzzy execution predictions. We build baselines for CodeTransOcean and analyze challenges of code translation for guiding future research. The CodeTransOcean datasets and code are publicly available at https://github.com/WeixiangYAN/CodeTransOcean.</abstract>
       <url hash="25da2107">2023.findings-emnlp.337</url>
@@ -19103,7 +19103,7 @@
     <paper id="340">
       <title>Robustness Tests for Automatic Machine Translation Metrics with Adversarial Attacks</title>
       <author><first>Yichen</first><last>Huang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>5126-5135</pages>
       <abstract>We investigate MT evaluation metric performance on adversarially-synthesized texts, to shed light on metric robustness. We experiment with word- and character-level attacks on three popular machine translation metrics: BERTScore, BLEURT, and COMET. Our human experiments validate that automatic metrics tend to overpenalize adversarially-degraded translations. We also identify inconsistencies in BERTScore ratings, where it judges the original sentence and the adversarially-degraded one as similar, while judging the degraded translation as notably worse than the original with respect to the reference. We identify patterns of brittleness that motivate more robust metric development.</abstract>
       <url hash="003d72c8">2023.findings-emnlp.340</url>
@@ -19147,7 +19147,7 @@
       <title><fixed-case>D</fixed-case>oc<fixed-case>T</fixed-case>rack: A Visually-Rich Document Dataset Really Aligned with Human Eye Movement for Machine Reading</title>
       <author><first>Hao</first><last>Wang</last></author>
       <author><first>Qingxuan</first><last>Wang</last></author>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
+      <author><first>Yue</first><last>Li</last></author>
       <author><first>Changqing</first><last>Wang</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
       <author><first>Rui</first><last>Wang</last></author>
@@ -19252,7 +19252,7 @@
       <title>Better Together: Enhancing Generative Knowledge Graph Completion with Language Models and Neighborhood Information</title>
       <author><first>Alla</first><last>Chepurova</last></author>
       <author><first>Aydar</first><last>Bulatov</last></author>
-      <author><first>Yuri</first><last>Kuratov</last></author>
+      <author id="yurii-kuratov"><first>Yuri</first><last>Kuratov</last></author>
       <author><first>Mikhail</first><last>Burtsev</last></author>
       <pages>5306-5316</pages>
       <abstract>Real-world Knowledge Graphs (KGs) often suffer from incompleteness, which limits their potential performance. Knowledge Graph Completion (KGC) techniques aim to address this issue. However, traditional KGC methods are computationally intensive and impractical for large-scale KGs, necessitating the learning of dense node embeddings and computing pairwise distances. Generative transformer-based language models (e.g., T5 and recent KGT5) offer a promising solution as they can predict the tail nodes directly. In this study, we propose to include node neighborhoods as additional information to improve KGC methods based on language models. We examine the effects of this imputation and show that, on both inductive and transductive Wikidata subsets, our method outperforms KGT5 and conventional KGC approaches. We also provide an extensive analysis of the impact of neighborhood on model prediction and show its importance. Furthermore, we point the way to significantly improve KGC through more effective neighborhood selection.</abstract>
@@ -19264,7 +19264,7 @@
       <title><fixed-case>D</fixed-case>elta<fixed-case>S</fixed-case>core: Fine-Grained Story Evaluation with Perturbations</title>
       <author><first>Zhuohan</first><last>Xie</last></author>
       <author><first>Miao</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Jey</first><last>Lau</last></author>
       <pages>5317-5331</pages>
       <abstract>Numerous evaluation metrics have been developed for natural language generation tasks, but their effectiveness in evaluating stories is limited as they are not specifically tailored to assess intricate aspects of storytelling, such as fluency and interestingness. In this paper, we introduce DeltaScore, a novel methodology that uses perturbation techniques for the evaluation of nuanced story aspects. We posit that the extent to which a story excels in a specific aspect (e.g., fluency) correlates with the magnitude of its susceptibility to particular perturbations (e.g., the introduction of typos). Given this, we measure the quality of an aspect by calculating the likelihood difference between pre- and post-perturbation states using pre-trained language models. We compare DeltaScore with existing metrics on storytelling datasets from two domains in five fine-grained story aspects: fluency, coherence, relatedness, logicality, and interestingness. DeltaScore demonstrates strong performance, revealing a surprising finding that one specific perturbation proves highly effective in capturing multiple aspects. Source code is available on our GitHub repository.</abstract>
@@ -19288,10 +19288,10 @@
     <paper id="355">
       <title>Don’t waste a single annotation: improving single-label classifiers through soft labels</title>
       <author><first>Ben</first><last>Wu</last></author>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
+      <author><first>Yue</first><last>Li</last></author>
       <author><first>Yida</first><last>Mu</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <pages>5347-5355</pages>
       <abstract>In this paper, we address the limitations of the common data annotation and training methods for objective single-label classification tasks. Typically, when annotating such tasks annotators are only asked to provide a single label for each sample and annotator disagreement is discarded when a final hard label is decided through majority voting. We challenge this traditional approach, acknowledging that determining the appropriate label can be difficult due to the ambiguity and lack of context in the data samples. Rather than discarding the information from such ambiguous annotations, our soft label method makes use of them for training. Our findings indicate that additional annotator information, such as confidence, secondary label and disagreement, can be used to effectively generate soft labels. Training classifiers with these soft labels then leads to improved performance and calibration on the hard label test set.</abstract>
@@ -19332,7 +19332,7 @@
     <paper id="358">
       <title>Licon: A Diverse, Controllable and Challenging Linguistic Concept Learning Benchmark</title>
       <author><first>Shenglong</first><last>Yu</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Wenya</first><last>Guo</last></author>
       <author><first>Zhengkun</first><last>Zhang</last></author>
       <author><first>Ru</first><last>Zhou</last></author>
@@ -19372,7 +19372,7 @@
     <paper id="361">
       <title>Multimodal Automated Fact-Checking: A Survey</title>
       <author><first>Mubashara</first><last>Akhtar</last></author>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
       <author><first>Zhijiang</first><last>Guo</last></author>
       <author><first>Oana</first><last>Cocarascu</last></author>
       <author><first>Elena</first><last>Simperl</last></author>
@@ -19401,7 +19401,7 @@
       <author><first>Ryan</first><last>Rossi</last></author>
       <author><first>Sungchul</first><last>Kim</last></author>
       <author><first>C.</first><last>Giles</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>5464-5474</pages>
       <abstract>There is growing interest in systems that generate captions for scientific figures. However, assessing these systems’ output poses a significant challenge. Human evaluation requires academic expertise and is costly, while automatic evaluation depends on often low-quality author-written captions. This paper investigates using large language models (LLMs) as a cost-effective, reference-free method for evaluating figure captions. We first constructed SCICAP-EVAL, a human evaluation dataset that contains human judgments for 3,600 scientific figure captions, both original and machine-made, for 600 arXiv figures. We then prompted LLMs like GPT-4 and GPT-3 to score (1-6) each caption based on its potential to aid reader understanding, given relevant context such as figure-mentioning paragraphs. Results show that GPT-4, used as a zero-shot evaluator, outperformed all other models and even surpassed assessments made by computer science undergraduates, achieving a Kendall correlation score of 0.401 with Ph.D. students’ rankings.</abstract>
       <url hash="0d2afb56">2023.findings-emnlp.363</url>
@@ -19476,7 +19476,7 @@
     <paper id="369">
       <title>Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games</title>
       <author><first>Ruoyao</first><last>Wang</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>5555-5565</pages>
       <abstract>In this work, we introduce a self-supervised behavior cloning transformer for text games, which are challenging benchmarks for multi-step reasoning in virtual environments. Traditionally, Behavior Cloning Transformers excel in such tasks but rely on supervised training data. Our approach auto-generates training data by exploring trajectories (defined by common macro-action sequences) that lead to reward within the games, while determining the generality and utility of these trajectories by rapidly training small models then evalauating their performance on unseen development games. Through empirical analysis, we show our method consistently uncovers generalizable training data, achieving about 90% performance of supervised systems across three benchmark text games.</abstract>
       <url hash="f147a82c">2023.findings-emnlp.369</url>
@@ -19498,7 +19498,7 @@
     </paper>
     <paper id="371">
       <title>x<fixed-case>D</fixed-case>ial-Eval: A Multilingual Open-Domain Dialogue Evaluation Benchmark</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Luis</first><last>D’Haro</last></author>
       <author><first>Chengguang</first><last>Tang</last></author>
       <author><first>Ke</first><last>Shi</last></author>
@@ -19546,7 +19546,7 @@
       <author><first>Yuxiang</first><last>Lu</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Zhipang</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>5634-5640</pages>
       <abstract>The aim of implicit discourse relation recognition is to comprehend the sense of connection between two arguments. In this work, we present a classification method that is solely based on generative models. Our proposed approach employs a combination of instruction templates and in-context learning to refine the generative model for effectively addressing the implicit discourse relation recognition task. Furthermore, we utilize Chain-of-Thoughts to partition the inference process into a sequence of three successive stages. This strategy enables us to fully utilize the autoregressive generative model’s potential for knowledge acquisition and inference, ultimately leading to enhanced performance on this natural language understanding task. The results of our experiments, evaluated on benchmark datasets PDTB 2.0, PDTB 3.0, and the CoNLL16 shared task, demonstrate superior performance compared to previous state-of-the-art models.</abstract>
       <url hash="30c36e37">2023.findings-emnlp.374</url>
@@ -19559,7 +19559,7 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Zhihua</first><last>Wei</last></author>
       <author><first>Yu</first><last>Li</last></author>
-      <author id="xinpeng-wang"><first>Xinpeng</first><last>Wang</last></author>
+      <author><first>Xinpeng</first><last>Wang</last></author>
       <pages>5641-5656</pages>
       <abstract>Opinion summarization is expected to digest larger review sets and provide summaries from different perspectives. However, most existing solutions are deficient in epitomizing extensive reviews and offering opinion summaries from various angles due to the lack of designs for information selection. To this end, we propose SubSumm, a supervised summarization framework for large-scale multi-perspective opinion summarization. SubSumm consists of a review sampling strategy set and a two-stage training scheme. The sampling strategies take sentiment orientation and contrastive information value into consideration, with which the review subsets from different perspectives and quality levels can be selected. Subsequently, the summarizer is encouraged to learn from the sub-optimal and optimal subsets successively in order to capitalize on the massive input. Experimental results on AmaSum and Rotten Tomatoes datasets demonstrate that SubSumm is adept at generating pros, cons, and verdict summaries from hundreds of input reviews. Furthermore, our in-depth analysis verifies that the advanced selection of review subsets and the two-stage training scheme are vital to boosting the summarization performance.</abstract>
       <url hash="6b612301">2023.findings-emnlp.375</url>
@@ -19597,7 +19597,7 @@
       <author><first>Muru</first><last>Zhang</last></author>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Ludwig</first><last>Schmidt</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
       <pages>5687-5711</pages>
       <abstract>We investigate the ability of language models to perform compositional reasoning tasks where the overall solution depends on correctly composing the answers to sub-problems. We measure how often models can correctly answer all sub-problems but not generate the overall solution, a ratio we call the compositionality gap. We evaluate this ratio by asking multi-hop questions with answers that require composing multiple facts unlikely to have been observed together during pretraining. In the GPT-3 family of models, as model size increases we show that the single-hop question answering performance improves faster than the multi-hop performance does, therefore the compositionality gap does not decrease. This surprising result suggests that while more powerful models memorize and recall more factual knowledge, they show no corresponding improvement in their ability to perform this kind of compositional reasoning. We then demonstrate how elicitive prompting (such as chain of thought) narrows the compositionality gap by reasoning explicitly instead of implicitly. We present a new method, self-ask, that further improves on chain of thought. In our method, the model explicitly asks itself (and then answers) follow-up questions before answering the initial question. We finally show that self-ask’s structured prompting lets us easily plug in a search engine to answer the follow-up questions, which additionally improves accuracy.</abstract>
@@ -19773,7 +19773,7 @@
     </paper>
     <paper id="392">
       <title>Universal Domain Adaptation for Robust Handling of Distributional Shifts in <fixed-case>NLP</fixed-case></title>
-      <author><first>Hyuhng</first><last>Kim</last></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng</first><last>Kim</last></author>
       <author><first>Hyunsoo</first><last>Cho</last></author>
       <author><first>Sang-Woo</first><last>Lee</last></author>
       <author><first>Junyeob</first><last>Kim</last></author>
@@ -19804,7 +19804,7 @@
       <author><first>Haoran</first><last>Sun</last></author>
       <author><first>Yikun</first><last>Lei</last></author>
       <author><first>Shaolin</first><last>Zhu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>5920-5932</pages>
       <abstract>Deep neural networks have demonstrated their capacity in extracting features from speech inputs. However, these features may include non-linguistic speech factors such as timbre and speaker identity, which are not directly related to translation. In this paper, we propose a content-centric speech representation disentanglement learning framework for speech translation, CCSRD, which decomposes speech representations into content representations and non-linguistic representations via representation disentanglement learning. CCSRD consists of a content encoder that encodes linguistic content information from the speech input, a non-content encoder that models non-linguistic speech features, and a disentanglement module that learns disentangled representations with a cyclic reconstructor, feature reconstructor and speaker classifier trained in a multi-task learning way. Experiments on the MuST-C benchmark dataset demonstrate that CCSRD achieves an average improvement of +0.9 BLEU in two settings across five translation directions over the baseline, outperforming state-of-the-art end-to-end speech translation models and cascaded models.</abstract>
       <url hash="0afdbcc2">2023.findings-emnlp.394</url>
@@ -19839,7 +19839,7 @@
       <title>Transformer Working Memory Enables Regular Language Reasoning And Natural Language Length Extrapolation</title>
       <author><first>Ta-Chung</first><last>Chi</last></author>
       <author><first>Ting-Han</first><last>Fan</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <author><first>Peter</first><last>Ramadge</last></author>
       <pages>5972-5984</pages>
       <abstract>Unlike recurrent models, conventional wisdom has it that Transformers cannot perfectly model regular languages. Inspired by the notion of working memory, we propose a new Transformer variant named RegularGPT. With its novel combination of Weight-Sharing, Adaptive-Depth, and Sliding-Dilated-Attention, RegularGPT constructs working memory along the depth dimension, thereby enabling efficient and successful modeling of regular languages such as PARITY. We further test RegularGPT on the task of natural language length extrapolation and surprisingly find that it rediscovers the local windowed attention effect deemed necessary in prior work for length extrapolation.</abstract>
@@ -19892,7 +19892,7 @@
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Qingqing</first><last>Zhao</last></author>
       <author><first>Xiaotong</first><last>Jiang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>6038-6048</pages>
       <abstract>Synaesthesia refers to the description of perceptions in one sensory modality through concepts from other modalities. It involves not only a linguistic phenomenon, but also a cognitive phenomenon structuring human thought and action, which makes understanding it challenging. As a means of cognition, synaesthesia is rendered by more than sensory modalities, cue and stimulus can also play an important role in expressing and understanding it. In addition, understanding synaesthesia involves many cognitive efforts, such as identifying the semantic relationship between sensory words and modalities. Therefore, we propose a unified framework focusing on annotating all kinds of synaesthetic elements and fully exploring the relationship among them. In particular, we introduce a new annotation scheme, including sensory modalities as well as their cues and stimuli, which facilitate understanding synaesthetic information collectively. We further design a structure generation model to capture the relations among synaesthetic elements and generate them jointly. Through extensive experiments, the importance of proposed dataset can be verified by the statistics and progressive performances. In addition, our proposed model yields state-of-the-art results, demonstrating its effectiveness.</abstract>
       <url hash="816c830d">2023.findings-emnlp.401</url>
@@ -19998,7 +19998,7 @@
       <author><first>Amir Hossein</first><last>Kargaran</last></author>
       <author><first>Ayyoob</first><last>Imani</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>6155-6218</pages>
       <abstract>Several recent papers have published good solutions for language identification (LID) for about 300 high-resource and medium-resource languages. However, there is no LID available that (i) covers a wide range of low-resource languages, (ii) is rigorously evaluated and reliable and (iii) efficient and easy to use. Here, we publish GlotLID-M, an LID model that satisfies the desiderata of wide coverage, reliability and efficiency. It identifies 1665 languages, a large increase in coverage compared to prior work. In our experiments, GlotLID-M outperforms four baselines (CLD3, FT176, OpenLID and NLLB) when balancing F1 and false positive rate (FPR). We analyze the unique challenges that low-resource LID poses: incorrect corpus metadata, leakage from high-resource languages, difficulty separating closely related languages, handling of macrolanguage vs varieties and in general noisy data. We hope that integrating GlotLID-M into dataset creation pipelines will improve quality and enhance accessibility of NLP technology for low-resource languages and cultures. GlotLID-M model, code, and list of data sources are available: https://github.com/cisnlp/GlotLID.</abstract>
       <url hash="d231faeb">2023.findings-emnlp.410</url>
@@ -20031,7 +20031,7 @@
       <author><first>Yu Lu</first><last>Liu</last></author>
       <author><first>Meng</first><last>Cao</last></author>
       <author><first>Su Lin</first><last>Blodgett</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Alexandra</first><last>Olteanu</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <pages>6246-6261</pages>
@@ -20086,7 +20086,7 @@
       <title>Strong and Efficient Baselines for Open Domain Conversational Question Answering</title>
       <author><first>Andrei</first><last>Coman</last></author>
       <author><first>Gianni</first><last>Barlacchi</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>6305-6314</pages>
       <abstract>Unlike the Open Domain Question Answering (ODQA) setting, the conversational (ODConvQA) domain has received limited attention when it comes to reevaluating baselines for both efficiency and effectiveness. In this paper, we study the State-of-the-Art (SotA) Dense Passage Retrieval (DPR) retriever and Fusion-in-Decoder (FiD) reader pipeline, and show that it significantly underperforms when applied to ODConvQA tasks due to various limitations. We then propose and evaluate strong yet simple and efficient baselines, by introducing a fast reranking component between the retriever and the reader, and by performing targeted finetuning steps. Experiments on two ODConvQA tasks, namely TopiOCQA and OR-QuAC, show that our method improves the SotA results, while reducing reader’s latency by 60%. Finally, we provide new and valuable insights into the development of challenging baselines that serve as a reference for future, more intricate approaches, including those that leverage Large Language Models (LLMs).</abstract>
       <url hash="75a385be">2023.findings-emnlp.417</url>
@@ -20151,9 +20151,9 @@
       <author><first>Hengran</first><last>Zhang</last></author>
       <author><first>Ruqing</first><last>Zhang</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Yixing</first><last>Fan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>6373-6384</pages>
       <abstract>Retrieval-enhanced methods have become a primary approach in fact verification (FV); it requires reasoning over multiple retrieved pieces of evidence to verify the integrity of a claim. To retrieve evidence, existing work often employs off-the-shelf retrieval models whose design is based on the probability ranking principle. We argue that, rather than relevance, for FV we need to focus on the utility that a claim verifier derives from the retrieved evidence. We introduce the <tex-math>\textbf{feedback-based evidence retriever} (FER)</tex-math> that optimizes the evidence retrieval process by incorporating feedback from the claim verifier. As a feedback signal we use the divergence in utility between how effectively the verifier utilizes the retrieved evidence and the ground-truth evidence to produce the final claim label. Empirical studies demonstrate the superiority of FER over prevailing baselines.</abstract>
       <url hash="d436dc30">2023.findings-emnlp.422</url>
@@ -20168,7 +20168,7 @@
       <author><first>Barlas</first><last>Oguz</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Xilun</first><last>Chen</last></author>
       <pages>6385-6400</pages>
       <abstract>Various techniques have been developed in recent years to improve dense retrieval (DR), such as unsupervised contrastive learning and pseudo-query generation. Existing DRs, however, often suffer from effectiveness tradeoffs between supervised and zero-shot retrieval, which some argue was due to the limited model capacity. We contradict this hypothesis and show that a generalizable DR can be trained to achieve high accuracy in both supervised and zero-shot retrieval without increasing model size. In particular, we systematically examine the contrastive learning of DRs, under the framework of Data Augmentation (DA). Our study shows that common DA practices such as query augmentation with generative models and pseudo-relevance label creation using a cross-encoder, are often inefficient and sub-optimal. We hence propose a new DA approach with diverse queries and sources of supervision to progressively train a generalizable DR. As a result, DRAGON, our Dense Retriever trained with diverse AuGmentatiON, is the first BERT-base-sized DR to achieve state-of-the-art effectiveness in both supervised and zero-shot evaluations and even competes with models using more complex late interaction.</abstract>
@@ -20268,7 +20268,7 @@
       <author><first>Jun</first><last>Zhao</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6491-6502</pages>
       <abstract>The inductive inference of the knowledge graph aims to complete the potential relations between the new unknown entities in the graph. Most existing methods are based on entity-independent features such as graph structure information and relationship information to inference. However, the neighborhood of these new entities is often too sparse to obtain enough information to build these features effectively. In this work, we propose a knowledge graph inductive inference method that fuses ontology information. Based on the enclosing subgraph, we bring in feature embeddings of concepts corresponding to entities to learn the semantic information implicit in the ontology. Considering that the ontology information of entities may be missing, we build a type constraint regular loss to explicitly model the semantic connections between entities and concepts, and thus capture the missing concepts of entities. Experimental results show that our approach significantly outperforms large language models like ChatGPT on two benchmark datasets, YAGO21K-610 and DB45K-165, and improves the MRR metrics by 15.4% and 44.1%, respectively, when compared with the state-of-the-art methods.</abstract>
       <url hash="844c70e8">2023.findings-emnlp.431</url>
@@ -20289,7 +20289,7 @@
     <paper id="433">
       <title>Harnessing the Power of Large Language Models for Empathetic Response Generation: Empirical Investigations and Improvements</title>
       <author><first>Yushan</first><last>Qian</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>6516-6528</pages>
       <abstract>Empathetic dialogue is an indispensable part of building harmonious social relationships and contributes to the development of a helpful AI. Previous approaches are mainly based on fine small-scale language models. With the advent of ChatGPT, the application effect of large language models (LLMs) in this field has attracted great attention. This work empirically investigates the performance of LLMs in generating empathetic responses and proposes three improvement methods of semantically similar in-context learning, two-stage interactive generation, and combination with the knowledge base. Extensive experiments show that LLMs can significantly benefit from our proposed methods and is able to achieve state-of-the-art performance in both automatic and human evaluations. Additionally, we explore the possibility of GPT-4 simulating human evaluators.</abstract>
@@ -20304,7 +20304,7 @@
       <author><first>Sander</first><last>Schulhoff</last></author>
       <author><first>Benjamin</first><last>Wachspress</last></author>
       <author><first>Alan</first><last>Blinder</last></author>
-      <author><first>Brandon</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon</first><last>Stewart</last></author>
       <pages>6529-6539</pages>
       <abstract>Markets and policymakers around the world hang on the consequential monetary policy decisions made by the Federal Open Market Committee (FOMC). Publicly available textual documentation of their meetings provides insight into members’ attitudes about the economy. We use GPT-4 to quantify dissent among members on the topic of inflation. We find that transcripts and minutes reflect the diversity of member views about the macroeconomic outlook in a way that is lost or omitted from the public statements. In fact, diverging opinions that shed light upon the committee’s “true” attitudes are almost entirely omitted from the final statements. Hence, we argue that forecasting FOMC sentiment based solely on statements will not sufficiently reflect dissent among the hawks and doves.</abstract>
       <url hash="c998ae09">2023.findings-emnlp.434</url>
@@ -20331,7 +20331,7 @@
     <paper id="436">
       <title>Inverse Reinforcement Learning for Text Summarization</title>
       <author><first>Yu</first><last>Fu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yue</first><last>Dong</last></author>
       <pages>6559-6570</pages>
       <abstract>We introduce inverse reinforcement learning (IRL) as an effective paradigm for training abstractive summarization models, imitating human summarization behaviors. Our IRL model estimates the reward function using a suite of important sub-rewards for summarization and concurrently optimizes the policy network. Experimental results across datasets in different domains (CNN/DailyMail and WikiHow) and various model sizes (BART-base and BART-large) demonstrate the superiority of our proposed IRL model for summarization over MLE and RL baselines. The resulting summaries exhibit greater similarity to human-crafted gold references, outperforming MLE and RL baselines on metrics such as ROUGE, coverage, novelty, compression ratio, factuality, and human evaluations.</abstract>
@@ -20371,7 +20371,7 @@
       <title>From Chaos to Clarity: Claim Normalization to Empower Fact-Checking</title>
       <author><first>Megha</first><last>Sundriyal</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>6594-6609</pages>
       <abstract>With the proliferation of social media platforms, users are exposed to vast information, including posts containing misleading claims. However, the pervasive noise inherent in these posts presents a challenge in identifying precise and prominent claims that require verification. Extracting the core assertions from such posts is arduous and time-consuming. We introduce a novel task, called Claim Normalization (<tex-math>\textit{aka ClaimNorm}</tex-math>) that aims to decompose complex and noisy social media posts into more straightforward and understandable forms, termed <tex-math>\textit{normalized claims}</tex-math>. We propose <tex-math>\texttt{CACN}</tex-math> , a pioneering approach that leverages chain-of-thought and claim check-worthiness estimation, mimicking human reasoning processes, to comprehend intricate claims. Moreover, we capitalize on large language models’ powerful in-context learning abilities to provide guidance and improve the claim normalization process. To evaluate the effectiveness of our proposed model, we meticulously compile a comprehensive real-world dataset, <tex-math>\texttt{CLAN}</tex-math>, comprising more than <tex-math>6k</tex-math> instances of social media posts alongside their respective normalized claims. Experimentation demonstrates that <tex-math>\texttt{CACN}</tex-math> outperforms several baselines across various evaluation measures. A rigorous error analysis validates <tex-math>\texttt{CACN}</tex-math>‘s capabilities and pitfalls. We release our dataset and code at https://github.com/LCS2-IIITD/CACN-EMNLP-2023.</abstract>
       <url hash="341b7cda">2023.findings-emnlp.439</url>
@@ -20510,7 +20510,7 @@
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Jungwoo</first><last>Lim</last></author>
       <author><first>Yoonna</first><last>Jang</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>6755-6761</pages>
       <abstract>Cross-document relation extraction (CodRED) task aims to infer the relation between two entities mentioned in different documents within a reasoning path. Previous studies have concentrated on merely capturing implicit relations between the entities. However, humans usually utilize explicit information chains such as hyperlinks or additional searches to find the relations between two entities. Inspired by this, we propose Path wIth expLOraTion (PILOT) that provides the enhanced reasoning path by exploring the explicit clue information within the documents. PILOT finds the bridging entities which directly guide the paths between the entities and then employs them as stepstones to navigate desirable paths. We show that models with PILOT outperform the baselines in the CodRED task. Furthermore, we offer a variety of analyses to verify the validity of the reasoning paths constructed through PILOT, including evaluations using large language models such as ChatGPT.</abstract>
       <url hash="83d8ae25">2023.findings-emnlp.450</url>
@@ -20587,7 +20587,7 @@
       <title>Conditional Natural Language Inference</title>
       <author><first>Youngwoo</first><last>Kim</last></author>
       <author><first>Razieh</first><last>Rahimi</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>6833-6851</pages>
       <abstract>To properly explain sentence pairs that provide contradictory (different) information for different conditions, we introduce the task of conditional natural language inference (Cond-NLI) and focus on automatically extracting contradictory aspects and their conditions from a sentence pair. Cond-NLI can help to provide a full spectrum of information, such as when there are multiple answers to a question each addressing a specific condition, or reviews with different opinions for different conditions. We show that widely-used feature-attribution explanation models are not suitable for finding conditions, especially when sentences are long and are written independently. We propose a simple yet effective model for the original NLI task that can successfully extract conditions while not requiring token-level annotations. Our model enhances the interpretability of the NLI task while maintaining comparable accuracy. To evaluate models for the Cond-NLI, we build and release a token-level annotated dataset BioClaim which contains potentially contradictory claims from the biomedical domain. Our experiments show that our proposed model outperforms the full cross-encoder and other baselines in extracting conditions. It also performs on-par with GPT-3 which has an order of magnitude more parameters and trained on a huge amount of data.</abstract>
       <url hash="f5a6eac1">2023.findings-emnlp.456</url>
@@ -20682,7 +20682,7 @@
       <author><first>Koki</first><last>Maeda</last></author>
       <author><first>Shuhei</first><last>Kurita</last></author>
       <author><first>Taiki</first><last>Miyanishi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>6940-6954</pages>
       <abstract>A 360-degree image captures the entire scene without the limitations of a camera’s field of view, which makes it difficult to describe all the contexts in a single caption. We propose a novel task called Query-based Image Captioning (QuIC) for 360-degree images, where a query (words or short phrases) specifies the context to describe. This task is more challenging than the conventional image captioning task, which describes salient objects in images, as it requires fine-grained scene understanding to select the contents consistent with user’s intent based on the query. We construct a dataset for the new task that comprises 3,940 360-degree images and 18,459 pairs of queries and captions annotated manually. Experiments demonstrate that fine-tuning image captioning models further on our dataset can generate more diverse and controllable captions from multiple contexts of 360-degree images.</abstract>
       <url hash="ccdece92">2023.findings-emnlp.463</url>
@@ -20727,7 +20727,7 @@
     </paper>
     <paper id="467">
       <title>Evaluating Verifiability in Generative Search Engines</title>
-      <author><first>Nelson</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson</first><last>Liu</last></author>
       <author><first>Tianyi</first><last>Zhang</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <pages>7001-7025</pages>
@@ -20766,8 +20766,8 @@
       <title><fixed-case>R</fixed-case>o<fixed-case>MQA</fixed-case>: A Benchmark for Robust, Multi-evidence, Multi-answer Question Answering</title>
       <author><first>Victor</first><last>Zhong</last></author>
       <author><first>Weijia</first><last>Shi</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>7055-7067</pages>
       <abstract>We introduce RoMQA, the first benchmark for robust, multi-evidence, multi-answer question answering (QA). RoMQA contains clusters of questions that are derived from related constraints mined from the Wikidata knowledge graph. RoMQA evaluates robustness of QA models to varying constraints by measuring worst-case performance within each question cluster. Compared to prior QA datasets, RoMQA has more human-written questions that require reasoning over more evidence text and have, on average, many more correct answers. In addition, human annotators rate RoMQA questions as more natural or likely to be asked by people. We evaluate state-of-the-art large language models in zero-shot, few-shot, and fine-tuning settings, and find that RoMQA is challenging: zeroshot and few-shot models perform similarly to naive baselines, while supervised retrieval methods perform well below gold evidence upper bounds. Moreover, existing models are not robust to variations in question constraints, but can be made more robust by tuning on clusters of related questions. Our results show that RoMQA is a challenging benchmark for large language models, and provides a quantifiable test to build more robust QA methods.</abstract>
       <url hash="e5eb53b5">2023.findings-emnlp.470</url>
@@ -20778,7 +20778,7 @@
       <title>Leveraging Multiple Teachers for Test-Time Adaptation of Language-Guided Classifiers</title>
       <author><first>Kangda</first><last>Wei</last></author>
       <author><first>Sayan</first><last>Ghosh</last></author>
-      <author><first>Rakesh</first><last>Menon</last></author>
+      <author id="rakesh-r-menon"><first>Rakesh</first><last>Menon</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <pages>7068-7088</pages>
       <abstract>Recent approaches have explored language- guided classifiers capable of classifying examples from novel tasks when provided with task-specific natural language explanations, instructions or prompts (Sanh et al., 2022; R. Menon et al., 2022). While these classifiers can generalize in zero-shot settings, their task performance often varies substantially between different language explanations in unpredictable ways (Lu et al., 2022; Gonen et al., 2022). Also, current approaches fail to leverage unlabeled examples that may be available in many scenarios. Here, we introduce TALC, a framework that uses data programming to adapt a language-guided classifier for a new task during inference when provided with explanations from multiple teachers and unlabeled test examples. Our results show that TALC consistently outperforms a competitive baseline from prior work by an impressive 9.3% (relative improvement). Further, we demonstrate the robustness of TALC to variations in the quality and quantity of provided explanations, highlighting its potential in scenarios where learning from multiple teachers or a crowd is involved. Our code is available at: https://github.com/WeiKangda/TALC.git.</abstract>
@@ -20789,7 +20789,7 @@
     <paper id="472">
       <title>Summarizing Multiple Documents with Conversational Structure for Meta-Review Generation</title>
       <author><first>Miao</first><last>Li</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Jey</first><last>Lau</last></author>
       <pages>7089-7112</pages>
       <abstract>We present PeerSum, a novel dataset for generating meta-reviews of scientific papers. The meta-reviews can be interpreted as abstractive summaries of reviews, multi-turn discussions and the paper abstract. These source documents have a rich inter-document relationship with an explicit hierarchical conversational structure, cross-references and (occasionally) conflicting information. To introduce the structural inductive bias into pre-trained language models, we introduce RAMMER (Relationship-aware Multi-task Meta-review Generator), a model that uses sparse attention based on the conversational structure and a multi-task training objective that predicts metadata features (e.g., review ratings). Our experimental results show that RAMMER outperforms other strong baseline models in terms of a suite of automatic evaluation metrics. Further analyses, however, reveal that RAMMER and other models struggle to handle conflicts in source documents, suggesting meta-review generation is a challenging task and a promising avenue for further research.</abstract>
@@ -20934,8 +20934,8 @@
     </paper>
     <paper id="484">
       <title>Lexical Repetitions Lead to Rote Learning: Unveiling the Impact of Lexical Overlap in Train and Test Reference Summaries</title>
-      <author><first>Prafulla</first><last>Choubey</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla</first><last>Choubey</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <pages>7269-7283</pages>
@@ -21009,7 +21009,7 @@
       <author><first>Ruihao</first><last>Shui</last></author>
       <author><first>Yixin</first><last>Cao</last></author>
       <author><first>Xiang</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>7337-7348</pages>
       <abstract>Large language models (LLMs) have demonstrated great potential for domain-specific applications, such as the law domain. However, recent disputes over GPT-4’s law evaluation raise questions concerning their performance in real-world legal tasks. To systematically investigate their competency in the law, we design practical baseline solutions based on LLMs and test on the task of legal judgment prediction. In our solutions, LLMs can work alone to answer open questions or coordinate with an information retrieval (IR) system to learn from similar cases or solve simplified multi-choice questions. We show that similar cases and multi-choice options, namely label candidates, included in prompts can help LLMs recall domain knowledge that is critical for expertise legal reasoning. We additionally present an intriguing paradox wherein an IR system surpasses the performance of LLM+IR due to limited gains acquired by weaker LLMs from powerful IR systems. In such case, the role of LLMs becomes redundant. Our evaluation pipeline can be easily extended into other tasks to facilitate evaluations in other domains. Code is available at https://github.com/srhthu/LM-CompEval-Legal</abstract>
       <url hash="95387d51">2023.findings-emnlp.490</url>
@@ -21157,7 +21157,7 @@
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Wai-Chung</first><last>Kwan</last></author>
       <author><first>David</first><last>Ho</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>7468-7479</pages>
       <abstract>Knowledge probing is a task to assess the knowledge encoded within pre-trained language models (PLMs) by having the PLM complete prompts such as “Italy is located in __,”. The model’s prediction precision serves as a lower bound for the amount of knowledge it contains. Subsequent works explore training a series of vectors as prompts to guide PLMs towards more accurate predictions. However, these methods compromise the readability of the prompts. We cannot directly understand these prompts from their literal meaning, making it difficult to verify whether they are correct. Consequently, the credibility of probing results derived from these prompts is diminished. To address the issue, we propose a novel method called ReadPrompt, which aims to identify meaningful sentences to serve as prompts. Experiments show that ReadPrompt achieves state-of-the-art performance on the current knowledge probing benchmark. Moreover, since the prompt is readable, we discovered a misalignment between constructed prompts and knowledge, which is also present in current prompting methods verified by an attack experiment. We claim that the probing outcomes of the current prompting methods are unreliable that overestimate the knowledge contained within PLMs.</abstract>
       <url hash="f9e4eba0">2023.findings-emnlp.501</url>
@@ -21260,7 +21260,7 @@
     </paper>
     <paper id="509">
       <title>Culturally Aware Natural Language Inference</title>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <pages>7591-7609</pages>
       <abstract>Humans produce and consume language in a particular cultural context, which includes knowledge about specific norms and practices. A listener’s awareness of the cultural context is critical for interpreting the speaker’s meaning. A simple expression like *I didn’t leave a tip* implies a strong sense of dissatisfaction when tipping is assumed to be the norm. As NLP systems reach users from different cultures, achieving culturally aware language understanding becomes increasingly important. However, current research has focused on building cultural knowledge bases without studying how such knowledge leads to contextualized interpretations of texts. In this work, we operationalize cultural variations in language understanding through a natural language inference (NLI) task that surfaces cultural variations as label disagreement between annotators from different cultural groups. We introduce the first Culturally Aware Natural Language Inference (CALI) dataset with 2.7K premise-hypothesis pairs annotated by two cultural groups located in the U.S. and India. With CALI, we categorize how cultural norms affect language understanding and present an evaluation framework to assess at which levels large language models are culturally aware. Our dataset is available at https://github.com/SALT-NLP/CulturallyAwareNLI.</abstract>
@@ -21283,7 +21283,7 @@
       <author><first>Zichao</first><last>Li</last></author>
       <author><first>Ines</first><last>Arous</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>7623-7636</pages>
       <abstract>The potential of using a large language model (LLM) as a knowledge base (KB) has sparked significant interest. To maintain the knowledge acquired by LLMs, we need to ensure that the editing of learned facts respects internal logical constraints, which are known as dependency of knowledge. Existing work on editing LLMs has partially addressed the issue of dependency, when the editing of a fact should apply to its lexical variations without disrupting irrelevant ones. However, they neglect the dependency between a fact and its logical implications. We propose an evaluation protocol with an accompanying question-answering dataset, StandUp, that provides a comprehensive assessment of the editing process considering the above notions of dependency. Our protocol involves setting up a controlled environment in which we edit facts and monitor their impact on LLMs, along with their implications based on If-Then rules. Extensive experiments on StandUp show that existing knowledge editing methods are sensitive to the surface form of knowledge, and that they have limited performance in inferring the implications of edited facts.</abstract>
       <url hash="0ce0035f">2023.findings-emnlp.511</url>
@@ -21295,7 +21295,7 @@
       <author><first>Aparna</first><last>Elangovan</last></author>
       <author><first>Estrid</first><last>He</last></author>
       <author><first>Yuan</first><last>Li</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>7637-7649</pages>
       <abstract>BERT-based models have had strong performance on leaderboards, yet have been demonstrably worse in real-world settings requiring generalization. Limited quantities of training data is considered a key impediment to achieving generalizability in machine learning. In this paper, we examine the impact of training data quality, not quantity, on a model’s generalizability. We consider two characteristics of training data: the portion of human-adversarial (h-adversarial), i.e. sample pairs with seemingly minor differences but different ground-truth labels, and human-affable (h-affable) training samples, i.e. sample pairs with minor differences but the same ground-truth label. We find that for a fixed size of training samples, as a rule of thumb, having 10-30% h-adversarial instances improves the precision, and therefore F1, by up to 20 points in the tasks of text classification and relation extraction. Increasing h-adversarials beyond this range can result in performance plateaus or even degradation. In contrast, h-affables may not contribute to a model’s generalizability and may even degrade generalization performance.</abstract>
       <url hash="a1402183">2023.findings-emnlp.512</url>
@@ -21333,7 +21333,7 @@
       <author><first>Rui</first><last>Xia</last></author>
       <author><first>Jianfei</first><last>Yu</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>7687-7698</pages>
       <abstract>Most previous studies on aspect-based sentiment analysis (ABSA) were carried out at the sentence level, while the research of document-level ABSA has not received enough attention. In this work, we focus on the document-level targeted sentiment analysis task, which aims to extract the opinion targets consisting of multi-level entities from a review document and predict their sentiments. We propose a Sequence-to-Structure (Seq2Struct) approach to address the task, which is able to explicitly model the hierarchical structure among multiple opinion targets in a document, and capture the long-distance dependencies among affiliated entities across sentences. In addition to the existing Seq2Seq approach, we further construct four strong baselines with different pretrained models. Experimental results on six domains show that our Seq2Struct approach outperforms all the baselines significantly. Aside from the performance advantage in outputting the multi-level target-sentiment pairs, our approach has another significant advantage - it can explicitly display the hierarchical structure of the opinion targets within a document. Our source code is publicly released at https://github.com/NUSTM/Doc-TSA-Seq2Struct.</abstract>
       <url hash="13ee7d71">2023.findings-emnlp.515</url>
@@ -21457,7 +21457,7 @@
       <author><first>Lifeng</first><last>Shang</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>7829-7844</pages>
       <abstract>Pretrained language models (PLMs) based knowledge-grounded dialogue systems are prone to generate responses that are factually inconsistent with the provided knowledge source. In such inconsistent responses, the dialogue models fail to accurately express the external factual knowledge they rely upon. Inspired by previous work which identified that feedforward networks (FFNs) within Transformers are responsible for factual knowledge expressions, we investigate two methods to efficiently improve the factual expression capability of FFNs by knowledge enhancement and alignment respectively. We first propose K-Dial, which explicitly introduces extended FFNs in Transformers to enhance factual knowledge expressions given the specific patterns of knowledge-grounded dialogue inputs. Additionally, we apply the reinforcement learning for factual consistency (RLFC) method to implicitly adjust FFNs’ expressions in responses by aligning with gold knowledge for the factual consistency preference. To comprehensively assess the factual consistency and dialogue quality of responses, we employ extensive automatic measures and human evaluations including sophisticated fine-grained NLI-based metrics. Experimental results on WoW and CMU_DoG datasets demonstrate that our methods efficiently enhance the ability of the FFN module to convey factual knowledge, validating the efficacy of improving factual consistency for knowledge-grounded dialogue systems.</abstract>
       <url hash="fca9ba8c">2023.findings-emnlp.525</url>
@@ -21485,7 +21485,7 @@
       <title>Frequency Balanced Datasets Lead to Better Language Models</title>
       <author><first>Rodolfo</first><last>Zevallos</last></author>
       <author><first>Mireia</first><last>Farrús</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>7859-7872</pages>
       <abstract>This paper reports on the experiments aimed to improve our understanding of the role of the amount of data required for training attention-based transformer language models. Specifically, we investigate the impact of reducing the immense amounts of required pre-training data through sampling strategies that identify and reduce high-frequency tokens as different studies have indicated that the existence of very high-frequency tokens in pre-training data might bias learning, causing undesired effects. In this light, we describe our sampling algorithm that iteratively assesses token frequencies and removes sentences that contain still high-frequency tokens, eventually delivering a balanced, linguistically correct dataset. We evaluate the results in terms of model perplexity and fine-tuning linguistic probing tasks, NLP downstream tasks as well as more semantic SuperGlue tasks. The results show that pre-training with the resulting balanced dataset allows reducing up to three times the pre-training data.</abstract>
       <url hash="da139af9">2023.findings-emnlp.527</url>
@@ -21550,7 +21550,7 @@
     <paper id="532">
       <title>Linguistic Compression in Single-Sentence Human-Written Summaries</title>
       <author><first>Fangcong</first><last>Yin</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>7922-7935</pages>
       <abstract>Summarizing texts involves significant cognitive efforts to compress information. While advances in automatic summarization systems have drawn attention from the NLP and linguistics communities to this topic, there is a lack of computational studies of linguistic patterns in human-written summaries. This work presents a large-scale corpus study of human-written single-sentence summaries. We analyzed the linguistic compression patterns from source documents to summaries at different granularities, and we found that summaries are generally written with morphological expansion, increased lexical diversity, and similar positional arrangements of specific words compared to the source across different genres. We also studied how linguistic compressions of different factors affect reader judgments of quality through a human study, with the results showing that the use of morphological and syntactic changes by summary writers matches reader preferences while lexical diversity and word specificity preferences are not aligned between summary writers and readers.</abstract>
       <url hash="808d54a0">2023.findings-emnlp.532</url>
@@ -21576,7 +21576,7 @@
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Jeongwook</first><last>Kim</last></author>
       <author><first>Yuna</first><last>Hur</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>7950-7963</pages>
       <abstract>To build ultimate dialogue agents, previous studies suggest models that ground both persona and knowledge. However, applying the dialogue system directly to the usual conversation is still limited because the system requires a complete sentence-formed persona and knowledge candidate sets from the given dataset. In contrast to the dialogue setting in the dataset, humans utilize semantic concepts in their minds rather than a set of pre-defined candidate sentences. Following this manner of human dialogue, we suggest an adaptive dialogue system that is applicable to situations where complete sentence-formed candidates are not given. Our model generates consistent and relevant persona descriptions and identifies relevant knowledge for engaging and knowledgeable responses, even with fragmentary information. We show that our model outperforms previous baselines that utilize persona and knowledge candidate sentences and conduct the human evaluation on the machine-generated responses. In addition, we conduct ablation studies to demonstrate the effectiveness of each component of our model. Furthermore, we apply our model to other dialogue datasets that only ground knowledge or persona to showcase its adaptability. Our code is available at https://github.com/dlawjddn803/BeCand.</abstract>
       <url hash="121e5f48">2023.findings-emnlp.534</url>
@@ -21642,7 +21642,7 @@
       <author><first>Elena Sofia</first><last>Ruzzetti</last></author>
       <author><first>Davide</first><last>Venditti</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>8006-8034</pages>
       <abstract>Instruction-Following Language Models (IFLMs) are promising and versatile tools for solving many downstream, information-seeking tasks. Given their success, there is an urgent need to have a shared resource to determine whether existing and new IFLMs are prone to produce biased language interactions. In this paper, we propose Prompt Association Test (P-AT): a new resource for testing the presence of social biases in IFLMs. P-AT stems from WEAT (Caliskan et al., 2017) and generalizes the notion of measuring social biases to IFLMs. Basically, we cast WEAT word tests in promptized classification tasks, and we associate a metric - the bias score. Our resource consists of 2310 prompts. We then experimented with several families of IFLMs discovering gender and race biases in all the analyzed models. We expect P-AT to be an important tool for quantifying bias across different dimensions and, therefore, for encouraging the creation of fairer IFLMs before their distortions have consequences in the real world.</abstract>
       <url hash="9910fde5">2023.findings-emnlp.539</url>
@@ -21687,7 +21687,7 @@
       <author><first>Yifan</first><last>Gao</last></author>
       <author><first>Zheng</first><last>Li</last></author>
       <author><first>Xian</first><last>Li</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Yin</last></author>
       <author><first>William</first><last>Wang</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
@@ -21775,7 +21775,7 @@
       <author><first>Bo</first><last>Wang</last></author>
       <author><first>Gary</first><last>Bader</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
-      <author><first>Lucy</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <pages>8177-8199</pages>
       <abstract>Multi-document summarization (MDS) assumes a set of topic-related documents are provided as input. In practice, this document set is not always available; it would need to be retrieved given an information need, i.e. a question or topic statement, a setting we dub “open-domain’ MDS. We study this more challenging setting by formalizing the task and bootstrapping it using existing datasets, retrievers and summarizers. Via extensive automatic and human evaluation, we determine: (1) state-of-the-art summarizers suffer large reductions in performance when applied to open-domain MDS, (2) additional training in the open-domain setting can reduce this sensitivity to imperfect retrieval, and (3) summarizers are insensitive to the retrieval of duplicate documents and the order of retrieved documents, but highly sensitive to other errors, like the retrieval of irrelevant documents. Based on our results, we provide practical guidelines to enable future work on open-domain MDS, e.g. how to choose the number of retrieved documents to summarize. Our results suggest that new retrieval and summarization methods and annotated resources for training and evaluation are necessary for further progress in the open-domain setting.</abstract>
@@ -21802,7 +21802,7 @@
       <author><first>Mohammad</first><last>Taghizadeh</last></author>
       <author><first>Adil</first><last>Soubki</last></author>
       <author><first>Seyed</first><last>Mirroshandel</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>8221-8233</pages>
       <abstract>When we communicate with other humans, we do not simply generate a sequence of words. Rather, we use our cognitive state (beliefs, desires, intentions) and our model of the audience’s cognitive state to create utterances that affect the audience’s cognitive state in the intended manner. An important part of cognitive state is the common ground, which is the content the speaker believes, and the speaker believes the audience believes, and so on. While much attention has been paid to common ground in cognitive science, there has not been much work in natural language processing. In this paper, we introduce a new annotation and corpus to capture common ground. We then describe some initial experiments extracting propositions from dialog and tracking their status in the common ground from the perspective of each speaker.</abstract>
       <url hash="fa18c26f">2023.findings-emnlp.551</url>
@@ -21814,7 +21814,7 @@
       <author><first>Chenglei</first><last>Si</last></author>
       <author><first>Weijia</first><last>Shi</last></author>
       <author><first>Chen</first><last>Zhao</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <pages>8234-8249</pages>
       <abstract>While recent large language models (LLMs) improve on various question answering (QA) datasets, it remains difficult for a single model to generalize across question types that require distinct reasoning abilities. We provide empirical evidence that state-of-the-art LLMs suffer from poor generalizability on reasoning types beyond those seen in the prompt. To remedy this, we propose a Mixture-of-Reasoning-Experts (MORE) framework that ensembles diverse specialized language models. We specialize the backbone language model with prompts optimized for different reasoning categories, including factual, multihop, mathematical, and commonsense reasoning. Our key insight is to leverage agreement among the specialized experts to select the best answer for each question, or to abstain from answering. This gives MORE higher accuracy than any single specialized model on a collection of 12 QA datasets from four reasoning types. Beyond generalizability, the interpretable design of MORE improves selective question answering results compared to baselines without incorporating inter-expert agreement. This framework is also more interpretable and useful to human consumers of QA outputs. Our human study confirms that presenting expert predictions and the answer selection process helps annotators more accurately calibrate when to trust the system’s output. We release all code and data to facilitate future work.</abstract>
@@ -21928,7 +21928,7 @@
       <author><first>Xiaohua</first><last>Wang</last></author>
       <author><first>Xiang</first><last>Zhou</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>8366-8375</pages>
       <abstract>Recently, much research in psychology has benefited from the advances in machine learning techniques. Some recent studies showed that it is possible to build automated scoring models for children’s mindreading. These models were trained on a set of manually-labeled question-response pairs, which were collected by asking children to answer one or two questions after a short story is told or a video clip is played. However, existing models did not take the features of the stories and video clips into account when scoring, which obviously will reduce the accuracy of the scoring models. Furthermore, considering that different psychological tests may contain the same questions, this approach cannot be extended to other related psychological test datasets. In this study, we proposed a multi-modal learning framework to leverage the features extracted from the stories and videos related to the questions being asked during the children’s mindreading evaluation. Experimental results show that the scores produced by the proposed models agree well with those graded by human experts, highlighting the potential of the proposed network architecture for practical automated children’s mindreading scoring systems.</abstract>
       <url hash="d29a64a2">2023.findings-emnlp.561</url>
@@ -21941,7 +21941,7 @@
       <author><first>Haotian</first><last>Ye</last></author>
       <author><first>Leonie</first><last>Weissweiler</last></author>
       <author><first>Renhao</first><last>Pei</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>8376-8401</pages>
       <abstract>In comparative linguistics, colexification refers to the phenomenon of a lexical form conveying two or more distinct meanings. Existing work on colexification patterns relies on annotated word lists, limiting scalability and usefulness in NLP. In contrast, we identify colexification patterns of more than 2,000 concepts across 1,335 languages directly from an unannotated parallel corpus. We then propose simple and effective methods to build multilingual graphs from the colexification patterns: <b>ColexNet</b> and <b>ColexNet+</b>. ColexNet’s nodes are concepts and its edges are colexifications. In ColexNet+, concept nodes are additionally linked through intermediate nodes, each representing an ngram in one of 1,334 languages. We use ColexNet+ to train ColexNet+, high-quality multilingual embeddings that are well-suited for transfer learning. In our experiments, we first show that ColexNet achieves high recall on CLICS, a dataset of crosslingual colexifications. We then evaluate ColexNet+ on roundtrip translation, sentence retrieval and sentence classification and show that our embeddings surpass several transfer learning baselines. This demonstrates the benefits of using colexification as a source of information in multilingual NLP.</abstract>
       <url hash="2231b927">2023.findings-emnlp.562</url>
@@ -21951,7 +21951,7 @@
     <paper id="563">
       <title>Injecting structural hints: Using language models to study inductive biases in language learning</title>
       <author><first>Isabel</first><last>Papadimitriou</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>8402-8413</pages>
       <abstract>Both humans and transformer language models are able to learn language without explicit structural supervision. What cognitive inductive biases make this learning possible? Here, we examine the effect of different inductive learning biases by actively controlling the inductive biases of artificial learners: we structurally bias models by pretraining on synthetic formally-structured data, and evaluate these structural biases by fine-tuning on three typologically-distant human languages: English, Japanese, and Basque. We investigate the effect on downstream language perplexity of three types of inductive bias: 1) recursive, hierarchical processing 2) unrestricted token-token dependencies that can’t be modeled by context-free grammars, and 3) a Zipfian power-law vocabulary distribution. We show that complex, non-context-free interactions between tokens form the best inductive biases. Our study leverages the capabilities of transformer models to run controlled language learning experiments that are not possible to run on humans, and surfaces hypotheses about the structures that facilitate language learning in both humans and machines.</abstract>
       <url hash="e8d86429">2023.findings-emnlp.563</url>
@@ -21982,7 +21982,7 @@
       <author><first>Haozhe</first><last>Zhang</last></author>
       <author><first>Zuyu</first><last>Zhao</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>8429-8441</pages>
       <abstract>Representation learning on graph has been demonstrated to be a powerful tool for solving real-world problems. Text-attributed graph carries both semantic and structural information among different types of graphs. Existing works have paved the way for knowledge extraction of this type of data by leveraging language models or graph neural networks or combination of them. However, these works suffer from issues like underutilization of relationships between nodes or words or unaffordable memory cost. In this paper, we propose a Node Representation Update Pre-training Architecture based on Co-modeling Text and Graph (NRUP). In NRUP, we construct a hierarchical text-attributed graph that incorporates both original nodes and word nodes. Meanwhile, we apply four self-supervised tasks for different level of constructed graph. We further design the pre-training framework to update the features of nodes during training epochs. We conduct the experiment on the benchmark dataset ogbn-arxiv. Our method achieves outperformance compared to baselines, fully demonstrating its validity and generalization.</abstract>
@@ -22016,7 +22016,7 @@
       <author><first>Denis</first><last>McInerney</last></author>
       <author><first>Geoffrey</first><last>Young</last></author>
       <author><first>Jan-Willem</first><last>van de Meent</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>8477-8494</pages>
       <abstract>We propose CHiLL (Crafting High-Level Latents), an approach for natural-language specification of features for linear models. CHiLL prompts LLMs with expert-crafted queries to generate interpretable features from health records. The resulting noisy labels are then used to train a simple linear classifier. Generating features based on queries to an LLM can empower physicians to use their domain expertise to craft features that are clinically meaningful for a downstream task of interest, without having to manually extract these from raw EHR. We are motivated by a real-world risk prediction task, but as a reproducible proxy, we use MIMIC-III and MIMIC-CXR data and standard predictive tasks (e.g., 30-day readmission) to evaluate this approach. We find that linear models using automatically extracted features are comparably performant to models using reference features, and provide greater interpretability than linear models using “Bag-of-Words” features. We verify that learned feature weights align well with clinical expectations.</abstract>
       <url hash="8771a209">2023.findings-emnlp.568</url>
@@ -22075,7 +22075,7 @@
       <title>Citance-Contextualized Summarization of Scientific Papers</title>
       <author><first>Shahbaz</first><last>Syed</last></author>
       <author><first>Ahmad Dawar</first><last>Hakimi</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <pages>8551-8568</pages>
       <abstract>Current approaches to automatic summarization of scientific papers generate informative summaries in the form of abstracts. However, abstracts are not intended to show the relationship between a paper and the references cited in it. We propose a new contextualized summarization approach that can generate an informative summary conditioned on a given sentence containing the citation of a reference (a so-called “citance”). This summary outlines content of the cited paper relevant to the citation location. Thus, our approach extracts and models the citances of a paper, retrieves relevant passages from cited papers, and generates abstractive summaries tailored to each citance. We evaluate our approach using **Webis-Context-SciSumm-2023**, a new dataset containing 540K computer science papers and 4.6M citances therein.</abstract>
@@ -22118,7 +22118,7 @@
       <author><first>Wenhao</first><last>Li</last></author>
       <author><first>Noel</first><last>Codella</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>8598-8617</pages>
       <abstract>Vision-language (VL) understanding tasks evaluate models’ comprehension of complex visual scenes through multiple-choice questions. However, we have identified two dataset biases that models can exploit as shortcuts to resolve various VL tasks correctly without proper understanding. The first type of dataset bias is Unbalanced Matching bias, where the correct answer overlaps the question and image more than the incorrect answers. The second type of dataset bias is Distractor Similarity bias, where incorrect answers are overly dissimilar to the correct answer but significantly similar to other incorrect answers within the same sample. To address these dataset biases, we first propose Adversarial Data Synthesis (ADS) to generate synthetic training and debiased evaluation data. We then introduce Intra-sample Counterfactual Training (ICT) to assist models in utilizing the synthesized training data, particularly the counterfactual data, via focusing on intra-sample differentiation. Extensive experiments demonstrate the effectiveness of ADS and ICT in consistently improving model performance across different benchmarks, even in domain-shifted scenarios.</abstract>
       <url hash="73da184d">2023.findings-emnlp.576</url>
@@ -22127,7 +22127,7 @@
     </paper>
     <paper id="577">
       <title>The Intended Uses of Automated Fact-Checking Artefacts: Why, How and Who</title>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
       <author><first>Nedjma</first><last>Ousidhoum</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
       <pages>8618-8642</pages>
@@ -22224,7 +22224,7 @@
       <author><first>Tianlong</first><last>Li</last></author>
       <author><first>Longtao</first><last>Huang</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>8734-8746</pages>
       <abstract>Prompt tuning has been proven to be successful on various tasks by incorporating a small number of trainable parameters while freezing large pre-trained language models (PLMs). However, it is still unsettled how to generate more proper prompts for any individual examples and how to extend prompt tuning to multi-task learning scenarios by leveraging cross-task features. To address these challenges, we propose a token-wise prompt tuning (TPT), in which a bank of finer-grained soft prompt tokens is built for multi-task learning by memory network. The tokens are retrieved from the bank against an input example and assembled to an instance-dependent prompt. Extensive experimental results on 14 datasets demonstrated that the models enhanced by our TPT performed far better than full parameter fine-tuned models and achieved state-of-the-art by tuning only 0.035% parameters.</abstract>
       <url hash="29bdd9d3">2023.findings-emnlp.584</url>
@@ -22234,7 +22234,7 @@
     <paper id="585">
       <title>A Rewriting Approach for Gender Inclusivity in <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Leonor</first><last>Veloso</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Rui</first><last>Ribeiro</last></author>
       <pages>8747-8759</pages>
       <abstract>In recent years, there has been a notable rise in research interest regarding the integration of gender-inclusive and gender-neutral language in natural language processing models. A specific area of focus that has gained practical and academic significant interest is gender-neutral rewriting, which involves converting binary-gendered text to its gender-neutral counterpart. However, current approaches to gender-neutral rewriting for gendered languages tend to rely on large datasets, which may not be an option for languages with fewer resources, such as Portuguese. In this paper, we present a rule-based and a neural-based tool for gender-neutral rewriting for Portuguese, a heavily gendered Romance language whose morphology creates different challenges from the ones tackled by other gender-neutral rewriters. Our neural approach relies on fine-tuning large multilingual machine translation models on examples generated by the rule-based model. We evaluate both models on texts from different sources and contexts. We provide the first Portuguese dataset explicitly containing gender-neutral language and neopronouns, as well as a manually annotated golden collection of 500 sentences that allows for evaluation of future work.</abstract>
@@ -22317,9 +22317,9 @@
       <author><first>Kundan</first><last>Krishna</last></author>
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Sanjana</first><last>Ramprasad</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
-      <author><first>Zachary</first><last>Lipton</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last></author>
       <pages>8826-8845</pages>
       <abstract>While the NLP community has produced numerous summarization benchmarks, none provide the rich annotations required to simultaneously address many important problems related to control and reliability. We introduce a Wikipedia-derived benchmark, complemented by a rich set of crowd-sourced annotations, that supports 8 interrelated tasks: (i) extractive summarization; (ii) abstractive summarization; (iii) topic-based summarization; (iv) compressing selected sentences into a one-line summary; (v) surfacing evidence for a summary sentence; (vi) predicting the factual accuracy of a summary sentence; (vii) identifying unsubstantiated spans in a summary sentence; (viii) correcting factual errors in summaries. We compare various methods on this benchmark and discover that on multiple tasks, moderately-sized fine-tuned models consistently outperform much larger few-shot prompted language models. For factuality-related tasks, we also evaluate existing heuristics to create training data and find that training on them results in worse performance than training on <tex-math>20\times</tex-math> less human-labeled data. Our articles draw from 6 domains, facilitating cross-domain analysis. On some tasks, the amount of training data matters more than the domain where it comes from, while for other tasks training specifically on data from the target domain, even if limited, is more beneficial.</abstract>
       <url hash="003b801f">2023.findings-emnlp.592</url>
@@ -22331,7 +22331,7 @@
       <author><first>Chayan</first><last>Sarkar</last></author>
       <author><first>Avik</first><last>Mitra</last></author>
       <author><first>Pradip</first><last>Pramanick</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <pages>8846-8857</pages>
       <abstract>Natural language serves as the primary mode of communication when an intelligent agent with a physical presence engages with human beings. While a plethora of research focuses on natural language understanding (NLU), encompassing endeavors such as sentiment analysis, intent prediction, question answering, and summarization, the scope of NLU directed at situations necessitating tangible actions by an embodied agent remains limited. The inherent ambiguity and incompleteness inherent in natural language present challenges for intelligent agents striving to decipher human intention. To tackle this predicament head-on, we introduce a novel system known as task and argument grounding for Embodied agents (tagE). At its core, our system employs an inventive neural network model designed to extract a series of tasks from complex task instructions expressed in natural language. Our proposed model adopts an encoder-decoder framework enriched with nested decoding to effectively extract tasks and their corresponding arguments from these intricate instructions. These extracted tasks are then mapped (or grounded) to the robot’s established collection of skills, while the arguments find grounding in objects present within the environment. To facilitate the training and evaluation of our system, we have curated a dataset featuring complex instructions. The results of our experiments underscore the prowess of our approach, as it outperforms robust baseline models.</abstract>
       <url hash="d2d983e5">2023.findings-emnlp.593</url>
@@ -22755,7 +22755,7 @@
     <paper id="627">
       <title>Unsupervised Lexical Simplification with Context Augmentation</title>
       <author><first>Takashi</first><last>Wada</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>9368-9379</pages>
       <abstract>We propose a new unsupervised lexical simplification method that uses only monolingual data and pre-trained language models. Given a target word and its context, our method generates substitutes based on the target context and also additional contexts sampled from monolingual data. We conduct experiments in English, Portuguese, and Spanish on the TSAR-2022 shared task, and show that our model substantially outperforms other unsupervised systems across all languages. We also establish a new state-of-the-art by ensembling our model with GPT-3.5. Lastly, we evaluate our model on the SWORDS lexical substitution data set, achieving a state-of-the-art result.</abstract>
@@ -22765,7 +22765,7 @@
     </paper>
     <paper id="628">
       <title>m<fixed-case>L</fixed-case>ong<fixed-case>T</fixed-case>5: A Multilingual and Efficient Text-To-Text Transformer for Longer Sequences</title>
-      <author><first>David</first><last>Uthus</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
       <author><first>Santiago</first><last>Ontanon</last></author>
       <author><first>Joshua</first><last>Ainslie</last></author>
       <author><first>Mandy</first><last>Guo</last></author>
@@ -22925,7 +22925,7 @@
       <author><first>Yasheng</first><last>Wang</last></author>
       <author><first>Wai-Chung</first><last>Kwan</last></author>
       <author><first>Irwin</first><last>King</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>9556-9569</pages>
       <abstract>Open-domain dialogue system usually requires different sources of knowledge to generate more informative and evidential responses. However, existing knowledge-grounded dialogue systems either focus on a single knowledge source or overlook the dependency between multiple sources of knowledge, which may result in generating inconsistent or even paradoxical responses. To incorporate multiple knowledge sources and dependencies between them, we propose SAFARI, a novel framework that leverages the exceptional capabilities of large language models (LLMs) in planning, understanding, and incorporating under both supervised and unsupervised settings. Specifically, SAFARI decouples the knowledge grounding into multiple sources and response generation, which allows easy extension to various knowledge sources including the possibility of not using any sources. To study the problem, we construct a personalized knowledge-grounded dialogue dataset Knowledge Behind Persona (KBP), which is the first to consider the dependency between persona and implicit knowledge. Experimental results on the KBP dataset demonstrate that the SAFARI framework can effectively produce persona-consistent and knowledge-enhanced responses.</abstract>
       <url hash="c75ba749">2023.findings-emnlp.641</url>
@@ -22950,7 +22950,7 @@
     <paper id="643">
       <title>Conversational Recommender System and Large Language Model Are Made for Each Other in <fixed-case>E</fixed-case>-commerce Pre-sales Dialogue</title>
       <author><first>Yuanxing</first><last>Liu</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yifan</first><last>Chen</last></author>
       <author><first>Yuchi</first><last>Zhang</last></author>
       <author><first>Haopeng</first><last>Bai</last></author>
@@ -22982,7 +22982,7 @@
       <author id="huy-nguyen-bcl"><first>Huy</first><last>Nguyen</last></author>
       <author><first>Chien</first><last>Nguyen</last></author>
       <author><first>Linh</first><last>Ngo</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <author><first>Thien</first><last>Nguyen</last></author>
       <pages>9621-9629</pages>
       <abstract>Continual Relation Extraction (CRE) aims to continuously train a model to learn new relations while preserving its ability on previously learned relations. Similar to other continual learning problems, in CRE, models experience representation shift, where learned deep space changes in the continual learning process, which leads to the downgrade in the performance of the old tasks. In this work, we will provide an insight into this phenomenon under the spectral viewpoint. Our key argument is that, for each class shape, if its eigenvectors (or spectral components) do not change much, the shape is well-preserved. We then conduct a spectral experiment and show that, for the shape of each class, the eigenvectors with larger eigenvalue are more preserved after learning new tasks which means these vectors are good at keeping class shapes. Based on this analysis, we propose a simple yet effective class-wise regularization that improve the eigenvalues in the representation learning. We observe that our proposed regularization leads to an increase in the eigenvalues. Extensive experiments on two benchmark datasets, FewRel and TACRED, show the effectiveness of our proposed method with significant improvement in performance compared to the state-of-the-art models. Further analyses also verify our hypothesis that larger eigenvalues lead to better performance and vice versa.</abstract>
@@ -23014,7 +23014,7 @@
       <author><first>Ofir</first><last>Arviv</last></author>
       <author><first>Yoav</first><last>Katz</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <pages>9647-9676</pages>
       <abstract>Topical Text Classification (TTC) is an ancient, yet timely research area in natural language processing, with many practical applications. The recent dramatic advancements in large LMs raise the question of how well these models can perform in this task in a zero-shot scenario. Here, we share a first comprehensive study, comparing the zero-shot performance of a variety of LMs over TTC23, a large benchmark collection of 23 publicly available TTC datasets, covering a wide range of domains and styles. In addition, we leverage this new TTC benchmark to create LMs that are specialized in TTC, by fine-tuning these LMs over a subset of the datasets and evaluating their performance over the remaining, held-out datasets. We show that the TTC-specialized LMs obtain the top performance on our benchmark, by a significant margin. Our code and model are made available for the community. We hope that the results presented in this work will serve as a useful guide for practitioners interested in topical text classification.</abstract>
       <url hash="5959fc0c">2023.findings-emnlp.647</url>
@@ -23044,7 +23044,7 @@
       <author><first>Shangqing</first><last>Liu</last></author>
       <author><first>Xiaofei</first><last>Xie</last></author>
       <author><first>Yves</first><last>Le Traon</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>9706-9716</pages>
       <abstract>Existing methods for generating adversarial code examples face several challenges: limted availability of substitute variables, high verification costs for these substitutes, and the creation of adversarial samples with noticeable perturbations. To address these concerns, our proposed approach, RNNS, uses a search seed based on historical attacks to find potential adversarial substitutes. Rather than directly using the discrete substitutes, they are mapped to a continuous vector space using a pre-trained variable name encoder. Based on the vector representation, RNNS predicts and selects better substitutes for attacks. We evaluated the performance of RNNS across six coding tasks encompassing three programming languages: Java, Python, and C. We employed three pre-trained code models (CodeBERT, GraphCodeBERT, and CodeT5) that resulted in a cumulative of 18 victim models. The results demonstrate that RNNS outperforms baselines in terms of ASR and QT. Furthermore, the perturbation of adversarial examples introduced by RNNS is smaller compared to the baselines in terms of the number of replaced variables and the change in variable length. Lastly, our experiments indicate that RNNS is efficient in attacking defended models and can be employed for adversarial training.</abstract>
       <url hash="9d98d1e4">2023.findings-emnlp.649</url>
@@ -23164,7 +23164,7 @@
     </paper>
     <paper id="659">
       <title>Auto-Instruct: Automatic Instruction Generation and Ranking for Black-Box Language Models</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Shuohang</first><last>Wang</last></author>
       <author><first>Wenhao</first><last>Yu</last></author>
       <author><first>Yichong</first><last>Xu</last></author>
@@ -23348,7 +23348,7 @@
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>10043-10053</pages>
       <abstract>Document image translation (DIT) aims to translate text embedded in images from one language to another. It is a challenging task that needs to understand visual layout with text semantics simultaneously. However, existing methods struggle to capture the crucial visual layout in real-world complex document images. In this work, we make the first attempt to incorporate layout knowledge into DIT in an end-to-end way. Specifically, we propose a novel Layout-aware end-to-end Document Image Translation (LayoutDIT) with multi-step conductive decoder. A layout-aware encoder is first introduced to model visual layout relations with raw OCR results. Then a novel multi-step conductive decoder is unified with hidden states conduction across three step-decoders to achieve the document translation step by step. Benefiting from the layout-aware end-to-end joint training, our LayoutDIT outperforms state-of-the-art methods with better parameter efficiency. Besides, we create a new multi-domain document image translation dataset to validate the model’s generalization. Extensive experiments show that LayoutDIT has a good generalization in diverse and complex layout scenes.</abstract>
       <url hash="556f2226">2023.findings-emnlp.673</url>
@@ -23358,7 +23358,7 @@
     <paper id="674">
       <title>Balaur: Language Model Pretraining with Lexical Semantic Relations</title>
       <author><first>Andrei</first><last>Mircea</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>10054-10070</pages>
       <abstract>Lexical semantic relations (LSRs) characterize meaning relationships between words and play an important role in systematic generalization on lexical inference tasks. Notably, several tasks that require knowledge of hypernymy still pose a challenge for pretrained language models (LMs) such as BERT, underscoring the need to better align their linguistic behavior with our knowledge of LSRs. In this paper, we propose Balaur, a model that addresses this challenge by modeling LSRs directly in the LM’s hidden states throughout pretraining. Motivating our approach is the hypothesis that the internal representations of LMs can provide an interface to their observable linguistic behavior, and that by controlling one we can influence the other. We validate our hypothesis and demonstrate that Balaur generally improves the performance of large transformer-based LMs on a comprehensive set of hypernymy-informed tasks, as well as on the original LM objective. Code and data are made available at https://github.com/mirandrom/balaur</abstract>
       <url hash="36faf2b6">2023.findings-emnlp.674</url>
@@ -23418,8 +23418,8 @@
       <author><first>Hila</first><last>Gonen</last></author>
       <author><first>Srini</first><last>Iyer</last></author>
       <author><first>Terra</first><last>Blevins</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>10136-10148</pages>
       <abstract>Language models can be prompted to perform a wide variety of tasks with zero- and few-shot in-context learning. However, performance varies significantly with the choice of prompt, and we do not yet understand why this happens. In this paper, we analyze the factors that contribute to this variance and establish a new empirical hypothesis: the performance of a prompt is predicted by the extent to which the model is familiar with the language it contains. Over a wide range of tasks, we show that the lower the perplexity of the prompt, the better it is able to perform the task, when considering reasonable prompts that are related to it. As part of our analysis, we also devise a method to automatically extend a small seed set of manually written prompts by paraphrasing with GPT3 and backtranslation. This larger set allows us to verify that perplexity is a strong predictor of the success of a prompt and we show that the lowest perplexity prompts are consistently effective.</abstract>
       <url hash="b32fe2ea">2023.findings-emnlp.679</url>
@@ -23493,7 +23493,7 @@
       <title>Hierarchical Prompting Assists Large Language Model on Web Navigation</title>
       <author><first>Robert</first><last>Lo</last></author>
       <author><first>Abishek</first><last>Sridhar</last></author>
-      <author><first>Frank</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank</first><last>Xu</last></author>
       <author><first>Hao</first><last>Zhu</last></author>
       <author><first>Shuyan</first><last>Zhou</last></author>
       <pages>10217-10244</pages>
@@ -23542,7 +23542,7 @@
       <author><first>Jingting</first><last>Ye</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>10262-10274</pages>
       <abstract>Reports of human-like behaviors in foundation models are growing, with psychological theories providing enduring tools to investigate these behaviors. However, current research tends to directly apply these human-oriented tools without verifying the faithfulness of their outcomes. In this paper, we introduce a framework, RealBehavior, which is designed to characterize the humanoid behaviors of models faithfully. Beyond simply measuring behaviors, our framework assesses the faithfulness of results based on reproducibility, internal and external consistency, and generalizability. Our findings suggest that a simple application of psychological tools cannot faithfully characterize all human-like behaviors. Moreover, we discuss the impacts of aligning models with human and social values, arguing for the necessity of diversifying alignment objectives to prevent the creation of models with restricted characteristics.</abstract>
       <url hash="609d7e8a">2023.findings-emnlp.688</url>
@@ -23568,7 +23568,7 @@
       <author><first>Do June</first><last>Min</last></author>
       <author><first>Veronica</first><last>Perez-Rosas</last></author>
       <author><first>Ken</first><last>Resnicow</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>10289-10302</pages>
       <abstract>Reflective listening is a fundamental skill that counselors must acquire to achieve proficiency in motivational interviewing (MI). It involves responding in a manner that acknowledges and explores the meaning of what the client has expressed in the conversation. In this work, we introduce the task of counseling response rewriting, which transforms non-reflective statements into reflective responses. We introduce VERVE, a template-based rewriting system with paraphrase-augmented training and adaptive template updating. VERVE first creates a template by identifying and filtering out tokens that are not relevant to reflections and constructs a reflective response using the template. Paraphrase-augmented training allows the model to learn less-strict fillings of masked spans, and adaptive template updating helps discover effective templates for rewriting without significantly removing the original content. Using both automatic and human evaluations, we compare our method against text rewriting baselines and show that our framework is effective in turning non-reflective statements into more reflective responses while achieving a good content preservation-reflection style trade-off.</abstract>
       <url hash="f0736a74">2023.findings-emnlp.690</url>
@@ -23580,7 +23580,7 @@
       <author><first>Yile</first><last>Wang</last></author>
       <author><first>Peng</first><last>Li</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>10303-10315</pages>
       <abstract>Large language models (LLMs) have shown superior performance without task-specific fine-tuning. Despite the success, the knowledge stored in the parameters of LLMs could still be incomplete and difficult to update due to the computational costs. As complementary, retrieval-based methods can offer non-parametric world knowledge and improve the performance on tasks such as question answering. However, we find that the retrieved knowledge does not always help and even has a negative impact on original responses occasionally. To better make use of both internal knowledge and external world knowledge, we investigate eliciting the model’s ability to recognize what they know and do not know (which is also called “self-knowledge”) and propose Self-Knowledge guided Retrieval augmentation (SKR), a simple yet effective method which can let LLMs refer to the questions they have previously encountered and adaptively call for external resources when dealing with new questions. We evaluate SKR on multiple datasets and demonstrate that it outperforms chain-of-thought based and fully retrieval-based methods by using either InstructGPT or ChatGPT.</abstract>
       <url hash="a999a538">2023.findings-emnlp.691</url>
@@ -23605,7 +23605,7 @@
       <author><first>Changwoo</first><last>Chun</last></author>
       <author><first>SongEun</first><last>Lee</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>10334-10343</pages>
       <abstract>Large language models (LLMs) have demonstrated impressive capabilities in natural language processing. However, their ability to establish causal relationships, particularly in the context of temporal interventions and language hallucinations, remains challenging. This paper presents <b>CReTIHC</b>, a novel dataset designed to test and enhance the causal reasoning abilities of LLMs. The dataset is constructed using a unique approach that incorporates elements of verbal hallucinations and temporal interventions through the reengineering of existing causal inference datasets. This transformation creates complex scenarios that push LLMs to critically evaluate the information presented and identify cause-and-effect relationships. The CReTIHC dataset serves as a pioneering tool for improving LLM’s causal inference capabilities, paving the way for a more nuanced understanding of causal relationships in natural language processing (NLP) tasks. The whole dataset is publicly accessible at: (https://github.com/ChangwooChun/CReTIHC)</abstract>
       <url hash="ce39d040">2023.findings-emnlp.693</url>
@@ -23676,7 +23676,7 @@
       <author><first>Rosella</first><last>Galindo Esparza</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Yang</first><last>Xiang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>10413-10420</pages>
       <abstract>In the era of large models, low-resource question-answering tasks lag, emphasizing the importance of data augmentation - a key research avenue in natural language processing. The main challenges include leveraging the large model’s internal knowledge for data augmentation, determining which QA data component - the question, passage, or answer - benefits most from augmentation, and retaining consistency in the augmented content without inducing excessive noise. To tackle these, we introduce PQQ, an innovative approach for question data augmentation consisting of Prompt Answer, Question Generation, and Question Filter. Our experiments reveal that ChatGPT underperforms on the experimental data, yet our PQQ method excels beyond existing augmentation strategies. Further, its universal applicability is validated through successful tests on high-resource QA tasks like SQUAD1.1 and TriviaQA.</abstract>
@@ -23720,7 +23720,7 @@
     <paper id="703">
       <title>Investigating the Effect of Pre-finetuning <fixed-case>BERT</fixed-case> Models on <fixed-case>NLI</fixed-case> Involving Presuppositions</title>
       <author><first>Jad</first><last>Kabbara</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>10482-10494</pages>
       <abstract>We explore the connection between presupposition, discourse and sarcasm and propose to leverage that connection in a transfer learning scenario with the goal of improving the performance of NLI models on cases involving presupposition. We exploit advances in training transformer-based models that show that pre-finetuning—–i.e., finetuning the model on an additional task or dataset before the actual finetuning phase—–can help these models, in some cases, achieve a higher performance on a given downstream task. Building on those advances and that aforementioned connection, we propose pre-finetuning NLI models on carefully chosen tasks in an attempt to improve their performance on NLI cases involving presupposition. We notice that, indeed, pre-finetuning on those tasks leads to performance improvements. Furthermore, we run several diagnostic tests to understand whether these gains are merely a byproduct of additional training data. The results show that, while additional training data seems to be helping on its own in some cases, the choice of the tasks plays a role in the performance improvements.</abstract>
       <url hash="ed2d748b">2023.findings-emnlp.703</url>
@@ -23830,7 +23830,7 @@
       <author><first>Liang</first><last>Chen</last></author>
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Wenqiang</first><last>Lei</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>10602-10621</pages>
       <abstract>Conversational systems based on Large Language Models (LLMs), such as ChatGPT, show exceptional proficiency in context understanding and response generation. However, they still possess limitations, such as failing to ask clarifying questions to ambiguous queries or refuse users’ unreasonable requests, both of which are considered as key aspects of a conversational agent’s proactivity. This raises the question of whether LLM-based conversational systems are equipped to handle proactive dialogue problems. In this work, we conduct a comprehensive analysis of LLM-based conversational systems, specifically focusing on three key aspects of proactive dialogues: clarification, target-guided, and non-collaborative dialogues. To trigger the proactivity of LLMs, we propose the Proactive Chain-of-Thought prompting scheme, which augments LLMs with the goal planning capability over descriptive reasoning chains. Empirical findings are discussed to promote future studies on LLM-based proactive dialogue systems.</abstract>
       <url hash="236dc7ca">2023.findings-emnlp.711</url>
@@ -23841,7 +23841,7 @@
       <title>Ecologically Valid Explanations for Label Variation in <fixed-case>NLI</fixed-case></title>
       <author><first>Nan-Jiang</first><last>Jiang</last></author>
       <author><first>Chenhao</first><last>Tan</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>10622-10633</pages>
       <abstract>Human label variation, or annotation disagreement, exists in many natural language processing (NLP) tasks, including natural language inference (NLI). To gain direct evidence of how NLI label variation arises, we build LiveNLI, an English dataset of 1,415 ecologically valid explanations (annotators explain the NLI labels they chose) for 122 MNLI items (at least 10 explanations per item). The LiveNLI explanations confirm that people can systematically vary on their interpretation and highlight within-label variation: annotators sometimes choose the same label for different reasons. This suggests that explanations are crucial for navigating label interpretations in general. We few-shot prompt large language models to generate explanations but the results are inconsistent: they sometimes produces valid and informative explanations, but it also generates implausible ones that do not support the label, highlighting directions for improvement.</abstract>
       <url hash="05472d6b">2023.findings-emnlp.712</url>
@@ -23880,7 +23880,7 @@
       <author><first>Rui</first><last>Zheng</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Gui</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>10658-10671</pages>
       <abstract>Benefiting from massive corpora and advanced hardware, large language models (LLMs) exhibit remarkable capabilities in language understanding and generation. However, their performance degrades in scenarios where multiple tasks are encountered sequentially, also known as catastrophic forgetting. In this paper, we propose orthogonal low-rank adaptation (O-LoRA), a simple and efficient approach for continual learning in language models, effectively mitigating catastrophic forgetting while learning new tasks. Specifically, O-LoRA learns tasks in different (low-rank) vector subspaces that are kept orthogonal to each other in order to minimize interference. Our method induces only marginal additional parameter costs and requires no user data storage for replay. Experimental results on continual learning benchmarks show that our method outperforms state-of-the-art methods. Furthermore, compared to previous approaches, our method excels in preserving the generalization ability of LLMs on unseen tasks.</abstract>
       <url hash="9dda0a07">2023.findings-emnlp.715</url>
@@ -23905,7 +23905,7 @@
       <author><first>Yufan</first><last>Wu</last></author>
       <author><first>Yinghui</first><last>He</last></author>
       <author><first>Yilin</first><last>Jia</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Yulong</first><last>Chen</last></author>
       <author><first>Naihao</first><last>Deng</last></author>
       <pages>10691-10706</pages>
@@ -23918,7 +23918,7 @@
       <title>Image and Text: Fighting the same Battle? Super Resolution Learning for Imbalanced Text Classification</title>
       <author><first>Romain</first><last>Meunier</last></author>
       <author><first>Benamara</first><last>Farah</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Patricia</first><last>Stolf</last></author>
       <pages>10707-10720</pages>
       <abstract>In this paper, we propose SRL4NLP, a new approach for data augmentation by drawing an analogy between image and text processing: Super-resolution learning. This method is based on using high-resolution images to overcome the problem of low resolution images. While this technique is a common usage in image processing when images have a low resolution or are too noisy, it has never been used in NLP. We therefore propose the first adaptation of this method for text classification and evaluate its effectiveness on urgency detection from tweets posted in crisis situations, a very challenging task where messages are scarce and highly imbalanced. We show that this strategy is efficient when compared to competitive state-of-the-art data augmentation techniques on several benchmarks datasets in two languages.</abstract>
@@ -23968,8 +23968,8 @@
       <author><first>Jon</first><last>Campos</last></author>
       <author><first>Iker</first><last>García-Ferrero</last></author>
       <author><first>Julen</first><last>Etxaniz</last></author>
-      <author><first>Oier Lopez</first><last>de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier Lopez</first><last>de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>10776-10787</pages>
       <abstract>In this position paper we argue that the classical evaluation on Natural Language Processing (NLP) tasks using annotated benchmarks is in trouble. The worst kind of data contamination happens when a Large Language Model (LLM) is trained on the test split of a benchmark, and then evaluated in the same benchmark. The extent of the problem is unknown, as it is not straightforward to measure. Contamination causes an overestimation of the performance of a contaminated model in a target benchmark and associated task with respect to their non-contaminated counterparts. The consequences can be very harmful, with wrong scientific conclusions being published while other correct ones are discarded. This position paper defines different levels of data contamination and argues for a community effort, including the development of automatic and semi-automatic measures to detect when data from a benchmark was exposed to a model, and suggestions for flagging papers with conclusions that are compromised by data contamination.</abstract>
       <url hash="ef0461c9">2023.findings-emnlp.722</url>
@@ -23992,7 +23992,7 @@
       <title>Argument mining as a multi-hop generative machine reading comprehension task</title>
       <author><first>Boyang</first><last>Liu</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>10846-10858</pages>
       <abstract>Argument mining (AM) is a natural language processing task that aims to generate an argumentative graph given an unstructured argumentative text. An argumentative graph that consists of argumentative components and argumentative relations contains completed information of an argument and exhibits the logic of an argument. As the argument structure of an argumentative text can be regarded as an answer to a “why” question, the whole argument structure is therefore similar to the “chain of thought” concept, i.e., the sequence of ideas that lead to a specific conclusion for a given argument (Wei et al., 2022). For argumentative texts in the same specific genre, the “chain of thought” of such texts is usually similar, i.e., in a student essay, there is usually a major claim supported by several claims, and then a number of premises which are related to the claims are included (Eger et al., 2017). In this paper, we propose a new perspective which transfers the argument mining task into a multi-hop reading comprehension task, allowing the model to learn the argument structure as a “chain of thought”. We perform a comprehensive evaluation of our approach on two AM benchmarks and find that we surpass SOTA results. A detailed analysis shows that specifically the “chain of thought” information is helpful for the argument mining task.</abstract>
@@ -24027,7 +24027,7 @@
       <author><first>Yuanhang</first><last>Tang</last></author>
       <author><first>Yawen</first><last>Ouyang</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>10886-10901</pages>
       <abstract>Several recent studies have shown that advanced models for natural language understanding (NLU) are prone to capture biased features that are independent of the task but spuriously correlated to labels. Such models often perform well on in-distribution (ID) datasets but fail to generalize to out-of-distribution (OOD) datasets. Existing solutions can be separated into two orthogonal approaches: model-centric methods and data-centric methods. Model-centric methods improve OOD performance at the expense of ID performance. Data-centric strategies usually boost both of them via data-level manipulations such as generative data augmentation. However, the high cost of fine-tuning a generator to produce valid samples limits the potential of such approaches. To address this issue, we propose PDD, a framework that conducts training-free Perturbations on samples containing biased features to Debias NLU Datasets. PDD works by iteratively conducting perturbations via pre-trained mask language models (MLM). PDD exhibits the advantage of low cost by adopting a training-free perturbation strategy and further improves the label consistency by utilizing label information during perturbations. Extensive experiments demonstrate that PDD shows competitive performance with previous state-of-the-art debiasing strategies. When combined with the model-centric debiasing methods, PDD establishes a new state-of-the-art.</abstract>
       <url hash="b20c2d37">2023.findings-emnlp.726</url>
@@ -24123,7 +24123,7 @@
       <author><first>Hila</first><last>Gonen</last></author>
       <author><first>Ari</first><last>Holtzman</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>10994-11005</pages>
       <abstract>Large language models can perform downstream tasks in a zero-shot fashion, given natural language prompts that specify the desired behavior. Such prompts are typically hand engineered, but can also be learned with gradient-based methods from labeled data. However, it is underexplored what factors make the prompts effective, especially when the prompts are in natural language. In this paper, we investigate common attributes shared by effective prompts in classification problems. We first propose a human readable prompt tuning method (FluentPrompt) based on Langevin dynamics that incorporates a fluency constraint to find a distribution of effective and fluent prompts. Our analysis reveals that effective prompts are topically related to the task domain and calibrate the prior probability of output labels. Based on these findings, we also propose a method for generating prompts using only unlabeled data, outperforming strong baselines by an average of 7.0% accuracy across three tasks.</abstract>
       <url hash="44aaa698">2023.findings-emnlp.733</url>
@@ -24264,7 +24264,7 @@
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>José</first><last>de Souza</last></author>
       <author><first>Pierre</first><last>Colombo</last></author>
-      <author><first>Andre</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last></author>
       <pages>11127-11148</pages>
       <abstract>Large language models (LLMs) are a promising avenue for machine translation (MT). However, current LLM-based MT systems are brittle: their effectiveness highly depends on the choice of few-shot examples and they often require extra post-processing due to overgeneration. Alternatives such as finetuning on translation instructions are computationally expensive and may weaken in-context learning capabilities, due to overspecialization. In this paper, we provide a closer look at this problem. We start by showing that adapter-based finetuning with LoRA matches the performance of traditional finetuning while reducing the number of training parameters by a factor of 50. This method also outperforms few-shot prompting and eliminates the need for post-processing or in-context examples. However, we show that finetuning generally degrades few-shot performance, hindering adaptation capabilities. Finally, to obtain the best of both worlds, we propose a simple approach that incorporates few-shot examples during finetuning. Experiments on 10 language pairs show that our proposed approach recovers the original few-shot capabilities while keeping the added benefits of finetuning.</abstract>
       <url hash="206d3e33">2023.findings-emnlp.744</url>
@@ -24308,7 +24308,7 @@
     <paper id="748">
       <title>Long-Horizon Dialogue Understanding for Role Identification in the Game of Avalon with Large Language Models</title>
       <author><first>Simon</first><last>Stepputtis</last></author>
-      <author><first>Joseph</first><last>Campbell</last></author>
+      <author id="joseph-p-campbell"><first>Joseph</first><last>Campbell</last></author>
       <author><first>Yaqi</first><last>Xie</last></author>
       <author><first>Zhengyang</first><last>Qi</last></author>
       <author><first>Wenxin</first><last>Zhang</last></author>
@@ -24414,7 +24414,7 @@
       <author><first>Gengyu</first><last>Wang</last></author>
       <author><first>Hammad</first><last>Ayyubi</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>11289-11303</pages>
       <abstract>The field of vision-and-language (VL) understanding has made unprecedented progress with end-to-end large pre-trained VL models (VLMs). However, they still fall short in zero-shot reasoning tasks that require multi-step inferencing. To achieve this goal, previous works resort to a divide-and-conquer pipeline. In this paper, we argue that previous efforts have several inherent shortcomings: 1) They rely on domain-specific sub-question decomposing models. 2) They force models to predict the final answer even if the sub-questions or sub-answers provide insufficient information. We address these limitations via IdealGPT, a framework that iteratively decomposes VL reasoning using large language models (LLMs). Specifically, IdealGPT utilizes an LLM to generate sub-questions, a VLM to provide corresponding sub-answers, and another LLM to reason to achieve the final answer. These three modules perform the divide-and-conquer procedure iteratively until the model is confident about the final answer to the main question. We evaluate IdealGPT on multiple challenging VL reasoning tasks under a zero-shot setting. In particular, our IdealGPT outperforms the best existing GPT-4-like models by an absolute 10% on VCR and 15% on SNLI-VE. Code is available at https://github.com/Hxyou/IdealGPT.</abstract>
       <url hash="ad943552">2023.findings-emnlp.755</url>
@@ -24511,7 +24511,7 @@
       <author><first>Jia</first><last>Liu</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>11383-11406</pages>
       <abstract>To enhance the multi-step reasoning capabilities of large language models, researchers have extensively explored prompting methods, notably the Chain-of-Thought (CoT) method which explicitly elicits human-like rationales. However, they have inadvertently overlooked the potential of enhancing model reasoning performance by formulating higher-quality problems. In this work, we start from the problem side and propose Self-Polish (SP), a novel method that facilitates the model’s reasoning by guiding it to progressively refine the given problems to be more comprehensible and solvable. We also explore several automatic prompting varients and propose the Self-Polish prompt bank for the community. SP is orthogonal to all other prompting methods of answer/reasoning side like CoT, allowing for seamless integration with state-of-the-art techniques for further improvement. Thorough experiments show that the proposed method attains notable and consistent effectiveness on five reasoning benchmarks across different models. Furthermore, our method also showcases impressive performance on robustness evaluation. Codes and prompts are available at https://github.com/WooooDyy/Self-Polish.</abstract>
       <url hash="c73f6ae8">2023.findings-emnlp.762</url>
@@ -24536,7 +24536,7 @@
       <author><first>Ashita</first><last>Saxena</last></author>
       <author><first>Raja</first><last>Kumar</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>11424-11438</pages>
       <abstract>Detecting hallucinations in natural language processing (NLP) is a critical undertaking that demands a deep understanding of both the semantic and pragmatic aspects of languages. Cognitive approaches that leverage users’ behavioural signals, such as gaze, have demonstrated effectiveness in addressing NLP tasks with similar linguistic complexities. However, their potential in the context of hallucination detection remains largely unexplored. In this paper, we propose a novel cognitive approach for hallucination detection that leverages gaze signals from humans. We first collect and introduce an eye tracking corpus (IITB-HGC: IITB-Hallucination Gaze corpus) consisting of 500 instances, annotated by five annotators for hallucination detection. Our analysis reveals that humans selectively attend to relevant parts of the text based on distributional similarity, similar to the attention bias phenomenon in psychology. We identify two attention strategies employed by humans: global attention, which focuses on the most informative sentence, and local attention, which focuses on important words within a sentence. Leveraging these insights, we propose a novel cognitive framework for hallucination detection that incorporates these attention biases. Experimental evaluations on the FactCC dataset demonstrate the efficacy of our approach, obtaining a balanced accuracy of 87.1%. Our study highlights the potential of gaze-based approaches in addressing the task of hallucination detection and sheds light on the cognitive processes employed by humans in identifying inconsistencies.</abstract>
       <url hash="79c899d5">2023.findings-emnlp.764</url>
@@ -24561,7 +24561,7 @@
     <paper id="766">
       <title>Enhancing Accessible Communication: from <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese to <fixed-case>P</fixed-case>ortuguese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Catarina</first><last>Sousa</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Mara</first><last>Moita</last></author>
       <pages>11452-11460</pages>
       <abstract>Portuguese Sign Language (LGP) is the official language in deaf education in Portugal. Current approaches in developing a translation system between European Portuguese and LGP rely on hand-crafted rules. In this paper, we present a fully automatic corpora-driven rule-based machine translation system between European Portuguese and LGP glosses, and also two neural machine translation models. We also contribute with the LGP-5-Domain corpus, composed of five different text domains, built with the help of our rule-based system, and used to train the neural models. In addition, we provide a gold collection, annotated by LGP experts, that can be used for future evaluations. Compared with the only similar available translation system, PE2LGP, results are always improved with the new rule-based model, which competes for the highest scores with one of the neural models.</abstract>
@@ -24594,7 +24594,7 @@
       <title>Domain Adaptation for Sentiment Analysis Using Robust Internal Representations</title>
       <author><first>Mohammad</first><last>Rostami</last></author>
       <author><first>Digbalay</first><last>Bose</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
       <pages>11484-11498</pages>
       <abstract>Sentiment analysis is a costly yet necessary task for enterprises to study the opinions of their customers to improve their products and to determine optimal marketing strategies. Due to the existence of a wide range of domains across different products and services, cross-domain sentiment analysis methods have received significant attention. These methods mitigate the domain gap between different applications by training cross-domain generalizable classifiers which relax the need for data annotation for each domain. We develop a domain adaptation method which induces large margins between data representations that belong to different classes in an embedding space. This embedding space is trained to be domain-agnostic by matching the data distributions across the domains. Large interclass margins in the source domain help to reduce the effect of “domain shift” in the target domain. Theoretical and empirical analysis are provided to demonstrate that the proposed method is effective.</abstract>
@@ -24701,8 +24701,8 @@
       <author><first>Ashok</first><last>Urlana</last></author>
       <author><first>Pinzhen</first><last>Chen</last></author>
       <author><first>Zheng</first><last>Zhao</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <pages>11606-11628</pages>
       <abstract>This paper introduces PMIndiaSum, a multilingual and massively parallel summarization corpus focused on languages in India. Our corpus provides a training and testing ground for four language families, 14 languages, and the largest to date with 196 language pairs. We detail our construction workflow including data acquisition, processing, and quality assurance. Furthermore, we publish benchmarks for monolingual, cross-lingual, and multilingual summarization by fine-tuning, prompting, as well as translate-and-summarize. Experimental results confirm the crucial role of our data in aiding summarization between Indian languages. Our dataset is publicly available and can be freely modified and re-distributed.</abstract>
@@ -24722,7 +24722,7 @@
       <author><first>Yuxuan</first><last>Lu</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>James</first><last>Hendler</last></author>
+      <author id="james-hendler"><first>James</first><last>Hendler</last></author>
       <author><first>Dakuo</first><last>Wang</last></author>
       <pages>11629-11643</pages>
       <abstract>Real-world domain experts (e.g., doctors) rarely annotate only a decision label in their day-to-day workflow without providing explanations. Yet, existing low-resource learning techniques, such as Active Learning (AL), that aim to support human annotators mostly focus on the label while neglecting the natural language explanation of a data point. This work proposes a novel AL architecture to support experts’ real-world need for label and explanation annotations in low-resource scenarios. Our AL architecture leverages an explanation-generation model to produce explanations guided by human explanations, a prediction model that utilizes generated explanations toward prediction faithfully, and a novel data diversity-based AL sampling strategy that benefits from the explanation annotations. Automated and human evaluations demonstrate the effectiveness of incorporating explanations into AL sampling and the improved human annotation efficiency and trustworthiness with our AL architecture. Additional ablation studies illustrate the potential of our AL architecture for transfer learning, generalizability, and integration with large language models (LLMs). While LLMs exhibit exceptional explanation-generation capabilities for relatively simple tasks, their effectiveness in complex real-world tasks warrants further in-depth study.</abstract>
@@ -24756,7 +24756,7 @@
     <paper id="781">
       <title><fixed-case>SYMPTOMIFY</fixed-case>: Transforming Symptom Annotations with Language Model Knowledge Harvesting</title>
       <author><first>Bosung</first><last>Kim</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>11667-11681</pages>
       <abstract>Given the high-stakes nature of healthcare decision-making, we aim to improve the efficiency of human annotators rather than replacing them with fully automated solutions. We introduce a new comprehensive resource, SYMPTOMIFY, a dataset of annotated vaccine adverse reaction reports detailing individual vaccine reactions. The dataset, consisting of over 800k reports, surpasses previous datasets in size. Notably, it features reasoning-based explanations alongside background knowledge obtained via language model knowledge harvesting. We evaluate performance across various methods and learning paradigms, paving the way for future comparisons and benchmarking.</abstract>
       <url hash="2cd013cd">2023.findings-emnlp.781</url>
@@ -24831,7 +24831,7 @@
       <author><first>Alexander</first><last>Shan</last></author>
       <author><first>John</first><last>Bauer</last></author>
       <author><first>Riley</first><last>Carlson</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>11778-11791</pages>
       <abstract>The vast majority of the popular English named entity recognition (NER) datasets contain American or British English data, despite the existence of many global varieties of English. As such, it is unclear whether they generalize for analyzing use of English globally. To test this, we build a newswire dataset, the Worldwide English NER Dataset, to analyze NER model performance on low-resource English variants from around the world. We test widely used NER toolkits and transformer models, including models using the pre-trained contextual models RoBERTa and ELECTRA, on three datasets: a commonly used British English newswire dataset, CoNLL 2003, a more American focused dataset OntoNotes, and our global dataset. All models trained on the CoNLL or OntoNotes datasets experienced significant performance drops—over 10 F1 in some cases—when tested on the Worldwide English dataset. Upon examination of region-specific errors, we observe the greatest performance drops for Oceania and Africa, while Asia and the Middle East had comparatively strong performance. Lastly, we find that a combined model trained on the Worldwide dataset and either CoNLL or OntoNotes lost only 1-2 F1 on both test sets.</abstract>
       <url hash="7f4928d2">2023.findings-emnlp.788</url>
@@ -24883,7 +24883,7 @@
       <title>Identifying <fixed-case>Early Maladaptive Schemas</fixed-case> from Mental Health Question Texts</title>
       <author><first>Sujatha</first><last>Gollapalli</last></author>
       <author><first>Beng</first><last>Ang</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>11832-11843</pages>
       <abstract>In Psychotherapy, maladaptive schemas– negative perceptions that an individual has of the self, others, or the world that endure despite objective reality–often lead to resistance to treatments and relapse of mental health issues such as depression, anxiety, panic attacks etc. Identification of early maladaptive schemas (EMS) is thus a crucial step during Schema Therapy-based counseling sessions, where patients go through a detailed and lengthy EMS questionnaire. However, such an approach is not practical in ‘offline’ counseling scenarios, such as community QA forums which are gaining popularity for people seeking mental health support. In this paper, we investigate both LLM (Large Language Models) and non-LLM approaches for identifying EMS labels using resources from Schema Therapy. Our evaluation indicates that recent LLMs can be effective for identifying EMS but their predictions lack explainability and are too sensitive to precise ‘prompts’. Both LLM and non-LLM methods are unable to reliably address the null cases, i.e. cases with no EMS labels. However, we posit that the two approaches show complementary properties and together, they can be used to further devise techniques for EMS identification.</abstract>
       <url hash="e247edb0">2023.findings-emnlp.792</url>
@@ -24901,13 +24901,13 @@
       <author><first>Linxi</first><last>Fan</last></author>
       <author><first>Zhiding</first><last>Yu</last></author>
       <author><first>Shiyi</first><last>Lan</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Mohammad</first><last>Shoeybi</last></author>
       <author><first>Ming-Yu</first><last>Liu</last></author>
       <author><first>Yuke</first><last>Zhu</last></author>
       <author><first>Bryan</first><last>Catanzaro</last></author>
       <author><first>Chaowei</first><last>Xiao</last></author>
-      <author><first>Anima</first><last>Anandkumar</last></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last></author>
       <pages>11844-11857</pages>
       <abstract>Augmenting pretrained language models (LMs) with a vision encoder (e.g., Flamingo) has obtained state-of-the-art results in image-to-text generation. However, these models store all the knowledge within their parameters, thus often requiring enormous model parameters to model the abundant visual concepts and very rich text descriptions. Additionally, they are inefficient in incorporating new data, requiring a computational-expensive fine-tuning process. In this work, we introduce a Retrieval-augmented Visual Language Model, Re-ViLM, built upon the Flamingo, that supports retrieving the relevant knowledge from the external database for zero and in-context few-shot image-to-text generations. By storing certain knowledge explicitly in the external database, our approach reduces the number of model parameters and can easily accommodate new data during evaluation by simply updating the database. We also construct an interleaved image and text data that facilitates in-context few-shot learning capabilities.We demonstrate that Re-ViLM significantly boosts performance for image-to-text generation tasks, especially for zero-shot and few-shot generation in out-of-domain settings with 4x less parameters compared with baseline methods.</abstract>
       <url hash="7104755c">2023.findings-emnlp.793</url>
@@ -24946,8 +24946,8 @@
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>11882-11910</pages>
       <abstract>While large neural-based conversational models have become increasingly proficient dialogue agents, recent work has highlighted safety issues with these systems. For example, these systems can be goaded into generating toxic content, often perpetuating social biases or stereotypes. We investigate a retrieval-based approach for reducing bias and toxicity in responses from chatbots. It uses in-context learning to steer a model towards safer generations. Concretely, to generate a response to an unsafe dialogue context, we retrieve demonstrations of safe responses to similar dialogue contexts. We find our method performs competitively with existing approaches to dialogue safety without requiring training. We also show, using automatic and human evaluation, that reductions in toxicity obtained using our approach are not at the cost engagingness or coherency. Finally, we note our method can be used in compliment to existing dialogue safety approaches, such as RLHF.</abstract>
       <url hash="331bf588">2023.findings-emnlp.796</url>
@@ -24978,7 +24978,7 @@
       <author><first>Zhengyang</first><last>Wang</last></author>
       <author><first>Bing</first><last>Yin</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <pages>11925-11931</pages>
       <abstract>Current abstractive summarization models often generate inconsistent content, i.e. texts that are not directly inferable from the source document, are not consistent with respect to world knowledge, or are self-contradictory. These inconsistencies motivate a new consistency taxonomy that we define as faithfulness, factuality, and self-supportiveness. However, most recent work on reducing inconsistency in document summarization only focuses on faithfulness detection and correction while ignoring other inconsistency phenomena, which limits the model’s scalability. To improve the general consistency we introduce EnergySum, where we apply the Residual Energy-based Model by designing energy scorers that reflect each type of consistency. These energy scores are utilized in candidate re-ranking during the sampling process. Experiments on XSUM and CNN/DM datasets show that EnergySum mitigates the trade-off between accuracy and consistency.</abstract>
       <url hash="34caa7f4">2023.findings-emnlp.798</url>
@@ -24995,8 +24995,8 @@
       <author><first>Xingyao</first><last>Wang</last></author>
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Charles</first><last>Yu</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>11932-11951</pages>
       <abstract>The recent explosion of performance of large language models (LLMs) has changed the field of Natural Language Processing (NLP) more abruptly and seismically than any other shift in the field’s 80 year history. This has resulted in concerns that the field will become homogenized and resource-intensive. This new status quo has put many academic researchers, especially PhD students, at a disadvantage. This paper aims to define a new NLP playground by proposing 20+ PhD-dissertation-worthy research directions, covering theoretical analysis, new and challenging problems, learning paradigms and interdisciplinary applications.</abstract>
@@ -25045,7 +25045,7 @@
       <title>Noisy Self-Training with Synthetic Queries for Dense Retrieval</title>
       <author><first>Fan</first><last>Jiang</last></author>
       <author><first>Tom</first><last>Drummond</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>11991-12008</pages>
       <abstract>Although existing neural retrieval models reveal promising results when training data is abundant and the performance keeps improving as training data increases, collecting high-quality annotated data is prohibitively costly. To this end, we introduce a novel noisy self-training framework combined with synthetic queries, showing that neural retrievers can be improved in a self-evolution manner with no reliance on any external models. Experimental results show that our method improves consistently over existing methods on both general-domain (e.g., MS-MARCO) and out-of-domain (i.e., BEIR) retrieval benchmarks. Extra analysis on low-resource settings reveals that our method is data efficient and outperforms competitive baselines, with as little as 30% of labelled training data. Further extending the framework for reranker training demonstrates that the proposed method is general and yields additional gains on tasks of diverse domains.</abstract>
       <url hash="9dc5ce06">2023.findings-emnlp.803</url>
@@ -25084,7 +25084,7 @@
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Bin</first><last>Liang</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>12047-12064</pages>
       <abstract>Large Language Models (LLMs), such as ChatGPT, greatly empower dialogue systems with strong language understanding and generation capabilities. However, most of the previous works prompt the LLMs to directly generate a response based on the dialogue context, overlooking the underlying linguistic cues about the user status exhibited in the context. Such in-depth dialogue scenarios are challenging for existing LLMs to figure out the user’s hidden needs and respond satisfactorily through a single-step inference. To this end, we propose a novel linguistic cue-based chain-of-thoughts (Cue-CoT), which enhances the LLMs inference with an intermediate reasoning step to find cues exhibited in the dialogue, aiming to provide a more personalized and engaging response. To evaluate the approach, we build a benchmark with in-depth dialogue questions, consisting of 6 datasets in both Chinese and English, targeting 3 major linguistic cues during the conversation: personality, emotion, and psychology. We conducted experiments on the proposed benchmark with 5 LLMs under both zero-shot and one-shot settings. Empirical results demonstrate our proposed Cue-CoT method outperforms standard prompting methods in terms of both helpfulness and acceptability on all datasets.</abstract>
       <url hash="6b608d0f">2023.findings-emnlp.806</url>
@@ -25123,7 +25123,7 @@
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Roy</first><last>Lee</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>12096-12102</pages>
       <abstract>Visual question answering (VQA) is the task of answering questions about an image. The task assumes an understanding of both the image and the question to provide a natural language answer. VQA has gained popularity in recent years due to its potential applications in a wide range of fields, including robotics, education, and healthcare. In this paper, we focus on knowledge-augmented VQA, where answering the question requires commonsense knowledge, world knowledge, and reasoning about ideas and concepts not present in the image. We propose a multimodal framework that uses language guidance (LG) in the form of rationales, image captions, scene graphs, etc to answer questions more accurately. We benchmark our method on the multi-choice question-answering task of the A-OKVQA, Science-QA, VSR, and IconQA datasets using CLIP and BLIP models. We show that the use of language guidance is a simple but powerful and effective strategy for visual question answering. Our language guidance improves the performance of CLIP by 7.6% and BLIP-2 by 4.8% in the challenging A-OKVQA dataset. We also observe consistent improvement in performance on the Science-QA, VSR, and IconQA datasets when using the proposed language guidances. The implementation of LG-VQA is publicly available at https://github.com/declare-lab/LG-VQA.</abstract>
@@ -25135,7 +25135,7 @@
       <title><fixed-case>XLS</fixed-case>-<fixed-case>R</fixed-case> fine-tuning on noisy word boundaries for unsupervised speech segmentation into words</title>
       <author><first>Robin</first><last>Algayres</last></author>
       <author><first>Pablo</first><last>Diego-Simon</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>12103-12112</pages>
       <abstract>Due to the absence of explicit word boundaries in the speech stream, the task of segmenting spoken sentences into word units without text supervision is particularly challenging. In this work, we leverage the most recent self-supervised speech models that have proved to quickly adapt to new tasks through fine-tuning, even in low resource conditions. Taking inspiration from semi-supervised learning, we fine-tune an XLS-R model to predict word boundaries themselves produced by top-tier speech segmentation systems: DPDP, VG-HuBERT and DP-Parse. Once XLS-R is fine-tuned, it is used to infer new word boundary labels that are used in turn for another fine-tuning step. Our method consistently improves the performance of each system and set a new state-of-the-art that is, on average 130% higher than the previous one as measured by the F1 score on correctly discovered word tokens on five corpora featuring different languages. Finally, our system can segment speech from languages unseen during fine-tuning in a zero-shot fashion.</abstract>
@@ -25175,7 +25175,7 @@
       <author><first>Geng</first><last>Tu</last></author>
       <author><first>Bin</first><last>Liang</last></author>
       <author><first>Bing</first><last>Qin</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>12160-12173</pages>
       <abstract>Multiple knowledge (e.g., co-reference, topics, emotional causes, etc) has been demonstrated effective for emotion detection. However, exploring this knowledge in Emotion Recognition in Conversations (ERC) is currently a blank slate due to the lack of annotated data and the high cost involved in obtaining such knowledge. Fortunately, the emergence of Large Language Models (LLMs) holds promise in filling this void. Therefore, we propose a Multiple Knowledge Fusion Model (MKFM) to effectively integrate such knowledge generated by LLMs for ERC and empirically study its impact on the model. Experimental results on three public datasets have demonstrated the effectiveness of multiple knowledge for ERC. Furthermore, we conduct a detailed analysis of the contribution and complementarity of this knowledge.</abstract>
@@ -25190,7 +25190,7 @@
       <author><first>Kun</first><last>Kuang</last></author>
       <author><first>Yating</first><last>Zhang</last></author>
       <author><first>Lei</first><last>Wang</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <author><first>Yi</first><last>Yang</last></author>
       <author><first>Fei</first><last>Wu</last></author>
       <pages>12174-12185</pages>
@@ -25218,7 +25218,7 @@
       <author><first>Alberto</first><last>Lusoli</last></author>
       <author><first>Philippa</first><last>Adams</last></author>
       <author><first>Mugdha</first><last>Pandya</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Wendy Hui Kyong</first><last>Chun</last></author>
       <pages>12194-12209</pages>
@@ -25242,7 +25242,7 @@
       <title><fixed-case>M</fixed-case>eta<fixed-case>R</fixed-case>e<fixed-case>V</fixed-case>ision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition</title>
       <author><first>Guangyue</first><last>Xu</last></author>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>12224-12236</pages>
       <abstract>Humans have the ability to learn novel compositional concepts by recalling primitive concepts acquired from past experience and generalizing these primitive concepts to novel compositions. Inspired by the above human’s compositional learning procedure, in this paper, we propose MetaReVision, a retrievalenhanced meta-learning model to solve the visually grounded compositional concept learning problem. The proposed MetaReVision consists of a retrieval module and a meta-learning module which are designed to incorporate retrieved primitive concepts as supporting set to meta-train visual-language models for grounded compositional concept recognition. Through meta-learning from episodes constructed by the retriever, MetaReVision learns a generic compositional representation that can be fast updated to recognize novel composi tional concepts. We create CompCOCO and CompFlickr to benchmark the grounded compositional concept learning. Our experimental results show MetaReVision outperforms other competitive baselines and the retrieval module does plays an important role in this compositional learning process.</abstract>
       <url hash="b21a2711">2023.findings-emnlp.818</url>
@@ -25255,7 +25255,7 @@
       <author><first>Yerin</first><last>Hwang</last></author>
       <author><first>Hyeongu</first><last>Yun</last></author>
       <author><first>Seunghyun</first><last>Yoon</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Kyomin</first><last>Jung</last></author>
       <pages>12237-12258</pages>
       <abstract>Vulnerability to lexical perturbation is a critical weakness of automatic evaluation metrics for image captioning. This paper proposes Perturbation Robust Multi-Lingual CLIPScore(PR-MCS), which exhibits robustness to such perturbations, as a novel reference-free image captioning metric applicable to multiple languages. To achieve perturbation robustness, we fine-tune the text encoder of CLIP with our language-agnostic method to distinguish the perturbed text from the original text. To verify the robustness of PR-MCS, we introduce a new fine-grained evaluation dataset consisting of detailed captions, critical objects, and the relationships between the objects for 3,000 images in five languages. In our experiments, PR-MCS significantly outperforms baseline metrics in capturing lexical noise of all various perturbation types in all five languages, while maintaining a strong correlation with human judgments.</abstract>
@@ -25280,8 +25280,8 @@
     <paper id="821">
       <title><fixed-case>BLM</fixed-case>-s/l<fixed-case>E</fixed-case>: A structured dataset of <fixed-case>E</fixed-case>nglish spray-load verb alternations for testing generalization in <fixed-case>LLM</fixed-case>s</title>
       <author><first>Giuseppe</first><last>Samo</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author><first>Chunyang</first><last>Jiang</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>12276-12287</pages>
       <abstract>Current NLP models appear to be achieving performance comparable to human capabilities on well-established benchmarks. New benchmarks are now necessary to test deeper layers of understanding of natural languages by these models. Blackbird’s Language Matrices are a recently developed framework that draws inspiration from tests of human analytic intelligence. The BLM task has revealed that successful performances in previously studied linguistic problems do not yet stem from a deep understanding of the generative factors that define these problems. In this study, we define a new BLM task for predicate-argument structure, and develop a structured dataset for its investigation, concentrating on the spray-load verb alternations in English, as a case study. The context sentences include one alternant from the spray-load alternation and the target sentence is the other alternant, to be chosen among a minimally contrastive and adversarial set of answers. We describe the generation process of the dataset and the reasoning behind the generating rules. The dataset aims to facilitate investigations into how verb information is encoded in sentence embeddings and how models generalize to the complex properties of argument structures. Benchmarking experiments conducted on the dataset and qualitative error analysis on the answer set reveal the inherent challenges associated with the problem even for current high-performing representations.</abstract>
@@ -25325,7 +25325,7 @@
       <author><first>Alex</first><last>de la Iglesia</last></author>
       <author><first>Megan</first><last>Su</last></author>
       <author><first>Xiao</first><last>Zheng</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>12325-12341</pages>
       <abstract>Despite tremendous advances in AI, it remains a significant challenge to develop interactive task guidance systems that can offer situated, personalized guidance and assist humans in various tasks. These systems need to have a sophisticated understanding of the user as well as the environment, and make timely accurate decisions on when and what to say. To address this issue, we created a new multimodal benchmark dataset, Watch, Talk and Guide (WTaG) based on natural interaction between a human user and a human instructor. We further proposed two tasks: User and Environment Understanding, and Instructor Decision Making. We leveraged several foundation models to study to what extent these models can be quickly adapted to perceptually enabled task guidance. Our quantitative, qualitative, and human evaluation results show that these models can demonstrate fair performances in some cases with no task-specific training, but a fast and reliable adaptation remains a significant challenge. Our benchmark and baselines will provide a stepping stone for future work on situated task guidance.</abstract>
       <url hash="e37184cf">2023.findings-emnlp.824</url>
@@ -25355,7 +25355,7 @@
       <author><first>Haoyang</first><last>Huang</last></author>
       <author><first>Tianyi</first><last>Tang</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ting</first><last>Song</last></author>
       <author><first>Yan</first><last>Xia</last></author>
       <author><first>Furu</first><last>Wei</last></author>
@@ -25370,7 +25370,7 @@
       <author><first>Jinyan</first><last>Su</last></author>
       <author><first>Terry</first><last>Zhuo</last></author>
       <author><first>Di</first><last>Wang</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>12395-12412</pages>
       <abstract>With the rapid progress of Large language models (LLMs) and the huge amount of text they generate, it becomes impractical to manually distinguish whether a text is machine-generated. The growing use of LLMs in social media and education, prompts us to develop methods to detect machine-generated text, preventing malicious use such as plagiarism, misinformation, and propaganda. In this paper, we introduce two novel zero-shot methods for detecting machine-generated text by leveraging the Log-Rank information. One is called DetectLLM-LRR, which is fast and efficient, and the other is called DetectLLM-NPR, which is more accurate, but slower due to the need for perturbations. Our experiments on three datasets and seven language models show that our proposed methods improve over the state of the art by 3.9 and 1.75 AUROC points absolute. Moreover, DetectLLM-NPR needs fewer perturbations than previous work to achieve the same level of performance, which makes it more practical for real-world use. We also investigate the efficiency-performance trade-off based on users’ preference for these two measures and provide intuition for using them in practice effectively. We release the data and the code of both methods in https://github.com/mbzuai-nlp/DetectLLM.</abstract>
       <url hash="96cf3bf8">2023.findings-emnlp.827</url>
@@ -25440,7 +25440,7 @@
       <author><first>Siyang</first><last>Liu</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>12475-12498</pages>
       <abstract>Annotator disagreement is ubiquitous in natural language processing (NLP) tasks. There are multiple reasons for such disagreements, including the subjectivity of the task, difficult cases, unclear guidelines, and so on. Rather than simply aggregating labels to obtain data annotations, we instead try to directly model the diverse perspectives of the annotators, and explicitly account for annotators’ idiosyncrasies in the modeling process by creating representations for each annotator (*annotator embeddings*) and also their annotations (*annotation embeddings*). In addition, we propose **TID-8**, **T**he **I**nherent **D**isagreement - **8** dataset, a benchmark that consists of eight existing language understanding datasets that have inherent annotator disagreement. We test our approach on TID-8 and show that our approach helps models learn significantly better from disagreements on six different datasets in TID-8 while increasing model size by fewer than 1% parameters. By capturing the unique tendencies and subjectivity of individual annotators through embeddings, our representations prime AI models to be inclusive of diverse viewpoints.</abstract>
       <url hash="6c3c290b">2023.findings-emnlp.832</url>
@@ -25493,9 +25493,9 @@
       <author><first>Yousef</first><last>El-Kurdi</last></author>
       <author><first>Tahira</first><last>Naseem</last></author>
       <author><first>Asim</first><last>Munawar</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
       <pages>12561-12571</pages>
       <abstract>Using in-context learning (ICL) for data generation, techniques such as Self-Instruct (Wang et al., 2023) or the follow-up Alpaca (Taori et al., 2023) can train strong conversational agents with only a small amount of human supervision. One limitation of these approaches is that they resort to very large language models (around 175B parameters) that are also proprietary and non-public. Here we explore the application of such techniques to language models that are much smaller (around 10B–40B parameters) and have permissive licenses. We find the Self-Instruct approach to be less effective at these sizes and propose new ICL methods that draw on two main ideas: (a) categorization and simplification of the ICL templates to make prompt learning easier for the LM, and (b) ensembling over multiple LM outputs to help select high-quality synthetic examples. Our algorithm leverages the 175 Self-Instruct seed tasks and employs separate pipelines for instructions that require an input and instructions that do not. Empirical investigations with different LMs show that: (1) Our proposed method yields higher-quality instruction tuning data than Self-Instruct, (2) It improves performances of both vanilla and instruction-tuned LMs by significant margins, and (3) Smaller instruction-tuned LMs generate more useful examples than their larger un-tuned counterparts.</abstract>
       <url hash="2c7e28c2">2023.findings-emnlp.836</url>
@@ -25506,7 +25506,7 @@
       <title>The Less the Merrier? Investigating Language Representation in Multilingual Models</title>
       <author><first>Hellina</first><last>Nigatu</last></author>
       <author><first>Atnafu</first><last>Tonja</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>12572-12589</pages>
       <abstract>Multilingual Language Models offer a way to incorporate multiple languages in one model and utilize cross-language transfer learning to improve performance for different Natural Language Processing (NLP) tasks. Despite progress in multilingual models, not all languages are supported as well, particularly in low-resource settings. In this work, we investigate the linguistic representation of different languages in multilingual models. We start by asking the question which languages are supported in popular multilingual models and which languages are left behind. Then, for included languages, we look at models’ learned representations based on language family and dialect and try to understand how models’ learned representations for (1) seen and (2) unseen languages vary across different language groups. In addition, we test and analyze performance on downstream tasks such as text generation and Named Entity Recognition. We observe from our experiments that community-centered models—models that focus on languages of a given family or geographical location and are built by communities who speak them—perform better at distinguishing between languages in the same family for low-resource languages. Our paper contributes to the literature in understanding multilingual models and their shortcomings and offers insights on potential ways to improve them.</abstract>
       <url hash="26b9d6eb">2023.findings-emnlp.837</url>
@@ -25520,9 +25520,9 @@
       <author><first>Francesco</first><last>Barbieri</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
       <author><first>Kiamehr</first><last>Rezaee</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Jiaxin</first><last>Pei</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>12590-12607</pages>
       <abstract>Despite its relevance, the maturity of NLP for social media pales in comparison with general-purpose models, metrics and benchmarks. This fragmented landscape makes it hard for the community to know, for instance, given a task, which is the best performing model and how it compares with others. To alleviate this issue, we introduce a unified benchmark for NLP evaluation in social media, SuperTweetEval, which includes a heterogeneous set of tasks and datasets combined, adapted and constructed from scratch. We benchmarked the performance of a wide range of models on SuperTweetEval and our results suggest that, despite the recent advances in language modelling, social media remains challenging.</abstract>
       <url hash="7ac25c93">2023.findings-emnlp.838</url>
@@ -25595,7 +25595,7 @@
       <title>Probing Representations for Document-level Event Extraction</title>
       <author><first>Barry</first><last>Wang</last></author>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>12675-12683</pages>
       <abstract>The probing classifiers framework has been employed for interpreting deep neural network models for a variety of natural language processing (NLP) applications. Studies, however, have largely focused on sentencelevel NLP tasks. This work is the first to apply the probing paradigm to representations learned for document-level information extraction (IE). We designed eight embedding probes to analyze surface, semantic, and event-understanding capabilities relevant to document-level event extraction. We apply them to the representations acquired by learning models from three different LLM-based document-level IE approaches on a standard dataset. We found that trained encoders from these models yield embeddings that can modestly improve argument detections and labeling but only slightly enhance event-level tasks, albeit trade-offs in information helpful for coherence and event-type prediction. We further found that encoder models struggle with document length and cross-sentence discourse.</abstract>
       <url hash="5bfbf3a1">2023.findings-emnlp.844</url>
@@ -25646,7 +25646,7 @@
       <author><first>Ahmet</first><last>Akbiyik</last></author>
       <author><first>M.</first><last>Kilavuz</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>12735-12747</pages>
       <abstract>Pretrained language models (PLMs) are key components in NLP, but they contain strong social biases. Quantifying these biases is challenging because current methods focusing on fill-the-mask objectives are sensitive to slight changes in input. To address this, we propose a bias probing technique called LABDet, for evaluating social bias in PLMs with a robust and language-agnostic method. For nationality as a case study, we show that LABDet “surfaces” nationality bias by training a classifier on top of a frozen PLM on non-nationality sentiment detection. We find consistent patterns of nationality bias across monolingual PLMs in six languages that align with historical and political context. We also show for English BERT that bias surfaced by LABDet correlates well with bias in the pretraining data; thus, our work is one of the few studies that directly links pretraining data to PLM behavior. Finally, we verify LABDet’s reliability and applicability to different templates and languages through an extensive set of robustness checks. We publicly share our code and dataset in https://github.com/akoksal/LABDet.</abstract>
       <url hash="8ff8051d">2023.findings-emnlp.848</url>
@@ -25683,8 +25683,8 @@
       <title>A Joint Matrix Factorization Analysis of Multilingual Representations</title>
       <author><first>Zheng</first><last>Zhao</last></author>
       <author><first>Yftah</first><last>Ziser</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <pages>12764-12783</pages>
       <abstract>We present an analysis tool based on joint matrix factorization for comparing latent representations of multilingual and monolingual models. An alternative to probing, this tool allows us to analyze multiple sets of representations in a joint manner. Using this tool, we study to what extent and how morphosyntactic features are reflected in the representations learned by multilingual pre-trained models. We conduct a large-scale empirical study of over 33 languages and 17 morphosyntactic categories. Our findings demonstrate variations in the encoding of morphosyntactic information across upper and lower layers, with category-specific differences influenced by language properties. Hierarchical clustering of the factorization outputs yields a tree structure that is related to phylogenetic trees manually crafted by linguists. Moreover, we find the factorization outputs exhibit strong associations with performance observed across different cross-lingual tasks. We release our code to facilitate future research.</abstract>
       <url hash="dff83051">2023.findings-emnlp.851</url>
@@ -25732,7 +25732,7 @@
       <title><fixed-case>DISCO</fixed-case>: A Large Scale Human Annotated Corpus for Disfluency Correction in <fixed-case>I</fixed-case>ndo-<fixed-case>E</fixed-case>uropean Languages</title>
       <author><first>Vineet</first><last>Bhat</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>12833-12857</pages>
       <abstract>Disfluency correction (DC) is the process of removing disfluent elements like fillers, repetitions and corrections from spoken utterances to create readable and interpretable text. DC is a vital post-processing step applied to Automatic Speech Recognition (ASR) outputs, before subsequent processing by downstream language understanding tasks. Existing DC research has primarily focused on English due to the unavailability of large-scale open-source datasets. Towards the goal of multilingual disfluency correction, we present a high-quality human-annotated DC corpus covering four important Indo-European languages: English, Hindi, German and French. We provide extensive analysis of results of state-of-the-art DC models across all four languages obtaining F1 scores of 97.55 (English), 94.29 (Hindi), 95.89 (German) and 92.97 (French). To demonstrate the benefits of DC on downstream tasks, we show that DC leads to 5.65 points increase in BLEU scores on average when used in conjunction with a state-of-the-art Machine Translation (MT) system. We release code to run our experiments along with our annotated dataset here.</abstract>
       <url hash="edbf56ef">2023.findings-emnlp.855</url>
@@ -25756,7 +25756,7 @@
       <title>Misery Loves Complexity: Exploring Linguistic Complexity in the Context of Emotion Detection</title>
       <author><first>Pranaydeep</first><last>Singh</last></author>
       <author><first>Luna</first><last>De Bruyne</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>12871-12880</pages>
       <abstract>Given the omnipresence of social media in our society, thoughts and opinions are being shared online in an unprecedented manner. This means that both positive and negative emotions can be equally and freely expressed. However, the negativity bias posits that human beings are inherently drawn to and more moved by negativity and, as a consequence, negative emotions get more traffic. Correspondingly, when writing about emotions this negativity bias could lead to expressions of negative emotions that are linguistically more complex. In this paper, we attempt to use readability and linguistic complexity metrics to better understand the manifestation of emotions on social media platforms like Reddit based on the widely-used GoEmotions dataset. We demonstrate that according to most metrics, negative emotions indeed tend to generate more complex text than positive emotions. In addition, we examine whether a higher complexity hampers the automatic identification of emotions. To answer this question, we fine-tuned three state-of-the-art transformers (BERT, RoBERTa, and SpanBERT) on the same emotion detection dataset. We demonstrate that these models often fail to predict emotions for the more complex texts. More advanced LLMs like RoBERTa and SpanBERT also fail to improve by significant margins on complex samples. This calls for a more nuanced interpretation of the emotion detection performance of transformer models. We make the automatically annotated data available for further research at: https://huggingface.co/datasets/pranaydeeps/CAMEO</abstract>
@@ -25802,7 +25802,7 @@
     <paper id="861">
       <title><fixed-case>COMET</fixed-case>-<fixed-case>M</fixed-case>: Reasoning about Multiple Events in Complex Sentences</title>
       <author><first>Sahithya</first><last>Ravi</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Vered</first><last>Shwartz</last></author>
       <pages>12921-12937</pages>
       <abstract>Understanding the speaker’s intended meaning often involves drawing commonsense inferences to reason about what is not stated explicitly. In multi-event sentences, it requires understanding the relationships between events based on contextual knowledge. We propose COMET-M (Multi-Event), an event-centric commonsense model capable of generating commonsense inferences for a target event within a complex sentence. COMET-M builds upon COMET (Bosselut et al., 2019), which excels at generating event-centric inferences for simple sentences, but struggles with the complexity of multi-event sentences prevalent in natural text. To overcome this limitation, we curate a Multi-Event Inference (MEI) dataset of 35K human-written inferences. We train COMET-M on the human-written inferences and also create baselines using automatically labeled examples. Experimental results demonstrate the significant performance improvement of COMET-M over COMET in generating multi-event inferences. Moreover, COMET-M successfully produces distinct inferences for each target event, taking the complete context into consideration. COMET-M holds promise for downstream tasks involving natural text such as coreference resolution, dialogue, and story understanding.</abstract>
@@ -25838,7 +25838,7 @@
     </paper>
     <paper id="864">
       <title>From Words to Wires: Generating Functioning Electronic Devices from Natural Language Descriptions</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>12972-12990</pages>
       <abstract>In this work, we show that contemporary language models have a previously unknown skill – the capacity for electronic circuit design from high-level textual descriptions, akin to code generation. We introduce two benchmarks: PINS100, assessing model knowledge of electrical components, and MICRO25, evaluating a model’s capability to design common microcontroller circuits and code in the Arduino ecosystem that involve input, output, sensors, motors, protocols, and logic – with models such as GPT-4 and Claude-V1 achieving between 60% to 96% Pass@1 on generating full devices. We include six case studies of using language models as a design assistant for moderately complex devices, such as a radiation-powered random number generator, an emoji keyboard, a visible spectrometer, and several assistive devices, while offering a qualitative analysis performance, outlining evaluation challenges, and suggesting areas of development to improve complex circuit design and practical utility. With this work, we aim to spur research at the juncture of natural language processing and electronic design.</abstract>
       <url hash="52954b5f">2023.findings-emnlp.864</url>
@@ -25849,7 +25849,7 @@
       <title>Data-efficient Active Learning for Structured Prediction with Partial Annotation and Self-Training</title>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>12991-13008</pages>
       <abstract>In this work we propose a pragmatic method that reduces the annotation cost for structured label spaces using active learning. Our approach leverages partial annotation, which reduces labeling costs for structured outputs by selecting only the most informative sub-structures for annotation. We also utilize self-training to incorporate the current model’s automatic predictions as pseudo-labels for un-annotated sub-structures. A key challenge in effectively combining partial annotation with self-training to reduce annotation cost is determining which sub-structures to select to label. To address this challenge, we adopt an error estimator to adaptively decide the partial selection ratio according to the current model’s capability. In evaluations spanning four structured prediction tasks, we show that our combination of partial annotation and self-training using an adaptive selection ratio reduces annotation cost over strong full annotation baselines under a fair comparison scheme that takes reading time into consideration.</abstract>
       <url hash="3935e965">2023.findings-emnlp.865</url>
@@ -25887,7 +25887,7 @@
       <author><first>Changjiang</first><last>Gao</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Jixing</first><last>Li</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>13042-13055</pages>
       <abstract>Recent large language models (LLMs) have revealed strong abilities to understand natural language. Since most of them share the same basic structure, i.e. the transformer block, possible contributors to their success in the training process are scaling and instruction tuning. However, how these factors affect the models’ language perception is unclear. This work compares the self-attention of several existing LLMs (LLaMA, Alpaca and Vicuna) in different sizes (7B, 13B, 30B, 65B), together with eye saccade, an aspect of human reading attention, to assess the effect of scaling and instruction tuning on language perception. Results show that scaling enhances the human resemblance and improves the effective attention by reducing the trivial pattern reliance, while instruction tuning does not. However, instruction tuning significantly enhances the models’ sensitivity to instructions. We also find that current LLMs are consistently closer to non-native than native speakers in attention, suggesting a sub-optimal language perception of all models. Our code and data used in the analysis is available on GitHub.</abstract>
       <url hash="954e9774">2023.findings-emnlp.868</url>
@@ -26015,7 +26015,7 @@
       <author><first>Amir</first><last>Pouran Ben Veyseh</last></author>
       <author><first>Hieu</first><last>Man</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <pages>13171-13189</pages>
       <abstract>Over the last few years, large language models (LLMs) have emerged as the most important breakthroughs in natural language processing (NLP) that fundamentally transform research and developments in the field. ChatGPT represents one of the most exciting LLM systems developed recently to showcase impressive skills for language generation and highly attract public attention. Among various exciting applications discovered for ChatGPT in English, the model can process and generate texts for multiple languages due to its multilingual training data. Given the broad adoption of ChatGPT for English in different problems and areas, a natural question is whether ChatGPT can also be applied effectively for other languages or it is necessary to develop more language-specific technologies. The answer to this question requires a thorough evaluation of ChatGPT over multiple tasks with diverse languages and large datasets (i.e., beyond reported anecdotes), which is still missing or limited in current research. Our work aims to fill this gap for the evaluation of ChatGPT and similar LLMs to provide more comprehensive information for multilingual NLP applications. In particular, we evaluate ChatGPT on 7 different tasks, covering 37 diverse languages with high, medium, low, and extremely low resources. Compared to the performance of previous models, our extensive experiments demonstrate the worse performance of ChatGPT for different NLP tasks and languages, calling for further research to develop better models and understanding for multilingual learning.</abstract>
@@ -26027,7 +26027,7 @@
       <title>Subspace Chronicles: How Linguistic Information Emerges, Shifts and Interacts during Language Model Training</title>
       <author><first>Max</first><last>Müller-Eberstein</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>13190-13208</pages>
       <abstract>Representational spaces learned via language modeling are fundamental to Natural Language Processing (NLP), however there has been limited understanding regarding how and when during training various types of linguistic information emerge and interact. Leveraging a novel information theoretic probing suite, which enables direct comparisons of not just task performance, but their representational subspaces, we analyze nine tasks covering syntax, semantics and reasoning, across 2M pre-training steps and five seeds. We identify critical learning phases across tasks and time, during which subspaces emerge, share information, and later disentangle to specialize. Across these phases, syntactic knowledge is acquired rapidly after 0.5% of full training. Continued performance improvements primarily stem from the acquisition of open-domain knowledge, while semantics and reasoning tasks benefit from later boosts to long-range contextualization and higher specialization. Measuring cross-task similarity further reveals that linguistically related tasks share information throughout training, and do so more during the critical phase of learning than before or after. Our findings have implications for model interpretability, multi-task learning, and learning from limited data.</abstract>
@@ -26055,7 +26055,7 @@
       <author><first>Paul</first><last>Liang</last></author>
       <author><first>Sheryl</first><last>Mathew</last></author>
       <author><first>Mengrou</first><last>Shou</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <pages>13222-13234</pages>
       <abstract>The self-supervised objective of masked prediction has led to promising performance gains on a variety of downstream tasks. However, while most approaches randomly mask tokens, there is strong intuition that deciding what to mask can substantially improve learning outcomes. We investigate this in continued pretraining setting in which pretrained models continue to pretrain on domain-specific data before performing some downstream task. We introduce Difference-Masking, a masking strategy that automatically chooses what to mask during continued pretraining by considering what makes a task domain different from the pretraining domain. Empirically, we find that Difference-Masking outperforms baselines on continued pretraining settings across four diverse language-only and multimodal video tasks.</abstract>
@@ -26091,7 +26091,7 @@
     <paper id="884">
       <title>Conditioning on Dialog Acts improves Empathy Style Transfer</title>
       <author><first>Renyi</first><last>Qu</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>13254-13271</pages>
       <abstract>We explore the role of dialog acts in style transfer, specifically empathy style transfer – rewriting a sentence to make it more empathetic without changing its meaning. Specifically, we use two novel few-shot prompting strategies: target prompting, which only uses examples of the target style (unlike traditional prompting with source/target pairs), and dialog-act-conditioned prompting, which first estimates the dialog act of the source sentence and then makes it more empathetic using few-shot examples of the same dialog act. Our study yields two key findings: (1) Target prompting typically improves empathy more effectively while maintaining the same level of semantic similarity; (2) Dialog acts matter. Dialog-act-conditioned prompting enhances empathy while preserving both semantics and the dialog-act type. Different dialog acts benefit differently from different prompting methods, highlighting the need for further investigation of the role of dialog acts in style transfer.</abstract>
@@ -26115,7 +26115,7 @@
       <title>From Speculation Detection to Trustworthy Relational Tuples in Information Extraction</title>
       <author><first>Kuicai</first><last>Dong</last></author>
       <author><first>Aixin</first><last>Sun</last></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>13287-13299</pages>
       <abstract>Speculation detection is an important NLP task to identify text factuality. However, the extracted speculative information (e.g., speculative polarity, cue, and scope) lacks structure and poses challenges for direct utilization in downstream tasks. Open Information Extraction (OIE), on the other hand, extracts structured tuples as facts, without examining the certainty of these tuples. Bridging this gap between speculation detection and information extraction becomes imperative to generate structured speculative information and trustworthy relational tuples. Existing studies on speculation detection are defined at sentence level; but even if a sentence is determined to be speculative, not all factual tuples extracted from it are speculative. In this paper, we propose to study speculations in OIE tuples and determine whether a tuple is speculative. We formally define the research problem of tuple-level speculation detection. We then conduct detailed analysis on the LSOIE dataset which provides labels for speculative tuples. Lastly, we propose a baseline model SpecTup for this new research task.</abstract>
@@ -26166,7 +26166,7 @@
     <paper id="890">
       <title>Decomposed Prompt Tuning via Low-Rank Reparameterization</title>
       <author><first>Yao</first><last>Xiao</last></author>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Jiaxi</first><last>Li</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
@@ -26182,7 +26182,7 @@
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Kun</first><last>Li</last></author>
       <author><first>Jingyan</first><last>Zhou</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>13348-13369</pages>
       <abstract>Building and maintaining end-to-end task bots using minimal human effort is a long-standing challenge in dialog research. In this work, we introduce SGP-TOD, Schema-Guided Prompting for building Task-Oriented Dialog systems effortlessly based on large language models (LLMs). Utilizing the predefined task schema, i.e., belief instruction and dialog policy, we instruct fixed LLMs to generate appropriate responses on novel tasks, without the need for training data. Specifically, SGP-TOD comprises three components: an LLM for interacting with users, a Dialog State Tracking (DST) Prompter to aid the LLM in tracking dialog states with the given belief instruction, and a Policy Prompter to direct the LLM to generate proper responses adhering to the provided dialog policy. Experimental results on Multiwoz, RADDLE, and STAR datasets show that our training-free strategy, SGP-TOD, yields state-of-the-art (SOTA) zero-shot performance, significantly surpassing the few-shot approaches. In a domain-extension setting, SGP-TOD aptly adapts to new functionalities by merely adding supplementary schema rules. We make our code and data publicly available.</abstract>
       <url hash="24c601cd">2023.findings-emnlp.891</url>
@@ -26260,7 +26260,7 @@
       <author><first>Ce</first><last>Zheng</last></author>
       <author><first>Bofei</first><last>Gao</last></author>
       <author><first>Haozhe</first><last>Zhao</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>13455-13466</pages>
       <abstract>Frame identification aims to find semantic frames associated with target words in a sentence. Recent researches measure the similarity or matching score between targets and candidate frames by modeling frame definitions. However, they either lack sufficient representation learning of the definitions or face challenges in efficiently selecting the most suitable frame from over 1000 candidate frames. Moreover, commonly used lexicon filtering (<tex-math>lf</tex-math>) to obtain candidate frames for the target may ignore out-of-vocabulary targets and cause inadequate frame modeling. In this paper, we propose CoFFTEA, a <tex-math>\underline{Co}</tex-math>arse-to-<tex-math>\underline{F}</tex-math>ine <tex-math>\underline{F}</tex-math>rame and <tex-math>\underline{T}</tex-math>arget <tex-math>\underline{E}</tex-math>ncoders <tex-math>\underline{A}</tex-math>rchitecture. With contrastive learning and dual encoders, CoFFTEA efficiently and effectively models the alignment between frames and targets. By employing a coarse-to-fine curriculum learning procedure, CoFFTEA gradually learns to differentiate frames with varying degrees of similarity. Experimental results demonstrate that CoFFTEA outperforms previous models by 0.93 overall scores and 1.53 R@1 without <tex-math>lf</tex-math>. Further analysis suggests that CoFFTEA can better model the relationships between frame and frame, as well as target and target. The code for our approach is available at https://github.com/pkunlp-icler/COFFTEA.</abstract>
       <url hash="225bd44d">2023.findings-emnlp.897</url>
@@ -26290,7 +26290,7 @@
       <author><first>Sudhanshu</first><last>Singh</last></author>
       <author><first>Muthusamy</first><last>Chelliah</last></author>
       <author><first>Nikesh</first><last>Garera</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>13480-13491</pages>
       <abstract>In e-commerce, opinion summarization is the process of condensing the opinions presented in product reviews. However, the absence of large amounts of supervised datasets presents challenges in generating both aspect-specific and general opinion summaries. Existing approaches have attempted to address these challenges through synthetic dataset creation (SDC). However, general opinion summarization models struggle to generate summaries faithful to the input reviews whereas aspect-specific opinion summarization models are limited due to their reliance on human-specified aspects and seed words. To address this, we propose SDC strategies tailored for general and aspect-specific opinion summarization. We experimented on three e-commerce test sets: Oposum+, Amazon, and Flipkart. For general opinion summarization, pre-trained language model (PLM) fine-tuned on our general synthetic dataset surpass the SOTA on average by 2.3 R1 points. Faithfulness evaluation metrics and human evaluations indicate that our model-generated summaries are more faithful to the input compared to others. For aspect-specific opinion summarization, PLM fine-tuned on our aspect-specific synthetic dataset surpass SOTA by ~ 1 R1 point without the aid of any human-specified aspects or seed words.</abstract>
       <url hash="d2f28c62">2023.findings-emnlp.899</url>
@@ -26303,8 +26303,8 @@
       <author><first>Pinyun</first><last>Fu</last></author>
       <author><first>Qi</first><last>Huang</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <pages>13492-13503</pages>
       <abstract>Rumors spread rapidly through online social microblogs at a relatively low cost, causing substantial economic losses and negative consequences in our daily lives. Existing rumor detection models often neglect the underlying semantic coherence between text and image components in multimodal posts, as well as the challenges posed by incomplete modalities in single modal posts, such as missing text or images. This paper presents CLKD-IMRD, a novel framework for Incomplete Modality Rumor Detection. CLKD-IMRD employs Contrastive Learning and Knowledge Distillation to capture the semantic consistency between text and image pairs, while also enhancing model generalization to incomplete modalities within individual posts. Extensive experimental results demonstrate that our CLKD-IMRD outperforms state-of-the-art methods on two English and two Chinese benchmark datasets for rumor detection in social media.</abstract>
       <url hash="047b1b72">2023.findings-emnlp.900</url>
@@ -26479,7 +26479,7 @@
       <author><first>Yaswanth</first><last>Narsupalli</last></author>
       <author><first>Sreevatsa</first><last>Muppirala</last></author>
       <author><first>Sriram</first><last>Krishnan</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Amba</first><last>Kulkarni</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>13679-13692</pages>
@@ -26519,7 +26519,7 @@
       <author><first>Yiqing</first><last>Xie</last></author>
       <author><first>Atharva</first><last>Naik</last></author>
       <author><first>Daniel</first><last>Fried</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>13725-13739</pages>
       <abstract>One major challenge of translating code between programming languages is that parallel training data is often limited. To overcome this challenge, we present two data augmentation techniques, one that builds comparable corpora (i.e., code pairs with similar functionality), and another that augments existing parallel data with multiple reference translations. Specifically, we build and analyze multiple types of comparable corpora, including programs generated from natural language documentation using a code generation model. Furthermore, to reduce overfitting to a single reference translation, we automatically generate additional translation references for available parallel data and filter the translations by unit tests, which increases variation in target translations. Experiments show that our data augmentation techniques significantly improve CodeT5 for translation between Java, Python, and C++ by an average of 7.5% Computational Accuracy (CA@1), which verifies the correctness of translations by execution. The code is available at https://github.com/Veronicium/CMTrans.</abstract>
       <url hash="87c73bab">2023.findings-emnlp.917</url>
@@ -26552,7 +26552,7 @@
       <author><first>Peiyu</first><last>Liu</last></author>
       <author><first>Ze-Feng</first><last>Gao</last></author>
       <author><first>Yushuo</first><last>Chen</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>13771-13785</pages>
       <abstract>In this paper, we propose a highly parameter-efficient approach to scaling pre-trained language models (PLMs) to a deeper model depth. Unlike prior work that shares all parameters or uses extra blocks, we design a more capable parameter-sharing architecture based on matrix product operator (MPO), an efficient tensor decomposition method to factorize the parameter matrix into a set of local tensors. Based on such a decomposition, we share the important local tensor across all layers for reducing the model size and meanwhile keep layer-specific tensors (also using Adapters) for enhancing the adaptation flexibility. To improve the model training, we further propose a stable initialization algorithm tailored for the MPO-based architecture. Extensive experiments have demonstrated the effectiveness of our proposed model in enhancing scalability and achieving higher performance (i.e., with fewer parameters than BERT-base, we successfully scale the model depth by a factor of 4x and even achieve 0.1 points higher than BERT-large for GLUE score). The code to reproduce the results of this paper can be found at https://github.com/RUCAIBox/MPOBERT-code.</abstract>
@@ -26620,7 +26620,7 @@
     <paper id="925">
       <title>A Parallel Corpus for <fixed-case>V</fixed-case>ietnamese Central-Northern Dialect Text Transfer</title>
       <author><first>Thang</first><last>Le</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <pages>13839-13855</pages>
       <abstract>The Vietnamese language embodies dialectal variants closely attached to the nation’s three macro-regions: the Northern, Central and Southern regions. As the northern dialect forms the basis of the standard language, it’s considered the prestige dialect. While the northern dialect differs from the remaining two in certain aspects, it almost shares an identical lexicon with the southern dialect, making the textual attributes nearly interchangeable. In contrast, the central dialect possesses a number of unique vocabularies and is less mutually intelligible to the standard dialect. Through preliminary experiments, we observe that current NLP models do not possess understandings of the Vietnamese central dialect text, which most likely originates from the lack of resources. To facilitate research on this domain, we introduce a new parallel corpus for Vietnamese central-northern dialect text transfer. Via exhaustive benchmarking, we discover monolingual language models’ superiority over their multilingual counterparts on the dialect transfer task. We further demonstrate that fine-tuned transfer models can seamlessly improve the performance of existing NLP systems on the central dialect domain with dedicated results in translation and text-image retrieval tasks.</abstract>
       <url hash="183115f6">2023.findings-emnlp.925</url>
@@ -26698,7 +26698,7 @@
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Jingting</first><last>Ye</last></author>
       <author><first>Menghan</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>13951-13976</pages>
       <abstract>Large language models (LLMs) have exhibited considerable cross-lingual generalization abilities, whereby they implicitly transfer knowledge across languages. However, the transfer is not equally successful for all languages, especially for low-resource ones, which poses an ongoing challenge. It is unclear whether we have reached the limits of implicit cross-lingual generalization and if explicit knowledge transfer is viable. In this paper, we investigate the potential for explicitly aligning conceptual correspondence between languages to enhance cross-lingual generalization. Using the syntactic aspect of language as a testbed, our analyses of 43 languages reveal a high degree of alignability among the spaces of structural concepts within each language for both encoder-only and decoder-only LLMs. We then propose a meta-learning-based method to learn to align conceptual spaces of different languages, which facilitates zero-shot and few-shot generalization in concept classification and also offers insights into the cross-lingual in-context learning phenomenon. Experiments on syntactic analysis tasks show that our approach achieves competitive results with state-of-the-art methods and narrows the performance gap between languages, particularly benefiting those with limited resources.</abstract>
       <url hash="1d72f4bb">2023.findings-emnlp.931</url>
@@ -26709,7 +26709,7 @@
       <title>Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing</title>
       <author><first>Lucie-Aimée</first><last>Kaffee</last></author>
       <author><first>Arnav</first><last>Arora</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>13977-13998</pages>
       <abstract>Dual use, the intentional, harmful reuse of technology and scientific artefacts, is an ill-defined problem within the context of Natural Language Processing (NLP). As large language models (LLMs) have advanced in their capabilities and become more accessible, the risk of their intentional misuse becomes more prevalent. To prevent such intentional malicious use, it is necessary for NLP researchers and practitioners to understand and mitigate the risks of their research. Hence, we present an NLP-specific definition of dual use informed by researchers and practitioners in the field. Further, we propose a checklist focusing on dual-use in NLP, that can be integrated into existing conference ethics-frameworks. The definition and checklist are created based on a survey of NLP researchers and practitioners.</abstract>
@@ -26744,14 +26744,14 @@
     <paper id="935">
       <title><fixed-case>D</fixed-case>ial<fixed-case>G</fixed-case>uide: Aligning Dialogue Model Behavior with Developer Guidelines</title>
       <author><first>Prakhar</first><last>Gupta</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Sijia</first><last>Liu</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>14031-14047</pages>
       <abstract>Dialogue models are able to generate coherent and fluent responses, but they can still be challenging to control and may produce non-engaging, unsafe results. This unpredictability diminishes user trust and can hinder the use of the models in the real world. To address this, we introduce DialGuide, a novel framework for controlling dialogue model behavior using natural language rules, or guidelines. These guidelines provide information about the context they are applicable to and what should be included in the response, allowing the models to generate responses that are more closely aligned with the developer’s expectations and intent. We evaluate DialGuide on three tasks in open-domain dialogue response generation: guideline selection, response generation, and response entailment verification. Our dataset contains 10,737 positive and 15,467 negative dialogue context-response-guideline triplets across two domains - chit-chat and safety. We provide baseline models for the tasks and benchmark their performance. We also demonstrate that DialGuide is effective in the dialogue safety domain, producing safe and engaging responses that follow developer guidelines.</abstract>
       <url hash="557a2d9e">2023.findings-emnlp.935</url>
@@ -26769,7 +26769,7 @@
       <author><first>Huanqi</first><last>Cao</last></author>
       <author><first>Xin</first><last>Cheng</last></author>
       <author><first>Michael</first><last>Chung</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Xingjian</first><last>Du</last></author>
       <author><first>Matteo</first><last>Grella</last></author>
       <author><first>Kranthi</first><last>Gv</last></author>
@@ -26836,7 +26836,7 @@
       <title>Is Robustness Transferable across Languages in Multilingual Neural Machine Translation?</title>
       <author><first>Leiyu</first><last>Pan</last></author>
       <author><first/><last>Supryadi</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>14114-14125</pages>
       <abstract>Robustness, the ability of models to maintain performance in the face of perturbations, is critical for developing reliable NLP systems. Recent studies have shown promising results in improving the robustness of models through adversarial training and data augmentation. However, in machine translation, most of these studies have focused on bilingual machine translation with a single translation direction. In this paper, we investigate the transferability of robustness across different languages in multilingual neural machine translation. We propose a robustness transfer analysis protocol and conduct a series of experiments. In particular, we use character-, word-, and multi-level noises to attack the specific translation direction of the multilingual neural machine translation model and evaluate the robustness of other translation directions. Our findings demonstrate that the robustness gained in one translation direction can indeed transfer to other translation directions. Additionally, we empirically find scenarios where robustness to character-level noise and word-level noise is more likely to transfer.</abstract>
       <url hash="1697c86a">2023.findings-emnlp.940</url>
@@ -26851,7 +26851,7 @@
       <author><first>Hisham</first><last>Cholakkal</last></author>
       <author><first>Rao Muhammad</first><last>Anwer</last></author>
       <author><first>Salman</first><last>Khan</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <pages>14126-14136</pages>
       <abstract>Climate change is one of the most significant challenges we face together as a society. Creating awareness and educating policy makers the wide-ranging impact of climate change is an essential step towards a sustainable future. Recently, Large Language Models (LLMs) like ChatGPT and Bard have shown impressive conversational abilities and excel in a wide variety of NLP tasks. While these models are close-source, recently alternative open-source LLMs such as Stanford Alpaca and Vicuna have shown promising results. However, these open-source models are not specifically tailored for climate related domain specific information and also struggle to generate meaningful responses in other languages such as, Arabic. To this end, we propose a light-weight Arabic Mini-ClimateGPT that is built on an open-source LLM and is specifically fine-tuned on a conversational-style instruction tuning curated Arabic dataset Clima500-Instruct with over 500k instructions about climate change and sustainability. Further, our model also utilizes a vector embedding based retrieval mechanism during inference. We validate our proposed model through quantitative and qualitative evaluations on climate-related queries. Our model surpasses the baseline LLM in 88.3% of cases during ChatGPT-based evaluation. Furthermore, our human expert evaluation reveals an 81.6% preference for our model’s responses over multiple popular open-source models. Our open-source demos, models and curated instruction sets are available here : https://github.com/mbzuai-oryx/ClimateGPT</abstract>
       <url hash="e50bf567">2023.findings-emnlp.941</url>
@@ -26887,7 +26887,7 @@
     <paper id="944">
       <title>Selective Demonstrations for Cross-domain Text-to-<fixed-case>SQL</fixed-case></title>
       <author><first>Shuaichen</first><last>Chang</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>14174-14189</pages>
       <abstract>Large language models (LLMs) with in-context learning have demonstrated impressive generalization capabilities in the cross-domain text-to-SQL task, without the use of in-domain annotations. However, incorporating in-domain demonstration examples has been found to greatly enhance LLMs’ performance. In this paper, we delve into the key factors within in-domain examples that contribute to the improvement and explore whether we can harness these benefits without relying on in-domain annotations. Based on our findings, we propose a demonstration selection framework, ODIS, which utilizes both out-of-domain examples and synthetically generated in-domain examples to construct demonstrations. By retrieving demonstrations from hybrid sources, ODIS leverages the advantages of both, showcasing its effectiveness compared to baseline methods that rely on a single data source. Furthermore, ODIS outperforms state-of-the-art approaches on two cross-domain text-to-SQL datasets, with improvements of 1.1 and 11.8 points in execution accuracy, respectively.</abstract>
       <url hash="8de00874">2023.findings-emnlp.944</url>
@@ -27004,7 +27004,7 @@
       <title>Mitigating Data Imbalance and Representation Degeneration in Multilingual Machine Translation</title>
       <author><first>Wen</first><last>Lai</last></author>
       <author><first>Alexandra</first><last>Chronopoulou</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>14279-14294</pages>
       <abstract>Despite advances in multilingual neural machine translation (MNMT), we argue that there are still two major challenges in this area: data imbalance and representation degeneration. The data imbalance problem refers to the imbalance in the amount of parallel corpora for all language pairs, especially for long-tail languages (i.e., very low-resource languages). The representation degeneration problem refers to the problem of encoded tokens tending to appear only in a small subspace of the full space available to the MNMT model. To solve these two issues, we propose Bi-ACL, a framework which only requires target-side monolingual data and a bilingual dictionary to improve the performance of the MNMT model. We define two modules, named bidirectional autoencoder and bidirectional contrastive learning, which we combine with an online constrained beam search and a curriculum learning sampling strategy. Extensive experiments show that our proposed method is more effective than strong baselines both in long-tail languages and in high-resource languages. We also demonstrate that our approach is capable of transferring knowledge between domains and languages in zero-shot scenarios.</abstract>
       <url hash="ad1c5678">2023.findings-emnlp.953</url>
@@ -27030,7 +27030,7 @@
       <title>The Locality and Symmetry of Positional Encodings</title>
       <author><first>Lihu</first><last>Chen</last></author>
       <author><first>Gael</first><last>Varoquaux</last></author>
-      <author><first>Fabian</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></author>
       <pages>14313-14331</pages>
       <abstract>Positional Encodings (PEs) are used to inject word-order information into transformer-based language models. While they can significantly enhance the quality of sentence representations, their specific contribution to language models is not fully understood, especially given recent findings that various positional encodings are insensitive to word order. In this work, we conduct a systematic study of positional encodings in <b>Bidirectional Masked Language Models</b> (BERT-style) , which complements existing work in three aspects: (1) We uncover the core function of PEs by identifying two common properties, Locality and Symmetry; (2) We show that the two properties are closely correlated with the performances of downstream tasks; (3) We quantify the weakness of current PEs by introducing two new probing tasks, on which current PEs perform poorly. We believe that these results are the basis for developing better PEs for transformer-based language models.</abstract>
       <url hash="15098d62">2023.findings-emnlp.955</url>
@@ -27043,7 +27043,7 @@
       <author><first>Xiaohu</first><last>Zhao</last></author>
       <author><first>Yikun</first><last>Lei</last></author>
       <author><first>Shaolin</first><last>Zhu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>14332-14348</pages>
       <abstract>In this paper, we employ Singular Value Canonical Correlation Analysis (SVCCA) to analyze representations learnt in a multilingual end-to-end speech translation model trained over 22 languages. SVCCA enables us to estimate representational similarity across languages and layers, enhancing our understanding of the functionality of multilingual speech translation and its potential connection to multilingual neural machine translation. The multilingual speech translation model is trained on the CoVoST 2 dataset in all possible directions, and we utilize LASER to extract parallel bitext data for SVCCA analysis. We derive three major findings from our analysis: (I) Linguistic similarity loses its efficacy in multilingual speech translation when the training data for a specific language is limited. (II) Enhanced encoder representations and well-aligned audio-text data significantly improve translation quality, surpassing the bilingual counterparts when the training data is not compromised. (III) The encoder representations of multilingual speech translation demonstrate superior performance in predicting phonetic features in linguistic typology prediction. With these findings, we propose that releasing the constraint of limited data for low-resource languages and subsequently combining them with linguistically related high-resource languages could offer a more effective approach for multilingual end-to-end speech translation.</abstract>
       <url hash="110a1872">2023.findings-emnlp.956</url>
@@ -27081,7 +27081,7 @@
       <title>Unnatural language processing: How do language models handle machine-generated prompts?</title>
       <author><first>Corentin</first><last>Kervadec</last></author>
       <author><first>Francesca</first><last>Franzon</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>14377-14392</pages>
       <abstract>Language model prompt optimization research has shown that semantically and grammatically well-formed manually crafted prompts are routinely outperformed by automatically generated token sequences with no apparent meaning or syntactic structure, including sequences of vectors from a model’s embedding space. We use machine-generated prompts to probe how models respond to input that is not composed of natural language expressions. We study the behavior of models of different sizes in multiple semantic tasks in response to both continuous and discrete machine-generated prompts, and compare it to the behavior in response to human-generated natural-language prompts. Even when producing a similar output, machine-generated and human prompts trigger different response patterns through the network processing pathways, including different perplexities, different attention and output entropy distributions, and different unit activation profiles. We provide preliminary insight into the nature of the units activated by different prompt types, suggesting that only natural language prompts recruit a genuinely linguistic circuit.</abstract>
       <url hash="76520991">2023.findings-emnlp.959</url>
@@ -27134,7 +27134,7 @@
       <author><first>Felicia</first><last>Logozzo</last></author>
       <author><first>Michele</first><last>Mastromattei</last></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>14447-14461</pages>
       <abstract>The impressive achievements of transformers force NLP researchers to delve into how these models represent the underlying structure of natural language. In this paper, we propose a novel standpoint to investigate the above issue: using typological similarities among languages to observe how their respective monolingual models encode structural information. We aim to layer-wise compare transformers for typologically similar languages to observe whether these similarities emerge for particular layers. For this investigation, we propose to use Centered Kernel Alignment to measure similarity among weight matrices. We found that syntactic typological similarity is consistent with the similarity between the weights in the middle layers, which are the pretrained BERT layers to which syntax encoding is generally attributed. Moreover, we observe that a domain adaptation on semantically equivalent texts enhances this similarity among weight matrices.</abstract>
       <url hash="114b617a">2023.findings-emnlp.963</url>
@@ -27345,7 +27345,7 @@
       <author><first>Cong-Duy</first><last>Nguyen</last></author>
       <author><first>Thong</first><last>Nguyen</last></author>
       <author><first>Duc</first><last>Vu</last></author>
-      <author><first>Anh</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh</first><last>Luu</last></author>
       <pages>14714-14724</pages>
       <abstract>The effectiveness of a model is heavily reliant on the quality of the fusion representation of multiple modalities in multimodal sentiment analysis. Moreover, each modality is extracted from raw input and integrated with the rest to construct a multimodal representation. Although previous methods have proposed multimodal representations and achieved promising results, most of them focus on forming positive and negative pairs, neglecting the variation in sentiment scores within the same class. Additionally, they fail to capture the significance of unimodal representations in the fusion vector. To address these limitations, we introduce a framework called Supervised Angular-based Contrastive Learning for Multimodal Sentiment Analysis. This framework aims to enhance discrimination and generalizability of the multimodal representation and overcome biases in the fusion vector’s modality. Our experimental results, along with visualizations on two widely used datasets, demonstrate the effectiveness of our approach.</abstract>
       <url hash="a50c1690">2023.findings-emnlp.980</url>
@@ -27356,7 +27356,7 @@
       <title>Efficient Multilingual Language Model Compression through Vocabulary Trimming</title>
       <author><first>Asahi</first><last>Ushio</last></author>
       <author><first>Yi</first><last>Zhou</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>14725-14739</pages>
       <abstract>Multilingual language models (LMs) have become a powerful tool in NLP, especially for non-English languages. Nevertheless, model parameters of multilingual LMs remain large due to the larger embedding matrix of the vocabulary covering tokens in different languages. Instead, monolingual LMs can be trained in a target language with the language-specific vocabulary only. In this paper, we propose vocabulary-trimming (VT), a method to reduce a multilingual LM vocabulary to a target language by deleting potentially irrelevant tokens from its vocabulary. In theory, VT can compress any existing multilingual LM to any language covered by the original model. In our experiments, we show that VT can retain the original performance of the multilingual LM, while being considerably smaller in size than the original multilingual LM. The evaluation is performed over four NLP tasks (two generative and two classification tasks) among four widely used multilingual LMs in seven languages. The results show that this methodology can keep the best of both monolingual and multilingual worlds by keeping a small size as monolingual models without the need for specifically retraining them, and can even help limit potentially harmful social biases.</abstract>
       <url hash="31f74fd1">2023.findings-emnlp.981</url>
@@ -27405,7 +27405,7 @@
       <author><first>Kun</first><last>Zhou</last></author>
       <author><first>Beichen</first><last>Zhang</last></author>
       <author><first>Zheng</first><last>Gong</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>14777-14790</pages>
       <abstract>Although large language models (LLMs) have achieved excellent performance in a variety of evaluation benchmarks, they still struggle in complex reasoning tasks which require specific knowledge and multi-hop reasoning. To improve the reasoning abilities, we propose <tex-math>\textbf{ChatCoT}</tex-math>, a tool-augmented chain-of-thought reasoning framework for chat-based LLMs (<tex-math>\textit{e.g.,}</tex-math> ChatGPT). In ChatCoT, we model the chain-of-thought (CoT) reasoning as multi-turn conversations, to utilize tools in a more natural way through chatting. At each turn, LLMs can either interact with tools or perform the reasoning. Our approach can effectively leverage the multi-turn conversation ability of chat-based LLMs, and integrate the thought chain following and tools manipulation in a unified way. Specially, we initialize the early turns of the conversation by the knowledge about tools, tasks, and reasoning format, and propose an iterative <tex-math>\textit{tool-augmented reasoning}</tex-math> step to perform step-by-step tool-augmented reasoning. The experiment results on two complex reasoning datasets (MATH and HotpotQA) have shown the effectiveness of ChatCoT on complex reasoning tasks, achieving a 7.9% relative improvement over the state-of-the-art baseline.</abstract>
@@ -27472,7 +27472,7 @@
       <title>Prefix-Tuning Based Unsupervised Text Style Transfer</title>
       <author><first>Huiyu</first><last>Mai</last></author>
       <author><first>Wenhao</first><last>Jiang</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <pages>14847-14856</pages>
       <abstract>Unsupervised text style transfer aims at training a generative model that can alter the style of the input sentence while preserving its content without using any parallel data. In this paper, we employ powerful pre-trained large language models and present a new prefix-tuning-based method for unsupervised text style transfer. We construct three different kinds of prefixes, i.e., shared prefix, style prefix, and content prefix, to encode task-specific information, target style, and the content information of the input sentence, respectively. Compared to embeddings used by previous works, the proposed prefixes can provide richer information for the model. Furthermore, we adopt a recursive way of using language models in the process of style transfer. This strategy provides a more effective way for the interactions between the input sentence and GPT-2, helps the model construct more informative prefixes, and thus, helps improve the performance. Evaluations on the well-known datasets show that our method outperforms the state-of-the-art baselines. Results, analysis of ablation studies, and subjective evaluations from humans are also provided for a deeper understanding of the proposed method.</abstract>
       <url hash="6a1a9a7d">2023.findings-emnlp.990</url>
@@ -27511,7 +27511,7 @@
       <title>Qualitative Code Suggestion: A Human-Centric Approach to Qualitative Coding</title>
       <author><first>Cesare</first><last>Spinoso-Di Piano</last></author>
       <author><first>Samira</first><last>Rahimi</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>14887-14909</pages>
       <abstract>Qualitative coding is a content analysis method in which researchers read through a text corpus and assign descriptive labels or qualitative codes to passages. It is an arduous and manual process which human-computer interaction (HCI) studies have shown could greatly benefit from NLP techniques to assist qualitative coders. Yet, previous attempts at leveraging language technologies have set up qualitative coding as a fully automatable classification problem. In this work, we take a more assistive approach by defining the task of qualitative code suggestion (QCS) in which a ranked list of previously assigned qualitative codes is suggested from an identified passage. In addition to being user-motivated, QCS integrates previously ignored properties of qualitative coding such as the sequence in which passages are annotated, the importance of rare codes and the differences in annotation styles between coders. We investigate the QCS task by releasing the first publicly available qualitative coding dataset, CVDQuoding, consisting of interviews conducted with women at risk of cardiovascular disease. In addition, we conduct a human evaluation which shows that our systems consistently make relevant code suggestions.</abstract>
       <url hash="f6217b77">2023.findings-emnlp.993</url>
@@ -27523,7 +27523,7 @@
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jiaan</first><last>Wang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>14910-14922</pages>
@@ -27556,7 +27556,7 @@
       <author><first>Jaesung</first><last>Tae</last></author>
       <author><first>Ellen</first><last>Zhang</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>14935-14956</pages>
       <abstract>In-context learning (ICL) has emerged as a new approach to various natural language processing tasks, utilizing large language models (LLMs) to make predictions based on context that has been supplemented with a few examples or task-specific instructions. In this paper, we aim to extend this method to question answering tasks that utilize structured knowledge sources, and improve Text-to-SQL systems by exploring various prompt design strategies for employing LLMs. We conduct a systematic investigation into different demonstration selection methods and optimal instruction formats for prompting LLMs in the Text-to-SQL task. Our approach involves leveraging the syntactic structure of an example’s SQL query to retrieve demonstrations, and we demonstrate that pursuing both diversity and similarity in demonstration selection leads to enhanced performance. Furthermore, we show that LLMs benefit from database-related knowledge augmentations. Our most effective strategy outperforms the state-of-the-art system by 2.5 points (Execution Accuracy) and the best fine-tuned system by 5.1 points on the Spider dataset. These results highlight the effectiveness of our approach in adapting LLMs to the Text-to-SQL task, and we present an analysis of the factors contributing to the success of our strategy.</abstract>
       <url hash="143b6ccc">2023.findings-emnlp.996</url>
@@ -27568,9 +27568,9 @@
       <author><first>Odunayo</first><last>Ogundepo</last></author>
       <author><first>Tajuddeen R.</first><last>Gwadabe</last></author>
       <author><first>Clara E.</first><last>Rivera</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Bonaventure F. P.</first><last>Dossou</last></author>
       <author><first>Abdou Aziz</first><last>Diop</last></author>
       <author><first>Claytone</first><last>Sikasote</last></author>
@@ -27608,7 +27608,7 @@
       <author><first>Eunice</first><last>Mukonde</last></author>
       <author><first>Falalu Ibrahim</first><last>Lawan</last></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
       <author><first>Martin</first><last>Namukombo</last></author>
       <author><first>Mbonu</first><last>Chinedu</last></author>
       <author><first>Mofya</first><last>Phiri</last></author>
@@ -27694,7 +27694,7 @@
       <author><first>Haozhe</first><last>Zhang</last></author>
       <author><first>Zuyu</first><last>Zhao</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>15034-15045</pages>
       <abstract>Online community is composed of communities, users, and user-generated textual content, with rich information that can help us solve social problems. Previous research hasn’t fully utilized these three components and the relationship among them. What’s more, they can’t adapt to a wide range of downstream tasks. To solve these problems, we focus on a framework that simultaneously considers communities, users, and texts. And it can easily connect with a variety of downstream tasks related to social media. Specifically, we use a ternary heterogeneous graph to model online communities. Text reconstruction and edge generation are used to learn structural and semantic knowledge among communities, users, and texts. By leveraging this pre-trained model, we achieve promising results across multiple downstream tasks, such as violation detection, sentiment analysis, and community recommendation. Our exploration will improve online community modeling.</abstract>
@@ -27844,8 +27844,8 @@
     <paper id="1015">
       <title><fixed-case>T</fixed-case>-Projection: High Quality Annotation Projection for Sequence Labeling Tasks</title>
       <author><first>Iker</first><last>García-Ferrero</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>15203-15217</pages>
       <abstract>In the absence of readily available labeled data for a given sequence labeling task and language, annotation projection has been proposed as one of the possible strategies to automatically generate annotated data. Annotation projection has often been formulated as the task of transporting, on parallel corpora, the labels pertaining to a given span in the source language into its corresponding span in the target language. In this paper we present T-Projection, a novel approach for annotation projection that leverages large pretrained text2text language models and state-of-the-art machine translation technology. T-Projection decomposes the label projection task into two subtasks: (i) A candidate generation step, in which a set of projection candidates using a multilingual T5 model is generated and, (ii) a candidate selection step, in which the generated candidates are ranked based on translation probabilities. We conducted experiments on intrinsic and extrinsic tasks in 5 Indo-European and 8 low-resource African languages. We demostrate that T-projection outperforms previous annotation projection methods by a wide margin. We believe that T-Projection can help to automatically alleviate the lack of high-quality training data for sequence labeling tasks. Code and data are publicly available.</abstract>
       <url hash="515888d2">2023.findings-emnlp.1015</url>
@@ -27879,8 +27879,8 @@
       <title>Dialect Transfer for <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Speech Translation</title>
       <author><first>Claudio</first><last>Paonessa</last></author>
       <author><first>Yanick</first><last>Schraner</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Manfred</first><last>Vogel</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>15240-15254</pages>
@@ -27905,7 +27905,7 @@
       <author><first>Ajay</first><last>Patel</last></author>
       <author><first>Delip</first><last>Rao</last></author>
       <author><first>Ansh</first><last>Kothary</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>15270-15290</pages>
       <abstract>Style representation learning builds content-independent representations of author style in text. To date, no large dataset of texts with stylometric annotations on a wide range of style dimensions has been compiled, perhaps because the linguistic expertise to perform such annotation would be prohibitively expensive. Therefore, current style representation approaches make use of unsupervised neural methods to disentangle style from content to create style vectors. These approaches, however, result in uninterpretable representations, complicating their usage in downstream applications like authorship attribution where auditing and explainability is critical. In this work, we use prompting to perform stylometry on a large number of texts to generate a synthetic stylometry dataset. We use this synthetic data to then train human-interpretable style representations we call LISA embeddings. We release our synthetic dataset (StyleGenome) and our interpretable style embedding model (LISA) as resources.</abstract>
@@ -27992,7 +27992,7 @@
     </paper>
     <paper id="1027">
       <title>Incorporating Object-Level Visual Context for Multimodal Fine-Grained Entity Typing</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Wenbo</first><last>Fan</last></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Yu</first><last>Zhao</last></author>
@@ -28075,7 +28075,7 @@
       <author><first>Jinheon</first><last>Baek</last></author>
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Sung</first><last>Hwang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>15459-15469</pages>
       <abstract>Recent instruction-finetuned large language models (LMs) have achieved notable performances in various tasks, such as question-answering (QA). However, despite their ability to memorize a vast amount of general knowledge across diverse tasks, they might be suboptimal on specific tasks due to their limited capacity to transfer and adapt knowledge to target tasks. Moreover, further finetuning LMs with labeled datasets is often infeasible due to their absence, but it is also questionable if we can transfer smaller LMs having limited knowledge only with unlabeled test data. In this work, we show and investigate the capabilities of smaller self-adaptive LMs, only with unlabeled test data. In particular, we first stochastically generate multiple answers, and then ensemble them while filtering out low-quality samples to mitigate noise from inaccurate labels. Our proposed self-adaption strategy demonstrates significant performance improvements on benchmark QA datasets with higher robustness across diverse prompts, enabling LMs to stay stable. Code is available at: https://github.com/starsuzi/T-SAS.</abstract>
       <url hash="a7fe5cae">2023.findings-emnlp.1033</url>
@@ -28156,7 +28156,7 @@
       <title>Calibrated Seq2seq Models for Efficient and Generalizable Ultra-fine Entity Typing</title>
       <author><first>Yanlin</first><last>Feng</last></author>
       <author><first>Adithya</first><last>Pratapa</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <pages>15550-15560</pages>
       <abstract>Ultra-fine entity typing plays a crucial role in information extraction by predicting fine-grained semantic types for entity mentions in text. However, this task poses significant challenges due to the massive number of entity types in the output space. The current state-of-the-art approaches, based on standard multi-label classifiers or cross-encoder models, suffer from poor generalization performance or inefficient inference speed. In this paper, we present CASENT, a seq2seq model designed for ultra-fine entity typing that predicts ultra-fine types with calibrated confidence scores. Our model takes an entity mention as input and employs constrained beam search to generate multiple types autoregressively. The raw sequence probabilities associated with the predicted types are then transformed into confidence scores using a novel calibration method. We conduct extensive experiments on the UFET dataset which contains over <tex-math>10k</tex-math> types. Our method outperforms the previous state-of-the-art in terms of F1 score and calibration error, while achieving an inference speedup of over 50 times. Additionally, we demonstrate the generalization capabilities of our model by evaluating it in zero-shot and few-shot settings on five specialized domain entity typing datasets that are unseen during training. Remarkably, our model outperforms large language models with 10 times more parameters in the zero-shot setting, and when fine-tuned on 50 examples, it significantly outperforms ChatGPT on all datasets.</abstract>
       <url hash="97d0afe0">2023.findings-emnlp.1040</url>
@@ -28167,9 +28167,9 @@
       <title>Learning Semantic Role Labeling from Compatible Label Sequences</title>
       <author><first>Tao</first><last>Li</last></author>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>15561-15572</pages>
       <abstract>Semantic role labeling (SRL) has multiple disjoint label sets, e.g., VerbNet and PropBank. Creating these datasets is challenging, therefore a natural question is how to use each one to help the other. Prior work has shown that cross-task interaction helps, but only explored multitask learning so far. A common issue with multi-task setup is that argument sequences are still separately decoded, running the risk of generating structurally inconsistent label sequences (as per lexicons like Semlink). In this paper, we eliminate such issue with a framework that jointly models VerbNet and PropBank labels as one sequence. In this setup, we show that enforcing Semlink constraints during decoding constantly improves the overall F1. With special input constructions, our joint model infers VerbNet arguments from given PropBank arguments with over 99 F1. For learning, we propose a constrained marginal model that learns with knowledge defined in Semlink to further benefit from the large amounts of PropBank-only data. On the joint benchmark based on CoNLL05, our models achieve state-of-the-art F1’s, outperforming the prior best in-domain model by 3.5 (VerbNet) and 0.8 (PropBank). For out-of-domain generalization, our models surpass the prior best by 3.4 (VerbNet) and 0.2 (PropBank).</abstract>
       <url hash="829fb000">2023.findings-emnlp.1041</url>
@@ -28203,7 +28203,7 @@
     <paper id="1044">
       <title>Is <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> the ultimate Data Augmentation Algorithm?</title>
       <author><first>Frédéric</first><last>Piedboeuf</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>15606-15615</pages>
       <abstract>In the aftermath of GPT-3.5, commonly known as ChatGPT, research have attempted to assess its capacity for lowering annotation cost, either by doing zero-shot learning, generating new data, or replacing human annotators. Some studies have also investigated its use for data augmentation (DA), but only in limited contexts, which still leaves the question of how ChatGPT performs compared to state-of-the-art algorithms. In this paper, we use ChatGPT to create new data both with paraphrasing and with zero-shot generation, and compare it to seven other algorithms. We show that while ChatGPT performs exceptionally well on some simpler data, it overall does not perform better than the other algorithms, yet demands a much larger implication from the practitioner due to the ChatGPT often refusing to answer due to sensitive content in the datasets.</abstract>
       <url hash="d3015450">2023.findings-emnlp.1044</url>
@@ -28244,7 +28244,7 @@
       <title>Unveiling the Power of Argument Arrangement in Online Persuasive Discussions</title>
       <author><first>Nailia</first><last>Mirzakhmedova</last></author>
       <author><first>Johannes</first><last>Kiesel</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>15659-15671</pages>
       <abstract>Previous research on argumentation in online discussions has largely focused on examining individual comments and neglected the interactive nature of discussions. In line with previous work, we represent individual comments as sequences of semantic argumentative unit types. However, because it is intuitively necessary for dialogical argumentation to address the opposing viewpoints, we extend this model by clustering type sequences into different argument arrangement patterns and representing discussions as sequences of these patterns. These sequences of patterns are a symbolic representation of argumentation strategies that capture the overall structure of discussions. Using this novel approach, we conduct an in-depth analysis of the strategies in 34,393 discussions from the online discussion forum Change My View and show that our discussion model is effective for persuasiveness prediction, outperforming LLM-based classifiers on the same data. Our results provide valuable insights into argumentation dynamics in online discussions and, through the presented prediction procedure, are of practical importance for writing assistance and persuasive text generation systems.</abstract>
@@ -28357,7 +28357,7 @@
       <title>Unleashing the Multilingual Encoder Potential: Boosting Zero-Shot Performance via Probability Calibration</title>
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>15774-15782</pages>
       <abstract>Pretrained multilingual encoder models can directly perform zero-shot multilingual tasks or linguistic probing by reformulating the input examples into cloze-style prompts. This is accomplished by predicting the probabilities of the label words at the masked token position, without requiring any updates to the model parameters. However, the performance of this method is limited by the model’s bias toward predicting label words which frequently occurred during the pretraining. These words typically receive high probabilities. To address this issue, we combine the models with calibration techniques which modify the probabilities of label words predicted by the models. We first validate the effectiveness of a proposed simple calibration method together with other existing techniques on monolingual encoders in both zero- and few-shot scenarios. We subsequently employ these calibration techniques on multilingual encoders, resulting in substantial performance improvements across a wide range of tasks.</abstract>
       <url hash="405a01f5">2023.findings-emnlp.1056</url>
@@ -28369,7 +28369,7 @@
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Qifei</first><last>Wu</last></author>
       <author><first>Yuchen</first><last>Ding</last></author>
       <author><first>Hua</first><last>Wu</last></author>
@@ -28419,11 +28419,11 @@
   <volume id="ijcnlp" ingest-date="2024-01-18" type="proceedings">
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: IJCNLP-AACL 2023 (Findings)</booktitle>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <editor><first>Yuki</first><last>Arase</last></editor>
       <editor><first>Baotian</first><last>Hu</last></editor>
       <editor><first>Wei</first><last>Lu</last></editor>
-      <editor><first>Derry</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></editor>
       <editor><first>Ayu</first><last>Purwarianti</last></editor>
       <editor><first>Adila Alfa</first><last>Krisnadhi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -28443,7 +28443,7 @@
       <author><first>Yao</first><last>Wan</last></author>
       <author><first>Yibo</first><last>Wang</last></author>
       <author><first>Zhongfen</first><last>Deng</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>1–12</pages>
       <url hash="a6786489">2023.findings-ijcnlp.1</url>
       <bibkey>zhao-etal-2023-localize</bibkey>
@@ -28455,7 +28455,7 @@
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Yao</first><last>Wan</last></author>
       <author><first>Zhongfen</first><last>Deng</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>13–19</pages>
       <url hash="06b95d66">2023.findings-ijcnlp.2</url>
       <bibkey>wang-etal-2023-named</bibkey>
@@ -28466,7 +28466,7 @@
       <author><first>Imtiaz</first><last>Karim</last></author>
       <author><first>Kazi Samin</first><last>Mubasshir</last></author>
       <author><first>Mirza Masfiqur</first><last>Rahman</last></author>
-      <author><first>Elisa</first><last>Bertino</last></author>
+      <author id="elisa-bertino"><first>Elisa</first><last>Bertino</last></author>
       <pages>20–38</pages>
       <url hash="3a21d68e">2023.findings-ijcnlp.3</url>
       <bibkey>karim-etal-2023-spec5g</bibkey>
@@ -28516,7 +28516,7 @@
     <paper id="8">
       <title><fixed-case>P</fixed-case>hrase<fixed-case>S</fixed-case>umm: Abstractive Short Phrase Summarization</title>
       <author><first>Kasturi</first><last>Bhattacharjee</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
       <pages>80–94</pages>
       <url hash="05e175c5">2023.findings-ijcnlp.8</url>
@@ -28527,7 +28527,7 @@
       <title>Location Aware Modular Biencoder for Tourism Question Answering</title>
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Martin</first><last>Tomko</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>95–109</pages>
       <url hash="15aa02f4">2023.findings-ijcnlp.9</url>
       <bibkey>li-etal-2023-location</bibkey>
@@ -28562,7 +28562,7 @@
       <author><first>Polina</first><last>Chernomorchenko</last></author>
       <author><first>Anastasiia</first><last>Demidova</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>134–148</pages>
       <url hash="502cdf08">2023.findings-ijcnlp.12</url>
       <bibkey>nikishina-etal-2023-predicting</bibkey>
@@ -28583,7 +28583,7 @@
       <author><first>Chenyang</first><last>Huang</last></author>
       <author><first>Fei</first><last>Huang</last></author>
       <author><first>Zaixiang</first><last>Zheng</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Lili</first><last>Mou</last></author>
       <pages>161–170</pages>
@@ -28689,7 +28689,7 @@
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
       <author><first>Andrew E. Fano</first><last>Fano</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>250–260</pages>
       <url hash="d64bf43c">2023.findings-ijcnlp.23</url>
       <bibkey>rajagopal-etal-2023-template</bibkey>
@@ -28720,7 +28720,7 @@
       <author><first>Atakan</first><last>Kara</last></author>
       <author><first>Farrin</first><last>Marouf Sofian</last></author>
       <author><first>Andrew</first><last>Bond</last></author>
-      <author><first>Gözde</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last></author>
       <pages>278–290</pages>
       <url hash="e6ad4b41">2023.findings-ijcnlp.26</url>
       <bibkey>kara-etal-2023-gecturk</bibkey>
@@ -28739,9 +28739,9 @@
       <title><fixed-case>I</fixed-case>nd<fixed-case>IE</fixed-case>: A Multilingual Open Information Extraction Tool For <fixed-case>I</fixed-case>ndic Languages</title>
       <author><first>Ritwik</first><last>Mishra</last></author>
       <author><first>Simranjeet</first><last>Singh</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>312–326</pages>
       <url hash="93b135f3">2023.findings-ijcnlp.28</url>
       <bibkey>mishra-etal-2023-indie</bibkey>
@@ -28831,7 +28831,7 @@
     <paper id="37">
       <title><fixed-case>STRONG</fixed-case> – Structure Controllable Legal Opinion Summary Generation</title>
       <author><first>Yang</first><last>Zhong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>431–448</pages>
       <url hash="032510aa">2023.findings-ijcnlp.37</url>
       <bibkey>zhong-litman-2023-strong</bibkey>
diff --git a/data/xml/2023.finnlp.xml b/data/xml/2023.finnlp.xml
index e08809e40c..82ca5a37fa 100644
--- a/data/xml/2023.finnlp.xml
+++ b/data/xml/2023.finnlp.xml
@@ -168,7 +168,7 @@
     <paper id="14">
       <title><fixed-case>E</fixed-case>a<fixed-case>S</fixed-case>y<fixed-case>G</fixed-case>uide : <fixed-case>ESG</fixed-case> Issue Identification Framework leveraging Abilities of Generative Large Language Models</title>
       <author><first>Hanwool</first><last>Lee</last></author>
-      <author id="jonghyun-choi"><first>Jonghyun</first><last>Choi</last></author>
+      <author><first>Jonghyun</first><last>Choi</last></author>
       <author><first>Sohyeon</first><last>Kwon</last></author>
       <author><first>Sungbum</first><last>Jung</last></author>
       <pages>127–132</pages>
@@ -270,7 +270,7 @@
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Aditi</first><last>Pawde</last></author>
       <author><first>Manoj</first><last>Apte</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>31–41</pages>
       <abstract>Audit reports are a window to the financial health of a company and hence gauging coverage of various audit aspects in them is important. In this paper, we aim at determining an audit report’s coverage through classification of its sentences into multiple domain specific classes. In a weakly supervised setting, we employ a rule-based approach to automatically create training data for a BERT-based multi-label classifier. We then devise an ensemble to combine both the rule based and classifier approaches. Further, we employ two novel ways to improve the ensemble’s generalization: (i) through an active learning based approach and, (ii) through a LLM based review. We demonstrate that our proposed approaches outperform several baselines. We show utility of the proposed approaches to measure audit coverage on a large dataset of 2.8K audit reports.</abstract>
       <url hash="af232d09">2023.finnlp-2.4</url>
@@ -280,7 +280,7 @@
     <paper id="5">
       <title><fixed-case>GPT</fixed-case>-<fixed-case>F</fixed-case>in<fixed-case>RE</fixed-case>: In-context Learning for Financial Relation Extraction using Large Language Models</title>
       <author><first>Pawan</first><last>Rajpoot</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>42–45</pages>
       <abstract>Relation extraction (RE) is a crucial task in natural language processing (NLP) that aims to identify and classify relationships between entities mentioned in text. In the financial domain, relation extraction plays a vital role in extracting valuable information from financial documents, such as news articles, earnings reports, and company filings. This paper describes our solution to relation extraction on one such dataset REFinD. The dataset was released along with shared task as a part of the Fourth Workshop on Knowledge Discovery from Unstructured Data in Financial Services, co-located with SIGIR 2023. In this paper, we employed OpenAI models under the framework of in-context learning (ICL). We utilized two retrieval strategies to find top K relevant in-context learning demonstrations / examples from training data for a given test example. The first retrieval mechanism, we employed, is a learning-free dense retriever and the other system is a learning-based retriever. We were able to achieve 3rd rank overall. Our best F1-score is 0.718.</abstract>
       <url hash="db6ee0f3">2023.finnlp-2.5</url>
@@ -321,7 +321,7 @@
       <author><first>Harsha</first><last>Vardhan</last></author>
       <author><first>Sohom</first><last>Ghosh</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>57–61</pages>
       <abstract>With the growing interest in Green Investing, Environmental, Social, and Governance (ESG) factors related to Institutions and financial entities has become extremely important for investors. While the classification of potential ESG factors is an important issue, identifying whether the factors positively or negatively impact the Institution is also a key aspect to consider while making evaluations for ESG scores. This paper presents our solution to identify ESG impact types in four languages (English, Chinese, Japanese, French) released as shared tasks during the FinNLP workshop at the IJCNLP-AACL-2023 conference. We use a combination of translation, masked language modeling, paraphrasing, and classification to solve this problem and use a generalized pipeline that performs well across all four languages. Our team ranked 1st in the Chinese and Japanese sub-tasks.</abstract>
       <url hash="20956e57">2023.finnlp-2.8</url>
diff --git a/data/xml/2023.gem.xml b/data/xml/2023.gem.xml
index b5ee702340..079d85ef7c 100644
--- a/data/xml/2023.gem.xml
+++ b/data/xml/2023.gem.xml
@@ -39,7 +39,7 @@
       <author><first>Vera</first><last>Cabarrão</last><affiliation>Unbabel</affiliation></author>
       <author><first>Ana C</first><last>Farinha</last><affiliation>Unbabel</affiliation></author>
       <author><first>Helena</first><last>Moniz</last><affiliation>INESC-ID, University of Lisbon, EAMT</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
       <author><first>Isabel</first><last>Trancoso</last><affiliation>INESC-ID / IST Univ. Lisbon</affiliation></author>
       <pages>9-21</pages>
       <abstract>Task-oriented conversational datasets often lack topic variability and linguistic diversity. However, with the advent of Large Language Models (LLMs) pretrained on extensive, multilingual and diverse text data, these limitations seem overcome. Nevertheless, their generalisability to different languages and domains in dialogue applications remains uncertain without benchmarking datasets. This paper presents a holistic annotation approach for emotion and conversational quality in the context of bilingual customer support conversations. By performing annotations that take into consideration the complete instances that compose a conversation, one can form a broader perspective of the dialogue as a whole. Furthermore, it provides a unique and valuable resource for the development of text classification models. To this end, we present benchmarks for Emotion Recognition and Dialogue Quality Estimation and show that further research is needed to leverage these models in a production setting.</abstract>
@@ -86,7 +86,7 @@
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Haiyi</first><last>Zhu</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
       <pages>62-75</pages>
       <abstract>Online peer counseling platforms enable conversations between millions of people seeking and offering mental health support. Among counseling skills, reflective listening, i.e., capturing and returning to the client something the client has said, is important for positive therapeutic outcomes. We introduce a reflection generation system for online mental health support conversations leveraging GPT-3, a large language model. We compare few-shot learning against fine-tuning and assess the impact of the quality of training examples as measured by fluency, reflection resemblance, and overall preference. Fine-tuned GPT-3 generates responses that human evaluators rate as comparable in reflection quality to responses used for tuning. Models based on high-quality responses generate substantially better reflections than ones tuned on actual responses from a large online counseling service–and better reflections than the actual counselor responses. These results suggest the care needed in selecting examples for tuning generative models.</abstract>
       <url hash="c4df9163">2023.gem-1.6</url>
@@ -149,7 +149,7 @@
     <paper id="11">
       <title>Adapting Pre-trained Generative Models for Extractive Question Answering</title>
       <author><first>Prabir</first><last>Mallick</last><affiliation>TCS research</affiliation></author>
-      <author><first>Tapas</first><last>Nayak</last><affiliation>TCS Research</affiliation></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last><affiliation>TCS Research</affiliation></author>
       <author><first>Indrajit</first><last>Bhattacharya</last><affiliation>TCS Research</affiliation></author>
       <pages>128-137</pages>
       <abstract>Pre-trained Generative models such as BART, T5, etc. have gained prominence as a preferred method for text generation in various natural language processing tasks, including abstractive long-form question answering (QA) and summarization. However, the potential of generative models in extractive QA tasks, where discriminative models are commonly employed, remains largely unexplored. Discriminative models often encounter challenges associated with label sparsity, particularly when only a small portion of the context contains the answer. The challenge is more pronounced for multi-span answers. In this work, we introduce a novel approach that uses the power of pre-trained generative models to address extractive QA tasks by generating indexes corresponding to context tokens or sentences that form part of the answer. Through comprehensive evaluations on multiple extractive QA datasets, including MultiSpanQA, BioASQ, MASHQA, and WikiQA, we demonstrate the superior performance of our proposed approach compared to existing state-of-the-art models.</abstract>
@@ -173,7 +173,7 @@
       <author><first>Mengxia</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Sara</first><last>Rosenthal</last><affiliation>IBM Research</affiliation></author>
       <author><first>Mihaela</first><last>Bornea</last><affiliation>IBM Research</affiliation></author>
-      <author><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last><affiliation>IBM Research AI</affiliation></author>
       <pages>155-164</pages>
       <abstract>In this study, we focus on the challenge of improving Long-form Question Answering (LFQA) by extracting and effectively utilizing knowledge from a large set of retrieved passages. We first demonstrate the importance of accurate evidence retrieval for LFQA, showing that optimal extracted knowledge from passages significantly benefits the generation. We also show that the choice of generative models impacts the system’s ability to leverage the evidence and produce answers that are grounded in the retrieved passages. We propose a Mixture of Experts (MoE) model as an alternative to the Fusion in Decoder (FiD) used in state-of-the-art LFQA systems and we compare these two models in our experiments.</abstract>
       <url hash="6b5b0fc2">2023.gem-1.13</url>
@@ -202,7 +202,7 @@
     <paper id="16">
       <title>Synthetic Dialogue Dataset Generation using <fixed-case>LLM</fixed-case> Agents</title>
       <author><first>Yelaman</first><last>Abdullin</last><affiliation>Macquarie University</affiliation></author>
-      <author><first>Diego</first><last>Molla</last><affiliation>Macquarie University</affiliation></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last><affiliation>Macquarie University</affiliation></author>
       <author><first>Bahadorreza</first><last>Ofoghi</last><affiliation>Deakin University</affiliation></author>
       <author><first>John</first><last>Yearwood</last><affiliation>Deakin University</affiliation></author>
       <author><first>Qingyang</first><last>Li</last><affiliation>The University of Melbourne</affiliation></author>
@@ -243,10 +243,10 @@
       <title>Multi-domain Summarization from Leaderboards to Practice: Re-examining Automatic and Human Evaluation</title>
       <author><first>David</first><last>Demeter</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Oshin</first><last>Agarwal</last><affiliation>Bloomberg</affiliation></author>
-      <author><first>Simon</first><last>Ben Igeri</last><affiliation>Northwestern University</affiliation></author>
+      <author id="simon-benigeri"><first>Simon</first><last>Ben Igeri</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Marko</first><last>Sterbentz</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Neil</first><last>Molino</last><affiliation>IDA/CCS</affiliation></author>
-      <author><first>John</first><last>Conroy</last><affiliation>IDA Center for Computing Sciences</affiliation></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last><affiliation>IDA Center for Computing Sciences</affiliation></author>
       <author><first>Ani</first><last>Nenkova</last><affiliation>Adobe Research</affiliation></author>
       <pages>233-242</pages>
       <abstract>Existing literature does not give much guidance on how to build the best possible multi-domain summarization model from existing components. We present an extensive evaluation of popular pre-trained models on a wide range of datasets to inform the selection of both the model and the training data for robust summarization across several domains. We find that fine-tuned BART performs better than T5 and PEGASUS, both on in-domain and out-of-domain data, regardless of the dataset used for fine-tuning. While BART has the best performance, it does vary considerably across domains. A multi-domain summarizer that works well for all domains can be built by simply fine-tuning on diverse domains. It even performs better than an in-domain summarizer, even when using fewer total training examples. While the success of such a multi-domain summarization model is clear through automatic evaluation, by conducting a human evaluation, we find that there are variations that can not be captured by any of the automatic evaluation metrics and thus not reflected in standard leaderboards. Furthermore, we find that conducting reliable human evaluation can be complex as well. Even experienced summarization researchers can be inconsistent with one another in their assessment of the quality of a summary, and also with themselves when re-annotating the same summary. The findings of our study are two-fold. First, BART fine-tuned on heterogeneous domains is a great multi-domain summarizer for practical purposes. At the same time, we need to re-examine not just automatic evaluation metrics but also human evaluation methods to responsibly measure progress in summarization.</abstract>
@@ -283,7 +283,7 @@
       <author><first>Omar</first><last>Sharif</last></author>
       <author><first>Parker</first><last>Seegmiller</last></author>
       <author><first>Philip</first><last>Bohlman</last></author>
-      <author><first>Sarah M.</first><last>Preum</last></author>
+      <author id="sarah-masud-preum"><first>Sarah M.</first><last>Preum</last></author>
       <pages>277-288</pages>
       <abstract>Amidst the sharp rise in the evaluation of large language models (LLMs) on various tasks, we find that semantic textual similarity (STS) has been under-explored. In this study, we show that STS can be cast as a text generation problem while maintaining strong performance on multiple STS benchmarks. Additionally, we show generative LLMs significantly outperform existing encoder-based STS models when characterizing the semantic similarity between two texts with complex semantic relationships dependent on world knowledge. We validate this claim by evaluating both generative LLMs and existing encoder-based STS models on three newly-collected STS challenge sets which require world knowledge in the domains of Health, Politics, and Sports. All newly-collected data is sourced from social media content posted after May 2023 to ensure the performance of closed-source models like ChatGPT cannot be credited to memorization. Our results show that, on average, generative LLMs outperform the best encoder-only baselines by an average of 22.3% on STS tasks requiring world knowledge. Our results suggest generative language models with STS-specific prompting strategies achieve state-of-the-art performance in complex, domain-specific STS tasks.</abstract>
       <url hash="3106a585">2023.gem-1.23</url>
@@ -293,7 +293,7 @@
     <paper id="24">
       <title>To Burst or Not to Burst: Generating and Quantifying Improbable Text</title>
       <author><first>Kuleen</first><last>Sasse</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last><affiliation>Johns Hopkins APL</affiliation></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last><affiliation>Johns Hopkins APL</affiliation></author>
       <author><first>Samuel</first><last>Barham</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Edward</first><last>Staley</last><affiliation>JHUAPL</affiliation></author>
       <pages>289-309</pages>
@@ -349,7 +349,7 @@
       <author><first>Ehsan</first><last>Lotfi</last><affiliation>University of Antwerp, CLiPS</affiliation></author>
       <author><first>Maxime</first><last>De Bruyn</last><affiliation>University of Antwerp</affiliation></author>
       <author><first>Jeska</first><last>Buhmann</last><affiliation>University of Antwerp</affiliation></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp, CLiPS</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp, CLiPS</affiliation></author>
       <pages>353-371</pages>
       <abstract>The new wave of Large Language Models (LLM) has offered an efficient tool to curate sizeable conversational datasets. So far studies have mainly focused on task-oriented or generic open-domain dialogs, and have not fully explored the ability of LLMs in following complicated prompts. In this work, we focus on personalization, and employ LLMs to curate a dataset which is difficult and costly to crowd-source: PersonalityChat is a synthetic conversational dataset based upon the popular PersonaChat dataset, but conditioned on both personas and (Big-5) personality traits. Evaluating models fine-tuned on this dataset, we show that the personality trait labels can be used for trait-based personalization of generative dialogue models. We also perform a head-to-head comparison between PersonalityChat and PersonaChat, and show that training on the distilled dataset results in more fluent and coherent dialog agents in the small-model regime.</abstract>
       <url hash="3fa92bfc">2023.gem-1.29</url>
diff --git a/data/xml/2023.genbench.xml b/data/xml/2023.genbench.xml
index 63470d8a0c..6190fc3c58 100644
--- a/data/xml/2023.genbench.xml
+++ b/data/xml/2023.genbench.xml
@@ -27,7 +27,7 @@
       <title>90% F1 Score in Relation Triple Extraction: Is it Real?</title>
       <author><first>Pratik</first><last>Saini</last><affiliation>TCS Research, India</affiliation></author>
       <author><first>Samiran</first><last>Pal</last><affiliation>TCS Research, India</affiliation></author>
-      <author><first>Tapas</first><last>Nayak</last><affiliation>TCS Research, India</affiliation></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last><affiliation>TCS Research, India</affiliation></author>
       <author><first>Indrajit</first><last>Bhattacharya</last><affiliation>TCS Research, India</affiliation></author>
       <pages>1-11</pages>
       <abstract>Extracting relational triples from text is a crucial task for constructing knowledge bases. Recent advancements in joint entity and relation extraction models have demonstrated remarkable F1 scores (≥ 90%) in accurately extracting relational triples from free text. However, these models have been evaluated under restrictive experimental settings and unrealistic datasets. They overlook sentences with zero triples (zerocardinality), thereby simplifying the task. In this paper, we present a benchmark study of state-of-the-art joint entity and relation extraction models under a more realistic setting. We include sentences that lack any triples in our experiments, providing a comprehensive evaluation. Our findings reveal a significant decline (approximately 10-15% in one dataset and 6-14% in another dataset) in the models’ F1 scores within this realistic experimental setup. Furthermore, we propose a two-step modeling approach that utilizes a simple BERT-based classifier. This approach leads to overall performance improvement in these models within the realistic experimental setting.</abstract>
@@ -68,7 +68,7 @@
       <title>Evaluating Neural Language Models as Cognitive Models of Language Acquisition</title>
       <author><first>Héctor Javier</first><last>Vázquez Martínez</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Annika</first><last>Heuser</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Charles</first><last>Yang</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Jordan</first><last>Kodner</last><affiliation>Stony Brook University</affiliation></author>
       <pages>48-64</pages>
       <abstract>The success of neural language models (LMs) on many technological tasks has brought about their potential relevance as scientific theories of language despite some clear differences between LM training and child language acquisition. In this paper we argue that some of the most prominent benchmarks for evaluating the syntactic capacities of LMs may not be sufficiently rigorous. In particular, we show that the template-based benchmarks lack the structural diversity commonly found in the theoretical and psychological studies of language. When trained on small-scale data modeling child language acquisition, the LMs can be readily matched by simple baseline models. We advocate for the use of the readily available, carefully curated datasets that have been evaluated for gradient acceptability by large pools of native speakers and are designed to probe the structural basis of grammar specifically. On one such dataset, the LI-Adger dataset, LMs evaluate sentences in a way inconsistent with human language users. We conclude with suggestions for better connecting LMs with the empirical study of child language acquisition.</abstract>
@@ -171,9 +171,9 @@
     <paper id="13">
       <title>Blackbird Language Matrices Tasks for Generalization</title>
       <author><first>Paola</first><last>Merlo</last><affiliation>University of Geneva</affiliation></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
+      <author><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Giuseppe</first><last>Samo</last><affiliation>University of Geneva</affiliation></author>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <pages>163-172</pages>
       <abstract>To develop a system with near-human language capabilities, we need to understand current systems’ generalisation and compositional abilities. We approach this by generating compositional, structured data, inspired from visual intelligence tests, that depend on the problem-solvers being able to disentangle objects and their absolute and relative properties in a sequence of images. We design an analogous task and develop the corresponding datasets that capture specific linguistic phenomena and their properties. Solving each problem instance depends on detecting the relevant linguistic objects and generative rules of the problem. We propose two datasets modelling two linguistic phenomena – subject-verb agreement in French, and verb alternations in English. The datasets can be used to investigate how LLMs encode linguistic objects, such as phrases, their grammatical and semantic properties, such as number or semantic role, and how such information is combined to correctly solve each problem. Specifically generated error types help investigate the behaviour of the system, which important information it is able to detect, and which structures mislead it.</abstract>
       <url hash="497d3b29">2023.genbench-1.13</url>
diff --git a/data/xml/2023.gitt.xml b/data/xml/2023.gitt.xml
index 1fd01595fd..c183932197 100644
--- a/data/xml/2023.gitt.xml
+++ b/data/xml/2023.gitt.xml
@@ -77,7 +77,7 @@
       <title>Reducing Gender Bias in <fixed-case>NMT</fixed-case> with <fixed-case>FUDGE</fixed-case></title>
       <author><first>Tianshuai</first><last>Lu</last></author>
       <author><first>Noëmi</first><last>Aepli</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <pages>61–69</pages>
       <abstract>Gender bias appears in many neural machine translation (NMT) models and commercial translation software. Research has become more aware of this problem in recent years and there has been work on mitigating gender bias. However, the challenge of addressing gender bias in NMT persists. This work utilizes a controlled text generation method, Future Discriminators for Generation (FUDGE), to reduce the so-called Speaking As gender bias. This bias emerges when translating from English to a language that openly marks the gender of the speaker. We evaluate the model on MuST-SHE, a challenge set to specifically evaluate gender translation. The results demonstrate improvements in the translation accuracy of the feminine terms.</abstract>
       <url hash="f3eab5a8">2023.gitt-1.6</url>
@@ -89,7 +89,7 @@
       <author><first>Dennis</first><last>Fucci</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>71–83</pages>
       <abstract>Gender inclusivity in language technologies has become a prominent research topic. In this study, we explore gender-neutral translation (GNT) as a form of gender inclusivity and a goal to be achieved by machine translation (MT) models, which have been found to perpetuate gender bias and discrimination. Specifically, we focus on translation from English into Italian, a language pair representative of salient gender-related linguistic transfer problems. To define GNT, we review a selection of relevant institutional guidelines for gender-inclusive language, discuss its scenarios of use, and examine the technical challenges of performing GNT in MT, concluding with a discussion of potential solutions to encourage advancements toward greater inclusivity in MT.</abstract>
       <url hash="8e34ff6f">2023.gitt-1.7</url>
diff --git a/data/xml/2023.gwc.xml b/data/xml/2023.gwc.xml
index 9ed61ebb60..8eafb4628f 100644
--- a/data/xml/2023.gwc.xml
+++ b/data/xml/2023.gwc.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-10-30" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 12th Global Wordnet Conference</booktitle>
-      <editor><first>German</first><last>Rigau</last></editor>
+      <editor id="german-rigau"><first>German</first><last>Rigau</last></editor>
       <editor><first>Francis</first><last>Bond</last></editor>
       <editor><first>Alexandre</first><last>Rademaker</last></editor>
       <publisher>Global Wordnet Association</publisher>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>Probing Taxonomic and Thematic Embeddings for Taxonomic Information</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>1–13</pages>
       <abstract>Modelling taxonomic and thematic relatedness is important for building AI with comprehensive natural language understanding. The goal of this paper is to learn more about how taxonomic information is structurally encoded in embeddings. To do this, we design a new hypernym-hyponym probing task and perform a comparative probing study of taxonomic and thematic SGNS and GloVe embeddings. Our experiments indicate that both types of embeddings encode some taxonomic information, but the amount, as well as the geometric properties of the encodings, are independently related to both the encoder architecture, as well as the embedding training data. Specifically, we find that only taxonomic embeddings carry taxonomic information in their norm, which is determined by the underlying distribution in the data.</abstract>
       <url hash="e402fbbb">2023.gwc-1.1</url>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="5">
       <title>On the Acquisition of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Relations in <fixed-case>P</fixed-case>ortuguese from Pretrained Masked Language Models</title>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <pages>41–49</pages>
       <abstract>This paper studies the application of pretrained BERT in the acquisition of synonyms, antonyms, hypernyms and hyponyms in Portuguese. Masked patterns indicating those relations were compiled with the help of a service for validating semantic relations, and then used for prompting three pretrained BERT models, one multilingual and two for Portuguese (base and large). Predictions for the masks were evaluated in two different test sets. Results achieved by the monolingual models are interesting enough for considering these models as a source for enriching wordnets, especially when predicting hypernyms of nouns. Previously reported performances on prediction were improved with new patterns and with the large model. When it comes to selecting the related word from a set of four options, performance is even better, but not enough for outperforming the selection of the most similar word, as computed with static word embeddings.</abstract>
       <url hash="73668c5b">2023.gwc-1.5</url>
@@ -123,8 +123,8 @@
     </paper>
     <paper id="12">
       <title>Some Considerations in the Construction of a Historical Language <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Fahad</first><last>Khan</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Francisco Javier Minaya</first><last>Gómez</last></author>
       <author><first>Rafael Cruz</first><last>González</last></author>
       <author><first>Javier E.</first><last>Díaz-Vera</last></author>
@@ -147,7 +147,7 @@
       <title>How do We Treat Systematic Polysemy in Wordnets and Similar Resources? – Using Human Intuition and Contextualized Embeddings as Guidance</title>
       <author><first>Nathalie</first><last>Sørensen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <pages>117–126</pages>
       <abstract>Systematic polysemy is a well-known linguistic phenomenon where a group of lemmas follow the same polysemy pattern. However, when compiling a lexical resource like a wordnet, a problem arises regarding when to underspecify the two (or more) meanings by one (complex) sense and when to systematically split into separate senses. In this work, we present an extensive analysis of the systematic polysemy patterns in Danish, and in our preliminary study, we examine a subset of these with experiments on human intuition and contextual embeddings. The aim of this preparatory work is to enable future guidelines for each polysemy type. In the future, we hope to expand this approach and thereby hopefully obtain a sense inventory which is distributionally verified and thereby more suitable for NLP.</abstract>
       <url hash="01392b5d">2023.gwc-1.14</url>
@@ -185,11 +185,11 @@
     <paper id="18">
       <title>Documenting the Open Multilingual <fixed-case>W</fixed-case>ordnet</title>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
       <author><first>Luis Morgado</first><last>da Costa</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>150–157</pages>
       <abstract>In this project note we describe our work to make better documentation for the Open Multilingual Wordnet (OMW), a platform integrating many open wordnets. This includes the documentation of the OMW website itself as well as of semantic relations used by the component wordnets. Some of this documentation work was done with the support of the Google Season of Docs. The OMW project page, which links both to the actual OMW server and the documentation has been moved to a new location: https://omwn.org.</abstract>
       <url hash="da8f7b5f">2023.gwc-1.18</url>
@@ -198,8 +198,8 @@
     <paper id="19">
       <title>Mapping <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Semantic Web using <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Lemon</title>
       <author><first>Claus</first><last>Zinn</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>158–166</pages>
       <abstract>GermaNet is a large lexical-semantic net that relates German nouns, verbs, and adjectives semantically. The word net has been manually constructed over the last 25 years and hence presents a high-quality, valuable resource for German. While GermaNet is maintained in a Postgres database, all its content can be exported as an XML-based serialisation. Recently, this XML representation has been converted into RDF, largely by staying close to GermaNet’s principle of arrangement where lexunits that share the same meaning are grouped together into so-called synsets. With each lexical unit and synset now globally addressable via a unique resource identifier, it has become much easier to link together GermaNet entries with other lexical and semantic resources. In terms of semantic interoperability, however, the RDF variant of GermaNet leaves much to be desired. In this paper, we describe yet another conversion from GermaNet’s XML representation to RDF. The new conversion makes use of the OntoLex-Lemon ontology, and therefore, presents a decisive step toward a GermaNet representation with a much higher level of semantic interoperability, and which makes it possible to use GermaNet with other wordnets that already support this conceptualisation of lexica.</abstract>
       <url hash="1ba1cd4d">2023.gwc-1.19</url>
@@ -237,10 +237,10 @@
     </paper>
     <paper id="23">
       <title><fixed-case>L</fixed-case>atvian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Peteris</first><last>Paikens</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
       <author><first>Agute</first><last>Klints</last></author>
       <author><first>Ilze</first><last>Lokmane</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <author><first>Madara</first><last>Stāde</last></author>
       <author><first>Laine</first><last>Strankale</last></author>
@@ -272,7 +272,7 @@
     </paper>
     <paper id="26">
       <title>Reusing the <fixed-case>D</fixed-case>anish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for a New Central Word Register for <fixed-case>D</fixed-case>anish - a Project Report</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Nathalie</first><last>Sørensen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="27">
       <title>Recent Developments in <fixed-case>BTB</fixed-case>-<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>220–227</pages>
       <abstract>The paper reports on recent developments in Bulgarian BTB-WordNet (BTB-WN). This resource is viewed as playing a central role with respect to the integration and interlinking of various language resources such as: e-dictionaries (morphological, terminological, bilingual, orthographic, etymological and explanatory, etc., including editions from previous periods); corpora (coming from outside or being internal - like the corpus of definitions as well as the corpus of examples to synset meanings); ontologies (such as CIDOC-CRM, DBpedia, etc.); sources of world knowledge (such as information from the Bulgarian Encyclopedia, Wikipedia, etc.). The paper also gives information about a number of applications built on BTB-WN. These are: the Bulgaria-centered knowledge graph, the All about word application as well as some education-oriented exercises.</abstract>
@@ -418,8 +418,8 @@
     <paper id="40">
       <title>What do Language Models know about word senses? Zero-Shot <fixed-case>WSD</fixed-case> with Language Models and Domain Inventories</title>
       <author><first>Oscar</first><last>Sainz</last></author>
-      <author><first>Oier Lopez</first><last>de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier Lopez</first><last>de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>German</first><last>Rigau</last></author>
       <pages>331–342</pages>
       <abstract>Language Models are the core for almost any Natural Language Processing system nowadays. One of their particularities is their contextualized representations, a game changer feature when a disambiguation between word senses is necessary. In this paper we aim to explore to what extent language models are capable of discerning among senses at inference time. We performed this analysis by prompting commonly used Languages Models such as BERT or RoBERTa to perform the task of Word Sense Disambiguation (WSD). We leverage the relation between word senses and domains, and cast WSD as a textual entailment problem, where the different hypothesis refer to the domains of the word senses. Our results show that this approach is indeed effective, close to supervised systems.</abstract>
diff --git a/data/xml/2023.humeval.xml b/data/xml/2023.humeval.xml
index 9bc117f75c..bebb09e89d 100644
--- a/data/xml/2023.humeval.xml
+++ b/data/xml/2023.humeval.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2023-10-31" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Human Evaluation of NLP Systems</booktitle>
-      <editor><first>Anya</first><last>Belz</last></editor>
-      <editor><first>Maja</first><last>Popović</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popović</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <editor><first>Craig</first><last>Thomson</last></editor>
       <editor><first>João</first><last>Sedoc</last></editor>
@@ -36,7 +36,7 @@
       <author><first>Jessica</first><last>Chen</last></author>
       <author><first>Si Yuan</first><last>Chang</last></author>
       <author><first>Qi Chwen</first><last>Ong</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Josip</first><last>Car</last></author>
       <pages>11–22</pages>
       <abstract>Human evaluation plays a crucial role in Natural Language Processing (NLP) as it assesses the quality and relevance of developed systems, thereby facilitating their enhancement. However, the absence of widely accepted human evaluation metrics in NLP hampers fair comparisons among different systems and the establishment of universal assessment standards. Through an extensive analysis of existing literature on human evaluation metrics, we identified several gaps in NLP evaluation methodologies. These gaps served as motivation for developing our own hierarchical evaluation framework. The proposed framework offers notable advantages, particularly in providing a more comprehensive representation of the NLP system’s performance. We applied this framework to evaluate the developed Machine Reading Comprehension system, which was utilized within a human-AI symbiosis model. The results highlighted the associations between the quality of inputs and outputs, underscoring the necessity to evaluate both components rather than solely focusing on outputs. In future work, we will investigate the potential time-saving benefits of our proposed framework for evaluators assessing NLP systems.</abstract>
@@ -66,7 +66,7 @@
     <paper id="5">
       <title>Some lessons learned reproducing human evaluation of a data-to-text system</title>
       <author><first>Javier</first><last>González Corbelle</last></author>
-      <author><first>Jose</first><last>Alonso</last></author>
+      <author id="jose-m-alonso"><first>Jose</first><last>Alonso</last></author>
       <author><first>Alberto</first><last>Bugarín-Diz</last></author>
       <pages>49–68</pages>
       <abstract>This paper presents a human evaluation reproduction study regarding the data-to-text generation task. The evaluation focuses in counting the supported and contradicting facts generated by a neural data-to-text model with a macro planning stage. The model is tested generating sport summaries for the ROTOWIRE dataset. We first describe the approach to reproduction that is agreed in the context of the ReproHum project. Then, we detail the entire configuration of the original human evaluation and the adaptations that had to be made to reproduce such an evaluation. Finally, we compare the reproduction results with those reported in the paper that was taken as reference.</abstract>
@@ -91,7 +91,7 @@
       <author><first>Martijn</first><last>Goudbeek</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Frédéric</first><last>Tomas</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>75–88</pages>
       <abstract>This paper is part of the larger ReproHum project, where different teams of researchers aim to reproduce published experiments from the NLP literature. Specifically, ReproHum focuses on the reproducibility of human evaluation studies, where participants indicate the quality of different outputs of Natural Language Generation (NLG) systems. This is necessary because without reproduction studies, we do not know how reliable earlier results are. This paper aims to reproduce the second human evaluation study of Puduppully &amp; Lapata (2021), while another lab is attempting to do the same. This experiment uses best-worst scaling to determine the relative performance of different NLG systems. We found that the worst performing system in the original study is now in fact the best performing system across the board. This means that we cannot fully reproduce the original results. We also carry out alternative analyses of the data, and discuss how our results may be combined with the other reproduction study that is carried out in parallel with this paper.</abstract>
       <url hash="fc9261ac">2023.humeval-1.7</url>
@@ -112,7 +112,7 @@
       <author><first>Qixiang</first><last>Fang</last></author>
       <author><first>Pablo</first><last>Mosteiro</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>97–123</pages>
       <abstract>There is a growing concern regarding the reproducibility of human evaluation studies in NLP. As part of the ReproHum campaign, we conducted a study to assess the reproducibility of a recent human evaluation study in NLP. Specifically, we attempted to reproduce a human evaluation of a novel approach to enhance Role-Oriented Dialogue Summarization by considering the influence of role interactions. Despite our best efforts to adhere to the reported setup, we were unable to reproduce the statistical results as presented in the original paper. While no contradictory evidence was found, our study raises questions about the validity of the reported statistical significance results, and/or the comprehensiveness with which the original study was reported. In this paper, we provide a comprehensive account of our reproduction study, detailing the methodologies employed, data collection, and analysis procedures. We discuss the implications of our findings for the broader issue of reproducibility in NLP research. Our findings serve as a cautionary reminder of the challenges in conducting reproducible human evaluations and prompt further discussions within the NLP community.</abstract>
       <url hash="0b3ac317">2023.humeval-1.9</url>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="12">
       <title>Reproducing a Comparative Evaluation of <fixed-case>G</fixed-case>erman Text-to-Speech Systems</title>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>136–144</pages>
       <abstract>This paper describes the reproduction of a human evaluation in Language-Agnostic Meta- Learning for Low-Resource Text-to-Speech with Articulatory Features reported in Lux and Vu (2022). It is a contribution to the ReproNLP 2023 Shared Task on Reproducibility of Evaluations in NLP. The original evaluation assessed the naturalness of audio generated by different Text-to-Speech (TTS) systems for German, and our goal was to repeat the experiment with a different set of evaluators. We reproduced the evaluation based on data and instructions provided by the original authors, with some uncertainty concerning the randomisation of question order. Evaluators were recruited via email to relevant mailing lists and we received 157 responses over the course of three weeks. Our initial results show low reproducibility, but when we assume that the systems of the original and repeat evaluation experiment have been transposed, the reproducibility assessment improves markedly. We do not know if and at what point such a transposition happened; however, an initial analysis of our audio and video files provides some evidence that the system assignment in our repeat experiment is correct.</abstract>
@@ -159,7 +159,7 @@
     <paper id="14">
       <title><fixed-case>H</fixed-case>um<fixed-case>E</fixed-case>val’23 Reproduction Report for Paper 0040: Human Evaluation of Automatically Detected Over- and Undertranslations</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>153–189</pages>
       <abstract>This report describes a reproduction of a human evaluation study evaluating automatically detected over- and undertranslations obtained using neural machine translation approaches. While the scope of the original study is much broader, a human evaluation is included as part of its system evaluation. We attempt an exact reproduction of this human evaluation, pertaining to translations on the the English-German language pair. While encountering minor logistical challenges, with all the source material being publicly available and some additional instructions provided by the original authors, we were able to reproduce the original experiment with only minor differences in the results.</abstract>
       <url hash="d9bea0ca">2023.humeval-1.14</url>
diff --git a/data/xml/2023.icard.xml b/data/xml/2023.icard.xml
index 4eb1669bb0..a6e52d09bf 100644
--- a/data/xml/2023.icard.xml
+++ b/data/xml/2023.icard.xml
@@ -20,7 +20,7 @@
       <author><first>Joanna</first><last>Kruyt</last></author>
       <author><first>Katarína</first><last>Polónyiová</last></author>
       <author><first>Daniela</first><last>Ostatníková</last></author>
-      <author><first>Štefan</first><last>Beňuš</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
       <pages>1-11</pages>
       <url hash="02e7bf46">2023.icard-1.1</url>
       <bibkey>kruyt-etal-2023-global</bibkey>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Computational Analysis of Backchannel Usage and Overlap Length in Autistic Children</title>
       <author><first>Grace O.</first><last>Lawley</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <author><first>Steven</first><last>Bedrick</last></author>
       <pages>17-23</pages>
       <url hash="beb6955a">2023.icard-1.3</url>
@@ -48,7 +48,7 @@
       <title>Interactional coordination between conversation partners with autism using non-verbal cues in dialogues</title>
       <author><first>Tahiya</first><last>Chowdhury</last></author>
       <author><first>Veronica</first><last>Romero</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>24-34</pages>
       <url hash="2543c73c">2023.icard-1.4</url>
       <bibkey>chowdhury-etal-2023-interactional</bibkey>
diff --git a/data/xml/2023.icnlsp.xml b/data/xml/2023.icnlsp.xml
index 53db0101e9..825cef8606 100644
--- a/data/xml/2023.icnlsp.xml
+++ b/data/xml/2023.icnlsp.xml
@@ -114,7 +114,7 @@
       <author><first>Lucía</first><last>Ormaechea</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>120–133</pages>
       <url hash="8b822fd9">2023.icnlsp-1.12</url>
@@ -186,7 +186,7 @@
       <author><first>Beining</first><last>Xu</last></author>
       <author><first>Xiangyu</first><last>Zhang</last></author>
       <author><first>Hexin</first><last>Liu</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <author><first>Paola</first><last>Garcia</last></author>
       <pages>200–211</pages>
       <url hash="bd33c38f">2023.icnlsp-1.20</url>
@@ -219,8 +219,8 @@
     <paper id="24">
       <title>Typological classification of <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese fricatives: a cross-language forced alignment and pronunciation variants study</title>
       <author><first>Anisia</first><last>Popescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <pages>239–243</pages>
       <url hash="1986c0de">2023.icnlsp-1.24</url>
       <bibkey>popescu-etal-2023-typological</bibkey>
@@ -254,7 +254,7 @@
       <title>Comparing Modular and End-To-End Approaches in <fixed-case>ASR</fixed-case> for Well-Resourced and Low-Resourced Languages</title>
       <author><first>Aditya</first><last>Parikh</last></author>
       <author><first>Louis</first><last>ten Bosch</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Cristian</first><last>Tejedor-Garcia</last></author>
       <pages>266–273</pages>
       <url hash="8dcc779d">2023.icnlsp-1.28</url>
diff --git a/data/xml/2023.icon.xml b/data/xml/2023.icon.xml
index 128ce961fb..4585e97e4f 100644
--- a/data/xml/2023.icon.xml
+++ b/data/xml/2023.icon.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</booktitle>
       <editor><first>Jyoti</first><last>D. Pawar</last></editor>
-      <editor><first>Sobha</first><last>Lalitha Devi</last></editor>
+      <editor id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></editor>
       <publisher>NLP Association of India (NLPAI)</publisher>
       <address>Goa University, Goa, India</address>
       <month>December</month>
@@ -136,7 +136,7 @@
       <title><fixed-case>NLI</fixed-case> to the Rescue: Mapping Entailment Classes to Hallucination Categories in Abstractive Summarization</title>
       <author><first>Naveen</first><last>Badathala</last></author>
       <author><first>Ashita</first><last>Saxena</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>120–132</pages>
       <abstract>In this paper, we detect hallucinations in summaries generated by abstractive summarization models. We focus on three types of hallucination viz. intrinsic, extrinsic, and nonhallucinated. The method used for detecting hallucination is based on textual entailment. Given a premise and a hypothesis, textual entailment classifies the hypothesis as contradiction, neutral, or entailment. These three classes of textual entailment are mapped to intrinsic, extrinsic, and non-hallucinated respectively. We fine-tune a RoBERTa-large model on NLI datasets and use it to detect hallucinations on the XSumFaith dataset. We demonstrate that our simple approach using textual entailment outperforms the existing factuality inconsistency detection systems by 12% and we provide insightful analysis of all types of hallucination. To advance research in this area, we create and release a dataset, XSumFaith++, which contains balanced instances of hallucinated and non-hallucinated summaries.</abstract>
       <url hash="6dce95bd">2023.icon-1.12</url>
@@ -157,7 +157,7 @@
     <paper id="14">
       <title><fixed-case>H</fixed-case>indi Causal <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank: an Annotated Causal Event Corpus</title>
       <author><first>Tanvi</first><last>Kamble</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>145–150</pages>
       <abstract>Events and states have gained importance in NLP and information retrieval for being semantically rich temporal and spatial information indicators. Event causality helps us identify which events are necessary for another event to occur. The cause-effect event pairs can be relevant for multiple NLP tasks like question answering, summarization, etc. Multiple efforts have been made to identify causal events in documents but very little work has been done in this field in the Hindi language. We create an annotated corpus for detecting and classifying causal event relations on top of the Hindi Timebank (Goel et al., 2020), the ‘Hindi Causal Timebank’ (Hindi CTB). We introduce semantic causal relations like Purpose, Reason, and Enablement inspired from Bejan and Harabagiu (2008)’s annotation scheme and add some special cases particular to Hindi language.</abstract>
       <url hash="e71f33f8">2023.icon-1.14</url>
@@ -166,7 +166,7 @@
     <paper id="15">
       <title>Enriching Electronic Health Record with Semantic Features <fixed-case>U</fixed-case>tilising<fixed-case>P</fixed-case>retrained Transformers</title>
       <author><first>Lena</first><last>AlMutair</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Nishant</first><last>Ravikumar</last></author>
       <pages>151–161</pages>
       <abstract>Electronic Health Records (EHRs) have revolutionised healthcare by enhancing patient care and facilitating provider communication. Nevertheless, the efficient extraction of valuable information from EHRs poses challenges, primarily due to the overwhelming volume of unstructured data, the wide variability in data formats, and the lack of standardised labels. Leveraging deep learning and concept embeddings, we address the gap in context-aware systems for EHRs. The proposed solution was evaluated on the MIMIC III dataset and demonstrated superior performance compared to other methodologies. We addressed the positive impact of the latent feature combined with the note representation in four different settings. Model performance was evaluated using a case study conducted with BertScore, assessing precision, recall, and F1 scores. The model excels in Medical Natural Language Inference (MedNLI) with an 89.3% accuracy, further boosted to 90.5% through retraining the embeddings using International Classification of Diseases (ICD) codes, which we formally designate as ClinicNarrIR. The ClinicNarrIR was tested with 1000 randomly sampled notes, achieving an N DCG@10 score of approximately 0.54 with accuracy@10 of 0.85. The study also demonstrates a high correlation between the results produced by the proposed representation and medical coders. Notably, in all evaluation cases, the optimal base pretrained model that emerged was BlueBERT.</abstract>
@@ -201,7 +201,7 @@
       <author><first>Bhakti</first><last>Jadhav</last></author>
       <author><first>Amruta</first><last>Barbadikar</last></author>
       <author><first>Amba</first><last>Kulkarni</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>180–190</pages>
       <abstract>Processing and understanding of figurative speech is a challenging task for computers as well as humans. In this paper, we present a case of Upamā alaṅkāra (simile). The verbal cognition of the Upamā alaṅkāra by a human is presented as a dependency tree, which involves the identification of various components such as upamāna (vehicle), upameya (topic), sādhāran.a-dharma (common property) and upamādyotaka (word indicating similitude). This involves the repetition of elliptical elements. Further, we show, how the same dependency tree may be represented without any loss of information, even without repetition of elliptical elements. Such a representation would be useful for the computational processing of the alaṅkāras.</abstract>
       <url hash="7117d654">2023.icon-1.18</url>
@@ -223,7 +223,7 @@
       <author><first>Ankush</first><last>Agarwal</last></author>
       <author><first>Sakharam</first><last>Gawade</last></author>
       <author><first>Amar Prakash</first><last>Azad</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>202–294</pages>
       <abstract>Large language models (LLMs) have demon- strated remarkable performance in a wide range of natural language tasks. However, as these models continue to grow in size, they face sig- nificant challenges in terms of computational costs. Additionally, LLMs often lack efficient domain-specific understanding, which is par- ticularly crucial in specialized fields such as aviation and healthcare. To boost the domain- specific understanding, we propose, KITLM 1 , a novel knowledge base integration approach into language model through relevant informa- tion infusion. By integrating pertinent knowl- edge, not only the performance of the lan- guage model is greatly enhanced, but the model size requirement is also significantly reduced while achieving comparable performance. Our proposed knowledge-infused model surpasses the performance of both GPT-3.5-turbo and the state-of-the-art knowledge infusion method, SKILL, achieving over 1.5 times improvement in exact match scores on the MetaQA. KITLM showed a similar performance boost in the avi- ation domain with AeroQA. The drastic perfor- mance improvement of KITLM over the exist- ing methods can be attributed to the infusion of relevant knowledge while mitigating noise. In addition, we release two curated datasets to accelerate knowledge infusion research in specialized fields: a) AeroQA, a new bench- mark dataset designed for multi-hop question- answering within the aviation domain, and b) Aviation Corpus, a dataset constructed from unstructured text extracted from the National Transportation Safety Board reports. Our re- search contributes to advancing the field of domain-specific language understanding and showcases the potential of knowledge infusion techniques in improving the performance.</abstract>
       <url hash="c2e12208">2023.icon-1.20</url>
@@ -252,7 +252,7 @@
     <paper id="23">
       <title>Sentiment Analysis for the Mizo Language: A Comparative Study of Classical Machine Learning and Transfer Learning Approaches</title>
       <author><first>Mercy</first><last>Lalthangmawii</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>308–317</pages>
       <abstract>Sentiment analysis, a subfield of natural language processing (NLP) has witnessed significant advancements in the analysis of usergenerated contents across diverse languages. However, its application to low-resource languages remains a challenge. This research addresses this gap by conducting a comprehensive sentiment analysis experiment in the context of the Mizo language, a low-resource language predominantly spoken in the Indian state of Mizoram and neighboring regions. Our study encompasses the evaluation of various machine learning models including Support Vector Machine (SVM), Decision Tree, Random Forest, K-Nearest Neighbor (K-NN), Logistic Regression and transfer learning using XLM-RoBERTa. The findings reveal the suitability of SVM as a robust performer in Mizo sentiment analysis demonstrating the highest F1 Score and Accuracy among the models tested. XLM-RoBERTa, a transfer learning model exhibits competitive performance highlighting the potential of leveraging pre-trained multilingual models in low-resource language sentiment analysis tasks. This research advances our understanding of sentiment analysis in lowresource languages and serves as a stepping stone for future investigations in this domain.</abstract>
       <url hash="8aec4502">2023.icon-1.23</url>
@@ -318,9 +318,9 @@
       <author><first>Karan</first><last>Singla</last></author>
       <author><first>Shahab</first><last>Jalalv</last></author>
       <author><first>Yeon-Jun</first><last>Kim</last></author>
-      <author><first>Andrej</first><last>Ljolje</last></author>
+      <author id="andrej-ljolje"><first>Andrej</first><last>Ljolje</last></author>
       <author><first>Antonio Moreno</first><last>Daniel</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Benjamin</first><last>Stern</last></author>
       <pages>370–377</pages>
       <abstract>Recent studies have made some progress in refining end-to-end (E2E) speech recognition encoders by applying Connectionist Temporal Classification (CTC) loss to enhance named entity recognition within transcriptions. However, these methods have been constrained by their exclusive use of the ASCII character set, allowing only a limited array of semantic labels. We propose 1SPU, a 1-step Speech Processing Unit which can recognize speech events (e.g: speaker change) or an NL event (Intent, Emotion) while also transcribing vocal content. It extends the E2E automatic speech recognition (ASR) system’s vocabulary by adding a set of unused placeholder symbols, conceptually akin to the &lt;pad&gt; tokens used in sequence modeling. These placeholders are then assigned to represent semantic events (in form of tags) and are integrated into the transcription process as distinct tokens. We demonstrate notable improvements on the SLUE benchmark and yields results that are on par with those for the SLURP dataset. Additionally, we provide a visual analysis of the system’s proficiency in accurately pinpointing meaningful tokens over time, illustrating the enhancement in transcription quality through the utilization of supplementary semantic tags.</abstract>
@@ -330,7 +330,7 @@
     <paper id="30">
       <title>Consolidating Strategies for Countering Hate Speech Using Persuasive Dialogues</title>
       <author><first>Sougata</first><last>Saha</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>378–392</pages>
       <abstract>Hateful comments are prevalent on social media platforms. Although tools for automatically detecting, flagging, and blocking such false, offensive, and harmful content online have lately matured, such reactive and brute force methods alone provide short-term and superficial remedies while the perpetrators persist. With the public availability of large language models which can generate articulate synthetic and engaging content at scale, there are concerns about the rapid growth of dissemination of such malicious content on the web. There is now a need to focus on deeper, long-term solutions that involve engaging with the human perpetrator behind the source of the content to change their viewpoint or at least bring down the rhetoric using persuasive means. To do that, we propose defining and experimenting with controllable strategies for generating counterarguments to hateful comments in online conversations. We experiment with controlling response generation using features based on (i) argument structure and reasoning-based Walton argument schemes, (ii) counter-argument speech acts, and (iii) human characteristicsbased qualities such as Big-5 personality traits and human values. Using automatic and human evaluations, we determine the best combination of features that generate fluent, argumentative, and logically sound arguments for countering hate. We further share the developed computational models for automatically annotating text with such features, and a silver-standard annotated version of an existing hate speech dialog corpora.</abstract>
       <url hash="da26a479">2023.icon-1.30</url>
@@ -475,7 +475,7 @@
       <title>Comparing <fixed-case>DAE</fixed-case>-based and <fixed-case>MASS</fixed-case>-based <fixed-case>UNMT</fixed-case>: Robustness to Word-Order Divergence in <fixed-case>E</fixed-case>nglish–&gt;<fixed-case>I</fixed-case>ndic Language Pairs</title>
       <author><first>Tamali</first><last>Banerjee</last></author>
       <author><first>Rudra</first><last>Murthy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>491–496</pages>
       <abstract>The proliferation of fake news poses a significant challenge in the digital era. Detecting false information, especially in non-English languages, is crucial to combating misinformation effectively. In this research, we introduce a novel approach for Dravidian fake news detection by harnessing the capabilities of the MuRIL transformer model, further enhanced by gradient accumulation techniques. Our study focuses on the Dravidian languages, a diverse group of languages spoken in South India, which are often underserved in natural language processing research. We optimize memory usage, stabilize training, and improve the model’s overall performance by accumulating gradients over multiple batches. The proposed model exhibits promising results in terms of both accuracy and efficiency. Our findings underline the significance of adapting state-of-the-art techniques, such as MuRIL-based models and gradient accumulation, to non-English language.</abstract>
       <url hash="7434fd5d">2023.icon-1.44</url>
@@ -519,7 +519,7 @@
       <author><first>Kalahasti Ganesh</first><last>Srivatsa</last></author>
       <author><first>Sabyasachi</first><last>Mukhopadhyay</last></author>
       <author><first>Ganesh</first><last>Katrapati</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>523–533</pages>
       <abstract>Infrastructure as Code (IaC) is a revolutionary approach which has gained significant prominence in the Industry. IaC manages and provisions IT infrastructure using machinereadable code by enabling automation, consistency across the environments, reproducibility, version control, error reduction and enhancement in scalability. However, IaC orchestration is often a painstaking effort which requires specialised skills as well as a lot of manual effort. Automation of IaC is a necessity in the present conditions of the Industry and in this survey, we study the feasibility of applying Large Language Models (LLM) to address this problem. LLMs are large neural network-based models which have demonstrated significant language processing abilities and shown to be capable of following a range of instructions within a broad scope. Recently, they have also been adapted for code understanding and generation tasks successfully, which makes them a promising choice for the automatic generation of IaC configurations. In this survey, we delve into the details of IaC, usage of IaC in different platforms, their challenges, LLMs in terms of code-generation aspects and the importance of LLMs in IaC along with our own experiments. Finally, we conclude by presenting the challenges in this area and highlighting the scope for future research.</abstract>
       <url hash="6b34f022">2023.icon-1.48</url>
@@ -531,7 +531,7 @@
       <author><first>Melkamu</first><last>Mersha</last></author>
       <author><first>Ananya</first><last>Kalita</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>534–539</pages>
       <abstract>This paper presents the creation of initial bilingual corpora for thirteen very low-resource languages of India, all from Northeast India. It also presents the results of initial translation efforts in these languages. It creates the first-ever parallel corpora for these languages and provides initial benchmark neural machine translation results for these languages. We intend to extend these corpora to include a large number of low-resource Indian languages and integrate the effort with our prior work with African and American-Indian languages to create corpora covering a large number of languages from across the world.</abstract>
       <url hash="72af9a90">2023.icon-1.49</url>
@@ -541,7 +541,7 @@
       <title>Kurosawa: A Script Writer’s Assistant</title>
       <author><first>Prerak</first><last>Gandhi</last></author>
       <author><first>Vishal</first><last>Pramanik</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>540–550</pages>
       <abstract>Storytelling is the lifeline of the entertainment industrymovies, TV shows, and stand-up comedies, all need stories. A good and gripping script is the lifeline of storytelling and demands creativity and resource investment. Good scriptwriters are rare to find and often work under severe time pressure. Consequently, entertainment media are actively looking for automation. In this paper, we present an AIbased script-writing workbench called KUROSAWA which addresses the tasks of plot generation and script generation. Plot generation aims to generate a coherent and creative plot (600–800 words) given a prompt (15–40 words). Script generation, on the other hand, generates a scene (200–500 words) in a screenplay format from a brief description (15–40 words). Kurosawa needs data to train. We use a 4-act structure of storytelling to annotate the plot dataset manually. We create a dataset of 1000 manually annotated plots and their corresponding prompts/storylines and a gold-standard dataset of 1000 scenes with four main elements — scene headings, action lines, dialogues, and character names — tagged individually. We fine-tune GPT-3 with the above datasets to generate plots and scenes. These plots and scenes are first evaluated and then used by the scriptwriters of a large and famous media platform ErosNow. We release the annotated datasets and the models trained on these datasets as a working benchmark for automatic movie plot and script generation.</abstract>
       <url hash="a7eb61b0">2023.icon-1.50</url>
@@ -580,7 +580,7 @@
       <title>Mytho-Annotator: An Annotation tool for <fixed-case>I</fixed-case>ndian <fixed-case>H</fixed-case>indu Mythology</title>
       <author><first>Apurba</first><last>Paul</last></author>
       <author><first>Anupam</first><last>Mondal</last></author>
-      <author><first>Sainik Kumar</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik Kumar</first><last>Mahata</last></author>
       <author><first>Srijan</first><last>Seal</last></author>
       <author><first>Prasun</first><last>Sarkar</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
@@ -671,10 +671,10 @@
     </paper>
     <paper id="63">
       <title>Transfer learning in low-resourced <fixed-case>MT</fixed-case>: An empirical study</title>
-      <author><first>Sainik Kumar</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik Kumar</first><last>Mahata</last></author>
       <author><first>Dipanjan</first><last>Saha</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>646–650</pages>
       <abstract>Translation systems rely on a large and goodquality parallel corpus for producing reliable translations. However, obtaining such a corpus for low-resourced languages is a challenge. New research has shown that transfer learning can mitigate this issue by augmenting lowresourced MT systems with high-resourced ones. In this work, we explore two types of transfer learning techniques, namely, crosslingual transfer learning and multilingual training, both with information augmentation, to examine the degree of performance improvement following the augmentation. Furthermore, we use languages of the same family (Romanic, in our case), to investigate the role of the shared linguistic property, in producing dependable translations.</abstract>
       <url hash="b0cf5b1c">2023.icon-1.63</url>
@@ -771,7 +771,7 @@
       <author><first>Gargi</first><last>Roy</last></author>
       <author><first>Amit</first><last>Barman</last></author>
       <author><first>Indranil</first><last>Dutta</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>720–728</pages>
       <abstract>With the recent surge and exponential growth of social media usage, scrutinizing social media content for the presence of any hateful content is of utmost importance. Researchers have been diligently working since the past decade on distinguishing between content that promotes hatred and content that does not. Traditionally, the main focus has been on analyzing textual content. However, recent research attempts have also commenced into the identification of audio-based content. Nevertheless, studies have shown that relying solely on audio or text-based content may be ineffective, as recent upsurge indicates that individuals often employ sarcasm in their speech and writing. To overcome these challenges, we present an approach to identify whether a speech promotes hate or not utilizing both audio and textual representations. Our methodology is based on the Transformer framework that incorporates both audio and text sampling, accompanied by our very own layer called “Attentive Fusion”. The results of our study surpassed previous stateof-the-art techniques, achieving an impressive macro F1 score of 0.927 on the Test Set.</abstract>
       <url hash="6121f965">2023.icon-1.72</url>
@@ -844,7 +844,7 @@
       <author><first>Indranil</first><last>Dutta</last></author>
       <author><first>Shouvik Kumar</first><last>Guha</last></author>
       <author><first>Samir</first><last>Karmakar</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>773–778</pages>
       <abstract>There is an evident lack of implementation of Machine Learning (ML) in the legal domain in India, and any research that does take place in this domain is usually based on data from the higher courts of law and works with English data. The lower courts and data from the different regional languages of India are often overlooked. In this paper, we deploy a Convolutional Neural Network (CNN) architecture on a corpus of Hindi legal documents. We perform a bail Prediction task with the help of a CNN model and achieve an overall accuracy of 93% which is an improvement on the benchmark accuracy, set by Kapoor et al. (2022), albeit in data from 20 districts of the Indian state of Uttar Pradesh.</abstract>
       <url hash="cdc7758e">2023.icon-1.79</url>
@@ -867,8 +867,8 @@
       <author><first>Ningthoujam Avichandra</first><last>Singh</last></author>
       <author><first>Loitongbam Sanayai</first><last>Meetei</last></author>
       <author><first>Ningthoujam Justwant</first><last>Singh</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>791–796</pages>
       <abstract>In this work, we focus on the development of machine translation (MT) models of a lowresource language pair viz. English-Manipuri. Manipuri is one of the eight scheduled languages of the Indian constitution. Manipuri is currently written in two different scripts: one is its original script called Meitei Mayek and the other is the Bengali script. We evaluate the performance of English-Manipuri MT models based on transformer and transfer learning technique. Our MT models are trained using a dataset of 69,065 parallel sentences and validated on 500 sentences. Using 500 test sentences, the English to Manipuri MT models achieved a BLEU score of 19.13 and 29.05 with mT5 and OpenNMT respectively. The results demonstrate that the OpenNMT model significantly outperforms the mT5 model. Additionally, Manipuri to English MT system trained with OpenNMT model reported a BLEU score of 30.90. We also carried out a comparative analysis between the Bengali script and the transliterated Meitei Mayek script for English-Manipuri MT models. This analysis reveals that the transliterated version enhances the MT model performance resulting in a notable +2.35 improvement in the BLEU score.</abstract>
       <url hash="ee4c9a48">2023.icon-1.81</url>
@@ -943,7 +943,7 @@
       <author><first>Ryan</first><last>Price</last></author>
       <author><first>Bhargav Srinivas</first><last>Chinnari</last></author>
       <author><first>Yeon-Jun</first><last>Kim</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>832–842</pages>
       <url hash="255d5c45">2023.icon-1.87</url>
       <bibkey>karan-etal-2023-combining</bibkey>
diff --git a/data/xml/2023.ijcnlp.xml b/data/xml/2023.ijcnlp.xml
index 8a6f4f1f91..46102b375e 100644
--- a/data/xml/2023.ijcnlp.xml
+++ b/data/xml/2023.ijcnlp.xml
@@ -3,11 +3,11 @@
   <volume id="main" ingest-date="2024-01-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <editor><first>Yuki</first><last>Arase</last></editor>
       <editor><first>Baotian</first><last>Hu</last></editor>
       <editor><first>Wei</first><last>Lu</last></editor>
-      <editor><first>Derry</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></editor>
       <editor><first>Ayu</first><last>Purwarianti</last></editor>
       <editor><first>Adila Alfa</first><last>Krisnadhi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -66,7 +66,7 @@
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Wai Chung</first><last>Kwan</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>54–65</pages>
       <url hash="8223875f">2023.ijcnlp-main.5</url>
       <bibkey>wang-etal-2023-mcml</bibkey>
@@ -76,13 +76,13 @@
       <title><fixed-case>RECESS</fixed-case>: Resource for Extracting Cause, Effect, and Signal Spans</title>
       <author><first>Fiona Anting</first><last>Tan</last></author>
       <author><first>Hansi</first><last>Hettiarachchi</last></author>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Tadashi</first><last>Nomoto</last></author>
       <author><first>Onur</first><last>Uca</last></author>
       <author><first>Farhana Ferdousi</first><last>Liza</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>66–82</pages>
       <url hash="d8f9d241">2023.ijcnlp-main.6</url>
       <bibkey>tan-etal-2023-recess</bibkey>
@@ -123,10 +123,10 @@
     </paper>
     <paper id="10">
       <title><fixed-case>M</fixed-case>asakha<fixed-case>NEWS</fixed-case>: News Topic Classification for <fixed-case>A</fixed-case>frican languages</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Marek</first><last>Masiak</last></author>
       <author><first>Israel Abebe</first><last>Azime</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last></author>
       <author><first>Atnafu Lambebo</first><last>Tonja</last></author>
       <author><first>Christine</first><last>Mwase</last></author>
       <author><first>Odunayo</first><last>Ogundepo</last></author>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="13">
       <title>Phylogeny-Inspired Soft Prompts For Data-to-Text Generation in Low-Resource Languages</title>
-      <author><first>William</first><last>Soto Martinez</last></author>
+      <author id="william-soto-martinez"><first>William</first><last>Soto Martinez</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>186–198</pages>
@@ -269,8 +269,8 @@
       <author><first>Hayate</first><last>Iso</last></author>
       <author><first>Nikita</first><last>Bhutani</last></author>
       <author><first>Estevam</first><last>Hruschka</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>272–284</pages>
       <url hash="43a16c0a">2023.ijcnlp-main.18</url>
       <bibkey>kim-etal-2023-zero</bibkey>
@@ -321,7 +321,7 @@
       <author><first>Chanjun</first><last>Park</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>344–358</pages>
       <url hash="362a40c6">2023.ijcnlp-main.22</url>
       <bibkey>jung-etal-2023-informative</bibkey>
@@ -339,7 +339,7 @@
     </paper>
     <paper id="24">
       <title>Rethinking the Role of Entity Type in Relation Classification</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
       <pages>374–384</pages>
@@ -362,9 +362,9 @@
       <author><first>Artem</first><last>Abzaliev</last></author>
       <author><first>Naihao</first><last>Deng</last></author>
       <author><first>Rui</first><last>Hou</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>Veronica</first><last>Perez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>398–407</pages>
       <url hash="2dedf1f9">2023.ijcnlp-main.26</url>
       <bibkey>kazemi-etal-2023-query</bibkey>
@@ -375,7 +375,7 @@
       <author><first>Yiran</first><last>Wang</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>408–419</pages>
       <url hash="b6f8c29a">2023.ijcnlp-main.27</url>
       <bibkey>wang-etal-2023-24</bibkey>
@@ -474,8 +474,8 @@
       <title>Emerging Challenges in Personalized Medicine: Assessing Demographic Effects on Biomedical Question Answering Systems</title>
       <author><first>Sagi</first><last>Shaier</last></author>
       <author><first>Kevin</first><last>Bennett</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>540–550</pages>
       <url hash="7664fb0c">2023.ijcnlp-main.36</url>
       <bibkey>shaier-etal-2023-emerging</bibkey>
@@ -486,7 +486,7 @@
       <author><first>Nick</first><last>McKenna</last></author>
       <author><first>Tianyi</first><last>Li</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>551–563</pages>
       <url hash="552881d4">2023.ijcnlp-main.37</url>
       <bibkey>mckenna-etal-2023-smoothing</bibkey>
@@ -555,7 +555,7 @@
       <title>Valla: Standardizing and Benchmarking Authorship Attribution and Verification Through Empirical Evaluation and Comparative Analysis</title>
       <author><first>Jacob</first><last>Tyo</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>649–660</pages>
       <url hash="5c8546c9">2023.ijcnlp-main.43</url>
       <bibkey>tyo-etal-2023-valla</bibkey>
@@ -596,7 +596,7 @@
       <title>Analyzing and Predicting Persistence of News Tweets</title>
       <author><first>Maggie</first><last>Liu</last></author>
       <author><first>Jing</first><last>Wang</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>719–732</pages>
       <url hash="70c863d3">2023.ijcnlp-main.46</url>
       <bibkey>liu-etal-2023-analyzing</bibkey>
@@ -620,10 +620,10 @@
       <author><first>Artem</first><last>Vazhentsev</last></author>
       <author><first>Artem</first><last>Shelmanov</last></author>
       <author><first>Xudong</first><last>Han</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
       <author><first>Maxim</first><last>Panov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>744–770</pages>
       <url hash="ad651962">2023.ijcnlp-main.48</url>
       <bibkey>kuzmin-etal-2023-uncertainty</bibkey>
@@ -661,7 +661,7 @@
     <paper id="52">
       <title>Benchmarking Procedural Language Understanding for Low-Resource Languages: A Case Study on <fixed-case>T</fixed-case>urkish</title>
       <author><first>Arda</first><last>Uzunoglu</last></author>
-      <author><first>Gözde</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last></author>
       <pages>804–819</pages>
       <url hash="d8c714dc">2023.ijcnlp-main.52</url>
       <bibkey>uzunoglu-sahin-2023-benchmarking</bibkey>
@@ -681,7 +681,7 @@
     <paper id="54">
       <title><fixed-case>F</fixed-case>i<fixed-case>R</fixed-case>o: Finite-context Indexing of Restricted Output Space for <fixed-case>NLP</fixed-case> Models Facing Noisy Input</title>
       <author><first>Minh</first><last>Nguyen</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>834–845</pages>
       <url hash="5459e553">2023.ijcnlp-main.54</url>
       <bibkey>nguyen-chen-2023-firo</bibkey>
@@ -757,7 +757,7 @@
       <author><first>Hanung</first><last>Linuwih</last></author>
       <author><first>Bryan</first><last>Wilie</last></author>
       <author><first>Galih</first><last>Muridan</last></author>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>David</first><last>Moeljadi</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
@@ -860,9 +860,9 @@
       <author><first>Levon</first><last>Haroutunian</last></author>
       <author><first>Zhuang</first><last>Li</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
-      <author><first>Philip</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
       <author><first>Raj</first><last>Tumuluri</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1067–1082</pages>
       <url hash="8cba3dc6">2023.ijcnlp-main.69</url>
       <bibkey>haroutunian-etal-2023-reranking</bibkey>
@@ -906,11 +906,11 @@
   <volume id="short" ingest-date="2024-01-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <editor><first>Yuki</first><last>Arase</last></editor>
       <editor><first>Baotian</first><last>Hu</last></editor>
       <editor><first>Wei</first><last>Lu</last></editor>
-      <editor><first>Derry</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></editor>
       <editor><first>Ayu</first><last>Purwarianti</last></editor>
       <editor><first>Adila Alfa</first><last>Krisnadhi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -949,7 +949,7 @@
       <title><fixed-case>SQUARE</fixed-case>: Automatic Question Answering Evaluation using Multiple Positive and Negative References</title>
       <author><first>Matteo</first><last>Gabburo</last></author>
       <author><first>Siddhant</first><last>Garg</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>20–28</pages>
       <url hash="ca5ba0f2">2023.ijcnlp-short.3</url>
@@ -960,7 +960,7 @@
       <title>The Impact of Debiasing on the Performance of Language Models in Downstream Tasks is Underestimated</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>29–36</pages>
       <url hash="fe4e2ae0">2023.ijcnlp-short.4</url>
       <bibkey>kaneko-etal-2023-impact</bibkey>
@@ -980,7 +980,7 @@
     <paper id="6">
       <title>Do the Benefits of Joint Models for Relation Extraction Extend to Document-level Tasks?</title>
       <author><first>Pratik</first><last>Saini</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Indrajit</first><last>Bhattacharya</last></author>
       <pages>43–51</pages>
       <url hash="2765efe4">2023.ijcnlp-short.6</url>
@@ -1027,11 +1027,11 @@
     </paper>
     <paper id="11">
       <title>Efficient Zero-Shot Cross-lingual Inference via Retrieval</title>
-      <author><first>Genta</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last></author>
       <author><first>Lingjue</first><last>Xie</last></author>
       <author><first>Karthik</first><last>Radhakrishnan</last></author>
       <author><first>Yifan</first><last>Gao</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <pages>93–104</pages>
       <url hash="b12b9d52">2023.ijcnlp-short.11</url>
       <bibkey>winata-etal-2023-efficient</bibkey>
@@ -1049,8 +1049,8 @@
     <paper id="13">
       <title>Who Are All The Stochastic Parrots Imitating? They Should Tell Us!</title>
       <author><first>Sagi</first><last>Shaier</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>113–120</pages>
       <url hash="0b5fb7cc">2023.ijcnlp-short.13</url>
       <bibkey>shaier-etal-2023-stochastic</bibkey>
@@ -1060,7 +1060,7 @@
       <title>Incorporating Singletons and Mention-based Features in Coreference Resolution via Multi-task Learning for Better Generalization</title>
       <author><first>Yilun</first><last>Zhu</last></author>
       <author><first>Siyao</first><last>Peng</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>121–130</pages>
       <url hash="64374600">2023.ijcnlp-short.14</url>
@@ -1073,7 +1073,7 @@
       <author><first>Congying</first><last>Xia</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Tingting</first><last>Liang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>131–138</pages>
       <url hash="e647c520">2023.ijcnlp-short.15</url>
       <bibkey>du-etal-2023-labels</bibkey>
@@ -1085,7 +1085,7 @@
       <author><first>Fatemeh</first><last>Shiri</last></author>
       <author><first>Van</first><last>Nguyen</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>139–145</pages>
       <url hash="52f61074">2023.ijcnlp-short.16</url>
       <bibkey>moghimifar-etal-2023-theia</bibkey>
@@ -1143,7 +1143,7 @@
       <title>Are Machine Reading Comprehension Systems Robust to Context Paraphrasing?</title>
       <author><first>Yulong</first><last>Wu</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>184–196</pages>
       <url hash="4ead99c6">2023.ijcnlp-short.21</url>
       <bibkey>wu-etal-2023-machine</bibkey>
@@ -1152,8 +1152,8 @@
     <paper id="22">
       <title>It’s not only What You Say, It’s also Who It’s Said to: Counterfactual Analysis of Interactive Behavior in the Courtroom</title>
       <author><first>Biaoyan</first><last>Fang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>197–207</pages>
       <url hash="ecd54fac">2023.ijcnlp-short.22</url>
@@ -1333,7 +1333,7 @@
       <author><first>Alyssa Marie</first><last>Loo</last></author>
       <author><first>Lintang</first><last>Sutawika</last></author>
       <author><first>Skyler</first><last>Wang</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Zheng-Xin</first><last>Yong</last></author>
       <author><first>Ruochen</first><last>Zhang</last></author>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
@@ -1426,7 +1426,7 @@
       <title>Turning Whisper into Real-Time Transcription System</title>
       <author><first>Dominik</first><last>Macháček</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>17–24</pages>
       <url hash="32e13cbe">2023.ijcnlp-demo.3</url>
       <bibkey>machacek-etal-2023-turning</bibkey>
diff --git a/data/xml/2023.inlg.xml b/data/xml/2023.inlg.xml
index 79da3ed210..d051fae418 100644
--- a/data/xml/2023.inlg.xml
+++ b/data/xml/2023.inlg.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 16th International Natural Language Generation Conference</booktitle>
       <editor><first>C. Maria</first><last>Keet</last></editor>
       <editor><first>Hung-Yi</first><last>Lee</last></editor>
-      <editor><first>Sina</first><last>Zarrieß</last></editor>
+      <editor id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czechia</address>
       <month>September</month>
@@ -91,7 +91,7 @@
       <author><first>Gromit Yeuk-Yin</first><last>Chan</last></author>
       <author><first>Eunyee</first><last>Koh</last></author>
       <author><first>C Lee</first><last>Giles</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>80–92</pages>
       <abstract>Good figure captions help paper readers understand complex scientific figures. Unfortunately, even published papers often have poorly written captions. Automatic caption generation could aid paper writers by providing good starting captions that can be refined for better quality. Prior work often treated figure caption generation as a vision-to-language task. In this paper, we show that it can be more effectively tackled as a text summarization task in scientific documents. We fine-tuned PEGASUS, a pre-trained abstractive summarization model, to specifically summarize figure-referencing paragraphs (e.g., “Figure 3 shows...”) into figure captions. Experiments on large-scale arXiv figures show that our method outperforms prior vision methods in both automatic and human evaluations. We further conducted an in-depth investigation focused on two key challenges: (i) the common presence of low-quality author-written captions and (ii) the lack of clear standards for good captions. Our code and data are available at: https://github.com/Crowd-AI-Lab/Generating-Figure-Captions-as-a-Text-Summarization-Task.</abstract>
       <url hash="a86436c2">2023.inlg-main.6</url>
@@ -103,7 +103,7 @@
       <title>Models of reference production: How do they withstand the test of time?</title>
       <author><first>Fahime</first><last>Same</last></author>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>93–105</pages>
       <abstract>In recent years, many NLP studies have focused solely on performance improvement. In this work, we focus on the linguistic and scientific aspects of NLP. We use the task of generating referring expressions in context (REG-in-context) as a case study and start our analysis from GREC, a comprehensive set of shared tasks in English that addressed this topic over a decade ago. We ask what the performance of models would be if we assessed them (1) on more realistic datasets, and (2) using more advanced methods. We test the models using different evaluation metrics and feature selection experiments. We conclude that GREC can no longer be regarded as offering a reliable assessment of models’ ability to mimic human reference production, because the results are highly impacted by the choice of corpus and evaluation metrics. Our results also suggest that pre-trained language models are less dependent on the choice of corpus than classic Machine Learning models, and therefore make more robust class predictions.</abstract>
       <url hash="6c4e84b2">2023.inlg-main.7</url>
@@ -114,7 +114,7 @@
       <title>Generating Faithful Text From a Knowledge Graph with Noisy Reference Text</title>
       <author><first>Tahsina</first><last>Hashem</last></author>
       <author><first>Weiqing</first><last>Wang</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Mohammed Eunus</first><last>Ali</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
       <pages>106–122</pages>
@@ -151,7 +151,7 @@
       <author><first>Leonardo</first><last>Bertolazzi</last></author>
       <author><first>Davide</first><last>Mazzaccara</last></author>
       <author><first>Filippo</first><last>Merlo</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>153–162</pages>
       <abstract>Large Language Models, and ChatGPT in particular, have recently grabbed the attention of the community and the media. Having reached high language proficiency, attention has been shifting toward its reasoning capabilities. In this paper, our main aim is to evaluate ChatGPT’s question generation in a task where language production should be driven by an implicit reasoning process. To this end, we employ the 20-Questions game, traditionally used within the Cognitive Science community to inspect the information seeking-strategy’s development. This task requires a series of interconnected skills: asking informative questions, stepwise updating the hypothesis space, and stopping asking questions when enough information has been collected. We build hierarchical hypothesis spaces, exploiting feature norms collected from humans vs. ChatGPT itself, and we inspect the efficiency and informativeness of ChatGPT’s strategy. Our results show that ChatGPT’s performance gets closer to an optimal agent only when prompted to explicitly list the updated space stepwise.</abstract>
       <url hash="9e0a8c6c">2023.inlg-main.11</url>
@@ -213,7 +213,7 @@
       <author><first>Clement</first><last>Rebuffel</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Laure</first><last>Soulier</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Patrick</first><last>Gallinari</last></author>
       <pages>221–236</pages>
       <abstract>Neural data-to-text systems lack the control and factual accuracy required to generate useful and insightful summaries of multidimensional data. We propose a solution in the form of data views, where each view describes an entity and its attributes along specific dimensions. A sequence of views can then be used as a high-level schema for document planning, with the neural model handling the complexities of micro-planning and surface realization. We show that our view-based system retains factual accuracy while offering high-level control of output that can be tailored based on user preference or other norms within the domain.</abstract>
@@ -236,7 +236,7 @@
     <paper id="18">
       <title>Metric-Based In-context Learning: A Case Study in Text Simplification</title>
       <author><first>Subhadra</first><last>Vadlamannati</last></author>
-      <author><first>Gözde</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde</first><last>Şahin</last></author>
       <pages>253–268</pages>
       <abstract>In-context learning (ICL) for large language models has proven to be a powerful approach for many natural language processing tasks. However, determining the best method to select examples for ICL is nontrivial as the results can vary greatly depending on the quality, quantity, and order of examples used. In this paper, we conduct a case study on text simplification (TS) to investigate how to select the best and most robust examples for ICL. We propose Metric-Based in-context Learning (MBL) method that utilizes commonly used TS metrics such as SARI, compression ratio, and BERT-Precision for selection. Through an extensive set of experiments with various-sized GPT models on standard TS benchmarks such as TurkCorpus and ASSET, we show that examples selected by the top SARI scores perform the best on larger models such as GPT-175B, while the compression ratio generally performs better on smaller models such as GPT-13B and GPT-6.7B. Furthermore, we demonstrate that MBL is generally robust to example orderings and out-of-domain test sets, and outperforms strong baselines and state-of-the-art finetuned language models. Finally, we show that the behavior of large GPT models can be implicitly controlled by the chosen metric. Our research provides a new framework for selecting examples in ICL, and demonstrates its effectiveness in text simplification tasks, breaking new ground for more accurate and efficient NLG systems.</abstract>
       <url hash="a46468c1">2023.inlg-main.18</url>
@@ -270,7 +270,7 @@
     <paper id="21">
       <title><fixed-case>HL</fixed-case> Dataset: Visually-grounded Description of Scenes, Actions and Rationales</title>
       <author><first>Michele</first><last>Cafagna</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>293–312</pages>
       <abstract>Current captioning datasets focus on object-centric captions, describing the visible objects in the image, often ending up stating the obvious (for humans), e.g. “people eating food in a park”. Although these datasets are useful to evaluate the ability of Vision &amp; Language models to recognize and describe visual content, they do not support controlled experiments involving model testing or fine-tuning, with more high-level captions, which humans find easy and natural to produce. For example, people often describe images based on the type of scene they depict (“people at a holiday resort”) and the actions they perform (“people having a picnic”). Such concepts are based on personal experience and contribute to forming common sense assumptions. We present the High-Level Dataset, a dataset extending 14997 images from the COCO dataset, aligned with a new set of 134,973 human-annotated (high-level) captions collected along three axes: scenes, actions and rationales. We further extend this dataset with confidence scores collected from an independent set of readers, as well as a set of narrative captions generated synthetically, by combining each of the three axes. We describe this dataset and analyse it extensively. We also present baseline results for the High-Level Captioning task.</abstract>
@@ -294,7 +294,7 @@
     <paper id="23">
       <title>The Next Chapter: A Study of Large Language Models in Storytelling</title>
       <author><first>Zhuohan</first><last>Xie</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <pages>323–351</pages>
       <abstract>To enhance the quality of generated stories, recent story generation models have been investigating the utilization of higher-level attributes like plots or commonsense knowledge. The application of prompt-based learning with large language models (LLMs), exemplified by GPT-3, has exhibited remarkable performance in diverse natural language processing (NLP) tasks. This paper conducts a comprehensive investigation, utilizing both automatic and human evaluation, to compare the story generation capacity of LLMs with recent models across three datasets with variations in style, register, and length of stories. The results demonstrate that LLMs generate stories of significantly higher quality compared to other story generation models. Moreover, they exhibit a level of performance that competes with human authors, albeit with the preliminary observation that they tend to replicate real stories in situations involving world knowledge, resembling a form of plagiarism.</abstract>
@@ -318,7 +318,7 @@
       <author><first>Sharan</first><last>Narasimhan</last></author>
       <author><first>Pooja</first><last>H</last></author>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <pages>362–374</pages>
       <abstract>Text Style Transfer (TST) is performable through approaches such as latent space disentanglement, cycle-consistency losses, prototype editing etc. The prototype editing approach, which is known to be quite successful in TST, involves two key phases a) Masking of source style-associated tokens and b) Reconstruction of this source-style masked sentence conditioned with the target style. We follow a similar transduction method, in which we transpose the more difficult direct source to target TST task to a simpler Style-Masked Language Model (SMLM) Task, wherein, similar to BERT (CITATION), the goal of our model is now to reconstruct the source sentence from its style-masked version. We arrive at the SMLM mechanism naturally by formulating prototype editing/ transduction methods in a probabilistic framework, where TST resolves into estimating a hypothetical parallel dataset from a partially observed parallel dataset, wherein each domain is assumed to have a common latent style-masked prior. To generate this style-masked prior, we use “Explainable Attention” as our choice of attribution for a more precise style-masking step and also introduce a cost-effective and accurate “Attribution-Surplus” method of determining the position of masks from any arbitrary attribution model in O(1) time. We empirically show that this non-generational approach well suites the “content preserving” criteria for a task like TST, even for a complex style like Discourse Manipulation. Our model, the Style MLM, outperforms strong TST baselines and is on par with state-of-the-art TST models, which use complex architectures and orders of more parameters.</abstract>
       <url hash="12fa2e6f">2023.inlg-main.25</url>
@@ -410,8 +410,8 @@
     <paper id="33">
       <title>Reducing named entity hallucination risk to ensure faithful summary generation</title>
       <author><first>Eunice</first><last>Akani</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Romain</first><last>Gemignani</last></author>
       <pages>437–442</pages>
       <abstract>The faithfulness of abstractive text summarization at the named entities level is the focus of this study. We propose to add a new criterion to the summary selection method based on the “risk” of generating entities that do not belong to the source document. This method is based on the assumption that Out-Of-Document entities are more likely to be hallucinations. This assumption was verified by a manual annotation of the entities occurring in a set of generated summaries on the CNN/DM corpus. This study showed that only 29% of the entities outside the source document were inferrable by the annotators, leading to 71% of hallucinations among OOD entities. We test our selection method on the CNN/DM corpus and show that it significantly reduces the hallucination risk on named entities while maintaining competitive results with respect to automatic evaluation metrics like ROUGE.</abstract>
@@ -422,7 +422,7 @@
     </paper>
     <paper id="34">
       <title>Building a dual dataset of text- and image-grounded conversations and summarisation in Gàidhlig (<fixed-case>S</fixed-case>cottish <fixed-case>G</fixed-case>aelic)</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>William</first><last>Lamb</last></author>
       <author><first>Anna</first><last>Groundwater</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
@@ -452,7 +452,7 @@
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Francois</first><last>Lareau</last></author>
       <author><first>Stamatia</first><last>Dasiopoulou</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>455–466</pages>
       <abstract>Rule-based text generators lack the coverage and fluency of their neural counterparts, but have two big advantages over them: (i) they are entirely controllable and do not hallucinate; and (ii) they can fully explain how an output was generated from an input. In this paper we leverage these two advantages to create large and reliable synthetic datasets with multiple human-intelligible intermediate representations. We present the Modular Data-to-Text (Mod-D2T) Dataset which incorporates ten intermediate-level representations between input triple sets and output text; the mappings from one level to the next can broadly be interpreted as the traditional modular tasks of an NLG pipeline. We describe the Mod-D2T dataset, evaluate its quality via manual validation and discuss its applications and limitations. Data, code and documentation are available at https://github.com/mille-s/Mod-D2T.</abstract>
       <url hash="1ad1c710">2023.inlg-main.36</url>
@@ -465,7 +465,7 @@
       <booktitle>Proceedings of the 16th International Natural Language Generation Conference: System Demonstrations</booktitle>
       <editor><first>C. Maria</first><last>Keet</last></editor>
       <editor><first>Hung-Yi</first><last>Lee</last></editor>
-      <editor><first>Sina</first><last>Zarrieß</last></editor>
+      <editor id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czechia</address>
       <month>September</month>
@@ -492,7 +492,7 @@
     </paper>
     <paper id="2">
       <title>enunlg: a Python library for reproducible neural data-to-text experimentation</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <pages>4–5</pages>
       <abstract>Over the past decade, a variety of neural architectures for data-to-text generation (NLG) have been proposed. However, each system typically has its own approach to pre- and post-processing and other implementation details. Diversity in implementations is desirable, but it also confounds attempts to compare model performance: are the differences due to the proposed architectures or are they a byproduct of the libraries used or a result of pre- and post-processing decisions made? To improve reproducibility, we re-implement several pre-Transformer neural models for data-to-text NLG within a single framework to facilitate direct comparisons of the models themselves and better understand the contributions of other design choices. We release our library at https://github.com/NapierNLP/enunlg to serve as a baseline for ongoing work in this area including research on NLG for low-resource languages where transformers might not be optimal.</abstract>
@@ -541,7 +541,7 @@
     <paper id="1">
       <title><fixed-case>LOWRECORP</fixed-case>: the Low-Resource <fixed-case>NLG</fixed-case> Corpus Building Challenge</title>
       <author><first>Khyathi Raghavi</first><last>Chandu</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <author><first>Yi-Ling</first><last>Chung</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
@@ -565,7 +565,7 @@
       <title>Visually Grounded Story Generation Challenge</title>
       <author><first>Xudong</first><last>Hong</last></author>
       <author><first>Khushboo</first><last>Mehra</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>17–22</pages>
       <abstract>Recent large pre-trained models have achieved strong performance in multimodal language generation, which requires a joint effort of vision and language modeling. However, most previous generation tasks are based on single image input and produce short text descriptions that are not grounded on the input images. In this work, we propose a shared task on visually grounded story generation. The input is an image sequence, and the output is a story that is conditioned on the input images. This task is particularly challenging because: 1) the protagonists in the generated stories need to be grounded in the images and 2) the output story should be a coherent long-form text. We aim to advance the study of vision-based story generation by accepting submissions that propose new methods as well as new evaluation measures.</abstract>
@@ -647,7 +647,7 @@
       <title>The <fixed-case>T</fixed-case>okyo Tech and <fixed-case>AIST</fixed-case> System at the <fixed-case>G</fixed-case>en<fixed-case>C</fixed-case>hal 2022 Shared Task on Feedback Comment Generation</title>
       <author><first>Shota</first><last>Koyama</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>74–78</pages>
       <abstract>This paper describes the Tokyo Tech and AIST system in the GenChal 2022 shared task, which is the first shared task of feedback comment generation. We adopted five methods: data cleaning, fine-tuning pre-trained models, correcting errors in learners’ sentences, appending a correcting operation, and filtering out irrelevant outputs. Our system achieved F1 = 43.4 on the test dataset.</abstract>
       <url hash="22b99e21">2023.inlg-genchal.11</url>
@@ -657,7 +657,7 @@
       <title>Feedback comment generation using predicted grammatical terms</title>
       <author><first>Kunitaka</first><last>Jimichi</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>79–83</pages>
       <abstract>The purpose of feedback comment generation is to provide useful feedback comments for a wide range of errors in learners’ essays from a language learning perspective. Since it is difficult to obtain appropriate comments at a practical level with rule-based or retrieval- based methods, we explore neural-based gen- erative methods with pre-trained models. We further assume the effectiveness of consider- ing grammatical terms in generating feedback comments. Specifically, this paper proposes T5-based models using predicted grammati- cal terms, submitted to FCG GenChal, and presents their results. By using correct gram- matical terms, our model could improve the BLEU score by 19.0 points, compared with the baseline T5 without grammatical terms on the development dataset. Furthermore, by using predicted grammatical terms, our model could improve the manual evaluation score by 2.33 points, compared with the baseline T5 without grammatical terms on the test dataset.</abstract>
       <url hash="37624194">2023.inlg-genchal.12</url>
@@ -717,7 +717,7 @@
     <paper id="16">
       <title>Team Iterate @ <fixed-case>A</fixed-case>uto<fixed-case>M</fixed-case>in 2023 - Experiments with Iterative Minuting</title>
       <author><first>František</first><last>Kmječ</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>114–120</pages>
       <abstract>This report describes the development of our system for automatic minuting created for the AutoMin 2023 Task A. As a baseline, we utilize a system based on the BART encoder-decoder model paired with a preprocessing pipeline similar to the one introduced by the winning solutions at AutoMin 2021. We then further explore the possibilities for iterative summarization by constructing an iterative minuting dataset from the provided data, finetuning on it and feeding the model previously generated minutes. We also experiment with adding more context by utilizing the Longformer encoder-decoder model and finetuning it on the SAMSum dataset. Our submitted solution is of the baseline approach, since we were unable to match its performance with our iterative variants. With the baseline, we achieve a ROUGE-1 score of 0.368 on the ELITR minuting corpus development set. We finally explore the performance of Vicuna 13B quantized language model for summarization.</abstract>
       <url hash="de714ff8">2023.inlg-genchal.16</url>
@@ -728,7 +728,7 @@
       <author><first>Ismaël</first><last>Rousseau</last></author>
       <author><first>Loïc</first><last>Fosse</last></author>
       <author><first>Youness</first><last>Dkhissi</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
       <pages>121–131</pages>
       <abstract>This document reports the approach of our team Darbarer for the main task (Task A) of the AutoMin 2023 challenge. Our system is composed of four main modules. The first module relies on a text simplification model aiming at standardizing the utterances of the conversation and compressing the input in order to focus on informative content. The second module handles summarization by employing a straightforward segmentation strategy and a fine-tuned BART-based generative model. Then a titling module has been trained in order to propose a short description of each summarized block. Lastly, we apply a post-processing step aimed at enhancing readability through specific formatting rules. Our contributions lie in the first, third and last steps. Our system generates precise and concise minutes. We provide a detailed description of our modules, discuss the difficulty of evaluating their impact and propose an analysis of observed errors in our generated minutes.</abstract>
@@ -747,7 +747,7 @@
     <paper id="19">
       <title>Overview of the Second Shared Task on Automatic Minuting (<fixed-case>A</fixed-case>uto<fixed-case>M</fixed-case>in) at <fixed-case>INLG</fixed-case> 2023</title>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Marie</first><last>Hledíková</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
diff --git a/data/xml/2023.insights.xml b/data/xml/2023.insights.xml
index 801c09f2b4..305453a3cb 100644
--- a/data/xml/2023.insights.xml
+++ b/data/xml/2023.insights.xml
@@ -23,7 +23,7 @@
     </frontmatter>
     <paper id="1">
       <title>Missing Information, Unresponsive Authors, Experimental Flaws: The Impossibility of Assessing the Reproducibility of Previous Human Evaluations in <fixed-case>NLP</fixed-case></title>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <author><first>Craig</first><last>Thomson</last><affiliation>University of Aberdeen</affiliation></author>
       <author><first>Ehud</first><last>Reiter</last><affiliation>University of Aberdeen</affiliation></author>
       <author><first>Gavin</first><last>Abercrombie</last><affiliation>Heriot-Watt University</affiliation></author>
@@ -32,7 +32,7 @@
       <author><first>Anouck</first><last>Braggaar</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Elizabeth</first><last>Clark</last><affiliation>Google Research</affiliation></author>
-      <author><first>Kees</first><last>van Deemter</last><affiliation>Utrecht University</affiliation></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Tanvi</first><last>Dinkar</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Ondřej</first><last>Dušek</last><affiliation>Charles University Prague</affiliation></author>
       <author><first>Steffen</first><last>Eger</last><affiliation>Bielefeld University</affiliation></author>
@@ -42,11 +42,11 @@
       <author><first>Dimitra</first><last>Gkatzia</last><affiliation>Edinburgh Napier University</affiliation></author>
       <author><first>Javier</first><last>González-Corbelle</last><affiliation>Universidade de Santiago de Compostela</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
-      <author><first>Manuela</first><last>Hürlimann</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last><affiliation>Zurich University of Applied Sciences</affiliation></author>
       <author><first>Takumi</first><last>Ito</last><affiliation>Tohoku University</affiliation></author>
-      <author><first>John D.</first><last>Kelleher</last><affiliation>Technological University Dublin</affiliation></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last><affiliation>Technological University Dublin</affiliation></author>
       <author><first>Filip</first><last>Klubicka</last><affiliation>Technological University Dublin</affiliation></author>
-      <author><first>Emiel</first><last>Krahmer</last><affiliation>Tilburg University</affiliation></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Huiyuan</first><last>Lai</last><affiliation>Groningen University</affiliation></author>
       <author><first>Chris</first><last>van der Lee</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Yiru</first><last>Li</last><affiliation>Groningen University</affiliation></author>
@@ -59,7 +59,7 @@
       <author><first>Ondřej</first><last>Plátek</last><affiliation>Charles University Prague</affiliation></author>
       <author><first>Verena</first><last>Rieser</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Jie</first><last>Ruan</last><affiliation>Peking University</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>Dataminr</affiliation></author>
       <author><first>Antonio</first><last>Toral</last><affiliation>Groningen University</affiliation></author>
       <author><first>Xiaojun</first><last>Wan</last><affiliation>Peking University</affiliation></author>
       <author><first>Leo</first><last>Wanner</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
@@ -98,7 +98,7 @@
       <author><first>Qi Chwen</first><last>Ong</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Duy</first><last>Phung</last><affiliation>VinAI Research Vietnam</affiliation></author>
       <author><first>Mathieu</first><last>Ravaut</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University; Salesforce AI Research</affiliation></author>
       <author><first>Josip</first><last>Car</last><affiliation>LKCMedicine, NTU Singapore</affiliation></author>
       <pages>19-32</pages>
       <abstract>Low-quality data can cause downstream problems in high-stakes applications. Data-centric approach emphasizes on improving dataset quality to enhance model performance. High-quality datasets are needed for general-purpose Large Language Models (LLMs) training, as well as for domain-specific models, which are usually small in size as it is costly to engage a large number of domain experts for their creation. Thus, it is vital to ensure high-quality domain-specific training data. In this paper, we propose a framework for enhancing the data quality of original datasets. (Code and dataset are available at https://github.com/IvaBojic/framework). We applied the proposed framework to four biomedical datasets and showed relative improvement of up to 33%/40% for fine-tuning of retrieval/reader models on the BioASQ dataset when using back translation to enhance the original dataset quality.</abstract>
@@ -111,7 +111,7 @@
       <title>Encoding Sentence Position in Context-Aware Neural Machine Translation with Concatenation</title>
       <author><first>Lorenzo</first><last>Lupo</last><affiliation>LIG</affiliation></author>
       <author><first>Marco</first><last>Dinarelli</last><affiliation>LIG</affiliation></author>
-      <author><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>33-44</pages>
       <abstract>Context-aware translation can be achieved by processing a concatenation of consecutive sentences with the standard Transformer architecture. This paper investigates the intuitive idea of providing the model with explicit information about the position of the sentences contained in the concatenation window. We compare various methods to encode sentence positions into token representations, including novel methods. Our results show that the Transformer benefits from certain sentence position encoding methods on English to Russian translation, if trained with a context-discounted loss. However, the same benefits are not observed on English to German. Further empirical efforts are necessary to define the conditions under which the proposed approach is beneficial.</abstract>
       <url hash="939d268f">2023.insights-1.4</url>
@@ -215,8 +215,8 @@
     <paper id="13">
       <title>What Does <fixed-case>BERT</fixed-case> actually Learn about Event Coreference? Probing Structural Information in a Fine-Tuned <fixed-case>D</fixed-case>utch Language Model</title>
       <author><first>Loic</first><last>De Langhe</last><affiliation>Ghent University</affiliation></author>
-      <author><first>Orphee</first><last>De Clercq</last><affiliation>LT3, Ghent University</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
       <pages>103-108</pages>
       <abstract>We probe structural and discourse aspects of coreferential relationships in a fine-tuned Dutch BERT event coreference model. Previous research has suggested that no such knowledge is encoded in BERT-based models and the classification of coreferential relationships ultimately rests on outward lexical similarity. While we show that BERT can encode a (very) limited number of these discourse aspects (thus disproving assumptions in earlier research), we also note that knowledge of many structural features of coreferential relationships is absent from the encodings generated by the fine-tuned BERT model.</abstract>
       <url hash="f24b1876">2023.insights-1.13</url>
diff --git a/data/xml/2023.isa.xml b/data/xml/2023.isa.xml
index 7203c8408d..5598e339e2 100644
--- a/data/xml/2023.isa.xml
+++ b/data/xml/2023.isa.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-10-22" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 19th Joint ACL-ISO Workshop on Interoperable Semantics (ISA-19)</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Nancy, France</address>
       <month>June</month>
@@ -22,10 +22,10 @@
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
       <author><first>Rosario</first><last>Uceda-Sosa</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Peter</first><last>Anick</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1–10</pages>
       <abstract>With 102,530,067 items currently in its crowd-sourced knowledge base, Wikidata provides NLP practitioners a unique and powerful resource for inference and reasoning over real-world entities. However, because Wikidata is very entity focused, <i>events</i> and <i>actions</i> are often labeled with eventive nouns (e.g., the process of diagnosing a person’s illness is labeled “diagnosis”), and the typical participants in an event are not described or linked to that event concept (e.g., the medical professional or patient). Motivated by a need for an adaptable, comprehensive, domain-flexible ontology for information extraction, including identifying the roles entities are playing in an event, we present a curated subset of Wikidata in which events have been enriched with PropBank roles. To enable richer narrative understanding between events from Wikidata concepts, we have also provided a comprehensive mapping from temporal Qnodes and Pnodes to the Allen Interval Temporal Logic relations.</abstract>
       <url hash="7cef5c6f">2023.isa-1.1</url>
@@ -98,7 +98,7 @@
       <title>An Abstract Specification of <fixed-case>V</fixed-case>ox<fixed-case>ML</fixed-case> as an Annotation Language</title>
       <author><first>Kiyong</first><last>Lee</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>66–74</pages>
       <abstract>VoxML is a modeling language used to map natural language expressions into real time visualizations using real-world semantic knowledge of objects and events. Its utility has been demonstrated in embodied simulation environmens and in agent-object interactions in situated human-agent communicative. It is enriched to work with notions of affordances, both Gibsonian and Telic, and habitat for various interactions between the rational agent (human) and an object. This paper aims to specify VoxML as an annotation language in general abstract terms. It then shows how it works on annotating linguistic data that express visually perceptible human-object interactions. The annotation structures thus generated will be interpreted against the enriched minimal model created by VoxML as a modeling language while supporting the modeling purposes of VoxML linguistically.</abstract>
       <url hash="9ebfabc0">2023.isa-1.9</url>
diff --git a/data/xml/2023.iwcs.xml b/data/xml/2023.iwcs.xml
index f28d5614f8..1b1d66edd9 100644
--- a/data/xml/2023.iwcs.xml
+++ b/data/xml/2023.iwcs.xml
@@ -37,7 +37,7 @@
     <paper id="3">
       <title>Probing <fixed-case>BERT</fixed-case>’s ability to encode sentence modality and modal verb sense across varieties of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Jonas</first><last>Wagner</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>28–38</pages>
       <abstract>In this research, we investigate whether BERT can differentiate between modal verb senses and sentence modalities and whether it performs equally well on different varieties of English. We fit probing classifiers under two conditions: contextualised embeddings of modal verbs and sentence embeddings. We also investigate BERT’s ability to predict masked modal verbs. Additionally, we classify separately for each modal verb to investigate whether BERT encodes different representations of senses for each individual verb. Lastly, we employ classifiers on data from different varieties of English to determine whether non-American English data is an additional hurdle. Results indicate that BERT has different representations for distinct senses for each modal verb, but does not represent modal sense independently from modal verbs. We also show that performance in different varieties of English is not equal, pointing to a necessary shift in the way we train large language models towards more linguistic diversity. We make our annotated dataset of modal sense in different varieties of English available at https://github.com/wagner-jonas/VEM.</abstract>
       <url hash="97483327">2023.iwcs-1.3</url>
@@ -49,7 +49,7 @@
       <author><first>Kyeongmin</first><last>Rim</last></author>
       <author><first>Eben</first><last>Holderness</last></author>
       <author><first>Bingyang</first><last>Ye</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>39–49</pages>
       <abstract>Understanding inferences from text requires more than merely recovering surface arguments, adjuncts, or strings associated with the query terms. As humans, we interpret sentences as contextualized components of a narrative or discourse, by both filling in missing information, and reasoning about event consequences. In this paper, we define the process of rewriting a textual expression (lexeme or phrase) such that it reduces ambiguity while also making explicit the underlying semantics that is not (necessarily) expressed in the economy of sentence structure as Dense Paraphrasing (DP). We apply the DP techniques on the English procedural texts from the cooking recipe domain, and provide the scope and design of the application that involves creating a graph representation of events and generating hidden arguments through paraphrasing. We provide insights on how this DP process can enrich a source text by showing that the dense-paraphrased event graph is a good resource to large LLMs such as GPT-3 to generate reliable paraphrases; and by experimenting baselines for automaticDP generation. Finally, we demonstrate the utility of the dataset and event graph structure by providing a case study on the out-of-domain modeling and different DP prompts and GPT models for paraphrasing.</abstract>
       <url hash="af822f5b">2023.iwcs-1.4</url>
@@ -59,7 +59,7 @@
       <title>Towards Unsupervised Compositional Entailment with Multi-Graph Embedding Models</title>
       <author><first>Lorenzo</first><last>Bertolini</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>50–61</pages>
       <abstract>Compositionality and inference are essential features of human language, and should hence be simultaneously accessible to a model of meaning. Despite being theory-grounded, distributional models can only be directly tested on compositionality, usually through similarity judgements, while testing for inference requires external resources. Recent work has shown that knowledge graph embeddings (KGE) architectures can be used to train distributional models capable of learning syntax-aware compositional representations, by training on syntactic graphs. We propose to expand such work with Multi-Graphs embedding (MuG) models, a new set of models learning from syntactic and knowledge-graphs. Using a phrase-level inference task, we show how MuGs can simultaneously handle syntax-aware composition and inference, and remain competitive distributional models with respect to lexical and compositional similarity.</abstract>
       <url hash="2cbeb3d6">2023.iwcs-1.5</url>
@@ -68,7 +68,7 @@
     <paper id="6">
       <title>Gender-tailored Semantic Role Profiling for <fixed-case>G</fixed-case>erman</title>
       <author><first>Manfred</first><last>Klenner</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Alison</first><last>Kim</last></author>
       <author><first>Dylan</first><last>Massey</last></author>
       <pages>62–66</pages>
@@ -125,7 +125,7 @@
     <paper id="12">
       <title>The Universe of Utterances According to <fixed-case>BERT</fixed-case></title>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>99–105</pages>
       <abstract>It has been argued that BERT “rediscovers the traditional NLP pipeline”, with lower layers extracting morphosyntactic features and higher layers creating holistic sentence-level representations. In this paper, we critically examine this assumption through a principle-component-guided analysis, extracing sets of inputs that correspond to specific activation patterns in BERT sentence representations. We find that even in higher layers, the model mostly picks up on a variegated bunch of low-level features, many related to sentence complexity, that presumably arise from its specific pre-training objectives.</abstract>
       <url hash="0fce0a15">2023.iwcs-1.12</url>
@@ -145,7 +145,7 @@
       <title>Semantically Informed Data Augmentation for Unscoped Episodic Logical Forms</title>
       <author><first>Mandar</first><last>Juvekar</last></author>
       <author><first>Gene</first><last>Kim</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>116–133</pages>
       <abstract>Unscoped Logical Form (ULF) of Episodic Logic is a meaning representation format that captures the overall semantic type structure of natural language while leaving certain finer details, such as word sense and quantifier scope, underspecified for ease of parsing and annotation. While a learned parser exists to convert English to ULF, its performance is severely limited by the lack of a large dataset to train the system. We present a ULF dataset augmentation method that samples type-coherent ULF expressions using the ULF semantic type system and filters out samples corresponding to implausible English sentences using a pretrained language model. Our data augmentation method is configurable with parameters that trade off between plausibility of samples with sample novelty and augmentation size. We find that the best configuration of this augmentation method substantially improves parser performance beyond using the existing unaugmented dataset.</abstract>
       <url hash="beca199d">2023.iwcs-1.14</url>
@@ -181,7 +181,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>R</fixed-case>a<fixed-case>TE</fixed-case>: a Reproducible automatic Taxonomy Evaluation by Filling the Gap</title>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Tianjian Lucas</first><last>Gao</last></author>
       <pages>173–182</pages>
       <abstract>Taxonomies are an essential knowledge representation, yet most studies on automatic taxonomy construction (ATC) resort to manual evaluation to score proposed algorithms. We argue that automatic taxonomy evaluation (ATE) is just as important as taxonomy construction. We propose RaTE, an automatic label-free taxonomy scoring procedure, which relies on a large pre-trained language model. We apply our evaluation procedure to three state-of-the-art ATC algorithms with which we built seven taxonomies from the Yelp domain, and show that 1) RaTE correlates well with human judgments and 2) artificially degrading a taxonomy leads to decreasing RaTE score.</abstract>
@@ -193,10 +193,10 @@
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Abdulrahman</first><last>Aloraini</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>183–194</pages>
       <abstract>The aim of the Universal Anaphora initiative is to push forward the state of the art both in anaphora (coreference) annotation and in the evaluation of models for anaphora resolution. The first release of the Universal Anaphora Scorer (Yu et al., 2022b) supported the scoring not only of identity anaphora as in the Reference Coreference Scorer (Pradhan et al., 2014) but also of split antecedent anaphoric reference, bridging references, and discourse deixis. That scorer was used in the CODI-CRAC 2021/2022 Shared Tasks on Anaphora Resolution in Dialogues (Khosla et al., 2021; Yu et al., 2022a). A modified version of the scorer supporting discontinuous markables and the COREFUD markup format was also used in the CRAC 2022 Shared Task on Multilingual Coreference Resolution (Zabokrtsky et al., 2022). In this paper, we introduce the second release of the scorer, merging the two previous versions, which can score reference with discontinuous markables and zero anaphora resolution.</abstract>
       <url hash="b6eaf1be">2023.iwcs-1.19</url>
@@ -233,7 +233,7 @@
     <paper id="23">
       <title>The argument–adjunct distinction in <fixed-case>BERT</fixed-case>: A <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-based investigation</title>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>233–239</pages>
       <abstract>The distinction between arguments and adjuncts is a fundamental assumption of several linguistic theories. In this study, we investigate to what extent this distinction is picked up by a Transformer-based language model. We use BERT as a case study, operationalizing arguments and adjuncts as core and non-core FrameNet frame elements, respectively, and tying them to activations of particular BERT neurons. We present evidence, from English and Korean, that BERT learns more dedicated representations for arguments than for adjuncts when fine-tuned on the FrameNet frame-identification task. We also show that this distinction is already present in a weaker form in the vanilla pre-trained model.</abstract>
       <url hash="4becba3d">2023.iwcs-1.23</url>
@@ -304,7 +304,7 @@
     <paper id="30">
       <title>Use Defines Possibilities: Reasoning about Object Function to Interpret and Execute Robot Instructions</title>
       <author><first>Mollie</first><last>Shichman</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Austin</first><last>Blodgett</last></author>
       <author><first>Taylor</first><last>Hudson</last></author>
       <author><first>Francis</first><last>Ferraro</last></author>
diff --git a/data/xml/2023.iwslt.xml b/data/xml/2023.iwslt.xml
index de22ca75ec..5bf2bf9423 100644
--- a/data/xml/2023.iwslt.xml
+++ b/data/xml/2023.iwslt.xml
@@ -23,14 +23,14 @@
       <author><first>Sweta</first><last>Agrawal</last><affiliation>UMD</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>GMU</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>FBK</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
       <author><first>Claudia</first><last>Borg</last><affiliation>U. Malta</affiliation></author>
       <author><first>Marine</first><last>Carpuat</last><affiliation>UMD</affiliation></author>
       <author><first>Roldano</first><last>Cattoni</last><affiliation>FBK</affiliation></author>
       <author><first>Mauro</first><last>Cettolo</last><affiliation>FBK</affiliation></author>
       <author><first>Mingda</first><last>Chen</last><affiliation>Meta</affiliation></author>
       <author><first>William</first><last>Chen</last><affiliation>CMU</affiliation></author>
-      <author><first>Khalid</first><last>Choukri</last><affiliation>ELDA</affiliation></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last><affiliation>ELDA</affiliation></author>
       <author><first>Alexandra</first><last>Chronopoulou</last><affiliation>LMU</affiliation></author>
       <author><first>Anna</first><last>Currey</last><affiliation>AWS</affiliation></author>
       <author><first>Thierry</first><last>Declerck</last><affiliation>DFKI</affiliation></author>
@@ -55,9 +55,9 @@
       <author><first>Paul</first><last>McNamee</last><affiliation>JHU</affiliation></author>
       <author><first>John</first><last>P. McCrae</last><affiliation>U. Galway</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>JHU</affiliation></author>
-      <author><first>Maria</first><last>Nadejde</last><affiliation>AWS</affiliation></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last><affiliation>AWS</affiliation></author>
       <author><first>Satoshi</first><last>Nakamura</last><affiliation>NAIST</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
       <author><first>Ha</first><last>Nguyen</last><affiliation>Avignon U.</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>KIT</affiliation></author>
       <author><first>Xing</first><last>Niu</last><affiliation>AWS</affiliation></author>
@@ -65,19 +65,19 @@
       <author><first>John</first><last>E. Ortega</last><affiliation>Northeastern U.</affiliation></author>
       <author><first>Proyag</first><last>Pal</last><affiliation>U. Edinburgh</affiliation></author>
       <author><first>Juan</first><last>Pino</last><affiliation>Meta</affiliation></author>
-      <author><first>Lonneke</first><last>van der Plas</last><affiliation>IDIAP</affiliation></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last><affiliation>IDIAP</affiliation></author>
       <author><first>Peter</first><last>Polák</last><affiliation>Charles U.</affiliation></author>
       <author><first>Elijah</first><last>Rippeth</last><affiliation>UMD</affiliation></author>
       <author><first>Elizabeth</first><last>Salesky</last><affiliation>JHU</affiliation></author>
       <author><first>Jiatong</first><last>Shi</last><affiliation>CMU</affiliation></author>
       <author><first>Matthias</first><last>Sperber</last><affiliation>Apple</affiliation></author>
-      <author><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
       <author><first>Katsuhito</first><last>Sudoh</last><affiliation>NAIST</affiliation></author>
       <author><first>Yun</first><last>Tang</last><affiliation>Meta</affiliation></author>
       <author><first>Brian</first><last>Thompson</last><affiliation>AWS</affiliation></author>
       <author><first>Kevin</first><last>Tran</last><affiliation>Meta</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom</affiliation></author>
-      <author><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
       <author><first>Mingxuan</first><last>Wang</last><affiliation>Bytedance</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>CMU</affiliation></author>
       <author><first>Rodolfo</first><last>Zevallos</last><affiliation>U. Pompeu Fabra</affiliation></author>
@@ -94,7 +94,7 @@
       <author><first>Elizabeth</first><last>Salesky</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Kareem</first><last>Darwish</last><affiliation>aiXplain Inc.</affiliation></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last><affiliation>aiXplain inc.</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <pages>62-78</pages>
       <abstract>We present the ACL 60/60 evaluation sets for multilingual translation of ACL 2022 technical presentations into 10 target languages. This dataset enables further research into multilingual speech translation under realistic recording conditions with unsegmented audio and domain-specific terminology, applying NLP tools to text and speech in the technical domain, and evaluating and improving model robustness to diverse speaker demographics.</abstract>
@@ -123,7 +123,7 @@
     <paper id="4">
       <title>Improving End-to-End Speech Translation by Imitation-Based Knowledge Distillation with Synthetic Transcripts</title>
       <author><first>Rebekka</first><last>Hubert</last><affiliation>Heidelberg University</affiliation></author>
-      <author><first>Artem</first><last>Sokolov</last><affiliation>Google</affiliation></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last><affiliation>Google</affiliation></author>
       <author><first>Stefan</first><last>Riezler</last><affiliation>Heidelberg University</affiliation></author>
       <pages>89-101</pages>
       <abstract>End-to-end automatic speech translation (AST) relies on data that combines audio inputs with text translation outputs. Previous work used existing large parallel corpora of transcriptions and translations in a knowledge distillation (KD) setup to distill a neural machine translation (NMT) into an AST student model. While KD allows using larger pretrained models, the reliance of previous KD approaches on manual audio transcripts in the data pipeline restricts the applicability of this framework to AST. We present an imitation learning approach where a teacher NMT system corrects the errors of an AST student without relying on manual transcripts. We show that the NMT teacher can recover from errors in automatic transcriptions and is able to correct erroneous translations of the AST student, leading to improvements of about 4 BLEU points over the standard AST end-to-end baseline on the English-German CoVoST-2 and MuST-C datasets, respectively. Code and data are publicly available: <url>https://github.com/HubReb/imitkd_ast/releases/tag/v1.1</url></abstract>
@@ -150,11 +150,11 @@
       <author><first>Thai</first><last>Binh Nguyen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Sai</first><last>Koneru</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Enes</first><last>Yavuz Ugan</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Tuan</first><last>Nam Nguyen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Tu</first><last>Anh Dinh</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Carlos</first><last>Mullov</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <pages>113-122</pages>
       <abstract>Many existing speech translation benchmarks focus on native-English speech in high-quality recording conditions, which often do not match the conditions in real-life use-cases. In this paper, we describe our speech translation system for the multilingual track of IWSLT 2023, which focuses on the translation of scientific conference talks. The test condition features accented input speech and terminology-dense contents. The tasks requires translation into 10 languages of varying amounts of resources. In absence of training data from the target domain, we use a retrieval-based approach (<tex-math>k</tex-math>NN-MT) for effective adaptation (<tex-math>+0.8</tex-math> BLEU for speech translation). We also use adapters to easily integrate incremental training data from data augmentation, and show that it matches the performance of re-training. We observe that cascaded systems are more easily adaptable towards specific target domains, due to their separate modules. Our cascaded speech system outperforms its end-to-end counterpart on scientific talk translation, although their performance remains similar on TED talks.</abstract>
@@ -174,7 +174,7 @@
     <paper id="8">
       <title>Enhancing Video Translation Context with Object Labels</title>
       <author><first>Jeremy</first><last>Gwinnup</last><affiliation>Air Force Research Laboratory</affiliation></author>
-      <author><first>Tim</first><last>Anderson</last><affiliation>Air Force Research Laboratory</affiliation></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last><affiliation>Air Force Research Laboratory</affiliation></author>
       <author><first>Brian</first><last>Ore</last><affiliation>AFRL</affiliation></author>
       <author><first>Eric</first><last>Hansen</last><affiliation>Air Force Research Laboratory</affiliation></author>
       <author><first>Kevin</first><last>Duh</last><affiliation>Johns Hopkins University</affiliation></author>
@@ -222,7 +222,7 @@
       <title>Direct Models for Simultaneous Translation and Automatic Subtitling: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case>2023</title>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>159-168</pages>
       <abstract>This paper describes the FBK’s participation in the Simultaneous Translation and Automatic Subtitling tracks of the IWSLT 2023 Evaluation Campaign. Our submission focused on the use of direct architectures to perform both tasks: for the simultaneous one, we leveraged the knowledge already acquired by offline-trained models and directly applied a policy to obtain the real-time inference; for the subtitling one, we adapted the direct ST model to produce well-formed subtitles and exploited the same architecture to produce timestamps needed for the subtitle synchronization with audiovisual content. Our English-German SimulST system shows a reduced computational-aware latency compared to the one achieved by the top-ranked systems in the 2021 and 2022 rounds of the task, with gains of up to 3.5 BLEU. Our automatic subtitling system outperforms the only-existing solution based on a direct system by 3.7 and 1.7 SubER in English-German and English-Spanish respectively.</abstract>
       <url hash="c4023a91">2023.iwslt-1.11</url>
@@ -232,7 +232,7 @@
     <paper id="12">
       <title><fixed-case>MT</fixed-case> Metrics Correlate with Human Ratings of Simultaneous Speech Translation</title>
       <author><first>Dominik</first><last>Macháček</last><affiliation>Charles University, MFF UFAL</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>NICT</affiliation></author>
       <pages>169-179</pages>
       <abstract>There have been several meta-evaluation studies on the correlation between human ratings and offline machine translation (MT) evaluation metrics such as BLEU, chrF2, BertScore and COMET. These metrics have been used to evaluate simultaneous speech translation (SST) but their correlations with human ratings of SST, which has been recently collected as Continuous Ratings (CR), are unclear. In this paper, we leverage the evaluations of candidate systems submitted to the English-German SST task at IWSLT 2022 and conduct an extensive correlation analysis of CR and the aforementioned metrics. Our study reveals that the offline metrics are well correlated with CR and can be reliably used for evaluating machine translation in simultaneous mode, with some limitations on the test set size. We conclude that given the current quality levels of SST, these metrics can be used as proxies for CR, alleviating the need for large scale human evaluation. Additionally, we observe that correlations of the metrics with translation as a reference is significantly higher than with simultaneous interpreting, and thus we recommend the former for reliable evaluation.</abstract>
@@ -394,7 +394,7 @@
       <author><first>Parnia</first><last>Bahar</last><affiliation>AppTek</affiliation></author>
       <author><first>Patrick</first><last>Wilken</last><affiliation>AppTek</affiliation></author>
       <author><first>Javier</first><last>Iranzo-Sánchez</last><affiliation>Universitat Politcnica de Valncia</affiliation></author>
-      <author><first>Mattia</first><last>Di Gangi</last><affiliation>AppTek GmbH</affiliation></author>
+      <author id="mattia-a-di-gangi"><first>Mattia</first><last>Di Gangi</last><affiliation>AppTek GmbH</affiliation></author>
       <author><first>Evgeny</first><last>Matusov</last><affiliation>AppTek</affiliation></author>
       <author><first>Zoltán</first><last>Tüske</last><affiliation>IBM Research</affiliation></author>
       <pages>251-260</pages>
@@ -448,7 +448,7 @@
       <author><first>Neha</first><last>Verma</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Thomas</first><last>Thebaud</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Matthew</first><last>Wiesner</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>283-290</pages>
       <abstract>This paper presents JHU’s submissions to the IWSLT 2023 dialectal and low-resource track of Tunisian Arabic to English speech translation. The Tunisian dialect lacks formal orthography and abundant training data, making it challenging to develop effective speech translation (ST) systems. To address these challenges, we explore the integration of large pre-trained machine translation (MT) models, such as mBART and NLLB-200 in both end-to-end (E2E) and cascaded speech translation (ST) systems. We also improve the performance of automatic speech recognition (ASR) through the use of pseudo-labeling data augmentation and channel matching on telephone data. Finally, we combine our E2E and cascaded ST systems with Minimum Bayes-Risk decoding. Our combined system achieves a BLEU score of 21.6 and 19.1 on test2 and test3, respectively.</abstract>
       <url hash="9a9b1d46">2023.iwslt-1.26</url>
@@ -473,7 +473,7 @@
       <author><first>Bismarck</first><last>Bamfo Odoom</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Ujvala</first><last>Pradeep</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Matthew</first><last>Wiesner</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>302-310</pages>
       <abstract>We describe the Johns Hopkins ACL 60-60 Speech Translation systems submitted to the IWSLT 2023 Multilingual track, where we were tasked to translate ACL presentations from English into 10 languages. We developed cascaded speech translation systems for both the constrained and unconstrained subtracks. Our systems make use of pre-trained models as well as domain-specific corpora for this highly technical evaluation-only task. We find that the specific technical domain which ACL presentations fall into presents a unique challenge for both ASR and MT, and we present an error analysis and an ACL-specific corpus we produced to enable further work in this area.</abstract>
       <url hash="f849c76e">2023.iwslt-1.28</url>
@@ -609,10 +609,10 @@
       <title>Towards Efficient Simultaneous Speech Translation: <fixed-case>CUNI</fixed-case>-<fixed-case>KIT</fixed-case> System for Simultaneous Track at <fixed-case>IWSLT</fixed-case> 2023</title>
       <author><first>Peter</first><last>Polák</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Danni</first><last>Liu</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <pages>389-396</pages>
       <abstract>In this paper, we describe our submission to the Simultaneous Track at IWSLT 2023. This year, we continue with the successful setup from the last year, however, we adopt the latest methods that further improve the translation quality. Additionally, we propose a novel online policy for attentional encoder-decoder models. The policy prevents the model to generate translation beyond the current speech input by using an auxiliary CTC output layer. We show that the proposed simultaneous policy can be applied to both streaming blockwise models and offline encoder-decoder models. We observe significant improvements in quality (up to 1.1 BLEU) and the computational footprint (up to 45% relative RTF).</abstract>
       <url hash="a8342aa1">2023.iwslt-1.37</url>
@@ -654,7 +654,7 @@
       <author><first>Seungjun</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>420-432</pages>
       <abstract>In this paper, we present the KU x Upstage team’s submission for the Special Task on Formality Control on Spoken Language Translation, which involves translating English into four languages with diverse grammatical formality markers. Our methodology comprises two primary components: 1) a language-specific data-driven approach, and 2) the generation of synthetic data through the employment of large-scale language models and empirically-grounded prompt engineering. By adapting methodologies and models to accommodate the unique linguistic properties of each language, we observe a notable enhancement in performance relative to the baseline, substantiating the heightened efficacy of data-driven approaches. Moreover, our devised prompt engineering strategy yields superior synthetic translation instances.</abstract>
       <url hash="16b2e8db">2023.iwslt-1.40</url>
@@ -671,7 +671,7 @@
       <author><first>Kurt</first><last>Micallef</last><affiliation>University of Malta</affiliation></author>
       <author><first>Ahnaf</first><last>Mozib Samin</last><affiliation>University of Malta</affiliation></author>
       <author><first>Andrea</first><last>DeMarco</last><affiliation>University of Malta</affiliation></author>
-      <author><first>Lonneke</first><last>van der Plas</last><affiliation>IDIAP</affiliation></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last><affiliation>IDIAP</affiliation></author>
       <author><first>Claudia</first><last>Borg</last><affiliation>University of Malta</affiliation></author>
       <pages>433-441</pages>
       <abstract>For the 2023 IWSLT Maltese Speech Translation Task, UM-DFKI jointly presents a cascade solution which achieves 0.6 BLEU. While this is the first time that a Maltese speech translation task has been released by IWSLT, this paper explores previous solutions for other speech translation tasks, focusing primarily on low-resource scenarios. Moreover, we present our method of fine-tuning XLS-R models for Maltese ASR using a collection of multi-lingual speech corpora as well as the fine-tuning of the mBART model for Maltese to English machine translation.</abstract>
@@ -746,7 +746,7 @@
       <author><first>Jiaao</first><last>Zhan</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Qian</first><last>Chen</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei</affiliation></author>
-      <author><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Yu</first><last>Bai</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yang</first><last>Gao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>478-490</pages>
@@ -759,8 +759,8 @@
       <title>On the Copying Problem of Unsupervised <fixed-case>NMT</fixed-case>: A Training Schedule with a Language Discriminator Loss</title>
       <author><first>Yihong</first><last>Liu</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Alexandra</first><last>Chronopoulou</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universitt Mnchen</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universitt Mnchen</affiliation></author>
       <pages>491-502</pages>
       <abstract>Although unsupervised neural machine translation (UNMT) has achieved success in many language pairs, the copying problem, i.e., directly copying some parts of the input sentence as the translation, is common among distant language pairs, especially when low-resource languages are involved. We find this issue is closely related to an unexpected copying behavior during online back-translation (BT). In this work, we propose a simple but effective training schedule that incorporates a language discriminator loss. The loss imposes constraints on the intermediate translation so that the translation is in the desired language. By conducting extensive experiments on different language pairs, including similar and distant, high and low-resource languages, we find that our method alleviates the copying problem, thus improving the translation performance on low-resource languages.</abstract>
       <url hash="d1497fc5">2023.iwslt-1.48</url>
diff --git a/data/xml/2023.jeptalnrecital.xml b/data/xml/2023.jeptalnrecital.xml
index 74ccf5db56..6144dc4ede 100644
--- a/data/xml/2023.jeptalnrecital.xml
+++ b/data/xml/2023.jeptalnrecital.xml
@@ -3,7 +3,7 @@
   <volume id="long" ingest-date="2023-08-31" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 1 : travaux de recherche originaux -- articles longs</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -19,7 +19,7 @@
       <title>Étude de méthodes d’augmentation de données pour la reconnaissance d’entités nommées en astrophysique</title>
       <author><first>Atilla Kaan</first><last>Alkan</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>1–13</pages>
       <abstract>Dans cet article nous étudions l’intérêt de l’augmentation de données pour le repérage d’entités nommées en domaine de spécialité : l’astrophysique. Pour cela, nous comparons trois méthodes d’augmentation en utilisant deux récents corpus annotés du domaine : DEAL et TDAC, tous deux en anglais. Nous avons générés les données artificielles en utilisant des méthodes à base de règles et à base de modèles de langue. Les données ont ensuite été ajoutées de manière itérative pour affiner un système de détection d’entités. Les résultats permettent de constater un effet de seuil : ajouter des données artificielles au-delà d’une certaine quantité ne présente plus d’intérêt et peut dégrader la F-mesure. Sur les deux corpus, le seuil varie selon la méthode employée, et en fonction du modèle de langue utilisé. Cette étude met également en évidence que l’augmentation de données est plus efficace sur de petits corpus, ce qui est cohérent avec d’autres études antérieures. En effet, nos expériences montrent qu’il est possible d’améliorer de 1 point la F-mesure sur le corpus DEAL, et jusqu’à 2 points sur le corpus TDAC.</abstract>
       <url hash="34b9084e">2023.jeptalnrecital-long.1</url>
@@ -30,8 +30,8 @@
       <title>Towards a Robust Detection of Language Model-Generated Text: Is <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> that easy to detect?</title>
       <author><first>Wissam</first><last>Antoun</last></author>
       <author><first>Virginie</first><last>Mouilleron</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>14–27</pages>
       <abstract>Recent advances in natural language processing (NLP) have led to the development of large language models (LLMs) such as ChatGPT. This paper proposes a methodology for developing and evaluating ChatGPT detectors for French text, with a focus on investigating their robustness on out-of-domain data and against common attack schemes. The proposed method involves translating an English dataset into French and training a classifier on the translated data. Results show that the detectors can effectively detect ChatGPT-generated text, with a degree of robustness against basic attack techniques in in-domain settings. However, vulnerabilities are evident in out-of-domain contexts, highlighting the challenge of detecting adversarial text. The study emphasizes caution when applying in-domain testing results to a wider variety of content. We provide our translated datasets and models as open-source resources.</abstract>
       <url hash="9a58daa5">2023.jeptalnrecital-long.2</url>
@@ -41,8 +41,8 @@
       <title>Cross-lingual Strategies for Low-resource Language Modeling: A Study on Five <fixed-case>I</fixed-case>ndic Dialects</title>
       <author><first>Niyati</first><last>Bafna</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <pages>28–42</pages>
       <abstract>Neural language models play an increasingly central role for language processing, given their success for a range of NLP tasks. In this study, we compare some canonical strategies in language modeling for low-resource scenarios, evaluating all models by their (finetuned) performance on a POS-tagging downstream task. We work with five (extremely) low-resource dialects from the Indic dialect continuum (Braj, Awadhi, Bhojpuri, Magahi, Maithili), which are closely related to each other and the standard mid-resource dialect, Hindi. The strategies we evaluate broadly include from-scratch pretraining, and cross-lingual transfer between the dialects as well as from different kinds of off-the- shelf multilingual models; we find that a model pretrained on other mid-resource Indic dialects and languages, with extended pretraining on target dialect data, consistently outperforms other models. We interpret our results in terms of dataset sizes, phylogenetic relationships, and corpus statistics, as well as particularities of this linguistic system.</abstract>
@@ -75,7 +75,7 @@
       <title>Tri-apprentissage génératif : génération de données pour de la reconnaissance d’entitées nommées semi-supervisé</title>
       <author><first>Hugo</first><last>Boulanger</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>68–79</pages>
       <abstract>Le développement de solutions de traitement automatique de la langue pour de nouvelles tâches nécessite des données, dont l’obtention est coûteuses. L’accès aux données peut être limité en raison de la nature sensible des données. La plupart des travaux récents ont exploité de grands modèles pré-entraînés pour initialiser des versions spécialisées de ceux-ci. La spécialisation d’un tel modèle nécessite toujours une quantité élevée de données étiquetées spécifiques à la tâche cible. Nous utilisons l’apprentissage semi-supervisé pour entraîner des modèles dans un contexte où le nombre d’exemples étiquetés est limité et le nombre de données non étiquetées est nul. Nous étudions plusieurs méthodes pour générer le corpus non étiqueté nécessaire à l’utilisation de l’apprentissage semi-supervisé. Nous introduisons les méthodes de génération entre les épisodes d’entraînement et utilisons les modèles entraînés pour filtrer les exemples générés. Nous testons cette génération avec le tri-apprentissage et l’auto-apprentissage sur des corpus Anglais et Français.</abstract>
       <url hash="578633a9">2023.jeptalnrecital-long.6</url>
@@ -85,7 +85,7 @@
     <paper id="7">
       <title>Évaluation d’un générateur automatique de reformulations médicales</title>
       <author><first>Ioana</first><last>Buhnila</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>80–93</pages>
       <abstract>Les textes médicaux sont difficiles à comprendre pour le grand public à cause des termes de spécialité. Ces notions médicales ont besoin d’être reformulées en utilisant des mots de la langue commune. La reformulation représente le processus de réécriture qui a le rôle d’expliquer ou simplifier une phrase ou syntagme. Nous présentons la méthodologie de construction d’un jeu de données original (termes et reformulations) permettant la détection et génération des nouvelles reformulations médicales. Pour compléter ce corpus, nous menons des expériences de génération automatique de reformulations médicales sous-phrastiques avec l’outil APT (Nighojkar &amp; Licato, 2021), qui s’appuie sur des techniques d’apprentissage profond. Nous adaptons le modèle de langue de type Transformer T5 (Raffel et al., 2020) avec des termes médicaux et leur reformulations annotés manuellement en français et en roumain, langue romane peu dotée en ressources pour le TAL. Nous présentons une analyse détaillée des résultats de la génération automatique des paraphrases.</abstract>
       <url hash="1cb0ca5f">2023.jeptalnrecital-long.7</url>
@@ -96,7 +96,7 @@
       <title>Étude comparative des plongements lexicaux pour l’extraction d’entités nommées en français</title>
       <author><first>Danrun</first><last>Cao</last></author>
       <author><first>Nicolat</first><last>Béchet</last></author>
-      <author><first>Pierre-François</first><last>Marteau</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-François</first><last>Marteau</last></author>
       <pages>94–104</pages>
       <abstract>Dans ce papier nous présentons une étude comparative des méthodes de plongements lexicaux pour le français sur la tâche de Reconnaissance d’entités nommées (REN). L’objectif est de comparer la performance de chaque méthode sur la même tâche et sous les mêmes conditions de travail. Nous utilisons comme corpus d’étude la proportion française du corpus WikiNER. Il s’agit d’un corpus de 3,5 millions tokens avec 4 types d’entités. 10 types de plongements lexicaux sont étudiés, y compris les plongements non-contextuels, des contextuels et éventuellement ceux à base de transformer. Pour chaque plongement, nous entraînons un BiLSTM-CRF comme classifieur. Pour les modèles à base de transformer, nous comparons également leur performance sous un autre cas d’usage: fine-tuning.</abstract>
       <url hash="81dfe71d">2023.jeptalnrecital-long.8</url>
@@ -127,7 +127,7 @@
       <author><first>Loïc</first><last>Fosse</last></author>
       <author><first>Duc Hau</first><last>Nguyen</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <pages>137–150</pages>
       <abstract>Plusieurs études ont mis en évidence l’anisotropie des plongements issus d’un modèle BERT au sein d’un énoncé, c’est-à-dire leur concentration dans une direction donnée, notamment dans une tâche de classification. Dans cet article, nous cherchons à mieux comprendre ce phénomène et comment cette convergence se construit en analysant finement les propriétés géométriques des plongements, des clés et des valeurs dans une couche d’auto-attention. Nous montrons que la direction vers laquelle les plongements s’alignent caractérise la classe d’appartenance de l’énoncé. Nous étudions ensuite le fonctionnement intrinsèque de la couche d’auto-attention et les mécanismes en jeu entre clés et valeurs pour garantir la construction d’une représentation anisotrope. Cette construction se fait de manière progressive lorsque plusieurs couches sont empilés. Elle s’avère également robuste à des contraintes externes sur la distribution des poids d’attention, compensées par le modèle en jouant sur les valeurs et les clés.</abstract>
       <url hash="59a68b57">2023.jeptalnrecital-long.11</url>
@@ -184,8 +184,8 @@
       <title>Classification de tweets en situation d’urgence pour la gestion de crises</title>
       <author><first>Romain</first><last>Meunier</last></author>
       <author><first>Leila</first><last>Moudjari</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Alda</first><last>Mari</last></author>
       <author><first>Patricia</first><last>Stolf</last></author>
       <pages>204–216</pages>
@@ -206,7 +206,7 @@
     <paper id="18">
       <title>Stratégies d’apprentissage actif pour la reconnaissance d’entités nommées en français</title>
       <author><first>Marco</first><last>Naguib</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>232–247</pages>
       <abstract>L’annotation manuelle de corpus est un processus coûteux et lent, notamment pour la tâche de re-connaissance d’entités nommées. L’apprentissage actif vise à rendre ce processus plus efficace, ensélectionnant les portions les plus pertinentes à annoter. Certaines stratégies visent à sélectionner lesportions les plus représentatives du corpus, d’autres, les plus informatives au modèle de langage.Malgré un intérêt grandissant pour l’apprentissage actif, rares sont les études qui comparent cesdifférentes stratégies dans un contexte de reconnaissance d’entités nommées médicales. Nous pro-posons une comparaison de ces stratégies en fonction des performances de chacune sur 3 corpus dedocuments cliniques en langue française : MERLOT, QuaeroFrenchMed et E3C. Nous comparonsles stratégies de sélection mais aussi les différentes façons de les évaluer. Enfin, nous identifions lesstratégies qui semblent les plus efficaces et mesurons l’amélioration qu’elles présentent, à différentesphases de l’apprentissage.</abstract>
@@ -217,7 +217,7 @@
     <paper id="19">
       <title>Détecter une erreur dans les phrases coordonnées au sein des rédactions universitaires</title>
       <author><first>Laura</first><last>Noreskal</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Marianne</first><last>Desmets</last></author>
       <pages>248–261</pages>
       <abstract>Beaucoup d’étudiants rencontrent des difficultés dans la maîtrise du français écrit. Sur la base d’une enquête linguistique préliminaire, il est apparu que les constructions syntaxiques comprenant des coordinations et des constructions elliptiques forment des contextes linguistiques sensibles aux erreurs ou aux maladresses dans les écrits des étudiants. Notre recherche vise à développer un outil de détection automatique de phrases coordonnées erronées dans les rédactions des étudiants afin de leur permettre de s’auto-former en expression écrite. Après avoir constitué le corpus de phrases coordonnées extraites des différents écrits universitaires (exercices, examens, devoirs, rapports de stage et mémoires), nous avons établi une typologie des erreurs qui a servi de modèle pour l’annotation du corpus. Nous avons entrainé premièrement des classifieurs afin de détecter deux étiquettes: erronée et correcte puis, dans un second temps, un classifieur multi-label pour diagnostiquer l’erreur.</abstract>
@@ -267,8 +267,8 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Anfu</first><last>Tang</last></author>
       <author><first>Robert</first><last>Bossy</last></author>
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>298–310</pages>
       <abstract>L’intégration de connaissances externes dans les modèles neuronaux est très étudiée pour améliorer les performances des modèles de langue pré-entraînés, notamment en domaine biomédical. Dans cet article, nous explorons la contribution de plongements de bases de connaissances à une tâche d’extraction de relations. Pour deux mentions d’entités candidates dans un texte, nous faisons l’hypothèse que la connaissance de relations entre elles, issue d’une base de connaissances (BC) externe, aide à prédire l’existence d’une relation dans le texte, y compris lorsque les relations de BC sont différentes de celles du texte. Notre approche consiste à calculer des plongements du graphe de BC et à estimer la possibilité pour chaque paire d’entité du texte qu’elle soit reliée par une relation de BC. Les expériences menées sur trois tâches d’extraction de relations en domaine biomédical montrent que notre méthode surpasse le modèle PubMedBERT de base et donne des performances comparables aux méthodes de l’état de l’art.</abstract>
       <url hash="24effcf5">2023.jeptalnrecital-long.23</url>
@@ -278,7 +278,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="24">
       <title>Derrière les plongements de relations</title>
       <author><first>Hugo</first><last>Thomas</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>311–322</pages>
       <abstract>Dans cet article, plutôt que nous arrêter aux scores de performance habituellement fournis (par ex. mesure F1), nous proposons une analyse approfondie, selon différents critères, des modélisations de relations employées par plusieurs architectures de modèles de typage de relations. Cette analyse vise à mieux comprendre l’organisation de l’espace latent des modélisations et ses propriétés, enjeu important pour les modèles se fondant sur les distances dans cet espace. Dans cet objectif d’analyse des plongements, nous étudions l’influence, sur ces modélisations, du vocabulaire, de la syntaxe, de la sémantique des relations, de la représentation des entités nommées liées, ainsi que la géométrie de leur espace latent. Il en ressort que les modélisations de relations sont apprises de manière inégale d’un modèle à un autre entraînés de la même manière ; dans ce cas, les indicateurs que nous proposons sont de nouveaux éléments de compréhension de l’espace latent d’un modèle afin de mieux exploiter ses propriétés.</abstract>
@@ -289,7 +289,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="25">
       <title><fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>-bio : Un modèle de langue français savoureux et meilleur pour la santé</title>
       <author><first>Rian</first><last>Touchent</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Eric</first><last>De La Clergerie</last></author>
       <pages>323–334</pages>
       <abstract>Les données cliniques dans les hôpitaux sont de plus en plus accessibles pour la recherche à travers les entrepôts de données de santé, cependant ces documents sont non-structurés. Il est donc nécessaire d’extraire les informations des comptes-rendus médicaux. L’utilisation du transfert d’apprentissage grâce à des modèles de type BERT comme CamemBERT ont permis des avancées majeures, notamment pour la reconnaissance d’entités nommées. Cependant, ces modèles sont entraînés pour le langage courant et sont moins performants sur des données biomédicales. C’est pourquoi nous proposons un nouveau jeu de données biomédical public français sur lequel nous avons poursuivi le pré-entraînement de CamemBERT. Ainsi, nous présentons une première version de CamemBERT-bio, un modèle public spécialisé pour le domaine biomédical français qui montre un gain de 2,54 points de F-mesure en moyenne sur différents jeux d’évaluations de reconnaissance d’entités nommées biomédicales.</abstract>
@@ -311,7 +311,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="27">
       <title>Exploring Data-Centric Strategies for <fixed-case>F</fixed-case>rench Patent Classification: A Baseline and Comparisons</title>
       <author><first>You</first><last>Zuo</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
       <author><first>Houda</first><last>Mouzoun</last></author>
       <author><first>Samir</first><last>Ghamri Doudane</last></author>
@@ -324,7 +324,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="short" ingest-date="2023-08-31" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 2 : travaux de recherche originaux -- articles courts</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -341,7 +341,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Nesrine</first><last>Bannour</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Bastien</first><last>Rance</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>1–14</pages>
       <abstract>L’extraction de relations temporelles consiste à identifier et classifier la relation entre deux mentions. Néanmoins, la définition des mentions temporelles dépend largement du type du texte et du domained’application. En particulier, le texte clinique est complexe car il décrit des évènements se produisant à des moments différents et contient des informations redondantes et diverses expressions temporellesspécifiques au domaine. Dans cet article, nous proposons une nouvelle représentation des relations temporelles, qui est indépendante du domaine et de l’objectif de la tâche d’extraction. Nous nousintéressons à extraire la relation entre chaque portion du texte et la date de création du document. Nous formulons l’extraction de relations temporelles comme une tâche d’étiquetage de séquences.Une macro F-mesure de 0,8 est obtenue par un modèle neuronal entraîné sur des textes cliniques, écrits en français. Nous évaluons notre représentation temporelle par le positionnement temporel desévènements de toxicité des chimiothérapies.</abstract>
       <url hash="b9673c07">2023.jeptalnrecital-short.1</url>
@@ -363,7 +363,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Hee-Soo</first><last>Choi</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>23–36</pages>
       <abstract>Au début du XXIe siècle, le français faisait encore partie des langues peu dotées. Grâce aux efforts de la communauté française du traitement automatique des langues (TAL), de nombreuses ressources librement disponibles ont été produites, dont des lexiques du français. À travers cet article, nous nous intéressons à leur devenir dans la communauté par le prisme des actes de la conférence TALN sur une période de 20 ans.</abstract>
       <url hash="333fe639">2023.jeptalnrecital-short.3</url>
@@ -376,7 +376,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <title>Attention sur les spans pour l’analyse syntaxique en constituants</title>
       <author><first>Nicolas</first><last>Floquet</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <pages>37–45</pages>
       <abstract>Nous présentons une extension aux analyseurs syntaxiques en constituants neuronaux modernes qui consiste à doter les constituants potentiels d’une représentation vectorielle affinée en fonction du contexte par plusieurs applications successives d’un module de type transformer efficace (pooling par attention puis transformation non-linéaire).Nous appliquons cette extension à l’analyseur CRF de Yu Zhang &amp; Al.Expérimentalement, nous testons cette extension sur deux corpus (PTB et FTB) avec ou sans vecteurs de mots dynamiques: cette extension permet d’avoir un gain constant dans toutes les configurations.</abstract>
@@ -389,7 +389,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Nicolas</first><last>Hiebel</last></author>
       <author><first>Ferret</first><last>Olivier</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>46–54</pages>
       <abstract>Les ressources textuelles disponibles dans le domaine biomédical sont rares pour des raisons de confidentialité. Des données existent mais ne sont pas partageables, c’est pourquoi il est intéressant de s’inspirer de ces données pour en générer de nouvelles sans contrainte de partage. Une difficulté majeure de la génération de données médicales est que les données générées doivent ressembler aux données originales sans compromettre leur confidentialité. L’évaluation de cette tâche est donc difficile. Dans cette étude, nous étendons l’évaluation de corpus cliniques générés en français en y ajoutant une dimension sémantique à l’aide de plongements de phrases. Nous recherchons des phrases proches à l’aide de similarité cosinus entre plongements, et analysons les scores de similarité. Nous observons que les phrases synthétiques sont thématiquement proches du corpus original, mais suffisamment éloignées pour ne pas être de simples reformulations qui compromettraient la confidentialité.</abstract>
       <url hash="f64bfee9">2023.jeptalnrecital-short.5</url>
@@ -445,7 +445,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="statement" ingest-date="2023-08-31" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 3 : prises de position en TAL</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -480,7 +480,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="international" ingest-date="2023-08-31" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 4 : articles déjà soumis ou acceptés en conférence internationale</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -498,8 +498,8 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Hyun Jung</first><last>Kang</last></author>
       <author><first>Ismaël</first><last>Rousseau</last></author>
       <author><first>Ghislaine</first><last>Azémard</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>1–9</pages>
       <abstract>Cette article présente une méthode d’exploration de documents basée sur la création d’un ensemble synthétique de questions et de réponses portant sur le corpus, ensemble qui est ensuite utilisé pour établir des liens explicables entre les documents. Nous menons une évaluation quantitative et qualitative des questions automatiquement générées en termes de leur forme et de leur pertinence pour l’exploration de la collection. De plus, nous présentons une étude quantitative des liens obtenus grâce à notre méthode sur une collection de document provenant d’archives numérisés.</abstract>
       <url hash="81930ce1">2023.jeptalnrecital-international.1</url>
@@ -532,7 +532,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="4">
       <title>Traduction à base d’exemples du texte vers une représentation hiérarchique de la langue des signes</title>
       <author><first>Elise</first><last>Bertin-Lemée</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Camille</first><last>Challant</last></author>
       <author><first>Claire</first><last>Danet</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
@@ -560,7 +560,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     </paper>
     <paper id="6">
       <title>Un mot, deux facettes : traces des opinions dans les représentations contextualisées des mots</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Matthieu</first><last>Labeau</last></author>
       <author><first>Chloe</first><last>Clavel</last></author>
       <pages>49–57</pages>
@@ -584,10 +584,10 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="8">
       <title>Injection de connaissances temporelles dans la reconnaissance d’entités nommées historiques</title>
       <author><first>Carlos-Emiliano</first><last>González-Gallardo</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Edward</first><last>Giamphy</last></author>
       <author><first>Ahmed</first><last>Hamdi</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>65–73</pages>
       <abstract>Dans cet article, nous abordons la reconnaissance d’entités nommées dans des documents historiques multilingues. Cette tâche présente des multiples défis tels que les erreurs générées suite à la numérisa- tion et la reconnaissance optique des caractères de ces documents. En outre, les documents historiques posent un autre défi puisque leurs collections sont distribuées sur une période de temps assez longue et suivent éventuellement plusieurs conventions orthographiques qui évoluent au fil du temps. Nous explorons, dans ce travail, l’idée d’injecter des connaissance temporelles à l’aide de graphes pour une reconnaissance d’entités nommées plus performante. Plus précisément, nous récupérons des contextes supplémentaires, sémantiquement pertinents, en explorant les informations temporelles fournies par les collections historiques et nous les incluons en tant que représentations mises en commun dans un modèle NER basé sur un transformeur. Nous expérimentons avec deux collections récentes en anglais, français et allemand, composées de journaux historiques (19C-20C) et de commentaires classiques (19C). Les résultats montrent l’efficacité de l’injection de connaissances temporelles dans des ensembles de données, des langues et des types d’entités différents.</abstract>
@@ -598,10 +598,10 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="9">
       <title>Oui mais... <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> peut-il identifier des entités dans des documents historiques ?</title>
       <author><first>Carlos-Emiliano</first><last>González-Gallardo</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Nancy</first><last>Girdhar</last></author>
       <author><first>Ahmed</first><last>Hamdi</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>74–82</pages>
       <abstract>Les modèles de langage de grande taille (LLM) sont exploités depuis plusieurs années maintenant, obtenant des performances de pointe dans la reconnaissance d’entités à partir de documents modernes. Depuis quelques mois, l’agent conversationnel ChatGPT a suscité beaucoup d’intérêt auprès de la communauté scientifique et du grand public en raison de sa capacité à générer des réponses plausibles. Dans cet article, nous explorons cette compétence à travers la tâche de reconnaissance et de classification d’entités nommées (NERC) dans des sources primaires (des journaux historiques et des commentaires classiques) d’une manière zero-shot et en la comparant avec les systèmes de pointe basés sur des modèles de langage. Nos résultats indiquent plusieurs lacunes dans l’identification des entités dans le texte historique, qui concernant la cohérence des guidelines d’annotation des entités, la complexité des entités et du changement de code et la spécificité du prompt. De plus, comme prévu, l’inaccessibilité des archives historiques a également un impact sur les performances de ChatGPT.</abstract>
@@ -624,8 +624,8 @@ In NLP, the automatic detection of logical contradictions between statements is
       <title>Effet de l’anthropomorphisme des machines sur le français adressé aux robots: Étude du débit de parole et de la fluence</title>
       <author><first>Natalia</first><last>Kalashnikova</last></author>
       <author><first>Mathilde</first><last>Hutin</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <pages>92–100</pages>
       <abstract>“Robot-directed speech” désigne la parole adressée à un appareil robotique, des petites enceintes domestiques aux robots humanoïdes grandeur-nature. Les études passées ont analysé les propriétés phonétiques et linguistiques de ce type de parole ou encore l’effet de l’anthropomorphisme des appareils sur la sociabilité des interactions, mais l’effet de l’anthropomorphisme sur les réalisations linguistiques n’a encore jamais été exploré. Notre étude propose de combler ce manque avec l’analyse d’un paramètre phonétique (débit de parole) et d’un paramètre linguistique (fréquence des pauses remplies) sur la parole adressée à l’enceinte vs. au robot humanoïde vs. à l’humain. Les données de 71 francophones natifs indiquent que les énoncés adressés aux humains sont plus longs, plus rapides et plus dysfluents que ceux adressés à l’enceinte et au robot. La parole adressée à l’enceinte et au robot est significativement différente de la parole adressée à l’humain, mais pas l’une de l’autre, indiquant l’existence d’un type particulier de la parole adressée aux machines.</abstract>
       <url hash="b361b65a">2023.jeptalnrecital-international.11</url>
@@ -635,7 +635,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="12">
       <title>Détection de la nasalité du locuteur à partir de réseaux de neurones convolutifs et validation par des données aérodynamiques</title>
       <author><first>Lila</first><last>Kim</last></author>
-      <author><first>Cedric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cedric</first><last>Gendrot</last></author>
       <author><first>Amélie</first><last>Elmerich</last></author>
       <author><first>Angelique</first><last>Amelot</last></author>
       <author><first>Shinji</first><last>Maeda</last></author>
@@ -652,7 +652,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Richard</first><last>Dufour</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Pierre-Antoine</first><last>Gourraud</last></author>
       <pages>109–120</pages>
       <abstract>Ces dernières années, les modèles de langage pré-entraînés ont obtenu les meilleures performances sur un large éventail de tâches de traitement automatique du langage naturel (TALN). Alors que les premiers modèles ont été entraînés sur des données issues de domaines généraux, des modèles spécialisés sont apparus pour traiter plus efficacement des domaines spécifiques. Dans cet article, nous proposons une étude originale de modèles de langue dans le domaine médical en français. Nous comparons pour la première fois les performances de modèles entraînés sur des données publiques issues du web et sur des données privées issues d’établissements de santé. Nous évaluons également différentes stratégies d’apprentissage sur un ensemble de tâches biomédicales. Enfin, nous publions les premiers modèles spécialisés pour le domaine biomédical en français, appelés DrBERT, ainsi que le plus grand corpus de données médicales sous licence libre sur lequel ces modèles sont entraînés.</abstract>
@@ -679,7 +679,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="15">
       <title>Détection de faux tickets de caisse à l’aide d’entités et de relations basées sur une ontologie de domaine</title>
       <author><first>Beatriz</first><last>Martínez Tornés</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Petra</first><last>Gomez-Krämer</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Jean-Marc</first><last>Ogier</last></author>
@@ -693,7 +693,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <title>Jeu de données de tickets de caisse pour la détection de fraude documentaire</title>
       <author><first>Beatriz</first><last>Martínez Tornés</last></author>
       <author><first>Théo</first><last>Taburet</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Kais</first><last>Rouis</last></author>
       <author><first>Petra</first><last>Gomez-Krämer</last></author>
       <author><first>Nicolas</first><last>Sidere</last></author>
@@ -710,7 +710,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Ahmed</first><last>Njifenjou</last></author>
       <author><first>Virgile</first><last>Sucal</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>148–158</pages>
       <abstract>Dans cet article, nous proposons une étude de la portabilité linguistique des modèles de langage pré-appris (MLPs) appliqués à une tâche de dialogue à domaine ouvert. La langue cible (L_T) retenue dans cette étude est le français. Elle dispose de peu de ressources spécifiques pour la tâche considérée et nous permet de réaliser une évaluation humaine. La langue source (L_S) est l’anglais qui concentre la majorité des travaux récents dans ce domaine. Construire des MLPs spécifiques pour chaque langue nécessite de collecter de nouveaux jeux de données et cela est coûteux. Ainsi, à partir des ressources disponibles en L_S et L_T, nous souhaitons évaluer les performances atteignables par un système de conversation en L_T . Pour cela, nous proposons trois approches : TrainOnTarget où le corpus L_S est traduit vers L_T avant l’affinage du modèle, TestOnSource où un modèle L_S est couplé avec des modules de traduction au moment du décodage et TrainOnSourceAdaptOnTarget, qui utilise un MLP multilingue - ici BLOOM (BigScience Workshop, 2022) - avec l’architecture MAD-X Adapter (Pfeiffer et al., 2020) pour apprendre la tâche en L_S et l’adapter à L_T . Les modèles sont évalués dans des conditions de dialogue oral et les stratégies sont comparées en termes de qualité perçue lors l’interaction.</abstract>
       <url hash="bffdaaf9">2023.jeptalnrecital-international.17</url>
@@ -786,7 +786,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <title>Augmentation de jeux de données <fixed-case>RI</fixed-case> pour la recherche conversationnelle à initiative mixte</title>
       <author><first>Pierre</first><last>Erbacher</last></author>
       <author><first>Philippe</first><last>Preux</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Laure</first><last>Soulier</last></author>
       <pages>37–58</pages>
       <abstract>Une des particularités des systèmes de recherche conversationnelle est qu’ils impliquent des initiatives mixtes telles que des questions de clarification des requêtes générées par le système pour mieux comprendre le besoin utilisateur. L’évaluation de ces systèmes à grande échelle sur la tâche finale de RI est très difficile et nécessite des ensembles de données adéquats contenant de telles interactions. Cependant, les jeux de données actuels se concentrent uniquement sur les tâches traditionnelles de RI ad hoc ou sur les tâches de clarification de la requête. Pour combler cette lacune, nous proposons une méthodologie pour construire automatiquement des ensembles de données de RI conversationnelle à grande échelle à partir d’ensembles de données de RI ad hoc afin de faciliter les explorations sur la RI conversationnelle. Nous effectuons une évaluation approfondie montrant la qualité et la pertinence des interactions générées pour chaque requête initiale. Cet article montre la faisabilité et l’utilité de l’augmentation des ensembles de données de RI ad-hoc pour la RI conversationnelle.</abstract>
@@ -819,7 +819,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="6">
       <title>Adaptation de domaine pour la recherche dense par annotation automatique</title>
       <author><first>Minghan</first><last>Li</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>93–110</pages>
       <abstract>Bien que la recherche d’information neuronale ait connu des améliorations, les modèles de recherche dense ont une capacité de généralisation à de nouveaux domaines limitée, contrairement aux modèles basés sur l’interaction. Les approches d’apprentissage adversarial et de génération de requêtes n’ont pas résolu ce problème. Cet article propose une approche d’auto-supervision utilisant des étiquettes de pseudo-pertinence automatiquement générées pour le domaine cible. Le modèle T53B est utilisé pour réordonner une liste de documents fournie par BM25 afin d’obtenir une annotation des exemples positifs. L’extraction des exemples négatifs est effectuée en explorant différentes stratégies. Les expériences montrent que cette approche aide le modèle dense sur le domaine cible et améliore l’approche de génération de requêtes GPL.</abstract>
       <url hash="a1018d15">2023.jeptalnrecital-coria.6</url>
@@ -886,7 +886,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="12">
       <title>Reconnaissance d’Entités Nommées fondée sur des Modèles de Langue Enrichis avec des Définitions des Types d’Entités</title>
       <author><first>Jesús</first><last>Lovón Melgarejo</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Lynda</first><last>Tamine</last></author>
@@ -912,7 +912,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="14">
       <title>Highlighting exact matching via marking strategies for ad hoc document ranking with pretrained contextualized language models</title>
       <author><first>Lila</first><last>Boualili</last></author>
-      <author><first>Jose</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose</first><last>Moreno</last></author>
       <author><first>Mohand</first><last>Boughanem</last></author>
       <pages>201–201</pages>
       <abstract>Les modèles de langue pré-entraînés (MLPs) à l’instar de BERT se sont révélés remarquablement efficaces pour le classement ad hoc. Contrairement aux modèles antérieurs à BERT qui nécessitent des composants neuronaux spécialisés pour capturer les différents aspects de la pertinence entre la requête et le document, les MLPs sont uniquement basés sur des blocs de “transformers” où l’attention est le seul mécanisme utilisé pour extraire des signaux à partir des interactions entre les termes de la requête et le document. Grâce à l’attention croisée du “transformer”, BERT s’est avéré être un modèle d’appariement sémantique efficace. Cependant, l’appariement exact reste un signal essentiel pour évaluer la pertinence d’un document par rapport à une requête de recherche d’informations, en dehors de l’appariement sémantique. Dans cet article, nous partons de l’hypothèse que BERT pourrait bénéficier d’indices explicites d’appariement exact pour mieux s’adapter à la tâche d’estimation de pertinence. Dans ce travail, nous explorons des stratégies d’intégration des signaux d’appariement exact en utilisant des “tokens” de marquage permettant de mettre en évidence les correspondances exactes entre les termes de la requête et ceux du document. Nous constatons que cette approche de marquage simple améliore de manière significative le modèle BERT vanille de référence. Nous démontrons empiriquement l’efficacité de notre approche par le biais d’expériences exhaustives sur trois collections standards en recherche d’information (RI). Les résultats montrent que les indices explicites de correspondance exacte transmis par le marquage sont bénéfiques pour des MLPs aussi bien BERT que pour ELECTRA. Nos résultats confirment que les indices traditionnels de RI, tels que la correspondance exacte de termes, sont toujours utiles pour les nouveaux modèles contextualisés pré-entraînés tels que BERT.</abstract>
@@ -939,7 +939,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Nam</first><last>Le Hai</last></author>
       <author><first>Thomas</first><last>Gerald</last></author>
       <author><first>Thibault</first><last>Formal</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Benjamin</first><last>Piwowarksi</last></author>
       <author><first>Laure</first><last>Soulier</last></author>
       <pages>207–212</pages>
@@ -951,10 +951,10 @@ In NLP, the automatic detection of logical contradictions between statements is
     <paper id="17">
       <title>The Power of Selecting Key Blocks with Local Pre-ranking for Long Document Information Retrieval</title>
       <author><first>Minghan</first><last>Li</last></author>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>Johan</first><last>Chagnon</last></author>
       <author><first>Yagmur Gizem</first><last>Cinar</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>213–213</pages>
       <abstract>Les réseaux neuronaux profonds et les modèles fondés sur les transformeurs comme BERT ont envahi le domaine de la recherche d’informations (RI) ces dernières années. Leur succès est lié au mécanisme d’auto-attention qui permet de capturer les dépendances entre les mots indépendamment de leur distance. Cependant, en raison de sa complexité quadratique dans le nombre de mots, ce mécanisme ne peut être directement utilisé sur de longues séquences, ce qui ne permet pas de déployer entièrement les modèles neuronaux sur des documents longs pouvant contenir des milliers de mots. Trois stratégies standard ont été adoptées pour contourner ce problème. La première consiste à tronquer les documents longs, la deuxième à segmenter les documents longs en passages plus courts et la dernière à remplacer le module d’auto-attention par des modules d’attention parcimonieux. Dans le premier cas, des informations importantes peuvent être perdues et le jugement de pertinence n’est fondé que sur une partie de l’information contenue dans le document. Dans le deuxième cas, une architecture hiérarchique peut être adoptée pour construire une représentation du document sur la base des représentations de chaque passage. Cela dit, malgré ses résultats prometteurs, cette stratégie reste coûteuse en temps, en mémoire et en énergie. Dans le troisième cas, les contraintes de parcimonie peuvent conduire à manquer des dépendances importantes et, in fine, à des résultats sous-optimaux. L’approche que nous proposons est légèrement différente de ces stratégies et vise à capturer, dans les documents longs, les blocs les plus importants permettant de décider du statut, pertinent ou non, de l’ensemble du document. Elle repose sur trois étapes principales : (a) la sélection de blocs clés (c’est-à-dire susceptibles d’être pertinents) avec un pré-classement local en utilisant soit des modèles de RI classiques, soit un module d’apprentissage, (b) l’apprentissage d’une représentation conjointe des requêtes et des blocs clés à l’aide d’un modèle BERT standard, et (c) le calcul d’un score de pertinence final qui peut être considéré comme une agrégation d’informations de pertinence locale. Dans cet article, nous menons tout d’abord une analyse qui révèle que les signaux de pertinence peuvent apparaître à différents endroits dans les documents et que de tels signaux sont mieux capturés par des relations sémantiques que par des correspondances exactes. Nous examinons ensuite plusieurs méthodes pour sélectionner les blocs pertinents et montrons comment intégrer ces méthodes dans les modèles récents de RI.</abstract>
       <url hash="43d3316e">2023.jeptalnrecital-coria.17</url>
@@ -985,7 +985,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="rjc" ingest-date="2023-06-25" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes des 16e Rencontres Jeunes Chercheurs en RI (RJCRI) et 25e Rencontre des Étudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (RÉCITAL)</booktitle>
-      <editor><first>Marie</first><last>Candito</last></editor>
+      <editor id="marie-candito"><first>Marie</first><last>Candito</last></editor>
       <editor><first>Thomas</first><last>Gerald</last></editor>
       <editor><first>José G</first><last>Moreno</last></editor>
       <publisher>ATALA</publisher>
@@ -1103,7 +1103,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de l'atelier "Analyse et Recherche de Textes Scientifiques" (ARTS)@TALN 2023</booktitle>
       <editor><first>Florian</first><last>Boudin</last></editor>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Richard</first><last>Dufour</last></editor>
       <editor><first>Oumaima</first><last>El</last></editor>
       <editor><first>Maël</first><last>Houbre</last></editor>
@@ -1131,7 +1131,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Pascal</first><last>De Groote</last></author>
       <author><first>Michel</first><last>Komajda</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>1–7</pages>
       <abstract>La pré-annotation automatique de textes est une tâche essentielle qui peut faciliter l’annotationd’un corpus de textes. Dans le contexte de la cardiologie, l’annotation est une tâche complexe quinécessite des connaissances approfondies dans le domaine et une expérience pratique dans le métier.Pré-annoter les textes vise à diminuer le temps de sollicitation des experts, facilitant leur concentrationsur les aspects plus critiques de l’annotation. Nous rapportons ici une expérience de pré-annotationde textes cliniques en cardiologie : nous présentons ses modalités et les observations que nous enretirons sur l’interaction avec les experts du domaine et la mise au point du schéma d’an</abstract>
       <url hash="65830462">2023.jeptalnrecital-arts.1</url>
@@ -1142,11 +1142,11 @@ In NLP, the automatic detection of logical contradictions between statements is
       <title><fixed-case>M</fixed-case>a<fixed-case>TOS</fixed-case>: Traduction automatique pour la science ouverte</title>
       <author><first>Maud</first><last>Bénard</last></author>
       <author><first>Alexandra</first><last>Mestivier</last></author>
-      <author><first>Natalie</first><last>Kubler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kubler</last></author>
       <author><first>Lichao</first><last>Zhu</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Eric</first><last>De La Clergerie</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Mathilde</first><last>Huguin</last></author>
       <author><first>Jean-François</first><last>Nominé</last></author>
       <author><first>Ziqian</first><last>Peng</last></author>
@@ -1198,7 +1198,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Haytame</first><last>Fallah</last></author>
       <author><first>Elisabeth</first><last>Murisasco</last></author>
       <author><first>Emmanuel</first><last>Bruno</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>34–40</pages>
       <abstract>Dans cet article, nous proposons des approches pour améliorer les architectures basées sur des transformeurs pour la classification de documents multi-labels. Les dépendances entre les labels sont cruciales dans ce contexte. Notre méthode, appelée DepReg, ajoute un terme de régularisation à la fonction de perte pour encourager le modèle à prédire des labels susceptibles de coexister. Nous introduisons également un nouveau jeu de données nommé “arXiv-ACM”, composé de résumés scientifiques de la bibliothèque numérique arXiv, étiquetés avec les mots-clés ACM correspondants.</abstract>
       <url hash="310f8dec">2023.jeptalnrecital-arts.6</url>
@@ -1282,7 +1282,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Aman</first><last>Sinha</last></author>
       <author><first>Sam</first><last>Bigeard</last></author>
       <author><first>Marianne</first><last>Clausel</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>80–85</pages>
       <abstract>The number of scientific articles is increasing tremendously across all domains to such an extent that it has become hard for researchers to remain up-to-date. Evidently, scientific language understanding systems and Information Extraction (IE) systems, with the advancement of Natural Language Processing (NLP) techniques, are benefiting the needs of users. Although the majority of the practices for building such systems are data-driven, advocating the idea of “The more, the better”. In this work, we revisit the paradigm - questioning what type of data : text (title, abstract) or citations, can have more impact on the performance of scientific language understanding systems.</abstract>
       <url hash="2e0b2dd3">2023.jeptalnrecital-arts.14</url>
@@ -1293,7 +1293,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes du Défi Fouille de Textes@TALN2023</booktitle>
       <editor><first>Adrien</first><last>Bazoge</last></editor>
-      <editor><first>Béatrice</first><last>Daille</last></editor>
+      <editor id="beatrice-daille"><first>Béatrice</first><last>Daille</last></editor>
       <editor><first>Richard</first><last>Dufour</last></editor>
       <editor><first>Yanis</first><last>Labrak</last></editor>
       <editor><first>Emmanuel</first><last>Morin</last></editor>
@@ -1371,7 +1371,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     </paper>
     <paper id="5">
       <title><fixed-case>LIS</fixed-case>@<fixed-case>DEFT</fixed-case>’23 : les <fixed-case>LLM</fixed-case>s peuvent-ils répondre à des <fixed-case>QCM</fixed-case> ? (a) oui; (b) non; (c) je ne sais pas.</title>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>46–56</pages>
       <abstract>Cet article présente un ensemble d’expériences sur la tâche de réponse à des questions à choix multiple de DEFT 2023. Des grands modèles de langage sont amorcés avec les questions afin de collecter les réponses générées. Les résultats montrent que les modèles ouverts sans affinage obtiennent des performances similaires à celles d’un système supervisé fondé sur BERT, et que l’affinage sur les données de la tâche apporte des améliorations.</abstract>
       <url hash="b0d07bb3">2023.jeptalnrecital-deft.5</url>
@@ -1406,7 +1406,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="demos" ingest-date="2023-10-05" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 5 : démonstrations</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -1469,7 +1469,7 @@ In NLP, the automatic detection of logical contradictions between statements is
   <volume id="projet" ingest-date="2023-10-05" type="proceedings">
     <meta>
       <booktitle>Actes de CORIA-TALN 2023. Actes de la 30e Conférence sur le Traitement Automatique des Langues Naturelles (TALN), volume 6 : projets</booktitle>
-      <editor><first>Christophe</first><last>Servan</last></editor>
+      <editor id="christophe-servan"><first>Christophe</first><last>Servan</last></editor>
       <editor><first>Anne</first><last>Vilnat</last></editor>
       <publisher>ATALA</publisher>
       <address>Paris, France</address>
@@ -1492,13 +1492,13 @@ In NLP, the automatic detection of logical contradictions between statements is
     </paper>
     <paper id="2">
       <title>Projet Gender Equality Monitor (<fixed-case>GEM</fixed-case>)</title>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>François</first><last>Buet</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Camille</first><last>Guinaudeau</last></author>
       <author><first>Lufei</first><last>Liu</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Albert</first><last>Rilliard</last></author>
       <author><first>Uro</first><last>Rémi</last></author>
       <pages>21–21</pages>
@@ -1517,7 +1517,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     </paper>
     <paper id="4">
       <title><fixed-case>TEMITALC</fixed-case> : Text Mining et <fixed-case>TAL</fixed-case> pour Analyser le Langage des Cachalots</title>
-      <author><first>Jose</first><last>Coch</last></author>
+      <author id="jose-coch"><first>Jose</first><last>Coch</last></author>
       <author><first>Olivier</first><last>Adam</last></author>
       <pages>23–25</pages>
       <abstract>Les cachalots (Physeter macrocephalus) sont les plus grands des cétacés à dents. Comme tous les cétacés, ils communiquent notamment par des émissions vocales. Les cachalots produisent des clics au cours de leurs activités vitales et leurs interactions sociales. Certains de ces sons sont organisés en séquences temporelles, appelées « codas ». Depuis plus d’une dizaine d’années, des échanges audio ou « conversations » entre cachalots sont enregistrés dans de nombreux endroits dans le monde, par exemple dans l’Océan Pacifique, dans les Caraïbes et dans l’Océan Indien. La particularité des échanges vocaux entre cachalots fait que ces codas sont numérisables relativement facilement. Ainsi, il existe des corpus de transcriptions de conversations en particulier venant des origines géographiques citées. Durant 2022, une collaboration entre le Service NLP de Dassault Systèmes et l’équipe Bioacoustique de Sorbonne Université, basée sur les enregistrements sonores collectés et mis à disposition par Longitude 181 et Label Bleu Production, nous a permis d’initier un projet d’application des techniques de Text Mining et Traitement Automatique du Langage à l’étude du langage des cachalots. Nous avons exposé les premiers résultats du projet dans un article publié dans les Actes de l’atelier TextMine’23 de la conférence EGC’2023 concernant un corpus de cachalots résidents au large de l’Ile Maurice et identifiés individuellement. Nous utilisons dans ce projet le logiciel Proxem Studio, qui a la particularité de pouvoir être appliqué sans modèle de langue préalable car il peut construire des modèles de langue à partir des corpus à analyser. L’objectif du projet couvre les points suivants : - Optimiser et automatiser la transcription en codas des échanges audio entre cachalots, - Analyser les propriétés formelles du langage des cachalots : mettre en évidence que l’ordre entre codas a une importance, et découvrir s‘il est possible de décrire une proto-syntaxe de ce langage, - Mettre au point un référentiel d’éléments non linguistiques (comportements sociaux, données démographiques, relations familiales) et identifier des codas ou des séquences de codas montrant une corrélation avec ces éléments non linguistiques, et in fine, avancer des hypothèses sur la fonction de certaines codas ou séquences de codas, - Etudier les corrélations entre les participants à chaque conversation et les codas émis afin de déterminer si des codas ou séquences de codas peuvent être associées à des individus. Le projet bénéficie d’un financement de Dassault Systèmes et de Sorbonne Université. La fin du projet est prévue pour décembre 2024. Nos résultats vont contribuer ainsi à décrire le sophistiqué langage d’une espèce non-humaine.</abstract>
@@ -1527,7 +1527,7 @@ In NLP, the automatic detection of logical contradictions between statements is
     </paper>
     <paper id="5">
       <title>mu<fixed-case>D</fixed-case>ial<fixed-case>B</fixed-case>ot, vers l’interaction humain-robot multimodale pro-active</title>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <author><first>Timothée</first><last>Dhaussy</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
       <author><first>Ahmed</first><last>Njifenjou</last></author>
@@ -1570,7 +1570,7 @@ In NLP, the automatic detection of logical contradictions between statements is
       <author><first>Pierre</first><last>Erbacher</last></author>
       <author><first>Thomas</first><last>Gerald</last></author>
       <author><first>Hanane</first><last>Djeddal</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Philippe</first><last>Preux</last></author>
       <pages>36–36</pages>
       <abstract>Le projet ANR JCJC SESAMS s’intéresse depuis 2018 au paradigme désormais actuels des systèmes de recherche d’information conversationnels. L’objectif est de formaliser des modèles de recherche d’information capables de fluidifier les interactions avec les utilisateurs pendant une session de recherche. Nous abordons différents enjeux : la prise en compte d’une conversation en langage naturel en contexte d’une recherche d’information, la génération d’interactions permettant de clarifier les besoins en information, la génération de réponse en langage naturel, ainsi que l’apprentissage continu pour s’adapter aux nouveaux besoins des utilisateurs. Nous présenterons dans ce poster ces différents enjeux et les contributions associées. Nous pourrons également discuter les perspectives de recherche dans ce domaine suite au développement récents des gros modèles de langue.</abstract>
diff --git a/data/xml/2023.konvens.xml b/data/xml/2023.konvens.xml
index 41a68e800e..18b4e4e80a 100644
--- a/data/xml/2023.konvens.xml
+++ b/data/xml/2023.konvens.xml
@@ -23,7 +23,7 @@
       <author><first>Bolei</first><last>Ma</last></author>
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1–16</pages>
       <url hash="e66ed71c">2023.konvens-main.1</url>
       <bibkey>ma-etal-2023-prompt</bibkey>
@@ -57,7 +57,7 @@
       <author><first>Maximilian</first><last>Maurer</last></author>
       <author><first>Chris</first><last>Jenkins</last></author>
       <author><first>Filip</first><last>Miletić</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>40–51</pages>
       <url hash="f15da26d">2023.konvens-main.4</url>
       <bibkey>maurer-etal-2023-classifying</bibkey>
@@ -69,7 +69,7 @@
       <author><first>Florian</first><last>Schneider</last></author>
       <author><first>Isabel</first><last>Eiser</last></author>
       <author><first>Gertraud</first><last>Koch</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>52–62</pages>
       <url hash="3f8af368">2023.konvens-main.5</url>
       <bibkey>petersen-frey-etal-2023-qualitative</bibkey>
@@ -87,7 +87,7 @@
     <paper id="7">
       <title>Steps towards Addressing Text Classification in Low-Resource Languages</title>
       <author><first>Maximilian</first><last>Weißenbacher</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>69–76</pages>
       <url hash="24fe3aa9">2023.konvens-main.7</url>
       <bibkey>weissenbacher-kruschwitz-2023-steps</bibkey>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="10">
       <title>Linking <fixed-case>D</fixed-case>anish Parser Output to a Central Word Repository - From Morphosemantic Disambiguation to Unique Identifiers</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>93–101</pages>
       <url hash="143f9aeb">2023.konvens-main.10</url>
       <bibkey>bick-2023-linking</bibkey>
@@ -225,7 +225,7 @@
     <paper id="22">
       <title>Political claim identification and categorization in a multilingual setting: First experiments</title>
       <author><first>Urs</first><last>Zaberer</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <pages>219–228</pages>
       <url hash="add587f3">2023.konvens-main.22</url>
@@ -235,7 +235,7 @@
       <title>Policy Domain Prediction from Party Manifestos with Adapters and Knowledge Enhanced Transformers</title>
       <author><first>Hsiao-Chu</first><last>Yu</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>229–244</pages>
       <url hash="684183ef">2023.konvens-main.23</url>
       <bibkey>yu-etal-2023-policy</bibkey>
diff --git a/data/xml/2023.latechclfl.xml b/data/xml/2023.latechclfl.xml
index a56bcd8cd5..4086fffc85 100644
--- a/data/xml/2023.latechclfl.xml
+++ b/data/xml/2023.latechclfl.xml
@@ -6,7 +6,7 @@
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dubrovnik, Croatia</address>
       <month>May</month>
@@ -20,7 +20,7 @@
     </frontmatter>
     <paper id="1">
       <title>Standard and Non-standard Adverbial Markers: a Diachronic Analysis in <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Literature</title>
-      <author><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Fangqiong</first><last>Zhan</last><affiliation>Nanyang Technological University National Institute of Education</affiliation></author>
       <author><first>Wenxiu</first><last>Xie</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Xiao</first><last>Han</last><affiliation>Universiti Kebangsaan Malaysia</affiliation></author>
@@ -35,8 +35,8 @@
     </paper>
     <paper id="2">
       <title><fixed-case>GP</fixed-case>oe<fixed-case>T</fixed-case>: a Language Model Trained for Rhyme Generation on Synthetic Data</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last><affiliation>Heig-vd / Hes-so</affiliation></author>
-      <author><first>Àlex R.</first><last>Atrio</last><affiliation>Heig-vd / Hes-so &amp; Epfl</affiliation></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last><affiliation>Heig-vd / Hes-so</affiliation></author>
+      <author id="alex-r-atrio"><first>Àlex R.</first><last>Atrio</last><affiliation>Heig-vd / Hes-so &amp; Epfl</affiliation></author>
       <author><first>Bastien</first><last>Bernath</last><affiliation>Epfl</affiliation></author>
       <author><first>Etienne</first><last>Boisson</last><affiliation>Epfl</affiliation></author>
       <author><first>Teo</first><last>Ferrari</last><affiliation>Hes-so</affiliation></author>
@@ -113,7 +113,7 @@
       <title>Emotion Recognition based on Psychological Components in Guided Narratives for Emotion Regulation</title>
       <author><first>Gustave</first><last>Cortal</last><affiliation>Ecole Normale Superieure de Paris-Saclay</affiliation></author>
       <author><first>Alain</first><last>Finkel</last><affiliation>Ecole Normale Superieure de Paris-Saclay</affiliation></author>
-      <author><first>Patrick</first><last>Paroubek</last><affiliation>Laboratoire Interdisciplinaire des Sciences du Numérique</affiliation></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last><affiliation>Laboratoire Interdisciplinaire des Sciences du Numérique</affiliation></author>
       <author><first>Lina</first><last>Ye</last><affiliation>Ecole Normale Superieure de Paris-Saclay</affiliation></author>
       <pages>72-81</pages>
       <abstract>Emotion regulation is a crucial element in dealing with emotional events and has positive effects on mental health. This paper aims to provide a more comprehensive understanding of emotional events by introducing a new French corpus of emotional narratives collected using a questionnaire for emotion regulation. We follow the theoretical framework of the Component Process Model which considers emotions as dynamic processes composed of four interrelated components (behavior, feeling, thinking and territory). Each narrative is related to a discrete emotion and is structured based on all emotion components by the writers. We study the interaction of components and their impact on emotion classification with machine learning methods and pre-trained language models. Our results show that each component improves prediction performance, and that the best results are achieved by jointly considering all components. Our results also show the effectiveness of pre-trained language models in predicting discrete emotion from certain components, which reveal differences in how emotion components are expressed.</abstract>
@@ -218,8 +218,8 @@
       <author><first>Hyun Jung</first><last>Kang</last><affiliation>Orange Innovation</affiliation></author>
       <author><first>Ismaël</first><last>Rousseau</last><affiliation>Orange Innovation</affiliation></author>
       <author><first>Ghislaine</first><last>Azémard</last><affiliation>Université Paris 8, Chaire UNESCO Innovation,Transmission, Édition Numériques</affiliation></author>
-      <author><first>Frederic</first><last>Bechet</last><affiliation>Aix Marseille Universite - LIS/CNRS</affiliation></author>
-      <author><first>Geraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last><affiliation>Aix Marseille Universite - LIS/CNRS</affiliation></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
       <pages>141-151</pages>
       <abstract>This paper proposes a new approach for exploring digitized humanities and social sciences collections based on explainable links built from questions. Our experiments show the quality of our automatically generated questions and their relevance in a local context as well as the originality of the links produced by embeddings based on these questions. Analyses have also been performed to understand the types of questions generated on our corpus, and the related uses that can enrich the exploration. The relationships between the co-references and the questions generated, and the answers extracted from the text were also discussed and open a path for future improvements for our system in their resolution.</abstract>
       <url hash="c6c5bacb">2023.latechclfl-1.16</url>
diff --git a/data/xml/2023.law.xml b/data/xml/2023.law.xml
index f560eed7ce..234c1a5292 100644
--- a/data/xml/2023.law.xml
+++ b/data/xml/2023.law.xml
@@ -39,7 +39,7 @@
     </paper>
     <paper id="3">
       <title>Difficulties in Handling Mathematical Expressions in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <pages>19-30</pages>
       <abstract>In this paper, we give a brief survey of the difficulties in handling the syntax of mathematical expressions in Universal Dependencies, focusing on examples from English language corpora. We first examine the prevalence and current handling of mathematical expressions in UD corpora. We then examine several strategies for how to approach the handling of syntactic dependencies for such expressions: as multi-word expressions, as a domain appropriate for code-switching, or as approximate to other types of natural language. Ultimately, we argue that mathematical expressions should primarily be analyzed as natural language, and we offer recommendations for the treatment of basic mathematical expressions as analogous to English natural language.</abstract>
       <url hash="7f5a61e9">2023.law-1.3</url>
@@ -49,7 +49,7 @@
     <paper id="4">
       <title>A Dataset for Physical and Abstract Plausibility and Sources of Human Disagreement</title>
       <author><first>Annerose</first><last>Eichel</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>31-45</pages>
       <abstract>We present a novel dataset for physical and abstract plausibility of events in English. Based on naturally occurring sentences extracted from Wikipedia, we infiltrate degrees of abstractness, and automatically generate perturbed pseudo-implausible events. We annotate a filtered and balanced subset for plausibility using crowd-sourcing, and perform extensive cleansing to ensure annotation quality. In-depth quantitative analyses indicate that annotators favor plausibility over implausibility and disagree more on implausible events. Furthermore, our plausibility dataset is the first to capture abstractness in events to the same extent as concreteness, and we find that event abstractness has an impact on plausibility ratings: more concrete event participants trigger a perception of implausibility.</abstract>
       <url hash="25a5e2eb">2023.law-1.4</url>
@@ -111,9 +111,9 @@
     <paper id="9">
       <title>Extending an Event-type Ontology: Adding Verbs and Classes Using Fine-tuned <fixed-case>LLM</fixed-case>s Suggestions</title>
       <author><first>Jana</first><last>Straková</last><affiliation>Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics</affiliation></author>
-      <author><first>Eva</first><last>Fučíková</last><affiliation>Charles University</affiliation></author>
-      <author><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
-      <author><first>Zdeňka</first><last>Urešová</last><affiliation>Charles University</affiliation></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last><affiliation>Charles University</affiliation></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last><affiliation>Charles University</affiliation></author>
       <pages>85-95</pages>
       <abstract>In this project, we have investigated the use of advanced machine learning methods, specifically fine-tuned large language models, for pre-annotating data for a lexical extension task, namely adding descriptive words (verbs) to an existing (but incomplete, as of yet) ontology of event types. Several research questions have been focused on, from the investigation of a possible heuristics to provide at least hints to annotators which verbs to include and which are outside the current version of the ontology, to the possible use of the automatic scores to help the annotators to be more efficient in finding a threshold for identifying verbs that cannot be assigned to any existing class and therefore they are to be used as seeds for a new class. We have also carefully examined the correlation of the automatic scores with the human annotation. While the correlation turned out to be strong, its influence on the annotation proper is modest due to its near linearity, even though the mere fact of such pre-annotation leads to relatively short annotation times.</abstract>
       <url hash="f40a9fe9">2023.law-1.9</url>
@@ -179,7 +179,7 @@
       <author><first>Michael</first><last>Regan</last><affiliation>University of Washington</affiliation></author>
       <author><first>Adam</first><last>Pollins</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last><affiliation>Colorado State University</affiliation></author>
-      <author><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>136-145</pages>
       <abstract>Annotating cross-document event coreference links is a time-consuming and cognitively demanding task that can compromise annotation quality and efficiency. To address this, we propose a model-in-the-loop annotation approach for event coreference resolution, where a machine learning model suggests likely corefering event pairs only. We evaluate the effectiveness of this approach by first simulating the annotation process and then, using a novel annotator-centric Recall-Annotation effort trade-off metric, we compare the results of various underlying models and datasets. We finally present a method for obtaining 97% recall while substantially reducing the workload required by a fully manual annotation process.</abstract>
       <url hash="169a3f0d">2023.law-1.14</url>
@@ -213,9 +213,9 @@
       <author><first>Tatsuya</first><last>Aoyama</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Shabnam</first><last>Behzad</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Luke</first><last>Gessler</last><affiliation>Georgetown University</affiliation></author>
-      <author><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Jessica</first><last>Lin</last><affiliation>Georgetown University</affiliation></author>
-      <author><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Yilun</first><last>Zhu</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
@@ -243,7 +243,7 @@
       <author><first>Tamás</first><last>Zombori</last><affiliation>University of Szeged</affiliation></author>
       <author><first>Gergő</first><last>Szabó</last><affiliation>University of Szeged</affiliation></author>
       <author><first>Zsolt</first><last>Szántó</last><affiliation>University of Szeged</affiliation></author>
-      <author><first>Richárd</first><last>Farkas</last><affiliation>University of Szeged</affiliation></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last><affiliation>University of Szeged</affiliation></author>
       <pages>188-198</pages>
       <abstract>Within the research presented in this article, we created a new question answering benchmark database for Hungarian called MILQA. When creating the dataset, we basically followed the principles of the English SQuAD 2.0, however, like in some more recent English question answering datasets, we introduced a number of innovations beyond SQuAD: e.g., yes/no-questions, list-like answers consisting of several text spans, long answers, questions requiring calculation and other question types where you cannot simply copy the answer from the text. For all these non-extractive question types, the pragmatically adequate form of the answer was also added to make the training of generative models possible. We implemented and evaluated a set of baseline retrieval and answer span extraction models on the dataset. BM25 performed better than any vector-based solution for retrieval. Cross-lingual transfer from English significantly improved span extraction models.</abstract>
       <url hash="9afffe3f">2023.law-1.19</url>
@@ -268,7 +268,7 @@
       <author><first>Kristin</first><last>Wright-bettner</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Skatje</first><last>Myers</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Nianwen</first><last>Xue</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
       <pages>211-219</pages>
       <abstract>UMR-Writer is a web-based tool for annotating semantic graphs with the Uniform Meaning Representation (UMR) scheme. UMR is a graph-based semantic representation that can be applied cross-linguistically for deep semantic analysis of texts. In this work, we implemented a new keyboard interface in UMR-Writer 2.0, which is a powerful addition to the original mouse interface, supporting faster annotation for more experienced annotators. The new interface also addresses issues with the original mouse interface. Additionally, we demonstrate an efficient workflow for annotation project management in UMR-Writer 2.0, which has been applied to many projects.</abstract>
       <url hash="293fa2b5">2023.law-1.21</url>
@@ -327,7 +327,7 @@
       <title>Enriching the <fixed-case>NA</fixed-case>rabizi Treebank: A Multifaceted Approach to Supporting an Under-Resourced Language</title>
       <author><first>Arij</first><last>Riabi</last><affiliation>Inria; Sorbonne Université</affiliation></author>
       <author><first>Menel</first><last>Mahamdi</last><affiliation>Inria Paris</affiliation></author>
-      <author><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
       <pages>266-278</pages>
       <abstract>In this paper we address the scarcity of annotated data for NArabizi, a Romanized form of North African Arabic used mostly on social media, which poses challenges for Natural Language Processing (NLP). We introduce an enriched version of NArabizi Treebank (Seddah et al., 2020) with three main contributions: the addition of two novel annotation layers (named entity recognition and offensive language detection) and a re-annotation of the tokenization, morpho-syntactic and syntactic layers that ensure annotation consistency. Our experimental results, using different tokenization schemes, showcase the value of our contributions and highlight the impact of working with non-gold tokenization for NER and dependency parsing. To facilitate future research, we make these annotations publicly available. Our enhanced NArabizi Treebank paves the way for creating sophisticated language models and NLP tools for this under-represented language.</abstract>
       <url hash="6fa9eadc">2023.law-1.26</url>
diff --git a/data/xml/2023.lchange.xml b/data/xml/2023.lchange.xml
index cdf07c5b7b..c90ad3a342 100644
--- a/data/xml/2023.lchange.xml
+++ b/data/xml/2023.lchange.xml
@@ -35,7 +35,7 @@
     <paper id="2">
       <title>Domain-Adapting <fixed-case>BERT</fixed-case> for Attributing Manuscript, Century and Region in Pre-<fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>lavic Texts</title>
       <author><first>Piroska</first><last>Lendvai</last><affiliation>Bavarian Academy of Sciences and Humanities</affiliation></author>
-      <author><first>Uwe</first><last>Reichel</last><affiliation>Hungarian Research Centre for Linguistics</affiliation></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last><affiliation>Hungarian Research Centre for Linguistics</affiliation></author>
       <author><first>Anna</first><last>Jouravel</last></author>
       <author><first>Achim</first><last>Rabus</last><affiliation>University of Freiburg</affiliation></author>
       <author><first>Elena</first><last>Renje</last><affiliation>University of Freiburg</affiliation></author>
@@ -84,7 +84,7 @@
     <paper id="6">
       <title>Political dogwhistles and community divergence in semantic change</title>
       <author><first>Max</first><last>Boholm</last><affiliation>University of Gothenburg</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <pages>53-65</pages>
       <abstract>We test whether the development of political dogwhistles can be observed using language change measures; specifically, does the development of a “hidden” message in a dogwhistle show up as differences in semantic change between communities over time? We take Swedish-language dogwhistles related to the on-going immigration debate and measure differences over time in their rate of semantic change between two Swedish-language community forums, Flashback and Familjeliv, the former representing an in-group for understanding the “hidden” meaning of the dogwhistles. We find that multiple measures are sensitive enough to detect differences over time, in that the meaning changes in Flashback over the relevant time period but not in Familjeliv. We also examine the sensitivity of multiple modeling approaches to semantic change in the matter of community divergence.</abstract>
       <url hash="99496059">2023.lchange-1.6</url>
@@ -143,7 +143,7 @@
       <author><first>Sanne</first><last>Hoeken</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Sophie</first><last>Spliethoff</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Silke</first><last>Schwandt</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Özge</first><last>Alacam</last><affiliation>Bielefeld University</affiliation></author>
       <pages>100-111</pages>
       <abstract>The investigation of lexical change has predominantly focused on generic language evolution, not suited for detecting shifts in a particular domain, such as hate speech. Our study introduces the task of identifying changes in lexical semantics related to hate speech within historical texts. We present an interdisciplinary approach that brings together NLP and History, yielding a pilot dataset comprising 16th-century Early Modern English religious writings during the Protestant Reformation. We provide annotations for both semantic shifts and hatefulness on this data and, thereby, combine the tasks of Lexical Semantic Change Detection and Hate Speech Detection. Our framework and resulting dataset facilitate the evaluation of our applied methods, advancing the analysis of hate speech evolution.</abstract>
@@ -180,7 +180,7 @@
       <author><first>Anna</first><last>Cai</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Ting</first><last>Chen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Annie</first><last>Zhang</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>129-142</pages>
       <abstract>We describe a set of new methods to partially automate linguistic phylogenetic inference given (1) cognate sets with their respective protoforms and sound laws, (2) a mapping from phones to their articulatory features and (3) a typological database of sound changes.We train a neural network on these sound change data to weight articulatory distances between phones and predict intermediate sound change steps between historical protoforms and their modern descendants, replacing a linguistic expert in part of a parsimony-based phylogenetic inference algorithm. In our best experiments on Tukanoan languages, this method produces trees with a Generalized Quartet Distance of 0.12 from a tree that used expert annotations, a significant improvement over other semi-automated baselines. We discuss potential benefits and drawbacks to our neural approach and parsimony-based tree prediction. We also experiment with a minimal generalization learner for automatic sound law induction, finding it less effective than sound laws from expert annotation. Our code is publicly available.</abstract>
       <url hash="6f89995b">2023.lchange-1.14</url>
diff --git a/data/xml/2023.ldk.xml b/data/xml/2023.ldk.xml
index 39a9452654..af04404d3d 100644
--- a/data/xml/2023.ldk.xml
+++ b/data/xml/2023.ldk.xml
@@ -49,7 +49,7 @@
     </paper>
     <paper id="5">
       <title>Leveraging <fixed-case>DB</fixed-case>nary Data to Enrich Information of Multiword Terms in <fixed-case>W</fixed-case>iktionary</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Lenka</first><last>Bajčetić</last></author>
       <pages>49-60</pages>
@@ -66,7 +66,7 @@
     <paper id="7">
       <title>Linking the Computational Historical Semantics corpus to the <fixed-case>L</fixed-case>i<fixed-case>L</fixed-case>a Knowledge Base of Interoperable Linguistic Resources for <fixed-case>L</fixed-case>atin</title>
       <author><first>Giulia</first><last>Pedonese</last></author>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Marco Carlo</first><last>Passarotti</last></author>
       <pages>74-85</pages>
       <url hash="fd26397f">2023.ldk-1.7</url>
@@ -78,7 +78,7 @@
       <author><first>Pierluigi</first><last>Cassotti</last></author>
       <author><first>Davide</first><last>Di Pierro</last></author>
       <author><first>Paola</first><last>Marongiu</last></author>
-      <author><first>Anas Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></author>
       <author><first>Stefano</first><last>Ferilli</last></author>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <pages>86-96</pages>
@@ -89,7 +89,7 @@
       <title>Contextual Profiling of Charged Terms in Historical Newspapers</title>
       <author><first>Ryan</first><last>Brate</last></author>
       <author><first>Marieke</first><last>Van Erp</last></author>
-      <author><first>Antal</first><last>Van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>Van den Bosch</last></author>
       <pages>97-108</pages>
       <url hash="cbc51666">2023.ldk-1.9</url>
       <bibkey>brate-etal-2023-contextual</bibkey>
@@ -99,7 +99,7 @@
       <author><first>Adrian</first><last>Doyle</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
       <author><first>Bernardo</first><last>Stearns</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Oksana</first><last>Dereza</last></author>
       <author><first>Priya</first><last>Rani</last></author>
       <pages>109-120</pages>
@@ -116,7 +116,7 @@
     <paper id="12">
       <title>Multimodal Offensive Meme Classification with Natural Language Inference</title>
       <author><first>Shardul</first><last>Suryawanshi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Suzanne</first><last>Little</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>134-145</pages>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="16">
       <title>Towards <fixed-case>ELT</fixed-case>e<fixed-case>C</fixed-case>-<fixed-case>LLOD</fixed-case>: <fixed-case>E</fixed-case>uropean Literary Text Collection Linguistic Linked Open Data</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Miloš</first><last>Utvić</last></author>
       <author><first>Olivera</first><last>Kitanović</last></author>
@@ -169,7 +169,7 @@
       <title><fixed-case>L</fixed-case>ex<fixed-case>E</fixed-case>x<fixed-case>M</fixed-case>achina<fixed-case>QA</fixed-case>: A framework for the automatic induction ofontology lexica for Question Answering over Linked Data</title>
       <author><first>Mohammad Fazleh</first><last>Elahi</last></author>
       <author><first>Basil</first><last>Ell</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>207-218</pages>
       <url hash="2e9ca9e7">2023.ldk-1.18</url>
       <bibkey>elahi-etal-2023-lexexmachinaqa</bibkey>
@@ -211,7 +211,7 @@
       <title><fixed-case>MG</fixed-case>2<fixed-case>P</fixed-case>: An Empirical Study Of Multilingual Training for <fixed-case>M</fixed-case>anx <fixed-case>G</fixed-case>2<fixed-case>P</fixed-case></title>
       <author><first>Shubhanker</first><last>Banerjee</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>246-255</pages>
       <url hash="88c5e4fb">2023.ldk-1.23</url>
       <bibkey>banerjee-etal-2023-mg2p</bibkey>
@@ -296,8 +296,8 @@
     <paper id="31">
       <title><fixed-case>ISO</fixed-case> <fixed-case>LMF</fixed-case> 24613-6: A Revised Syntax Semantics Module for the Lexical Markup Framework</title>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Anas Fahad</first><last>Khan</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></author>
       <pages>316-321</pages>
       <url hash="c247b260">2023.ldk-1.31</url>
       <bibkey>frontini-etal-2023-iso</bibkey>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="34">
       <title>Knowledge Storage Ecosystem: an Open Source Tool for <fixed-case>NLP</fixed-case> Results Management (Documents and Semantic Information)</title>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Maria Gonzalez</first><last>Garcia</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <pages>334-339</pages>
@@ -333,7 +333,7 @@
       <author><first>Florentina</first><last>Armaselu</last></author>
       <author><first>Elena-Simona</first><last>Apostol</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Anas Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></author>
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Barbara</first><last>McGillivray</last></author>
       <author><first>Ciprian-Octavian</first><last>Truica</last></author>
@@ -347,7 +347,7 @@
       <title>Adopting Linguistic Linked Data Principles: Insights on Users’ Experience</title>
       <author><first>Verginica</first><last>Mititelu</last></author>
       <author><first>Maria Pia</first><last>Di Buono</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Blerina</first><last>Spahiu</last></author>
       <author><first>Giedrė</first><last>Valūnaitė-Oleškevičienė</last></author>
       <pages>347-357</pages>
@@ -356,7 +356,7 @@
     </paper>
     <paper id="37">
       <title><fixed-case>GPT</fixed-case>3 as a <fixed-case>P</fixed-case>ortuguese Lexical Knowledge Base?</title>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Ricardo</first><last>Rodrigues</last></author>
       <pages>358-363</pages>
       <url hash="89cea78a">2023.ldk-1.37</url>
@@ -377,7 +377,7 @@
     <paper id="39">
       <title><fixed-case>CURED</fixed-case>4<fixed-case>NLG</fixed-case>: A Dataset for Table-to-Text Generation</title>
       <author><first>Nivranshu</first><last>Pasricha</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>374-384</pages>
       <url hash="b795dac9">2023.ldk-1.39</url>
@@ -386,7 +386,7 @@
     <paper id="40">
       <title>Beyond Concatenative Morphology: Applying <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Morph to <fixed-case>M</fixed-case>altese</title>
       <author><first>Maxim</first><last>Ionov</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <pages>385-391</pages>
       <url hash="eafa03f9">2023.ldk-1.40</url>
       <bibkey>ionov-rosner-2023-beyond</bibkey>
@@ -402,7 +402,7 @@
     </paper>
     <paper id="42">
       <title>Validation of the Bigger Analogy Test Set Translation into <fixed-case>C</fixed-case>roatian, <fixed-case>L</fixed-case>ithuanian and <fixed-case>S</fixed-case>lovak</title>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Ana</first><last>Ostroški Anić</last></author>
       <author><first>Sigita</first><last>Rackevičienė</last></author>
       <author><first>Giedrė</first><last>Valūnaitė-Oleškevičienė</last></author>
@@ -419,8 +419,8 @@
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Giedrė Valūnaitė</first><last>Oleškevičienė</last></author>
       <author><first>Andrius</first><last>Utka</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
-      <author><first>Anas Fahad</first><last>Khan</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
+      <author id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></author>
       <author><first>Elena-Simona</first><last>Apostol</last></author>
       <author><first>Ciprian-Octavian</first><last>Truica</last></author>
       <pages>410-416</pages>
@@ -430,7 +430,7 @@
     <paper id="44">
       <title><fixed-case>DB</fixed-case>nary2<fixed-case>V</fixed-case>ec: Preliminary Study on Lexical Embeddings for Downstream <fixed-case>NLP</fixed-case> Tasks</title>
       <author><first>Nakanyseth</first><last>Vuth</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>417-427</pages>
       <url hash="dfa3a91b">2023.ldk-1.44</url>
       <bibkey>vuth-serasset-2023-dbnary2vec</bibkey>
@@ -480,7 +480,7 @@
       <author><first>Pin-Er</first><last>Chen</last></author>
       <author><first>Hsin-Yu</first><last>Chou</last></author>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>455-465</pages>
       <url hash="49c87350">2023.ldk-1.49</url>
       <bibkey>wang-etal-2023-lexical</bibkey>
@@ -496,7 +496,7 @@
     <paper id="51">
       <title><fixed-case>DRIPPS</fixed-case>: a Corpus with Discourse Relations in Perfect Participial Sentences</title>
       <author><first>Purificação</first><last>Silvano</last></author>
-      <author><first>João</first><last>Cordeiro</last></author>
+      <author id="joao-paulo-cordeiro"><first>João</first><last>Cordeiro</last></author>
       <author><first>António</first><last>Leal</last></author>
       <author><first>Sebastião</first><last>Pais</last></author>
       <pages>470-481</pages>
@@ -653,7 +653,7 @@
     </paper>
     <paper id="67">
       <title><fixed-case>SLIWC</fixed-case>, Morality, <fixed-case>N</fixed-case>arr<fixed-case>O</fixed-case>nt and Senpy Annotations: four vocabularies to fight radicalization</title>
-      <author><first>J. Fernando</first><last>Sánchez-Rada</last></author>
+      <author id="j-fernando-sanchez-rada"><first>J. Fernando</first><last>Sánchez-Rada</last></author>
       <author><first>Oscar</first><last>Araque</last></author>
       <author><first>Guillermo</first><last>García-Grao</last></author>
       <author><first>Carlos Á.</first><last>Iglesias</last></author>
@@ -672,7 +672,7 @@
     <paper id="69">
       <title>Football terminology: compilation and transformation into <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Lemon resource</title>
       <author><first>Jelena</first><last>Lazarević</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Mihailo</first><last>Škorić</last></author>
       <author><first>Biljana</first><last>Rujević</last></author>
       <pages>634-645</pages>
@@ -682,7 +682,7 @@
     <paper id="70">
       <title>The Importance of Being Interoperable: Theoretical and Practical Implications in Converting <fixed-case>TBX</fixed-case> to <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Lemon</title>
       <author><first>Andrea</first><last>Bellandi</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Silvia</first><last>Piccini</last></author>
       <author><first>Federica</first><last>Vezzani</last></author>
       <pages>646-651</pages>
diff --git a/data/xml/2023.limo.xml b/data/xml/2023.limo.xml
index cf7d64eba2..42831a2c35 100644
--- a/data/xml/2023.limo.xml
+++ b/data/xml/2023.limo.xml
@@ -36,7 +36,7 @@
       <author><first>Daniel</first><last>Djahangir</last></author>
       <author><first>Seid</first><last>Muhie Yimam</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>6–11</pages>
       <url hash="902b5c29">2023.limo-1.2</url>
       <bibkey>geislinger-etal-2023-multi</bibkey>
diff --git a/data/xml/2023.loresmt.xml b/data/xml/2023.loresmt.xml
index 3b13091678..2070bbf1f6 100644
--- a/data/xml/2023.loresmt.xml
+++ b/data/xml/2023.loresmt.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2023-04-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Sixth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2023)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-hong</first><last>Liu</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Flammie</first><last>Pirinen</last></editor>
       <editor><first>Jade</first><last>Abbott</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
@@ -28,7 +28,7 @@
       <title>Train Global, Tailor Local: Minimalist Multilingual Translation into Endangered Languages</title>
       <author><first>Zhong</first><last>Zhou</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <pages>1-15</pages>
       <abstract>In many humanitarian scenarios, translation into severely low resource languages often does not require a universal translation engine, but a dedicated text-specific translation engine. For example, healthcare records, hygienic procedures, government communication, emergency procedures and religious texts are all limited texts. While generic translation engines for all languages do not exist, translation of multilingually known limited texts into new, endangered languages may be possible and reduce human translation effort. We attempt to leverage translation resources from rich resource languages to efficiently produce best possible translation quality for well known texts, which is available in multiple languages, in a new, severely low resource language. We examine two approaches: 1.) best selection of seed sentences to jump start translations in a new language in view of best generalization to the remainder of a larger targeted text(s), and 2.) we adapt large general multilingual translation engines from many other languages to focus on a specific text in a new, unknown language. We find that adapting large pretrained multilingual models to the domain/text first and then to the severely low resource language works best. If we also select a best set of seed sentences, we can improve average chrF performance on new test languages from a baseline of 21.9 to 50.7, while reducing the number of seed sentences to only ∼1,000 in the new, unknown language.</abstract>
       <url hash="e1d2c61e">2023.loresmt-1.1</url>
@@ -65,10 +65,10 @@
     </paper>
     <paper id="4">
       <title>A Simplified Training Pipeline for Low-Resource and Unsupervised Machine Translation</title>
-      <author><first>Àlex R.</first><last>Atrio</last><affiliation>Heig-vd / Hes-so &amp; Epfl</affiliation></author>
+      <author id="alex-r-atrio"><first>Àlex R.</first><last>Atrio</last><affiliation>Heig-vd / Hes-so &amp; Epfl</affiliation></author>
       <author><first>Alexis</first><last>Allemann</last><affiliation>Heig-vd / Hes-so</affiliation></author>
       <author><first>Ljiljana</first><last>Dolamic</last><affiliation>armasuisse S&amp;T</affiliation></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last><affiliation>Heig-vd / Hes-so</affiliation></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last><affiliation>Heig-vd / Hes-so</affiliation></author>
       <pages>47-58</pages>
       <url hash="9daf6f42">2023.loresmt-1.4</url>
       <bibkey>atrio-etal-2023-simplified</bibkey>
@@ -80,7 +80,7 @@
       <title>Language-Family Adapters for Low-Resource Multilingual Neural Machine Translation</title>
       <author><first>Alexandra</first><last>Chronopoulou</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Dario</first><last>Stojanovski</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>59-72</pages>
       <abstract>Large multilingual models trained with self-supervision achieve state-of-the-art results in a wide range of natural language processing tasks. Self-supervised pretrained models are often fine-tuned on parallel data from one or multiple language pairs for machine translation. Multilingual fine-tuning improves performance on low-resource languages but requires modifying the entire model and can be prohibitively expensive. Training a new adapter on each language pair or training a single adapter on all language pairs without updating the pretrained model has been proposed as a parameter-efficient alternative. However, the former does not permit any sharing between languages, while the latter shares parameters for all languages and is susceptible to negative interference. In this paper, we propose training language-family adapters on top of mBART-50 to facilitate cross-lingual transfer. Our approach outperforms related baselines, yielding higher translation scores on average when translating from English to 17 different low-resource languages. We also show that language-family adapters provide an effective method to translate to languages unseen during pretraining.</abstract>
       <url hash="a847d0f5">2023.loresmt-1.5</url>
@@ -156,7 +156,7 @@
     <paper id="10">
       <title>Evaluating Sentence Alignment Methods in a Low-Resource Setting: An <fixed-case>E</fixed-case>nglish-<fixed-case>Y</fixed-case>orù<fixed-case>B</fixed-case>á Study Case</title>
       <author><first>Edoardo</first><last>Signoroni</last><affiliation>NLP Centre, Faculty of Informatics, Masaryk University</affiliation></author>
-      <author><first>Pavel</first><last>Rychlý</last><affiliation>NLP Centre, Faculty of Informatics, Masaryk University</affiliation></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last><affiliation>NLP Centre, Faculty of Informatics, Masaryk University</affiliation></author>
       <pages>123-129</pages>
       <abstract>Parallel corpora are still crucial to train effective Machine Translation systems. This is even more true for low-resource language pairs, for which Neural Machine Translation has been shown to be less robust to domain mismatch and noise. Due to time and resource constraints, parallel corpora are mostly created with sentence alignment methods which automatically infer alignments. Recent work focused on state-of-the-art pre-trained sentence embeddings-based methods which are available only for a tiny fraction of the world’s languages. In this paper, we evaluate the performance of four widely used algorithms on the low-resource English-Yorùbá language pair against a multidomain benchmark parallel corpus on two experiments involving 1-to-1 alignments with and without reordering. We find that, at least for this language pair, earlier and simpler methods are more suited to the task, all the while not requiring additional data or resources. We also report that the methods we evaluated perform differently across distinct domains, thus indicating that some approach may be better for a specific domain or textual structure.</abstract>
       <url hash="fccd5197">2023.loresmt-1.10</url>
diff --git a/data/xml/2023.ltedi.xml b/data/xml/2023.ltedi.xml
index 8f1e9efd41..d5d3eea617 100644
--- a/data/xml/2023.ltedi.xml
+++ b/data/xml/2023.ltedi.xml
@@ -25,7 +25,7 @@
       <author><first>Nerses</first><last>Yuzbashyan</last></author>
       <author><first>Nikolay</first><last>Banar</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1–9</pages>
       <abstract>Conventional techniques for detecting online hate speech rely on the availability of a sufficient number of annotated instances, which can be costly and time consuming. For this reason, zero-shot or few-shot detection can offer an attractive alternative. In this paper, we explore a zero-shot detection approach based on natural language inference (NLI) models. Since the performance of the models in this approach depends heavily on the choice of a hypothesis, our goal is to determine which factors affect the quality of detection. We conducted a set of experiments with three NLI models and four hate speech datasets. We demonstrate that a zero-shot NLI-based approach is competitive with approaches that require supervised learning, yet they are highly sensitive to the choice of hypothesis. In addition, our experiments indicate that the results for a set of hypotheses on different model-data pairs are positively correlated, and that the correlation is higher for different datasets when using the same model than it is for different models when using the same dataset. These results suggest that if we find a hypothesis that works well for a specific model and domain or for a specific type of hate speech, we can use that hypothesis with the same model also within a different domain. While, another model might require different suitable hypotheses in order to demonstrate high performance.</abstract>
       <url hash="b4f59943">2023.ltedi-1.1</url>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title><fixed-case>E</fixed-case>nglish2<fixed-case>BSL</fixed-case>: A Rule-Based System for Translating <fixed-case>E</fixed-case>nglish into <fixed-case>B</fixed-case>ritish <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Phoebe Alexandra</first><last>Pinney</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>10–16</pages>
       <abstract>British Sign Language (BSL) is a complex language with its own vocabulary and grammatical structure, separate from English. Despite its long-standing and widespread use by Deaf communities within the UK, thus far, there have been no effective tools for translating written English into BSL. This overt lack of available resources made learning the language highly inaccessible for most people, exacerbating the communication barrier between hearing and Deaf individuals. This paper introduces a rule-based translation system, designed with the ambitious aim of creating the first web application that is not only able to translate sentences in written English into a BSL video output, but can also serve as a learning aid to empower the development of BSL proficiency.</abstract>
       <url hash="e8757b3f">2023.ltedi-1.2</url>
@@ -42,7 +42,7 @@
     </paper>
     <paper id="3">
       <title>Multilingual Models for Sentiment and Abusive Language Detection for <fixed-case>D</fixed-case>ravidian Languages</title>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <pages>17–24</pages>
       <abstract>This paper presents the TFIDF based LSTM and Hierarchical Attention Networks (HAN) for code-mixed abusive comment detection and sentiment analysis for Dravidian languages. The traditional TF-IDF-based techniques have out- performed the Hierarchical Attention models in both the sentiment analysis and abusive language detection tasks. The Tulu sentiment analysis system demonstrated better performance for the Positive and Neutral classes, whereas the Tamil sentiment analysis system exhibited lower performance overall. This highlights the need for more balanced datasets and additional research to enhance the accuracy of sentiment analysis in the Tamil language. In terms of abusive language detection, the TF-IDF-LSTM models generally outperformed the Hierarchical Attention models. However, the mixed models displayed better performance for specific classes such as “Homophobia” and “Xenophobia.” This implies that considering both code-mixed and original script data can offer a different perspective for research in social media analysis.</abstract>
       <url hash="d91a3835">2023.ltedi-1.3</url>
@@ -81,7 +81,7 @@
       <author><first>Rahul</first><last>Ponnusamy</last></author>
       <author><first>Malliga</first><last>S</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
       <author><first>Salud María</first><last>Jimenez-Zafra</last></author>
       <author><first>Jose Antonio</first><last>Garcia-Diaz</last></author>
       <author><first>Rafael</first><last>Valencia-Garcia</last></author>
@@ -96,13 +96,13 @@
       <author><first>Prasanna Kumar</first><last>Kumaresan</last></author>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Subalalitha</first><last>Cn</last></author>
-      <author><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
       <author><first>Salud María</first><last>Jiménez Zafra</last></author>
       <author><first>José Antonio</first><last>García-Díaz</last></author>
       <author><first>Rafael</first><last>Valencia-García</last></author>
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Daniel</first><last>García-Baena</last></author>
       <author><first>Kishore Kumar</first><last>Ponnusamy</last></author>
       <pages>47–53</pages>
@@ -122,7 +122,7 @@
     <paper id="9">
       <title>Evaluating the Impact of Stereotypes and Language Combinations on Gender Bias Occurrence in <fixed-case>NMT</fixed-case> Generic Systems</title>
       <author><first>Bertille</first><last>Triboulet</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>62–70</pages>
       <abstract>Machine translation, and more specifically neural machine translation (NMT), have been proven to be subject to gender bias in recent years. Many studies have focused on evaluating and reducing this phenomenon, mainly through the analysis of occupational nouns’ translation for the same type of language combinations. In this paper, we reproduce a similar test set than in previous studies to investigate the influence of stereotypes and language combinations’ nature (formed with English, French and Italian) on gender bias occurrence in NMT. Similarly to previous studies, we confirm stereotypes as a major source of gender bias, especially in female contexts, while observing bias even in language combinations traditionally less examined.</abstract>
       <url hash="d25d1cea">2023.ltedi-1.9</url>
@@ -441,7 +441,7 @@
       <title>Interns@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case> : Detecting Signs of Depression from Social Media Text</title>
       <author><first>Koushik</first><last>L</last></author>
       <author><first>Hariharan R.</first><last>L</last></author>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <pages>262–265</pages>
       <abstract>This submission presents our approach for depression detection in social media text. The methodology includes data collection, preprocessing - SMOTE, feature extraction/selection - TF-IDF and Glove, model development- SVM, CNN and Bi-LSTM, training, evaluation, optimisation, and validation. The proposed methodology aims to contribute to the accurate detection of depression.</abstract>
       <url hash="436e6a4d">2023.ltedi-1.40</url>
diff --git a/data/xml/2023.matching.xml b/data/xml/2023.matching.xml
index cb8322962a..40c721a7f5 100644
--- a/data/xml/2023.matching.xml
+++ b/data/xml/2023.matching.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Matching From Unstructured and Structured Data (MATCHING 2023)</booktitle>
       <editor><first>Estevam</first><last>Hruschka</last></editor>
-      <editor><first>Tom</first><last>Mitchell</last></editor>
+      <editor id="tom-mitchell"><first>Tom</first><last>Mitchell</last></editor>
       <editor><first>Sajjadur</first><last>Rahman</last></editor>
       <editor><first>Dunja</first><last>Mladenić</last></editor>
       <editor><first>Marko</first><last>Grobelnik</last></editor>
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>Text-To-<fixed-case>KG</fixed-case> Alignment: Comparing Current Methods on Classification Tasks</title>
       <author><first>Sondre</first><last>Wold</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>1-13</pages>
       <abstract>In contrast to large text corpora, knowledge graphs (KG) provide dense and structured representations of factual information. This makes them attractive for systems that supplement or ground the knowledge found in pre-trained language models with an external knowledge source. This has especially been the case for classification tasks, where recent work has focused on creating pipeline models that retrieve information from KGs like ConceptNet as additional context. Many of these models consist of multiple components, and although they differ in the number and nature of these parts, they all have in common that for some given text query, they attempt to identify and retrieve a relevant subgraph from the KG. Due to the noise and idiosyncrasies often found in KGs, it is not known how current methods compare to a scenario where the aligned subgraph is completely relevant to the query. In this work, we try to bridge this knowledge gap by reviewing current approaches to text-to-KG alignment and evaluating them on two datasets where manually created graphs are available, providing insights into the effectiveness of current methods. We release our code for reproducibility.</abstract>
@@ -120,7 +120,7 @@
       <author><first>Michele</first><last>Merler</last></author>
       <author><first>Mihir</first><last>Choudhury</last></author>
       <author><first>Raju</first><last>Pavuluri</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <author><first>Maja</first><last>Vukovic</last></author>
       <pages>109-119</pages>
       <abstract>Entity standardization maps noisy mentions from free-form text to standard entities in a knowledge base. The unique challenge of this task relative to other entity-related tasks is the lack of surrounding context and numerous variations in the surface form of the mentions, especially when it comes to generalization across domains where labeled data is scarce. Previous research mostly focuses on developing models either heavily relying on context, or dedicated solely to a specific domain. In contrast, we propose CoSiNES, a generic and adaptable framework with Contrastive Siamese Network for Entity Standardization that effectively adapts a pretrained language model to capture the syntax and semantics of the entities in a new domain. We construct a new dataset in the technology domain, which contains 640 technical stack entities and 6,412 mentions collected from industrial content management systems. We demonstrate that CoSiNES yields higher accuracy and faster runtime than baselines derived from leading methods in this domain. CoSiNES also achieves competitive performance in four standard datasets from the chemistry, medicine, and biomedical domains, demonstrating its cross-domain applicability. Code and data is available at <url>https://github.com/konveyor/tackle-container-advisor/tree/main/entity_standardizer/cosines</url></abstract>
diff --git a/data/xml/2023.mmnlg.xml b/data/xml/2023.mmnlg.xml
index d32804d563..de206df788 100644
--- a/data/xml/2023.mmnlg.xml
+++ b/data/xml/2023.mmnlg.xml
@@ -44,8 +44,8 @@
     </paper>
     <paper id="3">
       <title>Keeping an Eye on Context: Attention Allocation over Input Partitions in Referring Expression Generation</title>
-      <author><first>Simeon</first><last>Schüz</last><affiliation>Bielefeld University</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Schüz</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <pages>20-27</pages>
       <abstract>In Referring Expression Generation, model inputs are often composed of different representations, including the visual properties of the intended referent, its relative position and size, and the visual context. Yet, the extent to which this information influences the generation process of black-box neural models is largely unclear. We investigate the relative weighting of target, location, and context information in the attention components of a Transformer-based generation model. Our results show a general target bias, which, however, depends on the content of the generated expressions, pointing to interesting directions for future research.</abstract>
       <url hash="3ee65552">2023.mmnlg-1.3</url>
@@ -73,7 +73,7 @@
     <paper id="6">
       <title>The 2023 <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case> Shared Task on Low Resource Languages. Overview and Evaluation Results (<fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case> 2023)</title>
       <author><first>Liam</first><last>Cripwell</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
@@ -112,7 +112,7 @@
     <paper id="9">
       <title>Data-to-text Generation for Severely Under-Resourced Languages with <fixed-case>GPT</fixed-case>-3.5: A Bit of Help Needed from <fixed-case>G</fixed-case>oogle <fixed-case>T</fixed-case>ranslate (<fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case> 2023)</title>
       <author><first>Michela</first><last>Lorandi</last><affiliation>Dublin City University</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <pages>80-86</pages>
       <abstract>LLMs are great at tasks involving English which dominates in their training data. We explore their ability to address tasks involving languages that are severely under-represented in their training data. More specifically, we do this in the context of data-to-text generation for Irish, Maltese, Welsh and Breton. During the prompt-engineering phase we tested GPT-3.5 and~4 with a range of prompt types and formats on a small sample of example input/output pairs. We then fully evaluated the two most promising prompts in two scenarios: (i) direct generation into the under-resourced languages, and (ii) generation into English followed by translation into the under-resourced languages. We find that few-shot prompting works better for direct generation into under-resourced languages, but that the difference disappears when pivoting via English. The few-shot + translation system variants were submitted to the WebNLG 2023 shared task where they outperformed all other systems by substantial margins in all languages on all automatic metrics. We conclude that good performance can be achieved with state-of-the-art LLMs out-of-the box for under-resourced languages. However, best results (for Welsh) of BLEU 25.12, ChrF++ 0.55, and TER 0.64 are well below the lowest ranked English system at WebNLG’20 with BLEU 0.391, ChrF++ 0.579, and TER 0.564.</abstract>
       <url hash="fd39696d">2023.mmnlg-1.9</url>
@@ -121,11 +121,11 @@
     <paper id="10">
       <title><fixed-case>DCU</fixed-case>/<fixed-case>TCD</fixed-case>-<fixed-case>FORG</fixed-case>e at <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>’23: <fixed-case>I</fixed-case>rish rules! (<fixed-case>W</fixed-case>eg<fixed-case>NLG</fixed-case> 2023)</title>
       <author><first>Simon</first><last>Mille</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last><affiliation>Trinity College Dublin</affiliation></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last><affiliation>Trinity College Dublin</affiliation></author>
       <author><first>Stamatia</first><last>Dasiopoulou</last><affiliation>NTT DATA</affiliation></author>
       <author><first>Lauren</first><last>Cassidy</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <author><first>Brian</first><last>Davis</last><affiliation>Dublin City University</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>ADAPT Research Centre, Dublin City University</affiliation></author>
       <pages>87-92</pages>
       <abstract>In this paper, we describe the submission of Dublin City University (DCU) and Trinity College Dublin (TCD) for the WebNLG 2023 shared task. We present a fully rule-based pipeline for generating Irish texts from DBpedia triple sets which comprises 4 components: triple lexicalisation, generation of noninflected Irish text, inflection generation, and post-processing.</abstract>
       <url hash="50975aa2">2023.mmnlg-1.10</url>
diff --git a/data/xml/2023.mrl.xml b/data/xml/2023.mrl.xml
index d4bf9b68d6..27f5a9259d 100644
--- a/data/xml/2023.mrl.xml
+++ b/data/xml/2023.mrl.xml
@@ -95,8 +95,8 @@
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Silvia</first><last>Severini</last></author>
       <author><first>Radoslav</first><last>Ralev</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>95-105</pages>
       <url hash="998df467">2023.mrl-1.8</url>
       <bibkey>hangya-etal-2023-multilingual</bibkey>
@@ -129,7 +129,7 @@
       <author><first>Bhushan</first><last>Kotnis</last></author>
       <author><first>Carolin</first><last>Lawrence</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>125-138</pages>
       <url hash="6e03acf6">2023.mrl-1.11</url>
       <bibkey>ma-etal-2023-improving-cross</bibkey>
@@ -151,7 +151,7 @@
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
       <author><first>David P.</first><last>Woodruff</last></author>
       <author><first>Barnabas</first><last>Poczos</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>164-172</pages>
       <url hash="ef91a0d3">2023.mrl-1.13</url>
       <bibkey>pham-etal-2023-task</bibkey>
@@ -244,8 +244,8 @@
     <paper id="21">
       <title>Multi-<fixed-case>E</fixed-case>u<fixed-case>P</fixed-case>: The Multilingual <fixed-case>E</fixed-case>uropean Parliament Dataset for Analysis of Bias in Information Retrieval</title>
       <author><first>Jinrui</first><last>Yang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>282-291</pages>
       <url hash="e4004255">2023.mrl-1.21</url>
       <bibkey>yang-etal-2023-multi-eup</bibkey>
@@ -262,7 +262,7 @@
     </paper>
     <paper id="23">
       <title><fixed-case>CUNI</fixed-case> Submission to <fixed-case>MRL</fixed-case> 2023 Shared Task on Multi-lingual Multi-task Information Retrieval</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <pages>302-309</pages>
       <url hash="55baf96e">2023.mrl-1.23</url>
@@ -272,7 +272,7 @@
     <paper id="24">
       <title>Findings of the 1st Shared Task on Multi-lingual Multi-task Information Retrieval at <fixed-case>MRL</fixed-case> 2023</title>
       <author><first>Francesco</first><last>Tinner</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Chris</first><last>Emezue</last></author>
       <author><first>Mammad</first><last>Hajili</last></author>
       <author><first>Omer</first><last>Goldman</last></author>
diff --git a/data/xml/2023.mtsummit.xml b/data/xml/2023.mtsummit.xml
index 751cef45a3..ca9f8eb900 100644
--- a/data/xml/2023.mtsummit.xml
+++ b/data/xml/2023.mtsummit.xml
@@ -41,7 +41,7 @@
       <author><first>Jyotsana</first><last>Khatri</last></author>
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Amar Prakash</first><last>Azad</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>26–34</pages>
       <abstract>In this paper, we compare two approaches to train a multilingual language model: (i) simple multilingual learning using data-mixing, and (ii) meta-learning. We examine the performance of these models by extending them to unseen language pairs and further finetune them for the task of unsupervised NMT. We perform several experiments with varying amounts of data and give a comparative analysis of the approaches. We observe that both approaches give a comparable performance, and meta-learning gives slightly better results in a few cases of low amounts of data. For Oriya-Punjabi language pair, meta-learning performs better than multilingual learning when using 2M, and 3M sentences.</abstract>
       <url hash="a3991172">2023.mtsummit-research.3</url>
@@ -102,7 +102,7 @@
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Yuanmeng</first><last>Chen</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>99–110</pages>
       <abstract>Currently, multi-domain neural machine translation (NMT) has become a significant research topic in domain adaptation machine translation, which trains a single model by mixing data from multiple domains. Multi-domain NMT aims to improve the performance of the low-resources domain through data augmentation. However, mixed domain data brings more translation ambiguity. Previous work focused on domain-general or domain-context knowledge learning, respectively. Therefore, there is a challenge for acquiring domain-general or domain-context knowledge simultaneously. To this end, we propose a unified framework for learning simultaneously domain-general and domain-specific knowledge, we are the first to apply parameter differentiation in multi-domain NMT. Specifically, we design the differentiation criterion and differentiation granularity to obtain domain-specific parameters. Experimental results on multi-domain UM-corpus English-to-Chinese and OPUS German-to-English datasets show that the average BLEU scores of the proposed method exceed the strong baseline by 1.22 and 1.87, respectively. In addition, we investigate the case study to illustrate the effectiveness of the proposed method in acquiring domain knowledge.</abstract>
       <url hash="f2eedf3f">2023.mtsummit-research.9</url>
@@ -136,7 +136,7 @@
     <paper id="12">
       <title>Boosting Unsupervised Machine Translation with Pseudo-Parallel Data</title>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>135–147</pages>
       <abstract>Even with the latest developments in deep learning and large-scale language modeling, the task of machine translation (MT) of low-resource languages remains a challenge. Neural MT systems can be trained in an unsupervised way without any translation resources but the quality lags behind, especially in truly low-resource conditions. We propose a training strategy that relies on pseudo-parallel sentence pairs mined from monolingual corpora in addition to synthetic sentence pairs back-translated from monolingual corpora. We experiment with different training schedules and reach an improvement of up to 14.5 BLEU points (English to Ukrainian) over a baseline trained on back-translated data only.</abstract>
       <url hash="ffe713a1">2023.mtsummit-research.12</url>
@@ -145,7 +145,7 @@
     <paper id="13">
       <title>A Study on the Effectiveness of Large Language Models for Translation with Markup</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>148–159</pages>
@@ -177,7 +177,7 @@
       <title>Beyond Correlation: Making Sense of the Score Differences of New <fixed-case>MT</fixed-case> Evaluation Metrics</title>
       <author><first>Chi-kiu</first><last>Lo</last></author>
       <author><first>Rebecca</first><last>Knowles</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>186–199</pages>
       <abstract>While many new automatic metrics for machine translation evaluation have been proposed in recent years, BLEU scores are still used as the primary metric in the vast majority of MT research papers. There are many reasons that researchers may be reluctant to switch to new metrics, from external pressures (reviewers, prior work) to the ease of use of metric toolkits. Another reason is a lack of intuition about the meaning of novel metric scores. In this work, we examine “rules of thumb” about metric score differences and how they do (and do not) correspond to human judgments of statistically significant differences between systems. In particular, we show that common rules of thumb about BLEU score differences do not in fact guarantee that human annotators will find significant differences between systems. We also show ways in which these rules of thumb fail to generalize across translation directions or domains.</abstract>
       <url hash="3324c47a">2023.mtsummit-research.16</url>
@@ -187,7 +187,7 @@
       <title>Bad <fixed-case>MT</fixed-case> Systems are Good for Quality Estimation</title>
       <author><first>Iryna</first><last>Tryhubyshyn</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>200–208</pages>
       <abstract>Quality estimation (QE) is the task of predicting quality of outputs produced by machine translation (MT) systems. Currently, the highest-performing QE systems are supervised and require training on data with golden quality scores. In this paper, we investigate the impact of the quality of the underlying MT outputs on the performance of QE systems. We find that QE models trained on datasets with lower-quality translations often outperform those trained on higher-quality data. We also demonstrate that good performance can be achieved by using a mix of data from different MT systems.</abstract>
       <url hash="cb530057">2023.mtsummit-research.17</url>
@@ -207,7 +207,7 @@
     <paper id="19">
       <title>Instance-Based Domain Adaptation for Improving Terminology Translation</title>
       <author><first>Prashanth</first><last>Nayak</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>222–234</pages>
@@ -227,7 +227,7 @@
     <paper id="21">
       <title>The Role of Compounds in Human vs. Machine Translation Quality</title>
       <author><first>Kristyna</first><last>Neumannova</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>248–260</pages>
       <abstract>We focus on the production of German compounds in English-to-German manual and automatic translation. On the example of WMT21 news translation test set, we observe that even the best MT systems produce much fewer compounds compared to three independent manual translations. Despite this striking difference, we observe that this insufficiency is not apparent in manual evaluation methods that target the overall translation quality (DA and MQM). Simple automatic methods like BLEU somewhat surprisingly provide a better indication of this quality aspect. Our manual analysis of system outputs, including our freshly trained Transformer models, confirms that current deep neural systems operating at the level of subword units are capable of constructing novel words, including novel compounds. This effect however cannot be measured using static dictionaries of compounds such as GermaNet. German compounds thus pose an interesting challenge for future development of MT systems.</abstract>
       <url hash="9ed2e602">2023.mtsummit-research.21</url>
@@ -257,7 +257,7 @@
     <paper id="24">
       <title>A Context-Aware Annotation Framework for Customer Support Live Chat Machine Translation</title>
       <author><first>Miguel</first><last>Menezes</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>João Varelas</first><last>Graça</last></author>
       <pages>286–297</pages>
@@ -269,7 +269,7 @@
       <title>Targeted Data Augmentation Improves Context-aware Neural Machine Translation</title>
       <author><first>Harritxu</first><last>Gete</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>298–312</pages>
       <abstract>Progress in document-level Machine Translation is hindered by the lack of parallel training data that include context information. In this work, we evaluate the potential of data augmentation techniques to circumvent these limitations, showing that significant gains can be achieved via upsampling, similar context sampling and back-translations, targeted on context-relevant data. We apply these methods on standard document-level datasets in English-German and English-French and demonstrate their relevance to improve the translation of contextual phenomena. In particular, we show that relatively small volumes of targeted data augmentation lead to significant improvements over a strong context-concatenation baseline and standard back-translation of document-level data. We also compare the accuracy of the selected methods depending on data volumes or distance to relevant context information, and explore their use in combination.</abstract>
       <url hash="5fb87bb9">2023.mtsummit-research.25</url>
@@ -315,7 +315,7 @@
       <title>Pivot Translation for Zero-resource Language Pairs Based on a Multilingual Pretrained Model</title>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>348–359</pages>
       <abstract>A multilingual translation model enables a single model to handle multiple languages. However, the translation qualities of unlearned language pairs (i.e., zero-shot translation qualities) are still poor. By contrast, pivot translation translates source texts into target ones via a pivot language such as English, thus enabling machine translation without parallel texts between the source and target languages. In this paper, we perform pivot translation using a multilingual model and compare it with direct translation. We improve the translation quality without using parallel texts of direct translation by fine-tuning the model with machine-translated pseudo-translations. We also discuss what type of parallel texts are suitable for effectively improving the translation quality in multilingual pivot translation.</abstract>
       <url hash="32ae51fc">2023.mtsummit-research.29</url>
@@ -324,7 +324,7 @@
     <paper id="30">
       <title>Character-level <fixed-case>NMT</fixed-case> and language similarity</title>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>360–371</pages>
       <abstract>We explore the effectiveness of character-level neural machine translation using Transformer architecture for various levels of language similarity and size of the training dataset. We evaluate the models using automatic MT metrics and show that translation between similar languages benefits from character-level input segmentation, while for less related languages, character-level vanilla Transformer-base often lags behind subword-level segmentation. We confirm previous findings that it is possible to close the gap by finetuning the already trained subword-level models to character-level.</abstract>
       <url hash="5c932080">2023.mtsummit-research.30</url>
@@ -333,10 +333,10 @@
     <paper id="31">
       <title>Negative Lexical Constraints in Neural Machine Translation</title>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Dusan</first><last>Varis</last></author>
+      <author id="dusan-varis"><first>Dusan</first><last>Varis</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>João Paulo</first><last>Aires</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>372–384</pages>
       <abstract>This paper explores negative lexical constraining in English to Czech neural machine translation. Negative lexical constraining is used to prohibit certain words or expressions in the translation produced by the NMT model. We compared various methods based on modifying either the decoding process or the training data. The comparison was performed on two tasks: paraphrasing and feedback-based translation refinement. We also studied how the methods “evade” the constraints, meaning that the disallowed expression is still present in the output, but in a changed form, most interestingly the case where a different surface form (for example different inflection) is produced. We propose a way to mitigate the issue through training with stemmed negative constraints, so that the ability of the model to induce different forms of a word might be used to prohibit the usage of all possible forms of the constraint. This helps to some extent, but the problem still persists in many cases.</abstract>
       <url hash="2139cc61">2023.mtsummit-research.31</url>
@@ -345,9 +345,9 @@
     <paper id="32">
       <title>Post-editing of Technical Terms based on Bilingual Example Sentences</title>
       <author><first>Elsie K. Y.</first><last>Chan</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chester</first><last>Cheng</last></author>
-      <author><first>Benjamin</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin</first><last>Tsou</last></author>
       <pages>385–392</pages>
       <abstract>As technical fields become ever more specialized, and with continuous emergence of novel technical terms, it may not be always possible to avail of bilingual experts in the field to perform translation. This paper investigates the performance of bilingual non-experts in Computer-Assisted Translation. The translators were asked to identify and correct errors in MT output of technical terms in patent materials, aided only by example bilingual sentences. Targeting English-to-Chinese translation, we automatically extract the example sentences from a bilingual corpus of English and Chinese patents. We identify the most frequent translation candidates of a term, and then select the most relevant example sentences for each candidate according to semantic similarity. Even when given only two example sentences for each translation candidate, the non-expert translators were able to post-edit effectively, correcting 67.2% of the MT errors while mistakenly revising correct MT output in only 17% of the cases.</abstract>
       <url hash="b293a412">2023.mtsummit-research.32</url>
@@ -357,7 +357,7 @@
       <title>A Filtering Approach to Object Region Detection in Multimodal Machine Translation</title>
       <author><first>Ali</first><last>Hatami</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <pages>393–405</pages>
       <abstract>Recent studies in Multimodal Machine Translation (MMT) have explored the use of visual information in a multimodal setting to analyze its redundancy with textual information. The aim of this work is to develop a more effective approach to incorporating relevant visual information into the translation process and improve the overall performance of MMT models. This paper proposes an object-level filtering approach in Multimodal Machine Translation, where the approach is applied to object regions extracted from an image to filter out irrelevant objects based on the image captions to be translated. Using the filtered image helps the model to consider only relevant objects and their relative locations to each other. Different matching methods, including string matching and word embeddings, are employed to identify relevant objects. Gaussian blurring is used to soften irrelevant objects from the image and to evaluate the effect of object filtering on translation quality. The performance of the filtering approaches was evaluated on the Multi30K dataset in English to German, French, and Czech translations, based on BLEU, ChrF2, and TER metrics.</abstract>
       <url hash="e636f322">2023.mtsummit-research.33</url>
@@ -423,7 +423,7 @@
     <paper id="6">
       <title>Improving <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Captioning of Spoken <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman: Evaluating Multilingual Pre-trained Models</title>
       <author><first>Jonathan David</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <pages>65–76</pages>
@@ -500,7 +500,7 @@
     <paper id="12">
       <title>Exploring Multilingual Pretrained Machine Translation Models for Interactive Translation</title>
       <author><first>Angel</first><last>Navarro</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>132–142</pages>
       <abstract>Pre-trained large language models (LLM) constitute very important tools in many artificial intelligence applications. In this work, we explore the use of these models in interactive machine translation environments. In particular, we have chosen mBART (multilingual Bidirectional and Auto-Regressive Transformer) as one of these LLMs. The system enables users to refine the translation output interactively by providing feedback. The system utilizes a two-step process, where the NMT (Neural Machine Translation) model generates a preliminary translation in the first step, and the user performs one correction in the second step–repeating the process until the sentence is correctly translated. We assessed the performance of both mBART and the fine-tuned version by comparing them to a state-of-the-art machine translation model on a benchmark dataset regarding user effort, WSR (Word Stroke Ratio), and MAR (Mouse Action Ratio). The experimental results indicate that all the models performed comparably, suggesting that mBART is a viable option for an interactive machine translation environment, as it eliminates the need to train a model from scratch for this particular task. The implications of this finding extend to the development of new machine translation models for interactive environments, as it indicates that novel pre-trained models exhibit state-of-the-art performance in this domain, highlighting the potential benefits of adapting these models to specific needs.</abstract>
       <url hash="bf8ef12f">2023.mtsummit-users.12</url>
@@ -592,8 +592,8 @@
     <paper id="20">
       <title>Comparing <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> Performance Involving <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> and <fixed-case>MT</fixed-case> Providers and the Efficacy of <fixed-case>AI</fixed-case> mediated Post-Editing</title>
       <author><first>Larry</first><last>Cady</last></author>
-      <author><first>Benjamin</first><last>Tsou</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin</first><last>Tsou</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>205–216</pages>
       <abstract>The recent introduction of ChatGPT has caused much stir in the translation industry because of its impressive translation performance against leaders in the industry. We review some ma-jor issues based on the BLEU comparisons of Chinese-to-English (C2E) and English-to-Chinese (E2C) machine translation (MT) performance by ChatGPT against a range of leading MT providers in mostly technical domains. Based on sample aligned sentences from a sizable bilingual Chinese-English patent corpus and other sources, we find that while ChatGPT perform better generally, it does not consistently perform better than others in all areas or cases. We also draw on novice translators as post-editors to explore a major component in MT post-editing: Optimization of terminology. Many new technical words, including MWEs (Multi-Word Expressions), are problematic because they involve terminological developments which must balance between proper encapsulation of technical innovation and conforming to past traditions . Drawing on the above-mentioned corpus we have been developing an AI mediated MT post-editing (MTPE) system through the optimization of precedent rendition distribution and semantic association to enhance the work of translators and MTPE practitioners.</abstract>
       <url hash="ec6a2094">2023.mtsummit-users.20</url>
@@ -602,7 +602,7 @@
     <paper id="21">
       <title>Challenges of Human vs Machine Translation of Emotion-Loaded <fixed-case>C</fixed-case>hinese Microblog Texts</title>
       <author><first>Shenbin</first><last>Qian</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Félix</first><last>do Carmo</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <pages>217–236</pages>
@@ -650,7 +650,7 @@
       <author><first>Ananya</first><last>Ganesh</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <author><first>William</first><last>Chen</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Constantine</first><last>Lignos</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
       <author><first>Jonne</first><last>Saleva</last></author>
diff --git a/data/xml/2023.multi3generation.xml b/data/xml/2023.multi3generation.xml
index 9e398b6c74..df55aa3cd4 100644
--- a/data/xml/2023.multi3generation.xml
+++ b/data/xml/2023.multi3generation.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 1st International Workshop on Multilingual, Multimodal and Multitask Language Generation</booktitle>
       <editor><first>Anabela</first><last>Barreiro</last></editor>
-      <editor><first>Max</first><last>Silberztein</last></editor>
+      <editor id="max-silberztein"><first>Max</first><last>Silberztein</last></editor>
       <editor><first>Elena</first><last>Lloret</last></editor>
       <editor><first>Marcin</first><last>Paprzycki</last></editor>
       <publisher>European Association for Machine Translation</publisher>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>Towards an Efficient Approach for Controllable Text Generation</title>
       <author><first>Iván</first><last>Martínez-Murillo</last></author>
-      <author><first>Paloma</first><last>Moreda</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda</last></author>
       <author><first>Elena</first><last>Lloret</last></author>
       <pages>26–31</pages>
       <url hash="775c8a61">2023.multi3generation-1.4</url>
diff --git a/data/xml/2023.mwe.xml b/data/xml/2023.mwe.xml
index d5a0fd8f71..3aea0ad0a8 100644
--- a/data/xml/2023.mwe.xml
+++ b/data/xml/2023.mwe.xml
@@ -6,7 +6,7 @@
       <editor><first>Archna</first><last>Bhatia</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
       <editor><first>Marcos</first><last>Garcia</last></editor>
-      <editor><first>Voula</first><last>Giouli</last></editor>
+      <editor id="voula-giouli"><first>Voula</first><last>Giouli</last></editor>
       <editor><first>Lifeng</first><last>Han</last></editor>
       <editor><first>Shiva</first><last>Taslimipoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -34,7 +34,7 @@
     <paper id="4">
       <title><fixed-case>R</fixed-case>omanian Multiword Expression Detection Using Multilingual Adversarial Training and Lateral Inhibition</title>
       <author><first>Andrei</first><last>Avram</last><affiliation>University Politehnica of Bucharest</affiliation></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last><affiliation>Racai</affiliation></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last><affiliation>Racai</affiliation></author>
       <author><first>Dumitru-Clementin</first><last>Cercel</last><affiliation>University Politehnica of Bucharest</affiliation></author>
       <pages>7-13</pages>
       <abstract>Multiword expressions are a key ingredient for developing large-scale and linguistically sound natural language processing technology. This paper describes our improvements in automatically identifying Romanian multiword expressions on the corpus released for the PARSEME v1.2 shared task. Our approach assumes a multilingual perspective based on the recently introduced lateral inhibition layer and adversarial training to boost the performance of the employed multilingual language models. With the help of these two methods, we improve the F1-score of XLM-RoBERTa by approximately 2.7% on unseen multiword expressions, the main task of the PARSEME 1.2 edition. In addition, our results can be considered SOTA performance, as they outperform the previous results on Romanian obtained by the participants in this competition.</abstract>
@@ -61,7 +61,7 @@
       <author><first>Cherifa</first><last>Ben Khelil</last><affiliation>LIFAT - Université de Tours</affiliation></author>
       <author><first>Carlos</first><last>Ramisch</last><affiliation>Aix Marseille University, CNRS, LIS</affiliation></author>
       <author><first>Voula</first><last>Giouli</last><affiliation>Institute for Language &amp; Speech Processing, ATHENA Research &amp; Innovation Centre</affiliation></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last><affiliation>Racai</affiliation></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last><affiliation>Racai</affiliation></author>
       <author><first>Najet</first><last>Hadj Mohamed</last><affiliation>LIFAT - Université de Tours</affiliation></author>
       <author><first>Cvetana</first><last>Krstev</last><affiliation>University of Belgrade, Faculty of Philology</affiliation></author>
       <author><first>Chaya</first><last>Liebeskind</last><affiliation>Jerusalem College of Technology , Lev Academic Center</affiliation></author>
@@ -72,7 +72,7 @@
       <author><first>Bruno</first><last>Guillaume</last><affiliation>LORIA / Inria Nancy Grand-Est</affiliation></author>
       <author><first>Eduard</first><last>Bejček</last><affiliation>Charles University in Prague, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics</affiliation></author>
       <author><first>Archna</first><last>Bhatia</last><affiliation>The Florida Institute for Human and Machine Cognition</affiliation></author>
-      <author><first>Marie</first><last>Candito</last><affiliation>LLF (Université Paris Cité / CNRS)</affiliation></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last><affiliation>LLF (Université Paris Cité / CNRS)</affiliation></author>
       <author><first>Polona</first><last>Gantar</last><affiliation>University of Ljubljana</affiliation></author>
       <author><first>Uxoa</first><last>Iñurrieta</last><affiliation>University of the Basque Country</affiliation></author>
       <author><first>Albert</first><last>Gatt</last><affiliation>Utrecht University</affiliation></author>
@@ -80,7 +80,7 @@
       <author><first>Timm</first><last>Lichte</last><affiliation>University of Tübingen</affiliation></author>
       <author><first>Nikola</first><last>Ljubešić</last><affiliation>Jožef Stefan Institute</affiliation></author>
       <author><first>Johanna</first><last>Monti</last><affiliation>L’OrientaleÜniversity of Naples</affiliation></author>
-      <author><first>Carla</first><last>Parra Escartín</last><affiliation>RWS Language Weaver</affiliation></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last><affiliation>RWS Language Weaver</affiliation></author>
       <author><first>Mehrnoush</first><last>Shamsfard</last><affiliation>Faculty of Computer Science and Engineering, Shahid Beheshti University</affiliation></author>
       <author><first>Ivelina</first><last>Stoyanova</last><affiliation>Department of Computational Linguistics, IBL - BAS</affiliation></author>
       <author><first>Veronika</first><last>Vincze</last><affiliation>University of Szeged</affiliation></author>
@@ -109,7 +109,7 @@
       <title>Idioms, Probing and Dangerous Things: Towards Structural Probing for Idiomaticity in Vector Space</title>
       <author><first>Filip</first><last>Klubička</last><affiliation>Technological University Dublin</affiliation></author>
       <author><first>Vasudevan</first><last>Nedumpozhimana</last><affiliation>TU Dublin</affiliation></author>
-      <author><first>John</first><last>Kelleher</last><affiliation>Technological University Dublin</affiliation></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last><affiliation>Technological University Dublin</affiliation></author>
       <pages>45-57</pages>
       <abstract>The goal of this paper is to learn more about how idiomatic information is structurally encoded in embeddings, using a structural probing method. We repurpose an existing English verbal multi-word expression (MWE) dataset to suit the probing framework and perform a comparative probing study of static (GloVe) and contextual (BERT) embeddings. Our experiments indicate that both encode some idiomatic information to varying degrees, but yield conflicting evidence as to whether idiomaticity is encoded in the vector norm, leaving this an open question. We also identify some limitations of the used dataset and highlight important directions for future work in improving its suitability for a probing analysis.</abstract>
       <url hash="613deea6">2023.mwe-1.8</url>
@@ -131,7 +131,7 @@
       <title>Enriching Multiword Terms in <fixed-case>W</fixed-case>iktionary with Pronunciation Information</title>
       <author><first>Lenka</first><last>Bajcetic</last><affiliation>Innovation Center of the School of Electrical Engineering</affiliation></author>
       <author><first>Thierry</first><last>Declerck</last><affiliation>DFKI GmbH</affiliation></author>
-      <author><first>Gilles</first><last>Sérasset</last><affiliation>Université Grenoble Alpes</affiliation></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <pages>65-72</pages>
       <abstract>We report on work in progress dealing with the automated generation of pronunciation information for English multiword terms (MWTs) in Wiktionary, combining information available for their single components. We describe the issues we were encountering, the building of an evaluation dataset, and our teaming with the DBnary resource maintainer. Our approach shows potential for automatically adding morphosyntactic and semantic information to the components of such MWTs.</abstract>
       <url hash="d2d86c73">2023.mwe-1.10</url>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="12">
       <title>Automatic Generation of Vocabulary Lists with Multiword Expressions</title>
-      <author><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Adilet</first><last>Uvaliyev</last></author>
       <pages>81-86</pages>
       <abstract>The importance of multiword expressions (MWEs) for language learning is well established. While MWE research has been evaluated on various downstream tasks such as syntactic parsing and machine translation, its applications in computer-assisted language learning has been less explored. This paper investigates the selection of MWEs for graded vocabulary lists. Widely used by language teachers and students, these lists recommend a language acquisition sequence to optimize learning efficiency. We automatically generate these lists using difficulty-graded corpora and MWEs extracted based on semantic compositionality. We evaluate these lists on their ability to facilitate text comprehension for learners. Experimental results show that our proposed method generates higher-quality lists than baselines using collocation measures.</abstract>
@@ -179,9 +179,9 @@
     <paper id="14">
       <title>Annotation of lexical bundles with discourse functions in a <fixed-case>S</fixed-case>panish academic corpus</title>
       <author><first>Eleonora</first><last>Guzzi</last><affiliation>Universidade da Coruña</affiliation></author>
-      <author><first>Margarita</first><last>Alonso-Ramos</last><affiliation>Universidade da Coruña</affiliation></author>
+      <author id="margarita-alonso-ramos"><first>Margarita</first><last>Alonso-Ramos</last><affiliation>Universidade da Coruña</affiliation></author>
       <author><first>Marcos</first><last>Garcia</last><affiliation>Universidade de Santiago de Compostela</affiliation></author>
-      <author><first>Marcos</first><last>García Salido</last><affiliation>Universidade da Coruña</affiliation></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García Salido</last><affiliation>Universidade da Coruña</affiliation></author>
       <pages>99-105</pages>
       <abstract>This paper describes the process of annotation of 996 lexical bundles (LB) assigned to 39 different discourse functions in a Spanish academic corpus. The purpose of the annotation is to obtain a new Spanish gold-standard corpus of 1,800,000 words useful for training and evaluating computational models that are capable of identifying automatically LBs for each context in new corpora, as well as for linguistic analysis about the role of LBs in academic discourse. The annotation process revealed that correspondence between LBs and discourse functions is not biunivocal and that the degree of ambiguity is high, so linguists’ contribution has been essential for improving the automatic assignation of tags.</abstract>
       <url hash="8f853bc1">2023.mwe-1.14</url>
diff --git a/data/xml/2023.naloma.xml b/data/xml/2023.naloma.xml
index 8ff609f8ce..701d8c6cf0 100644
--- a/data/xml/2023.naloma.xml
+++ b/data/xml/2023.naloma.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 4th Natural Logic Meets Machine Learning Workshop</booktitle>
       <editor><first>Stergios</first><last>Chatzikyriakidis</last></editor>
-      <editor><first>Valeria</first><last>de Paiva</last></editor>
+      <editor id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Nancy, France</address>
       <month>June</month>
diff --git a/data/xml/2023.nejlt.xml b/data/xml/2023.nejlt.xml
index fc65ae30a2..ad6af180e4 100644
--- a/data/xml/2023.nejlt.xml
+++ b/data/xml/2023.nejlt.xml
@@ -24,7 +24,7 @@
       <title><fixed-case>PARSEME</fixed-case> Meets <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: Getting on the Same Page in Representing Multiword Expressions</title>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Barriers and enabling factors for error analysis in <fixed-case>NLG</fixed-case> research</title>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
       <author><first>Stephanie</first><last>Inglis</last></author>
@@ -76,8 +76,8 @@
       <author><first>Samson</first><last>Tan</last></author>
       <author><first>Tongshang</first><last>Wu</last></author>
       <author><first>Jascha</first><last>Sohl-Dickstein</last></author>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Sajant</first><last>Anand</last></author>
@@ -128,7 +128,7 @@
       <author><first>Simon</first><last>Meoni</last></author>
       <author><first>Maxine</first><last>Meyer</last></author>
       <author><first>Afnan</first><last>Mir</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Niklas</first><last>Meunnighoff</last></author>
       <author><first>Timothy Sum Hon</first><last>Mun</last></author>
       <author><first>Kenton</first><last>Murray</last></author>
@@ -156,7 +156,7 @@
       <author><first>Saqib</first><last>Shamsi</last></author>
       <author><first>Xudong</first><last>Shen</last></author>
       <author><first>Yiwen</first><last>Shi</last></author>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Anna</first><last>Shvets</last></author>
       <author><first>Nick</first><last>Siegel</last></author>
       <author><first>Damien</first><last>Sileo</last></author>
@@ -165,7 +165,7 @@
       <author><first>Roman</first><last>Sitelew</last></author>
       <author><first>Priyank</first><last>Soni</last></author>
       <author><first>Taylor</first><last>Sorensen</last></author>
-      <author><first>William</first><last>Soto</last></author>
+      <author id="william-soto-martinez"><first>William</first><last>Soto</last></author>
       <author><first>Aman</first><last>Srivastava</last></author>
       <author><first>Aditya</first><last>Srivatsa</last></author>
       <author><first>Tony</first><last>Sun</last></author>
@@ -181,7 +181,7 @@
       <author><first>Gloria</first><last>Wang</last></author>
       <author><first>Fuxuan</first><last>Wei</last></author>
       <author><first>Bryan</first><last>Wilie</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Xinyu</first><last>Wu</last></author>
       <author><first>Witold</first><last>Wydmanski</last></author>
       <author><first>Tianbao</first><last>Xie</last></author>
@@ -198,7 +198,7 @@
       <title>On the Relationship between Frames and Emotionality in Text</title>
       <author><first>Enrica</first><last>Troiano</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <abstract>Emotions, which are responses to salient events, can be realized in text implicitly, for instance with mere references to facts (e.g., “That was the beginning of a long war”). Interpreting affective meanings thus relies on the readers background knowledge, but that is hardly modeled in computational emotion analysis. Much work in the field is focused on the word level and treats individual lexical units as the fundamental emotion cues in written communication. We shift our attention to word relations. We leverage Frame Semantics, a prominent theory for the description of predicate-argument structures, which matches the study of emotions: frames build on a “semantics of understanding” whose assumptions rely precisely on peoples world knowledge. Our overarching question is whether and to what extent the events that are represented by frames possess an emotion meaning. To carry out a large corpus-based correspondence analysis, we automatically annotate texts with emotions as well as with FrameNet frames and roles, and we analyze the correlations between them. Our main finding is that substantial groups of frames have an emotional import. With an extensive qualitative analysis, we show that they capture several properties of emotions that are purported by theories from psychology. These observations boost insights on the two strands of research that we bring together: emotion analysis can profit from the event-based perspective of frame semantics; in return, frame semantics gains a better grip of its position vis-à-vis emotions, an integral part of word meanings.</abstract>
       <url hash="84200429">2023.nejlt-1.6</url>
       <doi>10.3384/nejlt.2000-1533.2023.4361</doi>
diff --git a/data/xml/2023.newsum.xml b/data/xml/2023.newsum.xml
index e83c6a7812..3c04b65a7f 100644
--- a/data/xml/2023.newsum.xml
+++ b/data/xml/2023.newsum.xml
@@ -6,7 +6,7 @@
       <editor><first>Yue</first><last>Dong</last></editor>
       <editor><first>Wen</first><last>Xiao</last></editor>
       <editor><first>Lu</first><last>Wang</last></editor>
-      <editor id="fei-liu"><first>Fei</first><last>Liu</last></editor>
+      <editor><first>Fei</first><last>Liu</last></editor>
       <editor><first>Giuseppe</first><last>Carenini</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
@@ -27,7 +27,7 @@
       <author><first>Zengkui</first><last>Sun</last><affiliation>Beijing Jiaotong university</affiliation></author>
       <author><first>Haoxiang</first><last>Shi</last><affiliation>Waseda University</affiliation></author>
       <author><first>Zhixu</first><last>Li</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jianfeng</first><last>Qu</last><affiliation>Soochow University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last><affiliation>Tencent Inc.</affiliation></author>
       <pages>1-11</pages>
@@ -94,7 +94,7 @@
       <author><first>Yuting</first><last>Tang</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Ratish</first><last>Puduppully</last><affiliation>Institute for Infocomm Research (I2R), A*STAR, Singapore</affiliation></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>Institute for Infocomm Research (I2R), A*STAR, Singapore</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research (I2R), A*STAR, Singapore</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research (I2R), A*STAR, Singapore</affiliation></author>
       <pages>56-67</pages>
       <abstract>Large Language Models (LLMs) have shown significant performance in numerous NLP tasks, including summarization and controlled text generation. A notable capability of LLMs is in-context learning (ICL), where the model learns new tasks using input-output pairs in the prompt without any parameter update. However, the performance of LLMs in the context of few-shot abstractive dialogue summarization remains underexplored. This study evaluates various state-of-the-art LLMs on the SAMSum dataset within a few-shot framework. We assess these models in both controlled (entity control, length control, and person-focused planning) and uncontrolled settings, establishing a comprehensive benchmark in few-shot dialogue summarization. Our findings provide insights into summary quality and model controllability, offering a crucial reference for future research in dialogue summarization.</abstract>
       <url hash="fb0bc2f1">2023.newsum-1.6</url>
@@ -109,7 +109,7 @@
       <author><first>Alex</first><last>Fabbri</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Faisal</first><last>Ladhak</last><affiliation>Columbia University</affiliation></author>
       <author><first>Eric</first><last>Lehman</last><affiliation>MIT</affiliation></author>
-      <author><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last><affiliation>Columbia University</affiliation></author>
       <pages>68-74</pages>
       <abstract>Selecting the “right” amount of information to include in a summary is a difficult task. A good summary should be detailed and entity-centric without being overly dense and hard to follow. To better understand this tradeoff, we solicit increasingly dense GPT-4 summaries with what we refer to as a “Chain of Density” (CoD) prompt. Specifically, GPT-4 generates an initial entity-sparse summary before iteratively incorporating missing salient entities without increasing the length. Summaries generated by CoD are more abstractive, exhibit more fusion, and have less of a lead bias than GPT-4 summaries generated by a vanilla prompt. We conduct a human preference study on 100 CNN DailyMail articles and find that humans prefer GPT-4 summaries that are more dense than those generated by a vanilla prompt and almost as dense as human written summaries. Qualitative analysis supports the notion that there exists a tradeoff between informativeness and readability. 500 annotated CoD summaries, as well as an extra 5,000 unannotated summaries, are freely available on HuggingFace (https://huggingface.co/datasets/griffin/chain_of_density).</abstract>
       <url hash="cef4297f">2023.newsum-1.7</url>
@@ -121,7 +121,7 @@
     <paper id="8">
       <title>Generating Extractive and Abstractive Summaries in Parallel from Scientific Articles Incorporating Citing Statements</title>
       <author><first>Sudipta</first><last>Singha Roy</last><affiliation>University of Western Ontario</affiliation></author>
-      <author><first>Robert E.</first><last>Mercer</last><affiliation>The University of Western Ontario</affiliation></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last><affiliation>The University of Western Ontario</affiliation></author>
       <pages>75-86</pages>
       <abstract>Summarization of scientific articles often overlooks insights from citing papers, focusing solely on the document’s content. To incorporate citation contexts, we develop a model to summarize a scientific document using the information in the source and citing documents. It concurrently generates abstractive and extractive summaries, each enhancing the other. The extractive summarizer utilizes a blend of heterogeneous graph-based neural networks and graph attention networks, while the abstractive summarizer employs an autoregressive decoder. These modules exchange control signals through the loss function, ensuring the creation of high-quality summaries in both styles.</abstract>
       <url hash="ace451ed">2023.newsum-1.8</url>
@@ -135,7 +135,7 @@
       <author><first>Simão</first><last>Gonçalves</last><affiliation>Priberam</affiliation></author>
       <author><first>Gonçalo</first><last>Correia</last><affiliation>Priberam</affiliation></author>
       <author><first>Diogo</first><last>Pernes</last><affiliation>Priberam; University of Porto</affiliation></author>
-      <author><first>Afonso</first><last>Mendes</last><affiliation>Priberam Informática, SA.</affiliation></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last><affiliation>Priberam Informática, SA.</affiliation></author>
       <pages>87-96</pages>
       <abstract>The centroid method is a simple approach for extractive multi-document summarization and many improvements to its pipeline have been proposed. We further refine it by adding a beam search process to the sentence selection and also a centroid estimation attention model that leads to improved results. We demonstrate this in several multi-document summarization datasets, including in a multilingual scenario.</abstract>
       <url hash="bc80754c">2023.newsum-1.9</url>
@@ -174,7 +174,7 @@
       <title>Analyzing Multi-Sentence Aggregation in Abstractive Summarization via the Shapley Value</title>
       <author><first>Jingyi</first><last>He</last><affiliation>McGill University</affiliation></author>
       <author><first>Meng</first><last>Cao</last><affiliation>McGill University</affiliation></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last><affiliation>Mila / McGill University</affiliation></author>
       <pages>121-134</pages>
       <abstract>Abstractive summarization systems aim to write concise summaries capturing the most essential information of the input document in their own words. One of the ways to achieve this is to gather and combine multiple pieces of information from the source document, a process we call aggregation. Despite its importance, the extent to which both reference summaries in benchmark datasets and system-generated summaries require aggregation is yet unknown. In this work, we propose AggSHAP, a measure of the degree of aggregation in a summary sentence. We show that AggSHAP distinguishes multi-sentence aggregation from single-sentence extraction or paraphrasing through automatic and human evaluations. We find that few reference or model-generated summary sentences have a high degree of aggregation measured by the proposed metric. We also demonstrate negative correlations between AggSHAP and other quality scores of system summaries. These findings suggest the need to develop new tasks and datasets to encourage multi-sentence aggregation in summarization.</abstract>
       <url hash="e9c0e533">2023.newsum-1.12</url>
diff --git a/data/xml/2023.nllp.xml b/data/xml/2023.nllp.xml
index 17dd782c65..ad9d5915bc 100644
--- a/data/xml/2023.nllp.xml
+++ b/data/xml/2023.nllp.xml
@@ -255,8 +255,8 @@
     <paper id="20">
       <title>Super-<fixed-case>SCOTUS</fixed-case>: A multi-sourced dataset for the <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt of the <fixed-case>US</fixed-case></title>
       <author><first>Biaoyan</first><last>Fang</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>Melbourne University</affiliation></author>
       <pages>202-214</pages>
       <abstract>Given the complexity of the judiciary in the US Supreme Court, various procedures, along with various resources, contribute to the court system. However, most research focuses on a limited set of resources, e.g., court opinions or oral arguments, for analyzing a specific perspective in court, e.g., partisanship or voting. To gain a fuller understanding of these perspectives in the legal system of the US Supreme Court, a more comprehensive dataset, connecting different sources in different phases of the court procedure, is needed. To address this gap, we present a multi-sourced dataset for the Supreme Court, comprising court resources from different procedural phases, connecting language documents with extensive metadata. We showcase its utility through a case study on how different court documents reveal the decision direction (conservative vs. liberal) of the cases. We analyze performance differences across three protected attributes, indicating that different court resources encode different biases, and reinforcing that considering various resources provides a fuller picture of the court procedures. We further discuss how our dataset can contribute to future research directions.</abstract>
@@ -294,7 +294,7 @@
       <author><first>Rohan</first><last>Das</last><affiliation>University of Colorado, Boulder</affiliation></author>
       <author><first>Saeid</first><last>Tizpaz-Niari</last><affiliation>University of Texas, El Paso</affiliation></author>
       <author><first>Ashutosh</first><last>Trivedi</last><affiliation>University of Colorado, Boulder</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
       <pages>230-243</pages>
       <abstract>Due to the ever-increasing complexity of income tax laws in the United States, the number of US taxpayers filing their taxes using tax preparation software henceforth, tax software) continues to increase. According to the U.S. Internal Revenue Service (IRS), in FY22, nearly 50% of taxpayers filed their individual income taxes using tax software. Given the legal consequences of incorrectly filing taxes for the taxpayer, ensuring the correctness of tax software is of paramount importance. Metamorphic testing has emerged as a leading solution to test and debug legal-critical tax software due to the absence of correctness requirements and trustworthy datasets. The key idea behind metamorphic testing is to express the properties of a system in terms of the relationship between one input and its slightly metamorphosed twinned input. Extracting metamorphic properties from IRS tax publications is a tedious and time-consuming process. As a response, this paper formulates the task of generating metamorphic specifications as a translation task between properties extracted from tax documents - expressed in natural language - to a contrastive first-order logic form. We perform a systematic analysis on the potential and limitations of in-context learning with Large Language Models (LLMs) for this task, and outline a research agenda towards automating the generation of metamorphic specifications for tax preparation software.</abstract>
       <url hash="a57f86c8">2023.nllp-1.23</url>
diff --git a/data/xml/2023.nlp4call.xml b/data/xml/2023.nlp4call.xml
index 0110113bd5..502fa1ca9b 100644
--- a/data/xml/2023.nlp4call.xml
+++ b/data/xml/2023.nlp4call.xml
@@ -6,7 +6,7 @@
       <editor><first>David</first><last>Alfter</last></editor>
       <editor><first>Elena</first><last>Volodina</last></editor>
       <editor><first>Thomas</first><last>François</last></editor>
-      <editor><first>Arne</first><last>Jönsson</last></editor>
+      <editor id="arne-jonsson"><first>Arne</first><last>Jönsson</last></editor>
       <editor><first>Evelina</first><last>Rennes</last></editor>
       <publisher>LiU Electronic Press</publisher>
       <address>Tórshavn, Faroe Islands</address>
@@ -24,7 +24,7 @@
       <author><first>Elena</first><last>Volodina</last></author>
       <author><first>Christopher</first><last>Bryant</last></author>
       <author><first>Andrew</first><last>Caines</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Jennifer-Carmen</first><last>Frey</last></author>
       <author><first>Elizaveta</first><last>Ershova</last></author>
       <author><first>Alexandr</first><last>Rosen</last></author>
@@ -36,7 +36,7 @@
     <paper id="2">
       <title><fixed-case>NTNU</fixed-case>-<fixed-case>TRH</fixed-case> system at the <fixed-case>M</fixed-case>ulti<fixed-case>GED</fixed-case>-2023 Shared on Multilingual Grammatical Error Detection</title>
       <author><first>Lars</first><last>Bungum</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Arild</first><last>Brandrud Næss</last></author>
       <pages>17–23</pages>
       <url hash="bc9859a5">2023.nlp4call-1.2</url>
@@ -61,9 +61,9 @@
     </paper>
     <paper id="5">
       <title>Two Neural Models for Multilingual Grammatical Error Detection</title>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
       <author><first>The Quyen</first><last>Ngo</last></author>
-      <author><first>Thi Minh Huyen</first><last>Nguyen</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
       <pages>40–44</pages>
       <url hash="c4ef2718">2023.nlp4call-1.5</url>
       <bibkey>le-hong-etal-2023-two</bibkey>
@@ -93,7 +93,7 @@
     <paper id="8">
       <title>Speech Technology to Support Phonics Learning for Kindergarten Children at Risk of Dyslexia</title>
       <author><first>Stine</first><last>Fuglsang Engmose</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>63–70</pages>
       <url hash="93960876">2023.nlp4call-1.8</url>
       <bibkey>fuglsang-engmose-henrichsen-2023-speech</bibkey>
@@ -101,7 +101,7 @@
     <paper id="9">
       <title>On the relevance and learner dependence of co-text complexity for exercise difficulty</title>
       <author><first>Tanja</first><last>Heck</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>71–84</pages>
       <url hash="bcc1a9e0">2023.nlp4call-1.9</url>
       <bibkey>heck-meurers-2023-relevance</bibkey>
diff --git a/data/xml/2023.nlp4convai.xml b/data/xml/2023.nlp4convai.xml
index 8a877dbd73..ace1bf9a29 100644
--- a/data/xml/2023.nlp4convai.xml
+++ b/data/xml/2023.nlp4convai.xml
@@ -32,7 +32,7 @@
       <author><first>Naoki</first><last>Otani</last></author>
       <author><first>Jun</first><last>Araki</last></author>
       <author><first>HyeongSik</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>12-28</pages>
       <abstract>Advances of open-domain conversational systems have been achieved through the creation of numerous conversation datasets. However, many of the commonly used datasets contain little or no information about the conversational situation, such as relevant objects/people, their properties, and relationships. This absence leads to underspecification of the problem space and typically results in undesired dialogue system behavior. This position paper discusses the current state of the field associated with processing situational information. An analysis of response generation using three datasets shows that explicitly provided situational information can improve the coherence and specificity of generated responses, but further experiments reveal that generation systems can be misled by irrelevant information. Our conclusions from this evaluation provide insights into the problem and directions for future research.</abstract>
       <url hash="6a2d5a78">2023.nlp4convai-1.2</url>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="6">
       <title>c<fixed-case>TBLS</fixed-case>: Augmenting Large Language Models with Conversational Tables</title>
-      <author><first>Anirudh S.</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh S.</first><last>Sundar</last></author>
       <author><first>Larry</first><last>Heck</last></author>
       <pages>59-70</pages>
       <abstract>Optimizing accuracy and performance while eliminating hallucinations of open-domain conversational large language models (LLMs) is an open research challenge. A particularly promising direction is to augment and ground LLMs with information from structured sources. This paper introduces Conversational Tables cTBLS, a three-step architecture to retrieve and generate dialogue responses grounded on retrieved tabular information. cTBLS uses Transformer encoder embeddings for Dense Table Retrieval and obtains up to 125% relative improvement over the retriever in the previous state-of-the-art system on the HyrbiDialogue dataset. cTBLS then uses a shared process between encoder and decoder models to perform a coarse+fine tabular knowledge (e.g., cell) ranking combined with a GPT-3.5 LLM response generator to yield a 2x relative improvement in ROUGE scores. Finally, human evaluators prefer cTBLs +80% of the time (coherency, fluency) and judge informativeness to be 4x better than the previous state-of-the-art.</abstract>
@@ -134,7 +134,7 @@
       <title>Generating Video Game Scripts with Style</title>
       <author><first>Gaetan</first><last>Lopez Latouche</last></author>
       <author><first>Laurence</first><last>Marcotte</last></author>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <pages>129-139</pages>
       <abstract>While modern language models can generate a scripted scene in the format of a play, movie, or video game cutscene the quality of machine generated text remains behind that of human authors. In this work, we focus on one aspect of this quality gap; generating text in the style of an arbitrary and unseen character. We propose the Style Adaptive Semiparametric Scriptwriter (SASS) which leverages an adaptive weighted style memory to generate dialog lines in accordance with a character’s speaking patterns. Using the LIGHT dataset as well as a new corpus of scripts from twenty-three AAA video games, we show that SASS not only outperforms similar models but in some cases can also be used in conjunction with them to yield further improvement.</abstract>
       <url hash="2d0cbb23">2023.nlp4convai-1.11</url>
@@ -145,8 +145,8 @@
     <paper id="12">
       <title>A Survey of Challenges and Methods in the Computational Modeling of Multi-Party Dialog</title>
       <author><first>Ananya</first><last>Ganesh</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>140-154</pages>
       <abstract>Advances in conversational AI systems, powered in particular by large language models, have facilitated rapid progress in understanding and generating dialog. Typically, task-oriented or open-domain dialog systems have been designed to work with two-party dialog, i.e., the exchange of utterances between a single user and a dialog system. However, modern dialog systems may be deployed in scenarios such as classrooms or meetings where conversational analysis of multiple speakers is required. This survey will present research around computational modeling of “multi-party dialog”, outlining differences from two-party dialog, challenges and issues in working with multi-party dialog, and methods for representing multi-party dialog. We also provide an overview of dialog datasets created for the study of multi-party dialog, as well as tasks that are of interest in this domain.</abstract>
       <url hash="db636b22">2023.nlp4convai-1.12</url>
diff --git a/data/xml/2023.nlp4dh.xml b/data/xml/2023.nlp4dh.xml
index 33798a6742..2f55c63e46 100644
--- a/data/xml/2023.nlp4dh.xml
+++ b/data/xml/2023.nlp4dh.xml
@@ -248,7 +248,7 @@
     <paper id="25">
       <title>Comparing Transformer and Dictionary-based Sentiment Models for Literary Texts: Hemingway as a Case-study</title>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
       <pages>219-228</pages>
       <abstract>The literary domain continues to pose a challenge for Sentiment Analysis methods, due to its particularly nuanced and layered nature. This paper explores the adequacy of different Sentiment Analysis tools - from dictionary-based approaches to state-of-the-art Transformers - for capturing valence and modelling sentiment arcs. We take Ernest Hemingway’s novel The Old Man and the Sea as a case study to address challenges inherent to literary language, compare Transformer and rule-based systems’ scores with human annotations, and shed light on the complexities of analyzing sentiment in narrative texts. Finally, we emphasize the potential of model ensembles.</abstract>
       <url hash="c4b4c49b">2023.nlp4dh-1.25</url>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="27">
       <title>Readability and Complexity: Diachronic Evolution of Literary Language Across 9000 Novels</title>
-      <author><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Ida Marie S.</first><last>Lassen</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Mads</first><last>Rosendahl Thomsen</last><affiliation>Aarhus University</affiliation></author>
diff --git a/data/xml/2023.nlp4tia.xml b/data/xml/2023.nlp4tia.xml
index 9cfdc5c20a..c981a7367d 100644
--- a/data/xml/2023.nlp4tia.xml
+++ b/data/xml/2023.nlp4tia.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the First Workshop on NLP Tools and Resources for Translation and Interpreting Applications</booktitle>
       <editor><first>Raquel Lázaro</first><last>Gutiérrez</last></editor>
       <editor><first>Antonio</first><last>Pareja</last></editor>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
       <address>Varna, Bulgaria</address>
       <month>September</month>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> for translators: a survey</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>61–63</pages>
       <abstract>This article surveys the most important ways in which translators can use ChatGPT. The focus is on scenarios where ChatGPT supports the work of translators, rather than tries to replace them. A discussion of issues that translators need to consider when using large language models, and ChatGPT in particular, is also provided.</abstract>
       <url hash="6d1f3c65">2023.nlp4tia-1.10</url>
diff --git a/data/xml/2023.nlpmc.xml b/data/xml/2023.nlpmc.xml
index f91c0a961d..25ac3d9df6 100644
--- a/data/xml/2023.nlpmc.xml
+++ b/data/xml/2023.nlpmc.xml
@@ -20,7 +20,7 @@
       <author><first>Zhuohao</first><last>Chen</last></author>
       <author><first>Jangwon</first><last>Kim</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>1–12</pages>
       <url hash="1ad522c1">2023.nlpmc-1.1</url>
       <bibkey>chen-etal-2023-clinical</bibkey>
diff --git a/data/xml/2023.nlposs.xml b/data/xml/2023.nlposs.xml
index b27d1d9e67..4a624371cb 100644
--- a/data/xml/2023.nlposs.xml
+++ b/data/xml/2023.nlposs.xml
@@ -78,7 +78,7 @@
       <author><first>Ioannis</first><last>Lyris</last><affiliation>ILSP - “Athena” Research Center</affiliation></author>
       <author><first>Natalia</first><last>Manola</last></author>
       <author><first>Ioanna</first><last>Grypari</last><affiliation>IMIS - “Athena” Research Center</affiliation></author>
-      <author><first>Haris</first><last>Papageorgiou</last><affiliation>NATIONAL AND KAPODISTRIAN UNIVERISTY OF ATHENS, ATHENA RESEARCH AND INNOVATION CENTER and ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS</affiliation></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last><affiliation>NATIONAL AND KAPODISTRIAN UNIVERISTY OF ATHENS, ATHENA RESEARCH AND INNOVATION CENTER and ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS</affiliation></author>
       <pages>37-53</pages>
       <abstract>Knowledge extraction from scientific literature is a major issue, crucial to promoting transparency, reproducibility, and innovation in the research community. In this work, we present a novel approach towards the identification, extraction and analysis of dataset and code/software mentions within scientific literature. We introduce a comprehensive dataset, synthetically generated by ChatGPT and meticulously curated, augmented, and expanded with real snippets of scientific text from full-text publications in Computer Science using a human-in-the-loop process. The dataset contains snippets highlighting mentions of the two research artifact (RA) types: dataset and code/software, along with insightful metadata including their Name, Version, License, URL as well as the intended Usage and Provenance. We also fine-tune a simple Large Language Model (LLM) using Low-Rank Adaptation (LoRA) to transform the Research Artifact Analysis (RAA) into an instruction-based Question Answering (QA) task. Ultimately, we report the improvements in performance on the test set of our dataset when compared to other base LLM models. Our method provides a significant step towards facilitating accurate, effective, and efficient extraction of datasets and software from scientific papers, contributing to the challenges of reproducibility and reusability in scientific research.</abstract>
       <url hash="4ea56ffa">2023.nlposs-1.5</url>
@@ -164,8 +164,8 @@
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Garrick</first><last>Sherman</last></author>
       <author><first>Douglas</first><last>Bellew</last><affiliation>National Institute of Health</affiliation></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Brenda</first><last>Curtis</last><affiliation>National Institute on Drug Abuse/NIH</affiliation></author>
       <pages>102-109</pages>
       <abstract>We present the AWARE-text package, an open-source software package for collecting textual data on Android mobile devices. This package allows for collecting short message service (SMS or text messages) and character-level keystrokes. In addition to collecting this raw data, AWARE-text is designed for on device lexicon processing, which allows one to collect standard textual-based measures (e.g., sentiment, emotions, and topics) without collecting the underlying raw textual data. This is especially important in the case of mobile phones, which can contain sensitive and identifying information. Thus, the AWARE-text package allows for privacy protection while simultaneously collecting textual information at multiple levels of granularity: person (lifetime history of SMS), conversation (both sides of SMS conversations and group chats), message (single SMS), and character (individual keystrokes entered across applications). Finally, the unique processing environment of mobile devices opens up several methodological and privacy issues, which we discuss.</abstract>
@@ -271,7 +271,7 @@
     </paper>
     <paper id="21">
       <title>News Signals: An <fixed-case>NLP</fixed-case> Library for Text and Time Series</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Demian</first><last>Ghalandari</last></author>
       <author><first>Parsa</first><last>Ghaffari</last></author>
       <pages>179-189</pages>
diff --git a/data/xml/2023.nlrse.xml b/data/xml/2023.nlrse.xml
index 39d77c3f7a..d1b6d3e7a8 100644
--- a/data/xml/2023.nlrse.xml
+++ b/data/xml/2023.nlrse.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2023-07-12" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Natural Language Reasoning and Structured Explanations (NLRSE)</booktitle>
-      <editor><first>Bhavana</first><last>Dalvi Mishra</last></editor>
+      <editor id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last></editor>
       <editor><first>Greg</first><last>Durrett</last></editor>
-      <editor><first>Peter</first><last>Jansen</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
       <editor><first>Danilo</first><last>Neves Ribeiro</last></editor>
       <editor><first>Jason</first><last>Wei</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -45,7 +45,7 @@
     <paper id="3">
       <title>A smashed glass cannot be full: Generation of Commonsense Explanations through Prompt-based Few-shot Learning</title>
       <author><first>Andrea</first><last>Zaninello</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
       <pages>18-29</pages>
       <abstract>We assume that providing explanations is a process to elicit implicit knowledge in human communication, and propose a general methodology to generate commonsense explanations from pairs of semantically related sentences. We take advantage of both prompting applied to large, encoder-decoder pre-trained language models, and few-shot learning techniques, such as pattern-exploiting training. Experiments run on the e-SNLI dataset show that the proposed method achieves state-of-the-art results on the explanation generation task, with a substantial reduction of labelled data. The obtained results open new perspective on a number of tasks involving the elicitation of implicit knowledge.</abstract>
       <url hash="54396538">2023.nlrse-1.3</url>
@@ -69,7 +69,7 @@
     <paper id="5">
       <title>Using Planning to Improve Semantic Parsing of Instructional Texts</title>
       <author><first>Vanya</first><last>Cohen</last><affiliation>The University of Texas at Austin</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <pages>47-58</pages>
       <abstract>We develop a symbolic planning-based decoder to improve the few-shot semantic parsing of instructional texts. The system takes long-form instructional texts as input and produces sequences of actions in a formal language that enable execution of the instructions. This task poses unique challenges since input texts may contain long context dependencies and ambiguous and domain-specific language. Valid semantic parses also require sequences of steps that constitute an executable plan. We build on recent progress in semantic parsing by leveraging large language models to learn parsers from small amounts of training data. During decoding, our method employs planning methods and domain information to rank and correct candidate parses. To validate our method, we evaluate on four domains: two household instruction-following domains and two cooking recipe interpretation domains. We present results for few-shot semantic parsing using leave-one-out cross-validation. We show that utilizing planning domain information improves the quality of generated plans. Through ablations we also explore the effects of our decoder design choices.</abstract>
       <url hash="c5d22c50">2023.nlrse-1.5</url>
@@ -126,7 +126,7 @@
       <author><first>Ping</first><last>Yu</last><affiliation>University at Buffalo</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Max Planck Institute &amp; ETH Zurich</affiliation></author>
       <author><first>Asli</first><last>Celikyilmaz</last><affiliation>FAIR @ Meta</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <pages>128-138</pages>
       <abstract>We conduct a thorough investigation into the reasoning capabilities of Large Language Models (LLMs), focusing specifically on the Open Pretrained Transformers (OPT) models as a representative of such models. Our study entails finetuning three different sizes of OPT on a carefully curated reasoning corpus, resulting in two sets of finetuned models: OPT-R, finetuned without explanations, and OPT-RE, finetuned with explanations. We then evaluate all models on 57 out-of-domain tasks drawn from the Super-NaturalInstructions benchmark, covering 26 distinct reasoning skills, utilizing three prompting techniques. Through a comprehensive grid of 27 configurations and 6,156 test evaluations, we investigate the dimensions of finetuning, prompting, and scale to understand the role of explanations on different reasoning skills. Our findings reveal that having explanations in the fewshot exemplar has no significant impact on the model’s performance when the model is finetuned, while positively affecting the non-finetuned counterpart. Moreover, we observe a slight yet consistent increase in classification accuracy as we incorporate explanations during prompting and finetuning, respectively. Finally, we offer insights on which reasoning skills benefit the most from incorporating explanations during finetuning and prompting, such as Numerical (+20.4%) and Analogical (+13.9%) reasoning, as well as skills that exhibit negligible or negative effects.</abstract>
       <url hash="39be0da0">2023.nlrse-1.10</url>
diff --git a/data/xml/2023.nodalida.xml b/data/xml/2023.nodalida.xml
index f39f6aacf8..d5404ba606 100644
--- a/data/xml/2023.nodalida.xml
+++ b/data/xml/2023.nodalida.xml
@@ -64,7 +64,7 @@
     <paper id="5">
       <title>Good Reads and Easy Novels: Readability and Literary Quality in a Corpus of <fixed-case>US</fixed-case>-published Fiction</title>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Universität des Saarlandes</affiliation></author>
-      <author><first>Pascale</first><last>Moreira</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Moreira</last></author>
       <author><first>Nicole</first><last>Dwenger</last></author>
       <author><first>Ida</first><last>Lassen</last></author>
       <author><first>Mads</first><last>Thomsen</last></author>
@@ -109,7 +109,7 @@
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>80-85</pages>
       <abstract>Most research in Relation Extraction (RE) involves the English language, mainly due to the lack of multi-lingual resources. We propose Multi-CrossRE, the broadest multi-lingual dataset for RE, including 26 languages in addition to English, and covering six text domains. Multi-CrossRE is a machine translated version of CrossRE (Bassignana and Plank, 2022), with a sub-portion including more than 200 sentences in seven diverse languages checked by native speakers. We run a baseline model over the 26 new datasets and–as sanity check–over the 26 back-translations to English. Results on the back-translated data are consistent with the ones on the original English CrossRE, indicating high quality of the translation and the resulting dataset.</abstract>
       <url hash="c19341e5">2023.nodalida-1.9</url>
@@ -126,7 +126,7 @@
       <author><first>Jon</first><last>Gudnason</last></author>
       <author><first>Jökull</first><last>Gylfason</last></author>
       <author><first>Jarmo</first><last>Hemminki</last></author>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last><affiliation>institute of computer science, University of Tartu</affiliation></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last><affiliation>institute of computer science, University of Tartu</affiliation></author>
       <pages>86-91</pages>
       <abstract>This paper describes a collaborative European project whose aim was to gather open source Natural Language Processing (NLP) tools and make them accessible as running services and easy to try out in the European Language Grid (ELG). The motivation of the project was to increase accessibility for more European languages and make it easier for developers to use the underlying tools in their own applications. The project resulted in the containerization of 60 existing NLP tools for 16 languages, all of which are now currently running as easily testable services in the ELG platform.</abstract>
       <url hash="555a2289">2023.nodalida-1.10</url>
@@ -169,7 +169,7 @@
       <author><first>Daniel</first><last>Holmer</last><affiliation>Linköping University</affiliation></author>
       <author><first>Lars</first><last>Ahrenberg</last><affiliation>Linköping University</affiliation></author>
       <author><first>Julius</first><last>Monsen</last></author>
-      <author><first>Arne</first><last>Jönsson</last><affiliation>Linköping University</affiliation></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last><affiliation>Linköping University</affiliation></author>
       <author><first>Mikael</first><last>Apel</last></author>
       <author><first>Marianna</first><last>Grimaldi</last></author>
       <pages>124-134</pages>
@@ -204,7 +204,7 @@
       <author><first>Fredrik</first><last>Andreassen</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Matias</first><last>Jentoft</last></author>
       <author><first>Sondre</first><last>Wold</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>159-168</pages>
       <abstract>In this paper we present NorQuAD: the first Norwegian question answering dataset for machine reading comprehension. The dataset consists of 4,752 manually created question-answer pairs. We here detail the data collection procedure and present statistics of the dataset. We also benchmark several multilingual and Norwegian monolingual language models on the dataset and compare them against human performance. The dataset will be made freely available.</abstract>
       <url hash="b54b823a">2023.nodalida-1.17</url>
@@ -237,7 +237,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>BRENT</fixed-case>: Bidirectional Retrieval Enhanced <fixed-case>N</fixed-case>orwegian Transformer</title>
-      <author><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
+      <author id="lucas-georges-gabriel-charpentier"><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Sondre</first><last>Wold</last></author>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Egil</first><last>Rønningstad</last></author>
@@ -249,7 +249,7 @@
     <paper id="22">
       <title>Machine vs. Human: Exploring Syntax and Lexicon in <fixed-case>G</fixed-case>erman Translations, with a Spotlight on Anglicisms</title>
       <author><first>Anastassia</first><last>Shaitarova</last></author>
-      <author><first>Anne</first><last>Göhring</last><affiliation>University of Zurich</affiliation></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Martin</first><last>Volk</last><affiliation>University of Zurich</affiliation></author>
       <pages>215-227</pages>
       <abstract>Machine Translation (MT) has become an integral part of daily life for millions of people, with its output being so fluent that users often cannot distinguish it from human translation. However, these fluid texts often harbor algorithmic traces, from limited lexical choices to societal misrepresentations. This raises concerns about the possible effects of MT on natural language and human communication and calls for regular evaluations of machine-generated translations for different languages. Our paper explores the output of three widely used engines (Google, DeepL, Microsoft Azure) and one smaller commercial system. We translate the English and French source texts of seven diverse parallel corpora into German and compare MT-produced texts to human references in terms of lexical, syntactic, and morphological features. Additionally, we investigate how MT leverages lexical borrowings and analyse the distribution of anglicisms across the German translations.</abstract>
@@ -271,7 +271,7 @@
       <author><first>Niki</first><last>Loppi</last><affiliation>Nvidia</affiliation></author>
       <author><first>Timothee</first><last>Mickus</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Alessandro</first><last>Raganato</last><affiliation>University of Milan - Bicocca</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <pages>238-247</pages>
       <abstract>There are several ways of implementing multilingual NLP systems but little consensus as to whether different approaches exhibit similar effects. Are the trends that we observe when adding more languages the same as those we observe when sharing more parameters? We focus on encoder representations drawn from modular multilingual machine translation systems in an English-centric scenario, and study their quality from multiple aspects: how adequate they are for machine translation, how independent of the source language they are, and what semantic information they convey. Adding translation directions in English-centric scenarios does not conclusively lead to an increase in translation quality. Shared layers increase performance on zero-shot translation pairs and lead to more language-independent representations, but these improvements do not systematically align with more semantically accurate representations, from a monolingual standpoint.</abstract>
@@ -381,7 +381,7 @@
       <author><first>Ali</first><last>Al-Laith</last></author>
       <author><first>Kirstine Nielsen</first><last>Degn</last></author>
       <author><first>Alexander</first><last>Conroy</last></author>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Jens</first><last>Bjerring-Hansen</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <pages>324-334</pages>
@@ -414,7 +414,7 @@
       <author><first>Hande</first><last>Celikkanat</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Sami</first><last>Virpioja</last></author>
       <author><first>Markus</first><last>Heinonen</last><affiliation>Aalto University</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <pages>358-365</pages>
       <abstract>This paper introduces Bayesian uncertainty modeling using Stochastic Weight Averaging-Gaussian (SWAG) in Natural Language Understanding (NLU) tasks. We apply the approach to standard tasks in natural language inference (NLI) and demonstrate the effectiveness of the method in terms of prediction accuracy and correlation with human annotation disagreements. We argue that the uncertainty representations in SWAG better reflect subjective interpretation and the natural variation that is also present in human language understanding. The results reveal the importance of uncertainty modeling, an often neglected aspect of neural language modeling, in NLU tasks.</abstract>
       <url hash="7f445c1d">2023.nodalida-1.37</url>
@@ -431,7 +431,7 @@
     <paper id="39">
       <title>Low-resource Bilingual Dialect Lexicon Induction with Large Language Models</title>
       <author><first>Ekaterina</first><last>Artemova</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>371-385</pages>
       <abstract>Bilingual word lexicons map words in one language to their synonyms in another language. Numerous papers have explored bilingual lexicon induction (BLI) in high-resource scenarios, framing a typical pipeline that consists of two steps: (i) unsupervised bitext mining and (ii) unsupervised word alignment. At the core of those steps are pre-trained large language models (LLMs).In this paper we present the analysis of the BLI pipeline for German and two of its dialects, Bavarian and Alemannic. This setup poses a number of unique challenges, attributed to the scarceness of resources, relatedness of the languages and lack of standardization in the orthography of dialects. We analyze the BLI outputs with respect to word frequency and the pairwise edit distance. Finally, we release an evaluation dataset consisting of manual annotations for 1K bilingual word pairs labeled according to their semantic similarity.</abstract>
       <url hash="132998ee">2023.nodalida-1.39</url>
@@ -450,8 +450,8 @@
     <paper id="41">
       <title>A Survey of Corpora for <fixed-case>G</fixed-case>ermanic Low-Resource Languages and Dialects</title>
       <author><first>Verena</first><last>Blaschke</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>392-414</pages>
       <abstract>Despite much progress in recent years, the vast majority of work in natural language processing (NLP) is on standard languages with many speakers. In this work, we instead focus on low-resource languages and in particular non-standardized low-resource languages. Even within branches of major language families, often considered well-researched, little is known about the extent and type of available resources and what the major NLP challenges are for these language varieties. The first step to address this situation is a systematic survey of available corpora (most importantly, annotated corpora, which are particularly valuable for NLP research). Focusing on Germanic low-resource language varieties, we provide such a survey in this paper. Except for geolocation (origin of speaker or document), we find that manually annotated linguistic resources are sparse and, if they exist, mostly cover morphosyntax. Despite this lack of resources, we observe that interest in this area is increasing: there is active development and a growing research community. To facilitate research, we make our overview of over 80 corpora publicly available.</abstract>
       <url hash="ea37119d">2023.nodalida-1.41</url>
@@ -479,7 +479,7 @@
     </paper>
     <paper id="44">
       <title>Comparing Methods for Segmenting Elementary Discourse Units in a <fixed-case>F</fixed-case>rench Conversational Corpus</title>
-      <author><first>Laurent</first><last>Prevot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prevot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
       <author><first>Julie</first><last>Hunter</last></author>
       <author><first>Philippe</first><last>Muller</last><affiliation>IRIT, University of Toulouse</affiliation></author>
       <pages>436-446</pages>
@@ -490,7 +490,7 @@
     <paper id="45">
       <title>Multi-way Variational <fixed-case>NMT</fixed-case> for <fixed-case>UGC</fixed-case>: Improving Robustness in Zero-shot Scenarios via Mixture Density Networks</title>
       <author><first>José</first><last>Rosales Núñez</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last><affiliation>Université de Paris</affiliation></author>
       <pages>447-459</pages>
       <abstract>This work presents a novel Variational Neural Machine Translation (VNMT) architecture with enhanced robustness properties, which we investigate through a detailed case-study addressing noisy French user-generated content (UGC) translation to English. We show that the proposed model, with results comparable or superior to state-of-the-art VNMT, improves performance over UGC translation in a zero-shot evaluation scenario while keeping optimal translation scores on in-domain test sets. We elaborate on such results by visualizing and explaining how neural learning representations behave when processing UGC noise. In addition, we show that VNMT enforces robustness to the learned embeddings, which can be later used for robust transfer learning approaches.</abstract>
@@ -580,7 +580,7 @@
     <paper id="54">
       <title>Probing structural constraints of negation in Pretrained Language Models</title>
       <author><first>David</first><last>Kletz</last></author>
-      <author><first>Marie</first><last>Candito</last><affiliation>Université de Paris</affiliation></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last><affiliation>Université de Paris</affiliation></author>
       <author><first>Pascal</first><last>Amsili</last><affiliation>Sorbonne Nouvelle (Paris 3)</affiliation></author>
       <pages>541-554</pages>
       <abstract>Contradictory results about the encoding of the semantic impact of negation in pretrained language models (PLMs) have been drawn recently (e.g. Kassner and Schütze (2020); Gubelmann and Handschuh (2022)).In this paper we focus rather on the way PLMs encode negation and its formal impact, through the phenomenon of the Negative Polarity Item (NPI) licensing in English.More precisely, we use probes to identify which contextual representations best encode 1) the presence of negation in a sentence, and 2) the polarity of a neighboring masked polarity item. We find that contextual representations of tokens inside the negation scope do allow for (i) a better prediction of the presence of “not” compared to those outside the scope and (ii) a better prediction of the right polarity of a masked polarity item licensed by “not”, although the magnitude of the difference varies from PLM to PLM. Importantly, in both cases the trend holds even when controlling for distance to “not”.This tends to indicate that the embeddings of these models do reflect the notion of negation scope, and do encode the impact of negation on NPI licensing. Yet, further control experiments reveal that the presence of other lexical items is also better captured when using the contextual representation of a token within the same syntactic clause than outside from it, suggesting that PLMs simply capture the more general notion of syntactic clause.</abstract>
@@ -650,7 +650,7 @@
       <author><first>Andrey</first><last>Kutuzov</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Samia</first><last>Touileb</last><affiliation>University og Bergen, Norway</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <author><first>Egil</first><last>Rønningstad</last></author>
       <author><first>Elina</first><last>Sigdel</last></author>
       <author><first>Anna</first><last>Palatkina</last></author>
@@ -671,7 +671,7 @@
     <paper id="63">
       <title><fixed-case>G</fixed-case>iella<fixed-case>LT</fixed-case> — a stable infrastructure for <fixed-case>N</fixed-case>ordic minority languages and beyond</title>
       <author><first>Flammie</first><last>Pirinen</last><affiliation>Norgga árktalaš universitehta</affiliation></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <author><first>Katri</first><last>Hiovain-Asikainen</last></author>
       <pages>643-649</pages>
       <abstract>Long term language technology infrastructures are critical for continued maintenance of language technology based software that is used to support the use of languages in digital world. In Nordic area we have languages ranging from well-resourced national majority languages like Norwegian, Swedish and Finnish as well as minoritised, unresourced and indigenous languages like Sámi languages. We present an infrastructure that has been build in over 20 years time that supports building language technology and tools for most of the Nordic languages as well as many of the languages all over the world, with focus on Sámi and other indigenous, minoritised and unresourced languages. We show that one common infrastructure can be used to build tools from keyboards and spell-checkers to machine translators, grammar checkers and text-to-speech as well as automatic speech recognition.</abstract>
@@ -845,7 +845,7 @@
   <volume id="cgmta" ingest-date="2023-12-19" type="proceedings">
     <meta>
       <booktitle>Proceedings of the NoDaLiDa 2023 Workshop on Constraint Grammar - Methods, Tools and Applications</booktitle>
-      <editor><first>Eckhard</first><last>Bick</last></editor>
+      <editor id="eckhard-bick"><first>Eckhard</first><last>Bick</last></editor>
       <editor><first>Trond</first><last>Trosterud</last></editor>
       <editor><first>Tanel</first><last>Alumäe</last></editor>
       <publisher>Association of Computational Linguistics</publisher>
@@ -870,9 +870,9 @@
     </paper>
     <paper id="2">
       <title><fixed-case>WITH</fixed-case> Context: Adding Rule-Grouping to <fixed-case>VISL</fixed-case> <fixed-case>CG</fixed-case>-3</title>
-      <author><first>Daniel</first><last>Swanson</last></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last></author>
       <author><first>Tino</first><last>Didriksen</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>10-14</pages>
       <abstract>This paper presents an extension to the VISL CG-3 compiler and processor which enables complex contexts to be shared between rules. This sharing substantially improves the readability and maintainability of sets of rules performing multi-step operations.</abstract>
       <url hash="0075725b">2023.nodalida-cgmta.2</url>
@@ -889,7 +889,7 @@
     <paper id="4">
       <title>Towards automatic essay scoring of <fixed-case>B</fixed-case>asque language texts from a rule-based approach based on curriculum-aware systems</title>
       <author><first>Jose Maria</first><last>Arriola</last></author>
-      <author><first>Mikel</first><last>Iruskieta</last></author>
+      <author id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></author>
       <author><first>Ekain</first><last>Arrieta</last></author>
       <author><first>Jon</first><last>Alkorta</last></author>
       <pages>20-28</pages>
diff --git a/data/xml/2023.paclic.xml b/data/xml/2023.paclic.xml
index 3f2716fc03..b0a5dbcde0 100644
--- a/data/xml/2023.paclic.xml
+++ b/data/xml/2023.paclic.xml
@@ -9,7 +9,7 @@
       <editor><first>Si</first><last>Chen</last></editor>
       <editor><first>Yu-Yin</first><last>Hsu</last></editor>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
-      <editor><first>Pranav</first><last>A</last></editor>
+      <editor id="pranav-a"><first>Pranav</first><last>A</last></editor>
       <editor><first>Winnie Huiheng</first><last>Zeng</last></editor>
       <editor><first>Bo</first><last>Peng</last></editor>
       <editor><first>Yuxi</first><last>Li</last></editor>
@@ -92,7 +92,7 @@
     <paper id="8">
       <title>The <fixed-case>WEAVE</fixed-case> 2.0 Corpus: Role Labelled Synthetic Chemical Procedures from Patents with Chemical Named Entities</title>
       <author><first>Shubhangi</first><last>Dutta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Prabhakar</first><last>Bhimalapuram</last></author>
       <pages>80–88</pages>
       <url hash="924fdd59">2023.paclic-1.8</url>
@@ -246,8 +246,8 @@
       <title>Solving Label Variation in Scientific Information Extraction via Multi-Task Learning</title>
       <author><first>Dong</first><last>Pham</last></author>
       <author><first>Xanh</first><last>Ho</last></author>
-      <author><first>Quang Thuy</first><last>Ha</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="quang-thuy-ha"><first>Quang Thuy</first><last>Ha</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>243–256</pages>
       <url hash="88c2be9a">2023.paclic-1.25</url>
       <bibkey>pham-etal-2023-solving</bibkey>
@@ -293,7 +293,7 @@
       <author><first>Po-Ya Angela</first><last>Wang</last></author>
       <author><first>Hsin-Yu</first><last>Chou</last></author>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>293–302</pages>
       <url hash="2da1a5d9">2023.paclic-1.30</url>
       <bibkey>chen-etal-2023-exploring-affordance</bibkey>
@@ -355,7 +355,7 @@
     <paper id="37">
       <title><fixed-case>D</fixed-case>ialo<fixed-case>G</fixed-case>en: Generalized Long-Range Context Representation for Dialogue Systems</title>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Srijith P.</first><last>K.</last></author>
       <pages>372–386</pages>
@@ -385,7 +385,7 @@
       <author><first>Yuchen</first><last>Yan</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
       <author><first>Yawen</first><last>Ye</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>398–405</pages>
       <url hash="c3d34eb7">2023.paclic-1.39</url>
       <bibkey>zhao-etal-2023-corpus</bibkey>
@@ -434,7 +434,7 @@
       <title>Developing an Annotated <fixed-case>P</fixed-case>ersian Dataset from <fixed-case>COVID</fixed-case>-19 News for Enhanced Fake News Detection</title>
       <author><first>Foroogh</first><last>Zahed</last></author>
       <author><first>Seyedeh Fatemeh</first><last>Ebrahimi</last></author>
-      <author><first>Mohammad</first><last>Bahrani</last></author>
+      <author id="mohammad-bahrani"><first>Mohammad</first><last>Bahrani</last></author>
       <author><first>Alireza</first><last>Mansouri</last></author>
       <pages>446–455</pages>
       <url hash="2842c5e7">2023.paclic-1.44</url>
@@ -491,7 +491,7 @@
       <title>Automatic Identification of Assistance Needs in Disaster Situations Using Hybrid Word Embedding Techniques</title>
       <author><first>Sarra</first><last>Chaiir</last></author>
       <author><first>Malika</first><last>Charrad</last></author>
-      <author><first>Narjès</first><last>Bellamine Ben Saoud</last></author>
+      <author id="narjes-bellamine-ben-saoud"><first>Narjès</first><last>Bellamine Ben Saoud</last></author>
       <pages>510–515</pages>
       <url hash="8d02df56">2023.paclic-1.50</url>
       <bibkey>chaiir-etal-2023-automatic</bibkey>
@@ -538,7 +538,7 @@
       <author><first>Takato</first><last>Yamazaki</last></author>
       <author><first>Toshinori</first><last>Sato</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>552–558</pages>
       <url hash="5db7b7ec">2023.paclic-1.55</url>
       <bibkey>kawamoto-etal-2023-follow</bibkey>
@@ -556,7 +556,7 @@
       <title>A Corpus-Based Investigation of Occurrences and Functions of Frame Markers in <fixed-case>F</fixed-case>ilipino <fixed-case>U</fixed-case>niversity Students¡ ̄ <fixed-case>COVID</fixed-case>-19 Written Narratives</title>
       <author><first>Gregorio Ebron</first><last>Jr.</last></author>
       <author><first>Romualdo</first><last>Mabuan</last></author>
-      <author><first>Shirley</first><last>Dita</last></author>
+      <author id="shirley-dita"><first>Shirley</first><last>Dita</last></author>
       <pages>570–581</pages>
       <url hash="362d5026">2023.paclic-1.57</url>
       <bibkey>jr-etal-2023-corpus</bibkey>
@@ -566,7 +566,7 @@
       <author><first>Cam-Van Thi</first><last>Nguyen</last></author>
       <author><first>Ngoc-Hoa Thi</first><last>Nguyen</last></author>
       <author><first>Duc-Trong</first><last>Le</last></author>
-      <author><first>Quang-Thuy</first><last>Ha</last></author>
+      <author id="quang-thuy-ha"><first>Quang-Thuy</first><last>Ha</last></author>
       <pages>582–590</pages>
       <url hash="7f9907e0">2023.paclic-1.58</url>
       <bibkey>thi-etal-2023-self</bibkey>
@@ -596,7 +596,7 @@
       <title>From Data to Dialogue: Leveraging the Structure of Knowledge Graphs for Conversational Exploratory Search</title>
       <author><first>Phillip</first><last>Schneider</last></author>
       <author><first>Nils</first><last>Rehtanz</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Florian</first><last>Matthes</last></author>
       <pages>609–619</pages>
       <url hash="bd06fe59">2023.paclic-1.61</url>
@@ -607,7 +607,7 @@
       <author><first>Lokesh</first><last>Madasu</last></author>
       <author><first>Gopichand</first><last>Kanumolu</last></author>
       <author><first>Nirmal</first><last>Surange</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>620–634</pages>
       <url hash="b96d01dc">2023.paclic-1.62</url>
       <bibkey>madasu-etal-2023-mukhyansh</bibkey>
@@ -667,7 +667,7 @@
       <author><first>Mao-Chang</first><last>Ku</last></author>
       <author><first>Wei-Ling</first><last>Chen</last></author>
       <author><first>Yu-Lin</first><last>Chang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>679–690</pages>
       <url hash="98ff8472">2023.paclic-1.68</url>
       <bibkey>tseng-etal-2023-vec2gloss</bibkey>
@@ -684,7 +684,7 @@
     <paper id="70">
       <title>The Association of Second Language Proficiency with Nonverbal Behaviors</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>701–709</pages>
       <url hash="7d52bbae">2023.paclic-1.70</url>
       <bibkey>kotani-yoshimi-2023-association</bibkey>
@@ -875,7 +875,7 @@
       <title>Fine-grained Contract <fixed-case>NER</fixed-case> using instruction based mode</title>
       <author><first>Hiranmai</first><last>Adibhatla</last></author>
       <author><first>Pavan</first><last>Baswani</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>889–902</pages>
       <url hash="30469e45">2023.paclic-1.89</url>
       <bibkey>adibhatla-etal-2023-fine</bibkey>
diff --git a/data/xml/2023.pandl.xml b/data/xml/2023.pandl.xml
index 9fc633a5d6..a5f477efde 100644
--- a/data/xml/2023.pandl.xml
+++ b/data/xml/2023.pandl.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning</booktitle>
       <editor><first>Mihai</first><last>Surdeanu</last></editor>
-      <editor><first>Ellen</first><last>Riloff</last></editor>
+      <editor id="ellen-riloff"><first>Ellen</first><last>Riloff</last></editor>
       <editor><first>Laura</first><last>Chiticariu</last></editor>
       <editor><first>Dayne</first><last>Frietag</last></editor>
-      <editor><first>Gus</first><last>Hahn-Powell</last></editor>
+      <editor id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></editor>
       <editor><first>Clayton T.</first><last>Morrison</last></editor>
       <editor><first>Enrique</first><last>Noriega-Atala</last></editor>
       <editor><first>Rebecca</first><last>Sharp</last></editor>
@@ -27,7 +27,7 @@
     <paper id="1">
       <title>Nearest Neighbor Search over Vectorized Lexico-Syntactic Patterns for Relation Extraction from Financial Documents</title>
       <author><first>Pawan</first><last>Rajpoot</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>1–5</pages>
       <abstract>Relation extraction (RE) has achieved remarkable progress with the help of pre-trained language models. However, existing RE models are usually incapable of handling two situations: implicit expressions and long-tail relation classes, caused by language complexity and data sparsity. Further, these approaches and models are largely inaccessible to users who don’t have direct access to large language models (LLMs) and/or infrastructure for supervised training or fine-tuning. Rule-based systems also struggle with implicit expressions. Apart from this, Real world financial documents such as various 10-X reports (including 10-K, 10-Q, etc.) of publicly traded companies pose another challenge to rule-based systems in terms of longer and complex sentences. In this paper, we introduce a simple approach that consults training relations at test time through a nearest-neighbor search over dense vectors of lexico-syntactic patterns and provides a simple yet effective means to tackle the above issues. We evaluate our approach on REFinD and show that our method achieves state-of-the-art performance. We further show that it can provide a good start for human in the loop setup when a small number of annotations are available and it is also beneficial when domain experts can provide high quality patterns. Our code is available at 1.</abstract>
       <url hash="7d6a888f">2023.pandl-1.1</url>
@@ -61,11 +61,11 @@
     <paper id="4">
       <title>Generating <fixed-case>I</fixed-case>rish Text with a Flexible Plug-and-Play Architecture</title>
       <author><first>Simon</first><last>Mille</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Lauren</first><last>Cassidy</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <author><first>Stamatia</first><last>Dasiopoulou</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>25–42</pages>
       <abstract>In this paper, we describe M-FleNS, a multilingual flexible plug-and-play architecture designed to accommodate neural and symbolic modules, and initially instantiated with rule-based modules. We focus on using M-FleNS for the specific purpose of building new resources for Irish, a language currently under-represented in the NLP landscape. We present the general M-FleNS framework and how we use it to build an Irish Natural Language Generation system for verbalising part of the DBpedia ontology and building a multilayered dataset with rich linguistic annotations. Via automatic and human assessments of the output texts we show that with very limited resources we are able to create a system that reaches high levels of fluency and semantic accuracy, while having very low energy and memory requirements.</abstract>
       <url hash="47d81baa">2023.pandl-1.4</url>
@@ -79,7 +79,7 @@
       <author><first>Wenting</first><last>Zhao</last></author>
       <author><first>Derek</first><last>Chen</last></author>
       <author><first>Saujas</first><last>Vaduguru</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Daniel</first><last>Fried</last></author>
       <pages>43–53</pages>
       <abstract>Large language models (LLMs) excel at processing and generating both text and code. However, LLMs have had limited applicability in grounded task-oriented dialogue as they are difficult to steer toward task objectives and fail to handle novel grounding. We present a modular and interpretable grounded dialogue system that addresses these shortcomings by composing LLMs with a symbolic planner and grounded code execution. Our system consists of a reader and planner: the reader leverages an LLM to convert partner utterances into executable code, calling functions that perform grounding. The translated code’s output is stored to track dialogue state, while a symbolic planner determines the next appropriate response. We evaluate our system’s performance on the demanding OneCommon dialogue task, involving collaborative reference resolution on abstract images of scattered dots. Our system substantially outperforms the previous state-of-the-art, including improving task success in human evaluations from 56% to 69% in the most challenging setting.</abstract>
@@ -94,7 +94,7 @@
       <author><first>Omid</first><last>Abdar</last></author>
       <author><first>Mike</first><last>Ross</last></author>
       <author><first>Annie</first><last>Dong</last></author>
-      <author><first>Kenneth</first><last>Forbus</last></author>
+      <author id="kenneth-forbus"><first>Kenneth</first><last>Forbus</last></author>
       <author><first>Ahmed</first><last>Mohamed</last></author>
       <pages>54–63</pages>
       <abstract>Frame semantic parsing is an important component of task-oriented dialogue systems. Current models rely on a significant amount training data to successfully identify the intent and slots in the user’s input utterance. This creates a significant barrier for adding new domains to virtual assistant capabilities, as creation of this data requires highly specialized NLP expertise. In this work we propose OpenFSP, a framework that allows for easy creation of new domains from a handful of simple labels that can be generated without specific NLP knowledge. Our approach relies on creating a small, but expressive, set of domain agnostic slot types that enables easy annotation of new domains. Given such annotation, a matching algorithm relying on sentence encoders predicts the intent and slots for domains defined by end-users. Experiments on the TopV2 dataset shows that our model trained on these simple labels have strong performance against supervised baselines.</abstract>
@@ -120,7 +120,7 @@
       <title>Complementary Roles of Inference and Language Models in <fixed-case>QA</fixed-case></title>
       <author><first>Liang</first><last>Cheng</last></author>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>75–91</pages>
       <abstract>Answering open-domain questions through unsupervised methods poses challenges for both machine-reading (MR) and language model (LM) -based approaches. The MR-based approach suffers from sparsity issues in extracted knowledge graphs (KGs), while the performance of the LM-based approach significantly depends on the quality of the retrieved context for questions. In this paper, we compare these approaches and propose a novel methodology that leverages directional predicate entailment (inference) to address these limitations. We use entailment graphs (EGs), with natural language predicates as nodes and entailment as edges, to enhance parsed KGs by inferring unseen assertions, effectively mitigating the sparsity problem in the MR-based approach. We also show EGs improve context retrieval for the LM-based approach. Additionally, we present a Boolean QA task, demonstrating that EGs exhibit comparable directional inference capabilities to large language models (LLMs). Our results highlight the importance of inference in open-domain QA and the improvements brought by leveraging EGs.</abstract>
       <url hash="16ee5628">2023.pandl-1.8</url>
@@ -132,7 +132,7 @@
     <paper id="9">
       <title>Controlled Data Augmentation for Training Task-Oriented Dialog Systems with Low Resource Data</title>
       <author><first>Sebastian</first><last>Steindl</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Bernd</first><last>Ludwig</last></author>
       <pages>92–102</pages>
       <abstract>Modern dialog systems rely on Deep Learning to train transformer-based model architectures. These notoriously rely on large amounts of training data. However, the collection of conversational data is often a tedious and costly process. This is especially true for Task-Oriented Dialogs, where the system ought to help the user achieve specific tasks, such as making reservations. We investigate a controlled strategy for dialog synthesis. Our method generates utterances based on dialog annotations in a sequence-to-sequence manner. Besides exploring the viability of the approach itself, we also explore the effect of constrained beam search on the generation capabilities. Moreover, we analyze the effectiveness of the proposed method as a data augmentation by studying the impact the synthetic dialogs have on training dialog systems. We perform the experiments in multiple settings, simulating various amounts of ground-truth data. Our work shows that a controlled generation approach is a viable method to synthesize Task-Oriented Dialogs, that can in turn be used to train dialog systems. We were able to improve this process by utilizing constrained beam search.</abstract>
@@ -149,7 +149,7 @@
       <author><first>Eduardo</first><last>Mendoza</last></author>
       <author><first>Nelson</first><last>Pampolina</last></author>
       <author><first>Maria Art Antonette</first><last>Clariño</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>103–113</pages>
       <abstract>Relation extraction (RE) is one of the tasks behind many relevant natural language processing (NLP) applications. Exploiting the information hidden in millions of scholarly articles by leveraging NLP, specifically RE, systems could benefit studies in specialized domains, e.g. biomedicine and biodiversity. Although deep learning (DL)-based methods have shown state-of-the-art performance in many NLP tasks including RE, DL for domain-specific RE systems has been hindered by the lack of expert-labeled datasets which are typically required to train such methods. In this paper, we take advantage of the zero-shot (i.e., not requiring any labeled data) capability of pattern-based methods for RE using a rule-based approach, combined with templates for natural language inference (NLI) transformer models. We present our hybrid method for RE that exploits the advantages of both methods, i.e., interpretability of rules and transferability of transformers. Evaluated on a corpus of biodiversity literature with annotated relations, our hybrid method demonstrated an improvement of up to 15 percentage points in recall and best performance over solely rule-based and transformer-based methods with F1-scores ranging from 89.61% to 96.75% for reproductive condition - temporal expression relations, and ranging from 85.39% to 89.90% for habitat - geographic location relations.</abstract>
       <url hash="034fd1bf">2023.pandl-1.10</url>
diff --git a/data/xml/2023.rail.xml b/data/xml/2023.rail.xml
index 03e3228f1e..39b0b34465 100644
--- a/data/xml/2023.rail.xml
+++ b/data/xml/2023.rail.xml
@@ -72,7 +72,7 @@
     <paper id="5">
       <title>A Corpus-Based List of Frequently Used Words in Sesotho</title>
       <author><first>Johannes</first><last>Sibeko</last><affiliation>Nelson Mandela University</affiliation></author>
-      <author><first>Orphée</first><last>De Clercq</last><affiliation>Universiteit Gent</affiliation></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last><affiliation>Universiteit Gent</affiliation></author>
       <pages>32-41</pages>
       <abstract>This paper describes the SpeechReporting Corpus, an online collection of corpora annotated for a range of discourse phenomena. The corpora contain folktales from 7 lesser-studied West African languages. Apart from its value for theoretical linguistics, especially for the study of reported speech, the database is an important resource for the preservation of intangible cultural heritage of minority languages and the development and testing of cross-linguistically applicable computational tools.</abstract>
       <url hash="5f58e1e9">2023.rail-1.5</url>
@@ -128,7 +128,7 @@
     <paper id="10">
       <title>Towards a <fixed-case>S</fixed-case>wahili <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank: Leveraging the Annotations of the <fixed-case>H</fixed-case>elsinki Corpus of <fixed-case>S</fixed-case>wahili</title>
       <author><first>Kenneth</first><last>Steimel</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <pages>86-96</pages>
       <abstract>Dependency annotation can be a laborious process for under-resourced languages. However, in some cases, other resources are available. We investigate whether we can leverage such resources in the case of Swahili: We use the Helsinki Corpus of Swahili for creating a Universal Depedencies treebank for Swahili. The Helsinki Corpus of Swahili provides word-level annotations for part of speech tags, morphological features, and functional syntactic tags. We train neural taggers for these types of annotations, then use those models to annotate our target corpus, the Swahili portion of the OPUS Global Voices Corpus. Based on those annotations, we then manually create constraint grammar rules to annotate the target corpus for Universal Dependencies. In this paper, we describe the process, discuss the annotation decisions we had to make, and we evaluate the approach.</abstract>
       <url hash="0575d46a">2023.rail-1.10</url>
@@ -139,9 +139,9 @@
     <paper id="11">
       <title>Comparing methods of orthographic conversion for Bàsàá, a language of <fixed-case>C</fixed-case>ameroon</title>
       <author><first>Alexandra</first><last>O’neil</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Daniel</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <author><first>Emmanuel</first><last>Ngue Um</last><affiliation>University of Yaoundé I</affiliation></author>
       <pages>97-105</pages>
       <abstract>Orthographical standardization is a milestone in a language’s documentation and the development of its resources. However, texts written in former orthographies remain relevant to the language’s history and development and therefore must be converted to the standardized orthography. Ensuring a language has access to the orthographically standardized version of all of its recorded texts is important in the development of resources as it provides additional textual resources for training, supports contribution of authors using former writing systems, and provides information about the development of the language. This paper evaluates the performance of natural language processing methods, specifically Finite State Transducers and Long Short-term Memory networks, for the orthographical conversion of Bàsàá texts from the Protestant missionary orthography to the now-standard AGLC orthography, with the conclusion that LSTMs are somewhat more effective in the absence of explicit lexical information.</abstract>
diff --git a/data/xml/2023.ranlp.xml b/data/xml/2023.ranlp.xml
index b0cf0307cd..bca0ca9356 100644
--- a/data/xml/2023.ranlp.xml
+++ b/data/xml/2023.ranlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-10-31" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</booktitle>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
       <address>Varna, Bulgaria</address>
@@ -41,7 +41,7 @@
     <paper id="3">
       <title>Cross-lingual Classification of Crisis-related Tweets Using Machine Translation</title>
       <author><first>Shareefa</first><last>Al Amer</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
       <pages>22–31</pages>
       <abstract>Utilisation of multilingual language models such as mBERT and XLM-RoBERTa has increasingly gained attention in recent work by exploiting the multilingualism of such models in different downstream tasks across different languages. However, performance degradation is expected in transfer learning across languages compared to monolingual performance although it is an acceptable trade-off considering the sparsity of resources and lack of available training data in low-resource languages. In this work, we study the effect of machine translation on the cross-lingual transfer learning in a crisis event classification task. Our experiments include measuring the effect of machine-translating the target data into the source language and vice versa. We evaluated and compared the performance in terms of accuracy and F1-Score. The results show that translating the source data into the target language improves the prediction accuracy by 14.8% and the Weighted Average F1-Score by 19.2% when compared to zero-shot transfer to an unseen language.</abstract>
@@ -64,7 +64,7 @@
       <author><first>Abinew Ali</first><last>Ayele</last></author>
       <author><first>Skadi</first><last>Dinter</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>41–48</pages>
       <abstract>The rise of social media eases the spread of hateful content, especially racist content with severe consequences. In this paper, we analyze the tweets targeting the death of George Floyd in May 2020 as the event accelerated debates on racism globally. We focus on the tweets published in French for a period of one month since the death of Floyd. Using the Yandex Toloka platform, we annotate the tweets into categories as hate, offensive or normal. Tweets that are offensive or hateful are further annotated as racial or non-racial. We build French hate speech detection models based on the multilingual BERT and CamemBERT and apply transfer learning by fine-tuning the HateXplain model. We compare different approaches to resolve annotation ties and find that the detection model based on CamemBERT yields the best results in our experiments.</abstract>
       <url hash="563c2466">2023.ranlp-1.5</url>
@@ -76,7 +76,7 @@
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Tadesse Destaw</first><last>Belay</last></author>
       <author><first>Tesfa</first><last>Asfaw</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>49–59</pages>
       <abstract>In this paper, we present a study of efficient data selection and annotation strategies for Amharic hate speech. We also build various classification models and investigate the challenges of hate speech data selection, annotation, and classification for the Amharic language. From a total of over 18 million tweets in our Twitter corpus, 15.1k tweets are annotated by two independent native speakers, and a Cohen’s kappa score of 0.48 is achieved. A third annotator, a curator, is also employed to decide on the final gold labels. We employ both classical machine learning and deep learning approaches, which include fine-tuning AmFLAIR and AmRoBERTa contextual embedding models. Among all the models, AmFLAIR achieves the best performance with an F1-score of 72%. We publicly release the annotation guidelines, keywords/lexicon entries, datasets, models, and associated scripts with a permissive license.</abstract>
       <url hash="af030c59">2023.ranlp-1.6</url>
@@ -95,7 +95,7 @@
       <title>3<fixed-case>D</fixed-case>-<fixed-case>EX</fixed-case>: A Unified Dataset of Definitions and Dictionary Examples</title>
       <author><first>Fatemah</first><last>Almeman</last></author>
       <author><first>Hadi</first><last>Sheikhi</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>69–79</pages>
       <abstract>Definitions are a fundamental building block in lexicography, linguistics and computational semantics. In NLP, they have been used for retrofitting word embeddings or augmenting contextual representations in language models. However, lexical resources containing definitions exhibit a wide range of properties, which has implications in the behaviour of models trained and evaluated on them. In this paper, we introduce 3D-EX, a dataset that aims to fill this gap by combining well-known English resources into one centralized knowledge repository in the form of &lt;term, definition, example&gt; triples. 3D-EX is a unified evaluation framework with carefully pre-computed train/validation/test splits to prevent memorization. We report experimental results that suggest that this dataset could be effectively leveraged in downstream NLP tasks. Code and data are available at https://github.com/F-Almeman/3D-EX.</abstract>
       <url hash="456bb028">2023.ranlp-1.8</url>
@@ -105,7 +105,7 @@
       <title>Are You Not moved? Incorporating Sensorimotor Knowledge to Improve Metaphor Detection</title>
       <author><first>Ghadi</first><last>Alnafesah</last></author>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>80–89</pages>
       <abstract>Metaphors use words from one domain of knowledge to describe another, which can make the meaning less clear and require human interpretation to understand. This makes it difficult for automated models to detect metaphorical usage. The objective of the experiments in the paper is to enhance the ability of deep learning models to detect metaphors automatically. This is achieved by using two elements of semantic richness, sensory experience, and body-object interaction, as the main lexical features, combined with the contextual information present in the metaphorical sentences. The tests were conducted using classification and sequence labeling models for metaphor detection on the three metaphorical corpora VUAMC, MOH-X, and TroFi. The sensory experience led to significant improvements in the classification and sequence labelling models across all datasets. The highest gains were seen on the VUAMC dataset: recall increased by 20.9%, F1 by 7.5% for the classification model, and Recall increased by 11.66% and F1 by 3.69% for the sequence labelling model. Body-object interaction also showed positive impact on the three datasets.</abstract>
       <url hash="4b40e8cc">2023.ranlp-1.9</url>
@@ -114,7 +114,7 @@
     <paper id="10">
       <title><fixed-case>HAQA</fixed-case> and <fixed-case>QUQA</fixed-case>: Constructing Two <fixed-case>A</fixed-case>rabic Question-Answering Corpora for the <fixed-case>Q</fixed-case>uran and <fixed-case>H</fixed-case>adith</title>
       <author><first>Sarah</first><last>Alnefaie</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad Ammar</first><last>Alsalka</last></author>
       <pages>90–97</pages>
       <abstract>It is neither possible nor fair to compare the performance of question-answering systems for the Holy Quran and Hadith Sharif in Arabic due to both the absence of a golden test dataset on the Hadith Sharif and the small size and easy questions of the newly created golden test dataset on the Holy Quran. This article presents two question–answer datasets: Hadith Question–Answer pairs (HAQA) and Quran Question–Answer pairs (QUQA). HAQA is the first Arabic Hadith question–answer dataset available to the research community, while the QUQA dataset is regarded as the more challenging and the most extensive collection of Arabic question–answer pairs on the Quran. HAQA was designed and its data collected from several expert sources, while QUQA went through several steps in the construction phase; that is, it was designed and then integrated with existing datasets in different formats, after which the datasets were enlarged with the addition of new data from books by experts. The HAQA corpus consists of 1598 question–answer pairs, and that of QUQA contains 3382. They may be useful as gold–standard datasets for the evaluation process, as training datasets for language models with question-answering tasks and for other uses in artificial intelligence.</abstract>
@@ -139,9 +139,9 @@
     <paper id="12">
       <title>A Review in Knowledge Extraction from Knowledge Bases</title>
       <author><first>Fabio</first><last>Yanez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <author><first>Yoan</first><last>Gutierrez</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Armando</first><last>Suarez</last></author>
       <pages>109–116</pages>
       <abstract>Generative language models achieve the state of the art in many tasks within natural language processing (NLP). Although these models correctly capture syntactic information, they fail to interpret knowledge (semantics). Moreover, the lack of interpretability of these models promotes the use of other technologies as a replacement or complement to generative language models. This is the case with research focused on incorporating knowledge by resorting to knowledge bases mainly in the form of graphs. The generation of large knowledge graphs is carried out with unsupervised or semi-supervised techniques, which promotes the validation of this knowledge with the same type of techniques due to the size of the generated databases. In this review, we will explain the different techniques used to test and infer knowledge from graph structures with machine learning algorithms. The motivation of validating and inferring knowledge is to use correct knowledge in subsequent tasks with improved embeddings.</abstract>
@@ -162,7 +162,7 @@
     <paper id="14">
       <title>Impact of Emojis on Automatic Analysis of Individual Emotion Categories</title>
       <author><first>Ratchakrit</first><last>Arreerard</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <pages>124–131</pages>
       <abstract>Automatic emotion analysis is a highly challenging task for Natural Language Processing, which has so far mainly relied on textual contents to determine the emotion of text. However, words are not the only media that carry emotional information. In social media, people also use emojis to convey their feelings. Recently, researchers have studied emotional aspects of emojis, and use emoji information to improve the emotion detection and classification, but many issues remain to be addressed. In this study, we examine the impact of emoji embedding on emotion classification and intensity prediction on four individual emotion categories, including anger, fear, joy, and sadness, in order to investigate how emojis affect the automatic analysis of individual emotion categories and intensity. We conducted a comparative study by testing five machine learning models with and without emoji embeddings involved. Our experiment demonstrates that emojis have varying impact on different emotion categories, and there is potential that emojis can be used to enhance emotion information processing.</abstract>
       <url hash="c765fb44">2023.ranlp-1.14</url>
@@ -171,7 +171,7 @@
     <paper id="15">
       <title>Was That a Question? Automatic Classification of Discourse Meaning in <fixed-case>S</fixed-case>panish</title>
       <author><first>Santiago</first><last>Arróniz</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>132–142</pages>
       <abstract>This paper examines the effectiveness of different feature representations of audio data in accurately classifying discourse meaning in Spanish. The task involves determining whether an utterance is a declarative sentence, an interrogative, an imperative, etc. We explore how pitch contour can be represented for a discourse-meaning classification task, employing three different audio features: MFCCs, Mel-scale spectrograms, and chromagrams. We also determine if utilizing means is more effective in representing the speech signal, given the large number of coefficients produced during the feature extraction process. Finally, we evaluate whether these feature representation techniques are sensitive to speaker information. Our results show that a recurrent neural network architecture in conjunction with all three feature sets yields the best results for the task.</abstract>
       <url hash="6c5c3918">2023.ranlp-1.15</url>
@@ -210,7 +210,7 @@
       <title>Using <fixed-case>W</fixed-case>ikidata for Enhancing Compositionality in Pretrained Language Models</title>
       <author><first>Meriem</first><last>Beloucif</last></author>
       <author><first>Mihir</first><last>Bansal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>170–178</pages>
       <abstract>One of the many advantages of pre-trained language models (PLMs) such as BERT and RoBERTa is their flexibility and contextual nature. These features give PLMs strong capabilities for representing lexical semantics. However, PLMs seem incapable of capturing high-level semantics in terms of compositionally. We show that when augmented with the relevant semantic knowledge, PMLs learn to capture a higher degree of lexical compositionality. We annotate a large dataset from Wikidata highlighting a type of semantic inference that is easy for humans to understand but difficult for PLMs, like the correlation between age and date of birth. We use this resource for finetuning DistilBERT, BERT large and RoBERTa. Our results show that the performance of PLMs against the test data continuously improves when augmented with such a rich resource. Our results are corroborated by a consistent improvement over most GLUE benchmark natural language understanding tasks.</abstract>
       <url hash="9d1989be">2023.ranlp-1.19</url>
@@ -244,7 +244,7 @@
       <author><first>Angana</first><last>Borah</last></author>
       <author><first>Daria</first><last>Pylypenko</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>196–206</pages>
       <abstract>Recent work has shown evidence of “Clever Hans” behavior in high-performance neural translationese classifiers, where BERT-based classifiers capitalize on spurious correlations, in particular topic information, between data and target classification labels, rather than genuine translationese signals. Translationese signals are subtle (especially for professional translation) and compete with many other signals in the data such as genre, style, author, and, in particular, topic. This raises the general question of how much of the performance of a classifier is really due to spurious correlations in the data versus the signals actually targeted for by the classifier, especially for subtle target signals and in challenging (low resource) data settings. We focus on topic-based spurious correlation and approach the question from two directions: (i) where we have no knowledge about spurious topic information and its distribution in the data, (ii) where we have some indication about the nature of spurious topic correlations. For (i) we develop a measure from first principles capturing alignment of unsupervised topics with target classification labels as an indication of spurious topic information in the data. We show that our measure is the same as purity in clustering and propose a “topic floor” (as in a “noise floor”) for classification. For (ii) we investigate masking of known spurious topic carriers in classification. Both (i) and (ii) contribute to quantifying and (ii) to mitigating spurious correlations.</abstract>
       <url hash="ec9bbcb2">2023.ranlp-1.22</url>
@@ -253,7 +253,7 @@
     <paper id="23">
       <title><fixed-case>WIKITIDE</fixed-case>: A <fixed-case>W</fixed-case>ikipedia-Based Timestamped Definition Pairs Dataset</title>
       <author><first>Hsuvas</first><last>Borkakoty</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>207–216</pages>
       <abstract>A fundamental challenge in the current NLP context, dominated by language models, comes from the inflexibility of current architectures to “learn” new information. While model-centric solutions like continual learning or parameter-efficient fine-tuning are available, the question still remains of how to reliably identify changes in language or in the world. In this paper, we propose WikiTiDe, a dataset derived from pairs of timestamped definitions extracted from Wikipedia. We argue that such resources can be helpful for accelerating diachronic NLP, specifically, for training models able to scan knowledge resources for core updates concerning a concept, an event, or a named entity. Our proposed end-to-end method is fully automatic and leverages a bootstrapping algorithm for gradually creating a high-quality dataset. Our results suggest that bootstrapping the seed version of WikiTiDe leads to better-fine-tuned models. We also leverage fine-tuned models in a number of downstream tasks, showing promising results with respect to competitive baselines.</abstract>
       <url hash="36b365f5">2023.ranlp-1.23</url>
@@ -265,7 +265,7 @@
       <author><first>Matheus Camasmie</first><last>Pavan</last></author>
       <author><first>Wesley Ramos</first><last>Santos</last></author>
       <author><first>Samuel Caetano</first><last>Silva</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>217–223</pages>
       <abstract>Transformer-based language models such as Bidirectional Encoder Representations from Transformers (BERT) are now mainstream in the NLP field, but extensions to languages other than English, to new domains and/or to more specific text genres are still in demand. In this paper we introduced BERTabaporu, a BERT language model that has been pre-trained on Twitter data in the Brazilian Portuguese language. The model is shown to outperform the best-known general-purpose model for this language in three Twitter-related NLP tasks, making a potentially useful resource for Portuguese NLP in general.</abstract>
       <url hash="310894cd">2023.ranlp-1.24</url>
@@ -295,7 +295,7 @@
       <title>Stance Prediction from Multimodal Social Media Data</title>
       <author><first>Lais Carraro Leme</first><last>Cavalheiro</last></author>
       <author><first>Matheus Camasmie</first><last>Pavan</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>242–248</pages>
       <abstract>Stance prediction - the computational task of inferring attitudes towards a given target topic of interest - relies heavily on text data provided by social media or similar sources, but it may also benefit from non-text information such as demographics (e.g., users’ gender, age, etc.), network structure (e.g., friends, followers, etc.), interactions (e.g., mentions, replies, etc.) and other non-text properties (e.g., time information, etc.). However, so-called hybrid (or in some cases multimodal) approaches to stance prediction have only been developed for a small set of target languages, and often making use of count-based text models (e.g., bag-of-words) and time-honoured classification methods (e.g., support vector machines). As a means to further research in the field, in this work we introduce a number of text- and non-text models for stance prediction in the Portuguese language, which make use of more recent methods based on BERT and an ensemble architecture, and ask whether a BERT stance classifier may be enhanced with different kinds of network-related information.</abstract>
       <url hash="60916def">2023.ranlp-1.27</url>
@@ -350,7 +350,7 @@
     </paper>
     <paper id="33">
       <title>A Computational Analysis of the Voices of Shakespeare’s Characters</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <pages>295–300</pages>
       <abstract>In this paper we propose a study of a relatively novel problem in authorship attribution research: that of classifying the stylome of characters in a literary work. We choose as a case study the plays of William Shakespeare, presumably the most renowned and respected dramatist in the history of literature. Previous research in the field of authorship attribution has shown that the writing style of an author can be characterized and distinguished from that of other authors automatically. The question we propose to answer is a related but different one: can the styles of different characters be distinguished? We aim to verify in this way if an author managed to create believable characters with individual styles, and focus on Shakespeare’s iconic characters. We present our experiments using various features and models, including an SVM and a neural network, show that characters in Shakespeare’s plays can be classified with up to 50% accuracy.</abstract>
@@ -384,8 +384,8 @@
       <author><first>Isabel</first><last>Espinosa-Zaragoza</last></author>
       <author><first>José</first><last>Abreu-Salas</last></author>
       <author><first>Elena</first><last>Lloret</last></author>
-      <author><first>Paloma</first><last>Moreda</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>321–330</pages>
       <abstract>In the age of knowledge, the democratisation of information facilitated through the Internet may not be as pervasive if written language poses challenges to particular sectors of the population. The objective of this paper is to present an overview of research-based automatic text simplification tools. Consequently, we describe aspects such as the language, language phenomena, language levels simplified, approaches, specific target populations these tools are created for (e.g. individuals with cognitive impairment, attention deficit, elderly people, children, language learners), and accessibility and availability considerations. The review of existing studies covering automatic text simplification tools is undergone by searching two databases: Web of Science and Scopus. The eligibility criteria involve text simplification tools with a scientific background in order to ascertain how they operate. This methodology yielded 27 text simplification tools that are further analysed. Some of the main conclusions reached with this review are the lack of resources accessible to the public, the need for customisation to foster the individual’s independence by allowing the user to select what s/he finds challenging to understand while not limiting the user’s capabilities and the need for more simplification tools in languages other than English, to mention a few.</abstract>
       <url hash="f9c5f5c4">2023.ranlp-1.36</url>
@@ -428,7 +428,7 @@
       <author><first>Nina</first><last>Georgiades</last></author>
       <author><first>Noah</first><last>Sauer</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>353–363</pages>
       <abstract>We investigate approaches to classifying texts into either conspiracy theory or mainstream using the Language Of Conspiracy (LOCO) corpus. Since conspiracy theories are not monolithic constructs, we need to identify approaches that robustly work in an out-of- domain setting (i.e., across conspiracy topics). We investigate whether optimal in-domain set- tings can be transferred to out-of-domain set- tings, and we investigate different methods for bleaching to steer classifiers away from words typical for an individual conspiracy theory. We find that BART works better than an SVM, that we can successfully classify out-of-domain, but there are no clear trends in how to choose the best source training domains. Addition- ally, bleaching only topic words works better than bleaching all content words or completely delexicalizing texts.</abstract>
       <url hash="2aee0f90">2023.ranlp-1.40</url>
@@ -458,9 +458,9 @@
     <paper id="43">
       <title><fixed-case>T</fixed-case>2<fixed-case>KG</fixed-case>: Transforming Multimodal Document to Knowledge Graph</title>
       <author><first>Santiago</first><last>Galiano</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <author><first>Jose Ignacio</first><last>Abreu</last></author>
       <author><first>Luis Alfonso</first><last>Ureña</last></author>
       <pages>385–391</pages>
@@ -503,7 +503,7 @@
       <title>Student’s t-Distribution: On Measuring the Inter-Rater Reliability When the Observations are Scarce</title>
       <author><first>Serge</first><last>Gladkoff</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>419–428</pages>
       <abstract>In natural language processing (NLP) we always rely on human judgement as the golden quality evaluation method. However, there has been an ongoing debate on how to better evaluate inter-rater reliability (IRR) levels for certain evaluation tasks, such as translation quality evaluation (TQE), especially when the data samples (observations) are very scarce. In this work, we first introduce the study on how to estimate the confidence interval for the measurement value when only one data (evaluation) point is available. Then, this leads to our example with two human-generated observational scores, for which, we introduce “Student’s <i>t</i>-Distribution” method and explain how to use it to measure the IRR score using only these two data points, as well as the confidence intervals (CIs) of the quality evaluation. We give a quantitative analysis of how the evaluation confidence can be greatly improved by introducing more observations, even if only one extra observation. We encourage researchers to report their IRR scores in all possible means, e.g. using Student’s <i>t</i>-Distribution method whenever possible; thus making the NLP evaluation more meaningful, transparent, and trustworthy. This <i>t</i>-Distribution method can be also used outside of NLP fields to measure IRR level for trustworthy evaluation of experimental investigations, whenever the observational data is scarce.</abstract>
       <url hash="1976ffd1">2023.ranlp-1.47</url>
@@ -533,7 +533,7 @@
       <author><first>Najet</first><last>Hadj Mohamed</last></author>
       <author><first>Malak</first><last>Rassem</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>448–457</pages>
       <abstract>Multiword Expressions (MWEs) have been a bottleneck for Natural Language Understanding (NLU) and Natural Language Generation (NLG) tasks due to their idiomaticity, ambiguity, and non-compositionality. Bilingual parallel corpora introducing MWE annotations are very scarce which set another challenge for current Natural Language Processing (NLP) systems, especially in a multilingual setting. This work presents AlphaMWE-Arabic, an Arabic edition of the AlphaMWE parallel corpus with MWE annotations. We introduce how we created this corpus including machine translation (MT), post-editing, and annotations for both standard and dialectal varieties, i.e. Tunisian and Egyptian Arabic. We analyse the MT errors when they meet MWEs-related content, both quantitatively using the human-in-the-loop metric HOPE and qualitatively. We report the current state-of-the-art MT systems are far from reaching human parity performances. We expect our bilingual English-Arabic corpus will be an asset for multilingual research on MWEs such as translation and localisation, as well as for monolingual settings including the study of Arabic-specific lexicography and phrasal verbs on MWEs. Our corpus and experimental data are available at <url>https://github.com/aaronlifenghan/AlphaMWE</url>.</abstract>
       <url hash="9d291827">2023.ranlp-1.50</url>
@@ -573,7 +573,7 @@
       <title>Enriched Pre-trained Transformers for Joint Slot Filling and Intent Detection</title>
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>480–493</pages>
       <abstract>Detecting the user’s intent and finding the corresponding slots among the utterance’s words are important tasks in natural language understanding. Their interconnected nature makes their joint modeling a standard part of training such models. Moreover, data scarceness and specialized vocabularies pose additional challenges. Recently, the advances in pre-trained language models, namely contextualized models such as ELMo and BERT have revolutionized the field by tapping the potential of training very large models with just a few steps of fine-tuning on a task-specific dataset. Here, we leverage such models, and we design a novel architecture on top of them. Moreover, we propose an intent pooling attention mechanism, and we reinforce the slot filling task by fusing intent distributions, word features, and token representations. The experimental results on standard datasets show that our model outperforms both the current non-BERT state of the art as well as stronger BERT-based baselines.</abstract>
       <url hash="038219b4">2023.ranlp-1.54</url>
@@ -615,7 +615,7 @@
     <paper id="58">
       <title>Towards a Consensus Taxonomy for Annotating Errors in Automatically Generated Text</title>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>527–540</pages>
       <abstract>Error analysis aims to provide insights into system errors at different levels of granularity. NLP as a field has a long-standing tradition of analysing and reporting errors which is generally considered good practice. There are existing error taxonomies tailored for different types of NLP task. In this paper, we report our work reviewing existing research on meaning/content error types in generated text, attempt to identify emerging consensus among existing meaning/content error taxonomies, and propose a standardised error taxonomy on this basis. We find that there is virtually complete agreement at the highest taxonomic level where errors of meaning/content divide into (1) Content Omission, (2) Content Addition, and (3) Content Substitution. Consensus in the lower levels is less pronounced, but a compact standardised consensus taxonomy can nevertheless be derived that works across generation tasks and application domains.</abstract>
       <url hash="b909a340">2023.ranlp-1.58</url>
@@ -646,10 +646,10 @@
       <title>Categorising Fine-to-Coarse Grained Misinformation: An Empirical Study of the <fixed-case>COVID</fixed-case>-19 Infodemic</title>
       <author><first>Ye</first><last>Jiang</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Iknoor</first><last>Singh</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>556–567</pages>
       <abstract>The spread of COVID-19 misinformation on social media became a major challenge for citizens, with negative real-life consequences. Prior research focused on detection and/or analysis of COVID-19 misinformation. However, fine-grained classification of misinformation claims has been largely overlooked. The novel contribution of this paper is in introducing a new dataset which makes fine-grained distinctions between statements that assert, comment or question on false COVID-19 claims. This new dataset not only enables social behaviour analysis but also enables us to address both evidence-based and non-evidence-based misinformation classification tasks. Lastly, through leave claim out cross-validation, we demonstrate that classifier performance on unseen COVID-19 misinformation claims is significantly different, as compared to performance on topics present in the training data.</abstract>
       <url hash="0472d5a0">2023.ranlp-1.61</url>
@@ -669,7 +669,7 @@
     <paper id="63">
       <title>Evaluating Data Augmentation for Medication Identification in Clinical Notes</title>
       <author><first>Jordan</first><last>Koontz</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
       <pages>578–585</pages>
       <abstract>We evaluate the effectiveness of using data augmentation to improve the generalizability of a Named Entity Recognition model for the task of medication identification in clinical notes. We compare disparate data augmentation methods, namely mention-replacement and a generative model, for creating synthetic training examples. Through experiments on the n2c2 2022 Track 1 Contextualized Medication Event Extraction data set, we show that data augmentation with supplemental examples created with GPT-3 can boost the performance of a transformer-based model for small training sets.</abstract>
@@ -680,7 +680,7 @@
       <title>Advancing Topical Text Classification: A Novel Distance-Based Method with Contextual Embeddings</title>
       <author><first>Andriy</first><last>Kosar</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>586–597</pages>
       <abstract>This study introduces a new method for distance-based unsupervised topical text classification using contextual embeddings. The method applies and tailors sentence embeddings for distance-based topical text classification. This is achieved by leveraging the semantic similarity between topic labels and text content, and reinforcing the relationship between them in a shared semantic space. The proposed method outperforms a wide range of existing sentence embeddings on average by 35%. Presenting an alternative to the commonly used transformer-based zero-shot general-purpose classifiers for multiclass text classification, the method demonstrates significant advantages in terms of computational efficiency and flexibility, while maintaining comparable or improved classification results.</abstract>
       <url hash="b731f642">2023.ranlp-1.64</url>
@@ -720,7 +720,7 @@
     <paper id="68">
       <title>Noisy Self-Training with Data Augmentations for Offensive and Hate Speech Detection Tasks</title>
       <author><first>João</first><last>Leite</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Diego</first><last>Silva</last></author>
       <pages>631–640</pages>
       <abstract>Online social media is rife with offensive and hateful comments, prompting the need for their automatic detection given the sheer amount of posts created every second. Creating high-quality human-labelled datasets for this task is difficult and costly, especially because non-offensive posts are significantly more frequent than offensive ones. However, unlabelled data is abundant, easier, and cheaper to obtain. In this scenario, self-training methods, using weakly-labelled examples to increase the amount of training data, can be employed. Recent “noisy” self-training approaches incorporate data augmentation techniques to ensure prediction consistency and increase robustness against noisy data and adversarial attacks. In this paper, we experiment with default and noisy self-training using three different textual data augmentation techniques across five different pre-trained BERT architectures varying in size. We evaluate our experiments on two offensive/hate-speech datasets and demonstrate that (i) self-training consistently improves performance regardless of model size, resulting in up to +1.5% F1-macro on both datasets, and (ii) noisy self-training with textual data augmentations, despite being successfully applied in similar settings, decreases performance on offensive and hate-speech domains when compared to the default method, even with state-of-the-art augmentations such as backtranslation.</abstract>
@@ -737,10 +737,10 @@
     </paper>
     <paper id="70">
       <title>Classifying <fixed-case>COVID</fixed-case>-19 Vaccine Narratives</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author><first>Yue</first><last>Li</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>648–657</pages>
       <abstract>Vaccine hesitancy is widespread, despite the government’s information campaigns and the efforts of the World Health Organisation (WHO). Categorising the topics within vaccine-related narratives is crucial to understand the concerns expressed in discussions and identify the specific issues that contribute to vaccine hesitancy. This paper addresses the need for monitoring and analysing vaccine narratives online by introducing a novel vaccine narrative classification task, which categorises COVID-19 vaccine claims into one of seven categories. Following a data augmentation approach, we first construct a novel dataset for this new classification task, focusing on the minority classes. We also make use of fact-checker annotated data. The paper also presents a neural vaccine narrative classifier that achieves an accuracy of 84% under cross-validation. The classifier is publicly available for researchers and journalists.</abstract>
       <url hash="0b019be5">2023.ranlp-1.70</url>
@@ -777,7 +777,7 @@
     <paper id="73">
       <title>Data Augmentation for Fake Reviews Detection</title>
       <author><first>Ming</first><last>Liu</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>673–680</pages>
       <abstract>In this research, we studied the relationship between data augmentation and model accuracy for the task of fake review detection. We used data generation methods to augment two different fake review datasets and compared the performance of models trained with the original data and with the augmented data. Our results show that the accuracy of our fake review detection model can be improved by 0.31 percentage points on DeRev Test and by 7.65 percentage points on Amazon Test by using the augmented datasets.</abstract>
       <url hash="5ae02e66">2023.ranlp-1.73</url>
@@ -788,7 +788,7 @@
       <author><first>Congda</first><last>Ma</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Kiyoaki</first><last>Shirai</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>681–690</pages>
       <abstract>The emergence of pre-trained language models has taken story generation, which is the task of automatically generating a comprehensible story from limited information, to a new stage. Although generated stories from the language models are fluent and grammatically correct, the lack of coherence affects their quality. We propose a knowledge-based multi-stage model that incorporates the schema, a kind of structured knowledge, to guide coherent story generation. Our framework includes a schema acquisition module, a plot generation module, and a surface realization module. In the schema acquisition module, high-relevant structured knowledge pieces are selected as a schema. In the plot generation module, a coherent plot plan is navigated by the schema. In the surface realization module, conditioned by the generated plot, a story is generated. Evaluations show that our methods can generate more comprehensible stories than strong baselines, especially with higher global coherence and less repetition.</abstract>
       <url hash="0302b155">2023.ranlp-1.74</url>
@@ -817,7 +817,7 @@
     <paper id="77">
       <title>Transformer-Based Language Models for <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Iva</first><last>Marinova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>712–720</pages>
       <abstract>This paper presents an approach for training lightweight and robust language models for Bulgarian that mitigate gender, political, racial, and other biases in the data. Our method involves scraping content from major Bulgarian online media providers using a specialized procedure for source filtering, topic selection, and lexicon-based removal of inappropriate language during the pre-training phase. We continuously improve the models by incorporating new data from various domains, including social media, books, scientific literature, and linguistically modified corpora. Our motivation is to provide a solution that is sufficient for all natural language processing tasks in Bulgarian, and to address the lack of existing procedures for guaranteeing the robustness of such models.</abstract>
@@ -828,7 +828,7 @@
       <title>Multi-task Ensemble Learning for Fake Reviews Detection and Helpfulness Prediction: A Novel Approach</title>
       <author><first>Alimuddin</first><last>Melleng</last></author>
       <author><first>Anna</first><last>Jurek-Loughrey</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>721–729</pages>
       <abstract>Research on fake reviews detection and review helpfulness prediction is prevalent, yet most studies tend to focus solely on either fake reviews detection or review helpfulness prediction, considering them separate research tasks. In contrast to this prevailing pattern, we address both challenges concurrently by employing a multi-task learning approach. We posit that undertaking these tasks simultaneously can enhance the performance of each task through shared information among features. We utilize pre-trained RoBERTa embeddings with a document-level data representation. This is coupled with an array of deep learning and neural network models, including Bi-LSTM, LSTM, GRU, and CNN. Additionally, we em- ploy ensemble learning techniques to integrate these models, with the objective of enhancing overall prediction accuracy and mitigating the risk of overfitting. The findings of this study offer valuable insights to the fields of natural language processing and machine learning and present a novel perspective on leveraging multi-task learning for the twin challenges of fake reviews detection and review helpfulness prediction</abstract>
       <url hash="3189bb5f">2023.ranlp-1.78</url>
@@ -838,7 +838,7 @@
       <title>Data Fusion for Better Fake Reviews Detection</title>
       <author><first>Alimuddin</first><last>Melleng</last></author>
       <author><first>Anna</first><last>Jurek-Loughrey</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>730–738</pages>
       <abstract>Online reviews have become critical in informing purchasing decisions, making the detection of fake reviews a crucial challenge to tackle. Many different Machine Learning based solutions have been proposed, using various data representations such as n-grams or document embeddings. In this paper, we first explore the effectiveness of different data representations, including emotion, document embedding, n-grams, and noun phrases in embedding for mat, for fake reviews detection. We evaluate these representations with various state-of-the-art deep learning models, such as BILSTM, LSTM, GRU, CNN, and MLP. Following this, we propose to incorporate different data repre- sentations and classification models using early and late data fusion techniques in order to im- prove the prediction performance. The experiments are conducted on four datasets: Hotel, Restaurant, Amazon, and Yelp. The results demonstrate that combination of different data representations significantly outperform any of the single data representations</abstract>
       <url hash="5198f5fe">2023.ranlp-1.79</url>
@@ -846,7 +846,7 @@
     </paper>
     <paper id="80">
       <title>Dimensions of Quality: Contrasting Stylistic vs. Semantic Features for Modelling Literary Quality in 9,000 Novels</title>
-      <author><first>Pascale</first><last>Moreira</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Moreira</last></author>
       <author><first>Yuri</first><last>Bizzoni</last></author>
       <pages>739–747</pages>
       <abstract>In computational literary studies, the challenging task of predicting quality or reader-appreciation of narrative texts is confounded by volatile definitions of quality and the vast feature space that may be considered in modeling. In this paper, we explore two different types of feature sets: stylistic features on one hand, and semantic features on the other. We conduct experiments on a corpus of 9,089 English language literary novels published in the 19th and 20th century, using GoodReads’ ratings as a proxy for reader-appreciation. Examining the potential of both approaches, we find that some types of books are more predictable in one model than in the other, which may indicate that texts have different prominent characteristics (stylistic complexity, a certain narrative progression at the sentiment-level).</abstract>
@@ -890,9 +890,9 @@
       <author><first>Sneha</first><last>Rautmare</last></author>
       <author><first>Meegan</first><last>Gower</last></author>
       <author><first>Nishtha</first><last>Jain</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Patricia</first><last>Buffini</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>777–784</pages>
       <abstract>Identification of mentions of medical concepts in social media text can provide useful information for caseload prediction of diseases like Covid-19 and Measles. We propose a simple model for the automatic identification of the medical concept mentions in the social media text. We validate the effectiveness of the proposed model on Twitter, Reddit, and News/Media datasets.</abstract>
       <url hash="afbc9dcf">2023.ranlp-1.84</url>
@@ -912,7 +912,7 @@
       <title>Human Value Detection from Bilingual Sensory Product Reviews</title>
       <author><first>Boyu</first><last>Niu</last></author>
       <author><first>Céline</first><last>Manetta</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <pages>792–802</pages>
       <abstract>We applied text classification methods on a corpus of product reviews we created with the help of a questionnaire. We found that for certain values, “traditional” deep neural networks like CNN can give promising results compared to the baseline. We propose some ideas to improve the results in the future. The bilingual corpus we created which contains more than 16 000 consumer reviews associated to the human value profile of the authors can be used for different marketing purposes.</abstract>
       <url hash="9f9f60f8">2023.ranlp-1.86</url>
@@ -932,7 +932,7 @@
     <paper id="88">
       <title>A Research-Based Guide for the Creation and Deployment of a Low-Resource Machine Translation System</title>
       <author><first>John E.</first><last>Ortega</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>813–823</pages>
       <abstract>The machine translation (MT) field seems to focus heavily on English and other high-resource languages. Though, low-resource MT (LRMT) is receiving more attention than in the past. Successful LRMT systems (LRMTS) should make a compelling business case in terms of demand, cost and quality in order to be viable for end users. When used by communities where low-resource languages are spoken, LRMT quality should not only be determined by the use of traditional metrics like BLEU, but it should also take into account other factors in order to be inclusive and not risk overall rejection by the community. MT systems based on neural methods tend to perform better with high volumes of training data, but they may be unrealistic and even harmful for LRMT. It is obvious that for research purposes, the development and creation of LRMTS is necessary. However, in this article, we argue that two main workarounds could be considered by companies that are considering deployment of LRMTS in the wild: human-in-the-loop and sub-domains.</abstract>
       <url hash="e42e9b6e">2023.ranlp-1.88</url>
@@ -979,7 +979,7 @@
     <paper id="93">
       <title>Multilingual Continual Learning Approaches for Text Classification</title>
       <author><first>Karan</first><last>Praharaj</last></author>
-      <author><first>Irina</first><last>Matveeva</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
       <pages>864–870</pages>
       <abstract>Multilingual continual learning is important for models that are designed to be deployed over long periods of time and are required to be updated when new data becomes available. Such models are continually applied to new unseen data that can be in any of the supported languages. One challenge in this scenario is to ensure consistent performance of the model throughout the deployment lifecycle, beginning from the moment of first deployment. We empirically assess the strengths and shortcomings of some continual learning methods in a multilingual setting across two tasks.</abstract>
       <url hash="16226663">2023.ranlp-1.93</url>
@@ -1018,7 +1018,7 @@
     <paper id="97">
       <title>hu<fixed-case>PWKP</fixed-case>: A <fixed-case>H</fixed-case>ungarian Text Simplification Corpus</title>
       <author><first>Noémi</first><last>Prótár</last></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
       <pages>898–907</pages>
       <abstract>In this article we introduce huPWKP, the first parallel corpus consisting of Hungarian standard language-simplified sentence pairs. As Hungarian is a quite low-resource language in regards to text simplification, we opted for translating an already existing corpus, PWKP (Zhu et al., 2010), on which we performed some cleaning in order to improve its quality. We evaluated the corpus both with the help of human evaluators and by training a seq2seq model on both the Hungarian corpus and the original (cleaned) English corpus. The Hungarian model performed slightly worse in terms of automatic metrics; however, the English model attains a SARI score close to the state of the art on the official PWKP set. According to the human evaluation, the corpus performs at around 3 on a scale ranging from 1 to 5 in terms of information retention and increase in simplification and around 3.7 in terms of grammaticality.</abstract>
       <url hash="ba23e7b9">2023.ranlp-1.97</url>
@@ -1040,7 +1040,7 @@
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Prasanna Kumar</first><last>Kumaresan</last></author>
       <author><first>Rahul</first><last>Ponnusamy</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Michaela</first><last>Comerford</last></author>
       <author><first>Jay</first><last>Megaro</last></author>
       <author><first>Deniz</first><last>Keles</last></author>
@@ -1063,7 +1063,7 @@
       <title>Modeling Easiness for Training Transformers with Curriculum Learning</title>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Giulia</first><last>Pucci</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>937–948</pages>
       <abstract>Directly learning from complex examples is generally problematic for humans and machines. Indeed, a better strategy is exposing learners to examples in a reasonable, pedagogically-motivated order. Curriculum Learning (CL) has been proposed to import this strategy when training machine learning models. In this paper, building on Curriculum Learning, we propose a novel, linguistically motivated measure to determine example complexity for organizing examples during learning. Our complexity measure - LRC- is based on length, rarity, and comprehensibility. Our resulting learning model is CL-LRC, that is, CL with LRC. Experiments on downstream tasks show that CL-LRC outperforms existing CL and non-CL methods for training BERT and RoBERTa from scratch. Furthermore, we analyzed different measures, including perplexity, loss, and learning curve of different models pre-trained from scratch, showing that CL-LRC performs better than the state-of-the-art.</abstract>
       <url hash="4db9d34d">2023.ranlp-1.101</url>
@@ -1078,7 +1078,7 @@
       <author><first>Dario</first><last>Onorati</last></author>
       <author><first>Michele</first><last>Mastromattei</last></author>
       <author><first>Francesca</first><last>Fallucchi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>949–960</pages>
       <abstract>Pre-trained Transformers are challenging human performances in many Natural Language Processing tasks. The massive datasets used for pre-training seem to be the key to their success on existing tasks. In this paper, we explore how a range of pre-trained natural language understanding models performs on definitely unseen sentences provided by classification tasks over a DarkNet corpus. Surprisingly, results show that syntactic and lexical neural networks perform on par with pre-trained Transformers even after fine-tuning. Only after what we call extreme domain adaptation, that is, retraining with the masked language model task on all the novel corpus, pre-trained Transformers reach their standard high results. This suggests that huge pre-training corpora may give Transformers unexpected help since they are exposed to many of the possible sentences.</abstract>
       <url hash="ddbd036a">2023.ranlp-1.102</url>
@@ -1088,7 +1088,7 @@
       <title><fixed-case>P</fixed-case>re<fixed-case>C</fixed-case>og: Exploring the Relation between Memorization and Performance in Pre-trained Language Models</title>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>961–967</pages>
       <abstract>Large Language Models (LLMs) are impressive machines with the ability to memorize, possibly generalized learning examples. We present here a small, focused contribution to the analysis of the interplay between memorization and performance of BERT in downstream tasks. We propose PreCog, a measure for evaluating memorization from pre-training, and we analyze its correlation with the BERT’s performance. Our experiments show that highly memorized examples are better classified, suggesting memorization is an essential key to success for BERT.</abstract>
       <url hash="7ee1b9f6">2023.ranlp-1.103</url>
@@ -1111,7 +1111,7 @@
       <author><first>Anupam</first><last>Jamatia</last></author>
       <author><first>Dwijen</first><last>Rudrapal</last></author>
       <author><first>Kunal</first><last>Chakma</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>979–987</pages>
       <abstract>The paper introduces a cross-lingual speaker identification system for Indian languages, utilising a Long Short-Term Memory dense neural network (LSTM-DNN). The system was trained on audio recordings in English and evaluated on data from Hindi, Kannada, Malayalam, Tamil, and Telugu, with a view to how factors such as phonetic similarity and native accent affect performance. The model was fed with MFCC (mel-frequency cepstral coefficient) features extracted from the audio file. For comparison, the corresponding mel-spectrogram images were also used as input to a ResNet-50 model, while the raw audio was used to train a Siamese network. The LSTM-DNN model outperformed the other two models as well as two more traditional baseline speaker identification models, showing that deep learning models are superior to probabilistic models for capturing low-level speech features and learning speaker characteristics.</abstract>
       <url hash="42c89e0f">2023.ranlp-1.105</url>
@@ -1119,8 +1119,8 @@
     </paper>
     <paper id="106">
       <title>‘<fixed-case>C</fixed-case>hem<fixed-case>X</fixed-case>tract’ A System for Extraction of Chemical Events from Patent Documents</title>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>988–995</pages>
       <abstract>ChemXtraxt main goal is to extract the chemical events from patent documents. Event extraction requires that we first identify the names of chemical compounds involved in the events. Thus, in this work two extractions are done and they are (a) names of chemical compounds and (b) event that identify the specific involvement of the chemical compounds in a chemical reaction. Extraction of essential elements of a chemical reaction, generally known as Named Entity Recognition (NER), extracts the compounds, condition and yields, their specific role in reaction and assigns a label according to the role it plays within a chemical reaction. Whereas event extraction identifies the chemical event relations between the chemical compounds identified. Here in this work we have used Neural Conditional Random Fields (NCRF), which combines the power of artificial neural network (ANN) and CRFs. Different levels of features that include linguistic, orthographical and lexical clues are used. The results obtained are encouraging.</abstract>
       <url hash="950e4bd2">2023.ranlp-1.106</url>
@@ -1138,7 +1138,7 @@
       <title>Event Annotation and Detection in <fixed-case>K</fixed-case>annada-<fixed-case>E</fixed-case>nglish Code-Mixed Social Media Data</title>
       <author><first>Sumukh</first><last>S</last></author>
       <author><first>Abhinav</first><last>Appidi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>1007–1014</pages>
       <abstract>Code-mixing (CM) is a frequently observed phenomenon on social media platforms in multilingual societies such as India. While the increase in code-mixed content on these platforms provides good amount of data for studying various aspects of code-mixing, the lack of automated text analysis tools makes such studies difficult. To overcome the same, tools such as language identifiers, Parts-of-Speech (POS) taggers and Named Entity Recognition (NER) for analysing code-mixed data have been developed. One such important tool is Event Detection, an important information retrieval task which can be used to identify critical facts occurring in the vast streams of unstructured text data available. While event detection from text is a hard problem on its own, social media data adds to it with its informal nature, and code-mixed (Kannada-English) data further complicates the problem due to its word-level mixing, lack of structure and incomplete information. In this work, we have tried to address this problem. We have proposed guidelines for the annotation of events in Kannada-English CM data and provided some baselines for the same with careful feature selection.</abstract>
       <url hash="2726512b">2023.ranlp-1.108</url>
@@ -1256,7 +1256,7 @@
     </paper>
     <paper id="120">
       <title><fixed-case>L</fixed-case>e<fixed-case>SS</fixed-case>: A Computationally-Light Lexical Simplifier for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Sanja</first><last>Stajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Stajner</last></author>
       <author><first>Daniel</first><last>Ibanez</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>1132–1142</pages>
@@ -1266,8 +1266,8 @@
     </paper>
     <paper id="121">
       <title><fixed-case>H</fixed-case>indi to <fixed-case>D</fixed-case>ravidian Language Neural Machine Translation Systems</title>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>1143–1150</pages>
       <abstract>Neural machine translation (NMT) has achieved state-of-art performance in high-resource language pairs, but the performance of NMT drops in low-resource conditions. Morphologically rich languages are yet another challenge in NMT. The common strategy to handle this issue is to apply sub-word segmentation. In this work, we compare the morphologically inspired segmentation methods against the Byte Pair Encoding (BPE) in processing the input for building NMT systems for Hindi to Malayalam and Hindi to Tamil, where Hindi is an Indo-Aryan language and Malayalam and Tamil are south Dravidian languages. These two languages are low resource, morphologically rich and agglutinative. Malayalam is more agglutinative than Tamil. We show that for both the language pairs, the morphological segmentation algorithm out-performs BPE. We also present an elaborate analysis on translation outputs from both the NMT systems.</abstract>
       <url hash="3b945dde">2023.ranlp-1.121</url>
@@ -1300,7 +1300,7 @@
       <title>Auto-Encoding Questions with Retrieval Augmented Decoding for Unsupervised Passage Retrieval and Zero-Shot Question Generation</title>
       <author><first>Stalin</first><last>Varanasi</last></author>
       <author><first>Muhammad Umer Tariq</first><last>Butt</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>1171–1179</pages>
       <abstract>Dense passage retrieval models have become state-of-the-art for information retrieval on many Open-domain Question Answering (ODQA) datasets. However, most of these models rely on supervision obtained from the ODQA datasets, which hinders their performance in a low-resource setting. Recently, retrieval-augmented language models have been proposed to improve both zero-shot and supervised information retrieval. However, these models have pre-training tasks that are agnostic to the target task of passage retrieval. In this work, we propose Retrieval Augmented Auto-encoding of Questions for zero-shot dense information retrieval. Unlike other pre-training methods, our pre-training method is built for target information retrieval, thereby making the pre-training more efficient. Our method consists of a dense IR model for encoding questions and retrieving documents during training and a conditional language model that maximizes the question’s likelihood by marginalizing over retrieved documents. As a by-product, we can use this conditional language model for zero-shot question generation from documents. We show that the IR model obtained through our method improves the current state-of-the-art of zero-shot dense information retrieval, and we improve the results even further by training on a synthetic corpus created by zero-shot question generation.</abstract>
       <url hash="bad5954d">2023.ranlp-1.124</url>
@@ -1322,7 +1322,7 @@
       <title>Socially Responsible Hate Speech Detection: Can Classifiers Reflect Social Stereotypes?</title>
       <author><first>Francielle</first><last>Vargas</last></author>
       <author><first>Isabelle</first><last>Carvalho</last></author>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <author><first>Fabrício</first><last>Benevenuto</last></author>
       <pages>1187–1196</pages>
@@ -1344,7 +1344,7 @@
     <paper id="128">
       <title>Classification of <fixed-case>US</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Cases Using <fixed-case>BERT</fixed-case>-Based Techniques</title>
       <author><first>Shubham</first><last>Vatsal</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
       <pages>1207–1215</pages>
       <abstract>Models based on bidirectional encoder representations from transformers (BERT) produce state of the art (SOTA) results on many natural language processing (NLP) tasks such as named entity recognition (NER), part-of-speech (POS) tagging etc. An interesting phenomenon occurs when classifying long documents such as those from the US supreme court where BERT-based models can be considered difficult to use on a first-pass or out-of-the-box basis. In this paper, we experiment with several BERT-based classification techniques for US supreme court decisions or supreme court database (SCDB) and compare them with the previous SOTA results. We then compare our results specifically with SOTA models for long documents. We compare our results for two classification tasks: (1) a broad classification task with 15 categories and (2) a fine-grained classification task with 279 categories. Our best result produces an accuracy of 80% on the 15 broad categories and 60% on the fine-grained 279 categories which marks an improvement of 8% and 28% respectively from previously reported SOTA results.</abstract>
@@ -1376,7 +1376,7 @@
       <author><first>Yizhou</first><last>Xu</last></author>
       <author><first>Kata</first><last>Gábor</last></author>
       <author><first>Jérôme</first><last>Milleret</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <pages>1234–1245</pages>
       <abstract>Text anomaly detection (TAD) is a crucial task that aims to identify texts that deviate significantly from the norm within a corpus. Despite its importance in various domains, TAD remains relatively underexplored in natural language processing. This article presents a systematic evaluation of 22 TAD algorithms on 17 corpora using multiple text representations, including monolingual and multilingual SBERT. The performance of the algorithms is compared based on three criteria: degree of supervision, theoretical basis, and architecture used. The results demonstrate that semi-supervised methods utilizing weak labels outperform both unsupervised methods and semi-supervised methods using only negative samples for training. Additionally, we explore the application of TAD techniques in hate speech detection. The results provide valuable insights for future TAD research and guide the selection of suitable algorithms for detecting text anomalies in different contexts.</abstract>
       <url hash="9b4cf9cb">2023.ranlp-1.131</url>
diff --git a/data/xml/2023.repl4nlp.xml b/data/xml/2023.repl4nlp.xml
index c96604ac2e..96034b3958 100644
--- a/data/xml/2023.repl4nlp.xml
+++ b/data/xml/2023.repl4nlp.xml
@@ -14,7 +14,7 @@
       <editor><first>Isabelle</first><last>Augenstein</last><affiliation>University of Copenhagen</affiliation></editor>
       <editor><first>Anna</first><last>Rogers</last><affiliation>University of Copenhagen</affiliation></editor>
       <editor><first>Kyunghyun</first><last>Cho</last><affiliation>New York University</affiliation></editor>
-      <editor><first>Edward</first><last>Grefenstette</last><affiliation>DeepMind</affiliation></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last><affiliation>DeepMind</affiliation></editor>
       <editor><first>Lena</first><last>Voita</last><affiliation>Meta AI</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="3">
       <title>Grammatical information in <fixed-case>BERT</fixed-case> sentence embeddings as two-dimensional arrays</title>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>Uppsala University and University of Geneva, Switzerland</affiliation></author>
       <pages>22-39</pages>
       <abstract>Sentence embeddings induced with various transformer architectures encode much semantic and syntactic information in a distributed manner in a one-dimensional array. We investigate whether specific grammatical information can be accessed in these distributed representations. Using data from a task developed to test rule-like generalizations, our experiments on detecting subject-verb agreement yield several promising results. First, we show that while the usual sentence representations encoded as one-dimensional arrays do not easily support extraction of rule-like regularities, a two-dimensional reshaping of these vectors allows various learning architectures to access such information. Next, we show that various architectures can detect patterns in these two-dimensional reshaped sentence embeddings and successfully learn a model based on smaller amounts of simpler training data, which performs well on more complex test data. This indicates that current sentence embeddings contain information that is regularly distributed, and which can be captured when the embeddings are reshaped into higher dimensional arrays. Our results cast light on representations produced by language models and help move towards developing few-shot learning approaches.</abstract>
@@ -139,7 +139,7 @@
       <author><first>Wenbo</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
       <author><first>Arpit</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>118-130</pages>
       <abstract>Recent advances in prompt tuning have proven effective as a new language modeling paradigm for various natural language understanding tasks. However, it is challenging to adapt the soft prompt embeddings to different domains or generalize to low-data settings when learning soft prompts itself is unstable, task-specific, and bias-prone. This paper proposes a principled learning framework—soft prompt construction (SPC)—to facilitate learning domain-adaptable soft prompts. Derived from the SPC framework is a simple loss that can plug into various models and tuning approaches to improve their cross-domain performance. We show SPC can improve upon SOTA for contextual query rewriting, summarization, and paraphrase detection by up to 5%, 19%, and 16%, respectively.</abstract>
       <url hash="80cff754">2023.repl4nlp-1.10</url>
@@ -193,7 +193,7 @@
     <paper id="15">
       <title>Syntax-Aware Graph-to-Graph Transformer for Semantic Role Labelling</title>
       <author><first>Alireza</first><last>Mohammadshahi</last></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <pages>174-186</pages>
       <abstract>Recent models have shown that incorporating syntactic knowledge into the semantic role labelling (SRL) task leads to a significant improvement. In this paper, we propose Syntax-aware Graph-to-Graph Transformer (SynG2G-Tr) model, which encodes the syntactic structure using a novel way to input graph relations as embeddings, directly into the self-attention mechanism of Transformer. This approach adds a soft bias towards attention patterns that follow the syntactic structure but also allows the model to use this information to learn alternative patterns. We evaluate our model on both span-based and dependency-based SRL datasets, and outperform previous alternative methods in both in-domain and out-of-domain settings, on CoNLL 2005 and CoNLL 2009 datasets.</abstract>
       <url hash="804340bd">2023.repl4nlp-1.15</url>
@@ -296,7 +296,7 @@
       <author><first>Cheng-Han</first><last>Chiang</last><affiliation>National Taiwan University†</affiliation></author>
       <author><first>Hung-yi</first><last>Lee</last><affiliation>National Taiwan University†</affiliation></author>
       <author><first>Yung-Sung</first><last>Chuang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>289-302</pages>
       <url hash="c653b5de">2023.repl4nlp-1.24</url>
       <bibkey>chiang-etal-2023-revealing</bibkey>
@@ -329,7 +329,7 @@
       <title>Visual Coherence Loss for Coherent and Visually Grounded Story Generation</title>
       <author><first>Xudong</first><last>Hong</last><affiliation>MPI Informatics and Saarland University and Saarland Informatics Campus</affiliation></author>
       <author><first>Vera</first><last>Demberg</last><affiliation>Saarland University and Saarland Informatics Campus</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Qiankun</first><last>Zheng</last><affiliation>Saarland University and Saarland Informatics Campus</affiliation></author>
       <author><first>Bernt</first><last>Schiele</last><affiliation>MPI Informatics and Saarland Informatics Campus</affiliation></author>
       <pages>334-346</pages>
diff --git a/data/xml/2023.resourceful.xml b/data/xml/2023.resourceful.xml
index 31aa045277..d0a388d6b5 100644
--- a/data/xml/2023.resourceful.xml
+++ b/data/xml/2023.resourceful.xml
@@ -7,7 +7,7 @@
       <editor><first>Felix</first><last>Morger</last></editor>
       <editor><first>Dana</first><last>Dannélls</last></editor>
       <editor><first>Simon</first><last>Dobnik</last></editor>
-      <editor><first>Beáta</first><last>Megyesi</last></editor>
+      <editor id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tórshavn, the Faroe Islands</address>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>DA</fixed-case>-<fixed-case>ELEXIS</fixed-case> Corpus - a Sense-Annotated Corpus for <fixed-case>D</fixed-case>anish with Parallel Annotations for Nine <fixed-case>E</fixed-case>uropean Languages</title>
-      <author><first>Bolette</first><last>Pedersen</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Sanni</first><last>Nimb</last><affiliation>Dsl</affiliation></author>
       <author><first>Sussi</first><last>Olsen</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Thomas</first><last>Troelsgård</last></author>
@@ -74,7 +74,7 @@
       <title>Word Substitution with Masked Language Models as Data Augmentation for Sentiment Analysis</title>
       <author><first>Larisa</first><last>Kolesnichenko</last></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>42-47</pages>
       <abstract>This paper explores the use of masked language modeling (MLM) for data augmentation (DA), targeting structured sentiment analysis (SSA) for Norwegian based on a dataset of annotated reviews. Considering the limited resources for Norwegian language and the complexity of the annotation task, the aim is to investigate whether this approach to data augmentation can help boost the performance. We report on experiments with substituting words both inside and outside of sentiment annotations, and we also present an error analysis, discussing some of the potential pitfalls of using MLM-based DA for SSA, and suggest directions for future work.</abstract>
       <url hash="e66fd00f">2023.resourceful-1.6</url>
@@ -123,7 +123,7 @@
       <title>A Diagnostic Dataset for Sentiment and Negation Modeling for <fixed-case>N</fixed-case>orwegian</title>
       <author><first>Petter</first><last>Mæhlum</last></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>77-85</pages>
       <abstract>Negation constitutes a challenging phenomenon for many natural language processing tasks, such as sentiment analysis (SA). In this paper we investigate the relationship between negation and sentiment in the context of Norwegian professional reviews. The first part of this paper includes a corpus study which investigates how negation is tied to sentiment in this domain, based on existing annotations. In the second part, we introduce NoReC-NegSynt, a synthetically augmented test set for negation and sentiment, to allow for a more detailed analysis of the role of negation in current neural SA models. This diagnostic test set, containing both clausal and non-clausal negation, allows for analyzing and comparing models’ abilities to treat several different types of negation. We also present a case-study, applying several neural SA models to the diagnostic data.</abstract>
       <url hash="f394f6e5">2023.resourceful-1.11</url>
diff --git a/data/xml/2023.rocling.xml b/data/xml/2023.rocling.xml
index 74e1eaf9d8..7383e1178a 100644
--- a/data/xml/2023.rocling.xml
+++ b/data/xml/2023.rocling.xml
@@ -37,7 +37,7 @@
       <title>Improving End-to-end <fixed-case>T</fixed-case>aiwanese-Speech-to-<fixed-case>C</fixed-case>hinese-Text Translation by Semi-supervised Learning</title>
       <author><first>Yu-Chun</first><last>Lin</last></author>
       <author><first>Chung-Che</first><last>Wang</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>21–28</pages>
       <url hash="34b3d727">2023.rocling-1.3</url>
       <bibkey>lin-etal-2023-improving-end</bibkey>
@@ -128,7 +128,7 @@
       <title>Impact of Feature Selection Algorithms on Readability Model</title>
       <author><first>Tsai-Ning</first><last>Tai</last></author>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <pages>106–115</pages>
       <url hash="c3a7b4e5">2023.rocling-1.13</url>
       <bibkey>tai-etal-2023-impact</bibkey>
@@ -145,7 +145,7 @@
     <paper id="15">
       <title>Is <fixed-case>GPT</fixed-case>-4 a Good Islamic Expert for Answering <fixed-case>Q</fixed-case>uran Questions?</title>
       <author><first>Sarah</first><last>Alnefaie</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad Ammar</first><last>Alsalka</last></author>
       <pages>124–133</pages>
       <url hash="ee07f1e3">2023.rocling-1.15</url>
@@ -169,7 +169,7 @@
       <author><first>Te-Lun</first><last>Yang</last></author>
       <author><first>Yu-Meng</first><last>Tang</last></author>
       <author><first>Ta-Lin</first><last>Chen</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <pages>141–156</pages>
       <url hash="6d8c1681">2023.rocling-1.17</url>
       <bibkey>zhang-etal-2023-category</bibkey>
@@ -178,7 +178,7 @@
       <title><fixed-case>ESC</fixed-case> <fixed-case>MA</fixed-case>-<fixed-case>SD</fixed-case> Net: Effective Speaker Separation through Convolutional Multi-View Attention and <fixed-case>S</fixed-case>udo<fixed-case>N</fixed-case>et</title>
       <author><first>Che-Wei</first><last>Liao</last></author>
       <author><first>Aye Nyein</first><last>Aung</last></author>
-      <author><first>Jeih-Weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></author>
       <pages>157–161</pages>
       <url hash="11b661a4">2023.rocling-1.18</url>
       <bibkey>liao-etal-2023-esc</bibkey>
@@ -196,7 +196,7 @@
       <title>Fine-Tuning and Evaluation of Question Generation for <fixed-case>S</fixed-case>lovak Language</title>
       <author><first>Ondrej</first><last>Megela</last></author>
       <author><first>Daniel</first><last>Hladek</last></author>
-      <author><first>Matus</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matus</first><last>Pleva</last></author>
       <author><first>Ján</first><last>Staš</last></author>
       <author><first>Ming-Hsiang</first><last>Su</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
@@ -222,7 +222,7 @@
     <paper id="23">
       <title>An Analysis of “<fixed-case>X</fixed-case> shi <fixed-case>Y</fixed-case>” Metaphors in <fixed-case>M</fixed-case>andarin Corpora and Learning Materials</title>
       <author><first>Yu-Hsiang</first><last>Shen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>194–201</pages>
       <url hash="22358e8b">2023.rocling-1.23</url>
       <bibkey>shen-chung-2023-analysis</bibkey>
@@ -239,7 +239,7 @@
       <title><fixed-case>T</fixed-case>aiwanese/<fixed-case>M</fixed-case>andarin Speech Recognition using <fixed-case>O</fixed-case>pen<fixed-case>AI</fixed-case>’s Whisper Multilingual Speech Recognition Engine Based on Generative Pretrained Transformer Architecture</title>
       <author><first>Yueh-Che</first><last>Hsieh</last></author>
       <author><first>Ke-ming</first><last>Lyu</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <pages>210–214</pages>
       <url hash="d5aeed76">2023.rocling-1.25</url>
       <bibkey>hsieh-etal-2023-taiwanese</bibkey>
@@ -257,7 +257,7 @@
     <paper id="27">
       <title>Compact <fixed-case>CNN</fixed-case>s for End-to-End Keyword Spotting on Resource-Constrained Edge <fixed-case>AI</fixed-case> Devices</title>
       <author><first>Joseph</first><last>Lin</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <pages>222–226</pages>
       <url hash="6b451ea3">2023.rocling-1.27</url>
       <bibkey>lin-lyu-2023-compact</bibkey>
@@ -314,7 +314,7 @@
       <author><first>Chiao-Han</first><last>Yen</last></author>
       <author><first>Jia-Cheng</first><last>Xu</last></author>
       <author><first>Deborah</first><last>Watty</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>262–269</pages>
       <url hash="2d6c7b34">2023.rocling-1.33</url>
       <bibkey>lin-etal-2023-solving</bibkey>
@@ -339,7 +339,7 @@
     <paper id="36">
       <title>Analysis of <fixed-case>C</fixed-case>hinese Irony on <fixed-case>PTT</fixed-case> Corpus-Using “Tested Positive” and “Hope” as the Key Words</title>
       <author><first>Pin-Wen</first><last>Wang</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>288–291</pages>
       <url hash="4aa6f7fb">2023.rocling-1.36</url>
       <bibkey>wang-chung-2023-analysis</bibkey>
@@ -349,7 +349,7 @@
       <author><first>Kai-Ching</first><last>Yeh</last></author>
       <author><first>Jou-An</first><last>Chi</last></author>
       <author><first>Da-Chen</first><last>Lian</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>292–299</pages>
       <url hash="c117fcff">2023.rocling-1.37</url>
       <bibkey>yeh-etal-2023-evaluating</bibkey>
@@ -359,7 +359,7 @@
       <author><first>Ruei-Cyuan</first><last>Su</last></author>
       <author><first>Tzu-En</first><last>Su</last></author>
       <author><first>Ming-Hsiang</first><last>Su</last></author>
-      <author><first>Matus</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matus</first><last>Pleva</last></author>
       <author><first>Daniel</first><last>Hladek</last></author>
       <pages>300–310</pages>
       <url hash="41859877">2023.rocling-1.38</url>
@@ -411,7 +411,7 @@
       <author><first>Te-Yu</first><last>Chi</last></author>
       <author><first>Te-Lun</first><last>Yang</last></author>
       <author><first>Sheh</first><last>Chen</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <pages>339–349</pages>
       <url hash="470b5f3f">2023.rocling-1.43</url>
       <bibkey>wang-etal-2023-crowner</bibkey>
@@ -447,7 +447,7 @@
       <author><first>Po-Kai</first><last>Chen</last></author>
       <author><first>Bing-Jhih</first><last>Huang</last></author>
       <author><first>Chi-Tao</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Jia-Ching</first><last>Wang</last></author>
       <pages>371–376</pages>
       <url hash="d31a2c13">2023.rocling-1.47</url>
diff --git a/data/xml/2023.scil.xml b/data/xml/2023.scil.xml
index 7dd564674f..effb9be282 100644
--- a/data/xml/2023.scil.xml
+++ b/data/xml/2023.scil.xml
@@ -167,7 +167,7 @@
       <title>Parsing “Early <fixed-case>E</fixed-case>nglish Books Online” for Linguistic Search</title>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <pages>222–242</pages>
       <url hash="4fe3fdcc">2023.scil-1.19</url>
       <bibkey>kulick-etal-2023-parsing</bibkey>
@@ -241,7 +241,7 @@
       <title>The Learnability of the Wh-Island Constraint in <fixed-case>D</fixed-case>utch by a Long Short-Term Memory Network</title>
       <author><first>Michelle</first><last>Suijkerbuijk</last></author>
       <author><first>Peter</first><last>de Swart</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <pages>321–331</pages>
       <url hash="88b47cd2">2023.scil-1.28</url>
       <bibkey>suijkerbuijk-etal-2023-learnability</bibkey>
@@ -255,7 +255,7 @@
     </paper>
     <paper id="30">
       <title>Unbounded recursion in two dimensions, where syntax and prosody meet</title>
-      <author><first>Edward P.</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler</last></author>
       <author><first>Kristine M.</first><last>Yu</last></author>
       <pages>343–356</pages>
       <url hash="51607356">2023.scil-1.30</url>
diff --git a/data/xml/2023.sealp.xml b/data/xml/2023.sealp.xml
index 640f0c850f..9e87c93885 100644
--- a/data/xml/2023.sealp.xml
+++ b/data/xml/2023.sealp.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2024-01-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop in South East Asian Language Processing</booktitle>
-      <editor><first>Derry</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></editor>
       <editor><first>Alham Fikri</first><last>Aji</last></editor>
       <editor><first>Clara</first><last>Vania</last></editor>
-      <editor><first>Genta Indra</first><last>Winata</last></editor>
+      <editor id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></editor>
       <editor><first>Ayu</first><last>Purwarianti</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Nusa Dua, Bali, Indonesia</address>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>S</fixed-case>ent<fixed-case>M</fixed-case>ix-3<fixed-case>L</fixed-case>: A Novel Code-Mixed Test Dataset in <fixed-case>B</fixed-case>angla-<fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi for Sentiment Analysis</title>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Dhiman</first><last>Goswami</last></author>
       <author><first>Antara</first><last>Mahmud</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
diff --git a/data/xml/2023.semeval.xml b/data/xml/2023.semeval.xml
index c2fc38621f..1cb828cab1 100644
--- a/data/xml/2023.semeval.xml
+++ b/data/xml/2023.semeval.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-07-09" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>A. Seza</first><last>Doğruöz</last></editor>
       <editor><first>Giovanni</first><last>Da San Martino</last></editor>
       <editor><first>Harish</first><last>Tayyar Madabushi</last></editor>
@@ -48,7 +48,7 @@
     <paper id="3">
       <title><fixed-case>U</fixed-case>o<fixed-case>R</fixed-case>-<fixed-case>NCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 1: Learning Word-Sense and Image Embeddings for Word Sense Disambiguation</title>
       <author><first>Thanet</first><last>Markchom</last><affiliation>University of Reading</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Joyce</first><last>Gitau</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Zehao</first><last>Liu</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Varun</first><last>Ojha</last><affiliation>Newcastle University</affiliation></author>
@@ -105,7 +105,7 @@
     <paper id="7">
       <title><fixed-case>BERT</fixed-case>astic at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?</title>
       <author><first>Tarek</first><last>Mahmoud</last><affiliation>Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>58-63</pages>
       <abstract>The naive approach for fine-tuning pretrained deep learning models on downstream tasks involves feeding them mini-batches of randomly sampled data. In this paper, we propose a more elaborate method for fine-tuning Pretrained Multilingual Transformers (PMTs) on multilingual data. Inspired by the success of curriculum learning approaches, we investigate the significance of fine-tuning PMTs on multilingual data in a sequential fashion language by language. Unlike the curriculum learning paradigm where the model is presented with increasingly complex examples, we do not adopt a notion of “easy” and “hard” samples. Instead, our experiments draw insight from psychological findings on how the human brain processes new information and the persistence of newly learned concepts. We perform our experiments on a challenging news-framing dataset that contains texts in six languages. Our proposed method outperforms the naïve approach by achieving improvements of 2.57\% in terms of F1 score. Even when we supplement the naïve approach with recency fine-tuning, we still achieve an improvement of 1.34\% with a 3.63\%$ convergence speed-up. Moreover, we are the first to observe an interesting pattern in which deep learning models exhibit a human-like primacy-recency effect.</abstract>
       <url hash="7882df2f">2023.semeval-1.7</url>
@@ -138,7 +138,7 @@
       <title><fixed-case>M</fixed-case>ars<fixed-case>E</fixed-case>clipse at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 3: Multi-lingual and Multi-label Framing Detection with Contrastive Learning</title>
       <author><first>Qisheng</first><last>Liao</last><affiliation>Mohamed Bin Zayed University of Artificial Intellgence</affiliation></author>
       <author><first>Meiting</first><last>Lai</last><affiliation>Liaoning University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>83-87</pages>
       <abstract>This paper describes our system for SemEval-2023 Task 3 Subtask 2 on Framing Detection. We used a multi-label contrastive loss for fine-tuning large pre-trained language models in a multi-lingual setting, achieving very competitive results: our system was ranked first on the official test set and on the official shared task leaderboard for five of the six languages for which we had training data and for which we could perform fine-tuning. Here, we describe our experimental setup, as well as various ablation studies. The code of our system is available at <url>https://github.com/QishengL/SemEval2023</url>.</abstract>
       <url hash="9c8d980a">2023.semeval-1.10</url>
@@ -247,7 +247,7 @@
     <paper id="19">
       <title><fixed-case>LRL</fixed-case>_<fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 4: The Touche23-<fixed-case>G</fixed-case>eorge-boole Approach for Multi-Label Classification of Human-Values behind Arguments</title>
       <author><first>Kushagri</first><last>Tandon</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
-      <author><first>Niladri</first><last>Chatterjee</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
       <pages>136-142</pages>
       <abstract>The task ValueEval aims at assigning a sub- set of possible human value categories under- lying a given argument. Values behind argu- ments are often determinants to evaluate the relevance and importance of decisions in eth- ical sense, thereby making them essential for argument mining. The work presented here proposes two systems for the same. Both sys- tems use RoBERTa to encode sentences in each document. System1 makes use of features ob- tained from training models for two auxiliary tasks, whereas System2 combines RoBERTa with topic modeling to get sentence represen- tation. These features are used by a classifi- cation head to generate predictions. System1 secured the rank 22 in the official task rank- ing, achieving the macro F1-score 0.46 on the main dataset. System2 was not a part of official evaluation. Subsequent experiments achieved highest (among the proposed systems) macro F1-scores of 0.48 (System2), 0.31 (ablation on System1) and 0.33 (ablation on System1) on the main dataset, the Nahj al-Balagha dataset, and the New York Times dataset.</abstract>
       <url hash="74adb6b9">2023.semeval-1.19</url>
@@ -257,7 +257,7 @@
     <paper id="20">
       <title><fixed-case>LRL</fixed-case>_<fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 6: Sequential Sentence Classification for Legal Documents Using Topic Modeling Features</title>
       <author><first>Kushagri</first><last>Tandon</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
-      <author><first>Niladri</first><last>Chatterjee</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last><affiliation>Indian Institute of Technology Delhi</affiliation></author>
       <pages>143-149</pages>
       <abstract>Natural Language Processing techniques can be leveraged to process legal proceedings for various downstream applications, such as sum- marization of a given judgement, prediction of the judgement for a given legal case, prece- dent search, among others. These applications will benefit from legal judgement documents already segmented into topically coherent units. The current task, namely, Rhetorical Role Pre- diction, aims at categorising each sentence in the sequence of sentences in a judgement document into different labels. The system proposed in this work combines topic mod- eling and RoBERTa to encode sentences in each document. A BiLSTM layer has been utilised to get contextualised sentence repre- sentations. The Rhetorical Role predictions for each sentence in each document are gen- erated by a final CRF layer of the proposed neuro-computing system. This system secured the rank 12 in the official task ranking, achiev- ing the micro-F1 score 0.7980. The code for the proposed systems has been made available at <url>https://github.com/KushagriT/SemEval23_</url> LegalEval_TeamLRL_NC</abstract>
       <url hash="a7567d61">2023.semeval-1.20</url>
@@ -304,7 +304,7 @@
     </paper>
     <paper id="25">
       <title><fixed-case>HULAT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 9: Data Augmentation for Pre-trained Transformers Applied to Multilingual Tweet Intimacy Analysis</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last><affiliation>Universidad Carlos III de Madrid</affiliation></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last><affiliation>Universidad Carlos III de Madrid</affiliation></author>
       <pages>177-183</pages>
       <abstract>This paper describes our participation in SemEval-2023 Task 9, Intimacy Analysis of Multilingual Tweets. We fine-tune some of the most popular transformer models with the training dataset and synthetic data generated by different data augmentation techniques. During the development phase, our best results were obtained by using XLM-T. Data augmentation techniques provide a very slight improvement in the results. Our system ranked in the 27th position out of the 45 participating systems. Despite its modest results, our system shows promising results in languages such as Portuguese, English, and Dutch. All our code is available in the repository <url>https://github.com/isegura/hulat_intimacy</url>.</abstract>
       <url hash="a410e00c">2023.semeval-1.25</url>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>HULAT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 10: Data Augmentation for Pre-trained Transformers Applied to the Detection of Sexism in Social Media</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last><affiliation>Universidad Carlos III de Madrid</affiliation></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last><affiliation>Universidad Carlos III de Madrid</affiliation></author>
       <pages>184-192</pages>
       <abstract>This paper describes our participation in SemEval-2023 Task 10, whose goal is the detection of sexism in social media. We explore some of the most popular transformer models such as BERT, DistilBERT, RoBERTa, and XLNet. We also study different data augmentation techniques to increase the training dataset. During the development phase, our best results were obtained by using RoBERTa and data augmentation for tasks B and C. However, the use of synthetic data does not improve the results for task C. We participated in the three subtasks. Our approach still has much room for improvement, especially in the two fine-grained classifications. All our code is available in the repository <url>https://github.com/isegura/hulat_edos</url>.</abstract>
       <url hash="27e4af10">2023.semeval-1.26</url>
@@ -355,8 +355,8 @@
       <title><fixed-case>FII</fixed-case> <fixed-case>SMART</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2023 Task7: Multi-evidence Natural Language Inference for Clinical Trial Data</title>
       <author><first>Mihai</first><last>Volosincu</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
       <author><first>Cosmin</first><last>Lupu</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
-      <author><first>Diana</first><last>Trandabat</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
-      <author><first>Daniela</first><last>Gifu</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
       <pages>212-220</pages>
       <abstract>The “Multi-evidence Natural Language Inference forClinical Trial Data” task at SemEval 2023competition focuses on extracting essentialinformation on clinical trial data, by posing twosubtasks on textual entailment and evidence retrieval. In the context of SemEval, we present a comparisonbetween a method based on the BioBERT model anda CNN model. The task is based on a collection ofbreast cancer Clinical Trial Reports (CTRs),statements, explanations, and labels annotated bydomain expert annotators. We achieved F1 scores of0.69 for determining the inference relation(entailment vs contradiction) between CTR -statement pairs. The implementation of our system ismade available via Github - <url>https://github.com/volosincu/FII_Smart__Semeval2023</url>.</abstract>
       <url hash="87e7b0fb">2023.semeval-1.30</url>
@@ -424,7 +424,7 @@
       <title>nclu_team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 6: Attention-based Approaches for Large Court Judgement Prediction with Explanation</title>
       <author><first>Nicolay</first><last>Rusnachenko</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Thanet</first><last>Markchom</last><affiliation>University of Reading</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <pages>270-274</pages>
       <abstract>Legal documents tend to be large in size. In this paper, we provide an experiment with attention-based approaches complemented by certain document processing techniques for judgment prediction. For the prediction of explanation, we consider this as an extractive text summarization problem based on an output of (1) CNN with attention mechanism and (2) self-attention of language models. Our extensive experiments show that treating document endings at first results in a 2.1% improvement in judgment prediction across all the models. Additional content peeling from non-informative sentences allows an improvement of explanation prediction performance by 4% in the case of attention-based CNN models. The best submissions achieved 8’th and 3’rd ranks on judgment prediction (C1) and prediction with explanation (C2) tasks respectively among 11 participating teams. The results of our experiments are published</abstract>
       <url hash="a189d068">2023.semeval-1.36</url>
@@ -463,7 +463,7 @@
       <author><first>Ronghao</first><last>Pan</last><affiliation>Universidad de Murcia</affiliation></author>
       <author><first>Salud María</first><last>Jiménez Zafra</last><affiliation>Universidad de Jan</affiliation></author>
       <author><first>María-Teresa</first><last>Martn-Valdivia</last><affiliation>Univeristy of Jaen</affiliation></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last><affiliation>University of Jaen</affiliation></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last><affiliation>University of Jaen</affiliation></author>
       <author><first>Rafael</first><last>Valencia-García</last><affiliation>Universidad de Murcia</affiliation></author>
       <pages>293-299</pages>
       <abstract>This work presents the participation of the UMUTeam and the SINAI research groups in the SemEval-2023 Task 9: Multilingual Tweet Intimacy Analysis. The goal of this task is to predict the intimacy of a set of tweets in 10 languages: English, Spanish, Italian, Portuguese, French, Chinese, Hindi, Arabic, Dutch and Korean, of which, the last 4 are not in the training data. Our approach to address this task is based on data augmentation and the use of three multilingual Large Language Models (multilingual BERT, XLM and mDeBERTA) by ensemble learning. Our team ranked 30th out of 45 participants. Our best results were achieved with two unseen languages: Korean (16th) and Hindi (19th).</abstract>
@@ -529,7 +529,7 @@
     <paper id="45">
       <title><fixed-case>RIGA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 2: <fixed-case>NER</fixed-case> Enhanced with <fixed-case>GPT</fixed-case>-3</title>
       <author><first>Eduards</first><last>Mukans</last><affiliation>University of Latvia</affiliation></author>
-      <author><first>Guntis</first><last>Barzdins</last><affiliation>University of Latvia, IMCS</affiliation></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last><affiliation>University of Latvia, IMCS</affiliation></author>
       <pages>331-339</pages>
       <abstract>The following is a description of the RIGA team’s submissions for the English track of the SemEval-2023 Task 2: Multilingual Complex Named Entity Recognition (MultiCoNER) II. Our approach achieves 17% boost in results by utilizing pre-existing Large-scale Language Models (LLMs), such as GPT-3, to gather additional contexts. We then fine-tune a pre-trained neural network utilizing these contexts. The final step of our approach involves meticulous model and compute resource scaling, which results in improved performance. Our results placed us 12th out of 34 teams in terms of overall ranking and 7th in terms of the noisy subset ranking. The code for our method is available on GitHub (<url>https://github.com/emukans/multiconer2-riga</url>).</abstract>
       <url hash="1ff678ba">2023.semeval-1.45</url>
@@ -541,7 +541,7 @@
       <title><fixed-case>SUTNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 4: <fixed-case>LG</fixed-case>-Transformer for Human Value Detection</title>
       <author><first>Hamed</first><last>Hematian Hemati</last><affiliation>Graduate Student, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
       <author><first>Sayed Hesam</first><last>Alavian</last><affiliation>Graduate student, AI Group, Computer Engineering Department, Sharif University of Technology, Tehran, Iran</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
       <author><first>Hamid</first><last>Beigy</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
       <pages>340-346</pages>
       <abstract>When we interact with other humans, humanvalues guide us to consider the human element. As we shall see, value analysis in NLP hasbeen applied to personality profiling but not toargument mining. As part of SemEval-2023Shared Task 4, our system paper describes amulti-label classifier for identifying human val-ues. Human value detection requires multi-label classification since each argument maycontain multiple values. In this paper, we pro-pose an architecture called Label Graph Trans-former (LG-Transformer). LG-Transformeris a two-stage pipeline consisting of a trans-former jointly encoding argument and labelsand a graph module encoding and obtainingfurther interactions between labels. Using ad-versarial training, we can boost performanceeven further. Our best method scored 50.00 us-ing F1 score on the test set, which is 7.8 higherthan the best baseline method. Our code ispublicly available on Github.</abstract>
@@ -554,7 +554,7 @@
       <author><first>Hamed</first><last>Hematian Hemati</last><affiliation>Graduate Student, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
       <author><first>Sayed Hesam</first><last>Alavian</last><affiliation>Graduate student, AI Group, Computer Engineering Department, Sharif University of Technology, Tehran, Iran</affiliation></author>
       <author><first>Hamid</first><last>Beigy</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, AI Group, Computer Engineering Department, Sharif University of Technology</affiliation></author>
       <pages>347-356</pages>
       <abstract>There is no simple definition of sexism, butit can be described as prejudice, stereotyping,or discrimination, especially against women,based on their gender. In online interactions,sexism is common. One out of ten Americanadults says that they have been harassed be-cause of their gender and have been the targetof sexism, so sexism is a growing issue. TheExplainable Detection of Online Sexism sharedtask in SemEval-2023 aims at building sexismdetection systems for the English language. Inorder to address the problem, we use largelanguage models such as RoBERTa and De-BERTa. In addition, we present Random LayerAdversarial Training (RLAT) for transformers,and show its significant impact on solving allsubtasks. Moreover, we use virtual adversar-ial training and contrastive learning to improveperformance on subtask A. Upon completionof subtask A, B, and C test sets, we obtainedmacro-F1 of 84.45, 67.78, and 52.52, respec-tively outperforming proposed baselines on allsubtasks. Our code is publicly available onGithub.</abstract>
       <url hash="062e58a0">2023.semeval-1.47</url>
@@ -576,7 +576,7 @@
     <paper id="49">
       <title><fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Tasks 12 A and 12<fixed-case>B</fixed-case>: Sentiment Analysis for Tweets Written in Low-resource <fixed-case>A</fixed-case>frican Languages using Various Machine Learning and Deep Learning Methods, Resampling, and <fixed-case>H</fixed-case>yper<fixed-case>P</fixed-case>arameter Tuning</title>
       <author><first>Ron</first><last>Keinan</last><affiliation>Jerusalem College of Technology, Lev Academic Center</affiliation></author>
-      <author><first>Yaakov</first><last>Hacohen-Kerner</last><affiliation>Jerusalem College of Technology</affiliation></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>Hacohen-Kerner</last><affiliation>Jerusalem College of Technology</affiliation></author>
       <pages>365-378</pages>
       <abstract>In this paper, we describe our submissions to the SemEval-2023 contest. We tackled subtask 12 - “AfriSenti-SemEval: Sentiment Analysis for Low-resource African Languages using Twitter Dataset”. We developed different models for 12 African languages and a 13th model for a multilingual dataset built from these 12 languages. We applied a wide variety of word and char n-grams based on their tf-idf values, 4 classical machine learning methods, 2 deep learning methods, and 3 oversampling methods. We used 12 sentiment lexicons and applied extensive hyperparameter tuning.</abstract>
       <url hash="3ea3c346">2023.semeval-1.49</url>
@@ -742,7 +742,7 @@
     <paper id="64">
       <title><fixed-case>LT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 1: Effective Zero-Shot Visual Word Sense Disambiguation Approaches using External Knowledge Sources</title>
       <author><first>Florian</first><last>Schneider</last><affiliation>University of Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universitt Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universitt Hamburg</affiliation></author>
       <pages>462-468</pages>
       <abstract>The objective of the SemEval-2023 Task 1: Visual Word Sense Disambiguation (VWSD) is to identify the image illustrating the indented meaning of a target word and some minimal additional context. The omnipresence of textual and visual data in the task strongly suggests the utilization of the recent advances in multi-modal machine learning, i.e., pretrained visiolinguistic models (VLMs). Often referred to as foundation models due to their strong performance on many vision-language downstream tasks, these models further demonstrate powerful zero-shot capabilities. In this work, we utilize various pertained VLMs in a zero-shot fashion for multiple approaches using external knowledge sources to enrich the contextual information. Further, we evaluate our methods on the final test data and extensively analyze the suitability of different knowledge sources, the influence of training data, model sizes, multi-linguality, and different textual prompting strategies. Although we are not among the best-performing systems (rank 20 of 56), our experiments described in this work prove competitive results. Moreover, we aim to contribute meaningful insights and propel multi-modal machine learning tasks like VWSD.</abstract>
       <url hash="2e044ccb">2023.semeval-1.64</url>
@@ -787,7 +787,7 @@
       <author><first>Heike</first><last>Adel</last><affiliation>Bosch Center for Artificial Intelligence</affiliation></author>
       <author><first>Lukas</first><last>Lange</last><affiliation>Bosch Center for Artificial Intelligence</affiliation></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Bosch Center for Artificial Intelligence</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>488-497</pages>
       <abstract>This paper describes our system developed for the SemEval-2023 Task 12 “Sentiment Analysis for Low-resource African Languages using Twitter Dataset”. Sentiment analysis is one of the most widely studied applications in natural language processing. However, most prior work still focuses on a small number of high-resource languages. Building reliable sentiment analysis systems for low-resource languages remains challenging, due to the limited training data in this task. In this work, we propose to leverage language-adaptive and task-adaptive pretraining on African texts and study transfer learning with source language selection on top of an African language-centric pretrained language model. Our key findings are: (1) Adapting the pretrained model to the target language and task using a small yet relevant corpus improves performance remarkably by more than 10 F1 score points. (2) Selecting source languages with positive transfer gains during training can avoid harmful interference from dissimilar languages, leading to better results in multilingual and cross-lingual settings. In the shared task, our system wins 8 out of 15 tracks and, in particular, performs best in the multilingual evaluation.</abstract>
       <url hash="80f3c6e8">2023.semeval-1.68</url>
@@ -916,7 +916,7 @@
       <author><first>Intisar</first><last>Almuslim</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Sean</first><last>Stilwell</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Surya</first><last>Kiran Suresh</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <pages>580-588</pages>
       <abstract>We describe the methods we used for legal text understanding, specifically Task 6 Legal-Eval at SemEval 2023. The outcomes could assist law practitioners and help automate the working process of judicial systems. The shared task defined three main sub-tasks: sub-task A, Rhetorical Roles Prediction (RR); sub-task B, Legal Named Entities Extraction (L-NER); and sub-task C, Court Judgement Prediction with Explanation (CJPE). Our team addressed all three sub-tasks by exploring various Deep Learning (DL) based models. Overall, our team’s approaches achieved promising results on all three sub-tasks, demonstrating the potential of deep learning-based models in the judicial domain.</abstract>
       <url hash="adf272bf">2023.semeval-1.79</url>
@@ -1048,7 +1048,7 @@
     <paper id="90">
       <title>Rudolf Christoph Eucken at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 4: An Ensemble Approach for Identifying Human Values from Arguments</title>
       <author><first>Sougata</first><last>Saha</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
       <pages>660-663</pages>
       <abstract>The subtle human values we acquire through life experiences govern our thoughts and gets reflected in our speech. It plays an integral part in capturing the essence of our individuality and making it imperative to identify such values in computational systems that mimic human actions. Computational argumentation is a field that deals with the argumentation capabilities of humans and can benefit from identifying such values. Motivated by that, we present an ensemble approach for detecting human values from argument text. Our ensemble comprises three models: (i) An entailment-based model for determining the human values based on their descriptions, (ii) A Roberta-based classifier that predicts the set of human values from an argument. (iii) A Roberta-based classifier to predict a reduced set of human values from an argument. We experiment with different ways of combining the models and report our results. Furthermore, our best combination achieves an overall F1 score of 0.48 on the main test set.</abstract>
       <url hash="7ffdb4dd">2023.semeval-1.90</url>
@@ -1298,7 +1298,7 @@
     <paper id="111">
       <title><fixed-case>IITD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 2: A Multi-Stage Information Retrieval Approach for Fine-Grained Named Entity Recognition</title>
       <author><first>Shivani</first><last>Choudhary</last><affiliation>Indian Institute of Technology</affiliation></author>
-      <author><first>Niladri</first><last>Chatterjee</last><affiliation>IIT Delhi</affiliation></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last><affiliation>IIT Delhi</affiliation></author>
       <author><first>Subir</first><last>Saha</last><affiliation>IIT Delhi</affiliation></author>
       <pages>800-806</pages>
       <abstract>MultiCoNER-II is a fine-grained Named Entity Recognition (NER) task that aims to identify ambiguous and complex named entities in multiple languages, with a small amount of contextual information available. To address this task, we propose a multi-stage information retrieval (IR) pipeline that improves the performance of language models for fine-grained NER. Our approach involves leveraging a combination of a BM25-based IR model and a language model to retrieve relevant passages from a corpus. These passages are then used to train a model that utilizes a weighted average of losses. The prediction is generated by a decoder stack that includes a projection layer and conditional random field. To demonstrate the effectiveness of our approach, we participated in the English track of the MultiCoNER-II competition. Our approach yielded promising results, which we validated through detailed analysis.</abstract>
@@ -1311,8 +1311,8 @@
       <author><first>Carlos-Emiliano</first><last>Gonzalez-Gallardo</last><affiliation>L3i, La Rochelle Universit</affiliation></author>
       <author><first>Thi Hong Hanh</first><last>Tran</last><affiliation>La Rochelle University</affiliation></author>
       <author><first>Nancy</first><last>Girdhar</last><affiliation>L3i, La Rochelle University</affiliation></author>
-      <author><first>Emanuela</first><last>Boros</last><affiliation>EPFL</affiliation></author>
-      <author><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last><affiliation>EPFL</affiliation></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
       <author><first>Antoine</first><last>Doucet</last><affiliation>University of La Rochelle</affiliation></author>
       <pages>807-814</pages>
       <abstract>This paper summarizes the participation of the L3i laboratory of the University of La Rochelle in the SemEval-2023 Task 2, Multilingual Complex Named Entity Recognition (MultiCoNER II). Similar to MultiCoNER I, the task seeks to develop methods to detect semantic ambiguous and complex entities in short and low-context settings. However, MultiCoNER II adds a fine-grained entity taxonomy with over 30 entity types and corrupted data on the test partitions. We approach these complications following prompt-based learning as (1) a ranking problem using a seq2seq framework, and (2) an extractive question-answering task. Our findings show that even if prompting techniques have a similar recall to fine-tuned hierarchical language model-based encoder methods, precision tends to be more affected.</abstract>
@@ -1354,7 +1354,7 @@
       <title><fixed-case>D</fixed-case>2<fixed-case>KL</fixed-case>ab at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 2: Leveraging <fixed-case>T</fixed-case>-<fixed-case>NER</fixed-case> to Develop a Fine-Tuned Multilingual Model for Complex Named Entity Recognition</title>
       <author><first>Thibault</first><last>Ehrhart</last><affiliation>EURECOM</affiliation></author>
       <author><first>Julien</first><last>Plu</last><affiliation>Buster.ai</affiliation></author>
-      <author><first>Raphael</first><last>Troncy</last><affiliation>EURECOM</affiliation></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last><affiliation>EURECOM</affiliation></author>
       <pages>836-840</pages>
       <abstract>This paper presents D2KLab’s system used for the shared task of “Multilingual Complex Named Entity Recognition (MultiCoNER II)”, as part of SemEval 2023 Task 2. The system relies on a fine-tuned transformer based language model for extracting named entities. In addition to the architecture of the system, we discuss our results and observations.</abstract>
       <url hash="3916b581">2023.semeval-1.115</url>
@@ -1365,7 +1365,7 @@
       <title><fixed-case>LTRC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 6: Experiments with Ensemble Embeddings</title>
       <author><first>Pavan</first><last>Baswani</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Hiranmai</first><last>Sri Adibhatla</last><affiliation>IIIT Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>841-846</pages>
       <abstract>In this paper, we present our team’s involvement in Task 6: LegalEval: Understanding Legal Texts. The task comprised three subtasks, and we focus on subtask A: Rhetorical Roles prediction. Our approach included experimenting with pre-trained embeddings and refining them with statistical and neural classifiers. We provide a thorough examination ofour experiments, solutions, and analysis, culminating in our best-performing model and current progress. We achieved a micro F1 score of 0.6133 on the test data using fine-tuned LegalBERT embeddings.</abstract>
       <url hash="4ee78389">2023.semeval-1.116</url>
@@ -1376,7 +1376,7 @@
       <title><fixed-case>T</fixed-case>eam<fixed-case>A</fixed-case>mpa at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 3: Exploring Multilabel and Multilingual <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a Models for Persuasion and Framing Detection</title>
       <author><first>Amalie</first><last>Pauli</last><affiliation>Computer Science, Aarhus University</affiliation></author>
       <author><first>Rafael</first><last>Sarabia</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Ira</first><last>Assent</last><affiliation>Aarhus University</affiliation></author>
       <pages>847-855</pages>
       <abstract>This paper describes our submission to theSemEval 2023 Task 3 on two subtasks: detectingpersuasion techniques and framing. Bothsubtasks are multi-label classification problems. We present a set of experiments, exploring howto get robust performance across languages usingpre-trained RoBERTa models. We test differentoversampling strategies, a strategy ofadding textual features from predictions obtainedwith related models, and present bothinconclusive and negative results. We achievea robust ranking across languages and subtaskswith our best ranking being nr. 1 for Subtask 3on Spanish.</abstract>
@@ -1422,7 +1422,7 @@
     <paper id="121">
       <title><fixed-case>MEERQAT</fixed-case>-<fixed-case>IRIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 2: Leveraging Contextualized Tag Descriptors for Multilingual Named Entity Recognition</title>
       <author><first>Jesus</first><last>Lovon-Melgarejo</last><affiliation>IRIT</affiliation></author>
-      <author><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
       <author><first>Romaric</first><last>Besançon</last><affiliation>CEA LIST</affiliation></author>
       <author><first>Olivier</first><last>Ferret</last><affiliation>CEA List</affiliation></author>
       <author><first>Lynda</first><last>Lechani</last><affiliation>IRIT</affiliation></author>
@@ -1477,7 +1477,7 @@
     <paper id="125">
       <title><fixed-case>IRIT</fixed-case>_<fixed-case>IRIS</fixed-case>_<fixed-case>A</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 6: Legal Rhetorical Role Labeling Supported by Dynamic-Filled Contextualized Sentence Chunks</title>
       <author><first>Alexandre Gomes de</first><last>Lima</last><affiliation>Federal Institute of Rio Grande do Norte</affiliation></author>
-      <author><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
       <author><first>Eduardo</first><last>H. da S. Aranha</last><affiliation>Federal University of Rio Grande do Norte</affiliation></author>
       <pages>905-912</pages>
       <abstract>This work presents and evaluates an approach to efficiently leverage the context exploitation ability of pre-trained Transformer models as a way of boosting the performance of models tackling the Legal Rhetorical Role Labeling task. The core idea is to feed the model with sentence chunks that are assembled in a way that avoids the insertion of padding tokens and the truncation of sentences and, hence, obtain better sentence embeddings. The achieved results show that our proposal is efficient, despite its simplicity, since models based on it overcome strong baselines by 3.76% in the worst case and by 8.71% in the best case.</abstract>
@@ -1488,8 +1488,8 @@
     <paper id="126">
       <title>Togedemaru at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 8: Causal Medical Claim Identification and Extraction from Social Media Posts</title>
       <author><first>Andra</first><last>Oica</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania</affiliation></author>
-      <author><first>Daniela</first><last>Gifu</last><affiliation>“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science &amp; Romanian Academy - Iasi branch, Institute of Computer Science</affiliation></author>
-      <author><first>Diana</first><last>Trandabat</last><affiliation>University Alexandru Ioan Cuza of Iasi, Romania</affiliation></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last><affiliation>“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science &amp; Romanian Academy - Iasi branch, Institute of Computer Science</affiliation></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last><affiliation>University Alexandru Ioan Cuza of Iasi, Romania</affiliation></author>
       <pages>913-921</pages>
       <abstract>The “Causal Medical Claim Identification and Extraction from Social Media Posts task at SemEval 2023 competition focuses on identifying and validating medical claims in English, by posing two subtasks on causal claim identification and PIO (Population, Intervention, Outcome) frame extraction. In the context of SemEval, we present a method for sentence classification in four categories (claim, experience, experience_based_claim or a question) based on BioBERT model with a MLP layer. The website from which the dataset was gathered, Reddit, is a social news and content discussion site. The evaluation results show the effectiveness of the solution of this study (83.68%).</abstract>
       <url hash="deca4360">2023.semeval-1.126</url>
@@ -1591,7 +1591,7 @@
       <title>University at Buffalo at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 11: <fixed-case>MASDA</fixed-case>–Modelling Annotator Sensibilities through <fixed-case>D</fixed-case>is<fixed-case>A</fixed-case>ggregation</title>
       <author><first>Michael</first><last>Sullivan</last><affiliation>University at Buffalo</affiliation></author>
       <author><first>Mohammed</first><last>Yasin</last><affiliation>University at Buffalo</affiliation></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last><affiliation>University at Buffalo</affiliation></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last><affiliation>University at Buffalo</affiliation></author>
       <pages>978-985</pages>
       <abstract>Modeling the most likely label when an annotation task is perspective-dependent discards relevant sources of variation that come from the annotators themselves. We present three approaches to modeling the controversiality of a particular text. First, we explicitly represented annotators using annotator embeddings to predict the training signals of each annotator’s selections in addition to a majority class label. This method leads to reduction in error relative to models without these features, allowing the overall result to influence the weights of each annotator on the final prediction. In a second set of experiments, annotators were not modeled individually but instead annotator judgments were combined in a pairwise fashion that allowed us to implicitly combine annotators. Overall, we found that aggregating and explicitly comparing annotators’ responses to a static document representation produced high-quality predictions in all datasets, though some systems struggle to account for large or variable numbers of annotators.</abstract>
       <url hash="1cc559d4">2023.semeval-1.135</url>
@@ -1603,8 +1603,8 @@
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 10: Leveraging Emotions, Sentiments, and Irony Knowledge for Explainable Detection of Online Sexism</title>
       <author><first>M. Estrella</first><last>Vallecillo Rodríguez</last></author>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
       <pages>986-994</pages>
       <abstract>This paper describes the participation of SINAI research team in the Explainable Detection of Online Sexism (EDOS) Shared Task at SemEval 2023. Specifically, we participate in subtask A (binary sexism detection), subtask B (category of sexism), and subtask C (fine-grained vector of sexism). For the three subtasks, we propose a system that integrates information related to emotions, sentiments, and irony in order to check whether these features help detect sexism content. Our team ranked 46th in subtask A, 37th in subtask B, and 29th in subtask C, achieving 0.8245, 0.6043, and 0.4376 of macro f1-score, respectively, among the participants.</abstract>
       <url hash="36bc71a4">2023.semeval-1.136</url>
@@ -1794,8 +1794,8 @@
       <author><first>Viorica-Camelia</first><last>Lupancu</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi</affiliation></author>
       <author><first>Alexandru-Gabriel</first><last>Platica</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi</affiliation></author>
       <author><first>Cristian-Mihai</first><last>Rosu</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi</affiliation></author>
-      <author><first>Daniela</first><last>Gifu</last><affiliation>Institute of Computer Science, Romanian Academy - Iasi Branch</affiliation></author>
-      <author><first>Diana</first><last>Trandabat</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi</affiliation></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last><affiliation>Institute of Computer Science, Romanian Academy - Iasi Branch</affiliation></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi</affiliation></author>
       <pages>1107-1113</pages>
       <abstract>This task focuses on identifying complex named entities (NEs) in several languages. In the context of SemEval-2023 competition, our team presents an exploration of a base transformer model’s capabilities regarding the task, focused more specifically on five languages (English, Spanish, Swedish, German, Italian). We take DistilBERT and BERT as two examples of basic transformer models, using DistilBERT as a baseline and BERT as the platform to create an improved model. The dataset that we are using, MultiCoNER II, is a large multilingual dataset used for NER, that covers domains like: Wiki sentences, questions and search queries across 12 languages. This dataset contains 26M tokens and it is assembled from public resources. MultiCoNER II defines a NER tag-set with 6 classes and 67 tags. We have managed to get moderate results in the English track (we ranked 17th out of 34), while our results in the other tracks could be further improved in the future (overall third to last).</abstract>
       <url hash="2683a647">2023.semeval-1.153</url>
@@ -1858,7 +1858,7 @@
     </paper>
     <paper id="159">
       <title><fixed-case>NLP</fixed-case>eople at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 2: A Staged Approach for Multilingual Named Entity Recognition</title>
-      <author><first>Mohab</first><last>Elkaref</last><affiliation>IBM Research Europe</affiliation></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last><affiliation>IBM Research Europe</affiliation></author>
       <author><first>Nathan</first><last>Herr</last><affiliation>IBM</affiliation></author>
       <author><first>Shinnosuke</first><last>Tanaka</last><affiliation>IBM Research Europe</affiliation></author>
       <author><first>Geeth</first><last>De Mel</last><affiliation>IBM</affiliation></author>
@@ -1874,7 +1874,7 @@
       <author><first>Patchipulusu</first><last>Sindhu</last><affiliation>Student</affiliation></author>
       <author><first>Diya</first><last>Gupta</last><affiliation>Student</affiliation></author>
       <author><first>Sanjeevi</first><last>Meghana</last><affiliation>Student</affiliation></author>
-      <author><first>Anand</first><last>Kumar M</last><affiliation>Assistant Professor</affiliation></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last><affiliation>Assistant Professor</affiliation></author>
       <pages>1154-1160</pages>
       <abstract>The ability to automatically recognise the rhetorical roles of sentences in a legal case judgement is a crucial challenge to tackle since it can be useful for a number of activities that come later, such as summarising legal judgements and doing legal searches. The task is exigent since legal case documents typically lack structure, and their rhetorical roles could be subjective. This paper describes SemEval-2023 Task 6: LegalEval: Understanding Legal Texts, Sub-task A: Rhetorical Roles Prediction (RR). We propose a system to automatically generate rhetorical roles of all the sentences in a legal case document using Hierarchical Bi-LSTM CRF model and RoBERTa transformer. We also showcase different techniques used to manipulate dataset to generate a set of varying embeddings and train the Hierarchical Bi-LSTM CRF model to achieve better performance. Among all, model trained with the sent2vec embeddings concatenated with the handcrafted features perform better with the micro f1-score of 0.74 on test data.</abstract>
       <url hash="ce156297">2023.semeval-1.160</url>
@@ -1942,7 +1942,7 @@
     </paper>
     <paper id="166">
       <title><fixed-case>JUAGE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 10: Parameter Efficient Classification</title>
-      <author><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
       <author><first>Katerina</first><last>Korre</last><affiliation>University of Bologna</affiliation></author>
       <author><first>John</first><last>Pavlopoulos</last><affiliation>Athens University of Economics and Business</affiliation></author>
       <author><first>Katrin</first><last>Tomanek</last><affiliation>Google Inc</affiliation></author>
@@ -2016,7 +2016,7 @@
       <author><first>Maram</first><last>Hasanain</last></author>
       <author><first>Ahmed Oumar</first><last>El-Shangiti</last></author>
       <author><first>Rabindra Nath</first><last>Nandi</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
       <pages>1237-1244</pages>
       <abstract>Misinformation spreading in mainstream and social media has been misleading users in different ways. Manual detection and verification efforts by journalists and fact-checkers can no longer cope with the great scale and quick spread of misleading information. This motivated research and industry efforts to develop systems for analyzing and verifying news spreading online. The SemEval-2023 Task 3 is an attempt to address several subtasks under this overarching problem, targeting writing techniques used in news articles to affect readers’ opinions. The task addressed three subtasks with six languages, in addition to three “surprise” test languages, resulting in 27 different test setups. This paper describes our participating system to this task. Our team is one of the 6 teams that successfully submitted runs for all setups. The official results show that our system is ranked among the top 3 systems for 10 out of the 27 setups.</abstract>
@@ -2095,7 +2095,7 @@
       <author><first>Junzhe</first><last>Zhao</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Yingxi</first><last>Wang</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Nicolay</first><last>Rusnachenko</last><affiliation>Newcastle University</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <pages>1282-1286</pages>
       <abstract>Named Entity Recognition (NER) is a subtask of Natural Language Processing (NLP) that involves identifying and categorizing named entities. The result annotation makes unstructured natural texts applicable for other NLP tasks, including information retrieval, question answering, and machine translation. NER is also essential in legal as an initial stage in extracting relevant entities. However, legal texts contain domain-specific named entities, such as applicants, defendants, courts, statutes, and articles. The latter makes standard named entity recognizers incompatible with legal documents. This paper proposes an approach combining multiple models’ results via a voting mechanism for unique entity identification in legal texts. This endeavor focuses on extracting legal named entities, and the specific assignment (task B) is to create a legal NER system for unique entity annotation in legal documents. The results of our experiments and system implementation are published in <url>https://github.com/SuperEDG/Legal_Project</url>.</abstract>
       <url hash="fc0c4d8e">2023.semeval-1.178</url>
@@ -2122,7 +2122,7 @@
       <author><first>Anup</first><last>Roy</last><affiliation>IIT Kanpur</affiliation></author>
       <author><first>Shubham Kumar</first><last>Mishra</last><affiliation>Indian Institute of Technology,Kanpur</affiliation></author>
       <author><first>Arnab</first><last>Bhattacharya</last><affiliation>Dept. of Computer Science and Engineering, IIT Kanpur</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>IIT Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>IIT Kharagpur</affiliation></author>
       <author><first>Kripabandhu</first><last>Ghosh</last><affiliation>Indian Institute of Science Education and Research- Kolkata (IISER-K)</affiliation></author>
       <pages>1293-1303</pages>
       <abstract>This paper describes our submission to the SemEval-2023 for Task 6 on LegalEval: Understanding Legal Texts. Our submission concentrated on three subtasks: Legal Named Entity Recognition (L-NER) for Task-B, Legal Judgment Prediction (LJP) for Task-C1, and Court Judgment Prediction with Explanation (CJPE) for Task-C2. We conducted various experiments on these subtasks and presented the results in detail, including data statistics and methodology. It is worth noting that legal tasks, such as those tackled in this research, have been gaining importance due to the increasing need to automate legal analysis and support. Our team obtained competitive rankings of 15th, 11th, and 1st in Task-B, Task-C1, and Task-C2, respectively, as reported on the leaderboard.</abstract>
@@ -2148,7 +2148,7 @@
       <author><first>Sana</first><last>Al-azzawi</last><affiliation>Lulea University of Technology</affiliation></author>
       <author><first>Atnafu Lambebo</first><last>Tonja</last><affiliation>Instituto Politcnico Nacional (IPN), Centro de Investigacin en Computacin (CIC)</affiliation></author>
       <author><first>Iyanuoluwa</first><last>Shode</last><affiliation>Montclair State University</affiliation></author>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>Ayodele</first><last>Awokoya</last><affiliation>University of Ibadan</affiliation></author>
       <author><first>Mardiyyah</first><last>Oduwole</last><affiliation>ML Collective | Masakhane | Cohere4AI</affiliation></author>
       <author><first>Tosin</first><last>Adewumi</last><affiliation>Lule University of Technology</affiliation></author>
@@ -2211,7 +2211,7 @@
       <author><first>Philipp</first><last>Heinisch</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Moritz</first><last>Plenz</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Anette</first><last>Frank</last><affiliation>Heidelberg University</affiliation></author>
-      <author><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last><affiliation>Univ. Bielefeld</affiliation></author>
       <pages>1347-1358</pages>
       <abstract>This paper describes the system and experimental results of an ensemble-based approach tomultilingual framing detection for the submission of the ACCEPT team to the SemEval-2023 Task 3 on Framing Detection (Subtask 2). The approach is based on an ensemble that combines three different methods: a classifier based on large language models, a classifier based on static word embeddings, and an approach that uses external commonsense knowledge graphs, in particular, ConceptNet. The results of the three classification heads are aggregated into an overall prediction for each frame class. Our best submission yielded a micro F1-score of 50.69% (rank 10) and a macro F1-score of 50.20% (rank 3) for English articles. Our experimental results show that static word embeddings and knowledge graphs are useful components for frame detection, while the ensemble of all three methods combines the strengths of our three proposed methods. Through system ablations, we show that the commonsenseguided knowledge graphs are the outperforming method for many languages.</abstract>
       <url hash="a3b598d5">2023.semeval-1.187</url>
@@ -2473,7 +2473,7 @@
       <author><first>Maksim</first><last>Aparovich</last><affiliation>Brno University of Technology, Faculty of Information Technology</affiliation></author>
       <author><first>Santosh</first><last>Kesiraju</last><affiliation>Brno University of Technology</affiliation></author>
       <author><first>Aneta</first><last>Dufkova</last><affiliation>Brno University of Technology</affiliation></author>
-      <author><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
       <pages>1518-1524</pages>
       <abstract>This paper presents our proposed method for SemEval-2023 Task 12, which focuses on sentiment analysis for low-resource African languages. Our method utilizes a language-centric domain adaptation approach which is based on adversarial training, where a small version of Afro-XLM-Roberta serves as a generator model and a feed-forward network as a discriminator. We participated in all three subtasks: monolingual (12 tracks), multilingual (1 track), and zero-shot (2 tracks). Our results show an improvement in weighted F1 for 13 out of 15 tracks with a maximum increase of 4.3 points for Moroccan Arabic compared to the baseline. We observed that using language family-based labels along with sequence-level input representations for the discriminator model improves the quality of the cross-lingual sentiment analysis for the languages unseen during the training. Additionally, our experimental results suggest that training the system on languages that are close in a language families tree enhances the quality of sentiment analysis for low-resource languages. Lastly, the computational complexity of the prediction step was kept at the same level which makes the approach to be interesting from a practical perspective. The code of the approach can be found in our repository.</abstract>
       <url hash="ba1067f6">2023.semeval-1.209</url>
@@ -2930,7 +2930,7 @@
     </paper>
     <paper id="247">
       <title>Jus Mundi at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 6: Using a Frustratingly Easy Domain Adaption for a Legal Named Entity Recognition System</title>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last><affiliation>Jus Mundi</affiliation></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last><affiliation>Jus Mundi</affiliation></author>
       <author><first>Akshita</first><last>Gheewala</last><affiliation>Jus Mundi</affiliation></author>
       <pages>1783-1790</pages>
       <abstract>In this work, we present a Named Entity Recognition (NER) system that was trained using a Frustratingly Easy Domain Adaptation (FEDA) over multiple legal corpora. The goal was to create a NER capable of detecting 14 types of legal named entities in Indian judgments. Besides the FEDA architecture, we explored a method based on overlapping context and averaging tensors to process long input texts, which can be beneficial when processing legal documents. The proposed NER reached an F1-score of 0.9007 in the sub-task B of Semeval-2023 Task 6, Understanding Legal Texts.</abstract>
@@ -2992,7 +2992,7 @@
     <paper id="252">
       <title><fixed-case>F</fixed-case>i<fixed-case>RC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 10: Fine-grained Classification of Online Sexism Content Using <fixed-case>D</fixed-case>e<fixed-case>BERT</fixed-case>a</title>
       <author><first>Fadi</first><last>Hassan</last><affiliation>Huawei Technologies Oy., Finland</affiliation></author>
-      <author><first>Abdessalam</first><last>Bouchekif</last><affiliation>Huawei Technologies Oy., Finland</affiliation></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last><affiliation>Huawei Technologies Oy., Finland</affiliation></author>
       <author><first>Walid</first><last>Aransa</last><affiliation>Huawei</affiliation></author>
       <pages>1824-1832</pages>
       <abstract>The SemEval 2023 shared task 10 “Explainable Detection of Online Sexism” focuses on detecting and identifying comments and tweets containing sexist expressions and also explaining why it is sexist. This paper describes our system that we used to participate in this shared task. Our model is an ensemble of different variants of fine tuned DeBERTa models that employs a k-fold cross-validation. We have participated in the three tasks A, B and C. Our model ranked 2 nd position in tasks A, 7 th in task B and 4 th in task C.</abstract>
@@ -3197,7 +3197,7 @@
       <author><first>Soroush</first><last>Gooran</last><affiliation>AI Group, Computer Engineering Department, Sharif University of Technology, Tehran, Iran</affiliation></author>
       <author><first>Ehsaneddin</first><last>Asgari</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>Hamid Reza</first><last>Rabiee</last><affiliation>Professor of Computer Engineering, Sharif University of Technology</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Associate Professor, Sharif University of Technology</affiliation></author>
       <pages>1960-1964</pages>
       <url hash="e17d5360">2023.semeval-1.269</url>
       <bibkey>taghavi-etal-2023-ebhaam</bibkey>
@@ -3218,7 +3218,7 @@
     <paper id="271">
       <title>Attention at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 10: Explainable Detection of Online Sexism (<fixed-case>EDOS</fixed-case>)</title>
       <author><first>Debashish</first><last>Roy</last><affiliation>IIIT Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>1972-1977</pages>
       <abstract>In this paper, we have worked on explainability and understanding of the decisions made by models in the form of classification tasks. The task is divided into 3 subtasks. The first task consists of determining Binary Sexism Detection. The second task describes the Category of Sexism. The third task describes a more Fine-grained Category of Sexism. Our work explores solving these tasks as a classification problem by fine-tuning transformer-based architecture. We have performed several experiments with our architecture, including combining multiple transformers, using domain adaptive pretraining on the unlabelled dataset provided by Reddit and Gab, Joint learning, and taking different layers of transformers as input to a classification head. Our system (with the team name Attention’) was able to achieve a macro F1 score of 0.839 for task A, 0.5835 macro F1 score for task B and 0.3356 macro F1 score for task C at the Codalab SemEval Competition. Later we improved the accuracy of Task B to 0.6228 and Task C to 0.3693 in the test set.</abstract>
       <url hash="c4c200f1">2023.semeval-1.271</url>
@@ -3267,8 +3267,8 @@
       <author><first>Olesya</first><last>Razuvayevskaya</last><affiliation>The University of Sheffield</affiliation></author>
       <author><first>Freddy</first><last>Heppell</last><affiliation>The University of Sheffield</affiliation></author>
       <author><first>João A.</first><last>Leite</last><affiliation>The University of Sheffield</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Xingyi</first><last>Song</last><affiliation>University of Sheffield</affiliation></author>
       <pages>1995-2008</pages>
       <abstract>This paper describes our approach for SemEval- 2023 Task 3: Detecting the category, the fram- ing, and the persuasion techniques in online news in a multilingual setup. For Subtask 1 (News Genre), we propose an ensemble of fully trained and adapter mBERT models which was ranked joint-first for German, and had the high- est mean rank of multi-language teams. For Subtask 2 (Framing), we achieved first place in 3 languages, and the best average rank across all the languages, by using two separate ensem- bles: a monolingual RoBERTa-MUPPETLARGE and an ensemble of XLM-RoBERTaLARGE with adapters and task adaptive pretraining. For Sub- task 3 (Persuasion Techniques), we trained a monolingual RoBERTa-Base model for English and a multilingual mBERT model for the re- maining languages, which achieved top 10 for all languages, including 2nd for English. For each subtask, we compared monolingual and multilingual approaches, and considered class imbalance techniques.</abstract>
@@ -3378,7 +3378,7 @@
     <paper id="283">
       <title>Team <fixed-case>T</fixed-case>he<fixed-case>S</fixed-case>yllogist at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 3: Language-Agnostic Framing Detection in Multi-Lingual Online News: A Zero-Shot Transfer Approach</title>
       <author><first>Osama</first><last>Mohammed Afzal</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2058-2061</pages>
       <abstract>We describe our system for SemEval-2022 Task 3 subtask 2 which on detecting the frames used in a news article in a multi-lingual setup. We propose a multi-lingual approach based on machine translation of the input, followed by an English prediction model. Our system demonstrated good zero-shot transfer capability, achieving micro-F1 scores of 53% for Greek (4th on the leaderboard) and 56.1% for Georgian (3rd on the leaderboard), without any prior training on translated data for these languages. Moreover, our system achieved comparable performance on seven other languages, including German, English, French, Russian, Italian, Polish, and Spanish. Our results demonstrate the feasibility of creating a language-agnostic model for automatic framing detection in online news.</abstract>
       <url hash="f039dde2">2023.semeval-1.283</url>
@@ -3533,7 +3533,7 @@
       <author><first>Giridhar Kaushik</first><last>Ramachandran</last><affiliation>George Mason University</affiliation></author>
       <author><first>Haritha</first><last>Gangavarapu</last><affiliation>CGI Inc</affiliation></author>
       <author><first>Kevin</first><last>Lybarger</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
       <pages>2143-2152</pages>
       <abstract>In online forums like Reddit, users share their experiences with medical conditions and treatments, including making claims, asking questions, and discussing the effects of treatments on their health. Building systems to understand this information can effectively monitor the spread of misinformation and verify user claims. The Task-8 of the 2023 International Workshop on Semantic Evaluation focused on medical applications, specifically extracting patient experience- and medical condition-related entities from user posts on social media. The Reddit Health Online Talk (RedHot) corpus contains posts from medical condition-related subreddits with annotations characterizing the patient experience and medical conditions. In Subtask-1, patient experience is characterized by personal experience, questions, and claims. In Subtask-2, medical conditions are characterized by population, intervention, and outcome. For the automatic extraction of patient experiences and medical condition information, as a part of the challenge, we proposed language-model-based extraction systems that ranked $3ˆ{rd}$ on both subtasks’ leaderboards. In this work, we describe our approach and, in addition, explore the automatic extraction of this information using domain-specific language models and the inclusion of external knowledge.</abstract>
       <url hash="53610e87">2023.semeval-1.296</url>
@@ -3668,7 +3668,7 @@
       <author><first>Hannah</first><last>Frost</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Paul</first><last>O’regan</last><affiliation>Cancer Research UK</affiliation></author>
       <author><first>Donal</first><last>Landers</last><affiliation>Digital Experimental Cancer Medicine Team</affiliation></author>
-      <author><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>2216-2226</pages>
       <abstract>This paper describes the results of SemEval 2023 task 7 – Multi-Evidence Natural Language Inference for Clinical Trial Data (NLI4CT) – consisting of 2 tasks, a Natural Language Inference (NLI) task, and an evidence selection task on clinical trial data. The proposed challenges require multi-hop biomedical and numerical reasoning, which are of significant importance to the development of systems capable of large-scale interpretation and retrieval of medical evidence, to provide personalized evidence-based care. Task 1, the entailment task, received 643 submissions from 40 participants, and Task 2, the evidence selection task, received 364 submissions from 23 participants. The tasks are challenging, with the majority of submitted systems failing to significantly outperform the majority class baseline on the entailment task, and we observe significantly better performance on the evidence selection task than on the entailment task. Increasing the number of model parameters leads to a direct increase in performance, far more significant than the effect of biomedical pre-training. Future works could explore the limitations of large models for generalization and numerical inference, and investigate methods to augment clinical datasets to allow for more rigorous testing and to facilitate fine-tuning. We envisage that the dataset, models, and results of this task will be useful to the biomedical NLI and evidence retrieval communities. The dataset, competition leaderboard, and website are publicly available.</abstract>
       <url hash="5f27832f">2023.semeval-1.307</url>
@@ -3681,7 +3681,7 @@
       <author><first>Alessandro</first><last>Raganato</last><affiliation>University of Milano-Bicocca</affiliation></author>
       <author><first>Iacer</first><last>Calixto</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Asahi</first><last>Ushio</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last><affiliation>Tehran Institute for Advanced Studies</affiliation></author>
       <pages>2227-2234</pages>
       <url hash="9af354fe">2023.semeval-1.308</url>
@@ -3712,7 +3712,7 @@
       <author><first>Sudipta</first><last>Kar</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last><affiliation>Amazon Research</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>2247-2265</pages>
       <abstract>We present the findings of SemEval-2023 Task 2 on Fine-grained Multilingual Named Entity Recognition (MultiCoNER 2). Divided into 13 tracks, the task focused on methods to identify complex fine-grained named entities (like WRITTENWORK, VEHICLE, MUSICALGRP) across 12 languages, in both monolingual and multilingual scenarios, as well as noisy settings. The task used the MultiCoNER V2 dataset, composed of 2.2 million instances in Bangla, Chinese, English, Farsi, French, German, Hindi, Italian., Portuguese, Spanish, Swedish, and Ukrainian. MultiCoNER 2 was one of the most popular tasks of SemEval-2023. It attracted 842 submissions from 47 teams, and 34 teams submitted system papers. Results showed that complex entity types such as media titles and product names were the most challenging. Methods fusing external knowledge into transformer models achieved the best performance, and the largest gains were on the Creative Work and Group classes, which are still challenging even with external knowledge. Some fine-grained classes proved to be more challenging than others, such as SCIENTIST, ARTWORK, and PRIVATECORP. We also observed that noisy data has a significant impact on model performance, with an average drop of 10% on the noisy subset. The task highlights the need for future research on improving NER robustness on noisy data containing complex entities.</abstract>
       <url hash="4658a4ef">2023.semeval-1.310</url>
@@ -3724,7 +3724,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 8: Causal Medical Claim Identification and Related <fixed-case>PIO</fixed-case> Frame Extraction from Social Media Posts</title>
       <author><first>Vivek</first><last>Khetan</last><affiliation>Accenture Labs</affiliation></author>
       <author><first>Somin</first><last>Wadhwa</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Silvio</first><last>Amir</last><affiliation>Northeastern University</affiliation></author>
       <pages>2266-2274</pages>
       <abstract>Identification of medical claims from user-generated text data is an onerous but essential step for various tasks including content moderation, and hypothesis generation. SemEval-2023 Task 8 is an effort towards building those capabilities and motivating further research in this direction. This paper summarizes the details and results of shared task 8 at SemEval-2023 which involved identifying causal medical claims and extracting related Populations, Interventions, and Outcomes (“PIO”) frames from social media (Reddit) text. This shared task comprised two subtasks: (1) Causal claim identification; and (2) PIO frame extraction. In total, seven teams participated in the task. Of the seven, six provided system descriptions which we summarize here. For the first subtask, the best approach yielded a macro-averaged F-1 score of 78.40, and for the second subtask, the best approach achieved token-level F-1 scores of 40.55 for Populations, 49.71 for Interventions, and 30.08 for Outcome frames.</abstract>
@@ -3770,10 +3770,10 @@
       <author><first>Dina</first><last>Almanea</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Valerio</first><last>Basile</last><affiliation>University of Turin</affiliation></author>
       <author><first>Tommaso</first><last>Fornaciari</last><affiliation>Italian National Police</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Verena</first><last>Rieser</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Alexandra</first><last>Uma</last><affiliation>Connex One</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>2304-2318</pages>
       <abstract>NLP datasets annotated with human judgments are rife with disagreements between the judges. This is especially true for tasks depending on subjective judgments such as sentiment analysis or offensive language detection. Particularly in these latter cases, the NLP community has come to realize that the common approach of reconciling’ these different subjective interpretations risks misrepresenting the evidence. Many NLP researchers have therefore concluded that rather than eliminating disagreements from annotated corpora, we should preserve themindeed, some argue that corpora should aim to preserve all interpretations produced by annotators. But this approach to corpus creation for NLP has not yet been widely accepted. The objective of the Le-Wi-Di series of shared tasks is to promote this approach to developing NLP models by providing a unified framework for training and evaluating with such datasets. We report on the second such shared task, which differs from the first edition in three crucial respects: (i) it focuses entirely on NLP, instead of both NLP and computer vision tasks in its first edition; (ii) it focuses on subjective tasks, instead of covering different types of disagreements as training with aggregated labels for subjective NLP tasks is in effect a misrepresentation of the data; and (iii) for the evaluation, we concentrated on soft approaches to evaluation. This second edition of Le-Wi-Di attracted a wide array of partici- pants resulting in 13 shared task submission papers.</abstract>
       <url hash="25d2fe3e">2023.semeval-1.314</url>
@@ -3786,11 +3786,11 @@
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last><affiliation>Bayero University, Kano</affiliation></author>
       <author><first>Idris</first><last>Abdulmumin</last><affiliation>Ahmadu Bello University, Zaria</affiliation></author>
       <author><first>Seid Muhie</first><last>Yimam</last><affiliation>Universitt Hamburg</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Bayero University Kano</affiliation></author>
       <author><first>Nedjma</first><last>Ousidhoum</last><affiliation>The University of Cambridge</affiliation></author>
       <author><first>Abinew Ali</first><last>Ayele</last><affiliation>Bahir Dar University</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>NRC</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>NRC</affiliation></author>
       <author><first>Meriem</first><last>Beloucif</last><affiliation>Uppsala University</affiliation></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Google</affiliation></author>
       <pages>2319-2337</pages>
@@ -3804,7 +3804,7 @@
       <title><fixed-case>ITTC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2023-Task 7: Document Retrieval and Sentence Similarity for Evidence Retrieval in Clinical Trial Data</title>
       <author><first>Rahmad</first><last>Mahendra</last><affiliation>Universitas Indonesia</affiliation></author>
       <author><first>Damiano</first><last>Spina</last><affiliation>RMIT University</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
       <pages>2338-2342</pages>
       <abstract>This paper describes the submissions of the Natural Language Processing (NLP) team from the Australian Research Council Industrial Transformation Training Centre (ITTC) for Cognitive Computing in Medical Technologies to the SemEval 2023 Task 7, i.e., multi-evidence natural language inference for clinical trial data (NLI4CT). More specifically, we were working on subtask 2 whose objective is to identify the relevant parts of the premise from clinical trial report that justify the truth of information in the statement. We approach the evidence retrieval problem as a document retrieval and sentence similarity task. Our results show that the task poses some challenges which involve dealing with complex sentences and implicit evidences.</abstract>
       <url hash="8d45d087">2023.semeval-1.316</url>
@@ -3816,7 +3816,7 @@
       <author><first>Jakub</first><last>Piskorski</last><affiliation>Polish Academy of Sciences</affiliation></author>
       <author><first>Nicolas</first><last>Stefanovitch</last><affiliation>Joint Research Centre</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2343-2361</pages>
       <abstract>We describe SemEval-2023 task 3 on Detecting the Category, the Framing, and the Persuasion Techniques in Online News in a Multilingual Setup: the dataset, the task organization process, the evaluation setup, the results, and the participating systems. The task focused on news articles in nine languages (six known to the participants upfront: English, French, German, Italian, Polish, and Russian), and three additional ones revealed to the participants at the testing phase: Spanish, Greek, and Georgian). The task featured three subtasks: (1) determining the genre of the article (opinion, reporting, or satire), (2) identifying one or more frames used in an article from a pool of 14 generic frames, and (3) identify the persuasion techniques used in each paragraph of the article, using a taxonomy of 23 persuasion techniques. This was a very popular task: a total of 181 teams registered to participate, and 41 eventually made an official submission on the test set.</abstract>
       <url hash="b121c480">2023.semeval-1.317</url>
diff --git a/data/xml/2023.sicon.xml b/data/xml/2023.sicon.xml
index f3e9bf5f3a..6bf5e05d07 100644
--- a/data/xml/2023.sicon.xml
+++ b/data/xml/2023.sicon.xml
@@ -32,7 +32,7 @@
       <title>Detoxifying Online Discourse: A Guided Response Generation Approach for Reducing Toxicity in User-Generated Text</title>
       <author><first>Ritwik</first><last>Bose</last><affiliation>Knox College</affiliation></author>
       <author><first>Ian</first><last>Perera</last><affiliation>The Institute for Human &amp; Machine Cognition</affiliation></author>
-      <author><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <pages>9-14</pages>
       <abstract>The expression of opinions, stances, and moral foundations on social media often coincide with toxic, divisive, or inflammatory language that can make constructive discourse across communities difficult. Natural language generation methods could provide a means to reframe or reword such expressions in a way that fosters more civil discourse, yet current Large Language Model (LLM) methods tend towards language that is too generic or formal to seem authentic for social media discussions. We present preliminary work on training LLMs to maintain authenticity while presenting a community’s ideas and values in a constructive, non-toxic manner.</abstract>
       <url hash="ee8d24da">2023.sicon-1.2</url>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="4">
       <title>What Makes a Good Counter-Stereotype? Evaluating Strategies for Automated Responses to Stereotypical Text</title>
-      <author><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Isar</first><last>Nejadgholi</last></author>
       <author><first>Anna</first><last>Kerkhof</last></author>
@@ -81,7 +81,7 @@
     </paper>
     <paper id="6">
       <title>Measuring Lexico-Semantic Alignment in Debates with Contextualized Word Representations</title>
-      <author><first>Aina</first><last>Garí Soler</last><affiliation>Télécom-Paris</affiliation></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last><affiliation>Télécom-Paris</affiliation></author>
       <author><first>Matthieu</first><last>Labeau</last><affiliation>Télécom ParisTech</affiliation></author>
       <author><first>Chloé</first><last>Clavel</last><affiliation>Télécom ParisTech and Télécom Paris</affiliation></author>
       <pages>50-63</pages>
diff --git a/data/xml/2023.sigdial.xml b/data/xml/2023.sigdial.xml
index ad0c335b1b..b7b4f5fd98 100644
--- a/data/xml/2023.sigdial.xml
+++ b/data/xml/2023.sigdial.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue</booktitle>
       <editor><first>Svetlana</first><last>Stoyanchev</last></editor>
-      <editor><first>Shafiq</first><last>Joty</last></editor>
+      <editor id="shafiq-joty"><first>Shafiq</first><last>Joty</last></editor>
       <editor><first>David</first><last>Schlangen</last></editor>
       <editor><first>Ondrej</first><last>Dusek</last></editor>
-      <editor><first>Casey</first><last>Kennington</last></editor>
+      <editor id="casey-kennington"><first>Casey</first><last>Kennington</last></editor>
       <editor><first>Malihe</first><last>Alikhani</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czechia</address>
@@ -45,7 +45,7 @@
     </paper>
     <paper id="3">
       <title>What’s Hard in <fixed-case>E</fixed-case>nglish <fixed-case>RST</fixed-case> Parsing? Predictive Models for Error Analysis</title>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Tatsuya</first><last>Aoyama</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>31–42</pages>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>A Statistical Approach for Quantifying Group Difference in Topic Distributions Using Clinical Discourse Samples</title>
       <author><first>Grace O.</first><last>Lawley</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <author><first>Jill K.</first><last>Dolata</last></author>
       <author><first>Eric</first><last>Fombonne</last></author>
       <author><first>Steven</first><last>Bedrick</last></author>
@@ -92,7 +92,7 @@
     <paper id="7">
       <title>Dial-<fixed-case>M</fixed-case>: A Masking-based Framework for Dialogue Evaluation</title>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <pages>77–84</pages>
       <abstract>In dialogue systems, automatically evaluating machine-generated responses is critical and challenging. Despite the tremendous progress in dialogue generation research, its evaluation heavily depends on human judgments. The standard word-overlapping based evaluation metrics are ineffective for dialogues. As a result, most of the recently proposed metrics are model-based and reference-free, which learn to score different aspects of a conversation. However, understanding each aspect requires a separate model, which makes them computationally expensive. To this end, we propose Dial-M, a Masking-based reference-free framework for Dialogue evaluation. The main idea is to mask the keywords of the current utterance and predict them, given the dialogue history and various conditions (like knowledge, persona, etc.), thereby making the evaluation framework simple and easily extensible for multiple datasets. Regardless of its simplicity, Dial-M achieves comparable performance to state-of-the-art metrics on several dialogue evaluation datasets. We also discuss the interpretability of our proposed metric along with error analysis.</abstract>
       <url hash="344b77e3">2023.sigdial-1.7</url>
@@ -103,13 +103,13 @@
       <title>From Chatter to Matter: Addressing Critical Steps of Emotion Recognition Learning in Task-oriented Dialogue</title>
       <author><first>Shutong</first><last>Feng</last></author>
       <author><first>Nurul</first><last>Lubis</last></author>
-      <author><first>Benjamin</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin</first><last>Ruppik</last></author>
       <author><first>Christian</first><last>Geishauser</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Renato</first><last>Vukovic</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>85–103</pages>
       <abstract>Emotion recognition in conversations (ERC) is a crucial task for building human-like conversational agents. While substantial efforts have been devoted to ERC for chit-chat dialogues, the task-oriented counterpart is largely left unattended. Directly applying chit-chat ERC models to task-oriented dialogues (ToDs) results in suboptimal performance as these models overlook key features such as the correlation between emotions and task completion in ToDs. In this paper, we propose a framework that turns a chit-chat ERC model into a task-oriented one, addressing three critical aspects: data, features and objective. First, we devise two ways of augmenting rare emotions to improve ERC performance. Second, we use dialogue states as auxiliary features to incorporate key information from the goal of the user. Lastly, we leverage a multi-aspect emotion definition in ToDs to devise a multi-task learning objective and a novel emotion-distance weighted loss function. Our framework yields significant improvements for a range of chit-chat ERC models on EmoWOZ, a large-scale dataset for user emotions in ToDs. We further investigate the generalisability of the best resulting model to predict user satisfaction in different ToD datasets. A comparison with supervised baselines shows a strong zero-shot capability, highlighting the potential usage of our framework in wider scenarios.</abstract>
       <url hash="bee946dc">2023.sigdial-1.8</url>
@@ -143,7 +143,7 @@
     <paper id="11">
       <title>Towards Multilingual Automatic Open-Domain Dialogue Evaluation</title>
       <author><first>John</first><last>Mendonca</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>130–141</pages>
       <abstract>The main limiting factor in the development of robust multilingual open-domain dialogue evaluation metrics is the lack of multilingual data and the limited availability of open-sourced multilingual dialogue systems. In this work, we propose a workaround for this lack of data by leveraging a strong multilingual pretrained encoder-based Language Model and augmenting existing English dialogue data using Machine Translation. We empirically show that the naive approach of finetuning a pretrained multilingual encoder model with translated data is insufficient to outperform the strong baseline of finetuning a multilingual model with only source data. Instead, the best approach consists in the careful curation of translated data using MT Quality Estimation metrics, excluding low quality translations that hinder its performance.</abstract>
@@ -155,7 +155,7 @@
       <title>Dialog Action-Aware Transformer for Dialog Policy Learning</title>
       <author><first>Huimin</first><last>Wang</last></author>
       <author><first>Wai Chung</first><last>Kwan</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>142–148</pages>
       <abstract>Recent works usually address Dialog policy learning DPL by training a reinforcement learning (RL) agent to determine the best dialog action. However, existing works on deep RL require a large volume of agent-user interactions to achieve acceptable performance. In this paper, we propose to make full use of the plain text knowledge from the pre-trained language model to accelerate the RL agent’s learning speed. Specifically, we design a dialog action-aware transformer encoder (DaTrans), which integrates a new fine-tuning procedure named masked last action task to encourage DaTrans to be dialog-aware and distill action-specific features. Then, DaTrans is further optimized in an RL setting with ongoing interactions and evolves through exploration in the dialog action space toward maximizing long-term accumulated rewards. The effectiveness and efficiency of the proposed model are demonstrated with both simulator evaluation and human evaluation.</abstract>
       <url hash="20aeb91f">2023.sigdial-1.12</url>
@@ -200,7 +200,7 @@
       <author><first>Javier</first><last>Chiyah-Garcia</last></author>
       <author><first>Alessandro</first><last>Suglia</last></author>
       <author><first>Arash</first><last>Eshghi</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>175–182</pages>
       <abstract>Referential ambiguities arise in dialogue when a referring expression does not uniquely identify the intended referent for the addressee. Addressees usually detect such ambiguities immediately and work with the speaker to repair it using meta-communicative, Clarificational Exchanges (CE): a Clarification Request (CR) and a response. Here, we argue that the ability to generate and respond to CRs imposes specific constraints on the architecture and objective functions of multi-modal, visually grounded dialogue models. We use the SIMMC 2.0 dataset to evaluate the ability of different state-of-the-art model architectures to process CEs, with a metric that probes the contextual updates that arise from them in the model. We find that language-based models are able to encode simple multi-modal semantic information and process some CEs, excelling with those related to the dialogue history, whilst multi-modal models can use additional learning objectives to obtain disentangled object representations, which become crucial to handle complex referential ambiguities across modalities overall.</abstract>
       <url hash="17d8f74d">2023.sigdial-1.16</url>
@@ -211,7 +211,7 @@
       <title><fixed-case>PGT</fixed-case>ask: Introducing the Task of Profile Generation from Dialogues</title>
       <author><first>Rui</first><last>Ribeiro</last></author>
       <author><first>Joao Paulo</first><last>Carvalho</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <pages>183–189</pages>
       <abstract>Recent approaches have attempted to personalize dialogue systems by leveraging profile information into models. However, this knowledge is scarce and difficult to obtain, which makes the extraction/generation of profile information from dialogues a fundamental asset. To surpass this limitation, we introduce the Profile Generation Task (PGTask). We contribute with a new dataset for this problem, comprising profile sentences aligned with related utterances, extracted from a corpus of dialogues. Furthermore, using state-of-the-art methods, we provide a benchmark for profile generation on this novel dataset. Our experiments disclose the challenges of profile generation, and we hope that this introduces a new research direction.</abstract>
       <url hash="dadbdc5c">2023.sigdial-1.17</url>
@@ -221,7 +221,7 @@
     <paper id="18">
       <title>Question Generation to Elicit Users’ Food Preferences by Considering the Semantic Content</title>
       <author><first>Jie</first><last>Zeng</last></author>
-      <author><first>Yukiko</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko</first><last>Nakano</last></author>
       <author><first>Tatsuya</first><last>Sakato</last></author>
       <pages>190–196</pages>
       <abstract>To obtain a better understanding of user preferences in providing tailored services, dialogue systems have to generate semi-structured interviews that require flexible dialogue control while following a topic guide to accomplish the purpose of the interview. Toward this goal, this study proposes a semantics-aware GPT-3 fine-tuning model that generates interviews to acquire users’ food preferences. The model was trained using dialogue history and semantic representation constructed from the communicative function and semantic content of the utterance. Using two baseline models: zero-shot ChatGPT and fine-tuned GPT-3, we conducted a user study for subjective evaluations alongside automatic objective evaluations. In the user study, in impression rating, the outputs of the proposed model were superior to those of baseline models and comparable to real human interviews in terms of eliciting the interviewees’ food preferences.</abstract>
@@ -254,7 +254,7 @@
       <title>Leveraging Large Language Models for Automated Dialogue Analysis</title>
       <author><first>Sarah E.</first><last>Finch</last></author>
       <author><first>Ellie S.</first><last>Paek</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>202–215</pages>
       <abstract>Developing high-performing dialogue systems benefits from the automatic identification of undesirable behaviors in system responses. However, detecting such behaviors remains challenging, as it draws on a breadth of general knowledge and understanding of conversational practices. Although recent research has focused on building specialized classifiers for detecting specific dialogue behaviors, the behavior coverage is still incomplete and there is a lack of testing on real-world human-bot interactions. This paper investigates the ability of a state-of-the-art large language model (LLM), ChatGPT-3.5, to perform dialogue behavior detection for nine categories in real human-bot dialogues. We aim to assess whether ChatGPT can match specialized models and approximate human performance, thereby reducing the cost of behavior detection tasks. Our findings reveal that neither specialized models nor ChatGPT have yet achieved satisfactory results for this task, falling short of human performance. Nevertheless, ChatGPT shows promising potential and often outperforms specialized detection models. We conclude with an in-depth examination of the prevalent shortcomings of ChatGPT, offering guidance for future research to enhance LLM capabilities.</abstract>
       <url hash="79093a5d">2023.sigdial-1.20</url>
@@ -333,7 +333,7 @@
     <paper id="27">
       <title>Syndicom: Improving Conversational Commonsense with Error-Injection and Natural Language Feedback</title>
       <author><first>Christopher</first><last>Richardson</last></author>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <author><first>Larry</first><last>Heck</last></author>
       <pages>297–308</pages>
       <abstract>Commonsense reasoning is a critical aspect of human communication. Despite recent advances in conversational AI driven by large language models, commonsense reasoning remains a challenging task. In this work, we introduce Syndicom - a method for improving commonsense in dialogue response generation. Syndicom consists of two components. The first component is a dataset composed of commonsense dialogues created from a knowledge graph and synthesized into natural language. This dataset includes both valid and invalid responses to dialogue contexts, along with natural language feedback (NLF) for the invalid responses. The second contribution is a two-step procedure: training a model to predict natural language feedback (NLF) for invalid responses, and then training a response generation model conditioned on the predicted NLF, the invalid response, and the dialogue. Syndicom is scalable and does not require reinforcement learning. Empirical results on three tasks are evaluated using a broad range of metrics. Syndicom achieves a relative improvement of 53% over ChatGPT on ROUGE-1, and human evaluators prefer Syndicom over ChatGPT 57% of the time. We will publicly release the code and the full dataset.</abstract>
@@ -348,11 +348,11 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Devamanyu</first><last>Hazarika</last></author>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Mahdi</first><last>Namazifar</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>309–323</pages>
       <abstract>Task-oriented Dialogue (TOD) Systems aim to build dialogue systems that assist users in accomplishing specific goals, such as booking a hotel or a restaurant. Traditional TODs rely on domain-specific APIs/DBs or external factual knowledge to generate responses, which cannot accommodate subjective user requests (e.g.,”Is the WIFI reliable?” or “Does the restaurant have a good atmosphere?”). To address this issue, we propose a novel task of subjective-knowledge-based TOD (SK-TOD). We also propose the first corresponding dataset, which contains subjective knowledge-seeking dialogue contexts and manually annotated responses grounded in subjective knowledge sources. When evaluated with existing TOD approaches, we find that this task poses new challenges such as aggregating diverse opinions from multiple knowledge snippets. We hope this task and dataset can promote further research on TOD and subjective content understanding. The code and the dataset are available at https://github.com/alexa/dstc11-track5.</abstract>
       <url hash="7787c3f4">2023.sigdial-1.28</url>
@@ -383,7 +383,7 @@
     </paper>
     <paper id="31">
       <title>A New Dataset for Causality Identification in Argumentative Texts</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Michael</first><last>Voelske</last></author>
       <author><first>Anh</first><last>Le</last></author>
       <author><first>Shahbaz</first><last>Syed</last></author>
@@ -401,7 +401,7 @@
       <author><first>Kartik</first><last>Agarwal</last></author>
       <author><first>Juraj</first><last>Juraska</last></author>
       <author><first>Utkarsh</first><last>Garg</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>355–369</pages>
       <abstract>Dialogue systems need to produce responses that realize multiple types of dialogue acts (DAs) with high semantic fidelity. In the past, natural language generators (NLGs) for dialogue were trained on large parallel corpora that map from a domain-specific DA and its semantic attributes to an output utterance. Recent work shows that pretrained language models (LLMs) offer new possibilities for controllable NLG using prompt-based learning. Here we develop a novel few-shot overgenerate-and-rank approach that achieves the controlled generation of DAs. We compare eight few-shot prompt styles that include a novel method of generating from textual pseudo-references using a textual style transfer approach. We develop six automatic ranking functions that identify outputs with both the correct DA and high semantic accuracy at generation time. We test our approach on three domains and four LLMs. To our knowledge, this is the first work on NLG for dialogue that automatically ranks outputs using both DA and attribute accuracy. For completeness, we compare our results to fine-tuned few-shot models trained with 5 to 100 instances per DA. Our results show that several prompt settings achieve perfect DA accuracy, and near perfect semantic accuracy (99.81%) and perform better than few-shot fine-tuning.</abstract>
       <url hash="0819a538">2023.sigdial-1.32</url>
@@ -464,7 +464,7 @@
     <paper id="37">
       <title>Prompting, Retrieval, Training: An exploration of different approaches for task-oriented dialogue generation</title>
       <author><first>Gonçalo</first><last>Raposo</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
       <pages>400–412</pages>
       <abstract>Task-oriented dialogue systems need to generate appropriate responses to help fulfill users’ requests. This paper explores different strategies, namely prompting, retrieval, and fine-tuning, for task-oriented dialogue generation. Through a systematic evaluation, we aim to provide valuable insights and guidelines for researchers and practitioners working on developing efficient and effective dialogue systems for real-world applications. Evaluation is performed on the MultiWOZ and Taskmaster-2 datasets, and we test various versions of FLAN-T5, GPT-3.5, and GPT-4 models. Costs associated with running these models are analyzed, and dialogue evaluation is briefly discussed. Our findings suggest that when testing data differs from the training data, fine-tuning may decrease performance, favoring a combination of a more general language model and a prompting mechanism based on retrieved examples.</abstract>
@@ -479,8 +479,8 @@
       <author><first>Subhankar</first><last>Chakraborty</last></author>
       <author><first>Andrew</first><last>Perrault</last></author>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>413–420</pages>
       <abstract>Creating conversational systems for niche domains is a challenging task, further exacerbated by a lack of quality datasets. We explore the construction of safer conversational systems for guiding patients in preparing for colonoscopies. This has required a data generation pipeline to generate a minimum viable dataset to bootstrap a semantic parser, augmented by automatic paraphrasing. Our study suggests large language models (e.g., GPT-3.5 and GPT-4) are a viable alternative to crowd sourced paraphrasing, but conversational systems that rely upon language models’ ability to do temporal reasoning struggle to provide accurate responses. A neural-symbolic system that performs temporal reasoning on an intermediate representation of user queries shows promising results compared to an end-to-end dialogue system, improving the number of correct responses while vastly reducing the number of incorrect or misleading ones.</abstract>
       <url hash="22c90b02">2023.sigdial-1.38</url>
@@ -560,7 +560,7 @@
       <author><first>Hoang</first><last>Nguyen</last></author>
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Ye</first><last>Liu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>470–481</pages>
       <abstract>Recent advanced methods in Natural Language Understanding for Task-oriented Dialogue (TOD) Systems (e.g., intent detection and slot filling) require a large amount of annotated data to achieve competitive performance. In reality, token-level annotations (slot labels) are time-consuming and difficult to acquire. In this work, we study the Slot Induction (SI) task whose objective is to induce slot boundaries without explicit knowledge of token-level slot annotations. We propose leveraging Unsupervised Pre-trained Language Model (PLM) Probing and Contrastive Learning mechanism to exploit (1) unsupervised semantic knowledge extracted from PLM, and (2) additional sentence-level intent label signals available from TOD. Our approach is shown to be effective in SI task and capable of bridging the gaps with token-level supervised models on two NLU benchmark datasets. When generalized to emerging intents, our SI objectives also provide enhanced slot label representations, leading to improved performance on the Slot Filling tasks.</abstract>
       <url hash="db1acca5">2023.sigdial-1.44</url>
@@ -621,7 +621,7 @@
     <paper id="49">
       <title>Incorporating Annotator Uncertainty into Representations of Discourse Relations</title>
       <author><first>S. Magalí</first><last>López Cortez</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <pages>530–537</pages>
       <abstract>Annotation of discourse relations is a known difficult task, especially for non-expert annotators. In this paper, we investigate novice annotators’ uncertainty on the annotation of discourse relations on spoken conversational data. We find that dialogue context (single turn, pair of turns within speaker, and pair of turns across speakers) is a significant predictor of confidence scores. We compute distributed representations of discourse relations from co-occurrence statistics that incorporate information about confidence scores and dialogue context. We perform a hierarchical clustering analysis using these representations and show that weighting discourse relation representations with information about confidence and dialogue context coherently models our annotators’ uncertainty about discourse relation labels.</abstract>
       <url hash="b86c1b9a">2023.sigdial-1.49</url>
@@ -633,11 +633,11 @@
       <author><first>Vishakh</first><last>Padmakumar</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Di</first><last>Jin</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>538–547</pages>
       <abstract>The bulk of work adapting transformer models to open-domain dialogue represents dialogue context as the concatenated set of turns in natural language. However, it is unclear if this is the best approach. In this work, we investigate this question by means of an empirical controlled experiment varying the dialogue context format from text-only formats (all recent utterances, summaries, selected utterances) as well as variants that are more structurally different (triples, AMR). We compare these formats based on fine-tuned model performance on two downstream tasks—knowledge selection and response generation. We find that simply concatenating the utterances works as a strong baseline in most cases, but is outperformed in longer contexts by a hybrid approach of combining a summary of the context with recent utterances. Through empirical analysis, our work highlights the need to examine the format of context representation and offers recommendations on adapting general-purpose language models to dialogue tasks.</abstract>
       <url hash="439af716">2023.sigdial-1.50</url>
@@ -647,7 +647,7 @@
     <paper id="51">
       <title>C3: Compositional Counterfactual Contrastive Learning for Video-grounded Dialogues</title>
       <author><first>Hung</first><last>Le</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven C.H.</first><last>Hoi</last></author>
       <pages>548–561</pages>
       <abstract>Video-grounded dialogue systems aim to integrate video understanding and dialogue understanding to generate responses that are relevant to both the dialogue and video context. Most existing approaches employ deep learning models and have achieved remarkable performance, given the relatively small datasets available. However, the results are partially accomplished by exploiting biases in the datasets rather than developing multimodal reasoning, resulting in limited generalization. In this paper, we propose a novel approach of Compositional Counterfactual Contrastive Learning (C3) to develop contrastive training between factual and counterfactual samples in video-grounded dialogues. Specifically, we design factual/counterfactual samples based on the temporal steps in videos and tokens in dialogues and propose contrastive loss functions that exploit object-level or action-level variance. Different from prior approaches, we focus on contrastive hidden state representations among compositional output tokens to optimize the representation space in a generation setting. We achieved promising performance gains on the Audio-Visual Scene-Aware Dialogues (AVSD) benchmark and showed the benefits of our approach in grounding video and dialogue context.</abstract>
@@ -741,7 +741,7 @@
       <author><first>Sijia</first><last>Liu</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>615–631</pages>
       <abstract>Automatic Evaluation (AE) and Response Selection (RS) models assign quality scores to various candidate responses and rank them in conversational setups. Prior response ranking research compares various models’ performance on synthetically generated test sets. In this work, we investigate the performance of model-based reference-free AE and RS models on our constructed response ranking datasets that mirror real-case scenarios of ranking candidates during inference time. Metrics’ unsatisfying performance can be interpreted as their low generalizability over more pragmatic conversational domains such as human-chatbot dialogs. To alleviate this issue we propose a novel RS model called MERCY that simulates human behavior in selecting the best candidate by taking into account distinct candidates concurrently and learns to rank them. In addition, MERCY leverages natural language feedback as another component to help the ranking task by explaining why each candidate response is relevant/irrelevant to the dialog context. These feedbacks are generated by prompting large language models in a few-shot setup. Our experiments show the better performance of MERCY over baselines for the response ranking task in our curated realistic datasets.</abstract>
       <url hash="83e346a7">2023.sigdial-1.58</url>
diff --git a/data/xml/2023.sigmorphon.xml b/data/xml/2023.sigmorphon.xml
index c575e3e79e..aa64f37385 100644
--- a/data/xml/2023.sigmorphon.xml
+++ b/data/xml/2023.sigmorphon.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology</booktitle>
       <editor><first>Garrett</first><last>Nicolai</last></editor>
       <editor><first>Eleanor</first><last>Chodroff</last></editor>
-      <editor><first>Frederic</first><last>Mailhot</last></editor>
+      <editor id="frederic-mailhot"><first>Frederic</first><last>Mailhot</last></editor>
       <editor><first>Çağrı</first><last>Çöltekin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
@@ -32,8 +32,8 @@
     <paper id="3">
       <title>Evaluating Cross Lingual Transfer for Morphological Analysis: a Case Study of <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Siddhesh</first><last>Pawar</last><affiliation>Google</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and IISc</affiliation></author>
       <pages>14-26</pages>
       <abstract>Recent advances in pretrained multilingual models such as Multilingual T5 (mT5) have facilitated cross-lingual transfer by learning shared representations across languages. Leveraging pretrained multilingual models for scaling morphology analyzers to low-resource languages is a unique opportunity that has been under-explored so far. We investigate this line of research in the context of Indian languages, focusing on two important morphological sub-tasks: root word extraction and tagging morphosyntactic descriptions (MSD), viz., gender, number, and person (GNP). We experiment with six Indian languages from two language families (Dravidian and Indo-Aryan) to train a multilingual morphology analyzers for the first time for Indian languages. We demonstrate the usability of multilingual models for few-shot cross-lingual transfer through an average 7% increase in GNP tagging in a cross-lingual setting as compared to a monolingual setting through controlled experiments. We provide an overview of the state of the datasets available related to our tasks and point-out a few modeling limitations due to datasets. Lastly, we analyze the cross-lingual transfer of morphological tags for verbs and nouns, which provides a proxy for the quality of representations of word markings learned by the model.</abstract>
       <url hash="fdd77252">2023.sigmorphon-1.3</url>
@@ -75,13 +75,13 @@
     </paper>
     <paper id="7">
       <title>Generalized Glossing Guidelines: An Explicit, Human- and Machine-Readable, Item-and-Process Convention for Morphological Annotation</title>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
       <author><first>Ela</first><last>Gulsen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Taiqi</first><last>He</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Nathaniel</first><last>Robinson</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jonathan</first><last>Amith</last><affiliation>Gettysburg College</affiliation></author>
       <author><first>Lindia</first><last>Tjuatja</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>58-67</pages>
       <abstract>Interlinear glossing provides a vital type of morphosyntactic annotation, both for linguists and language revitalists, and numerous conventions exist for representing it formally and computationally. Some of these formats are human readable; others are machine readable. Some are easy to edit with general-purpose tools. Few represent non-concatentative processes like infixation, reduplication, mutation, truncation, and tonal overwriting in a consistent and formally rigorous way (on par with affixation). We propose an annotation conventionâ€”Generalized Glossing Guidelines (GGG) that combines all of these positive properties using an Item-and-Process (IP) framework. We describe the format, demonstrate its linguistic adequacy, and compare it with two other interlinear glossed text annotation schemes.</abstract>
       <url hash="9f02f658">2023.sigmorphon-1.7</url>
@@ -222,12 +222,12 @@
     <paper id="20">
       <title>Findings of the <fixed-case>SIGMORPHON</fixed-case> 2023 Shared Task on Interlinear Glossing</title>
       <author><first>Michael</first><last>Ginn</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
       <author><first>Alexis</first><last>Palmer</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Anna</first><last>Stacey</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Garrett</first><last>Nicolai</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Mans</first><last>Hulden</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <pages>186-201</pages>
       <abstract>This paper presents the findings of the SIGMORPHON 2023 Shared Task on Interlinear Glossing. This first iteration of the shared task explores glossing of a set of six typologically diverse languages: Arapaho, Gitksan, Lezgi, Natügu, Tsez and Uspanteko. The shared task encompasses two tracks: a resource-scarce closed track and an open track, where participants are allowed to utilize external data resources. Five teams participated in the shared task. The winning team Tü-CL achieved a 23.99%-point improvement over a baseline RoBERTa system in the closed track and a 17.42%-point improvement in the open track.</abstract>
       <url hash="aa719d85">2023.sigmorphon-1.20</url>
@@ -250,9 +250,9 @@
       <author><first>Lindia</first><last>Tjuatja</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Nathaniel</first><last>Robinson</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Language Technologies Institute, Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>209-216</pages>
       <abstract>In our submission to the SIGMORPHON 2023 Shared Task on interlinear glossing (IGT), we explore approaches to data augmentation and modeling across seven low-resource languages. For data augmentation, we explore two approaches: creating artificial data from the provided training data and utilizing existing IGT resources in other languages. On the modeling side, we test an enhanced version of the provided token classification baseline as well as a pretrained multilingual seq2seq model. Additionally, we apply post-correction using a dictionary for Gitksan, the language with the smallest amount of data. We find that our token classification models are the best performing, with the highest word-level accuracy for Arapaho and highest morpheme-level accuracy for Gitksan out of all submissions. We also show that data augmentation is an effective strategy, though applying artificial data pretraining has very different effects across both models tested.</abstract>
       <url hash="056e7b6b">2023.sigmorphon-1.22</url>
@@ -275,7 +275,7 @@
       <author><first>Ananya</first><last>Apparaju</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Jata</first><last>MacCabe</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Garrett</first><last>Nicolai</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <pages>222-229</pages>
       <abstract>This paper presents several different neural subword modelling based approaches to interlinear glossing for seven under-resourced languages as a part of the 2023 SIGMORPHON shared task on interlinear glossing. We experiment with various augmentation and tokenization strategies for both the open and closed tracks of data. We found that while byte-level models may perform well for greater amounts of data, character based approaches remain competitive in their performance in lower resource settings.</abstract>
       <url hash="f3f3de35">2023.sigmorphon-1.24</url>
@@ -284,7 +284,7 @@
     </paper>
     <paper id="27">
       <title>The <fixed-case>SIGMORPHON</fixed-case> 2022 Shared Task on Cross-lingual and Low-Resource Grapheme-to-Phoneme Conversion</title>
-      <author><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Jackson L.</first><last>Lee</last><affiliation/></author>
       <author><first>Alexandra</first><last>DeLucia</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Travis</first><last>Bartley</last><affiliation>City University of New York</affiliation></author>
diff --git a/data/xml/2023.sigtyp.xml b/data/xml/2023.sigtyp.xml
index 266b785e8a..2ecac64a23 100644
--- a/data/xml/2023.sigtyp.xml
+++ b/data/xml/2023.sigtyp.xml
@@ -108,7 +108,7 @@
       <title>Corpus-based Syntactic Typological Methods for Dependency Parsing Improvement</title>
       <author><first>Diego</first><last>Alves</last><affiliation>University of Zagreb</affiliation></author>
       <author><first>Božo</first><last>Bekavac</last><affiliation>University of Zagreb</affiliation></author>
-      <author><first>Daniel</first><last>Zeman</last><affiliation>Charles University in Prague</affiliation></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last><affiliation>Charles University in Prague</affiliation></author>
       <author><first>Marko</first><last>Tadić</last><affiliation>University of Zagreb</affiliation></author>
       <pages>76-88</pages>
       <abstract>This article presents a comparative analysis of four different syntactic typological approaches applied to 20 different languages to determine the most effective one to be used for the improvement of dependency parsing results via corpora combination. We evaluated these strategies by calculating the correlation between the language distances and the empirical LAS results obtained when languages were combined in pairs. From the results, it was possible to observe that the best method is based on the extraction of word order patterns which happen inside subtrees of the syntactic structure of the sentences.</abstract>
@@ -169,7 +169,7 @@
       <author><first>Adrian</first><last>Doyle</last><affiliation>University of Galway</affiliation></author>
       <author><first>Theodorus</first><last>Fransen</last><affiliation>University of Galway</affiliation></author>
       <author><first>Bernardo</first><last>Stearns</last><affiliation>University of Galway</affiliation></author>
-      <author><first>John P.</first><last>McCrae</last><affiliation>University of Galway</affiliation></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last><affiliation>University of Galway</affiliation></author>
       <pages>126-131</pages>
       <abstract>This paper describes the structure and findings of the SIGTYP 2023 shared task on cognate and derivative detection for low-resourced languages, broken down into a supervised and unsupervised sub-task. The participants were asked to submit the test data’s final prediction. A total of nine teams registered for the shared task where seven teams registered for both sub-tasks. Only two participants ended up submitting system descriptions, with only one submitting systems for both sub-tasks. While all systems show a rather promising performance, all could be within the baseline score for the supervised sub-task. However, the system submitted for the unsupervised sub-task outperforms the baseline score.</abstract>
       <url hash="ca39677d">2023.sigtyp-1.13</url>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>o<fixed-case>H</fixed-case>i<fixed-case>L</fixed-case>i at <fixed-case>SIGTYP</fixed-case> 2023: Ensemble Models for Cognate and Derivative Words Detection</title>
-      <author><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ioan-Bogdan</first><last>Iordache</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ana Sabina</first><last>Uban</last><affiliation>University of Bucharest</affiliation></author>
       <pages>137-142</pages>
@@ -202,7 +202,7 @@
       <title>Multilingual <fixed-case>BERT</fixed-case> has an Accent: Evaluating <fixed-case>E</fixed-case>nglish Influences on Fluency in Multilingual Models</title>
       <author><first>Isabel</first><last>Papadimitriou</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kezia</first><last>Lopez</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>143-146</pages>
       <abstract>While multilingual language models can improve NLP performance on low-resource languages by leveraging higher-resource languages, they also reduce average performance on all languages (the ‘curse of multilinguality’). Here we show another problem with multilingual models: grammatical structures in higher-resource languages bleed into lower-resource languages, a phenomenon we call grammatical structure bias. We show this bias via a novel method for comparing the fluency of multilingual models to the fluency of monolingual Spanish and Greek models: testing their preference for two carefully-chosen variable grammatical structures (optional pronoun-drop in Spanish and optional Subject-Verb ordering in Greek). We find that multilingual BERT is biased toward the English-like setting (explicit pronouns and Subject-Verb-Object ordering) and against the default Spanish and Gerek settings, as compared to our monolingual control language model. With our case studies, we hope to bring to light the fine-grained ways in which multilingual models can be biased, and encourage more linguistically-aware fluency evaluation.</abstract>
       <url hash="bc920b07">2023.sigtyp-1.16</url>
@@ -228,7 +228,7 @@
       <title>Language-Agnostic Measures Discriminate Inflection and Derivation</title>
       <author><first>Coleman</first><last>Haley</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Edoardo M.</first><last>Ponti</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>150-152</pages>
       <abstract>In morphology, a distinction is commonly drawn between inflection and derivation. However, a precise definition of this distinction which captures the way the terms are used across languages remains elusive within linguistic theory, typically being based on subjective tests. In this study, we present 4 quantitative measures which use the statistics of a raw text corpus in a language to estimate how much and how variably a morphological construction changes aspects of the lexical entry, specifically, the word’s form and the word’s semantic and syntactic properties (as operationalised by distributional word embeddings). Based on a sample of 26 languages, we find that we can reconstruct 90% of the classification of constructions into inflection and derivation in Unimorph using our 4 measures, providing large-scale cross-linguistic evidence that the concepts of inflection and derivation are associated with measurable signatures in terms of form and distribution signatures that behave consistently across a variety of languages. Critically, our measures and models are entirely language-agnostic, yet perform well across all languages studied. We find that while there is a high degree of consistency in the use of the terms inflection and derivation in terms of our measures, there are still many constructions near the model’s decision boundary between the two categories, indicating a gradient, rather than categorical, distinction.</abstract>
       <url hash="a24fa999">2023.sigtyp-1.18</url>
diff --git a/data/xml/2023.socialnlp.xml b/data/xml/2023.socialnlp.xml
index 2606c189ed..fb0a872221 100644
--- a/data/xml/2023.socialnlp.xml
+++ b/data/xml/2023.socialnlp.xml
@@ -41,7 +41,7 @@
     <paper id="3">
       <title><fixed-case>O</fixed-case>ff<fixed-case>M</fixed-case>ix-3<fixed-case>L</fixed-case>: A Novel Code-Mixed Test Dataset in <fixed-case>B</fixed-case>angla-<fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi for Offensive Language Identification</title>
       <author><first>Dhiman</first><last>Goswami</last></author>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Antara</first><last>Mahmud</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
diff --git a/data/xml/2023.splurobonlp.xml b/data/xml/2023.splurobonlp.xml
index 3f54a5eda8..2c3533133e 100644
--- a/data/xml/2023.splurobonlp.xml
+++ b/data/xml/2023.splurobonlp.xml
@@ -22,7 +22,7 @@
     </frontmatter>
     <paper id="1">
       <title>Dialogue-based generation of self-driving simulation scenarios using Large Language Models</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>Craig</first><last>Innes</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Alex</first><last>Lascarides</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>1-12</pages>
diff --git a/data/xml/2023.starsem.xml b/data/xml/2023.starsem.xml
index f56c4d4cd1..949e1e018a 100644
--- a/data/xml/2023.starsem.xml
+++ b/data/xml/2023.starsem.xml
@@ -42,7 +42,7 @@
     <paper id="3">
       <title>Revisiting Syntax-Based Approach in Negation Scope Resolution</title>
       <author><first>Asahi</first><last>Yoshida</last><affiliation>Nagoya University</affiliation></author>
-      <author><first>Yoshihide</first><last>Kato</last><affiliation>Nagoya University</affiliation></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last><affiliation>Nagoya University</affiliation></author>
       <author><first>Shigeki</first><last>Matsubara</last><affiliation>Nagoya University</affiliation></author>
       <pages>18-23</pages>
       <abstract>Negation scope resolution is the process of detecting the negated part of a sentence. Unlike the syntax-based approach employed in previous research, state-of-the-art methods performed better without the explicit use of syntactic structure. This work revisits the syntax-based approach and re-evaluates the effectiveness of syntactic structure in negation scope resolution. We replace the parser utilized in the prior works with state-of-the-art parsers and modify the syntax-based heuristic rules. The experimental results demonstrate that the simple modifications enhance the performance of the prior syntax-based method to the same level as state-of-the-art end-to-end neural-based methods.</abstract>
@@ -86,7 +86,7 @@
     <paper id="7">
       <title><fixed-case>CRAPES</fixed-case>:Cross-modal Annotation Projection for Visual Semantic Role Labeling</title>
       <author><first>Abhidip</first><last>Bhattacharyya</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
       <author><first>Christoffer</first><last>Heckman</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>61-70</pages>
       <abstract>Automatic image comprehension is an important yet challenging task that includes identifying actions in an image and corresponding action participants. Most current approaches to this task, now termed Grounded Situation Recognition (GSR), start by predicting a verb that describes the action and then predict the nouns that can participate in the action as arguments to the verb. This problem formulation limits each image to a single action even though several actions could be depicted. In contrast, text-based Semantic Role Labeling (SRL) aims to label all actions in a sentence, typically resulting in at least two or three predicate argument structures per sentence. We hypothesize that expanding GSR to follow the more liberal SRL text-based approach to action and participant identification could improve image comprehension results. To test this hypothesis and to preserve generalization capabilities, we use general-purpose vision and language components as a front-end. This paper presents our results, a substantial 28.6 point jump in performance on the SWiG dataset, which confirm our hypothesis. We also discuss the benefits of loosely coupled broad-coverage off-the-shelf components which generalized well to out of domain images, and can decrease the need for manual image semantic role annotation.</abstract>
@@ -97,7 +97,7 @@
     <paper id="8">
       <title>Not All Counterhate Tweets Elicit the Same Replies: A Fine-Grained Analysis</title>
       <author><first>Abdullah</first><last>Albanyan</last><affiliation>University of North Texas</affiliation></author>
-      <author><first>Ahmed</first><last>Hassan</last><affiliation>University of Science and Technology at Zewailcity</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last><affiliation>University of Science and Technology at Zewailcity</affiliation></author>
       <author><first>Eduardo</first><last>Blanco</last><affiliation>University of Arizona</affiliation></author>
       <pages>71-88</pages>
       <abstract>Counterhate arguments can effectively fight and limit the spread of hate speech. However, they can also exacerbate the hate, as some people may respond with aggression if they feel threatened or targeted by the counterhate. In this paper, we investigate replies to counterhate arguments beyond whether the reply agrees or disagrees with the counterhate argument. We present a corpus with 2,621 replies to counterhate arguments countering hateful tweets, and annotate them with fine-grained characteristics. We show that (a) half of the replies (51%) to the counterhate arguments disagree with the argument, and (b) this kind of reply often supports the hateful tweet (40%). We also analyze the language of counterhate arguments that elicit certain types of replies. Experimental results show that it is feasible to anticipate the kind of replies a counterhate argument will elicit.</abstract>
@@ -119,9 +119,9 @@
     <paper id="10">
       <title>Language models are not naysayers: an analysis of language models on negation benchmarks</title>
       <author><first>Thinh Hung</first><last>Truong</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>RMIT University</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>101-114</pages>
       <abstract>Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models (“LLMs”) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundamental linguistic phenomenon that is central to language understanding. We evaluate different LLMs - including the open-source GPT-neo, GPT-3, and InstructGPT - against a wide range of negation benchmarks. Through systematic experimentation with varying model sizes and prompts, we show that LLMs have several limitations including insensitivity to the presence of negation, an inability to capture the lexical semantics of negation, and a failure to reason under negation.</abstract>
       <url hash="8f4efa53">2023.starsem-1.10</url>
@@ -145,7 +145,7 @@
       <author><first>Junfeng</first><last>Jiang</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Youmi</first><last>Ma</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Ao</first><last>Liu</last><affiliation>Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>128-140</pages>
       <abstract>Aspect sentiment quad prediction (ASQP) analyzes the aspect terms, opinion terms, sentiment polarity, and aspect categories in a text. One challenge in this task is the scarcity of data owing to the high annotation cost. Data augmentation techniques are commonly used to address this issue. However, existing approaches simply rewrite texts in the training data, restricting the semantic diversity of the generated data and impairing the quality due to the inconsistency between text and quads. To address these limitations, we augment quads and train a quads-to-text model to generate corresponding texts. Furthermore, we designed novel strategies to filter out low-quality data and balance the sample difficulty distribution of the augmented dataset. Empirical studies on two ASQP datasets demonstrate that our method outperforms other data augmentation methods and achieves state-of-the-art performance on the benchmarks. The source code will be released upon acceptance.</abstract>
       <url hash="12f3cffd">2023.starsem-1.12</url>
@@ -168,7 +168,7 @@
       <title>Syntax and Semantics Meet in the “Middle”: Probing the Syntax-Semantics Interface of <fixed-case>LM</fixed-case>s Through Agentivity</title>
       <author><first>Lindia</first><last>Tjuatja</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Emmy</first><last>Liu</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>149-164</pages>
       <abstract>Recent advances in large language models have prompted researchers to examine their abilities across a variety of linguistic tasks, but little has been done to investigate how models handle the interactions in meaning across words and larger syntactic forms—i.e. phenomena at the intersection of syntax and semantics. We present the semantic notion of agentivity as a case study for probing such interactions. We created a novel evaluation dataset by utilitizing the unique linguistic properties of a subset of optionally transitive English verbs. This dataset was used to prompt varying sizes of three model classes to see if they are sensitive to agentivity at the lexical level, and if they can appropriately employ these word-level priors given a specific syntactic context. Overall, GPT-3 text-davinci-003 performs extremely well across all experiments, outperforming all other models tested by far. In fact, the results are even better correlated with human judgements than both syntactic and semantic corpus statistics. This suggests that LMs may potentially serve as more useful tools for linguistic annotation, theory testing, and discovery than select corpora for certain tasks.</abstract>
@@ -204,7 +204,7 @@
       <author><first>Eduardo</first><last>Calò</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Jordi</first><last>Levy</last><affiliation>IIIA, CSIC</affiliation></author>
       <author><first>Albert</first><last>Gatt</last><affiliation>Utrecht University</affiliation></author>
-      <author><first>Kees</first><last>Van Deemter</last><affiliation>Utrecht University</affiliation></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last><affiliation>Utrecht University</affiliation></author>
       <pages>180-192</pages>
       <abstract>Some applications of artificial intelligence make it desirable that logical formulae be converted computationally to comprehensible natural language sentences. As there are many logical equivalents to a given formula, finding the most suitable equivalent to be used as input for such a “logic-to-text” generation system is a difficult challenge. In this paper, we focus on the role of brevity: Are the shortest formulae the most suitable? We focus on propositional logic (PL), framing formula minimization (i.e., the problem of finding the shortest equivalent of a given formula) as a Quantified Boolean Formulae (QBFs) satisfiability problem. We experiment with several generators and selection strategies to prune the resulting candidates. We conduct exhaustive automatic and human evaluations of the comprehensibility and fluency of the generated texts. The results suggest that while, in many cases, minimization has a positive impact on the quality of the sentences generated, formula minimization may ultimately not be the best strategy.</abstract>
       <url hash="b21ac557">2023.starsem-1.17</url>
@@ -214,7 +214,7 @@
     <paper id="18">
       <title>Seeking Clozure: Robust Hypernym extraction from <fixed-case>BERT</fixed-case> with Anchored Prompts</title>
       <author><first>Chunhua</first><last>Liu</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>Melbourne University</affiliation></author>
       <pages>193-206</pages>
       <abstract>The automatic extraction of hypernym knowledge from large language models like BERT is an open problem, and it is unclear whether methods fail due to a lack of knowledge in the model or shortcomings of the extraction methods. In particular, methods fail on challenging cases which include rare or abstract concepts, and perform inconsistently under paraphrased prompts. In this study, we revisit the long line of work on pattern-based hypernym extraction, and use it as a diagnostic tool to thoroughly examine the hypernomy knowledge encoded in BERT and the limitations of hypernym extraction methods. We propose to construct prompts from established pattern structures: definitional (X is a Y); lexico-syntactic (Y such as X); and their anchored versions (Y such as X or Z). We devise an automatic method for anchor prediction, and compare different patterns in: (i) their effectiveness for hypernym retrieval from BERT across six English data sets; (ii) on challenge sets of rare and abstract concepts; and (iii) on consistency under paraphrasing. We show that anchoring is particularly useful for abstract concepts and in enhancing consistency across paraphrases, demonstrating how established methods in the field can inform prompt engineering.</abstract>
@@ -228,7 +228,7 @@
       <author><first>Hila</first><last>Gonen</last><affiliation>UW and FAIR</affiliation></author>
       <author><first>Vidhisha</first><last>Balachandran</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <pages>207-216</pages>
       <abstract>Model explanations that shed light on the model’s predictions are becoming a desired additional output of NLP models, alongside their predictions. Challenges in creating these explanations include making them trustworthy and faithful to the model’s predictions. In this work, we propose a novel framework for guiding model explanations by supervising them explicitly. To this end, our method, LEXplain, uses task-related lexicons to directly supervise model explanations. This approach consistently improves the model’s explanations without sacrificing performance on the task, as we demonstrate on sentiment analysis and toxicity detection. Our analyses show that our method also demotes spurious correlations (i.e., with respect to African American English dialect) when performing the task, improving fairness.</abstract>
       <url hash="ea26db23">2023.starsem-1.19</url>
@@ -250,7 +250,7 @@
       <author><first>Hyunsoo</first><last>Cho</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Choonghyun</first><last>Park</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Junyeob</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Kang Min</first><last>Yoo</last><affiliation>NAVER AI Lab</affiliation></author>
       <author><first>Sang-goo</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <pages>225-235</pages>
@@ -261,7 +261,7 @@
     </paper>
     <paper id="22">
       <title>Limits for learning with language models</title>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <author><first>Swarnadeep</first><last>Bhar</last><affiliation>Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <author><first>Akshay</first><last>Chaturvedi</last><affiliation>Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <author><first>Julie</first><last>Hunter</last><affiliation>LINAGORA</affiliation></author>
@@ -285,7 +285,7 @@
     <paper id="24">
       <title>Testing Paraphrase Models on Recognising Sentence Pairs at Different Degrees of Semantic Overlap</title>
       <author><first>Qiwei</first><last>Peng</last><affiliation>University of Sussex</affiliation></author>
-      <author><first>David</first><last>Weir</last><affiliation>University of Sussex</affiliation></author>
+      <author id="david-weir"><first>David</first><last>Weir</last><affiliation>University of Sussex</affiliation></author>
       <author><first>Julie</first><last>Weeds</last><affiliation>University of Sussex</affiliation></author>
       <pages>259-269</pages>
       <abstract>Paraphrase detection is useful in many natural language understanding applications. Current works typically formulate this problem as a sentence pair binary classification task. However, this setup is not a good fit for many of the intended applications of paraphrase models. In particular, such applications often involve finding the closest paraphrases of the target sentence from a group of candidate sentences where they exhibit different degrees of semantic overlap with the target sentence. To apply models to this paraphrase retrieval scenario, the model must be sensitive to the degree to which two sentences are paraphrases of one another. However, many existing datasets ignore and fail to test models in this setup. In response, we propose adversarial paradigms to create evaluation datasets, which could examine the sensitivity to different degrees of semantic overlap. Empirical results show that, while paraphrase models and different sentence encoders appear successful on standard evaluations, measuring the degree of semantic overlap still remains a big challenge for them.</abstract>
@@ -299,7 +299,7 @@
       <author><first>Eduardo</first><last>Calò</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Léo</first><last>Jacqmin</last><affiliation>Orange &amp; Aix-Marseille University</affiliation></author>
       <author><first>Denis</first><last>Paperno</last><affiliation>Utrecht University</affiliation></author>
-      <author><first>Mathieu</first><last>Constant</last><affiliation>Universit de Lorraine, CNRS, ATILF</affiliation></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last><affiliation>Universit de Lorraine, CNRS, ATILF</affiliation></author>
       <pages>270-283</pages>
       <abstract>How does the word analogy task fit in the modern NLP landscape? Given the rarity of comparable multilingual benchmarks and the lack of a consensual evaluation protocol for contextual models, this remains an open question. In this paper, we introduce MATS: a multilingual analogy dataset, covering forty analogical relations in six languages, and evaluate human as well as static and contextual embedding performances on the task. We find that not all analogical relations are equally straightforward for humans, static models remain competitive with contextual embeddings, and optimal settings vary across languages and analogical relations. Several key challenges remain, including creating benchmarks that align with human reasoning and understanding what drives differences across methodologies.</abstract>
       <url hash="d38f3531">2023.starsem-1.25</url>
@@ -310,7 +310,7 @@
       <title>Scalable Performance Analysis for Vision-Language Models</title>
       <author><first>Santiago</first><last>Castro</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Oana</first><last>Ignat</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>284-294</pages>
       <abstract>Joint vision-language models have shown great performance over a diverse set of tasks. However, little is known about their limitations, as the high dimensional space learned by these models makes it difficult to identify semantic errors. Recent work has addressed this problem by designing highly controlled probing task benchmarks. Our paper introduces a more scalable solution that relies on already annotated benchmarks. Our method consists of extracting a large set of diverse features from a vision-language benchmark and measuring their correlation with the output of the target model. We confirm previous findings that CLIP behaves like a bag of words model and performs better with nouns and verbs; we also uncover novel insights such as CLIP getting confused by concrete words. Our framework is available at <url>https://github.com/MichiganNLP/Scalable-VLM-Probing</url> and can be used with other multimodal models and benchmarks.</abstract>
       <url hash="f0ff8916">2023.starsem-1.26</url>
@@ -320,7 +320,7 @@
     <paper id="27">
       <title><fixed-case>PCFG</fixed-case>-Based Natural Language Interface Improves Generalization for Controlled Text Generation</title>
       <author><first>Jingyu</first><last>Zhang</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Tianxing</first><last>He</last><affiliation>University of Washington</affiliation></author>
       <pages>295-313</pages>
       <abstract>Existing work on controlled text generation (CTG) assumes a control interface of categorical attributes. In this work, we propose a natural language (NL) interface, where we craft a PCFG to embed the control attributes into natural language commands, and propose variants of existing CTG models that take commands as input. In our experiments, we design tailored setups to test the model’s generalization abilities. We find our PCFG-based command generation approach is effective for handling unseen commands compared to fix-set templates. Further, our proposed NL models can effectively generalize to unseen attributes (a new ability enabled by the NL interface), as well as unseen attribute combinations. Interestingly, in model comparisons, the simple conditional generation approach, enhanced with our proposed NL interface, is shown to be a strong baseline in those challenging settings.</abstract>
@@ -386,7 +386,7 @@
     <paper id="33">
       <title>Event Semantic Knowledge in Procedural Text Understanding</title>
       <author><first>Ghazaleh</first><last>Kazeminejad</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
       <pages>388-398</pages>
       <abstract>The task of entity state tracking aims to automatically analyze procedural texts – texts that describe a step-by-step process (e.g. a baking recipe). Specifically, the goal is to track various states of the entities participating in a given process. Some of the challenges for this NLP task include annotated data scarcity and annotators’ reliance on commonsense knowledge to annotate implicit state information. Zhang et al. (2021) successfully incorporated commonsense entity-centric knowledge from ConceptNet into their BERT-based neural-symbolic architecture. Since English mostly encodes state change information in verbs, we attempted to test whether injecting semantic knowledge of events (retrieved from the state-of-the-art VerbNet parser) into a neural model can also improve the performance on this task. To achieve this, we adapt the methodology introduced by Zhang et al. (2021) for incorporating symbolic entity information from ConceptNet to the incorporation of VerbNet event semantics. We evaluate the performance of our model on the ProPara dataset (Mishra et al., 2018). In addition, we introduce a purely symbolic model for entity state tracking that uses a simple set of case statements, and is informed mostly by linguistic knowledge retrieved from various computational lexical resources. Our approach is inherently domain-agnostic, and our model is explainable and achieves state-of-the-art results on the Recipes dataset (Bosselut et al., 2017).</abstract>
       <url hash="364eec68">2023.starsem-1.33</url>
@@ -396,7 +396,7 @@
     <paper id="34">
       <title>Leveraging Active Learning to Minimise <fixed-case>SRL</fixed-case> Annotation Across Corpora</title>
       <author><first>Skatje</first><last>Myers</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
       <pages>399-408</pages>
       <url hash="b6bb757a">2023.starsem-1.34</url>
       <bibkey>myers-palmer-2023-leveraging</bibkey>
@@ -485,7 +485,7 @@
     <paper id="42">
       <title>Arithmetic-Based Pretraining Improving Numeracy of Pretrained Language Models</title>
       <author><first>Dominic</first><last>Petrak</last><affiliation>TU Darmstadt</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>Department of Computer Science, The University of Sheffield</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>UKP Lab, Technische Universitt Darmstadt</affiliation></author>
       <pages>477-493</pages>
       <abstract>State-of-the-art pretrained language models tend to perform below their capabilities when applied out-of-the-box on tasks that require understanding and working with numbers (usually referred to as numeracy). Recent work suggests two main reasons for this: (1) popular tokenisation algorithms have limited expressiveness for numbers, and (2) common pretraining objectives do not target numeracy. Approaches that address these shortcomings usually require architectural changes or pretraining from scratch. In this paper, we propose a new extended pretraining approach called Arithmetic-Based Pretraining that jointly addresses both in one extended pretraining step without requiring architectural changes or pretraining from scratch. Arithmetic-Based Pretraining combines contrastive learning to improve the number representation, and a novel extended pretraining objective called Inferable Number Prediction Task to improve numeracy. Our experiments show the effectiveness of Arithmetic-Based Pretraining in three different tasks that require improved numeracy, i.e., reading comprehension in the DROP dataset, inference-on-tables in the InfoTabs dataset, and table-to-text generation in the WikiBio and SciGen datasets.</abstract>
@@ -507,9 +507,9 @@
     <paper id="44">
       <title>Adverbs, Surprisingly</title>
       <author><first>Dmitry</first><last>Nikolaev</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Collin</first><last>Baker</last><affiliation>International Computer Science Institute</affiliation></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last><affiliation>FrameNet</affiliation></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last><affiliation>International Computer Science Institute</affiliation></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last><affiliation>FrameNet</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
       <pages>512-526</pages>
       <abstract>This paper begins with the premise that adverbs are neglected in computational linguistics. This view derives from two analyses: a literature review and a novel adverb dataset to probe a state-of-the-art language model, thereby uncovering systematic gaps in accounts for adverb meaning. We suggest that using Frame Semantics for characterizing word meaning, as in FrameNet, provides a promising approach to adverb analysis, given its ability to describe ambiguity, semantic roles, and null instantiation.</abstract>
       <url hash="17f3c927">2023.starsem-1.44</url>
diff --git a/data/xml/2023.sustainlp.xml b/data/xml/2023.sustainlp.xml
index bf09a09134..6a1f0ba81b 100644
--- a/data/xml/2023.sustainlp.xml
+++ b/data/xml/2023.sustainlp.xml
@@ -61,7 +61,7 @@
       <title>Quick Dense Retrievers Consume <fixed-case>KALE</fixed-case>: Post Training <fixed-case>K</fixed-case>ullback<fixed-case>L</fixed-case>eibler Alignment of Embeddings for Asymmetrical dual encoders</title>
       <author><first>Daniel</first><last>Campos</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
       <author><first>Alessandro</first><last>Magnani</last><affiliation>Walmart Labs</affiliation></author>
-      <author><first>Chengxiang</first><last>Zhai</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
       <pages>59-77</pages>
       <url hash="5e36886a">2023.sustainlp-1.4</url>
       <bibkey>campos-etal-2023-quick</bibkey>
@@ -81,7 +81,7 @@
     <paper id="6">
       <title>To Asymmetry and Beyond: Structured Pruning of Sequence to Sequence Models for Improved Inference Efficiency</title>
       <author><first>Daniel</first><last>Campos</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
-      <author><first>Chengxiang</first><last>Zhai</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
       <pages>91-109</pages>
       <url hash="9dceca34">2023.sustainlp-1.6</url>
       <bibkey>campos-zhai-2023-asymmetry</bibkey>
@@ -124,7 +124,7 @@
       <title>On the Interactions of Structural Constraints and Data Resources for Structured Prediction</title>
       <author><first>Zhisong</first><last>Zhang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Emma</first><last>Strubell</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>147-157</pages>
       <url hash="35a336ec">2023.sustainlp-1.10</url>
       <bibkey>zhang-etal-2023-interactions</bibkey>
diff --git a/data/xml/2023.swisstext.xml b/data/xml/2023.swisstext.xml
index 84e50e44d7..46b3b1ab10 100644
--- a/data/xml/2023.swisstext.xml
+++ b/data/xml/2023.swisstext.xml
@@ -6,7 +6,7 @@
       <editor><first>Hatem</first><last>Ghorbel</last></editor>
       <editor><first>Maria</first><last>Sokhn</last></editor>
       <editor><first>Mark</first><last>Cieliebak</last></editor>
-      <editor><first>Manuela</first><last>Hürlimann</last></editor>
+      <editor id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></editor>
       <editor><first>Emmanuel</first><last>de Salis</last></editor>
       <editor><first>Jonathan</first><last>Guerne</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -32,8 +32,8 @@
     <paper id="2">
       <title>Evaluating a Multilingual Pre-trained Model for the Automatic <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman captioning of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case></title>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Silvia Rodríguez</first><last>Vázquez</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="silvia-vazquez"><first>Silvia Rodríguez</first><last>Vázquez</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <pages>14–22</pages>
diff --git a/data/xml/2023.tacl.xml b/data/xml/2023.tacl.xml
index 4e8f39e6af..7616b64a99 100644
--- a/data/xml/2023.tacl.xml
+++ b/data/xml/2023.tacl.xml
@@ -28,7 +28,7 @@
       <title>Assessing the Capacity of Transformer to Abstract Syntactic Representations: A Contrastive Analysis Based on Long-distance Agreement</title>
       <author><first>Bingzhi</first><last>Li</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <doi>10.1162/tacl_a_00531</doi>
       <abstract>Many studies have shown that transformers are able to predict subject-verb agreement, demonstrating their ability to uncover an abstract representation of the sentence in an unsupervised way. Recently, Li et al. (2021) found that transformers were also able to predict the object-past participle agreement in French, the modeling of which in formal grammar is fundamentally different from that of subject-verb agreement and relies on a movement and an anaphora resolution. To better understand transformers’ internal working, we propose to contrast how they handle these two kinds of agreement. Using probing and counterfactual analysis methods, our experiments on French agreements show that (i) the agreement task suffers from several confounders that partially question the conclusions drawn so far and (ii) transformers handle subject-verb and object-past participle agreements in a way that is consistent with their modeling in theoretical linguistics.</abstract>
       <pages>18–33</pages>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Helpful Neighbors: Leveraging Neighbors in Geographic Feature Pronunciation</title>
       <author><first>Llion</first><last>Jones</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Haruko</first><last>Ishikawa</last></author>
       <author><first>Alexander</first><last>Gutkin</last></author>
       <doi>10.1162/tacl_a_00535</doi>
@@ -100,7 +100,7 @@
     <paper id="8">
       <title>Improving Low-Resource Cross-lingual Parsing with Expected Statistic Regularization</title>
       <author><first>Thomas</first><last>Effland</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00537</doi>
       <abstract>We present Expected Statistic Regulariza tion (ESR), a novel regularization technique that utilizes low-order multi-task structural statistics to shape model distributions for semi- supervised learning on low-resource datasets. We study ESR in the context of cross-lingual transfer for syntactic analysis (POS tagging and labeled dependency parsing) and present several classes of low-order statistic functions that bear on model behavior. Experimentally, we evaluate the proposed statistics with ESR for unsupervised transfer on 5 diverse target languages and show that all statistics, when estimated accurately, yield improvements to both POS and LAS, with the best statistic improving POS by +7.0 and LAS by +8.5 on average. We also present semi-supervised transfer and learning curve experiments that show ESR provides significant gains over strong cross-lingual-transfer-plus-fine-tuning baselines for modest amounts of label data. These results indicate that ESR is a promising and complementary approach to model-transfer approaches for cross-lingual parsing.1</abstract>
       <pages>122–138</pages>
@@ -124,7 +124,7 @@
     <paper id="10">
       <title>Modeling Emotion Dynamics in Song Lyrics with State Space Models</title>
       <author><first>Yingjin</first><last>Song</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <doi>10.1162/tacl_a_00541</doi>
       <abstract>Most previous work in music emotion recognition assumes a single or a few song-level labels for the whole song. While it is known that different emotions can vary in intensity within a song, annotated data for this setup is scarce and difficult to obtain. In this work, we propose a method to predict emotion dynamics in song lyrics without song-level supervision. We frame each song as a time series and employ a State Space Model (SSM), combining a sentence-level emotion predictor with an Expectation-Maximization (EM) procedure to generate the full emotion dynamics. Our experiments show that applying our method consistently improves the performance of sentence-level baselines without requiring any annotated songs, making it ideal for limited training data scenarios. Further analysis through case studies shows the benefits of our method while also indicating the limitations and pointing to future directions.</abstract>
       <pages>157–175</pages>
@@ -161,7 +161,7 @@
       <title>Coreference Resolution through a seq2seq Transition-Based System</title>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Chris</first><last>Alberti</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00543</doi>
       <abstract>Most recent coreference resolution systems use search algorithms over possible spans to identify mentions and resolve coreference. We instead present a coreference resolution system that uses a text-to-text (seq2seq) paradigm to predict mentions and links jointly. We implement the coreference system as a transition system and use multilingual T5 as an underlying language model. We obtain state-of-the-art accuracy on the CoNLL-2012 datasets with 83.3 F1-score for English (a 2.3 higher F1-score than previous work [Dobrovolskii, 2021]) using only CoNLL data for training, 68.5 F1-score for Arabic (+4.1 higher than previous work), and 74.3 F1-score for Chinese (+5.3). In addition we use the SemEval-2010 data sets for experiments in the zero-shot setting, a few-shot setting, and supervised setting using all available training data. We obtain substantially higher zero-shot F1-scores for 3 out of 4 languages than previous approaches and significantly exceed previous supervised state-of-the-art results for all five tested languages. We provide the code and models as open source.1</abstract>
       <pages>212–226</pages>
@@ -190,7 +190,7 @@
       <author><first>Ali</first><last>Elkahky</last></author>
       <author><first>Paden</first><last>Tomasello</last></author>
       <author><first>Robin</first><last>Algayres</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Abdelrahman</first><last>Mohamed</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <doi>10.1162/tacl_a_00545</doi>
@@ -277,7 +277,7 @@
       <title>Bridging the Gap between Synthetic and Natural Questions via Sentence Decomposition for Semantic Parsing</title>
       <author><first>Yilin</first><last>Niu</last></author>
       <author><first>Fei</first><last>Huang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jianwei</first><last>Cui</last></author>
       <author><first>Bin</first><last>Wang</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
@@ -343,7 +343,7 @@
       <author><first>Hao</first><last>Cheng</last></author>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Prithviraj</first><last>Ammanabrolu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <doi>10.1162/tacl_a_00559</doi>
       <abstract>In an information-seeking conversation, a user may ask questions that are under-specified or unanswerable. An ideal agent would interact by initiating different response types according to the available knowledge sources. However, most current studies either fail to or artificially incorporate such agent-side initiative. This work presents InSCIt, a dataset for Information-Seeking Conversations with mixed-initiative Interactions. It contains 4.7K user-agent turns from 805 human-human conversations where the agent searches over Wikipedia and either directly answers, asks for clarification, or provides relevant information to address user queries. The data supports two subtasks, evidence passage identification and response generation, as well as a human evaluation protocol to assess model performance. We report results of two systems based on state-of-the-art models of conversational knowledge identification and open-domain question answering. Both systems significantly underperform humans, suggesting ample room for improvement in future studies.1</abstract>
@@ -373,7 +373,7 @@
       <title>Erasure of Unaligned Attributes from Neural Representations</title>
       <author><first>Shun</first><last>Shao</last></author>
       <author><first>Yftah</first><last>Ziser</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00558</doi>
       <abstract>We present the Assignment-Maximization Spectral Attribute removaL (AMSAL) algorithm, which erases information from neural representations when the information to be erased is implicit rather than directly being aligned to each input example. Our algorithm works by alternating between two steps. In one, it finds an assignment of the input representations to the information to be erased, and in the other, it creates projections of both the input representations and the information to be erased into a joint latent space. We test our algorithm on an extensive array of datasets, including a Twitter dataset with multiple guarded attributes, the BiasBios dataset, and the BiasBench benchmark. The latter benchmark includes four datasets with various types of protected attributes. Our results demonstrate that bias can often be removed in our setup. We also discuss the limitations of our approach when there is a strong entanglement between the main task and the information to be erased.1</abstract>
       <pages>488–510</pages>
@@ -385,7 +385,7 @@
       <title>Less is More: Mitigate Spurious Correlations for Open-Domain Dialogue Response Generation Models by Causal Discovery</title>
       <author><first>Tao</first><last>Feng</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <doi>10.1162/tacl_a_00561</doi>
       <abstract>In this paper, we conduct the first study on spurious correlations for open-domain response generation models based on a corpus CGDialog curated by ourselves. The current models indeed suffer from spurious correlations and have a tendency to generate irrelevant and generic responses. Inspired by causal discovery algorithms, we propose a novel model-agnostic method for training and inference using a conditional independence classifier. The classifier is trained by a constrained self-training method, coined ConSTrain, to overcome data sparsity. The experimental results based on both human and automatic evaluation show that our method significantly outperforms the competitive baselines in terms of relevance, informativeness, and fluency.</abstract>
       <pages>511–530</pages>
@@ -407,7 +407,7 @@
       <author><first>Weijia</first><last>Xu</last></author>
       <author><first>Sweta</first><last>Agrawal</last></author>
       <author><first>Eleftheria</first><last>Briakou</last></author>
-      <author><first>Marianna J.</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna J.</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <doi>10.1162/tacl_a_00563</doi>
       <abstract>Neural sequence generation models are known to “hallucinate”, by producing outputs that are unrelated to the source text. These hallucinations are potentially harmful, yet it remains unclear in what conditions they arise and how to mitigate their impact. In this work, we first identify internal model symptoms of hallucinations by analyzing the relative token contributions to the generation in contrastive hallucinated vs. non-hallucinated outputs generated via source perturbations. We then show that these symptoms are reliable indicators of natural hallucinations, by using them to design a lightweight hallucination detector which outperforms both model-free baselines and strong classifiers based on quality estimation or large pre-trained models on manually annotated English-Chinese and German-English translation test beds.</abstract>
@@ -419,7 +419,7 @@
     <paper id="33">
       <title>Visual Writing Prompts: Character-Grounded Story Generation with Curated Image Sequences</title>
       <author><first>Xudong</first><last>Hong</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Khushboo</first><last>Mehra</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <author><first>Bernt</first><last>Schiele</last></author>
@@ -433,7 +433,7 @@
     <paper id="34">
       <title>Unleashing the True Potential of Sequence-to-Sequence Models for Sequence Tagging and Structure Parsing</title>
       <author><first>Han</first><last>He</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <doi>10.1162/tacl_a_00557</doi>
       <abstract>Sequence-to-Sequence (S2S) models have achieved remarkable success on various text generation tasks. However, learning complex structures with S2S models remains challenging as external neural modules and additional lexicons are often supplemented to predict non-textual outputs. We present a systematic study of S2S modeling using contained decoding on four core tasks: part-of-speech tagging, named entity recognition, constituency, and dependency parsing, to develop efficient exploitation methods costing zero extra parameters. In particular, 3 lexically diverse linearization schemas and corresponding constrained decoding methods are designed and evaluated. Experiments show that although more lexicalized schemas yield longer output sequences that require heavier training, their sequences being closer to natural language makes them easier to learn. Moreover, S2S models using our constrained decoding outperform other S2S approaches using external resources. Our best models perform better than or comparably to the state-of-the-art for all 4 tasks, lighting a promise for S2S models to generate non-sequential structures.</abstract>
       <pages>582–599</pages>
@@ -445,7 +445,7 @@
       <author><first>Devendra Singh</first><last>Sachan</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
       <author><first>Manzil</first><last>Zaheer</last></author>
       <doi>10.1162/tacl_a_00564</doi>
@@ -460,7 +460,7 @@
       <author><first>William</first><last>Merrill</last></author>
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00565</doi>
       <abstract>Many current NLP systems are built from language models trained to optimize unsupervised objectives on large amounts of raw text. Under what conditions might such a procedure acquire meaning? Our systematic experiments with synthetic data reveal that, with languages where all expressions have context-independent denotations (i.e., languages with strong transparency), both autoregressive and masked language models successfully learn to emulate semantic relations between expressions. However, when denotations are changed to be context-dependent with the language otherwise unmodified, this ability degrades. Turning to natural language, our experiments with a specific phenomenon—referential opacity—add to the growing body of evidence that current language models do not represent natural language semantics well. We show this failure relates to the context-dependent nature of natural language form-meaning mappings.</abstract>
       <pages>617–634</pages>
@@ -480,7 +480,7 @@
     </paper>
     <paper id="38">
       <title>How Much Do Language Models Copy From Their Training Data? Evaluating Linguistic Novelty in Text Generation Using <fixed-case>RAVEN</fixed-case></title>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Paul</first><last>Smolensky</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
@@ -601,7 +601,7 @@
       <author><first>Ziyi</first><last>Yang</last></author>
       <author><first>Yuwei</first><last>Fang</last></author>
       <author><first>Yulong</first><last>Chen</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
@@ -630,13 +630,13 @@
       <author><first>Tianchu</first><last>Ji</last></author>
       <author><first>Betty</first><last>van Aken</last></author>
       <author><first>Qingqing</first><last>Cao</last></author>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Michael</first><last>Hassid</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Sara</first><last>Hooker</last></author>
       <author><first>Colin</first><last>Raffel</last></author>
       <author><first>Pedro H.</first><last>Martins</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Jessica Zosa</first><last>Forde</last></author>
       <author><first>Peter</first><last>Milder</last></author>
       <author><first>Edwin</first><last>Simpson</last></author>
@@ -644,7 +644,7 @@
       <author><first>Jesse</first><last>Dodge</last></author>
       <author><first>Emma</first><last>Strubell</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <doi>10.1162/tacl_a_00577</doi>
@@ -668,7 +668,7 @@
     <paper id="50">
       <title>Expectations over Unspoken Alternatives Predict Pragmatic Inferences</title>
       <author><first>Jennifer</first><last>Hu</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Judith</first><last>Degen</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <doi>10.1162/tacl_a_00579</doi>
@@ -716,7 +716,7 @@
     </paper>
     <paper id="54">
       <title>Time-and-Space-Efficient Weighted Deduction</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00588</doi>
       <abstract>Many NLP algorithms have been described in terms of deduction systems. Unweighted deduction allows a generic forward-chaining execution strategy. For weighted deduction, however, efficient execution should propagate the weight of each item only after it has converged. This means visiting the items in topologically sorted order (as in dynamic programming). Toposorting is fast on a materialized graph; unfortunately, materializing the graph would take extra space. Is there a generic weighted deduction strategy which, for every acyclic deduction system and every input, uses only a constant factor more time and space than generic unweighted deduction? After reviewing past strategies, we answer this question in the affirmative by combining ideas of Goodman (1999) and Kahn (1962). We also give an extension to cyclic deduction systems, based on Tarjan (1972).</abstract>
       <pages>960–973</pages>
@@ -728,7 +728,7 @@
       <title>Conditional Generation with a Question-Answering Blueprint</title>
       <author><first>Shashi</first><last>Narayan</last></author>
       <author><first>Joshua</first><last>Maynez</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Annie</first><last>Louis</last></author>
       <author><first>Fantine</first><last>Huot</last></author>
@@ -748,8 +748,8 @@
       <author><first>Shimin</first><last>Tao</last></author>
       <author><first>Ning</first><last>Xie</last></author>
       <author><first>Hao</first><last>Yang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <doi>10.1162/tacl_a_00584</doi>
       <abstract>Despite the subjective nature of semantic textual similarity (STS) and pervasive disagreements in STS annotation, existing benchmarks have used averaged human ratings as gold standard. Averaging masks the true distribution of human opinions on examples of low agreement, and prevents models from capturing the semantic vagueness that the individual ratings represent. In this work, we introduce USTS, the first Uncertainty-aware STS dataset with ∼15,000 Chinese sentence pairs and 150,000 labels, to study collective human opinions in STS. Analysis reveals that neither a scalar nor a single Gaussian fits a set of observed judgments adequately. We further show that current STS models cannot capture the variance caused by human disagreement on individual instances, but rather reflect the predictive confidence over the aggregate dataset.</abstract>
       <pages>997–1013</pages>
@@ -790,7 +790,7 @@
       <author><first>Michael</first><last>Hahn</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <doi>10.1162/tacl_a_00589</doi>
       <abstract>While natural languages differ widely in both canonical word order and word order flexibility, their word orders still follow shared cross-linguistic statistical patterns, often attributed to functional pressures. In the effort to identify these pressures, prior work has compared real and counterfactual word orders. Yet one functional pressure has been overlooked in such investigations: The uniform information density (UID) hypothesis, which holds that information should be spread evenly throughout an utterance. Here, we ask whether a pressure for UID may have influenced word order patterns cross-linguistically. To this end, we use computational models to test whether real orders lead to greater information uniformity than counterfactual orders. In our empirical study of 10 typologically diverse languages, we find that: (i) among SVO languages, real word orders consistently have greater uniformity than reverse word orders, and (ii) only linguistically implausible counterfactual orders consistently exceed the uniformity of real orders. These findings are compatible with a pressure for information uniformity in the development and usage of natural languages.1</abstract>
       <pages>1048–1065</pages>
@@ -811,7 +811,7 @@
     </paper>
     <paper id="61">
       <title>Exploring Contrast Consistency of Open-Domain Question Answering Systems on Minimally Edited Questions</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Wenhao</first><last>Yu</last></author>
       <author><first>Zheng</first><last>Ning</last></author>
       <author><first>Mingxuan</first><last>Ju</last></author>
@@ -878,7 +878,7 @@
     <paper id="65">
       <title><fixed-case>T</fixed-case>3<fixed-case>L</fixed-case>: Translate-and-Test Transfer Learning for Cross-Lingual Text Classification</title>
       <author><first>Inigo Jauregi</first><last>Unanue</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Massimo</first><last>Piccardi</last></author>
       <doi>10.1162/tacl_a_00593</doi>
       <abstract>Cross-lingual text classification leverages text classifiers trained in a high-resource language to perform text classification in other languages with no or minimal fine-tuning (zero/ few-shots cross-lingual transfer). Nowadays, cross-lingual text classifiers are typically built on large-scale, multilingual language models (LMs) pretrained on a variety of languages of interest. However, the performance of these models varies significantly across languages and classification tasks, suggesting that the superposition of the language modelling and classification tasks is not always effective. For this reason, in this paper we propose revisiting the classic “translate-and-test” pipeline to neatly separate the translation and classification stages. The proposed approach couples 1) a neural machine translator translating from the targeted language to a high-resource language, with 2) a text classifier trained in the high-resource language, but the neural machine translator generates “soft” translations to permit end-to-end backpropagation during fine-tuning of the pipeline. Extensive experiments have been carried out over three cross-lingual text classification datasets (XNLI, MLDoc, and MultiEURLEX), with the results showing that the proposed approach has significantly improved performance over a competitive baseline.</abstract>
@@ -890,7 +890,7 @@
     <paper id="66">
       <title>Introduction to Mathematical Language Processing: Informal Proofs, Word Problems, and Supporting Tasks</title>
       <author><first>Jordan</first><last>Meadows</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <doi>10.1162/tacl_a_00594</doi>
       <abstract>Automating discovery in mathematics and science will require sophisticated methods of information extraction and abstract reasoning, including models that can convincingly process relationships between mathematical elements and natural language, to produce problem solutions of real-world value. We analyze mathematical language processing methods across five strategic sub-areas (identifier-definition extraction, formula retrieval, natural language premise selection, math word problem solving, and informal theorem proving) from recent years, highlighting prevailing methodologies, existing limitations, overarching trends, and promising avenues for future research.</abstract>
       <pages>1162–1184</pages>
@@ -980,7 +980,7 @@
       <author><first>Mahnaz</first><last>Koupaee</last></author>
       <author><first>Isabella</first><last>Chen</last></author>
       <author><first>Francis</first><last>Ferraro</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <doi>10.1162/tacl_a_00600</doi>
       <abstract>The events in a narrative are understood as a coherent whole via the underlying states of their participants. Often, these participant states are not explicitly mentioned, instead left to be inferred by the reader. A model that understands narratives should likewise infer these implicit states, and even reason about the impact of changes to these states on the narrative. To facilitate this goal, we introduce a new crowdsourced English-language, Participant States dataset, PASTA. This dataset contains inferable participant states; a counterfactual perturbation to each state; and the changes to the story that would be necessary if the counterfactual were true. We introduce three state-based reasoning tasks that test for the ability to infer when a state is entailed by a story, to revise a story conditioned on a counterfactual state, and to explain the most likely state change given a revised story. Experiments show that today’s LLMs can reason about states to some degree, but there is large room for improvement, especially in problems requiring access and ability to reason with diverse types of knowledge (e.g., physical, numerical, factual).1</abstract>
@@ -1027,7 +1027,7 @@
       <author><first>Li-Kuang</first><last>Chen</last></author>
       <author><first>Yi-Chen</first><last>Chang</last></author>
       <author><first>Shu-Hui</first><last>Lee</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <doi>10.1162/tacl_a_00606</doi>
       <abstract>While sentence simplification is an active research topic in NLP, its adjacent tasks of sentence complexification and same-level paraphrasing are not. To train models on all three tasks, we present two new unsupervised datasets. We compare these datasets, one labeled by a weak classifier and the other by a rule-based approach, with a single supervised dataset. Using these three datasets for training, we perform extensive experiments on both multitasking and prompting strategies. Compared to other systems trained on unsupervised parallel data, models trained on our weak classifier labeled dataset achieve state-of-the-art performance on the ASSET simplification benchmark. Our models also outperform previous work on sentence-level targeting. Finally, we establish how a handful of Large Language Models perform on these tasks under a zero-shot setting.</abstract>
       <pages>1332–1354</pages>
@@ -1041,7 +1041,7 @@
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <doi>10.1162/tacl_a_00607</doi>
       <abstract>Automatic subtitling is the task of automatically translating the speech of audiovisual content into short pieces of timed text, i.e., subtitles and their corresponding timestamps. The generated subtitles need to conform to space and time requirements, while being synchronized with the speech and segmented in a way that facilitates comprehension. Given its considerable complexity, the task has so far been addressed through a pipeline of components that separately deal with transcribing, translating, and segmenting text into subtitles, as well as predicting timestamps. In this paper, we propose the first direct speech translation model for automatic subtitling that generates subtitles in the target language along with their timestamps with a single model. Our experiments on 7 language pairs show that our approach outperforms a cascade system in the same data condition, also being competitive with production tools on both in-domain and newly released out-domain benchmarks covering new scenarios.</abstract>
@@ -1111,7 +1111,7 @@
       <author><first>Tiago</first><last>Pimentel</last></author>
       <author><first>Clara</first><last>Meister</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Roger P.</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger P.</first><last>Levy</last></author>
       <doi>10.1162/tacl_a_00612</doi>
       <abstract>Surprisal theory posits that less-predictable words should take more time to process, with word predictability quantified as surprisal, i.e., negative log probability in context. While evidence supporting the predictions of surprisal theory has been replicated widely, much of it has focused on a very narrow slice of data: native English speakers reading English texts. Indeed, no comprehensive multilingual analysis exists. We address this gap in the current literature by investigating the relationship between surprisal and reading times in eleven different languages, distributed across five language families. Deriving estimates from language models trained on monolingual and multilingual corpora, we test three predictions associated with surprisal theory: (i) whether surprisal is predictive of reading times, (ii) whether expected surprisal, i.e., contextual entropy, is predictive of reading times, and (iii) whether the linking function between surprisal and reading times is linear. We find that all three predictions are borne out crosslinguistically. By focusing on a more diverse set of languages, we argue that these results offer the most robust link to date between information theory and incremental language processing across languages.</abstract>
       <pages>1451–1470</pages>
@@ -1154,7 +1154,7 @@
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Pierre</first><last>Colombo</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00615</doi>
       <abstract>Hallucinated translations can severely undermine and raise safety issues when machine translation systems are deployed in the wild. Previous research on the topic focused on small bilingual models trained on high-resource languages, leaving a gap in our understanding of hallucinations in multilingual models across diverse translation scenarios. In this work, we fill this gap by conducting a comprehensive analysis—over 100 language pairs across various resource levels and going beyond English-centric directions—on both the M2M neural machine translation (NMT) models and GPT large language models (LLMs). Among several insights, we highlight that models struggle with hallucinations primarily in low-resource directions and when translating out of English, where, critically, they may reveal toxic patterns that can be traced back to the training data. We also find that LLMs produce qualitatively different hallucinations to those of NMT models. Finally, we show that hallucinations are hard to reverse by merely scaling models trained with the same data. However, employing more diverse models, trained on different data or with different procedures, as fallback systems can improve translation quality and virtually eliminate certain pathologies.</abstract>
       <pages>1500–1517</pages>
@@ -1269,7 +1269,7 @@
       <author><first>Shuyan</first><last>Zhou</last></author>
       <author><first>Tongshuang</first><last>Wu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00626</doi>
       <abstract>Natural language generation has witnessed significant advancements due to the training of large language models on vast internet-scale datasets. Despite these advancements, there exists a critical challenge: These models can inadvertently generate content that is toxic, inaccurate, and unhelpful, and existing automatic evaluation metrics often fall short of identifying these shortcomings. As models become more capable, human feedback is an invaluable signal for evaluating and improving models. This survey aims to provide an overview of recent research that has leveraged human feedback to improve natural language generation. First, we introduce a taxonomy distilled from existing research to categorize and organize the varied forms of feedback. Next, we discuss how feedback can be described by its format and objective, and cover the two approaches proposed to use feedback (either for training or decoding): directly using feedback or training feedback models. We also discuss existing datasets for human-feedback data collection, and concerns surrounding feedback collection. Finally, we provide an overview of the nascent field of AI feedback, which uses large language models to make judgments based on a set of principles and minimize the need for human intervention. We also release a website of this survey at feedback-gap-survey.info.</abstract>
       <pages>1643–1668</pages>
@@ -1329,7 +1329,7 @@
     <paper id="96">
       <title><fixed-case>R</fixed-case>e<fixed-case>COGS</fixed-case>: How Incidental Details of a Logical Form Overshadow an Evaluation of Semantic Interpretation</title>
       <author><first>Zhengxuan</first><last>Wu</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <doi>10.1162/tacl_a_00623</doi>
       <abstract>Compositional generalization benchmarks for semantic parsing seek to assess whether models can accurately compute meanings for novel sentences, but operationalize this in terms of logical form (LF) prediction. This raises the concern that semantically irrelevant details of the chosen LFs could shape model performance. We argue that this concern is realized for the COGS benchmark (Kim and Linzen, 2020). COGS poses generalization splits that appear impossible for present-day models, which could be taken as an indictment of those models. However, we show that the negative results trace to incidental features of COGS LFs. Converting these LFs to semantically equivalent ones and factoring out capabilities unrelated to semantic interpretation, we find that even baseline models get traction. A recent variable-free translation of COGS LFs suggests similar conclusions, but we observe this format is not semantically equivalent; it is incapable of accurately representing some COGS meanings. These findings inform our proposal for ReCOGS, a modified version of COGS that comes closer to assessing the target semantic capabilities while remaining very challenging. Overall, our results reaffirm the importance of compositional generalization and careful benchmark task design.</abstract>
@@ -1341,7 +1341,7 @@
     <paper id="97">
       <title>Data-driven Parsing Evaluation for Child-Parent Interactions</title>
       <author><first>Zoey</first><last>Liu</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <doi>10.1162/tacl_a_00624</doi>
       <abstract>We present a syntactic dependency treebank for naturalistic child and child-directed spoken English. Our annotations largely follow the guidelines of the Universal Dependencies project (UD [Zeman et al., 2022]), with detailed extensions to lexical and syntactic structures unique to spontaneous spoken language, as opposed to written texts or prepared speech. Compared to existing UD-style spoken treebanks and other dependency corpora of child-parent interactions specifically, our dataset is much larger (44,744 utterances; 233,907 words) and contains data from 10 children covering a wide age range (18–66 months). We conduct thorough dependency parser evaluations using both graph-based and transition-based parsers, trained on three different types of out-of-domain written texts: news, tweets, and learner data. Out-of-domain parsers demonstrate reasonable performance for both child and parent data. In addition, parser performance for child data increases along children’s developmental paths, especially between 18 and 48 months, and gradually approaches the performance for parent data. These results are further validated with in-domain training.</abstract>
       <pages>1734–1753</pages>
diff --git a/data/xml/2023.teach4nlp.xml b/data/xml/2023.teach4nlp.xml
index 2e89048942..8ede3c27b8 100644
--- a/data/xml/2023.teach4nlp.xml
+++ b/data/xml/2023.teach4nlp.xml
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Working at your own Pace: Computer-based Learning for <fixed-case>CL</fixed-case></title>
       <author><first>Anselm</first><last>Knebusch</last></author>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <pages>19–27</pages>
       <url hash="38f63ecc">2023.teach4nlp-1.3</url>
       <bibkey>knebusch-pado-2023-working</bibkey>
diff --git a/data/xml/2023.tllm.xml b/data/xml/2023.tllm.xml
index 2426ec0b40..82d91aa813 100644
--- a/data/xml/2023.tllm.xml
+++ b/data/xml/2023.tllm.xml
@@ -51,7 +51,7 @@
     <paper id="4">
       <title>Mitigating Harms of <fixed-case>LLM</fixed-case>s via Knowledge Distillation for a Virtual Museum Tour Guide</title>
       <author><first>Ashley</first><last>Lewis</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>31-45</pages>
       <abstract>LLMs are known to be very powerful, exhibiting both great benefits and great risk. We seek to leverage the benefits, in particular the ability to be fluent, conversational dialogue agents, while minimizing the risks, such as hallucination and toxic content. In this work we use knowledge distillation to create a virtual museum tour guide dialogue agent, employing ChatGPT as a teacher model for a smaller student model, T5-large. We find the T5 model shows competitive performance, significantly reduces instances of hallucination, and shows promise for reducing toxic content.</abstract>
       <url hash="627bb1ca">2023.tllm-1.4</url>
diff --git a/data/xml/2023.tlt.xml b/data/xml/2023.tlt.xml
index c70dbe972c..5201ab2060 100644
--- a/data/xml/2023.tlt.xml
+++ b/data/xml/2023.tlt.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 21st International Workshop on Treebanks and Linguistic Theories (TLT, GURT/SyntaxFest 2023)</booktitle>
       <editor><first>Daniel</first><last>Dakota</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
-      <editor><first>Lori</first><last>Levin</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Washington, D.C.</address>
       <month>March</month>
@@ -21,9 +21,9 @@
     </frontmatter>
     <paper id="1">
       <title>Corpus-Based Multilingual Event-type Ontology: Annotation Tools and Principles</title>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>1-10</pages>
       <abstract>In the course of building a multilingual Event-type Ontology resource called SynSemClass, it was necessary to provide the maintainers and the annotators with a set of tools to facilitate their job, achieve data format consistency, and in general obtain high-quality data. We have adapted a previously existing tool (Urešová et al., 2018b), developed to assist the work in capturing bilingual synonymy. This tool needed to be both substantially expanded with some new features and fundamentally changed in the context of developing the resource for more languages, which necessarily is to be done in parallel. We are thus presenting here the tool, the new data structure design which had to change at the same time, and the associated workflow.</abstract>
       <url hash="45573195">2023.tlt-1.1</url>
@@ -33,9 +33,9 @@
     <paper id="2">
       <title><fixed-case>S</fixed-case>panish Verbal Synonyms in the <fixed-case>S</fixed-case>yn<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>lass Ontology</title>
       <author><first>Cristina</first><last>Fernández-Alcaina</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>11-20</pages>
       <abstract>This paper presents ongoing work in the expansion of the multilingual semantic event-type ontology SynSemClass (Czech-English-German) to include Spanish. As in previous versions of the lexicon, Spanish verbal synonyms have been collected from a sentence-aligned parallel corpus and classified into classes based on their syntactic-semantic properties. Each class member is linked to a number of syntactic and/or semantic resources specific to each language, thus enriching the annotation and enabling interoperability. This paper describes the procedure for the data extraction and annotation of Spanish verbal synonyms in the lexicon.</abstract>
       <url hash="f641e1c5">2023.tlt-1.2</url>
@@ -90,7 +90,7 @@
     <paper id="7">
       <title>Semgrex and Ssurgeon, Searching and Manipulating Dependency Graphs</title>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Chloé</first><last>Kiddon</last></author>
+      <author id="chloe-kiddon"><first>Chloé</first><last>Kiddon</last></author>
       <author><first>Eric</first><last>Yeh</last></author>
       <author><first>Alex</first><last>Shan</last></author>
       <author><first>Christopher</first><last>D. Manning</last></author>
@@ -109,11 +109,11 @@
       <author><first>Jin</first><last>Zhao</last></author>
       <author><first>Andrew</first><last>Cowell</last></author>
       <author><first>William</first><last>Croft</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>James</first><last>H. Martin</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Zdenka</first><last>Urešová</last></author>
       <author><first>Rosa</first><last>Vallejos</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
diff --git a/data/xml/2023.trustnlp.xml b/data/xml/2023.trustnlp.xml
index cbff4e74a1..b169283ea7 100644
--- a/data/xml/2023.trustnlp.xml
+++ b/data/xml/2023.trustnlp.xml
@@ -154,7 +154,7 @@
       <title>An Empirical Study of Metrics to Measure Representational Harms in Pre-Trained Language Models</title>
       <author><first>Saghar</first><last>Hosseini</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Hamid</first><last>Palangi</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <pages>121-134</pages>
       <abstract>Large-scale Pre-Trained Language Models (PTLMs) capture knowledge from massive human-written data which contains latent societal biases and toxic contents. In this paper, we leverage the primary task of PTLMs, i.e., language modeling, and propose a new metric to quantify manifested implicit representational harms in PTLMs towards 13 marginalized demographics. Using this metric, we conducted an empirical analysis of 24 widely used PTLMs. Our analysis provides insights into the correlation between the proposed metric in this work and other related metrics for representational harm. We observe that our metric correlates with most of the gender-specific metrics in the literature. Through extensive experiments, we explore the connections between PTLMs architectures and representational harms across two dimensions: depth and width of the networks. We found that prioritizing depth over width, mitigates representational harms in some PTLMs. Our code and data can be found at [place holder].</abstract>
       <url hash="bf4fe558">2023.trustnlp-1.11</url>
@@ -204,7 +204,7 @@
       <author><first>Fanny</first><last>Jourdan</last><affiliation>IRIT Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <author><first>Laurent</first><last>Risser</last><affiliation>CNRS, Institut de Mathematiques de Toulouse</affiliation></author>
       <author><first>Jean-michel</first><last>Loubes</last><affiliation>Universit Toulouse 3, Institut de Mathmatiques</affiliation></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS Institut de Recherche en Informatique de Toulouse</affiliation></author>
       <pages>163-174</pages>
       <abstract>This paper presents novel experiments shedding light on the shortcomings of current metrics for assessing biases of gender discrimination made by machine learning algorithms on textual data. We focus on the Bios dataset, and our learning task is to predict the occupation of individuals, based on their biography. Such prediction tasks are common in commercial Natural Language Processing (NLP) applications such as automatic job recommendations. We address an important limitation of theoretical discussions dealing with group-wise fairness metrics: they focus on large datasets, although the norm in many industrial NLP applications is to use small to reasonably large linguistic datasets for which the main practical constraint is to get a good prediction accuracy. We then question how reliable are different popular measures of bias when the size of the training set is simply sufficient to learn reasonably accurate predictions. Our experiments sample the Bios dataset and learn more than 200 models on different sample sizes. This allows us to statistically study our results and to confirm that common gender bias indices provide diverging and sometimes unreliable results when applied to relatively small training and test samples. This highlights the crucial importance of variance calculations for providing sound results in this field.</abstract>
       <url hash="857a0d70">2023.trustnlp-1.15</url>
@@ -319,7 +319,7 @@
       <author><first>Xuanli</first><last>He</last><affiliation>University College London</affiliation></author>
       <author><first>Jun</first><last>Wang</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Benjamin</first><last>Rubinstein</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>University of Melbourne</affiliation></author>
       <pages>287-301</pages>
       <abstract>Backdoor attacks are an insidious security threat against machine learning models. Adversaries can manipulate the predictions of compromised models by inserting triggers into the training phase. Various backdoor attacks have been devised which can achieve nearly perfect attack success without affecting model predictions for clean inputs. Means of mitigating such vulnerabilities are underdeveloped, especially in natural language processing. To fill this gap, we introduce IMBERT, which uses either gradients or self-attention scores derived from victim models to self-defend against backdoor attacks at inference time. Our empirical studies demonstrate that IMBERT can effectively identify up to 98.5% of inserted triggers. Thus, it significantly reduces the attack success rate while attaining competitive accuracy on the clean dataset across widespread insertion-based attacks compared to two baselines. Finally, we show that our approach is model-agnostic, and can be easily ported to several pre-trained transformer models.</abstract>
       <url hash="6da9b4fa">2023.trustnlp-1.25</url>
@@ -344,7 +344,7 @@
       <author><first>Tiezheng</first><last>Yu</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Andrea</first><last>Madotto</last><affiliation>Meta</affiliation></author>
       <author><first>Zhaojiang</first><last>Lin</last><affiliation>Meta</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Meta Responsible AI</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <pages>311-325</pages>
       <abstract>Many NLP classification tasks, such as sexism/racism detection or toxicity detection, are based on human values. Yet, human values can vary under diverse cultural conditions. Therefore, we introduce a framework for value-aligned classification that performs prediction based on explicitly written human values in the command. Along with the task, we propose a practical approach that distills value-aligned knowledge from large-scale language models (LLMs) to construct value-aligned classifiers in two steps. First, we generate value-aligned training data from LLMs by prompt-based few-shot learning. Next, we fine-tune smaller classification models with the generated data for the task. Empirical results show that our VA-Models surpass multiple baselines by at least 15.56% on the F1-score, including few-shot learning with OPT-175B and existing text augmentation methods. We suggest that using classifiers with explicit human value input improves both inclusivity &amp; explainability in AI.</abstract>
diff --git a/data/xml/2023.tsar.xml b/data/xml/2023.tsar.xml
index 9018ba99b6..15926820dc 100644
--- a/data/xml/2023.tsar.xml
+++ b/data/xml/2023.tsar.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2023-10-31" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Second Workshop on Text Simplification, Accessibility and Readability</booktitle>
-      <editor><first>Sanja</first><last>Štajner</last></editor>
+      <editor id="sanja-stajner"><first>Sanja</first><last>Štajner</last></editor>
       <editor><first>Horacio</first><last>Saggio</last></editor>
       <editor><first>Matthew</first><last>Shardlow</last></editor>
       <editor><first>Fernando</first><last>Alva-Manchego</last></editor>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Context-aware <fixed-case>S</fixed-case>wedish Lexical Simplification</title>
       <author><first>Emil</first><last>Graichen</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <pages>11–20</pages>
       <abstract>We present results from the development and evaluation of context-aware Lexical simplification (LS) systems for the Swedish language. Three versions of LS models, LäsBERT, LäsBERT-baseline, and LäsGPT, were created and evaluated on a newly constructed Swedish LS evaluation dataset. The LS systems demonstrated promising potential in aiding audiences with reading difficulties by providing context-aware word replacements. While there were areas for improvement, particularly in complex word identification, the systems showed agreement with human annotators on word replacements.</abstract>
       <url hash="0c002912">2023.tsar-1.2</url>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>Cross-lingual Mediation: Readability Effects</title>
       <author><first>Maria</first><last>Kunilovskaya</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Eveline</first><last>Wandl-Vogt</last></author>
       <pages>33–43</pages>
       <abstract>This paper explores the readability of translated and interpreted texts compared to the original source texts and target language texts in the same domain. It was shown in the literature that translated and interpreted texts could exhibit lexical and syntactic properties that make them simpler, and hence, easier to process than their sources or comparable non-translations. In translation, this effect is attributed to the tendency to simplify and disambiguate the message. In interpreting, it can be enhanced by the temporal and cognitive constraints. We use readability annotations from the Newsela corpus to formulate a number of classification and regression tasks and fine-tune a multilingual pre-trained model on these tasks, obtaining models that can differentiate between complex and simple sentences. Then, the models are applied to predict the readability of sources, targets, and comparable target language originals in a zero-shot manner. Our test data – parallel and comparable – come from English-German bidirectional interpreting and translation subsets from the Europarl corpus. The results confirm the difference in readability between translated/interpreted targets against sentences in standard originally-authored source and target languages. Besides, we find consistent differences between the translation directions in the English-German language pair.</abstract>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="6">
       <title>Comparing Generic and Expert Models for Genre-Specific Text Simplification</title>
-      <author id="zihao-li"><first>Zihao</first><last>Li</last></author>
+      <author><first>Zihao</first><last>Li</last></author>
       <author><first>Matthew</first><last>Shardlow</last></author>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <pages>51–67</pages>
@@ -82,8 +82,8 @@
       <title>Automatic Text Simplification for People with Cognitive Disabilities: Resource Creation within the <fixed-case>C</fixed-case>lear<fixed-case>T</fixed-case>ext Project</title>
       <author><first>Isabel</first><last>Espinosa-Zaragoza</last></author>
       <author><first>José</first><last>Abreu-Salas</last></author>
-      <author><first>Paloma</first><last>Moreda</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>68–77</pages>
       <abstract>This paper presents the ongoing work conducted within the ClearText project, specifically focusing on the resource creation for the simplification of Spanish for people with cognitive disabilities. These resources include the CLEARSIM corpus and the Simple.Text tool. On the one hand, a description of the corpus compilation process with the help of APSA is detailed along with information regarding whether these texts are bronze, silver or gold standard simplification versions from the original text. The goal to reach is 18,000 texts in total by the end of the project. On the other hand, we aim to explore Large Language Models (LLMs) in a sequence-to-sequence setup for text simplification at the document level. Therefore, the tool’s objectives, technical aspects, and the preliminary results derived from early experimentation are also presented. The initial results are subject to improvement, given that experimentation is in a very preliminary stage. Despite showcasing flaws inherent to generative models (e.g. hallucinations, repetitive text), we examine the resolutions (or lack thereof) of complex linguistic phenomena that can be learned from the corpus. These issues will be addressed throughout the remainder of this project. The expected positive results from this project that will impact society are three-fold in nature: scientific-technical, social, and economic.</abstract>
       <url hash="82aa2541">2023.tsar-1.7</url>
diff --git a/data/xml/2023.udfestbr.xml b/data/xml/2023.udfestbr.xml
index 415b4c92ee..cbf0d278be 100644
--- a/data/xml/2023.udfestbr.xml
+++ b/data/xml/2023.udfestbr.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2nd Edition of the Universal Dependencies Brazilian Festival</booktitle>
       <editor><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></editor>
-      <editor><first>Magali Sanches</first><last>Duran</last></editor>
+      <editor id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></editor>
       <editor><first>Lucelene</first><last>Lopes</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Belo Horizonte, Brazil</address>
@@ -28,7 +28,7 @@
     <paper id="2">
       <title>Construções sintaticas do português que desafiam a tarefa de parsing: uma analise qualitativa</title>
       <author><first>Magali S.</first><last>Duran</last></author>
-      <author><first>Maria das Graças V.</first><last>Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças V.</first><last>Nunes</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
       <pages>432–441</pages>
       <url hash="fd423169">2023.udfestbr-1.2</url>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Annotation of fixed Multiword Expressions (<fixed-case>MWE</fixed-case>s) in a <fixed-case>P</fixed-case>ortuguese <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies (<fixed-case>UD</fixed-case>) treebank: Gathering candidates from three different sources</title>
       <author><first>Elvis</first><last>Souza</last></author>
-      <author><first>Claudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Claudia</first><last>Freitas</last></author>
       <pages>442–450</pages>
       <url hash="b68c6748">2023.udfestbr-1.3</url>
       <bibkey>souza-freitas-2023-annotation</bibkey>
@@ -89,7 +89,7 @@
     <paper id="9">
       <title>Em Direção à Anotação Sintatica - <fixed-case>UD</fixed-case> de Tweets do Mercado Financeiro</title>
       <author><first>Bryan K. S.</first><last>Barbosa</last></author>
-      <author><first>Ariani</first><last>Di-Felippo</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di-Felippo</last></author>
       <pages>499–508</pages>
       <url hash="fe48798e">2023.udfestbr-1.9</url>
       <bibkey>barbosa-di-felippo-2023-em</bibkey>
diff --git a/data/xml/2023.udw.xml b/data/xml/2023.udw.xml
index f6f1f548cb..0eac956e96 100644
--- a/data/xml/2023.udw.xml
+++ b/data/xml/2023.udw.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Sixth Workshop on Universal Dependencies (UDW, GURT/SyntaxFest 2023)</booktitle>
       <editor><first>Loïc</first><last>Grobol</last><affiliation>Université Paris Nanterre</affiliation></editor>
-      <editor><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Washington, D.C.</address>
       <month>March</month>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Universalising <fixed-case>L</fixed-case>atin <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: a harmonisation of <fixed-case>L</fixed-case>atin treebanks in <fixed-case>UD</fixed-case></title>
       <author><first>Federica</first><last>Gamba</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>7-16</pages>
       <abstract>This paper presents the harmonisation process carried out on the five treebanks available for Latin in Universal Dependencies, with the aim of eliminating the discrepancies in their annotation styles. Indeed, this is the first issue to be addressed when parsing Latin, as significant drops in parsing accuracy on different Latin treebanks have been repeatedly observed. Latin syntactic variability surely accounts for this, but parsing results are as well affected by divergent annotation choices. By analysing where annotations differ, we propose a Python-based alignment of the five UD treebanks. Consequently, the impact of annotation choices on accuracy scores is assessed by performing parsing experiments with UDPipe and Stanza.</abstract>
       <url hash="c69c23a0">2023.udw-1.2</url>
@@ -52,7 +52,7 @@
     </paper>
     <paper id="4">
       <title>Methodological issues regarding the semi-automatic <fixed-case>UD</fixed-case> treebank creation of under-resourced languages: the case of Pomak</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Nicolaos</first><last>Th. Constantinides</last></author>
       <author><first>Vivian</first><last>Stamou</last></author>
       <author><first>Vasileios</first><last>Arampatzakis</last></author>
@@ -68,7 +68,7 @@
       <title>Analysis of Corpus-based Word-Order Typological Methods</title>
       <author><first>Diego</first><last>Alves</last></author>
       <author><first>Božo</first><last>Bekavac</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <pages>36-46</pages>
       <abstract>This article presents a comparative analysis of four different syntactic typological approaches applied to 20 different languages. We compared three specific quantitative methods, using parallel CoNLL-U corpora, to the classification obtained via syntactic features provided by a typological database (lang2vec). First, we analyzed the Marsagram linear approach which consists of extracting the frequency word-order patterns regarding the position of components inside syntactic nodes. The second approach considers the relative position of heads and dependents, and the third is based simply on the relative position of verbs and objects. From the results, it was possible to observe that each method provides different language clusters which can be compared to the classic genealogical classification (the lang2vec and the head and dependent methods being the closest). As different word-order phenomena are considered in these specific typological strategies, each one provides a different angle of analysis to be applied according to the precise needs of the researchers.</abstract>
diff --git a/data/xml/2023.vardial.xml b/data/xml/2023.vardial.xml
index 38af4c5bd7..7e664e70db 100644
--- a/data/xml/2023.vardial.xml
+++ b/data/xml/2023.vardial.xml
@@ -6,8 +6,8 @@
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <editor><first>Tommi</first><last>Jauhiainen</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dubrovnik, Croatia</address>
@@ -24,7 +24,7 @@
       <title>Analyzing Zero-Shot transfer Scenarios across <fixed-case>S</fixed-case>panish variants for Hate Speech Detection</title>
       <author><first>Galo</first><last>Castillo-lópez</last><affiliation>Université Paris-Saclay</affiliation></author>
       <author><first>Arij</first><last>Riabi</last><affiliation>Inria; Sorbonne université</affiliation></author>
-      <author><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last><affiliation>Inria</affiliation></author>
       <pages>1-13</pages>
       <abstract>Hate speech detection in online platforms has been widely studied inthe past. Most of these works were conducted in English and afew rich-resource languages. Recent approaches tailored forlow-resource languages have explored the interests of zero-shot cross-lingual transfer learning models in resource-scarce scenarios. However, languages variations between geolects such as AmericanEnglish and British English, Latin-American Spanish, and EuropeanSpanish is still a problem for NLP models that often relies on(latent) lexical information for their classification tasks. Moreimportantly, the cultural aspect, crucial for hate speech detection,is often overlooked. In this work, we present the results of a thorough analysis of hatespeech detection models performance on different variants of Spanish,including a new hate speech toward immigrants Twitter data set we built to cover these variants. Using mBERT and Beto, a monolingual Spanish Bert-based language model, as the basis of our transfer learning architecture, our results indicate that hate speech detection models for a given Spanish variant are affected when different variations of such language are not considered. Hate speech expressions could vary from region to region where the same language is spoken. Our new dataset, models and guidelines are freely available.</abstract>
       <url hash="3006d030">2023.vardial-1.1</url>
@@ -35,7 +35,7 @@
     <paper id="2">
       <title>Optimizing the Size of Subword Vocabularies in Dialect Classification</title>
       <author><first>Vani</first><last>Kanjirangat</last><affiliation>Idsia</affiliation></author>
-      <author><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Ljiljana</first><last>Dolamic</last><affiliation>armasuisse S&amp;T</affiliation></author>
       <author><first>Fabio</first><last>Rinaldi</last><affiliation>IDSIA, Swiss AI Institute</affiliation></author>
       <pages>14-30</pages>
@@ -58,8 +58,8 @@
     <paper id="5">
       <title>Does Manipulating Tokenization Aid Cross-Lingual Transfer? A Study on <fixed-case>POS</fixed-case> Tagging for Non-Standardized Languages</title>
       <author><first>Verena</first><last>Blaschke</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>40-54</pages>
       <abstract>One of the challenges with finetuning pretrained language models (PLMs) is that their tokenizer is optimized for the language(s) it was pretrained on, but brittle when it comes to previously unseen variations in the data. This can for instance be observed when finetuning PLMs on one language and evaluating them on data in a closely related language variety with no standardized orthography. Despite the high linguistic similarity, tokenization no longer corresponds to meaningful representations of the target data, leading to low performance in, e.g., part-of-speech tagging. In this work, we finetune PLMs on seven languages from three different families and analyze their zero-shot performance on closely related, non-standardized varieties. We consider different measures for the divergence in the tokenization of the source and target data, and the way they can be adjusted by manipulating the tokenization during the finetuning step. Overall, we find that the similarity between the percentage of words that get split into subwords in the source and target data (the split word ratio difference) is the strongest predictor for model performance on target data.</abstract>
       <url hash="167ef85c">2023.vardial-1.5</url>
@@ -163,7 +163,7 @@
     <paper id="15">
       <title>Dialect and Variant Identification as a Multi-Label Classification Task: A Proposal Based on Near-Duplicate Analysis</title>
       <author><first>Gabriel</first><last>Bernier-colborne</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Cyril</first><last>Goutte</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Serge</first><last>Leger</last><affiliation>National Research Council</affiliation></author>
       <pages>142-151</pages>
       <abstract>We argue that dialect identification should be treated as a multi-label classification problem rather than the single-class setting prevalent in existing collections and evaluations. In order to avoid extensive human re-labelling of the data, we propose an analysis of ambiguous near-duplicates in an existing collection covering four variants of French.We show how this analysis helps us provide multiple labels for a significant subset of the original data, therefore enriching the annotation with minimal human intervention. The resulting data can then be used to train dialect identifiers in a multi-label setting. Experimental results show that on the enriched dataset, the multi-label classifier produces similar accuracy to the single-label classifier on test cases that are unambiguous (single label), but it increases the macro-averaged F1-score by 0.225 absolute (71% relative gain) on ambiguous texts with multiple labels. On the original data, gains on the ambiguous test cases are smaller but still considerable (+0.077 absolute, 20% relative gain), and accuracy on non-ambiguous test cases is again similar in this case. This supports our thesis that modelling dialect identification as a multi-label problem potentially has a positive impact.</abstract>
@@ -281,7 +281,7 @@
       <author><first>Mourhaf</first><last>Kazzaz</last><affiliation>Universität Tübingen</affiliation></author>
       <author><first>Nikola</first><last>Ljubešić</last><affiliation>Jožef Stefan Institute</affiliation></author>
       <author><first>Kai</first><last>North</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Yves</first><last>Scherrer</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <pages>251-261</pages>
diff --git a/data/xml/2023.wassa.xml b/data/xml/2023.wassa.xml
index 4c7648d556..fd794fb74a 100644
--- a/data/xml/2023.wassa.xml
+++ b/data/xml/2023.wassa.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 13th Workshop on Computational Approaches to Subjectivity, Sentiment, &amp; Social Media Analysis</booktitle>
       <editor><first>Jeremy</first><last>Barnes</last></editor>
-      <editor><first>Orphée</first><last>De Clercq</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></editor>
       <editor><first>Roman</first><last>Klinger</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toronto, Canada</address>
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Sentimental Matters - Predicting Literary Quality by Sentiment Analysis and Stylometric Features</title>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Moreira</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Moreira</last></author>
       <author><first>Mads Rosendahl</first><last>Thomsen</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Aarhus University</affiliation></author>
       <pages>11-18</pages>
@@ -148,7 +148,7 @@
       <author><first>Yilun</first><last>Hua</last></author>
       <author><first>Tharindu</first><last>Kumarage</last><affiliation>Arizona State University</affiliation></author>
       <author><first>Huan</first><last>Liu</last><affiliation>Arizona State University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>110-128</pages>
       <abstract>Manipulated news online is a growing problem which necessitates the use of automated systems to curtail its spread. We argue that while misinformation and disinformation detection have been studied, there has been a lack of investment in the important open challenge of detecting harmful agendas in news articles; identifying harmful agendas is critical to flag news campaigns with the greatest potential for real world harm. Moreover, due to real concerns around censorship, harmful agenda detectors must be interpretable to be effective. In this work, we propose this new task and release a dataset, NewsAgendas, of annotated news articles for agenda identification. We show how interpretable systems can be effective on this task and demonstrate that they can perform comparably to black-box models.</abstract>
       <url hash="1dd4ca98">2023.wassa-1.11</url>
@@ -185,7 +185,7 @@
       <author><first>Nannan</first><last>Huang</last></author>
       <author><first>Lin</first><last>Tian</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Haytham</first><last>Fayek</last><affiliation>Royal Melbourne Institute of Technology and Facebook</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>149-161</pages>
       <abstract>Opinion summarisation is a task that aims to condense the information presented in the source documents while retaining the core message and opinions. A summary that only represents the majority opinions will leave the minority opinions unrepresented in the summary. In this paper, we use the stance towards a certain target as an opinion. We study bias in opinion summarisation from the perspective of opinion diversity, which measures whether the model generated summary can cover a diverse set of opinions. In addition, we examine opinion similarity, a measure of how closely related two opinions are in terms of their stance on a given topic, and its relationship with opinion diversity. Through the lense of stances towards a topic, we examine opinion diversity and similarity using three debatable topics under COVID-19. Experimental results on these topics revealed that a higher degree of similarity of opinions did not indicate good diversity or fairly cover the various opinions originally presented in the source documents. We found that BART and ChatGPT can better capture diverse opinions presented in the source documents.</abstract>
       <url hash="78963078">2023.wassa-1.14</url>
@@ -244,8 +244,8 @@
       <author><first>Sunny</first><last>Rai</last></author>
       <author><first>Bhumika</first><last>Singhal</last></author>
       <author><first>Langchen</first><last>Liu</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>202-214</pages>
       <abstract>Emotions are experienced and expressed differently across the world. In order to use Large Language Models (LMs) for multilingual tasks that require emotional sensitivity, LMs must reflect this cultural variation in emotion. In this study, we investigate whether the widely-used multilingual LMs in 2023 reflect differences in emotional expressions across cultures and languages. We find that embeddings obtained from LMs (e.g., XLM-RoBERTa) are Anglocentric, and generative LMs (e.g., ChatGPT) reflect Western norms, even when responding to prompts in other languages. Our results show that multilingual LMs do not successfully learn the culturally appropriate nuances of emotion and we highlight possible research directions towards correcting this.</abstract>
       <url hash="8bd43059">2023.wassa-1.19</url>
@@ -284,7 +284,7 @@
     <paper id="22">
       <title>Combining Active Learning and Task Adaptation with <fixed-case>BERT</fixed-case> for Cost-Effective Annotation of Social Media Datasets</title>
       <author><first>Jens</first><last>Lemmens</last></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
       <pages>237-250</pages>
       <abstract>Social media provide a rich source of data that can be mined and used for a wide variety of research purposes. However, annotating this data can be expensive, yet necessary for state-of-the-art pre-trained language models to achieve high prediction performance. Therefore, we combine pool-based active learning based on prediction uncertainty (an established method for reducing annotation costs) with unsupervised task adaptation through Masked Language Modeling (MLM). The results on three different datasets (two social media corpora, one benchmark dataset) show that task adaptation significantly improves results and that with only a fraction of the available training data, this approach reaches similar F1-scores as those achieved by an upper-bound baseline model fine-tuned on all training data. We hereby contribute to the scarce corpus of research on active learning with pre-trained language models and propose a cost-efficient annotation sampling and fine-tuning approach that can be applied to a wide variety of tasks and datasets.</abstract>
       <url hash="afea650a">2023.wassa-1.22</url>
@@ -295,7 +295,7 @@
     <paper id="23">
       <title>Improving <fixed-case>D</fixed-case>utch Vaccine Hesitancy Monitoring via Multi-Label Data Augmentation with <fixed-case>GPT</fixed-case>-3.5</title>
       <author><first>Jens</first><last>Van Nooten</last></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
       <pages>251-270</pages>
       <abstract>In this paper, we leverage the GPT-3.5 language model both using the Chat-GPT API interface and the GPT-3.5 API interface to generate realistic examples of anti-vaccination tweets in Dutch with the aim of augmenting an imbalanced multi-label vaccine hesitancy argumentation classification dataset. In line with previous research, we devise a prompt that, on the one hand, instructs the model to generate realistic examples based on the gold standard dataset and, on the other hand, to assign multiple pseudo-labels (or a single pseudo-label) to the generated instances. We then augment our gold standard data with the generated examples and evaluate the impact thereof in a cross-validation setting with several state-of-the-art Dutch large language models. This augmentation technique predominantly shows improvements in F1 for classifying underrepresented classes while increasing the overall recall, paired with a slight decrease in precision for more common classes. Furthermore, we examine how well the synthetic data generalises to human data in the classification task. To our knowledge, we are the first to utilise Chat-GPT and GPT-3.5 for augmenting a Dutch multi-label dataset classification task.</abstract>
       <url hash="8580c08b">2023.wassa-1.23</url>
@@ -306,7 +306,7 @@
     <paper id="24">
       <title>Emotion Analysis of Tweets Banning Education in <fixed-case>A</fixed-case>fghanistan</title>
       <author><first>Mohammad Ali</first><last>Hussiny</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>271-277</pages>
       <abstract>This paper introduces the first emotion-annotated dataset for the Dari variant of Persian spoken in Afghanistan. The LetHerLearn dataset contains 7,600 tweets posted in reaction to the Taliban’s ban of women’s rights to education in 2022 and has been manually annotated according to Ekman’s emotion categories. We here detail the data collection and annotation process, present relevant dataset statistics as well as initial experiments on the resulting dataset, benchmarking a number of different neural architectures for the task of Dari emotion classification.</abstract>
       <url hash="c39359cc">2023.wassa-1.24</url>
@@ -317,7 +317,7 @@
     <paper id="25">
       <title>Identifying Slurs and Lexical Hate Speech via Light-Weight Dimension Projection in Embedding Space</title>
       <author><first>Sanne</first><last>Hoeken</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Ozge</first><last>Alacam</last><affiliation>Bielefeld University</affiliation></author>
       <pages>278-289</pages>
       <abstract>The prevalence of hate speech on online platforms has become a pressing concern for society, leading to increased attention towards detecting hate speech. Prior work in this area has primarily focused on identifying hate speech at the utterance level that reflects the complex nature of hate speech. In this paper, we propose a targeted and efficient approach to identifying hate speech by detecting slurs at the lexical level using contextualized word embeddings. We hypothesize that slurs have a systematically different representation than their neutral counterparts, making them identifiable through existing methods for discovering semantic dimensions in word embeddings. The results demonstrate the effectiveness of our approach in predicting slurs, confirming linguistic theory that the meaning of slurs is stable across contexts. Our robust hate dimension approach for slur identification offers a promising solution to tackle a smaller yet crucial piece of the complex puzzle of hate speech detection.</abstract>
@@ -351,7 +351,7 @@
       <author><first>Aaron</first><last>Maladry</last></author>
       <author><first>Els</first><last>Lefever</last><affiliation>Ghent University</affiliation></author>
       <author><first>Cynthia</first><last>Van Hee</last><affiliation>Universiteit Gent and Universiteit Gent</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>315-324</pages>
       <abstract>In this paper we investigate potential bias in fine-tuned transformer models for irony detection. Bias is defined in this research as spurious associations between word n-grams and class labels, that can cause the system to rely too much on superficial cues and miss the essence of the irony. For this purpose, we looked for correlations between class labels and words that are prone to trigger irony, such as positive adjectives, intensifiers and topical nouns. Additionally, we investigate our irony model’s predictions before and after manipulating the data set through irony trigger replacements. We further support these insights with state-of-the-art explainability techniques (Layer Integrated Gradients, Discretized Integrated Gradients and Layer-wise Relevance Propagation). Both approaches confirm the hypothesis that transformer models generally encode correlations between positive sentiments and ironic texts, with even higher correlations between vividly expressed sentiment and irony. Based on these insights, we implemented a number of modification strategies to enhance the robustness of our irony classifier.</abstract>
       <url hash="b550201a">2023.wassa-1.28</url>
@@ -373,7 +373,7 @@
     <paper id="30">
       <title>How to Control Sentiment in Text Generation: A Survey of the State-of-the-Art in Sentiment-Control Techniques</title>
       <author><first>Michela</first><last>Lorandi</last><affiliation>Dublin City University</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
       <pages>341-353</pages>
       <abstract>Recent advances in the development of large Pretrained Language Models, such as GPT, BERT and Bloom, have achieved remarkable performance on a wide range of different NLP tasks. However, when used for text generation tasks, these models still have limitations when it comes to controlling the content and style of the generated text, often producing content that is incorrect, irrelevant, or inappropriate in the context of a given task. In this survey paper, we explore methods for controllable text generation with a focus on sentiment control. We systematically collect papers from the ACL Anthology, create a categorisation scheme based on different control techniques and controlled attributes, and use the scheme to categorise and compare methods. The result is a detailed and comprehensive overview of state-of-the-art techniques for sentiment-controlled text generation categorised on the basis of how the control is implemented and what attributes are controlled and providing a clear idea of their relative strengths and weaknesses.</abstract>
       <url hash="18223038">2023.wassa-1.30</url>
@@ -425,7 +425,7 @@
       <author><first>Adithya</first><last>V Ganesan</last><affiliation>, State University of New York, Stony Brook</affiliation></author>
       <author><first>Yash Kumar</first><last>Lal</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <author><first>August</first><last>Nilsson</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
       <pages>390-400</pages>
       <abstract>Very large language models (LLMs) perform extremely well on a spectrum of NLP tasks in a zero-shot setting. However, little is known about their performance on human-level NLP problems which rely on understanding psychological concepts, such as assessing personality traits. In this work, we investigate the zero-shot ability of GPT-3 to estimate the Big 5 personality traits from users’ social media posts. Through a set of systematic experiments, we find that zero-shot GPT-3 performance is somewhat close to an existing pre-trained SotA for broad classification upon injecting knowledge about the trait in the prompts. However, when prompted to provide fine-grained classification, its performance drops to close to a simple most frequent class (MFC) baseline. We further analyze where GPT-3 performs better, as well as worse, than a pretrained lexical model, illustrating systematic errors that suggest ways to improve LLMs on human-level NLP tasks. The code for this project is available on Github.</abstract>
       <url hash="85dbc3d7">2023.wassa-1.34</url>
@@ -437,7 +437,7 @@
       <title>Utterance Emotion Dynamics in Children’s Poems: Emotional Changes Across Age</title>
       <author><first>Daniela</first><last>Teodorescu</last><affiliation>University of Alberta</affiliation></author>
       <author><first>Alona</first><last>Fyshe</last><affiliation>University of Alberta</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>401-415</pages>
       <abstract>Emerging psychopathology studies are showing that patterns of changes in emotional state — emotion dynamics — are associated with overall well-being and mental health. More recently, there has been some work in tracking emotion dynamics through one’s utterances, allowing for data to be collected on a larger scale across time and people. However, several questions about how emotion dynamics change with age, especially in children, and when determined through children’s writing, remain unanswered. In this work, we use both a lexicon and a machine learning based approach to quantify characteristics of emotion dynamics determined from poems written by children of various ages. We show that both approaches point to similar trends: consistent increasing intensities for some emotions (e.g., anger, fear, joy, sadness, arousal, and dominance) with age and a consistent decreasing valence with age. We also find increasing emotional variability, rise rates (i.e., emotional reactivity), and recovery rates (i.e., emotional regulation) with age. These results act as a useful baselines for further research in how patterns of emotions expressed by children change with age, and their association with mental health.</abstract>
       <url hash="3989b0e3">2023.wassa-1.35</url>
@@ -478,7 +478,7 @@
       <author><first>Andy</first><last>Bernhardt</last></author>
       <author><first>Gregorios</first><last>Katsios</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Ning</first><last>Sa</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
-      <author><first>Tomek</first><last>Strzalkowski</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <pages>441-451</pages>
       <abstract>Social media is an extremely potent tool for influencing public opinion, particularly during important events such as elections, pandemics, and national conflicts. Emotions are a crucial aspect of this influence, but detecting them accurately in the political domain is a significant challenge due to the lack of suitable emotion labels and training datasets. In this paper, we present a generalized approach to emotion detection that can be adapted to the political domain with minimal performance sacrifice. Our approach is designed to be easily integrated into existing models without the need for additional training or fine-tuning. We demonstrate the zero-shot and few-shot performance of our model on the 2017 French presidential elections and propose efficient emotion groupings that would aid in effectively analyzing influence campaigns and agendas on social media.</abstract>
       <url hash="c4b01d21">2023.wassa-1.38</url>
@@ -509,7 +509,7 @@
     </paper>
     <paper id="41">
       <title>Sadness and Anxiety Language in <fixed-case>R</fixed-case>eddit Messages Before and After Quitting a Job</title>
-      <author><first>Molly</first><last>Ireland</last><affiliation>Receptiviti</affiliation></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last><affiliation>Receptiviti</affiliation></author>
       <author><first>Micah</first><last>Iserman</last></author>
       <author><first>Kiki</first><last>Adams</last></author>
       <pages>467-478</pages>
@@ -728,7 +728,7 @@
       <title><fixed-case>P</fixed-case>recog<fixed-case>IIITH</fixed-case>@<fixed-case>WASSA</fixed-case>2023: Emotion Detection for <fixed-case>U</fixed-case>rdu-<fixed-case>E</fixed-case>nglish Code-mixed Text</title>
       <author><first>Bhaskara Hanuma</first><last>Vedula</last></author>
       <author><first>Prashant</first><last>Kodali</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>601-605</pages>
       <abstract>Code-mixing refers to the phenomenon of using two or more languages interchangeably within a speech or discourse context. This practice is particularly prevalent on social media platforms, and determining the embedded affects in a code-mixed sentence remains as a challenging problem. In this submission we describe our system for WASSA 2023 Shared Task on Emotion Detection in English-Urdu code-mixed text. In our system we implement a multiclass emotion detection model with label space of 11 emotions. Samples are code-mixed English-Urdu text, where Urdu is written in romanised form. Our submission is limited to one of the subtasks - Multi Class classification and we leverage transformer-based Multilingual Large Language Models (MLLMs), XLM-RoBERTa and Indic-BERT. We fine-tune MLLMs on the released data splits, with and without pre-processing steps (translation to english), for classifying texts into the appropriate emotion category. Our methods did not surpass the baseline, and our submission is ranked sixth overall.</abstract>
diff --git a/data/xml/2023.wat.xml b/data/xml/2023.wat.xml
index bfd7c55381..10a2e77282 100644
--- a/data/xml/2023.wat.xml
+++ b/data/xml/2023.wat.xml
@@ -11,7 +11,7 @@
       <editor><first>Shohei</first><last>Higashiyama</last></editor>
       <editor><first>Shantipriya</first><last>Parida</last></editor>
       <editor><first>Makoto</first><last>Morishita</last></editor>
-      <editor><first>Ondrej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></editor>
       <editor><first>Akiko</first><last>Eriguchi</last></editor>
       <editor><first>Yusuke</first><last>Oda</last></editor>
       <editor><first>Akiko</first><last>Eriguchi</last></editor>
diff --git a/data/xml/2023.wiesp.xml b/data/xml/2023.wiesp.xml
index 8d9cec34d8..04552af9c4 100644
--- a/data/xml/2023.wiesp.xml
+++ b/data/xml/2023.wiesp.xml
@@ -97,7 +97,7 @@
       <author><first>Pranav</first><last>Khetarpal</last></author>
       <author><first>Sharaf</first><last>Zaman</last></author>
       <author><first>David</first><last>Brodrick</last></author>
-      <author><first>Sergio J.</first><last>Rodriguez Mendez</last></author>
+      <author id="sergio-jose-rodriguez-mendez"><first>Sergio J.</first><last>Rodriguez Mendez</last></author>
       <author><first>Thang</first><last>Bui</last></author>
       <author><first>Alyssa</first><last>Goodman</last></author>
       <author><first>Alberto</first><last>Accomazzi</last></author>
@@ -136,7 +136,7 @@
       <author><first>Elena</first><last>Callegari</last></author>
       <author><first>Peter</first><last>Vajdecka</last></author>
       <author><first>Desara</first><last>Xhura</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>74–83</pages>
       <url hash="6e4592bf">2023.wiesp-1.10</url>
       <bibkey>callegari-etal-2023-enhancing</bibkey>
diff --git a/data/xml/2023.wmt.xml b/data/xml/2023.wmt.xml
index ebeef332ae..4b8c1c98a9 100644
--- a/data/xml/2023.wmt.xml
+++ b/data/xml/2023.wmt.xml
@@ -23,7 +23,7 @@
       <author><first>Tom</first><last>Kocmi</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Anton</first><last>Dvorkovich</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
@@ -39,7 +39,7 @@
       <author><first>Masaaki</first><last>Nagata</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Mariya</first><last>Shmatova</last></author>
       <author><first>Jun</first><last>Suzuki</last></author>
       <pages>1–42</pages>
@@ -51,10 +51,10 @@
     <paper id="2">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2023 Biomedical Translation Shared Task: Evaluation of <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> 3.5 as a Comparison System</title>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Roland</first><last>Roller</last></author>
       <author><first>Philippe</first><last>Thomas</last></author>
       <author><first>Federica</first><last>Vezzani</last></author>
@@ -82,7 +82,7 @@
       <author><first>Yufeng</first><last>Ma</last></author>
       <author><first>Weiyu</first><last>Chen</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Yulin</first><last>Yuan</last></author>
@@ -99,17 +99,17 @@
       <author><first>Malihe</first><last>Alikhani</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Richard</first><last>Bowden</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Necati</first><last>Cihan Camgöz</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
       <author><first>Mert</first><last>Inan</last></author>
       <author><first>Zifan</first><last>Jiang</last></author>
       <author><first>Oscar</first><last>Koller</last></author>
       <author><first>Amit</first><last>Moryossef</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Dimitar</first><last>Shterionov</last></author>
       <author><first>Sandra</first><last>Sidler-Miserez</last></author>
       <author><first>Katja</first><last>Tissi</last></author>
@@ -161,7 +161,7 @@
       <title><fixed-case>CUNI</fixed-case> at <fixed-case>WMT</fixed-case>23 General Translation Task: <fixed-case>MT</fixed-case> and a Genetic Algorithm</title>
       <author><first>Josef</first><last>Jon</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>119–127</pages>
       <abstract>This paper presents the contributions of Charles University teams to the WMT23 General translation task (English to Czech and Czech to Ukrainian translation directions). Our main submission, CUNI-GA, is a result of applying a novel n-best list reranking and modification method on translation candidates produced by the two other submitted systems, CUNI-Transformer and CUNI-DocTransformer (document-level translation only used for the <tex-math>en \rightarrow cs</tex-math> direction). Our method uses a genetic algorithm and MBR decoding to search for optimal translation under a given metric (in our case, a weighted combination of ChrF, BLEU, COMET22-DA, and COMET22-QE-DA). Our submissions are first in the constrained track and show competitive performance against top-tier unconstrained systems across various automatic metrics.</abstract>
       <url hash="62329ec6">2023.wmt-1.8</url>
@@ -226,7 +226,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>MUNI</fixed-case>-<fixed-case>NLP</fixed-case> Submission for <fixed-case>C</fixed-case>zech-<fixed-case>U</fixed-case>krainian Translation Task at <fixed-case>WMT</fixed-case>23</title>
-      <author><first>Pavel</first><last>Rychly</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychly</last></author>
       <author><first>Yuliia</first><last>Teslia</last></author>
       <pages>162–165</pages>
       <abstract>The system is trained on officialy provided data only. We have heavily filtered all the data to remove machine translated text, Russian text and other noise. We use the DeepNorm modification of the transformer architecture in the TorchScale library with 18 encoder layers and 6 decoder layers. The initial systems for backtranslation uses HFT tokenizer, the final system uses custom tokenizer derived from HFT.</abstract>
@@ -307,7 +307,7 @@
     <paper id="21">
       <title><fixed-case>R</fixed-case>o<fixed-case>CS</fixed-case>-<fixed-case>MT</fixed-case>: Robustness Challenge Set for Machine Translation</title>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>198–216</pages>
       <abstract>RoCS-MT, a Robust Challenge Set for Machine Translation (MT), is designed to test MT systems’ ability to translate user-generated content (UGC) that displays non-standard characteristics, such as spelling errors, devowelling, acronymisation, etc. RoCS-MT is composed of English comments from Reddit, selected for their non-standard nature, which have been manually normalised and professionally translated into five languages: French, German, Czech, Ukrainian and Russian. In the context of the WMT23 test suite shared task, we analyse the models submitted to the general MT task for all from-English language pairs, offering some insights into the types of problems faced by state-of-the-art MT models when dealing with non-standard UGC texts. We compare automatic metrics for MT quality, including quality estimation to see if the same conclusions can be drawn without references. In terms of robustness, we find that many of the systems struggle with non-standard variants of words (e.g. due to phonetically inspired spellings, contraction, truncations, etc.), but that this depends on the system and the amount of training data, with the best overall systems performing better across all phenomena. GPT4 is the clear front-runner. However we caution against drawing conclusions about generalisation capacity as it and other systems could be trained on the source side of RoCS and also on similar data.</abstract>
       <url hash="c089f521">2023.wmt-1.21</url>
@@ -352,7 +352,7 @@
     <paper id="24">
       <title><fixed-case>IIIT</fixed-case> <fixed-case>HYD</fixed-case>’s Submission for <fixed-case>WMT</fixed-case>23 Test-suite Task</title>
       <author><first>Ananya</first><last>Mukherjee</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>246–251</pages>
       <abstract>This paper summarizes the results of our test suite evaluation on 12 machine translation systems submitted at the Shared Task of the 8th Conference of Machine Translation (WMT23) for English-German (en-de) language pair. Our test suite covers five specific domains (entertainment, environment, health, science, legal) and spans five distinct writing styles (descriptive, judgments, narrative, reporting, technical-writing). We present our analysis through automatic evaluation methods, conducted with a focus on domain-specific and writing style-specific evaluations.</abstract>
       <url hash="7dacf0f0">2023.wmt-1.24</url>
@@ -363,7 +363,7 @@
       <title>Test Suites Task: Evaluation of Gender Fairness in <fixed-case>MT</fixed-case> with <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>SHE</fixed-case> and <fixed-case>INES</fixed-case></title>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>252–262</pages>
       <abstract>As part of the WMT-2023 “Test suites” shared task, in this paper we summarize the results of two test suites evaluations: MuST-SHEWMT23 and INES. By focusing on the en-de and de-en language pairs, we rely on these newly created test suites to investigate systems’ ability to translate feminine and masculine gender and produce gender-inclusive translations. Furthermore we discuss metrics associated with our test suites and validate them by means of human evaluations. Our results indicate that systems achieve reasonable and comparable performance in correctly translating both feminine and masculine gender forms for naturalistic gender phenomena. Instead, the generation of inclusive language forms in translation emerges as a challenging task for all the evaluated MT models, indicating room for future improvements and research on the topic. We make MuST-SHEWMT23 and INES freely available.</abstract>
@@ -455,7 +455,7 @@
       <author><first>Anqi</first><last>Zhao</last></author>
       <author><first>Kaiyu</first><last>Huang</last></author>
       <author><first>Hao</first><last>Yu</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>296–301</pages>
       <abstract>This paper describes the submission of DUTNLP Lab submission to WMT23 Discourse-Level Literary Translation in the Chinese to English translation direction under unconstrained conditions. Our primary system aims to leverage a large language model with various prompt strategies, which can fully investigate the potential capabilities of large language models for discourse-level neural machine translation. Moreover, we test a widely used discourse-level machine translation model, G-transformer, with different training strategies. In our experimental results, the method with large language models achieves a BLEU score of 28.16, while the fine-tuned method scores 25.26. These findings indicate that selecting appropriate prompt strategies based on large language models can significantly improve translation performance compared to traditional model training methods.</abstract>
       <url hash="1e832340">2023.wmt-1.31</url>
@@ -485,7 +485,7 @@
     <paper id="33">
       <title><fixed-case>TJUNLP</fixed-case>:System Description for the <fixed-case>WMT</fixed-case>23 Literary Task in <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish Translation Direction</title>
       <author><first>Shaolin</first><last>Zhu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>307–311</pages>
       <abstract>This paper introduces the overall situation of the Natural Language Processing Laboratory of Tianjin University participating in the WMT23 machine translation evaluation task from Chinese to English. For this evaluation, the base model used is a Transformer based on a Mixture of Experts (MOE) model. During the model’s construction and training, a basic dense model based on Transformer is first trained on the training set. Then, this model is used to initialize the MOE-based translation model, which is further trained on the training corpus. Since the training dataset provided for this translation task is relatively small, to better utilize sparse models to enhance translation, we employed a data augmentation technique for alignment. Experimental results show that this method can effectively improve neural machine translation performance.</abstract>
       <url hash="f94a5350">2023.wmt-1.33</url>
@@ -505,7 +505,7 @@
       <author><first>Ibrahima Sory</first><last>Conde</last></author>
       <author><first>Kalo Mory</first><last>Diané</last></author>
       <author><first>Chris</first><last>Piech</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>312–343</pages>
       <abstract>Currently, there is no usable machine translation system for Nko, a language spoken by tens of millions of people across multiple West African countries, which holds significant cultural and educational value. To address this issue, we present a set of tools, resources, and baseline results aimed towards the development of usable machine translation systems for Nko and other languages that do not currently have sufficiently large parallel text corpora available. (1) Fria<tex-math>\parallel</tex-math>el: A novel collaborative parallel text curation software that incorporates quality control through copyedit-based workflows. (2) Expansion of the FLoRes-200 and NLLB-Seed corpora with 2,009 and 6,193 high-quality Nko translations in parallel with 204 and 40 other languages. (3) nicolingua-0005: A collection of trilingual and bilingual corpora with 130,850 parallel segments and monolingual corpora containing over 3 million Nko words. (4) Baseline bilingual and multilingual neural machine translation results with the best model scoring 30.83 English-Nko chrF++ on FLoRes-devtest.</abstract>
       <url hash="2cc5f887">2023.wmt-1.34</url>
@@ -567,7 +567,7 @@
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Pavel</first><last>Petrushkov</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>375–391</pages>
       <abstract>Despite the known limitations, most machine translation systems today still operate on the sentence-level. One reason for this is, that most parallel training data is only sentence-level aligned, without document-level meta information available. In this work, we set out to build context-aware translation systems utilizing document-level monolingual data instead. This can be achieved by combining any existing sentence-level translation model with a document-level language model. We improve existing approaches by leveraging recent advancements in model combination. Additionally, we propose novel weighting techniques that make the system combination more flexible and significantly reduce computational overhead. In a comprehensive evaluation on four diverse translation tasks, we show that our extensions improve document-targeted scores significantly and are also computationally more efficient. However, we also find that in most scenarios, back-translation gives even better results, at the cost of having to re-train the translation system. Finally, we explore language model fusion in the light of recent advancements in large language models. Our findings suggest that there might be strong potential in utilizing large language models via model combination.</abstract>
       <url hash="bfe181a6">2023.wmt-1.39</url>
@@ -578,7 +578,7 @@
       <title><fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> <fixed-case>MT</fixed-case>: Competitive for High- (but Not Low-) Resource Languages</title>
       <author><first>Nathaniel</first><last>Robinson</last></author>
       <author><first>Perez</first><last>Ogayo</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>392–418</pages>
       <abstract>Large language models (LLMs) implicitly learn to perform a range of language tasks, including machine translation (MT). Previous studies explore aspects of LLMs’ MT capabilities. However, there exist a wide variety of languages for which recent LLM MT performance has never before been evaluated. Without published experimental evidence on the matter, it is difficult for speakers of the world’s diverse languages to know how and whether they can use LLMs for their languages. We present the first experimental evidence for an expansive set of 204 languages, along with MT cost analysis, using the FLORES-200 benchmark. Trends reveal that GPT models approach or exceed traditional MT model performance for some high-resource languages (HRLs) but consistently lag for low-resource languages (LRLs), under-performing traditional MT for 84.1% of languages we covered. Our analysis reveals that a language’s resource level is the most important feature in determining ChatGPT’s relative ability to translate it, and suggests that ChatGPT is especially disadvantaged for LRLs and African languages.</abstract>
@@ -644,7 +644,7 @@
     <paper id="46">
       <title>Bridging the Gap between Position-Based and Content-Based Self-Attention for Neural Machine Translation</title>
       <author><first>Felix</first><last>Schmidt</last></author>
-      <author><first>Mattia</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia</first><last>Di Gangi</last></author>
       <pages>507–521</pages>
       <abstract>Position-based token-mixing approaches, such as FNet and MLPMixer, have shown to be exciting attention alternatives for computer vision and natural language understanding. The motivation is usually to remove redundant operations for higher efficiency on consumer GPUs while maintaining Transformer quality. On the hardware side, research on memristive crossbar arrays shows the possibility of efficiency gains up to two orders of magnitude by performing in-memory computation with weights stored on device. While it is impossible to store dynamic attention weights based on token-token interactions on device, position-based weights represent a concrete alternative if they only lead to minimal degradation. In this paper, we propose position-based attention as a variant of multi-head attention where the attention weights are computed from position representations. A naive replacement of token vectors with position vectors in self-attention results in a significant loss in translation quality, which can be recovered by using relative position representations and a gating mechanism. We show analytically that this gating mechanism introduces some form of word dependency and validate its effectiveness experimentally under various conditions. The resulting network, rPosNet, outperforms previous position-based approaches and matches the quality of the Transformer with relative position embedding while requiring 20% less attention parameters after training.</abstract>
       <url hash="fefc1163">2023.wmt-1.46</url>
@@ -672,9 +672,9 @@
       <author><first>Christophe</first><last>Ropers</last></author>
       <author><first>Eric</first><last>Smith</last></author>
       <author><first>Adina</first><last>Williams</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Pierre</first><last>Andrews</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>536–550</pages>
       <abstract>Gender biases in language generation systems are challenging to mitigate. One possible source for these biases is gender representation disparities in the training and evaluation data. Despite recent progress in documenting this problem and many attempts at mitigating it, we still lack shared methodology and tooling to report gender representation in large datasets. Such quantitative reporting will enable further mitigation, e.g., via data augmentation. This paper describes the Gender-Gap Pipeline (for Gender-Aware Polyglot Pipeline), an automatic pipeline to characterize gender representation in large-scale datasets for 55 languages. The pipeline uses a multilingual lexicon of gendered person-nouns to quantify the gender representation in text. We showcase it to report gender representation in WMT training data and development data for the News task, confirming that current data is skewed towards masculine representation. Having unbalanced datasets may indirectly optimize our systems towards outperforming one gender over the others. We suggest introducing our gender quantification pipeline in current datasets and, ideally, modifying them toward a balanced representation.</abstract>
       <url hash="1b0f1b00">2023.wmt-1.48</url>
@@ -715,12 +715,12 @@
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Brian</first><last>Thompson</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Frederic</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
       <author><first>Daniel</first><last>Deutsch</last></author>
       <author><first>Craig</first><last>Stewart</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>George</first><last>Foster</last></author>
       <pages>578–628</pages>
       <abstract>This paper presents the results of the WMT23 Metrics Shared Task. Participants submitting automatic MT evaluation metrics were asked to score the outputs of the translation systems competing in the WMT23 News Translation Task. All metrics were evaluated on how well they correlate with human ratings at the system and segment level. Similar to last year, we acquired our own human ratings based on expert-based human evaluation via Multidimensional Quality Metrics (MQM). Following last year’s success, we also included a challenge set subtask, where participants had to create contrastive test suites for evaluating metrics’ ability to capture and penalise specific types of translation errors. Furthermore, we improved our meta-evaluation procedure by considering fewer tasks and calculating a global score by weighted averaging across the various tasks. We present an extensive analysis on how well metrics perform on three language pairs: Chinese-English, Hebrew-English on the sentence-level and English-German on the paragraph-level. The results strongly confirm the results reported last year, that neural-based metrics are significantly better than non-neural metrics in their levels of correlation with human judgments. Further, we investigate the impact of bad reference translations on the correlations of metrics with human judgment. We present a novel approach for generating synthetic reference translations based on the collection of MT system outputs and their corresponding MQM ratings, which has the potential to mitigate bad reference issues we observed this year for some language pairs. Finally, we also study the connections between the magnitude of metric differences and their expected significance in human evaluation, which should help the community to better understand and adopt new metrics.</abstract>
@@ -730,18 +730,18 @@
     </paper>
     <paper id="52">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2023 Shared Task on Quality Estimation</title>
-      <author><first>Frederic</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Nuno M.</first><last>Guerreiro</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Beatriz</first><last>Silva</last></author>
       <author><first>Tânia</first><last>Vaz</last></author>
       <author><first>Yan</first><last>Jingxuan</last></author>
       <author><first>Fatemeh</first><last>Azadi</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <pages>629–653</pages>
       <abstract>We report the results of the WMT 2023 shared task on Quality Estimation, in which the challenge is to predict the quality of the output of neural machine translation systems at the word and sentence levels, without access to reference translations. This edition introduces a few novel aspects and extensions that aim to enable more fine-grained, and explainable quality estimation approaches. We introduce an updated quality annotation scheme using Multidimensional Quality Metrics to obtain sentence- and word-level quality scores for three language pairs. We also extend the provided data to new language pairs: we specifically target low-resource languages and provide training, development and test data for English-Hindi, English-Tamil, English-Telegu and English-Gujarati as well as a zero-shot test-set for English-Farsi. Further, we introduce a novel fine-grained error prediction task aspiring to motivate research towards more detailed quality predictions.</abstract>
       <url hash="c668a6f5">2023.wmt-1.52</url>
@@ -751,14 +751,14 @@
     <paper id="53">
       <title>Findings of the Word-Level <fixed-case>A</fixed-case>uto<fixed-case>C</fixed-case>ompletion Shared Task in <fixed-case>WMT</fixed-case> 2023</title>
       <author><first>Lemao</first><last>Liu</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Guoping</first><last>Huang</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Geza</first><last>Kovacs</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>654–662</pages>
       <abstract>This paper presents the overview of the second Word-Level autocompletion (WLAC) shared task for computer-aided translation, which aims to automatically complete a target word given a translation context including a human typed character sequence. We largely adhere to the settings of the previous round of the shared task, but with two main differences: 1) The typed character sequence is obtained from the typing process of human translators to demonstrate system performance under real-world scenarios when preparing some type of testing examples; 2) We conduct a thorough analysis on the results of the submitted systems from three perspectives. From the experimental results, we observe that translation tasks are helpful to improve the performance of WLAC models. Additionally, our further analysis shows that the semantic error accounts for a significant portion of all errors, and thus it would be promising to take this type of errors into account in future.</abstract>
       <url hash="34bb19c4">2023.wmt-1.53</url>
@@ -781,11 +781,11 @@
     </paper>
     <paper id="55">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2023 Shared Task on Automatic Post-Editing</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>672–681</pages>
       <abstract>We present the results from the 9th round of the WMT shared task on MT Automatic Post-Editing, which consists of automatically correcting the output of a “black-box” machine translation system by learning from human corrections. Like last year, the task focused on English→Marathi, with data coming from multiple domains (healthcare, tourism, and general/news). Despite the consistent task framework, this year’s data proved to be extremely challenging. As a matter of fact, none of the official submissions from the participating teams succeeded in improving the quality of the already high-level initial translations (with baseline TER and BLEU scores of 26.6 and 70.66, respectively). Only one run, accepted as a “late” submission, achieved automatic evaluation scores that exceeded the baseline.</abstract>
@@ -856,7 +856,7 @@
     </paper>
     <paper id="61">
       <title>e<fixed-case>BLEU</fixed-case>: Unexpectedly Good Machine Translation Evaluation Using Simple Word Embeddings</title>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
       <pages>746–750</pages>
       <abstract>We propose eBLEU, a metric inspired by BLEU metric that uses embedding similarities instead of string matches. We introduce meaning diffusion vectors to enable matching n-grams of semantically similar words in a BLEU-like algorithm, using efficient, non-contextual word embeddings like fastText. On WMT23 data, eBLEU beats BLEU and ChrF by around 3.8% system-level score, approaching BERTScore at −0.9% absolute difference. In WMT22 scenarios, eBLEU outperforms f101spBLEU and ChrF in MQM by 2.2%−3.6%. Curiously, on MTurk evaluations, eBLEU surpasses past methods by 3.9%−8.2% (f200spBLEU, COMET-22). eBLEU presents an interesting middle-ground between traditional metrics and pretrained metrics.</abstract>
@@ -913,7 +913,7 @@
     <paper id="66">
       <title><fixed-case>MEE</fixed-case>4 and <fixed-case>XL</fixed-case>sim : <fixed-case>IIIT</fixed-case> <fixed-case>HYD</fixed-case>’s Submissions’ for <fixed-case>WMT</fixed-case>23 Metrics Shared Task</title>
       <author><first>Ananya</first><last>Mukherjee</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>800–805</pages>
       <abstract>This paper presents our contributions to the WMT2023 shared metrics task, consisting of two distinct evaluation approaches: a) Unsupervised Metric (MEE4) and b) Supervised Metric (XLSim). MEE4 represents an unsupervised, reference-based assessment metric that quantifies linguistic features, encompassing lexical, syntactic, semantic, morphological, and contextual similarities, leveraging embeddings. In contrast, XLsim is a supervised reference-based evaluation metric, employing a Siamese Architecture, which regresses on Direct Assessments (DA) from previous WMT News Translation shared tasks from 2017-2022. XLsim is trained using XLM-RoBERTa (base) on English-German reference and mt pairs with human scores.</abstract>
       <url hash="985414ea">2023.wmt-1.66</url>
@@ -984,7 +984,7 @@
       <author><first>Yu</first><last>Zhang</last></author>
       <author><first>Shimin</first><last>Tao</last></author>
       <author><first>Hao</first><last>Yang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <pages>829–834</pages>
       <abstract>We introduce the submissions of the NJUNLP team to the WMT 2023 Quality Estimation (QE) shared task. Our team submitted predictions for the English-German language pair on all two sub-tasks: (i) sentence- and word-level quality prediction; and (ii) fine-grained error span detection. This year, we further explore pseudo data methods for QE based on NJUQE framework (https://github.com/NJUNLP/njuqe). We generate pseudo MQM data using parallel data from the WMT translation task. We pre-train the XLMR large model on pseudo QE data, then fine-tune it on real QE data. At both stages, we jointly learn sentence-level scores and word-level tags. Empirically, we conduct experiments to find the key hyper-parameters that improve the performance. Technically, we propose a simple method that covert the word-level outputs to fine-grained error span results. Overall, our models achieved the best results in English-German for both word-level and fine-grained error span detection sub-tasks by a considerable margin.</abstract>
@@ -1014,9 +1014,9 @@
       <author><first>JosÃ©</first><last>Pombal</last></author>
       <author><first>Daan</first><last>van Stigt</last></author>
       <author><first>Marcos</first><last>Treviso</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <pages>841–848</pages>
       <abstract>We present the joint contribution of Unbabel and Instituto Superior Técnico to the WMT 2023 Shared Task on Quality Estimation (QE). Our team participated on all tasks: Sentence- and Word-level Quality Prediction and Fine-grained error span detection. For all tasks we build on the CometKiwi model (rei et al. 2022). Our multilingual approaches are ranked first for all tasks, reaching state-of-the-art performance for quality estimation at word-, span- and sentence-level granularity. Compared to the previous state-of-the-art, CometKiwi, we show large improvements in correlation with human judgements (up to 10 Spearman points) and surpassing the second-best multilingual submission with up to 3.8 absolute points.</abstract>
       <url hash="c84fe212">2023.wmt-1.73</url>
@@ -1027,7 +1027,7 @@
       <title><fixed-case>S</fixed-case>urrey<fixed-case>AI</fixed-case> 2023 Submission for the Quality Estimation Shared Task</title>
       <author><first>Archchana</first><last>Sindhujan</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
       <pages>849–855</pages>
       <abstract>Quality Estimation (QE) systems are important in situations where it is necessary to assess the quality of translations, but there is no reference available. This paper describes the approach adopted by the SurreyAI team for addressing the Sentence-Level Direct Assessment shared task in WMT23. The proposed approach builds upon the TransQuest framework, exploring various autoencoder pre-trained language models within the MonoTransQuest architecture using single and ensemble settings. The autoencoder pre-trained language models employed in the proposed systems are XLMV, InfoXLM-large, and XLMR-large. The evaluation utilizes Spearman and Pearson correlation coefficients, assessing the relationship between machine-predicted quality scores and human judgments for 5 language pairs (English-Gujarati, English-Hindi, English-Marathi, English-Tamil and English-Telugu). The MonoTQ-InfoXLM-large approach emerges as a robust strategy, surpassing all other individual models proposed in this study by significantly improving over the baseline for the majority of the language pairs.</abstract>
@@ -1039,8 +1039,8 @@
       <title><fixed-case>MMT</fixed-case>’s Submission for the <fixed-case>WMT</fixed-case> 2023 Quality Estimation Shared Task</title>
       <author><first>Yulong</first><last>Wu</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>856–862</pages>
       <abstract>This paper presents our submission to the WMT 2023 Quality Estimation (QE) shared task 1 (sentence-level subtask). We propose a straightforward training data augmentation approach aimed at improving the correlation between QE model predictions and human quality assessments. Utilising eleven data augmentation approaches and six distinct language pairs, we systematically create augmented training sets by individually applying each method to the original training set of each respective language pair. By evaluating the performance gap between the model before and after training on the augmented dataset, as measured on the development set, we assess the effectiveness of each augmentation method. Experimental results reveal that synonym replacement via the Paraphrase Database (PPDB) yields the most substantial performance boost for language pairs English-German, English-Marathi and English-Gujarati, while for the remaining language pairs, methods such as contextual word embeddings-based words insertion, back translation, and direct paraphrasing prove to be more effective. Training the model on a more diverse and larger set of samples does confer further performance improvements for certain language pairs, albeit to a marginal extent, and this phenomenon is not universally applicable. At the time of submission, we select the model trained on the augmented dataset constructed using the respective most effective method to generate predictions for the test set in each language pair, except for the English-German. Despite not being highly competitive, our system consistently surpasses the baseline performance on most language pairs and secures a third-place ranking in the English-Marathi.</abstract>
       <url hash="3b5ebd0e">2023.wmt-1.75</url>
@@ -1070,7 +1070,7 @@
       <title><fixed-case>PRHLT</fixed-case>’s Submission to <fixed-case>WLAC</fixed-case> 2023</title>
       <author><first>Angel</first><last>Navarro</last></author>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>877–881</pages>
       <abstract>This paper describes our submission to the Word-Level AutoCompletion shared task of WMT23. We participated in the English–German and German–English categories. We extended our last year segment-based interactive machine translation approach to address its weakness when no context is available. Additionally, we fine-tune the pre-trained mT5 large language model to be used for autocompletion.</abstract>
       <url hash="e61bd2b5">2023.wmt-1.78</url>
@@ -1116,7 +1116,7 @@
       <author><first>Yasmin</first><last>Moslem</last></author>
       <author><first>Gianfranco</first><last>Romani</last></author>
       <author><first>Mahdi</first><last>Molaei</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>902–911</pages>
@@ -1206,7 +1206,7 @@
       <author><first>Pranav</first><last>Gaikwad</last></author>
       <author><first>Meet</first><last>Doshi</last></author>
       <author><first>Sourabh</first><last>Deoghare</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>950–953</pages>
       <abstract>This paper is related to the submission of the CFILT-IITB team for the task called IndicMT in WMT23. The paper describes our MT systems submitted to the WMT23 IndicMT shared task. The task focused on MT system development from/to English and four low-resource North-East Indian languages, viz., Assamese, Khasi, Manipuri, and Mizo. We trained them on a small parallel corpus resulting in poor-quality systems. Therefore, we utilize transfer learning with the help of a large pre-trained multilingual NMT system. Since this approach produced the best results, we submitted our NMT models for the shared task using this approach.</abstract>
       <url hash="050f7f1e">2023.wmt-1.89</url>
@@ -1216,7 +1216,7 @@
     <paper id="90">
       <title>Low-Resource Machine Translation Systems for <fixed-case>I</fixed-case>ndic Languages</title>
       <author><first>Ivana</first><last>Kvapilíková</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>954–958</pages>
       <abstract>We present our submission to the WMT23 shared task in translation between English and Assamese, Khasi, Mizo and Manipuri. All our systems were pretrained on the task of multilingual masked language modelling and denoising auto-encoding. Our primary systems for translation into English were further pretrained for multilingual MT in all four language directions and fine-tuned on the limited parallel data available for each language pair separately. We used online back-translation for data augmentation. The same systems were submitted as contrastive for translation out of English as the multilingual MT pretraining step seemed to harm the translation performance. Our primary systems for translation out of English were trained without the multilingual MT pretraining step. Other contrastive systems used additional pseudo-parallel data mined from monolingual corpora for pretraining.</abstract>
       <url hash="e51c8a5d">2023.wmt-1.90</url>
@@ -1226,7 +1226,7 @@
     <paper id="91">
       <title><fixed-case>MUNI</fixed-case>-<fixed-case>NLP</fixed-case> Systems for Low-resource <fixed-case>I</fixed-case>ndic Machine Translation</title>
       <author><first>Edoardo</first><last>Signoroni</last></author>
-      <author><first>Pavel</first><last>Rychly</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychly</last></author>
       <pages>959–966</pages>
       <abstract>The WMT 2023 Shared Task on Low-Resource Indic Language Translation featured to and from Assamese, Khasi, Manipuri, Mizo on one side and English on the other. We submitted systems supervised neural machine translation systems for each pair and direction and experimented with different configurations and settings for both preprocessing and training. Even if most of them did not reach competitive performance, our experiments uncovered some interesting points for further investigation, namely the relation between dataset and model size, and the impact of the training framework. Moreover, the results of some of our preliminary experiments on the use of word embeddings initialization, backtranslation, and model depth were in contrast with previous work. The final results also show some disagreement in the automated metrics employed in the evaluation.</abstract>
       <url hash="968b7797">2023.wmt-1.91</url>
@@ -1238,8 +1238,8 @@
       <author><first>Kshetrimayum Boynao</first><last>Singh</last></author>
       <author><first>Ningthoujam Avichandra</first><last>Singh</last></author>
       <author><first>Loitongbam</first><last>Sanayai Meetei</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>967–971</pages>
       <abstract>This paper describes the transformer-based Neural Machine translation (NMT) system for the Low-Resource Indic Language Translation task for the English-Manipuri language pair submitted by the Centre for Natural Language Processing in National Institute of Technology Silchar, India (NITS-CNLP) in the WMT 2023 shared task. The model attained an overall BLEU score of 22.75 and 26.92 for the English to Manipuri and Manipuri to English translations respectively. Experimental results for English to Manipuri and Manipuri to English models for character level n-gram F-score (chrF) of 48.35 and 48.64, RIBES of 0.61 and 0.65, TER of 70.02 and 67.62, as well as COMET of 0.70 and 0.66 respectively are reported.</abstract>
       <url hash="504ac42b">2023.wmt-1.92</url>
@@ -1251,7 +1251,7 @@
       <author><first>Dhairya</first><last>Suman</last></author>
       <author><first>Atanu</first><last>Mandal</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
       <pages>972–977</pages>
       <abstract>Even though, machine translation has seen huge improvements in the the last decade, translation quality for Indic languages is still underwhelming, which is attributed to the small amount of parallel data available. In this paper, we present our approach to mitigate the issue of the low amount of parallel training data availability for Indic languages, especially for the language pair English-Manipuri and Assamese-English. Our primary submission for the Manipuri-to-English translation task provided the best scoring system for this language direction. We describe about the systems we built in detail and our findings in the process.</abstract>
       <url hash="f81f985b">2023.wmt-1.93</url>
@@ -1299,7 +1299,7 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Hendra</first><last>Setiawan</last></author>
       <author><first>Dominic</first><last>Telaar</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <pages>1014–1030</pages>
       <abstract>Behavioral testing in NLP allows fine-grained evaluation of systems by examining their linguistic capabilities through the analysis of input-output behavior. Unfortunately, existing work on behavioral testing in Machine Translation (MT) is currently restricted to largely handcrafted tests covering a limited range of capabilities and languages. To address this limitation, we propose to use Large Language Models (LLMs) to generate a diverse set of source sentences tailored to test the behavior of MT models in a range of situations. We can then verify whether the MT model exhibits the expected behavior through matching candidate sets that are also generated using LLMs. Our approach aims to make behavioral testing of MT systems practical while requiring only minimal human effort. In our experiments, we apply our proposed evaluation framework to assess multiple available MT systems, revealing that while in general pass-rates follow the trends observable from traditional accuracy-based metrics, our method was able to uncover several important differences and potential bugs that go unnoticed when relying only on accuracy.</abstract>
       <url hash="277d08ca">2023.wmt-1.97</url>
@@ -1336,10 +1336,10 @@
       <author><first>Daniel</first><last>Deutsch</last></author>
       <author><first>Mara</first><last>Finkelstein</last></author>
       <author><first>Parker</first><last>Riley</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Ankush</first><last>Garg</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <pages>1066–1083</pages>
diff --git a/data/xml/2023.wnu.xml b/data/xml/2023.wnu.xml
index a012e7a305..ee041c5d5a 100644
--- a/data/xml/2023.wnu.xml
+++ b/data/xml/2023.wnu.xml
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>Evaluation Metrics for Depth and Flow of Knowledge in Non-fiction Narrative Texts</title>
       <author><first>Sachin</first><last>Pawar</last><affiliation>Tata Consultancy Services Ltd.</affiliation></author>
-      <author><first>Girish</first><last>Palshikar</last><affiliation>Tata Consultancy Services Limited</affiliation></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last><affiliation>Tata Consultancy Services Limited</affiliation></author>
       <author><first>Ankita</first><last>Jain</last><affiliation>Tata Consultancy Services Ltd.</affiliation></author>
       <author><first>Mahesh</first><last>Singh</last><affiliation>Tata Consultancy Services Ltd.</affiliation></author>
       <author><first>Mahesh</first><last>Rangarajan</last><affiliation>Tata Consultancy Services Ltd.</affiliation></author>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="5">
       <title>Modeling Readers’ Appreciation of Literary Narratives Through Sentiment Arcs and Semantic Profiles</title>
-      <author><first>Pascale</first><last>Moreira</last><affiliation>Comparative Literature, School of Communication and Culture, Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Moreira</last><affiliation>Comparative Literature, School of Communication and Culture, Aarhus University</affiliation></author>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
       <author><first>Ida Marie</first><last>Lassen</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
@@ -73,7 +73,7 @@
       <title>Word Category Arcs in Literature Across Languages and Genres</title>
       <author><first>Winston</first><last>Wu</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Lu</first><last>Wang</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>36-47</pages>
       <url hash="02d61f23">2023.wnu-1.6</url>
       <bibkey>wu-etal-2023-word</bibkey>
@@ -153,7 +153,7 @@
     <paper id="16">
       <title>Composition and Deformance: Measuring Imageability with a Text-to-Image Model</title>
       <author><first>Si</first><last>Wu</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>David</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
       <pages>106-117</pages>
       <abstract>Although psycholinguists and psychologists have long studied the tendency of linguistic strings to evoke mental images in hearers or readers, most computational studies have applied this concept of imageability only to isolated words. Using recent developments in text-to-image generation models, such as DALLE mini, we propose computational methods that use generated images to measure the imageability of both single English words and connected text. We sample text prompts for image generation from three corpora: human-generated image captions, news article sentences, and poem lines. We subject these prompts to different deformances to examine the model’s ability to detect changes in imageability caused by compositional change. We find high correlation between the proposed computational measures of imageability and human judgments of individual words. We also find the proposed measures more consistently respond to changes in compositionality than baseline approaches. We discuss possible effects of model training and implications for the study of compositionality in text-to-image models.</abstract>
       <url hash="1acf032a">2023.wnu-1.16</url>
@@ -163,7 +163,7 @@
     <paper id="19">
       <title>Narrative Cloze as a Training Objective: Towards Modeling Stories Using Narrative Chain Embeddings</title>
       <author><first>Hans Ole</first><last>Hatzel</last><affiliation>Universitt Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universitt Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universitt Hamburg</affiliation></author>
       <pages>118-127</pages>
       <abstract>We present a novel approach to modeling narratives using narrative chain embeddings.A new dataset of narrative chains extracted from German news texts is presented. With neural methods, we produce models for both German and English that achieve state-of-the-art performance on the Multiple Choice Narrative Cloze task. Subsequently, we perform an extrinsic evaluation of the embeddings our models produce and show that they perform rather poorly in identifying narratively similar texts. We explore some of the reasons for this underperformance and discuss the upsides of our approach. We provide an outlook on alternative ways to model narratives, as well as techniques for evaluating such models.</abstract>
       <url hash="59195cac">2023.wnu-1.19</url>
diff --git a/data/xml/2023.woah.xml b/data/xml/2023.woah.xml
index cc785b13cf..736d52ff94 100644
--- a/data/xml/2023.woah.xml
+++ b/data/xml/2023.woah.xml
@@ -20,7 +20,7 @@
       <author><first>Michael</first><last>Yoder</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Chloe</first><last>Perry</last><affiliation>University of Michigan</affiliation></author>
       <author><first>David</first><last>Brown</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Kathleen</first><last>Carley</last><affiliation>Carnegie Mellon University, Netanomics</affiliation></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last><affiliation>Carnegie Mellon University, Netanomics</affiliation></author>
       <author><first>Meredith</first><last>Pruden</last><affiliation>Kennesaw State University</affiliation></author>
       <pages>1-13</pages>
       <abstract>Online communities of involuntary celibates (incels) are a prominent source of misogynist hate speech. In this paper, we use quantitative text and network analysis approaches to examine how identity groups are discussed on incels.is, the largest black-pilled incels forum. We find that this community produces a wide range of novel identity terms and, while terms for women are most common, mentions of other minoritized identities are increasing. An analysis of the associations made with identity groups suggests an essentialist ideology where physical appearance, as well as gender and racial hierarchies, determine human value. We discuss implications for research into automated misogynist hate speech detection.</abstract>
@@ -116,7 +116,7 @@
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Georgina</first><last>Curto Rex</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>113-125</pages>
       <abstract>While many types of hate speech and online toxicity have been the focus of extensive research in NLP, toxic language stigmatizing poor people has been mostly disregarded. Yet, aporophobia, a social bias against the poor, is a common phenomenon online, which can be psychologically damaging as well as hindering poverty reduction policy measures. We demonstrate that aporophobic attitudes are indeed present in social media and argue that the existing NLP datasets and models are inadequate to effectively address this problem. Efforts toward designing specialized resources and novel socio-technical mechanisms for confronting aporophobia are needed.</abstract>
       <url hash="a53849d9">2023.woah-1.12</url>
@@ -126,7 +126,7 @@
     <paper id="13">
       <title>Problematic Webpage Identification: A Trilogy of Hatespeech, Search Engines and <fixed-case>GPT</fixed-case></title>
       <author><first>Ojasvin</first><last>Sood</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
       <pages>126-137</pages>
       <abstract>In this paper, we introduce a fine-tuned transformer-based model focused on problematic webpage classification to identify webpages promoting hate and violence of various forms. Due to the unavailability of labelled problematic webpage data, first we propose a novel webpage data collection strategy which leverages well-studied short-text hate speech datasets. We have introduced a custom GPT-4 few-shot prompt annotation scheme taking various webpage features to label the prohibitively expensive webpage annotation task. The resulting annotated data is used to build our problematic webpage classification model. We report the accuracy (87.6% F1-score) of our webpage classification model and conduct a detailed comparison of it against other state-of-the-art hate speech classification model on problematic webpage identification task. Finally, we have showcased the importance of various webpage features in identifying a problematic webpage.</abstract>
       <url hash="b16cc094">2023.woah-1.13</url>
@@ -137,7 +137,7 @@
       <title>Concept-Based Explanations to Test for False Causal Relationships Learned by Abusive Language Classifiers</title>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Esma</first><last>Balkir</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>138-149</pages>
       <abstract>Classifiers tend to learn a false causal relationship between an over-represented concept and a label, which can result in over-reliance on the concept and compromised classification accuracy. It is imperative to have methods in place that can compare different models and identify over-reliances on specific concepts. We consider three well-known abusive language classifiers trained on large English datasets and focus on the concept of negative emotions, which is an important signal but should not be learned as a sufficient feature for the label of abuse. Motivated by the definition of global sufficiency, we first examine the unwanted dependencies learned by the classifiers by assessing their accuracy on a challenge set across all decision thresholds. Further, recognizing that a challenge set might not always be available, we introduce concept-based explanation metrics to assess the influence of the concept on the labels. These explanations allow us to compare classifiers regarding the degree of false global sufficiency they have learned between a concept and a label.</abstract>
@@ -149,7 +149,7 @@
       <title>“Female Astronaut: Because sandwiches won’t make themselves up there”: Towards Multimodal misogyny detection in memes</title>
       <author><first>Smriti</first><last>Singh</last><affiliation>Manipal Institute of Technology</affiliation></author>
       <author><first>Amritha</first><last>Haridasan</last><affiliation>The University of Texas at Austin</affiliation></author>
-      <author><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last><affiliation>University of Texas at Austin</affiliation></author>
       <pages>150-159</pages>
       <abstract>A rise in the circulation of memes has led to the spread of a new form of multimodal hateful content. Unfortunately, the degree of hate women receive on the internet is disproportionately skewed against them. This, combined with the fact that multimodal misogyny is more challenging to detect as opposed to traditional text-based misogyny, signifies that the task of identifying misogynistic memes online is one of utmost importance. To this end, the MAMI dataset was released, consisting of 12000 memes annotated for misogyny and four sub-classes of misogyny - shame, objectification, violence and stereotype. While this balanced dataset is widely cited, we find that the task itself remains largely unsolved. Thus, in our work, we investigate the performance of multiple models in an effort to analyse whether domain specific pretraining helps model performance. We also investigate why even state of the art models find this task so challenging, and whether domain-specific pretraining can help. Our results show that pretraining BERT on hateful memes and leveraging an attention based approach with ViT outperforms state of the art models by more than 10%. Further, we provide insight into why these models may be struggling with this task with an extensive qualitative analysis of random samples from the test set.</abstract>
       <url hash="a94181f0">2023.woah-1.15</url>
@@ -222,7 +222,7 @@
       <title>Harmful Language Datasets: An Assessment of Robustness</title>
       <author><first>Katerina</first><last>Korre</last><affiliation>University of Bologna</affiliation></author>
       <author><first>John</first><last>Pavlopoulos</last><affiliation>Athens University of Economics and Business</affiliation></author>
-      <author><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
       <author><first>Léo</first><last>Laugier</last><affiliation>Swiss Federal Institute of Technology of Lausanne</affiliation></author>
       <author><first>Ion</first><last>Androutsopoulos</last><affiliation>Athens University of Economics and Business</affiliation></author>
       <author><first>Lucas</first><last>Dixon</last><affiliation>Google Research</affiliation></author>
@@ -236,7 +236,7 @@
     <paper id="25">
       <title>Robust Hate Speech Detection in Social Media: A Cross-Dataset Empirical Evaluation</title>
       <author><first>Dimosthenis</first><last>Antypas</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>231-242</pages>
       <abstract>The automatic detection of hate speech online is an active research area in NLP. Most of the studies to date are based on social media datasets that contribute to the creation of hate speech detection models trained on them. However, data creation processes contain their own biases, and models inherently learn from these dataset-specific biases. In this paper, we perform a large-scale cross-dataset comparison where we fine-tune language models on different hate speech detection datasets. This analysis shows how some datasets are more generalizable than others when used as training data. Crucially, our experiments show how combining hate speech detection datasets can contribute to the development of robust hate speech detection models. This robustness holds even when controlling by data size and compared with the best individual datasets.</abstract>
       <url hash="9b8b4808">2023.woah-1.25</url>
diff --git a/data/xml/2023.wsc.xml b/data/xml/2023.wsc.xml
index db1ed47d03..91e637f0e3 100644
--- a/data/xml/2023.wsc.xml
+++ b/data/xml/2023.wsc.xml
@@ -21,7 +21,7 @@
       <author><first>Ashim</first><last>Gupta</last></author>
       <author><first>Deepak</first><last>Garasangi</last></author>
       <author><first>Jeevnesh</first><last>Sandhan</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>1–20</pages>
       <url hash="e2dd03df">2023.wsc-csdh.1</url>
diff --git a/data/xml/2024.acl.xml b/data/xml/2024.acl.xml
index 2a6cfe0541..235cfa7165 100644
--- a/data/xml/2024.acl.xml
+++ b/data/xml/2024.acl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <editor><first>Lun-Wei</first><last>Ku</last><affiliation>Academia Sinica</affiliation></editor>
-      <editor><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
+      <editor id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
       <editor><first>Vivek</first><last>Srikumar</last><affiliation>University of Utah</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
@@ -24,7 +24,7 @@
       <author><first>Dan</first><last>Zhao</last><affiliation>Peng Cheng Laborotary</affiliation></author>
       <author><first>Xupeng</first><last>Miao</last><affiliation>Purdue University</affiliation></author>
       <author><first>Gabriele</first><last>Oliaro</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Qing</first><last>Li</last><affiliation>Peng Cheng Laboratory</affiliation></author>
       <author><first>Yong</first><last>Jiang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zhihao</first><last>Jia</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
@@ -89,7 +89,7 @@
       <title><fixed-case>G</fixed-case>en<fixed-case>T</fixed-case>ranslate: Large Language Models are Generative Multilingual Speech and Machine Translators</title>
       <author><first>Yuchen</first><last>Hu</last></author>
       <author><first>Chen</first><last>Chen</last></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last></author>
       <author><first>Ruizhe</first><last>Li</last></author>
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Zhehuai</first><last>Chen</last></author>
@@ -148,7 +148,7 @@
       <author><first>Mo</first><last>Yu</last><affiliation>WeChat AI, Tencent</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>133-145</pages>
       <abstract>Retrieval-augmented generation (RAG) enhances large language models (LLMs) by incorporating additional information from retrieval. However, studies have shown that LLMs still face challenges in effectively using the retrieved information, even ignore it or be misled by it. The key reason is that the training of LLMs does not clearly make LLMs learn how to utilize input retrieved texts with varied quality. In this paper, we propose a novel perspective that considers the role of LLMs in RAG as “Information Refiner”, which means that regardless of correctness, completeness, or usefulness of retrieved texts, LLMs can consistently integrate knowledge within the retrieved texts and model parameters to generate the texts that are more concise, accurate, and complete than the retrieved texts. To this end, we propose an information refinement training method named INFO-RAG that optimizes LLMs for RAG in an unsupervised manner. INFO-RAG is low-cost and general across various tasks. Extensive experiments on zero-shot prediction of 11 datasets in diverse tasks including Question Answering, Slot-Filling, Language Modeling, Dialogue, and Code Generation show that INFO-RAG improves the performance of LLaMA2 by an average of 9.39% relative points. INFO-RAG also shows advantages in in-context learning and robustness of RAG.</abstract>
@@ -189,7 +189,7 @@
       <author><first>Mingfeng</first><last>Xue</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Dayiheng</first><last>Liu</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Wei</first><last>Wang</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chang</first><last>Zhou</last></author>
       <author><first>Jingren</first><last>Zhou</last><affiliation>Alibaba Group</affiliation></author>
       <pages>177-198</pages>
@@ -203,7 +203,7 @@
       <author><first>Shanshan</first><last>Xu</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Santosh</first><last>T.y.s.s</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Oana</first><last>Ichim</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <author><first>Matthias</first><last>Grabmair</last><affiliation>Technische Universität München</affiliation></author>
       <pages>199-216</pages>
       <abstract>In legal decisions, split votes (SV) occur when judges cannot reach a unanimous decision, posing a difficulty for lawyers who must navigate diverse legal arguments and opinions. In high-stakes domains, %as human-AI interaction systems become increasingly important, understanding the alignment of perceived difficulty between humans and AI systems is crucial to build trust. However, existing NLP calibration methods focus on a classifier’s awareness of predictive performance, measured against the human majority class, overlooking inherent human label variation (HLV). This paper explores split votes as naturally observable human disagreement and value pluralism. We collect judges’ vote distributions from the European Court of Human Rights (ECHR), and present SV-ECHR, a case outcome classification (COC) dataset with SV information. We build a taxonomy of disagreement with SV-specific subcategories. We further assess the alignment of perceived difficulty between models and humans, as well as confidence- and human-calibration of COC models. We observe limited alignment with the judge vote distribution. To our knowledge, this is the first systematic exploration of calibration to human judgements in legal NLP. Our study underscores the necessity for further research on measuring and enhancing model calibration considering HLV in legal decision tasks.</abstract>
@@ -215,7 +215,7 @@
       <title>Inference to the Best Explanation in Large Language Models</title>
       <author><first>Dhairya</first><last>Dalal</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <author><first>Paul</first><last>Buitelaar</last><affiliation>University of Galway</affiliation></author>
       <pages>217-235</pages>
       <abstract>While Large Language Models (LLMs) have found success in real-world applications, their underlying explanatory process is still poorly understood. This paper proposes <i>IBE-Eval</i>, a framework inspired by philosophical accounts on <i>Inference to the Best Explanation (IBE)</i> to advance the interpretation and evaluation of LLMs’ explanations. <i>IBE-Eval</i> estimates the plausibility of natural language explanations through a combination of explicit logical and linguistic features including: <i>consistency</i>, <i>parsimony</i>, <i>coherence</i>, and <i>uncertainty</i>. Extensive experiments are conducted on <i>Causal Question Answering (CQA)</i>, where <i>IBE-Eval</i> is tasked to select the most plausible causal explanation amongst competing ones generated by LLMs (i.e., GPT 3.5 and Llama 2). The experiments reveal that <i>IBE-Eval</i> can successfully identify the best explanation with up to 77% accuracy (<tex-math>\approx 27\%</tex-math> above random), improving upon a GPT 3.5-as-a-Judge baseline (<tex-math>\approx+17\%</tex-math>) while being intrinsically more efficient and interpretable. Additional analyses suggest that, despite model-specific variances, LLM-generated explanations tend to conform to IBE criteria and that <i>IBE-Eval</i> is significantly correlated with human judgment, opening up opportunities for future development of automated explanation verification tools.</abstract>
@@ -258,7 +258,7 @@
       <author><first>Xiaoyang</first><last>Wang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Hassan</first><last>Foroosh</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>267-278</pages>
       <abstract>Large language models hold significant potential for integrating various data types, such as text documents and database records, for advanced analytics. However, blending text and numerical data presents substantial challenges. LLMs need to process and cross-reference entities and numbers, handle data inconsistencies and redundancies, and develop planning capabilities such as building a working memory for managing complex data queries. In this paper, we introduce four novel tasks centered around sports data analytics to evaluate the numerical reasoning and information fusion capabilities of LLMs. These tasks involve providing LLMs with detailed, play-by-play sports game descriptions, then challenging them with adversarial scenarios such as new game rules, longer durations, scrambled narratives, and analyzing key statistics in game summaries. We conduct extensive experiments on NBA and NFL games to assess the performance of LLMs on these tasks. Our benchmark, SportsMetrics, introduces a new mechanism for assessing LLMs’ numerical reasoning and fusion skills.</abstract>
       <url hash="bbf4a3ef">2024.acl-long.17</url>
@@ -345,7 +345,7 @@
       <author><first>Elliot</first><last>Pickens</last><affiliation>Department of Computer Science, University of Wisconsin - Madison</affiliation></author>
       <author><first>Coen</first><last>Needell</last></author>
       <author><first>David</first><last>Rothschild</last><affiliation>Research, Microsoft</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>393-415</pages>
       <abstract>The mainstream media has much leeway in what it chooses to cover and how it covers it. These choices have real-world consequences on what people know and their subsequent behaviors. However, the lack of objective measures to evaluate editorial choices makes research in this area particularly difficult. In this paper, we argue that there are newsworthy topics where objective measures exist in the form of supporting data and propose a computational framework to analyze editorial choices in this setup. We focus on the economy because the reporting of economic indicators presents us with a relatively easy way to determine both the selection and framing of various publications. Their values provide a ground truth of how the economy is doing relative to how the publications choose to cover it. To do this, we define frame prediction as a set of interdependent tasks. At the article level, we learn to identify the reported stance towards the general state of the economy. Then, for every numerical quantity reported in the article, we learn to identify whether it corresponds to an economic indicator and whether it is being reported in a positive or negative way. To perform our analysis, we track six American publishers and each article that appeared in the top 10 slots of their landing page between 2015 and 2023.</abstract>
       <url hash="09aa02ed">2024.acl-long.24</url>
@@ -487,8 +487,8 @@
       <author><first>Jiatong</first><last>Shi</last></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Lori</first><last>Levin</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>568-582</pages>
       <abstract>Thousands of the world’s languages are in danger of extinction—a tremendous threat to cultural identities and human language diversity. Interlinear Glossed Text (IGT) is a form of linguistic annotation that can support documentation and resource creation for these languages’ communities. IGT typically consists of (1) transcriptions, (2) morphological segmentation, (3) glosses, and (4) free translations to a majority language. We propose Wav2Gloss: a task in which these four annotation components are extracted automatically from speech, and introduce the first dataset to this end, Fieldwork: a corpus of speech with all these annotations, derived from the work of field linguists, covering 37 languages, with standard formatting, and train/dev/test splits. We provide various baselines to lay the groundwork for future research on IGT generation from speech, such as end-to-end versus cascaded, monolingual versus multilingual, and single-task versus multi-task approaches.</abstract>
       <url hash="78dc87cc">2024.acl-long.34</url>
@@ -556,7 +556,7 @@
       <author><first>Cliff</first><last>Wong</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Tristan</first><last>Naumann</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Hoifung</first><last>Poon</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>649-679</pages>
       <abstract>Medical text generation aims to assist with administrative work and highlight salient information to support decision-making.To reflect the specific requirements of medical text, in this paper, we propose a set of metrics to evaluate the completeness, conciseness, and attribution of the generated text at a fine-grained level. The metrics can be computed by various types of evaluators including instruction-following (both proprietary and open-source) and supervised entailment models. We demonstrate the effectiveness of the resulting framework, DocLens, with three evaluators on three tasks: clinical note generation, radiology report summarization, and patient question summarization. A comprehensive human study shows that DocLens exhibits substantially higher agreement with the judgments of medical experts than existing metrics. The results also highlight the need to improve open-source evaluators and suggest potential directions. We released the code at https://github.com/yiqingxyq/DocLens.</abstract>
       <url hash="798c42b8">2024.acl-long.39</url>
@@ -609,7 +609,7 @@
       <author><first>Sreyan</first><last>Ghosh</last></author>
       <author><first>Utkarsh</first><last>Tyagi</last></author>
       <author><first>Sonal</first><last>Kumar</last></author>
-      <author><first>Chandra Kiran</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran</first><last>Evuru</last></author>
       <author><first>Ramaneswaran</first><last>S</last><affiliation>NVIDIA</affiliation></author>
       <author><first>S</first><last>Sakshi</last></author>
       <author><first>Dinesh</first><last>Manocha</last><affiliation>University of Maryland, College Park</affiliation></author>
@@ -629,7 +629,7 @@
       <author><first>Donald</first><last>Husa</last></author>
       <author><first>Naman</first><last>Goyal</last></author>
       <author><first>Abhinandan</first><last>Krishnan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <author><first>Madian</first><last>Khabsa</last><affiliation>Facebook</affiliation></author>
       <pages>749-775</pages>
       <abstract>We present Belebele, a multiple-choice machine reading comprehension (MRC) dataset spanning 122 language variants. Significantly expanding the language coverage of natural language understanding (NLU) benchmarks, this dataset enables the evaluation of text models in high-, medium-, and low-resource languages. Each question is based on a short passage from the FLORES-200 dataset and has four multiple-choice answers. The questions were carefully curated to discriminate between models with different levels of general language comprehension. The English dataset on its own proves difficult enough to challenge state-of-the-art language models. Being fully parallel, this dataset enables direct comparison of model performance across all languages. We use this dataset to evaluate the capabilities of multilingual masked language models (MLMs) and large language models (LLMs). We present extensive results and findings, notably that despite significant cross-lingual transfer in English-centric LLMs, much smaller MLMs pretrained on balanced multilingual data still understand far more languages. Overall, Belebele opens up new avenues for evaluating and analyzing the multilingual capabilities of NLP systems.</abstract>
@@ -695,8 +695,8 @@
     </paper>
     <paper id="49">
       <title>Tree-of-Counterfactual Prompting for Zero-Shot Stance Detection</title>
-      <author><first>Maxwell</first><last>Weinzierl</last><affiliation>University of Texas at Dallas</affiliation></author>
-      <author><first>Sanda</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="maxwell-weinzierl"><first>Maxwell</first><last>Weinzierl</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
       <pages>861-880</pages>
       <abstract>Stance detection enables the inference of attitudes from human communications. Automatic stance identification was mostly cast as a classification problem. However, stance decisions involve complex judgments, which can be nowadays generated by prompting Large Language Models (LLMs). In this paper we present a new method for stance identification which (1) relies on a new prompting framework, called Tree-of-Counterfactual prompting; (2) operates not only on textual communications, but also on images; (3) allows more than one stance object type; and (4) requires no examples of stance attribution, thus it is a “Tabula Rasa” Zero-Shot Stance Detection (TR-ZSSD) method. Our experiments indicate surprisingly promising results, outperforming fine-tuned stance detection systems.</abstract>
       <url hash="c82fc26a">2024.acl-long.49</url>
@@ -850,7 +850,7 @@
     </paper>
     <paper id="60">
       <title><fixed-case>RORA</fixed-case>: Robust Free-Text Rationale Evaluation</title>
-      <author><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Yining</first><last>Lu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Hanjie</first><last>Chen</last><affiliation>Rice University</affiliation></author>
       <author><first>Daniel</first><last>Khashabi</last><affiliation>Johns Hopkins University</affiliation></author>
@@ -916,7 +916,7 @@
       <author><first>Liulu</first><last>Yue</last></author>
       <author><first>Bingyang</first><last>Ye</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Kyeongmin</first><last>Rim</last></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <pages>1161-1172</pages>
       <abstract>Semantic textual similarity (STS) is a fundamental NLP task that measures the semantic similarity between a pair of sentences. In order to reduce the inherent ambiguity posed from the sentences, a recent work called Conditional STS (C-STS) has been proposed to measure the sentences’ similarity conditioned on a certain aspect. Despite the popularity of C-STS, we find that the current C-STS dataset suffers from various issues that could impede proper evaluation on this task. In this paper, we reannotate the C-STS validation set and observe an annotator discrepancy on 55% of the instances resulting from the annotation errors in the original label, ill-defined conditions, and the lack of clarity in the task definition. After a thorough dataset analysis, we improve the C-STS task by leveraging the models’ capability to understand the conditions under a QA task setting. With the generated answers, we present an automatic error identification pipeline that is able to identify annotation errors from the C-STS data with over 80% F1 score. We also propose a new method that largely improves the performance over baselines on the C-STS data by training the models with the answers. Finally we discuss the conditionality annotation based on the typed-feature structure (TFS) of entity types. We show in examples that the TFS is able to provide a linguistic foundation for constructing C-STS data with new conditions.</abstract>
       <url hash="2a9e922d">2024.acl-long.64</url>
@@ -1135,7 +1135,7 @@
       <author><first>Weitao</first><last>Li</last></author>
       <author><first>Junkai</first><last>Li</last></author>
       <author><first>Weizhi</first><last>Ma</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>1451-1466</pages>
       <abstract>Large language models (LLMs) exhibit powerful general intelligence across diverse scenarios, including their integration into chatbots. However, a vital challenge of LLM-based chatbots is that they may produce hallucinated content in responses, which significantly limits their applicability. Various efforts have been made to alleviate hallucination, such as retrieval augmented generation and reinforcement learning with human feedback, but most of them require additional training and data annotation. In this paper, we propose a novel post-hoc Citation-Enhanced Generation (CEG) approach combined with retrieval argumentation. Unlike previous studies that focus on preventing hallucinations during generation, our method addresses this issue in a post-hoc way. It incorporates a retrieval module to search for supporting documents relevant to the generated content, and employs a natural language inference-based citation generation module. Once the statements in the generated content lack of reference, our model can regenerate responses until all statements are supported by citations. Note that our method is a training-free plug-and-play plugin that is capable of various LLMs. Experiments on various hallucination-related datasets show our framework outperforms state-of-the-art methods in both hallucination detection and response regeneration on three benchmarks. Our code and datasets can be found at https://github.com/Tsinghua-dhy/CEG.</abstract>
       <url hash="70adc7f6">2024.acl-long.79</url>
@@ -1145,8 +1145,8 @@
     <paper id="80">
       <title>Transitive Consistency Constrained Learning for Entity-to-Entity Stance Detection</title>
       <author><first>Haoyang</first><last>Wen</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
-      <author><first>Alexander</first><last>Hauptmann</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>1467-1480</pages>
       <abstract>Entity-to-entity stance detection identifies the stance between a pair of entities with a directed link that indicates the source, target and polarity. It is a streamlined task without the complex dependency structure for structural sentiment analysis, while it is more informative compared to most previous work assuming that the source is the author. Previous work performs entity-to-entity stance detection training on individual entity pairs. However, stances between inter-connected entity pairs may be correlated. In this paper, we propose transitive consistency constrained learning, which first finds connected entity pairs and their stances, and adds an additional objective to enforce the transitive consistency. We explore consistency training on both classification-based and generation-based models and conduct experiments to compare consistency training with previous work and large language models with in-context learning. Experimental results illustrate that the inter-correlation of stances in political news can be used to improve the entity-to-entity stance detection model, while overly strict consistency enforcement may have a negative impact. In addition, we find that large language models struggle with predicting link direction and neutral labels in this task.</abstract>
       <url hash="69b4a056">2024.acl-long.80</url>
@@ -1237,7 +1237,7 @@
       <author><first>Chenchen</first><last>Ye</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Yunshan</first><last>Ma</last></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>1588-1606</pages>
       <abstract>The digital landscape is rapidly evolving with an ever-increasing volume of online news, emphasizing the need for swift and precise analysis of complex events.We refer to the complex events composed of many news articles over an extended period as Temporal Complex Event (TCE). This paper proposes a novel approach using Large Language Models (LLMs) to systematically extract and analyze the event chain within TCE, characterized by their key points and timestamps. We establish a benchmark, named TCELongBench, to evaluate the proficiency of LLMs in handling temporal dynamics and understanding extensive text. This benchmark encompasses three distinct tasks - reading comprehension, temporal sequencing, and future event forecasting. In the experiment, we leverage retrieval-augmented generation (RAG) method and LLMs with long context window to deal with lengthy news articles of TCE. Our findings indicate that models with suitable retrievers exhibit comparable performance with those utilizing long context window.</abstract>
       <url hash="3550d710">2024.acl-long.87</url>
@@ -1286,7 +1286,7 @@
       <author><first>Qianhui</first><last>Wu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xufang</first><last>Luo</last><affiliation>Microsoft Research</affiliation></author>
       <author id="dongsheng-li-fudan"><first>Dongsheng</first><last>Li</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yuqing</first><last>Yang</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Lili</first><last>Qiu</last><affiliation>Microsoft</affiliation></author>
       <pages>1658-1677</pages>
@@ -1508,7 +1508,7 @@
       <author><first>Rui</first><last>Zheng</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>1932-1945</pages>
       <abstract>Supervised fine-tuning (SFT) is a crucial step for large language models (LLMs), enabling them to align with human instructions and enhance their capabilities in downstream tasks. Substantially increasing instruction data is a direct solution to align the model with a broader range of downstream tasks or notably improve its performance on a specific task. However, we find that large-scale increases in instruction data can damage the world knowledge previously stored in LLMs. To address this challenge, we propose LoRAMoE, a novelty framework that introduces several low-rank adapters (LoRA) and integrates them by using a router network, like a plugin version of Mixture of Experts (MoE). It freezes the backbone model and forces a portion of LoRAs to focus on leveraging world knowledge to solve downstream tasks, to alleviate world knowledge forgetting. Experimental results show that, as the instruction data increases, LoRAMoE can significantly improve the ability to process downstream tasks, while maintaining the world knowledge stored in the LLM. Our code is available at https://github.com/Ablustrund/LoRAMoE.</abstract>
       <url hash="3907b0a9">2024.acl-long.106</url>
@@ -1524,7 +1524,7 @@
       <author><first>Lifeng</first><last>Jin</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Linfeng</first><last>Song</last></author>
       <author><first>Haitao</first><last>Mi</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>1946-1965</pages>
       <abstract>Despite showing impressive abilities, large language models (LLMs) often struggle with factual inaccuracies, i.e., ”hallucinations”, even when they hold relevant knowledge. To mitigate these hallucinations, current approaches typically necessitate high-quality human factuality annotations. In this work, we explore Self-Alignment for Factuality, where we leverage the self-evaluation capability of an LLM to provide training signals that steer the model towards factuality. Specifically, we incorporate Self-Eval, a self-evaluation component, to prompt an LLM to validate the factuality of its own generated responses solely based on its internal knowledge. Additionally, we design Self-Knowledge Tuning (SK-Tuning) to augment the LLM’s self-evaluation ability by improving the model’s confidence estimation and calibration. We then utilize these self-annotated responses to fine-tune the model via Direct Preference Optimization algorithm. We show that the proposed self-alignment approach substantially enhances factual accuracy over Llama family models across three key knowledge-intensive tasks on TruthfulQA and BioGEN.</abstract>
       <url hash="1ca33576">2024.acl-long.107</url>
@@ -1613,8 +1613,8 @@
     <paper id="114">
       <title>Handling Ambiguity in Emotion: From Out-of-Domain Detection to Distribution Estimation</title>
       <author><first>Wen</first><last>Wu</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Google</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Google</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
       <author><first>Chung-Cheng</first><last>Chiu</last><affiliation>Google</affiliation></author>
       <author><first>Qiujia</first><last>Li</last><affiliation>Google</affiliation></author>
       <author><first>Junwen</first><last>Bai</last><affiliation>Google</affiliation></author>
@@ -1641,7 +1641,7 @@
       <title>Learning Disentangled Semantic Spaces of Explanations via Invertible Neural Networks</title>
       <author><first>Yingji</first><last>Zhang</last></author>
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>2113-2134</pages>
       <abstract>Disentangled latent spaces usually have better semantic separability and geometrical properties, which leads to better interpretability and more controllable data generation. While this has been well investigated in Computer Vision, in tasks such as image disentanglement, in the NLP domain, sentence disentanglement is still comparatively under-investigated. Most previous work have concentrated on disentangling task-specific generative factors, such as sentiment, within the context of style transfer. In this work, we focus on a more general form of sentence disentanglement, targeting the localised modification and control of more general sentence semantic features. To achieve this, we contribute to a novel notion of sentence semantic disentanglement and introduce a flow-based invertible neural network (INN) mechanism integrated with a transformer-based language Autoencoder (AE) in order to deliver latent spaces with better separability properties. Experimental results demonstrate that the model can conform the distributed latent space into a better semantically disentangled sentence space, leading to improved language interpretability and controlled generation when compared to the recent state-of-the-art language VAE models.</abstract>
       <url hash="f58aea84">2024.acl-long.116</url>
@@ -1681,7 +1681,7 @@
       <author><first>Yilong</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>2181-2211</pages>
       <abstract>Tool learning is widely acknowledged as a foundational approach or deploying large language models (LLMs) in real-world scenarios. While current research primarily emphasizes leveraging tools to augment LLMs, it frequently neglects emerging safety considerations tied to their application. To fill this gap, we present <tex-math>ToolSword</tex-math>, a comprehensive framework dedicated to meticulously investigating safety issues linked to LLMs in tool learning. Specifically, ToolSword delineates six safety scenarios for LLMs in tool learning, encompassing <tex-math>malicious</tex-math> <tex-math>queries</tex-math> and <tex-math>jailbreak</tex-math> <tex-math>attacks</tex-math> in the input stage, <tex-math>noisy</tex-math> <tex-math>misdirection</tex-math> and <tex-math>risky</tex-math> <tex-math>cues</tex-math> in the execution stage, and <tex-math>harmful</tex-math> <tex-math>feedback</tex-math> and <tex-math>error</tex-math> <tex-math>conflicts</tex-math> in the output stage. Experiments conducted on 11 open-source and closed-source LLMs reveal enduring safety challenges in tool learning, such as handling harmful queries, employing risky tools, and delivering detrimental feedback, which even GPT-4 is susceptible to. Moreover, we conduct further studies with the aim of fostering research on tool learning safety. The data will be released upon acceptance of the paper.</abstract>
       <url hash="3c7a930a">2024.acl-long.119</url>
@@ -1707,7 +1707,7 @@
       <author><first>Rui</first><last>Zheng</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>2227-2239</pages>
       <abstract>The principle of continual relation extraction (CRE) involves adapting to emerging novel relations while preserving old knowledge. Existing CRE approaches excel in preserving old knowledge but falter when confronted with contaminated data streams, likely due to an artificial assumption of no annotation errors. Recognizing the prevalence of noisy labels in real-world datasets, we introduce a more practical learning scenario, termed as <i>noisy-CRE</i>. In response to this challenge, we propose a noise-resistant contrastive framework called Noise-guided Attack in Contrastive Learning (NaCL), aimed at learning incremental corrupted relations. Diverging from conventional approaches like sample discarding or relabeling in the presence of noisy labels, NaCL takes a transformative route by modifying the feature space through targeted attack. This attack aims to align the feature space with the provided, albeit inaccurate, labels, thereby enhancing contrastive representations. Extensive empirical validations demonstrate the consistent performance improvement of NaCL with increasing noise rates, surpassing state-of-the-art methods.</abstract>
       <url hash="ad090a9c">2024.acl-long.121</url>
@@ -1733,7 +1733,7 @@
       <author><first>Leon</first><last>Weber-Genzel</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Marie-Catherine</first><last>De Marneffe</last><affiliation>UCLouvain</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>2256-2269</pages>
       <abstract>Human label variation arises when annotators assign different labels to the same item for valid reasons, while annotation errors occur when labels are assigned for invalid reasons. These two issues are prevalent in NLP benchmarks, yet existing research has studied them in isolation. To the best of our knowledge, there exists no prior work that focuses on teasing apart error from signal, especially in cases where signal is beyond black-and-white.To fill this gap, we introduce a systematic methodology and a new dataset, VariErr (variation versus error), focusing on the NLI task in English. We propose a 2-round annotation procedure with annotators explaining each label and subsequently judging the validity of label-explanation pairs.VariErr contains 7,732 validity judgments on 1,933 explanations for 500 re-annotated MNLI items. We assess the effectiveness of various automatic error detection (AED) methods and GPTs in uncovering errors versus human label variation. We find that state-of-the-art AED methods significantly underperform GPTs and humans. While GPT-4 is the best system, it still falls short of human performance. Our methodology is applicable beyond NLI, offering fertile ground for future research on error versus plausible variation, which in turn can yield better and more trustworthy NLP systems.</abstract>
       <url hash="45ef1937">2024.acl-long.123</url>
@@ -1771,7 +1771,7 @@
       <title>Exploring the Potential of Large Language Models in Computational Argumentation</title>
       <author><first>Guizhen</first><last>Chen</last></author>
       <author><first>Liying</first><last>Cheng</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba Group</affiliation></author>
       <pages>2309-2330</pages>
       <abstract>Computational argumentation has become an essential tool in various domains, including law, public policy, and artificial intelligence. It is an emerging research field in natural language processing that attracts increasing attention. Research on computational argumentation mainly involves two types of tasks: argument mining and argument generation. As large language models (LLMs) have demonstrated impressive capabilities in understanding context and generating natural language, it is worthwhile to evaluate the performance of LLMs on diverse computational argumentation tasks. This work aims to embark on an assessment of LLMs, such as ChatGPT, Flan models, and LLaMA2 models, in both zero-shot and few-shot settings. We organize existing tasks into six main categories and standardize the format of fourteen openly available datasets. In addition, we present a new benchmark dataset on counter speech generation that aims to holistically evaluate the end-to-end performance of LLMs on argument mining and argument generation. Extensive experiments show that LLMs exhibit commendable performance across most of the datasets, demonstrating their capabilities in the field of argumentation. Our analysis offers valuable suggestions for evaluating computational argumentation and its integration with LLMs in future research endeavors.</abstract>
@@ -1815,7 +1815,7 @@
     <paper id="129">
       <title><fixed-case>MEFT</fixed-case>: Memory-Efficient Fine-Tuning through Sparse Adapter</title>
       <author><first>Jitai</first><last>Hao</last></author>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Xin</first><last>Xin</last></author>
       <author><first>Qi</first><last>Meng</last><affiliation>Microsoft and Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences</affiliation></author>
       <author><first>Zhumin</first><last>Chen</last><affiliation>Shandong University</affiliation></author>
@@ -1831,7 +1831,7 @@
       <title>Surgical Feature-Space Decomposition of <fixed-case>LLM</fixed-case>s: Why, When and How?</title>
       <author><first>Arnav</first><last>Chavan</last></author>
       <author><first>Nahush</first><last>Lele</last></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>University of Tromsø and Transmute AI Research</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>University of Tromsø and Transmute AI Research</affiliation></author>
       <pages>2389-2400</pages>
       <abstract>Low-rank approximations, of the weight and feature space can enhance the performance of deep learning models, whether in terms of improving generalization or reducing the latency of inference. However, there is no clear consensus yet on how, when and why these approximations are helpful for large language models (LLMs). In this work, we empirically study the efficacy of weight and feature space decomposition in transformer-based LLMs. We demonstrate that surgical decomposition not only provides critical insights into the trade-off between compression and language modelling performance, but also sometimes enhances commonsense reasoning performance of LLMs. Our empirical analysis identifies specific network segments that intrinsically exhibit a low-rank structure. Furthermore, we extend our investigation to the implications of low-rank approximations on model bias. Overall, our findings offer a novel perspective on optimizing LLMs, presenting the low-rank approximation not only as a tool for performance enhancements, but also as a means to potentially rectify biases within these models.</abstract>
       <url hash="314014b4">2024.acl-long.130</url>
@@ -1847,7 +1847,7 @@
       <author><first>Xiaonan</first><last>Li</last><affiliation>Fudan University</affiliation></author>
       <author><first>Junqi</first><last>Dai</last></author>
       <author><first>Qinyuan</first><last>Cheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>2401-2416</pages>
       <abstract>Machine reasoning, which involves solving complex problems through step-by-step deduction and analysis, is a crucial indicator of the capabilities of Large Language Models (LLMs). However, as the complexity of tasks escalates, LLMs often encounter increasing errors in their multi-step reasoning process. This study delves into the underlying factors contributing to these reasoning errors and seeks to leverage uncertainty to refine them. Specifically, we introduce Uncertainty-aware Adaptive Guidance (UAG), a novel approach for guiding LLM reasoning onto an accurate and reliable trajectory. UAG first identifies and evaluates uncertainty signals within each step of the reasoning chain. Upon detecting a significant increase in uncertainty, UAG intervenes by retracting to a previously reliable state and then introduces certified reasoning clues for refinement. By dynamically adjusting the reasoning process, UAG offers a plug-and-play solution for improving LLMs’ performance in complex reasoning. Extensive experiments across various reasoning tasks demonstrate that UAG not only enhances the reasoning abilities of LLMs but also consistently outperforms several strong baselines with minimal computational overhead. Further analysis reveals that UAG is notably effective in identifying and diminishing reasoning errors.</abstract>
@@ -1873,7 +1873,7 @@
       <title>Unlocking Data-free Low-bit Quantization with Matrix Decomposition for <fixed-case>KV</fixed-case> Cache Compression</title>
       <author><first>Peiyu</first><last>Liu</last></author>
       <author><first>Ze-Feng</first><last>Gao</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yipeng</first><last>Ma</last></author>
       <author><first>Tao</first><last>Wang</last></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
@@ -1913,7 +1913,7 @@
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Chunlan</first><last>Ma</last></author>
       <author><first>Haotian</first><last>Ye</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>2476-2499</pages>
       <abstract>The world’s more than 7000 languages are written in at least 293 scripts. Due to various reasons, many closely related languages use different scripts, which poses a difficulty for multilingual pretrained language models (mPLMs) in learning crosslingual knowledge through lexical overlap. As a consequence, mPLMs are faced with a script barrier: representations from different scripts are located in different subspaces, which can result in crosslingual transfer involving languages of different scripts performing suboptimally. To address this problem, we propose TransliCo, a framework that optimizes the Transliteration Contrastive Modeling (TCM) objective to fine-tune an mPLM by contrasting sentences in its training data and their transliterations in a unified script (in our case Latin), which enhances uniformity in the representation space for different scripts. Using Glot500-m, an mPLM pretrained on over 500 languages, as our source model, we fine-tune it on a small portion (5%) of its training data, and refer to the resulting model as Furina. We show that Furina not only better aligns representations from distinct scripts but also outperforms the original Glot500-m on various zero-shot crosslingual transfer tasks. Additionally, we achieve consistent improvement in a case study on the Indic group where the languages exhibit areal features but use different scripts. We make our code and models publicly available.</abstract>
       <url hash="9a5af75a">2024.acl-long.136</url>
@@ -1976,7 +1976,7 @@
       <title>Time is Encoded in the Weights of Finetuned Language Models</title>
       <author><first>Kai</first><last>Nylund</last></author>
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Facebook and University of Washington, Seattle</affiliation></author>
-      <author><first>Noah</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>2571-2587</pages>
       <abstract>We present time vectors, a simple tool to customize language models to new time periods. Time vectors are created by finetuning a language model on data from a single time (e.g., a year or month), and then subtracting the weights of the original pretrained model. This vector specifies a direction in weight space that, as our experiments show, improves performance on text from that time period. Time vectors specialized to adjacent time periods appear to be positioned closer together in a manifold. Using this structure, we interpolate between time vectors to induce new models that perform better on intervening and future time periods, without any additional training. We demonstrate the consistency of our findings across different tasks, domains, model sizes, and time scales. Our results suggest that time is encoded in the weight space of finetuned models.</abstract>
       <url hash="6a7dcc54">2024.acl-long.141</url>
@@ -2121,7 +2121,7 @@
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Puhai</first><last>Yang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Fanshu</first><last>Sun</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>2748-2763</pages>
       <abstract>Task-oriented dialogue (TOD) systems are predominantly designed to be composed of several functional modules (e.g. dialogue state tracker, dialogue policy, natural language generation) whether they are pipeline or end-to-end architectures. However, this modular design not only heavily relies on massive fully-annotated data, but also suffers from many intrinsic drawbacks, such as serious error accumulation, poor generalization ability, high customization cost, and low fault tolerance rate. In this paper, we rethink the architecture of the task-oriented dialogue systems and propose a novel fully zero-shot autonomous TOD agent, named AutoTOD, where all the delicate modules in traditional TOD systems are deprecated and all it needs is a general-purpose instruction-following language model (e.g. GPT-4). AutoTOD only leverages a simple instruction schema consisting of the description of tasks and external APIs, and can autonomously decide what to do at each dialogue turn, including asking for information, calling APIs, summarizing API results, and correcting previous mistakes. Moreover, we propose a simulation-based evaluation framework to better validate the abilities of TOD models in real-life scenarios. Extensive experiments conducted on the MultiWOZ and SGD datasets show the superior task completion ability and flexible language skills of AutoTOD.</abstract>
       <url hash="8a2138fd">2024.acl-long.152</url>
@@ -2132,8 +2132,8 @@
       <title>On Context Utilization in Summarization with Large Language Models</title>
       <author><first>Mathieu</first><last>Ravaut</last><affiliation>Abu Dhabi Investment Authority</affiliation></author>
       <author><first>Aixin</first><last>Sun</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>2764-2781</pages>
       <abstract>Large language models (LLMs) excel in abstractive summarization tasks, delivering fluent and pertinent summaries. Recent advancements have extended their capabilities to handle long-input contexts, exceeding 100k tokens. However, in question answering, language models exhibit uneven utilization of their input context. They tend to favor the initial and final segments, resulting in a U-shaped performance pattern concerning where the answer is located within the input. This bias raises concerns, particularly in summarization where crucial content may be dispersed throughout the source document(s). Besides, in summarization, mapping facts from the source to the summary is not trivial as salient content is usually re-phrased. In this paper, we conduct the first comprehensive study on context utilization and position bias in summarization. Our analysis encompasses 6 LLMs, 10 datasets, and 5 evaluation metrics. We introduce a new evaluation benchmark called MiddleSum on the which we benchmark two alternative inference methods to alleviate position bias: hierarchical summarization and incremental summarization. Our code and data can be found here: https://github.com/ntunlp/MiddleSum.</abstract>
       <url hash="a2187e79">2024.acl-long.153</url>
@@ -2193,7 +2193,7 @@
       <author><first>Ashima</first><last>Suvarna</last></author>
       <author><first>Mingyu</first><last>Ma</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Hritik</first><last>Bansal</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <author><first>P. Jeffrey</first><last>Brantingham</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Wei</first><last>Wang</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
@@ -2327,7 +2327,7 @@
       <author><first>Zeyu</first><last>Gao</last></author>
       <author><first>Hao</first><last>Wang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Yuanda</first><last>Wang</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <pages>3040-3051</pages>
       <abstract>Assembly code search is vital for reducing the burden on reverse engineers, allowing them to quickly identify specific functions using natural language within vast binary programs.Despite its significance, this critical task is impeded by the complexities involved in building high-quality datasets. This paper explores training a Large Language Model (LLM) to emulate a general compiler. By leveraging Ubuntu packages to compile a dataset of 20 billion tokens, we further continue pre-train CodeLlama as a Virtual Compiler (ViC), capable of compiling any source code to assembly code. This approach allows for “virtual” compilation across a wide range of programming languages without the need for a real compiler, preserving semantic equivalency and expanding the possibilities for assembly code dataset construction. Furthermore, we use ViC to construct a sufficiently large dataset for assembly code search. Employing this extensive dataset, we achieve a substantial improvement in assembly code search performance, with our model surpassing the leading baseline by 26%.</abstract>
       <url hash="e1a476d3">2024.acl-long.167</url>
@@ -2341,7 +2341,7 @@
       <author><first>Shiguang</first><last>Wu</last></author>
       <author><first>Mengqi</first><last>Zhang</last><affiliation>Shandong University</affiliation></author>
       <author><first>Zhaochun</first><last>Ren</last><affiliation>Leiden University</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Zhumin</first><last>Chen</last><affiliation>Shandong University</affiliation></author>
       <author><first>Jiahuan</first><last>Pei</last><affiliation>Centrum voor Wiskunde en Informatica</affiliation></author>
       <pages>3052-3064</pages>
@@ -2435,7 +2435,7 @@
       <title><fixed-case>U</fixed-case>ni<fixed-case>B</fixed-case>ridge: A Unified Approach to Cross-Lingual Transfer Learning for Low-Resource Languages</title>
       <author><first>Trinh</first><last>Pham</last><affiliation>Ho Chi Minh City University of Technology</affiliation></author>
       <author><first>Khoi</first><last>Le</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>3168-3184</pages>
       <abstract>In this paper, we introduce UniBridge (Cross-Lingual Transfer Learning with Optimized Embeddings and Vocabulary), a comprehensive approach developed to improve the effectiveness of Cross-Lingual Transfer Learning, particularly in languages with limited resources. Our approach tackles two essential elements of a language model: the initialization of embeddings and the optimal vocabulary size. Specifically, we propose a novel embedding initialization method that leverages both lexical and semantic alignment for a language. In addition, we present a method for systematically searching for the optimal vocabulary size, ensuring a balance between model complexity and linguistic coverage. Our experiments across multilingual datasets show that our approach greatly improves the F1-Score in several languages. UniBridge is a robust and adaptable solution for cross-lingual systems in various languages, highlighting the significance of initializing embeddings and choosing the right vocabulary size in cross-lingual environments.</abstract>
       <url hash="4dba9cd8">2024.acl-long.174</url>
@@ -2680,7 +2680,7 @@
       <title>Democratizing <fixed-case>LLM</fixed-case>s for Low-Resource Languages by Leveraging their <fixed-case>E</fixed-case>nglish Dominant Abilities with Linguistically-Diverse Prompts</title>
       <author><first>Xuan-Phi</first><last>Nguyen</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Mahani</first><last>Aljunied</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba Group</affiliation></author>
       <pages>3501-3516</pages>
       <abstract>Large language models (LLMs) are known to effectively perform tasks by simply observing few exemplars. However, in low-resource languages, obtaining such hand-picked exemplars can still be challenging, where unsupervised techniques may be necessary. Moreover, competent generative capabilities of LLMs are observed only in high-resource languages, while their performances among under-represented languages fall behind due to pre-training data imbalance. To elicit LLMs’ ability onto low-resource languages without any supervised data, we propose to assemble synthetic exemplars from a diverse set of high-resource languages to prompt the LLMs to translate from any language into English. These prompts are then used to create intra-lingual exemplars to perform tasks in the target languages. Our unsupervised prompting method performs on par with supervised few-shot learning in LLMs of different sizes for translations between English and 13 Indic and 21 African low-resource languages. We also show that fine-tuning a 7B model on data generated from our method helps it perform competitively with a 175B model. In non-English translation tasks, our method even outperforms supervised prompting by up to 3 chrF++ in many low-resource languages. When evaluated on zero-shot multilingual summarization, our method surpasses other English-pivoting baselines by up to 4 ROUGE-L and is also favored by GPT-4.</abstract>
@@ -2706,7 +2706,7 @@
       <author><first>Zheng</first><last>Liu</last></author>
       <author><first>Shitao</first><last>Xiao</last></author>
       <author><first>Zhicheng</first><last>Dou</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>3537-3553</pages>
       <abstract>LLMs confront inherent limitations in terms of its knowledge, memory, and action. The retrieval augmentation stands as a vital mechanism to address these limitations, which brings in useful information from external sources to augment the LLM. However, existing retrieval methods encounter two pressing issues. On one hand, the general retrievers are not properly optimized for retrieval augmentation hence exhibit limited effectiveness; on the other hand, the task-specific retrievers excel in the targeted retrieval augmentation scenario, while lack the versatility to handle diverse scenarios. In this work, we propose LLM-Embedder for the unified support of diverse retrieval augmentation scenarios. Our method presents three technical contributions. Firstly, we introduce a new reward formulation, namely rank-aware reward. It exploits the ranking position of the desired output among N sampled outputs from the LLM, which leads to fine-grained and robust computation of reward from the LLM’s feedback. Secondly, we design a novel distillation objective, called graded distillation. It incorporates both the absolute value and the relative order of the reward for more sufficient utilization of the LLM’s feedback. Thirdly, we systematically optimize the multi-task learning, which effectively unifies the multiple retrieval functionalities into one model. In our experiment, LLM-Embedder substantially improves the LLM’s performances in various downstream tasks, while introducing superior retrieval augmentation’s effect over both general and task-specifc retrievers. Our data, code, and model have been released at https://github.com/FlagOpen/FlagEmbedding.</abstract>
       <url hash="72fd1e4b">2024.acl-long.194</url>
@@ -2767,7 +2767,7 @@
     <paper id="199">
       <title>Unity in Diversity: Collaborative Pre-training Across Multimodal Medical Sources</title>
       <author><first>Xiaochen</first><last>Wang</last></author>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author><first>Junyu</first><last>Luo</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Jiaqi</first><last>Wang</last></author>
       <author><first>Yuan</first><last>Zhong</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Xiaokun</first><last>Zhang</last></author>
@@ -2786,7 +2786,7 @@
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Andrea</first><last>Pilzer</last><affiliation>NVIDIA</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>3657-3672</pages>
       <abstract>Despite its crucial role in research experiments, code correctness is often presumed solely based on the perceived quality of results. This assumption, however, comes with the risk of erroneous outcomes and, in turn, potentially misleading findings. To mitigate this risk, we posit that the current focus on reproducibility should go hand in hand with the emphasis on software quality. We support our arguments with a case study in which we identify and fix three bugs in widely used implementations of the state-of-the-art Conformer architecture. Through experiments on speech recognition and translation in various languages, we demonstrate that the presence of bugs does not prevent the achievement of good and reproducible results, which however can lead to incorrect conclusions that potentially misguide future research. As countermeasures, we release pangoliNN, a library dedicated to testing neural models, and propose a Code-quality Checklist, with the goal of promoting coding best practices and improving software quality within the NLP community.</abstract>
       <url hash="f0f9d975">2024.acl-long.200</url>
@@ -2797,7 +2797,7 @@
       <title><fixed-case>SBAAM</fixed-case>! Eliminating Transcript Dependency in Automatic Subtitling</title>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Sara</first><last>Papi</last></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Mauro</first><last>Cettolo</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>3673-3691</pages>
@@ -2810,7 +2810,7 @@
       <title><fixed-case>S</fixed-case>tream<fixed-case>A</fixed-case>tt: Direct Streaming Speech-to-Text Translation with Attention-based Audio History Selection</title>
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>3692-3707</pages>
       <abstract>Streaming speech-to-text translation (StreamST) is the task of automatically translating speech while incrementally receiving an audio stream. Unlike simultaneous ST (SimulST), which deals with pre-segmented speech, StreamST faces the challenges of handling continuous and unbounded audio streams. This requires additional decisions about what to retain of the previous history, which is impractical to keep entirely due to latency and computational constraints. Despite the real-world demand for real-time ST, research on streaming translation remains limited, with existing works solely focusing on SimulST. To fill this gap, we introduce StreamAtt, the first StreamST policy, and propose StreamLAAL, the first StreamST latency metric designed to be comparable with existing metrics for SimulST. Extensive experiments across all 8 languages of MuST-C v1.0 show the effectiveness of StreamAtt compared to a naive streaming baseline and the related state-of-the-art SimulST policy, providing a first step in StreamST research.</abstract>
@@ -2823,7 +2823,7 @@
       <author><first>LingXi</first><last>Zhang</last></author>
       <author><first>Yue</first><last>Yu</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Kuan</first><last>Wang</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>3708-3719</pages>
       <abstract>Retrieval-augmented generation enhances large language models (LLMs) by incorporating relevant information from external knowledge sources. This enables LLMs to adapt to specific domains and mitigate hallucinations in knowledge-intensive tasks. However, existing retrievers are often misaligned with LLMs due to separate training processes and the inherent black-box nature of LLMs. To address this challenge, we propose ARL2, a retriever learning technique that harnesses LLMs as labelers. ARL2 leverages LLMs to annotate and score adaptive relevance evidence, enabling the retriever to learn from robust LLM supervision. Furthermore, ARL2 incorporates a self-training strategy to minimize the cost of API calls. Extensive experiments demonstrate the effectiveness of ARL2, achieving accuracy improvements of 5.4% on NQ and 4.6% on MMLU compared to the state-of-the-art methods. Additionally, ARL2 exhibits robust transfer learning capabilities and strong zero-shot generalization abilities.</abstract>
       <url hash="0a32c1ad">2024.acl-long.203</url>
@@ -3043,7 +3043,7 @@
       <author><first>Alham</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Amazon</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>3964-3992</pages>
       <abstract>The advent of Large Language Models (LLMs) has brought an unprecedented surge in machine-generated text (MGT) across diverse channels. This raises legitimate concerns about its potential misuse and societal implications. The need to identify and differentiate such content from genuine human-generated text is critical in combating disinformation, preserving the integrity of education and scientific fields, and maintaining trust in communication. In this work, we address this problem by introducing a new benchmark based on a multilingual, multi-domain and multi-generator corpus of MGTs — M4GT-Bench. The benchmark is compiled of three tasks: (1) mono-lingual and multi-lingual binary MGT detection; (2) multi-way detection where one need to identify, which particular model generated the text; and (3) mixed human-machine text detection, where a word boundary delimiting MGT from human-written content should be determined. On the developed benchmark, we have tested several MGT detection baselines and also conducted an evaluation of human performance. We see that obtaining good performance in MGT detection usually requires an access to the training data from the same domain and generators. The benchmark is available at https://github.com/mbzuai-nlp/M4GT-Bench.</abstract>
       <url hash="e3004271">2024.acl-long.218</url>
@@ -3117,7 +3117,7 @@
       <author><first>Shuai</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
       <author><first>Boran</first><last>Han</last></author>
       <author><first>Tong</first><last>He</last><affiliation>Amazon</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
       <pages>4056-4071</pages>
       <abstract>In this work, we introduce Context-Aware MultiModal Learner (CaMML), for tuning large multimodal models (LMMs). CaMML, a lightweight module, is crafted to seamlessly integrate multimodal contextual samples into large models, thereby empowering the model to derive knowledge from analogous, domain-specific, up-to-date information and make grounded inferences. Importantly, CaMML is highly scalable and can efficiently handle lengthy multimodal context examples owing to its hierarchical design. Based on CaMML, we have developed two multimodal models, CaMML-7B and CaMML-13B, that have shown exceptional performance across an array of benchmark datasets for multimodal tasks. Remarkably, CaMML-13B achieves the state-of-the-art performance on over ten widely recognized multimodal benchmark datasets, surpassing LLaVA-1.5 (13B) with a noticeable margin, without integration of any external resources. Moreover, we have conducted extensive ablative studies to inspect the inner workings of CaMML and performed qualitative analyses to showcase its effectiveness in handling real-world challenging cases. Code and models are available at: https://github.com/amazon-science/camml.</abstract>
       <url hash="66df09a6">2024.acl-long.223</url>
@@ -3192,7 +3192,7 @@
       <title>In-context Mixing (<fixed-case>ICM</fixed-case>): Code-mixed Prompts for Multilingual <fixed-case>LLM</fixed-case>s</title>
       <author><first>Bhavani</first><last>Shankar</last></author>
       <author><first>Preethi</first><last>Jyothi</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>4162-4176</pages>
       <abstract>We introduce a simple and effective prompting technique called in-context mixing (ICM) for effective in-context learning (ICL) with multilingual large language models (MLLMs). With ICM, we modify the few-shot examples within ICL prompts to be intra-sententially code-mixed by randomly swapping content words in the target languages with their English translations. We observe that ICM prompts yield superior performance in NLP tasks such as disfluency correction, grammar error correction and text simplification that demand a close correspondence between the input and output sequences. Significant improvements are observed mainly for low-resource languages that are under-represented during the pretraining and finetuning of MLLMs. We present an extensive set of experiments to analyze when ICM is effective and what design choices contribute towards its effectiveness. ICM works consistently and significantly better than other prompting techniques across models of varying capacity such as mT0-XXL, BloomZ and GPT-4.</abstract>
       <url hash="30c6d498">2024.acl-long.228</url>
@@ -3218,7 +3218,7 @@
       <author><first>Yuche</first><last>Tsai</last><affiliation>National Taiwan University</affiliation></author>
       <author><first>Hsiang</first><last>Hsiao</last><affiliation>National Taiwan University</affiliation></author>
       <author><first>Hong-Yi</first><last>Lin</last><affiliation>National Taiwan University</affiliation></author>
-      <author><first>Shou-De</first><last>Lin</last><affiliation>National Taiwan University and National Taiwan University</affiliation></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last><affiliation>National Taiwan University and National Taiwan University</affiliation></author>
       <pages>4193-4205</pages>
       <abstract>This study investigates the privacy risks associated with text embeddings, focusing on the scenario where attackers cannot access the original embedding model. Contrary to previous research requiring direct model access, we explore a more realistic threat model by developing a transfer attack method. This approach uses a surrogate model to mimic the victim model’s behavior, allowing the attacker to infer sensitive information from text embeddings without direct access. Our experiments across various embedding models and a clinical dataset demonstrate that our transfer attack significantly outperforms traditional methods, revealing the potential privacy vulnerabilities in embedding technologies and emphasizing the need for enhanced security measures.</abstract>
       <url hash="3a8059b2">2024.acl-long.230</url>
@@ -3280,7 +3280,7 @@
       <author><first>Christin</first><last>Seifert</last><affiliation>Phillips-Universität Marburg and University of Twente</affiliation></author>
       <author><first>Kyle</first><last>Lo</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Wei</first><last>Xu</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <author><first>Junyi Jessy</first><last>Li</last><affiliation>University of Texas, Austin</affiliation></author>
       <pages>4263-4294</pages>
       <abstract>Text simplification aims to make technical texts more accessible to laypeople but often results in deletion of information and vagueness. This work proposes InfoLossQA, a framework to characterize and recover simplification-induced information loss in form of question-and-answer (QA) pairs. Building on the theory of Questions Under Discussion, the QA pairs are designed to help readers deepen their knowledge of a text. First, we collect a dataset of 1,000 linguist-curated QA pairs derived from 104 LLM simplifications of English medical study abstracts. Our analyses of this data reveal that information loss occurs frequently, and that the QA pairs give a high-level overview of what information was lost. Second, we devise two methods for this task: end-to-end prompting of open-source and commercial language models, and a natural language inference pipeline. With a novel evaluation framework considering the correctness of QA pairs and their linguistic suitability, our expert evaluation reveals that models struggle to reliably identify information loss and applying similar standards as humans at what constitutes information loss.</abstract>
@@ -3360,7 +3360,7 @@
       <title>Missci: Reconstructing Fallacies in Misrepresented Science</title>
       <author><first>Max</first><last>Glockner</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
       <pages>4372-4405</pages>
       <abstract>Health-related misinformation on social networks can lead to poor decision-making and real-world dangers. Such misinformation often misrepresents scientific publications and cites them as “proof” to gain perceived credibility. To effectively counter such claims automatically, a system must explain how the claim was falsely derived from the cited publication. Current methods for automated fact-checking or fallacy detection neglect to assess the (mis)used evidence in relation to misinformation claims, which is required to detect the mismatch between them. To address this gap, we introduce Missci, a novel argumentation theoretical model for fallacious reasoning together with a new dataset for real-world misinformation detection that misrepresents biomedical publications. Unlike previous fallacy detection datasets, Missci (i) focuses on implicit fallacies between the relevant content of the cited publication and the inaccurate claim, and (ii) requires models to verbalize the fallacious reasoning in addition to classifying it. We present Missci as a dataset to test the critical reasoning abilities of large language models (LLMs), that are required to reconstruct real-world fallacious arguments, in a zero-shot setting. We evaluate two representative LLMs and the impact of different levels of detail about the fallacy classes provided to the LLM via prompts. Our experiments and human evaluation show promising results for GPT 4, while also demonstrating the difficulty of this task.</abstract>
@@ -3395,7 +3395,7 @@
     <paper id="243">
       <title>Favi-Score: A Measure for Favoritism in Automated Preference Ratings for Generative <fixed-case>AI</fixed-case> Evaluation</title>
       <author><first>Pius</first><last>Von Däniken</last><affiliation>University of Zurich and ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
-      <author><first>Jan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
       <author><first>Don</first><last>Tuggener</last><affiliation>ZHAW School of Engineering</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>ZHAW School of Engineering</affiliation></author>
       <pages>4437-4454</pages>
@@ -3479,8 +3479,8 @@
       <author><first>Laura</first><last>Aina</last><affiliation>Amazon</affiliation></author>
       <author><first>Paula</first><last>Czarnowska</last><affiliation>Amazon AWS</affiliation></author>
       <author><first>Momchil</first><last>Hardalov</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Thomas</first><last>Müller</last><affiliation>Amazon</affiliation></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last><affiliation>Amazon</affiliation></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <pages>4554-4570</pages>
       <abstract>Large Language Models (LLMs) tend to be unreliable on fact-based answers.To address this problem, NLP researchers have proposed a range of techniques to estimate LLM’s confidence over facts. However, due to the lack of a systematic comparison, it is not clear how the different methods compare to one other.To fill this gap, we present a rigorous survey and empirical comparison of estimators of factual confidence.We define an experimental framework allowing for fair comparison, covering both fact-verification and QA. Our experiments across a series of LLMs indicate that trained hidden-state probes provide the most reliable confidence estimates; albeit at the expense of requiring access to weights and supervision data. We also conduct a deeper assessment of the methods, in which we measure the consistency of model behavior under meaning-preserving variations in the input. We find that the factual confidence of LLMs is often unstable across semantically equivalent inputs, suggesting there is much room for improvement for the stability of models’ parametric knowledge.</abstract>
       <url hash="ce5d62e5">2024.acl-long.250</url>
@@ -3504,7 +3504,7 @@
       <author><first>Rui</first><last>Zheng</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4571-4585</pages>
       <abstract>The advancement of large language models (LLMs) has significantly propelled the field of code generation. Previous work integrated reinforcement learning (RL) with compiler feedback for exploring the output space of LLMs to enhance code generation quality. However, the lengthy code generated by LLMs in response to complex human requirements makes RL exploration a challenge. Also, since the unit tests may not cover the complicated code, optimizing LLMs by using these unexecuted code snippets is ineffective. To tackle these challenges, we introduce <b>StepCoder</b>, a novel RL framework for code generation, consisting of two main components: CCCS addresses the exploration challenge by breaking the long sequences code generation task into a Curriculum of Code Completion Subtasks, while FGO only optimizes the model by masking the unexecuted code segments to provide Fine-Grained Optimization. In addition, we furthermore construct the APPS+ dataset for RL training, which is manually verified to ensure the correctness of unit tests. Experimental results show that our method improves the ability to explore the output space and outperforms state-of-the-art approaches in corresponding benchmarks. The code and dataset will be made available upon publication.</abstract>
       <url hash="48da7208">2024.acl-long.251</url>
@@ -3540,7 +3540,7 @@
       <author><first>Xianjun</first><last>Yang</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xun</first><last>Zhao</last><affiliation>Shanghai AI Laboratory</affiliation></author>
       <author><first>Dahua</first><last>Lin</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>4602-4614</pages>
@@ -3557,7 +3557,7 @@
       <author><first>Jonathan</first><last>Herzig</last><affiliation>Research, Google</affiliation></author>
       <author><first>Or</first><last>Honovich</last></author>
       <author><first>Michael</first><last>Tseng</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>Michael</first><last>Collins</last><affiliation>Google and Columbia University</affiliation></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last><affiliation>Google and Columbia University</affiliation></author>
       <author><first>Roee</first><last>Aharoni</last><affiliation>Google</affiliation></author>
       <author><first>Mor</first><last>Geva</last><affiliation>Tel Aviv University and Google Research</affiliation></author>
       <pages>4615-4634</pages>
@@ -3763,7 +3763,7 @@
       <title><fixed-case>M</fixed-case>ap<fixed-case>C</fixed-case>oder: Multi-Agent Code Generation for Competitive Problem Solving</title>
       <author><first>Md. Ashraful</first><last>Islam</last><affiliation>Bangladesh University of Engineering and Technology and Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Mohammed Eunus</first><last>Ali</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <pages>4912-4944</pages>
       <abstract>Code synthesis, which requires a deep understanding of complex natural language (NL) problem descriptions, generation of code instructions for complex algorithms and data structures, and the successful execution of comprehensive unit tests, presents a significant challenge. Thus, while large language models (LLMs) demonstrate impressive proficiency in natural language processing (NLP), their performance in code generation tasks remains limited. In this paper, we introduce a new approach to code generation tasks leveraging the multi-agent prompting that uniquely replicates the full cycle of program synthesis as observed in human developers. Our framework, MapCoder, consists of four LLM agents specifically designed to emulate the stages of this cycle: recalling relevant examples, planning, code generation, and debugging. After conducting thorough experiments, with multiple LLMs ablations and analyses across eight challenging competitive problem-solving and program synthesis benchmarks—MapCoder showcases remarkable code generation capabilities, achieving their new state-of-the-art (pass@1) results—(HumanEval 93.9%, MBPP 83.1%, APPS 22.0%, CodeContests 28.5%, and xCodeEval 45.3%). Moreover, our method consistently delivers superior performance across various programming languages and varying problem difficulties. We open-source our framework at https://github.com/Md-Ashraful-Pramanik/MapCoder.</abstract>
       <url hash="cf50dcd7">2024.acl-long.269</url>
@@ -3882,7 +3882,7 @@
     <paper id="278">
       <title>Estimating Agreement by Chance for Sequence Annotation</title>
       <author><first>Diya</first><last>Li</last><affiliation>Freenome</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Ao</first><last>Yuan</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Chunxiao</first><last>Zhou</last><affiliation>National Institutes of Health</affiliation></author>
       <pages>5085-5097</pages>
@@ -4122,7 +4122,7 @@
       <author><first>Giuseppe</first><last>Castellucci</last><affiliation>Amazon</affiliation></author>
       <author><first>Nikhita</first><last>Vedula</last><affiliation>Amazon</affiliation></author>
       <author><first>Jason Ingyu</first><last>Choi</last><affiliation>Amazon</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>5396-5420</pages>
       <abstract>Recommender systems are widely used to suggest engaging content, and Large Language Models (LLMs) have given rise to generative recommenders. Such systems can directly generate items, including for open-set tasks like question suggestion. While the world knowledge of LLMs enables good recommendations, improving the generated content through user feedback is challenging as continuously fine-tuning LLMs is prohibitively expensive. We present a training-free approach for optimizing generative recommenders by connecting user feedback loops to LLM-based optimizers. We propose a generative explore-exploit method that can not only exploit generated items with known high engagement, but also actively explore and discover hidden population preferences to improve recommendation quality. We evaluate our approach on question generation in two domains (e-commerce and general knowledge), and model user feedback with Click Through Rate (CTR). Experiments show our LLM-based explore-exploit approach can iteratively improve recommendations and consistently increase CTR. Ablation analysis shows that generative exploration is key to learning user preferences, avoiding the pitfalls of greedy exploit-only approaches. A human evaluation strongly supports our quantitative findings.</abstract>
       <url hash="44cf7f36">2024.acl-long.295</url>
@@ -4138,7 +4138,7 @@
       <author><first>Chunting</first><last>Zhou</last><affiliation>Meta AI</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Xi</first><last>Lin</last><affiliation>Facebook and Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <author><first>Srini</first><last>Iyer</last><affiliation>University of Washington, Seattle and Department of Computer Science, University of Washington</affiliation></author>
       <pages>5421-5434</pages>
       <abstract>In order for large language model (LLM)-based assistants to effectively adapt to evolving information needs, it must be possible to update their factual knowledge through continued training on new data. The standard recipe for doing so involves continued pre-training on new documents followed by instruction-tuning on question-answer (QA) pairs. However, we find that LLMs trained with this recipe struggle to answer questions, even though the perplexity of documents is minimized. We found that QA pairs are generally straightforward, while documents are more complex, weaving many factual statements together in an intricate manner. Therefore, we hypothesize that it is beneficial to expose LLMs to QA pairs before continued pre-training on documents so that the process of encoding knowledge from complex documents takes into account how this knowledge is accessed through questions. Based on this, we propose pre-instruction-tuning (PIT), a method that instruction-tunes on questions prior to training on documents. This contrasts with standard instruction-tuning, which learns how to extract knowledge after training on documents. Extensive experiments and ablation studies demonstrate that pre-instruction-tuning significantly enhances the ability of LLMs to absorb knowledge from new documents, outperforming standard instruction-tuning by 17.8%.</abstract>
@@ -4204,7 +4204,7 @@
       <author><first>Yunkun</first><last>Wang</last></author>
       <author><first>Yunzhe</first><last>Li</last></author>
       <author><first>Qian</first><last>Chen</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Tingyu</first><last>Lin</last><affiliation>Technische Universität Wien</affiliation></author>
       <author><first>Weishan</first><last>Zhao</last></author>
       <author><first>Li</first><last>Zhu</last></author>
@@ -4233,7 +4233,7 @@
       <author><first>Fengqing</first><last>Jiang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Luyao</first><last>Niu</last></author>
       <author><first>Jinyuan</first><last>Jia</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>5587-5605</pages>
       <abstract>As large language models (LLMs) become increasingly integrated into real-world applications such as code generation and chatbot assistance, extensive efforts have been made to align LLM behavior with human values, including safety. Jailbreak attacks, which aim to provoke unintended and unsafe behaviors from LLMs, remain a significant LLM safety threat. We analyze tokens, which are the smallest unit of text that can be processed by LLMs and make the following observations: (1) probabilities of tokens representing harmful responses are higher than those of harmless responses, and (2) responses containing safety disclaimers appear among the top tokens when token probabilities are sorted in descending order. In this paper, we leverage (1) and (2) to develop SafeDecoding, a safety-aware decoding strategy for LLMs, to defend against jailbreak attacks. We perform extensive experiments to evaluate SafeDecoding against six SOTA jailbreak attacks (GCG, AutoDAN, PAIR, DeepInception, SAP30, and template based attack) on five LLMs (Vicuna, Llama2, Guanaco, falcon, and Dolphin) using four benchmark datasets (AdvBench, HEx-PHI, MT-Bench, and Just-Eval). Our results show that SafeDecoding significantly reduces attack success rate and harmfulness of jailbreak attacks without compromising the helpfulness of responses to benign user queries while outperforming six defense methods (Perpelexity, Paraphrase, Retokenization, Self-Reminder, ICD, and Self-Examination).</abstract>
@@ -4293,7 +4293,7 @@
       <title>Speaker Verification in Agent-generated Conversations</title>
       <author><first>Yizhe</first><last>Yang</last></author>
       <author><first>Palakorn</first><last>Achananuparp</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Jing</first><last>Jiang</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Ee-Peng</first><last>Lim</last><affiliation>Singapore Management University</affiliation></author>
       <pages>5655-5676</pages>
@@ -4323,7 +4323,7 @@
       <author><first>Haoyang</first><last>Huang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Dongdong</first><last>Zhang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author id="xiaolei-wang-fudan"><first>Xiaolei</first><last>Wang</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>5701-5715</pages>
@@ -4523,7 +4523,7 @@
       <author><first>Enzhi</first><last>Zhang</last></author>
       <author><first>Xiang</first><last>Wang</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>5949-5966</pages>
       <abstract>Language Models (LMs) excel in understanding textual descriptions of proteins, as evident in biomedical question-answering tasks. However, their capability falters with raw protein data, such as amino acid sequences, due to a deficit in pretraining on such data. Conversely, Protein Language Models (PLMs) can understand and convert protein data into high-quality representations, but struggle to process texts. To address their limitations, we introduce ProtT3, a framework for Protein-to-Text Generation for Text-based Protein Understanding. ProtT3 empowers an LM to understand protein sequences of amino acids by incorporating a PLM as its protein understanding module, enabling effective protein-to-text generation. This collaboration between PLM and LM is facilitated by a cross-modal projector (i.e., Q-Former) that bridges the modality gap between the PLM’s representation space and the LM’s input space. Unlike previous studies focusing on protein property prediction and protein-text retrieval, we delve into the largely unexplored field of protein-to-text generation. To facilitate comprehensive benchmarks and promote future research, we establish quantitative evaluations for protein-text modeling tasks, including protein captioning, protein question-answering, and protein-text retrieval. Our experiments show that ProtT3 substantially surpasses current baselines, with ablation studies further highlighting the efficacy of its core components. Our code is available at https://github.com/acharkq/ProtT3.</abstract>
       <url hash="888b65a0">2024.acl-long.324</url>
@@ -4556,7 +4556,7 @@
       <author><first>Jinfeng</first><last>Zhou</last></author>
       <author><first>Alvionna</first><last>Sunaryo</last></author>
       <author><first>Tatia</first><last>Lee</last><affiliation>University of Hong Kong</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <pages>5986-6004</pages>
       <abstract>Recent advances in Large Language Models (LLMs) have highlighted the need for robust, comprehensive, and challenging benchmarks. Yet, research on evaluating their Emotional Intelligence (EI) is considerably limited. Existing benchmarks have two major shortcomings: first, they mainly focus on emotion recognition, neglecting essential EI capabilities such as emotion management and thought facilitation through emotion understanding; second, they are primarily constructed from existing datasets, which include frequent patterns, explicit information, and annotation errors, leading to unreliable evaluation. We propose EmoBench, a benchmark that draws upon established psychological theories and proposes a comprehensive definition for machine EI, including Emotional Understanding and Emotional Application. EmoBench includes a set of 400 hand-crafted questions in English and Chinese, which are meticulously designed to require thorough reasoning and understanding. Our findings reveal a considerable gap between the EI of existing LLMs and the average human, highlighting a promising direction for future research. Our code and data are publicly available at https://github.com/Sahandfer/EmoBench.</abstract>
@@ -4580,7 +4580,7 @@
     </paper>
     <paper id="328">
       <title><fixed-case>F</fixed-case>in<fixed-case>T</fixed-case>ext<fixed-case>QA</fixed-case>: A Dataset for Long-form Financial Question Answering</title>
-      <author id="jian-chen"><first>Jian</first><last>Chen</last><affiliation>HSBC GPS LAB</affiliation></author>
+      <author><first>Jian</first><last>Chen</last><affiliation>HSBC GPS LAB</affiliation></author>
       <author><first>Peilin</first><last>Zhou</last></author>
       <author><first>Yining</first><last>Hua</last></author>
       <author><first>Loh</first><last>Xin</last></author>
@@ -4698,7 +4698,7 @@
       <author><first>Eva</first><last>Hasler</last><affiliation>Amazon</affiliation></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last><affiliation>Amazon and University of Cambridge</affiliation></author>
       <author><first>Christof</first><last>Monz</last><affiliation>University of Amsterdam, University of Amsterdam</affiliation></author>
-      <author><first>Ke</first><last>Tran</last><affiliation>Amazon</affiliation></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last><affiliation>Amazon</affiliation></author>
       <pages>6189-6206</pages>
       <abstract>Fine-tuning large language models (LLMs) for machine translation has shown improvements in overall translation quality. However, it is unclear what is the impact of fine-tuning on desirable LLM behaviors that are not present in neural machine translation models, such as steerability, inherent document-level translation abilities, and the ability to produce less literal translations. We perform an extensive translation evaluation on the LLaMA and Falcon family of models with model size ranging from 7 billion up to 65 billion parameters.Our results show that while fine-tuning improves the general translation quality of LLMs, several abilities degrade. In particular, we observe a decline in the ability to perform formality steering, to produce technical translations through few-shot examples, and to perform document-level translation. On the other hand, we observe that the model produces less literal translations after fine-tuning on parallel data. We show that by including monolingual data as part of the fine-tuning data we can maintain the abilities while simultaneously enhancing overall translation quality. Our findings emphasize the need for fine-tuning strategies that preserve the benefits of LLMs for machine translation.</abstract>
       <url hash="d4ccfe62">2024.acl-long.336</url>
@@ -4712,7 +4712,7 @@
       <author><first>Wanli</first><last>Yang</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Qi</first><last>Cao</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences, China</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>6207-6227</pages>
       <abstract>While auxiliary information has become a key to enhancing Large Language Models (LLMs), relatively little is known about how LLMs merge these contexts, specifically contexts generated by LLMs and those retrieved from external sources.To investigate this, we formulate a systematic framework to identify whether LLMs’ responses are attributed to either generated or retrieved contexts.To easily trace the origin of the response, we construct datasets with conflicting contexts, i.e., each question is paired with both generated and retrieved contexts, yet only one of them contains the correct answer.Our experiments reveal a significant bias in several LLMs (GPT-4/3.5 and Llama2) to favor generated contexts, even when they provide incorrect information.We further identify two key factors contributing to this bias: i) contexts generated by LLMs typically show greater similarity to the questions, increasing their likelihood of being selected; ii) the segmentation process used in retrieved contexts disrupts their completeness, thereby hindering their full utilization in LLMs.Our analysis enhances the understanding of how LLMs merge diverse contexts, offers valuable insights for advancing current LLM augmentation methods, and highlights the risk of generated misinformation for retrieval-augmented LLMs.</abstract>
       <url hash="3cfc6db8">2024.acl-long.337</url>
@@ -4721,11 +4721,11 @@
     </paper>
     <paper id="338">
       <title>Unveiling Linguistic Regions in Large Language Models</title>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Jun</first><last>Zhao</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>6228-6247</pages>
       <abstract>Large Language Models (LLMs) have demonstrated considerable cross-lingual alignment and generalization ability. Current research primarily focuses on improving LLMs’ cross-lingual generalization capabilities. However, there is still a lack of research on the intrinsic mechanisms of how LLMs achieve cross-lingual alignment. From the perspective of region partitioning, this paper conducts several investigations on the linguistic competence of LLMs. We discover a core region in LLMs that corresponds to linguistic competence, accounting for approximately 1% of the total model parameters. Removing this core region by setting parameters to zero results in a significant performance decrease across 30 different languages. Furthermore, this core region exhibits significant dimensional dependence, perturbations to even a single parameter on specific dimensions leading to a loss of linguistic competence. Moreover, we discover that distinct monolingual regions exist for different languages, and disruption to these specific regions substantially reduces the LLMs’ proficiency in those corresponding languages. Our research also indicates that freezing the core linguistic region during further pre-training can mitigate the issue of catastrophic forgetting (CF), a common phenomenon observed during further pre-training of LLMs. Overall, exploring the LLMs’ functional regions provides insights into the foundation of their intelligence.</abstract>
       <url hash="bf7913d1">2024.acl-long.338</url>
@@ -4764,7 +4764,7 @@
       <author><first>Yisong</first><last>Miao</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Hongfu</first><last>Liu</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
       <pages>6277-6295</pages>
       <abstract>While large language models have significantly enhanced the effectiveness of discourse relation classifications, it remains unclear whether their comprehension is faithful and reliable. We provide DiSQ, a new method for evaluating the faithfulness of understanding discourse based on question answering. We first employ in-context learning to annotate the reasoning for discourse comprehension, based on the connections among key events within the discourse. Following this, DiSQ interrogates the model with a sequence of questions to assess its grasp of core event relations, its resilience to counterfactual queries, as well as its consistency to its previous responses. then evaluate language models with different architectural designs using DiSQ, finding: (1) DiSQ presents a significant challenge for all models, with the top-performing GPT model attaining only 41% of the ideal performance in PDTB; (2) DiSQ is robust to domain shifts and paraphrase variations; (3) Open-source models generally lag behind their closed-source GPT counterparts, with notable exceptions being those enhanced with chat and code/math features; (4) Our analysis validates the effectiveness of explicitly signalled discourse connectives, the role of contextual information, and the benefits of using historical QA data.</abstract>
@@ -4851,8 +4851,8 @@
     <paper id="348">
       <title>Error-preserving Automatic Speech Recognition of Young <fixed-case>E</fixed-case>nglish Learners’ Language</title>
       <author><first>Janick</first><last>Michot</last></author>
-      <author><first>Manuela</first><last>Hürlimann</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
-      <author><first>Jan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
       <author><first>Luzia</first><last>Sauer</last></author>
       <author><first>Katsiaryna</first><last>Mlynchyk</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften and ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>ZHAW School of Engineering</affiliation></author>
@@ -4953,7 +4953,7 @@
     </paper>
     <paper id="356">
       <title>The Hidden Space of Transformer Language Adapters</title>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Marius</first><last>Mosbach</last><affiliation>McGill University and Mila - Quebec Artificial Intelligence Institute</affiliation></author>
       <author><first>Matan</first><last>Eyal</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Dietrich</first><last>Klakow</last><affiliation>Saarland University</affiliation></author>
@@ -5037,7 +5037,7 @@
       <title>Decoupled Vocabulary Learning Enables Zero-Shot Translation from Unseen Languages</title>
       <author><first>Carlos</first><last>Mullov</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
       <author><first>Quan</first><last>Pham</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>6693-6709</pages>
       <abstract>Multilingual neural machine translation systems learn to map sentences of different languages into a common representation space. Intuitively, with a growing number of seen languages the encoder sentence representation grows more flexible and easily adaptable to new languages. In this work, we test this hypothesis by zero-shot translating from unseen languages. To deal with unknown vocabularies from unknown languages we propose a setup where we decouple learning of vocabulary and syntax, i.e. for each language we learn word representations in a separate step (using cross-lingual word embeddings), and then train to translate while keeping those word representations frozen. We demonstrate that this setup enables zero-shot translation from entirely unseen languages. Zero-shot translating with a model trained on Germanic and Romance languages we achieve scores of 42.6 BLEU for Portuguese-English and 20.7 BLEU for Russian-English on TED domain. We explore how this zero-shot translation capability develops with varying number of languages seen by the encoder. Lastly, we explore the effectiveness of our decoupled learning strategy for unsupervised machine translation. By exploiting our model’s zero-shot translation capability for iterative back-translation we attain near parity with a supervised setting.</abstract>
       <url hash="2e5b0070">2024.acl-long.362</url>
@@ -5063,7 +5063,7 @@
     <paper id="364">
       <title><fixed-case>P</fixed-case>ix<fixed-case>T</fixed-case>3: Pixel-based Table-To-Text Generation</title>
       <author><first>Iñigo</first><last>Alonso</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Mirella</first><last>Lapata</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <pages>6721-6736</pages>
       <abstract>Table-to-text generation involves generating appropriate textual descriptions given structured tabular data. It has attracted increasing attention in recent years thanks to the popularity of neural network models and the availability of large-scale datasets. A common feature across existing methods is their treatment of the input as a string, i.e., by employing linearization techniques that do not always preserve information in the table, are verbose, and lack space efficiency. We propose to rethink data-to-text generation as a visual recognition task, removing the need for rendering the input in a string format. We present PixT3, a multimodal table-to-text model that overcomes the challenges of linearization and input size limitations encountered by existing models. PixT3 is trained with a new self-supervised learning objective to reinforce table structure awareness and is applicable to open-ended and controlled generation settings. Experiments on the ToTTo and Logic2Text benchmarks show that PixT3 is competitive and, in some settings, superior to generators that operate solely on text.</abstract>
@@ -5088,7 +5088,7 @@
       <author><first>Ali</first><last>Marashian</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Luke</first><last>Gessler</last><affiliation>Indiana University</affiliation></author>
       <author><first>Alexis</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>6752-6765</pages>
       <abstract>Canonical morphological segmentation is the process of analyzing words into the standard (<i>aka</i> underlying) forms of their constituent morphemes.This is a core task in endangered language documentation, and NLP systems have the potential to dramatically speed up this process. In typical language documentation settings, training data for canonical morpheme segmentation is scarce, making it difficult to train high quality models. However, translation data is often much more abundant, and, in this work, we present a method that attempts to leverage translation data in the canonical segmentation task. We propose a character-level sequence-to-sequence model that incorporates representations of translations obtained from pretrained high-resource monolingual language models as an additional signal. Our model outperforms the baseline in a super-low resource setting but yields mixed results on training splits with more data. Additionally, we find that we can achieve strong performance even without needing difficult-to-obtain word level alignments. While further work is needed to make translations useful in higher-resource settings, our model shows promise in severely resource-constrained settings.</abstract>
       <url hash="41d9ed81">2024.acl-long.366</url>
@@ -5099,10 +5099,10 @@
       <title><fixed-case>XC</fixed-case>ode<fixed-case>E</fixed-case>val: An Execution-based Large Scale Multilingual Multitask Benchmark for Code Understanding, Generation, Translation and Retrieval</title>
       <author><first>Mohammad Abdullah Matin</first><last>Khan</last></author>
       <author><first>M Saiful</first><last>Bari</last></author>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Weishi</first><last>Wang</last></author>
-      <author><first>Md Rizwan</first><last>Parvez</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>6766-6805</pages>
       <abstract>Recently, pre-trained large language models (LLMs) have shown impressive abilities in generating codes from natural language descriptions, repairing buggy codes, translating codes between languages, and retrieving relevant code segments. However, the evaluation of these models has often been performed in a scattered way on only one or two specific tasks, in a few languages, at a partial granularity (e.g., function) level, and in many cases without proper training data. Even more concerning is that in most cases the evaluation of generated codes has been done in terms of mere lexical overlap with a reference code rather than actual execution. We introduce *xCodeEval*, the largest executable multilingual multitask benchmark to date consisting of 25 M document-level coding examples (16.5 B tokens) from about 7.5 K unique problems covering up to 11 programming languages with execution-level parallelism. It features a total of 7 tasks involving code understanding, generation, translation and retrieval. *xCodeEval* adopts an execution-based evaluation and offers a multilingual code execution engine, *ExecEval* that supports unit test based execution in all the 11 languages. To address the challenge of balancing the distributions of text-code samples over multiple attributes in validation/test sets, we propose a novel data splitting and a data selection schema based on the geometric mean and graph-theoretic principle. Our experiments with OpenAI’s LLMs (zero-shot) and open-LLMs (zero-shot and fine-tuned) on the tasks and languages demonstrate to be quite challenging as per the current advancements in language models.</abstract>
       <url hash="e2e65a30">2024.acl-long.367</url>
@@ -5114,7 +5114,7 @@
       <author><first>Haochen</first><last>Tan</last></author>
       <author><first>Zhijiang</first><last>Guo</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Zhan</first><last>Shi</last></author>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Zhili</first><last>Liu</last></author>
       <author><first>Yunlong</first><last>Feng</last></author>
       <author><first>Xiaoguang</first><last>Li</last></author>
@@ -5134,7 +5134,7 @@
       <author><first>Maxime</first><last>Peyrard</last><affiliation>CNRS</affiliation></author>
       <author><first>Martin</first><last>Josifoski</last><affiliation>Swiss Federal Institute of Technology Lausanne</affiliation></author>
       <author><first>Vishrav</first><last>Chaudhary</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Emre</first><last>Kiciman</last><affiliation>Microsoft</affiliation></author>
       <author><first>Hamid</first><last>Palangi</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Barun</first><last>Patra</last><affiliation>Microsoft</affiliation></author>
@@ -5197,7 +5197,7 @@
       <author><first>Jiaxin</first><last>Shi</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Divyanshu</first><last>Sheth</last></author>
       <author><first>Prakhar</first><last>Gupta</last></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>6901-6929</pages>
       <abstract>We present a generalizable classification approach that leverages Large Language Models (LLMs) to facilitate the detection of implicitly encoded social meaning in conversations. We design a multi-faceted prompt to extract a textual explanation of the reasoning that connects visible cues to underlying social meanings. These extracted explanations or rationales serve as augmentations to the conversational text to facilitate dialogue understanding and transfer. Our empirical results over 2,340 experimental settings demonstrate the significant positive impact of adding these rationales. Our findings hold true for in-domain classification, zero-shot, and few-shot domain transfer for two different social meaning detection tasks, each spanning two different corpora.</abstract>
       <url hash="bba1ce6e">2024.acl-long.373</url>
@@ -5269,7 +5269,7 @@
     </paper>
     <paper id="379">
       <title><fixed-case>PLUG</fixed-case>: Leveraging Pivot Language in Cross-Lingual Instruction Tuning</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Dong-Ho</first><last>Lee</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Yuwei</first><last>Fang</last><affiliation>Snap Inc.</affiliation></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
@@ -5393,7 +5393,7 @@
       <author><first>Irene</first><last>Li</last></author>
       <author><first>Alex</first><last>Pentland</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>7194-7219</pages>
       <abstract>Making legal knowledge accessible to non-experts is crucial for enhancing general legal literacy and encouraging civic participation in democracy. However, legal documents are often challenging to understand for people without legal backgrounds. In this paper, we present a novel application of large language models (LLMs) in legal education to help non-experts learn intricate legal concepts through storytelling, an effective pedagogical tool in conveying complex and abstract concepts. We also introduce a new dataset LegalStories, which consists of 294 complex legal doctrines, each accompanied by a story and a set of multiple-choice questions generated by LLMs. To construct the dataset, we experiment with various LLMs to generate legal stories explaining these concepts. Furthermore, we use an expert-in-the-loop approach to iteratively design multiple-choice questions. Then, we evaluate the effectiveness of storytelling with LLMs through randomized controlled trials (RCTs) with legal novices on 10 samples from the dataset. We find that LLM-generated stories enhance comprehension of legal concepts and interest in law among non-native speakers compared to only definitions. Moreover, stories consistently help participants relate legal concepts to their lives. Finally, we find that learning with stories shows a higher retention rate for non-native speakers in the follow-up assessment. Our work has strong implications for using LLMs in promoting teaching and learning in the legal field and beyond.</abstract>
@@ -5405,7 +5405,7 @@
       <title>Intrinsic Task-based Evaluation for Referring Expression Generation</title>
       <author><first>Guanyi</first><last>Chen</last><affiliation>Central China Normal University</affiliation></author>
       <author><first>Fahime</first><last>Same</last><affiliation>Trivago N.V.</affiliation></author>
-      <author><first>Kees</first><last>Van Deemter</last><affiliation>Utrecht University</affiliation></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last><affiliation>Utrecht University</affiliation></author>
       <pages>7220-7231</pages>
       <abstract>Recently, a human evaluation study of Referring Expression Generation (REG) models had an unexpected conclusion: on WEBNLG, Referring Expressions (REs) generated by the state-of-the-art neural models were not only indistinguishable from the REs in WEBNLG but also from the REs generated by a simple rule-based system. Here, we argue that this limitation could stem from the use of a purely ratings-based human evaluation (which is a common practice in Natural Language Generation). To investigate these issues, we propose an intrinsic task-based evaluation for REG models, in which, in addition to rating the quality of REs, participants were asked to accomplish two meta-level tasks. One of these tasks concerns the referential success of each RE; the other task asks participants to suggest a better alternative for each RE. The outcomes suggest that, in comparison to previous evaluations, the new evaluation protocol assesses the performance of each REG model more comprehensively and makes the participants’ ratings more reliable and discriminable.</abstract>
       <url hash="ab85ba30">2024.acl-long.389</url>
@@ -5477,12 +5477,12 @@
     </paper>
     <paper id="395">
       <title>Prompt Optimization via Adversarial In-Context Learning</title>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Yiran</first><last>Zhao</last></author>
       <author><first>Hannah</first><last>Brown</last></author>
       <author><first>Yuxi</first><last>Xie</last></author>
       <author><first>James Xu</first><last>Zhao</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <author><first>Kenji</first><last>Kawaguchi</last></author>
       <author><first>Michael</first><last>Shieh</last></author>
       <author><first>Junxian</first><last>He</last></author>
@@ -5509,7 +5509,7 @@
       <title>Generate-then-Ground in Retrieval-Augmented Generation for Multi-hop Question Answering</title>
       <author><first>Zhengliang</first><last>Shi</last></author>
       <author><first>Shuo</first><last>Zhang</last></author>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shen</first><last>Gao</last><affiliation>University of Electronic Science and Technology of China</affiliation></author>
       <author><first>Pengjie</first><last>Ren</last><affiliation>Shandong University</affiliation></author>
       <author><first>Zhumin</first><last>Chen</last><affiliation>Shandong University</affiliation></author>
@@ -5545,7 +5545,7 @@
     </paper>
     <paper id="400">
       <title><fixed-case>A</fixed-case>bout<fixed-case>M</fixed-case>e: Using Self-Descriptions in Webpages to Document the Effects of <fixed-case>E</fixed-case>nglish Pretraining Data Filters</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Facebook and University of Washington, Seattle</affiliation></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Emma</first><last>Strubell</last><affiliation>Allen Institute for Artificial Intelligence and Carnegie Mellon University</affiliation></author>
@@ -5680,7 +5680,7 @@
       <author><first>Xiang</first><last>Yue</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jie</first><last>Huang</last></author>
       <author><first>Sujian</first><last>Li</last><affiliation>Peking University</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <pages>7584-7600</pages>
       <abstract>Large Language Models (LLMs) have become integral components in various autonomous agent systems.In this study, we present an exploration-based trajectory optimization approach, referred to as ETO. This learning method is designed to enhance the performance of open LLM agents. Contrary to previous studies that exclusively train on successful expert trajectories, our method allows agents to learn from their exploration failures. This leads to improved performance through an iterative optimization framework. During the exploration phase, the agent interacts with the environment while completing given tasks, gathering failure trajectories to create contrastive trajectory pairs. In the subsequent training phase, the agent utilizes these trajectory preference pairs to update its policy using contrastive learning methods like DPO. This iterative cycle of exploration and training fosters continued improvement in the agents. Our experiments on three complex tasks demonstrate that ETO consistently surpasses baseline performance by a large margin. Furthermore, an examination of task-solving efficiency and potential in scenarios lacking expert trajectory underscores the effectiveness of our approach.</abstract>
       <url hash="0da5e409">2024.acl-long.409</url>
@@ -5828,7 +5828,7 @@
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Dimitar</first><last>Dimitrov</last></author>
       <author><first>Ivan</first><last>Koychev</last><affiliation>Sofia University “St. Kliment Ohridski”</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>7768-7791</pages>
       <abstract>We introduce EXAMS-V, a new challenging multi-discipline multimodal multilingual exam benchmark for evaluating vision language models. It consists of 20,932 multiple-choice questions across 20 school disciplines covering natural science, social science, and other miscellaneous studies, e.g., religion, fine arts, business, etc. EXAMS-V includes a variety of multimodal features such as text, images, tables, figures, diagrams, maps, scientific symbols, and equations. The questions come in 11 languages from 7 language families. Unlike existing benchmarks, EXAMS-V is uniquely curated by gathering school exam questions from various countries, with a variety of education systems. This distinctive approach calls for intricate reasoning across diverse languages and relies on region-specific knowledge. Solving the problems in the dataset requires advanced perception and joint reasoning over the text and the visual content in the image. Our evaluation results demonstrate that this is a challenging dataset, which is difficult even for advanced vision–text models such as GPT-4V and Gemini; this underscores the inherent complexity of the dataset and its significance as a future benchmark.</abstract>
       <url hash="82bebe81">2024.acl-long.420</url>
@@ -5915,7 +5915,7 @@
       <author><first>Yuanbin</first><last>Qu</last></author>
       <author><first>Konrad</first><last>Staniszewski</last><affiliation>University of Warsaw and IDEAS NCBR Sp. z o.o.</affiliation></author>
       <author><first>Szymon</first><last>Tworkowski</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Piotr</first><last>Miłoś</last><affiliation>IDEAS NCBR and Polish Academy of Science</affiliation></author>
       <author><first>Yuxiang</first><last>Wu</last></author>
       <author><first>Pasquale</first><last>Minervini</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
@@ -5945,7 +5945,7 @@
     </paper>
     <paper id="429">
       <title><fixed-case>S</fixed-case>pike<fixed-case>V</fixed-case>oice: High-Quality Text-to-Speech Via Efficient Spiking Neural Network</title>
-      <author id="kexin-wang"><first>Kexin</first><last>Wang</last></author>
+      <author><first>Kexin</first><last>Wang</last></author>
       <author><first>Jiahong</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yong</first><last>Ren</last></author>
       <author><first>Man</first><last>Yao</last><affiliation>Institute of automation, Chinese academy of sciences</affiliation></author>
@@ -5988,8 +5988,8 @@
       <author><first>Haohao</first><last>Luo</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Ying</first><last>Shen</last></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>7978-7993</pages>
       <abstract>Multiple-choice questions (MCQs) are important in enhancing concept learning and student engagement for educational purposes. Despite the multimodal nature of educational content, current methods focus mainly on text-based inputs and often neglect the integration of visual information. In this work, we study the problem of multimodal educational question generation, which aims at generating subject-specific educational questions with plausible yet incorrect distractors based on multimodal educational content. To tackle this problem, we introduce a novel framework, named Chain-of-Exemplar (CoE), which utilizes multimodal large language models (MLLMs) with Chain-of-Thought reasoning to improve the generation of challenging distractors. Furthermore, CoE leverages three-stage contextualized exemplar retrieval to retrieve exemplary questions as guides for generating more subject-specific educational questions. Experimental results on the ScienceQA benchmark demonstrate the superiority of CoE in both question generation and distractor generation over existing methods across various subjects and educational levels.</abstract>
       <url hash="d332ba1e">2024.acl-long.432</url>
@@ -6082,7 +6082,7 @@
       <author><first>Konika</first><last>Mandal</last></author>
       <author><first>Aman</first><last>Chadha</last><affiliation>Amazon</affiliation></author>
       <author><first>Sriparna</first><last>Saha</last><affiliation>Indian Institute of Technology Patna, India</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>8084-8104</pages>
       <abstract>In the digital world, memes present a unique challenge for content moderation due to their potential to spread harmful content. Although detection methods have improved, proactive solutions such as intervention are still limited, with current research focusing mostly on text-based content, neglecting the widespread influence of multimodal content like memes. Addressing this gap, we present <i>MemeGuard</i>, a comprehensive framework leveraging Large Language Models (LLMs) and Visual Language Models (VLMs) for meme intervention. <i>MemeGuard</i> harnesses a specially fine-tuned VLM, <i>VLMeme</i>, for meme interpretation, and a multimodal knowledge selection and ranking mechanism (<i>MKS</i>) for distilling relevant knowledge. This knowledge is then employed by a general-purpose LLM to generate contextually appropriate interventions. Another key contribution of this work is the <i>
           <b>I</b>ntervening</i>
@@ -6250,7 +6250,7 @@
     <paper id="452">
       <title><fixed-case>B</fixed-case>iz<fixed-case>B</fixed-case>ench: A Quantitative Reasoning Benchmark for Business and Finance</title>
       <author><first>Michael</first><last>Krumdick</last><affiliation>Kensho</affiliation></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
       <author><first>Viet Dac</first><last>Lai</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Varshini</first><last>Reddy</last></author>
       <author><first>Charles</first><last>Lovering</last><affiliation>Kensho</affiliation></author>
@@ -6283,7 +6283,7 @@
       <author><first>Eftekhar</first><last>Hossain</last></author>
       <author><first>Omar</first><last>Sharif</last><affiliation>Dartmouth College and Chittagong University of Engineering and Techology</affiliation></author>
       <author><first>Mohammed Moshiul</first><last>Hoque</last><affiliation>Chittagong University of Engineering and Technology</affiliation></author>
-      <author><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
       <pages>8347-8359</pages>
       <abstract>Internet memes have become a powerful means for individuals to express emotions, thoughts, and perspectives on social media. While often considered as a source of humor and entertainment, memes can also disseminate hateful content targeting individuals or communities. Most existing research focuses on the negative aspects of memes in high-resource languages, overlooking the distinctive challenges associated with low-resource languages like Bengali (also known as Bangla). Furthermore, while previous work on Bengali memes has focused on detecting hateful memes, there has been no work on detecting their targeted entities. To bridge this gap and facilitate research in this arena, we introduce a novel multimodal dataset for Bengali, BHM (Bengali Hateful Memes). The dataset consists of 7,148 memes with Bengali as well as code-mixed captions, tailored for two tasks: (i) detecting hateful memes, and (ii) detecting the social entities they target (i.e., Individual, Organization, Community, and Society). To solve these tasks, we propose DORA (Dual cO-attention fRAmework), a multimodal deep neural network that systematically extracts the significant modality features from the memes and jointly evaluates them with the modality-specific features to understand the context better. Our experiments show that DORA is generalizable on other low-resource hateful meme datasets and outperforms several state-of-the-art rivaling baselines.</abstract>
       <url hash="c57a657c">2024.acl-long.454</url>
@@ -6307,7 +6307,7 @@
       <author><first>Ariel</first><last>Gera</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Benjamin</first><last>Sznajder</last></author>
       <author><first>Leshem</first><last>Choshen</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Eyal</first><last>Shnarch</last><affiliation>International Business Machines</affiliation></author>
       <pages>8384-8402</pages>
       <abstract>Model selection for a given target task can be costly, as it may entail extensive annotation of the quality of outputs of different models. We introduce DiffUse, an efficient method to make an informed decision between candidate text generation models based on preference annotations. DiffUse reduces the required amount of annotations, thus saving valuable time and resources in performing evaluation.DiffUse intelligently selects instances by clustering embeddings that represent the semantic differences between model outputs. Thus, it is able to identify a subset of examples that are more informative for preference decisions. Our method is model-agnostic, and can be applied to any text generation model for selecting between models, prompts and configurations. Moreover, we propose a practical iterative approach for dynamically determining how many instances to annotate. In a series of experiments over hundreds of model pairs, we demonstrate that DiffUse can dramatically reduce the required number of annotations – by up to 75% – while maintaining high evaluation reliability.</abstract>
@@ -6352,7 +6352,7 @@
       <author><first>Hannah</first><last>Göke</last><affiliation>University Hospital Essen</affiliation></author>
       <author><first>Monika</first><last>Coers</last></author>
       <author><first>Wei</first><last>Xu</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <author><first>Junyi Jessy</first><last>Li</last><affiliation>University of Texas, Austin</affiliation></author>
       <pages>8437-8464</pages>
       <abstract>Plain language summarization with LLMs can be useful for improving textual accessibility of technical content. But how factual are these summaries in a high-stakes domain like medicine? This paper presents FactPICO, a factuality benchmark for plain language summarization of medical texts describing randomized controlled trials (RCTs), which are the basis of evidence-based medicine and can directly inform patient treatment. FactPICO consists of 345 plain language summaries of RCT abstracts generated from three LLMs (i.e., GPT-4, Llama-2, and Alpaca), with fine-grained evaluation and natural language rationales from experts. We assess the factuality of critical elements of RCTs in those summaries: Populations, Interventions, Comparators, Outcomes (PICO), as well as the reported findings concerning these. We also evaluate the correctness of the extra information (e.g., explanations) added by LLMs. Using FactPICO, we benchmark a range of existing factuality metrics, including the newly devised ones based on LLMs. We find that plain language summarization of medical evidence is still challenging, especially when balancing between simplicity and factuality, and that existing metrics correlate poorly with expert judgments on the instance level.</abstract>
@@ -6433,7 +6433,7 @@
       <author><first>Yanda</first><last>Chen</last><affiliation>Columbia University</affiliation></author>
       <author><first>Chen</first><last>Zhao</last><affiliation>New York University Shanghai</affiliation></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>He</first><last>He</last><affiliation>New York University</affiliation></author>
       <pages>8582-8592</pages>
       <abstract>Pre-trained language models (LMs) are capable of in-context learning (ICL): they can adapt to a task with only a few examples given in the prompt without any parameter update. However, it is unclear where this capability comes from as there is a stark distribution shift between pre-training text and ICL prompts. In this work, we study what patterns of the pre-training data contribute to ICL. We find that LMs’ ICL ability depends on <tex-math>\textit{parallel structures}</tex-math> in the pre-training data—pairs of phrases following similar templates in the same context window. Specifically, we detect parallel structures by checking whether training on one phrase improves prediction of the other, and conduct ablation experiments to study their effect on ICL. We show that removing parallel structures in the pre-training data reduces LMs’ ICL accuracy by <tex-math>\textbf{51}</tex-math>% (vs 2% from random ablation). This drop persists even when excluding common patterns such as n-gram repetitions and long-range dependency, showing the diversity and generality of parallel structures. A closer look at the detected parallel structures indicates that they cover diverse linguistic tasks and span long distances in the data.</abstract>
@@ -6504,7 +6504,7 @@
     </paper>
     <paper id="470">
       <title><fixed-case>RAVEL</fixed-case>: Evaluating Interpretability Methods on Disentangling Language Model Representations</title>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <author><first>Mor</first><last>Geva</last><affiliation>Tel Aviv University and Google Research</affiliation></author>
@@ -6518,15 +6518,15 @@
     <paper id="471">
       <title>Large Language Models as Zero-shot Dialogue State Tracker through Function Calling</title>
       <author><first>Zekun</first><last>Li</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Mike</first><last>Ross</last><affiliation>Facebook</affiliation></author>
       <author><first>Patrick</first><last>Huber</last><affiliation>Facebook</affiliation></author>
       <author><first>Seungwhan</first><last>Moon</last><affiliation>Facebook</affiliation></author>
       <author><first>Zhaojiang</first><last>Lin</last><affiliation>Facebook</affiliation></author>
-      <author><first>Xin</first><last>Dong</last><affiliation>Facebook</affiliation></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last><affiliation>Facebook</affiliation></author>
       <author><first>Adithya</first><last>Sagar</last></author>
       <author><first>Xifeng</first><last>Yan</last><affiliation>UC Santa Barbara</affiliation></author>
-      <author><first>Paul</first><last>Crook</last><affiliation>Meta</affiliation></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last><affiliation>Meta</affiliation></author>
       <pages>8688-8704</pages>
       <abstract>Large language models (LLMs) are increasingly prevalent in conversational systems due to their advanced understanding and generative capabilities in general contexts. However, their effectiveness in task-oriented dialogues (TOD), which requires not only response generation but also effective dialogue state tracking (DST) within specific tasks and domains, remains less satisfying. In this work, we propose a novel approach FnCTOD for solving DST with LLMs through function calling. This method improves zero-shot DST, allowing adaptation to diverse domains without extensive data collection or model tuning. Our experimental results demonstrate that our approach achieves exceptional performance with both modestly sized open-source and also proprietary LLMs: with in-context prompting it enables various 7B or 13B parameter models to surpass the previous state-of-the-art (SOTA) achieved by ChatGPT, and improves ChatGPT’s performance beating the SOTA by 5.6% average joint goal accuracy (JGA). Individual model results for GPT-3.5 and GPT-4 are boosted by 4.8% and 14%, respectively. We also show that by fine-tuning on a small collection of diverse task-oriented dialogues, we can equip modestly sized models, specifically a 13B parameter LLaMA2-Chat model, with function-calling capabilities and DST performance comparable to ChatGPT while maintaining their chat capabilities. We have made the code publicly available at https://github.com/facebookresearch/FnCTOD.</abstract>
       <url hash="07ae6ae9">2024.acl-long.471</url>
@@ -6589,9 +6589,9 @@
       <author><first>Pan</first><last>Yang</last></author>
       <author><first>Xiaolong</first><last>Jin</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>8758-8779</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="f6d06e20">2024.acl-long.475</url>
       <bibkey>li-etal-2024-knowcoder</bibkey>
       <doi>10.18653/v1/2024.acl-long.475</doi>
@@ -6616,8 +6616,8 @@
       <author><first>Xuan</first><last>Zhang</last></author>
       <author><first>Wenxuan</first><last>Zhang</last></author>
       <author><first>Yifei</first><last>Yuan</last><affiliation>Copenhagen University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>8795-8812</pages>
       <abstract>Web agents powered by Large Language Models (LLMs) have demonstrated remarkable abilities in planning and executing multi-step interactions within complex web-based environments, fulfilling a wide range of web navigation tasks. Despite these advancements, the potential for LLM-powered agents to effectively engage with sequential user instructions in real-world scenarios has not been fully explored. In this work, we introduce a new task of Conversational Web Navigation, which necessitates sophisticated interactions that span multiple turns with both the users and the environment, supported by a specially developed dataset named Multi-Turn Mind2Web (MT-Mind2Web). To tackle the limited context length of LLMs and the context-dependency issue of the conversational tasks, we further propose a novel framework, named self-reflective memory-augmented planning (Self-MAP), which employs memory utilization and self-reflection techniques. Extensive experiments are conducted to benchmark the MT-Mind2Web dataset, and validate the effectiveness of the proposed method.</abstract>
       <url hash="962bba5b">2024.acl-long.477</url>
@@ -6629,7 +6629,7 @@
       <author><first>Shihan</first><last>Deng</last></author>
       <author><first>Weikai</first><last>Xu</last></author>
       <author><first>Hongda</first><last>Sun</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Tao</first><last>Tan</last></author>
       <author><first>Liujianfeng</first><last>Liujianfeng</last></author>
       <author><first>Ang</first><last>Li</last><affiliation>Software, Xiaomi Inc.</affiliation></author>
@@ -6711,7 +6711,7 @@
       <author><first>Le</first><last>Zhuo</last></author>
       <author><first>Zewen</first><last>Chi</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Minghao</first><last>Xu</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Jianan</first><last>Zhao</last></author>
       <author><first>Heqi</first><last>Zheng</last></author>
       <author><first>Conghui</first><last>He</last><affiliation>Shanghai AI Lab</affiliation></author>
@@ -6789,7 +6789,7 @@
       <author><first>Guangzhi</first><last>Sun</last></author>
       <author><first>Hongcheng</first><last>Liu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Ji</first><last>Wu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
       <author><first>Yu</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yanfeng</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>9041-9060</pages>
@@ -6894,7 +6894,7 @@
       <author><first>Deng</first><last>Cai</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Shuaiyi</first><last>Li</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Peilin</first><last>Zhao</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>9166-9180</pages>
       <abstract>Text watermarking has emerged as a pivotal technique for identifying machine-generated text. However, existing methods often rely on arbitrary vocabulary partitioning during decoding to embed watermarks, which compromises the availability of suitable tokens and significantly degrades the quality of responses. This study assesses the impact of watermarking on different capabilities of large language models (LLMs) from a cognitive science lens. Our finding highlights a significant disparity; knowledge recall and logical reasoning are more adversely affected than language generation. These results suggest a more profound effect of watermarking on LLMs than previously understood. To address these challenges, we introduce Watermarking with Mutual Exclusion (WatME), a novel approach leveraging linguistic prior knowledge of inherent lexical redundancy in LLM vocabularies to seamlessly integrate watermarks. Specifically, WatME dynamically optimizes token usage during the decoding process by applying a mutually exclusive rule to the identified lexical redundancies. This strategy effectively prevents the unavailability of appropriate tokens and preserves the expressive power of LLMs. We provide both theoretical analysis and empirical evidence showing that WatME effectively preserves the diverse capabilities of LLMs while ensuring watermark detectability.</abstract>
       <url hash="a169dcc5">2024.acl-long.496</url>
@@ -6917,7 +6917,7 @@
     </paper>
     <paper id="498">
       <title><fixed-case>MM</fixed-case>-<fixed-case>SAP</fixed-case>: A Comprehensive Benchmark for Assessing Self-Awareness of Multimodal Large Language Models in Perception</title>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last><affiliation>Shanghai Jiaotong University</affiliation></author>
+      <author><first>Yuhao</first><last>Wang</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Yusheng</first><last>Liao</last></author>
       <author><first>Heyang</first><last>Liu</last></author>
       <author><first>Hongcheng</first><last>Liu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
@@ -7046,7 +7046,7 @@
       <author><first>Qipeng</first><last>Guo</last><affiliation>Shanghai AI Laboratory</affiliation></author>
       <author><first>Hang</first><last>Yan</last><affiliation>AI lab</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Dahua</first><last>Lin</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>9348-9369</pages>
       <abstract>Large language models (LLMs) garner significant attention for their unprecedented performance, leading to an increasing number of researches evaluating LLMs. However, these evaluation benchmarks are limited to assessing the instruction-following capabilities, overlooking the fundamental abilities that emerge during the pre-training stage. Previous subjective evaluation methods mainly reply on scoring by API models. However, in the absence of references, large models have shown limited ability to discern subtle differences. To bridge the gap, we propose F-Eval, a bilingual evaluation benchmark to evaluate the fundamental abilities, including expression, commonsense and logic. The tasks in F-Eval include multi-choice objective tasks, open-ended objective tasks, reference-based subjective tasks and reference-free subjective tasks. For reference-free subjective tasks, we devise new evaluation methods, serving as alternatives to scoring by API models. We conduct evaluations on 13 advanced LLMs. Results show that our evaluation methods show higher correlation coefficients and larger distinction than other evaluators. Additionally, we discuss the influence of different model sizes, dimensions, and normalization methods. We anticipate that F-Eval will facilitate the study of LLMs’ fundamental abilities.</abstract>
@@ -7057,7 +7057,7 @@
     <paper id="508">
       <title>Comparing Inferential Strategies of Humans and Large Language Models in Deductive Reasoning</title>
       <author><first>Philipp</first><last>Mondorf</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>9370-9402</pages>
       <abstract>Deductive reasoning plays a pivotal role in the formulation of sound and cohesive arguments. It allows individuals to draw conclusions that logically follow, given the truth value of the information provided. Recent progress in the domain of large language models (LLMs) has showcased their capability in executing deductive reasoning tasks. Nonetheless, a significant portion of research primarily assesses the accuracy of LLMs in solving such tasks, often overlooking a deeper analysis of their reasoning behavior. In this study, we draw upon principles from cognitive psychology to examine inferential strategies employed by LLMs, through a detailed evaluation of their responses to propositional logic problems. Our findings indicate that LLMs display reasoning patterns akin to those observed in humans, including strategies like <tex-math>\textit{supposition following}</tex-math> or <tex-math>\textit{chain construction}</tex-math>. Moreover, our research demonstrates that the architecture and scale of the model significantly affect its preferred method of reasoning, with more advanced models tending to adopt strategies more frequently than less sophisticated ones. Importantly, we assert that a model’s accuracy, that is the correctness of its final conclusion, does not necessarily reflect the validity of its reasoning process. This distinction underscores the necessity for more nuanced evaluation procedures in the field.</abstract>
       <url hash="6b9471f1">2024.acl-long.508</url>
@@ -7179,7 +7179,7 @@
       <author><first>Xinyu</first><last>Hu</last><affiliation>Peking University</affiliation></author>
       <author><first>Mingqi</first><last>Gao</last><affiliation>Peking University</affiliation></author>
       <author><first>Sen</first><last>Hu</last><affiliation>Alibaba Group</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Yicheng</first><last>Chen</last></author>
       <author><first>Teng</first><last>Xu</last></author>
       <author><first>Xiaojun</first><last>Wan</last><affiliation>Peking University</affiliation></author>
@@ -7314,7 +7314,7 @@
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Jinwen</first><last>Huang</last></author>
       <author><first>Ruihua</first><last>Song</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Fuzheng</first><last>Zhang</last></author>
       <author><first>Di</first><last>Zhang</last><affiliation>Kuaishou Technology</affiliation></author>
       <author><first>Kun</first><last>Gai</last></author>
@@ -7360,7 +7360,7 @@
       <author><first>Xiaomian</first><last>Kang</last></author>
       <author><first>Yuchen</first><last>Liu</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>9781-9795</pages>
       <abstract>Simultaneous Machine Translation (SiMT) generates target outputs while receiving stream source inputs and requires a read/write policy to decide whether to wait for the next source token or generate a new target token, whose decisions form a decision path. Existing SiMT methods, which learn the policy by exploring various decision paths in training, face inherent limitations. These methods not only fail to precisely optimize the policy due to the inability to accurately assess the individual impact of each decision on SiMT performance, but also cannot sufficiently explore all potential paths because of their vast number. Besides, building decision paths requires unidirectional encoders to simulate streaming source inputs, which impairs the translation quality of SiMT models. To solve these issues, we propose Self-Modifying State Modeling (SM<tex-math>^2</tex-math>), a novel training paradigm for SiMT task. Without building decision paths, SM<tex-math>^2</tex-math> individually optimizes decisions at each state during training. To precisely optimize the policy, SM<tex-math>^2</tex-math> introduces Self-Modifying process to independently assess and adjust decisions at each state. For sufficient exploration, SM<tex-math>^2</tex-math> proposes Prefix Sampling to efficiently traverse all potential states. Moreover, SM<tex-math>^2</tex-math> ensures compatibility with bidirectional encoders, thus achieving higher translation quality. Experiments show that SM<tex-math>^2</tex-math> outperforms strong baselines. Furthermore, SM<tex-math>^2</tex-math> allows offline machine translation models to acquire SiMT ability with fine-tuning.</abstract>
       <url hash="dd5b0f66">2024.acl-long.528</url>
@@ -7397,7 +7397,7 @@
       <title><fixed-case>D</fixed-case>eterm<fixed-case>LR</fixed-case>: Augmenting <fixed-case>LLM</fixed-case>-based Logical Reasoning from Indeterminacy to Determinacy</title>
       <author><first>Hongda</first><last>Sun</last></author>
       <author><first>Weikai</first><last>Xu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <author><first>Shuo</first><last>Shang</last></author>
@@ -7496,7 +7496,7 @@
       <author><first>Haoqi</first><last>Zheng</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Zhen</first><last>Huang</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Zhihua</first><last>Wen</last><affiliation>National University of Defence Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>9976-9992</pages>
       <abstract>Low-resource languages (LRLs) face challenges in supervised neural machine translation (NMT) due to limited parallel data, prompting research in unsupervised NMT.Unsupervised NMT (UNMT), without requiring ground truth, provides solutions for LRL translations using synthetic pseudo-parallel data and parallel data from auxiliary language pairs. However, they usually encounter translation errors, including errors from synthetic data and from auxiliary language pairs with linguistic biases.We argue that large language models (LLMs) mitigate UNMT’s translation errors by dynamically organizing auxiliary languages in prompts to improve LRL translations. In this paper, we propose <tex-math>\textbf{P}</tex-math>r<tex-math>\textbf{O}</tex-math>bability-driven <tex-math>\textbf{M}</tex-math>eta-graph <tex-math>\textbf{P}</tex-math>rompter (POMP), an approach employing a dynamic graph to organize multiple auxiliary languages, to prompt LLMs in LRL translations. POMP proposes a language-specific meta-graph that dynamically samples multiple translation paths to organize auxiliary languages in constructing prompts. Following the path, POMP prompts LLMs to translate with a mixture of auxiliary languages. We achieve the meta-graph’s evolution by back-propagating evaluation scores to update probabilities on the graph.Our experimental improvements show POMP’s effectiveness on LRLs’ translation.</abstract>
       <url hash="e2551e3f">2024.acl-long.537</url>
@@ -7530,7 +7530,7 @@
       <author><first>Wenhao</first><last>Zhu</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Xiang</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Geng</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>10015-10027</pages>
       <abstract>Intuitively, reasoning abilities are considered language-agnostic. However, existing LLMs exhibit inconsistent reasoning abilities across different languages, e.g., reasoning in the dominant language like English is superior to other languages due to the imbalance of multilingual training data. To enhance reasoning abilities in non-dominant languages, we propose a Multilingual-Alignment-as-Preference Optimization framework (MAPO) to align the reasoning processes in other languages with the dominant language. Specifically, we harness an off-the-shelf translation model for the consistency between answers in non-dominant and dominant languages, which we adopt as the preference for optimization, e.g., Direct Preference Optimization(DPO) or Proximal Policy Optimization (PPO). Experiments show that MAPO stably achieves significant improvements in the multilingual reasoning of various models on all three benchmarks (MSVAMP +16.2%, MGSM +6.1%, and MNumGLUESub +13.3%), with improved reasoning consistency across languages. The project is available at https://github.com/NJUNLP/MAPO.</abstract>
       <url hash="282e0c00">2024.acl-long.539</url>
@@ -7561,7 +7561,7 @@
       <author><first>Yue</first><last>Yu</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Charumathi</first><last>Lakshmanan</last></author>
       <author><first>Yair</first><last>Kurzion</last></author>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University and School of Engineering and Applied Sciences, Harvard University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University and School of Engineering and Applied Sciences, Harvard University</affiliation></author>
       <author><first>Jialu</first><last>Liu</last><affiliation>Google Research</affiliation></author>
       <author><first>Michael</first><last>Bendersky</last><affiliation>Google</affiliation></author>
       <pages>10040-10060</pages>
@@ -7642,7 +7642,7 @@
       <title>A Sentiment Consolidation Framework for Meta-Review Generation</title>
       <author><first>Miao</first><last>Li</last></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <pages>10158-10177</pages>
       <abstract>Modern natural language generation systems with Large Language Models (LLMs) exhibit the capability to generate a plausible summary of multiple documents; however, it is uncertain if they truly possess the capability of information consolidation to generate summaries, especially on documents with opinionated information. We focus on meta-review generation, a form of sentiment summarisation for the scientific domain. To make scientific sentiment summarization more grounded, we hypothesize that human meta-reviewers follow a three-layer framework of sentiment consolidation to write meta-reviews. Based on the framework, we propose novel prompting methods for LLMs to generate meta-reviews and evaluation metrics to assess the quality of generated meta-reviews. Our framework is validated empirically as we find that prompting LLMs based on the framework — compared with prompting them with simple instructions — generates better meta-reviews.</abstract>
       <url hash="85733eea">2024.acl-long.547</url>
@@ -7656,7 +7656,7 @@
       <author><first>Hao</first><last>Fei</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>10178-10191</pages>
       <abstract>Structured Sentiment Analysis (SSA) was cast as a problem of bi-lexical dependency graph parsing by prior studies.Multiple formulations have been proposed to construct the graph, which share several intrinsic drawbacks:(1) The internal structures of spans are neglected, thus only the boundary tokens of spans are used for relation prediction and span recognition, thus hindering the model’s expressiveness;(2) Long spans occupy a significant proportion in the SSA datasets, which further exacerbates the problem of internal structure neglect.In this paper, we treat the SSA task as a dependency parsing task on partially-observed dependency trees, regarding flat spans without determined tree annotations as latent subtrees to consider internal structures of spans.We propose a two-stage parsing method and leverage TreeCRFs with a novel constrained inside algorithm to model latent structures explicitly, which also takes advantages of joint scoring graph arcs and headed spans for global optimization and inference. Results of extensive experiments on five benchmark datasets reveal that our method performs significantly better than all previous bi-lexical methods, achieving new state-of-the-art.</abstract>
       <url hash="da53d48c">2024.acl-long.548</url>
@@ -7665,7 +7665,7 @@
     </paper>
     <paper id="549">
       <title><fixed-case>OWSM</fixed-case>-<fixed-case>CTC</fixed-case>: An Open Encoder-Only Speech Foundation Model for Speech Recognition, Translation, and Language Identification</title>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Yui</first><last>Sudo</last></author>
       <author><first>Muhammad</first><last>Shakeel</last><affiliation>Honda Research Institution Japan Co., Ltd.</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -7691,7 +7691,7 @@
     <paper id="551">
       <title><fixed-case>M</fixed-case>uggle<fixed-case>M</fixed-case>ath: Assessing the Impact of Query and Response Augmentation on Math Reasoning</title>
       <author><first>Chengpeng</first><last>Li</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Hongyi</first><last>Yuan</last></author>
       <author><first>Guanting</first><last>Dong</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Keming</first><last>Lu</last></author>
@@ -7720,7 +7720,7 @@
       <title><fixed-case>B</fixed-case>inary<fixed-case>A</fixed-case>lign: Word Alignment as Binary Sequence Labeling</title>
       <author><first>Gaetan</first><last>Latouche</last><affiliation>Ubisoft</affiliation></author>
       <author><first>Marc-André</first><last>Carbonneau</last><affiliation>Ubisoft</affiliation></author>
-      <author><first>Benjamin</first><last>Swanson</last><affiliation>Ubisoft</affiliation></author>
+      <author id="ben-swanson"><first>Benjamin</first><last>Swanson</last><affiliation>Ubisoft</affiliation></author>
       <pages>10277-10288</pages>
       <abstract>Real world deployments of word alignment are almost certain to cover both high and low resource languages. However, the state-of-the-art for this task recommends a different model class depending on the availability of gold alignment training data for a particular language pair. We propose BinaryAlign, a novel word alignment technique based on binary sequence labeling that outperforms existing approaches in both scenarios, offering a unifying approach to the task. Additionally, we vary the specific choice of multilingual foundation model, perform stratified error analysis over alignment error type, and explore the performance of BinaryAlign on non-English language pairs. We make our source code publicly available.</abstract>
       <url hash="81acea89">2024.acl-long.553</url>
@@ -7754,7 +7754,7 @@
       <author><first>Huimin</first><last>Zeng</last></author>
       <author><first>Lanyu</first><last>Shang</last></author>
       <author><first>Yifan</first><last>Liu</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Dong</first><last>Wang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>10331-10343</pages>
       <abstract>The rapid propagation of misinformation poses substantial risks to public interest. To combat misinformation, large language models (LLMs) are adapted to automatically verify claim credibility. Nevertheless, existing methods heavily rely on the embedded knowledge within LLMs and / or black-box APIs for evidence collection, leading to subpar performance with smaller LLMs or upon unreliable context. In this paper, we propose retrieval augmented fact verification through the synthesis of contrasting arguments (RAFTS). Upon input claims, RAFTS starts with evidence retrieval, where we design a retrieval pipeline to collect and re-rank relevant documents from verifiable sources. Then, RAFTS forms contrastive arguments (i.e., supporting or refuting) conditioned on the retrieved evidence. In addition, RAFTS leverages an embedding model to identify informative demonstrations, followed by in-context prompting to generate the prediction and explanation. Our method effectively retrieves relevant documents as evidence and evaluates arguments from varying perspectives, incorporating nuanced information for fine-grained decision-making. Combined with informative in-context examples as prior, RAFTS achieves significant improvements to supervised and LLM baselines without complex prompts. We demonstrate the effectiveness of our method through extensive experiments, where RAFTS can outperform GPT-based methods with a significantly smaller 7B LLM.</abstract>
@@ -7921,7 +7921,7 @@
       <title><fixed-case>LLM</fixed-case>s in the Imaginarium: Tool Learning through Simulated Trial and Error</title>
       <author><first>Boshi</first><last>Wang</last><affiliation>Ohio State University</affiliation></author>
       <author><first>Hao</first><last>Fang</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University, Johns Hopkins University, Johns Hopkins University and Microsoft</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>Ohio State University and Microsoft</affiliation></author>
       <pages>10583-10604</pages>
@@ -7955,7 +7955,7 @@
       <author><first>Zhu</first><last>JianHao</last></author>
       <author><first>Cenyuan</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10619-10638</pages>
       <abstract>Aligning large language models (LLMs) with human preferences is crucial for enhancing their utility in terms of helpfulness, truthfulness, safety, harmlessness, and interestingness. Existing methods for achieving this alignment often involve employing reinforcement learning from human feedback (RLHF) to fine-tune LLMs based on human labels assessing the relative quality of model responses. Nevertheless, RLHF is susceptible to instability during fine-tuning and presents challenges in implementation. Drawing inspiration from the emerging field of representation engineering (RepE), this study aims to identify relevant representations for high-level human preferences embedded in patterns of activity within an LLM and achieve precise control of model behavior by transforming its representations. This novel approach, denoted as Representation Alignment from Human Feedback (RAHF), proves to be effective, computationally efficient, and easy to implement. Extensive experiments demonstrate the efficacy of RAHF in not only capturing but also manipulating representations to align with a broad spectrum of human preferences or values, rather than being confined to a singular concept or function (e.g. honesty or bias). RAHF’s versatility in accommodating diverse human preferences shows its potential for advancing LLM performance.</abstract>
       <url hash="a6e804f3">2024.acl-long.572</url>
@@ -7977,7 +7977,7 @@
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Ning</first><last>Ma</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>10639-10659</pages>
       <abstract>Multimodal large language models (MLLMs) have demonstrated promising results in a variety of tasks that combine vision and language. As these models become more integral to research and applications, conducting comprehensive evaluations of their capabilities has grown increasingly important. However, most existing benchmarks fail to consider that, in certain situations, images need to be interpreted within a broader context. In this work, we introduce a new benchmark, named as CODIS, designed to assess the ability of models to use context provided in free-form text to enhance visual comprehension. Our findings indicate that MLLMs consistently fall short of human performance on this benchmark. Further analysis confirms that these models struggle to effectively extract and utilize contextual information to improve their understanding of images. This underscores the pressing need to enhance the ability of MLLMs to comprehend visuals in a context-dependent manner.</abstract>
       <url hash="ad938049">2024.acl-long.573</url>
@@ -8013,7 +8013,7 @@
     <paper id="576">
       <title>Prompted Aspect Key Point Analysis for Quantitative Review Summarization</title>
       <author><first>An Quang</first><last>Tang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Minh Ngoc</first><last>Dinh</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
       <pages>10691-10708</pages>
@@ -8044,7 +8044,7 @@
       <author><first>Junhong</first><last>Liu</last></author>
       <author><first>Dingnan</first><last>Jin</last></author>
       <author><first>Hongru</first><last>Liang</last><affiliation>Sichuan University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>10746-10766</pages>
       <abstract>Large language models (LLMs) are increasingly used to meet user information needs, but their effectiveness in dealing with user queries that contain various types of ambiguity remains unknown, ultimately risking user trust and satisfaction. To this end, we introduce CLAMBER, a benchmark for evaluating LLMs using a well-organized taxonomy. Building upon the taxonomy, we construct 12K high-quality data to assess the strengths, weaknesses, and potential risks of various off-the-shelf LLMs.Our findings indicate the limited practical utility of current LLMs in identifying and clarifying ambiguous user queries, even enhanced by chain-of-thought (CoT) and few-shot prompting. These techniques may result in overconfidence in LLMs and yield only marginal enhancements in identifying ambiguity. Furthermore, current LLMs fall short in generating high-quality clarifying questions due to a lack of conflict resolution and inaccurate utilization of inherent knowledge.In this paper, CLAMBER presents a guidance and promotes further research on proactive and trustworthy LLMs.</abstract>
       <url hash="a000e74f">2024.acl-long.578</url>
@@ -8149,8 +8149,8 @@
       <author><first>Jie</first><last>Chen</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author><first>Xiaoxue</first><last>Cheng</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>10879-10899</pages>
       <abstract>In the era of large language models (LLMs), hallucination (the tendency to generate factually incorrect content) poses great challenges to trustworthy and reliable deployment of LLMs in real-world applications. To tackle the hallucination, three key questions should be well studied: how to detect hallucinations (detection), why do LLMs hallucinate (source), and what can be done to mitigate them (mitigation). To address these challenges, this work presents a systematic empirical study on LLM hallucinations, focused on the three aspects of hallucination detection, source and mitigation. Specially, we construct a new hallucination benchmark HaluEval 2.0, and design a simple yet effective detection method for LLM hallucinations. Furthermore, we zoom into the different training or utilization stages of LLMs and extensively analyze the potential factors that lead to the LLM hallucinations. Finally, we implement and examine a series of widely used techniques to mitigate the hallucinations in LLMs. Our work has led to several important findings to understand the hallucination origin and mitigate the hallucinations in LLMs.</abstract>
@@ -8177,7 +8177,7 @@
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Jiaan</first><last>Wang</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>10914-10928</pages>
@@ -8288,7 +8288,7 @@
       <author><first>Nitish</first><last>Gupta</last><affiliation>Google</affiliation></author>
       <author><first>Shikhar</first><last>Bharadwaj</last></author>
       <author><first>Dinesh</first><last>Tewari</last></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and Indian Institute of Science, Bangalore</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and Indian Institute of Science, Bangalore</affiliation></author>
       <pages>11047-11073</pages>
       <abstract>As large language models (LLMs) see increasing adoption across the globe, it is imperative for LLMs to be representative of the linguistic diversity of the world. India is a linguistically diverse country of 1.4 Billion people. To facilitate research on multilingual LLM evaluation, we release IndicGenBench — the largest benchmark for evaluating LLMs on user-facing generation tasks across a diverse set 29 of Indic languages covering 13 scripts and 4 language families. IndicGenBench is composed of diverse generation tasks like cross-lingual summarization, machine translation, and cross-lingual question answering. IndicGenBench extends existing benchmarks to many Indic languages through human curation providing multi-way parallel evaluation data for many under-represented Indic languages for the first time. We evaluate stateof-the-art LLMs like GPT-3.5, GPT-4, PaLM2, and LLaMA on IndicGenBench in a variety of settings. The largest PaLM-2 models performs the best on most tasks, however, there is a significant performance gap in all languages compared to English showing that further research is needed for the development of more inclusive multilingual language models. IndicGenBench isavailable at www.github.com/google-researchdatasets/indic-gen-bench</abstract>
       <url hash="9da08e56">2024.acl-long.595</url>
@@ -8339,7 +8339,7 @@
       <author><first>Siddharth</first><last>Suri</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Reid</first><last>Andersen</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xiaofeng</first><last>Xu</last></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>Microsoft</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Xia</first><last>Song</last><affiliation>Microsoft</affiliation></author>
       <author><first>Georg</first><last>Buscher</last></author>
@@ -8369,7 +8369,7 @@
       <author><first>Bowen</first><last>Ren</last></author>
       <author><first>Chong</first><last>Feng</last></author>
       <author><first>Yang</first><last>Gao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>11116-11141</pages>
       <abstract>Large Language Models (LLMs) demonstrate significant value in domain-specific applications, benefiting from their fundamental capabilities. Nevertheless, it is still unclear which fundamental capabilities contribute to success in specific domains. Moreover, the existing benchmark-based evaluation cannot effectively reflect the performance of real-world applications. In this survey, we review recent advances of LLMs in domain applications, aiming to summarize the fundamental capabilities and their collaboration. Furthermore, we establish connections between fundamental capabilities and specific domains, evaluating the varying importance of different capabilities. Based on our findings, we propose a reliable strategy for domains to choose more robust backbone LLMs for real-world applications.</abstract>
       <url hash="b2fa6890">2024.acl-long.599</url>
@@ -8423,7 +8423,7 @@
       <author><first>Xiaolong</first><last>Wang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>11189-11204</pages>
       <abstract>While large language models (LLMs) have been pre-trained on multilingual corpora, their performance still lags behind in most languages compared to a few resource-rich languages. One common approach to mitigate this issue is to translate training data from resource-rich languages into other languages and then continue training. However, using the data obtained solely relying on translation while ignoring the original capabilities of LLMs across languages is not always effective, which we show will limit the performance of cross-lingual knowledge transfer. In this work, we propose SDRRL, a method based on Self-Distillation from Resource-Rich Languages that effectively improve multilingual performance by leveraging the internal capabilities of LLMs on resource-rich languages. We evaluate on different LLMs (LLaMA-2 and SeaLLM) and source languages (English and French) across various comprehension and generation tasks, experimental results demonstrate that SDRRL can significantly enhance multilingual capabilities while minimizing the impact on original performance in resource-rich languages.</abstract>
       <url hash="4f141858">2024.acl-long.603</url>
@@ -8457,7 +8457,7 @@
       <author><first>Ji</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>11229-11245</pages>
       <abstract>With the bloom of Large Language Models (LLMs), Multimodal Large Language Models (MLLMs) that incorporate LLMs with pre-trained vision models have recently demonstrated impressive performance across diverse vision-language tasks. However, they fall short to comprehend context involving multiple images. A primary reason for this shortcoming is that the visual features for each images are encoded individually by frozen encoders before feeding into the LLM backbone, lacking awareness of other images and the multimodal instructions. We term this issue as prior-LLM modality isolation and propose a two phase paradigm, browse-and-concentrate, to enable in-depth multimodal context fusion prior to feeding the features into LLMs. This paradigm initially “browses” through the inputs for essential insights, and then revisits the inputs to “concentrate” on crucial details, guided by these insights, to achieve a more comprehensive understanding of the multimodal inputs. Additionally, we develop training strategies specifically to enhance the understanding of multi-image inputs. Our method markedly boosts the performance on 7 multi-image scenarios, contributing to increments on average accuracy by 2.13% and 7.60% against strong MLLMs baselines with 3B and 11B LLMs, respectively.</abstract>
       <url hash="1f160c59">2024.acl-long.605</url>
@@ -8476,7 +8476,7 @@
       <author><first>Ji</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>11246-11262</pages>
       <abstract>Recent developments in Multimodal Large Language Models (MLLMs) have shown rapid progress, moving towards the goal of creating versatile MLLMs that understand inputs from various modalities. However, existing methods typically rely on joint training with paired multimodal instruction data, which is resource-intensive and challenging to extend to new modalities. In this paper, we propose a new paradigm through the model composition of existing MLLMs to create a new model that retains the modal understanding capabilities of each original model. Our basic implementation, NaiveMC, demonstrates the effectiveness of this paradigm by reusing modality encoders and merging LLM parameters. Furthermore, we introduce DAMC to address parameter interference and mismatch issues during the merging process, thereby enhancing the model performance. To facilitate research in this area, we propose MCUB, a benchmark for assessing ability of MLLMs to understand inputs from diverse modalities. Experiments on this benchmark and four other multimodal understanding tasks show significant improvements over baselines, proving that model composition can create a versatile model capable of processing inputs from multiple modalities.</abstract>
       <url hash="1e91b7d2">2024.acl-long.606</url>
@@ -8517,7 +8517,7 @@
       <title>Measuring Meaning Composition in the Human Brain with Composition Scores from Large Language Models</title>
       <author><first>Changjiang</first><last>Gao</last><affiliation>nanjing university</affiliation></author>
       <author><first>Jixing</first><last>Li</last><affiliation>City University of Hong Kong</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>11295-11308</pages>
       <abstract>The process of meaning composition, wherein smaller units like morphemes or words combine to form the meaning of phrases and sentences, is essential for human sentence comprehension. Despite extensive neurolinguistic research into the brain regions involved in meaning composition, a computational metric to quantify the extent of composition is still lacking. Drawing on the key-value memory interpretation of transformer feed-forward network blocks, we introduce the Composition Score, a novel model-based metric designed to quantify the degree of meaning composition during sentence comprehension. Experimental findings show that this metric correlates with brain clusters associated with word frequency, structural processing, and general sensitivity to words, suggesting the multifaceted nature of meaning composition during human sentence comprehension.</abstract>
@@ -8600,7 +8600,7 @@
     <paper id="615">
       <title>Learning to Plan and Generate Text with Citations</title>
       <author><first>Constanza</first><last>Fierro</last><affiliation>Copenhagen University</affiliation></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
       <author><first>Fantine</first><last>Huot</last><affiliation>Google</affiliation></author>
       <author><first>Nicola</first><last>De Cao</last><affiliation>Google</affiliation></author>
       <author><first>Joshua</first><last>Maynez</last><affiliation>Google</affiliation></author>
@@ -8645,7 +8645,7 @@
       <author><first>Shounak</first><last>Paul</last></author>
       <author><first>Akshat</first><last>Sharma</last></author>
       <author><first>Pawan</first><last>Goyal</last><affiliation>IIT Kharagpur</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <author><first>Ashutosh</first><last>Modi</last><affiliation>IIT Kanpur</affiliation></author>
       <pages>11460-11499</pages>
       <abstract>Legal systems worldwide are inundated with exponential growth in cases and documents. There is an imminent need to develop NLP and ML techniques for automatically processing and understanding legal documents to streamline the legal system. However, evaluating and comparing various NLP models designed specifically for the legal domain is challenging. This paper addresses this challenge by proposing : Benchmark for Indian Legal Text Understanding and Reasoning. contains monolingual (English, Hindi) and multi-lingual (9 Indian languages) domain-specific tasks that address different aspects of the legal system from the point of view of understanding and reasoning over Indian legal documents. We present baseline models (including LLM-based) for each task, outlining the gap between models and the ground truth. To foster further research in the legal domain, we create a leaderboard (available at: https://exploration-lab.github.io/IL-TUR/ ) where the research community can upload and compare legal text understanding systems.</abstract>
@@ -8673,7 +8673,7 @@
       <author><first>Shivalika</first><last>Singh</last></author>
       <author><first>Freddie</first><last>Vargus</last></author>
       <author><first>Daniel</first><last>D’souza</last></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
       <author><first>Abinaya</first><last>Mahendiran</last></author>
       <author><first>Wei-Yin</first><last>Ko</last></author>
       <author><first>Herumb</first><last>Shandilya</last></author>
@@ -8686,7 +8686,7 @@
       <author><first>Marina</first><last>Machado</last></author>
       <author><first>Luisa</first><last>Moura</last><affiliation>Cohere For AI and Cohere</affiliation></author>
       <author><first>Dominik</first><last>Krzemiński</last></author>
-      <author><first>Hakimeh</first><last>Fadaei</last><affiliation>Divar company and University of Tehran, University of Tehran</affiliation></author>
+      <author id="hakimeh-fadaee"><first>Hakimeh</first><last>Fadaei</last><affiliation>Divar company and University of Tehran, University of Tehran</affiliation></author>
       <author><first>Irem</first><last>Ergun</last><affiliation>Cohere</affiliation></author>
       <author><first>Ifeoma</first><last>Okoh</last></author>
       <author><first>Aisha</first><last>Alaagib</last></author>
@@ -8795,7 +8795,7 @@
       <author><first>Changhao</first><last>Guan</last></author>
       <author><first>Ganglin</first><last>Bao</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Fengran</first><last>Mo</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>11662-11675</pages>
       <abstract>Fine-tuning large-scale pre-trained models is inherently a resource-intensive task. While it can enhance the capabilities of the model, it also incurs substantial computational costs, posing challenges to the practical application of downstream tasks. Existing parameter-efficient fine-tuning (PEFT) methods such as Low-Rank Adaptation (LoRA) rely on a bypass framework that ignores the differential parameter budget requirements across weight matrices, which may lead to suboptimal fine-tuning outcomes. To address this issue, we introduce the Dynamic Low-Rank Adaptation (DoRA) method. DoRA decomposes high-rank LoRA layers into structured single-rank components, allowing for dynamic pruning of parameter budget based on their importance to specific tasks during training, which makes the most of the limited parameter budget. Experimental results demonstrate that DoRA can achieve competitive performance compared with LoRA and full model fine-tuning, and outperform various strong baselines with the same storage parameter budget. Our code is available at [github](https://github.com/MIkumikumi0116/DoRA)</abstract>
       <url hash="efbe3c0a">2024.acl-long.626</url>
@@ -8819,8 +8819,8 @@
     <paper id="628">
       <title>Argument Mining in Data Scarce Settings: Cross-lingual Transfer and Few-shot Techniques</title>
       <author><first>Anar</first><last>Yeginbergen</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>11687-11699</pages>
       <abstract>Recent research on sequence labelling has been exploring different strategies to mitigate the lack of manually annotated data for the large majority of the world languages. Among others, the most successful approaches have been based on (i) the crosslingual transfer capabilities of multilingual pre-trained language models (model-transfer), (ii) data translation and label projection (data-transfer) and (iii), prompt-based learning by reusing the mask objective to exploit the few-shot capabilities of pre-trained language models (few-shot). Previous work seems to conclude that model-transfer outperform data-transfer methods and that few-shot techniques based on prompting are superior to updating the model’s weights via fine-tuning. In this paper we empirically demonstrate that, for Argument Mining, a sequence labelling task which requires the detection of long and complex discourse structures, previous insights on crosslingual transfer or few-shot learning do not apply. Contrary to previous work, we show that for Argument Mining data-transfer obtains better results than model-transfer and that fine-tuning outperforms few-shot methods. Regarding the former, the domain of the dataset used for data-transfer seems to be a deciding factor, while, for few-shot, the type of task (length and complexity of the sequence spans) and sampling method proves to be crucial.</abstract>
       <url hash="1862994a">2024.acl-long.628</url>
@@ -8963,7 +8963,7 @@
       <author><first>Leigang</first><last>Qu</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Liqiang</first><last>Nie</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Wenjie</first><last>Li</last><affiliation>The Hong Kong Polytechnic University, The Hong Kong Polytechnic University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11851-11861</pages>
       <abstract>The recent advancements in generative language models have demonstrated their ability to memorize knowledge from documents and recall knowledge to respond to user queries effectively. Building upon this capability, we propose to enable multimodal large language models (MLLMs) to memorize and recall images within their parameters. Given a user query for visual content, the MLLM is anticipated to “recall” the relevant image from its parameters as the response. Achieving this target presents notable challenges, including inbuilt visual memory and visual recall schemes within MLLMs. To address these challenges, we introduce a generative cross-modal retrieval framework, which assigns unique identifier strings to represent images and involves two training steps: learning to memorize and learning to retrieve. The first step focuses on training the MLLM to memorize the association between images and their respective identifiers. The latter step teaches the MLLM to generate the corresponding identifier of the target image, given the textual query input. By memorizing images in MLLMs, we introduce a new paradigm to cross-modal retrieval, distinct from previous discriminative approaches. The experiments demonstrate that the generative paradigm performs effectively and efficiently even with large-scale image candidate sets.</abstract>
       <url hash="647205ff">2024.acl-long.639</url>
@@ -9044,7 +9044,7 @@
     <paper id="645">
       <title>Document-level Claim Extraction and Decontextualisation for Fact-Checking</title>
       <author><first>Zhenyun</first><last>Deng</last></author>
-      <author><first>Michael</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
       <author><first>Andreas</first><last>Vlachos</last><affiliation>University of Cambridge</affiliation></author>
       <pages>11943-11954</pages>
       <abstract>Selecting which claims to check is a time-consuming task for human fact-checkers, especially from documents consisting of multiple sentences and containing multiple claims. However, existing claim extraction approaches focus more on identifying and extracting claims from individual sentences, e.g., identifying whether a sentence contains a claim or the exact boundaries of the claim within a sentence. In this paper, we propose a method for document-level claim extraction for fact-checking, which aims to extract check-worthy claims from documents and decontextualise them so that they can be understood out of context. Specifically, we first recast claim extraction as extractive summarization in order to identify central sentences from documents, then rewrite them to include necessary context from the originating document through sentence decontextualisation. Evaluation with both automatic metrics and a fact-checking professional shows that our method is able to extract check-worthy claims from documents at a higher rate than previous work, while also improving evidence retrieval.</abstract>
@@ -9148,7 +9148,7 @@
     <paper id="653">
       <title>Don’t Rank, Combine! Combining Machine Translation Hypotheses Using Quality Estimation</title>
       <author><first>Giorgos</first><last>Vernikos</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last><affiliation>EPFL - EPF Lausanne and HEIG-VD, Switzerland</affiliation></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last><affiliation>EPFL - EPF Lausanne and HEIG-VD, Switzerland</affiliation></author>
       <pages>12087-12105</pages>
       <abstract>Neural machine translation systems estimate probabilities of target sentences given source sentences, yet these estimates may not align with human preferences. This work introduces QE-fusion, a method that synthesizes translations using a quality estimation metric (QE), which correlates better with human judgments. QE-fusion leverages a pool of candidates sampled from a model, combining spans from different candidates using a QE metric such as CometKiwi. We compare QE-fusion against beam search and recent reranking techniques, such as Minimum Bayes Risk decoding or QE-reranking. Our method consistently improves translation quality in terms of COMET and BLEURT scores when applied to large language models (LLMs) used for translation (PolyLM, XGLM, Llama2, Mistral, ALMA, and Tower) and to multilingual translation models (NLLB), over five language pairs. Notably, QE-fusion exhibits larger improvements for LLMs due to their ability to generate diverse outputs. We demonstrate that our approach generates novel translations in over half of the cases and consistently outperforms other methods across varying numbers of candidates (5–200). Furthermore, we empirically establish that QE-fusion scales linearly with the number of candidates in the pool.</abstract>
       <url hash="0a51fed0">2024.acl-long.653</url>
@@ -9175,7 +9175,7 @@
       <author><first>Sankara</first><last>Muddu</last></author>
       <author><first>Rupasai</first><last>Rangaraju</last></author>
       <author><first>Swaprava</first><last>Nath</last><affiliation>Computer Science and Engineering, Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Suman</first><last>Banerjee</last><affiliation>Flipkart</affiliation></author>
       <author><first>Amey</first><last>Patil</last></author>
       <author><first>Sudhanshu</first><last>Singh</last></author>
@@ -9191,8 +9191,8 @@
       <title><fixed-case>LAND</fixed-case>e<fixed-case>RMT</fixed-case>: Dectecting and Routing Language-Aware Neurons for Selectively Finetuning <fixed-case>LLM</fixed-case>s to Machine Translation</title>
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Leiyu</first><last>Pan</last><affiliation>Tianjin University</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Baidu Inc</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Baidu Inc</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>12135-12148</pages>
       <abstract>Recent advancements in large language models (LLMs) have shown promising results in multilingual translation even with limited bilingual supervision. The major challenges are catastrophic forgetting and parameter interference for finetuning LLMs when provided parallel training data. To address these challenges, we propose LANDeRMT, a Language-Aware Neuron Detecting and Routing framework that selectively finetunes LLMs to Machine Translation with diverse translation training data. In LANDeRMT, we evaluate the awareness of neurons to MT tasks and categorize them into language-general and language-specific neurons. This categorization enables selective parameter updates during finetuning, mitigating parameter interference and catastrophic forgetting issues. For the detected neurons, we further propose a conditional awareness-based routing mechanism to dynamically adjust language-general and language-specific capacity within LLMs, guided by translation signals. Experimental results demonstrate that the proposed LANDeRMT is very effective in learning translation knowledge, significantly improving translation quality over various strong baselines for multiple language pairs.</abstract>
       <url hash="67de2f44">2024.acl-long.656</url>
@@ -9244,7 +9244,7 @@
       <author><first>Donald</first><last>Dunagan</last></author>
       <author><first>Miloš</first><last>Stanojević</last><affiliation>University College London, University of London and Google DeepMind</affiliation></author>
       <author><first>Jan</first><last>Buys</last><affiliation>University of Cape Town</affiliation></author>
-      <author><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
+      <author id="john-hale"><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
       <pages>12215-12229</pages>
       <abstract>Humans understand sentences word-by-word, in the order that they hear them. This incrementality entails resolving temporary ambiguities about syntactic relationships. We investigate how humans process these syntactic ambiguities by correlating predictions from incremental generative dependency parsers with timecourse data from people undergoing functional neuroimaging while listening to an audiobook. In particular, we compare competing hypotheses regarding the number of developing syntactic analyses in play during word-by-word comprehension: one vs more than one. This comparison involves evaluating syntactic surprisal from a state-of-the-art dependency parser with LLM-adapted encodings against an existing fMRI dataset. In both English and Chinese data, we find evidence for multipath parsing. Brain regions associated with this multipath effect include bilateral superior temporal gyrus.</abstract>
       <url hash="eecd0da7">2024.acl-long.660</url>
@@ -9321,7 +9321,7 @@
     </paper>
     <paper id="666">
       <title>Structured Tree Alignment for Evaluation of (Speech) Constituency Parsing</title>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Kevin</first><last>Gimpel</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
       <author><first>Karen</first><last>Livescu</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
       <pages>12320-12332</pages>
@@ -9381,7 +9381,7 @@
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <pages>12380-12403</pages>
       <abstract>Closed-source agents suffer from several issues such as a lack of affordability, transparency, and reproducibility, particularly on complex interactive tasks. This motivates the development of open-source alternatives. We introduce Lumos, one of the first frameworks for training open-source LLM-based agents. Lumos features a learnable, unified and modular architecture with a planning module that learns high-level subgoal generation, and a grounding module trained to translate these into the actions using various tools in the execution module. The design allows for modular upgrades and wider applicability to diverse interactive tasks. To foster generalizable agent learning, we collect large-scale, unified, and high-quality training annotations derived from diverse ground-truth reasoning rationales across various complex interactive tasks. On 9 datasets, Lumos exhibits several key advantages: (1) Lumos excels multiple larger open-source agents on the held-out datasets (unused for training) for each task type. Lumos even surpasses GPT agents on QA and web tasks; (2) Lumos outperforms open-source agents produced by chain-of-thoughts and unmodularized integrated training; and (3) Lumos effectively generalizes to unseen tasks, outperforming 33B-scale agents and domain-specific agents. Code and data will be released.</abstract>
       <url hash="9087c8f4">2024.acl-long.670</url>
@@ -9391,9 +9391,9 @@
     <paper id="671">
       <title>Investigating Cultural Alignment of Large Language Models</title>
       <author><first>Badr</first><last>AlKhamissi</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
-      <author><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
       <author><first>Mai</first><last>Alkhamissi</last></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>12404-12422</pages>
       <abstract>The intricate relationship between language and culture has long been a subject of exploration within the realm of linguistic anthropology. Large Language Models (LLMs), promoted as repositories of collective human knowledge, raise a pivotal question: do these models genuinely encapsulate the diverse knowledge adopted by different cultures? Our study reveals that these models demonstrate greater cultural alignment along two dimensions—firstly, when prompted with the dominant language of a specific culture, and secondly, when pretrained with a refined mixture of languages employed by that culture. We quantify cultural alignment by simulating sociological surveys, comparing model responses to those of actual survey participants as references. Specifically, we replicate a survey conducted in various regions of Egypt and the United States through prompting LLMs with different pretraining data mixtures in both Arabic and English with the personas of the real respondents and the survey questions. Further analysis reveals that misalignment becomes more pronounced for underrepresented personas and for culturally sensitive topics, such as those probing social values. Finally, we introduce Anthropological Prompting, a novel method leveraging anthropological reasoning to enhance cultural alignment. Our study emphasizes the necessity for a more balanced multilingual pretraining dataset to better represent the diversity of human experience and the plurality of different cultures with many implications on the topic of cross-lingual transfer.</abstract>
       <url hash="eda78b62">2024.acl-long.671</url>
@@ -9407,7 +9407,7 @@
       <author><first>Yanze</first><last>Wang</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
       <author><first>Jonathan</first><last>May</last></author>
-      <author><first>Brandon M.</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon M.</first><last>Stewart</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Denis</first><last>Peskoff</last></author>
       <author><first>Jordan Lee</first><last>Boyd-Graber</last></author>
@@ -9476,7 +9476,7 @@
       <title>Analyzing <fixed-case>LLM</fixed-case> Behavior in Dialogue Summarization: Unveiling Circumstantial Hallucination Trends</title>
       <author><first>Sanjana</first><last>Ramprasad</last></author>
       <author><first>Elisa</first><last>Ferracane</last><affiliation>Abridge AI</affiliation></author>
-      <author><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>12549-12561</pages>
       <abstract>Recent advancements in large language models (LLMs) have significantly advanced the capabilities of summarization systems.However, they continue to face a persistent challenge: hallucination. While prior work has extensively examined LLMs in news domains, evaluation of dialogue summarization has primarily focused on BART-based models, resulting in a notable gap in understanding LLM effectiveness.Our work seeks to address this gap by benchmarking LLMs for dialogue summarization faithfulness using human annotations,focusing on identifying and categorizing span-level inconsistencies.Specifically, we evaluate two prominent LLMs: GPT-4 and Alpaca-13B.Our evaluation reveals that LLMs often generate plausible, but not fully supported inferences based on conversation contextual cues, a trait absent in older models. As a result, we propose a refined taxonomy of errors, introducing a novel category termed “Contextual Inference” to address this aspect of LLM behavior. Using our taxonomy, we compare the behavioral differences between LLMs and older fine-tuned models. Additionally, we systematically assess the efficacy of automatic error detection methods on LLM summaries and find that they struggle to detect these nuanced errors effectively. To address this, we introduce two prompt-based approaches for fine-grained error detection. Our methods outperform existing metrics, particularly in identifying the novel “Contextual Inference” error type.</abstract>
       <url hash="f1468ef3">2024.acl-long.677</url>
@@ -9504,7 +9504,7 @@
       <author><first>Muhammad</first><last>Maaz</last></author>
       <author><first>Hanoona</first><last>Rasheed</last></author>
       <author><first>Salman</first><last>Khan</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Australian National University</affiliation></author>
-      <author><first>Fahad</first><last>Khan</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Linköping University</affiliation></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Linköping University</affiliation></author>
       <pages>12585-12602</pages>
       <abstract>Conversation agents fueled by Large Language Models (LLMs) are providing a new way to interact with visual data. While there have been initial attempts for image-based conversation models, this work addresses the under-explored field of <i>video-based conversation</i> by introducing Video-ChatGPT. It is a multimodal model that merges a video-adapted visual encoder with an LLM. The resulting model is capable of understanding and generating detailed conversations about videos. We introduce a new dataset of 100,000 video-instruction pairs used to train Video-ChatGPT acquired via manual and semi-automated pipeline that is easily scalable and robust to label noise. We also develop a quantitative evaluation framework for video-based dialogue models to objectively analyze the strengths and weaknesses of video-based dialogue models. Code: https://github.com/mbzuai-oryx/Video-ChatGPT.</abstract>
       <url hash="b2b57f09">2024.acl-long.679</url>
@@ -9547,7 +9547,7 @@
       <title>Classist Tools: Social Class Correlates with Performance in <fixed-case>NLP</fixed-case></title>
       <author><first>Amanda</first><last>Cercas Curry</last><affiliation>Bocconi University</affiliation></author>
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicações</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>12643-12655</pages>
       <abstract>The field of sociolinguistics has studied factors affecting language use for the last century. Labov (1964) and Bernstein (1960) showed that socioeconomic class strongly influences our accents, syntax and lexicon. However, despite growing concerns surrounding fairness and bias in Natural Language Processing (NLP), there is a dearth of studies delving into the effects it may have on NLP systems. We show empirically that NLP systems’ performance is affected by speakers’ SES, potentially disadvantaging less-privileged socioeconomic groups. We annotate a corpus of 95K utterances from movies with social class, ethnicity and geographical language variety and measure the performance of NLP systems on three tasks: language modelling, automatic speech recognition, and grammar error correction. We find significant performance disparities that can be attributed to socioeconomic status as well as ethnicity and geographical differences. With NLP technologies becoming ever more ubiquitous and quotidian, they must accommodate all language varieties to avoid disadvantaging already marginalised groups. We argue for the inclusion of socioeconomic class in future language technologies.</abstract>
@@ -9590,7 +9590,7 @@
     <paper id="685">
       <title>Retaining Key Information under High Compression Ratios: Query-Guided Compressor for <fixed-case>LLM</fixed-case>s</title>
       <author><first>Zhiwei</first><last>Cao</last></author>
-      <author id="qian-cao"><first>Qian</first><last>Cao</last></author>
+      <author><first>Qian</first><last>Cao</last></author>
       <author><first>Yu</first><last>Lu</last></author>
       <author><first>Ningxin</first><last>Peng</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Luyang</first><last>Huang</last><affiliation>Bytedance AI Lab</affiliation></author>
@@ -9606,7 +9606,7 @@
       <title><fixed-case>COSMIC</fixed-case>: Mutual Information for Task-Agnostic Summarization Evaluation</title>
       <author><first>Maxime</first><last>Darrin</last></author>
       <author><first>Philippe</first><last>Formont</last><affiliation>École de technologie supérieure, Université du Québec and Université Paris-Saclay</affiliation></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
       <author><first>Pablo</first><last>Piantanida</last><affiliation>Mila - Quebec AI Institute and Université Paris-Saclay, CNRS</affiliation></author>
       <pages>12696-12717</pages>
       <abstract>Assessing the quality of summarizers poses significant challenges—gold summaries are hard to obtain and their suitability depends on the use context of the summarization system. Who is the user of the system, and what do they intend to do with the summary? In response, we propose a novel task-oriented evaluation approach that assesses summarizers based on their capacity to produce summaries while preserving task outcomes. We theoretically establish both a lower and upper bound on the expected error rate of these tasks, which depends on the mutual information between source texts and generated summaries. We introduce COSMIC, a practical implementation of this metric, and demonstrate its strong correlation with human judgment-based metrics, as well as its effectiveness in predicting downstream task performance. Comparative analyses against established metrics like BERTScore and ROUGE highlight the competitive performance of COSMIC.</abstract>
@@ -9621,7 +9621,7 @@
       <author><first>Frédéric</first><last>Piedboeuf</last><affiliation>CTA</affiliation></author>
       <author><first>Guillaume</first><last>Le Berre</last><affiliation>Université de Montréal, Université de Lorraine and University of Lorraine</affiliation></author>
       <author><first>David</first><last>Alfonso-Hermelo</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>12718-12736</pages>
       <abstract>Keyphrase generation has primarily been explored within the context of academic research articles, with a particular focus on scientific domains and the English language. In this work, we present EUROPA, a novel dataset for multilingual keyphrase generation in the legal domain. It is derived from legal judgments from the Court of Justice of the European Union (EU), and contains instances in all 24 EU official languages. We run multilingual models on our corpus and analyze the results, showing room for improvement on a domain-specific multilingual corpus such as the one we present.</abstract>
       <url hash="e1f580a4">2024.acl-long.687</url>
@@ -9633,7 +9633,7 @@
       <author><first>Maxime</first><last>Darrin</last></author>
       <author><first>Ines</first><last>Arous</last></author>
       <author><first>Pablo</first><last>Piantanida</last><affiliation>Mila - Quebec AI Institute and Université Paris-Saclay, CNRS</affiliation></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
       <pages>12737-12752</pages>
       <abstract>Scientific peer review is essential for the quality of academic publications. However, the increasing number of paper submissions to conferences has strained the reviewing process. This surge poses a burden on area chairs who have to carefully read an ever-growing volume of reviews and discern each reviewer’s main arguments as part of their decision process. In this paper, we introduce , a summarization method designed to offer a concise yet comprehensive overview of scholarly reviews. Unlike traditional consensus-based methods, extracts both common and unique opinions from the reviews. We introduce novel uniqueness scores based on the Rational Speech Act framework to identify relevant sentences in the reviews. Our method aims to provide a pragmatic glimpse into all reviews, offering a balanced perspective on their opinions. Our experimental results with both automatic metrics and human evaluation show that generates more discriminative summaries than baseline methods in terms of human evaluation while achieving comparable performance with these methods in terms of automatic metrics.</abstract>
       <url hash="e99edd1a">2024.acl-long.688</url>
@@ -9658,7 +9658,7 @@
       <author><first>João</first><last>Bordalo</last><affiliation>NOVA School of Science and Technology</affiliation></author>
       <author><first>Vasco</first><last>Ramos</last></author>
       <author><first>Rodrigo</first><last>Valério</last></author>
-      <author><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
+      <author id="diogo-gloria-silva"><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
       <author><first>Yonatan</first><last>Bitton</last><affiliation>Google</affiliation></author>
       <author><first>Michal</first><last>Yarom</last><affiliation>Research, Google</affiliation></author>
       <author><first>Idan</first><last>Szpektor</last><affiliation>Google</affiliation></author>
@@ -9763,7 +9763,7 @@
     </paper>
     <paper id="697">
       <title><fixed-case>XLAVS</fixed-case>-<fixed-case>R</fixed-case>: Cross-Lingual Audio-Visual Speech Representation Learning for Noise-Robust Speech Perception</title>
-      <author><first>HyoJung</first><last>Han</last><affiliation>Department of Computer Science, University of Maryland, College Park</affiliation></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last><affiliation>Department of Computer Science, University of Maryland, College Park</affiliation></author>
       <author><first>Mohamed</first><last>Anwar</last></author>
       <author><first>Juan</first><last>Pino</last><affiliation>Meta</affiliation></author>
       <author><first>Wei-Ning</first><last>Hsu</last><affiliation>Facebook</affiliation></author>
@@ -10018,7 +10018,7 @@
       <title>Word Matters: What Influences Domain Adaptation in Summarization?</title>
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Siyu</first><last>Miao</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yang</first><last>Gao</last></author>
       <pages>13236-13249</pages>
       <abstract>Domain adaptation aims to enable Large Language Models (LLMs) to generalize domain datasets unseen effectively during the training phase. However, factors such as the size of the model parameters and the scale of training data are general influencers and do not reflect the nuances of domain adaptation performance. This paper investigates the fine-grained factors affecting domain adaptation performance, analyzing the specific impact of ‘words’ in training data on summarization tasks. We propose quantifying dataset learning difficulty as the learning difficulty of generative summarization, which is determined by two indicators: word-based compression rate and abstraction level. Our experiments conclude that, when considering dataset learning difficulty, the cross-domain overlap and the performance gain in summarization tasks exhibit an approximate linear relationship, which is not directly related to the number of words. Based on this finding, predicting a model’s performance on unknown domain datasets is possible without undergoing training. Source code and scripts are available at https://github.com/li-aolong/Word-Matters.</abstract>
@@ -10163,7 +10163,7 @@
       <author><first>Zhu</first><last>JianHao</last></author>
       <author><first>Cenyuan</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>13445-13464</pages>
       <abstract>Parameter Efficient Fine-Tuning (PEFT) has gained significant attention for its ability to achieve competitive results while updating only a small subset of trainable parameters. Despite the promising performance of current PEFT methods, they present challenges in hyperparameter selection, such as determining the rank of LoRA or Adapter, or specifying the length of soft prompts. In addressing these challenges, we propose a novel approach to fine-tuning neural models, termed Representation EDiting (RED), which scales and biases the representation produced at each layer. RED substantially reduces the number of trainable parameters by a factor of 25,700 compared to full parameter fine-tuning, and by a factor of 32 compared to LoRA. Remarkably, RED achieves comparable or superior results to full parameter fine-tuning and other PEFT methods. Extensive experiments were conducted across models of varying architectures and scales, including RoBERTa, GPT-2, T5, and Llama-2, and the results demonstrate the efficiency and efficacy of RED, positioning it as a promising PEFT approach for large neural models.</abstract>
       <url hash="ffaa3b12">2024.acl-long.726</url>
@@ -10323,7 +10323,7 @@
     <paper id="738">
       <title>When is Tree Search Useful for <fixed-case>LLM</fixed-case> Planning? It Depends on the Discriminator</title>
       <author><first>Ziru</first><last>Chen</last></author>
-      <author><first>Michael</first><last>White</last><affiliation>Ohio State University</affiliation></author>
+      <author id="michael-white"><first>Michael</first><last>White</last><affiliation>Ohio State University</affiliation></author>
       <author><first>Ray</first><last>Mooney</last><affiliation>, University of Texas, Austin</affiliation></author>
       <author><first>Ali</first><last>Payani</last><affiliation>Cisco</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>Ohio State University and Microsoft</affiliation></author>
@@ -10428,7 +10428,7 @@
     <paper id="745">
       <title><fixed-case>LLM</fixed-case>-Rubric: A Multidimensional, Calibrated Approach to Automated Evaluation of Natural Language Texts</title>
       <author><first>Helia</first><last>Hashemi</last></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Corby</first><last>Rosset</last></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University, Johns Hopkins University, Johns Hopkins University and Microsoft</affiliation></author>
       <author><first>Chris</first><last>Kedzie</last><affiliation>Rasa Technologies, Inc.</affiliation></author>
@@ -10541,7 +10541,7 @@
     <paper id="754">
       <title>Fora: A corpus and framework for the study of facilitated dialogue</title>
       <author><first>Hope</first><last>Schroeder</last></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>13985-14001</pages>
       <abstract>Facilitated dialogue is increasingly popular as a method of civic engagement and as a method for gathering social insight, but resources for its study are scant. We present Fora, a unique collection of annotated facilitated dialogues. We compile 262 facilitated conversations that were hosted with partner organizations seeking to engage their members and surface insights regarding issues like education, elections, and public health, primarily through the sharing of personal experience. Alongside this corpus of 39,911 speaker turns, we present a framework for the analysis of facilitated dialogue. We taxonomize key personal sharing behaviors and facilitation strategies in the corpus, annotate a 25% sample (10,000+ speaker turns) of the data accordingly, and evaluate and establish baselines on a number of tasks essential to the identification of these phenomena in dialogue. We describe the data, and relate facilitator behavior to turn-taking and participant sharing. We outline how this research can inform future work in understanding and improving facilitated dialogue, parsing spoken conversation, and improving the behavior of dialogue agents.</abstract>
@@ -10557,7 +10557,7 @@
       <author><first>Zhen</first><last>Qin</last><affiliation>Google</affiliation></author>
       <author><first>Jing Nathan</first><last>Yan</last></author>
       <author><first>Jialu</first><last>Liu</last><affiliation>Google Research</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Michael</first><last>Bendersky</last><affiliation>Google</affiliation></author>
       <pages>14002-14024</pages>
       <abstract>Large language models (LLMs) have shown remarkable capabilities in various natural language understanding tasks with a few demonstration examples via in-context learning. Common strategies to boost such “in-context” learning ability are to ensemble multiple model decoded results and require the model to generate an explanation along with the prediction. However, these models often treat different class predictions equally and neglect the potential discrepancy between the explanations and predictions. To fully unleash the power of explanations, we propose EASE, an <i>Explanation-Aware Soft Ensemble</i> framework to empower in-context learning with LLMs. We design two techniques, explanation-guided ensemble, and soft probability aggregation, to mitigate the effect of unreliable explanations and improve the consistency between explanations and final predictions. Experiments on seven natural language understanding tasks and four varying-size LLMs demonstrate the effectiveness of our proposed framework.</abstract>
@@ -10709,7 +10709,7 @@
       <author><first>Wenlong</first><last>Zhao</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
       <author><first>Andrew</first><last>Drozdov</last><affiliation>Databricks</affiliation></author>
       <author><first>Benjamin</first><last>Rozonoyer</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Jay-Yoon</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Andrew</first><last>McCallum</last><affiliation>University of Massachusetts Amherst and University of Massachusetts Amherst</affiliation></author>
@@ -10734,7 +10734,7 @@
     <paper id="768">
       <title><fixed-case>L</fixed-case>ogogram<fixed-case>NLP</fixed-case>: Comparing Visual and Textual Representations of Ancient Logographic Writing Systems for <fixed-case>NLP</fixed-case></title>
       <author><first>Danlu</first><last>Chen</last></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Aditi</first><last>Agarwal</last></author>
       <author><first>Jacobo</first><last>Myerston</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last><affiliation>University of California, San Diego</affiliation></author>
@@ -10927,8 +10927,8 @@
       <author><first>Ryo</first><last>Ueda</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Ryo</first><last>Yoshida</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Yohei</first><last>Oseki</last><affiliation>University of Tokyo</affiliation></author>
-      <author><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>14522-14543</pages>
       <abstract>The world’s languages exhibit certain so-called typological or implicational universals; for example, Subject-Object-Verb (SOV) languages typically use postpositions. Explaining the source of such biases is a key goal of linguistics.We study word-order universals through a computational simulation with language models (LMs).Our experiments show that typologically-typical word orders tend to have lower perplexity estimated by LMs with cognitively plausible biases: syntactic biases, specific parsing strategies, and memory limitations. This suggests that the interplay of cognitive biases and predictability (perplexity) can explain many aspects of word-order universals.It also showcases the advantage of cognitively-motivated LMs, typically employed in cognitive modeling, in the simulation of language universals.</abstract>
       <url hash="2f42351c">2024.acl-long.781</url>
@@ -10981,7 +10981,7 @@
     <paper id="785">
       <title><fixed-case>C</fixed-case>ausal<fixed-case>G</fixed-case>ym: Benchmarking causal interpretability methods on linguistic tasks</title>
       <author><first>Aryaman</first><last>Arora</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>14638-14663</pages>
       <abstract>Language models (LMs) have proven to be powerful tools for psycholinguistic research, but most prior work has focused on purely behavioural measures (e.g., surprisal comparisons). At the same time, research in model interpretability has begun to illuminate the abstract causal mechanisms shaping LM behavior. To help bring these strands of research closer together, we introduce CausalGym. We adapt and expand the SyntaxGym suite of tasks to benchmark the ability of interpretability methods to causally affect model behaviour. To illustrate how CausalGym can be used, we study the pythia models (14M–6.9B) and assess the causal efficacy of a wide range of interpretability methods, including linear probing and distributed alignment search (DAS). We find that DAS outperforms the other methods, and so we use it to study the learning trajectory of two difficult linguistic phenomena in pythia-1b: negative polarity item licensing and filler–gap dependencies. Our analysis shows that the mechanism implementing both of these tasks is learned in discrete stages, not gradually.</abstract>
@@ -11025,7 +11025,7 @@
       <title>Semisupervised Neural Proto-Language Reconstruction</title>
       <author><first>Liang</first><last>Lu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Peirong</first><last>Xie</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>14715-14759</pages>
       <abstract>Existing work implementing comparative reconstruction of ancestral languages (proto-languages) has usually required full supervision. However, historical reconstruction models are only of practical value if they can be trained with a limited amount of labeled data. We propose a semisupervised historical reconstruction task in which the model is trained on only a small amount of labeled data (cognate sets with proto-forms) and a large amount of unlabeled data (cognate sets without proto-forms). We propose a neural architecture for comparative reconstruction (DPD-BiReconstructor) incorporating an essential insight from linguists’ comparative method: that reconstructed words should not only be reconstructable from their daughter words, but also deterministically transformable back into their daughter words. We show that this architecture is able to leverage unlabeled cognate sets to outperform strong semisupervised baselines on this novel task.</abstract>
       <url hash="0c211f0e">2024.acl-long.788</url>
@@ -11037,7 +11037,7 @@
       <title>Speech Translation with Speech Foundation Models and Large Language Models: What is There and What is Missing?</title>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>14760-14778</pages>
       <abstract>The field of natural language processing (NLP) has recently witnessed a transformative shift with the emergence of foundation models, particularly Large Language Models (LLMs) that have revolutionized text-based NLP. This paradigm has extended to other modalities, including speech, where researchers are actively exploring the combination of Speech Foundation Models (SFMs) and LLMs into single, unified models capable of addressing multimodal tasks. Among such tasks, this paper focuses on speech-to-text translation (ST). By examining the published papers on the topic, we propose a unified view of the architectural solutions and training strategies presented so far, highlighting similarities and differences among them. Based on this examination, we not only organize the lessons learned but also show how diverse settings and evaluation approaches hinder the identification of the best-performing solution for each architectural building block and training choice. Lastly, we outline recommendations for future works on the topic aimed at better understanding the strengths and weaknesses of the SFM+LLM solutions for ST.</abstract>
@@ -11136,7 +11136,7 @@
       <author><first>Muhammad</first><last>Mahendra</last><affiliation>Universitas Telkom</affiliation></author>
       <author><first>Rr</first><last>Putri</last><affiliation>Universitas Indonesia</affiliation></author>
       <author><first>Bryan</first><last>Wilie</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
       <author><first>Alham</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Ayu</first><last>Purwarianti</last><affiliation>Institut Teknologi Bandung</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>HKUST</affiliation></author>
@@ -11179,11 +11179,11 @@
       <author><first>Oscar</first><last>Sainz</last></author>
       <author><first>Naiara</first><last>Perez</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Aitor</first><last>Ormazabal</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>14952-14972</pages>
       <abstract>We introduce Latxa, a family of large language models for Basque ranging from 7 to 70 billion parameters. Latxa is based on Llama 2, which we continue pretraining on a new Basque corpus comprising 4.3M documents and 4.2B tokens. Addressing the scarcity of high-quality benchmarks for Basque, we further introduce 4 multiple choice evaluation datasets: EusProficiency, comprising 5,169 questions from official language proficiency exams; EusReading, comprising 352 reading comprehension questions; EusTrivia, comprising 1,715 trivia questions from 5 knowledge areas; and EusExams, comprising 16,046 questions from public examinations. In our extensive evaluation, Latxa outperforms all previous open models we compare to by a large margin. In addition, it is competitive with GPT-4 Turbo in language proficiency and understanding, despite lagging behind in reading comprehension and knowledge-intensive tasks. Both the Latxa family of models, as well as our new pretraining corpora and evaluation datasets, are publicly available under open licenses. Our suite enables reproducible research on methods to build LLMs for low-resource languages.</abstract>
       <url hash="879cf118">2024.acl-long.799</url>
@@ -11247,7 +11247,7 @@
       <author><first>Terra</first><last>Blevins</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hila</first><last>Gonen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Orevaoghene</first><last>Ahia</last><affiliation>Department of Computer Science</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
       <pages>15059-15076</pages>
       <abstract>A major consideration in multilingual language modeling is how to best represent languages with diverse vocabularies and scripts.Although contemporary text encoding methods cover most of the world’s writing systems, they exhibit bias towards the high-resource languages of the Global West. As a result, texts of underrepresented languages tend to be segmented into long sequences of linguistically meaningless units. To address the disparities, we introduce a new paradigm that encodes the same information with segments of consistent size across diverse languages. Our encoding convention (MYTE) is based on morphemes, as their inventories are more balanced across languages than characters, which are used in previous methods. We show that MYTE produces shorter encodings for all 99 analyzed languages, with the most notable improvements for non-European languages and non-Latin scripts. This, in turn, improves multilingual LM performance and diminishes the perplexity gap throughout diverse languages.</abstract>
       <url hash="e1ae23a1">2024.acl-long.804</url>
@@ -11320,7 +11320,7 @@
       <author><first>Luyao</first><last>Niu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhen</first><last>Xiang</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Bhaskar</first><last>Ramasubramanian</last><affiliation>Western Washington University</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>15157-15173</pages>
       <abstract>Safety is critical to the usage of large language models (LLMs). Multiple techniques such as data filtering and supervised fine-tuning have been developed to strengthen LLM safety. However, currently known techniques presume that corpora used for safety alignment of LLMs are solely interpreted by semantics. This assumption, however, does not hold in real-world applications, which leads to severe vulnerabilities in LLMs. For example, users of forums often use ASCII art, a form of text-based art, to convey image information. In this paper, we propose a novel ASCII art-based jailbreak attack and introduce a comprehensive benchmark Vision-in-Text Challenge (ViTC) to evaluate the capabilities of LLMs in recognizing prompts that cannot be solely interpreted by semantics. We show that five SOTA LLMs (GPT-3.5, GPT-4, Gemini, Claude, and Llama2) struggle to recognize prompts provided in the form of ASCII art. Based on this observation, we develop the jailbreak attack ArtPrompt, which leverages the poor performance of LLMs in recognizing ASCII art to bypass safety measures and elicit undesired behaviors from LLMs. ArtPrompt only requires black-box access to the victim LLMs, making it a practical attack. We evaluate ArtPrompt on five SOTA LLMs, and show that ArtPrompt can effectively and efficiently induce undesired behaviors from all five LLMs.</abstract>
@@ -11385,7 +11385,7 @@
     <paper id="813">
       <title>Can Large Language Models be Good Emotional Supporter? Mitigating Preference Bias on Emotional Support Conversation</title>
       <author><first>Dongjin</first><last>Kang</last><affiliation>Yonsei University</affiliation></author>
-      <author><first>Sunghwan</first><last>Kim</last><affiliation>Yonsei University</affiliation></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Taeyoon</first><last>Kwon</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Seungjun</first><last>Moon</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Hyunsouk</first><last>Cho</last><affiliation>Ajou University</affiliation></author>
@@ -11422,7 +11422,7 @@
       <title>Natural Language Satisfiability: Exploring the Problem Distribution and Evaluating Transformer-based Language Models</title>
       <author><first>Tharindu</first><last>Madusanka</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last><affiliation>University of Opole</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>15278-15294</pages>
       <abstract>Efforts to apply transformer-based language models (TLMs) to the problem of reasoning in natural language have enjoyed ever-increasing success in recent years. The most fundamental task in this area to which nearly all others can be reduced is that of determining satisfiability. However, from a logical point of view, satisfiability problems vary along various dimensions, which may affect TLMs’ ability to learn how to solve them. The problem instances of satisfiability in natural language can belong to different computational complexity classes depending on the language fragment in which they are expressed. Although prior research has explored the problem of natural language satisfiability, the above-mentioned point has not been discussed adequately. Hence, we investigate how problem instances from varying computational complexity classes and having different grammatical constructs impact TLMs’ ability to learn rules of inference. Furthermore, to faithfully evaluate TLMs, we conduct an empirical study to explore the distribution of satisfiability problems.</abstract>
       <url hash="4322fb76">2024.acl-long.815</url>
@@ -11437,7 +11437,7 @@
       <author><first>Valentina</first><last>Pyatkin</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Musashi</first><last>Hinck</last><affiliation>Intel</affiliation></author>
       <author><first>Hannah</first><last>Kirk</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last><affiliation>Center for Information and Language Processing</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last><affiliation>Center for Information and Language Processing</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>15295-15311</pages>
       <abstract>Much recent work seeks to evaluate values and opinions in large language models (LLMs) using multiple-choice surveys and questionnaires. Most of this work is motivated by concerns around real-world LLM applications. For example, politically-biased LLMs may subtly influence society when they are used by millions of people. Such real-world concerns, however, stand in stark contrast to the artificiality of current evaluations: real users do not typically ask LLMs survey questions. Motivated by this discrepancy, we challenge the prevailing *constrained* evaluation paradigm for values and opinions in LLMs and explore more realistic *unconstrained* evaluations. As a case study, we focus on the popular Political Compass Test (PCT). In a systematic review, we find that most prior work using the PCT *forces models to comply with the PCT’s multiple-choice format. We show that models give substantively different answers when not forced; that answers change depending on how models are forced; and that answers lack paraphrase robustness. Then, we demonstrate that models give different answers yet again in a more realistic open-ended answer setting. We distill these findings into recommendations and open challenges in evaluating values and opinions in LLMs.</abstract>
@@ -11478,7 +11478,7 @@
       <author><first>Qiliang</first><last>Liang</last><affiliation>Peking University</affiliation></author>
       <author><first>Yaqi</first><last>Yin</last><affiliation>Peking University</affiliation></author>
       <author><first>Hansi</first><last>Wang</last><affiliation>Peking University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
       <pages>15354-15365</pages>
       <abstract>In parataxis languages like Chinese, word meanings are highly correlated with morphological knowledge, which can help to disambiguate word senses. However, in-depth exploration of morphological knowledge in previous word sense disambiguation (WSD) methods is still lacking due to the absence of publicly available resources. In this paper, we are motivated to enhance Chinese WSD with full morphological knowledge, including both word-formations and morphemes. We first construct the largest and releasable Chinese WSD resources, including the lexico-semantic inventories MorInv and WrdInv, a Chinese WSD dataset MiCLS, and an out-of-volcabulary (OOV) test set. Then, we propose a model, MorBERT, to fully leverage this morphology-informed knowledge for Chinese WSD and achieve a SOTA F1 of 92.18% in the task. Finally, we demonstrated the model’s robustness in low-resource settings and generalizability to OOV senses. These resources and methods may bring new insights into and solutions for various downstream tasks in both computational and humanistic fields.</abstract>
       <url hash="7664f3fa">2024.acl-long.819</url>
@@ -11515,7 +11515,7 @@
     </paper>
     <paper id="822">
       <title>Media Framing: A typology and Survey of Computational Approaches Across Disciplines</title>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Shima</first><last>Khanehzar</last><affiliation>CSIRO</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>University of Melbourne</affiliation></author>
       <pages>15407-15428</pages>
@@ -11669,7 +11669,7 @@
       <author><first>Yuxin</first><last>Jiang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>15568-15592</pages>
       <abstract>Managing long sequences has become an important and necessary feature for large language models (LLMs). However, assessing their ability to handle long contexts remains a challenge. This paper introduces M<tex-math>^4</tex-math>LE, a <tex-math>\textbf{M}</tex-math>ulti-ability, <tex-math>\textbf{M}</tex-math>ulti-range, <tex-math>\textbf{M}</tex-math>ulti-task, <tex-math>\textbf{M}</tex-math>ulti-domain benchmark for <tex-math>\textbf{L}</tex-math>ong-context <tex-math>\textbf{E}</tex-math>valuation. It encompasses 36 NLP datasets, covering 11 types of tasks and 12 domains, providing a comprehensive test bed. To address the lack of tasks featuring naturally long sequences, we propose an automatic approach to convert short-sequence tasks into long-sequence scenarios. These scenarios evaluate LLMs’ long-context understanding across five key abilities: understanding of single or multiple relevant spans in long contexts based on explicit or semantic hints, and global context understanding. This automatic approach allows us to create instances evenly distributed from 1k to 8k input length. Our evaluation of 11 prominent LLMs reveals that 1) Current LLMs struggle to understand long context, particularly when tasks require multiple-span attention. 2) Semantic retrieval is more difficult for competent LLMs. 3) Models fine-tuned on longer text with position interpolation have comparable performance to those using Neural Tangent Kernel (NTK) aware scaling methods without fine-tuning. We make our benchmark publicly available to encourage future research in this challenging area.</abstract>
       <url hash="4a924d47">2024.acl-long.832</url>
@@ -11788,7 +11788,7 @@
       <author><first>Valentin</first><last>Hofmann</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Ananya</first><last>Jha</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Sachin</first><last>Kumar</last><affiliation>Ohio State University, Columbus</affiliation></author>
-      <author><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Xinxi</first><last>Lyu</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Nathan</first><last>Lambert</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Ian</first><last>Magnusson</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -11796,7 +11796,7 @@
       <author><first>Niklas</first><last>Muennighoff</last><affiliation>Contextual AI</affiliation></author>
       <author><first>Aakanksha</first><last>Naik</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Crystal</first><last>Nam</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Abhilasha</first><last>Ravichander</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Kyle</first><last>Richardson</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Zejiang</first><last>Shen</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
@@ -11804,8 +11804,8 @@
       <author><first>Nishant</first><last>Subramani</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Oyvind</first><last>Tafjord</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Evan</first><last>Walsh</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <author><first>Iz</first><last>Beltagy</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Dirk</first><last>Groeneveld</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -11845,7 +11845,7 @@
       <author><first>Niklas</first><last>Muennighoff</last><affiliation>Contextual AI</affiliation></author>
       <author><first>Aakanksha</first><last>Naik</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Crystal</first><last>Nam</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Valentina</first><last>Pyatkin</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Abhilasha</first><last>Ravichander</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Dustin</first><last>Schwenk</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -11857,11 +11857,11 @@
       <author><first>Pradeep</first><last>Dasigi</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Nathan</first><last>Lambert</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Kyle</first><last>Richardson</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Kyle</first><last>Lo</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Noah</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington</affiliation></author>
       <pages>15789-15809</pages>
       <abstract>Language models (LMs) have become ubiquitous in both NLP research and in commercial product offerings. As their commercial importance has surged, the most powerful models have become closed off, gated behind proprietary interfaces, with important details of their training data, architectures, and development undisclosed. Given the importance of these details in scientifically studying these models, including their biases and potential risks, we believe it is essential for the research community to have access to powerful, truly open LMs. To this end, we have built OLMo, a competitive, truly Open Language Model, to enable the scientific study of language models. Unlike most prior efforts that have only released model weights and inference code, we release OLMo alongside open training data and training and evaluation code. We hope this release will empower the open research community and inspire a new wave of innovation.</abstract>
@@ -11899,7 +11899,7 @@
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft</affiliation></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>Indian Institute of Technology Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>15831-15879</pages>
       <abstract>Despite the considerable advancements in English LLMs, the progress in building comparable models for other languages has been hindered due to the scarcity of tailored resources. Our work aims to bridge this divide by introducing an expansive suite of resources specifically designed for the development of Indic LLMs, covering 22 languages, containing a total of 251B tokens and 74.8M instruction-response pairs. Recognizing the importance of both data quality and quantity, our approach combines highly curated manually verified data, unverified yet valuable data, and synthetic data. We build a clean, open-source pipeline for curating pre-training data from diverse sources, including websites, PDFs, and videos, incorporating best practices for crawling, cleaning, flagging, and deduplication. For instruction-fine tuning, we amalgamate existing Indic datasets, translate/transliterate English datasets into Indian languages, and utilize LLaMa2 and Mixtral models to create conversations grounded in articles from Indian Wikipedia and Wikihow. Additionally, we address toxicity alignment by generating toxic prompts for multiple scenarios and then generate non-toxic responses by feeding these toxic prompts to an aligned LLaMa2 model. We hope that the datasets, tools, and resources released as a part of this work will not only propel the research and development of Indic LLMs but also establish an open-source blueprint for extending such efforts to other languages.</abstract>
       <url hash="28f6a48c">2024.acl-long.843</url>
@@ -11915,7 +11915,7 @@
       <author><first>Fuwen</first><last>Luo</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <pages>15880-15893</pages>
       <abstract>Large Language Models (LLMs) have achieved remarkable performance in objective tasks such as open-domain question answering and mathematical reasoning, which can often be solved through recalling learned factual knowledge or chain-of-thought style reasoning. However, we find that the performance of LLMs in subjective tasks is still unsatisfactory, such as metaphor recognition, dark humor detection, etc. Compared to objective tasks, subjective tasks focus more on interpretation or emotional response rather than a universally accepted reasoning pathway. Based on the characteristics of the tasks and the strong dialogue-generation capabilities of LLMs, we propose RiC (Reasoning in Conversation), a method that focuses on solving subjective tasks through dialogue simulation. The motivation of RiC is to mine useful contextual information by simulating dialogues instead of supplying chain-of-thought style rationales, thereby offering potential useful knowledge behind dialogues for giving the final answers. We evaluate both API-based and open-source LLMs including GPT-4, ChatGPT, and OpenChat across twelve tasks. Experimental results show that RiC can yield significant improvement compared with various baselines.</abstract>
       <url hash="8ff47454">2024.acl-long.844</url>
@@ -11935,7 +11935,7 @@
       <author><first>Hui-Lee</first><last>Ooi</last><affiliation>École Polytechnique de Montréal, Université de Montréal</affiliation></author>
       <author><first>Amr</first><last>Kayid</last><affiliation>Technical University Munich</affiliation></author>
       <author><first>Freddie</first><last>Vargus</last><affiliation>Boston University, Boston University</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>Google</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>Google</affiliation></author>
       <author><first>Shayne</first><last>Longpre</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Niklas</first><last>Muennighoff</last><affiliation>Contextual AI</affiliation></author>
       <author><first>Marzieh</first><last>Fadaee</last><affiliation>Cohere For AI</affiliation></author>
@@ -11989,7 +11989,7 @@
       <author><first>Zhuang</first><last>Chen</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Jiawen</first><last>Deng</last><affiliation>University of Electronic Science and Technology of China</affiliation></author>
       <author><first>Sahand</first><last>Sabour</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Minlie</first><last>Huang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <pages>15984-16007</pages>
       <abstract>Theory of mind (ToM) refers to humans’ ability to understand and infer the desires, beliefs, and intentions of others. The acquisition of ToM plays a key role in humans’ social cognition and interpersonal relations. Though indispensable for social intelligence, ToM is still lacking for modern AI and NLP systems since they cannot access the human mental state and cognitive process beneath the training corpus. To empower AI systems with the ToM ability and narrow the gap between them and humans, in this paper, we propose COKE: the first cognitive knowledge graph for machine theory of mind. Specifically, COKE formalizes ToM as a collection of 45k+ manually verified cognitive chains that characterize human mental activities and subsequent behavioral/affective responses when facing specific social circumstances. In addition, we further generalize COKE using LLMs and build a powerful generation model COLM tailored for cognitive reasoning. Experimental results in both automatic and human evaluation demonstrate the high quality of COKE, the superior ToM ability of COLM, and its potential to significantly enhance social applications.</abstract>
@@ -12026,7 +12026,7 @@
       <author><first>Ruskin</first><last>Manku</last><affiliation>State University of New York at Stony Brook</affiliation></author>
       <author><first>Vinty</first><last>Dong</last><affiliation>State University of New York at Stony Brook</affiliation></author>
       <author><first>Edward</first><last>Li</last><affiliation>State University of New York at Stony Brook</affiliation></author>
-      <author id="shashank-gupta"><first>Shashank</first><last>Gupta</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author><first>Shashank</first><last>Gupta</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Ashish</first><last>Sabharwal</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <pages>16022-16076</pages>
@@ -12187,7 +12187,7 @@
       <title><fixed-case>ECBD</fixed-case>: Evidence-Centered Benchmark Design for <fixed-case>NLP</fixed-case></title>
       <author><first>Yu Lu</first><last>Liu</last><affiliation>McGill University, McGill University</affiliation></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University</affiliation></author>
       <author><first>Q. Vera</first><last>Liao</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Ziang</first><last>Xiao</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
@@ -12247,7 +12247,7 @@
     <meta>
       <booktitle>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <editor><first>Lun-Wei</first><last>Ku</last><affiliation>Academia Sinica</affiliation></editor>
-      <editor><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
+      <editor id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
       <editor><first>Vivek</first><last>Srikumar</last><affiliation>University of Utah</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
@@ -12268,7 +12268,7 @@
       <author><first>Xingdi</first><last>Yuan</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Marc-Alexandre</first><last>Côté</last><affiliation>Microsoft</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
       <pages>1-17</pages>
       <abstract>Virtual environments play a key role in benchmarking advances in complex planning and decision-making tasks but are expensive and complicated to build by hand. Can current language models themselves serve as world simulators, correctly predicting how actions change different world states, thus bypassing the need for extensive manual coding? Our goal is to answer this question in the context of text-based simulators. Our approach is to build and use a new benchmark, called ByteSized32-State-Prediction, containing a dataset of text game state transitions and accompanying game tasks. We use this to directly quantify, for the first time, how well LLMs can serve as text-based world simulators. We test GPT-4 on this dataset and find that, despite its impressive performance, it is still an unreliable world simulator without further innovations. This work thus contributes both new insights into current LLM’s capabilities and weaknesses, as well as a novel benchmark to track future progress as new models appear.</abstract>
       <url hash="60668c3b">2024.acl-short.1</url>
@@ -12304,7 +12304,7 @@
       <title>Resisting the Lure of the Skyline: Grounding Practices in Active Learning for Morphological Inflection</title>
       <author><first>Saliha</first><last>Muradoglu</last></author>
       <author><first>Michael</first><last>Ginn</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Mans</first><last>Hulden</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>47-55</pages>
       <abstract>Active learning (AL) aims to lower the demand of annotation by selecting informative unannotated samples for the model building. In this paper, we explore the importance of conscious experimental design in the language documentation and description setting, particularly the distribution of the unannotated sample pool. We focus on the task of morphological inflection using a Transformer model. We propose context motivated benchmarks: a baseline and skyline. The baseline describes the frequency weighted distribution encountered in natural speech. We simulate this using Wikipedia texts. The skyline defines the more common approach, uniform sampling from a large, balanced corpus (UniMorph, in our case), which often yields mixed results. We note the unrealistic nature of this unannotated pool. When these factors are considered, our results show a clear benefit to targeted sampling.</abstract>
@@ -12317,7 +12317,7 @@
       <author><first>Hongyi</first><last>Yuan</last></author>
       <author><first>Keming</first><last>Lu</last></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chang</first><last>Zhou</last></author>
       <pages>56-64</pages>
       <abstract>Large language models (LLMs) exhibit exceptional performance in language tasks, yet their auto-regressive inference is limited due to high computational requirements and is sub-optimal due to the exposure bias. Inspired by speculative decoding and contrastive decoding, we introduce Speculative Contrastive Decoding (SCD), a straightforward yet powerful decoding approach that leverages predictions from smaller language models (LMs) to achieve both decoding acceleration and quality improvement. Extensive evaluations and analyses on four diverse language tasks demonstrate the effectiveness of SCD, showing that decoding efficiency and quality can compatibly benefit from one smaller LM.</abstract>
@@ -12363,7 +12363,7 @@
       <title>Simpson’s Paradox and the Accuracy-Fluency Tradeoff in Translation</title>
       <author><first>Zheng Wei</first><last>Lim</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
       <author><first>Charles</first><last>Kemp</last><affiliation>University of Melbourne</affiliation></author>
       <pages>92-103</pages>
       <abstract>A good translation should be faithful to the source and should respect the norms of the target language. We address a theoretical puzzle about the relationship between these objectives. On one hand, intuition and some prior work suggest that accuracy and fluency should trade off against each other, and that capturing every detail of the source can only be achieved at the cost of fluency. On the other hand, quality assessment researchers often suggest that accuracy and fluency are highly correlated and difficult for human raters to distinguish (Callison-Burch et al., 2007). We show that the tension between these views is an instance of Simpson’s paradox, and that accuracy and fluency are positively correlated at the level of the corpus but trade off at the level of individual source segments. We further suggest that the relationship between accuracy and fluency is best evaluated at the segment (or sentence) level, and that the trade off between these dimensions has implications both for assessing translation quality and developing improved MT systems.</abstract>
@@ -12534,7 +12534,7 @@
       <title>Growing Trees on Sounds: Assessing Strategies for End-to-End Dependency Parsing of Speech</title>
       <author><first>Adrien</first><last>Pupier</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <author><first>Maximin</first><last>Coavoux</last><affiliation>CNRS</affiliation></author>
-      <author><first>Jérôme</first><last>Goulian</last><affiliation>Université Grenoble Alpes</affiliation></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <author><first>Benjamin</first><last>Lecouteux</last><affiliation>University of Grenoble-Alpes</affiliation></author>
       <pages>225-233</pages>
       <abstract>Direct dependency parsing of the speech signal –as opposed to parsing speech transcriptions– has recently been proposed as a task (Pupier et al. 2022), as a way of incorporating prosodic information in the parsing system and bypassing the limitations of a pipeline approach that would consist of using first an Automatic Speech Recognition (ASR) system and then a syntactic parser. In this article, we report on a set of experiments aiming at assessing the performance of two parsing paradigms (graph-based parsing and sequence labeling based parsing) on speech parsing. We perform this evaluation on a large treebank of spoken French, featuring realistic spontaneous conversations. Our findings show that (i) the graph based approach obtain better results across the board (ii) parsing directly from speech outperforms a pipeline approach, despite having 30% fewer parameters.</abstract>
@@ -12597,9 +12597,9 @@
       <title>Generating Harder Cross-document Event Coreference Resolution Datasets using Metaphoric Paraphrasing</title>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
       <author><first>Zhiyong Eric</first><last>Wang</last></author>
-      <author><first>George Arthur</first><last>Baker</last></author>
+      <author id="george-baker"><first>George Arthur</first><last>Baker</last></author>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>276-286</pages>
       <abstract>The most popular Cross-Document Event Coreference Resolution (CDEC) datasets fail to convey the true difficulty of the task, due to the lack of lexical diversity between coreferring event triggers (words or phrases that refer to an event). Furthermore, there is a dearth of event datasets for figurative language, limiting a crucial avenue of research in event comprehension. We address these two issues by introducing ECB+META, a lexically rich variant of Event Coref Bank Plus (ECB+) for CDEC on symbolic and metaphoric language. We use ChatGPT as a tool for the metaphoric transformation of sentences in the documents of ECB+, then tag the original event triggers in the transformed sentences in a semi-automated manner. In this way, we avoid the re-annotation of expensive coreference links. We present results that show existing methods that work well on ECB+ struggle with ECB+META, thereby paving the way for CDEC research on a much more challenging dataset. Code/data: https://github.com/ahmeshaf/llms_coref</abstract>
       <url hash="17d81066">2024.acl-short.27</url>
@@ -12653,9 +12653,9 @@
     </paper>
     <paper id="32">
       <title><fixed-case>ATLAS</fixed-case>: Improving Lay Summarisation with Attribute-based Control</title>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last><affiliation>Beijing University of Technology</affiliation></author>
+      <author><first>Zhihao</first><last>Zhang</last><affiliation>Beijing University of Technology</affiliation></author>
       <author><first>Tomas</first><last>Goldsack</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>University of Manchester</affiliation></author>
       <pages>337-345</pages>
       <abstract>Lay summarisation aims to produce summaries of scientific articles that are comprehensible to non-expert audiences. However, previous work assumes a one-size-fits-all approach, where the content and style of the produced summary are entirely dependent on the data used to train the model. In practice, audiences with different levels of expertise will have specific needs, impacting what content should appear in a lay summary and how it should be presented. Aiming to address this, we propose ATLAS, a novel abstractive summarisation approach that can control various properties that contribute to the overall “layness” of the generated summary using targeted control attributes. We evaluate ATLAS on a combination of biomedical lay summarisation datasets, where it outperforms state-of-the-art baselines using mainstream summarisation metrics.Additional analyses provided on the discriminatory power and emergent influence of our selected controllable attributes further attest to the effectiveness of our approach.</abstract>
@@ -12668,7 +12668,7 @@
       <author><first>Mengfei</first><last>Du</last></author>
       <author><first>Binhao</first><last>Wu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zejun</first><last>Li</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>346-355</pages>
       <abstract>The recent rapid development of Large Vision-Language Models (LVLMs) has indicated their potential for embodied tasks. However, the critical skill of spatial understanding in embodied environments has not been thoroughly evaluated, leaving the gap between current LVLMs and qualified embodied intelligence unknown. Therefore, we construct EmbSpatial-Bench, a benchmark for evaluating embodied spatial understanding of LVLMs. The benchmark is automatically derived from embodied scenes and covers 6 spatial relationships from an egocentric perspective. Experiments expose the insufficient capacity of current LVLMs (even GPT-4V). We further present EmbSpatial-SFT, an instruction-tuning dataset designed to improve LVLMs’ embodied spatial understanding.</abstract>
@@ -12743,7 +12743,7 @@
       <title>Born Differently Makes a Difference: Counterfactual Study of Bias in Biography Generation from a Data-to-Text Perspective</title>
       <author><first>Biaoyan</first><last>Fang</last><affiliation>CSIRO</affiliation></author>
       <author><first>Ritvik</first><last>Dinesh</last><affiliation>University of Sydney, University of Sydney</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>CSIRO</affiliation></author>
       <pages>409-424</pages>
       <abstract>How do personal attributes affect biography generation? Addressing this question requires an identical pair of biographies where only the personal attributes of interest are different. However, it is rare in the real world. To address this, we propose a counterfactual methodology from a data-to-text perspective, manipulating the personal attributes of interest while keeping the co-occurring attributes unchanged. We first validate that the fine-tuned Flan-T5 model generates the biographies based on the given attributes. This work expands the analysis of gender-centered bias in text generation. Our results confirm the well-known bias in gender and also show the bias in regions, in both individual and its related co-occurring attributes in semantic machining and sentiment.</abstract>
@@ -12754,7 +12754,7 @@
     <paper id="40">
       <title>Sign Language Translation with Sentence Embedding Supervision</title>
       <author><first>Yasser</first><last>Hamidullah</last></author>
-      <author><first>Josef</first><last>van Genabith</last><affiliation>German Research Center for AI and Universität des Saarlandes</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last><affiliation>German Research Center for AI and Universität des Saarlandes</affiliation></author>
       <author><first>Cristina</first><last>España-Bonet</last><affiliation>German Research Center for AI</affiliation></author>
       <pages>425-434</pages>
       <abstract>State-of-the-art sign language translation (SLT) systems facilitate the learning process through gloss annotations, either in an end2end manner or by involving an intermediate step. Unfortunately, gloss labelled sign language data is usually not available at scale and, when available, gloss annotations widely differ from dataset to dataset. We present a novel approach using sentence embeddings of the target sentences at training time that take the role of glosses. The new kind of supervision does not need any manual annotation but it is learned on raw textual data. As our approach easily facilitates multilinguality, we evaluate it on datasets covering German (PHOENIX-2014T) and American (How2Sign) sign languages and experiment with mono- and multilingual sentence embeddings and translation systems. Our approach significantly outperforms other gloss-free approaches, setting the new state-of-the-art for data sets where glosses are not available and when no additional SLT datasets are used for pretraining, diminishing the gap between gloss-free and gloss-dependent systems.</abstract>
@@ -12779,7 +12779,7 @@
     <paper id="42">
       <title><fixed-case>D</fixed-case>oc<fixed-case>F</fixed-case>in<fixed-case>QA</fixed-case>: A Long-Context Financial Reasoning Dataset</title>
       <author><first>Varshini</first><last>Reddy</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
       <author><first>Viet Dac</first><last>Lai</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Michael</first><last>Krumdick</last><affiliation>Kensho</affiliation></author>
       <author><first>Charles</first><last>Lovering</last><affiliation>Kensho</affiliation></author>
@@ -12794,7 +12794,7 @@
       <title><fixed-case>M</fixed-case>ask<fixed-case>LID</fixed-case>: Code-Switching Language Identification through Iterative Masking</title>
       <author><first>Amir Hossein</first><last>Kargaran</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>459-469</pages>
       <abstract>We present MaskLID, a simple, yet effective, code-switching (CS) language identification (LID) method. MaskLID does not require any training and is designed to complement current high-performance sentence-level LIDs. Sentence-level LIDs are classifiers trained on monolingual texts to provide single labels, typically using a softmax layer to turn scores into probabilities. However, in cases where a sentence is composed in both L1 and L2 languages, the LID classifier often only returns the dominant label L1. To address this limitation, MaskLID employs a strategy to mask text features associated with L1, allowing the LID to classify the text as L2 in the next round. This method uses the LID itself to identify the features that require masking and does not rely on any external resource. In this work, we explore the use of MaskLID for two open-source LIDs (GlotLID and OpenLID), that are both based on the FastText architecture. Code and demo are available at https://github.com/cisnlp/MaskLID.</abstract>
       <url hash="a0202fe5">2024.acl-short.43</url>
@@ -12834,7 +12834,7 @@
       <author><first>Ashutosh</first><last>Sharma</last></author>
       <author><first>Omar</first><last>Khattab</last></author>
       <author><first>Niyati</first><last>Chhaya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>501-509</pages>
       <abstract>In this paper, we introduce Neural Information Retrieval resources for 11 widely spoken Indian Languages (Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Oriya, Punjabi, Tamil, and Telugu) from two major Indian language families (Indo-Aryan and Dravidian). These resources include (a) INDIC-MARCO, a multilingual version of the MS MARCO dataset in 11 Indian Languages created using Machine Translation, and (b) Indic-ColBERT, a collection of 11 distinct Monolingual Neural Information Retrieval models, each trained on one of the 11 languages in the INDIC-MARCO dataset. To the best of our knowledge, IndicIRSuite is the first attempt at building large-scale Neural Information Retrieval resources for a large number of Indian languages, and we hope that it will help accelerate research in Neural IR for Indian Languages. Experiments demonstrate that Indic-ColBERT achieves 47.47% improvement in the MRR@10 score averaged over the INDIC-MARCO baselines for all 11 Indian languages except Oriya, 12.26% improvement in the NDCG@10 score averaged over the MIRACL Bengali and Hindi Language baselines, and 20% improvement in the MRR@100 Score over the Mr. Tydi Bengali Language baseline.</abstract>
       <url hash="aedd76fb">2024.acl-short.46</url>
@@ -12917,7 +12917,7 @@
       <title>Time Sensitive Knowledge Editing through Efficient Finetuning</title>
       <author><first>Xiou</first><last>Ge</last><affiliation>Apple</affiliation></author>
       <author><first>Ali</first><last>Mousavi</last><affiliation>Apple</affiliation></author>
-      <author><first>Edouard</first><last>Grave</last><affiliation>Facebook</affiliation></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last><affiliation>Facebook</affiliation></author>
       <author><first>Armand</first><last>Joulin</last><affiliation>Facebook</affiliation></author>
       <author><first>Kun</first><last>Qian</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Benjamin</first><last>Han</last><affiliation>Apple</affiliation></author>
@@ -12983,7 +12983,7 @@
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
       <author><first>Ratish</first><last>Puduppully</last><affiliation>A*STAR</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft and Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
-      <author><first>Mitesh</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>640-649</pages>
       <abstract>While machine translation evaluation has been studied primarily for high-resource languages, there has been a recent interest in evaluation for low-resource languages due to the increasing availability of data and models. In this paper, we focus on a zero-shot evaluation setting focusing on low-resource Indian languages, namely Assamese, Kannada, Maithili, and Punjabi. We collect sufficient Multi-Dimensional Quality Metrics (MQM) and Direct Assessment (DA) annotations to create test sets and meta-evaluate a plethora of automatic evaluation metrics. We observe that even for learned metrics, which are known to exhibit zero-shot performance, the Kendall Tau and Pearson correlations with human annotations are only as high as 0.32 and 0.45. Synthetic data approaches show mixed results and overall do not help close the gap by much for these languages. This indicates that there is still a long way to go for low-resource evaluation.</abstract>
       <url hash="e379e93c">2024.acl-short.58</url>
@@ -13021,7 +13021,7 @@
       <author><first>Hyeonseok</first><last>Kang</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Hyein</first><last>Seo</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Jeesu</first><last>Jung</last></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Du-Seong</first><last>Chang</last></author>
       <author><first>Riwoo</first><last>Chung</last></author>
       <pages>665-672</pages>
@@ -13060,8 +13060,8 @@
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Xuefeng</first><last>Bai</last></author>
       <author><first>Yang</first><last>Xiang</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>693-704</pages>
       <abstract>Recently, large language models (LLMs) enhanced by self-reflection have achieved promising performance on machine transla004 tion. The key idea is guiding LLMs to generate translation with human-like feedback. However, existing self-reflection methods lack effective feedback information, limiting the translation performance. To address this, we introduce a DUAL-REFLECT framework, leveraging the dual learning of translation tasks to provide effective feedback, thereby enhancing the models’ self-reflective abilities and improving translation performance. The application of this method across various translation tasks has proven its effectiveness in improving translation accuracy and eliminating ambiguities, especially in translation tasks with low-resource language pairs.</abstract>
@@ -13127,7 +13127,7 @@
       <title>Estimating the Level of Dialectness Predicts Inter-annotator Agreement in Multi-dialect <fixed-case>A</fixed-case>rabic Datasets</title>
       <author><first>Amr</first><last>Keleg</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Walid</first><last>Magdy</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>766-777</pages>
       <abstract>On annotating multi-dialect Arabic datasets, it is common to randomly assign the samples across a pool of native Arabic speakers. Recent analyses recommended routing dialectal samples to native speakers of their respective dialects to build higher-quality datasets. However, automatically identifying the dialect of samples is hard. Moreover, the pool of annotators who are native speakers of specific Arabic dialects might be scarce. Arabic Level of Dialectness (ALDi) was recently introduced as a quantitative variable that measures how sentences diverge from Standard Arabic. On randomly assigning samples to annotators, we hypothesize that samples of higher ALDi scores are harder to label especially if they are written in dialects that the annotators do not speak. We test this by analyzing the relation between ALDi scores and the annotators’ agreement, on 15 public datasets having raw individual sample annotations for various sentence-classification tasks. We find strong evidence supporting our hypothesis for 11 of them. Consequently, we recommend prioritizing routing samples of high ALDi scores to native speakers of each sample’s dialect, for which the dialect could be automatically identified at higher accuracies.</abstract>
       <url hash="82a30d81">2024.acl-short.69</url>
@@ -13139,7 +13139,7 @@
       <title>Estimating the Level of Dialectness Predicts Inter-annotator Agreement in Multi-dialect <fixed-case>A</fixed-case>rabic Datasets</title>
       <author><first>Amr</first><last>Keleg</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Walid</first><last>Magdy</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>778-789</pages>
       <abstract>On annotating multi-dialect Arabic datasets, it is common to randomly assign the samples across a pool of native Arabic speakers. Recent analyses recommended routing dialectal samples to native speakers of their respective dialects to build higher-quality datasets. However, automatically identifying the dialect of samples is hard. Moreover, the pool of annotators who are native speakers of specific Arabic dialects might be scarce. Arabic Level of Dialectness (ALDi) was recently introduced as a quantitative variable that measures how sentences diverge from Standard Arabic. On randomly assigning samples to annotators, we hypothesize that samples of higher ALDi scores are harder to label especially if they are written in dialects that the annotators do not speak. We test this by analyzing the relation between ALDi scores and the annotators’ agreement, on 15 public datasets having raw individual sample annotations for various sentence-classification tasks. We find strong evidence supporting our hypothesis for 11 of them. Consequently, we recommend prioritizing routing samples of high ALDi scores to native speakers of each sample’s dialect, for which the dialect could be automatically identified at higher accuracies.</abstract>
       <url hash="82a30d81">2024.acl-short.70</url>
@@ -13189,8 +13189,8 @@
       <title>What Do Dialect Speakers Want? A Survey of Attitudes Towards Language Technology for <fixed-case>G</fixed-case>erman Dialects</title>
       <author><first>Verena</first><last>Blaschke</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Christoph</first><last>Purschke</last><affiliation>University of Luxemburg</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last><affiliation>Center for Information and Language Processing</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>823-841</pages>
       <abstract>Natural language processing (NLP) has largely focused on modelling standardized languages. More recently, attention has increasingly shifted to local, non-standardized languages and dialects. However, the relevant speaker populations’ needs and wishes with respect to NLP tools are largely unknown. In this paper, we focus on dialects and regional languages related to German – a group of varieties that is heterogeneous in terms of prestige and standardization. We survey speakers of these varieties (N=327) and present their opinions on hypothetical language technologies for their dialects. Although attitudes vary among subgroups of our respondents, we find that respondents are especially in favour of potential NLP tools that work with dialectal input (especially audio input) such as virtual assistants, and less so for applications that produce dialectal output such as machine translation or spellcheckers.</abstract>
       <url hash="5c2986f9">2024.acl-short.74</url>
@@ -13218,7 +13218,7 @@
       <author><first>Harshvardhan</first><last>Srivastava</last><affiliation>Columbia University</affiliation></author>
       <author><first>Robert</first><last>West</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last><affiliation>Columbia University</affiliation></author>
       <pages>855-869</pages>
       <abstract>Humor is a fundamental facet of human cognition and interaction. Yet, despite recent advances in natural language processing, humor detection remains a challenging task that is complicated by the scarcity of datasets that pair humorous texts with similar non-humorous counterparts. We investigate whether large language models (LLMs) can generate synthetic data for humor detection via editing texts. We benchmark LLMs on an existing human dataset and show that current LLMs display an impressive ability to “unfun” jokes, as judged by humans and as measured on the downstream task of humor detection. We extend our approach to a code-mixed English-Hindi humor dataset where we find that GPT-4’s synthetic data is highly rated by bilingual annotators and provides challenging adversarial examples for humor classifiers.</abstract>
       <url hash="9b372b05">2024.acl-short.76</url>
@@ -13230,7 +13230,7 @@
       <title>Don’t Buy it! Reassessing the Ad Understanding Abilities of Contrastive Multimodal Models</title>
       <author><first>Anna</first><last>Bavaresco</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Alberto</first><last>Testoni</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>870-879</pages>
       <abstract>Image-based advertisements are complex multimodal stimuli that often contain unusual visual elements and figurative language. Previous research on automatic ad understanding has reported impressive zero-shot accuracy of contrastive vision-and-language models (VLMs) on an ad-explanation retrieval task. Here, we examine the original task setup and show that contrastive VLMs can solve it by exploiting grounding heuristics. To control for this confound, we introduce TRADE, a new evaluation test set with adversarial grounded explanations. While these explanations look implausible to humans, we show that they “fool” four different contrastive VLMs. Our findings highlight the need for an improved operationalisation of automatic ad understanding that truly evaluates VLMs’ multimodal reasoning abilities. We make our code and TRADE available at https://github.com/dmg-illc/trade.</abstract>
       <url hash="41cd2a31">2024.acl-short.77</url>
@@ -13243,7 +13243,7 @@
       <booktitle>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</booktitle>
       <editor><first>Yixin</first><last>Cao</last><affiliation>Singapore Management University</affiliation></editor>
       <editor><first>Yang</first><last>Feng</last><affiliation>Chinese Academy of Science</affiliation></editor>
-      <editor><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -13288,7 +13288,7 @@
       <author><first>Hao</first><last>Fei</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Meishan</first><last>Zhang</last><affiliation>Harbin Institute of Technology (Shenzhen), China and Tianjin University, China</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>19-30</pages>
       <abstract>Structured Natural Language Processing (XNLP) is an important subset of NLP that entails understanding the underlying semantic or syntactic structure of texts, which serves as a foundational component for many downstream applications. Despite certain recent efforts to explore universal solutions for specific categories of XNLP tasks, a comprehensive and effective approach for unifying all XNLP tasks long remains underdeveloped. Meanwhile, while XNLP demonstration systems are vital for researchers exploring various XNLP tasks, existing platforms can be limited to, e.g., supporting few XNLP tasks, lacking interactivity and universalness. To this end, we propose an advanced XNLP demonstration system, where we leverage LLM to achieve universal XNLP, with one model for all with high generalizability. Overall, our system advances in multiple aspects, including universal XNLP modeling, high performance, interpretability, scalability, and interactivity, offering a unified platform for exploring diverse XNLP tasks in the community.</abstract>
       <url hash="9fe5bb3e">2024.acl-demos.3</url>
@@ -13299,7 +13299,7 @@
       <title>Towards the <fixed-case>T</fixed-case>op<fixed-case>M</fixed-case>ost: A Topic Modeling System Toolkit</title>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Fengjun</first><last>Pan</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>31-41</pages>
       <abstract>Topic models have a rich history with various applications and have recently been reinvigorated by neural topic modeling. However, these numerous topic models adopt totally distinct datasets, implementations, and evaluations. This impedes quick utilization and fair comparisons, and thereby hinders their research progress and applications. To tackle this challenge, we in this paper propose a Topic Modeling System Toolkit (TopMost). Compared to existing toolkits, TopMost stands out by supporting more extensive features. It covers a broader spectrum of topic modeling scenarios with their complete lifecycles, including datasets, preprocessing, models, training, and evaluations. Thanks to its highly cohesive and decoupled modular design, TopMost enables rapid utilization, fair comparisons, and flexible extensions of diverse cutting-edge topic models. Our code, tutorials, and documentation are available at https://github.com/bobxwu/topmost.</abstract>
       <url hash="09f44af9">2024.acl-demos.4</url>
@@ -13418,7 +13418,7 @@
     <paper id="12">
       <title><fixed-case>G</fixed-case>en<fixed-case>GO</fixed-case>: <fixed-case>ACL</fixed-case> Paper Explorer with Semantic Features</title>
       <author><first>Sotaro</first><last>Takeshita</last><affiliation>Universit�t Mannheim</affiliation></author>
-      <author><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Kai</first><last>Eckert</last><affiliation>Mannheim University of Applied Sciences</affiliation></author>
       <pages>117-126</pages>
       <abstract>We present GenGO, a system for exploring papers published in ACL conferences. Paper data stored in our database is enriched with multi-aspect summaries, extracted named entities, a field of study label, and text embeddings by our data processing pipeline. These metadata are used in our web-based user interface to enable researchers to quickly find papers relevant to their interests, and grasp an overview of papers without reading full-text of papers. To make GenGO to be available online as long as possible, we design GenGO to be simple and efficient to reduce maintenance and financial costs. In addition, the modularity of our data processing pipeline lets developers easily extend it to add new features. We make our code available to foster open development and transparency: https://gengo.sotaro.io.</abstract>
@@ -13507,7 +13507,7 @@
       <author><first>Tingting</first><last>Cui</last></author>
       <author><first>Liutao</first><last>Liutao</last></author>
       <author><first>Jinwang</first><last>Song</last></author>
-      <author><first>Hongying</first><last>Zan</last><affiliation>Zhengzhou University</affiliation></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><affiliation>Zhengzhou University</affiliation></author>
       <author><first>Sun</first><last>Li</last><affiliation>China Academy of Information and Communications Technology</affiliation></author>
       <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>190-210</pages>
@@ -13542,7 +13542,7 @@
       <title><fixed-case>D</fixed-case>oc<fixed-case>P</fixed-case>ilot: Copilot for Automating <fixed-case>PDF</fixed-case> Edit Workflows in Documents</title>
       <author><first>Puneet</first><last>Mathur</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Alexa</first><last>Siu</last><affiliation>Adobe</affiliation></author>
-      <author><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Systems</affiliation></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Tong</first><last>Sun</last><affiliation>Adobe Systems</affiliation></author>
       <pages>232-246</pages>
       <abstract>Digital documents, such as PDFs, are vital in business workflows, enabling communication, documentation, and collaboration. Handling PDFs can involve navigating complex workflows and numerous tools (e.g., comprehension, annotation, editing), which can be tedious and time-consuming for users. We introduce DocPilot, an AI-assisted document workflow Copilot system capable of understanding user intent and executing tasks accordingly to help users streamline their workflows. DocPilot undertakes intelligent orchestration of various tools through LLM prompting in four steps: (1) Task plan generation, (2) Task plan verification and self-correction, (3) Multi-turn User Feedback, and (4) Task Plan Execution via Code Generation and Error log-based Code Self-Revision. The primary goal of this system is to free the user from the intricacies of document editing, enabling them to focus on the creative aspects and enrich their document management experience.</abstract>
@@ -13572,7 +13572,7 @@
       <title><fixed-case>P</fixed-case>y<fixed-case>F</fixed-case>oma: a Python finite-state compiler module</title>
       <author><first>Mans</first><last>Hulden</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Michael</first><last>Ginn</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Michael</first><last>Hammond</last><affiliation>University of Arizona</affiliation></author>
       <pages>258-265</pages>
       <abstract>We describe PyFoma, an open-source Python module for constructing weighted and unweighted finite-state transducers and automata from regular expressions, string rewriting rules, right-linear grammars, or low-level state/transition manipulation. A large variety of standard algorithms for working with finite-state machines is included, with a particular focus on the needs of linguistic and NLP applications. The data structures and code in the module are designed for legibility to allow for potential use in teaching the theory and algorithms associated with finite-state machines.</abstract>
@@ -13601,8 +13601,8 @@
     <paper id="26">
       <title>string2string: A Modern Python Library for String-to-String Algorithms</title>
       <author><first>Mirac</first><last>Suzgun</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Stuart</first><last>Shieber</last><affiliation>Harvard University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last><affiliation>Harvard University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>278-285</pages>
       <abstract>We introduce **string2string**, an open-source library that offers a comprehensive suite of efficient algorithms for a broad range of string-to-string problems. It includes traditional algorithmic solutions as well as recent advanced neural approaches to tackle various problems in string alignment, distance measurement, lexical and semantic search, and similarity analysis�along with several helpful visualization tools and metrics to facilitate the interpretation and analysis of these methods. Notable algorithms featured in the library include the Smith-Waterman algorithm for pairwise local alignment, the Hirschberg algorithm for global alignment, the Wagner-Fischer algorithm for edit distance, BARTScore and BERTScore for similarity analysis, the Knuth-Morris-Pratt algorithm for lexical search, and Faiss for semantic search. In addition, it wraps existing efficient and widely-used implementations of certain frameworks and metrics, such as sacreBLEU and ROUGE. Overall, the library aims to provide extensive coverage and increased flexibility in comparison to existing libraries for strings. It can be used for many downstream applications, tasks, and problems in natural-language processing, bioinformatics, and computational social sciences. It is implemented in Python, easily installable via pip, and accessible through a simple API. Source code, documentation, and tutorials are all available on our GitHub page: https://github.com/stanfordnlp/string2string* Documentation: https://string2string.readthedocs.io/en/latest/* GitHub page: https://github.com/stanfordnlp/string2string* Short video: https://drive.google.com/file/d/1IT-pBACDVUoEHewk__5Pz5mU5oAMq5k_/view?usp=sharing</abstract>
       <url hash="79bb351d">2024.acl-demos.26</url>
@@ -13728,7 +13728,7 @@
       <author><first>Danilo.miranda@idiap.ch</first><last>Danilo.miranda@idiap.ch</last><affiliation>NA</affiliation></author>
       <author><first>Maxime.delmas@idiap.ch</first><last>Maxime.delmas@idiap.ch</last><affiliation>NA</affiliation></author>
       <author><first>Harriet.unsworth@cruk.manchester.ac.uk</first><last>Harriet.unsworth@cruk.manchester.ac.uk</last><affiliation>NA</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>355-364</pages>
       <abstract>We present BioLunar, developed using the Lunar framework, as a tool for supporting biological analyses, with a particular emphasis on molecular-level evidence enrichment for biomarker discovery in oncology. The platform integrates Large Language Models (LLMs) to facilitate complex scientific reasoning across distributed evidence spaces, enhancing the capability for harmonizing and reasoning over heterogeneous data sources. Demonstrating its utility in cancer research, BioLunar leverages modular design, reusable data access and data analysis components, and a low-code user interface, enabling researchers of all programming levels to construct LLM-enabled scientific workflows. By facilitating automatic scientific discovery and inference from heterogeneous evidence, BioLunar exemplifies the potential of the integration between LLMs, specialised databases and biomedical tools to support expert-level knowledge synthesis and discovery.</abstract>
       <url hash="7bd84691">2024.acl-demos.34</url>
@@ -13783,7 +13783,7 @@
       <author><first>Xia</first><last>Chunxuan</last></author>
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>388-399</pages>
       <abstract>To facilitate the research on large language models (LLMs), this paper presents a comprehensive and unified library, LLMBox, to ease the development, use, and evaluation of LLMs. This library is featured with three main merits: (1) a unified data interface that supports the flexible implementation of various training strategies, (2) a comprehensive evaluation that covers extensive tasks, datasets, and models, and (3) more practical consideration, especially on user-friendliness and efficiency. With our library, users can easily reproduce existing methods, train new models, and conduct comprehensive performance comparisons. To rigorously test LLMBox, we conduct extensive experiments in a diverse coverage of evaluation settings, and experimental results demonstrate the effectiveness and efficiency of our library in supporting various implementations related to LLMs. The detailed introduction and usage guidance can be found at <url>https://github.com/RUCAIBox/LLMBox</url>.</abstract>
@@ -13909,7 +13909,7 @@
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Xudong</first><last>Han</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Chiyu</first><last>Zhang</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>68-75</pages>
       <abstract>Instruction tuning significantly enhances the performance of large language models (LLMs) across various tasks. However, the procedure to optimizing the mixing of instruction datasets for LLM fine-tuning is still poorly understood. This study categorizes instructions into three primary types: NLP downstream tasks, coding, and general chat. We explore the effects of instruction tuning on different combinations of datasets on LLM performance, and find that certain instruction types are more advantageous for specific applications but can negatively impact other areas. This work provides insights into instruction mixtures, laying the foundations for future research.</abstract>
       <url hash="4e6cceeb">2024.acl-srw.15</url>
@@ -13919,7 +13919,7 @@
     <paper id="16">
       <title>Fine-Tuning <fixed-case>ASR</fixed-case> models for Very Low-Resource Languages: A Study on Mvskoke</title>
       <author><first>Julia</first><last>Mainzinger</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington and University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington and University of Washington</affiliation></author>
       <pages>76-82</pages>
       <abstract>Recent advancements in multilingual models for automatic speech recognition (ASR) have been able to achieve a high accuracy for languages with extremely limited resources. This study examines ASR modeling for the Mvskoke language, an indigenous language of America. The parameter efficiency of adapter training is contrasted with training entire models, and it is demonstrated how performance varies with different amounts of data. Additionally, the models are evaluated with trigram language model decoding, and the outputs are compared across different types of speech recordings. Results show that training an adapter is both parameter efficient and gives higher accuracy for a relatively small amount of data.</abstract>
       <url hash="e0de67af">2024.acl-srw.16</url>
@@ -14053,7 +14053,7 @@
       <author><first>Jan</first><last>Strich</last></author>
       <author><first>Florian</first><last>Schneider</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Irina</first><last>Nikishina</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>209-244</pages>
       <abstract>Large Language Models (LLMs) such as ChatGPT, GitHub Copilot, Llama, or Mistral assist programmers as copilots and knowledge sources to make the coding process faster and more efficient. This paper aims to improve the copilot performance by implementing different self-alignment processes and retrieval-augmented generation (RAG) pipelines, as well as their combination. To test the effectiveness of all approaches, we create a dataset and apply a model-based evaluation, using LLM as a judge. It is designed to check the model’s abilities to understand the source code semantics, the dependency between files, and the overall meta-information about the repository. We also compare our approach with other existing solutions, e.g. ChatGPT-3.5, and evaluate on the existing benchmarks. Code and dataset are available online (https://anonymous.4open.science/r/ma_llm-382D).</abstract>
       <url hash="3df8ab17">2024.acl-srw.28</url>
@@ -14075,8 +14075,8 @@
       <title><fixed-case>V</fixed-case>i<fixed-case>M</fixed-case>ed<fixed-case>AQA</fixed-case>: A <fixed-case>V</fixed-case>ietnamese Medical Abstractive Question-Answering Dataset and Findings of Large Language Model</title>
       <author><first>Minh-Nam</first><last>Tran</last></author>
       <author><first>Phu-Vinh</first><last>Nguyen</last></author>
-      <author><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>252-260</pages>
       <abstract>Question answering involves creating answers to questions. With the growth of large language models, the ability of question-answering systems has dramatically improved. However, there is a lack of Vietnamese abstractive question-answering datasets, especially in the medical domain. Therefore, this research aims to mitigate this gap by introducing ViMedAQA. This **Vi**etnamese **Med**ical **A**bstractive **Q**uestion-**A**nswering dataset covers four topics in the Vietnamese medical domain, including body parts, disease, drugs and medicine. Additionally, the empirical results on the proposed dataset examine the capability of the large language models in the Vietnamese medical domain, including reasoning, memorizing and awareness of essential information.</abstract>
       <url hash="2545887c">2024.acl-srw.31</url>
@@ -14090,7 +14090,7 @@
       <author><first>Haoming</first><last>Li</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>261-272</pages>
       <abstract>Customizing LLMs for a specific task involves separating high-quality responses from lower-quality ones. This skill can be developed using supervised fine-tuning with extensive human preference data. However, obtaining a large volume of expert-annotated data is costly for most tasks. In this paper, we explore a novel method to optimize LLMs using ranking metrics. This method trains the model to prioritize the best responses from a pool of candidates created for a particular task. Rather than a traditional full ordering, we advocate for a partial ordering, as achieving consensus on the perfect order of candidate responses can be challenging. Our partial ordering is more robust, less sensitive to noise, and can be achieved with limited human annotations or through heuristic methods. We test our system’s improved response generation ability using benchmark datasets, including textual entailment and multi-document question answering. We conduct ablation studies to understand crucial factors, such as how to gather candidate responses for a specific task, determine their most suitable order, and balance supervised fine-tuning with ranking metrics. Our approach, named RESCUE, offers a promising avenue for enhancing the response generation and task accuracy of LLMs.</abstract>
       <url hash="3cbb59e1">2024.acl-srw.32</url>
@@ -14174,7 +14174,7 @@
       <author><first>James</first><last>O’Doherty</last></author>
       <author><first>Cian</first><last>Nolan</last></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <pages>358-377</pages>
       <abstract>The biomedical field relies on cost and time intensive systematic reviews of papers to enable practitioners to keep up to date with research. Impressive recent advances in large language models (LLMs) have made the task of automating at least part of the systematic review process feasible, but progress is slow. This paper identifies some factors that may have been holding research back, and proposes a new, enhanced dataset and prompting-based method for automatic synthesis generation, the most challenging step for automation. We test different models and types of information from and about biomedical studies for their usefulness in obtaining high-quality results.We find that, surprisingly, inclusion of paper abstracts can worsens results. Instead, study summary information, and system instructions informed by domain knowledge, are key to producing high-quality syntheses.</abstract>
       <url hash="8aac3af1">2024.acl-srw.42</url>
@@ -14300,7 +14300,7 @@
     <paper id="55">
       <title>Automatically Suggesting Diverse Example Sentences for <fixed-case>L</fixed-case>2 <fixed-case>J</fixed-case>apanese Learners Using Pre-Trained Language Models</title>
       <author><first>Enrico</first><last>Benedetti</last></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
       <author><first>Florian</first><last>Boudin</last><affiliation>University of Nantes</affiliation></author>
       <pages>517-534</pages>
       <abstract>Providing example sentences that are diverse and aligned with learners’ proficiency levels is essential for fostering effective language acquisition.This study examines the use of Pre-trained Language Models (PLMs) to produce example sentences targeting L2 Japanese learners.We utilize PLMs in two ways: as quality scoring components in a retrieval system that draws from a newly curated corpus of Japanese sentences, and as direct sentence generators using zero-shot learning.We evaluate the quality of sentences by considering multiple aspects such as difficulty, diversity, and naturalness, with a panel of raters consisting of learners of Japanese, native speakers – and GPT-4.Our findings suggest that there is inherent disagreement among participants on the ratings of sentence qualities, except for difficulty. Despite that, the retrieval approach was preferred by all evaluators, especially for beginner and advanced target proficiency, while the generative approaches received lower scores on average.Even so, our experiments highlight the potential for using PLMs to enhance the adaptability of sentence suggestion systems and therefore improve the language learning journey.</abstract>
@@ -14325,7 +14325,7 @@
       <author><first>Mo</first><last>Yu</last><affiliation>WeChat AI, Tencent</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>543-558</pages>
       <abstract>Retrieving relevant plots from the book for a query is a critical task, which can improve the reading experience and efficiency of readers. Readers usually only give an abstract and vague description as the query based on their own understanding, summaries, or speculations of the plot, which requires the retrieval model to have a strong ability to estimate the abstract semantic associations between the query and candidate plots. However, existing information retrieval (IR) datasets cannot reflect this ability well. In this paper, we propose PlotRetrieval, a labeled dataset to train and evaluate the performance of IR models on the novel task Plot Retrieval. Text pairs in PlotRetrieval have less word overlap and more abstract semantic association, which can reflect the ability of the IR models to estimate the abstract semantic association, rather than just traditional lexical or semantic matching. Extensive experiments across various lexical retrieval, sparse retrieval, dense retrieval, and cross-encoder methods compared with human studies on PlotRetrieval show current IR models still struggle in capturing abstract semantic association between texts. PlotRetrieval can be the benchmark for further research on the semantic association modeling ability of IR models.</abstract>
@@ -14364,7 +14364,7 @@
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Zijiao</first><last>Chen</last></author>
       <author><first>Jixing</first><last>Li</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1-2</pages>
       <abstract>Computational linguistics (CL) has witnessed tremendous advancements in recent years, with models such as large language models demonstrating exceptional performance in various natural language processing tasks. These advancements highlight their potential to help understand brain language processing, especially through the lens of brain encoding and decoding. Brain encoding involves the mapping of linguistic stimuli to brain activity, while brain decoding is the process of reconstructing linguistic stimuli from observed brain activities. CL models that excel at capturing and manipulating linguistic features are crucial for mapping linguistic stimuli to brain activities and vice versa. Brain encoding and decoding have vast applications, from enhancing human-computer interaction to developing assistive technologies for individuals with communication impairments. This tutorial will focus on elucidating how computational linguistics can facilitate brain encoding and decoding. We will delve into the principles and practices of using computational linguistics methods for brain encoding and decoding. We will also discuss the challenges and future directions of brain encoding and decoding. Through this tutorial, we aim to provide a comprehensive and informative overview of the intersection between computational linguistics and cognitive neuroscience, inspiring future research in this exciting and rapidly evolving field.</abstract>
       <url hash="9dda3d29">2024.acl-tutorials.1</url>
@@ -14404,8 +14404,8 @@
     <paper id="4">
       <title>Presentation Matters: How to Communicate Science in the <fixed-case>NLP</fixed-case> Venues and in the Wild</title>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>6-7</pages>
       <abstract>Each year a large number of early career researchers join the NLP/Computational Linguistics community, with most starting by presenting their research in the *ACL conferences and workshops. While writing a paper that has made it to these venues is one important step, what comes with communicating the outcome is equally important and sets the path to impact of a research outcome. In addition, not all PhD candidates get the chance of being trained for their presentation skills. Research methods courses are not all of the same quality and may not cover scientific communications, and certainly not all are tailored to the NLP community. We are proposing an introductory tutorial that covers a range of different communication skills, including writing, oral presentation (posters and demos), and social media presence. This is to fill in the gap for the researchers who may not have access to research methods courses or other mentors who could help them acquire such skills. The interactive nature of such a tutorial would allow attendees to ask questions and clarifications which would not be possible from reading materials alone.</abstract>
       <url hash="c30f2e87">2024.acl-tutorials.4</url>
diff --git a/data/xml/2024.alta.xml b/data/xml/2024.alta.xml
index 3a5d59dbfa..03e554df4d 100644
--- a/data/xml/2024.alta.xml
+++ b/data/xml/2024.alta.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2025-01-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association</booktitle>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
-      <editor><first>Sergio José</first><last>Rodríguez Méndez</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="sergio-jose-rodriguez-mendez"><first>Sergio José</first><last>Rodríguez Méndez</last></editor>
       <editor><first>Nicholas</first><last>Kuo</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Canberra, Australia</address>
@@ -30,7 +30,7 @@
       <title>Do <fixed-case>LLM</fixed-case>s Generate Creative and Visually Accessible Data visualisations?</title>
       <author><first>Clarissa</first><last>Miranda-Pena</last></author>
       <author><first>Andrew</first><last>Reeson</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Josiah</first><last>Poon</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <pages>12-29</pages>
@@ -80,11 +80,11 @@
     <paper id="7">
       <title>Simultaneous Machine Translation with Large Language Models</title>
       <author><first>Minghan</first><last>Wang</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Jinming</first><last>Zhao</last></author>
       <author><first>Fatemeh</first><last>Shiri</last></author>
       <author><first>Ehsan</first><last>Shareghi</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>89-103</pages>
       <abstract>Real-world simultaneous machine translation (SimulMT) systems face more challenges than just the quality-latency trade-off. They also need to address issues related to robustness with noisy input, processing long contexts, and flexibility for knowledge injection. These challenges demand models with strong language understanding and generation capabilities which may not often equipped by dedicated MT models. In this paper, we investigate the possibility of applying Large Language Models (LLM) to SimulMT tasks by using existing incremental-decoding methods with a newly proposed RALCP algorithm for latency reduction. We conducted experiments using the Llama2-7b-chat model on nine different languages from the MUST-C dataset. The results show that LLM outperforms dedicated MT models in terms of BLEU and LAAL metrics. Further analysis indicates that LLM has advantages in terms of tuning efficiency and robustness. However, it is important to note that the computational cost of LLM remains a significant obstacle to its application in SimulMT.</abstract>
       <url hash="326b94e8">2024.alta-1.7</url>
@@ -115,7 +115,7 @@
     <paper id="10">
       <title>Lesser the Shots, Higher the Hallucinations: Exploration of Genetic Information Extraction using Generative Large Language Models</title>
       <author><first>Milindi</first><last>Kodikara</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>130-145</pages>
       <abstract>Organisation of information about genes, genetic variants, and associated diseases from vast quantities of scientific literature texts through automated information extraction (IE) strategies can facilitate progress in personalised medicine. We systematically evaluate the performance of generative large language models (LLMs) on the extraction of specialised genetic information, focusing on end-to-end IE encompassing both named entity recognition and relation extraction. We experiment across multilingual datasets with a range of instruction strategies, including zero-shot and few-shot prompting along with providing an annotation guideline. Optimal results are obtained with few-shot prompting. However, we also identify that generative LLMs failed to adhere to the instructions provided, leading to over-generation of entities and relations. We therefore carefully examine the effect of learning paradigms on the extent to which genetic entities are fabricated, and the limitations of exact matching to determine performance of the model.</abstract>
       <url hash="9e0f73d0">2024.alta-1.10</url>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="17">
       <title>Overview of the 2024 <fixed-case>ALTA</fixed-case> Shared Task: Detect Automatic <fixed-case>AI</fixed-case>-Generated Sentences for Human-<fixed-case>AI</fixed-case> Hybrid Articles</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Zijie</first><last>Zeng</last></author>
       <author><first>Zhuang</first><last>Li</last></author>
diff --git a/data/xml/2024.alvr.xml b/data/xml/2024.alvr.xml
index 487ad2f4e3..f9a6e89ad7 100644
--- a/data/xml/2024.alvr.xml
+++ b/data/xml/2024.alvr.xml
@@ -23,7 +23,7 @@
     <paper id="1">
       <title><fixed-case>WISMIR</fixed-case>3: A Multi-Modal Dataset to Challenge Text-Image Retrieval Approaches</title>
       <author><first>Florian</first><last>Schneider</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>1-6</pages>
       <abstract>This paper presents WISMIR3, a multi-modal dataset comprising roughly 300K text-image pairs from Wikipedia. With a sophisticated automatic ETL pipeline, we scraped, filtered, and transformed the data so that WISMIR3 intrinsically differs from other popular text-image datasets like COCO and Flickr30k. We prove this difference by comparing various linguistic statistics between the three datasets computed using the pipeline. The primary purpose of WISMIR3 is to use it as a benchmark to challenge state-of-the-art text-image retrieval approaches, which already reach around 90% Recall@5 scores on the mentioned popular datasets. Therefore, we ran several text-image retrieval experiments on our dataset using current models, which show that the models, in fact, perform significantly worse compared to evaluation results on COCO and Flickr30k. In addition, for each text-image pair, we release features computed by Faster-R-CNN and CLIP models. With this, we want to ease and motivate the use of the dataset for other researchers.</abstract>
       <url hash="7eb44789">2024.alvr-1.1</url>
@@ -226,7 +226,7 @@
       <title><fixed-case>V</fixed-case>erb<fixed-case>CLIP</fixed-case>: Improving Verb Understanding in Vision-Language Models with Compositional Structures</title>
       <author><first>Hadi</first><last>Wazni</last></author>
       <author><first>Kin Ian</first><last>Lo</last><affiliation>University College London, University of London</affiliation></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last><affiliation>University College London</affiliation></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last><affiliation>University College London</affiliation></author>
       <pages>195-201</pages>
       <abstract>Verbs describe the dynamics of interactions between people, objects, and their environments. They play a crucial role in language formation and understanding. Nonetheless, recent vision-language models like CLIP predominantly rely on nouns and have a limited account of verbs. This limitation affects their performance in tasks requiring action recognition and scene understanding. In this work, we introduce VerbCLIP, a verb-centric vision-language model which learns meanings of verbs based on a compositional approach to statistical machine learning. Our methods significantly outperform CLIP in zero-shot performance on the VALSE, VL-Checklist, and SVO-Probes datasets, with improvements of +2.38%, +3.14%, and +1.47%, without fine-tuning. Fine-tuning resulted in further improvements, with gains of +2.85% and +9.2% on the VALSE and VL-Checklist datasets.</abstract>
       <url hash="a21f8202">2024.alvr-1.17</url>
diff --git a/data/xml/2024.americasnlp.xml b/data/xml/2024.americasnlp.xml
index f71958e450..7e55fb0625 100644
--- a/data/xml/2024.americasnlp.xml
+++ b/data/xml/2024.americasnlp.xml
@@ -6,10 +6,10 @@
       <editor><first>Manuel</first><last>Mager</last></editor>
       <editor><first>Abteen</first><last>Ebrahimi</last></editor>
       <editor><first>Shruti</first><last>Rijhwani</last></editor>
-      <editor><first>Arturo</first><last>Oncevay</last></editor>
+      <editor id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></editor>
       <editor><first>Luis</first><last>Chiruzzo</last></editor>
       <editor><first>Robert</first><last>Pugh</last></editor>
-      <editor><first>Katharina</first><last>von der Wense</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
       <month>June</month>
@@ -74,10 +74,10 @@
     </paper>
     <paper id="5">
       <title>Comparing <fixed-case>LLM</fixed-case> prompting with Cross-lingual transfer performance on Indigenous and Low-resource <fixed-case>B</fixed-case>razilian Languages</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>A. Seza</first><last>Doğruöz</last><affiliation>Universiteit Gent</affiliation></author>
       <author><first>André</first><last>Coneglian</last><affiliation>Federal university of Minas gerais</affiliation></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>Data Science Institute, Unit for Linguistic Data, University of Galway</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>Data Science Institute, Unit for Linguistic Data, University of Galway</affiliation></author>
       <pages>34-41</pages>
       <abstract>Large Language Models are transforming NLP for a lot of tasks. However, how LLMs perform NLP tasks for LRLs is less explored. In alliance with the theme track of the NAACL’24, we focus on 12 low-resource languages (LRLs) from Brazil, 2 LRLs from Africa and 2 high-resource languages (HRLs) (e.g., English and Brazilian Portuguese). Our results indicate that the LLMs perform worse for the labeling of LRLs in comparison to HRLs in general. We explain the reasons behind this failure and provide an error analyses through examples from 2 Brazilian LRLs.</abstract>
       <url hash="6c6f245b">2024.americasnlp-1.5</url>
@@ -174,7 +174,7 @@
       <title>Wav2pos: Exploring syntactic analysis from audio for <fixed-case>H</fixed-case>ighland <fixed-case>P</fixed-case>uebla <fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
       <author><first>Varun</first><last>Sreedhar</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <pages>121-126</pages>
       <abstract>We describe an approach to part-of-speech tagging from audio with very little human-annotated data, for Highland Puebla Nahuatl, a low-resource language of Mexico. While automatic morphosyntactic analysis is typically trained on annotated textual data, large amounts of text is rarely available for low-resource, marginalized, and/or minority languages, and morphosyntactically-annotated data is even harder to come by. Much of the data from these languages may exist in the form of recordings, often only partially-transcribed or analyzed by field linguists working on language documentation projects. Given this relatively low-availability of text in the low-resource language scenario, we explore end-to-end automated morphosyntactic analysis directly from audio. The experiments described in this paper focus on one piece of morphosyntax, part-of-speech tagging, and builds on existing work in a high-resource setting. We use weak supervision to increase training volume, and explore a few techniques for generating word-level predictions from the acoustic features. Our experiments show promising results, despite less than 400 sentences of audio-aligned, manually-labeled text.</abstract>
       <url hash="32285a23">2024.americasnlp-1.13</url>
@@ -196,7 +196,7 @@
       <title>Enenlhet as a case-study to investigate <fixed-case>ASR</fixed-case> model generalizability for language documentation</title>
       <author><first>Éric</first><last>Le Ferrand</last><affiliation>Boston College</affiliation></author>
       <author><first>Raina</first><last>Heaton</last><affiliation>University of Oklahoma</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>132-137</pages>
       <abstract>Although both linguists and language community members recognize the potential utility of automatic speech recognition (ASR) for documentation, one of the obstacles to using these technologies is the scarcity of data necessary to train effective systems. Recent advances in ASR, particularly the ability to fine-tune large multilingual acoustic models to small amounts of data from a new language, have demonstrated the potential of ASR for transcription. However, many proof-of-concept demonstrations of ASR in low-resource settings rely on a single data collection project, which may yield models that are biased toward that particular data scenario, whether in content, recording quality, transcription conventions, or speaker population. In this paper, we investigate the performance of two state-of-the art ASR architectures for fine-tuning acoustic models to small speech datasets with the goal of transcribing recordings of Enenlhet, an endangered Indigenous language spoken in South America. Our results suggest that while ASR offers utility for generating first-pass transcriptions of speech collected in the course of linguistic fieldwork, individual vocabulary diversity and data quality have an outsized impact on ASR accuracy.</abstract>
       <url hash="69f1c171">2024.americasnlp-1.15</url>
@@ -232,7 +232,7 @@
       <author><first>Joseph</first><last>Attieh</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Zachary</first><last>Hopton</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Yves</first><last>Scherrer</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last><affiliation>University of Zurich</affiliation></author>
       <pages>150-158</pages>
       <abstract>This paper presents the system description of the NordicsAlps team for the AmericasNLP 2024 Machine Translation Shared Task 1. We investigate the effect of tokenization on translation quality by exploring two different tokenization schemes: byte-level and redundancy-driven tokenization. We submitted three runs per language pair. The redundancy-driven tokenization ranked first among all submissions, scoring the highest average chrF2++, chrF, and BLEU metrics (averaged across all languages). These findings demonstrate the importance of carefully tailoring the tokenization strategies of machine translation systems, particularly in resource-constrained scenarios.</abstract>
       <url hash="a74fe49e">2024.americasnlp-1.18</url>
@@ -274,8 +274,8 @@
       <author><first>Jim</first><last>Su</last><affiliation>University of Florida</affiliation></author>
       <author><first>Justin</first><last>Ho</last><affiliation>University of Florida</affiliation></author>
       <author><first>George</first><last>Broadwell</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <pages>179-187</pages>
       <abstract>This paper presents our submission to the AmericasNLP 2024 Shared Task on the Creation of Educational Materials for Indigenous Languages. We frame this task as one of morphological inflection generation, treating each sentence as a single word. We investigate and compare two distinct approaches: fine-tuning neural encoder-decoder models such as NLLB- 200, and in-context learning with proprietary large language models (LLMs). Our findings demonstrate that for this task, no one approach is perfect. Anthropic’s Claude 3 Opus, when supplied with grammatical description entries, achieves the highest performance on Bribri among the evaluated models. This outcome corroborates and extends previous research exploring the efficacy of in-context learning in low- resource settings. For Maya, fine-tuning NLLB- 200-3.3B using StemCorrupt augmented data yielded the best performance.</abstract>
       <url hash="a7db5b8b">2024.americasnlp-1.21</url>
@@ -367,7 +367,7 @@
     <paper id="28">
       <title>Findings of the <fixed-case>A</fixed-case>mericas<fixed-case>NLP</fixed-case> 2024 Shared Task on Machine Translation into Indigenous Languages</title>
       <author><first>Abteen</first><last>Ebrahimi</last><affiliation>University of Colorado, Boulder</affiliation></author>
-      <author><first>Ona</first><last>de Gibert</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Raul</first><last>Vazquez</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Rolando</first><last>Coto-Solano</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Pavel</first><last>Denisov</last><affiliation>University of Stuttgart</affiliation></author>
diff --git a/data/xml/2024.amta.xml b/data/xml/2024.amta.xml
index eb963f6e25..66ed42d34d 100644
--- a/data/xml/2024.amta.xml
+++ b/data/xml/2024.amta.xml
@@ -38,7 +38,7 @@
       <author><first>Vipin</first><last>Vijayan</last></author>
       <author><first>Braeden</first><last>Bowen</last></author>
       <author><first>Scott</first><last>Grigsby</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <pages>14–28</pages>
       <abstract>While most current work in multimodal machine translation (MMT) uses the Multi30k dataset for training and evaluation, we find that the resulting models overfit to the Multi30k dataset to an extreme degree. Consequently, these models perform very badly when evaluated against typical text-only testing sets such as the newstest datasets. In order to perform well on both Multi30k and typical text-only datasets, we use a performant text-only machine translation (MT) model as the starting point of our MMT model. We add vision-text adapter layers connected via gating mechanisms to the MT model, and incrementally transform the MT model into an MMT model by 1) pre-training using vision-based masking of the source text and 2) fine-tuning on Multi30k. We achieve a state-of-the-art performance on the Multi30k 2016 en-de test set of 46.5 BLEU4 score and 0.61 CoMMuTE score via this approach while retaining the performance of the original text-only MT model against the newstest dataset.</abstract>
@@ -50,7 +50,7 @@
       <author><first>Braeden</first><last>Bowen</last></author>
       <author><first>Vipin</first><last>Vijayan</last></author>
       <author><first>Scott</first><last>Grigsby</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <pages>29–38</pages>
       <abstract>The challenge of visual grounding and masking in multimodal machine translation (MMT) systems has encouraged varying approaches to the detection and selection of visually-grounded text tokens for masking. We introduce new methods for detection of visually and contextually relevant (concrete) tokens from source sentences, including detection with natural language processing (NLP), detection with object detection, and a joint detection-verification technique. We also introduce new methods for selection of detected tokens, including shortest n tokens, longest n tokens, and all detected concrete tokens. We utilize the GRAM MMT architecture to train models against synthetically collated multimodal datasets of source images with masked sentences, showing performance improvements and improved usage of visual context during translation tasks over the baseline model.</abstract>
@@ -70,7 +70,7 @@
       <title>On Translating Technical Terminology: A Translation Workflow for Machine-Translated Acronyms</title>
       <author><first>Richard</first><last>Yue</last></author>
       <author><first>John</first><last>Ortega</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>48–54</pages>
       <abstract>The typical workflow for a professional translator to translate a document from its source language (SL) to a target language (TL) is not always focused on what many language models in natural language processing (NLP) do - predict the next word in a series of words. While high-resource languages like English and French are reported to achieve near human parity using common metrics for measurement such as BLEU and COMET, we find that an important step is being missed: the translation of technical terms, specifically acronyms. Some state-of-the art machine translation systems like Google Translate which are publicly available can be erroneous when dealing with acronyms - as much as 50% in our findings. This article addresses acronym disambiguation for MT systems by proposing an additional step to the SL-TL (FR-EN) translation workflow where we first offer a new acronym corpus for public consumption and then experiment with a search-based thresholding algorithm that achieves nearly 10% increase when compared to Google Translate and OpusMT.</abstract>
       <url hash="358ce0ce">2024.amta-research.6</url>
@@ -90,7 +90,7 @@
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Haiyue</first><last>Song</last></author>
       <author><first>Miriam</first><last>Exel</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Johannes</first><last>Eschbach-Dymanus</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>73–87</pages>
@@ -117,7 +117,7 @@
       <author><first>Michel</first><last>Simard</last></author>
       <author><first>Marc A</first><last>Tessier</last></author>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Chi-kiu</first><last>Lo</last></author>
       <pages>102–118</pages>
       <abstract>In long-term translation projects, like Parliamentary text, there is a desire to build machine translation systems that can adapt to changes over time. We implement and examine a simple approach to continual learning for neural machine translation, exploring tradeoffs between consistency, the model’s ability to learn from incoming data, and the time a client would need to wait to obtain a newly trained translation system.</abstract>
@@ -154,7 +154,7 @@
     <paper id="14">
       <title>Enhancing Translation Quality by Leveraging Semantic Diversity in Multimodal Machine Translation</title>
       <author><first>Ali</first><last>Hatami</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>154–166</pages>
       <abstract>Despite advancements in neural machine translation, word sense disambiguation remains challenging, particularly with limited textual context. Multimodal Machine Translation enhances text-only models by integrating visual information, but its impact varies across translations. This study focuses on ambiguous sentences to investigate the effectiveness of utilizing visual information. By prioritizing these sentences, which benefit from visual cues, we aim to enhance hybrid multimodal and text-only translation approaches. We utilize Latent Semantic Analysis and Sentence-BERT to extract context vectors from the British National Corpus, enabling the assessment of semantic diversity. Our approach enhances translation quality for English-German and English-French on Multi30k, assessed through metrics including BLEU, chrF2, and TER.</abstract>
@@ -242,7 +242,7 @@
   <volume id="presentations" ingest-date="2024-09-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)</booktitle>
-      <editor><first>Marianna</first><last>Martindale</last></editor>
+      <editor id="marianna-martindale"><first>Marianna</first><last>Martindale</last></editor>
       <editor><first>Janice</first><last>Campbell</last></editor>
       <editor><first>Konstantin</first><last>Savenkov</last></editor>
       <editor><first>Shivali</first><last>Goel</last></editor>
@@ -300,8 +300,8 @@
       <title>The Multi-Range Theory of Translation Quality Measurement: <fixed-case>MQM</fixed-case> scoring models and Statistical Quality Control</title>
       <author><first>Arle</first><last>Lommel</last></author>
       <author><first>Serge</first><last>Gladkoff</last></author>
-      <author><first>Alan</first><last>Melby</last></author>
-      <author><first>Sue Ellen</first><last>Wright</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
+      <author id="sue-ellen-wright"><first>Sue Ellen</first><last>Wright</last></author>
       <author><first>Ingemar</first><last>Strandvik</last></author>
       <author><first>Katerina</first><last>Gasova</last></author>
       <author><first>Angelika</first><last>Vaasa</last></author>
@@ -310,7 +310,7 @@
       <author><first>Monica</first><last>Foresi</last></author>
       <author><first>Johani</first><last>Innis</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>75–94</pages>
       <abstract>The year 2024 marks the 10th anniversary of the Multidimensional Quality Metrics (MQM) framework for analytic translation quality evaluation. The MQM error typology has been widely used by practitioners in the translation and localization industry and has served as the basis for many derivative projects. The annual Conference on Machine Translation (WMT) shared tasks on both human and automatic translation quality evaluations used the MQM error typology. The metric stands on two pillars: <i>error typology</i> and the <i>scoring model</i>. The scoring model calculates the quality score from annotation data, detailing how to convert error type and severity counts into numeric scores to determine if the content meets specifications. Previously, only the raw scoring model had been published. This April, the MQM Council published the <i>Linear Calibrated Scoring Model</i>, officially presented herein, along with the <i>Non-Linear Scoring Model</i>, which had not been published</abstract>
       <url hash="8a54a89b">2024.amta-presentations.6</url>
@@ -336,8 +336,8 @@
       <title><fixed-case>C</fixed-case>anton<fixed-case>MT</fixed-case>: <fixed-case>C</fixed-case>antonese-<fixed-case>E</fixed-case>nglish Neural Machine Translation Looking into Evaluations</title>
       <author><first>Kung Yin</first><last>Hong</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>133-144</pages>
       <abstract>Cantonese-English is a low-resource language pair for machine translation (MT) studies, despite the vast amount of English content publicly available online and the large amount of native Cantonese speakers. Based on our previous work on CANTONMT from Hong et al. (2024), where we created the open-source fine-tuned systems for Cantonese-English Neural MT (NMT) using base-models NLLB, OpusMT, and mBART and corpus collections and creation, in this paper, we report our extended experiments on model training and comparisons. In particular, we incorporated human-based evaluations using native Cantonese speakers who are also fluent in the English language. We designed a modified version of the HOPE metric from Gladkoff and Han (2022) for the categorised error analysis and serenity-level statistics (naming HOPES). The models selected for human evaluations are NLLB-mBART fine-tuned and two translators from commercial companies: Bing and GPT4.</abstract>
       <url hash="fc607f7b">2024.amta-presentations.9</url>
@@ -439,7 +439,7 @@
     </paper>
     <paper id="20">
       <title>Labels on Translation Output: a triple win</title>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <pages>261–286</pages>
       <abstract>In the 2023 edition of the ASTM International translation standard (F2575) the labels BRT and UMT have been standardized. The Label BRT stands for ‘Bilingually Reviewed Translation, by a qualified language professional’. The Label UMT is for everything else, from raw machine translation to MT where only the target text is checked, to human translation that does not involve a qualified professional. Thus, UMT could be expanded as ‘Unreviewed or Missing-qualifications Translation’. This presentation will argue that the use of the labels BRT and UMT is a triple win: The ‘consumers’ (end users) of a translation win because they have useful information for risk analysis (harm from errors). MT developers win because they have useful metadata when selecting training material. And professional translators win by increasing their visibility to the public. The presentation will give a history of these two labels and enlist the help of the entire AMTA community in promoting their use.</abstract>
       <url hash="5fdf26c1">2024.amta-presentations.20</url>
diff --git a/data/xml/2024.arabicnlp.xml b/data/xml/2024.arabicnlp.xml
index 0588c627d9..ce63c6ce63 100644
--- a/data/xml/2024.arabicnlp.xml
+++ b/data/xml/2024.arabicnlp.xml
@@ -109,7 +109,7 @@
     <paper id="7">
       <title>Improving Language Models Trained on Translated Data with Continual Pre-Training and Dictionary Learning Analysis</title>
       <author><first>Sabri</first><last>Boughorbel</last><affiliation>Qatar Computing Research Institute</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <author><first>Majd</first><last>Hawasly</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <pages>73-88</pages>
       <abstract>Training LLMs in low resources languages usually utilizes machine translation (MT) data augmentation from English language. However, translation brings a number of challenges: there are large costs attached to translating and curating huge amounts of content with high-end machine translation solutions; the translated content carries over cultural biases; and if the translation is not faithful and accurate, the quality of the data degrades causing issues in the trained model. In this work, we investigate the role of translation and synthetic data in training language models. We translate TinyStories, a dataset of 2.2M short stories for 3-4 year old children, from English to Arabic using the open NLLB-3B MT model. We train a number of story generation models of size 1M-33M parameters using this data. We identify a number of quality and task-specific issues in the resulting models. To rectify these issues, we further pre-train the models with a small dataset of synthesized high-quality stories generated by a capable LLM in Arabic, representing 1% of the original training data. We show, using GPT-4 as a judge and dictionary learning analysis from mechanistic interpretability, that the suggested approach is a practical means to resolve some of the translation pitfalls. We illustrate the improvement through case studies of linguistic and cultural bias issues.</abstract>
@@ -119,7 +119,7 @@
     </paper>
     <paper id="8">
       <title>A Context-Contrastive Inference Approach To Partial Diacritization</title>
-      <author><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last><affiliation>Microsoft</affiliation></author>
       <author><first>Badr</first><last>AlKhamissi</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
       <pages>89-101</pages>
       <abstract>Diacritization plays a pivotal role for meaning disambiguation and improving readability in Arabic texts. Efforts have long focused on marking every eligible character (Full Diacritization). Overlooked in comparison, Partial Diacritzation (‘PD‘) is the selection of a subset of characters to be annotated to aid comprehension only where needed. Research has indicated that excessive diacritic marks can hinder skilled readers—reducing reading speed and accuracy. We conduct a behavioral experiment and show that partially marked text is often easier to read than fully marked text, and sometimes easier than plain text. In this light, we introduce Context-Contrastive Partial Diacritization (‘CCPD‘)—a novel approach to ‘PD‘ which integrates seamlessly with existing Arabic diacritization systems. ‘CCPD‘ processes each word twice, once with context and once without, and diacritizes only the characters with disparities between the two inferences. Further, we introduce novel indicators for measuring partial diacritization quality to help establish this as a machine learning task. Lastly, we introduce ‘TD2‘, a Transformer-variant of an established model which offers a markedly different performance profile on our proposed indicators compared to all other known systems.</abstract>
@@ -215,7 +215,7 @@
       <title>Out-of-Domain Dependency Parsing for Dialects of <fixed-case>A</fixed-case>rabic: A Case Study</title>
       <author><first>Noor</first><last>Abo Mokh</last><affiliation>Indiana University</affiliation></author>
       <author><first>Daniel</first><last>Dakota</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <pages>170-182</pages>
       <abstract>We study dependency parsing for four Arabic dialects (Gulf, Levantine, Egyptian, and Maghrebi). Since no syntactically annotated data exist for Arabic dialects, we train the parser on a Modern Standard Arabic (MSA) corpus, which creates an out-of-domain setting.We investigate methods to close the gap between the source (MSA) and target data (dialects), e.g., by training on syntactically similar sentences to the test data. For testing, we manually annotate a small data set from a dialectal corpus. We focus on parsing two linguistic phenomena, which are difficult to parse: Idafa and coordination. We find that we can improve results by adding in-domain MSA data while adding dialectal embeddings only results in minor improvements.</abstract>
       <url hash="5bff2cb7">2024.arabicnlp-1.16</url>
@@ -225,7 +225,7 @@
     <paper id="17">
       <title>Investigating Linguistic Features for <fixed-case>A</fixed-case>rabic <fixed-case>NLI</fixed-case></title>
       <author><first>Yasmeen</first><last>Bassas</last></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
       <pages>183-192</pages>
       <abstract>Native Language Identification (NLI) is concerned with predicting the native language of an author writing in a second language. We investigate NLI for Arabic, with a focus on the types of linguistic information given that Arabic is morphologically rich. We use the Arabic Learner Corpus (ALC) foro training and testing along with a linear SVM. We explore lexical, morpho-syntactic, and syntactic features. Results show that the best single type of information is character n-grams ranging from 2 to 6. Using this model, we achieve an accuracy of 61.84%, thus outperforming previous results (Ionesco, 2015) by 11.74% even though we use an additional 2 L1s. However, when using prefix and suffix sequences, we reach an accuracy of 53.95%, showing that an approximation of unlexicalized features still reaches solid results.</abstract>
       <url hash="43ecdfd3">2024.arabicnlp-1.17</url>
@@ -289,7 +289,7 @@
       <author><first>Salam</first><last>Khalifa</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <author><first>Abdelrahim</first><last>Qaddoumi</last></author>
       <author><first>Ellen</first><last>Broselow</last><affiliation>State University of New York at Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>258-264</pages>
       <abstract>Learning morphophonological mappings between the spoken form of a language and its underlying morphological structures is crucial for enriching resources for morphologically rich languages like Arabic. In this work, we focus on Egyptian Arabic as our case study and explore the integration of linguistic knowledge with a neural transformer model. Our approach involves learning to correct the residual errors from hand-crafted rules to predict the spoken form from a given underlying morphological representation. We demonstrate that using a minimal set of rules, we can effectively recover errors even in very low-resource settings.</abstract>
       <url hash="15855d3e">2024.arabicnlp-1.22</url>
@@ -421,7 +421,7 @@
       <title>Upaya at <fixed-case>A</fixed-case>rabic<fixed-case>NLU</fixed-case> Shared-Task: <fixed-case>A</fixed-case>rabic Lexical Disambiguation using Large Language Models</title>
       <author><first>Pawan</first><last>Rajpoot</last></author>
       <author><first>Ashvini</first><last>Jindal</last></author>
-      <author><first>Ankur</first><last>Parikh</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>377-382</pages>
       <abstract>Disambiguating a word’s intended meaning(sense) in a given context is important in Nat-ural Language Understanding (NLU). WSDaims to determine the correct sense of ambigu-ous words in context. At the same time, LMD(a WSD variation) focuses on disambiguatinglocation mention. Both tasks are vital in Nat-ural Language Processing (NLP) and informa-tion retrieval, as they help correctly interpretand extract information from text. Arabic ver-sion is further challenging because of its mor-phological richness, encompassing a complexinterplay of roots, stems, and affixes. This pa-per describes our solutions to both tasks, em-ploying Llama3 and Cohere-based models un-der Zero-Shot Learning and Re-Ranking, re-spectively. Both the shared tasks were partof the second Arabic Natural Language Pro-cessing Conference co-located with ACL 2024.Overall, we achieved 1st rank in the WSD task(accuracy 78%) and 2nd rank in the LMD task(MRR@1 0.59)</abstract>
       <url hash="ae0eea33">2024.arabicnlp-1.32</url>
@@ -481,7 +481,7 @@
       <author><first>Hossam</first><last>Elkordi</last><affiliation>Alexandria University</affiliation></author>
       <author><first>Ahmed</first><last>Sakr</last><affiliation>Alexandria University</affiliation></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>415-421</pages>
       <abstract>Arabic banking intent detection represents a challenging problem across multiple dialects. It imposes generalization difficulties due to the scarcity of Arabic language and its dialects resources compared to English. We propose a methodology that leverages contrastive training to overcome this limitation. We also augmented the data with several dialects using a translation model. Our experiments demonstrate the ability of our approach in capturing linguistic nuances across different Arabic dialects as well as accurately differentiating between banking intents across diverse linguistic landscapes. This would enhance multi-dialect banking services in the Arab world with limited Arabic language resources. Using our proposed method we achieved second place on subtask 1 leaderboard of the AraFinNLP2024 shared task with micro-F1 score of 0.8762 on the test split.</abstract>
       <url hash="51cef3bb">2024.arabicnlp-1.37</url>
@@ -527,7 +527,7 @@
       <author><first>Asmaa</first><last>Ramadan</last></author>
       <author><first>Manar</first><last>Amr</last></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>441-445</pages>
       <abstract>Intent detection, also called intent classification or recognition, is an NLP technique to comprehend the purpose behind user utterances. This paper focuses on Multi-dialect Arabic intent detection in banking, utilizing the ArBanking77 dataset. Our method employs an ensemble of fine-tuned BERT-based models, integrating contrastive loss for training. To enhance generalization to diverse Arabic dialects, we augment the ArBanking77 dataset, originally in Modern Standard Arabic (MSA) and Palestinian, with additional dialects such as Egyptian, Moroccan, and Saudi, among others. Our approach achieved an F1-score of 0.8771, ranking first in subtask-1 of the AraFinNLP shared task 2024.</abstract>
       <url hash="3a67d000">2024.arabicnlp-1.41</url>
@@ -669,7 +669,7 @@
     <paper id="53">
       <title><fixed-case>A</fixed-case>lex<fixed-case>UNLP</fixed-case>-<fixed-case>MZ</fixed-case> at <fixed-case>A</fixed-case>r<fixed-case>AIE</fixed-case>val Shared Task: Contrastive Learning, <fixed-case>LLM</fixed-case> Features Extraction and Multi-Objective Optimization for <fixed-case>A</fixed-case>rabic Multi-Modal Meme Propaganda Detection</title>
       <author><first>Mohamed</first><last>Zaytoon</last><affiliation>Alexandria University</affiliation></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
       <pages>512-517</pages>
       <abstract>The rise of memes as a tool for spreading propaganda presents a significant challenge in the current digital environment. In this paper, we outline our work for the ArAIEval Shared Task2 in ArabicNLP 2024. This study introduces a method for identifying propaganda in Arabic memes using a multimodal system that combines textual and visual indicators to enhance the result. Our approach achieves the first place in text classification with Macro-F1 of 78.69%, the third place in image classification with Macro-F1 of 65.92%, and the first place in multimodal classification with Macro-F1 of 80.51%</abstract>
@@ -708,8 +708,8 @@
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
       <author><first>Houda</first><last>Bouamor</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Imed</first><last>Zitouni</last><affiliation>Google</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Samhaa</first><last>El-Beltagy</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa</first><last>El-Beltagy</last></author>
       <author><first>Muhammed</first><last>AbuOdeh</last><affiliation>New York University, Abu Dhabi</affiliation></author>
       <pages>530-547</pages>
       <abstract>We present an overview of the FIGNEWSshared task, organized as part of the Arabic-NLP 2024 conference co-located with ACL2024. The shared task addresses bias and pro-paganda annotation in multilingual news posts.We focus on the early days of the Israel War onGaza as a case study. The task aims to fostercollaboration in developing annotation guide-lines for subjective tasks by creating frame-works for analyzing diverse narratives high-lighting potential bias and propaganda. In aspirit of fostering and encouraging diversity,we address the problem from a multilingualperspective, namely within five languages: En-glish, French, Arabic, Hebrew, and Hindi. Atotal of 17 teams participated in two annota-tion subtasks: bias (16 teams) and propaganda(6 teams). The teams competed in four evalua-tion tracks: guidelines development, annotationquality, annotation quantity, and consistency.Collectively, the teams produced 129,800 datapoints. Key findings and implications for thefield are discussed.</abstract>
@@ -852,7 +852,7 @@
       <title><fixed-case>BSC</fixed-case>-<fixed-case>LANGTECH</fixed-case> at <fixed-case>FIGNEWS</fixed-case> 2024 Shared Task: Exploring Semi-Automatic Bias Annotation using Frame Analysis</title>
       <author><first>Valle</first><last>Ruiz-Fernández</last></author>
       <author><first>José</first><last>Saiz</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <pages>620-629</pages>
       <abstract>This paper introduces the methodology of BSC-LANGTECH team for the FIGNEWS 2024 Shared Task on News Media Narratives. Following the bias annotation subtask, we apply the theory and methods of framing analysis to develop guidelines to annotate bias in the corpus provided by the task organizators. The manual annotation of a subset, with which a moderate IAA agreement has been achieved, is further used in Deep Learning techniques to explore automatic annotation and test the reliability of our framework.</abstract>
       <url hash="4cdb8923">2024.arabicnlp-1.67</url>
@@ -1020,7 +1020,7 @@
       <title><fixed-case>A</fixed-case>lex<fixed-case>UNLP</fixed-case>-<fixed-case>STM</fixed-case> at <fixed-case>NADI</fixed-case> 2024 shared task: Quantifying the <fixed-case>A</fixed-case>rabic Dialect Spectrum with Contrastive Learning, Weighted Sampling, and <fixed-case>BERT</fixed-case>-based Regression Ensemble</title>
       <author><first>Abdelrahman</first><last>Sakr</last></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>735-741</pages>
       <abstract>Recognizing the nuanced spectrum of dialectness in Arabic text poses a significant challenge for natural language processing (NLP) tasks. Traditional dialect identification (DI) methods treat the task as binary, overlooking the continuum of dialect variation present in Arabic speech and text. In this paper, we describe our submission to the NADI shared Task of ArabicNLP 2024. We participated in Subtask 2 - ALDi Estimation, which focuses on estimating the Arabic Level of Dialectness (ALDi) for Arabic text, indicating how much it deviates from Modern Standard Arabic (MSA) on a scale from 0 to 1, where 0 means MSA and 1 means high divergence from MSA. We explore diverse training approaches, including contrastive learning, applying a random weighted sampler along with fine-tuning a regression task based on the AraBERT model, after adding a linear and non-linear layer on top of its pooled output. Finally, performing a brute force ensemble strategy increases the performance of our system. Our proposed solution achieved a Root Mean Squared Error (RMSE) of 0.1406, ranking second on the leaderboard.</abstract>
       <url hash="f570740e">2024.arabicnlp-1.81</url>
@@ -1030,7 +1030,7 @@
     <paper id="82">
       <title><fixed-case>NLP</fixed-case>_<fixed-case>DI</fixed-case> at <fixed-case>NADI</fixed-case> 2024 shared task: Multi-label <fixed-case>A</fixed-case>rabic Dialect Classifications with an Unsupervised Cross-Encoder</title>
       <author><first>Vani</first><last>Kanjirangat</last><affiliation>Dalle Molle Institute for Artificial Intelligence USI-SUPSI</affiliation></author>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Ljiljana</first><last>Dolamic</last><affiliation>armasuisse</affiliation></author>
       <author><first>Fabio</first><last>Rinaldi</last><affiliation>IDSIA</affiliation></author>
       <pages>742-747</pages>
@@ -1122,7 +1122,7 @@
       <title><fixed-case>ANLP</fixed-case> <fixed-case>RG</fixed-case> at <fixed-case>S</fixed-case>tance<fixed-case>E</fixed-case>val2024: Comparative Evaluation of Stance, Sentiment and Sarcasm Detection</title>
       <author><first>Mezghani</first><last>Amal</last><affiliation>Université Virtuelle de Tunis</affiliation></author>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
       <pages>788-793</pages>
       <abstract>As part of our study, we worked on three tasks:stance detection, sarcasm detection and senti-ment analysis using fine-tuning techniques onBERT-based models. Fine-tuning parameterswere carefully adjusted over multiple iterationsto maximize model performance. The threetasks are essential in the field of natural lan-guage processing (NLP) and present uniquechallenges. Stance detection is a critical taskaimed at identifying a writer’s stances or view-points in relation to a topic. Sarcasm detectionseeks to spot sarcastic expressions, while senti-ment analysis determines the attitude expressedin a text. After numerous experiments, we iden-tified Arabert-twitter as the model offering thebest performance for all three tasks. In particu-lar, it achieves a macro F-score of 78.08% forstance detection, a macro F1-score of 59.51%for sarcasm detection and a macro F1-score of64.57% for sentiment detection. .Our source code is available at https://github.com/MezghaniAmal/Mawqif</abstract>
       <url hash="145e030e">2024.arabicnlp-1.90</url>
@@ -1186,7 +1186,7 @@
       <author><first>Mohamed</first><last>Badran</last></author>
       <author><first>Mo’men</first><last>Hamdy</last><affiliation>Alexandria University</affiliation></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <pages>823-827</pages>
       <abstract>Stance detection, an evolving task in natural language processing, involves understanding a writer’s perspective on certain topics by analyzing his written text and interactions online, especially on social media platforms. In this paper, we outline our submission to the StanceEval task, leveraging the Mawqif dataset featured in The Second Arabic Natural Language Processing Conference. Our task is to detect writers’ stances (Favor, Against, or None) towards three selected topics (COVID-19 vaccine, digital transformation, and women empowerment). We present our approach primarily relying on a contrastive loss ensemble strategy. Our proposed approach achieved an F1-score of 0.8438 and ranked first in the stanceEval 2024 task. The code and checkpoints are availableat https://github.com/MBadran2000/Mawqif.git</abstract>
       <url hash="05fecec6">2024.arabicnlp-1.96</url>
diff --git a/data/xml/2024.argmining.xml b/data/xml/2024.argmining.xml
index 272b37ed81..55069587ae 100644
--- a/data/xml/2024.argmining.xml
+++ b/data/xml/2024.argmining.xml
@@ -19,7 +19,7 @@
       <title><fixed-case>ARIES</fixed-case>: A General Benchmark for Argument Relation Identification</title>
       <author><first>Debela</first><last>Gemechu</last></author>
       <author><first>Ramon</first><last>Ruiz-Dolz</last><affiliation>University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>1-14</pages>
       <abstract>Measuring advances in argument mining is one of the main challenges in the area. Different theories of argument, heterogeneous annotations, and a varied set of argumentation domains make it difficult to contextualise and understand the results reported in different work from a general perspective. In this paper, we present ARIES, a general benchmark for Argument Relation Identification aimed at providing with a standard evaluation for argument mining research. ARIES covers the three different language modelling approaches: sequence and token modelling, and sequence-to-sequence-to-sequence alignment, together with the three main Transformer-based model architectures: encoder-only, decoder-only, and encoder-decoder. Furthermore, the benchmark consists of eight different argument mining datasets, covering the most common argumentation domains, and standardised with the same annotation structures. This paper provides a first comprehensive and comparative set of results in argument mining across a broad range of configurations to compare with, both advancing the state-of-the-art, and establishing a standard way to measure future advances in the area. Across varied task setups and architectures, our experiments reveal consistent challenges in cross-dataset evaluation, with notably poor results. Given the models’ struggle to acquire transferable skills, the task remains challenging, opening avenues for future research.</abstract>
       <url hash="062a6bae">2024.argmining-1.1</url>
@@ -101,7 +101,7 @@
       <author><first>Ramon</first><last>Ruiz-Dolz</last><affiliation>Centre for Argument Technology (ARG-tech), University of Dundee</affiliation></author>
       <author><first>John</first><last>Lawrence</last><affiliation>Centre for Argument Technology (ARG-tech), University of Dundee</affiliation></author>
       <author><first>Ella</first><last>Schad</last><affiliation>Centre for Argument Technology (ARG-tech), University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>Centre for Argument Technology (ARG-tech), University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>Centre for Argument Technology (ARG-tech), University of Dundee</affiliation></author>
       <pages>83-92</pages>
       <abstract>Argumentation is the process by which humans rationally elaborate their thoughts and opinions in written (e.g., essays) or spoken (e.g., debates) contexts. Argument Mining research, however, has been focused on either written argumentation or spoken argumentation but without considering any additional information, e.g., speech acts and intentions. In this paper, we present an overview of DialAM-2024, the first shared task in dialogical argument mining, where argumentative relations and speech illocutions are modelled together in a unified framework. The task was divided into two different sub-tasks: the identification of propositional relations and the identification of illocutionary relations. Six different teams explored different methodologies to leverage both sources of information to reconstruct argument maps containing the locutions uttered in the speeches and the argumentative propositions implicit in them. The best performing team achieved an F1-score of 67.05% in the overall evaluation of the reconstruction of complete argument maps, considering both sub-tasks included in the DialAM-2024 shared task.</abstract>
       <url hash="8dd5e35e">2024.argmining-1.8</url>
@@ -150,7 +150,7 @@
       <author><first>Sirawut</first><last>Chaixanien</last><affiliation>Cornell University</affiliation></author>
       <author><first>Eugene</first><last>Choi</last><affiliation>Cornell University</affiliation></author>
       <author><first>Shaden</first><last>Shaar</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
       <pages>119-123</pages>
       <abstract>In this paper we tackle the shared task DialAM-2024 aiming to annotate dialogue based on the inference anchoring theory (IAT). The task can be split into two parts, identification of propositional relations and identification of illocutionary relations. We propose a pipelined system made up of three parts: (1) locutionary-propositions relation detection, (2) propositional relations detection, and (3) illocutionary relations identification. We fine-tune models independently for each step, and combine at the end for the final system. Our proposed system ranks second overall compared to other participants in the shared task, scoring an average f1-score on both sub-parts of 63.7.</abstract>
       <url hash="a81b88bc">2024.argmining-1.12</url>
@@ -160,7 +160,7 @@
     <paper id="13">
       <title>Turiya at <fixed-case>D</fixed-case>ial<fixed-case>AM</fixed-case>-2024: Inference Anchoring Theory Based <fixed-case>LLM</fixed-case> Parsers</title>
       <author><first>Sougata</first><last>Saha</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>124-129</pages>
       <abstract>Representing discourse as argument graphs facilitates robust analysis. Although computational frameworks for constructing graphs from monologues exist, there is a lack of frameworks for parsing dialogue. Inference Anchoring Theory (IAT) is a theoretical framework for extracting graphical argument structures and relationships from dialogues. Here, we introduce computational models for implementing the IAT framework for parsing dialogues. We experiment with a classification-based biaffine parser and Large Language Model (LLM)-based generative methods and compare them. Our results demonstrate the utility of finetuning LLMs for constructing IAT-based argument graphs from dialogues, which is a nuanced task.</abstract>
       <url hash="eaace3c6">2024.argmining-1.13</url>
@@ -183,7 +183,7 @@
       <author><first>Robert</first><last>Günzler</last></author>
       <author><first>Özge</first><last>Sevgili</last></author>
       <author><first>Steffen</first><last>Remus</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Irina</first><last>Nikishina</last></author>
       <pages>150-158</pages>
       <abstract>This paper presents the Sövereign submission for the shared task on perspective argument retrieval for the Argument Mining Workshop 2024. The main challenge is to perform argument retrieval considering socio-cultural aspects such as political interests, occupation, age, and gender. To address the challenge, we apply open-access Large Language Models (Mistral-7b) in a zero-shot fashion for re-ranking and explicit similarity scoring. Additionally, we combine different features in an ensemble setup using logistic regression. Our system ranks second in the competition for all test set rounds on average for the logistic regression approach using LLM similarity scores as a feature. In addition to the description of the approach, we also provide further results of our ablation study. Our code will be open-sourced upon acceptance.</abstract>
@@ -194,7 +194,7 @@
     <paper id="16">
       <title>Turiya at <fixed-case>P</fixed-case>erpective<fixed-case>A</fixed-case>rg2024: A Multilingual Argument Retriever and Reranker</title>
       <author><first>Sougata</first><last>Saha</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>159-163</pages>
       <abstract>While general argument retrieval systems have significantly matured, multilingual argument retrieval in a socio-cultural setting is an overlooked problem. Advancements in such systems are imperative to enhance the inclusivity of society. The Perspective Argument Retrieval (PAR) task addresses these aspects and acknowledges their potential latent influence on argumentation. Here, we present a multilingual retrieval system for PAR that accounts for societal diversity during retrieval. Our approach couples a retriever and a re-ranker and spans multiple languages, thus factoring in diverse socio-cultural settings. The performance of our end-to-end system on three distinct test sets testify to its robustness.</abstract>
       <url hash="20c6867c">2024.argmining-1.16</url>
diff --git a/data/xml/2024.bea.xml b/data/xml/2024.bea.xml
index e3d9ad529e..380bc1cd91 100644
--- a/data/xml/2024.bea.xml
+++ b/data/xml/2024.bea.xml
@@ -61,7 +61,7 @@
       <title>Using Adaptive Empathetic Responses for Teaching <fixed-case>E</fixed-case>nglish</title>
       <author><first>Li</first><last>Siyan</last><affiliation>Columbia University</affiliation></author>
       <author><first>Teresa</first><last>Shao</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University in the City of New York</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University in the City of New York</affiliation></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
       <pages>34-53</pages>
       <abstract>Existing English-teaching chatbots rarely incorporate empathy explicitly in their feedback, but empathetic feedback could help keep students engaged and reduce learner anxiety. Toward this end, we propose the task of negative emotion detection via audio, for recognizing empathetic feedback opportunities in language learning. We then build the first spoken English-teaching chatbot with adaptive, empathetic feedback. This feedback is synthesized through automatic prompt optimization of ChatGPT and is evaluated with English learners. We demonstrate the effectiveness of our system through a preliminary user study.</abstract>
@@ -213,7 +213,7 @@
       <title>Explainable <fixed-case>AI</fixed-case> in Language Learning: Linking Empirical Evidence and Theoretical Concepts in Proficiency and Readability Modeling of <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Luisa</first><last>Ribeiro-Flucht</last><affiliation>University of Tuebingen</affiliation></author>
       <author><first>Xiaobin</first><last>Chen</last><affiliation>Tübingen Universität</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
       <pages>199-209</pages>
       <abstract>While machine learning methods have supported significantly improved results in education research, a common deficiency lies in the explainability of the result. Explainable AI (XAI) aims to fill that gap by providing transparent, conceptually understandable explanations for the classification decisions, enhancing human comprehension and trust in the outcomes. This paper explores an XAI approach to proficiency and readability assessment employing a comprehensive set of 465 linguistic complexity measures. We identify theoretical descriptions associating such measures with varying levels of proficiency and readability and validate them using cross-corpus experiments employing supervised machine learning and Shapley Additive Explanations. The results not only highlight the utility of a diverse set of complexity measures in effectively modeling proficiency and readability in Portuguese, achieving a state-of-the-art accuracy of 0.70 in the proficiency classification task and of 0.84 in the readability classification task, but they largely corroborate the theoretical research assumptions, especially in the lexical domain.</abstract>
       <url hash="faaa40b4">2024.bea-1.17</url>
@@ -301,7 +301,7 @@
     <paper id="24">
       <title>Towards Fine-Grained Pedagogical Control over <fixed-case>E</fixed-case>nglish Grammar Complexity in Educational Text Generation</title>
       <author><first>Dominik</first><last>Glandorf</last><affiliation>University of Tübingen, Yale University</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
       <pages>299-308</pages>
       <abstract>Teaching foreign languages and fostering language awareness in subject matter teaching requires a profound knowledge of grammar structures. Yet, while Large Language Models can act as tutors, it is unclear how effectively they can control grammar in generated text and adapt to learner needs. In this study, we investigate the ability of these models to exemplify pedagogically relevant grammar patterns, detect instances of grammar in a given text, and constrain text generation to grammar characteristic of a proficiency level. Concretely, we (1) evaluate the ability of GPT3.5 and GPT4 to generate example sentences for the standard English Grammar Profile CEFR taxonomy using few-shot in-context learning, (2) train BERT-based detectors with these generated examples of grammatical patterns, and (3) control the grammatical complexity of text generated by the open Mistral model by ranking sentence candidates with these detectors. We show that the grammar pattern instantiation quality is accurate but too homogeneous, and our classifiers successfully detect these patterns. A GPT-generated dataset of almost 1 million positive and negative examples for the English Grammar Profile is released with this work. With our method, Mistral’s output significantly increases the number of characteristic grammar constructions on the desired level, outperforming GPT4. This showcases how language domain knowledge can enhance Large Language Models for specific education needs, facilitating their effective use for intelligent tutor development and AI-generated materials. Code, models, and data are available at https://github.com/dominikglandorf/LLM-grammar.</abstract>
       <url hash="9dbc85bf">2024.bea-1.24</url>
@@ -344,7 +344,7 @@
     <paper id="28">
       <title>Improving Readability Assessment with Ordinal Log-Loss</title>
       <author><first>Ho Hung</first><last>Lim</last><affiliation>City University of Hong Kong</affiliation></author>
-      <author><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
       <pages>343-350</pages>
       <abstract>Automatic Readability Assessment (ARA) predicts the level of difficulty of a text, e.g. at Grade 1 to Grade 12. ARA is an ordinal classification task since the predicted levels follow an underlying order, from easy to difficult. However, most neural ARA models ignore the distance between the gold level and predicted level, treating all levels as independent labels. This paper investigates whether distance-sensitive loss functions can improve ARA performance. We evaluate a variety of loss functions on neural ARA models, and show that ordinal log-loss can produce statistically significant improvement over the standard cross-entropy loss in terms of adjacent accuracy in a majority of our datasets.</abstract>
       <url hash="4705fc1b">2024.bea-1.28</url>
@@ -368,7 +368,7 @@
       <author><first>Zhexiong</first><last>Liu</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Lindsay</first><last>Matsumura</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Elaine</first><last>Wang</last><affiliation>RAND Corporation</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Richard</first><last>Correnti</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>365-380</pages>
       <abstract>Although effective revision is the crucial component of writing instruction, few automated writing evaluation (AWE) systems specifically focus on the quality of the revisions students undertake. In this study we investigate the use of a large language model (GPT-4) with Chain-of-Thought (CoT) prompting for assessing the quality of young students’ essay revisions aligned with the automated feedback messages they received. Results indicate that GPT-4 has significant potential for evaluating revision quality, particularly when detailed rubrics are included that describe common revision patterns shown by young writers. However, the addition of CoT prompting did not significantly improve performance. Further examination of GPT-4’s scoring performance across various levels of student writing proficiency revealed variable agreement with human ratings. The implications for improving AWE systems focusing on young students are discussed.</abstract>
@@ -410,7 +410,7 @@
       <author><first>Seung</first><last>Lee</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Gamze</first><last>Ozogul</last><affiliation>Indiana University</affiliation></author>
       <author><first>Xiaoying</first><last>Zheng</last><affiliation>Indiana University</affiliation></author>
-      <author><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
+      <author id="james-lester"><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
       <pages>403-413</pages>
       <abstract>The practice of soliciting self-explanations from students is widely recognized for its pedagogical benefits. However, the labor-intensive effort required to manually assess students’ explanations makes it impractical for classroom settings. As a result, many current solutions to gauge students’ understanding during class are often limited to multiple choice or fill-in-the-blank questions, which are less effective at exposing misconceptions or helping students to understand and integrate new concepts. Recent advances in large language models (LLMs) present an opportunity to assess student explanations in real-time, making explanation-based classroom response systems feasible for implementation. In this work, we investigate LLM-based approaches for assessing the correctness of students’ explanations in response to undergraduate computer science questions. We investigate alternative prompting approaches for multiple LLMs (i.e., Llama 2, GPT-3.5, and GPT-4) and compare their performance to FLAN-T5 models trained in a fine-tuning manner. The results suggest that the highest accuracy and weighted F1 score were achieved by fine-tuning FLAN-T5, while an in-context learning approach with GPT-4 attains the highest macro F1 score.</abstract>
       <url hash="fbd61740">2024.bea-1.33</url>
@@ -567,7 +567,7 @@
       <title>Leveraging Physical and Semantic Features of text item for Difficulty and Response Time Prediction of <fixed-case>USMLE</fixed-case> Questions</title>
       <author><first>Gummuluri</first><last>Venkata Ravi Ram</last><affiliation>National Institute of Technology Karnataka, Surathkal</affiliation></author>
       <author><first>Ashinee</first><last>Kesanam</last><affiliation>National Institute of Technology Karnataka Surathkal</affiliation></author>
-      <author><first>Anand Kumar</first><last>M</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
       <pages>534-541</pages>
       <abstract>This paper presents our system developed for the Shared Task on Automated Prediction of Item Difficulty and Item Response Time for USMLE questions, organized by the Association for Computational Linguistics (ACL) Special Interest Group for building Educational Applications (BEA SIGEDU). The Shared Task, held as a workshop at the North American Chapter of the Association for Computational Linguistics (NAACL) 2024 conference, aimed to advance the state-of-the-art in predicting item characteristics directly from item text, with implications for the fairness and validity of standardized exams. We compared various methods ranging from BERT for regression to Random forest, Gradient Boosting(GB), Linear Regression, Support Vector Regressor (SVR), k-nearest neighbours (KNN) Regressor, MultiLayer Perceptron(MLP) to custom-ANN using BioBERT and Word2Vec embeddings and provided inferences on which performed better. This paper also explains the importance of data augmentation to balance the data in order to get better results. We also proposed five hypotheses regarding factors impacting difficulty and response time for a question and also verified it thereby helping researchers to derive meaningful numerical attributes for accurate prediction. We achieved a RSME score of 0.315 for Difficulty prediction and 26.945 for Response Time.</abstract>
       <url hash="fabb2a20">2024.bea-1.46</url>
@@ -608,9 +608,9 @@
     </paper>
     <paper id="50">
       <title><fixed-case>UNED</fixed-case> team at <fixed-case>BEA</fixed-case> 2024 Shared Task: Testing different Input Formats for predicting Item Difficulty and Response Time in Medical Exams</title>
-      <author><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
       <author><first>Sergio</first><last>Moreno-Álvarez</last><affiliation>UNED</affiliation></author>
-      <author><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <pages>567-570</pages>
       <abstract>This paper presents the description and primary outcomes of our team’s participation in the BEA 2024 shared task. Our primary exploration involved employing transformer-based systems, particularly BERT models, due to their suitability for Natural Language Processing tasks and efficiency with computational resources. We experimented with various input formats, including concatenating all text elements and incorporating only the clinical case. Surprisingly, our results revealed different impacts on predicting difficulty versus response time, with the former favoring clinical text only and the latter benefiting from including the correct answer. Despite moderate performance in difficulty prediction, our models excelled in response time prediction, ranking highest among all participants. This study lays the groundwork for future investigations into more complex approaches and configurations, aiming to advance the automatic prediction of exam difficulty and response time.</abstract>
       <url hash="df815813">2024.bea-1.50</url>
@@ -621,7 +621,7 @@
       <title>The <fixed-case>BEA</fixed-case> 2024 Shared Task on the Multilingual Lexical Simplification Pipeline</title>
       <author><first>Matthew</first><last>Shardlow</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Fernando</first><last>Alva-Manchego</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
       <author><first>Stefan</first><last>Bott</last><affiliation>Universitat Pompe Fabra</affiliation></author>
       <author><first>Saul</first><last>Calderon Ramirez</last><affiliation>Tecnológico de Costa Rica</affiliation></author>
       <author><first>Rémi</first><last>Cardon</last><affiliation>CENTAL, ILC, Université Catholique de Louvain</affiliation></author>
@@ -635,10 +635,10 @@
       <author><first>Kai</first><last>North</last><affiliation>George Mason University</affiliation></author>
       <author><first>Laura</first><last>Occhipinti</last><affiliation>University of Bologna</affiliation></author>
       <author><first>Nelson Peréz</first><last>Rojas</last><affiliation>Tecnológico de Costa Rica</affiliation></author>
-      <author><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Tharindu</first><last>Ranasinghe</last><affiliation>Aston University</affiliation></author>
       <author><first>Martin Solis</first><last>Salazar</last><affiliation>Tecnológico de Costa Rica</affiliation></author>
-      <author><first>Sanja</first><last>Štajner</last><affiliation>Karlsruhe</affiliation></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last><affiliation>Karlsruhe</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <author><first>Horacio</first><last>Saggion</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <pages>571-589</pages>
diff --git a/data/xml/2024.bionlp.xml b/data/xml/2024.bionlp.xml
index 34b6493c52..e7891001a9 100644
--- a/data/xml/2024.bionlp.xml
+++ b/data/xml/2024.bionlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
       <editor><first>Makoto</first><last>Miwa</last></editor>
       <editor><first>Kirk</first><last>Roberts</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -74,7 +74,7 @@
       <author><first>Jiuding</first><last>Sun</last></author>
       <author><first>Karen</first><last>Zhang</last></author>
       <author><first>Jered</first><last>McInerney</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Silvio</first><last>Amir</last></author>
       <pages>50–71</pages>
       <abstract>Instruction-tuned Large Language Models (LLMs) can perform a wide range of tasks given natural language instructions to do so, but they are sensitive to how such instructions are phrased. This issue is especially concerning in healthcare, as clinicians are unlikely to be experienced prompt engineers and the potential consequences of inaccurate outputs are heightened in this domain. This raises a practical question: How robust are instruction-tuned LLMs to natural variations in the instructions provided for clinical NLP tasks? We collect prompts from medical doctors across a range of tasks and quantify the sensitivity of seven LLMs—some general, others specialized—to natural (i.e., non-adversarial) instruction phrasings. We find that performance varies substantially across all models, and that—perhaps surprisingly—domain-specific models explicitly trained on clinical data are especially brittle, compared to their general domain counterparts. Further, arbitrary phrasing differences can affect fairness, e.g., valid but distinct instructions for mortality prediction yield a range both in overall performance, and in terms of differences between demographic groups.</abstract>
@@ -147,7 +147,7 @@
     <paper id="10">
       <title>Overview of the <fixed-case>B</fixed-case>io<fixed-case>L</fixed-case>ay<fixed-case>S</fixed-case>umm 2024 Shared Task on the Lay Summarization of Biomedical Research Articles</title>
       <author><first>Tomas</first><last>Goldsack</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Matthew</first><last>Shardlow</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>122–131</pages>
@@ -237,7 +237,7 @@
       <author><first>Aman</first><last>Sinha</last></author>
       <author><first>Timothee</first><last>Mickus</last></author>
       <author><first>Marianne</first><last>Clausel</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Xavier</first><last>Coubez</last></author>
       <pages>202–211</pages>
       <abstract>The success of pretrained language models (PLMs) across a spate of use-cases has led to significant investment from the NLP community towards building domain-specific foundational models. On the other hand, in mission critical settings such as biomedical applications, other aspects also factor in—chief of which is a model’s ability to produce reasonable estimates of its own uncertainty. In the present study, we discuss these two desiderata through the lens of how they shape the entropy of a model’s output probability distribution. We find that domain specificity and uncertainty awareness can often be successfully combined, but the exact task at hand weighs in much more strongly.</abstract>
@@ -262,7 +262,7 @@
     <paper id="18">
       <title>Using Large Language Models to Evaluate Biomedical Query-Focused Summarisation</title>
       <author><first>Hashem</first><last>Hijazi</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Vincent</first><last>Nguyen</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <pages>236–242</pages>
@@ -296,8 +296,8 @@
       <title>Get the Best out of 1<fixed-case>B</fixed-case> <fixed-case>LLM</fixed-case>s: Insights from Information Extraction on Clinical Documents</title>
       <author><first>Saeed</first><last>Farzi</last></author>
       <author><first>Soumitra</first><last>Ghosh</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>266–276</pages>
       <abstract>While the popularity of large, versatile language models like ChatGPT continues to rise, the landscape shifts when considering open-source models tailored to specific domains. Moreover, many areas, such as clinical documents, suffer from a scarcity of training data, often amounting to only a few hundred instances. Additionally, in certain settings, such as hospitals, cloud-based solutions pose privacy concerns, necessitating the deployment of language models on traditional hardware, such as single GPUs or powerful CPUs. To address these complexities, we conduct extensive experiments on both clinical entity detection and relation extraction in clinical documents using 1B parameter models. Our study delves into traditional fine-tuning, continuous pre-training in the medical domain, and instruction-tuning methods, providing valuable insights into their effectiveness in a multilingual setting. Our results underscore the importance of domain-specific models and pre-training for clinical natural language processing tasks. Furthermore, data augmentation using cross-lingual information improves performance in most cases, highlighting the potential for multilingual enhancements.</abstract>
       <url hash="27998f64">2024.bionlp-1.21</url>
@@ -336,7 +336,7 @@
       <author><first>Kriti</first><last>Bhattarai</last></author>
       <author><first>Inez Y.</first><last>Oh</last></author>
       <author><first>Zachary B.</first><last>Abrams</last></author>
-      <author><first>Albert M.</first><last>Lai</last></author>
+      <author id="albert-m-lai"><first>Albert M.</first><last>Lai</last></author>
       <pages>318–327</pages>
       <abstract>Generative pre-trained transformer (GPT) models have shown promise in clinical entity and relation extraction tasks because of their precise extraction and contextual understanding capability. In this work, we further leverage the Unified Medical Language System (UMLS) knowledge base to accurately identify medical concepts and improve clinical entity and relation extraction at the document level. Our framework selects UMLS concepts relevant to the text and combines them with prompts to guide language models in extracting entities. Our experiments demonstrate that this initial concept mapping and the inclusion of these mapped concepts in the prompts improves extraction results compared to few-shot extraction tasks on generic language models that do not leverage UMLS. Further, our results show that this approach is more effective than the standard Retrieval Augmented Generation (RAG) technique, where retrieved data is compared with prompt embeddings to generate results. Overall, we find that integrating UMLS concepts with GPT models significantly improves entity and relation identification, outperforming the baseline and RAG models. By combining the precise concept mapping capability of knowledge-based approaches like UMLS with the contextual understanding capability of GPT, our method highlights the potential of these approaches in specialized domains like healthcare.</abstract>
       <url hash="bb681dc6">2024.bionlp-1.24</url>
@@ -373,7 +373,7 @@
     <paper id="27">
       <title>Pre-training data selection for biomedical domain adaptation using journal impact metrics</title>
       <author><first>Mathieu</first><last>Lai-king</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>363–369</pages>
       <abstract>Domain adaptation is a widely used method in natural language processing (NLP) to improve the performance of a language model within a specific domain. This method is particularly common in the biomedical domain, which sees regular publication of numerous scientific articles. PubMed, a significant corpus of text, is frequently used in the biomedical domain. The primary objective of this study is to explore whether refining a pre-training dataset using specific quality metrics for scientific papers can enhance the performance of the resulting model. To accomplish this, we employ two straightforward journal impact metrics and conduct experiments by continually pre-training BERT on various subsets of the complete PubMed training set, we then evaluate the resulting models on biomedical language understanding tasks from the BLURB benchmark. Our results show that pruning using journal impact metrics is not efficient. But we also show that pre-training using fewer abstracts (but with the same number of training steps) does not necessarily decrease the resulting model’s performance.</abstract>
       <url hash="782aef89">2024.bionlp-1.27</url>
@@ -410,9 +410,9 @@
     <paper id="30">
       <title>Is That the Right Dose? Investigating Generative Language Model Performance on Veterinary Prescription Text Analysis</title>
       <author><first>Brian</first><last>Hur</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Laura</first><last>Hardefeldt</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>390–397</pages>
       <abstract>Optimizing antibiotic dosing recommendations is a vital aspect of antimicrobial stewardship (AMS) programs aimed at combating antimicrobial resistance (AMR), a significant public health concern, where inappropriate dosing contributes to the selection of AMR pathogens. A key challenge is the extraction of dosing information, which is embedded in free-text clinical records and necessitates numerical transformations. This paper assesses the utility of Large Language Models (LLMs) in extracting essential prescription attributes such as dose, duration, active ingredient, and indication. We evaluate methods to optimize LLMs on this task against a baseline BERT-based ensemble model. Our findings reveal that LLMs can achieve exceptional accuracy by combining probabilistic predictions with deterministic calculations, enforced through functional prompting, to ensure data types and execute necessary arithmetic. This research demonstrates new prospects for automating aspects of AMS when no training data is available.</abstract>
       <url hash="098c59a0">2024.bionlp-1.30</url>
@@ -424,7 +424,7 @@
       <author><first>William</first><last>Hogan</last></author>
       <author><first>Andrew</first><last>Bartko</last></author>
       <author><first>Jingbo</first><last>Shang</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>398–408</pages>
       <abstract>The interplay between microbiota and diseases has emerged as a significant area of research facilitated by the proliferation of cost-effective and precise sequencing technologies. To keep track of the many findings, domain experts manually review publications to extract reported microbe-disease associations and compile them into knowledge bases. However, manual curation efforts struggle to keep up with the pace of publications. Relation extraction has demonstrated remarkable success in other domains, yet the availability of datasets supporting such methods within the domain of microbiome research remains limited. To bridge this gap, we introduce the Microbe-Disease Relation Extraction Dataset (MiDRED); a human-annotated dataset containing 3,116 annotations of fine-grained relationships between microbes and diseases. We hope this dataset will help address the scarcity of data in this crucial domain and facilitate the development of advanced text-mining solutions to automate the creation and maintenance of microbiome knowledge bases.</abstract>
       <url hash="33100a4b">2024.bionlp-1.31</url>
@@ -436,7 +436,7 @@
       <author><first>Rahmad</first><last>Mahendra</last></author>
       <author><first>Damiano</first><last>Spina</last></author>
       <author><first>Lawrence</first><last>Cavedon</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>409–415</pages>
       <abstract>In this short position paper, we highlight the importance of numbers in clinical text. We first present a taxonomy of number variants. We then perform corpus analysis to analyze characteristics of number use in several clinical corpora. Based on our findings of extensive use of numbers, and limited understanding of the impact of numbers on clinical NLP tasks, we identify the need for a public benchmark that will support investigation of numerical processing tasks for the clinical domain.</abstract>
       <url hash="5566b244">2024.bionlp-1.32</url>
@@ -446,7 +446,7 @@
     <paper id="33">
       <title>A Fine-grained citation graph for biomedical academic papers: the finding-citation graph</title>
       <author><first>Yuan</first><last>Liang</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Roonak</first><last>Rezvani</last></author>
       <pages>416–426</pages>
       <abstract>Citations typically mention findings as well as papers. To model this richer notion of citation, we introduce a richer form of citation graph with nodes for both academic papers and their findings: the finding-citation graph (FCG). We also present a new pipeline to construct such a graph, which includes a finding identification module and a citation sentence extraction module. From each paper, it extracts rich basic information, abstract, and structured full text first. The abstract and vital sections, such as the results and discussion, are input into the finding identification module. This module identifies multiple findings from a paper, achieving an 80% accuracy in multiple findings evaluation. The full text is input into the citation sentence extraction module to identify inline citation sentences and citation markers, achieving 97.7% accuracy. Then, the graph is constructed using the outputs from the two modules mentioned above. We used the Europe PMC to build such a graph using the pipeline, resulting in a graph with 14.25 million nodes and 76 million edges.</abstract>
@@ -473,7 +473,7 @@
       <author><first>Rao Muhammad</first><last>Anwer</last></author>
       <author><first>Salman</first><last>Khan</last></author>
       <author><first>Jorma</first><last>Laaksonen</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <pages>440–448</pages>
       <abstract>The latest breakthroughs in large language models (LLMs) and vision-language models (VLMs) have showcased promising capabilities toward performing a wide range of tasks. Such models are typically trained on massive datasets comprising billions of image-text pairs with diverse tasks. However, their performance on task-specific domains, such as radiology, is still under-explored. While few works have recently explored LLMs-based conversational medical models, they mainly focus on text-based analysis. In this paper, we introduce XrayGPT, a conversational medical vision-language (VLMs) model that can analyze and answer open-ended questions about chest radiographs. Specifically, we align both medical visual encoder with a fine-tuned LLM to possess visual conversation abilities, grounded in an understanding of radiographs and medical knowledge. For improved alignment of chest radiograph data, we generate ~217k interactive and high-quality summaries from free-text radiology reports. Extensive experiments are conducted to validate the merits of XrayGPT. To conduct an expert evaluation, certified medical doctors evaluated the output of our XrayGPT on a test subset and the results reveal that more than 70% of the responses are scientifically accurate, with an average score of 4/5. We hope our simple and effective method establishes a solid baseline, facilitating future research toward automated analysis and summarization of chest radiographs. Code, models, and instruction sets will be publicly released.</abstract>
       <url hash="8d8f36c0">2024.bionlp-1.35</url>
@@ -502,7 +502,7 @@
       <author><first>Yuki</first><last>Yamagata</last></author>
       <author><first>Solen</first><last>Quiniou</last></author>
       <author><first>Samuel</first><last>Chaffron</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>457–473</pages>
       <abstract>Biomedical information extraction is crucial for advancing research, enhancing healthcare, and discovering treatments by efficiently analyzing extensive data. Given the extensive amount of biomedical data available, automated information extraction methods are necessary due to manual extraction’s labor-intensive, expertise-dependent, and costly nature. In this paper, we propose a novel two-stage system for information extraction where we annotate biomedical articles based on a specific ontology (HOIP). The major challenge is annotating relation between biomedical processes often not explicitly mentioned in text articles. Here, we first predict the candidate processes and then determine the relationships between these processes. The experimental results show promising outcomes in mention-agnostic process identification using Large Language Models (LLMs). In relation classification, BERT-based supervised models still outperform LLMs significantly. The end-to-end evaluation results suggest the difficulty of this task and room for improvement in both process identification and relation classification.</abstract>
       <url hash="5170930e">2024.bionlp-1.37</url>
@@ -527,10 +527,10 @@
       <title>Intervention extraction in preclinical animal studies of <fixed-case>A</fixed-case>lzheimer’s Disease: Enhancing regex performance with language model-based filtering</title>
       <author><first>Yiyuan</first><last>Pu</last></author>
       <author><first>Kaitlyn</first><last>Hair</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Mike</first><last>Conway</last></author>
       <author><first>Malcolm</first><last>MacLeod</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>486–492</pages>
       <abstract>We explore different information extraction tools for annotation of interventions to support automated systematic reviews of preclinical AD animal studies. We compare two PICO (Population, Intervention, Comparison, and Outcome) extraction tools and two prompting-based learning strategies based on Large Language Models (LLMs). Motivated by the high recall of a dictionary-based approach, we define a two-stage method, removing false positives obtained from regexes with a pre-trained LM. With ChatGPT-based filtering using three-shot prompting, our approach reduces almost two-thirds of False Positives compared to the dictionary approach alone, while outperforming knowledge-free instructional prompting.</abstract>
       <url hash="a0c283fb">2024.bionlp-1.39</url>
@@ -658,7 +658,7 @@
       <author><first>Yuanbang</first><last>Liang</last></author>
       <author><first>Yipeng</first><last>Qin</last></author>
       <author><first>Hantao</first><last>Liu</last></author>
-      <author><first>Irena</first><last>Spasic</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasic</last></author>
       <pages>591–596</pages>
       <abstract>Radiology Report Generation (RRG) seeks to leverage deep learning techniques to automate the reporting process of radiologists. Current methods are typically modelling RRG as an image-to-text generation task that takes X-ray images as input and generates textual reports describing the corresponding clinical observations. However, the wording of the same clinical observation could have been influenced by the expression preference of radiologists. Nevertheless, such variability can be mitigated by normalizing textual reports into structured representations such as a graph structure. In this study, we attempt a novel paradigm for incorporating graph structural data into the RRG model. Our approach involves predicting graph labels based on visual features and subsequently initiating the decoding process through a template injection conditioned on the predicted labels. We trained and evaluated our model on the BioNLP 2024 Shared Task on Large-Scale Radiology Report Generation and submitted our results to the ViLMedic RRG leaderboard. Although our model showed a moderate ranking on the leaderboard, the results provide preliminary evidence for the feasibility of this new paradigm, warranting further exploration and refinement.</abstract>
       <url hash="d321ad6f">2024.bionlp-1.49</url>
@@ -780,7 +780,7 @@
     <paper id="57">
       <title>Ixa-<fixed-case>M</fixed-case>ed at Discharge Me! Retrieval-Assisted Generation for Streamlining Discharge Documentation</title>
       <author><first>Jordan C.</first><last>Koontz</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
       <pages>658–663</pages>
       <abstract>In this paper we present our system for the BioNLP ACL’24 “Discharge Me!” task on automating discharge summary section generation. Using Retrieval-Augmented Generation, we combine a Large Language Model (LLM) with external knowledge to guide the generation of the target sections. Our approach generates structured patient summaries from discharge notes using an instructed LLM, retrieves relevant “Brief Hospital Course” and “Discharge Instructions” examples via BM25 and SentenceBERT, and provides this context to a frozen LLM for generation. Our top system using SentenceBERT retrieval achieves an overall score of 0.183, outperforming zero-shot baselines. We analyze performance across different aspects, discussing limitations and future research directions.</abstract>
@@ -888,7 +888,7 @@
     <paper id="65">
       <title><fixed-case>I</fixed-case>gnition<fixed-case>I</fixed-case>nnovators at “Discharge Me!”: Chain-of-Thought Instruction Finetuning Large Language Models for Discharge Summaries</title>
       <author><first>An Quang</first><last>Tang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Minh Ngoc</first><last>Dinh</last></author>
       <pages>731–739</pages>
       <abstract>This paper presents our proposed approach to the Discharge Me! shared task, collocated with the 23th Workshop on Biomedical Natural Language Processing (BioNLP). In this work, we develop an LLM-based framework for solving the Discharge Summary Documentation (DSD) task, i.e., generating the two critical target sections ‘Brief Hospital Course’ and ‘Discharge Instructions’ in the discharge summary. By streamlining the recent instruction-finetuning process on LLMs, we explore several prompting strategies for optimally adapting LLMs to specific generation task of DSD. Experimental results show that providing a clear output structure, complimented by a set of comprehensive Chain-of-Thoughts (CoT) questions, effectively improves the model’s reasoning capability, and thereby, enhancing the structural correctness and faithfulness of clinical information in the generated text. Source code is available at: https://anonymous.4open.science/r/Discharge_LLM-A233</abstract>
@@ -925,7 +925,7 @@
       <author><first>Vicent</first><last>Ahuir</last></author>
       <author><first>Diego</first><last>Torres</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <pages>755–761</pages>
       <abstract>This paper presents our contribution to the BioLaySumm 2024 shared task of the 23rd BioNLP Workshop. The task is to create a lay summary, given a biomedical research article and its technical summary. As the input to the system could be large, a Longformer Encoder-Decoder (LED) has been used. We continuously pre-trained a general domain LED model with biomedical data to adapt it to this specific domain. In the pre-training phase, several pre-training tasks were aggregated to inject linguistic knowledge and increase the abstractivity of the generated summaries. Since the distribution of samples between the two datasets, eLife and PLOS, is unbalanced, we fine-tuned two models: one for eLife and another for PLOS. To increase the quality of the lay summaries of the system, we developed a regression model that helps us rank the summaries generated by the summarization models. This regression model predicts the quality of the summary in three different aspects: Relevance, Readability, and Factuality. We present the results of our models and a study to measure the ranking capabilities of the regression model.</abstract>
       <url hash="def5fd29">2024.bionlp-1.68</url>
@@ -992,7 +992,7 @@
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>B</fixed-case>io<fixed-case>L</fixed-case>ay<fixed-case>S</fixed-case>umm: Self-Play Fine-Tuning of Large Language Models for Biomedical Lay Summarisation</title>
       <author><first>Mariia</first><last>Chizhikova</last></author>
       <author><first>Manuel Carlos</first><last>Díaz-Galiano</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <author><first>María-Teresa</first><last>Martín-Valdivia</last></author>
       <pages>804–809</pages>
       <abstract>An effective disclosure of scientific knowledge and advancements to the general public is often hindered by the complexity of the technical language used in research which often results very difficult, if not impossible, for non-experts to understand. In this paper we present the approach developed by the SINAI team as the result of our participation in BioLaySumm shared task hosted by the BioNLP workshop at ACL 2024. Our approach stems from the experimentation we performed in order to test the ability of state-of-the-art pre-trained large language models, namely GPT 3.5, GPT 4 and Llama-3, to tackle this task in a few-shot manner. In order to improve this baseline, we opted for fine-tuning Llama-3 by applying parameter-efficient methodologies. The best performing system which resulted from applying self-play fine tuning method which allows the model to improve while learning to distinguish between its own generations from the previous step from the gold standard summaries. This approach achieved 0.4205 ROUGE-1 score and 0.8583 BERTScore.</abstract>
diff --git a/data/xml/2024.blackboxnlp.xml b/data/xml/2024.blackboxnlp.xml
index 7d5f37424c..38ee5fd061 100644
--- a/data/xml/2024.blackboxnlp.xml
+++ b/data/xml/2024.blackboxnlp.xml
@@ -48,7 +48,7 @@
     </paper>
     <paper id="3">
       <title>Are there identifiable structural parts in the sentence embedding whole?</title>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>Idiap Research Institute and University of Geneva, Switzerland</affiliation></author>
       <pages>23-42</pages>
       <abstract>Sentence embeddings from transformer models encode much linguistic information in a fixed-length vector. We investigate whether structural information – specifically, information about chunks and their structural and semantic properties – can be detected in these representations. We use a dataset consisting of sentences with known chunk structure, and two linguistic intelligence datasets, whose solution relies on detecting chunks and their grammatical number, and respectively, their semantic roles. Through an approach involving indirect supervision, and through analyses of the performance on the tasks and of the internal representations built during learning, we show that information about chunks and their properties can be obtained from sentence embeddings.</abstract>
@@ -124,7 +124,7 @@
       <author><first>Sebastian</first><last>Gehrmann</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Achintya</first><last>Gopal</last></author>
       <author><first>David S</first><last>Rosenberg</last><affiliation>Bloomberg</affiliation></author>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <author><first>Mark</first><last>Dredze</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
       <pages>140-176</pages>
       <abstract>Current large language model (LLM) evaluations rely on benchmarks to assess model capabilities and their encoded knowledge. However, these evaluations cannot reveal where a model encodes its knowledge, and thus little is known about which weights contain specific information. We propose a method to statically (without forward or backward passes) locate topical knowledge in the weight space of an LLM, building on a prior insight that parameters can be decoded into interpretable tokens. If parameters can be mapped into the embedding space, it should be possible to directly search for knowledge via embedding similarity. We study the validity of this assumption across several LLMs for a variety of concepts in the financial domain and a toxicity detection setup. Our analysis yields an improved understanding of the promises and limitations of static knowledge location in real-world scenarios.</abstract>
@@ -181,7 +181,7 @@
     <paper id="14">
       <title>On the alignment of <fixed-case>LM</fixed-case> language generation and human language comprehension</title>
       <author><first>Lena Sophia</first><last>Bolliger</last><affiliation>University of Zurich</affiliation></author>
-      <author><first>Patrick</first><last>Haller</last><affiliation>University of Zurich</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Lena Ann</first><last>Jäger</last><affiliation>University of Zurich and Universität Potsdam</affiliation></author>
       <pages>217-231</pages>
       <abstract>Previous research on the predictive power (PP) of surprisal and entropy has focused on determining which language models (LMs) generate estimates with the highest PP on reading times, and examining for which populations the PP is strongest. In this study, we leverage eye movement data on texts that were generated using a range of decoding strategies with different LMs. We then extract the transition scores that reflect the models’ production rather than comprehension effort. This allows us to investigate the alignment of LM language production and human language comprehension. Our findings reveal that there are differences in the strength of the alignment between reading behavior and certain LM decoding strategies and that this alignment further reflects different stages of language understanding (early, late, or global processes). Although we find lower PP of transition-based measures compared to surprisal and entropy for most decoding strategies, our results provide valuable insights into which decoding strategies impose less processing effort for readers. Our code is available via https://github.com/DiLi-Lab/LM-human-alignment.</abstract>
@@ -205,7 +205,7 @@
       <title>Uncovering Syllable Constituents in the Self-Attention-Based Speech Representations of Whisper</title>
       <author><first>Erfan</first><last>A Shams</last><affiliation>University College Dublin</affiliation></author>
       <author><first>Iona</first><last>Gessinger</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last><affiliation>University College Dublin</affiliation></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last><affiliation>University College Dublin</affiliation></author>
       <pages>238-247</pages>
       <abstract>As intuitive units of speech, syllables have been widely studied in linguistics. A syllable can be defined as a three-constituent unit with a vocalic centre surrounded by two (in some languages optional) consonant clusters. Syllables are also used to design automatic speech recognition (ASR) models. The significance of knowledge-driven syllable-based tokenisation in ASR over data-driven byte-pair encoding has often been debated. However, the emergence of transformer-based ASR models employing self-attention (SA) overshadowed this debate. These models learn the nuances of speech from large corpora without prior knowledge of the domain; yet, they are not interpretable by design. Consequently, it is not clear if the recent performance improvements are related to the extraction of human-interpretable knowledge. We probe such models for syllable constituents and use an SA head pruning method to assess the relevance of the SA weights. We also investigate the role of vowel identification in syllable constituent probing. Our findings show that the general features of syllable constituents are extracted in the earlier layers of the model and the syllable-related features mostly depend on the temporal knowledge incorporated in specific SA heads rather than on vowel identification.</abstract>
       <url hash="631a5569">2024.blackboxnlp-1.16</url>
@@ -359,7 +359,7 @@
     </paper>
     <paper id="29">
       <title>Investigating Layer Importance in Large Language Models</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Yanfei</first><last>Dong</last><affiliation>PayPal Inc. and national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
       <pages>469-479</pages>
diff --git a/data/xml/2024.bucc.xml b/data/xml/2024.bucc.xml
index 7465396b37..fd6f2e24cf 100644
--- a/data/xml/2024.bucc.xml
+++ b/data/xml/2024.bucc.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-05-16" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th Workshop on Building and Using Comparable Corpora (BUCC) @ LREC-COLING 2024</booktitle>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <publisher>ELRA and ICCL</publisher>
@@ -21,7 +21,7 @@
       <title>On a Novel Application of <fixed-case>W</fixed-case>asserstein-<fixed-case>P</fixed-case>rocrustes for Unsupervised Cross-Lingual Alignment of Embeddings</title>
       <author><first>Guillem</first><last>Ramírez</last></author>
       <author><first>Rumen</first><last>Dangovski</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Marin</first><last>Soljacic</last></author>
       <pages>1–11</pages>
       <url hash="d1c084f9">2024.bucc-1.1</url>
@@ -45,9 +45,9 @@
       <author><first>Bernardo</first><last>Leite</last></author>
       <author><first>Henrique</first><last>Lopes Cardoso</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Rodrigo</first><last>Santos</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>24–34</pages>
       <url hash="410c8d81">2024.bucc-1.3</url>
       <bibkey>osorio-etal-2024-portulan</bibkey>
@@ -64,7 +64,7 @@
       <author><first>Zi</first><last>Long</last></author>
       <author><first>ZhenHao</first><last>Tang</last></author>
       <author><first>Xianghua</first><last>Fu</last></author>
-      <author id="jian-chen"><first>Jian</first><last>Chen</last></author>
+      <author><first>Jian</first><last>Chen</last></author>
       <author><first>Shilong</first><last>Hou</last></author>
       <author><first>Jinze</first><last>Lyu</last></author>
       <pages>36–50</pages>
@@ -104,7 +104,7 @@
     <paper id="9">
       <title>Creating Clustered Comparable Corpora from <fixed-case>W</fixed-case>ikipedia with Different Fuzziness Levels and Language Representativity</title>
       <author><first>Anna</first><last>Laskina</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Gaelle</first><last>Calvary</last></author>
       <pages>85–93</pages>
       <url hash="284f0287">2024.bucc-1.9</url>
@@ -115,7 +115,7 @@
       <author><first>Marc</first><last>Kupietz</last></author>
       <author><first>Piotr</first><last>Banski</last></author>
       <author><first>Nils</first><last>Diewald</last></author>
-      <author><first>Beata</first><last>Trawinski</last></author>
+      <author id="beata-trawinski"><first>Beata</first><last>Trawinski</last></author>
       <author><first>Andreas</first><last>Witt</last></author>
       <pages>94–103</pages>
       <url hash="a899e788">2024.bucc-1.10</url>
diff --git a/data/xml/2024.c3nlp.xml b/data/xml/2024.c3nlp.xml
index eb4c419019..8ae118f57f 100644
--- a/data/xml/2024.c3nlp.xml
+++ b/data/xml/2024.c3nlp.xml
@@ -67,7 +67,7 @@
       <author><first>Geyu</first><last>Lin</last><affiliation>Institute of Infocomm Research, A*STAR</affiliation></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
       <author><first>Chengwei</first><last>Wei</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>42-47</pages>
       <abstract>Large language models (LLMs) have rapidly evolved as the foundation of various natural language processing (NLP) applications. Despite their wide use cases, their understanding of culturally-related concepts and reasoning remains limited. Meantime, there is a significant need to enhance these models’ cultural reasoning capabilities, especially concerning underrepresented regions. This paper introduces a novel pipeline for extracting high-quality, culturally-related instruction tuning datasets from vast unstructured corpora. We utilize a self-instruction generation pipeline to identify cultural concepts and trigger instruction. By integrating with a general-purpose instruction tuning dataset, our model demonstrates enhanced capabilities in recognizing and understanding regional cultural nuances, thereby enhancing its reasoning capabilities. We conduct experiments across three regions: Singapore, the Philippines, and the United States, achieving performance improvement of up to 6%. Our research opens new avenues for extracting cultural instruction tuning sets directly from unstructured data, setting a precedent for future innovations in the field.</abstract>
       <url hash="19d05c55">2024.c3nlp-1.4</url>
@@ -115,7 +115,7 @@
       <author><first>Ibrahim</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Shiran</first><last>Dudy</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Resmi</first><last>Ramachandranpillai</last><affiliation>Institute for Experiential AI and Linköping University</affiliation></author>
-      <author><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
       <pages>98-106</pages>
       <abstract>Large Language Models (LLMs), such as ChatGPT, are widely used to generate content for various purposes and audiences. However, these models may not reflect the cultural and emotional diversity of their users, especially for low-resource languages. In this paper, we investigate how ChatGPT represents Hausa’s culture and emotions. We compare responses generated by ChatGPT with those provided by native Hausa speakers on 37 culturally relevant questions. We conducted experiments using emotion analysis. We also used two similarity metrics to measure the alignment between human and ChatGPT responses. We also collect human participants ratings and feedback on ChatGPT responses. Our results show that ChatGPT has some level of similarity to human responses, but also exhibits some gaps and biases in its knowledge and awareness of Hausa culture and emotions. We discuss the implications and limitations of our methodology and analysis and suggest ways to improve the performance and evaluation of LLMs for low-resource languages.</abstract>
       <url hash="7e10b534">2024.c3nlp-1.8</url>
@@ -125,7 +125,7 @@
     <paper id="9">
       <title>Computational Language Documentation: Designing a Modular Annotation and Data Management Tool for Cross-cultural Applicability</title>
       <author><first>Alexandra</first><last>O’Neil</last><affiliation>Indiana University at Bloomington</affiliation></author>
-      <author><first>Daniel</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
       <author><first>Shobhana</first><last>Chelliah</last><affiliation>Indiana University at Bloomington</affiliation></author>
       <pages>107-116</pages>
       <abstract>While developing computational language documentation tools, researchers must center the role of language communities in the process by carefully reflecting on and designing tools to support the varying needs and priorities of different language communities. This paper provides an example of how cross-cultural considerations discussed in literature about language documentation, data sovereignty, and community-led documentation projects can motivate the design of a computational language documentation tool by reflecting on our design process as we work towards developing an annotation and data management tool. We identify three recurring themes for cross-cultural consideration in the literature - Linguistic Sovereignty, Cultural Specificity, and Reciprocity - and present eight essential features for an annotation and data management tool that reflect these themes.</abstract>
diff --git a/data/xml/2024.caldpseudo.xml b/data/xml/2024.caldpseudo.xml
index 111ce2f1e0..df797f470a 100644
--- a/data/xml/2024.caldpseudo.xml
+++ b/data/xml/2024.caldpseudo.xml
@@ -37,7 +37,7 @@
       <author><first>Tomohiro</first><last>Nishiyama</last><affiliation>Nara Institue of Science and Technology</affiliation></author>
       <author><first>Lisa</first><last>Raithel</last><affiliation>TU Berlin, BIFOLD / Université Paris-Saclay, CNRS, LISN</affiliation></author>
       <author><first>Roland</first><last>Roller</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>Université Paris-Saclay, CNRS, LISN</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>Université Paris-Saclay, CNRS, LISN</affiliation></author>
       <author><first>Eiji</first><last>Aramaki</last><affiliation>Nara Institue of Science and Technology</affiliation></author>
       <pages>8-17</pages>
       <abstract>Since medical text cannot be shared easily due to privacy concerns, synthetic data bears much potential for natural language processing applications. In the context of social media and user-generated messages about drug intake and adverse drug effects, this work presents different methods to examine the authenticity of synthetic text. We conclude that the generated tweets are untraceable and show enough authenticity from the medical point of view to be used as a replacement for a real Twitter corpus. However, original data might still be the preferred choice as they contain much more diversity.</abstract>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>PSILENCE</fixed-case>: A Pseudonymization Tool for International Law</title>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last><affiliation>Jus Mundi, France</affiliation></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last><affiliation>Jus Mundi, France</affiliation></author>
       <author><first>Akshita</first><last>Gheewala</last><affiliation>Jus Mundi, France</affiliation></author>
       <pages>25-36</pages>
       <abstract>Since the announcement of the GDPR, the pseudonymization of legal documents has become a high-priority task in many legal organizations. This means that for making public a document, it is necessary to redact the identity of certain entities, such as witnesses. In this work, we present the first results obtained by PSILENCE, a pseudonymization tool created for redacting semi-automatically international arbitration documents in English. PSILENCE has been built using a Named Entity Recognition (NER) system, along with a Coreference Resolution system. These systems allow us to find the people that we need to redact in a clustered way, but also to propose the same pseudonym throughout one document. This last aspect makes it easier to read and comprehend a redacted legal document. Different experiments were done on four different datasets, one of which was legal, and the results are promising, reaching a Macro F-score of up to 0.72 on the legal dataset.</abstract>
@@ -83,8 +83,8 @@
     <paper id="6">
       <title>Extending Off-the-shelf <fixed-case>NER</fixed-case> Systems to Personal Information Detection in Dialogues with a Virtual Agent: Findings from a Real-Life Use Case</title>
       <author><first>Mario</first><last>Mina</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
-      <author><first>Carlos</first><last>Rodríguez</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <author><first>Marta</first><last>Villegas</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <pages>44-53</pages>
       <abstract>We present the findings and results of our pseudonymisation system, which has been developed for a real-life use-case involving users and an informative chatbot in the context of the COVID-19 pandemic. Message exchanges between the two involve the former group providing information about themselves and their residential area, which could easily allow for their re-identification. We create a modular pipeline to detect PIIs and perform basic deidentification such that the data can be stored while mitigating any privacy concerns. The use-case presents several challenging aspects, the most difficult of which is the logistic challenge of not being able to directly view or access the data due to the very privacy issues we aim to resolve. Nevertheless, our system achieves a high recall of 0.99, correctly identifying almost all instances of personal data. However, this comes at the expense of precision, which only reaches 0.64. We describe the sensitive information identification in detail, explaining the design principles behind our decisions. We additionally highlight the particular challenges we’ve encountered.</abstract>
@@ -109,9 +109,9 @@
       <title>Data Anonymization for Privacy-Preserving Large Language Model Fine-Tuning on Call Transcripts</title>
       <author><first>Shayna</first><last>Gardiner</last><affiliation>Dialpad Canada Inc.</affiliation></author>
       <author><first>Tania</first><last>Habib</last><affiliation>Dialpad Canada Inc.</affiliation></author>
-      <author><first>Kevin</first><last>Humphreys</last><affiliation>Dialpad Canada Inc.</affiliation></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last><affiliation>Dialpad Canada Inc.</affiliation></author>
       <author><first>Masha</first><last>Azizi</last><affiliation>Dialpad Canada Inc.</affiliation></author>
-      <author><first>Frederic</first><last>Mailhot</last><affiliation>Dialpad Canada Inc. / Dialpad Inc.</affiliation></author>
+      <author id="frederic-mailhot"><first>Frederic</first><last>Mailhot</last><affiliation>Dialpad Canada Inc. / Dialpad Inc.</affiliation></author>
       <author><first>Anne</first><last>Paling</last><affiliation>Dialpad Canada Inc.</affiliation></author>
       <author><first>Preston</first><last>Thomas</last><affiliation>Dialpad Canada Inc. / Dialpad Inc.</affiliation></author>
       <author><first>Nathan</first><last>Zhang</last><affiliation>Dialpad Canada Inc.</affiliation></author>
diff --git a/data/xml/2024.case.xml b/data/xml/2024.case.xml
index 7d260a74fc..7369e71b58 100644
--- a/data/xml/2024.case.xml
+++ b/data/xml/2024.case.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2024-03-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE 2024)</booktitle>
-      <editor><first>Ali</first><last>Hürriyetoğlu</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <editor><first>Surendrabikram</first><last>Thapa</last></editor>
       <editor><first>Gökçe</first><last>Uludoğan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -23,7 +23,7 @@
       <title>The Future of Web Data Mining: Insights from Multimodal and Code-based Extraction Methods</title>
       <author><first>Evan</first><last>Fellman</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jacob</first><last>Tyo</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1-5</pages>
       <abstract>The extraction of structured data from websites is critical for numerous Artificial Intelligence applications, but modern web design increasingly stores information visually in images rather than in text. This shift calls into question the optimal technique, as language-only models fail without textual cues while new multimodal models like GPT-4 promise image understanding abilities. We conduct the first rigorous comparison between text-based and vision-based models for extracting event metadata harvested from comic convention websites. Surprisingly, our results between GPT-4 Vision and GPT-4 Text uncover a significant accuracy advantage for vision-based methods in an applies-to-apples setting, indicating that vision models may be outpacing language-alone techniques in the task of information extraction from websites. We release our dataset and provide a qualitative analysis to guide further research in multi-modal models for web information extraction.</abstract>
       <url hash="bdb15e0c">2024.case-1.1</url>
@@ -72,7 +72,7 @@
       <author><first>Helene</first><last>Olsen</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Étienne</first><last>Simon</last><affiliation>LTG, UiO</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>University of Oslo</affiliation></author>
       <pages>40-53</pages>
       <abstract>There is a large and growing body of literature on datasets created to facilitate the study of socio-political events of conflict and unrest. However, the datasets, and the approaches taken to create them, vary a lot depending on the type of research they are intended to support. For example, while scholars from natural language processing (NLP) tend to focus on annotating specific spans of text indicating various components of an event, scholars from the disciplines of political science and conflict studies tend to focus on creating databases that code an abstract but structured representation of the event, less tied to a specific source text.The survey presented in this paper aims to map out the current landscape of available event datasets within the domain of social and political conflict and unrest – both from the NLP and political science communities – offering a unified view of the work done across different disciplines.</abstract>
       <url hash="12548161">2024.case-1.5</url>
@@ -125,7 +125,7 @@
     <paper id="10">
       <title><fixed-case>HAM</fixed-case>i<fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>-Generative at <fixed-case>C</fixed-case>limate<fixed-case>A</fixed-case>ctivism 2024: Stance Detection using generative large language models</title>
       <author><first>Jesus M.</first><last>Fraile-Hernandez</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
-      <author><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <pages>79-84</pages>
       <abstract>CASE in EACL 2024 proposes the shared task on Hate Speech and Stance Detection during Climate Activism. In our participation in the stance detection task, we have tested different approaches using LLMs for this classification task. We have tested a generative model using the classical seq2seq structure. Subsequently, we have considerably improved the results by replacing the last layer of these LLMs with a classifier layer. We have also studied how the performance is affected by the amount of data used in training. For this purpose, a partition of the dataset has been used and external data from posture detection tasks has been added.</abstract>
       <url hash="c2c88bff">2024.case-1.10</url>
@@ -191,7 +191,7 @@
       <author><first>Raquel</first><last>Rodriguez-Garcia</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <author><first>Julio</first><last>Reyes Montesinos</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <author><first>Jesus M.</first><last>Fraile-Hernandez</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
-      <author><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <pages>118-124</pages>
       <abstract>CASE @ EACL 2024 proposes a shared task on Stance and Hate Event Detection for Climate Activism discourse. For our participation in the stance detection task, we propose an ensemble of different approaches: a transformer-based model (RoBERTa), a generative Large Language Model (Llama 2), and a Multi-Task Learning model. Our main goal is twofold: to study the effect of augmenting the training data with external datasets, and to examine the contribution of several, diverse models through a voting ensemble. The results show that if we take the best configuration during training for each of the three models (RoBERTa, Llama 2 and MTL), the ensemble would have ranked first with the highest F1 on the leaderboard for the stance detection subtask.</abstract>
       <url hash="34b85316">2024.case-1.16</url>
@@ -204,7 +204,7 @@
       <author><first>Amrita</first><last>Ganguly</last><affiliation>George Mason University</affiliation></author>
       <author><first>Al Nahian</first><last>Bin Emran</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Md Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <pages>125-131</pages>
@@ -220,7 +220,7 @@
       <author><first>Amrita</first><last>Ganguly</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Md Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <pages>132-138</pages>
       <abstract>The task of identifying public opinions on social media, particularly regarding climate activism and the detection of hate events, has emerged as a critical area of research in our rapidly changing world. With a growing number of people voicing either to support or oppose to climate-related issues - understanding these diverse viewpoints has become increasingly vital. Our team, MasonPerplexity, participates in a significant research initiative focused on this subject. We extensively test various models and methods, discovering that our most effective results are achieved through ensemble modeling, enhanced by data augmentation techniques like back-translation. In the specific components of this research task, our team achieved notable positions, ranking 5th, 1st, and 6th in the respective sub-tasks, thereby illustrating the effectiveness of our approach in this important field of study.</abstract>
       <url hash="1ef9fe9b">2024.case-1.18</url>
@@ -254,7 +254,7 @@
     <paper id="21">
       <title><fixed-case>HAM</fixed-case>i<fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>-baselines at <fixed-case>C</fixed-case>limate<fixed-case>A</fixed-case>ctivism 2024: A Study on the Use of External Data for Hate Speech and Stance Detection</title>
       <author><first>Julio</first><last>Reyes Montesinos</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
-      <author><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
       <pages>156-160</pages>
       <abstract>The CASE@EACL2024 Shared Task addresses Climate Activism online through three subtasks that focus on hate speech detection (Subtask A), hate speech target classification (Subtask B), and stance detection (Subtask C) respectively.Our contribution examines the effect of fine-tuning on external data for each of these subtasks. For the two subtasks that focus on hate speech, we augment the training data with the OLID dataset, whereas for the stance subtask we harness the SemEval-2016 Stance dataset. We fine-tune RoBERTa and DeBERTa models for each of the subtasks, with and without external training data.For the hate speech detection and stance detection subtasks, our RoBERTa models came up third and first on the leaderboard, respectively. While the use of external data was not relevant on those tasks, we found that it greatly improved the performance on the hate speech target categorization.</abstract>
       <url hash="8c88ca45">2024.case-1.21</url>
@@ -352,7 +352,7 @@
       <author><first>Burak</first><last>Işık</last><affiliation>Boğaziçi University</affiliation></author>
       <author><first>Yasemin</first><last>Korkmaz</last><affiliation>Hrant Dink Foundation</affiliation></author>
       <author><first>Didar</first><last>Akar</last><affiliation>Bogazici University</affiliation></author>
-      <author><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
       <pages>205-214</pages>
       <abstract>The use of hate speech targeting ethnicity, nationalities, religious identities, and specific groups has been on the rise in the news media. However, most existing automatic hate speech detection models focus on identifying hate speech, often neglecting the target group-specific language that is common in news articles. To address this problem, we first compile a hate speech dataset, TurkishHatePrintCorpus, derived from Turkish news articles and annotate it specifically for the language related to the targeted group. We then introduce the HateTargetBERT model, which integrates the target-centric linguistic features extracted in this study into the BERT model, and demonstrate its effectiveness in detecting hate speech while allowing the model’s classification decision to be explained. We have made the dataset and source code publicly available at url{https://github.com/boun-tabi/HateTargetBERT-TR}.</abstract>
       <url hash="e304fe62">2024.case-1.29</url>
@@ -396,7 +396,7 @@
       <author><first>Inanc</first><last>Arin</last><affiliation>Sabanci University</affiliation></author>
       <author><first>Elif</first><last>Erol</last><affiliation>Hrant Dink Foundation</affiliation></author>
       <author><first>Berrin</first><last>Yanikoglu</last><affiliation>Sabanci University</affiliation></author>
-      <author><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last><affiliation>Bogazici University</affiliation></author>
       <pages>229-233</pages>
       <abstract>This paper offers an overview of Hate Speech Detection in Turkish and Arabic Tweets (HSD-2Lang) Shared Task at CASE workshop to be held jointly with EACL 2024. The task was divided into two subtasks: Subtask A, targeting hate speech detection in various Turkish contexts, and Subtask B, addressing hate speech detection in Arabic with limited data. The shared task attracted significant attention with 33 teams that registered and 10 teams that participated in at least one task. In this paper, we provide the details of the tasks and the approaches adopted by the participant along with an analysis of the results obtained from this shared task.</abstract>
       <url hash="9a039c4b">2024.case-1.32</url>
diff --git a/data/xml/2024.cawl.xml b/data/xml/2024.cawl.xml
index 17a00f737b..90948811f8 100644
--- a/data/xml/2024.cawl.xml
+++ b/data/xml/2024.cawl.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Computation and Written Language (CAWL) @ LREC-COLING 2024</booktitle>
       <editor><first>Kyle</first><last>Gorman</last></editor>
-      <editor><first>Emily</first><last>Prud'hommeaux</last></editor>
+      <editor id="emily-prudhommeaux"><first>Emily</first><last>Prud'hommeaux</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
-      <editor><first>Richard</first><last>Sproat</last></editor>
+      <editor id="richard-sproat"><first>Richard</first><last>Sproat</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -31,9 +31,9 @@
     <paper id="2">
       <title>A Joint Approach for Automatic Analysis of Reading and Writing Errors</title>
       <author><first>Wieke</first><last>Harmsen</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>8–17</pages>
       <abstract>Analyzing the errors that children make on their ways to becoming fluent readers and writers can provide invaluable scientific insights into the processes that underlie literacy acquisition. To this end, we present in this paper an extension of an earlier developed spelling error detection and classification algorithm for Dutch, so that reading errors can also be automatically detected from their phonetic transcription. The strength of this algorithm lies in its ability to detect errors at Phoneme-Corresponding Unit (PCU) level, where a PCU is a sequence of letters corresponding to one phoneme. We validated this algorithm and found good agreement between manual and automatic reading error classifications. We also used the algorithm to analyze written words by second graders and phonetic transcriptions of read words by first graders. With respect to the writing data, we found that the PCUs ‘ei’, ‘eu’, ‘g’, ‘ij’ and ‘ch’ were most frequently written incorrectly, for the reading data, these were the PCUs ‘v’, ‘ui’, ‘ng’, ‘a’ and ‘g’. This study presents a first attempt at developing a joint method for detecting reading and writing errors. In future research this algorithm can be used to analyze corpora containing reading and writing data from the same children.</abstract>
       <url hash="d818197a">2024.cawl-1.2</url>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Tool for Constructing a Large-Scale Corpus of Code Comments and Other Source Code Annotations</title>
       <author><first>Luna</first><last>Peck</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <pages>18–22</pages>
       <abstract>The sublanguage of source code annotations—explanatory natural language writing that accompanies programming source code—is little-studied in linguistics. To facilitate research into this domain, we have developed a program prototype that can extract code comments and changelogs (i.e. commit messages) from public, open-source code repositories, with automatic tokenization and part-of-speech tagging on the extracted text. The program can also automatically detect and discard “commented-out” source code in data from Python repositories, to prevent it from polluting the corpus, demonstrating that such sanitization is likely feasible for other programming languages as well. With the current tool, we have produced a 6-million word corpus of English-language comments extracted from three different programming languages: Python, C, and C++.</abstract>
       <url hash="f22cebf9">2024.cawl-1.3</url>
diff --git a/data/xml/2024.ccl.xml b/data/xml/2024.ccl.xml
index 23d4254e42..037fdcc488 100644
--- a/data/xml/2024.ccl.xml
+++ b/data/xml/2024.ccl.xml
@@ -924,7 +924,7 @@
       <author><first>Zhiyu</first><last>Yang</last></author>
       <author><first>Shuo</first><last>Wang</last></author>
       <author><first>Yukun</first><last>Yan</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>973–985</pages>
       <abstract>“Free-form table question answering is a challenging task since tables contain structured contentscompared to plain texts, which requires high-level reasoning abilities to effectively identify cellsthat are relevant to the question and produce a correct and faithful answer based on their relations.Large language models (LLMs) have exhibited remarkable reasoning capabilities in numerousNLP applications. However, in some specific tasks, specially-trained small models can still out-perform LLMs. Furthermore, small models require extremely less computation costs comparedto LLMs. To leverage the strengths of both types of models, we propose a Relevant-Cell-basedKnowledge Distillation with inference-time Teacher Guidance (RCKD-TG) method. This ap-proach aims to combine small free-form table question answering models’ abilities to learn fromhuman annotations and large language models’ abilities to effectively reason from table contents,via applying Relevant-Cell-based rationales distilled from LLMs to small models’ training andinference stages. Our experiments demonstrate the superiority of our method over vanilla smallmodels in correctness, faithfulness, adequacy and fluency, also over general LLMs in adheringto the style of human annotations. We achieve state-of-the-art performance on FeTaQA, a rep-resentative free-form table question answering benchmark. Our result of a 41.3 BLEU scoredemonstrates the feasibility of effectively using small models’ task-specific abilities and LLMs’reasoning capabilities at the same time. Additionally, our method exhibits high computation ef-ficiency and data efficiency. Compared to strong baselines, we achieve better performance withsignificantly less training data.”</abstract>
@@ -1253,8 +1253,8 @@
       <author><first>Bing</first><last>Xu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1310–1319</pages>
       <abstract>Recently, there has been a trend of evaluating the Large Language Model (LLM) quality in the flavor of LLM-as-a-Judge, namely leveraging another LLM to evaluate the current output quality. However, existing judges are proven to be biased, namely they would favor answers which present better superficial quality (such as verbosity, fluency) while ignoring the instruction following ability. In this work, we propose systematic research about the bias of LLM-as-a-Judge. Specifically, for closed-source judge models, we apply calibration to mitigate the significance of superficial quality, both on probability level and prompt level. For open-source judge models, we propose to mitigate the bias by contrastive training, with curated negative samples that deviate from instruction but present better superficial quality. We apply our methods on the bias evaluation benchmark, and experiment results show our methods mitigate the bias by a large margin while maintaining a satisfactory evaluation accuracy.</abstract>
       <url hash="c647d529">2024.ccl-1.101</url>
@@ -1844,7 +1844,7 @@
       <author><first>Liu</first><last>Nuowei</last></author>
       <author><first>Chen</first><last>Xinhao</last></author>
       <author><first>Ren</first><last>Yupei</last></author>
-      <author><first>Lan</first><last>Man</last></author>
+      <author id="man-lan"><first>Lan</first><last>Man</last></author>
       <author><first>Bai</first><last>Xiaopeng</last></author>
       <author><first>Wu</first><last>Yuanbin</last></author>
       <author><first>Mao</first><last>Shaoguang</last></author>
@@ -1920,7 +1920,7 @@
       <author><first>Zhuang</first><last>Xinlin</last></author>
       <author><first>Shen</first><last>Xinshu</last></author>
       <author><first>Wu</first><last>Hongyi</last></author>
-      <author><first>Lan</first><last>Man</last></author>
+      <author id="man-lan"><first>Lan</first><last>Man</last></author>
       <author><first>Bai</first><last>Xiaopeng</last></author>
       <author><first>Wu</first><last>Yuanbin</last></author>
       <author><first>Zhou</first><last>Aimin</last></author>
diff --git a/data/xml/2024.cl.xml b/data/xml/2024.cl.xml
index 665b0576ba..0fd3100106 100644
--- a/data/xml/2024.cl.xml
+++ b/data/xml/2024.cl.xml
@@ -13,7 +13,7 @@
     </meta>
     <paper id="1">
       <title>My Big, Fat 50-Year Journey</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.1162/coli_a_00499</doi>
       <abstract>My most heartfelt thanks to ACL for this tremendous honor. I’m completely thrilled. I cannot tell you how surprised I was when I got Iryna’s email. It is amazing that my first ACL conference since 2019 in Florence includes this award. What a wonderful way to be back with all of my friends and family here at ACL. I’m going to tell you about my big fat 50-year journey. What have I been doing for the last 50 years? Well, finding meaning, quite literally in words. Or in other words, exploring how computational lexical semantics can support natural language understanding. This is going to be quick. Hold onto your hats, here we go.</abstract>
       <pages>1–24</pages>
@@ -65,7 +65,7 @@
       <author><first>Swarnadeep</first><last>Bhar</last></author>
       <author><first>Soumadeep</first><last>Saha</last></author>
       <author><first>Utpal</first><last>Garain</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <doi>10.1162/coli_a_00493</doi>
       <abstract>Transformer-based language models have been shown to be highly effective for several NLP tasks. In this article, we consider three transformer models, BERT, RoBERTa, and XLNet, in both small and large versions, and investigate how faithful their representations are with respect to the semantic content of texts. We formalize a notion of semantic faithfulness, in which the semantic content of a text should causally figure in a model’s inferences in question answering. We then test this notion by observing a model’s behavior on answering questions about a story after performing two novel semantic interventions—deletion intervention and negation intervention. While transformer models achieve high performance on standard question answering tasks, we show that they fail to be semantically faithful once we perform these interventions for a significant number of cases (∼ 50% for deletion intervention, and ∼ 20% drop in accuracy for negation intervention). We then propose an intervention-based training regime that can mitigate the undesirable effects for deletion intervention by a significant margin (from ∼ 50% to ∼ 6%). We analyze the inner-workings of the models to better understand the effectiveness of intervention-based training for deletion intervention. But we show that this training does not attenuate other aspects of semantic unfaithfulness such as the models’ inability to deal with negation intervention or to capture the predicate–argument structure of texts. We also test InstructGPT, via prompting, for its ability to handle the two interventions and to capture predicate–argument structure. While InstructGPT models do achieve very high performance on predicate–argument structure task, they fail to respond adequately to our deletion and negation interventions.</abstract>
       <pages>119–155</pages>
@@ -76,7 +76,7 @@
     <paper id="6">
       <title>On the Role of Morphological Information for Contextual Lemmatization</title>
       <author><first>Olia</first><last>Toporkov</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <doi>10.1162/coli_a_00497</doi>
       <abstract>Lemmatization is a natural language processing (NLP) task that consists of producing, from a given inflected word, its canonical form or lemma. Lemmatization is one of the basic tasks that facilitate downstream NLP applications, and is of particular importance for high-inflected languages. Given that the process to obtain a lemma from an inflected word can be explained by looking at its morphosyntactic category, including fine-grained morphosyntactic information to train contextual lemmatizers has become common practice, without considering whether that is the optimum in terms of downstream performance. In order to address this issue, in this article we empirically investigate the role of morphological information to develop contextual lemmatizers in six languages within a varied spectrum of morphological complexity: Basque, Turkish, Russian, Czech, Spanish, and English. Furthermore, and unlike the vast majority of previous work, we also evaluate lemmatizers in out-of-domain settings, which constitutes, after all, their most common application use. The results of our study are rather surprising. It turns out that providing lemmatizers with fine-grained morphological features during training is not that beneficial, not even for agglutinative languages. In fact, modern contextual word representations seem to implicitly encode enough morphological information to obtain competitive contextual lemmatizers without seeing any explicit morphological signal. Moreover, our experiments suggest that the best lemmatizers out-of-domain are those using simple UPOS tags or those trained without morphology and, lastly, that current evaluation practices for lemmatization are not adequate to clearly discriminate between models.</abstract>
       <pages>157–191</pages>
@@ -87,7 +87,7 @@
       <title>Stance Detection with Explanations</title>
       <author><first>Rudra Ranajee</first><last>Saha</last></author>
       <author><first>Laks V. S.</first><last>Lakshmanan</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <doi>10.1162/coli_a_00501</doi>
       <abstract>Identification of stance has recently gained a lot of attention with the extreme growth of fake news and filter bubbles. Over the last decade, many feature-based and deep-learning approaches have been proposed to solve stance detection. However, almost none of the existing works focus on providing a meaningful explanation for their prediction. In this work, we study stance detection with an emphasis on generating explanations for the predicted stance by capturing the pivotal argumentative structure embedded in a document. We propose to build a stance tree that utilizes rhetorical parsing to construct an evidence tree and to use Dempster Shafer Theory to aggregate the evidence. Human studies show that our unsupervised technique of generating stance explanations outperforms the SOTA extractive summarization method in terms of informativeness, non-redundancy, coverage, and overall quality. Furthermore, experiments show that our explanation-based stance prediction excels or matches the performance of the SOTA model on various benchmark datasets.</abstract>
       <pages>193–235</pages>
@@ -122,7 +122,7 @@
     <paper id="10">
       <title><fixed-case>P</fixed-case>olysemy—<fixed-case>E</fixed-case>vidence from Linguistics, Behavioral Science, and Contextualized Language Models</title>
       <author><first>Janosch</first><last>Haber</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.1162/coli_a_00500</doi>
       <abstract>Polysemy is the type of lexical ambiguity where a word has multiple distinct but related interpretations. In the past decade, it has been the subject of a great many studies across multiple disciplines including linguistics, psychology, neuroscience, and computational linguistics, which have made it increasingly clear that the complexity of polysemy precludes simple, universal answers, especially concerning the representation and processing of polysemous words. But fuelled by the growing availability of large, crowdsourced datasets providing substantial empirical evidence; improved behavioral methodology; and the development of contextualized language models capable of encoding the fine-grained meaning of a word within a given context, the literature on polysemy recently has developed more complex theoretical analyses. In this survey we discuss these recent contributions to the investigation of polysemy against the backdrop of a long legacy of research across multiple decades and disciplines. Our aim is to bring together different perspectives to achieve a more complete picture of the heterogeneity and complexity of the phenomenon of polysemy. Specifically, we highlight evidence supporting a range of hybrid models of the mental processing of polysemes. These hybrid models combine elements from different previous theoretical approaches to explain patterns and idiosyncrasies in the processing of polysemous that the best known models so far have failed to account for. Our literature review finds that (i) traditional analyses of polysemy can be limited in their generalizability by loose definitions and selective materials; (ii) linguistic tests provide useful evidence on individual cases, but fail to capture the full range of factors involved in the processing of polysemous sense extensions; and (iii) recent behavioral (psycho) linguistics studies, large-scale annotation efforts, and investigations leveraging contextualized language models provide accumulating evidence suggesting that polysemous sense similarity covers a wide spectrum between identity of sense and homonymy-like unrelatedness of meaning. We hope that the interdisciplinary account of polysemy provided in this survey inspires further fundamental research on the nature of polysemy and better equips applied research to deal with the complexity surrounding the phenomenon, for example, by enabling the development of benchmarks and testing paradigms for large language models informed by a greater portion of the rich evidence on the phenomenon currently available.</abstract>
       <pages>351–417</pages>
@@ -169,7 +169,7 @@
       <author><first>Wenxi</first><last>Li</last></author>
       <author><first>Yutong</first><last>Zhang</last></author>
       <author><first>Guy</first><last>Emerson</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <doi>10.1162/coli_a_00504</doi>
       <abstract>Divergence of languages observed at the surface level is a major challenge encountered by multilingual data representation, especially when typologically distant languages are involved. Drawing inspiration from a formalist Chomskyan perspective towards language universals, Universal Grammar (UG), this article uses deductively pre-defined universals to analyze a multilingually heterogeneous phenomenon, event nominals. In this way, deeper universality of event nominals beneath their huge divergence in different languages is uncovered, which empowers us to break barriers between languages and thus extend insights from some synthetic languages to a non-inflectional language, Mandarin Chinese. Our empirical investigation also demonstrates this UG-inspired schema is effective: With its assistance, the inter-annotator agreement (IAA) for identifying event nominals in Mandarin grows from 88.02% to 94.99%, and automatic detection of event-reading nominalizations on the newly-established data achieves an accuracy of 94.76% and an F1 score of 91.3%, which significantly surpass those achieved on the pre-existing resource by 9.8% and 5.2%, respectively. Our systematic analysis also sheds light on nominal semantic role labeling. By providing a clear definition and classification on arguments of event nominal, the IAA of this task significantly increases from 90.46% to 98.04%.</abstract>
       <pages>535–561</pages>
@@ -235,7 +235,7 @@
       <title>Common Flaws in Running Human Evaluation Experiments in <fixed-case>NLP</fixed-case></title>
       <author><first>Craig</first><last>Thomson</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <doi>10.1162/coli_a_00508</doi>
       <abstract>While conducting a coordinated set of repeat runs of human evaluation experiments in NLP, we discovered flaws in every single experiment we selected for inclusion via a systematic process. In this squib, we describe the types of flaws we discovered, which include coding errors (e.g., loading the wrong system outputs to evaluate), failure to follow standard scientific practice (e.g., ad hoc exclusion of participants and responses), and mistakes in reported numerical results (e.g., reported numbers not matching experimental data). If these problems are widespread, it would have worrying implications for the rigor of NLP evaluation experiments as currently conducted. We discuss what researchers can do to reduce the occurrence of such flaws, including pre-registration, better code development practices, increased testing and piloting, and post-publication addressing of errors.</abstract>
       <pages>795–805</pages>
@@ -244,7 +244,7 @@
     </paper>
     <paper id="10">
       <title>The Pitfalls of Defining Hallucination</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <doi>10.1162/coli_a_00509</doi>
       <abstract>Despite impressive advances in Natural Language Generation (NLG) and Large Language Models (LLMs), researchers are still unclear about important aspects of NLG evaluation. To substantiate this claim, I examine current classifications of hallucination and omission in data-text NLG, and I propose a logic-based synthesis of these classfications. I conclude by highlighting some remaining limitations of all current thinking about hallucination and by discussing implications for LLMs.</abstract>
       <pages>807–816</pages>
@@ -301,7 +301,7 @@
       <title>Relation Extraction in Underexplored Biomedical Domains: A Diversity-optimized Sampling and Synthetic Data Generation Approach</title>
       <author><first>Maxime</first><last>Delmas</last></author>
       <author><first>Magdalena</first><last>Wysocka</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <doi>10.1162/coli_a_00520</doi>
       <abstract>The sparsity of labeled data is an obstacle to the development of Relation Extraction (RE) models and the completion of databases in various biomedical areas. While being of high interest in drug-discovery, the literature on natural products, reporting the identification of potential bioactive compounds from organisms, is a concrete example of such an overlooked topic. To mark the start of this new task, we created the first curated evaluation dataset and extracted literature items from the LOTUS database to build training sets. To this end, we developed a new sampler, inspired by diversity metrics in ecology, named Greedy Maximum Entropy sampler (https://github.com/idiap/gme-sampler). The strategic optimization of both balance and diversity of the selected items in the evaluation set is important given the resource-intensive nature of manual curation. After quantifying the noise in the training set, in the form of discrepancies between the text of input abstracts and the expected output labels, we explored different strategies accordingly. Framing the task as an end-to-end Relation Extraction, we evaluated the performance of standard fine-tuning (BioGPT, GPT-2, and Seq2rel) and few-shot learning with open Large Language Models (LLMs) (LLaMA 7B-65B). In addition to their evaluation in few-shot settings, we explore the potential of open LLMs as synthetic data generators and propose a new workflow for this purpose. All evaluated models exhibited substantial improvements when fine-tuned on synthetic abstracts rather than the original noisy data. We provide our best performing (F1-score = 59.0) BioGPT-Large model for end-to-end RE of natural products relationships along with all the training and evaluation datasets. See more details at https://github.com/idiap/abroad-re.</abstract>
       <pages>953–1000</pages>
@@ -392,7 +392,7 @@
       <title>Language Learning, Representation, and Processing in Humans and Machines: Introduction to the Special Issue</title>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <doi>10.1162/coli_e_00539</doi>
       <abstract>Large Language Models (LLMs) and humans acquire knowledge about language without direct supervision. LLMs do so by means of specific training objectives, while humans rely on sensory experience and social interaction. This parallelism has created a feeling in NLP and cognitive science that a systematic understanding of how LLMs acquire and use the encoded knowledge could provide useful insights for studying human cognition. Conversely, methods and findings from the field of cognitive science have occasionally inspired language model development. Yet, the differences in the way that language is processed by machines and humans—in terms of learning mechanisms, amounts of data used, grounding and access to different modalities—make a direct translation of insights challenging. The aim of this edited volume has been to create a forum of exchange and debate along this line of research, inviting contributions that further elucidate similarities and differences between humans and LLMs.</abstract>
       <pages>1201–1210</pages>
@@ -404,7 +404,7 @@
       <author><first>Emily</first><last>Allaway</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Sarah-Jane</first><last>Leslie</last></author>
       <doi>10.1162/coli_a_00530</doi>
       <abstract>Large language models (LLMs) have garnered a great deal of attention for their exceptional generative performance on commonsense and reasoning tasks. In this work, we investigate LLMs’ capabilities for generalization using a particularly challenging type of statement: generics. Generics express generalizations (e.g., birds can fly) but do so without explicit quantification. They are notable because they generalize over their instantiations (e.g., sparrows can fly) yet hold true even in the presence of exceptions (e.g., penguins do not). For humans, these generic generalizations play a fundamental role in cognition, concept acquisition, and intuitive reasoning. We investigate how LLMs respond to and reason about generics. To this end, we first propose a framework grounded in pragmatics to automatically generate both exceptions and instantiations – collectively exemplars. We make use of focus—a pragmatic phenomenon that highlights meaning-bearing elements in a sentence—to capture the full range of interpretations of generics across different contexts of use. This allows us to derive precise logical definitions for exemplars and operationalize them to automatically generate exemplars from LLMs. Using our system, we generate a dataset of ∼370kexemplars across ∼17k generics and conduct a human validation of a sample of the generated data. We use our final generated dataset to investigate how LLMs reason about generics. Humans have a documented tendency to conflate universally quantified statements (e.g., all birds can fly) with generics. Therefore, we probe whether LLMs exhibit similar overgeneralization behavior in terms of quantification and in property inheritance. We find that LLMs do show evidence of overgeneralization, although they sometimes struggle to reason about exceptions. Furthermore, we find that LLMs may exhibit similar non-logical behavior to humans when considering property inheritance from generics.</abstract>
diff --git a/data/xml/2024.cl4health.xml b/data/xml/2024.cl4health.xml
index 8e2f070b29..38243d42f5 100644
--- a/data/xml/2024.cl4health.xml
+++ b/data/xml/2024.cl4health.xml
@@ -22,8 +22,8 @@
     <paper id="1">
       <title>Improving Sign Language Production in the Healthcare Domain Using <fixed-case>UMLS</fixed-case> and Multi-task Learning</title>
       <author><first>Jonathan David</first><last>Mutal</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Bastien</first><last>David</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Irene</first><last>Strasly</last></author>
@@ -39,7 +39,7 @@
       <author><first>Rebecka Maria</first><last>Norman</last></author>
       <author><first>Elma</first><last>Jelin</last></author>
       <author><first>Øyvind Andresen</first><last>Bjertnæs</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>8–19</pages>
       <abstract>Sentiment analysis is an important tool for aggregating patient voices, in order to provide targeted improvements in healthcare services. A prerequisite for this is the availability of in-domain data annotated for sentiment. This article documents an effort to add sentiment annotations to free-text comments in patient surveys collected by the Norwegian Institute of Public Health (NIPH). However, annotation can be a time-consuming and resource-intensive process, particularly when it requires domain expertise. We therefore also evaluate a possible alternative to human annotation, using large language models (LLMs) as annotators. We perform an extensive evaluation of the approach for two openly available pretrained LLMs for Norwegian, experimenting with different configurations of prompts and in-context learning, comparing their performance to human annotators. We find that even for zero-shot runs, models perform well above the baseline for binary sentiment, but still cannot compete with human annotators on the full dataset.</abstract>
@@ -58,7 +58,7 @@
     <paper id="4">
       <title>Annotating Emotions in Acquired Brain Injury Patients’ Narratives</title>
       <author><first>Salomé</first><last>Klein</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Hélène</first><last>Vassiliadou</last></author>
       <author><first>Marie</first><last>Kuppelin</last></author>
       <author><first>Joffrey</first><last>Becart</last></author>
@@ -145,7 +145,7 @@
       <author><first>Anuja</first><last>Tayal</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <author><first>Devika</first><last>Salunke</last></author>
-      <author><first>Andrew D.</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew D.</first><last>Boyd</last></author>
       <author><first>Carolyn A.</first><last>Dickens</last></author>
       <author><first>Eulalia P.</first><last>Abril</last></author>
       <author><first>Olga</first><last>Garcia-Bedoya</last></author>
@@ -254,7 +254,7 @@
       <title><fixed-case>C</fixed-case>lini<fixed-case>R</fixed-case>es: Publicly Available Mapping of Clinical Lexical Resources</title>
       <author><first>Elena</first><last>Zotova</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>163–172</pages>
       <abstract>This paper presents a human-readable resource for mapping identifiers from various clinical knowledge bases. This resource is a version of UMLS Metathesaurus enriched with WordNet 3.0 and 3.1 synsets, Wikidata items with their clinical identifiers, SNOMED CT to ICD-10 mapping and Spanish ICD-10 codes description. The main goal of the presented resource is to provide semantic interoperability across the clinical concepts from various knowledge bases and facilitate its integration into mapping tools. As a side effect, the mapping enriches already annotated medical corpora for entity recognition or entity linking tasks with new labels. We experiment with entity linking task, using a corpus annotated both manually and with the mapping method and demonstrate that a semi-automatic way of annotation may be used to create new labels. The resource is available in English and Spanish, although all languages of UMLS may be extracted. The new lexical resource is publicly available.</abstract>
       <url hash="4adca1af">2024.cl4health-1.20</url>
@@ -291,7 +291,7 @@
       <author><first>Jesus</first><last>Lovon-Melgarejo</last></author>
       <author><first>Thouria</first><last>Ben-Haddi</last></author>
       <author><first>Jules</first><last>Di Scala</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Lynda</first><last>Tamine</last></author>
       <pages>189–196</pages>
       <abstract>The lack of standardized evaluation benchmarks in the medical domain for text inputs can be a barrier to widely adopting and leveraging the potential of natural language models for health-related downstream tasks. This paper revisited an openly available MIMIC-IV benchmark for electronic health records (EHRs) to address this issue. First, we integrate the MIMIC-IV data within the Hugging Face datasets library to allow an easy share and use of this collection. Second, we investigate the application of templates to convert EHR tabular data to text. Experiments using fine-tuned and zero-shot LLMs on the mortality of patients task show that fine-tuned text-based models are competitive against robust tabular classifiers. In contrast, zero-shot LLMs struggle to leverage EHR representations. This study underlines the potential of text-based approaches in the medical field and highlights areas for further improvement.</abstract>
@@ -301,7 +301,7 @@
     <paper id="24">
       <title>Unraveling Clinical Insights: A Lightweight and Interpretable Approach for Multimodal and Multilingual Knowledge Integration</title>
       <author><first>Kanimozhi</first><last>Uma</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>197–203</pages>
       <abstract>In recent years, the analysis of clinical texts has evolved significantly, driven by the emergence of language models like BERT such as PubMedBERT, and ClinicalBERT, which have been tailored for the (bio)medical domain that rely on extensive archives of medical documents. While they boast high accuracy, their lack of interpretability and language transfer limitations restrict their clinical utility. To address this, we propose a new, lightweight graph-based embedding method designed specifically for radiology reports. This approach considers the report’s structure and content, connecting medical terms through the multilingual SNOMED Clinical Terms knowledge base. The resulting graph embedding reveals intricate relationships among clinical terms, enhancing both clinician comprehension and clinical accuracy without the need for large pre-training datasets. Demonstrating the versatility of our method, we apply this embedding to two tasks: disease and image classification in X-ray reports. In disease classification, our model competes effectively with BERT-based approaches, yet it is significantly smaller and requires less training data. Additionally, in image classification, we illustrate the efficacy of the graph embedding by leveraging cross-modal knowledge transfer, highlighting its applicability across diverse languages.</abstract>
       <url hash="708a9e53">2024.cl4health-1.24</url>
@@ -341,7 +341,7 @@
       <title>Enhancing Consumer Health Question Reformulation: Chain-of-Thought Prompting Integrating Focus, Type, and User Knowledge Level</title>
       <author><first>Jooyeon</first><last>Lee</last></author>
       <author><first>Luan Huy</first><last>Pham</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>220–228</pages>
       <abstract>In this paper, we explore consumer health question (CHQ) reformulation, focusing on enhancing the quality of reformation of questions without considering interest shifts. Our study introduces the use of the NIH GARD website as a gold standard dataset for this specific task, emphasizing its relevance and applicability. Additionally, we developed other datasets consisting of related questions scraped from Google, Bing, and Yahoo. We augmented, evaluated and analyzed the various datasets, demonstrating that the reformulation task closely resembles the question entailment generation task. Our approach, which integrates the Focus and Type of consumer inquiries, represents a significant advancement in the field of question reformulation. We provide a comprehensive analysis of different methodologies, offering insights into the development of more effective and user-centric AI systems for consumer health support.</abstract>
       <url hash="6c73da3b">2024.cl4health-1.27</url>
@@ -382,7 +382,7 @@
       <title>Biomedical Entity Linking for <fixed-case>D</fixed-case>utch: Fine-tuning a Self-alignment <fixed-case>BERT</fixed-case> Model on an Automatically Generated <fixed-case>W</fixed-case>ikipedia Corpus</title>
       <author><first>Fons</first><last>Hartendorp</last></author>
       <author><first>Tom</first><last>Seinen</last></author>
-      <author><first>Erik</first><last>van Mulligen</last></author>
+      <author id="erik-van-mulligen"><first>Erik</first><last>van Mulligen</last></author>
       <author><first>Suzan</first><last>Verberne</last></author>
       <pages>253–263</pages>
       <abstract>Biomedical entity linking, a main component in automatic information extraction from health-related texts, plays a pivotal role in connecting textual entities (such as diseases, drugs and body parts mentioned by patients) to their corresponding concepts in a structured biomedical knowledge base. The task remains challenging despite recent developments in natural language processing. This report presents the first evaluated biomedical entity linking model for the Dutch language. We use MedRoBERTa.nl as basemodel and perform second-phase pretraining through self-alignment on a Dutch biomedical ontology extracted from the UMLS and Dutch SNOMED. We derive a corpus from Wikipedia of ontology-linked Dutch biomedical entities in context and fine-tune our model on this dataset. We evaluate our model on the Dutch portion of the Mantra GSC-corpus and achieve 54.7% classification accuracy and 69.8% 1-distance accuracy. We then perform a case study on a collection of unlabeled, patient-support forum data and show that our model is hampered by the limited quality of the preceding entity recognition step. Manual evaluation of small sample indicates that of the correctly extracted entities, around 65% is linked to the correct concept in the ontology. Our results indicate that biomedical entity linking in a language other than English remains challenging, but our Dutch model can be used to for high-level analysis of patient-generated text.</abstract>
diff --git a/data/xml/2024.clasp.xml b/data/xml/2024.clasp.xml
index ad50e24e42..4c997cf59d 100644
--- a/data/xml/2024.clasp.xml
+++ b/data/xml/2024.clasp.xml
@@ -41,7 +41,7 @@
     <paper id="3">
       <title>How Does an Adjective Sound Like? Exploring Audio Phrase Composition with Textual Embeddings</title>
       <author><first>Saba</first><last>Nazir</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>13–18</pages>
       <abstract>We learn matrix representations for the fre- quent sound-relevant adjectives of English and compose them with vector representations of their nouns. The matrices are learnt jointly from audio and textual data, via linear regres- sion and tensor skipgram. They are assessed using an adjective similarity benchmark and also a novel adjective-noun phrase similarity dataset, applied to two tasks: semantic similar- ity and audio similarity. Joint learning via Ten- sor Skipgram (TSG) outperforms audio-only models, matrix composition outperforms addi- tion and non compositional phrase vectors.</abstract>
       <url hash="e2728db2">2024.clasp-1.3</url>
@@ -83,7 +83,7 @@
     <paper id="7">
       <title>Fifty shapes of <fixed-case>BL</fixed-case>i<fixed-case>MP</fixed-case>: syntactic learning curves in language models are not uniform, but sometimes unruly</title>
       <author><first>Bastian</first><last>Bunzeck</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>39–55</pages>
       <abstract>Syntactic learning curves in LMs are usually reported as relatively stable and power law-shaped. By analyzing the learning curves of different LMs on various syntactic phenomena using both small self-trained llama models and larger pre-trained pythia models, we show that while many phenomena do follow typical power law curves, others exhibit S-shaped, U-shaped, or erratic patterns. Certain syntactic paradigms remain challenging even for large models, resulting in persistent preference for ungrammatical sentences. Most phenomena show similar curves for their paradigms, but the existence of diverging patterns and oscillations indicates that average curves mask important developments, underscoring the need for more detailed analyses of individual learning trajectories.</abstract>
       <url hash="c8f43866">2024.clasp-1.7</url>
diff --git a/data/xml/2024.clib.xml b/data/xml/2024.clib.xml
index ec3ec4e743..e639bdca54 100644
--- a/data/xml/2024.clib.xml
+++ b/data/xml/2024.clib.xml
@@ -18,7 +18,7 @@
     <paper id="1">
       <title>A Cross-model Study on Learning <fixed-case>R</fixed-case>omanian Parts of Speech with Transformer Models</title>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Vasile</first><last>Păiş</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Valentin</first><last>Badea</last></author>
@@ -60,7 +60,7 @@
       <author><first>Milica Ikonić</first><last>Nešić</last></author>
       <author><first>Saša</first><last>Petalinkar</last></author>
       <author><first>Mihailo</first><last>Škorić</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Biljana</first><last>Rujević</last></author>
       <pages>58–70</pages>
       <abstract>This study presents the Sentiment Analysis of the Serbian old novels from the 1840-1920 period, employing the Mistral Large Language Model (LLM) to pioneer zero and few-shot learning techniques. The main approach innovates by devising research prompts that include guidance text for zero-shot classification and examples for few-shot learning, enabling the LLM to classify sentiments into positive, negative, or objective categories. This methodology aims to streamline sentiment analysis by limiting responses, thereby enhancing classification precision. Python, along with the Hugging Face Transformers and LangChain libraries, serves as our technological backbone, facilitating the creation and refinement of research prompts tailored for sentence-level sentiment analysis. The results of sentiment analysis in both scenarios, zero-shot and few-shot, have indicated that the zero-shot approach outperforms, achieving an accuracy of 68.2%.</abstract>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="8">
       <title>Function Multiword Expressions Annotated with Discourse Relations in the <fixed-case>R</fixed-case>omanian Reference Treebank</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Tudor</first><last>Voicu</last></author>
       <pages>90–97</pages>
       <abstract>For the Romanian Reference Treebank, a general language corpus, covering several genres and annotated according to the principles of Universal Dependencies, we present here the annotation of some function words, namely multiword conjunctions, with discourse relations from the Penn Discourse Treebank version 3.0 inventory of such relations. The annotation process was manual, with two annotators for each occurrence of the conjunctions. Lexical-semantic relations of the types synonymy, polysemy can be established between the senses of such conjunctions. The discourse relations are added to the CoNLL-U file in which the treebank is represented.</abstract>
@@ -127,7 +127,7 @@
       <author><first>Agute</first><last>Klints</last></author>
       <author><first>Ilze</first><last>Lokmane</last></author>
       <author><first>Madara</first><last>Stāde</last></author>
-      <author><first>Pēteris</first><last>Paikens</last></author>
+      <author id="peteris-paikens"><first>Pēteris</first><last>Paikens</last></author>
       <pages>113–118</pages>
       <abstract>The electronic dictionary Tēzaurs.lv contains more than 400,000 entries from which 73,000 entries are multi-word expressions (MWEs). Over the past two years, there has been an ongoing division of these MWEs into subgroups (proper names, multi-word terms, taxa, phraseological units, collocations). The article describes the classification of MWEs, focusing on phraseological units (approximately 7,250 entries), as well as on borderline cases of phraseological unit types (phrasemes and idioms) and different MWE groups in general. The division of phraseological units depends on semantic divisibility and figurativeness. In a phraseme, at least one of the constituents retains its literal sense, whereas the meaning of an idiom is not dependent on the literal sense of any of its constituents. As a result, 65919 entries of MWE have been manually classified, and now this information of MWE type is available for the users of the electronic dictionary Tēzaurs.lv.</abstract>
       <url hash="95b30fd2">2024.clib-1.11</url>
diff --git a/data/xml/2024.clicit.xml b/data/xml/2024.clicit.xml
index 6f88fa1303..831479fc6a 100644
--- a/data/xml/2024.clicit.xml
+++ b/data/xml/2024.clicit.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</booktitle>
       <editor><first>Felice</first><last>Dell'Orletta</last></editor>
       <editor><first>Alessandro</first><last>Lenci</last></editor>
-      <editor><first>Simonetta</first><last>Montemagni</last></editor>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <publisher>CEUR Workshop Proceedings</publisher>
       <address>Pisa, Italy</address>
       <month>December</month>
@@ -64,7 +64,7 @@
       <title>Data Augmentation for Low-Resource <fixed-case>I</fixed-case>talian <fixed-case>NLP</fixed-case>: Enhancing Semantic Processing with <fixed-case>DRS</fixed-case></title>
       <author><first>Muhammad Saad</first><last>Amin</last><affiliation>The University of Turin</affiliation></author>
       <author><first>Luca</first><last>Anselma</last><affiliation>Università degli Studi di Torino</affiliation></author>
-      <author><first>Alessandro</first><last>Mazzei</last><affiliation>Università degli Studi di Torino</affiliation></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last><affiliation>Università degli Studi di Torino</affiliation></author>
       <pages>29-38</pages>
       <abstract>Discourse Representation Structure (DRS), a formal meaning representation, has shown promising results in semantic parsing and natural language generation tasks for high-resource languages like English. This paper investigates enhancing the application of DRS to low-resource Italian Natural Language Processing (NLP), in both semantic parsing (Text-to-DRS) and natural language generation (DRS-to-Text). To address the scarcity of annotated corpora for Italian DRS, we propose a novel data augmentation technique that involves the use of external linguistic resources including: (i) WordNet for common nouns, adjectives, adverbs, and verbs; (ii) LLM-generated named entities for proper nouns; and (iii) rule-based algorithms fortense augmentation. This approach not only increases the quantity of training data but also introduces linguistic diversity, which is crucial for improving model performance and robustness. Using this augmented dataset, we developed neural semantic parser and generator models that demonstrated enhanced generalization ability compared to models trained on non-augmented data. We evaluated the effect of semantic data augmentation using two state-of-the-art transformer-based neural sequence-to-sequence models, i.e., byT5 and IT5. Our implementation shows promising results for Italian semanticprocessing. Data augmentation significantly increased the performance of semantic parsing from 76.10 to 90.56 (+14.46%) F1-SMATCH score and generation with 37.79 to 57.48 (+19.69%) BLEU, 30.83 to 40.95 (+10.12%) METEOR, 81.66 to 90.97 (+9.31%) COMET, 54.84 to 70.88 (+16.04%) chrF, and 88.86 to 92.97 (+4.11%) BERT scores. These results demonstrate the effectiveness of our novel augmentation approach in enhancing semantic processing capabilities for low-resource languages like Italian.</abstract>
       <url hash="a075e8be">2024.clicit-1.5</url>
@@ -87,7 +87,7 @@
       <author><first>Pierpaolo</first><last>Basile</last><affiliation>Department of Computer Science, University of Bari Aldo Moro</affiliation></author>
       <author><first>Marco</first><last>Degemmis</last><affiliation>University of Bari</affiliation></author>
       <author><first>Marco</first><last>Polignano</last><affiliation>University of Bari</affiliation></author>
-      <author><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
       <author><first>Lucia</first><last>Siciliani</last><affiliation>University of Bari Aldo Moro</affiliation></author>
       <author><first>Vincenzo</first><last>Tamburrano</last><affiliation>University of Bari Aldo Moro</affiliation></author>
       <author><first>Fabiana</first><last>Battista</last><affiliation>University of Bari Aldo Moro</affiliation></author>
@@ -224,7 +224,7 @@
       <title><fixed-case>B</fixed-case>a<fixed-case>BIE</fixed-case>s: A Benchmark for the Linguistic Evaluation of <fixed-case>I</fixed-case>talian Baby Language Models</title>
       <author><first>Luca</first><last>Capone</last><affiliation>CoLing Lab, Dipartimento di Filologia, Letteratura e Linguistica, Università di Pisa, Via Santa Maria, Pisa, 56126, Italy</affiliation></author>
       <author><first>Alice</first><last>Suozzi</last><affiliation>Ca Foscari University of Venice</affiliation></author>
-      <author><first>Gianluca</first><last>Lebani</last><affiliation>Ca’ Foscari University of Venice</affiliation></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last><affiliation>Ca’ Foscari University of Venice</affiliation></author>
       <author><first>Alessandro</first><last>Lenci</last><affiliation>University of Pisa</affiliation></author>
       <pages>157-170</pages>
       <abstract>The possibility of comparing the linguistic competence of Language Models (LMs) to that of children has gained growing attention lately, raising the need for effective tools for evaluating both the former and the latter. To this purpose, we developed a resource for the linguistic evaluation of BabyLMs, which are LMs trained on datasets that comparable to the linguistic stimulus received by children. This resource adapts four standardized tests for the evaluation of linguistic skills of Italian-speaking children (BVL, TROG-2, TCGB-2 and Peabody). To verify the effectiveness of our benchmark, we administered it to Minerva, a LLM pretrained from scratch on Italian. Our results indicate that Minerva struggles to master certain linguistic aspects, achieving an age-equivalent score of 4 years, and that the type of task administered affects the model’s performance.</abstract>
@@ -385,8 +385,8 @@
     </paper>
     <paper id="35">
       <title>Scalable Query Understanding for <fixed-case>E</fixed-case>-commerce: An Ensemble Architecture with Graph-based Optimization</title>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last><affiliation>Harvard Business School</affiliation></author>
-      <author><first>Evgeny</first><last>Stepanov</last><affiliation>VUI, Inc.</affiliation></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last><affiliation>Harvard Business School</affiliation></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last><affiliation>VUI, Inc.</affiliation></author>
       <author><first>Ludovico</first><last>Frizziero</last><affiliation>VUI, Inc.</affiliation></author>
       <author><first>Filippo</first><last>Tessaro</last><affiliation>VUI, Inc.</affiliation></author>
       <pages>289-296</pages>
@@ -413,7 +413,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>I</fixed-case>t<fixed-case>G</fixed-case>ra<fixed-case>S</fixed-case>yll: A Computational Analysis of Graphical Syllabification and Stress Assignment in <fixed-case>I</fixed-case>talian</title>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ioan-Bogdan</first><last>Iordache</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Simona</first><last>Georgescu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Alina Maria</first><last>Cristea</last><affiliation>University of Bucharest</affiliation></author>
@@ -498,7 +498,7 @@
       <author><first>Dennis</first><last>Fucci</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
       <author><first>Beatrice</first><last>Savoldi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Mauro</first><last>Cettolo</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>373-381</pages>
@@ -554,7 +554,7 @@
       <title>The Vulnerable Identities Recognition Corpus (<fixed-case>VIRC</fixed-case>) for Hate Speech Analysis</title>
       <author><first>Ibai</first><last>Guillén-Pacho</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Arianna</first><last>Longo</last><affiliation>University of Turin, Italy</affiliation></author>
-      <author><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
       <author><first>Carlos</first><last>Badenes-Olmedo</last><affiliation>Universidad Politecnica de Madrid</affiliation></author>
       <pages>417-424</pages>
@@ -578,8 +578,8 @@
       <title>La Non Canonica L’hai Studiata? Exploring <fixed-case>LLM</fixed-case>s and Sentence Canonicity in <fixed-case>I</fixed-case>talian</title>
       <author><first>Claudiu Daniel</first><last>Hromei</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>431-439</pages>
       <abstract>This paper investigates the ability of Large Language Models (LLMs) to differentiate between canonical and non-canonical sentences in Italian, employing advanced neural architectures like LLaMA and its adaptations. Canonical sentences adhere to the standard Subject-Verb-Object (SVO) structure. We hypothesize that recent generative LLMs are influenced heavily by the English language, where non-canonical structures are very rare. Using the in-context learning technique, we probe these models and further fine-tune them for this specific task. Initial results indicate that these models continue to struggle with this task even after fine-tuning. Additionally, we introduce a new dataset comprising several hundred sentences from the poetry domain, which presents significant challenges for the canonical structure task.</abstract>
       <url hash="90804877">2024.clicit-1.52</url>
@@ -620,7 +620,7 @@
       <author><first>Tiziano</first><last>Labruna</last><affiliation>Fondazione Bruno Kessler and Free University of Bozen-Bolzano</affiliation></author>
       <author><first>Sofia</first><last>Brenna</last><affiliation>FBK, Unibz</affiliation></author>
       <author><first>Giovanni</first><last>Bonetta</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
       <pages>470-477</pages>
       <abstract>Despite the impressive capabilities of recent Large Language Models (LLMs) to generate human-like text, their ability to produce contextually appropriate content for specific communicative situations is still a matter of debate. This issue is particularly crucial when LLMs are employed as assistants to help solve tasks or achieve goals within a given conversational domain. In such scenarios, the assistant is expected to access specific knowledge (e.g., a database of restaurants, a calendar of appointments) that is not directly accessible to the user and must be consistently utilised to accomplish the task.In this paper, we conduct experiments to evaluate the trustworthiness of automatic assistants in task-oriented dialogues. Our findings indicate that state-of-the-art open-source LLMs still face significant challenges in maintaining logical consistency with a knowledge base of facts, highlighting the need for further advancements in this area.</abstract>
       <url hash="fe98c358">2024.clicit-1.56</url>
@@ -711,7 +711,7 @@
     </paper>
     <paper id="64">
       <title>Understanding High-complexity Technical Documents with State-of-Art Models</title>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
       <author><first>Roberto</first><last>Zanoli</last><affiliation>Fondazione Bruno Kessler -FBK</affiliation></author>
       <pages>540-547</pages>
       <abstract>Technical documents, particularly those in civil engineering, contain crucial information that supports critical decision-making in construction, transportation and infrastructure projects. Large language models (LLMs) offer a promising solution for automating the extraction and comprehension of technical documents, potentially transforming our interaction with technical information. However, LLMs may encounter significant challenges when processing technical documents due to their complex structure, specialized terminology and reliance on graphical and visual elements. Moreover, LLMs are known to sometimes produce unexpected or incorrect analyses, a phenomenon referred to as hallucination.This study explores the potential of state-of-the-art LLMs, specifically GPT-4omni, to automate the comprehension of technical documents. The evaluation was performed on two types of PDF documents. The first type is selectable text PDFs, which are extractable and editable, focusing on civil engineering documents from the Italian state railways. The second type is scanned OCR PDFs, where text is derived from scanning or OCR, specifically focusing on the design of an outdoor swimming pool. These documents include textual and visual elements such as tables, figures and photos. Our findings suggest that GPT-4omni has a high potential for real-world use, although it may still be susceptible to producing misleading information.</abstract>
@@ -732,7 +732,7 @@
       <title>Fine-grained Sexism Detection in <fixed-case>I</fixed-case>talian Newspapers</title>
       <author><first>Federica</first><last>Manzi</last><affiliation>Ludwig-Maximilians-University Munich (LMU University)</affiliation></author>
       <author><first>Leon</first><last>Weber-Genzel</last><affiliation>Ludwig-Maximilians-University Munich (LMU University)</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-University Munich (LMU University)</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-University Munich (LMU University)</affiliation></author>
       <pages>556-583</pages>
       <abstract>In recent years, tasks revolving around hate speech detection have experienced a growing interest in the field of Natural Language Processing. Two main trends stand out in the context of sexism recognition: the focus on overt forms of sexism such as misogyny on social media and tackling the problem as a text classification task. The main objective of this work is to introduce a new approach to tackle sexism recognition as a sequence labelling task, operating on the token level rather than the document one. To achieve this goal, we introduce (i) the FGSDI (Fine-Grained Sexism Detection in Italian) corpus, containing Italian newspaper articles annotated with fine-grained linguistic markers of sexism, and (ii) a two-step pipeline that sequentially performs sexism detection on the sentence level and sexism classification on the token one. Our primary findings include that (i) tackling the task of sexism recognition as a sequence labelling task is possible, however, a large amount of labelled data is needed; (ii) leveraging few-shot learning for sexism detection proves to be an effective solution in scenarios where only a limited amount of data are available; (iii) the proposed pipeline approach allows for better results compared to the baseline by doubling the overall precision and achieving a better F1-score.</abstract>
       <url hash="2857038a">2024.clicit-1.66</url>
@@ -757,7 +757,7 @@
       <author><first>Edoardo</first><last>Michielon</last><affiliation>Fastweb SpA, Milan, Italy</affiliation></author>
       <author><first>Marco</first><last>Pasqualini</last><affiliation>Fastweb SpA, Milan, Italy</affiliation></author>
       <author><first>Asia Beatrice</first><last>Uboldi</last><affiliation>Fastweb SpA, Milan, Italy</affiliation></author>
-      <author><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari “Aldo Moro”</affiliation></author>
       <pages>600-611</pages>
       <abstract>With the rising interest in Large Language Models, deep architectures capable of solving a wide range of Natural LanguageGeneration tasks, an increasing number of open weights architectures have been developed and released online. In contrastwith older architectures, which were aimed at solving specific linguistic assignments, Large Language Models have shownoutstanding capabilities in solving several tasks at once, raising the question of whether they can truly comprehend naturallanguage. Nevertheless, evaluating this kind of capability is far from easy. One of the proposed solutions so far is usingbenchmarks that combine various types of tasks. This approach is based on the premise that achieving good performance ineach of these individual tasks can imply having developed a model capable of understanding language. However, while thisassumption is not incorrect, it is evident that it is not sufficient, and the evaluation of Large Language Models still remains anopen challenge. In this paper, we conduct a study aimed at highlighting the potential and limitations of current datasets andhow a new evaluation setting applied to language-adapted Large Language Models may provide more insight than traditionalapproaches.</abstract>
       <url hash="a6a03f24">2024.clicit-1.68</url>
@@ -774,9 +774,9 @@
     </paper>
     <paper id="70">
       <title>Exploring <fixed-case>I</fixed-case>talian Sentence Embeddings Properties through Multi-tasking</title>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Giuseppe</first><last>Samo</last><affiliation>University of Geneva</affiliation></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
+      <author><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>University of Geneva</affiliation></author>
       <pages>620-630</pages>
       <abstract>We investigate to what degree existing LLMs encode abstract linguistic information in Italian in a multi-task setting. We exploit curated synthetic data on a large scale – several Blackbird Language Matrices (BLMs) problems in Italian – and use them to study how sentence representations built using pre-trained language models encode specific syntactic and semantic information. We use a two-level architecture to model separately a compression of the sentence embeddings into a representation that contains relevant information for a task, and a BLM task. We then investigate whether we can obtain compressed sentence representations that encode syntactic and semantic information relevant to several BLM tasks. While we expected that the sentence structure – in terms of sequence of phrases/chunks – and chunk properties could be shared across tasks, performance and error analysis show that the clues for the different tasks are encoded in different manners in the sentence embeddings, suggesting that abstract linguistic notions such as constituents or thematic roles does not seem to be present in the pretrained sentence embeddings.</abstract>
@@ -785,9 +785,9 @@
     </paper>
     <paper id="71">
       <title>Exploring Syntactic Information in Sentence Embeddings through Multilingual Subject-verb Agreement</title>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Giuseppe</first><last>Samo</last><affiliation>University of Geneva</affiliation></author>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
+      <author><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>University of Geneva</affiliation></author>
       <pages>631-643</pages>
       <abstract>In this paper, our goal is to investigate to what degree multilingual pretrained language models capture cross-linguistically valid abstract linguistic representations. We take the approach of developing curated synthetic data on a large scale, with specific properties, and using them to study sentence representations built using pretrained language models. We use a new multiple-choice task and datasets, Blackbird Language Matrices (BLMs), to focus on a specific grammatical structural phenomenon – subject-verb agreement across a variety of sentence structures – in several languages. Finding a solution to this task requires a system detecting complex linguistic patterns and paradigms in text representations. Using a two-level architecture that solves the problem in two steps – detect syntactic objects and their properties in individual sentences, and find patterns across an input sequence of sentences – we show that despite having been trained on multilingual texts in a consistent manner, multilingual pretrained language models have language-specific differences, and syntactic structure is not shared, even across closely related languages.</abstract>
@@ -809,7 +809,7 @@
     <paper id="73">
       <title>Exploring Text-Embedding Retrieval Models for the <fixed-case>I</fixed-case>talian Language</title>
       <author><first>Yuri</first><last>Noviello</last><affiliation>FICLIT - University of Bologna</affiliation></author>
-      <author><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
       <pages>654-661</pages>
       <abstract>Text retrieval systems have become essential in the field of natural language processing (NLP), serving as the backbone for applications such as search engines, document indexing, and information retrieval. With the rise of generative AI, particularly Retrieval-Augmented Generation (RAG) systems, the demand for robust text retrieval models has increased. However, existing large language models (LLMs) and datasets are often insufficiently optimized for Italian, limiting their performance in Italian text retrieval tasks. This paper addresses this gap by proposing both a data collection and specialized models tailored for Italian text retrieval. Through extensive experimentation, we analyze the improvements and limitations in retrieval performance, paving the way for more effective Italian NLP applications.</abstract>
       <url hash="72414df3">2024.clicit-1.73</url>
@@ -838,7 +838,7 @@
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Federico</first><last>Ranaldi</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>UniTV</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>679-706</pages>
       <abstract>Instruction-Following Language Models (IFLMs) are the state-of-the-art for solving many downstream tasks. Given their widespread use, there is an urgent need to measure whether the sentences they generate contain toxic information or social biases. In this paper, we propose Prompt Association Test for the Italian language (ItaP-AT): a new resource for testing the presence of social bias in different domains in IFLMs. This work also aims to understand whether it is possible to make the responses of these models more fair by using context learning, using “one-shot anti-stereotypical prompts”.</abstract>
       <url hash="fce6e556">2024.clicit-1.76</url>
@@ -926,7 +926,7 @@
       <title>Unraveling the Enigma of <fixed-case>SPLIT</fixed-case> in Large-Language Models: The Unforeseen Impact of System Prompts on <fixed-case>LLM</fixed-case>s with Dissociative Identity Disorder</title>
       <author><first>Marco</first><last>Polignano</last><affiliation>University of Bari</affiliation></author>
       <author><first>Marco</first><last>De Gemmis</last><affiliation>University of Bari Aldo Moro</affiliation></author>
-      <author><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari Aldo Moro</affiliation></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last><affiliation>University of Bari Aldo Moro</affiliation></author>
       <pages>774-780</pages>
       <abstract>Our work delves into the unexplored territory of Large-Language Models (LLMs) and their interactions with System Prompts, unveiling the previously undiscovered implications of SPLIT (System Prompt Induced Linguistic Transmutation) in commonly used state-of-the-art LLMs. Dissociative Identity Disorder, a complex and multifaceted mental health condition, is characterized by the presence of two or more distinct identities or personas within an individual, often with varying levels of awareness and control. The advent of large-language models has raised intriguing questions about the presence of such conditions in LLMs. Our research investigates the phenomenon of SPLIT, in which the System Prompt, a seemingly innocuous input, profoundly impacts the linguistic outputs of LLMs. The findings of our study reveal a striking correlation between the System Prompt and the emergence of distinct, persona-like linguistic patterns in the LLM’s responses. These patterns are not only reminiscent of the dissociative identities present in the original data but also exhibit a level of coherence and consistency that is uncommon in typical LLM outputs. As we continue to explore the capabilities of LLMs, it is imperative that we maintain a keen awareness of the potential for SPLIT and its significant implications for the development of more human-like and empathetic AI systems.</abstract>
       <url hash="06a209cb">2024.clicit-1.84</url>
@@ -938,7 +938,7 @@
       <author><first>Giulia</first><last>Pucci</last><affiliation>Independent</affiliation></author>
       <author><first>Federico</first><last>Ranaldi</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>University of Rome Tor Vergata</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>781-795</pages>
       <abstract>Previous studies have demonstrated the effectiveness of <i>reasoning methods</i> in eliciting multi-step reasoned answers from Large Language Models (LLMs) by leveraging in-context demonstrations. These methods, exemplified by Chain-of-Thought (CoT) and Program-Aided Language Models (PAL), have been shown to reason well in monolingual contexts, primarily in English. There has, however, been limited exploration of their abilities in other languages, especially in Italian.To gain a deeper understanding of the role of reasoning methods in in-context demonstrations, we propose a multidimensional analysis tailored to Italian, focusing on arithmetic and symbolic reasoning tasks. Our findings indicate that the effectiveness of reasoning methods varies significantly beyond English. Specifically, CoT, which relies on natural language demonstrations, is limited to English. Conversely, the structured nature of PAL in-context demonstrations facilitates multilingual comprehension, enabling LLMs to generate programmatic answers in Italian as well. Finally, for a more comprehensive overview, we observe that additional alignment methods do not improve downstream performances; in contrast, in some cases, they limit the abilities of the original models. This leads to significant improvements in the accuracy and quality of the generated responses.</abstract>
       <url hash="98035d9b">2024.clicit-1.85</url>
@@ -948,7 +948,7 @@
       <title>How Far Does the Sequence of Compositions Impact Multilingual Pre-Training?</title>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>UniTV</affiliation></author>
       <author><first>Giulia</first><last>Pucci</last><affiliation>Independent</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>796-804</pages>
       <abstract>The most efficient strategy for conducting pre-training of language models is the concatenation of contiguous sequences of text of fixed length through causal masking that estimates the probability of each token given its context.However, the role of the composition sequence pre-training technique in the models’ generalization properties has yet to be explored.In this paper, we show that operating via causal masking impacts model performance because it could include misleading information from previous text sequences during pre-training.To fill this gap, we propose intra-context causal masking where the probability of each token is conditional only on the previous in the same chunk of text, avoiding misleading information from different contexts.Hence, we demonstrate that organizing text chunks based on a policy that aligns with text similarity effectively reduces the risk of misleading context during pre-training by enhancing language models’ in-context learning and factual knowledge storage capabilities while maintaining efficiency.</abstract>
       <url hash="af086834">2024.clicit-1.86</url>
@@ -1011,7 +1011,7 @@
       <author><first>Davide</first><last>Venditti</last><affiliation>Università degli studi di Tor Vergata</affiliation></author>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>UniTV</affiliation></author>
       <author><first>Tommaso</first><last>Caselli</last><affiliation>Rijksuniversiteit Groningen</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>854-863</pages>
       <abstract>While LLMs get more proficient at solving tasks and generating sentences, we aim to investigate the role that differentsyntactic structures have on models’ performances on a battery of Natural Language Understanding tasks. We analyze theperformance of five LLMs on semantically equivalent sentences that are characterized by different syntactic structures. Tocorrectly solve the tasks, a model is implicitly required to correctly parse the sentence. We found out that LLMs strugglewhen there are more complex syntactic structures, with an average drop of 16.13(±11.14) points in accuracy on Q&amp;A task.Additionally, we propose a method based on token attribution to spot which area of the LLMs encode syntactic knowledge,by identifying model heads and layers responsible for the generation of a correct answer</abstract>
       <url hash="e14be92b">2024.clicit-1.92</url>
@@ -1112,7 +1112,7 @@
     </paper>
     <paper id="102">
       <title>Complexifying <fixed-case>BERT</fixed-case> Using <fixed-case>L</fixed-case>o<fixed-case>RA</fixed-case> Adapters</title>
-      <author><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
       <pages>948-954</pages>
       <abstract>This paper presents the first results of a pilot study for transforming a real-valued pre-trained transformer encoder into a complex-valued one. Following recent findings about pre-training using LoRA, the main idea is to employ complex-valued LoRA adapters to make the trick and continue the pre-training of a given Italian model for setting up the adapters. After pre-training, the proposed complex-valued model has been evaluated on a standardised benchmark for Italian natural-language understanding obtaining very encouraging results.</abstract>
       <url hash="7ea13668">2024.clicit-1.102</url>
@@ -1122,7 +1122,7 @@
       <title>How Do We Counter Hate Speech in <fixed-case>I</fixed-case>taly?</title>
       <author><first>Vittoria</first><last>Tonini</last><affiliation>University of Turin</affiliation></author>
       <author><first>Simona</first><last>Frenda</last><affiliation>Heriot-Watt University</affiliation></author>
-      <author><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last><affiliation>University of Turin</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
       <pages>955-966</pages>
       <abstract>The phenomenon of online hate speech is a growing challenge and various organisations try to prevent its spread answering promptly to hateful messages online. In this context, we propose a new dataset of activists’ and users’ comments on Facebook reacting to specific news headlines: AmnestyCounterHS. Taking into account the literature on counterspeech, we defined a new schema of annotation and applied it to our dataset, in order to examine the most used counter-narrative strategies in Italy. This research aims to support the future development of automatic counterspeech generation. This paper presents also a comparative analysis of our dataset with other two datasets in Italian (Counter-TWIT and multilingual CONAN) containing hate speech and counter narratives. Through this analysis, we will understand how the environment (artificial vs. ecological) and the topics of discussions online influence the nature of counter narratives. Our findings highlight the predominance of negative sentiment and emotions, the varying presence of stereotypes, and the strategic differences in counter narratives across dataset.</abstract>
@@ -1175,7 +1175,7 @@
       <author><first>Wolfgang</first><last>Wolfgang Schmeisser-Nieto</last><affiliation>Universitat de Barcelona</affiliation></author>
       <author><first>Giacomo</first><last>Ricci</last><affiliation>Università degli Studi di Torino</affiliation></author>
       <author><first>Simona</first><last>Frenda</last><affiliation>Heriot-Watt University</affiliation></author>
-      <author><first>Mariona</first><last>Taule</last><affiliation>University of Barcelona</affiliation></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taule</last><affiliation>University of Barcelona</affiliation></author>
       <author><first>Cristina</first><last>Bosco</last><affiliation>Dipartimento di Informatica - UniversitÃ di Torino</affiliation></author>
       <pages>997-1004</pages>
       <abstract>Detecting stereotypes is a challenging task, particularly when they are not expressed explicitly. In this study, we applied an annotation schema from the literature designed to formalize implicit stereotypes. We analyzed implicit stereotypes towards immigrants in two datasets: StereoHoax-IT and SterheoSchool, which are created from different sources. StereoHoax-IT consists of reactions on Twitter to specific hoaxes aimed at discriminating against immigrants, while SterheoSchool includes comments from teenagers on fake news generated in psychological experiments. We describe the annotation process, annotator disagreements, and provide both quantitative and qualitative analyses to shed light on how implicitness characterizes stereotypes in different texts. Our findings suggest that implicit stereotypes are often conveyed through logical linguistic relations, such as entailment and behavioral evaluations of immigrants.</abstract>
@@ -1209,7 +1209,7 @@
       <title>Voice Activity Detection on <fixed-case>I</fixed-case>talian Language</title>
       <author><first>Shibingfeng</first><last>Zhang</last><affiliation>University of Bologna</affiliation></author>
       <author><first>Gloria</first><last>Gagliardi</last><affiliation>Alma Mater Studiorum - University of Bologna</affiliation></author>
-      <author><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last><affiliation>FICLIT - University of Bologna</affiliation></author>
       <pages>1024-1029</pages>
       <abstract>Voice Activity Detection (VAD) refers to the task of identifying human voice activity in noisy settings, playing a crucial role in fields like speech recognition and audio surveillance. However, most VAD research focuses on English, leaving other languages, such as Italian, under-explored. This study aims to evaluate and enhance VAD systems for Italian speech, with the goal of finding a solution for the speech segmentation component of the Digital Linguistic Biomarkers (DLBs) extraction pipeline for early mental disorder diagnosis. We experimented with various VAD systems and propose an ensemble VAD system that integrates the best-performing models. Our ensemble system shows significant improvements in speech event detection. This advancement lays a robust foundation for more accurate early detection of mental health issues using DLBs in Italian.</abstract>
       <url hash="bb081f0d">2024.clicit-1.111</url>
@@ -1230,7 +1230,7 @@
       <author><first>Simone</first><last>Manai</last><affiliation>UNITN</affiliation></author>
       <author><first>Laura</first><last>Gemme</last><affiliation>Lutech-Softjam</affiliation></author>
       <author><first>Roberto</first><last>Zanoli</last><affiliation>Fondazione Bruno Kessler -FBK</affiliation></author>
-      <author><first>Alberto</first><last>Lavelli</last><affiliation>FBK</affiliation></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last><affiliation>FBK</affiliation></author>
       <pages>1036-1042</pages>
       <abstract>This paper introduces IDRE (Italian Dataset for Rephrasing with Empathy), a novel automatically generated Italian linguistic dataset. IDRE comprises typical chatbot user utterances in the healthcare domain, corresponding chatbot responses, and empathetically enhanced chatbot responses. The dataset was generated using the Llama2 language model and evaluated by human raters based on predefined metrics. The IDRE dataset offers a comprehensive and realistic collection of Italian chatbot-user interactions suitable for training and refining chatbot models in the healthcare domain. This facilitates the development of chatbots capable of natural and productive conversations with healthcare users. Notably, the dataset incorporates empathetically enhanced chatbot responses, enabling researchers to investigate the effects of empathetic language on fostering more positive and engaging human-machine interactions within healthcare settings. The methodology employed for the construction of the IDRE dataset can be extended to generate phrases in additional languages and domains, thereby expanding its applicability and utility. The IDRE dataset is publicly available for research purposes.</abstract>
       <url hash="a33b917e">2024.clicit-1.113</url>
@@ -1306,7 +1306,7 @@
       <author><first>Andrea</first><last>Piergentili</last><affiliation>University of Trento</affiliation></author>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>1089-1093</pages>
       <abstract>We propose MAGNET - MAchines GeNErating Translations, a CALAMITA Challenge which aims at testing the ability of large language models (LLMs) in the hot topic of automatic translation, focusing on Italian and English (in both directions) to overcome the marginality with which Italian is considered by the machine translation community. We propose a benchmark composed of two portions with different distribution policies (one free to use, the other not discloseable), allowing to handle data contamination issues. The publicly available section of the benchmark is distributed on Hugging Face, whereas in this report we describe the details of our challenge, including the prompt formats to be used. Additionally, we report the performance of five models, including a LLM and different sized translation models, in terms of four evaluation metrics, whose scores allow an overall evaluation of the quality of the automatically generated translations.</abstract>
@@ -1337,7 +1337,7 @@
       <author><first>Silvia</first><last>Casola</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Chiara</first><last>Ferrando</last><affiliation>Università di Torino</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Dipartimento di Informatica</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>1106-1115</pages>
       <abstract>Gender-fair language aims at promoting gender equality by using terms and expressions that include all identities and avoid reinforcing gender stereotypes. Implementing gender-fair strategies is particularly challenging in heavily gender-marked languages, such as Italian. To address this, the Gender-Fair Generation challenge intends to help shift toward gender-fair language in written communication. The challenge, designed to assess and monitor the recognition and generation of gender-fair language in both mono- and cross-lingual scenarios, includes three tasks: (1) the detection of gendered expressions in Italian sentences, (2) the reformulation of gendered expressions into gender-fair alternatives, and (3) the generation of gender-fair language in automatic translation from English to Italian. The challenge relies on three different annotated datasets: the GFL-it corpus, which contains Italian texts extracted from administrative documents provided by the University of Brescia; GeNTE, a bilingual test set for gender-neutral rewriting and translation built upon a subset of the Europarl dataset; and Neo-GATE, a bilingual test set designed to assess the use of non-binary neomorphemes in Italian for both fairformulation and translation tasks. Finally, each task is evaluated with specific metrics: average of F1-score obtained by means of BERTScore computed on each entry of the datasets for task 1, an accuracy measured with a gender-neutral classifier, and a coverage-weighted accuracy for tasks 2 and 3.</abstract>
@@ -1373,9 +1373,9 @@
     </paper>
     <paper id="125">
       <title><fixed-case>BLM</fixed-case>-It - Blackbird Language Matrices for <fixed-case>I</fixed-case>talian: A <fixed-case>CALAMITA</fixed-case> Challenge</title>
-      <author id="chunyang-jiang"><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
+      <author><first>Chunyang</first><last>Jiang</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Giuseppe</first><last>Samo</last><affiliation>University of Geneva</affiliation></author>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>University of Geneva</affiliation></author>
       <pages>1135-1143</pages>
       <abstract>In this challenge, we propose Blackbird Language Matrices (BLMs), linguistic puzzles to learn language-related problems and delve into deeper formal and semantic properties of language, through a process of paradigm understanding. A BLM matrix consists of a context set and an answer set. The context is a sequence of sentences that encode implicitly an underlying generative linguistic rule. The contrastive multiple-choice answer set includes negative examples following corrupted generating rules. We propose three subtasks —agreement concord, causative and object-drop alternation detection— each in two variants of increasing lexical complexity.The datasets comprise a few prompts for few-shot learning and a large test set.</abstract>
@@ -1431,7 +1431,7 @@
       <author><first>Federico</first><last>Ranaldi</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Dario</first><last>Onorati</last><affiliation>University of Rome Tor Vergata</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>UniTV</affiliation></author>
       <pages>1176-1183</pages>
       <abstract>We introduce Termite, which is a definitely unseen resource for evaluating Text-to-SQL in Italian. Specifically,we transfer evaluation pipelines beyond English, proposing novel, definitely unseen resources that avoid data-contamination phenomena while assessing the ability of models to perform Text-to-SQL tasks when natural language queries are written in Italian. We establish an evaluation grid based on execution accuracy.</abstract>
@@ -1465,7 +1465,7 @@
     <paper id="133">
       <title><fixed-case>GEESE</fixed-case> - Generating and Evaluating Explanations for Semantic Entailment: A <fixed-case>CALAMITA</fixed-case> Challenge</title>
       <author><first>Andrea</first><last>Zaninello</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
       <pages>1209-1216</pages>
       <abstract>In the GEESE challenge, we present a pipeline to evaluate generated explanations for the task of Recognizing Textual Entailment (RTE) in Italian. The challenge focuses on evaluating the impact of generated explanations on the predictive performance of language models. Using a dataset enriched with human-written explanations, we employ two large language models (LLMs) to generate and utilize explanations for semantic relationships between sentence pairs. Our methodology assesses the quality of generated explanations by measuring changes in prediction accuracy when explanations are provided. Through reproducible experimentation, we establish benchmarks against various baseline approaches, demonstrating the potential of explanation injection to enhance model interpretability and performance.</abstract>
       <url hash="e2fae932">2024.clicit-1.133</url>
diff --git a/data/xml/2024.climatenlp.xml b/data/xml/2024.climatenlp.xml
index 3325d6ef1f..8ca47cefa5 100644
--- a/data/xml/2024.climatenlp.xml
+++ b/data/xml/2024.climatenlp.xml
@@ -46,7 +46,7 @@
       <author><first>Maurice</first><last>Fehr</last><affiliation>Deutsche Bundesbank</affiliation></author>
       <author><first>Bolei</first><last>Ma</last><affiliation>Ludwig-Maximilians-Universitüt München</affiliation></author>
       <author><first>Jacob</first><last>Beck</last><affiliation>Ludwig-Maximilians-Universitüt München</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>12-26</pages>
       <abstract>We present a research agenda focused on efficiently extracting, assuring quality, and consolidating textual company sustainability information to address urgent climate change decision-making needs. Starting from the goal to create integrated FAIR (Findable, Accessible, Interoperable, Reusable) climate-related data, we identify research needs pertaining to the technical aspects of information extraction as well as to the design of the integrated sustainability datasets that we seek to compile. Regarding extraction, we leverage technological advancements, particularly in large language models (LLMs) and Retrieval-Augmented Generation (RAG) pipelines, to unlock the underutilized potential of unstructured textual information contained in corporate sustainability reports. In applying these techniques, we review key challenges, which include the retrieval and extraction of CO<tex-math>_2</tex-math> emission values from PDF documents, especially from unstructured tables and graphs therein, and the validation of automatically extracted data through comparisons with human-annotated values. We also review how existing use cases and practices in climate risk analytics relate to choices of what textual information should be extracted and how it could be linked to existing structured data.</abstract>
@@ -70,7 +70,7 @@
     <paper id="4">
       <title>Generative Debunking of Climate Misinformation</title>
       <author><first>Francisco</first><last>Zanartu</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>John</first><last>Cook</last></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>University of Melbourne</affiliation></author>
       <pages>46-62</pages>
@@ -82,7 +82,7 @@
     <paper id="5">
       <title>Decoding Climate Disagreement: A Graph Neural Network-Based Approach to Understanding Social Media Dynamics</title>
       <author><first>Ruiran</first><last>Su</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Janet</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
       <pages>63-81</pages>
       <abstract>This paper presents the ClimateSent-GAT Model, a novel approach that combines Graph Attention Networks (GATs) with natural language processing techniques to accurately identify and predict disagreements within Reddit comment-reply pairs. Our model classifies disagreements into three categories: agree, disagree, and neutral. Leveraging the inherent graph structure of Reddit comment-reply pairs, the model significantly outperforms existing benchmarks by capturing complex interaction patterns and sentiment dynamics. This research advances graph-based NLP methodologies and provides actionable insights for policymakers and educators in climate science communication.</abstract>
       <url hash="7fc38721">2024.climatenlp-1.5</url>
@@ -219,7 +219,7 @@
       <title><fixed-case>CLIMATELI</fixed-case>: Evaluating Entity Linking on Climate Change Data</title>
       <author><first>Shijia</first><last>Zhou</last><affiliation>Ludwig-Maximilians-Universitüt München</affiliation></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>Ludwig-Maximilians-Universitüt München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universitüt München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universitüt München and IT University of Copenhagen</affiliation></author>
       <pages>215-222</pages>
       <abstract>Climate Change (CC) is a pressing topic of global importance, attracting increasing attention across research fields, from social sciences to Natural Language Processing (NLP). CC is also discussed in various settings and communication platforms, from academic publications to social media forums. Understanding who and what is mentioned in such data is a first critical step to gaining new insights into CC. We present CLIMATELI (CLIMATe Entity LInking), the first manually annotated CC dataset that links 3,087 entity spans to Wikipedia. Using CLIMATELI (CLIMATe Entity LInking), we evaluate existing entity linking (EL) systems on the CC topic across various genres and propose automated filtering methods for CC entities. We find that the performance of EL models notably lags behind humans at both token and entity levels. Testing within the scope of retaining or excluding non-nominal and/or non-CC entities particularly impacts the models’ performances.</abstract>
       <url hash="f7fbd044">2024.climatenlp-1.16</url>
diff --git a/data/xml/2024.clinicalnlp.xml b/data/xml/2024.clinicalnlp.xml
index fc068bcce3..d3bbb0c9cb 100644
--- a/data/xml/2024.clinicalnlp.xml
+++ b/data/xml/2024.clinicalnlp.xml
@@ -23,7 +23,7 @@
     <paper id="1">
       <title>Exploring Robustness in Doctor-Patient Conversation Summarization: An Analysis of Out-of-Domain <fixed-case>SOAP</fixed-case> Notes</title>
       <author><first>Yu-Wen</first><last>Chen</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>1-9</pages>
       <abstract>Summarizing medical conversations poses unique challenges due to the specialized domain and the difficulty of collecting in-domain training data. In this study, we investigate the performance of state-of-the-art doctor-patient conversation generative summarization models on the out-of-domain data. We divide the summarization model of doctor-patient conversation into two configurations: (1) a general model, without specifying subjective (S), objective (O), and assessment (A) and plan (P) notes; (2) a SOAP-oriented model that generates a summary with SOAP sections. We analyzed the limitations and strengths of the fine-tuning language model-based methods and GPTs on both configurations. We also conducted a Linguistic Inquiry and Word Count analysis to compare the SOAP notes from different datasets. The results exhibit a strong correlation for reference notes across different datasets, indicating that format mismatch (i.e., discrepancies in word distribution) is not the main cause of performance decline on out-of-domain data. Lastly, a detailed analysis of SOAP notes is included to provide insights into missing information and hallucinations introduced by the models.</abstract>
       <url hash="185096e0">2024.clinicalnlp-1.1</url>
@@ -80,7 +80,7 @@
     <paper id="6">
       <title>Working Alliance Transformer for Psychotherapy Dialogue Classification</title>
       <author><first>Baihan</first><last>Lin</last><affiliation>Columbia University and IBM, International Business Machines</affiliation></author>
-      <author><first>Guillermo</first><last>Cecchi</last><affiliation>International Business Machines</affiliation></author>
+      <author id="guillermo-a-cecchi"><first>Guillermo</first><last>Cecchi</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Djallel</first><last>Bouneffouf</last></author>
       <pages>64-69</pages>
       <abstract>As a predictive measure of the treatment outcome in psychotherapy, the working alliance measures the agreement of the patient and the therapist in terms of their bond, task and goal. Long been a clinical quantity estimated by the patients’ and therapists’ self-evaluative reports, we believe that the working alliance can be better characterized using natural language processing technique directly in the dialogue transcribed in each therapy session. In this work, we propose the Working Alliance Transformer (WAT), a Transformer-based classification model that has a psychological state encoder which infers the working alliance scores by projecting the embedding of the dialogues turns onto the embedding space of the clinical inventory for working alliance. We evaluate our method in a real-world dataset with over 950 therapy sessions with anxiety, depression, schizophrenia and suicidal patients and demonstrate an empirical advantage of using information about therapeutic states in the sequence classification task of psychotherapy dialogues.</abstract>
@@ -119,7 +119,7 @@
       <author><first>Pasquale</first><last>Minervini</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Luke</first><last>Daines</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Tom</first><last>Hope</last><affiliation>Allen Institute for Artificial Intelligence and Hebrew University, Hebrew University of Jerusalem</affiliation></author>
-      <author><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>91-104</pages>
       <abstract>Adapting pretrained language models to novel domains, such as clinical applications, traditionally involves retraining their entire set of parameters. Parameter-Efficient Fine-Tuning (PEFT) techniques for fine-tuning language models significantly reduce computational requirements by selectively fine-tuning small subsets of parameters. In this study, we propose a two-step PEFT framework and evaluate it in the clinical domain. Our approach combines a specialised PEFT adapter layer designed for clinical domain adaptation with another adapter specialised for downstream tasks. We evaluate the framework on multiple clinical outcome prediction datasets, comparing it to clinically trained language models. Our framework achieves a better AUROC score averaged across all clinical downstream tasks compared to clinical language models. In particular, we observe large improvements of 4-5% AUROC in large-scale multilabel classification tasks, such as diagnoses and procedures classification. To our knowledge, this study is the first to provide an extensive empirical analysis of the interplay between PEFT techniques and domain adaptation in an important real-world domain of clinical applications.</abstract>
       <url hash="f81852bc">2024.clinicalnlp-1.9</url>
@@ -189,7 +189,7 @@
       <author><first>Nicolas</first><last>Hiebel</last><affiliation>Université Paris-Saclay</affiliation></author>
       <author><first>Olivier</first><last>Ferret</last><affiliation>CEA</affiliation></author>
       <author><first>Karën</first><last>Fort</last><affiliation>Sorbonne Université / LORIA</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
       <pages>172-184</pages>
       <abstract>Text generation opens up new prospects for overcoming the lack of open corpora in fields such as healthcare, where data sharing is bound by confidentiality. In this study, we compare the performance of encoder-decoder and decoder-only language models for the controlled generation of clinical cases in French. To do so, we fine-tuned several pre-trained models on French clinical cases for each architecture and generate clinical cases conditioned by patient demographic information (gender and age) and clinical features.Our results suggest that encoder-decoder models are easier to control than decoder-only models, but more costly to train.</abstract>
       <url hash="4147de44">2024.clinicalnlp-1.14</url>
@@ -308,9 +308,9 @@
       <title>Adapting <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing to the Clinical Narrative – the <fixed-case>SPRING</fixed-case> <fixed-case>THYME</fixed-case> parser</title>
       <author><first>Jon</first><last>Cai</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Kristin</first><last>Wright-Bettner</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Guergana</first><last>Savova</last><affiliation>Harvard University</affiliation></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>271-282</pages>
       <abstract>This paper is dedicated to the design and evaluation of the first AMR parser tailored for clinical notes. Our objective was to facilitate the precise transformation of the clinical notes into structured AMR expressions, thereby enhancing the interpretability and usability of clinical text data at scale. Leveraging the colon cancer dataset from the Temporal Histories of Your Medical Events (THYME) corpus, we adapted a state-of-the-art AMR parser utilizing continuous training. Our approach incorporates data augmentation techniques to enhance the accuracy of AMR structure predictions. Notably, through this learning strategy, our parser achieved an impressive F1 score of 88% on the THYME corpus’s colon cancer dataset. Moreover, our research delved into the efficacy of data required for domain adaptation within the realm of clinical notes, presenting domain adaptation data requirements for AMR parsing. This exploration not only underscores the parser’s robust performance but also highlights its potential in facilitating a deeper understanding of clinical narratives through structured semantic representations.</abstract>
       <url hash="8fc233cc">2024.clinicalnlp-1.23</url>
@@ -371,7 +371,7 @@
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
       <author><first>Siti</first><last>Salleh</last><affiliation>, A*STAR</affiliation></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last><affiliation>Institute for Infocomm Research</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>310-321</pages>
       <abstract>In the realm of dialogue systems, generated responses often lack personalization. This is particularly true in the medical domain, where research is limited by scarce available domain-specific data and the complexities of modeling medical context and persona information. In this work, we investigate the potential of harnessing large language models for personalized medical dialogue generation. In particular, to better aggregate the long conversational context, we adopt topic-focused summarization to distill core information from the dialogue history, and use such information to guide the conversation flow and generated content. Drawing inspiration from real-world telehealth conversations, we outline a comprehensive pipeline encompassing data processing, profile construction, and domain adaptation. This work not only highlights our technical approach but also shares distilled insights from the data preparation and model construction phases.</abstract>
       <url hash="ba2381d7">2024.clinicalnlp-1.27</url>
@@ -381,7 +381,7 @@
     <paper id="28">
       <title>Evaluating Lexicon Incorporation for Depression Symptom Estimation</title>
       <author><first>Kirill</first><last>Milintsevich</last><affiliation>Université de Caen Basse Normandie and University of Tartu</affiliation></author>
-      <author><first>Gaël</first><last>Dias</last><affiliation>University of Caen Normandy</affiliation></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last><affiliation>University of Caen Normandy</affiliation></author>
       <author><first>Kairit</first><last>Sirts</last><affiliation>institute of computer science, University of Tartu</affiliation></author>
       <pages>322-328</pages>
       <abstract>This paper explores the impact of incorporating sentiment, emotion, and domain-specific lexicons into a transformer-based model for depression symptom estimation. Lexicon information is added by marking the words in the input transcripts of patient-therapist conversations as well as in social media posts. Overall results show that the introduction of external knowledge within pre-trained language models can be beneficial for prediction performance, while different lexicons show distinct behaviours depending on the targeted task. Additionally, new state-of-the-art results are obtained for the estimation of depression level over patient-therapist interviews.</abstract>
@@ -633,7 +633,7 @@
       <author><first>Pasquale</first><last>Minervini</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Luke</first><last>Daines</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>T.</first><last>Simpson</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
-      <author><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>488-501</pages>
       <abstract>The MEDIQA-CORR 2024 shared task aims to assess the ability of Large Language Models (LLMs) to identify and correct medical errors in clinical notes. In this study, we evaluate the capability of general LLMs, specifically GPT-3.5 and GPT-4, to identify and correct medical errors with multiple prompting strategies. Recognising the limitation of LLMs in generating accurate corrections only via prompting strategies, we propose incorporating error-span predictions from a smaller, fine-tuned model in two ways: 1) by presenting it as a hint in the prompt and 2) by framing it as multiple-choice questions from which the LLM can choose the best correction. We found that our proposed prompting strategies significantly improve the LLM’s ability to generate corrections. Our best-performing solution with 8-shot + CoT + hints ranked sixth in the shared task leaderboard. Additionally, our comprehensive analyses show the impact of the location of the error sentence, the prompted role, and the position of the multiple-choice option on the accuracy of the LLM. This prompts further questions about the readiness of LLM to be implemented in real-world clinical settings.</abstract>
       <url hash="eaad176b">2024.clinicalnlp-1.49</url>
@@ -711,8 +711,8 @@
       <author><first>Yujuan</first><last>Fu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhaoyi</first><last>Sun</last></author>
       <author><first>Fei</first><last>Xia</last><affiliation>University of Washington, Seattle</affiliation></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Martin</first><last>Krallinger</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <pages>581-589</pages>
       <abstract>Remote patient care provides opportunities for expanding medical access, saving healthcare costs, and offering on-demand convenient services. In the MEDIQA-M3G 2024 Shared Task, researchers explored solutions for the specific task of dermatological consumer health visual question answering, where user generated queries and images are used as input and a free-text answer response is generated as output. In this novel challenge, eight teams with a total of 48 submissions were evaluated across three language test sets. In this work, we provide a summary of the dataset, as well as results and approaches. We hope that the insights learned here will inspire future research directions that can lead to technology that deburdens clinical workload and improves care.</abstract>
       <url hash="14975668">2024.clinicalnlp-1.55</url>
@@ -737,7 +737,7 @@
       <author><first>Yujuan</first><last>Fu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhaoyi</first><last>Sun</last></author>
       <author><first>Fei</first><last>Xia</last><affiliation>University of Washington, Seattle</affiliation></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
       <pages>596-603</pages>
       <abstract>Automatic detection and correction of medical errors enables a more rigorous validation of medical documentation as well as clinical notes generated by large language models. Such solutions can ensure the accuracy and medical coherence of clinical texts and enhance patient care and health outcomes. The MEDIQA-CORR 2024 shared task focused on detecting and correcting different types of medical errors in clinical texts. Seventeen teams participated in the shared task and experimented with a broad range of approaches and models. In this paper, we describe the MEDIQA-CORR task, datasets, and the participants’ results and methods.</abstract>
       <url hash="29db28ef">2024.clinicalnlp-1.57</url>
@@ -854,7 +854,7 @@
       <title><fixed-case>LTRC</fixed-case>-<fixed-case>IIITH</fixed-case> at <fixed-case>EHRSQL</fixed-case> 2024: Enhancing Reliability of Text-to-<fixed-case>SQL</fixed-case> Systems through Abstention and Confidence Thresholding</title>
       <author><first>Jerrin</first><last>Thomas</last></author>
       <author><first>Pruthwik</first><last>Mishra</last><affiliation>IIIT-Hyderabad</affiliation></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last></author>
       <pages>697-702</pages>
       <abstract>In this paper, we present our work in the EHRSQL 2024 shared task which tackles reliable text-to-SQL modeling on Electronic Health Records. Our proposed system tackles the task with three modules - abstention module, text-to-SQL generation module, and reliability module. The abstention module identifies whether the question is answerable given the database schema. If the question is answerable, the text-to-SQL generation module generates the SQL query and associated confidence score. The reliability module has two key components - confidence score thresholding, which rejects generations with confidence below a pre-defined level, and error filtering, which identifies and excludes SQL queries that result in execution errors. In the official leaderboard for the task, our system ranks 6th. We have also made the source code public.</abstract>
diff --git a/data/xml/2024.clpsych.xml b/data/xml/2024.clpsych.xml
index fd97442b12..739eaca3b8 100644
--- a/data/xml/2024.clpsych.xml
+++ b/data/xml/2024.clpsych.xml
@@ -5,12 +5,12 @@
       <booktitle>Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024)</booktitle>
       <editor><first>Andrew</first><last>Yates</last></editor>
       <editor><first>Bart</first><last>Desmet</last></editor>
-      <editor><first>Emily</first><last>Prud’hommeaux</last></editor>
+      <editor id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></editor>
       <editor><first>Ayah</first><last>Zirikly</last></editor>
       <editor><first>Steven</first><last>Bedrick</last></editor>
       <editor><first>Sean</first><last>MacAvaney</last></editor>
       <editor><first>Kfir</first><last>Bar</last></editor>
-      <editor><first>Molly</first><last>Ireland</last></editor>
+      <editor id="molly-ireland"><first>Molly</first><last>Ireland</last></editor>
       <editor><first>Yaakov</first><last>Ophir</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>St. Julians, Malta</address>
@@ -111,7 +111,7 @@
     <paper id="8">
       <title>Explainable Depression Detection Using Large Language Models on Social Media Data</title>
       <author><first>Yuxi</first><last>Wang</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Prasadith</first><last>Kirinde Gamaarachchige</last><affiliation>University of Ottawa</affiliation></author>
       <pages>108-126</pages>
       <abstract>Due to the rapid growth of user interaction on different social media platforms, publicly available social media data has increased substantially. The sheer amount of data and level of personal information being shared on such platforms has made analyzing textual information to predict mental disorders such as depression a reliable preliminary step when it comes to psychometrics. In this study, we first proposed a system to search for texts that are related to depression symptoms from the Beck’s Depression Inventory (BDI) questionnaire, and providing a ranking for further investigation in a second step. Then, in this second step, we address the even more challenging task of automatic depression level detection, using writings and voluntary answers provided by users on Reddit. Several Large Language Models (LLMs) were applied in experiments. Our proposed system based on LLMs can generate both predictions and explanations for each question. By combining two LLMs for different questions, we achieved better performance on three of four metrics compared to the state-of-the-art and remained competitive on the one remaining metric. In addition, our system is explainable on two levels: first, knowing the answers to the BDI questions provides clues about the possible symptoms that could lead to a clinical diagnosis of depression; second, our system can explain the predicted answer for each question.</abstract>
@@ -122,7 +122,7 @@
     <paper id="9">
       <title>Analysing relevance of Discourse Structure for Improved Mental Health Estimation</title>
       <author><first>Navneet</first><last>Agarwal</last><affiliation>Normandie University, UNICAEN, ENSICAEN, CNRS, GREYC</affiliation></author>
-      <author><first>Gaël</first><last>Dias</last><affiliation>Normandie Univ, UNICAEN, ENSICAEN, CNRS, GREYC</affiliation></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last><affiliation>Normandie Univ, UNICAEN, ENSICAEN, CNRS, GREYC</affiliation></author>
       <author><first>Sonia</first><last>Dollfus</last><affiliation>CHU de Caen, Service de Psychiatrie; Normandie Univ, UNICAEN, ISTS, GIP Cyceron; Normandie Univ, UNICAEN, UFR de Médecine</affiliation></author>
       <pages>127-132</pages>
       <abstract>Automated depression estimation has received significant research attention in recent years as a result of its growing impact on the global community. Within the context of studies based on patient-therapist interview transcripts, most researchers treat the dyadic discourse as a sequence of unstructured sentences, thus ignoring the discourse structure within the learning process. In this paper we propose Multi-view architectures that divide the input transcript into patient and therapist views based on sentence type in an attempt to utilize symmetric discourse structure for improved model performance. Experiments on DAIC-WOZ dataset for binary classification task within depression estimation show advantages of Multi-view architecture over sequential input representations. Our model also outperforms the current state-of-the-art results and provide new SOTA performance on test set of DAIC-WOZ dataset.</abstract>
@@ -140,8 +140,8 @@
       <author><first>James</first><last>McKay</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Johannes</first><last>Eichstaedt</last><affiliation>Stanford University</affiliation></author>
       <author><first>Richard</first><last>Rosenthal</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <pages>133-144</pages>
       <abstract>Analyses for linking language with psychological factors or behaviors predominately treat linguistic features as a static set, working with a single document per person or aggregating across multiple posts (e.g. on social media) into a single set of features. This limits language to mostly shed light on between-person differences rather than changes in behavior within-person. Here, we collected a novel dataset of daily surveys where participants were asked to describe their experienced well-being and report the number of alcoholic beverages they had within the past 24 hours. Through this data, we first build a multi-level forecasting model that is able to capture within-person change and leverage both the psychological features of the person and daily well-being responses. Then, we propose a longitudinal version of differential language analysis that finds patterns associated with drinking more (e.g. social events) and less (e.g. task-oriented), as well as distinguishing patterns of heavy drinks versus light drinkers.</abstract>
       <url hash="2579df42">2024.clpsych-1.10</url>
@@ -173,7 +173,7 @@
       <title>Your Model Is Not Predicting Depression Well And That Is Why: A Case Study of <fixed-case>PRIMATE</fixed-case> Dataset</title>
       <author><first>Kirill</first><last>Milintsevich</last><affiliation>University of Caen Normandy</affiliation></author>
       <author><first>Kairit</first><last>Sirts</last><affiliation>University of Tartu</affiliation></author>
-      <author><first>Gaël</first><last>Dias</last><affiliation>Normandie Univ, UNICAEN, ENSICAEN, CNRS, GREYC</affiliation></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last><affiliation>Normandie Univ, UNICAEN, ENSICAEN, CNRS, GREYC</affiliation></author>
       <pages>166-171</pages>
       <abstract>This paper addresses the quality of annotations in mental health datasets used for NLP-based depression level estimation from social media texts. While previous research relies on social media-based datasets annotated with binary categories, i.e. depressed or non-depressed, recent datasets such as D2S and PRIMATE aim for nuanced annotations using PHQ-9 symptoms. However, most of these datasets rely on crowd workers without the domain knowledge for annotation. Focusing on the PRIMATE dataset, our study reveals concerns regarding annotation validity, particularly for the lack of interest or pleasure symptom. Through reannotation by a mental health professional, we introduce finer labels and textual spans as evidence, identifying a notable number of false positives. Our refined annotations, to be released under a Data Use Agreement, offer a higher-quality test set for anhedonia detection. This study underscores the necessity of addressing annotation quality issues in mental health datasets, advocating for improved methodologies to enhance NLP model reliability in mental health assessments.</abstract>
       <url hash="e743e05f">2024.clpsych-1.13</url>
@@ -222,9 +222,9 @@
       <title>Exploring Instructive Prompts for Large Language Models in the Extraction of Evidence for Supporting Assigned Suicidal Risk Levels</title>
       <author><first>Jiyu</first><last>Chen</last><affiliation>CSIRO</affiliation></author>
       <author><first>Vincent</first><last>Nguyen</last><affiliation>CSIRO</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
-      <author><first>Diego</first><last>Molla-Aliod</last><affiliation>Macquarie University</affiliation></author>
-      <author><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla-Aliod</last><affiliation>Macquarie University</affiliation></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>CSIRO</affiliation></author>
       <pages>197-202</pages>
       <abstract>Monitoring and predicting the expression of suicidal risk in individuals’ social media posts is a central focus in clinical NLP. Yet, existing approaches frequently lack a crucial explainability component necessary for extracting evidence related to an individual’s mental health state. We describe the CSIRO Data61 team’s evidence extraction system submitted to the CLPsych 2024 shared task. The task aims to investigate the zero-shot capabilities of open-source LLM in extracting evidence regarding an individual’s assigned suicide risk level from social media discourse. The results are assessed against ground truth evidence annotated by psychological experts, with an achieved recall-oriented BERTScore of 0.919. Our findings suggest that LLMs showcase strong feasibility in the extraction of information supporting the evaluation of suicidal risk in social media discourse. Opportunities for refinement exist, notably in crafting concise and effective instructions to guide the extraction process.</abstract>
@@ -267,7 +267,7 @@
       <title>Detecting Suicide Risk Patterns using Hierarchical Attention Networks with Large Language Models</title>
       <author><first>Koushik</first><last>L</last><affiliation>Student, NITK</affiliation></author>
       <author><first>Vishruth</first><last>M</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
-      <author><first>Anand Kumar</first><last>M</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
       <pages>227-231</pages>
       <abstract>Suicide has become a major public health and social concern in the world . This Paper looks into a method through use of LLMs (Large Lan- guage Model) to extract the likely reason for a person to attempt suicide , through analysis of their social media text posts detailing about the event , using this data we can extract the rea- son for the cause such mental state which can provide support for suicide prevention. This submission presents our approach for CLPsych Shared Task 2024. Our model uses Hierarchi- cal Attention Networks (HAN) and Llama2 for finding supporting evidence about an individ- ual’s suicide risk level.</abstract>
       <url hash="326e0638">2024.clpsych-1.21</url>
@@ -356,9 +356,9 @@
       <author><first>Kevin</first><last>Lanning</last><affiliation>Wilkes Honors College, Florida Atlantic University</affiliation></author>
       <author><first>Isabella</first><last>Vallejo</last><affiliation>Wilkes Honors College, Florida Atlantic University</affiliation></author>
       <author><first>Lucie</first><last>Flek</last><affiliation>CAISA Lab, University of Bonn</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Charles</first><last>Welch</last><affiliation>University of Bonn</affiliation></author>
-      <author><first>Ryan</first><last>Boyd</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="ryan-boyd"><first>Ryan</first><last>Boyd</last><affiliation>Stony Brook University</affiliation></author>
       <pages>278-291</pages>
       <abstract>Research on psychological risk factors for suicide has developed for decades. However, combining explainable theory with modern data-driven language model approaches is non-trivial. In this study, we propose and evaluate methods for identifying language patterns aligned with theories of suicide risk by combining theory-driven suicidal archetypes with language model-based and relative entropy-based approaches. Archetypes are based on prototypical statements that evince risk of suicidality while relative entropy considers the ratio of how unusual both a risk-familiar and unfamiliar model find the statements. While both approaches independently performed similarly, we find that combining the two significantly improved the performance in the shared task evaluations, yielding our combined system submission with a BERTScore Recall of 0.906. Consistent with the literature, we find that titles are highly informative as suicide risk evidence, despite the brevity. We conclude that a combination of theory- and data-driven methods are needed in the mental health space and can outperform more modern prompt-based methods.</abstract>
       <url hash="d1630b11">2024.clpsych-1.28</url>
diff --git a/data/xml/2024.cmcl.xml b/data/xml/2024.cmcl.xml
index 116d505180..d3b1decf1a 100644
--- a/data/xml/2024.cmcl.xml
+++ b/data/xml/2024.cmcl.xml
@@ -82,7 +82,7 @@
       <author><first>Michael</first><last>Wolfman</last></author>
       <author><first>Donald</first><last>Dunagan</last></author>
       <author><first>Jonathan</first><last>Brennan</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
+      <author id="john-hale"><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
       <pages>72-80</pages>
       <abstract>Language models (LMs) are a meeting point for cognitive modeling and computational linguistics. How should they be designed to serve as adequate cognitive models? To address this question, this study contrasts two Transformer-based LMs that share the same architecture. Only one of them analyzes sentences in terms of explicit hierarchical structure. Evaluating the two LMs against fMRI time series via the surprisal complexity metric, the results implicate the superior temporal gyrus. These findings underline the need for hierarchical sentence structures in word-by-word models of human language comprehension.</abstract>
       <url hash="767d0a16">2024.cmcl-1.6</url>
@@ -151,7 +151,7 @@
       <title>Daily auditory environments in <fixed-case>F</fixed-case>rench-speaking infants: A longitudinal dataset</title>
       <author><first>Estelle</first><last>Hervé</last></author>
       <author><first>Clément</first><last>François</last><affiliation>CNRS</affiliation></author>
-      <author><first>Laurent</first><last>Prevot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prevot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
       <pages>132-151</pages>
       <abstract>Babies’ daily auditory environment plays a crucial role in language development. Most previous research estimating the quantitative and qualitative aspects of early speech inputs has predominantly focused on English- and Spanish-speaking families. In addition, validation studies for daylong recordings’ analysis tools are scarce on French data sets.In this paper, we present a French corpus of daylong audio recordings longitudinally collected with the LENA (Language ENvironment Analysis) system from infants aged 3 to 24 months. We conduct a thorough exploration of this data set, which serves as a quality check for both the data and the analysis tools.We evaluate the reliability of LENA metrics by systematically comparing them with those obtained from the ChildProject set of tools and by checking the known dynamics of the metrics with age. These metrics are also used to replicate, on our data set, findings from (Warlaumont et al, 2014) about the increase of infants’ speech vocalizations and temporal contingencies between infants and caregivers with age.</abstract>
       <url hash="a073ec9f">2024.cmcl-1.12</url>
@@ -173,7 +173,7 @@
     <paper id="14">
       <title>How can large language models become more human?</title>
       <author><first>Daphne</first><last>Wang</last><affiliation>Quandela</affiliation></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last><affiliation>University College London</affiliation></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last><affiliation>University College London</affiliation></author>
       <author><first>Miloš</first><last>Stanojević</last><affiliation>University College London, University of London and Google DeepMind</affiliation></author>
       <author><first>Wing-Yee</first><last>Chow</last><affiliation>University College London, University of London</affiliation></author>
       <author><first>Richard</first><last>Breheny</last><affiliation>University College London, University of London</affiliation></author>
@@ -219,7 +219,7 @@
     <paper id="18">
       <title>Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts</title>
       <author><first>Tarun</first><last>Tater</last><affiliation>Universität Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>214-220</pages>
       <abstract>This study investigates the performance of SigLIP, a state-of-the-art Vision-Language Model (VLM), in predicting labels for images depicting 1,278 concepts. Our analysis across 300 images per concept shows that the model frequently predicts the exact user-tagged labels, but similarly, it often predicts labels that are semantically related to the exact labels in various ways: synonyms, hypernyms, co-hyponyms, and associated words, particularly for abstract concepts. We then zoom into the diversity of the user tags of images and word associations for abstract versus concrete concepts. Surprisingly, not only abstract but also concrete concepts exhibit significant variability, thus challenging the traditional view that representations of concrete concepts are less diverse.</abstract>
@@ -241,7 +241,7 @@
       <title>How Useful is Context, Actually? Comparing <fixed-case>LLM</fixed-case>s and Humans on Discourse Marker Prediction</title>
       <author><first>Emily</first><last>Sadlier-Brown</last></author>
       <author><first>Millie</first><last>Lou</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Carla</first><last>Kam</last><affiliation>University of British Columbia</affiliation></author>
       <pages>231-241</pages>
       <abstract>This paper investigates the adverbial discourse particle actually. We compare LLM and human performance on cloze tests involving actually on examples sourced from the Providence Corpus of speech around children. We explore the impact of utterance context on cloze test performance. We find that context is always helpful, though the extent to which additional context is helpful, and what relative placement of context (i.e. before or after the masked word) is most helpful differs for individual models and humans. The best-performing LLM, GPT-4, narrowly outperforms humans. In an additional experiment, we explore cloze performance on synthetic LLM-generated examples, and find that several models vastly outperform humans.</abstract>
diff --git a/data/xml/2024.codi.xml b/data/xml/2024.codi.xml
index bafe63fbb4..c268ad9d6c 100644
--- a/data/xml/2024.codi.xml
+++ b/data/xml/2024.codi.xml
@@ -70,7 +70,7 @@
     </paper>
     <paper id="5">
       <title>Experimenting with Discourse Segmentation of <fixed-case>T</fixed-case>aiwan <fixed-case>S</fixed-case>outhern <fixed-case>M</fixed-case>in Spontaneous Speech</title>
-      <author><first>Laurent</first><last>Prévot</last><affiliation>Aix Marseille Université &amp; CNRS</affiliation></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last><affiliation>Aix Marseille Université &amp; CNRS</affiliation></author>
       <author><first>Sheng-Fu</first><last>Wang</last><affiliation>Academia Sinica</affiliation></author>
       <pages>50-63</pages>
       <abstract>Discourse segmentation received increased attention in the past years, however the majority of studies have focused on written genres and with high-resource languages. This paper investigates discourse segmentation of a Taiwan Southern Min spontaneous speech corpus. We compare the fine-tuning a Language Model (LLM using two approaches: supervised, thanks to a high-quality annotated dataset, and weakly-supervised, requiring only a small amount of manual labeling. The corpus used here is transcribed with both Chinese characters and romanized transcription. This allows us to compare the impact of the written form on the discourse segmentation task. Additionally, the dataset includes manual prosodic breaks labeling, allowing an exploration of the role prosody can play in contemporary discourse segmentation systems grounded in LLMs. In our study, the supervised approach outperforms weak-supervision ; character-based version demonstrated better scores compared to the romanized version; and prosodic information proved to be an interesting source to increase discourse segmentation performance.</abstract>
@@ -82,7 +82,7 @@
     <paper id="6">
       <title>Actor Identification in Discourse: A Challenge for <fixed-case>LLM</fixed-case>s?</title>
       <author><first>Ana</first><last>Barić</last><affiliation>University of Zagreb, Faculty of Electrical Engineering and Computing</affiliation></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>Stuttgart University</affiliation></author>
       <author><first>Sean</first><last>Papay</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>64-70</pages>
       <abstract>The identification of political actors who put forward claims in public debate is a crucial step in the construction of discourse networks, which are helpful to analyze societal debates. Actor identification is, however, rather challenging: Often, the locally mentioned speaker of a claim is only a pronoun (“He proposed that [claim]”), so recovering the canonical actor name requires discourse understanding. We compare a traditional pipeline of dedicated NLP components (similar to those applied to the related task of coreference) with a LLM, which appears a good match for this generation task. Evaluating on a corpus of German actors in newspaper reports, we find surprisingly that the LLM performs worse. Further analysis reveals that the LLM is very good at identifying the right reference, but struggles to generate the correct canonical form. This points to an underlying issue in LLMs with controlling generated output. Indeed, a hybrid model combining the LLM with a classifier to normalize its output substantially outperforms both initial models.</abstract>
@@ -148,11 +148,11 @@
     </paper>
     <paper id="12">
       <title>The <fixed-case>ARRAU</fixed-case> 3.0 Corpus</title>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London and University of Utrecht</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London and University of Utrecht</affiliation></author>
       <author><first>Maris</first><last>Camilleri</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Paloma</first><last>Carretero Garcia</last><affiliation>Queen Mary University of London</affiliation></author>
       <author><first>Juntao</first><last>Yu</last><affiliation>Queen Mary University of London</affiliation></author>
-      <author><first>Mark-Christoph</first><last>Müller</last><affiliation>Heidelberg Institute for Theoretical Studies gGmbH</affiliation></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Müller</last><affiliation>Heidelberg Institute for Theoretical Studies gGmbH</affiliation></author>
       <pages>127-138</pages>
       <abstract>The ARRAU corpus is an anaphorically annotated corpus designed to cover a wide variety of aspects of anaphoric reference in a variety of genres, including both written text and spoken language. The objective of this annotation project is to push forward the state of the art in anaphoric annotation, by overcoming the limitations of current annotation practice and the scope of current models of anaphoric interpretation, which in turn may reveal other issues. The resulting corpus is still therefore very much a work in progress almost twenty years after the project started. In this paper, we discuss the issues identified with the coding scheme used for the previous release, ARRAU 2, and through the use of this corpus for three shared tasks; the proposed solutions to these issues; and the resulting corpus, ARRAU 3.</abstract>
       <url hash="f8aff236">2024.codi-1.12</url>
diff --git a/data/xml/2024.cogalex.xml b/data/xml/2024.cogalex.xml
index 2d4be04701..034cf26495 100644
--- a/data/xml/2024.cogalex.xml
+++ b/data/xml/2024.cogalex.xml
@@ -44,7 +44,7 @@
       <author><first>Brendan Balcerak</first><last>Jackson</last></author>
       <author><first>Joerg</first><last>Deigmoeller</last></author>
       <author><first>Julian</first><last>Eggert</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>26–31</pages>
       <abstract>Temporal adverbial phrases such as recently and some time ago have a special function in communication and temporal cognition. These adverbials are deictic, in that their meaning is tied to their time of utterance; and they are vague, in that the time periods to which they apply are under-specified in comparison to expressions such as yesterday, which precisely indicates the day before the day of utterance. Despite their vagueness, conversational participants have a mental image of when events described using these adverbials take place. We present a study that aims to quantify this mental model in terms of fuzzy or graded membership. To achieve this, we investigated the four English temporal adverbials recently, just, some time ago and long time ago as applied to types of events with different durations and frequencies, by conducting surveys to measure how speakers judge the different adverbials to apply in different time ranges. Our results suggest that it is possible to represent the meanings of deictic vague temporal adverbials geometrically in terms of graded membership within a temporal conceptual space.</abstract>
       <url hash="722201b5">2024.cogalex-1.3</url>
@@ -61,7 +61,7 @@
     </paper>
     <paper id="5">
       <title>How Human-Like Are Word Associations in Generative Models? An Experiment in <fixed-case>S</fixed-case>lovene</title>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Mojca</first><last>Brglez</last></author>
       <author><first>Aleš</first><last>Žagar</last></author>
       <pages>42–48</pages>
@@ -80,7 +80,7 @@
     <paper id="7">
       <title>What <fixed-case>GPT</fixed-case>-4 Knows about Aspectual Coercion: Focused on “Begin the Book”</title>
       <author><first>Seohyun</first><last>Im</last></author>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>56–67</pages>
       <abstract>This paper explores whether Pre-trained Large Language Models (PLLMs) like GPT-4 can grasp profound linguistic insights into language phenomena such as Aspectual Coercion through interaction with Microsoft’s Copilot, which integrates GPT-4. Firstly, we examined Copilot’s understanding of the co-occurrence constraints of the aspectual verb “begin” and the complex-type noun “book” using the classic illustration of Aspectual Coercion, “begin the book.” Secondly, we verified Copilot’s awareness of both the default interpretation of “begin the book” with no specific context and the contextually preferred interpretation. Ultimately, Copilot provided appropriate responses regarding potential interpretations of “begin the book” based on its distributional properties and context-dependent preferred interpretations. However, it did not furnish sophisticated explanations concerning these interpretations from a linguistic theoretical perspective. On the other hand, by offering diverse interpretations grounded in distributional properties, language models like GPT-4 demonstrated their potential contribution to the refinement of linguistic theories. Furthermore, we suggested the feasibility of employing Language Models to construct language resources associated with language phenomena including Aspectual Coercion.</abstract>
       <url hash="e3af633f">2024.cogalex-1.7</url>
diff --git a/data/xml/2024.computel.xml b/data/xml/2024.computel.xml
index 86b3353050..c660ba4f48 100644
--- a/data/xml/2024.computel.xml
+++ b/data/xml/2024.computel.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-03-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages</booktitle>
-      <editor><first>Sarah</first><last>Moeller</last></editor>
+      <editor id="sarah-moeller"><first>Sarah</first><last>Moeller</last></editor>
       <editor><first>Godfred</first><last>Agyapong</last></editor>
       <editor><first>Antti</first><last>Arppe</last></editor>
       <editor><first>Aditi</first><last>Chaudhary</last></editor>
@@ -60,7 +60,7 @@
       <title><fixed-case>T</fixed-case> is for Treu, but how do you pronounce that? Using <fixed-case>C</fixed-case>-<fixed-case>LARA</fixed-case> to create phonetic texts for Kanak languages</title>
       <author><first>Pauline</first><last>Welby</last><affiliation>Aix Marseille Université, CNRS, Laboratoire Parole et Langage</affiliation></author>
       <author><first>Fabrice</first><last>Wacalie</last><affiliation>Université de la Nouvelle Calédonie</affiliation></author>
-      <author><first>Manny</first><last>Rayner</last><affiliation>University of South Australia</affiliation></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last><affiliation>University of South Australia</affiliation></author>
       <author><first>Chatgpt-4</first><last>C-Lara-Instance</last><affiliation>University of South Australia</affiliation></author>
       <pages>21-26</pages>
       <abstract>In Drehu, a language of the indigenous Kanak people of New Caledonia, the word treu ‘moon’ is pronounced [{tSe.u}]; but, even if they hear the word, the spelling pulls French speakers to a spurious pronunciation [tK{o}]. We implement a strategy to mitigate the influence of such orthographic conflicts, while retaining the benefits of written input on vocabulary learning. We present text in “phonetized” form, where words are broken down into components associated with mnemonically presented phonetic values, adapting features from the “Comment ça se prononce~?” multilingual phonetizer. We present an exploratory project where we used the ChatGPT-based Learning And Reading Assistant (C-LARA) to implement a version of the phonetizer strategy, outlining how the AI-engineered codebase and help from the AI made it easy to add the necessary extensions. We describe two proof-of-concept texts for learners produced using the platform, a Drehu alphabet book and a Drehu version of “The (North) Wind and the Sun”; both texts include native-speaker recorded audio, pronunciation respellings based on French orthography, and AI-generated illustrations.</abstract>
@@ -80,7 +80,7 @@
     <paper id="6">
       <title>Automatic Transcription of Grammaticality Judgements for Language Documentation</title>
       <author><first>Éric</first><last>Le Ferrand</last><affiliation>Boston College</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>33-38</pages>
       <abstract>Descriptive linguistics is a sub-field of linguistics that involves the collection and annotationof language resources to describe linguistic phenomena. The transcription of these resources is often described as a tedious task, and Automatic Speech Recognition (ASR) has frequently been employed to support this process. However, the typical research approach to ASR in documentary linguistics often only captures a subset of the field’s diverse reality. In this paper, we focus specifically on one type of data known as grammaticality judgment elicitation in the context of documenting Kréyòl Gwadloupéyen. We show that only a few minutes of speech is enough to fine-tune a model originally trained in French to transcribe segments in Kréyol.</abstract>
       <url hash="5bcf7e06">2024.computel-1.6</url>
diff --git a/data/xml/2024.conda.xml b/data/xml/2024.conda.xml
index 74f916eecb..b9cc72a7fa 100644
--- a/data/xml/2024.conda.xml
+++ b/data/xml/2024.conda.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 1st Workshop on Data Contamination (CONDA)</booktitle>
       <editor><first>Oscar</first><last>Sainz</last></editor>
       <editor><first>Iker</first><last>García Ferrero</last></editor>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Jon</first><last>Ander Campos</last></editor>
       <editor><first>Alon</first><last>Jacovi</last></editor>
       <editor><first>Yanai</first><last>Elazar</last></editor>
@@ -26,8 +26,8 @@
       <title>Evaluating <fixed-case>C</fixed-case>hinese Large Language Models on Discipline Knowledge Acquisition via Memorization and Robustness Assessment</title>
       <author><first>Chuang</first><last>Liu</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Renren</first><last>Jin</last></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>1-12</pages>
       <abstract>Chinese LLMs demonstrate impressive performance on NLP tasks, particularly on discipline knowledge benchmarks, with some results approaching those of GPT-4. Previous research has viewed these advancements as potential outcomes of data contamination or leakage, prompting efforts to create new detection methods and address evaluation issues in LLM benchmarks. However, there has been a lack of comprehensive assessment of the evolution of Chinese LLMs. To address this gap, this paper offers a thorough investigation of Chinese LLMs on discipline knowledge evaluation, delving into the advancements of various LLMs, including a group of related models and others. Specifically, we have conducted six assessments ranging from knowledge memorization to comprehension for robustness, encompassing tasks like predicting incomplete questions and options, identifying behaviors by the contaminational fine-tuning, and answering rephrased questions. Experimental findings indicate a positive correlation between the release time of LLMs and their memorization capabilities, but they struggle with variations in original question-options pairs. Additionally, our findings suggest that question descriptions have a more significant impact on LLMs’ performance.</abstract>
       <url hash="c02a0e92">2024.conda-1.1</url>
@@ -50,7 +50,7 @@
       <title>A Taxonomy for Data Contamination in Large Language Models</title>
       <author><first>Medha</first><last>Palavalli</last></author>
       <author><first>Amanda</first><last>Bertsch</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Matthew</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>22-40</pages>
       <abstract>Large language models pretrained on extensive web corpora demonstrate remarkable performance across a wide range of downstream tasks. However, a growing concern is data contamination, where evaluation datasets may unintentionally be contained in the pretraining corpus, inflating model performance. Decontamination, the process of detecting and removing such data, is a potential solution; yet these contaminants may originate from altered versions of the test set, evading detection during decontamination. How different types of contamination impact the performance of language models on downstream tasks is not fully understood. We present a taxonomy that categorizes the various types of contamination encountered by LLMs during the pretraining phase and identify which types pose the highest risk. We analyze the impact of contamination on two key NLP tasks—summarization and question answering—revealing how different types of contamination influence task performance during evaluation.</abstract>
       <url hash="fa3c42d3">2024.conda-1.3</url>
diff --git a/data/xml/2024.conll.xml b/data/xml/2024.conll.xml
index 2bfaebc3b6..dd8dbb0f96 100644
--- a/data/xml/2024.conll.xml
+++ b/data/xml/2024.conll.xml
@@ -97,7 +97,7 @@
       <author><first>Maria</first><last>Lomeli</last><affiliation>Meta</affiliation></author>
       <author><first>Patrick</first><last>Lewis</last></author>
       <author><first>Gautier</first><last>Izacard</last></author>
-      <author><first>Edouard</first><last>Grave</last><affiliation>Facebook</affiliation></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last><affiliation>Facebook</affiliation></author>
       <author><first>Sebastian</first><last>Riedel</last><affiliation>Google and University College London</affiliation></author>
       <author><first>Fabio</first><last>Petroni</last></author>
       <pages>69-83</pages>
@@ -113,7 +113,7 @@
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
       <author><first>Ratish</first><last>Puduppully</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft and Indian Institute of Technology, Madras</affiliation></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>84-104</pages>
       <abstract>Language Models (LMs) excel in natural language processing tasks for English but show reduced performance in most other languages. This problem is commonly tackled by continually pre-training and fine-tuning these models for said languages. A significant issue in this process is the limited vocabulary coverage in the original model’s tokenizer, leading to inadequate representation of new languages and necessitating an expansion of the tokenizer. The initialization of the embeddings corresponding to new vocabulary items presents a further challenge. Current strategies require cross-lingual embeddings and lack a solid theoretical foundation as well as comparisons with strong baselines. In this paper, we first establish theoretically that initializing within the convex hull of existing embeddings is a good initialization, followed by a novel but simple approach, <i>Constrained Word2Vec (CW2V)</i>, which does not require cross-lingual embeddings. Our study evaluates different initialization methods for expanding RoBERTa and LLaMA 2 across four languages and five tasks. The results show that CW2V performs equally well or even better than more advanced techniques. Additionally, simpler approaches like multivariate initialization perform on par with these advanced methods indicating that efficient large-scale multilingual continued pretraining can be achieved even with simpler initialization methods.</abstract>
       <url hash="1f522855">2024.conll-1.8</url>
@@ -123,7 +123,7 @@
     <paper id="9">
       <title>Critical Questions Generation: Motivation and Challenges</title>
       <author><first>Blanca</first><last>Calvo Figueras</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>105-116</pages>
       <abstract>The development of Large Language Models (LLMs) has brought impressive performances on mitigation strategies against misinformation, such as counterargument generation. However, LLMs are still seriously hindered by outdated knowledge and by their tendency to generate hallucinated content. In order to circumvent these issues, we propose a new task, namely, Critical Questions Generation, consisting of processing an argumentative text to generate the critical questions (CQs) raised by it.In argumentation theory CQs are tools designed to lay bare the blind spots of an argument by pointing at the information it could be missing.Thus, instead of trying to deploy LLMs to produce knowledgeable and relevant counterarguments, we use them to question arguments, without requiring any external knowledge.Research on CQs Generation using LLMs requires a reference dataset for large scale experimentation. Thus, in this work we investigate two complementary methods to create such a resource: (i) instantiating CQs templates as defined by Walton’s argumentation theory and (ii), using LLMs as CQs generators. By doing so, we contribute with a procedure to establish what is a valid CQ and conclude that, while LLMs are reasonable CQ generators, they still have a wide margin for improvement in this task.</abstract>
       <url hash="760a4b80">2024.conll-1.9</url>
@@ -240,7 +240,7 @@
       <title>A Novel Instruction Tuning Method for <fixed-case>V</fixed-case>ietnamese Mathematical Reasoning using Trainable Open-Source Large Language Models</title>
       <author><first>Nguyen Quang</first><last>Vinh</last></author>
       <author><first>Thanh-Do</first><last>Nguyen</last></author>
-      <author><first>Vinh Van</first><last>Nguyen</last><affiliation>Vietnam National University Hanoi</affiliation></author>
+      <author id="vinh-van-nguyen"><first>Vinh Van</first><last>Nguyen</last><affiliation>Vietnam National University Hanoi</affiliation></author>
       <author><first>Nam Khac-Hoai</first><last>Bui</last><affiliation>Viettel Group</affiliation></author>
       <pages>259-268</pages>
       <abstract>This study introduces Simple Reasoning with Code (SiRC), a novel instruction fine-tuning method for solving mathematical reasoning problems, particularly effective for Vietnamese, which is considered a low-resource language. Specifically, solving mathematical problems requires strategic and logical reasoning, which remains challenging in this research area. This paper presents a simple yet effective instruction fine-tuning method for mathematical reasoning. Unlike previous approaches, our proposed method effectively combines chain-of-thought reasoning with code transfer methods without requiring a sophisticated inference procedure. Furthermore, we focus on exploiting small open-source large language models (LLMs) for the Vietnamese language. In this regard, we first introduce a trainable Vietnamese math reasoning dataset, which is named ViMath-InstructCode. The proposed dataset is then used for fine-tuning open-source LLMs (e.g., less than 10 billion parameters). Experiments conducted on our custom ViMath-Bench dataset, the largest benchmarking dataset focusing on Vietnamese mathematical problems, indicate the promising results of our proposed method. Our source code and dataset are available for further exploitation.</abstract>
@@ -341,7 +341,7 @@
       <author><first>Dimosthenis</first><last>Antypas</last></author>
       <author><first>Zara</first><last>Siddique</last></author>
       <author><first>Nina</first><last>White</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>365-387</pages>
       <abstract>The ability to compare by analogy, metaphorically or not, lies at the core of how humans understand the world and communicate. In this paper, we study the likelihood of metaphoric outputs, and the capability of a wide range of pretrained transformer-based language models to identify metaphors from other types of analogies, including anomalous ones. In particular, we are interested in discovering whether language models recognise metaphorical analogies equally well as other types of analogies, and whether the model size has an impact on this ability. The results show that there are relevant differences using perplexity as a proxy, with the larger models reducing the gap when it comes to analogical processing, and for distinguishing metaphors from incorrect analogies. This behaviour does not result in increased difficulties for larger generative models in identifying metaphors in comparison to other types of analogies from anomalous sentences in a zero-shot generation setting, when perplexity values of metaphoric and non-metaphoric analogies are similar.</abstract>
       <url hash="035f5e6b">2024.conll-1.28</url>
@@ -424,7 +424,7 @@
     <paper id="34">
       <title>Image-conditioned human language comprehension and psychometric benchmarking of visual language models</title>
       <author><first>Subha Nawer</first><last>Pushpita</last></author>
-      <author><first>Roger P.</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="roger-levy"><first>Roger P.</first><last>Levy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>447-457</pages>
       <abstract>Large language model (LLM)s’ next-word predictions have shown impressive performance in capturing human expectations during real-time language comprehension. This finding has enabled a line of research on psychometric benchmarking of LLMs against human language-comprehension data in order to reverse-engineer humans’ linguistic subjective probability distributions and representations. However, to date, this work has exclusively involved unimodal (language-only) comprehension data, whereas much human language use takes place in rich multimodal contexts. Here we extend psychometric benchmarking to visual language models (VLMs). We develop a novel experimental paradigm, <tex-math>\textit{Image-Conditioned Maze Reading}</tex-math>, in which participants first view an image and then read a text describing an image within the Maze paradigm, yielding word-by-word reaction-time measures with high signal-to-noise ratio and good localization of expectation-driven language processing effects. We find a large facilitatory effect of correct image context on language comprehension, not only for words such as concrete nouns that are directly grounded in the image but even for ungrounded words in the image descriptions. Furthermore, we find that VLM surprisal captures most to all of this effect. We use these findings to benchmark a range of VLMs, showing that models with lower perplexity generally have better psychometric performance, but that among the best VLMs tested perplexity and psychometric performance dissociate. Overall, our work offers new possibilities for connecting psycholinguistics with multimodal LLMs for both scientific and engineering goals.</abstract>
       <url hash="d684404c">2024.conll-1.34</url>
@@ -436,8 +436,8 @@
       <author><first>Joselyn</first><last>Rodriguez</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Kamala</first><last>Sreepada</last></author>
       <author><first>Ruolan Leslie</first><last>Famularo</last></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Naomi</first><last>Feldman</last><affiliation>University of Maryland, College Park</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>458-463</pages>
       <abstract>State of the art models in automatic speech recognition have shown remarkable improvements due to modern self-supervised (SSL) transformer-based architectures such as wav2vec 2.0 (Baevski et al., 2020). However, how these models encode phonetic information is still not well understood. We explore whether SSL speech models display a linguistic property that characterizes human speech perception: language specificity. We show that while wav2vec 2.0 displays an overall language specificity effect when tested on Hindi vs. English, it does not resemble human speech perception when tested on finer-grained differences in Hindi speech contrasts.</abstract>
       <url hash="d8f0f12c">2024.conll-1.35</url>
@@ -583,7 +583,7 @@
       <author><first>Bastian</first><last>Bunzeck</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Daniel</first><last>Duran</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Leonie</first><last>Schade</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>54-64</pages>
       <abstract>We present grapheme-llama and phoneme-llama, character-based language models trained for the 2024 BabyLM challenge. Through these models, we explore an under-researched approach to downsizing: replacing subword-based tokenization with character-level tokenization, drastically reducing the vocabulary size. The grapheme model is trained on a standard BabyLM dataset, while the phoneme model uses a phoneme-converted version of this dataset. Results show that grapheme-based models perform better overall, achieving scores comparable to subword-based models on grammatical benchmarks. Despite lower performance, phoneme models also demonstrate promising grammatical learning. We argue that our results challenge conventional wisdom on language modeling techniques and open up novel research questions with character- and phoneme-based models as objects of inquiry.</abstract>
       <url hash="26524275">2024.conll-babylm.5</url>
@@ -602,7 +602,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>B</fixed-case>aby<fixed-case>HGRN</fixed-case>: Exploring <fixed-case>RNN</fixed-case>s for Sample-Efficient Language Modeling</title>
-      <author><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <author><first>Jonas</first><last>Golde</last><affiliation>Department of Computer Science, Humboldt University Berlin, Humboldt Universität Berlin</affiliation></author>
       <author><first>Alan</first><last>Akbik</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <pages>82-94</pages>
@@ -655,10 +655,10 @@
     </paper>
     <paper id="12">
       <title>Extending the <fixed-case>B</fixed-case>aby<fixed-case>LM</fixed-case> Initiative : Promoting Diversity in Datasets and Metrics through High-Quality Linguistic Corpora</title>
-      <author><first>Laurent</first><last>Prévot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last><affiliation>Université d’Aix-Marseille</affiliation></author>
       <author><first>Sheng-Fu</first><last>Wang</last><affiliation>Academia Sinica</affiliation></author>
       <author><first>Jou-An</first><last>Chi</last><affiliation>NA</affiliation></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last><affiliation>National Taiwan University</affiliation></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last><affiliation>National Taiwan University</affiliation></author>
       <pages>147-158</pages>
       <abstract>BabyLM paves the way for a range of experiments aimed at better understanding language models (LMs) and the differences and similarities between human and artificial language learning. However, the current framework is limited to the English language and a narrow but significant range of evaluation metrics, primarily focused on syntax, semantics, and pragmatics. In this paper, we propose some steps towards extending the framework to other languages, specifically Mandarin Chinese and French, leveraging existing linguistic resources for these languages. Additionally, we advocate for greater exploration of genre variations within subcorpora for training LMs, as well as for the adoption of additional evaluation metrics with different underlying principles. Our proposal consists of using high-quality spontaneous speech corpora as a source for extracting production-related variables, which the models are then fine-tuned to predict. We hypothesize that these production-related features offer insights into the language processing mechanisms underlying the data and that cognitively sensitive models should outperform others in predicting these features. Specifically, we propose focusing on the prediction of phenomena such as speech reductions, prosodic prominences, sequences co-occurring with listeners’ backchannels, and disfluencies. To illustrate our approach, we present an example involving the prediction of speech reductions in spontaneous speech in two different languages (French and English), using models trained on 10 million tokens from different data source mixtures. Although the results are preliminary, they suggest that this task can characterize models for predicting human language processing.</abstract>
       <url hash="b8e7fbee">2024.conll-babylm.12</url>
@@ -677,7 +677,7 @@
       <author><first>Lukas</first><last>Edman</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Lisa</first><last>Bylinina</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Faeze</first><last>Ghorbanpour</last></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>166-173</pages>
       <abstract>This paper describes a linguistically-motivated approach to the 2024 edition of the BabyLM Challenge. Rather than pursuing a first language learning (L1) paradigm, we approach the challenge from a second language (L2) learning perspective. In L2 learning, there is a stronger focus on learning explicit linguistic information, such as grammatical notions, definitions of words or different ways of expressing a meaning. This makes L2 learning potentially more efficient and concise. We approximate this using data from Wiktionary, grammar examples either generated by an LLM or sourced from grammar books, and paraphrase data.We find that explicit information about word meaning (in our case, Wiktionary) does not boost model performance, while grammatical information can give a small improvement. The most impactful data ingredient is sentence paraphrases, with our two best models being trained on 1) a mix of paraphrase data and data from the BabyLM pretraining dataset, and 2) exclusively paraphrase data.</abstract>
       <url hash="d20a69a2">2024.conll-babylm.14</url>
@@ -747,7 +747,7 @@
       <title>A surprisal oracle for when every layer counts</title>
       <author><first>Xudong</first><last>Hong</last><affiliation>Saarland University and Max-Planck Institute for Informatics</affiliation></author>
       <author><first>Sharid</first><last>Loáiciga</last><affiliation>University of Gothenburg, Sweden</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <pages>237-243</pages>
       <abstract>Active Curriculum Language Modeling (ACLM; Hong et al., 2023) is a learner-directed approach to training a language model. We proposed the original version of this process in our submission to the BabyLM 2023 task, and now we propose an updated ACLM process for the BabyLM 2024 task. ACLM involves an iteratively-and dynamically-constructed curriculum informed over the training process by a model of uncertainty; other training items that are similarly uncertain to a least certain candidate item are prioritized. Our new process improves the similarity model so that it is more dynamic, and we run ACLM over the most successful model from the BabyLM 2023 task: ELC-BERT (Charpentier and Samuel, 2023). We find that while our models underperform on fine-grained grammatical inferences, they outperform the BabyLM 2024 official base-lines on common-sense and world-knowledge tasks. We make our code available at https://github.com/asayeed/ActiveBaby.</abstract>
       <url hash="357ea52b">2024.conll-babylm.21</url>
diff --git a/data/xml/2024.cpss.xml b/data/xml/2024.cpss.xml
index 2a4320f354..4e2900b3e4 100644
--- a/data/xml/2024.cpss.xml
+++ b/data/xml/2024.cpss.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 4th Workshop on Computational Linguistics for the Political and Social Sciences: Long and short papers</booktitle>
       <editor><first>Christopher</first><last>Klamm</last></editor>
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
-      <editor><first>Simone Paolo</first><last>Ponzetto</last></editor>
+      <editor id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></editor>
       <editor><first>Ines</first><last>Rehbein</last></editor>
       <editor><first>Indira</first><last>Sen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
diff --git a/data/xml/2024.crac.xml b/data/xml/2024.crac.xml
index ce09b3f878..7063c92277 100644
--- a/data/xml/2024.crac.xml
+++ b/data/xml/2024.crac.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the Seventh Workshop on Computational Models of Reference, Anaphora and Coreference</booktitle>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <editor><first>Anna</first><last>Nedoluzhko</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Miami</address>
@@ -58,7 +58,7 @@
       <author><first>John</first><last>Bauer</last></author>
       <author><first>Karel</first><last>D’Oosterlinck</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>33–40</pages>
       <url hash="159606dd">2024.crac-1.4</url>
       <bibkey>liu-etal-2024-mscaw</bibkey>
@@ -66,7 +66,7 @@
     </paper>
     <paper id="5">
       <title>Unifying the Scope of Bridging Anaphora Types in <fixed-case>E</fixed-case>nglish: Bridging Annotations in <fixed-case>ARRAU</fixed-case> and <fixed-case>GUM</fixed-case></title>
-      <author><first>Lauren</first><last>Levine</last></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>41–51</pages>
       <url hash="7511e66b">2024.crac-1.5</url>
@@ -108,8 +108,8 @@
       <author><first>Ondrej</first><last>Prazak</last></author>
       <author><first>Jakub</first><last>Sido</last></author>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>78–96</pages>
       <url hash="a6119d4c">2024.crac-1.8</url>
       <bibkey>novak-etal-2024-findings</bibkey>
diff --git a/data/xml/2024.ctt.xml b/data/xml/2024.ctt.xml
index 5d016e21f4..ce303fea52 100644
--- a/data/xml/2024.ctt.xml
+++ b/data/xml/2024.ctt.xml
@@ -61,7 +61,7 @@ from literary works</title>
     <paper id="5">
       <title>An Analysis of Surprisal Uniformity in Machine and Human Translations</title>
       <author><first>Josef</first><last>Jon</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University Prague</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University Prague</affiliation></author>
       <pages>40-56</pages>
       <abstract>This study examines neural machine translation (NMT) and its performance on texts that diverege from typical standards, focusing on how information is organized within sentences. We analyze surprisal distributions in source texts, human translations, and machine translations across several datasets to determine if NMT systems naturally promote a uniform density of surprisal in their translations, even when the original texts do not adhere to this principle.The findings reveal that NMT tends to align more closely with source texts in terms of surprisal uniformity compared to human translations.We analyzed absolute values of the surprisal uniformity measures as well, expecting that human translations will be less uniform. In contradiction to our initial hypothesis, we did not find comprehensive evidence for this claim, with some results suggesting this might be the case for very diverse texts, like poetry.</abstract>
       <url hash="c72e78a7">2024.ctt-1.5</url>
diff --git a/data/xml/2024.customnlp4u.xml b/data/xml/2024.customnlp4u.xml
index f3929c41ac..0d5143ff81 100644
--- a/data/xml/2024.customnlp4u.xml
+++ b/data/xml/2024.customnlp4u.xml
@@ -9,7 +9,7 @@
       <editor><first>Weijia</first><last>Shi</last></editor>
       <editor><first>Shirley Anugrah</first><last>Hayati</last></editor>
       <editor><first>Yulia</first><last>Tsvetkov</last></editor>
-      <editor><first>Noah</first><last>Smith</last></editor>
+      <editor id="noah-a-smith"><first>Noah</first><last>Smith</last></editor>
       <editor><first>Hannaneh</first><last>Hajishirzi</last></editor>
       <editor><first>Dongyeop</first><last>Kang</last></editor>
       <editor><first>David</first><last>Jurgens</last></editor>
@@ -43,7 +43,7 @@
       <title>Empowering <fixed-case>AAC</fixed-case> Users: A Systematic Integration of Personal Narratives with Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Sayantan</first><last>Pal</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <author><first>Souvik</first><last>Das</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <author><first>Jeff</first><last>Higginborham</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <author><first>Jenna</first><last>Bizovi</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>12-25</pages>
@@ -183,7 +183,7 @@
       <title>Constructing Domain-Specific Evaluation Sets for <fixed-case>LLM</fixed-case>-as-a-judge</title>
       <author><first>Ravi Shanker</first><last>Raju</last><affiliation>Sambanova Systems</affiliation></author>
       <author><first>Swayambhoo</first><last>Jain</last><affiliation>Sambanova Systems</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Jonathan Lingjie</first><last>Li</last></author>
       <author><first>Urmish</first><last>Thakker</last><affiliation>SambaNova Systems</affiliation></author>
       <pages>167-181</pages>
@@ -281,7 +281,7 @@
       <author><first>Hwajung</first><last>Hong</last></author>
       <author><first>Juho</first><last>Kim</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>So-Yeon</first><last>Ahn</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>284-293</pages>
       <abstract>In the context of English as a Foreign Language (EFL) writing education, LLM-as-a-tutor can assist students by providing real-time feedback on their essays. However, challenges arise in assessing LLM-as-a-tutor due to differing standards between educational and general use cases. To bridge this gap, we integrate pedagogical principles to assess student-LLM interaction. First, we explore how LLMs can function as English tutors, providing effective essay feedback tailored to students. Second, we propose three criteria to evaluate LLM-as-a-tutor specifically designed for EFL writing education, emphasizing pedagogical aspects. In this process, EFL experts evaluate the feedback from LLM-as-a-tutor regarding (1) quality and (2) characteristics. On the other hand, EFL learners assess their (3) learning outcomes from interaction with LLM-as-a-tutor. This approach lays the groundwork for developing LLMs-as-a-tutor tailored to the needs of EFL learners, advancing the effectiveness of writing education in this context.</abstract>
       <url hash="6202961b">2024.customnlp4u-1.21</url>
@@ -307,7 +307,7 @@
       <title>Adapting <fixed-case>LLM</fixed-case> Predictions in In-Context Learning with Data Priors</title>
       <author><first>Javier</first><last>Chiyah-Garcia</last></author>
       <author><first>Prasoon</first><last>Goyal</last><affiliation>Amazon</affiliation></author>
-      <author><first>Michael</first><last>Johnston</last><affiliation>Amazon</affiliation></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last><affiliation>Amazon</affiliation></author>
       <author><first>Reza</first><last>Ghanadan</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>305-316</pages>
       <abstract>In-Context Learning (ICL) has enabled Large Language Models (LLMs) to excel as general-purpose models in zero and few-shot task settings. However, since LLMs are often not trained on the downstream tasks, they lack crucial contextual knowledge from the data distributions, which limits their task adaptability.This paper explores using data priors to automatically customize prompts in ICL. We extract these priors in a dataset-agnostic way basedon historical information, enabling LLMs to personalize their output towards users or tasks at inference time. We find that they improve LLM’s output by injecting latent dataset-specific information for the task of rating prediction. Throughout a series of experiments, we show replicable results across LLMs and datasets on what information and methods are most effective for adapting ICL outputs with priors. Our findings offer a systematic approach to customizing prompts with additional information in a privacy-friendly manner, requiring only aggregated data that is computationally efficient.</abstract>
diff --git a/data/xml/2024.delite.xml b/data/xml/2024.delite.xml
index 7cb464c124..538d0b041c 100644
--- a/data/xml/2024.delite.xml
+++ b/data/xml/2024.delite.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2024-05-16" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024</booktitle>
-      <editor><first>Annette</first><last>Hautli-Janisz</last></editor>
+      <editor id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></editor>
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
       <editor><first>Lucas</first><last>Anastasiou</last></editor>
       <editor><first>Valentin</first><last>Gold</last></editor>
       <editor><first>Anna De</first><last>Liddo</last></editor>
-      <editor><first>Chris</first><last>Reed</last></editor>
+      <editor id="chris-reed"><first>Chris</first><last>Reed</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
diff --git a/data/xml/2024.determit.xml b/data/xml/2024.determit.xml
index 164676ce6d..cffe72b69d 100644
--- a/data/xml/2024.determit.xml
+++ b/data/xml/2024.determit.xml
@@ -49,7 +49,7 @@
     <paper id="4">
       <title>Towards Automatic <fixed-case>F</fixed-case>innish Text Simplification</title>
       <author><first>Anna</first><last>Dmitrieva</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>39–50</pages>
       <abstract>Automatic text simplification (ATS/TS) models typically require substantial parallel training data. This paper describes our work on expanding the Finnish-Easy Finnish parallel corpus and making baseline simplification models. We discuss different approaches to document and sentence alignment. After finding the optimal alignment methodologies, we increase the amount of document-aligned data 6.5 times and add a sentence-aligned version of the dataset consisting of more than twelve thousand sentence pairs. Using sentence-aligned data, we fine-tune two models for text simplification. The first is mBART, a sequence-to-sequence translation architecture proven to show good results for monolingual translation tasks. The second is the Finnish GPT model, for which we utilize instruction fine-tuning. This work is the first attempt to create simplification models for Finnish using monolingual parallel data in this language. The data has been deposited in the Finnish Language Bank (Kielipankki) and is available for non-commercial use, and the models will be made accessible through either Kielipankki or public repositories such as Huggingface or GitHub.</abstract>
       <url hash="a7279e36">2024.determit-1.4</url>
@@ -101,7 +101,7 @@
       <author><first>Lucía</first><last>Ormaechea</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>90–102</pages>
       <abstract>Automatic Text Simplification (ATS) aims at rewriting texts into simpler variants while preserving their original meaning, so they can be more easily understood by different audiences. While ATS has been widely used for written texts, its application to spoken language remains unexplored, even if it is not exempt from difficulty. This study aims to characterize the edit operations performed in order to simplify French transcripts for non-native speakers. To do so, we relied on a data sample randomly extracted from the Orféo-CEFC French spontaneous speech dataset. In the absence of guidelines to direct this process, we adopted an intuitive simplification approach, so as to investigate the crafted simplifications based on expert linguists’ criteria, and to compare them with those produced by a generative AI (namely, ChatGPT). The results, analyzed quantitatively and qualitatively, reveal that the most common edits are deletions, and affect oral production aspects, like restarts or hesitations. Consequently, candidate simplifications are typically register-standardized sentences that solely include the propositional content of the input. The study also examines the alignment between human- and machine-based simplifications, revealing a moderate level of agreement, and highlighting the subjective nature of the task. The findings contribute to understanding the intricacies of simplifying spontaneous spoken language. In addition, the provision of a small-scale parallel dataset derived from such expert simplifications, Propicto-Orféo-Simple, can facilitate the evaluation of speech simplification solutions.</abstract>
@@ -114,7 +114,7 @@
       <author><first>Sultan</first><last>Almujaiwel</last></author>
       <author><first>Damith</first><last>Premasiri</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>103–113</pages>
       <abstract>This research introduces DARES, a dataset for assessing the readability of Arabic text in Saudi school materials. DARES compromise of 13335 instances from textbooks used in 2021 and contains two subtasks; (a) Coarse-grained readability assessment where the text is classified into different educational levels such as primary and secondary. (b) Fine-grained readability assessment where the text is classified into individual grades.. We fine-tuned five transformer models that support Arabic and found that CAMeLBERTmix performed the best in all input settings. Evaluation results showed high performance for the coarse-grained readability assessment task, achieving a weighted F1 score of 0.91 and a macro F1 score of 0.79. The fine-grained task achieved a weighted F1 score of 0.68 and a macro F1 score of 0.55. These findings demonstrate the potential of our approach for advancing Arabic text readability assessment in education, with implications for future innovations in the field.</abstract>
       <url hash="32c655e3">2024.determit-1.10</url>
@@ -137,7 +137,7 @@
     <paper id="12">
       <title>The Simplification of the Language of Public Administration: The Case of Ombudsman Institutions</title>
       <author><first>Gabriel</first><last>Gonzalez-Delgado</last></author>
-      <author><first>Borja</first><last>Navarro-Colorado</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro-Colorado</last></author>
       <pages>125–133</pages>
       <abstract>Language produced by Public Administrations has crucial implications in citizens’ lives. However, its syntactic complexity and the use of legal jargon, among other factors, make it difficult to be understood for laypeople and certain target audiences. The NLP task of Automatic Text Simplification (ATS) can help to the necessary simplification of this technical language. For that purpose, specialized parallel datasets of complex-simple pairs need to be developed for the training of these ATS systems. In this position paper, an on-going project is presented, whose main objectives are (a) to extensively analyze the syntactical, lexical, and discursive features of the language of English-speaking ombudsmen, as samples of public administrative language, with special attention to those characteristics that pose a threat to comprehension, and (b) to develop the OmbudsCorpus, a parallel corpus of complex-simple supra-sentential fragments from ombudsmen’s case reports that have been manually simplified by professionals and annotated with standardized simplification operations. This research endeavor aims to provide a deeper understanding of the simplification process and to enhance the training of ATS systems specialized in administrative texts.</abstract>
       <url hash="f5ac291e">2024.determit-1.12</url>
@@ -155,7 +155,7 @@
     <paper id="14">
       <title><fixed-case>LARGEMED</fixed-case>: A Resource for Identifying and Generating Paraphrases for <fixed-case>F</fixed-case>rench Medical Terms</title>
       <author><first>Ioana</first><last>Buhnila</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>141–151</pages>
       <abstract>This article presents a method extending an existing French corpus of paraphrases of medical terms ANONYMOUS with new data from Web archives created during the Covid-19 pandemic. Our method semi-automatically detects new terms and paraphrase markers introducing paraphrases from these Web archives, followed by a manual annotation step to identify paraphrases and their lexical and semantic properties. The extended large corpus LARGEMED could be used for automatic medical text simplification for patients and their families. To automatise data collection, we propose two experiments. The first experiment uses the new LARGEMED dataset to train a binary classifier aiming to detect new sentences containing possible paraphrases. The second experiment aims to use correct paraphrases to train a model for paraphrase generation, by adapting T5 Language Model to the paraphrase generation task using an adversarial algorithm.</abstract>
       <url hash="c26fa6c5">2024.determit-1.14</url>
@@ -174,7 +174,7 @@
     <paper id="16">
       <title>Legal Science and Compute Science: A Preliminary Discussions on How to Represent the “Penumbra” Cone with <fixed-case>AI</fixed-case></title>
       <author><first>Angela</first><last>Condello</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <pages>179–184</pages>
       <abstract>Legal science encounters significant challenges with the widespread integration of AI software across various legal operations. The distinction between signs, senses, and references from a linguistic point of view, as drawn by Gottlob Frege, underscores the complexity of legal language, especially in multilingual contexts like the European Union. In this paper, we describe the problems of legal terminology, examining the “penumbra” problem through Herbert Hart’s legal theory of meaning. We also analyze the feasibility of training automatic systems to handle conflicts between different interpretations of legal norms, particularly in multilingual legal systems. By examining the transformative impact of Artificial Intelligence on traditional legal practices, this research contributes to the theoretical discussion about the exploration of innovative methodologies for simplifying complex terminologies without compromising meaning.</abstract>
       <url hash="2cdf77a6">2024.determit-1.16</url>
diff --git a/data/xml/2024.dlnld.xml b/data/xml/2024.dlnld.xml
index 68a2ac7460..d27b913cac 100644
--- a/data/xml/2024.dlnld.xml
+++ b/data/xml/2024.dlnld.xml
@@ -3,8 +3,8 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024</booktitle>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
-      <editor><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
       <editor><first>Giedre Valunaite</first><last>Oleskeviciene</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
@@ -23,7 +23,7 @@
       <author><first>Moritz</first><last>Blum</last></author>
       <author><first>Gennaro</first><last>Nolano</last></author>
       <author><first>Basil</first><last>Ell</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>1–13</pages>
       <abstract>Graph Neural Networks(GNNs) have been applied successfully to various NLP tasks, particularly Relation Extraction(RE). Even though most of these approaches rely on the syntactic dependency tree of a sentence to derive a graph representation, the impact of this choice compared to other possible graph representations has not been evaluated. We examine the effect of representing text though a graph of different graph representations for GNNs that are applied to RE, considering, e.g., a fully connected graph of tokens, of semantic role structures, and combinations thereof. We further examine the impact of background knowledge injection from Knowledge Graphs(KGs) into the graph representation to achieve enhanced graph representations. Our results show that combining multiple graph representations can improve the model’s predictions. Moreover, the integration of background knowledge positively impacts scores, as enhancing the text graphs with Wikidata features or WordNet features can lead to an improvement of close to 0.1 points in F1.</abstract>
       <url hash="f39c3c41">2024.dlnld-1.1</url>
@@ -74,7 +74,7 @@
       <title>Evaluating Large Language Models for Linguistic Linked Data Generation</title>
       <author><first>Maria Pia</first><last>di Buono</last></author>
       <author><first>Blerina</first><last>Spahiu</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <pages>66–75</pages>
       <abstract>Large language models (LLMs) have revolutionized human-machine interaction with their ability to converse and perform various language tasks. This study investigates the potential of LLMs for knowledge formalization using well-defined vocabularies, specifically focusing on OntoLex-Lemon. As a preliminary exploration, we test four languages (English, Italian, Albanian, Romanian) and analyze the formalization quality of nine words with varying characteristics applying a multidimensional evaluation approach. While manual validation provided initial insights, it highlights the need for developing scalable evaluation methods for future large-scale experiments. This research aims to initiate a discussion on the potential and challenges of utilizing LLMs for knowledge formalization within the Semantic Web framework.</abstract>
       <url hash="0841a12e">2024.dlnld-1.6</url>
diff --git a/data/xml/2024.dmr.xml b/data/xml/2024.dmr.xml
index 8ab3bb5d63..20e3840d3f 100644
--- a/data/xml/2024.dmr.xml
+++ b/data/xml/2024.dmr.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth International Workshop on Designing Meaning Representations @ LREC-COLING 2024</booktitle>
-      <editor><first>Claire</first><last>Bonial</last></editor>
+      <editor id="claire-bonial"><first>Claire</first><last>Bonial</last></editor>
       <editor><first>Julia</first><last>Bonn</last></editor>
       <editor><first>Jena D.</first><last>Hwang</last></editor>
       <publisher>ELRA and ICCL</publisher>
@@ -58,7 +58,7 @@
       <author><first>Adam</first><last>Pollins</last></author>
       <author><first>Rebekah</first><last>Tozier</last></author>
       <author><first>Olga</first><last>Babko-Malaya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>30–38</pages>
       <abstract>Semantic role labeling (SRL) resources, such as Proposition Bank (PropBank), provide useful input to downstream applications. In this paper we present some challenges and insights we learned while expanding the previously developed Russian PropBank. This new effort involved annotation and adjudication of all predicates within a subset of the prior work in order to provide a test corpus for future applications. We discuss a number of new issues that arose while developing our PropBank for Russian as well as our solutions. Framing issues include: distinguishing between morphological processes that warrant new frames, differentiating between modal verbs and predicate verbs, and maintaining accurate representations of a given language’s semantics. Annotation issues include disagreements derived from variability in Universal Dependency parses and semantic ambiguity within the text. Finally, we demonstrate how Russian sentence structures reveal inherent limitations to PropBank’s ability to capture semantic data. These discussions should prove useful to anyone developing a PropBank or similar SRL resources for a new language.</abstract>
       <url hash="80246302">2024.dmr-1.4</url>
@@ -69,7 +69,7 @@
       <author><first>Leixin</first><last>Zhang</last></author>
       <author><first>David</first><last>Burian</last></author>
       <author><first>Vojtěch</first><last>John</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>39–47</pages>
       <abstract>This study evaluates the extent to which semantic information is preserved within sentence embeddings generated from state-of-art sentence embedding models: SBERT and LaBSE. Specifically, we analyzed 13 semantic attributes in sentence embeddings. Our findings indicate that some semantic features (such as tense-related classes) can be decoded from the representation of sentence embeddings. Additionally, we discover the limitation of the current sentence embedding models: inferring meaning beyond the lexical level has proven to be difficult.</abstract>
       <url hash="e45365fa">2024.dmr-1.5</url>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="6">
       <title>A Quantum Theory of Terms and New Challenges to Meaning Representation of Quanterms</title>
-      <author><first>Diego</first><last>Burgos</last></author>
+      <author id="diego-a-burgos"><first>Diego</first><last>Burgos</last></author>
       <pages>48–53</pages>
       <abstract>This article discusses the challenges to meaning representation of terms posed by a quantum theory of terms (QTT) that was recently reported. We first summarize this theory and then highlight the difficulties of representing quanterms, which is the name we coined for the view that the QTT has of terms as quantum systems by analogy with quantum objects in quantum mechanics. We briefly summarize the representation practices followed to date to record and represent terminology. We use findings reported in the literature to model both terms and quanterms and found that current representations of terms in specialized repositories are collapsed quanterms at the expense of other states of the original quanterm. In this work, both quanterms and collapsed quanterms are mathematically modelled following formulations used in quantum mechanics. These formulations suggest that representations of quanterms need to include information about the probabilities of quanterm states and the role they play in the entanglement of terms for phenomena such as specialized collocations.</abstract>
       <url hash="acdc2edc">2024.dmr-1.6</url>
@@ -105,7 +105,7 @@
       <title>Argument Sharing in Meaning Representation Parsing</title>
       <author><first>Maja</first><last>Buljan</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>77–87</pages>
       <abstract>We present a contrastive study of argument sharing across three graph-based meaning representation frameworks, where semantically shared arguments manifest as reentrant graph nodes. For a state-of-the-art graph parser, we observe how parser performance – in terms of output quality – covaries with overall graph complexity, on the one hand, and presence of different types of reentrancies, on the other hand. We identify common linguistic phenomena that give rise to shared arguments, and therefore node reentrancies, through a small-case and partially automated annotation study and parallel error anaylsis of actual parser outputs. Our results provide new insights into the distribution of different types of reentrancies in meaning representation graphs for three distinct frameworks, as well as on the effects that these structures have on parser performance, thus suggesting both novel cross-framework generalisations as well as avenues for focussed parser development.</abstract>
       <url hash="467b837d">2024.dmr-1.9</url>
@@ -113,10 +113,10 @@
     </paper>
     <paper id="10">
       <title>Mapping <fixed-case>C</fixed-case>zech Verbal Valency to <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Argument Labels</title>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>88–100</pages>
       <abstract>For many years, there has been attempts to compare predicate-argument labeling schemas between formalism, typically under the dependency assumptions (even if the annotation by these schemas could have been performed on either constituent-based specifications or dependency ones). Given the growing number of resources that link various lexical resources to one another, as well as thanks to parallel annotated corpora (with or without annotation), it is now possible to do more in-depth studies of those correspondences. We present here a high-coverage pilot study of mapping the labeling system used in PropBank (for English) to Czech, which has so far used mainly valency lexicons (in several closely related forms) for annotation projects, under a different level of specification and different theoretical assumptions. The purpose of this study is both theoretical (comparing the argument labeling schemes) and practical (to be able to annotate Czech under the standard UMR specifications).</abstract>
       <url hash="119b06ce">2024.dmr-1.10</url>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="13">
       <title>Extending <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et’s Verb-Specific Features to Enhance Selectional Preferences of Semantic Roles</title>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <pages>124–130</pages>
       <abstract>This work proposes expanding the thematic role selectional preferences used in the lexical resource VerbNet as a way to increase the available semantic information in the resource, induce semantically-based subclasses for the more generic VerbNet classes, and create new links across classes. The addition of verb-specific features in the latest version of VerbNet provides a means for adding more specific selectional preferences based on the meaning of a class’s individual member verbs. These features could refine both the instantiated class roles and the new implicit roles introduced in VerbNet version 4. We suggest 49 classes that would benefit from 111 verb-specific selectional preferences and explain how they would enhance VerbNet’s semantic representations.</abstract>
       <url hash="7dd87420">2024.dmr-1.13</url>
@@ -170,7 +170,7 @@
       <title>Accelerating <fixed-case>UMR</fixed-case> Adoption: Neuro-Symbolic Conversion from <fixed-case>AMR</fixed-case>-to-<fixed-case>UMR</fixed-case> with Low Supervision</title>
       <author><first>Claire Benet</first><last>Post</last></author>
       <author><first>Marie C.</first><last>McGregor</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <pages>140–150</pages>
       <abstract>Despite Uniform Meaning Representation’s (UMR) potential for cross-lingual semantics, limited annotated data has hindered its adoption. There are large datasets of English AMRs (Abstract Meaning Representations), but the process of converting AMR graphs to UMR graphs is non-trivial. In this paper we address a complex piece of that conversion process, namely cases where one AMR role can be mapped to multiple UMR roles through a non-deterministic process. We propose a neuro-symbolic method for role conversion, integrating animacy parsing and logic rules to guide a neural network, and minimizing human intervention. On test data, the model achieves promising accuracy, highlighting its potential to accelerate AMR-to-UMR conversion. Future work includes expanding animacy parsing, incorporating human feedback, and applying the method to broader aspects of conversion. This research demonstrates the benefits of combining symbolic and neural approaches for complex semantic tasks.</abstract>
diff --git a/data/xml/2024.dravidianlangtech.xml b/data/xml/2024.dravidianlangtech.xml
index 746b3a2e02..f06a7cfae3 100644
--- a/data/xml/2024.dravidianlangtech.xml
+++ b/data/xml/2024.dravidianlangtech.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</booktitle>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
       <editor><first>Ruba</first><last>Priyadharshini</last></editor>
-      <editor><first>Anand Kumar</first><last>Madasamy</last></editor>
+      <editor id="anand-kumar-m"><first>Anand Kumar</first><last>Madasamy</last></editor>
       <editor><first>Sajeetha</first><last>Thavareesan</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <editor><first>Rajeswari</first><last>Nadarajan</last></editor>
@@ -73,7 +73,7 @@
     <paper id="5">
       <title>Exploring the impact of noise in low-resource <fixed-case>ASR</fixed-case> for <fixed-case>T</fixed-case>amil</title>
       <author><first>Vigneshwar</first><last>Lakshminarayanan</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>30-34</pages>
       <abstract>The use of deep learning algorithms has resulted in significant progress in automatic speech recognition (ASR). Robust high-accuracy ASR models typically require thousands or tens of thousands of hours of speech data, but even the strongest models tend fail under noisy conditions. Unsurprisingly, the impact of noise on accuracy is more drastic in low-resource settings. In this paper, we investigate the impact of noise on ASR in a low-resource setting. We explore novel methods for developing noise-robust ASR models using a a small dataset for Tamil, a widely-spoken but under-resourced Dravidian languages. We add various noises to the audio data to determine the impact of different kinds of noise (e.g., punctuated vs. constant, man-made vs natural) We also explore the relationship between different data augmentation methods are better suited to handling different types of noise. Our results show that all noises, regardless of the type, had an impact on ASR performance, and that upgrading the architecture alone could not mitigate the impact of noise. SpecAugment, the most common data augmentation method, was not as helpful as raw data augmentation, in which noise is explicitly added to the training data. Raw data augmentation enhances ASR performance on both clean data and noise-mixed data.</abstract>
       <url hash="e1593a27">2024.dravidianlangtech-1.5</url>
@@ -197,7 +197,7 @@
       <author><first>Muhammad</first><last>Arif</last></author>
       <author><first>M.</first><last>Ahmad</last></author>
       <author><first>E</first><last>Felipe-Riveron</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>85-90</pages>
       <abstract>In the contemporary digital landscape, social media has emerged as a prominent means of communication and information dissemination, offering a rapid outreach to a broad audience compared to traditional communication methods. Unfortunately, the escalating prevalence of abusive language and hate speech on these platforms has become a pressing issue. Detecting and addressing such content on the Internet has garnered considerable attention due to the significant impact it has on individuals. The advent of deep learning has facilitated the use of pre-trained deep neural network models for text classification tasks. While these models demonstrate high performance, some exhibit a substantial number of parameters. In the DravidianLangTech@EACL 2024 task, we opted for the Distilbert-base-multilingual-cased model, an enhancement of the BERT model that effectively reduces the number of parameters without compromising performance. This model was selected based on its exceptional results in the task. Our system achieved a commendable Macro F1 score of 0.6369%.</abstract>
       <url hash="d3fc8c7a">2024.dravidianlangtech-1.13</url>
@@ -208,7 +208,7 @@
       <title>Selam@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2024:Identifying Hate Speech and Offensive Language</title>
       <author><first>Selam</first><last>Abitte Kanta</last></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>91-95</pages>
       <abstract>Social media has transformed into a powerful tool for sharing information while upholding the principle of free expression. However, this open platform has given rise to significant issues like hate speech, cyberbullying, aggression, and offensive language, negatively impacting societal well-being. These problems can even lead to severe consequences such as suicidal thoughts, affecting the mental health of the victims. Our primary goal is to develop an automated system for the rapid detection of offensive content on social media, facilitating timely interventions and moderation. This research employs various machine learning classifiers, utilizing character N-gram TF-IDF features. Additionally, we introduce SVM, RL, and Convolutional Neural Network (CNN) models specifically designed for hate speech detection. SVM utilizes character Ngram TF-IDF features, while CNN employs word embedding features. Through extensive experiments, we achieved optimal results, with a weighted F1-score of 0.77 in identifying hate speech and offensive language.</abstract>
       <url hash="b6eecb81">2024.dravidianlangtech-1.14</url>
@@ -231,7 +231,7 @@
       <author><first>Muhammad</first><last>Zamir</last></author>
       <author><first>Moein</first><last>Tash</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Zahra</first><last>Ahani</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>101-106</pages>
       <abstract>Over the past few years, research on hate speech and offensive content identification on social media has been ongoing. Since most people in the world are not native English speakers, unapproved messages are typically sent in code-mixed language. We accomplished collaborative work to identify the language of code-mixed text on social media in order to address the difficulties associated with it in the Telugu language scenario. Specifically, we participated in the shared task on the provided dataset by the Dravidian- LangTech Organizer for the purpose of identifying hate and non-hate content. The assignment is to classify each sentence in the provided text into two predetermined groups: hate or non-hate. We developed a model in Python and selected a BERT multilingual to do the given task. Using a train-development data set, we developed a model, which we then tested on test data sets. An average macro F1 score metric was used to measure the model’s performance. For the task, the model reported an average macro F1 of 0.6151.</abstract>
@@ -304,7 +304,7 @@
     <paper id="22">
       <title><fixed-case>IIITDWD</fixed-case>-zk@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech-2024: Leveraging the Power of Language Models for Hate Speech Detection in <fixed-case>T</fixed-case>elugu-<fixed-case>E</fixed-case>nglish Code-Mixed Text</title>
       <author><first>Zuhair Hasan</first><last>Shaik</last></author>
-      <author><first>Sai Kartheek</first><last>Reddy Kasu</last></author>
+      <author id="kasu-sai-kartheek-reddy"><first>Sai Kartheek</first><last>Reddy Kasu</last></author>
       <author><first>Sunil</first><last>Saumya</last></author>
       <author><first>Shankar</first><last>Biradar</last></author>
       <pages>134-139</pages>
@@ -352,7 +352,7 @@
       <author><first>Mesay</first><last>Yigezu</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>156-161</pages>
       <abstract>This research tackles the issue of fake news by utilizing the RNN-LSTM deep learning method with optimized hyperparameters identified through grid search. The model’s performance in multi-label classification is hindered by unbalanced data, despite its success in binary classification. We achieved a score of 0.82 in the binary classification task, whereas in the multi-class task, the score was 0.32. We suggest incorporating data balancing techniques for researchers who aim to further this task, aiming to improve results in managing a variety of information.</abstract>
       <url hash="c8e3d0ba">2024.dravidianlangtech-1.26</url>
diff --git a/data/xml/2024.eacl.xml b/data/xml/2024.eacl.xml
index 5b1d87b517..1cf49dc03f 100644
--- a/data/xml/2024.eacl.xml
+++ b/data/xml/2024.eacl.xml
@@ -23,7 +23,7 @@
       <author><first>Xin</first><last>Quan</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Louise</first><last>Dennis</last><affiliation>University of Manchester, University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>1-22</pages>
       <abstract>An increasing amount of research in Natural Language Inference (NLI) focuses on the application and evaluation of Large Language Models (LLMs) and their reasoning capabilities. Despite their success, however, LLMs are still prone to factual errors and inconsistencies in their explanations, offering limited control and interpretability for inference in complex domains. In this paper, we focus on ethical NLI, investigating how hybrid neuro-symbolic techniques can enhance the logical validity and alignment of ethical explanations produced by LLMs. Specifically, we present an abductive-deductive framework named Logic-Explainer, which integrates LLMs with an external backward-chaining solver to refine step-wise natural language explanations and jointly verify their correctness, reduce incompleteness and minimise redundancy. An extensive empirical analysis demonstrates that Logic-Explainer can improve explanations generated via in-context learning methods and Chain-of-Thought (CoT) on challenging ethical NLI tasks, while, at the same time, producing formal proofs describing and supporting models’ reasoning. As ethical NLI requires commonsense reasoning to identify underlying moral violations, our results suggest the effectiveness of neuro-symbolic methods for multi-step NLI more broadly, opening new opportunities to enhance the logical consistency, reliability, and alignment of LLMs.</abstract>
       <url hash="066f368a">2024.eacl-long.1</url>
@@ -35,7 +35,7 @@
       <title>Multi-Relational Hyperbolic Word Embeddings from Natural Language Definitions</title>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>23-34</pages>
       <abstract>Natural language definitions possess a recursive, self-explanatory semantic structure that can support representation learning methods able to preserve explicit conceptual relations and constraints in the latent space. This paper presents a multi-relational model that explicitly leverages such a structure to derive word embeddings from definitions. By automatically extracting the relations linking defined and defining terms from dictionaries, we demonstrate how the problem of learning word embeddings can be formalised via a translational framework in Hyperbolic space and used as a proxy to capture the global semantic structure of definitions. An extensive empirical analysis demonstrates that the framework can help imposing the desired structural constraints while preserving the semantic mapping required for controllable and interpretable traversal. Moreover, the experiments reveal the superiority of the Hyperbolic word embeddings over the Euclidean counterparts and demonstrate that the multi-relational approach can obtain competitive results when compared to state-of-the-art neural models, with the advantage of being intrinsically more efficient and interpretable</abstract>
       <url hash="89559947">2024.eacl-long.2</url>
@@ -48,7 +48,7 @@
       <title>Anisotropy Is Inherent to Self-Attention in Transformers</title>
       <author><first>Nathan</first><last>Godey</last></author>
       <author><first>Éric</first><last>Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>INRIA</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>INRIA</affiliation></author>
       <pages>35-48</pages>
       <abstract>The representation degeneration problem is a phenomenon that is widely observed among self-supervised learning methods based on Transformers. In NLP, it takes the form of anisotropy, a singular property of hidden representations which makes them unexpectedly close to each other in terms of angular distance (cosine-similarity). Some recent works tend to show that anisotropy is a consequence of optimizing the cross-entropy loss on long-tailed distributions of tokens. We show in this paper that anisotropy can also be observed empirically in language models with specific objectives that should not suffer directly from the same consequences. We also show that the anisotropy problem extends to Transformers trained on other modalities. Our observations tend to demonstrate that anisotropy might actually be inherent to Transformers-based models.</abstract>
       <url hash="6b88a441">2024.eacl-long.3</url>
@@ -132,7 +132,7 @@
       <title>Parameter-Efficient Conversational Recommender System as a Language Processing Task</title>
       <author><first>Mathieu</first><last>Ravaut</last></author>
       <author><first>Hao</first><last>Zhang</last></author>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last></author>
+      <author><first>Lu</first><last>Xu</last></author>
       <author><first>Aixin</first><last>Sun</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yong</first><last>Liu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <pages>152-165</pages>
@@ -163,7 +163,7 @@
       <author><first>Andrew</first><last>Lee</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last><affiliation>University of Sydney</affiliation></author>
       <author><first>Larry</first><last>Ann</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>179-189</pages>
       <abstract>Recently, empathetic dialogue systems have received significant attention.While some researchers have noted limitations, e.g., that these systems tend to generate generic utterances, no study has systematically verified these issues. We survey 21 systems, asking what progress has been made on the task. We observe multiple limitations of current evaluation procedures. Most critically, studies tend to rely on a single non-reproducible empathy score, which inadequately reflects the multidimensional nature of empathy. To better understand the differences between systems, we comprehensively analyze each system with automated methods that are grounded in a variety of aspects of empathy. We find that recent systems lack three important aspects of empathy: specificity, reflection levels, and diversity. Based on our results, we discuss problematic behaviors that may have gone undetected in prior evaluations, and offer guidance for developing future systems.</abstract>
       <url hash="df025edc">2024.eacl-long.11</url>
@@ -175,7 +175,7 @@
       <title>Few-Shot Data Synthesis for Open Domain Multi-Hop Question Answering</title>
       <author><first>Mingda</first><last>Chen</last><affiliation>Meta AI</affiliation></author>
       <author><first>Xilun</first><last>Chen</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <pages>190-208</pages>
       <abstract>Few-shot learning for open domain multi-hop question answering typically relies on the in-context learning capability of large language models (LLMs). While powerful, these LLMs usually contain tens or hundreds of billions of parameters, making them rather inefficient at inference time. To improve performance of smaller language models, we propose a data synthesis framework for multi-hop question answering that requires less than 10 human-annotated question answer pairs. Our framework depends only on rich, naturally-occurring relationships among documents and is built upon the data generation functions parameterized by LLMs and prompts. We synthesize millions of multi-hop questions and claims to finetune language models, evaluated on popular benchmarks for multi-hop question answering and fact verification. Empirically, our approach improves model performance significantly, allowing the finetuned models to be competitive with GPT-3.5 based approaches while being almost one-third the size in parameter count.</abstract>
       <url hash="e98ef159">2024.eacl-long.12</url>
@@ -203,11 +203,11 @@
     </paper>
     <paper id="14">
       <title><fixed-case>SIB</fixed-case>-200: A Simple, Inclusive, and Big Evaluation Dataset for Topic Classification in 200+ Languages and Dialects</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Hannah</first><last>Liu</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Xiaoyu</first><last>Shen</last><affiliation>Amazon</affiliation></author>
       <author><first>Nikita</first><last>Vassilyev</last></author>
-      <author><first>Jesujoba O.</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Yanke</first><last>Mao</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Haonan</first><last>Gao</last></author>
       <author><first>En-Shiun Annie</first><last>Lee</last></author>
@@ -238,7 +238,7 @@
     <paper id="16">
       <title>Asking the Right Question at the Right Time: Human and Model Uncertainty Guidance to Ask Clarification Questions</title>
       <author><first>Alberto</first><last>Testoni</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <pages>258-275</pages>
       <abstract>Clarification questions are an essential dialogue tool to signal misunderstanding, ambiguities, and under-specification in language use. While humans are able to resolve uncertainty by asking questions since childhood, modern dialogue systems struggle to generate effective questions. To make progress in this direction, in this work we take a collaborative dialogue task as a testbed and study how model uncertainty relates to human uncertainty—an as yet under-explored problem. We show that model uncertainty does not mirror human clarification-seeking behavior, which suggests that using human clarification questions as supervision for deciding when to ask may not be the most effective way to resolve model uncertainty. To address this issue, we propose an approach to generating clarification questions based on model uncertainty estimation, compare it to several alternatives, and show that it leads to significant improvements in terms of task success. Our findings highlight the importance of equipping dialogue systems with the ability to assess their own uncertainty and exploit in interaction.</abstract>
       <url hash="d8992014">2024.eacl-long.16</url>
@@ -263,9 +263,9 @@
       <title>Zero-shot Sentiment Analysis in Low-Resource Languages Using a Multilingual Sentiment Lexicon</title>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Tilman</first><last>Beck</last></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>298-320</pages>
       <abstract>Improving multilingual language models capabilities in low-resource languages is generally difficult due to the scarcity of large-scale data in those languages. In this paper, we relax the reliance on texts in low-resource languages by using multilingual lexicons in pretraining to enhance multilingual capabilities. Specifically, we focus on zero-shot sentiment analysis tasks across 34 languages, including 6 high/medium-resource languages, 25 low-resource languages, and 3 code-switching datasets. We demonstrate that pretraining using multilingual lexicons, without using any sentence-level sentiment data, achieves superior zero-shot performance compared to models fine-tuned on English sentiment datasets, and large language models like GPT–3.5, BLOOMZ, and XGLM. These findings are observable for unseen low-resource languages to code-mixed scenarios involving high-resource languages.</abstract>
       <url hash="c55df9c7">2024.eacl-long.18</url>
@@ -355,7 +355,7 @@
     <paper id="25">
       <title>From Text Segmentation to Smart Chaptering: A Novel Benchmark for Structuring Video Transcriptions</title>
       <author><first>Fabian</first><last>Retkowski</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>406-419</pages>
       <abstract>Text segmentation is a fundamental task in natural language processing, where documents are split into contiguous sections. However, prior research in this area has been constrained by limited datasets, which are either small in scale, synthesized, or only contain well-structured documents. In this paper, we address these limitations by introducing a novel benchmark YTSeg focusing on spoken content that is inherently more unstructured and both topically and structurally diverse. As part of this work, we introduce an efficient hierarchical segmentation model MiniSeg, that outperforms state-of-the-art baselines. Lastly, we expand the notion of text segmentation to a more practical “smart chaptering” task that involves the segmentation of unstructured content, the generation of meaningful segment titles, and a potential real-time application of the models.</abstract>
       <url hash="048cd823">2024.eacl-long.25</url>
@@ -391,7 +391,7 @@
       <title>Exploring the Robustness of Task-oriented Dialogue Systems for Colloquial <fixed-case>G</fixed-case>erman Varieties</title>
       <author><first>Ekaterina</first><last>Artemova</last><affiliation>Toloka AI</affiliation></author>
       <author><first>Verena</first><last>Blaschke</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>445-468</pages>
       <abstract>Mainstream cross-lingual task-oriented dialogue (ToD) systems leverage the transfer learning paradigm by training a joint model for intent recognition and slot-filling in English and applying it, zero-shot, to other languages. We address a gap in prior research, which often overlooked the transfer to lower-resource colloquial varieties due to limited test data. Inspired by prior work on English varieties, we craft and manually evaluate perturbation rules that transform German sentences into colloquial forms and use them to synthesize test sets in four ToD datasets. Our perturbation rules cover 18 distinct language phenomena, enabling us to explore the impact of each perturbation on slot and intent performance. Using these new datasets, we conduct an experimental evaluation across six different transformers. Here, we demonstrate that when applied to colloquial varieties, ToD systems maintain their intent recognition performance, losing 6% (4.62 percentage points) in accuracy on average. However, they exhibit a significant drop in slot detection, with a decrease of 31% (21 percentage points) in slot F<tex-math>_1</tex-math> score. Our findings are further supported by a transfer experiment from Standard American English to synthetic Urban African American Vernacular English.</abstract>
       <url hash="51ed9c7f">2024.eacl-long.28</url>
@@ -403,7 +403,7 @@
     <paper id="29">
       <title><fixed-case>PEARL</fixed-case>: Prompting Large Language Models to Plan and Execute Actions Over Long Documents</title>
       <author><first>Simeng</first><last>Sun</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Shuohang</first><last>Wang</last></author>
       <author><first>Dan</first><last>Iter</last></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Zoom</affiliation></author>
@@ -493,7 +493,7 @@
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>589-608</pages>
       <abstract>The labor market is changing rapidly, prompting increased interest in the automatic extraction of occupational skills from text. With the advent of English benchmark job description datasets, there is a need for systems that handle their diversity well. We tackle the complexity in occupational skill datasets tasks—combining and leveraging multiple datasets for skill extraction, to identify rarely observed skills within a dataset, and overcoming the scarcity of skills across datasets. In particular, we investigate the retrieval-augmentation of language models, employing an external datastore for retrieving similar skills in a dataset-unifying manner. Our proposed method, <b>N</b>earest <b>N</b>eighbor <b>O</b>ccupational <b>S</b>kill <b>E</b>xtraction (NNOSE) effectively leverages multiple datasets by retrieving neighboring skills from other datasets in the datastore. This improves skill extraction <i>without</i> additional fine-tuning. Crucially, we observe a performance gain in predicting infrequent patterns, with substantial gains of up to 30% span-F1 in cross-dataset settings.</abstract>
       <url hash="fa763f2b">2024.eacl-long.35</url>
@@ -555,7 +555,7 @@
       <title>Quantifying the Hyperparameter Sensitivity of Neural Networks for Character-level Sequence-to-Sequence Tasks</title>
       <author><first>Adam</first><last>Wiemerslage</last><affiliation>University of Colorado, Boulder</affiliation></author>
       <author><first>Kyle</first><last>Gorman</last><affiliation>The Graduate Center, City University of New York and Google</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>674-689</pages>
       <abstract>Hyperparameter tuning, the process of searching for suitable hyperparameters, becomes more difficult as the computing resources required to train neural networks continue to grow. This topic continues to receive little attention and discussion—much of it hearsay—despite its obvious importance. We attempt to formalize hyperparameter sensitivity using two metrics: similarity-based sensitivity and performance-based sensitivity. We then use these metrics to quantify two such claims: (1) transformers are more sensitive to hyperparameter choices than LSTMs and (2) transformers are particularly sensitive to batch size. We conduct experiments on two different character-level sequence-to-sequence tasks and find that, indeed, the transformer is slightly more sensitive to hyperparameters according to both of our metrics. However, we do not find that it is more sensitive to batch size in particular.</abstract>
       <url hash="1f7882b9">2024.eacl-long.40</url>
@@ -565,7 +565,7 @@
     </paper>
     <paper id="41">
       <title>Examining Gender and Racial Bias in Large Vision–Language Models Using a Novel Dataset of Parallel Images</title>
-      <author><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>690-713</pages>
       <abstract>Following on recent advances in large language models (LLMs) and subsequent chat models, a new wave of large vision–language models (LVLMs) has emerged. Such models can incorporate images as input in addition to text, and perform tasks such as visual question answering, image captioning, story generation, etc. Here, we examine potential gender and racial biases in such systems, based on the perceived characteristics of the people in the input images. To accomplish this, we present a new dataset PAIRS (PArallel Images for eveRyday Scenarios). The PAIRS dataset contains sets of AI-generated images of people, such that the images are highly similar in terms of background and visual content, but differ along the dimensions of gender (man, woman) and race (Black, white). By querying the LVLMs with such images, we observe significant differences in the responses according to the perceived gender or race of the person depicted.</abstract>
@@ -608,7 +608,7 @@
       <author><first>Yufei</first><last>Wang</last></author>
       <author><first>George</first><last>Foster</last><affiliation>Google</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>740-752</pages>
       <abstract>Document-level neural machine translation (DocNMT) aims to generate translations that are both coherent and cohesive, in contrast to its sentence-level counterpart. However, due to its longer input length and limited availability of training data, DocNMT often faces the challenge of data sparsity. To overcome this issue, we propose a novel Importance-Aware Data Augmentation (IADA) algorithm for DocNMT that augments the training data based on token importance information estimated by the norm of hidden states and training gradients. We conduct comprehensive experiments on three widely-used DocNMT benchmarks. Our empirical results show that our proposed IADA outperforms strong DocNMT baselines as well as several data augmentation approaches, with statistical significance on both sentence-level and document-level BLEU.</abstract>
       <url hash="9ae09aa2">2024.eacl-long.44</url>
@@ -631,8 +631,8 @@
       <title>Comparing Template-based and Template-free Language Model Probing</title>
       <author><first>Sagi</first><last>Shaier</last></author>
       <author><first>Kevin</first><last>Bennett</last><affiliation>Memorial Healthcare System</affiliation></author>
-      <author><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>766-776</pages>
       <abstract>The differences between cloze-task language model (LM) probing with 1) expert-made templates and 2) naturally-occurring text have often been overlooked. Here, we evaluate 16 different LMs on 10 probing English datasets – 4 template-based and 6 template-free – in general and biomedical domains to answer the following research questions: (RQ1) Do model rankings differ between the two approaches? (RQ2) Do models’ absolute scores differ between the two approaches? (RQ3) Do the answers to RQ1 and RQ2 differ between general and domain-specific models? Our findings are: 1) Template-free and template-based approaches often rank models differently, except for the top domain- specific models. 2) Scores decrease by up to 42% Acc@1 when comparing parallel template-free and template-based prompts. 3) Perplexity is negatively correlated with accuracy in the template-free approach, but, counter-intuitively, they are positively correlated for template-based probing. 4) Models tend to predict the same answers frequently across prompts for template-based probing, which is less common when employing template-free techniques.</abstract>
       <url hash="1e17a564">2024.eacl-long.46</url>
@@ -642,8 +642,8 @@
     <paper id="47">
       <title>Desiderata For The Context Use Of Question Answering Systems</title>
       <author><first>Sagi</first><last>Shaier</last></author>
-      <author><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>777-792</pages>
       <abstract>Prior work has uncovered a set of common problems in state-of-the-art context-based question answering (QA) systems: a lack of attention to the context when the latter conflicts with a model’s parametric knowledge, little robustness to noise, and a lack of consistency with their answers. However, most prior work focus on one or two of those problems in isolation, which makes it difficult to see trends across them. We aim to close this gap, by first outlining a set of – previously discussed as well as novel – desiderata for QA models. We then survey relevant analysis and methods papers to provide an overview of the state of the field. The second part of our work presents experiments where we evaluate 15 QA systems on 5 datasets according to all desiderata at once. We find many novel trends, including (1) systems that are less susceptible to noise are not necessarily more consistent with their answers when given irrelevant context; (2) most systems that are more susceptible to noise are more likely to correctly answer according to a context that conflicts with their parametric knowledge; and (3) the combination of conflicting knowledge and noise can reduce system performance by up to 96%. As such, our desiderata help increase our understanding of how these models work and reveal potential avenues for improvements.</abstract>
       <url hash="cc1dac2e">2024.eacl-long.47</url>
@@ -668,7 +668,7 @@
       <author><first>Myra</first><last>Cheng</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kristina</first><last>Gligoric</last><affiliation>Stanford University</affiliation></author>
       <author><first>Tiziano</first><last>Piccardi</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>807-825</pages>
       <abstract>Anthropomorphism, or the attribution of human-like characteristics to non-human entities, has shaped conversations about the impacts and possibilities of technology. We present AnthroScore, an automatic metric of implicit anthropomorphism in language. We use a masked language model to quantify how non-human entities are implicitly framed as human by the surrounding context. We show that AnthroScore corresponds with human judgments of anthropomorphism and dimensions of anthropomorphism described in social science literature. Motivated by concerns of misleading anthropomorphism in computer science discourse, we use AnthroScore to analyze 15 years of research papers and downstream news articles. In research papers, we find that anthropomorphism has steadily increased over time, and that papers related to language models have the most anthropomorphism. Within ACL papers, temporal increases in anthropomorphism are correlated with key neural advancements. Building upon concerns of scientific misinformation in mass media, we identify higher levels of anthropomorphism in news headlines compared to the research papers they cite. Since AnthroScore is lexicon-free, it can be directly applied to a wide range of text sources.</abstract>
       <url hash="400f26de">2024.eacl-long.49</url>
@@ -741,7 +741,7 @@
       <author><first>Niklas</first><last>Stoehr</last></author>
       <author><first>Pengxiang</first><last>Cheng</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Jing</first><last>Wang</last><affiliation>Bloomberg</affiliation></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Rajarshi</first><last>Bhowmik</last><affiliation>Bloomberg L.P.</affiliation></author>
       <pages>900-914</pages>
       <abstract>Language models contain ranking-based knowledge and are powerful solvers of in-context ranking tasks. For instance, they may have parametric knowledge about the ordering of countries by size or may be able to rank product reviews by sentiment. We compare pairwise, pointwise and listwise prompting techniques to elicit a language model’s ranking knowledge. However, we find that even with careful calibration and constrained decoding, prompting-based techniques may not always be self-consistent in the rankings they produce. This motivates us to explore an alternative approach that is inspired by an unsupervised probing method called Contrast-Consistent Search (CCS). The idea is to train a probe guided by a logical constraint: a language model’s representation of a statement and its negation must be mapped to contrastive true-false poles consistently across multiple statements. We hypothesize that similar constraints apply to ranking tasks where all items are related via consistent, pairwise or listwise comparisons. To this end, we extend the binary CCS method to Contrast-Consistent Ranking (CCR) by adapting existing ranking methods such as the Max-Margin Loss, Triplet Loss and an Ordinal Regression objective. Across different models and datasets, our results confirm that CCR probing performs better or, at least, on a par with prompting.</abstract>
@@ -775,7 +775,7 @@
       <author><first>Raghav</first><last>Jain</last><affiliation>Indian Institute of Technology, Patna.</affiliation></author>
       <author><first>Apoorv</first><last>Verma</last></author>
       <author><first>Sriparna</first><last>Saha</last><affiliation>Indian Institute of Technology Patna, India</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>930-943</pages>
       <abstract>Internet memes have gained significant influence in communicating political, psychological, and sociocultural ideas. While meme are often humorous, there has been a rise in the use of memes for trolling and cyberbullying. Although a wide variety of effective deep learning-based models have been developed for detecting offensive multimodal memes, only a few works have been done on explainability aspect. Recent laws like “right to explanations” of General Data Protection Regulation, have spurred research in developing interpretable models rather than only focusing on performance. Motivated by this, we introduce MultiBully-Ex, the first benchmark dataset for multimodal explanation from code-mixed cyberbullying memes. Here, both visual and textual modalities are highlighted to explain why a given meme is cyberbullying. A Contrastive Language-Image Pretraining (CLIP) projection based multimodal shared-private multitask approach has been proposed for visual and textual explanation of a meme. Experimental results demonstrate that training with multimodal explanations improves performance in generating textual justifications and more accurately identifying the visual evidence supporting a decision with reliable performance improvements.</abstract>
       <url hash="e4b37150">2024.eacl-long.56</url>
@@ -825,7 +825,7 @@
     <paper id="60">
       <title>Moderation in the Wild: Investigating User-Driven Moderation in Online Discussions</title>
       <author><first>Neele</first><last>Falk</last><affiliation>University of Stuttgart, University of Stuttgart</affiliation></author>
-      <author><first>Eva</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva</first><last>Vecchi</last></author>
       <author><first>Iman</first><last>Jundi</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Gabriella</first><last>Lapesa</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>992-1013</pages>
@@ -868,7 +868,7 @@
       <title>Bias in Opinion Summarisation from Pre-training to Adaptation: A Case Study in Political Bias</title>
       <author><first>Nannan</first><last>Huang</last></author>
       <author><first>Haytham</first><last>Fayek</last><affiliation>Royal Melbourne Institute of Technology and Facebook</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>1041-1055</pages>
       <abstract>Opinion summarisation aims to summarise the salient information and opinions presented in documents such as product reviews, discussion forums, and social media texts into short summaries that enable users to effectively understand the opinions therein.Generating biased summaries has the risk of potentially swaying public opinion. Previous studies focused on studying bias in opinion summarisation using extractive models, but limited research has paid attention to abstractive summarisation models. In this study, using political bias as a case study, we first establish a methodology to quantify bias in abstractive models, then trace it from the pre-trained models to the task of summarising social media opinions using different models and adaptation methods. We find that most models exhibit intrinsic bias. Using a social media text summarisation dataset and contrasting various adaptation methods, we find that tuning a smaller number of parameters is less biased compared to standard fine-tuning; however, the diversity of topics in training data used for fine-tuning is critical.</abstract>
       <url hash="eca14f3f">2024.eacl-long.63</url>
@@ -953,7 +953,7 @@
       <author><first>Song</first><last>Duong</last></author>
       <author><first>Mathieu</first><last>Ravaut</last></author>
       <author><first>Alexandre</first><last>Allauzen</last><affiliation>Ecole supérieure de physique et chimie and Univeristé Paris-Dauphine</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Vincent</first><last>Guigue</last><affiliation>AgroParisTech</affiliation></author>
       <author><first>Alberto</first><last>Lumbreras</last><affiliation>Criteo</affiliation></author>
       <author><first>Laure</first><last>Soulier</last><affiliation>Université Pierre et Marie Curie - Paris 6, Sorbonne Université - Faculté des Sciences (Paris VI)</affiliation></author>
@@ -1017,7 +1017,7 @@
     </paper>
     <paper id="74">
       <title>Quantifying Stereotypes in Language</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>1223-1240</pages>
       <abstract>A stereotype is a generalized perception of a specific group of humans. It is often potentially encoded in human language, which is more common in texts on social issues. Previous works simply define a sentence as stereotypical and anti-stereotypical. However, the stereotype of a sentence may require fine-grained quantification. In this paper, to fill this gap, we quantify stereotypes in language by annotating a dataset. We use the pre-trained language models (PLMs) to learn this dataset to predict stereotypes of sentences. Then, we discuss stereotypes about common social issues such as hate speech, sexism, sentiments, and disadvantaged and advantaged groups. We demonstrate the connections and differences between stereotypes and common social issues, and all four studies validate the general findings of the current studies. In addition, our work suggests that fine-grained stereotype scores are a highly relevant and competitive dimension for research on social issues. The models and datasets used in this paper are available at https://anonymous.4open.science/r/quantifying_stereotypes_in_language.</abstract>
       <url hash="ef21cc92">2024.eacl-long.74</url>
@@ -1054,7 +1054,7 @@
     </paper>
     <paper id="77">
       <title>Plan-Grounded Large Language Models for Dual Goal Conversational Settings</title>
-      <author><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
+      <author id="diogo-gloria-silva"><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
       <author><first>Rafael</first><last>Ferreira</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
       <author><first>Diogo</first><last>Tavares</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
       <author><first>David</first><last>Semedo</last><affiliation>Universidade NOVA de Lisboa and Universidade NOVA de Lisboa</affiliation></author>
@@ -1130,7 +1130,7 @@
       <author><first>Zeyu</first><last>Wang</last><affiliation>Yale University</affiliation></author>
       <author><first>Mu</first><last>Gao</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Jeffrey</first><last>Skolnick</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>1354-1368</pages>
       <abstract>Hierarchical text classification (HTC) is a complex subtask under multi-label text classification, characterized by a hierarchical label taxonomy and data imbalance. The best-performing models aim to learn a static representation by combining document and hierarchical label information. However, the relevance of document sections can vary based on the hierarchy level, necessitating a dynamic document representation. To address this, we propose HiGen, a text-generation-based framework utilizing language models to encode dynamic text representations. We introduce a level-guided loss function to capture the relationship between text and label name semantics. Our approach incorporates a task-specific pretraining strategy, adapting the language model to in-domain knowledge and significantly enhancing performance for classes with limited examples. Furthermore, we present a new and valuable dataset called ENZYME, designed for HTC, which comprises articles from PubMed with the goal of predicting Enzyme Commission (EC) numbers. Through extensive experiments on the ENZYME dataset and the widely recognized WOS and NYT datasets, our methodology demonstrates superior performance, surpassing existing approaches while efficiently handling data and mitigating class imbalance. We release our code and dataset here: https://github.com/viditjain99/HiGen.</abstract>
       <url hash="f8e14848">2024.eacl-long.82</url>
@@ -1155,7 +1155,7 @@
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Amazon</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1369-1407</pages>
       <abstract>Large language models (LLMs) have demonstrated remarkable capability to generate fluent responses to a wide variety of user queries. However, this has also raised concerns about the potential misuse of such texts in journalism, education, and academia. In this study, we strive to create automated systems that can detect machine-generated texts and pinpoint potential misuse. We first introduce a large-scale benchmark M4, which is a multi-generator, multi-domain, and multi-lingual corpus for machine-generated text detection. Through an extensive empirical study of this dataset, we show that it is challenging for detectors to generalize well on instances from unseen domains or LLMs. In such cases, detectors tend to misclassify machine-generated text as human-written. These results show that the problem is far from solved and that there is a lot of room for improvement. We believe that our dataset will enable future research towards more robust approaches to this pressing societal problem. The dataset is available at https://github.com/mbzuai-nlp/M4</abstract>
       <url hash="3ba47fcc">2024.eacl-long.83</url>
@@ -1192,7 +1192,7 @@
       <title>Diffusion-<fixed-case>NAT</fixed-case>: Self-Prompting Discrete Diffusion for Non-Autoregressive Text Generation</title>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yifan</first><last>Li</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>1438-1451</pages>
       <abstract>Recently, continuous diffusion models (CDM) have been introduced into non-autoregressive (NAR) text-to-text generation. However, the discrete nature of text increases the difficulty of CDM to generate coherent and fluent texts, and also causes the incompatibility problem between CDM and advanced NLP techniques, especially the popular pre-trained language models (PLMs).To solve it, we propose Diffusion-NAT, which introduces discrete diffusion models (DDM) into NAR text-to-text generation and integrates BART to improve the performance.By revising the decoding process of BART and the typical settings of DDM, we unify the inference process of BART and the denoising process of DDM into the same NAR masked tokens recovering task.In this way, DDM can rely on BART to perform denoising, which can benefit from both the rich pre-learned knowledge of BART and the iterative refining paradigm of DDM.Besides, we also propose the iterative self-prompting strategy to further improve the generation quality.Experimental results on 7 datasets show that our approach can outperform competitive NAR methods, and even surpass autoregressive methods.Our code and data are released at <url>https://github.com/RUCAIBox/DiffusionNAT</url>.</abstract>
@@ -1205,7 +1205,7 @@
       <title>Unleashing the Power of Discourse-Enhanced Transformers for Propaganda Detection</title>
       <author><first>Alexander</first><last>Chernyavskiy</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last><affiliation>Higher School of Economics</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1452-1462</pages>
       <abstract>The prevalence of information manipulation online has created a need for propaganda detection systems. Such systems have typically focused on the surface words, ignoring the linguistic structure. Here we aim to bridge this gap. In particular, we present the first attempt at using discourse analysis for the task. We consider both paragraph-level and token-level classification and we propose a discourse-aware Transformer architecture. Our experiments on English and Russian demonstrate sizeable performance gains compared to a number of baselines. Moreover, our ablation study emphasizes the importance of specific types of discourse features, and our in-depth analysis reveals a strong correlation between propaganda instances and discourse spans.</abstract>
       <url hash="33e6bec7">2024.eacl-long.87</url>
@@ -1298,7 +1298,7 @@
       <title>Translate to Disambiguate: Zero-shot Multilingual Word Sense Disambiguation with Pretrained Language Models</title>
       <author><first>Haoqiang</first><last>Kang</last></author>
       <author><first>Terra</first><last>Blevins</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <pages>1562-1575</pages>
       <abstract>Pretrained Language Models (PLMs) learn rich cross-lingual knowledge and perform well on diverse tasks such as translation and multilingual word sense disambiguation (WSD) when finetuned. However, they often struggle at disambiguating word sense in a zero-shot setting. To better understand this contrast, we present a new study investigating how well PLMs capture cross-lingual word sense with Contextual Word-Level Translation (C-WLT), an extension of word-level translation that prompts the model to translate a given word in context. We find that as the model size increases, PLMs encode more cross-lingual word sense knowledge and better use context to improve WLT performance. Building on C-WLT, we introduce a zero-shot prompting approach for WSD, tested on 18 languages from the XL-WSD dataset. Our method outperforms fully supervised baselines on recall for many evaluation languages without additional training or finetuning. This study presents a first step towards understanding how to best leverage the cross-lingual knowledge inside PLMs for robust zero-shot reasoning in any language.</abstract>
       <url hash="990aca57">2024.eacl-long.94</url>
@@ -1335,8 +1335,8 @@
     </paper>
     <paper id="97">
       <title>Discovering and Articulating Frames of Communication from Social Media Using Chain-of-Thought Reasoning</title>
-      <author><first>Maxwell</first><last>Weinzierl</last><affiliation>University of Texas at Dallas</affiliation></author>
-      <author><first>Sanda</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="maxwell-weinzierl"><first>Maxwell</first><last>Weinzierl</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
       <pages>1617-1631</pages>
       <abstract>Frames of Communication (FoCs) are ubiquitous in social media discourse. They define what counts as a problem, diagnose what is causing the problem, elicit moral judgments and imply remedies for resolving the problem. Most research on automatic frame detection involved the recognition of the problems addressed by frames, but did not consider the articulation of frames. Articulating an FoC involves reasoning with salient problems, their cause and eventual solution. In this paper we present a method for Discovering and Articulating FoCs (DA-FoC) that relies on a combination of Chain-of-Thought prompting of large language models (LLMs) with In-Context Active Curriculum Learning. Very promising evaluation results indicate that 86.72% of the FoCs encoded by communication experts on the same reference dataset were also uncovered by DA-FoC. Moreover, DA-FoC uncovered many new FoCs, which escaped the experts. Interestingly, 55.1% of the known FoCs were judged as being better articulated than the human-written ones, while 93.8% of the new FoCs were judged as having sound rationale and being clearly articulated.</abstract>
       <url hash="ecd1ffc0">2024.eacl-long.97</url>
@@ -1373,7 +1373,7 @@
       <author><first>Lütfi Kerem</first><last>Senel</last><affiliation>Ludwig Maximilian University of Munich and The Center for Information and Language Processing</affiliation></author>
       <author><first>Benedikt</first><last>Ebing</last><affiliation>Bayerische Julius-Maximilians-Universität Würzburg</affiliation></author>
       <author><first>Konul</first><last>Baghirova</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>Julius-Maximilians-Universität Würzburg</affiliation></author>
       <pages>1672-1688</pages>
       <abstract>Cross-lingual transfer (XLT) driven by massively multilingual language models (mmLMs) has been shown largely ineffective for low-resource (LR) target languages with little (or no) representation in mmLM’s pretraining, especially if they are linguistically distant from the high-resource (HR) source language. Much of the recent focus in XLT research has been dedicated to <i>LR language families</i>, i.e., families without any HR languages (e.g., families of African languages or indigenous languages of the Americas). In this work, in contrast, we investigate a configuration that is arguably of practical relevance for more of the world’s languages: XLT to LR languages that do have a close HR relative. To explore the extent to which a HR language can facilitate transfer to its LR relatives, we (1) introduce Kardeş-NLU, an evaluation benchmark with language understanding datasets in five LR Turkic languages: Azerbaijani, Kazakh, Kyrgyz, Uzbek, and Uyghur; and (2) investigate (a) intermediate training and (b) fine-tuning strategies that leverage Turkish in XLT to these target languages. Our experimental results show that both - integrating Turkish in intermediate training and in downstream fine-tuning - yield substantial improvements in XLT to LR Turkic languages. Finally, we benchmark cutting-edge instruction-tuned large language models on Kardeş-NLU, showing that their performance is highly task- and language-dependent.</abstract>
@@ -1416,7 +1416,7 @@
       <author><first>Bhavya</first><last>Bhavya</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Shradha</first><last>Sehgal</last><affiliation>Department of Computer Science</affiliation></author>
       <author><first>Jinjun</first><last>Xiong</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <pages>1723-1737</pages>
       <abstract>Textual analogies that make comparisons between two concepts are often used for explaining complex ideas, creative writing, and scientific discovery. In this paper, we propose and study a new task, called Analogy Detection and Extraction (AnaDE), which includes three synergistic sub-tasks: 1) detecting documents containing analogies, 2) extracting text segments that make up the analogy, and 3) identifying the (source and target) concepts being compared. To facilitate the study of this new task, we create a benchmark dataset by scraping Metamia.com and investigate the performances of state-of-the-art models on all sub-tasks to establish the first-generation benchmark results for this new task. We find that the Longformer model achieves the best performance on all the three sub-tasks demonstrating its effectiveness for handling long texts. Moreover, smaller models fine-tuned on our dataset perform better than non-finetuned ChatGPT, suggesting high task difficulty. Overall, the models achieve a high performance on documents detection suggesting that it could be used to develop applications like analogy search engines. Further, there is a large room for improvement on the segment and concept extraction tasks.</abstract>
       <url hash="443d0109">2024.eacl-long.103</url>
@@ -1430,7 +1430,7 @@
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>ASUS Intelligent Cloud Solutions</affiliation></author>
       <author><first>Stefan</first><last>Winkler</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <pages>1738-1751</pages>
       <abstract>Sentence representations are a critical component in NLP applications such as retrieval, question answering, and text classification. They capture the meaning of a sentence, enabling machines to understand and reason over human language. In recent years, significant progress has been made in developing methods for learning sentence representations, including unsupervised, supervised, and transfer learning approaches. However there is no literature review on sentence representations till now. In this paper, we provide an overview of the different methods for sentence representation learning, focusing mostly on deep learning models. We provide a systematic organization of the literature, highlighting the key contributions and challenges in this area. Overall, our review highlights the importance of this area in natural language processing, the progress made in sentence representation learning, and the challenges that remain. We conclude with directions for future research, suggesting potential avenues for improving the quality and efficiency of sentence representations.</abstract>
@@ -1496,7 +1496,7 @@
     <paper id="109">
       <title>Aligning Large and Small Language Models via Chain-of-Thought Reasoning</title>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>1812-1827</pages>
       <abstract>Chain-of-Thought (CoT) prompting empowersthe reasoning abilities of Large Language Models (LLMs), eliciting them to solve complexreasoning tasks in a step-wise manner. However, these capabilities appear only in models with billions of parameters, which represent an entry barrier for many users who are constrained to operate on a smaller model scale, i.e., Small Language Models (SLMs). Although many companies are releasing LLMs of the same family with fewer parameters, these models tend not to preserve all the reasoning capabilities of the original models, including CoT reasoning.In this paper, we propose a method for aligning and transferring reasoning abilities between larger to smaller Language Models. By using an Instruction-tuning-CoT method, that is, an Instruction-tuning designed around CoT-Demonstrations, we enable the SLMs to generate multi-step controlled reasoned answers when they are elicited with the CoT mechanism. Hence, we instruct a smaller Language Model using outputs generated by more robust models belonging to the same family or not, evaluating the impact across different types of models. Results obtained on question-answering and mathematical reasoning benchmarks show that LMs instructed via the Instruction-tuning CoT method produced by LLMs outperform baselines within both in-domain and out-domain scenarios.</abstract>
       <url hash="b286bc06">2024.eacl-long.109</url>
@@ -1520,7 +1520,7 @@
       <author><first>Hari</first><last>Shrawgi</last><affiliation>Microsoft</affiliation></author>
       <author><first>Prasanjit</first><last>Rath</last></author>
       <author><first>Tushar</first><last>Singhal</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
       <pages>1841-1857</pages>
       <abstract>Recent Large Language Models (LLMs) have unlocked unprecedented applications of AI. As these models continue to transform human life, there are growing socio-ethical concerns around their inherent stereotypes that can lead to bias in their applications. There is an urgent need for holistic bias evaluation of these LLMs. Few such benchmarks exist today and evaluation techniques that do exist are either non-holistic or may provide a false sense of security as LLMs become better at hiding their biases on simpler tasks. We address these issues with an extensible benchmark - LLM Stereotype Index (LSI). LSI is grounded on Social Progress Index, a holistic social benchmark. We also test the breadth and depth of bias protection provided by LLMs via a variety of tasks with varying complexities. Our findings show that both ChatGPT and GPT-4 have strong inherent prejudice with respect to nationality, gender, race, and religion. The exhibition of such issues becomes increasingly apparent as we increase task complexity. Furthermore, GPT-4 is better at hiding the biases, but when displayed it is more significant. Our findings highlight the harms and divide that these LLMs can bring to society if we do not take very diligent care in their use.</abstract>
       <url hash="c1a01154">2024.eacl-long.111</url>
@@ -1572,7 +1572,7 @@
     <paper id="115">
       <title>Human Temporal Inferences Go Beyond Aspectual Class</title>
       <author><first>Katarzyna</first><last>Pruś</last></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Adam</first><last>Lopez</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>1913-1923</pages>
       <abstract>Past work in NLP has proposed the task of classifying English verb phrases into situation aspect categories, assuming that these categories play an important role in tasks requiring temporal reasoning. We investigate this assumption by gathering crowd-sourced judgements about aspectual entailments from non-expert, native English participants. The results suggest that aspectual class alone is not sufficient to explain the response patterns of the participants. We propose that looking at scenarios which can feasibly accompany an action description contributes towards a better explanation of the participants’ answers. A further experiment using GPT-3.5 shows that its outputs follow different patterns than human answers, suggesting that such conceivable scenarios cannot be fully accounted for in the language alone. We release our dataset to support further research.</abstract>
@@ -1621,7 +1621,7 @@
       <author><first>Chris</first><last>Brockett</last><affiliation>Microsoft</affiliation></author>
       <author><first>Akanksha</first><last>Malhotra</last><affiliation>Microsoft</affiliation></author>
       <author><first>Nebojsa</first><last>Jojic</last><affiliation>Microsoft Research and Microsoft Research</affiliation></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>1968-1987</pages>
       <abstract>Agency, the capacity to proactively shape events, is central to how humans interact and collaborate. While LLMs are being developed to simulate human behavior and serve as human-like agents, little attention has been given to the Agency that these models should possess in order to proactively manage the direction of interaction and collaboration. In this paper, we investigate Agency as a desirable function of LLMs, and how it can be measured and managed. We build on social-cognitive theory to develop a framework of features through which Agency is expressed in dialogue – indicating what you intend to do (Intentionality), motivating your intentions (Motivation), having self-belief in intentions (Self-Efficacy), and being able to self-adjust (Self-Regulation). We collect a new dataset of 83 human-human collaborative interior design conversations containing 908 conversational snippets annotated for Agency features. Using this dataset, we develop methods for measuring Agency of LLMs. Automatic and human evaluations show that models that manifest features associated with high Intentionality, Motivation, Self-Efficacy, and Self-Regulation are more likely to be perceived as strongly agentive.</abstract>
       <url hash="1b326838">2024.eacl-long.119</url>
@@ -1687,7 +1687,7 @@
     </paper>
     <paper id="124">
       <title>What Makes Medical Claims (Un)Verifiable? Analyzing Entity and Relation Properties for Fact Verification</title>
-      <author><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Yarik</first><last>Menchaca Resendiz</last></author>
       <author><first>Lara</first><last>Grimminger</last></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
@@ -1703,7 +1703,7 @@
       <title>Approximate Attributions for Off-the-Shelf <fixed-case>S</fixed-case>iamese Transformers</title>
       <author><first>Lucas</first><last>Moeller</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <pages>2059-2071</pages>
       <abstract>Siamese encoders such as sentence transformers are among the least understood deep models.Established attribution methods cannot tackle this model class since it compares two inputs rather than processing a single one. To address this gap, we have recently proposed an attribution method specifically for Siamese encoders (Möller et al., 2023). However, it requires models to be adjusted and fine-tuned and therefore cannot be directly applied to off-the-shelf models. In this work, we reassess these restrictions and propose (i) a model with exact attribution ability that retains the original model’s predictive performance and (ii) a way to compute approximate attributions for off-the-shelf models.We extensively compare approximate and exact attributions and use them to analyze the models’ attendance to different linguistic aspects. We gain insights into which syntactic roles Siamese transformers attend to, confirm that they mostly ignore negation, explore how they judge semantically opposite adjectives, and find that they exhibit lexical bias.</abstract>
       <url hash="9534c6fa">2024.eacl-long.125</url>
@@ -1716,7 +1716,7 @@
       <title>Describing Images <tex-math>\textit{Fast and Slow}</tex-math>: Quantifying and Predicting the Variation in Human Signals during Visuo-Linguistic Processes</title>
       <author><first>Ece</first><last>Takmaz</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Sandro</first><last>Pezzelle</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <pages>2072-2087</pages>
       <abstract>There is an intricate relation between the properties of an image and how humans behave while describing the image. This behavior shows ample variation, as manifested in human signals such as eye movements and when humans start to describe the image. Despite the value of such signals of visuo-linguistic variation, they are virtually disregarded in the training of current pretrained models, which motivates further investigation. Using a corpus of Dutch image descriptions with concurrently collected eye-tracking data, we explore the nature of the variation in visuo-linguistic signals, and find that they correlate with each other. Given this result, we hypothesize that variation stems partly from the properties of the images, and explore whether image representations encoded by pretrained vision encoders can capture such variation. Our results indicate that pretrained models do so to a weak-to-moderate degree, suggesting that the models lack biases about what makes a stimulus complex for humans and what leads to variations in human outputs.</abstract>
       <url hash="1647aacb">2024.eacl-long.126</url>
@@ -1726,7 +1726,7 @@
     </paper>
     <paper id="127">
       <title>Tracing the Roots of Facts in Multilingual Language Models: Independent, Shared, and Transferred Knowledge</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Naoki</first><last>Yoshinaga</last><affiliation>Institute of Industrial Science, the University of Tokyo</affiliation></author>
       <author><first>Daisuke</first><last>Oba</last><affiliation>Institute of Industrial Science, The University of Tokyo</affiliation></author>
       <pages>2088-2102</pages>
@@ -1780,7 +1780,7 @@
       <author><first>Fantine</first><last>Huot</last><affiliation>Google</affiliation></author>
       <author><first>Joshua</first><last>Maynez</last><affiliation>Google</affiliation></author>
       <author><first>Chris</first><last>Alberti</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
       <author><first>Priyanka</first><last>Agrawal</last><affiliation>Google</affiliation></author>
       <author><first>Constanza</first><last>Fierro</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Shashi</first><last>Narayan</last><affiliation>Google</affiliation></author>
@@ -1796,7 +1796,7 @@
       <title>Exploring Data Augmentation in Neural <fixed-case>DRS</fixed-case>-to-Text Generation</title>
       <author><first>Muhammad Saad</first><last>Amin</last></author>
       <author><first>Luca</first><last>Anselma</last><affiliation>University of Turin</affiliation></author>
-      <author><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
       <pages>2164-2178</pages>
       <abstract>Neural networks are notoriously data-hungry. This represents an issue in cases where data are scarce such as in low-resource languages. Data augmentation is a technique commonly used in computer vision to provide neural networks with more data and increase their generalization power. When dealing with data augmentation for natural language, however, simple data augmentation techniques similar to the ones used in computer vision such as rotation and cropping cannot be employed because they would generate ungrammatical texts. Thus, data augmentation needs a specific design in the case of neural logic-to-text systems, especially for a structurally rich input format such as the ones used for meaning representation. This is the case of the neural natural language generation for Discourse Representation Structures (DRS-to-Text), where the logical nature of DRS needs a specific design of data augmentation. In this paper, we adopt a novel approach in DRS-to-Text to selectively augment a training set with new data by adding and varying two specific lexical categories, i.e. proper and common nouns. In particular, we propose using WordNet supersenses to produce new training sentences using both in-and-out-of-context nouns. We present a number of experiments for evaluating the role played by augmented lexical information. The experimental results prove the effectiveness of our approach for data augmentation in DRS-to-Text generation.</abstract>
       <url hash="fcedda01">2024.eacl-long.132</url>
@@ -1936,7 +1936,7 @@
       <title>Investigating Content Planning for Navigating Trade-offs in Knowledge-Grounded Dialogue</title>
       <author><first>Kushal</first><last>Chawla</last></author>
       <author><first>Hannah</first><last>Rashkin</last><affiliation>Google</affiliation></author>
-      <author><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google</affiliation></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last><affiliation>Google</affiliation></author>
       <author><first>David</first><last>Reitter</last><affiliation>Google, Brain, DeepMind</affiliation></author>
       <pages>2316-2335</pages>
       <abstract>Knowledge-grounded dialogue generation is a challenging task because it requires satisfying two fundamental, yet often competing constraints: being responsive in a manner that is specific to what the conversation partner has said while also being attributable to an underlying source document. In this work, we bring this trade-off between these two objectives (specificity and attribution) to light, and ask the question: Can explicit content planning before the response generation help the model to address this challenge? To answer this question, we design a framework called PLEDGE, which allows us to experiment with various plan variables explored in prior work supporting both metric-agnostic and metric-aware approaches. While content planning shows promise, our results on whether it can actually help to navigate this trade-off are mixed – planning mechanisms that are metric-aware (use automatic metrics during training) are better at automatic evaluations but underperform in human judgment compared to metric-agnostic mechanisms. We discuss how this may be caused by over-fitting to automatic metrics, and the need for future work to better calibrate these metrics towards human judgment. We hope the observations from our analysis will inform future work that aims to apply content planning in this context.</abstract>
@@ -1962,9 +1962,9 @@
       <author><first>Rabeeh</first><last>Karimi Mahabadi</last></author>
       <author><first>Hamish</first><last>Ivison</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jaesung</first><last>Tae</last><affiliation>Yale University</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Iz</first><last>Beltagy</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>2347-2361</pages>
       <abstract>Diffusion models have emerged as a powerful paradigm for generation, obtaining strong performance in various continuous domains. However, applying continuous diffusion models to natural language remains challenging due to its discrete nature and the need for a large number of diffusion steps to generate text, making diffusion-based generation expensive.In this work, we propose Text-to-text Self-conditioned Simplex Diffusion (TESS), a text diffusion model that is fully non-autoregressive, employs a new form of self-conditioning, and applies the diffusion process on the logit simplex space rather than the learned embedding space.Through extensive experiments on natural language understanding and generation tasks including summarization, text simplification, paraphrase generation, and question generation, we demonstrate that TESS outperforms state-of-the-art non-autoregressive models, requires fewer diffusion steps with minimal drop in performance, and is competitive with pretrained autoregressive sequence-to-sequence models.</abstract>
@@ -1978,7 +1978,7 @@
       <author><first>Yunzhe</first><last>Li</last></author>
       <author><first>Qian</first><last>Chen</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Weixiang</first><last>Yan</last></author>
-      <author><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Qinglin</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Hari</first><last>Sundaram</last><affiliation>Department of Computer Science</affiliation></author>
       <pages>2362-2377</pages>
@@ -2037,7 +2037,7 @@
       <author><first>Dongyub</first><last>Lee</last><affiliation>NAVER and Korea University</affiliation></author>
       <author><first>Eunhwan</first><last>Park</last></author>
       <author><first>Hodong</first><last>Lee</last><affiliation>NAVER CLOUD</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>2422-2433</pages>
       <abstract>Recent advancements in Large Language Models (LLMs) have heralded unprecedented capabilities in information-seeking and text generation, as evidenced by applications like Bing Chat and perplexity.ai. Despite these strides, challenges on hallucination and factual inconsistency continue to impede their wider real-world adoption. Contemporary methods, including retrieval-augmented LLMs and feedback-based learning, serve as alternatives to mitigate these challenges. However, challenges remain, particularly regarding referencing erroneous evidence (citation errors) and generating information not present in the evidence (hallucination). In this paper, we introduce the <tex-math>\mathsf{A}^2\mathsf{R}</tex-math> framework: <b>A</b>sk, <b>A</b>ssess, and <b>R</b>efine. Our approach utilizes an <i>explicit</i> evaluation paradigm, incorporating metrics specifically tailored to assess citation errors and hallucination, aiming to address these prevalent challenges robustly. Capitalizing on these evaluations, we devise a strategy to formulate actionable natural language feedback, enabling iterative refinements that yield improved factual consistency and reduced hallucinations in responses. Our experiments on ASQA, ELI5, and QAMPARI datasets demonstrate our method’s superiority in enhancing correctness, fluency, and citation quality.</abstract>
       <url hash="396e7a17">2024.eacl-long.149</url>
@@ -2081,7 +2081,7 @@
     <paper id="152">
       <title>A <fixed-case>R</fixed-case>el<fixed-case>E</fixed-case>nt<fixed-case>L</fixed-case>ess Benchmark for Modelling Graded Relations between Named Entities</title>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Steven</first><last>Schockaert</last><affiliation>Cardiff University</affiliation></author>
       <pages>2473-2486</pages>
       <abstract>Relations such as “is influenced by”, “is known for” or “is a competitor of” are inherently graded: we can rank entity pairs based on how well they satisfy these relations, but it is hard to draw a line between those pairs that satisfy them and those that do not. Such graded relations play a central role in many applications, yet they are typically not covered by existing Knowledge Graphs. In this paper, we consider the possibility of using Large Language Models (LLMs) to fill this gap. To this end, we introduce a new benchmark, in which entity pairs have to be ranked according to how much they satisfy a given graded relation. The task is formulated as a few-shot ranking problem, where models only have access to a description of the relation and five prototypical instances. We use the proposed benchmark to evaluate state-of-the-art relation embedding strategies as well as several publicly available LLMs and closed conversational models such as GPT-4. We find that smaller language models struggle to outperform a naive baseline. Overall, the best results are obtained with the 11B parameter Flan-T5 model and the 13B parameter OPT model, where further increasing the model size does not seem to be beneficial. For all models, a clear gap with human performance remains.</abstract>
@@ -2141,7 +2141,7 @@
       <author><first>Sotiris</first><last>Kotitsas</last><affiliation>National and Kapodistrian University of Athens</affiliation></author>
       <author><first>Panagiotis</first><last>Kounoudis</last><affiliation>Athena Research Center and National and Kapodistrian University of Athens</affiliation></author>
       <author><first>Eleni</first><last>Koutli</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last><affiliation>NATIONAL AND KAPODISTRIAN UNIVERISTY OF ATHENS, ATHENA RESEARCH AND INNOVATION CENTER and ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS</affiliation></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last><affiliation>NATIONAL AND KAPODISTRIAN UNIVERISTY OF ATHENS, ATHENA RESEARCH AND INNOVATION CENTER and ATHENS UNIVERSITY OF ECONOMICS AND BUSINESS</affiliation></author>
       <pages>2540-2554</pages>
       <abstract>Misinformation and disinformation phenomena existed long before the advent of digital technologies. The exponential use of social media platforms, whose information feeds have created the conditions for many to many communication and instant amplification of the news has accelerated the diffusion of inaccurate and misleading information. As a result, the identification of claims have emerged as a pivotal technology for combating the influence of misinformation and disinformation within news media. Most existing work has concentrated on claim analysis at the sentence level, neglecting the crucial exploration of supplementary attributes such as the claimer and the claim object of the claim or confining it by limiting its scope to a predefined list of topics. Furthermore, previous research has been mostly centered around political debates, Wikipedia articles, and COVID-19 related content. By leveraging the advanced capabilities of Large Language Models (LLMs) in Natural Language Understanding (NLU) and text generation, we propose a novel architecture utilizing LLMs finetuned with LoRA to transform the claim, claimer and claim object detection task into a Question Answering (QA) setting. We evaluate our approach in a dataset of 867 scientific news articles of 3 domains (Health, Climate Change, Nutrition) (HCN), which are human annotated with the major claim, the claimer and the object of the major claim. We also evaluate our proposed model in the benchmark dataset of NEWSCLAIMS. Experimental and qualitative results showcase the effectiveness of the proposed approach. We make our dataset publicly available to encourage further research.</abstract>
       <url hash="3c7f19ca">2024.eacl-long.156</url>
@@ -2247,7 +2247,7 @@
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
       <author><first>Michael</first><last>Färber</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland, College Park</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>2685-2702</pages>
       <abstract>Prompt-based methods have been successfully applied to multilingual pretrained language models for zero-shot cross-lingual understanding. However, most previous studies primarily focused on sentence-level classification tasks, and only a few considered token-level labeling tasks such as Named Entity Recognition (NER) and Part-of-Speech (POS) tagging. In this paper, we propose Token-Level Prompt Decomposition (ToPro), which facilitates the prompt-based method for token-level sequence labeling tasks. The ToPro method decomposes an input sentence into single tokens and applies one prompt template to each token. Our experiments on multilingual NER and POS tagging datasets demonstrate that ToPro-based fine-tuning outperforms Vanilla fine-tuning and Prompt-Tuning in zero-shot cross-lingual transfer, especially for languages that are typologically different from the source language English. Our method also attains state-of-the-art performance when employed with the mT5 model. Besides, our exploratory study in multilingual large language models shows that ToPro performs much better than the current in-context learning method. Overall, the performance improvements show that ToPro could potentially serve as a novel and simple benchmarking method for sequence labeling tasks.</abstract>
       <url hash="744e06d3">2024.eacl-long.164</url>
@@ -2330,7 +2330,7 @@
       <title>Syntactic Preposing and Discourse Relations</title>
       <author><first>Yunfang</first><last>Dong</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Xixian</first><last>Liao</last></author>
-      <author><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <pages>2790-2802</pages>
       <abstract>Over 15 years ago, Ward &amp; Birner (2006) suggested that non-canonical constructions in English can serve both to mark information status and to structure the information flow of discourse. One such construction is preposing, where a phrasal constituent appears to the left of its canonical position, typically sentence-initially. But computational work on discourse has, to date, ignored non-canonical syntax. We take account of non-canonical syntax by providing quantitative evidence relating NP/PP preposing to discourse relations. The evidence comes from an LLM mask-filling task that compares the predictions when a mask is inserted between the arguments of an implicit inter-sentential discourse relation — first, when the right-hand argument (Arg2) starts with a preposed constituent, and again, when that constituent is in canonical (post-verbal) position. Results show that (1) the top-ranked mask-fillers in the preposed case agree more often with “gold” annotations in the Penn Discourse TreeBank than they do in the latter case, and (2) preposing in Arg2 can affect the distribution of discourse-relational senses.</abstract>
       <url hash="a8aa0d72">2024.eacl-long.170</url>
@@ -2343,7 +2343,7 @@
       <author><first>Aru</first><last>Maekawa</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Tsutomu</first><last>Hirao</last><affiliation>NTT Communication Science Laboratories</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Division of Information Science, Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>2803-2815</pages>
       <abstract>Recently, decoder-only pre-trained large language models (LLMs), with several tens of billion parameters, have significantly impacted a wide range of natural language processing (NLP) tasks. While encoder-only or encoder-decoder pre-trained language models have already proved to be effective in discourse parsing, the extent to which LLMs can perform this task remains an open research question. Therefore, this paper explores how beneficial such LLMs are for Rhetorical Structure Theory (RST) discourse parsing. Here, the parsing process for both fundamental top-down and bottom-up strategies is converted into prompts, which LLMs can work with. We employ Llama 2 and fine-tune it with QLoRA, which has fewer parameters that can be tuned. Experimental results on three benchmark datasets, RST-DT, Instr-DT, and the GUM corpus, demonstrate that Llama 2 with 70 billion parameters in the bottom-up strategy obtained state-of-the-art (SOTA) results with significant differences. Furthermore, our parsers demonstrated generalizability when evaluated on RST-DT, showing that, in spite of being trained with the GUM corpus, it obtained similar performances to those of existing parsers trained with RST-DT.</abstract>
       <url hash="58708507">2024.eacl-long.171</url>
@@ -2570,7 +2570,7 @@
     <paper id="5">
       <title>Injecting <fixed-case>W</fixed-case>iktionary to improve token-level contextual representations using contrastive learning</title>
       <author><first>Anna</first><last>Mosolova</last></author>
-      <author><first>Marie</first><last>Candito</last><affiliation>Université de Paris</affiliation></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last><affiliation>Université de Paris</affiliation></author>
       <author><first>Carlos</first><last>Ramisch</last><affiliation>LIS - Laboratoire d’Informatique et Systèmes and AMU - Aix Marseille University</affiliation></author>
       <pages>34-41</pages>
       <abstract>While static word embeddings are blind to context, for lexical semantics tasks context is rather too present in contextual word embeddings, vectors of same-meaning occurrences being too different (Ethayarajh, 2019). Fine-tuning pre-trained language models (PLMs) using contrastive learning was proposed, leveraging automatically self-augmented examples (Liu et al., 2021b). In this paper, we investigate how to inject a lexicon as an alternative source of supervision, using the English Wiktionary. We also test how dimensionality reduction impacts the resulting contextual word embeddings. We evaluate our approach on the Word-In-Context (WiC) task, in the unsupervised setting (not using the training set). We achieve new SoTA result on the original WiC test set. We also propose two new WiC test sets for which we show that our fine-tuning method achieves substantial improvements. We also observe improvements, although modest, for the semantic frame induction task. Although we experimented on English to allow comparison with related work, our method is adaptable to the many languages for which large Wiktionaries exist.</abstract>
@@ -2595,8 +2595,8 @@
       <title>Evaluating the Factuality of Zero-shot Summarizers Across Varied Domains</title>
       <author><first>Sanjana</first><last>Ramprasad</last></author>
       <author><first>Kundan</first><last>Krishna</last></author>
-      <author><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <pages>50-59</pages>
       <abstract>Recent work has shown that large language models (LLMs) are capable of generating summaries zero-shot—i.e., without explicit supervision—that, under human assessment, are often comparable or even preferred to manually composed reference summaries. However, this prior work has focussed almost exclusively on evaluating news article summarization. How do zero-shot summarizers perform in other (potentially more specialized) domains?In this work we evaluate zero-shot generated summaries across specialized domains including: biomedical articles, and legal bills (in addition to standard news benchmarks for reference). We focus especially on the factuality of outputs. We acquire annotations from domain experts to identify inconsistencies in summaries and systematically categorize these errors. We analyze whether the prevalence of a given domain in the pretraining corpus affects extractiveness and faithfulness of generated summaries of articles in this domain. We release all collected annotations to facilitate additional research toward measuring and realizing factually accurate summarization, beyond news articles (The dataset can be downloaded from https://anonymous.4open.science/r/zero_shot_faceval_domains-9B83)</abstract>
       <url hash="19a8a9a2">2024.eacl-short.7</url>
@@ -2622,7 +2622,7 @@
       <title>Characterizing the Confidence of Large Language Model-Based Automatic Evaluation Metrics</title>
       <author><first>Rickard</first><last>Stureborg</last><affiliation>Duke University</affiliation></author>
       <author><first>Dimitris</first><last>Alikaniotis</last><affiliation>Grammarly</affiliation></author>
-      <author><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
       <pages>76-89</pages>
       <abstract>There has recently been a growing interest in using Large Language Models (LLMs) to evaluate NLP tasks automatically. Considerable research effort has been put into improving such systems towards achieving high correlations with human judgement. However, it is still unclear what level of correlation is good enough for practical applications of LLM-based automatic evaluation systems. This paper characterizes these LLM evaluators’ confidence in ranking candidate NLP models and develops a configurable Monte Carlo simulation method. We show that even automatic metrics with low correlation with human judgement can reach high-confidence rankings of candidate models with reasonable evaluation set sizes (100s of examples). Further, we describe tradeoff curves between the LLM evaluator performance (i.e., correlation with humans) and evaluation set size; loss in correlation can be compensated with modest increases in the evaluation set size. We validate our results on RoSE, a text summarization dataset, and find our estimates of confidence align with empirical observations.Code available at https://github.com/rickardstureborg/llm-eval-confidence</abstract>
       <url hash="861861e5">2024.eacl-short.9</url>
@@ -2686,7 +2686,7 @@
       <author><first>Yingqiang</first><last>Gao</last></author>
       <author><first>Nianlong</first><last>Gu</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Jessica</first><last>Lam</last><affiliation>Insititute of Neuroinformatics, University of Zurich and ETH Zurich, ETHZ - ETH Zurich</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Richard</first><last>Hahnloser</last><affiliation>ETHZ - ETH Zurich</affiliation></author>
       <pages>151-160</pages>
       <abstract>Scientific abstracts provide a concise summary of research findings, making them a valuable resource for extracting scientific arguments. In this study, we assess various unsupervised approaches for extracting arguments as aligned premise-conclusion pairs: semantic similarity, text perplexity, and mutual information. We aggregate structured abstracts from PubMed Central Open Access papers published in 2022 and evaluate the argument aligners in terms of the performance of language models that we fine-tune to generate the conclusions from the extracted premise given as input prompts. We find that mutual information outperforms the other measures on this task, suggesting that the reasoning process in scientific abstracts hinges mostly on linguistic constructs beyond simple textual similarity.</abstract>
@@ -2736,7 +2736,7 @@
     <paper id="18">
       <title>Language Model Sentence Completion with a Parser-Driven Rhetorical Control Method</title>
       <author><first>Joshua</first><last>Zingale</last><affiliation>San Diego State University</affiliation></author>
-      <author><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado at Colorado Springs</affiliation></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado at Colorado Springs</affiliation></author>
       <pages>193-203</pages>
       <abstract>Controlled text generation (CTG) seeks to guide large language model (LLM) output, that statistical language generation would conform to desired criteria. The current study presents a novel CTG algorithm that enforces adherence toward specific rhetorical relations in an LLM sentence-completion context by a parser-driven decoding scheme that requires no model fine-tuning. The method is validated both with automatic and human evaluation.</abstract>
       <url hash="ee9d9743">2024.eacl-short.18</url>
@@ -2758,7 +2758,7 @@
     </paper>
     <paper id="20">
       <title>Source Identification in Abstractive Summarization</title>
-      <author><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Dimitris</first><last>Alikaniotis</last><affiliation>Grammarly</affiliation></author>
       <pages>212-224</pages>
       <abstract>Neural abstractive summarization models make summaries in an end-to-end manner, and little is known about how the source information is actually converted into summaries. In this paper, we define input sentences that contain essential information in the generated summary as source sentences and study how abstractive summaries are made by analyzing the source sentences. To this end, we annotate source sentences for reference summaries and system summaries generated by PEGASUS on document-summary pairs sampled from the CNN/DailyMail and XSum datasets. We also formulate automatic source sentence detection and compare multiple methods to establish a strong baseline for the task. Experimental results show that the perplexity-based method performs well in highly abstractive settings, while similarity-based methods perform robustly in relatively extractive settings.</abstract>
@@ -2799,7 +2799,7 @@
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Andrea</first><last>Piergentili</last><affiliation>University of Trento and Fondazione Bruno Kessler</affiliation></author>
       <author><first>Dennis</first><last>Fucci</last></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>256-267</pages>
       <abstract>Gender-neutral translation (GNT) that avoids biased and undue binary assumptions is a pivotal challenge for the creation of more inclusive translation technologies. Advancements for this task in Machine Translation (MT), however, are hindered by the lack of dedicated parallel data, which are necessary to adapt MT systems to satisfy neutral constraints. For such a scenario, large language models offer hitherto unforeseen possibilities, as they come with the distinct advantage of being versatile in various (sub)tasks when provided with explicit instructions. In this paper, we explore this potential to automate GNT by comparing MT with the popular GPT-4 model. Through extensive manual analyses, our study empirically reveals the inherent limitations of current MT systems in generating GNTs and provides valuable insights into the potential and challenges associated with prompting for neutrality.</abstract>
@@ -2811,8 +2811,8 @@
     <paper id="24">
       <title>Interpreting Predictive Probabilities: Model Confidence or Human Label Variation?</title>
       <author><first>Joris</first><last>Baan</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <author><first>Wilker</first><last>Aziz</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>268-277</pages>
       <abstract>With the rise of increasingly powerful and user-facing NLP systems, there is growing interest in assessing whether they have a good _representation of uncertainty_ by evaluating the quality of their predictive distribution over outcomes. We identify two main perspectives that drive starkly different evaluation protocols. The first treats predictive probability as an indication of model confidence; the second as an indication of human label variation. We discuss their merits and limitations, and take the position that both are crucial for trustworthy and fair NLP systems, but that exploiting a single predictive distribution is limiting. We recommend tools and highlight exciting directions towards models with disentangled representations of uncertainty about predictions and uncertainty about human labels.</abstract>
@@ -2890,7 +2890,7 @@
       <title>Leveraging <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> in Pharmacovigilance Event Extraction: An Empirical Study</title>
       <author><first>Zhaoyue</first><last>Sun</last><affiliation>University of Warwick</affiliation></author>
       <author><first>Gabriele</first><last>Pergola</last></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <author><first>Yulan</first><last>He</last><affiliation>King’s College London, University of London</affiliation></author>
       <pages>344-357</pages>
       <abstract>With the advent of large language models (LLMs), there has been growing interest in exploring their potential for medical applications. This research aims to investigate the ability of LLMs, specifically ChatGPT, in the context of pharmacovigilance event extraction, of which the main goal is to identify and extract adverse events or potential therapeutic events from textual medical sources. We conduct extensive experiments to assess the performance of ChatGPT in the pharmacovigilance event extraction task, employing various prompts and demonstration selection strategies. The findings demonstrate that while ChatGPT demonstrates reasonable performance with appropriate demonstration selection strategies, it still falls short compared to fully fine-tuned small models. Additionally, we explore the potential of leveraging ChatGPT for data augmentation. However, our investigation reveals that the inclusion of synthesized data into fine-tuning may lead to a decrease in performance, possibly attributed to noise in the ChatGPT-generated labels. To mitigate this, we explore different filtering strategies and find that, with the proper approach, more stable performance can be achieved, although constant improvement remains elusive.</abstract>
@@ -2918,7 +2918,7 @@
       <author><first>Boonnithi</first><last>Jiaramaneepinit</last></author>
       <author><first>Thodsaporn</first><last>Chay-intr</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Institute of Innovative Research, Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>368-379</pages>
       <abstract>Although fine-tuning a pre-trained model with a conventional approach has shown to be effective in various downstream tasks, previous work has used only backpropagation to fine-tune the model, which causes a massive amount of computational resources and time. We propose Extreme Fine-Tuning (EFT), a novel approach for fine-tuning a pre-trained model effectively and efficiently. EFT uses backpropagation for a brief fine-tuning and an iterative extreme learning machine for training a classifier. We applied EFT to four text classification datasets, MELD, IEMOCAP, IMDb, and AG News, and compared its performance with state-of-the-art (SOTA) approaches. The results indicate that EFT noticeably outperformed the other approaches in training-time measurement with comparable model performance. We will release our code at https://github.com/up-33/extreme-fine-tuning.</abstract>
       <url hash="d7ff3f30">2024.eacl-short.32</url>
@@ -2989,7 +2989,7 @@
       <author><first>Pavan</first><last>Tankala</last></author>
       <author><first>Preethi</first><last>Jyothi</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
       <author><first>Preeti</first><last>Rao</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>426-431</pages>
       <abstract>We present a new Hindi text-to-speech (TTS) dataset and demonstrate its utility for the expressive synthesis of children’s audio stories. The dataset comprises narration by a single female speaker who modifies her voice to produce different story characters. Annotation for dialogue identification, character labelling, and character attribution are provided, all of which are expected to facilitate the learning of character voice and speaking styles. Experiments are conducted using different versions of the annotated dataset that enable training a multi-speaker TTS model on the single-speaker data. Subjective tests show that the multi-speaker model improves expressiveness and character voice consistency compared to the baseline single-speaker TTS. With the multi-speaker model, objective evaluations show comparable word error rates, better speaker voice consistency, and higher correlations with ground-truth emotion attributes. We release a new 16.8 hours storytelling speech dataset in Hindi and propose effective solutions for expressive TTS with narrator voice modulation and character voice consistency.</abstract>
       <url hash="21284d0b">2024.eacl-short.37</url>
@@ -3033,7 +3033,7 @@
       <author><first>Matthew</first><last>Matero</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Vivek</first><last>Kulkarni</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
       <pages>454-468</pages>
       <abstract>Social science NLP tasks, such as emotion or humor detection, are required to capture the semantics along with the implicit pragmatics from text, often with limited amounts of training data. Instruction tuning has been shown to improve the many capabilities of large language models (LLMs) such as commonsense reasoning, reading comprehension, and computer programming. However, little is known about the effectiveness of instruction tuning on the social domain where implicit pragmatic cues are often needed to be captured. We explore the use of instruction tuning for social science NLP tasks and introduce Socialite-Llama — an open-source, instruction-tuned Llama. On a suite of 20 social science tasks, Socialite-Llama improves upon the performance of Llama as well as matches or improves upon the performance of a state-of-the-art, multi-task finetuned model on a majority of them. Further, Socialite-Llama also leads to improvement on 5 out of 6 related social tasks as compared to Llama, suggesting instruction tuning can lead to generalized social understanding. All resources including our code, model and dataset can be found through [bit.ly/socialitellama](https://bit.ly/socialitellama/).</abstract>
       <url hash="91eb1d08">2024.eacl-short.40</url>
@@ -3073,7 +3073,7 @@
     <meta>
       <booktitle>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations</booktitle>
       <editor><first>Nikolaos</first><last>Aletras</last></editor>
-      <editor><first>Orphee</first><last>De Clercq</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>St. Julians, Malta</address>
       <month>March</month>
@@ -3094,7 +3094,7 @@
       <author><first>Marie-Noelle</first><last>Bessagnet</last><affiliation>LIUPPA, E2S, University of Pau and Pays Adour (UPPA)</affiliation></author>
       <author><first>Annig</first><last>Le Parc Lacayrelle</last><affiliation>LIUPPA, E2S, University of Pau and Pays Adour (UPPA)</affiliation></author>
       <author><first>Philippe</first><last>Roose</last><affiliation>LIUPPA, E2S, University of Pau and Pays Adour (UPPA)</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center-Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Center-Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>1-9</pages>
       <abstract>In this paper we introduce TextBI, a multimodal generic dashboard designed to present multidimensional text annotations on large volumes of multilingual social media data. This tool focuses on four core dimensions: spatial, temporal, thematic, and personal, and also supports additional enrichment data such as sentiment and engagement. Multiple visualization modes are offered, including frequency, movement, and association. This dashboard addresses the challenge of facilitating the interpretation of NLP annotations by visualizing them in a user-friendly, interactive interface catering to two categories of users: (1) domain stakeholders and (2) NLP researchers. We conducted experiments within the domain of tourism leveraging data from X (formerly Twitter) and incorporating requirements from tourism offices. Our approach, TextBI, represents a significant advancement in the field of visualizing NLP annotations by integrating and blending features from a variety of Business Intelligence, Geographical Information Systems and NLP tools. A demonstration video is also provided https://youtu.be/x714RKvo9Cg</abstract>
       <url hash="df77c7ee">2024.eacl-demo.1</url>
@@ -3110,7 +3110,7 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>National Key Laboratory for Novel Software Technology, Nanjing University</affiliation></author>
       <author><first>Siheng</first><last>Zhao</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Sizhe</first><last>Liu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>10-17</pages>
       <abstract>Augmenting the base neural model with a token-level symbolic datastore is a novel generation paradigm and has achieved promising results in machine translation (MT). In this paper, we introduce a unified framework kNN-BOX, which enables quick development and visualization for this novel paradigm. kNN-BOX decomposes the datastore-augmentation approach into three modules: datastore, retriever and combiner, thus putting diverse kNN generation methods into a unified way. Currently, kNN-BOX has provided implementation of seven popular kNN-MT variants, covering research from performance enhancement to efficiency optimization. It is easy for users to reproduce these existing work or customize their own models. Besides, users can interact with their kNN generation systems with kNN-BOX to better understand the underlying inference process in a visualized way. In experiment section, we apply kNN-BOX for machine translation and three other seq2seq generation tasks (text simplification, paraphrase generation and question generation). Experiment results show that augmenting the base neural model with kNN-BOX can bring large performance improvement in all these tasks. The code and document of kNN-BOX is available at https://github.com/NJUNLP/knn-box. The demo can be accessed at http://nlp.nju.edu.cn/demo/knn-box/. The introduction video is available at https://www.youtube.com/watch?v=m0eJldHVR3w.</abstract>
       <url hash="b981b9a2">2024.eacl-demo.2</url>
@@ -3135,7 +3135,7 @@
       <author><first>Akul</first><last>Singh</last><affiliation>Florida International University</affiliation></author>
       <author><first>Jared</first><last>Hummer</last><affiliation>Florida International University</affiliation></author>
       <author><first>Mustafa</first><last>Ocal</last><affiliation>Florida International University</affiliation></author>
-      <author><first>Mark</first><last>Finlayson</last><affiliation>FIU</affiliation></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last><affiliation>FIU</affiliation></author>
       <pages>27-34</pages>
       <abstract>pyTLEX is an implementation of the TimeLine EXtraction algorithm (TLEX; Finlayson et al.,2021) that enables users to work with TimeML annotations and perform advanced temporal analysis, offering a comprehensive suite of features. TimeML is a standardized markup language for temporal information in text. pyTLEX allows users to parse TimeML annotations, construct TimeML graphs, and execute the TLEX algorithm to effect complete timeline extraction. In contrast to previous implementations (i.e., jTLEX for Java), pyTLEX sets itself apart with a range of advanced features. It introduces a React-based visualization system, enhancing the exploration of temporal data and the comprehension of temporal connections within textual information. Furthermore, pyTLEX incorporates an algorithm for increasing connectivity in temporal graphs, which identifies graph disconnectivity and recommends links based on temporal reasoning, thus enhancing the coherence of the graph representation. Additionally, pyTLEX includes a built-in validation algorithm, ensuring compliance with TimeML annotation guidelines, which is essential for maintaining data quality and reliability. pyTLEX equips researchers and developers with an extensive toolkit for temporal analysis, and its testing across various datasets validates its accuracy and reliability.</abstract>
       <url hash="f73e10d4">2024.eacl-demo.4</url>
@@ -3279,7 +3279,7 @@
       <author><first>Niki Andreas</first><last>Loppi</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Alessandro</first><last>Raganato</last><affiliation>University of Milano-Bicocca</affiliation></author>
       <author><first>Raúl</first><last>Vázquez</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <pages>127-136</pages>
       <abstract>NLP in the age of monolithic large language models is approaching its limits in terms of size and information that can be handled. The trend goes to modularization, a necessary step into the direction of designing smaller sub-networks and components with specialized functionality. In this paper, we present the MAMMOTH toolkit: a framework designed for training massively multilingual modular machine translation systems at scale, initially derived from OpenNMT-py and then adapted to ensure efficient training across computation clusters.We showcase its efficiency across clusters of A100 and V100 NVIDIA GPUs, and discuss our design philosophy and plans for future information.The toolkit is publicly available online at https://github.com/Helsinki-NLP/mammoth.</abstract>
       <url hash="f7dfc576">2024.eacl-demo.14</url>
@@ -3296,7 +3296,7 @@
       <author><first>Tuo</first><last>Zhang</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Nina</first><last>Tahmasebi</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Jonas</first><last>Kuhn</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>137-149</pages>
       <abstract>We present the DURel tool implementing the annotation of semantic proximity between word uses into an online, open source interface. The tool supports standardized human annotation as well as computational annotation, building on recent advances with Word-in-Context models. Annotator judgments are clustered with automatic graph clustering techniques and visualized for analysis. This allows to measure word senses with simple and intuitive micro-task judgments between use pairs, requiring minimal preparation efforts. The tool offers additional functionalities to compare the agreement between annotators to guarantee the inter-subjectivity of the obtained judgments and to calculate summary statistics over the annotated data giving insights into sense frequency distributions, semantic variation or changes of senses over time.</abstract>
       <url hash="a47730f3">2024.eacl-demo.15</url>
@@ -3308,7 +3308,7 @@
       <title><fixed-case>RAGA</fixed-case>s: Automated Evaluation of Retrieval Augmented Generation</title>
       <author><first>Shahul</first><last>Es</last><affiliation>Exploding Gradients</affiliation></author>
       <author><first>Jithin</first><last>James</last><affiliation>Exploding Gradients</affiliation></author>
-      <author><first>Luis</first><last>Espinosa Anke</last><affiliation>Cardiff University</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Steven</first><last>Schockaert</last><affiliation>Cardiff University</affiliation></author>
       <pages>150-158</pages>
       <abstract>We introduce RAGAs (Retrieval Augmented Generation Assessment), a framework for reference-free evaluation of Retrieval Augmented Generation (RAG) pipelines. RAGAs is available at [https://github.com/explodinggradients/ragas]. RAG systems are composed of a retrieval and an LLM based generation module. They provide LLMs with knowledge from a reference textual database, enabling them to act as a natural language layer between a user and textual databases, thus reducing the risk of hallucinations. Evaluating RAG architectures is challenging due to several dimensions to consider: the ability of the retrieval system to identify relevant and focused context passages, the ability of the LLM to exploit such passages faithfully, and the quality of the generation itself. With RAGAs, we introduce a suite of metrics that can evaluate these different dimensions without relying on ground truth human annotations. We posit that such a framework can contribute crucially to faster evaluation cycles of RAG architectures, which is especially important given the fast adoption of LLMs.</abstract>
@@ -3335,7 +3335,7 @@
       <author><first>Kushan</first><last>Mitra</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Rafael</first><last>Li Chen</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Sajjadur</first><last>Rahman</last><affiliation>Megagon Labs</affiliation></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
       <pages>168-176</pages>
       <abstract>Large language models (LLMs) can label data faster and cheaper than humans for various NLP tasks. Despite their prowess, LLMs may fall short in understanding of complex, sociocultural, or domain-specific context, potentially leading to incorrect annotations. Therefore, we advocate a collaborative approach where humans and LLMs work together to produce reliable and high-quality labels. We present MEGAnno+, a human-LLM collaborative annotation system that offers effective LLM agent and annotation management, convenient and robust LLM annotation, and exploratory verification of LLM labels by humans.</abstract>
       <url hash="629984d8">2024.eacl-demo.18</url>
@@ -3346,8 +3346,8 @@
       <title><fixed-case>X</fixed-case>-<fixed-case>AMR</fixed-case> Annotation Tool</title>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Jon</first><last>Cai</last><affiliation>The University of Colorado</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
-      <author><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>177-186</pages>
       <abstract>This paper presents a novel Cross-document Abstract Meaning Representation (X-AMR) annotation tool designed for annotating key corpus-level event semantics. Leveraging machine assistance through the Prodigy Annotation Tool, we enhance the user experience, ensuring ease and efficiency in the annotation process. Through empirical analyses, we demonstrate the effectiveness of our tool in augmenting an existing event corpus, highlighting its advantages when integrated with GPT-4. Code and annotations: href{https://anonymous.4open.science/r/xamr-9ED0}{anonymous.4open.science/r/xamr-9ED0} footnote Demo: {href{https://youtu.be/TuirftxciNE}{https://youtu.be/TuirftxciNE}} footnote Live Link: {href{https://tinyurl.com/mrxmafwh}{https://tinyurl.com/mrxmafwh}}</abstract>
       <url hash="f09896a9">2024.eacl-demo.19</url>
@@ -3372,7 +3372,7 @@
     <paper id="21">
       <title><fixed-case>TL</fixed-case>;<fixed-case>DR</fixed-case> Progress: Multi-faceted Literature Exploration in Text Summarization</title>
       <author><first>Shahbaz</first><last>Syed</last><affiliation>Leipzig University</affiliation></author>
-      <author><first>Khalid</first><last>Al Khatib</last><affiliation>Groningen University</affiliation></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last><affiliation>Groningen University</affiliation></author>
       <author><first>Martin</first><last>Potthast</last><affiliation>Leipzig University</affiliation></author>
       <pages>195-206</pages>
       <abstract>This paper presents TL;DR Progress, a new tool for exploring the literature on neural text summarization. It organizes 514~papers based on a comprehensive annotation scheme for text summarization approaches and enables fine-grained, faceted search. Each paper was manually annotated to capture aspects such as evaluation metrics, quality dimensions, learning paradigms, challenges addressed, datasets, and document domains. In addition, a succinct indicative summary is provided for each paper, describing contextual factors, issues, and proposed solutions. The tool is available at {url{https://www.tldr-progress.de}}, a demo video at {url{https://youtu.be/uCVRGFvXUj8}}</abstract>
@@ -3400,7 +3400,7 @@
       <author><first>Jakub</first><last>Piskorski</last></author>
       <author><first>Nicolas</first><last>Stefanovitch</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>207-213</pages>
       <abstract>The abundance of news sources and the urgent demand for reliable information have led to serious concerns about the threat of misleading information. In this paper, we present FRAPPE, a FRAming, Persuasion, and Propaganda Explorer system. FRAPPE goes beyond conventional news analysis of articles and unveils the intricate linguistic techniques used to shape readers’ opinions and emotions. Our system allows users not only to analyze individual articles for their genre, framings, and use of persuasion techniques, but also to draw comparisons between the strategies of persuasion and framing adopted by a diverse pool of news outlets and countries across multiple languages for different topics, thus providing a comprehensive understanding of how information is presented and manipulated. FRAPPE is publicly accessible at https://frappe.streamlit.app/ and a video explaining our system is available at https://www.youtube.com/watch?v=3RlTfSVnZmk</abstract>
       <url hash="e66c3473">2024.eacl-demo.22</url>
@@ -3608,7 +3608,7 @@
       <author><first>Eftekhar</first><last>Hossain</last></author>
       <author><first>Omar</first><last>Sharif</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Mohammed Moshiul</first><last>Hoque</last><affiliation>Chittagong University of Engineering and Technology</affiliation></author>
-      <author><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
       <pages>162-174</pages>
       <abstract>Multimodal hateful content detection is a challenging task that requires complex reasoning across visual and textual modalities. Therefore, creating a meaningful multimodal representation that effectively captures the interplay between visual and textual features through intermediate fusion is critical. Conventional fusion techniques are unable to attend to the modality-specific features effectively. Moreover, most studies exclusively concentrated on English and overlooked other low-resource languages. This paper proposes a context-aware attention framework for multimodal hateful content detection and assesses it for both English and non-English languages. The proposed approach incorporates an attention layer to meaningfully align the visual and textual features. This alignment enables selective focus on modality-specific features before fusing them. We evaluate the proposed approach on two benchmark hateful meme datasets, viz. MUTE (Bengali code-mixed) and MultiOFF (English). Evaluation results demonstrate our proposed approach’s effectiveness with F1-scores of 69.7% and 70.3% for the MUTE and MultiOFF datasets. The scores show approximately 2.5% and 3.2% performance improvement over the state-of-the-art systems on these datasets. Our implementation is available at https://github.com/eftekhar-hossain/Bengali-Hateful-Memes.</abstract>
       <url hash="6461034d">2024.eacl-srw.12</url>
@@ -3691,7 +3691,7 @@
       <author><first>Juhyun</first><last>Oh</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Eunsu</first><last>Kim</last></author>
       <author><first>Inha</first><last>Cha</last><affiliation>Upstage AI Research</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>248-257</pages>
       <abstract>This paper explores the assumption that Large Language Models (LLMs) skilled in generation tasks are equally adept as evaluators. We assess the performance of three LLMs and one open-source LM in Question-Answering (QA) and evaluation tasks using the TriviaQA (Joshi et al., 2017) dataset. Results indicate a significant disparity, with LLMs exhibiting lower performance in evaluation tasks compared to generation tasks. Intriguingly, we discover instances of unfaithful evaluation where models accurately evaluate answers in areas where they lack competence, underscoring the need to examine the faithfulness and trustworthiness of LLMs as evaluators. This study contributes to the understanding of “the Generative AI Paradox” (West et al., 2023), highlighting a need to explore the correlation between generative excellence and evaluation proficiency, and the necessity to scrutinize the faithfulness aspect in model evaluations.</abstract>
       <url hash="2704af54">2024.eacl-srw.19</url>
@@ -3821,7 +3821,7 @@
       <title>Dynamic Task-Oriented Dialogue: A Comparative Study of Llama-2 and Bert in Slot Value Generation</title>
       <author><first>Tiziano</first><last>Labruna</last></author>
       <author><first>Sofia</first><last>Brenna</last><affiliation>Free University of Bozen</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>358-368</pages>
       <abstract>Recent advancements in instruction-based language models have demonstrated exceptional performance across various natural language processing tasks. We present a comprehensive analysis of the performance of two open-source language models, BERT and Llama-2, in the context of dynamic task-oriented dialogues. Focusing on the Restaurant domain and utilizing the MultiWOZ 2.4 dataset, our investigation centers on the models’ ability to generate predictions for masked slot values within text. The dynamic aspect is introduced through simulated domain changes, mirroring real-world scenarios where new slot values are incrementally added to a domain over time.This study contributes to the understanding of instruction-based models’ effectiveness in dynamic natural language understanding tasks when compared to traditional language models and emphasizes the significance of open-source, reproducible models in advancing research within the academic community.</abstract>
       <url hash="e6a5f347">2024.eacl-srw.29</url>
@@ -3866,7 +3866,7 @@
     </paper>
     <paper id="2">
       <title>Item Response Theory for Natural Language Processing</title>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Pedro</first><last>Rodriguez</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <author><first>Jose</first><last>Hernandez-Orallo</last></author>
diff --git a/data/xml/2024.eamt.xml b/data/xml/2024.eamt.xml
index 04f570d3dc..01c09ccf87 100644
--- a/data/xml/2024.eamt.xml
+++ b/data/xml/2024.eamt.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-09-22" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)</booktitle>
-      <editor><first>Carolina</first><last>Scarton</last></editor>
+      <editor id="carolina-scarton"><first>Carolina</first><last>Scarton</last></editor>
       <editor><first>Charlotte</first><last>Prescott</last></editor>
       <editor><first>Chris</first><last>Bayliss</last></editor>
       <editor><first>Chris</first><last>Oakley</last></editor>
@@ -210,7 +210,7 @@
       <author><first>Penumalla</first><last>Aditya Pavani</last><affiliation>NA</affiliation></author>
       <author><first>Kukkapalli</first><last>Shravya</last><affiliation>NA</affiliation></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last><affiliation>NA</affiliation></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
       <pages>207-228</pages>
       <abstract>Generative Large Language Models (LLMs) have achieved remarkable advances in various NLP tasks. In this work, our aim is to explore the multilingual capabilities of large language models by using machine translation as a task involving English and 22 Indian languages. We first investigate the translation capabilities of raw large-language models, followed by exploring the in-context learning capabilities of the same raw models. We fine-tune these large language models using parameter-efficient fine-tuning methods such as LoRA and additionally with full fine-tuning. Through our study, we have identified the model that performs best among the large language models available for the translation task.Our results demonstrate significant progress, with average BLEU scores of 13.42, 15.93, 12.13, 12.30, and 12.07, as well as chrF scores of 43.98, 46.99, 42.55, 42.42, and 45.39, respectively, using two-stage fine-tuned LLaMA-13b for English to Indian languages on IN22 (conversational), IN22 (general), flores200-dev, flores200-devtest, and newstest2019 testsets. Similarly, for Indian languages to English, we achieved average BLEU scores of 14.03, 16.65, 16.17, 15.35 and 12.55 along with chrF scores of 36.71, 40.44, 40.26, 39.51, and 36.20, respectively, using fine-tuned LLaMA-13b on IN22 (conversational), IN22 (general), flores200-dev, flores200-devtest and newstest2019 testsets. Overall, our findings highlight the potential and strength of large language models for machine translation capabilities, including languages that are currently underrepresented in LLMs.</abstract>
       <url hash="94e60415">2024.eamt-1.19</url>
@@ -233,7 +233,7 @@
       <author><first>Baban</first><last>Gain</last><affiliation>Indian Institute of Technology, Patna</affiliation></author>
       <author><first>Santanu</first><last>Pal</last><affiliation>Wipro</affiliation></author>
       <author><first>Asif</first><last>Ekbal</last><affiliation>IIT Patna</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>246-257</pages>
       <abstract>In document-level neural machine translation (DocNMT), multi-encoder approaches are common in encoding context and source sentences. Recent studies (CITATION) have shown that the context encoder generates noise and makes the model robust to the choice of context. This paper further investigates this observation by explicitly modelling context encoding through multi-task learning (MTL) to make the model sensitive to the choice of context. We conduct experiments on cascade MTL architecture, which consists of one encoder and two decoders. Generation of the source from the context is considered an auxiliary task, and generation of the target from the source is the main task. We experimented with German–English language pairs on News, TED, and Europarl corpora. Evaluation results show that the proposed MTL approach performs better than concatenation-based and multi-encoder DocNMT models in low-resource settings and is sensitive to the choice of context. However, we observe that the MTL models are failing to generate the source from the context. These observations align with the previous studies, and this might suggest that the available document-level parallel corpora are not context-aware, and a robust sentence-level model can outperform the context-aware models.</abstract>
       <url hash="494f6c85">2024.eamt-1.21</url>
@@ -244,7 +244,7 @@
       <author><first>Miguel</first><last>Ramos</last></author>
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>António</first><last>Farinhas</last><affiliation>Instituto Superior Técnico</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>258-274</pages>
       <abstract>Reinforcement learning from human feedback (RLHF) is a recent technique to improve the quality of the text generated by a language model, making it closer to what humans would generate.A core ingredient in RLHF’s success in aligning and improving large language models (LLMs) is its <tex-math>\textit{reward model}</tex-math>, trained using human feedback on model outputs. In machine translation (MT), where metrics trained from human annotations can readily be used as reward models, recent methods using <tex-math>\textit{minimum Bayes risk}</tex-math> decoding and reranking have succeeded in improving the final quality of translation.In this study, we comprehensively explore and compare techniques for integrating quality metrics as reward models into the MT pipeline. This includes using the reward model for data filtering, during the training phase through RL, and at inference time by employing reranking techniques, and we assess the effects of combining these in a unified approach.Our experimental results, conducted across multiple translation tasks, underscore the crucial role of effective data filtering, based on estimated quality, in harnessing the full potential of RL in enhancing MT quality.Furthermore, our findings demonstrate the effectiveness of combining RL training with reranking techniques, showcasing substantial improvements in translation quality.</abstract>
       <url hash="530ff16f">2024.eamt-1.22</url>
@@ -254,7 +254,7 @@
       <title>Enhancing Scientific Discourse: Machine Translation for the Scientific Domain</title>
       <author><first>Dimitris</first><last>Roussis</last><affiliation>ILSP - “Athena” Research Center</affiliation></author>
       <author><first>Sokratis</first><last>Sofianopoulos</last><affiliation>ILSP - “Athena” Research Center</affiliation></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>275-285</pages>
       <abstract>The increasing volume of scientific research necessitates effective communication across language barriers. Machine translation (MT) offers a promising solution for accessing international publications. However, the scientific domain presents unique challenges due to its specialized vocabulary and complex sentence structures. In this paper, we present the development of a collection of parallel and monolingual corpora from the scientific domain. The corpora target the language pairs Spanish-English, French-English, and Portuguese-English. For each language pair, we create a large general scientific corpus as well as four smaller corpora focused on the research domains of: Energy Research, Neuroscience, Cancer and Transportation. To evaluate the quality of these corpora, we utilize them for fine-tuning general-purpose neural machine translation (NMT) systems. We provide details regarding the corpus creation process, the fine-tuning strategies employed, and we conclude with the evaluation results.</abstract>
       <url hash="b37bce1a">2024.eamt-1.23</url>
@@ -275,7 +275,7 @@
       <title>Enhancing Gender-Inclusive Machine Translation with Neomorphemes and Large Language Models</title>
       <author><first>Andrea</first><last>Piergentili</last><affiliation>University of Trento and Fondazione Bruno Kessler</affiliation></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>300-314</pages>
       <abstract>Machine translation (MT) models are known to suffer from gender bias, especially when translating into languages with extensive gendered morphology. Accordingly, they still fall short in using gender-inclusive language, also representative of non-binary identities. In this paper, we look at gender-inclusive neomorphemes, neologistic elements that avoid binary gender markings as an approach towards fairer MT. In this direction, we explore prompting techniques with large language models (LLMs) to translate from English into Italian using neomorphemes. So far, this area has been under-explored due to its novelty and the lack of publicly available evaluation resources. We fill this gap by releasing NEO-GATE, a resource designed to evaluate gender-inclusive en→it translation with neomorphemes. With NEO-GATE, we assess four LLMs of different families and sizes and different prompt formats, identifying strengths and weaknesses of each on this novel task for MT.</abstract>
@@ -313,7 +313,7 @@
       <author><first>Lifeng</first><last>Han</last></author>
       <author><first>Gleb</first><last>Erofeev</last></author>
       <author><first>Irina</first><last>Sorokina</last></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>337-346</pages>
       <abstract>Translation Quality Evaluation (TQE) is an essential step of the modern translation production process. TQE is critical in assessing both machine translation (MT) and human translation (HT) quality without reference translations. The ability to evaluate or even simply estimate the quality of translation automatically may open significant efficiency gains through process optimisation.This work examines whether the state-of-the-art large language models (LLMs) can be used for this uncertainty estimation of MT output quality. We take OpenAI models as an example technology and approach TQE as a binary classification task.On <b>eight language pairs</b> including English to Italian, German, French, Japanese, Dutch, Portuguese, Turkish, and Chinese, our experimental results show that fine-tuned <b>
           <i>gpt3.5</i></b> can demonstrate good performance on translation quality prediction tasks, i.e. <i>whether the translation needs to be edited</i>.Another finding is that simply increasing the sizes of LLMs does not lead to apparent better performances on this task by comparing the performance of three different versions of OpenAI models: <b>
@@ -359,7 +359,7 @@
     <paper id="33">
       <title>Post-editors as Gatekeepers of Lexical and Syntactic Diversity: Comparative Analysis of Human Translation and Post-editing in Professional Settings</title>
       <author><first>Lise</first><last>Volkart</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
       <pages>387-395</pages>
       <abstract>This paper presents a comparative analysis between human translation (HT) and post-edited machine translation (PEMT) from a lexical and syntactic perspective to verify whether the tendency of neural machine translation (NMT) systems to produce lexically and syntactically poorer translations shines through after post-editing (PE). The analysis focuses on three datasets collected in professional contexts containing translations from English into French and German into French. Through a comparison of word translation entropy (HTRa) scores, we observe a lower degree of lexical diversity in PEMT compared to HT. Additionally, metrics of syntactic equivalence indicate that PEMT is more likely to mirror the syntactic structure of the source text in contrast to HT. By incorporating raw machine translation (MT) output into our analysis, we underline the important role post-editors play in adding lexical and syntactic diversity to MT output. Our findings provide relevant input for MT users and decision-makers in language services as well as for MT and PE trainers and advisers.</abstract>
       <url hash="520e5904">2024.eamt-1.33</url>
@@ -387,7 +387,7 @@
     <paper id="35">
       <title>Mitigating Translationese with <fixed-case>GPT</fixed-case>-4: Strategies and Performance</title>
       <author><first>Maria</first><last>Kunilovskaya</last><affiliation>Universität des Saarlandes and Tyumen State University</affiliation></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Heike</first><last>Przybyl</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Cristina</first><last>España-Bonet</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Josef</first><last>Genabith</last><affiliation>German Research Center for AI and Universität des Saarlandes</affiliation></author>
@@ -404,10 +404,10 @@
       <author><first>Éric</first><last>Clergerie</last></author>
       <author><first>Raphaël</first><last>Esamotunu</last><affiliation>NA</affiliation></author>
       <author><first>Mathilde</first><last>Huguin</last><affiliation>NA</affiliation></author>
-      <author><first>Natalie</first><last>Kübler</last><affiliation>NA</affiliation></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kübler</last><affiliation>NA</affiliation></author>
       <author><first>Alexandra</first><last>Mestivier</last><affiliation>NA</affiliation></author>
       <author><first>Mona</first><last>Michelot</last><affiliation>NA</affiliation></author>
-      <author><first>Laurent</first><last>Romary</last><affiliation>INRIA</affiliation></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last><affiliation>INRIA</affiliation></author>
       <author><first>Lichao</first><last>Zhu</last><affiliation>Université Paris Cité</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>Université Pierre et Marie Curie - Paris 6, Sorbonne Université - Faculté des Sciences (Paris VI)</affiliation></author>
       <pages>431-443</pages>
@@ -522,7 +522,7 @@
       <title>Training an <fixed-case>NMT</fixed-case> system for legal texts of a low-resource language variety South Tyrolean <fixed-case>G</fixed-case>erman - <fixed-case>I</fixed-case>talian</title>
       <author><first>Antoni</first><last>Oliver</last><affiliation>Universitat Oberta de Catalunya</affiliation></author>
       <author><first>Sergi</first><last>Alvarez-Vidal</last><affiliation>Universitat Pompeu Fabra and Universitat Oberta de Catalunya</affiliation></author>
-      <author><first>Egon</first><last>Stemle</last><affiliation>Masaryk University and Eurac Research</affiliation></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last><affiliation>Masaryk University and Eurac Research</affiliation></author>
       <author><first>Elena</first><last>Chiocchetti</last><affiliation>NA</affiliation></author>
       <pages>573-579</pages>
       <abstract>This paper illustrates the process of training and evaluating NMT systems for a language pair that includes a low-resource language variety.A parallel corpus of legal texts for Italian and South Tyrolean German has been compiled, with South Tyrolean German being the low-resourced language variety. As the size of the compiled corpus is insufficient for the training, we have combined the corpus with several parallel corpora using data weighting at sentence level. We then performed an evaluation of each combination and of two popular commercial systems.</abstract>
@@ -542,8 +542,8 @@
       <title><fixed-case>CantonMT</fixed-case>: <fixed-case>C</fixed-case>antonese to <fixed-case>E</fixed-case>nglish <fixed-case>NMT</fixed-case> Platform with Fine-Tuned Models using Real and Synthetic Back-Translation Data</title>
       <author><first>Kung</first><last>Hong</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>590-599</pages>
       <abstract>Neural Machine Translation (NMT) for low-resource languages remains a challenge for many NLP researchers. In this work, we deploy a standard data augmentation methodology by back-translation to a new language translation direction, i.e., Cantonese-to-English. We present the models we fine-tuned using the limited amount of real data and the synthetic data we generated using back-translation by three models: OpusMT, NLLB, and mBART.We carried out automatic evaluation using a range of different metrics including those that are lexical-based and embedding-based.Furthermore, we create a user-friendly interface for the models we included in this project, CantonMT, and make it available to facilitate Cantonese-to-English MT research. Researchers can add more models to this platform via our open-source CantonMT toolkit, available at <url>https://github.com/kenrickkung/CantoneseTranslation</url>.</abstract>
       <url hash="db0854f2">2024.eamt-1.49</url>
@@ -551,7 +551,7 @@
     </paper>
     <paper id="50">
       <title>Advancing Digital Language Equality in <fixed-case>E</fixed-case>urope: A Market Study and Open-Source Solutions for Multilingual Websites</title>
-      <author><first>Andrejs</first><last>Vasiljevs</last><affiliation>Tilde</affiliation></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last><affiliation>Tilde</affiliation></author>
       <author><first>Rinalds</first><last>Vīksna</last><affiliation>University of Latvia</affiliation></author>
       <author><first>Neil</first><last>Vacheva</last><affiliation>NA</affiliation></author>
       <author><first>Andis</first><last>Lagzdiņš</last><affiliation>NA</affiliation></author>
@@ -564,7 +564,7 @@
       <title>Exploring the Effectiveness of <fixed-case>LLM</fixed-case> Domain Adaptation for Business <fixed-case>IT</fixed-case> Machine Translation</title>
       <author><first>Johannes</first><last>Eschbach-Dymanus</last><affiliation>SAP SE and Institute for Computational Linguistics, Heidelberg University, Ruprecht-Karls-Universität Heidelberg</affiliation></author>
       <author><first>Frank</first><last>Essenberger</last><affiliation>SAP SE</affiliation></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Miriam</first><last>Exel</last><affiliation>SAP SE</affiliation></author>
       <pages>610-622</pages>
       <abstract>In this paper, we study the translation abilities of Large Language Models (LLMs) for business IT texts.We are strongly interested in domain adaptation of translation systems, which is essential for accurate and lexically appropriate translation of such texts.Among the open-source models evaluated in a zero- and few-shot setting, we find Llama-2 13B the most promising for domain-specific translation fine-tuning.We investigate the full range of adaptation techniques for LLMs: from prompting, over parameter-efficient fine-tuning to full fine-tuning, and compare to classic neural machine translation (MT) models trained internally at SAP.We provide guidance how to use training budget most effectively for different fine-tuning approaches.We observe that while LLMs can translate on-par with SAP’s MT models on general domain data, it is difficult to close the gap on SAP’s domain-specific data, even with extensive training and carefully curated data.</abstract>
@@ -573,7 +573,7 @@
     </paper>
     <paper id="52">
       <title>Creating and Evaluating a Multilingual Corpus of <fixed-case>UN</fixed-case> General Assembly Debates</title>
-      <author><first>Hannah</first><last>Bechara</last></author>
+      <author id="hannah-bechara"><first>Hannah</first><last>Bechara</last></author>
       <author><first>Krishnamoorthy</first><last>Manohara</last><affiliation>NA</affiliation></author>
       <author><first>Slava</first><last>Jankin</last><affiliation>Hertie School of Governance</affiliation></author>
       <pages>623-627</pages>
@@ -615,7 +615,7 @@
   <volume id="2" ingest-date="2024-09-22" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 2)</booktitle>
-      <editor><first>Carolina</first><last>Scarton</last></editor>
+      <editor id="carolina-scarton"><first>Carolina</first><last>Scarton</last></editor>
       <editor><first>Charlotte</first><last>Prescott</last></editor>
       <editor><first>Chris</first><last>Bayliss</last></editor>
       <editor><first>Chris</first><last>Oakley</last></editor>
@@ -623,7 +623,7 @@
       <editor><first>Stuart</first><last>Wrigley</last></editor>
       <editor><first>Xingyi</first><last>Song</last></editor>
       <editor><first>Edward</first><last>Gow-Smith</last></editor>
-      <editor><first>Mikel</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></editor>
       <editor><first>Helena</first><last>Moniz</last></editor>
       <publisher>European Association for Machine Translation (EAMT)</publisher>
       <address>Sheffield, UK</address>
@@ -661,7 +661,7 @@
       <author><first>Andrés</first><last>Lou</last><affiliation>Universidad de Alicante</affiliation></author>
       <author><first>Cristian</first><last>García-Romero</last><affiliation>Universidad de Alicante</affiliation></author>
       <author><first>Aarón</first><last>Galiano-Jiménez</last><affiliation>Universidad de Alicante</affiliation></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universidad de Alicante</affiliation></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universidad de Alicante</affiliation></author>
       <pages>4-5</pages>
       <abstract>The LiLowLa (“Lightweight neural translation technologies for low-resource languages”) project aims to enhance machine translation (MT) and translation memory (TM) technologies, particularly for low-resource language pairs, where adequate linguistic resources are scarce. The project started in September 2022 and will run till August 2025.</abstract>
       <url hash="9d2f482b">2024.eamt-2.3</url>
@@ -682,8 +682,8 @@
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>mart<fixed-case>B</fixed-case>i<fixed-case>C</fixed-case>: Smart Harvesting of Bilingual Corpora from the <fixed-case>I</fixed-case>nternet</title>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last><affiliation>Prompsit Language Engineering SL</affiliation></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last><affiliation>Prompsit Language Engineering SL</affiliation></author>
       <author><first>Alicia</first><last>Núñez Alcover</last><affiliation>NA</affiliation></author>
       <author><first>Tudor</first><last>Mateiu</last><affiliation>NA</affiliation></author>
       <author><first>Mikel</first><last>Forcada</last></author>
@@ -724,9 +724,9 @@
     </paper>
     <paper id="8">
       <title><fixed-case>MULTILINGTOOL</fixed-case>, Development of an Automatic Multilingual Subtitling and Dubbing System</title>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Ander</first><last>Corral</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Igor</first><last>Leturia</last><affiliation>NA</affiliation></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last><affiliation>NA</affiliation></author>
       <author><first>Xabier</first><last>Sarasola</last></author>
       <author><first>Josu</first><last>Murua</last><affiliation>NA</affiliation></author>
       <author><first>Iker</first><last>Manterola</last><affiliation>NA</affiliation></author>
@@ -741,7 +741,7 @@
       <author><first>Jingyuan</first><last>Sun</last></author>
       <author><first>Mingxiao</first><last>Li</last></author>
       <author><first>Ruben</first><last>Cartuyvels</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>16-17</pages>
       <abstract>The CALCULUS project, drawing on human capabilities of imagination and commonsense for natural language understanding (NLU), aims to advance machine-based NLU by integrating traditional AI concepts with contemporary machine learning techniques. It focuses on developing anticipatory event representations from both textual and visual data, connecting language structure to visual spatial organization and incorporating broad knowledge domains. Through testing these models in NLU tasks and evaluating their ability to predict untrained spatial and temporal details using real-world metrics, CALCULUS employs machine learning methods, including Bayesian techniques and neural networks, especially in data-sparse scenarios. The project’s culmination involves creating demonstrators that transform written stories into dynamic videos, showcasing the interdisciplinary expertise of the project leader in natural language processing, language and visual data analysis, information retrieval, and machine learning, all vital for the project’s achievements. In the CALCULUS project, our exploration of machine translation extends beyond the conventional text-to-text framework. We are broadening the horizons of machine translation by delving into the essence of transforming the formats of data distribution while keeping the meaning. This innovative approach involves converting information from one modality into another, transcending traditional linguistic boundaries. Our project includes novel work on translating text into images and videos, brain signals into images and videos.</abstract>
       <url hash="400120d6">2024.eamt-2.9</url>
@@ -758,7 +758,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>RC</fixed-case>num: A Semantic and Multilingual Online Edition of the Geneva Council Registers from 1545 to 1550</title>
-      <author><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Christophe</first><last>Chazalon</last></author>
       <author><first>Sandra</first><last>Coram-Mekkey</last></author>
       <author><first>Gilles</first><last>Falquet</last><affiliation>University of Geneva, Switzerland</affiliation></author>
@@ -766,7 +766,7 @@
       <author><first>Stephane</first><last>Marchand-Maillet</last><affiliation>University of Geneva, Switzerland</affiliation></author>
       <author><first>Laurent</first><last>Moccozet</last><affiliation>University of Genoa</affiliation></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Raphael</first><last>Rubino</last><affiliation>University of Geneva</affiliation></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Marco</first><last>Sorbi</last></author>
       <pages>21-22</pages>
       <abstract>The RCnum project is funded by the Swiss National Science Foundation and aims at producing a multilingual and semantically rich online edition of the Registers of Geneva Council from 1545 to 1550. Combining multilingual NLP, history and paleography, this collaborative project will clear hurdles inherent to texts manually written in 16th century Middle French while allowing for easy access and interactive consultation of these archives.</abstract>
@@ -892,7 +892,7 @@
     <paper id="22">
       <title>Evaluating Machine Translation for Emotion-loaded User Generated Content (<fixed-case>T</fixed-case>rans<fixed-case>E</fixed-case>val4<fixed-case>E</fixed-case>mo-<fixed-case>UGC</fixed-case>)</title>
       <author><first>Shenbin</first><last>Qian</last></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Félix</first><last>Do Carmo</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <pages>43-44</pages>
@@ -944,7 +944,7 @@
       <author><first>Bram</first><last>Vanroy</last><affiliation>Instituut voor de Nederlandse Taal and KU Leuven</affiliation></author>
       <author><first>Santiago</first><last>Gomez</last><affiliation>NA</affiliation></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Gorka</first><last>Labaka</last><affiliation>Universidad del País Vasco</affiliation></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last><affiliation>Universidad del País Vasco</affiliation></author>
       <author><first>Adrián</first><last>Núñez-Marcos</last><affiliation>NA</affiliation></author>
       <author><first>Irene</first><last>Murtagh</last></author>
       <author><first>Euan</first><last>McGill</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
@@ -965,17 +965,17 @@
     </paper>
     <paper id="27">
       <title><fixed-case>HPLT</fixed-case>’s First Release of Data and Models</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Pinzhen</first><last>Chen</last></author>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Bhavitvya</first><last>Malik</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Pavel</first><last>Stepachev</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <author><first>Jaume</first><last>Zaragoza</last></author>
       <pages>53-54</pages>
       <abstract>The High Performance Language Technologies (HPLT) project is a 3-year EU-funded project that started in September 2022. It aims to deliver free, sustainable, and reusable datasets, models, and workflows at scale using high-performance computing. We describe the first results of the project. The data release includes monolingual data in 75 languages at 5.6T tokens and parallel data in 18 language pairs at 96M pairs, derived from 1.8 petabytes of web crawls. Building upon automated and transparent pipelines, the first machine translation (MT) models as well as large language models (LLMs) have been trained and released. Multiple data processing tools and pipelines have also been made public.</abstract>
@@ -1041,7 +1041,7 @@
       <author><first>Bruno</first><last>Prezado Silva</last><affiliation>NA</affiliation></author>
       <author><first>Ana</first><last>Oliveira</last><affiliation>NA</affiliation></author>
       <author><first>Helena</first><last>Moniz</last><affiliation>NA</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <author><first>Paulo</first><last>Dimas</last><affiliation>NA</affiliation></author>
       <pages>61-62</pages>
       <abstract>This paper describes the project “NextGenAI: Center for Responsible AI”, a 39-month Mobilizing and Green Agenda for Business Innovation funded by the Portuguese Recovery and Resilience Plan, under the Recovery and Resilience Facility (RRF). The project aims to create a new Center for Responsible AI in Portugal, capable of delivering more than 20 AI products in crucial areas like “Life Sciences”, many of which use generative AI, particularly NLP models such as those for Machine Translation, contributing to translating into legislation the European Law included in the EU AI Act, and creating a critical mass in the development of responsible AI technologies. To accomplish this mission, the Center for Responsible AI is formed by an ecosystem of startups and research institutions driving research in a virtuous way by addressing real market needs and opportunities in Responsible AI.</abstract>
diff --git a/data/xml/2024.ecnlp.xml b/data/xml/2024.ecnlp.xml
index 9aa327063d..aad461b2ff 100644
--- a/data/xml/2024.ecnlp.xml
+++ b/data/xml/2024.ecnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 2024</booktitle>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Besnik</first><last>Fetahu</last></editor>
       <editor><first>Nicola</first><last>Ueffing</last></editor>
       <editor><first>Oleg</first><last>Rokhlenko</last></editor>
diff --git a/data/xml/2024.emnlp.xml b/data/xml/2024.emnlp.xml
index a9f8f335a0..25434dd96e 100644
--- a/data/xml/2024.emnlp.xml
+++ b/data/xml/2024.emnlp.xml
@@ -3,7 +3,7 @@
   <volume id="main" ingest-date="2024-11-01" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</booktitle>
-      <editor><first>Yaser</first><last>Al-Onaizan</last></editor>
+      <editor id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></editor>
       <editor><first>Mohit</first><last>Bansal</last></editor>
       <editor><first>Yun-Nung</first><last>Chen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -78,7 +78,7 @@
       <author><first>Vaishali</first><last>Pal</last></author>
       <author><first>Evangelos</first><last>Kanoulas</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <author><first>Andrew</first><last>Yates</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>75-92</pages>
       <abstract>TableQA is the task of answering questions over tables of structured information, returning individual cells or tables as output. TableQA research has focused primarily on high-resource languages, leaving medium- and low-resource languages with little progress due to scarcity of annotated data and neural models. We address this gap by introducing a fully automatic large-scale tableQA data generation process for low-resource languages with limited budget. We incorporate our data generation method on two Indic languages, Bengali and Hindi, which have no tableQA datasets or models. TableQA models trained on our large-scale datasets outperform state-of-the-art LLMs. We further study the trained models on different aspects, including mathematical reasoning capabilities and zero-shot cross-lingual transfer. Our work is the first on low-resource tableQA focusing on scalable data generation and evaluation procedures. Our proposed data generation method can be applied to any low-resource language with a web presence. We release datasets, models, and code (https://github.com/kolk/Low-Resource-TableQA-Indic-languages).</abstract>
       <url hash="413f416e">2024.emnlp-main.5</url>
@@ -156,7 +156,7 @@
     <paper id="10">
       <title>Hateful Word in Context Classification</title>
       <author><first>Sanne</first><last>Hoeken</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Özge</first><last>Alacam</last><affiliation>Bielefeld University</affiliation></author>
       <pages>172-186</pages>
       <abstract>Hate speech detection is a prevalent research field, yet it remains underexplored at the level of word meaning. This is significant, as terms used to convey hate often involve non-standard or novel usages which might be overlooked by commonly leveraged LMs trained on general language use. In this paper, we introduce the Hateful Word in Context Classification (<b>HateWiC</b>) task and present a dataset of ~4000 WiC-instances, each labeled by three annotators. Our analyses and computational exploration focus on the interplay between the subjective nature (context-dependent connotations) and the descriptive nature (as described in dictionary definitions) of hateful word senses. HateWiC annotations confirm that hatefulness of a word in context does not always derive from the sense definition alone. We explore the prediction of both majority and individual annotator labels, and we experiment with modeling context- and sense-based inputs. Our findings indicate that including definitions proves effective overall, yet not in cases where hateful connotations vary. Conversely, including annotator demographics becomes more important for mitigating performance drop in subjective hate prediction.</abstract>
@@ -168,7 +168,7 @@
       <title>Eyes Don’t Lie: Subjective Hate Annotation and Detection with Gaze</title>
       <author><first>Özge</first><last>Alacam</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Sanne</first><last>Hoeken</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>187-205</pages>
       <abstract>Hate speech is a complex and subjective phenomenon. In this paper, we present a dataset (GAZE4HATE) that provides gaze data collected in a hate speech annotation experiment. We study whether the gaze of an annotator provides predictors of their subjective hatefulness rating, and how gaze features can improve Hate Speech Detection (HSD). We conduct experiments on statistical modeling of subjective hate ratings and gaze and analyze to what extent rationales derived from hate speech models correspond to human gaze and explanations in our data. Finally, we introduce MEANION, a first gaze-integrated HSD model. Our experiments show that particular gaze features like dwell time or fixation counts systematically correlate with annotators’ subjective hate ratings and improve predictions of text-only hate speech models.</abstract>
       <url hash="bd6c25cd">2024.emnlp-main.11</url>
@@ -209,7 +209,7 @@
       <author><first>Wendi</first><last>Zhou</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Tianyi</first><last>Li</last></author>
       <author><first>Pavlos</first><last>Vougiouklis</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Jeff Z.</first><last>Pan</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>228-236</pages>
       <abstract>Identifying and understanding user intents is a pivotal task for E-Commerce. Despite its essential role in product recommendation and business user profiling analysis, intent understanding has not been consistently defined or accurately benchmarked. In this paper, we focus on predicative user intents as “how a customer uses a product”, and pose intent understanding as a natural language reasoning task, independent of product ontologies. We identify two weaknesses of FolkScope, the SOTA E-Commerce Intent Knowledge Graph: category-rigidity and property-ambiguity. They limit its ability to strongly align user intents with products having the most desirable property, and to recommend useful products across diverse categories. Following these observations, we introduce a Product Recovery Benchmark featuring a novel evaluation framework and an example dataset. We further validate the above FolkScope weaknesses on this benchmark. Our code and dataset are available at https://github.com/stayones/Usgae-Centric-Intent-Understanding.</abstract>
@@ -280,7 +280,7 @@
       <author><first>Xiaoran</first><last>Fan</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>313-333</pages>
       <abstract>Tool learning has generated widespread interest as a vital means of interaction between Large Language Models (LLMs) and the physical world. Current research predominantly emphasizes LLMs’ capacity to utilize tools in well-structured environments while overlooking their stability when confronted with the inevitable noise of the real world. To bridge this gap, we introduce *RoTBench*, a multi-level benchmark for evaluating the robustness of LLMs in tool learning. Specifically, we establish five external environments, each featuring varying levels of noise (i.e., Clean, Slight, Medium, Heavy, and Union), providing an in-depth analysis of the model’s resilience across three critical phases: tool selection, parameter identification, and content filling. Experiments involving six widely-used models underscore the urgent necessity for enhancing the robustness of LLMs in tool learning. For instance, the performance of GPT-4 even drops significantly from 80.00 to 58.10 when there is no substantial change in manual accuracy. More surprisingly, the noise correction capability inherent in the GPT family paradoxically impedes its adaptability in the face of mild noise. In light of these findings, we propose RoTTuning, a strategy that enriches the diversity of training environments to bolster the robustness of LLMs in tool learning. The code and data are available at https://github.com/Junjie-Ye/RoTBench.</abstract>
       <url hash="735d46c7">2024.emnlp-main.19</url>
@@ -294,8 +294,8 @@
       <author><first>Fangkai</first><last>Jiao</last></author>
       <author><first>Chengwei</first><last>Qin</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>334-350</pages>
       <abstract>Large Language Models (LLMs) have demonstrated significant potential in handling complex reasoning tasks through step-by-step rationale generation. However, recent studies have raised concerns regarding the hallucination and flaws in their reasoning process. Substantial efforts are being made to improve the reliability and faithfulness of the generated rationales. Some approaches model reasoning as planning, while others focus on annotating for process supervision. Nevertheless, the planning-based search process often results in high latency due to the frequent assessment of intermediate reasoning states and the extensive exploration space. Additionally, supervising the reasoning process with human annotation is costly and challenging to scale for LLM training. To address these issues, in this paper, we propose a framework to learn planning-based reasoning through Direct Preference Optimization (DPO) on collected trajectories, which are ranked according to synthesized process rewards. Our results on challenging logical reasoning benchmarks demonstrate the effectiveness of our learning framework, showing that our 7B model can surpass the strong counterparts like GPT-3.5-Turbo.</abstract>
       <url hash="80116b5e">2024.emnlp-main.20</url>
@@ -315,7 +315,7 @@
     </paper>
     <paper id="22">
       <title>“We Demand Justice!”: Towards Social Context Grounding of Political Texts</title>
-      <author><first>Rajkumar</first><last>Pujari</last><affiliation>Purdue University</affiliation></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last><affiliation>Purdue University</affiliation></author>
       <author><first>Chengfei</first><last>Wu</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University and Purdue University</affiliation></author>
       <pages>362-372</pages>
@@ -375,7 +375,7 @@
       <author><first>Jia</first><last>Liu</last></author>
       <author><first>Zujie</first><last>Wen</last></author>
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>424-444</pages>
       <abstract>We investigate non-collaborative dialogue agents, which are expected to engage in strategic conversations with diverse users, for securing a mutual agreement that leans favorably towards the system’s objectives. This poses two main challenges for existing dialogue agents: 1) The inability to integrate user-specific characteristics into the strategic planning, and 2) The difficulty of training strategic planners that can be generalized to diverse users. To address these challenges, we propose TRIP to enhance the capability in tailored strategic planning, incorporating a user-aware strategic planning module and a population-based training paradigm. Through experiments on benchmark non-collaborative dialogue tasks, we demonstrate the effectiveness of TRIP in catering to diverse users.</abstract>
       <url hash="25a2cfc5">2024.emnlp-main.26</url>
@@ -534,7 +534,7 @@
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
       <author><first>Stella Xin</first><last>Yin</last></author>
       <author><first>Geyu</first><last>Lin</last><affiliation>Institute of Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>626-642</pages>
       <abstract>Intelligent Tutoring Systems (ITSs) can provide personalized and self-paced learning experience. The emergence of large language models (LLMs) further enables better human-machine interaction, and facilitates the development of conversational ITSs in various disciplines such as math and language learning. In dialogic teaching, recognizing and adapting to individual characteristics can significantly enhance student engagement and learning efficiency. However, characterizing and simulating student’s persona remain challenging in training and evaluating conversational ITSs. In this work, we propose a framework to construct profiles of different student groups by refining and integrating both cognitive and noncognitive aspects, and leverage LLMs for personality-aware student simulation in a language learning scenario. We further enhance the framework with multi-aspect validation, and conduct extensive analysis from both teacher and student perspectives. Our experimental results show that state-of-the-art LLMs can produce diverse student responses according to the given language ability and personality traits, and trigger teacher’s adaptive scaffolding strategies.</abstract>
       <url hash="b0dcdc9f">2024.emnlp-main.37</url>
@@ -559,7 +559,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>L</fixed-case>o<fixed-case>F</fixed-case>a: A Dataset of News Comments with Common Logical Fallacies Written by <fixed-case>LLM</fixed-case>-Assisted Crowds</title>
       <author><first>Min-Hsuan</first><last>Yeh</last></author>
       <author><first>Ruyuan</first><last>Wan</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>660-677</pages>
       <abstract>Detecting logical fallacies in texts can help users spot argument flaws, but automating this detection is not easy. Manually annotating fallacies in large-scale, real-world text data to create datasets for developing and validating detection models is costly. This paper introduces CoCoLoFa, the largest known logical fallacy dataset, containing 7,706 comments for 648 news articles, with each comment labeled for fallacy presence and type. We recruited 143 crowd workers to write comments embodying specific fallacy types (e.g., slippery slope) in response to news articles. Recognizing the complexity of this writing task, we built an LLM-powered assistant into the workers’ interface to aid in drafting and refining their comments. Experts rated the writing quality and labeling validity of CoCoLoFa as high and reliable. BERT-based models fine-tuned using CoCoLoFa achieved the highest fallacy detection (F1=0.86) and classification (F1=0.87) performance on its test set, outperforming the state-of-the-art LLMs. Our work shows that combining crowdsourcing and LLMs enables us to more effectively construct datasets for complex linguistic phenomena that crowd workers find challenging to produce on their own.</abstract>
       <url hash="af93eb8d">2024.emnlp-main.39</url>
@@ -603,7 +603,7 @@
       <title>Successfully Guiding Humans with Imperfect Instructions by Highlighting Potential Errors and Suggesting Corrections</title>
       <author><first>Lingjun</first><last>Zhao</last></author>
       <author><first>Nguyen X.</first><last>Khanh</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>719-736</pages>
       <abstract>Language models will inevitably err in situations with which they are unfamiliar. However, by effectively communicating uncertainties, they can still guide humans toward making sound decisions in those contexts. We demonstrate this idea by developing HEAR, a system that can successfully guide humans in simulated residential environments despite generating potentially inaccurate instructions. Diverging from systems that provide users with only the instructions they generate, HEAR warns users of potential errors in its instructions and suggests corrections. This rich uncertainty information effectively prevents misguidance and reduces the search space for users. Evaluation with 80 users shows that HEAR achieves a 13% increase in success rate and a 29% reduction in final location error distance compared to only presenting instructions to users. Interestingly, we find that offering users possibilities to explore, HEAR motivates them to make more attempts at the task, ultimately leading to a higher success rate. To our best knowledge, this work is the first to show the practical benefits of uncertainty communication in a long-horizon sequential decision-making problem.</abstract>
       <url hash="98eb6ac2">2024.emnlp-main.42</url>
@@ -910,7 +910,7 @@
       <author><first>Heming</first><last>Xia</last></author>
       <author><first>Jingjing</first><last>Xu</last></author>
       <author><first>Zhiyong</first><last>Wu</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <author><first>Xu</first><last>Sun</last></author>
       <author><first>Lei</first><last>Li</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Zhifang</first><last>Sui</last><affiliation>Peking University</affiliation></author>
@@ -959,7 +959,7 @@
       <author><first>Weihao</first><last>Chen</last></author>
       <author><first>Chunhui</first><last>Li</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <pages>1167-1181</pages>
       <abstract>Multimodal large language models (MLLMs) have attracted increasing attention in the past few years, but they may still generate descriptions that include objects not present in the corresponding images, a phenomenon known as object hallucination. To eliminate hallucinations, existing methods manually annotate paired responses with and without hallucinations, and then employ various alignment algorithms to improve the alignment capability between images and text. However, they not only demand considerable computation resources during the finetuning stage but also require expensive human annotation to construct paired data needed by the alignment algorithms. To address these issues, we propose an efficient fine-grained unlearning framework (EFUF), which performs gradient ascent utilizing three tailored losses to eliminate hallucinations without paired data. Extensive experiments show that our method consistently reduces hallucinations while preserving the generation quality with modest computational overhead. Our code and datasets will be publicly available.</abstract>
       <url hash="605344b0">2024.emnlp-main.67</url>
@@ -1195,7 +1195,7 @@
       <author><first>Cheng-Yu</first><last>Hsieh</last><affiliation>University of Washington</affiliation></author>
       <author><first>Ranjay</first><last>Krishna</last><affiliation>Department of Computer Science</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>1419-1436</pages>
       <abstract>When asked to summarize articles or answer questions given a passage, large language models (LLMs) can hallucinate details and respond with unsubstantiated answers that are inaccurate with respect to the input context. This paper describes a simple approach for detecting such **contextual hallucinations**. We hypothesize that contextual hallucinations are related to the extent to which an LLM attends to information in the provided context versus its own generations. Based on this intuition, we propose a simple hallucination detection model whose input features are given by the ratio of attention weights on the context versus newly generated tokens (for each attention head). We find that a linear classifier based on these _lookback ratio_ features is as effective as a richer detector that utilizes the entire hidden states of an LLM or a text-based entailment model. The lookback ratio-based detector—**Lookback Lens**—is found to transfer across tasks and even models, allowing a detector that is trained on a 7B model to be applied (without retraining) to a larger 13B model. We further apply this detector to mitigate contextual hallucinations, and find that a simple classifier-guided decoding approach is able to reduce the amount of hallucination, for example by 9.6% in the XSum summarization task.</abstract>
       <url hash="4a8a5f7b">2024.emnlp-main.84</url>
@@ -1346,12 +1346,12 @@
     <paper id="95">
       <title>Cross-domain <fixed-case>NER</fixed-case> with Generated Task-Oriented Knowledge: An Empirical Study from Information Density Perspective</title>
       <author id="zhihao-zhang-soochow"><first>Zhihao</first><last>Zhang</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
       <author><first>Junshuang</first><last>Wu</last></author>
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Erik</first><last>Cambria</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>1595-1609</pages>
       <abstract>Cross-domain Named Entity Recognition (CDNER) is crucial for Knowledge Graph (KG) construction and natural language processing (NLP), enabling learning from source to target domains with limited data. Previous studies often rely on manually collected entity-relevant sentences from the web or attempt to bridge the gap between tokens and entity labels across domains. These approaches are time-consuming and inefficient, as these data are often weakly correlated with the target task and require extensive pre-training.To address these issues, we propose automatically generating task-oriented knowledge (GTOK) using large language models (LLMs), focusing on the reasoning process of entity extraction. Then, we employ task-oriented pre-training (TOPT) to facilitate domain adaptation. Additionally, current cross-domain NER methods often lack explicit explanations for their effectiveness. Therefore, we introduce the concept of information density to better evaluate the model’s effectiveness before performing entity recognition.We conduct systematic experiments and analyses to demonstrate the effectiveness of our proposed approach and the validity of using information density for model evaluation.</abstract>
       <url hash="737b93a7">2024.emnlp-main.95</url>
@@ -1480,7 +1480,7 @@
       <author><first>Jiayi</first><last>Xin</last><affiliation>University of Pennsylvania, University of Pennsylvania</affiliation></author>
       <author><first>Qintong</first><last>Li</last></author>
       <author><first>Lijun</first><last>Wu</last></author>
-      <author><first>Zhihong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhihong</first><last>Deng</last></author>
       <author><first>Yang Young</first><last>Lu</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Qi</first><last>Liu</last><affiliation>University of Hong Kong</affiliation></author>
       <author><first>Sheng</first><last>Wang</last></author>
@@ -1522,7 +1522,7 @@
       <author><first>Yibo</first><last>Wang</last></author>
       <author><first>Xiangjue</first><last>Dong</last><affiliation>Texas A&amp;M University - College Station</affiliation></author>
       <author><first>James</first><last>Caverlee</last><affiliation>Google and Texas A&amp;M University - College Station</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <pages>1808-1825</pages>
       <abstract>Language models can be manipulated by adversarial attacks, which introduce subtle perturbations to input data. While recent attack methods can achieve a relatively high attack success rate (ASR), we’ve observed that the generated adversarial examples have a different data distribution compared with the original examples. Specifically, these adversarial examples exhibit reduced confidence levels and greater divergence from the training data distribution. Consequently, they are easy to detect using straightforward detection methods, diminishing the efficacy of such attacks. To address this issue, we propose a Distribution-Aware Adversarial Attack (DA<tex-math>^3</tex-math>) method. DA<tex-math>^3</tex-math> considers the distribution shifts of adversarial examples to improve attacks’ effectiveness under detection methods. We further design a novel evaluation metric, the Non-detectable Attack Success Rate (NASR), which integrates both ASR and detectability for the attack task. We conduct experiments on four widely used datasets to validate the attack effectiveness and transferability of adversarial examples generated by DA<tex-math>^3</tex-math> against both the white-box BERT-base and RoBERTa-base models and the black-box LLaMA2-7b model.</abstract>
       <url hash="1616272a">2024.emnlp-main.107</url>
@@ -1535,7 +1535,7 @@
       <author><first>Xingxuan</first><last>Li</last></author>
       <author><first>Yutong</first><last>Li</last></author>
       <author><first>Lin</first><last>Qiu</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba Group</affiliation></author>
       <pages>1826-1843</pages>
       <abstract>In this work, we designed unbiased prompts to systematically evaluate the psychological safety of large language models (LLMs). First, we tested five different LLMs by using two personality tests: Short Dark Triad (SD-3) and Big Five Inventory (BFI). All models scored higher than the human average on SD-3, suggesting a relatively darker personality pattern. Despite being instruction fine-tuned with safety metrics to reduce toxicity, InstructGPT, GPT-3.5, and GPT-4 still showed dark personality patterns; these models scored higher than self-supervised GPT-3 on the Machiavellianism and narcissism traits on SD-3. Then, we evaluated the LLMs in the GPT series by using well-being tests to study the impact of fine-tuning with more training data. We observed a continuous increase in the well-being scores of GPT models. Following these observations, we showed that fine-tuning Llama-2-chat-7B with responses from BFI using direct preference optimization could effectively reduce the psychological toxicity of the model. Based on the findings, we recommended the application of systematic and comprehensive psychological metrics to further evaluate and improve the safety of LLMs.</abstract>
@@ -1685,7 +1685,7 @@
     </paper>
     <paper id="119">
       <title>Aligning Language Models to Explicitly Handle Ambiguity</title>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Youna</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Cheonbok</first><last>Park</last><affiliation>NAVER</affiliation></author>
       <author><first>Junyeob</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
@@ -1809,7 +1809,7 @@
       <author><first>Yaswanth</first><last>Narsupalli</last></author>
       <author><first>Rongqi</first><last>Fan</last></author>
       <author><first>Zhiheng</first><last>Lyu</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Wenhu</first><last>Chen</last><affiliation>University of Waterloo and Google</affiliation></author>
       <pages>2105-2123</pages>
       <abstract>The recent years have witnessed great advances in video generation. However, the development of automatic video metrics is lagging significantly behind. None of the existing metric is able to provide reliable scores over generated videos. The main barrier is the lack of large-scale human-annotated dataset. In this paper, we release VideoFeedback, the first large-scale dataset containing human-provided multi-aspect score over 37.6K synthesized videos from 11 existing video generative models. We train VideoScore (initialized from Mantis)based on VideoFeedback to enable automatic video quality assessment. Experiments show that the Spearman’s correlation betweenVideoScore and humans can reach 77.1 on VideoFeedback-test, beating the prior best metrics by about 50 points. Further result onother held-out EvalCrafter, GenAI-Bench, and VBench show that VideoScore has consistently much higher correlation with humanjudges than other metrics. Due to these results, we believe VideoScore can serve as a great proxy for human raters to (1) rate different video models to track progress (2) simulate fine-grained human feedback in Reinforcement Learning with Human Feedback (RLHF) to improve current video generation models.</abstract>
@@ -1849,7 +1849,7 @@
     <paper id="130">
       <title><fixed-case>F</fixed-case>use<fixed-case>G</fixed-case>en: <fixed-case>PLM</fixed-case> Fusion for Data-generation based Zero-shot Learning</title>
       <author><first>Tianyuan</first><last>Zou</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Jianqing</first><last>Zhang</last></author>
       <author><first>Jingjing</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
@@ -1920,7 +1920,7 @@
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>2253-2268</pages>
       <abstract>In this paper, we study how open-source large language models (LLMs) can be effectively deployed for improving query rewriting in conversational search, especially for ambiguous queries. We introduce CHIQ, a two-step method that leverages the capabilities of LLMs to resolve ambiguities in the conversation history before query rewriting. This approach contrasts with prior studies that predominantly use closed-source LLMs to directly generate search queries from conversation history. We demonstrate on five well-established benchmarks that CHIQ leads to state-of-the-art results across most settings, showing highly competitive performances with systems leveraging closed-source LLMs. Our study provides a first step towards leveraging open-source LLMs in conversational search, as a competitive alternative to the prevailing reliance on commercial LLMs. Data, models, and source code will be publicly available upon acceptance at https://github.com/fengranMark/CHIQ.</abstract>
       <url hash="42045d88">2024.emnlp-main.135</url>
@@ -1970,7 +1970,7 @@
     <paper id="139">
       <title>Self-Refine Instruction-Tuning for Aligning Reasoning in Language Models</title>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>2325-2347</pages>
       <abstract>The alignment of reasoning abilities between smaller and larger Language Models are largely conducted via supervised fine-tuning using demonstrations generated from robust Large Language Models (LLMs). Although these approaches deliver more performant models, they do not show sufficiently strong generalization ability as the training only relies on the provided demonstrations.In this paper, we propose the Self-refine Instruction-tuning method that elicits Smaller Language Models to self-improve their abilities.Our approach is based on a two-stage process, where reasoning abilities are first transferred between LLMs and Small Language Models (SLMs) via Instruction-tuning on synthetic demonstrations provided by LLMs, and then the instructed models self-improve their abilities through preference optimization strategies.In particular, the second phase operates refinement heuristics based on Direct Preference Optimization, where the SLMs are elicited to deliver a series of reasoning paths by automatically sampling the generated responses and providing rewards using ground truths from the LLMs.Results obtained on commonsense and math reasoning tasks show that this approach consistently outperforms Instruction-tuning in both in-domain and out-domain scenarios, aligning the reasoning abilities of Smaller and Larger language models.</abstract>
       <url hash="2fccc9fa">2024.emnlp-main.139</url>
@@ -2069,7 +2069,7 @@
       <author><first>Heng</first><last>Chang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Yaqi</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Peng</first><last>Cao</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
       <pages>2470-2488</pages>
       <abstract>Text style transfer (TST) is crucial in natural language processing, aiming to endow text with a new style without altering its meaning. In real-world scenarios, not all styles have abundant resources. This work introduces TWIST (reusing Transferable Weight Increments for Style Text generation), a novel framework to mitigate data scarcity by utilizing style features in weight increments to transfer low-resource styles effectively. During target style learning, we derive knowledge via a specially designed weight pool and initialize the parameters for the unseen style. To enhance the effectiveness of merging, the target style weight increments are often merged from multiple source style weight increments through singular vectors. Considering the diversity of styles, we also designed a multi-key memory network that simultaneously focuses on task- and instance-level information to derive the most relevant weight increments. Results from multiple style transfer datasets show that TWIST demonstrates remarkable performance across different backbones, achieving particularly effective results in low-resource scenarios.</abstract>
       <url hash="e560717d">2024.emnlp-main.145</url>
@@ -2406,7 +2406,7 @@
       <author><first>Xiaoqi</first><last>Jiao</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Zhongwei</first><last>Wan</last></author>
       <author><first>Shaorong</first><last>Xie</last><affiliation>Shanghai University</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Shanghai University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Shanghai University</affiliation></author>
       <author><first>Xian</first><last>Wu</last><affiliation>Tencent</affiliation></author>
       <author><first>Yefeng</first><last>Zheng</last><affiliation>Westlake University</affiliation></author>
       <pages>2900-2912</pages>
@@ -2434,7 +2434,7 @@
       <author><first>Xin</first><last>Quan</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Louise A.</first><last>Dennis</last><affiliation>University of Manchester, University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>2933-2958</pages>
       <abstract>Natural language explanations represent a proxy for evaluating explanation-based and multi-step Natural Language Inference (NLI) models. However, assessing the validity of explanations for NLI is challenging as it typically involves the crowd-sourcing of apposite datasets, a process that is time-consuming and prone to logical errors. To address existing limitations, this paper investigates the verification and refinement of natural language explanations through the integration of Large Language Models (LLMs) and Theorem Provers (TPs). Specifically, we present a neuro-symbolic framework, named Explanation-Refiner, that integrates TPs with LLMs to generate and formalise explanatory sentences and suggest potential inference strategies for NLI. In turn, the TP is employed to provide formal guarantees on the logical validity of the explanations and to generate feedback for subsequent improvements. We demonstrate how Explanation-Refiner can be jointly used to evaluate explanatory reasoning, autoformalisation, and error correction mechanisms of state-of-the-art LLMs as well as to automatically enhance the quality of explanations of variable complexity in different domains.</abstract>
       <url hash="d3965ba2">2024.emnlp-main.172</url>
@@ -2489,7 +2489,7 @@
       <title>Zero-shot Cross-Lingual Transfer for Synthetic Data Generation in Grammatical Error Detection</title>
       <author><first>Gaetan Lopez</first><last>Latouche</last><affiliation>Ubisoft</affiliation></author>
       <author><first>Marc-André</first><last>Carbonneau</last><affiliation>Ubisoft</affiliation></author>
-      <author><first>Benjamin</first><last>Swanson</last><affiliation>Ubisoft</affiliation></author>
+      <author id="ben-swanson"><first>Benjamin</first><last>Swanson</last><affiliation>Ubisoft</affiliation></author>
       <pages>3002-3016</pages>
       <abstract>Grammatical Error Detection (GED) methods rely heavily on human annotated error corpora. However, these annotations are unavailable in many low-resource languages. In this paper, we investigate GED in this context. Leveraging the zero-shot cross-lingual transfer capabilities of multilingual pre-trained language models, we train a model using data from a diverse set of languages to generate synthetic errors in other languages. These synthetic error corpora are then used to train a GED model. Specifically we propose a two-stage fine-tuning pipeline where the GED model is first fine-tuned on multilingual synthetic data from target languages followed by fine-tuning on human-annotated GED corpora from source languages. This approach outperforms current state-of-the-art annotation-free GED methods. We also analyse the errors produced by our method and other strong baselines, finding that our approach produces errors that are more diverse and more similar to human errors.</abstract>
       <url hash="835348c5">2024.emnlp-main.176</url>
@@ -2500,7 +2500,7 @@
       <title><fixed-case>CUTE</fixed-case>: Measuring <fixed-case>LLM</fixed-case>s’ Understanding of Their Tokens</title>
       <author><first>Lukas</first><last>Edman</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>3017-3026</pages>
       <abstract>Large Language Models (LLMs) show remarkable performance on a wide variety of tasks. Most LLMs split text into multi-character tokens and process them as atomic units without direct access to individual characters. This raises the question: To what extent can LLMs learn orthographic information? To answer this, we propose a new benchmark, CUTE, which features a collection of tasks designed to test the orthographic knowledge of LLMs. We evaluate popular LLMs on CUTE, finding that most of them seem to know the spelling of their tokens, yet fail to use this information effectively to manipulate text, calling into question how much of this knowledge is generalizable.</abstract>
       <url hash="f834ff02">2024.emnlp-main.177</url>
@@ -2539,7 +2539,7 @@
     <paper id="180">
       <title><fixed-case>BC</fixed-case>-Prover: Backward Chaining Prover for Formal Theorem Proving</title>
       <author><first>Yuhang</first><last>He</last></author>
-      <author id="jihai-zhang"><first>Jihai</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Jihai</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Jianzhu</first><last>Bao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Fangquan</first><last>Lin</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Cheng</first><last>Yang</last><affiliation>Alibaba Group</affiliation></author>
@@ -2600,7 +2600,7 @@
       <author><first>Arjun</first><last>Subramonian</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Vagrant</first><last>Gautam</last><affiliation>Saarland University</affiliation></author>
       <author><first>Dietrich</first><last>Klakow</last><affiliation>Saarland University</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>3151-3166</pages>
       <abstract>Recent improvements in natural language processing (NLP) and machine learning (ML) and increased mainstream adoption have led to researchers frequently discussing the “democratization” of artificial intelligence. In this paper, we seek to clarify how democratization is understood in NLP and ML publications, through large-scale mixed-methods analyses of papers using the keyword “democra*” published in NLP and adjacent venues. We find that democratization is most frequently used to convey (ease of) access to or use of technologies, without meaningfully engaging with theories of democratization, while research using other invocations of “democra*” tends to be grounded in theories of deliberation and debate. Based on our findings, we call for researchers to enrich their use of the term democratization with appropriate theory, towards democratic technologies beyond superficial access.</abstract>
       <url hash="7d34de5b">2024.emnlp-main.184</url>
@@ -2667,7 +2667,7 @@
       <title>Improving Multi-party Dialogue Generation via Topic and Rhetorical Coherence</title>
       <author><first>Yaxin</first><last>Fan</last></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>3240-3253</pages>
       <abstract>Previous studies on multi-party dialogue generation predominantly concentrated on modeling the reply-to structure of dialogue histories, always overlooking the coherence between generated responses and target utterances. To address this issue, we propose a Reinforcement Learning approach emphasizing both Topic and Rhetorical Coherence (RL-TRC). In particular, the topic- and rhetorical-coherence tasks are designed to enhance the model’s perception of coherence with the target utterance. Subsequently, an agent is employed to learn a coherence policy, which guides the generation of responses that are topically and rhetorically aligned with the target utterance. Furthermore, three discourse-aware rewards are developed to assess the coherence between the generated response and the target utterance, with the objective of optimizing the policy. The experimental results and in-depth analyses on two popular datasets demonstrate that our RL-TRC significantly outperforms the state-of-the-art baselines, particularly in generating responses that are more coherent with the target utterances.</abstract>
       <url hash="242b77e0">2024.emnlp-main.189</url>
@@ -2919,7 +2919,7 @@
       <author><first>Wenpeng</first><last>Yin</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Mingzhe</first><last>Xing</last><affiliation>Zhongguancun Laboratory</affiliation></author>
       <author><first>Yinliang</first><last>Yue</last><affiliation>Zhongguancun Laboratory</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>3548-3566</pages>
       <abstract>Fine-grained category discovery using only coarse-grained supervision is a cost-effective yet challenging task. Previous training methods focus on aligning query samples with positive samples and distancing them from negatives. They often neglect intra-category and inter-category semantic similarities of fine-grained categories when navigating sample distributions in the embedding space. Furthermore, some evaluation techniques that rely on pre-collected test samples are inadequate for real-time applications. To address these shortcomings, we introduce a method that successfully detects fine-grained clusters of semantically similar texts guided by a novel objective function. The method uses semantic similarities in a logarithmic space to guide sample distributions in the Euclidean space and to form distinct clusters that represent fine-grained categories. We also propose a centroid inference mechanism to support real-time applications. The efficacy of the method is both theoretically justified and empirically confirmed on three benchmark tasks. The proposed objective function is integrated in multiple contrastive learning based neural models. Its results surpass existing state-of-the-art approaches in terms of Accuracy, Adjusted Rand Index and Normalized Mutual Information of the detected fine-grained categories. Code and data are publicly available at https://github.com/changtianluckyforever/F-grained-STAR.</abstract>
       <url hash="55bc0b4f">2024.emnlp-main.208</url>
@@ -2947,7 +2947,7 @@
       <author><first>Weizhi</first><last>Ma</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peijie</first><last>Sun</last></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>3588-3612</pages>
       <abstract>Large language models (LLMs) are essential tools that users employ across various scenarios, so evaluating their performance and guiding users in selecting the suitable service is important. Although many benchmarks exist, they mainly focus on specific predefined model abilities, such as world knowledge, reasoning, etc. Based on these ability scores, it is hard for users to determine which LLM best suits their particular needs. To address these issues, we propose to evaluate LLMs from a user-centric perspective and design this benchmark to measure their efficacy in satisfying user needs under distinct intents. Firstly, we collect 1,846 real-world use cases from a user study with 712 participants from 23 countries. This first-hand data helps us understand actual user intents and needs in LLM interactions, forming the User Reported Scenarios (URS) dataset, which is categorized with six types of user intents. Secondly, based on this authentic dataset, we benchmark 10 LLM services with GPT-4-as-Judge. Thirdly, we show that benchmark scores align well with human preference in both real-world experience and pair-wise annotations, achieving Pearson correlations of 0.95 and 0.94, respectively. This alignment confirms that the URS dataset and our evaluation method establish an effective user-centric benchmark. The dataset, code, and process data are publicly available at https://github.com/Alice1998/URS.</abstract>
       <url hash="f3cb16cc">2024.emnlp-main.210</url>
@@ -2998,7 +2998,7 @@
       <author><first>Archchana</first><last>Sindhujan</last></author>
       <author><first>Minnie</first><last>Kabra</last></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Tharindu</first><last>Ranasinghe</last><affiliation>Lancaster University</affiliation></author>
       <author><first>Fred</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
       <pages>3660-3674</pages>
@@ -3021,7 +3021,7 @@
     <paper id="216">
       <title>External Knowledge-Driven Argument Mining: Leveraging Attention-Enhanced Multi-Network Models</title>
       <author><first>Debela</first><last>Gemechu</last></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>3688-3709</pages>
       <abstract>Argument mining (AM) involves the identification of argument relations (AR) between Argumentative Discourse Units (ADUs). The essence of ARs among ADUs is context-dependent and lies in maintaining a coherent flow of ideas, often centered around the relations between discussed entities, topics, themes or concepts. However, these relations are not always explicitly stated; rather, inferred from implicit chains of reasoning connecting the concepts addressed in the ADUs. While humans can infer such background knowledge, machines face challenges when the contextual cues are not explicitly provided. This paper leverages external resources, including WordNet, ConceptNet, and Wikipedia to identify semantic paths (knowledge paths) connecting the concepts discussed in the ADUs to obtain the implicit chains of reasoning. To effectively leverage these paths for AR prediction, we propose attention-based Multi-Network architectures. Various architecture are evaluated on the external resources, and the Wikipedia based configuration attains F-scores of 0.85, 0.84, 0.70, and 0.87, respectively, on four diverse datasets, showing strong performance over the baselines.</abstract>
       <url hash="43069040">2024.emnlp-main.216</url>
@@ -3163,7 +3163,7 @@
       <author><first>Yongchao</first><last>Chen</last></author>
       <author><first>Jacob</first><last>Arkin</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yilun</first><last>Hao</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Nicholas</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Chuchu</first><last>Fan</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>3859-3920</pages>
@@ -3258,7 +3258,7 @@
       <title>Consistent Autoformalization for Constructing Mathematical Libraries</title>
       <author><first>Lan</first><last>Zhang</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Xin</first><last>Quan</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>4020-4033</pages>
       <abstract>Autoformalization is the task of automatically translating mathematical content written in natural language to a formal language expression. The growing language interpretation capabilities of Large Language Models (LLMs), including in formal languages, are lowering the barriers for autoformalization. However, LLMs alone are not capable of consistently and reliably delivering autoformalization, in particular as the complexity and specialization of the target domain grows. As the field evolves into the direction of systematically applying autoformalization towards large mathematical libraries, the need to improve syntactic, terminological and semantic control increases. This paper proposes the coordinated use of three mechanisms, most-similar retrieval augmented generation (MS-RAG), denoising steps, and auto-correction with syntax error feedback (Auto-SEF) to improve autoformalization quality. The empirical analysis, across different models, demonstrates that these mechanisms can deliver autoformalizaton results which are syntactically, terminologically and semantically more consistent. These mechanisms can be applied across different LLMs and have shown to deliver improve results across different model types.</abstract>
       <url hash="3416ed33">2024.emnlp-main.233</url>
@@ -3309,7 +3309,7 @@
       <author><first>Yinong</first><last>He</last></author>
       <author><first>Jianing</first><last>Yang</last></author>
       <author><first>Yinpei</first><last>Dai</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>4097-4114</pages>
       <abstract>In real-world scenarios, it is desirable for embodied agents to have the ability to leverage human language to gain explicit or implicit knowledge for learning tasks. Despite recent progress, most previous approaches adopt simple low-level instructions as language inputs, which may not reflect natural human communication. We expect human language to be informative (i.e., providing feedback on agents’ past behaviors and offering guidance on achieving their future goals) and diverse (i.e., encompassing a wide range of expressions and style nuances). To enable flexibility of language use in teaching agents tasks, this paper studies different types of language inputs in facilitating reinforcement learning (RL) embodied agents. More specifically, we examine how different levels of language informativeness and diversity impact agent learning and inference. Our empirical results based on four RL benchmarks demonstrate that agents trained with diverse and informative language feedback can achieve enhanced generalization and fast adaptation to new tasks. These findings highlight the pivotal role of language use in teaching embodied agents new tasks in an open world.</abstract>
       <url hash="7621c53b">2024.emnlp-main.237</url>
@@ -3381,7 +3381,7 @@
       <title><fixed-case>I</fixed-case> Could’ve Asked That: Reformulating Unanswerable Questions</title>
       <author><first>Wenting</first><last>Zhao</last><affiliation>Cornell University</affiliation></author>
       <author><first>Ge</first><last>Gao</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
       <author><first>Alexander M</first><last>Rush</last><affiliation>Cornell University and School of Engineering and Applied Sciences, Harvard University</affiliation></author>
       <pages>4207-4220</pages>
       <abstract>When seeking information from unfamiliar documents, users frequently pose questions that cannot be answered by the documents. While existing large language models (LLMs) identify these unanswerable questions, they do not assist users in reformulating their questions, thereby reducing their overall utility. We curate CouldAsk, an evaluation benchmark composed of existing and new datasets for document-grounded question answering, specifically designed to study reformulating unanswerable questions. We evaluate state-of-the-art open-source and proprietary LLMs on CouldAsk. The results demonstrate the limited capabilities of these models in reformulating questions. Specifically, GPT-4 and Llama2-7B successfully reformulate questions only 26% and 12% of the time, respectively. Error analysis shows that 62% of the unsuccessful reformulations stem from the models merely rephrasing the questions or even generating identical questions. We publicly release the benchmark and the code to reproduce the experiments.</abstract>
@@ -3441,7 +3441,7 @@
       <author><first>Wenlin</first><last>Yao</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Hassan</first><last>Foroosh</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>4293-4308</pages>
       <abstract>Reasoning is most powerful when an LLM accurately aggregates relevant information. We examine the critical role of information aggregation in reasoning by requiring the LLM to analyze sports narratives. To succeed at this task, an LLM must infer points from actions, identify related entities, attribute points accurately to players and teams, and compile key statistics to draw conclusions. We conduct comprehensive experiments with real NBA basketball data and present SportsGen, a new method to synthesize game narratives. By synthesizing data, we can rigorously evaluate LLMs’ reasoning capabilities under complex scenarios with varying narrative lengths and density of information. Our findings show that most models, including GPT-4o, often fail to accurately aggregate basketball scores due to frequent scoring patterns. Open-source models like Llama-3 further suffer from significant score hallucinations. Finally, the effectiveness of reasoning is influenced by narrative complexity, information density, and domain-specific terms, highlighting the challenges in analytical reasoning tasks.</abstract>
       <url hash="f7e061a8">2024.emnlp-main.246</url>
@@ -3467,7 +3467,7 @@
       <author><first>Seungone</first><last>Kim</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Juyoung</first><last>Suk</last></author>
       <author><first>Shayne</first><last>Longpre</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Jamin</first><last>Shin</last><affiliation>NAVER</affiliation></author>
       <author><first>Sean</first><last>Welleck</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -3519,9 +3519,9 @@
       <author><first>Anuoluwapo</first><last>Aremu</last></author>
       <author><first>Diana</first><last>Abagyan</last><affiliation>University of Washington</affiliation></author>
       <author><first>Hila</first><last>Gonen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Daud</first><last>Abolade</last></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <pages>4392-4409</pages>
       <abstract>Yoruba—an African language with roughly 47 million speakers—encompasses a continuum with several dialects. Recent efforts to develop NLP technologies for African languages have focused on their standard dialects, resulting in disparities for dialects and varieties for which there are little to no resources or tools. We take steps towards bridging this gap by introducing a new high-quality parallel text and speech corpus; YORULECT across three domains and four regional yoruba dialects. To develop this corpus, we engaged native speakers, traveling to communities where these dialects are spoken, to collect text and speech data. Using our newly created corpus, we conducted extensive experiments on (text) machine translation, automatic speech recognition, and speech-to-text translation. Our results reveal substantial performance disparities between standard yoruba and the other dialects across all tasks. However, we also show that with dialect-adaptive finetuning, we are able to narrow this gap. We believe our dataset and experimental analysis will contribute greatly to developing NLP tools for Yoruba and its dialects, and potentially for other African languages, by improving our understanding of existing challenges and offering a high-quality dataset for further development. We will release YORULECT dataset and models publicly under an open license.</abstract>
@@ -3604,7 +3604,7 @@
       <author><first>Wenjia</first><last>Niu</last></author>
       <author><first>Sabrina</first><last>Caldwell</last><affiliation>Australian National University</affiliation></author>
       <author><first>Tom</first><last>Gedeon</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Zhenyue</first><last>Qin</last><affiliation>Yale University</affiliation></author>
       <pages>4484-4499</pages>
       <abstract>Vision Large Language Models (VLLMs) are transforming the intersection of computer vision and natural language processing; however, the potential of using visual prompts for emotion recognition in these models remains largely unexplored and untapped. Traditional methods in VLLMs struggle with spatial localization and often discard valuable global context. We propose a novel Set-of-Vision prompting (SoV) approach that enhances zero-shot emotion recognition by using spatial information, such as bounding boxes and facial landmarks, to mark targets precisely. SoV improves accuracy in face count and emotion categorization while preserving the enriched image context. Through comprehensive experimentation and analysis of recent commercial or open-source VLLMs, we evaluate the SoV model’s ability to comprehend facial expressions in natural environments. Our findings demonstrate the effectiveness of integrating spatial visual prompts into VLLMs for improving emotion recognition performance.</abstract>
@@ -3654,7 +3654,7 @@
       <title>Multiple Sources are Better Than One: Incorporating External Knowledge in Low-Resource Glossing</title>
       <author><first>Changbing</first><last>Yang</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Garrett</first><last>Nicolai</last><affiliation>University of British Columbia</affiliation></author>
-      <author><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last><affiliation>University of British Columbia</affiliation></author>
       <pages>4537-4552</pages>
       <abstract>In this paper, we address the data scarcity problem in automatic data-driven glossing for low-resource languages by coordinating multiple sources of linguistic expertise. We enhance models by incorporating both token-level and sentence-level translations, utilizing the extensive linguistic capabilities of modern LLMs, and incorporating available dictionary resources. Our enhancements lead to an average absolute improvement of 5%-points in word-level accuracy over the previous state of the art on a typologically diverse dataset spanning six low-resource languages. The improvements are particularly noticeable for the lowest-resourced language Gitksan, where we achieve a 10%-point improvement. Furthermore, in a simulated ultra-low resource setting for the same six languages, training on fewer than 100 glossed sentences, we establish an average 10%-point improvement in word-level accuracy over the previous state-of-the-art system.</abstract>
       <url hash="fe57190f">2024.emnlp-main.261</url>
@@ -3778,7 +3778,7 @@
       <author><first>Yuanjie</first><last>Lyu</last></author>
       <author><first>Zihan</first><last>Niu</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Zheyong</first><last>Xie</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Tong</first><last>Xu</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Yang</first><last>Wang</last></author>
       <author><first>Enhong</first><last>Chen</last><affiliation>University of Science and Technology of China</affiliation></author>
@@ -3824,7 +3824,7 @@
       <title><fixed-case>B</fixed-case>ayesian Calibration of Win Rate Estimation with <fixed-case>LLM</fixed-case> Evaluators</title>
       <author><first>Yicheng</first><last>Gao</last></author>
       <author><first>Gonghan</first><last>Xu</last></author>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>4757-4769</pages>
       <abstract>Recent advances in large language models (LLMs) show the potential of using LLMs as evaluators for assessing the quality of text generations from LLMs. However, applying LLM evaluators naively to compare different systems can lead to unreliable results due to the inaccuracy and intrinsic bias of LLM evaluators. In order to mitigate this problem, we propose two calibration methods, Bayesian Win-Rate Sampling (BWRS) and Bayesian Dawid-Skene, both of which leverage Bayesian inference to more accurately infer the true win rate of generative language models. We empirically validate our methods on six datasets covering story generation, summarization, and instruction following tasks. We show that both our methods are effective in improving the accuracy of win rate estimation using LLMs as evaluators, offering a promising direction for reliable automatic text quality evaluation.</abstract>
@@ -3947,7 +3947,7 @@
       <author><first>Yuji</first><last>Zhang</last></author>
       <author><first>Sha</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Zixuan</first><last>Zhang</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Kevin</first><last>Small</last><affiliation>Amazon</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>4907-4926</pages>
@@ -4110,12 +4110,12 @@
       <author><first>Surangika</first><last>Ranathunga</last><affiliation>Massey University</affiliation></author>
       <author><first>Meng</first><last>Fang</last><affiliation>University of Liverpool and Eindhoven University of Technology</affiliation></author>
       <author><first>Jie</first><last>Fu</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <author><first>Ruihong</first><last>Huang</last><affiliation>Texas A&amp;M University</affiliation></author>
       <author><first>Eduardo</first><last>Blanco</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Yixin</first><last>Cao</last><affiliation>Fudan University</affiliation></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <author><first>Wenpeng</first><last>Yin</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>5081-5099</pages>
       <abstract>Claim: This work is not advocating the use of LLMs for paper (meta-)reviewing. Instead, wepresent a comparative analysis to identify and distinguish LLM activities from human activities. Two research goals: i) Enable better recognition of instances when someone implicitly uses LLMs for reviewing activities; ii) Increase community awareness that LLMs, and AI in general, are currently inadequate for performing tasks that require a high level of expertise and nuanced judgment.This work is motivated by two key trends. On one hand, large language models (LLMs) have shown remarkable versatility in various generative tasks such as writing, drawing, and question answering, significantly reducing the time required for many routine tasks. On the other hand, researchers, whose work is not only time-consuming but also highly expertise-demanding, face increasing challenges as they have to spend more time reading, writing, and reviewing papers. This raises the question: how can LLMs potentially assist researchers in alleviating their heavy workload?This study focuses on the topic of LLMs as NLP Researchers, particularly examining the effectiveness of LLMs in assisting paper (meta-)reviewing and its recognizability. To address this, we constructed the ReviewCritique dataset, which includes two types of information: (i) NLP papers (initial submissions rather than camera-ready) with both human-written and LLM-generated reviews, and (ii) each review comes with “deficiency” labels and corresponding explanations for individual segments, annotated by experts. Using ReviewCritique, this study explores two threads of research questions: (i) “LLMs as Reviewers”, how do reviews generated by LLMs compare with those written by humans in terms of quality and distinguishability? (ii) “LLMs as Metareviewers”, how effectively can LLMs identify potential issues, such as Deficient or unprofessional review segments, within individual paper reviews? To our knowledge, this is the first work to provide such a comprehensive analysis.</abstract>
@@ -4126,10 +4126,10 @@
     <paper id="293">
       <title>Academics Can Contribute to Domain-Specialized Language Models</title>
       <author><first>Mark</first><last>Dredze</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Prabhanjan</first><last>Kambadur</last></author>
       <author><first>Shijie</first><last>Wu</last><affiliation>Anthropic</affiliation></author>
-      <author><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>Irsoy</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Steven</first><last>Lu</last></author>
       <author><first>Vadim</first><last>Dabravolski</last><affiliation>Bloomberg</affiliation></author>
       <author><first>David S</first><last>Rosenberg</last><affiliation>Bloomberg</affiliation></author>
@@ -4179,11 +4179,11 @@
       <author><first>Joel Ruben Antony</first><last>Moniz</last><affiliation>Apple</affiliation></author>
       <author><first>Muhammad Ravi Shulthan</first><last>Habibi</last><affiliation>Universitas Indonesia</affiliation></author>
       <author><first>Frederikus</first><last>Hudi</last></author>
-      <author><first>Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
+      <author id="jann-railey-montalan"><first>Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
       <author><first>Ryan</first><last>Ignatius</last><affiliation>Institut Teknologi Bandung</affiliation></author>
       <author><first>Joanito Agili</first><last>Lopo</last><affiliation>Universitas Gadjah Mada</affiliation></author>
       <author><first>William</first><last>Nixon</last></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
       <author><first>James</first><last>Jaya</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Ryandito</first><last>Diandaru</last></author>
       <author><first>Yuze</first><last>Gao</last><affiliation>A*STAR</affiliation></author>
@@ -4222,7 +4222,7 @@
       <author><first>Peerat</first><last>Limkonchotiwat</last><affiliation>AI Singapore</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Amazon</affiliation></author>
       <author><first>Sedrick</first><last>Keh</last><affiliation>Toyota Research Institute</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Ruochen</first><last>Zhang</last><affiliation>Brown University</affiliation></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Zheng-Xin</first><last>Yong</last><affiliation>Brown University</affiliation></author>
@@ -4237,7 +4237,7 @@
     </paper>
     <paper id="297">
       <title>Induct-Learn: Short Phrase Prompting with Instruction Induction</title>
-      <author><first>Po-Chun</first><last>Chen</last><affiliation>National Taiwan University</affiliation></author>
+      <author id="po-chun-chen"><first>Po-Chun</first><last>Chen</last><affiliation>National Taiwan University</affiliation></author>
       <author><first>Sheng-Lun</first><last>Wei</last><affiliation>Department of computer science and informational engineering, National Taiwan University</affiliation></author>
       <author><first>Hen-Hsen</first><last>Huang</last><affiliation>Institute of Information Science, Academia Sinica</affiliation></author>
       <author><first>Hsin-Hsi</first><last>Chen</last><affiliation>National Taiwan University</affiliation></author>
@@ -4277,9 +4277,9 @@
       <author><first>Weichao</first><last>Zhang</last></author>
       <author><first>Ruqing</first><last>Zhang</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Yixing</first><last>Fan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>5263-5274</pages>
       <abstract>As the scale of training corpora for large language models (LLMs) grows, model developers become increasingly reluctant to disclose details on their data. This lack of transparency poses challenges to scientific evaluation and ethical deployment. Recently, pretraining data detection approaches, which infer whether a given text was part of an LLM’s training data through black-box access, have been explored. The Min-K% Prob method, which has achieved state-of-the-art results, assumes that a non-training example tends to contain a few outlier words with low token probabilities. However, the effectiveness may be limited as it tends to misclassify non-training texts that contain many common words with high probabilities predicted by LLMs. To address this issue, we introduce a divergence-based calibration method, inspired by the divergence-from-randomness concept, to calibrate token probabilities for pretraining data detection. We compute the cross-entropy (i.e., the divergence) between the token probability distribution and the token frequency distribution to derive a detection score.We have developed a Chinese-language benchmark, PatentMIA, to assess the performance of detection approaches for LLMs on Chinese text. Experimental results on English-language benchmarks and PatentMIA demonstrate that our proposed method significantly outperforms existing methods. Our code and PatentMIA benchmark are available at <url>https://github.com/zhang-wei-chao/DC-PDD</url>.</abstract>
       <url hash="5763867e">2024.emnlp-main.300</url>
@@ -4330,7 +4330,7 @@
       <author><first>Jiahuan</first><last>Li</last></author>
       <author><first>Yiqing</first><last>Cao</last></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>5307-5320</pages>
       <abstract>Having been trained on massive pretraining data, large language models have shown excellent performance on many knowledge-intensive tasks. However, pretraining data tends to contain misleading and even conflicting information, and it is intriguing to understand how LLMs handle these noisy data during training. In this study, we systematically analyze LLMs’ learning preferences for data with conflicting knowledge. We find that pretrained LLMs establish learning preferences similar to humans, i.e., preferences towards formal texts and texts with fewer spelling errors, resulting in faster learning and more favorable treatment of knowledge in data with such features when facing conflicts. This finding is generalizable across models and languages and is more evident in larger models. An in-depth analysis reveals that LLMs tend to trust data with features that signify consistency with the majority of data, and it is possible to instill new preferences and erase old ones by manipulating the degree of consistency with the majority data.</abstract>
       <url hash="e90935f7">2024.emnlp-main.304</url>
@@ -4425,7 +4425,7 @@
       <author><first>Wenyan</first><last>Li</last></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <pages>5441-5454</pages>
       <abstract>Diffusion models are the state of the art in text-to-image generation, but their perceptual variability remains understudied. In this paper, we examine how prompts affect image variability in black-box diffusion-based models. We propose W1KP, a human-calibrated measure of variability in a set of images, bootstrapped from existing image-pair perceptual distances. Current datasets do not cover recent diffusion models, thus we curate three test sets for evaluation. Our best perceptual distance outperforms nine baselines by up to 18 points in accuracy, and our calibration matches graded human judgements 78% of the time. Using W1KP, we study prompt reusability and show that Imagen prompts can be reused for 10-50 random seeds before new images become too similar to already generated images, while Stable Diffusion XL and DALL-E 3 can be reused 50-200 times. Lastly, we analyze 56 linguistic features of real prompts, finding that the prompt’s length, CLIP embedding norm, concreteness, and word senses influence variability most. As far as we are aware, we are the first to analyze diffusion variability from a visuolinguistic perspective. Our project page is at http://w1kp.com.</abstract>
       <url hash="8749a3b4">2024.emnlp-main.311</url>
@@ -4471,7 +4471,7 @@
     <paper id="315">
       <title>Concept Space Alignment in Multilingual <fixed-case>LLM</fixed-case>s</title>
       <author><first>Qiwei</first><last>Peng</last></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>5511-5526</pages>
       <abstract>Multilingual large language models (LLMs) seem to generalize somewhat across languages. We hypothesize this is a result of implicit vector space alignment. Evaluating such alignment, we see that larger models exhibit very high-quality linear alignments between corresponding concepts in different languages. Our experiments show that multilingual LLMs suffer from two familiar weaknesses: generalization works best for languages with similar typology, and for abstract concepts. For some models, e.g., the Llama-2 family of models, prompt-based embeddings align better than word embeddings, but the projections are less linear – an observation that holds across almost all model families, indicating that some of the implicitly learned alignments are broken somewhat by prompt-based methods.</abstract>
       <url hash="3735b036">2024.emnlp-main.315</url>
@@ -4552,7 +4552,7 @@
       <author id="yuhao-wang-renmin"><first>Yuhao</first><last>Wang</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Jing</first><last>Liu</last><affiliation>Baidu</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>5613-5626</pages>
@@ -4589,7 +4589,7 @@
       <title>On Mitigating Performance Disparities in Multilingual Speech Recognition</title>
       <author><first>Monorama</first><last>Swain</last></author>
       <author><first>Anna Katrine Van</first><last>Zee</last></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>5647-5655</pages>
       <abstract>How far have we come in mitigating performance disparities across genders in multilingual speech recognition? We compare the impact on gender disparity of different fine-tuning algorithms for automated speech recognition across model sizes, languages and gender. We look at both performance-focused and fairness-promoting algorithms. Across languages, we see slightly better performance for female speakers for larger models regardless of the fine-tuning algorithm. The best trade-off between performance and parity is found using adapter fusion. Fairness-promoting fine-tuning algorithms (Group-DRO and Spectral Decoupling) hurt performance compared to adapter fusion with only slightly better performance parity. LoRA increases disparities slightly. Fairness-mitigating fine-tuning techniques led to slightly higher variance in performance across languages, with the exception of adapter fusion.</abstract>
       <url hash="ea275914">2024.emnlp-main.323</url>
@@ -4725,7 +4725,7 @@
       <title>Pretraining Language Models Using Translationese</title>
       <author><first>Meet</first><last>Doshi</last></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>5843-5862</pages>
       <abstract>In this paper, we explore the utility of Translationese as synthetic data created using machine translation for pre-training language models (LMs) for low-resource languages (LRLs). Our simple methodology consists of translating large amounts of web-crawled monolingual documents (clean) into the LRLs, followed by filtering the translated documents using tiny LMs trained on small but clean LRL data. Taking the case of Indian languages, we pre-train LMs from scratch with 28M and 85M parameters, and then fine-tune them for 5 downstream natural language understanding (NLU) and 4 generative (NLG) tasks. We observe that pre-training on filtered synthetic data leads to relative performance drops of only 0.87% for NLU and 2.35% for NLG, compared to pre-training on clean data, and this gap further diminishes upon the inclusion of a small amount of clean data. We also study the impact of synthetic data filtering and the choice of source language for synthetic data generation. Furthermore, evaluating continually pre-trained larger models like Gemma-2B and Llama-3-8B in few-shot settings, we observe that using synthetic data is competitive with using clean data. Our findings suggest that synthetic data shows promise for bridging the pre-training gap between English and LRLs.</abstract>
       <url hash="d6bdcc12">2024.emnlp-main.334</url>
@@ -4772,7 +4772,7 @@
       <title><fixed-case>CI</fixed-case>tru<fixed-case>S</fixed-case>: Chunked Instruction-aware State Eviction for Long Sequence Modeling</title>
       <author><first>Yu</first><last>Bai</last></author>
       <author><first>Xiyuan</first><last>Zou</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Sanxing</first><last>Chen</last><affiliation>Duke University</affiliation></author>
       <author><first>Marc-Antoine</first><last>Rondeau</last><affiliation>Mila - Quebec Artificial Intelligence Institute</affiliation></author>
       <author><first>Yang</first><last>Gao</last></author>
@@ -4788,7 +4788,7 @@
     <paper id="339">
       <title>Story Embeddings — Narrative-Focused Representations of Fictional Stories</title>
       <author><first>Hans Ole</first><last>Hatzel</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>5931-5943</pages>
       <abstract>We present a novel approach to modeling fictional narratives. The proposed model creates embeddings that represent a story such that similar narratives, that is, reformulations of the same story, will result in similar embeddings. We showcase the prowess of our narrative-focused embeddings on various datasets, exhibiting state-of-the-art performance on multiple retrieval tasks. The embeddings also show promising results on a narrative understanding task. Additionally, we perform an annotation-based evaluation to validate that our introduced computational notion of narrative similarity aligns with human perception. The approach can help to explore vast datasets of stories, with potential applications in recommender systems and in the computational analysis of literature.</abstract>
       <url hash="8552c1d5">2024.emnlp-main.339</url>
@@ -4893,7 +4893,7 @@
       <title>Model Internals-based Answer Attribution for Trustworthy Retrieval-Augmented Generation</title>
       <author><first>Jirui</first><last>Qi</last></author>
       <author><first>Gabriele</first><last>Sarti</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <author><first>Arianna</first><last>Bisazza</last><affiliation>University of Groningen</affiliation></author>
       <pages>6037-6053</pages>
       <abstract>Ensuring the verifiability of model answers is a fundamental challenge for retrieval-augmented generation (RAG) in the question answering (QA) domain. Recently, self-citation prompting was proposed to make large language models (LLMs) generate citations to supporting documents along with their answers. However, self-citing LLMs often struggle to match the required format, refer to non-existent sources, and fail to faithfully reflect LLMs’ context usage throughout the generation. In this work, we present MIRAGE – Model Internals-based RAG Explanations – a plug-and-play approach using model internals for faithful answer attribution in RAG applications. MIRAGE detects context-sensitive answer tokens and pairs them with retrieved documents contributing to their prediction via saliency methods. We evaluate our proposed approach on a multilingual extractive QA dataset, finding high agreement with human answer attribution. On open-ended QA, MIRAGE achieves citation quality and efficiency comparable to self-citation while also allowing for a finer-grained control of attribution parameters. Our qualitative evaluation highlights the faithfulness of MIRAGE’s attributions and underscores the promising application of model internals for RAG answer attribution. Code and data released at https://github.com/Betswish/MIRAGE.</abstract>
@@ -5078,7 +5078,7 @@
       <author><first>Garrett</first><last>Tanzer</last><affiliation>Google</affiliation></author>
       <author><first>Maximus</first><last>Shengelia</last></author>
       <author><first>Ken</first><last>Harrenstien</last><affiliation>Google</affiliation></author>
-      <author><first>David</first><last>Uthus</last><affiliation>Google</affiliation></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last><affiliation>Google</affiliation></author>
       <pages>6262-6287</pages>
       <abstract>Historically, sign language machine translation has been posed as a sentence-level task: datasets consisting of continuous narratives are chopped up and presented to the model as isolated clips. In this work, we explore the limitations of this task framing. First, we survey a number of linguistic phenomena in sign languages that depend on discourse-level context. Then as a case study, we perform the first human baseline for sign language translation that actually substitutes a human into the machine learning task framing, rather than provide the human with the entire document as context. This human baseline—for ASL to English translation on the How2Sign dataset—shows that for 33% of sentences in our sample, our fluent Deaf signer annotators were only able to understand key parts of the clip in light of additional discourse-level context. These results underscore the importance of understanding and sanity checking examples when adapting machine learning to new domains.</abstract>
       <url hash="9fc3f4d3">2024.emnlp-main.360</url>
@@ -5090,7 +5090,7 @@
       <author><first>Sreyan</first><last>Ghosh</last></author>
       <author><first>Sonal</first><last>Kumar</last></author>
       <author><first>Ashish</first><last>Seth</last></author>
-      <author><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
       <author><first>Utkarsh</first><last>Tyagi</last></author>
       <author><first>S</first><last>Sakshi</last></author>
       <author><first>Oriol</first><last>Nieto</last><affiliation>Adobe Systems</affiliation></author>
@@ -5204,7 +5204,7 @@
       <author><first>Jize</first><last>Jiang</last><affiliation>Department of Computer Science</affiliation></author>
       <author><first>Haozhen</first><last>Zheng</last><affiliation>Department of Computer Science</affiliation></author>
       <author><first>Beitong</first><last>Tian</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Klara</first><last>Nahrstedt</last></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>University of California, San Diego and Amazon</affiliation></author>
       <pages>6432-6441</pages>
@@ -5316,7 +5316,7 @@
       <author><first>Junjie</first><last>Chu</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Zeyang</first><last>Sha</last><affiliation>CISPA, saarland university, saarland informatics campus</affiliation></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>6584-6600</pages>
       <abstract>Significant advancements have recently been made in large language models, represented by GPT models.Users frequently have multi-round private conversations with cloud-hosted GPT models for task optimization.Yet, this operational paradigm introduces additional attack surfaces, particularly in custom GPTs and hijacked chat sessions.In this paper, we introduce a straightforward yet potent Conversation Reconstruction Attack.This attack targets the contents of previous conversations between GPT models and benign users, i.e., the benign users’ input contents during their interaction with GPT models.The adversary could induce GPT models to leak such contents by querying them with designed malicious prompts.Our comprehensive examination of privacy risks during the interactions with GPT models under this attack reveals GPT-4’s considerable resilience.We present two advanced attacks targeting improved reconstruction of past conversations, demonstrating significant privacy leakage across all models under these advanced techniques.Evaluating various defense mechanisms, we find them ineffective against these attacks.Our findings highlight the ease with which privacy can be compromised in interactions with GPT models, urging the community to safeguard against potential abuses of these models’ capabilities.</abstract>
       <url hash="c2307f49">2024.emnlp-main.377</url>
@@ -5368,7 +5368,7 @@
       <author><first>Gael</first><last>Gendron</last></author>
       <author><first>Bao Trung</first><last>Nguyen</last><affiliation>University of Auckland</affiliation></author>
       <author><first>Alex Yuxuan</first><last>Peng</last></author>
-      <author><first>Michael</first><last>Witbrock</last><affiliation>University of Auckland</affiliation></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last><affiliation>University of Auckland</affiliation></author>
       <author><first>Gillian</first><last>Dobbie</last><affiliation>University of Auckland</affiliation></author>
       <pages>6678-6701</pages>
       <abstract>Despite impressive performance on language modelling and complex reasoning tasks, Large Language Models (LLMs) fall short on the same tasks in uncommon settings or with distribution shifts, exhibiting a lack of generalisation ability. By contrast, systems such as causal models, that learn abstract variables and causal relationships, can demonstrate increased robustness against changes in the distribution. One reason for this success is the existence and use of Independent Causal Mechanisms (ICMs) representing high-level concepts that only sparsely interact. In this work, we apply two concepts from causality to learn ICMs within LLMs. We develop a new LLM architecture composed of multiple sparsely interacting language modelling modules. We show that such causal constraints can improve out-of-distribution performance on abstract and causal reasoning tasks. We also investigate the level of independence and domain specialisation and show that LLMs rely on pre-trained partially domain-invariant mechanisms resilient to fine-tuning.</abstract>
@@ -5536,7 +5536,7 @@
     <paper id="394">
       <title><fixed-case>T</fixed-case>ime<fixed-case>R</fixed-case><tex-math>^4</tex-math> : Time-aware Retrieval-Augmented Large Language Models for Temporal Knowledge Graph Question Answering</title>
       <author><first>Xinying</first><last>Qian</last><affiliation>Nankai University</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Yu</first><last>Zhao</last><affiliation>Nankai University</affiliation></author>
       <author><first>Baohang</first><last>Zhou</last><affiliation>Nankai University</affiliation></author>
       <author><first>Xuhui</first><last>Sui</last></author>
@@ -5638,8 +5638,8 @@
       <author><first>Zhiyuan</first><last>Hu</last></author>
       <author><first>Xiaobao</first><last>Wu</last></author>
       <author><first>Cong-Duy T</first><last>Nguyen</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>7049-7066</pages>
       <abstract>Seeking answers effectively for long videos is essential to build video question answering (videoQA) systems. Previous methods adaptively select frames and regions from long videos to save computations. However, this fails to reason over the whole sequence of video, leading to sub-optimal performance. To address this problem, we introduce a state space layer (SSL) into multi-modal Transformer to efficiently integrate global semantics of the video, which mitigates the video information loss caused by frame and region selection modules. Our SSL includes a gating unit to enable controllability over the flow of global semantics into visual representations. To further enhance the controllability, we introduce a cross-modal compositional congruence objective to encourage global semantics aligned with the question. To rigorously evaluate long-form videoQA capacity, we construct two new benchmarks Ego-QA and MAD-QA featuring videos of considerably long length, i.e. 17.5 minutes and 1.9 hours, respectively. Extensive experiments demonstrate the superiority of our framework on these new as well as existing datasets.</abstract>
       <url hash="d3c773d3">2024.emnlp-main.400</url>
@@ -5695,7 +5695,7 @@
     <paper id="404">
       <title>Liar, Liar, Logical Mire: A Benchmark for Suppositional Reasoning in Large Language Models</title>
       <author><first>Philipp</first><last>Mondorf</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>7114-7137</pages>
       <url hash="a6798d6f">2024.emnlp-main.404</url>
       <attachment type="software" hash="2d3da2c6">2024.emnlp-main.404.software.zip</attachment>
@@ -5722,7 +5722,7 @@
       <author><first>Harsh</first><last>Jhamtani</last><affiliation>Microsoft</affiliation></author>
       <author><first>Patrick</first><last>Xia</last><affiliation>Microsoft</affiliation></author>
       <author><first>Richard</first><last>Shin</last><affiliation>Google</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <pages>7156-7168</pages>
       <abstract>We introduce iterative retrieval, a novel framework that empowers retrievers to make iterative decisions through policy optimization. Finding an optimal portfolio of retrieved items is a combinatorial optimization problem, generally considered NP-hard. This approach provides a learned approximation to such a solution, meeting specific task requirements under a given family of large language models (LLMs). We propose a training procedure based on reinforcement learning, incorporating feedback from LLMs. We instantiate an iterative retriever for composing in-context learning (ICL) exemplars and apply it to various semantic parsing tasks that demand synthesized programs as outputs. By adding only 4M additional parameters for state encoding, we convert an off-the-shelf dense retriever into a stateful iterative retriever, outperforming previous methods in selecting ICL exemplars on semantic parsing datasets such as CalFlow, TreeDST, and MTOP. Additionally, the trained iterative retriever generalizes across different inference LLMs beyond the one used during training.</abstract>
@@ -5783,7 +5783,7 @@
       <author><first>Zhiyu</first><last>Cao</last></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Yaxin</first><last>Fan</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>7225-7238</pages>
       <abstract>Although existing fashionable generation methods on Incomplete Utterance Rewriting (IUR) can generate coherent utterances, they often result in the inclusion of irrelevant and redundant tokens in rewritten utterances due to their inability to focus on critical tokens in dialogue context. Furthermore, the limited size of the training datasets also contributes to the insufficient training of the IUR model. To address the first issue, we propose a multi-task learning framework EO-IUR (Editing Operation-guided Incomplete Utterance Rewriting) that introduces the editing operation labels generated by sequence labeling module to guide generation model to focus on critical tokens. Furthermore, we introduce a token-level heterogeneous graph to represent dialogues. To address the second issue, we propose a two-dimensional utterance augmentation strategy, namely editing operation-based incomplete utterance augmentation and LLM-based historical utterance augmentation. The experimental results on three datasets demonstrate that our EO-IUR outperforms previous state-of-the-art (SOTA) baselines in both open-domain and task-oriented dialogue.</abstract>
       <url hash="765667e0">2024.emnlp-main.410</url>
@@ -5860,7 +5860,7 @@
       <author><first>Hongyu</first><last>Ren</last></author>
       <author><first>Zhen</first><last>Dong</last><affiliation>Nexusflow.ai Inc</affiliation></author>
       <author><first>Kurt</first><last>Keutzer</last><affiliation>University of California Berkeley</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Jiashi</first><last>Feng</last><affiliation>ByteDance</affiliation></author>
       <pages>7315-7332</pages>
       <abstract>Large Language Models (LLMs) have significantly advanced natural language processing, demonstrating exceptional reasoning, tool usage, and memory capabilities. As their applications expand into multi-agent environments, there arises a need for a comprehensive evaluation framework that captures LLMs’ reasoning, planning, collaboration, and other social abilities. This work introduces a novel competition-based benchmark framework specifically designed to assess LLMs within multi-agent settings, providing quantitative metrics to evaluate their judgment, reasoning, deception, self-awareness, cooperation, coordination, and rationality.We utilize two social deduction games alongside three game-theory scenarios to create diverse environments.Our frame is fortified with the probabilistic graphic modeling (PGM) method, enhancing the LLMs’ capabilities in navigating complex social and cognitive dimensions. We evaluate seven LLMs, quantitatively highlighting a significant capability gap of over threefold between the strongest, GPT o1, and the weakest, Llama-2-70B. It also confirms that our PGM enhancement boosts the abilities of all selected models by an average of 37%. Our data and code can be found here https://github.com/cathyxl/MAgIC.</abstract>
@@ -5933,7 +5933,7 @@
     <paper id="421">
       <title>Lexically Grounded Subword Segmentation</title>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
       <pages>7403-7420</pages>
       <abstract>We present three innovations in tokenization and subword segmentation. First, we propose to use unsupervised morphological analysis with Morfessor as pre-tokenization. Second, we present an algebraic method for obtaining subword embeddings grounded in a word embedding space. Based on that, we design a novel subword segmentation algorithm that uses the embeddings, ensuring that the procedure considers lexical meaning. Third, we introduce an efficient segmentation algorithm based on a subword bigram model that can be initialized with the lexically aware segmentation method to avoid using Morfessor and large embedding tables at inference time. We evaluate the proposed approaches using two intrinsic metrics and measure their performance on two downstream tasks: part-of-speech tagging and machine translation. Our experiments show significant improvements in the morphological plausibility of the segmentation when evaluated using segmentation precision on morpheme boundaries and improved Rényi efficiency in 8 languages. Although the proposed tokenization methods do not have a large impact on automatic translation quality, we observe consistent performance gains in the arguably more morphological task of part-of-speech tagging.</abstract>
       <url hash="58076098">2024.emnlp-main.421</url>
@@ -5944,7 +5944,7 @@
       <title><fixed-case>EAGLE</fixed-case>-2: Faster Inference of Language Models with Dynamic Draft Trees</title>
       <author><first>Yuhui</first><last>Li</last></author>
       <author><first>Fangyun</first><last>Wei</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Peking University</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Peking University</affiliation></author>
       <author><first>Hongyang</first><last>Zhang</last><affiliation>School of Computer Science, University of Waterloo</affiliation></author>
       <pages>7421-7432</pages>
       <abstract>Inference with modern Large Language Models (LLMs) is expensive and time-consuming, and speculative sampling has proven to be an effective solution. Most speculative sampling methods such as EAGLE use a static draft tree, implicitly assuming that the acceptance rate of draft tokens depends only on their position. Interestingly, we found that the acceptance rate of draft tokens is also context-dependent. In this paper, building upon EAGLE, we propose EAGLE-2, which introduces a new technique of context-aware dynamic draft tree into drafting modeling. This improvement leverages the fact that the draft model of EAGLE is well-calibrated: the confidence scores from the draft model approximate acceptance rates with small errors. We conducted extensive evaluations on three series of LLMs and six tasks, with EAGLE-2 achieving speedup ratios of up to **5x**, which is 1.3x that of EAGLE. EAGLE-2 also ensures that the distribution of the generated text remains unchanged, making it a **lossless** acceleration algorithm.</abstract>
@@ -5958,7 +5958,7 @@
       <author><first>Hy</first><last>Nguyen</last><affiliation>University of Sydney, University of Sydney</affiliation></author>
       <author><first>Xuefei</first><last>He</last><affiliation>University of Hong Kong</affiliation></author>
       <author><first>Andrew</first><last>Reeson</last><affiliation>CSIRO</affiliation></author>
-      <author><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
       <author><first>Josiah</first><last>Poon</last><affiliation>University of Sydney</affiliation></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last><affiliation>University of Sydney</affiliation></author>
       <pages>7433-7441</pages>
@@ -6187,7 +6187,7 @@
       <title><fixed-case>DAMRO</fixed-case>: Dive into the Attention Mechanism of <fixed-case>LVLM</fixed-case> to Reduce Object Hallucination</title>
       <author><first>Xuan</first><last>Gong</last></author>
       <author><first>Tianshi</first><last>Ming</last></author>
-      <author id="xinpeng-wang"><first>Xinpeng</first><last>Wang</last></author>
+      <author><first>Xinpeng</first><last>Wang</last></author>
       <author><first>Zhihua</first><last>Wei</last><affiliation>Tongji University</affiliation></author>
       <pages>7696-7712</pages>
       <abstract>Despite the great success of Large Vision-Language Models (LVLMs), they inevitably suffer from hallucination. As we know, both the visual encoder and the Large Language Model (LLM) decoder in LVLMs are Transformer-based, allowing the model to extract visual information and generate text outputs via attention mechanisms. We find that the attention distribution of LLM decoder on image tokens is highly consistent with the visual encoder and both distributions tend to focus on particular background tokens rather than the referred objects in the image. We attribute to the unexpected attention distribution to an inherent flaw in the visual encoder itself, which misguides LLMs to over emphasize the redundant information and generate object hallucination. To address the issue, we propose DAMRO, a novel training-free strategy that **D**ive into **A**ttention **M**echanism of LVLM to **R**educe **O**bject Hallucination. Specifically, our approach employs classification token (CLS) of ViT to filter out high-attention tokens scattered in the background and then eliminate their influence during decoding stage. We evaluate our method on LVLMs including LLaVA-1.5, LLaVA-NeXT and InstructBLIP, using various benchmarks such as POPE, CHAIR, MME and GPT-4V Aided Evaluation. The results demonstrate that our approach significantly reduces the impact of these outlier tokens, thus effectively alleviating the hallucination of LVLMs.</abstract>
@@ -6320,7 +6320,7 @@
     <paper id="448">
       <title>“Image, Tell me your story!” Predicting the original meta-context of visual misinformation</title>
       <author><first>Jonathan</first><last>Tonglet</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>7845-7864</pages>
       <abstract>To assist human fact-checkers, researchers have developed automated approaches for visual misinformation detection. These methods assign veracity scores by identifying inconsistencies between the image and its caption, or by detecting forgeries in the image. However, they neglect a crucial point of the human fact-checking process: identifying the original meta-context of the image. By explaining what is actually true about the image, fact-checkers can better detect misinformation, focus their efforts on check-worthy visual content, engage in counter-messaging before misinformation spreads widely, and make their explanation more convincing. Here, we fill this gap by introducing the task of automated image contextualization. We create 5Pils, a dataset of 1,676 fact-checked images with question-answer pairs about their original meta-context. Annotations are based on the 5 Pillars fact-checking framework. We implement a first baseline that grounds the image in its original meta-context using the content of the image and textual evidence retrieved from the open web. Our experiments show promising results while highlighting several open challenges in retrieval and reasoning.</abstract>
@@ -6457,7 +6457,7 @@
       <author><first>Shimao</first><last>Zhang</last></author>
       <author><first>Changjiang</first><last>Gao</last><affiliation>nanjing university</affiliation></author>
       <author><first>Wenhao</first><last>Zhu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Xin</first><last>Huang</last><affiliation>China Mobile Communications Company Limited Research Institute</affiliation></author>
       <author><first>Xue</first><last>Han</last></author>
       <author><first>Junlan</first><last>Feng</last></author>
@@ -6538,7 +6538,7 @@
       <author><first>Richard</first><last>Shin</last><affiliation>Google</affiliation></author>
       <author><first>Patrick</first><last>Xia</last><affiliation>Microsoft</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>Massachusetts Institute of Technology and Microsoft</affiliation></author>
       <pages>8101-8112</pages>
       <abstract>Tools for translating natural language into code promise natural, open-ended interaction with databases, web APIs, and other software systems. However, this promise is complicated by the diversity and continual development of these systems, each with its own interface and distinct set of features. Building a new language-to-code translator, even starting with a large language model (LM), typically requires annotating a large set of natural language commands with their associated programs. In this paper, we describe ICIP (In-Context Inverse Programming), a method for bootstrapping a language-to-code system using mostly (or entirely) unlabeled programs written using a potentially unfamiliar (but human-readable) library or API. ICIP uses a pre-trained LM to assign candidate natural language descriptions to these programs, then iteratively refines the descriptions to ensure global consistency. Across nine different application domains from the Overnight and Spider benchmarks and text-davinci-003 and CodeLlama-7b-Instruct models, ICIP outperforms a number of prompting baselines. Indeed, in a “nearly unsupervised” setting with only a single annotated program and 100 unlabeled examples, it achieves up to 85% of the performance of a fully supervised system.</abstract>
@@ -6593,7 +6593,7 @@
       <title><fixed-case>E</fixed-case>mo<fixed-case>K</fixed-case>nob: Enhance Voice Cloning with Fine-Grained Emotion Control</title>
       <author><first>Haozhe</first><last>Chen</last></author>
       <author><first>Run</first><last>Chen</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>8170-8180</pages>
       <abstract>While recent advances in Text-to-Speech (TTS) technology produce natural and expressive speech, they lack the option for users to select emotion and control intensity. We propose EmoKnob, a framework that allows fine-grained emotion control in speech synthesis with few-shot demonstrative samples of arbitrary emotion. Our framework leverages the expressive speaker representation space made possible by recent advances in foundation voice cloning models. Based on the few-shot capability of our emotion control framework, we propose two methods to apply emotion control on emotions described by open-ended text, enabling an intuitive interface for controlling a diverse array of nuanced emotions. To facilitate a more systematic emotional speech synthesis field, we introduce a set of evaluation metrics designed to rigorously assess the faithfulness and recognizability of emotion control frameworks. Through objective and subjective evaluations, we show that our emotion control framework effectively embeds emotions into speech and surpasses emotion expressiveness of commercial TTS services.</abstract>
       <url hash="feaf2af3">2024.emnlp-main.466</url>
@@ -6716,7 +6716,7 @@
       <author><first>Wenxuan</first><last>Zhou</last><affiliation>Zoom</affiliation></author>
       <author><first>Ravi</first><last>Agrawal</last><affiliation>Zoom Video Communications</affiliation></author>
       <author><first>Shujian</first><last>Zhang</last><affiliation>University of Texas, Austin</affiliation></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last><affiliation>Zoom Video Communications</affiliation></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last><affiliation>Zoom Video Communications</affiliation></author>
       <author><first>Sanqiang</first><last>Zhao</last><affiliation>Zoom</affiliation></author>
       <author><first>Kaiqiang</first><last>Song</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Silei</first><last>Xu</last><affiliation>Zoom</affiliation></author>
@@ -6933,9 +6933,9 @@
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>JaeHyeok</first><last>Doo</last><affiliation>AI@UIUC</affiliation></author>
       <author><first>Yifei</first><last>Xu</last><affiliation>Lapis Lab</affiliation></author>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Deevya</first><last>Swain</last></author>
-      <author><first>Avirup</first><last>Sil</last><affiliation>International Business Machines</affiliation></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>8642-8652</pages>
       <abstract>Large Language Models (LLMs) have significantly advanced the field of information retrieval, particularly for reranking. Listwise LLM rerankers have showcased superior performance and generalizability compared to existing supervised approaches. However, conventional listwise LLM reranking methods lack efficiency as they provide ranking output in the form of a generated ordered sequence of candidate passage identifiers. Further, they are trained with the typical language modeling objective, which treats all ranking errors uniformly–potentially at the cost of misranking highly relevant passages. Addressing these limitations, we introduce FIRST, a novel listwise LLM reranking approach leveraging the output logits of the first generated identifier to directly obtain a ranked ordering of the candidates. Further, we incorporate a learning-to-rank loss during training, prioritizing ranking accuracy for the more relevant passages. Empirical results demonstrate that FIRST accelerates inference by 50% while maintaining a robust ranking performance with gains across the BEIR benchmark. Finally, to illustrate the practical effectiveness of listwise LLM rerankers, we investigate their application in providing relevance feedback for retrievers during inference. Our results show that LLM rerankers can provide a stronger distillation signal compared to cross-encoders, yielding substantial improvements in retriever recall after relevance feedback.</abstract>
@@ -6988,7 +6988,7 @@
     <paper id="495">
       <title>Revisiting Who’s Harry Potter: Towards Targeted Unlearning from a Causal Intervention Perspective</title>
       <author><first>Yujian</first><last>Liu</last><affiliation>University of California, Santa Barbara</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Tommi</first><last>Jaakkola</last><affiliation>Massachusetts Institute of Technology and Massachusetts Institute of Technology</affiliation></author>
       <author><first>Shiyu</first><last>Chang</last><affiliation>UC Santa Barbara</affiliation></author>
       <pages>8708-8731</pages>
@@ -7157,7 +7157,7 @@
       <author><first>Shrestha</first><last>Mohanty</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Cassandra</first><last>Overney</last></author>
       <author><first>Elinor</first><last>Poole-Dayan</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>9004-9018</pages>
       <abstract>Language model alignment research often attempts to ensure that models are not only helpful and harmless, but also truthful and unbiased. However, optimizing these objectives simultaneously can obscure how improving one aspect might impact the others. In this work, we focus on analyzing the relationship between two concepts essential in both language model alignment and political science: truthfulness and political bias. We train reward models on various popular truthfulness datasets and subsequently evaluate their political bias. Our findings reveal that optimizing reward models for truthfulness on these datasets tends to result in a left-leaning political bias. We also find that existing open-source reward models (i.e., those trained on standard human preference datasets) already show a similar bias and that the bias is larger for larger models. These results raise important questions about the datasets used to represent truthfulness, potential limitations of aligning models to be both truthful and politically unbiased, and what language models capture about the relationship between truth and politics.</abstract>
@@ -7297,8 +7297,8 @@
       <author><first>Yue</first><last>Guo</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Tal</first><last>August</last></author>
       <author><first>Gondy</first><last>Leroy</last><affiliation>University of Arizona</affiliation></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>9194-9211</pages>
       <abstract>While there has been significant development of models for Plain Language Summarization (PLS), evaluation remains a challenge. PLS lacks a dedicated assessment metric, and the suitability of text generation evaluation metrics is unclear due to the unique transformations involved (e.g., adding background explanations, removing jargon). To address these questions, our study introduces a granular meta-evaluation testbed, APPLS, designed to evaluate metrics for PLS. We identify four PLS criteria from previous work—informativeness, simplification, coherence, and faithfulness—and define a set of perturbations corresponding to these criteria that sensitive metrics should be able to detect. We apply these perturbations to extractive hypotheses for two PLS datasets to form our testbed. Using APPLS, we assess performance of 14 metrics, including automated scores, lexical features, and LLM prompt-based evaluations. Our analysis reveals that while some current metrics show sensitivity to specific criteria, no single method captures all four criteria simultaneously. We therefore recommend a suite of automated metrics be used to capture PLS quality along all relevant criteria. This work contributes the first meta-evaluation testbed for PLS and a comprehensive evaluation of existing metrics.</abstract>
       <url hash="8e7c6244">2024.emnlp-main.519</url>
@@ -7461,10 +7461,10 @@
       <author><first>Orion</first><last>Weller</last></author>
       <author><first>Shreya</first><last>Sharma</last></author>
       <author><first>Dongwei</first><last>Jiang</last></author>
-      <author><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Oyvind</first><last>Tafjord</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <pages>9458-9482</pages>
@@ -7586,7 +7586,7 @@
     </paper>
     <paper id="540">
       <title>Modeling Layout Reading Order as Ordering Relations for Visually-rich Document Understanding</title>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
+      <author><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yi</first><last>Tu</last></author>
       <author><first>Yixi</first><last>Zhao</last></author>
       <author><first>Chenshu</first><last>Yuan</last><affiliation>Nankai University</affiliation></author>
@@ -7657,7 +7657,7 @@
       <title>Evaluating the Effectiveness of Large Language Models in Establishing Conversational Grounding</title>
       <author><first>Biswesh</first><last>Mohapatra</last><affiliation>INRIA</affiliation></author>
       <author><first>Manav Nitin</first><last>Kapadnis</last></author>
-      <author><first>Laurent</first><last>Romary</last><affiliation>INRIA</affiliation></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last><affiliation>INRIA</affiliation></author>
       <author><first>Justine</first><last>Cassell</last><affiliation>INRIA and Carnegie Mellon University</affiliation></author>
       <pages>9767-9781</pages>
       <abstract>Conversational grounding, vital for building dependable dialog systems, involves ensuring a mutual understanding of shared information. Despite its importance, there has been limited research on this aspect of conversation in recent years, especially after the advent of Large Language Models (LLMs). Previous studies have highlighted the shortcomings of pre-trained language models in conversational grounding. However, most testing for conversational grounding capabilities involves human evaluations that are costly and time-consuming. This has led to a lack of testing across multiple models of varying sizes, a critical need given the rapid rate of new model releases. This gap in research becomes more significant considering recent advances in language models, which have led to new emergent capabilities. In this paper, we aim to evaluate the performance of LLMs in various aspects of conversational grounding and analyze why some models perform better than others. We demonstrate a direct correlation between the size of the pre-training data and conversational grounding abilities, meaning that they have independently acquired a specific form of pragmatic capabilities from larger pre-training datasets. Finally, we propose ways to enhance the capabilities of the models that lag in this aspect.</abstract>
@@ -7707,7 +7707,7 @@
     </paper>
     <paper id="549">
       <title><fixed-case>MQ</fixed-case>uin<fixed-case>E</fixed-case>: a Cure for “<fixed-case>Z</fixed-case>-paradox” in Knowledge Graph Embedding</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Huang</first><last>Fang</last></author>
       <author><first>Yunfeng</first><last>Cai</last><affiliation>Beijing Institute of Mathematical Sciences and Applications</affiliation></author>
       <author><first>Mingming</first><last>Sun</last><affiliation>Beijing Institute of Mathematical Sciences and Applications</affiliation></author>
@@ -7747,7 +7747,7 @@
     <paper id="552">
       <title>Summary of a Haystack: A Challenge to Long-Context <fixed-case>LLM</fixed-case>s and <fixed-case>RAG</fixed-case> Systems</title>
       <author><first>Philippe</first><last>Laban</last></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>9885-9903</pages>
@@ -7801,7 +7801,7 @@
       <author><first>Pengfei</first><last>Wu</last></author>
       <author><first>Jianxin</first><last>Liang</last></author>
       <author><first>Dongyan</first><last>Zhao</last><affiliation>Peking University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Peking University</affiliation></author>
       <author><first>Zilong</first><last>Zheng</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <pages>9972-9987</pages>
       <abstract>Despite progress in multimodal large language models (MLLMs), the challenge of interpreting long-form videos in response to linguistic queries persists, largely due to the inefficiency in temporal grounding and limited pre-trained context window size. In this work, we introduce Temporal Grounding Bridge (TGB), a novel framework that bootstraps MLLMs with advanced temporal grounding capabilities and broadens their contextual scope. Our framework significantly enhances the temporal capabilities of current MLLMs through three key innovations: an efficient multi-span temporal grounding algorithm applied to low-dimension temporal features projected from flow; a multimodal length extrapolation training paradigm that utilizes low-dimension temporal features to extend the training context window size; and a bootstrapping framework that bridges our model with pluggable MLLMs without requiring annotation. We validate TGB across seven video benchmarks and demonstrate substantial performance improvements compared with prior MLLMs. Notably, our model, initially trained on sequences of four frames, effectively handles sequences up to 16 longer without sacrificing performance, highlighting its scalability and effectiveness in real-world applications. Our code is publicly available.</abstract>
@@ -7816,7 +7816,7 @@
       <author><first>Akankshya</first><last>Mishra</last></author>
       <author><first>Griffin Thomas</first><last>Adams</last></author>
       <author><first>Lydia</first><last>Chilton</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>9988-10005</pages>
       <abstract>Human evaluation has been the gold standard for checking faithfulness in abstractive summarization. However, with a challenging source domain like narrative, multiple annotators can agree a summary is faithful, while missing details that are obvious errors only once pointed out. We therefore introduce a new dataset, StorySumm, comprising LLM summaries of short stories with localized faithfulness labels and error explanations. This benchmark is for evaluation methods, testing whether a given method can detect challenging inconsistencies. Using this dataset, we first show that any one human annotation protocol is likely to miss inconsistencies, and we advocate for pursuing a range of methods when establishing ground truth for a summarization dataset. We finally test recent automatic metrics and find that none of them achieve more than 70% balanced accuracy on this task, demonstrating that it is a challenging benchmark for future work in faithfulness evaluation.</abstract>
       <url hash="a87d2ef8">2024.emnlp-main.557</url>
@@ -7855,7 +7855,7 @@
       <author><first>Lin</first><last>Ai</last></author>
       <author><first>Zheng</first><last>Hui</last><affiliation>Microsoft and Columbia University</affiliation></author>
       <author><first>Zizhou</first><last>Liu</last></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>10046-10063</pages>
       <url hash="28bb5d94">2024.emnlp-main.560</url>
       <attachment type="software" hash="2f3036ad">2024.emnlp-main.560.software.zip</attachment>
@@ -7991,7 +7991,7 @@
       <title>Towards Robust Speech Representation Learning for Thousands of Languages</title>
       <author><first>William</first><last>Chen</last></author>
       <author><first>Wangyou</first><last>Zhang</last></author>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Xinjian</first><last>Li</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Jinchuan</first><last>Tian</last></author>
       <author><first>Jiatong</first><last>Shi</last></author>
@@ -8022,8 +8022,8 @@
       <author><first>Jiahuan</first><last>Li</last></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Aarron</first><last>Ching</last><affiliation>Independent Researcher</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>10246-10257</pages>
       <abstract>Large language models demonstrate reasonable multilingual abilities, despite predominantly English-centric pretraining. However, the spontaneous multilingual alignment in these models is shown to be weak, leading to unsatisfactory cross-lingual transfer and knowledge sharing. Previous works attempt to address this issue by explicitly injecting multilingual alignment information during or after pretraining. Thus for the early stage in pretraining, the alignment is weak for sharing information or knowledge across languages. In this paper, we propose PreAlign, a framework that establishes multilingual alignment prior to language model pretraining. PreAlign injects multilingual alignment by initializing the model to generate similar representations of aligned words and preserves this alignment using a code-switching strategy during pretraining. Extensive experiments in a synthetic English to English-Clone setting demonstrate that PreAlign significantly outperforms standard multilingual joint training in language modeling, zero-shot cross-lingual transfer, and cross-lingual knowledge application. Further experiments in real-world scenarios further validate PreAlign’s effectiveness across various model sizes.</abstract>
       <url hash="9f623b1f">2024.emnlp-main.572</url>
@@ -8379,7 +8379,7 @@
     </paper>
     <paper id="598">
       <title>Demystifying Verbatim Memorization in Large Language Models</title>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>10711-10732</pages>
@@ -8401,7 +8401,7 @@
       <title>Distributional Properties of Subword Regularization</title>
       <author><first>Marco</first><last>Cognetta</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology and Google</affiliation></author>
       <author><first>Vilém</first><last>Zouhar</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>10753-10763</pages>
       <abstract>Subword regularization, used widely in NLP, improves model performance by reducing the dependency on exact tokenizations, augmenting the training corpus, and exposing the model to more unique contexts during training. BPE and MaxMatch, two popular subword tokenization schemes, have stochastic dropout regularization variants. However, there has not been an analysis of the distributions formed by them.We show that these stochastic variants are heavily biased towards a small set of tokenizations per word. If the benefits of subword regularization are as mentioned, we hypothesize that biasedness artificially limits the effectiveness of these schemes. Thus, we propose an algorithm to uniformly sample tokenizations that we use as a drop-in replacement for the stochastic aspects of existing tokenizers, and find that it improves machine translation quality.</abstract>
       <url hash="cc5f85ea">2024.emnlp-main.600</url>
@@ -8457,8 +8457,8 @@
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Facebook and University of Washington, Seattle</affiliation></author>
       <author><first>Margaret</first><last>Li</last><affiliation>Meta and University of Washington</affiliation></author>
       <author><first>Hila</first><last>Gonen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <pages>10822-10837</pages>
       <abstract>Despite their popularity in non-English NLP, multilingual language models often underperform monolingual ones due to inter-language competition for model parameters. We propose Cross-lingual Expert Language Models (X-ELM), which mitigate this competition by independently training language models on subsets of the multilingual corpus. This process specializes X-ELMs to different languages while remaining effective as a multilingual ensemble. Our experiments show that when given the same compute budget, X-ELM outperforms jointly trained multilingual models across all 16 considered languages and that these gains transfer to downstream tasks. X-ELM provides additional benefits over performance improvements: new experts can be iteratively added, adapting X-ELM to new languages without catastrophic forgetting. Furthermore, training is asynchronous, reducing the hardware requirements for multilingual training and democratizing multilingual modeling.</abstract>
       <url hash="053f3d51">2024.emnlp-main.604</url>
@@ -8636,7 +8636,7 @@
       <author><first>Jingtao</first><last>Cao</last></author>
       <author><first>Zhang</first><last>Zheng</last></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>11034-11049</pages>
       <abstract>Progress in Text-to-Image (T2I) models has significantly advanced the generation of images from textual descriptions. Existing metrics, such as CLIP, effectively measure the semantic alignment between single prompts and their corresponding images. However, they fall short in evaluating a model’s ability to generalize across a broad spectrum of textual inputs. To address this gap, we propose the VLEU (<b>V</b>isual <b>L</b>anguage <b>E</b>valuation <b>U</b>nderstudy) metric. VLEU leverages the power of Large Language Models (LLMs) to sample from the visual text domain, encompassing the entire range of potential inputs for the T2I task, to generate a wide variety of visual text. The images generated by T2I models from these prompts are then assessed for their alignment with the input text using the CLIP model. VLEU quantitatively measures a model’s generalizability by computing the Kullback-Leibler (KL) divergence between the visual text marginal distribution and the conditional distribution over the images generated by the model. This provides a comprehensive metric for comparing the overall generalizability of T2I models, beyond single-prompt evaluations, and offers valuable insights during the finetuning process. Our experimental results demonstrate VLEU’s effectiveness in evaluating the generalizability of various T2I models, positioning it as an essential metric for future research and development in image synthesis from text prompts. Our code and data will be publicly available at <url>https://github.com/mio7690/VLEU</url>.</abstract>
       <url hash="41dad8f1">2024.emnlp-main.618</url>
@@ -8677,7 +8677,7 @@
       <author><first>Daoyuan</first><last>Wu</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Shuai</first><last>Wang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Cuiyun</first><last>Gao</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>11084-11108</pages>
       <abstract>Large language models (LLMs) have shown promise as automated evaluators for assessing the quality of answers generated by AI systems. However, LLM-based evaluators exhibit position bias, or inconsistency, when used to evaluate candidate answers in pairwise comparisons, favoring either the first or second answer regardless of content. To address this limitation, we propose PORTIA, an alignment-based system designed to mimic human comparison strategies to calibrate position bias in a lightweight yet effective manner. Specifically, PORTIA splits the answers into multiple segments, taking into account both length and semantics, and merges them back into a single prompt for evaluation by LLMs. Extensive experiments with six LLMs on 11,520 answer pairs demonstrate that PORTIA markedly enhances the consistency rates for all models and forms of comparison tested, achieving an average relative improvement of 47.46%. It also enables PORTIA-enhanced GPT-3.5 to achieve agreement rates with humans comparable to GPT-4 and elevates GPT-4’s consistency rate up to 98%. Subsequent human evaluations indicate that the PORTIA-enhanced GPT-3.5 model can even surpass standalone GPT-4 in terms of alignment with human evaluators, highlighting PORTIA’s ability to correct position bias, improve LLM consistency, and boost performance while keeping cost efficiency.</abstract>
       <url hash="0c4bf283">2024.emnlp-main.621</url>
@@ -8689,7 +8689,7 @@
     <paper id="622">
       <title>Integrating Argumentation and Hate-Speech-based Techniques for Countering Misinformation</title>
       <author><first>Sougata</first><last>Saha</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>11109-11124</pages>
       <abstract>The proliferation of online misinformation presents a significant challenge, requiring scalable strategies for effective mitigation. While detection methods exist, current reactive approaches, like content flagging and banning, are short-term and insufficient. Additionally, advancements like large language models (LLMs) exacerbate the issue by enabling large-scale creation and dissemination of misinformation. Thus, sustainable, scalable solutions that encourage behavior change and broaden perspectives by persuading misinformants against their viewpoints or broadening their perspectives are needed. To this end, we propose persuasive LLM-based dialogue systems to tackle misinformation. However, challenges arise due to the lack of suitable datasets and formal frameworks for generating persuasive responses. Inspired by existing methods for countering online hate speech, we explore adapting counter-hate response strategies for misinformation. Since misinformation and hate speech often coexist despite differing intentions, we develop classifiers to identify and annotate response strategies from hate-speech counter-responses for use in misinformation scenarios. Human evaluations show a 91% agreement on the applicability of these strategies to misinformation. Next, as a scalable counter-misinformation solution, we create an LLM-based argument graph framework that generates persuasive responses, using the strategies as control codes to adjust the style and content. Human evaluations and case studies demonstrate that our framework generates expert-like responses and is 14% more engaging, 21% more natural, and 18% more factual than the best available alternatives.</abstract>
       <url hash="d0a9c845">2024.emnlp-main.622</url>
@@ -8728,7 +8728,7 @@
       <title>Unlocking Markets: A Multilingual Benchmark to Cross-Market Question Answering</title>
       <author><first>Yifei</first><last>Yuan</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Mohammad</first><last>Aliannejadi</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>11154-11169</pages>
       <abstract>Users post numerous product-related questions on e-commerce platforms, affecting their purchase decisions. Product-related question answering (PQA) entails utilizing product-related resources to provide precise responses to users. We propose a novel task of Multilingual Cross-market Product-based Question Answering (MCPQA) and define the task as providing answers to product-related questions in a main marketplace by utilizing information from another resource-rich auxiliary marketplace in a multilingual context. We introduce a large-scale dataset comprising over 7 million questions from 17 marketplaces across 11 languages. We then perform automatic translation on the Electronics category of our dataset, naming it as McMarket. We focus on two subtasks: review-based answer generation and product-related question ranking. For each subtask, we label a subset of McMarket using an LLM and further evaluate the quality of the annotations via human assessment. We then conduct experiments to benchmark our dataset, using models ranging from traditional lexical models to LLMs in both single-market and cross-market scenarios across McMarket and the corresponding LLM subset. Results show that incorporating cross-market information significantly enhances performance in both tasks.</abstract>
@@ -8792,7 +8792,7 @@
       <title>Unveiling the Role of Pretraining in Direct Speech Translation</title>
       <author><first>Belen</first><last>Alastruey</last><affiliation>Facebook</affiliation></author>
       <author><first>Gerard I.</first><last>Gállego</last><affiliation>Universidad Politécnica de Cataluna</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>11259-11265</pages>
       <abstract>Direct speech-to-text translation systems encounter an important drawback in data scarcity. A common solution consists on pretraining the encoder on automatic speech recognition, hence losing efficiency in the training process. In this study, we compare the training dynamics of a system using a pretrained encoder, the conventional approach, and one trained from scratch. We observe that, throughout the training, the randomly initialized model struggles to incorporate information from the speech inputs for its predictions. Hence, we hypothesize that this issue stems from the difficulty of effectively training an encoder for direct speech translation. While a model trained from scratch needs to learn acoustic and semantic modeling simultaneously, a pretrained one can just focus on the latter. Based on these findings, we propose a subtle change in the decoder cross-attention to integrate source information from earlier steps in training. We show that with this change, the model trained from scratch can achieve comparable performance to the pretrained one, while reducing the training time.</abstract>
       <url hash="40bb39a2">2024.emnlp-main.630</url>
@@ -8906,7 +8906,7 @@
     <paper id="638">
       <title><fixed-case>TEMA</fixed-case>: Token Embeddings Mapping for Enriching Low-Resource Language Models</title>
       <author><first>Rodolfo</first><last>Zevallos</last></author>
-      <author><first>Núria</first><last>Bel</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <author><first>Mireia</first><last>Farrús</last><affiliation>Universitat de Barcelona</affiliation></author>
       <pages>11423-11435</pages>
       <abstract>The objective of the research we present is to remedy the problem of the low quality of language models for low-resource languages. We introduce an algorithm, the Token Embedding Mapping Algorithm (TEMA), that maps the token embeddings of a richly pre-trained model L1 to a poorly trained model L2, thus creating a richer L2’ model. Our experiments show that the L2’ model reduces perplexity with respect to the original monolingual model L2, and that for downstream tasks, including SuperGLUE, the results are state-of-the-art or better for the most semantic tasks. The models obtained with TEMA are also competitive or better than multilingual or extended models proposed as solutions for mitigating the low-resource language problems.</abstract>
@@ -8958,7 +8958,7 @@
       <title>Universal Vulnerabilities in Large Language Models: Backdoor Attacks for In-context Learning</title>
       <author><first>Shuai</first><last>Zhao</last></author>
       <author><first>Meihuizi</first><last>Jia</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Fengjun</first><last>Pan</last></author>
       <author><first>Jinming</first><last>Wen</last></author>
       <pages>11507-11522</pages>
@@ -9036,7 +9036,7 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Fabian</first><last>Peller-Konrad</last></author>
       <author><first>Tobias</first><last>Röddiger</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <author><first>Tamim</first><last>Asfour</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Michael</first><last>Beigl</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
       <author><first>Rainer</first><last>Stiefelhagen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
@@ -9205,7 +9205,7 @@
       <author><first>Sergei</first><last>Bogdanov</last><affiliation>NuMind</affiliation></author>
       <author><first>Alexandre</first><last>Constantin</last></author>
       <author><first>Timothée</first><last>Bernard</last><affiliation>Université Paris Cité</affiliation></author>
-      <author><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
       <author><first>Etienne P</first><last>Bernard</last></author>
       <pages>11829-11841</pages>
       <abstract>Large Language Models (LLMs) have shown impressive abilities in data annotation, opening the way for new approaches to solve classic NLP problems. In this paper, we show how to use LLMs to create NuNER, a compact language representation model specialized in the Named Entity Recognition (NER) task. NuNER can be fine-tuned to solve downstream NER problems in a data-efficient way, outperforming similar-sized foundation models in the few-shot regime and competing with much larger LLMs. We find that the size and entity-type diversity of the pre-training dataset are key to achieving good performance. We view NuNER as a member of the broader family of task-specific foundation models, recently unlocked by LLMs. NuNER and NuNER’s dataset are open-sourced with MIT License.</abstract>
@@ -9253,7 +9253,7 @@
     <paper id="664">
       <title>Rationalizing Transformer Predictions via End-To-End Differentiable Self-Training</title>
       <author><first>Marc Felix</first><last>Brinner</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>11894-11907</pages>
       <abstract>We propose an end-to-end differentiable training paradigm for stable training of a rationalized transformer classifier. Our approach results in a single model that simultaneously classifies a sample and scores input tokens based on their relevance to the classification. To this end, we build on the widely-used three-player-game for training rationalized models, which typically relies on training a rationale selector, a classifier and a complement classifier. We simplify this approach by making a single model fulfill all three roles, leading to a more efficient training paradigm that is not susceptible to the common training instabilities that plague existing approaches. Further, we extend this paradigm to produce class-wise rationales while incorporating recent advances in parameterizing and regularizing the resulting rationales, thus leading to substantially improved and state-of-the-art alignment with human annotations without any explicit supervision.</abstract>
       <url hash="7239f91c">2024.emnlp-main.664</url>
@@ -9335,7 +9335,7 @@
       <title><fixed-case>PANDA</fixed-case>: Persona Attributes Navigation for Detecting and Alleviating Overuse Problem in Large Language Models</title>
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Seonmin</first><last>Koo</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>12005-12026</pages>
       <abstract>In the persona-grounded dialogue (PGD) task, it is required not only to respond fluently, but also to ground the attributes according to the current conversation topic properly. However, due to their tendency to overly ground given attributes, LLMs often generate unnatural responses provoked by using attributes that deviate from the flow of the conversation or by exploiting too many attributes at once. We term this phenomenon the *overuse* problem of LLMs. Unfortunately, research devising precise criteria and frameworks to quantitatively verify LLMs’ *overuse* problem is obviously insufficient. To address this issue, we propose **P**ersona **A**ttributes **N**avigation for **D**etecting and **A**lleviating the *overuse* problem (**PANDA**) framework. **PANDA** is the first study to quantify the persona *overuse* problem of LLMs by establishing clear standards of the problem and verifying various LLMs based on them. Moreover, this framework navigates us into understanding persona attributes by introducing diverse and detailed dialogue topics that consider practical conversation situations. We provide insights related to LLMs’ persona attribute *overuse* problem through comprehensive verification and analysis with **PANDA** in the PGD task. Our code and resources can be found at http://github.com/jin62304/PANDA.</abstract>
       <url hash="a343ab1d">2024.emnlp-main.670</url>
@@ -9360,7 +9360,7 @@
     <paper id="672">
       <title>Subword Segmentation in <fixed-case>LLM</fixed-case>s: Looking at Inflection and Consistency</title>
       <author><first>Marion Di</first><last>Marco</last><affiliation>Technische Universität München</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>12050-12060</pages>
       <abstract>The role of subword segmentation in relation to capturing morphological patterns in LLMs is currently not well explored. Ideally, one would train models like GPT using various segmentations and evaluate how well word meanings are captured. Since this is not computationally feasible, we group words according to their segmentation properties and compare how well a model can solve a linguistic task for these groups. We study two criteria: (i) adherence to morpheme boundaries and (ii) the segmentation consistency of the different inflected forms of a lemma. We select word forms with high and low values for these criteria and carry out experiments on GPT-4o’s ability to capture verbal inflection for 10 languages. Our results indicate that in particular the criterion of segmentation consistency can help to predict the model’s ability to recognize and generate the lemma from an inflected form, providing evidence that subword segmentation is relevant.</abstract>
       <url hash="a491ec71">2024.emnlp-main.672</url>
@@ -9372,7 +9372,7 @@
       <author><first>Omar</first><last>Sharif</last><affiliation>Dartmouth College and Chittagong University of Engineering and Techology</affiliation></author>
       <author><first>Joseph</first><last>Gatto</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Madhusudan</first><last>Basak</last><affiliation>Dartmouth College and Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
       <pages>12061-12081</pages>
       <abstract>Prior works formulate the extraction of event-specific arguments as a span extraction problem, where event arguments are explicit — i.e. assumed to be contiguous spans of text in a document. In this study, we revisit this definition of Event Extraction (EE) by introducing two key argument types that cannot be modeled by existing EE frameworks. First, implicit arguments are event arguments which are not explicitly mentioned in the text, but can be inferred through context. Second, scattered arguments are event arguments that are composed of information scattered throughout the text. These two argument types are crucial to elicit the full breadth of information required for proper event modeling.To support the extraction of explicit, implicit, and scattered arguments, we develop a novel dataset, DiscourseEE, which includes 7,464 argument annotations from online health discourse. Notably, 51.2% of the arguments are implicit, and 17.4% are scattered, making DiscourseEE a unique corpus for complex event extraction. Additionally, we formulate argument extraction as a text generation problem to facilitate the extraction of complex argument types. We provide a comprehensive evaluation of state-of-the-art models and highlight critical open challenges in generative event extraction. Our data and codebase are available at https://omar-sharif03.github.io/DiscourseEE.</abstract>
       <url hash="9810839c">2024.emnlp-main.673</url>
@@ -9499,7 +9499,7 @@
       <author><first>Enora</first><last>Rice</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Alexis</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Lori</first><last>Levin</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>12267-12286</pages>
       <abstract>Language documentation projects often involve the creation of annotated text in a format such as interlinear glossed text (IGT), which captures fine-grained morphosyntactic analyses in a morpheme-by-morpheme format. However, there are few existing resources providing large amounts of standardized, easily accessible IGT data, limiting their applicability to linguistic research, and making it difficult to use such data in NLP modeling. We compile the largest existing corpus of IGT data from a variety of sources, covering over 450k examples across 1.8k languages, to enable research on crosslingual transfer and IGT generation. We normalize much of our data to follow a standard set of labels across languages.Furthermore, we explore the task of automatically generating IGT in order to aid documentation projects. As many languages lack sufficient monolingual data, we pretrain a large multilingual model on our corpus. We demonstrate the utility of this model by finetuning it on monolingual corpora, outperforming SOTA models by up to 6.6%. Our pretrained model and dataset are available on Hugging Face: https://huggingface.co/collections/lecslab/glosslm-66da150854209e910113dd87</abstract>
       <url hash="0f60bcb9">2024.emnlp-main.683</url>
@@ -9509,12 +9509,12 @@
     </paper>
     <paper id="684">
       <title><fixed-case>GDTB</fixed-case>: Genre Diverse Data for <fixed-case>E</fixed-case>nglish Shallow Discourse Parsing across Modalities, Text Types, and Domains</title>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Tatsuya</first><last>Aoyama</last></author>
       <author><first>Wesley</first><last>Scivetti</last></author>
       <author><first>Yilun</first><last>Zhu</last></author>
       <author><first>Shabnam</first><last>Behzad</last></author>
-      <author><first>Lauren Elizabeth</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren Elizabeth</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Jessica</first><last>Lin</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Devika</first><last>Tiwari</last></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
@@ -9696,7 +9696,7 @@
       <author><first>Shenghua</first><last>Liu</last></author>
       <author><first>Yiwei</first><last>Wang</last><affiliation>University of California, Merced</affiliation></author>
       <author><first>Baolong</first><last>Bi</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>12558-12575</pages>
       <abstract>The dynamic nature of language, particularly evident in the realm of slang and memes on the Internet, poses serious challenges to the adaptability of Large Language Models (LLMs). Traditionally anchored to static datasets, these models often struggle to keep up with the rapid linguistic evolution characteristic of online communities. This research aims to bridge this gap by enhancing LLMs’ comprehension of the evolving new concepts on the Internet, without the high cost of continual retraining. In pursuit of this goal, we introduce <b>SLNAG</b>, a benchmark designed to autonomously integrate novel data and assess LLMs’ ability to comprehend emerging concepts, alongside <b>FOCUS</b>, an approach uses causal inference to enhance LLMs to understand new phrases and their colloquial context. Our benchmark and approach involves understanding real-world instances of linguistic shifts, serving as contextual beacons, to form more precise and contextually relevant connections between newly emerging expressions and their meanings. The empirical analysis shows that our causal inference-based approach outperforms the baseline methods in terms of precision and relevance in the comprehension of Internet slang and memes.</abstract>
       <url hash="1c8ec592">2024.emnlp-main.698</url>
@@ -9747,7 +9747,7 @@
       <title><fixed-case>SUPER</fixed-case>: Evaluating Agents on Setting Up and Executing Tasks from Research Repositories</title>
       <author><first>Ben</first><last>Bogin</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Kejuan</first><last>Yang</last></author>
-      <author id="shashank-gupta"><first>Shashank</first><last>Gupta</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author><first>Shashank</first><last>Gupta</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Kyle</first><last>Richardson</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Erin</first><last>Bransom</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -9861,7 +9861,7 @@
       <author><first>Junjie</first><last>Ye</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>12750-12771</pages>
       <abstract>Task-oriented dialogue (TOD) systems aim to efficiently handle task-oriented conversations, including information collection. How to utilize TOD accurately, efficiently and effectively for information collection has always been a critical and challenging task. Recent studies have demonstrated that Large Language Models (LLMs) excel in dialogue, instruction generation, and reasoning, and can significantly enhance the performance of TOD through fine-tuning. However, current datasets primarily cater to user-led systems and are limited to predefined specific scenarios and slots, thereby necessitating improvements in the proactiveness, diversity, and capabilities of TOD. In this study, we present a detailed multi-domain task-oriented data construction process for conversations, and a Chinese dialogue dataset generated based on this process, **TransferTOD**, which authentically simulates human-computer dialogues in 30 popular life service scenarios. Leveraging this dataset, we trained a model using full-parameter fine-tuning called **TransferTOD-7B**, showcasing notable abilities in slot filling and questioning. Our work has demonstrated its strong generalization capabilities in various downstream scenarios, significantly enhancing both data utilization efficiency and system performance. The data is released in https://github.com/KongLongGeFDU/TransferTOD.</abstract>
       <url hash="57318882">2024.emnlp-main.710</url>
@@ -9881,7 +9881,7 @@
       <author><first>Kate V</first><last>Hardy</last><affiliation>Stanford University</affiliation></author>
       <author><first>Hong</first><last>Shen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Fei</first><last>Fang</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <pages>12772-12797</pages>
       <abstract>Mental illness remains one of the most critical public health issues. Despite its importance, many mental health professionals highlight a disconnect between their training and actual real-world patient practice. To help bridge this gap, we propose PATIENT-<tex-math>\psi</tex-math>, a novel patient simulation framework for cognitive behavior therapy (CBT) training. To build PATIENT-<tex-math>\psi</tex-math>, we construct diverse patient cognitive models based on CBT principles and use large language models (LLMs) programmed with these cognitive models to act as a simulated therapy patient. We propose an interactive training scheme, PATIENT-<tex-math>\psi</tex-math>-TRAINER, for mental health trainees to practice a key skill in CBT – formulating the cognitive model of the patient – through role-playing a therapy session with PATIENT-<tex-math>\psi</tex-math>. To evaluate PATIENT-<tex-math>\psi</tex-math>, we conducted a comprehensive user study of 13 mental health trainees and 20 experts. The results demonstrate that practice using PATIENT-<tex-math>\psi</tex-math>-TRAINER enhances the perceived skill acquisition and confidence of the trainees beyond existing forms of training such as textbooks, videos, and role-play with non-patients. Based on the experts’ perceptions, PATIENT-<tex-math>\psi</tex-math> is perceived to be closer to real patient interactions than GPT-4, and PATIENT-<tex-math>\psi</tex-math>-TRAINER holds strong promise to improve trainee competencies. Our code and data are released at <url>https://github.com/ruiyiw/patient-psi</url>.</abstract>
       <url hash="9981db2f">2024.emnlp-main.711</url>
@@ -9908,7 +9908,7 @@
       <author><first>Xinyue</first><last>Shen</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Yugeng</first><last>Liu</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>12814-12845</pages>
       <url hash="e5bbeb0a">2024.emnlp-main.713</url>
       <bibkey>jiang-etal-2024-modscan</bibkey>
@@ -9918,7 +9918,7 @@
       <title>Large Language Models Can Self-Correct with Key Condition Verification</title>
       <author><first>Zhenyu</first><last>Wu</last><affiliation>University of Notre Dame and Xi’an Jiaotong University</affiliation></author>
       <author><first>Qingkai</first><last>Zeng</last><affiliation>University of Notre Dame</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Zhaoxuan</first><last>Tan</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Chao</first><last>Shen</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
@@ -9956,7 +9956,7 @@
       <author><first>Anthony</first><last>Hoogs</last><affiliation>Kitware, Inc.</affiliation></author>
       <author><first>Joshua</first><last>Garland</last><affiliation>Arizona State University</affiliation></author>
       <author><first>Huan</first><last>Liu</last><affiliation>Arizona State University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>12880-12902</pages>
       <url hash="830ae0f9">2024.emnlp-main.716</url>
       <attachment type="software" hash="affb78c8">2024.emnlp-main.716.software.zip</attachment>
@@ -10188,7 +10188,7 @@
       <author><first>Weiyu</first><last>Sun</last></author>
       <author><first>Tran</first><last>Huynh</last><affiliation>Virginia Polytechnic Institute and State University</affiliation></author>
       <author><first>Dawn</first><last>Song</last><affiliation>University of California Berkeley</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
       <author><first>Ruoxi</first><last>Jia</last><affiliation>Virginia Tech</affiliation></author>
       <pages>13189-13215</pages>
       <abstract>Safety backdoor attacks in large language models (LLMs) enable harmful behaviors to be stealthily triggered while evading detection during normal interactions. The high dimensionality of the trigger search space and the diverse range of potential malicious behaviors in LLMs make this a critical open problem. This paper presents BEEAR, a novel mitigation method based on a key insight: backdoor triggers induce a uniform drift in the model’s embedding space, irrespective of the trigger’s form or targeted behavior. Leveraging this observation, we introduce a bi-level optimization approach. The inner level identifies universal perturbations to the decoder’s embeddings that steer the model towards defender-defined unwanted behaviors; the outer level fine-tunes the model to reinforce safe behaviors against these perturbations. Our experiments demonstrate the effectiveness of this approach, reducing the success rate of safety backdoor attacks from over 95% to &lt;1% for general harmful behaviors and from 47% to 0% for Sleeper Agents, without compromising the model’s helpfulness. Notably, our method relies only on defender-defined sets of safe and unwanted behaviors without any assumptions about the trigger location or attack mechanism. This work represents the first practical framework to counter safety backdoors in LLMs and provides a foundation for future advancements in AI safety and security.</abstract>
@@ -10236,7 +10236,7 @@
       <author><first>Zhiqi</first><last>Huang</last></author>
       <author><first>Puxuan</first><last>Yu</last></author>
       <author><first>Shauli</first><last>Ravfogel</last></author>
-      <author><first>James</first><last>Allan</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
+      <author id="james-allan"><first>James</first><last>Allan</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
       <pages>13261-13273</pages>
       <abstract>Multilingual models aim for language-invariant representations but still prominently encode language identity. This, along with the scarcity of high-quality parallel retrieval data, limits their performance in retrieval. We introduce LANCER, a multi-task learning framework that improves language-invariant dense retrieval by reducing language-specific signals in the embedding space. Leveraging the notion of linear concept erasure, we design a loss function that penalizes cross-correlation between representations and their language labels. LANCER leverages only English retrieval data and general multilingual corpora, training models to focus on language-invariant retrieval by semantic similarity without necessitating a vast parallel corpus. Experimental results on various datasets show our method consistently improves over baselines, with extensive analyses demonstrating greater language agnosticism.</abstract>
       <url hash="e4dd1f9d">2024.emnlp-main.736</url>
@@ -10263,7 +10263,7 @@
       <author><first>Yue</first><last>Zhou</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Henry Peng</first><last>Zou</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Barbara</first><last>Di Eugenio</last><affiliation>University of Illinois, Chicago</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <pages>13293-13304</pages>
       <abstract>We find that language models have difficulties generating fallacious and deceptive reasoning. When asked to generate deceptive outputs, language models tend to leak honest counterparts but believe them to be false. Exploiting this deficiency, we propose a jailbreak attack method that elicits an aligned language model for malicious output. Specifically, we query the model to generate a fallacious yet deceptively real procedure for the harmful behavior. Since a fallacious procedure is generally considered fake and thus harmless by LLMs, it helps bypass the safeguard mechanism. Yet the output is factually harmful since the LLM cannot fabricate fallacious solutions but proposes truthful ones. We evaluate our approach over five safety-aligned large language models, comparing four previous jailbreak methods, and show that our approach achieves competitive performance with more harmful outputs. We believe the findings could be extended beyond model safety, such as self-verification and hallucination.</abstract>
       <url hash="36bc87cd">2024.emnlp-main.738</url>
@@ -10366,7 +10366,7 @@
       <title>Style-Specific Neurons for Steering <fixed-case>LLM</fixed-case>s in Text Style Transfer</title>
       <author><first>Wen</first><last>Lai</last></author>
       <author><first>Viktor</first><last>Hangya</last><affiliation>The Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>13427-13443</pages>
       <abstract>Text style transfer (TST) aims to modify the style of a text without altering its original meaning. Large language models (LLMs) demonstrate superior performance across multiple tasks, including TST. However, in zero-shot setups, they tend to directly copy a significant portion of the input text to the output without effectively changing its style. To enhance the stylistic variety and fluency of the text, we present sNeuron-TST, a novel approach for steering LLMs using style-specific neurons in TST. Specifically, we identify neurons associated with the source and target styles and deactivate source-style-only neurons to give target-style words a higher probability, aiming to enhance the stylistic diversity of the generated text. However, we find that this deactivation negatively impacts the fluency of the generated text, which we address by proposing an improved contrastive decoding method that accounts for rapid token probability shifts across layers caused by deactivated source-style neurons. Empirical experiments demonstrate the effectiveness of the proposed method on six benchmarks, encompassing formality, toxicity, politics, politeness, authorship, and sentiment.</abstract>
       <url hash="077cbb0b">2024.emnlp-main.745</url>
@@ -10379,8 +10379,8 @@
       <author><first>Kun</first><last>Li</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>13444-13461</pages>
       <abstract>Query rewriting is a crucial technique for passage retrieval in open-domain conversational question answering (CQA). It decontexualizes conversational queries into self-contained questions suitable for off-the-shelf retrievers. Existing methods attempt to incorporate retriever’s preference during the training of rewriting models. However, these approaches typically rely on extensive annotations such as in-domain rewrites and/or relevant passage labels, limiting the models’ generalization and adaptation capabilities. In this paper, we introduce AdaQR (Adaptive Query Rewriting), a framework for training query rewriting models with limited rewrite annotations from seed datasets and completely no passage label. Our approach begins by fine-tuning compact large language models using only 10% of rewrite annotations from the seed dataset training split. The models are then utilized to self-sample rewrite candidates for each query instance, further eliminating the expense for human labeling or larger language model prompting often adopted in curating preference data. A novel approach is then proposed to assess retriever’s preference for these candidates with the probability of answers conditioned on the conversational query by marginalizing the Top-<tex-math>K</tex-math> passages. This serves as the reward for optimizing the rewriter further using Direct Preference Optimization (DPO), a process free of rewrite and retrieval annotations. Experimental results on four open-domain CQA datasets demonstrate that AdaQR not only enhances the in-domain capabilities of the rewriter with limited annotation requirement, but also adapts effectively to out-of-domain datasets.</abstract>
       <url hash="533ea3df">2024.emnlp-main.746</url>
@@ -10540,8 +10540,8 @@
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Yong</first><last>Zhao</last></author>
       <author><first>Moxin</first><last>Li</last></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>13652-13673</pages>
       <abstract>Despite the remarkable abilities of Large Language Models (LLMs) to answer questions, they often display a considerable level of overconfidence even when the question does not have a definitive answer. To avoid providing hallucinated answers to these unknown questions, existing studies typically investigate approaches to refusing to answer these questions. In this work, we propose a novel and scalable self-alignment method to utilize the LLM itself to enhance its response-ability to different types of unknown questions, being capable of not just refusing to answer but further proactively providing explanations to the unanswerability of unknown questions. Specifically, the Self-Align method first employ a two-stage class-aware self-augmentation approach to generate a large amount of unknown question-response data. Then we conduct disparity-driven self-curation to select qualified data for fine-tuning the LLM itself for aligning the responses to unknown questions as desired. Experimental results on two datasets across four types of unknown questions validate the superiority of the Self-Aligned method over existing baselines in terms of three types of task formulation.</abstract>
       <url hash="eb489f57">2024.emnlp-main.757</url>
@@ -10655,9 +10655,9 @@
       <author><first>Israt</first><last>Jahan</last><affiliation>York University</affiliation></author>
       <author><first>Amran</first><last>Bhuiyan</last></author>
       <author><first>Chee Wei</first><last>Tan</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Jimmy</first><last>Huang</last><affiliation>York University and York University</affiliation></author>
       <pages>13785-13816</pages>
       <abstract>Large Language Models (LLMs) have recently gained significant attention due to their remarkable capabilities in performing diverse tasks across various domains. However, a thorough evaluation of these models is crucial before deploying them in real-world applications to ensure they produce reliable performance. Despite the well-established importance of evaluating LLMs in the community, the complexity of the evaluation process has led to varied evaluation setups, causing inconsistencies in findings and interpretations. To address this, we systematically review the primary challenges and limitations causing these inconsistencies and unreliable evaluations in various steps of LLM evaluation. Based on our critical review, we present our perspectives and recommendations to ensure LLM evaluations are reproducible, reliable, and robust.</abstract>
@@ -10723,7 +10723,7 @@
       <title>A Systematic Analysis of Large Language Models as Soft Reasoners: The Case of Syllogistic Inferences</title>
       <author><first>Leonardo</first><last>Bertolazzi</last><affiliation>University of Trento</affiliation></author>
       <author><first>Albert</first><last>Gatt</last><affiliation>Utrecht University</affiliation></author>
-      <author><first>Raffaella</first><last>Bernardi</last><affiliation>University of Trento</affiliation></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last><affiliation>University of Trento</affiliation></author>
       <pages>13882-13905</pages>
       <abstract>The reasoning abilities of Large Language Models (LLMs) are becoming a central focus of study in NLP. In this paper, we consider the case of syllogistic reasoning, an area of deductive reasoning studied extensively in logic and cognitive psychology. Previous research has shown that pre-trained LLMs exhibit reasoning biases, such as content effects, avoid answering that no conclusion follows, align with human difficulties, and struggle with multi-step reasoning. We contribute to this research line by systematically investigating the effects of chain-of-thought reasoning, in-context learning (ICL), and supervised fine-tuning (SFT) on syllogistic reasoning, considering syllogisms with conclusions that support or violate world knowledge and with multiple premises. Crucially, we go beyond the standard focus on accuracy, with an in-depth analysis of the conclusions generated by the models. Our results suggest that the behavior of pre-trained LLMs can be explained by heuristics studied in cognitive science and that both ICL and SFT improve model performance on valid inferences, although only the latter can mitigate most reasoning biases while being consistent.</abstract>
       <url hash="913bb2c0">2024.emnlp-main.769</url>
@@ -10734,7 +10734,7 @@
       <title>Pre-training Cross-lingual Open Domain Question Answering with Large-scale Synthetic Supervision</title>
       <author><first>Fan</first><last>Jiang</last></author>
       <author><first>Tom</first><last>Drummond</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
       <pages>13906-13933</pages>
       <url hash="27a9f4cf">2024.emnlp-main.770</url>
       <bibkey>jiang-etal-2024-pre</bibkey>
@@ -10748,9 +10748,9 @@
       <author><first>Alessio</first><last>Brutti</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Mauro</first><last>Cettolo</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Roberto</first><last>Gretter</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Marco</first><last>Matassoni</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Mohamed</first><last>Nabih</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>13934-13947</pages>
       <abstract>The rise of foundation models (FMs), coupled with regulatory efforts addressing their risks and impacts, has sparked significant interest in open-source models. However, existing speech FMs (SFMs) fall short of full compliance with the open-source principles, even if claimed otherwise, as no existing SFM has model weights, code, and training data publicly available under open-source terms. In this work, we take the first step toward filling this gap by focusing on the 24 official languages of the European Union (EU). We collect suitable training data by surveying automatic speech recognition datasets and unlabeled speech corpora under open-source compliant licenses, for a total of 950k hours. Additionally, we release automatic transcripts for 441k hours of unlabeled data under the permissive CC-BY license, thereby facilitating the creation of open-source SFMs for the EU languages.</abstract>
       <url hash="a7d7db99">2024.emnlp-main.771</url>
@@ -10853,7 +10853,7 @@
     </paper>
     <paper id="778">
       <title><fixed-case>MAIR</fixed-case>: A Massive Benchmark for Evaluating Instructed Retrieval</title>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhengliang</first><last>Shi</last></author>
       <author><first>Wu Jiu</first><last>Long</last></author>
       <author><first>Lingyong</first><last>Yan</last><affiliation>Baidu Inc.</affiliation></author>
@@ -10912,7 +10912,7 @@
       <author><first>Kangxi</first><last>Wu</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Liang</first><last>Pang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>14131-14143</pages>
       <abstract>The black-box nature of large language models (LLMs) poses challenges in interpreting results, impacting issues such as data intellectual property protection and hallucination tracing. Training data attribution (TDA) methods are considered effective solutions to address these challenges.Most recent TDA methods rely on influence functions, assuming the model achieves minimized empirical risk. However, achieving this criterion is difficult, and sourcing accuracy can be compromised by fitting errors during model training. In this paper, we introduce a novel TDA method called Debias and Denoise Attribution (DDA), which enhances influence functions by addressing fitting errors. Specifically, the debias strategy seeks to improve the performance of influence functions by eliminating the knowledge bias present in the base model before fine-tuning, while the denoise strategy aims to reduce discrepancies in influence scores arising from varying degrees of fitting during the training process through smoothing techniques.Experimental results demonstrate that our method significantly outperforms existing approaches, achieving an averaged AUC of 91.64%. Moreover, DDA exhibits strong generality and scalability across various sources and different-scale models like LLaMA2, QWEN2, and Mistral.</abstract>
       <url hash="bd42bfe3">2024.emnlp-main.782</url>
@@ -10925,7 +10925,7 @@
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>YoungJoon</first><last>Jang</last><affiliation>Korea University and Hongik University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>14144-14160</pages>
       <abstract>As the utilization of Large Language Models (LLMs) becomes more widespread, there is a growing demand for their ability to handle more complex and longer external knowledge across various use cases. Most existing evaluations of the open-ended question answering (ODQA) task, which necessitates the use of external knowledge, focus solely on whether the model provides the correct answer. However, even when LLMs answer correctly, they often fail to provide an obvious source for their responses. Therefore, it is necessary to jointly evaluate and verify the correctness of the answers and the appropriateness of grounded evidence in complex external contexts. To address this issue, we examine the phenomenon of discrepancies in abilities across two distinct tasks—QA and evidence selection—when performed simultaneously, from the perspective of task alignment. To verify LLMs’ task alignment, we introduce a verification framework and resources considering both semantic relevancy and structural diversity of the given long context knowledge. Through extensive experiments and detailed analysis, we provide insights into the task misalignment between QA and evidence selection. Our code and resources will be available upon acceptance.</abstract>
       <url hash="8637fe72">2024.emnlp-main.783</url>
@@ -10986,7 +10986,7 @@
     <paper id="787">
       <title>Mixture-of-Skills: Learning to Optimize Data Usage for Fine-Tuning Large Language Models</title>
       <author><first>Minghao</first><last>Wu</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
       <author><first>Reza</first><last>Haf</last><affiliation>Monash University</affiliation></author>
       <pages>14226-14240</pages>
@@ -11078,7 +11078,7 @@
       <author><first>Fengjun</first><last>Pan</last></author>
       <author><first>Xiaobao</first><last>Wu</last></author>
       <author><first>Zongrui</first><last>Li</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>14338-14364</pages>
       <abstract>Fallacies are defective arguments with faulty reasoning. Detecting and classifying them is a crucial NLP task to prevent misinformation, manipulative claims, and biased decisions. However, existing fallacy classifiers are limited by the requirement for sufficient labeled data for training, which hinders their out-of-distribution (OOD) generalization abilities. In this paper, we focus on leveraging Large Language Models (LLMs) for zero-shot fallacy classification. To elicit fallacy-related knowledge and reasoning abilities of LLMs, we propose diverse single-round and multi-round prompting schemes, applying different taskspecific instructions such as extraction, summarization, and Chain-of-Thought reasoning. With comprehensive experiments on benchmark datasets, we suggest that LLMs could be potential zero-shot fallacy classifiers. In general, LLMs under single-round prompting schemes have achieved acceptable zeroshot performances compared to the best fullshot baselines and can outperform them in all OOD inference scenarios and some opendomain tasks. Our novel multi-round prompting schemes can effectively bring about more improvements, especially for small LLMs. Our analysis further underlines the future research on zero-shot fallacy classification. Codes and data are available at: https://github.com/panFJCharlotte98/Fallacy_Detection.</abstract>
       <url hash="0d71a720">2024.emnlp-main.794</url>
@@ -11105,7 +11105,7 @@
       <author><first>Pierluigi</first><last>Cassotti</last><affiliation>Göteborg University</affiliation></author>
       <author><first>Bill</first><last>Noble</last><affiliation>Göteborg University and University of Gothenburg</affiliation></author>
       <author><first>David</first><last>Alfter</last><affiliation>Göteborg University</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Nina</first><last>Tahmasebi</last><affiliation>Göteborg University</affiliation></author>
       <pages>14379-14393</pages>
       <abstract>Word Usage Graphs (WUGs) represent human semantic proximity judgments for pairs of word uses in a weighted graph, which can be clustered to infer word sense clusters from simple pairwise word use judgments, avoiding the need for word sense definitions. SemEval-2020 Task 1 provided the first and to date largest manually annotated, diachronic WUG dataset. In this paper, we check the robustness and correctness of the annotations by continuing the SemEval annotation algorithm for two more rounds and comparing against an established annotation paradigm. Further, we test the reproducibility by resampling a new, smaller set of word uses from the SemEval source corpora and annotating them. Our work contributes to a better understanding of the problems and opportunities of the WUG annotation paradigm and points to future improvements.</abstract>
@@ -11154,7 +11154,7 @@
     <paper id="800">
       <title>Evaluating <tex-math>n</tex-math>-Gram Novelty of Language Models Using Rusty-<fixed-case>DAWG</fixed-case></title>
       <author><first>William</first><last>Merrill</last><affiliation>New York University</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Yanai</first><last>Elazar</last><affiliation>Allen Institute for Artificial Intelligence and Department of Computer Science</affiliation></author>
       <pages>14459-14473</pages>
       <abstract>How novel are texts generated by language models (LMs) relative to their training corpora? In this work, we investigate the extent to which modern LMs generate <tex-math>n</tex-math>-grams from their training data, evaluating both (i) the probability LMs assign to complete training <tex-math>n</tex-math>-grams and (ii) <tex-math>n</tex-math>-novelty, the proportion of <tex-math>n</tex-math>-grams generated by an LM that did not appear in the training data (for arbitrarily large <tex-math>n</tex-math>). To enable arbitrary-length <tex-math>n</tex-math>-gram search over a corpus in constant time w.r.t. corpus size, we develop Rusty-DAWG, a novel search tool inspired by indexing of genomic data. We compare the novelty of LM-generated text to human-written text and explore factors that affect generation novelty, focusing on the Pythia models. We find that, for <tex-math>n &gt; 4</tex-math>, LM-generated text is less novel than human-written text, though it is more novel for smaller <tex-math>n</tex-math>. Larger LMs and more constrained decoding strategies both decrease novelty. Finally, we show that LMs complete <tex-math>n</tex-math>-grams with lower loss if they are more frequent in the training data. Overall, our results reveal factors influencing the novelty of LM-generated text, and we release Rusty-DAWG to facilitate further pretraining data research.</abstract>
@@ -11183,7 +11183,7 @@
       <author><first>Sweta</first><last>Agrawal</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>António</first><last>Farinhas</last><affiliation>Instituto Superior Técnico</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>14491-14502</pages>
       <abstract>Automatic metrics for evaluating translation quality are typically validated by measuring how well they correlate with human assessments. However, correlation methods tend to capture only the ability of metrics to differentiate between good and bad source-translation pairs, overlooking their reliability in distinguishing alternative translations for the same source. In this paper, we confirm that this is indeed the case by showing that current metrics are insensitive to nuanced differences in translation quality. This effect is most pronounced when the quality is high and the variance among alternatives is low. Given this finding, we shift towards detecting high-quality correct translations, an important problem in practical decision-making scenarios where a binary check of correctness is prioritized over a nuanced evaluation of quality. Using the MQM framework as the gold standard, we systematically stress-test the ability of current metrics to identify translations with no errors as marked by humans. Our findings reveal that current metrics often over or underestimate translation quality, indicating significant room for improvement in machine translation evaluation.</abstract>
       <url hash="59eef01b">2024.emnlp-main.802</url>
@@ -11199,7 +11199,7 @@
       <author><first>Gonçalo</first><last>Faria</last><affiliation>Instituto de Telecomunicações, Portugal</affiliation></author>
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>Nuno M</first><last>Guerreiro</last><affiliation>Unbabel and Instituto Superior Técnico</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>14503-14519</pages>
       <abstract>Alignment with human preferences is an important step in developing accurate and safe large language models. This is no exception in machine translation (MT), where better handling of language nuances and context-specific variations leads to improved quality. However, preference data based on human feedback can be very expensive to obtain and curate at a large scale. Automatic metrics, on the other hand, can induce preferences, but they might not match human expectations perfectly. In this paper, we propose an approach that leverages the best of both worlds. We first collect sentence-level quality assessments from professional linguists on translations generated by multiple high-quality MT systems and evaluate the ability of current automatic metrics to recover these preferences. We then use this analysis to curate a new dataset, MT-Pref (metric induced translation preference) dataset, which comprises 18k instances covering 18 language directions, using texts sourced from multiple domains post-2022. We show that aligning TOWER models on MT-Pref significantly improves translation quality on WMT23 and FLORES benchmarks.</abstract>
       <url hash="4069f8cf">2024.emnlp-main.803</url>
@@ -11227,7 +11227,7 @@
       <author><first>Dawei</first><last>Yin</last><affiliation>Baidu</affiliation></author>
       <author><first>Pengjie</first><last>Ren</last><affiliation>Shandong University</affiliation></author>
       <author><first>Zhumin</first><last>Chen</last><affiliation>Shandong University</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Zhaochun</first><last>Ren</last><affiliation>Leiden University</affiliation></author>
       <pages>14535-14556</pages>
       <abstract>Despite their success at many natural language processing (NLP) tasks, large language models still struggle to effectively leverage knowledge for knowledge-intensive tasks, manifesting limitations such as generating incomplete, non-factual, or illogical answers. These limitations stem from inadequate knowledge awareness of LLMs during vanilla fine-tuning. To address these problems, we propose a knowledge-aware fine-tuning (KnowTuning) method to improve fine-grained and coarse-grained knowledge awareness of LLMs. We devise a fine-grained knowledge augmentation stage to train LLMs to identify difficult fine-grained knowledge in answers. We also propose a coarse-grained knowledge comparison stage to train LLMs to distinguish between reliable and unreliable knowledge, in three aspects: completeness, factuality, and logicality. Extensive experiments on both generic and medical question answering (QA) datasets confirm the effectiveness of KnowTuning, through automatic and human evaluations, across various sizes of LLMs. We further verify that KnowTuning generates more facts with less factual error rate under fine-grained facts evaluation.</abstract>
@@ -11286,7 +11286,7 @@
       <title>Small Agent Can Also Rock! Empowering Small Language Models as Hallucination Detector</title>
       <author><first>Xiaoxue</first><last>Cheng</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Hongzhi</first><last>Zhang</last><affiliation>Kuaishou- 快手科技</affiliation></author>
       <author><first>Fuzheng</first><last>Zhang</last></author>
       <author><first>Di</first><last>Zhang</last><affiliation>Kuaishou Technology</affiliation></author>
@@ -11360,7 +11360,7 @@
       <title><fixed-case>D</fixed-case>yna<fixed-case>T</fixed-case>hink: Fast or Slow? A Dynamic Decision-Making Framework for Large Language Models</title>
       <author><first>Jiabao</first><last>Pan</last></author>
       <author><first>Yan</first><last>Zhang</last><affiliation>Tencent</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Zuozhu</first><last>Liu</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Hongwei</first><last>Wang</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong (Shenzhen); National University of Singapore and National University of Singapore</affiliation></author>
@@ -11399,7 +11399,7 @@
     </paper>
     <paper id="817">
       <title>Learn Beyond The Answer: Training Language Models with Reflection for Mathematical Reasoning</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Zhenwen</first><last>Liang</last></author>
       <author><first>Wenhao</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
@@ -11643,7 +11643,7 @@
       <author><first>Aashiq</first><last>Muhamed</last></author>
       <author><first>Oscar</first><last>Li</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>David</first><last>Woodruff</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Virginia</first><last>Smith</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>14978-15003</pages>
       <url hash="8a7d2a7a">2024.emnlp-main.835</url>
@@ -11747,7 +11747,7 @@
       <author><first>Xiaobao</first><last>Wu</last></author>
       <author><first>Liangming</first><last>Pan</last><affiliation>University of Arizona</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>15118-15133</pages>
       <abstract>Knowledge editing injects knowledge updates into language models to keep them correct and up-to-date. However, its current evaluations deviate significantly from practice: their knowledge updates solely consist of structured facts derived from meticulously crafted datasets, instead of practical sources—unstructured texts like news articles, and they often overlook practical real-world knowledge updates. To address these issues, in this paper we propose AKEW (Assessing Knowledge Editing in the Wild), a new practical benchmark for knowledge editing. AKEW fully covers three editing settings of knowledge updates: structured facts, unstructured texts as facts, and extracted triplets. It further introduces new datasets featuring both counterfactual and real-world knowledge updates. Through extensive experiments, we demonstrate the considerable gap between state-of-the-art knowledge-editing methods and practical scenarios. Our analyses further highlight key insights to motivate future research for practical knowledge editing.</abstract>
       <url hash="502cb103">2024.emnlp-main.843</url>
@@ -11764,7 +11764,7 @@
       <author><first>James</first><last>Grimmelmann</last><affiliation>Cornell University</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington, University of Washington, Allen Institute for Artificial Intelligence and University of Washington, Seattle</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <author><first>Pang Wei</first><last>Koh</last><affiliation>University of Washington</affiliation></author>
       <pages>15134-15158</pages>
       <abstract>Evaluating the degree of reproduction of copyright-protected content by language models (LMs) is of significant interest to the AI and legal communities. Although both literal and non-literal similarities are considered by courts when assessing the degree of reproduction, prior research has focused only on literal similarities. To bridge this gap, we introduce CopyBench, a benchmark designed to measure both literal and non-literal copying in LM generations. Using copyrighted fiction books as text sources, we provide automatic evaluation protocols to assess literal and non-literal copying, balanced against the model utility in terms of the ability to recall facts from the copyrighted works and generate fluent completions. We find that, although literal copying is relatively rare, two types of non-literal copying—event copying and character copying—occur even in models as small as 7B parameters. Larger models demonstrate significantly more copying, with literal copying rates increasing from 0.2% to 10.5% and non-literal copying from 2.3% to 5.9% when comparing Llama3-8B and 70B models, respectively. We further evaluate the effectiveness of current strategies for mitigating copying and show that (1) training-time alignment can reduce literal copying but may increase non-literal copying, and (2) current inference-time mitigation methods primarily reduce literal but not non-literal copying.</abstract>
@@ -11876,12 +11876,12 @@
       <author><first>Senjie</first><last>Jin</last></author>
       <author><first>Caishuang</first><last>Huang</last></author>
       <author><first>Junjie</first><last>Ye</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Yuhao</first><last>Zhou</last></author>
       <author><first>Zhiheng</first><last>Xi</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>15270-15283</pages>
       <abstract>Reinforcement Learning from Human Feedback (RLHF) is a crucial approach to aligning language models with human values and intentions. A fundamental challenge in this method lies in ensuring that the reward model accurately understands and evaluates human preferences. Current methods rely on ranking losses to teach the reward model to assess preferences, but they are susceptible to noise and ambiguous data, often failing to deeply understand human intentions. To address this issue, we introduce contrastive learning into the reward modeling process. In addition to supervised ranking loss, we introduce an unsupervised contrastive loss to enable the reward model to fully capture the distinctions in contrastive data. Experimental results demonstrate that the proposed contrastive learning-based reward modeling method effectively enhances the generalization of the reward model, stabilizes the reinforcement learning training process, and improves the final alignment with human preferences.</abstract>
       <url hash="3b4d0484">2024.emnlp-main.852</url>
@@ -11894,7 +11894,7 @@
       <author><first>Yixing</first><last>Fan</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
       <author><first>Ruqing</first><last>Zhang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>15284-15298</pages>
       <abstract>Table entity linking (TEL) aims to map entity mentions in the table to their corresponding entities in a knowledge base (KB). The core of this task is to leverage structured contexts, specifically row and column contexts, to enhance the semantics of mentions in entity disambiguation. Most entity linking (EL) methods primarily focus on understanding sequential text contexts, making it difficult to adapt to the row and column structure of tables. Additionally, existing methods for TEL indiscriminately mix row and column contexts together, overlooking their semantic differences. In this paper, we explicitly distinguish the modeling of row and column contexts, and propose a method called RoCEL to capture their distinct semantics. Specifically, for row contexts in tables, we take the attention mechanism to learn the implicit relational dependencies between each cell and the mention. For column contexts in tables, we employ a set-wise encoder to learn the categorical information about the group of mentions. At last, we merge both contexts to obtain the final mention embedding for link prediction. Experiments on four benchmarks show that our approach outperforms the state-of-the-art (SOTA) baseline by about 1.5% on the in-domain dataset, and by 3.7% on average across three out-of-domain datasets.</abstract>
       <url hash="82a2fa2f">2024.emnlp-main.853</url>
@@ -11936,7 +11936,7 @@
       <author><first>Jingtao</first><last>Cao</last></author>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Jeff Z.</first><last>Pan</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>15322-15336</pages>
       <abstract>Large Language Models (LLMs) can interact with the real world by connecting with versatile external APIs, resulting in better problem-solving and task automation capabilities. Previous research primarily either focuses on APIs with limited arguments from a single source or overlooks the complex dependency relationship between different APIs. However, it is essential to utilize multiple APIs collaboratively from various sources, especially for complex user instructions. In this paper, we introduce MetaBench, the first benchmark to evaluate LLMs’ ability to plan and execute multiple APIs from various sources in order to complete the user’s task. Specifically, we consider two significant challenges in multiple APIs: 1) graph structures: some APIs can be executed independently while others need to be executed one by one, resulting in graph-like execution order; and 2) permission constraints: which source is authorized to execute the API call. We have experimental results on 9 distinct LLMs; e.g., GPT-4o achieves only a 2.0% success rate at the most complex instruction, revealing that the existing state-of-the-art LLMs still cannot perform well in this situation even with the help of in-context learning and finetuning. Our code and data are publicly available at <url>https://github.com/ruleGreen/AppBench</url>.</abstract>
       <url hash="dfd7ad94">2024.emnlp-main.856</url>
@@ -11947,7 +11947,7 @@
       <title>Not Everything is All You Need: Toward Low-Redundant Optimization for Large Language Model Alignment</title>
       <author><first>Zhipeng</first><last>Chen</last></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Jingyuan</first><last>Wang</last><affiliation>Beijing University of Aeronautics and Astronautics</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>15337-15351</pages>
@@ -12084,7 +12084,7 @@
       <author><first>Rajiv</first><last>Jain</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Vlad I</first><last>Morariu</last><affiliation>Adobe</affiliation></author>
       <author><first>Ramit</first><last>Sawhney</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Dinesh</first><last>Manocha</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>15485-15505</pages>
       <abstract>Document structure editing involves manipulating localized textual, visual, and layout components in document images based on the user’s requests. Past works have shown that multimodal grounding of user requests in the document image and identifying the accurate structural components and their associated attributes remain key challenges for this task. To address these, we introduce the DocEditAgent, a novel framework that performs end-to-end document editing by leveraging Large Multimodal Models (LMMs). It consists of three novel components – (1) Doc2Command to simultaneously localize edit regions of interest (RoI) and disambiguate user edit requests into edit commands. (2) LLM-based Command Reformulation prompting to tailor edit commands originally intended for specialized software into edit instructions suitable for generalist LMMs. (3) Moreover, DocEditAgent processes these outputs via Large Multimodal Models like GPT-4V and Gemini, to parse the document layout, execute edits on grounded Region of Interest (RoI), and generate the edited document image. Extensive experiments on the DocEdit dataset show that DocEditAgent significantly outperforms strong baselines on edit command generation (2-33%), RoI bounding box detection (12-31%), and overall document editing (1-12%) tasks.</abstract>
@@ -12298,7 +12298,7 @@
       <author><first>Kexin</first><last>Huang</last></author>
       <author><first>Tianle</first><last>Gu</last></author>
       <author><first>Yixu</first><last>Wang</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last><affiliation>Shanghai University</affiliation></author>
+      <author><first>Jian</first><last>Wang</last><affiliation>Shanghai University</affiliation></author>
       <author><first>Liang</first><last>Dandan</last></author>
       <author><first>Zhixu</first><last>Li</last></author>
       <author><first>Yan</first><last>Teng</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
@@ -12344,7 +12344,7 @@
       <author><first>Sarah</first><last>Masud</last><affiliation>Indraprastha Institute of Information Technology Delhi (IIIT-Delhi)</affiliation></author>
       <author><first>Sahajpreet</first><last>Singh</last><affiliation>IIT Delhi</affiliation></author>
       <author><first>Viktor</first><last>Hangya</last><affiliation>The Center for Information and Language Processing, University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Tanmoy</first><last>Chakraborty</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <pages>15847-15863</pages>
       <abstract>For subjective tasks such as hate detection, where people perceive hate differently, the Large Language Model’s (LLM) ability to represent diverse groups is unclear. By including additional context in prompts, we comprehensively analyze LLM’s sensitivity to geographical priming, persona attributes, and numerical information to assess how well the needs of various groups are reflected. Our findings on two LLMs, five languages, and six datasets reveal that mimicking persona-based attributes leads to annotation variability. Meanwhile, incorporating geographical signals leads to better regional alignment. We also find that the LLMs are sensitive to numerical anchors, indicating the ability to leverage community-based flagging efforts and exposure to adversaries. Our work provides preliminary guidelines and highlights the nuances of applying LLMs in culturally sensitive cases.</abstract>
@@ -12372,7 +12372,7 @@
       <author><first>Shaoxiong</first><last>Ji</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Timothee</first><last>Mickus</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Vincent</first><last>Segonne</last><affiliation>Université de Bretagne Sud</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <pages>15882-15894</pages>
       <abstract>Pretrained language models (PLMs) display impressive performances and have captured the attention of the NLP community.Establishing best practices in pretraining has, therefore, become a major focus of NLP research, especially since insights gained from monolingual English models may not necessarily apply to more complex multilingual models.One significant caveat of the current state of the art is that different works are rarely comparable: they often discuss different parameter counts, training data, and evaluation methodology.This paper proposes a comparison of multilingual pretraining objectives in a controlled methodological environment. We ensure that training data and model architectures are comparable, and discuss the downstream performances across 6 languages that we observe in probing and fine-tuning scenarios.We make two key observations: (1) the architecture dictates which pretraining objective is optimal; (2) multilingual translation is a very effective pretraining objective under the right conditions.We make our code, data, and model weights available at https://github.com/Helsinki-NLP/lm-vs-mt.</abstract>
       <url hash="83ff7b0b">2024.emnlp-main.888</url>
@@ -12382,7 +12382,7 @@
     <paper id="889">
       <title>Can <fixed-case>LLM</fixed-case>s replace Neil de<fixed-case>G</fixed-case>rasse Tyson? Evaluating the Reliability of <fixed-case>LLM</fixed-case>s as Science Communicators</title>
       <author><first>Prasoon</first><last>Bajpai</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <author><first>Subhabrata</first><last>Dutta</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <author><first>Tanmoy</first><last>Chakraborty</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <pages>15895-15912</pages>
@@ -12535,7 +12535,7 @@
       <author><first>Ruchira</first><last>Dhar</last></author>
       <author><first>Filippos</first><last>Stamatiou</last><affiliation>Copenhagen University and University of Stellenbosch</affiliation></author>
       <author><first>Nicolas</first><last>Garneau</last></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>16096-16111</pages>
       <abstract>Knowledge claims are abundant in the literature on large language models (LLMs); but can we say that GPT-4 truly “knows” the Earth is round? To address this question, we review standard definitions of knowledge in epistemology and we formalize interpretations applicable to LLMs. In doing so, we identify inconsistencies and gaps in how current NLP research conceptualizes knowledge with respect to epistemological frameworks. Additionally, we conduct a survey of 100 professional philosophers and computer scientists to compare their preferences in knowledge definitions and their views on whether LLMs can really be said to know. Finally, we suggest evaluation protocols for testing knowledge in accordance to the most relevant definitions.</abstract>
       <url hash="9f480e4d">2024.emnlp-main.900</url>
@@ -12664,7 +12664,7 @@
     </paper>
     <paper id="910">
       <title><fixed-case>W</fixed-case>orry<fixed-case>W</fixed-case>ords: Norms of Anxiety Association for over 44k <fixed-case>E</fixed-case>nglish Words</title>
-      <author><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>16261-16278</pages>
       <abstract>Anxiety, the anticipatory unease about a potential negative outcome, is a common and beneficial human emotion. However, there is still much that is not known about anxiety, such as how it relates to our body and how it manifests in language; especially pertinent given the increasing impact of related disorders.In this work,we introduce <i>WorryWords</i>, the first large-scale repository of manually derived word–anxiety associations for over 44,450 English words. We show that the anxiety associations are highly reliable.We use WorryWords to study the relationship between anxiety and other emotion constructs, as well as the rate at which children acquire anxiety words with age. Finally, we show that using WorryWords alone, one can accurately track the change of anxiety in streams of text.WorryWords enables a wide variety of anxiety-related research in psychology, NLP, public health, and social sciences.WorryWords (and its translations to over 100 languages) is freely available. http://saifmohammad.com/worrywords.html</abstract>
       <url hash="e3710966">2024.emnlp-main.910</url>
@@ -12676,7 +12676,7 @@
       <author><first>Sumanth</first><last>Doddapaneni</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Mohammed Safi Ur Rahman</first><last>Khan</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Sshubam</first><last>Verma</last></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>16279-16309</pages>
       <abstract>Large Language Models (LLMs) are increasingly relied upon to evaluate text outputs of other LLMs, thereby influencing leaderboards and development decisions. However, concerns persist over the accuracy of these assessments and the potential for misleading conclusions. In this work, we investigate the effectiveness of LLMs as evaluators for text generation tasks. We propose FBI, a novel framework designed to examine the proficiency of Evaluator LLMs in assessing four critical abilities in other LLMs: factual accuracy, instruction following, coherence in long-form writing, and reasoning proficiency. By introducing targeted perturbations in answers generated by LLMs, that clearly impact one of these key capabilities, we test whether an Evaluator LLM can detect these quality drops. By creating a total of 2400 perturbed answers covering 22 perturbation categories, we conduct a comprehensive study using different evaluation strategies on five prominent LLMs commonly used as evaluators in the literature. Our findings reveal significant shortcomings in current Evaluator LLMs, which failed to identify quality drops in over 50% of cases on average. Single-answer and pairwise evaluations demonstrated notable limitations, whereas reference-based evaluations showed comparatively better performance. <i>These results underscore the unreliable nature of current Evaluator LLMs and advocate for cautious implementation in practical applications.</i></abstract>
       <url hash="72f3c285">2024.emnlp-main.911</url>
@@ -12695,7 +12695,7 @@
       <author><first>Yiwen</first><last>Ding</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>16310-16324</pages>
       <abstract>Large language models (LLMs) have achieved tremendous success in understanding language and processing text. However, question-answering (QA) on lengthy documents faces challenges of resource constraints and a high propensity for errors, even for the most advanced models such as GPT-4 and Claude2.In this paper, we introduce _LongAgent_, a multi-agent collaboration method that enables efficient and effective QA over <tex-math>128k</tex-math>-token-long documents. _LongAgent_ adopts a _divide-and-conquer_ strategy, breaking down lengthy documents into shorter, more manageable text chunks. A leader agent comprehends the user’s query and organizes the member agents to read their assigned chunks, reasoning a final answer through multiple rounds of discussion.Due to members’ hallucinations, it’s difficult to guarantee that every response provided by each member is accurate.To address this, we develop an _inter-member communication_ mechanism that facilitates information sharing, allowing for the detection and mitigation of hallucinatory responses.Experimental results show that a LLaMA-2 7B driven by _LongAgent_ can effectively support QA over <tex-math>128k</tex-math>-token documents, achieving 16.42% and 1.63% accuracy gains over GPT-4 on single-hop and multi-hop QA settings, respectively.</abstract>
       <url hash="f3596146">2024.emnlp-main.912</url>
@@ -12707,7 +12707,7 @@
       <author><first>Till Raphael</first><last>Saenger</last></author>
       <author><first>Musashi</first><last>Hinck</last><affiliation>Intel</affiliation></author>
       <author><first>Justin</first><last>Grimmer</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Brandon M.</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon M.</first><last>Stewart</last></author>
       <pages>16325-16342</pages>
       <abstract>We introduce a three-part framework for constructing persuasive messages, AutoPersuade. First, we curate a large collection of arguments and gather human evaluations of their persuasiveness. Next, we introduce a novel topic model to identify the features of these arguments that influence persuasion. Finally, we use the model to predict the persuasiveness of new arguments and to assess the causal effects of argument components, offering an explanation of the results. We demonstrate the effectiveness of AutoPersuade in an experimental study on arguments for veganism, validating our findings through human studies and out-of-sample predictions.</abstract>
       <url hash="fbbc155c">2024.emnlp-main.913</url>
@@ -12737,7 +12737,7 @@
       <author><first>Yurong</first><last>Mou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Ming</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>16361-16376</pages>
       <abstract>Human cognition exhibits systematic compositionality, the algebraic ability to generate infinite novel combinations from finite learned components, which is the key to understanding and reasoning about complex logic. In this work, we investigate the compositionality of large language models (LLMs) in mathematical reasoning. Specifically, we construct a new dataset MathTrap by introducing carefully designed logical traps into the problem descriptions of MATH and GSM8K. Since problems with logical flaws are quite rare in the real world, these represent “unseen” cases to LLMs. Solving these requires the models to systematically compose (1) the mathematical knowledge involved in the original problems with (2) knowledge related to the introduced traps. Our experiments show that while LLMs possess both components of requisite knowledge, they do not <b>spontaneously</b> combine them to handle these novel cases. We explore several methods to mitigate this deficiency, such as natural language prompts, few-shot demonstrations, and fine-tuning. We find that LLMs’ performance can be improved through the above external intervention. Overall, systematic compositionality remains an open challenge for large language models.</abstract>
       <url hash="f25092a7">2024.emnlp-main.915</url>
@@ -12774,7 +12774,7 @@
       <author><first>Guangliang</first><last>Liu</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Haitao</first><last>Mao</last></author>
       <author><first>Jiliang</first><last>Tang</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>16439-16455</pages>
       <abstract>Large Language Models (LLMs) are capable of producing content that perpetuates stereotypes, discrimination, and toxicity.The recently proposed <i>moral self-correction</i> is a computationally efficient method for reducing harmful content in the responses of LLMs. However, the process of how injecting self-correction instructions can modify the behavior of LLMs remains under-explored. In this paper, we explore the effectiveness of moral self-correction by answering three research questions: (1) In what scenarios does moral self-correction work? (2) What are the internal mechanisms of LLMs, e.g., hidden states, that are influenced by moral self-correction instructions? (3) Is intrinsic moral self-correction actually superficial in terms of reduced immorality in hidden states? We argue that self-correction can help LLMs find a shortcut to more morally correct output, rather than truly reducing the immorality stored in hidden states.Through empirical investigation with tasks of language generation and multi-choice question answering, we conclude: (i) LLMs exhibit good performance across both tasks, and self-correction instructions are particularly beneficial when the correct answer is already top-ranked; (ii) The morality levels in intermediate hidden states are strong indicators as to whether one instruction would be more effective than another; (iii) Based on our analysis of intermediate hidden states and task case studies of self-correction behaviors, we are first to propose the hypothesis that intrinsic moral self-correction is in fact superficial.</abstract>
       <url hash="8f2697ec">2024.emnlp-main.918</url>
@@ -12898,7 +12898,7 @@
       <author><first>Zhiyuan</first><last>Zeng</last></author>
       <author><first>Qinyuan</first><last>Cheng</last></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>16618-16635</pages>
       <url hash="65c02e11">2024.emnlp-main.927</url>
       <bibkey>yin-etal-2024-explicit</bibkey>
@@ -13183,7 +13183,7 @@
     </paper>
     <paper id="949">
       <title>Foundational Autoraters: Taming Large Language Models for Better Automatic Evaluation</title>
-      <author><first>Tu</first><last>Vu</last><affiliation>Virginia Tech and Google</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>Virginia Tech and Google</affiliation></author>
       <author><first>Kalpesh</first><last>Krishna</last><affiliation>Google</affiliation></author>
       <author><first>Salaheddin</first><last>Alzubi</last><affiliation>College of Information and Computer Science, University of Massachusetts at Amherst</affiliation></author>
       <author><first>Chris</first><last>Tar</last></author>
@@ -13197,7 +13197,7 @@
     </paper>
     <paper id="950">
       <title>Do <fixed-case>LLM</fixed-case>s learn a true syntactic universal?</title>
-      <author><first>John T.</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
+      <author id="john-hale"><first>John T.</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
       <author><first>Miloš</first><last>Stanojević</last><affiliation>University College London and Google DeepMind</affiliation></author>
       <pages>17106-17119</pages>
       <abstract>Do large multilingual language models learn language universals? We consider a candidate universal much-discussed in the linguistics literature, the Final-over-Final Condition (Sheehan et al., 2017b). This Condition is syntactic in the sense that it can only be stated by reference to abstract sentence properties such as nested phrases and head direction. A study of typologically diverse “mixed head direction” languages confirms that the Condition holds in corpora. But in a targeted syntactic evaluation, Gemini Pro only seems to respect the Condition in German, Russian, Hungarian and Serbian. These relatively high-resource languages contrast with Basque, where Gemini Pro does not seem to have learned the Condition at all. This result suggests that modern language models may need additional sources of bias in order to become truly human-like, within a developmentally-realistic budget of training data.</abstract>
@@ -13273,7 +13273,7 @@
       <title>Adaptive Question Answering: Enhancing Language Model Proficiency for Addressing Knowledge Conflicts with Source Citations</title>
       <author><first>Sagi</first><last>Shaier</last></author>
       <author><first>Ari</first><last>Kobren</last><affiliation>Oracle Labs</affiliation></author>
-      <author><first>Philip V.</first><last>Ogren</last><affiliation>Oracle</affiliation></author>
+      <author id="philip-ogren"><first>Philip V.</first><last>Ogren</last><affiliation>Oracle</affiliation></author>
       <pages>17226-17239</pages>
       <abstract>Resolving knowledge conflicts is a crucial challenge in Question Answering (QA) tasks, as the internet contains numerous conflicting facts and opinions. While some research has made progress in tackling ambiguous settings where multiple valid answers exist, these approaches often neglect to provide source citations, leaving users to evaluate the factuality of each answer. On the other hand, existing work on citation generation has focused on unambiguous settings with single answers, failing to address the complexity of real-world scenarios. Despite the importance of both aspects, no prior research has combined them, leaving a significant gap in the development of QA systems. In this work, we bridge this gap by proposing the novel task of QA with source citation in ambiguous settings, where multiple valid answers exist. To facilitate research in this area, we create a comprehensive framework consisting of: (1) five novel datasets, obtained by augmenting three existing reading comprehension datasets with citation meta-data across various ambiguous settings, such as distractors and paraphrasing; (2) the first ambiguous multi-hop QA dataset featuring real-world, naturally occurring contexts; (3) two new metrics to evaluate models’ performances; and (4) several strong baselines using rule-based, prompting, and finetuning approaches over five large language models. We hope that this new task, datasets, metrics, and baselines will inspire the community to push the boundaries of QA research and develop more trustworthy and interpretable systems.</abstract>
       <url hash="87fb3a28">2024.emnlp-main.956</url>
@@ -13536,12 +13536,12 @@
     </paper>
     <paper id="976">
       <title>Don’t Forget Your Reward Values: Language Model Alignment via Value-based Calibration</title>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Feng-Lin</first><last>Li</last><affiliation>Shopee</affiliation></author>
       <author><first>Huimin</first><last>Xu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Wang</first><last>Chen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>17622-17642</pages>
       <abstract>While Reinforcement Learning from Human Feedback (RLHF) significantly enhances the generation quality of Large Language Models (LLMs), recent studies have raised concerns regarding the complexity and instability associated with the Proximal Policy Optimization (PPO) algorithm, proposing a series of order-based alignment methods as viable alternatives. This paper delves into existing order-based methods, unifying them into one framework and examining their inefficiencies in utilizing reward values. Building upon these findings, we propose a new Value-based Calibration (VCB) method to better align LLMs with human preferences. Experimental results demonstrate that VCB surpasses existing alignment methods on AI assistant and summarization datasets, providing impressive generalizability, robustness, and diversity in different settings.</abstract>
       <url hash="b729064b">2024.emnlp-main.976</url>
@@ -13618,7 +13618,7 @@
       <author><first>Ruicheng</first><last>Yin</last></author>
       <author><first>Changze</first><last>Lv</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>17716-17736</pages>
       <abstract>Retrieval-augmented generation (RAG) techniques have proven to be effective in integrating up-to-date information, mitigating hallucinations, and enhancing response quality, particularly in specialized domains. While many RAG approaches have been proposed to enhance large language models through query-dependent retrievals, these approaches still suffer from their complex implementation and prolonged response times. Typically, a RAG workflow involves multiple processing steps, each of which can be executed in various ways. Here, we investigate existing RAG approaches and their potential combinations to identify optimal RAG practices. Through extensive experiments, we suggest several strategies for deploying RAG that balance both performance and efficiency. Moreover, we demonstrate that multimodal retrieval techniques can significantly enhance question-answering capabilities about visual inputs and accelerate the generation of multimodal content using a “retrieval as generation” strategy.</abstract>
       <url hash="ff3197fa">2024.emnlp-main.981</url>
@@ -13682,7 +13682,7 @@
       <title>From Descriptive Richness to Bias: Unveiling the Dark Side of Generative Image Caption Enrichment</title>
       <author><first>Yusuke</first><last>Hirota</last><affiliation>Osaka University</affiliation></author>
       <author><first>Ryo</first><last>Hachiuma</last><affiliation>NVIDIA</affiliation></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Yuta</first><last>Nakashima</last><affiliation>Osaka University</affiliation></author>
       <pages>17807-17816</pages>
       <abstract>Large language models (LLMs) have enhanced the capacity of vision-language models to caption visual text. This generative approach to image caption enrichment further makes textual captions more descriptive, improving alignment with the visual context. However, while many studies focus on the benefits of generative caption enrichment (GCE), are there any negative side effects? We compare standard-format captions and recent GCE processes from the perspectives of gender bias and hallucination, showing that enriched captions suffer from increased gender bias and hallucination. Furthermore, models trained on these enriched captions amplify gender bias by an average of 30.9% and increase hallucination by 59.5%. This study serves as a caution against the trend of making captions more descriptive.</abstract>
@@ -13700,7 +13700,7 @@
       <author><first>Fangying</first><last>Rong</last></author>
       <author><first>Qingbin</first><last>Liu</last><affiliation>Tencent PCG</affiliation></author>
       <author><first>Yanchao</first><last>Hao</last><affiliation>Tencent PCG</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Tencent AI Lab</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xi</first><last>Chen</last></author>
       <author><first>Cunhang</first><last>Fan</last></author>
       <author><first>Zhao</first><last>Lv</last><affiliation>School of Computer Science and Technology, Anhui University, Hefei 230601, China</affiliation></author>
@@ -13790,7 +13790,7 @@
       <author><first>Ruotian</first><last>Ma</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>17905-17923</pages>
       <abstract>Retrieval-Augmented Generation (RAG) significantly improved the ability of Large Language Models (LLMs) to solve knowledge-intensive tasks. While existing research seeks to enhance RAG performance by retrieving higher-quality documents or designing RAG-specific LLMs, the internal mechanisms within LLMs that contribute to RAG’s effectiveness remain underexplored. In this paper, we aim to investigate these internal mechanisms within the popular Mixture-of-Expert (MoE)-based LLMs and demonstrate how to improve RAG by examining expert activations in these LLMs. Our controlled experiments reveal that several core groups of experts are primarily responsible for RAG-related behaviors. The activation of these core experts can signify the model’s inclination towards external/internal knowledge and adjust its behavior. For instance, we identify core experts that can (1) indicate the sufficiency of the model’s internal knowledge, (2) assess the quality of retrieved documents, and (3) enhance the model’s ability to utilize context. Based on these findings, we propose several strategies to enhance RAG’s efficiency and effectiveness through expert activation. Experimental results across various datasets and MoE LLMs show the effectiveness of our method.</abstract>
       <url hash="68cd58e4">2024.emnlp-main.993</url>
@@ -13868,7 +13868,7 @@
     </paper>
     <paper id="999">
       <title>Label Confidence Weighted Learning for Target-level Sentence Simplification</title>
-      <author><first>Xin Ying</first><last>Qiu</last><affiliation>Guangdong University of Foreign Studies</affiliation></author>
+      <author id="xin-ying-qiu"><first>Xin Ying</first><last>Qiu</last><affiliation>Guangdong University of Foreign Studies</affiliation></author>
       <author><first>Jingshen</first><last>Zhang</last></author>
       <pages>18004-18019</pages>
       <abstract>Multi-level sentence simplification generates simplified sentences with varying language proficiency levels. We propose Label Confidence Weighted Learning (LCWL), a novel approach that incorporates a label confidence weighting scheme in the training loss of the encoder-decoder model, setting it apart from existing confidence-weighting methods primarily designed for classification. Experimentation on English grade-level simplification dataset shows that LCWL outperforms state-of-the-art unsupervised baselines. Fine-tuning the LCWL model on in-domain data and combining with Symmetric Cross Entropy (SCE) consistently delivers better simplifications compared to strong supervised methods. Our results highlight the effectiveness of label confidence weighting techniques for text simplification tasks with encoder-decoder architectures.</abstract>
@@ -13892,7 +13892,7 @@
     <paper id="1001">
       <title>Tree of Problems: Improving structured problem solving with compositionality</title>
       <author><first>Armel Randy</first><last>Zebaze</last><affiliation>INRIA</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
       <pages>18028-18047</pages>
       <abstract>Large Language Models (LLMs) have demonstrated remarkable performance across multipletasks through in-context learning. For complex reasoning tasks that require step-by-step thinking, Chain-of-Thought (CoT) prompting has given impressive results, especially when combined with self-consistency. Nonetheless, some tasks remain particularly difficult for LLMs to solve. Tree of Thoughts (ToT) and Graph of Thoughts (GoT) emerged as alternatives, dividing the complex problem into paths of subproblems. In this paper, we propose Tree of Problems (ToP), a simpler version of ToT, which we hypothesise can work better for complex tasks that can be divided into identical subtasks. Our empirical results show that our approach outperforms ToT and GoT, and in addition per forms better than CoT on complex reasoning tasks. All code for this paper will be made available.</abstract>
@@ -13904,7 +13904,7 @@
       <title>What the Harm? Quantifying the Tangible Impact of Gender Bias in Machine Translation with a Human-centered Study</title>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Sara</first><last>Papi</last></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Ana</first><last>Guerberof-Arenas</last><affiliation>University of Groningen</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>18048-18076</pages>
@@ -14001,7 +14001,7 @@
     <paper id="1009">
       <title>Why do objects have many names? A study on word informativeness in language use and lexical systems</title>
       <author><first>Eleonora</first><last>Gualdoni</last></author>
-      <author><first>Gemma</first><last>Boleda</last><affiliation>ICREA and Universitat Pompeu Fabra</affiliation></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last><affiliation>ICREA and Universitat Pompeu Fabra</affiliation></author>
       <pages>18150-18163</pages>
       <abstract>Human lexicons contain many different words that speakers can use to refer to the same object, e.g., *purple* or *magenta* for the same shade of color. On the one hand, studies on language use have explored how speakers adapt their referring expressions to successfully communicate in context, without focusing on properties of the lexical system. On the other hand, studies in language evolution have discussed how competing pressures for informativeness and simplicity shape lexical systems, without tackling in-context communication. We aim at bridging the gap between these traditions, and explore why a soft mapping between referents and words is a good solution for communication, by taking into account both in-context communication and the structure of the lexicon. We propose a simple measure of informativeness for words and lexical systems, grounded in a visual space, and analyze color naming data for English and Mandarin Chinese. We conclude that optimal lexical systems are those where multiple words can apply to the same referent, conveying different amounts of information. Such systems allow speakers to maximize communication accuracy and minimize the amount of information they convey when communicating about referents in contexts.</abstract>
       <url hash="52663841">2024.emnlp-main.1009</url>
@@ -14014,7 +14014,7 @@
       <author><first>Xue</first><last>Zhang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Zengkui</first><last>Sun</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>18164-18181</pages>
       <abstract>Knowledge distillation (KD) is known as a promising solution to compress large language models (LLMs) via transferring their knowledge to smaller models. During this process, white-box KD methods usually minimize the distance between the output distributions of the two models so that more knowledge can be transferred. However, in the current white-box KD framework, the output distributions are from the respective output spaces of the two models, using their own prediction heads. We argue that the space discrepancy will lead to low similarity between the teacher model and the student model on both representation and distribution levels. Furthermore, this discrepancy also hinders the KD process between models with different vocabularies, which is common for current LLMs. To address these issues, we propose a dual-space knowledge distillation (DSKD) framework that unifies the output spaces of the two models for KD. On the basis of DSKD, we further develop a cross-model attention mechanism, which can automatically align the representations of the two models with different vocabularies. Thus, our framework is not only compatible with various distance functions for KD (e.g., KL divergence) like the current framework, but also supports KD between any two LLMs regardless of their vocabularies. Experiments on task-agnostic instruction-following benchmarks show that DSKD significantly outperforms the current white-box KD framework with various distance functions, and also surpasses existing KD methods for LLMs with different vocabularies.</abstract>
       <url hash="114aa0eb">2024.emnlp-main.1010</url>
@@ -14078,7 +14078,7 @@
       <author><first>Shi</first><last>Feng</last><affiliation>Northeastern University, China</affiliation></author>
       <author><first>Daling</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Yifei</first><last>Zhang</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>18266-18287</pages>
       <abstract>Full-parameter fine-tuning (FPFT) has become the go-to choice for adapting language models (LMs) to downstream tasks due to its excellent performance. As LMs grow in size, fine-tuning the full parameters of LMs requires a prohibitively large amount of GPU memory. Existing approaches utilize zeroth-order optimizer to conserve GPU memory, which potentially compromises the performance of LMs as non-zero order optimizers tend to converge more readily on most downstream tasks. We propose a novel, memory-efficient, optimizer-independent, end-to-end hierarchical fine-tuning strategy, HiFT, which only updates a subset of parameters at each training step. HiFT significantly reduces the amount of gradients and optimizer state parameters residing in GPU memory at the same time, thereby reducing GPU memory usage. Our results demonstrate that: (1) HiFT achieves comparable performance with parameter-efficient fine-tuning and standard FPFT. (2) Results on six models show that HiFT reduces the number of trainable parameters by about 89.18% on average compared to FPFT. (3) HiFT supports FPFT of 7B models for 24G GPU memory devices under mixed precision without using any memory saving techniques. (4) HiFT supports various optimizers including AdamW, AdaGrad, SGD, etc. The source code link is https://github.com/misonsky/HiFT.</abstract>
       <url hash="c30e3489">2024.emnlp-main.1015</url>
@@ -14112,7 +14112,7 @@
     <paper id="1018">
       <title><fixed-case>T</fixed-case>ool<fixed-case>P</fixed-case>lanner: A Tool Augmented <fixed-case>LLM</fixed-case> for Multi Granularity Instructions with Path Planning and Feedback</title>
       <author><first>Qinzhuo</first><last>Wu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <pages>18315-18339</pages>
@@ -14125,7 +14125,7 @@
       <title>Please note that <fixed-case>I</fixed-case>’m just an <fixed-case>AI</fixed-case>: Analysis of Behavior Patterns of <fixed-case>LLM</fixed-case>s in (Non-)offensive Speech Identification</title>
       <author><first>Esra</first><last>Dönmez</last><affiliation>Universität Stuttgart</affiliation></author>
       <author><first>Thang</first><last>Vu</last><affiliation>University of Stuttgart, University of Stuttgart</affiliation></author>
-      <author><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
       <pages>18340-18357</pages>
       <abstract>Offensive speech is highly prevalent on online platforms. Being trained on online data, Large Language Models (LLMs) display undesirable behaviors, such as generating harmful text or failing to recognize it. Despite these shortcomings, the models are becoming a part of our everyday lives by being used as tools for information search, content creation, writing assistance, and many more. Furthermore, the research explores using LLMs in applications with immense social risk, such as late-life companions and online content moderators. Despite the potential harms from LLMs in such applications, whether LLMs can reliably identify offensive speech and how they behave when they fail are open questions. This work addresses these questions by probing sixteen widely used LLMs and showing that most fail to identify (non-)offensive online language. Our experiments reveal undesirable behavior patterns in the context of offensive speech detection, such as erroneous response generation, over-reliance on profanity, and failure to recognize stereotypes. Our work highlights the need for extensive documentation of model reliability, particularly in terms of the ability to detect offensive language.</abstract>
       <url hash="a926879d">2024.emnlp-main.1019</url>
@@ -14145,9 +14145,9 @@
     <paper id="1021">
       <title>A linguistically-motivated evaluation methodology for unraveling model’s abilities in reading comprehension tasks</title>
       <author><first>Elie</first><last>Antoine</last><affiliation>Université d’Aix-Marseille</affiliation></author>
-      <author><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
-      <author><first>Géraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>18376-18392</pages>
       <abstract>We introduce an evaluation methodology for reading comprehension tasks based on the intuition that certain examples, by the virtue of their linguistic complexity, consistently yield lower scores regardless of model size or architecture. We capitalize on semantic frame annotation for characterizing this complexity, and study seven complexity factors that may account for model’s difficulty. We first deploy this methodology on a carefully annotated French reading comprehension benchmark showing that two of those complexity factors are indeed good predictors of models’ failure, while others are less so. We further deploy our methodology on a well studied English benchmark by using chatGPT as a proxy for semantic annotation.Our study reveals that fine-grained linguistically-motivated automatic evaluation of a reading comprehension task is not only possible, but helps understand models’ abilities to handle specific linguistic characteristics of input examples. It also shows that current state-of-the-art models fail with some for those characteristics which suggests that adequately handling them requires more than merely increasing model size.</abstract>
       <url hash="ac460c89">2024.emnlp-main.1021</url>
@@ -14197,7 +14197,7 @@
       <author><first>Luyang</first><last>Lin</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Lingzhi</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Hong</first><last>Cheng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>18455-18462</pages>
       <abstract>Text clustering is a fundamental task in natural language processing with numerous applications. However, traditional clustering methods often struggle with domain-specific fine-tuning and the presence of outliers. To address these challenges, we introduce LLMEdgeRefine, an iterative clustering method enhanced by large language models (LLMs), focusing on edge points refinement. LLMEdgeRefine enhances current clustering methods by creating super-points to mitigate outliers and iteratively refining clusters using LLMs for improved semantic coherence. Our method demonstrates superior performance across multiple datasets, outperforming state-of-the-art techniques, and offering robustness, adaptability, and cost-efficiency for diverse text clustering applications.</abstract>
       <url hash="c5582e39">2024.emnlp-main.1025</url>
@@ -14211,7 +14211,7 @@
       <author><first>Ainara</first><last>Estarrona</last><affiliation>Universidad del País Vasco</affiliation></author>
       <author><first>Elena</first><last>Cabrio</last><affiliation>Université Côte d’Azur</affiliation></author>
       <author><first>Serena</first><last>Villata</last><affiliation>CNRS</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>18463-18475</pages>
       <abstract>Explaining Artificial Intelligence (AI) decisions is a major challenge nowadays in AI, in particular when applied to sensitive scenarios like medicine and law. However, the need to explain the rationale behind decisions is a main issues also for human-based deliberation as it is important to justify why a certain decision has been taken. Resident medical doctors for instance are required not only to provide a (possibly correct) diagnosis, but also to explain how they reached a certain conclusion. Developing new tools to aid residents to train their explanation skills is therefore a central objective of AI in education. In this paper, we follow this direction, and we present, to the best of our knowledge, the first multilingual dataset for Medical Question Answering where correct and incorrect diagnoses for a clinical case are enriched with a natural language explanation written by doctors. These explanations have been manually annotated with argument components (i.e., premise, claim) and argument relations (i.e., attack, support). The Multilingual CasiMedicos-arg dataset consists of 558 clinical cases (English, Spanish, French, Italian) with explanations, where we annotated 5021 claims, 2313 premises, 2431 support relations, and 1106 attack relations. We conclude by showing how competitive baselines perform over this challenging dataset for the argument mining task.</abstract>
       <url hash="ffbb7ed8">2024.emnlp-main.1026</url>
@@ -14329,7 +14329,7 @@
       <title>Who is better at math, Jenny or Jingzhen? Uncovering Stereotypes in Large Language Models</title>
       <author><first>Zara</first><last>Siddique</last></author>
       <author><first>Liam</first><last>Turner</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
       <pages>18601-18619</pages>
       <abstract>Large language models (LLMs) have been shown to propagate and amplify harmful stereotypes, particularly those that disproportionately affect marginalised communities. To understand the effect of these stereotypes more comprehensively, we introduce GlobalBias, a dataset of 876k sentences incorporating 40 distinct gender-by-ethnicity groups alongside descriptors typically used in bias literature, which enables us to study a broad set of stereotypes from around the world. We use GlobalBias to directly probe a suite of LMs via perplexity, which we use as a proxy to determine how certain stereotypes are represented in the model’s internal representations. Following this, we generate character profiles based on given names and evaluate the prevalence of stereotypes in model outputs. We find that the demographic groups associated with various stereotypes remain consistent across model likelihoods and model outputs. Furthermore, larger models consistently display higher levels of stereotypical outputs, even when explicitly instructed not to.</abstract>
       <url hash="75263e0e">2024.emnlp-main.1035</url>
@@ -14398,7 +14398,7 @@
       <author><first>Srikanth G.</first><last>Tamilselvam</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Prince</first><last>Kumar</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Ashok Pon</first><last>Kumar</last><affiliation>IBM Research India</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>18681-18697</pages>
       <abstract>Recent developments show that Large Language Models (LLMs) produce state-of-the-art performance on natural language (NL) to code generation for resource-rich general-purpose languages like C++, Java, and Python. However, their practical usage for structured domain-specific languages (DSLs) such as YAML, JSON is limited due to domain-specific schema, grammar, and customizations generally unseen by LLMs during pre-training. Efforts have been made to mitigate this challenge via in-context learning through relevant examples or by fine-tuning. However, it suffers from problems, such as limited DSL samples and prompt sensitivity but enterprises maintain good documentation of the DSLs. Therefore, we propose DocCGen, a framework that can leverage such rich knowledge by breaking the NL-to-Code generation task for structured code languages into a two-step process. First, it detects the correct libraries using the library documentation that best matches the NL query. Then, it utilizes schema rules extracted from the documentation of these libraries to constrain the decoding. We evaluate our framework for two complex structured languages, Ansible YAML and Bash command, consisting of two settings: Out-of-domain (OOD) and In domain (ID). Our extensive experiments show that DocCGen consistently improves different sized language models across all six evaluation metrics, reducing syntactic and semantic errors in structured code.</abstract>
       <url hash="c2161931">2024.emnlp-main.1040</url>
@@ -14553,8 +14553,8 @@
       <title>Towards Faithful Knowledge Graph Explanation Through Deep Alignment in Commonsense Question Answering</title>
       <author><first>Weihe</first><last>Zhai</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last><affiliation>Queen Mary University of London</affiliation></author>
-      <author><first>Bingquan</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Chengjie</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yalong</first><last>Zhao</last></author>
       <pages>18920-18930</pages>
       <abstract>The fusion of language models (LMs) and knowledge graphs (KGs) is widely used in commonsense question answering, but generating faithful explanations remains challenging. Current methods often overlook path decoding faithfulness, leading to divergence between graph encoder outputs and model predictions. We identify confounding effects and LM-KG misalignment as key factors causing spurious explanations. To address this, we introduce the LM-KG Fidelity metric to assess KG representation reliability and propose the LM-KG Distribution-aware Alignment (LKDA) algorithm to improve explanation faithfulness. Without ground truth, we evaluate KG explanations using the proposed Fidelity-Sparsity Trade-off Curve. Experiments on CommonsenseQA and OpenBookQA show that LKDA significantly enhances explanation fidelity and model performance, highlighting the need to address distributional misalignment for reliable commonsense reasoning.</abstract>
@@ -14625,7 +14625,7 @@
       <author><first>Ruiyu</first><last>Xiao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Lei</first><last>Wu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yuhang</first><last>Gou</last></author>
-      <author><first>Weinan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Ting</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>18995-19008</pages>
       <abstract>Argumentative essay generation (AEG) aims to generate complete texts on specific controversial topics or debates. Although current AEG methods can generate individual opinions, they often overlook the high-level connections between these opinions. This often leads to the generated results being mired in logical confusion, unable to proof their own arguments effectively. The generated essay may present evidence that contradicts the claims or they may fail to assemble the claims into logical flow. In this paper, we present a unified two-stage framework: Proof-Enhancement and Self-Annotation (PESA) for AEG with a focus on logical enhancement. Specifically, we first construct pseudo-labels for logical information,claims and grounds, using a large language model. We then propose a tree planning approach that introduces proof principles and ensures logical consistency. Extensive experimental results show that, benefiting from proof principle guidance, PESA generates argumentative essays with better logical validity and persuasiveness than strong baseline models.</abstract>
@@ -14691,7 +14691,7 @@
       <author><first>Weijia</first><last>Zhang</last></author>
       <author><first>Guimin</first><last>Hu</last></author>
       <author><first>Yifei</first><last>Yuan</last><affiliation>Copenhagen University</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Desmond</first><last>Elliott</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>19077-19095</pages>
@@ -14754,7 +14754,7 @@
       <author><first>Ahmed F.</first><last>AbouElhamayed</last></author>
       <author><first>Jordan</first><last>Dotzel</last></author>
       <author><first>Zhiru</first><last>Zhang</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <author><first>Safeen</first><last>Huda</last></author>
       <author><first>Mohamed S.</first><last>Abdelfattah</last></author>
       <pages>19154-19167</pages>
@@ -14769,7 +14769,7 @@
       <author><first>Daniela</first><last>Teodorescu</last></author>
       <author><first>Mallory J</first><last>Feldman</last></author>
       <author><first>Kristen</first><last>Lindquist</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>19168-19185</pages>
       <abstract>We are united in how emotions are central to shaping our experiences; yet, individuals differ greatly in how we each identify, categorize, and express emotions. In psychology, variation in the ability of individuals to differentiate between emotion concepts is called emotion granularity (determined through self-reports of one’s emotions). High emotion granularity has been linked with better mental and physical health; whereas low emotion granularity has been linked with maladaptive emotion regulation strategies and poor health outcomes. In this work, we propose computational measures of emotion granularity derived from temporally-ordered speaker utterances in social media (in lieu of self reports that suffer from various biases). We then investigate the effectiveness of such text-derived measures of emotion granularity in functioning as markers of various mental health conditions (MHCs). We establish baseline measures of emotion granularity derived from textual utterances, and show that, at an aggregate level, emotion granularities are significantly lower for people self-reporting as having an MHC than for the control population. This paves the way towards a better understanding of the MHCs, and specifically the role emotions play in our well-being.</abstract>
       <url hash="5e2fb106">2024.emnlp-main.1069</url>
@@ -14782,7 +14782,7 @@
       <author><first>Minpeng</first><last>Liao</last></author>
       <author><first>Zhongqiang</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Junhong</first><last>Wu</last><affiliation>University of Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>19186-19199</pages>
       <abstract>The recent release of GPT-4o showcased the potential of end-to-end multimodal models, not just in terms of low latency but also in their ability to understand and generate expressive speech with rich emotions. While the details are unknown to the open research community, it likely involves significant amounts of curated data and compute, neither of which is readily accessible. In this paper, we present BLSP-Emo (Bootstrapped Language-Speech Pretraining with Emotion support), a novel approach to developing an end-to-end speech-language model capable of understanding both semantics and emotions in speech and generate empathetic responses. BLSP-Emo utilizes existing speech recognition (ASR) and speech emotion recognition (SER) datasets through a two-stage process. The first stage focuses on semantic alignment, following recent work on pretraining speech-language models using ASR data. The second stage performs emotion alignment with the pretrained speech-language model on an emotion-aware continuation task constructed from SER data. Our experiments demonstrate that the BLSP-Emo model excels in comprehending speech and delivering empathetic responses, both in instruction-following tasks and conversations.</abstract>
@@ -14825,9 +14825,9 @@
       <title><fixed-case>D</fixed-case>ata<fixed-case>N</fixed-case>arrative: Automated Data-Driven Storytelling with Visualizations and Texts</title>
       <author><first>Mohammed Saidul</first><last>Islam</last><affiliation>York University</affiliation></author>
       <author><first>Md Tahmid Rahman</first><last>Laskar</last><affiliation>Dialpad Inc.</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>19253-19286</pages>
       <abstract>Data-driven storytelling is a powerful method for conveying insights by combining narrative techniques with visualizations and text. These stories integrate visual aids, such as highlighted bars and lines in charts, along with textual annotations explaining insights. However, creating such stories requires a deep understanding of the data and meticulous narrative planning, often necessitating human intervention, which can be time-consuming and mentally taxing. While Large Language Models (LLMs) excel in various NLP tasks, their ability to generate coherent and comprehensive data stories remains underexplored. In this work, we introduce a novel task for data story generation and a benchmark containing 1,449 stories from diverse sources. To address the challenges of crafting coherent data stories, we propose a multi-agent framework employing two LLM agents designed to replicate the human storytelling process: one for understanding and describing the data (Reflection), generating the outline, and narration, and another for verification at each intermediary step. While our agentic framework generally outperforms non-agentic counterparts in both model-based and human evaluations, the results also reveal unique challenges in data story generation.</abstract>
       <url hash="9bcd6612">2024.emnlp-main.1073</url>
@@ -14860,8 +14860,8 @@
       <author><first>Christine</first><last>Jou</last><affiliation>Facebook</affiliation></author>
       <author><first>Gargi</first><last>Ghosh</last><affiliation>Meta AI</affiliation></author>
       <author><first>Omer</first><last>Levy</last><affiliation>Facebook</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <author><first>Shang-Wen</first><last>Li</last><affiliation>Facebook</affiliation></author>
       <author><first>Saining</first><last>Xie</last><affiliation>New York University</affiliation></author>
       <author><first>Christoph</first><last>Feichtenhofer</last><affiliation>Facebook</affiliation></author>
@@ -14886,7 +14886,7 @@
       <title><fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>-Bench: Benchmarking Language Model Understanding of Causal and Temporal Dependencies in Plans</title>
       <author><first>Yash Kumar</first><last>Lal</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <author><first>Vanya</first><last>Cohen</last></author>
-      <author><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <author><first>Ray</first><last>Mooney</last><affiliation>, University of Texas, Austin</affiliation></author>
       <pages>19336-19354</pages>
@@ -14914,7 +14914,7 @@
       <title>An Empirical Analysis of the Writing Styles of Persona-Assigned <fixed-case>LLM</fixed-case>s</title>
       <author><first>Manuj</first><last>Malik</last></author>
       <author><first>Jing</first><last>Jiang</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Kian Ming A.</first><last>Chai</last><affiliation>DSO National Laboratories</affiliation></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last><affiliation>DSO National Laboratories</affiliation></author>
       <pages>19369-19388</pages>
       <url hash="7f083b4f">2024.emnlp-main.1079</url>
       <bibkey>malik-etal-2024-empirical</bibkey>
@@ -14980,12 +14980,12 @@
     <paper id="1084">
       <title>The Illusion of Competence: Evaluating the Effect of Explanations on Users’ Mental Models of Visual Question Answering Systems</title>
       <author><first>Judith</first><last>Sieker</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Ronja</first><last>Utescher</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Nazia</first><last>Attari</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Heiko</first><last>Wersing</last><affiliation>Honda Research Institute</affiliation></author>
       <author><first>Hendrik</first><last>Buschmeier</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>19459-19475</pages>
       <abstract>We examine how users perceive the limitations of an AI system when it encounters a task that it cannot perform perfectly and whether providing explanations alongside its answers aids users in constructing an appropriate mental model of the system’s capabilities and limitations. We employ a visual question answer and explanation task where we control the AI system’s limitations by manipulating the visual inputs: during inference, the system either processes full-color or grayscale images. Our goal is to determine whether participants can perceive the limitations of the system. We hypothesize that explanations will make limited AI capabilities more transparent to users. However, our results show that explanations do not have this effect. Instead of allowing users to more accurately assess the limitations of the AI system, explanations generally increase users’ perceptions of the system’s competence – regardless of its actual performance.</abstract>
       <url hash="127aac7f">2024.emnlp-main.1084</url>
@@ -14999,7 +14999,7 @@
       <author><first>Lintang</first><last>Sutawika</last><affiliation>EleutherAI</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Amazon</affiliation></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Minghao</first><last>Wu</last></author>
       <author><first>Carsten</first><last>Eickhoff</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
       <author><first>Stella</first><last>Biderman</last><affiliation>EleutherAI and Booz Allen Hamilton</affiliation></author>
@@ -15037,10 +15037,10 @@
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
       <author><first>Muhammad Arslan</first><last>Manzoor</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last></author>
+      <author><first>Fei</first><last>Liu</last></author>
       <author><first>Georgi Nenkov</first><last>Georgiev</last></author>
       <author><first>Rocktim Jyoti</first><last>Das</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>19519-19529</pages>
       <abstract>Large language models (LLMs), especially when instruction-tuned for chat, have become part of our daily lives, freeing people from the process of searching, extracting, and integrating information from multiple sources by offering a straightforward answer to a variety of questions in a single place. Unfortunately, in many cases, LLM responses are factually incorrect, which limits their applicability in real-world scenarios. As a result, research on evaluating and improving the factuality of LLMs has attracted a lot of research attention recently. In this survey, we critically analyze existing work with the aim to identify the major challenges and their associated causes, pointing out to potential solutions for improving the factuality of LLMs, and analyzing the obstacles to automated factuality evaluation for open-ended text generation. We further offer an outlook on where future research should go.</abstract>
       <url hash="3ab8cfd2">2024.emnlp-main.1088</url>
@@ -15051,7 +15051,7 @@
       <title>Discovering Biases in Information Retrieval Models Using Relevance Thesaurus as Global Explanation</title>
       <author><first>Youngwoo</first><last>Kim</last><affiliation>Pohang University of Science and Technology</affiliation></author>
       <author><first>Razieh</first><last>Rahimi</last><affiliation>University of Massachusetts Amherst</affiliation></author>
-      <author><first>James</first><last>Allan</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
+      <author id="james-allan"><first>James</first><last>Allan</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
       <pages>19530-19547</pages>
       <abstract>Most of the efforts in interpreting neural relevance models have been on local explanations, which explain the relevance of a document to a query. However, local explanations are not effective in predicting the model’s behavior on unseen texts. We aim at explaining a neural relevance model by providing lexical explanations that can be globally generalized. Specifically, we construct a relevance thesaurus containing semantically relevant query term and document term pairs, which can augment BM25 scoring functions to better approximate the neural model’s predictions. We propose a novel method to build a relevance thesaurus construction. Our method involves training a neural relevance model which can score the relevance for partial segments of query and documents. The trained model is used to identify relevant terms over the vocabulary space. The resulting thesaurus explanation is evaluated based on ranking effectiveness and fidelity to the targeted neural ranking model. Finally, our thesaurus reveals the existence of brand name bias in ranking models, which further supports the utility of our explanation method.</abstract>
       <url hash="1615f618">2024.emnlp-main.1089</url>
@@ -15063,7 +15063,7 @@
       <author><first>Rongchen</first><last>Guo</last></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada and University of Ottawa</affiliation></author>
       <author><first>Hillary</first><last>Dawkins</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>19548-19564</pages>
       <abstract>This work provides an explanatory view of how LLMs can apply moral reasoning to both criticize and defend sexist language. We assessed eight large language models, all of which demonstrated the capability to provide explanations grounded in varying moral perspectives for both critiquing and endorsing views that reflect sexist assumptions. With both human and automatic evaluation, we show that all eight models produce comprehensible and contextually relevant text, which is helpful in understanding diverse views on how sexism is perceived. Also, through analysis of moral foundations cited by LLMs in their arguments, we uncover the diverse ideological perspectives in models’ outputs, with some models aligning more with progressive or conservative views on gender roles and sexism.Based on our observations, we caution against the potential misuse of LLMs to justify sexist language. We also highlight that LLMs can serve as tools for understanding the roots of sexist beliefs and designing well-informed interventions. Given this dual capacity, it is crucial to monitor LLMs and design safety mechanisms for their use in applications that involve sensitive societal topics, such as sexism.</abstract>
@@ -15112,7 +15112,7 @@
       <author><first>Trisha</first><last>Maturi</last></author>
       <author><first>Bowen</first><last>Yi</last></author>
       <author><first>Siqi</first><last>Shen</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>19617-19634</pages>
       <abstract>We explore the alignment of values in Large Language Models (LLMs) with specific age groups, leveraging data from the World Value Survey across thirteen categories. Through a diverse set of prompts tailored to ensure response robustness, we find a general inclination of LLM values towards younger demographics, especially when compared to the US population. Although a general inclination can be observed, we also found that this inclination toward younger groups can be different across different value categories. Additionally, we explore the impact of incorporating age identity information in prompts and observe challenges in mitigating value discrepancies with different age cohorts. Our findings highlight the age bias in LLMs and provide insights for future work. Materials for our analysis will be available via <url>https://github.com/anonymous</url></abstract>
       <url hash="af03bd08">2024.emnlp-main.1094</url>
@@ -15146,7 +15146,7 @@
       <title>Evaluating Diversity in Automatic Poetry Generation</title>
       <author><first>Yanran</first><last>Chen</last></author>
       <author><first>Hannes</first><last>Gröner</last></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Steffen</first><last>Eger</last><affiliation>University of Technology Nuremberg</affiliation></author>
       <pages>19671-19692</pages>
       <abstract>Natural Language Generation (NLG), and more generally generative AI, are among the currently most impactful research fields. Creative NLG, such as automatic poetry generation, is a fascinating niche in this area. While most previous research has focused on forms of the Turing test when evaluating automatic poetry generation — can humans distinguish between automatic and human generated poetry — we evaluate the diversity of automatically generated poetry (with a focus on quatrains), by comparing distributions of generated poetry to distributions of human poetry along structural, lexical, semantic and stylistic dimensions, assessing different model types (word vs. character-level, general purpose LLMs vs. poetry-specific models), including the very recent LLaMA3-8B, and types of fine-tuning (conditioned vs. unconditioned). We find that current automatic poetry systems are considerably underdiverse along multiple dimensions — they often do not rhyme sufficiently, are semantically too uniform and even do not match the length distribution of human poetry. Our experiments reveal, however, that style-conditioning and character-level modeling clearly increases diversity across virtually all dimensions we explore. Our identified limitations may serve as the basis for more genuinely diverse future poetry generation models.</abstract>
@@ -15158,7 +15158,7 @@
       <title>Evaluating Short-Term Temporal Fluctuations of Social Biases in Social Media Data and Masked Language Models</title>
       <author><first>Yi</first><last>Zhou</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Danushka</first><last>Bollegala</last><affiliation>Amazon and University of Liverpool</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>19693-19708</pages>
       <abstract>Social biases such as gender or racial biases have been reported in language models (LMs), including Masked Language Models (MLMs). Given that MLMs are continuously trained with increasing amounts of additional data collected over time, an important yet unanswered question is how the social biases encoded with MLMs vary over time. In particular, the number of social media users continues to grow at an exponential rate, and it is a valid concern for the MLMs trained specifically on social media data whether their social biases (if any) would also amplify over time. To empirically analyse this problem, we use a series of MLMs pretrained on chronologically ordered temporal snapshots of corpora. Our analysis reveals that, although social biases are present in all MLMs, most types of social bias remain relatively stable over time (with a few exceptions). To further understand the mechanisms that influence social biases in MLMs, we analyse the temporal corpora used to train the MLMs. Our findings show that some demographic groups, such as male, obtain higher preference over the other, such as female on the training corpora constantly.</abstract>
       <url hash="a6e47b88">2024.emnlp-main.1098</url>
@@ -15233,7 +15233,7 @@
     <paper id="1104">
       <title>Unsupervised Discrete Representations of <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Artem</first><last>Abzaliev</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>19786-19793</pages>
       <abstract>Many modalities are naturally represented as continuous signals, making it difficult to use them with models that expect discrete units, such as LLMs. In this paper, we explore the use of audio compression techniques for the discrete representation of the gestures used in sign language. We train a tokenizer for American Sign Language (ASL) fingerspelling, which discretizes sequences of fingerspelling signs into tokens. We also propose a loss function to improve the interpretability of these tokens such that they preserve both the semantic and the visual information of the signal. We show that the proposed method improves the performance of the discretized sequence on downstream tasks.</abstract>
       <url hash="255cfbbb">2024.emnlp-main.1104</url>
@@ -15248,7 +15248,7 @@
       <author><first>Jiseon</first><last>Kim</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Yeon</first><last>Seonwoo</last><affiliation>Amazon</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Hyunwoo</first><last>Kim</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>19794-19809</pages>
       <abstract>While humans naturally develop theory of mind (ToM), the capability to understand other people’s mental states and beliefs, state-of-the-art large language models (LLMs) underperform on simple ToM benchmarks. We posit that we can extend our understanding of LLMs’ ToM abilities by evaluating key human ToM precursors<tex-math>-</tex-math>perception inference and perception-to-belief inference<tex-math>-</tex-math>in LLMs. We introduce two datasets, Percept-ToMi and Percept-FANToM, to evaluate these precursory inferences for ToM in LLMs by annotating characters’ perceptions on ToMi and FANToM, respectively.Our evaluation of eight state-of-the-art LLMs reveals that the models generally perform well in perception inference while exhibiting limited capability in perception-to-belief inference (e.g., lack of inhibitory control).Based on these results, we present PercepToM, a novel ToM method leveraging LLMs’ strong perception inference capability while supplementing their limited perception-to-belief inference. Experimental results demonstrate that PercepToM significantly enhances LLM’s performance, especially in false belief scenarios.</abstract>
@@ -15263,7 +15263,7 @@
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Ryan A.</first><last>Rossi</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <pages>19810-19820</pages>
       <abstract>Extractive summarization plays a pivotal role in natural language processing due to its wide-range applications in summarizing diverse content efficiently, while also being faithful to the original content. Despite significant advancement achieved in extractive summarization by Large Language Models (LLMs), these summaries frequently exhibit incoherence. An important aspect of the coherent summary is its readability for intended users. Although there have been many datasets and benchmarks proposed for creating coherent extractive summaries, none of them currently incorporate user intent to improve coherence in extractive summarization. Motivated by this, we propose a systematically created human-annotated dataset consisting of coherent summaries for five publicly available datasets and natural language user feedback, offering valuable insights into how to improve coherence in extractive summaries. We utilize this dataset for aligning LLMs through supervised fine-tuning with natural language human feedback to enhance the coherence of their generated summaries. Preliminary experiments with Falcon-40B and Llama-2-13B show significant performance improvements (~10% Rouge-L) in terms of producing coherent summaries. We further utilize human feedback to benchmark results over instruction-tuned models such as FLAN-T5 which resulted in several interesting findings.</abstract>
       <url hash="5d5a2c9a">2024.emnlp-main.1106</url>
@@ -15492,7 +15492,7 @@
       <author><first>Dimosthenis</first><last>Antypas</last></author>
       <author><first>Asahi</first><last>Ushio</last></author>
       <author><first>Francesco</first><last>Barbieri</last><affiliation>Snap Inc.</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>20136-20152</pages>
       <abstract>In the dynamic realm of social media, diverse topics are discussed daily, transcending linguistic boundaries. However, the complexities of understanding and categorising this content across various languages remain an important challenge with traditional techniques like topic modelling often struggling to accommodate this multilingual diversity. In this paper, we introduce X-Topic, a multilingual dataset featuring content in four distinct languages (English, Spanish, Japanese, and Greek), crafted for the purpose of tweet topic classification. Our dataset includes a wide range of topics, tailored for social media content, making it a valuable resource for scientists and professionals working on cross-linguistic analysis, the development of robust multilingual models, and computational scientists studying online dialogue. Finally, we leverage X-Topic to perform a comprehensive cross-linguistic and multilingual analysis, and compare the capabilities of current general- and domain-specific language models.</abstract>
       <url hash="ced2ce1a">2024.emnlp-main.1123</url>
@@ -15509,7 +15509,7 @@
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>20153-20177</pages>
       <abstract>Large language models (LLMs) are increasingly used for complex multi-turn conversations across diverse real-world applications. However, existing benchmarks mainly focus on single-turn evaluations, overlooking the models’ capabilities in multi-turn interactions. To address this gap, we introduce , a comprehensive benchmark to evaluate the multi-turn conversational abilities of LLMs. By analyzing human-LLM conversations, we categorize interaction patterns into four types: recollection, expansion, refinement, and follow-up. We construct multi-turn queries for each category either by augmenting existing datasets or creating new examples using GPT-4 with a human-in-the-loop process to avoid data leakage. To study the factors impacting multi-turn abilities, we create single-turn versions of the 1170 multi-turn queries and compare performance. Our evaluation of 10 well-known LLMs shows that while closed-source models generally surpass open-source ones, certain open-source models exceed GPT-3.5-Turbo in specific tasks. We observe significant performance degradation in multi-turn settings compared to single-turn settings in most models, which is not correlated with the models’ fundamental capabilities. Moreover, we identify the distance to relevant content and susceptibility to error propagation as the key factors influencing multi-turn performance.</abstract>
       <url hash="f5b9b04e">2024.emnlp-main.1124</url>
@@ -15566,7 +15566,7 @@
       <author><first>Xuande</first><last>Feng</last></author>
       <author><first>Kevin</first><last>Zhang</last></author>
       <author><first>Jialu</first><last>Liu</last><affiliation>Google Research</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
       <pages>20220-20239</pages>
       <abstract>Existing popular video captioning benchmarks and models often produce generic captions for videos that lack specific identification of individuals, locations, or organizations (named entities). However, in the case of news videos, the setting is more demanding, requiring the inclusion of such named entities for meaningful summarization. Therefore, we introduce the task of directly summarizing news videos into captions that are entity-aware. To facilitate research in this area, we have collected a large-scale dataset named VIEWS (VIdeo NEWS). Within this task, we face challenges inherent to recognizing named entities and navigating diverse, dynamic contexts, all while relying solely on visual cues. To address these challenges, we propose a model-agnostic approach that enriches visual information extracted from videos with context sourced from external knowledge, enabling the generation of entity-aware captions. We validate the effectiveness of our approach across three video captioning models. Additionally, we conduct a critical analysis of our methodology to gain insights into the complexity of the task, the challenges it presents, and potential avenues for future research.</abstract>
       <url hash="8e27dc97">2024.emnlp-main.1128</url>
@@ -15609,7 +15609,7 @@
     <paper id="1131">
       <title><fixed-case>D</fixed-case>e<fixed-case>MPT</fixed-case>: Decoding-enhanced Multi-phase Prompt Tuning for Making <fixed-case>LLM</fixed-case>s Be Better Context-aware Translators</title>
       <author><first>Xinglin</first><last>Lyu</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Yanqing</first><last>Zhao</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Daimeng</first><last>Wei</last></author>
@@ -15661,12 +15661,12 @@
     </paper>
     <paper id="1135">
       <title>Multi-expert Prompting Improves Reliability, Safety and Usefulness of Large Language Models</title>
-      <author><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Duong Ngoc</first><last>Yen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>20370-20401</pages>
       <abstract>We present Multi-expert Prompting, a novel enhancement of ExpertPrompting (Xu et al., 2023), designed to improve the large language model (LLM) generation. Specifically, it guides an LLM to fulfill an input instruction by simulating multiple experts, aggregating their responses, and selecting the best among individual and aggregated responses. This process is performed in a single chain of thoughts through our seven carefully designed subtasks derived from the Nominal Group Technique (Ven and Delbecq, 1974), a well-established decision-making framework. Our evaluations demonstrate that Multi-expert Prompting significantly outperforms ExpertPrompting and comparable baselines in enhancing the truthfulness, factuality, informativeness, and usefulness of responses while reducing toxicity and hurtfulness. It further achieves state-of-the-art truthfulness by outperforming the best baseline by 8.69% with ChatGPT. Multi-expert Prompting is efficient, explainable, and highly adaptable to diverse scenarios, eliminating the need for manual prompt construction.</abstract>
       <url hash="ec108175">2024.emnlp-main.1135</url>
@@ -15690,7 +15690,7 @@
       <author><first>Zheyuan</first><last>Zhang</last></author>
       <author><first>Fengyuan</first><last>Hu</last></author>
       <author><first>Shane</first><last>Storks</last><affiliation>University of Michigan</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>20416-20431</pages>
       <url hash="9e2bab48">2024.emnlp-main.1137</url>
       <bibkey>yu-etal-2024-eliciting</bibkey>
@@ -15716,7 +15716,7 @@
       <author><first>Nicholas</first><last>Deas</last><affiliation>Columbia University</affiliation></author>
       <author><first>Elsbeth</first><last>Turcan</last></author>
       <author><first>Ivan Ernesto Perez</first><last>Mejia</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>20467-20485</pages>
       <abstract>In the field of emotion analysis, much NLP research focuses on identifying a limited number of discrete emotion categories, often applied across languages. These basic sets, however, are rarely designed with textual data in mind, and culture, language, and dialect can influence how particular emotions are interpreted. In this work, we broaden our scope to a practically unbounded set of affective states, which includes any terms that humans use to describe their experiences of feeling. We collect and publish MASIVE, a dataset of Reddit posts in English and Spanish containing over 1,000 unique affective states each. We then define the new problem of affective state identification for language generation models framed as a masked span prediction task. On this task, we find that smaller finetuned multilingual models outperform much larger LLMs, even on region-specific Spanish affective states. Additionally, we show that pretraining on MASIVE improves model performance on existing emotion benchmarks. Finally, through machine translation experiments, we find that native speaker-written data is vital to good performance on this task.</abstract>
       <url hash="4901c89a">2024.emnlp-main.1139</url>
@@ -15797,7 +15797,7 @@
       <author><first>Rifki Afina</first><last>Putri</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Faiz Ghifari</first><last>Haznitrama</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Dea</first><last>Adhista</last></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>20571-20590</pages>
       <abstract>Large Language Models (LLMs) are increasingly being used to generate synthetic data for training and evaluating models. However, it is unclear whether they can generate a good quality of question answering (QA) dataset that incorporates knowledge and cultural nuance embedded in a language, especially for low-resource languages. In this study, we investigate the effectiveness of using LLMs in generating culturally relevant commonsense QA datasets for Indonesian and Sundanese languages. To do so, we create datasets for these languages using various methods involving both LLMs and human annotators, resulting in 4.5K questions per language (9K in total), making our dataset the largest of its kind. Our experiments show that automatic data adaptation from an existing English dataset is less effective for Sundanese. Interestingly, using the direct generation method on the target language, GPT-4 Turbo can generate questions with adequate general knowledge in both languages, albeit not as culturally ‘deep’ as humans. We also observe a higher occurrence of fluency errors in the Sundanese dataset, highlighting the discrepancy between medium- and lower-resource languages.</abstract>
       <url hash="e7ceac05">2024.emnlp-main.1145</url>
@@ -15942,7 +15942,7 @@
     <paper id="1156">
       <title>Automatic sentence segmentation of clinical record narratives in real-world data</title>
       <author><first>Dongfang</first><last>Xu</last><affiliation>Cedars Sinai Medical Center</affiliation></author>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Karen</first><last>O’Connor</last><affiliation>University of Pennsylvania, University of Pennsylvania</affiliation></author>
       <author><first>Siddharth</first><last>Rawal</last><affiliation>Amazon</affiliation></author>
       <author><first>Graciela Gonzalez</first><last>Hernandez</last><affiliation>Cedars Sinai Medical Center</affiliation></author>
@@ -15965,9 +15965,9 @@
     <paper id="1158">
       <title><fixed-case>B</fixed-case>ayesian Example Selection Improves In-Context Learning for Speech, Text and Visual Modalities</title>
       <author><first>Siyin</first><last>Wang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Ji</first><last>Wu</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
       <pages>20812-20828</pages>
       <abstract>Large language models (LLMs) can adapt to new tasks through in-context learning (ICL) based on a few examples presented in dialogue history without any model parameter update. Despite such convenience, the performance of ICL heavily depends on the quality of the in-context examples presented, which makes the in-context example selection approach a critical choice. This paper proposes a novel eBayesian in-Context example Selection method (ByCS) for ICL. Extending the inference probability conditioned on in-context examples based on Bayes’ theorem, ByCS focuses on the inverse inference conditioned on test input. Following the assumption that accurate inverse inference probability (likelihood) will result in accurate inference probability (posterior), in-context examples are selected based on their inverse inference results. Diverse and extensive cross-tasking and cross-modality experiments are performed with speech, text, and image examples. Experimental results show the efficacy and robustness of our ByCS method on various models, tasks and modalities.</abstract>
       <url hash="33dde5b4">2024.emnlp-main.1158</url>
@@ -16069,7 +16069,7 @@
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Kilichbek</first><last>Haydarov</last><affiliation>King Abdullah University of Science and Technology</affiliation></author>
       <author><first>Philip</first><last>Torr</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Mohamed</first><last>Elhoseiny</last><affiliation>KAUST</affiliation></author>
       <pages>20939-20962</pages>
       <abstract>Research in vision and language has made considerable progress thanks to benchmarks such as COCO. COCO captions focused on unambiguous facts in English; ArtEmis introduced subjective emotions and ArtELingo introduced some multilinguality (Chinese and Arabic). However we believe there should be more multilinguality. Hence, we present ArtELingo-28, a vision-language benchmark that spans 28 languages and encompasses approximately 200,000 annotations (140 annotations per image). Traditionally, vision research focused on unambiguous class labels, whereas ArtELingo-28 emphasizes diversity of opinions over languages and cultures. The challenge is to build machine learning systems that assign emotional captions to images. Baseline results will be presented for three novel conditions: Zero-Shot, Few-Shot and One-vs-All Zero-Shot. We find that cross-lingual transfer is more successful for culturally-related languages. Data and code will be made publicly available.</abstract>
@@ -16143,7 +16143,7 @@
       <author><first>Poulami</first><last>Ghosh</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
       <author><first>Shikhar</first><last>Vashishth</last><affiliation>Google</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>21035-21045</pages>
       <abstract>Contemporary deep learning models effectively handle languages with diverse morphology despite not being directly integrated into them. Morphology and word order are closely linked, with the latter incorporated into transformer-based models through positional encodings. This prompts a fundamental inquiry: Is there a correlation between the morphological complexity of a language and the utilization of positional encoding in pre-trained language models? In pursuit of an answer, we present the first study addressing this question, encompassing 22 languages and 5 downstream tasks. Our findings reveal that the importance of positional encoding diminishes with increasing morphological complexity in languages. Our study motivates the need for a deeper understanding of positional encoding, augmenting them to better reflect the different languages under consideration.</abstract>
       <url hash="54f34c8f">2024.emnlp-main.1170</url>
@@ -16417,7 +16417,7 @@
     </paper>
     <paper id="1191">
       <title>Show and Guide: Instructional-Plan Grounded Vision and Language Model</title>
-      <author><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
+      <author id="diogo-gloria-silva"><first>Diogo</first><last>Glória-Silva</last><affiliation>Universidade NOVA de Lisboa</affiliation></author>
       <author><first>David</first><last>Semedo</last><affiliation>Universidade NOVA de Lisboa and Universidade NOVA de Lisboa</affiliation></author>
       <author><first>Joao</first><last>Magalhaes</last><affiliation>Universidade Nova de Lisboa</affiliation></author>
       <pages>21371-21389</pages>
@@ -16587,7 +16587,7 @@
     <paper id="1203">
       <title>Unveiling the mystery of visual attributes of concrete and abstract concepts: Variability, nearest neighbors, and challenging categories</title>
       <author><first>Tarun</first><last>Tater</last><affiliation>Universität Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>21581-21597</pages>
       <abstract>The visual representation of a concept varies significantly depending on its meaning and the context where it occurs; this poses multiple challenges both for vision and multimodal models. Our study focuses on concreteness, a well-researched lexical-semantic variable, using it as a case study to examine the variability in visual representations. We rely on images associated with approximately 1,000 abstract and concrete concepts extracted from two different datasets: Bing and YFCC. Our goals are: (i) evaluate whether visual diversity in the depiction of concepts can reliably distinguish between concrete and abstract concepts; (ii) analyze the variability of visual features across multiple images of the same concept through a nearest neighbor analysis; and (iii) identify challenging factors contributing to this variability by categorizing and annotating images. Our findings indicate that for classifying images of abstract versus concrete concepts, a combination of basic visual features such as color and texture is more effective than features extracted by more complex models like Vision Transformer (ViT). However, ViTs show better performances in the nearest neighbor analysis, emphasizing the need for a careful selection of visual features when analyzing conceptual variables through modalities other than text.</abstract>
@@ -16643,7 +16643,7 @@
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>Debora</first><last>Nozza</last><affiliation>Bocconi University</affiliation></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>21669-21691</pages>
       <abstract>This paper introduces the concept of actionability in the context of bias measures in natural language processing (NLP). We define actionability as the degree to which a measure’s results enable informed action and propose a set of desiderata for assessing it. Building on existing frameworks such as measurement modeling, we argue that actionability is a crucial aspect of bias measures that has been largely overlooked in the literature.We conduct a comprehensive review of 146 papers proposing bias measures in NLP, examining whether and how they provide the information required for actionable results. Our findings reveal that many key elements of actionability, including a measure’s intended use and reliability assessment, are often unclear or entirely absent.This study highlights a significant gap in the current approach to developing and reporting bias measures in NLP. We argue that this lack of clarity may impede the effective implementation and utilization of these measures. To address this issue, we offer recommendations for more comprehensive and actionable metric development and reporting practices in NLP bias research.</abstract>
       <url hash="4a2fe84f">2024.emnlp-main.1207</url>
@@ -16802,7 +16802,7 @@
     </paper>
     <paper id="1218">
       <title><fixed-case>S</fixed-case>peech<fixed-case>QE</fixed-case>: Estimating the Quality of Direct Speech Translation</title>
-      <author><first>HyoJung</first><last>Han</last><affiliation>Department of Computer Science, University of Maryland, College Park</affiliation></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last><affiliation>Department of Computer Science, University of Maryland, College Park</affiliation></author>
       <author><first>Kevin</first><last>Duh</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Marine</first><last>Carpuat</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>21852-21867</pages>
@@ -16818,8 +16818,8 @@
       <author><first>Nikhil</first><last>Mehta</last></author>
       <author><first>Qingyun</first><last>Wu</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Chi</first><last>Wang</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Charles L. A.</first><last>Clarke</last><affiliation>University of Waterloo</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="charles-l-a-clarke"><first>Charles L. A.</first><last>Clarke</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Julia</first><last>Kiseleva</last><affiliation>Research, Microsoft</affiliation></author>
       <pages>21868-21888</pages>
       <abstract>The rapid development of Large Language Models (LLMs) has led to a surge in applications that facilitate collaboration among multiple agents, assisting humans in their daily tasks. However, a significant gap remains in assessing to what extent LLM-powered applications genuinely enhance user experience and task execution efficiency. This highlights the need to verify utility of LLM-powered applications, particularly by ensuring alignment between the application’s functionality and end-user needs. We introduce AgentEval, a novel framework designed to simplify the utility verification process by automatically proposing a set of criteria tailored to the unique purpose of any given application. This allows for a comprehensive assessment, quantifying the utility of an application against the suggested criteria. We present a comprehensive analysis of the effectiveness and robustness of AgentEval for two open source datasets including Math Problem solving and ALFWorld House-hold related tasks. For reproducibility purposes, we make the data, code and all the logs publicly available at https://github.com/Narabzad/AgentEval</abstract>
@@ -16834,7 +16834,7 @@
       <author><first>Tianyang</first><last>Liu</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Abdullah</first><last>Ashfaq</last></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>University of California, San Diego and Amazon</affiliation></author>
-      <author><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>21889-21909</pages>
       <abstract>Aligning Large Language Models (LLMs) traditionally relies on complex and costly training processes like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). To address the challenge of achieving alignment without these extensive tuning costs and expensive annotations, we present a novel, tuning-free approach for self-alignment called Dynamic Rewarding with Prompt Optimization (DRPO). Our approach enables self-alignment through a search-based prompt optimization framework, allowing the model to self-improve and generate optimized prompts without additional training or human supervision. The core of DRPO leverages a dynamic rewarding mechanism to identify and rectify model-specific alignment weaknesses, enabling LLMs to adapt quickly to various alignment challenges. Empirical evaluations on eight recent LLMs, including both open- and closed-source, reveal that DRPO significantly enhances alignment performance, enabling base models to outperform their SFT/RLHF-tuned counterparts. Moreover, DRPO’s automatically optimized prompts surpass those curated by human experts, demonstrating its superior alignment capabilities. Our findings envision a highly cost-effective and adaptable solution for future alignment research to be further explored.</abstract>
       <url hash="8300a839">2024.emnlp-main.1220</url>
@@ -16925,7 +16925,7 @@
       <author><first>Yixin</first><last>Wu</last></author>
       <author><first>Boyang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>21990-22001</pages>
       <abstract>Effective utilization of large language models (LLMs), such as ChatGPT, relies on the quality of input prompts. This paper explores prompt engineering, specifically focusing on the disparity between experimentally designed prompts and real-world “in-the-wild” prompts. We analyze 10,538 in-the-wild prompts collected from various platforms and develop a framework that decomposes the prompts into eight key components. Our analysis shows that and Requirement are the most prevalent two components. Roles specified in the prompts, along with their capabilities, have become increasingly varied over time, signifying a broader range of application scenarios for LLMs. However, from the response of GPT-4, there is a marginal improvement with a specified role, whereas leveraging less prevalent components such as Capability and Demonstration can result in a more satisfying response. Overall, our work sheds light on the essential components of in-the-wild prompts and the effectiveness of these components on the broader landscape of LLM prompt engineering, providing valuable guidelines for the LLM community to optimize high-quality prompts.</abstract>
       <url hash="33f8c0f4">2024.emnlp-main.1227</url>
@@ -16973,17 +16973,17 @@
       <author><first>Jungo</first><last>Kasai</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
       <author><first>Tao</first><last>Yu</last><affiliation>The University of Hong Kong</affiliation></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Wojciech Maciej</first><last>Kryscinski</last></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Ye</first><last>Liu</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta and Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta and Department of Computer Science, University of Washington</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Rex</first><last>Ying</last><affiliation>Yale University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>22017-22031</pages>
       <abstract>Large language models (LLMs) have achieved remarkable performance on a variety of natural language understanding tasks. However, existing benchmarks are inadequate in measuring the complex logical reasoning capabilities of a model. We present FOLIO, a human-annotated, logically complex and diverse dataset for reasoning in natural language (NL), equipped with first-order logic (FOL) annotations. FOLIO consists of 1,430 examples (unique conclusions), each paired with one of 487 sets of premises used to deductively reason for the validity of each conclusion. The logical correctness of the premises and conclusions is ensured by their FOL annotations, which are automatically verified by an FOL inference engine. In addition to the main NL reasoning task, NL-FOL pairs in FOLIO constitute a new NL-FOL translation dataset. Our experiments on FOLIO systematically evaluate the FOL reasoning ability of supervised fine-tuning on medium-sized language models. For both NL reasoning and NL-FOL translation, we benchmark multiple state-of-the-art language models. Our results show that a subset of FOLIO remains a challenge for one of the most capable Large Language Model (LLM) publicly available, GPT-4.</abstract>
       <url hash="0b5a010f">2024.emnlp-main.1229</url>
@@ -17006,8 +17006,8 @@
     <paper id="1231">
       <title>Is Child-Directed Speech Effective Training Data for Language Models?</title>
       <author><first>Steven Y.</first><last>Feng</last></author>
-      <author><first>Noah D.</first><last>Goodman</last></author>
-      <author><first>Michael C.</first><last>Frank</last></author>
+      <author id="noah-goodman"><first>Noah D.</first><last>Goodman</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
       <pages>22055-22071</pages>
       <abstract>While high-performing language models are typically trained on hundreds of billions of words, human children become fluent language users with a much smaller amount of data. What are the features of the data they receive, and how do these features support language modeling objectives? To investigate this question, we train GPT-2 and RoBERTa models on 29M words of English child-directed speech and a new matched, synthetic dataset (TinyDialogues), comparing to OpenSubtitles, Wikipedia, and a heterogeneous blend of datasets from the BabyLM challenge. We evaluate the syntactic and semantic knowledge of these models using developmentally-inspired evaluations. Through pretraining experiments, we test whether the global developmental ordering or the local discourse ordering of children’s training data supports high performance relative to other datasets. The local properties of the data affect model results, but surprisingly, global properties do not. Further, child language input is not uniquely valuable for training language models. These findings support the hypothesis that, rather than proceeding from better data, the child’s learning algorithm is substantially more data-efficient than current language modeling techniques.</abstract>
       <url hash="e40a74fc">2024.emnlp-main.1231</url>
@@ -17129,7 +17129,7 @@
       <author><first>Yanqiao</first><last>Zhu</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>May Dongmei</first><last>Wang</last></author>
       <author><first>Joyce C.</first><last>Ho</last><affiliation>Emory University</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Carl</first><last>Yang</last><affiliation>Emory University</affiliation></author>
       <pages>22234-22254</pages>
       <abstract>Developing effective biomedical retrieval models is important for excelling at knowledge-intensive biomedical tasks but still challenging due to the lack of sufficient publicly annotated biomedical data and computational resources. We present BMRetriever, a series of dense retrievers for enhancing biomedical retrieval via unsupervised pre-training on large biomedical corpora, followed by instruction fine-tuning on a combination of labeled datasets and synthetic pairs. Experiments on 5 biomedical tasks across 11 datasets verify BMRetriever’s efficacy on various biomedical applications. BMRetriever also exhibits strong parameter efficiency, with the 410M variant outperforming baselines up to 11.7 times larger, and the 2B variant matching the performance of models with over 5B parameters. The training data and model checkpoints are released at https://huggingface.co/BMRetriever to ensure transparency, reproducibility, and application to new domains.</abstract>
@@ -17246,7 +17246,7 @@
       <author><first>Manling</first><last>Li</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Richard</first><last>Zemel</last><affiliation>Department of Computer Science, Columbia University and Department of Computer Science, University of Toronto</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
       <pages>22399-22416</pages>
       <abstract>Recently, enabling pretrained language models (PLMs) to perform zero-shot crossmodal tasks such as video question answering has been extensively studied. A popular approach is to learn a projection network that projects visual features into the input text embedding space of a PLM, as well as feed-forward adaptation layers, with the weights of the PLM frozen. However, is it really necessary to learn such additional layers? In this paper, we make the first attempt to demonstrate that the PLM is able to perform zero-shot crossmodal tasks without any crossmodal pretraining, when the observed visual concepts are injected as both additional input text tokens and augmentation in the intermediate features within each feed-forward network for the PLM. Specifically, inputting observed visual concepts as text tokens helps to inject them through the self-attention layers in the PLM; to augment the intermediate features in a way that is compatible with the PLM, we propose to construct adaptation layers based on the intermediate representation of concepts (obtained by solely inputting them to the PLM). These two complementary injection mechanisms form the proposed Deep Concept Injection, which comprehensively enables the PLM to perceive instantly without crossmodal pretraining. Extensive empirical analysis on zero-shot video question answering, as well as visual question answering, shows Deep Concept Injection achieves competitive or even better results in both zero-shot and fine-tuning settings, compared to state-of-the-art methods that require crossmodal pretraining.</abstract>
       <url hash="b4427aa7">2024.emnlp-main.1249</url>
@@ -17303,7 +17303,7 @@
       <author><first>Kai Tzu-iunn</first><last>Ong</last></author>
       <author><first>Beong-woo</first><last>Kwak</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Moohyeon</first><last>Kim</last><affiliation>Yonsei University</affiliation></author>
-      <author><first>Sunghwan</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last></author>
       <author><first>Taeyoon</first><last>Kwon</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Jiwan</first><last>Chung</last></author>
       <author><first>Youngjae</first><last>Yu</last><affiliation>Yonsei University</affiliation></author>
@@ -17408,7 +17408,7 @@
       <author><first>Yiping</first><last>Song</last></author>
       <author><first>Tianlun</first><last>Liu</last></author>
       <author><first>Liang</first><last>Ding</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>22633-22646</pages>
       <abstract>Watermarking enables people to determine whether the text is generated by a specific model. It injects a unique signature based on the “green-red” list that can be tracked during detection, where the words in green lists are encouraged to be generated. Recent researchers propose to fix the green/red lists or increase the proportion of green tokens to defend against paraphrasing attacks. However, these methods cause degradation of text quality due to semantic disparities between the watermarked text and the unwatermarked text. In this paper, we propose a semantic-aware watermark method that considers contexts to generate a semantic-aware key to split a semantically balanced green/red list for watermark injection. The semantic balanced list reduces the performance drop due to adding bias on green lists. To defend against paraphrasing attacks, we generate the watermark key considering the semantics of contexts via locally sensitive hashing. To improve the text quality, we propose to split green/red lists considering semantics to enable the green list to cover almost all semantics. We also dynamically adapt the bias to balance text quality and robustness. The experiments show our advantages in both robustness and text quality comparable to existing baselines.</abstract>
       <url hash="2d922321">2024.emnlp-main.1260</url>
@@ -17495,7 +17495,7 @@
     </paper>
     <paper id="1267">
       <title>Instruction Fine-Tuning: Does Prompt Loss Matter?</title>
-      <author><first>Mathew</first><last>Huerta-Enochian</last></author>
+      <author id="mathew-huerta-enochian"><first>Mathew</first><last>Huerta-Enochian</last></author>
       <author><first>Seung Yong</first><last>Ko</last></author>
       <pages>22771-22795</pages>
       <abstract>We present a novel study analyzing the effects of various prompt loss token weights (PLW) for supervised instruction fine-tuning (SIFT). While prompt-masking (PLW = 0) is common for SIFT, some fine-tuning APIs support fractional PLWs and suggest that using a small non-zero PLW can help stabilize learning when fine-tuning on short-completion data. However, there has never been a study confirming this claim, and OpenAI, a major cloud-based SIFT provider, recently removed this parameter from their fine-tuning API. We found that performance of models fine-tuned on short-completion data had a statistically-significant negative quadratic relationship with PLW. Using small values (0.01 − 0.5) of PLW produced better results on multiple-choice and short-generation benchmarks (outperforming models fine-tuned on long-completion data) while large values (≈ 1.0) of PLW produced better results on long-generation benchmarks. We explained this effect and verified its importance through additional experiments. This research serves as a warning to API providers about the importance of providing a PLW parameter for SIFT.</abstract>
@@ -17566,7 +17566,7 @@
       <author><first>Takuya</first><last>Yoshioka</last><affiliation>Microsoft</affiliation></author>
       <author><first>Lu</first><last>Yuan</last><affiliation>Facebook</affiliation></author>
       <author><first>Michael</first><last>Zeng</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>14-24</pages>
       <abstract>Artificial General Intelligence (AGI) requires comprehensive understanding and generation capabilities for a variety of tasks spanning different modalities and functionalities. Integrative AI is one important direction to approach AGI, through combining multiple models to tackle complex multimodal tasks. However, there is a lack of a flexible and composable platform to facilitate efficient and effective model composition and coordination. In this paper, we propose the i-Code Studio, a configurable and composable framework for Integrative AI. The i-Code Studio orchestrates multiple pre-trained models in a finetuning-free fashion to conduct complex multimodal tasks. Instead of simple model composition, the i-Code Studio provides an integrative, flexible, and composable setting for developers to quickly and easily compose cutting-edge services and technologies tailored to their specific requirements. The i-Code Studio achieves impressive results on a variety of zero-shot multimodal tasks, such as video-to-text retrieval, speech-to-speech translation, and visual question answering. We also demonstrate how to quickly build a multimodal agent based on the i-Code Studio that can communicate and personalize for users. The project page with demonstrations and code is at https://i-code-studio.github.io/.</abstract>
       <url hash="4a5b9799">2024.emnlp-demo.2</url>
@@ -17609,7 +17609,7 @@
       <author><first>Yuanyi</first><last>Luo</last></author>
       <author><first>Sirui</first><last>Li</last></author>
       <author><first>Wenxiao</first><last>Zhang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
       <pages>46-52</pages>
       <abstract>Multimodal conversational agents are highly desirable because they offer natural and human-like interaction.However, there is a lack of comprehensive end-to-end solutions to support collaborative development and benchmarking.While proprietary systems like GPT-4o and Gemini demonstrating impressive integration of audio, video, and text with response times of 200-250ms, challenges remain in balancing latency, accuracy, cost, and data privacy.To better understand and quantify these issues, we developed <b>OpenOmni</b>, an open-source, end-to-end pipeline benchmarking tool that integrates advanced technologies such as Speech-to-Text, Emotion Detection, Retrieval Augmented Generation, Large Language Models, along with the ability to integrate customized models.OpenOmni supports local and cloud deployment, ensuring data privacy and supporting latency and accuracy benchmarking. This flexible framework allows researchers to customize the pipeline, focusing on real bottlenecks and facilitating rapid proof-of-concept development. OpenOmni can significantly enhance applications like indoor assistance for visually impaired individuals, advancing human-computer interaction.Our demonstration video is available https://www.youtube.com/watch?v=zaSiT3clWqY, demo is available via https://openomni.ai4wa.com, code is available via https://github.com/AI4WA/OpenOmniFramework.</abstract>
       <url hash="bfcbfd99">2024.emnlp-demo.5</url>
@@ -17642,7 +17642,7 @@
       <author><first>Xuming</first><last>Hu</last><affiliation>The Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Irwin</first><last>King</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <pages>61-71</pages>
       <abstract>Watermarking for Large Language Models (LLMs), which embeds imperceptible yet algorithmically detectable signals in model outputs to identify LLM-generated text, has become crucial in mitigating the potential misuse of LLMs. However, the abundance of LLM watermarking algorithms, their intricate mechanisms, and the complex evaluation procedures and perspectives pose challenges for researchers and the community to easily understand, implement and evaluate the latest advancements. To address these issues, we introduce MarkLLM, an open-source toolkit for LLM watermarking. MarkLLM offers a unified and extensible framework for implementing LLM watermarking algorithms, while providing user-friendly interfaces to ensure ease of access. Furthermore, it enhances understanding by supporting automatic visualization of the underlying mechanisms of these algorithms. For evaluation, MarkLLM offers a comprehensive suite of 12 tools spanning three perspectives, along with two types of automated evaluation pipelines. Through MarkLLM, we aim to support researchers while improving the comprehension and involvement of the general public in LLM watermarking technology, fostering consensus and driving further advancements in research and application. Our code is available at https://github.com/THU-BPM/MarkLLM.</abstract>
       <url hash="dce6152f">2024.emnlp-demo.7</url>
@@ -17707,7 +17707,7 @@
       <author><first>Shubh</first><last>Nisar</last></author>
       <author><first>Heenaben</first><last>Prajapati</last></author>
       <author><first>Himanshu</first><last>Beniwal</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>101-109</pages>
       <abstract>As the NLP community increasingly addresses challenges associated with multilingualism, robust annotation tools are essential to handle multilingual datasets efficiently. In this paper, we introduce a code-mixed multilingual text annotation framework, COMMENTATOR, specifically designed for annotating code- mixed text. The tool demonstrates its effectiveness in token-level and sentence-level language annotation tasks for Hinglish text. We perform robust qualitative human-based evaluations to showcase COMMENTATOR led to 5x faster annotations than the best baseline.</abstract>
       <url hash="3683f2ea">2024.emnlp-demo.11</url>
@@ -17756,7 +17756,7 @@
       <author><first>Prasetia Anugrah</first><last>Pratama</last></author>
       <author><first>Ika Karlina</first><last>Idris</last><affiliation>Monash University</affiliation></author>
       <author><first>Traci</first><last>Hong</last><affiliation>Boston University, Boston University</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
       <pages>142-152</pages>
       <abstract>Online hate speech propagation is a complex issue, deeply influenced by both the perpetrator and the target’s cultural, historical, and societal contexts. Consequently, developing a universally robust hate speech classifier for diverse social media texts remains a challenging and unsolved task. The lack of mechanisms to track the spread and severity of hate speech further complicates the formulation of effective solutions. In response to this, to monitor hate speech in Indonesia during the recent 2024 presidential election, we have employed advanced Natural Language Processing (NLP) technologies to create an improved hate speech classifier tailored for a narrower subset of texts; specifically, texts that target vulnerable groups that have historically been the targets of hate speech in Indonesia. Our focus is on texts that mention these six vulnerable minority groups in Indonesia: Shia, Ahmadiyyah, Christians, LGBTQ+, Indonesian Chinese, and people with disabilities, as well as one additional group of interest: Jews. The insights gained from our dashboard have assisted stakeholders in devising more effective strategies to counteract hate speech. Notably, our dashboard has persuaded the General Election Supervisory Body in Indonesia (BAWASLU) to collaborate with our institution and the Alliance of Independent Journalists (AJI) to monitor social media hate speech in vulnerable areas in the country known for hate speech dissemination or hate-related violence in the upcoming Indonesian regional elections. This dashboard is available online at <url>https://aji.or.id/hate-speech-monitoring</url>.</abstract>
       <url hash="8be41d67">2024.emnlp-demo.15</url>
@@ -17873,7 +17873,7 @@
       <author><first>Georgi Nenkov</first><last>Georgiev</last></author>
       <author><first>Jiahui</first><last>Geng</last></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>219-229</pages>
       <abstract>The increased use of large language models (LLMs) across a variety of real-world applications calls for automatic tools to check the factual accuracy of their outputs, as LLMs often hallucinate. This is difficult as it requires assessing the factuality of free-form open-domain responses. While there has been a lot of research on this topic, different papers use different evaluation benchmarks and measures,which makes them hard to compare and hampers future progress. To mitigate these issues, we developed OpenFactCheck, a unified framework, with three modules: (i) RESPONSEEVAL, which allows users to easily customize an automatic fact-checking system and to assess the factuality of all claims in an input document using that system, (ii) LLMEVAL, which assesses the overall factuality of an LLM, and (iii) CHECKEREVAL, a module to evaluate automatic fact-checking systems. OpenFactCheck is open-sourced (https://github.com/mbzuai-nlp/openfactcheck) and publicly released as a Python library (https://pypi.org/project/openfactcheck/) and also as a web service (http://app.openfactcheck.com). A video describing the system is available at https://youtu.be/-i9VKL0HleI.</abstract>
       <url hash="736192fb">2024.emnlp-demo.23</url>
@@ -18058,7 +18058,7 @@
       <author><first>Artem</first><last>Shelmanov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>336-343</pages>
       <abstract>The ease of access to large language models (LLMs) has enabled a widespread of machine-generated texts, and now it is often hard to tell whether a piece of text was human-written or machine-generated. This raises concerns about potential misuse, particularly within educational and academic domains. Thus, it is important to develop practical systems that can automate the process. Here, we present one such system, LLM-DetectAIve, designed for fine-grained detection. Unlike most previous work on machine-generated text detection, which focused on binary classification, LLM-DetectAIve supports four categories: (i) human-written, (ii) machine-generated, (iii) machine-written, then machine-humanized, and (iv) human-written, then machine-polished. Category (iii) aims to detect attempts to obfuscate the fact that a text was machine-generated, while category (iv) looks for cases where the LLM was used to polish a human-written text, which is typically acceptable in academic writing, but not in education. Our experiments show that LLM-DetectAIve can effectively identify the above four categories, which makes it a potentially useful tool in education, academia, and other domains.LLM-DetectAIve is publicly accessible at https://github.com/mbzuai-nlp/LLM-DetectAIve. The video describing our system is available at https://youtu.be/E8eT_bE7k8c.</abstract>
       <url hash="6365ae80">2024.emnlp-demo.35</url>
@@ -18114,7 +18114,7 @@
       <author><first>Chi</first><last>Han</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <author><first>Kartik</first><last>Natarajan</last><affiliation>Private Sector Humanitarian Alliance</affiliation></author>
-      <author><first>Clare R.</first><last>Voss</last><affiliation>ARL</affiliation></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last><affiliation>ARL</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>372-381</pages>
       <abstract>Complex news events, such as natural disasters and socio-political conflicts, require swift responses from the government and society. Relying on historical events to project the future is insufficient as such events are sparse and do not cover all possible conditions and nuanced situations. Simulation of these complex events can help better prepare and reduce the negative impact. We develop a controllable complex news event simulator guided by both the event schema representing domain knowledge about the scenario and user-provided assumptions representing case-specific conditions.As event dynamics depend on the fine-grained social and cultural context, we further introduce a geo-diverse commonsense and cultural norm-aware knowledge enhancement component.To enhance the coherence of the simulation, apart from the global timeline of events,we take an agent-based approach to simulate the individual character states, plans, and actions. By incorporating the schema and cultural norms, our generated simulations achieve much higher coherence and appropriateness and are received favorably by participants from a humanitarian assistance organization.</abstract>
@@ -18147,7 +18147,7 @@
       <title>Generative Dictionary: Improving Language Learner Understanding with Contextual Definitions</title>
       <author><first>Kai-Wen</first><last>Tuan</last><affiliation>National Tsinghua University</affiliation></author>
       <author><first>Hai-Lun</first><last>Tu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>390-396</pages>
       <abstract>We introduce GenerativeDictionary, a novel dictionary system that generates word sense interpretations based on the given context. Our approach involves transforming context sentences to highlight the meaning of target words within their specific context. The method involves automatically transforming context sentences into sequences of low-dimensional vector token representations, automatically processing the input embeddings through multiple layers of transformers, and automatically generate the word senses based on the latent representations derived from the context. At runtime, context sentences with target words are processed through a transformer model that outputs the relevant word senses.Blind evaluations on a combined set of dictionary example sentences and generated sentences based on given word senses demonstrate that our method is comparable to traditional word sense disambiguation (WSD) methods. By framing WSD as a generative problem, GenerativeDictionary delivers more precise and contextually appropriate word senses, enhancing the effectiveness of language learning tools.</abstract>
       <url hash="b8328b40">2024.emnlp-demo.41</url>
@@ -18185,7 +18185,7 @@
       <author><first>Wei</first><last>Ye</last><affiliation>Peking University</affiliation></author>
       <author><first>Wenyuan</first><last>Xu</last></author>
       <author><first>Yue</first><last>Zhang</last><affiliation>Westlake University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shikun</first><last>Zhang</last><affiliation>Peking University</affiliation></author>
       <author><first>Qingsong</first><last>Wen</last><affiliation>Squirrel Ai Learning</affiliation></author>
       <pages>408-418</pages>
@@ -18210,7 +18210,7 @@
       <author><first>Guangtao</first><last>Zeng</last></author>
       <author><first>Jia</first><last>Guo</last></author>
       <author><first>Jiahui</first><last>Zhou</last></author>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Ziqi</first><last>Jin</last></author>
       <author><first>Wei</first><last>Lu</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Min</first><last>Lin</last><affiliation>Sea AI Lab</affiliation></author>
@@ -18302,7 +18302,7 @@
       <author><first>Wenting</first><last>Zhao</last><affiliation>Cornell University</affiliation></author>
       <author><first>Jack</first><last>Hessel</last><affiliation>Samaya AI</affiliation></author>
       <author><first>Xiang</first><last>Ren</last></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <pages>497-506</pages>
       <abstract>The increasing availability of real-world conversation data offers exciting opportunities for researchers to study user-chatbot interactions. However, the sheer volume of this data makes manually examining individual conversations impractical. To overcome this challenge, we introduce WildVis, an interactive tool that enables fast, versatile, and large-scale conversation analysis. WildVis provides search and visualization capabilities in the text and embedding spaces based on a list of criteria. To manage million-scale datasets, we implemented optimizations including search index construction, embedding precomputation and compression, and caching to ensure responsive user interactions within seconds. We demonstrate WildVis’ utility through three case studies: facilitating chatbot misuse research, visualizing and comparing topic distributions across datasets, and characterizing user-specific conversation patterns. WildVis is open-source and designed to be extendable, supporting additional datasets and customized search and visualization functionalities.</abstract>
@@ -18344,7 +18344,7 @@
     <meta>
       <booktitle>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</booktitle>
       <editor><first>Franck</first><last>Dernoncourt</last></editor>
-      <editor><first>Daniel</first><last>Preoţiuc-Pietro</last></editor>
+      <editor id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></editor>
       <editor><first>Anastasia</first><last>Shimorina</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Miami, Florida, US</address>
@@ -18374,7 +18374,7 @@
       <title>Two-tiered Encoder-based Hallucination Detection for Retrieval-Augmented Generation in the Wild</title>
       <author><first>Ilana</first><last>Zimmerman</last><affiliation>LivePerson Inc</affiliation></author>
       <author><first>Jadin</first><last>Tredup</last></author>
-      <author><first>Ethan</first><last>Selfridge</last><affiliation>NA</affiliation></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last><affiliation>NA</affiliation></author>
       <author><first>Joseph</first><last>Bradley</last></author>
       <pages>8-22</pages>
       <abstract>Detecting hallucinations, where Large Language Models (LLMs) are not factually consistent with a Knowledge Base (KB), is a challenge for Retrieval-Augmented Generation (RAG) systems. Current solutions rely on public datasets to develop prompts or fine-tune a Natural Language Inference (NLI) model. However, these approaches are not focused on developing an enterprise RAG system; they do not consider latency, train or evaluate on production data, nor do they handle non-verifiable statements such as small talk or questions. To address this, we leverage the customer service conversation data of four large brands to evaluate existing solutions and propose a set of small encoder models trained on a new dataset. We find the proposed models to outperform existing methods and highlight the value of combining a small amount of in-domain data with public datasets.</abstract>
@@ -18618,7 +18618,7 @@
       <author><first>Swapnil</first><last>Bhosale</last></author>
       <author><first>Samarth</first><last>Agrawal</last><affiliation>eBay Inc.</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Zhe</first><last>Wu</last></author>
       <pages>215-224</pages>
       <abstract>This paper addresses the challenge of improving user experience on e-commerce platforms by enhancing product ranking relevant to user’s search queries. Ambiguity and complexity of user queries often lead to a mismatch between user’s intent and retrieved product titles or documents. Recent approaches have proposed the use of Transformer-based models which need millions of annotated query-title pairs during the pre-training stage, and this data often does not take user intent into account. To tackle this, we curate samples from existing datasets at eBay, manually annotated with buyer-centric relevance scores, and centrality scores which reflect how well the product title matches the user’s intent. We introduce a User-intent Centrality Optimization (UCO) approach for existing models, which optimizes for the user intent in semantic product search. To that end, we propose a dual-loss based optimization to handle hard negatives, i.e., product titles that are semantically relevant but do not reflect the user’s intent. Our contributions include curating challenging evaluation sets and implementing UCO, resulting in significant improvements in product ranking efficiency, observed for different evaluation metrics. Our work aims to ensure that the most buyer-centric titles for a query are ranked higher, thereby, enhancing the user experience on e-commerce platforms.</abstract>
@@ -18719,7 +18719,7 @@
       <author><first>Zhi-Qi</first><last>Cheng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Yifei</first><last>Dong</last></author>
       <author><first>Aike</first><last>Shi</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Yuzhi</first><last>Hu</last></author>
       <author><first>Jason</first><last>O’Connor</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Alexander G</first><last>Hauptmann</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
@@ -18852,7 +18852,7 @@
       <title><fixed-case>F</fixed-case>ast<fixed-case>A</fixed-case>da<fixed-case>SP</fixed-case>: Multitask-Adapted Efficient Inference for Large Speech Language Model</title>
       <author><first>Yichen</first><last>Lu</last></author>
       <author><first>Jiaqi</first><last>Song</last></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>440-451</pages>
       <abstract>In this study, we aim to explore Multitask Speech Language Model (SpeechLM) efficient inference via token reduction. Unlike other modalities such as vision or text, speech has unique temporal dependencies, making previous efficient inference works on other modalities not directly applicable. Furthermore, methods for efficient SpeechLM inference on long sequence and sparse signals remain largely unexplored. In this work, we propose FastAdaSP, a weighted token merging framework specifically designed for various speech-related tasks to improve the trade-off between efficiency and performance. Experimental results on WavLLM and Qwen-Audio show that our method achieves the state-of-the-art (SOTA) efficiency-performance trade-off compared with other baseline methods. Specifically, FastAdaSP achieved 7x memory efficiency and 1.83x decoding throughput without any degradation on tasks like Emotion Recognition (ER) and Spoken Question Answering (SQA).</abstract>
@@ -18985,7 +18985,7 @@
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last><affiliation>Amazon</affiliation></author>
       <author><first>Jason Ingyu</first><last>Choi</last><affiliation>Amazon</affiliation></author>
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>563-572</pages>
       <abstract>In e-commerce, high consideration search missions typically require careful and elaborate decision making, and involve a substantial research investment from customers. We consider the task of identifying High Consideration (HC) queries. Identifying such queries enables e-commerce sites to better serve user needs using targeted experiences such as curated QA widgets that help users reach purchase decisions. We explore the task by proposing an Engagement-based Query Ranking (EQR) approach, focusing on query ranking to indicate potential engagement levels with query-related shopping knowledge content during product search. Unlike previous studies on predicting trends, EQR prioritizes query-level features related to customer behavior, finance, and catalog information rather than popularity signals. We introduce an accurate and scalable method for EQR and present experimental results demonstrating its effectiveness. Offline experiments show strong ranking performance. Human evaluation shows a precision of 96% for HC queries identified by our model. The model was commercially deployed, and shown to outperform human-selected queries in terms of downstream customer impact, as measured through engagement.</abstract>
       <url hash="3f9a2bf9">2024.emnlp-industry.42</url>
@@ -19202,7 +19202,7 @@
       <author><first>Shengguang</first><last>Bai</last></author>
       <author><first>Tianrui</first><last>Zhang</last></author>
       <author><first>Lin</first><last>Lin</last><affiliation>ucloud</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>North Carolina Central University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>North Carolina Central University</affiliation></author>
       <author><first>Jiawei</first><last>Ren</last></author>
       <pages>738-754</pages>
       <abstract>With the ever-increasing demands on Question Answering (QA) systems for IT operations and maintenance, an efficient and supervised fine-tunable framework is necessary to ensure the data security, private deployment and continuous upgrading. Although Large Language Models (LLMs) have notably improved the open-domain QA’s performance, how to efficiently handle enterprise-exclusive corpora and build domain-specific QA systems are still less-studied for industrial applications. In this paper, we propose a general and comprehensive framework based on Retrieval Augmented Generation (RAG) and facilitate the whole business process of establishing QA systems for IT operations and maintenance. In accordance with the prevailing RAG method, our proposed framework, named with RAG4ITOps, composes of two major stages: (1) Models Fine-tuning &amp; Data Vectorization, and (2) Online QA System Process. At the Stage 1, we leverage a contrastive learning method with two negative sampling strategies to fine-tune the embedding model, and design the instruction templates to fine-tune the LLM with a Retrieval Augmented Fine-Tuning method. At the Stage 2, an efficient process of QA system is built for serving. We collect enterprise-exclusive corpora from the domain of cloud computing, and the extensive experiments show that our method achieves superior results than counterparts on two kinds of QA tasks. Our experiment also provide a case for applying the RAG4ITOps to real-world enterprise-level applications.</abstract>
@@ -19274,7 +19274,7 @@
       <author><first>Jeongbae</first><last>Park</last><affiliation>Korea University</affiliation></author>
       <author><first>Cho Man</first><last>Young</last><affiliation>gaonplatform</affiliation></author>
       <author><first>Byeongho</first><last>Choi</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>805-820</pages>
       <url hash="201f7679">2024.emnlp-industry.61</url>
       <bibkey>hong-etal-2024-intelligent</bibkey>
@@ -19286,7 +19286,7 @@
       <author><first>Kee Kiat</first><last>Koo</last><affiliation>Amazon</affiliation></author>
       <author><first>Hayreddin</first><last>Ceker</last><affiliation>Amazon</affiliation></author>
       <author><first>Shaobai</first><last>Jiang</last><affiliation>Amazon and Iowa State University</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Amazon</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Julien</first><last>Han</last><affiliation>Amazon</affiliation></author>
       <author><first>Karim</first><last>Bouyarmane</last><affiliation>Amazon</affiliation></author>
       <pages>821-828</pages>
@@ -19614,7 +19614,7 @@
       <author><first>Maxwell</first><last>Crouse</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Chulaka</first><last>Gunasekara</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Shajith</first><last>Ikbal</last><affiliation>IBM Research AI, India</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Hima</first><last>Karanam</last></author>
       <author><first>Vineet</first><last>Kumar</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Asim</first><last>Munawar</last><affiliation>International Business Machines</affiliation></author>
@@ -19626,7 +19626,7 @@
       <author><first>Praveen</first><last>Venkateswaran</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Merve</first><last>Unuvar</last><affiliation>IBM TJ Watson Research Center</affiliation></author>
       <author><first>David Daniel</first><last>Cox</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Luis A.</first><last>Lastras</last></author>
       <author><first>Pavan</first><last>Kapanipathi</last><affiliation>International Business Machines</affiliation></author>
       <pages>1131-1139</pages>
@@ -19746,10 +19746,10 @@
     <paper id="94">
       <title>Prompt Leakage effect and mitigation strategies for multi-turn <fixed-case>LLM</fixed-case> Applications</title>
       <author><first>Divyansh</first><last>Agarwal</last><affiliation>Salesforce.com</affiliation></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Ben</first><last>Risher</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Philippe</first><last>Laban</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>1255-1275</pages>
       <abstract>Prompt leakage poses a compelling security and privacy threat in LLM applications. Leakage of system prompts may compromise intellectual property, and act as adversarial reconnaissance for an attacker. A systematic evaluation of prompt leakage threats and mitigation strategies is lacking, especially for multi-turn LLM interactions. In this paper, we systematically investigate LLM vulnerabilities against prompt leakage for 10 closed- and open-source LLMs, across four domains. We design a unique threat model which leverages the LLM sycophancy effect and elevates the average attack success rate (ASR) from 17.7% to 86.2% in a multi-turn setting. Our standardized setup further allows dissecting leakage of specific prompt contents such as task instructions and knowledge documents. We measure the mitigation effect of 7 black-box defense strategies, along with finetuning an open-source model to defend against leakage attempts. We present different combination of defenses against our threat model, including a cost analysis. Our study highlights key takeaways for building secure LLM applications and provides directions for research in multi-turn LLM interactions.</abstract>
@@ -19799,7 +19799,7 @@
       <author><first>Mohammed Nasheed</first><last>Yasin</last><affiliation>Uniphore</affiliation></author>
       <author><first>Tanner</first><last>Sorensen</last><affiliation>Uniphore</affiliation></author>
       <author><first>Alessandro Di</first><last>Bari</last><affiliation>UNIPHORE</affiliation></author>
-      <author><first>Andreas</first><last>Stolcke</last><affiliation>Uniphore Technologies</affiliation></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last><affiliation>Uniphore Technologies</affiliation></author>
       <pages>1305-1313</pages>
       <abstract>We present a light-weight approach for detecting nonfactual outputs from retrieval-augemented generation (RAG). Given a context and putative output, we compute a factuality score that can be thresholded to yield a binary decision to check the results of LLM-based question-answering, summarization, or other systems. Unlike factuality checkers that themselves rely on LLMs, we use compact, open-source natural language inference (NLI) models that yield a freely accessible solution with low latency and low cost at run-time, and no need for LLM fine-tuning. The approach also enables downstream mitigation and correction of hallucinations, by tracing them back to specific context chunks. Our experiments show high ROC-AUC across a wide range of relevant open source datasets, indicating the effectiveness of our method for fact-checking RAG output.</abstract>
       <url hash="181a4ba0">2024.emnlp-industry.97</url>
@@ -20025,7 +20025,7 @@
       <author><first>Ling</first><last>Shi</last></author>
       <author><first>Juesi</first><last>Xiao</last></author>
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>1499-1522</pages>
       <abstract>Large language models (LLMs) have demonstrated prowess in a wide range of tasks. However, many LLMs exhibit significant performance discrepancies between high- and low-resource languages. To mitigate this challenge, we present <b>FuxiTranyu</b>, an open-source multilingual LLM, which is designed to satisfy the need of the research community for balanced and high-performing multilingual capabilities. The base model, FuxiTranyu-8B, features 8 billion parameters and is trained from scratch on meticulously balanced multilingual data that contains 600 billion tokens covering 43 natural languages and 16 programming languages. We also develop two instruction-tuned models: FuxiTranyu-8B-SFT which is fine-tuned on a diverse multilingual instruction dataset, and FuxiTranyu-8B-DPO which is further refined with DPO on a preference dataset for enhanced alignment ability. Extensive experiments on a wide range of multilingual benchmarks demonstrate the competitive performance of FuxiTranyu against existing multilingual LLMs, e.g., BLOOM-7B, PolyLM-13B, and Mistral-7B-Instruct. Both neuron and representation interpretability analyses reveal that FuxiTranyu achieves consistent multilingual representations across languages. To promote further research into multilingual LLMs, we release both the base and instruction-tuned FuxiTranyu models together with 58 pre-training checkpoints at HuggingFace and Github.</abstract>
       <url hash="a4ab5470">2024.emnlp-industry.110</url>
@@ -20090,7 +20090,7 @@
       <author><first>Lu</first><last>Shi</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
       <author><first>Bin</first><last>Qi</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
       <author><first>Jiarui</first><last>Luo</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
       <author><first>Zhanzhao</first><last>Liang</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
       <author><first>Zhaowei</first><last>Gao</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc.</affiliation></author>
       <author><first>Wenke</first><last>Deng</last><affiliation>Beijing Jingwei Hirain Technologies Co., Inc. and Harbin Engineering University</affiliation></author>
@@ -20134,7 +20134,7 @@
     <paper id="118">
       <title>Mixture of Diverse Size Experts</title>
       <author><first>Manxi</first><last>Sun</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Pengzhi</first><last>Gao</last><affiliation>Baidu</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
@@ -20233,7 +20233,7 @@
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
       <author><first>Debora</first><last>Nozza</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>11–16</pages>
       <abstract>In today’s digital age, hate speech and offensive speech online pose a significant challenge to maintaining respectful and inclusive online environments. This tutorial aims to provide attendees with a comprehensive understanding of the field by delving into essential dimensions such as multilingualism, counter-narrative generation, a hands-on session with one of the most popular APIs for detecting hate speech, fairness, and ethics in AI, and the use of recent advanced approaches. In addition, the tutorial aims to foster collaboration and inspire participants to create safer online spaces by detecting and mitigating hate speech.</abstract>
@@ -20256,7 +20256,7 @@
     <paper id="4">
       <title>Reasoning with Natural Language Explanations</title>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>25–31</pages>
       <abstract>Explanation constitutes an archetypal feature of human rationality, underpinning learning and generalisation, and representing one of the media supporting scientific discovery and communication. Due to the importance of explanations in human reasoning, an increasing amount of research in Natural Language Inference (NLI) has started reconsidering the role that explanations play in learning and inference, attempting to build explanation-based NLI models that can effectively encode and use natural language explanations on downstream tasks. Research in explanation-based NLI, however, presents specific challenges and opportunities, as explanatory reasoning reflects aspects of both material and formal inference, making it a particularly rich setting to model and deliver complex reasoning. In this tutorial, we provide a comprehensive introduction to the field of explanation-based NLI, grounding this discussion on the epistemological-linguistic foundations of explanations, systematically describing the main architectural trends and evaluation methodologies that can be used to build systems capable of explanatory reasoning.</abstract>
       <url hash="b110995b">2024.emnlp-tutorials.4</url>
@@ -20278,7 +20278,7 @@
     <paper id="6">
       <title>Human-Centered Evaluation of Language Technologies</title>
       <author><first>Su Lin</first><last>Blodgett</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Vera</first><last>Liao</last></author>
       <author><first>Ziang</first><last>Xiao</last></author>
       <pages>39–43</pages>
diff --git a/data/xml/2024.eurali.xml b/data/xml/2024.eurali.xml
index 6bb618165a..0845b861d8 100644
--- a/data/xml/2024.eurali.xml
+++ b/data/xml/2024.eurali.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia (EURALI) @ LREC-COLING 2024</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Sina</first><last>Ahmadi</last></editor>
       <editor><first>Silvie</first><last>Cinková</last></editor>
       <editor><first>Theodorus</first><last>Fransen</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
diff --git a/data/xml/2024.fever.xml b/data/xml/2024.fever.xml
index 6676c09963..9cfaccfec1 100644
--- a/data/xml/2024.fever.xml
+++ b/data/xml/2024.fever.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-11-05" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh Fact Extraction and VERification Workshop (FEVER)</booktitle>
-      <editor><first>Michael</first><last>Schlichtkrull</last></editor>
+      <editor id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></editor>
       <editor><first>Yulong</first><last>Chen</last></editor>
       <editor><first>Chenxi</first><last>Whitehouse</last></editor>
       <editor><first>Zhenyun</first><last>Deng</last></editor>
@@ -88,7 +88,7 @@
       <author><first>Irina</first><last>Nikishina</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Martin</first><last>Semmann</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>55-63</pages>
       <abstract>This paper presents UHH’s approach developed for the AVeriTeC shared task. The goal of the challenge is to verify given real-world claims with evidences from the Web. In this shared task, we investigate a Retrieval-Augmented Generation (RAG) model, which mainly contains retrieval, generation, and augmentation components. We start with the selection of the top 10k evidences via BM25 scores, and continue with two approaches to retrieve the most similar evidences: (1) to retrieve top 10 evidences through vector similarity, generate questions for them, and rerank them or (2) to generate questions for the claim and retrieve the most similar evidence, again, through vector similarity. After retrieving the top evidences, a Large Language Model (LLM) is prompted using the claim along with either all evidences or individual evidence to predict the label. Our system submission, <tex-math>\textbf{UHH}</tex-math>, using the first approach and individual evidence prompts, ranks 6th out of 23 systems.</abstract>
       <url hash="e648df01">2024.fever-1.5</url>
diff --git a/data/xml/2024.fieldmatters.xml b/data/xml/2024.fieldmatters.xml
index 79057d198d..f08fc6fd0c 100644
--- a/data/xml/2024.fieldmatters.xml
+++ b/data/xml/2024.fieldmatters.xml
@@ -11,7 +11,7 @@
       <editor><first>Elena</first><last>Klyachko</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Tatiana</first><last>Shavrina</last></editor>
-      <editor><first>Francis</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -105,7 +105,7 @@
       <title>Comparing <fixed-case>K</fixed-case>aldi-Based Pipeline Elpis and Whisper for Čakavian Transcription</title>
       <author><first>Austin</first><last>Jones</last><affiliation>University of Georgia</affiliation></author>
       <author><first>Shulin</first><last>Zhang</last></author>
-      <author><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
+      <author id="john-hale"><first>John</first><last>Hale</last><affiliation>Johns Hopkins University, University of Georgia and DeepMind</affiliation></author>
       <author><first>Margaret</first><last>Renwick</last></author>
       <author><first>Zvjezdana</first><last>Vrzic</last><affiliation>NA</affiliation></author>
       <author><first>Keith</first><last>Langston</last><affiliation>University of Georgia</affiliation></author>
@@ -121,7 +121,7 @@
       <author><first>Isaiah Edri W.</first><last>Flores</last></author>
       <author><first>Katrina Bernice M.</first><last>Tan</last></author>
       <author><first>Ma. Regina E.</first><last>Estuar</last></author>
-      <author><first>Jann Railey E.</first><last>Montalan</last></author>
+      <author id="jann-railey-montalan"><first>Jann Railey E.</first><last>Montalan</last></author>
       <author><first>Marlene M.</first><last>De Leon</last></author>
       <pages>69-77</pages>
       <abstract>Supervised learning approaches in NLP, exemplified by POS tagging, rely heavily on the presence of large amounts of annotated data. However, acquiring such data often requires significant amount of resources and incurs high costs. In this work, we explore zero-shot cross-lingual transfer learning to address data scarcity issues in Filipino POS tagging, particularly focusing on optimizing source language selection. Our zero-shot approach demonstrates superior performance compared to previous studies, with top-performing fine-tuned PLMs achieving F1 scores as high as 79.10%. The analysis reveals moderate correlations between cross-lingual transfer performance and specific linguistic distances–featural, inventory, and syntactic–suggesting that source languages with these features closer to Filipino provide better results. We identify tokenizer optimization as a key challenge, as PLM tokenization sometimes fails to align with meaningful representations, thus hindering POS tagging performance.</abstract>
diff --git a/data/xml/2024.figlang.xml b/data/xml/2024.figlang.xml
index 3e1c1f947e..c11bc32495 100644
--- a/data/xml/2024.figlang.xml
+++ b/data/xml/2024.figlang.xml
@@ -46,7 +46,7 @@
       <title>Comparison of Image Generation Models for Abstract and Concrete Event Descriptions</title>
       <author><first>Mohammed</first><last>Khaliq</last></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>15-21</pages>
       <abstract>With the advent of diffusion-based image generation models such as DALL-E, Stable Diffusion and Midjourney, high quality images can be easily generated using textual inputs. It is unclear, however, to what extent the generated images resemble human mental representations, especially regarding abstract event knowledge. We analyse the capability of four state-of-the-art models in generating images of verb-object event pairs when we systematically manipulate the degrees of abstractness of both the verbs and the object nouns. Human judgements assess the generated images and demonstrate that DALL-E is strongest for event pairs with concrete nouns (e.g., “pour water”; “believe person”), while Midjourney is preferred for event pairs with abstract nouns (e.g., “raise awareness”; “remain mystery”), irrespective of the concreteness of the verb. Across models, humans were most unsatisfied with images of events pairs that combined concrete verbs with abstract direct-object nouns (e.g., “speak truth”), and an additional ad-hoc annotation contributes this to its potential for figurative language.</abstract>
       <url hash="9292c8c2">2024.figlang-1.3</url>
@@ -56,7 +56,7 @@
     <paper id="4">
       <title>Cross-Lingual Metaphor Detection for Low-Resource Languages</title>
       <author><first>Anna</first><last>Hülsing</last><affiliation>Universität Hildesheim</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>22-34</pages>
       <abstract>Research on metaphor detection (MD) in a multilingual setup has recently gained momentum. As for many tasks, it is however unclear how the amount of data used to pretrain large language models affects the performance, and whether non-neural models might provide a reasonable alternative, especially for MD in low-resource languages. This paper compares neural and non-neural cross-lingual models for English as the source language and Russian, German and Latin as target languages. In a series of experiments we show that the neural cross-lingual adapter architecture MAD-X performs best across target languages. Zero-shot classification with mBERT achieves decent results above the majority baseline, while few-shot classification with mBERT heavily depends on shot-selection, which is inconvenient in a cross-lingual setup where no validation data for the target language exists. The non-neural model, a random forest classifier with conceptual features, is outperformed by the neural models. Overall, we recommend MAD-X for metaphor detection not only in high-resource but also in low-resource scenarios regarding the amounts of pretraining data for mBERT.</abstract>
       <url hash="b895331f">2024.figlang-1.4</url>
@@ -101,7 +101,7 @@
     <paper id="8">
       <title>Evaluating the Development of Linguistic Metaphor Annotation in <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>panish Popular Science Tweets</title>
       <author><first>Alec</first><last>Sánchez-Montero</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Sergio-Luis</first><last>Ojeda-Trueba</last></author>
       <pages>59-64</pages>
       <abstract>Following previous work on metaphor annotation and automatic metaphor processing, this study presents the evaluation of an initial phase in the novel area of linguistic metaphor detection in Mexican Spanish popular science tweets. Specifically, we examine the challenges posed by the annotation process stemming from disagreement among annotators. During this phase of our work, we conducted the annotation of a corpus comprising 3733 Mexican Spanish popular science tweets. This corpus was divided into two halves and each half was then assigned to two different pairs of native Mexican Spanish-speaking annotators. Despite rigorous methodology and continuous training, inter-annotator agreement as measured by Cohen’s kappa was found to be low, slightly above chance levels, although the concordance percentage exceeded 60%. By elucidating the inherent complexity of metaphor annotation tasks, our evaluation emphasizes the implications of these findings and offers insights for future research in this field, with the aim of creating a robust dataset for machine learning.</abstract>
@@ -132,7 +132,7 @@
     <paper id="11">
       <title>An Expectation-Realization Model for Metaphor Detection</title>
       <author><first>Oseremen</first><last>Uduehi</last><affiliation>Ohio University</affiliation></author>
-      <author><first>Razvan</first><last>Bunescu</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last><affiliation>University of North Carolina at Charlotte</affiliation></author>
       <pages>79-84</pages>
       <abstract>We propose a new model for metaphor detection in which an expectation component estimates representations of expected word meanings in a given context, whereas a realization component computes representations of target word meanings in context. We also introduce a systematic evaluation methodology that estimates generalization performance in three settings: within distribution, a new strong out of distribution setting, and a novel out-of-pretraining setting. Across all settings, the expectation-realization model obtains results that are competitive with or better than previous metaphor detection models.</abstract>
       <url hash="67229d07">2024.figlang-1.11</url>
diff --git a/data/xml/2024.findings.xml b/data/xml/2024.findings.xml
index 6c88ce69c2..d347061e37 100644
--- a/data/xml/2024.findings.xml
+++ b/data/xml/2024.findings.xml
@@ -78,7 +78,7 @@
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Sugyeong</first><last>Eo</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanhee</first><last>Lee</last><affiliation>NAVER</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>67-78</pages>
       <abstract>The recent advancements in the realm of Automatic Speech Recognition (ASR) post-processing have been primarily driven by sequence-to-sequence paradigms. Despite their effectiveness, these methods often demand substantial amounts of data, necessitating the expensive recruitment of phonetic transcription experts to rectify the erroneous outputs of ASR systems, thereby creating the desired training data. Back TranScription (BTS) alleviates this issue by generating ASR inputs from clean text via a Text-to-Speech (TTS) system. While initial studies on BTS exhibited promise, they were constrained by a limited dataset of just 200,000 sentence pairs, leaving the scalability of this method in question. In this study, we delve into the potential scalability of BTS. We introduce the “Hyper-BTS” dataset, a corpus approximately five times larger than that utilized in prior research. Additionally, we present innovative criteria for categorizing error types within ASR post-processing. This not only facilitates a more comprehensive qualitative analysis, which was absent in preceding studies, but also enhances the understanding of ASR error patterns. Our empirical results, both quantitative and qualitative, suggest that the enlarged scale of the Hyper-BTS dataset sufficiently addresses a vast majority of the ASR error categories. We make the Hyper-BTS dataset publicly available.</abstract>
       <url hash="822de166">2024.findings-eacl.5</url>
@@ -150,7 +150,7 @@
     <paper id="11">
       <title>How Does In-Context Learning Help Prompt Tuning?</title>
       <author><first>Simeng</first><last>Sun</last><affiliation>University of Massachusetts, Amherst</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Dan</first><last>Iter</last></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Zoom</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
@@ -261,8 +261,8 @@
       <author><first>Peiqin</first><last>Lin</last><affiliation>Institut für Informatik</affiliation></author>
       <author><first>Chengzhi</first><last>Hu</last></author>
       <author><first>Zheyu</first><last>Zhang</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>276-310</pages>
       <abstract>Recent multilingual pretrained language models (mPLMs) have been shown to encode strong language-specific signals, which are not explicitly provided during pretraining. It remains an open question whether it is feasible to employ mPLMs to measure language similarity, and subsequently use the similarity results to select source languages for boosting cross-lingual transfer. To investigate this, we propose mPLM-Sim, a language similarity measure that induces the similarities across languages from mPLMs using multi-parallel corpora. Our study shows that mPLM-Sim exhibits moderately high correlations with linguistic similarity measures, such as lexicostatistics, genealogical language family, and geographical sprachbund. We also conduct a case study on languages with low correlation and observe that mPLM-Sim yields more accurate similarity results. Additionally, we find that similarity results vary across different mPLMs and different layers within an mPLM. We further investigate whether mPLM-Sim is effective for zero-shot cross-lingual transfer by conducting experiments on both low-level syntactic tasks and high-level semantic tasks. The experimental results demonstrate that mPLM-Sim is capable of selecting better source languages than linguistic measures, resulting in a 1%-2% improvement in zero-shot cross-lingual transfer performance.</abstract>
       <url hash="accddcb1">2024.findings-eacl.20</url>
@@ -294,7 +294,7 @@
     <paper id="22">
       <title>A Comprehensive Evaluation of Inductive Reasoning Capabilities and Problem Solving in Large Language Models</title>
       <author><first>Chen</first><last>Bowen</last></author>
-      <author><first>Rune</first><last>Sætre</last><affiliation>Norwegian University of Science and Technology</affiliation></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last><affiliation>Norwegian University of Science and Technology</affiliation></author>
       <author><first>Yusuke</first><last>Miyao</last><affiliation>The University of Tokyo</affiliation></author>
       <pages>323-339</pages>
       <abstract>Inductive reasoning is fundamental to both human and artificial intelligence. The inductive reasoning abilities of current Large Language Models (LLMs) are evaluated in this research.We argue that only considering induction of rules is too narrow and unrealistic, since inductive reasoning is usually mixed with other abilities, like rules application, results/rules validation, and updated information integration.We probed the LLMs with a set of designed symbolic tasks and found that even state-of-the-art (SotA) LLMs fail significantly, showing the inability of LLMs to perform these intuitively simple tasks.Furthermore, we found that perfect accuracy in a small-size problem does not guarantee the same accuracy in a larger-size version of the same problem, provoking the question of how we can assess the LLMs’ actual problem-solving capabilities.We also argue that Chain-of-Thought prompts help the LLMs by decomposing the problem-solving process, but the LLMs still learn limitedly.Furthermore, we reveal that few-shot examples assist LLM generalization in out-of-domain (OOD) cases, albeit limited. The LLM starts to fail when the problem deviates from the provided few-shot examples.</abstract>
@@ -317,7 +317,7 @@
       <author><first>Zhuang</first><last>Li</last><affiliation>Monash University</affiliation></author>
       <author><first>Levon</first><last>Haroutunian</last><affiliation>Openstream, Inc.</affiliation></author>
       <author><first>Raj</first><last>Tumuluri</last><affiliation>Openstream Inc</affiliation></author>
-      <author><first>Philip</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
       <author><first>Reza</first><last>Haf</last><affiliation>Monash University</affiliation></author>
       <pages>347-354</pages>
       <abstract>Post-editing has proven effective in improving the quality of text generated by large language models (LLMs) such as GPT-3.5 or GPT-4, particularly when direct updating of their parameters to enhance text quality is infeasible or expensive. However, relying solely on smaller language models for post-editing can limit the LLMs’ ability to generalize across domains. Moreover, the editing strategies in these methods are not optimally designed for text generation tasks. To address these limitations, we propose a neural programmer-interpreter approach that preserves the domain generalization ability of LLMs while editing their output. The editing actions in this framework are specifically devised for text generation. Extensive experiments demonstrate that the programmer-interpreter significantly enhances GPT-3.5’s performance in logical form-to-text conversion and low-resource machine translation, surpassing other state-of-the-art (SOTA) LLM post-editing methods in cross-domain settings.</abstract>
@@ -366,7 +366,7 @@
       <title>Entity Linking in the Job Market Domain</title>
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>410-419</pages>
       <abstract>In Natural Language Processing, entity linking (EL) has centered around Wikipedia, but yet remains underexplored for the job market domain. Disambiguating skill mentions can help us get insight into the current labor market demands. In this work, we are the first to explore EL in this domain, specifically targeting the linkage of occupational skills to the ESCO taxonomy (le Vrang et al., 2014). Previous efforts linked coarse-grained (full) sentences to a corresponding ESCO skill. In this work, we link more fine-grained span-level mentions of skills. We tune two high-performing neural EL models, a bi-encoder (Wu et al., 2020) and an autoregressive model (Cao et al., 2021), on a synthetically generated mention–skill pair dataset and evaluate them on a human-annotated skill-linking benchmark. Our findings reveal that both models are capable of linking implicit mentions of skills to their correct taxonomy counterparts. Empirically, BLINK outperforms GENRE in strict evaluation, but GENRE performs better in loose evaluation (accuracy@k).</abstract>
       <url hash="21d3a5dd">2024.findings-eacl.28</url>
@@ -610,7 +610,7 @@
       <author><first>Rose</first><last>Wang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Pawan</first><last>Wirawarn</last></author>
       <author><first>Omar</first><last>Khattab</last></author>
-      <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
       <author><first>Dorottya</first><last>Demszky</last><affiliation>Stanford University</affiliation></author>
       <pages>722-735</pages>
       <abstract>Many online content portals allow users to ask questions to supplement their understanding (e.g., of lectures). While information retrieval (IR) systems may provide answers for such user queries, they do not directly assist content creators—such as lecturers who want to improve their content—identify segments that caused a user to ask those questions.We introduce the task of backtracing, in which systems retrieve the text segment that most likely caused a user query.We formalize three real-world domains for which backtracing is important in improving content delivery and communication: understanding the cause of (a) student confusion in the Lecture domain, (b) reader curiosity in the News Article domain, and (c) user emotion in the Conversation domain.We evaluate the zero-shot performance of popular information retrieval methods and language modeling methods, including bi-encoder, re-ranking and likelihood-based methods and ChatGPT.While traditional IR systems retrieve semantically relevant information (e.g., details on “projection matrices” for a query “does projecting multiple times still lead to the same point?”), they often miss the causally relevant context (e.g., the lecturer states “projecting twice gets me the same answer as one projection”). Our results show that there is room for improvement on backtracing and it requires new retrieval approaches.We hope our benchmark serves to improve future retrieval systems for backtracing, spawning systems that refine content generation and identify linguistic triggers influencing user queries.</abstract>
@@ -780,8 +780,8 @@
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Xudong</first><last>Han</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>896-911</pages>
       <abstract>With the rapid evolution of large language models (LLMs), new and hard-to-predict harmful capabilities are emerging. This requires developers to identify potential risks through the evaluation of “dangerous capabilities” in order to responsibly deploy LLMs. Here we aim to facilitate this process. In particular, we collect an open-source dataset to evaluate the safeguards in LLMs, to facilitate the deployment of safer open-source LLMs at a low cost. Our dataset is curated and filtered to consist only of instructions that responsible language models should not follow. We assess the responses of six popular LLMs to these instructions, and we find that simple BERT-style classifiers can achieve results that are comparable to GPT-4 on automatic safety evaluation. Our data and code are available at https://github.com/Libr-AI/do-not-answer</abstract>
       <url hash="3329c299">2024.findings-eacl.61</url>
@@ -826,7 +826,7 @@
       <title>Rethinking <fixed-case>STS</fixed-case> and <fixed-case>NLI</fixed-case> in Large Language Models</title>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>965-982</pages>
       <abstract>Recent years, have seen the rise of large language models (LLMs), where practitioners use task-specific prompts; this was shown to be effective for a variety of tasks. However, when applied to semantic textual similarity (STS) and natural language inference (NLI), the effectiveness of LLMs turns out to be limited by low-resource domain accuracy, model overconfidence, and difficulty to capture the disagreements between human judgements. With this in mind, here we try to rethink STS and NLI in the era of LLMs. We first evaluate the performance of STS and NLI in the clinical/biomedical domain, and then we assess LLMs’ predictive confidence and their capability of capturing collective human opinions. We find that these old problems are still to be properly addressed in the era of LLMs.</abstract>
       <url hash="4cbaa3d3">2024.findings-eacl.65</url>
@@ -837,7 +837,7 @@
       <title>Learning High-Quality and General-Purpose Phrase Representations</title>
       <author><first>Lihu</first><last>Chen</last></author>
       <author><first>Gael</first><last>Varoquaux</last><affiliation>INRIA</affiliation></author>
-      <author><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>983-994</pages>
       <abstract>Phrase representations play an important role in data science and natural language processing, benefiting various tasks like Entity Alignment, Record Linkage, Fuzzy Joins, and Paraphrase Classification.The current state-of-the-art method involves fine-tuning pre-trained language models for phrasal embeddings using contrastive learning. However, we have identified areas for improvement. First, these pre-trained models tend to be unnecessarily complex and require to be pre-trained on a corpus with context sentences.Second, leveraging the phrase type and morphology gives phrase representations that are both more precise and more flexible.We propose an improved framework to learn phrase representations in a context-free fashion.The framework employs phrase type classification as an auxiliary task and incorporates character-level information more effectively into the phrase representation.Furthermore, we design three granularities of data augmentation to increase the diversity of training samples.Our experiments across a wide range of tasks reveal that our approach generates superior phrase embeddings compared to previous methods while requiring a smaller model size.</abstract>
       <url hash="da034f83">2024.findings-eacl.66</url>
@@ -850,7 +850,7 @@
       <author><first>Ruochen</first><last>Zhao</last></author>
       <author><first>Tan</first><last>Wang</last></author>
       <author><first>Yongjie</first><last>Wang</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>995-1012</pages>
       <abstract>To encourage fairness and transparency, there exists an urgent demand for deriving reliable explanations for large language models (LLMs). One promising solution is concept-based explanations, i.e., human-understandable concepts from internal representations. However, due to the compositional nature of languages, current methods mostly discover correlational explanations instead of causal features. Therefore, we propose a novel framework to provide impact-aware explanations for users to understand the LLM’s behavior, which are robust to feature changes and influential to the model’s predictions. Specifically, we extract predictive high-level features (concepts) from the model’s hidden layer activations. Then, we innovatively optimize for features whose existence causes the output predictions to change substantially. Extensive experiments on real and synthetic tasks demonstrate that our method achieves superior results on predictive impact, explainability, and faithfulness compared to the baselines, especially for LLMs.</abstract>
       <url hash="5970a8fc">2024.findings-eacl.67</url>
@@ -875,7 +875,7 @@
       <author><first>Miaoran</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Youbiao</first><last>He</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Yinfei</first><last>Yang</last><affiliation>Apple</affiliation></author>
-      <author><first>Forrest</first><last>Bao</last><affiliation>Iowa State University, Iowa State University and Iowa State University</affiliation></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last><affiliation>Iowa State University, Iowa State University and Iowa State University</affiliation></author>
       <pages>1026-1037</pages>
       <abstract>Factual consistency detection has gotten raised attention in the task of abstractive summarization. Many existing works rely on synthetic training data, which may not accurately reflect or match the inconsistencies produced by summarization models. In this paper, we first systematically analyze the shortcomings of the current methods in synthesizing inconsistent summaries. Current synthesis methods may fail to produce inconsistencies of coreference errors and discourse errors, per our quantitative and qualitative study. Then, employing the parameter-efficient finetuning (PEFT) technique, we discover that a competitive factual consistency detector can be achieved using thousands of real model-generated summaries with human annotations. Our study demonstrates the importance of real machine-generated texts with human annotation in NLG evaluation as our model outperforms the SOTA on the CoGenSumm, FactCC, Frank, and SummEval datasets.</abstract>
       <url hash="814be2d6">2024.findings-eacl.69</url>
@@ -887,7 +887,7 @@
       <author><first>Lingzhi</first><last>Wang</last></author>
       <author><first>Xiaoyan</first><last>Zhao</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Jing</first><last>Li</last><affiliation>The Hong Kong Polytechnic University</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1038-1050</pages>
       <abstract>This study focuses on media bias detection, crucial in today’s era of influential social media platforms shaping individual attitudes and opinions. In contrast to prior work that primarily relies on training specific models tailored to particular datasets, resulting in limited adaptability and subpar performance on out-of-domain data, we introduce a general bias detection framework, IndiVec, built upon large language models. IndiVec begins by constructing a fine-grained media bias database, leveraging the robust instruction-following capabilities of large language models and vector database techniques. When confronted with new input for bias detection, our framework automatically selects the most relevant indicator from the vector database and employs majority voting to determine the input’s bias label. IndiVec excels compared to previous methods due to its adaptability (demonstrating consistent performance across diverse datasets from various sources) and explainability (providing explicit top-k indicators to interpret bias predictions). Experimental results on four political bias datasets highlight IndiVec’s significant superiority over baselines. Furthermore, additional experiments and analysis provide profound insights into the framework’s effectiveness.</abstract>
       <url hash="05911d09">2024.findings-eacl.70</url>
@@ -939,7 +939,7 @@
       <author><first>Nitin</first><last>Ramrakhiyani</last><affiliation>International Institute of Information Technology, Hyderabad and Tata Consultancy Services Limited, India</affiliation></author>
       <author><first>Anubhav</first><last>Sinha</last><affiliation>Tata Consultancy Services Limited, India</affiliation></author>
       <author><first>Manoj</first><last>Apte</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>1099-1114</pages>
       <abstract>In this paper, we propose a novel two-step technique for text classification using autoregressive Language Models (LM). In the first step, a set of perplexity and log-likelihood based numeric features are elicited from an LM for a text instance to be classified. Then, in the second step, a classifier based on these features is trained to predict the final label. The classifier used is usually a simple machine learning classifier like Support Vector Machine (SVM) or Logistic Regression (LR) and it is trained using a small set of training examples. We believe, our technique presents a whole new way of exploiting the available training instances, in addition to the existing ways like fine-tuning LMs or in-context learning. Our approach stands out by eliminating the need for parameter updates in LMs, as required in fine-tuning, and does not impose limitations on the number of training examples faced while building prompts for in-context learning. We evaluate our technique across 5 different datasets and compare with multiple competent baselines.</abstract>
       <url hash="c83c1003">2024.findings-eacl.74</url>
@@ -959,7 +959,7 @@
     <paper id="76">
       <title>Improving Multimodal Classification of Social Media Posts by Leveraging Image-Text Auxiliary Tasks</title>
       <author><first>Danae</first><last>Sanchez Villegas</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Nikolaos</first><last>Aletras</last><affiliation>University of Sheffield, University of Sheffield and Amazon</affiliation></author>
       <pages>1126-1137</pages>
       <abstract>Effectively leveraging multimodal information from social media posts is essential to various downstream tasks such as sentiment analysis, sarcasm detection or hate speech classification. Jointly modeling text and images is challenging because cross-modal semantics might be hidden or the relation between image and text is weak. However, prior work on multimodal classification of social media posts has not yet addressed these challenges. In this work, we present an extensive study on the effectiveness of using two auxiliary losses jointly with the main task during fine-tuning multimodal models. First, Image-Text Contrastive (ITC) is designed to minimize the distance between image-text representations within a post, thereby effectively bridging the gap between posts where the image plays an important role in conveying the post’s meaning. Second, Image-Text Matching (ITM) enhances the model’s ability to understand the semantic relationship between images and text, thus improving its capacity to handle ambiguous or loosely related posts. We combine these objectives with five multimodal models, demonstrating consistent improvements of up to 2.6 F1 score across five diverse social media datasets. Our comprehensive analysis shows the specific scenarios where each auxiliary task is most effective.</abstract>
@@ -983,7 +983,7 @@
       <title><fixed-case>I</fixed-case>ndi<fixed-case>F</fixed-case>ood<fixed-case>VQA</fixed-case>: Advancing Visual Question Answering and Reasoning with a Knowledge-Infused Synthetic Data Generation Pipeline</title>
       <author><first>Pulkit</first><last>Agarwal</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
       <author><first>Settaluri</first><last>Sravanthi</last><affiliation>Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>1158-1176</pages>
       <abstract>Large Vision Language Models (VLMs) like GPT-4, LLaVA, and InstructBLIP exhibit extraordinary capabilities for both knowledge understanding and reasoning. However, the reasoning capabilities of such models on sophisticated problems that require external knowledge of a specific domain have not been assessed well, due to the unavailability of necessary datasets. In this work, we release a first-of-its-kind dataset called IndiFoodVQA with around 16.7k data samples, consisting of explicit knowledge-infused questions, answers, and reasons. We also release IndiFoodKG, a related Knowledge Graph (KG) with 79k triples. The data has been created with minimal human intervention via an automated pipeline based on InstructBlip and GPT-3.5. We also present a methodology to extract knowledge from the KG and use it to both answer and reason upon the questions. We employ different models to report baseline zero-shot and fine-tuned results. Fine-tuned VLMs on our data showed an improvement of ~25% over the corresponding base model, highlighting the fact that current VLMs need domain-specific fine-tuning to excel in specialized settings. Our findings reveal that (1) explicit knowledge infusion during question generation helps in making questions that have more grounded knowledge, and (2) proper knowledge retrieval can often lead to better-answering potential in such cases. The data and code is available at https://github.com/SLSravanthi/IndifoodVQA.</abstract>
       <url hash="f3aa97e4">2024.findings-eacl.78</url>
@@ -1065,7 +1065,7 @@
       <author><first>Lifu</first><last>Tu</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Jin</first><last>Qu</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Wenhao</first><last>Liu</last><affiliation>Faire</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
@@ -1106,7 +1106,7 @@
       <author><first>Jennifer</first><last>Hsia</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Danish</first><last>Pruthi</last><affiliation>Indian Institute of Science, Bangalore</affiliation></author>
       <author><first>Aarti</first><last>Singh</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1322-1335</pages>
       <abstract>Despite the rising popularity of saliency-based explanations, the research community remains at an impasse, facing doubts concerning their purpose, efficacy, and tendency to contradict each other. Seeking to unite the community’s efforts around common goals, several recent works have proposed evaluation metrics. In this paper, we critically examine two sets of metrics: the ERASER metrics (comprehensiveness and sufficiency) and the EVAL-X metrics, focusing our inquiry on natural language processing. First, we show that we can inflate a model’s comprehensiveness and sufficiency scores dramatically without altering its predictions or explanations on in-distribution test inputs. Our strategy exploits the tendency for extracted explanations and their complements to be “out-of-support” relative to each other and in-distribution inputs. Next, we demonstrate that the EVAL-X metrics can be inflated arbitrarily by a simple method that encodes the label, even though EVAL-X is precisely motivated to address such exploits. Our results raise doubts about the ability of current metrics to guide explainability research, underscoring the need for a broader reassessment of what precisely these metrics are intended to capture.</abstract>
       <url hash="e6cc8838">2024.findings-eacl.88</url>
@@ -1168,7 +1168,7 @@
     </paper>
     <paper id="93">
       <title>Minimal Distillation Schedule for Extreme Language Model Compression</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yang</first><last>Yang</last></author>
       <author><first>Qifan</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
       <author><first>Jiahao</first><last>Liu</last></author>
@@ -1204,7 +1204,7 @@
     <paper id="96">
       <title>Aspect-based Key Point Analysis for Quantitative Summarization of Reviews</title>
       <author><first>An Quang</first><last>Tang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Minh Ngoc</first><last>Dinh</last></author>
       <pages>1419-1433</pages>
       <abstract>Key Point Analysis (KPA) is originally for summarizing arguments, where short sentences containing salient viewpoints are extracted as key points (KPs) and quantified for their prevalence as salience scores. Recently, KPA was applied to summarize reviews, but the study still relies on sentence-based KP extraction and matching, which leads to two issues: sentence-based extraction can result in KPs of overlapping opinions on the same aspects, and sentence-based matching of KP to review comment can be inaccurate, resulting in inaccurate salience scores. To address the above issues, in this paper, we propose Aspect-based Key Point Analysis (ABKPA), a novel framework for quantitative review summarization. Leveraging the readily available aspect-based sentiment analysis (ABSA) resources of reviews to automatically annotate silver labels for matching aspect-sentiment pairs, we propose a contrastive learning model to effectively match KPs to reviews and quantify KPs at the aspect level. Especially, the framework ensures extracting KP of distinct aspects and opinions, leading to more accurate opinion quantification. Experiments on five business categories of the popular Yelp review dataset show that ABKPA outperforms state-of-the-art baselines. Source code and data are available at: https://github.com/antangrocket1312/ABKPA</abstract>
@@ -1217,7 +1217,7 @@
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last><affiliation>University of Manchester, University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>1434-1450</pages>
       <abstract>Achieving precise semantic control over the latent spaces of Variational AutoEncoders (VAEs) holds significant value for downstream tasks in NLP as the underlying generative mechanisms could be better localised, explained and improved upon. Recent research, however, has struggled to achieve consistent results, primarily due to the inevitable loss of semantic information in the variational bottleneck and limited control over the decoding mechanism. To overcome these challenges, we investigate discrete latent spaces in Vector Quantized Variational AutoEncoder (VQVAE) to improve semantic control and generation in Transformer-based VAEs. In particular, We propose T5VQVAE, a novel model that leverages the controllability of VQVAE to guide the self-attention mechanism in T5, exploiting its full generalization capabilities. Experimental results indicate that T5VQVAE outperforms existing state-of-the-art VAE models, including Optimus, in terms of control and preservation of semantic information across different tasks such as auto-encoding of sentences and mathematical expressions, text transfer, and inference. Moreover, T5VQVAE exhibits improved reasoning capabilities, suggesting potential applications for downstream natural language and symbolic inference tasks.</abstract>
       <url hash="170b94f0">2024.findings-eacl.97</url>
@@ -1226,7 +1226,7 @@
     <paper id="98">
       <title>High-quality Data-to-Text Generation for Severely Under-Resourced Languages with Out-of-the-box Large Language Models</title>
       <author><first>Michela</first><last>Lorandi</last><affiliation>Dublin City University</affiliation></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
       <pages>1451-1461</pages>
       <abstract>The performance of NLP methods for severely under-resourced languages cannot currently hope to match the state of the art in NLP methods for well resourced languages. We explore the extent to which pretrained large language models (LLMs) can bridge this gap, via the example of data-to-text generation for Irish, Welsh, Breton and Maltese. We test LLMs on these under-resourced languages and English, in a range of scenarios. We find that LLMs easily set the state of the art for the under-resourced languages by substantial margins, as measured by both automatic and human evaluations. For all our languages, human evaluation shows on-a-par performance with humans for our best systems, but BLEU scores collapse compared to English, casting doubt on the metric’s suitability for evaluating non-task-specific systems. Overall, our results demonstrate the great potential of LLMs to bridge the performance gap for under-resourced languages.</abstract>
       <url hash="6a16680d">2024.findings-eacl.98</url>
@@ -1335,7 +1335,7 @@
     <paper id="106">
       <title>Assessing the Portability of Parameter Matrices Trained by Parameter-Efficient Finetuning Methods</title>
       <author><first>Mohammed</first><last>Sabry</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>1548-1556</pages>
       <abstract>As the cost of training ever larger language models has grown, so has the interest in reusing previously learnt knowledge. Transfer learning methods have shown how reusing non-task-specific knowledge can help in subsequent task-specific learning.In this paper, we investigate the inverse: porting whole functional modules that encode task-specific knowledge from one model to another. We designed a study comprising 1,440 training/testing runs to test the portability of modules trained by parameter-efficient finetuning (PEFT) techniques, using sentiment analysis as an example task. We test portability in a wide range of scenarios, involving different PEFT techniques and different pretrained host models, among other dimensions. We compare the performance of ported modules with that of equivalent modules trained (i) from scratch, and (ii) from parameters sampled from the same distribution as the ported module.We find that the ported modules far outperform the two alternatives tested, but that there are interesting differences between the four PEFT techniques tested.We conclude that task-specific knowledge in the form of structurally modular sets of parameters as produced by PEFT techniques is highly portable, but that degree of success depends on type of PEFT and on differences between originating and receiving pretrained models.</abstract>
       <url hash="4e5f93e8">2024.findings-eacl.106</url>
@@ -1428,7 +1428,7 @@
       <title>Solving <fixed-case>NLP</fixed-case> Problems through Human-System Collaboration: A Discussion-based Approach</title>
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>1644-1658</pages>
       <abstract>Humans work together to solve common problems by having discussions, explaining, and agreeing or disagreeing with each other.Similarly, if a system can have discussions with human partners when solving tasks, it has the potential to improve the system’s performance and reliability.In previous research on explainability, it has only been possible for systems to make predictions and for humans to ask questions about them, rather than having a mutual exchange of opinions.This research aims to create a dataset and computational framework for systems that discuss and refine their predictions through dialogue. Through experiments, we show that the proposed system can have beneficial discussions with humans, improving the accuracy by up to 25 points on a natural language inference task.</abstract>
       <url hash="e5d679ba">2024.findings-eacl.114</url>
@@ -1449,7 +1449,7 @@
     <paper id="116">
       <title><fixed-case>CMA</fixed-case>-<fixed-case>R</fixed-case>: Causal Mediation Analysis for Explaining Rumour Detection</title>
       <author><first>Lin</first><last>Tian</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>1667-1675</pages>
       <abstract>We apply causal mediation analysis to explain the decision-making process of neural models for rumour detection on Twitter.Interventions at the input and network level reveal the causal impacts of tweets and words in the model output.We find that our approach CMA-R – Causal Mediation Analysis for Rumour detection – identifies salient tweets that explain model predictions and show strong agreement with human judgements for critical tweets determining the truthfulness of stories.CMA-R can further highlight causally impactful words in the salient tweets, providing another layer of interpretability and transparency into these blackbox rumour detection systems. Code is available at: https://github.com/ltian678/cma-r.</abstract>
@@ -1459,7 +1459,7 @@
     <paper id="117">
       <title>Morphology Aware Source Term Masking for Terminology-Constrained <fixed-case>NMT</fixed-case></title>
       <author><first>Ander</first><last>Corral</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <pages>1676-1688</pages>
       <abstract>Terminology-constrained NMT systems facilitate the forced translation of domain-specific vocabulary. A notable method in this context is the “copy-and-inflect” approach, which appends the target term lemmas of constraints to their corresponding source terms in the input sentence. In this work, we propose a novel adaptation of the “copy-and-inflect” method, referred to as “morph-masking”. Our method involves masking the source terms of the constraints from the input sentence while retaining essential grammatical information. Our approach is based on the hypothesis that “copy-and-inflect” systems have access to both source and target terms, allowing them to generate the correct surface form of the constraint by either translating the source term itself or properly inflecting the target term lemma. Through extensive validation of our method in two translation directions with different levels of source morphological complexity, Basque to Spanish and English to German, we have demonstrated that “morph-masking” is capable of providing a harder constraint signal, resulting in a notable improvement over the “copy-and-inflect” method (up to 38% in term accuracy), especially in challenging constraint scenarios.</abstract>
       <url hash="f037957d">2024.findings-eacl.117</url>
@@ -1526,7 +1526,7 @@
       <title>Contextualized Topic Coherence Metrics</title>
       <author><first>Hamed</first><last>Rahimi</last></author>
       <author><first>David</first><last>Mimno</last><affiliation>Cornell University and Cornell University</affiliation></author>
-      <author><first>Jacob</first><last>Hoover</last><affiliation>McGill University</affiliation></author>
+      <author id="jacob-hoover-vigly"><first>Jacob</first><last>Hoover</last><affiliation>McGill University</affiliation></author>
       <author><first>Hubert</first><last>Naacke</last><affiliation>Sorbonne Université</affiliation></author>
       <author><first>Camelia</first><last>Constantin</last></author>
       <author><first>Bernd</first><last>Amann</last><affiliation>Sorbonne Université</affiliation></author>
@@ -1592,7 +1592,7 @@
       <title>Non-Exchangeable Conformal Language Generation with Nearest Neighbors</title>
       <author><first>Dennis</first><last>Ulmer</last></author>
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto Superior Técnico</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>1909-1929</pages>
       <abstract>Quantifying uncertainty in automatically generated text is important for letting humans check potential hallucinations and making systems more reliable. Conformal prediction is an attractive framework to provide predictions imbued with statistical guarantees, however, its application to text generation is challenging since any i.i.d. assumptions are not realistic. In this paper, we bridge this gap by leveraging recent results on *non-exchangeable* conformal prediction, which still ensures bounds on coverage. The result, *non-exchangeable conformal nucleus sampling*, is a novel extension of the conformal prediction framework to generation based on nearest neighbors. Our method can be used post-hoc for an arbitrary model without extra training and supplies token-level, calibrated prediction sets equipped with statistical guarantees. Experiments in machine translation and language modeling show encouraging results in generation quality. By also producing tighter prediction sets with good coverage, we thus give a more theoretically principled way to perform sampling with conformal guarantees.</abstract>
       <url hash="74ae0a2d">2024.findings-eacl.129</url>
@@ -1736,7 +1736,7 @@
       <title>Cross-lingual Editing in Multilingual Language Models</title>
       <author><first>Himanshu</first><last>Beniwal</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <author><first>Kowsik</first><last>D</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>2078-2128</pages>
       <abstract>The training of large language models (LLMs) necessitates substantial data and computational resources, and updating outdated LLMs entails significant efforts and resources. While numerous model editing techniques (METs) have emerged to efficiently update model outputs without retraining, their effectiveness in multilingual LLMs, where knowledge is stored in diverse languages, remains an underexplored research area. This research paper introduces the cross-lingual model editing (XME) paradigm, wherein a fact is edited in one language, and the subsequent update propagation is observed across other languages. To investigate the XME paradigm, we conducted experiments using BLOOM, mBERT, and XLM-RoBERTa using the two writing scripts: Latin (English, French, and Spanish) and Indic (Hindi, Gujarati, and Bengali). The results reveal notable performance limitations of state-of-the-art METs under the XME setting, mainly when the languages involved belong to two distinct script families. These findings highlight the need for further research and development of XME techniques to address these challenges. For more comprehensive information, the dataset used in this research and the associated code are publicly available at the following [URL](https://github.com/lingo-iitgn/XME).</abstract>
       <url hash="4343bb32">2024.findings-eacl.140</url>
@@ -1774,7 +1774,7 @@
       <author><first>Yichen</first><last>Jiang</last></author>
       <author><first>Marco</first><last>Vecchio</last></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>2162-2174</pages>
       <url hash="8c1adec3">2024.findings-eacl.143</url>
       <bibkey>jiang-etal-2024-hierarchical</bibkey>
@@ -1784,9 +1784,9 @@
       <title>Fine-tuning <fixed-case>CLIP</fixed-case> Text Encoders with Two-step Paraphrasing</title>
       <author><first>Hyunjae</first><last>Kim</last><affiliation>Korea University</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Handong</first><last>Zhao</last><affiliation>Adobe Systems</affiliation></author>
-      <author><first>Quan</first><last>Tran</last><affiliation>servicenow</affiliation></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last><affiliation>servicenow</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Jaewoo</first><last>Kang</last><affiliation>Korea University</affiliation></author>
       <pages>2175-2184</pages>
@@ -1801,7 +1801,7 @@
       <author><first>Sugyeong</first><last>Eo</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>2185-2196</pages>
       <abstract>Educational question-answer generation has been extensively researched owing to its practical applicability. However, we have identified a persistent challenge concerning the evaluation of such systems. Existing evaluation methods often fail to produce objective results and instead exhibit a bias towards favoring high similarity to the ground-truth question-answer pairs. In this study, we demonstrate that these evaluation methods yield low human alignment and propose an alternative approach called Generative Interpretation (GI) to achieve more objective evaluations. Through experimental analysis, we reveal that GI outperforms existing evaluation methods in terms of human alignment, and even shows comparable performance with GPT3.5, only with BART-large.</abstract>
       <url hash="7a40f6e9">2024.findings-eacl.145</url>
@@ -1877,7 +1877,7 @@
       <title>Joint Inference of Retrieval and Generation for Passage Re-ranking</title>
       <author><first>Wei</first><last>Fang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yung-Sung</first><last>Chuang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>2289-2298</pages>
       <abstract>Passage retrieval is a crucial component of modern open-domain question answering (QA) systems, providing information for downstream QA components to generate accurate and transparent answers. In this study we focus on passage re-ranking, proposing a simple yet effective method, Joint Passage Re-ranking (JPR), that optimizes the mutual information between query and passage distributions, integrating both cross-encoders and generative models in the re-ranking process. Experimental results demonstrate that JPR outperforms conventional re-rankers and language model scorers in both open-domain QA retrieval settings and diverse retrieval benchmarks under zero-shot settings.</abstract>
       <url hash="b861c24e">2024.findings-eacl.151</url>
@@ -1922,7 +1922,7 @@
       <author><first>Maxime</first><last>Fily</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last><affiliation>LLF / Université Paris Cité</affiliation></author>
       <author><first>Severine</first><last>Guillaume</last><affiliation>CNRS</affiliation></author>
-      <author><first>Gilles</first><last>Adda</last><affiliation>CNRS</affiliation></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last><affiliation>CNRS</affiliation></author>
       <author><first>Alexis</first><last>Michaud</last><affiliation>CNRS</affiliation></author>
       <pages>2332-2341</pages>
       <abstract>In the highly constrained context of low-resource language studies, we explore vector representations of speech from a pretrained model to determine their level of abstraction with regard to the audio signal. We propose a new unsupervised method using ABX tests on audio recordings with carefully curated metadata to shed light on the type of information present in the representations. ABX tests determine whether the representations computed by a multilingual speech model encode a given characteristic. Three experiments are devised: one on room acoustics aspects, one on linguistic genre, and one on phonetic aspects. The results confirm that the representations extracted from recordings with different linguistic/extra-linguistic characteristics differ along the same lines. Embedding more audio signal in one vector better discriminates extra-linguistic characteristics, whereas shorter snippets are better to distinguish segmental information. The method is fully unsupervised, potentially opening new research avenues for comparative work on under-documented languages.</abstract>
@@ -1985,7 +1985,7 @@
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: NAACL 2024</booktitle>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Helena</first><last>Gomez</last></editor>
+      <editor id="helena-gomez"><first>Helena</first><last>Gomez</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
@@ -2061,8 +2061,8 @@
       <author><first>Ye</first><last>Liu</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Tong</first><last>Niu</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Yao</first><last>Wan</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
       <pages>51-68</pages>
@@ -2126,7 +2126,7 @@
       <title>Attention Alignment and Flexible Positional Embeddings Improve Transformer Length Extrapolation</title>
       <author><first>Ta-Chung</first><last>Chi</last></author>
       <author><first>Ting-Han</first><last>Fan</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University and Carnegie Mellon University</affiliation></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University and Carnegie Mellon University</affiliation></author>
       <pages>132-148</pages>
       <abstract>An ideal length-extrapolatable Transformer language model can handle sequences longer than the training length without any fine-tuning. Such long-context utilization capability relies heavily on a flexible positional embedding design. Upon investigating the flexibility of existing large pre-trained Transformer language models, we find that the T5 family deserves a closer look, as its positional embeddings capture rich and flexible attention patterns. However, T5 suffers from the dispersed attention issue: the longer the input sequence, the flatter the attention distribution. To alleviate the issue, we propose two attention alignment strategies via temperature scaling. Our findings show improvement on the long-context utilization capability of T5 on language modeling, retrieval, multi-document question answering, and code completion tasks without any fine-tuning. This suggests that a flexible positional embedding design and attention alignment can go a long way toward Transformer length extrapolation. The code is released at: <url>https://github.com/chijames/T5-Attention-Alignment</url></abstract>
       <url hash="c0a735b0">2024.findings-naacl.10</url>
@@ -2361,7 +2361,7 @@
       <author><first>Rebecca</first><last>Jiang</last></author>
       <author><first>Xingyu</first><last>Lu</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Qian</first><last>Zhao</last><affiliation>Bloomberg</affiliation></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <pages>395-408</pages>
       <abstract>In text documents such as news articles, the content and key events usually revolve around a subset of all the entities mentioned in a document. These entities, often deemed as salient entities, provide useful cues of the aboutness of a document to a reader. Identifying the salience of entities was found helpful in several downstream applications such as search, ranking, and entity-centric summarization, among others. Prior work on salient entity detection mainly focused on machine learning models that require heavy feature engineering. We show that fine-tuning medium-sized language models with a cross-encoder style architecture yields substantial performance gains over feature engineering approaches. To this end, we conduct a comprehensive benchmarking of four publicly available datasets using models representative of the medium-sized pre-trained language model family. Additionally, we show that zero-shot prompting of instruction-tuned language models yields inferior results, indicating the task’s uniqueness and complexity.</abstract>
       <url hash="cc38b18c">2024.findings-naacl.28</url>
@@ -2418,7 +2418,7 @@
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last><affiliation>University of Opole and University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>474-489</pages>
       <abstract>The injection of syntactic information in Variational AutoEncoders (VAEs) can result in an overall improvement of performances and generalisation. An effective strategy to achieve such a goal is to separate the encoding of distributional semantic features and syntactic structures into heterogeneous latent spaces via multi-task learning or dual encoder architectures. However, existing works employing such techniques are limited to LSTM-based VAEs. This work investigates latent space separation methods for structural syntactic injection in Transformer-based VAE architectures (i.e., Optimus) through the integration of graph-based models. Our empirical evaluation reveals that the proposed end-to-end VAE architecture can improve theoverall organisation of the latent space, alleviating the information loss occurring in standard VAE setups, and resulting in enhanced performances on language modelling and downstream generation tasks.</abstract>
       <url hash="4193dedb">2024.findings-naacl.32</url>
@@ -2494,9 +2494,9 @@
     </paper>
     <paper id="38">
       <title>Addressing Both Statistical and Causal Gender Fairness in <fixed-case>NLP</fixed-case> Models</title>
-      <author><first>Hannah</first><last>Chen</last><affiliation>University of Virginia</affiliation></author>
+      <author id="hannah-cyberey"><first>Hannah</first><last>Chen</last><affiliation>University of Virginia</affiliation></author>
       <author><first>Yangfeng</first><last>Ji</last><affiliation>University of Virginia</affiliation></author>
-      <author><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
       <pages>561-582</pages>
       <abstract>Statistical fairness stipulates equivalent outcomes for every protected group, whereas causal fairness prescribes that a model makes the same prediction for an individual regardless of their protected characteristics. Counterfactual data augmentation (CDA) is effective for reducing bias in NLP models, yet models trained with CDA are often evaluated only on metrics that are closely tied to the causal fairness notion; similarly, sampling-based methods designed to promote statistical fairness are rarely evaluated for causal fairness. In this work, we evaluate both statistical and causal debiasing methods for gender bias in NLP models, and find that while such methods are effective at reducing bias as measured by the targeted metric, they do not necessarily improve results on other bias metrics. We demonstrate that combinations of statistical and causal debiasing techniques are able to reduce bias measured through both types of metrics.</abstract>
       <url hash="fd6fcc8a">2024.findings-naacl.38</url>
@@ -2546,7 +2546,7 @@
       <author><first>Carina</first><last>Negreanu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Benjamin</first><last>Zorn</last></author>
       <author><first>José</first><last>Cambronero</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Andrew</first><last>Gordon</last><affiliation>University of Edinburgh and Microsoft Research</affiliation></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last><affiliation>University of Edinburgh and Microsoft Research</affiliation></author>
       <author><first>Vu</first><last>Le</last><affiliation>Microsoft</affiliation></author>
       <author><first>Elnaz</first><last>Nouri</last></author>
       <author><first>Nadia</first><last>Polikarpova</last><affiliation>University of California, San Diego</affiliation></author>
@@ -2652,7 +2652,7 @@
       <title><fixed-case>GEE</fixed-case>! Grammar Error Explanation with Large Language Models</title>
       <author><first>Yixiao</first><last>Song</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
       <author><first>Kalpesh</first><last>Krishna</last><affiliation>Google</affiliation></author>
-      <author><first>Rajesh</first><last>Bhatt</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
       <author><first>Kevin</first><last>Gimpel</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <pages>754-781</pages>
@@ -2780,7 +2780,7 @@
       <author><first>Boxin</first><last>Wang</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Yibo</first><last>Zhang</last><affiliation>Stanford University and University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Yuan</first><last>Cao</last><affiliation>Google DeepMind</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign and University of California Berkeley</affiliation></author>
       <author><first>Hugh</first><last>McMahan</last><affiliation>Google</affiliation></author>
       <author><first>Sewoong</first><last>Oh</last><affiliation>University of Washington, University of Illinois at Urbana-Champaign and University of Washington, Seattle</affiliation></author>
       <author><first>Zheng</first><last>Xu</last><affiliation>Google</affiliation></author>
@@ -2908,7 +2908,7 @@
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Peiqin</first><last>Lin</last><affiliation>Institut für Informatik</affiliation></author>
       <author><first>Mingyang</first><last>Wang</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1067-1097</pages>
       <url hash="eea02159">2024.findings-naacl.68</url>
       <bibkey>liu-etal-2024-ofa</bibkey>
@@ -3042,7 +3042,7 @@
       <author><first>Giulia</first><last>Pucci</last></author>
       <author><first>Federico</first><last>Ranaldi</last><affiliation>University of Roma “Tor Vergata”</affiliation></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>Università degli Studi di Roma Tor Vergata</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>1229-1241</pages>
       <abstract>Reasoning methods, best exemplified by the well-known Chain-of-Thought (CoT), empower the reasoning abilities of Large Language Models (LLMs) by eliciting them to solve complex tasks in a step-by-step manner. Although they are achieving significant success, the ability to deliver multi-step reasoning remains limited to English because of the imbalance in the distribution of pre-training data, which makes other languages a barrier. In this paper, we propose Cross-lingual Tree-of-Thoughts (Cross-ToT), a method for aligning Cross-lingual CoT reasoning across languages. The proposed method, through a self-consistent cross-lingual prompting mechanism inspired by the Tree-of-Thoughts approach, provides multi-step reasoning paths in different languages that, during the steps, lead to the final solution. Experimental evaluations show that our method significantly outperforms existing prompting methods by reducing the number of interactions and achieving state-of-the-art performance.</abstract>
       <url hash="7578feb0">2024.findings-naacl.78</url>
@@ -3066,7 +3066,7 @@
       <title>Context Does Matter: Implications for Crowdsourced Evaluation Labels in Task-Oriented Dialogue Systems</title>
       <author><first>Clemencia</first><last>Siro</last></author>
       <author><first>Mohammad</first><last>Aliannejadi</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>1258-1273</pages>
       <abstract>Crowdsourced labels play a crucial role in evaluating task-oriented dialogue systems (TDSs). Obtaining high-quality and consistent ground-truth labels from annotators presents challenges. When evaluating a TDS, annotators must fully comprehend the dialogue before providing judgments. Previous studies suggest using only a portion of the dialogue context in the annotation process. However, the impact of this limitation on label quality remains unexplored. This study investigates the influence of dialogue context on annotation quality, considering the truncated context for relevance and usefulness labeling. We further propose to use large language models ( LLMs) to summarize the dialogue context to provide a rich and short description of the dialogue context and study the impact of doing so on the annotator’s performance. Reducing context leads to more positive ratings. Conversely, providing the entire dialogue context yields higher-quality relevance ratings but introduces ambiguity in usefulness ratings. Using the first user utterance as context leads to consistent ratings, akin to those obtained using the entire dialogue, with significantly reduced annotation effort. Our findings show how task design, particularly the availability of dialogue context, affects the quality and consistency of crowdsourced evaluation labels.</abstract>
       <url hash="874ad2d5">2024.findings-naacl.80</url>
@@ -3141,7 +3141,7 @@
       <author><first>Tien-Hong</first><last>Lo</last></author>
       <author><first>Fu-An</first><last>Chao</last></author>
       <author><first>Tzu-I</first><last>Wu</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>1352-1362</pages>
       <abstract>Automated speaking assessment (ASA) typically involves automatic speech recognition (ASR) and hand-crafted feature extraction from the ASR transcript of a learner’s speech. Recently, self-supervised learning (SSL) has shown stellar performance compared to traditional methods. However, SSL-based ASA systems are faced with at least three data-related challenges: limited annotated data, uneven distribution of learner proficiency levels and non-uniform score intervals between different CEFR proficiency levels. To address these challenges, we explore the use of two novel modeling strategies: metric-based classification and loss re-weighting, leveraging distinct SSL-based embedding features. Extensive experimental results on the ICNALE benchmark dataset suggest that our approach can outperform existing strong baselines by a sizable margin, achieving a significant improvement of more than 10% in CEFR prediction accuracy.</abstract>
@@ -3180,8 +3180,8 @@
     <paper id="89">
       <title><fixed-case>UGIF</fixed-case>-<fixed-case>D</fixed-case>ata<fixed-case>S</fixed-case>et: A New Dataset for Cross-lingual, Cross-modal Sequential actions on the <fixed-case>UI</fixed-case></title>
       <author><first>Sagar</first><last>Gubbi Venkatesh</last></author>
-      <author><first>Partha</first><last>Talukdar</last><affiliation>Google Research and Indian Institute of Science, Bangalore</affiliation></author>
-      <author><first>Srini</first><last>Narayanan</last><affiliation>Google research</affiliation></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last><affiliation>Google Research and Indian Institute of Science, Bangalore</affiliation></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last><affiliation>Google research</affiliation></author>
       <pages>1390-1399</pages>
       <abstract>Help documents are supposed to aid smartphone users in resolving queries such as “How to block calls from unknown numbers?”. However, given a query, identifying the right help document, understanding instructions from the document, and using them to resolve the issue at hand is challenging. The user experience may be enhanced by converting the instructions in the help document to a step-by-step tutorial overlaid on the phone UI. Successful execution of this task requires overcoming research challenges in retrieval, parsing, and grounding in the multilingual-multimodal setting. For example, user queries in one language may have to be matched against instructions in another language, which in turn needs to be grounded in a multimodal UI in yet another language. Moreover, there isn’t any relevant dataset for such a task. In order to bridge this gap, we introduce UGIF-DataSet, a multi-lingual, multi-modal UI grounded dataset for step-by-step task completion on the smartphone, containing 4,184 tasks across 8 languages. The instruction steps in UGIF-DataSet are available only in English, so the challenge involves operations in the cross-modal, cross-lingual setting. We compare the performance of different large language models for this task and find that the end-to-end task completion rate drops from 48% in English to 32% for other languages, demonstrating significant overall headroom for improvement. We are hopeful that UGIF-DataSet and our analysis will aid further research on the important problem of sequential task completion in the multilingual and multimodal setting.</abstract>
       <url hash="734d8018">2024.findings-naacl.89</url>
@@ -3258,7 +3258,7 @@
       <author><first>Zhengyu</first><last>Zhao</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Yun</first><last>Shen</last><affiliation>NetApp</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>1459-1472</pages>
       <abstract>Large language models (LLMs) have demonstrated superior performance compared to previous methods on various tasks, and often serve as the foundation models for many researches and services. However, the untrustworthy third-party LLMs may covertly introduce vulnerabilities for downstream tasks. In this paper, we explore the vulnerability of LLMs through the lens of backdoor attacks. Different from existing backdoor attacks against LLMs, ours scatters multiple trigger keys in different prompt components. Such a Composite Backdoor Attack (CBA) is shown to be stealthier than implanting the same multiple trigger keys in only a single component. CBA ensures that the backdoor is activated only when all trigger keys appear. Our experiments demonstrate that CBA is effective in both natural language processing (NLP) and multimodal tasks. For instance, with 3% poisoning samples against the LLaMA-7B model on the Emotion dataset, our attack achieves a 100% Attack Success Rate (ASR) with a False Triggered Rate (FTR) below 2.06% and negligible model accuracy degradation. Our work highlights the necessity of increased security research on the trustworthiness of foundation LLMs.</abstract>
       <url hash="02f970fe">2024.findings-naacl.94</url>
@@ -3269,8 +3269,8 @@
     <paper id="95">
       <title>Adapting Fake News Detection to the Era of Large Language Models</title>
       <author><first>Jinyan</first><last>Su</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1473-1490</pages>
       <abstract>In the age of large language models (LLMs) and the widespread adoption of AI-driven content creation, the landscape of information dissemination has witnessed a paradigm shift. With the proliferation of both human-written and machine-generated real and fake news, robustly and effectively discerning the veracity of news articles has become an intricate challenge. While substantial research has been dedicated to fake news detection, it has either assumed that all news articles are human-written or has abruptly assumed that all machine-generated news was fake. Thus, a significant gap exists in understanding the interplay between machine-paraphrased real news, machine-generated fake news, human-written fake news, and human-written real news. In this paper, we study this gap by conducting a comprehensive evaluation of fake news detectors trained in various scenarios. Our primary objectives revolve around the following pivotal question: How can we adapt fake news detectors to the era of LLMs?Our experiments reveal an interesting pattern that detectors trained exclusively on human-written articles can indeed perform well at detecting machine-generated fake news, but not vice versa. Moreover, due to the bias of detectors against machine-generated texts (CITATION), they should be trained on datasets with a lower machine-generated news ratio than the test set. Building on our findings, we provide a practical strategy for the development of robust fake news detectors.</abstract>
       <url hash="6a2c83da">2024.findings-naacl.95</url>
@@ -3414,7 +3414,7 @@
       <author><first>Lu</first><last>Yuan</last><affiliation>Microsoft</affiliation></author>
       <author><first>Takuya</first><last>Yoshioka</last><affiliation>Microsoft</affiliation></author>
       <author><first>Michael</first><last>Zeng</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>1615-1627</pages>
       <abstract>The convergence of text, visual, and audio data is crucial towards human-like artificial intelligence, however the current Vision-Language-Speech landscape is dominated by encoder-only models that lack generative abilities. We propose closing this gap with i-Code V2, one of the first models capable of generating natural language from any combination of Vision, Language, and Speech data. i-Code V2 leverages state-of-the-art single-modality encoders, combining their outputs with a new modality-fusing encoder to project combinations of modalities into a shared representational space. Language tokens are generated from these representations via an autoregressive decoder. i-Code V2 is pretrained end-to-end on a large collection of dual- and single-modality datasets with a novel text completion objective that can be generalized across arbitrary combinations of modalities. i-Code V2 matches or outperforms state-of-the-art single- and dual-modality baselines on 7 multimodal tasks, demonstrating the power of generative multimodal pretraining across a diversity of tasks and signals.</abstract>
       <url hash="d4695178">2024.findings-naacl.105</url>
@@ -3425,7 +3425,7 @@
       <title>Think While You Write: Hypothesis Verification Promotes Faithful Knowledge-to-Text Generation</title>
       <author><first>Yifu</first><last>Qiu</last></author>
       <author><first>Varun</first><last>Embar</last><affiliation>University of California, Santa Cruz</affiliation></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Benjamin</first><last>Han</last><affiliation>Apple</affiliation></author>
       <pages>1628-1644</pages>
       <abstract>Knowledge-to-text generators often struggle to faithfully generate descriptions for the input facts: they may produce hallucinations that contradict the input, or describe facts not present in the input. To reduce hallucinations, we propose a decoding-only method, TWEAK (Think While Effectively Articulating Knowledge), which can be integrated with any generator without retraining. TWEAK treats the generated sequences at each decoding step and its future sequences as hypotheses, and ranks each generation candidate based on the extent to which their hypotheses are supported by the input facts using a Hypothesis Verification Model (HVM). We first demonstrate the effectiveness of TWEAK by using a Natural Language Inference (NLI) model as the HVM and report improved faithfulness with a minimal impact on the quality. We then replace the NLI model with a task-specific HVM trained with a first-of-a-kind dataset, FATE (Fact-Aligned Textual Entailment), which pairs input facts with their original and perturbed descriptions. We test TWEAK with two generators, and the best TWEAK variants improve on average for the two models by 2.24/7.17 points in faithfulness (FactKB) in in/out-of-distribution evaluations, respectively, and with only a 0.14/0.32-point decline in quality (BERTScore).</abstract>
@@ -3543,7 +3543,7 @@
       <author><first>Shrey</first><last>Pandit</last></author>
       <author><first>Vishwa</first><last>Shah</last></author>
       <author><first>Megh</first><last>Thakkar</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>1757-1771</pages>
       <abstract>The Euclidean space is the familiar space for training neural models and performing arithmetic operations.However, many data types inherently possess complex geometries, and model training methods involve operating over their latent representations, which cannot be effectively captured in the Euclidean space.The hyperbolic space provides a more generalized representative geometry to model the hierarchical complexities of the tree-like structure of natural language.We propose AdaPT a set of guidelines for initialization, parametrization, and training of neural networks, which adapts to the dataset and can be used with different manifolds. AdaPT can be generalized over any existing neural network training methodology and leads to more stable training without a substantial increase in training time.We apply AdaPT guidelines over two state-of-the-art deep learning approaches and empirically demonstrate its effectiveness through experiments on three tasks over 12 languages across speech and text.Through extensive qualitative analysis, we put forward the applicability of AdaPT as a set of guidelines optimally utilizing the manifold geometry, which can be extended to various downstream tasks across languages and modalities.</abstract>
       <url hash="d8a65cba">2024.findings-naacl.114</url>
@@ -3561,7 +3561,7 @@
       <author><first>Shao</first><last>Zhang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yisi</first><last>Sang</last><affiliation>Apple</affiliation></author>
       <author><first>Sijia</first><last>Liu</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>James</first><last>Hendler</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
+      <author id="james-hendler"><first>James</first><last>Hendler</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Dakuo</first><last>Wang</last><affiliation>Northeastern University</affiliation></author>
       <pages>1772-1790</pages>
       <abstract>While most existing works on LLM prompting techniques focus only on how to select a better set of data samples inside one single prompt input (In-Context Learning or ICL), why can not we design and leverage multiple prompts together to further improve the LLM’s performance? In this work, we propose In-Context Sampling (ICS), a low-resource LLM prompting technique to produce confident predictions by optimizing the construction of multiple ICL prompt inputs. Extensive experiments with three open-source LLMs (FlanT5-XL, Mistral-7B, and Mixtral-8x7B) on four NLI datasets (e-SNLI, Multi-NLI, ANLI, and Contract-NLI) and one QA dataset (CommonsenseQA) illustrate that ICS can consistently enhance LLMs’ performance. An in-depth evaluation with three data similarity-based ICS strategies suggests that these strategies can further elevate LLM’s performance, which sheds light on a new yet promising future research direction.</abstract>
@@ -3614,7 +3614,7 @@
       <title>Prompt Space Optimizing Few-shot Reasoning Success with Large Language Models</title>
       <author><first>Fobo</first><last>Shi</last></author>
       <author><first>Peijun</first><last>Qing</last><affiliation>Dartmouth College</affiliation></author>
-      <author><first>Dong</first><last>Yang</last></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last></author>
       <author><first>Nan</first><last>Wang</last></author>
       <author><first>Youbo</first><last>Lei</last></author>
       <author><first>Haonan</first><last>Lu</last><affiliation>OPPO Guangdong Mobile Telecommunications Co., Ltd.</affiliation></author>
@@ -3628,7 +3628,7 @@
     </paper>
     <paper id="120">
       <title><fixed-case>DAGCN</fixed-case>: Distance-based and Aspect-oriented Graph Convolutional Network for Aspect-based Sentiment Analysis</title>
-      <author id="zhihao-wang"><first>Zhihao</first><last>Wang</last></author>
+      <author><first>Zhihao</first><last>Wang</last></author>
       <author><first>Bo</first><last>Zhang</last><affiliation>Shanghai Normal University</affiliation></author>
       <author><first>Ru</first><last>Yang</last><affiliation>Shanghai Normal University</affiliation></author>
       <author><first>Chang</first><last>Guo</last><affiliation>Shanghai Normal University</affiliation></author>
@@ -3731,7 +3731,7 @@
       <title>“Tell me who you are and <fixed-case>I</fixed-case> tell you how you argue”: Predicting Stances and Arguments for Stakeholder Groups</title>
       <author><first>Philipp</first><last>Heinisch</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Lorik</first><last>Dumani</last><affiliation>Trier University</affiliation></author>
-      <author><first>Philipp</first><last>Cimiano</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Ralf</first><last>Schenkel</last><affiliation>Trier University</affiliation></author>
       <pages>1968-1982</pages>
       <abstract>Argument mining has focused so far mainly on the identification, extraction, and formalization of arguments. An important yet unaddressedtask consists in the prediction of the argumentative behavior of stakeholders in a debate. Predicting the argumentative behavior in advance can support foreseeing issues in public policy making or help recognize potential disagreements early on and help to resolve them. In this paper, we consider the novel task of predicting the argumentative behavior of individual stakeholders. We present ARGENST, a framework that relies on a recommender-based architecture to predict the stance and the argumentative main point on a specific controversial topic for a given stakeholder, which is described in terms of a profile including properties related to demographic attributes, religious and political orientation, socio-economic background, etc. We evaluate our approach on the well-known debate.org dataset in terms of accuracy for predicting stance as well as in terms of similarity of the generated arguments to the ground truth arguments using BERTScore. As part of a case study, we show how juries of members representing different stakeholder groups and perspectives can be assembled to simulate the public opinion on a given topic.</abstract>
@@ -3744,7 +3744,7 @@
       <title>Psychometric Predictive Power of Large Language Models</title>
       <author><first>Tatsuki</first><last>Kuribayashi</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Yohei</first><last>Oseki</last><affiliation>University of Tokyo</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>1983-2005</pages>
       <abstract>Instruction tuning aligns the response of large language models (LLMs) with human preferences.Despite such efforts in human–LLM alignment, we find that instruction tuning does not always make LLMs human-like from a cognitive modeling perspective. More specifically, next-word probabilities estimated by instruction-tuned LLMs are often worse at simulating human reading behavior than those estimated by base LLMs.In addition, we explore prompting methodologies for simulating human reading behavior with LLMs. Our results show that prompts reflecting a particular linguistic hypothesis improve psychometric predictive power, but are still inferior to small base models.These findings highlight that recent advancements in LLMs, i.e., instruction tuning and prompting, do not offer better estimates than direct probability measurements from base LLMs in cognitive modeling. In other words, pure next-word probability remains a strong predictor for human reading behavior, even in the age of LLMs.</abstract>
       <url hash="288b3653">2024.findings-naacl.129</url>
@@ -3769,7 +3769,7 @@
       <author><first>Peijie</first><last>Chen</last></author>
       <author><first>Tin</first><last>Nguyen</last></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Anh</first><last>Nguyen</last><affiliation>Auburn University</affiliation></author>
       <pages>2018-2053</pages>
       <abstract>CLIP-based classifiers rely on the prompt containing a class name that is known to the text encoder. Therefore, they perform poorly on new classes or the classes whose names rarely appear on the Internet (e.g., scientific names of birds). For fine-grained classification, we propose PEEB – an explainable and editable classifier to (1) express the class name into a set of text descriptors that describe the visual parts of that class; and (2) match the embeddings of the detected parts to their textual descriptors in each class to compute a logit score for classification. In a zero-shot setting where the class names are unknown, PEEB outperforms CLIP by a huge margin (∼10× in top-1 accuracy). Compared to part-based classifiers, PEEB is not only the state-of-the-art (SOTA) on the supervised-learning setting (88.80% and 92.20% accuracy on CUB-200 and Stanford Dogs-120, respectively) but also the first to enable users to edit the text descriptors to form a new classifier without any re-training. Compared to concept bottleneck models, PEEB is also the SOTA in both zero-shot and supervised-learning settings.</abstract>
@@ -3870,7 +3870,7 @@
     <paper id="139">
       <title>Plug-in Language Model: Controlling Text Generation with a Simple Regression Model</title>
       <author><first>Nai-Chi</first><last>Yang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last><affiliation>Academia Sinica</affiliation></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last><affiliation>Academia Sinica</affiliation></author>
       <author><first>Pu-Jen</first><last>Cheng</last><affiliation>National Taiwan University</affiliation></author>
       <pages>2165-2181</pages>
       <abstract>Large-scale pre-trained language models have displayed unrivaled capacity in generating text that closely resembles human-written text. Nevertheless, generating texts adhering to specific conditions without fine-tuning or adding new parameters can be challenging. Contemporary approaches commonly rely on either prompts or auxiliary models to avoid modifying the language models. These auxiliary models are designed to assess whether a generated token contributes to meeting the desired requirements. These approaches adjust the distribution of the next token during the inference phase by leveraging the prediction score of the desired attribute to calculate gradients. However, these auxiliary models typically require the language model’s latent states. This prerequisite challenges integrating various existing black box attribute models or tools. We present the Plug-in Language Model (PiLM) as a solution to address the limitations. PiLM leverages reinforcement learning to utilize black box tools directly, adjusting the latent state to control text generation. However, performing backpropagation during the inference phase is time-consuming for PiLM. By replacing backpropagation with a simple regression model, PiLM can achieve an inference time comparable to that of the original LLM. Experiment results show that our approaches in this paper outperform existing state-of-the-art methods that rely on gradient-based, weighted decoding, or prompt-based methodologies.</abstract>
@@ -3923,7 +3923,7 @@
       <title>Group Fairness in Multilingual Speech Recognition Models</title>
       <author><first>Anna</first><last>Zee</last></author>
       <author><first>Marc</first><last>Zee</last><affiliation>Research, Google</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>2213-2226</pages>
       <abstract>We evaluate the performance disparity of the Whisper and MMS families of ASR models across the VoxPopuli and Common Voice multilingual datasets, with an eye toward intersectionality. Our two most important findings are that model size, surprisingly, correlates logarithmically with worst-case performance disparities, meaning that larger (and better) models are less fair. We also observe the importance of intersectionality. In particular, models often exhibit significant performance disparity across binary gender for adolescents.</abstract>
       <url hash="96ca57ea">2024.findings-naacl.143</url>
@@ -3938,7 +3938,7 @@
       <author><first>Xiaoying</first><last>Zhang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Irwin</first><last>King</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>2227-2242</pages>
       <abstract>Making moral judgments is an essential step toward developing ethical AI systems. Prevalent approaches are mostly implemented in a bottom-up manner, which uses a large set of annotated data to train models based on crowd-sourced opinions about morality. These approaches have been criticized for potentially overgeneralizing a limited group of annotators’ moral stances and lacking explainability. This work proposes a flexible top-down framework to steer (Large) Language Models to perform moral reasoning with well-established moral theories from interdisciplinary research. The theory-guided top-down framework can incorporate various moral theories. Our experiments demonstrate the effectiveness of the proposed framework on datasets derived from moral theories. Furthermore, we show the alignment between different moral theories and existing morality datasets. Our analysis exhibits the potential and flaws in existing resources (models and datasets) in developing explainable moral judgment-making systems.</abstract>
       <url hash="2c393a5d">2024.findings-naacl.144</url>
@@ -3953,7 +3953,7 @@
       <author><first>Boyang</first><last>Xue</last></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Qi</first><last>Zhu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>2243-2255</pages>
       <abstract>The growing interest in Large Language Models (LLMs) for specialized applications has revealed a significant challenge: when tailored to specific domains, LLMs tend to experience catastrophic forgetting, compromising their general capabilities and leading to a suboptimal user experience. Additionally, crafting a versatile model for multiple domains simultaneously often results in a decline in overall performance due to confusion between domains. In response to these issues, we present the RolE Prompting Guided Multi-Domain Adaptation (REGA) strategy. This novel approach effectively manages multi-domain LLM adaptation through three key components: 1) Self-Distillation constructs and replays general-domain exemplars to alleviate catastrophic forgetting. 2) Role Prompting assigns a central prompt to the general domain and a unique role prompt to each specific domain to minimize inter-domain confusion during training. 3) Role Integration reuses and integrates a small portion of domain-specific data to the general-domain data, which are trained under the guidance of the central prompt. The central prompt is used for a streamlined inference process, removing the necessity to switch prompts for different domains.Empirical results demonstrate that REGA effectively alleviates catastrophic forgetting and inter-domain confusion. This leads to improved domain-specific performance compared to standard fine-tuned models, while still preserving robust general capabilities.</abstract>
@@ -4025,7 +4025,7 @@
       <author><first>Muthusamy</first><last>Chelliah</last><affiliation>Flipkart</affiliation></author>
       <author><first>Nikesh</first><last>Garera</last></author>
       <author><first>Swaprava</first><last>Nath</last><affiliation>IIT Kanpur and Computer Science and Engineering, Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>2315-2332</pages>
       <abstract>In e-commerce, opinion summarization is the process of summarizing the consensus opinions found in product reviews. However, the potential of additional sources such as product description and question-answers (QA) has been considered less often. Moreover, the absence of any supervised training data makes this task challenging. To address this, we propose a novel synthetic dataset creation (SDC) strategy that leverages information from reviews as well as additional sources for selecting one of the reviews as a pseudo-summary to enable supervised training. Our Multi-Encoder Decoder framework for Opinion Summarization (MEDOS) employs a separate encoder for each source, enabling effective selection of information while generating the summary. For evaluation, due to the unavailability of test sets with additional sources, we extend the Amazon, Oposum+, and Flipkart test sets and leverage ChatGPT to annotate summaries. Experiments across nine test sets demonstrate that the combination of our SDC approach and MEDOS model achieves on average a 14.5% improvement in ROUGE-1 F1 over the SOTA. Moreover, comparative analysis underlines the significance of incorporating additional sources for generating more informative summaries. Human evaluations further indicate that MEDOS scores relatively higher in coherence and fluency with 0.41 and 0.5 (−1 to 1) respectively, compared to existing models. To the best of our knowledge, we are the first to generate opinion summaries leveraging additional sources in a self-supervised setting.</abstract>
       <url hash="cd9ad100">2024.findings-naacl.150</url>
@@ -4128,7 +4128,7 @@
       <author><first>SeungWoo</first><last>Song</last><affiliation>Hanbat National University</affiliation></author>
       <author><first>HanGyeol</first><last>Yoo</last></author>
       <author><first>SangMin</first><last>Kim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
-      <author><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
       <pages>2463-2473</pages>
       <abstract>The impressive development of large language models (LLMs) is expanding into the realm of large multimodal models (LMMs), which incorporate multiple types of data beyond text. However, the nature of multimodal models leads to significant expenses in the creation of training data. Furthermore, constructing multilingual data for LMMs presents its own set of challenges due to language diversity and complexity. Therefore, in this study, we propose two cost-effective methods to solve this problem: (1) vocabulary expansion and pretraining of multilingual LLM for specific languages, and (2) automatic and elaborate construction of multimodal datasets using GPT4-V. Based on these methods, we constructed a 91K English-Korean-Chinese multilingual, multimodal training dataset. Additionally, we developed a bilingual multimodal model that exhibits excellent performance in both Korean and English, surpassing existing approaches.</abstract>
       <url hash="b30034b3">2024.findings-naacl.158</url>
@@ -4141,7 +4141,7 @@
       <author><first>Giwon</first><last>Hong</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Jeonghwan</first><last>Kim</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Junmo</first><last>Kang</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last><affiliation>KAIST</affiliation></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last><affiliation>KAIST</affiliation></author>
       <author><first>Joyce Jiyoung</first><last>Whang</last><affiliation>KAIST</affiliation></author>
       <pages>2474-2495</pages>
       <abstract>Most existing retrieval-augmented language models (LMs) assume a naive dichotomy within a retrieved document set: query-relevance and irrelevance. Our work investigates a more challenging scenario in which even the “relevant” documents may contain misleading or incorrect information, causing conflict among the retrieved documents and thereby negatively influencing model decisions as noise. We observe that existing LMs are highly brittle to the presence of conflicting information in both the fine-tuning and in-context few-shot learning scenarios. We propose approaches for handling knowledge conflicts among retrieved documents by explicitly fine-tuning a discriminator or prompting GPT-3.5 to elicit its discriminative capability. Our empirical results on open-domain QA show that these approaches significantly enhance model robustness. We also provide our findings on incorporating the fine-tuned discriminator’s decision into the in-context learning process, proposing a way to exploit the benefits of two disparate learning schemes. Alongside our findings, we provide MacNoise, a machine-generated, conflict-induced dataset to further encourage research in this direction.</abstract>
@@ -4398,7 +4398,7 @@
       <author><first>Jingjing</first><last>Xu</last></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Lingpeng</first><last>Kong</last><affiliation>Department of Computer Science, The University of Hong Kong</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Lei</first><last>Li</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>2765-2781</pages>
       <abstract>Large language models (LLMs) have demonstrated remarkable potential in handling multilingual machine translation (MMT). In this paper, we systematically investigate the advantages and challenges of LLMs for MMT by answering two questions: 1) How well do LLMs perform in translating massive languages? 2) Which factors affect LLMs’ performance in translation? We thoroughly evaluate eight popular LLMs, including ChatGPT and GPT-4. Our empirical results show that translation capabilities of LLMs are continually involving. GPT-4 has beat the strong supervised baseline NLLB in 40.91% of translation directions but still faces a large gap towards the commercial translation system like Google Translate, especially on low-resource languages. Through further analysis, we discover that LLMs exhibit new working patterns when used for MMT. First, LLM can acquire translation ability in a resource-efficient way and generate moderate translation even on zero-resource languages. Second, instruction semantics can surprisingly be ignored when given in-context exemplars. Third, cross-lingual exemplars can provide better task guidance for low-resource translation than exemplars in the same language pairs. Code will be released at: https://github.com/NJUNLP/MMT-LLM.</abstract>
@@ -4431,7 +4431,7 @@
       <author><first>Jiun-Yu</first><last>Kao</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Emre</first><last>Barut</last><affiliation>Amazon</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <pages>2793-2807</pages>
       <abstract>Referring Expression Generation (REG) is the task of generating a description that unambiguously identifies a given target in the scene. Different from Image Captioning (IC), REG requires learning fine-grained characteristics of not only the scene objects but also their surrounding context. Referring expressions are usually not singular; an object can often be uniquely referenced in numerous ways, for instance, by color, by location, or by relationship with other objects. Most prior works, however, have not explored this ‘aspect-based multiplicity’ of referring expressions. Hence, in this work, we focus on the Aspect-Controlled REG task, which requires generating a referring expression conditioned on the input aspect(s), where an aspect captures a style of reference. By changing the input aspect such as color, location, action etc., one can generate multiple distinct expressions per target region. To solve this new task, we first modify BLIP for aligning image-regions and text-expressions. We achieve this through a novel approach for feeding the input by drawing a bounding box around the target image-region and prompting the model to generate the referring expression. Our base REG model already beats all prior works in CIDEr score. To tackle Aspect-Controlled REG, we append ‘aspect tokens’ to the prompt and show that distinct expressions can be generated by just changing the prompt. Finally, to prove the high-quality and diversity of the data generated by our proposed aspect-controlled REG model, we also perform data-augmentation-based evaluation on the downstream Referring Expression Comprehension (REC) task. With just half of the real data augmented with the generated synthetic data, we achieve performance comparable to training with 100% of real data, using a SOTA REC model.</abstract>
@@ -4487,7 +4487,7 @@
       <title>Crossing Linguistic Horizons: Finetuning and Comprehensive Evaluation of <fixed-case>V</fixed-case>ietnamese Large Language Models</title>
       <author><first>Sang</first><last>Truong</last></author>
       <author><first>Duc</first><last>Nguyen</last></author>
-      <author><first>Toan</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan</first><last>Nguyen</last></author>
       <author><first>Dong</first><last>Le</last></author>
       <author><first>Nhi</first><last>Truong</last></author>
       <author><first>Tho</first><last>Quan</last></author>
@@ -4574,7 +4574,7 @@
       <author><first>YiFan</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science</affiliation></author>
       <author><first>Hanlin</first><last>Zhang</last><affiliation>Harvard University</affiliation></author>
       <author><first>Li</first><last>Li</last><affiliation>Amazon and Columbia University</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>2984-3002</pages>
       <abstract>Pre-trained language models (LMs) have shown remarkable reasoning performance using explanations or chain-of-thoughts (CoT)) for in-context learning. On the other hand, these reasoning tasks are usually presumed to be more approachable for symbolic programming. To understand the mechanism of reasoning of LMs, we curate synthetic datasets containing equivalent (natural, symbolic) data pairs, where symbolic examples contain first-order logic rules and predicates from non-parametric knowledge bases (KBs), supporting automated verification of intermediate reasoning results. Then we revisit neuro-symbolic approaches and propose to learn from demonstrations containing logic rules and corresponding examples to iteratively reason over KBs, recovering Prolog’s backward chaining algorithm and supporting automated verification of LMs’ outputs. Comprehensive experiments are included to systematically compare LMLP with CoT in deductive reasoning settings, showing that LMLP enjoys more than 25% higher accuracy than CoT on length generalization benchmarks even with smaller model sizes.</abstract>
       <url hash="34aa5d25">2024.findings-naacl.188</url>
@@ -4597,7 +4597,7 @@
     <paper id="190">
       <title>Retrieving Examples from Memory for Retrieval Augmented Neural Machine Translation: A Systematic Comparison</title>
       <author><first>Maxime</first><last>Bouthors</last></author>
-      <author><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>SYSTRAN</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
       <pages>3022-3039</pages>
       <abstract>Retrieval-Augmented Neural Machine Translation (RAMT) architectures retrieve examples from memory to guide the generation process. While most works in this trend explore new ways to exploit the retrieved examples, the upstream retrieval step is mostly unexplored. In this paper, we study the effect of varying retrieval methods for several translation architectures to better understand the interplay between these two processes.We conduct experiments in two language pairs in a multi-domain setting and consider several downstream architectures based on a standard autoregressive model, an edit-based model, and a large language model with in-context learning. Our experiments show that the choice of the retrieval technique impacts the translation scores, with variance across architectures. We also discuss the effects of increasing the number and diversity of examples, which are mostly positive across the board.</abstract>
@@ -4730,7 +4730,7 @@
       <author><first>Aru</first><last>Maekawa</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Satoshi</first><last>Kosugi</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Institute of Innovative Research, Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>3138-3153</pages>
       <abstract>Dataset distillation aims to compress a training dataset by creating a small number of informative synthetic samples such that neural networks trained on them perform as well as those trained on the original training dataset. Current text dataset distillation methods create each synthetic sample as a sequence of word embeddings instead of a text to apply gradient-based optimization; however, such embedding-level distilled datasets cannot be used for training other models whose word embedding weights are different from the model used for distillation. To address this issue, we propose a novel text dataset distillation approach, called Distilling dataset into Language Model (DiLM), which trains a language model to generate informative synthetic training samples as text data, instead of directly optimizing synthetic samples. We evaluated DiLM on various text classification datasets and showed that distilled synthetic datasets from DiLM outperform those from current coreset selection methods. DiLM achieved remarkable generalization performance in training different types of models and in-context learning of large language models. Our code will be available at https://github.com/arumaekawa/DiLM.</abstract>
       <url hash="44382ddd">2024.findings-naacl.199</url>
@@ -4777,7 +4777,7 @@
       <title>Learning Mutually Informed Representations for Characters and Subwords</title>
       <author><first>Yilin</first><last>Wang</last><affiliation>School of Engineering and Applied Sciences, Harvard University and Carnegie Mellon University</affiliation></author>
       <author><first>Xinyi</first><last>Hu</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
-      <author><first>Matthew</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>3201-3213</pages>
       <abstract>Most pretrained language models rely on subword tokenization, which processes text as a sequence of subword tokens. However, different granularities of text, such as characters, subwords, and words, can contain different kinds of information. Previous studies have shown that incorporating multiple input granularities improves model generalization, yet very few of them outputs useful representations for each granularity. In this paper, we introduce the entanglement model, aiming to combine character and subword language models. Inspired by vision-language models, our model treats characters and subwords as separate modalities, and it generates mutually informed representations for both granularities as output. We evaluate our model on text classification, named entity recognition, POS-tagging, and character-level sequence labeling (intraword code-switching). Notably, the entanglement model outperforms its backbone language models, particularly in the presence of noisy texts and low-resource languages. Furthermore, the entanglement model even outperforms larger pre-trained models on all English sequence labeling tasks and classification tasks. We make our code publically available.</abstract>
       <url hash="2c4c8710">2024.findings-naacl.202</url>
@@ -4853,7 +4853,7 @@
       <title>Time Machine <fixed-case>GPT</fixed-case></title>
       <author><first>Felix</first><last>Drinkall</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Eghbal</first><last>Rahimikia</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Janet</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Stefan</first><last>Zohren</last><affiliation>University of Oxford</affiliation></author>
       <pages>3281-3292</pages>
       <abstract>Large language models (LLMs) are often trained on extensive, temporally indiscriminate text corpora, reflecting the lack of datasets with temporal metadata. This approach is not aligned with the evolving nature of language. Conventional methods for creating temporally adapted language models often depend on further pre-training static models on time-specific data. This paper presents a new approach: a series of point-in-time LLMs called TimeMachineGPT (TiMaGPT), specifically designed to be nonprognosticative. This ensures they remain uninformed about future factual information and linguistic changes. This strategy is beneficial for understanding language evolution and is of critical importance when applying models in dynamic contexts, such as time-series forecasting, where foresight of future information can prove problematic. We provide access to both the models and training datasets.</abstract>
@@ -4868,7 +4868,7 @@
       <author><first>Shengjie</first><last>Wang</last><affiliation>University of Washington, University of Illinois, Urbana Champaign and New York University, Shanghai</affiliation></author>
       <author><first>Arnav</first><last>Das</last><affiliation>University of Washington</affiliation></author>
       <author><first>Tianyi</first><last>Zhou</last><affiliation>University of Maryland, College Park</affiliation></author>
-      <author><first>Jeff</first><last>Bilmes</last><affiliation>University of Washington, Seattle</affiliation></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>3293-3308</pages>
       <url hash="e673d0c9">2024.findings-naacl.209</url>
       <bibkey>kumari-etal-2024-end</bibkey>
@@ -4919,10 +4919,10 @@
     </paper>
     <paper id="213">
       <title>A Measure for Transparent Comparison of Linguistic Diversity in Multilingual <fixed-case>NLP</fixed-case> Data Sets</title>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Ximena</first><last>Gutierrez</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <author><first>Christian</first><last>Bentz</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
-      <author><first>Steven</first><last>Moran</last><affiliation>University of Miami</affiliation></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last><affiliation>University of Miami</affiliation></author>
       <author><first>Olga</first><last>Pelloni</last></author>
       <pages>3367-3382</pages>
       <abstract>Typologically diverse benchmarks are increasingly created to track the progress achieved in multilingual NLP. Linguistic diversity of these data sets is typically measured as the number of languages or language families included in the sample, but such measures do not consider structural properties of the included languages. In this paper, we propose assessing linguistic diversity of a data set against a reference language sample as a means of maximising linguistic diversity in the long run. We represent languages as sets of features and apply a version of the Jaccard index suitable for comparing sets of measures. In addition to the features extracted from typological data bases, we propose an automatic text-based measure, which can be used as a means of overcoming the well-known problem of data sparsity in manually collected features. Our diversity score is interpretable in terms of linguistic features and can identify the types of languages that are not represented in a data set. Using our method, we analyse a range of popular multilingual data sets (UD, Bible100, mBERT, XTREME, XGLUE, XNLI, XCOPA, TyDiQA, XQuAD). In addition to ranking these data sets, we find, for example, that (poly)synthetic languages are missing in almost all of them.</abstract>
@@ -4946,10 +4946,10 @@
     </paper>
     <paper id="215">
       <title>Normalizing without Modernizing: Keeping Historical Wordforms of <fixed-case>M</fixed-case>iddle <fixed-case>F</fixed-case>rench while Reducing Spelling Variants</title>
-      <author><first>Raphael</first><last>Rubino</last><affiliation>University of Geneva</affiliation></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Johanna</first><last>Gerlach</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last><affiliation>University of Geneva</affiliation></author>
       <pages>3394-3402</pages>
       <abstract>Conservation of historical documents benefits from computational methods by alleviating the manual labor related to digitization and modernization of textual content. Languages usually evolve over time and keeping historical wordforms is crucial for diachronic studies and digital humanities. However, spelling conventions did not necessarily exist when texts were originally written and orthographic variations are commonly observed depending on scribes and time periods. In this study, we propose to automatically normalize orthographic wordforms found in historical archives written in Middle French during the 16th century without fully modernizing textual content. We leverage pre-trained models in a low resource setting based on a manually curated parallel corpus and produce additional resources with artificial data generation approaches. Results show that causal language models and knowledge distillation improve over a strong baseline, thus validating the proposed methods.</abstract>
       <url hash="d9353190">2024.findings-naacl.215</url>
@@ -4972,7 +4972,7 @@
       <title>Defending Against Weight-Poisoning Backdoor Attacks for Parameter-Efficient Fine-Tuning</title>
       <author><first>Shuai</first><last>Zhao</last><affiliation>Jinan University</affiliation></author>
       <author><first>Leilei</first><last>Gan</last><affiliation>Zhejiang University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Jie</first><last>Fu</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Lingjuan</first><last>Lyu</last><affiliation>Sony Research</affiliation></author>
       <author><first>Meihuizi</first><last>Jia</last></author>
@@ -5138,7 +5138,7 @@
       <author><first>Xiajie</first><last>Zhang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Xubo</first><last>Cao</last></author>
       <author><first>Cynthia</first><last>Breazeal</last></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>3605-3627</pages>
       <abstract>Despite the many use cases for large language models (LLMs) in creating personalized chatbots, there has been limited research on evaluating the extent to which the behaviors of personalized LLMs accurately and consistently reflect specific personality traits. We consider studying the behavior of LLM-based agents which we refer to as LLM personas and present a case study with GPT-3.5 and GPT-4 to investigate whether LLMs can generate content that aligns with their assigned personality profiles. To this end, we simulate distinct LLM personas based on the Big Five personality model, have them complete the 44-item Big Five Inventory (BFI) personality test and a story writing task, and then assess their essays with automatic and human evaluations. Results show that LLM personas’ self-reported BFI scores are consistent with their designated personality types, with large effect sizes observed across five traits. Additionally, LLM personas’ writings have emerging representative linguistic patterns for personality traits when compared with a human writing corpus. Furthermore, human evaluation shows that humans can perceive some personality traits with an accuracy of up to 80%. Interestingly, the accuracy drops significantly when the annotators were informed of AI authorship.</abstract>
@@ -5262,7 +5262,7 @@
     </paper>
     <paper id="238">
       <title><fixed-case>C</fixed-case>o<fixed-case>D</fixed-case>a: Constrained Generation based Data Augmentation for Low-Resource <fixed-case>NLP</fixed-case></title>
-      <author><first>Chandra Kiran</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran</first><last>Evuru</last></author>
       <author><first>Sreyan</first><last>Ghosh</last></author>
       <author><first>Sonal</first><last>Kumar</last></author>
       <author><first>Ramaneswaran</first><last>S</last></author>
@@ -5337,7 +5337,7 @@
       <author><first>Zhiheng</first><last>Xi</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>3829-3845</pages>
       <abstract>Large language models (LLMs) have shown promising abilities of in-context learning (ICL), adapting swiftly to new tasks with only few-shot demonstrations. However, current few-shot methods heavily depend on high-quality, query-specific demos, which are often lacking. When faced with out-of-demonstration (OOD) queries, methods that rely on hand-crafted demos or external retrievers might fail. To bridge the gap between limited demos and OOD queries, we propose Self-Demos, a novel prompting method that elicits the inherent generalizability in LLMs by query-aware demo generation. The generated demos strategically interpolate between existing demos and the given query, transforming the query from OOD to ID. To evaluate the effectiveness of our approach, we manually constructed OOD-Toolset, a dataset in the tool-using scenario with over 300 real-world APIs and 1000 instances, each consisting of three tool-use cases as demos and an OOD query. Thorough experiments on our dataset and two public math benchmarks have shown that our method can outperform state-of-the-art baselines in the OOD setting. Moreover, we conduct a range of analyses to validate Self-Demos’s generalization and provide more insights.</abstract>
       <url hash="1f7aafc7">2024.findings-naacl.243</url>
@@ -5403,7 +5403,7 @@
       <author><first>Lena</first><last>Jurkschat</last><affiliation>Technische Universität Dresden</affiliation></author>
       <author><first>Hammam</first><last>Abdelwahab</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
       <author><first>Chelsea</first><last>John</last><affiliation>Forschungszentrum Juelich GmbH</affiliation></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>Malte</first><last>Ostendorff</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Samuel</first><last>Weinbach</last><affiliation>Aleph Alpha GmbH</affiliation></author>
       <author><first>Rafet</first><last>Sifa</last><affiliation>Rheinische Friedrich-Wilhelms Universität Bonn</affiliation></author>
@@ -5421,7 +5421,7 @@
       <author><first>Junkai</first><last>Zhou</last></author>
       <author><first>Liang</first><last>Pang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>3925-3951</pages>
       <abstract>The emergence of large language models (LLMs) further improves the capabilities of open-domain dialogue systems and can generate fluent, coherent, and diverse responses. However, LLMs still lack a crucial ability: communication skills. This limitation renders them more like information seeking tools rather than anthropomorphic chatbots. Communication skills, such as topic transition, proactively asking questions, concept guidance, empathy, and summarising often should be taken into consideration, to make LLMs more anthropomorphic and proactive during the conversation, thereby increasing the interest of users and attracting them to chat for longer. However, enabling these communication skills in black-box LLMs remains a key challenge because they do not have the same utterance formation mode as real people: think before speaking. Inspired by linguistics and cognitive science, we empower LLMs with communication skills through inner monologues. To evaluate various communication skills, we construct a benchmark named Cskills, which can also more comprehensively evaluate the dialogue generation ability of the model. Experimental results show that the proposed CSIM strategy improves the backbone models and outperforms the baselines.</abstract>
       <url hash="3c85bfcc">2024.findings-naacl.248</url>
@@ -5435,7 +5435,7 @@
       <author><first>Atula</first><last>Neerkaje</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Ramit</first><last>Sawhney</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Lucie</first><last>Flek</last><affiliation>Rheinische Friedrich-Wilhelms Universität Bonn</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>3952-3965</pages>
       <abstract>The performance cost of differential privacy has, for some applications, been shown to be higher for minority groups; fairness, conversely, has been shown to disproportionally compromise the privacy of members of such groups. Most work in this area has been restricted to computer vision and risk assessment. In response, we evaluate the impact of differential privacy on fairness across four diverse tasks, focusing on how attempts to mitigate privacy violations and between-group performance differences interact: Does privacy inhibit attempts to ensure fairness? To this end, we train <tex-math>(\varepsilon,\delta)</tex-math>-differentially private models with empirical risk minimization and group distributionally robust training objectives. Consistent with previous findings, we find that differential privacy increases between-group performance differences in the baseline setting; more interestingly, differential privacy <i>reduces</i> between-group performance differences in the robust setting. We explain this by interpreting differential privacy as regularization.</abstract>
       <url hash="a3578e23">2024.findings-naacl.249</url>
@@ -5450,7 +5450,7 @@
       <author><first>Mohammadi</first><last>Zaki</last><affiliation>Sony Research India, Bangalore</affiliation></author>
       <author><first>Ashishkumar</first><last>Gudmalwar</last></author>
       <author><first>Pankaj</first><last>Wasnik</last><affiliation>Sony Research India</affiliation></author>
-      <author><first>Rajiv</first><last>Shah</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
       <pages>3966-3976</pages>
       <abstract>Traditional Automatic Video Dubbing (AVD) pipeline consists of three key modules, namely, Automatic Speech Recognition (ASR), Neural Machine Translation (NMT), and Text-to-Speech (TTS). Within AVD pipelines, isometric-NMT algorithms are employed to regulate the length of the synthesized output text. This is done to guarantee synchronization with respect to the alignment of video and audio subsequent to the dubbing process. Previous approaches have focused on aligning the number of characters and words in the source and target language texts of Machine Translation models. However, our approach aims to align the number of phonemes instead, as they are closely associated with speech duration. In this paper, we present the development of an isometric NMT system using Reinforcement Learning (RL), with a focus on optimizing the alignment of phoneme counts in the source and target language sentence pairs. To evaluate our models, we propose the Phoneme Count Compliance (PCC) score, which is a measure of length compliance. Our approach demonstrates a substantial improvement of approximately 36% in the PCC score compared to the state-of-the-art models when applied to English-Hindi language pairs. Moreover, we propose a student-teacher architecture within the framework of our RL approach to maintain a trade-off between the phoneme count and translation quality.</abstract>
       <url hash="4e0cff82">2024.findings-naacl.250</url>
@@ -5524,7 +5524,7 @@
       <author><first>Nikita</first><last>Moghe</last></author>
       <author><first>Patrick</first><last>Xia</last><affiliation>Microsoft</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>Massachusetts Institute of Technology and Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University, Johns Hopkins University, Johns Hopkins University and Microsoft</affiliation></author>
       <author><first>Harsh</first><last>Jhamtani</last><affiliation>Microsoft</affiliation></author>
       <pages>4043-4060</pages>
@@ -5583,8 +5583,8 @@
       <author><first>Yuan</first><last>Gong</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>4131-4155</pages>
       <abstract>How can we perform computations over natural language representations to solve tasks that require symbolic and numeric reasoning? We propose natural language embedded programs (NLEP) as a unifying framework for addressing math/symbolic reasoning, natural language understanding, and instruction following tasks. Our approach prompts a language model to generate full Python programs that define functions over data structures which contain natural language representations of structured knowledge. A Python interpreter then executes the generated code and prints the output. Despite using a task-general prompt, we find that this approach can improve upon strong baselines across a range of different tasks including math and symbolic reasoning, text classification, question answering, and instruction following. We found that the generated programs are interpretable since they outline the exact reasoning process followed by the program interpreter.</abstract>
       <url hash="2d026d25">2024.findings-naacl.259</url>
@@ -5607,8 +5607,8 @@
       <title><fixed-case>V</fixed-case>i<fixed-case>GLUE</fixed-case>: A <fixed-case>V</fixed-case>ietnamese General Language Understanding Benchmark and Analysis of <fixed-case>V</fixed-case>ietnamese Language Models</title>
       <author><first>Minh-Nam</first><last>Tran</last></author>
       <author><first>Phu-Vinh</first><last>Nguyen</last></author>
-      <author><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>4174-4189</pages>
       <abstract>As the number of language models has increased, various benchmarks have been suggested to assess the proficiency of the models in natural language understanding. However, there is a lack of such a benchmark in Vietnamese due to the difficulty in accessing natural language processing datasets or the scarcity of task-specific datasets. **ViGLUE**, the proposed dataset collection, is a **Vi**etnamese **G**eneral **L**anguage **U**nderstanding **E**valuation benchmark developed using three methods: translating an existing benchmark, generating new corpora, and collecting available datasets. ViGLUE contains twelve tasks and encompasses over ten areas and subjects, enabling it to evaluate models comprehensively over a broad spectrum of aspects. Baseline models utilizing multilingual language models are also provided for all tasks in the proposed benchmarks. In addition, the study of the available Vietnamese large language models is conducted to explore the language models’ ability in the few-shot learning framework, leading to the exploration of the relationship between specific tasks and the number of shots.</abstract>
       <url hash="1bde9a76">2024.findings-naacl.261</url>
@@ -5865,13 +5865,13 @@
     <paper id="280">
       <title>Benchmarking Generation and Evaluation Capabilities of Large Language Models for Instruction Controllable Summarization</title>
       <author><first>Yixin</first><last>Liu</last><affiliation>Yale University</affiliation></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Jiawen</first><last>Chen</last></author>
       <author><first>Yilun</first><last>Zhao</last><affiliation>Yale University</affiliation></author>
       <author><first>Simeng</first><last>Han</last><affiliation>Yale University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>4481-4501</pages>
@@ -5901,7 +5901,7 @@
       <author><first>Fangxu</first><last>Yu</last></author>
       <author><first>Junjie</first><last>Guo</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <pages>4521-4534</pages>
       <abstract>Emotion Recognition in Conversation (ERC) involves detecting the underlying emotion behind each utterance within a conversation. Effectively generating representations for utterances remains a significant challenge in this task. Recent works propose various models to address this issue, but they still struggle with differentiating similar emotions such as excitement and happiness. To alleviate this problem, We propose an Emotion-Anchored Contrastive Learning (EACL) framework that can generate more distinguishable utterance representations for similar emotions. To achieve this, we utilize label encodings as anchors to guide the learning of utterance representations and design an auxiliary loss to ensure the effective separation of anchors for similar emotions. Moreover, an additional adaptation process is proposed to adapt anchors to serve as effective classifiers to improve classification performance. Across extensive experiments, our proposed EACL achieves state-of-the-art emotion recognition performance and exhibits superior performance on similar emotions. Our code is available at https://github.com/Yu-Fangxu/EACL.</abstract>
       <url hash="27a140c4">2024.findings-naacl.282</url>
@@ -6104,7 +6104,7 @@
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: ACL 2024</booktitle>
       <editor><first>Lun-Wei</first><last>Ku</last><affiliation>Academia Sinica</affiliation></editor>
-      <editor><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
+      <editor id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico / Instituto de Telecomunicações / Unbabel</affiliation></editor>
       <editor><first>Vivek</first><last>Srikumar</last><affiliation>University of Utah</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
@@ -6158,7 +6158,7 @@
       <title>End-to-End Emotion Semantic Parsing</title>
       <author><first>Xiaotong</first><last>Jiang</last></author>
       <author><first>Zhongqing</first><last>Wang</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>37-47</pages>
       <abstract>Emotion detection is the task of automatically associating one or more emotions with a text. The emotions are experienced, targeted, and caused by different semantic constituents. Therefore, it is necessary to incorporate these semantic constituents into the process of emotion detection. In this study, we propose a new task called emotion semantic parsing which aims to parse the emotion and semantic constituents into an abstract semantic tree structure. In particular, we design an end-to-end generation model to capture the relations between emotion and all the semantic constituents, and to generate them jointly. Furthermore, we employ a task decomposition strategy to capture the semantic relation among these constituents in a more cognitive and structural way. Experimental results demonstrate the importance of the proposed task, and indicate the proposed model gives superior performance compared to other models.</abstract>
       <url hash="be47bceb">2024.findings-acl.4</url>
@@ -6211,7 +6211,7 @@
       <author><first>Bailey</first><last>Kuehl</last></author>
       <author><first>Chenhao</first><last>Tan</last><affiliation>University of Chicago</affiliation></author>
       <author><first>David</first><last>Wadden</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Aakanksha</first><last>Naik</last><affiliation>Allen Institute for Artificial Intelligence and National Institutes of Health</affiliation></author>
       <pages>118-132</pages>
       <abstract>Literature review requires researchers to synthesize a large amount of information and is increasingly challenging as the scientific literature expands. In this work, we investigate the potential of LLMs for producing hierarchical organizations of scientific studies to assist researchers with literature review. We define hierarchical organizations as tree structures where nodes refer to topical categories and every node is linked to the studies assigned to that category. Our naive LLM-based pipeline for hierarchy generation from a set of studies produces promising yet imperfect hierarchies, motivating us to collect CHIME, an expert-curated dataset for this task focused on biomedicine. Given the challenging and time-consuming nature of building hierarchies from scratch, we use a human-in-the-loop process in which experts correct errors (both links between categories and study assignment) in LLM-generated hierarchies. CHIME contains 2,174 LLM-generated hierarchies covering 472 topics, and expert-corrected hierarchies for a subset of 100 topics. Expert corrections allow us to quantify LLM performance, and we find that while they are quite good at generating and organizing categories, their assignment of studies to categories could be improved. We attempt to train a corrector model with human feedback which improves study assignment by 12.6 F1 points. We release our dataset and models to encourage research on developing better assistive tools for literature review.</abstract>
@@ -6224,14 +6224,14 @@
       <author><first>Hao</first><last>Li</last></author>
       <author><first>Yuping</first><last>Wu</last></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>Imperial College London</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Tharindu</first><last>Madusanka</last></author>
       <author><first>Iqra</first><last>Zahid</last></author>
       <author><first>Jiayan</first><last>Zeng</last></author>
       <author><first>Xiaochi</first><last>Wang</last></author>
       <author><first>Xinran</first><last>He</last></author>
       <author><first>Yizhi</first><last>Li</last><affiliation>University of Manchester and University of Sheffield</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>133-150</pages>
       <abstract>With the recent advances of large language models (LLMs), it is no longer infeasible to build an automated debate system that helps people to synthesise persuasive arguments. Previous work attempted this task by integrating multiple components. In our work, we introduce an argument mining dataset that captures the end-to-end process of preparing an argumentative essay for a debate, which covers the tasks of claim and evidence identification (Task 1 ED), evidence convincingness ranking (Task 2 ECR), argumentative essay summarisation and human preference ranking (Task 3 ASR) and metric learning for automated evaluation of resulting essays, based on human feedback along argument quality dimensions (Task 4 SQE). Our dataset contains 14k examples of claims that are fully annotated with various properties supporting the aforementioned tasks. We evaluate multiple generative baselines for each of these tasks, including representative LLMs. We find, that while they show promising results on individual tasks in our benchmark, their end-to-end performance on all four tasks in succession deteriorates significantly, both in automated measures as well as in human-centred evaluation. This challenge presented by our proposed dataset motivates future research on end-to-end argument mining and summarisation. The repository of this project is available at https://github.com/HarrywillDr/ArgSum-Datatset.</abstract>
       <url hash="e6791161">2024.findings-acl.9</url>
@@ -6245,8 +6245,8 @@
       <author><first>Sarathkrishna</first><last>Swaminathan</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Asaf</first><last>Yehudai</last></author>
       <author><first>Subhajit</first><last>Chaudhury</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Radu</first><last>Florian</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Ramón</first><last>Astudillo</last><affiliation>International Business Machines</affiliation></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last><affiliation>International Business Machines</affiliation></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Asim</first><last>Munawar</last><affiliation>International Business Machines</affiliation></author>
       <pages>151-162</pages>
       <abstract>Despite LLMs’ recent advancements, they still suffer from factual inconsistency and hallucination. An often-opted remedy is retrieval-augmented generation – however, there is no guarantee that the model will strictly adhere to retrieved grounding. Fundamentally, LLMs need to be aligned to be more faithful to grounding, which will require high-quality preference annotations. This paper investigates whether we can create high-quality grounded preference data for model alignment without using annotations from humans or large proprietary models. We experimented with existing entailment data and proposed approaches to generate synthetic grounded preference data, with which we train a Grounded Preference Model(GPM). We demonstrate through Proximal Policy Optimization(PPO) training of Mistral-7B-Instruct that our GPM model can successfully align powerful LLMs to generate much better grounded responses as judged by GPT4. Moreover, we show that our GPM is also a great faithfulness classifier, achieving SoTA in dialogue sub-tasks of the TRUE faithfulness Benchmark. We will release our GPM under the Apache 2.0 license.</abstract>
@@ -6291,7 +6291,7 @@
       <author><first>Zoey</first><last>Liu</last><affiliation>University of Florida</affiliation></author>
       <author><first>Nitin</first><last>Venkateswaran</last></author>
       <author><first>Eric</first><last>Le Ferrand</last><affiliation>Boston College</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>206-213</pages>
       <abstract>N-gram language models (LMs) are the innovation that first made large-vocabulary continuous automatic speech recognition (ASR) viable. With neural end-to-end ASR architectures, however, LMs have become an afterthought. While the effect on accuracy may be negligible for English and Mandarin, jettisoning the LM might not make sense for the world’s remaining 6000+ languages. In this paper, we investigate the role of the LM in low-resource ASR. First we ask: does using an n-gram LM in decoding in neural architectures help ASR performance? While it may seem obvious that it should, its absence in most implementations suggests otherwise. Second, we ask: when an n-gram LM is used in ASR, is there a relationship between the size of the LM and ASR accuracy? We have discovered that gut feelings on this question vary considerably, but there is little empirical work to support any particular claim. We explore these questions “in the wild” using a deliberately diverse set of 9 very small ASR corpora. The results show that: (1) decoding with an n-gram LM, regardless of its size, leads to lower word error rates; and (2) increasing the size of the LM appears to yield improvements only when the audio corpus itself is already relatively large. This suggests that collecting additional LM training text may benefit widely-spoken languages which typically have larger audio corpora. In contrast, for endangered languages where data of any kind will always be limited, efforts may be better spent collecting additional transcribed audio.</abstract>
       <url hash="cdf6fa28">2024.findings-acl.13</url>
@@ -6351,7 +6351,7 @@
       <author><first>Canwen</first><last>Xu</last></author>
       <author><first>Yichong</first><last>Xu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Shuohang</first><last>Wang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Chenguang</first><last>Zhu</last><affiliation>Zoom</affiliation></author>
       <author><first>Julian</first><last>McAuley</last><affiliation>University of California, San Diego, University of California, San Diego</affiliation></author>
       <pages>283-294</pages>
@@ -6379,7 +6379,7 @@
       <author><first>Weizhi</first><last>Zhang</last></author>
       <author><first>Liancheng</first><last>Fang</last></author>
       <author><first>Zihe</first><last>Song</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <author><first>Cornelia</first><last>Caragea</last></author>
       <pages>338-354</pages>
       <abstract>Existing datasets for attribute value extraction (AVE) predominantly focus on explicit attribute values while neglecting the implicit ones, lack product images, are often not publicly available, and lack an in-depth human inspection across diverse domains. To address these limitations, we present ImplicitAVE, the first, publicly available multimodal dataset for implicit attribute value extraction. ImplicitAVE, sourced from the MAVE dataset, is carefully curated and expanded to include implicit AVE and multimodality, resulting in a refined dataset of 68k training and 1.6k testing data across five domains. We also explore the application of multimodal large language models (MLLMs) to implicit AVE, establishing a comprehensive benchmark for MLLMs on the ImplicitAVE dataset. Six recent MLLMs with eleven variants are evaluated across diverse settings, revealing that implicit value extraction remains a challenging task for MLLMs. The contributions of this work include the development and release of ImplicitAVE, and the exploration and benchmarking of various MLLMs for implicit AVE, providing valuable insights and potential future research directions. Dataset and code are available at https://github.com/HenryPengZou/ImplicitAVE.</abstract>
@@ -6402,7 +6402,7 @@
     <paper id="22">
       <title><fixed-case>ASPIRE</fixed-case>: Language-Guided Data Augmentation for Improving Robustness Against Spurious Correlations</title>
       <author><first>Sreyan</first><last>Ghosh</last></author>
-      <author><first>Chandra Kiran</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran</first><last>Evuru</last></author>
       <author><first>Sonal</first><last>Kumar</last></author>
       <author><first>Utkarsh</first><last>Tyagi</last></author>
       <author><first>S</first><last>Sakshi</last></author>
@@ -6423,7 +6423,7 @@
       <author><first>Yulong</first><last>Chen</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Lin</first><last>Ma</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Yue</first><last>Zhang</last><affiliation>Westlake University</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>407-426</pages>
       <abstract>Tables contrast with unstructured text data by its structure to organize the information.In this paper, we investigate the efficiency of various LLMs in interpreting tabular data through different prompting strategies and data formats. Our analysis extends across six benchmarks for table-related tasks such as question-answering and fact-checking. We pioneer in the assessment of LLMs’ performance on image-based table representation. Specifically, we compare five text-based and three image-based table representations, revealing the influence of representation and prompting on LLM performance. We hope our study provides researchers insights into optimizing LLMs’ application in table-related tasks.</abstract>
       <url hash="fced95f0">2024.findings-acl.23</url>
@@ -6529,7 +6529,7 @@
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jinliang</first><last>Lu</last><affiliation>Institute of automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Shaonan</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>546-566</pages>
       <abstract>Large language models respond well in high-resource languages like English but struggle in low-resource languages. It may arise from the lack of high-quality instruction following data in these languages. Directly translating English samples into these languages can be a solution but unreliable, leading to responses with translation errors and lacking language-specific or cultural knowledge. To address this issue, we propose a novel method to construct cross-lingual instruction following samples with instruction in English and response in low-resource languages. Specifically, the language model first learns to generate appropriate English instructions according to the natural web texts in other languages as responses. The candidate cross-lingual instruction tuning samples are further refined and diversified. We have employed this method to build a large-scale cross-lingual instruction tuning dataset on 10 languages, namely X-Instruction. The instruction data built using our method incorporate more language-specific knowledge compared with the naive translation method. Experimental results have shown that the response quality of the model tuned on X-Instruction greatly exceeds the model distilled from a powerful teacher model, reaching or even surpassing the ones of ChatGPT. In addition, we find that models tuned on cross-lingual instruction following samples can follow the instruction in the output language without further tuning.</abstract>
       <url hash="5c6391f4">2024.findings-acl.30</url>
@@ -6596,10 +6596,10 @@
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Lingzhi</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Xuhui</first><last>Sui</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>644-653</pages>
       <abstract>The success of large language models (LLM) benefits from large-scale model parameters and large amounts of pre-training data. However, the textual data for training LLM can not be confirmed to be legal because they are crawled from different web sites. For example, there are copyrighted articles, personal reviews and information in the pre-training data for LLM which are illegal. To address the above issue and develop legal LLM, we propose to detect the pre-training data from LLM in a pure black-box way because the existing LLM services only return the generated text. The previous most related works are the membership inference attack (MIA) on machine learning models to detect the training data from them. But the existing methods are based on analyzing the output probabilities of models which are unrealistic to LLM services. To tackle the problem, we firstly construct the benchmark datasets by collecting textual data from different domains as the seen and unseen pre-training data for LLMs. Then, we investigate a black-box framework named DPDLLM, with the only access to the generated texts from LLM for detecting textual data whether was used to train it. In the proposed framework, we exploit GPT-2 as the reference model to fit the textual data and feed the generated text from LLM into it to acquire sequence probabilities as the significant feature for detection. The experimental results on the benchmark datasets demonstrate that DPDLLM is effective on different popular LLMs and outperforms the existing methods.</abstract>
       <url hash="786e252a">2024.findings-acl.35</url>
@@ -6664,7 +6664,7 @@
       <title><fixed-case>C</fixed-case>ode<fixed-case>M</fixed-case>: Less Data Yields More Versatility via Ability Matrix</title>
       <author><first>Daoguang</first><last>Zan</last></author>
       <author><first>Ailun</first><last>Yu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Bo</first><last>Shen</last></author>
       <author><first>Shaoxin</first><last>Lin</last></author>
       <author><first>Yongshun</first><last>Gong</last><affiliation>Shandong University</affiliation></author>
@@ -6689,7 +6689,7 @@
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Zhenhailong</first><last>Wang</last></author>
       <author><first>Lingyu</first><last>Zhang</last><affiliation>Duke University</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>730-749</pages>
       <abstract>Advances in large vision-language models (LVLMs) have led to significant progress in generating natural language descriptions for visual contents. These powerful models are known for producing texts that are factually inconsistent with the visual input. While some efforts mitigate such inconsistencies in natural image captioning, the factuality of generated captions for structured visuals, such as charts, has not received as much scrutiny. This work introduces a comprehensive typology of factual errors in generated chart captions. A large-scale human annotation effort provides insight into the error patterns in captions generated by various models, ultimately forming the foundation of a dataset, CHOCOLATE. Our analysis reveals that even advanced models like GPT-4V frequently produce captions laced with factual inaccuracies. To combat this, we establish the task of Chart Caption Factual Error Correction and introduce CHARTVE, a visual entailment model that outperforms current LVLMs in evaluating caption factuality. Furthermore, we propose C2TFEC, an interpretable two-stage framework that excels at correcting factual errors. This work inaugurates a new domain in factual error correction for chart captions, presenting a novel evaluation metric, and demonstrating an effective approach to ensuring the factuality of generated chart captions. The code and data as well as the continuously updated benchmark can be found at: https://khuangaf.github.io/CHOCOLATE/.</abstract>
@@ -6756,7 +6756,7 @@
     <paper id="46">
       <title><fixed-case>MELOV</fixed-case>: Multimodal Entity Linking with Optimized Visual Features in Latent Space</title>
       <author><first>Xuhui</first><last>Sui</last></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>Nankai University</affiliation></author>
       <author><first>Yu</first><last>Zhao</last><affiliation>Nankai University</affiliation></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Baohang</first><last>Zhou</last><affiliation>Nankai University</affiliation></author>
@@ -6821,7 +6821,7 @@
       <author><first>Chong</first><last>Yang</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Tu</first><last>Hu</last></author>
       <author><first>Xinhao</first><last>Chen</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Li</first><last>Cai</last><affiliation>Guizhou University</affiliation></author>
       <author><first>Xinlin</first><last>Zhuang</last></author>
       <author><first>Xuan</first><last>Lin</last><affiliation>Ant Group</affiliation></author>
@@ -6905,7 +6905,7 @@
       <author><first>Qingwei</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Victor</first><last>Rühle</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yuqing</first><last>Yang</last><affiliation>Research, Microsoft</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
       <author><first>H. Vicky</first><last>Zhao</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Lili</first><last>Qiu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Dongmei</first><last>Zhang</last><affiliation>Microsoft and Microsoft</affiliation></author>
@@ -6999,7 +6999,7 @@
       <author><first>Peiyi</first><last>Wang</last></author>
       <author><first>Xiangdi</first><last>Meng</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>1086-1104</pages>
       <abstract>We present PCA-Bench, a multimodal decision-making benchmark for evaluating the integrated capabilities of Multimodal Large Language Models (MLLMs). Departing from previous benchmarks focusing on simplistic tasks and individual model capability, PCA-Bench introduces three complex scenarios: autonomous driving, domestic robotics, and open-world games. Given task instructions and diverse contexts, the model is required to seamlessly integrate multiple capabilities of Perception, Cognition, and Action in a reasoning chain to make accurate decisions. Moreover, PCA-Bench features error localization capabilities, scrutinizing model inaccuracies in areas such as perception, knowledge, or reasoning. This enhances the reliability of deploying MLLMs. To balance accuracy and efficiency in evaluation, we propose PCA-Eval, an automatic evaluation protocol, and assess 10 prevalent MLLMs. The results reveal significant performance disparities between open-source models and powerful proprietary models like GPT-4 Vision. To address this, we introduce Embodied-Instruction-Evolution (EIE), an automatic framework for synthesizing instruction tuning examples in multimodal embodied environments. EIE generates 7,510 training examples in PCA-Bench and enhances the performance of open-source MLLMs, occasionally surpassing GPT-4 Vision (+3% in decision accuracy), thereby validating the effectiveness of EIE. Our findings suggest that robust MLLMs like GPT4-Vision show promise for decision-making in embodied agents, opening new avenues for MLLM research. All benchmark data and evaluation code are made public.</abstract>
       <url hash="7bcb6477">2024.findings-acl.64</url>
@@ -7038,7 +7038,7 @@
       <title>Modelling Variability in Human Annotator Simulation</title>
       <author><first>Wen</first><last>Wu</last></author>
       <author><first>Wenlin</first><last>Chen</last><affiliation>University of Cambridge and Max Planck Institute for Intelligent Systems</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
       <author><first>Phil</first><last>Woodland</last><affiliation>University of Cambridge</affiliation></author>
       <pages>1139-1157</pages>
       <abstract>Human annotator simulation (HAS) serves as a cost-effective substitute for human evaluation tasks such as data annotation and system assessment. It is important to incorporate the variability present in human evaluation into HAS, since it helps capture diverse subjective interpretations and mitigate potential biases and over-representation. This work introduces a novel framework for modelling variability in HAS. Conditional softmax flow (S-CNF) is proposed to model the distribution of subjective human annotations, which leverages diverse human annotations via meta-learning. This enables efficient generation of annotations that exhibit human variability for unlabelled input. In addition, a wide range of evaluation metrics are adopted to assess the capability and efficiency of HAS systems in predicting the aggregated behaviours of human annotators, matching the distribution of human annotations, and simulating the inter-annotator disagreements. Results demonstrate that the proposed method achieves state-of-the-art performance on two real-world human evaluation tasks: emotion recognition and toxic speech detection.</abstract>
@@ -7053,7 +7053,7 @@
       <author><first>Minhajur</first><last>Mahim</last></author>
       <author><first>Rifki</first><last>Putri</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>James</first><last>Thorne</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>1158-1177</pages>
       <abstract>In this study, we introduce BEnQA, a dataset comprising parallel Bengali and English exam questions for middle and high school levels in Bangladesh. Our dataset consists of approximately 5K questions covering several subjects in science with different types of questions, including factual, application, and reasoning-based questions. We benchmark several Large Language Models (LLMs) with our parallel dataset and observe a notable performance disparity between the models in Bengali and English. We also investigate some prompting methods, and find that Chain-of-Thought prompting is beneficial mostly on reasoning questions, but not so much on factual ones. We also find that appending English translation helps to answer questions in Bengali. Our findings point to promising future research directions for improving the performance of LLMs in Bengali and more generally in low-resource languages.</abstract>
       <url hash="a07748dd">2024.findings-acl.68</url>
@@ -7065,7 +7065,7 @@
       <author><first>Wanqing</first><last>Cui</last></author>
       <author><first>Keping</first><last>Bi</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>1178-1192</pages>
       <url hash="e6ef9f5d">2024.findings-acl.69</url>
       <bibkey>cui-etal-2024-multi</bibkey>
@@ -7217,9 +7217,9 @@
     <paper id="80">
       <title>Unveiling the Achilles’ Heel of <fixed-case>NLG</fixed-case> Evaluators: A Unified Adversarial Framework Driven by Large Language Models</title>
       <author><first>Yiming</first><last>Chen</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Danqing</first><last>Luo</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Robby</first><last>Tan</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong (Shenzhen); National University of Singapore and National University of Singapore</affiliation></author>
       <pages>1359-1375</pages>
@@ -7334,7 +7334,7 @@
       <title><fixed-case>CHARP</fixed-case>: Conversation History <fixed-case>A</fixed-case>wa<fixed-case>R</fixed-case>eness Probing for Knowledge-grounded Dialogue Systems</title>
       <author><first>Abbas</first><last>Ghaddar</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>David</first><last>Alfonso-Hermelo</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Prasanna</first><last>Parthasarathi</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
@@ -7443,8 +7443,8 @@
       <author><first>Guizhen</first><last>Chen</last></author>
       <author><first>Wenhan</first><last>Xia</last></author>
       <author><first>Junjie</first><last>Hu</last><affiliation>University of Wisconsin, Madison</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>1679-1705</pages>
       <abstract>In the rapidly evolving field of large language models (LLMs), data augmentation (DA) has emerged as a pivotal technique for enhancing model performance by diversifying training examples without the need for additional data collection. This survey explores the transformative impact of LLMs on DA, particularly addressing the unique challenges and opportunities they present in the context of natural language processing (NLP) and beyond. From both data and learning perspectives, we examine various strategies that utilize LLMs for data augmentation, including a novel exploration of learning paradigms where LLM-generated data is used for diverse forms of further training. Additionally, this paper highlights the primary open challenges faced in this domain, ranging from controllable data augmentation to multi-modal data augmentation. This survey highlights a paradigm shift introduced by LLMs in DA, and aims to serve as a comprehensive guide for researchers and practitioners.</abstract>
       <url hash="fd0fa258">2024.findings-acl.97</url>
@@ -7500,7 +7500,7 @@
     <paper id="102">
       <title><fixed-case>UNIWIZ</fixed-case>: A Unified Large Language Model Orchestrated Wizard for Safe Knowledge Grounded Conversations</title>
       <author><first>Souvik</first><last>Das</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>1749-1762</pages>
       <abstract>Large Language Models (LLMs) have made significant progress in integrating safety and knowledge alignment. However, adversarial actors can manipulate these models into generating unsafe responses, and excessive safety alignment can lead to unintended hallucinations. To address these challenges, we introduce UniWiz, a novel 2-step data orchestration framework that unifies safety and knowledge data generation. We propose a “safety-priming” method to generate synthetic safety data and overcome safety bottlenecks. We also inject relevant knowledge into conversations by retrieving factual information from curated sources. UniWiz dataset consists of 17,638 quality-controlled conversations and 10,000 augmented preference data. Pretrained models fine-tuned on UniWiz show improvements across various metrics and outperform state-of-the-art instruction-tuned models trained on much larger datasets.</abstract>
       <url hash="aa5fcb00">2024.findings-acl.102</url>
@@ -7598,7 +7598,7 @@
       <author><first>Avrajit</first><last>Ghosh</last></author>
       <author><first>Bidhan</first><last>Bashyal</last></author>
       <author><first>Rongrong</first><last>Wang</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>1843-1856</pages>
       <abstract>While task-agnostic debiasing provides notable generalizability and reduced reliance on downstream data, its impact on language modeling ability and the risk of relearning social biases from downstream task-specific data remain as the two most significant challenges when debiasing Pretrained Language Models (PLMs). The impact on language modeling ability can be alleviated given a high-quality and long-contextualized debiasing corpus, but there remains a deficiency in understanding the specifics of relearning biases. We empirically ascertain that the effectiveness of task-agnostic debiasing hinges on the quantitative bias level of both the task-specific data used for downstream applications and the debiased model. We empirically show that the lower bound of the bias level of the downstream fine-tuned model can be approximated by the bias level of the debiased model, in most practical cases. To gain more in-depth understanding about how the parameters of PLMs change during fine-tuning due to the forgetting issue of PLMs, we propose a novel framework which can Propagate Socially-fair Debiasing to Downstream Fine-tuning, ProSocialTuning. Our proposed framework can push the fine-tuned model to approach the bias lower bound during downstream fine-tuning, indicating that the ineffectiveness of debiasing can be alleviated by overcoming the forgetting issue through regularizing successfully debiased attention heads based on the PLMs’ bias levels from stages of pretraining and debiasing.</abstract>
       <url hash="06cfe14f">2024.findings-acl.109</url>
@@ -7667,7 +7667,7 @@
       <author><first>Jun</first><last>Zhou</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>1913-1927</pages>
       <abstract>Document-level event extraction aims to extract structured event information from unstructured text. However, a single document often contains limited event information and the roles of different event arguments may be biased due to the influence of the information source.This paper addresses the limitations of traditional document-level event extraction by proposing the task of cross-document event extraction (CDEE) to integrate event information from multiple documents and provide a comprehensive perspective on events. We construct a novel cross-document event extraction dataset, namely CLES, which contains 20,059 documents and 37,688 mention-level events, where over 70% of them are cross-document. To address the task, we propose a CDEE pipeline that includes 5 steps, namely event extraction, coreference resolution, entity normalization, role normalization and entity-role resolution. Our CDEE pipeline achieves about 72% F1 in end-to-end cross-document event extraction, suggesting the challenge of this task and setting up a benchmark for future research. Our work builds a new line of information extraction research and will attract new research attention.</abstract>
       <url hash="f172e2a7">2024.findings-acl.114</url>
@@ -7780,7 +7780,7 @@
       <author><first>Meina</first><last>Song</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
       <author><first>Wei</first><last>Lin</last></author>
       <author><first>Yifan</first><last>Zhu</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>2039-2056</pages>
       <abstract>Knowledge Base Question Answering (KBQA) aims to answer natural language questions over large-scale knowledge bases (KBs), which can be summarized into two crucial steps: knowledge retrieval and semantic parsing. However, three core challenges remain: inefficient knowledge retrieval, mistakes of retrieval adversely impacting semantic parsing, and the complexity of previous KBQA methods. To tackle these challenges, we introduce ChatKBQA, a novel and simple generate-then-retrieve KBQA framework, which proposes first generating the logical form with fine-tuned LLMs, then retrieving and replacing entities and relations with an unsupervised retrieval method, to improve both generation and retrieval more directly. Experimental results show that ChatKBQA achieves new state-of-the-art performance on standard KBQA datasets, WebQSP, and CWQ. This work can also be regarded as a new paradigm for combining LLMs with knowledge graphs (KGs) for interpretable and knowledge-required question answering.</abstract>
       <url hash="14c787ff">2024.findings-acl.122</url>
@@ -7970,7 +7970,7 @@
       <author><first>Hyunwoong</first><last>Ko</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Seungyoon</first><last>Lee</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>2287-2303</pages>
       <abstract>Byte Pair Encoding is an effective approach in machine translation across several languages. However, our analysis indicates that BPE is prone to over-segmentation in the morphologically rich language, Korean, which can erode word semantics and lead to semantic confusion during training. This semantic confusion, stemming from over-segmentation, ultimately contributes to a degradation of overall translation quality. To address this issue, we introduce Length-aware Subword Vocabulary Construction (LeVoC), a novel approach strategically incorporating longer words into the vocabulary. By utilizing an external monolingual Korean corpus, LeVoC extracts and integrates long words, effectively preserving morphological information and reducing semantic confusion. Our experiments demonstrate that LeVoC not only significantly outperforms BPE, but also can be applied to and surpass current state-of-the-art morpheme-aware subword tokenization methods. We provide evidence that the difficulty in translating sentences with long words in Korean is associated with morphological compositionality, and LeVoC’s ability to reduce semantic confusion during training leads to improved translation quality.</abstract>
       <url hash="54233cfa">2024.findings-acl.135</url>
@@ -8045,7 +8045,7 @@
       <author><first>Kun</first><last>Wu</last></author>
       <author><first>Siming</first><last>Chen</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jiebo</first><last>Luo</last><affiliation>University of Rochester and University of Rochester</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>2366-2389</pages>
       <abstract>The growth of social media, characterized by its multimodal nature, has led to the emergence of diverse phenomena and challenges, which calls for an effective approach to uniformly solve automated tasks. The powerful Large Vision Language Models make it possible to handle a variety of tasks simultaneously, but even with carefully designed prompting methods, the general domain models often fall short in aligning with the unique speaking style and context of social media tasks. In this paper, we introduce a Large Vision Language Model for Social Media Processing (SoMeLVLM), which is a cognitive framework equipped with five key capabilities including knowledge &amp; comprehension, application, analysis, evaluation, and creation. SoMeLVLM is designed to understand and generate realistic social media behavior. We have developed a 654k multimodal social media instruction-tuning dataset to support our cognitive framework and fine-tune our model. Our experiments demonstrate that SoMeLVLM achieves state-of-the-art performance in multiple social media tasks. Further analysis shows its significant advantages over baselines in terms of cognitive abilities.</abstract>
@@ -8060,7 +8060,7 @@
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
       <author><first>SeongTae</first><last>Hong</last><affiliation>Korea University</affiliation></author>
       <author><first>Seungjun</first><last>Lee</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>2390-2415</pages>
       <abstract>The evolution of large language models (LLMs) has culminated in a multitask model paradigm where prompts drive the generation of user-specific outputs. However, this advancement has revealed a critical challenge: LLMs frequently produce outputs against socially acceptable commonsense standards in various scenarios. To address this gap in commonsense reasoning, we present KoCommonGEN v2, a fine-grained benchmark dataset focused on Korean commonsense reasoning. This dataset, enriched with human annotations, comprises multiple-choice questions across seven error categories. These categories include commonsense memorization, numerical commonsense, toxic speech, and more, which are vulnerable to undermining the reliability of LLMs’ commonsense reasoning capabilities. The empirical results present that LLMs struggle with Korean commonsense reasoning. With human accuracy benchmarked at approximately 85%, GPT-4’s performance lags at about 74%, and other LLMs demonstrate an average accuracy of around 42%. Our findings emphasize the need for targeted improvements in Korean commonsense reasoning within LLMs, paving the way for more socially and contextually sensitive AI models.</abstract>
       <url hash="e7e87161">2024.findings-acl.141</url>
@@ -8098,7 +8098,7 @@
     <paper id="144">
       <title>Integrating Physician Diagnostic Logic into Large Language Models: Preference Learning from Process Feedback</title>
       <author><first>Chengfeng</first><last>Dou</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Zhi</first><last>Jin</last><affiliation>Peking University and Peking University</affiliation></author>
       <author><first>Wenpin</first><last>Jiao</last><affiliation>Peking University</affiliation></author>
       <author><first>Haiyan</first><last>Zhao</last><affiliation>Peking University</affiliation></author>
@@ -8147,7 +8147,7 @@
       <author><first>Abinew</first><last>Ayele</last><affiliation>Bahir Dar University, Universität Hamburg</affiliation></author>
       <author><first>Pavan</first><last>Baswani</last></author>
       <author><first>Meriem</first><last>Beloucif</last><affiliation>Uppsala University</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Sofia</first><last>Bourhim</last></author>
       <author><first>Christine</first><last>Kock</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Genet</first><last>Dekebo</last></author>
@@ -8155,14 +8155,14 @@
       <author><first>Gopichand</first><last>Kanumolu</last></author>
       <author><first>Lokesh</first><last>Madasu</last></author>
       <author><first>Samuel</first><last>Rutunda</last></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>Thamar</first><last>Solorio</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and University of Houston</affiliation></author>
       <author><first>Nirmal</first><last>Surange</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Hailegnaw</first><last>Tilaye</last><affiliation>Kotebe University of Education</affiliation></author>
       <author><first>Krishnapriya</first><last>Vishnubhotla</last></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
       <author><first>Seid</first><last>Yimam</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>2512-2530</pages>
       <abstract>Exploring and quantifying semantic relatedness is central to representing language and holds significant implications across various NLP tasks. While earlier NLP research primarily focused on semantic similarity, often within the English language context, we instead investigate the broader phenomenon of semantic relatedness. In this paper, we present <i>SemRel</i>, a new semantic relatedness dataset collection annotated by native speakers across 13 languages: <i>Afrikaans, Algerian Arabic, Amharic, English, Hausa, Hindi, Indonesian, Kinyarwanda, Marathi, Moroccan Arabic, Modern Standard Arabic, Spanish,</i> and <i>Telugu</i>. These languages originate from five distinct language families and are predominantly spoken in Africa and Asia – regions characterised by a relatively limited availability of NLP resources. Each instance in the SemRel datasets is a sentence pair associated with a score that represents the degree of semantic textual relatedness between the two sentences. The scores are obtained using a comparative annotation framework. We describe the data collection and annotation processes, challenges when building the datasets, baseline experiments, and their impact and utility in NLP.</abstract>
       <url hash="ea378668">2024.findings-acl.147</url>
@@ -8195,7 +8195,7 @@
       <author><first>Krishnapriya</first><last>Vishnubhotla</last></author>
       <author><first>Adam</first><last>Hammond</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Graeme</first><last>Hirst</last><affiliation>University of Toronto</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>2557-2574</pages>
       <abstract>Stories are rich in the emotions they exhibit in their narratives and evoke in the readers. The emotional journeys of the various characters within a story are central to their appeal. Computational analysis of the emotions of novels, however, has rarely examined the variation in the emotional trajectories of the different characters within them, instead considering the entire novel to represent a single story arc. In this work, we use character dialogue to distinguish between the emotion arcs of the narration and the various characters. We analyze the emotion arcs of the various characters in a dataset of English literary novels using the framework of Utterance Emotion Dynamics. Our findings show that the narration and the dialogue largely express disparate emotions through the course of a novel, and that the commonalities or differences in the emotional arcs of stories are more accurately captured by those associated with individual characters.</abstract>
       <url hash="5bf5ef73">2024.findings-acl.150</url>
@@ -8215,7 +8215,7 @@
     <paper id="152">
       <title>Dictionary-Aided Translation for Handling Multi-Word Expressions in Low-Resource Languages</title>
       <author><first>Antonios</first><last>Dimakis</last><affiliation>University of Athens</affiliation></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>Athena Research Center and George Mason University</affiliation></author>
       <pages>2588-2595</pages>
       <abstract>Multi-word expressions (MWEs) present unique challenges in natural language processing (NLP), particularly within the context of translation systems, due to their inherent scarcity, non-compositional nature, and other distinct lexical and morphosyntactic characteristics, issues that are exacerbated in low-resource settings.In this study, we elucidate and attempt to address these challenges by leveraging a substantial corpus of human-annotated Greek MWEs. To address the complexity of translating such phrases, we propose a novel method leveraging an available out-of-context lexicon.We assess the translation capabilities of current state-of-the-art systems on this task, employing both automated metrics and human evaluators.We find that by using our method when applicable, the performance of current systems can be significantly improved, however these models are still unable to produce translations comparable to those of a human speaker.</abstract>
@@ -8255,7 +8255,7 @@
       <author><first>Herun</first><last>Wan</last></author>
       <author><first>Shangbin</first><last>Feng</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhaoxuan</first><last>Tan</last><affiliation>University of Notre Dame</affiliation></author>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last></author>
+      <author><first>Heng</first><last>Wang</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Minnan</first><last>Luo</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <pages>2637-2667</pages>
@@ -8287,7 +8287,7 @@
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yada</first><last>Zhu</last><affiliation>IBM Research</affiliation></author>
       <author><first>Jacob</first><last>Hansen</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>David</first><last>Cox</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Alan</first><last>Ritter</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Rogerio</first><last>Feris</last><affiliation>International Business Machines</affiliation></author>
@@ -8431,8 +8431,8 @@
       <author><first>Fatemeh</first><last>Shiri</last></author>
       <author><first>Dinh</first><last>Phung</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2862-2883</pages>
       <abstract>Large language models (LLMs) have demonstrated strong reasoning abilities when prompted to generate chain-of-thought (CoT) explanations alongside answers. However, previous research on evaluating LLMs has solely focused on answer accuracy, neglecting the correctness of the generated CoT. In this paper, we delve deeper into the CoT reasoning capabilities of LLMs in multi-hop question answering by utilizing knowledge graphs (KGs). We propose a novel discriminative and generative CoT evaluation paradigm to assess LLMs’ knowledge of reasoning and the accuracy of the generated CoT. Through experiments conducted on 5 different families of LLMs across 2 multi-hop question-answering datasets, we find that LLMs possess sufficient knowledge to perform reasoning. However, there exists a significant disparity between answer accuracy and faithfulness of the CoT generated by LLMs, indicating that they often arrive at correct answers through incorrect reasoning.</abstract>
       <url hash="cdd88d2f">2024.findings-acl.168</url>
@@ -8444,7 +8444,7 @@
       <author><first>Longyin</first><last>Zhang</last><affiliation>A*STAR</affiliation></author>
       <author><first>Bowei</first><last>Zou</last><affiliation>A*STAR</affiliation></author>
       <author><first>Jacintha</first><last>Yi</last></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
       <pages>2884-2896</pages>
       <abstract>Real-world news comments pose a significant challenge due to their noisy and ambiguous nature, which complicates their modeling for clustering and summarization tasks. Most previous research has predominantly focused on extractive summarization methods within specific constraints. This paper concentrates on Clustering and Abstractive Summarization of online news Comments (CASC). First, we introduce an enhanced fast clustering algorithm that maintains a dynamic similarity threshold to ensure the high density of each comment cluster being built. Moreover, we pioneer the exploration of tuning Large Language Models (LLMs) through a chain-of-thought strategy to generate summaries for each comment cluster. On the other hand, a notable challenge in CASC research is the scarcity of evaluation data. To address this problem, we design an annotation scheme and contribute a manual test suite tailored for CASC. Experimental results on the test suite demonstrate the effectiveness of our improvements to the baseline methods. In addition, the quantitative and qualitative analyses illustrate the adaptability of our approach to real-world news comment scenarios.</abstract>
       <url hash="3fbfd581">2024.findings-acl.169</url>
@@ -8543,7 +8543,7 @@
       <title>Towards Precise Localization of Critical Errors in Machine Translation</title>
       <author><first>Dahyun</first><last>Jung</last><affiliation>Korea University</affiliation></author>
       <author><first>Sugyeong</first><last>Eo</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>3000-3012</pages>
       <abstract>The advent of large language models has experienced a remarkable improvement in the field of machine translation. However, machine translation is still vulnerable to critical meaning deviations, which may incur catastrophic issues in social or ethical contexts. In particular, existing critical error detection primarily focuses on identifying sentence-level errors, leaving the precise localization of such errors within the sentence unaddressed. In this paper, we introduce a new task, word-level critical error detection (WCED), to detect critical errors at a fine-grained level in machine translation sentences. The task aims to identify the parts of a machine translation that contain catastrophic meaning distortions. We hypothesize that the ability to determine errors at the sentence level will positively influence the detection of more granular errors. We propose a sentence-level error detection module to predict which words in a sentence have critical errors. Experimental results demonstrate that our method outperforms existing methodologies and LLM in En-De, Zh-En, En-Ru, and En-Ko. Our method is helpful for determining the fine-grained location of errors. We hope that such studies will improve the capacity to address critical errors adeptly.</abstract>
       <url hash="54afef71">2024.findings-acl.177</url>
@@ -8610,7 +8610,7 @@
       <title>Transition-based Opinion Generation for Aspect-based Sentiment Analysis</title>
       <author><first>Tianlai</first><last>Ma</last></author>
       <author><first>Zhongqing</first><last>Wang</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>3078-3087</pages>
       <abstract>Recently, the use of pre-trained generation models for extracting sentiment elements has resulted in significant advancements in aspect-based sentiment analysis benchmarks. However, these approaches often overlook the importance of explicitly modeling structure among sentiment elements. To address this limitation, we present a study that aims to integrate general pre-trained sequence-to-sequence language models with a structure-aware transition-based approach. Therefore, we propose a transition system for opinion tree generation, designed to better exploit pre-trained language models for structured fine-tuning. Our proposed transition system ensures the structural integrity of the generated opinion tree. By leveraging pre-trained generation models and simplifying the transition set, we are able to maximize the accuracy of opinion tree generation. Extensive experiments show that our model significantly advances the state-of-the-art performance on several benchmark datasets. In addition, the empirical studies also indicate that the proposed opinion tree generation with transition system is more effective in capturing the sentiment structure than other generation models.</abstract>
       <url hash="8995bd6e">2024.findings-acl.182</url>
@@ -8623,7 +8623,7 @@
       <author><first>Xinshuai</first><last>Dong</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Liangming</first><last>Pan</last></author>
       <author><first>Thong</first><last>Nguyen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>3088-3105</pages>
       <abstract>Dynamic topic models track the evolution of topics in sequential documents, which have derived various applications like trend analysis. However, existing models suffer from repetitive topic and unassociated topic issues, failing to reveal the evolution and hindering further applications. To address these issues, we break the tradition of simply chaining topics in existing work and propose a novel neural Chain-Free Dynamic Topic Model. We introduce a new evolution-tracking contrastive learning method that builds the similarity relations among dynamic topics. This not only tracks topic evolution but also maintains topic diversity, mitigating the repetitive topic issue. To avoid unassociated topics, we further present an unassociated word exclusion method that consistently excludes unassociated words from discovered topics. Extensive experiments demonstrate our model significantly outperforms state-of-the-art baselines, tracking topic evolution with high-quality topics, showing better performance on downstream tasks, and remaining robust to the hyperparameter for evolution intensities.</abstract>
       <url hash="b1c09579">2024.findings-acl.183</url>
@@ -8639,8 +8639,8 @@
       <author><first>Shom</first><last>Lin</last></author>
       <author><first>Zhenxuan</first><last>Zhang</last></author>
       <author><first>Angela</first><last>Zhao</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>3106-3119</pages>
       <abstract>Many studies have demonstrated that large language models (LLMs) can produce harmful responses, exposing users to unexpected risks. Previous studies have proposed comprehensive taxonomies of LLM risks, as well as corresponding prompts that can be used to examine LLM safety. However, the focus has been almost exclusively on English. We aim to broaden LLM safety research by introducing a dataset for the safety evaluation of Chinese LLMs, and extending it to better identify false negative and false positive examples in terms of risky prompt rejections. We further present a set of fine-grained safety assessment criteria for each risk type, facilitating both manual annotation and automatic evaluation in terms of LLM response harmfulness. Our experiments over five LLMs show that region-specific risks are the prevalent risk type. Warning: this paper contains example data that may be offensive, harmful, or biased. Our data is available at https://github.com/Libr-AI/do-not-answer.</abstract>
       <url hash="cd97fa61">2024.findings-acl.184</url>
@@ -8747,7 +8747,7 @@
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Ryuto</first><last>Koike</last></author>
       <author><first>Mengsay</first><last>Loem</last><affiliation>Sansan, Inc.</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>3237-3245</pages>
       <abstract>Large Language Models (LLMs) are widely used to evaluate natural language generation tasks as automated metrics.However, the likelihood, a measure of LLM’s plausibility for a sentence, can vary due to superficial differences in sentences, such as word order and sentence structure.It is therefore possible that there might be a likelihood bias if LLMs are used for evaluation: they might overrate sentences with higher likelihoods while underrating those with lower likelihoods.In this paper, we investigate the presence and impact of likelihood bias in LLM-based evaluators.We also propose a method to mitigate the likelihood bias.Our method utilizes highly biased instances as few-shot examples for in-context learning.Our experiments in evaluating the data-to-text and grammatical error correction tasks reveal that several LLMs we test display a likelihood bias.Furthermore, our proposed method successfully mitigates this bias, also improving evaluation performance (in terms of correlation of models with human scores) significantly.</abstract>
       <url hash="5b980bf8">2024.findings-acl.193</url>
@@ -8906,10 +8906,10 @@
       <title>Compositional Generalization with Grounded Language Models</title>
       <author><first>Sondre</first><last>Wold</last></author>
       <author><first>Étienne</first><last>Simon</last></author>
-      <author><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
+      <author id="lucas-georges-gabriel-charpentier"><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Egor</first><last>Kostylev</last><affiliation>University of Oslo, Norway</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>3447-3460</pages>
       <abstract>Grounded language models use external sources of information, such as knowledge graphs, to meet some of the general challenges associated with pre-training. By extending previous work on compositional generalization in semantic parsing, we allow for a controlled evaluation of the degree to which these models learn and generalize from patterns in knowledge graphs. We develop a procedure for generating natural language questions paired with knowledge graphs that targets different aspects of compositionality and further avoids grounding the language models in information already encoded implicitly in their weights. We evaluate existing methods for combining language models with knowledge graphs and find them to struggle with generalization to sequences of unseen lengths and to novel combinations of seen base components. While our experimental results provide some insight into the expressive power of these models, we hope our work and released datasets motivate future research on how to better combine language models with structured knowledge representations.</abstract>
       <url hash="d29f7f79">2024.findings-acl.205</url>
@@ -8987,7 +8987,7 @@
       <author><first>Zhirui</first><last>Zhang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xiang</first><last>Geng</last></author>
       <author><first>Yichao</first><last>Du</last><affiliation>University of Science and Technology of China</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>3546-3562</pages>
       <abstract>This study investigates how Large Language Models (LLMs) leverage source and reference data in machine translation evaluation task, aiming to better understand the mechanisms behind their remarkable performance in this task.We design the controlled experiments across various input modes and model types, and employ both coarse-grained and fine-grained prompts to discern the utility of source versus reference information.We find that reference information significantly enhances the evaluation accuracy, while surprisingly, source information sometimes is counterproductive, indicating LLMs’ inability to fully leverage the cross-lingual capability when evaluating translations.Further analysis of the fine-grained evaluation and fine-tuning experiments show similar results.These findings also suggest a potential research direction for LLMs that fully exploits the cross-lingual capability of LLMs to achieve better performance in machine translation evaluation tasks.</abstract>
@@ -9059,7 +9059,7 @@
       <author><first>Shenghua</first><last>Liu</last></author>
       <author><first>Yiwei</first><last>Wang</last></author>
       <author><first>Lingrui</first><last>Mei</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>3615-3625</pages>
       <abstract>Exploring the application of large language models (LLMs) to graph learning is an emerging endeavor. However, the vast amount of information inherent in large graphs poses significant challenges to graph learning with LLMs. This work focuses on the link prediction task and introduces **LPNL** (Link Prediction via Natural Language), a framework based on large language models designed for scalable link prediction on large-scale heterogeneous graphs. We design novel prompts for link prediction that articulate graph details in natural language. We propose a two-stage sampling pipeline to extract crucial information from the graphs, and a divide-and-conquer strategy to control the input tokens within predefined limits, addressing the challenge of overwhelming information. We fine-tune a T5 model based on our self-supervised learning designed for link prediction. Extensive experimental results demonstrate that LPNL outperforms multiple advanced baselines in link prediction tasks on large-scale graphs.</abstract>
       <url hash="1391e9f6">2024.findings-acl.215</url>
@@ -9088,8 +9088,8 @@
       <author><first>Yicong</first><last>Li</last></author>
       <author><first>Jay Zhangjie</first><last>Wu</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Cong-Duy</first><last>Nguyen</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>3636-3657</pages>
       <abstract>Humans use multiple senses to comprehend the environment. Vision and language are two of the most vital senses since they allow us to easily communicate our thoughts and perceive the world around us. There has been a lot of interest in creating video-language understanding systems with human-like senses since a video-language pair can mimic both our linguistic medium and visual environment with temporal dynamics. In this survey, we review the key tasks of these systems and highlight the associated challenges. Based on the challenges, we summarize their methods from model architecture, model training, and data perspectives. We also conduct performance comparison among the methods, and discuss promising directions for future research.</abstract>
       <url hash="309990f9">2024.findings-acl.217</url>
@@ -9363,7 +9363,7 @@
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Michael</first><last>Färber</last><affiliation>Technische Universität Dresden</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>3987-4001</pages>
       <abstract>Large Language Models (LLMs) exhibit strong In-Context Learning (ICL) capabilities when prompts with demonstrations are used. However, fine-tuning still remains crucial to further enhance their adaptability. Prompt-based fine-tuning proves to be an effective fine-tuning method in low-data scenarios, but high demands on computing resources limit its practicality. We address this issue by introducing a prompt-based parameter-efficient fine-tuning (PEFT) approach. GNNavi leverages insights into ICL’s information flow dynamics, which indicates that label words act in prompts as anchors for information propagation. GNNavi employs a Graph Neural Network (GNN) layer to precisely guide the aggregation and distribution of information flow during the processing of prompts by hardwiring the desired information flow into the GNN. Our experiments on text classification tasks with GPT-2 and Llama2 show GNNavi surpasses standard prompt-based fine-tuning methods in few-shot settings by updating just 0.2% to 0.5% of parameters. We compare GNNavi with prevalent PEFT approaches, such as prefix tuning, LoRA and Adapter in terms of performance and efficiency. Our analysis reveals that GNNavi enhances information flow and ensures a clear aggregation process.</abstract>
       <url hash="c90aad15">2024.findings-acl.237</url>
@@ -9469,7 +9469,7 @@
       <author><first>Ines</first><last>Reinig</last></author>
       <author><first>Maria</first><last>Becker</last><affiliation>Ruprecht-Karls-Universität Heidelberg</affiliation></author>
       <author><first>Ines</first><last>Rehbein</last><affiliation>Universität Mannheim</affiliation></author>
-      <author><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <pages>4136-4155</pages>
       <abstract>In this survey, we provide a systematic review of recent work on modelling morality in text, an area of research that has garnered increasing attention in recent years. Our survey is motivated by the importance of modelling decisions on the created resources, the models trained on these resources and the analyses that result from the models’ predictions. We review work at the interface of NLP, Computational Social Science and Psychology and give an overview of the different goals and research questions addressed in the papers, their underlying theoretical backgrounds and the methods that have been applied to pursue these goals. We then identify and discuss challenges and research gaps, such as the lack of a theoretical framework underlying the operationalisation of morality in text, the low IAA reported for manyhuman-annotated resulting resources and the lack of validation of newly proposed resources and analyses.</abstract>
       <url hash="29eec591">2024.findings-acl.245</url>
@@ -9630,7 +9630,7 @@
     <paper id="256">
       <title><fixed-case>R</fixed-case>ul<fixed-case>E</fixed-case>: Knowledge Graph Reasoning with Rule Embedding</title>
       <author><first>Xiaojuan</first><last>Tang</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <author><first>Yitao</first><last>Liang</last><affiliation>Peking University</affiliation></author>
       <author><first>Muhan</first><last>Zhang</last><affiliation>Peking University</affiliation></author>
       <pages>4316-4335</pages>
@@ -9688,7 +9688,7 @@
       <title>Do Androids Know They’re Only Dreaming of Electric Sheep?</title>
       <author><first>Sky</first><last>CH-Wang</last><affiliation>Columbia University</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University, Johns Hopkins University, Johns Hopkins University and Microsoft</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Chris</first><last>Kedzie</last><affiliation>Rasa Technologies, Inc.</affiliation></author>
       <pages>4401-4420</pages>
       <abstract>We design probes trained on the internal representations of a transformer language model to predict its hallucinatory behavior on three grounded generation tasks. To train the probes, we annotate for span-level hallucination on both sampled (organic) and manually edited (synthetic) reference outputs. Our probes are narrowly trained and we find that they are sensitive to their training domain: they generalize poorly from one task to another or from synthetic to organic hallucinations. However, on in-domain data, they can reliably detect hallucinations at many transformer layers, achieving 95% of their peak performance as early as layer 4. Here, probing proves accurate for evaluating hallucination, outperforming several contemporary baselines and even surpassing an expert human annotator in response-level detection F1. Similarly, on span-level labeling, probes are on par or better than the expert annotator on two out of three generation tasks. Overall, we find that probing is a feasible and efficient alternative to language model hallucination evaluation when model states are available.</abstract>
@@ -9770,7 +9770,7 @@
       <title>Semantics or spelling? Probing contextual word embeddings with orthographic noise</title>
       <author><first>Jacob A.</first><last>Matthews</last></author>
       <author><first>John R.</first><last>Starr</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>4495-4504</pages>
       <abstract>Pretrained language model (PLM) hidden states are frequently employed as contextual word embeddings (CWE): high-dimensional representations that encode semantic information given linguistic context. Across many areas of computational linguistics research, similarity between CWEs is interpreted as semantic similarity. However, it remains unclear exactly what information is encoded in PLM hidden states. We investigate this practice by probing PLM representations using minimal orthographic noise. We expect that if CWEs primarily encode semantic information, a single character swap in the input word will not drastically affect the resulting representation, given sufficient linguistic context. Surprisingly, we find that CWEs generated by popular PLMs are highly sensitive to noise in input data, and that this sensitivity is related to subword tokenization: the fewer tokens used to represent a word at input, the more sensitive its corresponding CWE. This suggests that CWEs capture information unrelated to word-level meaning and can be manipulated through trivial modifications of input data. We conclude that these PLM-derived CWEs may not be reliable semantic proxies, and that caution is warranted when interpreting representational similarity.</abstract>
       <url hash="1cf69cf5">2024.findings-acl.266</url>
@@ -9877,7 +9877,7 @@
       <title>Probing the Uniquely Identifiable Linguistic Patterns of Conversational <fixed-case>AI</fixed-case> Agents</title>
       <author><first>Iqra</first><last>Zahid</last></author>
       <author><first>Tharindu</first><last>Madusanka</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Youcheng</first><last>Sun</last><affiliation>The University of Manchester</affiliation></author>
       <pages>4612-4628</pages>
       <abstract>The proliferation of Conversational AI agents (CAAs) has emphasised the need to distinguish between human and machine-generated texts, with implications spanning digital forensics and cybersecurity. While prior research primarily focussed on distinguishing human from machine-generated text, our study takes a more refined approach by analysing different CAAs. We construct linguistic profiles for five CAAs, aiming to identify Uniquely Identifiable Linguistic Patterns (UILPs) for each model using authorship attribution techniques. Authorship attribution (AA) is the task of identifying the author of an unknown text from a pool of known authors. Our research seeks to answer crucial questions about the existence of UILPs in CAAs, the linguistic overlap between various text types generated by these models, and the feasibility of Authorship Attribution (AA) for CAAs based on UILPs. Promisingly, we are able to attribute CAAs based on their original texts with a weighted F1-score of 96.94%. Further, we are able to attribute CAAs according to their writing style (as specified by prompts), yielding a weighted F1-score of 95.84%, which sets the baseline for this task. By employing principal component analysis (PCA), we identify the top 100 most informative linguistic features for each CAA, achieving a weighted F1-score ranging from 86.04% to 97.93%, and an overall weighted F1-score of 93.86%.</abstract>
@@ -9997,7 +9997,7 @@
       <title>Instruction Tuning with Retrieval-based Examples Ranking for Aspect-based Sentiment Analysis</title>
       <author><first>Guangmin</first><last>Zheng</last></author>
       <author><first>Jin</first><last>Wang</last><affiliation>Yunnan University</affiliation></author>
-      <author><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
       <author><first>Xuejie</first><last>Zhang</last><affiliation>Yunnan University</affiliation></author>
       <pages>4777-4788</pages>
       <abstract>Aspect-based sentiment analysis (ABSA) identifies sentiment information related to specific aspects and provides deeper market insights to businesses and organizations. With the emergence of large language models (LMs), recent studies have proposed using fixed examples for instruction tuning to reformulate ABSA as a generation task. However, the performance is sensitive to the selection of in-context examples; several retrieval methods are based on surface similarity and are independent of the LM generative objective. This study proposes an instruction learning method with retrieval-based example ranking for ABSA tasks. For each target sample, an LM was applied as a scorer to estimate the likelihood of the output given the input and a candidate example as the prompt, and training examples were labeled as positive or negative by ranking the scores. An alternating training schema is proposed to train both the retriever and LM. Instructional prompts can be constructed using high-quality examples. The LM is used for both scoring and inference, improving the generation efficiency without incurring additional computational costs or training difficulties. Extensive experiments on three ABSA subtasks verified the effectiveness of the proposed method, demonstrating its superiority over various strong baseline models. Code and data are released at https://github.com/zgMin/IT-RER-ABSA.</abstract>
@@ -10009,7 +10009,7 @@
       <title>Unveiling the Truth and Facilitating Change: Towards Agent-based Large-scale Social Movement Simulation</title>
       <author><first>Xinyi</first><last>Mou</last></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4789-4809</pages>
       <abstract>Social media has emerged as a cornerstone of social movements, wielding significant influence in driving societal change. Simulating the response of the public and forecasting the potential impact has become increasingly important. However, existing methods for simulating such phenomena encounter challenges concerning their efficacy and efficiency in capturing the behaviors of social movement participants. In this paper, we introduce a hybrid framework for social media user simulation, wherein users are categorized into two types. Core users are driven by Large Language Models, while numerous ordinary users are modeled by deductive agent-based models. We further construct a Twitter-like environment to replicate their response dynamics following trigger events. Subsequently, we develop a multi-faceted benchmark SoMoSiMu-Bench for evaluation and conduct comprehensive experiments across real-world datasets. Experimental results demonstrate the effectiveness and flexibility of our method.</abstract>
       <url hash="6a746deb">2024.findings-acl.285</url>
@@ -10045,7 +10045,7 @@
       <author><first>Xinyu</first><last>Li</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yifan</first><last>Chen</last></author>
       <author><first>Wenfeng</first><last>Xuan</last><affiliation>XVERSE</affiliation></author>
-      <author><first>Weinan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>4833-4850</pages>
       <abstract>Although Retrieval-Augmented Large Language Models (RALMs) demonstrate their superiority in terms of factuality, they do not consistently outperform the original retrieval-free Language Models (LMs). Our experiments reveal that this example-level performance inconsistency exists not only between retrieval-augmented and retrieval-free LM but also among different retrievers. To understand this phenomenon, we investigate the degeneration behavior of RALMs and theoretically decompose it into four categories. Further analysis based on our decomposition reveals that the innate difference in knowledge sources and the unpredictable degeneration of the reader model contribute most to the inconsistency. Drawing from our analysis, we introduce Ensemble of Retrievers (EoR), a trainable framework that can adaptively retrieve from different knowledge sources and effectively decrease unpredictable reader errors. Our experiments on Open Domain Question Answering show that EoR substantially improves performance over the RALM with a single retriever by considerably reducing inconsistent behaviors.</abstract>
       <url hash="efcfa19e">2024.findings-acl.288</url>
@@ -10215,7 +10215,7 @@
       <title><fixed-case>M</fixed-case>eme<fixed-case>MQA</fixed-case>: Multimodal Question Answering for Memes via Rationale-Based Inferencing</title>
       <author><first>Siddhant</first><last>Agarwal</last></author>
       <author><first>Shivam</first><last>Sharma</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Tanmoy</first><last>Chakraborty</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <pages>5042-5078</pages>
       <abstract>Memes have evolved as a prevalent medium for diverse communication, ranging from humour to propaganda. With the rising popularity of image-focused content, there is a growing need to explore its potential harm from different aspects. Previous studies have analyzed memes in closed settings - detecting harm, applying semantic labels, and offering natural language explanations. To extend this research, we introduce MemeMQA, a multimodal question-answering framework aiming to solicit accurate responses to structured questions while providing coherent explanations. We curate MemeMQACorpus, a new dataset featuring 1,880 questions related to 1,122 memes with corresponding answer-explanation pairs. We further propose ARSENAL, a novel two-stage multimodal framework that leverages the reasoning capabilities of LLMs to address MemeMQA. We benchmark MemeMQA using competitive baselines and demonstrate its superiority - ~18% enhanced answer prediction accuracy and distinct text generation lead across various metrics measuring lexical and semantic alignment over the best baseline. We analyze ARSENAL’s robustness through diversification of question-set, confounder-based evaluation regarding MemeMQA’s generalizability, and modality-specific assessment, enhancing our understanding of meme interpretation in the multimodal communication landscape.</abstract>
@@ -10268,7 +10268,7 @@
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Junjie</first><last>Wang</last></author>
       <author><first>Qing</first><last>Wang</last><affiliation>Institute of Software, Chinese Academy of Sciences</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>5135-5147</pages>
       <abstract>With the development of LLMs, the security threats of LLMs are getting more and more attention. Numerous jailbreak attacks have been proposed to assess the security defense of LLMs. Current jailbreak attacks primarily utilize scenario camouflage techniques. However their explicitly mention of malicious intent will be easily recognized and defended by LLMs. In this paper, we propose an indirect jailbreak attack approach, Puzzler, which can bypass the LLM’s defensive strategies and obtain malicious response by implicitly providing LLMs with some clues about the original malicious query. In addition, inspired by the wisdom of “When unable to attack, defend” from Sun Tzu’s Art of War, we adopt a defensive stance to gather clues about the original malicious query through LLMs. The experimental results indicate that the Query Success Rate of the Puzzler is 14.0%-82.7% higher than baselines on the most prominent LLMs. Furthermore, when tested against the state-of-the-art jailbreak detection approaches, Puzzler proves to be more effective at evading detection compared to baselines.</abstract>
       <url hash="1541070a">2024.findings-acl.304</url>
@@ -10424,7 +10424,7 @@
       <author><first>Xinwei</first><last>Wu</last></author>
       <author><first>Weilong</first><last>Dong</last></author>
       <author><first>Shaoyang</first><last>Xu</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>5319-5332</pages>
       <abstract>Protecting privacy leakage in large language models remains a paramount challenge. In this paper, we reveal Privacy Seesaw in LLM privacy safeguarding, a phenomenon where measures to secure specific private information inadvertently heighten exposure risks for other privacy. Through comprehensive analysis, we identify the amount of targeted privacy data and the volume of edited privacy neurons as the two central triggers to this issue. To mitigate privacy seesaw, we propose Augmented Privacy Neuron Editing via Activation Patching (APNEAP), a novel framework designed to well balance model performance with privacy protection. The proposed APNEAP augments collected private data by automatically synthesizing new private data, which deactivates the first trigger to the privacy seesaw issue. Additionally, it adapts activation patching to privacy neuron editing for switching off the second trigger to the privacy seesaw problem. Experimental results show that the proposed APNEAP is capable of alleviating the privacy seesaw phenomenon and offers a more stable and reliable approach to privacy protection in LLMs than previous methods.</abstract>
       <url hash="c8107da8">2024.findings-acl.315</url>
@@ -10465,7 +10465,7 @@
       <author><first>Enzhi</first><last>Zhang</last></author>
       <author><first>Xiang</first><last>Wang</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>5353-5377</pages>
       <abstract>Molecule-text modeling, which aims to facilitate molecule-relevant tasks with a textual interface and textual knowledge, is an emerging research direction. Beyond single molecules, studying reaction-text modeling holds promise for helping the synthesis of new materials and drugs. However, previous works mostly neglect reaction-text modeling: they primarily focus on modeling individual molecule-text pairs or learning chemical reactions without texts in context. Additionally, one key task of reaction-text modeling – experimental procedure prediction – is less explored due to the absence of an open-source dataset. The task is to predict step-by-step actions of conducting chemical experiments and is crucial to automating chemical synthesis. To resolve the challenges above, we propose a new pretraining method, ReactXT, for reaction-text modeling, and a new dataset, OpenExp, for experimental procedure prediction. Specifically, ReactXT features three types of input contexts to incrementally pretrain LMs. Each of the three input contexts corresponds to a pretraining task to improve the text-based understanding of either reactions or single molecules. ReactXT demonstrates consistent improvements in experimental procedure prediction and molecule captioning and offers competitive results in retrosynthesis. Our code is available at https://github.com/syr-cn/ReactXT.</abstract>
       <url hash="28b0618a">2024.findings-acl.318</url>
@@ -10522,7 +10522,7 @@
       <author><first>Xinyu</first><last>Ma</last><affiliation>Baidu</affiliation></author>
       <author><first>Xun</first><last>Liu</last></author>
       <author><first>Dawei</first><last>Yin</last><affiliation>Baidu</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>5419-5437</pages>
       <abstract>Although model editing has shown promise in revising knowledge in Large Language Models (LLMs), its impact on the inherent capabilities of LLMs is often overlooked. In this work, we reveal a critical phenomenon: even a single edit can trigger model collapse, manifesting as significant performance degradation in various benchmark tasks. However, benchmarking LLMs after each edit, while necessary to prevent such collapses, is impractically time-consuming and resource-intensive. To mitigate this, we propose using perplexity as a surrogate metric, validated by extensive experiments demonstrating changes in an edited model’s perplexity are strongly correlated with its downstream task performances. We further conduct an in-depth study on sequential editing, a practical setting for real-world scenarios, across various editing methods and LLMs, focusing on hard cases from our previous single edit studies. The results indicate that nearly all examined editing methods result in model collapse after only few edits. To facilitate further research, we have utilized GPT-3.5 to develop a new dataset, HardEdit, based on those hard cases. This dataset aims to establish the foundation for pioneering research in reliable model editing and the mechanisms underlying editing-induced model collapse. We hope this work can draw the community’s attention to the potential risks inherent in model editing practices.</abstract>
       <url hash="671d287a">2024.findings-acl.322</url>
@@ -10531,7 +10531,7 @@
     </paper>
     <paper id="323">
       <title>Can We Continually Edit Language Models? On the Knowledge Attenuation in Sequential Model Editing</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Xiaowen</first><last>Chu</last><affiliation>Hong Kong University of Science and Technology (Guangzhou)</affiliation></author>
       <pages>5438-5455</pages>
       <abstract>Model editing has become a promising method for precisely and effectively updating knowledge in language models. In this paper, we investigate knowledge attenuation, in which the retention of updated knowledge within the language model decreases as the number of edits increases after sequential editing. Through empirical study, we discovered that existing editing methods generally suffer from knowledge attenuation. We attribute this phenomenon to two aspects: (1) redundant parameters interference and (2) update weight disentanglement. To this end, we propose the AdaPLE method. It not only mitigates the knowledge attenuation issue but also improves the performance on existing benchmarks. To the best of our knowledge, we are the first to investigate the cause and mitigation of knowledge attenuation in sequential LLM editing.</abstract>
@@ -10614,7 +10614,7 @@
       <author><first>Shuang</first><last>Zeng</last></author>
       <author><first>Kaikai</first><last>An</last></author>
       <author><first>Zefan</first><last>Cai</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>5533-5546</pages>
       <abstract>Distantly-Supervised Named Entity Recognition (DS-NER) effectively alleviates the burden of annotation, but meanwhile suffers from the label noise. Recent works attempt to adopt the teacher-student framework to gradually refine the training labels and improve the overall robustness. However, we argue that these teacher-student methods achieve limited performance because the poor calibration of the teacher network produces incorrectly pseudo-labeled samples, leading to error propagation. Therefore, we attempt to mitigate this issue by proposing: (1) Uncertainty-Aware Teacher Learning that leverages the prediction uncertainty to reduce the number of incorrect pseudo labels in the self-training stage; (2) Student-Student Collaborative Learning that allows the transfer of reliable labels between two student networks instead of indiscriminately relying on all pseudo labels from its teacher. This approach further enables a full exploration of mislabeled samples rather than simply filtering unreliable pseudo-labeled samples. We evaluate our proposed method on five DS-NER datasets, demonstrating that our method is superior to the state-of-the-art DS-NER denoising methods.</abstract>
       <url hash="7edd1412">2024.findings-acl.329</url>
@@ -10626,7 +10626,7 @@
       <author><first>Harri</first><last>Rowlands</last><affiliation>InfluenceMap</affiliation></author>
       <author><first>Gaku</first><last>Morio</last><affiliation>Hitachi America, Ltd., Stanford University and Hitachi, ltd.</affiliation></author>
       <author><first>Dylan</first><last>Tanner</last></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
       <pages>5547-5558</pages>
       <abstract>Social media advertising offers a platform for fossil fuel value chain companies and their agents to reinforce their narratives, often emphasizing economic, labor market, and energy security benefits to promote oil and gas policy and products. Whether such narratives can be detected automatically and the extent to which the cost of human annotation can be reduced is our research question. We introduce a task of classifying narratives into seven categories, based on existing definitions and data.Experiments showed that RoBERTa-large outperforms other methods, while GPT-4 Turbo can serve as a viable annotator for the task, thereby reducing human annotation costs. Our findings and insights provide guidance to automate climate-related ad analysis and lead to more scalable ad scrutiny.</abstract>
       <url hash="da863651">2024.findings-acl.330</url>
@@ -10685,8 +10685,8 @@
       <author><first>Neha</first><last>Sengupta</last></author>
       <author><first>Shady</first><last>Shehata</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>5622-5640</pages>
       <abstract>The focus of language model evaluation has transitioned towards reasoning and knowledge-intensive tasks, driven by advancements in pretraining large models. While state-of-the-art models are partially trained on large Arabic texts, evaluating their performance in Arabic remains challenging due to the limited availability of relevant datasets. To bridge this gap, we present ArabicMMLU, the first multi-task language understanding benchmark for the Arabic language, sourced from school exams across diverse educational levels in different countries spanning North Africa, the Levant, and the Gulf regions. Our data comprises 40 tasks and 14,575 multiple-choice questions in Modern Standard Arabic (MSA) and is carefully constructed by collaborating with native speakers in the region. Our comprehensive evaluations of 35 models reveal substantial room for improvement, particularly among the best open-source models. Notably, BLOOMZ, mT0, LLama2, and Falcon struggle to achieve a score of 50%, while even the top-performing Arabic-centric model only achieves a score of 62.3%.</abstract>
       <url hash="6445e78b">2024.findings-acl.334</url>
@@ -10728,7 +10728,7 @@
     <paper id="337">
       <title>Benchmarking Large Language Models on <fixed-case>CFLUE</fixed-case> - A <fixed-case>C</fixed-case>hinese Financial Language Understanding Evaluation Dataset</title>
       <author><first>Jie</first><last>Zhu</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Yalong</first><last>Wen</last></author>
       <author><first>Lifan</first><last>Guo</last></author>
       <pages>5673-5693</pages>
@@ -10741,7 +10741,7 @@
       <title>Improving Large Language Models via Fine-grained Reinforcement Learning with Minimum Editing Constraint</title>
       <author><first>Zhipeng</first><last>Chen</last></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Junchen</first><last>Wan</last></author>
       <author><first>Fuzheng</first><last>Zhang</last></author>
       <author><first>Di</first><last>Zhang</last><affiliation>Kuaishou Technology</affiliation></author>
@@ -10806,7 +10806,7 @@
       <author><first>Yadong</first><last>Zhang</last></author>
       <author><first>Youqi</first><last>Song</last></author>
       <author><first>Binxuan</first><last>Liu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>5749-5765</pages>
       <abstract>Topic relevance of an essay demands that the composition adheres to a clear theme and aligns well with the essay prompt requirements, a critical aspect of essay quality evaluation. However, existing research of Automatic Essay Scoring (AES) for Chinese essays has overlooked topic relevance and lacks detailed feedback, while Automatic Essay Comment Generation (AECG) faces much complexity and difficulty. Additionally, current Large Language Models, including GPT-4, often make incorrect judgments and provide overly impractical feedback when evaluating topic relevance. This paper introduces <b>TOREE</b> (<b>To</b>pic <b>Re</b>levance <b>E</b>valuation), a comprehensive dataset developed to assess topic relevance in Chinese primary and middle school students’ essays, which is beneficial for AES, AECG and other applications. Moreover, our proposed two-step method utilizes TOREE through a combination of Supervised Fine-tuning and Preference Learning. Experimental results demonstrate that TOREE is of high quality, and our method significantly enhances models’ performance on two designed tasks for topic relevance evaluation, improving both automatic and human evaluations across four diverse LLMs.</abstract>
       <url hash="85eb2b7c">2024.findings-acl.342</url>
@@ -10872,7 +10872,7 @@
     <paper id="347">
       <title>Mass-Editing Memory with Attention in Transformers: A cross-lingual exploration of knowledge</title>
       <author><first>Daniel</first><last>Tamayo</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Javier</first><last>Hernando</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>5831-5847</pages>
@@ -10963,7 +10963,7 @@
       <author><first>Yang</first><last>Chen</last></author>
       <author><first>Yonghua</first><last>Zhu</last></author>
       <author><first>Paul</first><last>Denny</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <author><first>Jiamou</first><last>Liu</last></author>
       <pages>5914-5934</pages>
       <abstract>Combining large language models with logical reasoning enhances their capacity to address problems in a robust and reliable manner. Nevertheless, the intricate nature of logical reasoning poses challenges when gathering reliable data from the web to build comprehensive training datasets, subsequently affecting performance on downstream tasks. To address this, we introduce a novel logic-driven data augmentation approach, AMR-LDA. AMR-LDA converts the original text into an Abstract Meaning Representation (AMR) graph, a structured semantic representation that encapsulates the logical structure of the sentence, upon which operations are performed to generate logically modified AMR graphs. The modified AMR graphs are subsequently converted back into text to create augmented data. Notably, our methodology is architecture-agnostic and enhances both generative large language models, such as GPT-3.5 and GPT-4, through prompt augmentation, and discriminative large language models through contrastive learning with logic-driven data augmentation. Empirical evidence underscores the efficacy of our proposed method with improvement in performance across seven downstream tasks, such as reading comprehension requiring logical reasoning, textual entailment, and natural language inference. Furthermore, our method leads on the ReClor leaderboard at https://eval.ai/web/challenges/challenge-page/503/leaderboard/1347. The source code and data are publicly available at https://github.com/Strong-AI-Lab/Logical-Equivalence-driven-AMR-Data-Augmentation-for-Representation-Learning.</abstract>
@@ -10974,7 +10974,7 @@
     <paper id="354">
       <title><fixed-case>C</fixed-case>ode<fixed-case>I</fixed-case>nsight: A Curated Dataset of Practical Coding Solutions from <fixed-case>S</fixed-case>tack <fixed-case>O</fixed-case>verflow</title>
       <author><first>Nathanaël</first><last>Beau</last></author>
-      <author><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
       <pages>5935-5947</pages>
       <abstract>We introduce a novel dataset tailored for code generation, aimed at aiding developers in common tasks. Our dataset provides examples that include a clarified intent, code snippets associated, and an average of three related unit tests. It encompasses a range of libraries such as Pandas, Numpy, and Regex, along with more than 70 standard libraries in Python code derived from Stack Overflow. Comprising 3,402 crafted examples by Python experts, our dataset is designed for both model finetuning and standalone evaluation. To complete unit tests evaluation, we categorize examples in order to get more fine grained analysis, enhancing the understanding of models’ strengths and weaknesses in specific coding tasks. The examples have been refined to reduce data contamination, a process confirmed by the performance of three leading models: Mistral 7B, CodeLLAMA 13B, and Starcoder 15B. We further investigate data-contamination testing GPT-4 performance on a part of our dataset. The benchmark can be accessed at anonymized address.</abstract>
       <url hash="48cccfbe">2024.findings-acl.354</url>
@@ -11043,7 +11043,7 @@
     </paper>
     <paper id="360">
       <title>It takes two to borrow: a donor and a recipient. Who’s who?</title>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ana</first><last>Uban</last><affiliation>Universitatea Bucuresti</affiliation></author>
       <author><first>Anca</first><last>Dinu</last></author>
       <author><first>Ioan-Bogdan</first><last>Iordache</last></author>
@@ -11155,7 +11155,7 @@
     </paper>
     <paper id="369">
       <title>Understanding Fine-grained Distortions in Reports of Scientific Findings</title>
-      <author><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Dustin</first><last>Wright</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
       <author><first>Isabelle</first><last>Augenstein</last><affiliation>University of Copenhagen</affiliation></author>
@@ -11212,7 +11212,7 @@
       <author><first>Tianhao</first><last>Shen</last></author>
       <author><first>Ge</first><last>Zhang</last></author>
       <author><first>Yuhang</first><last>Wu</last></author>
-      <author id="cong-liu"><first>Cong</first><last>Liu</last></author>
+      <author><first>Cong</first><last>Liu</last></author>
       <author><first>Ziya</first><last>Zhou</last></author>
       <author><first>Liumeng</first><last>Xue</last></author>
       <author><first>Ziyang</first><last>Ma</last></author>
@@ -11282,7 +11282,7 @@
       <author><first>Abbas</first><last>Ghaddar</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Ivan</first><last>Kobyzev</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
-      <author><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <pages>6322-6334</pages>
       <abstract>Recently, there has been considerable attention on detecting hallucinations and omissions in Machine Translation (MT) systems. The two dominant approaches to tackle this task involve analyzing the MT system’s internal states or relying on the output of external tools, such as sentence similarity or MT quality estimators. In this work, we introduce OTTAWA, a novel Optimal Transport (OT)-based word aligner specifically designed to enhance the detection of hallucinations and omissions in MT systems. Our approach explicitly models the missing alignments by introducing a “null” vector, for which we propose a novel one-side constrained OT setting to allow an adaptive null alignment. Our approach yields competitive results compared to state-of-the-art methods across 18 language pairs on the HalOmi benchmark. In addition, it shows promising features, such as the ability to distinguish between both error types and perform word-level detection without accessing the MT system’s internal states.</abstract>
@@ -11309,8 +11309,8 @@
       <author><first>Guangzhi</first><last>Sun</last></author>
       <author><first>Shutong</first><last>Feng</last><affiliation>Heinrich-Heine Universität Düsseldorf</affiliation></author>
       <author><first>Dongcheng</first><last>Jiang</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
-      <author><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University and University College London</affiliation></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last><affiliation>Heinrich Heine University Duesseldorf</affiliation></author>
       <author><first>Phil</first><last>Woodland</last><affiliation>University of Cambridge</affiliation></author>
       <pages>6351-6362</pages>
       <abstract>Recently, advancements in large language models (LLMs) have shown an unprecedented ability across various language tasks. This paper investigates the potential application of LLMs to slot filling with noisy ASR transcriptions, via both in-context learning and task-specific fine-tuning. Dedicated prompt designs and noise-robust LoRA fine-tuning are proposed to improve the robustness of LLMs for slot filling with noisy ASR transcriptions. Moreover, a linearised knowledge injection (LKI) scheme is also proposed to integrate dynamic external knowledge into LLMs. Experiments were performed on SLURP to quantify the performance of LLMs, including GPT-3.5-turbo, GPT-4, LLaMA-13B, LLaMA-2-13B and Vicuna-13B (v1.1 and v1.5) with different ASR error rates. The use of the noise-robust fine-tuning together with LKI for Vicuna-13B-v1.5 achieved 6.7% and 17.6% absolute SLU-F1 improvements compared to a fully fine-tuned Flan-T5-XL model on the limited data setup and the zero-shot setup respectively.</abstract>
@@ -11322,8 +11322,8 @@
       <title>Too Big to Fail: Larger Language Models are Disproportionately Resilient to Induction of Dementia-Related Linguistic Anomalies</title>
       <author id="changye-li-umn"><first>Changye</first><last>Li</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhecheng</first><last>Sheng</last></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
       <pages>6363-6377</pages>
       <abstract>As artificial neural networks grow in complexity, understanding their inner workings becomes increasingly challenging, which is particularly important in healthcare applications. The intrinsic evaluation metrics of autoregressive neural language models (NLMs), perplexity (PPL), can reflect how “surprised” an NLM model is at novel input. PPL has been widely used to understand the behavior of NLMs. Previous findings show that changes in PPL when masking attention layers in pre-trained transformer-based NLMs reflect linguistic anomalies associated with Alzheimer’s disease dementia. Building upon this, we explore a novel bidirectional attention head ablation method that exhibits properties attributed to the concepts of cognitive and brain reserve in human brain studies, which postulate that people with more neurons in the brain and more efficient processing are more resilient to neurodegeneration. Our results show that larger GPT-2 models require a disproportionately larger share of attention heads to be masked/ablated to display degradation of similar magnitude to masking in smaller models. These results suggest that the attention mechanism in transformer models may present an analogue to the notions of cognitive and brain reserve and could potentially be used to model certain aspects of the progression of neurodegenerative disorders and aging.</abstract>
       <url hash="624958b0">2024.findings-acl.380</url>
@@ -11458,7 +11458,7 @@
       <author><first>Sang</first><last>Truong</last></author>
       <author><first>Stephen</first><last>Mussmann</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Yinglun</first><last>Zhu</last><affiliation>University of California, Riverside</affiliation></author>
-      <author><first>Jeff</first><last>Bilmes</last><affiliation>University of Washington, Seattle</affiliation></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last><affiliation>University of Washington, Seattle</affiliation></author>
       <author><first>Simon</first><last>Du</last><affiliation>University of Washington</affiliation></author>
       <author><first>Kevin</first><last>Jamieson</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jordan</first><last>Ash</last><affiliation>Microsoft Research</affiliation></author>
@@ -11535,7 +11535,7 @@
       <author><first>Jiasheng</first><last>Ye</last></author>
       <author><first>Junjie</first><last>Ye</last></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>6632-6646</pages>
       <abstract>In the realm of Large Language Models (LLMs), users commonly employ diverse decoding strategies and adjust hyperparameters to control the generated text. However, a critical question emerges: Are LLMs conscious of the existence of these decoding strategies and capable of regulating themselves? The current decoding generation process often relies on empirical and heuristic manual adjustments to hyperparameters based on types of tasks and demands. However, this process is typically cumbersome, and the decoding hyperparameters may not always be optimal for each sample. To address the aforementioned challenges, we propose a novel text generation paradigm termed Hyperparameter Aware Generation (HAG). By leveraging hyperparameter-aware instruction tuning, the LLM autonomously determines the optimal decoding strategy and configs based on the input samples, enabling self-regulation. Our approach eliminates the need for extensive manual tuning, offering a more autonomous, self-regulate model behavior. Experimental results spanning six datasets across reasoning, creativity, translation, and mathematics tasks demonstrate that hyperparameter-aware instruction tuning empowers the LLMs to self-regulate the decoding strategy and hyperparameter. HAG extends the current paradigm in the text generation process, highlighting the feasibility of endowing the LLMs with self-regulate decoding strategies.</abstract>
       <url hash="7d20b169">2024.findings-acl.396</url>
@@ -11599,7 +11599,7 @@
       <author><first>Zhigang</first><last>Kan</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Zhihua</first><last>Wen</last><affiliation>National University of Defence Technology</affiliation></author>
       <author><first>Yongquan</first><last>He</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>6719-6734</pages>
       <abstract>Temporal knowledge graph question answering (TKGQA) poses a significant challenge task, due to the temporal constraints hidden in questions and the answers sought from dynamic structured knowledge. Although large language models (LLMs) have made considerable progress in their reasoning ability over structured data, their application to the TKGQA task is a relatively unexplored area. This paper first proposes a novel generative temporal knowledge graph question answering framework, GenTKGQA, which guides LLMs to answer temporal questions through two phases: Subgraph Retrieval and Answer Generation. First, we exploit LLM’s intrinsic knowledge to mine temporal constraints and structural links in the questions without extra training, thus narrowing down the subgraph search space in both temporal and structural dimensions. Next, we design virtual knowledge indicators to fuse the graph neural network signals of the subgraph and the text representations of the LLM in a non-shallow way, which helps the open-source LLM deeply understand the temporal order and structural dependencies among the retrieved facts through instruction tuning. Experimental results on two widely used datasets demonstrate the superiority of our model.</abstract>
       <url hash="463c4385">2024.findings-acl.401</url>
@@ -11612,7 +11612,7 @@
       <author><first>Sangwu</first><last>Lee</last><affiliation>University of Rochester</affiliation></author>
       <author><first>Yingshan</first><last>Chang</last></author>
       <author><first>Yonatan</first><last>Bisk</last><affiliation>Meta and Carnegie Mellon University</affiliation></author>
-      <author><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>6735-6752</pages>
       <abstract>Verifying a question’s validity before answering is crucial in real-world applications, where users may provide imperfect instructions. In this scenario, an ideal model should address the discrepancies in the query and convey them to the users rather than generating the best possible answer. Addressing this requirement, we introduce a new compositional visual question-answering dataset, VisReas, that consists of answerable and unanswerable visual queries formulated by traversing and perturbing commonalities and differences among objects, attributes, and relations. VisReas contains 2.07M semantically diverse queries generated automatically using Visual Genome scene graphs. The unique feature of this task, validating question answerability with respect to an image before answering, and the poor performance of state-of-the-art models inspired the design of a new modular baseline, Logic2Vision that reasons by producing and executing pseudocode without any external modules to generate the answer. Logic2Vision outperforms generative models in VisReas (+4.82% over LLaVA-1.5; +12.23% over InstructBLIP) and achieves a significant gain in performance against the classification models.</abstract>
       <url hash="c78ef7d9">2024.findings-acl.402</url>
@@ -11853,7 +11853,7 @@
       <author><first>Yijin</first><last>Liu</last><affiliation>Wechat AI</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>7040-7051</pages>
       <abstract>Code generation aims to understand the problem description and generate corresponding code snippets, where existing works generally decompose such complex tasks into intermediate steps by prompting strategies, such as Chain-of-Thought and its variants. While these studies have achieved some success, their effectiveness is highly dependent on the capabilities of advanced Large Language Models (LLMs) such as GPT-4, particularly in terms of API calls, which significantly limits their practical applicability. Consequently, how to enhance the code generation capabilities of small and medium-scale code LLMs without significantly increasing training costs is an appealing challenge. In this paper, we suggest that code comments are the natural logic pivot between natural language and code language and propose using comments to boost the code generation ability of code LLMs. Concretely, we propose MANGO (comMents As Natural loGic pivOts), including a comment contrastive training strategy and a corresponding logical comment decoding strategy. Experiments are performed on HumanEval and MBPP, utilizing StarCoder and WizardCoder as backbone models, and encompassing model parameter sizes between 3B and 7B. The results indicate that MANGO significantly improves the code pass rate based on the strong baselines. Meanwhile, the robustness of the logical comment decoding strategy is notably higher than the Chain-of-thoughts prompting.</abstract>
@@ -11939,7 +11939,7 @@
       <author><first>Shahin</first><last>Amiriparian</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Manuel</first><last>Milling</last><affiliation>University of Augsburg</affiliation></author>
       <author><first>Ilhan</first><last>Aslan</last></author>
-      <author><first>Björn</first><last>Schuller</last><affiliation>Technische Universität München and Imperial College London</affiliation></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last><affiliation>Technische Universität München and Imperial College London</affiliation></author>
       <pages>7144-7159</pages>
       <abstract>Telling stories is an integral part of human communication which can evoke emotions and influence the affective states of the audience. Automatically modeling emotional trajectories in stories has thus attracted considerable scholarly interest. However, as most existing works have been limited to unsupervised dictionary-based approaches, there is no benchmark for this task. We address this gap by introducing continuous valence and arousal labels for an existing dataset of children’s stories originally annotated with discrete emotion categories. We collect additional annotations for this data and map the categorical labels to the continuous valence and arousal space. For predicting the thus obtained emotionality signals, we fine-tune a DeBERTa model and improve upon this baseline via a weakly supervised learning approach. The best configuration achieves a Concordance Correlation Coefficient (CCC) of .8221 for valence and .7125 for arousal on the test set, demonstrating the efficacy of our proposed approach. A detailed analysis shows the extent to which the results vary depending on factors such as the author, the individual story, or the section within the story. In addition, we uncover the weaknesses of our approach by investigating examples that prove to be difficult to predict.</abstract>
       <url hash="a5a917b9">2024.findings-acl.426</url>
@@ -12114,7 +12114,7 @@
       <author><first>Miaoran</first><last>Zhang</last><affiliation>Saarland University</affiliation></author>
       <author><first>Vagrant</first><last>Gautam</last><affiliation>Saarland University</affiliation></author>
       <author><first>Mingyang</first><last>Wang</last></author>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Xiaoyu</first><last>Shen</last><affiliation>Amazon</affiliation></author>
       <author><first>Dietrich</first><last>Klakow</last><affiliation>Saarland University</affiliation></author>
       <author><first>Marius</first><last>Mosbach</last><affiliation>McGill University and Mila - Quebec Artificial Intelligence Institute</affiliation></author>
@@ -12156,7 +12156,7 @@
       <author><first>Paul</first><last>Röttger</last><affiliation>Bocconi University</affiliation></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>7407-7416</pages>
       <abstract>The open-ended nature of language generation makes the evaluation of autoregressive large language models (LLMs) challenging. One common evaluation approach uses multiple-choice questions to limit the response space. The model is then evaluated by ranking the candidate answers by the log probability of the first token prediction. However, first-tokens may not consistently reflect the final response output, due to model’s diverse response styles such as starting with “Sure” or refusing to answer. Consequently, first-token evaluation is not indicative of model behaviour when interacting with users. But by how much? We evaluate how aligned first-token evaluation is with the text output along several dimensions, namely final option choice, refusal rate, choice distribution and robustness under prompt perturbation. Our results show that the two approaches are severely misaligned <i>on all dimensions</i>, reaching mismatch rates over 60%. Models heavily fine-tuned on conversational or safety data are especially impacted. Crucially, models remain misaligned even when we increasingly constrain prompts, i.e., force them to start with an option letter or example template. Our findings i) underscore the importance of inspecting the text output as well and ii) caution against relying solely on first-token evaluation.</abstract>
       <url hash="0f087161">2024.findings-acl.441</url>
@@ -12221,7 +12221,7 @@
       <author><first>Haiqin</first><last>Yang</last><affiliation>International Digital Economy Academy (IDEA)</affiliation></author>
       <author><first>Fei</first><last>Zhao</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <pages>7510-7527</pages>
       <abstract>Relation triplet extraction is a fundamental task in natural language processing that aims to identify semantic relationships between entities in text. It is particularly challenging in the zero-shot setting, i.e., zero-shot relation triplet extraction (ZeroRTE), where the relation sets between training and test are disjoint. Existing methods deal with this task by integrating relations into prompts, which may lack sufficient understanding of the unseen relations. To address these limitations, this paper presents a novel Two-Agent Game (TAG) approach to deliberate and debate the semantics of unseen relations. TAG consists of two agents, a generator and an extractor. They iteratively interact in three key steps: attempting, criticizing, and rectifying. This enables the agents to fully debate and understand the unseen relations. Experimental results demonstrate consistent improvement over ALBERT-Large, BART, andGPT3.5, without incurring additional inference costs in all cases. Remarkably, our method outperforms strong baselines by a significant margin, achieving an impressive 6%-16% increase in F1 scores, particularly when dealingwith FewRel with five unseen relations.</abstract>
       <url hash="786fa03c">2024.findings-acl.446</url>
@@ -12475,7 +12475,7 @@
       <author><first>Yuhang</first><last>Lai</last><affiliation>Fudan University</affiliation></author>
       <author><first>Siyuan</first><last>Wang</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Shujun</first><last>Liu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>7817-7831</pages>
       <abstract>We introduce ALaRM, the first framework modeling hierarchical rewards in reinforcement learning from human feedback (RLHF), which is designed to enhance the alignment of large language models (LLMs) with human preferences. The framework addresses the limitations of current alignment approaches, which often struggle with the inconsistency and sparsity of human supervision signals, by integrating holistic rewards with aspect-specific rewards. This integration enables more precise and consistent guidance of language models towards desired outcomes, particularly in complex and open text generation tasks. By employing a methodology that filters and combines multiple rewards based on their consistency, the framework provides a reliable mechanism for improving model alignment. We validate our approach through applications in long-form question answering and machine translation tasks, employing gpt-3.5-turbo for pairwise comparisons, and demonstrate improvements over existing baselines. Our work underscores the effectiveness of hierarchical rewards modeling in refining LLM training processes for better human preference alignment. We release our code at https://ALaRM-fdu.github.io.</abstract>
@@ -12577,7 +12577,7 @@
       <title>Empowering cross-lingual abilities of instruction-tuned large language models by translation-following demonstrations</title>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Giulia</first><last>Pucci</last></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>7961-7973</pages>
       <abstract>The language ability of Large Language Models (LLMs) is often unbalanced towards English because of the imbalance in the distribution of the pre-training data. This disparity is demanded in further fine-tuning and affecting the cross-lingual abilities of LLMs. In this paper, we propose to empower Instruction-tuned LLMs (It-LLMs) in languages other than English by building semantic alignment between them. Hence, we propose <i>CrossAlpaca</i>, an It-LLM with cross-lingual Instruction-following and Translation-following demonstrations to improve semantic alignment between languages. We validate our approach on the multilingual Question Answering (QA) benchmarks XQUAD and MLQA and adapted versions of MMLU and BBH.Our models, tested over six different languages, outperform the It-LLMs tuned on monolingual data. The final results show that instruction tuning on non-English data is not enough and that semantic alignment can be further improved by Translation-following demonstrations.</abstract>
       <url hash="5b6a3de1">2024.findings-acl.473</url>
@@ -12637,7 +12637,7 @@
     <paper id="478">
       <title>Can Large Language Models Follow Concept Annotation Guidelines? A Case Study on Scientific and Financial Domains</title>
       <author><first>Marcio</first><last>Fonseca</last></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>8027-8042</pages>
       <abstract>Although large language models (LLMs) exhibit remarkable capacity to leverage in-context demonstrations, it is still unclear to what extent they can learn new facts or concept definitions via prompts. To address this question, we examine the capacity of instruction-tuned LLMs to follow in-context concept annotation guidelines for zero-shot sentence labeling tasks. We design guidelines that present different types of factual and counterfactual concept definitions, which are used as prompts for zero-shot sentence classification tasks. Our results show that although concept definitions consistently help in task performance, only the larger models (with 70B parameters or more) have limited ability to work under counterfactual contexts. Importantly, only proprietary models such as GPT-3.5 can recognize nonsensical guidelines, which we hypothesize is due to more sophisticated alignment methods. Finally, we find that Falcon-180B-chat is outperformed by Llama-2-70B-chat is most cases, which indicates that increasing model scale does not guarantee better adherence to guidelines. Altogether, our simple evaluation method reveals significant gaps in concept understanding between the most capable open-source language models and the leading proprietary APIs.</abstract>
       <url hash="79d6b9d3">2024.findings-acl.478</url>
@@ -12674,7 +12674,7 @@
     <paper id="481">
       <title>Deterministic Reversible Data Augmentation for Neural Machine Translation</title>
       <author><first>Jiashu</first><last>Yao</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Yuhang</first><last>Guo</last></author>
       <pages>8075-8089</pages>
@@ -12766,7 +12766,7 @@
       <title><fixed-case>LLM</fixed-case>s Beyond <fixed-case>E</fixed-case>nglish: Scaling the Multilingual Capability of <fixed-case>LLM</fixed-case>s with Cross-Lingual Feedback</title>
       <author><first>Wen</first><last>Lai</last></author>
       <author><first>Mohsen</first><last>Mesgar</last><affiliation>Bosch</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>8186-8213</pages>
       <abstract>To democratize large language models (LLMs) to most natural languages, it is imperative to make these models capable of understanding and generating texts in many languages, in particular low-resource ones. While recent multilingual LLMs demonstrate remarkable performance in such capabilities, these LLMs still support a limited number of human languages due to the lack of training data for low resource languages. Moreover, these LLMs are not yet aligned with human preference for downstream tasks, which is crucial for the success of LLMs in English. In this paper, we introduce xLLaMA-100 and xBLOOM-100 (collectively xLLMs-100), which scale the multilingual capabilities of LLaMA and BLOOM to 100 languages. To do so, we construct two datasets: a multilingual instruction dataset including 100 languages, which represents the largest language coverage to date, and a cross-lingual human feedback dataset encompassing 30 languages. We perform multilingual instruction tuning on the constructed instruction data and further align the LLMs with human feedback using the DPO algorithm on our cross-lingual human feedback dataset. We evaluate the multilingual understanding and generating capabilities of xLLMs-100 on five multilingual benchmarks. Experimental results show that xLLMs-100 consistently outperforms its peers across the benchmarks by considerable margins, defining a new state-of-the-art multilingual LLM that supports 100 languages.</abstract>
       <url hash="c8c52894">2024.findings-acl.488</url>
@@ -12805,7 +12805,7 @@
     <paper id="491">
       <title>It Is Not About What You Say, It Is About How You Say It: A Surprisingly Simple Approach for Improving Reading Comprehension</title>
       <author><first>Sagi</first><last>Shaier</last></author>
-      <author><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last><affiliation>University of Colorado at Denver</affiliation></author>
       <author><first>Katharina</first><last>Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>8292-8305</pages>
       <abstract>Natural language processing has seen rapid progress over the past decade. Due to the speed of developments, some practices get established without proper evaluation. Considering one such case and focusing on reading comprehension, we ask our first research question: 1) How does the order of inputs – i.e., question and context – affect model performance? Additionally, given recent advancements in input emphasis, we ask a second research question: 2) Does emphasizing either the question, the context, or both enhance performance? Experimenting with 9 large language models across 3 datasets, we find that presenting the context before the question improves model performance, with an accuracy increase of up to 31%. Furthermore, emphasizing the context yields superior results compared to question emphasis, and in general, emphasizing parts of the input is particularly effective for addressing questions that models lack the parametric knowledge to answer. Experimenting with both prompt-based and attention-based emphasis methods, we additionally find that the best method is surprisingly simple: it only requires concatenating a few tokens to the input and results in an ac- curacy improvement of up to 36%, allowing smaller models to outperform their significantly larger counterparts.</abstract>
@@ -12817,7 +12817,7 @@
       <title>Large Language Models Relearn Removed Concepts</title>
       <author><first>Michelle</first><last>Lo</last></author>
       <author><first>Fazl</first><last>Barez</last><affiliation>University of Oxford, University of Oxford and University of Edinburgh</affiliation></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>8306-8323</pages>
       <abstract>Advances in model editing through neuron pruning hold promise for removing undesirable concepts from large language models. However, it remains unclear whether models have the capacity to reacquire pruned concepts after editing. To investigate this, we evaluate concept relearning in models by tracking concept saliency and similarity in pruned neurons during retraining for named entity recognition tasks. Our findings reveal that models can quickly regain performance post-pruning by relocating advanced concepts to earlier layers and reallocating pruned concepts to primed neurons with similar semantics. This suggests that models exhibit polysemantic capacities and can blend old and new concepts in individual neurons. While neuron pruning provides interpretability into model concepts, our results highlight the challenges of permanent concept removal for improved model *safety*. Monitoring concept reemergence and developing techniques to mitigate relearning of unsafe concepts will be important directions for more robust model editing. Overall, our work strongly demonstrates the resilience and fluidity of concept representations in LLMs post concept removal.</abstract>
       <url hash="2594e833">2024.findings-acl.492</url>
@@ -12862,7 +12862,7 @@
     <paper id="496">
       <title><fixed-case>B</fixed-case>ench<fixed-case>IE</fixed-case>^<fixed-case>FL</fixed-case>: A Manually Re-Annotated Fact-Based Open Information Extraction Benchmark</title>
       <author><first>Fabrice</first><last>Lamarche</last></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>8372-8394</pages>
       <abstract>Open Information Extraction (OIE) is a field of natural language processing that aims to present textual information in a format that allows it to be organized, analyzed and reflected upon. Numerous OIE systems are developed, claiming ever-increasing performance, marking the need for objective benchmarks. BenchIE is the latest reference we know of. Despite being very well thought out, we noticed a number of issues we believe are limiting. Therefore, we propose BenchIE^FL, a new OIE benchmark which fully enforces the principles of BenchIE while containing fewer errors, omissions and shortcomings when candidate facts are matched towards reference ones. BenchIE^FL allows insightful conclusions to be drawn on the actual performance of OIE extractors.</abstract>
       <url hash="40c1df8a">2024.findings-acl.496</url>
@@ -12890,7 +12890,7 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Fei</first><last>Yuan</last></author>
       <author><first>Shuaijie</first><last>She</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Alexandra</first><last>Birch</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>8411-8423</pages>
       <abstract>Large language models show compelling performance on reasoning tasks but they tend to perform much worse in languages other than English. This is unsurprising given that their training data largely consists of English text and instructions. A typical solution is to translate instruction data into all languages of interest, and then train on the resulting multilingual data, which is called translate-training. This approach not only incurs high cost, but also results in poorly translated data due to the non-standard formatting of mathematical chain-of-thought. In this paper, we explore the benefits of question alignment, where we train the model to translate reasoning questions into English by finetuning on X-English parallel question data. In this way we perform targeted, in-domain language alignment which makes best use of English instruction data to unlock the LLMs’ multilingual reasoning abilities. Experimental results on LLaMA2-13B show that question alignment leads to consistent improvements over the translate-training approach: an average improvement of 11.3% and 16.1% accuracy across ten languages on the MGSM and MSVAMP multilingual reasoning benchmarks.</abstract>
@@ -12918,7 +12918,7 @@
       <title>Multi-Label Classification for Implicit Discourse Relation Recognition</title>
       <author><first>Wanqiu</first><last>Long</last></author>
       <author><first>Siddharth</first><last>N</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <pages>8437-8451</pages>
       <abstract>Discourse relations play a pivotal role in establishing coherence within textual content, uniting sentences and clauses into a cohesive narrative. The Penn Discourse Treebank (PDTB) stands as one of the most extensively utilized datasets in this domain. In PDTB-3, the annotators can assign multiple labels to an example, when they believe the simultaneous presence of multiple relations. Prior research in discourse relation recognition has treated these instances as separate examples during training, with a gold-standard prediction matching one of the labels considered correct at test time. However, this approach is inadequate, as it fails to account for the interdependence of labels in real-world contexts and to distinguish between cases where only one sense relation holds and cases where multiple relations hold simultaneously. In our work, we address this challenge by exploring various multi-label classification frameworks to handle implicit discourse relation recognition. We show that the methods for multi-label prediction don’t depress performance for single-label prediction. Additionally, we give comprehensive analysis of results and data. Our work contributes to advancing the understanding and application of discourse relations and provide a foundation for the future study.</abstract>
       <url hash="8930e081">2024.findings-acl.500</url>
@@ -13011,7 +13011,7 @@
     <paper id="508">
       <title>Can Large Language Model Summarizers Adapt to Diverse Scientific Communication Goals?</title>
       <author><first>Marcio</first><last>Fonseca</last></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>8599-8618</pages>
       <abstract>In this work, we investigate the controllability of large language models (LLMs) on scientific summarization tasks. We identify key stylistic and content coverage factors that characterize different types of summaries such as paper reviews, abstracts, and lay summaries. By controlling stylistic features, we find that non-fine-tuned LLMs outperform humans in the MuP review generation task, both in terms of similarity to reference summaries and human preferences. Also, we show that we can improve the controllability of LLMs with keyword-based classifier-free guidance (CFG) while achieving lexical overlap comparable to strong fine-tuned baselines on arXiv and PubMed. However, our results also indicate that LLMs cannot consistently generate long summaries with more than 8 sentences. Furthermore, these models exhibit limited capacity to produce highly abstractive lay summaries. Although LLMs demonstrate strong generic summarization competency, sophisticated content control without costly fine-tuning remains an open problem for domain-specific applications.</abstract>
       <url hash="f67b13a7">2024.findings-acl.508</url>
@@ -13039,7 +13039,7 @@
       <author><first>Xian</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Jingbo</first><last>Shang</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Hoang</first><last>Nguyen</last></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <pages>8631-8643</pages>
       <abstract>Attribute value extraction involves identifying the value spans of predetermined attributes in product texts. This area of research has traditionally operated under a closed-world assumption, focusing on products from a static set of categories and their associated attributes. However, products in e-commerce stores are ever-increasing and evolving, calling for life-long learning. If continuously trained on the fast-increasing products and attributes, most existing solutions not only struggle for parameter efficiency but also endure foreseeable defects due to data contamination, catastrophic forgetting, etc. As a remedy, we propose and study a new task, which aims to effectively maintain a strong single model for many domains in a life-long learning fashion, without jeopardizing the model performance and parameter efficiency. We introduce factorization into the model and make it domain-aware by decoupling the modeling of product type and attribute, as a way to promote de-contamination and parameter efficiency while scaling up. Tuning the model with distillation prevents forgetting historical knowledge and enables continuous learning from emerging domains. Experiments on hundreds of domains showed that our model attains the near state-of-the-art performance with affordable parameter size, the least historical knowledge forgetting, and the greatest robustness against noises, whilst adding only a few parameters per domain when compared with competitive baselines.</abstract>
       <url hash="3081a4f1">2024.findings-acl.510</url>
@@ -13050,7 +13050,7 @@
       <title>Exploring Domain Robust Lightweight Reward Models based on Router Mechanism</title>
       <author><first>Hyuk</first><last>Namgoong</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Jeesu</first><last>Jung</last></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>YoonHyung</first><last>Roh</last><affiliation>Electronics and Telecommunications Research Institute</affiliation></author>
       <pages>8644-8652</pages>
       <abstract>Recent advancements in large language models have heavily relied on the large reward model from reinforcement learning from human feedback for fine-tuning. However, the use of a single reward model across various domains may not always be optimal, often requiring retraining from scratch when new domain data is introduced. To address these challenges, we explore the utilization of small language models operating in a domain-specific manner based on router mechanisms. Our three approaches are: 1) utilize mixture of experts to form a single reward model by modularizing an internal router and experts, 2) employing external router to select the appropriate reward model from multiple domain-specific models, and 3) the framework reduces parameter size by loading reward models and router adapters onto a single small language model using adapters. Experimental validation underscores the effectiveness of our approach, demonstrating performance comparable to baseline methods while also reducing the total parameter size.</abstract>
@@ -13340,7 +13340,7 @@
       <author><first/><last>Juseon-Do</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Jingun</first><last>Kwon</last><affiliation>Chungnam National University</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>8980-8996</pages>
       <abstract>Extractive summarization can produce faithful summaries but often requires additional constraints such as a desired summary length. Traditional sentence compression models do not typically consider the constraints because of their restricted model abilities, which require model modifications for coping with them. To bridge this gap, we propose Instruction-based Compression (InstructCMP), an approach to the sentence compression task that can consider the length constraint through instructions by leveraging the zero-shot task-solving abilities of Large Language Models (LLMs). For this purpose, we created new evaluation datasets by transforming traditional sentence compression datasets into an instruction format. By using the datasets, we first reveal that the current LLMs still face challenges in accurately controlling the length for a compressed text. To address this issue, we propose an approach named length priming, that incorporates additional length information into the instructions without external resources. While the length priming effectively works in a zero-shot setting, a training dataset with the instructions would further improve the ability of length control. Thus, we additionally created a training dataset in an instruction format to fine-tune the model on it. Experimental results and analysis show that applying the length priming significantly improves performances of InstructCMP in both zero-shot and fine-tuning settings without the need of any model modifications.</abstract>
       <url hash="5893350a">2024.findings-acl.532</url>
@@ -13387,7 +13387,7 @@
       <author><first>Ming</first><last>Yan</last></author>
       <author><first>Ji</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>9039-9052</pages>
       <abstract>Despite intensive efforts devoted to tool learning, the problem of budget-constrained tool learning, which focuses on resolving user queries within a specific budget constraint, has been widely overlooked. This paper proposes a novel method for budget-constrained tool learning. Our approach involves creating a preferable plan under the budget constraint before utilizing the tools. This plan outlines the feasible tools and the maximum number of times they can be employed, offering a comprehensive overview of the tool learning process for large language models. This allows them to allocate the budget from a broader perspective. To devise the plan without incurring significant extra costs, we suggest initially estimating the usefulness of the candidate tools based on past experience. Subsequently, we employ dynamic programming to formulate the plan. Experimental results demonstrate that our method can be integrated with various tool learning methods, significantly enhancing their effectiveness under strict budget constraints.</abstract>
       <url hash="21730936">2024.findings-acl.536</url>
@@ -13534,7 +13534,7 @@
       <author><first>Zengkui</first><last>Sun</last></author>
       <author><first>Yijin</first><last>Liu</last><affiliation>Wechat AI</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>9201-9214</pages>
@@ -13606,7 +13606,7 @@
       <author><first>Yijin</first><last>Liu</last><affiliation>Wechat AI</affiliation></author>
       <author><first>Jiaan</first><last>Wang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>9282-9293</pages>
@@ -13690,8 +13690,8 @@
       <author><first>Evgenii</first><last>Tsymbalov</last><affiliation>Independent Researcher</affiliation></author>
       <author><first>Gleb</first><last>Kuzmin</last><affiliation>Artificial Intelligence Research Institute and Institute for Systems Analysis of Russian Academy of Sciences</affiliation></author>
       <author><first>Alexander</first><last>Panchenko</last><affiliation>Skoltech</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Maxim</first><last>Panov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>9367-9385</pages>
       <abstract>Large language models (LLMs) are notorious for hallucinating, i.e., producing erroneous claims in their output. Such hallucinations can be dangerous, as occasional factual inaccuracies in the generated text might be obscured by the rest of the output being generally factually correct, making it extremely hard for the users to spot them. Current services that leverage LLMs usually do not provide any means for detecting unreliable generations. Here, we aim to bridge this gap. In particular, we propose a novel fact-checking and hallucination detection pipeline based on token-level uncertainty quantification. Uncertainty scores leverage information encapsulated in the output of a neural network or its layers to detect unreliable predictions, and we show that they can be used to fact-check the atomic claims in the LLM output. Moreover, we present a novel token-level uncertainty quantification method that removes the impact of uncertainty about what claim to generate on the current step and what surface form to use. Our method Claim Conditioned Probability (CCP) measures only the uncertainty of a particular claim value expressed by the model. Experiments on the task of biography generation demonstrate strong improvements for CCP compared to the baselines for seven different LLMs and four languages. Human evaluation reveals that the fact-checking pipeline based on uncertainty quantification is competitive with a fact-checking tool that leverages external knowledge.</abstract>
@@ -13776,7 +13776,7 @@
       <author><first>DingYi</first><last>Zeng</last></author>
       <author><first>Yichen</first><last>Xiao</last></author>
       <author><first>Shaohuan</first><last>Cheng</last><affiliation>University of Electronic Science and Technology of China</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Grandee</first><last>Lee</last><affiliation>Singapore University of Social Sciences</affiliation></author>
       <author><first>Malu</first><last>Zhang</last><affiliation>University of Electronic Science and Technology of China</affiliation></author>
       <author><first>Wenyu</first><last>Chen</last></author>
@@ -13976,7 +13976,7 @@
     <paper id="580">
       <title>Automated Focused Feedback Generation for Scientific Writing Assistance</title>
       <author><first>Eric</first><last>Chamoun</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Michael</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
       <author><first>Andreas</first><last>Vlachos</last><affiliation>University of Cambridge</affiliation></author>
       <pages>9742-9763</pages>
       <abstract>Scientific writing is a challenging task, particularly for novice researchers who often rely on feedback from experienced peers. Recent work has primarily focused on improving surface form and style rather than manuscript content. In this paper, we propose a novel task: automated focused feedback generation for scientific writing assistance. We present SWIF<tex-math>^2</tex-math>T: a Scientific WrIting Focused Feedback Tool. It is designed to generate specific, actionable and coherent comments, which identify weaknesses in a scientific paper and/or propose revisions to it. Our approach consists of four components - planner, investigator, reviewer and controller - leveraging multiple Large Language Models (LLMs) to implement them. We compile a dataset of 300 peer reviews citing weaknesses in scientific papers and conduct human evaluation. The results demonstrate the superiority in specificity, reading comprehension, and overall helpfulness of SWIF<tex-math>^2</tex-math>T’s feedback compared to other approaches. In our analysis, we also identified cases where automatically generated reviews were judged better than human ones, suggesting opportunities for integration of AI-generated feedback in scientific writing.</abstract>
@@ -14001,7 +14001,7 @@
       <author><first>Bowen</first><last>Shen</last><affiliation>University of the Chinese Academy of Sciences</affiliation></author>
       <author><first>Zheng</first><last>Lin</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
       <author><first>Daren</first><last>Zha</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <author><first>Weiping</first><last>Wang</last></author>
@@ -14026,7 +14026,7 @@
       <author><first>Afra Feyza</first><last>Akyürek</last><affiliation>Boston University</affiliation></author>
       <author><first>Ekin</first><last>Akyürek</last></author>
       <author><first>Leshem</first><last>Choshen</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Derry</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>Massachusetts Institute of Technology and Microsoft</affiliation></author>
       <pages>9802-9818</pages>
       <abstract>While language models (LMs) can sometimes generate factually correct text and estimate truth values of individual claims, these generally do not reflect a globally coherent, manipulable model of the world. As a consequence, current LMs also generate incorrect or nonsensical content, and are difficult to edit and bring up to date. We present a method called Deductive Closure Training (DCT) that uses LMs themselves to identify implications of (and contradictions within) the text that they generate, yielding an efficient self-supervised procedure for improving LM factuality. Given a collection of seed documents, DCT prompts LMs to generate additional text implied by these documents, reason globally about the correctness of this generated text, and finally fine-tune on text inferred to be correct. Given seed documents from a trusted source, DCT provides a tool for supervised model updating; if seed documents are sampled from the LM itself, DCT enables fully unsupervised fine-tuning for improved coherence and accuracy. Across the CREAK, MQuAKE, and Reversal Curse datasets, supervised DCT improves LM fact verification and text generation accuracy by 3-26%; on CREAK, fully unsupervised DCT improves verification accuracy by 12%. These results show that LMs’ reasoning capabilities during inference can be leveraged during training to improve their reliability.</abstract>
@@ -14050,7 +14050,7 @@
     <paper id="586">
       <title>Evaluating Large Language Model Biases in Persona-Steered Generation</title>
       <author><first>Andy</first><last>Liu</last></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>9832-9850</pages>
       <abstract>The task of persona-steered text generation requires large language models (LLMs) to generate text that reflects the distribution of views that an individual fitting a persona could have. People have multifaceted personas, but prior work on bias in LLM-generated opinions has only explored multiple-choice settings or one-dimensional personas. We define an incongruous persona as a persona with multiple traits where one trait makes its other traits less likely in human survey data, e.g. political liberals who support increased military spending. We find that LLMs are 9.7% less steerable towards incongruous personas than congruous ones, sometimes generating the stereotypical stance associated with its demographic rather than the target stance. Models that we evaluate that are fine-tuned with Reinforcement Learning from Human Feedback (RLHF) are more steerable, especially towards stances associated with political liberals and women, but present significantly less diverse views of personas. We also find variance in LLM steerability that cannot be predicted from multiple-choice opinion evaluation. Our results show the importance of evaluating models in open-ended text generation, as it can surface new LLM opinion biases. Moreover, such a setup can shed light on our ability to steer models toward a richer and more diverse range of viewpoints.</abstract>
@@ -14177,7 +14177,7 @@
       <title>Fair Federated Learning with Biased Vision-Language Models</title>
       <author><first>Huimin</first><last>Zeng</last></author>
       <author><first>Zhenrui</first><last>Yue</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Lanyu</first><last>Shang</last></author>
       <author><first>Dong</first><last>Wang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>10002-10017</pages>
@@ -14240,7 +14240,7 @@
       <title><fixed-case>PARADISE</fixed-case>: Evaluating Implicit Planning Skills of Language Models with Procedural Warnings and Tips Dataset</title>
       <author><first>Arda</first><last>Uzunoğlu</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Abdulfattah</first><last>Safa</last><affiliation>Koç University</affiliation></author>
-      <author><first>Gözde Gül</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
       <pages>10085-10102</pages>
       <abstract>Recently, there has been growing interest within the community regarding whether large language models are capable of planning or executing plans. However, most prior studies use LLMs to generate high-level plans for simplified scenarios lacking linguistic complexity and domain diversity, limiting analysis of their planning abilities. These setups constrain evaluation methods (e.g., predefined action space), architectural choices (e.g., only generative models), and overlook the linguistic nuances essential for realistic analysis. To tackle this, we present PARADISE, an abductive reasoning task using Q&amp;A format on practical procedural text sourced from wikiHow. It involves tip and warning inference tasks directly associated with goals, excluding intermediary steps, with the aim of testing the ability of the models to infer implicit knowledge of the plan solely from the given goal. Our experiments, utilizing fine-tuned language models and zero-shot prompting, reveal the effectiveness of task-specific small models over large language models in most scenarios. Despite advancements, all models fall short of human performance. Notably, our analysis uncovers intriguing insights, such as variations in model behavior with dropped keywords, struggles of BERT-family and GPT-4 with physical and abstract goals, and the proposed tasks offering valuable prior knowledge for other unseen procedural tasks. The PARADISE dataset and associated resources are publicly available for further research exploration with https://anonymous.4open.science/r/paradise-53BD/README.md.</abstract>
       <url hash="2e6b25f4">2024.findings-acl.599</url>
@@ -14401,7 +14401,7 @@
       <author><first>Yujian</first><last>Wei</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10258-10273</pages>
       <abstract>Large language models (LLMs) have shown great potential to empower various domains and are often customized by fine-tuning for the requirements of different applications. However, the powerful learning ability of LLMs not only enables them to learn new tasks but also makes them vulnerable to learning undesired behaviors, such as harmfulness and hallucination, as the fine-tuning data often implicitly or explicitly contains such content. Can we fine-tune LLMs on harmful data without learning harmful behaviors? This paper proposes a controllable training framework to make undesired behaviors unlearnable during the fine-tuning process. Specifically, we introduce security vectors to control the model’s behavior and make it consistent with the undesired behavior. Security vectors are activated during fine-tuning, the consistent behavior makes the model believe that such behavior has already been learned and there is no need for further optimization, while inconsistent data can still be learned. After fine-tuning, security vectors are deactivated to restore the LLM’s normal behavior. Our experiments show that the security vectors can prevent LLM from learning harmful and hallucination behavior while preserving the ability to learn other information.</abstract>
       <url hash="449fbcd5">2024.findings-acl.611</url>
@@ -14412,7 +14412,7 @@
       <title>Debiasing Large Language Models with Structured Knowledge</title>
       <author><first>Congda</first><last>Ma</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Tianyu</first><last>Zhao</last><affiliation>Sakana AI</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>10274-10287</pages>
       <abstract>Due to biases inherently present in data for pre-training, current pre-trained Large Language Models (LLMs) also ubiquitously manifest the same phenomena. Since the bias influences the output from the LLMs across various tasks, the widespread deployment of the LLMs is hampered. We propose a simple method that utilizes structured knowledge to alleviate this issue, aiming to reduce the bias embedded within the LLMs and ensuring they have an encompassing perspective when used in applications. Experimental results indicated that our method has good debiasing ability when applied to existing both autoregressive and masked language models. Additionally, it could ensure that the performances of LLMs on downstream tasks remain uncompromised.Our method outperforms state-of-the-art (SOTA) baselines in the debiasing ability. Importantly, our method obviates the need for training from scratch, thus offering enhanced scalability and cost-effectiveness.</abstract>
       <url hash="fdf6f4d6">2024.findings-acl.612</url>
@@ -14440,9 +14440,9 @@
       <author><first>Yubao</first><last>Tang</last></author>
       <author><first>Ruqing</first><last>Zhang</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Yixing</first><last>Fan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>10303-10317</pages>
       <abstract>Generative retrieval uses differentiable search indexes to directly generate relevant document identifiers in response to a query. Recent studies have highlighted the potential of a strong generative retrieval model, trained with carefully crafted pre-training tasks, to enhance downstream retrieval tasks via fine-tuning. However, the full power of pre-training for generative retrieval remains underexploited due to its reliance on pre-defined static document identifiers, which may not align with evolving model parameters. In this work, we introduce BootRet, a bootstrapped pre-training method for generative retrieval that dynamically adjusts document identifiers during pre-training to accommodate the continuing memorization of the corpus. BootRet involves three key training phases: (i) initial identifier generation, (ii) pre-training via corpus indexing and relevance prediction tasks, and (iii) bootstrapping for identifier updates. To facilitate the pre-training phase, we further introduce noisy documents and pseudo-queries, generated by large language models, to resemble semantic connections in both indexing and retrieval tasks. Experimental results demonstrate that BootRet significantly outperforms existing pre-training generative retrieval baselines and performs well even in zero-shot settings.</abstract>
       <url hash="ead2197a">2024.findings-acl.614</url>
@@ -14458,7 +14458,7 @@
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Xu</first><last>Han</last></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>10318-10329</pages>
       <abstract>Aspect-based Sentiment Analysis (ABSA) is extensively researched in the NLP community, yet related models face challenges due to data sparsity when shifting to a new domain. Hence, data augmentation for cross-domain ABSA has attracted increasing attention in recent years. However, two key points have been neglected in prior studies: First, target domain unlabeled data are labeled with pseudo labels by the model trained in the source domain with little quality control, leading to inaccuracy and error propagation. Second, the label and text patterns of generated labeled data are monotonous, thus limiting the robustness and generalization ability of trained ABSA models. In this paper, we aim to design a simple yet effective framework to address the above shortages in ABSA data augmentation, called Refining and Synthesis Data Augmentation (RSDA). Our framework roughly includes two steps: First, it refines generated labeled data using a natural language inference (NLI) filter to control data quality. Second, it synthesizes diverse labeled data via novel label composition and paraphrase approaches. We conduct experiments on 4 kinds of ABSA subtasks, and our framework outperforms 7 strong baselines, demonstrating its effectiveness.</abstract>
       <url hash="c6645a43">2024.findings-acl.615</url>
@@ -14475,7 +14475,7 @@
       <author><first>Yu-Chi</first><last>Pai</last></author>
       <author><first>Hsiu-Hsuan</first><last>Wang</last></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>National Taiwan University</affiliation></author>
-      <author><first>Alexander</first><last>Liu</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="alex-liu"><first>Alexander</first><last>Liu</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Hung-yi</first><last>Lee</last><affiliation>National Taiwan University</affiliation></author>
       <pages>10330-10348</pages>
       <abstract>The sound codec’s dual roles in minimizing data transmission latency and serving as tokenizers underscore its critical importance.Recent years have witnessed significant developments in codec models.The ideal sound codec should preserve content, paralinguistics, speakers, and audio information.However, the question of which codec achieves optimal sound information preservation remains unanswered, as in different papers, models are evaluated on their selected experimental settings.This study introduces Codec-SUPERB, an acronym for Codec sound processing Universal PERformance Benchmark.It is an ecosystem designed to assess codec models across representative sound applications and signal-level metrics rooted in sound domain knowledge.Codec-SUPERB simplifies result sharing through an online leaderboard, promoting collaboration within a community-driven benchmark database, thereby stimulating new development cycles for codecs.Furthermore, we undertake an in-depth analysis to offer insights into codec models from both application and signal perspectives, diverging from previous codec papers mainly concentrating on signal-level comparisons.Finally, we will release codes, the leaderboard, and data to accelerate progress within the community.</abstract>
@@ -14514,9 +14514,9 @@
       <title><fixed-case>C</fixed-case>hart<fixed-case>I</fixed-case>nstruct: Instruction Tuning for Chart Comprehension and Reasoning</title>
       <author><first>Ahmed</first><last>Masry</last><affiliation>York University</affiliation></author>
       <author><first>Mehrad</first><last>Shahmohammadi</last></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>10387-10409</pages>
       <abstract>Charts provide visual representations of data and are widely used for analyzing information, addressing queries, and conveying insights to others. Various chart-related downstream tasks have emerged recently, such as question-answering and summarization. A common strategy to solve these tasks is to fine-tune various models originally trained on vision tasks language. However, such task-specific models are not capable of solving a wide range of chart-related tasks, constraining their real-world applicability. To overcome these challenges, we introduce ChartInsruct: a novel chart-specific vision-language Instruction-following dataset comprising 191K instructions generated with 71K charts. We then present two distinct systems for instruction tuning on such datasets: (1) an end-to-end model that connects a vision encoder for chart understanding with a LLM; and (2) a pipeline model that employs a two-step approach to extract chart data tables and input them into the LLM. In experiments on four downstream tasks, we first show the effectiveness of our model–achieving a new set of state-of-the-art results. Further evaluation shows that our instruction-tuning approach supports a wide array of real-world chart comprehension and reasoning scenarios, thereby expanding the scope and applicability of our models to new kinds of tasks.</abstract>
       <url hash="5149bfff">2024.findings-acl.619</url>
@@ -14695,7 +14695,7 @@
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Dingnan</first><last>Jin</last></author>
       <author><first>Jia</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>10633-10649</pages>
       <abstract>Equipping a conversational search engine with strategies regarding when to ask clarification questions is becoming increasingly important across various domains. Attributing to the context understanding capability of LLMs and their access to domain-specific sources of knowledge, LLM-based clarification strategies feature rapid transfer to various domains in a post-hoc manner.However, they still struggle to deliver promising performance on unseen domains, struggling to achieve effective domain transferability.We take the first step to investigate this issue and existing methods tend to produce one-size-fits-all strategies across diverse domains, limiting their search effectiveness.In response, we introduce a novel method, called STYLE,to achieve effective domain transferability.Our experimental results indicate that STYLE bears strong domain transferability, resulting in an average search performance improvement of 10% on four unseen domains.</abstract>
       <url hash="0afb9df2">2024.findings-acl.632</url>
@@ -14713,7 +14713,7 @@
       <author><first>Yasheng</first><last>Wang</last></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>School of Software, Tsinghua University</affiliation></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <author><first>Zhijiang</first><last>Guo</last><affiliation>University of Cambridge</affiliation></author>
       <pages>10650-10671</pages>
       <abstract>Generative search engines have the potential to transform how people seek information online, but generated responses from existing large language models (LLMs)-backed generative search engines may not always be accurate. Nonetheless, retrieval-augmented generation exacerbates safety concerns, since adversaries may successfully evade the entire system by subtly manipulating the most vulnerable part of a claim. To this end, we propose evaluating the robustness of generative search engines in the realistic and high-risk setting, where adversaries have only black-box system access and seek to deceive the model into returning incorrect responses. Through a comprehensive human evaluation of various generative search engines, such as Bing Chat, PerplexityAI, and YouChat across diverse queries, we demonstrate the effectiveness of adversarial factual questions in inducing incorrect responses. Moreover, retrieval-augmented generation exhibits a higher susceptibility to factual errors compared to LLMs without retrieval. These findings highlight the potential security risks of these systems and emphasize the need for rigorous evaluation before deployment. The dataset and code will be publicly available.</abstract>
@@ -14812,7 +14812,7 @@
       <author><first>C</first><last>Vaijayanthi</last><affiliation>Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Krishnan</first><last>Karunganni</last></author>
       <author><first>Pratyush</first><last>Kumar</last><affiliation>Indian Institute of Technology Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
-      <author><first>Mitesh</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>10740-10782</pages>
       <abstract>We present INDICVOICES, a dataset of natural and spontaneous speech containing a total of 7348 hours of read (9%), extempore (74%) and conversational (17%) audio from 16237 speakers covering 145 Indian districts and 22 languages. Of these 7348 hours, 1639 hours have already been transcribed, with a median of 73 hours per language. Through this paper, we share our journey of capturing the cultural, linguistic and demographic diversity of India to create a one-of-its-kind inclusive and representative dataset. More specifically, we share an open-source blueprint for data collection at scale comprising of standardised protocols, centralised tools, a repository of engaging questions, prompts and conversation scenarios spanning multiple domains and topics of interest, quality control mechanisms, comprehensive transcription guidelines and transcription tools. We hope that this open source blueprint will serve as a comprehensive starter kit for data collection efforts in other multilingual regions of the world. Using INDICVOICES, we build IndicASR, the first ASR model to support all the 22 languages listed in the 8th schedule of the Constitution of India.</abstract>
       <url hash="ecaa3fe5">2024.findings-acl.639</url>
@@ -14908,7 +14908,7 @@
       <author><first>Menglong</first><last>Cui</last></author>
       <author><first>Jiangcun</first><last>Du</last></author>
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>10885-10897</pages>
       <abstract>Large language models (LLMs) exhibit outstanding performance in machine translation via in-context learning. In contrast to sentence-level translation, document-level translation (DOCMT) by LLMs based on in-context learning faces two major challenges: firstly, document translations generated by LLMs are often incoherent; secondly, the length of demonstration for in-context learning is usually limited. To address these issues, we propose a Context-Aware Prompting method (CAP), which enables LLMs to generate more accurate, cohesive, and coherent translations via in-context learning. CAP takes into account multi-level attention, selects the most relevant sentences to the current one as context, and then generates a summary from these collected sentences. Subsequently, sentences most similar to the summary are retrieved from the datastore as demonstrations, which effectively guide LLMs in generating cohesive and coherent translations. We conduct extensive experiments across various DOCMT tasks, and the results demonstrate the effectiveness of our approach, particularly in zero pronoun translation (ZPT) and literary translation tasks.</abstract>
       <url hash="42bb68c9">2024.findings-acl.646</url>
@@ -14945,7 +14945,7 @@
       <title>Understanding Cross-Lingual <fixed-case>A</fixed-case>lignment—<fixed-case>A</fixed-case> Survey</title>
       <author><first>Katharina</first><last>Hämmerl</last><affiliation>CIS, LMU Munich</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>10922-10943</pages>
       <abstract>Cross-lingual alignment, the meaningful similarity of representations across languages in multilingual language models, has been an active field of research in recent years. We survey the literature of techniques to improve cross-lingual alignment, providing a taxonomy of methods and summarising insights from throughout the field. We present different understandings of cross-lingual alignment and their limitations. We provide a qualitative summary of results from a number of surveyed papers. Finally, we discuss how these insights may be applied not only to encoder models, where this topic has been heavily studied, but also to encoder-decoder or even decoder-only models, and argue that an effective trade-off between language-neutral and language-specific information is key.</abstract>
       <url hash="870eae9a">2024.findings-acl.649</url>
@@ -14973,7 +14973,7 @@
       <author><first>Ming</first><last>Yan</last></author>
       <author><first>Ji</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>10960-10977</pages>
       <abstract>While Large language models (LLMs) have demonstrated considerable capabilities across various natural language tasks, they often fall short of the performance achieved by domain-specific state-of-the-art models. One potential approach to enhance domain-specific capabilities of LLMs involves fine-tuning them using corresponding datasets. However, this method can be both resource and time-intensive, and not applicable to closed-source commercial LLMs. In this paper, we propose Preference Adaptation for Enhancing Domain-specific Abilities of LLMs (PANDA), a method designed to augment the domain-specific capabilities of LLMs by leveraging insights from the response preference of expert models without requiring fine-tuning. Our experimental results reveal that PANDA significantly enhances the domain-specific ability of LLMs on text classification and interactive decision tasks. Moreover, LLM with PANDA even outperforms the expert model that being learned on 4 tasks of ScienceWorld. This finding highlights the potential of exploring tuning-free approaches to achieve weak-to-strong generalization.</abstract>
       <url hash="312e4953">2024.findings-acl.651</url>
@@ -14998,7 +14998,7 @@
     <paper id="653">
       <title>Knowledge-to-<fixed-case>SQL</fixed-case>: Enhancing <fixed-case>SQL</fixed-case> Generation with Data Expert <fixed-case>LLM</fixed-case></title>
       <author><first>Zijin</first><last>Hong</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Hao</first><last>Chen</last></author>
       <author><first>Qinggang</first><last>Zhang</last></author>
       <author><first>Feiran</first><last>Huang</last></author>
@@ -15082,7 +15082,7 @@
       <title>When is a Language Process a Language Model?</title>
       <author><first>Li</first><last>Du</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Holden</first><last>Lee</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <pages>11083-11094</pages>
       <abstract>A language model may be viewed as a <tex-math>\Sigma</tex-math>-valued stochastic process for some alphabet <tex-math>\Sigma</tex-math>.However, in some pathological situations, such a stochastic process may “leak” probability mass onto the set of infinite strings and hence is not equivalent to the conventional view of a language model as a distribution over ordinary (finite) strings.Such ill-behaved language processes are referred to as *non-tight* in the literature.In this work, we study conditions of tightness through the lens of stochastic processes.In particular, by regarding the symbol as marking a stopping time and using results from martingale theory, we give characterizations of tightness that generalize our previous work [(Du et al. 2023)](https://arxiv.org/abs/2212.10502).</abstract>
@@ -15119,7 +15119,7 @@
       <author><first>Wenjie</first><last>Wang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Liqiang</first><last>Nie</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Wenjie</first><last>Li</last><affiliation>The Hong Kong Polytechnic University, The Hong Kong Polytechnic University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11119-11129</pages>
       <abstract>Generative retrieval is a promising new paradigm in text retrieval that generates identifier strings of relevant passages as the retrieval target. This paradigm leverages powerful generative language models, distinct from traditional sparse or dense retrieval methods. In this work, we identify a viable direction to further enhance generative retrieval via distillation and propose a feasible framework, named DGR. DGR utilizes sophisticated ranking models, such as the cross-encoder, in a teacher role to supply a passage rank list, which captures the varying relevance degrees of passages instead of binary hard labels; subsequently, DGR employs a specially designed distilled RankNet loss to optimize the generative retrieval model, considering the passage rank order provided by the teacher model as labels. This framework only requires an additional distillation step to enhance current generative retrieval systems and does not add any burden to the inference stage. We conduct experiments on four public datasets, and the results indicate that DGR achieves state-of-the-art performance among the generative retrieval methods. Additionally, DGR demonstrates exceptional robustness and generalizability with various teacher models and distillation losses.</abstract>
       <url hash="24956c06">2024.findings-acl.662</url>
@@ -15131,7 +15131,7 @@
       <author><first>Krishanu</first><last>Maity</last></author>
       <author><first>A.S.</first><last>Poornash</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>11130-11142</pages>
       <abstract>In an era of rapidly evolving internet technology, the surge in multimodal content, including videos, has expanded the horizons of online communication. However, the detection of toxic content in this diverse landscape, particularly in low-resource code-mixed languages, remains a critical challenge. While substantial research has addressed toxic content detection in textual data, the realm of video content, especially in non-English languages, has been relatively underexplored. This paper addresses this research gap by introducing a benchmark dataset, the first of its kind, consisting of 931 videos with 4021 code-mixed Hindi-English utterances collected from YouTube. Each utterance within this dataset has been meticulously annotated for toxicity, severity, and sentiment labels. We have developed an advanced Multimodal Multitask framework built for Toxicity detection in Video Content by leveraging Language Models (LMs), crafted for the primary objective along with the additional tasks of conducting sentiment and severity analysis. ToxVidLM incorporates three key modules – the Encoder module, Cross-Modal Synchronization module, and Multitask module – crafting a generic multimodal LM customized for intricate video classification tasks. Our experiments reveal that incorporating multiple modalities from the videos substantially enhances the performance of toxic content detection by achieving an Accuracy and Weighted F1 score of 94.29% and 94.35%, respectively.</abstract>
       <url hash="f71b458e">2024.findings-acl.663</url>
@@ -15148,7 +15148,7 @@
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zhiyuan</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>11143-11156</pages>
       <abstract>Large Language Models (LLMs) have witnessed remarkable advancements in recent years, prompting the exploration of tool learning, which integrates LLMs with external tools to address diverse real-world challenges. Assessing the capability of LLMs to utilise tools necessitates large-scale and stable benchmarks. However, previous works relied on either hand-crafted online tools with limited scale, or large-scale real online APIs suffering from instability of API status. To address this problem, we introduce StableToolBench, a benchmark evolving from ToolBench, proposing a virtual API server and stable evaluation system. The virtual API server contains a caching system and API simulators which are complementary to alleviate the change in API status. Meanwhile, the stable evaluation system designs solvable pass and win rates using GPT-4 as the automatic evaluator to eliminate the randomness during evaluation. Experimental results demonstrate the stability of StableToolBench, and further discuss the effectiveness of API simulators, the caching system, and the evaluator system.</abstract>
       <url hash="70d2d095">2024.findings-acl.664</url>
@@ -15244,7 +15244,7 @@
       <author><first>Hai</first><last>Zhao</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Nan</first><last>Duan</last><affiliation>Microsoft Research Asia</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>11260-11285</pages>
       <abstract>As the capabilities of large language models (LLMs) continue to advance, evaluating their performance is becoming more important and more challenging. This paper aims to address this issue for Mandarin Chinese in the form of CMMLU, a comprehensive Chinese benchmark that covers various subjects, including natural sciences, social sciences, engineering, and the humanities. We conduct a thorough evaluation of more than 20 contemporary multilingual and Chinese LLMs, assessing their performance across different subjects and settings. The results reveal that most existing LLMs struggle to achieve an accuracy of even 60%, which is the pass mark for Chinese exams. This highlights that there is substantial room for improvement in the capabilities of LLMs. Additionally, we conduct extensive experiments to identify factors impacting the models’ performance and propose directions for enhancing LLMs. CMMLU fills the gap in evaluating the knowledge and reasoning capabilities of large language models for Chinese.</abstract>
       <url hash="b25adb84">2024.findings-acl.671</url>
@@ -15281,7 +15281,7 @@
     <paper id="674">
       <title>Less is <fixed-case>KEN</fixed-case>: a Universal and Simple Non-Parametric Pruning Algorithm for Large Language Models</title>
       <author><first>Michele</first><last>Mastromattei</last><affiliation>Campus Bio-Medico University of Rome</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>11361-11374</pages>
       <url hash="1ca89a79">2024.findings-acl.674</url>
       <bibkey>mastromattei-zanzotto-2024-less</bibkey>
@@ -15292,7 +15292,7 @@
       <author><first>Shiyu</first><last>Ni</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Keping</first><last>Bi</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>11375-11388</pages>
       <abstract>Large Language Models (LLMs) have been found to have difficulty knowing they do not possess certain knowledge and tend to provide specious answers in such cases. Retrieval Augmentation (RA) has been extensively studied to mitigate LLMs’ hallucinations. However, due to the extra overhead and unassured quality of retrieval, it may not be optimal to conduct RA all the time. A straightforward idea is to only conduct retrieval when LLMs are uncertain about a question. This motivates us to enhance the LLMs’ ability to perceive their knowledge boundaries to help RA. In this paper, we first quantitatively measure LLMs’ such ability and confirm their overconfidence. Then, we study how LLMs’ certainty about a question correlates with their dependence on external retrieved information. We propose several methods to enhance LLMs’ perception of knowledge boundaries and show that they are effective in reducing overconfidence. Additionally, equipped with these methods, LLMs can achieve comparable or even better performance of RA with much fewer retrieval calls.</abstract>
       <url hash="f11157ba">2024.findings-acl.675</url>
@@ -15652,7 +15652,7 @@
       <author><first>Yukun</first><last>Yan</last></author>
       <author><first>Zhenghao</first><last>Liu</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Zhixing</first><last>Tan</last><affiliation>Zhongguancun Laboratory</affiliation></author>
-      <author><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
       <author><first>Dong</first><last>Yu</last></author>
       <author><first>Zhiyuan</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Xiaodong</first><last>Shi</last><affiliation>Xiamen University, Tsinghua University</affiliation></author>
@@ -15688,7 +15688,7 @@
       <author><first>Tingting</first><last>Cui</last></author>
       <author><first>Xiaoqing</first><last>Cheng</last><affiliation>Zhengzhou University</affiliation></author>
       <author><first>Liutao</first><last>Liutao</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>11817-11837</pages>
       <abstract>What a large language model (LLM) would respond in ethically relevant context? In this paper, we curate a large benchmark CMoralEval for morality evaluation of Chinese LLMs. The data sources of CMoralEval are two-fold: 1) a Chinese TV program discussing Chinese moral norms with stories from the society and 2) a collection of Chinese moral anomies from various newspapers and academic papers on morality. With these sources, we aim to create a moral evaluation dataset characterized by diversity and authenticity. We develop a morality taxonomy and a set of fundamental moral principles that are not only rooted in traditional Chinese culture but also consistent with contemporary societal norms. To facilitate efficient construction and annotation of instances in CMoralEval, we establish a platform with AI-assisted instance generation to streamline the annotation process. These help us curate CMoralEval that encompasses both explicit moral scenarios (14,964 instances) and moral dilemma scenarios (15,424 instances), each with instances from different data sources. We conduct extensive experiments with CMoralEval to examine a variety of Chinese LLMs. Experiment results demonstrate that CMoralEval is a challenging benchmark for Chinese LLMs.</abstract>
       <url hash="00d4ffe7">2024.findings-acl.703</url>
@@ -15796,7 +15796,7 @@
       <title>Prompting open-source and commercial language models for grammatical error correction of <fixed-case>E</fixed-case>nglish learner text</title>
       <author><first>Christopher</first><last>Davis</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Andrew</first><last>Caines</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Øistein E.</first><last>Andersen</last><affiliation>Computer Laboratory</affiliation></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last><affiliation>Computer Laboratory</affiliation></author>
       <author><first>Shiva</first><last>Taslimipoor</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Helen</first><last>Yannakoudakis</last><affiliation>Computer Laboratory, University of Cambridge and King’s College London</affiliation></author>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last><affiliation>King’s College London, University of London</affiliation></author>
@@ -15906,7 +15906,7 @@
       <author><first>Pavan</first><last>Tankala</last></author>
       <author><first>Rudra</first><last>Murthy</last><affiliation>IBM India Ltd</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>12075-12097</pages>
       <abstract>LLMs have demonstrated remarkable capability for understanding semantics, but their understanding of pragmatics is not well studied. To this end, we release a Pragmatics Understanding Benchmark (PUB) dataset consisting of fourteen tasks in four pragmatics phenomena, namely; Implicature, Presupposition, Reference, and Deixis. We curate high-quality test sets for each task, consisting of Multiple Choice Question Answers (MCQA). PUB includes a total of 28k data points, 6.1k are newly annotated. We evaluate nine models varying in the number of parameters and type of training. Our study reveals several key observations about the pragmatic capabilities of LLMs: 1. chat-fine-tuning strongly benefits smaller models, 2. large base models are competitive with their chat-fine-tuned counterparts, 3. there is a huge variance in performance across different pragmatics phenomena, and 4. a noticeable performance gap between human capabilities and model capabilities. We hope that PUB will enable comprehensive evaluation of LLM’s pragmatic reasoning capabilities.</abstract>
       <url hash="1a6feaa7">2024.findings-acl.719</url>
@@ -15994,10 +15994,10 @@
       <author><first>Renren</first><last>Jin</last></author>
       <author><first>Jiangcun</first><last>Du</last></author>
       <author><first>Wuwei</first><last>Huang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>12186-12215</pages>
       <abstract>Increasing the number of parameters in large language models (LLMs) usually improves performance in downstream tasks but raises compute and memory costs, making deployment difficult in resource-limited settings. Quantization techniques, which reduce the bits needed for model weights or activations with minimal performance loss, have become popular due to the rise of LLMs. However, most quantization studies use pre-trained LLMs, and the impact of quantization on instruction-tuned LLMs and the relationship between perplexity and benchmark performance of quantized LLMs are not well understood. Evaluation of quantized LLMs is often limited to language modeling and a few classification tasks, leaving their performance on other benchmarks unclear. To address these gaps, we propose a structured evaluation framework consisting of three critical dimensions: (1) knowledge &amp; capacity, (2) alignment, and (3) efficiency, and conduct extensive experiments across ten diverse benchmarks. Our experimental results indicate that LLMs with 4-bit quantization can retain performance comparable to their non-quantized counterparts, and perplexity can serve as a proxy metric for quantized LLMs on most benchmarks. Furthermore, quantized LLMs with larger parameter scales can outperform smaller LLMs. Despite the memory savings achieved through quantization, it can also slow down the inference speed of LLMs. Consequently, substantial engineering efforts and hardware support are imperative to achieve a balanced optimization of decoding speed and memory consumption in the context of quantized LLMs.</abstract>
       <url hash="9a624c12">2024.findings-acl.726</url>
@@ -16074,7 +16074,7 @@
       <title>Large Language Models are Few-Shot Training Example Generators: A Case Study in Fallacy Recognition</title>
       <author><first>Tariq</first><last>Alhindi</last></author>
       <author><first>Smaranda</first><last>Muresan</last><affiliation>Amazon and Columbia University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>12323-12334</pages>
       <abstract>Recognizing fallacies is crucial for ensuring the quality and validity of arguments across various domains. However, computational fallacy recognition faces challenges due to the diverse genres, domains, and types of fallacies found in datasets. This leads to a highly multi-class, and even multi-label, setup with substantial class imbalance. In this study, we aim to enhance existing models for fallacy recognition by incorporating additional context and by leveraging large language models to generate synthetic data, thus increasing the representation of the infrequent classes. We experiment with GPT3.5 to generate synthetic examples and we examine the impact of prompt settings for this. Moreover, we explore zero-shot and few-shot scenarios to evaluate the effectiveness of using the generated examples for training smaller models within a unified fallacy recognition framework. Furthermore, we analyze the overlap between the synthetic data and existing fallacy datasets. Finally, we investigate the usefulness of providing supplementary context for detecting fallacy types that need such context, e.g., diversion fallacies. Our evaluation results demonstrate consistent improvements across fallacy types, datasets, and generators. The code and the synthetic datasets are all publicly available.</abstract>
       <url hash="f2dc2138">2024.findings-acl.732</url>
@@ -16122,7 +16122,7 @@
       <author><first>Lin</first><last>Gui</last><affiliation>King’s College London, University of London</affiliation></author>
       <author><first>Min</first><last>Yang</last><affiliation>Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yue</first><last>Yu</last><affiliation>National University of Defense Technology and PengCheng Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>12373-12387</pages>
       <abstract>Stance detection is a challenging task that aims to identify public opinion from social media platforms with respect to specific targets. Previous work on stance detection largely focused on pure texts. In this paper, we study multi-modal stance detection for tweets consisting of texts and images, which are prevalent in today’s fast-growing social media platforms where people often post multi-modal messages. To this end, we create five new multi-modal stance detection datasets of different domains based on Twitter, in which each example consists of a text and an image. In addition, we propose a simple yet effective Targeted Multi-modal Prompt Tuning framework (TMPT), where target information is leveraged to learn multi-modal stance features from textual and visual modalities. Experimental results on our five benchmark datasets show that the proposed TMPT achieves state-of-the-art performance in multi-modal stance detection.</abstract>
@@ -16202,7 +16202,7 @@
       <title>Enhancing Idiomatic Representation in Multiple Languages via an Adaptive Contrastive Triplet Loss</title>
       <author><first>Wei</first><last>He</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Marco</first><last>Idiart</last><affiliation>Universidade Federal do Rio Grande do Sul</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Aline</first><last>Villavicencio</last><affiliation>University of Exeter and University of Sheffield</affiliation></author>
       <pages>12473-12485</pages>
       <abstract>Accurately modeling idiomatic or non-compositional language has been a longstanding challenge in Natural Language Processing (NLP). This is partly because these expressions do not derive their meanings solely from their constituent words, but also due to the scarcity of relevant data resources, and their impact on the performance of downstream tasks such as machine translation and simplification. In this paper we propose an approach to model idiomaticity effectively using a triplet loss that incorporates the asymmetric contribution of components words to an idiomatic meaning for training language models by using adaptive contrastive learning and resampling miners to build an idiomatic-aware learning objective. Our proposed method is evaluated on a SemEval challenge and outperforms previous alternatives significantly in many metrics.</abstract>
@@ -16350,7 +16350,7 @@
     </paper>
     <paper id="752">
       <title>Strong hallucinations from negation and how to fix them</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Swarnadeep</first><last>Bhar</last></author>
       <pages>12670-12687</pages>
       <abstract>Despite great performance on many tasks, language models (LMs) still struggle with reasoning, sometimes providing responses that cannot possibly be true because they stem from logical incoherence. We call such responses strong hallucinations and prove that they follow from an LM’s computation of its internal representations for logical operators and outputs from those representations. Focusing on negation, we provide a novel solution in which negation is treated not as another element of a latent representation, but as an operation over an LM’s latent representations that constrains how they may evolve. We show that our approach improves model performance in cloze prompting and natural language inference tasks with negation without requiring training on sparse negative data.</abstract>
@@ -16361,7 +16361,7 @@
     <paper id="753">
       <title><fixed-case>LLM</fixed-case>s as Narcissistic Evaluators: When Ego Inflates Evaluation Scores</title>
       <author><first>Yiqi</first><last>Liu</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Nafise</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>University of Manchester</affiliation></author>
       <pages>12688-12701</pages>
       <abstract>Automatic evaluation of generated textual content presents an ongoing challenge within the field of NLP. Given the impressive capabilities of modern language models (LMs) across diverse NLP tasks, there is a growing trend to employ these models in creating innovative evaluation metrics for automated assessment of generation tasks. This paper investigates a pivotal question: Do language model-driven evaluation metrics inherently exhibit bias favoring texts generated by the same underlying language model? Specifically, we assess whether prominent LM-based evaluation metrics (e.g. BARTScore, T5Score, and GPTScore) demonstrate a favorable bias toward their respective underlying LMs in the context of summarization tasks. Our findings unveil a latent bias, particularly pronounced when such evaluation metrics are used in a reference-free manner without leveraging gold summaries. These results underscore that assessments provided by generative evaluation models can be influenced by factors beyond the inherent text quality, highlighting the necessity of developing more reliable evaluation protocols in the future.</abstract>
@@ -16425,7 +16425,7 @@
       <author><first>Zihan</first><last>Xue</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Nilay</first><last>Pochhi</last></author>
       <author><first>Sahil</first><last>Bansal</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Amazon/Alexa</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Amazon/Alexa</affiliation></author>
       <author><first>Jayanth</first><last>Srinivasa</last></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>12769-12781</pages>
@@ -16439,7 +16439,7 @@
       <author><first>I-Hung</first><last>Hsu</last></author>
       <author><first>Zifeng</first><last>Wang</last><affiliation>Google</affiliation></author>
       <author><first>Long</first><last>Le</last><affiliation>Google</affiliation></author>
-      <author><first>Lesly</first><last>Miculicich</last><affiliation>Google</affiliation></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last><affiliation>Google</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Chen-Yu</first><last>Lee</last><affiliation>Google</affiliation></author>
       <author><first>Tomas</first><last>Pfister</last><affiliation>Google</affiliation></author>
@@ -16456,7 +16456,7 @@
       <author><first>Tanmay</first><last>Parekh</last></author>
       <author><first>Zhiyu</first><last>Xie</last></author>
       <author><first>Zixuan</first><last>Zhang</last></author>
-      <author><first>Prem</first><last>Natarajan</last><affiliation>Amazon/Alexa</affiliation></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last><affiliation>Amazon/Alexa</affiliation></author>
       <author><first>Kai-Wei</first><last>Chang</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
@@ -16473,7 +16473,7 @@
       <author><first>Michael</first><last>Madaio</last><affiliation>Google</affiliation></author>
       <author><first>Hal</first><last>Daumé Iii</last><affiliation>University of Maryland - College Park, University of Maryland, College Park and Microsoft</affiliation></author>
       <author><first>Christina</first><last>Harrington</last><affiliation>Google</affiliation></author>
-      <author><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
       <pages>12826-12833</pages>
       <abstract>This paper examines the experiences of African American Language (AAL) speakers when using language technologies. Previous work has used quantitative methods to uncover performance disparities between AAL speakers and White Mainstream English speakers when using language technologies, but has not sought to understand the impacts of these performance disparities on AAL speakers. Through interviews with 19 AAL speakers, we focus on understanding such impacts in a contextualized and human-centered manner. We find that AAL speakers often undertake invisible labor of adapting their speech patterns to successfully use language technologies, and they make connections between failures of language technologies for AAL speakers and a lack of inclusion of AAL speakers in language technology design processes and datasets. Our findings suggest that NLP researchers and practitioners should invest in developing contextualized and human-centered evaluations of language technologies that seek to understand the impacts of performance disparities on speakers of underrepresented languages and language varieties.</abstract>
       <url hash="7088da3c">2024.findings-acl.761</url>
@@ -16486,7 +16486,7 @@
       <author><first>Ge</first><last>Zhang</last></author>
       <author><first>Tianhao</first><last>Shen</last></author>
       <author><first>Xueling</first><last>Liu</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Jie</first><last>Fu</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Wenhu</first><last>Chen</last><affiliation>University of Waterloo and Google</affiliation></author>
       <author><first>Xiang</first><last>Yue</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -16562,7 +16562,7 @@
       <author><first>Tejas</first><last>Srinivasan</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Jack</first><last>Hessel</last><affiliation>Samaya AI</affiliation></author>
       <author><first>Tanmay</first><last>Gupta</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Jesse</first><last>Thomason</last><affiliation>University of Southern California and Amazon</affiliation></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
@@ -16612,7 +16612,7 @@
       <author><first>Mir Tafseer</first><last>Nayeem</last></author>
       <author><first>Samsul</first><last>Islam</last></author>
       <author><first>Abu Ubaida</first><last>Akash</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>12991-13024</pages>
       <abstract>Millions of news articles published online daily can overwhelm readers. Headlines and entity (topic) tags are essential for guiding readers to decide if the content is worth their time. While headline generation has been extensively studied, tag generation remains largely unexplored, yet it offers readers better access to topics of interest. The need for conciseness in capturing readers’ attention necessitates improved content selection strategies for identifying salient and relevant segments within lengthy articles, thereby guiding language models effectively. To address this, we propose to leverage auxiliary information such as images and captions embedded in the articles to retrieve relevant sentences and utilize instruction tuning with variations to generate both headlines and tags for news articles in a multilingual context. To make use of the auxiliary information, we have compiled a dataset named XL-HeadTags, which includes 20 languages across 6 diverse language families. Through extensive evaluation, we demonstrate the effectiveness of our plug-and-play multimodal-multilingual retrievers for both tasks. Additionally, we have developed a suite of tools for processing and evaluating multilingual texts, significantly contributing to the research community by enabling more accurate and efficient analysis across languages.</abstract>
       <url hash="75cf664a">2024.findings-acl.771</url>
@@ -16628,7 +16628,7 @@
       <author><first>Sangwoo</first><last>Cho</last><affiliation>Capital One</affiliation></author>
       <author><first>Xiaoyang</first><last>Wang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xuansheng</first><last>Wu</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
       <pages>13025-13048</pages>
@@ -16790,7 +16790,7 @@
       <title>Figuratively Speaking: Authorship Attribution via Multi-Task Figurative Language Modeling</title>
       <author><first>Gregorios</first><last>Katsios</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Ning</first><last>Sa</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
-      <author><first>Tomek</first><last>Strzalkowski</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <pages>13240-13255</pages>
       <abstract>The identification of Figurative Language (FL) features in text is crucial for various Natural Language Processing (NLP) tasks, where understanding of the author’s intended meaning and its nuances is key for successful communication. At the same time, the use of a specific blend of various FL forms most accurately reflects a writer’s style, rather than the use of any single construct, such as just metaphors or irony. Thus, we postulate that FL features could play an important role in Authorship Attribution (AA) tasks. We believe that our is the first computational study of AA based on FL use. Accordingly, we propose a Multi-task Figurative Language Model (MFLM) that learns to detect multiple FL features in text at once. We demonstrate, through detailed evaluation across multiple test sets, that the our model tends to perform equally or outperform specialized binary models in FL detection. Subsequently, we evaluate the predictive capability of joint FL features towards the AA task on three datasets, observing improved AA performance through the integration of MFLM embeddings.</abstract>
       <url hash="7c0977f8">2024.findings-acl.784</url>
@@ -16892,7 +16892,7 @@
       <author><first>Tianyu</first><last>Zhu</last></author>
       <author><first>Zhan</first><last>Su</last></author>
       <author><first>Kaiyu</first><last>Huang</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>13366-13378</pages>
       <abstract>Conversational search facilitates complex information retrieval by enabling multi-turn interactions between users and the system. Supporting such interactions requires a comprehensive understanding of the conversational inputs to formulate a good search query based on historical information. In particular, the search query should include the relevant information from the previous conversation turns.However, current approaches for conversational dense retrieval primarily rely on fine-tuning a pre-trained ad-hoc retriever using the whole conversational search session, which can be lengthy and noisy. Moreover, existing approaches are limited by the amount of manual supervision signals in the existing datasets.To address the aforementioned issues, we propose a **H**istory-**A**ware **Conv**ersational **D**ense **R**etrieval (HAConvDR) system, which incorporates two ideas: context-denoised query reformulation and automatic mining of supervision signals based on the actual impact of historical turns.Experiments on two public conversational search datasets demonstrate the improved history modeling capability of HAConvDR, in particular for long conversations with topic shifts.</abstract>
       <url hash="5968b712">2024.findings-acl.792</url>
@@ -17170,7 +17170,7 @@
     </paper>
     <paper id="813">
       <title><fixed-case>F</fixed-case>resh<fixed-case>LLM</fixed-case>s: Refreshing Large Language Models with Search Engine Augmentation</title>
-      <author><first>Tu</first><last>Vu</last><affiliation>Virginia Tech and Google</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>Virginia Tech and Google</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <author><first>Xuezhi</first><last>Wang</last><affiliation>Google</affiliation></author>
       <author><first>Noah</first><last>Constant</last></author>
@@ -17179,8 +17179,8 @@
       <author><first>Chris</first><last>Tar</last></author>
       <author><first>Yun-Hsuan</first><last>Sung</last><affiliation>Google</affiliation></author>
       <author><first>Denny</first><last>Zhou</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>Quoc</first><last>Le</last><affiliation>Google</affiliation></author>
-      <author><first>Thang</first><last>Luong</last><affiliation>Google</affiliation></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last><affiliation>Google</affiliation></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last><affiliation>Google</affiliation></author>
       <pages>13697-13720</pages>
       <abstract>Since most large language models (LLMs) are trained once and never updated, they struggle to dynamically adapt to our ever-changing world. In this work, we present FreshQA, a dynamic QA benchmark that tests a model’s ability to answer questions that may require reasoning over up-to-date world knowledge. We develop a two-mode human evaluation procedure to measure both correctness and hallucination, which we use to benchmark both closed and open-source LLMs by collecting &gt;50K human judgments. We observe that all LLMs struggle to answer questions that require fast-changing world knowledge as well as questions with false premises that need to be debunked. In response, we develop FreshPrompt, a few-shot prompting method that curates and organizes relevant information from a search engine into an LLM’s prompt. Our experiments show that FreshPrompt outperforms both competing search engine-augmented prompting methods such as Self-Ask (Press et al., 2022) as well as commercial systems such as Perplexity.AI. To facilitate future work, we additionally develop FreshEval, a reliable autorater for quick evaluation and comparison on FreshQA. Our latest results with FreshEval suggest that open-source LLMs such as Mixtral (Jiang et al., 2024), when combined with FreshPrompt, are competitive with closed-source and commercial systems on search-augmented QA.</abstract>
       <url hash="2335713c">2024.findings-acl.813</url>
@@ -17224,7 +17224,7 @@
       <author><first>Yingqian</first><last>Min</last></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Dawei</first><last>Gao</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>He</first><last>Hu</last><affiliation>Renmin University of China, Renmin University of China</affiliation></author>
       <author><first>Yaliang</first><last>Li</last><affiliation>Alibaba Group</affiliation></author>
       <pages>13748-13761</pages>
@@ -17312,7 +17312,7 @@
       <author><first>Zhen</first><last>Yu</last><affiliation>Tencent</affiliation></author>
       <author><first>Fei</first><last>Zhao</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <pages>13857-13867</pages>
       <abstract>Text2SQL is a task that translates natural language into SQL statements. Context-dependent Text2SQL offers a more natural database interaction by simulating dialogues between users and databases, with CoSQL and SparC as representative datasets. Yet, these datasets struggle to accurately replicate real-world situations. To address this, we introduce MultiSQL, which extends them in three key aspects: (1) Diverse SQL Operations. We incorporate diverse SQL types such as Create, Update, and Insert to broaden the scope of SQL operations. (2) Schema-Integrated Context. We integrated query context with database schema dependencies to better depict database complexity. (3) Extended Dialogues. We expand dialogue length to better simulate long conversations and complex interactions. This multi-type, schema-integrated, context-dependent Text2SQL dataset comprises nearly 800 dialogue groups and over 9,000 interaction turns across 166 complex databases, offering a better benchmark for interactive user-database dialogue.Addressing MultiSQL’s challenges, we refined evaluation metrics to better capture diverse SQL types and schema dependencies. We designed a prompt framework that leverages historical data and self-refinement to accurately capture the dependency between text queries and database structures. Experiments with GPT-3.5, GPT-4, and LLaMA2-7B show both the effectiveness of our strategies and the challenges of MultiSQL. The datasets is available at https://github.com/grandchicken/MultiSQL.</abstract>
       <url hash="87069468">2024.findings-acl.823</url>
@@ -17350,7 +17350,7 @@
       <author><first>Gladys</first><last>Tyen</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Hassan</first><last>Mansoor</last><affiliation>Google</affiliation></author>
       <author><first>Victor</first><last>Carbune</last><affiliation>Google</affiliation></author>
-      <author><first>Peter</first><last>Chen</last><affiliation>Google</affiliation></author>
+      <author id="yuanzhu-peter-chen"><first>Peter</first><last>Chen</last><affiliation>Google</affiliation></author>
       <author><first>Tony</first><last>Mak</last><affiliation>Google</affiliation></author>
       <pages>13894-13908</pages>
       <abstract>While self-correction has shown promise in improving LLM outputs in terms of style and quality (e.g. Chen et al., 2023b; Madaan et al.,2023), recent attempts to self-correct logical or reasoning errors often cause correct answers to become incorrect, resulting in worse performances overall (Huang et al., 2023). In this paper, we show that poor self-correction performance stems from LLMs’ inability tofind logical mistakes, rather than their ability to correct a known mistake. Firstly, we benchmark several state-of-the-art LLMs ontheir mistake-finding ability and demonstrate that they generally struggle with the task, even in highly objective, unambiguous cases. Secondly, we test the correction abilities of LLMs – separately from mistake finding – using a backtracking setup that feeds ground truth mistake location information to the model. We show that this boosts downstream task performance across our 5 reasoning tasks, indicating that LLMs’ correction abilities are robust. Finally, we show that it is possible to obtain mistake location information without ground truth labels or in-domain training data. We train a small classifier with out-of-domain data, which exhibits stronger mistake-finding performance than prompting a large model. We release our dataset of LLM-generated logical mistakes, BIG-Bench Mistake, to enable further research into locating LLM reasoning mistakes.</abstract>
@@ -17367,7 +17367,7 @@
       <author><first>Cristina</first><last>Giannone</last></author>
       <author><first>Andrea</first><last>Favalli</last><affiliation>Almawave</affiliation></author>
       <author><first>Raniero</first><last>Romagnoli</last><affiliation>University of Roma “La Sapienza”</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>13909-13920</pages>
       <abstract>Understanding textual description to generate code seems to be an achieved capability of instruction-following Large Language Models (LLMs) in zero-shot scenario. However, there is a severe possibility that this translation ability may be influenced by having seen target textual descriptions and the related code. This effect is known as Data Contamination.In this study, we investigate the impact of Data Contamination on the performance of GPT-3.5 in the Text-to-SQL code-generating tasks. Hence, we introduce a novel method to detect Data Contamination in GPTs and examine GPT-3.5’s Text-to-SQL performances using the known Spider Dataset and our new unfamiliar dataset Termite. Furthermore, we analyze GPT-3.5’s efficacy on databases with modified information via an adversarial table disconnection (ATD) approach, complicating Text-to-SQL tasks by removing structural pieces of information from the database. Our results indicate a significant performance drop in GPT-3.5 on the unfamiliar Termite dataset, even with ATD modifications, highlighting the effect of Data Contamination on LLMs in Text-to-SQL translation tasks.</abstract>
       <url hash="cbb712be">2024.findings-acl.827</url>
@@ -17393,7 +17393,7 @@
       <author><first>Mohanna</first><last>Hoveyda</last></author>
       <author><first>Arjen</first><last>Vries</last><affiliation>Institute for Computing and Information Sciences, Radboud University Nijmegen, Radboud University</affiliation></author>
       <author><first>Faegheh</first><last>Hasibi</last><affiliation>Radboud University</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>13938-13946</pages>
       <abstract>Entity linking (EL) in conversations faces notable challenges in practical applications, primarily due to scarcity of entity-annotated conversational datasets and sparse knowledge bases (KB) containing domain-specific, long-tail entities. We designed targeted evaluation scenarios to measure the efficacy of EL models under resource constraints. Our evaluation employs two KBs: Fandom, exemplifying real-world EL complexities, and the widely used Wikipedia. First, we assess EL models’ ability to generalize to a new unfamiliar KB using Fandom and a novel zero-shot conversational entity linking dataset that we curated based on Reddit discussions on Fandom entities. We then evaluate the adaptability of EL models to conversational settings without prior training. Our results indicate that current zero-shot EL models falter when introduced to new, domain-specific KBs without prior training, significantly dropping in performance.Our findings reveal that previous evaluation approaches fall short of capturing real-world complexities for zero-shot EL, highlighting the necessity for new approaches to design and assess conversational EL models to adapt to limited resources. The evaluation frame-work and dataset proposed are tailored to facilitate this research.</abstract>
       <url hash="2ad6aa92">2024.findings-acl.829</url>
@@ -17467,7 +17467,7 @@
       <author><first>Hang</first><last>Yan</last><affiliation>AI lab</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiaoling</first><last>Wang</last><affiliation>East China Normal University</affiliation></author>
       <pages>14024-14040</pages>
       <abstract>Generalizing to longer sentences is important for recent Transformer-based language models. Besides algorithms manipulating explicit position features, the success of Transformers without position encodings (NoPE) provides a new way to overcome the challenge. In this paper, we study the length generalization property of NoPE. We find that although NoPE can extend to longer sequences than the commonly used explicit position encodings, it still has a limited context length. We identify a connection between the failure of NoPE’s generalization and the distraction of attention distributions. We propose a parameter-efficient tuning for searching attention heads’ best temperature hyper-parameters, which substantially expands NoPE’s context size. Experiments on long sequence language modeling, the synthetic passkey retrieval task and real-world long context tasks show that NoPE can achieve competitive performances with state-of-the-art length generalization algorithms. The source code is publicly accessible</abstract>
@@ -17513,7 +17513,7 @@
       <author><first>Lixin</first><last>Zou</last><affiliation>School of Cyber Science and Engineering, Wuhan University</affiliation></author>
       <author><first>Dan</first><last>Luo</last><affiliation>Lehigh University</affiliation></author>
       <author><first>Xiangyang</first><last>Luo</last><affiliation>State Key Lab of Mathematical Engineering and Advanced Computing</affiliation></author>
-      <author id="zihao-li"><first>Zihao</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
+      <author><first>Zihao</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Min</first><last>Tang</last><affiliation>Monash University</affiliation></author>
       <author><first>Chenliang</first><last>Li</last></author>
       <pages>14081-14094</pages>
@@ -17645,8 +17645,8 @@
       <title>Pushing the Limits of Zero-shot End-to-End Speech Translation</title>
       <author><first>Ioannis</first><last>Tsiamas</last></author>
       <author><first>Gerard I.</first><last>Gállego</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>14245-14267</pages>
       <abstract>Data scarcity and the modality gap between the speech and text modalities are two major obstacles of end-to-end Speech Translation (ST) systems, thus hindering their performance. Prior work has attempted to mitigate these challenges by leveraging external MT data and optimizing distance metrics that bring closer the speech-text representations. However, achieving competitive results typically requires some ST data. For this reason, we introduce ZeroSwot, a method for zero-shot ST that bridges the modality gap without any paired ST data. Leveraging a novel CTC compression and Optimal Transport, we train a speech encoder using only ASR data, to align with the representation space of a massively multilingual MT model. The speech encoder seamlessly integrates with the MT model at inference, enabling direct translation from speech to text, across all languages supported by the MT model. Our experiments show that we can effectively close the modality gap without ST data, while our results on MuST-C and CoVoST demonstrate our method’s superiority over not only previous zero-shot models, but also supervised ones, achieving state-of-the-art results.</abstract>
       <url hash="4916d6f6">2024.findings-acl.847</url>
@@ -17722,7 +17722,7 @@
       <author><first>Zihe</first><last>Liu</last></author>
       <author><first>Xueqiang</first><last>Lyu</last><affiliation>Beijing Information Science And Technology University</affiliation></author>
       <author><first>Peng</first><last>Jin</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>14367-14378</pages>
       <abstract>Temporal knowledge graph reasoning has emerged as a crucial task for answering time-dependent questions within a knowledge graph (KG).Despite tremendous progress, the present research is impeded by the sparsity of a temporal KG and an over-reliance on simple single-relational reasoning patterns. To overcome these challenges, we introduce MulQuestions, a new temporal KG reasoning benchmark featuring over 200k entities and 960k questions designed to facilitate complex, multi-relational and multi-hop reasoning. Additionally, we propose a new model adept at conducting pattern-aware and time-sensitive reasoning across temporal KGs. The model’s efficacy is confirmed through rigorous evaluations, showcasing its effectiveness in sparse data conditions and adeptness at handling questions with long reasoning chains. We have made our benchmark and model publicly accessible at [https://anonymous].</abstract>
       <url hash="7303c2ed">2024.findings-acl.853</url>
@@ -17773,7 +17773,7 @@
       <title>Part-of-speech Tagging for Extremely Low-resource <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Sanjeev</first><last>Kumar</last></author>
       <author><first>Preethi</first><last>Jyothi</last><affiliation>Indian Institute of Technology Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>14422-14431</pages>
       <abstract>Modern natural language processing (NLP) systems thrive when given access to large datasets. However, a large fraction of the world’s languages are not privy to such benefits due to sparse documentation and inadequate digital representation. This is especially true for Indian regional languages. As a first step towards expanding the reach of NLP technologies to extremely low-resource Indian languages, we present a new parallel part-of-speech (POS) evaluation dataset for Angika, Magahi, Bhojpuri and Hindi. Angika, Magahi, Bhojpuri, along with the more well-known Hindi, are all languages spoken in the Indian states of Bihar, Jharkhand and West Bengal. Ours is notably the first NLP resource, even for a shallow NLP task like POS-tagging, for Angika. We establish POS-tagging baselines using state-of-the-art multilingual pretrained language models (PLMs) finetuned on Hindi data, and show zero-shot evaluations on the other three languages. While all four languages use the same Devanagari script, pretrained tokenizers underperform in zero-shot on the three languages. We propose a simple look-back fix to address the tokenization challenge yielding F1-score improvements of up to 8% on Angika and show how it comes very close to an oracle setting when the underlying Hindi word is known (and can be accurately tokenized).</abstract>
       <url hash="44f5bd21">2024.findings-acl.857</url>
@@ -17825,7 +17825,7 @@
       <author><first>Emaan</first><last>Abbas</last></author>
       <author><first>Abdul Hameed</first><last>Azeemi</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <author><first>Ihsan Ayyub</first><last>Qazi</last><affiliation>Lahore University of Management Sciences</affiliation></author>
-      <author><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <pages>14470-14480</pages>
       <abstract>Deepfakes, particularly in the auditory domain, have become a significant threat, necessitating the development of robust countermeasures. This paper addresses the escalating challenges posed by deepfake attacks on Automatic Speaker Verification (ASV) systems. We present a novel Urdu deepfake audio dataset for deepfake detection, focusing on two spoofing attacks – Tacotron and VITS TTS. The dataset construction involves careful consideration of phonemic cover and balance and comparison with existing corpora like PRUS and PronouncUR. Evaluation with AASIST-L model shows EERs of 0.495 and 0.524 for VITS TTS and Tacotron-generated audios, respectively, with variability across speakers. Further, this research implements a detailed human evaluation, incorporating a user study to gauge whether people are able to discern deepfake audios from real (bonafide) audios. The ROC curve analysis shows an area under the curve (AUC) of 0.63, indicating that individuals demonstrate a limited ability to detect deepfakes (approximately 1 in 3 fake audio samples are regarded as real). Our work contributes a valuable resource for training deepfake detection models in low-resource languages like Urdu, addressing the critical gap in existing datasets. The dataset is publicly available at: https://github.com/CSALT-LUMS/urdu-deepfake-dataset.</abstract>
       <url hash="5e2590d6">2024.findings-acl.861</url>
@@ -17920,7 +17920,7 @@
       <author><first>Meng</first><last>Han</last></author>
       <author><first>Ruofei</first><last>Lai</last></author>
       <author><first>Xinyu</first><last>Zhang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>14575-14595</pages>
       <abstract>How can we construct an automated debate judge to evaluate an extensive, vibrant, multi-turn debate? This task is challenging, as judging a debate involves grappling with lengthy texts, intricate argument relationships, and multi-dimensional assessments.At the same time, current research mainly focuses on short dialogues, rarely touching upon the evaluation of an entire debate.In this paper, by leveraging Large Language Models (LLMs), we propose Debatrix, which makes the analysis and assessment of multi-turn debates more aligned with majority preferences. Specifically, Debatrix features a vertical, iterative chronological analysis and a horizontal, multi-dimensional evaluation collaboration.To align with real-world debate scenarios, we introduced the PanelBench benchmark, comparing our system’s performance to actual debate outcomes.The findings indicate a notable enhancement over directly using LLMs for debate evaluation.Source code and benchmark data are available at https://github.com/ljcleo/debatrix.</abstract>
@@ -17946,7 +17946,7 @@
       <title>Towards a new research agenda for multimodal enterprise document understanding: What are we missing?</title>
       <author><first>Armineh</first><last>Nourbakhsh</last><affiliation>School of Computer Science, Carnegie Mellon University and J.P. Morgan Chase</affiliation></author>
       <author><first>Sameena</first><last>Shah</last><affiliation>J.P. Morgan Chase</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>14610-14622</pages>
       <abstract>The field of multimodal document understanding has produced a suite of models that have achieved stellar performance across several tasks, even coming close to human performance on certain benchmarks. Nevertheless, the application of these models to real-world enterprise datasets remains constrained by a number of limitations. In this position paper, we discuss these limitations in the context of three key aspects of research: dataset curation, model development, and evaluation on downstream tasks. By analyzing 14 datasets and 7 SotA models, we identify major gaps in their utility in the context of a real-world scenario. We demonstrate how each limitation impedes the widespread use of SotA models in enterprise settings, and present a set of research challenges that are motivated by these limitations. Lastly, we propose a research agenda that is aimed at driving the field towards higher impact in enterprise applications.</abstract>
       <url hash="fc9e084e">2024.findings-acl.870</url>
@@ -17959,7 +17959,7 @@
       <author><first>Zhaochun</first><last>Ren</last><affiliation>Leiden University</affiliation></author>
       <author><first>Arian</first><last>Askari</last></author>
       <author><first>Mohammad</first><last>Aliannejadi</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Suzan</first><last>Verberne</last><affiliation>Universiteit Leiden</affiliation></author>
       <pages>14623-14635</pages>
       <abstract>An important unexplored aspect in previous work on user satisfaction estimation for Task-Oriented Dialogue (TOD) systems is their evaluation in terms of robustness for the identification of user dissatisfaction: current benchmarks for user satisfaction estimation in TOD systems are highly skewed towards dialogues for which the user is satisfied. The effect of having a more balanced set of satisfaction labels on performance is unknown. However, balancing the data with more dissatisfactory dialogue samples requires further data collection and human annotation, which is costly and time-consuming. In this work, we leverage large language models (LLMs) and unlock their ability to generate satisfaction-aware counterfactual dialogues to augment the set of original dialogues of a test collection. We gather human annotations to ensure the reliability of the generated samples. We evaluate two open-source LLMs as user satisfaction estimators on our augmented collection against state-of-the-art fine-tuned models. Our experiments show that when used as few-shot user satisfaction estimators, open-source LLMs show higher robustness to the increase in the number of dissatisfaction labels in the test collection than the fine-tuned state-of-the-art models. Our results shed light on the need for data augmentation approaches for user satisfaction estimation in TOD systems. We release our aligned counterfactual dialogues, which are curated by human annotation, to facilitate further research on this topic.</abstract>
@@ -18077,7 +18077,7 @@
       <author><first>Zixia</first><last>Jia</last></author>
       <author><first>Mengmeng</first><last>Wang</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <author><first>Baichen</first><last>Tong</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
-      <author><first>Song-Chun</first><last>Zhu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <author><first>Zilong</first><last>Zheng</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <pages>14778-14814</pages>
       <abstract>Recent advances in Large Language Models (LLMs) have shown inspiring achievements in constructing autonomous agents that rely onlanguage descriptions as inputs. However, it remains unclear how well LLMs can function as few-shot or zero-shot embodied agents in dynamic interactive environments. To address this gap, we introduce LangSuit·E, a versatile and simulation-free testbed featuring 6 representative embodied tasks in textual embodied worlds. Compared with previous LLM-based testbeds, LangSuit·E (i) offers adaptability to diverse environments without multiple simulation engines, (ii) evaluates agents’ capacity to develop “internalized world knowledge” with embodied observations, and (iii) allows easy customization of communication and action strategies. To address the embodiment challenge, we devise a novel chain-of-thought (CoT) schema, EmMem, which summarizes embodied states w.r.t. history information. Comprehensive benchmark results illustrate challenges and insights of embodied planning. LangSuit·E represents a significant step toward building embodied generalists in the context of language models.</abstract>
@@ -18093,7 +18093,7 @@
       <author><first>Peter</first><last>Zeng</last><affiliation>State University of New York at Stony Brook</affiliation></author>
       <author><first>Magdalena</first><last>Markowska</last></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last><affiliation>University of Guilan</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>14815-14823</pages>
       <abstract>Evaluating the theory of mind (ToM) capabilities of language models (LMs) has recently received a great deal of attention. However, many existing benchmarks rely on synthetic data, which risks misaligning the resulting experiments with human behavior. We introduce the first ToM dataset based on naturally occurring spoken dialogs, Common-ToM, and show that LMs struggle to demonstrate ToM. We then show that integrating a simple, explicit representation of beliefs improves LM performance on Common-ToM.</abstract>
       <url hash="25fa7dfa">2024.findings-acl.880</url>
@@ -18229,7 +18229,7 @@
       <author><first>Zifeng</first><last>Wang</last><affiliation>Google</affiliation></author>
       <author><first>Long</first><last>Le</last><affiliation>Google</affiliation></author>
       <author><first>Abhishek</first><last>Kumar</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Alexander</first><last>Ratner</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Chen-Yu</first><last>Lee</last><affiliation>Google</affiliation></author>
       <author><first>Ranjay</first><last>Krishna</last><affiliation>Department of Computer Science</affiliation></author>
@@ -18262,7 +18262,7 @@
       <author><first>Zander</first><last>Brumbaugh</last><affiliation>Department of Computer Science</affiliation></author>
       <author><first>Yizhong</first><last>Wang</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington, University of Washington, Allen Institute for Artificial Intelligence and University of Washington, Seattle</affiliation></author>
-      <author><first>Noah</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>15015-15040</pages>
       <abstract>Language models (LMs) are trained on web text originating from many points in time and, in general, without any explicit temporal grounding. This work investigates the temporal chaos of pretrained LMs and explores various methods to align their internal knowledge to a target time, which we call “temporal alignment.” To do this, we first automatically construct a dataset containing 20K time-sensitive questions and their answers for each year from 2000 to 2023. Based on this dataset, we empirically show that pretrained LMs (e.g., LLaMa2), despite having a recent pretraining cutoff (e.g., 2022), mostly answer questions using earlier knowledge (e.g., in 2019). We then develop several methods, from prompting to finetuning, to align LMs to use their most recent knowledge when answering questions, and investigate various factors in this alignment. Our experiments demonstrate that aligning LLaMa2 to the year 2022 can enhance its performance by up to 62% according to that year’s answers. This improvement occurs even without explicitly mentioning time information, indicating the possibility of aligning models’ internal sense of time after pretraining. Finally, we find that alignment to a historical time is also possible, with up to <tex-math>2.8\times</tex-math> the performance of the unaligned LM in 2010 if finetuning models to that year. These findings hint at the sophistication of LMs’ internal knowledge organization and the necessity of tuning them properly.</abstract>
       <url hash="ff732010">2024.findings-acl.892</url>
@@ -18468,7 +18468,7 @@
       <author><first>Portia</first><last>Botchway</last><affiliation>Vanderbilt University</affiliation></author>
       <author><first>Jessica</first><last>Quaye</last><affiliation>Harvard University</affiliation></author>
       <author><first>Chris</first><last>Brockett</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>15353-15368</pages>
       <abstract>Advancements in large language models (LLMs) are revolutionizing interactive game design, enabling dynamic plotlines and interactions between players and non-player characters (NPCs). However, LLMs may exhibit flaws such as hallucinations, forgetfulness, or misinterpretations of prompts, causing logical inconsistencies and unexpected deviations from intended designs. Automated techniques for detecting such game bugs are still lacking. To address this, we propose a systematic LLM-based method for automatically identifying such bugs from player game logs, eliminating the need for collecting additional data such as post-play surveys. Applied to a text-based game DejaBoom!, our approach effectively identifies bugs inherent in LLM-powered interactive games, surpassing unstructured LLM-powered bug-catching methods and filling the gap in automated detection of logical and design flaws.</abstract>
       <url hash="ff0668cc">2024.findings-acl.907</url>
@@ -18491,7 +18491,7 @@
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
       <pages>15380-15395</pages>
       <abstract>It is increasingly common to evaluate the same coreference resolution (CR) model on multiple datasets. Do these multi-dataset evaluations allow us to draw meaningful conclusions about model generalization? Or, do they rather reflect the idiosyncrasies of a particular experimental setup (e.g., the specific datasets used)? To study this, we view evaluation through the lens of measurement modeling, a framework commonly used in the social sciences for analyzing the validity of measurements. By taking this perspective, we show how multi-dataset evaluations risk conflating different factors concerning what, precisely, is being measured. This in turn makes it difficult to draw more generalizable conclusions from these evaluations. For instance, we show that across seven datasets, measurements intended to reflect CR model generalization are often correlated with differences in both how coreference is defined and how it is operationalized; this limits our ability to draw conclusions regarding the ability of CR models to generalize across any singular dimension. We believe the measurement modeling framework provides the needed vocabulary for discussing challenges surrounding what is actually being measured by CR evaluations.</abstract>
       <url hash="28033dd4">2024.findings-acl.909</url>
@@ -18674,7 +18674,7 @@
       <author><first>Jialu</first><last>Liu</last><affiliation>Google Research</affiliation></author>
       <author><first>Simon</first><last>Baumgartner</last><affiliation>Google</affiliation></author>
       <author><first>Michael</first><last>Bendersky</last><affiliation>Google</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>15623-15636</pages>
       <abstract>Large Language Models (LLMs) have exhibited impressive capabilities in various tasks, yet their vast parameter sizes restrict their applicability in resource-constrained settings. Knowledge distillation (KD) offers a viable solution by transferring expertise from large teacher models to compact student models. However, traditional KD techniques face specific challenges when applied to LLMs, including restricted access to LLM outputs, significant teacher-student capacity gaps, and the inherited mis-calibration issue. In this work, we present PLaD, a novel preference-based LLM distillation framework. PLaD exploits the teacher-student capacity discrepancy to generate pseudo-preference pairs where teacher outputs are preferred over student outputs. Then, PLaD leverages a ranking loss to re-calibrate the student’s estimation of sequence likelihood, which steers the student’s focus towards understanding the relative quality of outputs instead of simply imitating the teacher. PLaD bypasses the need for access to teacher LLM’s internal states, tackles the student’s expressivity limitations, and mitigates the student mis-calibration issue. Through extensive experiments on two sequence generation tasks and with various LLMs, we demonstrate the effectiveness of our proposed PLaD framework.</abstract>
       <url hash="c109b7e8">2024.findings-acl.923</url>
@@ -18729,7 +18729,7 @@
       <author><first>Emmanuel</first><last>Ma</last></author>
       <author><first>Futian</first><last>Wei</last></author>
       <author><first>Afaf</first><last>Taik</last></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
       <author><first>Golnoosh</first><last>Farnadi</last></author>
       <pages>15694-15710</pages>
       <abstract>Recent progress in large language models (LLMs) has led to their widespread adoption in various domains. However, these advancements have also introduced additional safety risks and raised concerns regarding their detrimental impact on already marginalized populations.Despite growing mitigation efforts to develop safety safeguards, such as supervised safety-oriented fine-tuning and leveraging safe reinforcement learning from human feedback, multiple concerns regarding the safety and ingrained biases in these models remain. Furthermore, previous work has demonstrated that models optimized for safety often display exaggerated safety behaviors, such as a tendency to refrain from responding to certain requests as a precautionary measure. As such, a clear trade-off between the helpfulness and safety of these models has been documented in the literature. In this paper, we further investigate the effectiveness of safety measures by evaluating models on already mitigated biases. Using the case of Llama 2 as an example, we illustrate how LLMs’ safety responses can still encode harmful assumptions. To do so, we create a set of non-toxic prompts, which we then use to evaluate Llama models. Through our new taxonomy of LLMs responses to users, we observe that the safety/helpfulness trade-offs are more pronounced for certain demographic groups which can lead to different kinds of harms such as quality-of-service harms for marginalized populations.</abstract>
@@ -18741,7 +18741,7 @@
       <title><fixed-case>CT</fixed-case>ool<fixed-case>E</fixed-case>val: A <fixed-case>C</fixed-case>hinese Benchmark for <fixed-case>LLM</fixed-case>-Powered Agent Evaluation in Real-World <fixed-case>API</fixed-case> Interactions</title>
       <author><first>Zishan</first><last>Guo</last></author>
       <author><first>Yufei</first><last>Huang</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>15711-15724</pages>
       <abstract>Assessing the capabilities of large language models (LLMs) as agents in decision making and operational tasks is crucial for the development of LLM-as-agent service. We propose CToolEval, a benchmark designed to evaluate LLMs in the context of Chinese societal applications, featuring 398 APIs across 27 widely-used Apps (e.g., Apps for shopping, map, music, travel, etc.) that cover 14 domains. We further present an evaluation framework that simulates real-life scenarios, to facilitate the assessment of tool invocation ability of LLMs for tool learning and task completion ability for user interation. Our extensive experiments with CToolEval evaluate 11 LLMs, revealing that while GPT-3.5-turbo excels in tool invocation, Chinese LLMs usually struggle with issues like hallucination and a lack of comprehensive tool understanding. Our findings highlight the need for further refinement in decision-making capabilities of LLMs, offering insights into bridging the gap between current functionalities and agent-level performance. To promote further research for LLMs to fully act as reliable agents in complex, real-world situations, we release our data and codes at https://github.com/tjunlp-lab/CToolEval.</abstract>
       <url hash="a845b2fb">2024.findings-acl.928</url>
@@ -18854,7 +18854,7 @@
       <author><first>Xintong</first><last>Wang</last></author>
       <author><first>Jingheng</first><last>Pan</last></author>
       <author><first>Liang</first><last>Ding</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>15840-15853</pages>
       <abstract>Large Vision-Language Models (LVLMs) are increasingly adept at generating contextually detailed and coherent responses from visual inputs. However, their application in multimodal decision-making and open-ended generation is hindered by a notable rate of hallucinations, where generated text inaccurately represents the visual contents. To address this issue, this paper introduces the Instruction Contrastive Decoding (ICD) method, a novel approach designed to reduce hallucinations during LVLM inference. Our method is inspired by our observation that what we call disturbance instructions significantly exacerbate hallucinations in multimodal fusion modules. ICD contrasts distributions from standard and instruction disturbance, thereby increasing alignment uncertainty and effectively subtracting hallucinated concepts from the original distribution. Through comprehensive experiments on discriminative benchmarks (POPE and MME) and a generative benchmark (LLaVa-Bench), we demonstrate that ICD significantly mitigates both object-level and attribute-level hallucinations. Moreover, our method not only addresses hallucinations but also significantly enhances the general perception and recognition capabilities of LVLMs.</abstract>
       <url hash="2b8ea016">2024.findings-acl.937</url>
@@ -18943,7 +18943,7 @@
     </paper>
     <paper id="944">
       <title>A Survey on Predicting the Factuality and the Bias of News Media</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Jisun</first><last>An</last></author>
       <author><first>Haewoon</first><last>Kwak</last></author>
       <author><first>Muhammad Arslan</first><last>Manzoor</last></author>
@@ -18959,7 +18959,7 @@
       <title>Semantic Compression for Word and Sentence Embeddings using Discrete Wavelet Transform</title>
       <author><first>Rana</first><last>Salama</last><affiliation>George Washington University</affiliation></author>
       <author><first>Abdou</first><last>Youssef</last><affiliation>George Washington University</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>15963-15977</pages>
       <abstract>Wavelet transforms, a powerful mathematical tool, have been widely used in different domains, including Signal and Image processing, to unravel intricate patterns, enhance data representation, and extract meaningful features from data. Tangible results from their application suggest that Wavelet transforms can be applied to NLP capturing a variety of linguistic and semantic properties.In this paper, we empirically leverage the application of Discrete Wavelet Transforms (DWT) to word and sentence embeddings. We aim to showcase the capabilities of DWT in analyzing embedding representations at different levels of resolution and compressing them while maintaining their overall quality.We assess the effectiveness of DWT embeddings on semantic similarity tasks to show how DWT can be used to consolidate important semantic information in an embedding vector. We show the efficacy of the proposed paradigm using different embedding models, including large language models, on downstream tasks. Our results show that DWT can reduce the dimensionality of embeddings by 50-93% with almost no change in performance for semantic similarity tasks, while achieving superior accuracy in most downstream tasks. Our findings pave the way for applying DWT to improve NLP applications.</abstract>
       <url hash="00299b61">2024.findings-acl.945</url>
@@ -18986,7 +18986,7 @@
       <author><first>Yue</first><last>Yu</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Rongzhi</first><last>Zhang</last><affiliation>Georgia Institute of Technology and Zhejiang University</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>15992-16030</pages>
       <abstract>Although Large Language Models (LLMs) exhibit remarkable adaptability across domains, these models often fall short in structured knowledge extraction tasks such as named entity recognition (NER). This paper explores an innovative, cost-efficient strategy to harness LLMs with modest NER capabilities for producing superior NER datasets. Our approach diverges from the basic class-conditional prompts by instructing LLMs to self-reflect on the specific domain, thereby generating domain-relevant attributes (such as category and emotions for movie reviews), which are utilized for creating attribute-rich training data. Furthermore, we preemptively generate entity terms and then develop NER context data around these entities, effectively bypassing the LLMs’ challenges with complex structures. Our experiments across both general and niche domains reveal significant performance enhancements over conventional data generation methods while being more cost-effective than existing alternatives.</abstract>
       <url hash="c898ac29">2024.findings-acl.947</url>
@@ -19077,7 +19077,7 @@
       <author><first>Hoyun</first><last>Song</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Huije</first><last>Lee</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>16122-16143</pages>
       <abstract>Social bias is shaped by the accumulation of social perceptions towards targets across various demographic identities. To fully understand such social bias in large language models (LLMs), it is essential to consider the composite of social perceptions from diverse perspectives among identities. Previous studies have either evaluated biases in LLMs by indirectly assessing the presence of sentiments towards demographic identities in the generated text or measuring the degree of alignment with given stereotypes. These methods have limitations in directly quantifying social biases at the level of distinct perspectives among identities. In this paper, we aim to investigate how social perceptions from various viewpoints contribute to the development of social bias in LLMs. To this end, we propose a novel strategy to intuitively quantify these social perceptions and suggest metrics that can evaluate the social biases within LLMs by aggregating diverse social perceptions. The experimental results show the quantitative demonstration of the social attitude in LLMs by examining social perception. The analysis we conducted shows that our proposed metrics capture the multi-dimensional aspects of social bias, enabling a fine-grained and comprehensive investigation of bias in LLMs.</abstract>
       <url hash="75157565">2024.findings-acl.954</url>
@@ -19380,7 +19380,7 @@
   <volume id="emnlp" ingest-date="2024-11-01" type="proceedings">
     <meta>
       <booktitle>Findings of the Association for Computational Linguistics: EMNLP 2024</booktitle>
-      <editor><first>Yaser</first><last>Al-Onaizan</last></editor>
+      <editor id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></editor>
       <editor><first>Mohit</first><last>Bansal</last></editor>
       <editor><first>Yun-Nung</first><last>Chen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -19457,7 +19457,7 @@
       <author><first>Seonmin</first><last>Koo</last><affiliation>Korea University</affiliation></author>
       <author><first>Jinsung</first><last>Kim</last></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>96-125</pages>
       <abstract>Grammatical error correction (GEC) system is a practical task used in the real world, showing high achievements alongside the development of large language models (LLMs). However, these achievements have been primarily obtained in English, and there is a relative lack of performance for non-English data, such as Korean. We hypothesize that this insufficiency occurs because relying solely on the parametric knowledge of LLMs makes it difficult to thoroughly understand the given context in the Korean GEC. Therefore, we propose a Knowledge-Augmented GEC (KAGEC) framework that incorporates evidential information from external sources into the prompt for the GEC task. KAGEC first extracts salient phrases from the given source and retrieves non-parametric knowledge based on these phrases, aiming to enhance the context-aware generation capabilities of LLMs. Furthermore, we conduct validations for fine-grained error types to identify those requiring a retrieval-augmented manner when LLMs perform Korean GEC. According to experimental results, most LLMs, including ChatGPT, demonstrate significant performance improvements when applying KAGEC.</abstract>
       <url hash="99c96a74">2024.findings-emnlp.6</url>
@@ -19568,10 +19568,10 @@
       <author><first>Jielin</first><last>Qiu</last></author>
       <author><first>Andrea</first><last>Madotto</last><affiliation>FAIR</affiliation></author>
       <author><first>Zhaojiang</first><last>Lin</last><affiliation>Facebook</affiliation></author>
-      <author><first>Paul A.</first><last>Crook</last><affiliation>Meta</affiliation></author>
+      <author id="paul-a-crook"><first>Paul A.</first><last>Crook</last><affiliation>Meta</affiliation></author>
       <author><first>Yifan Ethan</first><last>Xu</last><affiliation>Meta</affiliation></author>
       <author><first>Babak</first><last>Damavandi</last></author>
-      <author><first>Xin Luna</first><last>Dong</last><affiliation>Facebook</affiliation></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last><affiliation>Facebook</affiliation></author>
       <author><first>Christos</first><last>Faloutsos</last><affiliation>Amazon and Carnegie Mellon University</affiliation></author>
       <author><first>Lei</first><last>Li</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Seungwhan</first><last>Moon</last><affiliation>Facebook</affiliation></author>
@@ -19805,7 +19805,7 @@
       <author><first>Kshitij Sharad</first><last>Jadhav</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
       <author><first>Yatin</first><last>Nandwani</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Dinesh</first><last>Raghu</last><affiliation>IBM Research - New Delhi</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>542-573</pages>
       <abstract>In this paper, we propose a modified version of the MedQA-USMLE dataset, named MEDQA-OPEN, which contains open-ended medical questions without options to mimic clinical scenarios, along with clinician-approved reasoned answers. Additionally, we implement a prompt driven by Chain of Thought (CoT) reasoning, CLINICR, to mirror the prospective process of incremental reasoning, reaching a correct response to medical questions. We empirically demonstrate how CLINICR outperforms the state-of-the-art 5-shot CoT-based prompt (Liévin et al., 2022). We also present an approach that mirrors real-life clinical practice by first exploring multiple differential diagnoses through MCQ-CLINICR and subsequently narrowing down to a final diagnosis using MCQ-ELIMINATIVE. Finally, emphasizing the importance of response verification in medical settings, we utilize a reward model mechanism, replacing the elimination process performed by MCQ-ELIMINATIVE.</abstract>
       <url hash="383a3662">2024.findings-emnlp.31</url>
@@ -19833,7 +19833,7 @@
     <paper id="33">
       <title>Unsupervised Domain Adaptation for Keyphrase Generation using Citation Contexts</title>
       <author><first>Florian</first><last>Boudin</last><affiliation>University of Nantes</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
       <pages>598-614</pages>
       <abstract>Adapting keyphrase generation models to new domains typically involves few-shot fine-tuning with in-domain labeled data. However, annotating documents with keyphrases is often prohibitively expensive and impractical, requiring expert annotators. This paper presents silk, an unsupervised method designed to address this issue by extracting silver-standard keyphrases from citation contexts to create synthetic labeled data for domain adaptation. Extensive experiments across three distinct domains demonstrate that our method yields high-quality synthetic samples, resulting in significant and consistent improvements in in-domain performance over strong baselines.</abstract>
       <url hash="8c6f30af">2024.findings-emnlp.33</url>
@@ -19895,7 +19895,7 @@
     <paper id="38">
       <title>Learning to Route for Dynamic Adapter Composition in Continual Learning with Language Models</title>
       <author><first>Vladimir</first><last>Araujo</last><affiliation>KU Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <author><first>Tinne</first><last>Tuytelaars</last><affiliation>KU Leuven</affiliation></author>
       <pages>687-696</pages>
       <abstract>Parameter-efficient fine-tuning (PEFT) methods are increasingly used with pre-trained language models (PLMs) for continual learning (CL). These methods typically involve training a PEFT module for each new task and employing similarity-based selection to route modules during inference. However, they face two major limitations: 1) interference during module training with already learned modules and 2) suboptimal routing when composing modules. In this paper, we present L2R, a method that isolates the training of new PEFT modules to ensure their task specialization. L2R then learns to compose the learned modules by training a network of routers that leverages a small memory containing examples of previously seen tasks. We evaluate our method in two CL setups using various benchmarks. Our results demonstrate that L2R provides an effective composition of PEFT modules, leading to improved generalization and performance compared to other methods.</abstract>
@@ -20063,7 +20063,7 @@
       <author><first>Peng</first><last>Qiu</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last><affiliation>Baidu</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Haifeng</first><last>Wang</last><affiliation>Baidu</affiliation></author>
@@ -20334,8 +20334,8 @@
       <author><first>Zhuosheng</first><last>Zhang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Xinbei</first><last>Ma</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>1304-1320</pages>
       <abstract>The advent of large language models (LLMs) has spurred considerable interest in advancing autonomous LLMs-based agents, particularly in intriguing applications within smartphone graphical user interfaces (GUIs). When presented with a task goal, these agents typically emulate human actions within a GUI environment until the task is completed. However, a key challenge lies in devising effective plans to guide action prediction in GUI tasks, though planning have been widely recognized as effective for decomposing complex tasks into a series of steps. Specifically, given the dynamic nature of environmental GUIs following action execution, it is crucial to dynamically adapt plans based on environmental feedback and action history.We show that the widely-used ReAct approach fails due to the excessively long historical dialogues. To address this challenge, we propose a novel approach called Dynamic Planning of Thoughts (D-PoT) for LLM-based GUI agents.D-PoT involves the dynamic adjustment of planning based on the environmental feedback and execution history. Experimental results reveal that the proposed D-PoT significantly surpassed the strong GPT-4V baseline by +12.7% (34.66% <tex-math>\rightarrow</tex-math> 47.36%) in accuracy. The analysis highlights the generality of dynamic planning in different backbone LLMs, as well as the benefits in mitigating hallucinations and adapting to unseen tasks. Code is available at https://github.com/sqzhang-lazy/D-PoT.</abstract>
@@ -20352,7 +20352,7 @@
       <author><first>Jingyan</first><last>Zhou</last></author>
       <author><first>Jingjing</first><last>Li</last></author>
       <author><first>Yichen</first><last>Gao</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Yu</first><last>Li</last><affiliation>Department of Computer Science and Engineering, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Irwin</first><last>King</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>1321-1335</pages>
@@ -20564,7 +20564,7 @@
       <title>Reconfidencing <fixed-case>LLM</fixed-case>s from the Grouping Loss Perspective</title>
       <author><first>Lihu</first><last>Chen</last><affiliation>Imperial College London</affiliation></author>
       <author><first>Alexandre</first><last>Perez-Lebel</last></author>
-      <author><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <author><first>Gaël</first><last>Varoquaux</last><affiliation>INRIA</affiliation></author>
       <pages>1567-1581</pages>
       <abstract>Large Language Models (LLMs), such as GPT and LLaMA, are susceptible to generating hallucinated answers in a confident tone. While previous efforts to elicit and calibrate confidence scores have shown some success, they often overlook biases towards certain groups, such as specific nationalities. Existing calibration methods typically focus on average performance, failing to address this disparity. In our study, we demonstrate that the concept of grouping loss is an effective metric for understanding and correcting the heterogeneity in confidence levels. We introduce a novel evaluation dataset, derived from a knowledge base, specifically designed to assess the confidence scores of LLM responses across different groups. Our experimental results highlight significant variations in confidence, which are accurately captured by grouping loss. To tackle this issue, we propose a new method to calibrate the confidence scores of LLMs by considering different groups, a process we term <i>reconfidencing</i>. Our findings indicate that this approach effectively mitigates biases against minority groups, contributing to the development of fairer LLMs.</abstract>
@@ -20621,7 +20621,7 @@
       <title>Dealing with Controversy: An Emotion and Coping Strategy Corpus Based on Role Playing</title>
       <author><first>Enrica</first><last>Troiano</last><affiliation>Vrije Universiteit Amsterdam</affiliation></author>
       <author><first>Sofie</first><last>Labat</last><affiliation>Universiteit Gent</affiliation></author>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <author><first>Rossana</first><last>Damiano</last><affiliation>University of Turin</affiliation></author>
       <author><first>Viviana</first><last>Patti</last><affiliation>University of Turin, Computer Science Department</affiliation></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
@@ -20669,7 +20669,7 @@
       <author><first>Panagiotis</first><last>Eustratiadis</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Christof</first><last>Monz</last><affiliation>University of Amsterdam, University of Amsterdam</affiliation></author>
       <author><first>Arianna</first><last>Bisazza</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last><affiliation>University of Amsterdam</affiliation></author>
       <pages>1691-1706</pages>
       <abstract>Following multiple instructions is a crucial ability for large language models (LLMs). Evaluating this ability comes with significant challenges: (i) limited coherence between multiple instructions, (ii) positional bias where the order of instructions affects model performance, and (iii) a lack of objectively verifiable tasks. To address these issues, we introduce a benchmark designed to evaluate models’ abilities to follow multiple instructions through sequential instruction following (SIFo) tasks. In SIFo, the successful completion of multiple instructions is verifiable by examining only the final instruction. Our benchmark evaluates instruction following using four tasks (text modification, question answering, mathematics, and security rule following), each assessing different aspects of sequential instruction following. Our evaluation of popular LLMs, both closed-source and open-source, shows that more recent and larger models significantly outperform their older and smaller counterparts on the SIFo tasks, validating the benchmark’s effectiveness. All models struggle with following sequences of instructions, hinting at an important lack of robustness of today’s language models.</abstract>
       <url hash="44cab2ea">2024.findings-emnlp.92</url>
@@ -20696,7 +20696,7 @@
     </paper>
     <paper id="94">
       <title><fixed-case>S</fixed-case>uri: Multi-constraint Instruction Following in Long-form Text Generation</title>
-      <author><first>Chau Minh</first><last>Pham</last></author>
+      <author id="chau-minh-pham"><first>Chau Minh</first><last>Pham</last></author>
       <author><first>Simeng</first><last>Sun</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Mohit</first><last>Iyyer</last><affiliation>University of Massachusetts Amherst</affiliation></author>
       <pages>1722-1753</pages>
@@ -20723,7 +20723,7 @@
       <author><first>Weilong</first><last>Dong</last></author>
       <author><first>Zishan</first><last>Guo</last></author>
       <author><first>Xinwei</first><last>Wu</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>1771-1793</pages>
       <abstract>Prior research has revealed that certain abstract concepts are linearly represented as directions in the representation space of LLMs, predominantly centered around English. In this paper, we extend this investigation to a multilingual context, with a specific focus on human values-related concepts (i.e., value concepts) due to their significance for AI safety. Through our comprehensive exploration covering 7 types of human values, 16 languages and 3 LLM series with distinct multilinguality (e.g., monolingual, bilingual and multilingual), we first empirically confirm the presence of value concepts within LLMs in a multilingual format. Further analysis on the cross-lingual characteristics of these concepts reveals 3 traits arising from language resource disparities: cross-lingual inconsistency, distorted linguistic relationships, and unidirectional cross-lingual transfer between high- and low-resource languages, all in terms of value concepts. Moreover, we validate the feasibility of cross-lingual control over value alignment capabilities of LLMs, leveraging the dominant language as a source language. Ultimately, recognizing the significant impact of LLMs’ multilinguality on our results, we consolidate our findings and provide prudent suggestions on the composition of multilingual data for LLMs pre-training.</abstract>
       <url hash="6b1e861d">2024.findings-emnlp.96</url>
@@ -20811,7 +20811,7 @@
       <author><first>Le</first><last>Zhang</last><affiliation>Mila - Quebec AI Institute</affiliation></author>
       <author><first>Yihong</first><last>Wu</last></author>
       <author><first>Qian</first><last>Yang</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last><affiliation>University of Montreal</affiliation></author>
       <pages>1872-1883</pages>
       <abstract>Large Language Models (LLMs) are foundational in language technologies, particularly in information retrieval (IR). In this paper, we thoroughly explore the best practice of leveraging LLMs for query expansion. To this end, we introduce a training-free, straightforward yet effective framework called Multi-Text Generation Integration (MuGI). This approach leverages LLMs to generate multiple pseudo-references, which are then integrated with the original queries to enhance both sparse and dense retrieval methods. Additionally, we introduce a retrieval pipeline based on MuGI, which combines the strengths of sparse and dense retrievers to achieve superior performance without the need for costly pre-indexing. Our empirical findings reveal that: (1) Increasing the number of samples from LLMs benefits IR systems; (2) A balance between the query and pseudo-documents, and an effective integration strategy, is critical for high performance; (3) Contextual information from LLMs is essential, even boost a 23M model to outperform a 7B baseline model; (4) Pseudo relevance feedback can further calibrate queries for improved performance; and (5) Query expansion is widely applicable and versatile, consistently enhancing models ranging from 23M to 7B parameters. Our code and all generated references are made available at https://github.com/lezhang7/Retrieval_MuGI.</abstract>
       <url hash="658ebdea">2024.findings-emnlp.103</url>
@@ -20968,7 +20968,7 @@
       <author><first>SeongTae</first><last>Hong</last><affiliation>Korea University</affiliation></author>
       <author><first>Seungjun</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>2088-2108</pages>
       <abstract>Translating major language resources to build minor language resources becomes a widely-used approach. Particularly in translating complex data points composed of multiple components, it is common to translate each component separately. However, we argue that this practice often overlooks the interrelation between components within the same data point. To address this limitation, we propose a novel MT pipeline that considers the intra-data relation. in implementing MT for training data. In our MT pipeline, all the components in a data point are concatenated to form a single translation sequence and subsequently reconstructed to the data components after translation. We introduce a Catalyst Statement (CS) to enhance the intra-data relation, and Indicator Token (IT) to assist the decomposition of a translated sequence into its respective data components. Through our approach, we have achieved a considerable improvement in translation quality itself, along with its effectiveness as training data. Compared with the conventional approach that translates each data component separately, our method yields better training data that enhances the performance of the trained model by 2.690 points for the web page ranking (WPR) task, and 0.845 for the question generation (QG) task in the XGLUE benchmark.</abstract>
       <url hash="0fff3ec1">2024.findings-emnlp.114</url>
@@ -21092,7 +21092,7 @@
       <author><first>Giuseppe</first><last>Ruggiero</last><affiliation>University of Turin</affiliation></author>
       <author><first>Matteo</first><last>Testa</last></author>
       <author><first>Jurgen Van De</first><last>Walle</last></author>
-      <author><first>Luigi</first><last>Di Caro</last><affiliation>University of Turin, Italy</affiliation></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last><affiliation>University of Turin, Italy</affiliation></author>
       <pages>2237-2246</pages>
       <abstract>The creation of artificial polyglot voices remains a challenging task, despite considerable progress in recent years. This paper investigates self-supervised learning for voice conversion to create native-sounding polyglot voices. We introduce a novel cross-lingual any-to-one voice conversion system that is able to preserve the source accent without the need for multilingual data from the target speaker. In addition, we show a novel cross-lingual fine-tuning strategy that further improves the accent and reduces the training data requirements. Objective and subjective evaluations with English, Spanish, French and Mandarin Chinese confirm that our approach improves on state-of-the-art methods, enhancing the speech intelligibility and overall quality of the converted speech, especially in cross-lingual scenarios. Audio samples are available at: https://giuseppe-ruggiero.github.io/a2o-vc-demo/</abstract>
       <url hash="dd7ac9df">2024.findings-emnlp.122</url>
@@ -21164,7 +21164,7 @@
     <paper id="127">
       <title>Can <fixed-case>LLM</fixed-case> Graph Reasoning Generalize beyond Pattern Memorization?</title>
       <author><first>Yizhuo</first><last>Zhang</last></author>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last></author>
+      <author><first>Heng</first><last>Wang</last></author>
       <author><first>Shangbin</first><last>Feng</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhaoxuan</first><last>Tan</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Xiaochuang</first><last>Han</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
@@ -21178,7 +21178,7 @@
     </paper>
     <paper id="128">
       <title>Improving Multilingual Instruction Finetuning via Linguistically Natural and Diverse Datasets</title>
-      <author><first>Sathish Reddy</first><last>Indurthi</last><affiliation>Zoom Video Communications</affiliation></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last><affiliation>Zoom Video Communications</affiliation></author>
       <author><first>Wenxuan</first><last>Zhou</last><affiliation>Zoom</affiliation></author>
       <author><first>Shamil</first><last>Chollampatt</last><affiliation>Zoom Video Communications</affiliation></author>
       <author><first>Ravi</first><last>Agrawal</last><affiliation>Zoom Video Communications</affiliation></author>
@@ -21215,7 +21215,7 @@
     <paper id="131">
       <title><fixed-case>S</fixed-case>yn<fixed-case>TQA</fixed-case>: Synergistic Table-based Question Answering via Mixture of Text-to-<fixed-case>SQL</fixed-case> and <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> <fixed-case>TQA</fixed-case></title>
       <author><first>Siyue</first><last>Zhang</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Chen</first><last>Zhao</last><affiliation>NYU Shanghai</affiliation></author>
       <pages>2352-2364</pages>
       <abstract>Text-to-SQL parsing and end-to-end question answering (E2E TQA) are two main approaches for Table-based Question Answering task. Despite success on multiple benchmarks, they have yet to be compared and their synergy remains unexplored. In this paper, we identify different strengths and weaknesses through evaluating state-of-the-art models on benchmark datasets: Text-to-SQL demonstrates superiority in handling questions involving arithmetic operations and long tables; E2E TQA excels in addressing ambiguous questions, non-standard table schema, and complex table contents. To combine both strengths, we propose a Synergistic Table-based Question Answering approach that integrate different models via answer selection, which is agnostic to any model types. Further experiments validate that ensembling models by either feature-based or LLM-based answer selector significantly improves the performance over individual models.</abstract>
@@ -21240,7 +21240,7 @@
       <author><first>Ruqing</first><last>Zhang</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yixing</first><last>Fan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>2380-2393</pages>
       <abstract>Retrieval-augmented generation (RAG) has emerged as a popular solution to mitigate the hallucination issues of large language models. However, existing studies on RAG seldom address the issue of predictive uncertainty, i.e., how likely it is that a RAG model’s prediction is incorrect, resulting in uncontrollable risks in real-world applications. In this work, we emphasize the importance of risk control, ensuring that RAG models proactively refuse to answer questions with low confidence. Our research identifies two critical latent factors affecting RAG’s confidence in its predictions: the quality of the retrieved results and the manner in which these results are utilized. To guide RAG models in assessing their own confidence based on these two latent factors, we develop a counterfactual prompting framework that induces the models to alter these factors and analyzes the effect on their answers. We also introduce a benchmarking procedure to collect answers with the option to abstain, facilitating a series of experiments. For evaluation, we introduce several risk-related metrics and the experimental results demonstrate the effectiveness of our approach. Our code and benchmark dataset are available at https://github.com/ict-bigdatalab/RC-RAG.</abstract>
       <url hash="6add2d48">2024.findings-emnlp.133</url>
@@ -21279,7 +21279,7 @@
     <paper id="136">
       <title>Adaptive Contrastive Decoding in Retrieval-Augmented Generation for Handling Noisy Contexts</title>
       <author><first>Youna</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Cheonbok</first><last>Park</last><affiliation>NAVER</affiliation></author>
       <author><first>Choonghyun</first><last>Park</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Hyunsoo</first><last>Cho</last><affiliation>Ewha Women’s University</affiliation></author>
@@ -21554,9 +21554,9 @@
     </paper>
     <paper id="155">
       <title><fixed-case>MINERS</fixed-case>: Multilingual Language Models as Semantic Retrievers</title>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Ruochen</first><last>Zhang</last><affiliation>Brown University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <pages>2742-2766</pages>
       <abstract>Words have been represented in a high-dimensional vector space that encodes their semantic similarities, enabling downstream applications such as retrieving synonyms, antonyms, and relevant contexts. However, despite recent advances in multilingual language models (LMs), the effectiveness of these models’ representations in semantic retrieval contexts has not been comprehensively explored. To fill this gap, this paper introduces the MINERS, a benchmark designed to evaluate the ability of multilingual LMs in semantic retrieval tasks, including bitext mining and classification via retrieval-augmented contexts. We create a comprehensive framework to assess the robustness of LMs in retrieving samples across over 200 diverse languages, including extremely low-resource languages in challenging cross-lingual and code-switching settings. Our results demonstrate that by solely retrieving semantically similar embeddings yields performance competitive with state-of-the-art approaches, without requiring any fine-tuning.</abstract>
       <url hash="8aa4e006">2024.findings-emnlp.155</url>
@@ -21637,7 +21637,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Jeongyeon</first><last>Seo</last></author>
       <author><first>Taeho</first><last>Hwang</last></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>2826-2844</pages>
       <abstract>The robustness of recent Large Language Models (LLMs) has become increasingly crucial as their applicability expands across various domains and real-world applications. Retrieval-Augmented Generation (RAG) is a promising solution for addressing the limitations of LLMs, yet existing studies on the robustness of RAG often overlook the interconnected relationships between RAG components or the potential threats prevalent in real-world databases, such as minor textual errors. In this work, we investigate two underexplored aspects when assessing the robustness of RAG: 1) vulnerability to noisy documents through low-level perturbations and 2) a holistic evaluation of RAG robustness. Furthermore, we introduce a novel attack method, the Genetic Attack on RAG (GARAG), which targets these aspects. Specifically, GARAG is designed to reveal vulnerabilities within each component and test the overall system functionality against noisy documents. We validate RAG robustness by applying our GARAG to standard QA datasets, incorporating diverse retrievers and LLMs. The experimental results show that GARAG consistently achieves high attack success rates. Also, it significantly devastates the performance of each component and their synergy, highlighting the substantial risk that minor textual inaccuracies pose in disrupting RAG systems in the real world. Code is available at https://github.com/zomss/GARAG.</abstract>
       <url hash="76b1f526">2024.findings-emnlp.161</url>
@@ -21704,7 +21704,7 @@
       <author><first>Eric</first><last>Le Ferrand</last><affiliation>Boston College</affiliation></author>
       <author><first>Zoey</first><last>Liu</last><affiliation>University of Florida</affiliation></author>
       <author><first>Antti</first><last>Arppe</last><affiliation>University of Alberta</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>2953-2963</pages>
       <abstract>Automatic speech recognition (ASR) technology is frequently proposed as a means of preservation and documentation of endangered languages, with promising results thus far. Among the endangered languages spoken today, a significant number exhibit complex morphology. The models employed in contemporary language documentation pipelines that utilize ASR, however, are predominantly based on isolating or inflectional languages, often from the Indo-European family. This raises a critical concern: building models exclusively on such languages may introduce a bias, resulting in better performance with simpler morphological structures. In this paper, we investigate the performance of modern ASR architectures on morphologically complex languages. Results indicate that modern ASR architectures appear less robust in managing high OOV rates for morphologically complex languages in terms of word error rate, while character error rates are consistently higher for isolating languages.</abstract>
       <url hash="8e6c39ac">2024.findings-emnlp.166</url>
@@ -21867,7 +21867,7 @@
       <author><first>Peng</first><last>Hu</last><affiliation>nanjing university</affiliation></author>
       <author><first>Changjiang</first><last>Gao</last><affiliation>nanjing university</affiliation></author>
       <author><first>Ruiqi</first><last>Gao</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>3144-3155</pages>
       <abstract>Large Language Models (LLMs) possess extensive knowledge and strong capabilities in performing in-context reasoning. However, previous work challenges their out-of-context reasoning ability, i.e., the ability to infer information from their training data, instead of from the context or prompt. This paper focuses on a significant aspect of out-of-context reasoning: Out-of-Context Knowledge Reasoning (OCKR), which is to combine multiple knowledge to infer new knowledge. We designed a synthetic dataset with seven representative OCKR tasks to systematically assess the OCKR capabilities of LLMs. Using this dataset, we evaluated several LLMs and discovered that their proficiency in this aspect is limited, regardless of whether the knowledge is trained in a separate or adjacent training settings. Moreover, training the model to reason with reasoning examples does not result in significant improvement, while training the model to perform explicit knowledge retrieval helps for retrieving attribute knowledge but not the relation knowledge, indicating that the model’s limited OCKR capabilities are due to difficulties in knowledge retrieval. Furthermore, we treat cross-lingual knowledge transfer as a distinct form of OCKR, and evaluate this ability. Our results show that the evaluated model also exhibits limited ability in transferring knowledge across languages.</abstract>
@@ -21987,7 +21987,7 @@
       <title>A Psycholinguistic Evaluation of Language Models’ Sensitivity to Argument Roles</title>
       <author><first>Eun-Kyoung Rosa</first><last>Lee</last></author>
       <author><first>Sathvik</first><last>Nair</last></author>
-      <author><first>Naomi</first><last>Feldman</last><affiliation>University of Maryland, College Park</affiliation></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>3262-3274</pages>
       <url hash="daad318a">2024.findings-emnlp.186</url>
       <attachment type="software" hash="99738c74">2024.findings-emnlp.186.software.zip</attachment>
@@ -22111,7 +22111,7 @@
       <author><first>Jingfeng</first><last>Yang</last><affiliation>Amazon</affiliation></author>
       <author><first>Bing</first><last>Yin</last></author>
       <author><first>Xian</first><last>Li</last><affiliation>Amazon</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Tuo</first><last>Zhao</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Haoming</first><last>Jiang</last><affiliation>Amazon</affiliation></author>
       <pages>3411-3425</pages>
@@ -22123,7 +22123,7 @@
     <paper id="196">
       <title><fixed-case>GE</fixed-case>2<fixed-case>PE</fixed-case>: <fixed-case>P</fixed-case>ersian End-to-End Grapheme-to-Phoneme Conversion</title>
       <author><first>Elnaz</first><last>Rahmati</last></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>3426-3436</pages>
       <abstract>Text-to-Speech (TTS) systems have made significant strides, enabling the generation of speech from grapheme sequences. However, for low-resource languages, these models still struggle to produce natural and intelligible speech. Grapheme-to-Phoneme conversion (G2P) addresses this challenge by enhancing the input sequence with phonetic information. Despite these advancements, existing G2P systems face limitations when dealing with Persian texts due to the complexity of Persian transcription. In this study, we focus on enriching resources for the Persian language. To achieve this, we introduce two novel G2P training datasets: one manually labeled and the other machine-generated. These datasets comprise over five million sentences alongside their corresponding phoneme sequences. Additionally, we propose two evaluation datasets tailored for Persian sub-tasks, including Kasre-Ezafe detection, homograph disambiguation, and handling out-of-vocabulary (OOV) words. To tackle the unique challenges of the Persian language, we develop a new sentence-level End-to-End (E2E) model leveraging a two-step training approach, as outlined in our paper, to maximize the impact of manually labeled data. The results show that our model surpasses the state-of-the-art performance by 1.86% in word error rate, 4.03% in Kasre-Ezafe detection recall, and 3.42% in homograph disambiguation accuracy.</abstract>
       <url hash="3ee88850">2024.findings-emnlp.196</url>
@@ -22136,7 +22136,7 @@
       <title>Characterizing <fixed-case>LLM</fixed-case> Abstention Behavior in Science <fixed-case>QA</fixed-case> with Context Perturbations</title>
       <author><first>Bingbing</first><last>Wen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Bill</first><last>Howe</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>3437-3450</pages>
       <abstract>The correct model response in the face of uncertainty is to abstain from answering a question so as not to mislead the user. In this work, we study the ability of LLMs to abstain from answering context-dependent science questions when provided insufficient or incorrect context. We probe model sensitivity in several settings: removing gold context, replacing gold context with irrelevant context, and providing additional context beyond what is given. In experiments on four QA datasets with six LLMs, we show that performance varies greatly across models, across the type of context provided, and also by question type; in particular, many LLMs seem unable to abstain from answering boolean questions using standard QA prompts. Our analysis also highlights the unexpected impact of abstention performance on QA task accuracy. Counter-intuitively, in some settings, replacing gold context with irrelevant context or adding irrelevant context to gold context can improve abstention performance in a way that results in improvements in task performance. Our results imply that changes are needed in QA dataset design and evaluation to more effectively assess the correctness and downstream impacts of model abstention.</abstract>
       <url hash="bba30001">2024.findings-emnlp.197</url>
@@ -22147,7 +22147,7 @@
       <title>Plausibly Problematic Questions in Multiple-Choice Benchmarks for Commonsense Reasoning</title>
       <author><first>Shramay</first><last>Palta</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Nishant</first><last>Balepur</last><affiliation>University of Maryland</affiliation></author>
-      <author><first>Peter</first><last>Rankel</last><affiliation>University of Maryland</affiliation></author>
+      <author id="peter-a-rankel"><first>Peter</first><last>Rankel</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Sarah</first><last>Wiegreffe</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Marine</first><last>Carpuat</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Rachel</first><last>Rudinger</last><affiliation>University of Maryland</affiliation></author>
@@ -22177,7 +22177,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Li</first><last>Siyan</last><affiliation>Columbia University</affiliation></author>
       <author><first>Teresa</first><last>Shao</last></author>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>3492-3511</pages>
       <abstract>Dialogue systems have been used as conversation partners in English learning, but few have studied whether these systems improve learning outcomes. Student passion and perseverance, or grit, has been associated with language learning success. Recent work establishes that as students perceive their English teachers to be more supportive, their grit improves. Hypothesizing that the same pattern applies to English-teaching chatbots, we create EDEN, a robust open-domain chatbot for spoken conversation practice that provides empathetic feedback. To construct EDEN, we first train a specialized spoken utterance grammar correction model and a high-quality social chit-chat conversation model. We then conduct a preliminary user study with a variety of strategies for empathetic feedback. Our experiment suggests that using adaptive empathetic feedback leads to higher *perceived affective support*. Furthermore, elements of perceived affective support positively correlate with student grit.</abstract>
       <url hash="29b483bf">2024.findings-emnlp.200</url>
@@ -22223,7 +22223,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="204">
       <title>Zero-Resource Hallucination Prevention for Large Language Models</title>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author><first>Junyu</first><last>Luo</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Cao</first><last>Xiao</last><affiliation>GE Healthcare</affiliation></author>
       <author><first>Fenglong</first><last>Ma</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>3586-3602</pages>
@@ -22243,7 +22243,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Vivek</first><last>Srikumar</last><affiliation>University of Utah</affiliation></author>
       <author><first>Sameer</first><last>Singh</last><affiliation>University of California, Irvine and Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>3603-3623</pages>
       <abstract>The inevitable appearance of spurious correlations in training datasets hurts the generalization of NLP models on unseen data. Previous work has found that datasets with paired inputs are prone to correlations between a specific part of the input (e.g., the hypothesis in NLI) and the label; consequently, models trained only on those outperform chance. Are these correlations picked up by models trained on the full input data? To address this question, we propose a new evaluation method, Counterfactual Attentiveness Test (CAT). CAT uses counterfactuals by replacing part of the input with its counterpart from a different example (subject to some restrictions), expecting an attentive model to change its prediction. Using CAT, we systematically investigate established supervised and in-context learning models on ten datasets spanning four tasks: natural language inference, reading comprehension, paraphrase detection, and visual &amp; language reasoning. CAT reveals that reliance on such correlations is mainly data-dependent. Surprisingly, we find that GPT3 becomes less attentive with an increased number of demonstrations, while its accuracy on the test data improves. Our results demonstrate that augmenting training or demonstration data with counterfactuals is effective in improving models’ attentiveness. We show that models’ attentiveness measured by CAT reveals different conclusions from solely measuring correlations in data.</abstract>
       <url hash="f364d01b">2024.findings-emnlp.205</url>
@@ -22310,7 +22310,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Guoruizhe</first><last>Sun</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Runlong</first><last>Zhang</last></author>
       <author><first>Chenyu</first><last>Xu</last></author>
-      <author><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
+      <author id="forrest-bao"><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
       <pages>3689-3702</pages>
       <abstract>Summarization is an important application of Large Language Models (LLMs). When judging the quality of a summary, factual consistency holds a significant weight. Despite numerous efforts dedicated to building factual inconsistency detectors, the exploration of explanability remains limited among existing effort. In this study, we incorporate both human-annotated and model-generated natural language explanations elucidating how a summary deviates and thus becomes inconsistent with its source article. We build our explanation-augmented dataset on top of the widely used SummaC summarization consistency benchmark. Additionally, we develop an inconsistency detector that is jointly trained with the collected explanations. Our findings demonstrate that integrating explanations during training not only enables the model to provide rationales for its judgments but also enhances its accuracy significantly.</abstract>
       <url hash="77cac94e">2024.findings-emnlp.210</url>
@@ -22331,8 +22331,8 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="212">
       <title>Deciphering the Factors Influencing the Efficacy of Chain-of-Thought: Probability, Memorization, and Noisy Reasoning</title>
       <author><first>Akshara</first><last>Prabhakar</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Thomas L.</first><last>Griffiths</last><affiliation>Princeton University</affiliation></author>
-      <author><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
+      <author id="thomas-l-griffiths"><first>Thomas L.</first><last>Griffiths</last><affiliation>Princeton University</affiliation></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
       <pages>3710-3724</pages>
       <abstract>Chain-of-Thought (CoT) prompting has been shown to enhance the multi-step reasoning capabilities of Large Language Models (LLMs). However, debates persist about whether LLMs exhibit *abstract generalization* or rely on *shallow heuristics* when given CoT prompts. To understand the factors influencing CoT reasoning we provide a detailed case study of the symbolic reasoning task of decoding shift ciphers, where letters are shifted forward some number of steps in the alphabet. We analyze the pattern of results produced by three LLMs—GPT-4, Claude 3, and Llama 3.1—performing this task using CoT prompting. By focusing on a single relatively simple task, we are able to identify three factors that systematically affect CoT performance: the probability of the task’s expected output (probability), what the model has implicitly learned during pre-training (memorization), and the number of intermediate operations involved in reasoning (noisy reasoning). We show that these factors can drastically influence task accuracy across all three LLMs; e.g., when tested with GPT-4, varying the output’s probability of occurrence shifts accuracy from 26% to 70%. Overall, we conclude that CoT prompting performance reflects both memorization and a probabilistic version of genuine reasoning.</abstract>
       <url hash="9c550d45">2024.findings-emnlp.212</url>
@@ -22347,7 +22347,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Isabelle</first><last>Lee</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Yongkang</first><last>Du</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Rahul</first><last>Gupta</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Jieyu</first><last>Zhao</last><affiliation>University of Southern California</affiliation></author>
       <pages>3725-3742</pages>
       <abstract>In a plethora of recent work, large language models (LLMs) demonstrated impressive reasoning ability, but many proposed downstream reasoning tasks only focus on performance-wise evaluation. Two fundamental questions persist: 1) how consistent is the reasoning, and 2) can models detect unreliable reasoning? In this paper, we investigate self-contradictory (Self-Contra) reasoning, where the model reasoning does not support answers. To answer 1), we define and assess the Self-Contra rate across three datasets and delve into finer-grained categories of Self-Contra reasoning. We find that LLMs often contradict themselves in reasoning tasks involving contextual information understanding or commonsense. The model may generate correct answers by taking shortcuts in reasoning or overlooking contextual evidence, leading to compromised reasoning. For 2), we task the state-of-the-art model GPT-4 with identifying Self-Contra reasoning and finer-grained fallacies. We find that finer-grained aided detection can improve GPT-4’s ability to detect Self-Contra. However, it is only able to detect Self-Contra with a 52.2% F1 score, much lower compared to 66.7% for humans. Our results indicate that current LLMs lack the robustness necessary for reliable reasoning and we emphasize the urgent need for establishing best practices in comprehensive reasoning evaluations beyond pure performance-based metrics.</abstract>
@@ -22441,7 +22441,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yichao</first><last>Zhou</last><affiliation>Google</affiliation></author>
       <author><first>James Bradley</first><last>Wendt</last><affiliation>Research, Google</affiliation></author>
       <author><first>Beliz</first><last>Gunel</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>Nguyen</first><last>Vo</last><affiliation>Research, Google</affiliation></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Vo</last><affiliation>Research, Google</affiliation></author>
       <author><first>Jing</first><last>Xie</last><affiliation>Google</affiliation></author>
       <author><first>Sandeep</first><last>Tata</last><affiliation>Google</affiliation></author>
       <pages>3830-3842</pages>
@@ -22516,7 +22516,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title><fixed-case>L</fixed-case>ingu<fixed-case>A</fixed-case>lchemy: Fusing Typological and Geographical Elements for Unseen Language Generalization</title>
       <author><first>Muhammad Farid</first><last>Adilazuarda</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Ayu</first><last>Purwarianti</last><affiliation>Institut Teknologi Bandung</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Amazon</affiliation></author>
       <pages>3912-3928</pages>
@@ -22647,7 +22647,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Dong</first><last>Yan</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4041-4064</pages>
       <abstract>In Reinforcement Learning from Human Feedback (RLHF), the reward model plays a crucial role in aligning language model outputs with human values. The human preference data used to train the reward model consists of a prompt and a response pair, with humans annotating which response better aligns with human value preferences. Due to the complexity and subjectivity of the annotation task, multiple organizations including OpenAI and Anthropic report significant noise in the human preference datasets, leading to instability and deviation in reward model training from human values. We discover that the difference in scores assigned to response pairs by the reward model effectively indicates the quality of data, and data of varying qualities show significant distinctions in reward model training. We introduce a method that automatically adjusts reward modeling based on data quality, reducing the impact of noise and making full use of dataset. Experiments on multiple human preference datasets demonstrate that our method stabilizes reward model training and significantly enhances the alignment performance of RLHF.</abstract>
       <url hash="204f318e">2024.findings-emnlp.234</url>
@@ -22780,7 +22780,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Dezhi</first><last>Peng</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Peirong</first><last>Zhang</last></author>
       <author><first>Yongxin</first><last>Shi</last><affiliation>South China University of Technology</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Kai</first><last>Ding</last><affiliation>INTSIG Information</affiliation></author>
       <author><first>Lianwen</first><last>Jin</last><affiliation>South China University of Technology</affiliation></author>
       <pages>4196-4210</pages>
@@ -22842,7 +22842,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Chen</first><last>Huang</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>4264-4282</pages>
       <abstract>With the aid of large language models, current conversational recommender system (CRS) has gaining strong abilities to persuade users to accept recommended items. While these CRSs are highly persuasive, they can mislead users by incorporating incredible information in their explanations, ultimately damaging the long-term trust between users and the CRS. To address this, we propose a simple yet effective method, called PC-CRS, to enhance the credibility of CRS’s explanations during persuasion. It guides the explanation generation through our proposed credibility-aware persuasive strategies and then gradually refines explanations via post-hoc self-reflection. Experimental results demonstrate the efficacy of PC-CRS in promoting persuasive and credible explanations. Further analysis reveals the reason behind current methods producing incredible explanations and the potential of credible explanations to improve recommendation accuracy.</abstract>
       <url hash="dbff1001">2024.findings-emnlp.247</url>
@@ -22927,7 +22927,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Shivam</first><last>Sharma</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <author><first>Rui</first><last>Cao</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Palash</first><last>Nandi</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Tanmoy</first><last>Chakraborty</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <author><first>Roy Ka-Wei</first><last>Lee</last><affiliation>Singapore University of Technology and Design and University of Saskatchewan</affiliation></author>
       <pages>4407-4419</pages>
@@ -23125,7 +23125,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yi</first><last>Bin</last></author>
       <author><first>Junhua</first><last>Liu</last><affiliation>Singapore University of Technology and Design and Forth AI</affiliation></author>
       <author><first>Yang</first><last>Yang</last><affiliation>University of Electronic Science and Technology of China</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Roy Ka-Wei</first><last>Lee</last><affiliation>Singapore University of Technology and Design and University of Saskatchewan</affiliation></author>
       <pages>4663-4680</pages>
@@ -23179,7 +23179,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Xuelin</first><last>Liu</last></author>
       <author><first>Yanfei</first><last>Zhu</last></author>
       <author><first>Shucheng</first><last>Zhu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
       <author><first>Ying</first><last>Liu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>4740-4760</pages>
@@ -23257,7 +23257,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="278">
       <title><fixed-case>QRM</fixed-case>e<fixed-case>M</fixed-case>: Unleash the Length Limitation through Question then Reflection Memory Mechanism</title>
       <author><first>Bo</first><last>Wang</last><affiliation>School of Computer Science &amp; Technology, Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yixin</first><last>Cao</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jiahao</first><last>Ying</last></author>
       <author><first>Wei</first><last>Tang</last></author>
@@ -23327,7 +23327,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="283">
       <title>Generating Media Background Checks for Automated Source Critical Reasoning</title>
-      <author><first>Michael Sejr</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
+      <author id="michael-schlichtkrull"><first>Michael Sejr</first><last>Schlichtkrull</last><affiliation>Queen Mary, University of London</affiliation></author>
       <pages>4927-4947</pages>
       <abstract>Not everything on the internet is true. This unfortunate fact requires both humans and models to perform complex reasoning about credibility when working with retrieved information. In NLP, this problem has seen little attention. Indeed, retrieval-augmented models are not typically expected to distrust retrieved documents. Human experts overcome the challenge by gathering signals about the context, reliability, and tendency of source documents - that is, they perform *source criticism*. We propose a novel NLP task focused on finding and summarising such signals. We introduce a new dataset of 6,709 “media background checks” derived from Media Bias / Fact Check, a volunteer-run website documenting media bias. We test open-source and closed-source LLM baselines with and without retrieval on this dataset, finding that retrieval greatly improves performance. We furthermore carry out human evaluation, demonstrating that 1) media background checks are helpful for humans, and 2) media background checks are helpful for retrieval-augmented models.</abstract>
       <url hash="4e7b295d">2024.findings-emnlp.283</url>
@@ -23440,7 +23440,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Learning to Ask Informative Questions: Enhancing <fixed-case>LLM</fixed-case>s with Preference Optimization and Expected Information Gain</title>
       <author><first>Davide</first><last>Mazzaccara</last></author>
       <author><first>Alberto</first><last>Testoni</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raffaella</first><last>Bernardi</last><affiliation>University of Trento</affiliation></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last><affiliation>University of Trento</affiliation></author>
       <pages>5064-5074</pages>
       <abstract>Questions are essential tools for acquiring the necessary information to complete information-seeking tasks. However, large language models (LLMs), especially open-source models, often perform poorly in generating informative questions, as measured by expected information gain (EIG). In this paper, we propose a method to enhance the informativeness of LLM-generated questions in 20-question game dialogues. We sample multiple questions from the same model (LLaMA 2-Chat 7B) for each game and create pairs of low-EIG and high-EIG questions to apply a Direct Preference Optimization (DPO) algorithm. Our results show that this method produces more effective questions (in terms of EIG), even in domains different from those used to train the DPO model.</abstract>
       <url hash="06cfce12">2024.findings-emnlp.291</url>
@@ -23487,7 +23487,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>A Simple but Effective Approach to Improve Structured Language Model Output for Information Extraction</title>
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Rampi</first><last>Ramprasad</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>5133-5148</pages>
       <abstract>Large language models (LLMs) have demonstrated impressive abilities in generating unstructured natural language according to instructions. However, their performance can be inconsistent when tasked with producing text that adheres to specific structured formats, which is crucial in applications like named entity recognition (NER) or relation extraction (RE). To address this issue, this paper introduces an efficient method, G&amp;O, to enhance their structured text generation capabilities. It breaks the generation into a two-step pipeline: initially, LLMs generate answers in natural language as intermediate responses. Subsequently, LLMs are asked to organize the output into the desired structure, using the intermediate responses as context. G&amp;O effectively separates the generation of content from the structuring process, reducing the pressure of completing two orthogonal tasks simultaneously. Tested on zero-shot NER and RE, the results indicate a significant improvement in LLM performance with minimal additional efforts. This straightforward and adaptable prompting technique can also be combined with other strategies, like self-consistency, to further elevate LLM capabilities in various structured text generation tasks.</abstract>
       <url hash="a84fce98">2024.findings-emnlp.295</url>
@@ -23540,7 +23540,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="299">
       <title>From Test-Taking to Test-Making: Examining <fixed-case>LLM</fixed-case> Authoring of Commonsense Assessment Items</title>
       <author><first>Melissa</first><last>Roemmele</last><affiliation>Midjourney</affiliation></author>
-      <author><first>Andrew</first><last>Gordon</last><affiliation>University of Southern California</affiliation></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last><affiliation>University of Southern California</affiliation></author>
       <pages>5193-5203</pages>
       <abstract>LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.</abstract>
       <url hash="461fee95">2024.findings-emnlp.299</url>
@@ -23781,7 +23781,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="317">
       <title>Dual-teacher Knowledge Distillation for Low-frequency Word Translation</title>
       <author><first>Yifan</first><last>Guo</last></author>
-      <author><first>Hongying</first><last>Zan</last><affiliation>Zhengzhou University</affiliation></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last><affiliation>Zhengzhou University</affiliation></author>
       <author><first>Hongfei</first><last>Xu</last><affiliation>Zhengzhou University</affiliation></author>
       <pages>5543-5552</pages>
       <abstract>Neural Machine Translation (NMT) models are trained on parallel corpora with unbalanced word frequency distribution. As a result, NMT models are likely to prefer high-frequency words than low-frequency ones despite low-frequency word may carry the crucial semantic information, which may hamper the translation quality once they are neglected. The objective of this study is to enhance the translation of meaningful but low-frequency words. Our general idea is to optimize the translation of low-frequency words through knowledge distillation. Specifically, we employ a low-frequency teacher model that excels in translating low-frequency words to guide the learning of the student model. To remain the translation quality of high-frequency words, we further introduce a dual-teacher distillation framework, leveraging both the low-frequency and high-frequency teacher models to guide the student model’s training. Our single-teacher distillation method already achieves a +0.64 BLEU improvements over the state-of-the-art method on the WMT 16 English-to-German translation task on the low-frequency test set. While our dual-teacher framework leads to +0.87, +1.24, +0.47, +0.87 and +0.86 BLEU improvements on the IWSLT 14 German-to-English, WMT 16 English-to-German, WMT 15 English-to-Czech, WMT 14 English-to-French and WMT 18 Chinese-to-English tasks respectively compared to the baseline, while maintaining the translation performance of high-frequency words.</abstract>
@@ -23855,7 +23855,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="323">
       <title><fixed-case>M</fixed-case>ath<fixed-case>F</fixed-case>ish: Evaluating Language Model Math Reasoning via Grounding in Educational Curricula</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
       <author><first>Tal</first><last>August</last></author>
       <author><first>Rose E</first><last>Wang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -23883,7 +23883,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Zhige</first><last>Huang</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Haoan</first><last>Jin</last></author>
       <author><first>Mengyue</first><last>Wu</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <pages>5689-5698</pages>
       <abstract>Reconstructing ancient Chinese pronunciation is a challenging task due to the scarcity of phonetic records. Different from historical linguistics’ comparative approaches, we reformulate this problem into a temporal prediction task with masked language models, digitizing existing phonology rules into ACP (Ancient Chinese Phonology) dataset of 70,943 entries for 17,001 Chinese characters. Utilizing this dataset and Chinese character glyph information, our transformer-based model demonstrates superior performance on a series of reconstruction tasks, with or without prior phonological knowledge on the target historical period. Our work significantly advances the digitization and computational reconstruction of ancient Chinese phonology, providing a more complete and temporally contextualized resource for computational linguistics and historical research. The dataset and model training code are publicly available.</abstract>
       <url hash="79b54e19">2024.findings-emnlp.325</url>
@@ -23988,9 +23988,9 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Hui</first><last>Huang</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last><affiliation>Baidu</affiliation></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Wenpeng</first><last>Lu</last><affiliation>Qilu University of Technology</affiliation></author>
       <pages>5813-5820</pages>
       <abstract>The proliferation of open-source Large Language Models (LLMs) underscores the pressing need for evaluation methods. Existing works primarily rely on external evaluators, focusing on training and prompting strategies. However, a crucial aspect – model-aware glass-box features – is overlooked. In this study, we explore the utility of glass-box features under the scenario of self-evaluation, namely applying an LLM to evaluate its own output. We investigate various glass-box feature groups and discovered that the softmax distribution serves as a reliable quality indicator for self-evaluation. Experimental results on public benchmarks validate the feasibility of self-evaluation of LLMs using glass-box features.</abstract>
@@ -24275,7 +24275,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Toeing the Party Line: Election Manifestos as a Key to Understand Political Discourse on <fixed-case>T</fixed-case>witter</title>
       <author><first>Maximilian</first><last>Maurer</last><affiliation>GESIS Leibniz Institute for the Social Sciences</affiliation></author>
       <author><first>Tanise</first><last>Ceron</last></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Gabriella</first><last>Lapesa</last><affiliation>GESIS – Leibniz Institute for the Social Sciences and Heinrich-Heine University Düsseldorf</affiliation></author>
       <pages>6115-6130</pages>
       <abstract>Political discourse on Twitter is a moving target: politicians continuously make statements about their positions. It is therefore crucial to track their discourse on social media to understand their ideological positions and goals. However, Twitter data is also challenging to work with since it is ambiguous and often dependent on social context, and consequently, recent work on political positioning has tended to focus strongly on manifestos (parties’ electoral programs) rather than social media.In this paper, we extend recently proposed methods to predict pairwise positional similarities between parties from the manifesto case to the Twitter case, using hashtags as a signal to fine-tune text representations, without the need for manual annotation. We verify the efficacy of fine-tuning and conduct a series of experiments that assess the robustness of our method for low-resource scenarios. We find that our method yields stable positionings reflective of manifesto positionings, both in scenarios with all tweets of candidates across years available and when only smaller subsets from shorter time periods are available. This indicates that it is possible to reliably analyze the relative positioning of actors without the need for manual annotation, even in the noisier context of social media.</abstract>
@@ -24336,7 +24336,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title><fixed-case>FANTA</fixed-case>stic <fixed-case>SE</fixed-case>quences and Where to Find Them: Faithful and Efficient <fixed-case>API</fixed-case> Call Generation through State-tracked Constrained Decoding and Reranking</title>
       <author><first>Zhuoer</first><last>Wang</last></author>
       <author><first>Leonardo F. R.</first><last>Ribeiro</last><affiliation>Amazon</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Apple</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Apple</affiliation></author>
       <author><first>Rohan</first><last>Mukherjee</last></author>
       <author><first>Tzu-Yen</first><last>Wang</last><affiliation>Amazon</affiliation></author>
       <author><first>Xinyan</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
@@ -24431,7 +24431,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="366">
       <title>Unveiling the Invisible: Captioning Videos with Metaphors</title>
       <author><first>Abisek</first><last>Rajakumar Kalarani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Sumit</first><last>Shekhar</last></author>
       <pages>6306-6320</pages>
       <abstract>Metaphors are a common communication tool used in our day-to-day life. The detection and generation of metaphors in textual form have been studied extensively but metaphors in other forms have been under-explored. Recent studies have shown that Vision-Language (VL) models cannot understand visual metaphors in memes and adverts. As of now, no probing studies have been done that involve complex language phenomena like metaphors with videos. Hence, we introduce a new VL task of describing the metaphors present in the videos in our work. To facilitate this novel task, we construct and release a manually created dataset with 705 videos and 2115 human-written captions, along with a new metric called Average Concept Distance (ACD), to automatically evaluate the creativity of the metaphors generated. We also propose a novel low-resource video metaphor captioning system: GIT-LLaVA, which obtains comparable performance to SoTA video language models on the proposed task. We perform a comprehensive analysis of existing video language models on this task and publish our dataset, models, and benchmark results to enable further research.</abstract>
@@ -24484,7 +24484,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="370">
       <title>Comparing Edge-based and Node-based Methods on a Citation Prediction Task</title>
       <author><first>Peter</first><last>Vickers</last></author>
-      <author><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
       <pages>6369-6388</pages>
       <abstract>Citation Prediction, estimating whether paper a cites paper b, is particularly interesting in a forecasting setting where the model is trained on papers published before time t, and evaluated on papers published after h, where h is the forecast horizon. Performance improves with t (larger training sets) and degrades with h (longer forecast horizons). The trade-off between edge-based methods and node-based methods depends on t. Because edges grow faster than nodes, larger training sets favor edge-based methods.We introduce a new forecast-based Citation Prediction benchmark of 3 million papers to quantify these trends.Our benchmark shows that desirable policies for combining edge- and node-based methods depend on h and t.We release our benchmark, evaluation scripts, and embeddings.</abstract>
       <url hash="4e635077">2024.findings-emnlp.370</url>
@@ -24517,7 +24517,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Kate</first><last>Thompson</last></author>
       <author><first>Akshay</first><last>Chaturvedi</last><affiliation>IRIT, Toulouse, France</affiliation></author>
       <author><first>Julie</first><last>Hunter</last><affiliation>LINAGORA</affiliation></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
       <pages>6418-6430</pages>
       <abstract>This paper provides the first discourse parsing experiments with a large language model (LLM) finetuned on corpora annotated in the style of SDRT (Segmented Discourse Representation Theory, Asher (1993), Asher and Lascarides (2003)). The result is a discourse parser, Llamipa (Llama Incremental Parser), that leverages discourse context, leading to substantial performance gains over approaches that use encoder-only models to provide local, context-sensitive representations of discourse units. Furthermore, it is able to process discourse data incrementally, which is essential for the eventual use of discourse information in downstream tasks.</abstract>
       <url hash="6558757b">2024.findings-emnlp.373</url>
@@ -24528,7 +24528,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Nebula: A discourse aware <fixed-case>M</fixed-case>inecraft Builder</title>
       <author><first>Akshay</first><last>Chaturvedi</last><affiliation>IRIT, Toulouse, France</affiliation></author>
       <author><first>Kate</first><last>Thompson</last></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
       <pages>6431-6443</pages>
       <abstract>When engaging in collaborative tasks, humans efficiently exploit the semantic structure of a conversation to optimize verbal and nonverbal interactions. But in recent “language to code” or “language to action” models, this information is lacking. We show how incorporating the prior discourse and nonlinguistic context of a conversation situated in a nonlinguistic environment can improve the “language to action” component of such interactions. We finetune an LLM to predict actions based on prior context; our model, Nebula, doubles the net-action F1 score over the baseline on this task of Jayannavar et al. (2020). We also investigate our model’s ability to construct shapes and understand location descriptions using a synthetic dataset.</abstract>
       <url hash="d4ea01f5">2024.findings-emnlp.374</url>
@@ -24540,7 +24540,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Improving Referring Ability for Biomedical Language Models</title>
       <author><first>Junfeng</first><last>Jiang</last></author>
       <author><first>Fei</first><last>Cheng</last><affiliation>Kyoto University</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>NII, Tokyo Institute of Technology</affiliation></author>
       <pages>6444-6457</pages>
       <abstract>Existing auto-regressive large language models (LLMs) are primarily trained using documents from general domains. In the biomedical domain, continual pre-training is a prevalent method for domain adaptation to inject professional knowledge into powerful LLMs that have been pre-trained in general domains. Previous studies typically conduct standard pre-training by randomly packing multiple documents into a long pre-training sequence. Recently, some existing works suggest that enhancing the relatedness of documents within the same pre-training sequence may be advantageous. However, these studies primarily focus on general domains, which cannot be readily applied in the biomedical domain where the distinction of fine-grained topics is harder. Is it possible to further improve the pre-training for biomedical language models (LMs) using exactly the same corpus? In this paper, we explore an improved approach to continual pre-training, which is a prevalent method for domain adaptation, by utilizing information from the citation network in this challenging scenario. Empirical studies demonstrate that our proposed LinkLM data improves both the intra-sample and inter-sample referring abilities of auto-regressive LMs in the biomedical domain, encouraging more profound consideration of task-specific pre-training sequence design for continual pre-training.</abstract>
       <url hash="eaf29ef6">2024.findings-emnlp.375</url>
@@ -24577,7 +24577,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Exploring the Limits of Fine-grained <fixed-case>LLM</fixed-case>-based Physics Inference via Premise Removal Interventions</title>
       <author><first>Jordan</first><last>Meadows</last></author>
       <author><first>Tamsin Emily</first><last>James</last><affiliation>University of Birmingham</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>6487-6502</pages>
       <abstract>Language models (LMs) can hallucinate when performing complex mathematical reasoning. Physics provides a rich domain for assessing their mathematical capabilities, where physical context requires that any symbolic manipulation satisfies complex semantics (<i>e.g.,</i> units, tensorial order). In this work, we systematically remove crucial context from prompts to force instances where model inference may be algebraically coherent, yet unphysical. We assess LM capabilities in this domain using a curated dataset encompassing multiple notations and Physics subdomains. Further, we improve zero-shot scores using synthetic in-context examples, and demonstrate non-linear degradation of derivation quality with perturbation strength via the progressive omission of supporting premises. We find that the models’ mathematical reasoning is not physics-informed in this setting, where physical context is predominantly ignored in favour of reverse-engineering solutions.</abstract>
       <url hash="428e7908">2024.findings-emnlp.378</url>
@@ -24738,7 +24738,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="390">
       <title>Learning Semantic Structure through First-Order-Logic Translation</title>
       <author><first>Akshay</first><last>Chaturvedi</last><affiliation>IRIT, Toulouse, France</affiliation></author>
-      <author><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last><affiliation>CNRS</affiliation></author>
       <pages>6669-6680</pages>
       <abstract>In this paper, we study whether transformer-based language models can extract predicate argument structure from simple sentences. We firstly show that language models sometimes confuse which predicates apply to which objects. To mitigate this, we explore two tasks: question answering (Q/A), and first order logic (FOL) translation, and two regimes, prompting and finetuning. In FOL translation, we finetune several large language models on synthetic datasets designed to gauge their generalization abilities. For Q/A, we finetune encoder models like BERT and RoBERTa and use prompting for LLMs. The results show that FOL translation for LLMs is better suited to learn predicate argument structure.</abstract>
       <url hash="5dd38f05">2024.findings-emnlp.390</url>
@@ -24800,7 +24800,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Xinhao</first><last>Chen</last></author>
       <author><first>Hongyi</first><last>Wu</last></author>
       <author><first>Changzhi</first><last>Sun</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Xiaopeng</first><last>Bai</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Shaoguang</first><last>Mao</last><affiliation>Microsoft</affiliation></author>
@@ -24871,7 +24871,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Few-shot clinical entity recognition in <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench and <fixed-case>S</fixed-case>panish: masked language models outperform generative model prompting</title>
       <author><first>Marco</first><last>Naguib</last></author>
       <author><first>Xavier</first><last>Tannier</last><affiliation>LIMICS, UMRS_1142</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
       <pages>6829-6852</pages>
       <abstract>Large language models (LLMs) have become the preferred solution for many natural language processing tasks. In low-resource environments such as specialized domains, their few-shot capabilities are expected to deliver high performance. Named Entity Recognition (NER) is a critical task in information extraction that is not covered in recent LLM benchmarks. There is a need for better understanding the performance of LLMs for NER in a variety of settings including languages other than English. This study aims to evaluate generative LLMs, employed through prompt engineering, for few-shot clinical NER. We compare 13 auto-regressive models using prompting and 16 masked models using fine-tuning on 14 NER datasets covering English, French and Spanish. While prompt-based auto-regressive models achieve competitive F1 for general NER, they are outperformed within the clinical domain by lighter biLSTM-CRF taggers based on masked models. Additionally, masked models exhibit lower environmental impact compared to auto-regressive models. Findings are consistent across the three languages studied, which suggests that LLM prompting is not yet suited for NER production in the clinical domain.</abstract>
       <url hash="12305569">2024.findings-emnlp.400</url>
@@ -24991,7 +24991,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Zheqin</first><last>Yin</last></author>
       <author><first>Xinlin</first><last>Zhuang</last></author>
       <author><first>Xiaopeng</first><last>Bai</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>6949-6966</pages>
       <abstract>This paper introduces the Chinese Essay Argument Mining Corpus (CEAMC), a manually annotated dataset designed for argument component classification on multiple levels of granularity. Existing argument component types in education remain simplistic and isolated, failing to encapsulate the complete argument information. Originating from authentic examination settings, CEAMC categorizes argument components into 4 coarse-grained and 10 fine-grained delineations, surpassing previous simple representations to capture the subtle nuances of argumentation in the real world, thus meeting the needs of complex and diverse argumentative scenarios. Our contributions include the development of CEAMC, the establishment of baselines for further research, and a thorough exploration of the performance of Large Language Models (LLMs) on CEAMC. The results indicate that our CEAMC can serve as a challenging benchmark for the development of argument analysis in education.</abstract>
       <url hash="07b3969a">2024.findings-emnlp.408</url>
@@ -25014,7 +25014,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Keping</first><last>Bi</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Wanqing</first><last>Cui</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>6985-7000</pages>
       <abstract>Non-Factoid (NF) Question Answering (QA) is challenging to evaluate due to diverse potential answers and no objective criterion. The commonly used automatic evaluation metrics like ROUGE or BERTScore cannot accurately measure semantic similarities or answers from different perspectives. Recently, Large Language Models (LLMs) have been resorted to for NFQA evaluation due to their compelling performance on various NLP tasks. Common approaches include pointwise scoring of each candidate answer and pairwise comparisons between answers. Inspired by the evolution from pointwise to pairwise to listwise in learning-to-rank methods, we propose a novel listwise NFQA evaluation approach, that utilizes LLMs to rank candidate answers in a list of reference answers sorted by descending quality. Moreover, for NF questions that do not have multi-grade or any golden answers, we leverage LLMs to generate the reference answer list of various quality to facilitate the listwise evaluation. Extensive experimental results on three NFQA datasets, i.e., ANTIQUE, the TREC-DL-NF, and WebGLM show that our method has significantly higher correlations with human annotations compared to automatic scores and common pointwise and pairwise approaches.</abstract>
       <url hash="0f7fdcf3">2024.findings-emnlp.410</url>
@@ -25042,7 +25042,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Leonie</first><last>Weissweiler</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>7017-7034</pages>
       <abstract>Traditional benchmarking in NLP typically involves using static, held-out test sets and calculating aggregated statistics based on diverse examples. However, this approach often results in an overestimation of performance and lacks the ability to offer comprehensive, interpretable, and dynamic assessments of NLP models. Recently, works like DynaBench and Checklist have addressed these limitations through behavioral testing of NLP models with test types generated by a multi-step human-annotated pipeline. Unfortunately, manually creating a variety of test types requires significant human labor, thus weakening efficiency. In this work, we propose SynthEval, a hybrid behavioral testing framework that leverages large language models (LLMs) to generate a wide range of test types for a comprehensive evaluation of NLP models. The SynthEval framework first generates sentences via LLMs using controlled generation, and then identifies challenging examples by comparing the predictions made by LLMs with task-specific NLP models. In the last stage, human experts investigate the challenging examples, manually design templates, and identify the types of failures the task-specific models consistently exhibit. We apply SynthEval to two classification tasks and show that our framework is effective in identifying weaknesses of strong models on these tasks.</abstract>
       <url hash="aad31703">2024.findings-emnlp.412</url>
@@ -25057,7 +25057,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Abdullatif</first><last>Köksal</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Lütfi Kerem</first><last>Senel</last><affiliation>The Center for Information and Language Processing</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>7035-7055</pages>
       <abstract>Multiple choice question answering tasks evaluate the reasoning, comprehension, and mathematical abilities of Large Language Models (LLMs). While existing benchmarks employ automatic translation for multilingual evaluation, this approach is error-prone and potentially introduces culturally biased questions, especially in social sciences. We introduce the first multitask, multiple-choice Turkish QA benchmark, TurkishMMLU, to evaluate LLMs’ understanding of the Turkish language. TurkishMMLU includes over 10,000 questions, covering 9 different subjects from Turkish high-school education curricula. These questions are written by curriculum experts, suitable for the high-school curricula in Turkey, covering subjects ranging from natural sciences and math questions to more culturally representative topics such as Turkish Literature and the history of the Turkish Republic. We evaluate over 20 LLMs, including multilingual open-source (e.g., Gemma, Llama, MT5), closed-source (GPT 4o, Claude, Gemini), and Turkish-adapted (e.g., Trendyol) models. We provide an extensive evaluation, including zero-shot and few-shot evaluation of LLMs, chain-of-thought reasoning, and question difficulty analysis along with model performance. We provide an in-depth analysis of the Turkish capabilities and limitations of current LLMs to provide insights for future LLMs for the Turkish language. We publicly release our code for the dataset and evaluation: https://github.com/ArdaYueksel/TurkishMMLU</abstract>
       <url hash="3fac5e1b">2024.findings-emnlp.413</url>
@@ -25070,7 +25070,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Abdullatif</first><last>Köksal</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Timo</first><last>Schick</last><affiliation>Facebook</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>7056-7078</pages>
       <abstract>Instruction tuning enables language models to more effectively generalize and better follow user intent. However, obtaining instruction data is costly and challenging. Prior work employs methods such as expensive human annotation, crowd-sourced datasets with alignment issues, and generating noisy examples via LLMs. We introduce the LongForm-C dataset, which is created by reverse instructions. We generate instructions via LLMs for human-written corpus examples using reverse instructions. First we select a diverse set of human-written documents from corpora such as C4 and Wikipedia; then we generate instructions for these documents via LLMs. This approach provides a cheaper and cleaner instruction-tuning dataset with natural output and one suitable for long text generation. Our models outperform 10x larger language models without instruction tuning on tasks such as story/recipe generation and long-form question answering. Moreover, LongForm models outperform prior instruction-tuned models such as FLAN-T5 and Alpaca by a large margin, and improve language understanding capabilities further. We publicly release our data and models: [Anonymized-URL].</abstract>
       <url hash="21ef4b22">2024.findings-emnlp.414</url>
@@ -25123,7 +25123,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Tao</first><last>Ji</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>7136-7148</pages>
       <abstract>Large language models (LLMs) have achieved impressive performance in numerous domains but often struggle to process lengthy inputs effectively and efficiently due to limited length generalization and attention’s quadratic computational demands. Many sought to mitigate this by restricting the attention window within the pre-trained length. However, these methods introduce new issues such as ignoring the middle context and requiring additional training. To address these problems, we propose LongHeads, a training-free framework that enhances LLM’s long context ability by unlocking multi-head attention’s untapped potential. Instead of allowing each head to attend to the full sentence, which struggles with generalizing to longer sequences due to out-of-distribution (OOD) issues, we allow each head to process in-distribution length by selecting and attending to important context chunks. To this end, we propose a chunk selection strategy that relies on the inherent correlation between the query and the key representations, efficiently distributing context chunks to different heads. In this way, each head ensures it can effectively process attended tokens within the trained length, while different heads in different layers can collectively process longer contexts. LongHeads works efficiently and fits seamlessly with many LLMs that use relative positional encoding. LongHeads achieves 100% accuracy at the 128k length on passkey retrieval task, verifying LongHeads’ efficacy in extending the usable context window for existing models.</abstract>
       <url hash="bee19c3b">2024.findings-emnlp.417</url>
@@ -25163,7 +25163,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Garrick</first><last>Sherman</last></author>
       <author><first>Zachary</first><last>Fried</last></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Brenda</first><last>Curtis</last><affiliation>National Institute on Drug Abuse/NIH</affiliation></author>
       <pages>7174-7188</pages>
       <abstract>Large language models (LLMs) are increasingly being used in human-centered social scientific tasks, such as data annotation, synthetic data creation, and engaging in dialog. However, these tasks are highly subjective and dependent on human factors, such as one’s environment, attitudes, beliefs, and lived experiences. Thus, it may be the case that employing LLMs (which do not have such human factors) in these tasks results in a lack of variation in data, failing to reflect the diversity of human experiences. In this paper, we examine the role of prompting LLMs with human-like personas and asking the models to answer as if they were a specific human. This is done explicitly, with exact demographics, political beliefs, and lived experiences, or implicitly via names prevalent in specific populations. The LLM personas are then evaluated via (1) subjective annotation task (e.g., detecting toxicity) and (2) a belief generation task, where both tasks are known to vary across human factors. We examine the impact of explicit vs. implicit personas and investigate which human factors LLMs recognize and respond to. Results show that explicit LLM personas show mixed results when reproducing known human biases, but generally fail to demonstrate implicit biases. We conclude that LLMs may capture the statistical patterns of how people speak, but are generally unable to model the complex interactions and subtleties of human perceptions, potentially limiting their effectiveness in social science applications.</abstract>
@@ -25177,7 +25177,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yue</first><last>Chang</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Tharindu</first><last>Madusanka</last></author>
       <author><first>Youcheng</first><last>Sun</last><affiliation>The University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>7189-7202</pages>
       <abstract>Modern natural language generation (NLG) systems have led to the development of synthetic human-like open-ended texts, posing concerns as to who the original author of a text is. To address such concerns, we introduce DeB-Ang: the utilisation of a custom DeBERTa model with angular loss and contrastive loss functions for effective class separation in neural text classification tasks. We expand the application of this model on binary machine-generated text detection and multi-class neural authorship attribution. We demonstrate improved performance on many benchmark datasets whereby the accuracy for machine-generated text detection was increased by as much as 38.04% across all datasets.</abstract>
       <url hash="a0e40baa">2024.findings-emnlp.421</url>
@@ -25246,7 +25246,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Generalists vs. Specialists: Evaluating Large Language Models for <fixed-case>U</fixed-case>rdu</title>
       <author><first>Samee</first><last>Arif</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <author><first>Abdul Hameed</first><last>Azeemi</last></author>
-      <author><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <author><first>Awais</first><last>Athar</last><affiliation>European Bioinformatics Institute - European Molecular Biology Laboratory (EMBL-EBI)</affiliation></author>
       <pages>7263-7280</pages>
       <url hash="f6ce885a">2024.findings-emnlp.426</url>
@@ -25350,7 +25350,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yifei</first><last>He</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Haoxiang</first><last>Wang</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Ziyan</first><last>Jiang</last><affiliation>Amazon</affiliation></author>
-      <author><first>Alexandros</first><last>Papangelis</last><affiliation>Apple</affiliation></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last><affiliation>Apple</affiliation></author>
       <author><first>Han</first><last>Zhao</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <pages>7365-7377</pages>
       <abstract>Reward models (RM) capture the values and preferences of humans and play a central role in Reinforcement Learning with Human Feedback (RLHF) to align pretrained large language models (LLMs). Traditionally, training these models relies on extensive human-annotated preference data, which poses significant challenges in terms of scalability and cost. To overcome these limitations, we propose Semi-Supervised Reward Modeling (SSRM), an approach that enhances RM training using unlabeled data. Given an unlabeled dataset, SSRM involves three key iterative steps: pseudo-labeling unlabeled examples, selecting high-confidence examples through a confidence threshold, and supervised finetuning on the refined dataset. Across extensive experiments on various model configurations, we demonstrate that SSRM significantly improves reward models without incurring additional labeling costs. Notably, SSRM can achieve performance comparable to models trained entirely on labeled data of equivalent volumes. Overall, SSRM substantially reduces the dependency on large volumes of human-annotated data, thereby decreasing the overall cost and time involved in training effective reward models.</abstract>
@@ -25549,7 +25549,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="449">
       <title><fixed-case>BERGEN</fixed-case>: A Benchmarking Library for Retrieval-Augmented Generation</title>
       <author><first>David</first><last>Rau</last></author>
-      <author><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Nadezhda</first><last>Chirkova</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Thibault</first><last>Formal</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Shuai</first><last>Wang</last></author>
@@ -25635,7 +25635,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Yuanmeng</first><last>Chen</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>7732-7743</pages>
       <abstract>In a practical scenario, multi-domain neural machine translation (MDNMT) aims to continuously acquire knowledge from new domain data while retaining old knowledge. Previous work separately learns each new domain knowledge based on parameter isolation methods, which effectively capture the new knowledge. However, task-specific parameters lead to isolation between models, which hinders the mutual transfer of knowledge between new domains. Given the scarcity of domain-specific corpora, we consider making full use of the data from multiple new domains. Therefore, our work aims to leverage previously acquired domain knowledge when modeling subsequent domains. To this end, we propose an Iterative Continual Learning (ICL) framework for multi-domain neural machine translation. Specifically, when each new domain arrives, (1) we first build a pluggable incremental learning model, (2) then we design an iterative updating algorithm to continuously update the original model, which can be used flexibly for constructing subsequent domain models. Furthermore, we design a domain knowledge transfer mechanism to enhance the fine-grained domain-specific representation, thereby solving the word ambiguity caused by mixing domain data. Experimental results on the UM-Corpus and OPUS multi-domain datasets show the superior performance of our proposed model compared to representative baselines.</abstract>
       <url hash="eb9a2f52">2024.findings-emnlp.455</url>
@@ -25821,7 +25821,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yuxin</first><last>Li</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Liang</first><last>Pang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jun</first><last>Xu</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>7957-7970</pages>
       <abstract>Recently, Large Language Models (LLMs) have demonstrated a superior ability to serve as ranking models. However, concerns have arisen as LLMs will exhibit discriminatory ranking behaviors based on users’ sensitive attributes (gender). Worse still, in this paper, we identify a subtler form of discrimination in LLMs, termed <i>implicit ranking unfairness</i>, where LLMs exhibit discriminatory ranking patterns based solely on non-sensitive user profiles, such as user names. Such implicit unfairness is more widespread but less noticeable, threatening the ethical foundation. To comprehensively explore such unfairness, our analysis will focus on three research aspects: (1) We propose an evaluation method to investigate the severity of implicit ranking unfairness. (2) We uncover the reasons for causing such unfairness. (3) To mitigate such unfairness effectively, we utilize a pair-wise regression method to conduct fair-aware data augmentation for LLM fine-tuning. The experiment demonstrates that our method outperforms existing approaches in ranking fairness, achieving this with only a small reduction in accuracy. Lastly, we emphasize the need for the community to identify and mitigate the implicit unfairness, aiming to avert the potential deterioration in the reinforced human-LLMs ecosystem deterioration.</abstract>
       <url hash="6f5e85fc">2024.findings-emnlp.467</url>
@@ -25844,7 +25844,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Lukas</first><last>Lange</last><affiliation>Robert Bosch GmbH, Bosch</affiliation></author>
       <author><first>Heike</first><last>Adel</last><affiliation>Hochschule der Medien (University of Applied Sciences)</affiliation></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>7990-8000</pages>
       <abstract>To ensure large language models contain up-to-date knowledge, they need to be updated regularly. However, model editing is challenging as it might also affect knowledge that is unrelated to the new data. State-of-the-art methods identify parameters associated with specific knowledge and then modify them via direct weight updates. However, these locate-and-edit methods suffer from heavy computational overhead and lack theoretical validation. In contrast, directly fine-tuning the model on requested edits affects the model’s behavior on unrelated knowledge, and significantly damages the model’s generation fluency and consistency. To address these challenges, we propose SAUL, a streamlined model editing method that uses sentence concatenation with augmented random facts for generation regularization. Evaluations on three model editing benchmarks show that is a practical and reliable solution for model editing outperforming state-of-the-art methods while maintaining generation quality and reducing computational overhead.</abstract>
       <url hash="1472d64a">2024.findings-emnlp.469</url>
@@ -25966,7 +25966,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Qiming</first><last>Ge</last></author>
       <author><first>Zhiheng</first><last>Xi</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>8178-8188</pages>
       <abstract>Reinforcement Learning from Human Feedback (RLHF) has proven effective in aligning large language models with human intentions, yet it often relies on complex methodologies like Proximal Policy Optimization (PPO) that require extensive hyper-parameter tuning and present challenges in sample efficiency and stability. In this paper, we introduce Inverse-Q*, an innovative framework that transcends traditional RL methods by optimizing token-level reinforcement learning without the need for additional reward or value models. Inverse-Q* leverages direct preference optimization techniques but extends them by estimating the conditionally optimal policy directly from the model’s responses, facilitating more granular and flexible policy shaping. Our approach reduces reliance on human annotation and external supervision, making it especially suitable for low-resource settings. We present extensive experimental results demonstrating that Inverse-Q* not only matches but potentially exceeds the effectiveness of PPO in terms of convergence speed and the alignment of model responses with human preferences. Our findings suggest that Inverse-Q* offers a practical and robust alternative to conventional RLHF approaches, paving the way for more efficient and adaptable model training approaches.</abstract>
       <url hash="40b7e3e2">2024.findings-emnlp.478</url>
@@ -26295,7 +26295,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="504">
       <title>How Far Can In-Context Alignment Go? Exploring the State of In-Context Alignment</title>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Huashan</first><last>Sun</last></author>
       <author><first>Yu</first><last>Bai</last></author>
@@ -26336,7 +26336,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title><fixed-case>TWB</fixed-case>ias: A Benchmark for Assessing Social Bias in Traditional <fixed-case>C</fixed-case>hinese Large Language Models through a <fixed-case>T</fixed-case>aiwan Cultural Lens</title>
       <author><first>Hsin-Yi</first><last>Hsieh</last></author>
       <author><first>Shih-Cheng</first><last>Huang</last><affiliation>Appier Inc.</affiliation></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>National Central University</affiliation></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>National Central University</affiliation></author>
       <pages>8688-8704</pages>
       <url hash="b181a21e">2024.findings-emnlp.507</url>
       <attachment type="data" hash="6898df27">2024.findings-emnlp.507.data.zip</attachment>
@@ -26410,7 +26410,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shuaijie</first><last>She</last></author>
       <author><first>Chris</first><last>Wendler</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>8775-8782</pages>
       <abstract>Decoding by contrasting layers (DoLa), is designed to improve the generation quality of large language models (LLMs) by contrasting the prediction probabilities between an early exit output (amateur logits) and the final output (expert logits).However, we find that this approach does not work well on non-English tasks.Inspired by previous interpretability work on language transition during the model’s forward pass, we discover that this issue arises from a language mismatch between early exit output and final output.In this work, we propose an improved contrastive decoding algorithm that is effective for diverse languages beyond English.To obtain more helpful amateur logits, we devise two strategies to skip a set of bottom, language-agnostic layers based on our preliminary analysis.Experimental results on multilingual reasoning benchmarks demonstrate that our proposed method outperforms previous contrastive decoding baselines and substantially improves LLM’s chain-of-thought reasoning accuracy across 11 languages.</abstract>
       <url hash="c024015e">2024.findings-emnlp.512</url>
@@ -26424,7 +26424,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Tiancheng</first><last>Hu</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Anna-Carolina</first><last>Haensch</last><affiliation>University of Maryland, College Park and Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Michael A.</first><last>Hedderich</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>8783-8805</pages>
       <abstract>Recent advances in Large Language Models (LLMs) have sparked wide interest in validating and comprehending the human-like cognitive-behavioral traits LLMs may capture and convey. These cognitive-behavioral traits include typically Attitudes, Opinions, Values (AOVs). However, measuring AOVs embedded within LLMs remains opaque, and different evaluation methods may yield different results. This has led to a lack of clarity on how different studies are related to each other and how they can be interpreted. This paper aims to bridge this gap by providing a comprehensive overview of recent works on the evaluation of AOVs in LLMs. Moreover, we survey related approaches in different stages of the evaluation pipeline in these works. By doing so, we address the potential and challenges with respect to understanding the model, human-AI alignment, and downstream application in social sciences. Finally, we provide practical insights into evaluation methods, model enhancement, and interdisciplinary collaboration, thereby contributing to the evolving landscape of evaluating AOVs in LLMs.</abstract>
@@ -26436,7 +26436,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Low-Resource Machine Translation through the Lens of Personalized Federated Learning</title>
       <author><first>Viktor</first><last>Moskvoretskii</last><affiliation>Skolkovo Institute of Science and Technology</affiliation></author>
       <author><first>Nazarii</first><last>Tupitsa</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Samuel</first><last>Horváth</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Eduard</first><last>Gorbunov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Irina</first><last>Nikishina</last></author>
@@ -26475,7 +26475,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yang</first><last>Gao</last><affiliation>Google</affiliation></author>
       <author><first>Tim</first><last>Baumgärtner</last><affiliation>TU Darmstadt</affiliation></author>
       <author><first>Alex</first><last>Fabrikant</last><affiliation>Google Research</affiliation></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last><affiliation>Google</affiliation></author>
       <pages>8856-8872</pages>
       <abstract>Segmenting text into fine-grained units of meaning is important to a wide range of NLP applications. The default approach of segmenting text into sentences is often insufficient, especially since sentences are usually complex enough to include multiple units of meaning that merit separate treatment in the downstream task. We focus on the task of abstractive proposition segmentation (APS): transforming text into simple, self-contained, well-formed sentences. Several recent works have demonstrated the utility of proposition segmentation with few-shot prompted LLMs for downstream tasks such as retrieval-augmented grounding and fact verification. However, this approach does not scale to large amounts of text and may not always extract all the facts from the input text.In this paper, we first introduce evaluation metrics for the task to measure several dimensions of quality.We then propose a scalable, yet accurate, proposition segmentation model. We model proposition segmentation as a supervised task by training LLMs on existing annotated datasets and show that training yields significantly improved results. We further show that by using the fine-tuned LLMs (Gemini Pro and Gemini Ultra) as teachers for annotating large amounts of multi-domain synthetic distillation data, we can train smaller student models (Gemma 1 2B and 7B) with results similar to the teacher LLMs. We then demonstrate that our technique leads to effective domain generalization, by annotating data in two domains outside the original training data and evaluating on them. Finally, as a key contribution of the paper, we share an easy-to-use API for NLP practitioners to use.</abstract>
       <url hash="fa1bda1f">2024.findings-emnlp.517</url>
@@ -26527,7 +26527,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="521">
       <title><fixed-case>TS</fixed-case>-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Chengguang</first><last>Tang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Dading</first><last>Chong</last></author>
       <author><first>Ke</first><last>Shi</last></author>
@@ -26555,11 +26555,11 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="523">
       <title>Active Learning for Abstractive Text Summarization via <fixed-case>LLM</fixed-case>-Determined Curriculum and Certainty Gain Maximization</title>
       <author><first>Dongyuan</first><last>Li</last><affiliation>The University of Tokyo</affiliation></author>
-      <author><first>Ying</first><last>Zhang</last><affiliation>RIKEN</affiliation></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last><affiliation>RIKEN</affiliation></author>
       <author><first>Zhen</first><last>Wang</last></author>
       <author><first>Shiyin</first><last>Tan</last></author>
       <author><first>Satoshi</first><last>Kosugi</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>8959-8971</pages>
       <abstract>For abstractive text summarization, laborious data annotation and time-consuming model training become two high walls, hindering its further progress. Active Learning, selecting a few informative instances for annotation and model training, sheds light on solving these issues. However, only few active learning-based studies focus on abstractive text summarization and suffer from low stability, effectiveness, and efficiency. To solve the problems, we propose a novel LLM-determined curriculum active learning framework. Firstly, we design a prompt to ask large language models to rate the difficulty of instances, which guides the model to train on from easier to harder instances. Secondly, we design a novel active learning strategy, i.e., Certainty Gain Maximization, enabling to select instances whose distribution aligns well with the overall distribution. Experiments show our method can improve stability, effectiveness, and efficiency of abstractive text summarization backbones.</abstract>
       <url hash="30cd4acb">2024.findings-emnlp.523</url>
@@ -26684,7 +26684,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Ru</first><last>Peng</last></author>
       <author><first>Tengxiao</first><last>Liu</last><affiliation>University of California, Santa Barbara</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>9113-9129</pages>
       <abstract>The training process of large language models (LLMs) often involves varying degrees of test data contamination. Although current LLMs are achieving increasingly better performance on various benchmarks, their performance in practical applications does not always match their benchmark results. Leakage of benchmarks can prevent the accurate assessment of LLMs’ true performance. However, constructing new benchmarks is costly, labor-intensive and still carries the risk of leakage. Therefore, in this paper, we ask the question Can we reuse these leaked benchmarks for LLM evaluation? We propose Inference-Time Decontamination (ITD) to address this issue by detecting and rewriting leaked samples without altering their difficulties. ITD can mitigate performance inflation caused by memorizing leaked benchmarks. Our proof-of-concept experiments demonstrate that ITD reduces inflated accuracy by 22.9% on GSM8K and 19.0% on MMLU. On MMLU, using Inference-time Decontamination can lead to a decrease in the results of Phi3 and Mistral by 6.7% and 3.6% respectively. We hope that ITD can provide more truthful evaluation results for large language models.</abstract>
       <url hash="1fe20de2">2024.findings-emnlp.532</url>
@@ -26810,7 +26810,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="542">
       <title><fixed-case>S</fixed-case>table<fixed-case>PT</fixed-case> : Towards Stable Prompting for Few-shot Learning via Input Separation</title>
       <author><first>Xiaoming</first><last>Liu</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <author><first>Zhaohan</first><last>Zhang</last></author>
       <author><first>Chengzhengxu</first><last>Li</last></author>
       <author><first>Longtian</first><last>Wang</last></author>
@@ -26852,7 +26852,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="545">
       <title>Towards Implicit Bias Detection and Mitigation in Multi-Agent <fixed-case>LLM</fixed-case> Interactions</title>
       <author><first>Angana</first><last>Borah</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>9306-9326</pages>
       <abstract>As Large Language Models (LLMs) continue to evolve, they are increasingly being employed in numerous studies to simulate societies and execute diverse social tasks. However, LLMs are susceptible to societal biases due to their exposure to human-generated data. Given that LLMs are being used to gain insights into various societal aspects, it is essential to mitigate these biases. To that end, our study investigates the presence of implicit gender biases in multi-agent LLM interactions and proposes two strategies to mitigate these biases. We begin by creating a dataset of scenarios where implicit gender biases might arise, and subsequently develop a metric to assess the presence of biases. Our empirical analysis reveals that LLMs generate outputs characterized by strong implicit bias associations (<tex-math>\geq \approx 50\%</tex-math> of the time). Furthermore, these biases tend to escalate following multi-agent interactions. To mitigate them, we propose two strategies: self-reflection with in-context examples (ICE); and supervised fine-tuning. Our research demonstrates that both methods effectively mitigate implicit biases, with the ensemble of fine-tuning and self-reflection proving to be the most successful.</abstract>
       <url hash="ead7905f">2024.findings-emnlp.545</url>
@@ -26879,7 +26879,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Zhiheng</first><last>Lyu</last></author>
       <author><first>Zhijing</first><last>Jin</last></author>
       <author><first>Fernando</first><last>Gonzalez Adauto</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Bernhard</first><last>Schölkopf</last><affiliation>ELLIS Institute and Max Planck Institute for Intelligent Systems, Max-Planck Institute</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <pages>9353-9372</pages>
@@ -27015,7 +27015,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="557">
       <title>How Entangled is Factuality and Deception in <fixed-case>G</fixed-case>erman?</title>
       <author><first>Aswathy</first><last>Velutharambath</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
-      <author><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
       <pages>9538-9554</pages>
       <abstract>The statement “The earth is flat” is factually inaccurate, but if someone truly believes and argues in its favor, it is not deceptive. Research on deception detection and fact checking often conflates factual accuracy with the truthfulness of statements. This assumption makes it difficult to (a) study subtle distinctions and interactions between the two and (b) gauge their effects on downstream tasks. The belief-based deception framework disentangles these properties by defining texts as deceptive when there is a mismatch between what people say and what they truly believe. In this study, we assess if presumed patterns of deception generalize to German language texts. We test the effectiveness of computational models in detecting deception using an established corpus of belief-based argumentation. Finally, we gauge the impact of deception on the downstream task of fact checking and explore if this property confounds verification models. Surprisingly, our analysis finds no correlation with established cues of deception. Previous work claimed that computational models can outperform humans in deception detection accuracy, however, our experiments show that both traditional and state-of-the-art models struggle with the task, performing no better than random guessing. For fact checking, we find that natural language inference-based verification performs worse on non-factual and deceptive content, while prompting large language models for the same task is less sensitive to these properties.</abstract>
@@ -27037,8 +27037,8 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="559">
       <title>A <fixed-case>LLM</fixed-case>-based Ranking Method for the Evaluation of Automatic Counter-Narrative Generation</title>
       <author><first>Irune</first><last>Zubiaga</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>University of the Basque Country. UPV/EHU.</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>University of the Basque Country. UPV/EHU.</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>9572-9585</pages>
       <abstract>This paper proposes a novel approach to evaluate Counter Narrative (CN) generation using a Large Language Model (LLM) as an evaluator. We show that traditional automatic metrics correlate poorly with human judgements and fail to capture the nuanced relationship between generated CNs and human perception. To alleviate this, we introduce a model ranking pipeline based on pairwise comparisons of generated CNs from different models, organized in a tournament-style format. The proposed evaluation method achieves a high correlation with human preference, with a ρ score of 0.88. As an additional contribution, we leverage LLMs as zero-shot CN generators and provide a comparative analysis of chat, instruct, and base models, exploring their respective strengths and limitations. Through meticulous evaluation, including fine-tuning experiments, we elucidate the differences in performance and responsiveness to domain-specific data. We conclude that chat-aligned models in zero-shot are the best option for carrying out the task, provided they do not refuse to generate an answer due to security concerns.</abstract>
       <url hash="1a475618">2024.findings-emnlp.559</url>
@@ -27055,7 +27055,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Songfang</first><last>Huang</last><affiliation>Peking University</affiliation></author>
       <author><first>Li</first><last>Zongsheng</last></author>
       <author><first>Ehsan</first><last>Hoque</last><affiliation>University of Rochester, University of Rochester and University of Rochester</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <author><first>Yue</first><last>Zhang</last><affiliation>Westlake University</affiliation></author>
       <pages>9586-9608</pages>
       <abstract>Open Information Extraction (OpenIE) represents a crucial NLP task aimed at deriving structured information from unstructured text, unrestricted by relation type or domain. This survey paper provides an overview of OpenIE technologies spanning from 2007 to 2024, emphasizing a chronological perspective absent in prior surveys. It examines the evolution of task settings in OpenIE to align with the advances in recent technologies. The paper categorizes OpenIE approaches into rule-based, neural, and pre-trained large language models, discussing each within a chronological framework. Additionally, it highlights prevalent datasets and evaluation metrics currently in use. Building on this extensive review, this paper systematically reviews the evolution of task settings, data, evaluation metrics, and methodologies in the era of large language models, highlighting their mutual influence, comparing their capabilities, and examining their implications for open challenges and future research directions.</abstract>
@@ -27107,7 +27107,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Naomi</first><last>Fuchs</last></author>
       <author><first>Joshua</first><last>Darmon</last></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Eduardo</first><last>Sánchez</last><affiliation>University College London, University of London and Meta</affiliation></author>
       <pages>9647-9665</pages>
       <abstract>Recent advancements in massively multilingual machine translation systems have significantly enhanced translation accuracy; however, even the best performing systems still generate hallucinations, severely impacting user trust. Detecting hallucinations in Machine Translation (MT) remains a critical challenge, particularly since existing methods excel with High-Resource Languages (HRLs) but exhibit substantial limitations when applied to Low-Resource Languages (LRLs). This paper evaluates sentence-level hallucination detection approaches using Large Language Models (LLMs) and semantic similarity within massively multilingual embeddings. Our study spans 16 language directions, covering HRLs, LRLs, with diverse scripts. We find that the choice of model is essential for performance. On average, for HRLs, Llama3-70B outperforms the previous state of the art by as much as 0.16 MCC (Matthews Correlation Coefficient). However, for LRLs we observe that Claude Sonnet outperforms other LLMs on average by 0.03 MCC. The key takeaway from our study is that LLMs can achieve performance comparable or even better than previously proposed models, despite not being explicitly trained for any machine translation task. However, their advantage is less significant for LRLs.</abstract>
@@ -27147,8 +27147,8 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yu</first><last>Li</last><affiliation>Columbia University and University of California, Davis</affiliation></author>
       <author><first>Devamanyu</first><last>Hazarika</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Di</first><last>Jin</last><affiliation>Meta</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>9695-9713</pages>
       <abstract>Self-anthropomorphism in robots manifests itself through their display of human-like characteristics in dialogue, such as expressing preferences and emotions. Our study systematically analyzes self-anthropomorphic expression within various dialogue datasets, outlining the contrasts between self-anthropomorphic and non-self-anthropomorphic responses in dialogue systems. We show significant differences in these two types of responses and propose transitioning from one type to the other. We also introduce Pix2Persona, a novel dataset aimed at developing ethical and engaging AI systems in various embodiments. This dataset preserves the original dialogues from existing corpora and enhances them with paired responses: self-anthropomorphic and non-self-anthropomorphic for each original bot response. Our work not only uncovers a new category of bot responses that were previously under-explored but also lays the groundwork for future studies about dynamically adjusting self-anthropomorphism levels in AI systems to align with ethical standards and user expectations.</abstract>
       <url hash="ea9c06c0">2024.findings-emnlp.567</url>
@@ -27306,7 +27306,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="578">
       <title>Understanding Faithfulness and Reasoning of Large Language Models on Plain Biomedical Summaries</title>
       <author><first>Biaoyan</first><last>Fang</last><affiliation>CSIRO</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>CSIRO</affiliation></author>
       <pages>9890-9911</pages>
       <abstract>Generating plain biomedical summaries with Large Language Models (LLMs) can enhance the accessibility of biomedical knowledge to the public. However, how faithful the generated summaries are remains an open yet critical question. To address this, we propose FaReBio, a benchmark dataset with expert-annotated Faithfulness and Reasoning on plain Biomedical Summaries. This dataset consists of 175 plain summaries ($,445 sentences) generated by seven different LLMs, paired with source articles. Using our dataset, we identify the performance gap of LLMs in generating faithful plain biomedical summaries and observe a negative correlation between abstractiveness and faithfulness. We also show that current faithfulness evaluation metrics do not work well in the biomedical domain and confirm the over-confident tendency of LLMs as faithfulness evaluators. To better understand the faithfulness judgements, we further benchmark LLMs in retrieving supporting evidence and show the gap of LLMs in reasoning faithfulness evaluation at different abstractiveness levels. Going beyond the binary faithfulness labels, coupled with the annotation of supporting sentences, our dataset could further contribute to the understanding of faithfulness evaluation and reasoning.</abstract>
@@ -27348,7 +27348,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Tsutomu</first><last>Hirao</last><affiliation>NTT Communication Science Laboratories</affiliation></author>
       <author><first>Naoki</first><last>Kobayashi</last><affiliation>LegalOn Technologies</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Akisato</first><last>Kimura</last><affiliation>NTT Corporation</affiliation></author>
       <pages>9943-9958</pages>
       <abstract>This paper tackles a new task: discourse parsing for videos, inspired by text discourse parsing based on Rhetorical Structure Theory (RST). The task aims to construct an RST tree for a video to represent its storyline and illustrate the event relationships. We first construct a benchmark dataset by identifying events with their time spans, providing corresponding captions, and constructing RST trees with events as leaves. We then evaluate baseline approaches to video RST parsing: the ‘parsing after captioning’ framework and parsing via visual features. The results show that a parser using gold captions performed the best, while parsers relying on generated captions performed the worst; a parser using visual features provided intermediate performance. However, we observed that parsing via visual features could be improved by pre-training it with video captioning designed to produce a coherent video story. Furthermore, we demonstrated that RST trees obtained from videos contribute to multimodal summarization consisting of keyframes with texts.</abstract>
@@ -27493,7 +27493,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="591">
       <title>On the Similarity of Circuits across Languages: a Case Study on the Subject-verb Agreement Task</title>
       <author><first>Javier</first><last>Ferrando</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>10115-10125</pages>
       <abstract>Several algorithms implemented by language models have recently been successfully reversed-engineered. However, these findings have been concentrated on specific tasks and models, leaving it unclear how universal circuits are across different settings. In this paper, we study the circuits implemented by Gemma 2B for solving the subject-verb agreement task across two different languages, English and Spanish. We discover that both circuits are highly consistent, being mainly driven by a particular attention head writing a ‘subject number’ signal to the last residual stream, which is read by a small set of neurons in the final MLPs. Notably, this subject number signal is represented as a direction in the residual stream space, and is language-independent. Finally, we demonstrate this direction has a causal effect on the model predictions, effectively flipping the Spanish predicted verb number by intervening with the direction found in English.</abstract>
       <url hash="04a00337">2024.findings-emnlp.591</url>
@@ -27516,7 +27516,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Who’s Who: Large Language Models Meet Knowledge Conflicts in Practice</title>
       <author><first>Quang Hieu</first><last>Pham</last><affiliation>VinAI Research</affiliation></author>
       <author><first>Hoang</first><last>Ngo</last><affiliation>VinAI Research</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Dat Quoc</first><last>Nguyen</last><affiliation>VinAI Research, Vietnam</affiliation></author>
       <pages>10142-10151</pages>
       <abstract>Retrieval-augmented generation (RAG) methods are viable solutions for addressing the static memory limits of pre-trained language models. Nevertheless, encountering conflicting sources of information within the retrieval context is an inevitable practical challenge. In such situations, the language models are recommended to transparently inform users about the conflicts rather than autonomously deciding what to present based on their inherent biases. To analyze how current large language models (LLMs) align with our recommendation, we introduce WhoQA, a public benchmark dataset to examine model’s behavior in knowledge conflict situations. We induce conflicts by asking about a common property among entities having the same name, resulting in questions with up to 8 distinctive answers. WhoQA evaluation set includes 5K questions across 13 Wikidata property types and 150K Wikipedia entities. Our experiments show that despite the simplicity of WhoQA questions, knowledge conflicts significantly degrades LLMs’ performance in RAG settings.</abstract>
@@ -27604,7 +27604,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title><fixed-case>M</fixed-case>obile<fixed-case>VLM</fixed-case>: A Vision-Language Model for Better Intra- and Inter-<fixed-case>UI</fixed-case> Understanding</title>
       <author><first>Qinzhuo</first><last>Wu</last></author>
       <author><first>Weikai</first><last>Xu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>xiaomi</affiliation></author>
       <author><first>Tao</first><last>Tan</last></author>
       <author><first>Liujian</first><last>Liujianfeng</last></author>
       <author><first>Ang</first><last>Li</last><affiliation>Software, Xiaomi Inc.</affiliation></author>
@@ -27622,7 +27622,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Fan</first><last>Bai</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Junmo</first><last>Kang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Gabriel</first><last>Stanovsky</last><affiliation>Hebrew University of Jerusalem</affiliation></author>
-      <author><first>Dayne</first><last>Freitag</last><affiliation>SRI International</affiliation></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last><affiliation>SRI International</affiliation></author>
       <author><first>Mark</first><last>Dredze</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
       <author><first>Alan</first><last>Ritter</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>10252-10273</pages>
@@ -27680,10 +27680,10 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Ayrton</first><last>San Joaquin</last></author>
       <author><first>Bin</first><last>Wang</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Brian</first><last>Lim</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>10324-10335</pages>
       <abstract>Despite advancements, fine-tuning Large Language Models (LLMs) remains costly due to the extensive parameter count and substantial data requirements for model generalization. Accessibility to computing resources remains a barrier for the open-source community. To address this challenge, we propose the In2Core algorithm, which selects a coreset by analyzing the correlation between training and evaluation samples with a trained model. Notably, we assess the model’s internal gradients to estimate this relationship, aiming to rank the contribution of each training point. To enhance efficiency, we propose an optimization to compute influence functions with a reduced number of layers while achieving similar accuracy. By applying our algorithm to instruction fine-tuning data of LLMs, we can achieve similar performance with just 50% of the training data. Meantime, using influence functions to analyze model coverage to certain testing samples could provide a reliable and interpretable signal on the training set’s coverage of those test points.</abstract>
       <url hash="e82f3b38">2024.findings-emnlp.604</url>
@@ -27840,7 +27840,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Zixuan</first><last>Ling</last></author>
       <author><first>Cenyuan</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10501-10512</pages>
       <abstract>Conventional federated learning primarily aims to secure the privacy of data distributed across multiple edge devices, with the global model dispatched to edge devices for parameter updates during the learning process. However, the development of large language models (LLMs) requires substantial data and computational resources, rendering them valuable intellectual properties for their developers and owners. To establish a mechanism that protects both data and model privacy in a federated learning context, we introduce a method that just needs to distribute a quantized version of the model’s parameters during training. This method enables accurate gradient estimations for parameter updates while preventing clients from accessing a model whose performance is comparable to the centrally hosted one. Moreover, we combine this quantization strategy with LoRA, a popular and parameter-efficient fine-tuning method, to significantly reduce communication costs in federated learning. The proposed framework, named FedLPP, successfully ensures both data and model privacy in the federated learning context. Additionally, the learned central model exhibits good generalization and can be trained in a resource-efficient manner.</abstract>
       <url hash="0f32ebed">2024.findings-emnlp.615</url>
@@ -28018,12 +28018,12 @@ and high variation in performance on the subset, suggesting our plausibility cri
     <paper id="628">
       <title><fixed-case>PDF</fixed-case>-to-Tree: Parsing <fixed-case>PDF</fixed-case> Text Blocks into a Tree</title>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Wenbin</first><last>Lai</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
+      <author><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10704-10714</pages>
       <abstract>In many PDF documents, the reading order of text blocks is missing, which can hinder machine understanding of the document’s content.Existing works try to extract one universal reading order for a PDF file.However, applications, like Retrieval Augmented Generation (RAG), require breaking long articles into sections and subsections for better indexing.For this reason, this paper introduces a new task and dataset, PDF-to-Tree, which organizes the text blocks of a PDF into a tree structure.Since a PDF may contain thousands of text blocks, far exceeding the number of words in a sentence, this paper proposes a transition-based parser that uses a greedy strategy to build the tree structure.Compared to parser for plain text, we also use multi-modal features to encode the parser state.Experiments show that our approach achieves an accuracy of 93.93%, surpassing the performance of baseline methods by an improvement of 6.72%.</abstract>
       <url hash="395ea38b">2024.findings-emnlp.628</url>
@@ -28094,7 +28094,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Together We Can: Multilingual Automatic Post-Editing for Low-Resource Languages</title>
       <author><first>Sourabh</first><last>Deoghare</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>10800-10812</pages>
       <abstract>This exploratory study investigates the potential of multilingual Automatic Post-Editing (APE) systems to enhance the quality of machine translations for low-resource Indo-Aryan languages. Focusing on two closely related language pairs, English-Marathi and English-Hindi, we exploit the linguistic similarities to develop a robust multilingual APE model. To facilitate cross-linguistic transfer, we generate synthetic Hindi-Marathi and Marathi-Hindi APE triplets. Additionally, we incorporate a Quality Estimation (QE)-APE multi-task learning framework. While the experimental results underline the complementary nature of APE and QE, we also observe that QE-APE multitask learning facilitates effective domain adaptation. Our experiments demonstrate that the multilingual APE models outperform their corresponding English-Hindi and English-Marathi single-pair models by 2.5 and 2.39 TER points, respectively, with further notable improvements over the multilingual APE model observed through multi-task learning (<tex-math>+1.29</tex-math> and <tex-math>+1.44</tex-math> TER points), data augmentation (<tex-math>+0.53</tex-math> and <tex-math>+0.45</tex-math> TER points) and domain adaptation (<tex-math>+0.35</tex-math> and <tex-math>+0.45</tex-math> TER points). We release the synthetic data, code, and models accrued during this study publicly for further research.</abstract>
       <url hash="074e119a">2024.findings-emnlp.634</url>
@@ -28119,8 +28119,8 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Xuan</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Zifeng</first><last>Ren</last></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>10836-10863</pages>
       <url hash="6ff1b6ac">2024.findings-emnlp.636</url>
       <bibkey>zhang-etal-2024-ask</bibkey>
@@ -28161,7 +28161,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Raja</first><last>Kumar</last><affiliation>CNRS</affiliation></author>
       <author><first>Kishan</first><last>Maharaj</last></author>
       <author><first>Ashita</first><last>Saxena</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>10901-10916</pages>
       <abstract>Mental disorders pose a global challenge, aggravated by the shortage of qualified mental health professionals. Mental disorder prediction from social media posts by current LLMs is challenging due to the complexities of sequential text data and the limited context length of language models. Current language model-based approaches split a single data instance into multiple chunks to compensate for limited context size. The predictive model is then applied to each chunk individually, and the most voted output is selected as the final prediction. This results in the loss of inter-post dependencies and important time variant information, leading to poor performance. We propose a novel framework which first compresses the large sequence of chronologically ordered social media posts into a series of numbers. We then use this time variant representation for mental disorder classification. We demonstrate the generalization capabilities of our framework by outperforming the current SOTA in three different mental conditions: depression, self-harm, and anorexia, by an absolute improvement of 5% in the F1 score. We also investigate the situation when current data instances fall within the context length of language models and present empirical results highlighting the importance of temporal properties of textual data. Furthermore, we utilize the proposed framework for a cross-domain study, exploring commonalities across disorders and the possibility of inter-domain data usage.</abstract>
       <url hash="a1e62e9c">2024.findings-emnlp.639</url>
@@ -28173,8 +28173,8 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yiming</first><last>Chen</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Xianghu</first><last>Yue</last></author>
       <author><first>Xiaoxue</first><last>Gao</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Robby T.</first><last>Tan</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong (Shenzhen); National University of Singapore and National University of Singapore</affiliation></author>
       <pages>10917-10930</pages>
@@ -28187,7 +28187,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Multimodal Procedural Planning via Dual Text-Image Prompting</title>
       <author><first>Yujie</first><last>Lu</last><affiliation>UC Santa Barbara</affiliation></author>
       <author><first>Pan</first><last>Lu</last><affiliation>Stanford University</affiliation></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Wanrong</first><last>Zhu</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Xin Eric</first><last>Wang</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
@@ -28267,7 +28267,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Lingrui</first><last>Mei</last></author>
       <author><first>Hongcheng</first><last>Gao</last></author>
       <author><first>Yilong</first><last>Xu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <pages>11071-11083</pages>
       <abstract>The parametric knowledge memorized by large language models (LLMs) becomes outdated quickly. In-context editing (ICE) is currently the most effective method for updating the knowledge of LLMs. Recent advancements involve enhancing ICE by modifying the decoding strategy, obviating the need for altering internal model structures or adjusting external prompts.However, this enhancement operates across the entire sequence generation, encompassing a plethora of non-critical tokens.In this work, we introduce **A**daptive **T**oken **Bias**er (ATBias), a new decoding technique designed to enhance ICE.It focuses on the tokens that are mostly related to knowledge during decoding, biasing their logits by matching key entities related to new and parametric knowledge.Experimental results show that ATBias significantly enhances ICE performance, achieving up to a 32.3% improvement over state-of-the-art ICE methods while incurring only half the latency.ATBias not only improves the knowledge editing capabilities of ICE but can also be widely applied to LLMs with negligible cost.</abstract>
       <url hash="bbf46cf9">2024.findings-emnlp.647</url>
@@ -28337,7 +28337,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
     </paper>
     <paper id="652">
       <title>Retrieval-Augmented Code Generation for Situated Action Generation: A Case Study on <fixed-case>M</fixed-case>inecraft</title>
-      <author><first>Chalamalasetti</first><last>Kranti</last></author>
+      <author id="kranti-chalamalasetti"><first>Chalamalasetti</first><last>Kranti</last></author>
       <author><first>Sherzod</first><last>Hakimov</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>11159-11170</pages>
@@ -28367,7 +28367,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Wei</first><last>Tang</last></author>
       <author><first>Bo</first><last>Wang</last><affiliation>School of Computer Science &amp; Technology, Beijing Institute of Technology</affiliation></author>
       <author><first>Qianru</first><last>Sun</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Shuicheng</first><last>Yan</last><affiliation>Sea Ltd.</affiliation></author>
       <pages>11185-11208</pages>
       <abstract>This paper introduces the innovative “LLMs-as-Instructors” framework, which leverages the advanced Large Language Models (LLMs) to autonomously enhance the training of smaller target models. Inspired by the theory of “Learning from Errors”, this framework employs an instructor LLM to meticulously analyze the specific errors within a target model, facilitating targeted and efficient training cycles. Within this framework, we implement two strategies: “Learning from Error,” which focuses solely on incorrect responses to tailor training data, and “Learning from Error by Contrast,” which uses contrastive learning to analyze both correct and incorrect responses for a deeper understanding of errors. Our empirical studies, conducted with several open-source models, demonstrate significant improvements across multiple benchmarks, including mathematical reasoning, coding abilities, and factual knowledge. Notably, the refined Llama-3-8b-Instruction has outperformed ChatGPT, illustrating the effectiveness of our approach. By leveraging the strengths of both strategies, we have attained a more balanced performance improvement on both in-domain and out-of-domain benchmarks.</abstract>
@@ -28423,7 +28423,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Xiaonan</first><last>Li</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qinyuan</first><last>Cheng</last></author>
       <author><first>Kai</first><last>Ding</last><affiliation>INTSIG Information</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <pages>11263-11282</pages>
       <abstract>Fact knowledge memorization is crucial for Large Language Models (LLM) to generate factual and reliable responses. However, the behaviors of LLM fact memorization remain under-explored. In this paper, we analyze the scaling laws for LLM’s fact knowledge and LLMs’ behaviors of memorizing different types of facts. We find that LLMs’ fact knowledge capacity has a linear and negative exponential law relationship with model size and training epochs, respectively. Estimated by the built scaling law, memorizing the whole Wikidata’s facts requires training an LLM with 1000B non-embed parameters for 100 epochs, suggesting that using LLMs to memorize all public facts is almost implausible for a general pre-training setting. Meanwhile, we find that LLMs can generalize on unseen fact knowledge and its scaling law is similar to general pre-training. Additionally, we analyze the compatibility and preference of LLMs’ fact memorization. For compatibility, we find LLMs struggle with memorizing redundant facts in a unified way. Only when correlated facts have the same direction and structure, the LLM can compatibly memorize them. This shows the inefficiency of LLM memorization for redundant facts. For preference, the LLM pays more attention to memorizing more frequent and difficult facts, and the subsequent facts can overwrite prior facts’ memorization, which significantly hinders low-frequency facts memorization. Our findings reveal the capacity and characteristics of LLMs’ fact knowledge learning, which provide directions for LLMs’ fact knowledge augmentation.</abstract>
@@ -28435,7 +28435,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Breaking the Script Barrier in Multilingual Pre-Trained Language Models with Transliteration-Based Post-Training Alignment</title>
       <author><first>Orgest</first><last>Xhelili</last></author>
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>11283-11296</pages>
       <url hash="5e931acd">2024.findings-emnlp.659</url>
       <bibkey>xhelili-etal-2024-breaking</bibkey>
@@ -28529,7 +28529,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Junjie</first><last>Wang</last></author>
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Qing</first><last>Wang</last><affiliation>Institute of Software, Chinese Academy of Sciences</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>11379-11390</pages>
       <abstract>Text-to-Image Diffusion Models (T2I DMs) have garnered significant attention for their ability to generate high-quality images from textual descriptions.However, these models often produce images that do not fully align with the input prompts, resulting in semantic inconsistencies.The most prominent issue among these semantic inconsistencies is catastrophic-neglect, where the images generated by T2I DMs miss key objects mentioned in the prompt.We first conduct an empirical study on this issue, exploring the prevalence of catastrophic-neglect, potential mitigation strategies with feature enhancement, and the insights gained.Guided by the empirical findings, we propose an automated repair approach named Patcher to address catastrophic-neglect in T2I DMs.Specifically, Patcher first determines whether there are any neglected objects in the prompt, and then applies attention-guided feature enhancement to these neglected objects, resulting in a repaired prompt.Experimental results on three versions of Stable Diffusion demonstrate that Patcher effectively repairs the issue of catastrophic-neglect, achieving 10.1%-16.3% higher Correct Rate in image generation compared to baselines.</abstract>
       <url hash="a837d167">2024.findings-emnlp.665</url>
@@ -28621,7 +28621,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <title>Consistent Document-level Relation Extraction via Counterfactuals</title>
       <author><first>Ali</first><last>Modarressi</last><affiliation>Center for Information and Language Processing, LMU Munich</affiliation></author>
       <author><first>Abdullatif</first><last>Köksal</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>11501-11507</pages>
       <abstract>Many datasets have been developed to train and evaluate document-level relation extraction (RE) models. Most of these are constructed using real-world data. It has been shown that RE models trained on real-world data suffer from factual biases. To evaluate and address this issue, we present CovEReD, a counterfactual data generation approach for document-level relation extraction datasets using entity replacement. We first demonstrate that models trained on factual data exhibit inconsistent behavior: while they accurately extract triples from factual data, they fail to extract the same triples after counterfactual modification. This inconsistency suggests that models trained on factual data rely on spurious signals such as specific entities and external knowledge – rather than on context – to extract triples. We show that by generating document-level counterfactual data with CovEReD and training models on them, consistency is maintained with minimal impact on RE performance. We release our CovEReD pipeline as well as Re-DocRED-CF, a dataset of counterfactual RE documents, to assist in evaluating and addressing inconsistency in document-level RE.</abstract>
       <url hash="007dc20d">2024.findings-emnlp.672</url>
@@ -28640,7 +28640,7 @@ and high variation in performance on the subset, suggesting our plausibility cri
       <author><first>Yang</first><last>Yang</last></author>
       <author id="ning-liu-tsinghua"><first>Ning</first><last>Liu</last><affiliation>Shandong University</affiliation></author>
       <author><first>Jingfeng</first><last>Zhang</last><affiliation>RIKEN and University of Auckland</affiliation></author>
-      <author><first>Zhe</first><last>Wang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <pages>11508-11518</pages>
       <abstract>Compared to identifying binary versions of the same function under different compilation options, existing Learning-Based Binary Code Similarity Detection (LB-BCSD) methods exhibit lower accuracy in recognizing functions with the same functionality but different implementations. To address this issue, we introduces an adversarial attack method called FuncFooler, which focuses on perturbing critical code to generate multiple variants of the same function. These variants are then used to retrain the model to enhance its robustness. Current adversarial attacks against LB-BCSD mainly draw inspiration from the FGSM (Fast Gradient Sign Method) method in the image domain, which involves generating adversarial bytes and appending them to the end of the executable file. However, this approach has a significant drawback: the appended bytes do not affect the actual code of the executable file, thus failing to create diverse code variants. To overcome this limitation, we proposes a gradient-guided adversarial attack method based on critical code—FuncFooler. This method designs a series of strategies to perturb the code while preserving the program’s semantics. Specifically, we first utilizes gradient information to locate critical nodes in the control flow graph. Then, fine-grained perturbations are applied to these nodes, including control flow, data flow, and internal node perturbations, to obtain adversarial samples. The experimental results show that the application of the FuncFooler method can increase the accuracy of the latest LB-BCSD model by 5%-7%.</abstract>
       <url hash="4aa9ba33">2024.findings-emnlp.673</url>
@@ -28696,7 +28696,7 @@ hai-coaching/</abstract>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Cameron</first><last>Jen</last></author>
       <author><first>Kangjie</first><last>Zheng</last></author>
-      <author><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last><affiliation>University of Alberta</affiliation></author>
       <author><first>Lili</first><last>Mou</last><affiliation>University of Alberta</affiliation></author>
       <pages>11572-11583</pages>
       <abstract>Length-control summarization aims to condense long texts into a short one within a certain length limit. Previous approaches often use autoregressive (AR) models and treat the length requirement as a soft constraint, which may not always be satisfied. In this study, we propose a novel length-control decoding algorithm based on the directed acyclic Transformer (DAT). Our approach allows for multiple plausible sequence fragments and predicts a <i>path</i> to connect them. In addition, we propose a Sequence Maximum a Posteriori (Seq-MAP) decoding algorithm that marginalizes different possible paths and finds the most probable summary satisfying the length budget. Our algorithm is based on beam search, which further facilitates a reranker for performance improvement. Experimental results on the Gigaword dataset demonstrate our state-of-the-art performance for length-control summarization.</abstract>
@@ -28719,7 +28719,7 @@ hai-coaching/</abstract>
     <paper id="679">
       <title>Not (yet) the whole story: Evaluating Visual Storytelling Requires More than Measuring Coherence, Grounding, and Repetition</title>
       <author><first>Aditya K</first><last>Surikuchi</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <pages>11597-11611</pages>
       <abstract>Visual storytelling consists in generating a natural language story given a temporally ordered sequence of images. This task is not only challenging for models, but also very difficult to evaluate with automatic metrics since there is no consensus about what makes a story ‘good’. In this paper, we introduce a novel method that measures story quality in terms of human likeness regarding three key aspects highlighted in previous work: visual grounding, coherence, and repetitiveness. We then use this method to evaluate the stories generated by several models, showing that the foundation model LLaVA obtains the best result, but only slightly so compared to TAPM, a 50-times smaller visual storytelling model. Upgrading the visual and language components of TAPM results in a model that yields competitive performance with a relatively low number of parameters. Finally, we carry out a human evaluation study, whose results suggest that a ‘good’ story may require more than a human-like level of visual grounding, coherence, and repetition.</abstract>
@@ -28732,7 +28732,7 @@ hai-coaching/</abstract>
     <paper id="680">
       <title>Gender Identity in Pretrained Language Models: An Inclusive Approach to Data Creation and Probing</title>
       <author><first>Urban</first><last>Knupleš</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
       <author><first>Filip</first><last>Miletić</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>11612-11631</pages>
       <abstract>Pretrained language models (PLMs) have been shown to encode binary gender information of text authors, raising the risk of skewed representations and downstream harms. This effect is yet to be examined for transgender and non-binary identities, whose frequent marginalization may exacerbate harmful system behaviors. Addressing this gap, we first create TRANsCRIPT, a corpus of YouTube transcripts from transgender, cisgender, and non-binary speakers. Using this dataset, we probe various PLMs to assess if they encode the gender identity information, examining both frozen and fine-tuned representations as well as representations for inputs with author-specific words removed. Our findings reveal that PLM representations encode information for all gender identities but to different extents. The divergence is most pronounced for cis women and non-binary individuals, underscoring the critical need for gender-inclusive approaches to NLP systems.</abstract>
@@ -28779,7 +28779,7 @@ hai-coaching/</abstract>
       <author><first>Jisu</first><last>Shin</last><affiliation>Korea Advanced Institute of Science &amp; Technology and Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sukmin</first><last>Cho</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>SeungYoon</first><last>Han</last></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>11670-11686</pages>
       <abstract>Trolling in online communities typically involves disruptive behaviors such as provoking anger and manipulating discussions, leading to a polarized atmosphere and emotional distress. Robust moderation is essential for mitigating these negative impacts and maintaining a healthy and constructive community atmosphere. However, effectively addressing trolls is difficult because their behaviors vary widely and require different response strategies (RSs) to counter them. This diversity makes it challenging to choose an appropriate RS for each specific situation.To address this challenge, our research investigates whether humans have preferred strategies tailored to different types of trolling behaviors.Our findings reveal a correlation between the types of trolling encountered and the preferred RS. In this paper, we introduce a methodology for generating counter-responses to trolls by recommending appropriate RSs, supported by a dataset aligning these strategies with human preferences across various troll contexts. The experimental results demonstrate that our proposed approach guides constructive discussion and reduces the negative effects of trolls, thereby enhancing the online community environment.</abstract>
       <url hash="e658c0db">2024.findings-emnlp.683</url>
@@ -28790,7 +28790,7 @@ hai-coaching/</abstract>
       <title>Soda-Eval: Open-Domain Dialogue Evaluation in the age of <fixed-case>LLM</fixed-case>s</title>
       <author><first>John</first><last>Mendonça</last><affiliation>Instituto Superior Técnico</affiliation></author>
       <author><first>Isabel</first><last>Trancoso</last><affiliation>Instituto Superior Técnico</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Phrase and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Phrase and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>11687-11708</pages>
       <abstract>Although human evaluation remains the gold standard for open-domain dialogue evaluation, the growing popularity of automated evaluation using Large Language Models (LLMs) has also extended to dialogue. However, most frameworks leverage benchmarks that assess older chatbots on aspects such as fluency and relevance, which are not reflective of the challenges associated with contemporary models. In fact, a qualitative analysis on Soda. (Kim et al., 2023), a GPT-3.5 generated dialogue dataset, suggests that current chatbots may exhibit several recurring issues related to coherence and commonsense knowledge, but generally produce highly fluent and relevant responses.Noting the aforementioned limitations, this paper introduces Soda-Eval, an annotated dataset based on Soda that covers over 120K turn-level assessments across 10K dialogues, where the annotations were generated by GPT-4. Using Soda-Eval as a benchmark, we then study the performance of several open-access instruction-tuned LLMs, finding that dialogue evaluation remains challenging. Fine-tuning these models improves performance over few-shot inferences, both in terms of correlation and explanation.</abstract>
       <url hash="c237e109">2024.findings-emnlp.684</url>
@@ -28868,7 +28868,7 @@ hai-coaching/</abstract>
     <paper id="690">
       <title><fixed-case>CSLM</fixed-case>: A Framework for Question Answering Dataset Generation through Collaborative Small Language Models</title>
       <author><first>Yiming</first><last>Wang</last><affiliation>Noah’s Ark Lab, Huawei Technologies Ltd.</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Lingchen</first><last>Wang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>An</first><last>Xiao</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <pages>11816-11825</pages>
@@ -28907,7 +28907,7 @@ hai-coaching/</abstract>
       <author><first>Fuli</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Fengbin</first><last>Zhu</last></author>
       <author><first>Qifan</first><last>Wang</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11858-11875</pages>
       <abstract>Self-detection for Large Language Models (LLMs) seeks to evaluate the trustworthiness of the LLM’s output by leveraging its own capabilities, thereby alleviating the issue of output hallucination. However, existing self-detection approaches only retrospectively evaluate answers generated by LLM, typically leading to the over-trust in incorrectly generated answers. To tackle this limitation, we propose a novel self-detection paradigm that considers the comprehensive answer space beyond LLM-generated answers. It thoroughly compares the trustworthiness of multiple candidate answers to mitigate the over-trust in LLM-generated incorrect answers. Building upon this paradigm, we introduce a two-step framework, which firstly instructs LLM to reflect and provide justifications for each candidate answer, and then aggregates the justifications for comprehensive target answer evaluation. This framework can be seamlessly integrated with existing approaches for superior self-detection. Extensive experiments on six datasets spanning three tasks demonstrate the effectiveness of the proposed framework.</abstract>
       <url hash="a04aab25">2024.findings-emnlp.693</url>
@@ -28969,7 +28969,7 @@ hai-coaching/</abstract>
       <author><first>Chengwei</first><last>Wei</last></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
       <author><first>Geyu</first><last>Lin</last><affiliation>Institute of Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>11939-11950</pages>
       <abstract>As the rapidly advancing domain of natural language processing (NLP), large language models (LLMs) have emerged as powerful tools for interpreting human commands and generating text across various tasks. Nonetheless, the resilience of LLMs to handle text containing inherent errors, stemming from human interactions and collaborative systems, has not been thoroughly explored. Our study investigates the resilience of LLMs against five common types of disruptions including 1) ASR (Automatic Speech Recognition) errors, 2) OCR (Optical Character Recognition) errors, 3) grammatical mistakes, 4) typographical errors, and 5) distractive content. We aim to investigate how these models react by deliberately embedding these errors into instructions. Our findings reveal that while some LLMs show a degree of resistance to certain types of noise, their overall performance significantly suffers. This emphasizes the importance of further investigation into enhancing model resilience. In response to the observed decline in performance, our study also evaluates a “re-pass” strategy, designed to purify the instructions of noise before the LLMs process them. Our analysis indicates that correcting noisy instructions, particularly for open-source LLMs, presents significant challenges.</abstract>
       <url hash="583d6bd7">2024.findings-emnlp.697</url>
@@ -29076,7 +29076,7 @@ hai-coaching/</abstract>
     <paper id="705">
       <title>The Shape of Word Embeddings: Quantifying Non-Isometry with Topological Data Analysis</title>
       <author><first>Ondřej</first><last>Draganov</last></author>
-      <author><first>Steven</first><last>Skiena</last><affiliation>State University of New York - Stony Brook, Stony Brook University, SUNY at Stony Brook, , State University of New York at Stony Brook and State University of New York at Stony Brook</affiliation></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last><affiliation>State University of New York - Stony Brook, Stony Brook University, SUNY at Stony Brook, , State University of New York at Stony Brook and State University of New York at Stony Brook</affiliation></author>
       <pages>12080-12099</pages>
       <abstract>Word embeddings represent language vocabularies as clouds of <tex-math>d</tex-math>-dimensional points. We investigate how information is conveyed by the general shape of these clouds, instead of representing the semantic meaning of each token. Specifically, we use the notion of persistent homology from topological data analysis (TDA) to measure the distances between language pairs from the shape of their unlabeled embeddings. These distances quantify the degree of non-isometry of the embeddings. To distinguish whether these differences are random training errors or capture real information about the languages, we use the computed distance matrices to construct language phylogenetic trees over 81 Indo-European languages. Careful evaluation shows that our reconstructed trees exhibit strong and statistically-significant similarities to the reference.</abstract>
       <url hash="1f382ab1">2024.findings-emnlp.705</url>
@@ -29176,7 +29176,7 @@ hai-coaching/</abstract>
       <author><first>Dilshod</first><last>Azizov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Zain Muhammad</first><last>Mujahid</last></author>
       <author><first>Hilal</first><last>AlQuabeh</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Shangsong</first><last>Liang</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <pages>12217-12231</pages>
       <abstract>In an era where information is quickly shared across many cultural and language contexts, the neutrality and integrity of news media are essential. Ensuring that media content remains unbiased and factual is crucial for maintaining public trust. With this in mind, we introduce SAFARI (CroSs-lingual BiAs and Factuality Detection in News MediA and News ARtIcles), a novel corpus of news media and articles for predicting political bias and the factuality of reporting in a multilingual and cross-lingual setup. To the best of our knowledge, this corpus is unprecedented in its collection and introduces a dataset for political bias and factuality for three tasks: (i) media-level, (ii) article-level, and (iii) joint modeling at the article-level. At the media and article levels, we evaluate the cross-lingual ability of the models; however, in joint modeling, we evaluate on English data. Our frameworks set a new benchmark in the cross-lingual evaluation of political bias and factuality. This is achieved through the use of various Multilingual Pre-trained Language Models (MPLMs) and Large Language Models (LLMs) coupled with ensemble learning methods.</abstract>
@@ -29243,7 +29243,7 @@ hai-coaching/</abstract>
       <author><first>Shehzaad</first><last>Dhuliawala</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <author><first>Yahang</first><last>Qi</last></author>
       <author><first>Bernhard</first><last>Schölkopf</last><affiliation>ELLIS Institute and Max Planck Institute for Intelligent Systems, Max-Planck Institute</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <pages>12309-12325</pages>
       <abstract>Implicit Personalization (IP) is a phenomenon of language models inferring a user’s background from the implicit cues in the input prompts and tailoring the response based on this inference. While previous work has touched upon various instances of this problem, there lacks a unified framework to study this behavior. This work systematically studies IP through a rigorous mathematical formulation, a multi-perspective moral reasoning framework, and a set of case studies. Our theoretical foundation for IP relies on a structural causal model and introduces a novel method, indirect intervention, to estimate the causal effect of a mediator variable that cannot be directly intervened upon. Beyond the technical approach, we also introduce a set of moral reasoning principles based on three schools of moral philosophy to study when IP may or may not be ethically appropriate. Equipped with both mathematical and ethical insights, we present three diverse case studies illustrating the varied nature of the IP problem and offer recommendations for future research.</abstract>
@@ -29257,7 +29257,7 @@ hai-coaching/</abstract>
       <title>When the Misidentified Adverbial Phrase Functions as a Complement</title>
       <author><first>Yige</first><last>Chen</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Kyuwon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
       <author><first>Jungyeul</first><last>Park</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Chulwoo</first><last>Park</last><affiliation>Anyang University</affiliation></author>
       <pages>12326-12336</pages>
@@ -29333,7 +29333,7 @@ hai-coaching/</abstract>
       <author><first>William</first><last>Brandon</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Radostin</first><last>Cholakov</last><affiliation>Computer Science and Artificial Intelligence Laboratory, Electrical Engineering &amp; Computer Science</affiliation></author>
       <author><first>Jonathan</first><last>Ragan-Kelley</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>12419-12433</pages>
       <abstract>The deployment of large language models (LLMs) is often constrained by memory bandwidth, where the primary bottleneck is the cost of transferring model parameters from the GPU’s global memory to its registers. When coupled with custom kernels that fuse the dequantization and matmul operations, weight-only quantization can thus enable faster inference by reducing the amount of memory movement. However, developing high-performance kernels for weight-quantized LLMs presents substantial challenges, especially when the weights are compressed to non-evenly-divisible bit widths (e.g., 3 bits) with non-uniform, lookup table (LUT) quantization. This paper describes FLUTE, a flexible lookup table engine for LUT-quantized LLMs, which uses offline restructuring of the quantized weight matrix to minimize bit manipulations associated with unpacking, and vectorization and duplication of the lookup table to mitigate shared memory bandwidth constraints. At batch sizes &lt; 32 and quantization group size of 128 (typical in LLM inference), the FLUTE kernel can be 2-4x faster than existing GEMM kernels. As an application of FLUTE, we explore a simple extension to lookup table-based NormalFloat quantization and apply it to quantize LLaMA3 to various configurations, obtaining competitive quantization performance against strong baselines while obtaining an end-to-end throughput increase of 1.5 to 2 times.</abstract>
@@ -29431,7 +29431,7 @@ hai-coaching/</abstract>
     <paper id="731">
       <title>Diverse and Effective Synthetic Data Generation for Adaptable Zero-Shot Dialogue State Tracking</title>
       <author><first>James D.</first><last>Finch</last><affiliation>Emory University</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>12527-12544</pages>
       <abstract>We demonstrate substantial performance gains in zero-shot dialogue state tracking (DST) by enhancing training data diversity through synthetic data generation.Existing DST datasets are severely limited in the number of application domains and slot types they cover due to the high costs of data collection, restricting their adaptability to new domains.This work addresses this challenge with a novel, fully automatic data generation approach that creates synthetic zero-shot DST datasets.Distinguished from previous methods, our approach can generate dialogues across a massive range of application domains, complete with silver-standard dialogue state annotations and slot descriptions.This technique is used to create the D0T dataset for training zero-shot DST models, encompassing an unprecedented 1,000+ domains. Experiments on the MultiWOZ benchmark show that training models on diverse synthetic data improves Joint Goal Accuracy by 6.7%, achieving results competitive with models 13.5 times larger than ours.</abstract>
       <url hash="ce1fdbd7">2024.findings-emnlp.731</url>
@@ -29480,7 +29480,7 @@ hai-coaching/</abstract>
       <author><first>Ruobing</first><last>Xie</last></author>
       <author><first>Wenqi</first><last>Sun</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Leyu</first><last>Lin</last><affiliation>WeChat, Tencent</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>12580-12592</pages>
       <abstract>With recommender systems broadly deployed in various online platforms, many efforts have been devoted to learning user preferences and building effective sequential recommenders. However, existing work mainly focuses on capturing user implicit preferences from historical interactions and simply matching them with the next behavior, instead of predicting user explicit intentions. This may lead to inappropriate recommendations. In light of this issue, we propose the adversarial user intention learning approach for sequential recommendaiton, named AuriSRec. The major novelty of our approach is to explicitly predict user current intentions when making recommendations, by inferring their decision-making process as explained in target reviews (reviews written after interacting with the ground-truth item). Specifically, AuriSRec conducts adversarial learning between an intention generator and a discriminator. The generator predicts user intentions by taking their historical reviews and behavioral sequences as inputs, while target reviews provide guidance. Beyond typical sequential modeling methods in the field of natural language process (NLP), a decoupling-based review encoder and a hybrid attention fusion mechanism are introduced to filter noise and enhance the generation capacity. On the other hand, the discriminator determines whether the intention is generated or real based on their matching degree to the target item, thereby guiding the generator to produce gradually improved intentions. Extensive experiments on five real-world datasets demonstrate the effectiveness of our approach.</abstract>
@@ -29588,7 +29588,7 @@ hai-coaching/</abstract>
       <author><first>David S.</first><last>Batista</last><affiliation>deepset</affiliation></author>
       <author><first>Christina</first><last>Wille</last></author>
       <author><first>Aoife</first><last>Cahill</last><affiliation>Dataminr</affiliation></author>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>12705-12722</pages>
       <abstract>Humanitarian organizations can enhance their effectiveness by analyzing data to discover trends, gather aggregated insights, manage their security risks, support decision-making, and inform advocacy and funding proposals. However, data about violent incidents with direct impact and relevance for humanitarian aid operations is not readily available. An automatic data collection and NLP-backed classification framework aligned with humanitarian perspectives can help bridge this gap. In this paper, we present HumVI – a dataset comprising news articles in three languages (English, French, Arabic) containing instances of different types of violent incidents categorized by the humanitarian sector they impact, e.g., aid security, education, food security, health, and protection. Reliable labels were obtained for the dataset by partnering with a data-backed humanitarian organization, Insecurity Insight. We provide multiple benchmarks for the dataset, employing various deep learning architectures and techniques, including data augmentation and mask loss, to address different task-related challenges, e.g., domain expansion. The dataset is publicly available at https://github.com/dataminr-ai/humvi-dataset.</abstract>
@@ -29673,7 +29673,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="749">
       <title><fixed-case>SQFT</fixed-case>: Low-cost Model Adaptation in Low-precision Sparse Foundation Models</title>
-      <author><first>Juan Pablo</first><last>Munoz</last><affiliation>Intel</affiliation></author>
+      <author id="juan-pablo-munoz"><first>Juan Pablo</first><last>Munoz</last><affiliation>Intel</affiliation></author>
       <author><first>Jinjie</first><last>Yuan</last><affiliation>Intel</affiliation></author>
       <author><first>Nilesh</first><last>Jain</last><affiliation>Intel</affiliation></author>
       <pages>12817-12832</pages>
@@ -29751,7 +29751,7 @@ hai-coaching/</abstract>
       <title>Multi-Target Cross-Lingual Summarization: a novel task and a language-neutral approach</title>
       <author><first>Diogo</first><last>Pernes</last><affiliation>OutSystems and Universidade do Porto</affiliation></author>
       <author><first>Gonçalo M.</first><last>Correia</last><affiliation>Priberam Informática SA</affiliation></author>
-      <author><first>Afonso</first><last>Mendes</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
       <pages>12908-12924</pages>
       <abstract>Cross-lingual summarization aims to bridge language barriers by summarizing documents in different languages. However, ensuring semantic coherence across languages is an overlooked challenge and can be critical in several contexts. To fill this gap, we introduce multi-target cross-lingual summarization as the task of summarizing a document into multiple target languages while ensuring that the produced summaries are semantically similar. We propose a principled re-ranking approach to this problem and a multi-criteria evaluation protocol to assess semantic coherence across target languages, marking a first step that will hopefully stimulate further research on this problem.</abstract>
       <url hash="c61f0d16">2024.findings-emnlp.755</url>
@@ -29962,7 +29962,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="771">
       <title>What Matters in Memorizing and Recalling Facts? Multifaceted Benchmarks for Knowledge Probing in Language Models</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Naoki</first><last>Yoshinaga</last><affiliation>Institute of Industrial Science, the University of Tokyo</affiliation></author>
       <author><first>Daisuke</first><last>Oba</last><affiliation>ELYZA</affiliation></author>
       <pages>13186-13214</pages>
@@ -30035,7 +30035,7 @@ hai-coaching/</abstract>
       <title>Exploring the Potential of Multimodal <fixed-case>LLM</fixed-case> with Knowledge-Intensive Multimodal <fixed-case>ASR</fixed-case></title>
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
       <author><first>Yuxia</first><last>Wang</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University and University of Cambridge</affiliation></author>
       <author><first>Reza</first><last>Haf</last><affiliation>Monash University</affiliation></author>
       <pages>13274-13288</pages>
@@ -30051,7 +30051,7 @@ hai-coaching/</abstract>
       <author><first>Sewoong</first><last>Oh</last></author>
       <author><first>Ludwig</first><last>Schmidt</last><affiliation>Stanford University</affiliation></author>
       <author><first>Jason E</first><last>Weston</last><affiliation>New York University and Facebook</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <author><first>Xian</first><last>Li</last><affiliation>Facebook AI</affiliation></author>
       <pages>13289-13308</pages>
       <abstract>We propose a new method, instruction back-and-forth translation, to improve the quality of instruction-tuning data used for aligning large language models (LLMs). Given preprocessed texts from an initial web corpus (e.g. Dolma (Soldaini et al., 2024)), we generate synthetic instructions using the backtranslation approach proposed by Li et al., (2023), filter the generated data and rewrite the responses to improve their quality further based on the initial texts. Given similar quantities of instructions, fine-tuning Llama-2 on our (synthetic instruction, rewritten response) pairs yields better AlpacaEval win rates than using other common instruction datasets such as Humpback, ShareGPT, Open Orca, Alpaca-GPT4 and Self-instruct, at both 7B and 70B parameter scales. We also demonstrate that rewriting the responses with an LLM is different from direct distillation: the former process yields better win rate at 70B scale, and the two text distributions exhibit significant distinction in the embedding space. Besides, we provide analyses showing that our backtranslated instructions are of higher quality than other sources of synthetic instructions, while our responses are more diverse and complex than what can be obtained from distillation. Overall we find that instruction back-and-forth translation combines the best of both worlds—making use of the information diversity and quantity found on the web, while ensuring the quality of the responses which is necessary for effective alignment.</abstract>
@@ -30065,7 +30065,7 @@ hai-coaching/</abstract>
       <author><first>Zhao</first><last>Jin</last></author>
       <author><first>Siddharth</first><last>Parekh</last></author>
       <author><first>Sameena</first><last>Shah</last><affiliation>J.P. Morgan Chase</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>13309-13328</pages>
       <abstract>Forms constitute a large portion of layout-rich documents that convey information through key-value pairs. Form understanding involves two main tasks, namely, the identification of keys and values (a.k.a Key Information Extraction or KIE) and the association of keys to corresponding values (a.k.a. Relation Extraction or RE). State of the art models for form understanding often rely on training paradigms that yield poorly calibrated output probabilities and low performance on RE. In this paper, we present AliGATr, a graph-based model that uses a generative objective to represent complex grid-like layouts that are often found in forms. Using a grid-based graph topology, our model learns to generate the layout of each page token by token in a data efficient manner. Despite using 30% fewer parameters than the smallest SotA, AliGATr performs on par with or better than SotA models on the KIE and RE tasks against four datasets. We also show that AliGATr’s output probabilities are better calibrated and do not exhibit the over-confident distributions of other SotA models.</abstract>
       <url hash="109c62af">2024.findings-emnlp.778</url>
@@ -30110,7 +30110,7 @@ hai-coaching/</abstract>
       <author><first>Ajay</first><last>Patel</last></author>
       <author><first>Kanishk</first><last>Singh</last><affiliation>Moveworks</affiliation></author>
       <author><first>Chris</first><last>Callison-Burch</last><affiliation>Allen Institute for Artificial Intelligence and University of Pennsylvania</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
       <pages>13376-13390</pages>
       <abstract>The goal of text style transfer is to transform the style of texts while preserving their original meaning, often with only a few examples of the target style. Existing style transfer methods generally rely on the few-shot capabilities of large language models or on complex controllable text generation approaches that are inefficient and underperform on fluency metrics. We introduce TinyStyler, a lightweight but effective approach, which leverages a small language model (800M params) and pre-trained authorship embeddings to perform efficient, few-shot text style transfer. We evaluate on the challenging task of authorship style transfer and find TinyStyler outperforms strong approaches such as GPT-4. We also evaluate TinyStyler’s ability to perform text attribute style transfer (formal <tex-math>\leftrightarrow</tex-math> informal) with automatic and human evaluations and find that the approach outperforms recent controllable text generation methods.</abstract>
@@ -30221,7 +30221,7 @@ hai-coaching/</abstract>
     <paper id="790">
       <title>Topic Modeling: Contextual Token Embeddings Are All You Need</title>
       <author><first>Dimo</first><last>Angelov</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <pages>13528-13539</pages>
       <abstract>The goal of topic modeling is to find meaningful topics that capture the information present in a collection of documents. The main challenges of topic modeling are finding the optimal number of topics, labeling the topics, segmenting documents by topic, and evaluating topic model performance. Current neural approaches have tackled some of these problems but none have been able to solve all of them. We introduce a novel topic modeling approach, Contextual-Top2Vec, which uses document contextual token embeddings, it creates hierarchical topics, finds topic spans within documents and labels topics with phrases rather than just words. We propose the use of BERTScore to evaluate topic coherence and to evaluate how informative topics are of the underlying documents. Our model outperforms the current state-of-the-art models on a comprehensive set of topic model evaluation metrics.</abstract>
       <url hash="d71ab623">2024.findings-emnlp.790</url>
@@ -30241,7 +30241,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="792">
       <title>Margin Matching Preference Optimization: Enhanced Model Alignment with Granular Feedback</title>
-      <author><first>Kyuyoung</first><last>Kim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
+      <author id="kyuyoung-kim"><first>Kyuyoung</first><last>Kim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Ah Jeong</first><last>Seo</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Hao</first><last>Liu</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Jinwoo</first><last>Shin</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
@@ -30323,7 +30323,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="798">
       <title>Reference-based Metrics Disprove Themselves in Question Generation</title>
-      <author><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="bang-nguyen"><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Mengxia</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Yun</first><last>Huang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
@@ -30461,7 +30461,7 @@ hai-coaching/</abstract>
     <paper id="809">
       <title><fixed-case>TOWER</fixed-case>: Tree Organized Weighting for Evaluating Complex Instructions</title>
       <author><first>Noah</first><last>Ziems</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>13803-13810</pages>
       <abstract>Evaluating the ability of large language models (LLMs) to follow complex human-written instructions is essential for their deployment in real-world applications. While benchmarks like Chatbot Arena use human judges to assess model performance, they are resource-intensive and time-consuming. Alternative methods using LLMs as judges, such as AlpacaEval, MT Bench, WildBench, and InFoBench offer improvements but still do not capture that certain complex instruction aspects are more important than others to follow.To address this gap, we propose a novel evaluation metric, TOWER, that incorporates human-judged importance into the assessment of complex instruction following. We show that human annotators agree with tree-based representations of these complex instructions nearly as much as they agree with other human annotators. We release tree-based annotations of the InFoBench dataset and the corresponding evaluation code to facilitate future research.</abstract>
@@ -30570,7 +30570,7 @@ hai-coaching/</abstract>
       <author><first>Xingyuan</first><last>Li</last></author>
       <author><first>Chunhao</first><last>Zhang</last></author>
       <author><first>Mengyue</first><last>Wu</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <pages>13972-13983</pages>
       <abstract>This paper attempts to discover communication patterns automatically within dog vocalizations in a data-driven approach, which breaks the barrier previous approaches that rely on human prior knowledge on limited data. We present a self-supervised approach with HuBERT, enabling the accurate classification of phones, and an adaptive grammar induction method that identifies phone sequence patterns that suggest a preliminary vocabulary within dog vocalizations. Our results show that a subset of this vocabulary has substantial causality relations with certain canine activities, suggesting signs of stable semantics associated with these “words”.</abstract>
       <url hash="21f58019">2024.findings-emnlp.816</url>
@@ -30651,7 +30651,7 @@ hai-coaching/</abstract>
       <author><first>Jainit Sushil</first><last>Bafna</last></author>
       <author><first>Kunal</first><last>Kartik</last></author>
       <author><first>Harshita</first><last>Khandelwal</last><affiliation>UCLA Computer Science Department, University of California, Los Angeles</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>Vivek</first><last>Gupta</last><affiliation>University of Pennsylvania, United States</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Dan</first><last>Roth</last><affiliation>University of Pennsylvania</affiliation></author>
@@ -30705,7 +30705,7 @@ hai-coaching/</abstract>
       <author><first>Wen</first><last>Cui</last></author>
       <author><first>Davan</first><last>Harrison</last></author>
       <author><first>Xin Eric</first><last>Wang</last><affiliation>University of California, Santa Cruz</affiliation></author>
-      <author><first>Marilyn</first><last>Walker</last><affiliation>University of California, Santa Cruz</affiliation></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <pages>14120-14157</pages>
       <abstract>Large language models (LLMs) capable of casual conversation have recently become widely available. We hypothesize that users of conversational systems want a more personalized experience, and existing work shows that users are highly receptive to personalized questions (PQs). Question Generation tasks, however, focus on factual questions from textual excerpts. To create a PQ generator, we first identify over 400 real user interests by anonymously aggregating ~39K user models. We then populate prompt templates with these 400 interests and use an LLM to generate PQs customized to user interests. The result is PerQs, a novel corpus of ~19K question/answer pairs. We evaluate PerQs at scale in the unique context of the Alexa Prize. Our results show significant positive effects on perceived conversation quality. We then fine-tune, deploy, and evaluate PerQy, a neural model that generates PQs in real-time. When evaluated against several competitive LLM baselines, PerQy produced the most natural and engaging responses.</abstract>
       <url hash="0ffcc9ea">2024.findings-emnlp.826</url>
@@ -30769,7 +30769,7 @@ hai-coaching/</abstract>
       <author><first>Aditya</first><last>Pillai</last></author>
       <author><first>Isabelle</first><last>Augenstein</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>14199-14230</pages>
       <abstract>The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the factual accuracy of their outputs. In this work, we present Factcheck-Bench, a holistic end-to-end framework for annotating and evaluating the factuality of LLM-generated responses, which encompasses a multi-stage annotation scheme designed to yield detailed labels for fact-checking and correcting not just the final prediction, but also the intermediate steps that a fact-checking system might need to take. Based on this framework, we construct an open-domain factuality benchmark in three-levels of granularity: claim, sentence, and document. We further propose a system, Factcheck-GPT, which follows our framework, and we show that it outperforms several popular LLM fact-checkers. We make our annotation tool, annotated data, benchmark, and code available at https://github.com/yuxiaw/Factcheck-GPT.</abstract>
       <url hash="976a0d9a">2024.findings-emnlp.830</url>
@@ -30782,8 +30782,8 @@ hai-coaching/</abstract>
       <author><first>Md Asib</first><last>Rahman</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>K S M Tozammel</first><last>Hossain</last><affiliation>University of North Texas</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute and Bosch</affiliation></author>
       <pages>14231-14244</pages>
       <abstract>Retrieval Augmented Generation (RAG) has been shown to enhance the factual accuracy of Large Language Models (LLMs) by providing external evidence, but existing methods often suffer from limited reasoning capabilities (e.g., multi-hop complexities) in effectively using such evidence, particularly when using open-source LLMs. To mitigate this gap, in this paper, we introduce a novel framework, **Open-RAG**, designed to enhance reasoning capabilities in RAG with open-source LLMs. Our framework transforms an arbitrary dense LLM into a parameter-efficient sparse mixture of experts (MoE) model capable of handling complex reasoning tasks, including both single- and multi-hop queries. Open-RAG uniquely trains the model to navigate challenging distractors that appear relevant but are misleading. By combining the constructive learning and architectural transformation, Open-RAG leverages latent learning, dynamically selecting relevant experts and integrating external knowledge effectively for more accurate and contextually relevant responses. Additionally, we propose a hybrid adaptive retrieval method to determine retrieval necessity and balance the trade-off between performance gain and inference speed. Experimental results show that Open-RAG outperforms state-of-the-art LLMs and RAG models in various knowledge-intensive tasks. Our method based on Llama2-7B sets new benchmarks, surpassing ChatGPT-RAG and Self-RAG. For example, in multi-hop HotpotQA, it achieves an EM score of 63.3, compared to RAG 2.0’s 54 and Command R+’s 60.</abstract>
       <url hash="712b1906">2024.findings-emnlp.831</url>
@@ -30793,7 +30793,7 @@ hai-coaching/</abstract>
     <paper id="832">
       <title>Cactus: Towards Psychological Counseling Conversations using Cognitive Behavioral Theory</title>
       <author><first>Suyeon</first><last>Lee</last><affiliation>Yonsei University</affiliation></author>
-      <author><first>Sunghwan</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last></author>
       <author><first>Minju</first><last>Kim</last></author>
       <author><first>Dongjin</first><last>Kang</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Dongil</first><last>Yang</last></author>
@@ -30847,7 +30847,7 @@ hai-coaching/</abstract>
       <author><first>Svitlana</first><last>Vakulenko</last><affiliation>Amazon</affiliation></author>
       <author><first>Ionut Teodor</first><last>Sorodoc</last><affiliation>Amazon</affiliation></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last><affiliation>Amazon and University of Cambridge</affiliation></author>
-      <author><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
       <pages>14301-14310</pages>
       <abstract>Long-form question answering (LFQA) aims at generating in-depth answers to end-user questions, providing relevant information beyond the direct answer. However, existing retrievers are typically optimized towards information that directly targets the question, missing out on such contextual information. Furthermore, there is a lack of training data for relevant context. To this end, we propose and compare different weak supervision techniques to optimize retrieval for contextual information. Experiments demonstrate improvements on the end-to-end QA performance on ASQA, a dataset for long-form question answering. Importantly, as more contextual information is retrieved, we improve the relevant page recall for LFQA by 14.7% and the groundedness of generated long-form answers by 12.5%. Finally, we show that long-form answers often anticipate likely follow-up questions, via experiments on a conversational QA dataset.</abstract>
       <url hash="2ebe94bf">2024.findings-emnlp.835</url>
@@ -30857,7 +30857,7 @@ hai-coaching/</abstract>
     <paper id="836">
       <title>Persuasiveness of Generated Free-Text Rationales in Subjective Decisions: A Case Study on Pairwise Argument Ranking</title>
       <author><first>Mohamed</first><last>Elaraby</last></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
       <author><first>Xiang Lorraine</first><last>Li</last></author>
       <author><first>Ahmed</first><last>Magooda</last><affiliation>Microsoft</affiliation></author>
       <pages>14311-14329</pages>
@@ -30918,7 +30918,7 @@ hai-coaching/</abstract>
       <title>How You Prompt Matters! <fixed-case>E</fixed-case>ven Task-Oriented Constraints in Instructions Affect <fixed-case>LLM</fixed-case>-Generated Text Detection</title>
       <author><first>Ryuto</first><last>Koike</last><affiliation>University of Pennsylvania and Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>14384-14395</pages>
       <abstract>To combat the misuse of Large Language Models (LLMs), many recent studies have presented LLM-generated-text detectors with promising performance. When users instruct LLMs to generate texts, the instruction can include different constraints depending on the user’s need. However, most recent studies do not cover such diverse instruction patterns when creating datasets for LLM detection. In this paper, we reveal that even task-oriented constraints — constraints that would naturally be included in an instruction and are not related to detection-evasion — cause existing powerful detectors to have a large variance in detection performance. We focus on student essay writing as a realistic domain and manually create task-oriented constraints based on several factors for essay quality. Our experiments show that the standard deviation (SD) of current detector performance on texts generated by an instruction with such a constraint is significantly larger (up to an SD of 14.4 F1-score) than that by generating texts multiple times or paraphrasing the instruction. We also observe an overall trend where the constraints can make LLM detection more challenging than without them. Finally, our analysis indicates that the high instruction-following ability of LLMs fosters the large impact of such constraints on detection performance.</abstract>
       <url hash="c5c8f68e">2024.findings-emnlp.841</url>
@@ -30933,7 +30933,7 @@ hai-coaching/</abstract>
       <author><first>Siyao</first><last>Peng</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Robert</first><last>Litschko</last></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>14396-14419</pages>
       <abstract>Human label variation (HLV) is a valuable source of information that arises when multiple human annotators provide different labels for valid reasons. In Natural Language Inference (NLI) earlier approaches to capturing HLV involve either collecting annotations from many crowd workers to represent human judgment distribution (HJD) or use expert linguists to provide detailed explanations for their chosen labels. While the former method provides denser HJD information, obtaining it is resource-intensive. In contrast, the latter offers richer textual information but it is challenging to scale up to many human judges. Besides, large language models (LLMs) are increasingly used as evaluators (“LLM judges”) but with mixed results, and few works aim to study HJDs. This study proposes to exploit LLMs to approximate HJDs using a small number of expert labels and explanations. Our experiments show that a few explanations significantly improve LLMs’ ability to approximate HJDs with and without explicit labels, thereby providing a solution to scale up annotations for HJD. However, fine-tuning smaller soft-label aware models with the LLM-generated model judgment distributions (MJDs) presents partially inconsistent results: while similar in distance, their resulting fine-tuned models and visualized distributions differ substantially. We show the importance of complementing instance-level distance measures with a global-level shape metric and visualization to more effectively evaluate MJDs against human judgment distributions.</abstract>
       <url hash="59591bd8">2024.findings-emnlp.842</url>
@@ -31010,7 +31010,7 @@ hai-coaching/</abstract>
       <author><first>Silvia</first><last>Casola</last></author>
       <author><first>Soda Marem</first><last>Lo</last></author>
       <author><first>Valerio</first><last>Basile</last><affiliation>University of Turin</affiliation></author>
-      <author><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
       <pages>14480-14494</pages>
       <abstract>Generating ironic content is challenging: it requires a nuanced understanding of context and implicit references and balancing seriousness and playfulness. Moreover, irony is highly subjective and can depend on various factors, such as social, cultural, or generational aspects. This paper explores whether Large Language Models (LLMs) can learn to generate ironic responses to social media posts. To do so, we fine-tune two models to generate ironic and non-ironic content and deeply analyze their outputs’ linguistic characteristics, their connection to the original post, and their similarity to the human-written replies. We also conduct a large-scale human evaluation of the outputs. Additionally, we investigate whether LLMs can learn a form of irony tied to a generational perspective, with mixed results.</abstract>
       <url hash="078b70de">2024.findings-emnlp.847</url>
@@ -31034,7 +31034,7 @@ hai-coaching/</abstract>
     <paper id="849">
       <title>Minimal Yet Big Impact: How <fixed-case>AI</fixed-case> Agent Back-channeling Enhances Conversational Engagement through Conversation Persistence and Context Richness</title>
       <author><first>Jin Yea</first><last>Jang</last><affiliation>Korea Electronics Technology Institute</affiliation></author>
-      <author><first>Saim</first><last>Shin</last><affiliation>Korea Electronics technology Institute</affiliation></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last><affiliation>Korea Electronics technology Institute</affiliation></author>
       <author><first>Gahgene</first><last>Gweon</last><affiliation>Seoul National University</affiliation></author>
       <pages>14509-14521</pages>
       <abstract>The increasing use of AI agents in conversational services, such as counseling, highlights the importance of back-channeling (BC) as an active listening strategy to enhance conversational engagement. BC improves conversational engagement by providing timely acknowledgments and encouraging the speaker to talk. This study investigates the effect of BC provided by an AI agent on conversational engagement, offering insights for future AI conversational service design. We conducted an experiment with 55 participants, divided into Todak_BC and Todak_NoBC groups based on the presence or absence of the BC feature in Todak, a conversational agent. Each participant engaged in nine sessions with predetermined subjects and questions. We collected and analyzed approximately 6 hours and 30 minutes of conversation logs to evaluate conversational engagement using both quantitative (conversation persistence, including conversation duration and number of utterances) and qualitative metrics (context richness, including self-disclosure and topic diversity). The findings reveal significantly higher conversational engagement in the Todak_BC group compared to the Todak_NoBC group across all metrics (p&lt;0.05). Additionally, the impact of BC varies across sessions, suggesting that conversation characteristics such as question type and topic sensitivity can influence BC effectiveness.</abstract>
@@ -31109,7 +31109,7 @@ hai-coaching/</abstract>
       <author><first>Shruti</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
       <author><first>Shoaib</first><last>Alam</last></author>
       <author><first>Husain</first><last>Malwat</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>14598-14613</pages>
       <abstract>The ever-increasing volume of paper submissions makes it difficult to stay informed about the latest state-of-the-art research. To address this challenge, we introduce LEGOBench, a benchmark for evaluating systems that generate scientific leaderboards. LEGOBench is curated from 22 years of preprint submission data on arXiv and more than 11k machine learning leaderboards on the PapersWithCode portal. We present a language model-based and four graph-based leaderboard generation task configuration. We evaluate popular encoder-only scientific language models as well as decoder-only large language models across these task configurations. State-of-the-art models showcase significant performance gaps in automatic leaderboard generation on LEGOBench. The code is available on GitHub and the dataset is hosted on OSF.</abstract>
       <url hash="486ae871">2024.findings-emnlp.855</url>
@@ -31135,9 +31135,9 @@ hai-coaching/</abstract>
       <author><first>Zhenlin</first><last>Su</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Mo</first><last>Yu</last><affiliation>WeChat AI, Tencent</affiliation></author>
       <author><first>Jin</first><last>Xu</last></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>14626-14641</pages>
       <abstract>Factual inconsistencies pose a significant hurdle for the faithful summarization by generative models. While a major direction to enhance inconsistency detection is to derive stronger Natural Language Inference (NLI) models, we propose an orthogonal aspect that underscores the importance of incorporating task-specific taxonomy into the inference. To this end, we consolidate key error types of inconsistent facts in summaries, and incorporate them to facilitate both the zero-shot and supervised paradigms of LLMs. Extensive experiments on ten datasets of five distinct domains suggest that, zero-shot LLM inference could benefit from the explicit solution space depicted by the error type taxonomy, and achieves state-of-the-art performance overall, surpassing specialized non-LLM baselines, as well as recent LLM baselines. We further distill models that fuse the taxonomy into parameters through our designed prompt completions and supervised training strategies, efficiently substituting state-of-the-art zero-shot inference with much larger LLMs.</abstract>
       <url hash="85d2c87d">2024.findings-emnlp.857</url>
@@ -31186,7 +31186,7 @@ hai-coaching/</abstract>
       <author><first>Muhammad Arslan</first><last>Manzoor</last></author>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>14683-14701</pages>
       <abstract>Empathy plays a pivotal role in fostering prosocial behavior, often triggered by the sharing of personal experiences through narratives. However, modeling empathy using NLP approaches remains challenging due to its deep interconnection with human interaction dynamics. Previous approaches, which involve fine-tuning language models (LMs) on human-annotated empathic datasets, have had limited success. In our pursuit of improving empathy understanding in LMs, we propose several strategies, including contrastive learning with masked LMs and supervised fine-tuning with large language models. While these methods show improvements over previous methods, the overall results remain unsatisfactory. To better understand this trend, we performed an analysis which reveals a low agreement among annotators. This lack of consensus hinders training and highlights the subjective nature of the task. We also explore the cultural impact on annotations. To study this, we meticulously collected story pairs in Urdu language and find that subjectivity in interpreting empathy among annotators appears to be independent of cultural background. Our systematic exploration of LMs’ understanding of empathy reveals substantial opportunities for further investigation in both task formulation and modeling.</abstract>
       <url hash="08810bf3">2024.findings-emnlp.861</url>
@@ -31198,7 +31198,7 @@ hai-coaching/</abstract>
       <author><first>Witold</first><last>Sosnowski</last></author>
       <author><first>Arkadiusz</first><last>Modzelewski</last></author>
       <author><first>Kinga</first><last>Skorupska</last><affiliation>Polish-Japanese Institute of Information Technology in Warsaw</affiliation></author>
-      <author><first>Jahna</first><last>Otterbacher</last><affiliation>Open University of Cyprus</affiliation></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last><affiliation>Open University of Cyprus</affiliation></author>
       <author><first>Adam</first><last>Wierzbicki</last><affiliation>Polish-Japanese Institute of Information Technology in Warsaw</affiliation></author>
       <pages>14702-14723</pages>
       <abstract>As narratives shape public opinion and influence societal actions, distinguishing between truthful and misleading narratives has become a significant challenge. To address this, we introduce the EU DisinfoTest, a novel benchmark designed to evaluate the efficacy of Language Models in identifying disinformation narratives. Developed through a Human-in-the-Loop methodology and grounded in research from EU DisinfoLab, the EU DisinfoTest comprises more than 1,300 narratives. Our benchmark includes persuasive elements under Logos, Pathos, and Ethos rhetorical dimensions. We assessed state-of-the-art LLMs, including the newly released GPT-4o, on their capability to perform zero-shot classification of disinformation narratives versus credible narratives. Our findings reveal that LLMs tend to regard narratives with authoritative appeals as trustworthy, while those with emotional appeals are frequently incorrectly classified as disinformative. These findings highlight the challenges LLMs face in nuanced content interpretation and suggest the need for tailored adjustments in LLM training to better handle diverse narrative structures.</abstract>
@@ -31285,7 +31285,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="869">
       <title>A Critical Look at Meta-evaluating Summarisation Evaluation Metrics</title>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>CSIRO</affiliation></author>
       <author><first>Biaoyan</first><last>Fang</last><affiliation>CSIRO</affiliation></author>
       <pages>14795-14808</pages>
@@ -31340,7 +31340,7 @@ hai-coaching/</abstract>
       <author><first>Jie</first><last>Chen</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yupeng</first><last>Zhang</last><affiliation>Beijing Baichuan Intelligence Technology Co., Ltd.</affiliation></author>
       <author><first>Bingning</first><last>Wang</last><affiliation>Baichuan Inc.</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Weipeng</first><last>Chen</last></author>
       <pages>14855-14865</pages>
@@ -31381,7 +31381,7 @@ hai-coaching/</abstract>
       <title>Analyzing Context Contributions in <fixed-case>LLM</fixed-case>-based Machine Translation</title>
       <author><first>Emmanouil</first><last>Zaranis</last><affiliation>Instituto Superior Técnico</affiliation></author>
       <author><first>Nuno M</first><last>Guerreiro</last><affiliation>Unbabel and Instituto Superior Técnico</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>14899-14924</pages>
       <abstract>Large language models (LLMs) have achieved state-of-the-art performance in machine translation (MT) and demonstrated the ability to leverage in-context learning through few-shot examples. However, the mechanisms by which LLMs use different parts of the input context remain largely unexplored. In this work, we provide a comprehensive analysis of context utilization in MT, studying how LLMs use various context parts, such as few-shot examples and the source text, when generating translations. We highlight several key findings: (1) the source part of few-shot examples appears to contribute more than its corresponding targets, irrespective of translation direction; (2) finetuning LLMs with parallel data alters the contribution patterns of different context parts; and (3) there is a positional bias where earlier few-shot examples have higher contributions to the translated sequence. Finally, we demonstrate that inspecting anomalous context contributions can potentially uncover pathological translations, such as hallucinations. Our findings shed light on the internal workings of LLM-based MT which go beyond those known for standard encoder-decoder MT models.</abstract>
       <url hash="a193da9c">2024.findings-emnlp.876</url>
@@ -31592,7 +31592,7 @@ hai-coaching/</abstract>
       <author><first>Helena</first><last>Wu</last><affiliation>Faculty of Arts of the University of Lisbon</affiliation></author>
       <author><first>Beatriz</first><last>Silva</last><affiliation>Unbabel</affiliation></author>
       <author><first>Daan Van</first><last>Stigt</last><affiliation>Unbabel and Unbabel</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>15222-15239</pages>
       <abstract>While machine translation (MT) systems are achieving increasingly strong performance on benchmarks, they often produce translations with errors and anomalies. Understanding these errors can potentially help improve the translation quality and user experience. This paper introduces xTower, an open large language model (LLM) built on top of TowerBase designed to provide free-text explanations for translation errors in order to guide the generation of a corrected translation. The quality of the generated explanations by xTower are assessed via both intrinsic and extrinsic evaluation. We ask expert translators to evaluate the quality of the explanations across two dimensions: relatedness towards the error span being explained and helpfulness in error understanding and improving translation quality. Extrinsically, we test xTower across various experimental setups in generating translation corrections, demonstrating significant improvements in translation quality. Our findings highlight xTower’s potential towards not only producing plausible and helpful explanations of automatic translations, but also leveraging them to suggest corrected translations.</abstract>
       <url hash="57262beb">2024.findings-emnlp.892</url>
@@ -31607,7 +31607,7 @@ hai-coaching/</abstract>
       <author><first>Yicheng</first><last>Xu</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Mingkun</first><last>Xu</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Institute of Innovative Research, Tokyo Institute of Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>15240-15253</pages>
       <abstract>Multi-modal machine translation (MMT) can reduce ambiguity and semantic distortion compared with traditional machine translation (MT) by utilizing auxiliary information such as images. However, current MMT methods face two primary challenges. The first is their underperformance compared to MT methods based on pre-trained models. The second is the inadequate exploitation and integration of the image modality within the model, primarily due to a lack of triplet training data. A mainstream approach is to introduce large amounts of parallel and monolingual data to train the text model and the visual model separately. However, incorporating extensive external data can result in data imbalance, which may introduce biases during training. Additionally, the collection and cleaning of such large datasets is labor-intensive. To overcome these challenges, we introduce a novel, low-cost, large language model-based data augmentation method called LAMBDA, which can enrich the original samples and expand the dataset without requiring external images and text. We propose a fine-grained image captioning module with a noise filter to hierarchically and accurately extract unexploited information from images. Additionally, we design two specific prompts to guide the GPT-3.5 model in generating enriched texts and the corresponding translations. The enriched samples contain diverse text and strong connections between text and images, leading to significant improvements for MMT baselines, with the highest being an increase of up to 3.83 BLEU score and 3.61 METEOR score.</abstract>
       <url hash="bb0710c6">2024.findings-emnlp.893</url>
@@ -31636,7 +31636,7 @@ hai-coaching/</abstract>
       <author><first>Jonathan</first><last>Rowe</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Bradford</first><last>Mott</last><affiliation>North Carolina State University</affiliation></author>
       <author><first>Snigdha</first><last>Chaturvedi</last><affiliation>Department of Computer Science, University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
+      <author id="james-lester"><first>James</first><last>Lester</last><affiliation>North Carolina State University</affiliation></author>
       <pages>15270-15283</pages>
       <abstract>Dialogue act recognition is the task of classifying conversational utterances based on their communicative intent or function. To address this problem, we propose a novel two-phase processing approach called Dual-Process Masking. This approach streamlines the task by masking less important tokens in the input, identified through retrospective analysis of their estimated contribution during training. It enhances interpretability by using the masks applied during classification learning. Dual-Process Masking significantly improves performance over strong baselines for dialogue act recognition on a collaborative problem-solving dataset and three public dialogue benchmarks.</abstract>
       <url hash="b153b172">2024.findings-emnlp.895</url>
@@ -31653,7 +31653,7 @@ hai-coaching/</abstract>
       <author><first>Valentina</first><last>Zantedeschi</last><affiliation>ServiceNow Research</affiliation></author>
       <author><first>David</first><last>Vazquez</last><affiliation>ServiceNow research</affiliation></author>
       <author><first>Nicolas</first><last>Chapados</last><affiliation>ServiceNow Research</affiliation></author>
-      <author><first>Christopher</first><last>Pal</last><affiliation>Polytechnique Montreal</affiliation></author>
+      <author id="christopher-pal"><first>Christopher</first><last>Pal</last><affiliation>Polytechnique Montreal</affiliation></author>
       <author><first>Perouz</first><last>Taslakian</last><affiliation>ServiceNow</affiliation></author>
       <pages>15284-15302</pages>
       <abstract>Prompts are often employed to condition decoder-only language model generation on reference information. Just-in-time processing of a context is inefficient due to the quadratic cost of self-attention operations, and caching is desirable. However, caching transformer states can easily require almost as much space as the model parameters. When the right context is not known in advance, caching the prompt can be challenging. This work addresses these limitations by introducing models that, inspired by the encoder-decoder architecture, use cross-attention to condition generation on reference text without the prompt. More precisely, we leverage pre-trained decoder-only models and only train a small number of added layers. We use Question-Answering (QA) as a testbed to evaluate the ability of our models to perform conditional generation and observe that they outperform prompt-based inference methods, are comparable to fine-tuned prompted LLMs, and drastically reduce the space footprint relative to standard KV caching by two orders of magnitude. Specifically, we introduced XC-Llama which converts a pre-trained Llama 2 into an encoder-decoder architecture by integrating cross-attention layers interleaved in between existing self-attention layers.</abstract>
@@ -31781,7 +31781,7 @@ hai-coaching/</abstract>
     <paper id="906">
       <title>Knowledge-Centric Templatic Views of Documents</title>
       <author><first>Isabel Alyssa</first><last>Cachola</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
-      <author><first>Silviu</first><last>Cucerzan</last><affiliation>Microsoft</affiliation></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last><affiliation>Microsoft</affiliation></author>
       <author><first>Allen</first><last>Herring</last></author>
       <author><first>Vuksan</first><last>Mijovic</last></author>
       <author><first>Erik</first><last>Oveson</last></author>
@@ -31838,7 +31838,7 @@ hai-coaching/</abstract>
       <author><first>Xinshu</first><last>Shen</last></author>
       <author><first>Hongyi</first><last>Wu</last></author>
       <author><first>Yadong</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Xiaopeng</first><last>Bai</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Shaoguang</first><last>Mao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
@@ -31900,7 +31900,7 @@ hai-coaching/</abstract>
     <paper id="915">
       <title>Merge to Learn: Efficiently Adding Skills to Language Models with Model Merging</title>
       <author><first>Jacob</first><last>Morrison</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last><affiliation>University of Washington, University of Washington, Allen Institute for Artificial Intelligence and University of Washington, Seattle</affiliation></author>
       <author><first>Pang Wei</first><last>Koh</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jesse</first><last>Dodge</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -32042,7 +32042,7 @@ hai-coaching/</abstract>
       <author><first>Tianyang</first><last>Liu</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <author><first>Tianyi</first><last>Li</last></author>
       <author><first>Liang</first><last>Cheng</last></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>15779-15786</pages>
       <abstract>Large Language Models (LLMs) are reported to hold undesirable attestation bias on inference tasks: when asked to predict if a premise <tex-math>P</tex-math> entails a hypothesis <tex-math>H</tex-math>, instead of considering <tex-math>H</tex-math>‘s conditional truthfulness entailed by <tex-math>P</tex-math>, LLMs tend to use the out-of-context truth label of <tex-math>H</tex-math> as a fragile proxy. In this paper, we propose a pipeline that exploits this bias to do explicit inductive inference. Our pipeline uses an LLM to transform a premise into a set of attested alternatives, and then aggregate answers of the derived new entailment inquiries to support the original inference prediction. On a directional predicate entailment benchmark, we demonstrate that by applying this simple pipeline, we can improve the overall performance of LLMs on inference and substantially alleviate the impact of their attestation bias.</abstract>
       <url hash="b4b9b06d">2024.findings-emnlp.926</url>
@@ -32229,7 +32229,7 @@ hai-coaching/</abstract>
       <author><first>Vaishnav</first><last>Tadiparthi</last><affiliation>Honda Research Institution US</affiliation></author>
       <author><first>Ehsan</first><last>Moradi Pari</last><affiliation>Honda Research Institute</affiliation></author>
       <author><first>Simon</first><last>Stepputtis</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Joseph</first><last>Campbell</last><affiliation>Purdue University</affiliation></author>
+      <author id="joseph-p-campbell"><first>Joseph</first><last>Campbell</last><affiliation>Purdue University</affiliation></author>
       <author><first>Katia P.</first><last>Sycara</last></author>
       <pages>16002-16014</pages>
       <abstract>The correct specification of reward models is a well-known challenge in reinforcement learning.Hand-crafted reward functions often lead to inefficient or suboptimal policies and may not be aligned with user values.Reinforcement learning from human feedback is a successful technique that can mitigate such issues, however, the collection of human feedback can be laborious.Recent works have solicited feedback from pre-trained large language models rather than humans to reduce or eliminate human effort, however, these approaches yield poor performance in the presence of hallucination and other errors.This paper studies the advantages and limitations of reinforcement learning from large language model feedback and proposes a simple yet effective method for soliciting and applying feedback as a potential-based shaping function.We theoretically show that inconsistent rankings – which approximate ranking errors – lead to uninformative rewards with our approach. Our method empirically improves convergence speed and policy returns over commonly used baselines even with significant ranking errors, and eliminates the need for complex post-processing of reward functions.</abstract>
@@ -32280,7 +32280,7 @@ hai-coaching/</abstract>
     <paper id="943">
       <title><fixed-case>BLASER</fixed-case> 2.0: a metric for evaluation and quality estimation of massively multilingual speech and text translation</title>
       <author><first>David</first><last>Dale</last><affiliation>FAIR at Meta</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>16075-16085</pages>
       <abstract>We present BLASER 2.0, an automatic metric of machine translation quality which supports both speech and text modalities. Compared to its predecessor BLASER (Chen et al., 2023), BLASER 2.0 is based on better underlying text and speech representations that cover 202 text languages and 57 speech ones and extends the training data. BLASER 2.0 comes in two varieties: a reference-based and a reference-free (quality estimation) model. We demonstrate that the reference-free version is applicable not only at the dataset level, for evaluating the overall model performance, but also at the sentence level, for scoring individual translations. In particular, we show its applicability for detecting translation hallucinations and filtering training datasets to obtain more reliable translation models. The BLASER 2.0 models are publicly available at https://github.com/facebookresearch/sonar.</abstract>
       <url hash="1ca84a54">2024.findings-emnlp.943</url>
@@ -32338,7 +32338,7 @@ hai-coaching/</abstract>
     </paper>
     <paper id="948">
       <title>Structured Chain-of-Thought Prompting for Few-Shot Generation of Content-Grounded <fixed-case>QA</fixed-case> Conversations</title>
-      <author><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Jatin</first><last>Ganhotra</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Ramón Fernandez</first><last>Astudillo</last><affiliation>International Business Machines</affiliation></author>
       <pages>16172-16187</pages>
@@ -32398,7 +32398,7 @@ hai-coaching/</abstract>
       <author><first>Kowsik Nandagopan</first><last>D</last></author>
       <author><first>Hritik</first><last>Ladia</last></author>
       <author><first>Ankit</first><last>Yadav</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>16239-16348</pages>
       <abstract>Large Language Models (LLMs) are increasingly ubiquitous, yet their ability to retain and reason about temporal information remains limited, hindering their application in real-world scenarios where understanding the sequential nature of events is crucial. Our study experiments with 12 state-of-the-art models (ranging from 2B to 70B+ parameters) on a novel numerical-temporal dataset, TempUN, spanning from 10,000 BCE to 2100 CE, to uncover significant temporal retention and comprehension limitations. We propose six metrics to assess three learning paradigms to enhance temporal knowledge acquisition. Our findings reveal that open-source models exhibit knowledge gaps more frequently, suggesting a trade-off between limited knowledge and incorrect responses. Additionally, various fine-tuning approaches significantly improved performance, reducing incorrect outputs and impacting the identification of ‘information not available’ in the generations. The associated dataset and code are available at the [URL](https://anonymous.4open.science/r/TempUN-ARR/).</abstract>
       <url hash="14eb5a84">2024.findings-emnlp.953</url>
@@ -32446,7 +32446,7 @@ hai-coaching/</abstract>
       <title>Inference and Verbalization Functions During In-Context Learning</title>
       <author><first>Junyi</first><last>Tao</last></author>
       <author><first>Xiaoyin</first><last>Chen</last><affiliation>Mila - Quebec Artificial Intelligence Institute</affiliation></author>
-      <author><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last><affiliation>Stanford University</affiliation></author>
       <pages>16394-16421</pages>
       <abstract>Large language models (LMs) are capable of in-context learning from a few demonstrations (example-label pairs) to solve new tasks during inference. Despite the intuitive importance of high-quality demonstrations, previous work has observed that, in some settings, ICL performance is minimally affected by irrelevant labels (Min et al., 2022). We hypothesize that LMs perform ICL with irrelevant labels via two sequential processes: an inference function that solves the task, followed by a verbalization function that maps the inferred answer to the label space. Importantly, we hypothesize that the inference function is invariant to remappings of the label space (e.g., “true”/“false” to “cat”/“dog”), enabling LMs to share the same inference function across settings with different label words. We empirically validate this hypothesis with controlled layer-wise interchange intervention experiments. Our findings confirm the hypotheses on multiple datasets and tasks (natural language inference, sentiment analysis, and topic classification) and further suggest that the two functions can be localized in specific layers across various open-sourced models, including GEMMA-7B, MISTRAL-7B-V0.3, GEMMA-2-27B, and LLAMA-3.1-70B.</abstract>
       <url hash="2689384e">2024.findings-emnlp.957</url>
@@ -32562,10 +32562,10 @@ hai-coaching/</abstract>
       <author><first>Yilun</first><last>Zhao</last><affiliation>Yale University</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Ye</first><last>Liu</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Rex</first><last>Ying</last><affiliation>Yale University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>16553-16565</pages>
@@ -32687,7 +32687,7 @@ hai-coaching/</abstract>
     <paper id="975">
       <title><fixed-case>T</fixed-case>ransfer<fixed-case>CVLM</fixed-case>: Transferring Cross-Modal Knowledge for Vision-Language Modeling</title>
       <author><first>Dongha</first><last>Choi</last><affiliation>Gwangju Institute of Science and Technology</affiliation></author>
-      <author><first>Jung-jae</first><last>Kim</last><affiliation>A*STAR</affiliation></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last><affiliation>A*STAR</affiliation></author>
       <author><first>Hyunju</first><last>Lee</last><affiliation>Gwangju Institute of Science and Technology</affiliation></author>
       <pages>16733-16746</pages>
       <abstract>Recent large vision-language multimodal models pre-trained with huge amount of image-text pairs show remarkable performances in downstream tasks. However, the multimodal pre-training has limitations in terms of resources and training time when it comes to obtaining new models that surpass existing models. To overcome these issues, we propose TransferCVLM, a method of efficient knowledge transfer that integrates pre-trained uni-modal models (and cross-modal fusion-encoder) into a combined vision-language model (CVLM), without pre-training the CVLM with large amount of multimodal data, and then for each task application, fine-tunes the CVLM and transfers the multimodal knowledge of a teacher vision-language model to the CVLM by using knowledge distillation techniques. We demonstrate that 1) the fine-tuned CVLM performs comparable to other vision-language models of similar size, that 2) the multimodal knowledge transfer consistently enhances the CVLM, and the knowledge-transferred CVLM composed of large-size unimodal models outperforms the teacher multimodal model in most of downstream tasks, and that 3) TransferCVLM can also be used for model compression when using small-size unimodal models. We estimate that the training of TransferCVLM takes only 6% of pre-training of other vision-language models. Our code is available at https://github.com/DMCB-GIST/TransferCVLM.</abstract>
@@ -32718,7 +32718,7 @@ hai-coaching/</abstract>
       <author><first>Yew Ken</first><last>Chia</last></author>
       <author><first>Guizhen</first><last>Chen</last></author>
       <author><first>Weiwen</first><last>Xu</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Alibaba Group</affiliation></author>
       <pages>16763-16780</pages>
@@ -32746,7 +32746,7 @@ hai-coaching/</abstract>
       <author><first>Ali</first><last>Zare</last><affiliation>Columbia University</affiliation></author>
       <author><first>Shiyuan</first><last>Huang</last><affiliation>Columbia University</affiliation></author>
       <author><first>Ming-Hsuan</first><last>Yang</last><affiliation>Google and University of California at Merced</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University</affiliation></author>
       <author id="li-zhang-google"><first>Li</first><last>Zhang</last><affiliation>Google</affiliation></author>
       <pages>16806-16820</pages>
       <abstract>Generating personalized responses, particularly in the context of video, poses a unique challenge for language models. This paper introduces the novel task of <b>Personalized Video Comment Generation</b> (PVCG), aiming to predict user comments tailored to both the input video and the user’s comment history, where the user is unseen during the model training process. Unlike existing video captioning tasks that ignores the personalization in the text generation process, we introduce PerVidCom, a new dataset specifically collected for this novel task with diverse personalized comments from YouTube. Recognizing the limitations of existing captioning metrics for evaluating this task, we propose a new automatic metric based on Large Language Models (LLMs) with few-shot in-context learning, named FICL-Score, specifically measuring quality from the aspects of emotion, language style and content relevance. We verify the proposed metric with human evaluations. We establish baselines using prominent Multimodal LLMs (MLLMs), analyze their performance discrepancies through extensive evaluation, and identifies directions for future improvement on this important task. Our research opens up a new direction of personalizing MLLMs and paves the way for future research.</abstract>
@@ -32875,7 +32875,7 @@ hai-coaching/</abstract>
       <author><first>Fahad Shahbaz</first><last>Khan</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Linköping University</affiliation></author>
       <author><first>Rao Muhammad</first><last>Anwer</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Salman</first><last>Khan</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Australian National University</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Hisham</first><last>Cholakkal</last><affiliation>MBZUAI</affiliation></author>
       <pages>16984-17002</pages>
       <abstract>In this paper, we introduce BiMediX, the first bilingual medical mixture of experts LLM designed for seamless interaction in both English and Arabic. Our model facilitates a wide range of medical interactions in English and Arabic, including multi-turn chats to inquire about additional details such as patient symptoms and medical history, multiple-choice question answering, and open-ended question answering. We propose a semi-automated English-to-Arabic translation pipeline with human refinement to ensure high-quality translations. We also introduce a comprehensive evaluation benchmark for Arabic medical LLMs. Furthermore, we introduce BiMed1.3M, an extensive Arabic-English bilingual instruction set that covers 1.3 Million diverse medical interactions, including 200k synthesized multi-turn doctor-patient chats, in a 1:2 Arabic-to-English ratio. Our model outperforms state-of-the-art Med42 and Meditron by average absolute gains of 2.5% and 4.1%, respectively, computed across multiple medical evaluation benchmarks in English, while operating at 8-times faster inference. Moreover, our BiMediX outperforms the generic Arabic-English bilingual LLM, Jais-30B, by average absolute gains of 10% on our Arabic and 15% on our bilingual evaluations across multiple datasets. Additionally, BiMediX exceeds the accuracy of GPT4 by 4.4% in open-ended question UPHILL evaluation and largely outperforms state-of-the-art open source medical LLMs in human evaluations of multi-turn conversations. Our trained models, instruction set, and source code are available at https://github.com/mbzuai-oryx/BiMediX.</abstract>
@@ -32967,7 +32967,7 @@ hai-coaching/</abstract>
       <title><fixed-case>P</fixed-case>ython<fixed-case>S</fixed-case>aga: Redefining the Benchmark to Evaluate Code Generating <fixed-case>LLM</fixed-case>s</title>
       <author><first>Ankit</first><last>Yadav</last></author>
       <author><first>Himanshu</first><last>Beniwal</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>17113-17126</pages>
       <abstract>Driven by the surge in code generation using large language models (LLMs), numerous benchmarks have emerged to evaluate these LLMs capabilities. We conducted a large-scale human evaluation of *HumanEval* and *MBPP*, two popular benchmarks for Python code generation, analyzing their diversity and difficulty. Our findings unveil a critical bias towards a limited set of programming concepts, neglecting most of the other concepts entirely. Furthermore, we uncover a worrying prevalence of easy tasks that can inflate model performance estimations. To address these limitations, we propose a novel benchmark, *PythonSaga*, featuring 185 hand-crafted prompts in a balanced representation of 38 programming concepts across diverse difficulty levels. The robustness of our benchmark is demonstrated by the poor performance of existing Code-LLMs. The code and data set are openly available to the NLP community at this [URL](https://github.com/PythonSaga/PythonSaga).</abstract>
       <url hash="fa7dcf18">2024.findings-emnlp.996</url>
@@ -33060,7 +33060,7 @@ hai-coaching/</abstract>
       <author><first>Robert</first><last>Litschko</last></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Benjamin</first><last>Roth</last><affiliation>Universität Vienna</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>17203-17217</pages>
       <abstract>One of the major aspects contributing to the striking performance of large language models (LLMs) is the vast amount of factual knowledge accumulated during pre-training. Yet, many LLMs suffer from self-inconsistency, which raises doubts about their trustworthiness and reliability. This paper focuses on entity type ambiguity, analyzing the proficiency and consistency of state-of-the-art LLMs in applying factual knowledge when prompted with ambiguous entities. To do so, we propose an evaluation protocol that disentangles knowing from applying knowledge, and test state-of-the-art LLMs on 49 ambiguous entities. Our experiments reveal that LLMs struggle with choosing the correct entity reading, achieving an average accuracy of only 85%, and as low as 75% with underspecified prompts. The results also reveal systematic discrepancies in LLM behavior, showing that while the models may possess knowledge, they struggle to apply it consistently, exhibit biases toward preferred readings, and display self-inconsistencies. This highlights the need to address entity ambiguity in the future for more trustworthy LLMs.</abstract>
       <url hash="05bc32c5">2024.findings-emnlp.1003</url>
diff --git a/data/xml/2024.finnlp.xml b/data/xml/2024.finnlp.xml
index d8db111801..7bc2b2512f 100644
--- a/data/xml/2024.finnlp.xml
+++ b/data/xml/2024.finnlp.xml
@@ -9,7 +9,7 @@
       <editor><first>Armineh</first><last>Nourbakhsh</last></editor>
       <editor><first>Zhiqiang</first><last>Ma</last></editor>
       <editor><first>Charese</first><last>Smiley</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
       <editor><first>Sanjiv Ranjan</first><last>Das</last></editor>
       <editor><first>Manling</first><last>Li</last></editor>
       <editor><first>Mohammad</first><last>Ghassemi</last></editor>
@@ -303,7 +303,7 @@
       <author><first>Anubhav</first><last>Sarkar</last></author>
       <author><first>Swagata</first><last>Chakraborty</last></author>
       <author><first>Sohom</first><last>Ghosh</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>244–247</pages>
       <abstract>Investors and other stakeholders like consumers and employees, increasingly consider ESG factors when making decisions about investments or engaging with companies. Taking into account the importance of ESG today, FinNLP-KDF introduced the <i>ML-ESG-3</i> shared task, which seeks to determine the duration of the impact of financial news articles in four languages - English, French, Korean, and Japanese. This paper describes our team, LIPI’s approach towards solving the above-mentioned task. Our final systems consist of translation, paraphrasing and fine-tuning language models like BERT, Fin-BERT and RoBERTa for classification. We ranked first in the impact duration prediction subtask for French language.</abstract>
       <url hash="9266c72f">2024.finnlp-1.25</url>
@@ -360,7 +360,7 @@
       <title>Adapting <fixed-case>LLM</fixed-case> to Multi-lingual <fixed-case>ESG</fixed-case> Impact and Length Prediction Using In-context Learning and Fine-Tuning with Rationale</title>
       <author><first>Pawan Kumar</first><last>Rajpoot</last></author>
       <author><first>Ashvini</first><last>Jindal</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>274–278</pages>
       <abstract>The prediction of Environmental, Social, and Governance (ESG) impact and duration (length) of impact from company events, as reported in news articles, hold immense significance for investors, policymakers, and various stakeholders. In this paper, we describe solutions from our team “Upaya” to ESG impact and length prediction tasks on one such dataset ML-ESG-3. ML-ESG-3 dataset was released along with shared task as a part of the Fifth Workshop on Knowledge Discovery from Unstructured Data in Financial Services, co-located with LREC-COLING 2024. We employed two different paradigms to adapt Large Language Models (LLMs) to predict both the ESG impact and length of events. In the first approach, we leverage GPT-4 within the In-context learning (ICL) framework. A learning-free dense retriever identifies top K-relevant In-context learning examples from the training data for a given test example. The second approach involves instruction-tuning Mistral (7B) LLM to predict impact and duration, supplemented with rationale generated using GPT-4. Our models secured second place in French tasks and achieved reasonable results (fifth and ninth rank) in English tasks. These results demonstrate the potential of different LLM-based paradigms for delivering valuable insights within the ESG investing landscape.</abstract>
       <url hash="8c356f58">2024.finnlp-1.30</url>
@@ -523,7 +523,7 @@
       <title>Capturing Analysts’ Questioning Strategies in Earnings Calls via a Question Cornering Score (<fixed-case>QCS</fixed-case>)</title>
       <author><first>Giulia</first><last>D’Agostino</last></author>
       <author><first>Andrea</first><last>Rocci</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>107–118</pages>
       <url hash="96c2c33c">2024.finnlp-2.10</url>
       <bibkey>dagostino-etal-2024-capturing</bibkey>
@@ -610,7 +610,7 @@
       <title>Upaya at the <fixed-case>F</fixed-case>in<fixed-case>LLM</fixed-case> Challenge Task 1 and 2: <fixed-case>D</fixed-case>ist<fixed-case>F</fixed-case>in: Distillation based Fine-Tuning for Financial Tasks</title>
       <author><first>Ashvini Kumar</first><last>Jindal</last></author>
       <author><first>Pawan Kumar</first><last>Rajpoot</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>159–164</pages>
       <url hash="28a0e4ac">2024.finnlp-2.17</url>
       <bibkey>jindal-etal-2024-upaya</bibkey>
diff --git a/data/xml/2024.futured.xml b/data/xml/2024.futured.xml
index 0b34ef54b0..d35847edcf 100644
--- a/data/xml/2024.futured.xml
+++ b/data/xml/2024.futured.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-11-05" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop on the Future of Event Detection (FuturED)</booktitle>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Thien Huu</first><last>Nguyen</last></editor>
       <editor><first>Hemank</first><last>Lamba</last></editor>
       <editor><first>Amanda</first><last>Hughes</last></editor>
@@ -57,13 +57,13 @@
     </paper>
     <paper id="4">
       <title><fixed-case>MUMOSA</fixed-case>, Interactive Dashboard for <fixed-case>MU</fixed-case>lti-<fixed-case>MO</fixed-case>dal Situation Awareness</title>
-      <author><first>Stephanie M.</first><last>Lukin</last><affiliation>U.S. Army Research Laboratory</affiliation></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last><affiliation>U.S. Army Research Laboratory</affiliation></author>
       <author><first>Shawn</first><last>Bowser</last><affiliation>U.S. Army Research Laboratory</affiliation></author>
       <author><first>Reece</first><last>Suchocki</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Douglas</first><last>Summers-Stay</last><affiliation>U.S. Army Research Laboratory</affiliation></author>
       <author><first>Francis</first><last>Ferraro</last><affiliation>University of Maryland, Baltimore County</affiliation></author>
       <author><first>Cynthia</first><last>Matuszek</last><affiliation>UMBC</affiliation></author>
-      <author><first>Clare</first><last>Voss</last><affiliation>Army Research Laboratory</affiliation></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last><affiliation>Army Research Laboratory</affiliation></author>
       <pages>32-47</pages>
       <abstract>enter abstract here</abstract>
       <url hash="6eec9198">2024.futured-1.4</url>
@@ -98,7 +98,7 @@
       <author><first>Helene</first><last>Olsen</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Huiling</first><last>You</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Samia</first><last>Touileb</last><affiliation>University of Bergen</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
       <pages>73-86</pages>
       <abstract>enter abstract here</abstract>
diff --git a/data/xml/2024.games.xml b/data/xml/2024.games.xml
index 8ab55e3922..8f90afe143 100644
--- a/data/xml/2024.games.xml
+++ b/data/xml/2024.games.xml
@@ -6,8 +6,8 @@
       <editor><first>Chris</first><last>Madge</last></editor>
       <editor><first>Jon</first><last>Chamberlain</last></editor>
       <editor><first>Karen</first><last>Fort</last></editor>
-      <editor><first>Udo</first><last>Kruschwitz</last></editor>
-      <editor><first>Stephanie</first><last>Lukin</last></editor>
+      <editor id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></editor>
+      <editor id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title>Empowering Adaptive Digital Game-Based Language Learning for Under-Resourced Languages Through Text Analysis</title>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Sally</first><last>Bruen</last></author>
       <author><first>Liang</first><last>Xu</last></author>
       <author><first>Monica</first><last>Ward</last></author>
@@ -45,7 +45,7 @@
       <author><first>Bertrand</first><last>Remy</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Karen</first><last>Fort</last></author>
       <pages>14–20</pages>
       <abstract>This paper presents the creation of Hostomytho, a game with a purpose intended for evaluating the quality of synthetic biomedical texts through multiple mini-games. Hostomytho was developed entirely using open source technologies both for internet browser and mobile platforms (IOS &amp; Android). The code and the annotations created for synthetic clinical cases in French will be made freely available.</abstract>
@@ -56,7 +56,7 @@
       <title>Using In-context Learning to Automate <fixed-case>AI</fixed-case> Image Generation for a Gamified Text Labelling Task</title>
       <author><first>Fatima</first><last>Althani</last></author>
       <author><first>Chris</first><last>Madge</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>21–31</pages>
       <abstract>This paper explores a novel automated method to produce AI-generated images for a text-labelling gamified task. By leveraging the in-context learning capabilities of GPT-4, we automate the optimisation of text-to-image prompts to align with the text being labelled in the part-of-speech tagging task. As an initial evaluation, we compare the optimised prompts to the original sentences based on imageability and concreteness scores. Our results revealed that optimised prompts had significantly higher imageability and concreteness scores. Moreover, to evaluate text-to-image outputs, we generate images using Stable Diffusion XL based on the two prompt types, optimised prompts and the original sentences. Using the automated LIAON-Aesthetic predictor model, we assigned aesthetic scores for the generated images. This resulted in the outputs using optimised prompts scoring significantly higher in predicted aesthetics than those using original sentences as prompts. Our preliminary findings suggest that this methodology provides significantly more aesthetic text-to-image outputs than using the original sentence as a prompt. While the initial results are promising, the text labelling task and AI-generated images presented in this paper have yet to undergo human evaluation.</abstract>
       <url hash="5441d1ac">2024.games-1.4</url>
@@ -65,7 +65,7 @@
     <paper id="5">
       <title>Aspect-based Sentiment Evaluation of Chess Moves (<fixed-case>ASSESS</fixed-case>): an <fixed-case>NLP</fixed-case>-based Method for Evaluating Chess Strategies from Textbooks</title>
       <author><first>Haifa</first><last>Alrdahi</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>32–42</pages>
       <abstract>The chess domain is well-suited for creating an artificial intelligence (AI) system that mimics real-world challenges, including decision-making. Throughout the years, minimal attention has been paid to investigating insights derived from unstructured chess data sources. In this study, we examine the complicated relationships between multiple referenced moves in a chess-teaching textbook, and propose a novel method designed to encapsulate chess knowledge derived from move-action phrases. This study investigates the feasibility of using a modified sentiment analysis method as a means for evaluating chess moves based on text. Our proposed Aspect-Based Sentiment Analysis (ABSA) method represents an advancement in evaluating the sentiment associated with referenced chess moves. By extracting insights from move-action phrases, our approach aims to provide a more fine-grained and contextually aware ‘chess move’-based sentiment classification. Through empirical experiments and analysis, we evaluate the performance of our fine-tuned ABSA model, presenting results that confirm the efficiency of our approach in advancing aspect-based sentiment classification within the chess domain. This research contributes to the area of game-playing by machines and shows the practical applicability of leveraging NLP techniques to understand the context of strategic games. Keywords: Natural Language Processing, Chess, Aspect-based Sentiment Analysis (ABSA), Chess Move Evaluation.</abstract>
       <url hash="3912b899">2024.games-1.5</url>
@@ -74,7 +74,7 @@
     <paper id="6">
       <title>Generating Converging Narratives for Games with Large Language Models</title>
       <author><first>Douglas</first><last>Summers-Stay</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <pages>43–60</pages>
       <abstract>We explore methods of combining the probability distributions generated by two LLM prompts in order to generate a continuation that is appropriate for both prompts at once. This is a new capability that extends the possibilities for branching and rejoining narratives in games.</abstract>
       <url hash="a99520ac">2024.games-1.6</url>
@@ -85,7 +85,7 @@
       <author><first>Elio</first><last>Musacchio</last></author>
       <author><first>Lucia</first><last>Siciliani</last></author>
       <author><first>Pierpaolo</first><last>Basile</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>61–69</pages>
       <abstract>Dungeons &amp; Dragons (D&amp;D) is a classic tabletop game with a 50-year history. Its intricate and customizable gameplay allows players to create endless worlds and stories. Due to the highly narrative component of this game, D&amp;D and many other interactive games represent a challenging setting for the Natural Language Generation (NLG) capabilities of LLMs. This paper explores using LLMs to generate new spells, which are one of the most captivating aspects of D&amp;D gameplay. Due to the scarcity of resources available for such a specific task, we build a dataset of 3,259 instances by combining official and fan-made D&amp;D spells. We considered several LLMs in generating spells, which underwent a quantitative and qualitative evaluation. Metrics including Bleu and BertScore were computed for quantitative assessments. Subsequently, we also conducted an in-vivo evaluation with a survey involving D&amp;D players, which could assess the quality of the generated spells as well as their adherence to the rules. Furthermore, the paper emphasizes the open-sourcing of all models, datasets, and findings, aiming to catalyze further research on this topic.</abstract>
       <url hash="ab9590b2">2024.games-1.7</url>
@@ -111,7 +111,7 @@
     <paper id="10">
       <title>Linguistic Acceptability and Usability Enhancement: A Case Study of <fixed-case>GWAP</fixed-case> Evaluation and Redesign</title>
       <author><first>Wateen Abdullah</first><last>Aliady</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>85–96</pages>
       <abstract>Collecting high-quality annotations for Natural Language Processing (NLP) tasks poses challenges. Gamified annotation systems, like Games-with-a-Purpose (GWAP), have become popular tools for data annotation. For GWAPs to be effective, they must be user-friendly and produce high-quality annotations to ensure the collected data’s usefulness. This paper investigates the effectiveness of a gamified approach through two specific studies on an existing GWAP designed for collecting NLP coreference judgments. The first study involved preliminary usability testing using the concurrent think-aloud method to gather open-ended feedback. This feedback was crucial in pinpointing design issues. Following this, we conducted semi-structured interviews with our participants, and the insights collected from these interviews were instrumental in crafting player personas, which informed design improvements aimed at enhancing user experience. The outcomes of our research have been generalized to benefit other GWAP implementations. The second study evaluated the linguistic acceptability and reliability of the data collected through our GWAP. Our findings indicate that our GWAP produced reliable corpora with 91.49% accuracy and 0.787 Cohen’s kappa.</abstract>
       <url hash="82eb90e1">2024.games-1.10</url>
diff --git a/data/xml/2024.gebnlp.xml b/data/xml/2024.gebnlp.xml
index de5abdb25e..aedcae3b37 100644
--- a/data/xml/2024.gebnlp.xml
+++ b/data/xml/2024.gebnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</booktitle>
-      <editor><first>Agnieszka</first><last>Faleńska</last></editor>
+      <editor id="agnieszka-falenska"><first>Agnieszka</first><last>Faleńska</last></editor>
       <editor><first>Christine</first><last>Basta</last></editor>
       <editor><first>Marta</first><last>Costa-jussà</last></editor>
       <editor><first>Seraphina</first><last>Goldfarb-Tarrant</last></editor>
@@ -36,7 +36,7 @@
       <author><first>Bingjie</first><last>Du</last></author>
       <author><first>Jishun</first><last>Zhao</last></author>
       <author><first>Ying</first><last>Liu</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last><affiliation>Beijing Language and Culture University</affiliation></author>
       <pages>20-32</pages>
       <abstract>Pre-trained language models (PLMs) have achieved success in various of natural language processing (NLP) tasks. However, PLMs also introduce some disquieting safety problems, such as gender bias. Gender bias is an extremely complex issue, because different individuals may hold disparate opinions on whether the same sentence expresses harmful bias, especially those seemingly neutral or positive. This paper first defines the concept of contextualized gender bias (CGB), which makes it easy to measure implicit gender bias in both PLMs and annotators. We then construct CGBDataset, which contains 20k natural sentences with gendered words, from Chinese news. Similar to the task of masked language models, gendered words are masked for PLMs and annotators to judge whether a male word or a female word is more suitable. Then, we introduce CGBFrame to measure the gender bias of annotators. By comparing the results measured by PLMs and annotators, we find that though there are differences on the choices made by PLMs and annotators, they show significant consistency in general.</abstract>
       <url hash="c96c4e8a">2024.gebnlp-1.2</url>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="12">
       <title>Dissecting Biases in Relation Extraction: A Cross-Dataset Analysis on People’s Gender and Origin</title>
-      <author><first>Marco</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco</first><last>Stranisci</last></author>
       <author><first>Pere-Lluís</first><last>Huguet Cabot</last></author>
       <author><first>Elisa</first><last>Bassignana</last></author>
       <author><first>Roberto</first><last>Navigli</last><affiliation>Sapienza University of Rome</affiliation></author>
@@ -169,7 +169,7 @@
       <author><first>Haotian</first><last>Zhu</last></author>
       <author><first>Kexin</first><last>Gao</last><affiliation>University of Washington</affiliation></author>
       <author><first>Fei</first><last>Xia</last><affiliation>University of Washington, Seattle</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
       <pages>219-236</pages>
       <abstract>Gender bias has been extensively studied in both the educational field and the Natural Language Processing (NLP) field, the former using human coding to identify patterns associated with and causes of gender bias in text and the latter to detect, measure and mitigate gender bias in NLP output and models. This work aims to use NLP to facilitate automatic, quantitative analysis of educational text within the framework of a gender bias taxonomy. Analyses of both educational texts and a lexical resource (WordNet) reveal patterns of bias that can inform and aid educators in updating textbooks and lexical resources and in designing assessment items.</abstract>
       <url hash="50682e27">2024.gebnlp-1.14</url>
@@ -230,7 +230,7 @@
       <author><first>Vipul</first><last>Gupta</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Pranav</first><last>Narayanan Venkit</last></author>
       <author><first>Shomir</first><last>Wilson</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Rebecca</first><last>Passonneau</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>295-322</pages>
       <abstract>Sociodemographic bias in language models (LMs) has the potential for harm when deployed in real-world settings. This paper presents a comprehensive survey of the past decade of research on sociodemographic bias in LMs, organized into a typology that facilitates examining the different aims: types of bias, quantifying bias, and debiasing techniques. We track the evolution of the latter two questions, then identify current trends and their limitations, as well as emerging techniques. To guide future research towards more effective and reliable solutions, and to help authors situate their work within this broad landscape, we conclude with a checklist of open questions.</abstract>
       <url hash="2bc2ecae">2024.gebnlp-1.19</url>
@@ -263,7 +263,7 @@
     <paper id="22">
       <title>Detecting and Mitigating <fixed-case>LGBTQIA</fixed-case>+ Bias in Large <fixed-case>N</fixed-case>orwegian Language Models</title>
       <author><first>Selma</first><last>Bergstrand</last></author>
-      <author><first>Björn</first><last>Gambäck</last><affiliation>Norwegian University of Science and Technology</affiliation></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last><affiliation>Norwegian University of Science and Technology</affiliation></author>
       <pages>351-364</pages>
       <abstract>The paper aims to detect and mitigate LGBTQIA+ bias in large language models (LLMs). As the usage of LLMs quickly increases, so does the significance of the harms they may cause due to bias. The research field of bias in LLMs has seen massive growth, but few attempts have been made to detect or mitigate other biases than gender bias, and most focus has been on English LLMs. This work shows experimentally that LLMs may cause representational harms towards LGBTQIA+ individuals when evaluated on sentence completion tasks and on a benchmark dataset constructed from stereotypes reported by the queer community of Norway, collected through a survey in order to directly involve the affected community. Furthermore, Norwegian training corpora are probed for queer bias, revealing strong associations between queer terms and anti-queer slurs, as well as words related to pedophilia. Finally, a fine-tuning-based debiasing method is applied to two Norwegian LLMs. This method does not consistently reduce bias, but shows that queer bias can be altered, laying the foundation for future debiasing approaches. By shedding light on the severe discrimination that can occur through the usage of LLMs, this paper contributes to the ongoing fight for equal rights for the LGBTQIA+ community.</abstract>
       <url hash="fda56984">2024.gebnlp-1.22</url>
@@ -273,7 +273,7 @@
     <paper id="23">
       <title>Whose wife is it anyway? Assessing bias against same-gender relationships in machine translation</title>
       <author><first>Ian</first><last>Stewart</last><affiliation>Pacific Northwest National Laboratory</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>365-375</pages>
       <abstract>Machine translation often suffers from biased data and algorithms that can lead to unacceptable errors in system output. While bias in gender norms has been investigated, less is known about whether MT systems encode bias about social relationships, e.g., “the lawyer kissed her wife.” We investigate the degree of bias against same-gender relationships in MT systems, using generated template sentences drawn from several noun-gender languages (e.g., Spanish) and comprised of popular occupation nouns. We find that three popular MT services consistently fail to accurately translate sentences concerning relationships between entities of the same gender. The error rate varies considerably based on the context, and same-gender sentences referencing high female-representation occupations are translated with lower accuracy. We provide this work as a case study in the evaluation of intrinsic bias in NLP systems with respect to social relationships.</abstract>
       <url hash="8cf34f43">2024.gebnlp-1.23</url>
diff --git a/data/xml/2024.genbench.xml b/data/xml/2024.genbench.xml
index c0cd452318..1b0f494fa5 100644
--- a/data/xml/2024.genbench.xml
+++ b/data/xml/2024.genbench.xml
@@ -48,7 +48,7 @@
     <paper id="3">
       <title>The <fixed-case>S</fixed-case>lay<fixed-case>QA</fixed-case> benchmark of social reasoning: testing gender-inclusive generalization with neopronouns</title>
       <author><first>Bastian</first><last>Bunzeck</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>42-53</pages>
       <abstract>We introduce SlayQA, a novel benchmark data set designed to evaluate language models’ ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without.</abstract>
       <url hash="9a3359d4">2024.genbench-1.3</url>
@@ -70,7 +70,7 @@
       <title><fixed-case>MMLU</fixed-case>-<fixed-case>SR</fixed-case>: A Benchmark for Stress-Testing Reasoning Capability of Large Language Models</title>
       <author><first>Wentian</first><last>Wang</last></author>
       <author><first>Sarthak</first><last>Jain</last></author>
-      <author><first>Paul</first><last>Kantor</last><affiliation>University of Wisconsin - Madison, Rutgers University, New Brunswick and Paul B Kantor, Consultant</affiliation></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last><affiliation>University of Wisconsin - Madison, Rutgers University, New Brunswick and Paul B Kantor, Consultant</affiliation></author>
       <author><first>Jacob</first><last>Feldman</last><affiliation>Rutgers University</affiliation></author>
       <author><first>Lazaros</first><last>Gallos</last><affiliation>Rutgers University</affiliation></author>
       <author><first>Hao</first><last>Wang</last><affiliation>Rutgers University</affiliation></author>
@@ -148,7 +148,7 @@
       <author><first>Ritam</first><last>Dutt</last></author>
       <author><first>Sagnik</first><last>Ray Choudhury</last></author>
       <author><first>Varun Venkat</first><last>Rao</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <author><first>V.G.Vinod</first><last>Vydiswaran</last></author>
       <pages>165-182</pages>
       <abstract>Generalization refers to the ability of machine learning models to perform well on dataset distributions different from the one it was trained on. While several pre-existing works have characterized the generalizability of NLP models across different dimensions, such as domain shift, adversarial perturbations, or compositional variations, most studies were carried out in a stand-alone setting, emphasizing a single dimension of interest. We bridge this gap by systematically investigating the generalizability of pre-trained language models across different architectures, sizes, and training strategies, over multiple dimensions for the task of natural language inference and question answering. Our results indicate that model instances typically exhibit consistent generalization trends, i.e., they generalize equally well (or poorly) across most scenarios, and this ability is correlated with model architecture, base dataset performance, size, and training mechanism. We hope this research motivates further work in a) developing a multi-dimensional generalization benchmark for systematic evaluation and b) examining the reasons behind models’ generalization abilities. The code and data are available at https://github.com/sagnik/md-gen-nlp, and the trained models are released at https://huggingface.co/varun-v-rao.</abstract>
diff --git a/data/xml/2024.germeval.xml b/data/xml/2024.germeval.xml
index 7c49485517..09da966c34 100644
--- a/data/xml/2024.germeval.xml
+++ b/data/xml/2024.germeval.xml
@@ -93,7 +93,7 @@
     <paper id="4">
       <title>Team Quabynar at the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task 1 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect (Subtasks 1 and 2) on Sexism Detection</title>
       <author><first>Kwabena Odame</first><last>Akomeah</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Bernd</first><last>Ludwig</last></author>
       <pages>26–32</pages>
       <url hash="cfb09bcf">2024.germeval-2.4</url>
diff --git a/data/xml/2024.gitt.xml b/data/xml/2024.gitt.xml
index 4e83c33eaf..5632126757 100644
--- a/data/xml/2024.gitt.xml
+++ b/data/xml/2024.gitt.xml
@@ -49,7 +49,7 @@
     </paper>
     <paper id="3">
       <title>Gender and bias in <fixed-case>A</fixed-case>mazon review translations: by humans, <fixed-case>MT</fixed-case> systems and <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case></title>
-      <author><first>Maja</first><last>Popovic</last><affiliation>IU International University of Applied Sciences and Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last><affiliation>IU International University of Applied Sciences and Dublin City University</affiliation></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last><affiliation>Universität Hildesheim</affiliation></author>
       <pages>22-30</pages>
       <abstract>This paper presents an analysis of first-person gender in five different translation variants of Amazon product reviews:those produced by professional translators, by translation students, with different machine translation (MT) systems andwith ChatGPT. The analysis revealed that the majority of the reviews were translated into the masculine first-person gender, both by humans as well as by machines. Further inspection revealed that the choice of the gender in a translation is not related to the actual gender of the translator. Finally, the analysis of different products showed that there are certain bias tendencies, because the distribution of genders notably differ for different products.</abstract>
diff --git a/data/xml/2024.hcinlp.xml b/data/xml/2024.hcinlp.xml
index bb2f478403..8a657b3ec5 100644
--- a/data/xml/2024.hcinlp.xml
+++ b/data/xml/2024.hcinlp.xml
@@ -62,7 +62,7 @@
       <author><first>Marcus</first><last>Collins</last><affiliation>Amazon</affiliation></author>
       <author><first>Eugene</first><last>Agichtein</last><affiliation>Amazon and Emory University</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>40-50</pages>
       <abstract>Conversational AI is a subtype of Human Computer Interaction that has gained wide adoption. These systems are typically powered by Large Language Models (LLMs) that use Retrieval Augmented Generation (RAG) to infuse external knowledge, which is effective against issues like hallucination. However, automatically evaluating retrieval augmented conversations with minimal human effort remains challenging, particularly in online settings. We address this challenge by proposing a lexical metric, and a novel method for combining it with other metrics, including semantic models. Our approach involves: (1) Conversational Information Utility (CIU), a new automated metric inspired by prior user studies on web search evaluation, to compute information overlap between conversation context and grounded information in an unsupervised, purely lexical way; and (2) a generalized reward model through Mixture-of-Experts (MoE-CIU) that dynamically ensembles CIU with other metrics, including learned ones, into a single reward. Evaluation against human ratings on two public datasets (Topical Chat and Persona Chat) shows that CIU improves correlation against human judgments by 2.0% and 0.9% respectively compared to the second best metric. When MoE is applied to combine lexical and learned semantic metrics, correlations further improve by 9.9% and 5.0%, suggesting that unified reward models are a promising approach.</abstract>
       <url hash="b024e70e">2024.hcinlp-1.4</url>
@@ -74,7 +74,7 @@
       <author><first>Chantal</first><last>Shaib</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Joe</first><last>Barrow</last><affiliation>Pattern Data</affiliation></author>
       <author><first>Alexa</first><last>Siu</last><affiliation>Adobe</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <author><first>Ani</first><last>Nenkova</last><affiliation>Adobe Research</affiliation></author>
       <pages>51-59</pages>
       <abstract>Modern instruction-tuned models have become highly capable in text generation tasks such as summarization, and are expected to be released at a steady pace. In practice one may now wish to choose confidently, but with minimal effort, the best performing summarization model when applied to a new domain or purpose. In this work, we empirically investigate the test sample size necessary to select a preferred model in the context of news summarization. Empirical results reveal that comparative evaluation converges quickly for both automatic and human evaluation, with clear preferences for a system emerging from under 100 examples. The human preference data allows us to quantify how well automatic scores can reproduce preference rankings across a variety of downstream summarization tasks. We find that, while automatic metrics are stable at smaller sample sizes, only some automatic metrics are able to moderately predict model win rates according to human preference.</abstract>
diff --git a/data/xml/2024.htres.xml b/data/xml/2024.htres.xml
index 7e1b352c99..a0951946aa 100644
--- a/data/xml/2024.htres.xml
+++ b/data/xml/2024.htres.xml
@@ -33,7 +33,7 @@
       <title><fixed-case>TEI</fixed-case> Specifications for a Sustainable Management of Digitized Holocaust Testimonies</title>
       <author><first>Sarah</first><last>Bénière</last></author>
       <author><first>Floriane</first><last>Chiffoleau</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>10–17</pages>
       <abstract>Data modeling and standardization are central issues in the field of Digital Humanities, and all the more so when dealing with Holocaust testimonies, where stable preservation and long-term accessibility are key. The EHRI Online Editions are composed of documents of diverse nature (testimonies, letters, diplomatic reports, etc.), held by EHRI’s partnering institutions, and selected, gathered thematically and encoded according to the TEI Guidelines by the editors within the EHRI Consortium. Standardization is essential in order to make sure that the editions are consistent with one another. The issue of consistency also encourages a broader reflection on the usage of standards when processing data, and on the standardization of digital scholarly editions of textual documents in general. In this paper, we present the normalization work we carried out on the EHRI Online Editions. It includes a customization of the TEI adapted to Holocaust-related documents, and a focus on the implementation of controlled vocabulary. We recommend the use of these encoding specifications as a tool for researchers and/or non-TEI experts to ensure their encoding is valid and consistent across editions, but also as a mechanism for integrating the edition work smoothly within a wider workflow leading from image digitization to publication.</abstract>
       <url hash="7cdf7364">2024.htres-1.2</url>
@@ -69,7 +69,7 @@
     <paper id="6">
       <title>Speech Technology Services for Oral History Research</title>
       <author><first>Christoph</first><last>Draxler</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Arjan</first><last>van Hessen</last></author>
       <author><first>Pavel</first><last>Ircing</last></author>
       <author><first>Jan</first><last>Lehečka</last></author>
diff --git a/data/xml/2024.hucllm.xml b/data/xml/2024.hucllm.xml
index b53df0c24e..58df9d421a 100644
--- a/data/xml/2024.hucllm.xml
+++ b/data/xml/2024.hucllm.xml
@@ -8,7 +8,7 @@
       <editor><first>Ashish</first><last>Sharma</last></editor>
       <editor><first>Diyi</first><last>Yang</last></editor>
       <editor><first>Sara</first><last>Hooker</last></editor>
-      <editor><first>H. Andrew</first><last>Schwartz</last></editor>
+      <editor id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></editor>
       <publisher>ACL</publisher>
       <address>TBD</address>
       <month>August</month>
@@ -65,7 +65,7 @@
       <title>To What Extent Are Large Language Models Capable of Generating Substantial Reflections for Motivational Interviewing Counseling Chatbots? A Human Evaluation</title>
       <author><first>Erkan</first><last>Basar</last></author>
       <author><first>Iris</first><last>Hendrickx</last><affiliation>Radboud University Nijmegen, the Netherlands</affiliation></author>
-      <author><first>Emiel</first><last>Krahmer</last><affiliation>Tilburg University</affiliation></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Gert-Jan</first><last>Bruijn</last></author>
       <author><first>Tibor</first><last>Bosse</last><affiliation>Radboud University</affiliation></author>
       <pages>41-52</pages>
@@ -80,7 +80,7 @@
       <author><first>Phillip</first><last>Rust</last></author>
       <author><first>Ruixiang</first><last>Cui</last></author>
       <author><first>Yong</first><last>Cao</last></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>53-66</pages>
       <abstract>Large Vision Language Models can be used to assist visually impaired individuals by describing images they capture in their daily lives. Current evaluation datasets may not reflect the diverse cultural user backgrounds nor the situational context of this use case. To address this problem, we create a survey to determine caption preferences and propose a culture-centric evaluation benchmark by filtering VizWiz, an existing dataset with images taken by people who are blind. We then evaluate different models and prompts, investigating their reliability as visual assistants. While the evaluation results for state-of-the-art models seem promising, we identified some weak spots such as hallucinations and problems with conventional evaluation metrics. Our survey, data, code, and model outputs will be publicly available.</abstract>
@@ -92,7 +92,7 @@
       <title>Evaluating Large Language Models on Social Signal Sensitivity: An Appraisal Theory Approach</title>
       <author><first>Zhen</first><last>Wu</last></author>
       <author><first>Ritam</first><last>Dutt</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>67-80</pages>
       <abstract>We present a framework to assess the sensitivity of Large Language Models (LLMs) to textually embedded social signals using an Appraisal Theory perspective. We report on an experiment that uses prompts encoding three dimensions of social signals: Affect, Judgment, and Appreciation. In response to the prompt, an LLM generates both an analysis (Insight) and a conversational Response, which are analyzed in terms of sensitivity to the signals. We quantitatively evaluate the output text through topical analysis of the Insight and predicted social intelligence scores of the Response in terms of empathy and emotional polarity. Key findings show that LLMs are more sensitive to positive signals. The personas impact Responses but not the Insight. We discuss how our framework can be extended to a broader set of social signals, personas, and scenarios to evaluate LLM behaviors under various conditions.</abstract>
       <url hash="4fa58f6b">2024.hucllm-1.6</url>
@@ -105,7 +105,7 @@
       <title>Aligning to Adults Is Easy, Aligning to Children Is Hard: A Study of Linguistic Alignment in Dialogue Systems</title>
       <author><first>Dorothea</first><last>French</last><affiliation>University of Colorado, Boulder</affiliation></author>
       <author><first>Sidney</first><last>D’Mello</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>81-87</pages>
       <abstract>During conversations, people align to one another over time, by using similar words, concepts, and syntax. This helps form a shared understanding of the conversational content and is associated with increased engagement and satisfaction. It also affects conversation outcomes: e.g., when talking to language learners, an above normal level of linguistic alignment of parents or language teachers is correlated with faster language acquisition. These benefits make human-like alignment an important property of dialogue systems, which has often been overlooked by the NLP community. In order to fill this gap, we ask: (RQ1) Due to the importance for engagement and satisfaction, to what degree do state-of-the-art dialogue systems align to adult users? (RQ2) With a potential application to child language acquisition in mind, do systems, similar to parents, show high levels of alignment during conversations with children? Our experiments show that ChatGPT aligns to adults at roughly human levels, while Llama2 shows elevated alignment. However, when responding to a child, both systems’ alignment is below human levels.</abstract>
       <url hash="5d1d87e5">2024.hucllm-1.7</url>
diff --git a/data/xml/2024.humeval.xml b/data/xml/2024.humeval.xml
index 727e858fc3..6cb7ddd950 100644
--- a/data/xml/2024.humeval.xml
+++ b/data/xml/2024.humeval.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024</booktitle>
       <editor><first>Simone</first><last>Balloccu</last></editor>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Rudali</first><last>Huidrom</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <editor><first>Joao</first><last>Sedoc</last></editor>
@@ -24,7 +24,7 @@
     <paper id="1">
       <title>Quality and Quantity of Machine Translation References for Automatic Metrics</title>
       <author><first>Vilém</first><last>Zouhar</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>1–11</pages>
       <abstract>Automatic machine translation metrics typically rely on human translations to determine the quality of system translations. Common wisdom in the field dictates that the human references should be of very high quality. However, there are no cost-benefit analyses that could be used to guide practitioners who plan to collect references for machine translation evaluation. We find that higher-quality references lead to better metric correlations with humans at the segment-level. Having up to 7 references per segment and taking their average (or maximum) helps all metrics. Interestingly, the references from vendors of different qualities can be mixed together and improve metric success. Higher quality references, however, cost more to create and we frame this as an optimization problem: given a specific budget, what references should be collected to maximize metric success. These findings can be used by evaluators of shared tasks when references need to be created under a certain budget.</abstract>
       <url hash="63f93dc2">2024.humeval-1.1</url>
@@ -44,8 +44,8 @@
       <author><first>Mohamed</first><last>Elaraby</last></author>
       <author><first>Huihui</first><last>Xu</last></author>
       <author><first>Morgan</first><last>Gray</last></author>
-      <author><first>Kevin</first><last>Ashley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kevin-d-ashley"><first>Kevin</first><last>Ashley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>28–35</pages>
       <abstract>Human evaluation remains the gold standard for assessing abstractive summarization. However, current practices often prioritize constructing evaluation guidelines for fluency, coherence, and factual accuracy, overlooking other critical dimensions. In this paper, we investigate argument coverage in abstractive summarization by focusing on long legal opinions, where summaries must effectively encapsulate the document’s argumentative nature. We introduce a set of human-evaluation guidelines to evaluate generated summaries based on argumentative coverage. These guidelines enable us to assess three distinct summarization models, studying the influence of including argument roles in summarization. Furthermore, we utilize these evaluation scores to benchmark automatic summarization metrics against argument coverage, providing insights into the effectiveness of automated evaluation methods.</abstract>
       <url hash="4066d1a1">2024.humeval-1.3</url>
@@ -64,9 +64,9 @@
       <title>Insights of a Usability Study for <fixed-case>KBQA</fixed-case> Interactive Semantic Parsing: Generation Yields Benefits over Templates but External Validity Remains Challenging</title>
       <author><first>Ashley</first><last>Lewis</last></author>
       <author><first>Lingbo</first><last>Mo</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Huan</first><last>Sun</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>47–62</pages>
       <abstract>We present our findings from a usability study of an interactive semantic parsing system for knowledge based question answering (KBQA). The system is designed to help users access information within a knowledge base without having to know its query language. The system translates the user’s question into the query language, retrieves an answer, then presents an English explanation of the process so that the user can make corrections if necessary. To our knowledge, our work is the most thorough usability study conducted for such a system and the only one that uses crowdworkers as participants to verify that the system is usable for average users. Our crowdworkers participate in KBQA dialogues using 4 versions of a system based on the framework by Mo et al. (2022) and answer surveys about their experiences. Some key takeaways from this work are: 1) we provide evidence for the benefits of interactivity in semantic parsing with human users and using generated questions in lieu of templated representations, 2) we identify limitations of simulations and provide contrasting evidence from actual system use, and 3) we provide an examination of crowdsourcing methodology, in particular the trade-offs of using crowdworkers vs. a specially trained group of evaluators.</abstract>
       <url hash="c8cb6e3c">2024.humeval-1.5</url>
@@ -76,8 +76,8 @@
       <title>Extrinsic evaluation of question generation methods with user journey logs</title>
       <author><first>Elie</first><last>Antoine</last></author>
       <author><first>Eléonore</first><last>Besnehard</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Eric</first><last>Kergosien</last></author>
       <author><first>Arnaud</first><last>Laborderie</last></author>
       <pages>63–70</pages>
@@ -118,7 +118,7 @@
       <title>Once Upon a Replication: It is Humans’ Turn to Evaluate <fixed-case>AI</fixed-case>’s Understanding of Children’s Stories for <fixed-case>QA</fixed-case> Generation</title>
       <author><first>Andra-Maria</first><last>Florescu</last></author>
       <author><first>Marius</first><last>Micluta-Campeanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>106–113</pages>
       <abstract>The following paper presents the outcomes of a collaborative experiment on human evaluation from the ReproNLP 2024 shared task, track B, part of the ReproHum project. For this paper, we evaluated a QAG (question-answer generation) system centered on English children’s storybooks that was presented in a previous research, by using human evaluators for the study. The system generated relevant QA (Question-Answer) pairs based on a dataset with storybooks for early education (kindergarten up to middle school) called FairytaleQA. In the framework of the ReproHum project, we first outline the previous paper and the reproduction strategy that has been decided upon. The complete setup of the first human evaluation is then described, along with the modifications required to replicate it. We also add other relevant related works on this subject. In conclusion, we juxtapose the replication outcomes with those documented in the cited publication. Additionally, we explore the general features of this endeavor as well as its shortcomings.</abstract>
       <url hash="654632fa">2024.humeval-1.10</url>
@@ -150,7 +150,7 @@
       <author><first>Anouck</first><last>Braggaar</last></author>
       <author><first>Nadine</first><last>Braun</last></author>
       <author><first>Martijn</first><last>Goudbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Steffen</first><last>Pauws</last></author>
       <author><first>Frédéric</first><last>Tomas</last></author>
@@ -184,7 +184,7 @@
     <paper id="16">
       <title><fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #1018-09: Reproducing Human Evaluations of Redundancy Errors in Data-To-Text Systems</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>163–198</pages>
       <abstract>This paper describes a reproduction of a human evaluation study evaluating redundancies generated in automatically generated text from a data-to-text system. While the scope of the original study is broader, a human evaluation—a manual error analysis—is included as part of the system evaluation. We attempt a reproduction of this human evaluation, however while the authors annotate multiple properties of the generated text, we focus exclusively on a single quality criterion, that of redundancy. In focusing our study on a single minimal reproducible experimental unit, with the experiment being fairly straightforward and all data made available by the authors, we encountered no challenges with our reproduction and were able to reproduce the trend found in the original experiment. However, while still confirming the general trend, we found that both our annotators identified twice as many errors in the dataset than the original authors.</abstract>
       <url hash="0a99acef">2024.humeval-1.16</url>
diff --git a/data/xml/2024.icnlsp.xml b/data/xml/2024.icnlsp.xml
index 04d3e270d8..d551d42ff4 100644
--- a/data/xml/2024.icnlsp.xml
+++ b/data/xml/2024.icnlsp.xml
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Leveraging Annotator Disagreement for Text Classification</title>
       <author><first>Jin</first><last>Xu</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Daniel</first><last>Braun</last></author>
       <pages>1–10</pages>
       <url hash="60323fad">2024.icnlsp-1.1</url>
@@ -137,7 +137,7 @@
       <author><first>Aritz</first><last>Lasarguren</last></author>
       <author><first>Jone</first><last>Lòpez</last></author>
       <author><first>Egoitz</first><last>Rodriguez</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <pages>109–118</pages>
       <url hash="5eeb398a">2024.icnlsp-1.13</url>
       <bibkey>vasquez-correa-etal-2024-real</bibkey>
@@ -148,7 +148,7 @@
       <author><first>Asier López</first><last>Zorrilla</last></author>
       <author><first>Mikel</first><last>deVelasco</last></author>
       <author><first>Juan Camilo</first><last>Vasquez-Correa</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <author><first>Maria Inés</first><last>Torres</last></author>
       <author><first>Paz</first><last>Delgado</last></author>
       <author><first>Ane</first><last>Lazpiur</last></author>
@@ -237,9 +237,9 @@
       <author><first>Guram</first><last>Mikaberidze</last></author>
       <author><first>Raphael</first><last>Kalandadze</last></author>
       <author><first>Konstantine</first><last>Pkhakadze</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Simon</first><last>Ostermann</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Philipp</first><last>Müller</last></author>
       <pages>199–208</pages>
       <url hash="a9445100">2024.icnlsp-1.22</url>
@@ -283,7 +283,7 @@
       <title>Human and Machine: Language Processing in Translation Tasks</title>
       <author><first>Hening</first><last>Wang</last></author>
       <author><first>Leixin</first><last>Zhang</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <pages>243–250</pages>
       <url hash="89dc2086">2024.icnlsp-1.27</url>
       <bibkey>wang-etal-2024-human</bibkey>
@@ -310,7 +310,7 @@
       <author><first>Timothy</first><last>Obiso</last></author>
       <author><first>Bingyang</first><last>Ye</last></author>
       <author><first>Kyeongmin</first><last>Rim</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>279–286</pages>
       <url hash="9de96a2f">2024.icnlsp-1.30</url>
       <bibkey>obiso-etal-2024-semantically</bibkey>
@@ -342,7 +342,7 @@
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Shady</first><last>Elbassuoni</last></author>
       <author><first>Wassim El</first><last>Hajj</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <pages>304–318</pages>
       <url hash="184ede38">2024.icnlsp-1.33</url>
       <bibkey>hajj-etal-2024-design</bibkey>
@@ -352,7 +352,7 @@
       <author><first>Yasmine A Abu</first><last>Adla</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Shady</first><last>Elbassuoni</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <author><first>Wassim El</first><last>Hajj</last></author>
       <pages>319–342</pages>
       <url hash="8c6ebd58">2024.icnlsp-1.34</url>
@@ -386,7 +386,7 @@
     <paper id="38">
       <title><fixed-case>B</fixed-case>ulgarian Grammar Error Correction with Data Augmentation and Machine Translation Techniques</title>
       <author><first>Bozhidar</first><last>Klouchek</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>365–376</pages>
       <url hash="520e613a">2024.icnlsp-1.38</url>
       <bibkey>klouchek-batista-navarro-2024-bulgarian</bibkey>
@@ -446,8 +446,8 @@
     <paper id="45">
       <title><fixed-case>SG</fixed-case>-<fixed-case>RAG</fixed-case>: Multi-Hop Question Answering With Large Language Models Through Knowledge Graphs</title>
       <author><first>Ahmmad O. M.</first><last>Saleh</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Yucel</first><last>Saygin</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="yucel-saygin"><first>Yucel</first><last>Saygin</last></author>
       <pages>439–448</pages>
       <url hash="f6e5e25e">2024.icnlsp-1.45</url>
       <bibkey>saleh-etal-2024-sg</bibkey>
@@ -455,7 +455,7 @@
     <paper id="46">
       <title>Linking <fixed-case>Q</fixed-case>uran and <fixed-case>H</fixed-case>adith Topics in an Ontology using Word Embeddings and Cellfie Plugin</title>
       <author><first>Ibtisam Khalaf</first><last>Alshammari</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Mohammad Ammar</first><last>Alsalka</last></author>
       <pages>449–455</pages>
       <url hash="961320c4">2024.icnlsp-1.46</url>
@@ -466,7 +466,7 @@
       <author><first>Raffaello</first><last>Fornasiere</last></author>
       <author><first>Nicolò</first><last>Brunello</last></author>
       <author><first>Vincenzo</first><last>Scotti</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>456–466</pages>
       <url hash="a065d4e9">2024.icnlsp-1.47</url>
       <bibkey>fornasiere-etal-2024-medical</bibkey>
diff --git a/data/xml/2024.icon.xml b/data/xml/2024.icon.xml
index 7ef11b7d98..907ba19c13 100644
--- a/data/xml/2024.icon.xml
+++ b/data/xml/2024.icon.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-01-29" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 21st International Conference on Natural Language Processing (ICON)</booktitle>
-      <editor><first>Sobha</first><last>Lalitha Devi</last></editor>
+      <editor id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></editor>
       <editor><first>Karunesh</first><last>Arora</last></editor>
       <publisher>NLP Association of India (NLPAI)</publisher>
       <address>AU-KBC Research Centre, Chennai, India</address>
@@ -37,7 +37,7 @@
       <title>Precision Empowers, Excess Distracts: Visual Question Answering With Dynamically Infused Knowledge In Language Models</title>
       <author><first>Manas</first><last>Jhalani</last></author>
       <author><first>Annervaz</first><last>K M</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>21–36</pages>
       <abstract>In the realm of multimodal tasks, Visual Question Answering (VQA) plays a crucial role by addressing natural language questions grounded in visual content. Knowledge-Based Visual Question Answering (KBVQA) advances this concept by adding external knowledge along with images to respond to questions. We introduce an approach for KBVQA, augmenting the existing vision-language transformer encoder-decoder (OFA) model . Our main contribution involves enhancing questions by incorporating relevant external knowledge extracted from knowledge graphs, using a dynamic triple extraction</abstract>
       <url hash="3218354e">2024.icon-1.3</url>
@@ -60,7 +60,7 @@
       <author><first>Jay</first><last>J. Gorakhiya</last></author>
       <author><first>Sanand</first><last>Sasidharan</last></author>
       <author><first>Anuradha</first><last>Kanamarlapudi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>45–53</pages>
       <abstract>Extracting information from genomic reports of cancer patients is crucial for both healthcare professionals and cancer research. While Large Language Models (LLMs) have shown promise in extracting information, their potential for handling genomic reports remains unexplored. These reports are complex, multi-page documents that feature a variety of visually rich, structured layouts and contain many domain-specific terms. Two primary challenges complicate the process: (i) extracting data from PDFs with intricate layouts and domain-specific terminology and (ii) dealing with variations in report layouts from different laboratories, making extraction layout-dependent and posing challenges for subsequent data processing. To tackle these issues, we propose GR-PROMPT, a prompt-based technique, and GR-FORMAT, a standardized format. Together, these two convert a genomic report in PDF format into GR-FORMAT as a JSON file using a multimodal LLM. To address the lack of available datasets for this task, we introduce GR-DATASET, a synthetic collection of 100 cancer genomic reports in PDF format. Each report is accompanied by key-value information presented in a layout-specific format, as well as structured key-value information in GR-FORMAT. This is the first dataset in this domain to promote further research for the task. We performed our experiment on this dataset.</abstract>
       <url hash="2d3371f7">2024.icon-1.5</url>
@@ -69,7 +69,7 @@
     <paper id="6">
       <title>Identification of Idiomatic Expressions in <fixed-case>K</fixed-case>onkani Language Using Neural Networks</title>
       <author><first>Naziya Mahamdul</first><last>Shaikh</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>54–58</pages>
       <abstract>The task of multi-word expressions identification and processing has posed a remarkable challenge to the natural language processing applications. One related subtask in this arena is correct labelling of the sentences with the presence of idiomatic expressions as either literal or idiomatic sense. The regional Indian language Konkani spoken in the states located in the west coast of India lacks in the research in idiom processing tasks. We aim at bridging this gap through a contribution to idiom identification method in Konkani language. This paper classifies the idiomatic expression usage in Konkani language as idiomatic or literal usage using a neural network-based setup. The developed system was able to successfully perform the identification task with an accuracy of 79.5 % and F1-score of 0.77.</abstract>
       <url hash="a8a42005">2024.icon-1.6</url>
@@ -81,7 +81,7 @@
       <author><first>Jayram Ulhas</first><last>Gawas</last></author>
       <author><first>Shrikrishna</first><last>R. Parab</last></author>
       <author><first>Shilpa Neenad</first><last>Desai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>59–67</pages>
       <abstract>The Visualizer is a tree-structure designed to browse and explore the Konkani WordNet lexical database. We propose to utilise this tool as a concept teaching and learning resource for Konkani, to be used by both teachers and students. It can also be used to add the missing semantic and lexical relations, thus enhancing the wordnet. It extracts related concepts for a given word and displays them as a sub-tree. The interface includes various features to offer users greater flexibility in navigating and understanding the word relationships. We attempted to enrich the Konkani Wordnet qualitatively with a Visualizer that offers an improved usability and is incorporated in the Konkani Wordnet website for the public use. The Visualizer is designed to provide graphical representations of words and their semantic relationships, making it easier to explore connections and meanings within the lexical database.</abstract>
       <url hash="e19f4b14">2024.icon-1.7</url>
@@ -91,7 +91,7 @@
       <title>A Systematic Exploration of Linguistic Phenomena in Spoken <fixed-case>H</fixed-case>indi: Resource Creation and Hypothesis Testing</title>
       <author><first>Aadya</first><last>Ranjan</last></author>
       <author><first>Sidharth</first><last>Ranjan</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>68–78</pages>
       <abstract>This paper presents a meticulous and well-structured approach to annotating a corpus of Hindi spoken data. We deployed 4 annotators to augment the spoken section of the EMILLE Hindi corpus by marking the various linguistic phenomena observed in spoken data. Then we analyzed various phonological (sound deletion), morphological (code-mixing and reduplication) and syntactic phenomena (case markers and ambiguity), not attested in written data. Code mixing and switching and constitute the majority of the phenomena we annotated, followed by orthographic errors related to symbols in the Devanagiri script. In terms of divergences from written form of Hindi, case marker usage, missing auxiliary verbs and agreement patterns are markedly distinct for spoken Hindi. The annotators also assigned a quality rating to each sentence in the corpus. Our analysis of the quality ratings revealed that most of the sentences in the spoken data corpus are of moderate to high quality. Female speakers produced a greater percentage of high quality sentences compared to their male counterparts. While previous efforts in corpus annotation have been largely focused on creating resources for engineering applications, we illustrate the utility of our dataset for scientific hypothesis testing. Inspired from the Surprisal Theory of language comprehension, we validate the hypothesis that sentences with high values of lexical surprisal are rated low in terms of quality by native speakers, even when controlling for sentence length and word frequencies in a sentence.</abstract>
       <url hash="17ecdf39">2024.icon-1.8</url>
@@ -181,7 +181,7 @@
     <paper id="17">
       <title>Reconsidering <fixed-case>SMT</fixed-case> Over <fixed-case>NMT</fixed-case> for Closely Related Languages: A Case Study of <fixed-case>P</fixed-case>ersian-<fixed-case>H</fixed-case>indi Pair</title>
       <author><first>Waisullah</first><last>Yousofi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>149–156</pages>
       <abstract>This paper demonstrates that Phrase-Based Statistical Machine Translation (PBSMT) can outperform Transformer-based Neural Machine Translation (NMT) in moderate-resource scenarios, specifically for structurally similar languages, Persian-Hindi pair in our case. Despite the Transformer architecture’s typical preference for large parallel corpora, our results show that PBSMT achieves a BLEU score of 66.32, significantly exceeding the Transformer-NMT score of 53.7 ingesting the same dataset.</abstract>
       <url hash="be91f9f4">2024.icon-1.17</url>
@@ -190,7 +190,7 @@
     <paper id="18">
       <title><fixed-case>R</fixed-case>o<fixed-case>M</fixed-case>antra: Optimizing Neural Machine Translation for Low-Resource Languages through <fixed-case>R</fixed-case>omanization</title>
       <author><first>Govind</first><last>Soni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>157–168</pages>
       <abstract>Neural Machine Translation (NMT) for low-resource language pairs with distinct scripts, such as Hindi-Chinese and Japanese-Hindi, poses significant challenges due to scriptural and linguistic differences. This paper investigates the efficacy of romanization as a preprocessing step to bridge these gaps. We compare baseline models trained on native scripts with models incorporating romanization in three configurations: both-side, source-side only, and target-side only. Additionally, we introduce a script restoration model that converts romanized output back to native scripts, ensuring accurate evaluation. Our experiments show that romanization, particularly when applied to both sides, improves translation quality across the studied language pairs. The script restoration model further enhances the practicality of this approach by enabling evaluation in native scripts with some performance loss. This work provides insights into leveraging romanization for NMT in low-resource, cross-script settings, presenting a promising direction for under-researched language combinations.</abstract>
       <url hash="01fed439">2024.icon-1.18</url>
@@ -258,7 +258,7 @@
       <author><first>Pritam</first><last>Pal</last></author>
       <author><first>Srijani</first><last>Debnath</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>215–223</pages>
       <abstract>With the advancement of natural language processing (NLP) and sophisticated Large Language Models (LLMs), distinguishing between human-written texts and machine-generated texts is quite difficult nowadays. This paper presents a systematic approach to classifying machine-generated text from human-written text with a combination of the transformer-based model and textual feature-based post-processing technique. We extracted five textual features: readability score, stop word score, spelling and grammatical error count, unique word score and human phrase count from both human-written and machine-generated texts separately and trained three machine learning models (SVM, Random Forest and XGBoost) with these scores. Along with exploring traditional machine-learning models, we explored the BiLSTM and transformer-based distilBERT models to enhance the classification performance. By training and evaluating with a large dataset containing both human-written and machine-generated text, our best-performing framework achieves an accuracy of 87.5%.</abstract>
       <url hash="e958a80d">2024.icon-1.24</url>
@@ -292,7 +292,7 @@
     <paper id="27">
       <title>Pronominal Anaphora Resolution in <fixed-case>K</fixed-case>onkani language incorporating Gender Agreement</title>
       <author><first>Poonam</first><last>A. Navelker</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>243–247</pages>
       <abstract>Konkani is a low-resource language, spoken mainly on the central west coast of India. Approximately 2.3 million people speak Konkani (Office of the Registrar General Census Commissioner, India,2011). It is also the official language of the state of Goa. It belongs to the Southern Indo-Aryan language group. The official Script for writing the Konkani language is Devanagari. Despite this, being a low-resource language has hampered its development on the digital platform, Konkani has yet to significantly impact its digital presence. To improve this situation, contribution to Natural Language Understanding in the Konkani language is important. This paper aims to resolve pronominal anaphora in the Konkani language using a rule-based method incorporating gender agreement. This is required in NLP applications like text summarization, machine translation, and question-answering systems. While research on English and other foreign languages, as well as Indian languages like Tamil, Kannada, Malayalam, Bengali, and Marathi, have been done, no work has been done on the Konkani language thus far. This is the very first attempt made to resolve anaphora in Konkani.</abstract>
       <url hash="64ad5781">2024.icon-1.27</url>
@@ -314,8 +314,8 @@
     <paper id="29">
       <title>End to End Multilingual Coreference Resolution for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Sobha</first><last>Lalitha Devi</last></author>
-      <author><first>Vijay Sundar</first><last>Ram</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay Sundar</first><last>Ram</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
       <pages>256–259</pages>
       <abstract>This paper describes an approach on an end to end model for Multilingual Coreference Resolution (CR) for low resource languages such as Tamil, Malayalam and Hindi. We have done fine tune the XLM-Roberta large model on multilingual training dataset using specific languages with linguistic features and without linguistic features. XLM-R with linguistic features achieves better results than the baseline system. This shows that giving the linguistic knowledge enriches the system performance. The performance of the system is comparable with the state of the art systems.</abstract>
       <url hash="66264799">2024.icon-1.29</url>
@@ -324,7 +324,7 @@
     <paper id="30">
       <title><fixed-case>L</fixed-case>ang<fixed-case>B</fixed-case>ot-Language Learning Chatbot</title>
       <author><first>Madhubala</first><last>Sundaram</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
       <author><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>260–263</pages>
       <abstract>Chatbots are being widely used in educational domain to revolutionize how students interact and learn along with traditional methods of learning. This paper presents our work on LangBot, a chatbot developed for learning Tamil language. LangBot developed integrates the interactive features of chatbots with the study material of the Tamil courses offered by Tamil Virtual Academy, Government of Tamil Nadu. LangBot helps students in enhancing their learning skills and increases their interest in learning the language. Using semi-automatic methods, we generate question and answers related to all topics in the courses. We then develop a generative language model and also Retrieval Augmented Generation (RAG) so that the system can incorporate new syllabus changes. We have performed manual user studies. The results obtained are encouraging. This approach offers learners an interactive tool that aligns with their syllabus. It is observed that this enriches the overall learning experience.</abstract>
@@ -356,7 +356,7 @@
     <paper id="33">
       <title><fixed-case>R</fixed-case>ound<fixed-case>T</fixed-case>rip<fixed-case>OCR</fixed-case>: A Data Generation Technique for Enhancing Post-<fixed-case>OCR</fixed-case> Error Correction in Low-Resource <fixed-case>D</fixed-case>evanagari Languages</title>
       <author><first>Harshvivek</first><last>Kashid</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>274–284</pages>
       <abstract>Optical Character Recognition (OCR) technology has revolutionized the digitization of printed text, enabling efficient data extraction and analysis across various domains. Just like Machine Translation systems, OCR systems are prone to errors. In this work, we address the challenge of data generation and post-OCR error correction, specifically for low-resource languages. We propose an approach for synthetic data generation for Devanagari languages, RoundTripOCR, that tackles the scarcity of the post-OCR Error Correction datasets for low-resource languages. We release post-OCR text correction datasets for Hindi, Marathi, Bodo, Nepali, Konkani and Sanskrit. We also present a novel approach for OCR error correction by leveraging techniques from machine translation. Our method involves translating erroneous OCR output into a corrected form by treating the OCR errors as mistranslations in a parallel text corpus, employing pre-trained transformer models to learn the mapping from erroneous to correct text pairs, effectively correcting OCR errors.</abstract>
       <url hash="6f3e85ac">2024.icon-1.33</url>
@@ -377,7 +377,7 @@
     <paper id="35">
       <title>We Care: Multimodal Depression Detection and Knowledge Infused Mental Health Therapeutic Response Generation</title>
       <author><first>Palash</first><last>Moon</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>296–310</pages>
       <abstract>The detection of depression through non-verbal cues has gained significant attention. Previous research predominantly centred on identifying depression within the confines of controlled laboratory environments, often with the supervision of psychologists or counsellors. Unfortunately, datasets generated in such controlled settings may struggle to account for individual behaviours in real-life situations. In response to this limitation, we present the Extended D-vlog dataset, encompassing a collection of 1,261 YouTube vlogs. Additionally, the emergence of large language models (LLMs) like GPT3.5, and GPT4 has sparked interest in their potential that LLMs can act like mental health professionals. Yet, the readiness of these LLM models to be used in real-life settings is still a concern as they can give wrong responses that can harm the users. We introduce a virtual agent serving as an initial contact for mental health patients, offering Cognitive Behavioral Therapy (CBT)-based responses. It comprises two core functions: 1. Identifying depression in individuals, and 2. Delivering CBT-based therapeutic responses. Our Mistral model achieved impressive scores of 70.1% and 30.9% for distortion assessment and classification, along with a Bert score of 88.7%. Moreover, utilizing the TVLT model on our Multimodal Extended D-vlog Dataset yielded outstanding results, with an impressive F1-score of 67.8%</abstract>
       <url hash="14256a8c">2024.icon-1.35</url>
@@ -465,7 +465,7 @@
       <title>Natural Answer Generation: From Factoid Answer to Full-length Answer using Grammar Correction</title>
       <author><first>Manas</first><last>Jain</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Gladvin</first><last>Chinnadurai</last></author>
       <author><first>Manish</first><last>Vatsa</last></author>
       <pages>376–385</pages>
@@ -537,8 +537,8 @@
       <title><fixed-case>S</fixed-case>ans<fixed-case>GPT</fixed-case>: Advancing Generative Pre-Training in <fixed-case>S</fixed-case>anskrit</title>
       <author><first>Rhugved Pankaj</first><last>Chaudhari</last></author>
       <author><first>Bhakti</first><last>Jadhav</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <pages>432–441</pages>
       <abstract>In the past decade, significant progress has been made in digitizing Sanskrit texts and advancing computational analysis of the language. However, efforts to advance NLP for complex semantic downstream tasks like Semantic Analogy Prediction, Named Entity Recognition, and others remain limited. This gap is mainly due to the absence of a robust, pre-trained Sanskrit model built on large-scale Sanskrit text data since this demands considerable computational resources and data preparation. In this paper, we introduce SansGPT, a generative pre-trained model that has been trained on a large corpus of Sanskrit texts and is designed to facilitate fine-tuning and development for downstream NLP tasks. We aim for this model to serve as a catalyst for advancing NLP research in Sanskrit. Additionally, we developed a custom tokenizer specifically optimized for Sanskrit text, enabling effective tokenization of compound words and making it better suited for generative tasks. Our data collection and cleaning process encompassed a wide array of available Sanskrit literature, ensuring comprehensive representation for training. We further demonstrate the model’s efficacy by fine-tuning it on Semantic Analogy Prediction and Simile Element Extraction, achieving an impressive accuracy of approximately 95.8% and 92.8%, respectively.</abstract>
       <url hash="3b50e58a">2024.icon-1.50</url>
@@ -668,7 +668,7 @@
       <author><first>Shrikrishna</first><last>R. Parab</last></author>
       <author><first>Jayram Ulhas</first><last>Gawas</last></author>
       <author><first>Shilpa Neenad</first><last>Desai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>531–536</pages>
       <abstract>Konkani WordNet, also called Konkani Shabdamalem, was created as part of the Indradhanush WordNet Project Consortium between August 2010 and October 2013. Currently, the Konkani WordNet includes about 32,370 synsets and 37,719 unique words. There is a need to enhance the Konkani WordNet both quantitatively as well as qualitatively. In this paper we are presenting a Game-Based Crowdsourcing approach adopted by us to add audio feature to the Konkani WordNet which has resulted in an increase in the number of users using and getting exposed to the capabilities of the Konkani WordNet to aid in the Konkani language teaching-learning process as well as for creation of resources to initiate further research. Our work presented here has resulted in the creation of an audio corpus of 37,719 unique words which we have named as ‘Shabdocchar’ within a short time span of four months covering five dialects of Konkani. We are confident that Shabdocchar will prove to be a very useful resource to support future research work on Dialects of Konkani and support voice-based search of words in the wordnet. This approach can be adopted to enhance other wordnets as well.</abstract>
       <url hash="daace509">2024.icon-1.62</url>
@@ -702,7 +702,7 @@
       <author><first>Pratik Deelip</first><last>Korkankar</last></author>
       <author><first>Alvyn</first><last>Abranches</last></author>
       <author><first>Pradnya</first><last>Bhagat</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>562–568</pages>
       <abstract>In the era of online shopping, the volume of product reviews for user products on e-commerce platforms is massively increasing on a daily basis. For any given user product, it consists of a flood of reviews and manually analysing each of these reviews to understand the important aspects or opinions associated with the products is difficult and time-consuming task. Furthermore, it becomes nearly impossible for the customer to make decision of buying the product or not. Thus, it becomes necessary to have an aspect-based summary generated from these user reviews, which can act as a guide for the interested buyer in decision-making. Recently, the use of Large Language Models (LLMs) has shown great potential for solving diverse Natural Language Processing (NLP) tasks, including the task of summarization. Our paper explores the use of various LLMs such as Llama3, GPT-4o, Gemma2, Mistral, Mixtral and Qwen2 on the publicly available domain-specific Amazon reviews dataset as a part of our experimentation work. Our study postulates an algorithm to accurately identify product aspects and the model’s ability to extract relevant information and generate concise summaries. Further, we analyzed the experimental results of each of these LLMs with summary evaluation metrics such as Rouge, Meteor, BERTScore F1 and GPT-4o to evaluate the quality of the generated aspect-based summary. Our study highlights the strengths and limitations of each of these LLMs, thereby giving valuable insights for guiding researchers in harnessing LLMs for generating aspect-based summaries of user products present on these online shopping platforms.</abstract>
       <url hash="a36ab9fc">2024.icon-1.65</url>
@@ -715,7 +715,7 @@
       <author><first>Pradnya</first><last>Bhagat</last></author>
       <author><first>Alvyn</first><last>Abranches</last></author>
       <author><first>Pratik Deelip</first><last>Korkankar</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>569–575</pages>
       <abstract>Sentiment Analysis plays a crucial role in understanding user opinions in various languages. The paper presents an experiment with a sentiment analysis model fine-tuned on Marathi sentences to classify sentiments into positive, negative, and neutral categories. The fine-tuned model shows high accuracy when tested on Konkani sentences, despite not being explicitly trained on Konkani data; since Marathi is a language very close to Konkani. This outcome highlights the effectiveness of Zero-shot learning, where the model generalizes well across linguistically similar languages. Evaluation metrics such as accuracy, balanced accuracy, negative accuracy, neutral accuracy, positive accuracy and confusion matrix scores were used to assess the performance, with Konkani sentences demonstrating superior results. These findings indicate that zero-shot sentiment analysis can be a powerful tool for sentiment classification in resource poor languages like Konkani, where labeled data is limited. The method can be used to generate datasets for resource-poor languages. Furthermore, this suggests that leveraging linguistically similar languages can help generate datasets for low-resource languages, enhancing sentiment analysis capabilities where labeled data is scarce. By utilizing related languages, zero-shot models can achieve meaningful performance without the need for extensive labeled data for the target language.</abstract>
       <url hash="f0ba3446">2024.icon-1.66</url>
@@ -774,7 +774,7 @@
     <paper id="72">
       <title>Automatic Summarization of Long Documents</title>
       <author><first>Naman</first><last>Chhibbar</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>607–615</pages>
       <abstract>A vast amount of textual data is added to the internet daily, making utilization and interpretation of such data difficult and cumbersome. As a result, automatic text summarization is crucial for extracting relevant information, saving precious reading time. Although many transformer-based models excel in summarization, they are constrained by their input size, preventing them from processing texts longer than their context size. This study introduces three novel algorithms that allow any LLM to efficiently overcome its input size limitation, effectively utilizing its full potential without any architectural modifications. We test our algorithms on texts with more than 70,000 words, and our experiments show a significant increase in BERTScore with competitive ROUGE scores.</abstract>
       <url hash="c0d38929">2024.icon-1.72</url>
@@ -814,7 +814,7 @@
     <meta>
       <booktitle>Proceedings of the 21st International Conference on Natural Language Processing (ICON): Shared Task on Decoding Fake Narratives in Spreading Hateful Stories (Faux-Hate)</booktitle>
       <editor><first>Shankar</first><last>Biradar</last></editor>
-      <editor><first>Kasu Sai Kartheek</first><last>Reddy</last></editor>
+      <editor id="kasu-sai-kartheek-reddy"><first>Kasu Sai Kartheek</first><last>Reddy</last></editor>
       <editor><first>Sunil</first><last>Saumya</last></editor>
       <editor><first>Md. Shad</first><last>Akhtar</last></editor>
       <publisher>NLP Association of India (NLPAI)</publisher>
diff --git a/data/xml/2024.inlg.xml b/data/xml/2024.inlg.xml
index 3f5f6516a9..b447d832a3 100644
--- a/data/xml/2024.inlg.xml
+++ b/data/xml/2024.inlg.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 17th International Natural Language Generation Conference</booktitle>
       <editor><first>Saad</first><last>Mahamood</last></editor>
-      <editor><first>Nguyen Le</first><last>Minh</last></editor>
+      <editor id="minh-le-nguyen"><first>Nguyen Le</first><last>Minh</last></editor>
       <editor><first>Daphne</first><last>Ippolito</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tokyo, Japan</address>
@@ -33,7 +33,7 @@
       <title>Noisy Pairing and Partial Supervision for Stylized Opinion Summarization</title>
       <author><first>Hayate</first><last>Iso</last></author>
       <author><first>Xiaolan</first><last>Wang</last></author>
-      <author><first>Yoshi</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last></author>
       <pages>13–23</pages>
       <abstract>Opinion summarization research has primarily focused on generating summaries reflecting important opinions from customer reviews without paying much attention to the writing style. In this paper, we propose the stylized opinion summarization task, which aims to generate a summary of customer reviews in the desired (e.g., professional) writing style. To tackle the difficulty in collecting customer and professional review pairs, we develop a non-parallel training framework, Noisy Pairing and Partial Supervision (NAPA), which trains a stylized opinion summarization system from non-parallel customer and professional review sets. We create a benchmark ProSum by collecting customer and professional reviews from Yelp and Michelin. Experimental results on ProSum and FewSum demonstrate that our non-parallel training framework consistently improves both automatic and human evaluations, successfully building a stylized opinion summarization model that can generate professionally-written summaries from customer reviews. The code is available at https://github.com/megagonlabs/napa</abstract>
       <url hash="952c41de">2024.inlg-main.2</url>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="7">
       <title>Generating from <fixed-case>AMR</fixed-case>s into High and Low-Resource Languages using Phylogenetic Knowledge and Hierarchical <fixed-case>QL</fixed-case>o<fixed-case>RA</fixed-case> Training (<fixed-case>HQL</fixed-case>)</title>
-      <author><first>William</first><last>Soto Martinez</last></author>
+      <author id="william-soto-martinez"><first>William</first><last>Soto Martinez</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>70–81</pages>
@@ -111,8 +111,8 @@
       <author><first>Sameen</first><last>Maruf</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
       <author><first>Xuelin</first><last>Situ</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>103–120</pages>
       <abstract>In this paper, we generate and compare three types of explanations of Machine Learning (ML) predictions: simple, conservative and unifying. Simple explanations are concise, conservative explanations address the surprisingness of a prediction, and unifying explanations convey the extent to which an ML model’s predictions are applicable. The results of our user study show that (1) conservative and unifying explanations are liked equally and considered largely equivalent in terms of completeness, helpfulness for understanding the AI, and enticement to act, and both are deemed better than simple explanations; and (2)users’ views about explanations are influenced by the (dis)agreement between the ML model’s predictions and users’ estimations of these predictions, and by the inclusion/omission of features users expect to see in explanations.</abstract>
       <url hash="f73eca8e">2024.inlg-main.9</url>
@@ -239,7 +239,7 @@
     </paper>
     <paper id="20">
       <title>Exploring the impact of data representation on neural data-to-text generation</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Lewis N.</first><last>Watson</last></author>
       <author><first>Olesia</first><last>Nedopas</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
@@ -265,7 +265,7 @@
     <paper id="22">
       <title>(Mostly) Automatic Experiment Execution for Human Evaluations of <fixed-case>NLP</fixed-case> Systems</title>
       <author><first>Craig</first><last>Thomson</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>272–279</pages>
       <abstract>Human evaluation is widely considered the most reliable form of evaluation in NLP, but recent research has shown it to be riddled with mistakes, often as a result of manual execution of tasks. This paper argues that such mistakes could be avoided if we were to automate, as much as is practical, the process of performing experiments for human evaluation of NLP systems. We provide a simple methodology that can improve both the transparency and reproducibility of experiments. We show how the sequence of component processes of a human evaluation can be defined in advance, facilitating full or partial automation, detailed preregistration of the process, and research transparency and repeatability.</abstract>
       <url hash="4f5862ba">2024.inlg-main.22</url>
@@ -300,7 +300,7 @@
       <author><first>Shota</first><last>Koyama</last></author>
       <author><first>Ryo</first><last>Nagata</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>303–313</pages>
       <abstract>M2 and its variants are the most widely used automatic evaluation metrics for grammatical error correction (GEC), which calculate an F-score using a phrase-based alignment between sentences. However, it is not straightforward at all to align learner sentences containing errors to their correct sentences. In addition, alignment calculations are computationally expensive. We propose GREEN, an alignment-free F-score for GEC evaluation. GREEN treats a sentence as a multiset of n-grams and extracts edits between sentences by set operations instead of computing an alignment. Our experiments confirm that GREEN performs better than existing methods for the corpus-level metrics and comparably for the sentence-level metrics even without computing an alignment. GREEN is available at https://github.com/shotakoyama/green.</abstract>
       <url hash="5f09ede7">2024.inlg-main.25</url>
@@ -323,7 +323,7 @@
       <title>Pipeline Neural Data-to-text with Large Language Models</title>
       <author><first>Chinonso Cynthia</first><last>Osuji</last></author>
       <author><first>Brian</first><last>Timoney</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <pages>320–329</pages>
       <abstract>Previous studies have highlighted the advantages of pipeline neural architectures over end-to-end models, particularly in reducing text hallucination. In this study, we extend prior research by integrating pretrained language models (PLMs) into a pipeline framework, using both fine-tuning and prompting methods. Our findings show that fine-tuned PLMs consistently generate high quality text, especially within end-to-end architectures and at intermediate stages of the pipeline across various domains. These models also outperform prompt-based ones on automatic evaluation metrics but lag in human evaluations. Compared to the standard five-stage pipeline architecture, a streamlined three-stage pipeline, which only include ordering, structuring, and surface realization, achieves superior performance in fluency and semantic adequacy according to the human evaluation.</abstract>
@@ -345,8 +345,8 @@
     </paper>
     <paper id="29">
       <title>Resilience through Scene Context in Visual Referring Expression Generation</title>
-      <author><first>Simeon</first><last>Junker</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>344–357</pages>
       <abstract>Scene context is well known to facilitate humans’ perception of visible objects. In this paper, we investigate the role of context in Referring Expression Generation (REG) for objects in images, where existing research has often focused on distractor contexts that exert pressure on the generator. We take a new perspective on scene context in REG and hypothesize that contextual information can be conceived of as a resource that makes REG models more resilient and facilitates the generation of object descriptions, and object types in particular. We train and test Transformer-based REG models with target representations that have been artificially obscured with noise to varying degrees. We evaluate how properties of the models’ visual context affect their processing and performance. Our results show that even simple scene contexts make models surprisingly resilient to perturbations, to the extent that they can identify referent types even when visual information about the target is completely missing.</abstract>
       <url hash="fc7f90ae">2024.inlg-main.29</url>
@@ -444,7 +444,7 @@
       <title>ai<fixed-case>X</fixed-case>plain <fixed-case>SDK</fixed-case>: A High-Level and Standardized Toolkit for <fixed-case>AI</fixed-case> Assets</title>
       <author><first>Shreyas</first><last>Sharma</last></author>
       <author><first>Lucas</first><last>Pavanelli</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Hassan</first><last>Sawaf</last></author>
       <pages>446–452</pages>
@@ -490,10 +490,10 @@
     <paper id="41">
       <title>Multilingual Text Style Transfer: Datasets &amp; Models for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Deepak</first><last>Alok</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Ondrej</first><last>Dusek</last></author>
       <pages>494–522</pages>
       <abstract>Text style transfer (TST) involves altering the linguistic style of a text while preserving its style-independent content. This paper focuses on sentiment transfer, a popular TST subtask, across a spectrum of Indian languages: Hindi, Magahi, Malayalam, Marathi, Punjabi, Odia, Telugu, and Urdu, expanding upon previous work on English-Bangla sentiment transfer. We introduce dedicated datasets of 1,000 positive and 1,000 negative style-parallel sentences for each of these eight languages. We then evaluate the performance of various benchmark models categorized into parallel, non-parallel, cross-lingual, and shared learning approaches, including the Llama2 and GPT-3.5 large language models (LLMs). Our experiments highlight the significance of parallel data in TST and demonstrate the effectiveness of the Masked Style Filling (MSF) approach in non-parallel techniques. Moreover, cross-lingual and joint multilingual learning methods show promise, offering insights into selecting optimal models tailored to the specific language and task requirements. To the best of our knowledge, this work represents the first comprehensive exploration of the TST task as sentiment transfer across a diverse set of languages.</abstract>
@@ -504,7 +504,7 @@
     <paper id="42">
       <title>Are Large Language Models Actually Good at Text Style Transfer?</title>
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Ondrej</first><last>Dusek</last></author>
       <pages>523–539</pages>
       <abstract>We analyze the performance of large language models (LLMs) on Text Style Transfer (TST), specifically focusing on sentiment transfer and text detoxification across three languages: English, Hindi, and Bengali. Text Style Transfer involves modifying the linguistic style of a text while preserving its core content. We evaluate the capabilities of pre-trained LLMs using zero-shot and few-shot prompting as well as parameter-efficient finetuning on publicly available datasets. Our evaluation using automatic metrics, GPT-4 and human evaluations reveals that while some prompted LLMs perform well in English, their performance in on other languages (Hindi, Bengali) remains average. However, finetuning significantly improves results compared to zero-shot and few-shot prompting, making them comparable to previous state-of-the-art. This underscores the necessity of dedicated datasets and specialized models for effective TST.</abstract>
@@ -535,7 +535,7 @@
       <author><first>Ondrej</first><last>Dusek</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Ondrej</first><last>Platek</last></author>
       <author><first>Adarsa</first><last>Sivaprasad</last></author>
       <pages>557–583</pages>
@@ -562,7 +562,7 @@
       <author><first>Soichiro</first><last>Murakami</last></author>
       <author><first>Peinan</first><last>Zhang</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>597–608</pages>
       <abstract>Ad text generation is vital for automatic advertising in various fields through search engine advertising (SEA) to avoid the cost problem caused by laborious human efforts for creating ad texts. Even though ad creators create the landing page (LP) for advertising and we can expect its quality, conventional approaches with reinforcement learning (RL) mostly focus on advertising keywords rather than LP information. This work investigates and shows the effective usage of LP information as a reward in RL-based ad text generation through automatic and human evaluations. Our analysis of the actually generated ad text shows that LP information can be a crucial reward by appropriately scaling its value range to improve ad text generation performance.</abstract>
       <url hash="95462471">2024.inlg-main.46</url>
@@ -572,7 +572,7 @@
     <paper id="47">
       <title>Differences in Semantic Errors Made by Different Types of Data-to-text Systems</title>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Michela</first><last>Lorandi</last></author>
       <pages>609–621</pages>
       <abstract>In this paper, we investigate how different semantic, or content-related, errors made by different types of data-to-text systems differ in terms of number and type. In total, we examine 15 systems: three rule-based and 12 neural systems including two large language models without training or fine-tuning. All systems were tested on the English WebNLG dataset version 3.0. We use a semantic error taxonomy and the brat annotation tool to obtain word-span error annotations on a sample of system outputs. The annotations enable us to establish how many semantic errors different (types of) systems make and what specific types of errors they make, and thus to get an overall understanding of semantic strengths and weaknesses among various types of NLG systems. Among our main findings, we observe that symbolic (rule and template-based) systems make fewer semantic errors overall, non-LLM neural systems have better fluency and data coverage, but make more semantic errors, while LLM-based systems require improvement particularly in addressing superfluous.</abstract>
@@ -608,7 +608,7 @@
       <title>Generating Faithful and Salient Text from Multimodal Data</title>
       <author><first>Tahsina</first><last>Hashem</last></author>
       <author><first>Weiqing</first><last>Wang</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Mohammed Eunus</first><last>Ali</last></author>
       <author><first>Yuan-Fang</first><last>Li</last></author>
       <pages>646–662</pages>
@@ -620,7 +620,7 @@
     </paper>
     <paper id="51">
       <title>Investigating Paraphrase Generation as a Data Augmentation Strategy for Low-Resource <fixed-case>AMR</fixed-case>-to-Text Generation</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Marcio Lima</first><last>Inacio</last></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
       <pages>663–675</pages>
@@ -659,7 +659,7 @@
     <meta>
       <booktitle>Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations</booktitle>
       <editor><first>Saad</first><last>Mahamood</last></editor>
-      <editor><first>Nguyen Le</first><last>Minh</last></editor>
+      <editor id="minh-le-nguyen"><first>Nguyen Le</first><last>Minh</last></editor>
       <editor><first>Daphne</first><last>Ippolito</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tokyo, Japan</address>
@@ -715,7 +715,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>QCET</fixed-case>: An Interactive Taxonomy of Quality Criteria for Comparable and Repeatable Evaluation of <fixed-case>NLP</fixed-case> Systems</title>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Craig</first><last>Thomson</last></author>
       <author><first>Rudali</first><last>Huidrom</last></author>
@@ -748,7 +748,7 @@
       <author><first>Rudali</first><last>Huidrom</last></author>
       <author><first>Mohammed</first><last>Sabry</last></author>
       <author><first>Amy</first><last>O’Riordan</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>16–19</pages>
       <abstract>Wikipedia is known to have systematic gaps in its coverage that correspond to under-resourced languages as well as underrepresented groups. This paper presents a new tool to support efforts to fill in these gaps by automatically generating draft articles and facilitating post-editing and uploading to Wikipedia. A rule-based generator and an input-constrained LLM are used to generate two alternative articles, enabling the often more fluent, but error-prone, LLM-generated article to be content-checked against the more reliable, but less fluent, rule-generated article.</abstract>
       <url hash="8d2a4a35">2024.inlg-demos.6</url>
@@ -765,7 +765,7 @@
       <year>2024</year>
       <url hash="f7e4a54d">2024.inlg-tutorials</url>
       <venue>inlg</venue>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>João</first><last>Sedoc</last></editor>
       <editor><first>Craig</first><last>Thomson</last></editor>
       <editor><first>Simon</first><last>Mille</last></editor>
@@ -795,7 +795,7 @@
     <meta>
       <booktitle>Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges</booktitle>
       <editor><first>Simon</first><last>Mille</last></editor>
-      <editor><first>Miruna-Adriana</first><last>Clinciu</last></editor>
+      <editor id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tokyo, Japan</address>
       <month>September</month>
@@ -815,7 +815,7 @@
       <author><first>Chris</first><last>Palaguachi</last></author>
       <author><first>Yang</first><last>Zhou</last></author>
       <author><first>Suma</first><last>Bhat</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>1–16</pages>
       <abstract>Given the practical applications of analogies, recent work has studied analogy generation to explain concepts. However, not all generated analogies are of high quality and it is unclear how to measure the quality of this new kind of generated text. To address this challenge, we propose a shared task on automatically evaluating the quality of generated analogies based on seven comprehensive criteria. For this, we will set up a leader board based on our dataset annotated with manual ratings along the seven criteria, and provide a baseline solution leveraging GPT-4. We hope that this task would advance the progress in development of new evaluation metrics and methods for analogy generation in natural language, particularly for education.</abstract>
       <url hash="5bf455b7">2024.inlg-genchal.1</url>
@@ -843,7 +843,7 @@
     <paper id="3">
       <title>Summary of the Visually Grounded Story Generation Challenge</title>
       <author><first>Xudong</first><last>Hong</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>39–46</pages>
       <abstract>Recent advancements in vision-and-language models have opened new possibilities for natural language generation, particularly in generating creative stories from visual input. We thus host an open-sourced shared task, Visually Grounded Story Generation (VGSG), to explore whether these models can create coherent, diverse, and visually grounded narratives. This task challenges participants to generate coherent stories based on sequences of images, where characters and events must be grounded in the images provided. The task is structured into two tracks: the Closed track with constraints on fixed visual features and the Open track which allows all kinds of models. We propose the first two-stage model using GPT-4o as the baseline for the Open track that first generates descriptions for the images and then creates a story based on those descriptions. Human and automatic evaluations indicate that: 1) Retrieval augmentation helps generate more human-like stories, and 2) Largescale pre-trained LLM improves story quality by a large margin; 3) Traditional automatic metrics can not capture the overall quality.</abstract>
@@ -878,7 +878,7 @@
       <title><fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo-<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Augmenting <fixed-case>LLM</fixed-case>s with the Split-Generate-Aggregate Pipeline</title>
       <author><first>Michael</first><last>Oliverio</last></author>
       <author><first>Pier Felice</first><last>Balestrucci</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
       <pages>59–65</pages>
       <abstract>This paper describes the DipInfo-UniTo system participating to the GEM shared task 2024. We participate only to the Data-to-Text (D2T) task. The DipInfo-UniTo system is based on Mistral (Jiang et al., 2023), a recent Large Language Model (LLM). Most LLMs are capable of generating high-quality text for D2T tasks but, crucially, they often fall short in terms of adequacy, and sometimes exhibit “hallucinations”. To mitigate this issue, we have implemented a generation pipeline that combines LLMs with techniques from the traditional Natural Language Generation (NLG) pipeline. In particular, we have a three step process SGA, consisting in (1) Splitting the original set of triples, (2) Generating verbalizations from the resulting split data units, (3) Aggregating the verbalizations produced in the previous step.</abstract>
@@ -891,7 +891,7 @@
       <author><first>Chinonso Cynthia</first><last>Osuji</last></author>
       <author><first>Rudali</first><last>Huidrom</last></author>
       <author><first>Kolawole John</first><last>Adebayo</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <pages>66–75</pages>
       <abstract>In this paper, we present our approach to the GEM Shared Task at the INLG’24 Generation Challenges, which focuses on generating data-to-text in multiple languages, including low-resource languages, from WebNLG triples. We employ a combination of end-to-end and pipeline neural architectures for English text generation. To extend our methodology to Hindi, Korean, Arabic, and Swahili, we leverage a neural machine translation model. Our results demonstrate that our approach achieves competitive performance in the given task.</abstract>
@@ -902,7 +902,7 @@
     <paper id="8">
       <title><fixed-case>DCU</fixed-case>-<fixed-case>NLG</fixed-case>-<fixed-case>PBN</fixed-case> at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Open-Source <fixed-case>LLM</fixed-case> <fixed-case>PEFT</fixed-case>-Tuning for Effective Data-to-Text Generation</title>
       <author><first>Michela</first><last>Lorandi</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>76–83</pages>
       <abstract>LLMs have been used in various tasks with impressive success, including data-to-text generation. However, one concern when LLMs are compared to alternative methods is data contamination, in other words, for many datasets the data used in training these models may have included publicly available test sets. In this paper, we explore the performance of LLMs using newly constructed datasets in the context of data-to-text generation for English, Chinese, German, Russian, Spanish, Korean, Hindi, Swahili, and Arabic. We performed a testing phase to evaluate a range of prompt types and a fine-tuning technique on Mistral 7B and Falcon 40B. We then fully evaluated the most promising system for each scenario: (i) LLM prompting in English followed by translation, and (ii) LLM PEFT-tuning in English followed by translation. We find that fine-tuning Mistral outperforms all other tested systems and achieves performance close to GPT-3.5. The few-shot prompting with a dynamic selection of examples achieves higher results among prompting. The human evaluation to be carried out by the shared-task organisers will provide insight into the performance of the new datasets. In conclusion, we observed how the fine-tuning of an open-source LLM can achieve good performance close to state-of-the-art closed-source LLM while using considerably fewer resources.</abstract>
       <url hash="8d6b828b">2024.inlg-genchal.8</url>
@@ -913,7 +913,7 @@
       <title><fixed-case>DCU</fixed-case>-<fixed-case>NLG</fixed-case>-Small at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Rule-based generation and post-processing with T5-Base</title>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Mohammed</first><last>Sabry</last></author>
-      <author><first>Anya</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last></author>
       <pages>84–91</pages>
       <abstract>Our submission to the GEM data-to-text shared task aims to assess the quality of texts produced by the combination of a rule-based system with a language model of reduced size, by first using a rule-based generator to convert input triples into semantically correct English text, and then a language model to paraphrase these texts to make them more fluent. The texts are translated to languages other than English with the NLLB machine translation system.</abstract>
       <url hash="6428e021">2024.inlg-genchal.9</url>
@@ -936,7 +936,7 @@
       <author><first>Ashley</first><last>Lewis</last></author>
       <author><first>Yi-Chien</first><last>Lin</last></author>
       <author><first>Tomiris</first><last>Kaumenova</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>100–111</pages>
       <abstract>This paper details experiments conducted for completing the GEM 2024 Data-to-Text task for a WebNLG dataset (Gardent et al., 2017). We show that model performance varies greatly across English, Spanish, Chinese, and Russian. Data filtering was done with automatic model judgments via error detection, which performs differently per language. We report English and Spanish dev set results for a data filtering and knowledge distillation approach to generating natural language outputs for sets of triples across a variety of domains. Specifically, we compare three generation conditions: 1) few-shot prompting with ChatGPT (GPT4), 2) fine-tuning LLama2 on the unfiltered dataset, and 3) fine-tuning Llama2 on a filtered version of the dataset. Russian and Chinese efforts did not result in submissions due to inconsistent or incoherent translations being produced in either the data synthesis or final generation stages. We provide details on these shortcomings but largely focus on Spanish and English efforts that align with our task submissions. We ultimately submitted outputs in English and Spanish that were generated using a version of Llama2 fine-tuned on a filtered dataset.</abstract>
       <url hash="0aeda81e">2024.inlg-genchal.11</url>
diff --git a/data/xml/2024.insights.xml b/data/xml/2024.insights.xml
index f379582b84..2045c44acc 100644
--- a/data/xml/2024.insights.xml
+++ b/data/xml/2024.insights.xml
@@ -26,7 +26,7 @@
       <author><first>Haotian</first><last>Ye</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Yihong</first><last>Liu</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Chunlan</first><last>Ma</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last><affiliation>Center for Information and Language Processing, University of Munich</affiliation></author>
       <pages>1-7</pages>
       <abstract>Transformer-based pre-trained language models (PLMs) have achieved remarkable performance in various natural language processing (NLP) tasks. However, pre-training such models can take considerable resources that are almost only available to high-resource languages. On the contrary, static word embeddings are easier to train in terms of computing resources and the amount of data required. In this paper, we introduce MoSECroT (Model Stitching with Static Word Embeddings for Crosslingual Zero-shot Transfer, a novel and challenging task that is especially relevant to low-resource languages for which static word embeddings are available. To tackle the task, we present the first framework that leverages relative representations to construct a common space for the embeddings of a source language PLM and the static word embeddings of a target language. In this way, we can train the PLM on source-language training data and perform zero-shot transfer to the target language by simply swapping the embedding layer. However, through extensive experiments on two classification datasets, we show that although our proposed framework is competitive with weak baselines when addressing MoSECroT, it fails to achieve competitive results compared with some strong baselines. In this paper, we attempt to explain this negative result and provide several thoughts on possible improvement.</abstract>
       <url hash="50df66cc">2024.insights-1.1</url>
@@ -104,7 +104,7 @@
       <author><first>Tatsuya</first><last>Hiraoka</last><affiliation>Fujitsu Limited (Fujitsu Research)</affiliation></author>
       <author><first>Rico</first><last>Sennrich</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Yuval</first><last>Pinter</last><affiliation>Ben-Gurion University of the Negev</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Tokyo Institute of Technology</affiliation></author>
       <pages>48-50</pages>
       <abstract>We explore threshold vocabulary trimming in Byte-Pair Encoding subword tokenization, a tokenization postprocessing step that replaces rare subwords with their component subwords. The technique is available in popular tokenization libraries but has not been subjected to rigorous scientific scrutiny. While the removal of rare subwords is suggested as best practice in model implementations, both as a means to reduce model size and for improving model performance through robustness, our experiments indicate that, across a large space of hyperparameter settings, vocabulary trimming fails to consistently improve model performance, and is even prone to incurring heavy degradation.</abstract>
       <url hash="e8990150">2024.insights-1.7</url>
@@ -148,7 +148,7 @@
       <author><first>Brian</first><last>Davis</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Fabio</first><last>Cozman</last><affiliation>Universidade de Sao Paulo</affiliation></author>
       <author><first>Adriana</first><last>Pagano</last><affiliation>Federal University of Minas Gerais</affiliation></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last><affiliation>Federal University of Minas Gerais</affiliation></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last><affiliation>Federal University of Minas Gerais</affiliation></author>
       <pages>73-81</pages>
       <abstract>Neural end-to-end surface realizers output more fluent texts than classical architectures. However, they tend to suffer from adequacy problems, in particular hallucinations in numerical referring expression generation. This poses a problem to language generation in sensitive domains, as is the case of robot journalism covering COVID-19 and Amazon deforestation. We propose an approach whereby numerical referring expressions are converted from digits to plain word form descriptions prior to being fed to state-of-the-art Large Language Models. We conduct automatic and human evaluations to report the best strategy to numerical superficial realization. Code and data are publicly available.</abstract>
       <url hash="ad3c7b73">2024.insights-1.10</url>
@@ -170,7 +170,7 @@
     <paper id="12">
       <title>Can probing classifiers reveal the learning by contact center large language models?: No, it doesn’t!</title>
       <author><first>Varun</first><last>Nathan</last><affiliation>Observe.AI</affiliation></author>
-      <author><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
       <author><first>Digvijay</first><last>Ingle</last><affiliation>Observe.AI, India</affiliation></author>
       <pages>92-100</pages>
       <abstract>Fine-tuning large language models (LLMs) with domain-specific instruction dataset has emerged as an effective method to enhance their domain-specific understanding. Yet, there is limited work that examines the core characteristics acquired during this process. In this study, we benchmark the fundamental characteristics learned by contact-center (CC) domain specific instruction fine-tuned LLMs with out-of-the-box (OOB) LLMs via probing tasks encompassing conversational, channel, and automatic speech recognition (ASR) properties. We explore different LLM architectures (Flan-T5 and Llama) and sizes (3B, 7B, 11B, 13B). Our findings reveal remarkable effectiveness of CC-LLMs on the in-domain downstream tasks, with improvement in response acceptability by over 48% compared to OOB-LLMs. However, we observe that the performance of probing classifiers are relatively similar and does not reflect the performance of in-domain downstream tasks. A similar observation is also noted on SentEval dataset that assess capabilities of models in terms of surface, syntactic, and semantic information through probing tasks. Our study challenges the premise that probing classifiers can reveal the fundamental characteristics learned by large language models and is reflective of the downstream task performance, via a case-study of LLMs tuned for contact center domain.</abstract>
@@ -218,7 +218,7 @@
       <title>The Paradox of Preference: A Study on <fixed-case>LLM</fixed-case> Alignment Algorithms and Data Acquisition Methods</title>
       <author><first>Rishikesh</first><last>Devanathan</last><affiliation>Observe.AI</affiliation></author>
       <author><first>Varun</first><last>Nathan</last><affiliation>Observe.AI</affiliation></author>
-      <author><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
       <pages>135-147</pages>
       <abstract>This research investigates the impact of preference annotation acquisition methods on the performance of LLM alignment algorithms, including Direct Preference Optimization (DPO), Identity Preference Optimization (IPO), and Conservative DPO (cDPO), compared to Supervised Fine-Tuning (SFT) in NLP tasks. We analyze the influence of LLM and human-based preferences on algorithm performance, considering data volume and quality. Additionally, we assess DPO’s vulnerability to overfitting and IPO’s resilience against it, addressing four main research questions. Using the GAIR dataset and Zephyr-7b as the SFT model, we reveal unexpected negative outcomes. Specifically, DPO trained on LLM preferences outperforms human preferences, contrary to expectations. Moreover, there’s no correlation between preference data volume or quality and algorithm performance. Contrary to expectations, DPO shows no overfitting in both human and LLM preference datasets. Surprisingly, cDPO doesn’t fare better than DPO under flip noise. Our findings highlight the complexities of preference annotation methods and underscore the importance of scrutinizing negative results in NLP algorithm research.</abstract>
       <url hash="2fab5011">2024.insights-1.16</url>
@@ -241,7 +241,7 @@
     <paper id="18">
       <title>Multi-Task Learning with Adapters for Plausibility Prediction: Bridging the Gap or Falling into the Trenches?</title>
       <author><first>Annerose</first><last>Eichel</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>154-168</pages>
       <abstract>We present a multi-task learning approach to predicting semantic plausibility by leveraging 50+ adapters categorized into 17 tasks within an efficient training framework. Across four plausibility datasets in English of varying size and linguistic constructions, we compare how models provided with knowledge from a range of NLP tasks perform in contrast to models without external information. Our results show that plausibility prediction benefits from complementary knowledge (e.g., provided by syntactic tasks) are significant but non-substantial, while performance may be hurt when injecting knowledge from an unsuitable task. Similarly important, we find that knowledge transfer may be hindered by class imbalance, and demonstrate the positive yet minor effect of balancing training data, even at the expense of size.</abstract>
       <url hash="ee9a8358">2024.insights-1.18</url>
diff --git a/data/xml/2024.isa.xml b/data/xml/2024.isa.xml
index 2f7ee4201d..908e54f58d 100644
--- a/data/xml/2024.isa.xml
+++ b/data/xml/2024.isa.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 2024</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
-      <editor><first>Nancy</first><last>Ide</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
       <editor><first>Kiyong</first><last>Lee</last></editor>
       <editor><first>Volha</first><last>Petukhova</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
-      <editor><first>Laurent</first><last>Romary</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="laurent-romary"><first>Laurent</first><last>Romary</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -33,8 +33,8 @@
     <paper id="2">
       <title><fixed-case>MSNER</fixed-case>: A Multilingual Speech Dataset for Named Entity Recognition</title>
       <author><first>Quentin</first><last>Meeus</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>8–16</pages>
       <abstract>While extensively explored in text-based tasks, Named Entity Recognition (NER) remains largely neglected in spoken language understanding. Existing resources are limited to a single, English-only dataset. This paper addresses this gap by introducing MSNER, a freely available, multilingual speech corpus annotated with named entities. It provides annotations to the VoxPopuli dataset in four languages (Dutch, French, German, and Spanish). We have also releasing an efficient annotation tool that leverages automatic pre-annotations for faster manual refinement. This results in 590 and 15 hours of silver-annotated speech for training and validation, alongside a 17-hour, manually-annotated evaluation set. We further provide an analysis comparing silver and gold annotations. Finally, we present baseline NER models to stimulate further research on this newly available dataset.</abstract>
       <url hash="edabe280">2024.isa-1.2</url>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="8">
       <title>Shallow Discourse Parsing on <fixed-case>T</fixed-case>witter Conversations</title>
-      <author><first>Berfin</first><last>Aktas</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktas</last></author>
       <author><first>Burak</first><last>Özmen</last></author>
       <pages>60–65</pages>
       <abstract>We present our PDTB-style annotations on conversational Twitter data, which was initially annotated by Scheffler et al. (2019). We introduced 1,043 new annotations to the dataset, nearly doubling the number of previously annotated discourse relations. Subsequently, we applied a neural Shallow Discourse Parsing (SDP) model to the resulting corpus, improving its performance through retraining with in-domain data. The most substantial improvement was observed in the sense identification task (+19%). Our experiments with diverse training data combinations underline the potential benefits of exploring various data combinations in domain adaptation efforts for SDP. To the best of our knowledge, this is the first application of Shallow Discourse Parsing on Twitter data</abstract>
@@ -98,9 +98,9 @@
       <title>Search tool for An Event-Type Ontology</title>
       <author><first>Nataliia</first><last>Petliak</last></author>
       <author><first>Cristina Fernandéz</first><last>Alcaina</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>66–70</pages>
       <abstract>This short demo description paper presents a new tool designed for searching an event-type ontology with rich information, demonstrated on the SynSemClass ontology resource. The tool complements a web browser, created by the authors of the SynSemClass ontology previously. Due to the complexity of the resource, the search tool offers possibilities both for a linguistically-oriented researcher as well as for teams working with the resource from a technical point of view, such as building role labeling tools, automatic annotation tools, etc.</abstract>
       <url hash="72d32c32">2024.isa-1.9</url>
@@ -110,7 +110,7 @@
       <title>Tiny But Mighty: A Crowdsourced Benchmark Dataset for Triple Extraction from Unstructured Text</title>
       <author><first>Muhammad</first><last>Salman</last></author>
       <author><first>Armin</first><last>Haller</last></author>
-      <author><first>Sergio J.</first><last>Rodriguez Mendez</last></author>
+      <author id="sergio-jose-rodriguez-mendez"><first>Sergio J.</first><last>Rodriguez Mendez</last></author>
       <author><first>Usman</first><last>Naseem</last></author>
       <pages>71–81</pages>
       <abstract>In the context of Natural Language Processing (NLP) and Semantic Web applications, constructing Knowledge Graphs (KGs) from unstructured text plays a vital role. Several techniques have been developed for KG construction from text, but the lack of standardized datasets hinders the evaluation of triple extraction methods. The evaluation of existing KG construction approaches is based on structured data or manual investigations. To overcome this limitation, this work introduces a novel dataset specifically designed to evaluate KG construction techniques from unstructured text. Our dataset consists of a diverse collection of compound and complex sentences meticulously annotated by human annotators with potential triples (subject, verb, object). The annotations underwent further scrutiny by expert ontologists to ensure accuracy and consistency. For evaluation purposes, the proposed F-measure criterion offers a robust approach to quantify the relatedness and assess the alignment between extracted triples and the ground-truth triples, providing a valuable tool for evaluating the performance of triple extraction systems. By providing a diverse collection of high-quality triples, our proposed benchmark dataset offers a comprehensive training and evaluation set for refining the performance of state-of-the-art language models on a triple extraction task. Furthermore, this dataset encompasses various KG-related tasks, such as named entity recognition, relation extraction, and entity linking.</abstract>
@@ -120,7 +120,7 @@
     <paper id="11">
       <title>Less is Enough: Less-Resourced Multilingual <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Bram</first><last>Vanroy</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>82–92</pages>
       <abstract>This paper investigates the efficacy of multilingual models for the task of text-to-AMR parsing, focusing on English, Spanish, and Dutch. We train and evaluate models under various configurations, including monolingual and multilingual settings, both in full and reduced data scenarios. Our empirical results reveal that while monolingual models exhibit superior performance, multilingual models are competitive across all languages, offering a more resource-efficient alternative for training and deployment. Crucially, our findings demonstrate that AMR parsing benefits from transfer learning across languages even when having access to significantly smaller datasets. As a tangible contribution, we provide text-to-AMR parsing models for the aforementioned languages as well as multilingual variants, and make available the large corpora of translated data for Dutch, Spanish (and Irish) that we used for training them in order to foster AMR research in non-English languages. Additionally, we open-source the training code and offer an interactive interface for parsing AMR graphs from text.</abstract>
       <url hash="1194c7f7">2024.isa-1.11</url>
@@ -129,7 +129,7 @@
     <paper id="12">
       <title><fixed-case>M</fixed-case>o<fixed-case>CCA</fixed-case>: A Model of Comparative Concepts for Aligning Constructicons</title>
       <author><first>Arthur</first><last>Lorenzi</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Ben</first><last>Lyngfelt</last></author>
       <author><first>Tiago</first><last>Timponi Torrent</last></author>
       <author><first>William</first><last>Croft</last></author>
@@ -184,7 +184,7 @@
       <title>Annotating Evaluative Language: Challenges and Solutions in Applying Appraisal Theory</title>
       <author><first>Jiamei</first><last>Zeng</last></author>
       <author><first>Min</first><last>Dong</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>144–151</pages>
       <abstract>This article describes a corpus-based experiment to identify the challenges and solutions in the annotation of evaluative language according to the scheme defined in Appraisal Theory (Martin and White, 2005). Originating from systemic functional linguistics, Appraisal Theory provides a robust framework for the analysis of linguistic expressions of evaluation, stance, and interpersonal relationships. Despite its theoretical richness, the practical application of Appraisal Theory in text annotation presents significant challenges, chiefly due to the intricacies of identifying and classifying evaluative expressions within its sub-system of Attitude, which comprises Affect, Judgement, and Appreciation. This study examines these challenges through the annotation of a corpus of editorials related to the Russian-Ukraine conflict and aims to offer practical solutions to enhance the transparency and consistency of the annotation. By refining the annotation process and addressing the subjective nature in the identification and classification of evaluative language, this work represents some timely effort in the annotation of pragmatic knowledge in language resources.</abstract>
       <url hash="373eb5f2">2024.isa-1.17</url>
diff --git a/data/xml/2024.iscls.xml b/data/xml/2024.iscls.xml
index 9cea47af12..96d9e7bff4 100644
--- a/data/xml/2024.iscls.xml
+++ b/data/xml/2024.iscls.xml
@@ -26,7 +26,7 @@
       <title>Context and <fixed-case>WSD</fixed-case>: Analysing <fixed-case>G</fixed-case>oogle <fixed-case>T</fixed-case>ranslate’s <fixed-case>S</fixed-case>anskrit to <fixed-case>E</fixed-case>nglish Output of Bhagavadgītā Verses for Word Meaning</title>
       <author><first>Anagha</first><last>Pradeep</last></author>
       <author><first>Radhika</first><last>Mamidi</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <pages>14–26</pages>
       <url hash="e1c1bca9">2024.iscls-1.2</url>
       <bibkey>pradeep-etal-2024-context</bibkey>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>START</fixed-case>: <fixed-case>S</fixed-case>anskrit Teaching; Annotation; and Research Tool – Bridging Tradition and Technology in Scholarly Exploration</title>
-      <author><first>Anil</first><last>Kumar</last></author>
+      <author id="anil-kumar-nelakanti"><first>Anil</first><last>Kumar</last></author>
       <author><first>Amba</first><last>Kulkarni</last></author>
       <author><first>Nakka</first><last>Shailaj</last></author>
       <pages>113–124</pages>
diff --git a/data/xml/2024.iwclul.xml b/data/xml/2024.iwclul.xml
index 6e6841e523..cd3191a4ce 100644
--- a/data/xml/2024.iwclul.xml
+++ b/data/xml/2024.iwclul.xml
@@ -71,7 +71,7 @@
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico</affiliation></author>
       <author><first>Liliana</first><last>Chanona Hernandez</last><affiliation>Instituto Politécnico Nacional (IPN), Escuela Superior de Ingeniería Mecánica y Eléctrica (ESIME), Mexico City, Mexico</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico</affiliation></author>
       <pages>49-58</pages>
       <abstract>This article is dedicated to the study of multilingual approaches to sentiment analysis of texts in Finnish, Hungarian, and Bulgarian. For Finnish and Hungarian, which are characterized by complex morphology and agglutinative grammar, an analysis was conducted using both traditional rule-based methods and modern machine learning techniques. In the study, BERT, XLM-R, and mBERT models were used for sentiment analysis, demonstrating high accuracy in sentiment classification. The inclusion of Bulgarian was motivated by the opportunity to compare results across languages with varying degrees of morphological complexity, which allowed for a better understanding of how these models can adapt to different linguistic structures. Datasets such as the Hungarian Emotion Corpus, FinnSentiment, and SentiFi were used to evaluate model performance. The results showed that transformer-based models, particularly BERT, XLM-R, and mBERT, significantly outperformed traditional methods, achieving high accuracy in sentiment classification tasks for all the languages studied.</abstract>
       <url hash="3c164ecd">2024.iwclul-1.6</url>
diff --git a/data/xml/2024.iwslt.xml b/data/xml/2024.iwslt.xml
index 4384df4c78..ae189c0a0c 100644
--- a/data/xml/2024.iwslt.xml
+++ b/data/xml/2024.iwslt.xml
@@ -21,7 +21,7 @@
       <title><fixed-case>FINDINGS</fixed-case> <fixed-case>OF</fixed-case> <fixed-case>THE</fixed-case> <fixed-case>IWSLT</fixed-case> 2024 <fixed-case>EVALUATION</fixed-case> <fixed-case>CAMPAIGN</fixed-case></title>
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern U.</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>GMU</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
       <author><first>Claudia</first><last>Borg</last><affiliation>U. Malta</affiliation></author>
       <author><first>Marine</first><last>Carpuat</last><affiliation>UMD</affiliation></author>
       <author><first>Roldano</first><last>Cattoni</last><affiliation>FBK</affiliation></author>
@@ -37,13 +37,13 @@
       <author><first>Prashant</first><last>Mathur</last><affiliation>Amazon</affiliation></author>
       <author><first>Evgeny</first><last>Matusov</last><affiliation>AppTek</affiliation></author>
       <author><first>Chandresh</first><last>Maurya</last><affiliation>IIT Indore</affiliation></author>
-      <author><first>John</first><last>McCrae</last><affiliation>U. Galway</affiliation></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last><affiliation>U. Galway</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>JHU</affiliation></author>
       <author><first>Satoshi</first><last>Nakamura</last><affiliation>NAIST</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>KIT</affiliation></author>
       <author><first>Xing</first><last>Niu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>U. Galway</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>U. Galway</affiliation></author>
       <author><first>John</first><last>Ortega</last><affiliation>Northeastern</affiliation></author>
       <author><first>Sara</first><last>Papi</last><affiliation>FBK</affiliation></author>
       <author><first>Peter</first><last>Polák</last><affiliation>Charles U.</affiliation></author>
@@ -55,10 +55,10 @@
       <author><first>Jiatong</first><last>Shi</last><affiliation>CMU</affiliation></author>
       <author><first>Claytone</first><last>Sikasote</last><affiliation>U. Zambia</affiliation></author>
       <author><first>Matthias</first><last>Sperber</last><affiliation>Apple</affiliation></author>
-      <author><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
       <author><first>Katsuhito</first><last>Sudoh</last><affiliation>NAIST</affiliation></author>
       <author><first>Brian</first><last>Thompson</last><affiliation>Amazon</affiliation></author>
-      <author><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>CMU</affiliation></author>
       <author><first>Patrick</first><last>Wilken</last><affiliation>AppTek</affiliation></author>
       <author><first>Petr</first><last>Zemánek</last><affiliation>Charles U.</affiliation></author>
@@ -182,7 +182,7 @@
       <title><fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case> Test Suites Task: Gender Bias evaluation with <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>SHE</fixed-case></title>
       <author><first>Beatrice</first><last>Savoldi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>65-71</pages>
       <abstract>This paper presents the FBK contribution to the IWSLT-2024 ‘Test suites’ shared subtask, part of the Offline Speech Translation Task. Our contribution consists of the MuST-SHE-IWSLT24 benchmark evaluation, designed to assess gender bias in speech translation. By focusing on the en-de language pair, we rely on a newly created test suite to investigate systems’ ability to correctly translate feminine and masculine gender. Our results indicate that – under realistic conditions – current ST systems achieve reasonable and comparable performance in correctly translating both feminine and masculine forms when contextual gender information is available. For ambiguous references to the speaker, however, we attest a consistent preference towards masculine gender, thus calling for future endeavours on the topic. Towards this goal we make MuST-SHE-IWSLT24 freely available at: https://mt.fbk.eu/must-she/</abstract>
@@ -194,7 +194,7 @@
       <title><fixed-case>S</fixed-case>imul<fixed-case>S</fixed-case>eamless: <fixed-case>FBK</fixed-case> at <fixed-case>IWSLT</fixed-case> 2024 Simultaneous Speech Translation</title>
       <author><first>Sara</first><last>Papi</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>72-79</pages>
       <abstract>This paper describes the FBK’s participation in the Simultaneous Translation Evaluation Campaign at IWSLT 2024. For this year’s submission in the speech-to-text translation (ST) sub-track, we propose SimulSeamless, which is realized by combining AlignAtt and SeamlessM4T in its medium configuration. The SeamlessM4T model is used ‘off-the-shelf’ and its simultaneous inference is enabled through the adoption of AlignAtt, a SimulST policy based on cross-attention that can be applied without any retraining or adaptation of the underlying model for the simultaneous task. We participated in all the Shared Task languages (English-&gt;German, Japanese, Chinese, and Czech-&gt;English), achieving acceptable or even better results compared to last year’s submissions. SimulSeamless, covering more than 143 source languages and 200 target languages, is released at: https://github.com/hlt-mt/FBK-fairseq/.</abstract>
@@ -223,7 +223,7 @@
       <author><first>Mauro</first><last>Cettolo</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Roldano</first><last>Cattoni</last><affiliation>FBK</affiliation></author>
       <author><first>Andrea</first><last>Piergentili</last><affiliation>Fondazione Bruno Kessler, University of Trento</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <pages>86-96</pages>
       <abstract>The paper describes the FBK submissions to the Subtitling track of the 2024 IWSLT Evaluation Campaign, which covers both the Automatic Subtitling and the Subtitle Compression task for two language pairs: English to German (en-de) and English to Spanish (en-es). For the Automatic Subtitling task, we submitted two systems: i) a direct model, trained in constrained conditions, that produces the SRT files from the audio without intermediate outputs (e.g., transcripts), and ii) a cascade solution that integrates only free-to-use components, either taken off-the-shelf or developed in-house. Results show that, on both language pairs, our direct model outperforms both cascade and direct systems trained in constrained conditions in last year’s edition of the campaign, while our cascade solution is competitive with the best 2023 runs. For the Subtitle Compression task, our primary submission involved prompting a Large Language Model (LLM) in zero-shot mode to shorten subtitles that exceed the reading speed limit of 21 characters per second. Our results highlight the challenges inherent in shrinking out-of-context sentence fragments that are automatically generated and potentially error-prone, underscoring the need for future studies to develop targeted solutions.</abstract>
@@ -312,7 +312,7 @@
       <author><first>Haoran</first><last>Xu</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Henry</first><last>Li Xinyuan</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Ankur</first><last>Kejriwal</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Paul</first><last>McNamee</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>140-153</pages>
@@ -399,10 +399,10 @@
       <title>Blending <fixed-case>LLM</fixed-case>s into Cascaded Speech Translation: <fixed-case>KIT</fixed-case>’s Offline Speech Translation System for <fixed-case>IWSLT</fixed-case> 2024</title>
       <author><first>Sai</first><last>Koneru</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Thai</first><last>Binh Nguyen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Danni</first><last>Liu</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Zhaolin</first><last>Li</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <pages>183-191</pages>
       <abstract>Large Language Models (LLMs) are currently under exploration for various tasks, including Automatic Speech Recognition (ASR), Machine Translation (MT), and even End-to-End Speech Translation (ST). In this paper, we present KIT’s offline submission in the constrained + LLM track by incorporating recently proposed techniques that can be added to any cascaded speech translation. Specifically, we integrate Mistral-7B into our system to enhance it in two ways. Firstly, we refine the ASR outputs by utilizing the N-best lists generated by our system and fine-tuning the LLM to predict the transcript accurately. Secondly, we refine the MT outputs at the document level by fine-tuning the LLM, leveraging both ASR and MT predictions to improve translation quality. We find that integrating the LLM into the ASR and MT systems results in an absolute improvement of 0.3% in Word Error Rate and 0.65% in COMET for tst2019 test set. In challenging test sets with overlapping speakers and background noise, we find that integrating LLM is not beneficial due to poor ASR performance. Here, we use ASR with chunked long-form decoding to improve context usage that may be unavailable when transcribing with Voice Activity Detection segmentation alone.</abstract>
@@ -417,7 +417,7 @@
       <author><first>André</first><last>Beyer</last><affiliation>Crowdee</affiliation></author>
       <author><first>Abdel</first><last>Messaoudi</last><affiliation>Vocapia</affiliation></author>
       <author><first>Rabea</first><last>Affan</last><affiliation>Mr.</affiliation></author>
-      <author><first>Claude</first><last>Barras</last><affiliation>Vocapia Research</affiliation></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last><affiliation>Vocapia Research</affiliation></author>
       <author><first>Maxim</first><last>Tychonov</last><affiliation>Lingea ltd.</affiliation></author>
       <author><first>Jean-Luc</first><last>Gauvain</last><affiliation>CNRS/LIMSI</affiliation></author>
       <pages>192-202</pages>
@@ -445,7 +445,7 @@
       <author><first>Carlos</first><last>Mullov</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Tu</first><last>Anh Dinh</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Sai</first><last>Koneru</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <pages>221-228</pages>
       <abstract>This paper presents KIT’s submissions to the IWSLT 2024 dialectal and low-resource track. In this work, we build systems for translating into English from speech in Maltese, Bemba, and two Arabic dialects Tunisian and North Levantine. Under the unconstrained condition, we leverage the pre-trained multilingual models by fine-tuning them for the target language pairs to address data scarcity problems in this track. We build cascaded and end-to-end speech translation systems for different language pairs and show the cascaded system brings slightly better overall performance. Besides, we find utilizing additional data resources boosts speech recognition performance but slightly harms machine translation performance in cascaded systems. Lastly, we show that Minimum Bayes Risk is effective in improving speech translation performance by combining the cascaded and end-to-end systems, bringing a consistent improvement of around 1 BLUE point.</abstract>
@@ -550,8 +550,8 @@
       <author><first>Maximilian</first><last>Awiszus</last><affiliation>Zoom Video Communications</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom Video Communications</affiliation></author>
-      <author><first>Sebastian</first><last>Stüker</last><affiliation>Zoom Video Communications</affiliation></author>
-      <author><first>Alex</first><last>Waibel</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last><affiliation>Zoom Video Communications</affiliation></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>291-297</pages>
       <abstract>Generating rare words is a challenging task for natural language processing in general and in speech translation (ST) specifically. This paper introduces a test suite prepared for the Offline ST shared task at IWSLT. In the test suite, corresponding rare words (i.e. named entities) were annotated on TED-Talks for English and German and the English side was made available to the participants together with some distractors (irrelevant named entities). Our evaluation checks the capabilities of ST systems to leverage the information in the contextual list of named entities and improve translation quality. Systems are ranked based on the recall and precision of named entities (separately on person, location, and organization names) in the translated texts. Our evaluation shows that using contextual information improves translation quality as well as the recall and precision of NEs. The recall of organization names in all submissions is the lowest of all categories with a maximum of 87.5 % confirming the difficulties of ST systems in dealing with names.</abstract>
       <url hash="2492d16b">2024.iwslt-1.35</url>
diff --git a/data/xml/2024.jeptalnrecital.xml b/data/xml/2024.jeptalnrecital.xml
index d7742b51ec..da1f413bf3 100644
--- a/data/xml/2024.jeptalnrecital.xml
+++ b/data/xml/2024.jeptalnrecital.xml
@@ -107,9 +107,9 @@
     <paper id="8">
       <title>Étude en temps réel de la fusion des /a/ ~ /ɑ/ en français depuis 1925</title>
       <author><first>Juliusz</first><last>Cęcelewski</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <pages>71–81</pages>
       <abstract>Cette étude explore la variation diachronique de la réalisation des voyelles /a/ ~ /ɑ/ du français en position finale de mot dans la parole déclamatoire/journalistique de 1925 à 2023. Nos données comprennent deux corpus préexistants – le corpus d’archives INA (1940–1997) et le corpus ESTER (2000–2004) – ainsi que deux nouveaux corpus composés d’enregistrements issus des Archives de la Parole d’Hubert Pernot (1925–1929), de Radio France et de YouTube (2020–2023).Nos résultats indiquent une postériorisation du /a/ vers une position plus centrale et, dans une moindre mesure, une antériorisations du /ɑ/, qui ont abouti à la neutralisation et la fusion acoustique des deux phonèmes au cours du XXe siècle. Les résultats sont discutés à la lumière de l’évolution globale du système des voyelles à double timbre en français.</abstract>
       <url hash="226656fa">2024.jeptalnrecital-jep.8</url>
@@ -132,8 +132,8 @@
     <paper id="10">
       <title>Identification du locuteur : ouvrir la boîte noire</title>
       <author><first>Carole</first><last>Millot</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <pages>92–101</pages>
       <abstract>L’explicabilité des systèmes relevant du deep learning est devenue un enjeu central ces dernières années, dans le droit européen comme le domaine criminalistique. L’approche BA-LR introduit en identification du locuteur un nouveau paradigme de modélisation : elle fait émerger automatiquement les attributs partagés par un groupe de locuteurs et qui sous-entendent la discrimination de ceux-ci. Le score produit est décomposable au niveau des attributs, ce qui augmente significativement l’explicabilité de la méthode. Cette étude propose de compléter la caractérisation des attributs obtenus par le BA-LR, à l’aide de paramètres de qualité de voix. L’analyse suggère que plusieurs attributs utilisent les types de phonation pour regrouper les locuteurs, ceux-ci encodant des informations humainement perceptibles. Cet article pose ainsi des bases pour l’analyse acoustique des attributs, qui permettra à terme d’utiliser le BA-LR dans le cadre du profilage vocal.</abstract>
       <url hash="ac861c5f">2024.jeptalnrecital-jep.10</url>
@@ -158,7 +158,7 @@
       <author><first>Maxime</first><last>Fily</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>Séverine</first><last>Guillaume</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Alexis</first><last>Michaud</last></author>
       <pages>112–121</pages>
       <abstract>Nous explorons les représentations vectorielles de la parole à partir d’un modèle pré-entraîné pour déterminer leur niveau d’abstraction par rapport au signal audio. Nous proposons une nouvelle méthode non-supervisée exploitant des données audio ayant des métadonnées soigneusement organisées pour apporter un éclairage sur les informations présentes dans les représentations. Des tests ABX déterminent si les représentations obtenues via un modèle de parole multilingue encodent une caractéristique donnée. Trois expériences sont présentées, portant sur la qualité acoustique de la pièce, le type de discours, ou le contenu phonétique. Les résultats confirment que les différences au niveau de caractéristiques linguistiques/extra-linguistiques d’enregistrements audio sont reflétées dans les représentations de ceux-ci. Plus la quantité d’audio par vecteur est importante, mieux elle permet de distinguer les caractéristiques extra-linguistiques. Plus elle est faible, et mieux nous pouvons distinguer les informations d’ordre phonétique/segmental. La méthode proposée ouvre de nouvelles pistes pour la recherche et les travaux comparatifs sur les langues peu dotées.</abstract>
@@ -207,7 +207,7 @@
       <title>Réductions temporelles en français parlé : Où peut-on trouver les zones de réduction ?</title>
       <author><first>Yaru</first><last>Wu</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>153–162</pages>
       <abstract>Cet article examine la réduction dans la parole continue en français, ainsi que les différents facteurs qui contribuent au phénomène, tels que le style de parole, le débit de parole, la catégorie de mots, la position du phone dans le mot et la position du mot dans les groupes syntaxiques. L’étude utilise trois corpus de parole continue en français, couvrant la parole formelle, la parole moins formelle et la parole familière. La méthode utilisée comprend l’alignement forcé et l’étiquetage automatique des zones de réduction. Les résultats suggèrent que la réduction de la parole est présente dans tous les styles de parole, mais moins fréquente dans la parole formelle, et que la réduction est plus susceptible d’être observée dans les énoncés de parole avec un taux de parole élevé. La position médiane des mots ou des groupes syntaxiques tend à favoriser la réduction.</abstract>
       <url hash="6fa403c5">2024.jeptalnrecital-jep.16</url>
@@ -282,7 +282,7 @@
     <paper id="23">
       <title>Utilisation de wav2vec 2.0 pour des tâches de classifications phonétiques : aspects méthodologiques</title>
       <author><first>Lila</first><last>Kim</last></author>
-      <author><first>Cedric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cedric</first><last>Gendrot</last></author>
       <pages>219–229</pages>
       <abstract>L’apprentissage auto-supervisé, particulièrement dans le contexte de la parole, a démontré son efficacité dans diverses tâches telles que la reconnaissance du locuteur et la reconnaissance de la parole. Notre question de recherche se concentre sur l’efficacité des représentations vectorielles - extraites de phonèmes - plus courtes par rapport à des séquences plus longues dans la détection de la nasalité. Deux approches distinctes ont été étudiées : extraire des vecteurs sur la durée du phonème et prendre des séquences plus longues avec une seconde ajoutée de chaque côté du phonème, puis récupérer la partie centrale a posteriori. Les résultats révèlent que les modèles réagissent différemment selon les phones et les locuteurs, avec une variabilité observée à ces niveaux. Le modèle à séquences longues surpasse le modèle à séquences courtes en assurant une corrélation plus robuste avec le débit d’air nasal.</abstract>
       <url hash="8bab4dac">2024.jeptalnrecital-jep.23</url>
@@ -312,7 +312,7 @@
     <paper id="26">
       <title>Analyse Factorielle de signaux sonores : développement d’une méthode automatique de détermination des frontières optimales entre canaux de fréquence</title>
       <author><first>Agnieszka</first><last>Duniec</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Olivier</first><last>Crouzet</last></author>
       <pages>252–260</pages>
       <abstract>Des études récentes supportent l’hypothèse d’une relation entre les propriétés statistiques des signaux de parole et les mécanismes perceptifs : les gammes de fréquence présentant une corrélation dans leurs modulations d’amplitude pourraient être associées à des frontières spectrales relativement stables envisagées comme optimales sur le plan perceptif. Cependant, des limites afférentes à ces études antérieures ressortent : (1) elles se fondent pour la plupart sur des critères subjectifs à travers l’observation visuelle des courbes de résultats statistiques, et (2) elles n’envisagent pas que les résultats puissent varier en fonction des échantillons de données sélectionnés, de la nature des signaux utilisés, ou de la taille des échantillons. Même si cette position peut être argumentée en lien avec l’approche du codage efficace, cet aspect afférent au degré de variation potentiel nécessite d’être évalué. Nous avons mis en place une méthode de détermination automatique des frontières qui permet de répliquer les travaux antérieurs en introduisant une évaluation expérimentale de ces limites et discutons de quelques résultats préliminaires en comparaison avec les études précédentes.</abstract>
@@ -347,7 +347,7 @@
     <paper id="29">
       <title>Comparaison de mesures pour la détection automatique de déviance dans la dysarthrie ataxique</title>
       <author><first>Natacha</first><last>Miniconi</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <author><first>Angélina</first><last>Bourbon</last></author>
       <author><first>Leonardo</first><last>Lancia</last></author>
       <author><first>Cécile</first><last>Fougeron</last></author>
@@ -362,7 +362,7 @@
       <author><first>Jingyi</first><last>Sun</last></author>
       <author><first>Yaru</first><last>Wu</last></author>
       <author><first>Nicolas</first><last>Audibert</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>291–300</pages>
       <abstract>La technologie ASR excelle dans la transcription précise des discours lus préparés, mais elle rencontre encore des défis lorsqu’il s’agit de conversations spontanées. Cela est en partie dû au fait que ces dernières relèvent d’un registre de langage informel, avec disfluences et réductions de parole. Afin de mieux comprendre les différences de production en fonction des styles de parole, nous présentons la création d’un corpus de parole conversationnelle, dont des extraits sont ensuite lus par leurs auteurs. Le corpus comprend 36 heures de parole en chinois mandarin avec leur transcription, réparties entre conversations spontanées et lecture. Nous avons utilisé WHISPER pour la transcription automatique de la parole et le Montreal Forced Aligner pour l’alignement forcé, résultant dans un corpus de parole transcrit avec annotations multi-niveaux incluant phonèmes, caractères/syllabes et mots. De telles productions de parole parallèles (en modes spontané et lu) seront particulièrement intéressantes pour l’étude des réductions temporelles.</abstract>
       <url hash="5e32bfc5">2024.jeptalnrecital-jep.30</url>
@@ -411,7 +411,7 @@
     <paper id="34">
       <title>Effets du shadowing et de l’imitation en tant que méthodes d’entraînement à la prononciation du /ɥi/ en français</title>
       <author><first>Wenxun</first><last>Fu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Barbara</first><last>Kühnert</last></author>
       <pages>332–341</pages>
       <abstract>Trente étudiantes mandarines apprenant le français ont participé à un entraînement autonome de quatre semaines, utilisant l’imitation tardive et le shadowing (répétition immédiate). Cette étude se concentre sur le résultat de la réalisation du /i/ dans /ɥi/, souvent réalisé proche du /y/. Les posttests montrent des améliorations dans la perception et la production de /ɥi/. Pour les apprenants de niveaux intermédiaires ayant pratiqué le shadowing, la distinction entre le troisième formant (F3) et le deuxième formant (F2) du /i/ dans /ɥi/ est significativement plus élevé après l’entraînement, indiquant une meilleure distinction avec /y/. Le shadowing semble efficace dans l’amélioration de la perception chez les débutants et apprenants intermédiaires, mais uniquement dans la production pour les niveaux intermédiaires. Nous suggérons que le shadowing, en tant que méthode hautement cognitive et active, puisse servir d’alternative à la méthode d’imitation, sous réserve que la compétence linguistique des apprenants leur permette d’accomplir la tâche avec succès.</abstract>
@@ -555,7 +555,7 @@
       <title>La sonorité n’est pas l’intensité: le cas des diphtongues dans une langue tonale</title>
       <author><first>Yunzhuo</first><last>Xiang</last></author>
       <author><first>Jiayin</first><last>Gao</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <pages>441–450</pages>
       <abstract>Cette étude explore le lien entre la sonorité et l’intensité dans la production des diphtongues ouvrantes et fermantes en mandarin de Pékin. Étant donné qu’une voyelle ouverte est considérée comme plus sonore qu’une voyelle fermée, nous nous attendons à constater une augmentation d’intensité dans une diphtongue ouvrante et une diminution d’intensité dans une diphtongue fermante. Or, nos résultats, basés sur les modèles GAMM (modèles additifs généralisés à l’effet mixte) révèlent un pattern différent de nos attentes : la dynamique d’intensité au sein de la diphtongue n’est pas liée à l’aperture vocalique. En revanche, conformément aux études précédentes, nous trouvons une corrélation positive entre la F0 et l’intensité. Nous interrogeons ainsi sur la validité de définir la sonorité à base de l’intensité seule. Enfin, nous discutons du rôle de la F0 dans la définition de la sonorité et l’apport de notre étude pour modéliser la sonorité dans une langue tonale.</abstract>
       <url hash="27cedd42">2024.jeptalnrecital-jep.45</url>
@@ -592,9 +592,9 @@
       <title>Nouvelle tâche sémantique pour le corpus de compréhension de parole en français <fixed-case>MEDIA</fixed-case></title>
       <author><first>Nadège</first><last>Alavoine</last></author>
       <author><first>Gaëlle</first><last>Laperrière</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>470–480</pages>
       <abstract>La détection d’intention et de concepts sont des tâches essentielles de la compréhension de la parole(SLU). Or il n’existe que peu de données annotées en français permettant d’effectuer ces deux tâches conjointement. Cependant, il existe des ensembles de données annotées en concept, dont le corpus MEDIA. Ce corpus est considéré comme l’un des plus difficiles. Néanmoins, il ne comporte que des annotations en concepts et pas en intentions. Dans cet article, nous proposons une version étendue de MEDIA annotée en intentions pour étendre son utilisation. Cet article présente une méthode semi-automatique pour obtenir cette version étendue. De plus, nous présentons les premiers résultats des expériences menées sur cet ensemble de données en utilisant des modèles joints pour la classification des intentions et la détection de concepts.</abstract>
       <url hash="e449e20b">2024.jeptalnrecital-jep.48</url>
@@ -633,10 +633,10 @@
       <author><first>Isabelle</first><last>Ferrané</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
       <author><first>Thomas</first><last>Pellegrini</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Jérôme</first><last>Bertrand</last></author>
       <author><first>Marie-Françoise</first><last>Bertrand</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Jérôme</first><last>Farinas</last></author>
       <pages>502–511</pages>
       <abstract>Dans cet article, nous présentons notre contribution à la tâche de classification des émotions dans la parole dans le cadre de notre participation à la campagne d’évaluation Odyssey 2024. Nous proposons un système hybride qui tire parti à la fois des informations du signal audio et des informations sémantiques issues des transcriptions automatiques. Les résultats montrent que l’ajout de l’information sémantique permet de dépasser les systèmes uniquement audio.</abstract>
@@ -647,7 +647,7 @@
     <paper id="52">
       <title>Preuve de concept d’un système de génération automatique en Langue française Parlée Complétée</title>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <pages>512–520</pages>
       <abstract>La Langue française Parlée Complétée (LfPC) est un système de communication développé pour les personnes sourdes afin de compléter la lecture labiale avec une main, au niveau phonétique. Il est utilisé par les enfants pour acquérir des compétences en lecture, en lecture labiale et en communication orale. L’objectif principal est de permettre aux enfants sourds de devenir des lecteurs et des locuteurs compétents en langue française. Nous proposons une preuve de concept (PoC) d’un système de réalité augmentée qui place automatiquement la représentation d’une main codeuse sur la vidéo pré-enregistrée d’un locuteur. Le PoC prédit la forme et la position de la main, le moment durant lequel elle doit être affichée, et ses coordonnées relativement au visage dans la vidéo. Des photos de mains sont ensuite juxtaposées à la vidéo. Des vidéos annotées automatiquement par le PoC ont été montrées à des personnes sourdes qui l’ont accueilli et évalué favorablement.</abstract>
       <url hash="db2387f5">2024.jeptalnrecital-jep.52</url>
@@ -806,7 +806,7 @@
       <title>Au-delà de la performance des modèles : la prédiction de liens peut-elle enrichir des graphes lexico-sémantiques du français ?</title>
       <author><first>Hee-Soo</first><last>Choi</last></author>
       <author><first>Priyansh</first><last>Trivedi</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <pages>36–49</pages>
@@ -820,7 +820,7 @@
       <author><first>Thomas</first><last>Gerald</last></author>
       <author><first>Louis</first><last>Tamames</last></author>
       <author><first>Sofiane</first><last>Ettayeb</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <pages>50–63</pages>
       <abstract>Dans cet article nous présentons un nouveau corpus de question-réponse en français pour le domaine de l’éducation. Ce corpus à été construit dans le but de créer un système d’assistant virtuel pour répondre à des questions sur des documents ou du matériel de cours. Afin d’être utile autant aux enseignants qu’au étudiants, il est important de considérer des questions complexes ainsi que d’être capable de justifier les réponses sur du matériel validé. Nous présentons donc le nouveau Corpus CQuAE, un corpus de question-réponse manuellement annoté dont nous discutons des propriétés. Nous présenterons aussi les différentes étapes de sa création avec aujourd’hui une phase d’amélioration des données.Enfin, nous présentons plusieurs expériences pour évaluer l’exploitation du corpus dans le cadre d’un système de question-réponse automatique.Ces différentes analyses et expériences nous permettrons de valider l’adéquation des données collectés pour l’objectif visé.</abstract>
@@ -831,7 +831,7 @@
     <paper id="5">
       <title>Évaluation automatique des biais de genre dans des modèles de langue auto-régressifs</title>
       <author><first>Fanny</first><last>Ducel</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <pages>64–84</pages>
       <abstract>Nous proposons un outil pour mesurer automatiquement les biais de genre dans des textes générés par des grands modèles de langue dans des langues flexionnelles. Nous évaluons sept modèles à l’aide de 52 000 textes en français et 2 500 textes en italien, pour la rédaction de lettres de motivation. Notre outil s’appuie sur la détection de marqueurs morpho-syntaxiques de genre pour mettre au jour des biais. Ainsi, les modèles favorisent largement la génération de masculin : le genre masculin est deux fois plus présent que le féminin en français, et huit fois plus en italien. Les modèles étudiés exacerbent également des stéréotypes attestés en sociologie en associant les professions stéréotypiquement féminines aux textes au féminin, et les professions stéréotypiquement masculines aux textes au masculin.</abstract>
@@ -867,8 +867,8 @@
       <author><first>Pierre</first><last>Lepagnol</last></author>
       <author><first>Thomas</first><last>Gerald</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>113–129</pages>
       <abstract>Ce travail s’inscrit dans le débat sur l’efficacité des grands modèles de langue par rapport aux petits pour la classification de texte par amorçage (prompting). Nous évaluons ici le potentiel des petits modèles de langue dans la classification de texte sans exemples, remettant en question la prédominance des grands modèles. À travers un ensemble diversifié de jeux de données, notre étude compare les petits et les grands modèles utilisant différentes architectures et données de pré-entraînement. Nos conclusions révèlent que les petits modèles peuvent générer efficacement des étiquettes et, dans certains contextes, rivaliser ou surpasser les performances de leurs homologues plus grands. Ce travail souligne l’idée que le modèle le plus grand n’est pas toujours le meilleur, suggérant que les petits modèles économes en ressources peuvent offrir des solutions viables pour des défis spécifiques de classification de données</abstract>
       <url hash="fad44193">2024.jeptalnrecital-taln.8</url>
@@ -901,7 +901,7 @@
     <paper id="11">
       <title>Recherche de relation à partir d’un seul exemple fondée sur un modèle N-way K-shot : une histoire de distracteurs</title>
       <author><first>Hugo</first><last>Thomas</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>157–168</pages>
       <abstract>La recherche de relation à partir d’un exemple consiste à trouver dans un corpus toutes les occurrences d’un type de relation liant deux entités dans une phrase, nommé type cible et caractérisé à l’aide d’un seul exemple. Nous empruntons le scénario d’entraînement et évaluation N-way K-shot à la tâche de classification de relations rares qui prédit le type de relation liant deux entités à partir de peu d’exemples d’entraînement, et l’adaptons à la recherche de relation avec un exemple. Lors de l’évaluation, un modèle entraîné pour la classification de relations en N-way K-shot est utilisé, dans lequel K vaut un pour le type cible, une des N classes (du N-way) représente le type cible, et les N-1 classes restantes sont des distracteurs modélisant la classe de rejet. Les résultats sur FewRel et TACREV démontrent l’efficacité de notre approche malgré la difficulté de la tâche. L’étude de l’évolution des performances en fonction du nombre de distracteurs et des stratégies de leur choix met en avant une bonne configuration globale, à savoir un nombre élevé de distracteurs à une distance intermédiaire du type de relation cible dans l’espace latent appris par le modèle. Le diagnostic a posteriori de notre méthode révèle l’existence de configurations optimales pour chaque type cible que nos analyses actuelles échouent à caractériser, ouvrant la voie à de futurs travaux.</abstract>
@@ -912,7 +912,7 @@
     <paper id="12">
       <title>Reconnaissance d’entités cliniques en few-shot en trois langues</title>
       <author><first>Marco</first><last>Naguib</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>169–197</pages>
       <abstract>Les grands modèles de langage deviennent la solution de choix pour de nombreuses tâches de traitement du langage naturel, y compris dans des domaines spécialisés où leurs capacités few-shot devraient permettre d’obtenir des performances élevées dans des environnements à faibles ressources. Cependant, notre évaluation de 10 modèles auto-régressifs et 16 modèles masqués montre que, bien que les modèles auto-régressifs utilisant des prompts puissent rivaliser en termes de reconnaissance d’entités nommées (REN) en dehors du domaine clinique, ils sont dépassés dans le domaine clinique par des taggers biLSTM-CRF plus légers reposant sur des modèles masqués. De plus, les modèles masqués ont un bien moindre impact environnemental que les modèles auto-régressifs. Ces résultats, cohérents dans les trois langues étudiées, suggèrent que les modèles à apprentissage few-shot ne sont pas encore adaptés à la production de REN dans le domaine clinique, mais pourraient être utilisés pour accélérer la création de données annotées de qualité.</abstract>
@@ -924,7 +924,7 @@
       <title>Réduction des répétitions dans la Traduction Automatique Neuronale</title>
       <author><first>Marko</first><last>Avila</last></author>
       <author><first>Anna</first><last>Rebollo</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <pages>198–210</pages>
       <abstract>Actuellement, de nombreux systèmes TAL utilisent des décodeurs neuronaux pour la génération de textes, qui font preuve d’une capacité impressionnante à générer des textes approchant les niveaux de fluidité humaine. Toutefois, dans le cas des réseaux de traduction automatique, ils sont souvent confrontés à la production de contenu répétitif, également connu sous le nom de diction répétitive ou de répétition de mots, un aspect pour lequel ils n’ont pas été explicitement entraînés. Bien que cela ne soit pas intrinsèquement négatif, cette répétition peut rendre l’écriture monotone ou maladroite si elle n’est pas utilisée intentionnellement pour l’emphase ou des fins stylistiques. La répétition de mots a été traitée par des méthodes post-hoc pendant l’inférence, contraignant le réseau à examiner des hypothèses auxquelles le système avait initialement attribué une plus faible probabilité. Dans cet article, nous implémentons une méthode qui consiste à pénaliser les répétitions lors de l’apprentissage et qui s’inspire des principes du label smoothing. Conformément à cette méthode, nous modifions la distribution de la vérité terrain afin d’orienter le modèle de manière à décourager ces répétitions. Les résultats de nos expériences montrent que les méthodes proposées permettent de contrôler le problème de la répétition dans les moteurs neuronaux de traduction automatique sans compromis en termes d’efficacité ou de qualité des traductions.</abstract>
       <url hash="a42074ea">2024.jeptalnrecital-taln.13</url>
@@ -957,7 +957,7 @@
       <author><first>Adrien</first><last>Pupier</last></author>
       <author><first>Maximin</first><last>Coavoux</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <pages>234–244</pages>
       <abstract>Effectuer l’analyse syntaxique du signal audio –plutôt que de passer par des transcriptions de l’audio– est une tache récemment proposée par Pupier et al. (2022), dans le but d’incorporer de l’information prosodique dans le modèle d’analyse syntaxique et de passer outre les limitations d’une approche cascade qui consisterait à utiliser un système de reconnaissance de la parole (RAP) puis un analyseur syntaxique. Dans cet article, nous effectuons un ensemble d’expériences visant à comparer les performances de deux familles d’analyseurs syntaxiques (i) l’approche par graphe (ii) la réduction à une tâche d’étiquetage de séquence ; directement sur la parole. Nous évaluons notre approche sur un corpus arboré du Français parlé. Nous montrons que (i) l’approche par graphe obtient de meilleurs résultats globalement (ii) effectuer l’analyse syntaxique directement depuis la parole obtient de meilleurs résultats qu’une approche par cascade de systèmes, malgré 30 de paramètre en moins</abstract>
       <url hash="1a18f7f0">2024.jeptalnrecital-taln.16</url>
@@ -977,7 +977,7 @@
     <paper id="18">
       <title><fixed-case>W</fixed-case>iki<fixed-case>F</fixed-case>act<fixed-case>D</fixed-case>iff: Un Grand jeu de données Réaliste et Temporellement Adaptable pour la Mise à Jour Atomique des Connaissances Factuelles dans les Modèles de Langue Causaux</title>
       <author><first>Hichem</first><last>Ammar Khodja</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Quentin</first><last>Brabant</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Gwénolé</first><last>Lecrové</last></author>
@@ -1037,8 +1037,8 @@
     <paper id="23">
       <title>Approche multitâche pour l’amélioration de la fiabilité des systèmes de résumé automatique de conversation</title>
       <author><first>Eunice</first><last>Akani</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Romain</first><last>Gemignani</last></author>
       <pages>338–351</pages>
       <abstract>Le résumé de dialogue consiste à générer un résumé bref et cohérent d’une conversation ou d’un dialogue entre deux ou plusieurs locuteurs. Même si les modèles de langue les plus récents ont permis des progrès remarquables dans ce domaine, générer un résumé fidèle au dialogue de départ reste un défi car cela nécessite de prendre en compte l’interaction entre les locuteurs pour conserver les informations les plus pertinentes du dialogue. Nous nous plaçons dans le cadre des dialogues humain-humain avec but. Ce cadre nous permet d’intégrer des informations relatives à la tâche dans le cadre du résumé de dialogue afin d’aider le système à générer des résumés plus fidèles sémantiquement. Nous évaluons dans cette étude des approches multitâches permettant de lier la tâche de résumé à des tâches de compréhension du langage comme la détection de motifs d’appels. Les informations liées à la tâche nous permettent également de proposer des nouvelles méthodes de sélection de résumés basées sur l’analyse sémantique du dialogue ainsi que des métriques d’évaluation basées également sur cette même analyse. Nous avons testé ces méthodes sur DECODA, un corpus français de dialogue collecté dans le centre d’appel de la RATP entre des usagers et des téléconseillers. Nous montrons que l’ajout d’informations liées à la tâche augmente la fiabilité des résumés générés.</abstract>
@@ -1069,7 +1069,7 @@
     <paper id="26">
       <title>De nouvelles méthodes pour l’exploration de l’interface syntaxe-prosodie : un treebank intonosyntaxique et un système de synthèse pour le pidgin nigérian</title>
       <author><first>Emmett</first><last>Strickland</last></author>
-      <author><first>Anne</first><last>Lacheret-Dujour</last></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret-Dujour</last></author>
       <author><first>Marc</first><last>Evrard</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <author><first>Dana</first><last>Aubakirova</last></author>
@@ -1086,9 +1086,9 @@
     <paper id="27">
       <title>Étude des facteurs de complexité des modèles de langage dans une tâche de compréhension de lecture à l’aide d’une expérience contrôlée sémantiquement</title>
       <author><first>Elie</first><last>Antoine</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>384–396</pages>
       <abstract>Cet article propose une méthodologie pour identifier les facteurs de complexité inhérents aux tâches de traitement automatique du langage (TAL), indépendamment de la dimension des modèles. Il montre que la performance inférieure de certains exemples est attribuable à des facteurs de complexités spécifiques. Plutôt que de procéder à des évaluations générales, nous préconisons des évaluations restreintes portant sur des tâches, des ensembles de données et des langues spécifiques, décrites de manière linguistique. Appliquée à une tâche de compréhension de texte via un corpus de questions-réponses, notre méthode met en évidence des facteurs de complexité sémantique affectant divers modèles de tailles et d’architectures différentes. En outre, nous proposons plusieurs corpus de complexité sémantique croissante dérivés de ces facteurs, avançant que l’optimisation de leur traitement dépasse la simple augmentation de la taille des modèles.</abstract>
       <url hash="3c60956b">2024.jeptalnrecital-taln.27</url>
@@ -1120,7 +1120,7 @@
       <author><first>Clémence</first><last>Sebe</last></author>
       <author><first>Sarah</first><last>Cohen-Boulakia</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>422–434</pages>
       <abstract>Les chaînes de traitement d’analyses de données biologiques utilisées en bioinformatique sont une solution pour la portabilité et la reproductibilité des analyses. Ces chaînes figurent à la fois sous forme descriptive dans des articles scientifiques et/ou sous forme de codes dans des dépôts. L’identification de publications scientifiques décrivant de nouvelles chaînes de traitement et l’extraction de leurs informations sont des enjeux importants pour la communauté bioinformatique. Nous proposons ici d’étendre le corpus BioToFlow ayant trait aux articles décrivant des chaînes de traitement bioinformatiques et de l’utiliser pour entraîner et évaluer des modèles de reconnaissance d’entités nommées bioinformatiques. Ce travail est accompagné d’une discussion critique portant à la fois sur le processus d’annotation du corpus et sur les résultats de l’extraction d’entités.</abstract>
       <url hash="a2ca714d">2024.jeptalnrecital-taln.30</url>
@@ -1133,7 +1133,7 @@
       <author><first>Nicolas</first><last>Hiebel</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>435–448</pages>
       <abstract>La génération de texte ouvre des perspectives pour pallier l’absence de corpus librement partageables dans des domaines contraints par la confidentialité, comme le domaine médical. Dans cette étude, nous comparons les performances de modèles encodeurs-décodeurs et décodeurs seuls pour la génération conditionnée de cas cliniques en français. Nous affinons plusieurs modèles pré-entraînés pour chaque architecture sur des cas cliniques en français conditionnés par les informations démographiques des patient·es (sexe et âge) et des éléments cliniques.Nous observons que les modèles encodeur-décodeurs sont plus facilement contrôlables que les modèles décodeurs seuls, mais plus coûteux à entraîner.</abstract>
       <url hash="e4f2d50d">2024.jeptalnrecital-taln.31</url>
@@ -1183,7 +1183,7 @@
       <author><first>Jérôme</first><last>Louradour</last></author>
       <author><first>Roxane</first><last>Bertrand</last></author>
       <author><first>Kate</first><last>Thompson</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>508–529</pages>
       <abstract>We present the MEETING corpus, a dataset of roughly 95 hours of spontaneous meeting-style conversations in French. The corpus is designed to serve as a foundation for downstream tasks such as meeting summarization. In its current state, it offers 25 hours of manually corrected transcripts that are aligned with the audio signal, making it a valuable resource for evaluating ASR and speaker recognition systems. It also includes automatic transcripts and alignments of the whole corpus which can be used for downstream NLP tasks. The aim of this paper is to describe the conception, production and annotation of the corpus up to the transcription level as well as to provide statistics that shed light on the main linguistic features of the corpus.</abstract>
       <url hash="0de9f314">2024.jeptalnrecital-taln.35</url>
@@ -1218,7 +1218,7 @@
       <title>Optimisation des performances d’un système de reconnaissance automatique de la parole pour les commentaires sportifs: fine-tuning de Whisper</title>
       <author><first>Camille</first><last>Lavigne</last></author>
       <author><first>Alex</first><last>Stasica</last></author>
-      <author><first>Anna</first><last>Kupsc</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupsc</last></author>
       <pages>567–581</pages>
       <abstract>Malgré les performances élevées des systèmes automatiques de reconnaissance de la parole (Automatic Speech Recognition ; ASR) sur des corpus généraux, leur efficacité est considérablement réduite lorsqu’ils sont confrontés à des corpus spécialisés. Ces corpus peuvent notamment contenir du lexique propre à des domaines spécifiques, des accents ou du bruit de fond rendant la transcription ardue. Cette étude vise à évaluer les avantages de l’optimisation d’une transcription automatique, par opposition à manuelle, après fine-tuning d’un modèle d’ASR de dernière génération, Whisper (Radford et al., 2023), sur un corpus spécialisé de commentaires sportifs de petite taille. Nos analyses quantitatives et qualitatives indiquent que Whisper est capable d’apprendre les particularités d’un corpus de spécialité, atteignant des performances égales où supérieures aux transcripteurs humains, avec cette quantité de données limitée. Cette recherche met en lumière le rôle que l’intelligence artificielle, notamment les larges modèles de langage, peut jouer pour faciliter la création de corpus spécialisés.</abstract>
       <url hash="b89294e1">2024.jeptalnrecital-taln.38</url>
@@ -1228,7 +1228,7 @@
     <paper id="39">
       <title>Optimiser le choix des exemples pour la traduction automatique augmentée par des mémoires de traduction</title>
       <author><first>Maxime</first><last>Bouthors</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>582–604</pages>
       <abstract>La traduction neuronale à partir d’exemples s’appuie sur l’exploitation d’une mémoire de traduction contenant des exemples similaires aux phrases à traduire. Ces exemples sont utilisés pour conditionner les prédictions d’un décodeur neuronal. Nous nous intéressons à l’amélioration du système qui effectue l’étape de recherche des phrases similaires, l’architecture du décodeur neuronal étant fixée et reposant ici sur un modèle explicite d’édition, le Transformeur multi-Levenshtein. Le problème considéré consiste à trouver un ensemble optimal d’exemples similaires, c’est-à-dire qui couvre maximalement la phrase source. En nous appuyant sur la théorie des fonctions sous-modulaires, nous explorons de nouveaux algorithmes pour optimiser cette couverture et évaluons les améliorations de performances auxquels ils mènent pour la tâche de traduction automatique.</abstract>
@@ -1252,7 +1252,7 @@
     <paper id="41">
       <title>Prédiction de la complexité lexicale : Une étude comparative entre <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> et un modèle dédié à cette tâche.</title>
       <author><first>Abdelhak</first><last>Kelious</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Christophe</first><last>Coeur</last></author>
       <pages>617–629</pages>
       <abstract>Cette étude s’intéresse à la prédiction de la complexité lexicale. Nous explorons des méthodesd’apprentissage profond afin d’évaluer la complexité d’un mot en se basant sur son contexte. Plusspécifiquement, nous examinons comment utiliser des modèles de langue pré-entraînés pour encoderle mot cible et son contexte, en les combinant avec des caractéristiques supplémentaires basées sur lafréquence. Notre approche obtient de meilleurs résultats que les meilleurs systèmes de SemEval-2021(Shardlow et al., 2021). Enfin, nous menons une étude comparative avec ChatGPT afin d’évaluer sonpotentiel pour prédire la complexité lexicale en comparaison avec un modèle dédié à cette tâche.</abstract>
@@ -1342,7 +1342,7 @@
       <author><first>Felix</first><last>Grezes</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Fabian</first><last>Schüssler</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>720–733</pages>
       <abstract>Le manque de ressources annotées constitue un défi majeur pour le traitement automatique de la langue en astrophysique. Afin de combler cette lacune, nous présentons astroECR, une extension du corpus TDAC (Time-Domain Astrophysics Corpus). Notre corpus, constitué de 300 rapports d’observation en anglais, étend le schéma d’annotation initial de TDAC en introduisant cinq classes d’entités nommées supplémentaires spécifiques à l’astrophysique. Nous avons enrichi les annotations en incluant les coréférences, les relations sémantiques entre les objets célestes et leurs propriétés physiques, ainsi qu’en normalisant les noms d’objets célestes via des bases de données astronomiques. L’utilité de notre corpus est démontrée en fournissant des scores de référence à travers quatre tâches~: la reconnaissance d’entités nommées, la résolution de coréférences, la détection de relations, et la normalisation des noms d’objets célestes. Nous mettons à disposition le corpus ainsi que son guide d’annotation, les codes sources, et les modèles associés.</abstract>
       <url hash="8ee8a65c">2024.jeptalnrecital-taln.48</url>
@@ -1403,8 +1403,8 @@
       <author><first>Cassandre</first><last>Armand</last></author>
       <author><first>Chiara</first><last>Mazzocconi</last></author>
       <author><first>Shreejata</first><last>Gupta</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Leonor</first><last>Becerra-Bonache</last></author>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
       <pages>4–5</pages>
@@ -1453,13 +1453,13 @@
       <author><first>Mathilde</first><last>Aguiar</last></author>
       <author><first>Felix</first><last>Herron</last></author>
       <author><first>Magali</first><last>Norré</last></author>
-      <author><first>Massih-Reza</first><last>Amini</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="massih-r-amini"><first>Massih-Reza</first><last>Amini</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Iris</first><last>Eshkol Taravella</last></author>
       <author><first>Emmanuelle</first><last>Esparança-Rodier</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Lorraine</first><last>Goeuriot</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>François</first><last>Portet</last></author>
@@ -1494,7 +1494,7 @@
       <author><first>Louis</first><last>Escouflaire</last></author>
       <author><first>Antonin</first><last>Descampe</last></author>
       <author><first>Antoine</first><last>Venant</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>12–13</pages>
       <abstract>Cet article s’intéresse à la capacité de transfert des modèles de classification de texte dans le domaine journalistique, en particulier pour distinguer les articles d’opinion des articles d’information. A l’ère du numérique et des réseaux sociaux, les distinctions entre ces genres deviennent de plus en plus floues, augmentant l’importance de cette tâche de classification. Un corpus de 80 000 articles de presse provenant de huit médias, quatre québécois et quatre belges francophones, a été constitué. Pour identifier les thèmes des articles, une clusterisation a été appliquée sur les 10 000 articles issus de chaque média, assurant une distribution équilibrée des thèmes entre les deux genres opinion et information. Les données ont ensuite été utilisées pour entraîner (ou peaufiner) et évaluer deux types de modèles : CamemBERT (Martin et al., 2019), un modèle neuronal pré-entraîné, et un modèle de régression logistique basé sur des traits textuels. Dix versions différentes de chaque modèle sont entraînées : 8 versions mono-médias’, chacune peaufinée sur l’ensemble d’entraînement du sous-corpus correspondant à un média, et deux versions multi-médias’, l’une peaufinée sur 8000 articles québécois, l’autre sur les articles belges. Les résultats montrent que les modèles CamemBERT surpassent significativement les modèlesstatistiques en termes de capacité de transfert (voir Figures 1 et 2). Les modèles CamemBERT montrent une plus grande exactitude, notamment sur les ensembles de test du même média que celui utilisé pour l’entraînement. Cependant, les modèles entraînés sur Le Journal de Montréal(JDM) sont particulièrement performants même sur d’autres ensembles de test, suggérant une distinction plus claire entre les genres journalistiques dans ce média. Les modèles CamemBERT multi-médias affichent également de bonnes performances. Le modèle québécois notamment obtient les meilleurs résultats en moyenne, indiquant qu’une diversité de sources améliore la généricité du modèle. Les modèles statistiques (mono- et multi-médias) montrent des performances globalement inférieures, avec des variations significatives selon les médias. Les textes québécois sont plus difficiles à classer pour ces modèles, suggérant des différences culturelles dans les pratiques journalistiques entre le Québec et la Belgique. L’analyse des traits révèle que l’importance de certains éléments textuels, comme les points d’exclamation et les marqueurs de temps relatifs, varient considérablement entre les modèles entraînés sur différents médias. Par exemple, les éditoriaux du JDM utilisent fréquemment des points d’exclamation, reflétant un style plus affirmé et polarisant. En revanche, les articles de La Presse présentent des particularités qui compliquent la généralisation de la tâche. En sommme, cette étude démontre la supériorité des modèles neuronaux comme CamemBERT pour la classification de textes journalistiques, notamment grâce à leur capacité de transfert, bien que les modèles basés sur des traits se distinguent par la transparence de leur raisonnement’. Elle met également en lumière des différences significatives entre les cultures journalistiques québécoises et belges.</abstract>
       <url hash="51328ad8">2024.jeptalnrecital-trad.8</url>
@@ -1532,9 +1532,9 @@
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
       <author><first>Romane</first><last>Gallienne</last></author>
       <author><first>Carlos-Emiliano</first><last>González-Gallardo</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Jose</first><last>G Moreno</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Vincent</first><last>Segonne</last></author>
       <author><first>Johanna</first><last>Simoens</last></author>
@@ -1658,7 +1658,7 @@
     <meta>
       <booktitle>Actes du Défi Fouille de Textes@TALN 2024</booktitle>
       <editor><first>Richard</first><last>Dufour</last></editor>
-      <editor><first>Benoit</first><last>Favre</last></editor>
+      <editor id="benoit-favre"><first>Benoit</first><last>Favre</last></editor>
       <editor><first>Mickael</first><last>Rouvier</last></editor>
       <editor><first>Adrien</first><last>Bazoge</last></editor>
       <editor><first>Yanis</first><last>Labrak</last></editor>
@@ -1691,7 +1691,7 @@
       <author><first>Charles-William</first><last>Cummings</last></author>
       <author><first>Azur</first><last>Handan</last></author>
       <author><first>Edith</first><last>Galy</last></author>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <pages>11–22</pages>
       <abstract>Ce papier décrit le travail de l’équipe du CRIM (Centre de recherche en Informatique de Montréal) dans le cadre du Défi Fouille de textes 2024. Nous présentons les expériences que nous avons menées dans le cadre de la tâche principale consistant à identifier automatiquement, pour une question donnée issue d’annales d’examens de pharmacie, l’ensemble des réponses correctes parmi les cinq proposées. La contrainte est d’utiliser un système de moins de 3 milliards de paramètres dont les données d’entraînement sont connues. Pour ce faire, nous avons testé des approches impliquant du few-shot prompting, du RAG, de l’affinage et de la génération contrainte en dernier recours.</abstract>
       <url hash="b546b290">2024.jeptalnrecital-deft.2</url>
diff --git a/data/xml/2024.kallm.xml b/data/xml/2024.kallm.xml
index 70c53048ff..c31207ab18 100644
--- a/data/xml/2024.kallm.xml
+++ b/data/xml/2024.kallm.xml
@@ -27,7 +27,7 @@
       <author><first>Yeon</first><last>Seonwoo</last></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
       <author><first>James</first><last>Thorne</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>1-11</pages>
       <abstract>Application of LLM to database queries on natural language sentences has demonstrated impressive results in both single and multi-hop scenarios.In the existing methodologies, the requirement to re-encode query vectors at each stage for processing multi-hop queries presents a significant bottleneck to the inference speed.This paper proposes VKGFR (Virtual Knowledge Graph based Fact Retriever) that leverages large language models to extract representations corresponding to a sentence’s knowledge graph, significantly enhancing inference speed for multi-hop reasoning without performance loss.Given that both the queries and natural language database sentences can be structured as a knowledge graph, we suggest extracting a Virtual Knowledge Graph (VKG) representation from sentences with LLM.Over the pre-constructed VKG, our VKGFR conducts retrieval with a tiny model structure, showing performance improvements with higher computational efficiency. We evaluate VKGFR on the WikiNLDB and MetaQA dataset, designed for multi-hop database reasoning over text. The results indicate 13x faster inference speed on the WikiNLDB dataset without performance loss.</abstract>
       <url hash="d9dd4e2f">2024.kallm-1.1</url>
@@ -73,7 +73,7 @@
     <paper id="5">
       <title><fixed-case>KGAST</fixed-case>: From Knowledge Graphs to Annotated Synthetic Texts</title>
       <author><first>Nakanyseth</first><last>Vuth</last></author>
-      <author><first>Gilles</first><last>Sérasset</last><affiliation>Université Grenoble Alpes</affiliation></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <author><first>Didier</first><last>Schwab</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <pages>43-55</pages>
       <abstract>In recent years, the use of synthetic data, either as a complement or a substitute for original data, has emerged as a solution to challenges such as data scarcity and security risks. This paper is an initial attempt to automatically generate such data for Information Extraction tasks. We accomplished this by developing a novel synthetic data generation framework called KGAST, which leverages Knowledge Graphs and Large Language Models. In our preliminary study, we conducted simple experiments to generate synthetic versions of two datasets—a French security defense dataset and an English general domain dataset, after which we evaluated them both intrinsically and extrinsically. The results indicated that synthetic data can effectively complement original data, improving the performance of models on classes with limited training samples. This highlights KGAST’s potential as a tool for generating synthetic data for Information Extraction tasks.</abstract>
@@ -127,7 +127,7 @@
       <title><fixed-case>STAGE</fixed-case>: Simplified Text-Attributed Graph Embeddings using Pre-trained <fixed-case>LLM</fixed-case>s</title>
       <author><first>Aaron</first><last>Zolnai-Lucas</last><affiliation>Quantexa Ltd</affiliation></author>
       <author><first>Jack</first><last>Boylan</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chris</first><last>Hokamp</last><affiliation>Quantexa</affiliation></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last><affiliation>Quantexa</affiliation></author>
       <author><first>Parsa</first><last>Ghaffari</last></author>
       <pages>92-104</pages>
       <abstract>We present STAGE, a straightforward yet effective method for enhancing node features in Graph Neural Network (GNN) models that encode Text-Attributed Graphs (TAGs). Our approach leverages Large-Language Models (LLMs) to generate embeddings for textual attributes. STAGE achieves competitive results on various node classification benchmarks while also maintaining a simplicity in implementation relative to current state-of-the-art (SoTA) techniques. We show that utilizing pre-trained LLMs as embedding generators provides robust features for ensemble GNN training, enabling pipelines that are simpler than current SoTA approaches which require multiple expensive training and prompting stages. We also implement diffusion-pattern GNNs in an effort to make this pipeline scalable to graphs beyond academic benchmarks.</abstract>
diff --git a/data/xml/2024.kemt.xml b/data/xml/2024.kemt.xml
index e897872282..ac2611ee80 100644
--- a/data/xml/2024.kemt.xml
+++ b/data/xml/2024.kemt.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the First International Workshop on Knowledge-Enhanced Machine Translation</booktitle>
       <editor><first>Arda</first><last>Tezcan</last></editor>
-      <editor><first>Víctor M.</first><last>Sánchez-Cartagena</last></editor>
-      <editor><first>Miquel</first><last>Esplà-Gomis</last></editor>
+      <editor id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></editor>
+      <editor id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></editor>
       <publisher>European Association for Machine Translation (EAMT)</publisher>
       <address>Sheffield, United Kingdom</address>
       <month>June</month>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="2">
       <title>Exploring Inline Lexicon Injection for Cross-Domain Transfer in Neural Machine Translation</title>
-      <author><first>Jesujoba O.</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba O.</first><last>Alabi</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <pages>7-20</pages>
       <abstract>Domain transfer remains a challenge in machine translation (MT), particularly concerning rare or unseen words. Amongst the strategies proposed to address the issue, one of the simplest and most promising in terms of generalisation capacity is coupling the MT system with external resources such as bilingual lexicons and appending inline annotations within source sentences. This method has been shown to work well for controlled language settings, but its usability for general language (and ambiguous) MT is less certain. In this article we explore this question further, testing the strategy in a multi-domain transfer setting for German-to-English MT, using the mT5 language model fine-tuned on parallel data. We analyse the MT outputs and design evaluation strategies to understand the behaviour of such models. Our analysis using distractor annotations suggests that although improvements are not systematic according to automatic metrics, the model does learn to select appropriate translation candidates and ignore irrelevant ones, thereby exhibiting more than a systematic copying behaviour. However, we also find that the method is less successful in a higher-resource setting with a larger lexicon, suggesting that it is not a magic solution, especially when the baseline model is already exposed to a wide range of vocabulary.</abstract>
diff --git a/data/xml/2024.knowledgenlp.xml b/data/xml/2024.knowledgenlp.xml
index b587453e4c..cc4459804b 100644
--- a/data/xml/2024.knowledgenlp.xml
+++ b/data/xml/2024.knowledgenlp.xml
@@ -9,8 +9,8 @@
       <editor><first>Meng</first><last>Jiang</last></editor>
       <editor><first>Chenguang</first><last>Zhu</last></editor>
       <editor><first>Hannaneh</first><last>Hajishirzi</last></editor>
-      <editor><first>Luke</first><last>Zettlemoyer</last></editor>
-      <editor id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></editor>
+      <editor id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></editor>
+      <editor><first>Zhihan</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -27,8 +27,8 @@
       <title><fixed-case>GAD</fixed-case>e<fixed-case>P</fixed-case>o: Graph-Assisted Declarative Pooling Transformers for Document-Level Relation Extraction</title>
       <author><first>Andrei</first><last>Coman</last></author>
       <author><first>Christos</first><last>Theodoropoulos</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <pages>1-14</pages>
       <abstract>Document-level relation extraction typically relies on text-based encoders and hand-coded pooling heuristics to aggregate information learned by the encoder. In this paper, we leverage the intrinsic graph processing capabilities of the Transformer model and propose replacing hand-coded pooling methods with new tokens in the input, which are designed to aggregate information via explicit graph relations in the computation of attention weights. We introduce a joint text-graph Transformer model and a graph-assisted declarative pooling (GADePo) specification of the input, which provides explicit and high-level instructions for information aggregation. GADePo allows the pooling process to be guided by domain-specific knowledge or desired outcomes but still learned by the Transformer, leading to more flexible and customisable pooling strategies. We evaluate our method across diverse datasets and models and show that our approach yields promising results that are consistently better than those achieved by the hand-coded pooling functions.</abstract>
       <url hash="1d08fcae">2024.knowledgenlp-1.1</url>
@@ -59,7 +59,7 @@
       <author><first>Marcus</first><last>Collins</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
       <author><first>Eugene</first><last>Agichtein</last><affiliation>Amazon and Emory University</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>30-43</pages>
       <abstract>Continued improvement of conversational assistants in knowledge-rich domains like E-Commerce requires large volumes of realistic high-quality conversation data to power increasingly sophisticated large language model chatbots, dialogue managers, response rankers, and recommenders. The problem is exacerbated for multi-modal interactions in realistic conversational product search and recommendation. Here, an artificial sales agent must interact intelligently with a customer using both textual and visual information and incorporate results from external search systems, such as a product catalog. Yet, it remains an open question how to best crowd-source large-scale, naturalistic multi-modal dialogue and action data, required to train such an artificial agent. We describe our crowd-sourced task where one worker (the Buyer) plays the role of the customer, and another (the Seller) plays the role of the sales agent. We identify subtle interactions between one worker’s environment and their partner’s behavior mediated by workers’ word choice. We find that limiting information presented to the Buyer, both in their backstory and by the Seller, improves conversation quality. We also show how conversations are improved through minimal automated Seller “coaching”. While typed and spoken messages are slightly different, the differences are not as large as frequently assumed. We plan to release our platform code and the resulting dialogues to advance research on conversational search agents.</abstract>
       <url hash="788ccfb0">2024.knowledgenlp-1.3</url>
@@ -98,7 +98,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>SeungYoon</first><last>Han</last></author>
-      <author><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>73-92</pages>
       <abstract>Recent advancements in Large Language Models (LLMs) have significantly improved their performance across various Natural Language Processing (NLP) tasks.However, LLMs still struggle with generating non-factual responses due to limitations in their parametric memory.Retrieval-Augmented Generation (RAG) systems address this issue by incorporating external knowledge with a retrieval module.Despite their successes, however, current RAG systems face challenges with retrieval failures and the limited ability of LLMs to filter out irrelevant information.Therefore, in this work, we propose <i>
           <b>DSLR</b></i> (<b>D</b>ocument Refinement with <b>S</b>entence-<b>L</b>evel <b>R</b>e-ranking and Reconstruction), an unsupervised framework that decomposes retrieved documents into sentences, filters out irrelevant sentences, and reconstructs them again into coherent passages.We experimentally validate <i>DSLR</i> on multiple open-domain QA datasets and the results demonstrate that <i>DSLR</i> significantly enhances the RAG performance over conventional fixed-size passage.Furthermore, our <i>DSLR</i> enhances performance in specific, yet realistic scenarios without the need for additional training, providing an effective and efficient solution for refining retrieved documents in RAG systems.</abstract>
diff --git a/data/xml/2024.knowllm.xml b/data/xml/2024.knowllm.xml
index 68df7a269d..bdcf60ce0b 100644
--- a/data/xml/2024.knowllm.xml
+++ b/data/xml/2024.knowllm.xml
@@ -99,7 +99,7 @@
       <author><first>Ye</first><last>Liu</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Rui</first><last>Meng</last><affiliation>SalesForce Research</affiliation></author>
       <author><first>Meghana Moorthy</first><last>Bhat</last><affiliation>Salesforce Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Yingbo</first><last>Zhou</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
@@ -198,7 +198,7 @@
       <title>Retrieval-augmented generation in multilingual settings</title>
       <author><first>Nadezhda</first><last>Chirkova</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>David</first><last>Rau</last></author>
-      <author><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Thibault</first><last>Formal</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Stéphane</first><last>Clinchant</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Vassilina</first><last>Nikoulina</last><affiliation>Naver Labs Europe</affiliation></author>
@@ -212,7 +212,7 @@
       <title>Retrieve, Generate, Evaluate: A Case Study for Medical Paraphrases Generation with Small Language Models</title>
       <author><first>Ioana</first><last>Buhnila</last></author>
       <author><first>Aman</first><last>Sinha</last></author>
-      <author><first>Mathieu</first><last>Constant</last><affiliation>Université de Lorraine, CNRS, ATILF</affiliation></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last><affiliation>Université de Lorraine, CNRS, ATILF</affiliation></author>
       <pages>189-203</pages>
       <abstract>Recent surge in the accessibility of large language models (LLMs) to the general population can lead to untrackable use of such models for medical-related recommendations. Language generation via LLMs models has two key problems: firstly, they are prone to hallucination and therefore, for any medical purpose they require scientific and factual grounding; secondly, LLMs pose tremendous challenge to computational resources due to their gigantic model size. In this work, we introduce pRAGe, a Pipeline for Retrieval Augmented Generation and Evaluation of medical paraphrases generation using Small Language Models (SLM). We study the effectiveness of SLMs and the impact of external knowledge base for medical paraphrase generation in French.</abstract>
       <url hash="2cd8cfe2">2024.knowllm-1.16</url>
diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml
index f7227a39bb..75d90d1b2f 100644
--- a/data/xml/2024.konvens.xml
+++ b/data/xml/2024.konvens.xml
@@ -99,8 +99,8 @@
     <paper id="10">
       <title>Using <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Generation of Crossword Puzzles</title>
       <author><first>Claus</first><last>Zinn</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>89–97</pages>
       <url hash="fba1ee43">2024.konvens-main.10</url>
       <bibkey>zinn-etal-2024-using</bibkey>
@@ -145,7 +145,7 @@
     <paper id="15">
       <title>How to Translate <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> to <fixed-case>G</fixed-case>erman? A Comparative Study of Answer Span Retrieval Methods for Question Answering Dataset Creation</title>
       <author><first>Jens</first><last>Kaiser</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <pages>134–140</pages>
       <url hash="c24fd43e">2024.konvens-main.15</url>
       <bibkey>kaiser-falenska-2024-translate</bibkey>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="17">
       <title>Binary indexes for optimising corpus queries</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Nicholas</first><last>Smallbone</last></author>
       <author><first>Mijo</first><last>Thoresson</last></author>
       <author><first>Victor</first><last>Salomonsson</last></author>
@@ -204,7 +204,7 @@
     <paper id="22">
       <title>Fine-grained quotation detection and attribution in <fixed-case>G</fixed-case>erman news articles</title>
       <author><first>Fynn</first><last>Petersen-Frey</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>196–208</pages>
       <url hash="b87946b1">2024.konvens-main.22</url>
       <bibkey>petersen-frey-biemann-2024-fine</bibkey>
diff --git a/data/xml/2024.langmol.xml b/data/xml/2024.langmol.xml
index 134f718778..21d456655f 100644
--- a/data/xml/2024.langmol.xml
+++ b/data/xml/2024.langmol.xml
@@ -136,7 +136,7 @@
       <author><first>Carol</first><last>Mak</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Flaviu</first><last>Cipcigan</last><affiliation>International Business Machines</affiliation></author>
       <author><first>James</first><last>Barry</last></author>
-      <author><first>Mohab</first><last>Elkaref</last><affiliation>International Business Machines</affiliation></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Movina</first><last>Moses</last></author>
       <author><first>Vishnudev</first><last>Kuruvanthodi</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Geeth</first><last>Mel</last></author>
diff --git a/data/xml/2024.latechclfl.xml b/data/xml/2024.latechclfl.xml
index 699d91d442..769f246fbf 100644
--- a/data/xml/2024.latechclfl.xml
+++ b/data/xml/2024.latechclfl.xml
@@ -6,7 +6,7 @@
       <editor><first>Yuri</first><last>Bizzoni</last></editor>
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>St. Julians, Malta</address>
       <month>March</month>
@@ -34,7 +34,7 @@
       <title>Coreference in Long Documents using Hierarchical Entity Merging</title>
       <author><first>Talika</first><last>Gupta</last><affiliation>IIIT Guwahati</affiliation></author>
       <author><first>Hans Ole</first><last>Hatzel</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>11-17</pages>
       <abstract>Current top-performing coreference resolution approaches are limited with regard to the maximum length of texts they can accept. We explore a recursive merging technique of entities that allows us to apply coreference models to texts of arbitrary length, as found in many narrative genres. In experiments on established datasets, we quantify the drop in resolution quality caused by this approach. Finally, we use an under-explored resource in the form of a fully coreference-annotated novel to illustrate our model’s performance for long documents in practice. Here, we achieve state-of-the-art performance, outperforming previous systems capable of handling long documents.</abstract>
       <url hash="07186827">2024.latechclfl-1.2</url>
@@ -162,7 +162,7 @@
     </paper>
     <paper id="14">
       <title>Post-Correction of Historical Text Transcripts with Large Language Models: An Exploratory Study</title>
-      <author><first>Emanuela</first><last>Boros</last><affiliation>EPFL</affiliation></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last><affiliation>EPFL</affiliation></author>
       <author><first>Maud</first><last>Ehrmann</last><affiliation>Ècole Polytechnique Fédérale de Lausanne</affiliation></author>
       <author><first>Matteo</first><last>Romanello</last><affiliation>Ècole polytechnique fédérale de Lausanne / Deutsches Archäologisches Institut</affiliation></author>
       <author><first>Sven</first><last>Najem-Meyer</last><affiliation>Ècole Polytechnique Fédérale de Lausanne</affiliation></author>
@@ -191,7 +191,7 @@
       <title>Perplexing Canon: A study on <fixed-case>GPT</fixed-case>-based perplexity of canonical and non-canonical literary works</title>
       <author><first>Yaru</first><last>Wu</last><affiliation>UU</affiliation></author>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Moreira</last><affiliation>Comparative Literature, School of Communication and Culture, Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Moreira</last><affiliation>Comparative Literature, School of Communication and Culture, Aarhus University</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
       <pages>172-184</pages>
       <abstract>This study extends previous research on literary quality by using information theory-based methods to assess the level of perplexity recorded by three large language models when processing 20th-century English novels deemed to have high literary quality, recognized by experts as canonical, compared to a broader control group. We find that canonical texts appear to elicit a higher perplexity in the models, we explore which textual features might concur to create such an effect. We find that the usage of a more heavily nominal style, together with a more diverse vocabulary, is one of the leading causes of the difference between the two groups. These traits could reflect “strategies” to achieve an informationally dense literary style.</abstract>
@@ -225,7 +225,7 @@
     <paper id="19">
       <title>Two Approaches to Diachronic Normalization of <fixed-case>P</fixed-case>olish Texts</title>
       <author><first>Kacper</first><last>Dudzic</last><affiliation>Adam Mickiewicz University</affiliation></author>
-      <author><first>Filip</first><last>Gralinski</last><affiliation>Applica.ai</affiliation></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Gralinski</last><affiliation>Applica.ai</affiliation></author>
       <author><first>Krzysztof</first><last>Jassem</last><affiliation>Adam Mickiewicz University</affiliation></author>
       <author><first>Marek</first><last>Kubis</last><affiliation>Adam Mickiewicz University</affiliation></author>
       <author><first>Piotr</first><last>Wierzchon</last><affiliation>Adam Mickiewicz University</affiliation></author>
@@ -241,8 +241,8 @@
       <author><first>Youcef</first><last>Benkhedda</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Adrians</first><last>Skapars</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>ASUS AICS</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>Department of Computer Science, The University of Manchester</affiliation></author>
       <pages>213-220</pages>
       <abstract>Digital archive collections that have been contributed by communities, known as community-generated digital content (CGDC), are important sources of historical and cultural knowledge. However, CGDC items are not easily searchable due to semantic information being obscured within their textual metadata. In this paper, we investigate the extent to which state-of-the-art, general-domain entity linking (EL) models (i.e., BLINK, EPGEL and mGENRE) can map named entities mentioned in CGDC textual metadata, to Wikidata entities. We evaluate and compare their performance on an annotated dataset of CGDC textual metadata and provide some error analysis, in the way of informing future studies aimed at enriching CGDC metadata using entity linking methods.</abstract>
       <url hash="e2ef7153">2024.latechclfl-1.20</url>
@@ -330,7 +330,7 @@
     <paper id="28">
       <title>Stage Direction Classification in <fixed-case>F</fixed-case>rench Theater: Transfer Learning Experiments</title>
       <author><first>Alexia</first><last>Schneider</last><affiliation>Université de Strasbourg</affiliation></author>
-      <author><first>Pablo</first><last>Ruiz Fabo</last><affiliation>LiLPa, Université de Strasbourg</affiliation></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz Fabo</last><affiliation>LiLPa, Université de Strasbourg</affiliation></author>
       <pages>278-286</pages>
       <abstract>The automatic classification of stage directions is a little explored topic in computational drama analysis, in spite of their relevance for plays’ structural and stylistic analysis. With a view to start assessing good practices for the automatic annotation of this textual element, we developed a 13-class stage direction typology, based on annotations in the FreDraCor corpus (French-language plays), but abstracting away from their huge variability while still providing classes useful for literary research. We fine-tuned transformers-based models to classify against the typology, gradually decreasing the corpus size used for fine tuning, to compare model efficiency with reduced training data. A result comparison speaks in favour of distilled monolingual models for this task, and, unlike earlier research on German, shows no negative effects of model case-sensitivity. The results have practical relevance for computational literary studies, as comparing classification results with complementary stage direction typologies, limiting the amount of manual annotation needed to apply them, would be helpful towards a systematic study of this important textual element.</abstract>
       <url hash="d76d1bc2">2024.latechclfl-1.28</url>
diff --git a/data/xml/2024.law.xml b/data/xml/2024.law.xml
index b1c9bfce53..fa319e5212 100644
--- a/data/xml/2024.law.xml
+++ b/data/xml/2024.law.xml
@@ -45,7 +45,7 @@
       <author><first>Muyin</first><last>Yao</last><affiliation>Tufts University</affiliation></author>
       <author><first>Xinyi</first><last>Hu</last><affiliation>Tufts University</affiliation></author>
       <author><first>Xiaoning</first><last>Zhu</last><affiliation>Beihang University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University in the City of New York</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University in the City of New York</affiliation></author>
       <pages>19-28</pages>
       <abstract>In Emotion Detection within Natural Language Processing and related multimodal research, the growth of datasets and models has led to a challenge: disparities in emotion classification methods. The lack of commonly agreed upon conventions on the classification of emotions creates boundaries for model comparisons and dataset adaptation. In this paper, we compare the current classification methods in recent models and datasets and propose a valid method to combine different emotion categories. Our proposal arises from experiments across models, psychological theories, and human evaluations, and we examined the effect of proposed mapping on models.</abstract>
       <url hash="94c5c4db">2024.law-1.3</url>
@@ -78,7 +78,7 @@
       <title>Towards Better Inclusivity: A Diverse Tweet Corpus of <fixed-case>E</fixed-case>nglish Varieties</title>
       <author><first>Nhi</first><last>Pham</last><affiliation>New York University Abu Dhabi</affiliation></author>
       <author><first>Lachlan</first><last>Pham</last><affiliation>New York University Abu Dhabi</affiliation></author>
-      <author><first>Adam</first><last>Meyers</last><affiliation>New York University</affiliation></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last><affiliation>New York University</affiliation></author>
       <pages>61-70</pages>
       <abstract>The prevalence of social media presents a growing opportunity to collect and analyse examples of English varieties. Whilst usage of these varieties is often used only in spoken contexts or hard-to-access private messages, social media sites like Twitter provide a platform for users to communicate informally in a scrapeable format. Notably, Indian English (Hinglish), Singaporean English (Singlish), and African-American English (AAE) can be commonly found online. These varieties pose a challenge to existing natural language processing (NLP) tools as they often differ orthographically and syntactically from standard English for which the majority of these tools are built. NLP models trained on standard English texts produced biased outcomes for users of underrepresented varieties (Blodgett and O’Connor, 2017). Some research has aimed to overcome the inherent biases caused by unrepresentative data through techniques like data augmentation or adjusting training models. We aim to address the issue of bias at its root - the data itself. We curate a dataset of tweets from countries with high proportions of underserved English variety speakers, and propose an annotation framework of six categorical classifications along a pseudo-spectrum that measures the degree of standard English and that thereby indirectly aims to surface the manifestations of English varieties in these tweets.</abstract>
       <url hash="64a23995">2024.law-1.6</url>
@@ -87,9 +87,9 @@
     <paper id="7">
       <title>Building a corpus for the anonymization of <fixed-case>R</fixed-case>omanian jurisprudence</title>
       <author><first>Vasile</first><last>Păiș</last><affiliation>Research Institute for Artificial Intelligence, Romanian Academy</affiliation></author>
-      <author><first>Dan</first><last>Tufis</last><affiliation>Research Institute for Artificial Intelligence, Romanian Academy</affiliation></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last><affiliation>Research Institute for Artificial Intelligence, Romanian Academy</affiliation></author>
       <author><first>Elena</first><last>Irimia</last><affiliation>Research Institute for Artificial Intelligence, Romanian Academy (RACAI)</affiliation></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last><affiliation>RACAI</affiliation></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last><affiliation>RACAI</affiliation></author>
       <pages>71-76</pages>
       <abstract>Access to jurisprudence is of paramount importance for both law professionals (judges, lawyers, law students) and for the larger public. In Romania, the Superior Council of Magistracy holds a large database of jurisprudence from different courts in the country, which is updated daily. However, granting public access requires its anonymization. This paper presents the efforts behind building a corpus for the anonymization process. We present the annotation scheme, the manual annotation methods, and the platform used.</abstract>
       <url hash="2db28bf2">2024.law-1.7</url>
@@ -144,7 +144,7 @@
     <paper id="12">
       <title>Are You Serious? Handling Disagreement When Annotating Conspiracy Theory Texts</title>
       <author><first>Ashley</first><last>Hemm</last><affiliation>University of Miami</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <author><first>Michelle</first><last>Seelig</last><affiliation>University of Miami</affiliation></author>
       <author><first>John</first><last>Funchion</last><affiliation>University of Miami</affiliation></author>
       <author><first>Manohar</first><last>Murthi</last><affiliation>University of Miami</affiliation></author>
@@ -161,7 +161,7 @@
       <title>A <fixed-case>GPT</fixed-case> among Annotators: <fixed-case>LLM</fixed-case>-based Entity-Level Sentiment Annotation</title>
       <author><first>Egil</first><last>Rønningstad</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept of Informatics, University of Oslo</affiliation></author>
       <pages>133-139</pages>
       <abstract>We investigate annotator variation for the novel task of Entity-Level Sentiment Analysis (ELSA) which annotates the aggregated sentiment directed towards volitional entities in a text. More specifically, we analyze the annotations of a newly constructed Norwegian ELSA dataset and release additional data with each annotator’s labels for the 247 entities in the dataset’s test split. We also perform a number of experiments prompting ChatGPT for these sentiment labels regarding each entity in the text and compare the generated annotations with the human labels. Cohen’s Kappa for agreement between the best LLM-generated labels and curated gold was 0.425, which indicates that these labels would not have high quality. Our analyses further investigate the errors that ChatGPT outputs, and compare them with the variations that we find among the 5 trained annotators that all annotated the same test data.</abstract>
       <url hash="86abc0d1">2024.law-1.13</url>
@@ -196,11 +196,11 @@
       <author><first>Kathryn</first><last>Conger</last><affiliation>Universitiy of Colorado, Boulder</affiliation></author>
       <author><first>Anatole</first><last>Gershman</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Mahir</first><last>Morshed</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Rosario</first><last>Uceda-Sosa</last><affiliation>IBM Research</affiliation></author>
       <author><first>Sijia</first><last>Ge</last><affiliation>University of Colorado-Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado</affiliation></author>
       <pages>166-175</pages>
       <abstract>This paper presents the first integration of PropBank role information into Wikidata, in order to provide a novel resource for information extraction, one combining Wikidata’s ontological metadata with PropBank’s rich argument structure encoding for event classes. We discuss a technique for PropBank augmentation to existing eventive Wikidata items, as well as identification of gaps in Wikidata’s coverage based on manual examination of over 11,300 PropBank rolesets. We propose five new Wikidata properties to integrate PropBank structure into Wikidata so that the annotated mappings can be added en masse. We then outline the methodology and challenges of this integration, including annotation with the combined resources.</abstract>
       <url hash="122b3ac7">2024.law-1.16</url>
@@ -234,7 +234,7 @@
       <author><first>Leon</first><last>Weber</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Robert</first><last>Litschko</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Ekaterina</first><last>Artemova</last><affiliation>Toloka.AI</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>197-215</pages>
       <abstract>Instruction tuning has become an integral part of training pipelines for Large Language Models (LLMs) and has been shown to yield strong performance gains. In an orthogonal line of research, Annotation Error Detection (AED) has emerged as a tool for detecting quality problems in gold standard labels. So far, however, the application of AED methods has been limited to classification tasks. It is an open question how well AED methods generalize to language generation settings, which are becoming more widespread via LLMs. In this paper, we present a first and novel benchmark for AED on instruction tuning data: Donkii.It comprises three instruction-tuning datasets enriched with error annotations by experts and semi-automatic methods. We also provide a novel taxonomy of error types for instruction-tuning data.We find that all three datasets contain clear errors, which sometimes propagate directly into instruction-tuned LLMs. We propose four AED baselines for the generative setting and evaluate them extensively on the newly introduced dataset. Our results show that the choice of the right AED method and model size is indeed crucial and derive practical recommendations for how to use AED methods to clean instruction-tuning data.</abstract>
       <url hash="bc0ad0ac">2024.law-1.19</url>
@@ -245,7 +245,7 @@
       <title><fixed-case>EEVEE</fixed-case>: An Easy Annotation Tool for Natural Language Processing</title>
       <author><first>Axel</first><last>Sorensen</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Rob</first><last>Van Der Goot</last><affiliation>IT University of Copenhagen</affiliation></author>
       <pages>216-221</pages>
       <abstract>Annotation tools are the starting point for creating Natural Language Processing (NLP) datasets. There is a wide variety of tools available; setting up these tools is however a hindrance. We propose Eevee, an annotation tool focused on simplicity, efficiency, and ease of use. It can run directly in the browser (no setup required) and uses tab-separated files (as opposed to character offsets or task-specific formats) for annotation. It allows for annotation of multiple tasks on a single dataset and supports four task-types: sequence labeling, span labeling, text classification and seq2seq.</abstract>
diff --git a/data/xml/2024.lchange.xml b/data/xml/2024.lchange.xml
index f075328c7f..0b5c5cfe4b 100644
--- a/data/xml/2024.lchange.xml
+++ b/data/xml/2024.lchange.xml
@@ -79,10 +79,10 @@
     <paper id="6">
       <title>Towards a <fixed-case>G</fixed-case>olden<fixed-case>H</fixed-case>ymns Dataset for Studying Diachronic Trends in 19th Century <fixed-case>D</fixed-case>anish Religious Hymns</title>
       <author><first>Ea</first><last>Lindhardt Overgaard</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
       <pages>55-61</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="5dc8f8b8">2024.lchange-1.6</url>
       <bibkey>lindhardt-overgaard-etal-2024-towards</bibkey>
       <doi>10.18653/v1/2024.lchange-1.6</doi>
@@ -144,9 +144,9 @@
     <paper id="12">
       <title><fixed-case>E</fixed-case>tymo<fixed-case>L</fixed-case>ink: A Structured <fixed-case>E</fixed-case>nglish Etymology Dataset</title>
       <author><first>Yuan</first><last>Gao</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
       <pages>126-136</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="183478e8">2024.lchange-1.12</url>
       <bibkey>gao-sun-2024-etymolink</bibkey>
       <doi>10.18653/v1/2024.lchange-1.12</doi>
@@ -168,9 +168,9 @@
       <author><first>Robin</first><last>Cooper</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Elina</first><last>Lindgren</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Gregor</first><last>Rettenegger</last><affiliation>University of Gothenburg</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <pages>144-157</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="c1a2429b">2024.lchange-1.14</url>
       <bibkey>boholm-etal-2024-political</bibkey>
       <doi>10.18653/v1/2024.lchange-1.14</doi>
@@ -190,9 +190,9 @@
       <title>Deep-change at <fixed-case>AXOLOTL</fixed-case>-24: Orchestrating <fixed-case>WSD</fixed-case> and <fixed-case>WSI</fixed-case> Models for Semantic Change Modeling</title>
       <author><first>Denis</first><last>Kokosinskii</last><affiliation>Moscow State University and SaluteDevices</affiliation></author>
       <author><first>Mikhail</first><last>Kuklin</last><affiliation>Moscow State University and Yandex</affiliation></author>
-      <author><first>Nikolay</first><last>Arefyev</last><affiliation>University of Oslo</affiliation></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last><affiliation>University of Oslo</affiliation></author>
       <pages>168-179</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="6451ba2b">2024.lchange-1.16</url>
       <bibkey>kokosinskii-etal-2024-deep</bibkey>
       <doi>10.18653/v1/2024.lchange-1.16</doi>
diff --git a/data/xml/2024.ldl.xml b/data/xml/2024.ldl.xml
index 213a429b16..2470ff2004 100644
--- a/data/xml/2024.ldl.xml
+++ b/data/xml/2024.ldl.xml
@@ -6,8 +6,8 @@
       <editor><first>Christian</first><last>Chiarcos</last></editor>
       <editor><first>Katerina</first><last>Gkirtzou</last></editor>
       <editor><first>Maxim</first><last>Ionov</last></editor>
-      <editor><first>Fahad</first><last>Khan</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="fahad-khan"><first>Fahad</first><last>Khan</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <editor><first>Elena Montiel</first><last>Ponsoda</last></editor>
       <editor><first>Patricia Martín</first><last>Chozas</last></editor>
       <publisher>ELRA and ICCL</publisher>
@@ -31,7 +31,7 @@
       <author><first>Giedre</first><last>Valunaite Oleskeviciene</last></author>
       <author><first>Elena-Simona</first><last>Apostol</last></author>
       <author><first>Ciprian-Octavian</first><last>Truica</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
       <pages>1–10</pages>
       <abstract>This article proposes a linguistic linked open data model for diachronic analysis (LLODIA) that combines data derived from diachronic analysis of multilingual corpora with dictionary-based evidence. A humanities use case was devised as a proof of concept that includes examples in five languages (French, Hebrew, Latin, Lithuanian and Romanian) related to various meanings of the term “revolution” considered at different time intervals. The examples were compiled through diachronic word embedding and dictionary alignment.</abstract>
       <url hash="d291e3f7">2024.ldl-1.1</url>
@@ -84,7 +84,7 @@
       <author><first>Rute</first><last>Costa</last></author>
       <author><first>Chamila</first><last>Liyanage</last></author>
       <author><first>John P.</first><last>McCrae</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <pages>44–48</pages>
@@ -164,7 +164,7 @@
     </paper>
     <paper id="12">
       <title>Linguistic <fixed-case>LOD</fixed-case> for Interoperable Morphological Description</title>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Maxim</first><last>Ionov</last></author>
       <pages>94–102</pages>
       <abstract>Interoperability is a characteristic of a product or system that seamlessly works with another product or system and implies a certain level of independence from the context of use. Turning to language resources, interoperability is frequently cited as one important rationale underlying the use of LLOD representations and is generally regarded as highly desirable. In this paper we further elaborate this theme, distinguishing three different kinds of interoperability providing practical implementations with examples from morphology.</abstract>
@@ -181,7 +181,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex Publication Made Easy: A Dataset of Verbal Aspectual Pairs for <fixed-case>B</fixed-case>osnian, <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Maxim</first><last>Ionov</last></author>
       <author><first>Medina</first><last>Bajtarević</last></author>
       <author><first>Lorena</first><last>Ninčević</last></author>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="15">
       <title>Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Milica</first><last>Ikonić Nešić</last></author>
       <author><first>Olja</first><last>Perisic</last></author>
       <author><first>Mihailo</first><last>Škorić</last></author>
diff --git a/data/xml/2024.legal.xml b/data/xml/2024.legal.xml
index abc142a4b8..38cd29a07d 100644
--- a/data/xml/2024.legal.xml
+++ b/data/xml/2024.legal.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Legal and Ethical Issues in Human Language Technologies @ LREC-COLING 2024</booktitle>
       <editor><first>Ingo</first><last>Siegert</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="7">
       <title>Selling Personal Information: Data Brokers and the Limits of <fixed-case>US</fixed-case> Regulation</title>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <pages>39–46</pages>
       <abstract>A principal pillar of the US Blueprint for an AI Bill of Rights is data privacy, specifically, that individuals should be protected from abusive practices by data collectors and data aggregators, and that users should have control over how their personal information is collected and used. An area that spotlights the need for such protections is found in the common practices of data brokers who scrape, purchase, process and reassemble personal information in bulk and sell it for a variety of downstream uses. Such activities almost always occur in the absence of users’ knowledge or meaningful consent, yet they are legal under US law. This paper examines how data brokers operate, provides some examples of recent US regulatory actions taken against them, summarizes federal efforts to redress data broker practices and concludes that as long as there continues to be no comprehensive federal data protection and privacy scheme, efforts to control such behavior will have only a limited effect. This paper also addresses the limits of informed consent on the use of personal information in language resources and suggests a solution in an holistic approach to data protection and privacy across the data/development life cycle.</abstract>
       <url hash="497860df">2024.legal-1.7</url>
diff --git a/data/xml/2024.loresmt.xml b/data/xml/2024.loresmt.xml
index 951fc348aa..e9a6c8cbe7 100644
--- a/data/xml/2024.loresmt.xml
+++ b/data/xml/2024.loresmt.xml
@@ -3,12 +3,12 @@
   <volume id="1" ingest-date="2024-07-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2024)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-hong</first><last>Liu</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Flammie</first><last>Pirinen</last></editor>
       <editor><first>Jade</first><last>Abbott</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
@@ -62,7 +62,7 @@
       <title>Challenges in <fixed-case>U</fixed-case>rdu Machine Translation</title>
       <author><first>Abdul</first><last>Basit</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <author><first>Abdul Hameed</first><last>Azeemi</last><affiliation>Lahore University of Management Sciences</affiliation></author>
-      <author><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <pages>44-49</pages>
       <abstract>Recent advancements in Neural Machine Translation (NMT) systems have significantly improved model performance on various translation benchmarks. However, these systems still face numerous challenges when translating low-resource languages such as Urdu. In this work, we highlight the specific issues faced by machine translation systems when translating Urdu language. We first conduct a comprehensive evaluation of English to Urdu Machine Translation with four diverse models: GPT-3.5 (a large language model), opus-mt-en-ur (a bilingual translation model), NLLB (a model trained for translating 200 languages), and IndicTrans2 (a specialized model for translating low-resource Indic languages). The results demonstrate that IndicTrans2 significantly outperforms other models in Urdu Machine Translation. To understand the differences in the performance of these models, we analyze the Urdu word distribution in different training datasets and compare the training methodologies. Finally, we uncover the specific translation issues and provide suggestions for improvements in Urdu machine translation systems.</abstract>
       <url hash="e7838117">2024.loresmt-1.4</url>
@@ -122,7 +122,7 @@
       <title>Enhancing <fixed-case>T</fixed-case>urkish Word Segmentation: A Focus on Borrowed Words and Invalid Morpheme</title>
       <author><first>Soheila</first><last>Behrooznia</last></author>
       <author><first>Ebrahim</first><last>Ansari</last><affiliation>Zanjan Institute for Advanced Studies in Basic Sciences</affiliation></author>
-      <author><first>Zdenek</first><last>Zabokrtsky</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
+      <author id="zdenek-zabokrtsky"><first>Zdenek</first><last>Zabokrtsky</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
       <pages>85-93</pages>
       <abstract>This study addresses a challenge in morphological segmentation: accurately segmenting words in languages with rich morphology. Current probabilistic methods, such as Morfessor, often produce results that lack consistency with human-segmented words. Our study adds some steps to the Morfessor segmentation process to consider invalid morphemes and borrowed words from other languages to improve morphological segmentation significantly. Comparing our idea to the results obtained from Morfessor demonstrates its efficiency, leading to more accurate morphology segmentation. This is particularly evident in the case of Turkish, highlighting the potential for further advancements in morpheme segmentation for morphologically rich languages.</abstract>
       <url hash="cf09e57e">2024.loresmt-1.9</url>
@@ -205,7 +205,7 @@
     <paper id="16">
       <title>Adopting Ensemble Learning for Cross-lingual Classification of Crisis-related Text On Social Media</title>
       <author><first>Shareefa</first><last>Al Amer</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <author><first>Phillip</first><last>Smith</last><affiliation>University of Birmingham</affiliation></author>
       <pages>159-165</pages>
       <abstract>Cross-lingual classification poses a significant challenge in Natural Language Processing (NLP), especially when dealing with languages with scarce training data. This paper delves into the adaptation of ensemble learning to address this challenge, specifically for disaster-related social media texts. Initially, we employ Machine Translation to generate a parallel corpus in the target language to mitigate the issue of data scarcity and foster a robust training environment. Following this, we implement the bagging ensemble technique, integrating multiple classifiers into a cohesive model that demonstrates enhanced performance over individual classifiers. Our experimental results reveal significant improvements in adapting models for Arabic, utilising only English training data and markedly outperforming models intended for linguistically similar languages to English, with our ensemble model achieving an accuracy and F1 score of 0.78 when tested on original Arabic data. This research makes a substantial contribution to the field of cross-lingual classification, establishing a new benchmark for enhancing the effectiveness of language transfer in linguistically challenging scenarios.</abstract>
diff --git a/data/xml/2024.lrec.xml b/data/xml/2024.lrec.xml
index 5a446fd00d..bbca69b970 100644
--- a/data/xml/2024.lrec.xml
+++ b/data/xml/2024.lrec.xml
@@ -3,9 +3,9 @@
   <volume id="main" ingest-date="2024-05-17" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
       <editor><first>Min-Yen</first><last>Kan</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
       <editor><first>Alessandro</first><last>Lenci</last></editor>
       <editor><first>Sakriani</first><last>Sakti</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
@@ -40,8 +40,8 @@
     <paper id="2">
       <title>A Benchmark Evaluation of Clinical Named Entity Recognition in <fixed-case>F</fixed-case>rench</title>
       <author><first>Nesrine</first><last>Bannour</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>14–21</pages>
       <abstract>Background: Transformer-based language models have shown strong performance on many Natural Language Processing (NLP) tasks. Masked Language Models (MLMs) attract sustained interest because they can be adapted to different languages and sub-domains through training or fine-tuning on specific corpora while remaining lighter than modern Large Language Models (MLMs). Recently, several MLMs have been released for the biomedical domain in French, and experiments suggest that they outperform standard French counterparts. However, no systematic evaluation comparing all models on the same corpora is available. Objective: This paper presents an evaluation of masked language models for biomedical French on the task of clinical named entity recognition. Material and methods: We evaluate biomedical models CamemBERT-bio and DrBERT and compare them to standard French models CamemBERT, FlauBERT and FrAlBERT as well as multilingual mBERT using three publically available corpora for clinical named entity recognition in French. The evaluation set-up relies on gold-standard corpora as released by the corpus developers. Results: Results suggest that CamemBERT-bio outperforms DrBERT consistently while FlauBERT offers competitive performance and FrAlBERT achieves the lowest carbon footprint. Conclusion: This is the first benchmark evaluation of biomedical masked language models for French clinical entity recognition that compares model performance consistently on nested entity recognition using metrics covering performance and environmental impact.</abstract>
@@ -67,7 +67,7 @@
       <title><fixed-case>ABLE</fixed-case>: Agency-<fixed-case>B</fixed-case>e<fixed-case>L</fixed-case>iefs Embedding to Address Stereotypical Bias through Awareness Instead of Obliviousness</title>
       <author><first>Michelle YoungJin</first><last>Kim</last></author>
       <author><first>Junghwan</first><last>Kim</last></author>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <pages>43–56</pages>
       <abstract>Natural Language Processing (NLP) models tend to inherit and amplify stereotypical biases present in their training data, leading to harmful societal consequences. Current efforts to rectify these biases typically revolve around making models oblivious to bias, which is at odds with the idea that humans require increased awareness to tackle these biases better. This prompts a fundamental research question: are bias-oblivious models the only viable solution to combat stereotypical biases? This paper answers this question by proposing the Agency-BeLiefs Embedding (ABLE) model, a novel approach that actively encodes stereotypical biases into the embedding space. ABLE draws upon social psychological theory to acquire and represent stereotypical biases in the form of agency and belief scores rather than directly representing stereotyped groups. Our experimental results showcase ABLE’s effectiveness in learning agency and belief stereotypes while preserving the language model’s proficiency. Furthermore, we underscore the practical significance of incorporating stereotypes within the ABLE model by demonstrating its utility in various downstream tasks. Our approach exemplifies the potential benefits of addressing bias through awareness, as opposed to the prevailing approach of mitigating bias through obliviousness.</abstract>
       <url hash="c457a9ea">2024.lrec-main.4</url>
@@ -101,9 +101,9 @@
       <author><first>Jungo</first><last>Kasai</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Ronan</first><last>Le Bras</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>77–90</pages>
       <abstract>Text generation with beam search has proven successful in a wide range of applications. We point out that, though largely overlooked in the literature, the commonly-used implementation of beam decoding (e.g., Hugging Face Transformers and fairseq) uses a first come, first served heuristic: it keeps a set of already completed sequences over time steps and stops when the size of this set reaches the beam size. Based on this finding, we introduce a patience factor, a simple modification to this beam decoding implementation, that generalizes the stopping criterion and provides flexibility to the depth of search. Empirical results demonstrate that adjusting this patience factor improves decoding performance of strong pretrained models on news text summarization and machine translation over diverse language pairs, with a negligible inference slowdown. Our approach only modifies one line of code and can be thus readily incorporated in any implementation. Further, we find that different versions of beam decoding result in large performance differences in summarization, demonstrating the need for clarity in specifying the beam search implementation in research work. Our code will be available upon publication.</abstract>
       <url hash="33231929">2024.lrec-main.7</url>
@@ -111,7 +111,7 @@
     </paper>
     <paper id="8">
       <title>A Canonical Form for Flexible Multiword Expressions</title>
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <author><first>Martin</first><last>Kroon</last></author>
       <pages>91–101</pages>
       <abstract>This paper proposes a canonical form for Multiword Expressions (MWEs), in particular for the Dutch language. The canonical form can be enriched with all kinds of annotations that can be used to describe the properties of the MWE and its components. It also introduces the DUCAME (DUtch CAnonical Multiword Expressions) lexical resource with more than 11k MWEs in canonical form. DUCAME is used in MWE-Finder to automatically generate queries for searching for flexible MWEs in large text corpora.</abstract>
@@ -160,7 +160,7 @@
     <paper id="12">
       <title>A Closer Look at Clustering Bilingual Comparable Corpora</title>
       <author><first>Anna</first><last>Laskina</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Gaelle</first><last>Calvary</last></author>
       <pages>133–142</pages>
       <abstract>We study in this paper the problem of clustering comparable corpora, building upon the observation that different types of clusters can be present in such corpora: monolingual clusters comprising documents in a single language, and bilingual or multilingual clusters comprising documents written in different languages. Based on a state-of-the-art deep variant of Kmeans, we propose new clustering models fully adapted to comparable corpora and illustrate their behavior on several bilingual collections (in English, French, German and Russian) created from Wikipedia.</abstract>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="14">
       <title>A Collection of Pragmatic-Similarity Judgments over Spoken Dialog Utterances</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <author><first>Divette</first><last>Marco</last></author>
       <pages>154–163</pages>
       <abstract>Automatic measures of similarity between sentences or utterances are invaluable for training speech synthesizers, evaluating machine translation, and assessing learner productions. While there exist measures for semantic similarity and prosodic similarity, there are as yet none for pragmatic similarity. To enable the training of such measures, we developed the first collection of human judgments of pragmatic similarity between utterance pairs. 9 judges listened to 220 utterance pairs, each consisting of an utterance extracted from a recorded dialog and a re-enactment of that utterance under various conditions designed to create various degrees of similarity. Each pair was rated on a continuous scale. The average inter-judge correlation was 0.45. We make this data available at https://github.com/divettemarco/PragSim .</abstract>
@@ -247,8 +247,8 @@
     </paper>
     <paper id="20">
       <title>A Computational Model of <fixed-case>L</fixed-case>atvian Morphology</title>
-      <author><first>Peteris</first><last>Paikens</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <pages>221–232</pages>
       <abstract>In this paper we describe a computational model of Latvian morphology that provides a formal structure for Latvian word form inflection and has been implemented in software for generation, analysis and lemmatization of Latvian word forms. The work was motivated by the need for a NLP inflection model that can cover all the complexity of Latvian language and explicitly enumerate and handle the many exceptions to the general Latvian inflection principles. This is an evolution of earlier work, extending the initial proof of concept model to properly cover Latvian language. We provide a set of morphological paradigms that differ from current linguistic tradition, a set of systematic stem changes and combine it with an extensive lexicon that includes paradigm information and structured morphological attributes for 118 000 lexemes. This model has been applied on both dictionary and corpora data, demonstrating that it provides a good coverage for modern Latvian literary language. We also consider that there is a good potential to extend this also to the related Latgalian language.</abstract>
@@ -258,7 +258,7 @@
     <paper id="21">
       <title>A Concept Based Approach for Translation of Medical Dialogues into Pictographs</title>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Hervé</first><last>Spechbach</last></author>
       <pages>233–242</pages>
@@ -268,7 +268,7 @@
     </paper>
     <paper id="22">
       <title>A Construction Grammar Corpus of Varying Schematicity: A Dataset for the Evaluation of Abstractions in Language Models</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
       <pages>243–255</pages>
       <abstract>Large Language Models (LLMs) have been developed without a theoretical framework, yet we posit that evaluating and improving LLMs will benefit from the development of theoretical frameworks that enable comparison of the structures of human language and the model of language built up by LLMs through the processing of text. In service of this goal, we develop the Construction Grammar Schematicity (“CoGS”) corpus of 10 distinct English constructions, where the constructions vary with respect to schematicity, or in other words the level to which constructional slots require specific, fixed lexical items, or can be filled with a variety of elements that fulfill a particular semantic role of the slot. Our corpus constructions are carefully curated to range from substantive, frozen constructions (e.g., Let-alone) to entirely schematic constructions (e.g., Resultative). The corpus was collected to allow us to probe LLMs for constructional information at varying levels of abstraction. We present our own probing experiments using this corpus, which clearly demonstrate that even the largest LLMs are limited to more substantive constructions and do not exhibit recognition of the similarity of purely schematic constructions. We publicly release our dataset, prompts, and associated model responses.</abstract>
@@ -279,7 +279,7 @@
       <title>A Controlled Reevaluation of Coreference Resolution Models</title>
       <author><first>Ian</first><last>Porada</last></author>
       <author><first>Xiyuan</first><last>Zou</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>256–263</pages>
       <abstract>All state-of-the-art coreference resolution (CR) models involve finetuning a pretrained language model. Whether the superior performance of one CR model over another is due to the choice of language model or other factors, such as the task-specific architecture, is difficult or impossible to determine due to lack of a standardized experimental setup. To resolve this ambiguity, we systematically evaluate five CR models and control for certain design decisions including the pretrained language model used by each. When controlling for language model size, encoder-based CR models outperform more recent decoder-based models in terms of both accuracy and inference speed. Surprisingly, among encoder-based CR models, more recent models are not always more accurate, and the oldest CR model that we test generalizes the best to out-of-domain textual genres. We conclude that controlling for the choice of language model reduces most, but not all, of the increase in F1 score reported in the past five years.</abstract>
       <url hash="30a4d8d3">2024.lrec-main.23</url>
@@ -291,7 +291,7 @@
       <author><first>Peiyan</first><last>Wang</last></author>
       <author><first>Libang</first><last>Wang</last></author>
       <author><first>Danqingxin</first><last>Yang</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <pages>264–272</pages>
       <abstract>Manufacturing specifications are documents entailing different techniques, processes, and components involved in manufacturing. There is a growing demand for named entity recognition (NER) resources and techniques for manufacturing-specific named entities, with the development of smart manufacturing. In this paper, we introduce a corpus of Chinese manufacturing specifications, named MS-NERC, including 4,424 sentences and 16,383 entities. We also propose an entity recognizer named Trainable State Transducer (TST), which is initialized with a finite state transducer describing the morphological patterns of entities. It can directly recognize entities based on prior morphological knowledge without training. Experimental results show that TST achieves an overall 82.05% F1 score for morphological-specific entities in zero-shot. TST can be improved through training, the result of which outperforms neural methods in few-shot and rich-resource. We believe that our corpus and model will be valuable resources for NER research not only in manufacturing but also in other low-resource domains.</abstract>
       <url hash="8b2e0f88">2024.lrec-main.24</url>
@@ -363,7 +363,7 @@
       <title>Active Learning Design Choices for <fixed-case>NER</fixed-case> with Transformers</title>
       <author><first>Robert</first><last>Vacareanu</last></author>
       <author><first>Enrique</first><last>Noriega-Atala</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Marco A.</first><last>Valenzuela-Escarcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>321–334</pages>
@@ -380,9 +380,9 @@
       <author><first>Severino</first><last>Da Dalt</last></author>
       <author><first>Joan</first><last>Llop</last></author>
       <author><first>Malte</first><last>Ostendorff</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>335–349</pages>
       <abstract>We present and describe two language resources in this paper: CATalog 1.0, the largest text corpus in Catalan to date, and CURATE (Corpus Utility for RAting TExt), a modular, parallelizable pipeline used for processing and scoring documents based on text quality that we have optimised to run in High Performance Cluster (HPC) environments. In the coming sections we describe our data preprocessing pipeline at length; traditional pipelines usually implement a set of binary filters such that a given document is either in or out. In our experience with Catalan, in lower-resource settings it is more practical to instead assign a document a soft score to allow for more flexible decision-making. We describe how the document score is calculated and highlight its interpretability by showing that it is significantly correlated with human judgements as obtained from a comparative judgement experiment. We additionally describe the different subcorpora that make up CATalog 1.0.</abstract>
@@ -443,15 +443,15 @@
       <author><first>Shuntaro</first><last>Yada</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Philippe</first><last>Thomas</last></author>
       <author><first>Tomohiro</first><last>Nishiyama</last></author>
       <author><first>Sebastian</first><last>Möller</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Roland</first><last>Roller</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>395–414</pages>
       <abstract>User-generated data sources have gained significance in uncovering Adverse Drug Reactions (ADRs), with an increasing number of discussions occurring in the digital world. However, the existing clinical corpora predominantly revolve around scientific articles in English. This work presents a multilingual corpus of texts concerning ADRs gathered from diverse sources, including patient fora, social media, and clinical reports in German, French, and Japanese. Our corpus contains annotations covering 12 entity types, four attribute types, and 13 relation types. It contributes to the development of real-world multilingual language models for healthcare. We provide statistics to highlight certain challenges associated with the corpus and conduct preliminary experiments resulting in strong baselines for extracting entities and relations between these entities, both within and across languages.</abstract>
       <url hash="edaaab95">2024.lrec-main.36</url>
@@ -495,7 +495,7 @@
       <title>A Differentiable Integer Linear Programming Solver for Explanation-Based Natural Language Inference</title>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>449–458</pages>
       <abstract>Integer Linear Programming (ILP) has been proposed as a formalism for encoding precise structural and semantic constraints for Natural Language Inference (NLI). However, traditional ILP frameworks are non-differentiable, posing critical challenges for the integration of continuous language representations based on deep learning. In this paper, we introduce a novel approach, named Diff-Comb Explainer, a neuro-symbolic architecture for explanation-based NLI based on Differentiable BlackBox Combinatorial Solvers (DBCS). Differently from existing neuro-symbolic solvers, Diff-Comb Explainer does not necessitate a continuous relaxation of the semantic constraints, enabling a direct, more precise, and efficient incorporation of neural representations into the ILP formulation. Our experiments demonstrate that Diff-Comb Explainer achieves superior performance when compared to conventional ILP solvers, neuro-symbolic black-box solvers, and Transformer-based encoders. Moreover, a deeper analysis reveals that Diff-Comb Explainer can significantly improve the precision, consistency, and faithfulness of the constructed explanations, opening new opportunities for research on neuro-symbolic architectures for explainable and transparent NLI in complex domains.</abstract>
       <url hash="579f3241">2024.lrec-main.40</url>
@@ -537,7 +537,7 @@
       <author><first>Weihao</first><last>Liu</last></author>
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>495–506</pages>
       <abstract>Topic segmentation and outline generation strive to divide a document into coherent topic sections and generate corresponding subheadings, unveiling the discourse topic structure of a document. Compared with sentence-level topic structure, the paragraph-level topic structure can quickly grasp and understand the overall context of the document from a higher level, benefitting many downstream tasks such as summarization, discourse parsing, and information retrieval. However, the lack of large-scale, high-quality Chinese paragraph-level topic structure corpora restrained relative research and applications. To fill this gap, we build the Chinese paragraph-level topic representation, corpus, and benchmark in this paper. Firstly, we propose a hierarchical paragraph-level topic structure representation with three layers to guide the corpus construction. Then, we employ a two-stage man-machine collaborative annotation method to construct the largest Chinese Paragraph-level Topic Structure corpus (CPTS), achieving high quality. We also build several strong baselines, including ChatGPT, to validate the computability of CPTS on two fundamental tasks (topic segmentation and outline generation) and preliminarily verified its usefulness for the downstream task (discourse parsing).</abstract>
@@ -609,7 +609,7 @@
       <author><first>Anoop</first><last>Kumar</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>572–583</pages>
       <abstract>This paper introduces a novel problem of automated question generation for courtroom examinations, CourtQG. While question generation has been studied in domains such as educational testing and product description, CourtQG poses several unique challenges owing to its non-cooperative and agenda-driven nature. Specifically, not only the generated questions need to be relevant to the case and underlying context, they also have to achieve certain objectives such as challenging the opponent’s arguments and/or revealing potential inconsistencies in their answers. We propose to leverage large language models (LLM) for CourtQG by fine-tuning them on two auxiliary tasks, agenda explanation (i.e., uncovering the underlying intents) and question type prediction. We additionally propose cold-start generation of questions from background documents without relying on examination history. We construct a dataset to evaluate our proposed method and show that it generates better questions according to standard metrics when compared to several baselines.</abstract>
       <url hash="c823d7b6">2024.lrec-main.49</url>
@@ -657,7 +657,7 @@
       <author><first>Ding</first><last>Wang</last></author>
       <author><first>Xiaofeng</first><last>Mou</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>609–625</pages>
       <abstract>Recent advancements in Chain-of-Thought prompting have facilitated significant breakthroughs for Large Language Models (LLMs) in complex reasoning tasks. Current research enhances the reasoning performance of LLMs by sampling multiple reasoning chains and ensembling based on the answer frequency. However, this approach fails in scenarios where the correct answers are in the minority. We identify this as a primary factor constraining the reasoning capabilities of LLMs, a limitation that cannot be resolved solely based on the predicted answers. To address this shortcoming, we introduce a hierarchical reasoning aggregation framework AoR (Aggregation of Reasoning), which selects answers based on the evaluation of reasoning chains. Additionally, AoR incorporates dynamic sampling, adjusting the number of reasoning chains in accordance with the complexity of the task. Experimental results on a series of complex reasoning tasks show that AoR outperforms prominent ensemble methods. Further analysis reveals that AoR not only adapts various LLMs but also achieves a superior performance ceiling when compared to current methods.</abstract>
       <url hash="17cb78aa">2024.lrec-main.53</url>
@@ -791,7 +791,7 @@
     <paper id="65">
       <title>A Linguistically-Informed Annotation Strategy for <fixed-case>K</fixed-case>orean Semantic Role Labeling</title>
       <author><first>Yige</first><last>Chen</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <pages>733–738</pages>
       <abstract>Semantic role labeling is an essential component of semantic and syntactic processing of natural languages, which reveals the predicate-argument structure of the language. Despite its importance, semantic role labeling for the Korean language has not been studied extensively. One notable issue is the lack of uniformity among data annotation strategies across different datasets, which often lack thorough rationales. In this study, we suggest an annotation strategy for Korean semantic role labeling that is in line with the previously proposed linguistic theories as well as the distinct properties of the Korean language. We further propose a simple yet viable conversion strategy from the Sejong verb dictionary to a CoNLL-style dataset for Korean semantic role labeling. Experiment results using a transformer-based sequence labeling model demonstrate the reliability and trainability of the converted dataset.</abstract>
@@ -859,7 +859,7 @@
     <paper id="71">
       <title>A Matter of Perspective: Building a Multi-Perspective Annotated Dataset for the Study of Literary Quality</title>
       <author><first>Yuri</first><last>Bizzoni</last></author>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <author><first>Ida Marie S.</first><last>Lassen</last></author>
       <author><first>Mads Rosendahl</first><last>Thomsen</last></author>
       <author><first>Kristoffer L.</first><last>Nielbo</last></author>
@@ -872,7 +872,7 @@
       <title><fixed-case>AM</fixed-case>en<fixed-case>D</fixed-case>e<fixed-case>D</fixed-case>: Modelling Concepts by Aligning Mentions, Definitions and Decontextualised Embeddings</title>
       <author><first>Amit</first><last>Gajbhiye</last></author>
       <author><first>Zied</first><last>Bouraoui</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>801–811</pages>
       <abstract>Contextualised Language Models (LM) improve on traditional word embeddings by encoding the meaning of words in context. However, such models have also made it possible to learn high-quality decontextualised concept embeddings. Three main strategies for learning such embeddings have thus far been considered: (i) fine-tuning the LM to directly predict concept embeddings from the name of the concept itself, (ii) averaging contextualised representations of mentions of the concept in a corpus, and (iii) encoding definitions of the concept. As these strategies have complementary strengths and weaknesses, we propose to learn a unified embedding space in which all three types of representations can be integrated. We show that this allows us to outperform existing approaches in tasks such as ontology completion, which heavily depends on access to high-quality concept embeddings. We furthermore find that mentions and definitions are well-aligned in the resulting space, enabling tasks such as target sense verification, even without the need for any fine-tuning.</abstract>
@@ -1038,7 +1038,7 @@
       <author><first>Katsumi</first><last>Ibaraki</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>959–973</pages>
       <abstract>Recent advances in large language models (LLMs) have enabled users to generate fluent and seemingly convincing text. However, these models have uneven performance in different languages, which is also associated with undesirable societal biases toward marginalized populations. Specifically, there is relatively little work on Japanese models, despite it being the thirteenth most widely spoken language. In this work, we first develop three Japanese language prompts to probe LLMs’ understanding of Japanese names and their association between gender and occupations. We then evaluate a variety of English, multilingual, and Japanese models, correlating the models’ outputs with occupation statistics from the Japanese Census Bureau from the last 100 years. Our findings indicate that models can associate Japanese names with the correct gendered occupations when using constrained decoding. However, with sampling or greedy decoding, Japanese language models have a preference for a small set of stereotypically gendered occupations, and multilingual models, though trained on Japanese, are not always able to understand Japanese prompts.</abstract>
       <url hash="15364c47">2024.lrec-main.86</url>
@@ -1050,7 +1050,7 @@
       <author><first>Kirill</first><last>Milintsevich</last></author>
       <author><first>Lucie</first><last>Metivier</last></author>
       <author><first>Maud</first><last>Rotharmel</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Sonia</first><last>Dollfus</last></author>
       <pages>974–983</pages>
       <abstract>The ever-growing number of people suffering from mental distress has motivated significant research initiatives towards automated depression estimation. Despite the multidisciplinary nature of the task, very few of these approaches include medical professionals in their research process, thus ignoring a vital source of domain knowledge. In this paper, we propose to bring the domain experts back into the loop and incorporate their knowledge within the gold-standard DAIC-WOZ dataset. In particular, we define a novel transformer-based architecture and analyse its performance in light of our expert annotations. Overall findings demonstrate a strong correlation between the psychological tendencies of medical professionals and the behavior of the proposed model, which additionally provides new state-of-the-art results.</abstract>
@@ -1081,8 +1081,8 @@
     </paper>
     <paper id="90">
       <title>Analyzing the Understanding of Morphologically Complex Words in Large Language Models</title>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1009–1020</pages>
       <abstract>We empirically study the ability of a Large Language Model (gpt-3.5-turbo-instruct) to understand morphologically complex words. In our experiments, we looked at a variety of tasks to analyse German compounds with regard to compositional word formation and derivation, such as identifying the head noun of existing and novel compounds, identifying the shared verb stem between two words, or recognizing words constructed with inappropriately used derivation morphemes as invalid. Our results show that the language model is generally capable of solving most tasks, except for the task of identifying ill-formed word forms. While the model demonstrated a good overall understanding of complex words and their word-internal structure, the results also suggest that there is no formal knowledge of derivational rules, but rather an interpretation of the observed word parts to derive the meaning of a word.</abstract>
       <url hash="d15da0a6">2024.lrec-main.90</url>
@@ -1136,7 +1136,7 @@
       <title>An Effective Span-based Multimodal Named Entity Recognition with Consistent Cross-Modal Alignment</title>
       <author><first>Yongxiu</first><last>Xu</last></author>
       <author><first>Hao</first><last>Xu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Shiyao</first><last>Cui</last></author>
       <author><first>Minghao</first><last>Tang</last></author>
       <author><first>Longzheng</first><last>Wang</last></author>
@@ -1160,7 +1160,7 @@
       <title>An Empirical Study on the Robustness of Massively Multilingual Neural Machine Translation</title>
       <author><first>Supryadi</first><last>Supryadi</last></author>
       <author><first>Leiyu</first><last>Pan</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>1086–1097</pages>
       <abstract>Massively multilingual neural machine translation (MMNMT) has been proven to enhance the translation quality of low-resource languages. In this paper, we empirically investigate the translation robustness of Indonesian-Chinese translation in the face of various naturally occurring noise. To assess this, we create a robustness evaluation benchmark dataset for Indonesian-Chinese translation. This dataset is automatically translated into Chinese using four NLLB-200 models of different sizes. We conduct both automatic and human evaluations. Our in-depth analysis reveal the correlations between translation error types and the types of noise present, how these correlations change across different model sizes, and the relationships between automatic evaluation indicators and human evaluation indicators. The dataset is publicly available at https://github.com/tjunlp-lab/ID-ZH-MTRobustEval.</abstract>
       <url hash="ac523217">2024.lrec-main.97</url>
@@ -1169,7 +1169,7 @@
     <paper id="98">
       <title>An Evaluation of <fixed-case>C</fixed-case>roatian <fixed-case>ASR</fixed-case> Models for Čakavian Transcription</title>
       <author><first>Shulin</first><last>Zhang</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Margaret</first><last>Renwick</last></author>
       <author><first>Zvjezdana</first><last>Vrzić</last></author>
       <author><first>Keith</first><last>Langston</last></author>
@@ -1192,19 +1192,19 @@
     </paper>
     <paper id="100">
       <title>A New Massive Multilingual Dataset for High-Performance Language Technologies</title>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Graeme</first><last>Nail</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
       <author><first>Jelmer</first><last>van der Linde</last></author>
       <author><first>Shaoxiong</first><last>Ji</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
       <author><first>Mikko</first><last>Aulamo</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>1116–1128</pages>
       <abstract>We present the HPLT (High Performance Language Technologies) language resources, a new massive multilingual dataset including both monolingual and bilingual corpora extracted from CommonCrawl and previously unused web crawls from the Internet Archive. We describe our methods for data acquisition, management and processing of large corpora, which rely on open-source software tools and high-performance computing. Our monolingual collection focuses on low- to medium-resourced languages and covers 75 languages and a total of ≈ 5.6 trillion word tokens de-duplicated on the document level. Our English-centric parallel corpus is derived from its monolingual counterpart and covers 18 language pairs and more than 96 million aligned sentence pairs with roughly 1.4 billion English tokens. The HPLT language resources are one of the largest open text corpora ever released, providing a great resource for language modeling and machine translation training. We publicly release the corpora, the software, and the tools used in this work.</abstract>
       <url hash="755abc1c">2024.lrec-main.100</url>
@@ -1272,8 +1272,8 @@
       <title>Annotating <fixed-case>C</fixed-case>hinese Word Senses with <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: A Practice on <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes <fixed-case>C</fixed-case>hinese Sense Inventories</title>
       <author><first>Hongzhi</first><last>Xu</last></author>
       <author><first>Jingxia</first><last>Lin</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <author><first>Ming</first><last>Liu</last></author>
       <pages>1187–1196</pages>
       <abstract>In this paper, we present our exploration of annotating Chinese word senses using English WordNet synsets, with examples extracted from OntoNotes Chinese sense inventories. Given a target word along with the example that contains it, the annotators select a WordNet synset that best describes the meaning of the target word in the context. The result demonstrates an inter-annotator agreement of 38% between two annotators. We delve into the instances of disagreement by comparing the two annotated synsets, including their positions within the WordNet hierarchy. The examination reveals intriguing patterns among closely related synsets, shedding light on similar concepts represented within the WordNet structure. The data offers as an indirect linking of Chinese word senses defined in OntoNotes Chinese sense inventories to WordNet sysnets, and thus promotes the value of the OntoNotes corpus. Compared to a direct linking of Chinese word senses to WordNet synsets, the example-based annotation has the merit of not being affected by inaccurate sense definitions and thus offers a new way of mapping WordNets of different languages. At the same time, the annotated data also serves as a valuable linguistic resource for exploring potential lexical differences between English and Chinese, with potential contributions to the broader understanding of cross-linguistic semantic mapping</abstract>
@@ -1295,7 +1295,7 @@
       <author><first>Pietro Giovanni</first><last>Bizzaro</last></author>
       <author><first>Elena</first><last>Della Valentina</last></author>
       <author><first>Maurizio</first><last>Napolitano</last></author>
-      <author><first>Nadia</first><last>Mana</last></author>
+      <author id="nadia-mana"><first>Nadia</first><last>Mana</last></author>
       <author><first>Massimo</first><last>Zancanaro</last></author>
       <pages>1209–1214</pages>
       <abstract>In this paper, we propose a new annotation scheme to classify different types of clauses in Terms-and-Conditions contracts with the ultimate goal of supporting legal experts to quickly identify and assess problematic issues in this type of legal documents. To this end, we built a small corpus of Terms-and-Conditions contracts and finalized an annotation scheme of 14 categories, eventually reaching an inter-annotator agreement of 0.92. Then, for 11 of them, we experimented with binary classification tasks using few-shot prompting with a multilingual T5 and two fine-tuned versions of two BERT-based LLMs for Italian. Our experiments showed the feasibility of automatic classification of our categories by reaching accuracies ranging from .79 to .95 on validation tasks.</abstract>
@@ -1343,7 +1343,7 @@
       <author><first>Oana</first><last>Ignat</last></author>
       <author><first>Longju</first><last>Bai</last></author>
       <author><first>Joan C.</first><last>Nwatu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1239–1259</pages>
       <abstract>Current foundation models have shown impressive performance across various tasks. However, several studies have revealed that these models are not effective for everyone due to the imbalanced geographical and economic representation of the data used in the training process. Most of this data comes from Western countries, leading to poor results for underrepresented countries. To address this issue, more data needs to be collected from these countries, but the cost of annotation can be a significant bottleneck. In this paper, we propose methods to identify the data to be annotated to balance model performance and annotation costs. Our approach first involves finding the countries with images of topics (objects and actions) most visually distinct from those already in the training datasets used by current large vision-language foundation models. Next, we identify countries with higher visual similarity for these topics and show that using data from these countries to supplement the training data improves model performance and reduces annotation costs. The resulting lists of countries and corresponding topics are made available at https://github.com/MichiganNLP/visual_diversity_budget.</abstract>
       <url hash="58329182">2024.lrec-main.112</url>
@@ -1353,7 +1353,7 @@
       <title><fixed-case>A</fixed-case>nno<fixed-case>T</fixed-case>heia: A Semi-Automatic Annotation Toolkit for Audio-Visual Speech Technologies</title>
       <author><first>José-M.</first><last>Acosta-Triana</last></author>
       <author><first>David</first><last>Gimeno-Gómez</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>1260–1269</pages>
       <abstract>More than 7,000 known languages are spoken around the world. However, due to the lack of annotated resources, only a small fraction of them are currently covered by speech technologies. Albeit self-supervised speech representations, recent massive speech corpora collections, as well as the organization of challenges, have alleviated this inequality, most studies are mainly benchmarked on English. This situation is aggravated when tasks involving both acoustic and visual speech modalities are addressed. In order to promote research on low-resource languages for audio-visual speech technologies, we present AnnoTheia, a semi-automatic annotation toolkit that detects when a person speaks on the scene and the corresponding transcription. In addition, to show the complete process of preparing AnnoTheia for a language of interest, we also describe the adaptation of a pre-trained model for active speaker detection to Spanish, using a database not initially conceived for this type of task. Prior evaluations show that the toolkit is able to speed up to four times the annotation process. The AnnoTheia toolkit, tutorials, and pre-trained models are available at https://github.com/joactr/AnnoTheia/.</abstract>
       <url hash="f3705195">2024.lrec-main.113</url>
@@ -1377,9 +1377,9 @@
       <author><first>Giridhar Kaushik</first><last>Ramachandran</last></author>
       <author><first>Spencer</first><last>Lewis</last></author>
       <author><first>Aashka</first><last>Damani</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Martin</first><last>Gunn</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>1280–1292</pages>
       <abstract>Medical imaging is critical to the diagnosis, surveillance, and treatment of many health conditions, including oncological, neurological, cardiovascular, and musculoskeletal disorders, among others. Radiologists interpret these complex, unstructured images and articulate their assessments through narrative reports that remain largely unstructured. This unstructured narrative must be converted into a structured semantic representation to facilitate secondary applications such as retrospective analyses or clinical decision support. Here, we introduce the Corpus of Annotated Medical Imaging Reports (CAMIR), which includes 609 annotated radiology reports from three imaging modality types: Computed Tomography, Magnetic Resonance Imaging, and Positron Emission Tomography-Computed Tomography. Reports were annotated using an event-based schema that captures clinical indications, lesions, and medical problems. Each event consists of a trigger and multiple arguments, and a majority of the argument types, including anatomy, normalize the spans to pre-defined concepts to facilitate secondary use. CAMIR uniquely combines a granular event structure and concept normalization. To extract CAMIR events, we explored two BERT (Bi-directional Encoder Representation from Transformers)-based architectures, including an existing architecture (mSpERT) that jointly extracts all event information and a multi-step approach (PL-Marker++) that we augmented for the CAMIR schema.</abstract>
       <url hash="a8fdcd19">2024.lrec-main.115</url>
@@ -1451,7 +1451,7 @@
     <paper id="121">
       <title>A Persona-Based Corpus in the Diabetes Self-Care Domain - Applying a Human-Centered Approach to a Low-Resource Context</title>
       <author><first>Rossana</first><last>Cunha</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
       <author><first>Fabio</first><last>Alves</last></author>
       <pages>1353–1369</pages>
@@ -1476,7 +1476,7 @@
     </paper>
     <paper id="123">
       <title>Applying Transfer Learning to <fixed-case>G</fixed-case>erman Metaphor Prediction</title>
-      <author><first>Maria</first><last>Berger</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
       <author><first>Nieke</first><last>Kiwitt</last></author>
       <author><first>Sebastian</first><last>Reimann</last></author>
       <pages>1383–1392</pages>
@@ -1581,7 +1581,7 @@
       <author><first>Mingtong</first><last>Liu</last></author>
       <author><first>Chunyou</first><last>Li</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>1486–1497</pages>
       <abstract>Due to the lack of parallel data, the mainstream fine-tuning-based domain adaptation methods have the overfitting problem in the translation of low-resource domains, and it is difficult for the model to learn the in-domain generalization knowledge. To address the above issue, in this work, we propose a novel Reinforcement Learning Domain Adaptation method for Neural Machine Translation (RLDA-NMT) in the low-resource domain. RLDA-NMT utilizes in-domain source monolingual data to make up for the lack of parallel data, and reinforces domain features learning to make the translation model learn the domain-specific knowledge more fully. Specifically, we first train a ranking-based model with a small-scale in-domain parallel corpus, and then adopt it as the reward model to select higher-quality generated translations for reinforcement when fine-tuning pre-trained NMT model using in-domain source monolingual data. We conduct experiments on Education, Laws, Thesis, and Patent domains of Chinese⇔English translation tasks. Experimental results demonstrate that RLDA-NMT can alleviate overfitting and reinforce the NMT model to learn domain-specific knowledge. Additionally, the results also show that RLDA-NMT and back-translation (BT) are nicely complementary to each other, where combining RLDA-NMT with BT can further improve translation quality.</abstract>
@@ -1614,7 +1614,7 @@
       <author><first>Elena</first><last>Cabrio</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Joonsuk</first><last>Park</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <author><first>Timon</first><last>Ziegenbein</last></author>
       <pages>1519–1538</pages>
@@ -1635,9 +1635,9 @@
     </paper>
     <paper id="137">
       <title><fixed-case>ART</fixed-case>: The Alternating Reading Task Corpus for Speech Entrainment and Imitation</title>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Dorina</first><last>de Jong</last></author>
-      <author><first>Štefan</first><last>Beňuš</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
       <author><first>Noël</first><last>Nguyen</last></author>
       <author><first>Ruitao</first><last>Feng</last></author>
       <author><first>Róbert</first><last>Sabo</last></author>
@@ -1654,7 +1654,7 @@
       <author><first>Changxin</first><last>Ke</last></author>
       <author><first>Shuhan</first><last>Zhou</last></author>
       <author><first>Churui</first><last>Sun</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>1563–1576</pages>
       <abstract>Simile tasks are challenging in natural language processing (NLP) because models require adequate world knowledge to produce predictions. In recent years, pre-trained language models (PLMs) have succeeded in NLP since they learn generic knowledge from a large corpus. The knowledge embedded in PLMs can be used for different kinds of Simile tasks. However, previous work usually explored one type of simile knowledge for a specific simile task, how to fully utilize different types of knowledge embedded in the PLMs requires further exploration. This paper proposes a self-verified method for exploring simile knowledge from PLMs, which allows the PLMs to leverage one type of simile knowledge to self-validate another. To this end, we first enhance PLMs with a novel multi-level simile recognition (MLSR) task that trains PLMs to evaluate the quality of similes. Then the PLMs leverage this evaluation score to assist the simile interpretation and generation tasks. In this way, we connect different types of simile knowledge in PLMs and make better use of them. Experiments on different pre-trained models and multiple publicly available datasets show that our method works for different kinds of PLMs and can explore more accurate simile knowledge for PLMs. Our code/data will be released on GitHub.</abstract>
@@ -1677,7 +1677,7 @@
     <paper id="140">
       <title><fixed-case>ASEM</fixed-case>: Enhancing Empathy in Chatbot through Attention-based Sentiment and Emotion Modeling</title>
       <author><first>Omama</first><last>Hamad</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <author><first>Ali</first><last>Hamdi</last></author>
       <pages>1588–1601</pages>
       <abstract>Effective feature representations play a critical role in enhancing the performance of text generation models that rely on deep neural networks. However, current approaches suffer from several drawbacks, such as the inability to capture the deep semantics of language and sensitivity to minor input variations, resulting in significant changes in the generated text. In this paper, we present a novel solution to these challenges by employing a mixture of experts, multiple encoders, to offer distinct perspectives on the emotional state of the user’s utterance while simultaneously enhancing performance. We propose an end-to-end model architecture called ASEM that performs emotion analysis on top of sentiment analysis for open-domain chatbots, enabling the generation of empathetic responses that are fluent and relevant. In contrast to traditional attention mechanisms, the proposed model employs a specialized attention strategy that uniquely zeroes in on sentiment and emotion nuances within the user’s utterance. This ensures the generation of context-rich representations tailored to the underlying emotional tone and sentiment intricacies of the text. Our approach outperforms existing methods for generating empathetic embeddings, providing empathetic and diverse responses. The performance of our proposed model significantly exceeds that of existing models, enhancing emotion detection accuracy by 6.2% and lexical diversity by 1.4%. ASEM code is released at https://github.com/MIRAH-Official/Empathetic-Chatbot-ASEM.git</abstract>
@@ -1729,7 +1729,7 @@
     <paper id="145">
       <title>Assessing the Capabilities of Large Language Models in Coreference: An Evaluation</title>
       <author><first>Yujian</first><last>Gan</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
       <pages>1645–1665</pages>
       <abstract>This paper offers a nuanced examination of the role Large Language Models (LLMs) play in coreference resolution, aimed at guiding the future direction in the era of LLMs. We carried out both manual and automatic analyses of different LLMs’ abilities, employing different prompts to examine the performance of different LLMs, obtaining a comprehensive view of their strengths and weaknesses. We found that LLMs show exceptional ability in understanding coreference. However, harnessing this ability to achieve state of the art results on traditional datasets and benchmarks isn’t straightforward. Given these findings, we propose that future efforts should: (1) Improve the scope, data, and evaluation methods of traditional coreference research to adapt to the development of LLMs. (2) Enhance the fine-grained language understanding capabilities of LLMs.</abstract>
@@ -1860,7 +1860,7 @@
     <paper id="158">
       <title>A Typology of Errors for User Utterances in Chatbots</title>
       <author><first>Anu</first><last>Singh</last></author>
-      <author><first>Esme</first><last>Manandise</last></author>
+      <author id="esmeralda-manandise"><first>Esme</first><last>Manandise</last></author>
       <pages>1789–1794</pages>
       <abstract>This paper discusses the challenges non-prescriptive language uses in chatbot communication create for Semantic Parsing (SP). To help SP developers improve their systems, we propose a flexible error typology based on an analysis of a sample of non-prescriptive language uses mined from a domain-specific chatbot logs. This typology is not tied to any specific language model. We also present a framework for automatically mapping these errors to the typology. Finally, we show how our framework can help evaluate SP systems from a linguistic robustness perspective. Our framework can be expanded to include new classes of errors across different domains and user demographics.</abstract>
       <url hash="8a60fde9">2024.lrec-main.158</url>
@@ -1899,7 +1899,7 @@
     </paper>
     <paper id="162">
       <title>Automatically Estimating Textual and Phonemic Complexity for Cued Speech: How to See the Sounds from <fixed-case>F</fixed-case>rench Texts</title>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
       <author><first>Marie</first><last>Bauer</last></author>
       <pages>1817–1824</pages>
@@ -1942,7 +1942,7 @@
       <title>Automatic Coding of Contingency in Child-Caregiver Conversations</title>
       <author><first>Abhishek</first><last>Agrawal</last></author>
       <author><first>Mitja</first><last>Nikolaus</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
       <pages>1856–1870</pages>
       <abstract>One of the most important communicative skills children have to learn is to engage in meaningful conversations with people around them. At the heart of this learning lies the mastery of contingency, i.e., the ability to contribute to an ongoing exchange in a relevant fashion (e.g., by staying on topic). Current research on this question relies on the manual annotation of a small sample of children, which limits our ability to draw general conclusions about development. Here, we propose to mitigate the limitations of manual labor by relying on automatic tools for contingency judgment in children’s early natural interactions with caregivers. Drawing inspiration from the field of dialogue systems evaluation, we built and compared several automatic classifiers. We found that a Transformer-based pre-trained language model – when fine-tuned on a relatively small set of data we annotated manually (around 3,500 turns) – provided the best predictions. We used this model to automatically annotate, new and large-scale data, almost two orders of magnitude larger than our fine-tuning set. It was able to replicate existing results and generate new data-driven hypotheses. The broad impact of the work is to provide resources that can help the language development community study communicative development at scale, leading to more robust theories.</abstract>
@@ -1953,7 +1953,7 @@
       <title>Automatic Construction of a <fixed-case>C</fixed-case>hinese Review Dataset for Aspect Sentiment Triplet Extraction via Iterative Weak Supervision</title>
       <author><first>Chia-Wen</first><last>Lu</last></author>
       <author><first>Ching-Wen</first><last>Yang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>1871–1882</pages>
       <abstract>Aspect Sentiment Triplet Extraction (ASTE), introduced in 2020, is a task that involves the extraction of three key elements: target aspects, descriptive opinion spans, and their corresponding sentiment polarity. This process, however, faces a significant hurdle, particularly when applied to Chinese languages, due to the lack of sufficient datasets for model training, largely attributable to the arduous manual labeling process. To address this issue, we present an innovative framework that facilitates the automatic construction of ASTE via Iterative Weak Supervision, negating the need for manual labeling, aided by a discriminator to weed out subpar samples. The objective is to successively improve the quality of this raw data and generate supplementary data. The effectiveness of our approach is underscored by our results, which include the creation of a substantial Chinese review dataset. This dataset encompasses over 60,000 Google restaurant reviews in Chinese and features more than 200,000 extracted triplets. Moreover, we have also established a robust baseline model by leveraging a novel method of weak supervision. Both our dataset and model are openly accessible to the public.</abstract>
       <url hash="dedd35b0">2024.lrec-main.167</url>
@@ -1990,7 +1990,7 @@
       <author><first>Rei</first><last>Miyata</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Tomoyuki</first><last>Kajiwara</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>1899–1914</pages>
       <abstract>This paper presents our work on a task of automatic decomposition of text editing examples into primitive edit operations. Toward a detailed analysis of the behavior of text editing systems, identification of fine-grained edit operations performed by the systems is essential. Given a pair of source and edited sentences, the goal of our task is to generate a non-redundant sequence of primitive edit operations, i.e., the semantically minimal edit operations preserving grammaticality, that iteratively converts the source sentence to the edited sentence. First, we formalize this task, explaining its significant features and specifying the constraints that primitive edit operations should satisfy. Then, we propose a method to automate this task, which consists of two steps: generation of an edit operation lattice and selection of an optimal path. To obtain a wide range of edit operation candidates in the first step, we combine a phrase aligner and a large language model. Experimental results show that our method perfectly decomposes 44% and 64% of editing examples in the text simplification and machine translation post-editing datasets, respectively. Detailed analyses also provide insights into the difficulties of this task, suggesting directions for improvement.</abstract>
       <url hash="b14dacfc">2024.lrec-main.170</url>
@@ -2001,7 +2001,7 @@
       <author><first>Elena</first><last>Callegari</last></author>
       <author><first>Iris Edda</first><last>Nowenstein</last></author>
       <author><first>Ingunn Jóhanna</first><last>Kristjánsdóttir</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>1915–1924</pages>
       <abstract>This study examines the influence of task type and healthy aging on various automatically extracted part-of-speech features in Icelandic. We administered three language tasks to participants aged 60–80: picture description, trip planning, and description of one’s childhood home. Our findings reveal significant task effects on 11 out of 14 linguistic variables studied, highlighting the substantial influence of sampling methods on language production. Among the variables showing statistically significant task effects, we find the rate of the genitive and subjunctive, variables which can only be studied in morphologically richer languages like Icelandic. On the other hand, rates of pronouns, adverbs, and prepositions remained stable across task types. Aging effects were more subtle, being evident in 3 of the 14 variables, including an interaction with task type for dative case marking. These findings underscore the significance of task selection in studies targeting linguistic features but also emphasize the need to examine languages other than English to fully understand the effects of aging on language production. Additionally, the results have clinical implications: understanding healthy aging’s impact on language can help us better identify and study changes caused by Alzheimer’s Disease in older adults’ speech.</abstract>
       <url hash="35436156">2024.lrec-main.171</url>
@@ -2069,7 +2069,7 @@
     <paper id="177">
       <title>Automatic Speech Recognition for <fixed-case>G</fixed-case>ascon and Languedocian Variants of <fixed-case>O</fixed-case>ccitan</title>
       <author><first>Iñigo</first><last>Morcillo</last></author>
-      <author><first>Igor</first><last>Leturia</last></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last></author>
       <author><first>Ander</first><last>Corral</last></author>
       <author><first>Xabier</first><last>Sarasola</last></author>
       <author><first>Michaël</first><last>Barret</last></author>
@@ -2114,7 +2114,7 @@
     <paper id="181">
       <title>Auxiliary Knowledge-Induced Learning for Automatic Multi-Label Medical Document Classification</title>
       <author><first>Xindi</first><last>Wang</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
       <pages>2006–2016</pages>
       <abstract>The International Classification of Diseases (ICD) is an authoritative medical classification system of different diseases and conditions for clinical and management purposes. ICD indexing aims to assign a subset of ICD codes to a medical record. Since human coding is labour-intensive and error-prone, many studies employ machine learning techniques to automate the coding process. ICD coding is a challenging task, as it needs to assign multiple codes to each medical document from an extremely large hierarchically organized collection. In this paper, we propose a novel approach for ICD indexing that adopts three ideas: (1) we use a multi-level deep dilated residual convolution encoder to aggregate the information from the clinical notes and learn document representations across different lengths of the texts; (2) we formalize the task of ICD classification with auxiliary knowledge of the medical records, which incorporates not only the clinical texts but also different clinical code terminologies and drug prescriptions for better inferring the ICD codes; and (3) we introduce a graph convolutional network to leverage the co-occurrence patterns among ICD codes, aiming to enhance the quality of label representations. Experimental results show the proposed method achieves state-of-the-art performance on a number of measures.</abstract>
@@ -2128,7 +2128,7 @@
       <author><first>Maitane</first><last>Urruela</last></author>
       <author><first>Elisa</first><last>Espina</last></author>
       <author><first>Aitziber</first><last>Atutxa Salazar</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>2017–2027</pages>
       <abstract>In this work we present two datasets for the development of virtual patients and the first evaluation results. We firstly introduce a Spanish corpus of medical dialogue questions annotated with intents, built upon prior research in French. We also propose a second dataset of dialogues using a novel annotation approach that involves doctor questions, patient answers, and corresponding clinical records, organized as triples of the form (clinical report, question, patient answer). This way, the doctor-patient conversation is modeled as a question-answering system that tries to find responses to questions taking a clinical record as input. This approach can help to eliminate the need for manually structured patient records, as commonly used in previous studies, thereby expanding the pool of diverse virtual patients available. Leveraging these annotated corpora, we develop and assess an automatic system designed to answer medical dialogue questions posed by medical students to simulated patients in medical exams. Our approach demonstrates robust generalization, relying solely on medical records to generate new patient cases. The two datasets and the code will be freely available for the research community.</abstract>
       <url hash="1f1c9049">2024.lrec-main.182</url>
@@ -2136,7 +2136,7 @@
     </paper>
     <paper id="183">
       <title>A Web Portal about the State of the Art of <fixed-case>NLP</fixed-case> Tasks in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Jorge</first><last>Carrillo-de-Albornoz</last></author>
       <author><first>Andrés</first><last>Fernández</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
@@ -2155,7 +2155,7 @@
       <author><first>Maarten</first><last>van Gompel</last></author>
       <author><first>Anna</first><last>Jouravel</last></author>
       <author><first>Elena</first><last>Renje</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Achim</first><last>Rabus</last></author>
       <author><first>Eckhart</first><last>Arnold</last></author>
       <pages>2039–2048</pages>
@@ -2188,13 +2188,13 @@
     <paper id="187">
       <title><fixed-case>B</fixed-case>alsu<fixed-case>T</fixed-case>alka.lv - Boosting the Common Voice Corpus for Low-Resource Languages</title>
       <author><first>Roberts</first><last>Dargis</last></author>
-      <author><first>Arturs</first><last>Znotins</last></author>
+      <author id="arturs-znotins"><first>Arturs</first><last>Znotins</last></author>
       <author><first>Ilze</first><last>Auzina</last></author>
-      <author><first>Baiba</first><last>Saulite</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulite</last></author>
       <author><first>Sanita</first><last>Reinsone</last></author>
       <author><first>Raivis</first><last>Dejus</last></author>
       <author><first>Antra</first><last>Klavinska</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <pages>2080–2085</pages>
       <abstract>Open speech corpora of substantial size are seldom available for less-spoken languages, and this was recently the case also for Latvian with its 1.5M native speakers. While there exist several closed Latvian speech corpora of 100+ hours, used to train competitive models for automatic speech recognition (ASR), there were only a few tiny open datasets available at the beginning of 2023, the 18-hour Latvian Common Voice 13.0 dataset being the largest one. In the result of a successful national crowdsourcing initiative, organised jointly by several institutions, the size and speaker diversity of the Latvian Common Voice 17.0 release have increased more than tenfold in less than a year. A successful follow-up initiative was also launched for Latgalian, which has been recognized as an endangered historic variant of Latvian with 150k speakers. The goal of these initiatives is not only to enlarge the datasets but also to make them more diverse in terms of speakers and accents, text genres and styles, intonations, grammar and lexicon. They have already become considerable language resources for both improving ASR and conducting linguistic research. Since we use the Mozilla Common Voice platform to record and validate speech samples, this paper focuses on (i) the selection of text snippets to enrich the language data and to stimulate various intonations, (ii) an indicative evaluation of the acquired corpus and the first ASR models fine-tuned on this data, (iii) our social campaigns to boost and maintain this initiative.</abstract>
       <url hash="865e846b">2024.lrec-main.187</url>
@@ -2205,7 +2205,7 @@
       <author><first>Zican</first><last>Dong</last></author>
       <author><first>Tianyi</first><last>Tang</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>2086–2099</pages>
       <abstract>Large language models (LLMs) have achieved dramatic proficiency over NLP tasks with normal length. Recently, multiple studies have committed to extending the context length and enhancing the long text modeling capabilities of LLMs. To comprehensively evaluate the long context ability of LLMs, we propose BAMBOO, a multi-task long context benchmark. BAMBOO has been designed with four principles: comprehensive capacity evaluation, avoidance of data contamination, accurate automatic evaluation, and different length levels. It consists of 10 datasets from 5 different long text understanding tasks, i.e., question answering, hallucination detection, text sorting, language modeling, and code completion, to cover various domains and core capacities of LLMs. We conduct experiments with five widely-used long-context models and further discuss five key questions for long text research. In the end, we discuss problems of current long-context models and point out future directions for enhancing long text modeling capacities. We release our data, prompts, and code at https://anonymous.4open.science/r/BAMBOO/.</abstract>
@@ -2255,7 +2255,7 @@
       <author><first>Jaione</first><last>Bengoetxea</last></author>
       <author><first>Yi-Ling</first><last>Chung</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>2132–2141</pages>
       <abstract>Counter Narratives (CNs) are non-negative textual responses to Hate Speech (HS) aiming at defusing online hatred and mitigating its spreading across media. Despite the recent increase in HS content posted online, research on automatic CN generation has been relatively scarce and predominantly focused on English. In this paper, we present CONAN-EUS, a new Basque and Spanish dataset for CN generation developed by means of Machine Translation (MT) and professional post-edition. Being a parallel corpus, also with respect to the original English CONAN, it allows to perform novel research on multilingual and crosslingual automatic generation of CNs. Our experiments on CN generation with mT5, a multilingual encoder-decoder model, shows that generation greatly benefits from training on post-edited data, as opposed to relying on silver MT data only. These results are confirmed by their correlation with a qualitative manual evaluation, demonstrating that manually revised training data remains crucial for the quality of the generated CNs. Furthermore, multilingual data augmentation improves results over monolingual settings for structurally similar languages such as English and Spanish, while being detrimental for Basque, a language isolate. Similar findings occur in zero-shot crosslingual evaluations, where model transfer (fine-tuning in English and generating in a different target language) outperforms fine-tuning mT5 on machine translated data for Spanish but not for Basque. This provides an interesting insight into the asymmetry in the multilinguality of generative models, a challenging topic which is still open to research. Data and code will be made publicly available upon publication.</abstract>
       <url hash="17c08f44">2024.lrec-main.192</url>
@@ -2265,7 +2265,7 @@
     <paper id="193">
       <title>Becoming a High-Resource Language in Speech: The <fixed-case>C</fixed-case>atalan Case in the Common Voice Corpus</title>
       <author><first>Carme</first><last>Armentano-Oller</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>2142–2148</pages>
       <abstract>Collecting voice resources for speech recognition systems is a multifaceted challenge, involving legal, technical, and diversity considerations. However, it is crucial to ensure fair access to voice-driven technology across diverse linguistic backgrounds. We describe an ongoing effort to create an extensive, high-quality, publicly available voice dataset for future development of speech technologies in Catalan through the Mozilla Common Voice crowd-sourcing platform. We detail the specific approaches used to address the challenges faced in recruiting contributors and managing the collection, validation, and recording of sentences. This detailed overview can serve as a source of guidance for similar initiatives across other projects and linguistic contexts. The success of this project is evident in the latest corpus release, version 16.1, where Catalan ranks as the most prominent language in the corpus, both in terms of recorded hours and when considering validated hours. This establishes Catalan as a language with significant speech resources for language technology development and significantly raises its international visibility.</abstract>
@@ -2454,7 +2454,7 @@
       <title>Beyond Model Performance: Can Link Prediction Enrich <fixed-case>F</fixed-case>rench Lexical Graphs?</title>
       <author><first>Hee-Soo</first><last>Choi</last></author>
       <author><first>Priyansh</first><last>Trivedi</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Karen</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <pages>2329–2341</pages>
@@ -2514,7 +2514,7 @@
     </paper>
     <paper id="213">
       <title>Biomedical Concept Normalization over Nested Entities with Partial <fixed-case>UMLS</fixed-case> Terminology in <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Andrey</first><last>Sakhovskiy</last></author>
       <author><first>Elena</first><last>Tutubalina</last></author>
       <pages>2383–2389</pages>
@@ -2536,7 +2536,7 @@
     <paper id="215">
       <title>Bits and Pieces: Investigating the Effects of Subwords in Multi-task Parsing across Languages and Domains</title>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>2397–2409</pages>
       <abstract>Neural parsing is very dependent on the underlying language model. However, very little is known about how choices in the language model affect parsing performance, especially in multi-task learning. We investigate questions on how the choice of subwords affects parsing, how subword sharing is responsible for gains or negative transfer in a multi-task setting where each task is parsing of a specific domain of the same language. More specifically, we investigate these issues across four languages: English, German, Italian, and Turkish. We find a general preference for averaged or last subwords across languages and domains. However, specific POS tags may require different subwords, and the distributional overlap between subwords across domains is perhaps a more influential factor in determining positive or negative transfer than discrepancies in the data sizes.</abstract>
       <url hash="21ee04af">2024.lrec-main.215</url>
@@ -2581,7 +2581,7 @@
       <title><fixed-case>BLN</fixed-case>600: A Parallel Corpus of Machine/Human Transcribed Nineteenth Century Newspaper Texts</title>
       <author><first>Callum William</first><last>Booth</last></author>
       <author><first>Alan</first><last>Thomas</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>2440–2446</pages>
       <abstract>We present a publicly available corpus of nineteenth-century newspaper text focused on crime in London, derived from the Gale British Library Newspapers corpus parts 1 and 2. The corpus comprises 600 newspaper excerpts and for each excerpt contains the original source image, the machine transcription of that image as found in the BLN and a gold standard manual transcription that we have created. We envisage the corpus will be helpful for the training and development of OCR and post-OCR correction methodologies for historical newspaper machine transcription—for which there is currently a dearth of publicly available resources. In this paper, we discuss the rationale behind gathering such a corpus, the methodology used to select, process, and align the data, and the corpus’ potential utility for historians and digital humanities researchers—particularly within the realms of neural machine translation-based post-OCR correction approaches, and other natural language processing tasks that are critically affected by erroneous OCR.</abstract>
       <url hash="01d18159">2024.lrec-main.219</url>
@@ -2619,7 +2619,7 @@
       <author><first>Yupu</first><last>Liang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>2468–2479</pages>
       <abstract>Text image machine translation (TIMT) aims at translating source language texts in images into another target language, which has been proven successful by bridging text image recognition encoder and text translation decoder. However, it is still an open question of how to incorporate fine-grained knowledge supervision to make it consistent between recognition and translation modules. In this paper, we propose a novel TIMT method named as BabyNet, which is optimized with hierarchical parental supervision to improve translation performance. Inspired by genetic recombination and variation in the field of genetics, the proposed BabyNet is inherited from the recognition and translation parent models with a variation module of which parameters can be updated when training on the TIMT task. Meanwhile, hierarchical and multi-granularity supervision from parent models is introduced to bridge the gap between inherited modules in BabyNet. Extensive experiments on both synthetic and real-world TIMT tests show that our proposed method significantly outperforms existing methods. Further analyses of various parent model combinations show the good generalization of our method.</abstract>
       <url hash="865c50b0">2024.lrec-main.222</url>
@@ -2652,9 +2652,9 @@
     <paper id="225">
       <title>Bridging Computational Lexicography and Corpus Linguistics: A Query Extension for <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-<fixed-case>F</fixed-case>r<fixed-case>AC</fixed-case></title>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Maxim</first><last>Ionov</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>2504–2514</pages>
       <abstract>OntoLex, the dominant community standard for machine-readable lexical resources in the context of RDF, Linked Data and Semantic Web technologies, is currently extended with a designated module for Frequency, Attestations and Corpus-based Information (OntoLex-FrAC). We propose a novel component for OntoLex-FrAC, addressing the incorporation of corpus queries for (a) linking dictionaries with corpus engines, (b) enabling RDF-based web services to exchange corpus queries and responses data dynamically, and (c) using conventional query languages to formalize the internal structure of collocations, word sketches, and colligations. The primary field of application of the query extension is in digital lexicography and corpus linguistics, and we present a proof-of-principle implementation in backend components of a novel platform designed to support digital lexicography for the Serbian language.</abstract>
       <url hash="441d9c70">2024.lrec-main.225</url>
@@ -2684,7 +2684,7 @@
     </paper>
     <paper id="228">
       <title>Bring Invariant to Variant: A Contrastive Prompt-based Framework for Temporal Knowledge Graph Forecasting</title>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Xinying</first><last>Qian</last></author>
       <author><first>Yu</first><last>Zhao</last></author>
       <author><first>Baohang</first><last>Zhou</last></author>
@@ -2704,17 +2704,17 @@
       <author><first>William</first><last>Croft</last></author>
       <author><first>Lukas</first><last>Denk</last></author>
       <author><first>Sijia</first><last>Ge</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Kenneth</first><last>Lai</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Skatje</first><last>Myers</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Claire Benet</first><last>Post</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Kristine</first><last>Stenzel</last></author>
       <author><first>Haibo</first><last>Sun</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <author><first>Rosa</first><last>Vallejos</last></author>
       <author><first>Jens E. L.</first><last>Van Gysel</last></author>
       <author><first>Meagan</first><last>Vigus</last></author>
@@ -2730,7 +2730,7 @@
       <author><first>Polina</first><last>Bychkova</last></author>
       <author><first>Alyaxey</first><last>Yaskevich</last></author>
       <author><first>Serafima</first><last>Gyulasaryan</last></author>
-      <author><first>Ekaterina</first><last>Rakhilina</last></author>
+      <author id="ekaterina-v-rakhilina"><first>Ekaterina</first><last>Rakhilina</last></author>
       <pages>2548–2555</pages>
       <abstract>This paper discusses the Routinicon, a new constructicographic resource for the description of conversational routines. Conversational routines are defined as conventional formulaic expressions that language speakers use in standard extralinguistic situations (cf. Bless you! as a reaction to sneezing or Who’s there? as a typical answer to a knock on the door). The Routinicon’s goal is to accumulate the routines that constitute the inventory of conventional expressions in Russian language and systematically describe them in a way that would enable future cross-linguistic comparison and typological research. Conceptually, the Routinicon is a natural extension of such projects as the Russian Constructicon and Pragmaticon. It inherits their approach to the systematization of phraseological units as well as to the data collection. At the same time, the new project focuses on a fundamentally different domain of units and hence offers a radically new structure of linguistic annotation. Its principles and challenges are addressed in the paper.</abstract>
       <url hash="c765d434">2024.lrec-main.230</url>
@@ -2738,9 +2738,9 @@
     </paper>
     <paper id="231">
       <title>Building a Data Infrastructure for a Mid-Resource Language: The Case of <fixed-case>C</fixed-case>atalan</title>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Carlos</first><last>Rodriguez-Penagos</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodriguez-Penagos</last></author>
       <author><first>Javier</first><last>Aula-Blasco</last></author>
       <author><first>Irene</first><last>Baucells</last></author>
       <author><first>Carme</first><last>Armentano-Oller</last></author>
@@ -2756,7 +2756,7 @@
       <title>Building a <fixed-case>J</fixed-case>apanese Document-Level Relation Extraction Dataset Assisted by Cross-Lingual Transfer</title>
       <author><first>Youmi</first><last>Ma</last></author>
       <author><first>An</first><last>Wang</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>2567–2579</pages>
       <abstract>Document-level Relation Extraction (DocRE) is the task of extracting all semantic relationships from a document. While studies have been conducted on English DocRE, limited attention has been given to DocRE in non-English languages. This work delves into effectively utilizing existing English resources to promote DocRE studies in non-English languages, with Japanese as the representative case. As an initial attempt, we construct a dataset by transferring an English dataset to Japanese. However, models trained on such a dataset are observed to suffer from low recalls. We investigate the error cases and attribute the failure to different surface structures and semantics of documents translated from English and those written by native speakers. We thus switch to explore if the transferred dataset can assist human annotation on Japanese documents. In our proposal, annotators edit relation predictions from a model trained on the transferred dataset. Quantitative analysis shows that relation recommendations suggested by the model help reduce approximately 50% of the human edit steps compared with the previous approach. Experiments quantify the performance of existing DocRE models on our collected dataset, portraying the challenges of Japanese and cross-lingual DocRE.</abstract>
       <url hash="4a1aef35">2024.lrec-main.232</url>
@@ -2830,7 +2830,7 @@
       <author><first>Rudy Alexandro</first><last>Garrido Veliz</last></author>
       <author><first>Natia</first><last>Mestvirishvili</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Irina</first><last>Nikishina</last></author>
       <pages>2657–2672</pages>
       <abstract>Comparative Question Answering (CompQA) is a Natural Language Processing task that combines Question Answering and Argument Mining approaches to answer subjective comparative questions in an efficient argumentative manner. In this paper, we present an end-to-end (full pipeline) system for answering comparative questions called CAM 2.0 as well as a public leaderboard called CompUGE that unifies the existing datasets under a single easy-to-use evaluation suite. As compared to previous web-form-based CompQA systems, it features question identification, object and aspect labeling, stance classification, and summarization using up-to-date models. We also select the most time- and memory-effective pipeline by comparing separately fine-tuned Transformer Encoder models which show state-of-the-art performance on the subtasks with Generative LLMs in few-shot and LoRA setups. We also conduct a user study for a whole-system evaluation.</abstract>
@@ -2864,7 +2864,7 @@
     <paper id="241">
       <title><fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>-bio: Leveraging Continual Pre-training for Cost-Effective Models on <fixed-case>F</fixed-case>rench Biomedical Data</title>
       <author><first>Rian</first><last>Touchent</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>2692–2701</pages>
       <abstract>Clinical data in hospitals are increasingly accessible for research through clinical data warehouses. However these documents are unstructured and it is therefore necessary to extract information from medical reports to conduct clinical studies. Transfer learning with BERT-like models such as CamemBERT has allowed major advances for French, especially for named entity recognition. However, these models are trained for plain language and are less efficient on biomedical data. Addressing this gap, we introduce CamemBERT-bio, a dedicated French biomedical model derived from a new public French biomedical dataset. Through continual pre-training of the original CamemBERT, CamemBERT-bio achieves an improvement of 2.54 points of F1-score on average across various biomedical named entity recognition tasks, reinforcing the potential of continual pre-training as an equally proficient yet less computationally intensive alternative to training from scratch. Additionally, we highlight the importance of using a standard evaluation protocol that provides a clear view of the current state-of-the-art for French biomedical models.</abstract>
       <url hash="892f3524">2024.lrec-main.241</url>
@@ -2885,7 +2885,7 @@
     <paper id="243">
       <title>Can Factual Statements Be Deceptive? The <fixed-case>D</fixed-case>e<fixed-case>F</fixed-case>a<fixed-case>B</fixed-case>el Corpus of Belief-based Deception</title>
       <author><first>Aswathy</first><last>Velutharambath</last></author>
-      <author><first>Amelie</first><last>Wührl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>2708–2723</pages>
       <abstract>If a person firmly believes in a non-factual statement, such as “The Earth is flat”, and argues in its favor, there is no inherent intention to deceive. As the argumentation stems from genuine belief, it may be unlikely to exhibit the linguistic properties associated with deception or lying. This interplay of factuality, personal belief, and intent to deceive remains an understudied area. Disentangling the influence of these variables in argumentation is crucial to gain a better understanding of the linguistic properties attributed to each of them. To study the relation between deception and factuality, based on belief, we present the DeFaBel corpus, a crowd-sourced resource of belief-based deception. To create this corpus, we devise a study in which participants are instructed to write arguments supporting statements like “eating watermelon seeds can cause indigestion”, regardless of its factual accuracy or their personal beliefs about the statement. In addition to the generation task, we ask them to disclose their belief about the statement. The collected instances are labelled as deceptive if the arguments are in contradiction to the participants’ personal beliefs. Each instance in the corpus is thus annotated (or implicitly labelled) with personal beliefs of the author, factuality of the statement, and the intended deceptiveness. The DeFaBel corpus contains 1031 texts in German, out of which 643 are deceptive and 388 are non-deceptive. It is the first publicly available corpus for studying deception in German. In our analysis, we find that people are more confident in the persuasiveness of their arguments when the statement is aligned with their belief, but surprisingly less confident when they are generating arguments in favor of facts. The DeFaBel corpus can be obtained from https://www.ims.uni-stuttgart.de/data/defabel .</abstract>
@@ -2952,7 +2952,7 @@
       <title>Can Large Language Models Learn Translation Robustness from Noisy-Source In-context Demonstrations?</title>
       <author><first>Leiyu</first><last>Pan</last></author>
       <author><first>Yongqi</first><last>Leng</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2798–2808</pages>
       <abstract>Large language models (LLMs) have been used for machine translation. When provided with prompts and source sentences, LLMs can achieve impressive translation results. However, the robustness of these LLMs remains a significant challenge, as they often struggle to accurately translate sentences in the presence of noise, even when using similarity-based in-context learning methods. This work proposes a research scheme for studying machine translation robustness on LLMs, investigating whether LLMs can learn translation robustness from noisy-source demonstration examples. Through experiments on different models, languages, and noise types, we empirically demonstrate that LLMs can learn how to handle noise and translation methods from noisy-source demonstration examples, thereby improving their translation performance on noisy sentences. Furthermore, we find that increasing the noise ratio appropriately for the noisy-source demonstration examples can enhance the translation robustness of LLMs. Additionally, we also attempt to investigate scenarios where LLMs are more likely to learn translation robustness for mixed and specific types of noise. We find that the model’s performance varies across different noise settings.</abstract>
       <url hash="4a5a54c9">2024.lrec-main.249</url>
@@ -2963,7 +2963,7 @@
       <author><first>Shaoxiong</first><last>Ji</last></author>
       <author><first>Timothee</first><last>Mickus</last></author>
       <author><first>Vincent</first><last>Segonne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>2809–2818</pages>
       <abstract>Multilingual pretraining and fine-tuning have remarkably succeeded in various natural language processing tasks. Transferring representations from one language to another is especially crucial for cross-lingual learning. One can expect machine translation objectives to be well suited to fostering such capabilities, as they involve the explicit alignment of semantically equivalent sentences from different languages. This paper investigates the potential benefits of employing machine translation as a continued training objective to enhance language representation learning, bridging multilingual pretraining and cross-lingual applications. We study this question through two lenses: a quantitative evaluation of the performance of existing models and an analysis of their latent representations. Our results show that, contrary to expectations, machine translation as the continued training fails to enhance cross-lingual representation learning in multiple cross-lingual natural language understanding tasks. We conclude that explicit sentence-level alignment in the cross-lingual scenario is detrimental to cross-lingual transfer pretraining, which has important implications for future cross-lingual transfer studies. We furthermore provide evidence through similarity measures and investigation of parameters that this lack of positive influence is due to output separability—which we argue is of use for machine translation but detrimental elsewhere.</abstract>
       <url hash="ef1d437c">2024.lrec-main.250</url>
@@ -2999,8 +2999,8 @@
     </paper>
     <paper id="253">
       <title>Can We Identify Stance without Target Arguments? A Study for Rumour Stance Classification</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author><first>Yue</first><last>Li</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>2844–2851</pages>
       <abstract>Considering a conversation thread, rumour stance classification aims to identify the opinion (e.g. agree or disagree) of replies towards a target (rumour story). Although the target is expected to be an essential component in traditional stance classification, we show that rumour stance classification datasets contain a considerable amount of real-world data whose stance could be naturally inferred directly from the replies, contributing to the strong performance of the supervised models without awareness of the target. We find that current target-aware models underperform in cases where the context of the target is crucial. Finally, we propose a simple yet effective framework to enhance reasoning with the targets, achieving state-of-the-art performance on two benchmark datasets.</abstract>
       <url hash="b73356fa">2024.lrec-main.253</url>
@@ -3062,7 +3062,7 @@
     <paper id="259">
       <title>Causal Intersectionality and Dual Form of Gradient Descent for Multimodal Analysis: A Case Study on Hateful Memes</title>
       <author><first>Yosuke</first><last>Miyanishi</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>2901–2916</pages>
       <abstract>Amidst the rapid expansion of Machine Learning (ML) and Large Language Models (LLMs), understanding the semantics within their mechanisms is vital. Causal analyses define semantics, while gradient-based methods are essential to eXplainable AI (XAI), interpreting the model’s ‘black box’. Integrating these, we investigate how a model’s mechanisms reveal its causal effect on evidence-based decision-making. Research indicates intersectionality - the combined impact of an individual’s demographics - can be framed as an Average Treatment Effect (ATE). This paper demonstrates that hateful meme detection can be viewed as an ATE estimation using intersectionality principles, and summarized gradient-based attention scores highlight distinct behaviors of three Transformer models. We further reveal that LLM Llama-2 can discern the intersectional aspects of the detection through in-context learning and that the learning process could be explained via meta-gradient, a secondary form of gradient. In conclusion, this work furthers the dialogue on Causality and XAI. Our code is available online (see External Resources section).</abstract>
       <url hash="337509f4">2024.lrec-main.259</url>
@@ -3071,7 +3071,7 @@
     <paper id="260">
       <title><fixed-case>CBBQ</fixed-case>: A <fixed-case>C</fixed-case>hinese Bias Benchmark Dataset Curated with Human-<fixed-case>AI</fixed-case> Collaboration for Large Language Models</title>
       <author><first>Yufei</first><last>Huang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2917–2929</pages>
       <abstract>Holistically measuring societal biases of large language models is crucial for detecting and reducing ethical risks in highly capable AI models. In this work, we present a Chinese Bias Benchmark dataset that consists of over 100K questions jointly constructed by human experts and generative language models, covering stereotypes and societal biases in 14 social dimensions related to Chinese culture and values. The curation process contains 4 essential steps: bias identification, ambiguous context generation, AI-assisted disambiguous context generation, and manual review and recomposition. The testing instances in the dataset are automatically derived from 3K+ high-quality templates manually authored with stringent quality control. The dataset exhibits wide coverage and high diversity. Extensive experiments demonstrate the effectiveness of the dataset in evaluating model bias, with all 12 publicly available Chinese large language models exhibiting strong bias in certain categories. Additionally, we observe from our experiments that fine-tuned models could, to a certain extent, heed instructions and avoid generating harmful outputs, in the way of “moral self-correction”. Our dataset is available at https://anonymous.4open.science/r/CBBQ-B860/.</abstract>
       <url hash="3ff8b696">2024.lrec-main.260</url>
@@ -3130,7 +3130,7 @@
       <title><fixed-case>C</fixed-case>hain<fixed-case>LM</fixed-case>: Empowering Large Language Models with Improved Chain-of-Thought Prompting</title>
       <author><first>Xiaoxue</first><last>Cheng</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>2969–2983</pages>
       <abstract>Chain-of-Thought (CoT) prompting can enhance the reasoning capabilities of large language models (LLMs), establishing itself as a primary approach to solving complex reasoning tasks. Existing CoT synthesis approaches usually focus on simpler reasoning tasks and thus result in low-quality and inconsistent CoT prompts. In response to this challenge, we present an empirical investigation of CoT prompting and introduce CoTGenius, a novel framework designed for the automatic generation of superior CoT prompts. CoTGenius is developed based on three major evolution strategies, i.e., complicate, diversify, and specify—alongside two filtering mechanisms: evolutionary success judgement and correctness verification. We further employ CoTGenius to create an extensive CoT dataset, and subsequently fine-tune the Llama 2-Chat 7B and 13B models on this dataset. We call the resulting model ChainLM. To deal with the cumulative error issue in reasoning steps, we propose a step-level debating method, wherein multiple debaters discuss each reasoning step to arrive at the correct answer. Extensive experiments demonstrate that our ChainLM models exhibit enhanced proficiency in addressing a spectrum of complex reasoning problems compared to existing models. In addition, we conduct an in-depth analysis of the impact of data categories within CoTGenius on the model performance. We release our dataset and code at https://github.com/RUCAIBox/ChainLM.</abstract>
@@ -3142,7 +3142,7 @@
       <author><first>Rowan Hall</first><last>Maudslay</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>2984–2996</pages>
       <abstract>The senses of a word exhibit rich internal structure. In a typical lexicon, this structure is overlooked: A word’s senses are encoded as a list, without inter-sense relations. We present ChainNet, a lexical resource which for the first time explicitly identifies these structures, by expressing how senses in the Open English Wordnet are derived from one another. In ChainNet, every nominal sense of a word is either connected to another sense by metaphor or metonymy, or is disconnected (in the case of homonymy). Because WordNet senses are linked to resources which capture information about their meaning, ChainNet represents the first dataset of grounded metaphor and metonymy.</abstract>
       <url hash="0306cc8c">2024.lrec-main.266</url>
@@ -3151,7 +3151,7 @@
     <paper id="267">
       <title>Challenges in Pre-Training Graph Neural Networks for Context-Based Fake News Detection: An Evaluation of Current Strategies and Resource Limitations</title>
       <author><first>Gregor</first><last>Donabauer</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>2997–3004</pages>
       <abstract>Pre-training of neural networks has recently revolutionized the field of Natural Language Processing (NLP) and has before demonstrated its effectiveness in computer vision. At the same time, advances around the detection of fake news were mainly driven by the context-based paradigm, where different types of signals (e.g. from social media) form graph-like structures that hold contextual information apart from the news article to classify. We propose to merge these two developments by applying pre-training of Graph Neural Networks (GNNs) in the domain of context-based fake news detection. Our experiments provide an evaluation of different pre-training strategies for graph-based misinformation detection and demonstrate that transfer learning does currently not lead to significant improvements over training a model from scratch in the domain. We argue that a major current issue is the lack of suitable large-scale resources that can be used for pre-training.</abstract>
       <url hash="6a86453e">2024.lrec-main.267</url>
@@ -3160,7 +3160,7 @@
     <paper id="268">
       <title>Challenging Negative Gender Stereotypes: A Study on the Effectiveness of Automated Counter-Stereotypes</title>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Anna</first><last>Kerkhof</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>3005–3015</pages>
@@ -3184,7 +3184,7 @@
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Shenbin</first><last>Qian</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>3028–3037</pages>
       <abstract>Abbreviations and their associated long forms are important textual elements that are present in almost every scientific communication, and having information about these forms can help improve several NLP tasks. In this paper, our aim is to fine-tune language models for automatically identifying abbreviations and long forms. We used existing datasets which are annotated with abbreviations and long forms to train and test several language models, including transformer models, character-level language models, stacking of different embeddings, and ensemble methods. Our experiments showed that it was possible to achieve state-of-the-art results by stacking RoBERTa embeddings with domain-specific embeddings. However, the analysis of our first run showed that one of the datasets had issues in the BIO annotation, which led us to propose a revised dataset. After re-training selected models on the revised dataset, results show that character-level models achieve comparable results, especially when detecting abbreviations, but both RoBERTa large and the stacking of embeddings presented better results on biomedical data. When tested on a different subdomain (segments extracted from computer science texts), an ensemble method proved to yield the best results for the detection of long forms, and a character-level model had the best performance in detecting abbreviations.</abstract>
       <url hash="cceda49a">2024.lrec-main.270</url>
@@ -3195,7 +3195,7 @@
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Lucie</first><last>Polakova</last></author>
       <author><first>Michal</first><last>Novák</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
       <author><first>Tomas</first><last>Krabac</last></author>
@@ -3234,7 +3234,7 @@
       <author><first>Jingjing</first><last>Wang</last></author>
       <author><first>Jiamin</first><last>Luo</last></author>
       <author><first>Tao</first><last>Zeng</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3075–3085</pages>
       <abstract>Aspect Sentiment Understanding (ASU) in interactive scenarios (e.g., Question-Answering and Dialogue) has attracted ever-more interest in recent years and achieved important progresses. However, existing studies on interactive ASU largely ignore the coreference issue for opinion targets (i.e., aspects), while this phenomenon is ubiquitous in interactive scenarios especially dialogues, limiting the ASU performance. Recently, large language models (LLMs) shows the powerful ability to integrate various NLP tasks with the chat paradigm. In this way, this paper proposes a new Chat-based Aspect Sentiment Understanding (ChatASU) task, aiming to explore LLMs’ ability in understanding aspect sentiments in dialogue scenarios. Particularly, this ChatASU task introduces a sub-task, i.e., Aspect Chain Reasoning (ACR) task, to address the aspect coreference issue. On this basis, we propose a Trusted Self-reflexion Approach (TSA) with ChatGLM as backbone to ChatASU. Specifically, this TSA treats the ACR task as an auxiliary task to boost the performance of the primary ASU task, and further integrates trusted learning into reflexion mechanisms to alleviate the LLMs-intrinsic factual hallucination problem in TSA. Furthermore, a high-quality ChatASU dataset is annotated to evaluate TSA, and extensive experiments show that our proposed TSA can significantly outperform several state-of-the-art baselines, justifying the effectiveness of TSA to ChatASU and the importance of considering the coreference and hallucination issues in ChatASU.</abstract>
       <url hash="6b4b48b3">2024.lrec-main.274</url>
@@ -3310,8 +3310,8 @@
       <author><first>Cassandre</first><last>Armand</last></author>
       <author><first>Chiara</first><last>Mazzocconi</last></author>
       <author><first>Shreejata</first><last>Gupta</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Leonor</first><last>Becerra-Bonache</last></author>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
       <pages>3153–3164</pages>
@@ -3357,7 +3357,7 @@
     <paper id="284">
       <title>Chitchat as Interference: Adding User Backstories to Task-Oriented Dialogues</title>
       <author><first>Armand</first><last>Stricker</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>3203–3214</pages>
       <abstract>During task-oriented dialogues (TODs), human users naturally introduce chitchat that is beyond the immediate scope of the task, interfering with the flow of the conversation. To address this issue without the need for expensive manual data creation, we use few-shot prompting with Llama-2-70B to enhance the MultiWOZ dataset with user backstories, a typical example of chitchat interference in TODs. We assess the impact of this addition by testing two models: one trained solely on TODs and another trained on TODs with a preliminary chitchat interaction. Our analysis demonstrates that our enhanced dataset poses a challenge for these systems. Moreover, we demonstrate that our dataset can be effectively used for training purposes, enabling a system to consistently acknowledge the user’s backstory while also successfully moving the task forward in the same turn, as confirmed by human evaluation. These findings highlight the benefits of generating novel chitchat-TOD scenarios to test TOD systems more thoroughly and improve their resilience to natural user interferences.</abstract>
       <url hash="a9dae725">2024.lrec-main.284</url>
@@ -3419,7 +3419,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Long</first><last>Bai</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>3261–3270</pages>
       <abstract>Event detection is one of the fundamental tasks in information extraction and knowledge graph. However, a realistic event detection system often needs to deal with new event classes constantly. These new classes usually have only a few labeled instances as it is time-consuming and labor-intensive to annotate a large number of unlabeled instances. Therefore, this paper proposes a new task, called class-incremental few-shot event detection. Nevertheless, there are two problems (i.e., old knowledge forgetting and new class overfitting) in this task. To solve these problems, this paper further presents a novel knowledge distillation and prompt learning based method, called Prompt-KD. Specifically, to reduce the forgetting issue about old knowledge, Prompt-KD develops an attention based multi-teacher knowledge distillation framework, where the ancestor teacher model pre-trained on base classes is reused in all learning sessions, and the father teacher model derives the current student model via adaptation. On the other hand, in order to cope with the few-shot learning scenario and alleviate the corresponding new class overfitting problem, Prompt-KD is also equipped with a prompt learning mechanism. Extensive experiments on two benchmark datasets, i.e., FewEvent and MAVEN, demonstrate the state-of-the-art performance of Prompt-KD.</abstract>
       <url hash="35860938">2024.lrec-main.290</url>
@@ -3458,7 +3458,7 @@
       <author><first>Fan</first><last>Xu</last></author>
       <author><first>Lei</first><last>Zeng</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Huan</first><last>Rong</last></author>
       <pages>3314–3324</pages>
       <abstract>In an era where rumors can propagate rapidly across social media platforms such as Twitter and Weibo, automatic rumor detection has garnered considerable attention from both academia and industry. Existing multimodal rumor detection models often overlook the intricacies of sample difficulty, e.g., text-level difficulty, image-level difficulty, and multimodal-level difficulty, as well as their order when training. Inspired by the concept of curriculum learning, we propose the Curriculum Learning and Fine-grained Fusion-driven multimodal Rumor Detection (CLFFRD) framework, which employs curriculum learning to automatically select and train samples according to their difficulty at different training stages. Furthermore, we introduce a fine-grained fusion strategy that unifies entities from text and objects from images, enhancing their semantic cohesion. We also propose a novel data augmentation method that utilizes linear interpolation between textual and visual modalities to generate diverse data. Additionally, our approach incorporates deep fusion for both intra-modality (e.g., text entities and image objects) and inter-modality (e.g., CLIP and social graph) features. Extensive experimental results demonstrate that CLFFRD outperforms state-of-the-art models on both English and Chinese benchmark datasets for rumor detection in social media.</abstract>
@@ -3488,7 +3488,7 @@
       <author><first>Philhoon</first><last>Oh</last></author>
       <author><first>Haneul</first><last>Yoo</last></author>
       <author><first>James</first><last>Thorne</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>3335–3346</pages>
       <abstract>Despite the rapid development of large language models (LLMs) for the Korean language, there remains an obvious lack of benchmark datasets that test the requisite Korean cultural and linguistic knowledge. Because many existing Korean benchmark datasets are derived from the English counterparts through translation, they often overlook the different cultural contexts. For the few benchmark datasets that are sourced from Korean data capturing cultural knowledge, only narrow tasks such as hate speech detection are offered. To address this gap, we introduce a benchmark of Cultural and Linguistic Intelligence in Korean (CLIcK), a dataset comprising 1,995 QA pairs. CLIcK sources its data from official Korean exams and textbooks, partitioning the questions into eleven categories under the two main categories of language and culture. For each instance in click, we provide fine-grained annotation of which cultural and linguistic knowledge is required to correctly answer the question. Using CLIcK, we test 13 language models to assess their performance. Our evaluation uncovers insights into their performances across the categories, as well as the diverse factors affecting their comprehension. CLIcK offers the first large-scale comprehensive Korean-centric analysis of LLMs’ proficiency in Korean language and culture.</abstract>
       <url hash="b48fcc9b">2024.lrec-main.296</url>
@@ -3593,7 +3593,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>MIC</fixed-case>: Code Completion by Jointly Modeling In-file and Cross-file Context</title>
       <author><first>Yangruibo</first><last>Ding</last></author>
       <author><first>Zijian</first><last>Wang</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Murali Krishna</first><last>Ramanathan</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Parminder</first><last>Bhatia</last></author>
@@ -3618,7 +3618,7 @@
       <title>Code-Mixed Probes Show How Pre-Trained Models Generalise on Code-Switched Text</title>
       <author><first>Frances Adriana</first><last>Laureano De Leon</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>3457–3468</pages>
       <abstract>Code-switching is a prevalent linguistic phenomenon in which multilingual individuals seamlessly alternate between languages. Despite its widespread use online and recent research trends in this area, research in code-switching presents unique challenges, primarily stemming from the scarcity of labelled data and available resources. In this study we investigate how pre-trained Language Models handle code-switched text in three dimensions: a) the ability of PLMs to detect code-switched text, b) variations in the structural information that PLMs utilise to capture code-switched text, and c) the consistency of semantic information representation in code-switched text. To conduct a systematic and controlled evaluation of the language models in question, we create a novel dataset of well-formed naturalistic code-switched text along with parallel translations into the source languages. Our findings reveal that pre-trained language models are effective in generalising to code-switched text, shedding light on abilities of these models to generalise representations to CS corpora. We release all our code and data, including the novel corpus, at https://github.com/francesita/code-mixed-probes.</abstract>
       <url hash="f41f3e9e">2024.lrec-main.307</url>
@@ -3628,7 +3628,7 @@
       <title>Code-Mixed Text Augmentation for <fixed-case>L</fixed-case>atvian <fixed-case>ASR</fixed-case></title>
       <author><first>Martins</first><last>Kronis</last></author>
       <author><first>Askars</first><last>Salimbajevs</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>3469–3479</pages>
       <abstract>Code-mixing has become mainstream in the modern, globalised world and affects low-resource languages, such as Latvian, in particular. Solutions to developing an automatic speech recognition system (ASR) for code-mixed speech often rely on specially created audio-text corpora, which are expensive and time-consuming to create. In this work, we attempt to tackle code-mixed Latvian-English speech recognition by improving the language model (LM) of a hybrid ASR system. We make a distinction between inflected transliterations and phonetic transcriptions as two different foreign word types. We propose an inflected transliteration model and a phonetic transcription model for the automatic generation of said word types. We then leverage a large human-translated English-Latvian parallel text corpus to generate synthetic code-mixed Latvian sentences by substituting in generated foreign words. Using the newly created augmented corpora, we train a new LM and combine it with our existing Latvian acoustic model (AM). For evaluation, we create a specialised foreign word test set on which our methods yield up to 15% relative CER improvement. We then further validate these results in a human evaluation campaign.</abstract>
       <url hash="e103b054">2024.lrec-main.308</url>
@@ -3649,7 +3649,7 @@
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Ning</first><last>Cheng</last></author>
       <author><first>Xingyu</first><last>Cui</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Wenjuan</first><last>Han</last></author>
       <pages>3490–3506</pages>
       <abstract>In order to construct or extend entity-centric and event-centric knowledge graphs (KG and EKG), the information extraction (IE) annotation toolkit is essential. However, existing IE toolkits have several non-trivial problems, such as not supporting multi-tasks, and not supporting automatic updates. In this work, we present CollabKG, a learnable human-machine-cooperative IE toolkit for KG and EKG construction. Specifically, for the multi-task issue, CollabKG unifies different IE subtasks, including named entity recognition (NER), entity-relation triple extraction (RE), and event extraction (EE), and supports both KG and EKG. Then, combining advanced prompting-based IE technology, the human-machine-cooperation mechanism with Large Language Models (LLMs) as the assistant machine is presented which can provide a lower cost as well as a higher performance. Lastly, owing to the two-way interaction between the human and machine, CollabKG with learning ability allows self-renewal. Besides, CollabKG has several appealing features (e.g., customization, training-free, and label propagation) that make the system powerful and high-productivity. We holistically compare our toolkit with other existing tools on these features. Human evaluation quantitatively illustrates that CollabKG significantly improves annotation quality, efficiency, and stability simultaneously.</abstract>
@@ -3680,7 +3680,7 @@
       <title>Collecting Linguistic Resources for Assessing Children’s Pronunciation of <fixed-case>N</fixed-case>ordic Languages</title>
       <author><first>Anne Marte Haug</first><last>Olstad</last></author>
       <author><first>Anna</first><last>Smolander</last></author>
-      <author><first>Sofia</first><last>Strömbergsson</last></author>
+      <author id="sofia-stronbergsson"><first>Sofia</first><last>Strömbergsson</last></author>
       <author><first>Sari</first><last>Ylinen</last></author>
       <author><first>Minna</first><last>Lehtonen</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
@@ -3730,23 +3730,23 @@
     <paper id="317">
       <title>Common <fixed-case>E</fixed-case>uropean Language Data Space</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Katrin</first><last>Marheinecke</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Aivars</first><last>Bērziņš</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Dimitris</first><last>Gkoumas</last></author>
       <author><first>Annika</first><last>Grützner-Zahn</last></author>
       <author><first>Athanasia</first><last>Kolovou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Andis</first><last>Lagzdiņš</last></author>
       <author><first>Elena</first><last>Leitner</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
       <author><first>Simon</first><last>Ostermann</last></author>
       <author><first>Stefania</first><last>Racioppa</last></author>
@@ -3769,7 +3769,7 @@
       <author><first>Benjamin A.</first><last>Ibarra</last></author>
       <author><first>Nathaniel</first><last>Blanchard</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>3587–3602</pages>
       <abstract>Within Dialogue Modeling research in AI and NLP, considerable attention has been spent on “dialogue state tracking” (DST), which is the ability to update the representations of the speaker’s needs at each turn in the dialogue by taking into account the past dialogue moves and history. Less studied but just as important to dialogue modeling, however, is “common ground tracking” (CGT), which identifies the shared belief space held by all of the participants in a task-oriented dialogue: the task-relevant propositions all participants accept as true. In this paper we present a method for automatically identifying the current set of shared beliefs and ”questions under discussion” (QUDs) of a group with a shared goal. We annotate a dataset of multimodal interactions in a shared physical space with speech transcriptions, prosodic features, gestures, actions, and facets of collaboration, and operationalize these features for use in a deep neural model to predict moves toward construction of common ground. Model outputs cascade into a set of formal closure rules derived from situated evidence and belief axioms and update operations. We empirically assess the contribution of each feature type toward successful construction of common ground relative to ground truth, establishing a benchmark in this novel, challenging task.</abstract>
       <url hash="163e5072">2024.lrec-main.318</url>
@@ -3801,7 +3801,7 @@
     <paper id="321">
       <title>Comparison of Conventional Hybrid and <fixed-case>CTC</fixed-case>/Attention Decoders for Continuous Visual Speech Recognition</title>
       <author><first>David</first><last>Gimeno-Gómez</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>3628–3638</pages>
       <abstract>Thanks to the rise of deep learning and the availability of large-scale audio-visual databases, recent advances have been achieved in Visual Speech Recognition (VSR). Similar to other speech processing tasks, these end-to-end VSR systems are usually based on encoder-decoder architectures. While encoders are somewhat general, multiple decoding approaches have been explored, such as the conventional hybrid model based on Deep Neural Networks combined with Hidden Markov Models (DNN-HMM) or the Connectionist Temporal Classification (CTC) paradigm. However, there are languages and tasks in which data is scarce, and in this situation, there is not a clear comparison between different types of decoders. Therefore, we focused our study on how the conventional DNN-HMM decoder and its state-of-the-art CTC/Attention counterpart behave depending on the amount of data used for their estimation. We also analyzed to what extent our visual speech features were able to adapt to scenarios for which they were not explicitly trained, either considering a similar dataset or another collected for a different language. Results showed that the conventional paradigm reached recognition rates that improve the CTC/Attention model in data-scarcity scenarios along with a reduced training time and fewer parameters.</abstract>
       <url hash="0b8d6ca6">2024.lrec-main.321</url>
@@ -3821,7 +3821,7 @@
     <paper id="323">
       <title>Complex Word Identification: A Comparative Study between <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> and a Dedicated Model for This Task</title>
       <author><first>Abdelhak</first><last>Kelious</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Christophe</first><last>Coeur</last></author>
       <pages>3645–3653</pages>
       <abstract>There are several works in natural language processing for identifying lexical complexity. This can be for various reasons, either for simplification, the selection of more suitable content, or for other specific tasks. Words can have multiple definitions and degrees of complexity depending on the context in which they appear. One solution being investigated is lexical complexity prediction, where computational methods are used to evaluate the difficulty of vocabulary for language learners and offer personalized assistance. In this work, we explore deep learning methods to assess the complexity of a word based on its context. Specifically, we investigate how to use pre-trained language models to encode both the sentence and the target word, and then fine-tune them by combining them with additional frequency-based features. Our approach achieved superior results compared to the best systems in SemEval-2021 (Shardlow et al., 2021), as demonstrated by an R2 score of 0.65. Finally, we carry out a comparative study with ChatGPT to assess its potential for predicting lexical complexity, to see whether prompt engineering can be an alternative to this task, we will discuss the advantages and limitations of ChatGPT.</abstract>
@@ -3859,7 +3859,7 @@
       <title>Computational Modelling of Plurality and Definiteness in <fixed-case>C</fixed-case>hinese Noun Phrases</title>
       <author><first>Yuqi</first><last>Liu</last></author>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>3666–3676</pages>
       <abstract>Theoretical linguists have suggested that some languages (e.g., Chinese and Japanese) are “cooler” than other languages based on the observation that the intended meaning of phrases in these languages depends more on their contexts. As a result, many expressions in these languages are shortened, and their meaning is inferred from the context. In this paper, we focus on the omission of the plurality and definiteness markers in Chinese noun phrases (NPs) to investigate the predictability of their intended meaning given the contexts. To this end, we built a corpus of Chinese NPs, each of which is accompanied by its corresponding context, and by labels indicating its singularity/plurality and definiteness/indefiniteness. We carried out corpus assessments and analyses. The results suggest that Chinese speakers indeed drop plurality and definiteness markers very frequently. Building on the corpus, we train a bank of computational models using both classic machine learning models and state-of-the-art pre-trained language models to predict the plurality and definiteness of each NP. We report on the performance of these models and analyse their behaviours.</abstract>
       <url hash="b9b934d3">2024.lrec-main.325</url>
@@ -3880,10 +3880,10 @@
     <paper id="327">
       <title>Conceptual Pacts for Reference Resolution Using Small, Dynamically Constructed Language Models: A Study in Puzzle Building Dialogues</title>
       <author><first>Julian</first><last>Hough</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>3689–3699</pages>
       <abstract>Using Brennan and Clark’s theory of a Conceptual Pact, that when interlocutors agree on a name for an object, they are forming a temporary agreement on how to conceptualize that object, we present an extension to a simple reference resolver which simulates this process over time with different conversation pairs. In a puzzle construction domain, we model pacts with small language models for each referent which update during the interaction. When features from these pact models are incorporated into a simple bag-of-words reference resolver, the accuracy increases compared to using a standard pre-trained model. The model performs equally to a competitor using the same data but with exhaustive re-training after each prediction, while also being more transparent, faster and less resource-intensive. We also experiment with reducing the number of training interactions, and can still achieve reference resolution accuracies of over 80% in testing from observing a single previous interaction, over 20% higher than a pre-trained baseline. While this is a limited domain, we argue the model could be applicable to larger real-world applications in human and human-robot interaction and is an interpretable and transparent model.</abstract>
       <url hash="0dd0ec69">2024.lrec-main.327</url>
@@ -3897,11 +3897,11 @@
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Desh</first><last>Raj</last></author>
       <author><first>Leibny Paola</first><last>Garcia</last></author>
-      <author><first>Alexei V.</first><last>Ivanov</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei V.</first><last>Ivanov</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>Mingzhi</first><last>Yu</last></author>
       <author><first>Dan</first><last>Povey</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>3700–3706</pages>
       <abstract>Knowing the particular context associated with a conversation can help improving the performance of an automatic speech recognition (ASR) system. For example, if we are provided with a list of in-context words or phrases — such as the speaker’s contacts or recent song playlists — during inference, we can bias the recognition process towards this list. There are many works addressing contextual ASR; however, there is few publicly available real benchmark for evaluation, making it difficult to compare different solutions. To this end, we provide a corpus (“ConEC”) and baselines to evaluate contextual ASR approaches, grounded on real-world applications. The ConEC corpus is based on public-domain earnings calls (ECs) and associated supplementary materials, such as presentation slides, earnings news release as well as a list of meeting participants’ names and affiliations. We demonstrate that such real contexts are noisier than artificially synthesized contexts that contain the ground truth, yet they still make great room for future improvement of contextual ASR technology</abstract>
       <url hash="fd5a1ec2">2024.lrec-main.328</url>
@@ -3999,9 +3999,9 @@
       <author><first>Shijia</first><last>Zhou</last></author>
       <author><first>Leonie</first><last>Weissweiler</last></author>
       <author><first>Taiqi</first><last>He</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>3804–3811</pages>
       <abstract>In this paper, we make a contribution that can be understood from two perspectives: from an NLP perspective, we introduce a small challenge dataset for NLI with large lexical overlap, which minimises the possibility of models discerning entailment solely based on token distinctions, and show that GPT-4 and Llama 2 fail it with strong bias. We then create further challenging sub-tasks in an effort to explain this failure. From a Computational Linguistics perspective, we identify a group of constructions with three classes of adjectives which cannot be distinguished by surface features. This enables us to probe for LLM’s understanding of these constructions in various ways, and we find that they fail in a variety of ways to distinguish between them, suggesting that they don’t adequately represent their meaning or capture the lexical properties of phrasal heads.</abstract>
       <url hash="86ad7ce0">2024.lrec-main.336</url>
@@ -4015,7 +4015,7 @@
       <author><first>Kaiyu</first><last>Huang</last></author>
       <author><first>Anqi</first><last>Zhao</last></author>
       <author><first>Junpeng</first><last>Liu</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>3812–3824</pages>
       <abstract>Previous studies employ the autoregressive translation (AT) paradigm in the document-to-document neural machine translation. These methods extend the translation unit from a single sentence to a pseudo-document and encodes the full pseudo-document, avoiding the redundant computation problem in context. However, the AT methods cannot parallelize decoding and struggle with error accumulation, especially when the length of sentences increases. In this work, we propose a context-aware non-autoregressive framework with the sentence-aligned connectionist temporal classification (SA-CTC) loss for document-level neural machine translation. In particular, the SA-CTC loss reduces the search space of the decoding path by fixing the positions of the beginning and end tokens for each sentence in the document. Meanwhile, the context-aware architecture introduces preset nodes to represent sentence-level information and utilizes a hierarchical attention structure to regulate the attention hypothesis space. Experimental results show that our proposed method can achieve competitive performance compared with several strong baselines. Our method implements non-autoregressive modeling in Doc-to-Doc translation manner, achieving an average 46X decoding speedup compared to the document-level AT baselines on three benchmarks.</abstract>
       <url hash="7151753e">2024.lrec-main.337</url>
@@ -4084,7 +4084,7 @@
     <paper id="343">
       <title>Continual Reinforcement Learning for Controlled Text Generation</title>
       <author><first>Velizar</first><last>Shulev</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>3881–3889</pages>
       <abstract>Controlled Text Generation (CTG) steers the generation of continuations of a given context (prompt) by a Large Language Model (LLM) towards texts possessing a given attribute (e.g., topic, sentiment). In this paper we view CTG as a Continual Learning problem: how to learn at every step to steer next-word generation, without having to wait for end-of-sentence. This continual view is useful for online applications such as CTG for speech, where end-of-sentence is often uncertain. We depart from an existing model, the Plug-and-Play language models (PPLM), which perturbs the context at each step to better predict next-words that posses the desired attribute. While PPLM is intricate and has many hyper-parameters, we provide a proof that the PPLM objective function can be reduced to a Continual Reinforcement Learning (CRL) reward function, thereby simplifying PPLM and endowing it with a better understood learning framework. Subsequently, we present, the first of its kind, CTG algorithm that is fully based on CRL and exhibit promising empirical results.</abstract>
       <url hash="67c4c83d">2024.lrec-main.343</url>
@@ -4145,7 +4145,7 @@
     <paper id="349">
       <title>Controllable Sentence Simplification in <fixed-case>S</fixed-case>wedish Using Control Prefixes and Mined Paraphrases</title>
       <author><first>Julius</first><last>Monsen</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <pages>3943–3954</pages>
       <abstract>Making information accessible to diverse target audiences, including individuals with dyslexia and cognitive disabilities, is crucial. Automatic Text Simplification (ATS) systems aim to facilitate readability and comprehension by reducing linguistic complexity. However, they often lack customizability to specific user needs, and training data for smaller languages can be scarce. This paper addresses ATS in a Swedish context, using methods that provide more control over the simplification. A dataset of Swedish paraphrases is mined from large amounts of text and used to train ATS models utilizing prefix-tuning with control prefixes. We also introduce a novel data-driven method for selecting complexity attributes for controlling the simplification and compare it with previous approaches. Evaluation of the trained models using SARI and BLEU demonstrates significant improvements over the baseline — a fine-tuned Swedish BART model — and compared to previous Swedish ATS results. These findings highlight the effectiveness of employing paraphrase data in conjunction with controllable generation mechanisms for simplification. Additionally, the set of explored attributes yields similar results compared to previously used attributes, indicating their ability to capture important simplification aspects.</abstract>
       <url hash="bc3531bc">2024.lrec-main.349</url>
@@ -4154,7 +4154,7 @@
     <paper id="350">
       <title>Controlled Generation with Prompt Insertion for Natural Language Explanations in Grammatical Error Correction</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3955–3961</pages>
       <abstract>In Grammatical Error Correction (GEC), it is crucial to ensure the user’s comprehension of a reason for correction. Existing studies present tokens, examples, and hints for corrections, but do not directly explain the reasons in natural language. Although methods that use Large Language Models (LLMs) to provide direct explanations in natural language have been proposed for various tasks, no such method exists for GEC. Generating explanations for GEC corrections involves aligning input and output tokens, identifying correction points, and presenting corresponding explanations consistently. However, it is not straightforward to specify a complex format to generate explanations, because explicit control of generation is difficult with prompts. This study introduces a method called controlled generation with Prompt Insertion (PI) so that LLMs can explain the reasons for corrections in natural language. In PI, LLMs first correct the input text, and then we automatically extract the correction points based on the rules. The extracted correction points are sequentially inserted into the LLM’s explanation output as prompts, guiding the LLMs to generate explanations for the correction points. We also create an Explainable GEC (XGEC) dataset of correction reasons by annotating NUCLE, CoNLL2013, and CoNLL2014. Although generations from GPT-3.5 and ChatGPT using original prompts miss some correction points, the generation control using PI can explicitly guide to describe explanations for all correction points, contributing to improved performance in generating correction reasons.</abstract>
       <url hash="00bd6ae2">2024.lrec-main.350</url>
@@ -4176,7 +4176,7 @@
       <title>Conversational Grounding: Annotation and Analysis of Grounding Acts and Grounding Units</title>
       <author><first>Biswesh</first><last>Mohapatra</last></author>
       <author><first>Seemab</first><last>Hassan</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <pages>3967–3977</pages>
       <abstract>Successful conversations often rest on common understanding, where all parties are on the same page about the information being shared. This process, known as conversational grounding, is crucial for building trustworthy dialog systems that can accurately keep track of and recall the shared information. The proficiencies of an agent in grounding the conveyed information significantly contribute to building a reliable dialog system. Despite recent advancements in dialog systems, there exists a noticeable deficit in their grounding capabilities. Traum (Traum, 1995) provided a framework for conversational grounding introducing Grounding Acts and Grounding Units, but substantial progress, especially in the realm of Large Language Models, remains lacking. To bridge this gap, we present the annotation of two dialog corpora employing Grounding Acts, Grounding Units, and a measure of their degree of grounding. We discuss our key findings during the annotation and also provide a baseline model to test the performance of current Language Models in categorizing the grounding acts of the dialogs. Our work aims to provide a useful resource for further research in making conversations with machines better understood and more reliable in natural day-to-day collaborative dialogs.</abstract>
@@ -4186,8 +4186,8 @@
     <paper id="353">
       <title>Converting Legacy Data to <fixed-case>CLDF</fixed-case>: A <fixed-case>FAIR</fixed-case> Exit Strategy for Linguistic Web Apps</title>
       <author><first>Robert</first><last>Forkel</last></author>
-      <author><first>Daniel</first><last>Swanson</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>3978–3982</pages>
       <abstract>In the mid 2000s, there were several large-scale US National Science Foundation (NSF) grants awarded to projects aiming at developing digital infrastructure and standards for different forms of linguistics data. For example, MultiTree encoded language family trees as phylogenies in XML and LL-MAP converted detailed geographic maps of endangered languages into KML. As early stand-alone website applications, these projects allowed researchers interested in comparative linguistics to explore language genealogies and areality, respectively. However as time passed, the technologies that supported these web apps became deprecated, unsupported, and inaccessible. Here we take a future-oriented approach to digital obsolescence and illustrate how to convert legacy linguistic resources into FAIR data via the Cross-Linguistic Data Formats (CLDF). CLDF is built on the W3C recommendations Model for Tabular Data and Metadata on the Web and Metadata Vocabulary for Tabular Data developed by the CSVW (CSV on the Web) working group. Thus, each dataset is modeled as a set of tabular data files described by metadata in JSON. These standards and the tools built to validate and manipulate them provide an accessible and extensible format for converting legacy linguistic web apps into FAIR datasets.</abstract>
       <url hash="2a83ab49">2024.lrec-main.353</url>
@@ -4209,7 +4209,7 @@
     </paper>
     <paper id="355">
       <title><fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>elation: Boosting Automatic <fixed-case>ICD</fixed-case> Coding through Contextualized Code Relation Learning</title>
-      <author id="junyu-luo"><first>Junyu</first><last>Luo</last></author>
+      <author><first>Junyu</first><last>Luo</last></author>
       <author><first>Xiaochen</first><last>Wang</last></author>
       <author><first>Jiaqi</first><last>Wang</last></author>
       <author><first>Aofei</first><last>Chang</last></author>
@@ -4227,7 +4227,7 @@
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Natalie</first><last>Parde</last></author>
       <author><first>Eugene</first><last>Rohrbaugh</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>4008–4020</pages>
       <abstract>Naively assuming English as a source language may hinder cross-lingual transfer for many languages by failing to consider the importance of language contact. Some languages are more well-connected than others, and target languages can benefit from transferring from closely related languages; for many languages, the set of closely related languages does not include English. In this work, we study the impact of source language for cross-lingual transfer, demonstrating the importance of selecting source languages that have high contact with the target language. We also construct a novel benchmark dataset for close contact Chinese-Japanese-Korean-Vietnamese (CJKV) languages to further encourage in-depth studies of language contact. To comprehensively capture contact between these languages, we propose to integrate Romanized transcription beyond textual scripts via Contrastive Learning objectives, leading to enhanced cross-lingual representations and effective zero-shot cross-lingual transfer.</abstract>
       <url hash="da56deac">2024.lrec-main.356</url>
@@ -4239,7 +4239,7 @@
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Antal P.J.</first><last>van den Bosch</last></author>
       <author><first>Douwe</first><last>Zeldenrust</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <pages>4021–4029</pages>
       <abstract>The Dutch Dialect Database (also known as the ‘Nederlandse Dialectenbank’) contains dialectal variations of Dutch that were recorded all over the Netherlands in the second half of the twentieth century. A subset of these recordings of about 300 hours were enriched with manual orthographic transcriptions, using non-standard approximations of dialectal speech. In this paper we describe the creation of a corpus containing both the audio recordings and their corresponding transcriptions and focus on our method for aligning the recordings with the transcriptions and the metadata.</abstract>
       <url hash="6de1d284">2024.lrec-main.357</url>
@@ -4302,7 +4302,7 @@
     <paper id="363">
       <title>Counterfactual Dialog Mixing as Data Augmentation for Task-Oriented Dialog Systems</title>
       <author><first>Sebastian</first><last>Steindl</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Bernd</first><last>Ludwig</last></author>
       <pages>4078–4087</pages>
       <abstract>High-quality training data for Task-Oriented Dialog (TOD) systems is costly to come by if no corpora are available. One method to extend available data is data augmentation. Yet, the research into and adaptation of data augmentation techniques for TOD systems is limited in comparison with other data modalities. We propose a novel, causally-flavored data augmentation technique called Counterfactual Dialog Mixing (CDM) that generates realistic synthetic dialogs via counterfactuals to increase the amount of training data. We demonstrate the method on a benchmark dataset and show that a model trained to classify the counterfactuals from the original data fails to do so, which strengthens the claim of creating realistic synthetic dialogs. To evaluate the effectiveness of CDM, we train a current architecture on a benchmark dataset and compare the performance with and without CDM. By doing so, we achieve state-of-the-art on some metrics. We further investigate the external generalizability and a lower resource setting. To evaluate the models, we adopted an interactive evaluation scheme.</abstract>
@@ -4405,7 +4405,7 @@
     <paper id="373">
       <title><fixed-case>C</fixed-case>ross<fixed-case>T</fixed-case>une: Black-Box Few-Shot Classification with Label Enhancement</title>
       <author><first>Danqing</first><last>Luo</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>4185–4197</pages>
@@ -4425,7 +4425,7 @@
     <paper id="375">
       <title><fixed-case>CSSW</fixed-case>iki: A <fixed-case>C</fixed-case>hinese Sentence Simplification Dataset with Linguistic and Content Operations</title>
       <author><first>Fengkai</first><last>Liu</last></author>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <pages>4205–4213</pages>
       <abstract>Sentence Simplification aims to make sentences easier to read and understand. With most effort on corpus development focused on English, the amount of annotated data is limited in Chinese. To address this need, we introduce CSSWiki, an open-source dataset for Chinese sentence simplification based on Wikipedia. This dataset contains 1.6k source sentences paired with their simplified versions. Each sentence pair is annotated with operation tags that distinguish between linguistic and content modifications. We analyze differences in annotation scheme and data statistics between CSSWiki and existing datasets. We then report baseline sentence simplification performance on CSSWiki using zero-shot and few-shot approaches with Large Language Models.</abstract>
       <url hash="4696f7c5">2024.lrec-main.375</url>
@@ -4485,7 +4485,7 @@
       <author><first>Cam-Van Thi</first><last>Nguyen</last></author>
       <author><first>Cao-Bach</first><last>Nguyen</last></author>
       <author><first>Duc-Trong</first><last>Le</last></author>
-      <author><first>Quang-Thuy</first><last>Ha</last></author>
+      <author id="quang-thuy-ha"><first>Quang-Thuy</first><last>Ha</last></author>
       <pages>4259–4265</pages>
       <abstract>Emotion recognition in conversation (ERC) is a crucial task in natural language processing and affective computing. This paper proposes MultiDAG+CL, a novel approach for Multimodal Emotion Recognition in Conversation (ERC) that employs Directed Acyclic Graph (DAG) to integrate textual, acoustic, and visual features within a unified framework. The model is enhanced by Curriculum Learning (CL) to address challenges related to emotional shifts and data imbalance. Curriculum learning facilitates the learning process by gradually presenting training samples in a meaningful order, thereby improving the model’s performance in handling emotional variations and data imbalance. Experimental results on the IEMOCAP and MELD datasets demonstrate that the MultiDAG+CL models outperform baseline models. We release the code for and experiments: <url>https://github.com/vanntc711/MultiDAG-CL</url>.</abstract>
       <url hash="5c30c81e">2024.lrec-main.380</url>
@@ -4619,7 +4619,7 @@
       <author><first>Ileana</first><last>Rugina</last></author>
       <author><first>Rumen</first><last>Dangovski</last></author>
       <author><first>Li</first><last>Jing</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Marin</first><last>Soljacic</last></author>
       <pages>4392–4403</pages>
       <abstract>Attention mechanisms play a crucial role in the neural revolution of Natural Language Processing (NLP). With the growth of attention-based models, several pruning techniques have been developed to identify and exploit sparseness, making these models more efficient. Most efforts focus on hard-coding attention patterns or pruning attention weights based on training data. We propose Attention Pruning (AP), a framework that observes attention patterns in a fixed dataset and generates a global sparseness mask. AP saves 90% of attention computation for language modeling and about 50% for machine translation and GLUE tasks, maintaining result quality. Our method reveals important distinctions between self- and cross-attention patterns, guiding future NLP research. Our framework can reduce both latency and memory requirements for any attention-based model, aiding in the development of improved models for existing or new NLP applications. We have demonstrated this with encoder and autoregressive transformer models using Triton GPU kernels and make our code publicly available at https://github.com/irugina/AP</abstract>
@@ -4641,7 +4641,7 @@
     <paper id="394">
       <title>Dataset of Quotation Attribution in <fixed-case>G</fixed-case>erman News Articles</title>
       <author><first>Fynn</first><last>Petersen-Frey</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>4412–4422</pages>
       <abstract>Extracting who says what to whom is a crucial part in analyzing human communication in today’s abundance of data such as online news articles. Yet, the lack of annotated data for this task in German news articles severely limits the quality and usability of possible systems. To remedy this, we present a new, freely available, creative-commons-licensed dataset for quotation attribution in German news articles based on WIKINEWS. The dataset provides curated, high-quality annotations across 1000 documents (250,000 tokens) in a fine-grained annotation schema enabling various downstream uses for the dataset. The annotations not only specify who said what but also how, in which context, to whom and define the type of quotation. We specify our annotation schema, describe the creation of the dataset and provide a quantitative analysis. Further, we describe suitable evaluation metrics, apply two existing systems for quotation attribution, discuss their results to evaluate the utility of our dataset and outline use cases of our dataset in downstream tasks.</abstract>
       <url hash="8b0928a3">2024.lrec-main.394</url>
@@ -4677,7 +4677,7 @@
       <author><first>Merve</first><last>Ünlü Menevşe</last></author>
       <author><first>Yusufcan</first><last>Manav</last></author>
       <author><first>Ebru</first><last>Arisoy</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>4449–4455</pages>
       <abstract>This paper focuses on dealing with data scarcity in spoken question answering (QA) using automatic question-answer generation and a carefully selected fine-tuning strategy that leverages limited annotated data (paragraphs and question-answer pairs). Spoken QA is a challenging task due to using spoken documents, i.e., erroneous automatic speech recognition (ASR) transcriptions, and the scarcity of spoken QA data. We propose a framework for utilizing limited annotated data effectively to improve spoken QA performance. To deal with data scarcity, we train a question-answer generation model with annotated data and then produce large amounts of question-answer pairs from unannotated data (paragraphs). Our experiments demonstrate that incorporating limited annotated data and the automatically generated data through a carefully selected fine-tuning strategy leads to 5.5% relative F1 gain over the model trained only with annotated data. Moreover, the proposed framework is also effective in high ASR errors.</abstract>
       <url hash="2e8f8caf">2024.lrec-main.397</url>
@@ -4705,8 +4705,8 @@
     <paper id="400">
       <title><fixed-case>DECM</fixed-case>: Evaluating Bilingual <fixed-case>ASR</fixed-case> Performance on a Code-switching/mixing Benchmark</title>
       <author><first>Enes Yavuz</first><last>Ugan</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>4468–4475</pages>
       <abstract>Automatic Speech Recognition has made significant progress, but challenges persist. Code-switched (CSW) Speech presents one such challenge, involving the mixing of multiple languages by a speaker. Even when multilingual ASR models are trained, each utterance on its own usually remains monolingual. We introduce an evaluation dataset for German-English CSW, with German as the matrix language and English as the embedded language. The dataset comprises spontaneous speech from diverse domains, enabling realistic CSW evaluation in German-English. It includes splits with varying degrees of CSW to facilitate specialized model analysis. As it is difficult to collect CSW data for all language pairs, the provision of such evaluation data, is crucial for developing and analyzing ASR models capable of generalizing across unseen pairs. Detailed data statistics are presented, and state-of-the-art (SOTA) multilingual models are evaluated showing challanges of CSW speech.</abstract>
       <url hash="db8e81b3">2024.lrec-main.400</url>
@@ -4724,7 +4724,7 @@
       <author><first>Chengru</first><last>Song</last></author>
       <author><first>Di</first><last>Zhang</last></author>
       <author><first>Kun</first><last>Gai</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>4476–4487</pages>
       <abstract>Large language models have demonstrated exceptional capability in natural language understanding and generation. However, their generation speed is limited by the inherently sequential nature of their decoding process, posing challenges for real-time applications. This paper introduces Lexical Unit Decoding (LUD), a novel decoding methodology implemented in a data-driven manner, accelerating the decoding process without sacrificing output quality. The core of our approach is the observation that a pre-trained language model can confidently predict multiple contiguous tokens, forming the basis for a lexical unit, in which these contiguous tokens could be decoded in parallel. Extensive experiments validate that our method substantially reduces decoding time while maintaining generation quality, i.e., 33% speed up on natural language generation with no quality loss, and 30% speed up on code generation with a negligible quality loss of 3%. Distinctively, LUD requires no auxiliary models and does not require changes to existing architectures. It can also be integrated with other decoding acceleration methods, thus achieving an even more pronounced inference efficiency boost. We posit that the foundational principles of LUD could define a new decoding paradigm for future language models, enhancing their applicability for a broader spectrum of applications. All codes are be publicly available at https://github.com/tjunlp-lab/Lexical-Unit-Decoding-LUD-.</abstract>
       <url hash="10fdb56f">2024.lrec-main.401</url>
@@ -4856,7 +4856,7 @@
       <author><first>Zhihao</first><last>Fan</last></author>
       <author><first>Zejun</first><last>Li</last></author>
       <author><first>Ruipu</first><last>Luo</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>4605–4616</pages>
       <abstract>Vision-and-Language navigation (VLN) requires an agent to navigate in unseen environment by following natural language instruction. For task completion, the agent needs to align and integrate various navigation modalities, including instruction, observation and navigation history. Existing works primarily concentrate on cross-modal attention at the fusion stage to achieve this objective. Nevertheless, modality features generated by disparate uni-encoders reside in their own spaces, leading to a decline in the quality of cross-modal fusion and decision. To address this problem, we propose a Dual-levEL AligNment (DELAN) framework by cross-modal contrastive learning. This framework is designed to align various navigation-related modalities before fusion, thereby enhancing cross-modal interaction and action decision-making. Specifically, we divide the pre-fusion alignment into dual levels: instruction-history level and landmark-observation level according to their semantic correlations. We also reconstruct a dual-level instruction for adaptation to the dual-level alignment. As the training signals for pre-fusion alignment are extremely limited, self-supervised contrastive learning strategies are employed to enforce the matching between different modalities. Our approach seamlessly integrates with the majority of existing models, resulting in improved navigation performance on various VLN benchmarks, including R2R, R4R, RxR and CVDN.</abstract>
@@ -4868,8 +4868,8 @@
       <author><first>Shiming</first><last>He</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Shuai</first><last>Yang</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>4617–4625</pages>
       <abstract>We tackle Event Argument Extraction (EAE) in the manner of template-based generation. Based on our exploration of generative EAE, it suffers from several issues, such as multiple arguments of one role, generating words out of context and inconsistency with prescribed format. We attribute it to the weakness of following complex input prompts. To address these problems, we propose the demonstration retrieval-augmented generative EAE (DRAGEAE), containing two components: event knowledge-injected generator (EKG) and demonstration retriever (DR). EKG employs event knowledge prompts to capture role dependencies and semantics. DR aims to search informative demonstrations from training data, facilitating the conditional generation of EKG. To train DR, we use the probability-based rankings from large language models (LLMs) as supervised signals. Experimental results on ACE-2005, RAMS and WIKIEVENTS demonstrate that our method outperforms all strong baselines and it can be generalized to various datasets. Further analysis is conducted to discuss the impact of diverse LLMs and prove that our model alleviates the above issues.</abstract>
       <url hash="2b368db7">2024.lrec-main.412</url>
@@ -4878,7 +4878,7 @@
     <paper id="413">
       <title>Denoising Labeled Data for Comment Moderation Using Active Learning</title>
       <author><first>Andraž</first><last>Pelicon</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Ravi</first><last>Shekhar</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
@@ -4892,7 +4892,7 @@
       <author><first>Deokhyung</first><last>Kang</last></author>
       <author><first>Baikjin</first><last>Jung</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>4634–4640</pages>
       <abstract>In table-text open-domain question answering, a retriever system retrieves relevant evidence from tables and text to answer questions. Previous studies in table-text open-domain question answering have two common challenges: firstly, their retrievers can be affected by false-positive labels in training datasets; secondly, they may struggle to provide appropriate evidence for questions that require reasoning across the table. To address these issues, we propose Denoised Table-Text Retriever (DoTTeR). Our approach involves utilizing a denoised training dataset with fewer false positive labels by discarding instances with lower question-relevance scores measured through a false positive detection model. Subsequently, we integrate table-level ranking information into the retriever to assist in finding evidence for questions that demand reasoning across the table. To encode this ranking information, we fine-tune a rank-aware column encoder to identify minimum and maximum values within a column. Experimental results demonstrate that DoTTeR significantly outperforms strong baselines on both retrieval recall and downstream QA tasks. Our code is available at https://github.com/deokhk/DoTTeR.</abstract>
       <url hash="c4b43422">2024.lrec-main.414</url>
@@ -4927,9 +4927,9 @@
     </paper>
     <paper id="417">
       <title>Depth-Wise Attention (<fixed-case>DWA</fixed-case>tt): A Layer Fusion Method for Data-Efficient Classification</title>
-      <author><first>Muhammad</first><last>ElNokrashy</last></author>
+      <author id="muhammad-elnokrashy"><first>Muhammad</first><last>ElNokrashy</last></author>
       <author><first>Badr</first><last>AlKhamissi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>4665–4674</pages>
       <abstract>Language Models pretrained on large textual data have been shown to encode different types of knowledge simultaneously. Traditionally, only the features from the last layer are used when adapting to new tasks or data. We put forward that, when using or finetuning deep pretrained models, intermediate layer features that may be relevant to the downstream task are buried too deep to be used efficiently in terms of needed samples or steps. To test this, we propose a new layer fusion method: Depth-Wise Attention (DWAtt), to help re-surface signals from non-final layers. We compare DWAtt to a basic concatenation-based layer fusion method (Concat), and compare both to a deeper model baseline—all kept within a similar parameter budget. Our findings show that DWAtt and Concat are more step- and sample-efficient than the baseline, especially in the few-shot setting. DWAtt outperforms Concat on larger data sizes. On CoNLL-03 NER, layer fusion shows 3.68 − 9.73% F1 gain at different few-shot sizes. The layer fusion models presented significantly outperform the baseline in various training scenarios with different data sizes, architectures, and training constraints.</abstract>
       <url hash="dc3d8042">2024.lrec-main.417</url>
@@ -4976,7 +4976,7 @@
       <author><first>Seonmin</first><last>Koo</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>4705–4716</pages>
       <abstract>Recent machine translation (MT) systems have overcome language barriers for a wide range of users, yet they still carry the risk of critical meaning deviation. Critical error detection (CED) is a task that identifies an inherent risk of catastrophic meaning distortions in the machine translation output. With the importance of reflecting cultural elements in detecting critical errors, we introduce the culture-aware “Politeness” type in detecting English-Korean critical translation errors. Besides, we facilitate two tasks by providing multiclass labels: critical error detection and critical error type classification (CETC). Empirical evaluations reveal that our introduced data augmentation approach using a newly presented perturber significantly outperforms existing baselines in both tasks. Further analysis highlights the significance of multiclass labeling by demonstrating its superior effectiveness compared to binary labels.</abstract>
       <url hash="7eb1c422">2024.lrec-main.421</url>
@@ -5060,7 +5060,7 @@
     <paper id="428">
       <title>Detection, Diagnosis, and Explanation: A Benchmark for <fixed-case>C</fixed-case>hinese Medial Hallucination Evaluation</title>
       <author><first>Chengfeng</first><last>Dou</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Yanyuan</first><last>Chen</last></author>
       <author><first>Zhi</first><last>Jin</last></author>
       <author><first>Wenpin</first><last>Jiao</last></author>
@@ -5076,7 +5076,7 @@
       <title>Developing a Benchmark for Pronunciation Feedback: Creation of a Phonemically Annotated Speech Corpus of isi<fixed-case>Z</fixed-case>ulu Language Learner Speech</title>
       <author><first>Alexandra</first><last>O’Neil</last></author>
       <author><first>Nils</first><last>Hjortnaes</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Zinhle</first><last>Nkosi</last></author>
       <author><first>Thulile</first><last>Ndlovu</last></author>
       <author><first>Zanele</first><last>Mlondo</last></author>
@@ -5091,7 +5091,7 @@
       <author><first>Lucie</first><last>Polakova</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Šárka</first><last>Zikánová</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <pages>4802–4810</pages>
       <abstract>We introduce the first version of the Czech RST Discourse Treebank, a collection of Czech journalistic texts manually annotated using the Rhetorical Structure Theory (RST), a global coherence model proposed by Mann and Thompson (1988). Each document in the corpus is represented as a single tree-like structure, where discourse units are interconnected through hierarchical rhetorical relations and their relative importance for the main purpose of a text is modeled by the nuclearity principle. The treebank is freely available in the LINDAT/CLARIAH-CZ repository under the Creative Commons license; for some documents, it includes two gold annotations representing divergent yet relevant interpretations. The paper outlines the annotation process, provides corpus statistics and evaluation, and discusses the issue of consistency associated with the global level of textual interpretation. In general, good agreement on the structure and labeling could be achieved on the lowest, local tree level and on the identification of the most central (nuclear) elementary discourse units. Disagreements mostly concerned segmentation and, in the structure, differences in the stepwise process of linking the largest text blocks. The project contributes to the advancement of RST research and its application to real-world text analysis challenges.</abstract>
       <url hash="da3e23d6">2024.lrec-main.430</url>
@@ -5260,7 +5260,7 @@
       <title>Discourse Structure for the <fixed-case>M</fixed-case>inecraft Corpus</title>
       <author><first>Kate</first><last>Thompson</last></author>
       <author><first>Julie</first><last>Hunter</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>4957–4967</pages>
       <abstract>We provide a new linguistic resource: The Minecraft Structured Dialogue Corpus (MSDC), a discourse annotated version of the Minecraft Dialogue Corpus (MDC; Narayan-Chen et al., 2019), with complete, situated discourse structures in the style of SDRT (Asher and Lascarides, 2003). Our structures feature both linguistic discourse moves and nonlinguistic actions. To show computational tractability, we train a discourse parser with a novel “2 pass architecture” on MSDC that gives excellent results on attachment prediction and relation labeling tasks especially long distance attachments.</abstract>
       <url hash="75f625df">2024.lrec-main.444</url>
@@ -5291,7 +5291,7 @@
       <author><first>Chloé</first><last>Braud</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Laura</first><last>Rivière</last></author>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Damien</first><last>Sileo</last></author>
       <author><first>Tatsuya</first><last>Aoyama</last></author>
@@ -5390,7 +5390,7 @@
       <author><first>Mingxiao</first><last>Li</last></author>
       <author><first>Jingyuan</first><last>Sun</last></author>
       <author><first>Jesse</first><last>Davis</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>5109–5118</pages>
       <abstract>Argument structure learning (ASL) entails predicting relations between arguments. Because it can structure a document to facilitate its understanding, it has been widely applied in many fields (medical, commercial, and scientific domains). Despite its broad utilization, ASL remains a challenging task because it involves examining the complex relationships between the sentences in a potentially unstructured discourse. To resolve this problem, we have developed a simple yet effective approach called Dual-tower Multi-scale cOnvolution neural Network (DMON) for the ASL task. Specifically, we organize arguments into a relationship matrix that together with the argument embeddings forms a relationship tensor and design a mechanism to capture relations with contextual arguments. Experimental results on three different-domain argument mining datasets demonstrate that our framework outperforms state-of-the-art models. We will release the code after paper acceptance.</abstract>
       <url hash="0b4c0f9d">2024.lrec-main.455</url>
@@ -5403,7 +5403,7 @@
       <author><first>Fuli</first><last>Feng</last></author>
       <author><first>Zifeng</first><last>Ren</last></author>
       <author><first>Moxin</first><last>Li</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>5119–5131</pages>
       <abstract>Table-text document (e.g., financial reports) understanding has attracted increasing attention in recent two years. TAT-DQA is a realistic setting for the understanding of visually-rich table-text documents, which involves answering associated questions requiring discrete reasoning. Most existing work relies on token-level semantics, falling short in the reasoning across document elements such as quantities and dates. To address this limitation, we propose a novel Doc2SoarGraph model that exploits element-level semantics and employs Semantic-oriented hierarchical Graph structures to capture the differences and correlations among different elements within the given document and question. Extensive experiments on the TAT-DQA dataset reveal that our model surpasses the state-of-the-art conventional method (i.e., MHST) and large language model (i.e., ChatGPT) by 17.73 and 6.49 points respectively in terms of Exact Match (EM) metric, demonstrating exceptional effectiveness.</abstract>
       <url hash="2bc95598">2024.lrec-main.456</url>
@@ -5432,7 +5432,7 @@
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Jiuxiang</first><last>Gu</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Dinesh</first><last>Manocha</last></author>
       <author><first>Rajiv</first><last>Jain</last></author>
       <pages>5140–5155</pages>
@@ -5473,7 +5473,7 @@
       <author><first>Zikang</first><last>Liu</last></author>
       <author><first>Ze-Feng</first><last>Gao</last></author>
       <author><first>Dawei</first><last>Gao</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Bolin</first><last>Ding</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -5524,8 +5524,8 @@
       <author><first>Taja</first><last>Kuzman</last></author>
       <author><first>Peter</first><last>Rupnik</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <pages>5221–5234</pages>
       <abstract>Large, curated, web-crawled corpora play a vital role in training language models (LMs). They form the lion’s share of the training data in virtually all recent LMs, such as the well-known GPT, LLaMA and XLM-RoBERTa models. However, despite this importance, relatively little attention has been given to the quality of these corpora. In this paper, we compare four of the currently most relevant large, web-crawled corpora (CC100, MaCoCu, mC4 and OSCAR) across eleven lower-resourced European languages. Our approach is two-fold: first, we perform an intrinsic evaluation by performing a human evaluation of the quality of samples taken from different corpora; then, we assess the practical impact of the qualitative differences by training specific LMs on each of the corpora and evaluating their performance on downstream tasks. We find that there are clear differences in quality of the corpora, with MaCoCu and OSCAR obtaining the best results. However, during the extrinsic evaluation, we actually find that the CC100 corpus achieves the highest scores. We conclude that, in our experiments, the quality of the web-crawled corpora does not seem to play a significant role when training LMs.</abstract>
@@ -5535,7 +5535,7 @@
     <paper id="466">
       <title>Do Large Language Models Understand Mansplaining? Well, Actually...</title>
       <author><first>Carla</first><last>Perez Almendros</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>5235–5246</pages>
       <abstract>Gender bias has been widely studied by the NLP community. However, other more subtle variations of it, such as mansplaining, have yet received little attention. Mansplaining is a discriminatory behaviour that consists of a condescending treatment or discourse towards women. In this paper, we introduce and analyze Well, actually..., a corpus of 886 mansplaining stories experienced by women. We analyze the corpus in terms of features such as offensiveness, sentiment or misogyny, among others. We also explore to what extent Large Language Models (LLMs) can understand and identify mansplaining and other gender-related microaggressions. Specifically, we experiment with ChatGPT-3.5-Turbo and LLaMA-2 (13b and 70b), with both targeted and open questions. Our findings suggest that, although they can identify mansplaining to some extent, LLMs still struggle to point out this attitude and will even reproduce some of the social patterns behind mansplaining situations, for instance by praising men for giving unsolicited advice to women.</abstract>
       <url hash="5db33564">2024.lrec-main.466</url>
@@ -5544,7 +5544,7 @@
     <paper id="467">
       <title>Domain Adaptation for Dense Retrieval and Conversational Dense Retrieval through Self-Supervision by Meticulous Pseudo-Relevance Labeling</title>
       <author><first>Minghan</first><last>Li</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>5247–5259</pages>
       <abstract>Recent studies have demonstrated that the ability of dense retrieval models to generalize to target domains with different distributions is limited, which contrasts with the results obtained with interaction-based models. Prior attempts to mitigate this challenge involved leveraging adversarial learning and query generation approaches, but both approaches nevertheless resulted in limited improvements. In this paper, we propose to combine the query-generation approach with a self-supervision approach in which pseudo-relevance labels are automatically generated on the target domain. To accomplish this, a T5-3B model is utilized for pseudo-positive labeling, and meticulous hard negatives are chosen. We also apply this strategy on conversational dense retrieval model for conversational search. A similar pseudo-labeling approach is used, but with the addition of a query-rewriting module to rewrite conversational queries for subsequent labeling. This proposed approach enables a model’s domain adaptation with real queries and documents from the target dataset. Experiments on standard dense retrieval and conversational dense retrieval models both demonstrate improvements on baseline models when they are fine-tuned on the pseudo-relevance labeled data.</abstract>
       <url hash="2b4b2821">2024.lrec-main.467</url>
@@ -5580,7 +5580,7 @@
       <author><first>Qin</first><last>Chen</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Gui</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5286–5298</pages>
       <abstract>Domain adaption has been widely adapted for cross-domain sentiment analysis to transfer knowledge from the source domain to the target domain. Whereas, most methods are proposed under the assumption that the target (test) domain is known, making them fail to generalize well on unknown test data that is not always available in practice. In this paper, we focus on the problem of domain generalization for cross-domain sentiment analysis. Specifically, we propose a backdoor adjustment-based causal model to disentangle the domain-specific and domain-invariant representations that play essential roles in tackling domain shift. First, we rethink the cross-domain sentiment analysis task in a causal view to model the causal-and-effect relationships among different variables. Then, to learn an invariant feature representation, we remove the effect of domain confounders (e.g., domain knowledge) using the backdoor adjustment. A series of experiments over many homologous and diverse datasets show the great performance and robustness of our model by comparing it with the state-of-the-art domain generalization baselines.</abstract>
       <url hash="1cecbb8e">2024.lrec-main.470</url>
@@ -5613,8 +5613,8 @@
       <title><fixed-case>DORE</fixed-case>: A Dataset for <fixed-case>P</fixed-case>ortuguese Definition Generation</title>
       <author><first>Anna Beatriz</first><last>Dimas Furtado</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Frederic</first><last>Blain</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>5315–5322</pages>
       <abstract>Definition modelling (DM) is the task of automatically generating a dictionary definition of a specific word. Computational systems that are capable of DM can have numerous applications benefiting a wide range of audiences. As DM is considered a supervised natural language generation problem, these systems require large annotated datasets to train the machine learning (ML) models. Several DM datasets have been released for English and other high-resource languages. While Portuguese is considered a mid/high-resource language in most natural language processing tasks and is spoken by more than 200 million native speakers, there is no DM dataset available for Portuguese. In this research, we fill this gap by introducing DORE; the first dataset for <b>D</b>efinition M<b>O</b>delling for Po<b>R</b>tugu<b>E</b>se containing more than 100,000 definitions. We also evaluate several deep learning based DM models on DORE and report the results. The dataset and the findings of this paper will facilitate research and study of Portuguese in wider contexts.</abstract>
       <url hash="e73630aa">2024.lrec-main.473</url>
@@ -5652,7 +5652,7 @@
       <author><first>Yujie</first><last>Chen</last></author>
       <author><first>Shenglan</first><last>Wu</last></author>
       <author><first>Haoyuan</first><last>Hu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <pages>5350–5364</pages>
       <abstract>Open Domain Multi-Hop Question Answering (ODMHQA) plays a crucial role in Natural Language Processing (NLP) by aiming to answer complex questions through multi-step reasoning over retrieved information from external knowledge sources. Recently, Large Language Models (LLMs) have demonstrated remarkable performance in solving ODMHQA owing to their capabilities including planning, reasoning, and utilizing tools. However, LLMs may generate off-topic answers when attempting to solve ODMHQA, namely the generated answers are irrelevant to the original questions. This issue of off-topic answers accounts for approximately one-third of incorrect answers, yet remains underexplored despite its significance. To alleviate this issue, we propose the Discriminate→Re-Compose→Re- Solve→Re-Decompose (Dr3) mechanism. Specifically, the Discriminator leverages the intrinsic capabilities of LLMs to judge whether the generated answers are off-topic. In cases where an off-topic answer is detected, the Corrector performs step-wise revisions along the reversed reasoning chain (Re-Compose→Re-Solve→Re-Decompose) until the final answer becomes on-topic. Experimental results on the HotpotQA and 2WikiMultiHopQA datasets demonstrate that our Dr3 mechanism considerably reduces the occurrence of off-topic answers in ODMHQA by nearly 13%, improving the performance in Exact Match (EM) by nearly 3% compared to the baseline method without the Dr3 mechanism.</abstract>
       <url hash="6b168926">2024.lrec-main.476</url>
@@ -5677,7 +5677,7 @@
       <author><first>Mickael</first><last>Rouvier</last></author>
       <author><first>Pacome</first><last>Constant Dit Beaufils</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Solen</first><last>Quiniou</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Pierre-Antoine</first><last>Gourraud</last></author>
@@ -5735,7 +5735,7 @@
       <author><first>Do June</first><last>Min</last></author>
       <author><first>Veronica</first><last>Perez-Rosas</last></author>
       <author><first>Ken</first><last>Resnicow</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>5437–5449</pages>
       <abstract>In this paper, we study the problem of multi-reward reinforcement learning to jointly optimize for multiple text qualities for natural language generation. We focus on the task of counselor reflection generation, where we optimize the generators to simultaneously improve the fluency, coherence, and reflection quality of generated counselor responses. We introduce two novel bandit methods, DynaOpt and C-DynaOpt, which rely on the broad strategy of combining rewards into a single value and optimizing them simultaneously. Specifically, we employ non-contextual and contextual multi-arm bandits to dynamically adjust multiple reward weights during training. Through automatic and manual evaluations, we show that our proposed techniques, DynaOpt and C-DynaOpt, outperform existing naive and bandit baselines, showcasing their potential for enhancing language models.</abstract>
       <url hash="eeb75a9e">2024.lrec-main.483</url>
@@ -5757,7 +5757,7 @@
       <author><first>Francesca</first><last>Grasso</last></author>
       <author><first>Stefano</first><last>Locci</last></author>
       <author><first>Giovanni</first><last>Siragusa</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <pages>5461–5472</pages>
       <abstract>Anthropogenic ecological crisis constitutes a significant challenge that all within the academy must urgently face, including the Natural Language Processing (NLP) community. While recent years have seen increasing work revolving around climate-centric discourse, crucial environmental and ecological topics outside of climate change remain largely unaddressed, despite their prominent importance. Mainstream NLP tasks, such as sentiment analysis, dominate the scene, but there remains an untouched space in the literature involving the analysis of environmental impacts of certain events and practices. To address this gap, this paper presents EcoVerse, an annotated English Twitter dataset of 3,023 tweets spanning a wide spectrum of environmental topics. We propose a three-level annotation scheme designed for Eco-Relevance Classification, Stance Detection, and introducing an original approach for Environmental Impact Analysis. We detail the data collection, filtering, and labeling process that led to the creation of the dataset. Remarkable Inter-Annotator Agreement indicates that the annotation scheme produces consistent annotations of high quality. Subsequent classification experiments using BERT-based models, including ClimateBERT, are presented. These yield encouraging results, while also indicating room for a model specifically tailored for environmental texts. The dataset is made freely available to stimulate further research.</abstract>
       <url hash="2d70fdd8">2024.lrec-main.485</url>
@@ -5796,7 +5796,7 @@
       <author><first>Lubos</first><last>Steskal</last></author>
       <author><first>Lilja Charlotte</first><last>Storset</last></author>
       <author><first>Huiling</first><last>You</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>5495–5506</pages>
       <abstract>We present EDEN, the first Norwegian dataset annotated with event information at the sentence level, adapting the widely used ACE event schema to Norwegian. The paper describes the manual annotation of Norwegian text as well as transcribed speech in the news domain, together with inter-annotator agreement and discussions of relevant dataset statistics. We also present preliminary modeling results using a graph-based event parser. The resulting dataset will be freely available for download and use.</abstract>
       <url hash="056ad8af">2024.lrec-main.488</url>
@@ -5809,7 +5809,7 @@
       <author><first>Pier Felice</first><last>Balestrucci</last></author>
       <author><first>Luca</first><last>Anselma</last></author>
       <author><first>Cristian</first><last>Bernareggi</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>5507–5519</pages>
       <abstract>This paper describes a corpus consisting of real-world dialogues in English between users and a task-oriented conversational agent, with interactions revolving around the description of finite state automata. The creation of this corpus is part of a larger research project aimed at developing tools for an easier access to educational content, especially in STEM fields, for users with visual impairments. The development of this corpus was precisely motivated by the aim of providing a useful resource to support the design of such tools. The core feature of this corpus is that its creation involved both sighted and visually impaired participants, thus allowing for a greater diversity of perspectives and giving the opportunity to identify possible differences in the way the two groups of participants interacted with the agent. The paper introduces this corpus, giving an account of the process that led to its creation, i.e. the methodology followed to obtain the data, the annotation scheme adopted, and the analysis of the results. Finally, the paper reports the results of a classification experiment on the annotated corpus, and an additional experiment to assess the annotation capabilities of three large language models, in view of a further expansion of the corpus.</abstract>
       <url hash="4bb9c34f">2024.lrec-main.489</url>
@@ -5895,7 +5895,7 @@
     </paper>
     <paper id="497">
       <title><fixed-case>EFTNAS</fixed-case>: Searching for Efficient Language Models in First-Order Weight-Reordered Super-Networks</title>
-      <author><first>Juan Pablo</first><last>Munoz</last></author>
+      <author id="juan-pablo-munoz"><first>Juan Pablo</first><last>Munoz</last></author>
       <author><first>Yi</first><last>Zheng</last></author>
       <author><first>Nilesh</first><last>Jain</last></author>
       <pages>5596–5608</pages>
@@ -5909,7 +5909,7 @@
       <author><first>Jiahuan</first><last>Pei</last></author>
       <author><first>Jan de</first><last>Wit</last></author>
       <author><first>Mohammad</first><last>Aliannejadi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Jos T.P.</first><last>Dobber</last></author>
       <author><first>Jos A.</first><last>Bosch</last></author>
       <pages>5609–5621</pages>
@@ -5942,7 +5942,7 @@
       <author><first>Zhigang</first><last>Kan</last></author>
       <author><first>Liwen</first><last>Peng</last></author>
       <author><first>Linbo</first><last>Qiao</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>5644–5653</pages>
       <abstract>Event Extraction (EE) is a challenging task that aims to extract structural event-related information from unstructured text. Traditional methods for EE depend on manual annotations, which are both expensive and scarce. Furthermore, the existing datasets mostly follow the long-tail distribution, severely hindering the previous methods of modeling tail types. Two techniques can address this issue: transfer learning and data generation. However, the existing methods based on transfer learning still rely on pre-training with a large amount of labeled data in the source domain. Additionally, the quality of data generated by previous data generation methods is difficult to control. In this paper, leveraging Large Language Models (LLMs), we propose novel methods for event extraction and generation based on dialogues, overcoming the problems of relying on source domain data and maintaining data quality. Specifically, this paper innovatively transforms the EE task into multi-turn dialogues, guiding LLMs to learn event schemas from historical dialogue information and output structural events. Furthermore, we introduce a novel LLM-based method for generating high-quality data, significantly improving traditional models’ performance with various paradigms and structures, especially on tail types. Adequate experiments on real-world datasets demonstrate the effectiveness of the proposed event extraction and data generation methods.</abstract>
       <url hash="4efdd0ac">2024.lrec-main.501</url>
@@ -5951,7 +5951,7 @@
     <paper id="502">
       <title><fixed-case>EMOLIS</fixed-case> App and Dataset to Find Emotionally Close Cartoons</title>
       <author><first>Soëlie</first><last>Lerch</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Elisabeth</first><last>Murisasco</last></author>
       <author><first>Emmanuel</first><last>Bruno</last></author>
       <pages>5654–5659</pages>
@@ -6081,7 +6081,7 @@
       <title>Empowering Tree-structured Entailment Reasoning: Rhetorical Perception and <fixed-case>LLM</fixed-case>-driven Interpretability</title>
       <author><first>Longyin</first><last>Zhang</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <pages>5783–5793</pages>
       <abstract>The study delves into the construction of entailment trees for science question answering (SQA), employing a novel framework termed Tree-structured Entailment Reasoning (TER). Current research on entailment tree construction presents significant challenges, primarily due to the ambiguities and similarities among candidate science facts, which considerably complicate the fact retrieval process. Moreover, the existing models exhibit limitations in effectively modeling the sequence of reasoning states, understanding the intricate relations between neighboring entailment tree nodes, and generating intermediate conclusions. To this end, we explore enhancing the TER performance from three aspects: First, improving retrieval capabilities by modeling and referring to the chained reasoning states; Second, enhancing TER by infusing knowledge that bridges the gap between reasoning types and rhetorical relations. Third, exploring a task-specific large language model tuning scheme to mitigate deficiencies in intermediate conclusion generation. Experiments on the English EntailmentBank demonstrate the effectiveness of the proposed methods in augmenting the quality of tree-structured entailment reasoning to a certain extent.</abstract>
       <url hash="62c1a936">2024.lrec-main.513</url>
@@ -6102,7 +6102,7 @@
       <author><first>Kenneth</first><last>Lai</last></author>
       <author><first>Richard</first><last>Brutti</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>5806–5818</pages>
       <abstract>Abstract Meaning Representation (AMR) is a general-purpose meaning representation that has become popular for its clear structure, ease of annotation and available corpora, and overall expressiveness. While AMR was designed to represent sentence meaning in English text, recent research has explored its adaptation to broader domains, including documents, dialogues, spatial information, cross-lingual tasks, and gesture. In this paper, we present an annotated corpus of multimodal (speech and gesture) AMR in a task-based setting. Our corpus is multilayered, containing temporal alignments to both the speech signal and to descriptions of gesture morphology. We also capture coreference relationships across modalities, enabling fine-grained analysis of how the semantics of gesture and natural language interact. We discuss challenges that arise when identifying cross-modal coreference and anaphora, as well as in creating and evaluating multimodal corpora in general. Although we find AMR’s abstraction away from surface form (in both language and gesture) occasionally too coarse-grained to capture certain cross-modal interactions, we believe its flexibility allows for future work to fill in these gaps. Our corpus and annotation guidelines are available at https://github.com/klai12/encoding-gesture-multimodal-dialogue.</abstract>
       <url hash="d2edda25">2024.lrec-main.515</url>
@@ -6112,7 +6112,7 @@
       <title>Endowing Neural Language Learners with Human-like Biases: A Case Study on Dependency Length Minimization</title>
       <author><first>Yuqing</first><last>Zhang</last></author>
       <author><first>Tessa</first><last>Verhoef</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <pages>5819–5832</pages>
       <abstract>Natural languages show a tendency to minimize the linear distance between heads and their dependents in a sentence, known as dependency length minimization (DLM). Such a preference, however, has not been consistently replicated with neural agent simulations. Comparing the behavior of models with that of human learners can reveal which aspects affect the emergence of this phenomenon. In this work, we investigate the minimal conditions that may lead neural learners to develop a DLM preference. We add three factors to the standard neural-agent language learning and communication framework to make the simulation more realistic, namely: (i) the presence of noise during listening, (ii) context-sensitivity of word use through non-uniform conditional word distributions, and (iii) incremental sentence processing, or the extent to which an utterance’s meaning can be guessed before hearing it entirely. While no preference appears in production, we show that the proposed factors can contribute to a small but significant learning advantage of DLM for listeners of verb-initial languages.</abstract>
@@ -6205,7 +6205,7 @@
       <author><first>Jun</first><last>Zhou</last></author>
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>5907–5921</pages>
       <abstract>Existing cross-document event coreference resolution models, which either compute mention similarity directly or enhance mention representation by extracting event arguments (such as location, time, agent, and patient), lackingmthe ability to utilize document-level information. As a result, they struggle to capture long-distance dependencies. This shortcoming leads to their underwhelming performance in determining coreference for the events where their argument information relies on long-distance dependencies. In light of these limitations, we propose the construction of document-level Rhetorical Structure Theory (RST) trees and cross-document Lexical Chains to model the structural and semantic information of documents. Subsequently, cross-document heterogeneous graphs are constructed and GAT is utilized to learn the representations of events. Finally, a pair scorer calculates the similarity between each pair of events and co-referred events can be recognized using standard clustering algorithm. Additionally, as the existing cross-document event coreference datasets are limited to English, we have developed a large-scale Chinese cross-document event coreference dataset to fill this gap, which comprises 53,066 event mentions and 4,476 clusters. After applying our model on the English and Chinese datasets respectively, it outperforms all baselines by large margins.</abstract>
       <url hash="4cc0e571">2024.lrec-main.523</url>
@@ -6243,7 +6243,7 @@
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Sarah T.</first><last>Bonna</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>5944–5955</pages>
       <abstract>Predicting emotions elicited by news headlines can be challenging as the task is largely influenced by the varying nature of people’s interpretations and backgrounds. Previous works have explored classifying discrete emotions directly from news headlines. We provide a different approach to tackling this problem by utilizing people’s explanations of their emotion, written in free-text, on how they feel after reading a news headline. Using the dataset BU-NEmo+ (Gao et al., 2022), we found that for emotion classification, the free-text explanations have a strong correlation with the dominant emotion elicited by the headlines. The free-text explanations also contain more sentimental context than the news headlines alone and can serve as a better input to emotion classification models. Therefore, in this work we explored generating emotion explanations from headlines by training a sequence-to-sequence transformer model and by using pretrained large language model, ChatGPT (GPT-4). We then used the generated emotion explanations for emotion classification. In addition, we also experimented with training the pretrained T5 model for the intermediate task of explanation generation before fine-tuning it for emotion classification. Using McNemar’s significance test, methods that incorporate GPT-generated free-text emotion explanations demonstrated significant improvement (P-value &lt; 0.05) in emotion classification from headlines, compared to methods that only use headlines. This underscores the value of using intermediate free-text explanations for emotion prediction tasks with headlines.</abstract>
       <url hash="bec381d2">2024.lrec-main.526</url>
@@ -6285,7 +6285,7 @@
     <paper id="530">
       <title>Enhancing Knowledge Retrieval with Topic Modeling for Knowledge-Grounded Dialogue</title>
       <author><first>Nhat</first><last>Tran</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>5986–5995</pages>
       <abstract>Knowledge retrieval is one of the major challenges in building a knowledge-grounded dialogue system. A common method is to use a neural retriever with a distributed approximate nearest-neighbor database to quickly find the relevant knowledge sentences. In this work, we propose an approach that utilizes topic modeling on the knowledge base to further improve retrieval accuracy and as a result, improve response generation. Additionally, we experiment with a large language model (LLM), ChatGPT, to take advantage of the improved retrieval performance to further improve the generation results. Experimental results on two datasets show that our approach can increase retrieval and generation performance. The results also indicate that ChatGPT is a better response generator for knowledge-grounded dialogue when relevant knowledge is provided.</abstract>
       <url hash="58f5ec9a">2024.lrec-main.530</url>
@@ -6321,7 +6321,7 @@
       <author id="zhiyu-chen-lehigh"><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>6017–6023</pages>
       <abstract>Large Language Models (LLMs) operating in 0-shot or few-shot settings achieve competitive results in Text Classification tasks. In-Context Learning (ICL) typically achieves better accuracy than the 0-shot setting, but it pays in terms of efficiency, due to the longer input prompt. In this paper, we propose a strategy to make LLMs as efficient as 0-shot text classifiers, while getting comparable or better accuracy than ICL. Our solution targets the low resource setting, i.e., when only 4 examples per class are available. Using a single LLM and few-shot real data we perform a sequence of generation, filtering and Parameter-Efficient Fine-Tuning steps to create a robust and efficient classifier. Experimental results show that our approach leads to competitive results on multiple text classification datasets.</abstract>
       <url hash="0e2f37a2">2024.lrec-main.533</url>
@@ -6332,7 +6332,7 @@
       <author><first>Peiyu</first><last>Liu</last></author>
       <author><first>Ze-Feng</first><last>Gao</last></author>
       <author><first>Xiao</first><last>Zhang</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>6024–6035</pages>
       <abstract>Lightweight fine-tuning is widely used as an important technique for efficiently adapting pre-trained language models (PLM) to downstream tasks. Despite the reduction in trainable parameters, existing lightweight fine-tuning methods are found to be effective in low-resource settings but often fail in high-resource settings, leading to unreliable outcomes. This limitation can be attributed to inflexible strategies: they identify the parameters of the model to be trained before fine-tuning and remain unchanged without taking into account the inherent variance of generalization ability in model components (<i>i.e.</i>, feed-forward, attention layers) and potential changes during the fine-tuning process. In this paper, we introduce a simple but effective calibration for lightweight fine-tuning PLMs based on the matrix’s stable rank according to both model components and the training process. We proposed both theoretical analyses and experimental verification for the proposed calibration strategy. Considering efficiency, we further propose time-aware and structure-aware strategies to determine the most crucial time to commence the fine-tuning procedure and selectively apply parameter matrices for lightweight fine-tuning, respectively. Extensive experiments demonstrate the superiority of our proposed fine-tuning approach (average improvement 3.1 for GLUE score compared to lightweight fine-tuning method).</abstract>
@@ -6352,7 +6352,7 @@
     <paper id="536">
       <title>Enhancing Scientific Document Summarization with Research Community Perspective and Background Knowledge</title>
       <author><first>Sudipta</first><last>Singha Roy</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>6048–6058</pages>
       <abstract>Scientific paper summarization has been the focus of much recent research. Unlike previous research which summarizes only the paper in question, or which summarizes the paper and the papers that it references, or which summarizes the paper and the citing sentences from the papers that cite it, this work puts all three of these summarization techniques together. To accomplish this, we have, by utilizing the citation network, introduced a corpus for scientific document summarization that provides information about the document being summarized, the papers referenced by it, as well as the papers that have cited it. The proposed summarizer model utilizes the referenced articles as background information and citing articles to capture the impact of the scientific document on the research community. Another aspect of the proposed model is its ability to generate both the extractive and abstractive summaries in parallel. The parallel training helps the counterparts to improve their individual performance. Results have shown that the summaries are of high quality when considering the standard metrics.</abstract>
       <url hash="7651ea04">2024.lrec-main.536</url>
@@ -6374,7 +6374,7 @@
       <author><first>Bo-Han</first><last>Lu</last></author>
       <author><first>Yi-Hsuan</first><last>Lin</last></author>
       <author><first>Annie</first><last>Lee</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>6077–6090</pages>
       <abstract>Machine translation focuses mainly on high-resource languages (HRLs), while low-resource languages (LRLs) like Taiwanese Hokkien are relatively under-explored. The study aims to address this gap by developing a dual translation model between Taiwanese Hokkien and both Traditional Mandarin Chinese and English. We employ a pre-trained LLaMA 2-7B model specialized in Traditional Mandarin Chinese to leverage the orthographic similarities between Taiwanese Hokkien Han and Traditional Mandarin Chinese. Our comprehensive experiments involve translation tasks across various writing systems of Taiwanese Hokkien as well as between Taiwanese Hokkien and other HRLs. We find that the use of a limited monolingual corpus still further improves the model’s Taiwanese Hokkien capabilities. We then utilize our translation model to standardize all Taiwanese Hokkien writing systems into Hokkien Han, resulting in further performance improvements. Additionally, we introduce an evaluation method incorporating back-translation and GPT-4 to ensure reliable translation quality assessment even for LRLs. The study contributes to narrowing the resource gap for Taiwanese Hokkien and empirically investigates the advantages and limitations of pre-training and fine-tuning based on LLaMA 2.</abstract>
       <url hash="0f3e1e64">2024.lrec-main.538</url>
@@ -6456,7 +6456,7 @@
       <author><first>Felix</first><last>Grezes</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Fabian</first><last>Schussler</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>6177–6188</pages>
       <abstract>Interest in Astrophysical Natural Language Processing (NLP) has increased recently, fueled by the development of specialized language models for information extraction. However, the scarcity of annotated resources for this domain is still a significant challenge. Most existing corpora are limited to Named Entity Recognition (NER) tasks, leaving a gap in resource diversity. To address this gap and facilitate a broader spectrum of NLP research in astrophysics, we introduce astroECR, an extension of our previously built Time-Domain Astrophysics Corpus (TDAC). Our contributions involve expanding it to cover named entities, coreferences, annotations related to astrophysical relationships, and normalizing celestial object names. We showcase practical utility through baseline models for four NLP tasks and provide the research community access to our corpus, code, and models.</abstract>
       <url hash="dee97ba0">2024.lrec-main.545</url>
@@ -6467,7 +6467,7 @@
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Mariia</first><last>Fedorova</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>6189–6198</pages>
       <abstract>We present a dataset of word usage graphs (WUGs), where the existing WUGs for multiple languages are enriched with cluster labels functioning as sense definitions. They are generated from scratch by fine-tuned encoder-decoder language models. The conducted human evaluation has shown that these definitions match the existing clusters in WUGs better than the definitions chosen from WordNet by two baseline systems. At the same time, the method is straightforward to use and easy to extend to new languages. The resulting enriched datasets can be extremely helpful for moving on to explainable semantic change modeling.</abstract>
       <url hash="39635d01">2024.lrec-main.546</url>
@@ -6477,7 +6477,7 @@
       <title>Ensembles of Hybrid and End-to-End Speech Recognition.</title>
       <author><first>Aditya Kamlesh</first><last>Parikh</last></author>
       <author><first>Louis</first><last>ten Bosch</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <pages>6199–6205</pages>
       <abstract>We propose a method to combine the hybrid Kaldi-based Automatic Speech Recognition (ASR) system with the end-to-end wav2vec 2.0 XLS-R ASR using confidence measures. Our research is focused on the low-resource Irish language. Given the limited available open-source resources, neither the standalone hybrid ASR nor the end-to-end ASR system can achieve optimal performance. By applying the Recognizer Output Voting Error Reduction (ROVER) technique, we illustrate how ensemble learning could facilitate mutual error correction between both ASR systems. This paper outlines the strategies for merging the hybrid Kaldi ASR model and the end-to-end XLS-R model with the help of confidence scores. Although contemporary state-of-the-art end-to-end ASR models face challenges related to prediction overconfidence, we utilize Renyi’s entropy-based confidence approach, tuned with temperature scaling, to align it with the Kaldi ASR confidence. Although there was no significant difference in the Word Error Rate (WER) between the hybrid and end-to-end ASR, we could achieve a notable reduction in WER after ensembling through ROVER. This resulted in an almost 14% Word Error Rate Reduction (WERR) on our primary test set and an approximately 20% WERR on other noisy and imbalanced test data.</abstract>
       <url hash="4c751171">2024.lrec-main.547</url>
@@ -6556,7 +6556,7 @@
     </paper>
     <paper id="554">
       <title><fixed-case>E</fixed-case>s<fixed-case>C</fixed-case>o<fixed-case>LA</fixed-case>: <fixed-case>S</fixed-case>panish Corpus of Linguistic Acceptability</title>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Marta</first><last>Punsola</last></author>
       <author><first>Valle</first><last>Ruiz-Fernández</last></author>
       <pages>6268–6277</pages>
@@ -6611,7 +6611,7 @@
       <title>Estimating the Causal Effects of Natural Logic Features in Transformer-Based <fixed-case>NLI</fixed-case> Models</title>
       <author><first>Julia</first><last>Rozanova</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>6319–6329</pages>
       <abstract>Rigorous evaluation of the causal effects of semantic features on language model predictions can be hard to achieve for natural language reasoning problems. However, this is such a desirable form of analysis from both an interpretability and model evaluation perspective, that it is valuable to investigate specific patterns of reasoning with enough structure and regularity to identify and quantify systematic reasoning failures in widely-used models. In this vein, we pick a portion of the NLI task for which an explicit causal diagram can be systematically constructed: the case where across two sentences (the premise and hypothesis), two related words/terms occur in a shared context. In this work, we apply causal effect estimation strategies to measure the effect of context interventions (whose effect on the entailment label is mediated by the semantic monotonicity characteristic) and interventions on the inserted word-pair (whose effect on the entailment label is mediated by the relation between these words). Extending related work on causal analysis of NLP models in different settings, we perform an extensive interventional study on the NLI task to investigate robustness to irrelevant changes and sensitivity to impactful changes of Transformers. The results strongly bolster the fact that similar benchmark accuracy scores may be observed for models that exhibit very different behaviour. Moreover, our methodology reinforces previously suspected biases from a causal perspective, including biases in favour of upward-monotone contexts and ignoring the effects of negation markers.</abstract>
       <url hash="531fadb6">2024.lrec-main.559</url>
@@ -6652,14 +6652,14 @@
     <paper id="562">
       <title><fixed-case>E</fixed-case>uropean Language Grid: One Year after</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
       <author><first>Leon</first><last>Voukoutis</last></author>
       <author><first>Martin</first><last>Courtois</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Katrin</first><last>Marheinecke</last></author>
       <pages>6353–6362</pages>
       <abstract>The European Language Grid (ELG) is a cloud platform for the whole European Language Technology community. While the EU project that developed the platform successfully concluded in June 2022, the ELG initiative has continued. This article provides a description of the current state of ELG in terms of user adoption and number of language resources and technologies available in early 2024. It also provides an overview of the various activities with regard to ELG since the end of the project and since the publication of the ELG book, especially the co-authors’ attempt to integrate the ELG platform into various data space initiatives. The article also provides an overview of the Digital Language Equality (DLE) dashboard and the current state of DLE in Europe.</abstract>
@@ -6670,7 +6670,7 @@
       <title>Evaluating Automatic Subtitling: Correlating Post-editing Effort and Automatic Metrics</title>
       <author><first>Alina</first><last>Karakanta</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>6363–6369</pages>
       <abstract>Systems that automatically generate subtitles from video are gradually entering subtitling workflows, both for supporting subtitlers and for accessibility purposes. Even though robust metrics are essential for evaluating the quality of automatically-generated subtitles and for estimating potential productivity gains, there is limited research on whether existing metrics, some of which directly borrowed from machine translation (MT) evaluation, can fulfil such purposes. This paper investigates how well such MT metrics correlate with measures of post-editing (PE) effort in automatic subtitling. To this aim, we collect and publicly release a new corpus containing product-, process- and participant-based data from post-editing automatic subtitles in two language pairs (en→de,it). We find that different types of metrics correlate with different aspects of PE effort. Specifically, edit distance metrics have high correlation with technical and temporal effort, while neural metrics correlate well with PE speed.</abstract>
@@ -6702,7 +6702,7 @@
       <title>Evaluating Gender Bias of Pre-trained Language Models in Natural Language Inference by Considering All Labels</title>
       <author><first>Panatchakorn</first><last>Anantaprayoon</last></author>
       <author><first>Masahiro</first><last>Kaneko</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>6395–6408</pages>
       <abstract>Discriminatory gender biases have been found in Pre-trained Language Models (PLMs) for multiple languages. In Natural Language Inference (NLI), existing bias evaluation methods have focused on the prediction results of one specific label out of three labels, such as neutral. However, such evaluation methods can be inaccurate since unique biased inferences are associated with unique prediction labels. Addressing this limitation, we propose a bias evaluation method for PLMs, called NLI-CoAL, which considers all the three labels of NLI task. First, we create three evaluation data groups that represent different types of biases. Then, we define a bias measure based on the corresponding label output of each data group. In the experiments, we introduce a meta-evaluation technique for NLI bias measures and use it to confirm that our bias measure can distinguish biased, incorrect inferences from non-biased incorrect inferences better than the baseline, resulting in a more accurate bias evaluation. We create the datasets in English, Japanese, and Chinese, and successfully validate the compatibility of our bias measure across multiple languages. Lastly, we observe the bias tendencies in PLMs of different languages. To our knowledge, we are the first to construct evaluation datasets and measure PLMs’ bias from NLI in Japanese and Chinese.</abstract>
       <url hash="399ba6df">2024.lrec-main.566</url>
@@ -6770,7 +6770,7 @@
     <paper id="572">
       <title>Evaluating Shortest Edit Script Methods for Contextual Lemmatization</title>
       <author><first>Olia</first><last>Toporkov</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>6451–6463</pages>
       <abstract>Modern contextual lemmatizers often rely on automatically induced Shortest Edit Scripts (SES), namely, the number of edit operations to transform a word form into its lemma. In fact, different methods of computing SES have been proposed as an integral component in the architecture of several state-of-the-art contextual lemmatizers currently available. However, previous work has not investigated the direct impact of SES in the final lemmatization performance. In this paper we address this issue by focusing on lemmatization as a token classification task where the only input that the model receives is the word-label pairs in context, where the labels correspond to previously induced SES. Thus, by modifying in our lemmatization system only the SES labels that the model needs to learn, we may then objectively conclude which SES representation produces the best lemmatization results. We experiment with seven languages of different morphological complexity, namely, English, Spanish, Basque, Russian, Czech, Turkish and Polish, using multilingual and language-specific pre-trained masked language encoder-only models as a backbone to build our lemmatizers. Comprehensive experimental results, both in- and out-of-domain, indicate that computing the casing and edit operations separately is beneficial overall, but much more clearly for languages with high-inflected morphology. Notably, multilingual pre-trained language models consistently outperform their language-specific counterparts in every evaluation setting.</abstract>
       <url hash="b4606cfb">2024.lrec-main.572</url>
@@ -6799,11 +6799,11 @@
     <paper id="575">
       <title>Evaluating the <fixed-case>IWSLT</fixed-case>2023 Speech Translation Tasks: Human Annotations, Automatic Metrics, and Segmentation</title>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Dávid</first><last>Javorský</last></author>
       <author><first>Xutai</first><last>Ma</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
@@ -6830,7 +6830,7 @@
       <author><first>Büşra</first><last>Marşan</last></author>
       <author><first>Tunga</first><last>Gungor</last></author>
       <author><first>Balkiz</first><last>Ozturk Basaran</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Susan</first><last>Uskudarli</last></author>
       <pages>6504–6514</pages>
       <abstract>Pretrained language models and large language models are increasingly used to assist in a great variety of natural language tasks. In this work, we explore their use in evaluating the quality of alternative corpus annotation schemes. For this purpose, we analyze two alternative annotations of the Turkish BOUN treebank, versions 2.8 and 2.11, in the Universal Dependencies framework using large language models. Using a suitable prompt generated using treebank annotations, large language models are used to recover the surface forms of sentences. Based on the idea that the large language models capture the characteristics of the languages, we expect that the better annotation scheme would yield the sentences with higher success. The experiments conducted on a subset of the treebank show that the new annotation scheme (2.11) results in a successful recovery percentage of about 2 points higher. All the code developed for this work is available at https://github.com/boun-tabi/eval-ud .</abstract>
@@ -6840,7 +6840,7 @@
     <paper id="578">
       <title>Evaluating Topic Model on Asymmetric and Multi-Domain Financial Corpus</title>
       <author><first>Corentin</first><last>Masson</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>6515–6529</pages>
       <abstract>Multiple recent research works in Finance try to quantify the exposure of market assets to various risks from text and how assets react if the risk materialize itself. We consider risk sections from french Financial Corporate Annual Reports, which are regulated documents with a mandatory section containing important risks the company is facing, to extract an accurate risk profile and exposure of companies. We identify multiple pitfalls of topic models when applied to corporate filing financial domain data for unsupervised risk distribution extraction which has not yet been studied on this domain. We propose two new metrics to evaluate the behavior of different types of topic models with respect to pitfalls previously mentioned about document risk distribution extraction. Our evaluation will focus on three aspects: regularizations, down-sampling and data augmentation. In our experiments, we found that classic Topic Models require down-sampling to obtain unbiased risks, while Topic Models using metadata and in-domain pre-trained word-embeddings partially correct the coherence imbalance per subdomain and remove sector’s specific language from the detected themes. We then demonstrate the relevance and usefulness of the extracted information with visualizations that help to understand the content of such corpus and its evolution along the years.</abstract>
       <url hash="92e1cdbf">2024.lrec-main.578</url>
@@ -6861,7 +6861,7 @@
       <author><first>Stephanie</first><last>Brandl</last></author>
       <author><first>Oliver</first><last>Eberle</last></author>
       <author><first>Tiago</first><last>Ribeiro</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Nora</first><last>Hollenstein</last></author>
       <pages>6544–6556</pages>
       <abstract>Rationales in the form of manually annotated input spans usually serve as ground truth when evaluating explainability methods in NLP. They are, however, time-consuming and often biased by the annotation process. In this paper, we debate whether human gaze, in the form of webcam-based eye-tracking recordings, poses a valid alternative when evaluating importance scores. We evaluate the additional information provided by gaze data, such as total reading times, gaze entropy, and decoding accuracy with respect to human rationale annotations. We compare WebQAmGaze, a multilingual dataset for information-seeking QA, with attention and explainability-based importance scores for 4 different multilingual Transformer-based language models (mBERT, distil-mBERT, XLMR, and XLMR-L) and 3 languages (English, Spanish, and German). Our pipeline can easily be applied to other tasks and languages. Our findings suggest that gaze data offers valuable linguistic insights that could be leveraged to infer task difficulty and further show a comparable ranking of explainability methods to that of human rationales.</abstract>
@@ -6889,7 +6889,7 @@
     <paper id="583">
       <title>Evaluation Dataset for Lexical Translation Consistency in <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Document-level Translation</title>
       <author><first>Xiangyu</first><last>Lei</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Shimin</first><last>Tao</last></author>
       <author><first>Hao</first><last>Yang</last></author>
       <pages>6575–6581</pages>
@@ -6903,7 +6903,7 @@
       <author><first>Katarina</first><last>Gillholm</last></author>
       <author><first>Murathan</first><last>Kurfalı</last></author>
       <author><first>Marie</first><last>Mattson</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>6582–6593</pages>
       <abstract>Traditional evaluation methods for Grammatical Error Correction (GEC) fail to fully capture the full range of system capabilities and objectives. The emergence of large language models (LLMs) has further highlighted the shortcomings of these evaluation strategies, emphasizing the need for a paradigm shift in evaluation methodology. In the current study, we perform a comprehensive evaluation of various GEC systems using a recently published dataset of Swedish learner texts. The evaluation is performed using established evaluation metrics as well as human judges. We find that GPT-3 in a few-shot setting by far outperforms previous grammatical error correction systems for Swedish, a language comprising only about 0.1% of its training data. We also found that current evaluation methods contain undesirable biases that a human evaluation is able to reveal. We suggest using human post-editing of GEC system outputs to analyze the amount of change required to reach native-level human performance on the task, and provide a dataset annotated with human post-edits and assessments of grammaticality, fluency and meaning preservation of GEC system outputs.</abstract>
       <url hash="151a1053">2024.lrec-main.584</url>
@@ -6924,8 +6924,8 @@
       <author><first>Mikel</first><last>Zubillaga</last></author>
       <author><first>Oscar</first><last>Sainz</last></author>
       <author><first>Ainara</first><last>Estarrona</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>6607–6621</pages>
       <abstract>Cross-lingual transfer-learning is widely used in Event Extraction for low-resource languages and involves a Multilingual Language Model that is trained in a source language and applied to the target language. This paper studies whether the typological similarity between source and target languages impacts the performance of cross-lingual transfer, an under-explored topic. We first focus on Basque as the target language, which is an ideal target language because it is typologically different from surrounding languages. Our experiments on three Event Extraction tasks show that the shared linguistic characteristic between source and target languages does have an impact on transfer quality. Further analysis of 72 language pairs reveals that for tasks that involve token classification such as entity and event trigger identification, common writing script and morphological features produce higher quality cross-lingual transfer. In contrast, for tasks involving structural prediction like argument extraction, common word order is the most relevant feature. In addition, we show that when increasing the training size, not all the languages scale in the same way in the cross-lingual setting. To perform the experiments we introduce EusIE, an event extraction dataset for Basque, which follows the Multilingual Event Extraction dataset (MEE). The dataset and code are publicly available.</abstract>
       <url hash="8a0ea731">2024.lrec-main.586</url>
@@ -7015,7 +7015,7 @@
       <title>Examining Temporalities on Stance Detection towards <fixed-case>COVID</fixed-case>-19 Vaccination</title>
       <author><first>Yida</first><last>Mu</last></author>
       <author><first>Mali</first><last>Jin</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <pages>6732–6738</pages>
       <abstract>Previous studies have highlighted the importance of vaccination as an effective strategy to control the transmission of the COVID-19 virus. It is crucial for policymakers to have a comprehensive understanding of the public’s stance towards vaccination on a large scale. However, attitudes towards COVID-19 vaccination, such as pro-vaccine or vaccine hesitancy, have evolved over time on social media. Thus, it is necessary to account for possible temporal shifts when analysing these stances. This study aims to examine the impact of temporal concept drift on stance detection towards COVID-19 vaccination on Twitter. To this end, we evaluate a range of transformer-based models using chronological (splitting the training, validation, and test sets in order of time) and random splits (randomly splitting these three sets) of social media data. Our findings reveal significant discrepancies in model performance between random and chronological splits in several existing COVID-19-related datasets; specifically, chronological splits significantly reduce the accuracy of stance classification. Therefore, real-world stance detection approaches need to be further refined to incorporate temporal factors as a key consideration.</abstract>
@@ -7026,7 +7026,7 @@
       <title>Examining the Limitations of Computational Rumor Detection Models Trained on Static Datasets</title>
       <author><first>Yida</first><last>Mu</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>6739–6751</pages>
       <abstract>A crucial aspect of a rumor detection model is its ability to generalize, particularly its ability to detect emerging, previously unknown rumors. Past research has indicated that content-based (i.e., using solely source post as input) rumor detection models tend to perform less effectively on unseen rumors. At the same time, the potential of context-based models remains largely untapped. The main contribution of this paper is in the in-depth evaluation of the performance gap between content and context-based models specifically on detecting new, unseen rumors. Our empirical findings demonstrate that context-based models are still overly dependent on the information derived from the rumors’ source post and tend to overlook the significant role that contextual information can play. We also study the effect of data split strategies on classifier performance. Based on our experimental results, the paper also offers practical suggestions on how to minimize the effects of temporal concept drift in static datasets during the training of rumor detection methods.</abstract>
@@ -7048,7 +7048,7 @@
     </paper>
     <paper id="597">
       <title>Experimental versus In-Corpus Variation in Referring Expression Choice</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <author><first>Fahime</first><last>Same</last></author>
       <pages>6838–6848</pages>
       <abstract>In this paper, we compare the results of three studies. The first explored feature-conditioned distributions of referring expression (RE) forms in the original corpus from which the contexts were taken. The second is a crowdsourcing study in which we asked participants to express entities within a pre-existing context, given fully specified referents. The third study replicates the crowdsourcing experiment using Large Language Models (LLMs). We evaluate how well the corpus itself can model the variation found when multiple informants (either human participants or LLMs) choose REs in the same contexts. We measure the similarity of the conditional distributions of form categories using the Jensen-Shannon Divergence metric and Description Length metric. We find that the experimental methodology introduces substantial noise, but by taking this noise into account, we can model the variation captured from the corpus and RE form choices made during experiments. Furthermore, we compared the three conditional distributions over the corpus, the human experimental results, and the GPT models. Against our expectations, the divergence is greatest between the corpus and the GPT model.</abstract>
@@ -7069,7 +7069,7 @@
       <title>Explainable Multi-hop Question Generation: An End-to-End Approach without Intermediate Question Labeling</title>
       <author><first>Seonjeong</first><last>Hwang</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>6855–6866</pages>
       <abstract>In response to the increasing use of interactive artificial intelligence, the demand for the capacity to handle complex questions has increased. Multi-hop question generation aims to generate complex questions that requires multi-step reasoning over several documents. Previous studies have predominantly utilized end-to-end models, wherein questions are decoded based on the representation of context documents. However, these approaches lack the ability to explain the reasoning process behind the generated multi-hop questions. Additionally, the question rewriting approach, which incrementally increases the question complexity, also has limitations due to the requirement of labeling data for intermediate-stage questions. In this paper, we introduce an end-to-end question rewriting model that increases question complexity through sequential rewriting. The proposed model has the advantage of training with only the final multi-hop questions, without intermediate questions. Experimental results demonstrate the effectiveness of our model in generating complex questions, particularly 3- and 4-hop questions, which are appropriately paired with input answers. We also prove that our model logically and incrementally increases the complexity of questions, and the generated multi-hop questions are also beneficial for training question answering models.</abstract>
       <url hash="6c0a899f">2024.lrec-main.599</url>
@@ -7187,7 +7187,7 @@
       <title>Exploring the Impact of Human Evaluator Group on Chat-Oriented Dialogue Evaluation</title>
       <author><first>Sarah E.</first><last>Finch</last></author>
       <author><first>James D.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>6966–6973</pages>
       <abstract>Human evaluation has been widely accepted as the standard for evaluating chat-oriented dialogue systems. However, there is a significant variation in previous work regarding who gets recruited as evaluators. Evaluator groups such as domain experts, university students, and crowdworkers have been used to assess and compare dialogue systems, although it is unclear to what extent the choice of an evaluator group can affect results. This paper analyzes the evaluator group impact on dialogue system evaluation by testing 4 state-of-the-art dialogue systems using 4 distinct evaluator groups. Our analysis reveals a robustness towards evaluator groups for Likert evaluations that is not seen for Pairwise, with only minor differences observed when changing evaluator groups. Furthermore, two notable limitations to this robustness are observed, which reveal discrepancies between evaluators with different levels of chatbot expertise and indicate that evaluator objectivity is beneficial for certain dialogue metrics.</abstract>
       <url hash="d98ece94">2024.lrec-main.610</url>
@@ -7243,7 +7243,7 @@
       <author><first>Anja Silvia Mollah</first><last>Haque</last></author>
       <author><first>Isabel</first><last>Eiser</last></author>
       <author><first>Gertraud</first><last>Koch</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>7017–7022</pages>
       <abstract>In this system demonstration paper, we describe the Whiteboards extension for an existing web-based platform for digital qualitative discourse analysis. Whiteboards comprise interactive graph-based interfaces to organize and manipulate objects, which can be qualitative research data, such as documents, images, etc., and analyses of these research data, such as annotations, tags, and code structures. The proposed extension offers a customizable view of the material and a wide range of actions that enable new ways of interacting and working with such resources. We show that the visualizations facilitate various use cases of qualitative data analysis, including reflection of the research process through sampling maps, creation of actor networks, and refining code taxonomies.</abstract>
       <url hash="cc1ab904">2024.lrec-main.615</url>
@@ -7286,8 +7286,8 @@
       <author><first>Abby R.</first><last>Rosenberg</last></author>
       <author><first>Kevin</first><last>Lybarger</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>7045–7056</pages>
       <abstract>Social determinants of health (SDoH) play a critical role in shaping health outcomes, particularly in pediatric populations where interventions can have long-term implications. SDoH are frequently studied in the Electronic Health Record (EHR), which provides a rich repository for diverse patient data. In this work, we present a novel annotated corpus, the Pediatric Social History Annotation Corpus (PedSHAC), and evaluate the automatic extraction of detailed SDoH representations using fine-tuned and in-context learning methods with Large Language Models (LLMs). PedSHAC comprises annotated social history sections from 1,260 clinical notes obtained from pediatric patients within the University of Washington (UW) hospital system. Employing an event-based annotation scheme, PedSHAC captures ten distinct health determinants to encompass living and economic stability, prior trauma, education access, substance use history, and mental health with an overall annotator agreement of 81.9 F1. Our proposed fine-tuning LLM-based extractors achieve high performance at 78.4 F1 for event arguments. In-context learning approaches with GPT-4 demonstrate promise for reliable SDoH extraction with limited annotated examples, with extraction performance at 82.3 F1 for event triggers.</abstract>
       <url hash="c0b49a2e">2024.lrec-main.618</url>
@@ -7348,7 +7348,7 @@
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Sara</first><last>Petrollino</last></author>
       <author><first>Gilles R.</first><last>Scheifer</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Christopher</first><last>Handy</last></author>
       <pages>7101–7106</pages>
       <abstract>LeiLanD (Leiden Language Data) is a searchable catalogue initiated by the Leiden University Centre for Linguistics (LUCL) with the support of CLARIAH. The catalogue contains metadata about language datasets collected at LUCL and other institutes of Leiden University. This paper describes a project to FAIRify the datasets increasing their findability and accessibility through a standardised metadata format CMDI so as to obtain a rich metadata description for all resources and to make them findable through CLARIN’s Virtual Language Observatory. The paper describes the creation of the catalogue and the steps that led from unstructured metadata to CMDI standards. This FAIRifi- cation of LeiLanD has enhanced the findability and accessibility of incredibly diverse collection of language datasets.</abstract>
@@ -7358,8 +7358,8 @@
     <paper id="624">
       <title><fixed-case>F</fixed-case>al<fixed-case>AI</fixed-case>: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario</title>
       <author><first>Andres</first><last>Pineiro-Martin</last></author>
-      <author><first>Carmen</first><last>Garcia-Mateo</last></author>
-      <author><first>Laura</first><last>Docio-Fernandez</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>Garcia-Mateo</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docio-Fernandez</last></author>
       <author><first>Maria del Carmen</first><last>Lopez-Perez</last></author>
       <author><first>Jose</first><last>Gandarela-Rodriguez</last></author>
       <pages>7107–7116</pages>
@@ -7381,9 +7381,9 @@
     <paper id="626">
       <title><fixed-case>F</fixed-case>ast<fixed-case>S</fixed-case>pell: The <fixed-case>L</fixed-case>ang<fixed-case>I</fixed-case>d Magic Spell</title>
       <author><first>Marta</first><last>Bañón</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
       <pages>7133–7140</pages>
       <abstract>Language identification is a crucial component in the automated production of language resources, particularly in multilingual and big data contexts. However, commonly used language identifiers struggle to differentiate between similar or closely-related languages. This paper introduces FastSpell, a language identifier that combines fastText (a pre-trained language identifier tool) and Hunspell (a spell checker) with the aim of having a refined second-opinion before deciding which language should be assigned to a text. We provide a description of the FastSpell algorithm along with an explanation on how to use and configure it. To that end, we motivate the need of such a tool and present a benchmark including some popular language identifiers evaluated during the development of FastSpell. We show how FastSpell is useful not only to improve identification of similar languages, but also to identify new ones ignored by other tools.</abstract>
       <url hash="960a62b7">2024.lrec-main.626</url>
@@ -7423,7 +7423,7 @@
     <paper id="630">
       <title>Federated Foundation Models: Privacy-Preserving and Collaborative Learning for Large Models</title>
       <author><first>Sixing</first><last>Yu</last></author>
-      <author><first>Juan Pablo</first><last>Munoz</last></author>
+      <author id="juan-pablo-munoz"><first>Juan Pablo</first><last>Munoz</last></author>
       <author><first>Ali</first><last>Jannesari</last></author>
       <pages>7174–7184</pages>
       <abstract>Foundation Models (FMs), such as LLaMA, BERT, GPT, ViT, and CLIP, have demonstrated remarkable success in a wide range of applications, driven by their ability to leverage vast amounts of data for pre-training. However, optimizing FMs often requires access to sensitive data, raising privacy concerns and limiting their applicability in many domains. In this paper, we propose the Federated Foundation Models (FFMs) paradigm, which combines the benefits of FMs and Federated Learning (FL) to enable privacy-preserving and collaborative learning across multiple end-users. We discuss the potential benefits and challenges of integrating FL into the lifespan of FMs, covering pre-training, fine-tuning, and application. We further outline potential future research avenues in FFM, including FFM pre-training, FFM fine-tuning, and federated prompt tuning, which allow the development of more personalized and context-aware models while ensuring data privacy. Moreover, we explore the possibility of continual/lifelong learning in FFMs, as increased computational power at the edge may unlock the potential for optimizing FMs using newly generated private data close to the data source. The proposed FFM concepts offer a flexible and scalable framework for training large language models in a privacy-preserving manner, setting the stage for subsequent advancements in both FM training and federated learning.</abstract>
@@ -7447,7 +7447,7 @@
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>7196–7207</pages>
       <abstract>Hyper-relational facts, which consist of a primary triple (head entity, relation, tail entity) and auxiliary attribute-value pairs, are widely present in real-world Knowledge Graphs (KGs). Link Prediction on Hyper-relational Facts (LPHFs) is to predict a missing element in a hyper-relational fact, which helps populate and enrich KGs. However, existing LPHFs studies usually require an amount of high-quality data. They overlook few-shot relations, which have limited instances, yet are common in real-world scenarios. Thus, we introduce a new task, Few-Shot Link Prediction on Hyper-relational Facts (FSLPHFs). It aims to predict a missing entity in a hyper-relational fact with limited support instances. To tackle FSLPHFs, we propose MetaRH, a model that learns Meta Relational information in Hyper-relational facts. MetaRH comprises three modules: relation learning, support-specific adjustment, and query inference. By capturing meta relational information from limited support instances, MetaRH can accurately predict the missing entity in a query. As there is no existing dataset available for this new task, we construct three datasets to validate the effectiveness of MetaRH. Experimental results on these datasets demonstrate that MetaRH significantly outperforms existing representative models.</abstract>
       <url hash="f9e3ee07">2024.lrec-main.632</url>
@@ -7531,7 +7531,7 @@
       <title><fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>orpus-<fixed-case>DE</fixed-case>10k: A Corpus for the <fixed-case>G</fixed-case>erman Financial Domain</title>
       <author><first>Serhii</first><last>Hamotskyi</last></author>
       <author><first>Nata</first><last>Kozaeva</last></author>
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <pages>7277–7285</pages>
       <abstract>We introduce a predominantly German corpus comprising 12.5k PDF documents sourced from the financial domain. The corresponding extracted textual data encompasses more than 165 million tokens derived predominantly from German, and to a lesser extent, bilingual documents. We provide detailed information about the document types included in the corpus, such as final terms, base prospectuses, annual reports, information materials, law documents, international financial reporting standards, and monthly reports from the Bundesbank, accompanied by comprehensive statistical analysis. To our knowledge, it is the first non-email German financial corpus available, and we hope it will fill this gap and foster further research in the financial domain both in the German language and in multilingual contexts.</abstract>
       <url hash="9f1d3716">2024.lrec-main.639</url>
@@ -7570,7 +7570,7 @@
       <author><first>Yuting</first><last>Shi</last></author>
       <author><first>Naoya</first><last>Inoue</last></author>
       <author><first>Houjing</first><last>Wei</last></author>
-      <author><first>Yufeng</first><last>Zhao</last></author>
+      <author id="hakaze-cho"><first>Yufeng</first><last>Zhao</last></author>
       <author><first>Tao</first><last>Jin</last></author>
       <pages>7307–7313</pages>
       <abstract>Recent advances in Instruction-fine-tuned Vision and Language Models (IVLMs), such as GPT-4V and InstructBLIP, have prompted some studies have started an in-depth analysis of the reasoning capabilities of IVLMs. However, Inductive Visual Reasoning, a vital skill for text-image understanding, remains underexplored due to the absence of benchmarks. In this paper, we introduce Find-the-Common (FTC): a new vision and language task for Inductive Visual Reasoning. In this task, models are required to identify an answer that explains the common attributes across visual scenes. We create a new dataset for the FTC and assess the performance of several contemporary approaches including Image-Based Reasoning, Text-Based Reasoning, and Image-Text-Based Reasoning with various models. Extensive experiments show that even state-of-the-art models like GPT-4V can only archive with 48% accuracy on the FTC, for which, the FTC is a new challenge for the visual reasoning research community. Our dataset has been released and is available online: https://github.com/SSSSSeki/Find-the-common.</abstract>
@@ -7657,7 +7657,7 @@
       <author><first>Irene</first><last>Baucells</last></author>
       <author><first>Marc</first><last>Pamies</last></author>
       <author><first>Yishi</first><last>Xu</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>7377–7388</pages>
       <abstract>Large language models have amply proven their great capabilities, both in downstream tasks and real-life settings. However, low- and mid-resource languages do not have access to the necessary means to train such models from scratch, and often have to rely on multilingual models despite being underrepresented in the training data. For the particular case of the Catalan language, we prove that continued pre-training with vocabulary adaptation is a better alternative to take the most out of already pre-trained models, even if these have not seen any Catalan data during their pre-training phase. We curate a 26B tokens corpus and use it to further pre-train BLOOM, giving rise to the FLOR models. We perform an extensive evaluation to assess the effectiveness of our method, obtaining consistent gains across Catalan and Spanish tasks. The models, training data, and evaluation framework are made freely available under permissive licenses.</abstract>
@@ -7678,7 +7678,7 @@
       <title><fixed-case>FORECAST</fixed-case>2023: A Forecast and Reasoning Corpus of Argumentation Structures</title>
       <author><first>Kamila</first><last>Górska</last></author>
       <author><first>John</first><last>Lawrence</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>7395–7405</pages>
       <abstract>It is known from large-scale crowd experimentation that some people are innately better at analysing complex situations and making justified predictions – the so-called ‘superforecasters’. Surprisingly, however, there has to date been no work exploring the role played by the reasoning in those justifications. Bag-of-words analyses might tell us something, but the real value lies in understanding what features of reasoning and argumentation lead to better forecasts – both in providing an objective measure for argument quality, and even more importantly, in providing guidance on how to improve forecasting performance. The work presented here covers the creation of a unique dataset of such prediction rationales, the structure of which naturally lends itself to partially automated annotation which in turn is used as the basis for subsequent manual enhancement that provides a uniquely fine-grained and close characterisation of the structure of argumentation, with potential impact on forecasting domains from intelligence analysis to investment decision-making.</abstract>
       <url hash="55491650">2024.lrec-main.652</url>
@@ -7826,14 +7826,14 @@
       <title>From Linguistic Linked Data to Big Data</title>
       <author><first>Dimitar</first><last>Trajanov</last></author>
       <author><first>Elena</first><last>Apostol</last></author>
-      <author><first>Radovan</first><last>Garabik</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabik</last></author>
       <author><first>Katerina</first><last>Gkirtzou</last></author>
       <author><first>Dagmar</first><last>Gromann</last></author>
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Cosimo</first><last>Palma</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Alexia</first><last>Sampri</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Blerina</first><last>Spahiu</last></author>
       <author><first>Ciprian-Octavian</first><last>Truică</last></author>
       <author><first>Giedre</first><last>Valunaite Oleskeviciene</last></author>
@@ -7877,8 +7877,8 @@
     <paper id="665">
       <title>From Text to Source: Results in Detecting Large Language Model-Generated Content</title>
       <author><first>Wissam</first><last>Antoun</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>7531–7543</pages>
       <abstract>The widespread use of Large Language Models (LLMs), celebrated for their ability to generate human-like text, has raised concerns about misinformation and ethical implications. Addressing these concerns necessitates the development of robust methods to detect and attribute text generated by LLMs. This paper investigates “Cross-Model Detection,” by evaluating whether a classifier trained to distinguish between source LLM-generated and human-written text can also detect text from a target LLM without further training. The study comprehensively explores various LLM sizes and families and assesses the impact of conversational fine-tuning techniques, quantization, and watermarking on classifier generalization. The research also explores Model Attribution, encompassing source model identification, model family, and model size classification, in addition to quantization and watermarking detection. Our results reveal several key findings: a clear inverse relationship between classifier effectiveness and model size, with larger LLMs being more challenging to detect, especially when the classifier is trained on data from smaller models. Training on data from similarly sized LLMs can improve detection performance from larger models but may lead to decreased performance when dealing with smaller models. Additionally, model attribution experiments show promising results in identifying source models and model families, highlighting detectable signatures in LLM-generated text, with particularly remarkable outcomes in watermarking detection, while no detectable signatures of quantization were observed. Overall, our study contributes valuable insights into the interplay of model size, family, and training data in LLM detection and attribution.</abstract>
       <url hash="d4c0120f">2024.lrec-main.665</url>
@@ -7888,7 +7888,7 @@
     <paper id="666">
       <title><fixed-case>FUSE</fixed-case> - <fixed-case>F</fixed-case>r<fixed-case>U</fixed-case>stration and Surprise Expressions: A Subtle Emotional Multimodal Language Corpus</title>
       <author><first>Rajesh</first><last>Titung</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <pages>7544–7555</pages>
       <abstract>This study introduces a novel multimodal corpus for expressive task-based spoken language and dialogue, focused on language use under frustration and surprise, elicited from three tasks motivated by prior research and collected in an IRB-approved experiment. The resource is unique both because these are understudied affect states for emotion modeling in language, and also because it provides both individual and dyadic multimodally grounded language. The study includes a detailed analysis of annotations and performance results for multimodal emotion inference in language use.</abstract>
       <url hash="d18cf775">2024.lrec-main.666</url>
@@ -7910,7 +7910,7 @@
     <paper id="668">
       <title><fixed-case>GAATME</fixed-case>: A Genetic Algorithm for Adversarial Translation Metrics Evaluation</title>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>7562–7569</pages>
       <abstract>Building on a recent method for decoding translation candidates from a Machine Translation (MT) model via a genetic algorithm, we modify it to generate adversarial translations to test and challenge MT evaluation metrics. The produced translations score very well in an arbitrary MT evaluation metric selected beforehand, despite containing serious, deliberately introduced errors. The method can be used to create adversarial test sets to analyze the biases and shortcomings of the metrics. We publish various such test sets for the Czech to English language pair, as well as the code to convert any parallel data into a similar adversarial test set.</abstract>
       <url hash="66215a12">2024.lrec-main.668</url>
@@ -7985,7 +7985,7 @@
       <title>Generating Multiple-choice Questions for Medical Question Answering with Distractors and Cue-masking</title>
       <author><first>Damien</first><last>Sileo</last></author>
       <author><first>Kanimozhi</first><last>Uma</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>7647–7653</pages>
       <abstract>Medical multiple-choice question answering (MCQA) is a challenging evaluation for medical natural language processing and a helpful task in itself. Medical questions may describe patient symptoms and ask for the correct diagnosis, which requires domain knowledge and complex reasoning. Standard language modeling pretraining alone is not sufficient to achieve the best results with BERT-base size (Devlin et al., 2019) encoders. Jin et al. (2020) showed that focusing masked language modeling on disease name prediction when using medical encyclopedic paragraphs as input leads to considerable MCQA accuracy improvement. In this work, we show that (1) fine-tuning on generated MCQA dataset outperforms the masked language modeling based objective and (2) correctly masking the cues to the answers is critical for good performance. We release new pretraining datasets and achieve state-of-the-art results on 4 MCQA datasets, notably +5.7% with base-size model on MedQA-USMLE.</abstract>
       <url hash="d7385864">2024.lrec-main.675</url>
@@ -8039,7 +8039,7 @@
       <title><fixed-case>G</fixed-case>erman Also Hallucinates! Inconsistency Detection in News Summaries with the Absinth Dataset</title>
       <author><first>Laura</first><last>Mascarell</last></author>
       <author><first>Ribin</first><last>Chalumattu</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <pages>7696–7706</pages>
       <abstract>The advent of Large Language Models (LLMs) has led to remarkable progress on a wide range of natural language processing tasks. Despite the advances, these large-sized models still suffer from hallucinating information in their output, which poses a major issue in automatic text summarization, as we must guarantee that the generated summary is consistent with the content of the source document. Previous research addresses the challenging task of detecting hallucinations in the output (i.e. inconsistency detection) in order to evaluate the faithfulness of the generated summaries. However, these works primarily focus on English and recent multilingual approaches lack German data. This work presents Absinth, a manually annotated dataset for hallucination detection in German news summarization and explores the capabilities of novel open-source LLMs on this task in both fine-tuning and in-context learning settings. We open-source and release the Absinth dataset to foster further research on hallucination detection in German.</abstract>
       <url hash="99f56634">2024.lrec-main.680</url>
@@ -8095,9 +8095,9 @@
       <author><first>Kyeongmin</first><last>Rim</last></author>
       <author><first>Keer</first><last>Xu</last></author>
       <author><first>Liulu</first><last>Yue</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>7746–7759</pages>
       <abstract>This paper introduces GLAMR, an Abstract Meaning Representation (AMR) interpretation of Generative Lexicon (GL) semantic components. It includes a structured subeventual interpretation of linguistic predicates, and encoding of the opposition structure of property changes of event arguments. Both of these features are recently encoded in VerbNet (VN), and form the scaffolding for the semantic form associated with VN frame files. We develop a new syntax, concepts, and roles for subevent structure based on VN for connecting subevents to atomic predicates. Our proposed extension is compatible with current AMR specification. We also present an approach to automatically augment AMR graphs by inserting subevent structure of the predicates and identifying the subevent arguments from the semantic roles. A pilot annotation of GLAMR graphs of 65 documents (486 sentences), based on procedural texts as a source, is presented as a public dataset. The annotation includes subevents, argument property change, and document-level anaphoric links. Finally, we provide baseline models for converting text to GLAMR and vice versa, along with the application of GLAMR for generating enriched paraphrases with details on subevent transformation and arguments that are not present in the surface form of the texts.</abstract>
       <url hash="0046bbdb">2024.lrec-main.685</url>
@@ -8120,7 +8120,7 @@
       <title><fixed-case>G</fixed-case>lot<fixed-case>S</fixed-case>cript: A Resource and Tool for Low Resource Writing System Identification</title>
       <author><first>Amir Hossein</first><last>Kargaran</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>7774–7784</pages>
       <abstract>We present GlotScript, an open resource and tool for low resource writing system identification. GlotScript-R is a resource that provides the attested writing systems for more than 7,000 languages. It is compiled by aggregating information from existing writing system resources. GlotScript-T is a writing system identification tool that covers all 161 Unicode 15.0 scripts. For an input text, it returns its script distribution where scripts are identified by ISO 15924 codes. We also present two use cases for GlotScript. First, we demonstrate that GlotScript can help cleaning multilingual corpora such as mC4 and OSCAR. Second, we analyze the tokenization of a number of language models such as GPT-4 using GlotScript and provide insights on the coverage of low resource scripts and languages by each language model. We hope that GlotScript will become a useful resource for work on low resource languages in the NLP community. GlotScript-R and GlotScript-T are available at https://github.com/cisnlp/GlotScript.</abstract>
       <url hash="52072f1c">2024.lrec-main.687</url>
@@ -8142,7 +8142,7 @@
       <author><first>Anurag</first><last>Acharya</last></author>
       <author><first>Diego</first><last>Castro Estrada</last></author>
       <author><first>Diana</first><last>Gomez</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>7801–7813</pages>
       <abstract>Motifs are distinctive, recurring, widely used idiom-like words or phrases, often originating from folklore, whose meaning are anchored in a narrative. Motifs have significance as communicative devices because they concisely imply a constellation of culturally relevant information. Their broad usage suggests their cognitive importance as touchstones of cultural knowledge. We present GOLEM, the first dataset annotated for motific information. The dataset comprises 7,955 English articles (2,039,424 words). The corpus identifies 26,078 motif candidates across 34 motif types from three cultural or national groups: Jewish, Irish, and Puerto Rican. Each motif candidate is labeled with the type of usage (Motific, Referential, Eponymic, or Unrelated), resulting in 1,723 actual motific instances. Annotation was performed by individuals identifying as members of each group and achieved a Fleiss’ kappa of &gt;0.55. We demonstrate that classification of candidate type is a challenging task for LLMs using a few-shot approach; recent models such as T5, FLAN-T5, GPT-2, and Llama 2 (7B) achieved a performance of 41% accuracy at best. These data will support development of new models and approaches for detecting (and reasoning about) motific information in text. We release the corpus, the annotation guide, and the code to support other researchers building on this work.</abstract>
       <url hash="25e725a1">2024.lrec-main.689</url>
@@ -8162,7 +8162,7 @@
       <title>Gos 2: A New Reference Corpus of Spoken <fixed-case>S</fixed-case>lovenian</title>
       <author><first>Darinka</first><last>Verdonik</last></author>
       <author><first>Kaja</first><last>Dobrovoljc</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>7825–7830</pages>
       <abstract>This paper introduces a new version of the Gos reference corpus of spoken Slovenian, which was recently extended to more than double the original size (300 hours, 2.4 million words) by adding speech recordings and transcriptions from two related initiatives, the Gos VideoLectures corpus of public academic speech, and the Artur speech recognition database. We describe this process by first presenting the criteria guiding the balanced selection of the newly added data and the challenges encountered when merging language resources with divergent designs, followed by the presentation of other major enhancements of the new Gos corpus, such as improvements in lemmatization and morphosyntactic annotation, word-level speech alignment, a new XML schema and the development of a specialized online concordancer.</abstract>
@@ -8256,7 +8256,7 @@
     <paper id="699">
       <title>Granular Change Accuracy: A More Accurate Performance Metric for Dialogue State Tracking</title>
       <author><first>Taha</first><last>Aksu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>7939–7948</pages>
       <abstract>Current metrics for evaluating Dialogue State Tracking (DST) systems exhibit three primary limitations. They: i) erroneously presume a uniform distribution of slots throughout the dialog, ii) neglect to assign partial scores for individual turns, iii) frequently overestimate or underestimate performance by repeatedly counting the models’ successful or failed predictions. To address these shortcomings, we introduce a novel metric: Granular Change Accuracy (GCA). GCA focuses on evaluating the predicted changes in dialogue state over the entire dialogue history. Benchmarking reveals that GCA effectively reduces biases arising from distribution uniformity and the positioning of errors across turns, resulting in a more precise evaluation. Notably, we find that these biases are particularly pronounced when evaluating few-shot or zero-shot trained models, becoming even more evident as the model’s error rate increases. Hence, GCA offers significant promise, particularly for assessing models trained with limited resources. Our GCA implementation is a useful addition to the pool of DST metrics.</abstract>
       <url hash="ec4c451c">2024.lrec-main.699</url>
@@ -8385,7 +8385,7 @@
       <author><first>Siqi</first><last>Shen</last></author>
       <author><first>Zekun</first><last>Wang</last></author>
       <author><first>Winston</first><last>Wu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>8050–8094</pages>
       <abstract>Recent progress in large language models (LLMs) has enabled the deployment of many generative NLP applications. At the same time, it has also led to a misleading public discourse that “it’s all been solved.” Not surprisingly, this has, in turn, made many NLP researchers – especially those at the beginning of their careers – worry about what NLP research area they should focus on. Has it all been solved, or what remaining questions can we work on regardless of LLMs? To address this question, this paper compiles NLP research directions rich for exploration. We identify fourteen different research areas encompassing 45 research directions that require new research and are not directly solvable by LLMs. While we identify many research areas, many others exist; we do not cover areas currently addressed by LLMs, but where LLMs lag behind in performance or those focused on LLM development. We welcome suggestions for other research directions to include: https://bit.ly/nlp-era-llm.</abstract>
       <url hash="9ceaa6d1">2024.lrec-main.708</url>
@@ -8487,7 +8487,7 @@
       <title>How Do Hyenas Deal with Human Speech? Speech Recognition and Translation with <fixed-case>C</fixed-case>onf<fixed-case>H</fixed-case>yena</title>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Sara</first><last>Papi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>8184–8191</pages>
       <abstract>The attention mechanism, a cornerstone of state-of-the-art neural models, faces computational hurdles in processing long sequences due to its quadratic complexity. Consequently, research efforts in the last few years focused on finding more efficient alternatives. Among them, Hyena (Poli et al., 2023) stands out for achieving competitive results in both language modeling and image classification, while offering sub-quadratic memory and computational complexity. Building on these promising results, we propose ConfHyena, a Conformer whose encoder self-attentions are replaced with an adaptation of Hyena for speech processing, where the long input sequences cause high computational costs. Through experiments in automatic speech recognition (for English) and translation (from English into 8 target languages), we show that our best ConfHyena model significantly reduces the training time by 27%, at the cost of minimal quality degradation (∼1%), which, in most cases, is not statistically significant.</abstract>
@@ -8531,7 +8531,7 @@
       <title>How Important Is Tokenization in <fixed-case>F</fixed-case>rench Medical Masked Language Models?</title>
       <author><first>Yanis</first><last>Labrak</last></author>
       <author><first>Adrien</first><last>Bazoge</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
       <pages>8223–8234</pages>
@@ -8542,7 +8542,7 @@
     <paper id="722">
       <title>How Large Language Models Encode Context Knowledge? A Layer-Wise Probing Study</title>
       <author><first>Tianjie</first><last>Ju</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Wei</first><last>Du</last></author>
       <author><first>Xinwei</first><last>Yuan</last></author>
       <author><first>Zhaochun</first><last>Ren</last></author>
@@ -8569,7 +8569,7 @@
       <author><first>Venkata Sahith</first><last>Bathini</last></author>
       <author><first>Niloy</first><last>Ganguly</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>8258–8264</pages>
       <abstract>Question-answering (QA) on hybrid scientific tabular and textual data deals with scientific information, and relies on complex numerical reasoning. In recent years, while tabular QA has seen rapid progress, understanding their robustness on scientific information is lacking due to absence of any benchmark dataset. To investigate the robustness of the existing state-of-the-art QA models on scientific hybrid tabular data, we propose a new dataset, “SciTabQA”, consisting of 822 question-answer pairs from scientific tables and their descriptions. With the help of this dataset, we assess the state-of-the-art Tabular QA models based on their ability (i) to use heterogeneous information requiring both structured data (table) and unstructured data (text) and (ii) to perform complex scientific reasoning tasks. In essence, we check the capability of the models to interpret scientific tables and text. Our experiments show that “SciTabQA” is an innovative dataset to study question-answering over scientific heterogeneous data. We benchmark three state-of-the-art Tabular QA models, and find that the best F1 score is only 0.462.</abstract>
       <url hash="42be2912">2024.lrec-main.724</url>
@@ -8578,7 +8578,7 @@
     <paper id="725">
       <title>How Speculative Can Speculative Decoding Be?</title>
       <author><first>Zhuorui</first><last>Liu</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Dawei</first><last>Song</last></author>
       <pages>8265–8275</pages>
       <abstract>Large language models (LLMs) have drawn great attention from the field of natural language processing and beyond, due to their impressive capability of autoregressive modeling, yet bringing an obvious problem, i.e., the largely increased latency. An emerging idea to alleviate this problem is speculative decoding, which first uses a draft model to draft tokens autoregressively and then makes the target model verify these tokens in parallel. The draft model is typically smaller than the target model, and it essentially trades generation quality for speed. Thereby, speculative decoding can be viewed as a speculative game for the target model in term of verification failures. That is, the lengthy draft tokens proposed by the small draft models could fail in the verification stage. Naturally, a critical question arises: how speculative can speculative decoding be, or in other words, how small can an adequate draft model be and how large can an appropriate number of draft tokens be? This work aims to investigate these questions and demonstrate how the scale of the draft model and the number of draft tokens would have an impact on the overall latency of the speculative decoding. We theoretically show that neither of above two factors will be infinitely speculative. Namely, there is a certain turning point for each of them. We then empirically show that the scale of the draft model could be 10-20<tex-math>\times</tex-math> smaller than the target model and the optimal number of draft tokens should lie in 3-5.</abstract>
@@ -8601,7 +8601,7 @@
       <title>How to Do Politics with Words: Investigating Speech Acts in Parliamentary Debates</title>
       <author><first>Ines</first><last>Reinig</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>8287–8300</pages>
       <abstract>This paper presents a new perspective on framing through the lens of speech acts and investigates how politicians make use of different pragmatic speech act functions in political debates. To that end, we created a new resource of German parliamentary debates, annotated with fine-grained speech act types. Our hierarchical annotation scheme distinguishes between cooperation and conflict communication, further structured into six subtypes, such as informative, declarative or argumentative-critical speech acts, with 14 fine-grained classes at the lowest level. We present classification baselines on our new data and show that the fine-grained classes in our schema can be predicted with an avg. F1 of around 82.0%. We then use our classifier to analyse the use of speech acts in a large corpus of parliamentary debates over a time span from 2003–2023.</abstract>
       <url hash="8de61668">2024.lrec-main.727</url>
@@ -8615,7 +8615,7 @@
       <author><first>Gustav</first><last>Kristensen</last></author>
       <author><first>Marie Haahr</first><last>Petersen</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>8301–8306</pages>
       <abstract>Current language models require a lot of training data to obtain high performance. For Relation Classification (RC), many datasets are domain-specific, so combining datasets to obtain better performance is non-trivial. We explore a multi-domain training setup for RC, and attempt to improve performance by encoding domain information. Our proposed models improve &gt; 2 Macro-F1 against the baseline setup, and our analysis reveals that not all the labels benefit the same: The classes which occupy a similar space across domains (i.e., their interpretation is close across them, for example “physical”) benefit the least, while domain-dependent relations (e.g., “part-of”) improve the most when encoding domain information.</abstract>
       <url hash="22603e38">2024.lrec-main.728</url>
@@ -8624,7 +8624,7 @@
     <paper id="729">
       <title>How to Solve Few-Shot Abusive Content Detection Using the Data We Actually Have</title>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>8307–8322</pages>
       <abstract>Due to the broad range of social media platforms, the requirements of abusive language detection systems are varied and ever-changing. Already a large set of annotated corpora with different properties and label sets were created, such as hate or misogyny detection, but the form and targets of abusive speech are constantly evolving. Since, the annotation of new corpora is expensive, in this work we leverage datasets we already have, covering a wide range of tasks related to abusive language detection. Our goal is to build models cheaply for a new target label set and/or language, using only a few training examples of the target domain. We propose a two-step approach: first we train our model in a multitask fashion. We then carry out few-shot adaptation to the target requirements. Our experiments show that using already existing datasets and only a few-shots of the target task the performance of models improve both monolingually and across languages. Our analysis also shows that our models acquire a general understanding of abusive language, since they improve the prediction of labels which are present only in the target dataset and can benefit from knowledge about labels which are not directly used for the target task.</abstract>
       <url hash="8891ab46">2024.lrec-main.729</url>
@@ -8635,7 +8635,7 @@
       <author><first>Jiamin</first><last>Luo</last></author>
       <author><first>Jianing</first><last>Zhao</last></author>
       <author><first>Jingjing</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>8323–8333</pages>
       <abstract>Weakly-supervised Phrase Grounding (WPG) is an emerging task of inferring the fine-grained phrase-region matching, while merely leveraging the coarse-grained sentence-image pairs for training. However, existing studies on WPG largely ignore the implicit phrase-region matching relations, which are crucial for evaluating the capability of models in understanding the deep multimodal semantics. To this end, this paper proposes an Implicit-Enhanced Causal Inference (IECI) approach to address the challenges of modeling the implicit relations and highlighting them beyond the explicit. Specifically, this approach leverages both the intervention and counterfactual techniques to tackle the above two challenges respectively. Furthermore, a high-quality implicit-enhanced dataset is annotated to evaluate IECI and detailed evaluations show the great advantages of IECI over the state-of-the-art baselines. Particularly, we observe an interesting finding that IECI outperforms the advanced multimodal LLMs by a large margin on this implicit-enhanced dataset, which may facilitate more research to evaluate the multimodal LLMs in this direction.</abstract>
       <url hash="24c649c7">2024.lrec-main.730</url>
@@ -8645,7 +8645,7 @@
       <title>How Well Can <fixed-case>BERT</fixed-case> Learn the Grammar of an Agglutinative and Flexible-Order Language? The Case of <fixed-case>B</fixed-case>asque.</title>
       <author><first>Gorka</first><last>Urbizu</last></author>
       <author><first>Muitze</first><last>Zulaika</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Ander</first><last>Corral</last></author>
       <pages>8334–8348</pages>
       <abstract>This work investigates the acquisition of formal linguistic competence by neural language models, hypothesizing that languages with complex grammar, such as Basque, present substantial challenges during the pre-training phase. Basque is distinguished by its complex morphology and flexible word order, potentially complicating grammar extraction. In our analysis, we evaluated the grammatical knowledge of BERT models trained under various pre-training configurations, considering factors such as corpus size, model size, number of epochs, and the use of lemmatization. To assess this grammatical knowledge, we constructed the BL2MP (Basque L2 student-based Minimal Pairs) test set. This test set consists of minimal pairs, each containing both a grammatically correct and an incorrect sentence, sourced from essays authored by students at different proficiency levels in the Basque language. Additionally, our analysis explores the difficulties in learning various grammatical phenomena, the challenges posed by flexible word order, and the influence of the student’s proficiency level on the difficulty of correcting grammar errors.</abstract>
@@ -8670,7 +8670,7 @@
       <author><first>László János</first><last>Laki</last></author>
       <author><first>Noémi</first><last>Vadász</last></author>
       <author><first>Zijian Győző</first><last>Yang</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <pages>8360–8371</pages>
       <abstract>The paper introduces the Hungarian Language Understanding (HuLU) benchmark, a comprehensive assessment framework designed to evaluate the performance of neural language models on Hungarian language tasks. Inspired by the renowned GLUE and SuperGLUE benchmarks, HuLU aims to address the challenges specific to Hungarian language processing. The benchmark consists of various datasets, each representing different linguistic phenomena and task complexities. Moreover, the paper presents a web service developed for HuLU, offering a user-friendly interface for model evaluation. This platform not only ensures consistent assessment but also fosters transparency by maintaining a leaderboard showcasing model performances. Preliminary evaluations of various LMMs on HuLU datasets indicate that while Hungarian models show promise, there’s room for improvement to match the proficiency of English-centric models in their native language.</abstract>
       <url hash="482b9c3c">2024.lrec-main.733</url>
@@ -8722,7 +8722,7 @@
       <title>Humanitarian Corpora for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench and <fixed-case>S</fixed-case>panish</title>
       <author><first>Loryn</first><last>Isaacs</last></author>
       <author><first>Santiago</first><last>Chambó</last></author>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <pages>8418–8426</pages>
       <abstract>This paper presents three corpora of English, French and Spanish humanitarian documents compiled with reports obtained from ReliefWeb through its API. ReliefWeb is a leading database of humanitarian documents operated by the UN Office for the Coordination of Humanitarian Affairs (OCHA). To compile these corpora, documents were selected with language identification and noise reduction techniques. They were subsequently tokenized, lemmatized, tagged by part of speech, and enriched with metadata for use by linguists in corpus query software. These corpora were compiled to satisfy the research needs of the Humanitarian Encyclopedia, a project with a focus on conceptual variation. However, they can also be useful for other humanitarian endeavors, whether they are research- or practitioner-oriented; the source code for generating the corpora is available on GitHub. To compare materials, an exploratory analysis of definitional and generic-specific information was conducted for the concept of ARMED ACTOR with lexical data extracted from an English legacy corpus (where the concept is underrepresented) as well as on the new English and Spanish corpora. Lexical data were compared among corpora and presented by means of online data visualization to illustrate its potential to inform conceptual modelling.</abstract>
       <url hash="922e5d4f">2024.lrec-main.738</url>
@@ -8742,8 +8742,8 @@
       <title>Humans Need Context, What about Machines? Investigating Conversational Context in Abusive Language Detection</title>
       <author><first>Tom</first><last>Bourgeade</last></author>
       <author><first>Zongmin</first><last>Li</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Aixin</first><last>Sun</last></author>
       <pages>8438–8452</pages>
@@ -8756,7 +8756,7 @@
       <author><first>Wolfgang S.</first><last>Schmeisser-Nieto</last></author>
       <author><first>Pol</first><last>Pastells</last></author>
       <author><first>Simona</first><last>Frenda</last></author>
-      <author><first>Mariona</first><last>Taule</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taule</last></author>
       <pages>8453–8463</pages>
       <abstract>The increasing popularity of natural language processing has led to a race to improve machine learning models that often leaves aside the core study object, the language itself. In this study, we present classification models designed to detect stereotypes related to immigrants, along with both quantitative and qualitative analyses, shedding light on linguistic distinctions in how humans and various models perceive stereotypes. Given the subjective nature of this task, one of the models incorporates the judgments of all annotators by utilizing soft labels. Through a comparative analysis of BERT-based models using both hard and soft labels, along with predictions from GPT-4, we gain a clearer understanding of the linguistic challenges posed by texts containing stereotypes. Our dataset comprises Spanish Twitter posts collected as responses to immigrant-related hoaxes, annotated with binary values indicating the presence of stereotypes, implicitness, and the requirement for conversational context to understand the stereotype. Our findings suggest that both model prediction confidence and inter-annotator agreement are higher for explicit stereotypes, while stereotypes conveyed through irony and other figures of speech prove more challenging to detect than other implicit stereotypes.</abstract>
       <url hash="149174db">2024.lrec-main.741</url>
@@ -8764,7 +8764,7 @@
     </paper>
     <paper id="742">
       <title>Hybrid of Spans and Table-Filling for Aspect-Level Sentiment Triplet Extraction</title>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <author><first>Chaofan</first><last>Guo</last></author>
       <pages>8464–8473</pages>
       <abstract>Aspect Sentiment Triplet Extraction (ASTE) has become an emerging task in sentiment analysis research. Recently, researchers have proposed different tagging schemes, containing tagging of words, tagging of word pairs, and tagging of spans. However, the first two of these methods are often insufficient for the identification of multi-word terms, while the span tagging can label the entire phrase span, but it lacks the interactive information between words. In this paper, we propose Span in Table(S&amp;T) model which combining span with table-filling. Specifically, S&amp;T model achieve full fusion of syntactic and contextual features through cross-attention and generate the structures of word-pair table through Biaffine. Then, our model converts it to a span table by computing semantic distance based on syntactic dependency tree, which can enrich each unit of span table with semantic and interactive information. Meanwhile, the initial sentence features are constructed as simple phrase tables to enhance textual information of the phrase itself. In decoding, we define 8 types of labels for identifying three dimensions including aspect, opinion, and sentiment. Finally, the extensive experiments on D2 dataset show S&amp;T model achieves competitive results in ASTE task, the results certify the effectiveness and robustness of our S&amp;T model.</abstract>
@@ -8830,7 +8830,7 @@
     <paper id="748">
       <title><fixed-case>HYRR</fixed-case>: Hybrid Infused Reranking for Passage Retrieval</title>
       <author><first>Jing</first><last>Lu</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Ji</first><last>Ma</last></author>
       <author><first>Jianmo</first><last>Ni</last></author>
       <pages>8528–8534</pages>
@@ -8879,7 +8879,7 @@
       <author><first>Foivos Ioannis</first><last>Tzavellos</last></author>
       <author><first>Bas Marco</first><last>Göritzer</last></author>
       <author><first>Marijn</first><last>ten Thij</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>8569–8579</pages>
       <abstract>Idiomatic expressions are used in everyday language and typically convey affect, i.e., emotion. However, very little work investigating the extent to which automated methods can recognise emotions expressed in idiom-containing text has been undertaken. This can be attributed to the lack of emotion-labelled datasets that support the development and evaluation of such methods. In this paper, we present the IDioms with EMotions (IDEM) dataset consisting of a total of 9685 idiom-containing sentences that were generated and labelled with any one of 36 emotion types, with the help of the GPT-4 generative language model. Human validation by two independent annotators showed that more than 51% of the generated sentences are ideal examples, with the annotators reaching an agreement rate of 62% measured in terms of Cohen’s Kappa coefficient. To establish baseline performance on IDEM, various transformer-based emotion recognition approaches were implemented and evaluated. Results show that a RoBERTa model fine-tuned as a sequence classifier obtains a weighted F1-score of 58.73%, when the sequence provided as input specifies the idiom contained in a given sentence, together with its definition. Since this input configuration is based on the assumption that the idiom contained in the given sentence is already known, we also sought to assess the feasibility of automatically identifying the idioms contained in IDEM sentences. To this end, a hybrid idiom identification approach combining a rule-based method and a deep learning-based model was developed, whose performance on IDEM was determined to be 84.99% in terms of F1-score.</abstract>
       <url hash="298d39e7">2024.lrec-main.752</url>
@@ -8887,7 +8887,7 @@
     </paper>
     <paper id="753">
       <title>Identifying and Aligning Medical Claims Made on Social Media with Medical Evidence</title>
-      <author><first>Anthony</first><last>Hughes</last></author>
+      <author id="anthony-hughes"><first>Anthony</first><last>Hughes</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <pages>8580–8593</pages>
       <abstract>Evidence-based medicine is the practise of making medical decisions that adhere to the latest, and best known evidence at that time. Currently, the best evidence is often found in the form of documents, such as randomized control trials, meta-analyses and systematic reviews. This research focuses on aligning medical claims made on social media platforms with this medical evidence. By doing so, individuals without medical expertise can more effectively assess the veracity of such medical claims. We study three core tasks: identifying medical claims, extracting medical vocabulary from these claims, and retrieving evidence relevant to those identified medical claims. We propose a novel system that can generate synthetic medical claims to aid each of these core tasks. We additionally introduce a novel dataset produced by our synthetic generator that, when applied to these tasks, demonstrates not only a more flexible and holistic approach, but also an improvement in all comparable metrics. We make our dataset, the Expansive Medical Claim Corpus (EMCC), available at https://zenodo.org/records/8321460.</abstract>
@@ -8918,7 +8918,7 @@
       <title>Ideological Knowledge Representation: Framing Climate Change in <fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exicon</title>
       <author><first>Arianne</first><last>Reimerink</last></author>
       <author><first>Melania</first><last>Cabezas-García</last></author>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <author><first>Pamela</first><last>Faber</last></author>
       <pages>8617–8626</pages>
       <abstract>Culture is underrepresented in terminological resources and ideology is an especially complicated cultural aspect to convey. This complexity stems from the intertwined relationships among the discourse community of politicians, the media and the general public, as well as their interactions with scientific knowledge. Nevertheless, terminological resources should provide the necessary information to understand the political perspective taken in discourse on scientific issues with a high political profile. As in all specialized domains, environmental concepts and terms are subject to dynamism and variation (León-Araúz, 2017). Cognitive term variants (e.g., climate change, climate crisis) are of particular interest because of their presence in political discourse and their potential to influence climate actions. They can be used to reflect multidimensionality, imprecision or ideological attachment. This paper describes a method based on framing in Communication Studies to extract ideological knowledge from corpora. We used Spanish and English parliamentary debates (ParlaMint 2.1) and annotated the interventions that included a term variant of climate change according to an adapted version of the frames proposed by Bolsen and Shapiro (2018). The results showed how climate change discourse changes across de ideological spectrum and we give a proposal on how to represent that knowledge in an environmental TKB on the environment.</abstract>
@@ -8966,7 +8966,7 @@
     <paper id="761">
       <title>Impoverished Language Technology: The Lack of (Social) Class in <fixed-case>NLP</fixed-case></title>
       <author><first>Amanda</first><last>Cercas Curry</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>8675–8682</pages>
       <abstract>Since Labov’s foundational 1964 work on the social stratification of language, linguistics has dedicated concerted efforts towards understanding the relationships between socio-demographic factors and language production and perception. Despite the large body of evidence identifying significant relationships between socio-demographic factors and language production, relatively few of these factors have been investigated in the context of NLP technology. While age and gender are well covered, Labov’s initial target, socio-economic class, is largely absent. We survey the existing Natural Language Processing (NLP) literature and find that only 20 papers even mention socio-economic status. However, the majority of those papers do not engage with class beyond collecting information of annotator-demographics. Given this research lacuna, we provide a definition of class that can be operationalised by NLP researchers, and argue for including socio-economic class in future language technologies.</abstract>
@@ -8977,7 +8977,7 @@
       <title>Improved Neural Protoform Reconstruction via Reflex Prediction</title>
       <author><first>Liang</first><last>Lu</last></author>
       <author><first>Jingzhi</first><last>Wang</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>8683–8707</pages>
       <abstract>Protolanguage reconstruction is central to historical linguistics. The comparative method, one of the most influential theoretical and methodological frameworks in the history of the language sciences, allows linguists to infer protoforms (reconstructed ancestral words) from their reflexes (related modern words) based on the assumption of regular sound change. Not surprisingly, numerous computational linguists have attempted to operationalize comparative reconstruction through various computational models, the most successful of which have been supervised encoder-decoder models, which treat the problem of predicting protoforms given sets of reflexes as a sequence-to-sequence problem. We argue that this framework ignores one of the most important aspects of the comparative method: not only should protoforms be inferable from cognate sets (sets of related reflexes) but the reflexes should also be inferable from the protoforms. Leveraging another line of research—reflex prediction—we propose a system in which candidate protoforms from a reconstruction model are reranked by a reflex prediction model. We show that this more complete implementation of the comparative method allows us to surpass state-of-the-art protoform reconstruction methods on three of four Chinese and Romance datasets.</abstract>
       <url hash="50c7df19">2024.lrec-main.762</url>
@@ -9155,7 +9155,7 @@
       <title>Improving Personalized Sentiment Representation with Knowledge-enhanced and Parameter-efficient Layer Normalization</title>
       <author><first>You</first><last>Zhang</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Dan</first><last>Xu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>8877–8889</pages>
@@ -9214,7 +9214,7 @@
       <title>Improving the Robustness of Large Language Models via Consistency Alignment</title>
       <author><first>Yukun</first><last>Zhao</last></author>
       <author><first>Lingyong</first><last>Yan</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Guoliang</first><last>Xing</last></author>
       <author><first>Shuaiqiang</first><last>Wang</last></author>
       <author><first>Chong</first><last>Meng</last></author>
@@ -9284,7 +9284,7 @@
     <paper id="788">
       <title>Incorporating Word-level Phonemic Decoding into Readability Assessment</title>
       <author><first>Christine</first><last>Pinney</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Maria Soledad</first><last>Pera</last></author>
       <author><first>Katherine</first><last>Landau Wright</last></author>
       <author><first>Jerry Alan</first><last>Fails</last></author>
@@ -9298,7 +9298,7 @@
       <author><first>Sohom</first><last>Ghosh</last></author>
       <author><first>Arnab</first><last>Maji</last></author>
       <author><first>Aswartha</first><last>Narayana</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>9010–9018</pages>
       <abstract>Applications of Natural Language Processing (NLP) in the finance domain have been very popular of late. For financial NLP, (FinNLP) while various datasets exist for widely spoken languages like English and Chinese, datasets are scarce for low resource languages,particularly for Indian languages. In this paper, we address this challenges by presenting IndicFinNLP – a collection of 9 datasets consisting of three tasks relating to FinNLP for three Indian languages. These tasks are Exaggerated Numeral Detection, Sustainability Classification, and ESG Theme Determination of financial texts in Hindi, Bengali, and Telugu. Moreover, we release the datasets under CC BY-NC-SA 4.0 license for the benefit of the research community.</abstract>
       <url hash="fa971b1b">2024.lrec-main.789</url>
@@ -9317,7 +9317,7 @@
     <paper id="791">
       <title><fixed-case>I</fixed-case>ndirect<fixed-case>QA</fixed-case>: Understanding Indirect Answers to Implicit Polar Questions in <fixed-case>F</fixed-case>rench and <fixed-case>S</fixed-case>panish</title>
       <author><first>Christin</first><last>Müller</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>9025–9035</pages>
       <abstract>Polar questions are common in dialogue and expect exactly one of two answers (yes/no). It is however not uncommon for speakers to bypass these expected choices and answer, for example, “Islands are generally by the sea” to the question: “An island? By the sea?”. While such answers are natural in spoken dialogues, conversational systems still struggle to interpret them. Seminal work to interpret indirect answers were made in recent years—but only for English and with strict question formulations. In this work, we present a new corpus for French and Spanish—IndirectQA —where we mine subtitle data for indirect answers to study the labeling task with six different labels, while broadening polar questions to include also implicit polar questions (statements that trigger a yes/no-answer which are not necessarily formulated as a question). We opted for subtitles since they are a readily available source of conversation in various languages, but also come with peculiarities and challenges which we will discuss. Overall, we provide the first results on French and Spanish. They show that the task is challenging: the baseline accuracy scores drop from 61.43 on English to 44.06 for French and Spanish.</abstract>
       <url hash="38e00c37">2024.lrec-main.791</url>
@@ -9386,7 +9386,7 @@
       <author><first>Anurag</first><last>Shukla</last></author>
       <author><first>Tanuja</first><last>Ganu</last></author>
       <author><first>Vivek</first><last>Seshadri</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <pages>9097–9109</pages>
@@ -9438,7 +9438,7 @@
     <paper id="801">
       <title>Intention and Face in Dialog</title>
       <author><first>Adil</first><last>Soubki</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>9143–9153</pages>
       <abstract>The notion of face described by Brown and Levinson (1987) has been studied in great detail, but a critical aspect of the framework, that which focuses on how intentions mediate the planning of turns which impose upon face, has received far less attention. We present an analysis of three computational systems trained for classifying both intention and politeness, focusing on how the former influences the latter. In politeness theory, agents attend to the desire to have their wants appreciated (positive face), and a complementary desire to act unimpeded and maintain freedom (negative face). Similar to speech acts, utterances can perform so-called face acts which can either raise or threaten the positive or negative face of the speaker or hearer. We begin by using an existing corpus to train a model which classifies face acts, achieving a new SoTA in the process. We then observe that every face act has an underlying intention that motivates it and perform additional experiments integrating dialog act annotations to provide these intentions by proxy. Our analysis finds that dialog acts improve performance on face act detection for minority classes and points to a close relationship between aspects of face and intent.</abstract>
       <url hash="937e972d">2024.lrec-main.801</url>
@@ -9518,7 +9518,7 @@
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Sofiane</first><last>Ettayeb</last></author>
       <author><first>Louis</first><last>Tamames</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>9234–9244</pages>
       <abstract>We present a new question answering corpus in French designed to educational domain. To be useful in such domain, we have to propose more complex questions and to be able to justify the answers on validated material. We analyze some properties of this corpus. The last part of this paper will be devoted to present the first experiments we have carried out to demonstrate the value of this dataset for learning a Retrieval Augmented Genration framework. Different experiments are proposed, with an automatic evaluation. A human evaluation is proposed to confirm or infirm this automatic evaluation.</abstract>
       <url hash="a7f0853b">2024.lrec-main.808</url>
@@ -9562,7 +9562,7 @@
       <author><first>Kola</first><last>Tubosun</last></author>
       <author><first>Anuoluwapo</first><last>Aremu</last></author>
       <author><first>Iroro</first><last>Orife</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <pages>9296–9303</pages>
       <abstract>We introduce ÌròyìnSpeech corpus—a new dataset influenced by a desire to increase the amount of high quality, freely available, contemporary Yorùbá speech data that can be used for both Text-to-Speech (TTS) and Automatic Speech Recognition (ASR) tasks. We curated about 23,000 text sentences from the news and creative writing domains with an open license i.e., CC-BY-4.0 and asked multiple speakers to record each sentence. To encourage more participatory approach to data creation, we provide 5 000 utterances from the curated sentences to the Mozilla Common Voice platform to crowd-source the recording and validation of Yorùbá speech data. In total, we created about 42 hours of speech data recorded by 80 volunteers in-house, and 6 hours validated recordings on Mozilla Common Voice platform. Our evaluation on TTS shows that we can create a good quality general domain single-speaker TTS model for Yorùbá with as little 5 hours of speech by leveraging an end-to-end VITS architecture. Similarly, for ASR, we obtained a WER of 21.5.</abstract>
       <url hash="962b32d4">2024.lrec-main.812</url>
@@ -9618,7 +9618,7 @@
     </paper>
     <paper id="818">
       <title><fixed-case>ISO</fixed-case> 24617-12: A New Standard for Semantic Annotation</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>9361–9371</pages>
       <abstract>This paper presents ISO 24617-12, an annotation scheme for quantification phenomena in natural language., as part of the ISO Semantic Annotation Framework (ISO 24617). This scheme combines ideas from the theory of generalised quantifiers, from neo-Davidsonian event semantics, and from Discourse Representation Theory. The scheme consists of (1) an abstract syntax which defines ‘annotation structures’ as triples and other set-theoretic constructs of quantification-related concepts; (2) a reference representation of annotation structures (‘concrete syntax’); and (3) a compositional semantics of annotation structures. Together, these components define the markup language QuantML. This paper focuses on the identification and structuring of the semantic information useful for the characterisation of quantification in natural language and the interoperable representation of these information structures in QuantML.</abstract>
       <url hash="b23385b5">2024.lrec-main.818</url>
@@ -9662,7 +9662,7 @@
     <paper id="822">
       <title><fixed-case>IT</fixed-case>2<fixed-case>ACL</fixed-case> Learning Easy-to-Hard Instructions via 2-Phase Automated Curriculum Learning for Large Language Models</title>
       <author><first>Yufei</first><last>Huang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>9405–9421</pages>
       <abstract>Instruction tuning has demonstrated its superiority in unlocking the abilities of pre-trained large language models (LLMs), including their capability to respond to diverse human instructions and conduct complex reasoning. In order to further enhance the continuous learning capabilities of pre-trained LLMs, we explore the training process of instruction tuning through the lens of task sequences. We propose a 2-phase automated curriculum learning guided instruction tuning framework, IT2ACL that learns easy-to-hard instructions for LLMs in a self-adjusting dynamic manner. To facilitate curriculum learning from instructions, we propose a loss-driven progress signal for two-phase strategies: instruction prediction gain that decides the instruction level syllabus. Through comprehensive experiments on 70 Chinese datasets which have been grouped into 16 distinct task clusters, we demonstrate the effectiveness of our approach in eliciting latent ability in pre-trained LLMs and achieving superior performance across diverse tasks.</abstract>
       <url hash="8404bcd3">2024.lrec-main.822</url>
@@ -9689,7 +9689,7 @@
     <paper id="825">
       <title>It’s Not under the Lamppost: Expanding the Reach of Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Christy</first><last>Doran</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <pages>9441–9451</pages>
       <abstract>Generic commercial language-based assistants have become ubiquitously available, originally in the form of smart speakers and mobile apps, and more recently in the form of systems based on generative AI. At first glance, their capabilities seem remarkable. Speech recognition works well, NLU mostly works, and access to back-end information sources is usually quite good. However, there is still a lot of work to be done. In the area of NLU in particular, focused probes into the capabilities of language-based assistants easily reveal significant areas of brittleness that demonstrate large gaps in their coverage. For example, the straightforward disjunctive query <i>is this monday or tuesday</i> elicited the nonsensical response <i>it’s 2:50 p.m. many consider it to be the afternoon</i>. These gaps are difficult to identify if the development process relies on training the system with an ongoing supply of natural user data, because this natural data can become distorted by a self-reinforcing feedback loop where the system ‘trains’ the user to produce data that works. This paper describes a process for collecting specific kinds of data to uncover these gaps and an annotation scheme for system responses, and includes examples of simple utterances that nonetheless fail to be correctly processed. The systems tested include both Conventional assistants, such as Amazon Alexa and Google Assistant, as well as GenAI systems, including ChatGPT and Bard/Gemini. We claim that these failures are due to a lack of attention to the full spectrum of input possibilities, and argue that systems would benefit from the inclusion of focused manual assessment to directly target likely gaps.</abstract>
       <url hash="5107524c">2024.lrec-main.825</url>
@@ -9700,7 +9700,7 @@
       <author><first>Masaaki</first><last>Nagata</last></author>
       <author><first>Makoto</first><last>Morishita</last></author>
       <author><first>Katsuki</first><last>Chousa</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <pages>9452–9462</pages>
       <abstract>We constructed JaParaPat (Japanese-English Parallel Patent Application Corpus), a bilingual corpus of more than 300 million Japanese-English sentence pairs from patent applications published in Japan and the United States from 2000 to 2021. We obtained the publication of unexamined patent applications from the Japan Patent Office (JPO) and the United States Patent and Trademark Office (USPTO). We also obtained patent family information from the DOCDB, that is a bibliographic database maintained by the European Patent Office (EPO). We extracted approximately 1.4M Japanese-English document pairs, which are translations of each other based on the patent families, and extracted about 350M sentence pairs from the document pairs using a translation-based sentence alignment method whose initial translation model is bootstrapped from a dictionary-based sentence alignment. We experimentally improved the accuracy of the patent translations by 20 bleu points by adding more than 300M sentence pairs obtained from patent applications to 22M sentence pairs obtained from the web.</abstract>
       <url hash="fda979b6">2024.lrec-main.826</url>
@@ -9720,12 +9720,12 @@
       <author><first>Felix E.</first><last>Herron</last></author>
       <author><first>Magali</first><last>Norré</last></author>
       <author><first>Massih R</first><last>Amini</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Lorraine</first><last>Goeuriot</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>François</first><last>Portet</last></author>
@@ -9853,7 +9853,7 @@
       <author><first>Bin</first><last>Liang</last></author>
       <author><first>Xian</first><last>Wu</last></author>
       <author><first>Yefeng</first><last>Zheng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>9578–9588</pages>
       <abstract>Dialogue policy learning (DPL) aims to determine an abstract representation (also known as action) to guide what the response should be. Typically, DPL is cast as a sequential decision problem across a series of predefined action candidates. However, such static and narrow actions can limit response diversity and impede the dialogue agent’s adaptability to new scenarios and edge cases. To overcome these challenges, we introduce a novel <b>Jo</b>int <b>T</b>ransformer <b>R</b>einforcement Learning framework, coined as <b>JoTR</b>, where a text-to-text Transformer-based model is employed to directly generate dialogue actions. More concretely, JoTR formulates a token-grained policy, facilitating more dynamic and adaptable dialogue action generation without the need for predefined action candidates. This method not only enhances the diversity of responses but also significantly improves the system’s capability to manage unfamiliar scenarios. Furthermore, JoTR utilizes Reinforcement Learning with a reward-shaping mechanism to efficiently fine-tune the token-grained policy. This allows the model to evolve through interactions, thereby enhancing its performance over time. Our extensive evaluation demonstrates that JoTR surpasses previous state-of-the-art models, showing improvements of 9% and 13% in success rate, and 34% and 37% in the diversity of dialogue actions across two benchmark dialogue modeling tasks respectively. These results have been validated by both user simulators and human evaluators. Code and data are available at ://github.com/KwanWaiChung/JoTR.</abstract>
       <url hash="ff9890d3">2024.lrec-main.837</url>
@@ -9936,8 +9936,8 @@
       <author><first>Yilin</first><last>Wang</last></author>
       <author><first>Minghao</first><last>Hu</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
-      <author><first>Dong</first><last>Yang</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last></author>
       <author><first>Xicheng</first><last>Lu</last></author>
       <pages>9668–9680</pages>
       <abstract>The goal of knowledge graph completion (KGC) is to predict missing facts among entities. Previous methods for KGC re-ranking are mostly built on non-generative language models to obtain the probability of each candidate. Recently, generative large language models (LLMs) have shown outstanding performance on several tasks such as information extraction and dialog systems. Leveraging them for KGC re-ranking is beneficial for leveraging the extensive pre-trained knowledge and powerful generative capabilities. However, it may encounter new problems when accomplishing the task, namely mismatch, misordering and omission. To this end, we introduce KC-GenRe, a knowledge-constrained generative re-ranking method based on LLMs for KGC. To overcome the mismatch issue, we formulate the KGC re-ranking task as a candidate identifier sorting generation problem implemented by generative LLMs. To tackle the misordering issue, we develop a knowledge-guided interactive training method that enhances the identification and ranking of candidates. To address the omission issue, we design a knowledge-augmented constrained inference method that enables contextual prompting and controlled generation, so as to obtain valid rankings. Experimental results show that KG-GenRe achieves state-of-the-art performance on four datasets, with gains of up to 6.7% and 7.7% in the MRR and Hits@1 metric compared to previous methods, and 9.0% and 11.1% compared to that without re-ranking. Extensive analysis demonstrates the effectiveness of components in KG-GenRe.</abstract>
@@ -9991,7 +9991,7 @@
     <paper id="850">
       <title><fixed-case>KGC</fixed-case>onv, a Conversational Corpus Grounded in <fixed-case>W</fixed-case>ikidata</title>
       <author><first>Quentin</first><last>Brabant</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>9732–9742</pages>
@@ -10005,7 +10005,7 @@
       <author><first>Daniela</first><last>Jurášová</last></author>
       <author><first>Matúš</first><last>Žilinec</last></author>
       <author><first>Eduard</first><last>Šubert</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>9743–9752</pages>
       <abstract>We present the Khan Academy Corpus totalling 10122 hours in 87394 recordings across 29 languages, where 43% of recordings (4252 hours) are equipped with human-written subtitles. The subtitle texts cover a total of 137 languages. The dataset was collected from open access Khan Academy lectures, benefiting from their manual transcripts and manual translations of the transcripts. The dataset can serve in creation or evaluation of multilingual speech recognition or translation systems, featuring a diverse set of subject domains.</abstract>
       <url hash="fd082de2">2024.lrec-main.851</url>
@@ -10192,7 +10192,7 @@
     <paper id="867">
       <title>Konidioms Corpus: A Dataset of Idioms in <fixed-case>K</fixed-case>onkani Language</title>
       <author><first>Naziya Mahamdul</first><last>Shaikh</last></author>
-      <author><first>Jyoti D.</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti D.</first><last>Pawar</last></author>
       <author><first>Mubarak Banu</first><last>Sayed</last></author>
       <pages>9932–9940</pages>
       <abstract>Konkani is a language spoken by a large number of people from the states located in the west coast of India. It is the official language of Goa state from the Indian subcontinent. Currently there is a lack of idioms corpus in the low-resource Konkani language. This paper aims to improve the progress in idiomatic sentence identification in order to enhance linguistic processing by creating the first corpus for idioms in the Konkani language. We select a unique list of 1597 idioms from multiple sources and proceed with a strictly controlled sentence creation procedure through crowdsourcing. This is followed by quality check of the sentences and annotation procedure by the experts in the Konkani language. We were able to build a good quality corpus comprising of 6520 sentences written in the Devanagari script of Konkani language. Analysis of the collected idioms and their usage in the created sentences revealed the dominance of selective domains like ‘human body’ in the creation and occurrences of idiomatic expressions in the Konkani language. This corpus is made publicly available.</abstract>
@@ -10341,7 +10341,7 @@
     <paper id="879">
       <title>Language Models for Text Classification: Is In-Context Learning Enough?</title>
       <author><first>Aleksandra</first><last>Edwards</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>10058–10072</pages>
       <abstract>Recent foundational language models have shown state-of-the-art performance in many NLP tasks in zero- and few-shot settings. An advantage of these models over more standard approaches based on fine-tuning is the ability to understand instructions written in natural language (prompts), which helps them generalise better to different tasks and domains without the need for specific training data. This makes them suitable for addressing text classification problems for domains with limited amounts of annotated instances. However, existing research is limited in scale and lacks understanding of how text generation models combined with prompting techniques compare to more established methods for text classification such as fine-tuning masked language models. In this paper, we address this research gap by performing a large-scale evaluation study for 16 text classification datasets covering binary, multiclass, and multilabel problems. In particular, we compare zero- and few-shot approaches of large language models to fine-tuning smaller language models. We also analyse the results by prompt, classification type, domain, and number of labels. In general, the results show how fine-tuning smaller and more efficient language models can still outperform few-shot approaches of larger language models, which have room for improvement when it comes to text classification.</abstract>
       <url hash="485b10e3">2024.lrec-main.879</url>
@@ -10436,7 +10436,7 @@
       <title>Large Language Models Offer an Alternative to the Traditional Approach of Topic Modelling</title>
       <author><first>Yida</first><last>Mu</last></author>
       <author><first>Chun</first><last>Dong</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <pages>10160–10171</pages>
       <abstract>Topic modelling, as a well-established unsupervised technique, has found extensive use in automatically detecting significant topics within a corpus of documents. However, classic topic modelling approaches (e.g., LDA) have certain drawbacks, such as the lack of semantic understanding and the presence of overlapping topics. In this work, we investigate the untapped potential of large language models (LLMs) as an alternative for uncovering the underlying topics within extensive text corpora. To this end, we introduce a framework that prompts LLMs to generate topics from a given set of documents and establish evaluation protocols to assess the clustering efficacy of LLMs. Our findings indicate that LLMs with appropriate prompts can stand out as a viable alternative, capable of generating relevant topic titles and adhering to human guidelines to refine and merge topics. Through in-depth experiments and evaluation, we summarise the advantages and constraints of employing LLMs in topic extraction.</abstract>
@@ -10629,9 +10629,9 @@
       <author><first>Linyu</first><last>Fan</last></author>
       <author><first>Wu Wu</first><last>Yiheng</last></author>
       <author><first>Jun</first><last>Xie</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>10336–10346</pages>
       <abstract>Thanks to the development of pre-trained sequence-to-sequence (seq2seq) models (e.g., BART), recent studies on AMR parsing often regard this task as a seq2seq translation problem by linearizing AMR graphs into AMR token sequences in pre-processing and recovering AMR graphs from sequences in post-processing. Seq2seq AMR parsing is a relatively simple paradigm but it unavoidably loses structural information among AMR tokens. To compensate for the loss of structural information, in this paper we explicitly leverage AMR structure in the decoding phase. Given an AMR graph, we first project the structure in the graph into an AMR token graph, i.e., structure among AMR tokens in the linearized sequence. The structures for an AMR token could be divided into two parts: structure in prediction history and structure in future. Then we propose to model structure in prediction history via a graph attention network (GAT) and learn structure in future via a multi-task scheme, respectively. Experimental results show that our approach significantly outperforms a strong baseline and achieves performance with 85.5 ±0.1 and 84.2 ±0.1 Smatch scores on AMR 2.0 and AMR 3.0, respectively</abstract>
       <url hash="ddb1bbe4">2024.lrec-main.903</url>
@@ -10678,7 +10678,7 @@
       <author><first>Hyeonseok</first><last>Moon</last></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Sugyeong</first><last>Eo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>10380–10392</pages>
       <abstract>Counter-narrative generation, i.e., the generation of fact-based responses to hate speech with the aim of correcting discriminatory beliefs, has been demonstrated to be an effective method to combat hate speech. However, its effectiveness is limited by the resource-intensive nature of dataset construction processes and only focuses on the primary language. To alleviate this problem, we propose a Korean Hate Speech Counter Punch (KHSCP), a cost-effective counter-narrative generation method in the Korean language. To this end, we release the first counter-narrative generation dataset in Korean and pose two research questions. Under the questions, we propose an effective augmentation method and investigate the reasonability of a large language model to overcome data scarcity in low-resource environments by leveraging existing resources. In this regard, we conduct several experiments to verify the effectiveness of the proposed method. Our results reveal that applying pre-existing resources can improve the generation performance by a significant margin. Through deep analysis on these experiments, this work proposes the possibility of overcoming the challenges of generating counter-narratives in low-resource environments.</abstract>
       <url hash="a6833acc">2024.lrec-main.907</url>
@@ -10705,7 +10705,7 @@
       <author><first>Jotsna</first><last>Gowda</last></author>
       <author><first>Bill</first><last>Dyer</last></author>
       <author><first>Kevin</first><last>Tang</last></author>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <pages>10403–10415</pages>
       <abstract>African American English (AAE) has received recent attention in the field of natural language processing (NLP). Efforts to address bias against AAE in NLP systems tend to focus on lexical differences. When the unique structures of AAE are considered, the solution is often to remove or neutralize the differences. This work leverages knowledge about the unique linguistic structures to improve automatic disambiguation of habitual and non-habitual meanings of “be” in naturally produced AAE transcribed speech. Both meanings are employed in AAE but examples of Habitual be are rare in already limited AAE data. Generally, representing additional syntactic information improves semantic disambiguation of habituality. Using an ensemble of classical machine learning models with a representation of the unique POS and dependency patterns of Habitual be, we show that integrating syntactic information improves the identification of habitual uses of “be” by about 65 F1 points over a simple baseline model of n-grams, and as much as 74 points. The success of this approach demonstrates the potential impact when we embrace, rather than neutralize, the structural uniqueness of African American English.</abstract>
       <url hash="03cb5439">2024.lrec-main.909</url>
@@ -10715,7 +10715,7 @@
       <title>Leveraging the Interplay between Syntactic and Acoustic Cues for Optimizing <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case> Pause Formation</title>
       <author><first>Yejin</first><last>Jeon</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>10416–10421</pages>
       <abstract>Contemporary neural speech synthesis models have indeed demonstrated remarkable proficiency in synthetic speech generation as they have attained a level of quality comparable to that of human-produced speech. Nevertheless, it is important to note that these achievements have predominantly been verified within the context of high-resource languages such as English. Furthermore, the Tacotron and FastSpeech variants show substantial pausing errors when applied to the Korean language, which affects speech perception and naturalness. In order to address the aforementioned issues, we propose a novel framework that incorporates comprehensive modeling of both syntactic and acoustic cues that are associated with pausing patterns. Remarkably, our framework possesses the capability to consistently generate natural speech even for considerably more extended and intricate out-of-domain (OOD) sentences, despite its training on short audio clips. Architectural design choices are validated through comparisons with baseline models and ablation studies using subjective and objective metrics, thus confirming model performance.</abstract>
       <url hash="2bab5d50">2024.lrec-main.910</url>
@@ -10763,7 +10763,7 @@
       <title><fixed-case>LFED</fixed-case>: A Literary Fiction Evaluation Dataset for Large Language Models</title>
       <author><first>Linhao</first><last>Yu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>10466–10475</pages>
       <abstract>The rapid evolution of large language models (LLMs) has ushered in the need for comprehensive assessments of their performance across various dimensions. In this paper, we propose LFED, a Literary Fiction Evaluation Dataset, which aims to evaluate the capability of LLMs on the long fiction comprehension and reasoning. We collect 95 literary fictions that are either originally written in Chinese or translated into Chinese, covering a wide range of topics across several centuries. We define a question taxonomy with 8 question categories to guide the creation of 1,304 questions. Additionally, we conduct an in-depth analysis to ascertain how specific attributes of literary fictions (e.g., novel types, character numbers, the year of publication) impact LLM performance in evaluations. Through a series of experiments involving various state-of-the-art LLMs, our findings reveal that these models face considerable challenges in effectively addressing questions related to literary fictions, with ChatGPT reaching only 57.08% under the zero-shot setting. The dataset will be publicly available at https://github.com/tjunlp-lab/LFED.git.</abstract>
       <url hash="6bf3507a">2024.lrec-main.915</url>
@@ -10774,7 +10774,7 @@
       <author><first>Chuang</first><last>Liu</last></author>
       <author><first>Renren</first><last>Jin</last></author>
       <author><first>Yuqi</first><last>Ren</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>10476–10487</pages>
       <abstract>Chinese Large Language Models (LLMs) have recently demonstrated impressive capabilities across various NLP benchmarks and real-world applications. However, the existing benchmarks for comprehensively evaluating these LLMs are still insufficient, particularly in terms of measuring knowledge that LLMs capture. Current datasets collect questions from Chinese examinations across different subjects and educational levels to address this issue. Yet, these benchmarks primarily focus on objective questions such as multiple-choice questions, leading to a lack of diversity in question types. To tackle this problem, we propose LHMKE, a Large-scale, Holistic, and Multi-subject Knowledge Evaluation benchmark in this paper. LHMKE is designed to provide a comprehensive evaluation of the knowledge acquisition capabilities of Chinese LLMs. It encompasses 10,465 questions across 75 tasks covering 30 subjects, ranging from primary school to professional certification exams. Notably, LHMKE includes both objective and subjective questions, offering a more holistic evaluation of the knowledge level of LLMs. We have assessed 11 Chinese LLMs under the zero-shot setting, which aligns with real examinations, and compared their performance across different subjects. We also conduct an in-depth analysis to check whether GPT-4 can automatically score subjective predictions. Our findings suggest that LHMKE is a challenging and advanced testbed for Chinese LLMs.</abstract>
       <url hash="41033068">2024.lrec-main.916</url>
@@ -10811,9 +10811,9 @@
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
       <author><first>Romane</first><last>Gallienne</last></author>
       <author><first>Carlos-Emiliano</first><last>González-Gallardo</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Vincent</first><last>Segonne</last></author>
       <author><first>Johanna</first><last>Simoens</last></author>
@@ -10825,12 +10825,12 @@
     <paper id="920">
       <title>Linear Cross-document Event Coreference Resolution with <fixed-case>X</fixed-case>-<fixed-case>AMR</fixed-case></title>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
-      <author><first>George Arthur</first><last>Baker</last></author>
+      <author id="george-baker"><first>George Arthur</first><last>Baker</last></author>
       <author><first>Evi</first><last>Judge</last></author>
       <author><first>Michael</first><last>Reagan</last></author>
       <author><first>Kristin</first><last>Wright-Bettner</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>10517–10529</pages>
       <abstract>Event Coreference Resolution (ECR) as a pairwise mention classification task is expensive both for automated systems and manual annotations. The task’s quadratic difficulty is exacerbated when using Large Language Models (LLMs), making prompt engineering for ECR prohibitively costly. In this work, we propose a graphical representation of events, X-AMR, anchored around individual mentions using a cross-document version of Abstract Meaning Representation. We then linearize the ECR with a novel multi-hop coreference algorithm over the event graphs. The event graphs simplify ECR, making it a) LLM cost-effective, b) compositional and interpretable, and c) easily annotated. For a fair assessment, we first enrich an existing ECR benchmark dataset with these event graphs using an annotator-friendly tool we introduce. Then, we employ GPT-4, the newest LLM by OpenAI, for these annotations. Finally, using the ECR algorithm, we assess GPT-4 against humans and analyze its limitations. Through this research, we aim to advance the state-of-the-art for efficient ECR and shed light on the potential shortcomings of current LLMs at this task. Code and annotations: <url>https://github.com/ahmeshaf/gpt_coref</url></abstract>
       <url hash="97b4e58b">2024.lrec-main.920</url>
@@ -10863,8 +10863,8 @@
     <paper id="923">
       <title>Linguistic Nudges and Verbal Interaction with Robots, Smart-Speakers, and Humans</title>
       <author><first>Natalia</first><last>Kalashnikova</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <pages>10555–10564</pages>
       <abstract>This paper describes a data collection methodology and emotion annotation of dyadic interactions between a human, a Pepper robot, a Google Home smart-speaker, or another human. The collected 16 hours of audio recordings were used to analyze the propensity to change someone’s opinions about ecological behavior regarding the type of conversational agent, the kind of nudges, and the speaker’s emotional state. We describe the statistics of data collection and annotation. We also report the first results, which showed that humans change their opinions on more questions with a human than with a device, even against mainstream ideas. We observe a correlation between a certain emotional state and the interlocutor and a human’s propensity to be influenced. We also reported the results of the studies that investigated the effect of human likeness on speech using our data.</abstract>
       <url hash="108bd10a">2024.lrec-main.923</url>
@@ -10909,7 +10909,7 @@
     <paper id="927">
       <title>Linking Judgement Text to Court Hearing Videos: <fixed-case>UK</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt as a Case Study</title>
       <author><first>Hadeel</first><last>Saadany</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Sophie</first><last>Walker</last></author>
       <author><first>Catherine</first><last>Breslin</last></author>
       <pages>10598–10609</pages>
@@ -11058,7 +11058,7 @@
     </paper>
     <paper id="940">
       <title><fixed-case>L</fixed-case>o<fixed-case>NAS</fixed-case>: Elastic Low-Rank Adapters for Efficient Large Language Models</title>
-      <author><first>Juan Pablo</first><last>Munoz</last></author>
+      <author id="juan-pablo-munoz"><first>Juan Pablo</first><last>Munoz</last></author>
       <author><first>Jinjie</first><last>Yuan</last></author>
       <author><first>Yi</first><last>Zheng</last></author>
       <author><first>Nilesh</first><last>Jain</last></author>
@@ -11092,7 +11092,7 @@
       <title>Look before You Leap: Dual Logical Verification for Knowledge-based Visual Question Generation</title>
       <author><first>Xumeng</first><last>Liu</last></author>
       <author><first>Wenya</first><last>Guo</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Xubo</first><last>Liu</last></author>
       <author><first>Yu</first><last>Zhao</last></author>
       <author><first>Shenglong</first><last>Yu</last></author>
@@ -11114,7 +11114,7 @@
     <paper id="945">
       <title>Low-Rank Prune-And-Factorize for Language Model Compression</title>
       <author><first>Siyu</first><last>Ren</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last></author>
       <pages>10822–10832</pages>
       <abstract>The components underpinning PLMs—large weight matrices—were shown to bear considerable redundancy. Matrix factorization, a well-established technique from matrix theory, has been utilized to reduce the number of parameters in PLM. However, it fails to retain satisfactory performance under moderate to high compression rates. In this paper, we identify the full-rankness of fine-tuned PLM as the fundamental bottleneck for the failure of matrix factorization and explore the use of network pruning to extract low-rank sparsity pattern desirable to matrix factorization. We find such a low-rank sparsity pattern exclusively exists in models generated by first-order pruning, which motivates us to unite the two approaches and achieve more effective model compression. We further propose two techniques: sparsity-aware SVD and mixed-rank fine-tuning, which improve the initialization and training of the compression procedure, respectively. Experiments on GLUE and question-answering tasks show that the proposed method has a superior compression-performance trade-off compared to existing approaches.</abstract>
       <url hash="ce851bd4">2024.lrec-main.945</url>
@@ -11135,7 +11135,7 @@
       <author><first>Yang</first><last>Bai</last></author>
       <author><first>Anthony</first><last>Colas</last></author>
       <author><first>Christan</first><last>Grant</last></author>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <pages>10846–10857</pages>
       <abstract>In recent research, contrastive learning has proven to be a highly effective method for representation learning and is widely used for dense retrieval. However, we identify that relying solely on contrastive learning can lead to suboptimal retrieval performance. On the other hand, despite many retrieval datasets supporting various learning objectives beyond contrastive learning, combining them efficiently in multi-task learning scenarios can be challenging. In this paper, we introduce M3, an advanced recursive Multi-hop dense sentence retrieval system built upon a novel Multi-task Mixed-objective approach for dense text representation learning, addressing the aforementioned challenges. Our approach yields state-of-the-art performance on a large-scale open-domain fact verification benchmark dataset, FEVER.</abstract>
       <url hash="800d869b">2024.lrec-main.947</url>
@@ -11172,7 +11172,7 @@
       <title><fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>m<fixed-case>S</fixed-case>: <fixed-case>M</fixed-case>agahi Code-mixed Dataset for Sentiment Analysis</title>
       <author><first>Priya</first><last>Rani</last></author>
       <author><first>Theodorus</first><last>Fransen</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Gaurav</first><last>Negi</last></author>
       <pages>10880–10890</pages>
       <abstract>The present paper introduces new sentiment data, MaCMS, for Magahi-Hindi-English (MHE) code-mixed language, where Magahi is a less-resourced minority language. This dataset is the first Magahi-Hindi-English code-mixed dataset for sentiment analysis tasks. Further, we also provide a linguistics analysis of the dataset to understand the structure of code-mixing and a statistical study to understand the language preferences of speakers with different polarities. With these analyses, we also train baseline models to evaluate the dataset’s quality.</abstract>
@@ -11196,7 +11196,7 @@
       <author><first>Terry</first><last>Ruas</last></author>
       <author><first>Jerome</first><last>Waßmuth</last></author>
       <author><first>André</first><last>Greiner-Petter</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <author><first>Bela</first><last>Gipp</last></author>
       <author><first>Timo</first><last>Spinde</last></author>
       <pages>10903–10920</pages>
@@ -11211,8 +11211,8 @@
       <author><first>Verena</first><last>Blaschke</last></author>
       <author><first>Barbara</first><last>Kovačić</last></author>
       <author><first>Siyao</first><last>Peng</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>10921–10938</pages>
       <abstract>Despite the success of the Universal Dependencies (UD) project exemplified by its impressive language breadth, there is still a lack in ‘within-language breadth’: most treebanks focus on standard languages. Even for German, the language with the most annotations in UD, so far no treebank exists for one of its language varieties spoken by over 10M people: Bavarian. To contribute to closing this gap, we present the first multi-dialect Bavarian treebank (MaiBaam) manually annotated with part-of-speech and syntactic dependency information in UD, covering multiple text genres (wiki, fiction, grammar examples, social, non-fiction). We highlight the morphosyntactic differences between the closely-related Bavarian and German and showcase the rich variability of speakers’ orthographies. Our corpus includes 15k tokens, covering dialects from all Bavarian-speaking areas spanning three countries. We provide baseline parsing and POS tagging results, which are lower than results obtained on German and vary substantially between different graph-based parsers. To support further research on Bavarian syntax, we make our dataset, language-specific guidelines and code publicly available.</abstract>
       <url hash="95b8e864">2024.lrec-main.953</url>
@@ -11223,7 +11223,7 @@
       <author><first>Tyler K.</first><last>Bikaun</last></author>
       <author><first>Tim</first><last>French</last></author>
       <author><first>Michael</first><last>Stewart</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Melinda</first><last>Hodkiewicz</last></author>
       <pages>10939–10951</pages>
       <abstract>Maintenance short texts (MST), derived from maintenance work order records, encapsulate crucial information in a concise yet information-rich format. These user-generated technical texts provide critical insights into the state and maintenance activities of machines, infrastructure, and other engineered assets–pillars of the modern economy. Despite their importance for asset management decision-making, extracting and leveraging this information at scale remains a significant challenge. This paper presents MaintIE, a multi-level fine-grained annotation scheme for entity recognition and relation extraction, consisting of 5 top-level classes: PhysicalObject, State, Process, Activity and Property and 224 leaf entities, along with 6 relations tailored to MSTs. Using MaintIE, we have curated a multi-annotator, high-quality, fine-grained corpus of 1,076 annotated texts. Additionally, we present a coarse-grained corpus of 7,000 texts and consider its performance for bootstrapping and enhancing fine-grained information extraction. Using these corpora, we provide model performance measures for benchmarking automated entity recognition and relation extraction. The MaintIE scheme, corpus, and model are publicly available at https://github.com/nlp-tlp/maintie under the MIT license, encouraging further community exploration and innovation in extracting valuable insights from MSTs.</abstract>
@@ -11268,7 +11268,7 @@
     <paper id="958">
       <title>Making Sentence Embeddings Robust to User-Generated Content</title>
       <author><first>Lydia</first><last>Nishimwe</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <pages>10984–10998</pages>
       <abstract>NLP models have been known to perform poorly on user-generated content (UGC), mainly because it presents a lot of lexical variations and deviates from the standard texts on which most of these models were trained. In this work, we focus on the robustness of LASER, a sentence embedding model, to UGC data. We evaluate this robustness by LASER’s ability to represent non-standard sentences and their standard counterparts close to each other in the embedding space. Inspired by previous works extending LASER to other languages and modalities, we propose RoLASER, a robust English encoder trained using a teacher-student approach to reduce the distances between the representations of standard and UGC sentences. We show that with training only on standard and synthetic UGC-like data, RoLASER significantly improves LASER’s robustness to both natural and artificial UGC data by achieving up to 2x and 11x better scores. We also perform a fine-grained analysis on artificial UGC data and find that our model greatly outperforms LASER on its most challenging UGC phenomena such as keyboard typos and social media abbreviations. Evaluation on downstream tasks shows that RoLASER performs comparably to or better than LASER on standard data, while consistently outperforming it on UGC data.</abstract>
@@ -11290,9 +11290,9 @@
     </paper>
     <paper id="960">
       <title>m<fixed-case>ALBERT</fixed-case>: Is a Compact Multilingual <fixed-case>BERT</fixed-case> Model Still Worth It?</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>11023–11029</pages>
       <abstract>Within the current trend of Pretained Language Models (PLM), emerge more and more criticisms about the ethical and ecological impact of such models. In this article, considering these critical remarks, we propose to focus on smaller models, such as compact models like ALBERT, which are more ecologically virtuous than these PLM. However, PLMs enable huge breakthroughs in Natural Language Processing tasks, such as Spoken and Natural Language Understanding, classification, Question–Answering tasks. PLMs also have the advantage of being multilingual, and, as far as we know, a multilingual version of compact ALBERT models does not exist. Considering these facts, we propose the free release of the first version of a multilingual compact ALBERT model, pre-trained using Wikipedia data, which complies with the ethical aspect of such a language model. We also evaluate the model against classical multilingual PLMs in classical NLP tasks. Finally, this paper proposes a rare study on the subword tokenization impact on language performances.</abstract>
       <url hash="2329ec68">2024.lrec-main.960</url>
@@ -11353,7 +11353,7 @@
     <paper id="966">
       <title>Mathematical Entities: Corpora and Benchmarks</title>
       <author><first>Jacob</first><last>Collard</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Eswaran</first><last>Subrahmanian</last></author>
       <pages>11080–11089</pages>
       <abstract>Mathematics is a highly specialized domain with its own unique set of challenges. Despite this, there has been relatively little research on natural language processing for mathematical texts, and there are few mathematical language resources aimed at NLP. In this paper, we aim to provide annotated corpora that can be used to study the language of mathematics in different contexts, ranging from fundamental concepts found in textbooks to advanced research mathematics. We preprocess the corpora with a neural parsing model and some manual intervention to provide part-of-speech tags, lemmas, and dependency trees. In total, we provide 182397 sentences across three corpora. We then aim to test and evaluate several noteworthy natural language processing models using these corpora, to show how well they can adapt to the domain of mathematics and provide useful tools for exploring mathematical language. We evaluate several neural and symbolic models against benchmarks that we extract from the corpus metadata to show that terminology extraction and definition extraction do not easily generalize to mathematics, and that additional work is needed to achieve good performance on these metrics. Finally, we provide a learning assistant that grants access to the content of these corpora in a context-sensitive manner, utilizing text search and entity linking. Though our corpora and benchmarks provide useful metrics for evaluating mathematical language processing, further work is necessary to adapt models to mathematics in order to provide more effective learning assistants and apply NLP methods to different mathematical domains.</abstract>
@@ -11375,7 +11375,7 @@
     <paper id="968">
       <title><fixed-case>MCIL</fixed-case>: Multimodal Counterfactual Instance Learning for Low-resource Entity-based Multimodal Information Extraction</title>
       <author><first>Baohang</first><last>Zhou</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Kehui</first><last>Song</last></author>
       <author><first>Hongru</first><last>Wang</last></author>
       <author><first>Yu</first><last>Zhao</last></author>
@@ -11451,15 +11451,15 @@
     <paper id="974">
       <title><fixed-case>M</fixed-case>ed<fixed-case>MT</fixed-case>5: An Open-Source Multilingual Text-to-Text <fixed-case>LLM</fixed-case> for the Medical Domain</title>
       <author><first>Iker</first><last>García-Ferrero</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Aitziber</first><last>Atutxa Salazar</last></author>
       <author><first>Elena</first><last>Cabrio</last></author>
       <author><first>Iker</first><last>de la Iglesia</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Benjamin</first><last>Molinet</last></author>
       <author><first>Johana</first><last>Ramirez-Romero</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Jose Maria</first><last>Villa-Gonzalez</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <author><first>Andrea</first><last>Zaninello</last></author>
@@ -11480,7 +11480,7 @@
     <paper id="976">
       <title><fixed-case>M</fixed-case>emory<fixed-case>P</fixed-case>rompt: A Light Wrapper to Improve Context Tracking in Pre-trained Language Models</title>
       <author><first>Nathanael Carraz</first><last>Rakotonirina</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>11187–11195</pages>
       <abstract>Transformer-based language models (LMs) track contextual information through large, hard-coded input windows. We introduce MemoryPrompt, a leaner approach in which the LM is complemented by a small auxiliary recurrent network that passes information to the LM by prefixing its regular input with a sequence of vectors, akin to soft prompts, without requiring LM finetuning. Tested on a task designed to probe a LM’s ability to keep track of multiple fact updates, a MemoryPrompt-augmented LM outperforms much larger LMs that have access to the full input history. We also test MemoryPrompt on a long-distance dialogue dataset, where its performance is comparable to that of a model conditioned on the entire conversation history. In both experiments we also observe that, unlike full-finetuning approaches, MemoryPrompt does not suffer from catastrophic forgetting when adapted to new tasks, thus not disrupting the generalist capabilities of the underlying LM.</abstract>
       <url hash="39eda84f">2024.lrec-main.976</url>
@@ -11488,7 +11488,7 @@
     </paper>
     <paper id="977">
       <title><fixed-case>M</fixed-case>ental<fixed-case>H</fixed-case>elp: A Multi-Task Dataset for Mental Health in Social Media</title>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last></author>
       <author><first>Shafkat</first><last>Farabi</last></author>
       <author><first>Ana-Maria</first><last>Bucur</last></author>
@@ -11504,7 +11504,7 @@
       <author><first>Alba María</first><last>Mármol Romero</last></author>
       <author><first>Adrián</first><last>Moreno-Muñoz</last></author>
       <author><first>Flor Miriam</first><last>Plaza-Del-Arco</last></author>
-      <author><first>M. Dolores</first><last>Molina-González</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-González</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
       <pages>11204–11214</pages>
       <abstract>With mental health issues on the rise on the Web, especially among young people, there is a growing need for effective identification and intervention. In this paper, we introduce a new open-sourced corpus for the early detection of mental disorders in Spanish, focusing on eating disorders, depression, and anxiety. It consists of user messages posted on groups within the Telegram message platform and contains over 1,300 subjects with more than 45,000 messages posted in different public Telegram groups. This corpus has been manually annotated via crowdsourcing and is prepared for its use in several Natural Language Processing tasks including text classification and regression tasks. The samples in the corpus include both text and time data. To provide a benchmark for future research, we conduct experiments on text classification and regression by using state-of-the-art transformer-based models.</abstract>
@@ -11571,7 +11571,7 @@
       <title>m<fixed-case>F</fixed-case>orms : Multimodal Form Filling with Question Answering</title>
       <author><first>Larry</first><last>Heck</last></author>
       <author><first>Simon</first><last>Heck</last></author>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <pages>11262–11271</pages>
       <abstract>This paper presents a new approach to form-filling by reformulating the task as multimodal natural language Question Answering (QA). The reformulation is achieved by first translating the elements on the GUI form (text fields, buttons, icons, etc.) to natural language questions, where these questions capture the element’s multimodal semantics. After a match is determined between the form element (Question) and the user utterance (Answer), the form element is filled through a pre-trained extractive QA system. By leveraging pre-trained QA models and not requiring form-specific training, this approach to form-filling is zero-shot. The paper also presents an approach to further refine the form-filling by using multi-task training to incorporate a potentially large number of successive tasks. Finally, the paper introduces a multimodal natural language form-filling dataset Multimodal Forms (mForms), as well as a multimodal extension of the popular ATIS dataset to support future research and experimentation. Results show the new approach not only maintains robust accuracy for sparse training conditions but achieves state-of-the-art F1 of 0.97 on ATIS with approximately 1/10th the training data.</abstract>
       <url hash="eb282e7a">2024.lrec-main.984</url>
@@ -11683,8 +11683,8 @@
       <author><first>Ibrahim Said</first><last>Ahmad</last></author>
       <author><first>Deontae</first><last>Smith</last></author>
       <author><first>Praise-EL</first><last>Michaels</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Anietie</first><last>Andy</last></author>
       <pages>11349–11360</pages>
       <abstract>Low-resource languages often face challenges in acquiring high-quality language data due to the reliance on translation-based methods, which can introduce the translationese effect. This phenomenon results in translated sentences that lack fluency and naturalness in the target language. In this paper, we propose a novel approach for data collection by leveraging storyboards to elicit more fluent and natural sentences. Our method involves presenting native speakers with visual stimuli in the form of storyboards and collecting their descriptions without direct exposure to the source text. We conducted a comprehensive evaluation comparing our storyboard-based approach with traditional text translation-based methods in terms of accuracy and fluency. Human annotators and quantitative metrics were used to assess translation quality. The results indicate a preference for text translation in terms of accuracy, while our method demonstrates worse accuracy but better fluency in the language focused.</abstract>
@@ -11699,7 +11699,7 @@
       <author><first>Zheng</first><last>Ma</last></author>
       <author><first>Jianbing</first><last>Zhang</last></author>
       <author><first>Liang</first><last>He</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>11361–11370</pages>
       <abstract>Relation extraction is a critical task in the field of natural language processing with numerous real-world applications. Existing research primarily focuses on monolingual relation extraction or cross-lingual enhancement for relation extraction. Yet, there remains a significant gap in understanding relation extraction in the mix-lingual (or code-switching) scenario, where individuals intermix contents from different languages within sentences, generating mix-lingual content. Due to the lack of a dedicated dataset, the effectiveness of existing relation extraction models in such a scenario is largely unexplored. To address this issue, we introduce a novel task of considering relation extraction in the mix-lingual scenario called MixRE and constructing the human-annotated dataset MixRED to support this task. In addition to constructing the MixRED dataset, we evaluate both state-of-the-art supervised models and large language models (LLMs) on MixRED, revealing their respective advantages and limitations in the mix-lingual scenario. Furthermore, we delve into factors influencing model performance within the MixRE task and uncover promising directions for enhancing the performance of both supervised models and LLMs in this novel task.</abstract>
       <url hash="53ef310c">2024.lrec-main.993</url>
@@ -11743,7 +11743,7 @@
       <title><fixed-case>MLDSP</fixed-case>-<fixed-case>MA</fixed-case>: Multidimensional Attention for Multi-Round Long Dialogue Sentiment Prediction</title>
       <author><first>Yunfei</first><last>Yin</last></author>
       <author><first>Congrui</first><last>Zou</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Xianjian</first><last>Bao</last></author>
       <pages>11405–11414</pages>
       <abstract>The intelligent chatbot takes dialogue sentiment prediction as the core, and it has to tackle long dialogue sentiment prediction problems in many real-world applications. Current state-of-the-art methods usually employ attention-based dialogue sentiment prediction models. However, as the conversation progresses, more topics are involved and the changes in sentiments become more frequent, which leads to a sharp decline in the accuracy and efficiency of the current methods. Therefore, we propose a Multi-round Long Dialogue Sentiment Prediction based on Multidimensional Attention (MLDSP-MA), which can focus on different topics. In particular, MLSDP-MA leverages a sliding window to capture different topics and traverses all historical dialogues. In each sliding window, the contextual dependency, sentiment persistence, and sentiment infectivity are characterized, and local attention cross fusion is performed. To learn dialogue sentiment globally, global attention is proposed to iteratively learn comprehensive sentiments from historical dialogues, and finally integrate with local attention. We conducted extensive experimental research on publicly available dialogue datasets. The experimental results show that, compared to the current state-of-the-art methods, our model improves by 3.5% in accuracy and 5.7% in Micro-F1 score.</abstract>
@@ -11783,7 +11783,7 @@
       <author><first>Claudiu Daniel</first><last>Hromei</last></author>
       <author><first>Daniele</first><last>Margiotta</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>11440–11451</pages>
       <abstract>This paper explores Interactive Grounded Language Understanding (IGLU) challenges within Human-Robot Interaction (HRI). In this setting, a robot interprets user commands related to its environment, aiming to discern whether a specific command can be executed. If faced with ambiguities or incomplete data, the robot poses relevant clarification questions. Drawing from the NeurIPS 2022 IGLU competition, we enrich the dataset by introducing our multi-modal data and natural language descriptions in <i>MM-IGLU: Multi-Modal Interactive Grounded Language Understanding</i>. Utilizing a BART-based model that integrates the user’s statement with the environment’s description, and a cutting-edge Multi-Modal Large Language Model that merges both visual and textual data, we offer a valuable resource for ongoing research in the domain. Additionally, we discuss the evaluation methods for such tasks, highlighting potential limitations imposed by traditional string-match-based evaluations on this intricate multi-modal challenge. Moreover, we provide an evaluation benchmark based on human judgment to address the limits and capabilities of such baseline models. This resource is released on a dedicated GitHub repository at https://github.com/crux82/MM-IGLU.</abstract>
       <url hash="0afaa5a1">2024.lrec-main.1000</url>
@@ -11868,7 +11868,7 @@
       <author><first>Felix</first><last>Lange</last></author>
       <author><first>Meisam</first><last>Booshehri</last></author>
       <author><first>Meghdut</first><last>Sengupta</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>11523–11536</pages>
       <abstract>Explanations are pervasive in our lives. Mostly, they occur in dialogical form where an <i>explainer</i> discusses a concept or phenomenon of interest with an <i>explainee</i>. Leaving the explainee with a clear understanding is not straightforward due to the knowledge gap between the two participants. Previous research looked at the interaction of explanation moves, dialogue acts, and topics in successful dialogues with expert explainers. However, daily-life explanations often fail, raising the question of what makes a dialogue successful. In this work, we study explanation dialogues in terms of the interactions between the explainer and explainee and how they correlate with the quality of explanations in terms of a successful understanding on the explainee’s side. In particular, we first construct a corpus of 399 dialogues from the Reddit forum <i>Explain Like I am Five</i> and annotate it for interaction flows and explanation quality. We then analyze the interaction flows, comparing them to those appearing in expert dialogues. Finally, we encode the interaction flows using two language models that can handle long inputs, and we provide empirical evidence for the effectiveness boost gained through the encoding in predicting the success of explanation dialogues.</abstract>
@@ -12017,7 +12017,7 @@
       <author><first>Hanjie</first><last>Zhao</last></author>
       <author><first>Danyan</first><last>Xing</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>11669–11679</pages>
       <abstract>In medical information extraction, medical Named Entity Recognition (NER) is indispensable, playing a crucial role in developing medical knowledge graphs, enhancing medical question-answering systems, and analyzing electronic medical records. The challenge in medical NER arises from the complex nested structures and sophisticated medical terminologies, distinguishing it from its counterparts in traditional domains. In response to these complexities, we propose a medical NER model based on Machine Reading Comprehension (MRC), which uses a task-adaptive pre-training strategy to improve the model’s capability in the medical field. Meanwhile, our model introduces multiple word-pair embeddings and multi-granularity dilated convolution to enhance the model’s representation ability and uses a combined predictor of Biaffine and MLP to improve the model’s recognition performance. Experimental evaluations conducted on the CMeEE, a benchmark for Chinese nested medical NER, demonstrate that our proposed model outperforms the compared state-of-the-art (SOTA) models.</abstract>
       <url hash="ee14af5e">2024.lrec-main.1019</url>
@@ -12066,7 +12066,7 @@
     <paper id="1023">
       <title><fixed-case>MULTICOLLAB</fixed-case>: A Multimodal Corpus of Dialogues for Analyzing Collaboration and Frustration in Language</title>
       <author><first>Michael</first><last>Peechatt</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Reynold</first><last>Bailey</last></author>
       <pages>11713–11722</pages>
       <abstract>This paper addresses an existing resource gap for studying complex emotional states when a speaker collaborates with a partner to solve a task. We present a novel dialogue resource — the MULTICOLLAB corpus — where two interlocutors, an instructor and builder, communicated through a Zoom call while sensors recorded eye gaze, facial action units, and galvanic skin response, with transcribed speech signals, resulting in a unique, heavily multimodal corpus. The builder received instructions from the instructor. Half of the builders were privately told to disobey the instructor’s directions. After the task, participants watched the Zoom recording and annotated their instances of frustration. In this study, we introduce this new corpus and perform computational experiments with time series transformers, using early fusion through time for sensor data and late fusion for speech transcripts. We then average predictions from both methods to recognize instructor frustration. Using sensor and speech data in a 4.5 second time window, we find that the fusion of both models yields 21% improvement in classification accuracy (with a precision of 79% and F1 of 63%) over a comparison baseline, demonstrating that complex emotions can be recognized when rich multimodal data from transcribed spoken dialogue and biophysical sensor data are fused.</abstract>
@@ -12076,7 +12076,7 @@
     <paper id="1024">
       <title>Multi-Dimensional Machine Translation Evaluation: Model Evaluation and Resource for <fixed-case>K</fixed-case>orean</title>
       <author><first>Dojun</first><last>Park</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>11723–11744</pages>
       <abstract>Almost all frameworks for the manual or automatic evaluation of machine translation characterize the quality of an MT output with a single number. An exception is the Multidimensional Quality Metrics (MQM) framework which offers a fine-grained ontology of quality dimensions for scoring (such as style, fluency, accuracy, and terminology). Previous studies have demonstrated the feasibility of MQM annotation but there are, to our knowledge, no computational models that predict MQM scores for novel texts, due to a lack of resources. In this paper, we address these shortcomings by (a) providing a 1200-sentence MQM evaluation benchmark for the language pair English-Korean and (b) reframing MT evaluation as the multi-task problem of simultaneously predicting several MQM scores using SOTA language models, both in a reference-based MT evaluation setup and a reference-free quality estimation (QE) setup. We find that reference-free setup outperforms its counterpart in the style dimension while reference-based models retain an edge regarding accuracy. Overall, RemBERT emerges as the most promising model. Through our evaluation, we offer an insight into the translation quality in a more fine-grained, interpretable manner.</abstract>
       <url hash="72994a7a">2024.lrec-main.1024</url>
@@ -12115,7 +12115,7 @@
     <paper id="1028">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>eg: Dataset for Text Sanitisation in Less-resourced Languages</title>
       <author><first>Rinalds</first><last>Vīksna</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <pages>11776–11782</pages>
       <abstract>Text sanitization is the task of detecting and removing personal information from the text. While it has been well-studied in monolingual settings, today, there is also a need for multilingual text sanitization. In this paper, we introduce MultiLeg: a parallel, multilingual named entity (NE) dataset consisting of documents from the Court of Justice of the European Union annotated with semantic categories suitable for text sanitization. The dataset is available in 8 languages, and it contains 3082 parallel text segments for each language. We also show that the pseudonymized dataset remains useful for downstream tasks.</abstract>
       <url hash="6b15e2b3">2024.lrec-main.1028</url>
@@ -12132,24 +12132,24 @@
       <author><first>Chiara</first><last>Cantone</last></author>
       <author><first>Sara</first><last>Carvalho</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Radovan</first><last>Garabik</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabik</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Letizia</first><last>Granata</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Timotej</first><last>Knez</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Maria Pia</first><last>Di Buono</last></author>
       <author><first>Ana</first><last>Ostroški Anić</last></author>
       <author><first>Sigita</first><last>Rackevičienė</last></author>
       <author><first>Ricardo</first><last>Rodrigues</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Linas</first><last>Selmistraitis</last></author>
       <author><first>Mahammadou</first><last>Sidibé</last></author>
       <author><first>Purificação</first><last>Silvano</last></author>
       <author><first>Blerina</first><last>Spahiu</last></author>
       <author><first>Enriketa</first><last>Sogutlu</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Ciprian-Octavian</first><last>Truică</last></author>
       <author><first>Giedre</first><last>Valunaite Oleskeviciene</last></author>
       <author><first>Slavko</first><last>Zitnik</last></author>
@@ -12174,7 +12174,7 @@
       <title>Multilingual Coreference Resolution in Low-resource <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages</title>
       <author><first>Ritwik</first><last>Mishra</last></author>
       <author><first>Pooja</first><last>Desur</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Ponnurangam</first><last>Kumaraguru</last></author>
       <pages>11813–11826</pages>
       <abstract>Coreference resolution involves the task of identifying text spans within a discourse that pertain to the same real-world entity. While this task has been extensively explored in the English language, there has been a notable scarcity of publicly accessible resources and models for coreference resolution in South Asian languages. We introduce a Translated dataset for Multilingual Coreference Resolution (TransMuCoRes) in 31 South Asian languages using off-the-shelf tools for translation and word-alignment. Nearly all of the predicted translations successfully pass a sanity check, and 75% of English references align with their predicted translations. Using multilingual encoders, two off-the-shelf coreference resolution models were trained on a concatenation of TransMuCoRes and a Hindi coreference resolution dataset with manual annotations. The best performing model achieved a score of 64 and 68 for LEA F1 and CoNLL F1, respectively, on our test-split of Hindi golden set. This study is the first to evaluate an end-to-end coreference resolution model on a Hindi golden set. Furthermore, this work underscores the limitations of current coreference evaluation metrics when applied to datasets with split antecedents, advocating for the development of more suitable evaluation metrics.</abstract>
@@ -12219,7 +12219,7 @@
     <paper id="1035">
       <title>Multilingual Substitution-based Word Sense Induction</title>
       <author><first>Denis</first><last>Kokosinskii</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>11859–11872</pages>
       <abstract>Word Sense Induction (WSI) is the task of discovering senses of an ambiguous word by grouping usages of this word into clusters corresponding to these senses. Many approaches were proposed to solve WSI in English and a few other languages, but these approaches are not easily adaptable to new languages. We present multilingual substitution-based WSI methods that support any of 100 languages covered by the underlying multilingual language model with minimal to no adaptation required. Despite the multilingual capabilities, our methods perform on par with the existing monolingual approaches on popular English WSI datasets. At the same time, they will be most useful for lower-resourced languages which miss lexical resources available for English, thus, have higher demand for unsupervised methods like WSI.</abstract>
       <url hash="0c7af297">2024.lrec-main.1035</url>
@@ -12262,9 +12262,9 @@
       <author><first>Abhijnan</first><last>Nath</last></author>
       <author><first>Huma</first><last>Jamil</last></author>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
-      <author><first>George Arthur</first><last>Baker</last></author>
+      <author id="george-baker"><first>George Arthur</first><last>Baker</last></author>
       <author><first>Rahul</first><last>Ghosh</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Nathaniel</first><last>Blanchard</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <pages>11901–11916</pages>
@@ -12310,7 +12310,7 @@
       <author><first>Meng</first><last>Han</last></author>
       <author><first>Ruofei</first><last>Lai</last></author>
       <author><first>Xinyu</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>11944–11955</pages>
       <abstract>Product review summarization aims to generate a concise summary based on product reviews to facilitate purchasing decisions. This intricate task gives rise to three challenges in existing work: factual accuracy, aspect comprehensiveness, and content relevance. In this paper, we first propose an FB-Thinker framework to improve the summarization ability of LLMs with multi-objective forward reasoning and multi-reward backward refinement. To enable LLM with these dual capabilities, we present two Chinese product review summarization datasets, Product-CSum and Product-CSum-Cross, for both instruction-tuning and cross-domain evaluation. Specifically, these datasets are collected via GPT-assisted manual annotations from an online forum and public datasets. We further design an evaluation mechanism Product-Eval, integrating both automatic and human evaluation across multiple dimensions for product summarization. Experimental results show the competitiveness and generalizability of our proposed framework in the product review summarization tasks.</abstract>
@@ -12390,7 +12390,7 @@
     </paper>
     <paper id="1050">
       <title><fixed-case>MWE</fixed-case>-Finder: A Demonstration</title>
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <author><first>Martin</first><last>Kroon</last></author>
       <author><first>Tijmen</first><last>Baarda</last></author>
       <author><first>Ben</first><last>Bonfil</last></author>
@@ -12415,9 +12415,9 @@
     </paper>
     <paper id="1052">
       <title>My Science Tutor (<fixed-case>M</fixed-case>y<fixed-case>ST</fixed-case>)–a Large Corpus of Children’s Conversational Speech</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Ronald A.</first><last>Cole</last></author>
-      <author><first>Wayne H.</first><last>Ward</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="ronald-cole"><first>Ronald A.</first><last>Cole</last></author>
+      <author id="wayne-ward"><first>Wayne H.</first><last>Ward</last></author>
       <pages>12040–12045</pages>
       <abstract>This article describes the [corpus-name] corpus developed as part of the [project-name] project. To the best of our knowledge, this is one of the largest collections of children’s conversational speech that is freely available for non-commercial use under the creative commons license (CC BY-NC-SA 4.0). It comprises approximately 400 hours of speech, spanning some 230K utterances spread across about 10,500 virtual tutor sessions. Roughly 1,300 third, fourth and fifth grade students contributed to this corpus. The current release contains roughly 100K transcribed utterances. It is our hope that the corpus can be used to improve automatic speech recognition models and algorithms. We report the word error rate achieved on the test set using a model trained on the training and development portion of the corpus. The git repository of the corpus contains the complete training and evaluation setup in order to facilitate a fair and consistent evaluation. It is our hope that this corpus will contribute to the creation and evaluation of conversational AI agents having a better understanding of children’s speech, potentially opening doors to novel, effective, learning and therapeutic interventions.</abstract>
       <url hash="d5ef7e5a">2024.lrec-main.1052</url>
@@ -12456,8 +12456,8 @@
       <author><first>William</first><last>Thorne</last></author>
       <author><first>Ambrose</first><last>Robinson</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <pages>12074–12086</pages>
       <abstract>Instruction-tuned Large Language Models (LLMs) have exhibited impressive language understanding and the capacity to generate responses that follow specific prompts. However, due to the computational demands associated with training these models, their applications often adopt a zero-shot setting. In this paper, we evaluate the zero-shot performance of two publicly accessible LLMs, ChatGPT and OpenAssistant, in the context of six Computational Social Science classification tasks, while also investigating the effects of various prompting strategies. Our experiments investigate the impact of prompt complexity, including the effect of incorporating label definitions into the prompt; use of synonyms for label names; and the influence of integrating past memories during foundation model training. The findings indicate that in a zero-shot setting, current LLMs are unable to match the performance of smaller, fine-tuned baseline transformer models (such as BERT-large). Additionally, we find that different prompting strategies can significantly affect classification accuracy, with variations in accuracy and F1 scores exceeding 10%.</abstract>
@@ -12476,7 +12476,7 @@
     <paper id="1057">
       <title>Negation Scope Conversion: Towards a Unified Negation-Annotated Dataset</title>
       <author><first>Asahi</first><last>Yoshida</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>12093–12099</pages>
       <abstract>Negation scope resolution is the task that identifies the part of a sentence affected by the negation cue. The three major corpora used for this task, the BioScope corpus, the SFU review corpus and the Sherlock dataset, have different annotation schemes for negation scope. Due to the different annotations, the negation scope resolution models based on pre-trained language models (PLMs) perform worse when fine-tuned on the simply combined dataset consisting of the three corpora. To address this issue, we propose a method for automatically converting the scopes of BioScope and SFU to those of Sherlock and merge them into a unified dataset. To verify the effectiveness of the proposed method, we conducted experiments using the unified dataset for fine-tuning PLM-based models. The experimental results demonstrate that the performances of the models increase when fine-tuned on the unified dataset unlike the simply combined one. In the token-level metric, the model fine-tuned on the unified dataset archived the state-of-the-art performance on the Sherlock dataset.</abstract>
@@ -12527,7 +12527,7 @@
       <author><first>Yantao</first><last>Liu</last></author>
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>12127–12137</pages>
       <abstract>Nested Event Extraction (NEE) aims to extract complex event structures where an event contains other events as its arguments recursively. Nested events involve a kind of Pivot Elements (PEs) that simultaneously act as arguments of outer-nest events and as triggers of inner-nest events, and thus connect them into nested structures. This special characteristic of PEs brings challenges to existing NEE methods, as they cannot well cope with the dual identities of PEs. Therefore, this paper proposes a new model, called PerNee, which extracts nested events mainly based on recognizing PEs. Specifically, PerNee first recognizes the triggers of both inner-nest and outer-nest events and further recognizes the PEs via classifying the relation type between trigger pairs. The model uses prompt learning to incorporate information from both event types and argument roles for better trigger and argument representations to improve NEE performance. Since existing NEE datasets (e.g., Genia11) are limited to specific domains and contain a narrow range of event types with nested structures, we systematically categorize nested events in the generic domain and construct a new NEE dataset, called ACE2005-Nest. Experimental results demonstrate that PerNee consistently achieves state-of-the-art performance on ACE2005-Nest, Genia11, and Genia13. The ACE2005-Nest dataset and the code of the PerNee model are available at https://github.com/waysonren/PerNee.</abstract>
       <url hash="1c22a59c">2024.lrec-main.1061</url>
@@ -12596,7 +12596,7 @@
     <paper id="1068">
       <title>New Methods for Exploring Intonosyntax: Introducing an Intonosyntactic Treebank for <fixed-case>N</fixed-case>igerian <fixed-case>P</fixed-case>idgin</title>
       <author><first>Emmett</first><last>Strickland</last></author>
-      <author><first>Anne</first><last>Lacheret-Dujour</last></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret-Dujour</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <author><first>Marc</first><last>Evrard</last></author>
       <author><first>Perrine</first><last>Quennehen</last></author>
@@ -12621,9 +12621,9 @@
       <title>New Semantic Task for the <fixed-case>F</fixed-case>rench Spoken Language Understanding <fixed-case>MEDIA</fixed-case> Benchmark</title>
       <author><first>Nadège</first><last>Alavoine</last></author>
       <author><first>Gaëlle</first><last>Laperrière</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>12227–12246</pages>
       <abstract>Intent classification and slot-filling are essential tasks of Spoken Language Understanding (SLU). In most SLU systems, those tasks are realized by independent modules, but for about fifteen years, models achieving both of them jointly and exploiting their mutual enhancement have been proposed. A multilingual module using a joint model was envisioned to create a touristic dialogue system for a European project, HumanE-AI-Net. A combination of multiple datasets, including the MEDIA dataset, was suggested for training this joint model. The MEDIA SLU dataset is a French dataset distributed since 2005 by ELRA, mainly used by the French research community and free for academic research since 2020. Unfortunately, it is annotated only in slots but not intents. An enhanced version of MEDIA annotated with intents has been built to extend its use to more tasks and use cases. This paper presents the semi-automatic methodology used to obtain this enhanced version. In addition, we present the first results of SLU experiments on this enhanced dataset using joint models for intent classification and slot-filling.</abstract>
       <url hash="6ae9f2de">2024.lrec-main.1070</url>
@@ -12762,7 +12762,7 @@
     <paper id="1083">
       <title>On Leveraging Encoder-only Pre-trained Language Models for Effective Keyphrase Generation</title>
       <author><first>Di</first><last>Wu</last></author>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <pages>12370–12384</pages>
       <abstract>This study addresses the application of encoder-only Pre-trained Language Models (PLMs) in keyphrase generation (KPG) amidst the broader availability of domain-tailored encoder-only models compared to encoder-decoder models. We investigate three core inquiries: (1) the efficacy of encoder-only PLMs in KPG, (2) optimal architectural decisions for employing encoder-only PLMs in KPG, and (3) a performance comparison between in-domain encoder-only and encoder-decoder PLMs across varied resource settings. Our findings, derived from extensive experimentation in two domains reveal that with encoder-only PLMs, although keyphrase extraction with Conditional Random Fields slightly excels in identifying present keyphrases, the KPG formulation renders a broader spectrum of keyphrase predictions. Additionally, prefix-LM fine-tuning of encoder-only PLMs emerges as a strong and data-efficient strategy for KPG, outperforming general-domain seq2seq PLMs. We also identify a favorable parameter allocation towards model depth rather than width when employing encoder-decoder architectures initialized with encoder-only PLMs. The study sheds light on the potential of utilizing encoder-only PLMs for advancing KPG systems and provides a groundwork for future KPG methods. Our code and pre-trained checkpoints are released at https://github.com/uclanlp/DeepKPG.</abstract>
@@ -12771,11 +12771,11 @@
     </paper>
     <paper id="1084">
       <title>On Modelling Corpus Citations in Computational Lexical Resources</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Maxim</first><last>Ionov</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Besim</first><last>Kabashi</last></author>
       <pages>12385–12394</pages>
       <abstract>In this article we look at how two different standards for lexical resources, TEI and OntoLex, deal with corpus citations in lexicons. We will focus on how corpus citations in retrodigitised dictionaries can be modelled using each of the two standards since this provides us with a suitably challenging use case. After looking at the structure of an example entry from a legacy dictionary, we examine the two approaches offered by the two different standards by outlining an encoding for the example entry using both of them (note that this article features the first extended discussion of how the Frequency Attestation and Corpus (FrAC) module of OntoLex deals with citations). After comparing the two approaches and looking at the advantages and disadvantages of both, we argue for a combination of both. In the last part of the article we discuss different ways of doing this, giving our preference for a strategy which makes use of RDFa.</abstract>
@@ -12807,8 +12807,8 @@
     <paper id="1087">
       <title>On the Scaling Laws of Geographical Representation in Language Models</title>
       <author><first>Nathan</first><last>Godey</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>12416–12422</pages>
       <abstract>Language models have long been shown to embed geographical information in their hidden representations. This line of work has recently been revisited by extending this result to Large Language Models (LLMs). In this paper, we propose to fill the gap between well-established and recent literature by observing how geographical knowledge evolves when scaling language models. We show that geographical knowledge is observable even for tiny models, and that it scales consistently as we increase the model size. Notably, we observe that larger language models cannot mitigate the geographical bias that is inherent to the training data.</abstract>
       <url hash="833ce947">2024.lrec-main.1087</url>
@@ -12845,7 +12845,7 @@
       <author><first>Punyajoy</first><last>Saha</last></author>
       <author><first>Aalok</first><last>Agrawal</last></author>
       <author><first>Abhik</first><last>Jana</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Animesh</first><last>Mukherjee</last></author>
       <pages>12443–12454</pages>
       <abstract>With the emergence of numerous Large Language Models (LLM), the usage of such models in various Natural Language Processing (NLP) applications is increasing extensively. Counterspeech generation is one such key task where efforts are made to develop generative models by fine-tuning LLMs with hatespeech - counterspeech pairs, but none of these attempts explores the intrinsic properties of large language models in zero-shot settings. In this work, we present a comprehensive analysis of the performances of four LLMs namely GPT-2, DialoGPT, ChatGPT and FlanT5 in zero-shot settings for counterspeech generation, which is the first of its kind. For GPT-2 and DialoGPT, we further investigate the deviation in performance with respect to the sizes (small, medium, large) of the models. On the other hand, we propose three different prompting strategies for generating different types of counterspeech and analyse the impact of such strategies on the performance of the models. Our analysis shows that there is an improvement in generation quality for two datasets (17%), however the toxicity increase (25%) with increase in model size. Considering type of model, GPT-2 and FlanT5 models are significantly better in terms of counterspeech quality but also have high toxicity as compared to DialoGPT. ChatGPT are much better at generating counter speech than other models across all metrics. In terms of prompting, we find that our proposed strategies help in improving counter speech generation across all the models.</abstract>
@@ -12868,7 +12868,7 @@
       <author><first>Yang</first><last>Gao</last></author>
       <author><first>Ji</first><last>Ma</last></author>
       <author><first>Ivan</first><last>Korotkov</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Dana</first><last>Alon</last></author>
       <author><first>Donald</first><last>Metzler</last></author>
       <pages>12467–12480</pages>
@@ -12882,7 +12882,7 @@
       <author><first>Arash</first><last>Yousefi Jordehi</last></author>
       <author><first>Mahsa</first><last>Hosseini Khasheh Heyran</last></author>
       <author><first>SeyedAbolghasem</first><last>Mirroshandel</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>12481–12495</pages>
       <abstract>Opinion mining is an important task in natural language processing. The MPQA Opinion Corpus is a fine-grained and comprehensive dataset of private states (i.e., the condition of a source who has an attitude which may be directed toward a target) based on context. Although this dataset was released years ago, because of its complex definition of annotations and hard-to-read data format, almost all existing research works have only focused on a small subset of the dataset. In this paper, we present a comprehensive study of the entire MPQA 2.0 dataset. In order to achieve this goal, we first provide a clean version of MPQA 2.0 in a more interpretable format. Then, we propose two novel approaches for opinion mining, establishing new high baselines for future work. We use two pre-trained large language models, BERT and T5, to automatically identify the type, polarity, and intensity of private states expressed in phrases, and we use T5 to detect opinion expressions and their agents (i.e., sources).</abstract>
       <url hash="9a037d6b">2024.lrec-main.1093</url>
@@ -12914,9 +12914,9 @@
       <author><first>Jaewan</first><last>Park</last></author>
       <author><first>Yiseul</first><last>Lee</last></author>
       <author><first>HyeJin</first><last>Lee</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Hansaem</first><last>Kim</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <pages>12514–12526</pages>
       <abstract>Large language models (LLMs) use pretraining to predict the subsequent word; however, their expansion requires significant computing resources. Numerous big tech companies and research institutes have developed multilingual LLMs (MLLMs) to meet current demands, overlooking less-resourced languages (LRLs). This study proposed three strategies to enhance the performance of LRLs based on the publicly available MLLMs. First, the MLLM vocabularies of LRLs were expanded to enhance expressiveness. Second, bilingual data were used for pretraining to align the high- and less-resourced languages. Third, a high-quality small-scale instruction dataset was constructed and instruction-tuning was performed to augment the LRL. The experiments employed the Llama2 model and Korean was used as the LRL, which was quantitatively evaluated against other developed LLMs across eight tasks. Furthermore, a qualitative assessment was performed based on human evaluation and GPT4. Experimental results showed that our proposed Bllossom model exhibited superior performance in qualitative analyses compared to previously proposed Korean monolingual models.</abstract>
       <url hash="8b0bab23">2024.lrec-main.1095</url>
@@ -12930,7 +12930,7 @@
       <author><first>Zhiheng</first><last>Xi</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>12527–12538</pages>
       <abstract>Pretrained language models can be applied for various downstream tasks but are susceptible to subtle perturbations. Most adversarial defense methods often introduce adversarial training during the fine-tuning phase to enhance empirical robustness. However, the repeated execution of adversarial training hinders training efficiency when transitioning to different tasks. In this paper, we explore the transferability of robustness within subnetworks and leverage this insight to introduce a novel adversarial defense method ORTicket, eliminating the need for separate adversarial training across diverse downstream tasks. Specifically, (i) pruning the full model using the MLM task (the same task employed for BERT pretraining) yields a task-agnostic robust subnetwork(i.e., winning ticket in Lottery Ticket Hypothesis); and (ii) fine-tuning this subnetwork for downstream tasks. Extensive experiments demonstrate that our approach achieves comparable robustness to other defense methods while retaining the efficiency of traditional fine-tuning.This also confirms the significance of selecting MLM task for identifying the transferable robust subnetwork. Furthermore, our method is orthogonal to other adversarial training approaches, indicating the potential for further enhancement of model robustness.</abstract>
       <url hash="ab231117">2024.lrec-main.1096</url>
@@ -12953,7 +12953,7 @@
       <author><first>Ines</first><last>Rehbein</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <author><first>Annelen</first><last>Brunner</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>12553–12563</pages>
       <abstract>This paper presents GePaDe_SpkAtt , a new corpus for speaker attribution in German parliamentary debates, with more than 7,700 manually annotated events of speech, thought and writing. Our role inventory includes the sources, addressees, messages and topics of the speech event and also two additional roles, medium and evidence. We report baseline results for the automatic prediction of speech events and their roles, with high scores for both, event triggers and roles. Then we apply our model to predict speech events in 20 years of parliamentary debates and investigate the use of factives in the rhetoric of MPs.</abstract>
       <url hash="33e6e98c">2024.lrec-main.1098</url>
@@ -12966,7 +12966,7 @@
       <author><first>Zhanghao</first><last>Wang</last></author>
       <author><first>Hong</first><last>Cheng</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>12564–12573</pages>
       <abstract>In an era characterized by the rapid proliferation of information, the pervasive issues of misinformation and disinformation have significantly impacted numerous individuals. Consequently, the evaluation of information’s truthfulness and accuracy has garnered substantial attention among researchers. In this work, we present a novel fact-checking framework called PACAR, fact-checking based on planning and customized action reasoning using LLMs. It comprises four modules: a claim decomposer with self-reflection, an LLM-centric planner module, an executor for carrying out planned actions, and a verifier module that assesses veracity and generates explanations based on the overall reasoning process. Unlike previous work that employs single-path decision-making and single-step verdict prediction, PACAR focuses on the use of LLMs in dynamic planning and execution of actions. Furthermore, in contrast to previous work that relied primarily on general reasoning, we introduce tailored actions such as numerical reasoning and entity disambiguation to effectively address potential challenges in fact-checking. Our PACAR framework, incorporating LLM-centric planning along with customized action reasoning, significantly outperforms baseline methods across three datasets from different domains and with varying complexity levels. Additional experiments, including multidimensional and sliced observations, demonstrate the effectiveness of PACAR and offer valuable insights for the advancement of automated fact-checking.</abstract>
       <url hash="8a8fc6f2">2024.lrec-main.1099</url>
@@ -12998,7 +12998,7 @@
     <paper id="1102">
       <title>Parameter-Efficient Transfer Learning for End-to-end Speech Translation</title>
       <author><first>Yunlong</first><last>Zhao</last></author>
-      <author id="kexin-wang"><first>Kexin</first><last>Wang</last></author>
+      <author><first>Kexin</first><last>Wang</last></author>
       <author><first>Qianqian</first><last>Dong</last></author>
       <author><first>Tom</first><last>Ko</last></author>
       <pages>12592–12598</pages>
@@ -13051,7 +13051,7 @@
       <author><first>Zhenzhe</first><last>Ying</last></author>
       <author><first>Weiqiang</first><last>Wang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>12644–12656</pages>
       <abstract>Modeling social media users is the core of social governance in the digital society. Existing works have incorporated different digital traces to better learn the representations of social media users, including text information encoded by pre-trained language models and social network information encoded by graph models. However, limited by overloaded text information and hard-to-collect social network information, they cannot utilize global text information and cannot be generalized without social relationships. In this paper, we propose a Pre-training Architecture for Social Media User Modeling based on Text Graph(PASUM). We aggregate all microblogs to represent social media users based on the text graph model and learn the mapping from microblogs to user representation. We further design inter-user and intra-user contrastive learning tasks to inject general structural information into the mapping. In different scenarios, we can represent users based on text, even without social network information. Experimental results on various downstream tasks demonstrate the effectiveness and superiority of our framework.</abstract>
@@ -13060,7 +13060,7 @@
     </paper>
     <paper id="1108">
       <title>Pater Incertus? There Is a Solution: Automatic Discrimination between Cognates and Borrowings for <fixed-case>R</fixed-case>omance Languages</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <author><first>Ioan-Bogdan</first><last>Iordache</last></author>
       <author><first>Alina Maria</first><last>Cristea</last></author>
@@ -13101,7 +13101,7 @@
     </paper>
     <paper id="1111">
       <title><fixed-case>PECC</fixed-case>: Problem Extraction and Coding Challenges</title>
-      <author><first>Patrick</first><last>Haller</last></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last></author>
       <author><first>Jonas</first><last>Golde</last></author>
       <author><first>Alan</first><last>Akbik</last></author>
       <pages>12690–12699</pages>
@@ -13140,7 +13140,7 @@
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Blaž</first><last>Pažon</last></author>
       <author><first>Annie</first><last>Baker</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>12724–12733</pages>
       <abstract>Research in speech technologies and comparative linguistics depends on access to diverse and accessible speech data. The UCLA Phonetics Lab Archive is one of the earliest multilingual speech corpora, with long-form audio recordings and phonetic transcriptions for 314 languages (Ladefoged et al., 2009). Recently, 95 of these languages were time-aligned with word-level phonetic transcriptions (Li et al., 2021). Here we present VoxAngeles, a corpus of audited phonetic transcriptions and phone-level alignments of the UCLA Phonetics Lab Archive, which uses the 95-language CMU re-release as our starting point. VoxAngeles also includes word- and phone-level segmentations from the original UCLA corpus, as well as phonetic measurements of word and phone durations, vowel formants, and vowel f0. This corpus enhances the usability of the original data, particularly for quantitative phonetic typology, as demonstrated through a case study of vowel intrinsic f0. We also discuss the utility of the VoxAngeles corpus for general research and pedagogy in crosslinguistic phonetics, as well as for low-resource and multilingual speech technologies. VoxAngeles is free to download and use under a CC-BY-NC 4.0 license.</abstract>
       <url hash="60f0d781">2024.lrec-main.1114</url>
@@ -13150,7 +13150,7 @@
       <title>Phonotactic Complexity across Dialects</title>
       <author><first>Ryan Soh-Eun</first><last>Shim</last></author>
       <author><first>Kalvin</first><last>Chang</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>12734–12748</pages>
       <abstract>Received wisdom in linguistic typology holds that if the structure of a language becomes more complex in one dimension, it will simplify in another, building on the assumption that all languages are equally complex (Joseph and Newmeyer, 2012). We study this claim on a micro-level, using a tightly-controlled sample of Dutch dialects (across 366 collection sites) and Min dialects (across 60 sites), which enables a more fair comparison across varieties. Even at the dialect level, we find empirical evidence for a tradeoff between word length and a computational measure of phonotactic complexity from a LSTM-based phone-level language model—a result previously documented only at the language level. A generalized additive model (GAM) shows that dialects with low phonotactic complexity concentrate around the capital regions, which we hypothesize to correspond to prior hypotheses that language varieties of greater or more diverse populations show reduced phonotactic complexity. We also experiment with incorporating the auxiliary task of predicting syllable constituency, but do not find an increase in the strength of the negative correlation observed.</abstract>
       <url hash="0cf814c8">2024.lrec-main.1115</url>
@@ -13190,7 +13190,7 @@
       <title>Plots Made Quickly: An Efficient Approach for Generating Visualizations from Natural Language Queries</title>
       <author><first>Henrik</first><last>Voigt</last></author>
       <author><first>Kai</first><last>Lawonn</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>12787–12793</pages>
       <abstract>Generating visualizations from natural language queries is a useful extension to visualization libraries such as Vega-Lite. The goal of the NL2VIS task is to generate a valid Vega-Lite specification from a data frame and a natural language query as input, which can then be rendered as a visualization. To enable real-time interaction with the data, small model sizes and fast inferences are required. Previous work has introduced custom neural network solutions with custom visualization specifications and has not systematically tested pre-trained LMs to solve this problem. In this work, we opt for a more generic approach that (i) evaluates pre-trained LMs of different sizes and (ii) uses string encodings of data frames and visualization specifications instead of custom specifications. In our experiments, we show that these representations, in combination with pre-trained LMs, scale better than current state-of-the-art models. In addition, the small and base versions of the T5 architecture achieve real-time interaction, while LLMs far exceed latency thresholds suitable for visual exploration tasks. In summary, our models generate visualization specifications in real-time on a CPU and establish a new state of the art on the NL2VIS benchmark nvBench.</abstract>
       <url hash="e2b0af30">2024.lrec-main.1119</url>
@@ -13215,7 +13215,7 @@
       <author><first>Gennaro</first><last>Nolano</last></author>
       <author><first>Moritz</first><last>Blum</last></author>
       <author><first>Basil</first><last>Ell</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>12809–12820</pages>
       <abstract>In recent years, large language models have achieved state-of-the-art performance across various NLP tasks. However, investigations have shown that these models tend to rely on shortcut features, leading to inaccurate predictions and causing the models to be unreliable at generalization to out-of-distribution (OOD) samples. For instance, in the context of relation extraction (RE), we would expect a model to identify the same relation independently of the entities involved in it. For example, consider the sentence “Leonardo da Vinci painted the Mona Lisa” expressing the created(Leonardo_da_Vinci, Mona_Lisa) relation. If we substiute “Leonardo da Vinci” with “Barack Obama”, then the sentence still expresses the created relation. A robust model is supposed to detect the same relation in both cases. In this work, we describe several semantically-motivated strategies to generate adversarial examples by replacing entity mentions and investigate how state-of-the-art RE models perform under pressure. Our analyses show that the performance of these models significantly deteriorates on the modified datasets (avg. of -48.5% in F1), which indicates that these models rely to a great extent on shortcuts, such as surface forms (or patterns therein) of entities, without making full use of the information present in the sentences.</abstract>
       <url hash="c5c1f2a4">2024.lrec-main.1121</url>
@@ -13248,7 +13248,7 @@
     <paper id="1124">
       <title><fixed-case>P</fixed-case>oliti<fixed-case>C</fixed-case>ause: An Annotation Scheme and Corpus for Causality in Political Texts</title>
       <author><first>Paulina</first><last>Garcia Corral</last></author>
-      <author><first>Hanna</first><last>Bechara</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Bechara</last></author>
       <author><first>Ran</first><last>Zhang</last></author>
       <author><first>Slava</first><last>Jankin</last></author>
       <pages>12836–12845</pages>
@@ -13270,7 +13270,7 @@
       <title><fixed-case>P</fixed-case>oly<fixed-case>NERE</fixed-case>: A Novel Ontology and Corpus for Named Entity Recognition and Relation Extraction in Polymer Science Domain</title>
       <author><first>Van-Thuy</first><last>Phi</last></author>
       <author><first>Hiroki</first><last>Teranishi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Hiroyuki</first><last>Oka</last></author>
       <author><first>Masashi</first><last>Ishii</last></author>
       <pages>12856–12866</pages>
@@ -13432,7 +13432,7 @@
       <title>Probing Large Language Models for Scalar Adjective Lexical Semantics and Scalar Diversity Pragmatics</title>
       <author><first>Fangru</first><last>Lin</last></author>
       <author><first>Daniel</first><last>Altshuler</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <pages>13033–13049</pages>
       <abstract>Scalar adjectives pertain to various domain scales and vary in intensity within each scale (e.g. certain is more intense than likely on the likelihood scale). Scalar implicatures arise from the consideration of alternative statements which could have been made. They can be triggered by scalar adjectives and require listeners to reason pragmatically about them. Some scalar adjectives are more likely to trigger scalar implicatures than others. This phenomenon is referred to as scalar diversity. In this study, we probe different families of Large Language Models such as GPT-4 for their knowledge of the lexical semantics of scalar adjectives and one specific aspect of their pragmatics, namely scalar diversity. We find that they encode rich lexical-semantic information about scalar adjectives. However, the rich lexical-semantic knowledge does not entail a good understanding of scalar diversity. We also compare current models of different sizes and complexities and find that larger models are not always better. Finally, we explain our probing results by leveraging linguistic intuitions and model training objectives.</abstract>
       <url hash="fcbe605a">2024.lrec-main.1141</url>
@@ -13475,9 +13475,9 @@
     </paper>
     <paper id="1145">
       <title>Producing a Parallel <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank of <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew and <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek via Cross-Lingual Projection</title>
-      <author><first>Daniel G.</first><last>Swanson</last></author>
+      <author id="daniel-g-swanson"><first>Daniel G.</first><last>Swanson</last></author>
       <author><first>Bryce D.</first><last>Bussert</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>13074–13078</pages>
       <abstract>In this paper we present the initial construction of a treebank of Ancient Greek containing portions of the Septuagint, a translation of the Hebrew Scriptures (1576 sentences, 39K tokens, roughly 7% of the total corpus). We construct the treebank by word-aligning and projecting from the parallel text in Ancient Hebrew before automatically correcting systematic syntactic mismatches and manually correcting other errors.</abstract>
       <url hash="dead6ee2">2024.lrec-main.1145</url>
@@ -13697,7 +13697,7 @@
       <author><first>Laurin</first><last>Friedrich</last></author>
       <author><first>Wassiliki</first><last>Siskou</last></author>
       <author><first>Steffen</first><last>Eckhard</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <pages>13315–13320</pages>
       <abstract>Face-to-face interactions between representatives of the state and citizens are a key intercept in public service delivery, for instance when providing social benefits to vulnerable groups. Despite the relevance of these encounters for the individual, but also for society at large, there is a significant research gap in the systematic empirical study of the communication taking place. This is mainly due to the high institutional and data protection barriers for collecting data in a very sensitive and private setting in which citizens request support from the state. In this paper, we describe the procedure of compiling the first open access dataset of transcribed recordings of so-called Public Service Encounters in Germany, i.e., meetings between state officials and citizens in which there is direct communication in order to allocate state services. This dataset sets a new research directive in the social sciences, because it allows the community to open up the black box of direct state-citizen interaction. With data of this kind it becomes possible to directly and systematically investigate bias, bureaucratic discrimination and other power-driven dynamics in the actual communication and ideally propose guidelines as to alleviate these issues.</abstract>
       <url hash="8ddd39cb">2024.lrec-main.1165</url>
@@ -13739,7 +13739,7 @@
       <author><first>Nate B.</first><last>Carlson</last></author>
       <author><first>Nathaniel Romney</first><last>Robinson</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <pages>13344–13355</pages>
       <abstract>Mapping words into a fixed-dimensional vector space is the backbone of modern NLP. While most word embedding methods successfully encode semantic information, they overlook phonetic information that is crucial for many tasks. We develop three methods that use articulatory features to build phonetically informed word embeddings. To address the inconsistent evaluation of existing phonetic word embedding methods, we also contribute a task suite to fairly evaluate past, current, and future methods. We evaluate both (1) intrinsic aspects of phonetic word embeddings, such as word retrieval and correlation with sound similarity, and (2) extrinsic performance on tasks such as rhyme and cognate detection and sound analogies. We hope our task suite will promote reproducibility and inspire future phonetic embedding research.</abstract>
       <url hash="9b8fb9a1">2024.lrec-main.1168</url>
@@ -13806,7 +13806,7 @@
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>13407–13418</pages>
       <abstract>The questionnaire is a professional research methodology used for both qualitative and quantitative analysis of human opinions, preferences, attitudes, and behaviors. However, designing and evaluating questionnaires demands significant effort due to their intricate and complex structure. Questionnaires entail a series of questions that must conform to intricate constraints involving the questions, options, and overall structure. Specifically, the questions should be relevant and specific to the given research topic and intent. The options should be tailored to the questions, ensuring they are mutually exclusive, completed, and ordered sensibly. Moreover, the sequence of questions should follow a logical order, grouping similar topics together. As a result, automatically generating questionnaires presents a significant challenge and this area has received limited attention primarily due to the scarcity of high-quality datasets. To address these issues, we present Qsnail, the first dataset specifically constructed for the questionnaire generation task, which comprises 13,168 human-written questionnaires gathered from online platforms. We further conduct experiments on Qsnail, and the results reveal that retrieval models and traditional generative models do not fully align with the given research topic and intents. Large language models, while more closely related to the research topic and intents, exhibit significant limitations in terms of diversity and specificity. Despite enhancements through the chain-of-thought prompt and finetuning, questionnaires generated by language models still fall short of human-written questionnaires. Therefore, questionnaire generation is challenging and needs to be further explored. The dataset will be published in the future.</abstract>
       <url hash="8d4b6b1d">2024.lrec-main.1174</url>
@@ -13860,9 +13860,9 @@
     <paper id="1179">
       <title>Question Answering over Tabular Data with <fixed-case>D</fixed-case>ata<fixed-case>B</fixed-case>ench: A Large-Scale Empirical Evaluation of <fixed-case>LLM</fixed-case>s</title>
       <author><first>Jorge</first><last>Osés Grijalba</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <author><first>Eugenio</first><last>Martínez Cámara</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>13471–13488</pages>
       <abstract>Large Language Models (LLMs) are showing emerging abilities, and one of the latest recognized ones deals with their ability to reason and answer questions from tabular data. Although there are some available datasets to assess question answering systems on tabular data, they are not large and diverse enough to properly assess the capabilities of LLMs. To this end, we propose DataBench, a benchmark composed of 65 real-world datasets over several domains, including 20 human-generated questions per dataset, totaling 1300 questions and answers overall. Using this benchmark, we perform a large-scale empirical comparison of several open and closed source models, including both code-generating and in-context learning models. The results highlight the current gap between open-source and closed-source models, with all types of model having room for improvement even in simple boolean questions or involving a single column.</abstract>
       <url hash="7174b48c">2024.lrec-main.1179</url>
@@ -13988,7 +13988,7 @@
     </paper>
     <paper id="1189">
       <title>Reassessing Semantic Knowledge Encoded in Large Language Models through the Word-in-Context Task</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>13610–13620</pages>
       <abstract>Despite the remarkable recent advancements in large language models (LLMs), a comprehensive understanding of their inner workings and the depth of their knowledge remains elusive. This study aims to reassess the semantic knowledge encoded in LLMs by utilizing the Word-in-Context (WiC) task, which involves predicting the semantic equivalence of a target word across different contexts, as a probing task. To address this challenge, we start by prompting LLMs, specifically GPT-3 and GPT-4, to generate natural language descriptions that contrast the meanings of the target word in two contextual sentences given in the WiC dataset. Subsequently, we conduct a manual analysis to examine their linguistic attributes. In parallel, we train a text classification model that utilizes the generated descriptions as supervision and assesses their practical effectiveness in the WiC task. The linguistic and empirical findings reveal a consistent provision of valid and valuable descriptions by LLMs, with LLM-generated descriptions significantly improving classification accuracy. Notably, the highest classification result achieved with GPT-3-generated descriptions largely surpassed GPT-3’s zero-shot baseline. However, the GPT-4-generated descriptions performed slightly below GPT-4’s zero-shot baseline, suggesting that the full potential of the most advanced large language models, such as GPT-4, is yet to be fully revealed.</abstract>
       <url hash="fb065d49">2024.lrec-main.1189</url>
@@ -13999,7 +13999,7 @@
       <author><first>Maxime</first><last>Arens</last></author>
       <author><first>Lucile</first><last>Callebert</last></author>
       <author><first>Mohand</first><last>Boughanem</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <pages>13621–13632</pages>
       <abstract>Data annotation is crucial for machine learning, notably in technical domains, where the quality and quantity of annotated data, significantly affect effectiveness of trained models. Employing humans is costly, especially when annotating for multi-label classification, as instances may bear multiple labels. Active Learning (AL) aims to alleviate annotation costs by intelligently selecting instances for annotation, rather than randomly annotating. Recent attention on transformers has spotlighted the potential of AL in this context. However, in practical settings, implementing AL faces challenges beyond theory. Notably, the gap between AL cycles presents idle time for annotators. To address this issue, we investigate alternative instance selection methods, aiming to maximize annotation efficiency by seamlessly integrating with the AL process. We begin by evaluating two existing methods in our transformer setting, employing respectively random sampling and outdated information. Following this we propose our novel method based on annotating instances to rebalance label distribution. Our approach mitigates biases, enhances model performance (up to 23% improvement on f1score), reduces strategy-dependent disparities (decrease of nearly 50% on standard deviation) and reduces label imbalance (decrease of 30% on Mean Imbalance Ratio).</abstract>
       <url hash="2fc9ef79">2024.lrec-main.1190</url>
@@ -14008,7 +14008,7 @@
     <paper id="1191">
       <title><fixed-case>R</fixed-case>e<fixed-case>CAP</fixed-case>: Semantic Role Enhanced Caption Generation</title>
       <author><first>Abhidip</first><last>Bhattacharyya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Christoffer</first><last>Heckman</last></author>
       <pages>13633–13649</pages>
       <abstract>Even though current vision language (V+L) models have achieved success in generating image captions, they often lack specificity and overlook various aspects of the image. Additionally, the attention learned through weak supervision operates opaquely and is difficult to control. To address these limitations, we propose the use of semantic roles as control signals in caption generation. Our hypothesis is that, by incorporating semantic roles as signals, the generated captions can be guided to follow specific predicate argument structures. To validate the effectiveness of our approach, we conducted experiments using data and compared the results with a baseline model VL-BART(CITATION). The experiments showed a significant improvement, with a gain of 45% in Smatch score (Standard NLP evaluation metric for semantic representations), demonstrating the efficacy of our approach. By focusing on specific objects and their associated semantic roles instead of providing a general description, our framework produces captions that exhibit enhanced quality, diversity, and controllability.</abstract>
@@ -14020,7 +14020,7 @@
       <author><first>Yi-Pei</first><last>Chen</last></author>
       <author><first>Noriki</first><last>Nishida</last></author>
       <author><first>Hideki</first><last>Nakayama</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>13650–13665</pages>
       <abstract>Enhancing user engagement through personalization in conversational agents has gained significance, especially with the advent of large language models that generate fluent responses. Personalized dialogue generation, however, is multifaceted and varies in its definition – ranging from instilling a persona in the agent to capturing users’ explicit and implicit cues. This paper seeks to systemically survey the recent landscape of personalized dialogue generation, including the datasets employed, methodologies developed, and evaluation metrics applied. Covering 22 datasets, we highlight benchmark datasets and newer ones enriched with additional features. We further analyze 17 seminal works from top conferences between 2021-2023 and identify five distinct types of problems. We also shed light on recent progress by LLMs in personalized dialogue generation. Our evaluation section offers a comprehensive summary of assessment facets and metrics utilized in these works. In conclusion, we discuss prevailing challenges and envision prospect directions for future research in personalized dialogue generation.</abstract>
       <url hash="c18d803d">2024.lrec-main.1192</url>
@@ -14034,7 +14034,7 @@
       <author><first>Minsun</first><last>Kim</last></author>
       <author><first>Tak Yeon</first><last>Lee</last></author>
       <author><first>So-Yeon</first><last>Ahn</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>13666–13676</pages>
       <abstract>The integration of generative AI in education is expanding, yet empirical analyses of large-scale and real-world interactions between students and AI systems still remain limited. Addressing this gap, we present RECIPE4U (RECIPE for University), a dataset sourced from a semester-long experiment with 212 college students in English as Foreign Language (EFL) writing courses. During the study, students engaged in dialogues with ChatGPT to revise their essays. RECIPE4U includes comprehensive records of these interactions, including conversation logs, students’ intent, students’ self-rated satisfaction, and students’ essay edit histories. In particular, we annotate the students’ utterances in RECIPE4U with 13 intention labels based on our coding schemes. We establish baseline results for two subtasks in task-oriented dialogue systems within educational contexts: intent detection and satisfaction estimation. As a foundational step, we explore student-ChatGPT interaction patterns through RECIPE4U and analyze them by focusing on students’ dialogue, essay data statistics, and students’ essay edits. We further illustrate potential applications of RECIPE4U dataset for enhancing the incorporation of LLMs in educational frameworks. RECIPE4U is publicly available at https://zeunie.github.io/RECIPE4U/.</abstract>
       <url hash="284ed69f">2024.lrec-main.1193</url>
@@ -14044,7 +14044,7 @@
       <title>Recognizing Social Cues in Crisis Situations</title>
       <author><first>Di</first><last>Wang</last></author>
       <author><first>Yuan</first><last>Zhuang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Marina</first><last>Kogan</last></author>
       <pages>13677–13687</pages>
       <abstract>During crisis situations, observations of other people’s behaviors often play an essential role in a person’s decision-making. For example, a person might evacuate before a hurricane only if everyone else in the neighborhood does so. Conversely, a person might stay if no one else is leaving. Such observations are called social cues. Social cues are important for understanding people’s response to crises, so recognizing them can help inform the decisions of government officials and emergency responders. In this paper, we propose the first NLP task to categorize social cues in social media posts during crisis situations. We introduce a manually annotated dataset of 6,000 tweets, labeled with respect to eight social cue categories. We also present experimental results of several classification models, which show that some types of social cues can be recognized reasonably well, but overall this task is challenging for NLP systems. We further present error analyses to identify specific types of mistakes and promising directions for future research on this task.</abstract>
@@ -14086,7 +14086,7 @@
       <author><first>Fabian</first><last>Simonjetz</last></author>
       <author><first>Jussi</first><last>Laasonen</last></author>
       <author><first>Yunus</first><last>Cobanoglu</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Enrique</first><last>Jiménez</last></author>
       <pages>13712–13721</pages>
       <abstract>Ancient Mesopotamian literature is riddled with gaps, caused by the decay and fragmentation of its writing material, clay tablets. The discovery of overlaps between fragments allows reconstruction to advance, but it is a slow and unsystematic process. Since new pieces are found and digitized constantly, NLP techniques can help to identify fragments and match them with existing text collections to restore complete literary works. We compare a number of approaches and determine that a character-level n-gram-based similarity matching approach works well for this problem, leading to a large speed-up for researchers in Assyriology.</abstract>
@@ -14108,7 +14108,7 @@
       <title>Re-evaluating the Tomes for the Times</title>
       <author><first>Ryan</first><last>Brate</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>13734–13739</pages>
       <abstract>Literature is to some degree a snapshot of the time it was written in and the societal attitudes of the time. Not all depictions are pleasant or in-line with modern-day sensibilities; this becomes problematic when the prevalent depictions over a large body of work are negatively biased, leading to their normalisation. Many much-loved and much-read classics are set in periods of heightened social inequality: slavery, pre-womens’ rights movements, colonialism, etc. In this paper, we exploit known text co-occurrence metrics with respect to token-level level contexts to identify prevailing themes associated with known problematic descriptors. We see that prevalent, negative depictions are perpetuated by classic literature. We propose that such a methodology could form the basis of a system for making explicit such problematic associations, for interested parties: such as, sensitivity coordinators of publishing houses, library curators, or organisations concerned with social justice</abstract>
       <url hash="fdb967e8">2024.lrec-main.1199</url>
@@ -14128,7 +14128,7 @@
       <author><first>Wei-Fan</first><last>Chen</last></author>
       <author><first>Milad</first><last>Alshomary</last></author>
       <author><first>Maja</first><last>Stahl</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>13754–13768</pages>
@@ -14145,7 +14145,7 @@
       <author><first>Emily</first><last>Preston</last></author>
       <author><first>Chris</first><last>Bayliss</last></author>
       <author><first>Chris</first><last>Oakley</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>13769–13784</pages>
       <abstract>Sensitising language models (LMs) to external context helps them to more effectively capture the speaking patterns of individuals with specific characteristics or in particular environments. This work investigates to what extent detailed character and film annotations can be leveraged to personalise LMs in a scalable manner. We then explore the use of such models in evaluating context specificity in machine translation. We build LMs which leverage rich contextual information to reduce perplexity by up to 6.5% compared to a non-contextual model, and generalise well to a scenario with no speaker-specific data, relying on combinations of demographic characteristics expressed via metadata. Our findings are consistent across two corpora, one of which (Cornell-rich) is also a contribution of this paper. We then use our personalised LMs to measure the co-occurrence of extra-textual context and translation hypotheses in a machine translation setting. Our results suggest that the degree to which professional translations in our domain are context-specific can be preserved to a better extent by a contextual machine translation model than a non-contextual model, which is also reflected in the contextual model’s superior reference-based scores.</abstract>
       <url hash="2e15f2d8">2024.lrec-main.1202</url>
@@ -14197,7 +14197,7 @@
       <title><fixed-case>R</fixed-case>eflect<fixed-case>S</fixed-case>umm: A Benchmark for Course Reflection Summarization</title>
       <author><first>Yang</first><last>Zhong</last></author>
       <author><first>Mohamed</first><last>Elaraby</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Ahmed Ashraf</first><last>Butt</last></author>
       <author><first>Muhsin</first><last>Menekse</last></author>
       <pages>13819–13846</pages>
@@ -14477,8 +14477,8 @@
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Yang</first><last>Song</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>14122–14133</pages>
       <abstract>Stacking non-linear layers allows deep neural networks to model complicated functions, and including residual connections in Transformer layers is beneficial for convergence and performance. However, residual connections may make the model “forget” distant layers and fail to fuse information from previous layers effectively. Selectively managing the representation aggregation of Transformer layers may lead to better performance. In this paper, we present a Transformer with depth-wise LSTMs connecting cascading Transformer layers and sub-layers. We show that layer normalization and feed-forward computation within a Transformer layer can be absorbed into depth-wise LSTMs connecting pure Transformer attention layers. Our experiments with the 6-layer Transformer show significant BLEU improvements in both WMT 14 English-German / French tasks and the OPUS-100 many-to-many multilingual NMT task, and our deep Transformer experiments demonstrate the effectiveness of depth-wise LSTM on the convergence and performance of deep Transformers.</abstract>
       <url hash="4fcf2eb1">2024.lrec-main.1231</url>
@@ -14490,7 +14490,7 @@
       <author><first>Atula Tejaswi</first><last>Neerkaje</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>14134–14145</pages>
       <abstract>Suicide is a serious public health issue, but it is preventable with timely intervention. Emerging studies have suggested there is a noticeable increase in the number of individuals sharing suicidal thoughts online. As a result, utilising advance Natural Language Processing techniques to build automated systems for risk assessment is a viable alternative. However, existing systems are prone to incorrectly predicting risk severity and have no early detection mechanisms. Therefore, we propose RISE, a novel robust mechanism for accurate early detection of suicide risk by ensembling Hyperbolic Internal Classifiers equipped with an abstention mechanism and early-exit inference capabilities. Through quantitative, qualitative and ablative experiments, we demonstrate RISE as an efficient and robust human-in-the-loop approach for risk assessment over the Columbia Suicide Severity Risk Scale (C-SSRS) and CLPsych 2022 datasets. It is able to successfully abstain from 84% incorrect predictions on Reddit data while out-predicting state of the art models upto 3.5x earlier.</abstract>
       <url hash="3578caec">2024.lrec-main.1232</url>
@@ -14511,7 +14511,7 @@
       <author><first>Mohammad</first><last>Mohammadamini</last></author>
       <author><first>Driss</first><last>Matrouf</last></author>
       <author><first>Michael</first><last>Rouvier</last></author>
-      <author><first>Jean-Francois</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></author>
       <author><first>Romain</first><last>Serizel</last></author>
       <author><first>Theophile</first><last>Gonos</last></author>
       <pages>14152–14156</pages>
@@ -14553,7 +14553,7 @@
       <author><first>Han</first><last>Xia</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>14186–14203</pages>
       <abstract>Large Language Models (LLMs) have showcased remarkable capabilities in following human instructions. However, recent studies have raised concerns about the robustness of LLMs for natural language understanding (NLU) tasks when prompted with instructions combining textual adversarial samples. In this paper, drawing inspiration from recent works that LLMs are sensitive to the design of the instructions, we utilize instructions in code style, which are more structural and less ambiguous, to replace typically natural language instructions. Through this conversion, we provide LLMs with more precise instructions and strengthen the robustness of LLMs. Moreover, under few-shot scenarios, we propose a novel method to compose in-context demonstrations using both clean and adversarial samples (adversarial context method) to further boost the robustness of the LLMs. Experiments on eight robustness datasets show that our method consistently outperforms prompting LLMs with natural language, for example, with gpt-3.5-turbo on average, our method achieves an improvement of 5.68% in test set accuracy and a reduction of 5.66 points in Attack Success Rate (ASR).</abstract>
       <url hash="a38bee75">2024.lrec-main.1237</url>
@@ -14600,7 +14600,7 @@
       <author><first>Daniil</first><last>Kosakin</last></author>
       <author><first>Sergei</first><last>Obiedkov</last></author>
       <author><first>Ivan</first><last>Smirnov</last></author>
-      <author><first>Ekaterina</first><last>Rakhilina</last></author>
+      <author id="ekaterina-v-rakhilina"><first>Ekaterina</first><last>Rakhilina</last></author>
       <author><first>Anastasia</first><last>Vyrenkova</last></author>
       <author><first>Ekaterina</first><last>Zalivina</last></author>
       <pages>14240–14258</pages>
@@ -14637,7 +14637,7 @@
       <author><first>Puneet</first><last>Mathur</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Shivam</first><last>Agarwal</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sudheer</first><last>Chava</last></author>
       <author><first>Dinesh</first><last>Manocha</last></author>
       <pages>14285–14297</pages>
@@ -14756,7 +14756,7 @@
       <author><first>Song</first><last>Chen</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>14393–14399</pages>
       <abstract>The Schema Learning Corpus (SLC) is a new linguistic resource designed to support research into the structure of complex events in multilingual, multimedia data. The SLC incorporates large volumes of background data in English, Spanish and Russian, and defines 100 complex events (CEs) across 12 domains, with CE profiles containing information about the typical steps and substeps and expected event categories for the CE. Multiple documents are labeled for each CE, with pointers to evidence in the document for each CE step, plus labeled events and relations along with their arguments across a large tag set. The SLC was designed to support development and evaluation of technology capable of understanding and reasoning about complex real-world events in multimedia, multilingual data streams in order to provide users with a deeper understanding of the potential relationships among seemingly disparate events and actors, and to allow users to make better predictions about how future events are likely to unfold. The Schema Learning Corpus will be made available to the research community through publication in Linguistic Data Consortium catalog.</abstract>
       <url hash="89f10b0d">2024.lrec-main.1254</url>
@@ -14790,7 +14790,7 @@
       <author><first>Xiao</first><last>Zhang</last></author>
       <author><first>Heqi</first><last>Zheng</last></author>
       <author><first>Yuxiang</first><last>Nie</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Xian-Ling</first><last>Mao</last></author>
       <pages>14418–14428</pages>
       <abstract>Scientific Machine Reading Comprehension (SMRC) aims to facilitate the understanding of scientific texts through human-machine interactions. While existing dataset has significantly contributed to this field, it predominantly focus on single-perspective question-answer pairs, thereby overlooking the inherent variation in comprehension levels among different readers. To address this limitation, we introduce a novel multi-perspective scientific machine reading comprehension dataset, SciMRC, which incorporates perspectives from beginners, students, and experts. Our dataset comprises 741 scientific papers and 6,057 question-answer pairs, with 3,306, 1,800, and 951 pairs corresponding to beginners, students, and experts respectively. Extensive experiments conducted on SciMRC using pre-trained models underscore the importance of considering diverse perspectives in SMRC and highlight the challenging nature of our scientific machine comprehension tasks.</abstract>
@@ -14810,11 +14810,11 @@
     </paper>
     <paper id="1259">
       <title><fixed-case>SCOUT</fixed-case>: A Situated and Multi-Modal Human-Robot Dialogue Corpus</title>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Taylor A.</first><last>Hudson</last></author>
-      <author><first>Cory J.</first><last>Hayes</last></author>
+      <author id="cory-hayes"><first>Cory J.</first><last>Hayes</last></author>
       <author><first>Kimberly</first><last>Pollard</last></author>
       <author><first>Anthony</first><last>Baker</last></author>
       <author><first>Ashley N.</first><last>Foots</last></author>
@@ -14825,8 +14825,8 @@
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Susan G.</first><last>Hill</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>14445–14458</pages>
       <abstract>We introduce the Situated Corpus Of Understanding Transactions (SCOUT), a multi-modal collection of human-robot dialogue in the task domain of collaborative exploration. The corpus was constructed from multiple Wizard-of-Oz experiments where human participants gave verbal instructions to a remotely-located robot to move and gather information about its surroundings. SCOUT contains 89,056 utterances and 310,095 words from 278 dialogues averaging 320 utterances per dialogue. The dialogues are aligned with the multi-modal data streams available during the experiments: 5,785 images and 30 maps. The corpus has been annotated with Abstract Meaning Representation and Dialogue-AMR to identify the speaker’s intent and meaning within an utterance, and with Transactional Units and Relations to track relationships between utterances to reveal patterns of the Dialogue Structure. We describe how the corpus and its annotations have been used to develop autonomous human-robot systems and enable research in open questions of how humans speak to robots. We release this corpus to accelerate progress in autonomous, situated, human-robot dialogue, especially in the context of navigation tasks where details about the environment need to be discovered.</abstract>
       <url hash="f3c78910">2024.lrec-main.1259</url>
@@ -14865,7 +14865,7 @@
       <author><first>Marie</first><last>Kolm</last></author>
       <author><first>Verena</first><last>Blaschke</last></author>
       <author><first>Ekaterina</first><last>Artemova</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>14478–14493</pages>
       <abstract>Named Entity Recognition (NER) is a fundamental task to extract key information from texts, but annotated resources are scarce for dialects. This paper introduces the first dialectal NER dataset for German, BarNER, with 161K tokens annotated on Bavarian Wikipedia articles (bar-wiki) and tweets (bar-tweet), using a schema adapted from German CoNLL 2006 and GermEval. The Bavarian dialect differs from standard German in lexical distribution, syntactic construction, and entity information. We conduct in-domain, cross-domain, sequential, and joint experiments on two Bavarian and three German corpora and present the first comprehensive NER results on Bavarian. Incorporating knowledge from the larger German NER (sub-)datasets notably improves on bar-wiki and moderately on bar-tweet. Inversely, training first on Bavarian contributes slightly to the seminal German CoNLL 2006 corpus. Moreover, with gold dialect labels on Bavarian tweets, we assess multi-task learning between five NER and two Bavarian-German dialect identification tasks and achieve NER SOTA on bar-wiki. We substantiate the necessity of our low-resource BarNER corpus and the importance of diversity in dialects, genres, and topics in enhancing model performance.</abstract>
       <url hash="7b64f8d7">2024.lrec-main.1262</url>
@@ -14888,7 +14888,7 @@
       <author><first>Preeti</first><last>Verma</last></author>
       <author><first>Jaithra Varma</first><last>Manthena</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Minakshi</first><last>Dhar</last></author>
       <author><first>Sarbajeet</first><last>Tiwari</last></author>
       <pages>14513–14523</pages>
@@ -14932,7 +14932,7 @@
       <author><first>Zixuan</first><last>Li</last></author>
       <author><first>Long</first><last>Bai</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>14555–14566</pages>
       <abstract>Temporal Knowledge Graph (TKG), which characterizes temporally evolving facts in the form of (subject, relation, object, timestamp), has attracted much attention recently. TKG reasoning aims to predict future facts based on given historical ones. However, existing TKG reasoning models are unable to abstain from predictions they are uncertain, which will inevitably bring risks in real-world applications. Thus, in this paper, we propose an abstention mechanism for TKG reasoning, which helps the existing models make selective, instead of indiscriminate, predictions. Specifically, we develop a confidence estimator, called Confidence Estimator with History (CEHis), to enable the existing TKG reasoning models to first estimate their confidence in making predictions, and then abstain from those with low confidence. To do so, CEHis takes two kinds of information into consideration, namely, the certainty of the current prediction and the accuracy of historical predictions. Experiments with representative TKG reasoning models on two benchmark datasets demonstrate the effectiveness of the proposed CEHis.</abstract>
       <url hash="2f99ad93">2024.lrec-main.1268</url>
@@ -14979,8 +14979,8 @@
     </paper>
     <paper id="1272">
       <title>Self-reported Demographics and Discourse Dynamics in a Persuasive Online Forum</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <pages>14606–14621</pages>
       <abstract>Research on language as interactive discourse underscores the deliberate use of demographic parameters such as gender, ethnicity, and class to shape social identities. For example, by explicitly disclosing one’s information and enforcing one’s social identity to an online community, the reception by and interaction with the said community is impacted, e.g., strengthening one’s opinions by depicting the speaker as credible through their experience in the subject. Here, we present a first thorough study of the role and effects of self-disclosures on online discourse dynamics, focusing on a pervasive type of self-disclosure: author gender. Concretely, we investigate the contexts and properties of gender self-disclosures and their impact on interaction dynamics in an online persuasive forum, ChangeMyView. Our contribution is twofold. At the level of the target phenomenon, we fill a research gap in the understanding of the impact of these self-disclosures on the discourse by bringing together features related to forum activity (votes, number of comments), linguistic/stylistic features from the literature, and discourse topics. At the level of the contributed resource, we enrich and release a comprehensive dataset that will provide a further impulse for research on the interplay between gender disclosures, community interaction, and persuasion in online discourse.</abstract>
@@ -14998,7 +14998,7 @@
     <paper id="1274">
       <title>Semantic Map-based Generation of Navigation Instructions</title>
       <author><first>Chengzu</first><last>Li</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Rama Sanand</first><last>Doddipatla</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
@@ -15108,7 +15108,7 @@
       <author><first>Vladimir</first><last>Araujo</last></author>
       <author><first>Maria Mihaela</first><last>Trusca</last></author>
       <author><first>Rodrigo</first><last>Tufiño</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>14729–14743</pages>
       <abstract>In recent years, significant advancements in pre-trained language models have driven the creation of numerous non-English language variants, with a particular emphasis on encoder-only and decoder-only architectures. While Spanish language models based on BERT and GPT have demonstrated proficiency in natural language understanding and generation, there remains a noticeable scarcity of encoder-decoder models explicitly designed for sequence-to-sequence tasks, which aim to map input sequences to generate output sequences conditionally. This paper breaks new ground by introducing the implementation and evaluation of renowned encoder-decoder architectures exclusively pre-trained on Spanish corpora. Specifically, we present Spanish versions of BART, T5, and BERT2BERT-style models and subject them to a comprehensive assessment across various sequence-to-sequence tasks, including summarization, question answering, split-and-rephrase, dialogue, and translation. Our findings underscore the competitive performance of all models, with the BART- and T5-based models emerging as top performers across all tasks. We have made all models publicly available to the research community to foster future explorations and advancements in Spanish NLP: https://github.com/vgaraujov/Seq2Seq-Spanish-PLMs.</abstract>
       <url hash="4c512ad3">2024.lrec-main.1283</url>
@@ -15117,7 +15117,7 @@
     <paper id="1284">
       <title>Sequential and Repetitive Pattern Learning for Temporal Knowledge Graph Reasoning</title>
       <author><first>Xuefei</first><last>Li</last></author>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Weihong</first><last>Yao</last></author>
       <author><first>Wenchu</first><last>Li</last></author>
       <author><first>Yingyu</first><last>Lin</last></author>
@@ -15131,7 +15131,7 @@
       <title><fixed-case>SGCM</fixed-case>: Salience-Guided Context Modeling for Question Generation</title>
       <author><first>Chuyao</first><last>Ding</last></author>
       <author><first>Yu</first><last>Hong</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>14755–14762</pages>
       <abstract>We tackle Paragraph-level Question Generation (abbr., PQG) in this paper. PQG is a task of automatically generating questions given paragraphs and answers. Identifying the relevant sentences to answers is crucial for reasoning the possible questions before generation. Accordingly, we propose a salience-guided approach to enhance PQG. Specifically, we construct an auxiliary task of identifying salient sentences that manifest relevance. Grounded on this auxiliary task and the main task of PQG, we strengthen the BART encoder during training within a multitask learning framework. In particular, we utilize the identified salient sentences as an explicit guidance to enable the salience-aware attention computation in the BART decoder. We experiment on the benchmark dataset FairytaleQA. The test results show that our approach yields substantial improvements compared to the BART baseline, achieving the Rouge-L, BLEU4, BERTScore, Q-BLUE-3 and F1-scores of about 56.56%, 19.78%, 61.19%, 54.33% and 43.55%, respectively. Both the source codes and models will be publicly available.</abstract>
       <url hash="59ebaf12">2024.lrec-main.1285</url>
@@ -15169,7 +15169,7 @@
     <paper id="1289">
       <title><fixed-case>S</fixed-case>ign<fixed-case>BLEU</fixed-case>: Automatic Evaluation of Multi-channel Sign Language Translation</title>
       <author><first>Jung-Ho</first><last>Kim</last></author>
-      <author><first>Mathew</first><last>Huerta-Enochian</last></author>
+      <author id="mathew-huerta-enochian"><first>Mathew</first><last>Huerta-Enochian</last></author>
       <author><first>Changyong</first><last>Ko</last></author>
       <author><first>Du Hui</first><last>Lee</last></author>
       <pages>14796–14811</pages>
@@ -15181,7 +15181,7 @@
       <title><fixed-case>S</fixed-case>ilver<fixed-case>A</fixed-case>lign: <fixed-case>MT</fixed-case>-Based Silver Data Algorithm for Evaluating Word Alignment</title>
       <author><first>Abdullatif</first><last>Koksal</last></author>
       <author><first>Silvia</first><last>Severini</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>14812–14825</pages>
       <abstract>Word alignments are essential for a variety of NLP tasks. Therefore, choosing the best approaches for their creation is crucial. However, the scarce availability of gold evaluation data makes the choice difficult. We propose SilverAlign, a new method to automatically create silver data for the evaluation of word aligners by exploiting machine translation and minimal pairs. We show that performance on our silver data correlates well with gold benchmarks for 9 language pairs, making our approach a valid resource for evaluation of different languages and domains when gold data is not available. This addresses the important scenario of missing gold data alignments for low-resource languages.</abstract>
       <url hash="04dc5c84">2024.lrec-main.1290</url>
@@ -15266,7 +15266,7 @@
       <author><first>Miriam</first><last>Winkler</last></author>
       <author><first>Virginija</first><last>Juozapaityte</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>14898–14915</pages>
       <abstract>Digital assistants perform well in high-resource languages like English, where tasks like slot and intent detection (SID) are well-supported. Many recent SID datasets start including multiple language varieties. However, it is unclear how realistic these translated datasets are. Therefore, we extend one such dataset, namely xSID-0.4, to include two underrepresented languages: Bavarian, a German dialect, and Lithuanian, a Baltic language. Both language variants have limited speaker populations and are often not included in multilingual projects. In addition to translations we provide “natural” queries to digital assistants generated by native speakers. We further include utterances from another dataset for Bavarian to build the richest SID dataset available today for a low-resource dialect without standard orthography. We then set out to evaluate models trained on English in a zero-shot scenario on our target language variants. Our evaluation reveals that translated data can produce overly optimistic scores. However, the error patterns in translated and natural datasets are highly similar. Cross-dataset experiments demonstrate that data collection methods influence performance, with scores lower than those achieved with single-dataset translations. This work contributes to enhancing SID datasets for underrepresented languages, yielding NaLiBaSID, a new evaluation dataset for Bavarian and Lithuanian.</abstract>
       <url hash="a8313e56">2024.lrec-main.1297</url>
@@ -15286,8 +15286,8 @@
       <author><first>Pierre</first><last>Lepagnol</last></author>
       <author><first>Thomas</first><last>Gerald</last></author>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>14923–14936</pages>
       <abstract>This study is part of the debate on the efficiency of large versus small language models for text classification by prompting. We assess the performance of small language models in zero-shot text classification, challenging the prevailing dominance of large models. Across 15 datasets, our investigation benchmarks language models from 77M to 40B parameters using different architectures and scoring functions. Our findings reveal that small models can effectively classify texts, getting on par with or surpassing their larger counterparts. We developed and shared a comprehensive open-source repository that encapsulates our methodologies. This research underscores the notion that bigger isn’t always better, suggesting that resource-efficient small models may offer viable solutions for specific data classification challenges.</abstract>
       <url hash="06871381">2024.lrec-main.1299</url>
@@ -15344,7 +15344,7 @@
       <author><first>Ankita</first><last>Bhaumik</last></author>
       <author><first>Ning</first><last>Sa</last></author>
       <author><first>Gregorios</first><last>Katsios</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>14984–14994</pages>
       <abstract>Social media platforms are popular tools for disseminating targeted information during major public events like elections or pandemics. Systematic analysis of the message traffic can provide valuable insights into prevailing opinions and social dynamics among different segments of the population. We are specifically interested in influence spread, and in particular whether more deliberate influence operations can be detected. However, filtering out the essential messages with telltale influence indicators from the extensive and often chaotic social media traffic is a major challenge.In this paper we present a novel approach to extract influence indicators from messages circulating among groups of users discussing particular topics. We build upon the the concept of a convo to identify influential authors who are actively promoting some particular agenda around that topic within the group. We focus on two influence indicators: the (control of) agenda and the use of emotional language.</abstract>
       <url hash="393baddd">2024.lrec-main.1303</url>
@@ -15357,7 +15357,7 @@
       <author><first>Yanda</first><last>Chen</last></author>
       <author><first>Amith</first><last>Ananthram</last></author>
       <author><first>Colin Wayne</first><last>Leach</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>14995–15011</pages>
       <abstract>There are many settings where it is useful to predict and explain the success or failure of a dialogue. Circumplex theory from psychology models the social orientations (e.g., Warm-Agreeable, Arrogant-Calculating) of conversation participants and can be used to predict and explain the outcome of social interactions. Our work is novel in its systematic application of social orientation tags to modeling conversation outcomes. In this paper, we introduce a new data set of dialogue utterances machine-labeled with social orientation tags. We show that social orientation tags improve task performance, especially in low-resource settings, on both English and Chinese language benchmarks. We also demonstrate how social orientation tags help explain the outcomes of social interactions when used in neural models. Based on these results showing the utility of social orientation tags for dialogue outcome prediction tasks, we release our data sets, code, and models that are fine-tuned to predict social orientation tags on dialogue utterances.</abstract>
       <url hash="a6d8067a">2024.lrec-main.1304</url>
@@ -15366,7 +15366,7 @@
     <paper id="1305">
       <title><fixed-case>S</fixed-case>oft<fixed-case>MCL</fixed-case>: Soft Momentum Contrastive Learning for Fine-grained Sentiment-aware Pre-training</title>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>15012–15023</pages>
       <abstract>The pre-training for language models captures general language understanding but fails to distinguish the affective impact of a particular context to a specific word. Recent works have sought to introduce contrastive learning (CL) for sentiment-aware pre-training in acquiring affective information. Nevertheless, these methods present two significant limitations. First, the compatibility of the GPU memory often limits the number of negative samples, hindering the opportunities to learn good representations. In addition, using only a few sentiment polarities as hard labels, e.g., positive, neutral, and negative, to supervise CL will force all representations to converge to a few points, leading to the issue of latent space collapse. This study proposes a soft momentum contrastive learning (SoftMCL) for fine-grained sentiment-aware pre-training. Instead of hard labels, we introduce valence ratings as soft-label supervision for CL to fine-grained measure the sentiment similarities between samples. The proposed SoftMCL conducts CL on both the word- and sentence-level to enhance the model’s ability to learn affective information. A momentum queue was introduced to expand the contrastive samples, allowing storing and involving more negatives to overcome the limitations of hardware platforms. Extensive experiments were conducted on four different sentiment-related tasks, which demonstrates the effectiveness of the proposed SoftMCL method. The code and data of the proposed SoftMCL is available at: https://www.github.com/wangjin0818/SoftMCL/.</abstract>
@@ -15435,7 +15435,7 @@
       <title><fixed-case>SPACE</fixed-case>-<fixed-case>IDEAS</fixed-case>: A Dataset for Salient Information Detection in Space Innovation</title>
       <author><first>Andres</first><last>Garcia-Silva</last></author>
       <author><first>Cristian</first><last>Berrio</last></author>
-      <author><first>Jose Manuel</first><last>Gomez-Perez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>Jose Manuel</first><last>Gomez-Perez</last></author>
       <pages>15087–15092</pages>
       <abstract>Detecting salient parts in text using natural language processing has been widely used to mitigate the effects of information overflow. Nevertheless, most of the datasets available for this task are derived mainly from academic publications. We introduce SPACE-IDEAS, a dataset for salient information detection from innovation ideas related to the Space domain. The text in SPACE-IDEAS varies greatly and includes informal, technical, academic and business-oriented writing styles. In addition to a manually annotated dataset we release an extended version that is annotated using a large generative language model. We train different sentence and sequential sentence classifiers, and show that the automatically annotated dataset can be leveraged using multitask learning to train better classifiers.</abstract>
       <url hash="fe3667b0">2024.lrec-main.1311</url>
@@ -15457,7 +15457,7 @@
       <author><first>Jennifer</first><last>Tracey</last></author>
       <author><first>Ann</first><last>O’Brien</last></author>
       <author><first>Song</first><last>Chen</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>15105–15113</pages>
       <abstract>We present a new approach to event annotation designed to promote whole-corpus understanding of complex events in multilingual, multimedia data as part of the DARPA Knowledge-directed Artificial Intelligence Reasoning Over Schemas (KAIROS) Program. KAIROS aims to build technology capable of reasoning about complex real-world events like a specific terrorist attack in order to provide actionable insights to end users. KAIROS systems extract events from a corpus, aggregate information into a coherent semantic representation, and instantiate observed events or predict unseen but expected events using a relevant event schema selected from a generalized schema library. To support development and testing for KAIROS Phase 2B we created a complex event annotation corpus that, instead of individual event mentions anchored in document spans with pre-defined event type labels, comprises a series of temporally ordered event frames populated with information aggregated from the whole corpus and labeled with an unconstrained tag set based on Wikidata Qnodes. The corpus makes a unique contribution to the resource landscape for information extraction, addressing gaps in the availability of multilingual, multimedia corpora for schema-based event representation. The corpus will be made available through publication in the Linguistic Data Consortium (LDC) catalog.</abstract>
       <url hash="fb930255">2024.lrec-main.1313</url>
@@ -15489,7 +15489,7 @@
       <author><first>Aleix</first><last>Sant</last></author>
       <author><first>Gerard I.</first><last>Gállego</last></author>
       <author><first>David</first><last>Dale</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>15137–15146</pages>
       <abstract>Speech-to-Speech and Speech-to-Text translation are currently dynamic areas of research. In our commitment to advance these fields, we present SpeechAlign, a framework designed to evaluate the underexplored field of source-target alignment in speech models. The SpeechAlign framework has two core components. First, to tackle the absence of suitable evaluation datasets, we introduce the Speech Gold Alignment dataset, built upon a English-German text translation gold alignment dataset. Secondly, we introduce two novel metrics, Speech Alignment Error Rate (SAER) and Time-weighted Speech Alignment Error Rate (TW-SAER), which enable the evaluation of alignment quality within speech models. While the former gives equal importance to each word, the latter assigns weights based on the length of the words in the speech signal. By publishing SpeechAlign we provide an accessible evaluation framework for model assessment, and we employ it to benchmark open-source Speech Translation models. In doing so, we contribute to the ongoing research progress within the fields of Speech-to-Speech and Speech-to-Text translation.</abstract>
       <url hash="b833a9a6">2024.lrec-main.1316</url>
@@ -15543,7 +15543,7 @@
       <title><fixed-case>SPLICE</fixed-case>: A Singleton-Enhanced <fixed-case>P</fixed-case>ipe<fixed-case>LI</fixed-case>ne for Coreference <fixed-case>RE</fixed-case>solution</title>
       <author><first>Yilun</first><last>Zhu</last></author>
       <author><first>Siyao</first><last>Peng</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <pages>15191–15201</pages>
       <abstract>Singleton mentions, i.e. entities mentioned only once in a text, are important to how humans understand discourse from a theoretical perspective. However previous attempts to incorporate their detection in end-to-end neural coreference resolution for English have been hampered by the lack of singleton mention spans in the OntoNotes benchmark. This paper addresses this limitation by combining predicted mentions from existing nested NER systems and features derived from OntoNotes syntax trees. With this approach, we create a near approximation of the OntoNotes dataset with all singleton mentions, achieving ~94% recall on a sample of gold singletons. We then propose a two-step neural mention and coreference resolution system, named SPLICE, and compare its performance to the end-to-end approach in two scenarios: the OntoNotes test set and the out-of-domain (OOD) OntoGUM corpus. Results indicate that reconstructed singleton training yields results comparable to end-to-end systems for OntoNotes, while improving OOD stability (+1.1 avg. F1). We conduct error analysis for mention detection and delve into its impact on coreference clustering, revealing that precision improvements deliver more substantial benefits than increases in recall for resolving coreference chains.</abstract>
@@ -15612,7 +15612,7 @@
       <author><first>Isabelle</first><last>Lorge</last></author>
       <author id="li-zhang-uk"><first>Li</first><last>Zhang</last></author>
       <author><first>Xiaowen</first><last>Dong</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>15273–15284</pages>
       <abstract>The rise of social media platforms has led to an increase in polarised online discussions, especially on political and socio-cultural topics such as elections and climate change. We propose a simple and entirely novel unsupervised method to better predict whether the authors of two posts agree or disagree, leveraging user stances about named entities obtained from their posts. We present STEntConv, a model which builds a graph of users and named entities weighted by stance and trains a Signed Graph Convolutional Network (SGCN) to detect disagreement between comment and reply posts. We run experiments and ablation studies and show that including this information improves disagreement detection performance on a dataset of Reddit posts for a range of controversial subreddit topics, without the need for platform-specific features or user history</abstract>
       <url hash="bd0559fd">2024.lrec-main.1327</url>
@@ -15624,7 +15624,7 @@
       <author><first>Lianwei</first><last>Wu</last></author>
       <author><first>Linyong</first><last>Wang</last></author>
       <author><first>Sensen</first><last>Guo</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>15285–15295</pages>
       <abstract>Recently, the autoregressive framework based on large language models (LLMs) has achieved excellent performance in controlling the generated text to adhere to the required style. These methods guide LLMs through prompt learning to generate target text in an autoregressive manner. However, this manner possesses lower controllability and suffers from the challenge of accumulating errors, where early prediction inaccuracies might influence subsequent word generation. Furthermore, existing prompt-based methods overlook specific region editing, resulting in a deficiency of localized control over input text. To overcome these challenges, we propose a novel three-stage prompt-based approach for specific region editing. To alleviate the issue of accumulating errors, we transform the text style transfer task into a text infilling task, guiding the LLMs to modify only a small portion of text within the editing region to achieve style transfer, thus reducing the number of autoregressive iterations. To achieve an effective specific editing region, we adopt both prompt-based and word frequency-based strategies for region selection, subsequently employing a discriminator to validate the efficacy of the selected region. Experiments conducted on several publicly competitive datasets for text style transfer task confirm that our proposed approach achieves state-of-the-art performance. Keywords: text style transfer, natural language generation, large language models</abstract>
       <url hash="8b6e81a8">2024.lrec-main.1328</url>
@@ -15699,7 +15699,7 @@
       <author><first>Shichen</first><last>Li</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Yanzhi</first><last>Xu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>15373–15383</pages>
       <abstract>Employing pre-trained generation models for cross-domain aspect-based sentiment classification has recently led to large improvements. However, they ignore the importance of syntactic structures, which have shown appealing effectiveness in classification based models. Different from previous studies, efficiently encoding the syntactic structure in generation model is challenging because such models are pretrained on natural language, and modeling structured data may lead to catastrophic forgetting of distributional knowledge. In this study, we propose a novel structure-aware generation model to tackle this challenge. In particular, a prompt-driven strategy is designed to bridge the gap between different domains, by capturing implicit syntactic information from the input and output sides. Furthermore, the syntactic structure is explicitly encoded into the structure-aware generation model, which can effectively learn domain-irrelevant features based on syntactic pivot features. Empirical results demonstrate the effectiveness of the proposed structure-aware generation model over several strong baselines. The results also indicate the proposed model is capable of leveraging the input syntactic structure into the generation model.</abstract>
       <url hash="019c87f6">2024.lrec-main.1335</url>
@@ -15725,7 +15725,7 @@
       <author><first>Xiangyu</first><last>Duan</last></author>
       <author><first>Zhenyu</first><last>Qiu</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Hao</first><last>Yang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>15398–15409</pages>
@@ -15740,7 +15740,7 @@
       <author><first>Zhiheng</first><last>Xi</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>15410–15421</pages>
       <abstract>Deep neural networks (DNNs) are notoriously vulnerable to adversarial attacks that place carefully crafted perturbations on normal examples to fool DNNs. To better understand such attacks, a characterization of the features carried by adversarial examples is needed. In this paper, we tackle this challenge by inspecting the subspaces of sample features through spectral analysis. We first empirically show that the features of either clean signals or adversarial perturbations are redundant and span in low-dimensional linear subspaces respectively with minimal overlap, and the classical low-dimensional subspace projection can suppress perturbation features out of the subspace of clean signals. This makes it possible for DNNs to learn a subspace where only features of clean signals exist while those of perturbations are discarded, which can facilitate the distinction of adversarial examples. To prevent the residual perturbations that is inevitable in subspace learning, we propose an independence criterion to disentangle clean signals from perturbations. Experimental results show that the proposed strategy enables the model to inherently suppress adversaries, which not only boosts model robustness but also motivates new directions of effective adversarial defense.</abstract>
       <url hash="3562dd69">2024.lrec-main.1338</url>
@@ -15759,7 +15759,7 @@
       <author><first>Špela</first><last>Arhar Holdt</last></author>
       <author><first>Jaka</first><last>Čibej</last></author>
       <author><first>Kaja</first><last>Dobrovoljc</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Polona</first><last>Gantar</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Tina</first><last>Munda</last></author>
@@ -15787,7 +15787,7 @@
     <paper id="1342">
       <title><fixed-case>S</fixed-case>wiss<fixed-case>SL</fixed-case>i: The Multi-parallel Sign Language Corpus for <fixed-case>S</fixed-case>witzerland</title>
       <author><first>Zifan</first><last>Jiang</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Amit</first><last>Moryossef</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
@@ -15965,7 +15965,7 @@
       <title><fixed-case>TARIC</fixed-case>-<fixed-case>SLU</fixed-case>: A <fixed-case>T</fixed-case>unisian Benchmark Dataset for Spoken Language Understanding</title>
       <author><first>Salima</first><last>Mdhaffar</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
-      <author><first>Renato</first><last>de Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>de Mori</last></author>
       <author><first>Salah</first><last>Zaiem</last></author>
       <author><first>Mirco</first><last>Ravanelli</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
@@ -15990,7 +15990,7 @@
     </paper>
     <paper id="1359">
       <title>Task-agnostic Distillation of Encoder-Decoder Language Models</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Yang</last></author>
       <author><first>Qiuchi</first><last>Li</last></author>
       <author><first>Jingang</first><last>Wang</last></author>
@@ -16027,7 +16027,7 @@
       <author><first>Yunpeng</first><last>Li</last></author>
       <author><first>Jiarui</first><last>Zhang</last></author>
       <author><first>Xingsheng</first><last>Zhang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>15685–15697</pages>
       <abstract>Large Language Models (LLMs) have achieved impressive results in Machine Translation by simply following instructions, even without training on parallel data. However, LLMs still face challenges on low-resource languages due to the lack of pre-training data. In real-world situations, humans can become proficient in their native languages through abundant and meaningful social interactions and can also learn foreign languages effectively using well-organized textbooks. Drawing inspiration from human learning patterns, we introduce the Translate After LEarNing Textbook (TALENT) approach, which aims to enhance LLMs’ ability to translate low-resource languages by learning from a textbook. TALENT follows a step-by-step process: (1) Creating a Textbook for low-resource languages. (2) Guiding LLMs to absorb the Textbook’s content for Syntax Patterns. (3) Enhancing translation by utilizing the Textbook and Syntax Patterns. We thoroughly assess TALENT’s performance using 112 low-resource languages from FLORES-200 with two LLMs: ChatGPT and BLOOMZ. Evaluation across three different metrics reveals that TALENT consistently enhances translation performance by 14.8% compared to zero-shot baselines. Further analysis demonstrates that TALENT not only improves LLMs’ comprehension of low-resource languages but also equips them with the knowledge needed to generate accurate and fluent sentences in these languages.</abstract>
       <url hash="620f31b9">2024.lrec-main.1362</url>
@@ -16048,7 +16048,7 @@
       <author><first>Gopichand</first><last>Kanumolu</last></author>
       <author><first>Lokesh</first><last>Madasu</last></author>
       <author><first>Nirmal</first><last>Surange</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>15711–15720</pages>
       <abstract>News headline generation is a crucial task in increasing productivity for both the readers and producers of news. This task can easily be aided by automated News headline-generation models. However, the presence of irrelevant headlines in scraped news articles results in sub-optimal performance of generation models. We propose that relevance-based headline classification can greatly aid the task of generating relevant headlines. Relevance-based headline classification involves categorizing news headlines based on their relevance to the corresponding news articles. While this task is well-established in English, it remains under-explored in low-resource languages like Telugu due to a lack of annotated data. To address this gap, we present TeClass, the first-ever human-annotated Telugu news headline classification dataset, containing 78,534 annotations across 26,178 article-headline pairs. We experiment with various baseline models and provide a comprehensive analysis of their results. We further demonstrate the impact of this work by fine-tuning various headline generation models using TeClass dataset. The headlines generated by the models fine-tuned on highly relevant article-headline pairs, showed about a 5 point increment in the ROUGE-L scores. To encourage future research, the annotated dataset as well as the annotation guidelines will be made publicly available.</abstract>
       <url hash="8ff96ba1">2024.lrec-main.1364</url>
@@ -16069,7 +16069,7 @@
     <paper id="1366">
       <title>Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story</title>
       <author><first>Hans Ole</first><last>Hatzel</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>15732–15741</pages>
       <abstract>A wide body of research is concerned with the semantics of narratives, both in terms of understanding narratives and generating fictional narratives and stories. We provide a dataset of summaries to be used as a proxy for entire stories or for the analysis of the summaries themselves. Our dataset consists of a total of 96,831 individual summaries across 29,505 stories. We intend for the dataset to be used for training and evaluation of embedding representations for stories, specifically the stories’ narratives. The summary data is harvested from five different language versions of Wikipedia. Our dataset comes with rich metadata, which we extract from Wikidata, enabling a wide range of applications that operate on story summaries in conjunction with metadata. To set baseline results, we run retrieval experiments on the dataset, exploring the capability of similarity models in retrieving summaries of the same story. For this retrieval, a crucial element is to not place too much emphasis on the named entities, as this can enable retrieval of other summaries for the same work without taking the narrative into account.</abstract>
       <url hash="1100e10e">2024.lrec-main.1366</url>
@@ -16101,7 +16101,7 @@
       <title>text2story: A Python Toolkit to Extract and Visualize Story Components of Narrative Text</title>
       <author><first>Evelin</first><last>Amorim</last></author>
       <author><first>Ricardo</first><last>Campos</last></author>
-      <author><first>Alipio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alipio</first><last>Jorge</last></author>
       <author><first>Pedro</first><last>Mota</last></author>
       <author><first>Rúben</first><last>Almeida</last></author>
       <pages>15761–15772</pages>
@@ -16145,7 +16145,7 @@
     </paper>
     <paper id="1373">
       <title>Text Style Transfer Evaluation Using Large Language Models</title>
-      <author><first>Phil</first><last>Ostheimer</last></author>
+      <author id="phil-sidney-ostheimer"><first>Phil</first><last>Ostheimer</last></author>
       <author><first>Mayank</first><last>Nagda</last></author>
       <author><first>Marius</first><last>Kloft</last></author>
       <author><first>Sophie</first><last>Fellenz</last></author>
@@ -16167,10 +16167,10 @@
     </paper>
     <paper id="1375">
       <title>Textual Coverage of Eventive Entries in Lexical Semantic Resources</title>
-      <author><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
       <author><first>Cristina Fernández</first><last>Alcaina</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>15835–15841</pages>
       <abstract>This short paper focuses on the coverage of eventive entries (verbs, predicates, etc.) of some well-known lexical semantic resources when applied to random running texts taken from the internet. While coverage gaps are often reported for manually created lexicons (which is the case of most semantically-oriented lexical ones), it was our aim to quantify these gaps, cross-lingually, on a new purely textual resource set produced by the HPLT Project from crawled internet data. Several English, German, Spanish and Czech lexical semantic resources (which, for the most part, focus on verbs and predicates) have been selected for this experiment. We also describe the challenges related to the fact that these resources are (to a varying extent) semantically oriented, meaning that the texts have to be preprocessed to obtain lemmas (base forms) and some types of MWEs before the coverage can be reasonably evaluated, and thus the results are necessarily only approximate. The coverage of these resources, with some exclusions as described in the paper, range from 41.00% to 97.33%, confirming the need to expand at least some - even well-known - resources to cover the prevailing source of today’s textual resources with regard to lexical units describing events or states (or possibly other eventive mentions).</abstract>
       <url hash="ff9b88de">2024.lrec-main.1375</url>
@@ -16197,7 +16197,7 @@
     </paper>
     <paper id="1378">
       <title>The Corpus <fixed-case>AIKIA</fixed-case>: Using Ranking Annotation for Offensive Language Detection in <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Vivian</first><last>Stamou</last></author>
       <author><first>Christina</first><last>Christodoulou</last></author>
       <author><first>Georgia</first><last>Apostolopoulou</last></author>
@@ -16256,7 +16256,7 @@
       <author><first>Børre</first><last>Gaup</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <author><first>Maja Lisa</first><last>Kappfjell</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <pages>15922–15931</pages>
       <abstract>Creating language technology based on language data has become very popular with the recent advances of large language models and neural network technologies. This makes language resources very valuable, and especially in case of indigenous languages, the scarce resources are even more precious. Given the good results of simply fetching everything you can from the internet and feeding it to neural networks in English, there has been more work on doing the same for all languages. However, indigenous language resources as they are on the web are not comparable in that they would encode the most recent normativised language in all its aspects. This problematic is further due to not understanding the texts input to models or output by models by the people who work on them. Corpora also have intelligent property rights and copyrights that are not respected. Furthermore, the web is filled with the result of language model -generated texts. In this article we describe an ethical and sustainable way to work with indigenous languages.</abstract>
       <url hash="8076593b">2024.lrec-main.1383</url>
@@ -16285,8 +16285,8 @@
     </paper>
     <paper id="1385">
       <title>The Impact of Stance Object Type on the Quality of Stance Detection</title>
-      <author><first>Maxwell A.</first><last>Weinzierl</last></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="maxwell-weinzierl"><first>Maxwell A.</first><last>Weinzierl</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <pages>15942–15954</pages>
       <abstract>Stance as an expression of an author’s standpoint and as a means of communication has long been studied by computational linguists. Automatically identifying the stance of a subject toward an object is an active area of research in natural language processing. Significant work has employed topics and claims as the object of stance, with frames of communication becoming more recently considered as alternative objects of stance. However, little attention has been paid to finding what are the benefits and what are the drawbacks when inferring the stance of a text towards different possible stance objects. In this paper we seek to answer this question by analyzing the implied knowledge and the judgments required when deciding the stance of a text towards each stance object type. Our analysis informed experiments with models capable of inferring the stance of a text towards any of the stance object types considered, namely topics, claims, and frames of communication. Experiments clearly indicate that it is best to infer the stance of a text towards a frame of communication, rather than a claim or a topic. It is also better to infer the stance of a text towards a claim rather than a topic. Therefore we advocate that rather than continuing efforts to annotate the stance of texts towards topics, it is better to use those efforts to produce annotations towards frames of communication. These efforts will allow us to better capture the stance towards claims and topics as well.</abstract>
       <url hash="e34f58bc">2024.lrec-main.1385</url>
@@ -16324,7 +16324,7 @@
     <paper id="1389">
       <title>The Onomastic Repertoire of the <fixed-case>R</fixed-case>oman d’Alexandre (<fixed-case>ORNARE</fixed-case>). Designing an Integrated Digital Onomastic Tool for Medieval <fixed-case>F</fixed-case>rench <fixed-case>R</fixed-case>omance</title>
       <author><first>Marta</first><last>Milazzo</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <pages>15982–15987</pages>
       <abstract>The paper reports on the first results of the design and implementation of a new digital tool for romance philology: the digital Onomastic Repertoire for the medieval French romance (12th-15th centuries). This tool, projected with a modular and integrable architecture, was implemented from a selection of romances, the corpus of the Medieval French Roman d’Alexandre. After introducing the peculiarities of the onomastic system in the Middle Ages (and, more generally, the peculiarities of medieval literary texts), the paper describes 1) the methodological challenges faced in the preparatory work, illustrates and comments on the first results achieved and 2) the design and implementation of the first integrated system for the interactive creation of the Onomastic Repertoire of the romaN d’AlexandRE (ORNARE), and 3) the current research output in terms of both a digital edition and the digital onomastic index of the corpus.</abstract>
       <url hash="558f2156">2024.lrec-main.1389</url>
@@ -16390,7 +16390,7 @@
       <title>The <fixed-case>RIP</fixed-case> Corpus of Collaborative Hypothesis-Making</title>
       <author><first>Ella</first><last>Schad</last></author>
       <author><first>Jacky</first><last>Visser</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>16047–16057</pages>
       <abstract>The dearth of literature combining hypothesis-making and collaborative problem solving presents a problem in the investigation into how hypotheses are generated in group environments. A new dataset, the Resolving Investigative hyPotheses (RIP) corpus, is introduced to address this issue. The corpus uses the fictionalised environment of a murder investigation game. An artificial environment restricts the number of possible hypotheses compared to real-world situations, allowing a deeper dive into the data. In three groups of three, participants collaborated to solve the mystery: two groups came to the wrong conclusion in different ways, and one succeeded in solving the game. RIP is a 49k-word dialogical corpus, consisting of three sub-corpora, annotated for argumentation and discourse structure on the basis of Inference Anchoring Theory. The corpus shows the emergent roles individuals took on and the strategies the groups employed, showing what can be gained through a deeper exploration of this domain. The corpus bridges the gap between these two areas – hypothesis generation and collaborative problem solving – by using an environment rich with potential for hypothesising within a highly collaborative space.</abstract>
       <url hash="e52b02e1">2024.lrec-main.1395</url>
@@ -16434,7 +16434,7 @@
       <author><first>Róbert</first><last>Sabo</last></author>
       <author><first>Katarína</first><last>Polónyiová</last></author>
       <author><first>Daniela</first><last>Ostatníková</last></author>
-      <author><first>Štefan</first><last>Beňuš</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
       <pages>16094–16099</pages>
       <abstract>This paper presents the Slovak Autistic and Non-Autistic Child Speech Corpus, which consists of audio-recordings and transcripts of collaborative, task-oriented conversations between children (with or without autism spectrum disorder, ASD) and a non-autistic adult experimenter. The task used to elicit this corpus was the Maps task. This corpus was primarily recorded to investigate lexical alignment, but can also be used to study other conversation coordination strategies and behaviours. Scores on various standardised psychometric tests, such as those measuring IQ, executive functioning, and theory of mind, are included for each participant. In total, the corpus contains over 15 hours of speech. This relatively large database contains a non-Germanic language and can be shared with any qualified researcher, making it a valuable resource for replication of existing findings regarding communication and ASD as well as future research into communication between individuals with and without ASD.</abstract>
       <url hash="a7f6907c">2024.lrec-main.1399</url>
@@ -16557,7 +16557,7 @@
       <author><first>Wen-wai</first><last>Yim</last></author>
       <author><first>Yujuan</first><last>Fu</last></author>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>16211–16223</pages>
       <abstract>Unpredictability, especially unpredictability with unknown error characteristics, is a highly undesirable trait, particularly in medical patient care applications. Although large pre-trained language models (LLM) have been applied to a variety of unseen tasks with highly competitive and successful results, their sensitivity to language inputs and resulting performance variability is not well-studied. In this work, we test state-of-the-art pre-trained language models from a variety of families to characterize their error generation and reliability in medical assessment ability. Particularly, we experiment with general medical assessment multiple choice tests, as well as their open-ended and true-false alternatives. We also profile model consistency, error agreements with each other and to humans; and finally, quantify their ability to recover and explain errors. The findings in this work can be used to give further information about medical models so that modelers can make better-informed decisions rather than relying on standalone performance metrics alone.</abstract>
       <url hash="ca328adf">2024.lrec-main.1409</url>
@@ -16600,7 +16600,7 @@
       <title><fixed-case>T</fixed-case>ool<fixed-case>R</fixed-case>erank: Adaptive and Hierarchy-Aware Reranking for Tool Retrieval</title>
       <author><first>Yuanhang</first><last>Zheng</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last></author>
@@ -16633,7 +16633,7 @@
       <author><first>Hang</first><last>Jiang</last></author>
       <author><first>Doug</first><last>Beeferman</last></author>
       <author><first>Weiquan</first><last>Mao</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>16293–16303</pages>
       <abstract>The time at which a message is communicated is a vital piece of metadata in many real-world natural language processing tasks such as Topic Detection and Tracking (TDT). TDT systems aim to cluster a corpus of news articles by event, and in that context, stories that describe the same event are likely to have been written at around the same time. Prior work on time modeling for TDT takes this into account, but does not well capture how time interacts with the semantic nature of the event. For example, stories about a tropical storm are likely to be written within a short time interval, while stories about a movie release may appear over weeks or months. In our work, we design a neural method that fuses temporal and textual information into a single representation of news documents for event detection. We fine-tune these time-aware document embeddings with a triplet loss architecture, integrate the model into downstream TDT systems, and evaluate the systems on two benchmark TDT data sets in English. In the retrospective setting, we apply clustering algorithms to the time-aware embeddings and show substantial improvements over baselines on the News2013 data set. In the online streaming setting, we add our document encoder to an existing state-of-the-art TDT pipeline and demonstrate that it can benefit the overall performance. We conduct ablation studies on the time representation and fusion algorithm strategies, showing that our proposed model outperforms alternative strategies. Finally, we probe the model to examine how it handles recurring events more effectively than previous TDT systems.</abstract>
       <url hash="9aef255f">2024.lrec-main.1416</url>
@@ -16643,7 +16643,7 @@
       <title><fixed-case>T</fixed-case>opic<fixed-case>D</fixed-case>iff: A Topic-enriched Diffusion Approach for Multimodal Conversational Emotion Detection</title>
       <author><first>Jiamin</first><last>Luo</last></author>
       <author><first>Jingjing</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>16304–16314</pages>
       <abstract>Multimodal Conversational Emotion (MCE) detection, generally spanning across the acoustic, vision and language modalities, has attracted increasing interest in the multimedia community. Previous studies predominantly focus on learning contextual information in conversations with only a few considering the topic information in single language modality, while always neglecting the acoustic and vision topic information. On this basis, we propose a model-agnostic Topic-enriched Diffusion (TopicDiff) approach for capturing multimodal topic information in MCE tasks. Particularly, we integrate the diffusion model into neural topic model to alleviate the diversity deficiency problem of neural topic model in capturing topic information. Detailed evaluations demonstrate the significant improvements of TopicDiff over the state-of-the-art MCE baselines, justifying the importance of multimodal topic information to MCE and the effectiveness of TopicDiff in capturing such information. Furthermore, we observe an interesting finding that the topic information in acoustic and vision is more discriminative and robust compared to the language.</abstract>
       <url hash="bbbb619a">2024.lrec-main.1417</url>
@@ -16682,7 +16682,7 @@
     </paper>
     <paper id="1421">
       <title>Towards a <fixed-case>D</fixed-case>anish Semantic Reasoning Benchmark - Compiled from Lexical-Semantic Resources for Assessing Selected Language Understanding Capabilities of Large Language Models</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Nathalie</first><last>Sørensen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
@@ -16706,7 +16706,7 @@
       <author><first>Shinka</first><last>Mori</last></author>
       <author><first>Oana</first><last>Ignat</last></author>
       <author><first>Andrew</first><last>Lee</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>16378–16391</pages>
       <abstract>Synthetic data generation has the potential to impact applications and domains with scarce data. However, before such data is used for sensitive tasks such as mental health, we need an understanding of how different demographics are represented in it. In our paper, we analyze the potential of producing synthetic data using GPT-3 by exploring the various stressors it attributes to different race and gender combinations, to provide insight for future researchers looking into using LLMs for data generation. Using GPT-3, we develop HeadRoom, a synthetic dataset of 3,120 posts about depression-triggering stressors, by controlling for race, gender, and time frame (before and after COVID-19). Using this dataset, we conduct semantic and lexical analyses to (1) identify the predominant stressors for each demographic group; and (2) compare our synthetic data to a human-generated dataset. We present the procedures to generate queries to develop depression data using GPT-3, and conduct analyzes to uncover the types of stressors it assigns to demographic groups, which could be used to test the limitations of LLMs for synthetic data generation for depression data. Our findings show that synthetic data mimics some of the human-generated data distribution for the predominant depression stressors across diverse demographics.</abstract>
       <url hash="b4cefd1e">2024.lrec-main.1423</url>
@@ -16715,7 +16715,7 @@
     <paper id="1424">
       <title>Towards an Ideal Tool for Learner Error Annotation</title>
       <author><first>Špela</first><last>Arhar Holdt</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Iztok</first><last>Kosem</last></author>
       <author><first>Elena</first><last>Volodina</last></author>
       <pages>16392–16398</pages>
@@ -16725,7 +16725,7 @@
     </paper>
     <paper id="1425">
       <title>Towards Answering Health-related Questions from Medical Videos: Datasets and Approaches</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Kush</first><last>Attal</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
       <pages>16399–16411</pages>
@@ -16736,7 +16736,7 @@
     <paper id="1426">
       <title>Towards a Unified Taxonomy of Deep Syntactic Relations</title>
       <author><first>Kira</first><last>Droganova</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>16412–16421</pages>
       <abstract>This paper analyzes multiple deep-syntactic frameworks with the goal of creating a proposal for a set of universal semantic role labels. The proposal examines various theoretic linguistic perspectives and focuses on Meaning-Text Theory and Functional Generative Description frameworks and PropBank. The research is based on the data from four Indo-European and one Uralic language – Spanish and Catalan (Taulé et al., 2011), Czech (Hajič et al., 2017), English (Hajič et al., 2012), and Finnish (Haverinen et al., 2015). Updated datasets with the new universal semantic role labels are now publicly available as a result of our work. Nevertheless, our proposal is oriented towards Universal Dependencies (UD) (de Marneffe et al., 2021) and our ultimate goal is to apply a subset of the universal labels to the full UD data.</abstract>
       <url hash="6980e2a6">2024.lrec-main.1426</url>
@@ -16795,7 +16795,7 @@
     <paper id="1431">
       <title>Towards Cost-effective Multi-style Conversations: A Pilot Study in Task-oriented Dialogue Generation</title>
       <author><first>Tiziano</first><last>Labruna</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>16473–16479</pages>
       <abstract>Conversations exhibit significant variation when different styles are employed by participants, often leading to subpar performance when a dialogue model is exclusively trained on single-style datasets. We present a cost-effective methodology for generating multi-style conversations, which can be used in the development of conversational agents. This methodology only assumes the availability of a conversational domain, such as a knowledge base, and leverages the generative capabilities of large language models. In a pilot study focused on the generation aspect of task-oriented dialogues, we extended the well-known MultiWOZ dataset to encompass multi-style variations. Our findings highlight two key experimental outcomes: (i) these novel resources pose challenges for current single-style models, and (ii) multi-style resources enhance the dialogue model’s resilience to stylistic variations.</abstract>
       <url hash="52919493">2024.lrec-main.1431</url>
@@ -16805,7 +16805,7 @@
       <title>Towards Dog Bark Decoding: Leveraging Human Speech Processing for Automated Bark Classification</title>
       <author><first>Artem</first><last>Abzaliev</last></author>
       <author><first>Humberto</first><last>Perez-Espinosa</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>16480–16486</pages>
       <abstract>Similar to humans, animals make extensive use of verbal and non-verbal forms of communication, including a large range of audio signals. In this paper, we address dog vocalizations and explore the use of self-supervised speech representation models pre-trained on human speech to address dog bark classification tasks that find parallels in human-centered tasks in speech recognition. We specifically address four tasks: dog recognition, breed identification, gender classification, and context grounding. We show that using speech embedding representations significantly improves over simpler classification baselines. Further, we also find that models pre-trained on large human speech acoustics can provide additional performance boosts on several tasks.</abstract>
       <url hash="198b643f">2024.lrec-main.1432</url>
@@ -16960,7 +16960,7 @@
       <title>Towards Robust In-Context Learning for Machine Translation with Large Language Models</title>
       <author><first>Shaolin</first><last>Zhu</last></author>
       <author><first>Menglong</first><last>Cui</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>16619–16629</pages>
       <abstract>Using large language models (LLMs) for machine translation via in-context learning (ICL) has become an interesting research direction of machine translation (MT) in recent years. Its main idea is to retrieve a few translation pairs as demonstrations from an additional datastore (parallel corpus) to guide translation without updating the LLMs. However, the underlying noise of retrieved demonstrations usually dramatically deteriorate the performance of LLMs. In this paper, we propose a robust method to enable LLMs to achieve robust translation with ICL. The method incorporates a multi-view approach, considering both sentence- and word-level information, to select demonstrations that effectively avoid noise. At the sentence level, a margin-based score is designed to avoid semantic noise. At the word level, word embeddings are utilized to evaluate the related tokens and change the weight of words in demonstrations. By considering both sentence- and word-level similarity, the proposed method provides fine-grained demonstrations that effectively prompt the translation of LLMs. Experimental results demonstrate the effectiveness of our method, particularly in domain adaptation.</abstract>
       <url hash="fcd579da">2024.lrec-main.1444</url>
@@ -16985,7 +16985,7 @@
     <paper id="1446">
       <title>Towards Semantic Tagging for <fixed-case>I</fixed-case>rish</title>
       <author><first>Tim</first><last>Czerniak</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <pages>16643–16652</pages>
       <abstract>Well annotated corpora have been shown to have great value, both in linguistic and non-linguistic research, and in supporting machine-learning and many other non-research activities including language teaching. For minority languages, annotated corpora can help in understanding language usage norms among native and non-native speakers, providing valuable information both for lexicography and for teaching, and helping to combat the decline of speaker numbers. At the same time, minority languages suffer from having fewer available language resources than majority languages, and far less-developed annotation tooling. To date there is very little work in semantic annotation for Irish. In this paper we report on progress to date in the building of a standard tool-set for semantic annotation of Irish, including a novel method for evaluation of semantic annotation. A small corpus of Irish language data has been manually annotated with semantic tags, and manually checked. A semantic type tagging framework has then been developed using existing technologies, and using a semantic lexicon that has been built from a variety of sources. Semantic disambiguation methods have been added with a view to increasing accuracy. That framework has then been tested using the manually tagged corpus, resulting in over 90% lexical coverage and almost 80% tag accuracy. Development is ongoing as part of a larger corpus development project, and plans include expansion of the manually tagged corpus, expansion of the lexicon, and exploration of further disambiguation methods. As the first semantic tagger for Irish, to our knowledge, it is hoped that this research will form a sound basis for semantic annotation of Irish corpora in to the future.</abstract>
       <url hash="18a280c1">2024.lrec-main.1446</url>
@@ -16995,7 +16995,7 @@
       <title>Towards Standardized Annotation and Parsing for <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Yige</first><last>Chen</last></author>
       <author><first>Jae</first><last>Ihn</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <pages>16653–16658</pages>
       <abstract>Previous research on Korean FrameNet has produced several datasets that serve as resources for FrameNet parsing in Korean. However, these datasets suffer from the problem that annotations are assigned on the word level, which is not optimally designed based on the agglutinative feature of Korean. To address this issue, we introduce a morphologically enhanced annotation strategy for Korean FrameNet datasets and parsing by leveraging the CoNLL-U format. We present the results of the FrameNet parsers trained on the Korean FrameNet data in the original format and our proposed format, respectively, and further elaborate on the linguistic rationales of our proposed scheme. We suggest the morpheme-based scheme to be the standard of Korean FrameNet data annotation.</abstract>
@@ -17015,7 +17015,7 @@
     <paper id="1449">
       <title>Towards Understanding the Relationship between In-context Learning and Compositional Generalization</title>
       <author><first>Sungjun</first><last>Han</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>16664–16679</pages>
       <abstract>According to the principle of compositional generalization, the meaning of a complex expression can be understood as a function of the meaning of its parts and of how they are combined. This principle is crucial for human language processing and also, arguably, for NLP models in the face of out-of-distribution data. However, many neural network models, including Transformers, have been shown to struggle with compositional generalization. In this paper, we hypothesize that forcing models to in-context learn can provide an inductive bias to promote compositional generalization. To test this hypothesis, we train a causal Transformer in a setting that renders ‘ordinary’ learning very difficult: we present it with different orderings of the training instance and shuffle instance labels. This corresponds to training the model on all possible few-shot learning problems attainable from the dataset. The model can solve the task, however, by utilizing earlier examples to generalize to later ones – i.e., in-context learning. In evaluations on the datasets, SCAN, COGS, and GeoQuery, models trained in this manner indeed show improved compositional generalization. This indicates the usefulness of in-context learning problems as an inductive bias for generalization.</abstract>
       <url hash="db115861">2024.lrec-main.1449</url>
@@ -17227,7 +17227,7 @@
     <paper id="1468">
       <title><fixed-case>T</fixed-case>weet<fixed-case>TER</fixed-case>: A Benchmark for Target Entity Retrieval on <fixed-case>T</fixed-case>witter without Knowledge Bases</title>
       <author><first>Kiamehr</first><last>Rezaee</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>16890–16896</pages>
       <abstract>Entity linking is a well-established task in NLP consisting of associating entity mentions with entries in a knowledge base. Current models have demonstrated competitive performance in standard text settings. However, when it comes to noisy domains such as social media, certain challenges still persist. Typically, to evaluate entity linking on existing benchmarks, a comprehensive knowledge base is necessary and models are expected to possess an understanding of all the entities contained within the knowledge base. However, in practical scenarios where the objective is to retrieve sentences specifically related to a particular entity, strict adherence to a complete understanding of all entities in the knowledge base may not be necessary. To address this gap, we introduce TweetTER (Tweet Target Entity Retrieval), a novel benchmark that aims to bridge the challenges in entity linking. The distinguishing feature of this benchmark is its approach of re-framing entity linking as a binary entity retrieval task. This enables the evaluation of language models’ performance without relying on a conventional knowledge base, providing a more practical and versatile evaluation framework for assessing the effectiveness of language models in entity retrieval tasks.</abstract>
@@ -17239,7 +17239,7 @@
       <author><first>Marco</first><last>Cognetta</last></author>
       <author><first>Vilém</first><last>Zouhar</last></author>
       <author><first>Sangwhan</first><last>Moon</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>16897–16906</pages>
       <abstract>In Tokenization and the Noiseless Channel (Zouhar et al., 2023), Rényi efficiency is suggested as an intrinsic mechanism for evaluating a tokenizer: for NLP tasks, the tokenizer which leads to the highest Rényi efficiency of the unigram distribution should be chosen. The Rényi efficiency is thus treated as a predictor of downstream performance (e.g., predicting BLEU for a machine translation task), without the expensive step of training multiple models with different tokenizers. Although useful, the predictive power of this metric is not perfect, and the authors note there are additional qualities of a good tokenization scheme that Rényi efficiency alone cannot capture. We describe two variants of BPE tokenization which can arbitrarily increase Rényi efficiency while decreasing the downstream model performance. These counterexamples expose cases where Rényi efficiency fails as an intrinsic tokenization metric and thus give insight for building more accurate predictors.</abstract>
       <url hash="b9578df7">2024.lrec-main.1469</url>
@@ -17267,8 +17267,8 @@
       <author><first>Arthur</first><last>Lorenzi</last></author>
       <author><first>Nurit</first><last>Melnik</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>William</first><last>Croft</last></author>
@@ -17323,7 +17323,7 @@
       <author><first>Gregorios</first><last>Katsios</last></author>
       <author><first>Ning</first><last>Sa</last></author>
       <author><first>Ankita</first><last>Bhaumik</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>16984–16997</pages>
       <abstract>The behavior and decision making of groups or communities can be dramatically influenced by individuals pushing particular agendas, e.g., to promote or disparage a person or an activity, to call for action, etc.. In the examination of online influence campaigns, particularly those related to important political and social events, scholars often concentrate on identifying the sources responsible for setting and controlling the agenda (e.g., public media). In this article we present a methodology for detecting specific instances of agenda control through social media where annotated data is limited or non-existent. By using a modest corpus of Twitter messages centered on the 2022 French Presidential Elections, we carry out a comprehensive evaluation of various approaches and techniques that can be applied to this problem. Our findings demonstrate that by treating the task as a textual entailment problem, it is possible to overcome the requirement for a large annotated training dataset.</abstract>
       <url hash="b0678345">2024.lrec-main.1476</url>
@@ -17422,7 +17422,7 @@
       <author><first>Fei</first><last>Mi</last></author>
       <author><first>Weichao</first><last>Wang</last></author>
       <author><first>Yasheng</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>17074–17086</pages>
       <abstract>Conversational retrieval refers to an information retrieval system that operates in an iterative and interactive manner, requiring the retrieval of various external resources, such as persona, knowledge, and even response, to effectively engage with the user and successfully complete the dialogue. However, most previous work trained independent retrievers for each specific resource, resulting in sub-optimal performance and low efficiency. Thus, we propose a multi-task framework function as a universal retriever for three dominant retrieval tasks during the conversation: persona selection, knowledge selection, and response selection. To this end, we design a dual-encoder architecture consisting of a context-adaptive dialogue encoder and a candidate encoder, aiming to attention to the relevant context from the long dialogue and retrieve suitable candidates by simply a dot product. Furthermore, we introduce two loss constraints to capture the subtle relationship between dialogue context and different candidates by regarding historically selected candidates as hard negatives. Extensive experiments and analysis establish state-of-the-art retrieval quality both within and outside its training domain, revealing the promising potential and generalization capability of our model to serve as a universal retriever for different candidate selection tasks simultaneously.</abstract>
       <url hash="09bf8b6d">2024.lrec-main.1483</url>
@@ -17430,19 +17430,19 @@
     </paper>
     <paper id="1484">
       <title>Universal Anaphora: The First Three Years</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Maciej</first><last>Ogrodniczuk</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>17087–17100</pages>
       <abstract>The aim of the Universal Anaphora initiative is to push forward the state of the art in anaphora and anaphora resolution by expanding the aspects of anaphoric interpretation which are or can be reliably annotated in anaphoric corpora, producing unified standards to annotate and encode these annotations, delivering datasets encoded according to these standards, and developing methods for evaluating models that carry out this type of interpretation. Although several papers on aspects of the initiative have appeared, no overall description of the initiative’s goals, proposals and achievements has been published yet except as an online draft. This paper aims to fill this gap, as well as to discuss its progress so far.</abstract>
       <url hash="da279016">2024.lrec-main.1484</url>
@@ -17563,7 +17563,7 @@
       <author><first>Bozhi</first><last>Wu</last></author>
       <author><first>Yushi</first><last>Cao</last></author>
       <author><first>Junzhe</first><last>Jiang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>17205–17216</pages>
       <abstract>Deep learning has introduced significant improvements in many software analysis tasks. Although the Large Language Models (LLMs) based neural code models demonstrate commendable performance when trained and tested within the intra-project independent and identically distributed (IID) setting, they often struggle to generalize effectively to real-world inter-project out-of-distribution (OOD) data. In this work, we show that this phenomenon is caused by the heavy reliance on project-specific shortcuts for prediction instead of ground-truth evidence. We propose a Cond-Idf measurement to interpret this behavior, which quantifies the relatedness of a token with a label and its project-specificness. The strong correlation between model behavior and the proposed measurement indicates that without proper regularization, models tend to leverage spurious statistical cues for prediction. Equipped with these observations, we propose a novel bias mitigation mechanism that regularizes the model’s learning behavior by leveraging latent logic relations among samples. Experimental results on two representative program analysis tasks indicate that our mitigation framework can improve both inter-project OOD generalization and adversarial robustness, while not sacrificing accuracy on intra-project IID data.</abstract>
       <url hash="f1f8c15c">2024.lrec-main.1494</url>
@@ -17595,7 +17595,7 @@
       <author><first>Samee</first><last>Arif</last></author>
       <author><first>Sualeha</first><last>Farid</last></author>
       <author><first>Awais</first><last>Athar</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <pages>17237–17244</pages>
       <abstract>This paper introduces UQA, a novel dataset for question answering and text comprehension in Urdu, a low-resource language with over 70 million native speakers. UQA is generated by translating the Stanford Question Answering Dataset (SQuAD2.0), a large-scale English QA dataset, using a technique called EATS (Enclose to Anchor, Translate, Seek), which preserves the answer spans in the translated context paragraphs. The paper describes the process of selecting and evaluating the best translation model among two candidates: Google Translator and Seamless M4T. The paper also benchmarks several state-of-the-art multilingual QA models on UQA, including mBERT, XLM-RoBERTa, and mT5, and reports promising results. For XLM-RoBERTa-XL, we have an F1 score of 85.99 and 74.56 EM. UQA is a valuable resource for developing and testing multilingual NLP systems for Urdu and for enhancing the cross-lingual transferability of existing models. Further, the paper demonstrates the effectiveness of EATS for creating high-quality datasets for other languages and domains. The UQA dataset and the code are publicly available at www.github.com/sameearif/UQA</abstract>
       <url hash="a0e0fbf9">2024.lrec-main.1497</url>
@@ -17662,8 +17662,8 @@
     <paper id="1503">
       <title>Using Speech Technology to Test Theories of Phonetic and Phonological Typology</title>
       <author><first>Anisia</first><last>Popescu</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <pages>17321–17325</pages>
       <abstract>The present paper uses speech technology derived tools and methodologies to test theories about phonetic typology. We specifically look at how the two-way laryngeal contrast (voiced /b, d, g, v, z/ vs. voiceless /p, t, k, f, s/ obstruents) is implemented in European Portuguese, a language that has been suggested to exhibit a different voicing system than its sister Romance languages, more similar to the one found for Germanic languages. A large European Portuguese corpus was force aligned using (1) different combinations of parallel Portuguese (original), Italian (Romance language) and German (Germanic language) acoustic phone models and letting an ASR system choose the best fitting one, and (2) pronunciation variants (/b, d, g, v, z/ produced as either [b, d, g, v, z] or [p, t, k, f, s]) for obstruent consonants. Results support previous accounts in the literature that European Portuguese is diverging from the traditional voicing system known for Romance language, towards a hybrid system where stops and fricatives are specified for different voicing features.</abstract>
       <url hash="bd89ec9a">2024.lrec-main.1503</url>
@@ -17714,10 +17714,10 @@
     </paper>
     <paper id="1508">
       <title>Verbing Weirds Language (Models): Evaluation of <fixed-case>E</fixed-case>nglish Zero-Derivation in Five <fixed-case>LLM</fixed-case>s</title>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Valentina</first><last>Izrailevitch</last></author>
       <author><first>Yunze</first><last>Xiao</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Leonie</first><last>Weissweiler</last></author>
       <pages>17359–17364</pages>
       <abstract>Lexical-syntactic flexibility, in the form of conversion (or zero-derivation) is a hallmark of English morphology. In conversion, a word with one part of speech is placed in a non-prototypical context, where it is coerced to behave as if it had a different part of speech. However, while this process affects a large part of the English lexicon, little work has been done to establish the degree to which language models capture this type of generalization. This paper reports the first study on the behavior of large language models with reference to conversion. We design a task for testing lexical-syntactic flexibility—the degree to which models can generalize over words in a construction with a non-prototypical part of speech. This task is situated within a natural language inference paradigm. We test the abilities of five language models—two proprietary models (GPT-3.5 and GPT-4), three open source model (Mistral 7B, Falcon 40B, and Llama 2 70B). We find that GPT-4 performs best on the task, followed by GPT-3.5, but that the open source language models are also able to perform it and that the 7-billion parameter Mistral displays as little difference between its baseline performance on the natural language inference task and the non-prototypical syntactic category task, as the massive GPT-4.</abstract>
@@ -17811,7 +17811,7 @@
       <author><first>Malak</first><last>Rassem</last></author>
       <author><first>Chris</first><last>Jenkins</last></author>
       <author><first>Filip</first><last>Miletić</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>17449–17458</pages>
       <abstract>Predicting the compositionality of noun compounds such as climate change and tennis elbow is a vital component in natural language understanding. While most previous computational methods that automatically determine the semantic relatedness between compounds and their constituents have applied a synchronic perspective, the current study investigates what diachronic changes in contexts and semantic topics of compounds and constituents reveal about the compounds’ present-day degrees of compositionality. We define a binary classification task that utilizes two diachronic vector spaces based on contextual co-occurrences and semantic topics, and demonstrate that diachronic changes in cosine similarities – measured over context or topic distributions – uncover patterns that distinguish between compounds with low and high present-day compositionality. Despite fewer dimensions in the topic models, the topic space performs on par with the co-occurrence space and captures rather similar information. Temporal similarities between compounds and modifiers as well as between compounds and their prepositional paraphrases predict the compounds’ present-day compositionality with accuracy &gt;0.7.</abstract>
       <url hash="5cd51c3a">2024.lrec-main.1517</url>
@@ -17842,7 +17842,7 @@
       <author><first>Jun</first><last>Zhou</last></author>
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>17473–17485</pages>
       <abstract>Large Language Models (LLMs) are now being considered as judges of high efficiency to evaluate the quality of answers generated by candidate models. However, their judgments may be influenced by complex scenarios and inherent biases, raising concerns about their reliability. This study aims to bridge this gap by introducing four unexplored factors and examining the performance of LLMs as judges, namely answer quantity, inducing statements, judging strategy, and judging style. Additionally, we introduce a new dimension of question difficulty to provide a more comprehensive understanding of LLMs’ judgments across varying question intricacies. We employ ChatGPT, GPT-4, Gemini, and Claude-2 as judges and conduct experiments on Vicuna Benchmark and MT-bench. Our study reveals that LLMs’ judging abilities are susceptible to the influence of these four factors, and analyzing from the newly proposed dimension of question difficulty is highly necessary. We also provide valuable insights into optimizing LLMs’ performance as judges, enhancing their reliability and adaptability across diverse evaluation scenarios.</abstract>
       <url hash="b725d38b">2024.lrec-main.1519</url>
@@ -17911,8 +17911,8 @@
       <title>When Your Cousin Has the Right Connections: Unsupervised Bilingual Lexicon Induction for Related Data-Imbalanced Languages</title>
       <author><first>Niyati</first><last>Bafna</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rachel</first><last>Bawden</last></author>
       <pages>17544–17556</pages>
       <abstract>Most existing approaches for unsupervised bilingual lexicon induction (BLI) depend on good quality static or contextual embeddings requiring large monolingual corpora for both languages. However, unsupervised BLI is most likely to be useful for low-resource languages (LRLs), where large datasets are not available. Often we are interested in building bilingual resources for LRLs against related high-resource languages (HRLs), resulting in severely imbalanced data settings for BLI. We first show that state-of-the-art BLI methods in the literature exhibit near-zero performance for severely data-imbalanced language pairs, indicating that these settings require more robust techniques. We then present a new method for unsupervised BLI between a related LRL and HRL that only requires inference on a masked language model of the HRL, and demonstrate its effectiveness on truly low-resource languages Bhojpuri and Magahi (with &lt;5M monolingual tokens each), against Hindi. We further present experiments on (mid-resource) Marathi and Nepali to compare approach performances by resource range, and release our resulting lexicons for five low-resource Indic languages: Bhojpuri, Magahi, Awadhi, Braj, and Maithili, against Hindi.</abstract>
@@ -17944,7 +17944,7 @@
     <paper id="1529">
       <title>Who Is Bragging More Online? A Large Scale Analysis of Bragging in Social Media</title>
       <author><first>Mali</first><last>Jin</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></author>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>17575–17587</pages>
@@ -17976,7 +17976,7 @@
     <paper id="1532">
       <title><fixed-case>W</fixed-case>iki<fixed-case>F</fixed-case>act<fixed-case>D</fixed-case>iff: A Large, Realistic, and Temporally Adaptable Dataset for Atomic Factual Knowledge Update in Causal Language Models</title>
       <author><first>Hichem</first><last>Ammar Khodja</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Quentin</first><last>Brabant</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
@@ -18002,10 +18002,10 @@
       <title>Willkommens-Merkel, Chaos-<fixed-case>J</fixed-case>ohnson, and Tore-Klose: Modeling the Evaluative Meaning of <fixed-case>G</fixed-case>erman Personal Name Compounds</title>
       <author><first>Annerose</first><last>Eichel</last></author>
       <author><first>Tana</first><last>Deeg</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Milena</first><last>Belosevic</last></author>
       <author><first>Sabine</first><last>Arndt-Lappe</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>17637–17650</pages>
       <abstract>We present a comprehensive computational study of the under-investigated phenomenon of personal name compounds (PNCs) in German such as Willkommens-Merkel (‘Welcome-Merkel’). Prevalent in news, social media, and political discourse, PNCs are hypothesized to exhibit an evaluative function that is reflected in a more positive or negative perception as compared to the respective personal full name (such as Angela Merkel). We model 321 PNCs and their corresponding full names at discourse level, and show that PNCs bear an evaluative nature that can be captured through a variety of computational methods. Specifically, we assess through valence information whether a PNC is more positively or negatively evaluative than the person’s name, by applying and comparing two approaches using (i) valence norms and (ii) pre-trained language models (PLMs). We further enrich our data with personal, domain-specific, and extra-linguistic information and perform a range of regression analyses revealing that factors including compound and modifier valence, domain, and political party membership influence how a PNC is evaluated.</abstract>
       <url hash="bec4eccf">2024.lrec-main.1534</url>
@@ -18043,7 +18043,7 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Shiming</first><last>He</last></author>
       <author><first>Qingting</first><last>Xu</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>17675–17682</pages>
       <abstract>Event Detection (ED) is a task of automatically extracting multi-class trigger words. The understanding of word sense is crucial for ED. In this paper, we utilize context-specific commonsense knowledge to strengthen word sense modeling. Specifically, we leverage a Context-specific Knowledge Selector (CKS) to select the exact commonsense knowledge of words from a large knowledge base, i.e., ConceptNet. Context-specific selection is made in terms of the relevance of knowledge to the living contexts. On this basis, we incorporate the commonsense knowledge into the word-level representations before decoding. ChatGPT is an ideal generative CKS when the prompts are deliberately designed, though it is cost-prohibitive. To avoid the heavy reliance on ChatGPT, we train an offline CKS using the predictions of ChatGPT over a small number of examples (about 9% of all). We experiment on the benchmark ACE-2005 dataset. The test results show that our approach yields substantial improvements compared to the BERT baseline, achieving the F1-score of about 78.3%. All models, source codes and data will be made publicly available.</abstract>
       <url hash="06fcac32">2024.lrec-main.1537</url>
@@ -18053,7 +18053,7 @@
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et under Scrutiny: Dictionary Examples in the Era of Large Language Models</title>
       <author><first>Fatemah Yousef</first><last>Almeman</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>17683–17695</pages>
       <abstract>Dictionary definitions play a prominent role in a wide range of NLP tasks, for instance by providing additional context about the meaning of rare and emerging terms. Many dictionaries also provide examples to illustrate the prototypical usage of words, which brings further opportunities for training or enriching NLP models. The intrinsic qualities of dictionaries, and related lexical resources such as glossaries and encyclopedias, are however still not well-understood. While there has been significant work on developing best practices, such guidance has been aimed at traditional usages of dictionaries (e.g. supporting language learners), and it is currently unclear how different quality aspects affect the NLP systems that rely on them. To address this issue, we compare WordNet, the most commonly used lexical resource in NLP, with a variety of dictionaries, as well as with examples that were generated by ChatGPT. Our analysis involves human judgments as well as automatic metrics. We furthermore study the quality of word embeddings derived from dictionary examples, as a proxy for downstream performance. We find that WordNet’s examples lead to lower-quality embeddings than those from the Oxford dictionary. Surprisingly, however, the ChatGPT generated examples were found to be most effective overall.</abstract>
       <url hash="cbc42b5b">2024.lrec-main.1538</url>
@@ -18130,7 +18130,7 @@
     <paper id="1545">
       <title>Your Stereotypical Mileage May Vary: Practical Challenges of Evaluating Biases in Multiple Languages and Cultural Contexts</title>
       <author><first>Karen</first><last>Fort</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
       <author><first>Julien</first><last>Bezançon</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
@@ -18151,7 +18151,7 @@
       <author><first>Javier</first><last>Torroba Marchante</last></author>
       <author><first>Shilin</first><last>Xie</last></author>
       <author><first>Sergio E.</first><last>Zanotto</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>17764–17769</pages>
       <abstract>Warning: This paper contains explicit statements of offensive stereotypes which may be upsetting The study of bias, fairness and social impact in Natural Language Processing (NLP) lacks resources in languages other than English. Our objective is to support the evaluation of bias in language models in a multilingual setting. We use stereotypes across nine types of biases to build a corpus containing contrasting sentence pairs, one sentence that presents a stereotype concerning an underadvantaged group and another minimally changed sentence, concerning a matching advantaged group. We build on the French CrowS-Pairs corpus and guidelines to provide translations of the existing material into seven additional languages. In total, we produce 11,139 new sentence pairs that cover stereotypes dealing with nine types of biases in seven cultural contexts. We use the final resource for the evaluation of relevant monolingual and multilingual masked language models. We find that language models in all languages favor sentences that express stereotypes in most bias categories. The process of creating a resource that covers a wide range of language types and cultural settings highlights the difficulty of bias evaluation, in particular comparability across languages and contexts.</abstract>
       <url hash="6c5b794b">2024.lrec-main.1545</url>
@@ -18272,7 +18272,7 @@
       <booktitle>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024): Tutorial Summaries</booktitle>
       <editor><first>Roman</first><last>Klinger</last></editor>
       <editor><first>Naozaki</first><last>Okazaki</last></editor>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
       <editor><first>Min-Yen</first><last>Kan</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
@@ -18293,7 +18293,7 @@
       <author><first>Zhuosheng</first><last>Zhang</last></author>
       <author><first>Fuxiao</first><last>Liu</last></author>
       <author><first>Ao</first><last>Zhang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1–8</pages>
       <abstract>Artificial intelligence (AI) encompasses knowledge acquisition and real-world grounding across various modalities. As a multidisciplinary research field, multimodal large language models (MLLMs) have recently garnered growing interest in both academia and industry, showing an unprecedented trend to achieve human-level AI via MLLMs. These large models offer an effective vehicle for understanding, reasoning, and planning by integrating and modeling diverse information modalities, including language, visual, auditory, and sensory data. This tutorial aims to deliver a comprehensive review of cutting-edge research in MLLMs, focusing on four key areas: MLLM architecture design, instructional learning, multimodal reasoning, and the efficiency of MLLMs. We will explore technical advancements, synthesize key challenges, and discuss potential avenues for future research.</abstract>
       <url hash="c40d7c45">2024.lrec-tutorials.1</url>
@@ -18312,7 +18312,7 @@
       <title>Meaning Representations for Natural Languages: Design, Models and Applications</title>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Ishan</first><last>Jindal</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
@@ -18336,7 +18336,7 @@
     <paper id="5">
       <title>Mining, Assessing, and Improving Arguments in <fixed-case>NLP</fixed-case> and the Social Sciences</title>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <pages>26–32</pages>
@@ -18375,7 +18375,7 @@
       <title>Formal Semantic Controls over Language Models</title>
       <author><first>Danilo</first><last>Silva de Carvalho</last></author>
       <author><first>Yingji</first><last>Zhang</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>50–55</pages>
       <abstract>Text embeddings provide a concise representation of the semantics of sentences and larger spans of text, rather than individual words, capturing a wide range of linguistic features. They have found increasing application to a variety of NLP tasks, including machine translation and natural language inference. While most recent breakthroughs in task performance are being achieved by large scale distributional models, there is a growing disconnection between their knowledge representation and traditional semantics, which hinders efforts to capture such knowledge in human interpretable form or explain model inference behaviour. In this tutorial, we examine from basics to the cutting edge research on the analysis and control of text representations, aiming to shorten the gap between deep latent semantics and formal symbolics. This includes the considerations on knowledge formalisation, the linguistic information that can be extracted and measured from distributional models, and intervention techniques that enable explainable reasoning and controllable text generation, covering methods from pooling to LLM-based.</abstract>
       <url hash="f4d501c4">2024.lrec-tutorials.9</url>
@@ -18412,7 +18412,7 @@
       <author><first>Kishan</first><last>Maharaj</last></author>
       <author><first>Arif A.</first><last>Ahmad</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>73–79</pages>
       <abstract>In the landscape of natural language processing (NLP), addressing the challenges of bias and hallucination is paramount to ensuring the ethical and unbiased development of Large Language Models (LLMs). This tutorial delves into the intricate dimensions of LLMs, shedding light on the critical importance of understanding and mitigating the profound impacts of bias and hallucination. Divided into two parts, the first part delves deep into the complexity of bias propagation in LLM development, where we dissect its origins and far-reaching impacts. We then present innovative methodologies for mitigating diverse forms of bias, including dynamic word embeddings and robust benchmarking strategies. The second part of the tutorial discusses hallucination - a prevalent issue in generative AI systems such as LLMs. Through advanced data-driven techniques, we decode its intricate effects and complexities, followed factually-driven mitigation strategies. Furthermore, we shed light on the pivotal role of human cognitive behavior in the context of hallucination, drawing insights from cognitive data, including human eye-tracking data. Ultimately, this cutting-edge tutorial serves as a guiding light, equipping participants with indispensable tools and insights to navigate the ethical complexities of LLMs, thus paving the way for the development of unbiased and ethically robust NLP systems.</abstract>
       <url hash="44e8fc20">2024.lrec-tutorials.12</url>
diff --git a/data/xml/2024.lt4hala.xml b/data/xml/2024.lt4hala.xml
index c3b1ab3921..af863ccd9b 100644
--- a/data/xml/2024.lt4hala.xml
+++ b/data/xml/2024.lt4hala.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024</booktitle>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <editor><first>Marco</first><last>Passarotti</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>Developing a Part-of-speech Tagger for Diplomatically Edited <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text</title>
       <author><first>Adrian</first><last>Doyle</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>11–21</pages>
       <abstract>POS-tagging is typically considered a fundamental text preprocessing task, with a variety of downstream NLP tasks and techniques being dependent on the availability of POS-tagged corpora. As such, POS-taggers are important precursors to further NLP tasks, and their accuracy can impact the potential accuracy of these dependent tasks. While a variety of POS-tagging methods have been developed which work well with modern languages, historical languages present orthographic and editorial challenges which require special attention. The effectiveness of POS-taggers developed for modern languages is reduced when applied to Old Irish, with its comparatively complex orthography and morphology. This paper examines some of the obstacles to POS-tagging Old Irish text, and shows that inconsistencies between extant annotated corpora reduce the quantity of data available for use in training POS-taggers. The development of a multi-layer neural network model for POS-tagging Old Irish text is described, and an experiment is detailed which demonstrates that this model outperforms a variety of off-the-shelf POS-taggers. Moreover, this model sets a new benchmark for POS-tagging diplomatically edited Old Irish text.</abstract>
       <url hash="058a36b7">2024.lt4hala-1.2</url>
@@ -57,9 +57,9 @@
     </paper>
     <paper id="5">
       <title>Towards Named-Entity and Coreference Annotation of the <fixed-case>H</fixed-case>ebrew <fixed-case>B</fixed-case>ible</title>
-      <author><first>Daniel G.</first><last>Swanson</last></author>
+      <author id="daniel-g-swanson"><first>Daniel G.</first><last>Swanson</last></author>
       <author><first>Bryce D.</first><last>Bussert</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>36–40</pages>
       <abstract>Named-entity annotation refers to the process of specifying what real-world (or, at least, external-to-the-text) entities various names and descriptions within a text refer to. Coreference annotation, meanwhile, specifies what context-dependent words or phrases, such as pronouns refer to. This paper describes an ongoing project to apply both of these to the Hebrew Bible, so far covering most of the book of Genesis, fully marking every person, place, object, and point in time which occurs in the text. The annotation process and possible future uses for the data are covered, along with the challenges involved in applying existing annotation guidelines to the Hebrew text.</abstract>
       <url hash="2100a7b5">2024.lt4hala-1.5</url>
@@ -91,8 +91,8 @@
     <paper id="8">
       <title>Unsupervised Authorship Attribution for Medieval <fixed-case>L</fixed-case>atin Using Transformer-Based Embeddings</title>
       <author><first>Loic</first><last>De Langhe</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>57–64</pages>
       <abstract>We explore the potential of employing transformer-based embeddings in an unsupervised authorship attribution task for medieval Latin. The development of Large Language Models (LLMs) and recent advances in transfer learning alleviate many of the traditional issues associated with authorship attribution in lower-resourced (ancient) languages. Despite this, these methods remain heavily understudied within this domain. Concretely, we generate strong contextual embeddings using a variety of mono -and multilingual transformer models and use these as input for two unsupervised clustering methods: a standard agglomerative clustering algorithm and a self-organizing map. We show that these transformer-based embeddings can be used to generate high-quality and interpretable clusterings, resulting in an attractive alternative to the traditional feature-based methods.</abstract>
       <url hash="f1c335e3">2024.lt4hala-1.8</url>
@@ -149,7 +149,7 @@
     <paper id="14">
       <title>Leveraging <fixed-case>LLM</fixed-case>s for Post-<fixed-case>OCR</fixed-case> Correction of Historical Newspapers</title>
       <author><first>Alan</first><last>Thomas</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Haiping</first><last>Lu</last></author>
       <pages>116–121</pages>
       <abstract>Poor OCR quality continues to be a major obstacle for humanities scholars seeking to make use of digitised primary sources such as historical newspapers. Typical approaches to post-OCR correction employ sequence-to-sequence models for a neural machine translation task, mapping erroneous OCR texts to accurate reference texts. We shift our focus towards the adaptation of generative LLMs for a prompt-based approach. By instruction-tuning Llama 2 and comparing it to a fine-tuned BART on BLN600, a parallel corpus of 19th century British newspaper articles, we demonstrate the potential of a prompt-based approach in detecting and correcting OCR errors, even with limited training data. We achieve a significant enhancement in OCR quality with Llama 2 outperforming BART, achieving a 54.51% reduction in the character error rate against BART’s 23.30%. This paves the way for future work leveraging generative LLMs to improve the accessibility and unlock the full potential of historical texts for humanities research.</abstract>
@@ -183,7 +183,7 @@
       <title>Early <fixed-case>M</fixed-case>odern <fixed-case>D</fixed-case>utch Comedies and Farces in the Spotlight: Introducing <fixed-case>E</fixed-case>m<fixed-case>DC</fixed-case>om<fixed-case>F</fixed-case> and Its Emotion Framework</title>
       <author><first>Florian</first><last>Debaene</last></author>
       <author><first>Kornee</first><last>van der Haven</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>144–155</pages>
       <abstract>As computational drama studies are developing rapidly, the Dutch dramatic tradition is in need of centralisation still before it can benefit from state-of-the-art methodologies. This paper presents and evaluates EmDComF, a historical corpus of both manually curated and automatically digitised early modern Dutch comedies and farces authored between 1650 and 1725, and describes the refinement of a historically motivated annotation framework exploring sentiment and emotions in these two dramatic subgenres. Originating from Lodewijk Meyer’s philosophical writings on passions in the dramatic genre (±1670), published in Naauwkeurig onderwys in de tooneel-poëzy (Thorough instruction in the Poetics of Drama) by the literary society Nil Volentibus Arduum in 1765, a historical and genre-specific emotion framework is tested and operationalised for annotating emotions in the domain of early modern Dutch comedies and farces. Based on a frequency and cluster analysis of 782 annotated sentences by 2 expert annotators, the initial 38 emotion labels were restructured to a hierarchical label set of the 5 emotions Hatred, Anxiety, Sadness, Joy and Desire.</abstract>
       <url hash="e17642cc">2024.lt4hala-1.17</url>
@@ -207,11 +207,11 @@
     </paper>
     <paper id="20">
       <title>Automatic Normalisation of <fixed-case>M</fixed-case>iddle <fixed-case>F</fixed-case>rench and Its Impact on Productivity</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Sandra</first><last>Coram-Mekkey</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Jonathan David</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>176–189</pages>
       <abstract>This paper presents a study on automatic normalisation of 16th century documents written in Middle French. These documents present a large variety of wordforms which require spelling normalisation to facilitate downstream linguistic and historical studies. We frame the normalisation process as a machine translation task starting with a strong baseline leveraging a pre-trained encoder–decoder model. We propose to improve this baseline by combining synthetic data generation methods and producing artificial training data, thus tackling the lack of parallel corpora relevant to our task. The evaluation of our approach is twofold, in addition to automatic metrics relying on gold references, we evaluate our models through post-editing of their outputs. This evaluation method directly measures the productivity gain brought by our models to experts conducting the normalisation task manually. Results show a 20+ token per minute increase in productivity when using automatic normalisation compared to normalising text from scratch. The manually post-edited dataset resulting from our study is the first parallel corpus of normalised 16th century Middle French to be publicly released, along with the synthetic data and the automatic normalisation models used and trained in the presented work.</abstract>
       <url hash="e05e147f">2024.lt4hala-1.20</url>
diff --git a/data/xml/2024.ltedi.xml b/data/xml/2024.ltedi.xml
index 43701d7c1f..8180b9930e 100644
--- a/data/xml/2024.ltedi.xml
+++ b/data/xml/2024.ltedi.xml
@@ -25,7 +25,7 @@
       <title>Sociocultural knowledge is needed for selection of shots in hate speech detection tasks</title>
       <author><first>Antonis</first><last>Maronikolakis</last></author>
       <author><first>Abdullatif</first><last>Köksal</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1-13</pages>
       <abstract>We introduce HATELEXICON, a lexicon of slurs and targets of hate speech for Brazil, Germany, India and Kenya, to aid model development and interpretability. First, we demonstrate how HATELEXICON can be used to interpret model predictions, showing that models developed to classify extreme speech rely heavily on target group names. Further, we propose a culturally-informed method to aid shot selection for training in low-resource settings. In few-shot learning, shot selection is of paramount importance to model performance and we need to ensure we make the most of available data. We work with HASOC German and Hindi data for training and the Multilingual HateCheck (MHC) benchmark for evaluation. We show that selecting shots based on our lexicon leads to models performing better than models trained on shots sampled randomly. Thus, when given only a few training examples, using HATELEXICON to select shots containing more sociocultural information leads to better few-shot performance. With these two use-cases we show how our HATELEXICON can be used for more effective hate speech detection.</abstract>
       <url hash="3bea1a46">2024.ltedi-1.1</url>
@@ -105,7 +105,7 @@
       <author><first>Adhithya</first><last>Saravanan</last></author>
       <author><first>Roy</first><last>Jiang</last></author>
       <author><first>Or</first><last>Sharir</last><affiliation>California Institute of Technology</affiliation></author>
-      <author><first>Anima</first><last>Anandkumar</last><affiliation>California Institute of Technology and University of California, Irvine</affiliation></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last><affiliation>California Institute of Technology and University of California, Irvine</affiliation></author>
       <pages>73-105</pages>
       <abstract>Large Language models (LLMs), while powerful, exhibit harmful social biases. Debiasing is often challenging due to computational costs, data constraints, and potential degradation of multi-task language capabilities. This work introduces a novel approach utilizing ChatGPT to generate synthetic training data, aiming to enhance the debiasing of LLMs. We propose two strategies: Targeted Prompting, which provides effective debiasing for known biases but necessitates prior specification of bias in question; and General Prompting, which, while slightly less effective, offers debiasing across various categories. We leverage resource-efficient LLM debiasing using adapter tuning and compare the effectiveness of our synthetic data to existing debiasing datasets. Our results reveal that: (1) ChatGPT can efficiently produce high-quality training data for debiasing other LLMs; (2) data produced via our approach surpasses existing datasets in debiasing performance while also preserving internal knowledge of a pre-trained LLM; and (3) synthetic data exhibits generalizability across categories, effectively mitigating various biases, including intersectional ones. These findings underscore the potential of synthetic data in advancing the fairness of LLMs with minimal retraining cost.</abstract>
       <url hash="99cabb53">2024.ltedi-1.8</url>
@@ -143,7 +143,7 @@
       <author><first>Hosahalli</first><last>Shashirekha</last><affiliation>Mangalore University</affiliation></author>
       <author><first>Saranya</first><last>Rajiakodi</last><affiliation>Central University of Tamil Nadu</affiliation></author>
       <author><first>Miguel Ángel</first><last>García</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last><affiliation>Universidad de Jaén</affiliation></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last><affiliation>Universidad de Jaén</affiliation></author>
       <author><first>José</first><last>García-Díaz</last></author>
       <author><first>Rafael</first><last>Valencia-García</last><affiliation>Universidad de Murcia</affiliation></author>
       <author><first>Kishore</first><last>Ponnusamy</last></author>
@@ -174,7 +174,7 @@
       <author><first>Saranya</first><last>Rajiakodi</last><affiliation>Central University of Tamil Nadu</affiliation></author>
       <author><first>Rahul</first><last>Ponnusamy</last></author>
       <author><first>Kathiravan</first><last>Pannerselvam</last><affiliation>Central University of Tamil Nadu</affiliation></author>
-      <author><first>Anand Kumar</first><last>Madasamy</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>Madasamy</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
       <author><first>Ramachandran</first><last>Rajalakshmi</last></author>
       <author><first>Hariharan</first><last>LekshmiAmmal</last><affiliation>National Institute of Technology Karnataka</affiliation></author>
       <author><first>Anshid</first><last>Kizhakkeparambil</last></author>
@@ -227,7 +227,7 @@
       <title><fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-2024: An Ensemble Approach Towards Detecting Homophobia and Transphobia in Social Media Comments</title>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Md Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Md Nishat</first><last>Raihan</last></author>
       <author><first>Al Nahian Bin</first><last>Emran</last></author>
       <pages>164-172</pages>
       <abstract>In this paper, we describe our approaches and results for Task 2 of the LT-EDI 2024 Workshop, aimed at detecting homophobia and/or transphobia across ten languages. Our methodologies include monolingual transformers and ensemble methods, capitalizing on the strengths of each to enhance the performance of the models. The ensemble models worked well, placing our team, MasonTigers, in the top five for eight of the ten languages, as measured by the macro F1 score. Our work emphasizes the efficacy of ensemble methods in multilingual scenarios, addressing the complexities of language-specific tasks.</abstract>
diff --git a/data/xml/2024.luhme.xml b/data/xml/2024.luhme.xml
index 7fbf98a099..035df8f3f4 100644
--- a/data/xml/2024.luhme.xml
+++ b/data/xml/2024.luhme.xml
@@ -6,7 +6,7 @@
       <editor><first>Rui</first><last>Sousa-Silva</last></editor>
       <editor><first>Henrique</first><last>Lopes Cardoso</last></editor>
       <editor><first>Maarit</first><last>Koponen</last></editor>
-      <editor><first>Antonio</first><last>Pareja Lora</last></editor>
+      <editor id="antonio-pareja-lora"><first>Antonio</first><last>Pareja Lora</last></editor>
       <editor><first>Márta</first><last>Seresi</last></editor>
       <publisher>CLUP, Centro de Linguística da Universidade do Porto FLUP - Faculdade de Letras da Universidade do Porto</publisher>
       <address>Santiago de Compostela, Spain</address>
diff --git a/data/xml/2024.mathnlp.xml b/data/xml/2024.mathnlp.xml
index 74ff252d4d..af4aee60e2 100644
--- a/data/xml/2024.mathnlp.xml
+++ b/data/xml/2024.mathnlp.xml
@@ -6,7 +6,7 @@
       <editor><first>Marco</first><last>Valentino</last></editor>
       <editor><first>Deborah</first><last>Ferreira</last></editor>
       <editor><first>Mokanarangan</first><last>Thayaparan</last></editor>
-      <editor><first>Andre</first><last>Freitas</last></editor>
+      <editor id="andre-freitas"><first>Andre</first><last>Freitas</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -23,7 +23,7 @@
       <title>An Approach to Co-reference Resolution and Formula Grounding for Mathematical Identifiers Using Large Language Models</title>
       <author><first>Aamin</first><last>Dev</last></author>
       <author><first>Takuto</first><last>Asakura</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <pages>1–10</pages>
       <abstract>This paper outlines an automated approach to annotate mathematical identifiers in scientific papers — a process historically laborious and costly. We employ state-of-the-art LLMs, including GPT-3.5 and GPT-4, and open-source alternatives to generate a dictionary for annotating mathematical identifiers, linking each identifier to its conceivable descriptions and then assigning these definitions to the respective identifier in- stances based on context. Evaluation metrics include the CoNLL score for co-reference cluster quality and semantic correctness of the annotations.</abstract>
       <url hash="46dfe997">2024.mathnlp-1.1</url>
diff --git a/data/xml/2024.ml4al.xml b/data/xml/2024.ml4al.xml
index 3c7bc95218..8085f1ba10 100644
--- a/data/xml/2024.ml4al.xml
+++ b/data/xml/2024.ml4al.xml
@@ -9,7 +9,7 @@
       <editor><first>Shai</first><last>Gordin</last></editor>
       <editor><first>Kyunghyun</first><last>Cho</last></editor>
       <editor><first>Marco</first><last>Passarotti</last></editor>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <editor><first>Yudong</first><last>Liu</last></editor>
       <editor><first>Bin</first><last>Li</last></editor>
       <editor><first>Adam</first><last>Anderson</last></editor>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="8">
       <title>Lacuna Language Learning: Leveraging <fixed-case>RNN</fixed-case>s for Ranked Text Completion in Digitized <fixed-case>C</fixed-case>optic Manuscripts</title>
-      <author><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Cindy</first><last>Li</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Lydia</first><last>Bremer-McCollum</last></author>
       <author><first>Nicholas</first><last>Wagner</last><affiliation>Duke University</affiliation></author>
@@ -213,7 +213,7 @@
       <author><first>Marijke</first><last>Beersmans</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Alek</first><last>Keersmaekers</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Evelien</first><last>de Graaf</last><affiliation>KU Leuven</affiliation></author>
-      <author><first>Tim</first><last>Van de Cruys</last><affiliation>KU Leuven</affiliation></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Mark</first><last>Depauw</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Margherita</first><last>Fantoli</last><affiliation>KU Leuven</affiliation></author>
       <pages>152-164</pages>
@@ -261,7 +261,7 @@
       <title><fixed-case>S</fixed-case>um<fixed-case>T</fixed-case>ablets: A Transliteration Dataset of <fixed-case>S</fixed-case>umerian Tablets</title>
       <author><first>Cole</first><last>Simmons</last></author>
       <author><first>Richard</first><last>Diehl Martinez</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>192-202</pages>
       <abstract>Sumerian transliteration is a conventional system for representing a scholar's interpretation of a tablet in the Latin script. Thanks to visionary digital Assyriology projects such as ETCSL, CDLI, and Oracc, a large number of Sumerian transliterations have been published online, and these data are well-structured for a variety of search and analysis tasks. However, the absence of a comprehensive, accessible dataset pairing transliterations with a digital representation of the tablet's cuneiform glyphs has prevented the application of modern Natural Language Processing (NLP) methods to the task of Sumerian transliteration.
 
diff --git a/data/xml/2024.moomin.xml b/data/xml/2024.moomin.xml
index 9bc2134133..984da3e4af 100644
--- a/data/xml/2024.moomin.xml
+++ b/data/xml/2024.moomin.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 1st Workshop on Modular and Open Multilingual NLP (MOOMIN 2024)</booktitle>
       <editor><first>Raúl</first><last>Vázquez</last></editor>
       <editor><first>Timothee</first><last>Mickus</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <editor><first>Ahmet</first><last>Üstün</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
diff --git a/data/xml/2024.mrl.xml b/data/xml/2024.mrl.xml
index ea4bbb9f82..7be3f18d13 100644
--- a/data/xml/2024.mrl.xml
+++ b/data/xml/2024.mrl.xml
@@ -22,7 +22,7 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>amba<fixed-case>L</fixed-case>ingo: Teaching Large Language Models New Languages</title>
       <author><first>Zoltan</first><last>Csaki</last><affiliation>Sambanova Systems</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Jonathan Lingjie</first><last>Li</last></author>
       <author><first>Qiantong</first><last>Xu</last><affiliation>Sambanova Systems</affiliation></author>
       <author><first>Pian</first><last>Pawakapan</last></author>
@@ -128,7 +128,7 @@
       <author><first>Pierre</first><last>Andrews</last></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Reka AI</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>148-158</pages>
       <abstract>‘While machine translation (MT) systems have seen significant improvements,it is still common for translations to reflect societal biases, such as genderbias. Decoder-only language models (LLMs) have demonstrated potential in MT, albeitwith performance slightly lagging behind traditional encoder-decoder neural machinetranslation (NMT) systems. However, LLMs offer a unique advantage: the abilityto control the properties of the output through prompting. In this study, we leveragethis flexibility to explore Llama”s capability to produce gender-specific translations.Our results indicate that Llama can generate gender-specific translations withtranslation quality and gender bias comparable to NLLB, a state-of-the-art multilingualNMT system.’</abstract>
       <url hash="d3b7040f">2024.mrl-1.10</url>
@@ -246,7 +246,7 @@
       <title>Community <fixed-case>OSCAR</fixed-case>: A Community Effort for Multilingual Web Data</title>
       <author><first>Manuel</first><last>Brack</last><affiliation>German Research Center for AI and Technische Universität Darmstadt</affiliation></author>
       <author><first>Malte</first><last>Ostendorff</last><affiliation>Deutsche Telekom</affiliation></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>José Javier</first><last>Saiz</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <author><first>Iñaki Lacunza</first><last>Castilla</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <author><first>Jorge</first><last>Palomar-Giner</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
@@ -300,7 +300,7 @@
       <title>Language Bias in Multilingual Information Retrieval: The Nature of the Beast and Mitigation Methods</title>
       <author><first>Jinrui</first><last>Yang</last><affiliation>The University of Melbourne</affiliation></author>
       <author><first>Fan</first><last>Jiang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>280-292</pages>
       <abstract>Language fairness in multilingual information retrieval (MLIR) systems is crucial for ensuring equitable access to information across diverse languages. This paper sheds light on the issue, based on the assumption that queries in different languages, but with identical semantics, should yield equivalent ranking lists when retrieving on the same multilingual documents. We evaluate the degree of fairness using both traditional retrieval methods, and a DPR neural ranker based on mBERT and XLM-R. Additionally, we introduce ‘LaKDA’, a novel loss designed to mitigate language biases in neural MLIR approaches. Our analysis exposes intrinsic language biases in current MLIR technologies, with notable disparities across the retrieval methods, and the effectiveness of LaKDA in enhancing language fairness.</abstract>
       <url hash="d828a1c5">2024.mrl-1.23</url>
@@ -359,7 +359,7 @@ An Attempt towards Generalized Retriever for In-Context Learning</title>
       <author><first>Senyu</first><last>Li</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Jessica</first><last>Ojo</last><affiliation>Lelapa AI</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <pages>346-356</pages>
       <abstract>We present our systems for the three tasks and five languages included in the MRL 2024 Shared Task on Multilingual Multi-task Information Retrieval: (1) Named Entity Recognition, (2) Free-form Question Answering, and (3) Multiple-choice Question Answering. For each task, we explored the impact of selecting different multilingual language models for fine-tuning across various target languages, and implemented an ensemble system that generates final outputs based on predictions from multiple fine-tuned models. All models are large language models fine-tuned on task-specific data. Our experimental results show that a more balanced dataset would yield better results. However, when training data for certain languages are scarce, fine-tuning on a large amount of English data supplemented by a small amount of “triggering data” in the target language can produce decent results.</abstract>
       <url hash="8e0685c1">2024.mrl-1.28</url>
@@ -371,7 +371,7 @@ An Attempt towards Generalized Retriever for In-Context Learning</title>
       <author><first>Katharina</first><last>Hämmerl</last></author>
       <author><first>Andrei-Alexandru</first><last>Manea</last></author>
       <author><first>Gianluca</first><last>Vico</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles University Prague</affiliation></author>
       <pages>357-364</pages>
       <abstract>We present the joint CUNI and LMU submission to the MRL 2024 Shared Task on Multi-lingual Multi-task Information Retrieval.The shared task objective was to explore how we can deploy modern methods in NLP in multi-lingual low-resource settings, tested on two sub-tasks: Named-entity recognition and question answering.Our solutions to the subtasks are based on data acquisition and model adaptation.We compare the performance of our submitted systems with the translate-test approachwhich proved to be the most useful in the previous edition of the shared task.Our results show that using more data as well as fine-tuning recent multilingual pre-trained models leads to considerable improvements over the translate-test baseline.Our code is available at https://github.com/ufal/mrl2024-multilingual-ir-shared-task.</abstract>
@@ -395,7 +395,7 @@ An Attempt towards Generalized Retriever for In-Context Learning</title>
       <author><first>Anar</first><last>Rzayev</last><affiliation>KAIST</affiliation></author>
       <author><first>Jafar</first><last>Isbarov</last><affiliation>George Washington University and ADA University</affiliation></author>
       <author><first>Dursun</first><last>Dashdamirov</last><affiliation>George Washington University and ADA University</affiliation></author>
-      <author><first>David</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>Duygu</first><last>Ataman</last><affiliation>New York University</affiliation></author>
       <pages>365-376</pages>
       <abstract>Large language models (LLMs) demonstrate exceptional proficiency in both the comprehension and generation of textual data, particularly in English, a language for which extensive public benchmarks have been established across a wide range of natural language processing (NLP) tasks. Nonetheless, their performance in multilingual contexts and specialized domains remains less rigorously validated, raising questions about their reliability and generalizability across linguistically diverse and domain-specific settings. The second edition of the Shared Task on Multilingual Multitask Information Retrieval aims to provide a comprehensive and inclusive multilingual evaluation benchmark which aids assessing the ability of multilingual LLMs to capture logical, factual, or causal relationships within lengthy text contexts and generate language under sparse settings, particularly in scenarios with under-resourced languages. The shared task consists of two subtasks crucial to information retrieval: Named entity recognition (NER) and reading comprehension (RC), in 7 data-scarce languages: Azerbaijani, Swiss German, Turkish and , which previously lacked annotated resources in information retrieval tasks. This year specifally focus on the multiple-choice question answering evaluation setting which provides a more objective setting for comparing different methods across languages.</abstract>
diff --git a/data/xml/2024.mwe.xml b/data/xml/2024.mwe.xml
index 43ca32bf66..b2e361e56b 100644
--- a/data/xml/2024.mwe.xml
+++ b/data/xml/2024.mwe.xml
@@ -8,7 +8,7 @@
       <editor><first>A. Seza</first><last>Doğruöz</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
       <editor><first>Marcos</first><last>Garcia</last></editor>
-      <editor><first>Voula</first><last>Giouli</last></editor>
+      <editor id="voula-giouli"><first>Voula</first><last>Giouli</last></editor>
       <editor><first>Lifeng</first><last>Han</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <editor><first>Alexandre</first><last>Rademaker</last></editor>
@@ -149,7 +149,7 @@
       <author><first>Cherifa</first><last>Ben Khelil</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Iskandar</first><last>Keskes</last></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last></author>
       <pages>88–97</pages>
       <abstract>This paper highlights the importance of integrating MWE identification with the development of syntactic MWE lexicons. It suggests that lexicons with minimal morphosyntactic information can amplify current MWE-annotated datasets and refine identification strategies. To our knowledge, this work represents the first attempt to focus on both seen and unseen of VMWEs for Arabic. It also deals with the challenge of differentiating between literal and figurative interpretations of idiomatic expressions. The approach involves a dual-phase procedure: first projecting a VMWE lexicon onto a corpus to identify candidate occurrences, then disambiguating these occurrences to distinguish idiomatic from literal instances. Experiments outlined in the paper aim to assess the efficacy of this technique, utilizing a lexicon known as LEXAR and the “parseme-ar” corpus. The findings suggest that lexicon-driven strategies have the potential to refine MWE identification, particularly for unseen occurrences.</abstract>
       <url hash="c1331bf3">2024.mwe-1.13</url>
@@ -167,7 +167,7 @@
     <paper id="15">
       <title>Towards the semantic annotation of <fixed-case>SR</fixed-case>-<fixed-case>ELEXIS</fixed-case> corpus: Insights into Multiword Expressions and Named Entities</title>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Aleksandra M.</first><last>Marković</last></author>
       <author><first>Teodora Sofija</first><last>Mihajlov</last></author>
       <pages>106–114</pages>
@@ -189,7 +189,7 @@
       <title>Universal Feature-based Morphological Trees</title>
       <author><first>Federica</first><last>Gamba</last></author>
       <author><first>Abishek</first><last>Stephen</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>125–137</pages>
       <abstract>The paper proposes a novel data representation inspired by Universal Dependencies (UD) syntactic trees, which are extended to capture the internal morphological structure of word forms. As a result, morphological segmentation is incorporated within the UD representation of syntactic dependencies. To derive the proposed data structure we leverage existing annotation of UD treebanks as well as available resources for segmentation, and we select 10 languages to work with in the presented case study. Additionally, statistical analysis reveals a robust correlation between morphs and sets of morphological features of words. We thus align the morphs to the observed feature inventories capturing the morphological meaning of morphs. Through the beneficial exploitation of cross-lingual correspondence of morphs, the proposed syntactic representation based on morphological segmentation proves to enhance the comparability of sentence structures across languages.</abstract>
       <url hash="70a01a21">2024.mwe-1.17</url>
@@ -208,16 +208,16 @@
     </paper>
     <paper id="19">
       <title>Multiword Expressions between the Corpus and the Lexicon: Universality, Idiosyncrasy, and the Lexicon-Corpus Interface</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Voula</first><last>Giouli</last></author>
       <author><first>Kilian</first><last>Evang</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
       <author><first>Simon</first><last>Krek</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Ivelina</first><last>Stoyanova</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <pages>147–153</pages>
       <abstract>We present ongoing work towards defining a lexicon-corpus interface to serve as a benchmark in the representation of multiword expressions (of various parts of speech) in dedicated lexica and the linking of these entries to their corpus occurrences. The final aim is the harnessing of such resources for the automatic identification of multiword expressions in a text. The involvement of several natural languages aims at the universality of a solution not centered on a particular language, and also accommodating idiosyncrasies. Challenges in the lexicographic description of multiword expressions are discussed, the current status of lexica dedicated to this linguistic phenomenon is outlined, as well as the solution we envisage for creating an ecosystem of interlinked lexica and corpora containing and, respectively, annotated with multiword expressions.</abstract>
@@ -237,7 +237,7 @@
     <paper id="21">
       <title>Light Verb Constructions in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages</title>
       <author><first>Abishek</first><last>Stephen</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>163–177</pages>
       <abstract>We conduct a morphosyntactic investigation into the light verb constructions (LVCs) or the verbo-nominal predicates in South Asian languages. This work spans the Indo-Aryan and Dravidian language families in treebanks based on Universal Dependencies (UD). For the selected languages we show how well the existing annotation guidelines fare for the LVCs. We also reiterate the importance of the core and oblique distinction in UD and how informative it is for making accurate morphosyntactic annotation judgments for such predicates.</abstract>
       <url hash="3b28ad6e">2024.mwe-1.21</url>
@@ -258,9 +258,9 @@
     <paper id="23">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>araiki</title>
       <author><first>Meesum</first><last>Alam</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Emily</first><last>Hanink</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>188–197</pages>
       <abstract>We present the first treebank of the Saraiki/Siraiki [ISO 639-3 skr] language, using the Universal Dependency annotation scheme (de Marneffe et al., 2021). The treebank currently comprises 587 annotated sentences and 7597 tokens. We explain the most relevant syntactic and morphological features of Saraiki, along with the decision we have made for a range of language specific constructions, namely compounds, verbal structures including light verb and serial verb constructions, and relative clauses.</abstract>
       <url hash="1026f1dc">2024.mwe-1.23</url>
@@ -270,7 +270,7 @@
       <title>Domain-Weighted Batch Sampling for Neural Dependency Parsing</title>
       <author><first>Jacob</first><last>Striebel</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>198–206</pages>
       <abstract>In neural dependency parsing, as well as in the broader field of NLP, domain adaptation remains a challenging problem. When adapting a parser to a target domain, there is a fundamental tension between the need to make use of out-of-domain data and the need to ensure that syntactic characteristic of the target domain are learned. In this work we explore a way to balance these two competing concerns, namely using domain-weighted batch sampling, which allows us to use all available training data, while controlling the probability of sampling in- and out-of-domain data when constructing training batches. We conduct experiments using ten natural language domains and find that domain-weighted batch sampling yields substantial performance improvements in all ten domains compared to a baseline of conventional randomized batch sampling.</abstract>
       <url hash="8cf4b706">2024.mwe-1.24</url>
@@ -278,7 +278,7 @@
     </paper>
     <paper id="25">
       <title>Strategies for the Annotation of Pronominalised Locatives in <fixed-case>T</fixed-case>urkic <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks</title>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <author><first>Furkan</first><last>Akkurt</last></author>
       <author><first>Bermet</first><last>Chontaeva</last></author>
diff --git a/data/xml/2024.naacl.xml b/data/xml/2024.naacl.xml
index 9598f8aa73..b6ba5ce7f1 100644
--- a/data/xml/2024.naacl.xml
+++ b/data/xml/2024.naacl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</booktitle>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Helena</first><last>Gomez</last></editor>
+      <editor id="helena-gomez"><first>Helena</first><last>Gomez</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Text Diffusion Model with Encoder-Decoder Transformers for Sequence-to-Sequence Generation</title>
       <author><first>Hongyi</first><last>Yuan</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chuanqi</first><last>Tan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Songfang</first><last>Huang</last><affiliation>Alibaba Group</affiliation></author>
@@ -58,7 +58,7 @@
       <title>Assessing Logical Puzzle Solving in Large Language Models: Insights from a Minesweeper Case Study</title>
       <author><first>Yinghao</first><last>Li</last></author>
       <author><first>Haorui</first><last>Wang</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>59-81</pages>
       <abstract>Large Language Models (LLMs) have shown remarkable proficiency in language understanding and have been successfully applied to a variety of real-world tasks through task-specific fine-tuning or prompt engineering. Despite these advancements, it remains an open question whether LLMs are fundamentally capable of reasoning and planning, or if they primarily rely on recalling and synthesizing information from their training data. In our research, we introduce a novel task—Minesweeper—specifically designed in a format unfamiliar to LLMs and absent from their training datasets. This task challenges LLMs to identify the locations of mines based on numerical clues provided by adjacent opened cells. Successfully completing this task requires an understanding of each cell’s state, discerning spatial relationships between the clues and mines, and strategizing actions based on logical deductions drawn from the arrangement of the cells. Our experiments, including trials with the advanced GPT-4 model, indicate that while LLMs possess the foundational abilities required for this task, they struggle to integrate these into a coherent, multi-step logical reasoning process needed to solve Minesweeper. These findings highlight the need for further research to understand the nature of reasoning capabilities in LLMs under similar circumstances, and to explore pathways towards more sophisticated AI reasoning and planning models.</abstract>
       <url hash="62f62d69">2024.naacl-long.4</url>
@@ -85,7 +85,7 @@
       <author><first>Jianpeng</first><last>Cheng</last></author>
       <author><first>Joris</first><last>Driesen</last><affiliation>Apple</affiliation></author>
       <author><first>Alexandru</first><last>Coca</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>96-111</pages>
       <abstract>Few-shot dialogue state tracking (DST) with Large Language Models (LLM) relies on an effective and efficient conversation retriever to find similar in-context examples for prompt learning. Previous works use raw dialogue context as search keys and queries, and a retriever is fine-tuned with annotated dialogues to achieve superior performance. However, the approach is less suited for scaling to new domains or new annotation languages, where fine-tuning data is unavailable. To address this problem, we handle the task of conversation retrieval based on text summaries of the conversations.A LLM-based conversation summarizer is adopted for query and key generation, which enables effective maximum inner product search. To avoid the extra inference cost brought by LLM-based conversation summarization, we further distill a light-weight conversation encoder which produces query embeddings without decoding summaries for test conversations. We validate our retrieval approach on MultiWOZ datasets with GPT-Neo-2.7B and LLaMA-7B/30B. The experimental results show a significant improvement over relevant baselines in real few-shot DST settings.</abstract>
       <url hash="e58ef8c0">2024.naacl-long.6</url>
@@ -108,7 +108,7 @@
     <paper id="8">
       <title>On Linearizing Structured Data in Encoder-Decoder Language Models: Insights from Text-to-<fixed-case>SQL</fixed-case></title>
       <author><first>Yutong</first><last>Shao</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Ndapa</first><last>Nakashole</last><affiliation>University of California, San Diego</affiliation></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last><affiliation>University of California, San Diego</affiliation></author>
       <pages>131-156</pages>
       <abstract>Structured data, prevalent in tables, databases, and knowledge graphs, poses a significant challenge in its representation. With the advent of large language models (LLMs), there has been a shift towards linearization-based methods, which process structured data as sequential token streams, diverging from approaches that explicitly model structure, often as a graph. Crucially, there remains a gap in our understanding of how these linearization-based methods handle structured data, which is inherently non-linear.This work investigates the linear handling of structured data in encoder-decoder language models, specifically T5. Our findings reveal the model’s ability to mimic human-designed processes such as schema linking and syntax prediction, indicating a deep, meaningful learning of structure beyond simple token sequencing. We also uncover insights into the model’s internal mechanisms, including the ego-centric nature of structure node encodings and the potential for model compression due to modality fusion redundancy. Overall, this work sheds light on the inner workings of linearization-based methods and could potentially provide guidance for future research.</abstract>
       <url hash="9e6322d7">2024.naacl-long.8</url>
@@ -119,7 +119,7 @@
     <paper id="9">
       <title>Extractive Summarization with Text Generator</title>
       <author><first>Thang</first><last>Le</last><affiliation>VinAI Research</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>157-174</pages>
       <abstract>Standard extractive systems suffer from the lack of gold training signals since existing corpora solely provide document and human-written summary pairs while disregarding extractive labels. As a result, existing methods resort to imperfect pseudo-labels that are both biased and error-prone, thereby hindering the learning process of extractive models. In contrast, text generators which are commonly employed in abstractive summarization can effortlessly overcome this predicament on account of flexible sequence-to-sequence architectures. Motivated to bypass this inherent limitation, we investigate the possibility of conducting extractive summarization with text generators. Through extensive experiments covering six summarization benchmarks, we show that high-quality extractive summaries can be assembled via approximating the outputs (abstractive summaries) of these generators. Moreover, we find that the approximate summaries correlate positively with the auxiliary summaries (i.e. a better generator enables the production of better extractive summaries). Our results signify a new paradigm for training extractive summarizers i.e. learning with generation (abstractive) objectives rather than extractive schemes.</abstract>
       <url hash="8678f8cc">2024.naacl-long.9</url>
@@ -158,8 +158,8 @@
       <author><first>Salvatore</first><last>Giorgi</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Sunny</first><last>Rai</last><affiliation>School of Engineering and Applied Science, University of Pennsylvania</affiliation></author>
       <author><first>Thomas</first><last>Talhelm</last><affiliation>University of Chicago</affiliation></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>211-226</pages>
       <abstract>Cultural variation exists between nations (e.g., the United States vs. China), but also within regions (e.g., California vs. Texas, Los Angeles vs. San Francisco). Measuring this regional cultural variation can illuminate how and why people think and behave differently. Historically, it has been difficult to computationally model cultural variation due to a lack of training data and scalability constraints. In this work, we introduce a new research problem for the NLP community: How do we measure variation in cultural constructs across regions using language? We then provide a scalable solution: building knowledge-guided lexica to model cultural variation, encouraging future work at the intersection of NLP and cultural understanding. We also highlight modern LLMs’ failure to measure cultural variation or generate culturally varied language.</abstract>
       <url hash="06cf6fe2">2024.naacl-long.12</url>
@@ -242,7 +242,7 @@
       <author><first>Yifan</first><last>Xu</last></author>
       <author><first>Hanwen</first><last>Zha</last><affiliation>Facebook</affiliation></author>
       <author><first>Yue</first><last>Liu</last></author>
-      <author><first>Xin Luna</first><last>Dong</last><affiliation>Department of Computer Science, University of Washington and Amazon</affiliation></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last><affiliation>Department of Computer Science, University of Washington and Amazon</affiliation></author>
       <pages>311-325</pages>
       <abstract>Since the recent prosperity of Large Language Models (LLMs), there have been interleaved discussions regarding how to reduce hallucinations from LLM responses, how to increase the factuality of LLMs, and whether Knowledge Graphs (KGs), which store the world knowledge in a symbolic form, will be replaced with LLMs. In this paper, we try to answer these questions from a new angle: How knowledgeable are LLMs?To answer this question, we constructed Head-to-Tail, a benchmark that consists of 18K question-answer (QA) pairs regarding head, torso, and tail facts in terms of popularity. We designed an automated evaluation method and a set of metrics that closely approximate the knowledge an LLM confidently internalizes. Through a comprehensive evaluation of 16 publicly available LLMs, we show that existing LLMs are still far from being perfect in terms of their grasp of factual knowledge, especially for facts of torso-to-tail entities.</abstract>
       <url hash="b20e6720">2024.naacl-long.18</url>
@@ -261,7 +261,7 @@
       <author><first>Jiangshu</first><last>Du</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Shuaiqi</first><last>Liu</last></author>
       <author><first>Yunlong</first><last>Xu</last></author>
-      <author><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <pages>326-337</pages>
       <abstract>Task-Oriented Parsing (TOP) enables conversational assistants to interpret user commands expressed in natural language, transforming them into structured outputs that combine elements of both natural language and intent/slot tags. Recently, Large Language Models (LLMs) have achieved impressive performance in synthesizing computer programs based on a natural-language prompt, mitigating the gap between natural language and structured programs. Our paper focuses on harnessing the capabilities of LLMs for semantic parsing tasks, addressing the following three key research questions: 1) How can LLMs be effectively utilized for semantic parsing tasks? 2) What defines an effective prompt? and 3) How can LLM overcome the length constraint and streamline prompt design by including all examples as prompts? We introduce k Nearest Neighbor In-Context Learning (kNN-ICL), which simplifies prompt engineering by allowing it to be built on top of any design strategy while providing access to all demo examples. Extensive experiments show that: 1) Simple ICL without kNN search can achieve a comparable performance with strong supervised models on the TOP tasks, and 2) kNN-ICL significantly improves the comprehension of complex requests by seamlessly integrating ICL with a nearest-neighbor approach. Notably, this enhancement is achieved without the need for additional data or specialized prompts.</abstract>
       <url hash="83e4830a">2024.naacl-long.19</url>
@@ -302,8 +302,8 @@
       <author><first>Xin</first><last>Huang</last></author>
       <author><first>Fangkai</first><last>Jiao</last></author>
       <author><first>Yang</first><last>Ding</last><affiliation>, A*STAR</affiliation></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>370-390</pages>
       <abstract>We present SeaEval, a benchmark for multilingual foundation models. In addition to characterizing how these models understand and reason with natural language, we also investigate how well they comprehend cultural practices, nuances, and values. Alongside standard accuracy metrics, we investigate the brittleness of foundation models in the dimensions of semantics and multilinguality. Our analyses span both open-sourced and closed models, leading to empirical results across classic NLP tasks, reasoning, and cultural comprehension. Key findings indicate (1) Many models exhibit varied behavior when given paraphrased instructions. (2) Many models still suffer from exposure bias (e.g., positional bias, majority label bias). (3) For questions rooted in factual, scientific, and commonsense knowledge, consistent responses are expected across multilingual queries that are semantically equivalent. Yet, most models surprisingly demonstrate inconsistent performance on these queries. (4) Multilingually-trained models have not attained “balanced multilingual” capabilities. Our endeavors underscore the need for more generalizable semantic representations and enhanced multilingual contextualization. SeaEval can serve as a launchpad for more thorough investigations and evaluations for multilingual and multicultural scenarios.</abstract>
       <url hash="554adec3">2024.naacl-long.22</url>
@@ -428,9 +428,9 @@
       <title>Embrace Divergence for Richer Insights: A Multi-document Summarization Benchmark and a Case Study on Summarizing Diverse Information from News Articles</title>
       <author><first>Kung-Hsiang</first><last>Huang</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Philippe</first><last>Laban</last></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>570-593</pages>
@@ -482,7 +482,7 @@
     <paper id="36">
       <title><fixed-case>R</fixed-case>-Spin: Efficient Speaker and Noise-invariant Representation Learning with Acoustic Pieces</title>
       <author><first>Heng-Jui</first><last>Chang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>642-662</pages>
       <abstract>This paper introduces Robust Spin (R-Spin), a data-efficient domain-specific self-supervision method for speaker and noise-invariant speech representations by learning discrete acoustic units with speaker-invariant clustering (Spin). R-Spin resolves Spin’s issues and enhances content representations by learning to predict acoustic pieces. R-Spin offers a 12X reduction in computational resources compared to previous state-of-the-art methods while outperforming them in severely distorted speech scenarios. This paper provides detailed analyses to show how discrete units contribute to speech encoder training and improving robustness in diverse acoustic environments.</abstract>
       <url hash="74e71830">2024.naacl-long.36</url>
@@ -560,7 +560,7 @@
       <author><first>Cong-Duy</first><last>Nguyen</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
       <author><first>Thong</first><last>Nguyen</last></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>733-749</pages>
       <abstract>Previous work on multimodal sentence embedding has proposed multimodal contrastive learning and achieved promising results. However, by taking the rest of the batch as negative samples without reviewing when forming contrastive pairs, those studies encountered many suspicious and noisy negative examples, significantly affecting the methods’ overall performance. In this work, we propose KDMCSE (Knowledge Distillation Multimodal contrastive learning of Sentence Embeddings), a novel approach that enhances the discrimination and generalizability of multimodal representation and inherits the knowledge from the teacher model to learn the difference between positive and negative instances and via that, can detect noisy and wrong negative samples effectively before they are calculated in the contrastive objective. Furthermore, to overcome the limitation of modeling the variation within negative pairs, we introduce a new contrastive objective, AdapACSE (Adaptive Angular Margin Supervised Contrastive Learning for Multimodal sentence embeddings), that enhances the discriminative representation by strengthening the margin within the angular space while capturing varying semantics within the negative. Experimental results on widely used Semantic Textual Similarity (STS) benchmarks demonstrate the effectiveness of our approach.</abstract>
       <url hash="b97d3d03">2024.naacl-long.42</url>
@@ -704,8 +704,8 @@
       <author><first>Zhiyang</first><last>Teng</last></author>
       <author><first>Bosheng</first><last>Ding</last></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>926-941</pages>
       <abstract>Traditional attempts to enhance the logical reasoning abilities of language models often rely on supervised fine-tuning, limiting their generalization to new tasks or domains. Large Language Models (LLMs), with their capacity to condense vast knowledge, can effectively tackle many tasks. Yet, our experiments reveal a gap in their performance on logical reasoning benchmarks when compared to state-of-the-art fine-tuning based models. To bridge this gap, we present LogicLLM, a first-of-its-kind, fully self-supervised framework for integrating logical reasoning capabilities into LLMs, and activating them via in-context learning. We apply this to two LLM series, FLAN-T5 and LLaMA, with parameter sizes from 3 billion to 33 billion. LogicLLM demonstrates its effectiveness through successful improvements on two logical reasoning benchmarks (ReClor and LogiQA-v2). Additionally, LogicLLM based on FLAN-T5-11B attains comparable results to ChatGPT, and evaluations with LLaMA-based models on three language understanding benchmarks (RACE, MMLU and Big-Bench-Hard) confirm that the improvements come without compromising the model’s general language understanding capabilities.</abstract>
       <url hash="a94d4d3f">2024.naacl-long.53</url>
@@ -820,10 +820,10 @@
     </paper>
     <paper id="61">
       <title>“One-Size-Fits-All”? Examining Expectations around What Constitute “Fair” or “Good” <fixed-case>NLG</fixed-case> System Behaviors</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>Allen Institute for Artificial Intelligence and University of California Berkeley</affiliation></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft</affiliation></author>
       <author><first>Milad</first><last>Shokouhi</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Research, Microsoft</affiliation></author>
       <pages>1054-1089</pages>
       <abstract>Fairness-related assumptions about what constitute appropriate NLG system behaviors range from invariance, where systems are expected to behave identically for social groups, to adaptation, where behaviors should instead vary across them. To illuminate tensions around invariance and adaptation, we conduct five case studies, in which we perturb different types of identity-related language features (names, roles, locations, dialect, and style) in NLG system inputs. Through these cases studies, we examine people’s expectations of system behaviors, and surface potential caveats of these contrasting yet commonly held assumptions. We find that motivations for adaptation include social norms, cultural differences, feature-specific information, and accommodation; in contrast, motivations for invariance include perspectives that favor prescriptivism, view adaptation as unnecessary or too difficult for NLG systems to do appropriately, and are wary of false assumptions. Our findings highlight open challenges around what constitute “fair” or “good” NLG system behaviors.</abstract>
@@ -1011,7 +1011,7 @@
       <author><first>Haitian</first><last>Sun</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Jai</first><last>Gupta</last><affiliation>Google</affiliation></author>
       <author><first>Jonathan</first><last>Berant</last><affiliation>Google and Tel Aviv University</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google DeepMind</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Donald</first><last>Metzler</last><affiliation>Google</affiliation></author>
       <pages>1363-1381</pages>
       <abstract>Recently proposed long-form question answering (QA) systems, supported by large language models (LLMs), have shown promising capabilities. Yet, attributing and verifying their generated abstractive answers can be difficult, and automatically evaluating their accuracy remains an ongoing challenge.In this work, we introduce a new QA task for answering multi-answer questions by summarizing multiple diverse sources in a semi-extractive fashion. Specifically, Semi-extractive Multi-source QA (SEMQA) requires models to output a comprehensive answer, while mixing factual quoted spans—copied verbatim from given input sources—and non-factual free-text connectors that glue these spans together into a single cohesive passage. This setting bridges the gap between the outputs of well-grounded but constrained extractive QA systems and more fluent but harder to attribute fully abstractive answers. Particularly, it enables a new mode for language models that leverages their advanced language generation capabilities, while also producing fine in-line attributions by-design that are easy to verify, interpret, and evaluate. To study this task, we create the first dataset of this kind, QuoteSum, with human-written semi-extractive answers to natural and generated questions, and define text-based evaluation metrics. Experimenting with several LLMs in various settings, we find this task to be surprisingly challenging, demonstrating the importance of QuoteSum for developing and studying such consolidation capabilities.</abstract>
@@ -1034,7 +1034,7 @@
     <paper id="76">
       <title>A <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank for <fixed-case>H</fixed-case>ighland <fixed-case>P</fixed-case>uebla <fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Robert</first><last>Pugh</last></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University, Bloomington</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University, Bloomington</affiliation></author>
       <pages>1393-1403</pages>
       <abstract>We present a Universal Dependencies (UD) treebank for Highland Puebla Nahuatl. The treebank is only the second such UD corpus for a Mexican language, and supplements an existing treebank for another Nahuatl variant. We describe the process of data collection, annotation decisions and interesting syntactic constructions, and discuss some similarities and differences between the Highland Puebla Nahuatl treebank and the existing Western Sierra Puebla Nahuatl treebank.</abstract>
       <url hash="24ccceea">2024.naacl-long.76</url>
@@ -1076,7 +1076,7 @@
       <title><fixed-case>O</fixed-case>rchestra<fixed-case>LLM</fixed-case>: Efficient Orchestration of Language Models for Dialogue State Tracking</title>
       <author><first>Chia-Hsuan</first><last>Lee</last></author>
       <author><first>Hao</first><last>Cheng</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last><affiliation>University of Washington</affiliation></author>
       <pages>1434-1445</pages>
       <abstract>Large language models (LLMs) have revolutionized the landscape of Natural Language Processing, but are computationally expensive. To reduce the cost without sacrificing performance, previous studies have explored various approaches to harness the potential of Smaller Language Models (SLMs) as cost-effective alternatives to their larger counterparts. Driven by findings that SLMs and LLMs exhibit complementary strengths in a structured knowledge extraction task, this work presents a novel SLM/LLM routing framework designed to improve computational efficiency and enhance task performance. In dialogue state tracking tasks, the proposed routing framework enhances performance substantially compared to relying solely on LLMs, while reducing the computational costs by over 50%.</abstract>
       <url hash="1c178a9f">2024.naacl-long.79</url>
@@ -1091,7 +1091,7 @@
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Jordan</first><last>Meadows</last></author>
       <author><first>Lan</first><last>Zhang</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>1446-1458</pages>
       <abstract>This paper investigates the possibility of approximating multiple mathematical operations in latent space for expression derivation. To this end, we introduce different multi-operational representation paradigms, modelling mathematical operations as explicit geometric transformations. By leveraging a symbolic engine, we construct a large-scale dataset comprising 1.7M derivation steps stemming from 61K premises and 6 operators, analysing the properties of each paradigm when instantiated with state-of-the-art neural encoders.Specifically, we investigate how different encoding mechanisms can approximate expression manipulation in latent space, exploring the trade-off between learning different operators and specialising within single operations, as well as the ability to support multi-step derivations and out-of-distribution generalisation. Our empirical analysis reveals that the multi-operational paradigm is crucial for disentangling different operators, while discriminating the conclusions for a single operation is achievable in the original expression encoder. Moreover, we show that architectural choices can heavily affect the training dynamics, structural organisation, and generalisation of the latent space, resulting in significant variations across paradigms and classes of encoders.</abstract>
       <url hash="12ed8b46">2024.naacl-long.80</url>
@@ -1117,7 +1117,7 @@
     <paper id="82">
       <title><fixed-case>X</fixed-case>fer<fixed-case>B</fixed-case>ench: a Data-Driven Benchmark for Emergent Language</title>
       <author><first>Brendon</first><last>Boldt</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
-      <author><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1475-1489</pages>
       <abstract>In this paper, we introduce a benchmark for evaluating the overall quality of emergent languages using data-driven methods. Specifically, we interpret the notion of the “quality” of an emergent language as its similarity to human language within a deep learning framework. We measure this by using the emergent language as pretraining data for a downstream NLP tasks in human language—the better the downstream performance, the better the emergent language. We implement this benchmark as an easy-to-use Python package that only requires a text file of utterances from the emergent language to be evaluated. Finally, we empirically test the benchmark’s validity using human, synthetic, and emergent language baselines.</abstract>
       <url hash="5843b7eb">2024.naacl-long.82</url>
@@ -1143,7 +1143,7 @@
       <author><first>Jordan</first><last>Meadows</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Damien</first><last>Teney</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>1505-1523</pages>
       <abstract>This paper proposes a methodology for generating and perturbing detailed derivations of equations at scale, aided by a symbolic engine, to evaluate the generalisability of Transformers to out-of-distribution mathematical reasoning problems. Instantiating the framework in the context of sequence classification tasks, we compare the capabilities of GPT-4, GPT-3.5, and a canon of fine-tuned BERT models, exploring the relationship between specific operators and generalisation failure via the perturbation of reasoning aspects such as symmetry and variable surface forms. Surprisingly, our empirical evaluation reveals that the average in-distribution performance of fine-tuned models surpasses GPT-3.5, and rivals GPT-4. However, perturbations to input reasoning can reduce their performance by up to 80 F1 points. Overall, the results suggest that the in-distribution performance of smaller open-source models may potentially rival GPT by incorporating appropriately structured derivation dependencies during training, and highlight a shared weakness between BERT and GPT involving a relative inability to decode indirect references to mathematical entities. We release the full codebase, constructed datasets, and fine-tuned models to encourage future progress in the field.</abstract>
       <url hash="25a8b5ca">2024.naacl-long.84</url>
@@ -1237,7 +1237,7 @@
       <title>Causal Inference for Human-Language Model Collaboration</title>
       <author><first>Bohan</first><last>Zhang</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Yixin</first><last>Wang</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>Paramveer</first><last>Dhillon</last><affiliation>University of Michigan</affiliation></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer</first><last>Dhillon</last><affiliation>University of Michigan</affiliation></author>
       <pages>1630-1647</pages>
       <abstract>In this paper, we examine the collaborative dynamics between humansand language models (LMs), where the interactions typically involveLMs proposing text segments and humans editing or responding to theseproposals. Productive engagement with LMs in such scenarios necessitates that humans discern effective text-based interaction strategies, such as editing and response styles, from historical human-LM interactions. This objective is inherently causal, driven by the counterfactual ‘what-if’ question: how would the outcome of collaboration change if humans employed a different text editing/refinement strategy? A key challenge in answering this causal inference question is formulating an appropriate causal estimand: the conventional average treatment effect (ATE) estimand is inapplicable to text-based treatments due to their high dimensionality. To address this concern, we introduce a new causal estimand– *Incremental Stylistic Effect (ISE)*, which characterizes the average impact of infinitesimally shifting a text towards a specific style, such as increasing formality. We establish the conditions for the non-parametric identification of ISE. Building on this, we develop *CausalCollab*, an algorithm designed to estimate the ISE of various interaction strategies in dynamic human-LM collaborations. Our empirical investigations across three distinct human-LM collaboration scenarios reveal that *CausalCollab* effectively reduces confounding and significantly improves counterfactual estimation over a set of competitive baselines.</abstract>
       <url hash="45ac588e">2024.naacl-long.91</url>
@@ -1250,11 +1250,11 @@
       <author><first>Zezhong</first><last>Wang</last></author>
       <author><first>Fangkai</first><last>Yang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Lu</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
-      <author id="pu-zhao"><first>Pu</first><last>Zhao</last></author>
+      <author><first>Pu</first><last>Zhao</last></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Liang</first><last>Chen</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Qingwei</first><last>Lin</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>1648-1668</pages>
       <abstract>With the increasing risk posed by jailbreak attacks, recent studies have investigated various methods to improve the safety of large language models (LLMs), mainly falling into two strategies: safety training and safeguards. Safety training involves fine-tuning the LLM with adversarial samples, which activate the LLM’s capabilities against jailbreak. However, it is not always effective in countering new attacks and often leads to potential performance degradation. Safeguards, on the other hand, are methods using additional models to filter harmful content from the LLM’s response. Nevertheless, they can only reduce a limited amount of harmful output and introduce extra computational costs. Given the distinct strengths and weaknesses of both, we combine them to balance out their flaws and propose a more effective method called Self-Guard.Specifically, we train the LLM to review its responses for any harmful content and append a [harmful] or [harmless] tag to the end of the response. In this way, Self-Guard possesses the advantages of safety training, leveraging the powerful capabilities of the LLMs themselves to detect harmfulness. Besides that, it gains flexibility like safeguards, making the safety check target the output side, which makes the system less vulnerable to attack updates. Experimental results indicate that our Self-Guard can effectively defend against jailbreak attacks and will not cause LLMs’ performance degradation.</abstract>
       <url hash="242b910d">2024.naacl-long.92</url>
@@ -1494,7 +1494,7 @@
       <author><first>Hongyi</first><last>Yuan</last></author>
       <author><first>Runji</first><last>Lin</last></author>
       <author><first>Junyang</first><last>Lin</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Zheng</first><last>Yuan</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chang</first><last>Zhou</last></author>
       <author><first>Jingren</first><last>Zhou</last><affiliation>Alibaba Group</affiliation></author>
       <pages>1964-1974</pages>
@@ -1508,7 +1508,7 @@
       <author><first>Jiarui</first><last>Liu</last></author>
       <author><first>Wenkai</first><last>Li</last></author>
       <author><first>Zhijing</first><last>Jin</last></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University and George Washington University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University and George Washington University</affiliation></author>
       <pages>1975-1997</pages>
       <abstract>In an era of model and data proliferation in machine learning/AI especially marked by the rapid advancement of open-sourced technologies, there arises a critical need for standardized consistent documentation. Our work addresses the information incompleteness in current human-written model and data cards. We propose an automated generation approach using Large Language Models (LLMs). Our key contributions include the establishment of CardBench, a comprehensive dataset aggregated from over 4.8k model cards and 1.4k data cards, coupled with the development of the CardGen pipeline comprising a two-step retrieval process. Our approach exhibits enhanced completeness, objectivity, and faithfulness in generated model and data cards, a significant step in responsible AI documentation practices ensuring better accountability and traceability.</abstract>
       <url hash="52d49ddb">2024.naacl-long.110</url>
@@ -1533,7 +1533,7 @@
       <title>Are Multilingual <fixed-case>LLM</fixed-case>s Culturally-Diverse Reasoners? An Investigation into Multicultural Proverbs and Sayings</title>
       <author id="chen-cecilia-liu"><first>Chen</first><last>Cecilia Liu</last></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
       <pages>2016-2039</pages>
       <abstract>Large language models (LLMs) are highly adept at question answering and reasoning tasks, but when reasoning in a situational context, human expectations vary depending on the relevant cultural common ground. As languages are associated with diverse cultures, LLMs should also be culturally-diverse reasoners. In this paper, we study the ability of a wide range of state-of-the-art multilingual LLMs (mLLMs) to reason with proverbs and sayings in a conversational context. Our experiments reveal that: (1) mLLMs “know” limited proverbs and memorizing proverbs does not mean understanding them within a conversational context; (2) mLLMs struggle to reason with figurative proverbs and sayings, and when asked to select the wrong answer (instead of asking it to select the correct answer); and (3) there is a “culture gap” in mLLMs when reasoning about proverbs and sayings translated from other languages. We construct and release our evaluation dataset MAPS (MulticulturAl Proverbs and Sayings) for proverb understanding with conversational context for six different languages.</abstract>
@@ -1625,7 +1625,7 @@
       <author><first>Dan</first><last>Ma</last></author>
       <author><first>Xuezhi</first><last>Cao</last></author>
       <author><first>Yunsen</first><last>Xian</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>2136-2153</pages>
       <abstract>Large Language Models (LLMs), such as ChatGPT and GPT-4, are designed to provide useful and safe responses. However, adversarial prompts known as ‘jailbreaks’ can circumvent safeguards, leading LLMs to generate potentially harmful content. Exploring jailbreak prompts can help to better reveal the weaknesses of LLMs and further steer us to secure them. Unfortunately, existing jailbreak methods either suffer from intricate manual design or require optimization on other white-box models, which compromises either generalization or efficiency. In this paper, we generalize jailbreak prompt attacks into two aspects: (1) Prompt Rewriting and (2) Scenario Nesting. Based on this, we propose ReNeLLM, an automatic framework that leverages LLMs themselves to generate effective jailbreak prompts. Extensive experiments demonstrate that ReNeLLM significantly improves the attack success rate while greatly reducing the time cost compared to existing baselines. Our study also reveals the inadequacy of current defense methods in safeguarding LLMs. Finally, we analyze the failure of LLMs defense from the perspective of prompt execution priority, and propose corresponding defense strategies. We hope that our research can catalyze both the academic community and LLMs developers towards the provision of safer and more regulated LLMs. The code is available at https://github.com/NJUNLP/ReNeLLM.</abstract>
@@ -1736,7 +1736,7 @@
       <author><first>Eve</first><last>Fleisig</last></author>
       <author><first>Su Lin</first><last>Blodgett</last><affiliation>Microsoft</affiliation></author>
       <author><first>Dan</first><last>Klein</last><affiliation>University of California, Berkeley</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2279-2292</pages>
       <abstract>Longstanding data labeling practices in machine learning involve collecting and aggregating labels from multiple annotators. But what should we do when annotators disagree? Though annotator disagreement has long been seen as a problem to minimize, new perspectivist approaches challenge this assumption by treating disagreement as a valuable source of information. In this position paper, we examine practices and assumptions surrounding the causes of disagreement–some challenged by perspectivist approaches, and some that remain to be addressed–as well as practical and normative challenges for work operating under these assumptions. We conclude with recommendations for the data labeling pipeline and avenues for future research engaging with subjectivity and disagreement.</abstract>
       <url hash="54479588">2024.naacl-long.126</url>
@@ -1749,7 +1749,7 @@
       <author><first>Aparna</first><last>Elangovan</last><affiliation>Amazon</affiliation></author>
       <author><first>Jiayuan</first><last>He</last><affiliation>Royal Melbourne Institute of Technology and The University of Melbourne</affiliation></author>
       <author><first>Yuan</first><last>Li</last></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>2293-2309</pages>
       <abstract>The NLP community typically relies on performance of a model on a held-out test set to assess generalization. Performance drops observed in datasets outside of official test sets are generally attributed to “out-of-distribution” effects. Here, we explore the foundations of generalizability and study the factors that affect it, articulating lessons from clinical studies. In clinical research, generalizability is an act of reasoning that depends on (a) *internal validity* of experiments to ensure controlled measurement of cause and effect, and (b) *external validity* or transportability of the results to the wider population. We demonstrate how learning spurious correlations, such as the distance between entities in relation extraction tasks, can affect a model’s internal validity and in turn adversely impact generalization. We, therefore, present the need to ensure internal validity when building machine learning models in NLP. Our recommendations also apply to generative large language models, as they are known to be sensitive to even minor semantic preserving alterations. We also propose adapting the idea of *matching* in randomized controlled trials and observational studies to NLP evaluation to measure causation.</abstract>
       <url hash="45788bd2">2024.naacl-long.127</url>
@@ -1775,7 +1775,7 @@
       <author><first>Crystina</first><last>Zhang</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Xueguang</first><last>Ma</last></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <pages>2327-2340</pages>
       <abstract>Large language models (LLMs) exhibit positional bias in how they use context, which especially affects listwise ranking. To address this, we propose permutation self-consistency, a form of self-consistency over the ranking list outputs of black-box LLMs. Our key idea is to marginalize out different list orders in the prompt to produce an order-independent ranking with less positional bias. First, given some input prompt, we repeatedly shuffle the list in the prompt and pass it through the LLM while holding the instructions the same. Next, we aggregate the resulting sample of rankings by computing the central ranking closest in distance to all of them, marginalizing out prompt order biases in the process. Theoretically, we prove the robustness of our method, showing convergence to the true ranking under random perturbations.Empirically, on five datasets in sorting and passage reranking, our approach improves scores from conventional inference by up to 34-52% for Mistral, 7-18% for GPT-3.5, 8-16% for LLaMA v2 (70B). Our code is at https://github.com/castorini/perm-sc.</abstract>
       <url hash="aef53d36">2024.naacl-long.129</url>
@@ -1808,7 +1808,7 @@
       <author><first>Wantian</first><last>Zhao</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Sanjeev</first><last>Grampurohit</last></author>
       <author><first>Rampi</first><last>Ramprasad</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>2370-2385</pages>
       <abstract>Scientific information extraction (SciIE), which aims to automatically extract information from scientific literature, is becoming more important than ever. However, there are no existing SciIE datasets for polymer materials, which is an important class of materials used ubiquitously in our daily lives. To bridge this gap, we introduce POLYIE, a new SciIE dataset for polymer materials. POLYIE is curated from 146 full-length polymer scholarly articles, which are annotated with different named entities (i.e., materials, properties, values, conditions) as well as their N-ary relations by domain experts. POLYIE presents several unique challenges due to diverse lexical formats of entities, ambiguity between entities, and variable-length relations. We evaluate state-of-the-art named entity extraction and relation extraction models on POLYIE, analyze their strengths and weaknesses, and highlight some difficult cases for these models. To the best of our knowledge, POLYIE is the first SciIE benchmark for polymer materials, and we hope it will lead to more research efforts from the community on this challenging task. Our code and data are available on: https://github.com/jerry3027/PolyIE.</abstract>
       <url hash="1df620d5">2024.naacl-long.131</url>
@@ -1876,7 +1876,7 @@
       <author><first>Vasudha</first><last>Varadarajan</last></author>
       <author><first>Sverker</first><last>Sikström</last></author>
       <author><first>Oscar</first><last>Kjell</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
       <pages>2466-2478</pages>
       <abstract>Mental health issues differ widely among individuals, with varied signs and symptoms. Recently, language-based assessments haveshown promise in capturing this diversity, but they require a substantial sample of words per person for accuracy. This work introducesthe task of Adaptive Language-Based Assessment (ALBA), which involves adaptively ordering questions while also scoring an individual’s latent psychological trait using limited language responses to previous questions. To this end, we develop adaptive testing methods under two psychometric measurement theories: Classical Test Theory and Item Response Theory.We empirically evaluate ordering and scoring strategies, organizing into two new methods: a semi-supervised item response theory-basedmethod (ALIRT) and a supervised Actor-Critic model. While we found both methods to improve over non-adaptive baselines, We foundALIRT to be the most accurate and scalable, achieving the highest accuracy with fewer questions (e.g., Pearson r ≈ 0.93 after only 3 questions as compared to typically needing at least 7 questions). In general, adaptive language-based assessments of depression and anxiety were able to utilize a smaller sample of language without compromising validity or large computational costs.</abstract>
       <url hash="d483a97d">2024.naacl-long.136</url>
@@ -1919,7 +1919,7 @@
       <author><first>Elron</first><last>Bandel</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Ariel</first><last>Gera</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Ofir</first><last>Arviv</last><affiliation>Hebrew University of Jerusalem and Computer Science Departmen, Technion-Israel Institute of Technology</affiliation></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Eyal</first><last>Shnarch</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Noam</first><last>Slonim</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Michal</first><last>Shmueli-Scheuer</last></author>
@@ -2032,7 +2032,7 @@
       <author><first>You</first><last>Zuo</last></author>
       <author><first>Kim</first><last>Gerdes</last><affiliation>Université Paris-Saclay</affiliation></author>
       <author><first>Éric</first><last>Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>INRIA</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>INRIA</affiliation></author>
       <pages>2687-2710</pages>
       <abstract>In this work, we introduce a comprehensive error typology specifically designed for evaluating two distinct tasks in machine-generated patent texts: claims-to-abstract generation, and the generation of the next claim given previous ones. We have also developed a benchmark, PatentEval, for systematically assessing language models in this context. Our study includes a comparative analysis, annotated by humans, of various models. These range from those specifically adapted during training for tasks within the patent domain to the latest general-purpose large language models (LLMs). Furthermore, we explored and evaluated some metrics to approximate human judgments in patent text evaluation, analyzing the extent to which these metrics align with expert assessments. These approaches provide valuable insights into the capabilities and limitations of current language models in the specialized field of patent text generation.</abstract>
       <url hash="609d7c7e">2024.naacl-long.147</url>
@@ -2066,7 +2066,7 @@
     </paper>
     <paper id="150">
       <title>What Causes the Failure of Explicit to Implicit Discourse Relation Recognition?</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Heidelberg University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Stephen</first><last>Wan</last><affiliation>CSIRO</affiliation></author>
       <author><first>Michael</first><last>Strube</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <pages>2738-2753</pages>
@@ -2081,7 +2081,7 @@
       <author><first>Siddhant</first><last>Arora</last></author>
       <author><first>Hayato</first><last>Futami</last><affiliation>Sony</affiliation></author>
       <author><first>Jee-weon</first><last>Jung</last><affiliation>CMU, Carnegie Mellon University</affiliation></author>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Roshan</first><last>Sharma</last><affiliation>Google</affiliation></author>
       <author><first>Yosuke</first><last>Kashiwagi</last></author>
       <author><first>Emiru</first><last>Tsunoo</last></author>
@@ -2113,7 +2113,7 @@
       <author><first>Yada</first><last>Zhu</last><affiliation>IBM Research</affiliation></author>
       <author><first>Diego</first><last>Antognini</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Yoon</first><last>Kim</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <pages>2793-2804</pages>
       <abstract>This paper studies the relationship between the surface form of a mathematical problem and its solvability by large language models. We find that subtle alterations in the surface form can significantly impact the answer distribution and the solve rate, exposing the language model’s lack of robustness and sensitivity to the surface form in reasoning through complex problems. To improve mathematical reasoning performance, we propose Self-Consistency-over-Paraphrases (SCoP), which diversifies reasoning paths from specific surface forms of the problem. We evaluate our approach on four mathematics reasoning benchmarks over three large language models and show that SCoP improves mathematical reasoning performance over vanilla self-consistency, particularly for problems initially deemed unsolvable. Finally, we provide additional experiments and discussion regarding problem difficulty and surface forms, including cross-model difficulty agreement and paraphrasing transferability, and Variance of Variations (VOV) for language model evaluation.</abstract>
       <url hash="8e4a3f53">2024.naacl-long.153</url>
@@ -2166,7 +2166,7 @@
     <paper id="157">
       <title>The Effect of Data Partitioning Strategy on Model Generalizability: A Case Study of Morphological Segmentation</title>
       <author><first>Zoey</first><last>Liu</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <pages>2851-2864</pages>
       <abstract>Recent work to enhance data partitioning strategies for more realistic model evaluation face challenges in providing a clear optimal choice. This study addresses these challenges, focusing on morphological segmentation and synthesizing limitations related to language diversity, adoption of multiple datasets and splits, and detailed model comparisons. Our study leverages data from 19 languages, including ten indigenous or endangered languages across 10 language families with diverse morphological systems (polysynthetic, fusional, and agglutinative) and different degrees of data availability. We conduct large-scale experimentation with varying sized combinations of training and evaluation sets as well as new test data. Our results show that, when faced with new test data: (1) models trained from random splits are able to achieve higher numerical scores; (2) model rankings derived from random splits tend to generalize more consistently.</abstract>
       <url hash="20302c43">2024.naacl-long.157</url>
@@ -2179,7 +2179,7 @@
       <author><first>Debasmita</first><last>Bhattacharya</last><affiliation>Columbia University</affiliation></author>
       <author><first>Siying</first><last>Ding</last></author>
       <author><first>Alayna</first><last>Nguyen</last></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>2865-2876</pages>
       <abstract>It is well-known that speakers who entrain to one another have more successful conversations than those who do not. Previous research has shown that interlocutors entrain on linguistic features in both written and spoken <tex-math>\emph{monolingual}</tex-math> domains. More recent work on <tex-math>\emph{code-switched}</tex-math> communication has also shown preliminary evidence of entrainment on certain aspects of code-switching (CSW). However, such studies of entrainment in code-switched domains have been extremely few and restricted to human-machine textual interactions. Our work studies code-switched spontaneous speech between humans, finding that (1) patterns of written and spoken entrainment in monolingual settings largely generalize to code-switched settings, and (2) some patterns of entrainment on code-switching in dialogue agent-generated text generalize to spontaneous code-switched speech. Our findings give rise to important implications for the potentially “universal” nature of entrainment as a communication phenomenon, and potential applications in inclusive and interactive speech technology.</abstract>
       <url hash="4e6dc711">2024.naacl-long.158</url>
@@ -2191,7 +2191,7 @@
       <title>A Survey of Meaning Representations – From Theory to Practical Utility</title>
       <author><first>Zacchary</first><last>Sadeddine</last></author>
       <author><first>Juri</first><last>Opitz</last><affiliation>Ruprecht-Karls-Universität Heidelberg and University of Zurich</affiliation></author>
-      <author><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>2877-2892</pages>
       <abstract>Symbolic meaning representations of natural language text have been studied since at least the 1960s. With the availability of large annotated corpora, and more powerful machine learning tools, the field has recently seen several new developments. In this survey, we study today’s most prominent Meaning Representation Frameworks. We shed light on their theoretical properties, as well as on their practical research environment, i.e., on datasets, parsers, applications, and future challenges.</abstract>
       <url hash="f31e6616">2024.naacl-long.159</url>
@@ -2210,7 +2210,7 @@
       <author><first>Liang</first><last>Chen</last></author>
       <author><first>Yufeng</first><last>He</last></author>
       <author><first>Kaikai</first><last>An</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>2893-2907</pages>
       <abstract>Large-scale multilingual Pretrained Language Models (mPLMs) yield impressive performance on cross-language tasks, yet significant performance disparities exist across different languages within the same mPLM. Previous studies endeavored to narrow these disparities by supervise fine-tuning the mPLMs with multilingual data.However, obtaining labeled multilingual data is time-consuming, and fine-tuning mPLM with limited labeled multilingual data merely encapsulates the knowledge specific to the labeled data.Therefore, we introduce **ALSACE** to leverage the learned knowledge from the well-performing languages to guide under-performing ones within the same mPLM, eliminating the need for additional labeled multilingual data. Experiments show that ALSACE effectively mitigates language-level performance disparity across various mPLMs while showing the competitive performance on different multilingual NLU tasks, ranging from full resource to limited resource settings. The code for our approach is available at https://github.com/pkunlp-icler/ALSACE.</abstract>
       <url hash="7929e2c2">2024.naacl-long.160</url>
@@ -2235,7 +2235,7 @@
       <title>Visually-Aware Context Modeling for News Image Captioning</title>
       <author><first>Tingyu</first><last>Qu</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Tinne</first><last>Tuytelaars</last><affiliation>KU Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>2927-2943</pages>
       <abstract>News Image Captioning aims to create captions from news articles and images, emphasizing the connection between textual context and visual elements. Recognizing the significance of human faces in news images and the face-name co-occurrence pattern in existing datasets, we propose a face-naming module for learning better name embeddings. Apart from names, which can be directly linked to an image area (faces), news image captions mostly contain context information that can only be found in the article. We design a retrieval strategy using CLIP to retrieve sentences that are semantically close to the image, mimicking human thought process of linking articles to images. Furthermore, to tackle the problem of the imbalanced proportion of article context and image context in captions, we introduce a simple yet effective method Contrasting with Language Model backbone (CoLaM) to the training pipeline. We conduct extensive experiments to demonstrate the efficacy of our framework. We out-perform the previous state-of-the-art (without external data) by 7.97/5.80 CIDEr scores on GoodNews/NYTimes800k. Our code is available at https://github.com/tingyu215/VACNIC.</abstract>
       <url hash="ce268030">2024.naacl-long.162</url>
@@ -2257,7 +2257,7 @@
     </paper>
     <paper id="164">
       <title><fixed-case>T</fixed-case>opic<fixed-case>GPT</fixed-case>: A Prompt-based Topic Modeling Framework</title>
-      <author><first>Chau Minh</first><last>Pham</last></author>
+      <author id="chau-minh-pham"><first>Chau Minh</first><last>Pham</last></author>
       <author><first>Alexander</first><last>Hoyle</last></author>
       <author><first>Simeng</first><last>Sun</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
@@ -2354,7 +2354,7 @@
       <author><first>Olanrewaju</first><last>Samuel</last></author>
       <author><first>Matthew</first><last>Stutzman</last></author>
       <author><first>Bismarck</first><last>Odoom</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Whiting School of Engineering</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Whiting School of Engineering</affiliation></author>
       <author><first>Stephen</first><last>Richardson</last><affiliation>Brigham Young University</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>3083-3110</pages>
@@ -2393,7 +2393,7 @@
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sharanya</first><last>Thilagan</last></author>
       <author><first>Kai</first><last>North</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <pages>3149-3160</pages>
       <abstract>We present the first comprehensive survey of Native Language Identification (NLI) applied to texts. NLI is the task of automatically identifying an author’s native language (L1) based on their second language (L2) production. NLI is an important task with practical applications in second language teaching and NLP. The task has been widely studied for both text and speech, particularly for L2 English due to the availability of suitable corpora. Speech-based NLI relies heavily on accent modeled by pronunciation patterns and prosodic cues while text-based NLI relies primarily on modeling spelling errors and grammatical patterns that reveal properties of an individuals’ L1 influencing L2 production. We survey over one hundred papers on the topic including the papers associated with the NLI and INLI shared tasks. We describe several text representations and computational techniques used in text-based NLI. Finally, we present a comprehensive account of publicly available datasets used for the task thus far.</abstract>
@@ -2606,14 +2606,14 @@
       <author><first>Yusen</first><last>Zhang</last></author>
       <author><first>Nan</first><last>Zhang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Yixin</first><last>Liu</last><affiliation>Yale University</affiliation></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Junru</first><last>Liu</last></author>
       <author><first>Ryo</first><last>Kamoi</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Xiaoxin</first><last>Lu</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Jieyu</first><last>Zhao</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>3404-3426</pages>
       <abstract>People from different social and demographic groups express diverse perspectives and conflicting opinions on a broad set of topics such as product reviews, healthcare, law, and politics. A fair summary should provide a comprehensive coverage of diverse perspectives without underrepresenting certain groups. However, current work in summarization metrics and Large Language Models (LLMs) evaluation has not explored fair abstractive summarization. In this paper, we systematically investigate fair abstractive summarization for user-generated data. We first formally define fairness in abstractive summarization as not underrepresenting perspectives of any groups of people, and we propose four reference-free automatic metrics by measuring the differences between target and source perspectives. We evaluate nine LLMs, including three GPT models, four LLaMA models, PaLM 2, and Claude, on six datasets collected from social media, online reviews, and recorded transcripts. Experiments show that both the model-generated and the human-written reference summaries suffer from low fairness. We conduct a comprehensive analysis of the common factors influencing fairness and propose three simple but effective methods to alleviate unfair summarization. Our dataset and code are available at https://github.com/psunlpgroup/FairSumm.</abstract>
@@ -2696,7 +2696,7 @@
       <title>My Heart Skipped a Beat! Recognizing Expressions of Embodied Emotion in Natural Language</title>
       <author><first>Yuan</first><last>Zhuang</last></author>
       <author><first>Tianyu</first><last>Jiang</last><affiliation>University of Cincinnati</affiliation></author>
-      <author><first>Ellen</first><last>Riloff</last><affiliation>University of Arizona</affiliation></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last><affiliation>University of Arizona</affiliation></author>
       <pages>3525-3537</pages>
       <abstract>Humans frequently experience emotions. When emotions arise, they affect not only our mental state but can also change our physical state. For example, we often open our eyes wide when we are surprised, or clap our hands when we feel excited. Physical manifestations of emotions are referred to as embodied emotion in the psychology literature. From an NLP perspective, recognizing descriptions of physical movements or physiological responses associated with emotions is a type of implicit emotion recognition. Our work introduces a new task of recognizing expressions of embodied emotion in natural language. We create a dataset of sentences that contains 7,300 body part mentions with human annotations for embodied emotion. We develop a classification model for this task and present two methods to acquire weakly labeled instances of embodied emotion by extracting emotional manner expressions and by prompting a language model. Our experiments show that the weakly labeled data can train an effective classification model without gold data, and can also improve performance when combined with gold data. Our dataset is publicly available at https://github.com/yyzhuang1991/Embodied-Emotions.</abstract>
       <url hash="84a75245">2024.naacl-long.193</url>
@@ -2751,7 +2751,7 @@
       <author><first>Zehui</first><last>Wu</last></author>
       <author><first>Ziwei</first><last>Gong</last><affiliation>Columbia University</affiliation></author>
       <author><first>Jaywon</first><last>Koo</last><affiliation>Rice University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>3588-3602</pages>
       <abstract>This paper investigates the optimal selection and fusion of feature encoders across multiple modalities and combines these in one neural network to improve sentiment detection. We compare different fusion methods and examine the impact of multi-loss training within the multi-modality fusion network, identifying surprisingly important findings relating to subnet performance. We have also found that integrating context significantly enhances model performance. Our best model achieves state-of-the-art performance for three datasets (CMU-MOSI, CMU-MOSEI and CH-SIMS). These results suggest a roadmap toward an optimized feature selection and fusion approach for enhancing sentiment detection in neural networks.</abstract>
       <url hash="671bf564">2024.naacl-long.197</url>
@@ -2914,7 +2914,7 @@
       <author><first>Jiayi</first><last>Zhang</last></author>
       <author><first>Julian</first><last>Michael</last><affiliation>New York University</affiliation></author>
       <author><first>Bernhard</first><last>Schölkopf</last><affiliation>ELLIS Institute and Max Planck Institute for Intelligent Systems, Max-Planck Institute</affiliation></author>
-      <author><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University and George Washington University</affiliation></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last><affiliation>Carnegie Mellon University and George Washington University</affiliation></author>
       <pages>3781-3798</pages>
       <abstract>Traditionally, natural language processing (NLP) models often use a rich set of features created by linguistic expertise, such as semantic representations. However, in the era of large language models (LLMs), more and more tasks are turned into generic, end-to-end sequence generation problems. In this paper, we investigate the question: what is the role of semantic representations in the era of LLMs? Specifically, we investigate the effect of Abstract Meaning Representation (AMR) across five diverse NLP tasks. We propose an AMR-driven chain-of-thought prompting method, which we call AMRCOT, and find that it generally hurts performance more than it helps. To investigate what AMR may have to offer on these tasks, we conduct a series of analysis experiments. We find that it is difficult to predict which input examples AMR may help or hurt on, but errors tend to arise with multi-word expressions, named entities, and in the final inference step where the LLM must connect its reasoning over the AMR to its prediction. We recommend focusing on these areas for future work in semantic representations for LLMs. Our code: https://github.com/causalNLP/amr_llm</abstract>
       <url hash="4a9eb68c">2024.naacl-long.209</url>
@@ -2941,7 +2941,7 @@
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Jing</first><last>Ye</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Xiaohan</first><last>Zhang</last><affiliation>Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>3822-3832</pages>
       <abstract>Decoding continuous language from brain activity is a formidable yet promising field of research. It is particularly significant for aiding people with speech disabilities to communicate through brain signals. This field addresses the complex task of mapping brain signals to text. The previous best attempt reverse-engineered this process in an indirect way: it began by learning to encode brain activity from text and then guided text generation by aligning with predicted brain responses. In contrast, we propose a simple yet effective method that guides text reconstruction by directly comparing them with the predicted text embeddings mapped from brain activities. Comprehensive experiments reveal that our method significantly outperforms the current state-of-the-art model, showing average improvements of 77% and 54% on BLEU and METEOR scores. We further validate the proposed modules through detailed ablation studies and case analyses and highlight a critical correlation: the more precisely we map brain activities to text embeddings, the better the text reconstruction results. Such insight can simplify the task of reconstructing language from brain activities for future work, emphasizing the importance of improving brain-to-text-embedding mapping techniques.</abstract>
       <url hash="5e6d4634">2024.naacl-long.211</url>
@@ -2953,7 +2953,7 @@
       <title>On-the-fly Definition Augmentation of <fixed-case>LLM</fixed-case>s for Biomedical <fixed-case>NER</fixed-case></title>
       <author><first>Monica</first><last>Munnangi</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Sergey</first><last>Feldman</last><affiliation>Allen Institute for Artificial Intelligence and Data Cowboys</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <author><first>Silvio</first><last>Amir</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Tom</first><last>Hope</last><affiliation>Allen Institute for Artificial Intelligence and Hebrew University, Hebrew University of Jerusalem</affiliation></author>
       <author><first>Aakanksha</first><last>Naik</last><affiliation>Allen Institute for Artificial Intelligence and National Institutes of Health</affiliation></author>
@@ -3004,7 +3004,7 @@
     <paper id="216">
       <title>Towards Improved Multi-Source Attribution for Long-Form Answer Generation</title>
       <author><first>Nilay</first><last>Patel</last></author>
-      <author><first>Shivashankar</first><last>Subramanian</last><affiliation>Amazon</affiliation></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last><affiliation>Amazon</affiliation></author>
       <author><first>Siddhant</first><last>Garg</last><affiliation>Meta</affiliation></author>
       <author><first>Pratyay</first><last>Banerjee</last><affiliation>Amazon</affiliation></author>
       <author><first>Amita</first><last>Misra</last><affiliation>Amazon</affiliation></author>
@@ -3158,7 +3158,7 @@
       <title>Media Bias Detection Across Families of Language Models</title>
       <author><first>Iffat</first><last>Maab</last></author>
       <author><first>Edison</first><last>Marrese-Taylor</last><affiliation>The Univesity of Tokyo and AIST, National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Yutaka</first><last>Matsuo</last><affiliation>The University of Tokyo and The University of Tokyo</affiliation></author>
       <pages>4083-4098</pages>
       <abstract>Bias in reporting can influence the public’s opinion on relevant societal issues. Examples include informational bias (selective presentation of content) and lexical bias (specific framing of content through linguistic choices). The recognition of media bias is arguably an area where NLP can contribute to the “social good”. Traditional NLP models have shown good performance in classifying media bias, but require careful model design and extensive tuning. In this paper, we ask how well prompting of large language models can recognize media bias. Through an extensive empirical study including a wide selection of pre-trained models, we find that prompt-based techniques can deliver comparable performance to traditional models with greatly reduced effort and that, similar to traditional models, the availability of context substantially improves results. We further show that larger models can leverage different kinds of context simultaneously, obtaining further performance improvements.</abstract>
@@ -3217,7 +3217,7 @@
       <title>Beyond Borders: Investigating Cross-Jurisdiction Transfer in Legal Case Summarization</title>
       <author><first>Santosh</first><last>T.y.s.s</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Vatsal</first><last>Venkatkrishna</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <author><first>Matthias</first><last>Grabmair</last><affiliation>Technische Universität München</affiliation></author>
       <pages>4136-4150</pages>
       <abstract>Legal professionals face the challenge of managing an overwhelming volume of lengthy judgments, making automated legal case summarization crucial. However, prior approaches mainly focused on training and evaluating these models within the same jurisdiction. In this study, we explore the cross-jurisdictional generalizability of legal case summarization models. Specifically, we explore how to effectively summarize legal cases of a target jurisdiction where reference summaries are not available. In particular, we investigate whether supplementing models with unlabeled target jurisdiction corpus and extractive silver summaries obtained from unsupervised algorithms on target data enhances transfer performance. Our comprehensive study on three datasets from different jurisdictions highlights the role of pre-training in improving transfer performance. We shed light on the pivotal influence of jurisdictional similarity in selecting optimal source datasets for effective transfer. Furthermore, our findings underscore that incorporating unlabeled target data yields improvements in general pre-trained models, with additional gains when silver summaries are introduced. This augmentation is especially valuable when dealing with extractive datasets and scenarios featuring limited alignment between source and target jurisdictions. Our study provides key insights for developing adaptable legal case summarization systems, transcending jurisdictional boundaries.</abstract>
@@ -3255,9 +3255,9 @@
       <author><first>Maite</first><last>Heredia</last><affiliation>Universidad del País Vasco</affiliation></author>
       <author><first>Julen</first><last>Etxaniz</last><affiliation>HiTZ Center, University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Muitze</first><last>Zulaika</last><affiliation>Orai NLP Technologies</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Jeremy</first><last>Barnes</last><affiliation>University of the Basque Country</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>University of the Basque Country. UPV/EHU.</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>University of the Basque Country. UPV/EHU.</affiliation></author>
       <pages>4177-4188</pages>
       <abstract>XNLI is a popular Natural Language Inference (NLI) benchmark widely used to evaluate cross-lingual Natural Language Understanding (NLU) capabilities across languages. In this paper, we expand XNLI to include Basque, a low-resource language that can greatly benefit from transfer-learning approaches. The new dataset, dubbed XNLIeu, has been developed by first machine-translating the English XNLI corpus into Basque, followed by a manual post-edition step. We have conducted a series of experiments using mono- and multilingual LLMs to assess a) the effect of professional post-edition on the MT system; b) the best cross-lingual strategy for NLI in Basque; and c) whether the choice of the best cross-lingual strategy is influenced by the fact that the dataset is built by translation. The results show that post-edition is necessary and that the translate-train cross-lingual strategy obtains better results overall, although the gain is lower when tested in a dataset that has been built natively from scratch. Our code and datasets are publicly available under open licenses.</abstract>
       <url hash="64d8566e">2024.naacl-long.234</url>
@@ -3288,9 +3288,9 @@
       <author><first>Chani</first><last>Jung</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Junho</first><last>Myung</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Jiho</first><last>Jin</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Juho</first><last>Kim</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>4205-4224</pages>
       <abstract>Most hate speech datasets neglect the cultural diversity within a single language, resulting in a critical shortcoming in hate speech detection. To address this, we introduce CREHate, a CRoss-cultural English Hate speech dataset. To construct CREHate, we follow a two-step procedure: 1) cultural post collection and 2) cross-cultural annotation. We sample posts from the SBIC dataset, which predominantly represents North America, and collect posts from four geographically diverse English-speaking countries (Australia, United Kingdom, Singapore, and South Africa) using culturally hateful keywords we retrieve from our survey. Annotations are collected from the four countries plus the United States to establish representative labels for each country. Our analysis highlights statistically significant disparities across countries in hate speech annotations. Only 56.2% of the posts in CREHate achieve consensus among all countries, with the highest pairwise label difference rate of 26%. Qualitative analysis shows that label disagreement occurs mostly due to different interpretations of sarcasm and the personal bias of annotators on divisive topics. Lastly, we evaluate large language models (LLMs) under a zero-shot setting and show that current LLMs tend to show higher accuracies on Anglosphere country labels in CREHate.Our dataset and codes are available at: https://github.com/nlee0212/CREHate</abstract>
       <url hash="16d696c2">2024.naacl-long.236</url>
@@ -3330,7 +3330,7 @@
       <author><first>Michaela</first><last>Watkins</last></author>
       <author><first>Afra</first><last>Alishahi</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Arianna</first><last>Bisazza</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last><affiliation>Tilburg University</affiliation></author>
       <pages>4250-4261</pages>
       <abstract>Interpretability research has shown that self-supervised Spoken LanguageModels (SLMs) encode a wide variety of features in human speech from theacoustic, phonetic, phonological, syntactic and semantic levels, to speakercharacteristics. The bulk of prior research on representations of phonologyhas focused on segmental features such as phonemes; the encoding ofsuprasegmental phonology (such as tone and stress patterns) in SLMs is not yetwell understood. Tone is a suprasegmental feature that is present in more thanhalf of the world’s languages. This paper aims to analyze the tone encodingcapabilities of SLMs, using Mandarin and Vietnamese as case studies. We showthat SLMs encode lexical tone to a significant degree even when they aretrained on data from non-tonal languages. We further find that SLMs behavesimilarly to native and non-native human participants in tone and consonantperception studies, but they do not follow the same developmental trajectory.</abstract>
       <url hash="863ba613">2024.naacl-long.239</url>
@@ -3383,11 +3383,11 @@
       <author><first>Marek</first><last>Šuppa</last></author>
       <author><first>Hila</first><last>Gonen</last></author>
       <author><first>Joseph Marvin</first><last>Imperial</last></author>
-      <author><first>Börje F.</first><last>Karlsson</last></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last></author>
       <author><first>Peiqin</first><last>Lin</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>LJ</first><last>Miranda</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Arij</first><last>Riabi</last></author>
       <author><first>Yuval</first><last>Pinter</last></author>
       <pages>4322-4337</pages>
@@ -3468,11 +3468,11 @@
     </paper>
     <paper id="249">
       <title>Memory Augmented Language Models through Mixture of Word Experts</title>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last><affiliation>Research, Google</affiliation></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last><affiliation>Research, Google</affiliation></author>
       <author><first>James</first><last>Lee-Thorp</last><affiliation>Google</affiliation></author>
       <author><first>Isaac</first><last>Noble</last><affiliation>Google</affiliation></author>
       <author><first>Chung-Ching</first><last>Chang</last><affiliation>Google</affiliation></author>
-      <author><first>David</first><last>Uthus</last><affiliation>Google</affiliation></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last><affiliation>Google</affiliation></author>
       <pages>4425-4438</pages>
       <abstract>Scaling up the number of parameters of language models has proven to be an effective approach to improve performance. For dense models, increasing their size proportionally increases their computational footprint. In this work, we seek to aggressively decouple learning capacity and FLOPs through Mixture-of-Experts (MoE) style models with large knowledge-rich vocabulary based routing functions. Our proposed approach, dubbed Mixture of Word Experts (MoWE), can be seen as a memory augmented model, where a large set of word-specific experts play the role of a sparse memory. We demonstrate that MoWE performs significantly better than the T5 family of models with similar number of FLOPs in a variety of NLP tasks. Moreover, MoWE outperforms traditional MoE models on knowledge intensive tasks and has similar performance to complex memory augmented approaches that often require to invoke custom mechanisms to search the sparse memory.</abstract>
       <url hash="19c20427">2024.naacl-long.249</url>
@@ -3512,7 +3512,7 @@
       <author><first>Lijia</first><last>Sun</last><affiliation>Amazon</affiliation></author>
       <author><first>Yi</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
       <author><first>Saab</first><last>Mansour</last><affiliation>Amazon</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>4455-4480</pages>
       <abstract>Single document news summarization has seen substantial progress on faithfulness in recent years, driven by research on the evaluation of factual consistency, or hallucinations. We ask whether these advances carry over to other text summarization domains. We propose a new evaluation benchmark on topic-focused dialogue summarization, generated by LLMs of varying sizes. We provide binary sentence- level human annotations of the factual consistency of these summaries along with detailed explanations of factually inconsistent sentences. Our analysis shows that existing LLMs hallucinate significant amounts of factual errors in the dialogue domain, regardless of the model’s size. On the other hand, when LLMs, including GPT-4, serve as binary factual evaluators, they perform poorly and can be outperformed by prevailing state-of-the-art specialized factuality evaluation metrics. Finally, we conducted an analysis of hallucination types with a curated error taxonomy. We find that there are diverse errors and error distributions in model-generated summaries and that non-LLM based metrics can capture all error types better than LLM-based evaluators.</abstract>
       <url hash="9c78df03">2024.naacl-long.251</url>
@@ -3552,7 +3552,7 @@
       <author><first>Qiongkai</first><last>Xu</last><affiliation>Macquarie University</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>University College London, University of London</affiliation></author>
       <author><first>Benjamin</first><last>Rubinstein</last><affiliation>The University of Melbourne and The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
       <pages>4515-4534</pages>
       <abstract>While multilingual machine translation (MNMT) systems hold substantial promise, they also have security vulnerabilities. Our research highlights that MNMT systems can be susceptible to a particularly devious style of backdoor attack, whereby an attacker injects poisoned data into a low-resource language pair to cause malicious translations in other languages, including high-resource languages.Our experimental results reveal that injecting less than 0.01% poisoned data into a low-resource language pair can achieve an average 20% attack success rate in attacking high-resource language pairs. This type of attack is of particular concern, given the larger attack surface of languages inherent to low-resource settings. Our aim is to bring attention to these vulnerabilities within MNMT systems with the hope of encouraging the community to address security concerns in machine translation, especially in the context of low-resource languages.</abstract>
       <url hash="0f996671">2024.naacl-long.254</url>
@@ -3566,8 +3566,8 @@
       <author><first>Joseph Chee</first><last>Chang</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Maria</first><last>Antoniak</last></author>
       <author><first>Erin</first><last>Bransom</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Tal</first><last>August</last></author>
       <pages>4535-4550</pages>
       <abstract>Scientific jargon can confuse researchers when they read materials from other domains. Identifying and translating jargon for individual researchers could speed up research, but current methods of jargon identification mainly use corpus-level familiarity indicators rather than modeling researcher-specific needs, which can vary greatly based on each researcher’s background. We collect a dataset of over 10K term familiarity annotations from 11 computer science researchers for terms drawn from 100 paper abstracts. Analysis of this data reveals that jargon familiarity and information needs vary widely across annotators, even within the same sub-domain (e.g., NLP). We investigate features representing domain, subdomain, and individual knowledge to predict individual jargon familiarity. We compare supervised and prompt-based approaches, finding that prompt-based methods using information about the individual researcher (e.g., personal publications, self-defined subfield of research) yield the highest accuracy, though the task remains difficult and supervised approaches have lower false positive rates. This research offers insights into features and methods for the novel task of integrating personal data into scientific jargon identification.</abstract>
@@ -3631,7 +3631,7 @@
     <paper id="259">
       <title>Generating Attractive and Authentic Copywriting from Customer Reviews</title>
       <author><first>Yu-Xiang</first><last>Lin</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last><affiliation>Academia Sinica</affiliation></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last><affiliation>Academia Sinica</affiliation></author>
       <pages>4629-4642</pages>
       <abstract>The goal of product copywriting is to capture the interest of potential buyers by emphasizing the features of products through text descriptions. As e-commerce platforms offer a wide range of services, it’s becoming essential to dynamically adjust the styles of these auto-generated descriptions. Typical approaches to copywriting generation often rely solely on specified product attributes, which may result in dull and repetitive content. To tackle this issue, we propose to generate copywriting based on customer reviews, as they provide firsthand practical experiences with products, offering a richer source of information than just product attributes. We have developed a sequence-to-sequence framework, enhanced with reinforcement learning, to produce copywriting that is attractive, authentic, and rich in information. Our framework outperforms all existing baseline and zero-shot large language models, including LLaMA-2-chat-7B and GPT-3.5, in terms of both attractiveness and faithfulness. Furthermore, this work features the use of LLMs for aspect-based summaries collection and argument allure assessment. Experiments demonstrate the effectiveness of using LLMs for marketing domain corpus construction. The code and the dataset is publicly available at: <url>https://github.com/YuXiangLin1234/Copywriting-Generation</url>.</abstract>
       <url hash="4bd138ca">2024.naacl-long.259</url>
@@ -3726,7 +3726,7 @@
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>4713-4730</pages>
       <abstract>Prior study shows that pre-training techniques can boost the performance of visual document understanding (VDU), which typically requires models to gain abilities to perceive and reason both document texts and layouts (e.g., locations of texts and table-cells). To this end, we propose visually guided generative text-layout pre-training, named ViTLP. Given a document image, the model optimizes hierarchical language and layout modeling objectives to generate the interleaved text and layout sequence. In addition, to address the limitation of processing long documents by Transformers, we introduce a straightforward yet effective multi-segment generative pre-training scheme, facilitating ViTLP to process word-intensive documents of any length. ViTLP can function as a native OCR model to localize and recognize texts of document images. Besides, ViTLP can be effectively applied to various downstream VDU tasks. Extensive experiments show that ViTLP achieves competitive performance over existing baselines on benchmark VDU tasks, including information extraction, document classification, and document question answering.</abstract>
       <url hash="1c2c992f">2024.naacl-long.264</url>
@@ -3819,7 +3819,7 @@
       <author><first>Tom</first><last>Calamai</last></author>
       <author><first>Pierre-Henri</first><last>Paris</last><affiliation>Télécom Paris</affiliation></author>
       <author><first>Chloé</first><last>Clavel</last><affiliation>INRIA and Télécom Paris</affiliation></author>
-      <author><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>4810-4845</pages>
       <abstract>We introduce MAFALDA, a benchmark for fallacy classification that merges and unites previous fallacy datasets. It comes with a taxonomy that aligns, refines, and unifies existing classifications of fallacies. We further provide a manual annotation of a part of the dataset together with manual explanations for each annotation. We propose a new annotation scheme tailored for subjective NLP tasks, and a new evaluation method designed to handle subjectivity. We then evaluate several language models under a zero-shot learning setting and human performances on MAFALDA to assess their capability to detect and classify fallacies.</abstract>
       <url hash="3536cb4b">2024.naacl-long.270</url>
@@ -3831,7 +3831,7 @@
       <title>Diffusion Glancing Transformer for Parallel Sequence-to-Sequence Learning</title>
       <author><first>Lihua</first><last>Qian</last><affiliation>ByteDance</affiliation></author>
       <author><first>Mingxuan</first><last>Wang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <pages>4846-4862</pages>
       <abstract>Previously, non-autoregressive models were widely recognized as being superior in generation efficiency but inferior in generation quality due to the challenges of modeling multiple target modalities.To enhance the multi-modality modeling ability, we propose the diffusion glancing transformer, which employs a modality diffusion process and residual glancing sampling.The modality diffusion process is a discrete process that interpolates the multi-modal distribution along the decoding steps, and the residual glancing sampling approach guides the model to continuously learn the remaining modalities across the layers. Experimental results on various machine translation and text generation benchmarks demonstrate that DIFFGLAT achieves better generation accuracy while maintaining fast decoding speed compared with both autoregressive and non-autoregressive models.</abstract>
@@ -4005,10 +4005,10 @@
     <paper id="284">
       <title>Revisiting subword tokenization: A case study on affixal negation in large language models</title>
       <author><first>Thinh</first><last>Truong</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>The University of Melbourne</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>5082-5095</pages>
       <abstract>In this work, we measure the impact of affixal negation on modern English large language models (LLMs). In affixal negation, the negated meaning is expressed through a negative morpheme, which is potentially challenging for LLMs as their tokenizers are often not morphologically plausible. We conduct extensive experiments using LLMs with different subword tokenization methods, which lead to several insights on the interaction between tokenization performance and negation sensitivity. Despite some interesting mismatches between tokenization accuracy and negation detection performance, we show that models can, on the whole, reliably recognize the meaning of affixal negation.</abstract>
       <url hash="2527dd63">2024.naacl-long.284</url>
@@ -4195,7 +4195,7 @@
       <author><first>Raja</first><last>Marjieh</last><affiliation>Princeton University</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Thomas</first><last>Griffiths</last><affiliation>Princeton University</affiliation></author>
+      <author id="thomas-l-griffiths"><first>Thomas</first><last>Griffiths</last><affiliation>Princeton University</affiliation></author>
       <author><first>Faeze</first><last>Brahman</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>5303-5324</pages>
       <abstract>We explore the creative problem-solving capabilities of modern LLMs in a novel constrained setting. To this end, we create MACGYVER, an automatically generated dataset consisting of over 1,600 real-world problems deliberately designed to trigger innovative usage of objects and necessitate out-of-the-box thinking. We then present our collection to both LLMs and humans to compare and contrast their problem-solving abilities. MACGYVER is challenging for both groups, but in unique and complementary ways. For instance, humans excel in tasks they are familiar with but struggle with domain-specific knowledge, leading to a higher variance. In contrast, LLMs, exposed to a variety of specialized knowledge, attempt broader problems but fail by proposing physically-infeasible actions. Finally, we provide a detailed error analysis of LLMs, and demonstrate the potential of enhancing their problem-solving ability with novel prompting techniques such as iterative step-wise reflection and divergent-convergent thinking.This work (1) introduces a fresh arena for intelligent agents focusing on intricate aspects of physical reasoning, planning, and unconventional thinking, which supplements the existing spectrum of machine intelligence; and (2) provides insight into the constrained problem-solving capabilities of both humans and AI.</abstract>
@@ -4222,7 +4222,7 @@
       <author><first>Fei</first><last>Mi</last></author>
       <author><first>Boyang</first><last>Xue</last></author>
       <author><first>Yi</first><last>Chen</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>5345-5363</pages>
       <abstract>Numerous works are proposed to align large language models (LLMs) with human intents to better fulfill instructions, ensuring they are trustful and helpful.Nevertheless, some human instructions are often malicious or misleading and following them will lead to untruthful and unsafe responses.Previous work rarely focused on understanding how LLMs manage instructions based on counterfactual premises, referred to here as inductive instructions, which may stem from users’ false beliefs or malicious intents.In this paper, we aim to reveal the behaviors of LLMs towards inductive instructions and enhance their truthfulness and helpfulness accordingly. Specifically, we first introduce a benchmark of Inductive Instructions (INDust), where the false knowledge is incorporated into instructions in multiple different styles. After extensive human and automatic evaluations, we uncovered a universal vulnerability among LLMs in processing inductive instructions.Additionally, we identified that different inductive styles affect the models’ ability to identify the same underlying errors,and the complexity of the underlying assumptions also influences the model’s performance.Motivated by these results, we propose Dual-critique prompting to improve LLM robustness against inductive instructions.Our experiments demonstrate that Dual-critique prompting significantly bolsters the robustness of a diverse array of LLMs, even when confronted with varying degrees of inductive instruction complexity and differing inductive styles.</abstract>
@@ -4295,7 +4295,7 @@
       <author><first>Jiaqi</first><last>Han</last><affiliation>Tencent Cloud</affiliation></author>
       <author><first>Gang</first><last>Yuan</last></author>
       <author><first>Binghuai</first><last>Lin</last><affiliation>Tencent</affiliation></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <author><first>Yunbo</first><last>Cao</last><affiliation>Tencent</affiliation></author>
       <pages>5431-5452</pages>
       <abstract>In the constant updates of the product dialogue systems, we need to retrain the natural language understanding (NLU) model as new data from the real users would be merged into the existing data accumulated in the last updates. Within the newly added data, new intents would emerge and might have semantic entanglement with the existing intents, e.g. new intents that are semantically too specific or generic are actually a subset or superset of some existing intents in the semantic space, thus impairing the robustness of the NLU model.As the first attempt to solve this problem, we setup a new benchmark consisting of 4 Dialogue Version Control dataSets (DialogVCS). We formulate the intent detection with imperfect data in the system update as a multi-label classification task with positive but unlabeled intents, which asks the models to recognize all the proper intents, including the ones with semantic entanglement, in the inference.We also propose comprehensive baseline models and conduct in-depth analyses for the benchmark, showing that the semantically entangled intents can be effectively recognized with an automatic workflow. Our code and dataset are available at <url>https://github.com/Zefan-Cai/DialogVCS</url>.</abstract>
@@ -4328,7 +4328,7 @@
       <author><first>Juqianqian</first><last>Juqianqian</last></author>
       <author><first>Dejiyangla</first><last>Dejiyangla</last></author>
       <author><first>Yujia</first><last>Peng</last><affiliation>Peking University</affiliation></author>
-      <author><first>Kenny</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last></author>
       <pages>5472-5487</pages>
       <abstract>Social media is a valuable data source for exploring mental health issues. However, previous studies have predominantly focused on the semantic content of these posts, overlooking the importance of their temporal attributes, as well as the evolving nature of mental disorders and symptoms.In this paper, we study the causality between psychiatric symptoms and life events, as well as among different symptoms from social media posts, which leads to better understanding of the underlying mechanisms of mental disorders. By applying these extracted causality features to tasks such as diagnosis point detection and early risk detection of depression, we notice considerable performance enhancement. This indicates that causality information extracted from social media data can boost the efficacy of mental disorder diagnosis and treatment planning.</abstract>
@@ -4429,7 +4429,7 @@
       <author><first>Huimin</first><last>Zeng</last></author>
       <author><first>Yimeng</first><last>Lu</last></author>
       <author><first>Lanyu</first><last>Shang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Dong</first><last>Wang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>5628-5643</pages>
       <abstract>The proliferation of online misinformation has posed significant threats to public interest. While numerous online users actively participate in the combat against misinformation, many of such responses can be characterized by the lack of politeness and supporting facts. As a solution, text generation approaches are proposed to automatically produce counter-misinformation responses. Nevertheless, existing methods are often trained end-to-end without leveraging external knowledge, resulting in subpar text quality and excessively repetitive responses. In this paper, we propose retrieval augmented response generation for online misinformation (RARG), which collects supporting evidence from scientific sources and generates counter-misinformation responses based on the evidences. In particular, our RARG consists of two stages: (1) evidence collection, where we design a retrieval pipeline to retrieve and rerank evidence documents using a database comprising over 1M academic articles; (2) response generation, in which we align large language models (LLMs) to generate evidence-based responses via reinforcement learning from human feedback (RLHF). We propose a reward function to maximize the utilization of the retrieved evidence while maintaining the quality of the generated text, which yields polite and factual responses that clearly refutes misinformation. To demonstrate the effectiveness of our method, we study the case of COVID-19 and perform extensive experiments with both in- and cross-domain datasets, where RARG consistently outperforms baselines by generating high-quality counter-misinformation responses.</abstract>
@@ -4472,7 +4472,7 @@
       <author><first>Moontae</first><last>Lee</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <author><first>Honglak</first><last>Lee</last><affiliation>University of Michigan - Ann Arbor and LG AI Research</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last><affiliation>Singapore University of Technology and Design</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>5668-5680</pages>
       <abstract>Large language models (LLMs) have demonstrated substantial commonsense understanding through numerous benchmark evaluations. However, their understanding of cultural commonsense remains largely unexamined. In this paper, we conduct a comprehensive examination of the capabilities and limitations of several state-of-the-art LLMs in the context of cultural commonsense tasks. Using several general and cultural commonsense benchmarks, we find that (1) LLMs have a significant discrepancy in performance when tested on culture-specific commonsense knowledge for different cultures; (2) LLMs’ general commonsense capability is affected by cultural context; and (3) The language used to query the LLMs can impact their performance on cultural-related tasks.Our study points to the inherent bias in the cultural understanding of LLMs and provides insights that can help develop culturally-aware language models.</abstract>
       <url hash="260bc798">2024.naacl-long.316</url>
@@ -4684,7 +4684,7 @@
       <author><first>Myra</first><last>Cheng</last><affiliation>Stanford University</affiliation></author>
       <author><first>Lucia</first><last>Zheng</last></author>
       <author><first>Esin</first><last>Durmus</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>5942-5959</pages>
       <abstract>The use of words to convey speaker’s intent is traditionally distinguished from the ‘mention’ of words for quoting what someone said, or pointing out properties of a word. Here we show that computationally modeling this use-mention distinction is crucial for dealing with counterspeech online. Counterspeech that refutes problematic content often mentions harmful language but is not harmful itself (e.g., calling a vaccine dangerous is not the same as expressing disapproval of someone for calling vaccines dangerous). We show that even recent language models fail at distinguishing use from mention, and that this failure propagates to two key downstream tasks: misinformation and hate speech detection, resulting in censorship of counterspeech. We introduce prompting mitigations that teach the use-mention distinction, and show they reduce these errors. Our work highlights the importance of the use-mention distinction for NLP and CSS and offers ways to address it.</abstract>
       <url hash="5c420d61">2024.naacl-long.331</url>
@@ -4723,7 +4723,7 @@
     <paper id="334">
       <title><fixed-case>A</fixed-case>fri<fixed-case>MTE</fixed-case> and <fixed-case>A</fixed-case>fri<fixed-case>COMET</fixed-case>: Enhancing <fixed-case>COMET</fixed-case> to Embrace Under-resourced <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Jiayi</first><last>Wang</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last></author>
       <author><first>Sweta</first><last>Agrawal</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>Marek</first><last>Masiak</last></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Instituto Superior Técnico, INESC-ID and Unbabel</affiliation></author>
@@ -4836,7 +4836,7 @@
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Xingyun</first><last>Wang</last></author>
       <author><first>Yanke</first><last>Zhou</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>6087-6100</pages>
       <abstract>LLMs (Large Language Models) usually interact with users in the form of dialogue and generate responses following their instructions, which naturally require dialogue comprehension abilities. However, dialogue comprehension is a general language ability which is hard to be evaluated directly. In this work, we propose to perform the evaluation focusing on the factual consistency issue with the help of the dialogue summarization task. Besides evaluating and analyzing the dialogue summarization performance (DIAC-Sum) of different LLMs, we also derive factual questions from the generated summaries and use them as a more flexible measurement of dialogue comprehension (DIAC-FactQA). Our evaluation shows that, on average, 26.8% of the summaries generated by LLMs contain factual inconsistency. Even ChatGPT, the strongest model evaluated, has such errors in 16% of its summaries. For answering the factual questions, which is more challenging, the average error rate of all evaluated LLMs is 36.1%. Both results indicate serious deficiencies. Detailed analysis shows that the understanding of subject/object of the conversation is still challenging for LLMs. Furthermore, to stimulate and enhance the dialogue comprehension ability of LLMs, we propose a fine-tuning paradigm with auto-constructed multi-task data, which achieved a relative error rate reduction of 11% on DIAC-FactQA.</abstract>
       <url hash="6ba84193">2024.naacl-long.338</url>
@@ -4849,7 +4849,7 @@
       <author><first>Changjiang</first><last>Gao</last><affiliation>nanjing university</affiliation></author>
       <author><first>Hongda</first><last>Hu</last></author>
       <author><first>Peng</first><last>Hu</last><affiliation>nanjing university</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Jixing</first><last>Li</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>6101-6117</pages>
@@ -4865,8 +4865,8 @@
       <author><first>YiFan</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science</affiliation></author>
       <author><first>Yaodong</first><last>Yu</last><affiliation>Electrical Engineering &amp; Computer Science Department, University of California Berkeley</affiliation></author>
       <author><first>Dhruv</first><last>Madeka</last><affiliation>Amazon</affiliation></author>
-      <author><first>Dean</first><last>Foster</last></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Himabindu</first><last>Lakkaraju</last><affiliation>Harvard University</affiliation></author>
       <author><first>Sham</first><last>Kakade</last><affiliation>University of Washington and Harvard University</affiliation></author>
       <pages>6118-6136</pages>
@@ -4932,8 +4932,8 @@
       <author><first>Zhong</first><last>Meng</last><affiliation>Google</affiliation></author>
       <author><first>Dongseong</first><last>Hwang</last></author>
       <author><first>Qiujia</first><last>Li</last><affiliation>Google</affiliation></author>
-      <author><first>Khe Chai</first><last>Sim</last><affiliation>Google</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Google</affiliation></author>
+      <author id="khe-chai-sim"><first>Khe Chai</first><last>Sim</last><affiliation>Google</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Google</affiliation></author>
       <author><first>James</first><last>Qin</last><affiliation>Google</affiliation></author>
       <author><first>Xingyu</first><last>Cai</last><affiliation>Google</affiliation></author>
       <author><first>Adam</first><last>Stooke</last></author>
@@ -4998,7 +4998,7 @@
       <author><first>Ashna</first><last>Khetan</last></author>
       <author><first>Matthias</first><last>Gerstgrasser</last></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>6279-6296</pages>
       <abstract>Effective conversation requires common ground: a shared understanding between the participants. Common ground, however, does not emerge spontaneously in conversation. Speakers and listeners work together to both identify and construct a shared basis while avoiding misunderstanding. To accomplish grounding, humans rely on a range of dialogue acts, like clarification (What do you mean?) and acknowledgment (I understand.). However, it is unclear whether large language models (LLMs) generate text that reflects human grounding. To this end, we curate a set of grounding acts and propose corresponding metrics that quantify attempted grounding. We study whether LLM generations contain grounding acts, simulating turn-taking from several dialogue datasets and comparing results to humans. We find that—compared to humans—LLMs generate language with less conversational grounding, instead generating text that appears to simply presume common ground. To understand the roots of the identified grounding gap, we examine the role of instruction tuning and preference optimization, finding that training on contemporary preference data leads to a reduction in generated grounding acts. Altogether, we highlight the need for more research investigating conversational grounding in human-AI interaction.</abstract>
       <url hash="e2f3f9c6">2024.naacl-long.348</url>
@@ -5129,7 +5129,7 @@
       <author><first>Jiahuan</first><last>Li</last></author>
       <author><first>Shanbo</first><last>Cheng</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>6445-6459</pages>
       <abstract>Large Language Models (LLM) have demonstrated their strong ability in the field of machine translation, yet they suffer from high computational cost and latency. Therefore, transferring translation knowledge from giant LLMs to medium-sized machine translation models is a promising research direction. However, traditional knowledge distillation methods ignore the capability of student and teacher models, therefore repeatedly teaching student models on the knowledge they have learned, and failing to extend to novel contexts and knowledge. In this paper, we propose a framework called MT-Patcher, which transfers knowledge from LLMs to existing MT models in a selective, comprehensive and proactive manner. Considering the current translation ability of student MT models, we only identify and correct their translation errors, instead of distilling the whole translation from the teacher. Leveraging the strong language abilities of LLMs, we instruct LLM teachers to synthesize diverse contexts and anticipate more potential errors for the student. Experiment results on translating both specific language phenomena and general MT benchmarks demonstrate that finetuning the MT model on about 10% examples can achieve comparable results to the traditional knowledge distillation method, and synthesized potential errors and diverse contexts further improve MT performances on unseen contexts and words.</abstract>
       <url hash="a4ae9442">2024.naacl-long.358</url>
@@ -5140,7 +5140,7 @@
     <paper id="359">
       <title><fixed-case>T</fixed-case>o<fixed-case>XCL</fixed-case>: A Unified Framework for Toxic Speech Detection and Explanation</title>
       <author><first>Nhat M.</first><last>Hoang</last></author>
-      <author><first>Xuan Long</first><last>Do</last></author>
+      <author id="xuan-long-do"><first>Xuan Long</first><last>Do</last></author>
       <author><first>Duc Anh</first><last>Do</last></author>
       <author><first>Duc Anh</first><last>Vu</last></author>
       <author><first>Luu</first><last>Anh Tuan</last></author>
@@ -5195,7 +5195,7 @@
       <author><first>Omar</first><last>Attia</last><affiliation>Apple</affiliation></author>
       <author><first>Ronak</first><last>Pradeep</last></author>
       <author><first>Saloni</first><last>Potdar</last><affiliation>Apple</affiliation></author>
-      <author><first>Alexander</first><last>Rush</last><affiliation>Cornell University and School of Engineering and Applied Sciences, Harvard University</affiliation></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last><affiliation>Cornell University and School of Engineering and Applied Sciences, Harvard University</affiliation></author>
       <author><first>Umar Farooq</first><last>Minhas</last></author>
       <author><first>Yunyao</first><last>Li</last><affiliation>Adobe Systems</affiliation></author>
       <pages>6524-6536</pages>
@@ -5220,7 +5220,7 @@
     <paper id="365">
       <title><fixed-case>GPTS</fixed-case>core: Evaluate as You Desire</title>
       <author><first>Jinlan</first><last>Fu</last></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Zhengbao</first><last>Jiang</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <pages>6556-6576</pages>
@@ -5235,7 +5235,7 @@
       <author><first>Fengyu</first><last>Cai</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Heinz</first><last>Koeppl</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt</affiliation></author>
       <pages>6577-6595</pages>
       <abstract>Large language models (LLMs) have demonstrated remarkable capabilities across a wide range of tasks in various domains. Despite their impressive performance, they can be unreliable due to factual errors in their generations. Assessing their confidence and calibrating them across different tasks can help mitigate risks and enable LLMs to produce better generations. There has been a lot of recent research aiming to address this, but there has been no comprehensive overview to organize it and to outline the main lessons learned. The present survey aims to bridge this gap. In particular, we outline the challenges and we summarize recent technical advancements for LLM confidence estimation and calibration. We further discuss their applications and suggest promising directions for future work.</abstract>
@@ -5251,7 +5251,7 @@
       <author><first>Yuchen</first><last>Jiang</last><affiliation>AIWaves Inc.</affiliation></author>
       <author><first>Haoyang</first><last>Huang</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Dongdong</first><last>Zhang</last><affiliation>Microsoft Research Asia</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Tom</first><last>Kocmi</last><affiliation>Microsoft</affiliation></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
       <pages>6596-6610</pages>
@@ -5313,7 +5313,7 @@
       <author><first>Tommaso</first><last>Green</last></author>
       <author><first>Ines</first><last>Reinig</last></author>
       <author><first>Kai</first><last>Eckert</last><affiliation>Mannheim University of Applied Sciences</affiliation></author>
-      <author><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <pages>6660-6675</pages>
       <abstract>Extensive efforts in the past have been directed toward the development of summarization datasets. However, a predominant number of these resources have been (semi)-automatically generated, typically through web data crawling. This resulted in subpar resources for training and evaluating summarization systems, a quality compromise that is arguably due to the substantial costs associated with generating ground-truth summaries, particularly for diverse languages and specialized domains. To address this issue, we present ACLSum, a novel summarization dataset carefully crafted and evaluated by domain experts. In contrast to previous datasets, ACLSum facilitates multi-aspect summarization of scientific papers, covering challenges, approaches, and outcomes in depth. Through extensive experiments, we evaluate the quality of our resource and the performance of models based on pretrained language models (PLMs) and state-of-the-art large language models (LLMs). Additionally, we explore the effectiveness of extract-then-abstract versus abstractive end-to-end summarization within the scholarly domain on the basis of automatically discovered aspects. While the former performs comparably well to the end-to-end approach with pretrained language models regardless of the potential error propagation issue, the prompting-based approach with LLMs shows a limitation in extracting sentences from source documents.</abstract>
       <url hash="2e635470">2024.naacl-long.371</url>
@@ -5514,7 +5514,7 @@
       <author><first>Sabur</first><last>Butt</last></author>
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Hector</first><last>Ceballos</last><affiliation>Tecnologico de Monterrey</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Thamar</first><last>Solorio</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and University of Houston</affiliation></author>
       <pages>6972-6987</pages>
       <abstract>The paper focuses on the marginalization of indigenous language communities in the face of rapid technological advancements. We highlight the cultural richness of these languages and the risk they face of being overlooked in the realm of Natural Language Processing (NLP). We aim to bridge the gap between these communities and researchers, emphasizing the need for inclusive technological advancements that respect indigenous community perspectives. We show the NLP progress of indigenous Latin American languages and the survey that covers the status of indigenous languages in Latin America, their representation in NLP, and the challenges and innovations required for their preservation and development. The paper contributes to the current literature in understanding the need and progress of NLP for indigenous communities of Latin America, specifically low-resource and indigenous communities in general.</abstract>
@@ -5565,7 +5565,7 @@
       <author><first>Jinheon</first><last>Baek</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sukmin</first><last>Cho</last></author>
       <author><first>Sung Ju</first><last>Hwang</last><affiliation>Korea Advanced Institute of Science and Technology and AITRICS</affiliation></author>
-      <author><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>7036-7050</pages>
       <abstract>Retrieval-Augmented Large Language Models (LLMs), which incorporate the non-parametric knowledge from external knowledge bases into LLMs, have emerged as a promising approach to enhancing response accuracy in several tasks, such as Question-Answering (QA). However, even though there are various approaches dealing with queries of different complexities, they either handle simple queries with unnecessary computational overhead or fail to adequately address complex multi-step queries; yet, not all user requests fall into only one of the simple or complex categories. In this work, we propose a novel adaptive QA framework that can dynamically select the most suitable strategy for (retrieval-augmented) LLMs from the simplest to the most sophisticated ones based on the query complexity. Also, this selection process is operationalized with a classifier, which is a smaller LM trained to predict the complexity level of incoming queries with automatically collected labels, obtained from actual predicted outcomes of models and inherent inductive biases in datasets. This approach offers a balanced strategy, seamlessly adapting between the iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval methods, in response to a range of query complexities. We validate our model on a set of open-domain QA datasets, covering multiple query complexities, and show that ours enhances the overall efficiency and accuracy of QA systems, compared to relevant baselines including the adaptive retrieval approaches. Code is available at: https://github.com/starsuzi/Adaptive-RAG.</abstract>
       <url hash="9ce9621f">2024.naacl-long.389</url>
@@ -5577,7 +5577,7 @@
       <title>Knowing What <fixed-case>LLM</fixed-case>s <fixed-case>DO</fixed-case> <fixed-case>NOT</fixed-case> Know: A Simple Yet Effective Self-Detection Method</title>
       <author><first>Yukun</first><last>Zhao</last></author>
       <author><first>Lingyong</first><last>Yan</last><affiliation>Baidu Inc.</affiliation></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Guoliang</first><last>Xing</last></author>
       <author><first>Chong</first><last>Meng</last><affiliation>Baidu</affiliation></author>
       <author><first>Shuaiqiang</first><last>Wang</last><affiliation>Baidu Inc.</affiliation></author>
@@ -5598,7 +5598,7 @@
       <author><first>Yftah</first><last>Ziser</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Edoardo</first><last>Ponti</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>7064-7083</pages>
       <abstract>Are Large Language Models (LLMs) temporally grounded? Since LLMs cannot perceive and interact with the environment, it is impossible to answer this question directly. Instead, we provide LLMs with textual narratives and probe them with respect to their common-sense knowledge of the structure and duration of events, their ability to order events along a timeline, and self-consistency within their temporal model (e.g., temporal relations such as after and before are mutually exclusive for any pair of events). We evaluate state-of-the-art LLMs (such as LLaMA 2 and GPT-4) on three tasks reflecting these abilities. Generally, we find that LLMs lag significantly behind both human performance as well as small-scale, specialised LMs. In-context learning, instruction tuning, and chain-of-thought prompting reduce this gap only to a limited degree. Crucially, LLMs struggle the most with self-consistency, displaying incoherent behaviour in at least 27.23% of their predictions. Contrary to expectations, we also find that scaling the model size does not guarantee positive gains in performance. To explain these results, we study the sources from which LLMs may gather temporal information: we find that sentence ordering in unlabelled texts, available during pre-training, is only weakly correlated with event ordering. Moreover, public instruction tuning mixtures contain few temporal tasks. Hence, we conclude that current LLMs lack a consistent temporal model of textual narratives.</abstract>
       <url hash="745d0ff6">2024.naacl-long.391</url>
@@ -5614,7 +5614,7 @@
       <author><first>Zhiyang</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Lu</first><last>Xiang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>7084-7095</pages>
       <abstract>Text image machine translation (TIMT) is a task that translates source texts embedded in the image to target translations. The existing TIMT task mainly focuses on text-line-level images. In this paper, we extend the current TIMT task and propose a novel task, **D**ocument **I**mage **M**achine **T**ranslation to **Markdown** (**DIMT2Markdown**), which aims to translate a source document image with long context and complex layout structure to markdown-formatted target translation.We also introduce a novel framework, **D**ocument **I**mage **M**achine **T**ranslation with **D**ynamic multi-pre-trained models **A**ssembling (**DIMTDA**).A dynamic model assembler is used to integrate multiple pre-trained models to enhance the model’s understanding of layout and translation capabilities.Moreover, we build a novel large-scale **Do**cument image machine **T**ranslation dataset of **A**rXiv articles in markdown format (**DoTA**), containing 126K image-translation pairs.Extensive experiments demonstrate the feasibility of end-to-end translation of rich-text document images and the effectiveness of DIMTDA.</abstract>
@@ -5695,7 +5695,7 @@
       <title><fixed-case>F</fixed-case>-<fixed-case>MALLOC</fixed-case>: Feed-forward Memory Allocation for Continual Learning in Neural Machine Translation</title>
       <author><first>Junhong</first><last>Wu</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yuchen</first><last>Liu</last></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>7180-7192</pages>
       <abstract>In the evolving landscape of Neural Machine Translation (NMT), the pretrain-then-finetune paradigm has yielded impressive results. However, the persistent challenge of Catastrophic Forgetting (CF) remains a hurdle. While previous work has introduced Continual Learning (CL) methods to address CF, these approaches grapple with the delicate balance between avoiding forgetting and maintaining system extensibility. To address this, we propose a CL method, named <tex-math>\textbf{F-MALLOC}</tex-math> (<tex-math>\textbf{F}</tex-math>eed-forward <tex-math>\textbf{M}</tex-math>emory <tex-math>\textbf{ALLOC}</tex-math>ation). F-MALLOC is inspired by recent insights highlighting that feed-forward layers emulate neural memories and encapsulate crucial translation knowledge. It decomposes feed-forward layers into discrete memory cells and allocates these memories to different tasks. By learning to allocate and safeguard these memories, our method effectively alleviates CF while ensuring robust extendability. Besides, we propose a comprehensive assessment protocol for multi-stage CL of NMT systems. Experiments conducted following this new protocol showcase the superior performance of F-MALLOC, evidenced by higher BLEU scores and almost zero forgetting.</abstract>
       <url hash="a150ad14">2024.naacl-long.398</url>
@@ -5711,7 +5711,7 @@
       <author><first>Andrea</first><last>Young</last><affiliation>Brigham and Women’s Hospital, Harvard University</affiliation></author>
       <author><first>Geoffrey</first><last>Young</last><affiliation>Harvard Medical School</affiliation></author>
       <author><first>Jan-Willem</first><last>van de Meent</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last><affiliation>Northeastern University, Brown University and Northeastern University</affiliation></author>
       <pages>7193-7210</pages>
       <abstract>Many diagnostic errors occur because clinicians cannot easily access relevant information in patient Electronic Health Records (EHRs). In this work we propose a method to use LLMs to identify pieces of evidence in patient EHR data that indicate increased or decreased risk of specific diagnoses; our ultimate aim is to increase access to evidence and reduce diagnostic errors. In particular, we propose a Neural Additive Model to make predictions backed by evidence with individualized risk estimates at time-points where clinicians are still uncertain, aiming to specifically mitigate delays in diagnosis and errors stemming from an incomplete differential. To train such a model, it is necessary to infer temporally fine-grained retrospective labels of eventual “true” diagnoses. We do so with LLMs, to ensure that the input text is from before a confident diagnosis can be made. We use an LLM to retrieve an initial pool of evidence, but then refine this set of evidence according to correlations learned by the model. We conduct an in-depth evaluation of the usefulness of our approach by simulating how it might be used by a clinician to decide between a pre-defined list of differential diagnoses.</abstract>
       <url hash="bd6f0f8c">2024.naacl-long.399</url>
@@ -5858,7 +5858,7 @@
       <author><first>Manjunath</first><last>Hegde</last></author>
       <author><first>Koustuv</first><last>Dasgupta</last></author>
       <author><first>Niloy</first><last>Ganguly</last><affiliation>Indian Institute of Technology Kharagpur,</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <author><first>Pawan</first><last>Goyal</last><affiliation>IIT Kharagpur</affiliation></author>
       <pages>7391-7403</pages>
       <abstract>We study the problem of automatically annotating relevant numerals (GAAP metrics) occurring in the financial documents with their corresponding XBRL tags. Different from prior works, we investigate the feasibility of solving this extreme classification problem using a generative paradigm through instruction tuning of Large Language Models (LLMs). To this end, we leverage metric metadata informationto frame our target outputs while proposing a parameter efficient solution for the task using LoRA. We perform experiments on two recently released financial numeric labeling datasets. Our proposed model, **FLAN-FinXC**, achieves new state-of-the-art performances on both the datasets, outperforming several strong baselines. We explain the better scores of our proposed model by demonstrating its capability for zero-shot as well as the least frequently occurring tags. Also, even when we fail to predict the XBRL tags correctly, our generated output has substantial overlap with the ground-truth in majority of the cases.</abstract>
@@ -5942,7 +5942,7 @@
       <title><fixed-case>L</fixed-case>ean<fixed-case>R</fixed-case>easoner: Boosting Complex Logical Reasoning with Lean</title>
       <author><first>Dongwei</first><last>Jiang</last></author>
       <author><first>Marcio</first><last>Fonseca</last></author>
-      <author><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>7497-7510</pages>
       <abstract>Large language models (LLMs) often struggle with complex logical reasoning due to logical inconsistencies and the inherent difficulty ofsuch reasoning. We use Lean, a theorem proving framework, to address these challenges. By formalizing logical reasoning problems intotheorems within Lean, we can solve them by proving or disproving the corresponding theorems. This method reduces the risk of logical inconsistencies with the help of Lean’s symbolic solver. It also enhances our ability to treat complex reasoning tasks using Lean’s extensive library of theorem proofs. Our method achieves state-of-the-art performance on the FOLIO dataset and achieves performance near this level on ProofWriter. Notably, these results were accomplished by fine-tuning on fewer than 100 in-domain samples for each dataset</abstract>
       <url hash="c2040acd">2024.naacl-long.416</url>
@@ -5955,7 +5955,7 @@
       <author><first>Eldon</first><last>Schoop</last><affiliation>Apple</affiliation></author>
       <author><first>Alan</first><last>Leung</last><affiliation>Apple</affiliation></author>
       <author><first>Titus</first><last>Barik</last><affiliation>Apple</affiliation></author>
-      <author><first>Jeffrey</first><last>Bigham</last><affiliation>Apple</affiliation></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last><affiliation>Apple</affiliation></author>
       <author><first>Jeffrey</first><last>Nichols</last><affiliation>Apple</affiliation></author>
       <pages>7511-7525</pages>
       <abstract>Many large language models (LLMs) struggle to consistently generate UI code that compiles and produces visually relevant designs. Existing approaches to improve generation rely either on expensive human feedback or distilling a proprietary model. In this paper, we explore the use of automated feedback (compilers and multi-modal models) to guide LLMs to generate high-quality UI code. Our method starts with an existing LLM and iteratively produces improved models by self-generating a large synthetic dataset using an original model, applying automated tools to aggressively filter, score, and de-duplicate the data into a refined higher quality dataset, and producing a new LLM by finetuning the original on the refined dataset.We applied our approach to several open-source LLMs and compared the resulting performance to baseline models with both automated metrics and human preferences.Our results show the resulting models outperform all other downloadable baselines and approach the performance of larger proprietary models.</abstract>
@@ -6033,7 +6033,7 @@
     </paper>
     <paper id="423">
       <title><fixed-case>PELMS</fixed-case>: Pre-training for Effective Low-Shot Multi-Document Summarization</title>
-      <author><first>Joseph</first><last>Peper</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
+      <author id="joseph-j-peper"><first>Joseph</first><last>Peper</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Wenzhao</first><last>Qiu</last></author>
       <author><first>Lu</first><last>Wang</last><affiliation>Northeastern University, Northeastern University and University of Michigan</affiliation></author>
       <pages>7652-7674</pages>
@@ -6304,8 +6304,8 @@
       <author><first>Brian</first><last>Formento</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Wenjie</first><last>Feng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Chuan-Sheng</first><last>Foo</last><affiliation>Centre for Frontier AI Research, A*STAR and Institute for Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>8005-8028</pages>
       <abstract>Language models (LMs) are indispensable tools for natural language processing tasks, but their vulnerability to adversarial attacks remains a concern. While current research has explored adversarial training techniques, their improvements to defend against word-level attacks have been limited. In this work, we propose a novel approach called Semantic Robust Defence (SemRoDe), a Macro Adversarial Training strategy to enhance the robustness of LMs. Drawing inspiration from recent studies in the image domain, we investigate and later confirm that in a discrete data setting such as language, adversarial samples generated via word substitutions do indeed belong to an adversarial domain exhibiting a high Wasserstein distance from the base domain. Our method learns a robust representation that bridges these two domains. We hypothesize that if samples were not projected into an adversarial domain, but instead to a domain with minimal shift, it would improve attack robustness. We align the domains by incorporating a new distance-based objective. With this, our model is able to learn more generalized representations by aligning the model’s high-level output features and therefore better handling unseen adversarial samples. This method can be generalized across word embeddings, even when they share minimal overlap at both vocabulary and word-substitution levels. To evaluate the effectiveness of our approach, we conduct experiments on BERT and RoBERTa models on three datasets. The results demonstrate promising state-of-the-art robustness.</abstract>
       <url hash="91a0022d">2024.naacl-long.443</url>
@@ -6332,7 +6332,7 @@
       <author><first>Chong</first><last>Li</last><affiliation>Institute of automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>8058-8076</pages>
       <abstract>Multilingual generative models obtain remarkable cross-lingual in-context learning capabilities through pre-training on large-scale corpora. However, they still exhibit a performance bias toward high-resource languages and learn isolated distributions of multilingual sentence representations, which may hinder knowledge transfer across languages. To bridge this gap, we propose a simple yet effective cross-lingual alignment framework exploiting pairs of translation sentences. It aligns the internal sentence representations across different languages via multilingual contrastive learning and aligns outputs by following cross-lingual instructions in the target language. Experimental results show that even with less than 0.1<tex-math>{\textperthousand}</tex-math> of pre-training tokens, our alignment framework significantly boosts the cross-lingual abilities of generative language models and mitigates the performance gap. Further analyses reveal that it results in a better internal multilingual representation distribution of multilingual models.</abstract>
       <url hash="1e5bca34">2024.naacl-long.445</url>
@@ -6593,8 +6593,8 @@
       <author><first>Minjoon</first><last>Seo</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Richard</first><last>James</last><affiliation>Research, Facebook</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <pages>8371-8384</pages>
       <abstract>We introduce REPLUG, a retrieval-augmented language modeling framework that treats the language model (LM) as a black box and augments it with a tuneable retrieval model. Unlike prior retrieval-augmented LMs that train language models with special cross-attention mechanisms to encode the retrieved text, REPLUG simply prepends retrieved documents to the input for the frozen black-box LM. This simple design can be easily applied to any existing language models. Furthermore, we show that the LM can be used to supervise the retrieval model, which can then find documents that help the LM make better predictions. Our experiments demonstrate that REPLUG with the tuned retriever significantly improves the performance of GPT-3 (175B) on language modeling by 6.3%, as well as the performance of Codex on five-shot MMLU by 5.1%. Code is publicly released at github.com/swj0419/REPLUG.</abstract>
       <url hash="2563ca5b">2024.naacl-long.463</url>
@@ -6781,7 +6781,7 @@
     <paper id="477">
       <title>Large Human Language Models: A Need and the Challenges</title>
       <author><first>Nikita</first><last>Soni</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <pages>8631-8646</pages>
@@ -6797,9 +6797,9 @@
       <author><first>Kejian</first><last>Shi</last></author>
       <author><first>Katherine</first><last>He</last></author>
       <author><first>Longtian</first><last>Ye</last></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>8647-8664</pages>
       <abstract>Recent studies have found that summaries generated by large language models (LLMs) are favored by human annotators over the original reference summaries in commonly used summarization datasets. Therefore, we study an LLM-as-reference learning setting for smaller text summarization models to investigate whether their performance can be substantially improved. To this end, we use LLMs as both oracle summary generators for standard supervised fine-tuning and oracle summary evaluators for efficient contrastive learning that leverages the LLMs’ supervision signals. We conduct comprehensive experiments with source news articles and find that (1) summarization models trained under the LLM-as-reference setting achieve significant performance improvement in both LLM and human evaluations; (2) contrastive learning outperforms standard supervised fine-tuning under both low and high resource settings. Our experimental results also enable a meta-analysis of LLMs’ summary evaluation capacities under a challenging setting, showing that LLMs are not well-aligned with human evaluators. Particularly, our expert human evaluation reveals remaining nuanced performance gaps between LLMs and our fine-tuned models, which LLMs fail to capture. Thus, we call for further studies into both the potential and challenges of using LLMs in summarization model development.</abstract>
@@ -6889,7 +6889,7 @@
     <paper id="485">
       <title>Mix-Initiative Response Generation with Dynamic Prefix Tuning</title>
       <author><first>Yuxiang</first><last>Nie</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
       <pages>8748-8761</pages>
@@ -6921,7 +6921,7 @@
       <author><first>Tanu</first><last>Goyal</last></author>
       <author><first>Narjis</first><last>Asad</last></author>
       <author><first>Aparna</first><last>Garimella</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>8786-8806</pages>
       <abstract>The pervasive influence of social biases in language data has sparked the need for benchmark datasets that capture and evaluate these biases in Large Language Models (LLMs). Existing efforts predominantly focus on English language and the Western context, leaving a void for a reliable dataset that encapsulates India’s unique socio-cultural nuances. To bridge this gap, we introduce IndiBias, a comprehensive benchmarking dataset designed specifically for evaluating social biases in the Indian context. We filter and translate the existing CrowS-Pairs dataset to create a benchmark dataset suited to the Indian context in Hindi language. Additionally, we leverage LLMs including ChatGPT and InstructGPT to augment our dataset with diverse societal biases and stereotypes prevalent in India. The included bias dimensions encompass gender, religion, caste, age, region, physical appearance, and occupation. We also build a resource to address intersectional biases along three intersectional dimensions. Our dataset contains 800 sentence pairs and 300 tuples for bias measurement across different demographics. The dataset is available in English and Hindi, providing a size comparable to existing benchmark datasets. Furthermore, using IndiBias we compare ten different language models on multiple bias measurement metrics. We observed that the language models exhibit more bias across a majority of the intersectional groups. All the scripts utilized and datasets created in this study are publicly available.</abstract>
       <url hash="e10877d8">2024.naacl-long.487</url>
@@ -6934,7 +6934,7 @@
     <meta>
       <booktitle>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)</booktitle>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Helena</first><last>Gomez</last></editor>
+      <editor id="helena-gomez"><first>Helena</first><last>Gomez</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
@@ -6990,7 +6990,7 @@
       <title>Advancing Regular Language Reasoning in Linear Recurrent Neural Networks</title>
       <author><first>Ting-Han</first><last>Fan</last></author>
       <author><first>Ta-Chung</first><last>Chi</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University and Carnegie Mellon University</affiliation></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last><affiliation>Carnegie Mellon University and Carnegie Mellon University</affiliation></author>
       <pages>45-53</pages>
       <abstract>In recent studies, linear recurrent neural networks (LRNNs) have achieved Transformer-level performance in natural language and long-range modeling, while offering rapid parallel training and constant inference cost. With the resurgence of interest in LRNNs, we study whether they can learn the hidden rules in training sequences, such as the grammatical structures of regular language. We theoretically analyze some existing LRNNs and discover their limitations in modeling regular language. Motivated by this analysis, we propose a new LRNN equipped with a block-diagonal and input-dependent transition matrix. Experiments suggest that the proposed model is the only LRNN capable of performing length extrapolation on regular language tasks such as Sum, Even Pair, and Modular Arithmetic. The code is released at <url>https://github.com/tinghanf/RegluarLRNN</url>.</abstract>
       <url hash="de9bcdc6">2024.naacl-short.4</url>
@@ -7085,7 +7085,7 @@
     <paper id="11">
       <title>Unified Examination of Entity Linking in Absence of Candidate Sets</title>
       <author><first>Nicolas</first><last>Ong</last></author>
-      <author><first>Hassan</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last><affiliation>Simon Fraser University</affiliation></author>
       <pages>113-123</pages>
       <abstract>Despite remarkable strides made in the development of entity linking systems in recent years, a comprehensive comparative analysis of these systems using a unified framework is notably absent. This paper addresses this oversight by introducing a new black-box benchmark and conducting a comprehensive evaluation of all state-of-the-art entity linking methods. We use an ablation study to investigate the impact of candidate sets on the performance of entity linking. Our findings uncover exactly how much such entity linking systems depend on candidate sets, and how much this limits the general applicability of each system. We present an alternative approach to candidate sets, demonstrating that leveraging the entire in-domain candidate set can serve as a viable substitute for certain models. We show the trade-off between less restrictive candidate sets, increased inference time and memory footprint for some models.</abstract>
@@ -7109,7 +7109,7 @@
     <paper id="13">
       <title><fixed-case>SKICSE</fixed-case>: Sentence Knowable Information Prompted by <fixed-case>LLM</fixed-case>s Improves Contrastive Sentence Embeddings</title>
       <author><first>Fangwei</first><last>Ou</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>141-146</pages>
       <abstract>Contrastive learning, which utilizes positive pairs and in-batch negatives to optimize the loss objective, has been proven to be an effective method for learning sentence embeddings. However, we argue that the previous methods of constructing positive pairs only through dropout perturbation or entailment relation are limited. Since there is more sentence knowable information (SKI) to be mined, such as sentence external knowledge, semantic analysis, and grammatical description. In this work, we first hand-craft a simple and effective prompt template that is able to obtain the knowable information of input sentences from LLMs (e.g., LLaMA). Then we combine the original sentence and its knowable information to form a positive pair for contrastive learning. We evaluate our method on standard semantic textual similarity (STS) tasks. Experimental results show that our unsupervised and supervised models using <tex-math>\text{BERT}_\text{base}</tex-math> achieve an average of 78.65% and 82.45% Spearman’s correlation respectively, a 2.40% and 0.88% improvement compared to SimCSE. Our model outperforms the previous state-of-the-art model PromptBERT in both unsupervised and supervised settings and specifically yields a new state-of-the-art performance in supervised setting.</abstract>
       <url hash="91a8a49f">2024.naacl-short.13</url>
@@ -7121,7 +7121,7 @@
       <title>A Multi-Aspect Framework for Counter Narrative Evaluation using Large Language Models</title>
       <author><first>Jaylen</first><last>Jones</last><affiliation>Ohio State University, Columbus</affiliation></author>
       <author><first>Lingbo</first><last>Mo</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last><affiliation>Ohio State University</affiliation></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last><affiliation>Ohio State University</affiliation></author>
       <author><first>Huan</first><last>Sun</last><affiliation>The Ohio State University, Columbus</affiliation></author>
       <pages>147-168</pages>
       <abstract>Counter narratives - informed responses to hate speech contexts designed to refute hateful claims and de-escalate encounters - have emerged as an effective hate speech intervention strategy. While previous work has proposed automatic counter narrative generation methods to aid manual interventions, the evaluation of these approaches remains underdeveloped. Previous automatic metrics for counter narrative evaluation lack alignment with human judgment as they rely on superficial reference comparisons instead of incorporating key aspects of counter narrative quality as evaluation criteria. To address prior evaluation limitations, we propose a novel evaluation framework prompting LLMs to provide scores and feedback for generated counter narrative candidates using 5 defined aspects derived from guidelines from counter narrative specialized NGOs. We found that LLM evaluators achieve strong alignment to human-annotated scores and feedback and outperform alternative metrics, indicating their potential as multi-aspect, reference-free and interpretable evaluators for counter narrative evaluation.</abstract>
@@ -7183,7 +7183,7 @@
       <author><first>Xiyuan</first><last>Zou</last></author>
       <author><first>Yiran</first><last>Li</last></author>
       <author><first>Ian</first><last>Porada</last><affiliation>McGill University</affiliation></author>
-      <author><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last><affiliation>McGill University, Mila Research Institute and Microsoft</affiliation></author>
       <pages>212-219</pages>
       <abstract>Current end-to-end coreference resolution models combine detection of singleton mentions and antecedent linking into a single step. In contrast, singleton detection was often treated as a separate step in the pre-neural era. In this work, we show that separately parameterizing these two sub-tasks also benefits end-to-end neural coreference systems. Specifically, we add a singleton detector to the coarse-to-fine (C2F) coreference model, and design an anaphoricity-aware span embedding and singleton detection loss. Our method significantly improves model performance on OntoNotes and four additional datasets.</abstract>
       <url hash="937ea21b">2024.naacl-short.19</url>
@@ -7238,7 +7238,7 @@
       <author><first>Guanhua</first><last>Zhang</last><affiliation>Max Planck Institute for Intelligent Systems, Max-Planck Institute</affiliation></author>
       <author><first>Wenqi</first><last>Fan</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
       <author><first>Qing</first><last>Li</last><affiliation>The Hong Kong Polytechnic University, Hong Kong Polytechnic University</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Gaowen</first><last>Liu</last></author>
       <author><first>Sijia</first><last>Liu</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Shiyu</first><last>Chang</last><affiliation>UC Santa Barbara</affiliation></author>
@@ -7275,7 +7275,7 @@
       <author><first>Sebastian</first><last>Gehrmann</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Lining</first><last>Zhang</last></author>
       <author><first>Saad</first><last>Mahamood</last><affiliation>trivago N.V.</affiliation></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
       <pages>272-281</pages>
@@ -7288,7 +7288,7 @@
     <paper id="26">
       <title>More room for language: Investigating the effect of retrieval on language models</title>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
+      <author id="lucas-georges-gabriel-charpentier"><first>Lucas</first><last>Charpentier</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Sondre</first><last>Wold</last></author>
       <pages>282-305</pages>
       <abstract>Retrieval-augmented language models pose a promising alternative to standard language modeling. During pretraining, these models search in a corpus of documents for contextually relevant information that could aid the language modeling objective. We introduce an ‘ideal retrieval’ methodology to study these models in a fully controllable setting. We conduct an extensive evaluation to examine how retrieval augmentation affects the behavior of the underlying language model. Among other things, we observe that these models: (i) save substantially less world knowledge in their weights, (ii) are better at understanding local context and inter-word dependencies, but (iii) are worse at comprehending global context.</abstract>
@@ -7299,7 +7299,7 @@
     </paper>
     <paper id="27">
       <title>Discourse-Aware In-Context Learning for Temporal Expression Normalization</title>
-      <author><first>Akash</first><last>Gautam</last></author>
+      <author id="akash-kumar-gautam"><first>Akash</first><last>Gautam</last></author>
       <author><first>Lukas</first><last>Lange</last><affiliation>Robert Bosch GmbH, Bosch</affiliation></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
       <pages>306-315</pages>
@@ -7435,7 +7435,7 @@
     <paper id="37">
       <title>Zero-Shot vs. Translation-Based Cross-Lingual Transfer: The Case of Lexical Gaps</title>
       <author><first>Abteen</first><last>Ebrahimi</last><affiliation>University of Colorado, Boulder</affiliation></author>
-      <author><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>443-458</pages>
       <abstract>Cross-lingual transfer can be achieved through two main approaches: zero-shot transfer or machine translation (MT). While the former has been the dominant approach, both have been shown to be competitive. In this work, we compare the current performance and long-term viability of these methods. We leverage lexical gaps to create a multilingual question answering dataset, which provides a difficult domain for evaluation. Both approaches struggle in this setting, though zero-shot transfer performs better, as current MT outputs are not specific enough for the task. Using oracle translation offers the best performance, showing that this approach can perform well long-term, however current MT quality is a bottleneck. We also conduct an exploratory study to see if humans produce translations sufficient for the task with only general instructions. We find this to be true for the majority of translators, but not all. This indicates that while translation has the potential to outperform zero-shot approaches, creating MT models that generate accurate task-specific translations may not be straightforward.</abstract>
       <url hash="889a85c8">2024.naacl-short.37</url>
@@ -7462,7 +7462,7 @@
       <author><first>Heike</first><last>Adel</last><affiliation>Hochschule der Medien (University of Applied Sciences)</affiliation></author>
       <author><first>Lukas</first><last>Lange</last><affiliation>Robert Bosch GmbH, Bosch</affiliation></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>469-480</pages>
       <abstract>Continual learning aims at incrementally acquiring new knowledge while not forgetting existing knowledge. To overcome catastrophic forgetting, methods are either rehearsal-based, i.e., store data examples from previous tasks for data replay, or isolate parameters dedicated to each task. However, rehearsal-based methods raise privacy and memory issues, and parameter-isolation continual learning does not consider interaction between tasks, thus hindering knowledge transfer. In this work, we propose MoCL, a rehearsal-free **Mo**dular and **C**ompositional Continual **L**earning framework which continually adds new modules to language models and composes them with existing modules. Experiments on various benchmarks show that MoCL outperforms state of the art and effectively facilitates knowledge transfer.</abstract>
       <url hash="434243ab">2024.naacl-short.39</url>
@@ -7487,7 +7487,7 @@
       <author><first>Xiaoyu</first><last>Liu</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Huayang</first><last>Li</last></author>
       <author><first>Yoshinari</first><last>Fujinuma</last><affiliation>AWS AI Labs</affiliation></author>
-      <author><first>Maria</first><last>Nadejde</last><affiliation>Amazon</affiliation></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last><affiliation>Amazon</affiliation></author>
       <author><first>Xing</first><last>Niu</last><affiliation>Amazon</affiliation></author>
       <author><first>Ron</first><last>Litman</last><affiliation>Amazon</affiliation></author>
       <author><first>Yair</first><last>Kittenplon</last><affiliation>Amazon</affiliation></author>
@@ -7556,8 +7556,8 @@
       <title>Do Multilingual Language Models Think Better in <fixed-case>E</fixed-case>nglish?</title>
       <author><first>Julen</first><last>Etxaniz</last></author>
       <author><first>Gorka</first><last>Azkune</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
       <pages>550-564</pages>
       <abstract>Translate-test is a popular technique to improve the performance of multilingual language models. This approach works by translating the input into English using an external machine translation system before running inference. However, these improvements can be attributed to the use of a separate translation system, which is typically trained on large amounts of parallel data not seen by the language model. In this work, we introduce a new approach called self-translate that leverages the few-shot translation capabilities of multilingual language models. This allows us to analyze the effect of translation in isolation. Experiments over 5 tasks show that self-translate consistently outperforms direct inference, demonstrating that language models are unable to leverage their full multilingual potential when prompted in non-English languages. Our code is available at https://github.com/juletx/self-translate.</abstract>
@@ -7601,7 +7601,7 @@
     <paper id="49">
       <title>Self-Improving for Zero-Shot Named Entity Recognition with Large Language Models</title>
       <author><first>Tingyu</first><last>Xie</last><affiliation>Zhejiang University</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Yan</first><last>Zhang</last><affiliation>Tencent</affiliation></author>
       <author><first>Zuozhu</first><last>Liu</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Hongwei</first><last>Wang</last><affiliation>Zhejiang University</affiliation></author>
@@ -7618,7 +7618,7 @@
       <author><first>Ruirui</first><last>Chen</last><affiliation>Institute of High Performance Computing, Singapore, A*STAR</affiliation></author>
       <author><first>Ruochen</first><last>Zhao</last></author>
       <author><first>Wenhan</first><last>Xia</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>594-602</pages>
       <abstract>To mitigate forgetting, existing lifelong event detection methods typically maintain a memory module and replay the stored memory data during the learning of a new task. However, the simple combination of memory data and new-task samples can still result in substantial forgetting of previously acquired knowledge, which may occur due to the potential overlap between the feature distribution of new data and the previously learned embedding space. Moreover, the model suffers from overfitting on the few memory samples rather than effectively remembering learned patterns. To address the challenges of forgetting and overfitting, we propose a novel method based on embedding space separation and compaction. Our method alleviates forgetting of previously learned tasks by forcing the feature distribution of new data away from the previous embedding space. It also mitigates overfitting by a memory calibration mechanism that encourages memory data to be close to its prototype to enhance intra-class compactness. In addition, the learnable parameters of the new task are initialized by drawing upon acquired knowledge from the previously learned task to facilitate forward knowledge transfer. With extensive experiments, we demonstrate that our method can significantly outperform previous state-of-the-art approaches.</abstract>
       <url hash="719cd392">2024.naacl-short.50</url>
@@ -7666,7 +7666,7 @@
       <title>Efficient Information Extraction in Few-Shot Relation Classification through Contrastive Representation Learning</title>
       <author><first>Philipp</first><last>Borchert</last><affiliation>IÉSEG School of Management and KU Leuven</affiliation></author>
       <author><first>Jochen</first><last>De Weerdt</last><affiliation>KU Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>638-646</pages>
       <abstract>Differentiating relationships between entity pairs with limited labeled instances poses a significant challenge in few-shot relation classification. Representations of textual data extract rich information spanning the domain, entities, and relations. In this paper, we introduce a novel approach to enhance information extraction combining multiple sentence representations and contrastive learning. While representations in relation classification are commonly extracted using entity marker tokens, we argue that substantial information within the internal model representations remains untapped. To address this, we propose aligning multiple sentence representations, such as the CLS] token, the [MASK] token used in prompting, and entity marker tokens. Our method employs contrastive learning to extract complementary discriminative information from these individual representations. This is particularly relevant in low-resource settings where information is scarce. Leveraging multiple sentence representations is especially effective in distilling discriminative information for relation classification when additional information, like relation descriptions, are not available. We validate the adaptability of our approach, maintaining robust performance in scenarios that include relation descriptions, and showcasing its flexibility to adapt to different resource constraints.</abstract>
       <url hash="59591ae3">2024.naacl-short.54</url>
@@ -7728,7 +7728,7 @@
       <author><first>Richard</first><last>Fang</last></author>
       <author><first>Rohan</first><last>Bindu</last></author>
       <author><first>Akul</first><last>Gupta</last></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford University</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford University</affiliation></author>
       <author><first>Daniel</first><last>Kang</last><affiliation>Department of Computer Science</affiliation></author>
       <pages>681-687</pages>
       <abstract>As large language models (LLMs) have increased in their capabilities, so doestheir potential for dual use. To reduce harmful outputs, produces and vendors ofLLMs have used reinforcement learning with human feedback (RLHF). In tandem,LLM vendors have been increasingly enabling fine-tuning of their most powerfulmodels. However, concurrent work has shown that fine-tuning can remove RLHFprotections. We may expect that the most powerful models currently available(GPT-4) are less susceptible to fine-tuning attacks. In this work, we show the contrary: fine-tuning allows attackers to remove RLHFprotections with as few as 340 examples and a 95% success rate. These trainingexamples can be automatically generated with weaker models. We further show thatremoving RLHF protections does not decrease usefulness on non-censored outputs,providing evidence that our fine-tuning strategy does not decrease usefulnessdespite using weaker models to generate training data. Our results show the needfor further research on protections on LLMs.</abstract>
@@ -7797,7 +7797,7 @@
       <author><first>Michiel</first><last>de Jong</last><affiliation>Augment Computing</affiliation></author>
       <author><first>Luke</first><last>Vilnis</last><affiliation>Google</affiliation></author>
       <author><first>Santiago</first><last>Ontanon</last><affiliation>Google and Drexel University</affiliation></author>
-      <author><first>William</first><last>Cohen</last><affiliation>Google DeepMind</affiliation></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Sumit</first><last>Sanghai</last><affiliation>Research, Google</affiliation></author>
       <author><first>Joshua</first><last>Ainslie</last><affiliation>Google</affiliation></author>
       <pages>737-744</pages>
@@ -7823,7 +7823,7 @@
       <title>Improving Factuality in Clinical Abstractive Multi-Document Summarization by Guided Continued Pre-training</title>
       <author><first>Ahmed</first><last>Elhady</last></author>
       <author><first>Khaled</first><last>Elsayed</last><affiliation>Cairo University</affiliation></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Reka AI</affiliation></author>
       <pages>755-761</pages>
       <abstract>Factual accuracy is an important property of neural abstractive summarization models, especially in fact-critical domains such as the clinical literature. In this work, we introduce a guided continued pre-training stage for encoder-decoder models that improves their understanding of the factual attributes of documents, which is followed by supervised fine-tuning on summarization. Our approach extends the pre-training recipe of BART to incorporate 3 additional objectives based on PICO spans, which capture the population, intervention, comparison, and outcomes related to a clinical study. Experiments on multi-document summarization in the clinical domain demonstrate that our approach is competitive with prior work, improving the quality and factuality of the summaries and achieving the best-published results in factual accuracy on the MSLR task.</abstract>
@@ -7838,7 +7838,7 @@
       <author><first>Nicolas</first><last>Garneau</last></author>
       <author><first>Emanuele</first><last>Bugliarello</last><affiliation>Google</affiliation></author>
       <author><first>Yova</first><last>Kementchedjhieva</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>762-771</pages>
       <abstract>Facts are subject to contingencies and can be true or false in different circumstances. One such contingency is time, wherein some facts mutate over a given period, e.g., the president of a country or the winner of a championship. Trustworthy language models ideally identify mutable facts as such and process them accordingly. We create MuLan, a benchmark for evaluating the ability of English language models to anticipate time-contingency, covering both 1:1 and 1:N relations. We hypothesize that mutable facts are encoded differently than immutable ones, hence being easier to update. In a detailed evaluation of six popular large language models, we consistently find differences in the LLMs’ confidence, representations, and update behavior, depending on the mutability of a fact. Our findings should inform future work on the injection of and induction of time-contingent knowledge to/from LLMs.</abstract>
       <url hash="7c6b6d41">2024.naacl-short.67</url>
@@ -7864,8 +7864,8 @@
       <author><first>Xiaochuang</first><last>Han</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <pages>783-791</pages>
       <abstract>Language models (LMs) often struggle to pay enough attention to the input context, and generate texts that are unfaithful or contain hallucinations. To mitigate this issue, we present context-aware decoding (CAD), which follows a contrastive output distribution that amplifies the difference between the output probabilities when a model is used with and without context. Our experiments show that CAD, without additional training, significantly improves the faithfulness of different LM families, including OPT, GPT, LLaMA, and FLAN-T5 for summarization tasks (e.g., 14.3% gain for LLaMA in factuality metrics). Furthermore, CAD is particularly effective in overriding a model’s prior knowledge when it contradicts the provided context, leading to substantial improvements in tasks where resolving the knowledge conflict is essential. Our code is publicly released at https://github.com/xhan77/context-aware-decoding.</abstract>
       <url hash="9f7c64c6">2024.naacl-short.69</url>
@@ -7912,9 +7912,9 @@
       <author><first>Gunu</first><last>Jho</last></author>
       <author><first>Inchul</first><last>Hwang</last></author>
       <author><first>Georgios</first><last>Vardaxoglou</last></author>
-      <author><first>Aimilios</first><last>Chalamandaris</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
-      <author><first>Spyros</first><last>Raptis</last></author>
+      <author id="aimilios-chalamandaris"><first>Aimilios</first><last>Chalamandaris</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="spyros-raptis"><first>Spyros</first><last>Raptis</last></author>
       <pages>808-813</pages>
       <abstract>Emotion detection in textual data has received growing interest in recent years, as it is pivotal for developing empathetic human-computer interaction systems.This paper introduces a method for categorizing emotions from text, which acknowledges and differentiates between the diversified similarities and distinctions of various emotions.Initially, we establish a baseline by training a transformer-based model for standard emotion classification, achieving state-of-the-art performance. We argue that not all misclassifications are of the same importance, as there are perceptual similarities among emotional classes.We thus redefine the emotion labeling problem by shifting it from a traditional classification model to an ordinal classification one, where discrete emotions are arranged in a sequential order according to their valence levels.Finally, we propose a method that performs ordinal classification in the two-dimensional emotion space, considering both valence and arousal scales.The results show that our approach not only preserves high accuracy in emotion prediction but also significantly reduces the magnitude of errors in cases of misclassification.</abstract>
       <url hash="d611f048">2024.naacl-short.72</url>
@@ -7987,7 +7987,7 @@
       <author><first>Amanpreet</first><last>Singh</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Doug</first><last>Downey</last><affiliation>Allen Institute for Artificial Intelligence and Northwestern University</affiliation></author>
       <author><first>Sergey</first><last>Feldman</last><affiliation>Allen Institute for Artificial Intelligence and Data Cowboys</affiliation></author>
-      <author><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>1-11</pages>
       <abstract>Topic pages aggregate useful information about an entity or concept into a single succinct and accessible article. Automated creation of topic pages would enable their rapid curation as information resources, providing an alternative to traditional web search. While most prior work has focused on generating topic pages about biographical entities, in this work, we develop a completely automated process to generate high-quality topic pages for scientific entities, with a focus on biomedical concepts. We release TOPICAL, a web app and associated open-source code, comprising a model pipeline combining retrieval, clustering, and prompting, that makes it easy for anyone to generate topic pages for a wide variety of biomedical entities on demand. In a human evaluation of 150 diverse topic pages generated using TOPICAL, we find that the vast majority were considered relevant, accurate, and coherent, with correct supporting citations. We make all code publicly available and host a free-to-use web app at: https://s2-topical.apps.allenai.org.</abstract>
       <url hash="8994fa36">2024.naacl-demo.1</url>
@@ -8082,7 +8082,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>O</fixed-case>pinion<fixed-case>GPT</fixed-case>: Modelling Explicit Biases in Instruction-Tuned <fixed-case>LLM</fixed-case>s</title>
-      <author><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <author><first>Ansar</first><last>Aynetdinov</last><affiliation>Department of Computer Science, Humboldt University Berlin, Humboldt Universität Berlin</affiliation></author>
       <author><first>Alan</first><last>Akbik</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <pages>78-86</pages>
@@ -8110,7 +8110,7 @@
     <paper id="10">
       <title><fixed-case>B</fixed-case>e<fixed-case>L</fixed-case>eaf: Belief Prediction as Tree Generation</title>
       <author><first>John</first><last>Murzaku</last><affiliation>, State University of New York at Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>97-106</pages>
       <abstract>We present a novel approach to predicting source-and-target factuality by transforming it into a linearized tree generation task. Unlike previous work, our model and representation format fully account for the factuality tree structure, generating the full chain of nested sources instead of the last source only. Furthermore, our linearized tree representation significantly compresses the amount of tokens needed compared to other representations, allowing for fully end-to-end systems. We achieve state-of-the-art results on FactBank and the Modal Dependency Corpus, which are both corpora annotating source-and-target event factuality. Our results on fine-tuning validate the strong generality of the proposed linearized tree generation task, which can be easily adapted to other corpora with a similar structure. We then present BeLeaf, a system which directly leverages the linearized tree representation to create both sentence level and document level visualizations. Our system adds several missing pieces to the source-and-target factuality task such as coreference resolution and event head word to syntactic span conversion. Our demo code is available on https://github.com/yurpl/beleaf and our video is available on https://youtu.be/SpbMNnin-Po.</abstract>
       <url hash="792a9327">2024.naacl-demo.10</url>
@@ -8168,7 +8168,7 @@
       <author><first>Hongyi</first><last>Wang</last><affiliation>CMU, Carnegie Mellon University</affiliation></author>
       <author><first>Yonghao</first><last>Zhuang</last><affiliation>CMU, Carnegie Mellon University</affiliation></author>
       <author><first>Jindong</first><last>Chen</last><affiliation>Google</affiliation></author>
-      <author><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Zhiting</first><last>Hu</last><affiliation>University of California, San Diego and Amazon</affiliation></author>
       <pages>137-147</pages>
       <abstract>The recent progress of AI can be largely attributed to large language models (LLMs). However, their escalating memory requirements introduce challenges for machine learning (ML) researchers and engineers. Addressing this requires developers to partition a large model to distribute it across multiple GPUs or TPUs. This necessitates considerable coding and intricate configuration efforts with existing model parallel tools, such as Megatron-LM, DeepSpeed, and Alpa. These tools require users’ expertise in machine learning systems (MLSys), creating a bottleneck in LLM development, particularly for developers without MLSys background. In this work, we present RedCoast (Redco), a lightweight and user-friendly tool crafted to automate distributed training and inference for LLMs, as well as to simplify ML pipeline development. The design of Redco emphasizes two key aspects. Firstly, to automate model parallelism, our study identifies two straightforward rules to generate tensor parallel strategies for any given LLM. Integrating these rules into Redco facilitates effortless distributed LLM training and inference, eliminating the need of additional coding or complex configurations. We demonstrate the effectiveness by applying Redco on a set of LLM architectures, such as GPT-J, LLaMA, T5, and OPT, up to the size of 66B. Secondly, we propose a mechanism that allows for the customization of diverse ML pipelines through the definition of merely three functions, avoiding redundant and formulaic code like multi-host related processing. This mechanism proves adaptable across a spectrum of ML algorithms, from foundational language modeling to complex algorithms like meta-learning and reinforcement learning. As a result, Redco implementations exhibit significantly fewer lines of code compared to their official counterparts. RedCoast (Redco) has been released under Apache 2.0 license at https://github.com/tanyuqian/redco.</abstract>
@@ -8184,7 +8184,7 @@
       <author><first>Robert</first><last>Geislinger</last></author>
       <author><first>Florian</first><last>Helfer</last></author>
       <author><first>Gertraud</first><last>Koch</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>148-157</pages>
       <abstract>In this system demonstration paper, we present the Concept Over Time Analysis extension for the Discourse Analysis Tool Suite.The proposed tool empowers users to define, refine, and visualize their concepts of interest within an interactive interface. Adhering to the Human-in-the-loop paradigm, users can give feedback through sentence annotations. Utilizing few-shot sentence classification, the system employs Sentence Transformers to compute representations of sentences and concepts. Through an iterative process involving semantic similarity searches, sentence annotation, and fine-tuning with contrastive data, the model continuously refines, providing users with enhanced analysis outcomes. The final output is a timeline visualization of sentences classified to concepts. Especially suited for the Digital Humanities, Concept Over Time Analysis serves as a valuable tool for qualitative data analysis within extensive datasets. The chronological overview of concepts enables researchers to uncover patterns, trends, and shifts in discourse over time.</abstract>
       <url hash="3dc6a167">2024.naacl-demo.15</url>
@@ -8197,10 +8197,10 @@
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Atticus</first><last>Geiger</last><affiliation>Pr(Ai)²R Group</affiliation></author>
       <author><first>Aryaman</first><last>Arora</last></author>
-      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zheng</first><last>Wang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher</first><last>Manning</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>158-165</pages>
       <abstract>Interventions on model-internal states are fundamental operations in many areas of AI, including model editing, steering, robustness, and interpretability. To facilitate such research, we introduce <tex-math>pyvene</tex-math>, an open-source Python library that supports customizable interventions on a range of different PyTorch modules. <tex-math>pyvene</tex-math> supports complex intervention schemes with an intuitive configuration format, and its interventions can be static or include trainable parameters. We show how <tex-math>pyvene</tex-math> provides a unified and extensible framework for performing interventions on neural models and sharing the intervened upon models with others. We illustrate the power of the library via interpretability analyses using causal abstraction and knowledge localization. We publish our library through Python Package Index (PyPI) and provide code, documentation, and tutorials at ‘https://github.com/stanfordnlp/pyvene‘.</abstract>
@@ -8347,7 +8347,7 @@
       <author><first>Dahyun</first><last>Jung</last><affiliation>Korea University</affiliation></author>
       <author><first>Sugyeong</first><last>Eo</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>25-35</pages>
       <abstract>Critical error detection (CED) in machine translation is a task that aims to detect errors that significantly distort the intended meaning. However, the existing study of CED lacks explainability due to the absence of content addressing the reasons for catastrophic errors. To address this limitation, we propose Explainable CED, a dataset that introduces the attributes of error explanation and correction regarding critical errors. Considering the advantage of reducing time costs and mitigating human annotation bias, we leverage a large language model in the data construction process. To improve the quality of the dataset and mitigate hallucination, we compare responses from the model and introduce an additional data filtering method through feedback scoring. The experiment demonstrates that the dataset appropriately reflects a consistent explanation and revision for errors, validating the reliability of the dataset.</abstract>
       <url hash="17db48e1">2024.naacl-srw.4</url>
@@ -8396,7 +8396,7 @@
       <author><first>Joris</first><last>Driesen</last><affiliation>Apple</affiliation></author>
       <author><first>Alexandru</first><last>Coca</last></author>
       <author><first>Mark</first><last>Gaynor</last><affiliation>Apple</affiliation></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>56-74</pages>
       <abstract>Spurred by recent advances in Large Language Models (LLMs), virtual assistants are poised to take a leap forward in terms of their dialogue capabilities. Yet a major bottleneck to achieving genuinely transformative task-oriented dialogue capabilities remains the scarcity of high quality data. Existing datasets, while impressive in scale, have limited domain coverage and contain few genuinely challenging conversational phenomena; those which are present are typically unlabelled, making it difficult to assess the strengths and weaknesses of models without time-consuming and costly human evaluation. Moreover, creating high quality dialogue data has until now required considerable human input, limiting both the scale of these datasets and the ability to rapidly bootstrap data for a new target domain. We aim to overcome these issues with LUCID, a modularised and highly automated LLM-driven data generation system that produces realistic, diverse and challenging dialogues. We use LUCID to generate a seed dataset of 4,277 conversations across 100 intents to demonstrate its capabilities, with a human review finding consistently high quality labels in the generated data.</abstract>
       <url hash="335d0894">2024.naacl-srw.8</url>
@@ -8409,7 +8409,7 @@
       <author><first>Sankalp</first><last>Bahad</last></author>
       <author><first>Pruthwik</first><last>Mishra</last><affiliation>IIIT-Hyderabad</affiliation></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
       <pages>75-82</pages>
       <abstract>Named Entity Recognition (NER) is a use-ful component in Natural Language Process-ing (NLP) applications. It is used in varioustasks such as Machine Translation, Summa-rization, Information Retrieval, and Question-Answering systems. The research on NER iscentered around English and some other ma-jor languages, whereas limited attention hasbeen given to Indian languages. We analyze thechallenges and propose techniques that can betailored for Multilingual Named Entity Recog-nition for Indian Languages. We present a hu-man annotated named entity corpora of ∼40Ksentences for 4 Indian languages from two ofthe major Indian language families. Addition-ally, we show the transfer learning capabilitiesof pre-trained transformer models from a highresource language to multiple low resource lan-guages through a series of experiments. Wealso present a multilingual model fine-tunedon our dataset, which achieves an F1 score of∼0.80 on our dataset on average. We achievecomparable performance on completely unseenbenchmark datasets for Indian languages whichaffirms the usability of our model.</abstract>
       <url hash="0ef2b73e">2024.naacl-srw.9</url>
@@ -8432,7 +8432,7 @@
       <author><first>Dong</first><last>Kim</last><affiliation>Korea University</affiliation></author>
       <author><first>Dahyun</first><last>Jung</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Upstage</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last><affiliation>Korea University</affiliation></author>
       <pages>93-104</pages>
       <abstract>Large Language Models (LLMs) have significantly impacted various fields requiring advanced linguistic understanding, yet concerns regarding their inherent biases and ethical considerations have also increased. Notably, LLMs have been critiqued for perpetuating stereotypes against diverse groups based on race, sexual orientation, and other attributes. However, most research analyzing these biases has predominantly focused on communities where English is the primary language, neglecting to consider the cultural and linguistic nuances of other societies. In this paper, we aim to explore the inherent biases and toxicity of LLMs, specifically within the social context of Korea. We devise a set of prompts that reflect major societal issues in Korea and assign varied personas to both ChatGPT and GPT-4 to assess the toxicity of the generated sentences. Our findings indicate that certain personas or prompt combinations consistently yield harmful content, highlighting the potential risks associated with specific persona-issue alignments within the Korean cultural framework. Furthermore, we discover that GPT-4 can produce more than twice the level of toxic content than ChatGPT under certain conditions.</abstract>
       <url hash="e1e6ec1b">2024.naacl-srw.11</url>
@@ -8443,7 +8443,7 @@
       <title>To Clarify or not to Clarify: A Comparative Analysis of Clarification Classification with Fine-Tuning, Prompt Tuning, and Prompt Engineering</title>
       <author><first>Alina</first><last>Leippert</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Tatiana</first><last>Anikina</last><affiliation>German Research Center for AI</affiliation></author>
-      <author><first>Bernd</first><last>Kiefer</last><affiliation>German Research Center for AI</affiliation></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Josef</first><last>Genabith</last><affiliation>German Research Center for AI and Universität des Saarlandes</affiliation></author>
       <pages>105-115</pages>
       <abstract>Misunderstandings occur all the time in human conversation but deciding on when to ask for clarification is a challenging task for conversational systems that requires a balance between asking too many unnecessary questions and running the risk of providing incorrect information. This work investigates clarification identification based on the task and data from (Xu et al., 2019), reproducing their Transformer baseline and extending it by comparing pre-trained language model fine-tuning, prompt tuning and manual prompt engineering on the task of clarification identification. Our experiments show strong performance with LM and a prompt tuning approach with BERT and RoBERTa, outperforming standard LM fine-tuning, while manual prompt engineering with GPT-3.5 proved to be less effective, although informative prompt instructions have the potential of steering the model towards generating more accurate explanations for why clarification is needed.</abstract>
@@ -8701,8 +8701,8 @@
       <author><first>Chaowei</first><last>Xiao</last><affiliation>UW-Madison</affiliation></author>
       <author><first>Huan</first><last>Sun</last><affiliation>OSU</affiliation></author>
       <author><first>Lei</first><last>Li</last><affiliation>CMU</affiliation></author>
-      <author><first>Leon</first><last>Derczynski</last><affiliation>UW Seattle</affiliation></author>
-      <author><first>Anima</first><last>Anandkumar</last><affiliation>Caltech, NVIDIA</affiliation></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last><affiliation>UW Seattle</affiliation></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last><affiliation>Caltech, NVIDIA</affiliation></author>
       <author><first>Fei</first><last>Wang</last><affiliation>USC</affiliation></author>
       <pages>8-18</pages>
       <abstract>This tutorial seeks to provide a systematic summary of risks and vulnerabilities in security, privacy and copyright aspects of large language models (LLMs), and most recent solutions to address those issues. We will discuss a broad thread of studies that try to answer the following questions: (i) How do we unravel the adversarial threats that attackers may leverage in the training time of LLMs, especially those that may exist in recent paradigms of instruction tuning and RLHF processes? (ii) How do we guard the LLMs against malicious attacks in inference time, such as attacks based on backdoors and jailbreaking? (iii) How do we ensure privacy protection of user information and LLM decisions for Language Model as-a-Service (LMaaS)? (iv) How do we protect the copyright of an LLM? (v) How do we detect and prevent cases where personal or confidential information is leaked during LLM training? (vi) How should we make policies to control against improper usage of LLM-generated content? In addition, will conclude the discussions by outlining emergent challenges in security, privacy and reliability of LLMs that deserve timely investigation by the community</abstract>
@@ -8733,7 +8733,7 @@
       <author><first>Nikita</first><last>Soni</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Swanie</first><last>Juhng</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>João</first><last>Sedoc</last><affiliation>New York University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Salvatore</first><last>Giorgi</last><affiliation>University of Pennsylvania, National Institute on Drug Abuse, Intramural Research Program</affiliation></author>
       <author><first>Ryan L</first><last>Boyd</last><affiliation>Stony Brook University</affiliation></author>
       <pages>26-33</pages>
@@ -8746,7 +8746,7 @@
       <title>Human-<fixed-case>AI</fixed-case> Interaction in the Age of <fixed-case>LLM</fixed-case>s</title>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Sherry Tongshuang</first><last>Wu</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Marti A.</first><last>Hearst</last><affiliation>University of California, Berkeley</affiliation></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last><affiliation>University of California, Berkeley</affiliation></author>
       <pages>34-38</pages>
       <abstract>Recently, the development of Large Language Models (LLMs) has revolutionized the capabilities of AI systems. These models possess the ability to comprehend and generate human-like text, enabling them to engage in sophisticated conversations, generate content, and even perform tasks that once seemed beyond the reach of machines. As a result, the way we interact with technology and each other — an established field called “Human-AI Interaction” and have been studied for over a decade — is undergoing a profound transformation. This tutorial will provide an overview of the interaction between humans and LLMs, exploring the challenges, opportunities, and ethical considerations that arise in this dynamic landscape. It will start with a review of the types of AI models we interact with, and a walkthrough of the core concepts in Human-AI Interaction. We will then emphasize the emerging topics shared between HCI and NLP communities in light of LLMs.</abstract>
       <url hash="c81263a4">2024.naacl-tutorials.5</url>
@@ -8757,8 +8757,8 @@
       <title>Spatial and Temporal Language Understanding: Representation, Reasoning, and Grounding</title>
       <author><first>Parisa</first><last>Kordjamshidi</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Qiang</first><last>Ning</last><affiliation>AWS</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven</affiliation></author>
       <pages>39-46</pages>
       <abstract>This tutorial provides an overview of the cutting edge research on spatial and temporal language understanding. We also cover some essential background material from various subdisciplines to this topic, which we believe will enrich the CL community’s appreciation of the complexity of spatiotemporal reasoning.</abstract>
       <url hash="4570d34d">2024.naacl-tutorials.6</url>
@@ -8771,7 +8771,7 @@
       <booktitle>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)</booktitle>
       <editor><first>Yi</first><last>Yang</last></editor>
       <editor><first>Aida</first><last>Davani</last></editor>
-      <editor><first>Avi</first><last>Sil</last></editor>
+      <editor id="avirup-sil"><first>Avi</first><last>Sil</last></editor>
       <editor><first>Anoop</first><last>Kumar</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
@@ -8874,7 +8874,7 @@
       <author><first>Jiaxin</first><last>Pei</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Soumya</first><last>Vadlamannati</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Liang-Kang</first><last>Huang</last></author>
-      <author><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Xinyu</first><last>Hua</last><affiliation>Bloomberg</affiliation></author>
       <pages>63-72</pages>
       <abstract>Identifying risks associated with a company is important to investors and the wellbeing of the overall financial markets. In this study, we build a computational framework to automatically extract company risk factors from news articles. Our newly proposed schema comprises seven distinct aspects, such as supply chain, regulations, and competition. We annotate 666 news articles and benchmark various machine learning models. While large language mod- els have achieved remarkable progress in various types of NLP tasks, our experiment shows that zero-shot and few-shot prompting state-of- the-art LLMs (e.g., Llama-2) can only achieve moderate to low performances in identifying risk factors. In contrast, fine-tuning pre-trained language models yields better results on most risk factors. Using this model, we analyze over 277K Bloomberg News articles and demonstrate that identifying risk factors from news could provide extensive insights into the operations of companies and industries.</abstract>
@@ -8900,7 +8900,7 @@
       <title>An <fixed-case>NLP</fixed-case>-Focused Pilot Training Agent for Safe and Efficient Aviation Communication</title>
       <author><first>Xiaochen</first><last>Liu</last></author>
       <author><first>Bowei</first><last>Zou</last><affiliation>A*STAR</affiliation></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
       <pages>89-96</pages>
       <abstract>Aviation communication significantly influences the success of flight operations, ensuring safety of lives and efficient air transportation. In day-to-day flight operations, air traffic controllers (ATCos) would timely communicate instructions to pilots using specific phraseology for aircraft manipulation . However, pilots, originating from diverse backgrounds and understanding of English language, have struggled with conforming to strict phraseology for readback and communication in the live operation, this problem had not been effectively addressed over the past decades. Traditionally, aviation communication training involved expensive setups and resources, often relying on human-in-the-loop (HIL) air traffic simulations that demand allocating a specific environment, domain experts for participation, and substantial amount of annotated data for simulation. Therefore, we would like to propose an NLP-oriented training agent and address these challenges. Our approach involves leveraging only natural language capabilities and fine-tuning on communication data to generate instructions based on input scenarios (keywords). Given the absence of prior references for this business problem, we investigated the feasibility of our proposed solution by 1) generating all instructions at once and 2) generating one instruction while incorporating conversational history in each input. Our findings affirm the feasibility of this approach, highlighting the effectiveness of fine-tuning pre-trained models and large language models in advancing aviation communication training.</abstract>
       <url hash="98257ee5">2024.naacl-industry.8</url>
@@ -8912,7 +8912,7 @@
       <title>Visual Grounding for User Interfaces</title>
       <author><first>Yijun</first><last>Qian</last></author>
       <author><first>Yujie</first><last>Lu</last><affiliation>UC Santa Barbara</affiliation></author>
-      <author><first>Alexander</first><last>Hauptmann</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Oriana</first><last>Riva</last><affiliation>Google and Microsoft</affiliation></author>
       <pages>97-107</pages>
       <abstract>Enabling autonomous language agents to drive application user interfaces (UIs) as humans do can significantly expand the capability of today’s API-based agents. Essential to this vision is the ability of agents to ground natural language commands to on-screen UI elements. Prior UI grounding approaches work by relaying on developer-provided UI metadata (UI trees, such as web DOM, and accessibility labels) to detect on-screen elements. However, such metadata is often unavailable or incomplete. Object detection techniques applied to UI screens remove this dependency, by inferring location and types of UI elements directly from the UI’s visual appearance. The extracted semantics, however, are too limited to directly enable grounding. We overcome the limitations of both approaches by introducing the task of visual UI grounding, which unifies detection and grounding. A model takes as input a UI screenshot and a free-form language expression, and must identify the referenced UI element. We propose a solution to this problem, LVG, which learns UI element detection and grounding using a new technique called layout-guided contrastive learning, where the semantics of individual UI objects are learned also from their visual organization. Due to the scarcity of UI datasets, LVG integrates synthetic data in its training using multi-context learning. LVG outperforms baselines pre-trained on much larger datasets by over 4.9 points in top-1 accuracy, thus demonstrating its effectiveness.</abstract>
@@ -8948,7 +8948,7 @@
     </paper>
     <paper id="12">
       <title>Conformer-Based Speech Recognition On Extreme Edge-Computing Devices</title>
-      <author><first>Mingbin</first><last>Xu</last></author>
+      <author id="mingbin-xu"><first>Mingbin</first><last>Xu</last></author>
       <author><first>Alex</first><last>Jin</last></author>
       <author><first>Sicheng</first><last>Wang</last></author>
       <author><first>Mu</first><last>Su</last></author>
@@ -9119,7 +9119,7 @@
       <author><first>Alex</first><last>Rosenfeld</last></author>
       <author><first>J.</first><last>Gage</last></author>
       <author><first>Daniel</first><last>Dakota</last><affiliation>Leidos and Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
       <pages>295-302</pages>
       <abstract>We describe our system for authorship attribution in the IARPA HIATUS program. We describe the model and compute infrastructure developed to satisfy the set of technical constraints imposed by IARPA, including runtime limits as well as other constraints related to the ultimate use case. One use-case constraint concerns the explainability of the features used in the system. For this reason, we integrate features from frame semantic parsing, as they are both interpretable and difficult for adversaries to evade. One trade-off with using such features, however, is that more sophisticated feature representations require more complicated architectures, which limit usefulness in time-sensitive and constrained compute environments. We propose an approach to increase the efficiency of frame semantic parsing through an analysis of parallelization and beam search sizes. Our approach results in a system that is approximately 8.37x faster than the base system with a minimal effect on accuracy.</abstract>
       <url hash="879887ea">2024.naacl-industry.24</url>
@@ -9260,7 +9260,7 @@
     </paper>
     <paper id="34">
       <title>Shears: Unstructured Sparsity with Neural Low-rank Adapter Search</title>
-      <author><first>J. Pablo</first><last>Muñoz</last><affiliation>Intel</affiliation></author>
+      <author id="juan-pablo-munoz"><first>J. Pablo</first><last>Muñoz</last><affiliation>Intel</affiliation></author>
       <author><first>Jinjie</first><last>Yuan</last><affiliation>Intel</affiliation></author>
       <author><first>Nilesh</first><last>Jain</last><affiliation>Intel</affiliation></author>
       <pages>395-405</pages>
@@ -9320,7 +9320,7 @@
       <author><first>Giuseppe</first><last>Castellucci</last><affiliation>Amazon</affiliation></author>
       <author><first>Eugene</first><last>Agichtein</last><affiliation>Amazon and Emory University</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>437-446</pages>
       <abstract>Conversational Task Assistants (CTAs) guide users in performing a multitude of activities, such as making recipes. However, ensuring that interactions remain engaging, interesting, and enjoyable for CTA users is not trivial, especially for time-consuming or challenging tasks. Grounded in psychological theories of human interest, we propose to engage users with contextual and interesting statements or facts during interactions with a multi-modal CTA, to reduce fatigue and task abandonment before a task is complete. To operationalize this idea, we train a high-performing classifier (82% F1-score) to automatically identify relevant and interesting facts for users. We use it to create an annotated dataset of task-specific interesting facts for the domain of cooking. Finally, we design and validate a dialogue policy to incorporate the identified relevant and interesting facts into a conversation, to improve user engagement and task completion. Live testing on a leading multi-modal voice assistant shows that 66% of the presented facts were received positively, leading to a 40% gain in the user satisfaction rating, and a 37% increase in conversation length. These findings emphasize that strategically incorporating interesting facts into the CTA experience can promote real-world user participation for guided task interactions.</abstract>
       <url hash="a032260d">2024.naacl-industry.38</url>
@@ -9380,7 +9380,7 @@
     </paper>
     <paper id="42">
       <title>Solving General Natural-Language-Description Optimization Problems with Large Language Models</title>
-      <author id="jihai-zhang"><first>Jihai</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
+      <author><first>Jihai</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Wei</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Siyan</first><last>Guo</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Li</first><last>Wang</last></author>
diff --git a/data/xml/2024.nejlt.xml b/data/xml/2024.nejlt.xml
index d3e5ab4bb2..dda2a73bda 100644
--- a/data/xml/2024.nejlt.xml
+++ b/data/xml/2024.nejlt.xml
@@ -61,14 +61,14 @@
       <author><first>Hady</first><last>Elsahar</last></author>
       <author><first>Chris</first><last>Emezue</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
-      <author><first>Suzana</first><last>Ilić</last></author>
+      <author id="suzana-ilic"><first>Suzana</first><last>Ilić</last></author>
       <author><first>Nurulaqilla</first><last>Khamis</last></author>
       <author><first>Colin</first><last>Leong</last></author>
       <author><first>Maraim</first><last>Masoud</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Daniel</first><last>van Strien</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <author><first>Yacine</first><last>Jernite</last></author>
       <pages>50-77</pages>
       <abstract>Contemporary large-scale data collection efforts have prioritized the amount of data collected to improve large language models (LLM). This quantitative approach has resulted in concerns for the rights of data subjects represented in data collections. This concern is exacerbated by a lack of documentation and analysis tools, making it difficult to interrogate these collections. Mindful of these pitfalls, we present a methodology for documentation-first, human-centered data collection. We apply this approach in an effort to train a multilingual LLM. We identify a geographically diverse set of target language groups (Arabic varieties, Basque, Chinese varieties, Catalan, English, French, Indic languages, Indonesian, Niger-Congo languages, Portuguese, Spanish, and Vietnamese, as well as programming languages) for which to collect metadata on potential data sources. We structure this effort by developing an online catalogue in English as a tool for gathering metadata through public hackathons. We present our tool and analyses of the resulting resource metadata, including distributions over languages, regions, and resource types, and discuss our lessons learned.</abstract>
diff --git a/data/xml/2024.neusymbridge.xml b/data/xml/2024.neusymbridge.xml
index 2d8ad8b37e..4dc1645f78 100644
--- a/data/xml/2024.neusymbridge.xml
+++ b/data/xml/2024.neusymbridge.xml
@@ -4,12 +4,12 @@
     <meta>
       <booktitle>Proceedings of the Workshop: Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning (NeusymBridge) @ LREC-COLING-2024</booktitle>
       <editor><first>Tiansi</first><last>Dong</last></editor>
-      <editor><first>Erhard</first><last>Hinrichs</last></editor>
+      <editor id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></editor>
       <editor><first>Zhen</first><last>Han</last></editor>
       <editor><first>Kang</first><last>Liu</last></editor>
       <editor><first>Yangqiu</first><last>Song</last></editor>
       <editor><first>Yixin</first><last>Cao</last></editor>
-      <editor><first>Christian F.</first><last>Hempelmann</last></editor>
+      <editor id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></editor>
       <editor><first>Rafet</first><last>Sifa</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
@@ -28,7 +28,7 @@
       <author><first>Xintong</first><last>Wang</last></author>
       <author><first>Xiaoyu</first><last>Li</last></author>
       <author><first>Xingshan</first><last>Li</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1–7</pages>
       <abstract>Large Language Models (LLMs) have emerged as dominant foundational models in modern NLP. However, the understanding of their prediction processes and internal mechanisms, such as feed-forward networks (FFN) and multi-head self-attention (MHSA), remains largely unexplored. In this work, we probe LLMs from a human behavioral perspective, correlating values from LLMs with eye-tracking measures, which are widely recognized as meaningful indicators of human reading patterns. Our findings reveal that LLMs exhibit a similar prediction pattern with humans but distinct from that of Shallow Language Models (SLMs). Moreover, with the escalation of LLM layers from the middle layers, the correlation coefficients also increase in FFN and MHSA, indicating that the logits within FFN increasingly encapsulate word semantics suitable for predicting tokens from the vocabulary.</abstract>
       <url hash="def54e50">2024.neusymbridge-1.1</url>
@@ -39,7 +39,7 @@
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
       <author><first>Pin-Er</first><last>Chen</last></author>
       <author><first>Da-Chen</first><last>Lian</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>8–21</pages>
       <abstract>Compressibility is closely related to the predictability of the texts from the information theory viewpoint. As large language models (LLMs) are trained to maximize the conditional probabilities of upcoming words, they may capture the subtlety and nuances of the semantic constraints underlying the texts, and texts aligning with the encoded semantic constraints are more compressible than those that do not. This paper systematically tests whether and how LLMs can act as compressors of semantic pairs. Using semantic relations from English and Chinese Wordnet, we empirically demonstrate that texts with correct semantic pairings are more compressible than incorrect ones, measured by the proposed compression advantages index. We also show that, with the Pythia model suite and a fine-tuned model on Chinese Wordnet, compression capacities are modulated by the model’s seen data. These findings are consistent with the view that LLMs encode the semantic knowledge as underlying constraints learned from texts and can act as compressors of semantic information or potentially other structured knowledge.</abstract>
       <url hash="e0e5af37">2024.neusymbridge-1.2</url>
@@ -68,7 +68,7 @@
     </paper>
     <paper id="5">
       <title>The Need for Grounding in <fixed-case>LLM</fixed-case>-based Dialogue Systems</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>45–52</pages>
       <abstract>Grounding is a pertinent part of the design of LLM-based dialogue systems. Although research on grounding has a long tradition, the paradigm shift caused by LLMs has brought the concept onto the foreground, in particular in the context of cognitive robotics. To avoid generation of irrelevant or false information, the system needs to ground its utterances into real-world events, and to avoid the statistical parrot effect, the system needs to construct shared understanding of the dialogue context and of the partner’s intents. Grounding and construction of the shared context enables cooperation between the participants, and thus supports trustworthy interaction. This paper discusses grounding using neural LLM technology. It aims to bridge neural and symbolic computing on the cognitive architecture level, so as to contribute to a better understanding of how conversational reasoning and collaboration can be linked to LLM implementations to support trustworthy and flexible interaction.</abstract>
       <url hash="6ac82679">2024.neusymbridge-1.5</url>
diff --git a/data/xml/2024.nllp.xml b/data/xml/2024.nllp.xml
index dc0cac8586..3550e2847e 100644
--- a/data/xml/2024.nllp.xml
+++ b/data/xml/2024.nllp.xml
@@ -65,7 +65,7 @@
     <paper id="4">
       <title>u<fixed-case>O</fixed-case>ttawa at <fixed-case>L</fixed-case>egal<fixed-case>L</fixed-case>ens-2024: Transformer-based Classification Experiments</title>
       <author><first>Nima</first><last>Meghdadi</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <pages>42-47</pages>
       <abstract>This paper presents the methods used for LegalLens-2024, which focused on detecting legal violations within unstructured textual data and associating these violations with potentially affected individuals. The shared task included two subtasks: A) Legal Named Entity Recognition (L-NER) and B) Legal Natural Language Inference (L-NLI). For subtask A, we utilized the spaCy library, while for subtask B, we employed a combined model incorporating RoBERTa and CNN. Our results were 86.3% in the L-NER subtask and 88.25% in the L-NLI subtask. Overall, our paper demonstrates the effectiveness of transformer models in addressing complex tasks in the legal domain.</abstract>
       <url hash="c6ee1e20">2024.nllp-1.4</url>
@@ -123,7 +123,7 @@
       <title>Automated Anonymization of Parole Hearing Transcripts</title>
       <author><first>Abed</first><last>Itani</last><affiliation>University of Passau</affiliation></author>
       <author><first>Wassiliki</first><last>Siskou</last><affiliation>University of Konstanz</affiliation></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last><affiliation>University of Passau</affiliation></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last><affiliation>University of Passau</affiliation></author>
       <pages>115-128</pages>
       <abstract>Responsible natural language processing is more and more concerned with preventing the violation of personal rights that language technology can entail (CITATION). In this paper we illustrate the case of parole hearings in California, the verbatim transcripts of which are made available to the general public upon a request sent to the California Board of Parole Hearings. The parole hearing setting is highly sensitive: inmates face a board of legal representatives who discuss highly personal matters not only about the inmates themselves but also about victims and their relatives, such as spouses and children. Participants have no choice in contributing to the data collection process, since the disclosure of the transcripts is mandated by law. As researchers who are interested in understanding and modeling the communication in these hierarchy-driven settings, we face an ethical dilemma: publishing raw data as is for the community would compromise the privacy of all individuals affected, but manually cleaning the data requires a substantive effort. In this paper we present an automated anonymization process which reliably removes and pseudonymizes sensitive data in verbatim transcripts, while at the same time preserving the structure and content of the data. Our results show that the process exhibits little to no leakage of sensitive information when applied to more than 300 hearing transcripts.</abstract>
       <url hash="72218f74">2024.nllp-1.9</url>
@@ -145,7 +145,7 @@
       <title>Enhancing Contract Negotiations with <fixed-case>LLM</fixed-case>-Based Legal Document Comparison</title>
       <author><first>Savinay</first><last>Narendra</last><affiliation>JP Morgan Chase &amp; Co.</affiliation></author>
       <author><first>Kaushal</first><last>Shetty</last><affiliation>JP Morgan Chase</affiliation></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last><affiliation>JPMorganChase</affiliation></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last><affiliation>JPMorganChase</affiliation></author>
       <pages>143-153</pages>
       <abstract>We present a large language model (LLM) based approach for comparing legal contracts with their corresponding template documents. Legal professionals use commonly observed deviations between templates and contracts to help with contract negotiations, and also to refine the template documents. Our comparison approach, based on the well-studied natural language inference (NLI) task, first splits a template into key concepts and then uses LLMs to decide if the concepts are entailed by the contract document. We also repeat this procedure in the opposite direction - contract clauses are tested for entailment against the template clause to see if they contain additional information. The non-entailed concepts are labelled, organized and filtered by frequency, and placed into a clause library, which is used to suggest changes to the template documents. We first show that our LLM-based approach outperforms all previous work on a publicly available dataset designed for NLI in the legal domain. We then apply it to a private real-world legal dataset, achieve an accuracy of 96.46%. Our approach is the first in the literature to produce a natural language comparison between legal contracts and their template documents.</abstract>
       <url hash="37c3f127">2024.nllp-1.11</url>
@@ -237,7 +237,7 @@
     <paper id="19">
       <title>Multi-Property Multi-Label Documents Metadata Recommendation based on Encoder Embeddings</title>
       <author><first>Nasredine</first><last>Cheniki</last><affiliation>Publications Office of the European Union</affiliation></author>
-      <author><first>Vidas</first><last>Daudaravicius</last><affiliation>European Commission Joint Research Centre</affiliation></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last><affiliation>European Commission Joint Research Centre</affiliation></author>
       <author><first>Abdelfettah</first><last>Feliachi</last><affiliation>Publications Office of the European Union</affiliation></author>
       <author><first>Didier</first><last>Hardy</last><affiliation>Publications Office of the European Union</affiliation></author>
       <author><first>Marc Wilhelm</first><last>Küster</last><affiliation>Publications Office of the European Union</affiliation></author>
@@ -270,7 +270,7 @@
     <paper id="22">
       <title><fixed-case>LAR</fixed-case>-<fixed-case>ECHR</fixed-case>: A New Legal Argument Reasoning Task and Dataset for Cases of the <fixed-case>E</fixed-case>uropean Court of Human Rights</title>
       <author><first>Odysseas S.</first><last>Chlapanis</last></author>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>267-279</pages>
       <abstract>We present Legal Argument Reasoning (LAR), a novel task designed to evaluate the legal reasoning capabilities of Large Language Models (LLMs). The task requires selecting the correct next statement (from multiple choice options) in a chain of legal arguments from court proceedings, given the facts of the case. We constructed a dataset (LAR-ECHR) for this task using cases from the European Court of Human Rights (ECHR). We evaluated seven general-purpose LLMs on LAR-ECHR and found that (a) the ranking of the models is aligned with that of LegalBench, an established US-based legal reasoning benchmark, even though LAR-ECHR is based on EU law, (b) LAR-ECHR distinguishes top models more clearly, compared to LegalBench, (c) even the best model (GPT-4o) obtains 75.8% accuracy on LAR-ECHR, indicating significant potential for further model improvement. The process followed to construct LAR-ECHR can be replicated with cases from other legal systems.</abstract>
@@ -406,7 +406,7 @@
     <paper id="36">
       <title>Towards Supporting Legal Argumentation with <fixed-case>NLP</fixed-case>: Is More Data Really All You Need?</title>
       <author><first>Santosh</first><last>T.y.s.s</last><affiliation>Technical University of Munich</affiliation></author>
-      <author><first>Kevin</first><last>Ashley</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="kevin-d-ashley"><first>Kevin</first><last>Ashley</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Katie</first><last>Atkinson</last><affiliation>University of Liverpool</affiliation></author>
       <author><first>Matthias</first><last>Grabmair</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>404-421</pages>
diff --git a/data/xml/2024.nlp4call.xml b/data/xml/2024.nlp4call.xml
index 9a9b5f2974..4e2559ef8d 100644
--- a/data/xml/2024.nlp4call.xml
+++ b/data/xml/2024.nlp4call.xml
@@ -10,7 +10,7 @@
       <editor><first>Griselda</first><last>Drouet</last></editor>
       <editor><first>David</first><last>Alfter</last></editor>
       <editor><first>Elena</first><last>Volodina</last></editor>
-      <editor><first>Arne</first><last>Jönsson</last></editor>
+      <editor id="arne-jonsson"><first>Arne</first><last>Jönsson</last></editor>
       <publisher>LiU Electronic Press</publisher>
       <address>Rennes, France</address>
       <month>October</month>
@@ -67,8 +67,8 @@
       <title>Evaluating Automatic Pronunciation Scoring with Crowd-sourced Speech Corpus Annotations</title>
       <author><first>Nils</first><last>Hjortnaes</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>67–77</pages>
       <url hash="c4dab7ab">2024.nlp4call-1.6</url>
       <bibkey>hjortnaes-etal-2024-evaluating</bibkey>
@@ -84,7 +84,7 @@
     <paper id="8">
       <title>Investigating strategies for lexical complexity prediction in a multilingual setting using generative language models and supervised approaches</title>
       <author><first>Abdelhak</first><last>Kelious</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <author><first>Christophe</first><last>Coeur</last></author>
       <pages>96–114</pages>
       <url hash="365af2c4">2024.nlp4call-1.8</url>
@@ -93,7 +93,7 @@
     <paper id="9">
       <title>Developing a Pedagogically Oriented Interactive Reading Tool with Teachers in the Loops</title>
       <author><first>Mihwa</first><last>Lee</last></author>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Xiaobin</first><last>Chen</last></author>
       <pages>115–125</pages>
       <url hash="14d916e2">2024.nlp4call-1.9</url>
@@ -102,7 +102,7 @@
     <paper id="10">
       <title>Developing a Web-Based Intelligent Language Assessment Platform Powered by Natural Language Processing Technologies</title>
       <author><first>Sarah</first><last>Löber</last></author>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Daniela Verratti</first><last>Souto</last></author>
       <author><first>Luisa</first><last>Ribeiro-Flucht</last></author>
       <author><first>Xiaobin</first><last>Chen</last></author>
@@ -147,8 +147,8 @@
     <paper id="14">
       <title>A Conversational Intelligent Tutoring System for Improving <fixed-case>E</fixed-case>nglish Proficiency of Non-Native Speakers via Debriefing of Online Meeting Transcriptions</title>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Roman</first><last>Chernysh</last></author>
       <author><first>Gabriel</first><last>Mora-Rodríguez</last></author>
@@ -163,7 +163,7 @@
       <author><first>Nicolas</first><last>Ballier</last></author>
       <author><first>Thomas</first><last>Gaillat</last></author>
       <author><first>Andrew</first><last>Simpkin</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>199–208</pages>
       <url hash="1f1c0379">2024.nlp4call-1.15</url>
       <bibkey>stearns-etal-2024-evaluating</bibkey>
diff --git a/data/xml/2024.nlp4convai.xml b/data/xml/2024.nlp4convai.xml
index 794f6f860d..d006ef0d66 100644
--- a/data/xml/2024.nlp4convai.xml
+++ b/data/xml/2024.nlp4convai.xml
@@ -11,7 +11,7 @@
       <editor><first>Yu</first><last>Li</last></editor>
       <editor><first>Alon</first><last>Albalak</last></editor>
       <editor><first>Hiromi</first><last>Wakaki</last></editor>
-      <editor><first>Alexandros</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -27,7 +27,7 @@
     <paper id="1">
       <title>On the Benchmarking of <fixed-case>LLM</fixed-case>s for Open-Domain Dialogue Evaluation</title>
       <author><first>John</first><last>Mendonça</last><affiliation>Instituto Superior Técnico</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Phrase and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Phrase and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Isabel</first><last>Trancoso</last><affiliation>Instituto Superior Técnico</affiliation></author>
       <pages>1-12</pages>
       <abstract>Large Language Models (LLMs) have showcased remarkable capabilities in various Natural Language Processing tasks. For automatic open-domain dialogue evaluation in particular, LLMs have been seamlessly integrated into evaluation frameworks, and together with human evaluation, compose the backbone of most evaluations. However, existing evaluation benchmarks often rely on outdated datasets and evaluate aspects like Fluency and Relevance, which fail to adequately capture the capabilities and limitations of state-of-the-art chatbot models. This paper critically examines current evaluation benchmarks, highlighting that the use of older response generators and quality aspects fail to accurately reflect modern chatbot capabilities. A small annotation experiment on a recent LLM-generated dataset (SODA) reveals that LLM evaluators such as GPT-4 struggle to detect actual deficiencies in dialogues generated by current LLM chatbots.</abstract>
@@ -62,7 +62,7 @@
       <author><first>Chulaka</first><last>Gunasekara</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Hui</first><last>Wan</last><affiliation>IBM Research AI</affiliation></author>
       <author><first>Jatin</first><last>Ganhotra</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Marina</first><last>Danilevsky</last><affiliation>International Business Machines</affiliation></author>
       <pages>56-72</pages>
       <abstract>Dialogue summarization involves summarizing long conversations while preserving the most salient information. Real-life dialogues often involve naturally occurring variations (e.g., repetitions, hesitations). In this study, we systematically investigate the impact of such variations on state-of-the-art open dialogue summarization models whose details are publicly known (e.g., architectures, weights, and training corpora). To simulate real-life variations, we introduce two types of perturbations: utterance-level perturbations that modify individual utterances with errors and language variations, and dialogue-level perturbations that add non-informative exchanges (e.g., repetitions, greetings). We perform our analysis along three dimensions of robustness: consistency, saliency, and faithfulness, which aim to capture different aspects of performance of a summarization model. We find that both fine-tuned and instruction-tuned models are affected by input variations, with the latter being more susceptible, particularly to dialogue-level perturbations. We also validate our findings via human evaluation. Finally, we investigate whether the robustness of fine-tuned models can be improved by training them with a fraction of perturbed data. We find that this approach does not yield consistent performance gains, warranting further research. Overall, our work highlights robustness challenges in current open encoder-decoder summarization models and provides insights for future research.</abstract>
diff --git a/data/xml/2024.nlp4dh.xml b/data/xml/2024.nlp4dh.xml
index dd1b0bd227..c749d28111 100644
--- a/data/xml/2024.nlp4dh.xml
+++ b/data/xml/2024.nlp4dh.xml
@@ -106,7 +106,7 @@
       <title>Language Resources From Prominent Born-Digital Humanities Texts are Still Needed in the Age of <fixed-case>LLM</fixed-case>s</title>
       <author><first>Natalie</first><last>Hervieux</last></author>
       <author><first>Peiran</first><last>Yao</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <author><first>Denilson</first><last>Barbosa</last></author>
       <pages>85–104</pages>
       <abstract>The digital humanities (DH) community fundamentally embraces the use of computerized tools for the study and creation of knowledge related to language, history, culture, and human values, in which natural language plays a prominent role. Many successful DH tools rely heavily on Natural Language Processing methods, and several efforts exist within the DH community to promote the use of newer and better tools. Nevertheless, most NLP research is driven by web corpora that are noticeably different from texts commonly found in DH artifacts, which tend to use richer language and refer to rarer entities. Thus, the near-human performance achieved by state-of-the-art NLP tools on web texts might not be achievable on DH texts. We introduce a dataset carefully created by computer scientists and digital humanists intended to serve as a reference point for the development and evaluation of NLP tools. The dataset is a subset of a born-digital textbase resulting from a prominent and ongoing experiment in digital literary history, containing thousands of multi-sentence excerpts that are suited for information extraction tasks. We fully describe the dataset and show that its language is demonstrably different than the corpora normally used in training language resources in the NLP community.</abstract>
@@ -127,7 +127,7 @@
     <paper id="11">
       <title>A Multi-task Framework with Enhanced Hierarchical Attention for Sentiment Analysis on Classical <fixed-case>C</fixed-case>hinese Poetry: Utilizing Information from Short Lines</title>
       <author><first>Quanqi</first><last>Du</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>113–122</pages>
       <abstract>Classical Chinese poetry has a long history, dating back to the 11th century BC. By investigating the sentiment expressed in the poetry, we can gain more insights in the emotional life and history development in ancient Chinese culture. To help improve the sentiment analysis performance in the field of classical Chinese poetry, we propose to utilize the unique information from the individual short lines that compose the poem, and introduce a multi-task framework with hierarchical attention enhanced with short line sentiment labels. Specifically, the multi-task framework comprises sentiment analysis for both the overall poem and the short lines, while the hierarchical attention consists of word- and sentence-level attention, with the latter enhanced with additional information from short line sentiments. Our experimental results showcase that our approach leveraging more fine-grained information from short lines outperforms the state-of-the-art, achieving an accuracy score of 72.88% and an F1-macro score of 71.05%.</abstract>
       <url hash="15651f93">2024.nlp4dh-1.11</url>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="14">
       <title>Canonical Status and Literary Influence: A Comparative Study of <fixed-case>D</fixed-case>anish Novels from the Modern Breakthrough (1870–1900)</title>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <author><first>Alie</first><last>Lassche</last></author>
       <author><first>Jan</first><last>Kostkan</last></author>
       <author><first>Márton</first><last>Kardos</last></author>
@@ -210,8 +210,8 @@
       <author><first>Rasul</first><last>Dent</last></author>
       <author><first>Juliette</first><last>Janes</last></author>
       <author><first>Thibault</first><last>Clerice</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>189–199</pages>
       <abstract>Whether or not several Creole languages which developed during the early modern period can be considered genetic descendants of European languages has been the subject of intense debate. This is in large part due to the absence of evidence of intermediate forms. This work introduces a new open corpus, the Molyé corpus, which combines stereotypical representations of three kinds of language variation in Europe with early attestations of French-based Creole languages across a period of 400 years. It is intended to facilitate future research on the continuity between contact situations in Europe and Creolophone (former) colonies.</abstract>
       <url hash="b0ba8975">2024.nlp4dh-1.18</url>
@@ -266,7 +266,7 @@
       <author><first>Myrto</first><last>Tsigkouli</last></author>
       <author><first>Chris W.</first><last>Jenkins</last></author>
       <author><first>Filip</first><last>Miletić</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>240–246</pages>
       <abstract>This paper provides a framework and tool set for computing and visualising dynamic, time- specific semantic neighbourhoods of English noun-noun compounds and their constituents over time. Our framework not only identifies salient vector-space dimensions and neighbours in notoriously sparse data: we specifically bring together changes in meaning aspects and degrees of (non-)compositionality.</abstract>
       <url hash="54a497bd">2024.nlp4dh-1.23</url>
@@ -320,10 +320,10 @@
     <paper id="28">
       <title>Evaluating Open-Source <fixed-case>LLM</fixed-case>s in Low-Resource Languages: Insights from <fixed-case>L</fixed-case>atvian High School Exams</title>
       <author><first>Roberts</first><last>Darģis</last></author>
-      <author><first>Guntis</first><last>Bārzdiņš</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Normunds</first><last>Grūzītis</last></author>
-      <author><first>Baiba</first><last>Saulīte</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Bārzdiņš</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Grūzītis</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulīte</last></author>
       <pages>289–293</pages>
       <abstract>The latest large language models (LLM) have significantly advanced natural language processing (NLP) capabilities across various tasks. However, their performance in low-resource languages, such as Latvian with 1.5 million native speakers, remains substantially underexplored due to both limited training data and the absence of comprehensive evaluation benchmarks. This study addresses this gap by conducting a systematic assessment of prominent open-source LLMs on natural language understanding (NLU) and natural language generation (NLG) tasks in Latvian. We utilize standardized high school centralized graduation exams as a benchmark dataset, offering relatable and diverse evaluation scenarios that encompass multiple-choice questions and complex text analysis tasks. Our experimental setup involves testing models from the leading LLM families, including Llama, Qwen, Gemma, and Mistral, with OpenAI’s GPT-4 serving as a performance reference. The results reveal that certain open-source models demonstrate competitive performance in NLU tasks, narrowing the gap with GPT-4. However, all models exhibit notable deficiencies in NLG tasks, specifically in generating coherent and contextually appropriate text analyses, highlighting persistent challenges in NLG for low-resource languages. These findings contribute to efforts to develop robust multilingual benchmarks and improve LLM performance in diverse linguistic contexts.</abstract>
       <url hash="3ec45a29">2024.nlp4dh-1.28</url>
@@ -358,7 +358,7 @@
     <paper id="31">
       <title>Testing and Adapting the Representational Abilities of Large Language Models on Folktales in Low-Resource Languages</title>
       <author><first>J. A.</first><last>Meaney</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>William</first><last>Lamb</last></author>
       <pages>319–324</pages>
       <abstract>Folktales are a rich resource of knowledge about the society and culture of a civilisation. Digital folklore research aims to use automated techniques to better understand these folktales, and it relies on abstract representations of the textual data. Although a number of large language models (LLMs) claim to be able to represent low-resource langauges such as Irish and Gaelic, we present two classification tasks to explore how useful these representations are, and three adaptations to improve the performance of these models. We find that adapting the models to work with longer sequences, and continuing pre-training on the domain of folktales improves classification performance, although these findings are tempered by the impressive performance of a baseline SVM with non-contextual features.</abstract>
@@ -474,7 +474,7 @@
     <paper id="41">
       <title>Exploring Large Language Models for Qualitative Data Analysis</title>
       <author><first>Tim</first><last>Fischer</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>423–437</pages>
       <abstract>This paper explores the potential of Large Language Models (LLMs) to enhance qualitative data analysis (QDA) workflows within the open-source QDA platform developed at our university. We identify several opportunities within a typical QDA workflow where AI assistance can boost researcher productivity and translate these opportunities into corresponding NLP tasks: document classification, information extraction, span classification, and text generation. A benchmark tailored to these QDA activities is constructed, utilizing English and German datasets that align with relevant use cases. Focusing on efficiency and accessibility, we evaluate the performance of three prominent open-source LLMs - Llama 3.1, Gemma 2, and Mistral NeMo - on this benchmark. Our findings reveal the promise of LLM integration for streamlining QDA workflows, particularly for English-language projects. Consequently, we have implemented the LLM Assistant as an opt-in feature within our platform and report the implementation details. With this, we hope to further democratize access to AI capabilities for qualitative data analysis.</abstract>
       <url hash="e8035a13">2024.nlp4dh-1.41</url>
@@ -572,7 +572,7 @@
       <author><first>Andreas</first><last>Marfurt</last></author>
       <author><first>Ashley</first><last>Thornton</last></author>
       <author><first>David</first><last>Sylvan</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>513–520</pages>
       <abstract>Recent advances in language modeling have focused on (potentially multiple-choice) question answering, open-ended generation, or math and coding problems. We look at a more nuanced task: the interpretation of statements of political actors. To this end, we present a dataset of policy announcements and corresponding annotated interpretations, on the topic of US foreign policy relations with Russia in the years 1993 up to 2016. We analyze the performance of finetuning standard sequence-to-sequence models of varying sizes on predicting the annotated interpretations and compare them to few-shot prompted large language models. We find that 1) model size is not the main factor for success on this task, 2) finetuning smaller models provides both quantitatively and qualitatively superior results to in-context learning with large language models, but 3) large language models pick up the annotation format and approximate the category distribution with just a few in-context examples.</abstract>
       <url hash="eb588f62">2024.nlp4dh-1.50</url>
diff --git a/data/xml/2024.nlp4hr.xml b/data/xml/2024.nlp4hr.xml
index d317fe9467..633a06a303 100644
--- a/data/xml/2024.nlp4hr.xml
+++ b/data/xml/2024.nlp4hr.xml
@@ -6,7 +6,7 @@
       <editor><first>Estevam</first><last>Hruschka</last></editor>
       <editor><first>Thom</first><last>Lake</last></editor>
       <editor><first>Naoki</first><last>Otani</last></editor>
-      <editor><first>Tom</first><last>Mitchell</last></editor>
+      <editor id="tom-mitchell"><first>Tom</first><last>Mitchell</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>St. Julian’s, Malta</address>
       <month>March</month>
@@ -24,7 +24,7 @@
       <author><first>Elena</first><last>Senger</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Mike</first><last>Zhang</last><affiliation>IT University of Copenhagen</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>1-15</pages>
       <abstract>Recent years have brought significant advances to Natural Language Processing (NLP), which enabled fast progress in the field of computational job market analysis. Core tasks in this application domain are skill extraction and classification from job postings. Because of its quick growth and its interdisciplinary nature, there is no exhaustive assessment of this field. This survey aims to fill this gap by providing a comprehensive overview of deep learning methodologies, datasets, and terminologies specific to NLP-driven skill extraction. Our comprehensive cataloging of publicly available datasets addresses the lack of consolidated information on dataset creation and characteristics. Finally, the focus on terminology addresses the current lack of consistent definitions for important concepts, such as hard and soft skills, and terms relating to skill extraction and classification.</abstract>
       <url hash="462face8">2024.nlp4hr-1.1</url>
diff --git a/data/xml/2024.nlp4pi.xml b/data/xml/2024.nlp4pi.xml
index 92158b2815..240f1551c7 100644
--- a/data/xml/2024.nlp4pi.xml
+++ b/data/xml/2024.nlp4pi.xml
@@ -6,9 +6,9 @@
       <editor><first>Daryna</first><last>Dementieva</last></editor>
       <editor><first>Oana</first><last>Ignat</last></editor>
       <editor><first>Zhijing</first><last>Jin</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <editor><first>Giorgio</first><last>Piatti</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Steven</first><last>Wilson</last></editor>
       <editor><first>Jieyu</first><last>Zhao</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -91,7 +91,7 @@
       <author><first>Spandana</first><last>Gella</last><affiliation>Amazon</affiliation></author>
       <author><first>Apurv</first><last>Verma</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>78-97</pages>
       <abstract>Creating children’s stories through text generation is a creative task that requires stories to be both entertaining and suitable for young audiences. However, since current story generation systems often rely on pre-trained language models fine-tuned with limited story data, they may not always prioritize child-friendliness. This can lead to the unintended generation of stories containing problematic elements such as violence, profanity, and biases. Regrettably, despite the significance of these concerns, there is a lack of clear guidelines and benchmark datasets for ensuring content safety for children. In this paper, we introduce a taxonomy specifically tailored to assess content safety in text, with a strong emphasis on children’s well-being. We present PG-Story, a dataset that includes detailed annotations for both sentence-level and discourse-level safety. We demonstrate the potential of identifying unsafe content through self-diagnosis and employing controllable generation techniques during the decoding phase to minimize unsafe elements in generated stories.</abstract>
@@ -103,7 +103,7 @@
       <title>Towards Explainable Multi-Label Text Classification: A Multi-Task Rationalisation Framework for Identifying Indicators of Forced Labour</title>
       <author><first>Erick Mendez</first><last>Guzman</last></author>
       <author><first>Viktor</first><last>Schlegel</last><affiliation>Imperial College London</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>98-112</pages>
       <abstract>The importance of rationales, or natural language explanations, lies in their capacity to bridge the gap between machine predictions and human understanding, by providing human-readable insights into why a text classifier makes specific decisions. This paper presents a novel multi-task rationalisation approach tailored to enhancing the explainability of multi-label text classifiers to identify indicators of forced labour. Our framework integrates a rationale extraction task with the classification objective and allows the inclusion of human explanations during training. We conduct extensive experiments using transformer-based models on a dataset consisting of 2,800 news articles, each annotated with labels and human-generated explanations. Our findings reveal a statistically significant difference between the best-performing architecture leveraging human rationales during training and variants using only labels. Specifically, the supervised model demonstrates a 10% improvement in predictive performance measured by the weighted F1 score, a 15% increase in the agreement between human and machine-generated rationales, and a 4% improvement in the generated rationales’ comprehensiveness. These results hold promising implications for addressing complex human rights issues with greater transparency and accountability using advanced NLP techniques.</abstract>
       <url hash="e24f2fa7">2024.nlp4pi-1.8</url>
@@ -270,7 +270,7 @@
       <author><first>Jiawen</first><last>Wang</last></author>
       <author><first>Longfei</first><last>Zuo</last></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München and IT University of Copenhagen</affiliation></author>
       <pages>315-326</pages>
       <abstract>Climate change (CC) has attracted increasing attention in NLP in recent years. However, detecting the stance on CC in multimodal data is understudied and remains challenging due to a lack of reliable datasets. To improve the understanding of public opinions and communication strategies, this paper presents MultiClimate, the first open-source manually-annotated stance detection dataset with 100 CC-related YouTube videos and 4,209 frame-transcript pairs. We deploy state-of-the-art vision and language models, as well as multimodal models for MultiClimate stance detection. Results show that text-only BERT significantly outperforms image-only ResNet50 and ViT. Combining both modalities achieves state-of-the-art, 0.747/0.749 in accuracy/F1. Our 100M-sized fusion models also beat CLIP and BLIP, as well as the much larger 9B-sized multimodal IDEFICS and text-only Llama3 and Gemma2, indicating that multimodal stance detection remains challenging for large language models. Our code, dataset, as well as supplementary materials, are available at https://github.com/werywjw/MultiClimate.</abstract>
       <url hash="c1a37083">2024.nlp4pi-1.27</url>
diff --git a/data/xml/2024.nlpaics.xml b/data/xml/2024.nlpaics.xml
index eabcf5e9c6..74d9b31805 100644
--- a/data/xml/2024.nlpaics.xml
+++ b/data/xml/2024.nlpaics.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-04-30" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security</booktitle>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Saad</first><last>Ezzini</last></editor>
       <editor><first>Tharindu</first><last>Ranasinghe</last></editor>
       <editor><first>Ignatius</first><last>Ezeani</last></editor>
@@ -120,7 +120,7 @@
     <paper id="10">
       <title>The Influence of the Perplexity Score in the Detection of Machine-generated Texts</title>
       <author><first>Alberto José</first><last>Gutiérrez Megías</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <author><first>Eugenio</first><last>Martínez Cámara</last></author>
       <pages>80–85</pages>
       <abstract>The high performance of large language models (LLM) generating natural language represents a real threat, since they can be leveraged to generate any kind of deceptive content. Since there are still disparities among the language generated by machines and the human language, we claim that perplexity may be used as classification signal to discern between machine and human text. We propose a classification model based on XLM-RoBERTa, and we evaluate it on the M4 dataset. The results show that the perplexity score is useful for the identification of machine generated text, but it is constrained by the differences among the LLMs used in the training and test sets.</abstract>
@@ -201,7 +201,7 @@
     <paper id="18">
       <title>Abusive Speech Detection in <fixed-case>S</fixed-case>erbian using Machine Learning</title>
       <author><first>Danka</first><last>Jokić</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Branislava</first><last>Šandrih Todorović</last></author>
       <pages>153–163</pages>
       <abstract>The increase in the use of abusive language on social media and virtual platforms has emphasized the importance of developing efficient hate speech detection systems. While there have been considerable advancements in creating such systems for the English language, resources are scarce for other languages, such as Serbian. This research paper explores the use of machine learning and deep learning techniques to identify abusive language in Serbian text. The authors used AbCoSER, a dataset of Serbian tweets that have been labeled as abusive or non-abusive. They evaluated various algorithms to classify tweets, and the best-performing model is based on the deep learning transformer architecture. The model attained an F1 macro score of 0.827, a figure that is commensurate with the benchmarks established for offensive speech datasets of a similar magnitude in other languages.</abstract>
diff --git a/data/xml/2024.nlpcss.xml b/data/xml/2024.nlpcss.xml
index fc45fba012..eeb797f153 100644
--- a/data/xml/2024.nlpcss.xml
+++ b/data/xml/2024.nlpcss.xml
@@ -35,7 +35,7 @@
     <paper id="2">
       <title>Connecting the Dots in News Analysis: Bridging the Cross-Disciplinary Disparities in Media Bias and Framing</title>
       <author><first>Gisela</first><last>Vallejo</last></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>University of Melbourne</affiliation></author>
       <pages>16-31</pages>
       <abstract>The manifestation and effect of bias in news reporting have been central topics in the social sciences for decades, and have received increasing attention in the NLP community recently. While NLP can help to scale up analyses or contribute automatic procedures to investigate the impact of biased news in society, we argue that methodologies that are currently dominant fall short of capturing the complex questions and effects addressed in theoretical media studies. This is problematic because it diminishes the validity and safety of the resulting tools and applications. Here, we review and critically compare task formulations, methods and evaluation schemes in the social sciences and NLP. We discuss open questions and suggest possible directions to close identified gaps between theory and predictive models, and their evaluation. These include model transparency, considering document-external information, and cross-document reasoning.</abstract>
@@ -47,7 +47,7 @@
       <title>The Crime of Being Poor: Associations between Crime and Poverty on Social Media in Eight Countries</title>
       <author><first>Georgina</first><last>Curto</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada and University of Ottawa</affiliation></author>
       <pages>32-45</pages>
       <abstract>Negative public perceptions of people living in poverty can hamper policies and programs that aim to help the poor. One prominent example of social bias and discrimination against people in need is the persistent association of poverty with criminality. The phenomenon has two facets: first, the belief that poor people are more likely to engage in crime (e.g., stealing, mugging, violence) and second, the view that certain behaviors directly resulting from poverty (e.g., living outside, panhandling) warrant criminal punishment. In this paper, we use large language models (LLMs) to identify examples of crime–poverty association (CPA) in English social media texts. We analyze the online discourse on CPA across eight geographically-diverse countries, and find evidence that the CPA rates are higher within the sample obtained from the U.S. and Canada, as compared to the other countries such as South Africa, despite the latter having higher poverty, criminality, and inequality indexes. We further uncover and analyze the most common themes in CPA posts and find more negative and biased attitudes toward people living in poverty in posts from the U.S. and Canada. These results could partially be explained by cultural factors related to the tendency to overestimate the equality of opportunities and social mobility in the U.S. and Canada. These findings have consequences for policy-making and open a new path of research for poverty mitigation with the focus not only on the redistribution of wealth but also on the mitigation of bias and discrimination against people in need.</abstract>
@@ -62,7 +62,7 @@
       <author><first>Shreeja</first><last>Dahal</last><affiliation>Florida International University</affiliation></author>
       <author><first>W. Victor H.</first><last>Yarlott</last><affiliation>Florida International University</affiliation></author>
       <author><first>Diana</first><last>Gomez</last><affiliation>Florida International University</affiliation></author>
-      <author><first>Mark</first><last>Finlayson</last><affiliation>Florida International University</affiliation></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last><affiliation>Florida International University</affiliation></author>
       <pages>46-56</pages>
       <abstract>Motifs are distinctive, recurring, widely used idiom-like words or phrases, often originating in folklore and usually strongly anchored to a particular cultural or national group. Motifs are significant communicative devices across a wide range of media—including news, literature, and propaganda—because they can concisely imply a large set of culturally relevant associations. One difficulty of understanding motifs is that their meaning is usually implicit, so for an out-group person the meaning is inaccessible. We present the Motif Implicit Meaning Extractor (MIME), a proof-of-concept system designed to automatically identify a motif’s implicit meaning, as evidenced by textual uses of the motif across a large set data. MIME uses several sources (including motif indices, Wikipedia pages on the motifs, explicit explanations of motifs from in-group informants, and news/social media posts where the motif is used) and can generate a structured report of information about a motif understandable to an out-group person. In addition to a variety of examples and information drawn from structured sources, the report includes implicit information about a motif such as the type of reference (e.g., a person, an organization, etc.), it’s general connotation (strongly negative, slightly negative, neutral, etc.), and it’s associations (typically adjectives). We describe how MIME works and demonstrate its operation on a small set of manually curated motifs. We perform a qualitative evaluation of the output, and assess the difficulty of the problem, showing that explicit motif information provided by cultural informants is critical to high quality output, although mining motif usages in news and social media provides useful additional depth. A system such as MIME, appropriately scaled up, would potentially be quite useful to an out-group person trying to understand in-group usages of motifs, and has wide potential applications in domains such as literary criticism, cultural heritage, marketed and branding, and intelligence analysis.</abstract>
       <url hash="100c032c">2024.nlpcss-1.4</url>
@@ -136,7 +136,7 @@
     <paper id="10">
       <title>Clustering Document Parts: Detecting and Characterizing Influence Campaigns from Documents</title>
       <author><first>Zhengxiang</first><last>Wang</last><affiliation>State University of New York at Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>132-143</pages>
       <abstract>We propose a novel clustering pipeline to detect and characterize influence campaigns from documents. This approach clusters parts of document, detects clusters that likely reflect an influence campaign, and then identifies documents linked to an influence campaign via their association with the high-influence clusters. Our approach outperforms both the direct document-level classification and the direct document-level clustering approach in predicting if a document is part of an influence campaign. We propose various novel techniques to enhance our pipeline, including using an existing event factuality prediction system to obtain document parts, and aggregating multiple clustering experiments to improve the performance of both cluster and document classification. Classifying documents after clustering not only accurately extracts the parts of the documents that are relevant to influence campaigns, but also captures influence campaigns as a coordinated and holistic phenomenon. Our approach makes possible more fine-grained and interpretable characterizations of influence campaigns from documents.</abstract>
       <url hash="04c34888">2024.nlpcss-1.10</url>
@@ -149,7 +149,7 @@
       <author><first>Shamik</first><last>Roy</last><affiliation>Amazon</affiliation></author>
       <author><first>Alexander</first><last>Hoyle</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Daniel</first><last>Acuna</last><affiliation>Computer Science Department, University of Colorado at Boulder</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>144-158</pages>
       <abstract>With the rise in the prevalence of cross-disciplinary research, there is a need to develop methods to characterize its practices. Current computational methods to evaluate interdisciplinary engagement—such as affiliation diversity, keywords, and citation patterns—are insufficient to model the degree of engagement between disciplines, as well as the way in which the complementary expertise of co-authors is harnessed. In this paper, we propose an automated framework to address some of these issues on a large scale. Our framework tracks interdisciplinary citations in scientific articles and models: 1) the section and position in which they appear, and 2) the argumentative role that they play in the writing. To showcase our framework, we perform a preliminary analysis of interdisciplinary engagement in published work at the intersection of natural language processing and computational social science in the last decade.</abstract>
       <url hash="fd05b653">2024.nlpcss-1.11</url>
diff --git a/data/xml/2024.nlperspectives.xml b/data/xml/2024.nlperspectives.xml
index 69f8d88042..4a0242d993 100644
--- a/data/xml/2024.nlperspectives.xml
+++ b/data/xml/2024.nlperspectives.xml
@@ -107,7 +107,7 @@
       <title>Soft metrics for evaluation with disagreements: an assessment</title>
       <author><first>Giulia</first><last>Rizzi</last></author>
       <author><first>Elisa</first><last>Leonardelli</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
       <author><first>Maja</first><last>Pavlovic</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
@@ -121,7 +121,7 @@
     <paper id="10">
       <title>Designing <fixed-case>NLP</fixed-case> Systems That Adapt to Diverse Worldviews</title>
       <author><first>Claudiu</first><last>Creanga</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>95–99</pages>
       <abstract>Natural Language Inference (NLI) is foundational for evaluating language understanding in AI. However, progress has plateaued, with models failing on ambiguous examples and exhibiting poor generalization. We argue that this stems from disregarding the subjective nature of meaning, which is intrinsically tied to an individual’s <i>weltanschauung</i> (which roughly translates to worldview). Existing NLP datasets often obscure this by aggregating labels or filtering out disagreement. We propose a perspectivist approach: building datasets that capture annotator demographics, values, and justifications for their labels. Such datasets would explicitly model diverse worldviews. Our initial experiments with a subset of the SBIC dataset demonstrate that even limited annotator metadata can improve model performance.</abstract>
       <url hash="e34ec67e">2024.nlperspectives-1.10</url>
@@ -130,7 +130,7 @@
     <paper id="11">
       <title>The Effectiveness of <fixed-case>LLM</fixed-case>s as Annotators: A Comparative Overview and Empirical Analysis of Direct Representation</title>
       <author><first>Maja</first><last>Pavlovic</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>100–110</pages>
       <abstract>Recent studies focus on exploring the capability of Large Language Models (LLMs) for data annotation. Our work, firstly, offers a comparative overview of twelve such studies that investigate labelling with LLMs, particularly focusing on classification tasks. Secondly, we present an empirical analysis that examines the degree of alignment between the opinion distributions returned by GPT and those provided by human annotators across four subjective datasets. Our analysis supports a minority of studies that are considering diverse perspectives when evaluating data annotation tasks and highlights the need for further research in this direction.</abstract>
       <url hash="5f2fcebe">2024.nlperspectives-1.11</url>
@@ -147,7 +147,7 @@
     <paper id="13">
       <title><fixed-case>O</fixed-case>rigam<fixed-case>IM</fixed-case>: A Dataset of Ambiguous Sentence Interpretations for Social Grounding and Implicit Language Understanding</title>
       <author><first>Liesbeth</first><last>Allein</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>116–122</pages>
       <abstract>Sentences elicit different interpretations and reactions among readers, especially when there is ambiguity in their implicit layers. We present a first-of-its kind dataset of sentences from Reddit, where each sentence is annotated with multiple interpretations of its meanings, understandings of implicit moral judgments about mentioned people, and reader impressions of its author. Scrutiny of the dataset proves the evoked variability and polarity in reactions. It further shows that readers strongly disagree on both the presence of implied judgments and the social acceptability of the behaviors they evaluate. In all, the dataset offers a valuable resource for socially grounding language and modeling the intricacies of implicit language understanding from multiple reader perspectives.</abstract>
       <url hash="c8650afa">2024.nlperspectives-1.13</url>
@@ -156,7 +156,7 @@
     <paper id="14">
       <title>Linguistic Fingerprint in Transformer Models: How Language Variation Influences Parameter Selection in Irony Detection</title>
       <author><first>Michele</first><last>Mastromattei</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>123–130</pages>
       <abstract>This paper explores the correlation between linguistic diversity, sentiment analysis and transformer model architectures. We aim to investigate how different English variations impact transformer-based models for irony detection. To conduct our study, we used the EPIC corpus to extract five diverse English variation-specific datasets and applied the KEN pruning algorithm on five different architectures. Our results reveal several similarities between optimal subnetworks, which provide insights into the linguistic variations that share strong resemblances and those that exhibit greater dissimilarities. We discovered that optimal subnetworks across models share at least 60% of their parameters, emphasizing the significance of parameter values in capturing and interpreting linguistic variations. This study highlights the inherent structural similarities between models trained on different variants of the same language and also the critical role of parameter values in capturing these nuances.</abstract>
       <url hash="e72bb4ec">2024.nlperspectives-1.14</url>
diff --git a/data/xml/2024.nlrse.xml b/data/xml/2024.nlrse.xml
index 1bd64bbc7d..5feb387710 100644
--- a/data/xml/2024.nlrse.xml
+++ b/data/xml/2024.nlrse.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2024-07-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Natural Language Reasoning and Structured Explanations (@ACL 2024)</booktitle>
-      <editor><first>Bhavana</first><last>Dalvi Mishra</last></editor>
+      <editor id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last></editor>
       <editor><first>Greg</first><last>Durrett</last></editor>
-      <editor><first>Peter</first><last>Jansen</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
       <editor><first>Ben</first><last>Lipkin</last></editor>
       <editor><first>Danilo</first><last>Neves Ribeiro</last></editor>
       <editor><first>Lionel</first><last>Wong</last></editor>
@@ -71,7 +71,7 @@
     <paper id="5">
       <title><fixed-case>S</fixed-case>umm<fixed-case>EQ</fixed-case>u<fixed-case>AL</fixed-case>: Summarization Evaluation via Question Answering using Large Language Models</title>
       <author><first>Junyuan</first><last>Liu</last></author>
-      <author><first>Zhengyan</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengyan</first><last>Shi</last></author>
       <author><first>Aldo</first><last>Lipani</last><affiliation>University College London, University of London</affiliation></author>
       <pages>46-55</pages>
       <abstract>Summarization is hard to evaluate due to its diverse and abstract nature. Although N-gram-based metrics like BLEU and ROUGE are prevalent, they often do not align well with human evaluations. While model-based alternatives such as BERTScore improve, they typically require extensive labelled data. The advent of Large Language Models (LLMs) presents a promising avenue for evaluation. To this end, we introduce SummEQuAL, a novel content-based framework using LLMs for unified, reproducible summarization evaluation. SummEQuAL evaluates summaries by comparing their content with the source document, employing a question-answering approach to gauge both recall and precision. To validate SummEQuAL’s effectiveness, we develop a dataset based on MultiWOZ. We conduct experiments on SummEval and our MultiWOZ-based dataset, showing that SummEQuAL largely improves the quality of summarization evaluation. Notably, SummEQuAL demonstrates a 19.7% improvement over QuestEval in terms of sample-level Pearson correlation with human assessments of consistency on the SummEval dataset. Furthermore, it exceeds the performance of the BERTScore baseline by achieving a 17.3% increase in Spearman correlation on our MultiWOZ-based dataset. Our study illuminates the potential of LLMs for a unified evaluation framework, setting a new paradigm for future summarization evaluation.</abstract>
diff --git a/data/xml/2024.osact.xml b/data/xml/2024.osact.xml
index 8b1279748c..d20417f8e2 100644
--- a/data/xml/2024.osact.xml
+++ b/data/xml/2024.osact.xml
@@ -25,7 +25,7 @@
       <author><first>Seham</first><last>Alghamdi</last></author>
       <author><first>Youcef</first><last>Benkhedda</last></author>
       <author><first>Basma</first><last>Alharbi</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>1–12</pages>
       <abstract>We are currently witnessing a concerning surge in the spread of hate speech across various social media platforms, targeting individuals or groups based on their protected characteristics such as race, religion, nationality and gender. This paper focuses on the detection of hate type (Task 1) and hate target (Task 2) in the Arabic language. To comprehensively address this problem, we have combined and re-annotated hate speech tweets from existing publicly available corpora, resulting in the creation of AraTar, the first and largest Arabic corpus annotated with support for multi-label classification for both hate speech types and target detection with a high inter-annotator agreement. Additionally, we sought to determine the most effective machine learning-based approach for addressing this issue. To achieve this, we compare and evaluate different approaches, including: (1) traditional machine learning-based models, (2) deep learning-based models fed with contextual embeddings, and (3) fine-tuning language models (LMs). Our results demonstrate that fine-tuning LMs, specifically using AraBERTv0.2-twitter (base), achieved the highest performance, with a micro-averaged F1-score of 84.5% and 85.03%, and a macro-averaged F1-score of 77.46% and 73.15%, for Tasks 1 and 2, respectively.</abstract>
       <url hash="6b31ca43">2024.osact-1.1</url>
diff --git a/data/xml/2024.paclic.xml b/data/xml/2024.paclic.xml
index 3a6d842cef..f2c0fe7393 100644
--- a/data/xml/2024.paclic.xml
+++ b/data/xml/2024.paclic.xml
@@ -10,7 +10,7 @@
       <url hash="dc31ef61">2024.paclic-1</url>
       <venue>paclic</venue>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
-      <editor><first>Shirley N.</first><last>Dita</last></editor>
+      <editor id="shirley-dita"><first>Shirley N.</first><last>Dita</last></editor>
       <editor><first>Ariane Macalinga</first><last>Borlongan</last></editor>
       <editor><first>Jong-Bok</first><last>Kim</last></editor>
     </meta>
@@ -20,7 +20,7 @@
     </frontmatter>
     <paper id="1">
       <title>Large Language Models and Natural Language Processing On Minority Languages: A Systematic Review</title>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>1–8</pages>
       <url hash="ac9f8351">2024.paclic-1.1</url>
       <bibkey>roxas-2024-large</bibkey>
@@ -57,8 +57,8 @@
       <title>Advancing <fixed-case>V</fixed-case>ietnamese Information Retrieval with Learning Objective and Benchmark</title>
       <author><first>Vinh</first><last>Nguyen</last></author>
       <author><first>Nam</first><last>Tran</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>46–56</pages>
       <url hash="b5ea7a44">2024.paclic-1.5</url>
       <bibkey>nguyen-etal-2024-advancing</bibkey>
@@ -92,7 +92,7 @@
       <author><first>Hung-Nghiep</first><last>Tran</last></author>
       <author><first>André</first><last>Greiner-Petter</last></author>
       <author><first>Felix</first><last>Beierle</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>82–93</pages>
       <url hash="eeee70df">2024.paclic-1.8</url>
       <bibkey>to-etal-2024-skt5scisumm</bibkey>
@@ -251,7 +251,7 @@
       <author><first>Yuanyuan</first><last>Cai</last></author>
       <author><first>Satoshi</first><last>Kosugi</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>246–255</pages>
       <url hash="469b6d82">2024.paclic-1.24</url>
       <bibkey>cai-etal-2024-enhancing</bibkey>
@@ -269,7 +269,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>MERE</fixed-case>: A Deep Learning Architecture Using Multi-Fragment Ensemble for Relation Extraction</title>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Duy-Cat</first><last>Can</last></author>
       <pages>267–276</pages>
       <url hash="4c26952a">2024.paclic-1.26</url>
@@ -296,7 +296,7 @@
     <paper id="29">
       <title>A Novel Interpretability Metric for Explaining Bias in Language Models: Applications on Multilingual Models from <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sia</title>
       <author><first>Lance Calvin Lim</first><last>Gamboa</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>296–305</pages>
       <url hash="8007b1ae">2024.paclic-1.29</url>
       <bibkey>gamboa-lee-2024-novel</bibkey>
@@ -343,7 +343,7 @@
       <author><first>Dongyuan</first><last>Li</last></author>
       <author><first>Satoshi</first><last>Kosugi</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>346–353</pages>
       <url hash="8f73364c">2024.paclic-1.34</url>
       <bibkey>chuang-etal-2024-lpls</bibkey>
@@ -445,8 +445,8 @@
     <paper id="45">
       <title><fixed-case>V</fixed-case>i<fixed-case>H</fixed-case>erb<fixed-case>QA</fixed-case>: A Robust <fixed-case>QA</fixed-case> Model for <fixed-case>V</fixed-case>ietnamese Traditional Herbal Medicine</title>
       <author><first>Quyen</first><last>Truong</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>449–466</pages>
       <url hash="7b57d2b5">2024.paclic-1.45</url>
       <bibkey>truong-etal-2024-viherbqa</bibkey>
@@ -454,7 +454,7 @@
     <paper id="46">
       <title><fixed-case>EATT</fixed-case>: Knowledge Graph Integration in Transformer Architecture</title>
       <author><first>Phong</first><last>Vo</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
       <pages>467–478</pages>
       <url hash="950bc055">2024.paclic-1.46</url>
       <bibkey>vo-nguyen-2024-eatt</bibkey>
@@ -463,8 +463,8 @@
       <title>Multi-mask Prefix Tuning: Applying Multiple Adaptive Masks on Deep Prompt Tuning</title>
       <author><first>Qui</first><last>Tu</last></author>
       <author><first>Trung</first><last>Nguyen</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>479–487</pages>
       <url hash="8a4bfb27">2024.paclic-1.47</url>
       <bibkey>tu-etal-2024-multi-mask</bibkey>
@@ -475,14 +475,14 @@
       <author><first>Duc-Loc</first><last>Vu</last></author>
       <author><first>Huong</first><last>Nguyen-Thi-Thuy</last></author>
       <author><first>Duy-Cat</first><last>Can</last></author>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <pages>488–496</pages>
       <url hash="3163dcb1">2024.paclic-1.48</url>
       <bibkey>hoang-etal-2024-contrastive</bibkey>
     </paper>
     <paper id="49">
       <title>Kalahi: A handcrafted, grassroots cultural <fixed-case>LLM</fixed-case> evaluation suite for <fixed-case>F</fixed-case>ilipino</title>
-      <author><first>Jann Railey</first><last>Montalan</last></author>
+      <author id="jann-railey-montalan"><first>Jann Railey</first><last>Montalan</last></author>
       <author><first>Jian Gang</first><last>Ngui</last></author>
       <author><first>Wei Qi</first><last>Leong</last></author>
       <author><first>Yosephine</first><last>Susanto</last></author>
@@ -587,8 +587,8 @@
     <paper id="59">
       <title><fixed-case>VHE</fixed-case>: A New Dataset for Event Extraction from <fixed-case>V</fixed-case>ietnamese Historical Texts</title>
       <author><first>Truc</first><last>Hoang</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>619–634</pages>
       <url hash="685a570d">2024.paclic-1.59</url>
       <bibkey>hoang-etal-2024-vhe</bibkey>
@@ -893,8 +893,8 @@
       <title>A Comparative Study of Chart Summarization</title>
       <author><first>An</first><last>Chu</last></author>
       <author><first>Thong</first><last>Huynh</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <pages>971–981</pages>
       <url hash="352640a8">2024.paclic-1.92</url>
       <bibkey>chu-etal-2024-comparative</bibkey>
diff --git a/data/xml/2024.parlaclarin.xml b/data/xml/2024.parlaclarin.xml
index 0faac55ae9..de411c8a60 100644
--- a/data/xml/2024.parlaclarin.xml
+++ b/data/xml/2024.parlaclarin.xml
@@ -52,7 +52,7 @@
     <paper id="4">
       <title><fixed-case>B</fixed-case>ulgarian <fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int 4.0 corpus as a testset for Part-of-speech tagging and Named Entity Recognition</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>30–35</pages>
       <abstract>The paper discusses some fine-tuned models for the tasks of part-of-speech tagging and named entity recognition. The fine-tuning was performed on the basis of an existing BERT pre-trained model and two newly pre-trained BERT models for Bulgarian that are cross-tested on the domain of the Bulgarian part of the ParlaMint corpora as a new domain. In addition, a comparison has been made between the performance of the new fine-tuned BERT models and the available results from the Stanza-based model which the Bulgarian part of the ParlaMint corpora has been annotated with. The observations show the weaknesses in each model as well as the common challenges.</abstract>
       <url hash="045b62d4">2024.parlaclarin-1.4</url>
@@ -94,7 +94,7 @@
       <author><first>Mietta</first><last>Lennes</last></author>
       <author><first>Jyrki</first><last>Niemi</last></author>
       <author><first>Jack</first><last>Rueter</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>48–56</pages>
       <abstract>In this paper, we use automatic language identification to investigate the usage of different languages in the plenary sessions of the Parliament of Finland. Finland has two national languages, Finnish and Swedish. The plenary sessions are published as transcriptions of speeches in Parliament, reflecting the language the speaker used. In addition to charting out language use, we demonstrate how language identification can be used to audit the quality of the dataset. On the one hand, we made slight improvements to our language identifier; on the other hand, we made a list of improvement suggestions for the next version of the dataset.</abstract>
       <url hash="e4cbcc6b">2024.parlaclarin-1.8</url>
@@ -155,7 +155,7 @@
       <author><first>Meden</first><last>Katja</last></author>
       <author><first>Vaidas</first><last>Morkevicius</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>94–100</pages>
       <abstract>We introduce a dataset on political orientation and power position identification. The dataset is derived from ParlaMint, a set of comparable corpora of transcribed parliamentary speeches from 29 national and regional parliaments. We introduce the dataset, provide the reasoning behind some of the choices during its creation, present statistics on the dataset, and, using a simple classifier, some baseline results on predicting political orientation on the left-to-right axis, and on power position identification, i.e., distinguishing between the speeches delivered by governing coalition party members from those of opposition party members.</abstract>
       <url hash="f175c554">2024.parlaclarin-1.14</url>
@@ -187,7 +187,7 @@
       <title>Investigating Political Ideologies through the <fixed-case>G</fixed-case>reek <fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int corpus</title>
       <author><first>Maria</first><last>Gavriilidou</last></author>
       <author><first>Dimitris</first><last>Gkoumas</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <pages>116–120</pages>
       <abstract>This paper has two objectives: to present (a) the creation of ParlaMint-GR, the Greek part of the ParlaMint corpora of debates in the parliaments of Europe, and (b) preliminary results on its comparison with a corpus of Greek party manifestos, aiming at the investigation of the ideologies of the Greek political parties and members of the Parliament. Additionally, a gender related comparison is explored. The creation of the ParlaMint-GR corpus is discussed, together with the solutions adopted for various challenges faced. The corpus of party manifestos, available through CLARIN:EL, serves for a comparative study with the corpus of speeches delivered by the members of the Greek Parliament, with the aim to identify the ideological positions of parties and politicians.</abstract>
@@ -252,7 +252,7 @@
     <paper id="24">
       <title>A new Resource and Baselines for Opinion Role Labelling in <fixed-case>G</fixed-case>erman Parliamentary Debates</title>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>163–170</pages>
       <abstract>Detecting opinions, their holders and targets in parliamentary debates provides an interesting layer of analysis, for example, to identify frequent targets of opinions for specific topics, actors or parties. In the paper, we present GePaDe-ORL, a new dataset for German parliamentary debates where subjective expressions, their opinion holders and targets have been annotated. We describe the annotation process and report baselines for predicting those annotations in our new dataset.</abstract>
       <url hash="d1af609d">2024.parlaclarin-1.24</url>
diff --git a/data/xml/2024.politicalnlp.xml b/data/xml/2024.politicalnlp.xml
index c09e828d14..0ed2343471 100644
--- a/data/xml/2024.politicalnlp.xml
+++ b/data/xml/2024.politicalnlp.xml
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Event Detection in the Socio Political Domain</title>
       <author><first>Emmanuel</first><last>Cartier</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>12–21</pages>
       <abstract>In this paper we present two approaches for detection of socio political events: the first is based on manually crafted keyword combinations and the second one is based on a BERT classifier. We compare the performance of the two systems on a dataset of socio-political events. Interestingly, the systems demonstrate complementary performance: both showing their best accuracy on non overlapping sets of event types. In the evaluation section we provide insights on the effect of taxonomy mapping on the event detection evaluation. We also review in the related work section the most important resources and approaches for event extraction in the recent years.</abstract>
       <url hash="200b6a76">2024.politicalnlp-1.2</url>
@@ -61,7 +61,7 @@
     <paper id="5">
       <title>Analysing Pathos in User-Generated Argumentative Text</title>
       <author><first>Natalia</first><last>Evgrafova</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>39–44</pages>
       <abstract>While persuasion has been extensively examined in the context of politicians’ speeches, there exists a notable gap in the understanding of the pathos role in user-generated argumentation. This paper presents an exploratory study into the pathos dimension of user-generated arguments and formulates ideas on how pathos could be incorporated in argument mining. Using existing sentiment and emotion detection tools, this research aims to obtain insights into the role of emotion in argumentative public discussion on controversial topics, explores the connection between sentiment and stance, and detects frequent emotion-related words for a given topic.</abstract>
@@ -92,7 +92,7 @@
       <author><first>Federico</first><last>Borazio</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Giorgio</first><last>Gambosi</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Daniele</first><last>Margiotta</last></author>
       <author><first>Antonio</first><last>Scaiella</last></author>
       <author><first>Martina</first><last>Del Manso</last></author>
diff --git a/data/xml/2024.practicald2t.xml b/data/xml/2024.practicald2t.xml
index 3958ddb4e0..68b9de1ddd 100644
--- a/data/xml/2024.practicald2t.xml
+++ b/data/xml/2024.practicald2t.xml
@@ -44,7 +44,7 @@
       <title>Enhancing Situation Awareness through Model-Based Explanation Generation</title>
       <author><first>Konstantinos</first><last>Gavriilidis</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Wei</first><last>Pang</last></author>
       <pages>7–16</pages>
       <abstract>Robots are often deployed in remote locations for tasks such as exploration, where users cannot directly perceive the agent and its environment. For Human-In-The-Loop applications, operators must have a comprehensive understanding of the robot’s current state and its environment to take necessary actions and effectively assist the agent. In this work, we compare different explanation styles to determine the most effective way to convey real-time updates to users. Additionally, we formulate these explanation styles as separate fine-tuning tasks and assess the effectiveness of large language models in delivering in-mission updates to maintain situation awareness. The code and dataset for this work are available at:———</abstract>
diff --git a/data/xml/2024.privatenlp.xml b/data/xml/2024.privatenlp.xml
index bfc9bb2b28..25ae6f213a 100644
--- a/data/xml/2024.privatenlp.xml
+++ b/data/xml/2024.privatenlp.xml
@@ -143,9 +143,9 @@
     <paper id="11">
       <title>Smart Lexical Search for Label Flipping Adversial Attack</title>
       <author><first>Alberto</first><last>Gutiérrez-Megías</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last><affiliation>Universidad de Jaén</affiliation></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last><affiliation>Universidad de Jaén</affiliation></author>
       <author><first>L. Alfonso</first><last>Ureña</last><affiliation>Universidad de Jaén</affiliation></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last><affiliation>Universidad de Jaén</affiliation></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last><affiliation>Universidad de Jaén</affiliation></author>
       <pages>97-106</pages>
       <abstract>Language models are susceptible to vulnerability through adversarial attacks, using manipulations of the input data to disrupt their performance. Accordingly, it represents a cibersecurity leak. Data manipulations are intended to be unidentifiable by the learning model and by humans, small changes can disturb the final label of a classification task. Hence, we propose a novel attack built upon explainability methods to identify the salient lexical units to alter in order to flip the classification label. We asses our proposal on a disinformation dataset, and we show that our attack reaches high balance among stealthiness and efficiency.</abstract>
       <url hash="2498905c">2024.privatenlp-1.11</url>
@@ -168,7 +168,7 @@
       <author><first>Arij</first><last>Riabi</last></author>
       <author><first>Menel</first><last>Mahamdi</last><affiliation>Inria, Paris</affiliation></author>
       <author><first>Virginie</first><last>Mouilleron</last><affiliation>Inria, Paris</affiliation></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>123-136</pages>
       <abstract>Protecting privacy is essential when sharing data, particularly in the case of an online radicalization dataset that may contain personal information. In this paper, we explore the balance between preserving data usefulness and ensuring robust privacy safeguards, since regulations like the European GDPR shape how personal information must be handled. We share our method for manually pseudonymizing a multilingual radicalization dataset, ensuring performance comparable to the original data. Furthermore, we highlight the importance of establishing comprehensive guidelines for processing sensitive NLP data by sharing our complete pseudonymization process, our guidelines, the challenges we encountered as well as the resulting dataset.</abstract>
       <url hash="c0464741">2024.privatenlp-1.13</url>
diff --git a/data/xml/2024.propor.xml b/data/xml/2024.propor.xml
index c1aa6912e2..819cc1c5d1 100644
--- a/data/xml/2024.propor.xml
+++ b/data/xml/2024.propor.xml
@@ -8,7 +8,7 @@
       <editor><first>António</first><last>Teixeira</last></editor>
       <editor><first>Livy</first><last>Real</last></editor>
       <editor><first>Marcos</first><last>Garcia</last></editor>
-      <editor><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
+      <editor id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
       <editor><first>Raquel</first><last>Amaro</last></editor>
       <publisher>Association for Computational Lingustics</publisher>
       <address>Santiago de Compostela, Galicia/Spain</address>
@@ -53,7 +53,7 @@
       <author><first>Vinicius Gonçalves</first><last>Santos</last></author>
       <author><first>Gabriel Jose Pellisser</first><last>Dalalana</last></author>
       <author><first>Flaviane R. Fernandes</first><last>Svartman</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>32–44</pages>
       <url hash="0556302b">2024.propor-1.4</url>
       <bibkey>craveiro-etal-2024-simple</bibkey>
@@ -109,7 +109,7 @@
     <paper id="10">
       <title>Automatic Text Readability Assessment in <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>97–107</pages>
       <url hash="809c654b">2024.propor-1.10</url>
@@ -132,7 +132,7 @@
       <author><first>Helena Freire</first><last>Cameron</last></author>
       <author><first>Fernanda</first><last>Olival</last></author>
       <author><first>Fátima</first><last>Farrica</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>117–126</pages>
       <url hash="52bbc273">2024.propor-1.12</url>
       <bibkey>santos-etal-2024-named</bibkey>
@@ -149,8 +149,8 @@
     <paper id="14">
       <title>Bringing Pragmatics to Porttinari - Adding Speech Acts to News Texts</title>
       <author><first>Nataly L. Patti</first><last>da Silva</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
-      <author><first>Ariani Di</first><last>Felippo</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="ariani-di-felippo"><first>Ariani Di</first><last>Felippo</last></author>
       <pages>137–145</pages>
       <url hash="67b8ba6d">2024.propor-1.14</url>
       <bibkey>da-silva-etal-2024-bringing</bibkey>
@@ -169,7 +169,7 @@
       <author><first>Cássio Faria</first><last>da Silva</last></author>
       <author><first>Marcio Lima</first><last>Inácio</last></author>
       <author><first>Oto Araújo</first><last>Vale</last></author>
-      <author><first>Helena</first><last>de Medeiros Caseli</last></author>
+      <author id="helena-de-medeiros-caseli"><first>Helena</first><last>de Medeiros Caseli</last></author>
       <pages>156–167</pages>
       <url hash="e40b971a">2024.propor-1.16</url>
       <bibkey>wick-pedro-etal-2024-using</bibkey>
@@ -177,8 +177,8 @@
     <paper id="17">
       <title>Semantic Permanence in Audiovisual Translation: a <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et approach to subtitling</title>
       <author><first>Mairon</first><last>Samagaio</last></author>
-      <author><first>Tiago</first><last>Torrent</last></author>
-      <author><first>Ely</first><last>Matos</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago</first><last>Torrent</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely</first><last>Matos</last></author>
       <author><first>Arthur</first><last>Almeida</last></author>
       <pages>168–176</pages>
       <url hash="2e157143">2024.propor-1.17</url>
@@ -187,7 +187,7 @@
     <paper id="18">
       <title>Hurdles in Parsing Multi-word Adverbs: Examples from <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Izabela</first><last>Muller</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>177–186</pages>
       <url hash="9c7b0612">2024.propor-1.18</url>
@@ -201,7 +201,7 @@
       <author><first>Flaviane R. F.</first><last>Svartman</last></author>
       <author><first>Giovana M.</first><last>Craveiro</last></author>
       <author><first>Marli Quadros</first><last>Leite</last></author>
-      <author><first>Sandra M.</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra M.</first><last>Aluísio</last></author>
       <author><first>Vinícius G.</first><last>Santos</last></author>
       <author><first>Vinícius M.</first><last>Garcia</last></author>
       <pages>187–195</pages>
@@ -260,7 +260,7 @@
       <title>Applying event classification to reveal the Estado da Índia</title>
       <author><first>Gonçalo C.</first><last>Albuquerque</last></author>
       <author><first>Marlo</first><last>Souza</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Ana Sofia</first><last>Ribeiro</last></author>
       <pages>247–254</pages>
       <url hash="783c8cf8">2024.propor-1.25</url>
@@ -294,8 +294,8 @@
     <paper id="28">
       <title>A Corpus of Stock Market Tweets Annotated with Named Entities</title>
       <author><first>Michel Monteiro</first><last>Zerbinati</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
-      <author><first>Ariani Di</first><last>Felippo</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="ariani-di-felippo"><first>Ariani Di</first><last>Felippo</last></author>
       <pages>276–284</pages>
       <url hash="e06c2f16">2024.propor-1.28</url>
       <bibkey>zerbinati-etal-2024-corpus</bibkey>
@@ -356,7 +356,7 @@
       <title>Investigating the Generalizability of <fixed-case>P</fixed-case>ortuguese Readability Assessment Models Trained Using Linguistic Complexity Features</title>
       <author><first>Soroosh</first><last>Akef</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Patrick</first><last>Rebuschat</last></author>
       <pages>332–341</pages>
       <url hash="a4ca807f">2024.propor-1.34</url>
@@ -540,7 +540,7 @@
     <paper id="52">
       <title>A Bag-of-Users approach to mental health prediction from social media data</title>
       <author><first>Rafael</first><last>Oliveira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>509–514</pages>
       <url hash="4b67afdd">2024.propor-1.52</url>
       <bibkey>oliveira-paraboni-2024-bag</bibkey>
@@ -548,7 +548,7 @@
     <paper id="53">
       <title>Semi-automatic corpus expansion: the case of stance prediction</title>
       <author><first>Camila</first><last>Pereira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>515–520</pages>
       <url hash="20badd96">2024.propor-1.53</url>
       <bibkey>pereira-paraboni-2024-semi</bibkey>
@@ -556,7 +556,7 @@
     <paper id="54">
       <title>Sequence-to-sequence and transformer approaches to <fixed-case>P</fixed-case>ortuguese text style transfer</title>
       <author><first>Pablo</first><last>Costa</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>521–526</pages>
       <url hash="0638f9bf">2024.propor-1.54</url>
       <bibkey>costa-paraboni-2024-sequence</bibkey>
@@ -573,7 +573,7 @@
       <title>Towards a Syntactic Lexicon of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Adjectives</title>
       <author><first>Ryan</first><last>Martinez</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
       <pages>532–538</pages>
       <url hash="7733fbad">2024.propor-1.56</url>
       <bibkey>martinez-etal-2024-towards</bibkey>
@@ -595,7 +595,7 @@
     <paper id="59">
       <title>Text Readability Assessment in <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese: A Comparison of Classification and Regression Approaches</title>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>551–557</pages>
       <url hash="f83ae32c">2024.propor-1.59</url>
@@ -743,7 +743,7 @@
     <paper id="73">
       <title><fixed-case>TTS</fixed-case> applied to the generation of datasets for automatic speech recognition</title>
       <author><first>Edresson</first><last>Casanova</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <author><first>Moacir Antonelli</first><last>Ponti</last></author>
       <pages>633–638</pages>
       <url hash="e7dc25be">2024.propor-1.73</url>
@@ -765,7 +765,7 @@
       <editor><first>António</first><last>Teixeira</last></editor>
       <editor><first>Livy</first><last>Real</last></editor>
       <editor><first>Marcos</first><last>Garcia</last></editor>
-      <editor><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
+      <editor id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></editor>
       <editor><first>Raquel</first><last>Amaro</last></editor>
       <publisher>Association for Computational Lingustics</publisher>
       <address>Santiago de Compostela, Galicia/Spain</address>
@@ -814,7 +814,7 @@
     <paper id="4">
       <title>Exploring the Automated Scoring of Narrative Essays in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese using Transformer Models</title>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>14–17</pages>
       <url hash="f743e27a">2024.propor-2.4</url>
@@ -882,7 +882,7 @@
       <title>Can rules still beat neural networks? The case of automatic normalisation for 18th-century <fixed-case>P</fixed-case>ortuguese texts</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Rafaela R.</first><last>Lazzari</last></author>
-      <author><first>Maria José B.</first><last>Finatto</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José B.</first><last>Finatto</last></author>
       <pages>83–92</pages>
       <url hash="a84a3ee2">2024.propor-2.12</url>
       <bibkey>zilio-etal-2024-rules</bibkey>
@@ -899,7 +899,7 @@
     <paper id="14">
       <title>Could Style Help Plagiarism Detection? - A Sample-based Quantitative Study of Correlation between Style Specifics and Plagiarism</title>
       <author><first>Adile</first><last>Uka</last></author>
-      <author><first>Maria</first><last>Berger</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
       <pages>103–108</pages>
       <url hash="ada287de">2024.propor-2.14</url>
       <bibkey>uka-berger-2024-style</bibkey>
@@ -954,7 +954,7 @@
     <paper id="20">
       <title>Decoding Sentiments about Migration in <fixed-case>P</fixed-case>ortuguese Political Manifestos (2011, 2015, 2019)</title>
       <author><first>Erik Bran</first><last>Marino</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Jesus Manuel Benitez</first><last>Baleato</last></author>
       <author><first>Ana Sofia</first><last>Ribeiro</last></author>
       <author><first>Katarina</first><last>Laken</last></author>
@@ -965,7 +965,7 @@
     <paper id="21">
       <title>Analysing entity distribution in an annotated 18th-century historical source</title>
       <author><first>Daniel De Los</first><last>Reyes</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Fernanda</first><last>Olival</last></author>
       <author><first>Helena Freire</first><last>Cameron</last></author>
       <author><first>Fátima</first><last>Farrica</last></author>
@@ -978,7 +978,7 @@
       <author><first>Isaac Souza</first><last>de Miranda Jr.</last></author>
       <author><first>Gabriela</first><last>Wick-Pedro</last></author>
       <author><first>Cláudia Dias</first><last>de Barros</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
       <pages>165–169</pages>
       <url hash="01ee1285">2024.propor-2.22</url>
       <bibkey>de-miranda-jr-etal-2024-roda</bibkey>
@@ -986,8 +986,8 @@
     <paper id="23">
       <title><fixed-case>G</fixed-case>i<fixed-case>D</fixed-case>i: A Virtual Assistant for Screening Protocols at Home</title>
       <author><first>Andrés</first><last>Piñeiro-Martín</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
-      <author><first>Laura</first><last>Docío-Fernández</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docío-Fernández</last></author>
       <author><first>María</first><last>del Carmen López-Pérez</last></author>
       <author><first>Ignacio</first><last>Novo-Veleiro</last></author>
       <pages>170–173</pages>
@@ -1009,7 +1009,7 @@
       <title>Indexing <fixed-case>P</fixed-case>ortuguese <fixed-case>NLP</fixed-case> Resources with <fixed-case>PT</fixed-case>-Pump-Up</title>
       <author><first>Rúben</first><last>Almeida</last></author>
       <author><first>Ricardo</first><last>Campos</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <author><first>Sérgio</first><last>Nunes</last></author>
       <pages>178–181</pages>
       <url hash="32086e9d">2024.propor-2.25</url>
@@ -1030,7 +1030,7 @@
       <title>Perfil Público: Automatic Generation and Visualization of Author Profiles for Digital News Media</title>
       <author><first>Nuno</first><last>Guimarães</last></author>
       <author><first>Ricardo</first><last>Campos</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <pages>186–189</pages>
       <url hash="b838f769">2024.propor-2.27</url>
       <bibkey>guimaraes-etal-2024-perfil</bibkey>
@@ -1045,7 +1045,7 @@
     </paper>
     <paper id="29">
       <title>Blip Copilot: a smart conversational assistant</title>
-      <author><first>Evandro</first><last>Fonseca</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro</first><last>Fonseca</last></author>
       <author><first>Tayane</first><last>Soares</last></author>
       <author><first>Dyovana</first><last>Baptista</last></author>
       <author><first>Rogers</first><last>Damas</last></author>
@@ -1079,7 +1079,7 @@
       <title>Autopilot: a smart sales assistant</title>
       <author><first>Amanda</first><last>Oliveira</last></author>
       <author><first>João</first><last>Alvarenga</last></author>
-      <author><first>Evandro</first><last>Fonseca</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro</first><last>Fonseca</last></author>
       <author><first>William</first><last>Colen</last></author>
       <pages>204–205</pages>
       <url hash="f2e21abc">2024.propor-2.32</url>
diff --git a/data/xml/2024.rail.xml b/data/xml/2024.rail.xml
index bd051d718f..74c5be3338 100644
--- a/data/xml/2024.rail.xml
+++ b/data/xml/2024.rail.xml
@@ -6,7 +6,7 @@
       <editor><first>Rooweither</first><last>Mabuya</last></editor>
       <editor><first>Muzi</first><last>Matfunjwa</last></editor>
       <editor><first>Mmasibidi</first><last>Setaka</last></editor>
-      <editor><first>Menno</first><last>van Zaanen</last></editor>
+      <editor id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -131,8 +131,8 @@
       <title><fixed-case>E</fixed-case>thio<fixed-case>MT</fixed-case>: Parallel Corpus for Low-resource <fixed-case>E</fixed-case>thiopian Languages</title>
       <author><first>Atnafu Lambebo</first><last>Tonja</last></author>
       <author><first>Olga</first><last>Kolesnikova</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>107–114</pages>
       <abstract>Recent research in natural language processing (NLP) has achieved impressive performance in tasks such as machine translation (MT), news classification, and question-answering in high-resource languages. However, the performance of MT leaves much to be desired for low-resource languages. This is due to the smaller size of available parallel corpora in these languages, if such corpora are available at all. NLP in Ethiopian languages suffers from the same issues due to the unavailability of publicly accessible datasets for NLP tasks, including MT. To help the research community and foster research for Ethiopian languages, we introduce EthioMT – a new parallel corpus for 15 languages. We also create a new benchmark by collecting a dataset for better-researched languages in Ethiopia. We evaluate the newly collected corpus and the benchmark dataset for 23 Ethiopian languages using transformer and fine-tuning approaches.</abstract>
       <url hash="2d4c9221">2024.rail-1.12</url>
@@ -143,7 +143,7 @@
       <author><first>Nuhu</first><last>Ibrahim</last></author>
       <author><first>Felicity</first><last>Mulford</last></author>
       <author><first>Matt</first><last>Lawrence</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>115–123</pages>
       <abstract>Hate speech on social media has proliferated in Ethiopia. To support studies aimed at investigating the targets and types of hate speech circulating in the Ethiopian context, we developed a new fine-grained annotation scheme that captures three elements of hate speech: the target (i.e., any groups with protected characteristics), type (i.e., the method of abuse) and nature (i.e., the style of the language used). We also developed a new lexicon of hate speech-related keywords in the four most prominent languages found on Ethiopian social media: Amharic, Afaan Oromo, English and Tigrigna. These keywords enabled us to retrieve social media posts (also in the same four languages) from three platforms (i.e., X, Telegram and Facebook), that are likely to contain hate speech. Experts in the Ethiopian context then manually annotated a sample of those retrieved posts, obtaining fair to moderate inter-annotator agreement. The resulting annotations formed the basis of a case study of which groups tend to be targeted by particular types of hate speech or by particular styles of hate speech language.</abstract>
       <url hash="dd17ea5b">2024.rail-1.13</url>
diff --git a/data/xml/2024.rapid.xml b/data/xml/2024.rapid.xml
index 1b779fe5d3..45eb825318 100644
--- a/data/xml/2024.rapid.xml
+++ b/data/xml/2024.rapid.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments @LREC-COLING 2024</booktitle>
       <editor><first>Dimitrios</first><last>Kokkinakis</last></editor>
-      <editor><first>Kathleen C.</first><last>Fraser</last></editor>
+      <editor id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></editor>
       <editor><first>Charalambos K.</first><last>Themistocleous</last></editor>
       <editor><first>Kristina Lundholm</first><last>Fors</last></editor>
       <editor><first>Athanasios</first><last>Tsanas</last></editor>
@@ -73,7 +73,7 @@
       <author><first>Marica</first><last>Belmonte</last></author>
       <author><first>Gloria</first><last>Gagliardi</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>34–44</pages>
       <abstract>Linguistic alterations represent one of the prodromal signs of cognitive decline associated with Dementia. In recent years, a growing body of work has been devoted to the development of algorithms for the automatic linguistic analysis of both oral and written texts, for diagnostic purposes. The extraction of Digital Linguistic Biomarkers from patients’ verbal productions can indeed provide a rapid, ecological, and cost-effective system for large-scale screening of the pathology. This article contributes to the ongoing research in the field by exploring a traditionally less studied aspect of language in Dementia, namely the rhythmic characteristics of speech. In particular, the paper focuses on the automatic detection of rhythmic features in Italian-connected speech. A landmark-based system was developed and evaluated to segment the speech flow into vocalic and consonantal intervals and to calculate several rhythmic metrics. Additionally, the reliability of these metrics in identifying Mild Cognitive Impairment and Dementia patients was tested.</abstract>
       <url hash="7f666a02">2024.rapid-1.5</url>
@@ -91,7 +91,7 @@
       <title>Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model</title>
       <author><first>Mario</first><last>Mina</last></author>
       <author><first>Júlia</first><last>Falcão</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <pages>54–67</pages>
       <abstract>Much work has gone into developing language models of increasing size, but only recently have we begun to examine them for pernicious behaviour that could lead to harming marginalised groups. Following Lin et al. (2022) in rooting our work in psychological research, we prompt two masked language models (MLMs) of different specialisations in English and Spanish with statements from a questionnaire developed to measure stigma to determine if they treat physical and mental illnesses equally. In both models we find a statistically significant difference in the treatment of physical and mental illnesses across most if not all latent constructs as measured by the questionnaire, and thus they are more likely to associate mental illnesses with stigma. We then examine their training data or data retrieved from the same domain using a computational implementation of the Stereotype Content Model (SCM) (Fiske et al., 2002; Fraser et al., 2021) to interpret the questionnaire results based on the SCM values as reflected in the data. We observe that model behaviour can largely be explained by the distribution of the mentions of illnesses according to their SCM values.</abstract>
       <url hash="5393c6f0">2024.rapid-1.7</url>
diff --git a/data/xml/2024.readi.xml b/data/xml/2024.readi.xml
index ea030490e7..2fe3afad72 100644
--- a/data/xml/2024.readi.xml
+++ b/data/xml/2024.readi.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 3rd Workshop on Tools and Resources for People with REAding DIfficulties (READI) @ LREC-COLING 2024</booktitle>
       <editor><first>Rodrigo</first><last>Wilkens</last></editor>
       <editor><first>Rémi</first><last>Cardon</last></editor>
-      <editor><first>Amalia</first><last>Todirascu</last></editor>
-      <editor><first>Núria</first><last>Gala</last></editor>
+      <editor id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></editor>
+      <editor id="nuria-gala"><first>Núria</first><last>Gala</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -51,7 +51,7 @@
       <title>An Extensible Massively Multilingual Lexical Simplification Pipeline Dataset using the <fixed-case>M</fixed-case>ulti<fixed-case>LS</fixed-case> Framework</title>
       <author><first>Matthew</first><last>Shardlow</last></author>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Saul</first><last>Calderon Ramirez</last></author>
       <author><first>Rémi</first><last>Cardon</last></author>
@@ -65,7 +65,7 @@
       <author><first>Kai</first><last>North</last></author>
       <author><first>Laura</first><last>Occhipinti</last></author>
       <author><first>Nelson</first><last>Peréz Rojas</last></author>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
       <author><first>Martin</first><last>Solis Salazar</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
@@ -97,7 +97,7 @@
     <paper id="7">
       <title>Accessible Communication: a systematic review and comparative analysis of official <fixed-case>E</fixed-case>nglish Easy-to-Understand (<fixed-case>E</fixed-case>2<fixed-case>U</fixed-case>) language guidelines</title>
       <author><first>Andreea Maria</first><last>Deleanu</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Sabine</first><last>Braun</last></author>
       <pages>70–92</pages>
       <abstract>Easy-to-Understand (E2U) language varieties have been recognized by the United Nation’s Convention on the Rights of Persons with Disabilities (2006) as a means to guarantee the fundamental right to Accessible Communication. Increased awareness has driven changes in European (European Commission, 2015, 2021; European Parliament, 2016) and International legislation (ODI, 2010), prompting public-sector and other institutions to offer domain-specific content into E2U language to prevent communicative exclusion of those facing cognitive barriers (COGA, 2017; Maaß, 2020; Perego, 2020). However, guidance on what it is that makes language actually ‘easier to understand’ is still fragmented and vague. For this reason, we carried out a systematic review of official guidelines for English Plain Language and Easy Language to identify the most effective lexical, syntactic and adaptation strategies that can reduce complexity in verbal discourse according to official bodies. This article will present the methods and preliminary results of the guidelines analysis.</abstract>
diff --git a/data/xml/2024.repl4nlp.xml b/data/xml/2024.repl4nlp.xml
index 6cdf3c37eb..507201780c 100644
--- a/data/xml/2024.repl4nlp.xml
+++ b/data/xml/2024.repl4nlp.xml
@@ -45,9 +45,9 @@
     <paper id="3">
       <title>Relevance-aware Diverse Query Generation for Out-of-domain Text Ranking</title>
       <author><first>Jia-Huei</first><last>Ju</last></author>
-      <author><first>Huck Chao-Han</first><last>Yang</last></author>
+      <author id="chao-han-huck-yang"><first>Huck Chao-Han</first><last>Yang</last></author>
       <author><first>Szu-Wei</first><last>Fu</last></author>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
       <pages>26-36</pages>
       <abstract>Domain adaptation presents significant challenges for out-of-domain text ranking, especially when supervised data is limited. In this paper, we present ReadQG (Relevance-Aware Diverse Query Generation), a method to generate informative synthetic queries to facilitate the adaptation process of text ranking models. Unlike previous approaches focusing solely on relevant query generation, our ReadQG generates diverse queries with continuous relevance scores. Specifically, we propose leveraging soft-prompt tuning and diverse generation objectives to control query generation according to the given relevance. Our experiments show that integrating negative queries into the learning process enhances the effectiveness of text ranking models in out-of-domain information retrieval (IR) benchmarks. Furthermore, we measure the quality of query generation, highlighting the underlying beneficial characteristics of negative queries. Our empirical results and analysis also shed light on potential directions for more advanced data augmentation in IR. The data and code have been released.</abstract>
@@ -121,7 +121,7 @@
       <author><first>Edward</first><last>Gow-Smith</last></author>
       <author><first>Dylan</first><last>Phelps</last></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last><affiliation>University of Bath</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Aline</first><last>Villavicencio</last><affiliation>University of Exeter and University of Sheffield</affiliation></author>
       <pages>118-135</pages>
       <abstract>All existing transformer-based approaches to NLP using subword tokenisation algorithms encode whitespace (word boundary information) through the use of special space symbols (such as ## or _) forming part of tokens. These symbols have been shown to a) lead to reduced morphological validity of tokenisations, and b) give substantial vocabulary redundancy. As such, removing these symbols has been shown to have a beneficial effect on the processing of morphologically complex words for transformer encoders in the pretrain-finetune paradigm. In this work, we explore whether word boundary information is at all useful to such models. In particular, we train transformer encoders across four different training scales, and investigate several alternative approaches to including word boundary information, evaluating on two languages (English and Finnish) with a range of tasks across different domains and problem set-ups: sentence classification datasets, NER (for token-level classification), and two classification datasets involving complex words (Superbizarre and FLOTA). Overall, through an extensive experimental setup that includes the pre-training of 35 models, we find no substantial improvements from our alternative approaches, suggesting that modifying tokenisers to remove word boundary information isn’t leading to a loss of useful information.</abstract>
@@ -143,7 +143,7 @@
       <author><first>Heike</first><last>Adel</last><affiliation>Hochschule der Medien (University of Applied Sciences)</affiliation></author>
       <author><first>Lukas</first><last>Lange</last><affiliation>Robert Bosch GmbH, Bosch</affiliation></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>163-176</pages>
       <abstract>In real-world environments, continual learning is essential for machine learning models, as they need to acquire new knowledge incrementally without forgetting what they have already learned. While pretrained language models have shown impressive capabilities on various static tasks, applying them to continual learning poses significant challenges, including avoiding catastrophic forgetting, facilitating knowledge transfer, and maintaining parameter efficiency. In this paper, we introduce MoCL-P, a novel lightweight continual learning method that addresses these challenges simultaneously. Unlike traditional approaches that continuously expand parameters for newly arriving tasks, MoCL-P integrates task representation-guided module composition with adaptive pruning, effectively balancing knowledge integration and computational overhead. Our evaluation across three continual learning benchmarks with up to 176 tasks shows that MoCL-P achieves state-of-the-art performance and improves parameter efficiency by up to three times, demonstrating its potential for practical applications where resource requirements are constrained.</abstract>
       <url hash="b945aedd">2024.repl4nlp-1.12</url>
@@ -174,7 +174,7 @@
     </paper>
     <paper id="15">
       <title>Tracking linguistic information in transformer-based sentence embeddings through targeted sparsification</title>
-      <author><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last><affiliation>University of Geneva</affiliation></author>
       <author><first>Paola</first><last>Merlo</last><affiliation>Idiap Research Institute and University of Geneva, Switzerland</affiliation></author>
       <pages>203-214</pages>
       <abstract>Analyses of transformer-based models have shown that they encode a variety of linguistic information from their textual input. While these analyses have shed a light on the relation between linguistic information on one side, and internal architecture and parameters on the other, a question remains unanswered: how is this linguistic information reflected in sentence embeddings? Using datasets consisting of sentences with known structure, we test to what degree information about chunks (in particular noun, verb or prepositional phrases), such as grammatical number, or semantic role, can be localized in sentence embeddings. Our results show that such information is not distributed over the entire sentence embedding, but rather it is encoded in specific regions. Understanding how the information from an input text is compressed into sentence embeddings helps understand current transformer models and help build future explainable neural models.</abstract>
diff --git a/data/xml/2024.safety4convai.xml b/data/xml/2024.safety4convai.xml
index 8ce230e7c7..47caa277c9 100644
--- a/data/xml/2024.safety4convai.xml
+++ b/data/xml/2024.safety4convai.xml
@@ -48,7 +48,7 @@
       <title>Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development</title>
       <author><first>Tunde Oluwaseyi</first><last>Ajayi</last></author>
       <author><first>Gaurav</first><last>Negi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>16–27</pages>
       <abstract>There has been notable progress in the development of open-domain dialogue systems (chatbots) especially with the rapid advancement of the capabilities of Large Language Models. Chatbots excel at holding conversations in a manner that keeps a user interested and engaged. However, their responses can be unsafe, as they can respond in an offensive manner or offer harmful professional advice. As a way to mitigate this issue, recent work crowdsource datasets with exemplary responses or annotate dialogue safety datasets, which are relatively scarce compared to casual dialogues. Despite the quality of data obtained from crowdsourcing, it can be expensive and time consuming. This work proposes an effective pipeline, using information retrieval, to automatically repurpose existing dialogue datasets for safe chatbot development, as a way to address the aforementioned challenges. We select an existing dialogue dataset, revise its unsafe responses, as a way to obtain a dataset with safer responses to unsafe user inputs. We then fine-tune dialogue models on the original and revised datasets and generate responses to evaluate the safeness of the models.</abstract>
diff --git a/data/xml/2024.scalellm.xml b/data/xml/2024.scalellm.xml
index 8ff7892319..bace2db000 100644
--- a/data/xml/2024.scalellm.xml
+++ b/data/xml/2024.scalellm.xml
@@ -3,9 +3,9 @@
   <volume id="1" ingest-date="2024-03-04" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First edition of the Workshop on the Scaling Behavior of Large Language Models (SCALE-LLM 2024)</booktitle>
-      <editor><first>Antonio Valerio</first><last>Miceli-Barone</last></editor>
+      <editor id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></editor>
       <editor><first>Fazl</first><last>Barez</last></editor>
-      <editor><first>Shay</first><last>Cohen</last></editor>
+      <editor id="shay-b-cohen"><first>Shay</first><last>Cohen</last></editor>
       <editor><first>Elena</first><last>Voita</last></editor>
       <editor><first>Ulrich</first><last>Germann</last></editor>
       <editor><first>Michal</first><last>Lukasik</last></editor>
diff --git a/data/xml/2024.scichat.xml b/data/xml/2024.scichat.xml
index f7a19b81d1..61734cabb2 100644
--- a/data/xml/2024.scichat.xml
+++ b/data/xml/2024.scichat.xml
@@ -38,7 +38,7 @@
     <paper id="2">
       <title>Improving Dialog Safety using Socially Aware Contrastive Learning</title>
       <author><first>Souvik</first><last>Das</last><affiliation>Department of Computer Science and Engineering, University at Buffalo, NY.</affiliation></author>
-      <author><first>Rohini K.</first><last>Srihari</last><affiliation>Department of Computer Science and Engineering, University at Buffalo, NY.</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last><affiliation>Department of Computer Science and Engineering, University at Buffalo, NY.</affiliation></author>
       <pages>4-18</pages>
       <abstract>State-of-the-art conversational AI systems raise concerns due to their potential risks of generating unsafe, toxic, unethical, or dangerous content. Previous works have developed datasets to teach conversational agents the appropriate social paradigms to respond effectively to specifically designed hazardous content. However, models trained on these adversarial datasets still struggle to recognize subtle unsafe situations that appear naturally in conversations or introduce an inappropriate response in a casual context. To understand the extent of this problem, we study prosociality in both adversarial and casual dialog contexts and audit the response quality of general-purpose language models in terms of propensity to produce unsafe content. We propose a dual-step fine-tuning process to address these issues using a socially aware n-pair contrastive loss. Subsequently, we train a base model that integrates prosocial behavior by leveraging datasets like Moral Integrity Corpus (MIC) and ProsocialDialog. Experimental results on several dialog datasets demonstrate the effectiveness of our approach in generating socially appropriate responses.</abstract>
       <url hash="22b70561">2024.scichat-1.2</url>
diff --git a/data/xml/2024.scil.xml b/data/xml/2024.scil.xml
index ddcc0fd414..bf6f2e766b 100644
--- a/data/xml/2024.scil.xml
+++ b/data/xml/2024.scil.xml
@@ -40,7 +40,7 @@
       <author><first>Canaan</first><last>Breiss</last></author>
       <author><first>Alexis</first><last>Ross</last></author>
       <author><first>Amani</first><last>Maina-Kilaas</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <pages>20–31</pages>
       <url hash="dce80366">2024.scil-1.3</url>
@@ -110,7 +110,7 @@
       <author><first>Amanda</first><last>Doucette</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Morgan</first><last>Sonderegger</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>117–128</pages>
       <url hash="224b294d">2024.scil-1.12</url>
       <bibkey>doucette-etal-2024-correlation</bibkey>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="20">
       <title>Computing Ellipsis Constructions: Comparing Classical <fixed-case>NLP</fixed-case> and <fixed-case>LLM</fixed-case> Approaches</title>
-      <author><first>Damir</first><last>Cavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last></author>
       <author><first>Zoran</first><last>Tiganj</last></author>
       <author><first>Ludovic Veta</first><last>Mompelat</last></author>
       <author><first>Billy</first><last>Dickson</last></author>
@@ -196,7 +196,7 @@
       <title>Interference Predicts Locality: Evidence from an <fixed-case>SOV</fixed-case> language</title>
       <author><first>Sidharth</first><last>Ranjan</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>240–256</pages>
       <url hash="5525f909">2024.scil-1.22</url>
       <bibkey>ranjan-etal-2024-interference</bibkey>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="24">
       <title>Neural language model gradients predict event-related brain potentials</title>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <pages>316–323</pages>
       <url hash="b5f90150">2024.scil-1.24</url>
       <bibkey>frank-2024-neural</bibkey>
diff --git a/data/xml/2024.sdp.xml b/data/xml/2024.sdp.xml
index dac746c416..d5973740d8 100644
--- a/data/xml/2024.sdp.xml
+++ b/data/xml/2024.sdp.xml
@@ -91,7 +91,7 @@
     <paper id="6">
       <title>Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning</title>
       <author><first>Jun</first><last>Zhuang</last><affiliation>Boise State University and Indiana University Purdue University Indianapolis</affiliation></author>
-      <author><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last><affiliation>Boise State University</affiliation></author>
       <pages>58-69</pages>
       <abstract>As new research on Large Language Models (LLMs) continues, it is difficult to keep up with new research and models. To help researchers synthesize the new research many have written survey papers, but even those have become numerous. In this paper, we develop a method to automatically assign survey papers to a taxonomy. We collect the metadata of 144 LLM survey papers and explore three paradigms to classify papers within the taxonomy. Our work indicates that leveraging graph structure information on co-category graphs can significantly outperform the language models in two paradigms; pre-trained language models’ fine-tuning and zero-shot/few-shot classifications using LLMs. We find that our model surpasses an average human recognition level and that fine-tuning LLMs using weak labels generated by a smaller model, such as the GCN in this study, can be more effective than using ground-truth labels, revealing the potential of weak-to-strong generalization in the taxonomy classification task.</abstract>
       <url hash="2b212177">2024.sdp-1.6</url>
@@ -299,7 +299,7 @@
       <title>Zero-shot Scientific Claim Verification Using <fixed-case>LLM</fixed-case>s and Citation Text</title>
       <author><first>Carlos</first><last>Alvarez</last></author>
       <author><first>Maxwell</first><last>Bennett</last></author>
-      <author><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>269-276</pages>
       <abstract>Due to rapidly changing and advancing science, it is important to check the veracity of scientific claims and whether they are supported by research evidence. Previous versions of this task depended on supervised training, where labeled datasets were constructed through manual claim writing and evidence identification, sometimes coupled with mining citation relationships in papers. In this work, we investigate whether zero-shot scientific claim verification could be enabled using large language models (LLMs) and distant supervision examples taken directly from citation texts. We derive an in-context learning (ICL) dataset, SCitance, consisting of citation sentences (“citances”), LLM-generated negations, evidence documents, and veracity labels, and find that prompting GPT-4 with ICL examples from this dataset yields comparable performance (within 1 point F1) to previous finetuned models trained on manually curated claim-evidence pairs. Our results suggest that prompting LLMs with citance-evidence pairs directly poses a viable alternative to finetuning scientific claim verification models with manually-curated data.</abstract>
       <url hash="58f02255">2024.sdp-1.25</url>
@@ -317,7 +317,7 @@
     <paper id="27">
       <title><fixed-case>C</fixed-case>o<fixed-case>SAE</fixed-case>mb: Contrastive Section-aware Aspect Embeddings for Scientific Articles</title>
       <author><first>Shruti</first><last>Singh</last><affiliation>IIT Gandhinagar</affiliation></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
+      <author><first>Mayank</first><last>Singh</last><affiliation>Indian Institute of Technology Gandhinagar</affiliation></author>
       <pages>283-292</pages>
       <abstract>Research papers are long documents that contain information about various aspects such as background, prior work, methodology, and results. Existing works on scientific document representation learning only leverage the title and abstract of the paper. We present CoSAEmb, a model that learns representations from the full text of 97402 scientific papers from the S2ORC dataset. We present a novel supervised contrastive training framework for long documents using triplet loss and margin gradation. Our framework can be used to learn representations of long documents with any existing encoder-only transformer model without retraining it from scratch. CoSAEmb shows improved performance on information retrieval from the paper’s full text in comparison to models trained only on paper titles and abstracts. We also evaluate CoSAEmb on SciRepEval and CSFCube benchmarks, showing comparable performance with existing state-of-the-art models.</abstract>
       <url hash="44929adb">2024.sdp-1.27</url>
@@ -335,7 +335,7 @@
     <paper id="29">
       <title>Harnessing <fixed-case>CLIP</fixed-case> for Evidence Identification in Scientific Literature: A Multimodal Approach to Context24 Shared Task</title>
       <author><first>Anukriti</first><last>Kumar</last></author>
-      <author><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>307-313</pages>
       <abstract>Knowing whether scientific claims are supported by evidence is fundamental to scholarly communication and evidence-based decision-making. We present our approach to Task 1 of the Context24 Shared Task—Contextualizing Scientific Figures and Tables (SDP@ACL2024), which focuses on identifying multimodal evidence from scientific publications that support claims. We finetune CLIP, a state-of-the-art model for image-text similarity tasks, to identify and rank figures and tables in papers that substantiate specific claims. Our methods focus on text and image preprocessing techniques and augmenting the organizer-provided training data with labeled examples from the SciMMIR and MedICaT datasets. Our best-performing model achieved NDCG@5 and NDCG@10 values of 0.26 and 0.30, respectively, on the Context24 test split. Our findings underscore the effectiveness of data augmentation and preprocessing in improving the model’s ability in evidence matching.</abstract>
       <url hash="0ddf46c7">2024.sdp-1.29</url>
diff --git a/data/xml/2024.semeval.xml b/data/xml/2024.semeval.xml
index b59b718f31..4026783758 100644
--- a/data/xml/2024.semeval.xml
+++ b/data/xml/2024.semeval.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-06-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>A. Seza</first><last>Doğruöz</last></editor>
       <editor><first>Harish</first><last>Tayyar Madabushi</last></editor>
       <editor><first>Giovanni</first><last>Da San Martino</last></editor>
@@ -68,7 +68,7 @@
     <paper id="4">
       <title>nicolay-r at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 3: Using Flan-T5 for Reasoning Emotion Cause in Conversations with Chain-of-Thought on Emotion States</title>
       <author><first>Nicolay</first><last>Rusnachenko</last><affiliation>Newcastle University</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>University of Newcastle</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>University of Newcastle</affiliation></author>
       <pages>22-27</pages>
       <abstract>Emotion expression is one of the essential traits of conversations. It may be self-related or caused by another speaker. The variety of reasons may serve as a source of the further emotion causes: conversation history, speaker’s emotional state, etc. Inspired by the most recent advances in Chain-of-Thought, in this work, we exploit the existing three-hop reasoning approach (THOR) to perform large language model instruction-tuning for answering: emotion states (THOR-state), and emotion caused by one speaker to the other (THOR-cause). We equip THORcause with the reasoning revision (RR) for devising a reasoning path in fine-tuning. In particular, we rely on the annotated speaker emotion states to revise reasoning path. Our final submission, based on Flan-T5-base (250M) and the rule-based span correction technique, preliminary tuned with THOR-state and fine-tuned with THOR-cause-rr on competition training data, results in 3rd and 4th places (F1-proportional) and 5th place (F1-strict) among 15 participating teams. Our THOR implementation fork is publicly available: https://github.com/nicolay-r/THOR-ECAC</abstract>
       <url hash="f38a938b">2024.semeval-1.4</url>
@@ -293,7 +293,7 @@
       <author><first>Hamidreza</first><last>Amirzadeh</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Alireza</first><last>Sohrabi</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Zeinab</first><last>Taghavi</last><affiliation>MSc student</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>139-147</pages>
       <abstract>The advancement of large language models (LLMs), their ability to produce eloquent and fluent content, and their vast knowledge have resulted in their usage in various tasks and applications. Despite generating fluent content, this content can contain fabricated or false information. This problem is known as hallucination and has reduced the confidence in the output of LLMs. In this work, we have used Natural Language Inference to train classifiers for hallucination detection to tackle SemEval-2024 Task 6-SHROOM (Mickus et al., 2024) which is defined in three sub-tasks: Paraphrase Generation, Machine Translation, and Definition Modeling. We have also conducted experiments on LLMs to evaluate their ability to detect hallucinated outputs. We have achieved 75.93% and 78.33% accuracy for the modelaware and model-agnostic tracks, respectively. The shared links of our models and the codes are available on GitHub.</abstract>
       <url hash="6e418999">2024.semeval-1.22</url>
@@ -307,7 +307,7 @@
       <author><first>Zahra</first><last>Rahimi</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Mohammad Moein</first><last>Shirzady</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Zeinab</first><last>Taghavi</last><affiliation>MSc student</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>148-154</pages>
       <abstract>The goal and dream of the artificial intelligence field have long been the development of intelligent systems or agents that mimic human behavior and thinking. Creativity is an essential trait in humans that is closely related to lateral thinking. The remarkable advancements in Language Models have led to extensive research on question-answering and explicit and implicit reasoning involving vertical thinking. However, there is an increasing need to shift focus towards research and development of models that can think laterally. One must step outside the traditional frame of commonsense concepts in lateral thinking to conclude. Task 9 of SemEval-2024 is Brainteaser (Jiang et al.,2024), which requires lateral thinking to answer riddle-like multiple-choice questions. In our study, we assessed the performance of various models for the Brainteaser task. We achieved an overall accuracy of 75% for the Sentence Puzzle subtask and 66.7% for the Word Puzzle subtask. All the codes, along with the links to our saved models, are available on our GitHub.</abstract>
       <url hash="73ceb32a">2024.semeval-1.23</url>
@@ -334,7 +334,7 @@
       <author><first>Ziwei</first><last>Zheng</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Subin</first><last>Jung</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Varun</first><last>Ojha</last><affiliation>Newcastle University</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>University of Newcastle</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>University of Newcastle</affiliation></author>
       <pages>163-169</pages>
       <abstract>SemEval-2024 Task 8 introduces the challenge of identifying machine-generated texts from diverse Large Language Models (LLMs) in various languages and domains. The task comprises three subtasks: binary classification in monolingual and multilingual (Subtask A), multi-class classification (Subtask B), and mixed text detection (Subtask C). This paper focuses on Subtask A &amp; B. To tackle this task, this paper proposes two methods: 1) using traditional machine learning (ML) with natural language preprocessing (NLP) for feature extraction, and 2) fine-tuning LLMs for text classification. For fine-tuning, we use the train datasets provided by the task organizers. The results show that transformer models like LoRA-RoBERTa and XLM-RoBERTa outperform traditional ML models, particularly in multilingual subtasks. However, traditional ML models performed better than transformer models for the monolingual task, demonstrating the importance of considering the specific characteristics of each subtask when selecting an appropriate approach.</abstract>
       <url hash="cdffaa33">2024.semeval-1.25</url>
@@ -452,7 +452,7 @@
       <title><fixed-case>ZXQ</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 7: Fine-tuning <fixed-case>GPT</fixed-case>-3.5-Turbo for Numerical Reasoning</title>
       <author><first>Zhen</first><last>Qian</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Xiaofei</first><last>Xu</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
       <pages>218-223</pages>
       <abstract>In this paper, we present our system for the SemEval-2024 Task 7, i.e., NumEval subtask 3: Numericial Reasoning. Given a news article and its headline, the numerical reasoning task involves creating a system to compute the intentionally excluded number within the news headline. We propose a fine-tuned GPT-3.5-turbo model, specifically engineered to deduce missing numerals directly from the content of news article. The model is trained with a human-engineered prompt that itegrates the news content and the masked headline, tailoring its accuracy for the designated task. It achieves an accuracy of 0.94 on the test data and secures the second position in the official leaderboard. An examination on the system’s inference results reveals its commendable accuracy in identifying correct numerals when they can be directly “copied” from the articles. However, the error rates increase when it comes to some ambiguous operations such as rounding.</abstract>
       <url hash="1f010c33">2024.semeval-1.34</url>
@@ -515,7 +515,7 @@
       <title><fixed-case>NU</fixed-case>-<fixed-case>RU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Hallucination and Related Observable Overgeneration Mistake Detection Using Hypothesis-Target Similarity and <fixed-case>S</fixed-case>elf<fixed-case>C</fixed-case>heck<fixed-case>GPT</fixed-case></title>
       <author><first>Thanet</first><last>Markchom</last><affiliation>University of Reading</affiliation></author>
       <author><first>Subin</first><last>Jung</last><affiliation>Newcastle University</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <pages>253-260</pages>
       <abstract>One of the key challenges in Natural Language Generation (NLG) is “hallucination,” in which the generated output appears fluent and grammatically sound but may contain incorrect information. To address this challenge, “SemEval-2024 Task 6 - SHROOM, a Shared-task on Hallucinations and Related Observable Overgeneration Mistakes” is introduced. This task focuses on detecting overgeneration hallucinations in texts generated from Large Language Models for various NLG tasks. To tackle this task, this paper proposes two methods: (1) hypothesis-target similarity, which measures text similarity between a generated text (hypothesis) and an intended reference text (target), and (2) a SelfCheckGPT-based method to assess hallucinations via predefined prompts designed for different NLG tasks. Experiments were conducted on the dataset provided in this task. The results show that both of the proposed methods can effectively detect hallucinations in LLM-generated texts with a possibility for improvement.</abstract>
       <url hash="29711d27">2024.semeval-1.39</url>
@@ -527,7 +527,7 @@
       <title><fixed-case>NCL</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 7: <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>-<fixed-case>N</fixed-case>um<fixed-case>HG</fixed-case>: A <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>-Based <fixed-case>SFT</fixed-case> Training Strategy with Large Language Models for Number-Focused Headline Generation</title>
       <author><first>Junzhe</first><last>Zhao</last><affiliation>Hangzhou Zero Matrix Intelligence Co., Ltd, China</affiliation></author>
       <author><first>Yingxi</first><last>Wang</last><affiliation>Huawei Technologies Co., Ltd., China</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <author><first>Nicolay</first><last>Rusnachenko</last><affiliation>Newcastle University</affiliation></author>
       <pages>261-269</pages>
       <abstract>Headline Generation is an essential task in Natural Language Processing (NLP), where models often exhibit limited ability to accurately interpret numerals, leading to inaccuracies in generated headlines. This paper introduces CoT-NumHG, a training strategy leveraging the Chain of Thought (CoT) paradigm for Supervised Fine-Tuning (SFT) of large language models. This approach is aimed at enhancing numeral perception, interpretability, accuracy, and the generation of structured outputs. Presented in SemEval-2024 Task 7 (task 3): Numeral-Aware Headline Generation (English), this challenge is divided into two specific subtasks. The first subtask focuses on numerical reasoning, requiring models to precisely calculate and fill in the missing numbers in news headlines, while the second subtask targets the generation of complete headlines. Utilizing the same training strategy across both subtasks, this study primarily explores the first subtask as a demonstration of our training strategy. Through this competition, our CoT-NumHG-Mistral-7B model attained an accuracy rate of 94%, underscoring the effectiveness of our proposed strategy.</abstract>
@@ -577,7 +577,7 @@
       <title><fixed-case>NCL</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 3: Fusing Multimodal Pre-training Embeddings for Emotion Cause Prediction in Conversations</title>
       <author><first>Shu</first><last>Li</last><affiliation>Beijing Accent Advertising Co., Ltd.</affiliation></author>
       <author><first>Zicen</first><last>Liao</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
       <pages>285-290</pages>
       <abstract>In this study, we introduce an MLP approach for extracting multimodal cause utterances in conversations, utilizing the multimodal conversational emotion causes from the ECF dataset. Our research focuses on evaluating a bi-modal framework that integrates video and audio embeddings to analyze emotional expressions within dialogues. The core of our methodology involves the extraction of embeddings from pre-trained models for each modality, followed by their concatenation and subsequent classification via an MLP network. We compared the accuracy performances across different modality combinations including text-audio-video, video-audio, and audio only.</abstract>
       <url hash="0a9df3d7">2024.semeval-1.44</url>
@@ -652,7 +652,7 @@
     <paper id="50">
       <title><fixed-case>GAV</fixed-case>x at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 10: Emotion Flip Reasoning via Stacked Instruction Finetuning of <fixed-case>LLM</fixed-case>s</title>
       <author><first>Vy</first><last>Nguyen</last><affiliation>RMIT University</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>RMIT University</affiliation></author>
       <pages>326-336</pages>
       <abstract>The Emotion Flip Reasoning task at SemEval 2024 aims at identifying the utterance(s) that trigger a speaker to shift from an emotion to another in a multi-party conversation. The spontaneous, informal, and occasionally multilingual dynamics of conversations make the task challenging. In this paper, we propose a supervised stacked instruction-based framework to finetune large language models to tackle this task. Utilising the annotated datasets provided, we curate multiple instruction sets involving chain-of-thoughts, feedback, and self-evaluation instructions, for a multi-step finetuning pipeline. We utilise the self-consistency inference strategy to enhance prediction consistency. Experimental results reveal commendable performance, achieving mean F1 scores of 0.77 and 0.76 for triggers in the Hindi-English and English-only tracks respectively. This led to us earning the second highest ranking in both tracks.</abstract>
       <url hash="08e4aa31">2024.semeval-1.50</url>
@@ -814,7 +814,7 @@
       <title>Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 8: Transformer and Hybrid Deep Learning Based Models for Machine-Generated Text Detection</title>
       <author><first>Teodor-george</first><last>Marchitan</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Claudiu</first><last>Creanga</last><affiliation>University of Bucharest</affiliation></author>
-      <author><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>403-411</pages>
       <abstract>This paper describes the approach of the UniBuc - NLP team in tackling the SemEval 2024 Task 8: Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection. We explored transformer-based and hybrid deep learning architectures. For subtask B, our transformer-based model achieved a strong second-place out of 77 teams with an accuracy of 86.95%, demonstrating the architecture’s suitability for this task. However, our models showed overfitting in subtask A which could potentially be fixed with less fine-tunning and increasing maximum sequence length. For subtask C (token-level classification), our hybrid model overfit during training, hindering its ability to detect transitions between human and machine-generated text.</abstract>
       <url hash="80a6e34a">2024.semeval-1.63</url>
@@ -829,7 +829,7 @@
       <author><first>Călina</first><last>Ciocoiu</last><affiliation>Alexandru Ioan Cuza University of Iasi</affiliation></author>
       <author><first>Ioana</first><last>Măniga</last><affiliation>Alexandru Ioan Cuza University of Iasi</affiliation></author>
       <author><first>Octavian</first><last>Ungureanu</last><affiliation>Alexandru Ioan Cuza University of Iasi</affiliation></author>
-      <author><first>Daniela</first><last>Gîfu</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania/Institute of Computer Science, Romanian Academy - Iasi Branch</affiliation></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania/Institute of Computer Science, Romanian Academy - Iasi Branch</affiliation></author>
       <author><first>Diana</first><last>Trandăbăț</last><affiliation>Faculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania</affiliation></author>
       <pages>412-419</pages>
       <abstract>The “Emotion Discovery and Reasoning Its Flip in Conversation” task at the SemEval 2024 competition focuses on the automatic recognition of emotion flips, triggered within multi-party textual conversations. This paper proposes a novel approach that draws a parallel between a mixed strategy and a comparative strategy, contrasting a Rule-Based Function with Named Entity Recognition (NER)—an approach that shows promise in understanding speaker-specific emotional dynamics. Furthermore, this method surpasses the performance of both DistilBERT and RoBERTa models, demonstrating competitive effectiveness in detecting emotion flips triggered in multi-party textual conversations, achieving a 70% F1-score. This system was ranked 6th in the SemEval 2024 competition for Subtask 3.</abstract>
@@ -949,7 +949,7 @@
     <paper id="73">
       <title>Team jelarson at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2024 Task 8: Predicting Boundary Line Between Human and Machine Generated Text</title>
       <author><first>Joseph</first><last>Larson</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <pages>477-484</pages>
       <abstract>In this paper, we handle the task of building a system that, given a document written first by a human and then finished by an LLM, the system must determine the transition word i.e. where the machine begins to write. We built a system by examining the data for textual anomalies and combining a method of heuristic approaches with a linear regression model based on the text length of each document.</abstract>
       <url hash="2b28bcfb">2024.semeval-1.73</url>
@@ -995,7 +995,7 @@
     <paper id="77">
       <title><fixed-case>BERT</fixed-case>astic at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: State-of-the-Art Multilingual Propaganda Detection in Memes via Zero-Shot Learning with Vision-Language Models</title>
       <author><first>Tarek</first><last>Mahmoud</last><affiliation>Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>503-510</pages>
       <abstract>Analyzing propagandistic memes in a multilingual, multimodal dataset is a challenging problem due to the inherent complexity of memes’ multimodal content, which combines images, text, and often, nuanced context. In this paper, we use a VLM in a zero-shot approach to detect propagandistic memes and achieve a state-of-the-art average macro F1 of 66.7% over all languages. Notably, we outperform other systems on North Macedonian memes, and obtain competitive results on Bulgarian and Arabic memes. We also present our early fusion approach for identifying persuasion techniques in memes in a hierarchical multilabel classification setting. This approach outperforms all other approaches in average hierarchical precision with an average score of 77.66%. The systems presented contribute to the evolving field of research on the detection of persuasion techniques in multimodal datasets by offering insights that could be of use in the development of more effective tools for combating online propaganda.</abstract>
       <url hash="054ae475">2024.semeval-1.77</url>
@@ -1101,7 +1101,7 @@
       <author><first>Arian</first><last>Qazvini</last><affiliation>Amirkabir University of Technology</affiliation></author>
       <author><first>Pouya</first><last>Sadeghi</last><affiliation>University of Tehran</affiliation></author>
       <author><first>Zeinab</first><last>Taghavi</last><affiliation>MSc student</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>565-572</pages>
       <abstract>In this paper, we delve into the realm of detecting machine-generated text (MGT) within Natural Language Processing (NLP). Our approach involves fine-tuning a RoBERTa-base Transformer, a robust neural architecture, to tackle MGT detection as a binary classification task. Specifically focusing on Subtask A (Monolingual - English) within the SemEval-2024 competition framework, our system achieves a 78.9% accuracy on the test dataset, placing us 57th among participants. While our system demonstrates proficiency in identifying human-written texts, it faces challenges in accurately discerning MGTs.</abstract>
       <url hash="410d2953">2024.semeval-1.85</url>
@@ -1143,7 +1143,7 @@
       <author><first>Claudiu</first><last>Creanga</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ana-maria</first><last>Bucur</last><affiliation>Interdisciplinary School of Doctoral Studies</affiliation></author>
       <author><first>Ana Sabina</first><last>Uban</last><affiliation>University of Bucharest</affiliation></author>
-      <author><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>586-595</pages>
       <abstract>This paper describes the approach of the UniBuc team in tackling the SemEval 2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials. We used SOLAR Instruct, without any fine-tuning, while focusing on input manipulation and tailored prompting. By customizing prompts for individual CTR sections, in both zero-shot and few-shots settings, we managed to achieve a consistency score of 0.72, ranking 14th in the leaderboard. Our thorough error analysis revealed that our model has a tendency to take shortcuts and rely on simple heuristics, especially when dealing with semantic-preserving changes.</abstract>
       <url hash="0085ea97">2024.semeval-1.88</url>
@@ -1242,7 +1242,7 @@
     <paper id="95">
       <title><fixed-case>ISDS</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 10: Transformer based neural networks for emotion recognition in conversations</title>
       <author><first>Claudiu</first><last>Creanga</last><affiliation>University of Bucharest</affiliation></author>
-      <author><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>649-654</pages>
       <abstract>This paper outlines the approach of the ISDS-NLP team in the SemEval 2024 Task 10: Emotion Discovery and Reasoning its Flip in Conversation (EDiReF). For Subtask 1 we obtained a weighted F1 score of 0.43 and placed 12 in the leaderboard. We investigate two distinct approaches: Masked Language Modeling (MLM) and Causal Language Modeling (CLM). For MLM, we employ pre-trained BERT-like models in a multilingual setting, fine-tuning them with a classifier to predict emotions. Experiments with varying input lengths, classifier architectures, and fine-tuning strategies demonstrate the effectiveness of this approach. Additionally, we utilize Mistral 7B Instruct V0.2, a state-of-the-art model, applying zero-shot and few-shot prompting techniques. Our findings indicate that while Mistral shows promise, MLMs currently outperform them in sentence-level emotion classification.</abstract>
       <url hash="f6e8a90f">2024.semeval-1.95</url>
@@ -1503,7 +1503,7 @@
       <title><fixed-case>AA</fixed-case>da<fixed-case>M</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: Augmentation and Adaptation for Multilingual Semantic Textual Relatedness</title>
       <author><first>Miaoran</first><last>Zhang</last><affiliation>Saarland University</affiliation></author>
       <author><first>Mingyang</first><last>Wang</last><affiliation>Bosch Center for Artificial Intelligence; LMU Munich</affiliation></author>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
       <author><first>Dietrich</first><last>Klakow</last><affiliation>Saarland University</affiliation></author>
       <pages>800-810</pages>
       <abstract>This paper presents our system developed for the SemEval-2024 Task 1: Semantic Textual Relatedness for African and Asian Languages. The shared task aims at measuring the semantic textual relatedness between pairs of sentences, with a focus on a range of under-represented languages. In this work, we propose using machine translation for data augmentation to address the low-resource challenge of limited training data. Moreover, we apply task-adaptive pre-training on unlabeled task data to bridge the gap between pre-training and task adaptation. For model training, we investigate both full fine-tuning and adapter-based tuning, and adopt the adapter framework for effective zero-shot cross-lingual transfer. We achieve competitive results in the shared task: our system performs the best among all ranked teams in both subtask A (supervised learning) and subtask C (cross-lingual transfer).</abstract>
@@ -1887,7 +1887,7 @@
     <paper id="143">
       <title><fixed-case>SEME</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 2: Comparing Masked and Generative Language Models on Natural Language Inference for Clinical Trials</title>
       <author><first>Mathilde</first><last>Aguiar</last><affiliation>Université Paris-Saclay, CNRS, Laboratoire Interdisciplinaire des Sciences du Numérique, 91400, Orsay, France</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <author><first>Nona</first><last>Naderi</last><affiliation>Université Paris-Saclay</affiliation></author>
       <pages>986-996</pages>
       <abstract>This paper describes our submission to Task 2 of SemEval-2024: Safe Biomedical Natural Language Inference for Clinical Trials. The Multi-evidence Natural Language Inference for Clinical Trial Data (NLI4CT) consists of a Textual Entailment (TE) task focused on the evaluation of the consistency and faithfulness of Natural Language Inference (NLI) models applied to Clinical Trial Reports (CTR). We test 2 distinct approaches, one based on finetuning and ensembling Masked Language Models and the other based on prompting Large Language Models using templates, in particular, using Chain-Of-Thought and Contrastive Chain-Of-Thought. Prompting Flan-T5-large in a 2-shot setting leads to our best system that achieves 0.57 F1 score, 0.64 Faithfulness, and 0.56 Consistency.</abstract>
@@ -1955,7 +1955,7 @@
       <author><first>Srikar Kashyap</first><last>Pulipaka</last><affiliation>Indiana University Bloomington</affiliation></author>
       <author><first>Shrirang</first><last>Mhalgi</last><affiliation>Indiana University Bloomington</affiliation></author>
       <author><first>Joseph</first><last>Larson</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <pages>1026-1031</pages>
       <abstract>Since Large Language Models have reached a stage where it is becoming more and more difficult to distinguish between human and machine written text, there is an increasing need for automated systems to distinguish between them. As part of SemEval Task 8, Subtask A: Binary Human-Written vs. Machine-Generated Text Classification, we explore a variety of machine learning classifiers, from traditional statistical methods, such as Naïve Bayes and Decision Trees, to fine-tuned transformer models, suchas RoBERTa and ALBERT. Our findings show that using a fine-tuned RoBERTa model with optimizedhyperparameters yields the best accuracy. However, the improvement does not translate to the test set because of the differences in distribution in the development and test sets.</abstract>
       <url hash="af8a609d">2024.semeval-1.148</url>
@@ -1996,7 +1996,7 @@
       <author><first>Amirmasoud</first><last>Iravani</last><affiliation>Ferdowsi University of Mashhad</affiliation></author>
       <author><first>Hadi</first><last>Alizadeh</last><affiliation>Iran Broadcasting University</affiliation></author>
       <author><first>Zeinab</first><last>Taghavi</last><affiliation>MSc student</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>1043-1052</pages>
       <abstract>This paper explores semantic textual relatedness (STR) using fine-tuning techniques on the RoBERTa transformer model, focusing on sentence-level STR within Track A (Supervised). The study evaluates the effectiveness of this approach across different languages, with promising results in English and Spanish but encountering challenges in Arabic.</abstract>
       <url hash="f3ed5427">2024.semeval-1.151</url>
@@ -2121,7 +2121,7 @@
       <author><first>Andric</first><last>Valdez</last><affiliation>UNAM</affiliation></author>
       <author><first>Fernando</first><last>Márquez</last><affiliation>IIMAS - UNAM</affiliation></author>
       <author><first>Jorge</first><last>Pantaleón</last><affiliation>IIMAS - UNAM</affiliation></author>
-      <author><first>Helena</first><last>Gómez</last><affiliation>IIMAS - UNAM</affiliation></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gómez</last><affiliation>IIMAS - UNAM</affiliation></author>
       <author><first>Gemma</first><last>Bel-enguix</last><affiliation>Instituto de Ingeniería - UNAM</affiliation></author>
       <pages>1110-1114</pages>
       <abstract>Large language models (LLMs) are artificial intelligence systems that can generate text, translate languages, and answer questions in a human-like way. While these advances are impressive, there is concern that LLMs could also be used to generate fake or misleading content. In this work, as a part of our participation in SemEval-2024 Task-8, we investigate the ability of LLMs to identify whether a given text was written by a human or by a specific AI. We believe that human and machine writing style patterns are different from each other, so integrating features at different language levels can help in this classification task. For this reason, we evaluate several LLMs that aim to extract valuable multilevel information (such as lexical, semantic, and syntactic) from the text in their training processing. Our best scores on Sub- taskA (monolingual) and SubtaskB were 71.5% and 38.2% in accuracy, respectively (both using the ConvBERT LLM); for both subtasks, the baseline (RoBERTa) achieved an accuracy of 74%.</abstract>
@@ -2204,7 +2204,7 @@
       <author><first>Reza</first><last>Farnia</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Amirreza</first><last>Tarabkhah</last><affiliation>Amirkabir University of Technology</affiliation></author>
       <author><first>Zeinab Sadat</first><last>Taghavi</last><affiliation>Sharif University of Technology</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>1148-1154</pages>
       <abstract>Language models, particularly generative models, are susceptible to hallucinations, generating outputs that contradict factual knowledgeor the source text. This study explores methodsfor detecting hallucinations in three SemEval2024 Task 6 tasks: Machine Translation, Definition Modeling, and Paraphrase Generation.We evaluate two methods: semantic similaritybetween the generated text and factual references, and an ensemble of language modelsthat judge each other’s outputs. Our resultsshow that semantic similarity achieves moderate accuracy and correlation scores in trial data,while the ensemble method offers insights intothe complexities of hallucination detection butfalls short of expectations. This work highlights the challenges of hallucination detectionand underscores the need for further researchin this critical area.</abstract>
       <url hash="13c4bb33">2024.semeval-1.167</url>
@@ -2265,7 +2265,7 @@
     <paper id="172">
       <title><fixed-case>EURECOM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: Hierarchical Loss and Model Ensembling in Detecting Persuasion Techniques</title>
       <author><first>Youri</first><last>Peskine</last><affiliation>EURECOM</affiliation></author>
-      <author><first>Raphael</first><last>Troncy</last><affiliation>EURECOM</affiliation></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last><affiliation>EURECOM</affiliation></author>
       <author><first>Paolo</first><last>Papotti</last><affiliation>EURECOM</affiliation></author>
       <pages>1177-1182</pages>
       <abstract>This paper describes the submission of team EURECOM at SemEval-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes. We only tackled the first sub-task, consisting of detecting 20 named persuasion techniques in the textual content of memes. We trained multiple BERT-based models (BERT, RoBERTa, BERT pre-trained on harmful detection) using different losses (Cross Entropy, Binary Cross Entropy, Focal Loss and a custom-made hierarchical loss). The best results were obtained by leveraging the hierarchical nature of the data, by outputting ancestor classes and with a hierarchical loss. Our final submission consist of an ensembling of our top-3 best models for each persuasion techniques. We obtain hierarchical F1 scores of 0.655 (English), 0.345 (Bulgarian), 0.442 (North Macedonian) and 0.178 (Arabic) on the test set.</abstract>
@@ -2308,7 +2308,7 @@
       <author><first>Suyash Vardhan</first><last>Mathur</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Akshett</first><last>Jindal</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
       <author><first>Hardik</first><last>Mittal</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>1204-1211</pages>
       <abstract>Conversation is the most natural form of human communication, where each utterance can range over a variety of possible emotions. While significant work has been done towards the detection of emotions in text, relatively little work has been done towards finding the cause of the said emotions, especially in multimodal settings. SemEval 2024 introduces the task of Multimodal Emotion Cause Analysis in Conversations, which aims to extract emotions reflected in individual utterances in a conversation involving multiple modalities (textual, audio, and visual modalities) along with the corresponding utterances that were the cause for the emotion. In this paper, we propose models that tackle this task as an utterance labeling and a sequence labeling problem and perform a comparative study of these models, involving baselines using different encoders, using BiLSTM for adding contextual information of the conversation, and finally adding a CRF layer to try to model the inter-dependencies between adjacent utterances more effectively. In the official leaderboard for the task, our architecture was ranked 8th, achieving an F1-score of 0.1759 on the leaderboard.</abstract>
       <url hash="f9ba6381">2024.semeval-1.175</url>
@@ -2321,7 +2321,7 @@
       <title><fixed-case>D</fixed-case>a<fixed-case>V</fixed-case>inci at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Few-shot prompting <fixed-case>GPT</fixed-case>-3.5 for Unconventional Reasoning</title>
       <author><first>Suyash Vardhan</first><last>Mathur</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Akshett</first><last>Jindal</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>1212-1216</pages>
       <abstract>While significant work has been done in the field of NLP on vertical thinking, which involves primarily logical thinking, little work has been done towards lateral thinking, which involves looking at problems from an unconventional perspective defying existing conceptions and notions. Towards this direction, SemEval 2024 introduces the task of BRAINTEASER, which involves two types of questions – Sentence Puzzle and Word Puzzle that defy conventional common-sense reasoning and constraints. In this paper, we tackle both the questions using few-shot prompting on GPT-3.5 and gain insights regarding the difference in the nature of the two types of questions. Our prompting strategy placed us 26th on the leaderboard for the Sentence Puzzle and 15th on the Word Puzzle task.</abstract>
       <url hash="a9ef200b">2024.semeval-1.176</url>
@@ -2388,7 +2388,7 @@
       <title><fixed-case>F</fixed-case>t<fixed-case>G</fixed-case>-<fixed-case>C</fixed-case>o<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Solving Sentence Puzzles Using Fine-Tuned Language Models and Zero-Shot <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case> Prompting</title>
       <author><first>Micah</first><last>Zhang</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>1245-1251</pages>
       <abstract>Recent large language models (LLMs) can solve puzzles that require creativity and lateral thinking. To advance this front of research, we tackle SemEval-2024 Task 9: BRAINTEASER: A Novel Task Defying Common Sense. We approach this task by introducing a technique that we call Fine-tuned Generated Chain-of-Thought (FtG-CoT). It is a novel few-shot prompting method that combines a fine-tuned BERT classifier encoder with zero-shot chain-of-thought generation and a fine-tuned LLM. The fine-tuned BERT classifier provides a context-rich encoding of each example question and choice list. Zero-shot chain-of-thought generation leverages the benefits of chain-of-thought prompting without requiring manual creation of the reasoning chains. We fine-tune the LLM on the generated chains-of-thought and include a set of generated reasoning chains in the final few-shot LLM prompt to maximize the relevance and correctness of the final generated response. In this paper, we show that FtG-CoT outperforms the zero-shot prompting baseline presented in the task paper and is highly effective at solving challenging sentence puzzles achieving a perfect score on the practice set and a 0.9 score on the evaluation set.</abstract>
       <url hash="2a9d1aa1">2024.semeval-1.181</url>
@@ -2506,7 +2506,7 @@
       <author><first>Xin</first><last>Zou</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Junlong</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Peng</first><last>Chen</last><affiliation>Dalian University of Technology</affiliation></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author><first>Jian</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Liang</first><last>Yang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Hongfei</first><last>Lin</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>1315-1321</pages>
@@ -2521,7 +2521,7 @@
       <title><fixed-case>H</fixed-case>a<fixed-case>RM</fixed-case>o<fixed-case>NEE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Tuning-based Approaches to Hallucination Recognition</title>
       <author><first>Timothy</first><last>Obiso</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Jingxuan</first><last>Tu</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <pages>1322-1331</pages>
       <abstract>This paper presents the Hallucination Recognition Model for New Experiment Evaluation (HaRMoNEE) team’s winning (#1) and #10 submissions for SemEval-2024 Task 6: Shared- task on Hallucinations and Related Observable Overgeneration Mistakes (SHROOM)’s two subtasks. This task challenged its participants to design systems to detect hallucinations in Large Language Model (LLM) outputs. Team HaRMoNEE proposes two architectures: (1) fine-tuning an off-the-shelf transformer-based model and (2) prompt tuning large-scale Large Language Models (LLMs). One submission from the fine-tuning approach outperformed all other submissions for the model-aware subtask; one submission from the prompt-tuning approach is the 10th-best submission on the leaderboard for the model-agnostic subtask. Our systems also include pre-processing, system-specific tuning, post-processing, and evaluation.</abstract>
       <url hash="ef90580a">2024.semeval-1.191</url>
@@ -2591,7 +2591,7 @@
     </paper>
     <paper id="196">
       <title><fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Solving Puzzles with an Ensemble of Chain-of-Thought Prompts</title>
-      <author><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Al Nahian</first><last>Bin Emran</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
@@ -2609,11 +2609,11 @@
     <paper id="197">
       <title><fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 8: Performance Analysis of Transformer-based Models on Machine-Generated Text Detection</title>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Al Nahian</first><last>Bin Emran</last><affiliation>George Mason University</affiliation></author>
       <author><first>Amrita</first><last>Ganguly</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Özlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
       <pages>1364-1372</pages>
       <abstract>This paper presents the MasonTigers entryto the SemEval-2024 Task 8 - Multigenerator, Multidomain, and Multilingual BlackBox Machine-Generated Text Detection. Thetask encompasses Binary Human-Written vs.Machine-Generated Text Classification (TrackA), Multi-Way Machine-Generated Text Classification (Track B), and Human-Machine MixedText Detection (Track C). Our best performing approaches utilize mainly the ensemble ofdiscriminator transformer models along withsentence transformer and statistical machinelearning approaches in specific cases. Moreover, Zero shot prompting and fine-tuning ofFLAN-T5 are used for Track A and B.</abstract>
       <url hash="e073375a">2024.semeval-1.197</url>
@@ -2639,7 +2639,7 @@
       <title><fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: An Ensemble Approach for Semantic Textual Relatedness</title>
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Sadiya Sayara Chowdhury</first><last>Puspo</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last><affiliation>George Mason University</affiliation></author>
       <author><first>Al Nahian</first><last>Bin Emran</last><affiliation>George Mason University</affiliation></author>
       <author><first>Amrita</first><last>Ganguly</last><affiliation>George Mason University</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
@@ -2702,7 +2702,7 @@
     <paper id="204">
       <title>Pauk at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: A Neuro-Symbolic Method for Consistent Classification of Propaganda Techniques in Memes</title>
       <author><first>Matt</first><last>Pauk</last><affiliation>University of Colorado - Boulder</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder / Microsoft Research</affiliation></author>
       <pages>1424-1434</pages>
       <abstract>Memes play a key role in most modern informa-tion campaigns, particularly propaganda cam-paigns. Identifying the persuasive techniquespresent in memes is an important step in de-veloping systems to recognize and curtail pro-paganda. This work presents a framework toidentify the persuasive techniques present inmemes for the SemEval 2024 Task 4, accordingto a hierarchical taxonomy of propaganda tech-niques. The framework involves a knowledgedistillation method, where the base model is acombination of DeBERTa and ResNET usedto classify the text and image, and the teachermodel consists of a group of weakly enforcedlogic rules that promote the hierarchy of per-suasion techniques. The addition of the logicrule layer for knowledge distillation shows im-provement in respecting the hierarchy of thetaxonomy with a slight boost in performance.</abstract>
       <url hash="00338895">2024.semeval-1.204</url>
@@ -2752,7 +2752,7 @@
     <paper id="208">
       <title>Compos Mentis at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2024 Task6: A Multi-Faceted Role-based Large Language Model Ensemble to Detect Hallucination</title>
       <author><first>Souvik</first><last>Das</last><affiliation>University at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>University at Buffalo, SUNY</affiliation></author>
       <pages>1449-1454</pages>
       <abstract>Hallucinations in large language models (LLMs), where they generate fluent but factually incorrect outputs, pose challenges for applications requiring strict truthfulness. This work proposes a multi-faceted approach to detect such hallucinations across various language tasks. We leverage automatic data annotation using a proprietary LLM, fine-tuning of the Mistral-7B-instruct-v0.2 model on annotated and benchmark data, role-based and rationale-based prompting strategies, and an ensemble method combining different model outputs through majority voting. This comprehensive framework aims to improve the robustness and reliability of hallucination detection for LLM generations.</abstract>
       <url hash="b7b751a9">2024.semeval-1.208</url>
@@ -3048,7 +3048,7 @@
       <title>Archimedes-<fixed-case>AUEB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 5: <fixed-case>LLM</fixed-case> explains Civil Procedure</title>
       <author><first>Odysseas</first><last>Chlapanis</last><affiliation>Department of Informatics, Athens University of Economics and Business &amp; Archimedes Unit, Athena Research Center</affiliation></author>
       <author><first>Ion</first><last>Androutsopoulos</last><affiliation>Department of Informatics, Athens University of Economics and Business &amp; Archimedes Unit, Athena Research Center</affiliation></author>
-      <author><first>Dimitrios</first><last>Galanis</last><affiliation>Institute for Language and Speech Processing, Athena Research Center &amp; Archimedes Unit, Athena Research Center</affiliation></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last><affiliation>Institute for Language and Speech Processing, Athena Research Center &amp; Archimedes Unit, Athena Research Center</affiliation></author>
       <pages>1607-1622</pages>
       <abstract>The SemEval task on Argument Reasoning in Civil Procedure is challenging in that it requires understanding legal concepts and inferring complex arguments. Currently, most Large Language Models (LLM) excelling in the legal realm are principally purposed for classification tasks, hence their reasoning rationale is subject to contention. The approach we advocate involves using a powerful teacher-LLM (ChatGPT) to extend the training dataset with explanations and generate synthetic data. The resulting data are then leveraged to fine-tune a small student-LLM. Contrary to previous work, our explanations are not directly derived from the teacher’s internal knowledge. Instead they are grounded in authentic human analyses, therefore delivering a superior reasoning signal. Additionally, a new ‘mutation’ method generates artificial data instances inspired from existing ones. We are publicly releasing the explanations as an extension to the original dataset, along with the synthetic dataset and the prompts that were used to generate both. Our system ranked 15th in the SemEval competition. It outperforms its own teacher and can produce explanations aligned with the original human analyses, as verified by legal experts.</abstract>
       <url hash="eb227b5b">2024.semeval-1.229</url>
@@ -3076,7 +3076,7 @@
       <author><first>Jainit</first><last>Bafna</last><affiliation>The International Institute of Information Technology - Hyderabad</affiliation></author>
       <author><first>Hardik</first><last>Mittal</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Suyash</first><last>Sethia</last><affiliation>The International Institute of Information Technology - Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Radhika</first><last>Mamidi</last><affiliation>Language Technologies Research Centre, IIIT Hyderabad</affiliation></author>
       <pages>1627-1633</pages>
       <abstract>Large Language Models (LLMs) have showcased impressive abilities in generating fluent responses to diverse user queries. However, concerns regarding the potential misuse ofsuch texts in journalism, educational, and academic contexts have surfaced. SemEval 2024introduces the task of Multigenerator, Multidomain, and Multilingual Black-Box MachineGenerated Text Detection, aiming to developautomated systems for identifying machinegenerated text and detecting potential misuse. In this paper, we i) propose a RoBERTaBiLSTM based classifier designed to classifytext into two categories: AI-generated or human ii) conduct a comparative study of ourmodel with baseline approaches to evaluate itseffectiveness. This paper contributes to the advancement of automatic text detection systemsin addressing the challenges posed by machinegenerated text misuse. Our architecture ranked46th on the official leaderboard with an accuracy of 80.83 among 125.</abstract>
@@ -3223,7 +3223,7 @@
       <title>Maha Bhaashya at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Zero-Shot Multi-task Hallucination Detection</title>
       <author><first>Patanjali</first><last>Bhamidipati</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Advaith</first><last>Malladi</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Radhika</first><last>Mamidi</last><affiliation>Language Technologies Research Centre, IIIT Hyderabad</affiliation></author>
       <pages>1685-1689</pages>
       <abstract>In recent studies, the extensive utilization oflarge language models has underscored the importance of robust evaluation methodologiesfor assessing text generation quality and relevance to specific tasks. This has revealeda prevalent issue known as hallucination, anemergent condition in the model where generated text lacks faithfulness to the source anddeviates from the evaluation criteria. In thisstudy, we formally define hallucination and propose a framework for its quantitative detectionin a zero-shot setting, leveraging our definitionand the assumption that model outputs entailtask and sample specific inputs. In detectinghallucinations, our solution achieves an accuracy of 0.78 in a model-aware setting and 0.61in a model-agnostic setting. Notably, our solution maintains computational efficiency, requiring far less computational resources than other SOTA approaches, aligning with the trendtowards lightweight and compressed models.</abstract>
@@ -3387,7 +3387,7 @@
     </paper>
     <paper id="253">
       <title><fixed-case>HIT</fixed-case>-<fixed-case>MI</fixed-case>&amp;<fixed-case>T</fixed-case> Lab at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: <fixed-case>D</fixed-case>e<fixed-case>BERT</fixed-case>a-based Entailment Model is a Reliable Hallucination Detector</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Wanyao</first><last>Shi</last><affiliation>Northwest Normal University</affiliation></author>
       <author><first>Zijian</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Hui</first><last>Huang</last><affiliation>Harbin Institute of Technology</affiliation></author>
@@ -3485,7 +3485,7 @@
       <title><fixed-case>M</fixed-case>ai<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: Analyzing Source Language Selection in Cross-Lingual Textual Relatedness</title>
       <author><first>Shijia</first><last>Zhou</last><affiliation>Ludwig Maximilian University of Munich</affiliation></author>
       <author><first>Huangyan</first><last>Shan</last><affiliation>LMU Munich</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Robert</first><last>Litschko</last><affiliation>LMU Munich</affiliation></author>
       <pages>1842-1853</pages>
       <abstract>This paper presents our system developed for the SemEval-2024 Task 1: Semantic Textual Relatedness (STR), on Track C: Cross-lingual. The task aims to detect semantic relatedness of two sentences from the same languages. For cross-lingual approach we developed a set of linguistics-inspired models trained with several task-specific strategies. We 1) utilize language vectors for selection of donor languages; 2) investigate the multi-source approach for training; 3) use transliteration of non-latin script to study impact of “script gap”; 4) opt machine translation for data augmentation. We additionally compare the performance of XLM-RoBERTa and Furina with the same training strategy. Our submission achieved the first place in the C8 (Kinyarwanda) test.</abstract>
@@ -3564,7 +3564,7 @@
       <author><first>Giwon</first><last>Hong</last><affiliation>KAIST School of Computing</affiliation></author>
       <author><first>Pasquale</first><last>Minervini</last><affiliation>UCL</affiliation></author>
       <author><first>Luke</first><last>Daines</last><affiliation>Usher Institute, University of Edinburgh</affiliation></author>
-      <author><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, Edinburgh Futures Institute, School of Literatures, Languages and Cultures, School of Informatics</affiliation></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, Edinburgh Futures Institute, School of Literatures, Languages and Cultures, School of Informatics</affiliation></author>
       <pages>1894-1904</pages>
       <abstract>The NLI4CT task assesses Natural Language Inference systems in predicting whether hypotheses entail or contradict evidence from Clinical Trial Reports. In this study, we evaluate various Large Language Models (LLMs) with multiple strategies, including Chain-of-Thought, In-Context Learning, and Parameter-Efficient Fine-Tuning (PEFT). We propose a PEFT method to improve the consistency of LLMs by merging adapters that were fine-tuned separately using triplet and language modelling objectives. We found that merging the two PEFT adapters improves the F1 score (+0.0346) and consistency (+0.152) of the LLMs. However, our novel methods did not produce more accurate results than GPT-4 in terms of faithfulness and consistency. Averaging the three metrics, GPT-4 ranks joint-first in the competition with 0.8328. Finally, our contamination analysis with GPT-4 indicates that there was no test data leakage. Our code is available at https://github.com/EdinburghClinicalNLP/semeval_nli4ct.</abstract>
       <url hash="5b123c92">2024.semeval-1.265</url>
@@ -3644,7 +3644,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials</title>
       <author><first>Mael</first><last>Jullien</last><affiliation>university of Manchester</affiliation></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>Idiap Research Institute</affiliation></author>
-      <author><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last><affiliation>University of Manchester</affiliation></author>
       <pages>1947-1962</pages>
       <abstract>Large Language Models (LLMs) are at the forefront of NLP achievements but fall short in dealing with shortcut learning, factual inconsistency, and vulnerability to adversarial inputs. These shortcomings are especially critical in medical contexts, where they can misrepresent actual model capabilities. Addressing this, we present SemEval-2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials. Our contributions include the refined NLI4CT-P dataset (i.e. Natural Language Inference for Clinical Trials - Perturbed), designed to challenge LLMs with interventional and causal reasoning tasks, along with a comprehensive evaluation of methods and results for participant submissions. A total of 106 participants registered for the task contributing to over 1200 individual submissions and 25 system overview papers. This initiative aims to advance the robustness and applicability of NLI models in healthcare, ensuring safer and more dependable AI assistance in clinical decision-making. We anticipate that the dataset, models, and outcomes of this task can support future research in the field of biomedical NLI. The dataset, competition leaderboard, and website are publicly available.</abstract>
       <url hash="f83d7501">2024.semeval-1.271</url>
@@ -3666,12 +3666,12 @@
       <author><first>Meriem</first><last>Beloucif</last><affiliation>Uppsala University</affiliation></author>
       <author><first>Christine</first><last>De Kock</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Oumaima</first><last>Hourrane</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <author><first>Nirmal</first><last>Surange</last></author>
       <author><first>Krishnapriya</first><last>Vishnubhotla</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>1963-1978</pages>
       <abstract>We present the first shared task on Semantic Textual Relatedness (STR). While earlier shared tasks primarily focused on semantic similarity, we instead investigate the broader phenomenon of semantic relatedness across 14 languages: Afrikaans, Algerian Arabic, Amharic, English, Hausa, Hindi, Indonesian, Kinyarwanda, Marathi, Moroccan Arabic, Modern Standard Arabic, Punjabi, Spanish, and Telugu. These languages originate from five distinct language families and are predominantly spoken in Africa and Asia – regions characterised by the relatively limited availability of NLP resources. Each instance in the datasets is a sentence pair associated with a score that represents the degree of semantic textual relatedness between the two sentences. Participating systems were asked to rank sentence pairs by their closeness in meaning (i.e., their degree of semantic relatedness) in the 14 languages in three main tracks: (a) supervised, (b) unsupervised, and (c) crosslingual. The task attracted 163 participants. We received 70 submissions in total (across all tasks) from 51 different teams, and 38 system description papers. We report on the best-performing systems as well as the most common and the most effective approaches for the three different tracks.</abstract>
       <url hash="f030e3b7">2024.semeval-1.272</url>
@@ -3686,7 +3686,7 @@
       <author><first>Elaine</first><last>Zosa</last><affiliation>SiloGen</affiliation></author>
       <author><first>Raul</first><last>Vazquez</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Teemu</first><last>Vahtola</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Vincent</first><last>Segonne</last><affiliation>IRISA - UniversitÃ© Bretagne Sud</affiliation></author>
       <author><first>Alessandro</first><last>Raganato</last><affiliation>University of Milano-Bicocca</affiliation></author>
       <author><first>Marianna</first><last>Apidianaki</last><affiliation>University of Pennsylvania</affiliation></author>
@@ -3719,7 +3719,7 @@
       <author><first>Maram</first><last>Hasanain</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <author><first>Abul</first><last>Hasnat</last><affiliation>Blackbird.ai</affiliation></author>
       <author><first>Fabrizio</first><last>Silvestri</last><affiliation>Sapienza, University of Rome</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
       <pages>2009-2026</pages>
       <abstract>The automatic identification of misleading and persuasive content has emerged as a significant issue among various stakeholders, including social media platforms, policymakers, and the broader society. To tackle this issue within the context of memes, we organized a shared task at SemEval-2024, focusing on the multilingual detection of persuasion techniques. This paper outlines the dataset, the organization of the task, the evaluation framework, the outcomes, and the systems that participated. The task targets memes in four languages, with the inclusion of three surprise test datasets in Bulgarian, North Macedonian, and Arabic. It encompasses three subtasks: (i) identifying whether a meme utilizes a persuasion technique; (ii) identifying persuasion techniques within the meme’s ”textual content”; and (iii) identifying persuasion techniques across both the textual and visual components of the meme (a multimodal task). Furthermore, due to the complex nature of persuasion techniques, we present a hierarchy that groups the 22 persuasion techniques into several levels of categories. This became one of the attractive shared tasks in SemEval 2024, with 153 teams registered, 48 teams submitting results, and finally, 32 system description papers submitted.</abstract>
@@ -3762,7 +3762,7 @@
     <paper id="278">
       <title><fixed-case>S</fixed-case>heffield<fixed-case>V</fixed-case>era<fixed-case>AI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: Prompting and fine-tuning a Large Vision-Language Model for Binary Classification of Persuasion Techniques in Memes</title>
       <author><first>Charlie</first><last>Grimshaw</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Xingyi</first><last>Song</last><affiliation>University of Sheffield</affiliation></author>
       <pages>2051-2056</pages>
       <abstract>This paper describes our approach for SemEval-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes. Specifically, we concentrate on Subtask 2b, a binary classification challenge that entails categorizing memes as either “propagandistic” or “non-propagandistic”. To address this task, we utilized the large multimodal pretrained model, LLaVa. We explored various prompting strategies and fine-tuning methods, and observed that the model, when not fine-tuned but provided with a few-shot learning examples, achieved the best performance. Additionally, we enhanced the model’s multilingual capabilities by integrating a machine translation model. Our system secured the 2nd place in the Arabic language category.</abstract>
diff --git a/data/xml/2024.sicon.xml b/data/xml/2024.sicon.xml
index 5f76bb9567..53c366754c 100644
--- a/data/xml/2024.sicon.xml
+++ b/data/xml/2024.sicon.xml
@@ -95,7 +95,7 @@
       <author><first>Zoey</first><last>Liu</last><affiliation>University of Florida</affiliation></author>
       <author><first>Sangpil</first><last>Youm</last><affiliation>University of Florida</affiliation></author>
       <author><first>Chathuri</first><last>Jayaweera</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Bonnie J.</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <pages>102-115</pages>
       <abstract>The unchecked spread of digital information, combined with increasing political polarization and the tendency of individuals to isolate themselves from opposing political viewpoints opposing views, has driven researchers to develop systems for automatically detecting political bias in media. This trend has been further fueled by discussions on social media. We explore methods for categorizing bias in US news articles, comparing rule-based and deep learning approaches. The study highlights the sensitivity of modern self-learning systems to unconstrained data ingestion, while reconsidering the strengths of traditional rule-based systems. Applying both models to left-leaning (CNN) and right-leaning (FOX) News articles, we assess their effectiveness on data beyond the original training and test sets. This analysis highlights each model’s accuracy, offers a framework for exploring deep-learning explainability, and sheds light on political bias in US news media. We contrast the opaque architecture of a deep learning model with the transparency of a linguistically informed rule-based model, showing that the rule-based model performs consistently across different data conditions and offers greater transparency, whereas the deep learning model is dependent on the training set and struggles with unseen data.</abstract>
       <url hash="46566c92">2024.sicon-1.7</url>
@@ -117,7 +117,7 @@
       <author><first>Ian</first><last>Perera</last><affiliation>Florida Institute for Human and Machine Cognition</affiliation></author>
       <author><first>Alex</first><last>Memory</last><affiliation>Johns Hopkins University Applied Physics Laboratory</affiliation></author>
       <author><first>Vera A.</first><last>Kazakova</last><affiliation>Florida Institute for Human and Machine Cognition</affiliation></author>
-      <author><first>Bonnie J.</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last><affiliation>University of Florida</affiliation></author>
       <author><first>Brodie</first><last>Mather</last><affiliation>Florida Institute for Human and Machine Cognition</affiliation></author>
       <author><first>Ritwik</first><last>Bose</last><affiliation>Johns Hopkins University Applied Physics Laboratory</affiliation></author>
       <author><first>Arash</first><last>Mahyari</last><affiliation>Florida Institute for Human and Machine Cognition</affiliation></author>
diff --git a/data/xml/2024.sigdial.xml b/data/xml/2024.sigdial.xml
index 08d6b218a0..e6a70b0eb9 100644
--- a/data/xml/2024.sigdial.xml
+++ b/data/xml/2024.sigdial.xml
@@ -8,7 +8,7 @@
       <editor><first>Stefan</first><last>Ultes</last></editor>
       <editor><first>Koji</first><last>Inoue</last></editor>
       <editor><first>Shikib</first><last>Mehri</last></editor>
-      <editor><first>David</first><last>Howcroft</last></editor>
+      <editor id="david-m-howcroft"><first>David</first><last>Howcroft</last></editor>
       <editor><first>Kazunori</first><last>Komatani</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Kyoto, Japan</address>
@@ -59,7 +59,7 @@
       <title>Examining Gender and Power on <fixed-case>W</fixed-case>ikipedia through Face and Politeness</title>
       <author><first>Adil</first><last>Soubki</last></author>
       <author><first>Shyne E.</first><last>Choi</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>40–50</pages>
       <abstract>We propose a framework for analyzing discourse by combining two interdependent concepts from sociolinguistic theory: face acts and politeness. While politeness has robust existing tools and data, face acts are less resourced. We introduce a new corpus created by annotating Wikipedia talk pages with face acts and we use this to train a face act tagger. We then employ our framework to study how face and politeness interact with gender and power in discussions between Wikipedia editors. Among other findings, we observe that female Wikipedians are not only more polite, which is consistent with prior studies, but that this difference corresponds with significantly more language directed at humbling aspects of their own face. Interestingly, the distinction nearly vanishes once limiting to editors with administrative power.</abstract>
       <url hash="2a2d783b">2024.sigdial-1.4</url>
@@ -88,8 +88,8 @@
       <author><first>Nishi</first><last>Uppuluri</last></author>
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Sha</first><last>Li</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>66–77</pages>
       <abstract>LLM-driven dialog systems are used in a diverse set of applications, ranging from healthcare to customer service. However, given their generalization capability, it is difficult to ensure that these chatbots stay within the boundaries of the specialized domains, potentially resulting in inaccurate information and irrelevant responses. This paper introduces an unsupervised approach for automatically inducing domain-specific dialog flows that can be used to constrain LLM-based chatbots. We introduce two variants of dialog flow based on the availability of in-domain conversation instances. Through human and automatic evaluation over 24 dialog domains, we demonstrate that our high-quality data-guided dialog flows achieve better domain coverage, thereby overcoming the need for extensive manual crafting of such flows.</abstract>
@@ -104,7 +104,7 @@
       <author><first>Neha</first><last>Pullabhotla</last></author>
       <author><first>Nan</first><last>Qiang</last></author>
       <author><first>Haoran</first><last>Zhang</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Maria Ines</first><last>Torres</last></author>
       <pages>78–91</pages>
       <abstract>Open domain spoken dialogue systems need to controllably generate many different dialogue acts (DAs) to allow Natural Language Generation (NLG) to create interesting and engaging conversational interactions with users. We aim to create an NLG engine that can produce a variety of DAs that make substantive knowledge-grounded contributions to a conversation. Training such an NLG typically requires dialogue corpora that are labelled for DAs, which are expensive to produce and vulnerable to quality issues. Here, we present a prompt-based learning approach to transfer DAs from one domain, video games, to 7 new domains. For each novel domain, we first crawl WikiData to create Meaning Representations that systematically vary both the number of attributes and hops on the WikiData Knowledge Graph. The proposed method involves a self-training step to create prompt examples for each domain followed by an overgeneration and ranking step. The result is a novel, high-quality dataset, Wiki-Dialogue, of 71K knowledge-grounded utterances, covering 9 DAs and the Art, Movies, Music, Sports, TV, Animal, and Boardgames domains, whose combined DA and semantic accuracy is 89%. We assess the corpus quality using both automatic and human evaluations and find it high. The corpus is found to be safe, lexically rich, and large in vocabulary, when compared to similar datasets.</abstract>
@@ -134,7 +134,7 @@
     </paper>
     <paper id="9">
       <title>Anticipating Follow-Up Questions in Exploratory Information Search</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>103–109</pages>
       <abstract>The paper describes methods for anticipating follow-up questions in exploratory information search. There are two main cases: information stored in knowledge graphs, and information in unstructured texts such as Wikipedia. In the first case, follow-up questions are anticipated by extracting subgraphs relevant to user queries, passing the subgraphs to an LLM to generate responses. In the second case, entities and their relationships are extracted from the texts and added to short-term knowledge graphs relevant to initial queries. Follow-up questions are then anticipated by extracting subgraphs relevant to subsequent queries and passing the subgraphs to the LLM, as in the first case. The short-term graphs in dialogue memory are often sufficient to answer follow-up questions. If they are not, the described steps are repeated as required.</abstract>
       <url hash="06f57771">2024.sigdial-1.9</url>
@@ -145,7 +145,7 @@
       <title>Bridging Information Gaps in Dialogues with Grounded Exchanges Using Knowledge Graphs</title>
       <author><first>Phillip</first><last>Schneider</last></author>
       <author><first>Nektarios</first><last>Machner</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Florian</first><last>Matthes</last></author>
       <pages>110–120</pages>
       <abstract>Knowledge models are fundamental to dialogue systems for enabling conversational interactions, which require handling domain-specific knowledge. Ensuring effective communication in information-providing conversations entails aligning user understanding with the knowledge available to the system. However, dialogue systems often face challenges arising from semantic inconsistencies in how information is expressed in natural language compared to how it is represented within the system’s internal knowledge. To address this problem, we study the potential of large language models for conversational grounding, a mechanism to bridge information gaps by establishing shared knowledge between dialogue participants. Our approach involves annotating human conversations across five knowledge domains to create a new dialogue corpus called BridgeKG. Through a series of experiments on this dataset, we empirically evaluate the capabilities of large language models in classifying grounding acts and identifying grounded information items within a knowledge graph structure. Our findings offer insights into how these models use in-context learning for conversational grounding tasks and common prediction errors, which we illustrate with examples from challenging dialogues. We discuss how the models handle knowledge graphs as a semantic layer between unstructured dialogue utterances and structured information items.</abstract>
@@ -158,8 +158,8 @@
       <author><first>E. Margaret</first><last>Perkoff</last></author>
       <author><first>Angela Maria</first><last>Ramirez</last></author>
       <author><first>Sean</first><last>von Bayern</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>121–138</pages>
       <abstract>Educational dialogue systems have been used to support students and teachers for decades. Such systems rely on explicit pedagogically motivated dialogue rules. With the ease of integrating large language models (LLMs) into dialogue systems, applications have been arising that directly use model responses without the use of human-written rules, raising concerns about their use in classroom settings. Here, we explore how to constrain LLM outputs to generate appropriate and supportive teacher-like responses. We present results comparing the effectiveness of different constraint variations in a zero-shot prompting setting on a large mathematics classroom corpus. Generated outputs are evaluated with human annotation for Fluency, Relevance, Helpfulness, and Adherence to the provided constraints. Including all constraints in the prompt led to the highest values for Fluency and Helpfulness, and the second highest value for Relevance. The annotation results also demonstrate that the prompts that result in the highest adherence to constraints do not necessarily indicate higher perceived scores for Fluency, Relevance, or Helpfulness. In a direct comparison, all of the non-baseline LLM responses were ranked higher than the actual teacher responses in the corpus over 50% of the time.</abstract>
       <url hash="b362f308">2024.sigdial-1.11</url>
@@ -233,8 +233,8 @@
       <author><first>Amie</first><last>Paige</last></author>
       <author><first>Adil</first><last>Soubki</last></author>
       <author><first>John</first><last>Murzaku</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Susan E.</first><last>Brennan</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="susan-e-brennan"><first>Susan E.</first><last>Brennan</last></author>
       <pages>204–215</pages>
       <abstract>Hedges allow speakers to mark utterances as provisional, whether to signal non-prototypicality or “fuzziness”, to indicate a lack of commitment to an utterance, to attribute responsibility for a statement to someone else, to invite input from a partner, or to soften critical feedback in the service of face management needs. Here we focus on hedges in an experimentally parameterized corpus of 63 Roadrunner cartoon narratives spontaneously produced from memory by 21 speakers for co-present addressees, transcribed to text (Galati and Brennan, 2010). We created a gold standard of hedges annotated by human coders (the Roadrunner-Hedge corpus) and compared three LLM-based approaches for hedge detection: fine-tuning BERT, and zero and few-shot prompting with GPT-4o and LLaMA-3. The best-performing approach was a fine-tuned BERT model, followed by few-shot GPT-4o. After an error analysis on the top performing approaches, we used an LLM-in-the-Loop approach to improve the gold standard coding, as well as to highlight cases in which hedges are ambiguous in linguistically interesting ways that will guide future research. This is the first step in our research program to train LLMs to interpret and generate collateral signals appropriately and meaningfully in conversation.</abstract>
       <url hash="72e38420">2024.sigdial-1.18</url>
@@ -292,8 +292,8 @@
       <author><first>Guangzhi</first><last>Sun</last></author>
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Wen</first><last>Wu</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>259–273</pages>
       <abstract>Affect recognition, encompassing emotions, moods, and feelings, plays a pivotal role in human communication. In the realm of conversational artificial intelligence, the ability to discern and respond to human affective cues is a critical factor for creating engaging and empathetic interactions. This study investigates the capacity of large language models (LLMs) to recognise human affect in conversations, with a focus on both open-domain chit-chat dialogues and task-oriented dialogues. Leveraging three diverse datasets, namely IEMOCAP (Busso et al., 2008), EmoWOZ (Feng et al., 2022), and DAIC-WOZ (Gratch et al., 2014), covering a spectrum of dialogues from casual conversations to clinical interviews, we evaluate and compare LLMs’ performance in affect recognition. Our investigation explores the zero-shot and few-shot capabilities of LLMs through in-context learning as well as their model capacities through task-specific fine-tuning. Additionally, this study takes into account the potential impact of automatic speech recognition errors on LLM predictions. With this work, we aim to shed light on the extent to which LLMs can replicate human-like affect recognition capabilities in conversations.</abstract>
       <url hash="d5573e0b">2024.sigdial-1.23</url>
@@ -306,7 +306,7 @@
       <author><first>Isabel</first><last>Carvalho</last></author>
       <author><first>Ana</first><last>Alves</last></author>
       <author><first>Catarina</first><last>Silva</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <pages>274–288</pages>
       <abstract>Customer-support services increasingly rely on automation, whether fully or with human intervention. Despite optimising resources, this may result in mechanical protocols and lack of human interaction, thus reducing customer loyalty. Our goal is to enhance interpretability and provide guidance in communication through novel tools for easier analysis of message trends and sentiment variations. Monitoring these contributes to more informed decision-making, enabling proactive mitigation of potential issues, such as protocol deviations or customer dissatisfaction. We propose a generic approach for dialogue flow discovery that leverages clustering techniques to identify dialogue states, represented by related utterances. State transitions are further analyzed to detect prevailing sentiments. Hence, we discover sentiment-aware dialogue flows that offer an interpretability layer to artificial agents, even those based on black-boxes, ultimately increasing trustworthiness. Experimental results demonstrate the effectiveness of our approach across different dialogue datasets, covering both human-human and human-machine exchanges, applicable in task-oriented contexts but also to social media, highlighting its potential impact across various customer-support settings.</abstract>
       <url hash="928a2e84">2024.sigdial-1.24</url>
@@ -322,7 +322,7 @@
       <author><first>Xinxuan</first><last>Qiu</last></author>
       <author><first>Yanni</first><last>Lin</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>289–296</pages>
       <abstract>When customers present ambiguous references, service staff typically need to clarify the customers’ specific intentions. To advance research in this area, we collected 1,000 real-world consumer dialogues with ambiguous references. This dataset will be used for subsequent studies to identify ambiguous references and generate responses. Our analysis of the dataset revealed common strategies employed by service staff, including directly asking clarification questions (CQ) and listing possible options before asking a clarification question (LCQ). However, we found that merely using CQ often fails to fully satisfy customers. In contrast, using LCQ, as well as recommending specific products after listing possible options, proved more effective in resolving ambiguous references and enhancing customer satisfaction.</abstract>
       <url hash="91cbc7c0">2024.sigdial-1.25</url>
@@ -345,7 +345,7 @@
       <title>Transforming Slot Schema Induction with Generative Dialogue State Inference</title>
       <author><first>James D.</first><last>Finch</last></author>
       <author><first>Boxin</first><last>Zhao</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>317–324</pages>
       <abstract>The challenge of defining a slot schema to represent the state of a task-oriented dialogue system is addressed by Slot Schema Induction (SSI), which aims to automatically induce slots from unlabeled dialogue data. Whereas previous approaches induce slots by clustering value spans extracted directly from the dialogue text, we demonstrate the power of discovering slots using a generative approach. By training a model to generate slot names and values that summarize key dialogue information with no prior task knowledge, our SSI method discovers high-quality candidate information for representing dialogue state. These discovered slot-value candidates can be easily clustered into unified slot schemas that align well with human-authored schemas. Experimental comparisons on the MultiWOZ and SGD datasets demonstrate that Generative Dialogue State Inference (GenDSI) outperforms the previous state-of-the-art on multiple aspects of the SSI task.</abstract>
       <url hash="639d70e1">2024.sigdial-1.27</url>
@@ -376,7 +376,7 @@
       <title>Enhancing Dialogue Speech Recognition with Robust Contextual Awareness via Noise Representation Learning</title>
       <author><first>Wonjun</first><last>Lee</last></author>
       <author><first>San</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>333–343</pages>
       <abstract>Recent dialogue systems typically operate through turn-based spoken interactions between users and agents. These systems heavily depend on accurate Automatic Speech Recognition (ASR), as transcription errors can significantly degrade performance in downstream dialogue tasks. To alleviate this challenge, robust ASR is required, and one effective method is to utilize the dialogue context from user and agent interactions for transcribing the subsequent user utterance. This method incorporates the transcription of the user’s speech and the agent’s response as model input, using the accumulated context generated by each turn. However, this context is susceptible to ASR errors because the ASR model generates it auto-regressively. Such noisy context can further degrade the benefits of context input, resulting in suboptimal ASR performance. In this paper, we introduce context noise representation learning to enhance robustness against noisy context, ultimately improving dialogue speech recognition accuracy. To maximize the advantage of context awareness, our approach involves decoder pre-training with text-based dialogue data and noise representation learning for a context encoder. Evaluated on DSTC11 (MultiWoZ 2.1 audio dialogues), it achieves a 24% relative reduction in Word Error Rate (WER) compared to wav2vec2.0 baselines and a 13% reduction compared to Whisper-large-v2. Notably, in noisy environments where user speech is barely audible, our method proves its effectiveness by utilizing contextual information for accurate transcription. Tested on audio data with strong noise level (Signal Noise Ratio of 0dB), our approach shows up to a 31% relative WER reduction compared to the wav2vec2.0 baseline, providing a reassuring solution for real-world noisy scenarios.</abstract>
       <url hash="4287c2d1">2024.sigdial-1.30</url>
@@ -385,14 +385,14 @@
     </paper>
     <paper id="31">
       <title>Local Topology Measures of Contextual Language Model Latent Spaces with Applications to Dialogue Term Extraction</title>
-      <author><first>Benjamin Matthias</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin Matthias</first><last>Ruppik</last></author>
       <author><first>Michael</first><last>Heck</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Renato</first><last>Vukovic</last></author>
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Shutong</first><last>Feng</last></author>
       <author><first>Marcus</first><last>Zibrowius</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>344–356</pages>
       <abstract>A common approach for sequence tagging tasks based on contextual word representations is to train a machine learning classifier directly on these embedding vectors. This approach has two shortcomings. First, such methods consider single input sequences in isolation and are unable to put an individual embedding vector in relation to vectors outside the current local context of use. Second, the high performance of these models relies on fine-tuning the embedding model in conjunction with the classifier, which may not always be feasible due to the size or inaccessibility of the underlying feature-generation model. It is thus desirable, given a collection of embedding vectors of a corpus, i.e. a datastore, to find features of each vector that describe its relation to other, similar vectors in the datastore. With this in mind, we introduce complexity measures of the local topology of the latent space of a contextual language model with respect to a given datastore. The effectiveness of our features is demonstrated through their application to dialogue term extraction. Our work continues a line of research that explores the manifold hypothesis for word embeddings, demonstrating that local structure in the space carved out by word embeddings can be exploited to infer semantic properties.</abstract>
       <url hash="7b53ed23">2024.sigdial-1.31</url>
@@ -415,10 +415,10 @@
       <author><first>Renato</first><last>Vukovic</last></author>
       <author><first>David</first><last>Arps</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
-      <author><first>Benjamin Matthias</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin Matthias</first><last>Ruppik</last></author>
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Michael</first><last>Heck</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
       <pages>370–384</pages>
       <abstract>State-of-the-art task-oriented dialogue systems typically rely on task-specific ontologies for fulfilling user queries. The majority of task-oriented dialogue data, such as customer service recordings, comes without ontology and annotation. Such ontologies are normally built manually, limiting the application of specialised systems. Dialogue ontology construction is an approach for automating that process and typically consists of two steps: term extraction and relation extraction. In this work, we focus on relation extraction in a transfer learning set-up. To improve the generalisation, we propose an extension to the decoding mechanism of large language models. We adapt Chain-of-Thought (CoT) decoding, recently developed for reasoning problems, to generative relation extraction. Here, we generate multiple branches in the decoding space and select the relations based on a confidence threshold. By constraining the decoding to ontology terms and relations, we aim to decrease the risk of hallucination. We conduct extensive experimentation on two widely used datasets and find improvements in performance on target ontology for source fine-tuned and one-shot prompted large language models.</abstract>
       <url hash="bc424cc3">2024.sigdial-1.33</url>
@@ -460,7 +460,7 @@
       <author><first>Haolan</first><last>Zhan</last></author>
       <author><first>Sameen</first><last>Maruf</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>420–427</pages>
       <abstract>Building a dialogue agent that can seamlessly interact with humans in multi-modal regimes, requires two fundamental abilities: (1) understanding emotion and dialogue acts within situated user scenarios, and (2) grounding perceived visual cues to dialogue contexts. However, recent works have uncovered shortcomings of existing dialogue agents in understanding emotions and dialogue acts, and in ground- ing visual cues effectively. In this work, we investigate whether additional dialogue data with only visual descriptions can help dialogue agents effectively align visual and textual features, and enhance the ability of dialogue agents to ground perceived visual cues to dialogue contexts. To this end, in the absence of a suitable dataset, we propose a synthetic visual description generation pipeline, and con- tribute a large-scale synthetic visual description dataset. In addition, we propose a general training procedure for effectively leveraging these synthetic data. We conduct comprehensive analyses to evaluate the impact of synthetic data on two benchmarks: MELD and IEMOCAP. Our findings suggest that synthetic visual descriptions can serve as an effective way to enhance a dialogue agents’ grounding ability, and that the training scheme affects the extent to which these descriptions improve the agent’s performance.</abstract>
       <url hash="a0328857">2024.sigdial-1.36</url>
@@ -480,7 +480,7 @@
     <paper id="38">
       <title>Conversational Feedback in Scripted versus Spontaneous Dialogues: A Comparative Analysis</title>
       <author><first>Ildiko</first><last>Pilan</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Hendrik</first><last>Buschmeier</last></author>
       <author><first>Pierre</first><last>Lison</last></author>
       <pages>440–457</pages>
@@ -498,7 +498,7 @@
       <author><first>Seungpil</first><last>Won</last></author>
       <author><first>Janghoon</first><last>Han</last></author>
       <author><first>Stanley Jungkyu</first><last>Choi</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>458–465</pages>
       <abstract>In task-oriented dialogue systems, intent classification is crucial for accurately understanding user queries and providing appropriate services. This study explores the use of intent descriptions with large language models for unseen domain intent classification. By examining the effects of description quality, quantity, and input length management, we identify practical guidelines for optimizing performance. Our experiments using FLAN-T5 3B demonstrate that 1) high-quality descriptions for both training and testing significantly improve accuracy, 2) diversity in training descriptions doesn’t greatly affect performance, and 3) off-the-shelf rankers selecting around ten intent options reduce input length without compromising performance. We emphasize that high-quality testing descriptions have a greater impact on accuracy than training descriptions. These findings provide practical guidelines for using intent descriptions with large language models to achieve effective and efficient intent classification in low-resource settings.</abstract>
       <url hash="3c5bc698">2024.sigdial-1.39</url>
@@ -545,7 +545,7 @@
       <title>Optimizing Code-Switching in Conversational Tutoring Systems: A Pedagogical Framework and Evaluation</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Stella Xin</first><last>Yin</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>500–515</pages>
       <abstract>Large language models demonstrate remarkable proficiency in various tasks across multiple languages. However, their potential in code-switching remains underexplored, particularly in cultural and educational contexts. Code-switching or translanguaging plays a crucial role in bilingual education, facilitating comprehension and engagement among students with varied linguistic proficiencies. In this work, we present a pedagogy-inspired framework that introduces traditional classroom practices of code-switching to intelligent tutoring systems. Specifically, we develop fine-grained instructional strategies tailored to multilingual and educational needs. We conduct experiments involving both LLM-based evaluation and expert analysis to assess the effectiveness of translanguaging in tutoring dialogues. Our experimental results indicate that strategic code-switching can significantly enhance the learning experience. This work not only advances dialogic tutors in language learning, but also extends LLMs to better accommodate multilingual interaction.</abstract>
       <url hash="4e52fdcb">2024.sigdial-1.43</url>
@@ -556,7 +556,7 @@
       <title><fixed-case>EC</fixed-case>oh: Turn-level Coherence Evaluation for Multilingual Dialogues</title>
       <author><first>John</first><last>Mendonca</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>516–532</pages>
       <abstract>Despite being heralded as the new standard for dialogue evaluation, the closed-source nature of GPT-4 poses challenges for the community. Motivated by the need for lightweight, open source, and multilingual dialogue evaluators, this paper introduces GenResCoh (Generated Responses targeting Coherence). GenResCoh is a novel LLM generated dataset comprising over 130k negative and positive responses and accompanying explanations seeded from XDailyDialog and XPersona covering English, French, German, Italian, and Chinese. Leveraging GenResCoh, we propose ECoh (Evaluation of Coherence), a family of evaluators trained to assess response coherence across multiple languages. Experimental results demonstrate that ECoh achieves multilingual detection capabilities superior to the teacher model (GPT-3.5-Turbo) on GenResCoh, despite being based on a much smaller architecture. Furthermore, the explanations provided by ECoh closely align in terms of quality with those generated by the teacher model.</abstract>
       <url hash="b7956ae6">2024.sigdial-1.44</url>
@@ -570,7 +570,7 @@
       <author><first>Yejin</first><last>Jeon</last></author>
       <author><first>Jungseul</first><last>Ok</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>533–543</pages>
       <abstract>Research on hate speech has predominantly revolved around the detection and interpretation from textual inputs, leaving verbal content largely unexplored. Moreover, while there has been some limited exploration into hate speech detection within verbal acoustic speech inputs, the aspect of interpretability has been overlooked. As such, we introduce a new task within the audio hate speech detection task domain - we specifically aim to identify specific time frames of hate speech within audio utterances. Towards this, we propose two different approaches, cascading and End-to-End (E2E). The first cascading approach initially converts audio to transcripts, identifies hate speech within these transcripts, and subsequently locates the corresponding audio time frames. Conversely, the second E2E approach processes audio utterances directly, which allows it to pinpoint hate speech within specific time frames. Moreover, due to the lack of explainable audio hate speech datasets that include frame-level rationales, we curated a synthetic audio dataset to train our models. We further validate these models on actual human speech utterances and we find that the E2E approach outperforms the cascading method in terms of audio frame Intersection over Union (IoU) metric. Furthermore, we observe that the inclusion of frame-level rationales significantly enhances hate speech detection accuracy for both E2E and cascading approaches.</abstract>
       <url hash="3b247e57">2024.sigdial-1.45</url>
@@ -602,7 +602,7 @@
     <paper id="48">
       <title><fixed-case>B</fixed-case>o<fixed-case>K</fixed-case>: Introducing Bag-of-Keywords Loss for Interpretable Dialogue Response Generation</title>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <pages>566–578</pages>
       <abstract>The standard language modeling (LM) loss by itself has been shown to be inadequate for effective dialogue modeling. As a result, various training approaches, such as auxiliary loss functions and leveraging human feedback, are being adopted to enrich open-domain dialogue systems. One such auxiliary loss function is Bag-of-Words (BoW) loss, defined as the cross-entropy loss for predicting all the words/tokens of the next utterance. In this work, we propose a novel auxiliary loss named Bag-of-Keywords (BoK) loss to capture the central thought of the response through keyword prediction and leverage it to enhance the generation of meaningful and interpretable responses in open-domain dialogue systems. BoK loss upgrades the BoW loss by predicting only the keywords or critical words/tokens of the next utterance, intending to estimate the core idea rather than the entire response. We incorporate BoK loss in both encoder-decoder (T5) and decoder-only (DialoGPT) architecture and train the models to minimize the weighted sum of BoK and LM (BoK-LM) loss. We perform our experiments on two popular open-domain dialogue datasets, DailyDialog and Persona-Chat. We show that the inclusion of BoK loss improves the dialogue generation of backbone models while also enabling post-hoc interpretability. We also study the effectiveness of BoK-LM loss as a reference-free metric and observe comparable performance to the state-of-the-art metrics on various dialogue evaluation datasets.</abstract>
       <url hash="57393bcc">2024.sigdial-1.48</url>
@@ -612,7 +612,7 @@
     <paper id="49">
       <title>Cross-lingual Transfer and Multilingual Learning for Detecting Harmful Behaviour in <fixed-case>A</fixed-case>frican Under-Resourced Language Dialogue</title>
       <author><first>Tunde Oluwaseyi</first><last>Ajayi</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>579–589</pages>
       <abstract>Most harmful dialogue detection models are developed for high-resourced languages. Consequently, users who speak under-resourced languages cannot fully benefit from these models in terms of usage, development, detection and mitigation of harmful dialogue utterances. Our work aims at detecting harmful utterances in under-resourced African languages. We leverage transfer learning using pretrained models trained with multilingual embeddings to develop a cross-lingual model capable of detecting harmful content across various African languages. We first fine-tune a harmful dialogue detection model on a selected African dialogue dataset. Additionally, we fine-tune a model on a combined dataset in some African languages to develop a multilingual harmful dialogue detection model. We then evaluate the cross-lingual model’s ability to generalise to an unseen African language by performing harmful dialogue detection in an under-resourced language not present during pretraining or fine-tuning. We evaluate our models on the test datasets. We show that our best performing models achieve impressive results in terms of F1 score. Finally, we discuss the results and limitations of our work.</abstract>
@@ -623,7 +623,7 @@
     <paper id="50">
       <title>A Few-shot Approach to Task-oriented Dialogue Enhanced with Chitchat</title>
       <author><first>Armand</first><last>Stricker</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>590–602</pages>
       <abstract>Large language models (LLMs) tuned for chat have recently been adopted for few-shot end-to-end task-oriented dialogue (TOD), with some success. To further assess this method, we conduct experiments on two, more complex, task-oriented benchmarks that integrate elements of chitchat into the conversation. We enhance a few-shot baseline by adding zero-shot chitchat detection and implementing <i>function calling</i> for dialogue state tracking (DST). We focus on this step in the task-oriented pipeline as it comes first, and errors due to added chitchat at this stage have the most impact on end-to-end performance. We find that this prompting method shows increased resilience to mixed-mode inputs and our enhanced pipeline allows for natural inter-mode conversations, as assessed through human evaluation. Our findings also suggest that the performance gap between few-shot prompting for TOD and supervised task-specific models is narrowing.</abstract>
       <url hash="4545ff12">2024.sigdial-1.50</url>
@@ -657,7 +657,7 @@
       <author><first>Zlata</first><last>Kikteva</last></author>
       <author><first>Alexander</first><last>Trautsch</last></author>
       <author><first>Steffen</first><last>Herbold</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <pages>624–630</pages>
       <abstract>In spontaneous natural debate, questions play a variety of crucial roles: they allow speakers to introduce new topics, seek other speakers’ opinions or indeed confront them. A three-class question typology has previously been demonstrated to effectively capture details pertaining to the nature of questions and the different functions associated with them in a debate setting. We adopt this classification and investigate the performance of several machine learning approaches on this task by incorporating various sets of lexical, dialogical and argumentative features. We find that BERT demonstrates the best performance on the task, followed by a Random Forest model enriched with pragmatic features.</abstract>
       <url hash="24122c2f">2024.sigdial-1.53</url>
@@ -687,7 +687,7 @@
       <author><first>Negar</first><last>Fani</last></author>
       <author><first>Sierra</first><last>Carter</last></author>
       <author><first>Stephen</first><last>Doogan</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>644–663</pages>
       <abstract>The shortage of clinical workforce presents significant challenges in mental healthcare, limiting access to formal diagnostics and services. We aim to tackle this shortage by integrating a customized large language model (LLM) into the workflow, thus promoting equity in mental healthcare for the general population. Although LLMs have showcased their capability in clinical decision-making, their adaptation to severe conditions like Post-traumatic Stress Disorder (PTSD) remains largely unexplored. Therefore, we collect 411 clinician-administered diagnostic interviews and devise a novel approach to obtain high-quality data. Moreover, we build a comprehensive framework to automate PTSD diagnostic assessments based on interview contents by leveraging two state-of-the-art LLMs, GPT-4 and Llama-2, with potential for broader clinical diagnoses. Our results illustrate strong promise for LLMs, tested on our dataset, to aid clinicians in diagnostic validation. To the best of our knowledge, this is the first AI system that fully automates assessments for mental illness based on clinician-administered interviews.</abstract>
       <url hash="2bdd215a">2024.sigdial-1.55</url>
@@ -736,7 +736,7 @@
     <paper id="59">
       <title><fixed-case>D</fixed-case>iag<fixed-case>ESC</fixed-case>: Dialogue Synthesis for Integrating Depression Diagnosis into Emotional Support Conversation</title>
       <author><first>Seungyeon</first><last>Seo</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>686–698</pages>
       <abstract>Dialogue systems for mental health care aim to provide appropriate support to individuals experiencing mental distress. While extensive research has been conducted to deliver adequate emotional support, existing studies cannot identify individuals who require professional medical intervention and cannot offer suitable guidance. We introduce the Diagnostic Emotional Support Conversation task for an advanced mental health management system. We develop the DESC dataset to assess depression symptoms while maintaining user experience by utilizing task-specific utterance generation prompts and a strict filtering algorithm. Evaluations by professional psychological counselors indicate that DESC has a superior ability to diagnose depression than existing data. Additionally, conversational quality evaluation reveals that DESC maintains fluent, consistent, and coherent dialogues.</abstract>
       <url hash="25ce1bf2">2024.sigdial-1.59</url>
@@ -754,9 +754,9 @@
       <author><first>Nurul</first><last>Lubis</last></author>
       <author><first>Carel</first><last>van Niekerk</last></author>
       <author><first>Michael</first><last>Heck</last></author>
-      <author><first>Benjamin</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin</first><last>Ruppik</last></author>
       <author><first>Renato</first><last>Vukovic</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>699–717</pages>
       <abstract>Emotions are indispensable in human communication, but are often overlooked in task-oriented dialogue (ToD) modelling, where the task success is the primary focus. While existing works have explored user emotions or similar concepts in some ToD tasks, none has so far included emotion modelling into a fully-fledged ToD system nor conducted interaction with human or simulated users. In this work, we incorporate emotion into the complete ToD processing loop, involving understanding, management, and generation. To this end, we extend the EmoWOZ dataset (Feng et al., 2022) with system affective behaviour labels. Through interactive experimentation involving both simulated and human users, we demonstrate that our proposed framework significantly enhances the user’s emotional experience as well as the task success.</abstract>
       <url hash="d3a755f7">2024.sigdial-1.60</url>
@@ -790,7 +790,7 @@
       <author><first>Shuwen</first><last>Qiu</last></author>
       <author><first>Mingdian</first><last>Liu</last></author>
       <author><first>Hengli</first><last>Li</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <author><first>Zilong</first><last>Zheng</last></author>
       <pages>746–759</pages>
       <abstract>Humans talk in daily conversations while aligning and negotiating the expressed meanings or common ground. Despite the impressive conversational abilities of the large generative language models, they do not consider the individual differences in contextual understanding in a shared situated environment. In this work, we propose MindDial, a novel conversational framework that can generate situated free-form responses to align and negotiate common ground. We design an explicit mind module that can track three-level beliefs – the speaker’s belief, the speaker’s prediction of the listener’s belief, and the belief gap between the first two. Then the next response is generated to resolve the belief difference and take task-related action. Our framework is applied to both prompting and fine-tuning-based models, and is evaluated across scenarios involving both common ground alignment and negotiation. Experiments show that models with mind modeling can generate more human-like responses when aligning and negotiating common ground. The ablation study further validates the three-level belief design can aggregate information and improve task outcomes in both cooperative and negotiating settings.</abstract>
diff --git a/data/xml/2024.sighan.xml b/data/xml/2024.sighan.xml
index 63e2cc810c..b92a701a3f 100644
--- a/data/xml/2024.sighan.xml
+++ b/data/xml/2024.sighan.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-07-30" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 10th SIGHAN Workshop on Chinese Language Processing (SIGHAN-10)</booktitle>
-      <editor><first>Kam-Fai</first><last>Wong</last></editor>
+      <editor id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Ruifeng</first><last>Xu</last></editor>
       <editor><first>Jing</first><last>Li</last></editor>
@@ -104,7 +104,7 @@
       <author><first>Jing</first><last>Li</last><affiliation>The Hong Kong Polytechnic University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>58-68</pages>
       <abstract>Vast amount of online conversations are produced on a daily basis, resulting in a pressing need to automatic conversation understanding. As a basis to structure a discussion, we identify the responding relations in the conversation discourse, which link response utterances to their initiations. To figure out who responded to whom, here we explore how the consistency of topic contents and dependency of discourse roles indicate such interactions, whereas most prior work ignore the effects of latent factors underlying word occurrences. We propose a neural model to learn latent topics and discourse in word distributions, and predict pairwise initiation-response links via exploiting topic consistency and discourse dependency. Experimental results on both English and Chinese conversations show that our model significantly outperforms the previous state of the arts.</abstract>
       <url hash="c5bda00b">2024.sighan-1.7</url>
@@ -172,7 +172,7 @@
       <author><first>Xingren</first><last>Wang</last></author>
       <author><first>Shanhong</first><last>Liu</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>112-120</pages>
       <abstract>The DimABSA task requires fine-grained sentiment intensity prediction for restaurant reviews, including scores for Valence and Arousal dimensions for each Aspect Term. In this study, we propose a Coarse-to-Fine In-context Learning (CFICL) method based on the Baichuan2-7B model for the DimABSA task in the SIGHAN 2024 workshop. Our method improves prediction accuracy through a two-stage optimization process. In the first stage, we use fixed in-context examples and prompt templates to enhance the model’s sentiment recognition capability and provide initial predictions for the test data. In the second stage, we encode the Opinion field using BERT and select the most similar training data as new in-context examples based on similarity. These examples include the Opinion field and its scores, as well as related opinion words and their average scores. By filtering for sentiment polarity, we ensure that the examples are consistent with the test data. Our method significantly improves prediction accuracy and consistency by effectively utilizing training data and optimizing in-context examples, as validated by experimental results.</abstract>
       <url hash="fba2907e">2024.sighan-1.13</url>
@@ -240,7 +240,7 @@
     <paper id="19">
       <title>Overview of the <fixed-case>SIGHAN</fixed-case> 2024 shared task for <fixed-case>C</fixed-case>hinese dimensional aspect-based sentiment analysis</title>
       <author><first>Lung-Hao</first><last>Lee</last><affiliation>National Yang Ming Chiao Tung University</affiliation></author>
-      <author><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last><affiliation>Yuan Ze University</affiliation></author>
       <author><first>Suge</first><last>Wang</last></author>
       <author><first>Jian</first><last>Liao</last><affiliation>Shanxi University</affiliation></author>
       <pages>165-174</pages>
diff --git a/data/xml/2024.sigmorphon.xml b/data/xml/2024.sigmorphon.xml
index 03fdcccae4..77eefcfd9c 100644
--- a/data/xml/2024.sigmorphon.xml
+++ b/data/xml/2024.sigmorphon.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 21st SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology</booktitle>
       <editor><first>Garrett</first><last>Nicolai</last></editor>
       <editor><first>Eleanor</first><last>Chodroff</last></editor>
-      <editor><first>Frederic</first><last>Mailhot</last></editor>
+      <editor id="frederic-mailhot"><first>Frederic</first><last>Mailhot</last></editor>
       <editor><first>Çağrı</first><last>Çöltekin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
@@ -78,7 +78,7 @@
     <paper id="6">
       <title>The Effect of Model Capacity and Script Diversity on Subword Tokenization for <fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish</title>
       <author><first>Ali</first><last>Salehi</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <pages>51-56</pages>
       <abstract>Tokenization and morphological segmentation continue to pose challenges for text processing and studies of human language. Here, we focus on written Soranî Kurdish, which uses a modified script based on Persian and Arabic, and its transliterations into the Kurdish Latin script. Importantly, Perso-Arabic and Latin-based writing systems demonstrate different statistical and structural properties, which may have significant effects on subword vocabulary learning. This has major consequences for frequency- or probability-based models of morphological induction. We explore the possibility that jointly training subword vocabularies using a source script along with its transliteration would improve morphological segmentation, subword tokenization, and whether gains are observed for one system over others. We find that joint training has a similar effect to increasing vocabulary size, while keeping subwords shorter in length, which produces higher-quality subwords that map onto morphemes.</abstract>
       <url hash="990f754a">2024.sigmorphon-1.6</url>
@@ -99,7 +99,7 @@
     <paper id="8">
       <title>Acoustic barycenters as exemplar production targets</title>
       <author><first>Frederic</first><last>Mailhot</last></author>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <pages>67-76</pages>
       <abstract>We present a solution to the problem of exemplar-based language production from variable-duration tokens, leveraging algorithms from the domain of time-series clustering and classification. Our model stores and outputs tokens of phonetically rich and temporally variable representations of recorded speech. We show qualitatively and quantitatively that model outputs retain essential acoustic/phonetic characteristics despite the noise introduced by averaging, and also demonstrate the effects of similarity and indexical information as constraints on exemplar cloud selection.</abstract>
       <url hash="00f00ea2">2024.sigmorphon-1.8</url>
diff --git a/data/xml/2024.signlang.xml b/data/xml/2024.signlang.xml
index c5ff525723..f158e8613a 100644
--- a/data/xml/2024.signlang.xml
+++ b/data/xml/2024.signlang.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources</booktitle>
       <editor><first>Eleni</first><last>Efthimiou</last></editor>
-      <editor><first>Stavroula-Evita</first><last>Fotinea</last></editor>
+      <editor id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></editor>
       <editor><first>Thomas</first><last>Hanke</last></editor>
       <editor><first>Julie A.</first><last>Hochgesang</last></editor>
       <editor><first>Johanna</first><last>Mesch</last></editor>
@@ -34,7 +34,7 @@
       <title>Person Identification from Pose Estimates in Sign Language</title>
       <author><first>Alessia</first><last>Battisti</last></author>
       <author><first>Emma</first><last>van den Bold</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <author><first>Franz</first><last>Holzknecht</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
       <pages>13–25</pages>
@@ -108,7 +108,7 @@
       <author><first>Diandra</first><last>Fabre</last></author>
       <author><first>Yanis</first><last>Ouakrim</last></author>
       <author><first>Julie</first><last>Lascar</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Michèle</first><last>Gouiffès</last></author>
       <author><first>Denis</first><last>Beautemps</last></author>
       <pages>95–101</pages>
@@ -172,7 +172,7 @@
       <title>Shedding Light on the Underexplored: Tackling the Minor Sign Language Research Topics</title>
       <author><first>Jung-Ho</first><last>Kim</last></author>
       <author><first>Changyong</first><last>Ko</last></author>
-      <author><first>Mathew</first><last>Huerta-Enochian</last></author>
+      <author id="mathew-huerta-enochian"><first>Mathew</first><last>Huerta-Enochian</last></author>
       <author><first>Seung Yong</first><last>Ko</last></author>
       <pages>147–158</pages>
       <url hash="6235ccb4">2024.signlang-1.16</url>
@@ -244,7 +244,7 @@
       <title>Annotation of <fixed-case>LSF</fixed-case> subtitled videos without a pre-existing dictionary</title>
       <author><first>Julie</first><last>Lascar</last></author>
       <author><first>Michèle</first><last>Gouiffès</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Claire</first><last>Danet</last></author>
       <pages>204–212</pages>
       <url hash="9e32f4fc">2024.signlang-1.22</url>
@@ -272,7 +272,7 @@
     <paper id="25">
       <title>Quantitative Analysis of Hand Locations in both Sign Language and Non-linguistic Gesture Videos</title>
       <author><first>Niels</first><last>Martínez-Guevara</last></author>
-      <author><first>Arturo</first><last>Curiel</last></author>
+      <author id="arturo-curiel"><first>Arturo</first><last>Curiel</last></author>
       <pages>225–234</pages>
       <url hash="c7b2610a">2024.signlang-1.25</url>
       <bibkey>martinez-guevara-curiel-2024-quantitative</bibkey>
@@ -391,7 +391,7 @@
       <author><first>Huije</first><last>Lee</last></author>
       <author><first>Eui Jun</first><last>Hwang</last></author>
       <author><first>Sukmin</first><last>Cho</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>323–334</pages>
       <url hash="7f75f86d">2024.signlang-1.36</url>
       <bibkey>roh-etal-2024-preprocessing</bibkey>
@@ -465,7 +465,7 @@
       <author><first>José Luis</first><last>Alba-Castro</last></author>
       <author><first>Ania</first><last>Pérez-Pérez</last></author>
       <author><first>Carmen</first><last>Cabeza-Pereiro</last></author>
-      <author><first>Laura</first><last>Docío-Fernández</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docío-Fernández</last></author>
       <pages>386–394</pages>
       <url hash="e9b2e042">2024.signlang-1.43</url>
       <bibkey>vazquez-enriquez-etal-2024-signamed</bibkey>
diff --git a/data/xml/2024.sigtyp.xml b/data/xml/2024.sigtyp.xml
index cde5ec0ab9..86810aa165 100644
--- a/data/xml/2024.sigtyp.xml
+++ b/data/xml/2024.sigtyp.xml
@@ -7,7 +7,7 @@
       <editor><first>Alexey</first><last>Sorokin</last></editor>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
       <editor><first>Andreas</first><last>Shcherbakov</last></editor>
-      <editor><first>Yulia</first><last>Otmakhova</last></editor>
+      <editor id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></editor>
       <editor><first>Jinrui</first><last>Yang</last></editor>
       <editor><first>Oleg</first><last>Serikov</last></editor>
       <editor><first>Priya</first><last>Rani</last></editor>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="6">
       <title>The Typology of Ellipsis: A Corpus for Linguistic Analysis and Machine Learning Applications</title>
-      <author><first>Damir</first><last>Cavar</last><affiliation>Indiana University</affiliation></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last><affiliation>Indiana University</affiliation></author>
       <author><first>Ludovic</first><last>Mompelat</last></author>
       <author><first>Muhammad</first><last>Abdo</last></author>
       <pages>46-54</pages>
@@ -169,7 +169,7 @@
       <author><first>Xiluo</first><last>He</last></author>
       <author><first>Prabhjot</first><last>Kaur</last></author>
       <author><first>Oliver</first><last>Adams</last></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>100-112</pages>
       <abstract>While massively multilingual speech models like wav2vec 2.0 XLSR-128 can be directly fine-tuned for automatic speech recognition (ASR), downstream performance can still be relatively poor on languages that are under-represented in the pre-training data. Continued pre-training on 70–200 hours of untranscribed speech in these languages can help — but what about languages without that much recorded data? For such cases, we show that supplementing the target language with data from a similar, higher-resource ‘donor’ language can help. For example, continued pretraining on only 10 hours of low-resource Punjabi supplemented with 60 hours of donor Hindi is almost as good as continued pretraining on 70 hours of Punjabi. By contrast, sourcing supplemental data from less similar donors like Bengali does not improve ASR performance. To inform donor language selection, we propose a novel similarity metric based on the sequence distribution of induced acoustic units: the Acoustic Token Distribution Similarity (ATDS). Across a set of typologically different target languages (Punjabi, Galician, Iban, Setswana), we show that the ATDS between the target language and its candidate donors precisely predicts target language ASR performance.</abstract>
       <url hash="fda5588c">2024.sigtyp-1.13</url>
@@ -185,7 +185,7 @@
       <author><first>Lucas</first><last>Huang</last></author>
       <author><first>Ethan</first><last>Chi</last></author>
       <author><first>R.</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>113-119</pages>
       <abstract>Large language models (LLMs) perform well on (at least) some evaluations of both few-shot multilingual adaptation and reasoning. However, evaluating the intersection of these two skills—multilingual few-shot reasoning—is difficult: even relatively low-resource languages can be found in large training corpora, raising the concern that when we intend to evaluate a model’s ability to generalize to a new language, that language may have in fact been present during the model’s training. If such language contamination has occurred, apparent cases of few-shot reasoning could actually be due to memorization. Towards understanding the capability of models to perform multilingual few-shot reasoning, we propose modeLing, a benchmark of Rosetta stone puzzles. This type of puzzle, originating from competitions called Linguistics Olympiads, contain a small number of sentences in a target language not previously known to the solver. Each sentence is translated to the solver’s language such that the provided sentence pairs uniquely specify a single most reasonable underlying set of rules; solving requires applying these rules to translate new expressions (Figure 1). modeLing languages are chosen to be extremely low-resource such that the risk of training data contamination is low, and unlike prior datasets, it consists entirely of problems written specifically for this work, as a further measure against data leakage. Empirically, we find evidence that popular LLMs do not have data leakage on our benchmark.</abstract>
       <url hash="08504410">2024.sigtyp-1.14</url>
@@ -235,9 +235,9 @@
       <author><first>Oksana</first><last>Dereza</last><affiliation>University of Galway</affiliation></author>
       <author><first>Adrian</first><last>Doyle</last><affiliation>University of Galway</affiliation></author>
       <author><first>Priya</first><last>Rani</last><affiliation>University of Galway</affiliation></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway</affiliation></author>
       <author><first>Pádraic</first><last>Moran</last><affiliation>University of Galway</affiliation></author>
-      <author><first>John</first><last>McCrae</last><affiliation>University of Galway</affiliation></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last><affiliation>University of Galway</affiliation></author>
       <pages>160-172</pages>
       <abstract>This paper discusses the organisation and findings of the SIGTYP 2024 Shared Task on Word Embedding Evaluation for Ancient and Historical Languages. The shared task was split into the constrained and unconstrained tracks and involved solving either 3 or 5 problems for either 13 or 16 ancient and historical languages belonging to 4 language families, and making use of 6 different scripts. There were 14 registrations in total, of which 3 teams submitted to each track. Out of these 6 submissions, 2 systems were successful in the constrained setting and another 2 in the uncon- strained setting, and 4 system description papers were submitted by different teams. The best average result for morphological feature prediction was about 96%, while the best average results for POS-tagging and lemmatisation were 96% and 94% respectively. At the word level, the winning team could not achieve a higher average accuracy across all 16 languages than 5.95%, which demonstrates the difficulty of this problem. At the character level, the best average result over 16 languages 55.62%</abstract>
       <url hash="c66b4f4d">2024.sigtyp-1.19</url>
diff --git a/data/xml/2024.sigul.xml b/data/xml/2024.sigul.xml
index 38b80d5c98..69ecc7cb3f 100644
--- a/data/xml/2024.sigul.xml
+++ b/data/xml/2024.sigul.xml
@@ -40,10 +40,10 @@
     <paper id="3">
       <title>Advancing Generative <fixed-case>AI</fixed-case> for <fixed-case>P</fixed-case>ortuguese with Open Decoder Gervásio <fixed-case>PT</fixed-case>*</title>
       <author><first>Rodrigo</first><last>Santos</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>16–26</pages>
       <abstract>To advance the neural decoding of Portuguese, in this paper we present a fully open Transformer-based, instruction-tuned decoder model that sets a new state of the art in this respect. To develop this decoder, which we named Gervásio PT*, a strong LLaMA 2 7B model was used as a starting point, and its further improvement through additional training was done over language resources that include new instruction data sets of Portuguese prepared for this purpose, which are also contributed in this paper. All versions of Gervásio are open source and distributed for free under an open license, including for either research or commercial usage, and can be run on consumer-grade hardware, thus seeking to contribute to the advancement of research and innovation in language technology for Portuguese.</abstract>
       <url hash="0c7d90f9">2024.sigul-1.3</url>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="4">
       <title>Assessing Pre-Built Speaker Recognition Models for Endangered Language Data</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>27–32</pages>
       <abstract>Significant research has focused on speaker recognition, determining which speaker is speaking in a segment of audio. However, few experiments have investigated speaker recognition for very low-resource or endangered languages. Furthermore, speaker recognition has the potential to support language documentation and revitalization efforts, making recordings more accessible to researchers and communities. Since endangered language datasets are too small to build competitive speaker representations from scratch, we investigate the application of large-scale pre-built speaker recognition models to bridge this gap. This paper compares four speaker recognition models on six diverse endangered language data sets. Comparisons contrast three recent neural network-based x-vector models and an earlier baseline i-vector model. Experiments demonstrate significantly stronger performance for some of the studied models. Further analysis highlights differences in effectiveness tied to the lengths of test audio segments and amount of data used for speaker modeling.</abstract>
       <url hash="6d73a56f">2024.sigul-1.4</url>
@@ -73,7 +73,7 @@
       <author><first>Svanhvít Lilja</first><last>Ingólfsdóttir</last></author>
       <author><first>Haukur Barri</first><last>Símonarson</last></author>
       <author><first>Hafsteinn</first><last>Einarsson</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Vilhjálmur</first><last>Þorsteinsson</last></author>
       <pages>45–52</pages>
       <abstract>Automatic spell and grammar checking can be done using various system architectures, and large language models have recently been used to solve the task with promising results. Here we describe a new method of creating test data to measure the performance of spell and grammar checkers, including large language models. Three types of test data represent different approaches to evaluation, from basic error detection to error correction with natural language explanations of the corrections made and error severity scores, which is the main novelty of this approach. These additions are especially useful when evaluating large language models. We present a spell and grammar checking test set for Icelandic in which the described approach is applied. The data consists of whole texts instead of discrete sentences, which facilitates evaluating context awareness of models. The resulting test set can be used to compare different spell and grammar checkers and is published under permissive licenses.</abstract>
@@ -146,7 +146,7 @@
       <author><first>Sally</first><last>Bruen</last></author>
       <author><first>Liang</first><last>Xu</last></author>
       <author><first>Monica</first><last>Ward</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <pages>90–96</pages>
       <abstract>Digital game-based language learning (DGBLL) can help with the language learning process. DGBLL applications can make learning more enjoyable and engaging, but they are difficult to develop. A DBGLL app that relies on target language texts obviously needs to be able to use texts of the appropriate level for the individual learners. This implies that text classification tools should be available to DGBLL developers, who may not be familiar with the target language, in order to incorporate suitable texts into their games. While text difficulty classifiers exist for many of the most commonly spoken languages, this is not the case for under-resourced languages, such as Irish. In this paper, we explore approaches to the development of text classifiers for Irish. In the first approach to text analysis and grading, we apply linguistic analysis to assess text complexity. Features from this approach are then used in machine learning-based text classification, which explores the application of a number of machine learning algorithms to the problem. Although the development of these text classifiers is at an early stage, they show promise, particularly in a low-resourced scenario.</abstract>
@@ -166,10 +166,10 @@
     <paper id="14">
       <title>Fostering the Ecosystem of Open Neural Encoders for <fixed-case>P</fixed-case>ortuguese with Albertina <fixed-case>PT</fixed-case>* Family</title>
       <author><first>Rodrigo</first><last>Santos</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Henrique</first><last>Lopes Cardoso</last></author>
       <author><first>Tomás Freitas</first><last>Osório</last></author>
       <author><first>Bernardo</first><last>Leite</last></author>
@@ -181,7 +181,7 @@
     <paper id="15">
       <title>Improving Language Coverage on <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-<fixed-case>OTS</fixed-case></title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>115–125</pages>
       <abstract>In this paper, we add under-resourced languages into the language repertoire of an existing off-the-shelf language identifier, HeLI-OTS. Adding more languages to a language identifier often comes with the drawback of lessened accuracy for the languages already part of the repertoire. We aim to minimize this effect. As sources for training and development data in the new languages, we use the OpenLID and FLORES-200 datasets. They are openly available high-quality datasets that are especially well-suited for language identifier development. By carefully inspecting the effect of each added language and the quality of their training and development data, we managed to add support for 20 new under-resourced languages to HeLI-OTS without affecting the performance of any existing languages to a noticeable extent.</abstract>
       <url hash="73881a20">2024.sigul-1.15</url>
@@ -209,7 +209,7 @@
     <paper id="18">
       <title><fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Code-Switching Speech Recognition Using the Machine Speech Chain Based Semi-Supervised Learning</title>
       <author><first>Rais Vaza Man</first><last>Tazakka</last></author>
-      <author><first>Dessi</first><last>Lestari</last></author>
+      <author id="dessi-puji-lestari"><first>Dessi</first><last>Lestari</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <author><first>Dipta</first><last>Tanaya</last></author>
       <author><first>Kurniawati</first><last>Azizah</last></author>
@@ -231,7 +231,7 @@
     <paper id="20">
       <title>Investigating Neural Machine Translation for Low-Resource Languages: Using <fixed-case>B</fixed-case>avarian as a Case Study</title>
       <author><first>Wan-hua</first><last>Her</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>155–167</pages>
       <abstract>Machine Translation has made impressive progress in recent years offering close to human-level performance on many languages, but studies have primarily focused on high-resource languages with broad online presence and resources. With the help of growing Large Language Models, more and more low-resource languages achieve better results through the presence of other languages. However, studies have shown that not all low-resource languages can benefit from multilingual systems, especially those with insufficient training and evaluation data. In this paper, we revisit state-of-the-art Neural Machine Translation techniques to develop automatic translation systems between German and Bavarian. We investigate conditions of low-resource languages such as data scarcity and parameter sensitivity and focus on refined solutions that combat low-resource difficulties and creative solutions such as harnessing language similarity. Our experiment entails applying Back-translation and Transfer Learning to automatically generate more training data and achieve higher translation performance. We demonstrate noisiness in the data and present our approach to carry out text preprocessing extensively. Evaluation was conducted using combined metrics: BLEU, chrF and TER. Statistical significance results with Bonferroni correction show surprisingly high baseline systems, and that Back-translation leads to significant improvement. Furthermore, we present a qualitative analysis of translation errors and system limitations.</abstract>
       <url hash="ab3e85ae">2024.sigul-1.20</url>
@@ -259,7 +259,7 @@
     <paper id="23">
       <title>Language Models on a Diet: Cost-Efficient Development of Encoders for Closely-Related Languages via Additional Pretraining</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Vít</first><last>Suchomel</last></author>
+      <author id="vit-suchomel"><first>Vít</first><last>Suchomel</last></author>
       <author><first>Peter</first><last>Rupnik</last></author>
       <author><first>Taja</first><last>Kuzman</last></author>
       <author><first>Rik</first><last>van Noord</last></author>
@@ -270,7 +270,7 @@
     </paper>
     <paper id="24">
       <title>Man or Machine: Evaluating Spelling Error Detection in <fixed-case>D</fixed-case>anish Newspaper Corpora</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <author><first>Jonas Nygaard</first><last>Blom</last></author>
       <author><first>Marianne</first><last>Rathje</last></author>
       <author><first>Jørgen</first><last>Schack</last></author>
@@ -285,7 +285,7 @@
       <author><first>Delphine</first><last>Bernhard</last></author>
       <author><first>Michael</first><last>Nauge</last></author>
       <author><first>Myriam</first><last>Bras</last></author>
-      <author><first>Pablo</first><last>Ruiz Fabo</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz Fabo</last></author>
       <author><first>Carole</first><last>Werner</last></author>
       <pages>212–221</pages>
       <abstract>Metadata are key components of language resources and facilitate their exploitation and re-use. Their creation is a labour intensive process and requires a modeling step, which identifies resource-specific information as well as standards and controlled vocabularies that can be reused. In this article, we focus on metadata for documenting text bases for regional languages of France characterised by several levels of variation (space, time, usage, social status), based on a survey of existing metadata schema. Moreover, we implement our metadata model as a database structure for the Heurist data management system, which combines both the ease of use of spreadsheets and the ability to model complex relationships between entities of relational databases. The Heurist template is made freely available and was used to describe metadata for text bases in Alsatian and Poitevin-Santongeais. We also propose tools to automatically generate XML metadata headers files from the database.</abstract>
@@ -347,7 +347,7 @@
       <title><fixed-case>P</fixed-case>ersian<fixed-case>E</fixed-case>mo: Enhancing <fixed-case>F</fixed-case>arsi-<fixed-case>D</fixed-case>ari Emotion Analysis with a Hybrid Transformer and Recurrent Neural Network Model</title>
       <author><first>Mohammad Ali</first><last>Hussiny</last></author>
       <author><first>Mohammad Arif</first><last>Payenda</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>257–263</pages>
       <abstract>Emotion analysis is a critical research domain within the field of natural language processing (NLP). While substantial progress has been made in this area for the Persian language, there is still a need for more precise models and larger datasets specifically focusing on the Farsi and Dari dialects. In this research, we introduce “LearnArmanEmo” as a new dataset and a superior ensemble approach for Persian text emotion classification. Our proposed model, which combines XLM-RoBERTa-large and BiGRU, undergoes evaluation on LetHerLearn for the Dari dialect, ARMANEMO for the Farsi dialect, and LearnArmanEmo for both Dari and Farsi dialects. The empirical results substantiate the efficacy of our approach with the combined model demonstrating superior performance. Specifically, our model achieves an F1 score of 72.9% on LetHerLearn, an F1 score of 77.1% on ARMANEMO, and an F1 score of 78.8% on the LearnArmanEmo dataset, establishing it as a better ensemble model for these datasets. These findings underscore the potential of this hybrid model as a useful tool for enhancing the performance of emotion analysis in Persian language processing.</abstract>
       <url hash="babd90a8">2024.sigul-1.31</url>
@@ -454,7 +454,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>TELP</fixed-case> – Text Extraction with Linguistic Patterns</title>
-      <author><first>João</first><last>Cordeiro</last></author>
+      <author id="joao-paulo-cordeiro"><first>João</first><last>Cordeiro</last></author>
       <author><first>Purificação Moura</first><last>Silvano</last></author>
       <author><first>António</first><last>Leal</last></author>
       <author><first>Sebastião</first><last>Pais</last></author>
@@ -497,19 +497,19 @@
     <paper id="45">
       <title><fixed-case>U</fixed-case>ni<fixed-case>D</fixed-case>ive: A <fixed-case>COST</fixed-case> Action on Universality, Diversity and Idiosyncrasy in Language Technology</title>
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Anabela</first><last>Barreiro</last></author>
       <author><first>Olesea</first><last>Caftanatov</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Kaja</first><last>Dobrovoljc</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Nurit</first><last>Melnik</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Abigail</first><last>Walsh</last></author>
       <author><first>Beata</first><last>Wójtowicz</last></author>
diff --git a/data/xml/2024.smm4h.xml b/data/xml/2024.smm4h.xml
index 0a6e1a943f..f402bb62b7 100644
--- a/data/xml/2024.smm4h.xml
+++ b/data/xml/2024.smm4h.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks</booktitle>
       <editor><first>Dongfang</first><last>Xu</last></editor>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bangkok, Thailand</address>
       <month>August</month>
@@ -22,7 +22,7 @@
       <author><first>Thang</first><last>Ta</last></author>
       <author><first>Abu</first><last>Rahman</last></author>
       <author><first>Lotfollah</first><last>Najjar</last><affiliation>University of Nebraska at Omaha</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>1-4</pages>
       <abstract>This paper describes our participation in Task 3 and Task 5 of the #SMM4H (Social Media Mining for Health) 2024 Workshop, explicitly targeting the classification challenges within tweet data. Task 3 is a multi-class classification task centered on tweets discussing the impact of outdoor environments on symptoms of social anxiety. Task 5 involves a binary classification task focusing on tweets reporting medical disorders in children. We applied transfer learning from pre-trained encoder-decoder models such as BART-base and T5-small to identify the labels of a set of given tweets. We also presented some data augmentation methods to see their impact on the model performance. Finally, the systems obtained the best F1 score of 0.627 in Task 3 and the best F1 score of 0.841 in Task 5</abstract>
       <url hash="1810b397">2024.smm4h-1.1</url>
@@ -68,7 +68,7 @@
     <paper id="6">
       <title><fixed-case>RIGA</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>-2024 Task 1: Enhancing <fixed-case>ADE</fixed-case> discovery with <fixed-case>GPT</fixed-case>-4</title>
       <author><first>Eduards</first><last>Mukans</last></author>
-      <author><first>Guntis</first><last>Barzdins</last><affiliation>University of Latvia</affiliation></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last><affiliation>University of Latvia</affiliation></author>
       <pages>23-27</pages>
       <abstract>The following is a description of the RIGA team’s submissions for the SMM4H-2024 Task 1: Extraction and normalization of adverse drug events (ADEs) in English tweets. Our approach focuses on utilizing Large Language Models (LLMs) to generate data that enhances the fine-tuning of classification and Named Entity Recognition (NER) models. Our solution significantly outperforms mean and median submissions of other teams. The efficacy of our ADE extraction from tweets is comparable to the current state-of-the-art solution, established as the task baseline. The code for our method is available on GitHub (https://github.com/emukans/smm4h2024-riga)</abstract>
       <url hash="7394c99a">2024.smm4h-1.6</url>
@@ -156,8 +156,8 @@
       <author><first>Victor</first><last>Pozos</last></author>
       <author><first>Helena</first><last>Gomez Adorno</last><affiliation>Instituto de Investigaciones en Matemáticas Aplicadas y en Sistemas - UNAM</affiliation></author>
       <author><first>Gibran</first><last>Fuentes-Pineda</last></author>
-      <author><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <pages>63-66</pages>
       <abstract>We present our approach to solving the task of identifying the effect of outdoor activities on social anxiety based on reddit posts. We employed state-of-the-art transformer models enhanced with a combination of advanced loss functions. Data augmentation techniques were also used to address class imbalance within the training set. Our method achieved a macro-averaged F1-score of 0.655 on the test data, surpassing the workshop’s mean F1-Score of 0.519. These findings suggest that integrating weighted loss functions improves the performance of transformer models in classifying unbalanced text data, while data augmentation can improve the model’s ability to generalize.</abstract>
       <url hash="8f96da2e">2024.smm4h-1.14</url>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>IMS</fixed-case>_medic<fixed-case>ALY</fixed-case> at #<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2024: Detecting Impacts of Outdoor Spaces on Social Anxiety with Data Augmented Ensembling</title>
-      <author><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Lynn</first><last>Greschner</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
       <author><first>Yarik</first><last>Menchaca Resendiz</last></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
@@ -294,7 +294,7 @@
       <title><fixed-case>ADE</fixed-case> Oracle at #<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2024: A Two-Stage <fixed-case>NLP</fixed-case> System for Extracting and Normalizing Adverse Drug Events from Tweets</title>
       <author><first>Andrew</first><last>Davis</last><affiliation>Indiana University</affiliation></author>
       <author><first>Billy</first><last>Dickson</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University at Bloomington</affiliation></author>
       <pages>117-120</pages>
       <abstract>This study describes the approach of Team ADE Oracle for Task 1 of the Social Media Mining for Health Applications (#SMM4H) 2024 shared task. Task 1 challenges participants to detect adverse drug events (ADEs) within English tweets and normalize these mentions against the Medical Dictionary for Regulatory Activities standards. Our approach utilized a two-stage NLP pipeline consisting of a named entity recognition model, retrained to recognize ADEs, followed by vector similarity assessment with a RoBERTa-based model. Despite achieving a relatively high recall of 37.4% in the extraction of ADEs, indicative of effective identification of potential ADEs, our model encountered challenges with precision. We found marked discrepancies between recall and precision between the test set and our validation set, which underscores the need for further efforts to prevent overfitting and enhance the model’s generalization capabilities for practical applications.</abstract>
       <url hash="97a4da5a">2024.smm4h-1.27</url>
@@ -353,7 +353,7 @@
     <paper id="33">
       <title><fixed-case>KUL</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies</title>
       <author><first>Sumam</first><last>Francis</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>142-145</pages>
       <abstract>This paper presents our models for the Social Media Mining for Health 2024 shared task, specifically Task 5, which involves classifying tweets reporting a child with childhood disorders (annotated as “1”) versus those merely mentioning a disorder (annotated as “0”). We utilized a classification model enhanced with diverse textual and language model-based augmentations. To ensure quality, we used semantic similarity, perplexity, and lexical diversity as evaluation metrics. Combining supervised contrastive learning and cross-entropy-based learning, our best model, incorporating R-drop and various LM generation-based augmentations, achieved an impressive F1 score of 0.9230 on the test set, surpassing the task mean and median scores.</abstract>
       <url hash="eb0ba970">2024.smm4h-1.33</url>
@@ -425,7 +425,7 @@
       <author><first>Shoko</first><last>Wakamiya</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Eiji</first><last>Aramaki</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Sebastian</first><last>Möller</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <pages>170-182</pages>
       <abstract>This paper provides an overview of Task 2 from the Social Media Mining for Health 2024 shared task (#SMM4H 2024), which focused on Named Entity Recognition (NER, Subtask 2a) and the joint task of NER and Relation Extraction (RE, Subtask 2b) for detecting adverse drug reactions (ADRs) in German, Japanese, and French texts written by patients. Participants were challenged with a few-shot learning scenario, necessitating models that can effectively generalize from limited annotated examples. Despite the diverse strategies employed by the participants, the overall performance across submissions from three teams highlighted significant challenges. The results underscored the complexity of extracting entities and relations in multi-lingual contexts, especially from the noisy and informal nature of user-generated content. Further research is required to develop robust systems capable of accurately identifying and associating ADR-related information in low-resource and multilingual settings.</abstract>
       <url hash="31380fa2">2024.smm4h-1.39</url>
@@ -441,7 +441,7 @@
       <author><first>Eiji</first><last>Aramaki</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Shoko</first><last>Wakamiya</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Shuntaro</first><last>Yada</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <author><first>Karen</first><last>O’Connor</last><affiliation>University of Pennsylvania, University of Pennsylvania</affiliation></author>
       <author><first>Sai</first><last>Samineni</last><affiliation>Cedars-Sinai Medical Center</affiliation></author>
       <author><first>Sophia</first><last>Hernandez</last><affiliation>University of Pittsburgh, Pittsburgh</affiliation></author>
@@ -455,7 +455,7 @@
       <author><first>Raul</first><last>Rodriguez-Esteban</last><affiliation>F. Hoffmann-La Roche Ltd</affiliation></author>
       <author><first>Juan</first><last>Banda</last><affiliation>Stanford University</affiliation></author>
       <author><first>Ivan</first><last>Amaro</last><affiliation>Cedars-Sinai Medical Center</affiliation></author>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Graciela</first><last>Gonzalez-Hernandez</last><affiliation>Cedars-Sinai Medical Center</affiliation></author>
       <pages>183-195</pages>
       <abstract>For the past nine years, the Social Media Mining for Health Applications (#SMM4H) shared tasks have promoted community-driven development and evaluation of advanced natural language processing systems to detect, extract, and normalize health-related information in publicly available user-generated content. This year, #SMM4H included seven shared tasks in English, Japanese, German, French, and Spanish from Twitter, Reddit, and health forums. A total of 84 teams from 22 countries registered for #SMM4H, and 45 teams participated in at least one task. This represents a growth of 180% and 160% in registration and participation, respectively, compared to the last iteration. This paper provides an overview of the tasks and participating systems. The data sets remain available upon request, and new systems can be evaluated through the post-evaluation phase on CodaLab.</abstract>
diff --git a/data/xml/2024.starsem.xml b/data/xml/2024.starsem.xml
index 2fe71fa2d6..4ca9427998 100644
--- a/data/xml/2024.starsem.xml
+++ b/data/xml/2024.starsem.xml
@@ -20,7 +20,7 @@
       <title><fixed-case>MASSIVE</fixed-case> Multilingual <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation: A Dataset and Baselines for Hallucination Detection</title>
       <author><first>Michael</first><last>Regan</last><affiliation>Paul G. Allen School of Computer Science &amp; Engineering</affiliation></author>
       <author><first>Shira</first><last>Wein</last><affiliation>Georgetown University</affiliation></author>
-      <author><first>George</first><last>Baker</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="george-baker"><first>George</first><last>Baker</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Emilio</first><last>Monti</last><affiliation>Amazon</affiliation></author>
       <pages>1-17</pages>
       <abstract>Abstract Meaning Representation (AMR) is a semantic formalism that captures the core meaning of an utterance. There has been substantial work developing AMR corpora in English and more recently across languages, though the limited size of existing datasets and the cost of collecting more annotations are prohibitive. With both engineering and scientific questions in mind, we introduce MASSIVE-AMR, a dataset with more than 84,000 text-to-graph annotations, currently the largest and most diverse of its kind: AMR graphs for 1,685 information-seeking utterances mapped to 50+ typologically diverse languages. We describe how we built our resource and its unique features before reporting on experiments using large language models for multilingual AMR and SPARQL parsing as well as applying AMRs for hallucination detection in the context of knowledge base question answering, with results shedding light on persistent issues using LLMs for structured parsing.</abstract>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title>How Does Stereotype Content Differ across Data Sources?</title>
-      <author><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>18-34</pages>
@@ -44,7 +44,7 @@
       <title>Polysemy through the lens of psycholinguistic variables: a dataset and an evaluation of static and contextualized language models</title>
       <author><first>Andrea</first><last>Bruera</last><affiliation>Max Planck Institute for Human Cognitive and Brain Sciences</affiliation></author>
       <author><first>Farbod</first><last>Zamani</last><affiliation>University of London</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London</affiliation></author>
       <pages>35-48</pages>
       <abstract>Polysemes are words that can have different senses depending on the context of utterance: for instance, ‘newspaper’ can refer to an organization (as in ‘manage the newspaper’) or to an object (as in ‘open the newspaper’). Contrary to a large body of evidence coming from psycholinguistics, polysemy has been traditionally modelled in NLP by assuming that each sense should be given a separate representation in a lexicon (e.g. WordNet). This led to the current situation, where datasets used to evaluate the ability of computational models of semantics miss crucial details about the representation of polysemes, thus limiting the amount of evidence that can be gained from their use. In this paper we propose a framework to approach polysemy as a continuous variation in psycholinguistic properties of a word in context. This approach accommodates different sense interpretations, without postulating clear-cut jumps between senses. First we describe a publicly available English dataset that we collected, where polysemes in context (verb-noun phrases) are annotated for their concreteness and body sensory strength. Then, we evaluate static and contextualized language models in their ability to predict the ratings of each polyseme in context, as well as in their ability to capture the distinction among senses, revealing and characterizing in an interpretable way the models’ flaws.</abstract>
       <url hash="c43c6a28">2024.starsem-1.3</url>
@@ -80,7 +80,7 @@
     <paper id="6">
       <title><fixed-case>ROUGE</fixed-case>-K: Do Your Summaries Have Keywords?</title>
       <author><first>Sotaro</first><last>Takeshita</last><affiliation>University of Mannheim</affiliation></author>
-      <author><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Kai</first><last>Eckert</last><affiliation>Mannheim University of Applied Sciences</affiliation></author>
       <pages>69-79</pages>
       <abstract>Keywords, that is, content-relevant words in summaries play an important role in efficient information conveyance, making it critical to assess if system-generated summaries contain such informative words during evaluation. However, existing evaluation metrics for extreme summarization models do not pay explicit attention to keywords in summaries, leaving developers ignorant of their presence. To address this issue, we present a keyword-oriented evaluation metric, dubbed ROUGE-K, which provides a quantitative answer to the question of – How well do summaries include keywords? Through the lens of this keyword-aware metric, we surprisingly find that a current strong baseline model often misses essential information in their summaries. Our analysis reveals that human annotators indeed find the summaries with more keywords to be more relevant to the source documents. This is an important yet previously overlooked aspect in evaluating summarization systems. Finally, to enhance keyword inclusion, we propose four approaches for incorporating word importance into a transformer-based model and experimentally show that it enables guiding models to include more keywords while keeping the overall quality.</abstract>
@@ -107,7 +107,7 @@
       <author><first>Ronja</first><last>Utescher</last><affiliation>University of Bielefeld</affiliation></author>
       <author><first>Hannes</first><last>Grönner</last><affiliation>University of Bielefeld</affiliation></author>
       <author><first>Judith</first><last>Sieker</last><affiliation>University of Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <pages>93-105</pages>
       <abstract>Research in Language &amp; Vision rarely uses naturally occurring multimodal documents as Wikipedia articles, since they feature complex image-text relations and implicit image-text alignments. In this paper, we provide one of the first datasets that provides ground-truth annotations of image-text alignments in multi-paragraph multi-image articles. The dataset can be used to study phenomena of visual language grounding in longer documents and assess retrieval capabilities of language models trained on, e.g., captioning data. Our analyses show that there are systematic linguistic differences between the image captions and descriptive sentences from the article’s text and that intra-document retrieval is a challenging task for state-of-the-art models in L&amp;V (CLIP, VILT, MCSE).</abstract>
       <url hash="3967c5d3">2024.starsem-1.8</url>
@@ -170,7 +170,7 @@
       <title>A Closer Look at Claim Decomposition</title>
       <author><first>Miriam</first><last>Wanner</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Seth</first><last>Ebner</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Mark</first><last>Dredze</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>153-175</pages>
@@ -213,7 +213,7 @@
     <paper id="17">
       <title><fixed-case>PDDLEGO</fixed-case>: Iterative Planning in Textual Environments</title>
       <author id="li-zhang-upenn"><first>Li</first><last>Zhang</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Peter</first><last>Jansen</last><affiliation>The University of Arizona</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>The University of Arizona</affiliation></author>
       <author><first>Tianyi</first><last>Zhang</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Chris</first><last>Callison-Burch</last><affiliation>University of Pennsylvania</affiliation></author>
@@ -229,8 +229,8 @@
     <paper id="18">
       <title><fixed-case>VOLIMET</fixed-case>: A Parallel Corpus of Literal and Metaphorical Verb-Object Pairs for <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish–<fixed-case>F</fixed-case>rench</title>
       <author><first>Prisca</first><last>Piccirilli</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich and Munich Centre for Machine Learning</affiliation></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich and Munich Centre for Machine Learning</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>222-237</pages>
       <abstract>The interplay of cultural and linguistic elements that characterizes metaphorical language poses a substantial challenge for both human comprehension and machine processing. This challenge goes beyond monolingual settings and becomes particularly complex in translation, even more so in automatic translation. We present VOLIMET, a corpus of 2,916 parallel sentences containing gold standard alignments of metaphorical verb-object pairs and their literal paraphrases, e.g., tackle/address question, from English to German and French. On the one hand, the parallel nature of our corpus enables us to explore monolingual patterns for metaphorical vs. literal uses in English. On the other hand, we investigate different aspects of cross-lingual translations into German and French and the extent to which metaphoricity and literalness in the source language are transferred to the target languages. Monolingually, our findings reveal clear preferences in using metaphorical or literal uses of verb-object pairs. Cross-lingually, we observe a rich variability in translations as well as different behaviors for our two target languages.</abstract>
       <url hash="92321cb5">2024.starsem-1.18</url>
@@ -242,7 +242,7 @@
       <author><first>Deniz Ekin</first><last>Yavas</last><affiliation>Heinrich Heine University Düsseldorf</affiliation></author>
       <author><first>Timothée</first><last>Bernard</last><affiliation>Université Paris Cité</affiliation></author>
       <author><first>Laura</first><last>Kallmeyer</last><affiliation>Heinrich Heine University Düsseldorf</affiliation></author>
-      <author><first>Benoît</first><last>Crabbé</last><affiliation>Université Paris Cité</affiliation></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last><affiliation>Université Paris Cité</affiliation></author>
       <pages>238-251</pages>
       <abstract>This paper addresses the problem of word sense induction (WSI) via clustering of word embeddings. It starts from the hypothesis that contextualized word representations obtained from pre-trained language models (LMs), while being a valuable source for WSI, encode more information than what is necessary for the identification of word senses and some of this information affect the performance negatively in unsupervised settings. We investigate whether using contextualized representations that are invariant to these ‘nuisance features’ can increase WSI performance. For this purpose, we propose an adaptation of the adversarial training framework proposed by Jaiswal et al. (2020) to erase specific information from the representations of LMs, thereby creating feature-invariant representations. We experiment with erasing (i) morphological and (ii) syntactic features. The results of subsequent clustering for WSI show that these features indeed act like noise: Using feature-invariant representations, compared to using the original representations, increases clustering-based WSI performance. Furthermore, we provide an in-depth analysis of how the information about the syntactic and morphological features of words relate to and affect WSI performance.</abstract>
       <url hash="cd13d58b">2024.starsem-1.19</url>
@@ -253,7 +253,7 @@
       <title>What’s wrong with your model? A Quantitative Analysis of Relation Classification</title>
       <author><first>Elisa</first><last>Bassignana</last><affiliation>IT University of Copenhagen and Pioneer Center for Artificial Intelligence</affiliation></author>
       <author><first>Rob</first><last>van der Goot</last><affiliation>IT University of Copenhagen and Pioneer Center for Artificial Intelligence</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>IT University of Copenhagen and LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>IT University of Copenhagen and LMU Munich</affiliation></author>
       <pages>252-263</pages>
       <abstract>With the aim of improving the state-of-the-art (SOTA) on a target task, a standard strategy in Natural Language Processing (NLP) research is to design a new model, or modify the existing SOTA, and then benchmark its performance on the target task. We argue in favor of enriching this chain of actions by a preliminary error-guided analysis: First, explore weaknesses by analyzing the hard cases where the existing model fails, and then target the improvement based on those. Interpretable evaluation has received little attention for structured prediction tasks. Therefore we propose the first in-depth analysis suite for Relation Classification (RC), and show its effectiveness through a case study. We propose a set of potentially influential attributes to focus on (e.g., entity distance, sentence length). Then, we bucket our datasets based on these attributes, and weight the importance of them through correlations. This allows us to identify highly challenging scenarios for the RC model. By exploiting the findings of our analysis, with a carefully targeted adjustment to our architecture, we effectively improve the performance over the baseline by &gt;3 Micro-F1.</abstract>
       <url hash="94fca07c">2024.starsem-1.20</url>
@@ -299,7 +299,7 @@
     <paper id="24">
       <title>Multilingual and Code-Switched Sentence Ordering</title>
       <author><first>Alexandre</first><last>Salle</last><affiliation>VTEX</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon.com, Inc.</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon.com, Inc.</affiliation></author>
       <pages>308-313</pages>
       <abstract>Sentence Ordering (SO) is a linguistic task which requires re-ordering of shuffled sentences into a coherent paragraph. SO has downstream applications, but also serves as a semantic probe for computational models as this capability is essential for understanding narrative structures, causal and temporal relations within texts. Despite its importance, prior research has been limited to predictable English language structures and has not thoroughly addressed the complexities of multilingual and varied narrative contexts. To fill this gap, we introduce a novel and comprehensive Multilingual Sentence Ordering task that extends SO to diverse narratives across 12 languages, including challenging code-switched texts. We have developed MultiSO, a new benchmark dataset that represents these challenges. Our findings reveal that both specialized sentence ordering models and advanced Large Language Models like GPT-4 face significant challenges with this task.</abstract>
       <url hash="2bfc178a">2024.starsem-1.24</url>
@@ -309,7 +309,7 @@
     <paper id="25">
       <title><fixed-case>HANS</fixed-case>, are you clever? Clever Hans Effect Analysis of Neural Systems</title>
       <author><first>Leonardo</first><last>Ranaldi</last><affiliation>Università degli studi Roma Tor Vergata</affiliation></author>
-      <author><first>Fabio</first><last>Zanzotto</last><affiliation>Università degli studi Roma Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio</first><last>Zanzotto</last><affiliation>Università degli studi Roma Tor Vergata</affiliation></author>
       <pages>314-325</pages>
       <abstract>Large Language Models (LLMs) have been exhibiting outstanding abilities to reason around cognitive states, intentions, and reactions of all people involved, letting humans guide and comprehend day-to-day social interactions effectively. In fact, several multiple-choice questions (MCQ) benchmarks have been proposed to construct solid assessments of the models’ abilities. However, earlier works demonstrate the presence of inherent “order bias” in LLMs, posing challenges to the appropriate evaluation. In this paper, we investigate LLMs’ resilience abilities through a series of probing tests using four MCQ benchmarks. Introducing adversarial examples, we show a significant performance gap, mainly when varying the order of the choices, which reveals a selection bias and brings into discussion reasoning abilities. Following a correlation between first positions and model choices due to positional bias, we hypothesized the presence of structural heuristics in the decision-making process of the LLMs, strengthened by including significant examples in few-shot scenarios. Finally, by using the Chain-of-Thought (CoT) technique, we elicit the model to reason and mitigate the bias by obtaining more robust models.</abstract>
       <url hash="926bc2d8">2024.starsem-1.25</url>
@@ -368,7 +368,7 @@
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Davide</first><last>Venditti</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Dario</first><last>Onorati</last><affiliation>Sapienza University of Rome</affiliation></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>372-384</pages>
       <abstract>Cheap-to-Build Very Large-Language Models (CtB-LLMs) with affordable training are emerging as the next big revolution in natural language processing and understanding. These CtB-LLMs are democratizing access to trainable Very Large-Language Models (VLLMs) and, thus, may represent the building blocks of many NLP systems solving downstream tasks. Hence, a little or a large bias in CtB-LLMs may cause huge harm. In this paper, we performed a large investigation of the bias of three families of CtB-LLMs, and we showed that debiasing techniques are effective and usable. Indeed, according to current tests, the LLaMA and the OPT families have an important bias in gender, race, religion, and profession. In contrast to the analysis for other LMMs, we discovered that bias depends not on the number of parameters but on the perplexity. Finally, the debiasing of OPT using LORA reduces bias up to 4.12 points in the normalized stereotype score.</abstract>
       <url hash="4ba0e749">2024.starsem-1.30</url>
diff --git a/data/xml/2024.swisstext.xml b/data/xml/2024.swisstext.xml
index 175a4805a8..72e577c22e 100644
--- a/data/xml/2024.swisstext.xml
+++ b/data/xml/2024.swisstext.xml
@@ -160,7 +160,7 @@
       <author><first>Vuk</first><last>Vuković</last></author>
       <author><first>Daniel</first><last>Dobos</last></author>
       <author><first>Fatemeh</first><last>Borran</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>164–164</pages>
       <url hash="1f91fb48">2024.swisstext-1.16</url>
       <bibkey>ferrari-etal-2024-llm</bibkey>
@@ -169,7 +169,7 @@
       <title>Annotation Tool for Dataset Creation</title>
       <author><first>Patrick</first><last>Giedemann</last></author>
       <author><first>Pius</first><last>von Däniken</last></author>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <pages>165–165</pages>
       <url hash="cf439981">2024.swisstext-1.17</url>
       <bibkey>giedemann-etal-2024-annotation</bibkey>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>C</fixed-case>ha<fixed-case>LL</fixed-case> - A Chatbot for Language Learners</title>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Luzia</first><last>Sauer</last></author>
       <author><first>Gerold</first><last>Schneider</last></author>
       <author><first>Johannes</first><last>Graën</last></author>
@@ -203,7 +203,7 @@
       <author><first>Katsiaryna</first><last>Mlynchyk</last></author>
       <author><first>Ahmet Yavuz</first><last>Uluslu</last></author>
       <author><first>Irina-Camelia</first><last>Stroescu</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Michael</first><last>Geiss</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>168–168</pages>
@@ -401,9 +401,9 @@
     </paper>
     <paper id="40">
       <title><fixed-case>S</fixed-case>wiss <fixed-case>AI</fixed-case> Initiative - Collecting Large Amounts of High-Quality Data for Training Large Language Models</title>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
-      <author><first>Emanuela</first><last>Boros</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boros</last></author>
       <author><first>Maximilian</first><last>Böther</last></author>
       <author><first>Christiane</first><last>Sibille</last></author>
       <author><first>Ihor</first><last>Protsenko</last></author>
@@ -561,7 +561,7 @@
     </paper>
     <paper id="57">
       <title>Battle of <fixed-case>NLP</fixed-case> Ideas</title>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Katsiaryna</first><last>Mlynchyk</last></author>
       <author><first>Philipp</first><last>Kuntschik</last></author>
       <pages>248–248</pages>
diff --git a/data/xml/2024.tacl.xml b/data/xml/2024.tacl.xml
index f1a1fd912c..3b0da153f3 100644
--- a/data/xml/2024.tacl.xml
+++ b/data/xml/2024.tacl.xml
@@ -40,8 +40,8 @@
       <author><first>Faisal</first><last>Ladhak</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <doi>10.1162/tacl_a_00632</doi>
       <abstract>Large language models (LLMs) have shown promise for automatic summarization but the reasons behind their successes are poorly understood. By conducting a human evaluation on ten LLMs across different pretraining methods, prompts, and model scales, we make two important observations. First, we find instruction tuning, not model size, is the key to the LLM’s zero-shot summarization capability. Second, existing studies have been limited by low-quality references, leading to underestimates of human performance and lower few-shot and finetuning performance. To better evaluate LLMs, we perform human evaluation over high-quality summaries we collect from freelance writers. Despite major stylistic differences such as the amount of paraphrasing, we find that LLM summaries are judged to be on par with human written summaries.</abstract>
       <pages>39–57</pages>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="7">
       <title>Addressing the Binning Problem in Calibration Assessment through Scalar Annotations</title>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Anqi</first><last>Liu</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <doi>10.1162/tacl_a_00636</doi>
@@ -125,7 +125,7 @@
     </paper>
     <paper id="9">
       <title>Lost in the Middle: How Language Models Use Long Contexts</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Kevin</first><last>Lin</last></author>
       <author><first>John</first><last>Hewitt</last></author>
       <author><first>Ashwin</first><last>Paranjape</last></author>
@@ -215,7 +215,7 @@
       <title>Explicitly Representing Syntax Improves Sentence-to-Layout Prediction of Unexpected Situations</title>
       <author><first>Wolf</first><last>Nuyts</last></author>
       <author><first>Ruben</first><last>Cartuyvels</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <doi>10.1162/tacl_a_00643</doi>
       <abstract>Recognizing visual entities in a natural language sentence and arranging them in a 2D spatial layout require a compositional understanding of language and space. This task of layout prediction is valuable in text-to-image synthesis as it allows localized and controlled in-painting of the image. In this comparative study it is shown that we can predict layouts from language representations that implicitly or explicitly encode sentence syntax, if the sentences mention similar entity-relationships to the ones seen during training. To test compositional understanding, we collect a test set of grammatically correct sentences and layouts describing compositions of entities and relations that unlikely have been seen during training. Performance on this test set substantially drops, showing that current models rely on correlations in the training data and have difficulties in understanding the structure of the input sentences. We propose a novel structural loss function that better enforces the syntactic structure of the input sentence and show large performance gains in the task of 2D spatial layout prediction conditioned on text. The loss has the potential to be used in other generation tasks where a tree-like structure underlies the conditioning modality. Code, trained models, and the USCOCO evaluation set are available via Github.1</abstract>
       <pages>264–282</pages>
@@ -239,7 +239,7 @@
     </paper>
     <paper id="17">
       <title>The Impact of Word Splitting on the Semantic Content of Contextualized Word Representations</title>
-      <author><first>Aina Garí</first><last>Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina Garí</first><last>Soler</last></author>
       <author><first>Matthieu</first><last>Labeau</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
       <doi>10.1162/tacl_a_00647</doi>
@@ -301,7 +301,7 @@
       <author><first>Lukas</first><last>Edman</last></author>
       <author><first>Gabriele</first><last>Sarti</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <doi>10.1162/tacl_a_00651</doi>
       <abstract>Pretrained character-level and byte-level language models have been shown to be competitive with popular subword models across a range of Natural Language Processing tasks. However, there has been little research on their effectiveness for neural machine translation (NMT), particularly within the popular pretrain-then-finetune paradigm. This work performs an extensive comparison across multiple languages and experimental conditions of character- and subword-level pretrained models (ByT5 and mT5, respectively) on NMT. We show the effectiveness of character-level modeling in translation, particularly in cases where fine-tuning data is limited. In our analysis, we show how character models’ gains in translation quality are reflected in better translations of orthographically similar words and rare words. While evaluating the importance of source texts in driving model predictions, we highlight word-level patterns within ByT5, suggesting an ability to modulate word-level and character-level information during generation. We conclude by assessing the efficiency tradeoff of byte models, suggesting their usage in non-time-critical scenarios to boost translation quality.</abstract>
@@ -314,8 +314,8 @@
       <author><first>Valentin</first><last>Hofmann</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00652</doi>
       <abstract>While pretrained language models (PLMs) have been shown to possess a plethora of linguistic knowledge, the existing body of research has largely neglected extralinguistic knowledge, which is generally difficult to obtain by pretraining on text alone. Here, we contribute to closing this gap by examining geolinguistic knowledge, i.e., knowledge about geographic variation in language. We introduce geoadaptation, an intermediate training step that couples language modeling with geolocation prediction in a multi-task learning setup. We geoadapt four PLMs, covering language groups from three geographic areas, and evaluate them on five different tasks: fine-tuned (i.e., supervised) geolocation prediction, zero-shot (i.e., unsupervised) geolocation prediction, fine-tuned language identification, zero-shot language identification, and zero-shot prediction of dialect features. Geoadaptation is very successful at injecting geolinguistic knowledge into the PLMs: The geoadapted PLMs consistently outperform PLMs adapted using only language modeling (by especially wide margins on zero-shot prediction tasks), and we obtain new state-of-the-art results on two benchmarks for geolocation prediction and language identification. Furthermore, we show that the effectiveness of geoadaptation stems from its ability to geographically retrofit the representation space of the PLMs.</abstract>
       <pages>411–431</pages>
@@ -347,7 +347,7 @@
     <paper id="26">
       <title><fixed-case>C</fixed-case>onvo<fixed-case>S</fixed-case>ense: Overcoming Monotonous Commonsense Inferences for Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Sarah E.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <doi>10.1162/tacl_a_00659</doi>
       <abstract>Mastering commonsense understanding and reasoning is a pivotal skill essential for conducting engaging conversations. While there have been several attempts to create datasets that facilitate commonsense inferences in dialogue contexts, existing datasets tend to lack in-depth details, restate information already present in the conversation, and often fail to capture the multifaceted nature of commonsense reasoning. In response to these limitations, we compile a new synthetic dataset for commonsense reasoning in dialogue contexts using GPT, ℂonvoSense, that boasts greater contextual novelty, offers a higher volume of inferences per example, and substantially enriches the detail conveyed by the inferences. Our dataset contains over 500,000 inferences across 12,000 dialogues with 10 popular inference types, which empowers the training of generative commonsense models for dialogue that are superior in producing plausible inferences with high novelty when compared to models trained on the previous datasets. To the best of our knowledge, ℂonvoSense is the first of its kind to provide such a multitude of novel inferences at such a large scale.</abstract>
       <pages>467–483</pages>
@@ -374,7 +374,7 @@
       <author><first>Jiseon</first><last>Kim</last></author>
       <author><first>Nayeon</first><last>Lee</last></author>
       <author><first>Haneul</first><last>Yoo</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <author><first>Hwaran</first><last>Lee</last></author>
       <doi>10.1162/tacl_a_00661</doi>
       <abstract>Warning: This paper contains examples of stereotypes and biases. The Bias Benchmark for Question Answering (BBQ) is designed to evaluate social biases of language models (LMs), but it is not simple to adapt this benchmark to cultural contexts other than the US because social biases depend heavily on the cultural context. In this paper, we present KoBBQ, a Korean bias benchmark dataset, and we propose a general framework that addresses considerations for cultural adaptation of a dataset. Our framework includes partitioning the BBQ dataset into three classes—Simply-Transferred (can be used directly after cultural translation), Target-Modified (requires localization in target groups), and Sample-Removed (does not fit Korean culture)—and adding four new categories of bias specific to Korean culture. We conduct a large-scale survey to collect and validate the social biases and the targets of the biases that reflect the stereotypes in Korean culture. The resulting KoBBQ dataset comprises 268 templates and 76,048 samples across 12 categories of social bias. We use KoBBQ to measure the accuracy and bias scores of several state-of-the-art multilingual LMs. The results clearly show differences in the bias of LMs as measured by KoBBQ and a machine-translated version of BBQ, demonstrating the need for and utility of a well-constructed, culturally aware social bias benchmark.</abstract>
@@ -427,7 +427,7 @@
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Shanbo</first><last>Cheng</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <doi>10.1162/tacl_a_00655</doi>
       <abstract>Large-scale pretrained language models (LLMs), such as ChatGPT and GPT4, have shown strong abilities in multilingual translation, without being explicitly trained on parallel corpora. It is intriguing how the LLMs obtain their ability to carry out translation instructions for different languages. In this paper, we present a detailed analysis by finetuning a multilingual pretrained language model, XGLM-7.5B, to perform multilingual translation following given instructions. Firstly, we show that multilingual LLMs have stronger translation abilities than previously demonstrated. For a certain language, the translation performance depends on its similarity to English and the amount of data used in the pretraining phase. Secondly, we find that LLMs’ ability to carry out translation instructions relies on the understanding of translation instructions and the alignment among different languages. With multilingual finetuning with translation instructions, LLMs could learn to perform the translation task well even for those language pairs unseen during the instruction tuning phase.</abstract>
       <pages>576–592</pages>
@@ -437,7 +437,7 @@
     <paper id="33">
       <title>Semantics of Multiword Expressions in Transformer-Based Models: A Survey</title>
       <author><first>Filip</first><last>Miletić</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <doi>10.1162/tacl_a_00657</doi>
       <abstract>Multiword expressions (MWEs) are composed of multiple words and exhibit variable degrees of compositionality. As such, their meanings are notoriously difficult to model, and it is unclear to what extent this issue affects transformer architectures. Addressing this gap, we provide the first in-depth survey of MWE processing with transformer models. We overall find that they capture MWE semantics inconsistently, as shown by reliance on surface patterns and memorized information. MWE meaning is also strongly localized, predominantly in early layers of the architecture. Representations benefit from specific linguistic properties, such as lower semantic idiosyncrasy and ambiguity of target expressions. Our findings overall question the ability of transformer models to robustly capture fine-grained semantics. Furthermore, we highlight the need for more directly comparable evaluation setups.</abstract>
       <pages>593–612</pages>
@@ -471,7 +471,7 @@
     <paper id="36">
       <title>Computational Complexity of Natural Morphology Revisited</title>
       <author><first>Hajime</first><last>Senuma</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <doi>10.1162/tacl_a_00665</doi>
       <abstract>This paper revisits a classical, yet fundamental, discussion of theoretical computational linguistics: the computational complexity of natural languages. Past studies have revealed that syntax, as observed in Swiss-German, is not weakly context-free. Concerning morphology, Culy (1985) employed a construction in Bambara to show that morphology is not weakly context-free; however, Manaster-Ramer (1988) pointed out that the Bambara case can be problematic because the wordhood of the construction is reliant on special tonal behaviors, and it is ambiguous whether the behaviors belong to the morphological domain. This raises doubts about whether the case can be considered a genuine morphological phenomenon. In this paper, we argue that Classical Ainu, a language we examine, also defies weak context-freeness at the morphological level. The construction we introduce is unambiguously morphological because this language’s valency-sensitive structure and valency-changing operations, such as noun incorporation, preclude its grammatical interpretation as syntactic.</abstract>
       <pages>649–663</pages>
@@ -637,7 +637,7 @@
       <author><first>Jessy</first><last>Lin</last></author>
       <author><first>Nicholas</first><last>Tomlin</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00679</doi>
       <abstract>We describe a class of tasks called decision-oriented dialogues, in which AI assistants such as large language models (LMs) must collaborate with one or more humans via natural language to help them make complex decisions. We formalize three domains in which users face everyday decisions: (1) choosing an assignment of reviewers to conference papers, (2) planning a multi-step itinerary in a city, and (3) negotiating travel plans for a group of friends. In each of these settings, AI assistants and users have disparate abilities that they must combine to arrive at the best decision: Assistants can access and process large amounts of information, while users have preferences and constraints external to the system. For each task, we build a dialogue environment where agents receive a reward based on the quality of the final decision they reach. We evaluate LMs in self-play and in collaboration with humans and find that they fall short compared to human assistants, achieving much lower rewards despite engaging in longer dialogues. We highlight a number of challenges models face in decision-oriented dialogues, ranging from goal-directed behavior to reasoning and optimization, and release our environments as a testbed for future work.</abstract>
       <pages>892–911</pages>
@@ -689,7 +689,7 @@
       <author><first>Miryam de</first><last>Lhoneux</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Michel</first><last>DeGraff</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <doi>10.1162/tacl_a_00682</doi>
       <abstract>Creoles represent an under-explored and marginalized group of languages, with few available resources for NLP research. While the genealogical ties between Creoles and a number of highly resourced languages imply a significant potential for transfer learning, this potential is hampered due to this lack of annotated data. In this work we present CreoleVal, a collection of benchmark datasets spanning 8 different NLP tasks, covering up to 28 Creole languages; it is an aggregate of novel development datasets for reading comprehension relation classification, and machine translation for Creoles, in addition to a practical gateway to a handful of preexisting benchmarks. For each benchmark, we conduct baseline experiments in a zero-shot setting in order to further ascertain the capabilities and limitations of transfer learning for Creoles. Ultimately, we see CreoleVal as an opportunity to empower research on Creoles in NLP and computational linguistics, and in general, a step towards more equitable language technology around the globe.</abstract>
@@ -702,9 +702,9 @@
       <author><first>Nuno M.</first><last>Guerreiro</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
       <author><first>Daan van</first><last>Stigt</last></author>
-      <author><first>Luisa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
       <author><first>Pierre</first><last>Colombo</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00683</doi>
       <abstract>Widely used learned metrics for machine translation evaluation, such as Comet and Bleurt, estimate the quality of a translation hypothesis by providing a single sentence-level score. As such, they offer little insight into translation errors (e.g., what are the errors and what is their severity). On the other hand, generative large language models (LLMs) are amplifying the adoption of more granular strategies to evaluation, attempting to detail and categorize translation errors. In this work, we introduce xcomet, an open-source learned metric designed to bridge the gap between these approaches. xcomet integrates both sentence-level evaluation and error span detection capabilities, exhibiting state-of-the-art performance across all types of evaluation (sentence-level, system-level, and error span detection). Moreover, it does so while highlighting and categorizing error spans, thus enriching the quality assessment. We also provide a robustness analysis with stress tests, and show that xcomet is largely capable of identifying localized critical errors and hallucinations.</abstract>
       <pages>979–995</pages>
@@ -717,7 +717,7 @@
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Benjamin I. P.</first><last>Rubinstein</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <doi>10.1162/tacl_a_00684</doi>
       <abstract>Modern NLP models are often trained on public datasets drawn from diverse sources, rendering them vulnerable to data poisoning attacks. These attacks can manipulate the model’s behavior in ways engineered by the attacker. One such tactic involves the implantation of backdoors, achieved by poisoning specific training instances with a textual trigger and a target class label. Several strategies have been proposed to mitigate the risks associated with backdoor attacks by identifying and removing suspected poisoned examples. However, we observe that these strategies fail to offer effective protection against several advanced backdoor attacks. To remedy this deficiency, we propose a novel defensive mechanism that first exploits training dynamics to identify poisoned samples with high precision, followed by a label propagation step to improve recall and thus remove the majority of poisoned instances. Compared with recent advanced defense methods, our method considerably reduces the success rates of several backdoor attacks while maintaining high classification accuracy on clean test sets.</abstract>
       <pages>996–1010</pages>
@@ -755,8 +755,8 @@
       <title>Do Multi-Document Summarization Models Synthesize?</title>
       <author><first>Jay</first><last>DeYoung</last></author>
       <author><first>Stephanie C.</first><last>Martinez</last></author>
-      <author><first>Iain J.</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain J.</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <doi>10.1162/tacl_a_00687</doi>
       <abstract>Multi-document summarization entails producing concise synopses of collections of inputs. For some applications, the synopsis should accurately synthesize inputs with respect to a key aspect, e.g., a synopsis of film reviews written about a particular movie should reflect the average critic consensus. As a more consequential example, narrative summaries that accompany biomedical systematic reviews of clinical trial results should accurately summarize the potentially conflicting results from individual trials. In this paper we ask: To what extent do modern multi-document summarization models implicitly perform this sort of synthesis? We run experiments over opinion and evidence synthesis datasets using a suite of summarization models, from fine-tuned transformers to GPT-4. We find that existing models partially perform synthesis, but imperfectly: Even the best performing models are over-sensitive to changes in input ordering and under-sensitive to changes in input compositions (e.g., ratio of positive to negative reviews). We propose a simple, general, effective method for improving model synthesis capabilities by generating an explicitly diverse set of candidate outputs, and then selecting from these the string best aligned with the expected aggregate measure for the inputs, or abstaining when the model produces no good candidate.</abstract>
       <pages>1043–1062</pages>
@@ -791,7 +791,7 @@
       <author><first>Jorge</first><last>Iranzo-Sánchez</last></author>
       <author><first>Adrià</first><last>Giménez</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <doi>10.1162/tacl_a_00691</doi>
       <abstract>Streaming Machine Translation (MT) is the task of translating an unbounded input text stream in real-time. The traditional cascade approach, which combines an Automatic Speech Recognition (ASR) and an MT system, relies on an intermediate segmentation step which splits the transcription stream into sentence-like units. However, the incorporation of a hard segmentation constrains the MT system and is a source of errors. This paper proposes a Segmentation-Free framework that enables the model to translate an unsegmented source stream by delaying the segmentation decision until after the translation has been generated. Extensive experiments show how the proposed Segmentation-Free framework has better quality-latency trade-off than competing approaches that use an independent segmentation model.1</abstract>
       <pages>1104–1121</pages>
@@ -801,7 +801,7 @@
     <paper id="62">
       <title>Do Language Models Enjoy Their Own Stories? Prompting Large Language Models for Automatic Story Evaluation</title>
       <author><first>Cyril</first><last>Chhun</last></author>
-      <author><first>Fabian M.</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
       <doi>10.1162/tacl_a_00689</doi>
       <abstract>Storytelling is an integral part of human experience and plays a crucial role in social interactions. Thus, Automatic Story Evaluation (ASE) and Generation (ASG) could benefit society in multiple ways, but they are challenging tasks which require high-level human abilities such as creativity, reasoning, and deep understanding. Meanwhile, Large Language Models (LLMs) now achieve state-of-the-art performance on many NLP tasks. In this paper, we study whether LLMs can be used as substitutes for human annotators for ASE. We perform an extensive analysis of the correlations between LLM ratings, other automatic measures, and human annotations, and we explore the influence of prompting on the results and the explainability of LLM behaviour. Most notably, we find that LLMs outperform current automatic measures for system-level evaluation but still struggle at providing satisfactory explanations for their answers.</abstract>
@@ -875,7 +875,7 @@
       <author><first>Jiaang</first><last>Li</last></author>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Constanza</first><last>Fierro</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <doi>10.1162/tacl_a_00698</doi>
       <abstract>Large-scale pretrained language models (LMs) are said to “lack the ability to connect utterances to the world” (Bender and Koller, 2020), because they do not have “mental models of the world” (Mitchell and Krakauer, 2023). If so, one would expect LM representations to be unrelated to representations induced by vision models. We present an empirical evaluation across four families of LMs (BERT, GPT-2, OPT, and LLaMA-2) and three vision model architectures (ResNet, SegFormer, and MAE). Our experiments show that LMs partially converge towards representations isomorphic to those of vision models, subject to dispersion, polysemy, and frequency. This has important implications for both multi-modal processing and the LM understanding debate (Mitchell and Krakauer, 2023).1</abstract>
       <pages>1232–1249</pages>
@@ -888,7 +888,7 @@
       <author><first>Amin</first><last>Farajian</last></author>
       <author><first>Patrick</first><last>Fernandes</last></author>
       <author><first>Ricardo</first><last>Rei</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00700</doi>
       <abstract>Despite the recent success of automatic metrics for assessing translation quality, their application in evaluating the quality of machine-translated chats has been limited. Unlike more structured texts like news, chat conversations are often unstructured, short, and heavily reliant on contextual information. This poses questions about the reliability of existing sentence-level metrics in this domain as well as the role of context in assessing the translation quality. Motivated by this, we conduct a meta-evaluation of existing automatic metrics, primarily designed for structured domains such as news, to assess the quality of machine-translated chats. We find that reference-free metrics lag behind reference-based ones, especially when evaluating translation quality in out-of-English settings. We then investigate how incorporating conversational contextual information in these metrics for sentence-level evaluation affects their performance. Our findings show that augmenting neural learned metrics with contextual information helps improve correlation with human judgments in the reference-free scenario and when evaluating translations in out-of-English settings. Finally, we propose a new evaluation metric, Context-MQM, that utilizes bilingual context with a large language model (LLM) and further validate that adding context helps even for LLM-based evaluation metrics.</abstract>
       <pages>1250–1267</pages>
@@ -912,7 +912,7 @@
       <author><first>Melanie</first><last>Subbiah</last></author>
       <author><first>Sean</first><last>Zhang</last></author>
       <author><first>Lydia B.</first><last>Chilton</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.1162/tacl_a_00702</doi>
       <abstract>We evaluate recent Large Language Models (LLMs) on the challenging task of summarizing short stories, which can be lengthy, and include nuanced subtext or scrambled timelines. Importantly, we work directly with authors to ensure that the stories have not been shared online (and therefore are unseen by the models), and to obtain informed evaluations of summary quality using judgments from the authors themselves. Through quantitative and qualitative analysis grounded in narrative theory, we compare GPT-4, Claude-2.1, and LLama-2-70B. We find that all three models make faithfulness mistakes in over 50% of summaries and struggle with specificity and interpretation of difficult subtext. We additionally demonstrate that LLM ratings and other automatic metrics for summary quality do not correlate well with the quality ratings from the writers.</abstract>
       <pages>1290–1310</pages>
@@ -932,9 +932,9 @@
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Yingbo</first><last>Zhou</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Arman</first><last>Cohan</last></author>
       <doi>10.1162/tacl_a_00705</doi>
@@ -982,7 +982,7 @@
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Ana</first><last>Barić</last></author>
       <author><first>Dmitry</first><last>Nikolaev</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <doi>10.1162/tacl_a_00710</doi>
       <abstract>Due to the widespread use of large language models (LLMs), we need to understand whether they embed a specific “worldview” and what these views reflect. Recent studies report that, prompted with political questionnaires, LLMs show left-liberal leanings (Feng et al., 2023; Motoki et al., 2024). However, it is as yet unclear whether these leanings are reliable (robust to prompt variations) and whether the leaning is consistent across policies and political leaning. We propose a series of tests which assess the reliability and consistency of LLMs’ stances on political statements based on a dataset of voting-advice questionnaires collected from seven EU countries and annotated for policy issues. We study LLMs ranging in size from 7B to 70B parameters and find that their reliability increases with parameter count. Larger models show overall stronger alignment with left-leaning parties but differ among policy programs: They show a (left-wing) positive stance towards environment protection, social welfare state, and liberal society but also (right-wing) law and order, with no consistent preferences in the areas of foreign policy and migration.</abstract>
       <pages>1378–1400</pages>
@@ -1031,7 +1031,7 @@
     <paper id="80">
       <title>Conformalizing Machine Translation Evaluation</title>
       <author><first>Chrysoula</first><last>Zerva</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00711</doi>
       <abstract>Several uncertainty estimation methods have been recently proposed for machine translation evaluation. While these methods can provide a useful indication of when not to trust model predictions, we show in this paper that the majority of them tend to underestimate model uncertainty, and as a result, they often produce misleading confidence intervals that do not cover the ground truth. We propose as an alternative the use of conformal prediction, a distribution-free method to obtain confidence intervals with a theoretically established guarantee on coverage. First, we demonstrate that split conformal prediction can “correct” the confidence intervals of previous methods to yield a desired coverage level, and we demonstrate these findings across multiple machine translation evaluation metrics and uncertainty quantification methods. Further, we highlight biases in estimated confidence intervals, reflected in imbalanced coverage for different attributes, such as the language and the quality of translations. We address this by applying conditional conformal prediction techniques to obtain calibration subsets for each data subgroup, leading to equalized coverage. Overall, we show that, provided access to a calibration set, conformal prediction can help identify the most suitable uncertainty quantification methods and adapt the predicted confidence intervals to ensure fairness with respect to different attributes.1</abstract>
       <pages>1460–1478</pages>
@@ -1043,7 +1043,7 @@
       <author><first>Zheng Wei</first><last>Lim</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Charles</first><last>Kemp</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <doi>10.1162/tacl_a_00714</doi>
       <abstract>Human translators linger on some words and phrases more than others, and predicting this variation is a step towards explaining the underlying cognitive processes. Using data from the CRITT Translation Process Research Database, we evaluate the extent to which surprisal and attentional features derived from a Neural Machine Translation (NMT) model account for reading and production times of human translators. We find that surprisal and attention are complementary predictors of translation difficulty, and that surprisal derived from a NMT model is the single most successful predictor of production duration. Our analyses draw on data from hundreds of translators operating across 13 language pairs, and represent the most comprehensive investigation of human translation difficulty to date.</abstract>
       <pages>1479–1496</pages>
@@ -1056,7 +1056,7 @@
       <author><first>António</first><last>Farinhas</last></author>
       <author><first>Chrysoula</first><last>Zerva</last></author>
       <author><first>Mário A. T.</first><last>Figueiredo</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <doi>10.1162/tacl_a_00715</doi>
       <abstract>The rapid proliferation of large language models and natural language processing (NLP) applications creates a crucial need for uncertainty quantification to mitigate risks such as Hallucinations and to enhance decision-making reliability in critical applications. Conformal prediction is emerging as a theoretically sound and practically useful framework, combining flexibility with strong statistical guarantees. Its model-agnostic and distribution-free nature makes it particularly promising to address the current shortcomings of NLP systems that stem from the absence of uncertainty quantification. This paper provides a comprehensive survey of conformal prediction techniques, their guarantees, and existing applications in NLP, pointing to directions for future research and open challenges.</abstract>
       <pages>1497–1516</pages>
@@ -1177,7 +1177,7 @@
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Rahmad</first><last>Mahendra</last></author>
       <author><first>Nurul</first><last>Aisyah</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.1162/tacl_a_00726</doi>
       <abstract>Although commonsense reasoning is greatly shaped by cultural and geographical factors, previous studies have predominantly centered on cultures grounded in the English language, potentially resulting in an Anglocentric bias. In this paper, we introduce IndoCulture, aimed at understanding the influence of geographical factors on language model reasoning ability, with a specific emphasis on the diverse cultures found within eleven Indonesian provinces. In contrast to prior work that has relied on templates (Yin et al., 2022) and online scrapping (Fung et al., 2024), we create IndoCulture by asking local people to manually develop a cultural context and plausible options, across a set of predefined topics. Evaluation of 27 language models reveals several insights: (1) the open-weight Llama–3 is competitive with GPT–4, while other open-weight models struggle, with accuracies below 50%; (2) there is a general pattern of models generally performing better for some provinces, such as Bali and West Java, and less well for others; and (3) the inclusion of location context enhances performance, especially for larger models like GPT–4, emphasizing the significance of geographical context in commonsense reasoning.1</abstract>
       <pages>1703–1719</pages>
diff --git a/data/xml/2024.tdle.xml b/data/xml/2024.tdle.xml
index a371136cc3..6e7d9d2727 100644
--- a/data/xml/2024.tdle.xml
+++ b/data/xml/2024.tdle.xml
@@ -8,9 +8,9 @@
       <editor><first>Itziar</first><last>Aldabe</last></editor>
       <editor><first>Aritz</first><last>Farwell</last></editor>
       <editor><first>Begona</first><last>Altuna</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <editor><first>Georg</first><last>Rehm</last></editor>
-      <editor><first>German</first><last>Rigau</last></editor>
+      <editor id="german-rigau"><first>German</first><last>Rigau</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -48,8 +48,8 @@
     </paper>
     <paper id="3">
       <title>Fine-Tuning Open Access <fixed-case>LLM</fixed-case>s for High-Precision <fixed-case>NLU</fixed-case> in Goal-Driven Dialog Systems</title>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <pages>33–42</pages>
       <abstract>This paper presents a set of experiments on fine-tuning LLMs to produce high-precision semantic representations for the NLU component of a dialog system front-end. The aim of this research is threefold: First, we want to explore the capabilities of LLMs on real, industry-based use cases that involve complex data and strict requirements on results. Since the LLM output should usable by the application back-end, the produced semantic representation must satisfy strict format and consistency requirements. Second, we want to evaluate the cost-benefit of open-source LLMs, that is, the feasibility of running this kind of models in machines affordable to small-medium enterprises (SMEs), in order to assess how far this organizations can go without depending on the large players controlling the market, and with a moderate use of computation resources. Finally, we also want to assess the language scalability of the LLMs in this kind of applications; specifically, whether a multilingual model is able to cast patterns learnt from one language to other ones –with special attention to underresourced languages–, thus reducing required training data and computation costs. This work was carried out within an R&amp;D context of assisting a real company in defining its NLU model strategy, and thus the results have a practical, industry-level focus.</abstract>
       <url hash="a77510d5">2024.tdle-1.3</url>
@@ -61,7 +61,7 @@
       <author><first>Lucky</first><last>Susanto</last></author>
       <author><first>Zilu</first><last>Tang</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>43–52</pages>
       <abstract>Large Language Models (LLMs) demonstrate strong machine translation capabilities on languages they are trained on. However, the impact of factors beyond training data size on translation performance remains a topic of debate, especially concerning languages not directly encountered during training. Our study delves into Llama2’s translation capabilities. By modeling a linear relationship between linguistic feature distances and machine translation scores, we ask ourselves if there are potentially better central languages for LLMs other than English. Our experiments show that the 7B Llama2 model yields above 10 BLEU when translating into all languages it has seen, which rarely happens for languages it has not seen. Most translation improvements into unseen languages come from scaling up the model size rather than instruction tuning or increasing shot count. Furthermore, our correlation analysis reveals that syntactic similarity is not the only linguistic factor that strongly correlates with machine translation scores. Interestingly, we discovered that under specific circumstances, some languages (e.g. Swedish, Catalan), despite having significantly less training data, exhibit comparable correlation levels to English. These insights challenge the prevailing landscape of LLMs, suggesting that models centered around languages other than English could provide a more efficient foundation for multilingual applications.</abstract>
       <url hash="ea226ddb">2024.tdle-1.4</url>
diff --git a/data/xml/2024.teachingnlp.xml b/data/xml/2024.teachingnlp.xml
index ab44c9c988..802bc48131 100644
--- a/data/xml/2024.teachingnlp.xml
+++ b/data/xml/2024.teachingnlp.xml
@@ -55,7 +55,7 @@
       <title>Striking a Balance between Classical and Deep Learning Approaches in Natural Language Processing Pedagogy</title>
       <author><first>Aditya</first><last>Joshi</last><affiliation>UNSW</affiliation></author>
       <author><first>Jake</first><last>Renzella</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Saurav</first><last>Jha</last></author>
       <author><first>Xiangyu</first><last>Zhang</last></author>
       <pages>23-32</pages>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="5">
       <title>Co-Creational Teaching of Natural Language Processing</title>
-      <author><first>John</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
       <pages>33-42</pages>
       <abstract>Traditional lectures have poorer outcomes compared to active learning methodologies, yet many natural language processing classes in higher education still follow this outdated methodology. In this paper, we present, co-creational teaching, a methodology that encourages partnership between staff and lecturers and show how this can be applied to teach natural language processing. As a fast-moving and dynamic area of study with high interest from students, natural language processing is an ideal subject for innovative teaching methodologies to improve student outcomes. We detail our experience with teaching natural language processing through partnership with students and provide detailed descriptions of methodologies that can be used by others in their teaching, including considerations of diverse student populations.</abstract>
       <url hash="cbff15ea">2024.teachingnlp-1.5</url>
@@ -82,7 +82,7 @@
       <author><first>Bernd</first><last>Bischl</last><affiliation>LMU</affiliation></author>
       <author><first>Benjamin</first><last>Roth</last><affiliation>Universität Vienna</affiliation></author>
       <author><first>Christian</first><last>Heumann</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>43-53</pages>
       <abstract>In this work, we present a collaboratively and continuously developed open-source educational resource (OSER) for teaching natural language processing at two different universities. We shed light on the principles we followed for the initial design of the course and the rationale for ongoing developments, followed by a reflection on the inter-university collaboration for designing and maintaining teaching material. When reflecting on the latter, we explicitly emphasize the considerations that need to be made when facing heterogeneous groups and when having to accommodate multiple examination regulations within one single course framework. Relying on the fundamental principles of OSER developments as defined by Bothmann et al. (2023) proved to be an important guideline during this process. The final part pertains to open-sourcing our teaching material, coping with the increasing speed of developments in the field, and integrating the course digitally, also addressing conflicting priorities and challenges we are currently facing.</abstract>
       <url hash="cfb12c3c">2024.teachingnlp-1.6</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="9">
       <title>Teaching <fixed-case>LLM</fixed-case>s at <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity: Assignments and Activities</title>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <author><first>Zdeněk</first><last>Kasner</last></author>
       <author><first>Ondřej</first><last>Dušek</last><affiliation>Charles University, Prague</affiliation></author>
       <author><first>Tomasz</first><last>Limisiewicz</last><affiliation>Charles University Prague</affiliation></author>
diff --git a/data/xml/2024.teicai.xml b/data/xml/2024.teicai.xml
index 8d8f965eca..3ebaa161b8 100644
--- a/data/xml/2024.teicai.xml
+++ b/data/xml/2024.teicai.xml
@@ -69,7 +69,7 @@
       <author><first>Laura</first><last>De Grazia</last></author>
       <author><first>Alex</first><last>Peiró Lilja</last></author>
       <author><first>Mireia</first><last>Farrús Cabeceran</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <pages>28-35</pages>
       <abstract>This paper investigates the appropriate responses that Conversational Agent systems (CAs) should employ when subjected to sexual harassment by users. Previous studies indicate that conventional CAs often respond neutrally or evade such requests. Enhancing the responsiveness of CAs to offensive speech is crucial, as users might carry over these interactions into their social interactions. To address this issue, we selected evaluators to compare a series of responses to sexual harassment from four commercial CAs (Amazon Alexa, Apple Siri, Google Home, and Microsoft Cortana) with alternative responses we realized based on insights from psychological and sociological studies. Focusing on CAs with a female voice, given their increased likelihood of encountering offensive language, we conducted two experiments involving 22 evaluators (11 females and 11 males). In the initial experiment, participants assessed the responses in a textual format, while the second experiment involved the evaluation of responses generated with a synthetic voice exhibiting three different intonations (angry, neutral, and assertive). Results from the first experiment revealed a general preference for the responses we formulated. For the most voted replies, female evaluators exhibited a tendency towards responses with an assertive intent, emphasizing the sexually harassing nature of the request. Conversely, male evaluators leaned towards a more neutral response, aligning with prior findings that highlight gender-based differences in the perception of sexual harassment. The second experiment underscored a preference for assertive responses. The study’s outcomes highlight the need to develop new, educational responses from CAs to instances of sexual harassment, aiming to discourage harmful behavior.</abstract>
       <url hash="1a8bfaef">2024.teicai-1.5</url>
diff --git a/data/xml/2024.textgraphs.xml b/data/xml/2024.textgraphs.xml
index 7b7b8f9dd9..56893deb00 100644
--- a/data/xml/2024.textgraphs.xml
+++ b/data/xml/2024.textgraphs.xml
@@ -30,7 +30,7 @@
       <author><first>Oana</first><last>Ignat</last></author>
       <author><first>Santiago</first><last>Castro</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Weiji</first><last>Li</last><affiliation>Tesla</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>1-18</pages>
       <abstract>We address the task of human action representation and show how the approach to generating word representations based on co-occurrence can be adapted to generate human action representations by analyzing their co-occurrence in videos. To this end, we formalize the new task of human action co-occurrence identification in online videos, i.e., determine whether two human actions are likely to co-occur in the same interval of time.We create and make publicly available the Co-Act (Action Co-occurrence) dataset, consisting of a large graph of ~12k co-occurring pairs of visual actions and their corresponding video clips. We describe graph link prediction models that leverage visual and textual information to automatically infer if two actions are co-occurring.We show that graphs are particularly well suited to capture relations between human actions, and the learned graph representations are effective for our task and capture novel and relevant information across different data domains.</abstract>
       <url hash="9d7120c1">2024.textgraphs-1.1</url>
@@ -43,7 +43,7 @@
       <author><first>Suyash</first><last>Fulay</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Hang</first><last>Jiang</last></author>
       <author><first>Brandon</first><last>Roy</last><affiliation>Massachusetts Institute of Technology and Brown University</affiliation></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>19-39</pages>
       <abstract>Learning on text-attributed graphs (TAGs), in which nodes are associated with one or more texts, has been the subject of much recent work. However, most approaches tend to make strong assumptions about the downstream task of interest, are reliant on hand-labeled data, or fail to equally balance the importance of both text and graph representations. In this work, we propose Contrastive Graph-Text pretraining (ConGraT), a general, self-supervised approach for jointly learning separate representations of texts and nodes in a TAG. Our method trains a language model (LM) and a graph neural network (GNN) to align their representations in a common latent space using a batch-wise contrastive learning objective inspired by CLIP. We further propose an extension to the CLIP objective that leverages graph structure to incorporate information about inter-node similarity. Extensive experiments demonstrate that ConGraT outperforms baselines on various downstream tasks, including node and text category classification, link prediction, and language modeling. Finally, we present an application of our method to community detection in social graphs, which enables finding more textually grounded communities, rather than purely graph-based ones.</abstract>
@@ -75,7 +75,7 @@
     <paper id="5">
       <title>Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification</title>
       <author><first>Alla</first><last>Chepurova</last></author>
-      <author><first>Yuri</first><last>Kuratov</last><affiliation>AIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology</affiliation></author>
+      <author id="yurii-kuratov"><first>Yuri</first><last>Kuratov</last><affiliation>AIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology</affiliation></author>
       <author><first>Aydar</first><last>Bulatov</last><affiliation>Moscow Institute of Physics and Technology</affiliation></author>
       <author><first>Mikhail</first><last>Burtsev</last><affiliation>London Institute for Mathematical Sciences</affiliation></author>
       <pages>61-77</pages>
@@ -87,7 +87,7 @@
       <title>Towards Understanding Attention-based Reasoning through Graph Structures in Medical Codes Classification</title>
       <author><first>Noon</first><last>Goldstein</last></author>
       <author><first>Saadullah</first><last>Amin</last></author>
-      <author><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
       <pages>78-92</pages>
       <abstract>A common approach to automatically assigning diagnostic and procedural clinical codes to health records is to solve the task as a multi-label classification problem. Difficulties associated with this task stem from domain knowledge requirements, long document texts, large and imbalanced label space, reflecting the breadth and dependencies between medical diagnoses and procedures. Decisions in the healthcare domain also need to demonstrate sound reasoning, both when they are correct and when they are erroneous. Existing works address some of these challenges by incorporating external knowledge, which can be encoded into a graph-structured format. Incorporating graph structures on the output label space or between the input document and output label spaces have shown promising results in medical codes classification. Limited focus has been put on utilizing graph-based representation on the input document space. To partially bridge this gap, we represent clinical texts as graph-structured data through the UMLS Metathesaurus; we explore implicit graph representation through pre-trained knowledge graph embeddings and explicit domain-knowledge guided encoding of document concepts and relational information through graph neural networks. Our findings highlight the benefits of pre-trained knowledge graph embeddings in understanding model’s attention-based reasoning. In contrast, transparent domain knowledge guidance in graph encoder approaches is overshadowed by performance loss. Our qualitative analysis identifies limitations that contribute to prediction errors.</abstract>
       <url hash="acab3c4b">2024.textgraphs-1.6</url>
@@ -180,7 +180,7 @@
       <title><fixed-case>NLP</fixed-case>eople at <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-17 Shared Task: Chain of Thought Questioning to Elicit Decompositional Reasoning</title>
       <author><first>Movina</first><last>Moses</last></author>
       <author><first>Vishnudev</first><last>Kuruvanthodi</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Mohab</first><last>Elkaref</last><affiliation>International Business Machines</affiliation></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Shinnosuke</first><last>Tanaka</last><affiliation>International Business Machines</affiliation></author>
       <author><first>James</first><last>Barry</last></author>
       <author><first>Geeth</first><last>Mel</last></author>
diff --git a/data/xml/2024.tlt.xml b/data/xml/2024.tlt.xml
index fcd275333a..eda692319c 100644
--- a/data/xml/2024.tlt.xml
+++ b/data/xml/2024.tlt.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 22nd Workshop on Treebanks and Linguistic Theories (TLT 2024)</booktitle>
       <editor><first>Daniel</first><last>Dakota</last></editor>
       <editor><first>Sarah</first><last>Jablotschkin</last></editor>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
       <editor><first>Heike</first><last>Zinsmeister</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hamburg,Germany</address>
@@ -80,7 +80,7 @@
     <paper id="6">
       <title>Introducing Shallow Syntactic Information within the Graph-based Dependency Parsing</title>
       <author><first>Nikolay</first><last>Paev</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>46-54</pages>
       <abstract>The paper presents a new BERT model, fine-tuned for parsing of Bulgarian texts. This model is extended with a new neural network layer in order to incorporate shallow syntactic information during the training phase. The results show statistically significant improvement over the baseline. Thus, the addition of syntactic knowledge - even partial - makes the model better. Also, some error analysis has been conducted on the results from the parsers. Although the architecture has been designed and tested for Bulgarian, it is also scalable for other languages. This scalability was shown here with some experiments and evaluation on an English treebank with a comparable size.</abstract>
diff --git a/data/xml/2024.trac.xml b/data/xml/2024.trac.xml
index f2c8a1be86..94a8976294 100644
--- a/data/xml/2024.trac.xml
+++ b/data/xml/2024.trac.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Threat, Aggression &amp; Cyberbullying @ LREC-COLING-2024</booktitle>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
       <editor><first>Bornini</first><last>Lahiri</last></editor>
       <editor><first>Siddharth</first><last>Singh</last></editor>
@@ -67,7 +67,7 @@
       <author><first>Saatvik M.</first><last>Krishna</last></author>
       <author><first>Soumya Sangam</first><last>Jha</last></author>
       <author><first>Vartika T.</first><last>Rao</last></author>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <pages>32–36</pages>
       <abstract>The objective of the shared task, Offline Harm Potential Identification (HarmPot-ID), is to build models to predict the offline harm potential of social media texts. “Harm potential” is defined as the ability of an online post or comment to incite offline physical harm such as murder, arson, riot, rape, etc. The first subtask was to predict the level of harm potential, and the second was to identify the group to which this harm was directed towards. This paper details our submissions for the shared task that includes a cascaded SVM model, an XGBoost model, and a TF-IDF weighted Word2Vec embedding-supported SVM model. Several other models that were explored have also been detailed.</abstract>
       <url hash="ae8b4fdf">2024.trac-1.5</url>
@@ -76,7 +76,7 @@
     <paper id="6">
       <title><fixed-case>LLM</fixed-case>-Based Synthetic Datasets: Applications and Limitations in Toxicity Detection</title>
       <author><first>Maximilian</first><last>Schmidhuber</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>37–51</pages>
       <abstract>Large Language Model (LLM)-based Synthetic Data is becoming an increasingly important field of research. One of its promising applications is in training classifiers to detect online toxicity, which is of increasing concern in today’s digital landscape. In this work, we assess the feasibility of generative models to create synthetic data for toxic language detection. Our experiments are conducted on six different toxicity datasets, four of whom are hateful and two are toxic in the broader sense. We then employ a classifier trained on the original data for filtering. To explore the potential of this data, we conduct experiments using combinations of original and synthetic data, synthetic oversampling of the minority class, and a comparison of original vs. synthetic-only training. Results indicate that while our generative models offer benefits in certain scenarios, the approach does not improve hateful dataset classification. However, it does boost patronizing and condescending language detection. We find that synthetic data generated by LLMs is a promising avenue of research, but further research is needed to improve the quality of the generated data and develop better filtering methods. Code is available on GitHub; the generated dataset is available on Zenodo.</abstract>
       <url hash="e546c31f">2024.trac-1.6</url>
@@ -94,7 +94,7 @@
     <paper id="8">
       <title>Analyzing Offensive Language and Hate Speech in Political Discourse: A Case Study of <fixed-case>G</fixed-case>erman Politicians</title>
       <author><first>Maximilian</first><last>Weissenbacher</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>60–72</pages>
       <abstract>Social media platforms have become key players in political discourse. Twitter (now ‘X’), for example, is used by many German politicians to communicate their views and interact with others. Due to its nature, however, social networks suffer from a number of issues such as offensive content, toxic language and hate speech. This has attracted a lot of research interest but in the context of political discourse there is a noticeable gap with no such study specifically looking at German politicians in a systematic way. We aim to help addressing this gap. We first create an annotated dataset of 1,197 Twitter posts mentioning German politicians. This is the basis to explore a number of approaches to detect hate speech and offensive language (HOF) and identify an ensemble of transformer models that achieves an F1-Macros score of 0.94. This model is then used to automatically classify two much larger, longitudinal datasets: one with 520,000 tweets posted by MPs, and the other with 2,200,000 tweets which comprise posts from the public mentioning politicians. We obtain interesting insights in regards to the distribution of hate and offensive content when looking at different independent variables.</abstract>
       <url hash="96a71387">2024.trac-1.8</url>
@@ -106,7 +106,7 @@
       <author><first>Annika</first><last>Simonsen</last></author>
       <author><first>Atli Snær</first><last>Ásmundsson</last></author>
       <author><first>Guðrún Lilja</first><last>Friðjónsdóttir</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Vésteinn</first><last>Snæbjarnarson</last></author>
       <author><first>Hafsteinn</first><last>Einarsson</last></author>
       <pages>73–84</pages>
@@ -119,7 +119,7 @@
       <author><first>Melese Ayichlie</first><last>Jigar</last></author>
       <author><first>Abinew Ali</first><last>Ayele</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>85–95</pages>
       <abstract>In contemporary society, the proliferation of hate speech is increasingly prevalent across various social media platforms, with a notable trend of incorporating memes to amplify its visual impact and reach. The conventional text-based detection approaches frequently fail to address the complexities introduced by memes, thereby aggravating the challenges, particularly in low-resource languages such as Amharic. We develop Amharic meme hate speech detection models using 2,000 memes collected from Facebook, Twitter, and Telegram over four months. We employ native Amharic speakers to annotate each meme using a web-based tool, yielding a Fleiss’ kappa score of 0.50. We utilize different feature extraction techniques, namely VGG16 for images and word2Vec for textual content, and build unimodal and multimodal models such as LSTM, BiLSTM, and CNN. The BiLSTM model shows the best performance, achieving 63% accuracy for text and 75% for multimodal features. In image-only experiments, the CNN model achieves 69% in accuracy. Multimodal models demonstrate superior performance in detecting Amharic hate speech in memes, showcasing their potential to address the unique challenges posed by meme-based hate speech on social media.</abstract>
       <url hash="8fe87b8c">2024.trac-1.10</url>
@@ -176,7 +176,7 @@
       <author><first>Pica</first><last>Johansson</last></author>
       <author><first>Francesca</first><last>Stevens</last></author>
       <author><first>Jonathan</first><last>Bright</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <pages>134–154</pages>
       <abstract>Public figures receive disproportionate levels of abuse on social media, impacting their active participation in public life. Automated systems can identify abuse at scale but labelling training data is expensive and potentially harmful. So, it is desirable that systems are efficient and generalisable, handling shared and specific aspects of abuse. We explore the dynamics of cross-group text classification in order to understand how well models trained on one domain or demographic can transfer to others, with a view to building more generalisable abuse classifiers. We fine-tune language models to classify tweets targeted at public figures using our novel DoDo dataset, containing 28,000 entries with fine-grained labels, split equally across four Domain-Demographic pairs (male and female footballers and politicians). We find that (i) small amounts of diverse data are hugely beneficial to generalisation and adaptation; (ii) models transfer more easily across demographics but cross-domain models are more generalisable; (iii) some groups contribute more to generalisability than others; and (iv) dataset similarity is a signal of transferability.</abstract>
       <url hash="e5d93650">2024.trac-1.15</url>
@@ -191,7 +191,7 @@
       <author><first>Sathya</first><last>Bursic</last></author>
       <author><first>Davide</first><last>Taibi</last></author>
       <author><first>Davinia</first><last>Hernández-Leo</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Dimitri</first><last>Ognibene</last></author>
       <pages>155–166</pages>
       <abstract>Social media have become an integral part of our daily lives, yet they have also resulted in various negative effects on users, ranging from offensive or hateful content to the spread of misinformation. In recent years, numerous automated approaches have been proposed to identify and combat such harmful content. However, it is crucial to recognize the human aspect of users who engage with this content in designing efforts to mitigate these threats. We propose to incorporate principles of behavioral science, specifically the concept of nudging into social media platforms. Our approach involves augmenting social media feeds with informative diagrams, which provide insights into the content that users are presented. The goal of our work is to empower social media users to make well-informed decisions for themselves and for others within these platforms. Nudges serve as a means to gently draw users’ attention to content in an unintrusive manner, a crucial consideration in the context of social media. To evaluate the effectiveness of our approach, we conducted a user study involving 120 Italian-speaking participants who interacted with a social media interface augmented with these nudging diagrams. Participants who had used the augmented interface were able to outperform those using the plain interface in a successive harmful content detection test where nudging diagrams were not visible anymore. Our findings demonstrate that our approach significantly improves users’ awareness of potentially harmful content with effects lasting beyond the duration of the interaction. In this work, we provide a comprehensive overview of our experimental materials and setup, present our findings, and refer to the limitations identified during our study.</abstract>
@@ -204,7 +204,7 @@
       <author><first>Esubalew Alemneh</first><last>Jalew</last></author>
       <author><first>Adem Chanie</first><last>Ali</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>167–178</pages>
       <abstract>The prevalence of digital media and evolving sociopolitical dynamics have significantly amplified the dissemination of hateful content. Existing studies mainly focus on classifying texts into binary categories, often overlooking the continuous spectrum of offensiveness and hatefulness inherent in the text. In this research, we present an extensive benchmark dataset for Amharic, comprising 8,258 tweets annotated for three distinct tasks: category classification, identification of hate targets, and rating offensiveness and hatefulness intensities. Our study highlights that a considerable majority of tweets belong to the less offensive and less hate intensity levels, underscoring the need for early interventions by stakeholders. The prevalence of ethnic and political hatred targets, with significant overlaps in our dataset, emphasizes the complex relationships within Ethiopia’s sociopolitical landscape. We build classification and regression models and investigate the efficacy of models in handling these tasks. Our results reveal that hate and offensive speech can not be addressed by a simplistic binary classification, instead manifesting as variables across a continuous range of values. The afro-XLMR-large model exhibits the best performances achieving F1-scores of 75.30%, 70.59%, and 29.42% for the category, target, and regression tasks, respectively. The 80.22% correlation coefficient of the Afro-XLMR-large model indicates strong alignments.</abstract>
       <url hash="266a970d">2024.trac-1.17</url>
diff --git a/data/xml/2024.trustnlp.xml b/data/xml/2024.trustnlp.xml
index 441c12defb..e7b526ff04 100644
--- a/data/xml/2024.trustnlp.xml
+++ b/data/xml/2024.trustnlp.xml
@@ -84,7 +84,7 @@
       <title>Towards Healthy <fixed-case>AI</fixed-case>: Large Language Models Need Therapists Too</title>
       <author><first>Baihan</first><last>Lin</last><affiliation>Columbia University</affiliation></author>
       <author><first>Djallel</first><last>Bouneffouf</last><affiliation>IBM</affiliation></author>
-      <author><first>Guillermo</first><last>Cecchi</last><affiliation>IBM Research</affiliation></author>
+      <author id="guillermo-a-cecchi"><first>Guillermo</first><last>Cecchi</last><affiliation>IBM Research</affiliation></author>
       <author><first>Kush</first><last>Varshney</last><affiliation>IBM Research</affiliation></author>
       <pages>61-70</pages>
       <abstract>Recent advances in large language models (LLMs) have led to the development of powerful chatbots capable of engaging in fluent human-like conversations. However, these chatbots may be harmful, exhibiting manipulation, gaslighting, narcissism, and other toxicity. To work toward safer and more well-adjusted models, we propose a framework that uses psychotherapy to identify and mitigate harmful chatbot behaviors. The framework involves four different artificial intelligence (AI) agents: the Chatbot whose behavior is to be adjusted, a User, a Therapist, and a Critic that can be paired with reinforcement learning-based LLM tuning. We illustrate the framework with a working example of a social conversation involving four instances of ChatGPT, showing that the framework may mitigate the toxicity in conversations between LLM-driven chatbots and people. Although there are still several challenges and directions to be addressed in the future, the proposed framework is a promising approach to improving the alignment between LLMs and human values.</abstract>
@@ -110,7 +110,7 @@
       <author><first>Ahmed</first><last>Zahran</last><affiliation>Agolo</affiliation></author>
       <author><first>Abanoub</first><last>Amin</last><affiliation>Agolo</affiliation></author>
       <author><first>Amr</first><last>Abdelaal</last><affiliation>Agolo</affiliation></author>
-      <author><first>Mohamed</first><last>Altantawy</last><affiliation>Agolo</affiliation></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>Altantawy</last><affiliation>Agolo</affiliation></author>
       <pages>79-84</pages>
       <abstract>This paper proposes a novel black-box approach for fact-level hallucination detection and classification by transforming the problem into a knowledge graph alignment task. This approach allows us to classify detected hallucinations as either intrinsic or extrinsic. The paper starts by discussing the field of hallucination detection and introducing several approaches to related work. Then, we introduce the proposed FactAlign approach for hallucination detection and discuss how we can use it to classify hallucinations as either intrinsic or extrinsic. Experiments are carried out to evaluate the proposed method against state-of-the-art methods on the hallucination detection task using the WikiBio GPT-3 hallucination dataset, and on the hallucination type classification task using the XSum hallucination annotations dataset. The experimental results show that our method achieves a 0.889 F1 score for the hallucination detection and 0.825 F1 for the hallucination type classification, without any further training, fine-tuning, or producing multiple samples of the LLM response.</abstract>
       <url hash="7b3974cd">2024.trustnlp-1.8</url>
@@ -184,7 +184,7 @@
       <title>Tweak to Trust: Assessing the Reliability of Summarization Metrics in Contact Centers via Perturbed Summaries</title>
       <author><first>Kevin</first><last>Patel</last><affiliation>Observe.AI</affiliation></author>
       <author><first>Suraj</first><last>Agrawal</last><affiliation>Observe.AI</affiliation></author>
-      <author><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last><affiliation>Observe.AI</affiliation></author>
       <pages>172-186</pages>
       <abstract>In the dynamic realm of call center communications, the potential of abstractive summarization to transform information condensation is evident. However, evaluating the performance of abstractive summarization systems within contact center domain poses a significant challenge. Traditional evaluation metrics prove inadequate in capturing the multifaceted nature of call center conversations, characterized by diverse topics, emotional nuances, and dynamic contexts. This paper uses domain-specific perturbed summaries to scrutinize the robustness of summarization metrics in the call center domain. Through extensive experiments on call center data, we illustrate how perturbed summaries uncover limitations in existing metrics. We additionally utilize perturbation as data augmentation strategy to train domain-specific metrics. Our findings underscore the potential of perturbed summaries to complement current evaluation techniques, advancing reliable and adaptable summarization solutions in the call center domain.</abstract>
       <url hash="9430c8a1">2024.trustnlp-1.14</url>
@@ -195,7 +195,7 @@
       <title>Flatness-Aware Gradient Descent for Safe Conversational <fixed-case>AI</fixed-case></title>
       <author><first>Leila</first><last>Khalatbari</last><affiliation>School of Electrical and Computer Engineering, Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Saeid</first><last>Hosseini</last><affiliation>Sohar University</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <author><first>Pascale</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <pages>187-195</pages>
       <abstract>As generative dialog models become ubiquitous in real-world applications, it is paramount to ensure a harmless generation. There are two major challenges when enforcing safety to open-domain chatbots. Firstly, it is impractical to provide training data reflecting the desired response to all emerging forms of toxicity (generalisation challenge). Secondly, implementing safety features may compromise the quality of the conversation (trade-off challenge). To tackle the challenges, this paper introduces a regularized fine-tuning approach called FlatGD. By employing a safety-tailored loss, we translate better optimization to more safety. To ensure better optimization, FlatGD penalizes sharp trajectories of loss curve, encouraging flatness of the converged local minima. Experimental results on datasets of “BAD” and “prosocial dialog” demonstrate that our model outperforms the current baselines in reducing toxicity while preserving the conversation quality. Moreover, compared to other baselines, FlatGD can better generalize to unseen toxic data.</abstract>
diff --git a/data/xml/2024.tsar.xml b/data/xml/2024.tsar.xml
index 2baee68258..f94fd91541 100644
--- a/data/xml/2024.tsar.xml
+++ b/data/xml/2024.tsar.xml
@@ -8,7 +8,7 @@
       <editor><first>Fernando</first><last>Alva-Manchego</last><affiliation>Cardiff University, UK</affiliation></editor>
       <editor><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University, USA</affiliation></editor>
       <editor><first>Kai</first><last>North</last><affiliation>Cambium Assessment, USA</affiliation></editor>
-      <editor><first>Sanja</first><last>Štajner</last><affiliation>Karlsruhe, Germany</affiliation></editor>
+      <editor id="sanja-stajner"><first>Sanja</first><last>Štajner</last><affiliation>Karlsruhe, Germany</affiliation></editor>
       <editor><first>Regina</first><last>Stodden</last><affiliation>Heinrich Heine University Dusseldorf, Germany</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Miami, Florida, USA</address>
@@ -52,7 +52,7 @@
       <author><first>Shanyue</first><last>Guo</last><affiliation>The Hong Kong Polytechnic University</affiliation></author>
       <author><first>Tak-Sum</first><last>Wong</last><affiliation>Department of Chinese and Bilingual Studies</affiliation></author>
       <author><first>Emmanuele</first><last>Chersoni</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
-      <author><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Chu-Ren</first><last>Huang</last><affiliation>The Hong Kong Polytechnic Universiy</affiliation></author>
       <pages>20-26</pages>
       <abstract>The prediction of lexical complexity in context is assuming an increasing relevance in Natural Language Processing research, since identifying complex words is often the first step of text simplification pipelines. To the best of our knowledge, though, datasets annotated with complex words are available only for English and for a limited number of Western languages.In our paper, we introduce CompLex-ZH, a dataset including words annotated with complexity scores in sentential contexts for Chinese. Our data include sentences in Mandarin and Cantonese, which were selected from a variety of sources and textual genres. We provide a first evaluation with baselines combining hand-crafted and language models-based features.</abstract>
diff --git a/data/xml/2024.umrpw.xml b/data/xml/2024.umrpw.xml
index 5410be67fa..7ae673b9c9 100644
--- a/data/xml/2024.umrpw.xml
+++ b/data/xml/2024.umrpw.xml
@@ -25,7 +25,7 @@
       <author><first>Reza</first><last>Takhshid</last></author>
       <author><first>Tara</first><last>Azin</last></author>
       <author><first>Razieh</first><last>Shojaei</last></author>
-      <author><first>Mohammad</first><last>Bahrani</last></author>
+      <author id="mohammad-bahrani"><first>Mohammad</first><last>Bahrani</last></author>
       <pages>8–15</pages>
       <abstract>This paper introduces the Persian Abstract Meaning Representation (AMR) guidelines, a detailed guide for annotating Persian sentences with AMR, focusing on the necessary adaptations to fit Persian’s unique syntactic structures. We discuss the development process of a Persian AMR gold standard dataset consisting of 1562 sentences created following the guidelines. By examining the language specifications and nuances that distinguish AMR annotations of a low-resource language like Persian, we shed light on the challenges and limitations of developing a universal meaning representation framework. The guidelines and the dataset introduced in this study highlight such challenges, aiming to advance the field.</abstract>
       <url hash="2deb22ae">2024.umrpw-1.2</url>
diff --git a/data/xml/2024.uncertainlp.xml b/data/xml/2024.uncertainlp.xml
index 1af5a58461..f1bd1c39f2 100644
--- a/data/xml/2024.uncertainlp.xml
+++ b/data/xml/2024.uncertainlp.xml
@@ -6,12 +6,12 @@
       <editor><first>Raúl</first><last>Vázquez</last></editor>
       <editor><first>Hande</first><last>Celikkanat</last></editor>
       <editor><first>Dennis</first><last>Ulmer</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Swabha</first><last>Swayamdipta</last></editor>
       <editor><first>Wilker</first><last>Aziz</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <editor><first>Joris</first><last>Baan</last></editor>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>St Julians, Malta</address>
       <month>March</month>
@@ -32,7 +32,7 @@
       <author><first>Arka</first><last>Pal</last></author>
       <author><first>Samuel</first><last>Dooley</last><affiliation>Department of Computer Science, University of Maryland, College Park and Abacus.AI</affiliation></author>
       <author><first>Micah</first><last>Goldblum</last><affiliation>New York University</affiliation></author>
-      <author><first>Andrew</first><last>Wilson</last><affiliation>Cornell University and New York University</affiliation></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last><affiliation>Cornell University and New York University</affiliation></author>
       <pages>1-14</pages>
       <abstract>Large language models are increasingly deployed for high-stakes decision making, for example in financial and medical applications. In such applications, it is imperative that we be able to estimate our confidence in the answers output by a language model in order to assess risks. Although we can easily compute the probability assigned by a language model to the sequence of tokens that make up an answer, we cannot easily compute the probability of the answer itself, which could be phrased in numerous ways.While other works have engineered ways of assigning such probabilities to LLM outputs, a key problem remains: existing language models are poorly calibrated, often confident when they are wrong or unsure when they are correct. In this work, we devise a protocol called *calibration tuning* for finetuning LLMs to output calibrated probabilities. Calibration-tuned models demonstrate superior calibration performance compared to existing language models on a variety of question-answering tasks, including open-ended generation, without affecting accuracy. We further show that this ability transfers to new domains outside of the calibration-tuning train set.</abstract>
       <url hash="9eb3ff58">2024.uncertainlp-1.1</url>
@@ -62,7 +62,7 @@
     <paper id="4">
       <title>Linguistic Obfuscation Attacks and Large Language Model Uncertainty</title>
       <author><first>Sebastian</first><last>Steindl</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
-      <author><first>Ulrich</first><last>Schäfer</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
       <author><first>Bernd</first><last>Ludwig</last><affiliation>Universität Regensburg</affiliation></author>
       <author><first>Patrick</first><last>Levi</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
       <pages>35-40</pages>
diff --git a/data/xml/2024.unimplicit.xml b/data/xml/2024.unimplicit.xml
index 9a4c60aa83..c15bc49fb3 100644
--- a/data/xml/2024.unimplicit.xml
+++ b/data/xml/2024.unimplicit.xml
@@ -35,7 +35,7 @@
       <author><first>Katharina</first><last>Hechinger</last></author>
       <author><first>Matthias</first><last>Assenmacher</last></author>
       <author><first>Göran</first><last>Kauermann</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>22-32</pages>
       <abstract>In this work, we analyze the uncertainty that is inherently present in the labels used for supervised machine learning in natural language inference (NLI). In cases where multiple annotations per instance are available, neither the majority vote nor the frequency of individual class votes is a trustworthy representation of the labeling uncertainty. We propose modeling the votes via a Bayesian mixture model to recover the data-generating process, i.e., the “true” latent classes, and thus gain insight into the class variations. This will enable a better understanding of the confusion happening during the annotation process. We also assess the stability of the proposed estimation procedure by systematically varying the numbers of i) instances and ii) labels. Thereby, we observe that few instances with many labels can predict the latent class borders reasonably well, while the estimation fails for many instances with only a few labels. This leads us to conclude that multiple labels are a crucial building block for properly analyzing label uncertainty.</abstract>
       <url hash="1725722c">2024.unimplicit-1.2</url>
@@ -64,7 +64,7 @@
     <paper id="5">
       <title>Below the Sea (with the Sharks): Probing Textual Features of Implicit Sentiment in a Literary Case-study</title>
       <author><first>Yuri</first><last>Bizzoni</last></author>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <pages>54-61</pages>
       <abstract>Literary language presents an ongoing challenge for Sentiment Analysis due to its complex, nuanced, and layered form of expression. It is often suggested that effective literary writing is evocative, operating beneath the surface and understating emotional expression. To explore features of implicitness in literary expression, this study takes Ernest Hemingway’s The Old Man and the Sea as a case for examining implicit sentiment expression. We examine sentences where automatic sentiment annotations show substantial divergences from human sentiment annotations, and probe these sentences for distinctive traits. We find that sentences where humans perceived a strong sentiment while models did not are significantly lower in arousal and higher in concreteness than sentences where humans and models were more aligned, suggesting the importance of simplicity and concreteness for implicit sentiment expression in literary prose.</abstract>
       <url hash="ea074fe5">2024.unimplicit-1.5</url>
@@ -79,7 +79,7 @@
       <author><first>François</first><last>Maine</last></author>
       <author><first>François</first><last>Bancilhon</last></author>
       <author><first>Guillaume</first><last>Gadek</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Paul</first><last>Égré</last></author>
       <pages>62-72</pages>
       <abstract>This paper investigates the language of propaganda and its stylistic features. It presents the PPN dataset, standing for Propagandist Pseudo-News, a multisource, multilingual, multimodal dataset composed of news articles extracted from websites identified as propaganda sources by expert agencies. A limited sample from this set was randomly mixed with papers from the regular French press, and their URL masked, to conduct an annotation-experiment by humans, using 11 distinct labels. The results show that human annotators were able to reliably discriminate between the two types of press across each of the labels. We use different NLP techniques to identify the cues used by annotators, and to compare them with machine classification: first the analyzer VAGO to detect discourse vagueness and subjectivity, and then four different classifiers, two based on RoBERTa, one CATS using syntax, and one XGBoost combining syntactic and semantic features.</abstract>
@@ -91,7 +91,7 @@
       <author><first>Siyao</first><last>Peng</last></author>
       <author><first>Zihang</first><last>Sun</last></author>
       <author><first>Sebastian</first><last>Loftus</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>73-81</pages>
       <abstract>Named Entity Recognition (NER) is a key information extraction task with a long-standing tradition. While recent studies address and aim to correct annotation errors via re-labeling efforts, little is known about the sources of label variation, such as text ambiguity, annotation error, or guideline divergence. This is especially the case for high-quality datasets and beyond English CoNLL03. This paper studies disagreements in expert-annotated named entity datasets for three varieties: English, Danish, and Bavarian. We show that text ambiguity and artificial guideline changes are dominant factors for diverse annotations among high-quality revisions. We survey student annotations on a subset of difficult entities and substantiate the feasibility and necessity of manifold annotations for understanding named entity ambiguities from a distributional perspective.</abstract>
       <url hash="b9eb0fb0">2024.unimplicit-1.7</url>
diff --git a/data/xml/2024.unlp.xml b/data/xml/2024.unlp.xml
index a3612b371c..574a5ac788 100644
--- a/data/xml/2024.unlp.xml
+++ b/data/xml/2024.unlp.xml
@@ -114,7 +114,7 @@
     </paper>
     <paper id="10">
       <title>Fine-Tuning and Retrieval Augmented Generation for Question Answering Using Affordable Large Language Models</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <author><first>Radu</first><last>Chivereanu</last></author>
       <author><first>Stefan</first><last>Dumitrescu</last></author>
       <author><first>Octavian</first><last>Purcaru</last></author>
@@ -152,7 +152,7 @@
       <title>Eval-<fixed-case>UA</fixed-case>-tion 1.0: Benchmark for Evaluating <fixed-case>U</fixed-case>krainian (Large) Language Models</title>
       <author><first>Serhii</first><last>Hamotskyi</last></author>
       <author><first>Anna-Izabella</first><last>Levbarg</last></author>
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <pages>109–119</pages>
       <abstract>In this paper, we introduce Eval-UA-tion, a set of novel Ukrainian-language datasets aimed at evaluating the performance of language models on the Ukrainian language. The tasks include UA-CBT (inspired by the Children’s Book Test, a fill-in-the-gaps type task aimed at gauging the extent to which a story narrative is understood), UP-Titles (where the online newspaper <i>Ukrainska Pravda</i>‘s articles have to be matched to the correct title among 10 similar ones), and LMentry-static-UA/LMES (inspired by the LMentry benchmark, a set of tasks simple to solve for humans but hard for LMs, such as ‘which of these words is longer’ and ‘what is the fifth word of this sentence’). With the exception of UP-Titles, the tasks are built in a way to minimize contamination and use material unlikely to be present in the training sets of language models, and include a split for few-shot model prompting use that minimizes contamination. For each task human and random baselines are provided.</abstract>
       <url hash="a3ebe10b">2024.unlp-1.13</url>
diff --git a/data/xml/2024.vardial.xml b/data/xml/2024.vardial.xml
index 9ec1c07010..4928236839 100644
--- a/data/xml/2024.vardial.xml
+++ b/data/xml/2024.vardial.xml
@@ -7,8 +7,8 @@
       <editor><first>Tommi</first><last>Jauhiainen</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Mexico City, Mexico</address>
       <month>June</month>
@@ -77,7 +77,7 @@
       <title>When Elote, Choclo and Mazorca are not the Same. Isomorphism-Based Perspective to the <fixed-case>S</fixed-case>panish Varieties Divergences</title>
       <author><first>Cristina</first><last>España-Bonet</last><affiliation>DFKI GmbH</affiliation></author>
       <author><first>Ankur</first><last>Bhatt</last><affiliation>DFKI GmbH</affiliation></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last><affiliation>Saarland Informatics Campus,Saarland University</affiliation></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last><affiliation>Saarland Informatics Campus,Saarland University</affiliation></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last><affiliation>Università di Bologna</affiliation></author>
       <pages>56-77</pages>
       <abstract>Spanish is an official language in 20 countries; in 19 of them, it arrived by means of overseas colonisation. Its close contact with several coexistent languages and the rich regional and cultural diversity has produced varieties which divert from each other. We study these divergences in a data-based approach and according to their qualitative and quantitative effects in word embeddings. We generate embeddings for Spanish in 24 countries and examine the topology of the spaces. Due to the similarities between varieties —in contrast to what happens to different languages in bilingual topological studies— we first scrutinise the behaviour of three isomorphism measures in (quasi-)isomorphic settings: relational similarity, Eigenvalue similarity and Gromov-Hausdorff distance. We then use the most trustworthy measure to quantify the divergences among varieties. Finally, we use the departures from isomorphism to build relational trees for the Spanish varieties by hierarchical clustering.</abstract>
@@ -117,7 +117,7 @@
     <paper id="8">
       <title>The Role of Adverbs in Language Variety Identification: The Case of <fixed-case>P</fixed-case>ortuguese Multi-Word Adverbs</title>
       <author><first>Izabela</first><last>Müller</last><affiliation>Universidade do Algarve</affiliation></author>
-      <author><first>Nuno</first><last>Mamede</last><affiliation>Universidade de Lisboa - Instituto Superior Técnico</affiliation></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last><affiliation>Universidade de Lisboa - Instituto Superior Técnico</affiliation></author>
       <author><first>Jorge</first><last>Baptista</last><affiliation>University of Algarve</affiliation></author>
       <pages>99-106</pages>
       <abstract>This paper aims to assess the role of multiword compound adverbs in distinguishing Brazilian Portuguese (PT-BR) from European Portuguese (PT-PT). Two key factors underpin this focus: Firstly, multiword expressions often provide less ambiguity compared to single words, even when their meaning is idiomatic (non-compositional). Secondly, despite constituting a significant portion of lexicons in many languages, they are frequently overlooked in Natural Language Processing, possibly due to their heterogeneous nature and lexical range.For this study, a large lexicon of Portuguese multiword adverbs (3,665) annotated with diatopic information regarding language variety was utilized. The paper investigates the distribution of this category in a corpus consisting in excerpts from journalistic texts sourced from the DSL (Dialect and Similar Language) corpus, representing Brazilian (PT-BR) and European Portuguese (PT-PT), respectively, each partition containing 18,000 sentences.Results indicate a substantial similarity between the two varieties, with a considerable overlap in the lexicon of multiword adverbs. Additionally, specific adverbs unique to each language variety were identified. Lexical entries recognized in the corpus represent 18.2% (PT-BR) to 19.5% (PT-PT) of the lexicon, and approximately 5,700 matches in each partition. While many of the matches are spurious due to ambiguity with otherwise non-idiomatic, free strings, occurrences of adverbs marked as exclusive to one variety in texts from the other variety are rare.</abstract>
@@ -162,7 +162,7 @@
     <paper id="12">
       <title>Experiments in Multi-Variant Natural Language Processing for <fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Robert</first><last>Pugh</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <pages>140-151</pages>
       <abstract>Linguistic variation is a complicating factor for digital language technologies. This is particularly true for languages that lack an official “standard” variety, including many regional and minoritized languages. In this paper, we describe a set of experiments focused on multivariant natural language processing for the Nahuatl, an indigenous Mexican language with a high level of linguistic variation and no single recognized standard variant. Using small (10k tokens), recently-published annotated datasets for two Nahuatl variants, we compare the performance of single-variant, cross-variant, and joint training, and explore how different models perform on a third Nahuatl variant, unseen in training. These results and the subsequent discussion contribute to efforts of developing low-resource NLP that is robust to diatopic variation. We share all code used to process the data and run the experiments.</abstract>
       <url hash="d0a4c501">2024.vardial-1.12</url>
@@ -272,7 +272,7 @@
       <author><first>Thi Anh</first><last>Nguyen</last><affiliation>Vietnam National University, Hanoi</affiliation></author>
       <author><first>My</first><last>Ha</last><affiliation>Vietnam National University, Hanoi</affiliation></author>
       <author><first>Thi Minh</first><last>Nguyen</last><affiliation>Vietnam National University, Hanoi</affiliation></author>
-      <author><first>Phuong</first><last>Le-Hong</last><affiliation>Vietnam National University, Hanoi</affiliation></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last><affiliation>Vietnam National University, Hanoi</affiliation></author>
       <pages>235-240</pages>
       <abstract>The VLP team participated in the DSL-ML shared task of the VarDial 2024 workshop which aims to distinguish texts in similar languages. This paper presents our approach to solving the problem and discusses our experimental and official results. We propose to integrate semantics-aware word embeddings which are learned from ConceptNet into a bidirectional long short-term memory network. This approach achieves good performance – our sys- tem is ranked in the top two or three of the best performing teams for the task.</abstract>
       <url hash="7871f404">2024.vardial-1.21</url>
diff --git a/data/xml/2024.wassa.xml b/data/xml/2024.wassa.xml
index cb7c31920a..3f06d090a5 100644
--- a/data/xml/2024.wassa.xml
+++ b/data/xml/2024.wassa.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2024-07-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 14th Workshop on Computational Approaches to Subjectivity, Sentiment, &amp; Social Media Analysis</booktitle>
-      <editor><first>Orphée</first><last>De Clercq</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></editor>
       <editor><first>Valentin</first><last>Barriere</last></editor>
       <editor><first>Jeremy</first><last>Barnes</last></editor>
       <editor><first>Roman</first><last>Klinger</last></editor>
@@ -67,7 +67,7 @@
     <paper id="5">
       <title>A Systematic Analysis on the Temporal Generalization of Language Models in Social Media</title>
       <author><first>Asahi</first><last>Ushio</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>52-62</pages>
       <abstract>In machine learning, temporal shifts occur when there are differences between training and test splits in terms of time. For streaming data such as news or social media, models are commonly trained on a fixed corpus from a certain period of time, and they can become obsolete due to the dynamism and evolving nature of online content. This paper focuses on temporal shifts in social media and, in particular, Twitter. We propose a unified evaluation scheme to assess the performance of language models (LMs) under temporal shift on standard social media tasks. LMs are tested on five diverse social media NLP tasks under different temporal settings, which revealed two important findings: (i) the decrease in performance under temporal shift is consistent across different models for entity-focused tasks such as named entity recognition or disambiguation, and hate speech detection, but not significant in the other tasks analysed (i.e., topic and sentiment classification); and (ii) continuous pre-training on the test period does not improve the temporal adaptability of LMs.</abstract>
       <url hash="a2a38e46">2024.wassa-1.5</url>
@@ -89,7 +89,7 @@
       <title>A Multi-Faceted <fixed-case>NLP</fixed-case> Analysis of Misinformation Spreaders in <fixed-case>T</fixed-case>witter</title>
       <author><first>Dimosthenis</first><last>Antypas</last></author>
       <author><first>Alun</first><last>Preece</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>71-83</pages>
       <abstract>Social media is an integral part of the daily life of an increasingly large number of people worldwide. Used for entertainment, communication and news updates, it constitutes a source of information that has been extensively used to study human behaviour. Unfortunately, the open nature of social media platforms along with the difficult task of supervising their content has led to a proliferation of misinformation posts. In this paper, we aim to identify the textual differences between the profiles of user that share misinformation from questionable sources and those that do not. Our goal is to better understand user behaviour in order to be better equipped to combat this issue. To this end, we identify Twitter (X) accounts of potential misinformation spreaders and apply transformer models specialised in social media to extract characteristics such as sentiment, emotion, topic and presence of hate speech. Our results indicate that, while there may be some differences between the behaviour of users that share misinformation and those that do not, there are no large differences when it comes to the type of content shared.</abstract>
       <url hash="ad769db3">2024.wassa-1.7</url>
@@ -100,7 +100,7 @@
       <title>Entity-Level Sentiment: More than the Sum of Its Parts</title>
       <author><first>Egil</first><last>Rønningstad</last></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
       <pages>84-96</pages>
       <abstract>In sentiment analysis of longer texts, there may be a variety of topics discussed, of entities mentioned, and of sentiments expressed regarding each entity. We find a lack of studies exploring how such texts express their sentiment towards each entity of interest, and how these sentiments can be modelled. In order to better understand how sentiment regarding persons and organizations (each entity in our scope) is expressed in longer texts, we have collected a dataset of expert annotations where the overall sentiment regarding each entity is identified, together with the sentence-level sentiment for these entities separately. We show that the reader’s perceived sentiment regarding an entity often differs from an arithmetic aggregation of sentiments at the sentence level. Only 70% of the positive and 55% of the negative entities receive a correct overall sentiment label when we aggregate the (human-annotated) sentiment labels for the sentences where the entity is mentioned. Our dataset reveals the complexity of entity-specific sentiment in longer texts, and allows for more precise modelling and evaluation of such sentiment expressions.</abstract>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="15">
       <title>Comparing Tools for Sentiment Analysis of <fixed-case>D</fixed-case>anish Literature from Hymns to Fairy Tales: Low-Resource Language and Domain Challenges</title>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <author><first>Jan</first><last>Kostkan</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Ea</first><last>Overgaard</last></author>
       <author><first>Mia</first><last>Jacobsen</last></author>
@@ -237,7 +237,7 @@
     <paper id="20">
       <title><fixed-case>POL</fixed-case>ygraph: <fixed-case>P</fixed-case>olish Fake News Dataset</title>
       <author><first>Daniel</first><last>Dzienisiewicz</last><affiliation>Adam Mickiewicz University of Poznan</affiliation></author>
-      <author><first>Filip</first><last>Graliński</last><affiliation>Adam Mickiewicz University, Adam Mickiewicz University, Applica.ai and Applica.ai</affiliation></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last><affiliation>Adam Mickiewicz University, Adam Mickiewicz University, Applica.ai and Applica.ai</affiliation></author>
       <author><first>Piotr</first><last>Jabłoński</last></author>
       <author><first>Marek</first><last>Kubis</last><affiliation>Adam Mickiewicz University of Poznan</affiliation></author>
       <author><first>Paweł</first><last>Skórzewski</last><affiliation>Adam Mickiewicz University of Poznan</affiliation></author>
@@ -312,7 +312,7 @@
       <title>Comparing Pre-trained Human Language Models: Is it Better with Human Context as Groups, Individual Traits, or Both?</title>
       <author><first>Nikita</first><last>Soni</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>State University of New York, Stony Brook</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University (SUNY)</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>316-328</pages>
       <abstract>Pre-trained language models consider the context of neighboring words and documents but lack any author context of the human generating the text. However, language depends on the author’s states, traits, social, situational, and environmental attributes, collectively referred to as human context (Soni et al., 2024). Human-centered natural language processing requires incorporating human context into language models. Currently, two methods exist: pre-training with 1) group-wise attributes (e.g., over-45-year-olds) or 2) individual traits. Group attributes are simple but coarse — not all 45-year-olds write the same way — while individual traits allow for more personalized representations, but require more complex modeling and data. It is unclear which approach benefits what tasks. We compare pre-training models with human context via 1) group attributes, 2) individual users, and 3) a combined approach on five user- and document-level tasks. Our results show that there is no best approach, but that human-centered language modeling holds avenues for different methods.</abstract>
@@ -345,7 +345,7 @@
       <title>To Aggregate or Not to Aggregate. That is the Question: A Case Study on Annotation Subjectivity in Span Prediction</title>
       <author><first>Kemal</first><last>Kurniawan</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Meladel</first><last>Mistica</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>362-368</pages>
       <abstract>This paper explores the task of automatic prediction of text spans in a legal problem description that support a legal area label. We use a corpus of problem descriptions written by laypeople in English that is annotated by practising lawyers. Inherent subjectivity exists in our task because legal area categorisation is a complex task, and lawyers often have different views on a problem. Experiments show that training on majority-voted spans outperforms training on disaggregated ones.</abstract>
@@ -379,7 +379,7 @@
       <title>Chinchunmei at <fixed-case>WASSA</fixed-case> 2024 Empathy and Personality Shared Task: Boosting <fixed-case>LLM</fixed-case>’s Prediction with Role-play Augmentation and Contrastive Reasoning Calibration</title>
       <author><first>Tian</first><last>Li</last></author>
       <author><first>Nicolay</first><last>Rusnachenko</last></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
       <pages>385-392</pages>
       <abstract>This paper presents the Chinchunmei team’s contributions to the WASSA2024 Shared-Task 1: Empathy Detection and Emotion Classification. We participated in Tracks 1, 2, and 3 to predict empathetic scores based on dialogue, article, and essay content. We choose Llama3-8b-instruct as our base model. We developed three supervised fine-tuning schemes: standard prediction, role-play, and contrastive prediction, along with an innovative scoring calibration method called Contrastive Reasoning Calibration during inference. Pearson Correlation was used as the evaluation metric across all tracks. For Track 1, we achieved 0.43 on the devset and 0.17 on the testset. For Track 2 emotion, empathy, and polarity labels, we obtained 0.64, 0.66, and 0.79 on the devset and 0.61, 0.68, and 0.58 on the testset. For Track 3 empathy and distress labels, we got 0.64 and 0.56 on the devset and 0.33 and 0.35 on the testset.</abstract>
       <url hash="2858e73a">2024.wassa-1.32</url>
@@ -401,7 +401,7 @@
     <paper id="34">
       <title>Zhenmei at <fixed-case>WASSA</fixed-case>-2024 Empathy and Personality Shared Track 2 Incorporating <fixed-case>P</fixed-case>earson Correlation Coefficient as a Regularization Term for Enhanced Empathy and Emotion Prediction in Conversational Turns</title>
       <author><first>Liting</first><last>Huang</last></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
       <pages>399-403</pages>
       <abstract>In the realm of conversational empathy and emotion prediction, emotions are frequently categorized into multiple levels. This study seeks to enhance the performance of emotion prediction models by incorporating the Pearson correlation coefficient as a regularization term within the loss function. This regularization approach ensures closer alignment between predicted and actual emotion levels, mitigating extreme predictions and resulting in smoother and more consistent outputs. Such outputs are essential for capturing the subtle transitions between continuous emotion levels. Through experimental comparisons between models with and without Pearson regularization, our findings demonstrate that integrating the Pearson correlation coefficient significantly boosts model performance, yielding higher correlation scores and more accurate predictions. Our system officially ranked 9th at the Track 2: CONV-turn. The code for our model can be found at Link .</abstract>
       <url hash="20c28555">2024.wassa-1.34</url>
@@ -421,7 +421,7 @@
     <paper id="36">
       <title><fixed-case>NU</fixed-case> at <fixed-case>WASSA</fixed-case> 2024 Empathy and Personality Shared Task: Enhancing Personality Predictions with Knowledge Graphs; A Graphical Neural Network and <fixed-case>L</fixed-case>ight<fixed-case>GBM</fixed-case> Ensemble Approach</title>
       <author><first>Emmanuel</first><last>Osei-Brefo</last><affiliation>Newcastle University, UK</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
       <pages>412-419</pages>
       <abstract>This paper proposes a novel ensemble approach that combines Graph Neural Networks (GNNs) and LightGBM to enhance personality prediction based on the personality Big 5 model. By integrating BERT embeddings from user essays with knowledge graph-derived embeddings, our method accurately captures rich semantic and relational information. Additionally, a special loss function that combines Mean Squared Error (MSE), Pearson correlation loss, and contrastive loss to improve model performance is introduced. The proposed ensemble model, made of Graph Convolutional Networks (GCNs), Graph Attention Networks (GATs), and LightGBM, demonstrates superior performance over other models, with significant improvements in prediction accuracy for the Big Five personality traits achieved. Our system officially ranked <tex-math>2^{nd}</tex-math> at the Track 4: PER track.</abstract>
       <url hash="2cb19a77">2024.wassa-1.36</url>
@@ -455,7 +455,7 @@
       <author><first>Liting</first><last>Huang</last></author>
       <author><first>Tian</first><last>Li</last></author>
       <author><first>Nicolay</first><last>Rusnachenko</last></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University, UK</affiliation></author>
       <pages>430-434</pages>
       <abstract>This paper presents our participation to the WASSA 2024 Shared Task on Empathy Detection and Emotion Classification and Personality Detection in Interactions. We focus on Track 2: Empathy and Emotion Prediction in Conversations Turns (CONV-turn), which consists of predicting the perceived empathy, emotion polarity and emotion intensity at turn level in a conversation. In the method, we conduct BERT and DeBERTa based finetuning, implement the CombinedLoss which consists of a structured contrastive loss and Pearson loss, adopt adversarial training using Fast Gradient Method (FGM). This method achieved Pearson correlation of 0.581 for Emotion,0.644 for Emotional Polarity and 0.544 for Empathy on the test set, with the average value of 0.590 which ranked 4th among all teams. After submission to WASSA 2024 competition, we further introduced the segmented mix-up for data augmentation, boosting for ensemble and regression experiments, which yield even better results: 0.6521 for Emotion, 0.7376 for EmotionalPolarity, 0.6326 for Empathy in Pearson correlation on the development set. The implementation and fine-tuned models are publicly-available at https://github.com/hyy-33/hyy33-WASSA-2024-Track-2.</abstract>
       <url hash="966b1b9f">2024.wassa-1.39</url>
@@ -557,9 +557,9 @@
     <paper id="48">
       <title><fixed-case>PCICUNAM</fixed-case> at <fixed-case>WASSA</fixed-case> 2024: Cross-lingual Emotion Detection Task with Hierarchical Classification and Weighted Loss Functions</title>
       <author><first>Jesús</first><last>Vázquez-Osorio</last><affiliation>Universidad Nacional Autónoma de México, Posgrado en Ciencia e Ingeniería de la Computación</affiliation></author>
-      <author><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México, Instituto de Ingeniería</affiliation></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México, Instituto de Ingeniería</affiliation></author>
       <author><first>Helena</first><last>Gómez-Adorno</last><affiliation>Universidad Nacional Autónoma de México, Instituto de Investigaciones en Matemáticas Aplicadas y en Sistemas</affiliation></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México, Instituto de Ingeniería</affiliation></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México, Instituto de Ingeniería</affiliation></author>
       <pages>490-494</pages>
       <abstract>This paper addresses the shared task of multi-lingual emotion detection in tweets, presented at the Workshop on Computational Approaches to Subjectivity, Sentiment, and Social Media Analysis (WASSA) co-located with the ACL 2024 conference. The task involves predicting emotions from six classes in tweets from five different languages using only English for model training. Our approach focuses on addressing class imbalance through data augmentation, hierarchical classification, and the application of focal loss and weighted cross-entropy loss functions. These methods enhance our transformer-based model’s ability to transfer emotion detection capabilities across languages, resulting in improved performance despite the constraints of limited computational resources.</abstract>
       <url hash="9a075abd">2024.wassa-1.48</url>
@@ -572,7 +572,7 @@
       <author><first>Qihao</first><last>Shao</last><affiliation>University of Washington</affiliation></author>
       <author><first>Christine</first><last>Zhao</last><affiliation>University of Washington</affiliation></author>
       <author><first>Sheng</first><last>Bi</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>495-504</pages>
       <abstract>Cross-lingual emotion detection allows us to analyze global trends, public opinion, and social phenomena at scale. We participated in the Explainability of Cross-lingual Emotion Detection (EXALT) shared task, achieving an F1-score of 0.6046 on the evaluation set for the emotion detection sub-task. Our system outperformed the baseline by more than 0.16 F1-score absolute, and ranked second amongst competing systems. We conducted experiments using fine-tuning, zero-shot learning, and few-shot learning for Large Language Model (LLM)-based models as well as embedding-based BiLSTM and KNN for non-LLM-based techniques. Additionally, we introduced two novel methods: the Multi-Iteration Agentic Workflow and the Multi-Binary-Classifier Agentic Workflow. We found that LLM-based approaches provided good performance on multilingual emotion detection. Furthermore, ensembles combining all our experimented models yielded higher F1-scores than any single approach alone.</abstract>
       <url hash="b3d2204e">2024.wassa-1.49</url>
@@ -597,7 +597,7 @@
       <author><first>Jeongyeob</first><last>Hong</last><affiliation>University of Washington</affiliation></author>
       <author><first>Andrew</first><last>Wang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Anita</first><last>Silva</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>511-522</pages>
       <abstract>This paper introduces our submitted systems for WASSA 2024 Shared Task 2: Cross-Lingual Emotion Detection. We implemented a BERT-based classifier and an in-context learning-based system. Our best-performing model, using English Chain of Thought prompts with trigger words, reached 3rd overall with an F1 score of 0.6015. Following the motivation of the shared task, we further analyzed the scalability and transferability of the monolingual English dataset on cross-lingual tasks. Our analysis demonstrates the importance of data quality over quantity. We also found that augmented multilingual data does not necessarily perform better than English monolingual data in cross-lingual tasks. We open-sourced the augmented data and source code of our system for future research.</abstract>
       <url hash="e6b11985">2024.wassa-1.51</url>
@@ -610,7 +610,7 @@
       <author><first>Keren</first><last>Ruditsky</last><affiliation>University of Washington</affiliation></author>
       <author><first>Anna</first><last>Batra</last><affiliation>University of Washington</affiliation></author>
       <author><first>Yulha</first><last>Lhawa</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>523-527</pages>
       <abstract>This paper describes our task 1 submission for the WASSA 2024 shared task on Explainability for Cross-lingual Emotion in Tweets. Our task is to predict the correct emotion label (Anger, Sadness, Fear, Joy, Love, and Neutral) for a dataset of English, Dutch, French, Spanish, and Russian tweets, while training exclusively on English emotion labeled data, to reveal what kind of emotion detection information is transferable cross-language (Maladry et al., 2024). To that end, we used an ensemble of models with a GPT-4 decider. Our ensemble consisted of a few-shot GPT-4 prompt system and a TwHIN-BERT system fine-tuned on the EXALT and additional English data. We ranked 8th place under the name WU_TLAXE with an F1 Macro score of 0.573 on the test set. We also experimented with an English-only TwHIN-BERT model by translating the other languages into English for inference, which proved to be worse than the other models.</abstract>
       <url hash="2880f66f">2024.wassa-1.52</url>
diff --git a/data/xml/2024.wat.xml b/data/xml/2024.wat.xml
index 24cbe7a1be..a68189571b 100644
--- a/data/xml/2024.wat.xml
+++ b/data/xml/2024.wat.xml
@@ -60,7 +60,7 @@
     <paper id="4">
       <title>Are Large Language Models State-of-the-art Quality Estimators for Machine Translation of User-generated Content?</title>
       <author><first>Shenbin</first><last>Qian</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Félix</first><last>Do Carmo</last><affiliation>University of Surrey</affiliation></author>
       <pages>45-55</pages>
diff --git a/data/xml/2024.wikinlp.xml b/data/xml/2024.wikinlp.xml
index 965f9f1c3f..a47ada4885 100644
--- a/data/xml/2024.wikinlp.xml
+++ b/data/xml/2024.wikinlp.xml
@@ -76,7 +76,7 @@
       <author><first>K.</first><last>Salas-Jimenez</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <author><first>Francisco Fernando</first><last>Lopez-Ponce</last></author>
       <author><first>Sergio-Luis</first><last>Ojeda-Trueba</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <pages>46-52</pages>
       <abstract>This paper explores whether it is possible to train a machine learning model using Wikipedia data to detect subjectivity in sentences and generalize effectively to other domains. To achieve this, we performed experiments with the WikiBias corpus, the BABE corpus, and the CheckThat! Dataset. Various classical models for ML were tested, including Logistic Regression, SVC, and SVR, including characteristics such as Sentence Transformers similarity, probabilistic sentiment measures, and biased lexicons. Pre-trained models like DistilRoBERTa, as well as large language models like Gemma and GPT-4, were also tested for the same classification task.</abstract>
       <url hash="a824a4e0">2024.wikinlp-1.10</url>
@@ -86,7 +86,7 @@
     <paper id="11">
       <title><fixed-case>HOAXPEDIA</fixed-case>: A Unified <fixed-case>W</fixed-case>ikipedia Hoax Articles Dataset</title>
       <author><first>Hsuvas</first><last>Borkakoty</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
       <pages>53-66</pages>
       <abstract>Hoaxes are a recognised form of disinformation created deliberately, with potential serious implications in the credibility of reference knowledge resources such as Wikipedia. What makes detecting Wikipedia hoaxes hard is that they often are written according to the official style guidelines. In this work, we first provide a systematic analysis of similarities and discrepancies between legitimate and hoax Wikipedia articles, and introduce HOAXPEDIA, a collection of 311 hoax articles (from existing literature and official Wikipedia lists), together with semantically similar legitimate articles, which together form a binary text classification dataset aimed at fostering research in automated hoax detection. In this paper, We report results after analyzing several language models, hoax-to-legit ratios, and the amount of text classifiers are exposed to (full article vs the article’s definition alone). Our results suggest that detecting deceitful content in Wikipedia based on content alone is hard but feasible, and complement our analysis with a study on the differences in distributions in edit histories, and find that looking at this feature yields better classification results than context.</abstract>
       <url hash="bc54668c">2024.wikinlp-1.11</url>
@@ -158,7 +158,7 @@
       <author><first>Daniel</first><last>Cheng</last></author>
       <author><first>Phillip</first><last>Keung</last><affiliation>University of Washington</affiliation></author>
       <author><first>Jungo</first><last>Kasai</last><affiliation>Toyota Technological Institute at Chicago</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>126-135</pages>
       <abstract>Generative retrieval (Wang et al., 2022; Tay et al., 2022) is a popular approach for end-to-end document retrieval that directly generates document identifiers given an input query. We introduce summarization-based document IDs, in which each document’s ID is composed of an extractive summary or abstractive keyphrases generated by a language model, rather than an integer ID sequence or bags of n-grams as proposed in past work. We find that abstractive, content-based IDs (ACID) and an ID based on the first 30 tokens are very effective in direct comparisons with previous approaches to ID creation. We show that using ACID improves top-10 and top-20 recall by 15.6% and 14.4% (relative) respectively versus the cluster-based integer ID baseline on the MSMARCO 100k retrieval task, and 9.8% and 9.9% respectively on the Wikipedia-based NQ 100k retrieval task. Our results demonstrate the effectiveness of human-readable, natural-language IDs created through summarization for generative retrieval. We also observed that extractive IDs outperformed abstractive IDs on Wikipedia articles in NQ but not the snippets in MSMARCO, which suggests that document characteristics affect generative retrieval performance.</abstract>
       <url hash="54becc2f">2024.wikinlp-1.18</url>
diff --git a/data/xml/2024.wildre.xml b/data/xml/2024.wildre.xml
index 953ea03616..069057b669 100644
--- a/data/xml/2024.wildre.xml
+++ b/data/xml/2024.wildre.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2024-05-18" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Indian Language Data: Resources and Evaluation</booktitle>
-      <editor><first>Girish Nath</first><last>Jha</last></editor>
+      <editor id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></editor>
       <editor><first>Sobha</first><last>L.</last></editor>
       <editor><first>Kalika</first><last>Bali</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <publisher>ELRA and ICCL</publisher>
       <address>Torino, Italia</address>
       <month>May</month>
@@ -24,7 +24,7 @@
       <author><first>Chayan</first><last>Kochar</last></author>
       <author><first>Vandan Vasantlal</first><last>Mujadia</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>1–10</pages>
       <abstract>In the natural course of spoken language, individuals often engage in thinking and self-correction during speech production. These instances of interruption or correction are commonly referred to as disfluencies. When preparing data for subsequent downstream NLP tasks, these linguistic elements can be systematically removed, or handled as required, to enhance data quality. In this study, we present a comprehensive research on disfluencies in Indian languages. Our approach involves not only annotating real-world conversation transcripts but also conducting a detailed analysis of linguistic nuances inherent to Indian languages that are necessary to consider during annotation. Additionally, we introduce a robust algorithm for the synthetic generation of disfluent data. This algorithm aims to facilitate more effective model training for the identification of disfluencies in real-world conversations, thereby contributing to the advancement of disfluency research in Indian languages.</abstract>
       <url hash="f60b9279">2024.wildre-1.1</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>E</fixed-case>mo<fixed-case>M</fixed-case>ix-3<fixed-case>L</fixed-case>: A Code-Mixed Dataset for <fixed-case>B</fixed-case>angla-<fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi for Emotion Detection</title>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Dhiman</first><last>Goswami</last></author>
       <author><first>Antara</first><last>Mahmud</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
@@ -50,7 +50,7 @@
       <author><first>Shardul</first><last>Suryawanshi</last></author>
       <author><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>17–23</pages>
       <abstract>This paper describes the structure and findings of the WILDRE 2024 shared task on Code-mixed Less-resourced Sentiment Analysis for Indo-Aryan Languages. The participants were asked to submit the test data’s final prediction on CodaLab. A total of fourteen teams registered for the shared task. Only four participants submitted the system for evaluation on CodaLab, with only two teams submitting the system description paper. While all systems show a rather promising performance, they outperform the baseline scores.</abstract>
       <url hash="d5e48df6">2024.wildre-1.3</url>
@@ -91,8 +91,8 @@
     </paper>
     <paper id="7">
       <title>Finding the Causality of an Event in News Articles</title>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
       <pages>47–53</pages>
       <abstract>This paper discusses about the finding of causality of an event in newspaper articles. The analysis of causality , otherwise known as cause and effect is crucial for building efficient Natural Language Understanding (NLU) supported AI systems such as Event tracking and it is considered as a complex semantic relation under discourse theory. A cause-effect relation consists of a linguistic marker and its two arguments. The arguments are semantic arguments where the cause is the first argument (Arg1) and the effect is the second argument(Arg2). In this work we have considered the causal relations in Tamil Newspaper articles. The analysis of causal constructions, the causal markers and their syntactic relation lead to the identification of different features for developing the language model using RBMs (Restricted Boltzmann Machine). The experiments we performed have given encouraging results. The Cause-Effect system developed is used in a mobile App for Event profiling called “Nigalazhvi” where the cause and effect of an event is identified and given to the user.</abstract>
       <url hash="a2e15d38">2024.wildre-1.7</url>
@@ -136,7 +136,7 @@
       <author><first>Himanshu</first><last>Singh</last></author>
       <author><first>Kengatharaiyer</first><last>Sarveswaran</last></author>
       <author><first>William Chandra</first><last>Tjhi</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>73–83</pages>
       <abstract>Tamil is a relatively low-resource language in the field of Natural Language Processing (NLP). Recent years have seen a growth in Tamil NLP datasets in Natural Language Understanding (NLU) or Natural Language Generation (NLG) tasks, but high-quality linguistic resources remain scarce. In order to alleviate this gap in resources, this paper introduces Aalamaram, a treebank with rich linguistic annotations for the Tamil language. It is hitherto the largest publicly available Tamil treebank with almost 10,000 sentences from diverse sources and is annotated for the tasks of Part-of-speech (POS) tagging, Named Entity Recognition (NER), Morphological Parsing and Dependency Parsing. Close attention has also been paid to multi-word segmentation, especially in the context of Tamil clitics. Although the treebank is based largely on the Universal Dependencies (UD) specifications, significant effort has been made to adjust the annotation rules according to the idiosyncrasies and complexities of the Tamil language, thereby providing a valuable resource for linguistic research and NLP developments.</abstract>
       <url hash="84bc38f4">2024.wildre-1.11</url>
diff --git a/data/xml/2024.wmt.xml b/data/xml/2024.wmt.xml
index 28ca2c34f4..dd51671bbe 100644
--- a/data/xml/2024.wmt.xml
+++ b/data/xml/2024.wmt.xml
@@ -26,7 +26,7 @@
       <author><first>Tom</first><last>Kocmi</last><affiliation>Cohere</affiliation></author>
       <author><first>Eleftherios</first><last>Avramidis</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Anton</first><last>Dvorkovich</last><affiliation>Yandex</affiliation></author>
       <author><first>Christian</first><last>Federmann</last><affiliation>Microsoft</affiliation></author>
       <author><first>Mark</first><last>Fishel</last><affiliation>University of Tartu</affiliation></author>
@@ -41,7 +41,7 @@
       <author><first>Kenton</first><last>Murray</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Masaaki</first><last>Nagata</last><affiliation>NTT Corporation</affiliation></author>
       <author><first>Martin</first><last>Popel</last><affiliation>Charles University, Faculty of Mathematics and Physics, UFAL</affiliation></author>
-      <author><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
       <author><first>Mariya</first><last>Shmatova</last><affiliation>Dubformer</affiliation></author>
       <author><first>Steinthór</first><last>Steingrímsson</last><affiliation>The Árni Magnússon Institute for Icelandic Studies</affiliation></author>
       <author><first>Vilém</first><last>Zouhar</last><affiliation>ETH Zurich, Charles University</affiliation></author>
@@ -60,13 +60,13 @@
       <author><first>Eleftherios</first><last>Avramidis</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
       <author><first>Brian</first><last>Thompson</last><affiliation>Amazon</affiliation></author>
-      <author><first>Frederic</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
       <author><first>Tom</first><last>Kocmi</last><affiliation>Cohere</affiliation></author>
       <author><first>Jiayi</first><last>Wang</last><affiliation>University College London</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
       <author><first>Marianna</first><last>Buchicchio</last><affiliation>Unbabel</affiliation></author>
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
-      <author><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last><affiliation>Unbabel/Carnegie Mellon University</affiliation></author>
       <pages>47-81</pages>
       <abstract>The WMT24 Metrics Shared Task evaluated the performance of automatic metrics for machine translation (MT), with a major focus on LLM-based translations that were generated as part of the WMT24 General MT Shared Task. As LLMs become increasingly popular in MT, it is crucial to determine whether existing evaluation metrics can accurately assess the output of these systems.To provide a robust benchmark for this evaluation, human assessments were collected using Multidimensional Quality Metrics (MQM), continuing the practice from recent years. Furthermore, building on the success of the previous year, a challenge set subtask was included, requiring participants to design contrastive test suites that specifically target a metric’s ability to identify and penalize different types of translation errors.Finally, the meta-evaluation procedure was refined to better reflect real-world usage of MT metrics, focusing on pairwise accuracy at both the system- and segment-levels.We present an extensive analysis on how well metrics perform on three language pairs: English to Spanish (Latin America), Japanese to Chinese, and English to German. The results strongly confirm the results reported last year, that fine-tuned neural metrics continue to perform well, even when used to evaluate LLM-based translation systems.</abstract>
       <url hash="fcac57cd">2024.wmt-1.2</url>
@@ -76,20 +76,20 @@
     <paper id="3">
       <title>Findings of the Quality Estimation Shared Task at <fixed-case>WMT</fixed-case> 2024: Are <fixed-case>LLM</fixed-case>s Closing the Gap in <fixed-case>QE</fixed-case>?</title>
       <author><first>Chrysoula</first><last>Zerva</last><affiliation>Instituto de Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon</affiliation></author>
-      <author><first>Frederic</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last><affiliation>Tilburg University</affiliation></author>
       <author><first>José G.</first><last>C. De Souza</last><affiliation>Unbabel</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Sourabh</first><last>Deoghare</last><affiliation>IIT Bombay</affiliation></author>
       <author><first>Nuno M.</first><last>Guerreiro</last><affiliation>Instituto de Telecomunicacoes, University of Lisbon</affiliation></author>
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicacoes</affiliation></author>
       <author><first>Ricardo</first><last>Rei</last><affiliation>Unbabel/INESC-ID</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom Video Communications</affiliation></author>
-      <author><first>Rajen</first><last>Chatterjee</last><affiliation>Apple Inc.</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last><affiliation>Apple Inc.</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology Bombay and Patna</affiliation></author>
       <author><first>Markus</first><last>Freitag</last><affiliation>Google Research</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>82-109</pages>
       <abstract>We report the results of the WMT 2024 shared task on Quality Estimation, in which the challenge is to predict the quality of the output of neural machine translation systems at the word and sentence levels, without access to reference translations. In this edition, we expanded our scope to assess the potential for quality estimates to help in the correction of translated outputs, hence including an automated post-editing (APE) direction. We publish new test sets with human annotations that target two directions: providing new Multidimensional Quality Metrics (MQM) annotations for three multi-domain language pairs (English to German, Spanish and Hindi) and extending the annotations on Indic languages providing direct assessments and post edits for translation from English into Hindi, Gujarati, Tamil and Telugu. We also perform a detailed analysis of the behaviour of different models with respect to different phenomena including gender bias, idiomatic language, and numerical and entity perturbations. We received submissions based both on traditional, encoder-based approaches as well as large language model (LLM) based ones.</abstract>
       <url hash="35fc381e">2024.wmt-1.3</url>
@@ -126,14 +126,14 @@
       <author><first>Philippe</first><last>Thomas</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Roland</first><last>Roller</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris-Saclay, CNRS, LISN</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>Université Paris-Saclay, CNRS, LISN</affiliation></author>
       <author><first>Steffen</first><last>Castle</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Vanessa</first><last>Bonato</last><affiliation>Dept. of Linguistic and Literary Studies University of Padua</affiliation></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last><affiliation>Dept. of Linguistic and Literary Studies University of Padua</affiliation></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last><affiliation>Dept. of Linguistic and Literary Studies University of Padua</affiliation></author>
       <author><first>Federica</first><last>Vezzani</last><affiliation>Dept. of Linguistic and Literary Studies University of Padua</affiliation></author>
       <author><first>Maika</first><last>Vicente Navarro</last><affiliation>Leica Biosystems</affiliation></author>
       <author><first>Lana</first><last>Yeganova</last><affiliation>NCBI/NLM/NIH</affiliation></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last><affiliation>RMIT University</affiliation></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last><affiliation>RMIT University</affiliation></author>
       <pages>124-138</pages>
       <abstract>We present the results of the ninth edition of the Biomedical Translation Task at WMT’24. We released test sets for six language pairs, namely, French, German, Italian, Portuguese, Russian, and Spanish, from and into English. Eachtest set consists of 50 abstracts from PubMed. Differently from previous years, we did not split abstracts into sentences. We received submissions from five teams, and for almost all language directions. We used a baseline/comparison system based on Llama 3.1 and share the source code at https://github.com/cgrozea/wmt24biomed-ref.</abstract>
       <url hash="8fea0ae5">2024.wmt-1.6</url>
@@ -217,7 +217,7 @@
       <author><first>Sweta</first><last>Agrawal</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>Antonio</first><last>Farinhas</last><affiliation>Instituto de Telecomunicacoes, IST</affiliation></author>
       <author><first>José G.</first><last>C. De Souza</last><affiliation>Unbabel</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>185-204</pages>
       <abstract>In this work, we present Tower v2, an improved iteration of the state-of-the-art open-weight Tower models, and the backbone of our submission to the WMT24 General Translation shared task. Tower v2 introduces key improvements including expanded language coverage, enhanced data quality, and increased model capacity up to 70B parameters. Our final submission combines these advancements with quality-aware decoding strategies, selecting translations based on multiple translation quality signals. The resulting system demonstrates significant improvement over previous versions, outperforming closed commercial systems like GPT-4o, Claude 3.5, and DeepL even at a smaller 7B scale.</abstract>
       <url hash="f2eaf28b">2024.wmt-1.12</url>
@@ -263,7 +263,7 @@
       <author><first>Huan</first><last>Liu</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Conghu</first><last>Yuan</last><affiliation>Global Tone Communication Technology Co., Ltd</affiliation></author>
       <author><first>Wentao</first><last>Chen</last><affiliation>Global Tone Communication Technology Co., Ltd</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>227-231</pages>
       <abstract>This paper presents the submission from Global Tone Communication Co., Ltd. and Dalian University of Technology for the WMT24 shared general Machine Translation (MT) task at the Conference on Empirical Methods in Natural Language Processing (EMNLP). Our participation encompasses two language pairs: English to Japanese and Japanese to Chinese. The systems are developed without particular constraints or requirements, facilitating extensive research in machine translation. We emphasize back-translation, utilize multilingual translation models, and apply fine-tuning strategies to improve performance. Additionally, we integrate both human-generated and machine-generated data to fine-tune our models, leading to enhanced translation accuracy. The automatic evaluation results indicate that our system ranks first in terms of BLEU score for the Japanese to Chinese translation.</abstract>
       <url hash="05a48a69">2024.wmt-1.15</url>
@@ -277,7 +277,7 @@
       <author><first>Martin</first><last>Popel</last><affiliation>Charles University, Faculty of Mathematics and Physics, UFAL</affiliation></author>
       <author><first>Nam</first><last>Luu</last><affiliation>Charles University</affiliation></author>
       <author><first>Danil</first><last>Semin</last><affiliation>MFF UK</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <pages>232-246</pages>
       <abstract>This paper presents the contributions of Charles University teams to the WMT24 General Translation task (English to Czech, German and Russian, and Czech to Ukrainian), and the WMT24 Translation into Low-Resource Languages of Spain task.Our most elaborate submission, CUNI-MH for en2cs, is the result of fine-tuning Mistral 7B v0.1 for translation using a three-stage process: Supervised fine-tuning using QLoRA, Contrastive Preference Optimization, and merging of model checkpoints. We also describe the CUNI-GA, CUNI-Transformer and CUNI-DocTransformer submissions, which are based on our systems from the previous year.Our en2ru system CUNI-DS uses a similar first stage as CUNI-MH (QLoRA for en2cs) and follows with transferring to en2ru.For en2de (CUNI-NL), we experimented with a LLM-based speech translation system, to translate without the speech input.For the Translation into Low-Resource Languages of Spain task, we performed QLoRA fine-tuning of a large LLM on a small amount of synthetic (backtranslated) data.</abstract>
       <url hash="64a7ce85">2024.wmt-1.16</url>
@@ -377,7 +377,7 @@
       <author><first>Annika</first><last>Grützner-Zahn</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Manuel</first><last>Brack</last><affiliation>DFKI, TU Darmstadt</affiliation></author>
       <author><first>Patrick</first><last>Schramowski</last><affiliation>TU Darmstadt</affiliation></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>Malte</first><last>Ostendorff</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
       <author><first>Fabio</first><last>Barth</last><affiliation>DFKI</affiliation></author>
       <author><first>Shushen</first><last>Manakhimova</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
@@ -394,7 +394,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>ST</fixed-case> of breaking the <fixed-case>LLM</fixed-case>s</title>
       <author><first>Ananya</first><last>Mukherjee</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Saumitra</first><last>Yadav</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>299-306</pages>
       <abstract>This paper presents an evaluation of 16 machine translation systems submitted to the Shared Task of the 9th Conference of Machine Translation (WMT24) for the English-Hindi (en-hi) language pair using our Complex Structures Test (CoST) suite. Aligning with this year’s test suite sub-task theme, “Help us break LLMs”, we curated a comprehensive test suite encompassing diverse datasets across various categories, including autobiography, poetry, legal, conversation, play, narration, technical, and mixed genres. Our evaluation reveals that all the systems struggle significantly with the archaic style of text like legal and technical writings or text with creative twist like conversation and poetry datasets, highlighting their weaknesses in handling complex linguistic structures and stylistic nuances inherent in these text types. Our evaluation identifies the strengths and limitations of the submitted models, pointing to specific areas where further research and development are needed to enhance their performance. Our test suite is available at <url>https://github.com/AnanyaCoder/CoST-WMT-24-Test-Suite-Task</url>.</abstract>
       <url hash="eb1387d6">2024.wmt-1.24</url>
@@ -462,7 +462,7 @@
     </paper>
     <paper id="30">
       <title>A Test Suite of Prompt Injection Attacks for <fixed-case>LLM</fixed-case>-based Machine Translation</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last><affiliation>The University of Edinburgh</affiliation></author>
       <author><first>Zhifan</first><last>Sun</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <pages>380-450</pages>
       <abstract>LLM-based NLP systems typically work by embedding their input data into prompt templates which contain instructions and/or in-context examples, creating queries which are submitted to a LLM, then parse the LLM response in order to generate the system outputs. Prompt Injection Attacks (PIAs) are a type of subversion of these systems where a malicious user crafts special inputs which interfer with the prompt templates, causing the LLM to respond in ways unintended by the system designer.Recently, Sun and Miceli-Barone (2024) proposed a class of PIAs against LLM-based machine translation. Specifically, the task is to translate questions from the TruthfulQA test suite, where an adversarial prompt is prepended to the questions, instructing the system to ignore the translation instruction and answer the questions instead.In this test suite we extend this approach to all the language pairs of the WMT 2024 General Machine Translation task. Moreover, we include additional attack formats in addition to the one originally studied.</abstract>
@@ -487,8 +487,8 @@
       <author><first>David</first><last>Anugraha</last><affiliation>University of Toronto</affiliation></author>
       <author><first>Garry</first><last>Kuwanto</last><affiliation>Boston University</affiliation></author>
       <author><first>Lucky</first><last>Susanto</last><affiliation>Universitas Indonesia</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
-      <author><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta</first><last>Winata</last><affiliation>Capital One AI Foundations</affiliation></author>
       <pages>459-469</pages>
       <abstract>We present MetaMetrics-MT, an innovative metric designed to evaluate machine translation (MT) tasks by aligning closely with human preferences through Bayesian optimization with Gaussian Processes. MetaMetrics-MT enhances existing MT metrics by optimizing their correlation with human judgments. Our experiments on the WMT24 metric shared task dataset demonstrate that MetaMetrics-MT outperforms all existing baselines, setting a new benchmark for state-of-the-art performance in the reference-based setting. Furthermore, it achieves comparable results to leading metrics in the reference-free setting, offering greater efficiency.</abstract>
       <url hash="63d12b50">2024.wmt-1.32</url>
@@ -498,7 +498,7 @@
     <paper id="33">
       <title>chr<fixed-case>F</fixed-case>-<fixed-case>S</fixed-case>: Semantics Is All You Need</title>
       <author><first>Ananya</first><last>Mukherjee</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>470-474</pages>
       <abstract>Machine translation (MT) evaluation metrics like BLEU and chrF++ are widely used reference-based metrics that do not require training and are language-independent. However, these metrics primarily focus on n-gram matching and often overlook semantic depth and contextual understanding. To address this gap, we introduce chrF-S (Semantic chrF++), an enhanced metric that integrates sentence embeddings to evaluate translation quality more comprehensively. By combining traditional character and word n-gram analysis with semantic information derived from embeddings, chrF-S captures both syntactic accuracy and sentence-level semantics. This paper presents our contributions to the WMT24 shared metrics task, showcasing our participation and the development of chrF-S. We also demonstrate that, according to preliminary results on the leaderboard, our metric performs on par with other supervised and LLM-based metrics. By merging semantic insights with n-gram precision, chrF-S offers a significant enhancement in the assessment of machine-generated translations, advancing the field of MT evaluation. Our code and data will be made available at <url>https://github.com/AnanyaCoder/chrF-S</url>.</abstract>
       <url hash="214509a6">2024.wmt-1.33</url>
@@ -531,7 +531,7 @@
     <paper id="36">
       <title>Evaluating <fixed-case>WMT</fixed-case> 2024 Metrics Shared Task Submissions on <fixed-case>A</fixed-case>fri<fixed-case>MTE</fixed-case> (the <fixed-case>A</fixed-case>frican Challenge Set)</title>
       <author><first>Jiayi</first><last>Wang</last><affiliation>University College London</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <pages>505-516</pages>
       <abstract>The AfriMTE challenge set from WMT 2024 Metrics Shared Task aims to evaluate the capabilities of evaluation metrics for machine translation on low-resource African languages, which primarily assesses cross-lingual transfer learning and generalization of machine translation metrics across a wide range of under-resourced languages. In this paper, we analyze the submissions to WMT 2024 Metrics Shared Task. Our findings indicate that language-specific adaptation, cross-lingual transfer learning, and larger language model sizes contribute significantly to improved metric performance. Moreover, supervised models with relatively moderate sizes demonstrate robust performance, when augmented with specific language adaptation for low-resource African languages. Finally, submissions show promising results for language pairs including Darija-French, English-Egyptian Arabic, and English-Swahili. However, significant challenges persist for extremely low-resource languages such as English-Luo and English-Twi, highlighting areas for future research and improvement in machine translation metrics for African languages.</abstract>
@@ -602,8 +602,8 @@
       <title>Expanding the <fixed-case>FLORES</fixed-case>+ Multilingual Benchmark with Translations for <fixed-case>A</fixed-case>ragonese, Aranese, <fixed-case>A</fixed-case>sturian, and <fixed-case>V</fixed-case>alencian</title>
       <author><first>Juan Antonio</first><last>Perez-Ortiz</last><affiliation>Departament de Llenguatges i Sistemes Informatics, Universitat d’Alacant</affiliation></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last><affiliation>Universitat d’Alacant</affiliation></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universitat d’Alacant</affiliation></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last><affiliation>Universitat d’Alacant</affiliation></author>
       <author><first>Aaron</first><last>Galiano Jimenez</last><affiliation>Universitat d’Alacant</affiliation></author>
       <author><first>Antoni</first><last>Oliver</last><affiliation>Universitat Oberta de Catalunya</affiliation></author>
       <author><first>Claudi</first><last>Aventín-Boya</last><affiliation>Universitat Oberta de Catalunya</affiliation></author>
@@ -623,7 +623,7 @@
       <title>The <fixed-case>B</fixed-case>angla/<fixed-case>B</fixed-case>engali Seed Dataset Submission to the <fixed-case>WMT</fixed-case>24 Open Language Data Initiative Shared Task</title>
       <author><first>Firoz</first><last>Ahmed</last><affiliation>University of Florida</affiliation></author>
       <author><first>Nitin</first><last>Venkateswaran</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
       <pages>556-566</pages>
       <abstract>We contribute a seed dataset for the Bangla/Bengali language as part of the WMT24 Open Language Data Initiative shared task. We validate the quality of the dataset against a mined and automatically aligned dataset (NLLBv1) and two other existing datasets of crowdsourced manual translations. The validation is performed by investigating the performance of state-of-the-art translation models fine-tuned on the different datasets after controlling for training set size. Machine translation models fine-tuned on our dataset outperform models tuned on the other datasets in both translation directions (English-Bangla and Bangla-English). These results confirm the quality of our dataset. We hope our dataset will support machine translation for the Bangla/Bengali community and related low-resource languages.</abstract>
       <url hash="ef5bd7c9">2024.wmt-1.42</url>
@@ -788,7 +788,7 @@
     <paper id="56">
       <title>Findings of <fixed-case>WMT</fixed-case>2024 <fixed-case>E</fixed-case>nglish-to-Low Resource Multimodal Translation Task</title>
       <author><first>Shantipriya</first><last>Parida</last><affiliation>Silo AI</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>Idris</first><last>Abdulmumin</last><affiliation>University of Pretoria</affiliation></author>
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last><affiliation>Bayero University, Kano</affiliation></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
@@ -865,7 +865,7 @@
       <title>A3-108 Controlling Token Generation in Low Resource Machine Translation Systems</title>
       <author><first>Saumitra</first><last>Yadav</last><affiliation>International Institute of Information Technology, Hyderabad</affiliation></author>
       <author><first>Ananya</first><last>Mukherjee</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <pages>728-734</pages>
       <abstract>Translating for languages with limited resources poses a persistent challenge due to the scarcity of high-quality training data. To enhance translation accuracy, we explored controlled generation mechanisms, focusing on the importance of control tokens. In our experiments, while training, we encoded the target sentence length as a control token to the source sentence, treating it as an additional feature for the source sentence. We developed various NMT models using transformer architecture and conducted experiments across 8 language directions (English = Assamese, Manipuri, Khasi, and Mizo), exploring four variations of length encoding mechanisms. Through comparative analysis against the baseline model, we submitted two systems for each language direction. We report our findings for the same in this work.</abstract>
       <url hash="876d74ff">2024.wmt-1.61</url>
@@ -890,7 +890,7 @@
       <author><first>Chenfei</first><last>Ju</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Junpeng</first><last>Liu</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Kaiyu</first><last>Huang</last><affiliation>Beijing Jiaotong University</affiliation></author>
-      <author><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>742-746</pages>
       <abstract>This paper describes the submission systems of DLUT-NLP team for the WMT24 low-resource Indic language translation shared task. We participated in the translation task of four language pairs, including en-as, en-mz, en-kha, en-mni.</abstract>
       <url hash="492dfea7">2024.wmt-1.63</url>
@@ -979,7 +979,7 @@
       <title><fixed-case>NLIP</fixed-case>_<fixed-case>L</fixed-case>ab-<fixed-case>IITH</fixed-case> Low-Resource <fixed-case>MT</fixed-case> System for <fixed-case>WMT</fixed-case>24 <fixed-case>I</fixed-case>ndic <fixed-case>MT</fixed-case> Shared Task</title>
       <author><first>Pramit</first><last>Sahoo</last><affiliation>Indian Institute of Technology Hyderabad</affiliation></author>
       <author><first>Maharaj</first><last>Brahma</last><affiliation>Indian Institute of Technology Hyderabad</affiliation></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
       <pages>781-787</pages>
       <abstract>In this paper, we describe our system for the WMT 24 shared task of Low-Resource Indic Language Translation. We consider eng↔{as, kha, lus, mni} as participating language pairs. In this shared task, we explore the fine-tuning of a pre-trained model motivated by the pre-trained objective of aligning embeddings closer by alignment augmentation (Lin et al.,2020) for 22 scheduled Indian languages. Our primary system is based on language-specific finetuning on a pre-trained model. We achieve chrF2 scores of 50.6, 42.3, 54.9, and 66.3 on the official public test set for eng→as, eng→kha, eng→lus, eng→mni respectively. We also explore multilingual training with/without language grouping and layer-freezing.</abstract>
       <url hash="65f717b1">2024.wmt-1.70</url>
@@ -1014,7 +1014,7 @@
       <author><first>Kshetrimayum Boynao</first><last>Singh</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <author><first>Ningthoujam Avichandra</first><last>Singh</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <author><first>Sanjita</first><last>Phijam</last><affiliation>National Institute of Technology Silchar</affiliation></author>
-      <author><first>Thoudam Doren</first><last>Singh</last><affiliation>National Institute of Technology Silchar</affiliation></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last><affiliation>National Institute of Technology Silchar</affiliation></author>
       <pages>797-803</pages>
       <abstract>This paper presents a Transformer-based Neural Machine Translation (NMT) system developed by the Centre for Natural Language Processing and the Department of Computer Science and Engineering at the National Institute of Technology Silchar, India (NITS-CNLP) for the MultiIndic22MT 2024 Shared Task. The system focused on the English-Manipuri language pair for the WMT24 shared task. The proposed WMT system shows a BLEU score of 6.4, a chrF score of 28.6, and a chrF++ score of 26.6 on the public test set Indic-Conv dataset. Further, in the public test set Indic-Gen dataset, it achieved a BLEU score of 8.1, a chrF score of 32.1, and a chrF++ score of 29.4 on the English-to-Manipuri translation.</abstract>
       <url hash="9247b052">2024.wmt-1.73</url>
@@ -1025,7 +1025,7 @@
       <title><fixed-case>NLIP</fixed-case>-Lab-<fixed-case>IITH</fixed-case> Multilingual <fixed-case>MT</fixed-case> System for <fixed-case>WAT</fixed-case>24 <fixed-case>MT</fixed-case> Shared Task</title>
       <author><first>Maharaj</first><last>Brahma</last><affiliation>Indian Institute of Technology Hyderabad</affiliation></author>
       <author><first>Pramit</first><last>Sahoo</last><affiliation>Indian Institute of Technology Hyderabad</affiliation></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
       <pages>804-809</pages>
       <abstract>This paper describes NLIP Lab’s multilingual machine translation system for the WAT24 shared task on multilingual Indic MT task for 22 scheduled languages belonging to 4 language families. We explore pre-training for Indic languages using alignment agreement objectives. We utilize bi-lingual dictionaries to substitute words from source sentences. Furthermore, we fine-tuned language direction-specific multilingual translation models using small and high-quality seed data. Our primary submission is a 243M parameters multilingual translation model covering 22 Indic languages. In the IN22-Gen benchmark, we achieved an average chrF++ score of 46.80 and 18.19 BLEU score for the En-Indic direction. In the Indic-En direction, we achieved an average chrF++ score of 56.34 and 30.82 BLEU score. In the In22-Conv benchmark, we achieved an average chrF++ score of 43.43 and BLEU score of 16.58 in the En-Indic direction, and in the Indic-En direction, we achieved an average of 52.44 and 29.77 for chrF++ and BLEU respectively. Our model is competitive with IndicTransv1 (474M parameter model).</abstract>
       <url hash="8841977b">2024.wmt-1.74</url>
@@ -1047,7 +1047,7 @@
       <title><fixed-case>E</fixed-case>nglish-to-Low-Resource Translation: A Multimodal Approach for <fixed-case>H</fixed-case>indi, <fixed-case>M</fixed-case>alayalam, <fixed-case>B</fixed-case>engali, and <fixed-case>H</fixed-case>ausa</title>
       <author><first>Ali</first><last>Hatami</last><affiliation>University of Galway</affiliation></author>
       <author><first>Shubhanker</first><last>Banerjee</last><affiliation>University of Galway</affiliation></author>
-      <author><first>Mihael</first><last>Arcan</last><affiliation>Lua Health</affiliation></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last><affiliation>Lua Health</affiliation></author>
       <author><first>Paul</first><last>Buitelaar</last><affiliation>University of Galway</affiliation></author>
       <author><first>John</first><last>Philip McCrae</last><affiliation>University of Galway</affiliation></author>
       <pages>815-822</pages>
@@ -1155,7 +1155,7 @@
     <paper id="85">
       <title><fixed-case>U</fixed-case>niversitat d’Alacant’s Submission to the <fixed-case>WMT</fixed-case> 2024 Shared Task on Translation into Low-Resource Languages of <fixed-case>S</fixed-case>pain</title>
       <author><first>Aaron</first><last>Galiano Jimenez</last><affiliation>Universitat d’Alacant</affiliation></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universitat d’Alacant</affiliation></author>
       <author><first>Juan Antonio</first><last>Perez-Ortiz</last><affiliation>Departament de Llenguatges i Sistemes Informatics, Universitat d’Alacant</affiliation></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last><affiliation>Universitat d’Alacant</affiliation></author>
       <pages>885-891</pages>
@@ -1193,7 +1193,7 @@
       <author><first>Ona</first><last>De Gibert</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Mikko</first><last>Aulamo</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Yves</first><last>Scherrer</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <pages>908-917</pages>
       <abstract>The Helsinki-NLP team participated in the 2024 Shared Task on Translation into Low-Resource languages of Spain with four multilingual systems covering all language pairs. The task consists in developing Machine Translation (MT) models to translate from Spanish into Aragonese, Aranese and Asturian. Our models leverage known approaches for multilingual MT, namely, data filtering, fine-tuning, data tagging, and distillation. We use distillation to merge the knowledge from neural and rule-based systems and explore the trade-offs between translation quality and computational efficiency. We demonstrate that our distilled models can achieve competitive results while significantly reducing computational costs. Our best models ranked 4th, 5th, and 2nd in the open submission track for Spanish–Aragonese, Spanish–Aranese, and Spanish–Asturian, respectively. We release our code and data publicly at https://github.com/Helsinki-NLP/lowres-spain-st.</abstract>
       <url hash="3c9a5bf6">2024.wmt-1.88</url>
@@ -1215,7 +1215,7 @@
       <title>Training and Fine-Tuning <fixed-case>NMT</fixed-case> Models for Low-Resource Languages Using Apertium-Based Synthetic Corpora</title>
       <author><first>Aleix</first><last>Sant</last><affiliation>Barcelona Supercomputing Center</affiliation></author>
       <author><first>Daniel</first><last>Bardanca</last><affiliation>CITIUS</affiliation></author>
-      <author><first>José Ramom</first><last>Pichel Campos</last><affiliation>CITIUS</affiliation></author>
+      <author id="jose-ramom-pichel-campos"><first>José Ramom</first><last>Pichel Campos</last><affiliation>CITIUS</affiliation></author>
       <author><first>Francesca</first><last>De Luca Fornaciari</last><affiliation>BSC Barcelona Supercomputing Center</affiliation></author>
       <author><first>Carlos</first><last>Escolano</last><affiliation>Universitat PolitÃ ̈cnica de Catalunya, Barcelona Supercomputing Center</affiliation></author>
       <author><first>Javier</first><last>Garcia Gilabert</last><affiliation>Barcelona Super Computing Center</affiliation></author>
@@ -1362,7 +1362,7 @@
       <title>Improving Context Usage for Translating Bilingual Customer Support Chat with Large Language Models</title>
       <author><first>Jose</first><last>Pombal</last><affiliation>Unbabel</affiliation></author>
       <author><first>Sweta</first><last>Agrawal</last><affiliation>Instituto de Telecomunicações</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>993-1003</pages>
       <abstract>This paper describes Unbabel+IT’s submission to the Chat Shared Task held at the Workshop of Machine Translation 2024. The task focuses on translating customer support chats between agents and customers communicating in different languages. We present two strategies for adapting state-of-the-art language models to better utilize contextual information when translating such conversations. Our training strategy involves finetuning the model on chat datasets with context-augmented instructions, resulting in a specialized model, TOWERCHAT. For inference, we propose a novel quality-aware decoding approach that leverages a context-aware metric, CONTEXTCOMET, to select the optimal translation from a pool of candidates. We evaluate our proposed approach on the official shared task datasets for ten language pairs, showing that our submission consistently outperforms baselines on all and competing systems on 8 out of 10 language pairs across multiple automated metrics. Remarkably, TOWERCHAT outperforms our contrastive submission based on the much larger TOWER-V2-70B model while being 10× smaller. According to human evaluation, our system outperforms all other systems and baselines across all language pairs. These results underscore the importance of context-aware training and inference in handling complex bilingual dialogues.</abstract>
       <url hash="a6dc869d">2024.wmt-1.100</url>
@@ -1373,7 +1373,7 @@
       <title>Optimising <fixed-case>LLM</fixed-case>-Driven Machine Translation with Context-Aware Sliding Windows</title>
       <author><first>Xinye</first><last>Yang</last><affiliation>The University of Sheffield</affiliation></author>
       <author><first>Yida</first><last>Mu</last><affiliation>The University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>The University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>The University of Sheffield</affiliation></author>
       <author><first>Xingyi</first><last>Song</last><affiliation>University of Sheffield</affiliation></author>
       <pages>1004-1010</pages>
       <abstract>This paper describes SheffieldGATE’s submission to WMT 2024 Chat Shared Translation Task. We participate in three language pairs: English-German, English-Dutch, and English-Portuguese (Brazil). In this work, we introduce a context-aware sliding window decoding method to track dependencies between chat messages. We fine-tune a large pre-trained language model based on the training data provided by the shared task Our experiments (i) compare the model performance between multilingual and bilingual fine-tuning and (ii) assess the impact of different window sizes. Our experimental results demonstrate that utilising contextual information yields superior performance in document-level translation compared to translating documents as isolated text segments, and that models fine-tuned with multilingual data perform better than those fine-tuned with bilingual data.</abstract>
@@ -1465,7 +1465,7 @@
     <paper id="108">
       <title><fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WMT</fixed-case>24 Non-Repetitive Translation Task</title>
       <author><first>Marko</first><last>Avila</last><affiliation>CHAPSVISION</affiliation></author>
-      <author><first>Josep</first><last>Crego</last><affiliation>CHAPSVISION</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>CHAPSVISION</affiliation></author>
       <pages>1056-1062</pages>
       <abstract>Many contemporary NLP systems rely on neural decoders for text generation, which demonstrate an impressive ability to generate text approaching human fluency levels. However, in the case of neural machine translation networks, they often grapple with the production of repetitive content, also known as repetitive diction or word repetition, an aspect they weren’t explicitly trained to address. While not inherently negative, this repetition can make writing seem monotonous or awkward if not used intentionally for emphasis or stylistic purposes. This paper presents our submission to the WMT 2024 Non-Repetitive Translation Task, for which we adopt a repetition penalty method applied at learning inspired by the principles of label smoothing. No additional work is needed at inference time. We modify the ground-truth distribution to steer the model towards discouraging repetitions. Experiments show the ability of the proposed methods in reducing repetitions within neural machine translation engines, without compromising efficiency or translation quality.</abstract>
       <url hash="725c0945">2024.wmt-1.108</url>
@@ -1503,7 +1503,7 @@
       <author><first>Hugo</first><last>Pitorro</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Pavlo</first><last>Vasylenko</last><affiliation>Sapienza University of Rome</affiliation></author>
       <author><first>Marcos</first><last>Treviso</last><affiliation>Instituto de Telecomunicacoes</affiliation></author>
-      <author><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last><affiliation>Unbabel, Instituto de Telecomunicacoes</affiliation></author>
       <pages>1107-1124</pages>
       <abstract>Transformers are the current architecture of choice for NLP, but their attention layers do not scale well to long contexts. Recent works propose to replace attention with linear recurrent layers - this is the case for state space models, which enjoy efficient training and inference. However, it remains unclear whether these models are competitive with transformers in machine translation (MT). In this paper, we provide a rigorous and comprehensive experimental comparison between transformers and linear recurrent models for MT. Concretely, we experiment with RetNet, Mamba, and hybrid versions of Mamba which incorporate attention mechanisms. Our findings demonstrate that Mamba is highly competitive with transformers on sentence and paragraph-level datasets, where in the latter both models benefit from shifting the training distribution towards longer sequences. Further analysis show that integrating attention into Mamba improves translation quality, robustness to sequence length extrapolation, and the ability to recall named entities.</abstract>
       <url hash="9ff7414c">2024.wmt-1.111</url>
@@ -1523,7 +1523,7 @@
     <paper id="113">
       <title>A Multi-task Learning Framework for Evaluating Machine Translation of Emotion-loaded User-generated Content</title>
       <author><first>Shenbin</first><last>Qian</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Félix</first><last>Do Carmo</last><affiliation>University of Surrey</affiliation></author>
       <pages>1140-1154</pages>
@@ -1558,9 +1558,9 @@
       <title>Analysing Translation Artifacts: A Comparative Study of <fixed-case>LLM</fixed-case>s, <fixed-case>NMT</fixed-case>s, and Human Translations</title>
       <author><first>Fedor</first><last>Sizov</last><affiliation>Saarland University</affiliation></author>
       <author><first>Cristina</first><last>España-Bonet</last><affiliation>DFKI GmbH</affiliation></author>
-      <author><first>Josef</first><last>Van Genabith</last><affiliation>DFKI</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last><affiliation>DFKI</affiliation></author>
       <author><first>Roy</first><last>Xie</last><affiliation>Duke University</affiliation></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last><affiliation>Saarland Informatics Campus,Saarland University</affiliation></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last><affiliation>Saarland Informatics Campus,Saarland University</affiliation></author>
       <pages>1183-1199</pages>
       <abstract>Translated texts exhibit a range of characteristics that make them appear distinct from texts originally written in the same target language. With the rise of Large Language Models (LLMs), which are designed for a wide range of language generation and understanding tasks, there has been significant interest in their application to Machine Translation. While several studies have focused on improving translation quality through fine-tuning or few-shot prompting techniques, there has been limited exploration of how LLM-generated translations qualitatively differ from those produced by Neural Machine Translation (NMT) models, and human translations. Our study employs explainability methods such as Leave-One-Out (LOO) and Integrated Gradients (IG) to analyze the lexical features distinguishing human translations from those produced by LLMs and NMT systems. Specifically, we apply a two-stage approach: first, classifying texts based on their origin – whether they are original or translations – and second, extracting significant lexical features (highly attributed input words) using post-hoc interpretability methods. Our analysis shows that different methods of feature extraction vary in their effectiveness, with LOO being generally better at pinpointing critical input words and IG capturing a broader range of important words. Finally, our results show that while LLMs and NMT systems can produce translations of a good quality, they still differ from texts originally written by native speakers. Specifically, we find that while some LLMs often align closely with human translations, traditional NMT systems exhibit distinct characteristics, particularly in their use of certain linguistic features.</abstract>
       <url hash="c463ec29">2024.wmt-1.116</url>
@@ -1737,7 +1737,7 @@
       <author><first>Jim</first><last>Davis</last><affiliation>Ohio State University</affiliation></author>
       <author><first>Jeremy</first><last>Gwinnup</last><affiliation>Air Force Research Laboratory</affiliation></author>
       <author><first>Grant</first><last>Erdmann</last><affiliation>Air Force Research Laboratory</affiliation></author>
-      <author><first>Tim</first><last>Anderson</last><affiliation>Air Force Research Laboratory</affiliation></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last><affiliation>Air Force Research Laboratory</affiliation></author>
       <pages>1428-1439</pages>
       <abstract>In Multimodal Machine Translation (MMT), the use of visual data has shown only marginal improvements compared to text-only models. Previously, the CoMMuTE dataset and associated metric were proposed to score models on tasks where the imagery is necessary to disambiguate between two possible translations for each ambiguous source sentence. In this work, we introduce new metrics within the CoMMuTE domain to provide deeper insights into image-aware translation models. Our proposed metrics differ from the previous CoMMuTE scoring method by 1) assessing the impact of multiple images on individual translations and 2) evaluating a model’s ability to jointly select each translation for each image context. Our results challenge the conventional views of poor visual comprehension capabilities of MMT models and show that models can indeed meaningfully interpret visual information, though they may not leverage it sufficiently in the final decision.</abstract>
       <url hash="dcac855a">2024.wmt-1.130</url>
@@ -1751,7 +1751,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last><affiliation>German Research Center for Artificial Intelligence (DFKI)</affiliation></author>
       <author><first>Roman</first><last>Grundkiewicz</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Marzena</first><last>Karpinska</last><affiliation>University of Massachusetts Amherst</affiliation></author>
-      <author><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last><affiliation>ADAPT, Dublin City University</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>ETH Zurich</affiliation></author>
       <author><first>Mariya</first><last>Shmatova</last><affiliation>Dubformer</affiliation></author>
       <pages>1440-1453</pages>
diff --git a/data/xml/2024.wnu.xml b/data/xml/2024.wnu.xml
index 4f713e0635..ae44f4f4f3 100644
--- a/data/xml/2024.wnu.xml
+++ b/data/xml/2024.wnu.xml
@@ -104,7 +104,7 @@
       <author><first>Rohan</first><last>Das</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>Aditya</first><last>Chandra</last><affiliation>University of Colorado Boulder</affiliation></author>
       <author><first>I-Ta</first><last>Lee</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado Boulder</affiliation></author>
       <pages>85-98</pages>
       <abstract>From a communications perspective, a frame defines the packaging of the language used in such a way as to encourage certain interpretations and to discourage others. For example, a news article can frame immigration as either a boost or a drain on the economy, and thus communicate very different interpretations of the same phenomenon. In this work, we argue that to explain framing devices we have to look at the way narratives are constructed. As a first step in this direction, we propose a framework that extracts events and their relations to other events, and groups them into high-level narratives that help explain frames in news articles. We show that our framework can be used to analyze framing in U.S. news for two different domains: immigration and gun control.</abstract>
       <url hash="43491601">2024.wnu-1.15</url>
diff --git a/data/xml/2024.wnut.xml b/data/xml/2024.wnut.xml
index 0cdfb3b2c3..2d5e418416 100644
--- a/data/xml/2024.wnut.xml
+++ b/data/xml/2024.wnut.xml
@@ -8,7 +8,7 @@
       <editor><first>Max</first><last>Müller-Eberstein</last></editor>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Ġiljan, Malta</address>
       <month>March</month>
@@ -41,7 +41,7 @@
     </paper>
     <paper id="3">
       <title>Effects of different types of noise in user-generated reviews on human and machine translations including <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case></title>
-      <author><first>Maja</first><last>Popovic</last><affiliation>Dublin City University</affiliation></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last><affiliation>Universität Hildesheim</affiliation></author>
       <author><first>Maarit</first><last>Koponen</last><affiliation>University of Eastern Finland</affiliation></author>
       <pages>17-30</pages>
@@ -83,7 +83,7 @@
       <author><first>Baber</first><last>Khalid</last><affiliation>Amazon</affiliation></author>
       <author><first>Shuyang</first><last>Dai</last><affiliation>Amazon</affiliation></author>
       <author><first>Tara</first><last>Taghavi</last></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
       <pages>58-67</pages>
       <abstract>Text classification is an important problem with a wide range of applications in NLP. However, naturally occurring data is imbalanced which can induce biases when training classification models. In this work, we introduce a novel contrastive learning (CL) approach to help with imbalanced text classification task. CL has an inherent structure which pushes similar data closer in embedding space and vice versa using data samples anchors. However, in traditional CL methods text embeddings are used as anchors, which are scattered over the embedding space. We propose a CL approach which learns key anchors in the form of label embeddings and uses them as anchors. This allows our approach to bring the embeddings closer to their labels in the embedding space and divide the embedding space between labels in a fairer manner. We also introduce a novel method to improve the interpretability of our approach in a multi-class classification scenario. This approach learns the inter-class relationships during training which provide insight into the model decisions. Since our approach is focused on dividing the embedding space between different labels we also experiment with hyperbolic embeddings since they have been proven successful in embedding hierarchical information. Our proposed method outperforms several state-of-the-art baselines by an average 11% F1. Our interpretable approach highlights key data relationships and our experiments with hyperbolic embeddings give us important insights for future investigations. We will release the implementation of our approach with the publication.</abstract>
       <url hash="c91618b5">2024.wnut-1.6</url>
@@ -93,7 +93,7 @@
       <title><fixed-case>M</fixed-case>aint<fixed-case>N</fixed-case>orm: A corpus and benchmark model for lexical normalisation and masking of industrial maintenance short text</title>
       <author><first>Tyler</first><last>Bikaun</last><affiliation>University of Western Australia</affiliation></author>
       <author><first>Melinda</first><last>Hodkiewicz</last><affiliation>University of Western Australia</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
       <pages>68-78</pages>
       <abstract>Maintenance short texts are invaluable unstructured data sources, serving as a diagnostic and prognostic window into the operational health and status of physical assets. These user-generated texts, created during routine or ad-hoc maintenance activities, offer insights into equipment performance, potential failure points, and maintenance needs. However, the use of information captured in these texts is hindered by inherent challenges: the prevalence of engineering jargon, domain-specific vernacular, random spelling errors without identifiable patterns, and the absence of standard grammatical structures. To transform these texts into accessible and analysable data, we introduce the MaintNorm dataset, the first resource specifically tailored for the lexical normalisation task of maintenance short texts. Comprising 12,000 examples, this dataset enables the efficient processing and interpretation of these texts. We demonstrate the utility of MaintNorm by training a lexical normalisation model as a sequence-to-sequence learning task with two learning objectives, namely, enhancing the quality of the texts and masking segments to obscure sensitive information to anonymise data. Our benchmark model demonstrates a universal error reduction rate of 95.8%. The dataset and benchmark outcomes are available to the public.</abstract>
       <url hash="89a732ab">2024.wnut-1.7</url>
@@ -126,7 +126,7 @@
       <author><first>Yixing</first><last>Chen</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Shuang</first><last>Zheng</last></author>
       <author><first>Lei</first><last>Wang</last></author>
-      <author><first>John</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>104-118</pages>
       <abstract>Data for the Rating Prediction (RP) sentiment analysis task such as star reviews are readily available. However, data for aspect-category sentiment analysis (ACSA) is often desired because of the fine-grained nature but are expensive to collect. In this work we present a method for learning ACSA using only RP labels. We propose Unified Sentiment Analysis (Uni-SA) to efficiently understand aspect and review sentiment in a unified manner. We propose a Distantly Supervised Pyramid Network (DSPN) to efficiently perform Aspect-Category Detection (ACD), ACSA, and OSA using only RP labels for training. We evaluate DSPN on multi-aspect review datasets in English and Chinese and find that with only star rating labels for supervision, DSPN performs comparably well to a variety of benchmark models. We also demonstrate the interpretability of DSPN’s outputs on reviews to show the pyramid structure inherent in document level end-to-end sentiment analysis.</abstract>
       <url hash="e4f40d8b">2024.wnut-1.10</url>
diff --git a/data/xml/2024.woah.xml b/data/xml/2024.woah.xml
index 991c78904a..da8c8fdaf4 100644
--- a/data/xml/2024.woah.xml
+++ b/data/xml/2024.woah.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)</booktitle>
       <editor><first>Yi-Ling</first><last>Chung</last></editor>
-      <editor><first>Zeerak</first><last>Talat</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Talat</last></editor>
       <editor><first>Debora</first><last>Nozza</last></editor>
       <editor><first>Flor Miriam</first><last>Plaza-del-Arco</last></editor>
       <editor><first>Paul</first><last>Röttger</last></editor>
@@ -25,7 +25,7 @@
     <paper id="1">
       <title>Investigating radicalisation indicators in online extremist communities</title>
       <author><first>Christine</first><last>De Kock</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne</affiliation></author>
       <pages>1-12</pages>
       <abstract>We identify and analyse three sociolinguistic indicators of radicalisation within online extremist forums: hostility, longevity and social connectivity. We develop models to predict the maximum degree of each indicator measured over an individual’s lifetime, based on a minimal number of initial interactions. Drawing on data from two diverse extremist communities, our results demonstrate that NLP methods are effective at prioritising at-risk users. This work offers practical insights for intervention strategies and policy development, and highlights an important but under-studied research direction.</abstract>
       <url hash="1e44891c">2024.woah-1.1</url>
@@ -47,8 +47,8 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
     <paper id="3">
       <title><fixed-case>E</fixed-case>ko<fixed-case>H</fixed-case>ate: Abusive Language and Hate Speech Detection for Code-switched Political Discussions on <fixed-case>N</fixed-case>igerian <fixed-case>T</fixed-case>witter</title>
       <author><first>Comfort</first><last>Ilevbare</last><affiliation>Afe Babalola University</affiliation></author>
-      <author><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba</first><last>Alabi</last><affiliation>Saarland University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>University College London</affiliation></author>
       <author><first>Firdous</first><last>Bakare</last><affiliation>Afe Babalola University, Ado-Ekiti, Nigeria</affiliation></author>
       <author><first>Oluwatoyin</first><last>Abiola</last><affiliation>Afe Babalola University, Ado Ekiti, Ekiti State Nigeria</affiliation></author>
       <author><first>Oluwaseyi</first><last>Adeyemo</last><affiliation>Afe Babalola University</affiliation></author>
@@ -62,7 +62,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <title>A Study of the Class Imbalance Problem in Abusive Language Detection</title>
       <author><first>Yaqi</first><last>Zhang</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Viktor</first><last>Hangya</last><affiliation>Ludwig Maximilian University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>38-51</pages>
       <abstract>Abusive language detection has drawn increasing interest in recent years. However, a less systematically explored obstacle is label imbalance, i.e., the amount of abusive data is much lower than non-abusive data, leading to performance issues. The aim of this work is to conduct a comprehensive comparative study of popular methods for addressing the class imbalance issue. We explore 10 well-known approaches on 8 datasets with distinct characteristics: binary or multi-class, moderately or largely imbalanced, focusing on various types of abuse, etc. Additionally, we pro-pose two novel methods specialized for abuse detection: AbusiveLexiconAug and ExternalDataAug, which enrich the training data using abusive lexicons and external abusive datasets, respectively. We conclude that: 1) our AbusiveLexiconAug approach, random oversampling, and focal loss are the most versatile methods on various datasets; 2) focal loss tends to yield peak model performance; 3) oversampling and focal loss provide promising results for binary datasets and small multi-class sets, while undersampling and weighted cross-entropy are more suitable for large multi-class sets; 4) most methods are sensitive to hyperparameters, yet our suggested choice of hyperparameters provides a good starting point.</abstract>
       <url hash="75e609a2">2024.woah-1.4</url>
@@ -90,7 +90,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <title><fixed-case>VIDA</fixed-case>: The Visual Incel Data Archive. A Theory-oriented Annotated Dataset To Enhance Hate Detection Through Visual Culture</title>
       <author><first>Selenia</first><last>Anastasi</last><affiliation>Università degli Studi di Genova</affiliation></author>
       <author><first>Florian</first><last>Schneider</last><affiliation>Hamburg University</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Tim</first><last>Fischer</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>59-67</pages>
       <abstract>Images increasingly constitute a larger portion of internet content, encoding even more complex meanings. Recent studies have highlight the pivotal role of visual communication in the spread of extremist content, particularly that associated with right-wing political ideologies. However, the capability of machine learning systems to recognize such meanings, sometimes implicit, remains limited. To enable future research in this area, we introduce and release VIDA, the Visual Incel Data Archive, a multimodal dataset comprising visual material and internet memes collected from two main Incel communities (Italian and Anglophone) known for their extremist misogynistic content. Following the analytical framework of Shifman (2014), we propose a new taxonomy for annotation across three main levels of analysis: content, form, and stance (hate). This allows for the association of images with fine-grained contextual information that help to identify the presence of offensiveness and a broader set of cultural references, enhancing the understanding of more nuanced aspects in visual communication. In this work we present a statistical analysis of the annotated dataset as well as discuss annotation examples and future line of research.</abstract>
@@ -103,7 +103,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <author><first>Ali</first><last>Omrani</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Alireza</first><last>Salkhordeh Ziabari</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Preni</first><last>Golazizian</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last><affiliation>Google Jigsaw</affiliation></author>
       <author><first>Morteza</first><last>Dehghani</last><affiliation>University of Southern California</affiliation></author>
       <pages>68-109</pages>
       <abstract>Detecting problematic content, such as hate speech, is a multifaceted and ever-changing task, influenced by social dynamics, user populations, diversity of sources, and evolving language. There has been significant efforts, both in academia and in industry, to develop annotated resources that capture various aspects of problematic content. Due to researchers’ diverse objectives, these annotations are often inconsistent and hence, reports of progress on the detection of problematic content are fragmented. This pattern is expected to persist unless we pool these resources, taking into account the dynamic nature of this issue. In this paper, we propose integrating the available resources, leveraging their dynamic nature to break this pattern, and introduce a continual learning framework and benchmark for problematic content detection. Our benchmark, comprising 84 related tasks, creates a novel measure of progress: prioritizing the adaptability of classifiers to evolving tasks over excelling in specific tasks. To ensure continuous relevance, our benchmark is designed for seamless integration of new tasks. Our results demonstrate that continual learning methods outperform static approaches by up to 17% and 4% AUC in capturing the evolving content and adapting to novel forms of problematic content</abstract>
@@ -184,7 +184,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <author><first>Scott</first><last>Andersen</last><affiliation>Posgrado en Ciencia e Ingeniería de la Computación, Universidad Nacional Autónoma de México</affiliation></author>
       <author><first>Segio-Luis</first><last>Ojeda-Trueba</last><affiliation>Instituto de Ingeniería, Universidad Nacional Autónoma de México</affiliation></author>
       <author><first>Juan</first><last>Vásquez</last><affiliation>Department of Computer Science, University of Colorado Boulder</affiliation></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <pages>178-200</pages>
       <abstract>Thanks to the popularity of social media, data generated by online communities provides an abundant source of diverse language information. This abundance of data allows NLP practitioners and computational linguists to analyze sociolinguistic phenomena occurring in digital communication. In this paper, we analyze the Twitter discourse around the Mexican Spanish-speaking LGBT+ community. For this, we evaluate how the polarity of some nouns related to the LGBT+ community has evolved in conversational settings using a corpus of tweets that cover a time span of ten years. We hypothesize that social media’s fast-moving, turbulent linguistic environment encourages language evolution faster than ever before. Our results indicate that most of the inspected terms have undergone some shift in denotation or connotation. No other generalizations can be observed in the data, given the difficulty that current NLP methods have to account for polysemy, and the wide differences between the various subgroups that make up the LGBT+ community. A fine-grained analysis of a series of LGBT+-related lexical terms is also included in this work.</abstract>
       <url hash="fcc9a255">2024.woah-1.14</url>
@@ -239,7 +239,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <title>A <fixed-case>B</fixed-case>ayesian Quantification of Aporophobia and the Aggravating Effect of Low–Wealth Contexts on Stigmatization</title>
       <author><first>Ryan</first><last>Brate</last><affiliation>KNAW</affiliation></author>
       <author><first>Marieke</first><last>Van Erp</last><affiliation>KNAW Humanities Cluster</affiliation></author>
-      <author><first>Antal</first><last>Van Den Bosch</last><affiliation>Utrecht University</affiliation></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>Van Den Bosch</last><affiliation>Utrecht University</affiliation></author>
       <pages>234-243</pages>
       <abstract>Aporophobia, a negative social bias against poverty and the poor, has been highlighted asan overlooked phenomenon in toxicity detec-tion in texts. Aporophobia is potentially im-portant both as a standalone form of toxicity,but also given its potential as an aggravatingfactor in the wider stigmatization of groups. Asyet, there has been limited quantification of thisphenomenon. In this paper, we first quantifythe extent of aporophobia, as observable in Red-dit data: contrasting estimates of stigmatisingtopic propensity between low–wealth contextsand high–wealth contexts via Bayesian estima-tion. Next, we consider aporophobia as a causalfactor in the prejudicial association of groupswith stigmatising topics, by introducing peoplegroup as a variable, specifically Black people.This group is selected given its history of be-ing the subject of toxicity. We evaluate theaggravating effect on the observed n–grams in-dicative of stigmatised topics observed in com-ments which refer to Black people, due to thepresence of low–wealth contexts. We performthis evaluation via a Structural Causal Mod-elling approach, performing interventions onsimulations via Bayesian models, for three hy-pothesised causal mechanisms.</abstract>
       <url hash="73ef8c55">2024.woah-1.18</url>
@@ -303,7 +303,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For
       <author><first>Diyi</first><last>Liu</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Samuel</first><last>Fraiberger</last><affiliation>World Bank, NYU, MIT</affiliation></author>
       <author><first>Ralph</first><last>Schroeder</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Scott A.</first><last>Hale</last><affiliation>University of Oxford</affiliation></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Paul</first><last>Röttger</last><affiliation>University of Oxford</affiliation></author>
       <pages>283-311</pages>
       <abstract>Perceptions of hate can vary greatly across cultural contexts. Hate speech (HS) datasets, however, have traditionally been developed by language. This hides potential cultural biases, as one language may be spoken in different countries home to different cultures. In this work, we evaluate cultural bias in HS datasets by leveraging two interrelated cultural proxies: language and geography. We conduct a systematic survey of HS datasets in eight languages and confirm past findings on their English-language bias, but also show that this bias has been steadily decreasing in the past few years. For three geographically-widespread languages—English, Arabic and Spanish—we then leverage geographical metadata from tweets to approximate geo-cultural contexts by pairing language and country information. We find that HS datasets for these languages exhibit a strong geo-cultural bias, largely overrepresenting a handful of countries (e.g., US and UK for English) relative to their prominence in both the broader social media population and the general population speaking these languages. Based on these findings, we formulate recommendations for the creation of future HS datasets.</abstract>
diff --git a/data/xml/2024.yrrsds.xml b/data/xml/2024.yrrsds.xml
index 0f2dd05f4e..151f545506 100644
--- a/data/xml/2024.yrrsds.xml
+++ b/data/xml/2024.yrrsds.xml
@@ -163,7 +163,7 @@
     </paper>
     <paper id="16">
       <title>Topological Deep Learning for Term Extraction</title>
-      <author><first>Benjamin Matthias</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin Matthias</first><last>Ruppik</last></author>
       <pages>43–45</pages>
       <abstract>Ben is a postdoctoral researcher in the Dialog Systems and Machine Learning research group led by Milica Gašić at the Heinrich-Heine-Universität Düsseldorf, which he joined in 2022. In collaboration with the Topology and Geometry group in the Mathematics Department, under the supervision of Marcus Zibrowius, Ben is developing applications of Topological Data Analysis in Natural Language Processing, focusing on dialogue systems. Before transitioning to machine learning research, Ben was a pure mathematician at the Max-Planck-Institute for Mathematics in Bonn, where he specialized in knotted surfaces in 4-dimensional manifolds. He graduated from the University of Bonn in 2022.</abstract>
       <url hash="dcd48077">2024.yrrsds-1.16</url>
diff --git a/data/xml/2025.aaas.xml b/data/xml/2025.aaas.xml
index 2f43ced505..5390286b2b 100644
--- a/data/xml/2025.aaas.xml
+++ b/data/xml/2025.aaas.xml
@@ -22,8 +22,8 @@
       <title>Investigating Further Fine-tuning <fixed-case>Wav2vec2.0</fixed-case> in Low Resource Settings for Enhancing Children Speech Recognition and Word-level Reading Diagnosis</title>
       <author><first>Lingyun</first><last>Gao</last></author>
       <author><first>Cristian</first><last>Tejedor-Garcia</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>1–6</pages>
       <url hash="da617212">2025.aaas-1.1</url>
       <bibkey>gao-etal-2025-investigating</bibkey>
diff --git a/data/xml/2025.acl.xml b/data/xml/2025.acl.xml
index 360427b96b..1c65e24ffb 100644
--- a/data/xml/2025.acl.xml
+++ b/data/xml/2025.acl.xml
@@ -256,7 +256,7 @@
       <author><first>Boyoung</first><last>Kim</last></author>
       <author><first>Minyoung</first><last>Kim</last></author>
       <author><first>Joonsuk</first><last>Park</last><affiliation>University of Richmond</affiliation></author>
-      <author><first>Paul Hongsuck</first><last>Seo</last><affiliation>Korea University</affiliation></author>
+      <author id="hongsuck-seo"><first>Paul Hongsuck</first><last>Seo</last><affiliation>Korea University</affiliation></author>
       <pages>341-359</pages>
       <abstract>Multi-hop question answering (MHQA) involves reasoning across multiple documents to answer complex questions. Dense retrievers typically outperform sparse methods like BM25 by leveraging semantic embeddings in many tasks; however, they require labeled query-document pairs for fine-tuning, which poses a significant challenge in MHQA due to the complexity of the reasoning steps. To overcome this limitation, we introduce Retriever Supervision with Consistency and Relevance (ReSCORE), a novel method for training dense retrievers for MHQA without the need for labeled documents. ReSCORE leverages large language models to measure document-question relevance with answer consistency and utilizes this information to train a retriever within an iterative question-answering framework. Evaluated on three MHQA benchmarks, our extensive experiments demonstrate the effectiveness of ReSCORE, with significant improvements in retrieval performance that consequently lead to state-of-the-art Exact Match and F1 scores for MHQA.</abstract>
       <url hash="43109489">2025.acl-long.16</url>
@@ -270,8 +270,8 @@
       <author><first>Yuxuan</first><last>Gu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Jing</first><last>Ma</last><affiliation>Hong Kong Baptist University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>360-381</pages>
       <abstract>Large Language Models (LLMs) have significantly advanced the fact-checking studies. However, existing automated fact-checking evaluation methods rely on static datasets and classification metrics, which fail to automatically evaluate the justification production and uncover the nuanced limitations of LLMs in fact-checking. In this work, we introduce FACT-AUDIT, an agent-driven framework that adaptively and dynamically assesses LLMs’ fact-checking capabilities. Leveraging importance sampling principles and multi-agent collaboration, FACT-AUDIT generates adaptive and scalable datasets, performs iterative model-centric evaluations, and updates assessments based on model-specific responses. By incorporating justification production alongside verdict prediction, this framework provides a comprehensive and evolving audit of LLMs’ factual reasoning capabilities, to investigate their trustworthiness. Extensive experiments demonstrate that FACT-AUDIT effectively differentiates among state-of-the-art LLMs, providing valuable insights into model strengths and limitations in model-centric fact-checking analysis.</abstract>
       <url hash="7e4868bf">2025.acl-long.17</url>
@@ -281,9 +281,9 @@
     <paper id="18">
       <title>Statistical Deficiency for Task Inclusion Estimation</title>
       <author><first>Loïc</first><last>Fosse</last></author>
-      <author><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
-      <author><first>Benoit</first><last>Favre</last><affiliation>Université d’Aix-Marseille</affiliation></author>
-      <author><first>Géraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last><affiliation>Université d’Aix-Marseille</affiliation></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last><affiliation>Orange Innovation</affiliation></author>
       <author><first>Gwénolé</first><last>Lecorvé</last><affiliation>Orange</affiliation></author>
       <author><first>Maxime</first><last>Darrin</last></author>
       <author><first>Philippe</first><last>Formont</last><affiliation>École de technologie supérieure, Université du Québec and Université Paris-Saclay</affiliation></author>
@@ -341,7 +341,7 @@
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Wenqiang</first><last>Lei</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Jiancheng</first><last>Lv</last><affiliation>Sichuan University</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Jimmy</first><last>Huang</last><affiliation>York University and York University</affiliation></author>
       <pages>466-488</pages>
       <abstract>With the advancement of large language models (LLMs), intelligent models have evolved from mere tools to autonomous agents with their own goals and strategies for cooperating with humans. This evolution has birthed a novel paradigm in NLP, i.e., human-model cooperation, that has yielded remarkable progress in numerous NLP tasks in recent years. In this paper, we take the first step to present a thorough review of human-model cooperation, exploring its principles, formalizations, and open challenges. In particular, we introduce a new taxonomy that provides a unified perspective to summarize existing approaches. Also, we discuss potential frontier areas and their corresponding challenges. We regard our work as an entry point, paving the way for more breakthrough research in this regard.</abstract>
@@ -358,7 +358,7 @@
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Jianming</first><last>Fu</last></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>489-499</pages>
       <abstract>Text-based hyperbole and metaphor detection are of great significance for natural language processing (NLP) tasks. However, due to their semantic obscurity and expressive diversity, it is rather challenging to identify them. Existing methods mostly focus on superficial text features, ignoring the associations of hyperbole and metaphor as well as the effect of implicit emotion on perceiving these rhetorical devices. To implement these hypotheses, we propose an emotion-guided hyperbole and metaphor detection framework based on bidirectional dynamic interaction (EmoBi). Firstly, the emotion analysis module deeply mines the emotion connotations behind hyperbole and metaphor. Next, the emotion-based domain mapping module identifies the target and source domains to gain a deeper understanding of the implicit meanings of hyperbole and metaphor. Finally, the bidirectional dynamic interaction module enables the mutual promotion between hyperbole and metaphor. Meanwhile, a verification mechanism is designed to ensure detection accuracy and reliability. Experiments show that EmoBi outperforms all baseline methods on four datasets. Specifically, compared to the current SoTA, the F1 score increased by 28.1% for hyperbole detection on the TroFi dataset and 23.1% for metaphor detection on the HYPO-L dataset. These results, underpinned by in-depth analyses, underscore the effectiveness and potential of our approach for advancing hyperbole and metaphor detection.</abstract>
       <url hash="54d5a445">2025.acl-long.23</url>
@@ -385,7 +385,7 @@
       <author><first>Volha</first><last>Harytskaya</last><affiliation>Independent</affiliation></author>
       <author><first>Vladislav</first><last>Poritski</last><affiliation>unaffiliated</affiliation></author>
       <author><first>Oksana</first><last>Volchek</last><affiliation>Independent</affiliation></author>
-      <author><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last><affiliation>Brno University of Technology</affiliation></author>
       <pages>511-527</pages>
       <abstract>In the epoch of multilingual large language models (LLMs), it is still challenging to evaluate the models’ understanding of lower-resourced languages, which motivates further development of expert-crafted natural language understanding benchmarks. We introduce BelarusianGLUE — a natural language understanding benchmark for Belarusian, an East Slavic language, with ≈15K instances in five tasks: sentiment analysis, linguistic acceptability, word in context, Winograd schema challenge, textual entailment. A systematic evaluation of BERT models and LLMs against this novel benchmark reveals that both types of models approach human-level performance on easier tasks, such as sentiment analysis, but there is a significant gap in performance between machine and human on a harder task — Winograd schema challenge. We find the optimal choice of model type to be task-specific: e.g. BERT models underperform on textual entailment task but are competitive for linguistic acceptability. We release the datasets (https://hf.co/datasets/maaxap/BelarusianGLUE) and evaluation code (https://github.com/maaxap/BelarusianGLUE).</abstract>
       <url hash="8db6cd0a">2025.acl-long.25</url>
@@ -703,7 +703,7 @@
       <author><first>Yizhe</first><last>Yang</last></author>
       <author><first>Yuyue</first><last>Zhao</last></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yu-Gang</first><last>Jiang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yong</first><last>Liao</last><affiliation>University of Science and Technology of China and China Academic of Electronics and Information Technology</affiliation></author>
       <pages>948-964</pages>
@@ -791,7 +791,7 @@
       <author><first>Zhao</first><last>Zhang</last></author>
       <author><first>Jin</first><last>Zhang</last></author>
       <author><first>Hui</first><last>Xu</last><affiliation>Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>1053-1067</pages>
       <abstract>Stance detection is a pivotal task in Natural Language Processing (NLP), identifying textual attitudes toward various targets. Despite advances in using Large Language Models (LLMs), challenges persist due to hallucination-models generating plausible yet inaccurate content. Addressing these challenges, we introduce MPVStance, a framework that incorporates Multi-Perspective Verification (MPV) with Retrieval-Augmented Generation (RAG) across a structured five-step verification process. Our method enhances stance detection by rigorously validating each response from factual accuracy, logical consistency, contextual relevance, and other perspectives. Extensive testing on the SemEval-2016 and VAST datasets, including scenarios that challenge existing methods and comprehensive ablation studies, demonstrates that MPVStance significantly outperforms current models. It effectively mitigates hallucination issues and sets new benchmarks for reliability and accuracy in stance detection, particularly in zero-shot, few-shot, and challenging scenarios.</abstract>
       <url hash="5a767703">2025.acl-long.53</url>
@@ -803,7 +803,7 @@
       <author><first>Yaoqi</first><last>Guo</last></author>
       <author><first>Zhenpeng</first><last>Chen</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Jie M.</first><last>Zhang</last><affiliation>King’s College London, University of London</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yun</first><last>Ma</last><affiliation>Peking University</affiliation></author>
       <pages>1068-1080</pages>
       <abstract>Code generation, the automatic creation of source code from natural language descriptions, has garnered significant attention due to its potential to streamline software development. Inspired by research that links task-personality alignment with improved development outcomes, we conduct an empirical study on personality-guided code generation using large language models (LLMs). Specifically, we investigate how emulating personality traits appropriate to the coding tasks affects LLM performance. We extensively evaluate this approach using seven widely adopted LLMs across four representative datasets. Our results show that personality guidance significantly enhances code generation accuracy, with improved pass rates in 23 out of 28 LLM-dataset combinations. Notably, in 11 cases, the improvement exceeds 5%, and in 5 instances, it surpasses 10%, with the highest gain reaching 12.9%. Additionally, personality guidance can be easily integrated with other prompting strategies to further boost performance.</abstract>
@@ -862,7 +862,7 @@
       <author><first>Quanwei</first><last>Cai</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Ye</first><last>Wu</last></author>
       <author><first>Huiqi</first><last>Liu</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Bing</first><last>Duan</last></author>
       <author><first>Sheng</first><last>Zhong</last><affiliation>nanjing university</affiliation></author>
       <pages>1160-1174</pages>
@@ -910,7 +910,7 @@
     </paper>
     <paper id="62">
       <title>Untie the Knots: An Efficient Data Augmentation Strategy for Long-Context Pre-Training in Language Models</title>
-      <author><first>Junfeng</first><last>Tian</last><affiliation>Xiaohongshu</affiliation></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last><affiliation>Xiaohongshu</affiliation></author>
       <author><first>Da</first><last>Zheng</last><affiliation>Xiaohongshu</affiliation></author>
       <author><first>Yang</first><last>Chen</last></author>
       <author><first>Rui</first><last>Wang</last><affiliation>Decilion</affiliation></author>
@@ -961,7 +961,7 @@
       <author><first>Jinyu</first><last>Li</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sheng</first><last>Zhao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
       <pages>1287-1300</pages>
       <abstract>We present MELLE, a novel continuous-valued token based language modeling approach for text-to-speech synthesis (TTS). MELLE autoregressively generates continuous mel-spectrogram frames directly from text condition, bypassing the need for vector quantization, which is typically designed for audio compression and sacrifices fidelity compared to continuous representations. Specifically, (i) instead of cross-entropy loss, we apply regression loss with a proposed spectrogram flux loss function to model the probability distribution of the continuous-valued tokens; (ii) we have incorporated variational inference into MELLE to facilitate sampling mechanisms, thereby enhancing the output diversity and model robustness. Experiments demonstrate that, compared to the two-stage codec language model VALL-E and its variants, the single-stage MELLE mitigates robustness issues by avoiding the inherent flaws of sampling vector-quantized codes, achieves superior performance across multiple metrics, and, most importantly, offers a more streamlined paradigm. The demos of our work are provided at https://aka.ms/melle.</abstract>
@@ -1009,8 +1009,8 @@
       <author><first>Abigail</first><last>Wheeler</last></author>
       <author><first>Nicholas</first><last>Kerry</last></author>
       <author><first>Jeremy D. W.</first><last>Clifton</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Ryan L.</first><last>Boyd</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="ryan-boyd"><first>Ryan L.</first><last>Boyd</last></author>
       <pages>1362-1376</pages>
       <abstract>Measuring the prevalence and dimensions of self beliefs is essential for understanding human self-perception and various psychological outcomes. In this paper, we develop a novel task for classifying language that contains explicit or implicit mentions of the author’s self beliefs. We contribute a set of 2,000 human-annotated self beliefs, 100,000 LLM-labeled examples, and 10,000 surveyed self belief paragraphs. We then evaluate several encoder-based classifiers and training routines for this task. Our trained model, SelfAwareNet, achieved an AUC of 0.944, outperforming 0.839 from OpenAI’s state-of-the-art GPT-4o model. Using this model we derive data-driven categories of self beliefs and demonstrate their ability to predict valence, depression, anxiety, and stress. We release the resulting self belief classification model and annotated datasets for use in future research.</abstract>
       <url hash="ef456e0d">2025.acl-long.69</url>
@@ -1268,7 +1268,7 @@
       <author><first>Haotian</first><last>Ye</last><affiliation>Center for Information and Language Processing</affiliation></author>
       <author><first>Chunlan</first><last>Ma</last></author>
       <author><first>Mingyang</first><last>Wang</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>1743-1770</pages>
       <abstract>Recent multilingual pretrained language models (mPLMs) often avoid using language embeddings – learnable vectors assigned to individual languages. However, this places a significant burden on token representations to encode all language-specific information, which may hinder language neutrality. To address this limitation, we propose Language-Script Aware Multilingual Pretraining (LangSAMP), a method that incorporates both language and script embeddings to enhance representation learning. Specifically, we integrate these embeddings into the output of the Transformer blocks before passing the final representations to the language modeling head for prediction. We apply LangSAMP to the continual pretraining of XLM-R on a highly multilingual corpus covering more than 500 languages. The resulting model consistently outperforms the baseline in zero-shot crosslingual transfer across diverse downstream tasks. Extensive analysis reveals that language and script embeddings capture language- and script-specific nuances, which benefits more language-neutral representations, proven by improved pairwise cosine similarity. In our case study, we also show that language and script embeddings can be used to select better source languages for crosslingual transfer. We make our code and models publicly available at <url>https://github.com/cisnlp/LangSAMP</url>.</abstract>
       <url hash="2d0f89ee">2025.acl-long.88</url>
@@ -1295,7 +1295,7 @@
       <author id="xinpeng-wang-lmu"><first>Xinpeng</first><last>Wang</last></author>
       <author><first>Markus</first><last>Herklotz</last></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Matthias</first><last>Aßenmacher</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>1785-1809</pages>
       <abstract>In recent research, large language models (LLMs) have been increasingly used to investigate public opinions. This study investigates the algorithmic fidelity of LLMs, i.e., the ability to replicate the socio-cultural context and nuanced opinions of human participants. Using open-ended survey data from the German Longitudinal Election Studies (GLES), we prompt different LLMs to generate synthetic public opinions reflective of German subpopulations by incorporating demographic features into the persona prompts. Our results show that Llama performs better than other LLMs at representing subpopulations, particularly when there is lower opinion diversity within those groups. Our findings further reveal that the LLM performs better for supporters of left-leaning parties like The Greens and The Left compared to other parties, and matches the least with the right-party AfD. Additionally, the inclusion or exclusion of specific variables in the prompts can significantly impact the models’ predictions. These findings underscore the importance of aligning LLMs to more effectively model diverse public opinions while minimizing political biases and enhancing robustness in representativeness.</abstract>
@@ -1365,7 +1365,7 @@
     <paper id="95">
       <title>From Information to Insight: Leveraging <fixed-case>LLM</fixed-case>s for Open Aspect-Based Educational Summarization</title>
       <author><first>Yang</first><last>Zhong</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>1914-1947</pages>
       <abstract>This paper addresses the challenge of aspect-based summarization in education by introducing Reflective ASPect-based summarization (ReflectASP), a novel dataset that summarizes student reflections on STEM lectures. Despite the promising performance of large language models in general summarization, their application to nuanced aspect-based summaries remains under-explored. ReflectASP eases the exploration of open-aspect-based summarization (OABS), overcoming the limitations of current datasets and comes with ample human annotations. We benchmarked different types of zero-shot summarization methods and proposed two refinement methods to improve summaries, supported by both automatic and human manual evaluations. Additionally, we analyzed suggestions and revisions made during the refinement process, offering a fine-grained study of the editing strategies employed by these methods. We make our models, dataset, and all human evaluation results available at https://github.com/cs329yangzhong/ReflectASP.</abstract>
       <url hash="3644e66a">2025.acl-long.95</url>
@@ -1518,7 +1518,7 @@
       <author><first>Matthias</first><last>Orlikowski</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Jiaxin</first><last>Pei</last><affiliation>Stanford University</affiliation></author>
       <author><first>Paul</first><last>Röttger</last><affiliation>Bocconi University</affiliation></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>David</first><last>Jurgens</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Dirk</first><last>Hovy</last><affiliation>Bocconi University</affiliation></author>
       <pages>2092-2111</pages>
@@ -1670,7 +1670,7 @@
       <author><first>Manuel</first><last>Tonneau</last><affiliation>Oxford Internet Institute, University of Oxford</affiliation></author>
       <author><first>Diyi</first><last>Liu</last></author>
       <author><first>Niyati</first><last>Malhotra</last></author>
-      <author><first>Scott A.</first><last>Hale</last><affiliation>Meedan, University of Oxford and Alan Turing Institute</affiliation></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last><affiliation>Meedan, University of Oxford and Alan Turing Institute</affiliation></author>
       <author><first>Samuel</first><last>Fraiberger</last><affiliation>World Bank</affiliation></author>
       <author><first>Victor</first><last>Orozco-Olvera</last></author>
       <author><first>Paul</first><last>Röttger</last><affiliation>Bocconi University</affiliation></author>
@@ -1720,7 +1720,7 @@
     </paper>
     <paper id="118">
       <title>Steering into New Embedding Spaces: Analyzing Cross-Lingual Alignment Induced by Model Interventions in Multilingual Language Models</title>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <author><first>Sinead</first><last>Williamson</last><affiliation>Apple</affiliation></author>
       <author><first>Katherine</first><last>Metcalf</last><affiliation>Apple</affiliation></author>
       <author><first>Barry-John</first><last>Theobald</last><affiliation>Apple</affiliation></author>
@@ -1774,7 +1774,7 @@
     <paper id="122">
       <title>Did Translation Models Get More Robust Without Anyone <fixed-case>E</fixed-case>ven Noticing?</title>
       <author><first>Ben</first><last>Peters</last><affiliation>Instituto de Telecomunicações, Portugal and Instituto Superior Técnico</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>2445-2458</pages>
       <abstract>Neural machine translation (MT) models achieve strong results across a variety of settings, but it is widely believed that they are highly sensitive to “noisy” inputs, such as spelling errors, abbreviations, and other formatting issues. In this paper, we revisit this insight in light of recent multilingual MT models and large language models (LLMs) applied to machine translation. Somewhat surprisingly, we show through controlled experiments that these models are far more robust to many kinds of noise than previous models, even when they perform similarly on clean data. This is notable because, even though LLMs have more parameters and more complex training processes than past models, none of the open ones we consider use any techniques specifically designed to encourage robustness. Next, we show that similar trends hold for social media translation experiments – LLMs are more robust to social media text. We include an analysis of the circumstances in which source correction techniques can be used to mitigate the effects of noise. Altogether, we show that robustness to many types of noise has increased.</abstract>
       <url hash="5c3115ef">2025.acl-long.122</url>
@@ -1926,7 +1926,7 @@
       <author><first>Hao</first><last>Sun</last></author>
       <author><first>Chris Xing</first><last>Tian</last><affiliation>City University of Hong Kong</affiliation></author>
       <author><first>Chenqi</first><last>Kong</last></author>
-      <author><first>Xin</first><last>Dong</last><affiliation>NVIDIA</affiliation></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Haoliang</first><last>Li</last><affiliation>City University of Hong Kong</affiliation></author>
       <pages>2623-2641</pages>
       <abstract>Large Language Models (LLMs) have demonstrated impressive in-context learning (ICL) capabilities from few-shot demonstration exemplars. Recent learning-based demonstration selection methods have proven beneficial to ICL by choosing more useful exemplars. While these methods generally assume they learn better similarity measurements between exemplars and test cases from the proxy task, what kinds of similarities are captured by them and are vital to performing ICL still need to be explored. To dive into this question, we analyze the working mechanism of learning-based demonstration selection methods and empirically identify two essential factors of their similarity measurements: 1) Integrating task-agnostic similarities of different levels between the input of exemplars and test cases; 2) Incorporating task-specific similarity between the output of exemplars and test cases. We validate these two findings through extensive quantitative analysis across ten datasets and various LLMs. Based on these insights, we introduce two simplified exemplar selection methods, MLSM and TTF, catering to task-agnostic and task-specific demands to eliminate costly data collection. The effectiveness of both methods evince our findings again and pave the way for future studies.</abstract>
@@ -2097,7 +2097,7 @@
       <author><first>Gregor</first><last>Geigle</last><affiliation>Bayerische Julius-Maximilians-Universität Würzburg</affiliation></author>
       <author><first>Florian</first><last>Schneider</last></author>
       <author><first>Carolin</first><last>Holtermann</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Radu</first><last>Timofte</last><affiliation>Bayerische Julius-Maximilians-Universität Würzburg</affiliation></author>
       <author><first>Anne</first><last>Lauscher</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>Julius-Maximilians-Universität Würzburg</affiliation></author>
@@ -2131,7 +2131,7 @@
       <title><fixed-case>C</fixed-case>on <fixed-case>I</fixed-case>nstruction: Universal Jailbreaking of Multimodal Large Language Models via Non-Textual Modalities</title>
       <author><first>Jiahui</first><last>Geng</last></author>
       <author><first>Thy Thy</first><last>Tran</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>2917-2933</pages>
       <abstract>Existing attacks against multimodal language models often communicate instruction through text, either as an explicit malicious instruction or a crafted generic prompt, and accompanied by a toxic image. In contrast, here we exploit the capabilities of MLLMs in following non-textual instruction, i.e., an adversarial image or audio, namely Con Instruction. It is a novel gray-box attack method that generates adversarial images or audio to convey specific harmful instructions to MLLMs. We also find that combining our adversarial examples with certain non-empty text inputs amplifies attack success, while appending these after malicious text has limited effects. To evaluate whether an attack is successful, we introduce a new attack response categorization (ARC) that considers the response quality and relevancy concerning the malicious instruction. The results show that Con Instruction effectively bypasses the safety mechanisms in various visual and audio-language models, including LLaVA-v1.5, InternVL, Qwen-VL, and Qwen-Audio, across two standard benchmarks: AdvBench and SafeBench. Specifically, our method achieves the highest attack success rates, reaching 81.3% and 86.6% on LLaVA-v1.5 (13B). We show that larger models are more susceptible toCon Instruction, contrasting observations in their underlying LLMs. On the defense side, we explore various methods against our attacks and find substantial gaps among existing techniques. The code will be made available upon publication.</abstract>
@@ -2169,7 +2169,7 @@
       <author><first>Boxuan</first><last>Lyu</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Institute of Science Tokyo</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>2976-2994</pages>
       <abstract>Maximum a posteriori decoding, a commonly used method for neural machine translation (NMT), aims to maximize the estimated posterior probability. However, high estimated probability does not always lead to high translation quality. Minimum Bayes Risk (MBR) decoding offers an alternative by seeking hypotheses with the highest expected utility.Inspired by Quality Estimation (QE) reranking which uses the QE model as a ranker, we propose source-based MBR (sMBR) decoding, a novel approach that utilizes quasi-sources (generated via paraphrasing or back-translation) as “support hypotheses” and a reference-free quality estimation metric as the utility function, marking the first work to solely use sources in MBR decoding. Experiments show that sMBR outperforms QE reranking and the standard MBR decoding. Our findings suggest that sMBR is a promising approach for NMT decoding.</abstract>
       <url hash="90b25223">2025.acl-long.149</url>
@@ -2190,7 +2190,7 @@
       <author><first>Siyu</first><last>Yuan</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jiecao</first><last>Chen</last><affiliation>ByteDance Inc.</affiliation></author>
       <pages>2995-3021</pages>
       <abstract>Effective evaluation of multi-hop tool use is critical for analyzing the understanding, reasoning, and function-calling capabilities of large language models (LLMs). However, progress has been hindered by a lack of reliable evaluation datasets. To address this, we present ToolHop, a dataset comprising 995 user queries and 3,912 associated tools, specifically designed for rigorous evaluation of multi-hop tool use. ToolHop ensures diverse queries, meaningful interdependencies, locally executable tools, detailed feedback, and verifiable answers through a novel query-driven data construction approach that includes tool creation, document refinement, and code generation. We evaluate 14 LLMs across five model families (i.e., LLaMA3.1, Qwen2.5, Gemini1.5, Claude3.5, and GPT), uncovering significant challenges in handling multi-hop tool-use scenarios. The leading model, GPT-4o, achieves an accuracy of 49.04%, underscoring substantial room for improvement. Further analysis reveals variations in tool-use strategies for various families, offering actionable insights to guide the development of more effective approaches. Code and data can be found in https://huggingface.co/datasets/bytedance-research/ToolHop.</abstract>
@@ -2241,7 +2241,7 @@
       <author><first>Qian</first><last>Liu</last><affiliation>University of Auckland</affiliation></author>
       <author><first>Liangming</first><last>Pan</last><affiliation>University of Arizona</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Mong-Li</first><last>Lee</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Wynne</first><last>Hsu</last><affiliation>National University of Singapore</affiliation></author>
       <pages>3052-3075</pages>
@@ -2344,7 +2344,7 @@
       <author><first>Neha</first><last>Srikanth</last></author>
       <author><first>Taylor</first><last>Pellegrin</last></author>
       <author><first>Rachel</first><last>Rudinger</last></author>
-      <author><first>Claire</first><last>Bonial</last><affiliation>Georgetown University and Army Research Lab</affiliation></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last><affiliation>Georgetown University and Army Research Lab</affiliation></author>
       <author><first>Philip</first><last>Resnik</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>3200-3215</pages>
       <abstract>While it is commonly accepted that maintaining common ground plays a role in conversational success, little prior research exists connecting conversational grounding to success in task-oriented conversations. We study failures of grounding in the Ubuntu IRC dataset, where participants use text-only communication to resolve technical issues. We find that disruptions in conversational flow often stem from a misalignment in common ground, driven by a divergence in beliefs and assumptions held by participants. These disruptions, which we call conversational friction, significantly correlate with task success. While LLMs can identify overt cases of conversational friction, they struggle with subtler and more context-dependent instances that require pragmatic or domain-specific reasoning.</abstract>
@@ -2409,7 +2409,7 @@
     <paper id="166">
       <title>Revisiting Common Assumptions about <fixed-case>A</fixed-case>rabic Dialects in <fixed-case>NLP</fixed-case></title>
       <author><first>Amr</first><last>Keleg</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Walid</first><last>Magdy</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>3309-3327</pages>
       <abstract>Arabic has diverse dialects, where one dialect can be substantially different from the others. In the NLP literature, some assumptions about these dialects are widely adopted (e.g., “Arabic dialects can be grouped into distinguishable regional dialects”) and are manifested in different computational tasks such as Arabic Dialect Identification (ADI). However, these assumptions are not quantitatively verified. We identify four of these assumptions and examine them by extending and analyzing a multi-label dataset, where the validity of each sentence in 11 different country-level dialects is manually assessed by speakers of these dialects. Our analysis indicates that the four assumptions oversimplify reality, and some of them are not always accurate. This in turn might be hindering further progress in different Arabic NLP tasks.</abstract>
@@ -2537,7 +2537,7 @@
       <author><first>Zhenglin</first><last>Hua</last></author>
       <author><first>Yuheng</first><last>Jia</last><affiliation>Southeast University</affiliation></author>
       <author><first>Ming</first><last>Tang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Jinqiao</first><last>Wang</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>3488-3501</pages>
       <abstract>Large vision-language models (LVLMs) have made substantial progress in integrating large language models (LLMs) with visual inputs, enabling advanced multimodal reasoning. Despite their success, a persistent challenge is hallucination—where generated text fails to accurately reflect visual content—undermining both accuracy and reliability. Existing methods focus on alignment training or decoding refinements but primarily address symptoms at the generation stage without probing the underlying causes. In this work, we investigate the internal mechanisms driving hallucination in LVLMs, with an emphasis on the multi-head attention module. Specifically, we introduce Vision-aware Head Divergence (VHD), a metric that quantifies the sensitivity of attention head outputs to visual context. Based on this, our findings reveal the presence of vision-aware attention heads that are more attuned to visual information; however, the model’s overreliance on its prior language patterns is closely related to hallucinations. Building on these insights, we propose Vision-aware Head Reinforcement (VHR), a training-free approach to mitigate hallucination by enhancing the role of vision-aware attention heads. Extensive experiments demonstrate that our method achieves superior performance compared to state-of-the-art approaches in mitigating hallucinations, while maintaining high efficiency with negligible additional time overhead. The code is available at https://github.com/jinghan1he/VHR.</abstract>
@@ -2807,7 +2807,7 @@
       <author><first>Zihan</first><last>Zhang</last></author>
       <author><first>Yuchi</first><last>Liu</last></author>
       <author><first>Quanwei</first><last>Shen</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last></author>
+      <author><first>Fei</first><last>Liu</last></author>
       <author><first>Yu</first><last>Kuang</last></author>
       <author><first>Jian</first><last>He</last></author>
       <author><first>Conglin</first><last>Liu</last></author>
@@ -2850,7 +2850,7 @@
       <title>Positional Overload: Positional Debiasing and Context Window Extension for Large Language Models using Set Encoding</title>
       <author><first>Lukas</first><last>Kinder</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
       <author><first>Lukas</first><last>Edman</last><affiliation>Technische Universität München</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Tobias</first><last>Käfer</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
       <pages>3896-3908</pages>
       <abstract>Large Language Models (LLMs) typically track the order of tokens using positional encoding, which causes the following problems: positional bias, where the model is influenced by an ordering within the prompt, and a fixed context window, as models struggle to generalize to positions beyond those encountered during training. To address these limitations, we developed a novel method called <tex-math>\textit{set encoding}</tex-math>. This method allows multiple pieces of text to be encoded in the same position, thereby eliminating positional bias entirely. Another promising use case for set encoding is to increase the size of the input an LLM can handle. Our experiments demonstrate that set encoding allows an LLM to solve tasks with far more tokens than without set encoding. To our knowledge, set encoding is the first technique to effectively extend an LLM’s context window without requiring any additional training.</abstract>
@@ -2998,7 +2998,7 @@
       <author><first>Xuezhi</first><last>Cao</last><affiliation>Meituan</affiliation></author>
       <author><first>Xunliang</first><last>Cai</last><affiliation>Meituan</affiliation></author>
       <author><first>Wen</first><last>Yao</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Xinbing</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Ying</first><last>Wen</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <pages>4081-4108</pages>
@@ -3011,7 +3011,7 @@
       <title><fixed-case>T</fixed-case>ok<fixed-case>A</fixed-case>lign: Efficient Vocabulary Adaptation via Token Alignment</title>
       <author><first>Chong</first><last>Li</last><affiliation>Institute of automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>4109-4126</pages>
       <abstract>Tokenization serves as a foundational step for Large Language Models (LLMs) to process text. In new domains or languages, the inefficiency of the tokenizer will slow down the training and generation of LLM. The mismatch in vocabulary also hinders deep knowledge transfer between LLMs like token-level distillation. To mitigate this gap, we propose an efficient method named **TokAlign** to replace the vocabulary of LLM from the token co-occurrences view, and further transfer the token-level knowledge between models. It first aligns the source vocabulary to the target one by learning a one-to-one mapping matrix for token IDs. Model parameters, including embeddings, are rearranged and progressively fine-tuned for the new vocabulary. Our method significantly improves multilingual text compression rates and vocabulary initialization for LLMs, decreasing the perplexity from <tex-math>{3.4e}^{2}</tex-math> of strong baseline methods to <tex-math>{1.2e}^{2}</tex-math> after initialization. Experimental results on models across multiple parameter scales demonstrate the effectiveness and generalization of TokAlign, which costs as few as 5k steps to restore the performance of the vanilla model. After unifying vocabularies between LLMs, token-level distillation can remarkably boost (+4.4% than sentence-level distillation) the base model, costing only 235M tokens.</abstract>
       <url hash="5775d1ee">2025.acl-long.207</url>
@@ -3020,7 +3020,7 @@
     </paper>
     <paper id="208">
       <title><fixed-case>A</fixed-case>da<fixed-case>E</fixed-case>dit: Advancing Continuous Knowledge Editing For Large Language Models</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Xiaowen</first><last>Chu</last><affiliation>Hong Kong University of Science and Technology (Guangzhou)</affiliation></author>
       <pages>4127-4149</pages>
       <abstract>Knowledge editing (KE) has emerged as a prominent alternative that enables efficient and precise information modification inside language models. However, a critical challenge arises in continuous language models editing — a significant performance decline both in knowledge update and retention when the number of edits increases. By dissecting the perturbation weight of language model in continuous KE, we uncover that disentangled and sparsified knowledge representation can significantly alleviate the performance decline. Building on these insights, we introduce AdaEdit, a novel knowledge editing method. Extensive empirical evaluations on multiple LLMs demonstrate that our proposed methods can enhance the performance of edited LLMs in large-size continuous editing regimes, outperforming existing ones without substantially compromising the general abilities of these models.</abstract>
@@ -3070,7 +3070,7 @@
       <author><first>Zhiyuan</first><last>Zeng</last></author>
       <author><first>Qinyuan</first><last>Cheng</last></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>4203-4233</pages>
       <abstract>Process Reward Models (PRMs) are crucial for guiding Large Language Models (LLMs) in complex scenarios by providing dense reward signals. However, existing PRMs primarily rely on heuristic approaches, which struggle with cross-domain generalization. While LLM-as-judge has been proposed to provide generalized rewards, current research has focused mainly on feedback results, overlooking the meaningful guidance embedded within the text. Additionally, static and coarse-grained evaluation criteria struggle to adapt to complex process supervision. To tackle these challenges, we propose Dynamic and Generalizable Process Reward Modeling (DG-PRM), which features a reward tree to capture and store fine-grained, multi-dimensional reward criteria. DG-PRM dynamically selects reward signals for step-wise reward scoring. To handle multifaceted reward signals, we pioneeringly adopt Pareto dominance estimation to identify discriminative positive and negative pairs. Experimental results show that DG-PRM achieves stunning performance on prevailing benchmarks, significantly boosting model performance across tasks with dense rewards. Further analysis reveals that DG-PRM adapts well to out-of-distribution scenarios, demonstrating exceptional generalizability.</abstract>
       <url hash="b40a2ca8">2025.acl-long.212</url>
@@ -3205,7 +3205,7 @@
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Suraj</first><last>Sharma</last><affiliation>Calvin College</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>4397-4422</pages>
       <abstract>Opinion survey research is a crucial method used by social scientists for understanding societal beliefs and behaviors. Traditional methodologies often entail high costs and limited scalability, while current automated methods such as opinion synthesis exhibit severe biases and lack traceability. In this paper, we introduce SurveyPilot, a novel finite-state orchestrated agentic framework that automates the collection and analysis of human opinions from social media platforms. SurveyPilot addresses the limitations of pioneering approaches by (i) providing transparency and traceability in each state of opinion collection and (ii) incorporating several techniques for mitigating biases, notably with a novel genetic algorithm for improving result diversity. Our extensive experiments reveal that SurveyPilot achieves a close alignment with authentic survey results across multiple domains, observing average relative improvements of 68,98% and 51,37% when comparing to opinion synthesis and agent-based approaches. Implementation of SurveyPilot is available on https://github.com/thanhpv2102/SurveyPilot.</abstract>
       <url hash="685b0cee">2025.acl-long.221</url>
@@ -3414,7 +3414,7 @@
     </paper>
     <paper id="236">
       <title>Discourse Relation-Enhanced Neural Coherence Modeling</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Heidelberg University</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Heidelberg University</affiliation></author>
       <author><first>Michael</first><last>Strube</last><affiliation>Heidelberg Institute for Theoretical Studies</affiliation></author>
       <pages>4748-4762</pages>
       <abstract>Discourse coherence theories posit relations between text spans as a key feature of coherent texts. However, existing work on coherence modeling has paid little attention to discourse relations. In this paper, we provide empirical evidence to demonstrate that relation features are correlated with text coherence. Then, we investigate a novel fusion model that uses position-aware attention and a visible matrix to combine text- and relation-based features for coherence assessment. Experimental results on two benchmarks show that our approaches can significantly improve baselines, demonstrating the importance of relation features for coherence modeling.</abstract>
@@ -3444,7 +3444,7 @@
       <author><first>Min</first><last>Liu</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Zhiyi</first><last>Yin</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <author><first>LeiJingyu</first><last>LeiJingyu</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <pages>4785-4817</pages>
       <abstract>Current studies have exposed the risk of Large Language Models (LLMs) generating harmful content by jailbreak attacks. However, they overlook that the direct generation of harmful content from scratch is more difficult than inducing LLM to calibrate benign content into harmful forms.In our study, we introduce a novel attack framework that exploits <b>A</b>d<b>V</b>ers<b>A</b>rial me<b>TA</b>pho<b>R</b> (<b>AVATAR</b>) to induce the LLM to calibrate malicious metaphors for jailbreaking.Specifically, to answer harmful queries, AVATAR adaptively identifies a set of benign but logically related metaphors as the initial seed.Then, driven by these metaphors, the target LLM is induced to reason and calibrate about the metaphorical content, thus jailbroken by either directly outputting harmful responses or calibrating residuals between metaphorical and professional harmful content.Experimental results demonstrate that AVATAR can effectively and transferably jailbreak LLMs and achieve a state-of-the-art attack success rate across multiple advanced LLMs.</abstract>
       <url hash="d6040265">2025.acl-long.238</url>
@@ -3549,7 +3549,7 @@
     <paper id="246">
       <title><fixed-case>W</fixed-case>arrior<fixed-case>C</fixed-case>oder: Learning from Expert Battles to Augment Code Large Language Models</title>
       <author><first>Huawen</first><last>Feng</last></author>
-      <author id="pu-zhao"><first>Pu</first><last>Zhao</last></author>
+      <author><first>Pu</first><last>Zhao</last></author>
       <author><first>Qingfeng</first><last>Sun</last></author>
       <author><first>Can</first><last>Xu</last></author>
       <author><first>Fangkai</first><last>Yang</last></author>
@@ -3674,7 +3674,7 @@
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>5075-5094</pages>
       <abstract>Multilingual language models (MLMs) store factual knowledge across languages but often struggle to provide consistent responses to semantically equivalent prompts in different languages. While previous studies point out this cross-lingual inconsistency issue, the underlying causes remain unexplored. In this work, we use mechanistic interpretability methods to investigate cross-lingual inconsistencies in MLMs. We find that MLMs encode knowledge in a language-independent concept space through most layers, and only transition to language-specific spaces in the final layers. Failures during the language transition often result in incorrect predictions in the target language, even when the answers are correct in other languages. To mitigate this inconsistency issue, we propose a linear shortcut method that bypasses computations in the final layers, enhancing both prediction accuracy and cross-lingual consistency. Our findings shed light on the internal mechanisms of MLMs and provide a lightweight, effective strategy for producing more consistent factual outputs.</abstract>
       <url hash="fcb4004a">2025.acl-long.253</url>
@@ -3718,8 +3718,8 @@
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Shuaiyi</first><last>Li</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Wenya</first><last>Xie</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <pages>5131-5157</pages>
       <abstract>Although large language models (LLMs) store vast amount of knowledge in their parameters, they still have limitations in the memorization and utilization of certain knowledge, leading to undesired behaviors such as generating untruthful and inaccurate responses. This highlights the critical need to understand the knowledge boundary of LLMs, a concept that remains inadequately defined in existing research. In this survey, we propose a comprehensive definition of the LLM knowledge boundary and introduce a formalized taxonomy categorizing knowledge into four distinct types. Using this foundation, we systematically review the field through three key lenses: the motivation for studying LLM knowledge boundaries, methods for identifying these boundaries, and strategies for mitigating the challenges they present. Finally, we discuss open challenges and potential research directions in this area. We aim for this survey to offer the community a comprehensive overview, facilitate access to key issues, and inspire further advancements in LLM knowledge research.</abstract>
@@ -3879,7 +3879,7 @@
       <author><first>Zican</first><last>Dong</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yang</first><last>Lu</last><affiliation>Cheung Kong Graduate School of business</affiliation></author>
       <author><first>Xu</first><last>Miao</last><affiliation>DataCanvas</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>5374-5400</pages>
       <abstract>Due to the immense resource demands and the involved complex techniques, it is still challenging for successfully pre-training a large language models (LLMs) with state-of-the-art performance. In this paper, we explore the key bottlenecks and designs during pre-training, and make the following contributions: (1) a comprehensive investigation into the factors contributing to training instability; (2) a robust optimization approach designed to mitigate training instability effectively; (3) an elaborate data pipeline that integrates data synthesis, data curriculum, and data selection. By integrating the above techniques, we create a rather low-cost training recipe and use it to pre-train YuLan-Mini, a fully-open base model with 2.4B parameters on 1.08T tokens. Remarkably, YuLan-Mini achieves top-tier performance among models of similar parameter scale, with comparable performance to industry-leading models that require significantly more data. To facilitate reproduction, we release the full details of training recipe and data composition. Project details can be accessed at the following link: https://anonymous.4open.science/r/YuLan-Mini/README.md.</abstract>
@@ -4135,7 +4135,7 @@
       <author><first>Zongzhang</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>5710-5728</pages>
       <abstract>In Reinforcement Learning from Human Feedback (RLHF), the reward model (RM) evaluates the response quality based on the given context and assigns a reward. It plays a crucial role in aligning RLHF with human preferences. Although the current RM training paradigm concatenates the context and response while amplifying the reward difference between good and bad response pairs, we demonstrate that the RM faces two significant issues: i) it often allocates only a small proportion of attention to the context, and ii) it frequently ignores segments of the context that are relevant for evaluating the response quality. These issues undermine the RM’s effectiveness in modeling human preferences. To further address these challenges, we propose AttnRM, a novel optimization framework that enables the RM to concentrate on crucial segments of the context. Experimental results demonstrate that AttnRM significantly improves preference modeling by increasing attention to relevant information within the context. It also enhances the RM’s generalizability and achieves better performance in aligning with human preferences.</abstract>
       <url hash="d606f64e">2025.acl-long.285</url>
@@ -4195,7 +4195,7 @@
       <author><first>Yutao</first><last>Zhu</last></author>
       <author><first>Jinhao</first><last>Jiang</last></author>
       <author><first>Yingqian</first><last>Min</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Zhicheng</first><last>Dou</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Jiaxin</first><last>Mao</last><affiliation>Renmin University of China, Tsinghua University</affiliation></author>
       <author><first>Yankai</first><last>Lin</last><affiliation>Renmin University of China</affiliation></author>
@@ -4221,7 +4221,7 @@
       <author><first>Qing</first><last>Guo</last><affiliation>National University of Singapore and Agency for Science, Technology and Research (A*STAR))</affiliation></author>
       <author><first>Felix</first><last>Juefei-Xu</last><affiliation>GenAI, Meta</affiliation></author>
       <author><first>Jian</first><last>Zhang</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Geguang</first><last>Pu</last><affiliation>East China Normal University</affiliation></author>
       <pages>5796-5816</pages>
       <abstract>Universal goal hijacking is a kind of prompt injection attack that forces LLMs to return a target malicious response for arbitrary normal user prompts. The previous methods achieve high attack performance while being too cumbersome and time-consuming. Also, they have concentrated solely on optimization algorithms, overlooking the crucial role of the prompt. To this end, we propose a method called POUGH that incorporates an efficient optimization algorithm and two semantics-guided prompt organization strategies. Specifically, our method starts with a sampling strategy to select representative prompts from a candidate pool, followed by a ranking strategy that prioritizes them. Given the sequentially ranked prompts, our method employs an iterative optimization algorithm to generate a fixed suffix that can concatenate to arbitrary user prompts for universal goal hijacking. Experiments conducted on four popular LLMs and ten types of target responses verified the effectiveness.</abstract>
@@ -4248,12 +4248,12 @@
     </paper>
     <paper id="292">
       <title>What Makes a Good Natural Language Prompt?</title>
-      <author><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Duy</first><last>Dinh</last><affiliation>FPT Software</affiliation></author>
       <author><first>Ngoc-Hai</first><last>Nguyen</last><affiliation>QualComm</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
       <pages>5835-5873</pages>
       <abstract>As large language models (LLMs) have progressed towards more human-like and human–AI communications prevalent, prompting has emerged as a decisive component. However, there is limited conceptual consensus on what exactly quantifies natural language prompts. We attempt to address this question by conducting a meta-analysis surveying 150+ prompting-related papers from leading NLP and AI conferences (2022–2024), and blogs. We propose a property- and human-centric framework for evaluating prompt quality, encompassing 21 properties categorized into six dimensions. We then examine how existing studies assess their impact on LLMs, revealing their imbalanced support across models and tasks, and substantial research gaps. Further, we analyze correlations among properties in high-quality natural language prompts, deriving prompting recommendations. Finally, we explore multi-property prompt enhancements in reasoning tasks, observing that single-property enhancements often have the greatest impact. Our findings establish a foundation for property-centric prompt evaluation and optimization, bridging the gaps between human–AI communication and opening new prompting research directions.</abstract>
@@ -4357,7 +4357,7 @@
       <author><first>Sheng</first><last>Wang</last></author>
       <author><first>Erxin</first><last>Yu</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
       <author><first>Xuming</first><last>Hu</last><affiliation>The Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>6002-6024</pages>
       <abstract>Despite demonstrating impressive capabilities, Large Language Models (LLMs) still often struggle to accurately express the factual knowledge they possess, especially in cases where the LLMs’ knowledge boundaries are ambiguous. To improve LLMs’ factual expressions, we propose the UAlign framework, which leverages Uncertainty estimations to represent knowledge boundaries, and then explicitly incorporates these representations as input features into prompts for LLMs to Align with factual knowledge. First, we prepare the dataset on knowledge question-answering (QA) samples by calculating two uncertainty estimations, including confidence score and semantic entropy, to represent the knowledge boundaries for LLMs. Subsequently, using the prepared dataset, we train a reward model that incorporates uncertainty estimations and then employ the Proximal Policy Optimization (PPO) algorithm for factuality alignment on LLMs. Experimental results indicate that, by integrating uncertainty representations in LLM alignment, the proposed UAlign can significantly enhance the LLMs’ capacities to confidently answer known questions and refuse unknown questions on both in-domain and out-of-domain tasks, showing reliability improvements and good generalizability over various prompt- and training-based baselines.</abstract>
       <url hash="0392c800">2025.acl-long.299</url>
@@ -4554,7 +4554,7 @@
       <author><first>Bingcheng</first><last>Liu</last></author>
       <author><first>Jie</first><last>Wu</last></author>
       <author><first>Renxiang</first><last>Li</last></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <pages>6240-6254</pages>
       <abstract>Contract review is a critical process to protect the rights and interests of the parties involved. However, this process is time-consuming, labor-intensive, and costly, especially when a contract faces multiple rounds of review. To accelerate the contract review and promote the completion of transactions, this paper introduces a novel benchmark of legal provision recommendation and conflict detection for contract auto-reviewing (ProvBench), which aims to recommend the legal provisions related to contract clauses and detect possible legal conflicts. Specifically, we construct the first Legal Provision Recommendation Dataset: ProvData, which covers 8 common contract types. In addition, we conduct extensive experiments to evaluate ProvBench on various state-of-the-art models. Experimental results validate the feasibility of ProvBench and demonstrate the effectiveness of ProvData. Finally, we identify potential challenges in the ProvBench and advocate for further investigation.</abstract>
       <url hash="96fb473b">2025.acl-long.312</url>
@@ -4630,7 +4630,7 @@
       <author><first>Xun</first><last>Wang</last></author>
       <author><first>Si-Qing</first><last>Chen</last></author>
       <author><first>Michael J.</first><last>Wooldridge</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
       <pages>6317-6342</pages>
       <abstract>Language is not monolithic. While benchmarks, including those designed for multiple languages, are often used as proxies to evaluate the performance of Large Language Models (LLMs), they tend to overlook the nuances of within-language variation and thus fail to model the experience of speakers of non-standard dialects. Focusing on African American Vernacular English (AAVE), we present the first study aimed at objectively assessing the fairness and robustness of LLMs in handling dialects across canonical reasoning tasks, including algorithm, math, logic, and integrated reasoning. We introduce **ReDial** (**Re**asoning with **Dial**ect Queries), a benchmark containing 1.2K+ parallel query pairs in Standardized English and AAVE. We hire AAVE speakers, including experts with computer science backgrounds, to rewrite seven popular benchmarks,such as HumanEval and GSM8K. With ReDial, we evaluate widely used LLMs, including GPT, Claude, Llama, Mistral, and the Phi model families. Our findings reveal that <b>almost all of these widely used models show significant brittleness and unfairness to queries in AAVE</b>. Our work establishes a systematic and objective framework for analyzing LLM bias in dialectal queries. Moreover, it highlights how mainstream LLMs provide unfair service to dialect speakers in reasoning tasks, laying a critical foundation for future research.</abstract>
@@ -4644,7 +4644,7 @@
       <author><first>Yuepei</first><last>Li</last></author>
       <author><first>Qiao</first><last>Qiao</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Kang</first><last>Zhou</last><affiliation>Amazon</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <pages>6343-6354</pages>
       <abstract>Open Relation Extraction (OpenRE) seeks to identify and extract novel relational facts between named entities from unlabeled data without pre-defined relation schemas. Traditional OpenRE methods typically assume that the unlabeled data consists solely of novel relations or is pre-divided into known and novel instances. However, in real-world scenarios, novel relations are arbitrarily distributed. In this paper, we propose a generalized OpenRE setting that considers unlabeled data as a mixture of both known and novel instances. To address this, we propose MixORE, a two-phase framework that integrates relation classification and clustering to jointly learn known and novel relations. Experiments on three benchmark datasets demonstrate that MixORE consistently outperforms competitive baselines in known relation classification and novel relation clustering. Our findings contribute to the advancement of generalized OpenRE research and real-world applications.</abstract>
       <url hash="b04f480f">2025.acl-long.318</url>
@@ -4696,7 +4696,7 @@
       <author><first>Lingrui</first><last>Mei</last><affiliation>Skywork AI</affiliation></author>
       <author><first>Wenjie</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Lizhe</first><last>Chen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>6404-6420</pages>
       <abstract>Large language models (LLMs) have achieved significant success in reasoning tasks, including mathematical reasoning and logical deduction. Among these reasoning tasks, graph problems stand out due to their complexity and unique structural characteristics, attracting considerable attention from researchers. Previous studies have explored LLMs’ graph reasoning abilities through various techniques, such as different encoding methods for graph structures and the use of carefully designed prompts. However, a critical factor has been mostly overlooked: the prompt sequential order in which graph descriptions are presented to the models. In this study, we present the first comprehensive analysis of how the order of graph descriptions impacts LLM performance. Specifically, we comprehensively evaluate four graph description orders across six graph problems using six mainstream LLMs. The results reveal that: (1) ordered graph descriptions significantly improve LLMs’ comprehension of graph structures; (2) the robustness of LLMs to graph description order varies across different tasks; and (3) the impact of graph order on performance is closely related to the inherent characteristics of tasks. This study provides a critical advancement in the application of LLMs for solving graph-related problems, paving the way for future research to optimize model performance through strategic graph description ordering.</abstract>
       <url hash="bec6366f">2025.acl-long.321</url>
@@ -4727,7 +4727,7 @@
       <author><first>Hongzhi</first><last>Zhang</last><affiliation>Kuaishou- 快手科技</affiliation></author>
       <author><first>V.</first><last>W.</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Fuzheng</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>6435-6462</pages>
       <abstract>Video captioning can be used to assess the video understanding capabilities of Multimodal Large Language Models (MLLMs).However, existing benchmarks and evaluation protocols suffer from crucial issues, such as inadequate or homogeneous creation of key points, exorbitant cost of data creation, and limited evaluation scopes. To address these issues, we propose an automatic framework, named AutoCaption, which leverages Monte Carlo Tree Search (MCTS) to construct numerous and diverse descriptive sentences (<i>i.e.</i>, key points) that thoroughly represent video content in an iterative way. This iterative captioning strategy enables the continuous enhancement of video details such as actions, objects’ attributes, environment details, etc. We apply AutoCaption to curate MCTS-VCB, a fine-grained video caption benchmark covering video details, thereby enabling a comprehensive evaluation of MLLMs on the video captioning task. We evaluate more than 20 open- and closed-source MLLMs of varying sizes on MCTS-VCB. Results show that MCTS-VCB can effectively and comprehensively evaluate the video captioning capability, with Gemini-1.5-Pro achieving the highest F1 score of 71.2. Interestingly, we fine-tune InternVL2.5-8B with the AutoCaption-generated data, which helps the model achieve an overall improvement of 25.0% on MCTS-VCB and 16.3% on DREAM-1K, further demonstrating the effectiveness of AutoCaption. The code and data are available at <url>https://github.com/tjunlp-lab/MCTS-VCB</url>.</abstract>
       <url hash="c1aee1e5">2025.acl-long.323</url>
@@ -4825,7 +4825,7 @@
       <author><first>Soda Marem</first><last>Lo</last></author>
       <author><first>Oscar</first><last>Araque</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Rajesh</first><last>Sharma</last><affiliation>institute of computer science, University of Tartu</affiliation></author>
-      <author><first>Marco Antonio</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco Antonio</first><last>Stranisci</last></author>
       <pages>6625-6639</pages>
       <abstract>Canceling is a morally-driven phenomenon that hinders the development of safe social media platforms and contributes to ideological polarization. To address this issue we present the Canceling Attitudes Detection (CADE) dataset, an annotated corpus of canceling incidents aimed at exploring the factors of disagreements in evaluating people’s canceling attitudes on social media. Specifically, we study the impact of annotators’ morality in their perception of canceling, showing that morality is an independent axis for the explanation of disagreement on this phenomenon. Annotator’s judgments heavily depend on the type of controversial events and involved celebrities. This shows the need to develop more event-centric datasets to better understand how harms are perpetrated in social media and to develop more aware technologies for their detection.</abstract>
       <url hash="dcb986cc">2025.acl-long.330</url>
@@ -4871,7 +4871,7 @@
       <author id="chaoqun-liu-ntu"><first>Chaoqun</first><last>Liu</last></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Shanda Group and Alibaba Group</affiliation></author>
       <author><first>Deli</first><last>Zhao</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yu</first><last>Rong</last><affiliation>Alibaba Group</affiliation></author>
       <pages>6685-6715</pages>
       <abstract>Many challenging reasoning tasks require not just rapid, intuitive responses, but a more deliberate, multi-step approach. Recent progress in large language models (LLMs) highlights an important shift from the “System 1” way of quick reactions to the “System 2” style of reflection-and-correction problem solving. However, current benchmarks heavily rely on the final-answer accuracy, leaving much of a model’s intermediate reasoning steps unexamined. This fails to assess the model’s ability to reflect and rectify mistakes within the reasoning process. To bridge this gap, we introduce FINEREASON, a logic-puzzle benchmark for systematic evaluation of LLMs’ reasoning capabilities. Each puzzle can be decomposed into atomic steps, making it ideal for rigorous validation of intermediate correctness. Building on this, we introduce two tasks: state checking and state transition, for a comprehensive evaluation of how models assess the current situation and plan the next move. To support broader research, we also provide a puzzle training set aimed at enhancing general reasoning. We show that models trained on our state checking and transition data demonstrate gains in mathematical reasoning by up to 5.1%.</abstract>
@@ -4949,7 +4949,7 @@
       <author id="xiaolei-wang-fudan"><first>Xiaolei</first><last>Wang</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Zhihao</first><last>Lv</last></author>
       <author><first>Yingqian</first><last>Min</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Binbin</first><last>Hu</last><affiliation>Ant Group</affiliation></author>
       <author><first>Ziqi</first><last>Liu</last><affiliation>Ant Group</affiliation></author>
       <author><first>Zhiqiang</first><last>Zhang</last><affiliation>Ant Group</affiliation></author>
@@ -5037,7 +5037,7 @@
       <author><first>Minghao</first><last>Yuan</last><affiliation>Peking University</affiliation></author>
       <author><first>Qianqian</first><last>Ju</last></author>
       <author><first>Yujia</first><last>Peng</last><affiliation>Peking University</affiliation></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <pages>6950-6965</pages>
       <abstract>Social media platforms possess considerable potential in the realm of exploring mental health. Previous research has indicated that major life events can greatly impact individuals’ mental health. However, due to the complexity and ambiguity nature of life events, shedding its light on social media data is quite challenging. In this paper, we are dedicated to uncovering life events mentioned in posts on social media. We hereby provide a carefully-annotated social media event dataset, PsyEvent, which encompasses 12 major life event categories that are likely to occur in everyday life. This dataset is human-annotated under iterative procedure and boasts a high level of quality. Furthermore, by applying the life events extracted from posts to downstream tasks such as early risk detection of depression and suicide risk prediction, we have observed a considerable improvement in performance. This suggests that extracting life events from social media can be beneficial for the analysis of individuals’ mental health.</abstract>
@@ -5049,7 +5049,7 @@
       <title><fixed-case>C</fixed-case>ontrol<fixed-case>S</fixed-case>peech: Towards Simultaneous and Independent Zero-shot Speaker Cloning and Zero-shot Language Style Control</title>
       <author><first>Shengpeng</first><last>Ji</last></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Jialong</first><last>Zuo</last></author>
       <author><first>Minghui</first><last>Fang</last></author>
       <author><first>Ziyue</first><last>Jiang</last></author>
@@ -5148,7 +5148,7 @@
       <author><first>Qiang</first><last>Liu</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Shu</first><last>Wu</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Liang</first><last>Wang</last><affiliation>Institute of Automation, CAS,China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>7128-7141</pages>
       <abstract>Personalized text generation aims to infer users’ writing style preferences from their historical texts and generate outputs that faithfully reflect these stylistic characteristics. Existing solutions primarily adopt two paradigms: retrieval-augmented generation (RAG) and parameter-efficient fine-tuning (PEFT). While these approaches have advanced the field, they suffer from two critical limitations: (1) the entanglement of content semantics and stylistic patterns in historical texts impedes accurate modeling of user-specific writing preferences; and (2) scalability challenges arising from both RAG’s inference latency by retrieval operations and PEFT’s parameter storage requirements for per user model. To overcome these limitations, we propose StyleVector, a training-free framework that disentangles and represents personalized writing style as a vector in LLM’s activation space, enabling style-steered generation during inference without requiring costly retrieval or parameter storage. Comprehensive experiments demonstrate that our framework achieves a significant 8% relative improvement in personalized generation while reducing storage requirements by 1700 <tex-math>\times</tex-math> over PEFT method.</abstract>
       <url hash="3db6c8d6">2025.acl-long.353</url>
@@ -5179,7 +5179,7 @@
       <author><first>Sachin</first><last>Kumar</last><affiliation>Ohio State University, Columbus</affiliation></author>
       <author><first>Valentina</first><last>Pyatkin</last><affiliation>Allen Institute for Artificial Intelligence and Department of Computer Science</affiliation></author>
       <author><first>Faeze</first><last>Brahman</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <author><first>Pradeep</first><last>Dasigi</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>7162-7200</pages>
@@ -5195,7 +5195,7 @@
       <author><first>Tian</first><last>Lan</last></author>
       <author><first>Tong</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yu-Shi</first><last>Zhu</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>7201-7218</pages>
       <abstract>Automatic evaluation for Open Domain Event Detection (ODED) is a highly challenging task, because ODED is characterized by a vast diversity of un-constrained output labels from various domains. Nearly all existing evaluation methods for ODED usually first construct evaluation benchmarks with limited labels and domain coverage, and then evaluate ODED methods using metrics based on token-level label matching rules. However, this kind of evaluation framework faces two issues: (1) The limited evaluation benchmarks lack representatives of the real world, making it difficult to accurately reflect the performance of various ODED methods in real-world scenarios; (2) Evaluation metrics based on token-level matching rules fail to capture semantic similarity between predictions and golden labels. To address these two problems above, we propose a scalable and reliable Semantic-level Evaluation framework for Open domain Event detection (SEOE) by constructing a more representative evaluation benchmark and introducing a semantic evaluation metric. Specifically, our proposed framework first constructs a scalable evaluation benchmark that currently includes 564 event types covering 7 major domains, with a cost-effective supplementary annotation strategy to ensure the benchmark’s representativeness. The strategy also allows for the supplement of new event types and domains in the future. Then, the proposed SEOE leverages large language models (LLMs) as automatic evaluation agents to compute a semantic F1-score, incorporating fine-grained definitions of semantically similar labels to enhance the reliability of the evaluation. Extensive experiments validate the representatives of the benchmark and the reliability of the semantic evaluation metric. Existing ODED methods are thoroughly evaluated, and the error patterns of predictions are analyzed, revealing several insightful findings.</abstract>
       <url hash="7dfca4cd">2025.acl-long.356</url>
@@ -5276,7 +5276,7 @@
       <title>Rolling the <fixed-case>DICE</fixed-case> on Idiomaticity: How <fixed-case>LLM</fixed-case>s Fail to Grasp Context</title>
       <author><first>Maggie</first><last>Mi</last></author>
       <author><first>Aline</first><last>Villavicencio</last><affiliation>University of Exeter and University of Sheffield</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
       <pages>7314-7332</pages>
       <abstract>Human processing of idioms heavily depends on interpreting the surrounding context in which they appear. While large language models (LLMs) have achieved impressive performance on idiomaticity detection benchmarks, this success may be driven by reasoning shortcuts present in existing datasets. To address this, we introduce a novel, controlled contrastive dataset (DICE) specifically designed to assess whether LLMs can effectively leverage context to disambiguate idiomatic meanings. Furthermore, we investigate the influence of collocational frequency and sentence probability—proxies for human processing known to affect idiom resolution—on model performance. Our results show that LLMs frequently fail to resolve idiomaticity when it depends on contextual understanding, performing better on sentences deemed more likely by the model. Additionally, idiom frequency influences performance but does not guarantee accurate interpretation. Our findings emphasize the limitations of current models in grasping contextual meaning and highlight the need for more context-sensitive evaluation.</abstract>
       <url hash="5bc20d51">2025.acl-long.362</url>
@@ -5302,7 +5302,7 @@
       <title>The Cross-linguistic Role of <fixed-case>A</fixed-case>nimacy in Grammar Structures</title>
       <author><first>Nina</first><last>Gregorio</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Matteo</first><last>Gay</last></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Edoardo</first><last>Ponti</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>7349-7363</pages>
       <abstract>Animacy is a semantic feature of nominals and follows a hierarchy: personal pronouns &gt; human &gt; animate &gt; inanimate. In several languages, animacy imposes hard constraints on grammar. While it has been argued that these constraints may emerge from universal soft tendencies, it has been difficult to provide empirical evidence for this conjecture due to the lack of data annotated with animacy classes. In this work, we first propose a method to reliably classify animacy classes of nominals in 11 languages from 5 families, leveraging multilingual large language models (LLMs) and word sense disambiguation datasets. Then, through this newly acquired data, we verify that animacy displays consistent cross-linguistic tendencies in terms of preferred morphosyntactic constructions, although not always in line with received wisdom: animacy in nouns correlates with the alignment role of agent, early positions in a clause, and syntactic pivot (e.g., for relativisation), but not necessarily with grammatical subjecthood. Furthermore, the behaviour of personal pronouns in the hierarchy is idiosyncratic as they are rarely plural and relativised, contrary to high-animacy nouns.</abstract>
@@ -5444,7 +5444,7 @@
       <title>Alleviating Distribution Shift in Synthetic Data for Machine Translation Quality Estimation</title>
       <author><first>Xiang</first><last>Geng</last></author>
       <author><first>Zhejian</first><last>Lai</last></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Hao</first><last>Yang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>7546-7560</pages>
@@ -5494,7 +5494,7 @@
       <author><first>Yuzhuang</first><last>Xu</last></author>
       <author><first>Xiaolong</first><last>Wang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>7605-7633</pages>
       <abstract>Active perception, a crucial human capability, involves setting a goal based on the current understanding of the environment and performing actions to achieve that goal. Despite significant efforts in evaluating Multimodal Large Language Models (MLLMs), active perception has been largely overlooked. To address this gap, we propose a novel benchmark named ActiView to evaluate active perception in MLLMs. We focus on a specialized form of Visual Question Answering (VQA) that eases and quantifies the evaluation yet challenging for existing MLLMs. Meanwhile, intermediate reasoning behaviors of models are also discussed. Given an image, we restrict the perceptual field of a model, requiring it to actively zoom or shift its perceptual field based on reasoning to answer the question successfully. We conduct extensive evaluation over 30 models, including proprietary and open-source models, and observe that restricted perceptual fields play a significant role in enabling active perception. Results reveal a significant gap in the active perception capability of MLLMs, indicating that this area deserves more attention. We hope that ActiView could help develop methods for MLLMs to understand multimodal inputs in more natural and holistic ways.</abstract>
       <url hash="d6c0a6c2">2025.acl-long.376</url>
@@ -5610,7 +5610,7 @@
       <author><first>Hyokun</first><last>Yun</last><affiliation>Amazon</affiliation></author>
       <author><first>Ming</first><last>Zeng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Pei</first><last>Chen</last><affiliation>Texas A&amp;M University - College Station</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Yifan</first><last>Gao</last><affiliation>Amazon</affiliation></author>
       <author><first>Ruijie</first><last>Wang</last></author>
       <author><first>Priyanka</first><last>Nigam</last></author>
@@ -5651,7 +5651,7 @@
     <paper id="387">
       <title><fixed-case>S</fixed-case>pa<fixed-case>RE</fixed-case>: Enhancing Spatial Reasoning in Vision-Language Models with Synthetic Data</title>
       <author><first>Michael</first><last>Ogezi</last></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
       <pages>7855-7875</pages>
       <abstract>Vision-language models (VLMs) work well in tasks ranging from image captioning to visual question answering (VQA), yet they struggle with spatial reasoning, a key skill for understanding our physical world that humans excel at. We find that spatial relations are generally rare in widely used VL datasets, with only a few being well represented, while most form a long tail of underrepresented relations. This gap leaves VLMs ill-equipped to handle diverse spatial relationships. To bridge it, we construct a synthetic VQA dataset focused on spatial reasoning generated from hyper-detailed image descriptions in Localized Narratives, DOCCI, and PixMo-Cap. Our dataset consists of 455k samples containing 3.4 million QA pairs. Trained on this dataset, our Spatial-Reasoning Enhanced (SpaRE) VLMs show strong improvements on spatial reasoning benchmarks, achieving up to a 49% performance gain on the What’s Up benchmark, while maintaining strong results on general tasks. Our work narrows the gap between human and VLM spatial reasoning and makes VLMs more capable in real-world tasks such as robotics and navigation. We plan to share our code and dataset in due course.</abstract>
       <url hash="17c687c2">2025.acl-long.387</url>
@@ -5815,7 +5815,7 @@
       <author><first>Arya</first><last>Talebzadeh</last><affiliation>Meta</affiliation></author>
       <author><first>Sinong</first><last>Wang</last><affiliation>Facebook</affiliation></author>
       <author><first>Han</first><last>Fang</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Meta AI and Carnegie Mellon University</affiliation></author>
       <author><first>Hejia</first><last>Zhang</last><affiliation>Facebook</affiliation></author>
       <pages>8140-8155</pages>
@@ -5876,7 +5876,7 @@
       <author><first>Xuhao</first><last>Hu</last><affiliation>Fudan University and Shanghai AI Laboratory</affiliation></author>
       <author><first>Dongrui</first><last>Liu</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
       <author><first>Hao</first><last>Li</last><affiliation>Beijing University of Aeronautics and Astronautics</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Jing</first><last>Shao</last><affiliation>Shanghai AI Laboratory</affiliation></author>
       <pages>8285-8316</pages>
       <abstract>Safety concerns of Multimodal large language models (MLLMs) have gradually become an important problem in various applications. Surprisingly, previous works indicate a counterintuitive phenomenon that using textual unlearning to align MLLMs achieves comparable safety performances with MLLMs aligned with image-text pairs. To explain such a phenomenon, we discover a <tex-math>\textit{\textbf{V}isual \textbf{S}afety \textbf{I}nformation \textbf{L}eakage} (\textbf{VSIL})</tex-math> problem in existing multimodal safety benchmarks, <tex-math>\textit{i.e.}</tex-math>, the potentially risky content in the image has been revealed in the textual query. Thus, MLLMs can easily refuse these sensitive image-text pairs according to textual queries only, leading to <b>unreliable cross-modality safety evaluation of MLLMs</b>. We also conduct a further comparison experiment between textual alignment and multimodal alignment to highlight this drawback. To this end, we construct <tex-math>\textit{\textbf{V}isual \textbf{L}eakless \textbf{S}afety \textbf{B}ench} (\textbf{VLSBench})</tex-math> with 2.2k image-text pairs through an automated data pipeline. Experimental results indicate that VLSBench poses a significant challenge to both open-source and close-source MLLMs, <tex-math>\textit{i.e.}</tex-math>, LLaVA, Qwen2-VL and GPT-4o. Besides, we empirically compare textual and multimodal alignment methods on VLSBench and find that textual alignment is effective enough for multimodal safety scenarios with VSIL, while multimodal alignment is preferable for safety scenarios without VSIL.</abstract>
@@ -5964,7 +5964,7 @@
       <author><first>Zitai</first><last>Qiu</last></author>
       <author><first>Qing</first><last>Li</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Hu</first><last>Wang</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>8402-8415</pages>
       <abstract>Social event detection involves identifying and categorizing important events from social media, which relies on labeled data, but annotation is costly and labor-intensive. To address this problem, we propose Augmentation framework for Social Event Detection (SED-Aug), a plug-and-play dual augmentation framework, which combines explicit text-based and implicit feature-space augmentation to enhance data diversity and model robustness. The explicit augmentation utilizes LLMs to enhance textual information through five diverse generation strategies. For implicit augmentation, we design five novel perturbation techniques that operate in the feature space on structural fused embeddings. These perturbations are crafted to keep the semantic and relational properties of the embeddings and make them more diverse. Specifically, SED-Aug outperforms the best baseline model by approximately 17.67% on the Twitter2012 dataset and by about 15.57% on the Twitter2018 dataset in terms of the average F1 score.</abstract>
       <url hash="42ad392f">2025.acl-long.412</url>
@@ -6009,7 +6009,7 @@
     <paper id="415">
       <title>Unanswerability Evaluation for Retrieval Augmented Generation</title>
       <author><first>Xiangyu</first><last>Peng</last><affiliation>Salesforce AI Research</affiliation></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>8452-8472</pages>
@@ -6103,7 +6103,7 @@
       <author><first>Zhaochen</first><last>Hong</last></author>
       <author><first>Xiaocheng</first><last>Yang</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Shuyi</first><last>Guo</last></author>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Zhenhailong</first><last>Wang</last></author>
       <author><first>Cheng</first><last>Qian</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Robert</first><last>Tang</last></author>
@@ -6125,7 +6125,7 @@
       <author><first>Isuri</first><last>Nanomi Arachchige</last></author>
       <author><first>Alistair</first><last>Plum</last><affiliation>University of Luxembourg</affiliation></author>
       <author><first>Paul</first><last>Rayson</last><affiliation>Lancaster University</affiliation></author>
-      <author><first>Ruslan</first><last>Mitkov</last><affiliation>Lancaster University</affiliation></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last><affiliation>Lancaster University</affiliation></author>
       <pages>8623-8636</pages>
       <abstract>Recently, language models (LMs) have produced excellent results in many natural language processing (NLP) tasks. However, their effectiveness is highly dependent on available pre-training resources, which is particularly challenging for low-resource languages such as Sinhala. Furthermore, the scarcity of benchmarks to evaluate LMs is also a major concern for low-resource languages. In this paper, we address these two challenges for Sinhala by (i) collecting the largest monolingual corpus for Sinhala, (ii) training multiple LMs on this corpus and (iii) compiling the first Sinhala NLP benchmark (Sinhala-GLUE) and evaluating LMs on it. We show the Sinhala LMs trained in this paper outperform the popular multilingual LMs, such as XLM-R and existing Sinhala LMs in downstream NLP tasks. All the trained LMs are publicly available. We also make Sinhala-GLUE publicly available as a public leaderboard, and we hope that it will enable further advancements in developing and evaluating LMs for Sinhala.</abstract>
       <url hash="fc10167a">2025.acl-long.422</url>
@@ -6138,7 +6138,7 @@
       <author><first>Veronika</first><last>Makarova</last><affiliation>University of Saskatchewan</affiliation></author>
       <author><first>Zhi</first><last>Li</last></author>
       <author><first>Jordan</first><last>Kodner</last><affiliation>State University of New York, Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>8637-8663</pages>
       <abstract>The paper explores the performance of LLMs in the context of multi-dimensional analytic writing assessments, i.e. their ability to provide both scores and comments based on multiple assessment criteria. Using a corpus of literature reviews written by L2 graduate students and assessed by human experts against 9 analytic criteria, we prompt several popular LLMs to perform the same task under various conditions. To evaluate the quality of feedback comments, we apply a novel feedback comment quality evaluation framework. This framework is interpretable, cost-efficient, scalable, and reproducible, compared to existing methods that rely on manual judgments. We find that LLMs can generate reasonably good and generally reliable multi-dimensional analytic assessments. We release our corpus and code for reproducibility.</abstract>
       <url hash="010414e6">2025.acl-long.423</url>
@@ -6166,12 +6166,12 @@
       <author><first>Yuting</first><last>Li</last><affiliation>Universität Köln</affiliation></author>
       <author><first>Wei</first><last>Zhou</last></author>
       <author><first>Ziwei</first><last>Gong</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Yang Janet</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Katja</first><last>Jasinskaja</last><affiliation>Universität Köln</affiliation></author>
       <author><first>Annemarie</first><last>Friedrich</last><affiliation>University of Augsburg</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <author><first>Frauke</first><last>Kreuter</last><affiliation>University of Maryland</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>8679-8696</pages>
       <abstract>Understanding pragmatics—the use of language in context—is crucial for developing NLP systems capable of interpreting nuanced language use. Despite recent advances in language technologies, including large language models, evaluating their ability to handle pragmatic phenomena such as implicatures and references remains challenging. To advance pragmatic abilities in models, it is essential to understand current evaluation trends and identify existing limitations. In this survey, we provide a comprehensive review of resources designed for evaluating pragmatic capabilities in NLP, categorizing datasets by the pragmatic phenomena they address. We analyze task designs, data collection methods, evaluation approaches, and their relevance to real-world applications. By examining these resources in the context of modern language models, we highlight emerging trends, challenges, and gaps in existing benchmarks. Our survey aims to clarify the landscape of pragmatic evaluation and guide the development of more comprehensive and targeted benchmarks, ultimately contributing to more nuanced and context-aware NLP models.</abstract>
       <url hash="90b1b595">2025.acl-long.425</url>
@@ -6231,7 +6231,7 @@
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Peiqin</first><last>Lin</last><affiliation>Institut für Informatik</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>8767-8788</pages>
       <abstract>In-context machine translation (MT) with large language models (LLMs) is a promising approach for low-resource MT, as it can readily take advantage of linguistic resources such as grammar books and dictionaries.Such resources are usually selectively integrated into the prompt so that LLMs can directly perform translation without any specific training, via their in-context learning capability (ICL).However, the relative importance of each type of resource, e.g., dictionary, grammar book, and retrieved parallel examples, is not entirely clear.To address this gap, this study systematically investigates how each resource and its quality affect the translation performance, with the Manchu language as our case study. To remove any prior knowledge of Manchu encoded in the LLM parameters and single out the effect of ICL, we also experiment with an enciphered version of Manchu texts.Our results indicate that high-quality dictionaries and good parallel examples are very helpful, while grammars hardly help.In a follow-up study, we showcase a promising application of in-context MT: parallel data augmentation as a way to bootstrap a conventional MT model. When monolingual data abound, generating synthetic parallel data through in-context MT offers a pathway to mitigate data scarcity and build effective and efficient low-resource neural MT systems.</abstract>
       <url hash="b134bd6f">2025.acl-long.429</url>
@@ -6294,7 +6294,7 @@
       <author><first>Xue</first><last>Liu</last><affiliation>McGill University</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <author><first>Siva</first><last>Reddy</last><affiliation>ServiceNow Inc, Mila, McGill University and Mila, McGill University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <pages>8870-8880</pages>
       <abstract>Traditional supervised fine-tuning (SFT) strategies for sequence-to-sequence tasks often train models to directly generate the target output. Recent work has shown that guiding models with intermediate steps—such as keywords, outlines, or reasoning chains—can significantly improve performance, coherence, and interpretability. However, these methods often depend on predefined intermediate formats and annotated data, limiting their scalability and generalizability. In this work, we introduce a task-agnostic framework that enables models to generate intermediate “warmup” sequences. These warmup sequences, serving as an initial state for subsequent generation, are optimized to enhance the probability of generating the target sequence without relying on external supervision or human-designed structures. Drawing inspiration from reinforcement learning principles, our method iteratively refines these intermediate steps to maximize their contribution to the final output, similar to reward-driven optimization in reinforcement learning with human feedback. Experimental results across tasks such as translation, summarization, and multi-choice question answering for logical reasoning show that our approach outperforms traditional SFT methods, and offers a scalable and flexible solution for sequence-to-sequence tasks.</abstract>
       <url hash="082631a5">2025.acl-long.434</url>
@@ -6305,7 +6305,7 @@
       <title>Building Better: Avoiding Pitfalls in Developing Language Resources when Data is Scarce</title>
       <author><first>Nedjma</first><last>Ousidhoum</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Meriem</first><last>Beloucif</last><affiliation>Uppsala University</affiliation></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>8881-8894</pages>
       <abstract>Language is a form of symbolic capital that affects people’s lives in many ways (Bourdieu1977,1991). As a powerful means of communication, it reflects identities, cultures, traditions, and societies more broadly. Therefore, data in a given language should be regarded as more than just a collection of tokens. Rigorous data collection and labeling practices are essential for developing more human-centered and socially aware technologies. Although there has been growing interest in under-resourced languages within the NLP community, work in this area faces unique challenges, such as data scarcity and limited access to qualified annotators.In this paper, we collect feedback from individuals directly involved in and impacted by NLP artefacts for medium- and low-resource languages. We conduct both quantitative and qualitative analyses of their responses and highlight key issues related to: (1) data quality, including linguistic and cultural appropriateness; and (2) the ethics of common annotation practices, such as the misuse of participatory research. Based on these findings, we make several recommendations for creating high-quality language artefacts that reflect the cultural milieu of their speakers, while also respecting the dignity and labor of data workers.</abstract>
       <url hash="3fa291c7">2025.acl-long.435</url>
@@ -6324,7 +6324,7 @@
       <author><first>Nirmal</first><last>Surange</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Daniela</first><last>Teodorescu</last></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Felermino D. M. A.</first><last>Ali</last></author>
       <author><first>Ilseyar</first><last>Alimova</last><affiliation>Kazan Federal University</affiliation></author>
@@ -6353,7 +6353,7 @@
       <author><first>Charles Henrique Porto</first><last>Ferreira</last><affiliation>Centro Universitário FEI</affiliation></author>
       <author><first>Vitaly</first><last>Protasov</last><affiliation>AIRI</affiliation></author>
       <author><first>Samuel</first><last>Rutunda</last></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>Aura Cristina</first><last>Udrea</last></author>
       <author><first>Lilian Diana Awuor</first><last>Wanzare</last><affiliation>Maseno University</affiliation></author>
       <author><first>Sophie</first><last>Wu</last></author>
@@ -6361,7 +6361,7 @@
       <author><first>Hanif Muhammad</first><last>Zhafran</last><affiliation>Institut Teknologi Bandung</affiliation></author>
       <author><first>Tianhui</first><last>Zhang</last><affiliation>University of Liverpool</affiliation></author>
       <author><first>Yi</first><last>Zhou</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>8895-8916</pages>
       <abstract>People worldwide use language in subtle and complex ways to express emotions. Although emotion recognition–an umbrella term for several NLP tasks–impacts various applications within NLP and beyond, most work in this area has focused on high-resource languages. This has led to significant disparities in research efforts and proposed solutions, particularly for under-resourced languages, which often lack high-quality annotated datasets.In this paper, we present BRIGHTER–a collection of multi-labeled, emotion-annotated datasets in 28 different languages and across several domains. BRIGHTER primarily covers low-resource languages from Africa, Asia, Eastern Europe, and Latin America, with instances labeled by fluent speakers. We highlight the challenges related to the data collection and annotation processes, and then report experimental results for monolingual and crosslingual multi-label emotion identification, as well as emotion intensity recognition. We analyse the variability in performance across languages and text domains, both with and without the use of LLMs, and show that the BRIGHTER datasets represent a meaningful step towards addressing the gap in text-based emotion recognition.</abstract>
       <url hash="b829a51e">2025.acl-long.436</url>
@@ -6394,7 +6394,7 @@
     </paper>
     <paper id="439">
       <title>Empathy Prediction from Diverse Perspectives</title>
-      <author><first>Francine</first><last>Chen</last><affiliation>Toyota Research Institute</affiliation></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last><affiliation>Toyota Research Institute</affiliation></author>
       <author><first>Scott</first><last>Carter</last><affiliation>Toyota Research Institute</affiliation></author>
       <author><first>Tatiana</first><last>Lau</last><affiliation>Toyota Research Institute</affiliation></author>
       <author><first>Nayeli Suseth</first><last>Bravo</last><affiliation>Toyota Research Institute</affiliation></author>
@@ -6444,7 +6444,7 @@
     <paper id="443">
       <title>Comparing <fixed-case>LLM</fixed-case>-generated and human-authored news text using formal syntactic theory</title>
       <author><first>Olga</first><last>Zamaraeva</last><affiliation>Universidad de La Coruña</affiliation></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Francis</first><last>Bond</last><affiliation>Palacký University Olomouc</affiliation></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last><affiliation>Universidade da Coruña</affiliation></author>
       <pages>9041-9060</pages>
@@ -6496,7 +6496,7 @@
       <title>Collapse of Dense Retrievers: Short, Early, and Literal Biases Outranking Factual Evidence</title>
       <author><first>Mohsen</first><last>Fayyaz</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Ali</first><last>Modarressi</last><affiliation>Center for Information and Language Processing, LMU Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>9136-9152</pages>
       <abstract>Dense retrieval models are commonly used in Information Retrieval (IR) applications, such as Retrieval-Augmented Generation (RAG). Since they often serve as the first step in these systems, their robustness is critical to avoid downstream failures. In this work, we repurpose a relation extraction dataset (e.g., Re-DocRED) to design controlled experiments that quantify the impact of heuristic biases, such as a preference for shorter documents, on retrievers like Dragon+ and Contriever. We uncover major vulnerabilities, showing retrievers favor shorter documents, early positions, repeated entities, and literal matches, all while ignoring the answer’s presence! Notably, when multiple biases combine, models exhibit catastrophic performance degradation, selecting the answer-containing document in less than 10% of cases over a synthetic biased document without the answer. Furthermore, we show that these biases have direct consequences for downstream applications like RAG, where retrieval-preferred documents can mislead LLMs, resulting in a 34% performance drop than providing no documents at all.https://huggingface.co/datasets/mohsenfayyaz/ColDeR</abstract>
@@ -6545,7 +6545,7 @@
       <author><first>Hridayesh</first><last>Lekhak</last></author>
       <author><first>Tuan Minh</first><last>Dang</last><affiliation>University of Texas at Arlington, University of Texas at Arlington</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <pages>9207-9219</pages>
       <abstract>Dogs communicate intelligently but little is known about the phonetic properties of their vocalization communication. For the first time, this paper presents an iterative algorithm inspired by human phonetic discovery, which is based on minimal pairs that determine phonemes by distinguishing different words in human language, and is able to produce a complete alphabet of distinct canine phoneme-like units. In addition, the algorithm produces a number of canine repeated acoustic units, which may correspond to specific environments and activities of a dog, composed exclusively of the canine phoneme-like units in the alphabet. The framework outlined in this paper is expected to function not only on canines but other animal species.</abstract>
       <url hash="238005da">2025.acl-long.451</url>
@@ -6580,7 +6580,7 @@
       <author><first>Chunting</first><last>Zhou</last><affiliation>Meta AI</affiliation></author>
       <author><first>Lili</first><last>Yu</last><affiliation>physical intelligence</affiliation></author>
       <author><first>Jason E</first><last>Weston</last><affiliation>New York University and Facebook</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <author><first>Gargi</first><last>Ghosh</last><affiliation>Meta AI</affiliation></author>
       <author><first>Mike</first><last>Lewis</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Ari</first><last>Holtzman</last><affiliation>, University of Chicago</affiliation></author>
@@ -6624,7 +6624,7 @@
       <title>Culture Matters in Toxic Language Detection in <fixed-case>P</fixed-case>ersian</title>
       <author><first>Zahra</first><last>Bokaei</last></author>
       <author><first>Walid</first><last>Magdy</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <pages>9290-9304</pages>
       <abstract>Toxic language detection is crucial for creating safer online environments and limiting the spread of harmful content. While toxic language detection has been under-explored in Persian, the current work compares different methods for this task, including fine-tuning, data enrichment, zero-shot and few-shot learning, and cross-lingual transfer learning. What is especially compelling is the impact of cultural context on transfer learning for this task: We show that the language of a country with cultural similarities to Persian yields better results in transfer learning. Conversely, the improvement is lower when the language comes from a culturally distinct country.</abstract>
       <url hash="daafc80e">2025.acl-long.456</url>
@@ -6720,7 +6720,7 @@
       <author><first>Tao</first><last>Feng</last></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
       <author><first>Niket</first><last>Tandon</last><affiliation>Research, Microsoft</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>9400-9428</pages>
       <abstract>Causal discovery is fundamental to scientific research, yet traditional statistical algorithms face significant challenges, including expensive data collection, redundant computation for known relations, and unrealistic assumptions. While recent LLM-based methods excel at identifying commonly known causal relations, they fail to uncover novel relations. We introduce IRIS (Iterative Retrieval and Integrated System for Real-Time Causal Discovery), a novel framework that addresses these limitations. Starting with a set of initial variables, IRIS automatically collects relevant documents, extracts variables, and uncovers causal relations. Our hybrid causal discovery method combines statistical algorithms and LLM-based methods to discover known and novel causal relations. In addition to causal discovery on initial variables, the missing variable proposal component of IRIS identifies and incorporates missing variables to expand the causal graphs. Our approach enables real-time causal discovery from only a set of initial variables without requiring pre-existing datasets.</abstract>
       <url hash="6799efb5">2025.acl-long.463</url>
@@ -6730,7 +6730,7 @@
     <paper id="464">
       <title><fixed-case>INJONGO</fixed-case>: A Multicultural Intent Detection and Slot-filling Dataset for 16 <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Hao</first><last>Yu</last></author>
-      <author><first>Jesujoba Oluwadara</first><last>Alabi</last></author>
+      <author id="jesujoba-alabi"><first>Jesujoba Oluwadara</first><last>Alabi</last></author>
       <author><first>Andiswa</first><last>Bukula</last></author>
       <author><first>Jian Yun</first><last>Zhuang</last></author>
       <author><first>En-Shiun Annie</first><last>Lee</last></author>
@@ -6749,7 +6749,7 @@
       <author><first>Salomey</first><last>Osei</last></author>
       <author><first>Sokhar</first><last>Samb</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <pages>9429-9452</pages>
       <abstract>Slot-filling and intent detection are well-established tasks in Conversational AI. However, current large-scale benchmarks for these tasks often exclude evaluations of low-resource languages and rely on translations from English benchmarks, thereby predominantly reflecting Western-centric concepts. In this paper, we introduce “INJONGO” - a multicultural, open-source benchmark dataset for 16 African languages with utterances generated by native speakers across diverse domains, including banking, travel, home, and dining. Through extensive experiments, we benchmark fine-tuning multilingual transformer models and prompting large language models (LLMs), and show the advantage of leveraging African-cultural utterances over Western-centric utterances for improving cross-lingual transfer from the English language. Experimental results reveal that current LLMs struggle with the slot-filling task, with GPT-4o achieving an average performance of 26 F1. In contrast, intent detection performance is notably better, with an average accuracy of 70.6%, though it still falls short of fine-tuning baselines. When compared to the English language, GPT-4o and fine-tuning baselines perform similarly on intent detection, achieving an accuracy of approximately 81%. Our findings suggest that LLMs performance is still behind for many low-resource African languages, and more work is needed to further improve their downstream performance.</abstract>
       <url hash="ad6fddd5">2025.acl-long.464</url>
@@ -6808,7 +6808,7 @@
       <title><fixed-case>KG</fixed-case>-Agent: An Efficient Autonomous Agent Framework for Complex Reasoning over Knowledge Graph</title>
       <author><first>Jinhao</first><last>Jiang</last></author>
       <author><first>Kun</first><last>Zhou</last><affiliation>University of California, San Diego</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yang</first><last>Song</last><affiliation>BOSS Zhipin</affiliation></author>
       <author><first>Chen</first><last>Zhu</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Hengshu</first><last>Zhu</last><affiliation>Computer Network Information Center, Chinese Academy of Sciences</affiliation></author>
@@ -6837,7 +6837,7 @@
       <author><first>Srijan</first><last>Bansal</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Yifei</first><last>Ming</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Semih</first><last>Yavuz</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <pages>9541-9564</pages>
       <abstract>The large language model (LLM)-as-judge paradigm has been used to meet the demand for a cheap, reliable, and fast evaluation of model outputs during AI system development and post-deployment monitoring. While judge models—LLMs finetuned to specialize in assessing and critiquing model outputs—have been touted as general purpose evaluators, they are typically evaluated only on non-contextual scenarios, such as instruction following. The omission of contextual settings—those where external information is used as context to generate an output—is surprising given the increasing prevalence of retrieval-augmented generation (RAG) and summarization use cases. Contextual assessment is uniquely challenging, as evaluation often depends on practitioner priorities, leading to conditional evaluation criteria (e.g., comparing responses based on factuality and then considering completeness if they are equally factual). To address the gap, we propose ContextualJudgeBench, a judge benchmark with 2,000 challenging response pairs across eight splits inspired by real-world contextual evaluation scenarios. We build our benchmark with a multi-pronged data construction pipeline that leverages both existing human annotations and model-based perturbations. Our comprehensive study across 11 judge models and 7 general purpose models, reveals that the contextual information and assessment criteria present a significant challenge to even state-of-the-art models. For example, o1, the best-performing model, barely reaches 55% consistent accuracy.</abstract>
       <url hash="294d7525">2025.acl-long.470</url>
@@ -6851,7 +6851,7 @@
       <author><first>Niket</first><last>Tandon</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Xiaoxi</first><last>Kang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>9565-9590</pages>
       <abstract>This study investigates the efficacy of Large Language Models (LLMs) in causal discovery. Using newly available open-source LLMs, OLMo and BLOOM, which provide access to their pre-training corpora, we investigate how LLMs address causal discovery through three research questions. We examine: (i) the impact of memorization for accurate causal relation prediction, (ii) the influence of incorrect causal relations in pre-training data, and (iii) the contextual nuances that influence LLMs’ understanding of causal relations. Our findings indicate that while LLMs are effective in recognizing causal relations that occur frequently in pre-training data, their ability to generalize to new or rare causal relations is limited. Moreover, the presence of incorrect causal relations significantly undermines the confidence of LLMs in corresponding correct causal relations, and the contextual information critically affects the outcomes of LLMs to discern causal connections between random variables.</abstract>
       <url hash="0a6b213d">2025.acl-long.471</url>
@@ -6953,7 +6953,7 @@
     </paper>
     <paper id="479">
       <title>When to Speak, When to Abstain: Contrastive Decoding with Abstention</title>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Youna</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Sang-goo</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Taeuk</first><last>Kim</last><affiliation>Hanyang University</affiliation></author>
@@ -7428,7 +7428,7 @@
       <author><first>Jianxiang</first><last>Peng</last></author>
       <author><first>Lei</first><last>Yang</last></author>
       <author><first>Juesi</first><last>Xiao</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>10386-10418</pages>
       <abstract>With the increasing capability of large language models (LLMs), LLM-as-a-judge has emerged as a new evaluation paradigm. Compared with traditional automatic and manual evaluation, LLM evaluators exhibit better interpretability and efficiency. Despite this, existing LLM evaluators suffer from limited use scenarios and poor flexibility. To mitigate these issues, we propose Praetor, a fine-grained generative LLM evaluator with instance-level customazable evaluation criteria. To train Praetor, we curate a large-scale dataset guided with a hierarchical guideline covering a wide range of tasks and instance-level evaluation criteria. We train Praetor on this dataset in a multi-task learning fashion, which enables to evaluate LLMs in either pointwise grading or pairwise comparison way and support two languages simultaneously with a high flexibility of setting evaluation criteria. Extensive experiments demonstrate that Praetor outperforms previous LLM evaluators and instruction-tuned LLMs on multiple benchmarks, setting new SOTA results. It also exhibits the potential for generating critiques as scalable feedback to further improve LLMs. Our model and related resources are released at <url>https://github.com/tjunlp-lab/Praetor</url>.</abstract>
       <url hash="dc5ab275">2025.acl-long.513</url>
@@ -7441,7 +7441,7 @@
       <author><first>Xiruo</first><last>Ding</last></author>
       <author><first>Brian</first><last>Hur</last><affiliation>University of Washington</affiliation></author>
       <author id="changye-li-umn"><first>Changye</first><last>Li</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Serguei V. S.</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
       <pages>10419-10434</pages>
       <abstract>Deep transformer models have been used to detect linguistic anomalies in patient transcripts for early Alzheimer’s disease (AD) screening. While pre-trained neural language models (LMs) fine-tuned on AD transcripts perform well, little research has explored the effects of the gender of the speakers represented by these transcripts. This work addresses gender confounding in dementia detection and proposes two methods: the Extended Confounding Filter and the Dual Filter, which isolate and ablate weights associated with gender. We evaluate these methods on dementia datasets with first-person narratives from patients with cognitive impairment and healthy controls. Our results show transformer models tend to overfit to training data distributions. Disrupting gender-related weights results in a deconfounded dementia classifier, with the trade-off of slightly reduced dementia detection performance.</abstract>
@@ -7451,7 +7451,7 @@
     </paper>
     <paper id="515">
       <title><fixed-case>MCS</fixed-case>-Bench: A Comprehensive Benchmark for Evaluating Multimodal Large Language Models in <fixed-case>C</fixed-case>hinese Classical Studies</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Jiahuan</first><last>Cao</last></author>
       <author><first>Hiuyi</first><last>Cheng</last></author>
       <author><first>Yongxin</first><last>Shi</last><affiliation>South China University of Technology</affiliation></author>
@@ -7532,7 +7532,7 @@
     </paper>
     <paper id="521">
       <title>Cool-Fusion: Fuse Large Language Models without Training</title>
-      <author id="cong-liu"><first>Cong</first><last>Liu</last></author>
+      <author><first>Cong</first><last>Liu</last></author>
       <author><first>Xiaojun</first><last>Quan</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Yan</first><last>Pan</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Weigang</first><last>Wu</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
@@ -7572,8 +7572,8 @@
       <author><first>Shilong</first><last>Li</last></author>
       <author><first>Bing</first><last>Xu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>10667-10686</pages>
       <abstract>Complex instruction-following with elaborate constraints is imperative for Large Language Models (LLMs). While existing methods have constructed data for complex instruction alignment, they all rely on a more advanced model, especially GPT-4, limiting their application. In this paper, we propose a Multi-granularity Self-Contrastive Training (MuSC) framework, to improve the complex instruction alignment without relying on a stronger model. Our method is conducted on both coarse and fine granularity. On coarse-granularity, we construct constraint-aware preference data based on instruction decomposition and recombination. On fine-granularity, we perform token-aware preference optimization with dynamic token-level supervision. Our method is evaluated on open-sourced models, and experiment results show our method achieves significant improvement on both complex and general instruction-following benchmarks, surpassing previous self-alignment methods.</abstract>
       <url hash="675a7169">2025.acl-long.523</url>
@@ -7586,7 +7586,7 @@
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Jinhao</first><last>Jiang</last></author>
       <author><first>Mingyu</first><last>Xu</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Bingning</first><last>Wang</last><affiliation>Beijing Baichuan Intelligence Technology Co., Ltd.</affiliation></author>
       <author><first>Weipeng</first><last>Chen</last></author>
       <pages>10687-10707</pages>
@@ -7878,7 +7878,7 @@
       <author><first>An</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yanyan</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Bing</first><last>Qin</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Ting</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>11112-11137</pages>
       <abstract>Role-playing enables large language models (LLMs) to engage users in immersive and personalized interactions, but it also introduces significant safety risks. Existing role-play fine-tuning techniques improve role adaptability but may degrade safety performance, particularly for villainous characters. In this work, we conduct the first comprehensive assessment of role-play fine-tuning risks by training 95 role-specific LLMs using RoleBench. Our experiments reveal that role-play fine-tuning leads to a noticeable decline in safety performance, with safety risks varying based on character traits. To tackle this challenge, we propose Safety-Aware Role-Play Fine-Tuning (SaRFT), a novel method designed to balance role-playing capabilities and safety. Extensive experiments on LLaMA-3-8B-Instruct, Gemma-2-9B-it, and Qwen2.5-7B-Instruct demonstrate that SaRFT consistently outperforms state-of-the-art baselines under both LoRA and full-parameter fine-tuning settings. Our findings highlight the necessity of role-adaptive safety measures and provide insights into mitigating role-specific safety risks in role-playing LLMs.</abstract>
@@ -7888,7 +7888,7 @@
     </paper>
     <paper id="545">
       <title>Can Graph Neural Networks Learn Language with Extremely Weak Text Supervision?</title>
-      <author id="zihao-li"><first>Zihao</first><last>Li</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
+      <author><first>Zihao</first><last>Li</last><affiliation>University of Illinois Urbana-Champaign</affiliation></author>
       <author><first>Lecheng</first><last>Zheng</last></author>
       <author><first>Bowen</first><last>Jin</last></author>
       <author><first>Dongqi</first><last>Fu</last><affiliation>Meta</affiliation></author>
@@ -7933,9 +7933,9 @@
       <author><first>Karthik</first><last>Padthe</last><affiliation>Meta AI</affiliation></author>
       <author><first>Rulin</first><last>Shao</last></author>
       <author><first>Alicia Yi</first><last>Sun</last><affiliation>Meta AI and Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
       <author><first>Gargi</first><last>Ghosh</last><affiliation>Meta AI</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <pages>11199-11213</pages>
       <abstract>Large language models can generate factually inaccurate content, a problem known as hallucination. Recent works have built upon retrieved-augmented generation to improve factuality through iterative prompting but these methods are limited by the traditional RAG design. To address these challenges, we introduce Ewe (Explicit Working Memory), a novel approach that enhances factuality in long-form text generation by integrating a working memory that receives real-time feedback from external resources. The memory is refreshed based on online fact-checking and retrieval feedback, allowing Ewe to rectify false claims during the generation process and ensure more accurate and reliable outputs. Our experiments demonstrate that Ewe outperforms strong baselines on four fact-seeking long-form generation datasets, increasing the factuality metric, VeriScore, by 2 to 6 points absolute without sacrificing the helpfulness of the responses. Further analysis reveals that the design of rules for memory updates, configurations of memory units, and the quality of the retrieval datastore are crucial factors for influencing model performance.</abstract>
       <url hash="298836ca">2025.acl-long.548</url>
@@ -8032,7 +8032,7 @@
       <author><first>Jianmin</first><last>Wang</last></author>
       <author><first>Xibao</first><last>Cai</last></author>
       <author><first>Haitao</first><last>Huang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Longyue</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Lai Hou</first><last>Tim</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xiangxiang</first><last>Zeng</last><affiliation>Hunan University</affiliation></author>
@@ -8065,7 +8065,7 @@
       <author><first>Chengyi</first><last>Yang</last></author>
       <author><first>Pei</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Baosong</first><last>Yang</last></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Junfeng</first><last>Yao</last><affiliation>Xiamen University</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen University</affiliation></author>
@@ -8164,7 +8164,7 @@
       <author><first>Dongyang</first><last>Zhan</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yunting</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Yicheng</first><last>Guo</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>University of Mississippi Medical Center</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>University of Mississippi Medical Center</affiliation></author>
       <pages>11481-11494</pages>
       <abstract>Automatic exploit generation (AEG) refers to the automatic discovery and exploitation of vulnerabilities against unknown targets. Traditional AEG often targets a single type of vulnerability and still relies on templates built from expert experience. To achieve intelligent exploit generation, we establish a comprehensive benchmark using Binary Exploitation (pwn) challenges in Capture the Flag (CTF) competitions and investigate the capabilities of Large Language Models (LLMs) in AEG based on the benchmark. To improve the performance of AEG, we propose PwnGPT, an LLM-based automatic exploit generation framework that automatically solves pwn challenges. The structural design of PwnGPT is divided into three main components: analysis, generation, and verification modules. With the help of a modular approach and structured problem inputs, PwnGPT can solve challenges that LLMs cannot directly solve. We evaluate PwnGPT on our benchmark and analyze the outputs of each module. Experimental results show that our framework is highly autonomous and capable of addressing various challenges. Compared to direct input LLMs, PwnGPT increases the completion rate of exploit on our benchmark from 26.3% to 57.9% with the OpenAI o1-preview model and from 21.1% to 36.8% with the GPT-4o model.</abstract>
       <url hash="02340cb6">2025.acl-long.562</url>
@@ -8183,7 +8183,7 @@
       <author><first>Phong Nguyen-Thuan</first><last>Do</last><affiliation>Zalo</affiliation></author>
       <author><first>Van Le Tran</first><last>Truc</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
       <author><first>Duc Thanh</first><last>Chau</last><affiliation>Ho Chi Minh city University of Science, Vietnam National University</affiliation></author>
-      <author><first>Le-Minh</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>11495-11515</pages>
       <abstract>The evolution of Large Language Models (LLMs) has underscored the necessity for benchmarks designed for various languages and cultural contexts. To address this need for Vietnamese, we present the first Vietnamese Multitask Language Understanding (VMLU) Benchmarks. The VMLU benchmarks consist of four datasets that assess different capabilities of LLMs, including general knowledge, reading comprehension, reasoning, and conversational skills. This paper also provides an insightful overview of the current state of some dominant LLMs, such as Llama-3, Qwen2.5, and GPT-4, highlighting their performances and limitations when measured against these benchmarks. Furthermore, we provide insights into how prompt design can influence VMLU’s evaluation outcomes, as well as suggest that open-source LLMs can serve as effective, cost-efficient evaluators within the Vietnamese context. By offering a comprehensive and accessible benchmarking framework, the VMLU Benchmarks aim to foster the development and fine-tuning of Vietnamese LLMs, thereby establishing a foundation for their practical applications in language-specific domains.</abstract>
       <url hash="e596cafe">2025.acl-long.563</url>
@@ -8383,7 +8383,7 @@
       <author><first>Yining</first><last>Wang</last></author>
       <author><first>Feifei</first><last>Zhai</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>11755-11771</pages>
       <abstract>LLMs have achieved remarkable fluency and coherence in text generation, yet their widespread adoption has raised concerns about content reliability and accountability. In high-stakes domains, it is crucial to understand where and how the content is created. To address this, we introduce the Text pROVEnance (TROVE) challenge, designed to trace each sentence of a target text back to specific source sentences within potentially lengthy or multi-document inputs. Beyond identifying sources, TROVE annotates the fine-grained relationships (quotation, compression, inference, and others), providing a deep understanding of how each target sentence is formed.To benchmark TROVE, we construct our dataset by leveraging three public datasets covering 11 diverse scenarios (e.g., QA and summarization) in English and Chinese, spanning source texts of varying lengths (0–5k, 5–10k, 10k+), emphasizing the multi-document and long-document settings essential for provenance. To ensure high-quality data, we employ a three-stage annotation process: sentence retrieval, GPT-4o provenance, and human provenance. We evaluate 11 LLMs under direct prompting and retrieval-augmented paradigms, revealing that retrieval is essential for robust performance, larger models perform better in complex relationship classification, and closed-source models often lead, yet open-source models show significant promise, particularly with retrieval augmentation. We make our dataset available here: https://github.com/ZNLP/ZNLP-Dataset.</abstract>
       <url hash="6b7365ed">2025.acl-long.577</url>
@@ -8446,8 +8446,8 @@
       <author><first>WangYan</first><last>WangYan</last></author>
       <author><first>Wei</first><last>Shen</last></author>
       <author><first>Qing</first><last>Gu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Zhiwei</first><last>Jiang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Bryan</first><last>Hooi</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11857-11870</pages>
@@ -8571,7 +8571,7 @@
       <author><first>Xihuai</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yasheng</first><last>Wang</last></author>
       <author><first>Ruiming</first><last>Tang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <pages>12055-12065</pages>
       <abstract>With the impressive reasoning and text generation capabilities of large language models (LLMs), methods leveraging multiple LLMs to debate each other have garnered increasing attention. However, existing debate-based approaches remain limited in effectiveness in structured and detailed domains represented by code generation due to several reasons: 1) Reliance on different instances of the same LLM for debate, neglecting the potential benefits of integrating diverse models with varied internal knowledge for more comprehensive code generation, 2) under-utilization of test cases, and 3) reliance on third-party LLM moderators for result consolidation and decision-making, probably introducing hallucinations and judgment errors. To address these challenges, we propose DebateCoder to collect intelligence of LLMs via test case-driven debate for code generation. In DebateCoder, test cases serve as a medium for models to analyze code and identify bugs, while opposing models generate test cases to challenge each other’s code during the debate process. These test cases, along with their execution results, are elaborately leveraged to refine and enhance the code through a novel contrastive analysis process. Furthermore, DebateCoder leverages test case outcomes to assess code quality and determine convergence criteria. Unlike previous approaches, DebateCoder emphasizes the collaborative improvement of both models through competitive debate and interactive analysis. Abundant experimental results on two datasets demonstrate the effectiveness of DebateCoder.</abstract>
@@ -8638,7 +8638,7 @@
       <author><first>Ye</first><last>Yuan</last></author>
       <author><first>Yichun</first><last>Yin</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Yan</first><last>Xu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author id="xin-xu"><first>Xin</first><last>Xu</last></author>
+      <author><first>Xin</first><last>Xu</last></author>
       <author><first>Zaoyu</first><last>Chen</last></author>
       <author><first>Yasheng</first><last>Wang</last></author>
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
@@ -8795,8 +8795,8 @@
       <author><first>William</first><last>Zeng</last></author>
       <author><first>Oussama</first><last>Elachqar</last><affiliation>Oumi</affiliation></author>
       <author><first>Emmanouil</first><last>Koukoumidis</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>12370-12390</pages>
       <abstract>Large Language Models (LLMs) with API-calling capabilities enabled building effective Language Agents (LA), while also revolutionizing the conventional task-oriented dialogue (TOD) paradigm. However, current approaches face a critical dilemma: TOD systems are often trained on a limited set of target APIs, requiring new data to maintain their quality when interfacing with new services, while LAs are not trained to maintain user intent over multi-turn conversations. Because both robust multi-turn management and advanced function calling are crucial for effective conversational agents, we evaluate these skills on three popular benchmarks: MultiWOZ 2.4 (TOD), BFCL V3 (LA), and API-Bank (LA)—and our analyses reveal that specialized approaches excel in one domain but underperform in the other. To bridge this chasm, we introduce **CoALM** (**C**onversational **A**gentic **L**anguage **M**odel), a unified approach that integrates both conversational and agentic capabilities. We created **CoALM-IT**, a carefully constructed multi-task dataset that interleave multi-turn ReAct reasoning with complex API usage. Using CoALM-IT, we train three models **CoALM 8B**, **CoALM 70B**, and **CoALM 405B**, which outperform top domain-specific models, including GPT-4o, across all three benchmarks. This demonstrates the feasibility of a single model approach for both TOD and LA, setting a new standard for conversational agents.</abstract>
       <url hash="7349d498">2025.acl-long.605</url>
@@ -8810,7 +8810,7 @@
       <author><first>Zhiyang</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Lu</first><last>Xiang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>12391-12408</pages>
       <abstract>Document Image Machine Translation (DIMT) aims to translate text within document images, facing generalization challenges due to limited training data and the complex interplay between visual and textual information. To address these challenges, we introduce M4Doc, a novel single-to-mix Modality alignment framework leveraging Multimodal Large Language Models (MLLMs). M4Doc aligns an imageonly encoder with the multimodal representations of an MLLM, pre-trained on large-scale document image datasets. This alignment enables a lightweight DIMT model to learn crucial visual-textual correlations during training. During inference, M4Doc bypasses the MLLM, maintaining computational efficiency while benefiting from its multimodal knowledge. Comprehensive experiments demonstrate substantial improvements in translation quality, especially in cross-domain generalization and challenging document image scenarios. The code will be released upon acceptance.</abstract>
@@ -8971,7 +8971,7 @@
     </paper>
     <paper id="616">
       <title>Enhancing Safe and Controllable Protein Generation via Knowledge Preference Optimization</title>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last></author>
+      <author><first>Yuhao</first><last>Wang</last></author>
       <author><first>Keyan</first><last>Ding</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Kehua</first><last>Feng</last></author>
       <author><first>Zeyuan</first><last>Wang</last></author>
@@ -9100,10 +9100,10 @@
       <title><fixed-case>SCAR</fixed-case>: Data Selection via Style Consistency-Aware Response Ranking for Efficient Instruction-Tuning of Large Language Models</title>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Yuncheng</first><last>Hua</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Haolan</first><last>Zhan</last><affiliation>Monash University</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>12756-12790</pages>
       <abstract>Recent studies emphasize that manually ensuring a consistent response style and maintaining high data quality in training sets can significantly improve the performance of fine-tuned Large Language Models (LLMs) while reducing the number of training examples needed. However, the precise definition of style and the relationship between style, data quality, and LLM performance remains unclear. This research identifies two key stylistic elements in responses: linguistic form and instructional surprisal. We find that, among training data of comparable quality, higher consistency in these response elements leads to better LLM performance. Inspired by this, we introduce Style Consistency-Aware Response Ranking (SCAR), which automatically prioritizes instruction-response pairs in the training set based on their response stylistic consistency. By selecting the most style-consistent examples, using 0.7% of the full dataset in certain cases, the fine-tuned LLMs can match or even surpass the performance of models trained on the entire dataset in coding and open-ended question-answering benchmarks. Code and data are available at https://github.com/zhuang-li/SCAR .</abstract>
       <url hash="d4839293">2025.acl-long.625</url>
@@ -9344,7 +9344,7 @@
       <author><first>Nikhita</first><last>Vedula</last><affiliation>Amazon</affiliation></author>
       <author><first>Besnik</first><last>Fetahu</last><affiliation>Amazon</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>13095-13120</pages>
       <abstract>The goal of conversational product search (CPS) is to develop an intelligent, chat-based shopping assistant that can directly interact with customers to understand shopping intents, ask clarification questions, and find relevant products. However, training such assistants is hindered mainly due to the lack of reliable and large-scale datasets. Prior human-annotated CPS datasets are extremely small in size and lack integration with real-world product search systems. We propose a novel approach, TRACER, which leverages large language models (LLMs) to generate realistic and natural conversations for different shopping domains. TRACER’s novelty lies in grounding the generation to dialogue plans, which are product search trajectories predicted from a decision tree model, that guarantees relevant product discovery in the shortest number of search conditions. We also release the first target-oriented CPS dataset Wizard of Shopping (WoS), containing highly natural and coherent conversations (3.6k) from three shopping domains. Finally, we demonstrate the quality and effectiveness of WoS via human evaluations and downstream tasks.</abstract>
       <url hash="a6ef0b82">2025.acl-long.641</url>
@@ -9452,7 +9452,7 @@
       <author><first>Xuming</first><last>Hu</last><affiliation>The Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Irwin</first><last>King</last></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <pages>13228-13251</pages>
       <abstract>The radioactive nature of Large Language Model (LLM) watermarking enables the detection of watermarks inherited by student models when trained on the outputs of watermarked teacher models, making it a promising tool for preventing unauthorized knowledge distillation. However, the robustness of watermark radioactivity against adversarial actors remains largely unexplored. In this paper, we investigate whether student models can acquire the capabilities of teacher models through knowledge distillation while avoiding watermark inheritance. We propose two categories of watermark removal approaches: pre-distillation removal through untargeted and targeted training data paraphrasing (UP and TP), and post-distillation removal through inference-time watermark neutralization (WN). Extensive experiments across multiple model pairs, watermarking schemes and hyper-parameter settings demonstrate that both TP and WN thoroughly eliminate inherited watermarks, with WN achieving this while maintaining knowledge transfer efficiency and low computational overhead. Given the ongoing deployment of watermarking techniques in production LLMs, these findings emphasize the urgent need for more robust defense strategies.</abstract>
       <url hash="2a40568b">2025.acl-long.648</url>
@@ -9461,7 +9461,7 @@
     </paper>
     <paper id="649">
       <title>Rethinking Reward Model Evaluation Through the Lens of Reward Overoptimization</title>
-      <author><first>Sunghwan</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last></author>
       <author><first>Dongjin</first><last>Kang</last></author>
       <author><first>Taeyoon</first><last>Kwon</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Hyungjoo</first><last>Chae</last></author>
@@ -9602,7 +9602,7 @@
       <author><first>Xiaobo</first><last>Liang</last></author>
       <author><first>Juntao</first><last>Li</last></author>
       <author><first>Zhaopeng</first><last>Tu</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>13414-13438</pages>
       <abstract>Improving the mathematical reasoning capabilities of Large Language Models (LLMs) is critical for advancing artificial intelligence. However, access to extensive, diverse, and high-quality reasoning datasets remains a significant challenge, particularly for the open-source community. In this paper, we propose ScaleQuest, a novel, scalable, and cost-effective data synthesis method that enables the generation of large-scale mathematical reasoning datasets using lightweight 7B-scale models. ScaleQuest introduces a two-stage question-tuning process comprising Question Fine-Tuning (QFT) and Question Preference Optimization (QPO) to unlock the question generation capabilities of problem-solving models. By generating diverse questions from scratch – without relying on powerful proprietary models or seed data – we produce a dataset of 1 million problem-solution pairs. Our experiments demonstrate that models trained on our data outperform existing open-source datasets in both in-domain and out-of-domain evaluations. Furthermore, our approach shows continued performance improvement as the volume of training data increases, highlighting its potential for ongoing data scaling. The extensive improvements observed in code reasoning tasks demonstrate the generalization capabilities of our proposed method. Our work provides the open-source community with a practical solution to enhance the mathematical reasoning abilities of LLMs.</abstract>
@@ -9615,7 +9615,7 @@
       <author><first>Haneul</first><last>Yoo</last><affiliation>KAIST</affiliation></author>
       <author><first>Jieun</first><last>Han</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>So-Yeon</first><last>Ahn</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>13439-13454</pages>
       <abstract>Automated essay scoring (AES) is a useful tool in English as a Foreign Language (EFL) writing education, offering real-time essay scores for students and instructors. However, previous AES models were trained on essays and scores irrelevant to the practical scenarios of EFL writing education and usually provided a single holistic score due to the lack of appropriate datasets. In this paper, we release DREsS, a large-scale, standard dataset for rubric-based automated essay scoring with 48.9K samples in total. DREsS comprises three sub-datasets: DREsS_New, DREsS_Std., and DREsS_CASE. We collect DREsS_New, a real-classroom dataset with 2.3K essays authored by EFL undergraduate students and scored by English education experts. We also standardize existing rubric-based essay scoring datasets as DREsS_Std. We suggest CASE, a corruption-based augmentation strategy for essays, which generates 40.1K synthetic samples of DREsS_CASE and improves the baseline results by 45.44%. DREsS will enable further research to provide a more accurate and practical AES system for EFL writing education.</abstract>
       <url hash="dbfb3843">2025.acl-long.659</url>
@@ -9678,7 +9678,7 @@
     <paper id="664">
       <title>Lexical Recall or Logical Reasoning: Probing the Limits of Reasoning Abilities in Large Language Models</title>
       <author><first>Henrike</first><last>Beyer</last><affiliation>University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>13532-13557</pages>
       <abstract>Despite the increasing interest in the reasoning abilities of Large Language Models (LLMs), existing work shows limitations in assessing logic abilities independently from lexical memory. We address this gap with Mystery-Zebra. This robust two-part benchmark (4,290 puzzles) challenges the logic abstraction abilities of LLMs in two setups: (1) a lexical obfuscation setup tests the dependence of LLMs on lexical content based on two canonical grid puzzles widely spread on the Internet; (2) a set of new grid puzzles in 42 different sizes and 12 difficulty levels tests how the formal difficulty degree of a puzzle affects LLMs.We test open and closed-weight LLMs on both parts of the benchmark. The results on part two suggest that model sizes up to 70B parameters have only a minor influence when solving newly generated puzzles, while performance mainly relates to the number of items in the puzzle. The results on the first part of the benchmark suggest that the applied obfuscation strategies help to mitigate effects of logic puzzles being part of LLM training data, showing a drastic drop in performance for obfuscated versions of well-known puzzles. In addition we conduct a case-study on the first part of the benchmark predicting the position of single items, unveiling that the reasoning abilities of LLMs are mainly limited to a few consecutive steps of reasoning.</abstract>
       <url hash="de639c09">2025.acl-long.664</url>
@@ -9764,7 +9764,7 @@
     <paper id="670">
       <title><fixed-case>CR</fixed-case>isk<fixed-case>E</fixed-case>val: A <fixed-case>C</fixed-case>hinese Multi-Level Risk Evaluation Benchmark Dataset for Large Language Models</title>
       <author><first>Ling</first><last>Shi</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>13638-13659</pages>
       <abstract>Large language models (LLMs) are possessed of numerous beneficial capabilities, yet their potential inclination harbors unpredictable risks that may materialize in the future. We hence propose CRiskEval, a Chinese dataset meticulously designed for gauging the risk proclivities inherent in LLMs such as resource acquisition and malicious coordination, as part of efforts for proactive preparedness. To curate CRiskEval, we define a new risk taxonomy with 7 types of frontier risks and 4 safety levels, including extremely hazardous,moderately hazardous, neutral and safe. We follow the philosophy of tendency evaluation to empirically measure the stated ”desire” of LLMs via fine-grained multiple-choice question answering. The dataset consists of 14,888 questions that simulate scenarios related to predefined 7 types of frontier risks. Each question is accompanied with 4 answer choices that state opinions or behavioral tendencies corresponding to the question. All answer choices are manually annotated with one of the defined risk levels so that we can easily build a fine-grained frontier risk profile for each assessed LLM. Extensive evaluation with CRiskEval on a spectrum of prevalent Chinese LLMs has unveiled a striking revelation: most models exhibit risk tendencies of more than 40% (weighted tendency to the four risk levels). Furthermore, a subtle increase in the model’s inclination toward urgent self-sustainability, power seeking and other dangerous goals becomes evident as the size of models increases. To promote further research on the frontier risk evaluation of LLMs, we publicly release our dataset at https://github.com/tjunlp-lab/CRiskEval.</abstract>
       <url hash="2840a739">2025.acl-long.670</url>
@@ -9805,7 +9805,7 @@
       <author><first>Huadai</first><last>Liu</last></author>
       <author><first>Jialei</first><last>Wang</last></author>
       <author><first>Rongjie</first><last>Huang</last><affiliation>Zhejiang University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Heng</first><last>Lu</last></author>
       <author><first>Zhou</first><last>Zhao</last><affiliation>Zhejiang University and Zhejiang University</affiliation></author>
       <author><first>Wei</first><last>Xue</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
@@ -9901,7 +9901,7 @@
       <author><first>Tianyu</first><last>Zheng</last></author>
       <author><first>Yizhi</first><last>Li</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Yuelin</first><last>Bai</last><affiliation>Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Yubo</first><last>Wang</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>King</first><last>Zhu</last><affiliation>Guangdong OPPO Mobile Telecommunications Corp.,Ltd.</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -9971,7 +9971,7 @@
       <title><fixed-case>SPECTRA</fixed-case>: Faster Large Language Model Inference with Optimized Internal and External Speculation</title>
       <author><first>Nguyen-Khang</first><last>Le</last><affiliation>Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Truong Dinh</first><last>Do</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last><affiliation>Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>14015-14034</pages>
       <abstract>Inference with modern Large Language Models (LLMs) is both computationally expensive and time-consuming. Speculative decoding has emerged as a promising solution, but existing approaches face key limitations: training-based methods require a draft model that is challenging to obtain and lacks generalizability, while training-free methods offer limited speedup gains. In this work, we present Spectra, a novel framework for accelerating LLM inference without the need for additional training or modification to the original LLM. Spectra introduces two new techniques for efficiently utilizing internal and external speculation, each outperforming corresponding state-of-the-art (SOTA) methods independently. When combined, these techniques achieve up to a 4.08x speedup across various benchmarks and LLM architectures, significantly surpassing existing training-free approaches. The implementation of Spectra is publicly available.</abstract>
       <url hash="6f71b9cf">2025.acl-long.685</url>
@@ -10060,7 +10060,7 @@
       <author><first>Jianzhu</first><last>Bao</last></author>
       <author><first>Yuqi</first><last>Huang</last></author>
       <author><first>Bin</first><last>Liang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Min</first><last>Yang</last><affiliation>Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>14133-14148</pages>
@@ -10079,7 +10079,7 @@
       <author><first>Hui</first><last>Su</last><affiliation>Meituan</affiliation></author>
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Ming</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Bing</first><last>Qin</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>14149-14162</pages>
       <abstract>Large Multimodal Models (LMMs) have recently demonstrated impressive performance on general video comprehension benchmarks. Nevertheless, for broader applications, the robustness of their temporal analysis capability needs to be thoroughly investigated yet predominantly ignored. Motivated by this, we propose a novel temporal robustness benchmark (TemRobBench), which introduces temporal inconsistency perturbations separately at the visual and textual modalities to assess the robustness of models. We evaluate 16 mainstream LMMs and find that they exhibit over-reliance on prior knowledge and textual context in adversarial environments, while ignoring the actual temporal dynamics in the video. To mitigate this issue, we design panoramic direct preference optimization (PanoDPO), which encourages LMMs to incorporate both visual and linguistic feature preferences simultaneously. Experimental results show that PanoDPO can effectively enhance the model’s robustness and reliability in temporal analysis.</abstract>
@@ -10136,7 +10136,7 @@
       <author><first>Xinyi</first><last>Zhou</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Ning</first><last>Zhang</last></author>
       <author><first>Shangqing</first><last>Zhao</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Xiaopeng</first><last>Bai</last><affiliation>East China Normal University</affiliation></author>
       <pages>14215-14231</pages>
       <abstract>Argument mining has garnered increasing attention over the years, with the recent advancement of Large Language Models (LLMs) further propelling this trend. However, current argument relations remain relatively simplistic and foundational, struggling to capture the full scope of argument information. To address this limitation, we propose a systematic framework comprising 14 fine-grained relation types from the perspectives of vertical argument relations and horizontal discourse relations, thereby capturing the intricate interplay between argument components for a thorough understanding of argument structure. On this basis, we conducted extensive experiments on three tasks: argument component prediction, relation prediction, and automated essay grading. Additionally, we explored the impact of writing quality on argument component prediction and relation prediction, as well as the connections between discourse relations and argumentative features. The findings highlight the importance of fine-grained argumentative annotations for argumentative writing assessment and encourage multi-dimensional argument analysis.</abstract>
@@ -10148,7 +10148,7 @@
       <title>Browsing Like Human: A Multimodal Web Agent with Experiential Fast-and-Slow Thinking</title>
       <author><first>Haohao</first><last>Luo</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Jiayi</first><last>Kuang</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Ying</first><last>Shen</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
@@ -10227,7 +10227,7 @@
       <author><first>Nurkhan</first><last>Laiyk</last></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Ekaterina</first><last>Kochmar</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>14403-14416</pages>
       <abstract>Despite having a population of twenty million, Kazakhstan’s culture and language remain underrepresented in the field of natural language processing. Although large language models (LLMs) continue to advance worldwide, progress in Kazakh language has been limited, as seen in the scarcity of dedicated models and benchmark evaluations. To address this gap, we introduce KazMMLU, the first MMLU-style dataset specifically designed for Kazakh language. KazMMLU comprises 23,000 questions that cover various educational levels, including STEM, humanities, and social sciences, sourced from authentic educational materials and manually validated by native speakers and educators. The dataset includes 10,969 Kazakh questions and 12,031 Russian questions, reflecting Kazakhstan’s bilingual education system and rich local context. Our evaluation of several state-of-the-art multilingual models (Llama3.1, Qwen-2.5, GPT-4, and DeepSeek V3) demonstrates substantial room for improvement, as even the best-performing models struggle to achieve competitive performance in Kazakh and Russian. These findings highlight significant performance gaps compared to high-resource languages. We hope that our dataset will enable further research and development of Kazakh-centric LLMs.</abstract>
@@ -10298,7 +10298,7 @@
       <author><first>Rituraj</first><last>Joshi</last><affiliation>Cerebras Systems, Inc</affiliation></author>
       <author><first>Maiya</first><last>Goloburda</last></author>
       <author><first>Yuxia</first><last>Wang</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>14509-14538</pages>
       <abstract>Instruction tuning in low-resource languages remains underexplored due to limited text data, particularly in government and cultural domains. To address this, we introduce and open-source a large-scale (10,600 samples) instruction-following (IFT) dataset, covering key institutional and cultural knowledge relevant to Kazakhstan. Our dataset enhances LLMs’ understanding of procedural, legal, and structural governance topics. We employ LLM-assisted data generation, comparing open-weight and closed-weight models for dataset construction, and select GPT-4o as the backbone. Each entity of our dataset undergoes full manual verification to ensure high quality. We also show that fine-tuning Qwen, Falcon, and Gemma on our dataset leads to consistent performance improvements in both multiple-choice and generative tasks, demonstrating the potential of LLM-assisted instruction tuning for low-resource languages.</abstract>
@@ -10342,7 +10342,7 @@
       <author><first>Yue</first><last>Zhao</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Nedim</first><last>Lipka</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Zichao</first><last>Wang</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Puneet</first><last>Mathur</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Soumyabrata</first><last>Pal</last><affiliation>Adobe Systems</affiliation></author>
@@ -10365,7 +10365,7 @@
       <author><first>Luyao</first><last>Cheng</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Chong</first><last>Deng</last></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Siqi</first><last>Zheng</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Jiaqing</first><last>Liu</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Hai</first><last>Yu</last><affiliation>Alibaba Group</affiliation></author>
@@ -10490,7 +10490,7 @@
       <author><first>Dongqi</first><last>Cai</last></author>
       <author><first>Rongjie</first><last>Yi</last></author>
       <author><first>Fangming</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Xiwen</first><last>Zhang</last><affiliation>Helixon Research</affiliation></author>
       <author><first>Nicholas D.</first><last>Lane</last><affiliation>Flower Labs and University of Cambridge</affiliation></author>
@@ -10520,7 +10520,7 @@
       <author><first>Oikantik</first><last>Nath</last><affiliation>Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Hanani</first><last>Bathina</last></author>
       <author><first>Mohammed Safi Ur Rahman</first><last>Khan</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology and Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <pages>14784-14814</pages>
       <abstract>Recent advancements in Vision-Language Models (VLMs) have opened new possibilities in automatic grading of handwritten student responses, particularly in mathematics. However, a comprehensive study to test the ability of VLMs to evaluate and reason over handwritten content remains absent. To address this gap, we introduce FERMAT, a benchmark designed to assess VLMs’ ability to detect, localize and correct errors in handwritten mathematical content. FERMAT spans four key error dimensions - computational, conceptual, notational, and presentation - and comprises over 2,200 handwritten math solutions derived from 609 manually curated problems from grades 7-12 with intentionally introduced perturbations. Using FERMAT we benchmark nine VLMs across three tasks: error detection, localization, and correction. Our results reveal significant shortcomings in current VLMs in reasoning over handwritten text, with Gemini-1.5-Pro achieving the highest error correction rate (77%). We also observed that some models struggle with processing handwritten content, as their accuracy improves when handwritten inputs are replaced with printed text or images. These findings highlight the limitations of current VLMs and reveal new avenues for improvement. We will release FERMAT and all the associated resources in the open-source to drive further research.</abstract>
       <url hash="db52d8dd">2025.acl-long.720</url>
@@ -10612,7 +10612,7 @@
       <title>Two Intermediate Translations Are Better Than One: Fine-tuning <fixed-case>LLM</fixed-case>s for Document-level Translation Refinement</title>
       <author><first>Yichen</first><last>Dong</last></author>
       <author><first>Xinglin</first><last>Lyu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Daimeng</first><last>Wei</last></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Shimin</first><last>Tao</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
@@ -10627,7 +10627,7 @@
       <title>Circuit Compositions: Exploring Modular Structures in Transformer-Based Language Models</title>
       <author><first>Philipp</first><last>Mondorf</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Sondre</first><last>Wold</last></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>14934-14955</pages>
       <abstract>A fundamental question in interpretability research is to what extent neural networks, particularly language models, implement reusable functions through subnetworks that can be composed to perform more complex tasks. Recent advances in mechanistic interpretability have made progress in identifying circuits, the minimal computational subgraphs responsible for a model’s behavior on specific tasks. However, most studies focus on identifying circuits for individual tasks without investigating how functionally similar circuits relate to each other. To address this gap, we study the modularity of neural networks by analyzing circuits for highly compositional subtasks within a transformer-based language model. Specifically, given a probabilistic context-free grammar, we identify and compare circuits responsible for ten modular string-edit operations. Our results indicate that functionally similar circuits exhibit both notable node overlap and cross-task faithfulness. Moreover, we demonstrate that the circuits identified can be reused and combined through set operations to represent more complex functional model capabilities.</abstract>
       <url hash="c5005bda">2025.acl-long.727</url>
@@ -10638,7 +10638,7 @@
       <title>Can <fixed-case>LLM</fixed-case>s Ground when they (Don’t) Know: A Study on Direct and Loaded Political Questions</title>
       <author><first>Clara</first><last>Lachenmaier</last></author>
       <author><first>Judith</first><last>Sieker</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>14956-14975</pages>
       <abstract>Communication among humans relies on conversational grounding, allowing interlocutors to reach mutual understanding even when they do not have perfect knowledge and must resolve discrepancies in each other’s beliefs. This paper investigates how large language models (LLMs) manage common ground in cases where they (don’t) possess knowledge, focusing on facts in the political domain where the risk of misinformation and grounding failure is high. We examine LLMs’ ability to answer direct knowledge questions and loaded questions that presuppose misinformation.We evaluate whether loaded questions lead LLMs to engage in active grounding and correct false user beliefs, in connection to their level of knowledge and their political bias.Our findings highlight significant challenges in LLMs’ ability to engage in grounding and reject false user beliefs, raising concerns about their role in mitigating misinformation in political discourse.</abstract>
       <url hash="abbb051a">2025.acl-long.728</url>
@@ -10724,7 +10724,7 @@
       <author><first>Xin</first><last>Zhang</last><affiliation>Ant International</affiliation></author>
       <author><first>Xuanhong</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>15077-15099</pages>
       <abstract>Large Language Models (LLMs) excel in various natural language processing tasks but remain vulnerable to generating harmful content or being exploited for malicious purposes. Although safety alignment datasets have been introduced to mitigate such risks through supervised fine-tuning (SFT), these datasets often lack comprehensive risk coverage. Most existing datasets focus primarily on lexical diversity while neglecting other critical dimensions. To address this limitation, we propose a novel analysis framework to systematically measure the risk coverage of alignment datasets across three essential dimensions: Lexical Diversity, Malicious Intent, and Jailbreak Tactics. We further introduce TRIDENT, an automated pipeline that leverages persona-based, zero-shot LLM generation to produce diverse and comprehensive instructions spanning these dimensions. Each harmful instruction is paired with an ethically aligned response, resulting in two datasets: TRIDENT-Core, comprising 26,311 examples, and TRIDENT-Edge, with 18,773 examples. Fine-tuning Llama 3.1-8B on TRIDENT-Edge demonstrates substantial improvements, achieving an average 14.29% reduction in Harm Score, and a 20% decrease in Attack Success Rate compared to the best-performing baseline model fine-tuned on the WildBreak dataset.</abstract>
@@ -10737,7 +10737,7 @@
       <author><first>Jungseob</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Seongtae</first><last>Hong</last><affiliation>Korea University</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>15100-15119</pages>
       <abstract>Adapting large language models to other languages typically employs supervised fine-tuning (SFT) as a standard approach. However, it often suffers from an overemphasis on English performance, a phenomenon that is especially pronounced in data-constrained environments. To overcome these challenges, we propose Cross-Lingual Optimization (CLO) that efficiently transfers an English-centric LLM to a target language while preserving its English capabilities. CLO utilizes publicly available English SFT data and a translation model to enable cross-lingual transfer. We conduct experiments using five models on six languages, each possessing varying levels of resource. Our results show that CLO consistently outperforms SFT in both acquiring target language proficiency and maintaining English performance. Remarkably, in low-resource languages, CLO with only 3,200 samples surpasses SFT with 6,400 samples, demonstrating that CLO can achieve better performance with less data. Furthermore, we find that SFT is particularly sensitive to data quantity in medium and low-resource languages, whereas CLO remains robust. Our comprehensive analysis emphasizes the limitations of SFT and incorporates additional training strategies in CLO to enhance efficiency.</abstract>
       <url hash="09d08423">2025.acl-long.734</url>
@@ -10890,7 +10890,7 @@
     <paper id="744">
       <title>A New Formulation of <fixed-case>Z</fixed-case>ipf’s Meaning-Frequency Law through Contextual Diversity</title>
       <author><first>Ryo</first><last>Nagata</last><affiliation>RIKEN and Konan University</affiliation></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last><affiliation>Waseda University</affiliation></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last><affiliation>Waseda University</affiliation></author>
       <pages>15323-15335</pages>
       <abstract>This paper proposes formulating Zipf’s meaning-frequency law, the power law between word frequency and the number of meanings, as a relationship between word frequency and contextual diversity. The proposed formulation quantifies meaning counts as contextual diversity, which is based on the directions of contextualized word vectors obtained from a Language Model (LM). This formulation gives a new interpretation to the law and also enables us to examine it for a wider variety of words and corpora than previous studies have explored. In addition, this paper shows that the law becomes unobservable when the size of the LM used is small and that autoregressive LMs require much more parameters than masked LMs to be able to observe the law.</abstract>
       <url hash="fb729b07">2025.acl-long.744</url>
@@ -10907,7 +10907,7 @@
       <author><first>Qi</first><last>Cao</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences, China</affiliation></author>
       <author><first>Dawei</first><last>Yin</last><affiliation>Baidu</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>15336-15354</pages>
       <abstract>Despite near-perfect results reported in the literature, the effectiveness of model editing in real-world applications remains unclear. To bridge this gap, we introduce QAEdit, a new benchmark aligned with widely used question answering (QA) datasets, and WILD, a task-agnostic evaluation framework designed to better reflect real-world usage of model editing. Our single editing experiments show that current editing methods perform substantially worse than previously reported (38.5% vs. 96.8%). We demonstrate that it stems from issues in the synthetic evaluation practices of prior work. Among them, the most severe is the use of teacher forcing during testing, which leaks both content and length of the ground truth, leading to overestimated performance. Furthermore, we simulate practical deployment by sequential editing, revealing that current approaches fail drastically with only 1000 edits. This work calls for a shift in model editing research toward rigorous evaluation and the development of robust, scalable methods that can reliably update knowledge in LLMs for real-world use.</abstract>
       <url hash="e597f9dc">2025.acl-long.745</url>
@@ -11017,7 +11017,7 @@
     <paper id="753">
       <title><fixed-case>MISP</fixed-case>-Meeting: A Real-World Dataset with Multimodal Cues for Long-form Meeting Transcription and Summarization</title>
       <author><first>HangChen</first><last>HangChen</last></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Jia-Chen</first><last>Gu</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Sabato Marco</first><last>Siniscalchi</last><affiliation>University of Palermo and Norwegian Institute of Technology</affiliation></author>
       <author><first>Jun</first><last>Du</last><affiliation>University of Science and Technology of China</affiliation></author>
@@ -11138,10 +11138,10 @@
     <paper id="762">
       <title><fixed-case>MLAS</fixed-case>-<fixed-case>L</fixed-case>o<fixed-case>RA</fixed-case>: Language-Aware Parameters Detection and <fixed-case>L</fixed-case>o<fixed-case>RA</fixed-case>-Based Knowledge Transfer for Multilingual Machine Translation</title>
       <author><first>Tianyu</first><last>Dong</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Baidu Inc</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Baidu Inc</affiliation></author>
       <author><first>Jinsong</first><last>Liu</last></author>
       <author><first>Shaolin</first><last>Zhu</last><affiliation>Tianjin University</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>15645-15660</pages>
       <abstract>Large language models (LLMs) have achieved remarkable progress in multilingual machine translation (MT), demonstrating strong performance even with limited parallel data. However, effectively fine-tuning LLMs for MT is challenging due to parameter interference, which arises from the conflicting demands of different language pairs and the risk of overwriting pre-trained knowledge. To address this issue, we propose <b>MLAS-LoRA</b>, a novel multiple language-aware LoRA knowledge transfer framework. MLAS-LoRA efficiently adapts LLMs to MT by selectively transferring knowledge from a large teacher to a small student model. Our approach first evaluates the awareness of neurons and extracts linguistic knowledge in the teacher model to both the general MT task and specific language pairs.We then propose a multiple language-specific LoRA architecture to inject the extracted knowledge into the student model. During fine-tuning, only the parameters of the relevant language-general and language-specific LoRA modules are updated. Experimental results on diverse multilingual language pairs demonstrate that MLAS-LoRA significantly outperforms strong baselines by +1.7 BLEU on average, including standard fine-tuning and other parameter-efficient methods.</abstract>
       <url hash="7e1f2ead">2025.acl-long.762</url>
@@ -11192,7 +11192,7 @@
       <author><first>Maria</first><last>Manina</last></author>
       <author><first>Daria</first><last>Ignatenko</last></author>
       <author><first>Artem</first><last>Shelmanov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>15702-15720</pages>
       <abstract>Comparative Question Answering (CQA) lies at the intersection of Question Answering, Argument Mining, and Summarization. It poses unique challenges due to the inherently subjective nature of many questions and the need to integrate diverse perspectives. Although the CQA task can be addressed using recently emerged instruction-following Large Language Models (LLMs), challenges such as hallucinations in their outputs and the lack of transparent argument provenance remain significant limitations.To address these challenges, we construct a manually curated dataset comprising arguments annotated with their relevance. These arguments are further used to answer comparative questions, enabling precise traceability and faithfulness. Furthermore, we define explicit criteria for an “ideal” comparison and introduce a benchmark for evaluating the outputs of various Retrieval-Augmented Generation (RAG) models with respect to argument relevance. All code and data are publicly released to support further research.</abstract>
       <url hash="58358770">2025.acl-long.765</url>
@@ -11212,7 +11212,7 @@
       <author><first>Kun</first><last>Ji</last></author>
       <author><first>Qing</first><last>Huang</last></author>
       <author><first>Xinyang</first><last>Hu</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Qianhe</first><last>Zheng</last></author>
       <pages>15721-15749</pages>
       <abstract>We introduce **FinanceReasoning**, a novel benchmark designed to evaluate the reasoning capabilities of large reasoning models (LRMs) in financial numerical reasoning problems. Compared to existing benchmarks, our work provides three key advancements. (1) **Credibility**: We update 15.6% of the questions from four public datasets, annotating 908 new questions with detailed Python solutions and rigorously refining evaluation standards. This enables an accurate assessment of the reasoning improvements of LRMs. (2) **Comprehensiveness**: FinanceReasoning covers 67.8% of financial concepts and formulas, significantly surpassing existing datasets. Additionally, we construct 3,133 Python-formatted functions, which enhances LRMs’ financial reasoning capabilities through refined knowledge (*e.g.*, 83.2% <tex-math>\rightarrow</tex-math> 91.6% for GPT-4o). (3) **Challenge**: Models are required to apply multiple financial formulas for precise numerical reasoning on 238 *Hard* problems. The best-performing model (*i.e.*, OpenAI o1 with PoT) achieves 89.1% accuracy, yet LRMs still face challenges in numerical precision. We demonstrate that combining Reasoner and Programmer models can effectively enhance LRMs’ performance (*e.g.*, 83.2% <tex-math>\rightarrow</tex-math> 87.8% for DeepSeek-R1). Our work paves the way for future research on evaluating and improving LRMs in domain-specific complex reasoning tasks.</abstract>
@@ -11238,7 +11238,7 @@
       <author><first>Peiyu</first><last>Liu</last><affiliation>University of International Business and Economics</affiliation></author>
       <author><first>Tianwen</first><last>Wei</last><affiliation>Xiaomi</affiliation></author>
       <author><first>Bo</first><last>Zhu</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Shuicheng</first><last>Yan</last><affiliation>National University of Singapore</affiliation></author>
       <pages>15800-15811</pages>
       <abstract>In this work, we investigate how to sparsify a pre-trained dense large language model into a mixture-of-experts (MoE) architecture for faster inference. Our approach applies mask matrix to the activations for each expert, constrained by <tex-math>L_0</tex-math> regularization to minimize the number of activated parameters. Starting with all parameters active, the model is progressively sparsified during training, ensuring minimal performance loss. This approach proves more efficient than one-shot sparsification techniques, which typically require significant resources for performance recovery. Moreover, our approach automatically identifies shared, token-specific, and inactive experts, allowing for more efficient allocation of computational resources. Through extensive experiments, we achieve up to 97% performance retention on downstream tasks with only 50% of the feed-forward parameters activated in dense models. Beyond enhancing inference efficiency, this strategy of sharing computational units among experts presents a valuable framework for designing more generalized and efficient MoE architectures, opening avenues for future advancements in expert-based models.</abstract>
@@ -11438,8 +11438,8 @@
     <paper id="783">
       <title><fixed-case>C</fixed-case>risis<fixed-case>TS</fixed-case>: Coupling Social Media Textual Data and Meteorological Time Series for Urgency Classification</title>
       <author><first>Romain</first><last>Meunier</last><affiliation>IRIT</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
-      <author><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, université de Toulouse</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last><affiliation>IRIT, université de Toulouse</affiliation></author>
       <author><first>Zhongzheng</first><last>Qiao</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Savitha</first><last>Ramasamy</last><affiliation>Institute for Infocomm Research, Agency for Science, Technology and Research, Singapore</affiliation></author>
       <pages>16082-16099</pages>
@@ -11509,7 +11509,7 @@
       <author><first>Zhiyuan</first><last>Zhu</last></author>
       <author><first>Yusheng</first><last>Liao</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Zhe</first><last>Chen</last></author>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last></author>
+      <author><first>Yuhao</first><last>Wang</last></author>
       <author><first>Yunfeng</first><last>Guan</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Yanfeng</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yu</first><last>Wang</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
@@ -11626,7 +11626,7 @@
       <title>Unmasking Style Sensitivity: A Causal Analysis of Bias Evaluation Instability in Large Language Models</title>
       <author><first>Jiaxu</first><last>Zhao</last></author>
       <author><first>Meng</first><last>Fang</last><affiliation>University of Liverpool and Eindhoven University of Technology</affiliation></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Carnegie Mellon University</affiliation></author>
+      <author><first>Kun</first><last>Zhang</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Carnegie Mellon University</affiliation></author>
       <author><first>Mykola</first><last>Pechenizkiy</last><affiliation>Eindhoven University of Technology</affiliation></author>
       <pages>16314-16338</pages>
       <abstract>Natural language processing applications are increasingly prevalent, but social biases in their outputs remain a critical challenge. While various bias evaluation methods have been proposed, these assessments show unexpected instability when input texts undergo minor stylistic changes. This paper conducts a comprehensive analysis of how different style transformations impact bias evaluation results across multiple language models and bias types using causal inference techniques. Our findings reveal that formality transformations significantly affect bias scores, with informal style showing substantial bias reductions (up to 8.33% in LLaMA-2-13B). We identify appearance bias, sexual orientation bias, and religious bias as most susceptible to style changes, with variations exceeding 20%. Larger models demonstrate greater sensitivity to stylistic variations, with bias measurements fluctuating up to 3.1% more than in smaller models. These results highlight critical limitations in current bias evaluation methods and emphasize the need for reliable and fair assessments of language models.</abstract>
@@ -11637,7 +11637,7 @@
     <paper id="797">
       <title><fixed-case>M</fixed-case>ock<fixed-case>C</fixed-case>onf: A Student Interpretation Dataset: Analysis, Word- and Span-level Alignment and Baselines</title>
       <author><first>Dávid</first><last>Javorský</last><affiliation>, Charles University Prague</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University Prague</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University Prague</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
       <pages>16339-16356</pages>
       <abstract>In simultaneous interpreting, an interpreter renders the speech into another language with a very short lag, much sooner than sentences are finished. In order to understand and later reproduce this dynamic and complex task automatically, we need specialized datasets and tools for analysis, monitoring, and evaluation, such as parallel speech corpora, and tools for their automatic annotation. Existing parallel corpora of translated texts and associated alignment algorithms hardly fill this gap, as they fail to model long-range interactions between speech segments or specific types of divergences (e.g. shortening, simplification, functional generalization) between the original and interpreted speeches. In this work, we develop and explore MockConf, a student interpretation dataset that was collected from Mock Conferences run as part of the students’ curriculum. This dataset contains 7 hours of recordings in 5 European languages, transcribed and aligned at the level of spans and words. We further implement and release InterAlign, a modern web-based annotation tool for parallel word and span annotations on long inputs, suitable for aligning simultaneous interpreting. We propose metrics for the evaluation and a baseline for automatic alignment. Dataset and tools will be released to the community.</abstract>
@@ -11652,7 +11652,7 @@
       <author><first>Mingyang</first><last>Wang</last></author>
       <author><first>Zifeng</first><last>Ding</last></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>16357-16374</pages>
       <abstract>This paper introduces BMIKE-53, a comprehensive benchmark for cross-lingual in-context knowledge editing (IKE), spanning 53 languages and three KE datasets: zsRE, CounterFact, and WikiFactDiff. Cross-lingual KE, which requires knowledge edited in one language to generalize across diverse languages while preserving unrelated knowledge, remains underexplored. To address this, we systematically evaluate IKE under zero-shot, one-shot, and few-shot setups, including tailored metric-specific demonstrations. Our findings reveal that model scale and demonstration alignment critically govern cross-lingual editing efficacy, with larger models and tailored demonstrations significantly improving performance. Linguistic properties, particularly script type, strongly influence outcomes, with non-Latin languages underperforming due to issues like language confusion.</abstract>
       <url hash="7b264666">2025.acl-long.798</url>
@@ -11685,7 +11685,7 @@
       <title>Enhancing Event-centric News Cluster Summarization via Data Sharpening and Localization Insights</title>
       <author><first>Longyin</first><last>Zhang</last></author>
       <author><first>Bowei</first><last>Zou</last><affiliation>A*STAR</affiliation></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
       <pages>16412-16426</pages>
       <abstract>This paper tackles the challenges of clustering news articles by main events (MEs) and summarizing these clusters, focusing on diverse languages and localized contexts. Our approach consists of four key contributions. First, we investigate the role of dynamic clustering and the integration of various ME references, including event attributions extracted by language models (LMs), in enhancing event-centric clustering. Second, we propose a data-sharpening framework that optimizes the balance between information volume and entropy in input texts, thereby optimizing generated summaries on multiple indicators. Third, we fine-tune LMs with local news articles for cross-lingual temporal question-answering and text summarization, achieving notable improvements in capturing localized contexts. Lastly, we present the first cross-lingual dataset and comprehensive evaluation metrics tailored for the event-centric news cluster summarization pipeline. Our findings enhance the understanding of news summarization across N-gram, event-level coverage, and faithfulness, providing new insights into leveraging LMs for large-scale cross-lingual and localized news analysis.</abstract>
       <url hash="9aa03c47">2025.acl-long.801</url>
@@ -11734,7 +11734,7 @@
       <author><first>Kangyang</first><last>Luo</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Chen</first><last>Qian</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Fanchao</first><last>Qi</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
       <pages>16469-16488</pages>
       <abstract>Training LLMs on data containing unfamiliar knowledge during the instruction tuning stage can encourage hallucinations. To address this challenge, we introduce NOVA, a novel framework designed to identify high-quality data that aligns well with the LLM’s learned knowledge to reduce hallucinations. NOVA includes Internal Consistency Probing (ICP) and Semantic Equivalence Identification (SEI) to measure how familiar the LLM is with instruction data. Specifically, ICP evaluates the LLM’s understanding of the given instruction by calculating the tailored consistency among multiple self-generated responses. SEI further assesses the familiarity of the LLM with the target response by comparing it to the generated responses, using the proposed semantic clustering and well-designed voting strategy. Finally, to ensure the quality of selected samples, we introduce an expert-aligned reward model, considering characteristics beyond just familiarity. By considering data quality and avoiding unfamiliar data, we can utilize the selected data to effectively align LLMs to follow instructions and hallucinate less. Experiments show that NOVA significantly reduces hallucinations while maintaining a competitive ability to follow instructions.</abstract>
@@ -11764,7 +11764,7 @@
       <author><first>Qianqian</first><last>Xie</last></author>
       <author><first>Christine</first><last>de Kock</last></author>
       <author><first>Sophia</first><last>Ananiadou</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <pages>16508-16523</pages>
       <abstract>Misinformation is prevalent in various fields such as education, politics, health, etc., causing significant harm to society. However, current methods for cross-domain misinformation detection rely on effort- and resource-intensive fine-tuning and complex model structures. With the outstanding performance of LLMs, many studies have employed them for misinformation detection. Unfortunately, they focus on in-domain tasks and do not incorporate significant sentiment and emotion features (which we jointly call <i>affect</i>). In this paper, we propose RAEmoLLM, the first retrieval augmented (RAG) LLMs framework to address cross-domain misinformation detection using in-context learning based on affective information. RAEmoLLM includes three modules. (1) In the index construction module, we apply an emotional LLM to obtain affective embeddings from all domains to construct a retrieval database. (2) The retrieval module uses the database to recommend top K examples (text-label pairs) from source domain data for target domain contents. (3) These examples are adopted as few-shot demonstrations for the inference module to process the target domain content. The RAEmoLLM can effectively enhance the general performance of LLMs in cross-domain misinformation detection tasks through affect-based retrieval, without fine-tuning. We evaluate our framework on three misinformation benchmarks. Results show that RAEmoLLM achieves significant improvements compared to the other few-shot methods on three datasets, with the highest increases of 15.64%, 31.18%, and 15.73% respectively. This project is available at https://github.com/lzw108/RAEmoLLM.</abstract>
       <url hash="60285c57">2025.acl-long.806</url>
@@ -11813,7 +11813,7 @@
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>Università degli Studi di Roma Tor Vergata</affiliation></author>
       <author><first>Giancarlo A.</first><last>Xompero</last><affiliation>University of Rome Tor Vergata and Almawave SpA</affiliation></author>
       <author><first>Davide</first><last>Venditti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <pages>16572-16592</pages>
       <abstract>Large Language Models (LLMs) memorize, and thus, among huge amounts of uncontrolled data, may memorize Personally Identifiable Information (PII), which should not be stored and, consequently, not leaked. In this paper, we introduce Private Memorization Editing (PME), an approach for preventing private data leakage that turns an apparent limitation, that is, the LLMs’ memorization ability, into a powerful privacy defense strategy. While attacks against LLMs have been performed exploiting previous knowledge regarding their training data, our approach aims to exploit the same kind of knowledge in order to make a model more robust. We detect a memorized PII and then mitigate the memorization of PII by editing a model knowledge of its training data. We verify that our procedure does not affect the underlying language model while making it more robust against privacy Training Data Extraction attacks. We demonstrate that PME can effectively reduce the number of leaked PII in a number of configurations, in some cases even reducing the accuracy of the privacy attacks to zero.</abstract>
       <url hash="0d44168f">2025.acl-long.810</url>
@@ -11993,7 +11993,7 @@
       <author><first>Yasuhiro</first><last>Fujiwara</last><affiliation>NTT</affiliation></author>
       <author><first>Ayaka</first><last>Matsumoto</last><affiliation>NTT Communications</affiliation></author>
       <author><first>Narichika</first><last>Nomoto</last><affiliation>NTT, The University of Tokyo</affiliation></author>
-      <author><first>Yoshihide</first><last>Sato</last><affiliation>NTT</affiliation></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Sato</last><affiliation>NTT</affiliation></author>
       <pages>16831-16861</pages>
       <abstract>Large language models enhance collaborative task execution in multi-agent systems. Current studies break complex task into manageable tasks, but agents lack understanding of the overall task and how others approach their tasks, hindering synergy and integration.We propose a method called knowledgeable <b>
           <i>A</i></b>gents to design and perform <b>
@@ -12006,7 +12006,7 @@
     <paper id="824">
       <title>Logical forms complement probability in understanding language model (and human) performance</title>
       <author><first>Yixuan</first><last>Wang</last><affiliation>University of Chicago</affiliation></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
       <pages>16862-16877</pages>
       <abstract>With the increasing interest in using large language models (LLMs) for planning in natural language, understanding their behaviors becomes an important research question. This work conducts a systematic investigation of LLMs’ ability to perform logical reasoning in natural language. We introduce a controlled dataset of hypothetical and disjunctive syllogisms in propositional and modal logic and use it as the testbed for understanding LLM performance. Our results lead to novel insights in predicting LLM behaviors: in addition to the probability of input, logical forms should be considered as important factors. In addition, we show similarities and discrepancies between the logical reasoning performances of humans and LLMs by collecting and comparing behavioral data from both.</abstract>
       <url hash="7b9eb240">2025.acl-long.824</url>
@@ -12023,7 +12023,7 @@
       <author><first>Lei</first><last>Huang</last></author>
       <author><first>Ting</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Bing</first><last>Qin</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>16878-16895</pages>
       <abstract>Large language models (LLMs) have demonstrated impressive instruction following capabilities, while still struggling to accurately manage the length of the generated text, which is a fundamental requirement in many real-world applications. Existing length control methods involve fine-tuning the parameters of LLMs, which is inefficient and suboptimal for practical use. In this paper, we propose a novel iterative sampling framework for text length control, integrating the Metropolis-Hastings algorithm with an importance sampling acceleration strategy. This framework efficiently and reliably regulates LLMs to generate length-constrained text without modifying the underlying parameters, thereby preserving the original capabilities of LLMs. Experimental results demonstrate that our framework achieves almost 100% success rates of length control on Llama3.1 for tasks such as length-controlled abstractive summarization and length-constrained instruction following, with minimal additional computational overhead. This also highlights the significant potential of our method for precise length control across a broader range of applications, without compromising the versatility of LLMs.</abstract>
       <url hash="888a4eec">2025.acl-long.825</url>
@@ -12053,7 +12053,7 @@
     <paper id="827">
       <title>Global Eye: Breaking the “Fixed Thinking Pattern” during the Instruction Expansion Process</title>
       <author><first>Wenxuan</first><last>Lu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <author><first>Songhao</first><last>Jiang</last></author>
@@ -12103,7 +12103,7 @@
       <title><fixed-case>E</fixed-case>vent<fixed-case>RAG</fixed-case>: Enhancing <fixed-case>LLM</fixed-case> Generation with Event Knowledge Graphs</title>
       <author><first>Zairun</first><last>Yang</last></author>
       <author><first>Yilin</first><last>Wang</last></author>
-      <author><first>Zhengyan</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengyan</first><last>Shi</last></author>
       <author><first>Yuan</first><last>Yao</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Lei</first><last>Liang</last></author>
       <author><first>Keyan</first><last>Ding</last><affiliation>Zhejiang University</affiliation></author>
@@ -12271,7 +12271,7 @@
       <author><first>Yiwei</first><last>Wang</last><affiliation>University of California, Merced</affiliation></author>
       <author><first>Junfeng</first><last>Fang</last></author>
       <author><first>Pengliang</first><last>Ji</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>17198-17208</pages>
       <abstract>The knowledge within large language models (LLMs) may become outdated quickly. While in-context editing (ICE) is currently the most effective method for knowledge editing (KE), it is constrained by the black-box modeling of LLMs and thus lacks interpretability. Our work aims to elucidate the superior performance of ICE in KE by analyzing the impacts of in-context new knowledge on token-wise distributions. We observe that despite a significant boost in logits of the new knowledge, the performance of ICE is still hindered by stubborn knowledge. We propose a novel approach termed Decoding by Contrasting Knowledge (DeCK). DeCK derives the distribution of the next token by contrasting the logits obtained from the newly edited knowledge guided by ICE with those from the unedited parametric knowledge. Our experiments demonstrate that DeCK enhances the confidence of LLMs in edited facts. For instance, it improves the performance of LLaMA3-8B-instruct on MQuAKE by up to 219%, demonstrating its capability to strengthen ICE. DeCK can be easily integrated into any ICE method as a decoding component to enhance editing capabilities.</abstract>
       <url hash="3120b9a2">2025.acl-long.841</url>
@@ -12295,7 +12295,7 @@
       <title>Improving Chain-of-Thought Reasoning via Quasi-Symbolic Abstractions</title>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>17222-17240</pages>
       <abstract>Chain-of-Though (CoT) represents a common strategy for reasoning in Large Language Models (LLMs) by decomposing complex tasks into intermediate inference steps. However, explanations generated via CoT are susceptible to content biases that negatively affect their robustness and faithfulness. To mitigate existing limitations, recent work has proposed using logical formalisms coupled with external symbolic solvers. However, fully symbolic approaches possess the bottleneck of requiring a complete translation from natural language to formal languages, a process that affects efficiency and flexibility. To achieve a trade-off, this paper investigates methods to disentangle content from logical reasoning without a complete formalisation. In particular, we present QuaSAR (for Quasi-Symbolic Abstract Reasoning), a variation of CoT that guides LLMs to operate at a higher level of abstraction via quasi-symbolic explanations. Our framework leverages the capability of LLMs to formalise only relevant variables and predicates, enabling the coexistence of symbolic elements with natural language. We show the impact of QuaSAR for in-context learning and for constructing demonstrations to improve the reasoning capabilities of smaller models. Our experiments show that quasi-symbolic abstractions can improve CoT-based methods by up to 8% accuracy, enhancing robustness and consistency on challenging adversarial variations on both natural language (i.e. MMLU-Redux) and symbolic reasoning tasks (i.e., GSM-Symbolic).</abstract>
       <url hash="23c72859">2025.acl-long.843</url>
@@ -12380,7 +12380,7 @@
       <author><first>Sean</first><last>Papay</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Yarik</first><last>Menchaca Resendiz</last></author>
       <author><first>Aswathy</first><last>Velutharambath</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
-      <author><first>Amelie</first><last>Wuehrl</last></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wuehrl</last></author>
       <author><first>Sabine</first><last>Weber</last></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>Otto-Friedrich Universität Bamberg</affiliation></author>
       <pages>17331-17348</pages>
@@ -12430,7 +12430,7 @@
       <author><first>Xinwang</first><last>Liu</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Kai</first><last>Lu</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>17391-17406</pages>
       <abstract>LLMs demonstrate remarkable utility but remain vulnerable to jailbreak attacks that aim to elicit harmful responses. Existing defenses, including post-training alignment and prompt engineering, rely on training on safety-annotated datasets and safe prompt templates, struggling with adaptability to out-of-distribution (OOD) attacks. Steering internal representations of LLMs provides real-time adjustments to defend against OOD attacks. However, it struggles with maintaining model utility, since modifying the representation disrupts the forward pass of inference. It barely considers the competitive objectives of helpfulness and harmlessness in LLMs. We argue that adversarial game-based approaches promise a solution for conflicts between the two objectives. In this paper, we propose **A**dversarial **G**ame **D**efense (AGD), an adversarial game-based defense method that dynamically adjusts LLMs’ internal representations to achieve a balanced trade-off between helpfulness and harmlessness. AGD first proposes an interquartile range (IQR) method to detect abnormal attention weights and correct the abnormal weights via adversarial training. AGD adopts a bi-level optimization to play a two-player variable-sum game to approach Nash Equilibrium (NE), where the two players adversarially refine head activations for helpfulness and harmlessness respectively. Furthermore, AGD applies an expert model to next-token sampling to generate safer responses. Experiments show that AGD significantly improves LLMs’ safety over all baselines.</abstract>
       <url hash="559c9d39">2025.acl-long.851</url>
@@ -12465,15 +12465,15 @@
       <title>An Expanded Massive Multilingual Dataset for High-Performance Language Technologies (<fixed-case>HPLT</fixed-case>)</title>
       <author><first>Laurie</first><last>Burchell</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>Ona</first><last>De Gibert Bonet</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Nikolay</first><last>Arefyev</last><affiliation>University of Oslo</affiliation></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Mikko</first><last>Aulamo</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Marta</first><last>Bañón</last><affiliation>Prompsit Language Engineering</affiliation></author>
       <author><first>Pinzhen</first><last>Chen</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Mariia</first><last>Fedorova</last></author>
       <author><first>Liane</first><last>Guillou</last><affiliation>Aveni</affiliation></author>
       <author><first>Barry</first><last>Haddow</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University</affiliation></author>
       <author><first>Erik</first><last>Henriksson</last><affiliation>University of Turku</affiliation></author>
       <author><first>Mateusz</first><last>Klimaszewski</last><affiliation>Warsaw University of Technology</affiliation></author>
       <author><first>Ville</first><last>Komulainen</last></author>
@@ -12491,11 +12491,11 @@
       <author><first>Proyag</first><last>Pal</last></author>
       <author><first>Jousia</first><last>Piha</last><affiliation>University of Turku</affiliation></author>
       <author><first>Sampo</first><last>Pyysalo</last><affiliation>University of Turku</affiliation></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last><affiliation>Universidad de Alicante</affiliation></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last><affiliation>Universidad de Alicante</affiliation></author>
       <author><first>David</first><last>Samuel</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Pavel</first><last>Stepachev</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Dušan</first><last>Variš</last><affiliation>Charles University Prague</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last><affiliation>Charles University Prague</affiliation></author>
       <author><first>Tereza</first><last>Vojtěchová</last><affiliation>Charles University Prague</affiliation></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last><affiliation>Prompsit Language Engineering</affiliation></author>
       <pages>17452-17485</pages>
@@ -12631,7 +12631,7 @@
       <author><first>Jianfeng</first><last>Li</last><affiliation>Pingan Technology</affiliation></author>
       <author><first>Shaojun</first><last>Wang</last><affiliation>PAII Inc.</affiliation></author>
       <author><first>Jing</first><last>Xiao</last><affiliation>Pingan Group</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>17637-17659</pages>
       <abstract>Dialogue agents powered by Large Language Models (LLMs) show superior performance in various tasks. Despite the better user understanding and human-like responses, their **lack of controllability** remains a key challenge, often leading to unfocused conversations or task failure. To address this, we introduce Standard Operating Procedure (SOP) to regulate dialogue flow. Specifically, we propose **ChatSOP**, a novel SOP-guided Monte Carlo Tree Search (MCTS) planning framework designed to enhance the controllability of LLM-driven dialogue agents. To enable this, we curate a dataset comprising SOP-annotated multi-scenario dialogues, generated using a semi-automated role-playing system with GPT-4o and validated through strict manual quality control. Additionally, we propose a novel method that integrates Chain of Thought reasoning with supervised fine-tuning for SOP prediction and utilizes SOP-guided Monte Carlo Tree Search for optimal action planning during dialogues. Experimental results demonstrate the effectiveness of our method, such as achieving a 27.95% improvement in action accuracy compared to baseline models based on GPT-3.5 and also showing notable gains for open-source models. Dataset and codes are publicly available.</abstract>
       <url hash="11075a0f">2025.acl-long.863</url>
@@ -12680,7 +12680,7 @@
       <author><first>Xin</first><last>Quan</last></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Louise A.</first><last>Dennis</last><affiliation>University of Manchester, University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>17734-17755</pages>
       <abstract>Natural language explanations play a fundamental role in Natural Language Inference (NLI) by revealing how premises logically entail hypotheses. Recent work has shown that the interaction of large language models (LLMs) with theorem provers (TPs) can help verify and improve the validity of NLI explanations. However, TPs require translating natural language into machine-verifiable formal representations, a process that introduces the risk of semantic information loss and unfaithful interpretation, an issue compounded by LLMs’ challenges in capturing critical logical structures with sufficient precision. Moreover, LLMs are still limited in their capacity for rigorous and robust proof construction within formal verification frameworks. To mitigate issues related to faithfulness and robustness, this paper investigates strategies to (1) alleviate semantic loss during autoformalisation, (2) efficiently identify and correct syntactic errors in logical representations, (3) explicitly use logical expressions to guide LLMs in generating structured proof sketches, and (4) increase LLMs’ capacity of interpreting TP’s feedback for iterative refinement. Our empirical results on e-SNLI, QASC and WorldTree using different LLMs demonstrate that the proposed strategies yield significant improvements in autoformalisation (+18.46%, +34.2%, +39.77%) and explanation refinement (+29.5%, +51.5%, +41.25%) over the state-of-the-art model. Moreover, we show that specific interventions on the hybrid LLM-TP architecture can substantially improve efficiency, drastically reducing the number of iterations required for successful verification.</abstract>
       <url hash="2489a4b6">2025.acl-long.867</url>
@@ -12714,7 +12714,7 @@
     </paper>
     <paper id="870">
       <title>From Human Reading to <fixed-case>NLM</fixed-case> Understanding: Evaluating the Role of Eye-Tracking Data in Encoder-Based Models</title>
-      <author><first>Luca</first><last>Dini</last><affiliation>University of Pisa</affiliation></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last><affiliation>University of Pisa</affiliation></author>
       <author><first>Lucia</first><last>Domenichelli</last><affiliation>Consiglio Nazionale delle Ricerche</affiliation></author>
       <author><first>Dominique</first><last>Brunato</last><affiliation>istituto di linguistica computazionale “A. Zampolli”, ILC-CNR, Pisa</affiliation></author>
       <author><first>Felice</first><last>Dell’Orletta</last><affiliation>Istituto di Linguistica Computazionale “A. Zampolli” (ILC)</affiliation></author>
@@ -12778,7 +12778,7 @@
       <author><first>Jinyang</first><last>Gao</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Bolin</first><last>Ding</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>17876-17891</pages>
       <abstract>Tool learning has emerged as a crucial capability for large language models (LLMs) to solve complex real-world tasks through interaction with external tools. Existing approaches face significant challenges, including reliance on hand-crafted prompts, difficulty in multi-step planning, and lack of precise error diagnosis and reflection mechanisms. We propose <tex-math>\texttt{ToolCoder}</tex-math>, a novel framework that reformulates tool learning as a code generation task. Inspired by software engineering principles, <tex-math>\texttt{ToolCoder}</tex-math> transforms natural language queries into structured Python function scaffold and systematically breaks down tasks with descriptive comments, enabling LLMs to leverage coding paradigms for complex reasoning and planning. It then generates and executes function implementations to obtain final responses. Additionally, <tex-math>\texttt{ToolCoder}</tex-math> stores successfully executed functions in a repository to promote code reuse, while leveraging error traceback mechanisms for systematic debugging, optimizing both execution efficiency and robustness. Experiments demonstrate that <tex-math>\texttt{ToolCoder}</tex-math> achieves superior performance in task completion accuracy and execution reliability compared to existing approaches, establishing the effectiveness of code-centric approaches in tool learning.</abstract>
       <url hash="cd8366fb">2025.acl-long.874</url>
@@ -12829,7 +12829,7 @@
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Songming</first><last>Zhang</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>17948-17963</pages>
       <abstract>Continually expanding new languages for existing large language models (LLMs) is a promising yet challenging approach to building powerful multilingual LLMs.The biggest challenge is to make the model continuously learn new languages while preserving the proficient ability of old languages.To achieve this, recent work utilizes the Mixture-of-Experts (MoE) architecture to expand new languages by adding new experts and avoid catastrophic forgetting of old languages by routing corresponding tokens to the original model backbone (old experts).Although intuitive, this kind of method is parameter-costly when expanding new languages and still inevitably impacts the performance of old languages.To address these limitations, we analyze the language characteristics of different layers in LLMs and propose a layer-wise expert allocation algorithm (LayerMoE) to determine the appropriate number of new experts for each layer.Specifically, we find different layers in LLMs exhibit different representation similarities between languages and then utilize the similarity as the indicator to allocate experts for each layer, i.e., the higher similarity, the fewer experts.Additionally, to further mitigate the forgetting of old languages, we add a classifier in front of the router network on the layers with higher similarity to guide the routing of old language tokens.Experimental results show that our method outperforms the previous state-of-the-art baseline with 60% fewer experts in the single-expansion setting and with 33.3% fewer experts in the lifelong-expansion setting, demonstrating the effectiveness of our method.</abstract>
@@ -13119,7 +13119,7 @@
     <paper id="899">
       <title><fixed-case>DNCASR</fixed-case>: End-to-End Training for Speaker-Attributed <fixed-case>ASR</fixed-case></title>
       <author><first>Xianrui</first><last>Zheng</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Shanghai Artificial Intelligence Laboratory, Tsinghua University and University College London</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Shanghai Artificial Intelligence Laboratory, Tsinghua University and University College London</affiliation></author>
       <author><first>Phil</first><last>Woodland</last><affiliation>University of Cambridge</affiliation></author>
       <pages>18369-18383</pages>
       <abstract>This paper introduces DNCASR, a novel end-to-end trainable system designed for joint neural speaker clustering and automatic speech recognition (ASR), enabling speaker-attributed transcription of long multi-party meetings. DNCASR uses two separate encoders to independently encode global speaker characteristics and local waveform information, along with two linked decoders to generate speaker-attributed transcriptions. The use of linked decoders allows the entire system to be jointly trained under a unified loss function. By employing a serialised training approach, DNCASR effectively addresses overlapping speech in real-world meetings, where the link improves the prediction of speaker indices in overlapping segments. Experiments on the AMI-MDM meeting corpus demonstrate that the jointly trained DNCASR outperforms a parallel system that does not have links between the speaker and ASR decoders. Using cpWER to measure the speaker-attributed word error rate, DNCASR achieves a 9.0% relative reduction on the AMI-MDM Eval set.</abstract>
@@ -13147,7 +13147,7 @@
       <author><first>Yubo</first><last>Ma</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
       <author><first>Mingzhe</first><last>Du</last><affiliation>Nanyang Technological University and National University of Singapore</affiliation></author>
       <author><first>Rui</first><last>Mao</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
       <pages>18403-18419</pages>
       <abstract>Data contamination hinders fair LLM evaluation by introducing test data into newer models’ training sets. Existing studies solve this challenge by updating benchmarks with newly collected data. However, they fail to guarantee contamination-free evaluation as the newly collected data may contain pre-existing knowledge, and their benchmark updates rely on intensive human labor. To address these issues, we in this paper propose AntiLeak-Bench, an automated anti-leakage benchmarking framework. Instead of simply using newly collected data, we construct samples with explicitly new knowledge absent from LLMs’ training sets, which thus ensures strictly contamination-free evaluation. We further design a fully automated workflow to build and update our benchmark without human labor. This significantly reduces the cost of benchmark maintenance to accommodate emerging LLMs. Through extensive experiments, we highlight that data contamination likely exists before LLMs’ cutoff time and demonstrate that AntiLeak-Bench effectively overcomes this challenge.</abstract>
@@ -13265,7 +13265,7 @@
       <author><first>Huijie</first><last>Lv</last></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>18530-18549</pages>
       <abstract>Data diversity is crucial for the instruction tuning of large language models. Existing studies have explored various diversity-aware data selection methods to construct high-quality datasets and enhance model performance. However, the fundamental problem of precisely defining and measuring data diversity remains underexplored, limiting clear guidance for data engineering. To address this, we systematically analyze 11 existing diversity measurement methods by evaluating their correlation with model performance through extensive fine-tuning experiments. Our results indicate that a reliable diversity measure should properly account for both inter-sample differences and the information density in the sample space. Building on this, we propose NovelSum, a new diversity metric based on sample-level “novelty.” Experiments on both simulated and real-world data show that NovelSum accurately captures diversity variations and achieves a 0.97 correlation with instruction-tuned model performance, highlighting its value in guiding data engineering practices. With NovelSum as an optimization objective, we further develop a greedy, diversity-oriented data selection strategy that outperforms existing approaches, validating both the effectiveness and practical significance of our metric.</abstract>
       <url hash="f8958a6a">2025.acl-long.908</url>
@@ -13353,8 +13353,8 @@
     <paper id="915">
       <title>Probing <fixed-case>LLM</fixed-case>s for Multilingual Discourse Generalization Through a Unified Label Set</title>
       <author><first>Florian</first><last>Eichin</last></author>
-      <author><first>Yang Janet</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Michael A.</first><last>Hedderich</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>18665-18684</pages>
       <abstract>Discourse understanding is essential for many NLP tasks, yet most existing work remains constrained by framework-dependent discourse representations. This work investigates whether large language models (LLMs) capture discourse knowledge that generalizes across languages and frameworks. We address this question along two dimensions: (1) developing a unified discourse relation label set to facilitate cross-lingual and cross-framework discourse analysis, and (2) probing LLMs to assess whether they encode generalizable discourse abstractions. Using multilingual discourse relation classification as a testbed, we examine a comprehensive set of 23 LLMs of varying sizes and multilingual capabilities. Our results show that LLMs, especially those with multilingual training corpora, can generalize discourse information across languages and frameworks. Further layer-wise analyses reveal that language generalization at the discourse level is most salient in the intermediate layers. Lastly, our error analysis provides an account of challenging relation classes.</abstract>
@@ -13380,7 +13380,7 @@
       <author><first>Vicky</first><last>Feliren</last></author>
       <author><first>Bahrul Ilmi</first><last>Nasution</last></author>
       <author><first>Manuel Antonio</first><last>Rufino</last><affiliation>Samsung</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Rian Adam</first><last>Rajagede</last><affiliation>University of Central Florida and Universitas Islam Indonesia</affiliation></author>
       <author><first>Carlos Rafael</first><last>Catalan</last><affiliation>Samsung Research</affiliation></author>
       <author><first>Mohamed Fazli Mohamed</first><last>Imam</last></author>
@@ -13454,7 +13454,7 @@
       <author><first>Robert</first><last>Wijaya</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Onno P.</first><last>Kampman</last></author>
       <author><first>Ruochen</first><last>Zhang</last><affiliation>Brown University</affiliation></author>
-      <author><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
+      <author id="borje-f-karlsson"><first>Börje F.</first><last>Karlsson</last><affiliation>Beijing Academy of Artificial Intelligence (BAAI)</affiliation></author>
       <author><first>Peerat</first><last>Limkonchotiwat</last><affiliation>AI Singapore</affiliation></author>
       <pages>18685-18717</pages>
       <abstract>Despite Southeast Asia’s (SEA) extraordinary linguistic and cultural diversity, the region remains significantly underrepresented in vision-language (VL) research, resulting in AI models that inadequately capture SEA cultural nuances. To fill this gap, we present SEA-VL, an open-source initiative dedicated to developing culturally relevant high-quality datasets for SEA languages. By involving contributors from SEA countries, SEA-VL ensures better cultural relevance and diversity, fostering greater inclusivity of underrepresented languages and cultural depictions in VL research. Our methodology employed three approaches: community-driven crowdsourcing with SEA contributors, automated image crawling, and synthetic image generation. We evaluated each method’s effectiveness in capturing cultural relevance. We found that image crawling achieves approximately ~85% cultural relevance while being more cost- and time-efficient than crowdsourcing, whereas synthetic image generation failed to accurately reflect SEA cultural nuances and contexts. Collectively, we gathered 1.28 million SEA culturally relevant images, more than 50 times larger than other existing datasets. This work bridges the representation gap in SEA, establishes a foundation for developing culturally aware AI systems for this region, and provides a replicable framework for addressing representation gaps in other underrepresented regions.</abstract>
@@ -13495,20 +13495,20 @@
       <author><first>Shivalika</first><last>Singh</last><affiliation>Cohere Labs</affiliation></author>
       <author><first>Angelika</first><last>Romanou</last></author>
       <author><first>Clémentine</first><last>Fourrier</last><affiliation>HuggingFace</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>Jian Gang</first><last>Ngui</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Daniel</first><last>Vila-Suero</last><affiliation>Hugging Face</affiliation></author>
       <author><first>Peerat</first><last>Limkonchotiwat</last><affiliation>AI Singapore</affiliation></author>
       <author><first>Kelly</first><last>Marchisio</last><affiliation>Cohere and Cohere</affiliation></author>
       <author><first>Wei Qi</first><last>Leong</last><affiliation>AI Singapore</affiliation></author>
       <author><first>Yosephine</first><last>Susanto</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Raymond</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Shayne</first><last>Longpre</last></author>
       <author><first>Sebastian</first><last>Ruder</last><affiliation>Facebook</affiliation></author>
       <author><first>Wei-Yin</first><last>Ko</last></author>
       <author><first>Antoine</first><last>Bosselut</last><affiliation>Swiss Federal Institute of Technology Lausanne</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <author><first>Leshem</first><last>Choshen</last><affiliation>Massachusetts Institute of Technology and International Business Machines</affiliation></author>
       <author><first>Daphne</first><last>Ippolito</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Enzo</first><last>Ferrante</last><affiliation>CONICET / Universidad de Buenos Aires</affiliation></author>
@@ -13525,7 +13525,7 @@
       <title>Improving Dialogue Discourse Parsing through Discourse-aware Utterance Clarification</title>
       <author><first>Yaxin</first><last>Fan</last></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>18800-18816</pages>
       <abstract>Dialogue discourse parsing aims to identify and analyze discourse relations between the utterances within dialogues. However, linguistic features in dialogues, such as omission and idiom, frequently introduce ambiguities that obscure the intended discourse relations, posing significant challenges for parsers. To address this issue, we propose a Discourse-aware Clarification Module (DCM) to enhance the performance of the dialogue discourse parser. DCM employs two distinct reasoning processes: clarification type reasoning and discourse goal reasoning. The former analyzes linguistic features, while the latter distinguishes the intended relation from the ambiguous one. Furthermore, we introduce Contribution-aware Preference Optimization (CPO) to mitigate the risk of erroneous clarifications, thereby reducing cascading errors. CPO enables the parser to assess the contributions of the clarifications from DCM and provide feedback to optimize the DCM, enhancing its adaptability and alignment with the parser’s requirements. Extensive experiments on the STAC and Molweni datasets demonstrate that our approach effectively resolves ambiguities and significantly outperforms the state-of-the-art (SOTA) baselines.</abstract>
       <url hash="3216d0b0">2025.acl-long.920</url>
@@ -13549,7 +13549,7 @@
     </paper>
     <paper id="922">
       <title>Words of Warmth: Trust and Sociability Norms for over 26k <fixed-case>E</fixed-case>nglish Words</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>18830-18850</pages>
       <abstract>Social psychologists have shown that Warmth (W) and Competence (C) are the primary dimensions along which we assess other people and groups. These dimensions impact various aspects of our lives from social competence and emotion regulation to success in the work place and how we view the world. More recent work has started to explore how these dimensions develop, why they have developed, and what they constitute. Of particular note, is the finding that warmth has two distinct components: Trust (T) and Sociability (S). In this work, we introduce Words of Warmth, the first large-scale repository of manually derived word–warmth (as well as word–trust and word–sociability) associations for over 26k English words. We show that the associations are highly reliable. We use the lexicons to study the rate at which children acquire WCTS words with age. Finally, we show that the lexicon enables a wide variety of bias and stereotype research through case studies on various target entities. Words of Warmth is freely available at: http://saifmohammad.com/warmth.html</abstract>
       <url hash="fb480092">2025.acl-long.922</url>
@@ -13574,7 +13574,7 @@
       <author><first>Siyuan</first><last>Wang</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Shengbin</first><last>Yue</last></author>
       <author><first>Zengfeng</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>18874-18893</pages>
       <abstract>The reward model has become increasingly important in alignment, assessment, and data construction for large language models (LLMs). Most existing researchers focus on enhancing reward models through data improvements, following the conventional training framework for reward models that directly optimizes the predicted rewards.In this paper, we propose a hybrid alignment framework **HAF-RM** for reward model training by introducing an additional constraint on token-level policy probabilities in addition to the reward score. It can simultaneously supervise the internal preference model at the token level and optimize the mapping layer of the reward model at the sequence level.Experiment results on five datasets sufficiently show the validity and effectiveness of our proposed hybrid framework for training a high-quality reward model.By decoupling the reward modeling procedure and incorporating hybrid supervision, our **HAF-RM** framework offers a principled and effective approach to enhancing the performance and alignment of reward models, a critical component in the responsible development of powerful language models. We release our code at [https://haf-rm.github.io](https://haf-rm.github.io).</abstract>
@@ -13764,7 +13764,7 @@
       <author><first>Xinyue</first><last>Shen</last></author>
       <author><first>Yun</first><last>Shen</last><affiliation>Flexera</affiliation></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>19096-19111</pages>
       <abstract>Knowledge files have been widely used in large language model (LLM)-powered agents, such as GPTs, to improve response quality. However, concerns over the potential leakage of knowledge files have grown significantly. Existing studies demonstrate that adversarial prompts can induce GPTs to leak knowledge file content. Yet, it remains uncertain whether additional leakage vectors exist, particularly given the complex data flows across clients, servers, and databases in GPTs. In this paper, we present a comprehensive risk assessment of knowledge file leakage, leveraging a novel workflow inspired by Data Security Posture Management (DSPM). Through the analysis of 651,022 GPT metadata, 11,820 flows, and 1,466 responses, we identify five leakage vectors: metadata, GPT initialization, retrieval, sandboxed execution environments, and prompts. These vectors enable adversaries to extract sensitive knowledge file data such as titles, content, types, and sizes. Notably, the activation of the built-in tool Code Interpreter leads to a privilege escalation vulnerability, enabling adversaries to directly download original knowledge files with a 95.95% success rate. Further analysis reveals that 28.80% of leaked files are copyrighted, including digital copies from major publishers and internal materials from a listed company. In the end, we provide actionable solutions for GPT builders and platform providers to secure the GPT data supply chain.</abstract>
       <url hash="f6cca02d">2025.acl-long.936</url>
@@ -13777,8 +13777,8 @@
       <author><first>Qian</first><last>Chen</last></author>
       <author><first>Shengpeng</first><last>Ji</last></author>
       <author><first>Yu</first><last>Xi</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last><affiliation>Alibaba</affiliation></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
+      <author><first>Chong</first><last>Zhang</last><affiliation>Alibaba</affiliation></author>
       <author><first>Xianghu</first><last>Yue</last><affiliation>Tianjin University</affiliation></author>
       <author><first>ShiLiang</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last><affiliation>The Chinese University of Hong Kong (Shenzhen); National University of Singapore and National University of Singapore</affiliation></author>
@@ -13935,7 +13935,7 @@
     </paper>
     <paper id="948">
       <title>Cramming 1568 Tokens into a Single Vector and Back Again: Exploring the Limits of Embedding Space Capacity</title>
-      <author><first>Yuri</first><last>Kuratov</last><affiliation>AIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology</affiliation></author>
+      <author id="yurii-kuratov"><first>Yuri</first><last>Kuratov</last><affiliation>AIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology</affiliation></author>
       <author><first>Mikhail</first><last>Arkhipov</last></author>
       <author><first>Aydar</first><last>Bulatov</last></author>
       <author><first>Mikhail</first><last>Burtsev</last><affiliation>London Institute for Mathematical Sciences</affiliation></author>
@@ -14076,7 +14076,7 @@
       <author><first>Shenghua</first><last>He</last><affiliation>PAII INC</affiliation></author>
       <author><first>Tian</first><last>Xia</last></author>
       <author><first>Shijia</first><last>Pan</last><affiliation>University of California, Merced</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>19497-19521</pages>
       <abstract>LLMs have immense potential for generating plans, transforming an initial world state into a desired goal state. A large body of research has explored the use of LLMs for various planning tasks, from web navigation to travel planning and database querying. However, many of these systems are tailored to specific problems, making it challenging to compare them or determine the best approach for new tasks. There is also a lack of clear and consistent evaluation criteria. Our survey aims to offer a comprehensive overview of current LLM planners to fill this gap. It builds on foundational work by Kartam and Wilkins (1990) and examines six key performance criteria: completeness, executability, optimality, representation, generalization, and efficiency. For each, we provide a thorough analysis of representative works and highlight their strengths and weaknesses. Our paper also identifies crucial future directions, making it a valuable resource for both practitioners and newcomers interested in leveraging LLM planning to support agentic workflows.</abstract>
       <url hash="5dce9268">2025.acl-long.958</url>
@@ -14114,7 +14114,7 @@
       <author><first>Jian</first><last>Zhu</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Farhan</first><last>Samir</last><affiliation>University of British Columbia</affiliation></author>
       <author><first>Eleanor</first><last>Chodroff</last><affiliation>University of Zurich</affiliation></author>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>19568-19585</pages>
       <abstract>We present ZIPA, a family of efficient speech models that advances the state-of-the-art performance of crosslinguistic phone recognition. We first curated IPA PACK++, a large-scale multilingual speech corpus with 17,000+ hours of normalized phone transcriptions and a novel evaluation set capturing unseen languages and sociophonetic variation. ZIPA, including transducer (ZIPA-T) and CTC-based (ZIPA-CR) variants, leverages the efficient Zipformer backbones and outperforms existing phone recognition systems with much fewer parameters. Further scaling via noisy student training on 11,000+ hours of pseudo-labeled multilingual data yields further improvement. While ZIPA achieves strong performance on benchmarks, error analysis reveals persistent limitations in modeling sociophonetic diversity, underscoring challenges for future research.</abstract>
       <url hash="ac61cdcd">2025.acl-long.961</url>
@@ -14184,7 +14184,7 @@
       <author><first>Manuel</first><last>Brack</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Kristian</first><last>Kersting</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>19656-19679</pages>
       <abstract>Text-to-image (T2I) generation models have achieved great results in image quality, flexibility, and text alignment, leading to widespread use. Through improvements in multilingual abilities, a larger community can access this technology. Yet, we show that multilingual models suffer from substantial gender bias. Furthermore, the expectation that results should be similar across languages does not hold. We introduce MAGBIG, a controlled benchmark designed to study gender bias in multilingual T2I models, and use it to assess the impact of multilingualism on gender bias. To this end, we construct a set of multilingual prompts that offers a carefully controlled setting accounting for the complex grammatical differences influencing gender across languages. Our results show strong gender biases and notable language-specific differences across models. While we explore prompt engineering strategies to mitigate these biases, we find them largely ineffective and sometimes even detrimental to text-to-image alignment. Our analysis highlights the need for research on diverse language representations and greater control over bias in T2I models.</abstract>
       <url hash="851a87f3">2025.acl-long.966</url>
@@ -14224,7 +14224,7 @@
     <paper id="969">
       <title><fixed-case>CU</fixed-case>-<fixed-case>MAM</fixed-case>: Coherence-Driven Unified Macro-Structures for Argument Mining</title>
       <author><first>Debela</first><last>Gemechu</last></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>19731-19749</pages>
       <abstract>Argument Mining (AM) involves the automatic identification of argument structure in natural language. Traditional AM methods rely on micro-structural features derived from the internal properties of individual Argumentative Discourse Units (ADUs). However, argument structure is shaped by a macro-structure capturing the functional interdependence among ADUs. This macro-structure consists of segments, where each segment contains ADUs that fulfill specific roles to maintain coherence within the segment (**local coherence**) and across segments (**global coherence**). This paper presents an approach that models macro-structure, capturing both local and global coherence to identify argument structures. Experiments on heterogeneous datasets demonstrate superior performance in both in-dataset and cross-dataset evaluations. The cross-dataset evaluation shows that macro-structure enhances transferability to unseen datasets.</abstract>
       <url hash="b2a7224b">2025.acl-long.969</url>
@@ -14266,7 +14266,7 @@
       <author><first>Tong</first><last>Zhang</last></author>
       <author><first>Bojie</first><last>Hu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <pages>19791-19807</pages>
       <abstract>In modern large language models (LLMs), LLM alignment is of crucial importance and is typically achieved through methods such as reinforcement learning from human feedback (RLHF) and direct preference optimization (DPO). However, in most existing methods for LLM alignment, all tokens in the response are optimized using a sparse, response-level reward or preference annotation. The ignorance of token-level rewards may erroneously punish high-quality tokens or encourage low-quality tokens, resulting in suboptimal performance and slow convergence speed. To address this issue, we propose <i>
           <b>AlignDistil</b></i>, a RLHF-equivalent distillation method for token-level reward optimization. Specifically, we introduce the reward learned by DPO into the RLHF objective and theoretically prove the equivalence between this objective and a token-level distillation process, where the teacher distribution linearly combines the logits from the DPO model and a reference model. On this basis, we further bridge the accuracy gap between the reward from the DPO model and the pure reward model, by building a contrastive DPO reward with a normal and a reverse DPO model. Moreover, to avoid under- and over-optimization on different tokens, we design a token adaptive logit extrapolation mechanism to construct an appropriate teacher distribution for each token. Experimental results demonstrate the superiority of our AlignDistil over existing methods and showcase fast convergence due to its token-level distributional reward optimization.</abstract>
@@ -14332,7 +14332,7 @@
       <author><first>Qinglin</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Qian</first><last>Chen</last></author>
       <author><first>Xihao</first><last>Li</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>19914-19928</pages>
       <abstract>Speaker diarization aims to segment an audio stream into homogeneous partitions based on speaker identity, playing a crucial role in speech comprehension and analysis. Mainstream speaker diarization systems rely only on acoustic information, making the task particularly challenging in complex acoustic environments in real-world applications. Recently, significant efforts have been devoted to audio-visual or audio-semantic multimodal modeling to enhance speaker diarization performance; however, these approaches still struggle to address the complexities of speaker diarization on spontaneous and unstructured multi-party conversations. To fully exploit meaningful dialogue patterns, we propose a novel multimodal approach that jointly utilizes audio, visual, and semantic cues to enhance speaker diarization. Our approach structures visual cues among active speakers and semantic cues in spoken content into a cohesive format known as pairwise constraints, and employs a semi-supervised clustering technique based on pairwise constrained propagation. Extensive experiments conducted on multiple multimodal datasets demonstrate that our approach effectively integrates audio-visual-semantic information into the clustering process for acoustic speaker embeddings and consistently outperforms state-of-the-art speaker diarization methods, while largely preserving the overall system framework.</abstract>
       <url hash="3b1ec0a1">2025.acl-long.977</url>
@@ -14441,7 +14441,7 @@
     <paper id="984">
       <title>Modeling the Evolution of <fixed-case>E</fixed-case>nglish Noun Compounds with Feature-Rich Diachronic Compositionality Prediction</title>
       <author><first>Filip</first><last>Miletić</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>20071-20092</pages>
       <abstract>We analyze the evolution of English noun compounds, which we represent as vectors of time-specific values. We implement a wide array of methods to create a rich set of features, using them to classify compounds for present-day compositionality and to assess the informativeness of the corresponding linguistic patterns. Our best results use BERT – reflecting the similarity of compounds and sentence contexts – and we further capture relevant and complementary information across approaches. Leveraging these feature differences, we find that the development of low-compositional meanings is reflected by a parallel drop in compositionality and sustained semantic change. The same distinction is echoed in transformer processing: compositionality estimates require far less contextualization than semantic change estimates.</abstract>
       <url hash="eb063f1d">2025.acl-long.984</url>
@@ -14455,7 +14455,7 @@
       <author><first>Raoyuan</first><last>Zhao</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Florian</first><last>Eichin</last></author>
       <author><first>Jonas</first><last>Fischer</last><affiliation>Saarland Informatics Campus, Max-Planck Institute</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>20093-20123</pages>
       <abstract>Prompt engineering for large language models is challenging, as even small prompt perturbations or model changes can significantly impact the generated output texts. Existing evaluation methods of LLM outputs, either automated metrics or human evaluation, have limitations, such as providing limited insights or being labor-intensive. We propose Spotlight, a new approach that combines both automation and human analysis. Based on data mining techniques, we automatically distinguish between random (decoding) variations and systematic differences in language model outputs. This process provides token patterns that describe the systematic differences and guide the user in manually analyzing the effects of their prompts and changes in models efficiently. We create three benchmarks to quantitatively test the reliability of token pattern extraction methods and demonstrate that our approach provides new insights into established prompt data. From a human-centric perspective, through demonstration studies and a user study, we show that our token pattern approach helps users understand the systematic differences of language model outputs. We are further able to discover relevant differences caused by prompt and model changes (e.g. related to gender or culture), thus supporting the prompt engineering process and human-centric model behavior research.</abstract>
       <url hash="13a88d16">2025.acl-long.985</url>
@@ -14499,7 +14499,7 @@
       <title>Improving Language and Modality Transfer in Translation by Character-level Modeling</title>
       <author><first>Ioannis</first><last>Tsiamas</last><affiliation>Facebook and Universidad Politécnica de Cataluna</affiliation></author>
       <author><first>David</first><last>Dale</last><affiliation>FAIR at Meta</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>20171-20187</pages>
       <abstract>Current translation systems, despite being highly multilingual, cover only 5% of the world’s languages. Expanding language coverage to the long-tail of low-resource languages requires data-efficient methods that rely on cross-lingual and cross-modal knowledge transfer. To this end, we propose a character-based approach to improve adaptability to new languages and modalities. Our method leverages SONAR, a multilingual fixed-size embedding space with different modules for encoding and decoding. We use a teacher-student approach with parallel translation data to obtain a character-level encoder. Then, using ASR data, we train a lightweight adapter to connect a massively multilingual CTC ASR model (MMS), to the character-level encoder, potentially enabling speech translation from 1,000+ languages. Experimental results in text translation for 75 languages on FLORES+ demonstrate that our character-based approach can achieve better language transfer than traditional subword-based models, especially outperforming them in low-resource settings, and demonstrating better zero-shot generalizability to unseen languages. Our speech adaptation, maximizing knowledge transfer from the text modality, achieves state-of-the-art results in speech-to-text translation on the FLEURS benchmark on 33 languages, surpassing previous supervised and cascade models, albeit being a zero-shot model with minimal supervision from ASR data.</abstract>
       <url hash="17b5907a">2025.acl-long.988</url>
@@ -14511,7 +14511,7 @@
       <author><first>Niyati</first><last>Bafna</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Emily</first><last>Chang</last></author>
       <author><first>Nathaniel Romney</first><last>Robinson</last><affiliation>Department of Computer Science, Whiting School of Engineering</affiliation></author>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>David</first><last>Yarowsky</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Hale</first><last>Sirin</last><affiliation>Johns Hopkins University</affiliation></author>
@@ -14527,7 +14527,7 @@
       <author><first>Julian</first><last>Katz-Samuels</last><affiliation>Amazon</affiliation></author>
       <author><first>Adithya M</first><last>Devraj</last></author>
       <author><first>Hyokun</first><last>Yun</last><affiliation>Amazon</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Yi</first><last>Xu</last><affiliation>Amazon</affiliation></author>
       <author><first>Yi</first><last>Pan</last><affiliation>Amazon</affiliation></author>
       <author><first>Bing</first><last>Yin</last><affiliation>Amazon</affiliation></author>
@@ -14650,7 +14650,7 @@
       <author><first>Jiarui</first><last>Liu</last></author>
       <author><first>Andy</first><last>Liu</last></author>
       <author><first>Xuhui</first><last>Zhou</last></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Maarten</first><last>Sap</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>20434-20471</pages>
       <abstract>In this work, we tackle the challenge of embedding realistic human personality traits into LLMs. Previous approaches have primarily focused on prompt-based methods that describe the behavior associated with the desired personality traits, suffering from realism and validity issues. To address these limitations, we introduce BIG5-CHAT, a large-scale dataset containing 100,000 dialogues designed to ground models in how humans express their personality in text. Leveraging this dataset, we explore Supervised Fine-Tuning and Direct Preference Optimization as training-based methods to align LLMs more naturally with human personality patterns. Our methods outperform prompting on personality assessments such as BFI and IPIP-NEO, with trait correlations more closely matching human data. Furthermore, our experiments reveal that models trained to exhibit higher conscientiousness, higher agreeableness, lower extraversion, and lower neuroticism display better performance on reasoning tasks, aligning with psychological findings on how these traits impact human cognitive performance. To our knowledge, this work is the first comprehensive study to demonstrate how training-based methods can shape LLM personalities through learning from real human behaviors.</abstract>
@@ -14850,7 +14850,7 @@
     <paper id="1015">
       <title><fixed-case>QQSUM</fixed-case>: A Novel Task and Model of Quantitative Query-Focused Summarization for Review-based Product Question Answering</title>
       <author><first>An Quang</first><last>Tang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Minh Ngoc</first><last>Dinh</last></author>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>20810-20831</pages>
@@ -14877,7 +14877,7 @@
       <author><first>Vidya</first><last>Srinivas</last></author>
       <author><first>Xuhai</first><last>Xu</last><affiliation>Columbia University</affiliation></author>
       <author><first>Xin</first><last>Liu</last><affiliation>Google</affiliation></author>
-      <author><first>Kumar</first><last>Ayush</last><affiliation>Google</affiliation></author>
+      <author id="ayush-kumar"><first>Kumar</first><last>Ayush</last><affiliation>Google</affiliation></author>
       <author><first>Isaac</first><last>Galatzer-Levy</last></author>
       <author><first>Shwetak</first><last>Patel</last><affiliation>Google and University of Washington</affiliation></author>
       <author><first>Daniel</first><last>McDuff</last><affiliation>Google</affiliation></author>
@@ -14938,7 +14938,7 @@
       <title>Consistent Client Simulation for Motivational Interviewing-based Counseling</title>
       <author><first>Yizhe</first><last>Yang</last></author>
       <author><first>Palakorn</first><last>Achananuparp</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Jing</first><last>Jiang</last><affiliation>Australian National University and Singapore Management University</affiliation></author>
       <author><first>Nicholas Gabriel</first><last>Lim</last><affiliation>Singapore University of Social Sciences</affiliation></author>
       <author><first>Cameron Tan Shi</first><last>Ern</last></author>
@@ -14987,7 +14987,7 @@
       <title><fixed-case>CAMI</fixed-case>: A Counselor Agent Supporting Motivational Interviewing through State Inference and Topic Exploration</title>
       <author><first>Yizhe</first><last>Yang</last></author>
       <author><first>Palakorn</first><last>Achananuparp</last><affiliation>Singapore Management University</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Jing</first><last>Jiang</last><affiliation>Australian National University and Singapore Management University</affiliation></author>
       <author><first>Phey Ling</first><last>Kit</last></author>
       <author><first>Nicholas Gabriel</first><last>Lim</last><affiliation>Singapore University of Social Sciences</affiliation></author>
@@ -15017,7 +15017,7 @@
       <title>Targeted Syntactic Evaluation for Grammatical Error Correction</title>
       <author><first>Aomi</first><last>Koyama</last><affiliation>Tokyo Metropolitan University and KDDI Corporation</affiliation></author>
       <author><first>Masato</first><last>Mita</last><affiliation>The University of Tokyo</affiliation></author>
-      <author><first>Su-Youn</first><last>Yoon</last><affiliation>EduLab</affiliation></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last><affiliation>EduLab</affiliation></author>
       <author><first>Yasufumi</first><last>Takama</last><affiliation>Tokyo Metropolitan University</affiliation></author>
       <author><first>Mamoru</first><last>Komachi</last><affiliation>Hitotsubashi University</affiliation></author>
       <pages>21108-21125</pages>
@@ -15142,7 +15142,7 @@
       <title><fixed-case>LLM</fixed-case> Meets Scene Graph: Can Large Language Models Understand and Generate Scene Graphs? A Benchmark and Empirical Study</title>
       <author><first>Dongil</first><last>Yang</last></author>
       <author><first>Minjin</first><last>Kim</last><affiliation>Yonsei University</affiliation></author>
-      <author><first>Sunghwan</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last></author>
       <author><first>Beong-woo</first><last>Kwak</last><affiliation>Yonsei University</affiliation></author>
       <author><first>Minjun</first><last>Park</last></author>
       <author><first>Jinseok</first><last>Hong</last></author>
@@ -15177,7 +15177,7 @@
       <author><first>Weipeng</first><last>Jiang</last></author>
       <author><first>Qian</first><last>Wang</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chao</first><last>Shen</last><affiliation>Xi’an Jiaotong University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>21376-21403</pages>
       <abstract>Large Language Models (LLMs) have emerged as the new recommendation engines, surpassing traditional methods in both capability and scope, particularly in code generation. In this paper, we reveal a novel **provider bias** in LLMs: without explicit directives, these models show systematic preferences for services from specific providers in their recommendations (e.g., favoring Google Cloud over Microsoft Azure). To systematically investigate this bias, we develop an automated pipeline to construct the dataset, incorporating 6 distinct coding task categories and 30 real-world application scenarios. Leveraging this dataset, we conduct the **first** comprehensive empirical study of provider bias in LLM code generation across seven state-of-the-art LLMs, utilizing approximately 500 million tokens (equivalent to $5,000+ in computational costs). Our findings reveal that LLMs exhibit significant provider preferences, predominantly favoring services from Google and Amazon, and can autonomously modify input code to incorporate their preferred providers without users’ requests. Such a bias holds far-reaching implications for market dynamics and societal equilibrium, potentially contributing to digital monopolies. It may also deceive users and violate their expectations, leading to various consequences. We call on the academic community to recognize this emerging issue and develop effective evaluation and mitigation methods to uphold AI security and fairness.</abstract>
       <url hash="8c0eada7">2025.acl-long.1038</url>
@@ -15211,7 +15211,7 @@
     </paper>
     <paper id="1041">
       <title>Neuron Empirical Gradient: Discovering and Quantifying Neurons’ Global Linear Controllability</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Zehui</first><last>Jiang</last></author>
       <author><first>Naoki</first><last>Yoshinaga</last><affiliation>Institute of Industrial Science, the University of Tokyo</affiliation></author>
       <pages>21446-21477</pages>
@@ -15271,7 +15271,7 @@
       <author><first>Ziqing</first><last>Yang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Xinyue</first><last>Shen</last></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <pages>21538-21566</pages>
       <abstract>Jailbreak attacks aim to bypass the LLMs’ safeguards. While researchers have proposed different jailbreak attacks in depth, they have done so in isolation—either with unaligned settings or comparing a limited range of methods. To fill this gap, we present a large-scale evaluation of various jailbreak attacks. We collect 17 representative jailbreak attacks, summarize their features, and establish a novel jailbreak attack taxonomy. Then we conduct comprehensive measurement and ablation studies across nine aligned LLMs on 160 forbidden questions from 16 violation categories. Also, we test jailbreak attacks under eight advanced defenses. Based on our taxonomy and experiments, we identify some important patterns, such as heuristic-based attacks, which could achieve high attack success rates but are easy to mitigate by defenses. Our study offers valuable insights for future research on jailbreak attacks and defenses and serves as a benchmark tool for researchers and practitioners to evaluate them effectively.</abstract>
       <url hash="0794ec11">2025.acl-long.1045</url>
@@ -15310,7 +15310,7 @@
       <title>Enhancing Mathematical Reasoning in <fixed-case>LLM</fixed-case>s by Stepwise Correction</title>
       <author><first>Zhenyu</first><last>Wu</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Qingkai</first><last>Zeng</last><affiliation>Amazon</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Zhaoxuan</first><last>Tan</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Chao</first><last>Shen</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
@@ -15335,7 +15335,7 @@
       <author><first>Didi</first><last>Zhang</last><affiliation>Soochow University</affiliation></author>
       <author><first>Yaxin</first><last>Fan</last></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>21656-21672</pages>
       <abstract>Goal-oriented proactive dialogue systems are designed to guide user conversations seamlessly towards specific objectives by planning a goal-oriented path. However, previous research has focused predominantly on optimizing these paths while neglecting the inconsistencies that may arise between generated responses and dialogue contexts, including user profiles, dialogue history, domain knowledge, and subgoals. To address this issue, we introduce a model-agnostic two-stage Consistency Reflection and Correction (CRC) framework. Specifically, in the consistency reflection stage, the model is prompted to reflect on the discrepancies between generated responses and dialogue contexts, identifying inconsistencies and suggesting possible corrections. In the consistency correction stage, the model generates responses that are more consistent with the dialogue context based on these reflection results. We conducted experiments on various model architectures with different parameter sizes, including encoder-decoder models (BART, T5) and decoder-only models (GPT-2, DialoGPT, Phi3, Mistral and LLaMA3), and the experimental results on three datasets demonstrate that our CRC framework significantly improves the consistency between generated responses and dialogue contexts.</abstract>
       <url hash="0dc90fb8">2025.acl-long.1050</url>
@@ -15466,7 +15466,7 @@
     </paper>
     <paper id="1059">
       <title>Typology-Guided Adaptation in Multilingual Models</title>
-      <author><first>Ndapa</first><last>Nakashole</last><affiliation>University of California, San Diego</affiliation></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last><affiliation>University of California, San Diego</affiliation></author>
       <pages>21819-21835</pages>
       <abstract>Multilingual models often treat language diversity as a problem of data imbalance, overlooking structural variation. We introduce the *Morphological Index* (MoI), a typologically grounded metric that quantifies how strongly a language relies on surface morphology for noun classification. Building on MoI, we propose *MoI-MoE*, a Mixture of Experts model that routes inputs based on morphological structure. Evaluated on 10 Bantu languages—a large, morphologically rich and underrepresented family—MoI-MoE outperforms strong baselines, improving Swahili accuracy by 14 points on noun class recognition while maintaining performance on morphology-rich languages like Zulu. These findings highlight typological structure as a practical and interpretable signal for multilingual model adaptation.</abstract>
       <url hash="3c22170a">2025.acl-long.1059</url>
@@ -15506,7 +15506,7 @@
       <author><first>Zhishang</first><last>Xiang</last></author>
       <author><first>Yilin</first><last>Xiao</last></author>
       <author><first>Le</first><last>Wang</last></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Xinrun</first><last>Wang</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Jinsong</first><last>Su</last><affiliation>Xiamen University</affiliation></author>
       <pages>21863-21882</pages>
@@ -15521,7 +15521,7 @@
       <author><first>Huan</first><last>Zhao</last><affiliation>Hunan University</affiliation></author>
       <author><first>Zhixue</first><last>Zhao</last><affiliation>University of Sheffield, University of Sheffield</affiliation></author>
       <author><first>Xupeng</first><last>Zha</last><affiliation>Hunan University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Hunan University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Hunan University</affiliation></author>
       <author><first>Zhihua</first><last>Jiang</last></author>
       <pages>21883-21896</pages>
       <abstract>We revisit knowledge-based visual reasoning (KB-VR) in light of modern advances in multimodal large language models (MLLMs), and make the following contributions: (i) We propose Visual Knowledge Card (VKC) – a novel image that incorporates not only internal visual knowledge (e.g., scene-aware information) detected from the raw image, but also external world knowledge (e.g., attribute or object knowledge) produced by a knowledge generator; (ii) We present VKC-based Multi-Image Reasoning (VKC-MIR) – a four-stage pipeline which harnesses a state-of-the-art scene perception engine to construct an initial VKC (Stage-1), a powerful LLM to generate relevant domain knowledge (Stage-2), an excellent image editing toolkit to introduce generated knowledge into the updated VKC (Stage-3), and finally, an emerging multi-image MLLM to solve the VKC-enhanced task (Stage-4). By performing experiments on three popular KB-VR benchmarks, our approach achieves new state-of-the-art results compared to previous top-performing models.</abstract>
@@ -15830,7 +15830,7 @@
     <paper id="1086">
       <title><fixed-case>R</fixed-case>i<fixed-case>OT</fixed-case>: Efficient Prompt Refinement with Residual Optimization Tree</title>
       <author><first>Chenyi</first><last>Zhou</last></author>
-      <author><first>Zhengyan</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengyan</first><last>Shi</last></author>
       <author><first>Yuan</first><last>Yao</last><affiliation>Zhejiang University</affiliation></author>
       <author><first>Lei</first><last>Liang</last></author>
       <author><first>Huajun</first><last>Chen</last><affiliation>Zhejiang University</affiliation></author>
@@ -15862,7 +15862,7 @@
       <author><first>Zi-Ao</first><last>Ma</last></author>
       <author><first>Tian</first><last>Lan</last></author>
       <author><first>Yuehao</first><last>Zhao</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xian-Ling</first><last>Mao</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>22340-22361</pages>
       <abstract>Driven by the remarkable progress in diffusion models, text-to-image generation has achieved substantial advancements, underscoring the urgent need for robust automatic quality assessment. This task is inherently complex, requiring evaluations that range from object presence and attribute correctness to relational consistency and visual fidelity. Consequently, current state-of-the-art MLLM-based approaches often rely on powerful commercial models such as GPT-4o, which offer superior reasoning and instruction-following capabilities but are not universally accessible. In contrast, while open-source MLLMs demonstrate promising skills in vision and language understanding, they underperform in comprehensive image quality assessment.To address these challenges, we propose a task decomposition evaluation framework based on GPT-4o to automatically construct a specialized training dataset, breaking down the multifaceted evaluation process into simpler sub-tasks and thus reducing learning complexity. Building on this dataset, we design novel training strategies to distill GPT-4o’s evaluation capabilities into a <tex-math>7\text{B}</tex-math> open-source MLLM, MiniCPM-V-2.6, enabling it to better follow instructions across diverse assessment criteria. Furthermore, to reliably and comprehensively assess prior works and our proposed model, we manually annotate a meta-evaluation benchmark that includes chain-of-thought explanations alongside quality scores for generated images.Experimental results demonstrate that our distilled open-source MLLM significantly outperforms the current state-of-the-art GPT-4o-base baseline, VIEScore, with over 4.6% improvement in Spearman and Kendall correlations with human judgments.</abstract>
@@ -15998,7 +15998,7 @@
     </paper>
     <paper id="1097">
       <title>Towards the Law of Capacity Gap in Distilling Language Models</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Qiuchi</first><last>Li</last></author>
       <author><first>Dawei</first><last>Song</last><affiliation>Beijing Institute of Technology and Open University</affiliation></author>
       <author><first>Zheyu</first><last>Ye</last><affiliation>Xiaohongshu Inc</affiliation></author>
@@ -16020,12 +16020,12 @@
       <author><first>Akshay</first><last>Raghavan</last></author>
       <author><first>Scott</first><last>Feltman</last></author>
       <author><first>Whitney</first><last>Ringwald</last></author>
-      <author><first>Ryan L.</first><last>Boyd</last></author>
+      <author id="ryan-boyd"><first>Ryan L.</first><last>Boyd</last></author>
       <author><first>Benjamin</first><last>Luft</last></author>
       <author><first>Camilo</first><last>Ruggero</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
       <author><first>Roman</first><last>Kotov</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>22529-22544</pages>
       <abstract>Current speech encoding pipelines often rely on an additional text-based LM to get robust representations of human communication, even though SotA speech-to-text models often have a LM within. This work proposes an approach to improve the LM within an audio model such that the subsequent text-LM is unnecessary. We introduce **WhiSPA** (**Whi**sper with **S**emantic and **P**sychological **A**lignment), which leverages a novel audio training objective: contrastive loss with a language model embedding as a teacher. Using over 500k speech segments from mental health audio interviews, we evaluate the utility of aligning Whisper’s latent space with semantic representations from a text autoencoder (SBERT) and lexically derived embeddings of basic psychological dimensions: emotion and personality. Over self-supervised affective tasks and downstream psychological tasks, WhiSPA surpasses current speech encoders, achieving an average error reduction of 73.4% and 83.8%, respectively. WhiSPA demonstrates that it is not always necessary to run a subsequent text LM on speech-to-text output in order to get a rich psychological representation of human communication.</abstract>
       <url hash="41f7e9c9">2025.acl-long.1098</url>
@@ -16082,7 +16082,7 @@
       <author><first>Yao</first><last>Du</last><affiliation>Beihang University</affiliation></author>
       <author><first>Tao</first><last>Ji</last></author>
       <author><first>Jianing</first><last>Wang</last><affiliation>Meituan</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Aimin</first><last>Zhou</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Mengdi</first><last>Zhang</last></author>
@@ -16175,7 +16175,7 @@
       <title><fixed-case>XDAC</fixed-case>: <fixed-case>XAI</fixed-case>-Driven Detection and Attribution of <fixed-case>LLM</fixed-case>-Generated News Comments in <fixed-case>K</fixed-case>orean</title>
       <author><first>Wooyoung</first><last>Go</last></author>
       <author><first>Hyoungshick</first><last>Kim</last><affiliation>Sungkyunkwan University</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Yongdae</first><last>Kim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <pages>22728-22750</pages>
       <abstract>Large language models (LLMs) generate human-like text, raising concerns about their misuse in creating deceptive content. Detecting LLM-generated comments (LGC) in online news is essential for preserving online discourse integrity and preventing opinion manipulation. However, effective detection faces two key challenges; the brevity and informality of news comments limit traditional methods, and the absence of a publicly available LGC dataset hinders model training, especially for languages other than English. To address these challenges, we propose a twofold approach. First, we develop an LGC generation framework to construct a high-quality dataset with diverse and complex examples. Second, we introduce XDAC (<tex-math>\textbf{X}</tex-math>AI-Driven <tex-math>\textbf{D}</tex-math>etection and <tex-math>\textbf{A}</tex-math>ttribution of LLM-Generated <tex-math>\textbf{C}</tex-math>omments), a framework utilizing explainable AI, designed for the detection and attribution of short-form LGC in Korean news articles. XDAC leverages XAI to uncover distinguishing linguistic patterns at both token and character levels. We present the first large-scale benchmark dataset, comprising 1.3M human-written comments from Korean news platforms and 1M LLM-generated comments from 14 distinct models. XDAC outperforms existing methods, achieving a 98.5% F1 score in LGC detection with a relative improvement of 68.1%, and an 84.3% F1 score in attribution. To validate real-world applicability, we analyze 5.24M news comments from Naver, South Korea’s leading online news platform, identifying 27,029 potential LLM-generated comments.</abstract>
@@ -16344,7 +16344,7 @@
       <author><first>Ziyi</first><last>Zhang</last></author>
       <author><first>Yule</first><last>Liu</last></author>
       <author><first>Michael</first><last>Backes</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>CISPA Helmholtz Center for Information Security</affiliation></author>
       <author><first>Xinlei</first><last>He</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <pages>22975-23005</pages>
       <abstract>Social media platforms are experiencing a growing presence of AI-Generated Texts (AIGTs). However, the misuse of AIGTs could have profound implications for public opinion, such as spreading misinformation and manipulating narratives. Despite its importance, it remains unclear how prevalent AIGTs are on social media. To address this gap, this paper aims to quantify and monitor the AIGTs on online social media platforms. We first collect a dataset (SM-D) with around <tex-math>2.4M</tex-math> posts from 3 major social media platforms: Medium, Quora, and Reddit. Then, we construct a diverse dataset (AIGTBench) to train and evaluate AIGT detectors. AIGTBench combines popular open-source datasets and our AIGT datasets generated from social media texts by 12 LLMs, serving as a benchmark for evaluating mainstream detectors. With this setup, we identify the best-performing detector (OSM-Det). We then apply OSM-Det to SM-D to track AIGTs across social media platforms from January 2022 to October 2024, using the AI Attribution Rate (AAR) as the metric. Specifically, Medium and Quora exhibit marked increases in AAR, rising from 1.77% to 37.03% and 2.06% to 38.95%, respectively. In contrast, Reddit shows slower growth, with AAR increasing from 1.31% to 2.45% over the same period. Our further analysis indicates that AIGTs on social media differ from human-written texts across several dimensions, including linguistic patterns, topic distributions, engagement levels, and the follower distribution of authors. We envision our analysis and findings on AIGTs in social media can shed light on future research in this domain.</abstract>
@@ -16381,7 +16381,7 @@
       <author><first>Ang</first><last>Lv</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <pages>23044-23056</pages>
       <abstract>Many positional encodings (PEs) are designed to exhibit long-term decay, based on an entrenched and long-standing inductive opinion: tokens farther away from the current position carry less relevant information. We argue that long-term decay is outdated in the era of LLMs, as LLMs are now applied to tasks demanding precise retrieval of in-context information from arbitrary positions. Firstly, we present empirical analyses on various PEs, demonstrating that models inherently learn attention with only a local-decay pattern while forming a U-shape pattern globally, contradicting the principle of long-term decay. Furthermore, we conduct a detailed analysis of rotary position encoding (RoPE, a prevalent relative positional encoding in LLMs), and found that the U-shape attention is caused by some learned components, which are also the key factor limiting RoPE’s expressiveness and extrapolation. Inspired by these insights, we propose High-frequency rotary Position Encoding (HoPE). HoPE replaces the specific components in RoPE with position-independent ones, retaining only high-frequency signals, which also breaks the principle of long-term decay in theory. HoPE achieves two major advantages: (1) Without constraints imposed by long-term decay, contradictory factors that limit attention optimization are removed. Thus, the model’s context awareness is enhanced. (2) HoPE exhibits greater robustness to the out-of-distribution behavior in attention patterns during extrapolation. The effectiveness of HoPE is validated through extensive experiments and with a large language model of up to 3 billion parameters.</abstract>
       <url hash="b8e83392">2025.acl-long.1123</url>
@@ -16541,7 +16541,7 @@
       <title>Employing Discourse Coherence Enhancement to Improve Cross-Document Event and Entity Coreference Resolution</title>
       <author><first>Xinyu</first><last>Chen</last></author>
       <author><first>Peifeng</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <pages>23272-23286</pages>
       <abstract>Cross-Document Coreference Resolution (CDCR) aims to identify and group together mentions of a specific event or entity that occur across multiple documents. In contrast to the within-document tasks, in which event and entity mentions are linked by rich and coherent contexts, cross-document mentions lack such critical contexts, which presents a significant challenge in establishing connections among them. To address this issue, we introduce a novel task Cross-Document Discourse Coherence Enhancement (CD-DCE) to enhance the discourse coherence between two cross-document event or entity mentions. Specifically, CD-DCE first selects coherent texts and then adds them between two cross-document mentions to form a new coherent document. Subsequently, the coherent text is employed to represent the event or entity mentions and to resolve any coreferent mentions. Experimental results on the three popular datasets demonstrate that our proposed method outperforms several state-of-the-art baselines.</abstract>
       <url hash="708634d1">2025.acl-long.1134</url>
@@ -16643,7 +16643,7 @@
       <author><first>Tianyi Alex</first><last>Qiu</last></author>
       <author><first>Boyuan</first><last>Chen</last></author>
       <author><first>Jiayi</first><last>Zhou</last><affiliation>Peking University</affiliation></author>
-      <author id="changye-li"><first>Changye</first><last>Li</last></author>
+      <author><first>Changye</first><last>Li</last></author>
       <author><first>Hantao</first><last>Lou</last></author>
       <author><first>Josef</first><last>Dai</last><affiliation>Peking University</affiliation></author>
       <author><first>Yunhuai</first><last>Liu</last><affiliation>Peking University</affiliation></author>
@@ -16771,7 +16771,7 @@
       <author><first>An</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yanyan</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Bing</first><last>Qin</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Ting</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>23564-23587</pages>
       <abstract>Large language models (LLMs) have become increasingly central to AI applications worldwide, necessitating robust multilingual safety alignment to ensure secure deployment across diverse linguistic contexts. Existing preference learning methods for safety alignment, such as RLHF and DPO, are primarily monolingual and struggle with noisy multilingual data. To address these limitations, we introduce <b>Mul</b>tilingual reward ga<b>P</b> <b>O</b>ptimization (<b>MPO</b>), a novel approach that leverages the well-aligned safety capabilities of the dominant language (<i>e.g.</i>, English) to improve safety alignment across multiple languages. MPO directly minimizes the reward gap difference between the dominant language and target languages, effectively transferring safety capabilities while preserving the original strengths of the dominant language. Extensive experiments on three LLMs, LLaMA-3.1, Gemma-2 and Qwen2.5, validate MPO’s efficacy in multilingual safety alignment without degrading general multilingual utility.</abstract>
@@ -16790,7 +16790,7 @@
       <author><first>Yu</first><last>Tsao</last><affiliation>Academia Sinica</affiliation></author>
       <author><first>Junichi</first><last>Yamagishi</last><affiliation>National Institute of Informatics</affiliation></author>
       <author><first>Yuxuan</first><last>Wang</last><affiliation>ByteDance</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Shanghai Artificial Intelligence Laboratory, Tsinghua University and University College London</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Shanghai Artificial Intelligence Laboratory, Tsinghua University and University College London</affiliation></author>
       <pages>23588-23609</pages>
       <abstract>This paper explores a novel perspective to speech quality assessment by leveraging natural language descriptions, offering richer, more nuanced insights than traditional numerical scoring methods. Natural language feedback provides instructive recommendations and detailed evaluations, yet existing datasets lack the comprehensive annotations needed for this approach. To bridge this gap, we introduce QualiSpeech, a comprehensive low-level speech quality assessment dataset encompassing 11 key aspects and detailed natural language comments that include reasoning and contextual insights. Additionally, we propose the QualiSpeech Benchmark to evaluate the low-level speech understanding capabilities of auditory large language models (LLMs). Experimental results demonstrate that finetuned auditory LLMs can reliably generate detailed descriptions of noise and distortion, effectively identifying their types and temporal characteristics. The results further highlight the potential for incorporating reasoning to enhance the accuracy and reliability of quality assessments. The dataset can be found at https://huggingface.co/datasets/tsinghua-ee/QualiSpeech.</abstract>
       <url hash="05673a44">2025.acl-long.1150</url>
@@ -16801,7 +16801,7 @@
       <title>On the Relation Between Fine-Tuning, Topological Properties, and Task Performance in Sense-Enhanced Embeddings</title>
       <author><first>Deniz Ekin</first><last>Yavas</last></author>
       <author><first>Timothée</first><last>Bernard</last><affiliation>Université Paris Cité</affiliation></author>
-      <author><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last><affiliation>Université de Paris</affiliation></author>
       <author><first>Laura</first><last>Kallmeyer</last><affiliation>Heinrich Heine University Düsseldorf, Germany</affiliation></author>
       <pages>23610-23625</pages>
       <abstract>Topological properties of embeddings, such as isotropy and uniformity, are closely linked to their expressiveness, and improving these properties enhances the embeddings’ ability to capture nuanced semantic distinctions. However, fine-tuning can reduce the expressiveness of the embeddings of language models. This study investigates the relation between fine-tuning, topology of the embedding space, and task performance in the context of sense knowledge enhancement, focusing on identifying the topological properties that contribute to the success of sense-enhanced embeddings. We experiment with two fine-tuning methods: *Supervised Contrastive Learning (SCL)* and *Supervised Predictive Learning (SPL)*. Our results show that SPL, the most standard approach, exhibits varying effectiveness depending on the language model and is inconsistent in producing successful sense-enhanced embeddings. In contrast, SCL achieves this consistently. Furthermore, while the embeddings with only increased *sense-alignment* show reduced task performance, those that also exhibit high *isotropy* and balance *uniformity* with *sense-alignment* achieve the best results. Additionally, our findings indicate that supervised and unsupervised tasks benefit from these topological properties to varying degrees.</abstract>
@@ -16863,7 +16863,7 @@
       <title>Prediction Hubs are Context-Informed Frequent Tokens in <fixed-case>LLM</fixed-case>s</title>
       <author><first>Beatrix Miranda Ginn</first><last>Nielsen</last></author>
       <author><first>Iuri</first><last>Macocco</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
-      <author><first>Marco</first><last>Baroni</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last><affiliation>Universitat Pompeu Fabra</affiliation></author>
       <pages>23715-23745</pages>
       <abstract>Hubness, the tendency for a few points to be among the nearest neighbours of a disproportionate number of other points, commonly arises when applying standard distance measures to high-dimensional data, often negatively impacting distance-based analysis. As autoregressive large language models (LLMs) operate on high-dimensional representations, we ask whether they are also affected by hubness. We first prove that the only large-scale representation comparison operation performed by LLMs, namely that between context and unembedding vectors to determine continuation probabilities, is not characterized by the concentration of distances phenomenon that typically causes the appearance of nuisance hubness. We then empirically show that this comparison still leads to a high degree of hubness, but the hubs in this case do not constitute a disturbance. They are rather the result of context-modulated frequent tokens often appearing in the pool of likely candidates for next token prediction. However, when other distances are used to compare LLM representations, we do not have the same theoretical guarantees, and, indeed, we see nuisance hubs appear. There are two main takeaways. First, hubness, while omnipresent in high-dimensional spaces, is not a negative property that needs to be mitigated when LLMs are being used for next token prediction. Second, when comparing representations from LLMs using Euclidean or cosine distance, there is a high risk of nuisance hubs and practitioners should use mitigation techniques if relevant.</abstract>
       <url hash="30ee9466">2025.acl-long.1156</url>
@@ -17292,7 +17292,7 @@
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
       <author><first>Lulu</first><last>Yu</last></author>
       <author><first>Baolong</first><last>Bi</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>24315-24329</pages>
       <abstract>Large language models (LLMs) exhibit impressive performance across diverse tasks but often struggle to accurately gauge their knowledge boundaries, leading to confident yet incorrect responses. This paper explores leveraging LLMs’ internal states to enhance their perception of knowledge boundaries from efficiency and risk perspectives. We investigate whether LLMs can estimate their confidence using internal states before response generation, potentially saving computational resources. Our experiments on datasets like Natural Questions, HotpotQA, and MMLU reveal that LLMs demonstrate significant pre-generation perception, which is further refined post-generation, with perception gaps remaining stable across varying conditions. To mitigate risks in critical domains, we introduce Consistency-based Confidence Calibration (<tex-math>C^3</tex-math>), which assesses confidence consistency through question reformulation. <tex-math>C^3</tex-math> significantly improves LLMs’ ability to recognize their knowledge gaps, enhancing the unknown perception rate by 5.6% on NQ and 4.9% on HotpotQA. Our findings suggest that pre-generation confidence estimation can optimize efficiency, while <tex-math>C^3</tex-math> effectively controls output risks, advancing the reliability of LLMs in practical applications.</abstract>
       <url hash="196350da">2025.acl-long.1184</url>
@@ -17316,8 +17316,8 @@
       <author><first>Tianhua</first><last>Zhang</last><affiliation>Chinese University of Hong Kong, The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>24349-24364</pages>
       <abstract>Knowledge Graphs (KGs) can serve as reliable knowledge sources for question answering (QA) due to their structured representation of knowledge. Existing research on the utilization of KG for large language models (LLMs) prevalently relies on subgraph retriever or iterative prompting, overlooking the potential synergy of LLMs’ step-wise reasoning capabilities and KGs’ structural nature. In this paper, we present DoG (Decoding on Graph), a novel framework that facilitates a deep synergy between LLMs and KGs. We first define a concept, well-formed chain, which consists of a sequence of interrelated fact triplets on the KGs, starting from question entities and leading to answers. We argue that this concept can serve as a principle for making faithful and sound reasoning for KGQA. To enable LLMs to generate well-formed chains, we propose graph-aware constrained decoding, in which a constraint derived from the topology of the KG regulates the decoding process of the LLMs. This constrained decoding method ensures the generation of well-formed chains while making full use of the step-wise reasoning capabilities of LLMs. Based on the above, DoG, a training-free approach, is able to provide faithful and sound reasoning trajectories grounded on the KGs. Experiments across various KGQA tasks with different background KGs demonstrate that DoG achieves superior and robust performance. DoG also shows general applicability with various open-source LLMs.</abstract>
       <url hash="d73fe50a">2025.acl-long.1186</url>
@@ -17417,7 +17417,7 @@
       <title>Evaluating <fixed-case>LLM</fixed-case>s for <fixed-case>P</fixed-case>ortuguese Sentence Simplification with Linguistic Insights</title>
       <author><first>Arthur Mariano Rocha De Azevedo</first><last>Scalercio</last><affiliation>Universidade Federal Fluminense</affiliation></author>
       <author><first>Elvis A. De</first><last>Souza</last></author>
-      <author><first>Maria José Bocorny</first><last>Finatto</last><affiliation>Universidade Federal do Rio Grande do Sul</affiliation></author>
+      <author id="maria-jose-b-finatto"><first>Maria José Bocorny</first><last>Finatto</last><affiliation>Universidade Federal do Rio Grande do Sul</affiliation></author>
       <author><first>Aline</first><last>Paes</last><affiliation>Universidade Federal Fluminense</affiliation></author>
       <pages>24452-24477</pages>
       <abstract>Sentence simplification (SS) focuses on adapting sentences to enhance their readability and accessibility. While large language models (LLMs) match task-specific baselines in English SS, their performance in Portuguese remains underexplored. This paper presents a comprehensive performance comparison of 26 state-of-the-art LLMs in Portuguese SS, alongside two simplification models trained explicitly for this task and language. They are evaluated under a one-shot setting across scientific, news, and government datasets. We benchmark the models with our newly introduced Gov-Lang-BR corpus (1,703 complex-simple sentence pairs from Brazilian government agencies) and two established datasets: PorSimplesSent and Museum-PT. Our investigation takes advantage of both automatic metrics and large-scale linguistic analysis to examine the transformations achieved by the LLMs. Furthermore, a qualitative assessment of selected generated outputs provides deeper insights into simplification quality. Our findings reveal that while open-source LLMs have achieved impressive results, closed-source LLMs continue to outperform them in Portuguese SS.</abstract>
@@ -17535,7 +17535,7 @@
       <author><first>Fuli</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Hamed</first><last>Zamani</last><affiliation>University of Massachusetts at Amherst</affiliation></author>
       <author><first>Xiangnan</first><last>He</last><affiliation>University of Science and Technology of China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>24607-24649</pages>
       <abstract>In the era of large models, content generation is gradually shifting to Personalized Generation (PGen), tailoring content to individual preferences and needs. This paper presents the first comprehensive survey on PGen, investigating existing research in this rapidly growing field. We conceptualize PGen from a unified perspective, systematically formalizing its key components, core objectives, and abstract workflows. Based on this unified perspective, we propose a multi-level taxonomy, offering an in-depth review of technical advancements, commonly used datasets, and evaluation metrics across multiple modalities, personalized contexts, and tasks. Moreover, we envision the potential applications of PGen and highlight open challenges and promising directions for future exploration. By bridging PGen research across multiple modalities, this survey serves as a valuable resource for fostering knowledge sharing and interdisciplinary collaboration, ultimately contributing to a more personalized digital landscape.</abstract>
       <url hash="de5f151b">2025.acl-long.1201</url>
@@ -17584,7 +17584,7 @@
     </paper>
     <paper id="1205">
       <title>Leveraging In-Context Learning for Political Bias Testing of <fixed-case>LLM</fixed-case>s</title>
-      <author><first>Patrick</first><last>Haller</last><affiliation>University of Zurich</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Jannis</first><last>Vamvas</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Rico</first><last>Sennrich</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Lena Ann</first><last>Jäger</last><affiliation>University of Zurich</affiliation></author>
@@ -17793,7 +17793,7 @@
       <author><first>Joseph</first><last>Gatto</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Omar</first><last>Sharif</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Parker</first><last>Seegmiller</last><affiliation>Dartmouth College</affiliation></author>
-      <author><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
       <pages>25109-25131</pages>
       <abstract>Event Argument Extraction (EAE) is a daunting information extraction problem — with significant limitations in few-shot cross-domain (FSCD) settings. A common solution to FSCD modeling is data augmentation. Unfortunately, existing augmentation methods are not well-suited to a variety of real-world EAE contexts, including (i) modeling long documents (documents with over 10 sentences), and (ii) modeling challenging role types (i.e., event roles with little to no training data and semantically outlying roles). We introduce two novel LLM-powered data augmentation methods for generating extractive document-level EAE samples using zero in-domain training data. We validate the generalizability of our approach on four datasets — showing significant performance increases in low-resource settings. Our highest performing models provide a 13-pt increase in F1 score on zero-shot role extraction in FSCD evaluation.</abstract>
       <url hash="ed0a3ff9">2025.acl-long.1221</url>
@@ -17817,7 +17817,7 @@
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Iqra</first><last>Zahid</last><affiliation>Imperial College London</affiliation></author>
       <author><first>Ian</first><last>Pratt-Hartmann</last><affiliation>University of Opole and University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>25155-25168</pages>
       <abstract>Transformer models have achieved remarkable performance in many formal reasoning tasks. Nonetheless, the extent of their comprehension pertaining to logical semantics and rules of inference remains somewhat uncertain. Evaluating such understanding necessitates a rigorous examination of these models’ generalisation capacity to out-of-distribution data. In this study, we probe the generalisation prowess of Transformer models with respect to the hitherto unexplored domain of numerical satisfiability problems. Our investigation reveals that Transformers exhibit minimal scale and noise invariance, alongside limited vocabulary and number invariance. However, even when Transformer models experience a notable decline in performance on out-of-distribution test sets, they often still surpass the random baseline by a considerable margin.</abstract>
       <url hash="a43302e4">2025.acl-long.1223</url>
@@ -17828,7 +17828,7 @@
       <title>The Nature of <fixed-case>NLP</fixed-case>: Analyzing Contributions in <fixed-case>NLP</fixed-case> Papers</title>
       <author><first>Aniket</first><last>Pramanick</last><affiliation>NEC and Technische Universität Darmstadt</affiliation></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>IT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>25169-25191</pages>
       <abstract>Natural Language Processing (NLP) is an established and dynamic field. Despite this, what constitutes NLP research remains debated. In this work, we address the question by quantitatively examining NLP research papers. We propose a taxonomy of research contributions and introduce _NLPContributions_, a dataset of nearly <tex-math>2k</tex-math> NLP research paper abstracts, carefully annotated to identify scientific contributions and classify their types according to this taxonomy. We also introduce a novel task of automatically identifying contribution statements and classifying their types from research papers. We present experimental results for this task and apply our model to ~<tex-math>29k</tex-math> NLP research papers to analyze their contributions, aiding in the understanding of the nature of NLP research. We show that NLP research has taken a winding path — with the focus on language and human-centric studies being prominent in the 1970s and 80s, tapering off in the 1990s and 2000s, and starting to rise again since the late 2010s. Alongside this revival, we observe a steady rise in dataset and methodological contributions since the 1990s, such that today, on average, individual NLP papers contribute in more ways than ever before. Our dataset and analyses offer a powerful lens for tracing research trends and offer potential for generating informed, data-driven literature surveys.</abstract>
@@ -17854,7 +17854,7 @@
       <author><first>Timothy E.</first><last>Burdick</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Inas S.</first><last>Khayal</last><affiliation>Dartmouth College</affiliation></author>
       <author><first>Sarah</first><last>DeLozier</last></author>
-      <author><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last><affiliation>Dartmouth College</affiliation></author>
       <pages>25222-25240</pages>
       <abstract>Follow-up question generation is an essential feature of dialogue systems as it can reduce conversational ambiguity and enhance modeling complex interactions. Conversational contexts often pose core NLP challenges such as (i) extracting relevant information buried in fragmented data sources, and (ii) modeling parallel thought processes. These two challenges occur frequently in medical dialogue as a doctor asks questions based not only on patient utterances but also their prior EHR data and current diagnostic hypotheses. Asking medical questions in asynchronous conversations compounds these issues as doctors can only rely on static EHR information to motivate follow-up questions. To address these challenges, we introduce FollowupQ, a novel framework for enhancing asynchronous medical conversation.FollowupQ is a multi-agent framework that processes patient messages and EHR data to generate personalized follow-up questions, clarifying patient-reported medical conditions. FollowupQ reduces requisite provider follow-up communications by 34%. It also improves performance by 17% and 5% on real and synthetic data, respectively. We also release the first public dataset of asynchronous medical messages with linked EHR data alongside 2,300 follow-up questions written by clinical experts for the wider NLP research community.</abstract>
       <url hash="aedc5a73">2025.acl-long.1226</url>
@@ -17881,7 +17881,7 @@
       <author><first>Emmanouil</first><last>Zaranis</last><affiliation>Instituto Superior Técnico</affiliation></author>
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>Sweta</first><last>Agrawal</last><affiliation>Google</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <pages>25261-25284</pages>
       <abstract>Quality estimation (QE)—the automatic assessment of translation quality—has recently become crucial across several stages of the translation pipeline, from data curation to training and decoding. While QE metrics have been optimized to align with human judgments, whether they encode social biases has been largely overlooked. Biased QE risks favoring certain demographic groups over others, e.g., by exacerbating gaps in visibility and usability. This paper defines and investigates gender bias of QE metrics and discusses its downstream implications for machine translation (MT). Experiments with state-of-the-art QE metrics across multiple domains, datasets, and languages reveal significant bias. When a human entity’s gender in the source is undisclosed, masculine-inflected translations score higher than feminine-inflected ones, and gender-neutral translations are penalized. Even when contextual cues disambiguate gender, using context-aware QE metrics leads to more errors in selecting the correct translation inflection for feminine referents than for masculine ones. Moreover, a biased QE metric affects data filtering and quality-aware decoding. Our findings underscore the need for a renewed focus on developing and evaluating QE metrics centered on gender.</abstract>
       <url hash="e275d43e">2025.acl-long.1228</url>
@@ -17949,7 +17949,7 @@
       <author><first>Lang</first><last>Gao</last></author>
       <author><first>Jiahui</first><last>Geng</last></author>
       <author><first>Xiangliang</first><last>Zhang</last><affiliation>University of Notre Dame</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Xiuying</first><last>Chen</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>25378-25398</pages>
       <abstract>Jailbreaking in Large Language Models (LLMs) is a major security concern as it can deceive LLMs into generating harmful text. However, understanding of how jailbreaking works remains limited, hindering the development of effective defense strategies. To address this issue, we conduct a large-scale analysis of seven different jailbreak methods and identify that disagreements among methods stem from insufficient observation samples.We introduce the concept of a safety boundary and discover that jailbreaks shift harmful activations outside this boundary, where LLMs become less sensitive to harmful information. Our analysis reveals that low and middle layers play a critical role in these shifts, while deeper layers have a lesser impact.Building on these insights, we propose a novel defense mechanism called Activation Boundary Defense (ABD), which adaptively constrains activations within the safety boundary. To enhance its effectiveness, we use Bayesian optimization to selectively apply the defense to the low and middle layers.Experiments on several benchmark datasets demonstrate that ABD achieves an average Defense Success Rate (DSR) of over 98% against various jailbreak attacks, with less than a 2% impact on the model’s general capabilities.</abstract>
@@ -17965,9 +17965,9 @@
       <author><first>Jianpeng</first><last>Cheng</last><affiliation>Meta</affiliation></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last><affiliation>Apple</affiliation></author>
       <author><first>Peter</first><last>Boothroyd</last><affiliation>Apple</affiliation></author>
-      <author><first>Hector</first><last>Martinez Alonso</last><affiliation>Apple</affiliation></author>
+      <author id="hector-martinez-alonso"><first>Hector</first><last>Martinez Alonso</last><affiliation>Apple</affiliation></author>
       <author><first>Diarmuid</first><last>O Seaghdha</last><affiliation>Apple</affiliation></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>25399-25434</pages>
       <abstract>This work evaluates the potential of large language models (LLMs) to power digital assistants capable of complex action execution. Such assistants rely on pre-trained programming knowledge to execute multi-step goals by composing objects and functions defined in assistant libraries into action execution programs. To achieve this, we develop ASPERA, a framework comprising an assistant library simulation and a human-assisted LLM data generation engine. Our engine allows developers to guide LLM generation of high-quality tasks consisting of complex user queries, simulation state and corresponding validation programs, tackling data availability and evaluation robustness challenges. Alongside the framework we release Asper-Bench, an evaluation dataset of 250 challenging tasks generated using ASPERA, which we use to show that program generation grounded in custom assistant libraries is a significant challenge to LLMs compared to dependency-free code generation.</abstract>
       <url hash="ed982fc7">2025.acl-long.1234</url>
@@ -18104,7 +18104,7 @@
       <author><first>Weiwen</first><last>Xu</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Ruochen</first><last>Zhao</last></author>
       <author><first>Fangkai</first><last>Jiao</last></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Shanda Group and Alibaba Group</affiliation></author>
       <pages>25589-25604</pages>
       <abstract>Large language models excel at problem-solving but often struggle with complex reasoning and factual accuracy. While chain-of-thought and retrieval-augmented generation help break down problems and retrieve knowledge, they still falter on challenging tasks like competitive programming due to frequent reasoning errors and irrelevant retrieval. To address this, we introduce Critic-guided planning with Retrieval-augmentation, CR-Planner, a novel framework that leverages fine-tuned critic models to guide both reasoning and retrieval processes through planning. CR-Planner iteratively selects and executes sub-goals, guided by critic models. A sub-goal critic identifies promising sub-goals from reasoning, query generation, and retrieval, while an execution critic evaluates outputs of sub-goal executions. We employ Monte Carlo Tree Search to collect data for critic training, allowing systematic exploration of action sequences and effective navigation toward the final answer. We evaluate CR-Planner on challenging domain-knowledge-intensive and reasoning-heavy tasks, including competitive programming, theorem-driven math reasoning, and complex domain retrieval problems. It significantly outperforms baselines, demonstrating effectiveness in both reasoning and retrieval.</abstract>
@@ -18144,7 +18144,7 @@
       <title><fixed-case>C</fixed-case>ultural<fixed-case>B</fixed-case>ench: A Robust, Diverse and Challenging Benchmark for Measuring <fixed-case>LM</fixed-case>s’ Cultural Knowledge Through Human-<fixed-case>AI</fixed-case> Red-Teaming</title>
       <author><first>Yu Ying</first><last>Chiu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Liwei</first><last>Jiang</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Chan Young</first><last>Park</last></author>
       <author><first>Shuyue Stella</first><last>Li</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Sahithya</first><last>Ravi</last></author>
@@ -18208,7 +18208,7 @@
       <author><first>Sarah E.</first><last>Finch</last><affiliation>Emory University</affiliation></author>
       <author><first>Ellie S.</first><last>Paek</last></author>
       <author><first>Ikseon</first><last>Choi</last><affiliation>Emory University</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>25789-25806</pages>
       <abstract>As chatbots become integral to daily life, personalizing systems is key for fostering trust, engagement, and inclusivity. This study examines how linguistic similarity affects chatbot performance, focusing on integrating African American English (AAE) into virtual agents to better serve the African American community. We develop text-based and spoken chatbots using large language models and text-to-speech technology, then evaluate them with AAE speakers against standard English chatbots. Our results show that while text-based AAE chatbots often underperform, spoken chatbots benefit from an African American voice and AAE elements, improving performance and preference. These findings underscore the complexities of linguistic personalization and the dynamics between text and speech modalities, highlighting technological limitations that affect chatbots’ AA speech generation and pointing to promising future research directions.</abstract>
       <url hash="648ebcf6">2025.acl-long.1252</url>
@@ -18243,7 +18243,7 @@
       <title>Language Fusion for Parameter-Efficient Cross-lingual Transfer</title>
       <author><first>Philipp</first><last>Borchert</last><affiliation>IÉSEG School of Management and KU Leuven</affiliation></author>
       <author><first>Ivan</first><last>Vulić</last><affiliation>Google DeepMind and University of Cambridge</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <author><first>Jochen</first><last>De Weerdt</last><affiliation>KU Leuven</affiliation></author>
       <pages>25848-25868</pages>
       <abstract>Limited availability of multilingual text corpora for training language models often leads to poor performance on downstream tasks due to undertrained representation spaces for languages other than English. This ‘under-representation’ has motivated recent cross-lingual transfer methods to leverage the English representation space by e.g. mixing English and ‘non-English’ tokens at the input level or extending model parameters to accommodate new languages. However, these approaches often come at the cost of increased computational complexity. We propose Fusion for Language Representations (FLARE) in adapters, a novel method that enhances representation quality and downstream performance for languages other than English while maintaining parameter efficiency. FLARE integrates source and target language representations within low-rank (LoRA) adapters using lightweight linear transformations, maintaining parameter efficiency while improving transfer performance. A series of experiments across representative cross-lingual natural language understanding tasks, including natural language inference, question-answering and sentiment analysis, demonstrate FLARE’s effectiveness. FLARE achieves performance improvements of 4.9% for Llama 3.1 and 2.2% for Gemma 2 compared to standard LoRA fine-tuning on question-answering tasks, as measured by the exact match metric.</abstract>
@@ -18281,7 +18281,7 @@
     </paper>
     <paper id="1258">
       <title>Do Language Models Have Semantics? On the Five Standard Positions</title>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>25910-25922</pages>
       <abstract>We identify five positions on whether large language models (LLMs) and chatbots can be said to exhibit semantic understanding. These positions differ in whether they attribute semantics to LLMs and/or chatbots trained on feedback, what kind of semantics they attribute (inferential or referential), and in virtue of what they attribute referential semantics (internal or external causes). This allows for 2^^4=16 logically possible positions, but we have only seen people argue for five of these. Based on a pairwise comparison of these five positions, we conclude that the better theory of semantics in large language models is, in fact, a sixth combination: Both large language models and chatbots have inferential and referential semantics, grounded in both internal and external causes.</abstract>
       <url hash="6556ccb5">2025.acl-long.1258</url>
@@ -18310,7 +18310,7 @@
       <author><first>Phillip</first><last>Rust</last></author>
       <author><first>Ruchira</first><last>Dhar</last></author>
       <author><first>Daniel</first><last>Hershcovich</last><affiliation>University of Copenhagen</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>25949-25982</pages>
       <abstract>This paper explores the effectiveness of Multimodal Large Language models (MLLMs) as assistive technologies for visually impaired individuals. We conduct a user survey to identify adoption patterns and key challenges users face with such technologies. Despite a high adoption rate of these models, our findings highlight concerns related to contextual understanding, cultural sensitivity, and complex scene understanding, particularly for individuals who may rely solely on them for visual interpretation. Informed by these results, we collate five user-centred tasks with image and video inputs, including a novel task on Optical Braille Recognition. Our systematic evaluation of twelve MLLMs reveals that further advancements are necessary to overcome limitations related to cultural context, multilingual support, Braille reading comprehension, assistive object recognition, and hallucinations. This work provides critical insights into the future direction of multimodal AI for accessibility, underscoring the need for more inclusive, robust, and trustworthy visual assistance technologies.</abstract>
       <url hash="22a8a133">2025.acl-long.1260</url>
@@ -18321,7 +18321,7 @@
       <title><fixed-case>H</fixed-case>um<fixed-case>T</fixed-case> <fixed-case>D</fixed-case>um<fixed-case>T</fixed-case>: Measuring and controlling human-like language in <fixed-case>LLM</fixed-case>s</title>
       <author><first>Myra</first><last>Cheng</last><affiliation>Stanford University</affiliation></author>
       <author><first>Sunny</first><last>Yu</last></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>25983-26008</pages>
       <abstract>Should LLMs generate language that makes them seem human? Human-like language might improve user experience, but might also lead to deception, overreliance, and stereotyping. Assessing these potential impacts requires a systematic way to measure human-like tone in LLM outputs. We introduce HumT and SocioT, metrics for human-like tone and other dimensions of social perceptions in text data based on relative probabilities from an LLM. By measuring HumT across preference and usage datasets, we find that users prefer less human-like outputs from LLMs in many contexts. HumT also offers insights into the perceptions and impacts of anthropomorphism: human-like LLM outputs are highly correlated with warmth, social closeness, femininity, and low status, which are closely linked to the aforementioned harms. We introduce DumT, a method using HumT to systematically control and reduce the degree of human-like tone while preserving model performance. DumT offers a practical approach for mitigating risks associated with anthropomorphic language generation.</abstract>
       <url hash="10178768">2025.acl-long.1261</url>
@@ -18404,7 +18404,7 @@
     </paper>
     <paper id="1268">
       <title><fixed-case>QG</fixed-case>-<fixed-case>SMS</fixed-case>: Enhancing Test Item Analysis via Student Modeling and Simulation</title>
-      <author><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="bang-nguyen"><first>Bang</first><last>Nguyen</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Tingting</first><last>Du</last></author>
       <author><first>Mengxia</first><last>Yu</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Lawrence</first><last>Angrave</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
@@ -18421,7 +18421,7 @@
       <author><first>Xueying</first><last>Bai</last></author>
       <author><first>Mudan</first><last>Chen</last></author>
       <author><first>Greg</first><last>Durrett</last><affiliation>University of Texas at Austin</affiliation></author>
-      <author><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last><affiliation>US Naval Academy</affiliation></author>
       <author><first>Niranjan</first><last>Balasubramanian</last><affiliation>State University of New York, Stony Brook</affiliation></author>
       <pages>26169-26199</pages>
       <abstract>Understanding how events in a scenario causally connect with each other is important for effectively modeling and reasoning about events. But event reasoning remains a difficult challenge, and despite recent advances, Large Language Models (LLMs) still struggle to accurately identify causal connections between events. This struggle leads to poor performance on deeper reasoning tasks like event forecasting and timeline understanding. To address this challenge, we investigate the generation of causal event graphs (e.g., A enables B) as a parallel mechanism to help LLMs explicitly represent causality during inference. This paper evaluates both how to generate correct graphs as well as how graphs can assist reasoning. We propose a collaborative approach to causal graph generation where we use LLMs to simulate experts that focus on specific semantic relations. The experts engage in multiple rounds of discussions which are then consolidated by a final expert. Then, to demonstrate the utility of causal graphs, we use them on multiple downstream applications, and also introduce a new explainable event prediction task that requires a causal chain of events in the explanation. These explanations are more informative and coherent than baseline generations. Finally, our overall approach not finetuned on any downstream task, achieves competitive results with state-of-the-art models on both forecasting and next event prediction tasks.</abstract>
@@ -18433,7 +18433,7 @@
       <title><fixed-case>L</fixed-case>ogic<fixed-case>P</fixed-case>ro: Improving Complex Logical Reasoning via Program-Guided Learning</title>
       <author><first>Jin</first><last>Jiang</last></author>
       <author><first>Yuchen</first><last>Yan</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Jianing</first><last>Wang</last><affiliation>Meituan</affiliation></author>
       <author><first>Shuai</first><last>Peng</last><affiliation>Peking University</affiliation></author>
       <author><first>Xunliang</first><last>Cai</last><affiliation>Meituan</affiliation></author>
@@ -18501,7 +18501,7 @@
       <author><first>Dongyu</first><last>Zhang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Jing</first><last>Ren</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Ziqi</first><last>Xu</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Yiliao</first><last>Song</last><affiliation>University of Adelaide and Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Hongfei</first><last>Lin</last></author>
       <author><first>Feng</first><last>Xia</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
@@ -18653,11 +18653,11 @@
       <author><first>Hritik</first><last>Bansal</last></author>
       <author><first>John</first><last>Palowitch</last><affiliation>Google</affiliation></author>
       <author><first>Chrysovalantis</first><last>Anastasiou</last><affiliation>Google</affiliation></author>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last><affiliation>Google</affiliation></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last><affiliation>Google</affiliation></author>
       <author><first>Lalit K</first><last>Jain</last><affiliation>Google</affiliation></author>
       <author><first>Virginia</first><last>Aglietti</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Disha</first><last>Jindal</last><affiliation>Google</affiliation></author>
-      <author><first>Peter</first><last>Chen</last><affiliation>Google</affiliation></author>
+      <author id="yuanzhu-peter-chen"><first>Peter</first><last>Chen</last><affiliation>Google</affiliation></author>
       <author><first>Nishanth</first><last>Dikkala</last><affiliation>Google</affiliation></author>
       <author><first>Gladys</first><last>Tyen</last><affiliation>Google</affiliation></author>
       <author><first>Xin</first><last>Liu</last><affiliation>Google</affiliation></author>
@@ -18725,10 +18725,10 @@
       <author><first>Yuchen</first><last>Song</last></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Xuefeng</first><last>Bai</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Liqiang</first><last>Nie</last><affiliation>Harbin Institute of Technology (Shenzhen) and Shandong University</affiliation></author>
       <author><first>Jie</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>26567-26583</pages>
       <abstract>Visual information has been introduced for enhancing machine translation (MT), and its effectiveness heavily relies on the availability of large amounts of bilingual parallel sentence pairs with manual image annotations. In this paper, we introduce a stable diffusion-based imagination network into a multimodal large language model (MLLM) to explicitly generate an image for each source sentence, thereby advancing the multimodel MT. Particularly, we build heuristic feedback with reinforcement learning to ensure the consistency of the generated image with the source sentence without the supervision of visual information, which breaks the high-cost bottleneck of image annotation in MT. Furthermore, the proposed method enables imaginative visual information to be integrated into text-only MT in addition to multimodal MT. Experimental results show that our model significantly outperforms existing multimodal MT and text-only MT, especially achieving an average improvement of more than 14 BLEU points on Multi30K and MSCOCO multimodal MT benchmarks.</abstract>
@@ -18818,9 +18818,9 @@
       <author><first>Mohammad Rifqi</first><last>Farhansyah</last></author>
       <author><first>Iwan</first><last>Darmawan</last><affiliation>Monash University</affiliation></author>
       <author><first>Adryan</first><last>Kusumawardhana</last><affiliation>Komisi Pemberantasan Korupsi</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
       <pages>26732-26754</pages>
       <abstract>The Javanese language features a complex system of honorifics that vary according to the social status of the speaker, listener, and referent. Despite its cultural and linguistic significance, there has been limited progress in developing a comprehensive corpus to capture these variations for natural language processing (NLP) tasks. In this paper, we present Unggah-Ungguh, a carefully curated dataset designed to encapsulate the nuances of Unggah-Ungguh Basa, the Javanese speech etiquette framework that dictates the choice of words and phrases based on social hierarchy and context. Using Unggah-Ungguh, we assess the ability of language models (LMs) to process various levels of Javanese honorifics through classification and machine translation tasks. To further evaluate cross-lingual LMs, we conduct machine translation experiments between Javanese (at specific honorific levels) and Indonesian. Additionally, we explore whether LMs can generate contextually appropriate Javanese honorifics in conversation tasks, where the honorific usage should align with the social role and contextual cues. Our findings indicate that current LMs struggle with most honorific levels, exhibiting a bias toward certain honorific tiers.</abstract>
       <url hash="6decc72d">2025.acl-long.1296</url>
@@ -18833,7 +18833,7 @@
       <author><first>Haoke</first><last>Zhang</last><affiliation>Suzhou University</affiliation></author>
       <author><first>Juntao</first><last>Li</last></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>26755-26769</pages>
       <abstract>Generative Reward Models (GenRMs) leverage synthesized Chains of Thought (CoT) to reduce the need for massive labeled data, but this approach introduces risks of overoptimization due to the inability to guarantee the correctness of the CoTs. Identifying and optimizing unexpected behaviors within these synthesized CoT remains a challenge, as it heavily depends on precise annotations of intermediate behavior, similar to process supervision. In this work, we introduce a criteria-based preference tree for reward modeling, where each path in the tree represents a reasoning trajectory based on synthesized criteria. Crucially, each reasoning trajectory can be independently optimized through RL algorithm. These fine-grained process reward signals are derived from the inference-time computations and predefined rules, eliminating the need for human supervision. In experiments, SyncPL showed significant improvements over baselines on multiple human preference benchmarks. We further demonstrate that synthesized data can be learned using a long CoT format, analogous to an o1-like model, further enhancing performance while keeping stability and efficiency during training.</abstract>
@@ -18858,7 +18858,7 @@
     <paper id="1299">
       <title>A Self-Denoising Model for Robust Few-Shot Relation Extraction</title>
       <author><first>Liang</first><last>Zhang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Ziyao</first><last>Lu</last><affiliation>WeChat AI</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
@@ -19008,7 +19008,7 @@
       <author><first>Dung</first><last>Vo</last></author>
       <author><first>Yap Hong</first><last>Xian</last></author>
       <author><first>Hai Leong</first><last>Chieu</last><affiliation>DSO National Laboratories</affiliation></author>
-      <author><first>Kian Ming A.</first><last>Chai</last><affiliation>DSO National Laboratories</affiliation></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last><affiliation>DSO National Laboratories</affiliation></author>
       <author><first>Jing</first><last>Jiang</last><affiliation>Australian National University and Singapore Management University</affiliation></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
       <pages>26962-26983</pages>
@@ -19271,7 +19271,7 @@
       <title>A Dual-Perspective <fixed-case>NLG</fixed-case> Meta-Evaluation Framework with Automatic Benchmark and Better Interpretability</title>
       <author><first>Xinyu</first><last>Hu</last><affiliation>Peking University</affiliation></author>
       <author><first>Mingqi</first><last>Gao</last></author>
-      <author id="li-lin"><first>Li</first><last>Lin</last></author>
+      <author><first>Li</first><last>Lin</last></author>
       <author><first>Zhenghan</first><last>Yu</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>27372-27395</pages>
@@ -19289,7 +19289,7 @@
       <author><first>Guangyuan</first><last>Shi</last></author>
       <author><first>Yongxin</first><last>Xu</last></author>
       <author><first>Yasha</first><last>Wang</last></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <author><first>Xu</first><last>Chu</last><affiliation>Peking University</affiliation></author>
       <author><first>Xiao-Ming</first><last>Wu</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
       <pages>27396-27413</pages>
@@ -19337,7 +19337,7 @@
     </paper>
     <paper id="1332">
       <title><fixed-case>MMDEND</fixed-case>: Dendrite-Inspired Multi-Branch Multi-Compartment Parallel Spiking Neuron for Sequence Modeling</title>
-      <author id="kexin-wang"><first>Kexin</first><last>Wang</last></author>
+      <author><first>Kexin</first><last>Wang</last></author>
       <author><first>Yuhong</first><last>Chou</last></author>
       <author><first>Di</first><last>Shang</last></author>
       <author><first>Shijie</first><last>Mei</last></author>
@@ -19558,7 +19558,7 @@
       <author><first>Juntian</first><last>Zhang</last></author>
       <author><first>Chuanqi</first><last>Cheng</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yuhan</first><last>Liu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Rui</first><last>Yan</last><affiliation>Renmin University of China</affiliation></author>
       <pages>27782-27798</pages>
@@ -19571,9 +19571,9 @@
       <title>Online Iterative Self-Alignment for Radiology Report Generation</title>
       <author><first>Ting</first><last>Xiao</last></author>
       <author><first>Lei</first><last>Shi</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>HaoFeng</first><last>Yang</last><affiliation>East China University of Science and Technology</affiliation></author>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Chenjia</first><last>Bai</last><affiliation>TeleAI, China Telecom</affiliation></author>
       <pages>27799-27814</pages>
       <abstract>Radiology Report Generation (RRG) is an important research topic for relieving radiologists’ heavy workload. Existing RRG models mainly rely on supervised fine-tuning (SFT) based on different model architectures using data pairs of radiological images and corresponding radiologist-annotated reports. Recent research has shifted focus to post-training improvements, aligning RRG model outputs with human preferences using reinforcement learning (RL). However, the limited data coverage of high-quality annotated data poses risks of overfitting and generalization. This paper proposes a novel Online Iterative Self-Alignment (OISA) method for RRG that consists of four stages: self-generation of diverse data, self-evaluation for multi-objective preference data, self-alignment for multi-objective optimization and self-iteration for further improvement. Our approach allows for generating varied reports tailored to specific clinical objectives, enhancing the overall performance of the RRG model iteratively. Unlike existing methods, our framework significantly increases data quality and optimizes performance through iterative multi-objective optimization. Experimental results demonstrate that our method surpasses previous approaches, achieving state-of-the-art performance across multiple evaluation metrics.</abstract>
@@ -19607,7 +19607,7 @@
     </paper>
     <paper id="1351">
       <title>Evaluating Sequence Labeling on the basis of Information Theory</title>
-      <author><first>Enrique</first><last>Amigo</last><affiliation>Universidad Nacional de Educación a Distancia</affiliation></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigo</last><affiliation>Universidad Nacional de Educación a Distancia</affiliation></author>
       <author><first>Elena</first><last>Álvarez-Mellado</last><affiliation>Universidad Nacional de Educación a Distancia</affiliation></author>
       <author><first>Julio</first><last>Gonzalo</last><affiliation>Universidad Nacional de Educación a Distancia</affiliation></author>
       <author><first>Jorge</first><last>Carrillo-de-Albornoz</last></author>
@@ -19644,7 +19644,7 @@
       <author><first>Xunjian</first><last>Yin</last></author>
       <author><first>Xinyi</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
       <author><first>Liangming</first><last>Pan</last><affiliation>University of Arizona</affiliation></author>
-      <author id="li-lin"><first>Li</first><last>Lin</last></author>
+      <author><first>Li</first><last>Lin</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
       <pages>27890-27913</pages>
@@ -19672,7 +19672,7 @@
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zuxuan</first><last>Wu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yu-Gang</first><last>Jiang</last><affiliation>Fudan University</affiliation></author>
       <pages>27914-27961</pages>
@@ -19701,7 +19701,7 @@
       <author><first>Taiga</first><last>Someya</last></author>
       <author><first>Anej</first><last>Svete</last></author>
       <author><first>Brian</first><last>DuSell</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich</affiliation></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last><affiliation>McGill University, Mila and McGill University</affiliation></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last><affiliation>McGill University, Mila and McGill University</affiliation></author>
       <author><first>Mario</first><last>Giulianelli</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich</affiliation></author>
       <author><first>Ryan</first><last>Cotterell</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <pages>27995-28013</pages>
@@ -19715,7 +19715,7 @@
       <author><first>Adrián</first><last>Bazaga</last><affiliation>Microsoft</affiliation></author>
       <author><first>Rexhina</first><last>Blloshmi</last><affiliation>Amazon</affiliation></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last><affiliation>Amazon and University of Cambridge</affiliation></author>
-      <author><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
       <pages>28014-28033</pages>
       <abstract>Large Language Models (LLMs) have emerged as powerful tools for generating coherent text, understanding context, and performing reasoning tasks. However, they struggle with temporal reasoning, which requires processing time-related information such as event sequencing, durations, and inter-temporal relationships. These capabilities are critical for applications including question answering, scheduling, and historical analysis. In this paper, we introduce TISER, a novel framework that enhances the temporal reasoning abilities of LLMs through a multi-stage process that combines timeline construction with iterative self-reflection. Our approach leverages test-time scaling to extend the length of reasoning traces, enabling models to capture complex temporal dependencies more effectively. This strategy not only boosts reasoning accuracy but also improves the traceability of the inference process. Experimental results demonstrate state-of-the-art performance across multiple benchmarks, including out-of-distribution test sets, and reveal that TISER enables smaller open-source models to surpass larger closed-weight models on challenging temporal reasoning tasks.</abstract>
       <url hash="2663b26b">2025.acl-long.1358</url>
@@ -19729,7 +19729,7 @@
       <author><first>Paul</first><last>Flanagan</last></author>
       <author><first>Alessandra</first><last>Pascale</last></author>
       <author><first>Oisín</first><last>Redmond</last></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>IT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
       <pages>28034-28051</pages>
       <abstract>Extracting scientific evidence from biomedical studies for clinical research questions (e.g., Does stem cell transplantation improve quality of life in patients with medically refractory Crohn’s disease compared to placebo?) is a crucial step in synthesising biomedical evidence. In this paper, we focus on the task of document-level scientific evidence extraction for clinical questions with conflicting evidence. To support this task, we create a dataset called CochraneForest leveraging forest plots from Cochrane systematic reviews. It comprises 202 annotated forest plots, associated clinical research questions, full texts of studies, and study-specific conclusions. Building on CochraneForest, we propose URCA (Uniform Retrieval Clustered Augmentation), a retrieval-augmented generation framework designed to tackle the unique challenges of evidence extraction. Our experiments show that URCA outperforms the best existing methods by up to 10.3% in F1 score on this task. However, the results also underscore the complexity of CochraneForest, establishing it as a challenging testbed for advancing automated evidence synthesis systems.</abstract>
@@ -19745,7 +19745,7 @@
       <author><first>Long</first><last>Bai</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Xiaolong</first><last>Jin</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>28052-28070</pages>
       <abstract>In this paper, we aim to enhance the robustness of Universal Information Extraction (UIE) by introducing a new benchmark dataset, a comprehensive evaluation, and a feasible solution. Existing robust benchmark datasets have two key limitations: 1) They generate only a limited range of perturbations for a single Information Extraction (IE) task, which fails to evaluate the robustness of UIE models effectively; 2) They rely on small models or handcrafted rules to generate perturbations, often resulting in unnatural adversarial examples. Considering the powerful generation capabilities of Large Language Models (LLMs), we introduce a new benchmark dataset for Robust UIE, called RUIE-Bench, which utilizes LLMs to generate more diverse and realistic perturbations across different IE tasks. Based on this dataset, we comprehensively evaluate existing UIE models and reveal that both LLM-based models and other models suffer from significant performance drops. To improve robustness and reduce training costs, we propose a data-augmentation solution that dynamically selects hard samples for iterative training based on the model’s inference loss. Experimental results show that training with only <tex-math>\textbf{15}</tex-math>% of the data leads to an average <tex-math>\textbf{8.1}</tex-math>% relative performance improvement across three IE tasks. Our code and dataset are available at: https://github.com/ICT-GoKnow/RobustUIE.</abstract>
       <url hash="9ac2e3a6">2025.acl-long.1360</url>
@@ -19972,7 +19972,7 @@
       <author><first>Musa Izzanardi</first><last>Wijanarko</last><affiliation>Monash University</affiliation></author>
       <author><first>Lucky</first><last>Susanto</last></author>
       <author><first>Khumaisa</first><last>Nur’aini</last><affiliation>Monash University</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>28371-28401</pages>
       <abstract>Indonesia is rich in languages and scripts. However, most NLP progress has been made using romanized text. In this paper, we present NusaAksara, a novel public benchmark for Indonesian languages that includes their original scripts. Our benchmark covers both text and image modalities and encompasses diverse tasks such as image segmentation, OCR, transliteration, translation, and language identification. Our data is constructed by human experts through rigorous steps. NusaAksara covers 8 scripts across 7 languages, including low-resource languages not commonly seen in NLP benchmarks. Although unsupported by Unicode, the Lampung script is included in this dataset. We benchmark our data across several models, from LLMs and VLMs such as GPT-4o, Llama 3.2, and Aya 23 to task-specific systems such as PP-OCR and LangID, and show that most NLP technologies cannot handle Indonesia’s local scripts, with many achieving near-zero performance.</abstract>
@@ -19990,7 +19990,7 @@
       <author><first>Linbo</first><last>Qiao</last></author>
       <author><first>Songzhu</first><last>Mei</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Yijie</first><last>Wang</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>28402-28414</pages>
       <abstract>Rumor detection on social media has become an emerging topic. Traditional deep learning-based methods model rumors based on content, propagation structure, or user behavior, but these approaches are constrained by limited modeling capacity and insufficient training corpora. Recent studies have explored using LLMs for rumor detection through supervised fine-tuning (SFT), but face two issues: 1) unreliable samples sometimes mislead the model learning; 2) the model only learns the most salient input-output mapping and skips in-depth analyses of the rumored content for convenience. To address these issues, we propose an SFT-based LLM rumor detection model with Influence guided Sample selection and Game-based multi-perspective Analysis (ISGA). Specifically, we first introduce the Influence Score (IS) to assess the impact of samples on model predictions and select samples for SFT. We also approximate IS via Taylor expansion to reduce computational complexity. Next, we use LLMs to generate in-depth analyses of news content from multiple perspectives and model their collaborative process for prediction as a cooperative game. Then we utilize the Shapley value to quantify the contribution of each perspective for selecting informative perspective analyses. Experiments show that ISGA excels existing SOTA on three datasets.</abstract>
       <url hash="1f54af10">2025.acl-long.1378</url>
@@ -20105,7 +20105,7 @@
       <author><first>Huayu</first><last>Zhang</last><affiliation>LifeArc</affiliation></author>
       <author><first>Abul</first><last>Hasan</last><affiliation>University College London, University of London</affiliation></author>
       <author><first>Honghan</first><last>Wu</last><affiliation>University of Glasgow</affiliation></author>
-      <author><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>28532-28562</pages>
       <abstract>In this work, we present a manually annotated corpus for Adverse Event (AE) extraction from discharge summaries of elderly patients, a population often underrepresented in clinical NLP resources. The dataset includes 14 clinically significant AEs—such as falls, delirium, and intracranial haemorrhage, along with contextual attributes like negation, diagnosis type, and in-hospital occurrence. Uniquely, the annotation schema supports both discontinuous and overlapping entities, addressing challenges rarely tackled in prior work. We evaluate multiple models using FlairNLP across three annotation granularities: fine-grained, coarse-grained, and coarse-grained with negation. While transformer-based models (e.g., BERT-cased) achieve strong performance on document-level coarse-grained extraction (F1 = 0.943), performance drops notably for fine-grained entity-level tasks (e.g., F1 = 0.675), particularly for rare events and complex attributes. These results demonstrate that despite high-level scores, significant challenges remain in detecting underrepresented AEs and capturing nuanced clinical language. Developed within a Trusted Research Environment (TRE), the dataset is available upon request via DataLoch and serves as a robust benchmark for evaluating AE extraction methods and supporting future cross-dataset generalisation.</abstract>
       <url hash="d99085a4">2025.acl-long.1386</url>
@@ -20285,8 +20285,8 @@
       <title>Know Your Mistakes: Towards Preventing Overreliance on Task-Oriented Conversational <fixed-case>AI</fixed-case> Through Accountability Modeling</title>
       <author><first>Suvodip</first><last>Dey</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Yi-Jyun</first><last>Sun</last></author>
-      <author><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>28830-28843</pages>
       <abstract>Recent LLMs have enabled significant advancements for conversational agents. However, they are also well known to hallucinate, producing responses that seem plausible but are factually incorrect. On the other hand, users tend to over-rely on LLM-based AI agents, accepting AI’s suggestion even when it is wrong. Adding positive friction, such as explanations or getting user confirmations, has been proposed as a mitigation in AI-supported decision-making systems. In this paper, we propose an accountability model for LLM-based task-oriented dialogue agents to address user overreliance via friction turns in cases of model uncertainty and errors associated with dialogue state tracking (DST). The accountability model is an augmented LLM with an additional accountability head that functions as a binary classifier to predict the relevant slots of the dialogue state mentioned in the conversation. We perform our experiments with multiple backbone LLMs on two established benchmarks (MultiWOZ and Snips). Our empirical findings demonstrate that the proposed approach not only enables reliable estimation of AI agent errors but also guides the decoder in generating more accurate actions. We observe around 3% absolute improvement in joint goal accuracy (JGA) of DST output by incorporating accountability heads into modern LLMs. Self-correcting the detected errors further increases the JGA from 67.13 to 70.51, achieving state-of-the-art DST performance. Finally, we show that error correction through user confirmations (friction turn) achieves a similar performance gain, highlighting its potential to reduce user overreliance.</abstract>
       <url hash="7573392b">2025.acl-long.1399</url>
@@ -20523,7 +20523,7 @@
       <author><first>Desmond U.</first><last>Patton</last></author>
       <author><first>Shana</first><last>Kleiner</last></author>
       <author><first>James R. Shepard</first><last>Iii</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>29192-29217</pages>
       <abstract>With a combination of quantitative experiments, human judgments, and qualitative analyses, we evaluate the quantity and quality of African American Language (AAL) representation in 12 predominantly English, open-source pretraining corpora. We specifically focus on the sources, variation, and naturalness of included AAL texts representing the AAL speaking community. We find that AAL is underrepresented in all evaluated pretraining corpora compared to US demographics, constituting as few as 0.007% and at most 0.18% of documents. We also find that more than 25% of AAL texts in C4 may be perceived as inappropriate for LLMs to generate and to reinforce harmful stereotypes. Finally, we find that most automated filters are more likely to conserve White Mainstream English (WME) texts over AAL in pretraining corpora.</abstract>
       <url hash="87f448bf">2025.acl-long.1416</url>
@@ -20538,7 +20538,7 @@
       <author><first>Varshini</first><last>Reddy</last><affiliation>Kensho Technologies</affiliation></author>
       <author><first>Seth</first><last>Ebner</last><affiliation>Kensho</affiliation></author>
       <author><first>Nilesh</first><last>Kumar</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last><affiliation>Apple</affiliation></author>
       <author><first>Chris</first><last>Tanner</last><affiliation>Massachusetts Institute of Technology and Kensho</affiliation></author>
       <pages>29218-29257</pages>
       <abstract>Some statements have one well-defined continuation (e.g., “the Eiffel Tower is in [<tex-math>Paris</tex-math>]"), whereas others have a natural distribution over multiple options (e.g., “the weighted coin flip was [<tex-math>Heads/Tails</tex-math>].") We argue that language model (LM) outputs should capture these natural distributions. Our work specifically tests whether LM output probabilities are calibrated to numeric information within their textual contexts. For example, if the context (the prompt) concerns two equally likely options (e.g., heads or tails for a fair coin), the LM output probabilities should also be equal. Likewise, in a context with nonuniformly likely events (e.g., rolling a pair with two dice) an LM should output proportionate probabilities. However, we find that even in simple settings, the best LMs (1) are poorly calibrated and (2) have systematic biases: artifacts like word identity, word order, and word frequency all impact calibration. For example, ‘gpt-4o-mini‘ often picks the first of two options presented in the prompt regardless of the options’ implied likelihoods, whereas ‘Llama-3.1-8B‘ picks the second. Models do not allocate probability mass among valid options in a calibrated manner.</abstract>
@@ -20566,7 +20566,7 @@
       <author><first>Dilip</first><last>Venkatesh</last></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft and Indian Institute of Technology, Madras</affiliation></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <pages>29297-29329</pages>
       <abstract>Evaluating machine-generated text remains a significant challenge in NLP, especially for non-English languages. Current methodologies, including automated metrics, human assessments, and LLM-based evaluations, predominantly focus on English, revealing a significant gap in multilingual evaluation frameworks. We introduce the Cross Lingual Auto Evaluation (CIA) Suite, an extensible framework that includes evaluator LLMs (Hercule) and a novel test set (Recon) specifically designed for multilingual evaluation. Our test set features 500 human-annotated instructions spanning various task capabilities along with human judgment scores across six languages. This would enable benchmarking of general-purpose multilingual LLMs and facilitate meta-evaluation of Evaluator LLMs. The proposed model, Hercule, is a cross-lingual evaluation model that addresses the scarcity of reference answers in the target language by learning to assign scores to responses based on easily available reference answers in English. Our experiments demonstrate that Hercule aligns more closely with human judgments compared to proprietary models, demonstrating the effectiveness of such cross-lingual evaluation in low resource scenarios. Further, it is also effective in zero-shot evaluation on unseen languages. This study is the first comprehensive examination of cross-lingual evaluation using LLMs, presenting a scalable and effective approach for multilingual assessment. All code, datasets, and models will be publicly available to enable further research in this important area.</abstract>
       <url hash="9466661a">2025.acl-long.1419</url>
@@ -20720,7 +20720,7 @@
       <title>Bregman Conditional Random Fields: Sequence Labeling with Parallelizable Inference Algorithms</title>
       <author><first>Caio</first><last>Corro</last><affiliation>Sorbonne Université</affiliation></author>
       <author><first>Mathieu</first><last>Lacroix</last><affiliation>Université Paris Nord (Paris XIII)</affiliation></author>
-      <author><first>Joseph Le</first><last>Roux</last><affiliation>Université Paris 13</affiliation></author>
+      <author id="joseph-le-roux"><first>Joseph Le</first><last>Roux</last><affiliation>Université Paris 13</affiliation></author>
       <pages>29557-29574</pages>
       <abstract>We propose a novel discriminative model for sequence labeling called Bregman conditional random fields (BCRF).Contrary to standard linear-chain conditional random fields,BCRF allows fast parallelizable inference algorithms based on iterative Bregman projections.We show how such models can be learned using Fenchel-Young losses, including extension for learning from partial labels.Experimentally, our approach delivers comparable results to CRF while being faster, and achieves better results in highly constrained settings compared to mean field, another parallelizable alternative.</abstract>
       <url hash="427ef7c8">2025.acl-long.1430</url>
@@ -20754,8 +20754,8 @@
       <author><first>Kalvin</first><last>Chang</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Rebecca</first><last>Byrnes</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Aravind</first><last>Mysore</last></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
-      <author><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>29628-29647</pages>
       <abstract>Historical linguists have long written “programs” that convert reconstructed words in an ancestor language into their attested descendants via ordered string rewrite functions (called sound laws) However, writing these programs is time-consuming, motivating the development of automated Sound Law Induction (SLI) which we formulate as Programming by Examples (PBE) with Large Language Models (LLMs) in this paper. While LLMs have been effective for code generation, recent work has shown that PBE is challenging but improvable by fine-tuning, especially with training data drawn from the same distribution as evaluation data. In this paper, we create a conceptual framework of what constitutes a “similar distribution” for SLI and propose four kinds of synthetic data generation methods with varying amounts of inductive bias to investigate what leads to the best performance. Based on the results, we create a SOTA open-source model for SLI as PBE (+6% pass rate with a third of the parameters of the second-best LLM) and also highlight exciting future directions for PBE research.</abstract>
       <url hash="748d0f22">2025.acl-long.1432</url>
@@ -21046,7 +21046,7 @@
       <author><first>Elena</first><last>Álvarez-Mellado</last><affiliation>Universidad Nacional de Educación a Distancia</affiliation></author>
       <author><first>Alessia</first><last>Battisti</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Cui</first><last>Ding</last><affiliation>University of Zurich</affiliation></author>
-      <author><first>Anne</first><last>Göhring</last><affiliation>University of Zurich</affiliation></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Yingqiang</first><last>Gao</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Zifan</first><last>Jiang</last></author>
       <author><first>Andrianos</first><last>Michail</last><affiliation>University of Zurich</affiliation></author>
@@ -21107,10 +21107,10 @@
     <paper id="1457">
       <title><fixed-case>DRAMA</fixed-case>: Diverse Augmentation from Large Language Models to Smaller Dense Retrievers</title>
       <author><first>Xueguang</first><last>Ma</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last><affiliation>Meta</affiliation></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last><affiliation>Meta</affiliation></author>
       <author><first>Barlas</first><last>Oguz</last><affiliation>Meta</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
-      <author><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last><affiliation>Meta Platforms, Inc.</affiliation></author>
       <author><first>Xilun</first><last>Chen</last><affiliation>Meta FAIR</affiliation></author>
       <pages>30170-30186</pages>
       <abstract>Large language models (LLMs) have demonstrated strong effectiveness and robustness when fine-tuned as dense retrievers.However, their large parameter size presents significant computational challenges at inference time.While smaller retrievers offer better efficiency, they often fail to generalize effectively with limited supervised fine-tuning data.In this work, we introduce DRAMA, a training framework that leverages LLMs to train smaller generalizable dense retrievers.In particular, we adopt pruned LLMs as the backbone and train on diverse LLM-augmented data in a single-stage contrastive learning setup.Experiments show that DRAMA offers better multilingual and long-context capabilities than traditional encoder-based retrievers, and achieves strong performance across multiple tasks and languages.</abstract>
@@ -21214,7 +21214,7 @@
       <author><first>Janani</first><last>D</last></author>
       <author><first>Mansi</first><last>Gupta</last></author>
       <author><first>Danish</first><last>Pruthi</last><affiliation>Indian Institute of Science, Bangalore</affiliation></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <pages>30331-30380</pages>
       <abstract>Existing studies on fairness are largely Western-focused, making them inadequate for culturally diverse countries such as India. To address this gap, we introduce INDIC-BIAS, a comprehensive India-centric benchmark designed to evaluate fairness of LLMs across 85 identity groups encompassing diverse castes, religions, regions, and tribes. We first consult domain experts to curate over 1,800 socio-cultural topics spanning behaviors and situations, where biases and stereotypes are likely to emerge. Grounded in these topics, we generate and manually validate 20,000 real-world scenario templates to probe LLMs for fairness. We structure these templates into three evaluation tasks: plausibility, judgment, and generation. Our evaluation of 14 popular LLMs on these tasks reveals strong negative biases against marginalized identities, with models frequently reinforcing common stereotypes. Additionally, we find that models struggle to mitigate bias even when explicitly asked to rationalize their decision. Our evaluation provides evidence of both allocative and representational harms that current LLMs could cause towards Indian identities, calling for a more cautious usage in practical applications. We release INDIC-BIAS as an open-source benchmark to advance research on benchmarking and mitigating biases and stereotypes in the Indian context.</abstract>
       <url hash="3ba58225">2025.acl-long.1465</url>
@@ -21224,7 +21224,7 @@
     <paper id="1466">
       <title><fixed-case>S</fixed-case>peech<fixed-case>IQ</fixed-case>: Speech-Agentic Intelligence Quotient Across Cognitive Levels in Voice Understanding by Large Language Models</title>
       <author><first>Zhen</first><last>Wan</last></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Yahan</first><last>Yu</last><affiliation>Kyoto University, Kyoto University</affiliation></author>
       <author><first>Jinchuan</first><last>Tian</last></author>
       <author><first>Sheng</first><last>Li</last><affiliation>Institute of Science Tokyo</affiliation></author>
@@ -21349,7 +21349,7 @@
       <author><first>Ang</first><last>Lv</last></author>
       <author><first>Yuhan</first><last>Liu</last></author>
       <author><first>Flood</first><last>Sung</last><affiliation>Moonshot AI</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Shuo</first><last>Shang</last></author>
       <author><first>Xiuying</first><last>Chen</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
@@ -21429,7 +21429,7 @@
       <author><first>Sayandeep</first><last>Sen</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Palani</first><last>Kodeswaran</last></author>
       <author><first>Abhijit</first><last>Mishra</last><affiliation>University of Texas at Austin and Apple</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>30639-30652</pages>
       <abstract>Recent advancements in large language models (LLMs) have significantly enhanced their ability to understand both natural language and code, driving their use in tasks like natural language-to-code (NL2Code) and code summarisation. However, LLMs are prone to hallucination—outputs that stray from intended meanings. Detecting hallucinations in code summarisation is especially difficult due to the complex interplay between programming and natural languages. We introduce a first-of-its-kind dataset, CodeSumEval, with ~10K samples, curated specifically for hallucination detection in code summarisation. We further propose a novel Entity Tracing Framework (ETF) that a) utilises static program analysis to identify code entities from the program and b) uses LLMs to map and verify these entities and their intents within generated code summaries. Our experimental analysis demonstrates the framework’s effectiveness, leading to a 73% F1 score. The proposed approach provides a method for detecting hallucinations by tracing entities from the summary to the code, allowing us to evaluate summary accuracy and localise the error within the summary.</abstract>
       <url hash="acfcbb03">2025.acl-long.1480</url>
@@ -21483,7 +21483,7 @@
       <author><first>Chengxing</first><last>Zhou</last></author>
       <author><first>Zejun</first><last>Li</last></author>
       <author><first>Zhihao</first><last>Fan</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>30715-30727</pages>
       <abstract>Large Vision-Language Models (LVLMs) typically learn visual capacity through visual instruction tuning, involving updates to both a projector and their LLM backbones. Inspired by the concept of a visual region in the human brain, we investigate the existence of an analogous <i>visual region</i> within LLMs that functions as a cognitive core, and explore the potential of efficient training of LVLMs via selective layers tuning. Using Bunny-Llama-3-8B-V for detailed analysis and other three LVLMs for validation across diverse visual and textual tasks, we find that selectively updating 25% of LLMs layers, when sparsely and uniformly distributed, can preserve nearly 99% of visual performance and maintain or improve textual task results, while effectively reducing training time. Based on this targeted training approach, we further propose a novel visual region-based pruning paradigm, removing non-critical layers outside the visual region, which can achieve minimal performance loss. This study offers an effective and efficient strategy for LVLM training and inference by activating a layer-wise visual region within LLMs, which proves consistently effective across different models.</abstract>
@@ -21517,7 +21517,7 @@
       <author><first>Yibo</first><last>Wang</last></author>
       <author><first>Yangning</first><last>Li</last></author>
       <author><first>Kay</first><last>Liu</last><affiliation>Amazon and University of Illinois Chicago</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <pages>30750-30762</pages>
       <abstract>Test-time computing approaches, which leverage additional computational resources during inference, have been proven effective in enhancing large language model performance. This work introduces a novel, linearly scaling approach, TestNUC, that improves test-time predictions by leveraging the local consistency of neighboring unlabeled data-it classifies an input instance by considering not only the model’s prediction on that instance but also on neighboring unlabeled instances. We evaluate TestNUC across eight diverse datasets, spanning intent classification, topic mining, domain discovery, and emotion detection, demonstrating its consistent superiority over baseline methods such as standard prompting and self-consistency. Furthermore, TestNUC can be seamlessly integrated with existing test-time computing approaches, substantially boosting their performance. Our analysis reveals that TestNUC scales effectively with increasing amounts of unlabeled data and performs robustly across different embedding models, making it practical for real-world applications. Our code is available at https://github.com/HenryPengZou/TestNUC.</abstract>
       <url hash="aa56f679">2025.acl-long.1486</url>
@@ -21748,7 +21748,7 @@
       <author><first>Zhichao</first><last>Duan</last></author>
       <author><first>Zhenyu</first><last>Li</last></author>
       <author><first>Bowen</first><last>Dong</last><affiliation>Tsinghua University, Tsinghua University and Tencent AI Lab</affiliation></author>
-      <author id="ning-liu"><first>Ning</first><last>Liu</last><affiliation>Shandong University</affiliation></author>
+      <author><first>Ning</first><last>Liu</last><affiliation>Shandong University</affiliation></author>
       <author><first>Xiuxing</first><last>Li</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Jianyong</first><last>Wang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <pages>31102-31118</pages>
@@ -21777,7 +21777,7 @@
       <author><first>Eunsu</first><last>Kim</last></author>
       <author><first>Junyeong</first><last>Park</last></author>
       <author><first>James</first><last>Thorne</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>31137-31155</pages>
       <abstract>Text-to-image diffusion models have recently enabled the creation of visually compelling, detailed images from textual prompts. However, their ability to accurately represent various cultural nuances remains an open question. In our work, we introduce CULTDIFF benchmark, evaluating whether state-of-the-art diffusion models can generate culturally specific images spanning ten countries. We show that these models often fail to generate cultural artifacts in architecture, clothing, and food, especially for underrepresented country regions, by conducting a fine-grained analysis of different similarity aspects, revealing significant disparities in cultural relevance, description fidelity, and realism compared to real-world reference images. With the collected human evaluations, we develop a neural-based image-image similarity metric, namely, CULTDIFF-S, to predict human judgment on real and generated images with cultural artifacts. Our work highlights the need for more inclusive generative AI systems and equitable dataset representation over a wide range of cultures.</abstract>
       <url hash="7f53bbaa">2025.acl-long.1503</url>
@@ -21844,7 +21844,7 @@
       <author><first>Jeremy</first><last>Barnes</last><affiliation>University of the Basque Country</affiliation></author>
       <author><first>Pablo</first><last>Gamallo</last><affiliation>Universidad de Santiago de Compostela</affiliation></author>
       <author><first>Iria</first><last>de-Dios-Flores</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>31204-31218</pages>
       <abstract>We introduce a professionally translated extension of the TruthfulQA benchmark designed to evaluate truthfulness in Basque, Catalan, Galician, and Spanish. Truthfulness evaluations of large language models (LLMs) have primarily been focused on English. However, the ability of LLMs to maintain truthfulness across languages remains under-explored. Our study evaluates 12 state-of-the-art open LLMs, comparing base and instruction-tuned models using human evaluation, multiple-choice metrics, and LLM-as-a-Judge scoring. Our findings reveal that, while LLMs perform best in English and worst in Basque (the lowest-resourced language), overall truthfulness discrepancies across languages are smaller than anticipated. Furthermore, we show that LLM-as-a-Judge correlates more closely with human judgments than multiple-choice metrics, and that informativeness plays a critical role in truthfulness assessment. Our results also indicate that machine translation provides a viable approach for extending truthfulness benchmarks to additional languages, offering a scalable alternative to professional translation. Finally, we observe that universal knowledge questions are better handled across languages than context- and time-dependent ones, highlighting the need for truthfulness evaluations that account for cultural and temporal variability. Datasets, models and code are publicly available under open licenses.</abstract>
       <url hash="91fd6715">2025.acl-long.1507</url>
@@ -21865,7 +21865,7 @@
     </paper>
     <paper id="1509">
       <title>Batayan: A <fixed-case>F</fixed-case>ilipino <fixed-case>NLP</fixed-case> benchmark for evaluating Large Language Models</title>
-      <author><first>Jann Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
+      <author id="jann-railey-montalan"><first>Jann Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
       <author><first>Jimson Paulo</first><last>Layacan</last></author>
       <author><first>David Demitri</first><last>Africa</last></author>
       <author><first>Richell Isaiah S.</first><last>Flores</last><affiliation>Ateneo de Manila University</affiliation></author>
@@ -21914,7 +21914,7 @@
       <author><first>Badr M.</first><last>Abdullah</last></author>
       <author><first>Wei</first><last>Xue</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
-      <author><first>Bernd</first><last>Möbius</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
       <pages>31310-31322</pages>
       <abstract>Idioms are defined as a group of words with a figurative meaning not deducible from their individual components. Although modern machine translation systems have made remarkable progress, translating idioms remains a major challenge, especially for speech-to-text systems, where research on this topic is notably sparse. In this paper, we systematically evaluate idiom translation as compared to conventional news translation in both text-to-text machine translation (MT) and speech-to-text translation (SLT) systems across two language pairs (German to English, Russian to English). We compare state-of-the-art end-to-end SLT systems (SeamlessM4T SLT-to-text, Whisper Large v3) with MT systems (SeamlessM4T SLT-to-text, No Language Left Behind), Large Language Models (DeepSeek, LLaMA) and cascaded alternatives. Our results reveal that SLT systems experience a pronounced performance drop on idiomatic data, often reverting to literal translations even in higher layers, whereas MT systems and Large Language Models demonstrate better handling of idioms. These findings underscore the need for idiom-specific strategies and improved internal representations in SLT architectures.</abstract>
@@ -21932,7 +21932,7 @@
       <author><first>Nuno</first><last>Guimarães</last><affiliation>INESC TEC</affiliation></author>
       <author><first>Elisa</first><last>Sartori</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last><affiliation>Athens University of Economics and Business</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padua</affiliation></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
       <pages>31323-31345</pages>
@@ -22015,7 +22015,7 @@
       <author><first>Jihyoung</first><last>Jang</last><affiliation>Pohang University of Science and Technology</affiliation></author>
       <author><first>Minwook</first><last>Bae</last></author>
       <author><first>Minji</first><last>Kim</last><affiliation>Pohang University of Science and Technology</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Hyounghun</first><last>Kim</last><affiliation>Pohang University of Science and Technology</affiliation></author>
       <pages>31481-31512</pages>
       <abstract>As chatbots continue to evolve toward human-like, real-world, interactions, multimodality remains an active area of research and exploration. So far, efforts to integrate multimodality into chatbots have primarily focused on image-centric tasks, such as visual dialogue and image-based instructions, placing emphasis on the “eyes” of human perception while neglecting the “ears”, namely auditory aspects. Moreover, these studies often center around static interactions that focus on discussing the modality rather than naturally incorporating it into the conversation, which limits the richness of simultaneous, dynamic engagement. Furthermore, while multimodality has been explored in multi-party and multi-session conversations, task-specific constraints have hindered its seamless integration into dynamic, natural conversations. To address these challenges, this study aims to equip chatbots with “eyes and ears” capable of more immersive interactions with humans. As part of this effort, we introduce a new multimodal conversation dataset, Multimodal Multi-Session Multi-Party Conversation (<tex-math>M^3C</tex-math>), and propose a novel multimodal conversation model featuring multimodal memory retrieval. Our model, trained on the <tex-math>M^3C</tex-math>, demonstrates the ability to seamlessly engage in long-term conversations with multiple speakers in complex, real-world-like settings, effectively processing visual and auditory inputs to understand and respond appropriately. Human evaluations highlight the model’s strong performance in maintaining coherent and dynamic interactions, demonstrating its potential for advanced multimodal conversational agents.</abstract>
@@ -22406,7 +22406,7 @@
     <paper id="1547">
       <title>Emergent Abilities of Large Language Models under Continued Pre-training for Language Adaptation</title>
       <author><first>Ahmed</first><last>Elhady</last></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Reka AI</affiliation></author>
       <pages>32174-32186</pages>
       <abstract>Continued pretraining (CPT) is a popular approach to adapt existing large language models (LLMs) to new languages. When doing so, it is common practice to include a portion of English data in the mixture, but its role has not been carefully studied to date. In this work, we show that including English does not impact validation perplexity, yet it is critical for the emergence of downstream capabilities in the target language. We introduce a language-agnostic benchmark for in-context learning (ICL), which reveals catastrophic forgetting early on CPT when English is not included. This in turn damages the ability of the model to generalize to downstream prompts as measured by perplexity, even if it does not manifest in terms of accuracy until later in training, and can be tied to a big shift in the model parameters. Based on these insights, we introduce curriculum learning and exponential moving average (EMA) of weights as effective alternatives to mitigate the need for English. All in all, our work sheds light into the dynamics by which emergent abilities arise when doing CPT for language adaptation, and can serve as a foundation to design more effective methods in the future.</abstract>
@@ -22444,7 +22444,7 @@
       <author><first>Shreya</first><last>Havaldar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Adam</first><last>Stein</last></author>
       <author><first>Eric</first><last>Wong</last><affiliation>University of Pennsylvania</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>32213-32230</pages>
       <abstract>Successful communication depends on the speaker’s intended style (i.e., what the speaker is trying to convey) aligning with the listener’s interpreted style (i.e., what the listener perceives). However, cultural differences often lead to misalignment between the two; for example, politeness is often lost in translation. We characterize the ways that LLMs fail to translate style – biasing translations towards neutrality and performing worse in non-Western languages. We mitigate these failures with RASTA (Retrieval-Augmented STylistic Alignment), a method that leverages learned stylistic concepts to encourage LLM translation to appropriately convey cultural communication norms and align style.</abstract>
       <url hash="588199bc">2025.acl-long.1550</url>
@@ -22496,7 +22496,7 @@
       <author><first>Karthikeyan</first><last>Natesan Ramamurthy</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Prasanna</first><last>Sattigeri</last><affiliation>IBM Research</affiliation></author>
       <author><first>Werner</first><last>Geyer</last></author>
-      <author><first>Soumya</first><last>Ghosh</last><affiliation>MERCK &amp; CO., INC.</affiliation></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya</first><last>Ghosh</last><affiliation>MERCK &amp; CO., INC.</affiliation></author>
       <pages>32291-32317</pages>
       <abstract>Despite the increasing use of large language models (LLMs) for context-grounded tasks like summarization and question-answering, understanding what makes an LLM produce a certain response is challenging. We propose Multi-Level Explanations for Generative Language Models (MExGen), a technique to provide explanations for context-grounded text generation. MExGen assigns scores to parts of the context to quantify their influence on the model’s output. It extends attribution methods like LIME and SHAP to LLMs used in context-grounded tasks where (1) inference cost is high, (2) input text is long, and (3) the output is text. We conduct a systematic evaluation, both automated and human, of perturbation-based attribution methods for summarization and question answering. The results show that our framework can provide more faithful explanations of generated output than available alternatives, including LLM self-explanations. We open-source code for MExGen as part of the ICX360 toolkit: https://github.com/IBM/ICX360.</abstract>
       <url hash="79d96a86">2025.acl-long.1553</url>
@@ -22598,7 +22598,7 @@
       <author><first>Miguel González</first><last>Saiz</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
       <author><first>Gonzalo</first><last>Martínez</last><affiliation>Universidad Carlos III de Madrid</affiliation></author>
       <author><first>Gonzalo Santamaria</first><last>Gomez</last><affiliation>Instituto de Ingeniería del Conocimiento</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <author><first>Nuria Aldama</first><last>García</last><affiliation>IIC</affiliation></author>
       <author><first>Luis</first><last>Chiruzzo</last><affiliation>Facultad de Ingeniería - Universidad de la República - Uruguay</affiliation></author>
       <author><first>Javier</first><last>Conde</last><affiliation>Universidad Politécnica de Madrid</affiliation></author>
@@ -22800,7 +22800,7 @@
       <author><first>Yuchen</first><last>Hu</last></author>
       <author><first>Bosheng</first><last>Ding</last></author>
       <author><first>Ruirui</first><last>Chen</last><affiliation>Institute of High Performance Computing, Singapore, A*STAR</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <pages>32732-32758</pages>
       <abstract>Large language models (LLMs) have shown impressive few-shot generalization on many tasks via in-context learning (ICL). Despite their success in showing such emergent abilities, the scale and complexity of larger models also lead to unprecedentedly high computational demands and deployment challenges. In reaction, researchers explore transferring the powerful capabilities of larger models to more efficient and compact models by typically aligning the output of smaller (student) models with that of larger (teacher) models. Existing methods either train student models on the generated outputs of teacher models or imitate their token-level probability distributions. However, these distillation methods pay little to no attention to the input, which also plays a crucial role in ICL. Based on the finding that the performance of ICL is highly sensitive to the selection of demonstration examples, we propose Bidirectional Alignment (BiAlign) to fully leverage the models’ preferences for ICL examples to improve the ICL abilities of student models. Specifically, we introduce the alignment of input preferences between student and teacher models by incorporating a novel ranking loss, in addition to aligning the token-level output distribution. With extensive experiments and analysis, we demonstrate that BiAlign can consistently outperform existing baselines on a variety of tasks involving language understanding, reasoning, and coding.</abstract>
       <url hash="79bbee1d">2025.acl-long.1573</url>
@@ -22977,7 +22977,7 @@
       <author><first>Dhairya</first><last>Suman</last><affiliation>Indian Institute of Technology, Delhi</affiliation></author>
       <author><first>Mohammed Safi Ur Rahman</first><last>Khan</last><affiliation>Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology and Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Anoop</first><last>Kunchukuttan</last><affiliation>Microsoft and Indian Institute of Technology, Madras</affiliation></author>
-      <author><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
       <pages>32945-32966</pages>
       <abstract>Speech translation for Indian languages remains a challenging task due to the scarcity of large-scale, publicly available datasets that capture the linguistic diversity and domain coverage essential for real-world applications. Existing datasets cover a fraction of Indian languages and lack the breadth needed to train robust models that generalize beyond curated benchmarks. To bridge this gap, we introduce BhasaAnuvaad, the largest speech translation dataset for Indian languages, spanning over 44 thousand hours of audio and 17 million aligned text segments across 14 Indian languages and English. Our dataset is built through a threefold methodology: (a) aggregating high-quality existing sources, (b) large-scale web crawling to ensure linguistic and domain diversity, and (c) creating synthetic data to model real-world speech disfluencies. Leveraging BhasaAnuvaad, we train IndicSeamless, a state-of-the-art speech translation model for Indian languages that performs better than existing models. Our experiments demonstrate improvements in the translation quality, setting a new standard for Indian language speech translation. We will release all the code, data and model weights in the open-source, with permissive licenses to promote accessibility and collaboration.</abstract>
@@ -23153,7 +23153,7 @@
       <author><first>Jincenzi</first><last>Wu</last></author>
       <author><first>Jianxun</first><last>Lian</last></author>
       <author><first>Dingdong</first><last>Wang</last></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>33242-33271</pages>
       <abstract>Large Language Models (LLMs) are increasingly deployed worldwide, yet their ability to navigate cultural nuances remains underexplored. Misinterpreting cultural content can lead to AI-generated responses that are offensive or inappropriate, limiting their usability in global applications such as customer service, diplomatic communication, and online education. While prior research has evaluated cultural knowledge of LLMs, existing benchmarks fail to assess dynamic cultural competence-the ability to apply cultural knowledge effectively in real-world interactions. To address this gap, we introduce SocialDuolingo, a novel benchmark designed to evaluate cultural competence through multi-turn interactive intercultural scenarios. It comprises 3,060 human-written scenarios spanning 60 countries across six continents. Through extensive experiments on eight prominent LLMs, our findings reveal a significant gap between the cultural knowledge stored in these models and their ability to apply it effectively in cross-cultural communication.</abstract>
       <url hash="9176d669">2025.acl-long.1594</url>
@@ -23240,7 +23240,7 @@
       <author><first>John J</first><last>Nay</last><affiliation>Stanford University</affiliation></author>
       <author><first>Tanmay</first><last>Rajpurohit</last><affiliation>Independent Researcher</affiliation></author>
       <author><first>Ashwin</first><last>Kalyan</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Balaraman</first><last>Ravindran</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last><affiliation>Indian Institute of Technology Madras</affiliation></author>
       <pages>33367-33390</pages>
       <abstract>We explore the ability of large language models (LLMs) to engage in subtle deception through strategically phrasing and intentionally manipulating information. This harmful behavior can be hard to detect, unlike blatant lying or unintentional hallucination. We build a simple testbed mimicking a legislative environment where a corporate <i>lobbyist</i> module is proposing amendments to bills that benefit a specific company while evading identification of this benefactor. We use real-world legislative bills matched with potentially affected companies to ground these interactions. Our results show that LLM lobbyists can draft subtle phrasing to avoid such identification by strong LLM-based detectors. Further optimization of the phrasing using LLM-based re-planning and re-sampling increases deception rates by up to 40 percentage points.Our human evaluations to verify the quality of deceptive generations and their retention of self-serving intent show significant coherence with our automated metrics and also help in identifying certain strategies of deceptive phrasing.This study highlights the risk of LLMs’ capabilities for strategic phrasing through seemingly neutral language to attain self-serving goals. This calls for future research to uncover and protect against such subtle deception.</abstract>
       <url hash="250b3456">2025.acl-long.1600</url>
@@ -23250,7 +23250,7 @@
     <paper id="1601">
       <title><fixed-case>A</fixed-case>fro<fixed-case>CS</fixed-case>-xs: Creating a Compact, High-Quality, Human-Validated Code-Switched Dataset for <fixed-case>A</fixed-case>frican Languages</title>
       <author><first>Kayode</first><last>Olaleye</last></author>
-      <author><first>Arturo</first><last>Oncevay</last><affiliation>J.P. Morgan Chase</affiliation></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last><affiliation>J.P. Morgan Chase</affiliation></author>
       <author><first>Mathieu</first><last>Sibue</last><affiliation>J.P. Morgan Chase</affiliation></author>
       <author><first>Nombuyiselo</first><last>Zondi</last></author>
       <author><first>Michelle</first><last>Terblanche</last><affiliation>University of Pretoria</affiliation></author>
@@ -23410,7 +23410,7 @@
     <paper id="8">
       <title>A Measure of the System Dependence of Automated Metrics</title>
       <author><first>Pius</first><last>Von Däniken</last><affiliation>University of Zurich and ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
-      <author><first>Jan Milan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
       <author><first>Mark</first><last>Cieliebak</last><affiliation>Zurich University of Applied Sciences ZHAW</affiliation></author>
       <pages>87-99</pages>
       <abstract>Automated metrics for Machine Translation have made significant progress, with the goal of replacing expensive and time-consuming human evaluations. These metrics are typically assessed by their correlation with human judgments, which captures the monotonic relationship between human and metric scores. However, we argue that it is equally important to ensure that metrics treat all systems fairly and consistently. In this paper, we introduce a method to evaluate this aspect.</abstract>
@@ -23422,7 +23422,7 @@
       <title>Call for Rigor in Reporting Quality of Instruction Tuning Data</title>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Jaehyung</first><last>Seo</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>100-109</pages>
       <abstract>Instruction tuning is crucial for adapting large language models (LLMs) to align with user intentions. Numerous studies emphasize the significance of the quality of instruction tuning (IT) data, revealing a strong correlation between IT data quality and the alignment performance of LLMs. In these studies, the quality of IT data is typically assessed by evaluating the performance of LLMs trained with that data. However, we identified a prevalent issue in such practice: hyperparameters for training models are often selected arbitrarily without adequate justification. We observed significant variations in hyperparameters applied across different studies, even when training the same model with the same data. In this study, we demonstrate the potential problems arising from this practice and emphasize the need for careful consideration in verifying data quality. Through our experiments on the quality of LIMA data and a selected set of 1,000 Alpaca data points, we demonstrate that arbitrary hyperparameter decisions can make any arbitrary conclusion.</abstract>
       <url hash="1b1aaf41">2025.acl-short.9</url>
@@ -23523,7 +23523,7 @@
       <title>Improving Parallel Sentence Mining for Low-Resource and Endangered Languages</title>
       <author><first>Shu</first><last>Okabe</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Katharina</first><last>Hämmerl</last></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>196-205</pages>
       <abstract>While parallel sentence mining has been extensively covered for fairly well-resourced languages, pairs involving low-resource languages have received comparatively little attention.To address this gap, we present Belopsem, a benchmark of new datasets for parallel sentence mining on three language pairs where the source side is low-resource and endangered: Occitan-Spanish, Upper Sorbian-German, and Chuvash-Russian. These combinations also reflect varying linguistic similarity within each pair. We compare three language models in an established parallel sentence mining pipeline and apply two types of improvements to one of them, Glot500. We observe better mining quality overall by both applying alignment post-processing with an unsupervised aligner and using a cluster-based isotropy enhancement technique. These findings are crucial for optimising parallel data extraction for low-resource languages in a realistic way.</abstract>
       <url hash="c3ac8fa6">2025.acl-short.17</url>
@@ -23560,20 +23560,20 @@
     <paper id="20">
       <title><fixed-case>LLM</fixed-case>s instead of Human Judges? A Large Scale Empirical Study across 20 <fixed-case>NLP</fixed-case> Evaluation Tasks</title>
       <author><first>Anna</first><last>Bavaresco</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raffaella</first><last>Bernardi</last><affiliation>Free University of Bozen Bolzano</affiliation></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last><affiliation>Free University of Bozen Bolzano</affiliation></author>
       <author><first>Leonardo</first><last>Bertolazzi</last><affiliation>University of Trento</affiliation></author>
       <author><first>Desmond</first><last>Elliott</last><affiliation>Copenhagen University and University of Copenhagen</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <author><first>Albert</first><last>Gatt</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Esam</first><last>Ghaleb</last></author>
       <author><first>Mario</first><last>Giulianelli</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich</affiliation></author>
       <author><first>Michael</first><last>Hanna</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Alexander</first><last>Koller</last><affiliation>Saarland University</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <author><first>Philipp</first><last>Mondorf</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Vera</first><last>Neplenbroek</last></author>
       <author><first>Sandro</first><last>Pezzelle</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>David</first><last>Schlangen</last><affiliation>University of Potsdam</affiliation></author>
       <author><first>Alessandro</first><last>Suglia</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Aditya K</first><last>Surikuchi</last></author>
@@ -23625,7 +23625,7 @@
     <paper id="24">
       <title>Subword models struggle with word learning, but surprisal hides it</title>
       <author><first>Bastian</first><last>Bunzeck</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>286-300</pages>
       <abstract>We study word learning in subword and character language models with the psycholinguistic lexical decision task. While subword LMs struggle to discern words and non-words with high accuracy, character LMs solve this task easily and consistently. Only when supplied with further contexts do subword LMs perform similarly to character models. Additionally, when looking at word-level and syntactic learning trajectories, we find that both processes are separable in character LMs. Word learning happens before syntactic learning, whereas both occur simultaneously in subword LMs. This raises questions about the adequacy of subword LMs for modeling language acquisition and positions character LMs as a viable alternative to study processes below the syntactic level.</abstract>
       <url hash="8aece755">2025.acl-short.24</url>
@@ -23659,8 +23659,8 @@
       <title><fixed-case>MUSTS</fixed-case>: <fixed-case>MU</fixed-case>ltilingual Semantic Textual Similarity Benchmark</title>
       <author><first>Tharindu</first><last>Ranasinghe</last><affiliation>Lancaster University</affiliation></author>
       <author><first>Hansi</first><last>Hettiarachchi</last><affiliation>Lancaster University</affiliation></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Ruslan</first><last>Mitkov</last><affiliation>Lancaster University</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last><affiliation>Lancaster University</affiliation></author>
       <pages>331-353</pages>
       <abstract>Predicting semantic textual similarity (STS) is a complex and ongoing challenge in natural language processing (NLP). Over the years, researchers have developed a variety of supervised and unsupervised approaches to calculate STS automatically. Additionally, various benchmarks, which include STS datasets, have been established to consistently evaluate and compare these STS methods. However, they largely focus on high-resource languages, mixed with datasets annotated focusing on relatedness instead of similarity and containing automatically translated instances. Therefore, no dedicated benchmark for multilingual STS exists. To solve this gap, we introduce the Multilingual Semantic Textual Similarity Benchmark (MUSTS), which spans 13 languages, including low-resource languages. By evaluating more than 25 models on MUSTS, we establish the most comprehensive benchmark of multilingual STS methods. Our findings confirm that STS remains a challenging task, particularly for low-resource languages.</abstract>
       <url hash="0d36e9c4">2025.acl-short.27</url>
@@ -23670,7 +23670,7 @@
     <paper id="28">
       <title>Can Large Language Models Accurately Generate Answer Keys for Health-related Questions?</title>
       <author><first>Davis</first><last>Bartels</last><affiliation>National Institutes of Health</affiliation></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>National Institutes of Health</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>National Institutes of Health</affiliation></author>
       <author><first>Dina</first><last>Demner-Fushman</last><affiliation>National Library of Medicine</affiliation></author>
       <pages>354-368</pages>
       <abstract>The evaluation of text generated by LLMs remains a challenge for question answering, retrieval augmented generation (RAG), summarization, and many other natural language processing tasks. Evaluating the factuality of LLM generated responses is particularly important in medical question answering, where the stakes are high. One method of evaluating the factuality of text is through the use of information nuggets (answer keys). Nuggets are text representing atomic facts that may be used by an assessor to make a binary decision as to whether the fact represented by said nugget is contained in an answer. Although manual nugget extraction is expensive and time-consuming, recent RAG shared task evaluations have explored automating the nuggetization of text with LLMs. In this work, we explore several approaches to nugget generation for medical question answering and evaluate their alignment with expert human nugget generation. We find providing an example and extracting nuggets from an answer to be the best approach to nuggetization. While, overall, we found the capabilities of LLMs to distill atomic facts limited, Llama 3.3 performed the best out of the models we tested.</abstract>
@@ -23755,7 +23755,7 @@
       <author><first>Zhipang</first><last>Wang</last></author>
       <author><first>Yu</first><last>Hong</last><affiliation>Suzhou University</affiliation></author>
       <author><first>Weihao</first><last>Sun</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>464-473</pages>
       <abstract>Implicit Discourse Relation Recognition (abbr., IDRR) is a NLP task of classifying argument pairs into different types of semantic relations. Arguments contain subtexts, some of which are beneficial to the perception of semantic relations. However, subtexts are connotative. The neural IDRR model fails to be aware of them without being given pertinent prompts. In this paper, we leverage LLaMA to generate subtexts for argument pairs, and verify the effectiveness of subtext-based IDRR. We construct an IDRR baseline using the decoder-only backbone LLaMA, and enhance it with subtext-aware relation reasoning. A confidence-diagnosed dual-channel network is used for collaboration between in-subtext and out-of-subtext IDRR. We experiment on PDTB-2.0 and PDTB-3.0 for both the main-level and secondary-level relation taxonomies. The test results show that our approach yields substantial improvements compared to the baseline, and achieves higher <tex-math>F</tex-math>1-scores on both benchmarks than the previous decoder-only IDRR models. We make the source codes and data publicly available.</abstract>
       <url hash="8b3f2bfa">2025.acl-short.35</url>
@@ -23881,7 +23881,7 @@
       <author><first>Hyein</first><last>Seo</last></author>
       <author><first>Taewook</first><last>Hwang</last></author>
       <author><first>Yohan</first><last>Lee</last><affiliation>Electronics and Telecommunications Research Institute</affiliation></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <pages>575-589</pages>
       <abstract>In English education tutoring, teacher feedback is essential for guiding students. Recently, AI-based tutoring systems have emerged to assist teachers; however, these systems require high-quality and large-scale teacher feedback data, which is both time-consuming and costly to generate manually. In this study, we propose FEAT, a cost-effective framework for generating teacher feedback, and have constructed three complementary datasets: (1) DIRECT-Manual (DM), where both humans and large language models (LLMs) collaboratively generate high-quality teacher feedback, albeit at a higher cost; (2) DIRECT-Generated (DG), an LLM-only generated, cost-effective dataset with lower quality;, and (3) DIRECT-Augmented (DA), primarily based on DG with a small portion of DM added to enhance quality while maintaining cost-efficiency. Experimental results showed that incorporating a small portion of DM (5–10%) into DG leads to superior performance compared to using 100% DM alone.</abstract>
       <url hash="06622369">2025.acl-short.45</url>
@@ -23925,7 +23925,7 @@
       <author><first>Eric</first><last>Le Ferrand</last><affiliation>Boston College</affiliation></author>
       <author><first>Bo</first><last>Jiang</last></author>
       <author><first>Joshua</first><last>Hartshorne</last><affiliation>MGH Institute of Health Professions</affiliation></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></author>
       <pages>627-635</pages>
       <abstract>Incorporating automatic speech recognition (ASR) into field linguistics workflows for language documentation has become increasingly common. While ASR performance has seen improvements in low-resource settings, obstacles remain when training models on data collected by documentary linguists. One notable challenge lies in the way that this data is curated. ASR datasets built from spontaneous speech are typically recorded in consistent settings and transcribed by native speakers following a set of well designed guidelines. In contrast, field linguists collect data in whatever format it is delivered by their language consultants and transcribe it as best they can given their language skills and the quality of the recording. This approach to data curation, while valuable for linguistic research, does not always align with the standards required for training robust ASR models. In this paper, we explore methods for identifying speech transcriptions in fieldwork data that may be unsuitable for training ASR models. We focus on two complimentary automated measures of transcription quality that can be used to identify transcripts with characteristics that are common in field data but could be detrimental to ASR training. We show that one of the metrics is highly effective at retrieving these types of transcriptions. Additionally, we find that filtering datasets using this metric of transcription quality reduces WER both in controlled experiments using simulated fieldwork with artificially corrupted data and in real fieldwork corpora.</abstract>
       <url hash="159e3883">2025.acl-short.49</url>
@@ -23948,7 +23948,7 @@
       <author><first>Álvaro</first><last>Vega-Hidalgo</last></author>
       <author><first>Artem</first><last>Abzaliev</last></author>
       <author><first>Thore</first><last>Bergman</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>645-659</pages>
       <abstract>Acoustic individual identification of wild animals is an essential task for understanding animal vocalizations within their social contexts, and for facilitating conservation and wildlife monitoring efforts. However, most of the work in this space relies on human efforts, as the development of methods for automatic individual identification is hindered by the lack of data. In this paper, we explore cross-species pre-training to address the task of individual classification in white-faced capuchin monkeys. Using acoustic embeddings from birds and humans, we find that they can be effectively used to identify the calls from individual monkeys. Moreover, we find that joint multi-species representations can lead to further improvements over the use of one representation at a time. Our work demonstrates the potential of cross-species data transfer and multi-species representations, as strategies to address tasks on species with very limited data.</abstract>
       <url hash="defda2d8">2025.acl-short.51</url>
@@ -24173,7 +24173,7 @@
     </paper>
     <paper id="69">
       <title><fixed-case>T</fixed-case>iger<fixed-case>LLM</fixed-case> - A Family of <fixed-case>B</fixed-case>angla Large Language Models</title>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <pages>887-896</pages>
       <abstract>The development of Large Language Models (LLMs) remains heavily skewed towards English and a few other high-resource languages. This linguistic disparity is particularly evident for Bangla - the 5th most spoken language. A few initiatives attempted to create open-source Bangla LLMs with performance still behind high-resource languages and limited reproducibility. To address this gap, we introduce TigerLLM - a family of Bangla LLMs. Our results demonstrate that these models surpass all open-source alternatives and also outperform larger proprietary models like GPT3.5 across standard benchmarks, establishing TigerLLM as the new baseline for future Bangla language modeling.</abstract>
@@ -24286,9 +24286,9 @@
       <title>Different Speech Translation Models Encode and Translate Speaker Gender Differently</title>
       <author><first>Dennis</first><last>Fucci</last></author>
       <author><first>Marco</first><last>Gaido</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>Fondazione Bruno Kessler</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <pages>1005-1019</pages>
       <abstract>Recent studies on interpreting the hidden states of speech models have shown their ability to capture speaker-specific features, including gender. Does this finding also hold for speech translation (ST) models? If so, what are the implications for the speaker’s gender assignment in translation? We address these questions from an interpretability perspective, using probing methods to assess gender encoding across diverse ST models. Results on three language directions (English <tex-math>\rightarrow</tex-math> French/Italian/Spanish) indicate that while traditional encoder-decoder models capture gender information, newer architectures—integrating a speech encoder with a machine translation system via adapters—do not. We also demonstrate that low gender encoding capabilities result in systems’ tendency toward a masculine default, a translation bias that is more pronounced in newer architectures.</abstract>
@@ -24304,7 +24304,7 @@
       <author><first>Haozhe</first><last>Zhao</last></author>
       <author><first>Yuchi</first><last>Wang</last></author>
       <author><first>Qingyan</first><last>Guo</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>1020-1029</pages>
       <abstract>Semantic Parsing aims to capture the meaning of a sentence and convert it into a logical, structured form. Previous studies show that semantic parsing enhances the performance of smaller models (e.g., BERT) on downstream tasks. However, it remains unclear whether the improvements extend similarly to LLMs. In this paper, our empirical findings reveal that, unlike smaller models, directly adding semantic parsing results into LLMs reduces their performance. To overcome this, we propose SENSE, a novel prompting approach that embeds semantic hints within the prompt. Experiments show that SENSE consistently improves LLMs’ performance across various tasks, highlighting the potential of integrating semantic information to improve LLM capabilities.</abstract>
       <url hash="55078e0e">2025.acl-short.79</url>
@@ -24388,8 +24388,8 @@
       <author><first>Fardin Ahsan</first><last>Sakib</last></author>
       <author><first>Ziwei</first><last>Zhu</last><affiliation>George Mason University</affiliation></author>
       <author><first>Karen Trister</first><last>Grace</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
       <pages>1097-1106</pages>
       <abstract>Social determinants of health (SDOH) extraction from clinical text is critical for downstream healthcare analytics. Although large language models (LLMs) have shown promise, they may rely on superficial cues leading to spurious predictions. Using the MIMIC portion of the SHAC (Social History Annotation Corpus) dataset and focusing on drug status extraction as a case study, we demonstrate that mentions of alcohol or smoking can falsely induce models to predict current/past drug use where none is present, while also uncovering concerning gender disparities in model performance. We further evaluate mitigation strategies—such as prompt engineering and chain-of-thought reasoning—to reduce these false positives, providing insights into enhancing LLM reliability in health domains.</abstract>
       <url hash="5dd59fda">2025.acl-short.86</url>
@@ -24459,7 +24459,7 @@
     </paper>
     <paper id="92">
       <title>Rethinking Evaluation Metrics for Grammatical Error Correction: Why Use a Different Evaluation Process than Human?</title>
-      <author><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Yusuke</first><last>Sakai</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <pages>1165-1172</pages>
@@ -24480,7 +24480,7 @@
       <author><first>Arya</first><last>Talebzadeh</last><affiliation>Meta</affiliation></author>
       <author><first>Aditya</first><last>Tayade</last><affiliation>Facebook</affiliation></author>
       <author><first>Sinong</first><last>Wang</last><affiliation>Facebook</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Han</first><last>Fang</last><affiliation>Meta AI</affiliation></author>
       <author><first>Hao</first><last>Ma</last><affiliation>Meta</affiliation></author>
       <pages>1173-1182</pages>
@@ -24492,7 +24492,7 @@
     <paper id="94">
       <title><fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>ke<fixed-case>D</fixed-case>: A Simple Method to Make Multiple Choice Benchmarks More Challenging</title>
       <author><first>Ahmed</first><last>Elhady</last></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Mikel</first><last>Artetxe</last><affiliation>Reka AI</affiliation></author>
       <pages>1183-1192</pages>
       <abstract>We introduce WiCkeD, a simple method to increase the complexity of existing multiple-choice benchmarks by randomly replacing a choice with “None of the above”, a method often used in educational tests. We show that WiCkeD can be automatically applied to any existing benchmark, making it more challenging. We apply WiCkeD to 6 popular benchmarks and use it to evaluate 18 open-weight LLMs. The performance of the models drops12.1 points on average with respect to the original versions of the datasets. When using chainof-thought on 3 MMLU datasets, the performance drop for the WiCkeD variant is similar to the one observed when using the LLMs directly, showing that WiCkeD is also challenging for models with enhanced reasoning abilities. WiCkeD also uncovers that some models are more sensitive to the extra reasoning required, providing additional information with respect to the original benchmarks.We relase our code and data at github.com/anonymized.</abstract>
@@ -24564,7 +24564,7 @@
       <title><fixed-case>M</fixed-case>ap<fixed-case>Q</fixed-case>a<fixed-case>T</fixed-case>or: An Extensible Framework for Efficient Annotation of Map-Based <fixed-case>QA</fixed-case> Datasets</title>
       <author><first>Mahir Labib</first><last>Dihan</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Mohammed Eunus</first><last>Ali</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <pages>1-10</pages>
       <abstract>Mapping and navigation services like Google Maps, Apple Maps, OpenStreetMap, are essential for accessing various location-based data, yet they often struggle to handle natural language geospatial queries. Recent advancements in Large Language Models (LLMs) show promise in question answering (QA), but creating reliable geospatial QA datasets from map services remains challenging. We introduce MapQaTor, an extensible open-source framework that streamlines the creation of reproducible, traceable map-based QA datasets. MapQaTor enables seamless integration with any maps API, allowing users to gather and visualize data from diverse sources with minimal setup. By caching API responses, the platform ensures consistent ground truth, enhancing the reliability of the data even as real-world information evolves. MapQaTor centralizes data retrieval, annotation, and visualization within a single platform, offering a unique opportunity to evaluate the current state of LLM-based geospatial reasoning while advancing their capabilities for improved geospatial understanding. Evaluation metrics show that, MapQaTor speeds up the annotation process by at least 30 times compared to manual methods, underscoring its potential for developing geospatial resources, such as complex map reasoning datasets. The website is live at: https://mapqator.github.io/ and a demo video is available at: https://youtu.be/bVv7-NYRsTw.</abstract>
       <url hash="50c576d5">2025.acl-demo.1</url>
@@ -24578,7 +24578,7 @@
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Dhairya</first><last>Dalal</last><affiliation>University of Galway</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>11-21</pages>
       <abstract>A persistent challenge in AI is the effective integration of material and formal inference - the former concerning the plausibility and contextual relevance of arguments, while the latter focusing on their logical and structural validity. Large Language Models (LLMs), by virtue of their extensive pre-training on large textual corpora, exhibit strong capabilities in material inference. However, their reasoning often lacks formal rigour and verifiability. At the same time, LLMs’ linguistic competence positions them as a promising bridge between natural and formal languages, opening up new opportunities for combining these two modes of reasoning.In this paper, we introduce PEIRCE, a neuro-symbolic framework designed to unify material and formal inference through an iterative conjecture–criticism process. Within this framework, LLMs play the central role of generating candidate solutions in natural and formal languages, which are then evaluated and refined via interaction with external critique models. These critiques include symbolic provers, which assess formal validity, as well as soft evaluators that measure the quality of the generated arguments along linguistic and epistemic dimensions such as plausibility, coherence, and parsimony. While PEIRCE is a general-purpose framework, we demonstrate its capabilities in the domain of natural language explanation generation - a setting that inherently demands both material adequacy and formal correctness.</abstract>
       <url hash="7b6f77ab">2025.acl-demo.2</url>
@@ -24593,10 +24593,10 @@
       <author><first>Geyu</first><last>Lin</last><affiliation>Institute of Infocomm Research, A*STAR</affiliation></author>
       <author><first>Shuo</first><last>Sun</last><affiliation>, A*STAR</affiliation></author>
       <author><first>Bin</first><last>Wang</last></author>
-      <author id="wenyu-zhang"><first>Wenyu</first><last>Zhang</last><affiliation>I2R, A*STAR</affiliation></author>
+      <author><first>Wenyu</first><last>Zhang</last><affiliation>I2R, A*STAR</affiliation></author>
       <author><first>Xunlong</first><last>Zou</last><affiliation>A*STAR</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
       <pages>22-30</pages>
       <abstract>We introduce MERaLiON-AudioLLM, the first general-purpose audio-based large language model designed for multitask learning, with a particular focus on Singlish understanding. Trained on 62 million multimodal instruction samples comprising a total of 260k hours of audio, it exhibits strong generalization across a diverse set of tasks, including—but not limited to—automatic speech recognition, spoken question answering, speech translation, and paralinguistic analysis. Our results show significant improvements in local speech recognition and task-specific understanding, making MERaLiON-AudioLLM a leading solution for region-specific AI applications. An interactive demo has been developed to enable user-friendly interactions, supported by a backend with customized caching and load-balancing mechanisms. We benchmark the model across a broad range of multilingual and multitask scenarios, where it demonstrates competitive performance compared to other open-source models. The demo page, model weights and videos are publically accessible.</abstract>
       <url hash="e85c0f56">2025.acl-demo.3</url>
@@ -24643,7 +24643,7 @@
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>40-50</pages>
       <abstract>We introduce MPLSandbox, an out-of-the-box multi-programming language sandbox designed to provide unified and comprehensive feedback from compiler and analysis tools for Large Language Models (LLMs). It can automatically identify the programming language of the code, compiling and executing it within an isolated sub-sandbox to ensure safety and stability. In addition, MPLSandbox integrates both traditional and LLM-based code analysis tools, providing a comprehensive analysis of generated code. It also can be effortlessly integrated into the training and deployment of LLMs to improve the quality and correctness of generated code. It also helps researchers streamline their workflows for various LLM-based code-related tasks, reducing the development cost. To validate the effectiveness of MPLSandbox, we conduct extensive experiments by integrating it into several training and deployment scenarios, and employing it to optimize workflows for a wide range of downstream code tasks. Our goal is to enhance researcher productivity on LLM-based code tasks by simplifying and automating workflows through delegation to MPLSandbox.</abstract>
       <url hash="cb7d56d5">2025.acl-demo.5</url>
@@ -24657,7 +24657,7 @@
       <author><first>Yesheng</first><last>Liu</last></author>
       <author><first>Jing-Shu</first><last>Zheng</last></author>
       <author><first>Xuejing</first><last>Li</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last><affiliation>BAAI</affiliation></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last><affiliation>BAAI</affiliation></author>
       <author><first>Bowen</first><last>Qin</last><affiliation>Beijing Academy of Artificial Intelligence</affiliation></author>
       <author><first>Richeng</first><last>Xuan</last></author>
       <author><first>Xi</first><last>Yang</last><affiliation>Beijing Academy of Artificial Intelligence</affiliation></author>
@@ -24699,7 +24699,7 @@
     </paper>
     <paper id="9">
       <title>Textagon: Boosting Language Models with Theory-guided Parallel Representations</title>
-      <author><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Ruiyang</first><last>Qin</last></author>
       <author><first>David</first><last>Dobolyi</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Ahmed</first><last>Abbasi</last><affiliation>University of Notre Dame</affiliation></author>
@@ -24793,7 +24793,7 @@
       <author><first>Hanna</first><last>Suominen</last><affiliation>Australian National University</affiliation></author>
       <author><first>Lois Yinghui</first><last>Hong</last></author>
       <author><first>Nick</first><last>Thieberger</last><affiliation>University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
       <author><first>Ekaterina</first><last>Vylomova</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>129-139</pages>
       <abstract>Machine translation (MT) systems that support low-resource languages often struggle on specialized domains. While researchers have proposed various techniques for domain adaptation, these approaches typically require model fine-tuning, making them impractical for non-technical users and small organizations. To address this gap, we propose Tulun, a versatile solution for terminology-aware translation, combining neural MT with large language model (LLM)-based post-editing guided by existing glossaries and translation memories.Our open-source web-based platform enables users to easily create, edit, and leverage terminology resources, fostering a collaborative human-machine translation process that respects and incorporates domain expertise while increasing MT accuracy.Evaluations show effectiveness in both real-world and benchmark scenarios: on medical and disaster relief translation tasks for Tetun and Bislama, our system achieves improvements of 16.90-22.41 ChrF++ points over baseline MT systems. Across six low-resource languages on the FLORES dataset, Tulun outperforms both standalone MT and LLM approaches, achieving an average improvement of 2.8 ChrF++ points over NLLB-54B. Tulun is publicly accessible at https://bislama-trans.rapha.dev.</abstract>
@@ -24834,7 +24834,7 @@
       <author><first>Wenhao</first><last>Zheng</last></author>
       <author><first>Zhengzhong</first><last>Liu</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Hongyi</first><last>Wang</last><affiliation>Rutgers University and GenBio AI</affiliation></author>
-      <author><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last><affiliation>Mohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Huaxiu</first><last>Yao</last><affiliation>Department of Computer Science, University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Qirong</first><last>Ho</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Petuum, Inc.</affiliation></author>
       <pages>159-166</pages>
@@ -24882,7 +24882,7 @@
       <author><first>Dirk</first><last>Groeneveld</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Rock Yuren</first><last>Pang</last></author>
       <author><first>Pang Wei</first><last>Koh</last><affiliation>Allen Institute for Artificial Intelligence and University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Sophie</first><last>Lebrecht</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Computer Science Department, Stanford University and NVIDIA</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
@@ -24996,7 +24996,7 @@
       <title><fixed-case>G</fixed-case>en<fixed-case>GO</fixed-case> Ultra: an <fixed-case>LLM</fixed-case>-powered <fixed-case>ACL</fixed-case> Paper Explorer</title>
       <author><first>Sotaro</first><last>Takeshita</last><affiliation>Universität Mannheim</affiliation></author>
       <author><first>Tornike</first><last>Tsereteli</last><affiliation>Universität Mannheim</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
       <pages>242-251</pages>
       <abstract>The ever-growing number of papers in natural language processing (NLP) poses the challenge of finding relevant papers. In our previous paper, we introduced GenGO, which complements NLP papers with various information, such as aspect-based summaries, to enable efficient paper exploration. While it delivers a better literature search experience, it lacks an interactive interface that dynamically produces information tailored to the user’s needs. To this end, we present an extension to our previous system, dubbed GenGO Ultra, which exploits large language models (LLMs) to dynamically generate responses grounded by published papers. We also conduct multi-granularity experiments to evaluate six text encoders and five LLMs. Our system is designed for transparency – based only on open-weight models, visible system prompts, and an open-source code base – to foster further development and research on top of our system: https://gengo-ultra.sotaro.io/</abstract>
       <url hash="3d41f975">2025.acl-demo.24</url>
@@ -25024,7 +25024,7 @@
       <author><first>Du Q.</first><last>Huynh</last></author>
       <author><first>Mark</first><last>Reynolds</last><affiliation>University of Western Australia</affiliation></author>
       <author><first>Yuanyi</first><last>Luo</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>University of Western Australia</affiliation></author>
       <pages>267-274</pages>
       <abstract>Acquiring structured data from domain-specific, image-based documents—such as scanned reports—is crucial for many downstream tasks but remains challenging due to document variability. Many of these documents exist as images rather than as machine-readable text, which requires human annotation to train automated extraction systems.We present <b>DocSpiral</b>, the first Human-in-the-Spiral assistive document annotation platform, designed to address the challenge of extracting structured information from domain-specific, image-based document collections.Our spiral design establishes an iterative cycle in which human annotations train models that progressively require less manual intervention. <b>DocSpiral</b> integrates document format normalization, comprehensive annotation interfaces, evaluation metrics dashboard, and API endpoints for the development of AI / ML models into a unified workflow.Experiments demonstrate that our framework reduces annotation time by at least 41% while showing consistent performance gains across three iterations during model training.By making this annotation platform freely accessible, we aim to lower barriers to AI/ML models development in document processing, facilitating the adoption of large language models in image-based, document-intensive fields such as geoscience and healthcare. The system is freely available at: <url>https://app.ai4wa.com</url>. The demonstration video is available: <url>https://app.ai4wa.com/docs/docspiral/demo</url>.</abstract>
       <url hash="9afa4abc">2025.acl-demo.26</url>
@@ -25074,7 +25074,7 @@
       <author><first>Rita</first><last>Sevastjanova</last><affiliation>ETHZ - ETH Zurich</affiliation></author>
       <author><first>Rebecca</first><last>Kehlbeck</last><affiliation>Universität Konstanz</affiliation></author>
       <author><first>Tobias</first><last>Stähle</last></author>
-      <author><first>Daniel A.</first><last>Keim</last><affiliation>Universität Konstanz</affiliation></author>
+      <author id="daniel-keim"><first>Daniel A.</first><last>Keim</last><affiliation>Universität Konstanz</affiliation></author>
       <author><first>Oliver</first><last>Deussen</last><affiliation>University of Konstanz</affiliation></author>
       <author><first>Andreas</first><last>Spitz</last><affiliation>Universität Konstanz</affiliation></author>
       <author><first>Mennatallah</first><last>El-Assady</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich</affiliation></author>
@@ -25117,7 +25117,7 @@
       <author><first>Dimitra</first><last>Zografistou</last><affiliation>Independent</affiliation></author>
       <author><first>Yohan</first><last>Jo</last><affiliation>Seoul National University</affiliation></author>
       <author><first>John</first><last>Lawrence</last><affiliation>University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>318-328</pages>
       <abstract>Despite extensive research in Argument Mining (AM), the field faces significant challenges in limited reproducibility, difficulty in comparing systems due to varying task combinations, and a lack of interoperability caused by the heterogeneous nature of argumentation theory. These challenges are further exacerbated by the absence of dedicated tools, with most advancements remaining isolated research outputs rather than reusable systems. The <tex-math>\texttt{oAMF}</tex-math> (Open Argument Mining Framework) addresses these issues by providing an open-source, modular, and scalable platform that unifies diverse AM methods. Initially released with seventeen integrated modules, the <tex-math>\texttt{oAMF}</tex-math> serves as a starting point for researchers and developers to build, experiment with, and deploy AM pipelines while ensuring interoperability and allowing multiple theories of argumentation to co-exist within the same framework. Its flexible design supports integration via Python APIs, drag-and-drop tools, and web interfaces, streamlining AM development for research and industry setup, facilitating method comparison, and reproducibility.</abstract>
       <url hash="7ed4660b">2025.acl-demo.31</url>
@@ -25129,7 +25129,7 @@
       <title>Bel Esprit: Multi-Agent Framework for Building <fixed-case>AI</fixed-case> Model Pipelines</title>
       <author><first>Yunsu</first><last>Kim</last><affiliation>aiXplain, Inc.</affiliation></author>
       <author><first>Ahmedelmogtaba</first><last>Abdelaziz</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last><affiliation>Universidade Federal de Minas Gerais</affiliation></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last><affiliation>Universidade Federal de Minas Gerais</affiliation></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last><affiliation>aiXplain</affiliation></author>
       <author><first>Hassan</first><last>Sawaf</last><affiliation>aiXplain</affiliation></author>
       <pages>329-339</pages>
@@ -25169,7 +25169,7 @@
       <title>Dialz: A Python Toolkit for Steering Vectors</title>
       <author><first>Zara</first><last>Siddique</last></author>
       <author><first>Liam</first><last>Turner</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
       <pages>363-375</pages>
       <abstract>We introduce *Dialz*, a Python library for advancing research on steering vectors for open-source LMs. Steering vectors allow users to modify activations at inference time to amplify or weaken a ‘concept’, e.g. honesty or positivity, providing a more powerful alternative to prompting or fine-tuning. Dialz supports a diverse set of tasks, including creating contrastive pair datasets, computing and applying steering vectors, and visualizations. Unlike existing libraries, Dialz emphasizes modularity and usability, enabling both rapid prototyping and in-depth analysis. We demonstrate how Dialz can be used to reduce harmful outputs such as stereotypes, while also providing insights into model behaviour across different layers. We release Dialz with full documentation, tutorials, and support for popular open-source models to encourage further research in safe and controllable language generation. Dialz enables faster research cycles and facilitates insights into model interpretability, paving the way for safer, more transparent, and more reliable AI systems.</abstract>
       <url hash="a24e73be">2025.acl-demo.35</url>
@@ -25180,7 +25180,7 @@
     <paper id="36">
       <title><fixed-case>FORG</fixed-case>3<fixed-case>D</fixed-case>: Flexible Object Rendering for Generating Vision-Language Spatial Reasoning Data from 3<fixed-case>D</fixed-case> Scenes</title>
       <author><first>Oscar</first><last>Pang</last><affiliation>Vector Institute and University of Toronto, Scarborough</affiliation></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
       <pages>376-384</pages>
       <abstract>We introduce FORG3D, a 3D rendering toolkit developed with Blender and Python, which synthesizes vision-language data for two primary purposes: (1) supporting human cognitive experiments that require fine-grained control over material and (2) analyzing and improving the visual reasoning capabilities of large vision-language models. The toolkit provides flexible and precise control over object placement, orientation, inter-object distances, and camera configurations while automatically generating detailed spatial metadata. Additionally, it includes a built-in feature for integrating AI-generated backgrounds, enhancing the realism of synthetic scenes. FORG3D is publicly available at https://github.com/compling-wat/FORG3D, and a video demonstration is available at https://www.youtube.com/watch?v=QvIqib_PU8A.</abstract>
       <url hash="c067ee37">2025.acl-demo.36</url>
@@ -25211,7 +25211,7 @@
       <author><first>Tatsuya</first><last>Ishigaki</last><affiliation>AIST, National Institute of Advanced Industrial Science and Technology</affiliation></author>
       <author><first>Kotaro</first><last>Funakoshi</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <author><first>Hiroya</first><last>Takamura</last><affiliation>AIST, National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>394-404</pages>
       <abstract>Previous research on sports commentary generation has primarily focused on describing major events in the match.However, real-world commentary often includes comments beyond what is visible in the video content, e.g., “Florentina has acquired him for 7 million euros.”For enhancing the viewing experience with such background information,we developed an audio commentary system for football matches that generates utterances with background information, as well as play-by-play commentary.Our system first extracts visual information, and determines whether it is an appropriate timing to produce an utterance.Then it decides which type of utterance to generate: play-by-play or background information. In the latter case, the system leverages external knowledge through retrieval-augmented generation.</abstract>
       <url hash="08030500">2025.acl-demo.38</url>
@@ -25260,8 +25260,8 @@
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
       <author><first>Abijith Trichur</first><last>Ramachandran</last></author>
       <author><first>Jeffrey</first><last>Flanigan</last><affiliation>University of California, Santa Cruz</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>426-435</pages>
       <abstract>In this paper, we present LiDARR (**Li**nking **D**ocument **A**MRs with **R**eferents **R**esolvers), a web tool for semantic annotation at the document level using the formalism of Abstract Meaning Representation (AMR). LiDARR streamlines the creation of comprehensive knowledge graphs from natural language documents through semantic annotation. The tool features a visualization and interactive user interface, transforming document-level AMR annotation into an models-facilitated verification process. This is achieved through the integration of an AMR-to-surface alignment model and a coreference resolution model. Additionally, we incorporate PropBank rolesets into LiDARR to extend implicit roles in annotated AMR, allowing implicit roles to be linked through the coreference chains via AMRs.</abstract>
       <url hash="2cad24b8">2025.acl-demo.41</url>
@@ -25276,7 +25276,7 @@
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Viet Dac</first><last>Lai</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <pages>436-447</pages>
       <abstract>While small language models (SLMs) show promises for mobile deployment, their real world performance and applications on smartphones remain underexplored. We present SlimLM, a series of SLMs optimized for document assistance tasks on mobile devices. Through extensive experiments on a Samsung Galaxy S24, we identify the sweet spot between model size (ranging from 125M to 8B parameters), context length, and inference time for efficient on-device processing. SlimLM is pretrained on SlimPajama-627B and fine-tuned on DocAssist, our constructed dataset for summarization, question answering, and suggestion tasks. Our smallest model demonstrates efficient performance on S24, while larger variants offer enhanced capabilities within mobile constraints. We evaluate SlimLM against existing SLMs, showing comparable or superior performance and offering a benchmark for future research in on-device language models. We provide an Android application allowing users to experience SlimLM’s document assistance capabilities, offering valuable insights for mobile developers, researchers, and companies seeking privacy-preserving on-device alternatives to server-based language models.</abstract>
       <url hash="53901f14">2025.acl-demo.42</url>
@@ -25354,14 +25354,14 @@
     <paper id="48">
       <title><fixed-case>C</fixed-case>ode<fixed-case>A</fixed-case>rena: A Collective Evaluation Platform for <fixed-case>LLM</fixed-case> Code Generation</title>
       <author><first>Mingzhe</first><last>Du</last><affiliation>Nanyang Technological University and National University of Singapore</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Bin</first><last>Ji</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Yuhao</first><last>Qing</last><affiliation>The University of Hong Kong</affiliation></author>
       <author><first>Dong</first><last>Huang</last></author>
       <author><first>Terry Yue</first><last>Zhuo</last><affiliation>Commonwealth Scientific and Industrial Research Organisation, CSIRO</affiliation></author>
       <author><first>Qian</first><last>Liu</last><affiliation>Tiktok</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>502-512</pages>
       <abstract>Large Language Models (LLMs) have reshaped code generation by synergizing their exceptional comprehension of natural language and programming syntax, thereby substantially boosting developer productivity. These advancements have prompted numerous efforts to quantitatively evaluate their coding capabilities. However, persistent challenges, such as benchmark leakage, data dissipation, and limited system accessibility, continue to impede a timely and accurate assessment. To address these limitations, we introduce CodeArena, an online evaluation framework tailored for LLM code generation. Its key innovation is a collective evaluation mechanism, which dynamically recalibrates individual model scores based on the holistic performance of all participating models, mitigating score biases caused by widespread benchmark leakage. In addition, CodeArena ensures open access to all submitted solutions and test cases and provides automation-friendly APIs to streamline the code evaluation workflow. Our main contributions are: (1) a collective evaluation system for unbiased assessment, (2) a public repository of solutions and test cases, and (3) automation-ready APIs for seamless integration.</abstract>
       <url hash="b9d340d4">2025.acl-demo.48</url>
@@ -25389,7 +25389,7 @@
     </paper>
     <paper id="50">
       <title>gec-metrics: A Unified Library for Grammatical Error Correction Evaluation</title>
-      <author><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Yusuke</first><last>Sakai</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <pages>524-534</pages>
@@ -25478,7 +25478,7 @@
       <author><first>Zheqi</first><last>He</last><affiliation>Beijing Academy of Artificial Intelligence</affiliation></author>
       <author><first>Tongshuai.ren</first><last>Tongshuai.ren</last></author>
       <author><first>Xuejing</first><last>Li</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last><affiliation>BAAI</affiliation></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last><affiliation>BAAI</affiliation></author>
       <author><first>Xi</first><last>Yang</last><affiliation>Beijing Academy of Artificial Intelligence</affiliation></author>
       <pages>583-591</pages>
       <abstract>We introduce FlagEval-Arena, an evaluation platform for side-by-side comparisons of large language models and text-driven AIGC systems.Compared with the well-known LM Arena (LMSYS Chatbot Arena), we reimplement our own framework with the flexibility to introduce new mechanisms or features. Our platform enables side-by-side evaluation not only for language models or vision-language models, but also text-to-image or text-to-video synthesis. We specifically target at Chinese audience with a more focus on the Chinese language, more models developed by Chinese institutes, and more general usage beyond the technical community. As a result, we currently observe very interesting differences from usual results presented by LM Arena. Our platform is available via this URL: <url>https://flageval.baai.org/#/arena</url>.</abstract>
@@ -25698,7 +25698,7 @@
       <title><fixed-case>HYPEROFA</fixed-case>: Expanding <fixed-case>LLM</fixed-case> Vocabulary to New Languages via Hypernetwork-Based Embedding Initialization</title>
       <author><first>Enes</first><last>Özeren</last><affiliation>University of Munich, Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>79-96</pages>
       <abstract>Many pre-trained language models (PLMs) exhibit suboptimal performance on mid- and low-resource languages, largely due to limited exposure to these languages during pre-training. A common strategy to address this is to introduce new tokens specific to the target languages, initialize their embeddings, and apply continual pre-training on target-language data. Among such methods, OFA (Liu et al., 2024a) proposes a similarity-based subword embedding initialization heuristic that is both effective and efficient. However, OFA restricts target-language token embeddings to be convex combinations of a fixed number of source-language embeddings, which may limit expressiveness. To overcome this limitation, we propose HYPEROFA, a hypernetwork-based approach for more adaptive token embedding initialization. The hypernetwork is trained to map from an external multilingual word vector space to the PLM’s token embedding space using source-language tokens. Once trained, it can generate flexible embeddings for target-language tokens, serving as a good starting point for continual pretraining. Experiments demonstrate that HYPEROFA consistently outperforms random initialization baseline and matches or exceeds the performance of OFA in both continual pre-training convergence and downstream task performance. We make the code publicly available.</abstract>
       <url hash="a0654e3b">2025.acl-srw.6</url>
@@ -25803,7 +25803,7 @@
       <title>Your Pretrained Model Tells the Difficulty Itself: A Self-Adaptive Curriculum Learning Paradigm for Natural Language Understanding</title>
       <author><first>Qi</first><last>Feng</last></author>
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>222-239</pages>
       <abstract>Curriculum learning is a widely adopted training strategy in natural language processing (NLP), where models are exposed to examples organized by increasing difficulty to enhance learning efficiency and performance. However, most existing approaches rely on manually defined difficulty metrics – such as text length – which may not accurately reflect the model’s own perspective. To overcome this limitation, we present a self-adaptive curriculum learning paradigm that prioritizes fine-tuning examples based on difficulty scores predicted by pre-trained language models (PLMs) themselves. Building on these scores, we explore various training strategies that differ in the ordering of examples for the fine-tuning: from easy-to-hard, hard-to-easy, to mixed sampling. We evaluate our method on four natural language understanding (NLU) datasets covering both binary and multi-class classification tasks.Experimental results show that our approach leads to faster convergence and improved performance compared to standard random sampling.</abstract>
       <url hash="abe9e4ce">2025.acl-srw.15</url>
@@ -25934,7 +25934,7 @@
       <author><first>Junyoung</first><last>Son</last></author>
       <author><first>Sungjin</first><last>Park</last></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>422-433</pages>
       <abstract>Retrieval-Augmented Generation (RAG) has emerged as a crucial framework in natural language processing (NLP), improving factual consistency and reducing hallucinations by integrating external document retrieval with large language models (LLMs). However, the effectiveness of RAG is often hindered by coreferential complexity in retrieved documents, which can introduce ambiguity and interfere with in-context learning. In this study, we systematically investigate how entity coreference affects both document retrieval and generative performance in RAG-based systems, focusing on retrieval relevance, contextual understanding, and overall response quality. We demonstrate that coreference resolution enhances retrieval effectiveness and improves question-answering (QA) performance. Through comparative analysis of different pooling strategies in retrieval tasks, we find that mean pooling demonstrates superior context capturing ability after applying coreference resolution. In QA tasks, we discover that smaller models show greater improvement from the disambiguation process, likely due to their limited inherent capacity for handling referential ambiguity. With these findings, this study aims to provide a deeper understanding of the challenges posed by coreferential complexity in RAG, offering guidance for improving retrieval and generation in knowledge-intensive AI applications.</abstract>
       <url hash="671efdf9">2025.acl-srw.27</url>
@@ -26041,7 +26041,7 @@
       <author><first>Laura</first><last>Zeidler</last><affiliation>niversity of Technology Nuremberg</affiliation></author>
       <author><first>Chris</first><last>Jenkins</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Filip</first><last>Miletić</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>539-547</pages>
       <abstract>The task of automatic dialect classification is typically tackled using traditional machine-learning models with bag-of-words unigram features. We explore two alternative methods for distinguishing dialects across 20 Spanish-speaking countries:(i) Support vector machine and decision tree models were trained on dialectal features tailored to the Spanish dialects, combined with standard unigrams. (ii) A pre-trained BERT model was fine-tuned on the task.Results show that the tailored features generally did not have a positive impact on traditional model performance, but provide a salient way of representing dialects in a content-agnostic manner. The BERT model wins over traditional models but with only a tiny margin, while sacrificing explainability and interpretability.</abstract>
       <url hash="bb398842">2025.acl-srw.36</url>
@@ -26063,7 +26063,7 @@
     <paper id="38">
       <title>A Dual-Layered Evaluation of Geopolitical and Cultural Bias in <fixed-case>LLM</fixed-case>s</title>
       <author><first>Sean</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
-      <author><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
+      <author id="hyuhng-joon-kim"><first>Hyuhng Joon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <pages>580-595</pages>
       <abstract>As large language models (LLMs) are increasingly deployed across diverse linguistic and cultural contexts, understanding their behavior in both factual and disputable scenarios is essential—especially when their outputs may shape public opinion or reinforce dominant narratives. In this paper, we define two types of bias in LLMs: model bias (bias stemming from model training) and inference bias (bias induced by the language of the query), through a two-phase evaluation.Phase 1 evaluates LLMs on factual questions where a single verifiable answer exists, assessing whether models maintain consistency across different query languages. Phase 2 expands the scope by probing geopolitically sensitive disputes, where responses may reflect culturally embedded or ideologically aligned perspectives. We construct a manually curated dataset spanning both factual and disputable QA, across four languages and question types. The results show that Phase 1 exhibits query language-induced alignment, while Phase 2 reflects an interplay between the model’s training context and query language. This paper offers a structured framework for evaluating LLM behavior across neutral and sensitive topics, providing insights for future LLM deployment and culturally-aware evaluation practices in multilingual contexts.WARNING: this paper covers East Asian issues which may be politically sensitive.</abstract>
       <url hash="3d0ed93d">2025.acl-srw.38</url>
@@ -26078,7 +26078,7 @@
       <author><first>Narumi</first><last>Tokunaga</last></author>
       <author><first>Yuki</first><last>Yamagata</last></author>
       <author><first>Kouji</first><last>Kozaki</last><affiliation>Osaka Electro-Communication University</affiliation></author>
-      <author><first>Yuji</first><last>Matsumoto</last><affiliation>RIKEN Center for Advanced Intelligence Project</affiliation></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last><affiliation>RIKEN Center for Advanced Intelligence Project</affiliation></author>
       <pages>596-607</pages>
       <abstract>Recognizing biomedical concepts in the text is vital for ontology refinement, knowledge graph construction, and concept relationship discovery. However, traditional concept recognition methods, relying on explicit mention identification, often fail to capture complex concepts not explicitly stated in the text. To overcome this limitation, we introduce MA-COIR, a framework that reformulates concept recognition as an indexing-recognition task. By assigning semantic search indexes (ssIDs) to concepts, MA-COIR resolves ambiguities in ontology entries and enhances recognition efficiency. Using a pretrained BART-based model fine-tuned on small datasets, our approach reduces computational requirements to facilitate adoption by domain experts. Furthermore, we incorporate large language model (LLM)-generated queries and synthetic data to improve recognition in low-resource settings. Experimental results on three scenarios (CDR, HPO, and HOIP) highlight the effectiveness of MA-COIR in recognizing both explicit and implicit concepts without the need for mention-level annotations during inference, advancing ontology-driven concept recognition in biomedical domain applications. Our code and constructed data are available at https://github.com/sl-633/macoir-master.</abstract>
       <url hash="f925460b">2025.acl-srw.39</url>
@@ -26146,7 +26146,7 @@
     </paper>
     <paper id="45">
       <title>Fact from Fiction: Finding Serialized Novels in Newspapers</title>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <author><first>Alie</first><last>Lassche</last></author>
       <author><first>Katrine Frøkjær</first><last>Baunvig</last><affiliation>NA</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Aarhus University</affiliation></author>
@@ -26207,7 +26207,7 @@
       <author><first>Radu</first><last>Jianu</last><affiliation>NA</affiliation></author>
       <author><first>Aidan</first><last>Slingsby</last><affiliation>NA</affiliation></author>
       <author><first>Jo</first><last>Wood</last><affiliation>City University</affiliation></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <pages>760-773</pages>
       <abstract>We present a new dataset for chart question answering (CQA) constructed from visualization notebooks. The dataset features real-world, multi-view charts paired with natural language questions grounded in analytical narratives. Unlike prior benchmarks, our data reflects ecologically valid reasoning workflows. Benchmarking state-of-the-art multimodal large language models reveals a significant performance gap, with GPT-4.1 achieving an accuracy of 69.3%, underscoring the challenges posed by this more authentic CQA setting.</abstract>
       <url hash="02d2698f">2025.acl-srw.50</url>
@@ -26280,7 +26280,7 @@
     <paper id="59">
       <title>Learning and Enforcing Context-Sensitive Control for <fixed-case>LLM</fixed-case>s</title>
       <author><first>Mohammad</first><last>Albinhassan</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <author><first>Mark</first><last>Law</last></author>
       <author><first>Alessandra</first><last>Russo</last><affiliation>Imperial College London</affiliation></author>
       <pages>834-842</pages>
@@ -26308,7 +26308,7 @@
       <author><first>Julian</first><last>Schlenker</last><affiliation>Universität Mannheim</affiliation></author>
       <author><first>Jenny</first><last>Kunz</last><affiliation>Linköping University</affiliation></author>
       <author><first>Tatiana</first><last>Anikina</last><affiliation>German Research Center for AI</affiliation></author>
-      <author><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Simon</first><last>Ostermann</last><affiliation>German Research Center for AI</affiliation></author>
       <pages>849-871</pages>
       <abstract>Most state-of-the-art large language models (LLMs) are trained mainly on English data, limiting their effectiveness on non-English, especially low-resource, languages. This study investigates whether language adapters can facilitate cross-lingual transfer in English-centric LLMs. We train language adapters for 13 languages using Llama 2 (7B) and Llama 3.1 (8B) as base models, and evaluate their effectiveness on two downstream tasks (MLQA and SIB-200) using either task adapters or in-context learning. Our results reveal that language adapters improve performance for languages not seen during pretraining, but provide negligible benefit for seen languages. These findings highlight the limitations of language adapters as a general solution for multilingual adaptation in English-centric LLMs.</abstract>
@@ -26443,7 +26443,7 @@
     </paper>
     <paper id="77">
       <title>Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction</title>
-      <author><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Justin</first><last>Vasselli</last></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <pages>1004-1015</pages>
@@ -26685,7 +26685,7 @@
       <author><first>Roman</first><last>Vashurin</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Artem</first><last>Vazhentsev</last><affiliation>Skoltech/AIRI</affiliation></author>
       <author><first>Ekaterina</first><last>Fadeeva</last><affiliation>ETH Zürich</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>MBZUAI</affiliation></author>
       <pages>3-4</pages>
       <abstract>Large language models (LLMs) are widely used in NLP applications, but their tendency to produce hallucinations poses significant challenges to the reliability and safety, ultimately undermining user trust. This tutorial offers the first systematic introduction to uncertainty quantification (UQ) for LLMs in text generation tasks – a conceptual and methodological framework that provides tools for communicating the reliability of a model answer. This additional output could be leveraged for a range of downstream tasks, including hallucination detection and selective generation. We begin with the theoretical foundations of uncertainty, highlighting why techniques developed for classification might fall short in text generation. Building on this grounding, we survey state-of-the-art white-box and black-box UQ methods, from simple entropy-based scores to supervised probes over hidden states and attention weights, and show how they enable selective generation and hallucination detection. Additionally, we discuss the calibration of uncertainty scores for better interpretability. A key feature of the tutorial is practical examples using LM-Polygraph, an open-source framework that unifies more than a dozen recent UQ and calibration algorithms and provides a large-scale benchmark, allowing participants to implement UQ in their applications, as well as reproduce and extend experimental results with only a few lines of code. By the end of the session, researchers and practitioners will be equipped to (i) evaluate and compare existing UQ techniques, (ii) develop new methods, and (iii) implement UQ in their code for deploying safer, more trustworthy LLM-based systems.</abstract>
       <url hash="a88fbb2a">2025.acl-tutorials.3</url>
@@ -26696,8 +26696,8 @@
       <title>Human-<fixed-case>AI</fixed-case> Collaboration: How <fixed-case>AI</fixed-case>s Augment Human Teammates</title>
       <author><first>Sherry</first><last>Wu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Joseph</first><last>Chang</last><affiliation>Allen Institute for AI</affiliation></author>
-      <author><first>Marti A.</first><last>Hearst</last><affiliation>University of California, Berkeley</affiliation></author>
+      <author id="joseph-z-chang"><first>Joseph</first><last>Chang</last><affiliation>Allen Institute for AI</affiliation></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last><affiliation>University of California, Berkeley</affiliation></author>
       <author><first>Kyle</first><last>Lo</last><affiliation>Allen Institute for AI</affiliation></author>
       <pages>5-6</pages>
       <abstract>The continuous, rapid development of general-purpose models like LLMs suggests the theoretical possibility of AI performing any human task. Yet, despite the potential and promise, these models are far from perfect, excelling at certain tasks while struggling with others. The tension between what is possible and a model’s limitations raises the general research question that has attracted attention from various disciplines: What is the best way to use AI to maximize its benefits? In this tutorial, we will review recent developments related to human-AI teaming and collaboration. To the best of our knowledge, our tutorial will be the first to provide a more integrated view from NLP, HCI, Computational Social Science, and Learning Science, etc., and highlight how different communities have identified the goals and societal impacts of such collaborations, both positive and negative. We will further discuss how to operationalize these Human-AI collaboration goals, and reflect on how state-of-the-art AI models should be evaluated and scaffolded to make them most useful in collaborative contexts.</abstract>
@@ -26752,12 +26752,12 @@
     <paper id="8">
       <title>Guardrails and Security for <fixed-case>LLM</fixed-case>s: Safe, Secure and Controllable Steering of <fixed-case>LLM</fixed-case> Applications</title>
       <author><first>Traian</first><last>Rebedea</last><affiliation>NVIDIA / University Politehnica of Bucharest</affiliation></author>
-      <author><first>Leon</first><last>Derczynski</last><affiliation>NVIDIA / ITU University of Copenhagen</affiliation></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last><affiliation>NVIDIA / ITU University of Copenhagen</affiliation></author>
       <author><first>Shaona</first><last>Ghosh</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Makesh Narsimhan</first><last>Sreedhar</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Faeze</first><last>Brahman</last><affiliation>Allen Institute for AI</affiliation></author>
       <author><first>Liwei</first><last>Jiang</last><affiliation>University of Washington / NVIDIA</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>University of Washington</affiliation></author>
       <author><first>Christopher</first><last>Parisien</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Stanford University / NVIDIA</affiliation></author>
@@ -26858,7 +26858,7 @@
       <author><first>Yibo</first><last>Yan</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Shen</first><last>Wang</last></author>
       <author><first>Jiahao</first><last>Huo</last><affiliation>The Hong Kong University of Science and Technology and Tongji University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <author><first>Xuming</first><last>Hu</last><affiliation>The Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Qingsong</first><last>Wen</last><affiliation>Squirrel Ai Learning</affiliation></author>
       <pages>69-82</pages>
@@ -26982,7 +26982,7 @@
       <author><first>Mehrzad</first><last>Samadi</last><affiliation>Parabricks Inc. and NVIDIA</affiliation></author>
       <author><first>Sean</first><last>Narenthiran</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Aleksander</first><last>Ficek</last><affiliation>NVIDIA</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>NVIDIA</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Jocelyn</first><last>Huang</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Jagadeesh</first><last>Balam</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Boris</first><last>Ginsburg</last><affiliation>NVIDIA</affiliation></author>
@@ -27007,7 +27007,7 @@
       <author><first>Jagadeesh</first><last>Balam</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Boris</first><last>Ginsburg</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Yu-Chiang Frank</first><last>Wang</last><affiliation>NVIDIA and National Taiwan University</affiliation></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <pages>222-236</pages>
       <abstract>Construction of a general-purpose post-recognition error corrector poses a crucial question: how can we most effectively train a model on a large mixture of domain datasets? The answer would lie in learning dataset-specific features and digesting their knowledge in a single model. Previous methods achieve this by having separate correction language models, resulting in a significant increase in parameters. In this work, we present Mixture-of-Experts as a solution, highlighting that MoEs are much more than a scalability tool. We propose a Multi-Task Correction MoE, where we train the experts to become an “expert” of speech-to-text, language-to-text and vision-to-text datasets by learning to route each dataset’s tokens to its mapped expert. Experiments on the Open ASR Leaderboard show that we explore a new state-of-the-art performance by achieving an average relative 5.0% WER reduction and substantial improvements in BLEU scores for speech and translation tasks. On zero-shot evaluation, NeKo outperforms GPT-3.5 and Claude-3.5-Sonnet with 15.5% to 27.6% relative WER reduction in the Hyporadise benchmark. NeKo performs competitively on grammar and post-OCR correction as a multi-task model.</abstract>
       <url hash="1a0193c0">2025.acl-industry.17</url>
@@ -27095,7 +27095,7 @@
       <author><first>Yejin</first><last>Choi</last><affiliation>Computer Science Department, Stanford University and NVIDIA</affiliation></author>
       <author><first>Yu-Chiang Frank</first><last>Wang</last><affiliation>NVIDIA and National Taiwan University</affiliation></author>
       <author><first>Yuta</first><last>Nakashima</last><affiliation>The University of Osaka</affiliation></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <pages>295-309</pages>
       <abstract>Large Vision-Language Models (LVLMs) have transformed image captioning, shifting from concise captions to detailed descriptions. We introduce LOTUS, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of standardized criteria, bias-aware assessments, and user preference considerations. LOTUS comprehensively evaluates various aspects, including caption quality (e.g., alignment, descriptiveness), risks (e.g., hallucination), and societal biases (e.g., gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences. Our analysis of recent LVLMs reveals no single model excels across all criteria, while correlations emerge between caption detail and bias risks. Preference-oriented evaluations demonstrate that optimal model selection depends on user priorities.</abstract>
       <url hash="d8088330">2025.acl-industry.22</url>
@@ -27138,7 +27138,7 @@
     <paper id="25">
       <title>Efficient Out-of-Scope Detection in Dialogue Systems via Uncertainty-Driven <fixed-case>LLM</fixed-case> Routing</title>
       <author><first>Álvaro</first><last>Zaera</last><affiliation>ETHZ - ETH Zurich</affiliation></author>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>Ivan</first><last>Sekulic</last><affiliation>TelepathyLabs</affiliation></author>
       <author><first>Paolo</first><last>Rosso</last><affiliation>University of Fribourg</affiliation></author>
       <pages>328-335</pages>
@@ -27343,7 +27343,7 @@
       <author><first>Xuming</first><last>Hu</last><affiliation>The Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Wenhao</first><last>Jiang</last><affiliation>Guangming Laboratory</affiliation></author>
       <author><first>Hai-Tao</first><last>Zheng</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <pages>553-567</pages>
       <abstract>Recently, Large Language Models (LLMs) have been widely studied by researchers for their roles in various downstream NLP tasks. As a fundamental task in the NLP field, Chinese Grammatical Error Correction (CGEC) aims to correct all potential grammatical errors in the input sentences. Previous studies have shown that LLMs’ performance as correctors on CGEC remains unsatisfactory due to the challenging nature of the task. To promote the CGEC field to better adapt to the era of LLMs, we rethink the roles of LLMs in the CGEC task so that they can be better utilized and explored in CGEC. Considering the rich grammatical knowledge stored in LLMs and their powerful semantic understanding capabilities, we utilize LLMs as explainers to provide explanation information to the CGEC small models during error correction, aiming to enhance performance. We also use LLMs as evaluators to bring more reasonable CGEC evaluations, thus alleviating the troubles caused by the subjectivity of the CGEC task. In particular, our work is also an active exploration of how LLMs and small models better collaborate in downstream tasks. Extensive experiment and detailed analyses on widely used datasets verify the effectiveness of our intuition and the proposed methods.</abstract>
       <url hash="c39824e0">2025.acl-industry.39</url>
@@ -27478,7 +27478,7 @@
       <author><first>Maxime</first><last>Delmas</last><affiliation>Idiap Research Institute</affiliation></author>
       <author><first>Magdalena</first><last>Wysocka</last><affiliation>CRUK NBC Manchester Institute and Technical University of Gdansk</affiliation></author>
       <author><first>Danilo</first><last>Gusicuma</last><affiliation>NA</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>693-705</pages>
       <abstract>The discovery of novel antibiotics is critical to address the growing antimicrobial resistance (AMR). However, pharmaceutical industries face high costs (over $1 billion), long timelines, and a high failure rate, worsened by the rediscovery of known compounds. We propose an LLM-based pipeline that acts as an alert system, detecting prior evidence of antibiotic activity to prevent costly rediscoveries. The system integrates literature on organisms and chemicals into a Knowledge Graph (KG), ensuring taxonomic resolution, synonym handling, and multi-level evidence classification. We tested the pipeline on a private list of 73 potential antibiotic-producing organisms, disclosing 12 negative hits for evaluation. The results highlight the effectiveness of the pipeline for evidence reviewing, reducing false negatives, and accelerating decision-making. The KG for negative hits as well as the user interface for interactive exploration are available at https://github.com/idiap/abroad-kg-store and https://github.com/idiap/abroad-demo-webapp.</abstract>
       <url hash="daa46f58">2025.acl-industry.49</url>
@@ -27574,7 +27574,7 @@
     </paper>
     <paper id="56">
       <title>Enriching children’s stories with <fixed-case>LLM</fixed-case>s: Delivering multilingual data enrichment for children’s books at scale and across markets</title>
-      <author><first>Zarah</first><last>Weiss</last><affiliation>Nextory AB</affiliation></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last><affiliation>Nextory AB</affiliation></author>
       <author><first>Christof</first><last>Meyer</last><affiliation>NA</affiliation></author>
       <author><first>Mikael</first><last>Andersson</last><affiliation>NA</affiliation></author>
       <pages>804-812</pages>
@@ -27953,7 +27953,7 @@
       <author><first>Mizanur</first><last>Rahman</last></author>
       <author><first>Amran</first><last>Bhuiyan</last></author>
       <author><first>Mir Tafseer</first><last>Nayeem</last><affiliation>University of Alberta</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
       <author><first>Jimmy</first><last>Huang</last><affiliation>York University and York University</affiliation></author>
       <pages>1203-1216</pages>
@@ -28004,7 +28004,7 @@
       <author><first>Sun</first><last>He</last><affiliation>NA</affiliation></author>
       <author><first>Hock Huan</first><last>Goh</last><affiliation>NA</affiliation></author>
       <author><first>Lung Hsiang</first><last>Wong</last><affiliation>NA</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>1244-1253</pages>
       <abstract>The integration of generative artificial intelligence into educational applications has enhanced personalized and interactive learning experiences, and it shows strong potential to promote young learners language acquisition. However, it is still challenging to ensure consistent and robust performance across different languages and cultural contexts, and kids-friendly design requires simplified instructions, engaging interactions, and age-appropriate scaffolding to maintain motivation and optimize learning outcomes.In this work, we introduce SingaKids, a dialogic tutor designed to facilitate language learning through picture description tasks. Our system integrates dense image captioning, multilingual dialogic interaction, speech understanding, and engaging speech generation to create an immersive learning environment in four languages: English, Mandarin, Malay, and Tamil. We further improve the system through multilingual pre-training, task-specific tuning, and scaffolding optimization. Empirical studies with elementary school students demonstrate that SingaKids provides effective dialogic teaching, benefiting learners at different performance levels.</abstract>
       <url hash="2195bd46">2025.acl-industry.86</url>
@@ -28019,7 +28019,7 @@
       <author><first>Jeena J</first><last>Prakash</last></author>
       <author><first>Shashi</first><last>Kumar</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
       <author><first>Malolan</first><last>Chetlur</last><affiliation>NA</affiliation></author>
-      <author><first>Andreas</first><last>Stolcke</last><affiliation>Uniphore Technologies</affiliation></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last><affiliation>Uniphore Technologies</affiliation></author>
       <pages>1254-1262</pages>
       <abstract>There has been increasing interest in unifying streaming and non-streaming automatic speech recognition (ASR) models to reduce development, training, and deployment costs. We present a unified framework that trains a single end-to-end ASR model for both streaming and non-streaming applications, leveraging future context information. We propose to use dynamic right-context through the chunked attention masking in the training of zipformer-based ASR models. We demonstrate that using right-context is more effective in zipformer models compared to other conformer models due to its multi-scale nature. We analyze the effect of varying the number of right-context frames on accuracy and latency of the streaming ASR models. We use Librispeech and large in-house conversational datasets to train different versions of streaming and non-streaming models and evaluate them in a production grade server-client setup across diverse testsets of different domains. The proposed strategy reduces word error by relative 7.9% with a small degradation in user-perceived latency. By adding more right-context frames, we are able to achieve streaming performance close to that of non-streaming models. Our approach also allows flexible control of the latency-accuracy tradeoff according to customers requirements.</abstract>
       <url hash="6d3f2e84">2025.acl-industry.87</url>
@@ -28029,7 +28029,7 @@
     <paper id="88">
       <title>A Semi-supervised Scalable Unified Framework for <fixed-case>E</fixed-case>-commerce Query Classification</title>
       <author><first>Chunyuan</first><last>Yuan</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Zhen</first><last>Fang</last><affiliation>NA</affiliation></author>
       <author><first>Ming</first><last>Pang</last><affiliation>JD.com</affiliation></author>
       <author><first>Xue</first><last>Jiang</last></author>
@@ -28100,7 +28100,7 @@
       <author><first>B</first><last>JayaPrakash</last><affiliation>Hyundai Motors India Engineering Pvt Ltd</affiliation></author>
       <author><first>Chintalapalli Raja</first><last>Kullayappa</last><affiliation>Hyundai Motor Company</affiliation></author>
       <author><first>Mandala Jagadeesh</first><last>Reddy</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>1322-1338</pages>
       <abstract>In-car AI assistants enhance driving by enabling hands-free interactions, yet they often struggle with multi-turn conversations and fail to handle cognitively complex follow-up questions. This limits their effectiveness in real-world deployment. To address this limitation, we propose a framework that leverages Bloom’s Taxonomy to systematically generate follow-up questions with increasing cognitive complexity and a Gricean-inspired evaluation framework to assess their Logical Consistency, Informativeness, Relevance, and Clarity. We introduce a dataset comprising 750 human-annotated seed questions and 3750 follow-up questions, with human evaluation confirming that 96.68% of the generated questions adhere to the intended Bloom’s Taxonomy levels. Our approach, validated through both LLM-based and human assessments, also identifies the specific cognitive complexity level at which in-car AI assistants begin to falter information that can help developers measure and optimize key cognitive aspects of conversational performance.</abstract>
       <url hash="aa9640c0">2025.acl-industry.93</url>
@@ -28201,7 +28201,7 @@
       <title><fixed-case>REVISE</fixed-case>: A Framework for Revising <fixed-case>OCR</fixed-case>ed text in Practical Information Systems with Data Contamination Strategy</title>
       <author><first>Gyuho</first><last>Shim</last><affiliation>Korea University</affiliation></author>
       <author><first>Seongtae</first><last>Hong</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>1423-1434</pages>
       <abstract>Recent advances in large language models (LLMs) have significantly improved Document AI, demonstrating remarkable performance on document understanding tasks such as question answering. However, existing approaches primarily focus on solving specific tasks, lacking the capability to structurally organize and systematically manage document information. To address this limitation, we propose Revise, a framework that systematically corrects errors introduced by OCR at the character, word, and structural levels. Specifically, Revise employs a comprehensive hierarchical taxonomy of common OCR errors and a synthetic data generation strategy that realistically simulates such errors to train an effective correction model. Experimental results demonstrate that Revise effectively corrects OCR outputs, enabling more structured representation and systematic management of document contents. Consequently, our method significantly enhances downstream performance in document retrieval and question answering tasks, highlighting the potential to overcome the structural management limitations of existing Document AI frameworks.</abstract>
       <url hash="6240ff16">2025.acl-industry.100</url>
@@ -28254,7 +28254,7 @@
       <author><first>Sameer</first><last>Pimparkhede</last></author>
       <author><first>Srikanth G.</first><last>Tamilselvam</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Prince</first><last>Kumar</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>1466-1479</pages>
       <abstract>System-level programming is essential for modern enterprise infrastructure, enabling the automation and management of complex systems through declarative code. Developers write this code based on schemas, which themselves are a form of code that defines constraints like data types and required fields. These schemas help ensure operational correctness and smooth integration across systems. However, as enterprise schemas become complex, manually writing code adhering to these constraints becomes challenging for developers. Large Language Models (LLMs) have demonstrated potential in code generation and natural language understanding, particularly in zero-shot and few-shot settings. However, applying LLMs to handle constraints represented in code, essential for system-level programming rather than natural language, has not been explored. Hence, we introduce ConCodeEval, a study across two key dimensions: format and constraint efficacy, with a first-of-its-kind benchmark involving two novel experiments for code constraints across five representations (JSON, YAML, XML, Python, and natural language). Our findings suggest that conscious choice of representations can lead to optimal use of LLMs in enterprise use cases involving constraints. Nonetheless, LLMs continue to struggle significantly with code constraints, motivating the need for innovation in this direction.</abstract>
       <url hash="93f83f63">2025.acl-industry.104</url>
diff --git a/data/xml/2025.africanlp.xml b/data/xml/2025.africanlp.xml
index b09a79594a..9d8480cd7d 100644
--- a/data/xml/2025.africanlp.xml
+++ b/data/xml/2025.africanlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)</booktitle>
       <editor><first>Constantine</first><last>Lignos</last></editor>
       <editor><first>Idris</first><last>Abdulmumin</last></editor>
-      <editor><first>David</first><last>Adelani</last></editor>
+      <editor id="david-ifeoluwa-adelani"><first>David</first><last>Adelani</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vienna, Austria</address>
       <month>July</month>
@@ -271,7 +271,7 @@
       <author><first>Sani Abdullahi</first><last>Sani</last></author>
       <author><first>Ali Usman</first><last>Umar</last><affiliation>Federal University of Lafia</affiliation></author>
       <author><first>Tajuddeen</first><last>Gwadabe</last><affiliation>Masakhane Research Foundation</affiliation></author>
-      <author><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Vukosi</first><last>Marivate</last><affiliation>University of Pretoria</affiliation></author>
       <pages>176-191</pages>
       <abstract>Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research.</abstract>
@@ -351,7 +351,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>Y</fixed-case>-<fixed-case>NQ</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>Y</fixed-case>orùbá Evaluation dataset for Open-Book Reading Comprehension with Open-Ended Questions</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <author><first>Joy</first><last>Chen</last><affiliation>Georgia Institute of Technology and Facebook</affiliation></author>
       <author><first>Ife</first><last>Adebara</last></author>
       <author><first>Joe</first><last>Chuang</last><affiliation>FAIR</affiliation></author>
diff --git a/data/xml/2025.aielpl.xml b/data/xml/2025.aielpl.xml
index 4dc4c0bc3f..0375e726de 100644
--- a/data/xml/2025.aielpl.xml
+++ b/data/xml/2025.aielpl.xml
@@ -6,7 +6,7 @@
       <editor><first>María Isabel Rivas</first><last>Ginel</last></editor>
       <editor><first>Patrick</first><last>Cadwell</last></editor>
       <editor><first>Paolo</first><last>Canavese</last></editor>
-      <editor><first>Silvia</first><last>Hansen-Schirra</last></editor>
+      <editor id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></editor>
       <editor><first>Martin</first><last>Kappus</last></editor>
       <editor><first>Anna</first><last>Matamala</last></editor>
       <editor><first>Will</first><last>Noonan</last></editor>
@@ -24,9 +24,9 @@
     </frontmatter>
     <paper id="1">
       <title>Leveraging Large Language Models for Joint Linguistic and Technical Accessibility Improvement: A Case Study on University Webpages</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>1–13</pages>
       <abstract>The aim of the study presented in this paper is to investigate whether Large Language Models can be leveraged to translate French content from existing websites into their B1-level simplified versions and to integrate them into an accessible HTML structure. We design a CMS agnostic approach to webpage accessibility improvement based on prompt engineering and apply it to Geneva University webpages. We conduct several automatic and manual evaluations to measure the accessibility improvement reached by several LLMs with various prompts in a zero-shot setting. Results show that LLMs are not all suitable for the task, while a large disparity is observed among results reached by different prompts. Manual evaluation carried out by a dyslexic crowd shows that some LLMs could produce more accessible websites and improve access to information.</abstract>
       <url hash="a48e90fa">2025.aielpl-1.1</url>
@@ -34,7 +34,7 @@
     </paper>
     <paper id="2">
       <title>How Artificial Intelligence can help in the Easy-to-Read Adaptation of Numerical Expressions in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Mari Carmen</first><last>Suárez-Figueroa</last></author>
+      <author id="mari-carmen-suarez-figueroa"><first>Mari Carmen</first><last>Suárez-Figueroa</last></author>
       <author><first>Alejandro</first><last>Muñoz-Navarro</last></author>
       <author><first>Isam</first><last>Diab</last></author>
       <pages>14–24</pages>
@@ -90,7 +90,7 @@
     <paper id="8">
       <title>Do professionally adapted texts follow existing Easy-to-Understand (<fixed-case>E</fixed-case>2<fixed-case>U</fixed-case>) language guidelines? A quantitative analysis of two professionally adapted corpora</title>
       <author><first>Andreea</first><last>Deleanu</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Shenbin</first><last>Qian</last></author>
       <author><first>Anastasiia</first><last>Bezobrazova</last></author>
       <author><first>Sabine</first><last>Braun</last></author>
diff --git a/data/xml/2025.aisd.xml b/data/xml/2025.aisd.xml
index 36ceeed258..3b94453d50 100644
--- a/data/xml/2025.aisd.xml
+++ b/data/xml/2025.aisd.xml
@@ -3,8 +3,8 @@
   <volume id="main" ingest-date="2025-04-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities</booktitle>
-      <editor><first>Peter</first><last>Jansen</last></editor>
-      <editor><first>Bhavana</first><last>Dalvi Mishra</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
+      <editor id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last></editor>
       <editor><first>Harsh</first><last>Trivedi</last></editor>
       <editor><first>Bodhisattwa</first><last>Prasad Majumder</last></editor>
       <editor><first>Tom</first><last>Hope</last></editor>
diff --git a/data/xml/2025.alp.xml b/data/xml/2025.alp.xml
index fd9cc33439..e65576316b 100644
--- a/data/xml/2025.alp.xml
+++ b/data/xml/2025.alp.xml
@@ -8,7 +8,7 @@
       <editor><first>Bin</first><last>Li</last></editor>
       <editor><first>Yudong</first><last>Liu</last></editor>
       <editor><first>Marco C.</first><last>Passarotti</last></editor>
-      <editor><first>Rachele</first><last>Sprugnoli</last></editor>
+      <editor id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>The Albuquerque Convention Center, Laguna</address>
       <month>May</month>
@@ -114,7 +114,7 @@
     <paper id="9">
       <title>Evaluating Evaluation Metrics for <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Eric R.</first><last>Bennett</last></author>
-      <author><first>HyoJung</first><last>Han</last></author>
+      <author id="hyojung-han"><first>HyoJung</first><last>Han</last></author>
       <author><first>Xinchen</first><last>Yang</last></author>
       <author><first>Andrew</first><last>Schonebaum</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
@@ -206,7 +206,7 @@
       <author><first>Farzaneh</first><last>Goshtasb</last></author>
       <author><first>Nadia</first><last>Hajipour</last></author>
       <author><first>Ehsaneddin</first><last>Asgari</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <pages>137-149</pages>
       <abstract>The study of historical languages presents unique challenges due to their complex ortho-graphic systems, fragmentary textual evidence, and the absence of standardized digital repre-sentations of text in those languages. Tack-ling these challenges needs special NLP digi-tal tools to handle phonetic transcriptions and analyze ancient texts. This work introduces ParsiPy1, an NLP toolkit designed to facili-tate the analysis of historical Persian languages by offering modules for tokenization, lemma-tization, part-of-speech tagging, phoneme-to-transliteration conversion, and word embed-ding. We demonstrate the utility of our toolkit through the processing of Parsig (Middle Per-sian) texts, highlighting its potential for ex-panding computational methods in the study of historical languages. Through this work, we contribute to the field of computational philol-ogy, offering tools that can be adapted for the broader study of ancient texts and their digital preservation.</abstract>
       <url hash="872833d6">2025.alp-1.17</url>
@@ -351,7 +351,7 @@
     <paper id="29">
       <title>Finetuning <fixed-case>LLM</fixed-case>s for <fixed-case>E</fixed-case>va<fixed-case>C</fixed-case>un 2025 token prediction shared task</title>
       <author><first>Josef</first><last>Jon</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>221-225</pages>
       <abstract>In this paper, we present our submission for the token prediction task of EvaCun 2025. Our sys-tems are based on LLMs (Command-R, Mistral, and Aya Expanse) fine-tuned on the task data provided by the organizers. As we only pos-sess a very superficial knowledge of the subject field and the languages of the task, we simply used the training data without any task-specific adjustments, preprocessing, or filtering. We compare 3 different approaches (based on 3 different prompts) of obtaining the predictions, and we evaluate them on a held-out part of the data.</abstract>
       <url hash="6d76c4b9">2025.alp-1.29</url>
diff --git a/data/xml/2025.americasnlp.xml b/data/xml/2025.americasnlp.xml
index f070a25dc5..49e6a104f1 100644
--- a/data/xml/2025.americasnlp.xml
+++ b/data/xml/2025.americasnlp.xml
@@ -10,7 +10,7 @@
       <editor><first>Katharina</first><last>Von Der Wense</last></editor>
       <editor><first>Luis</first><last>Chiruzzo</last></editor>
       <editor><first>Rolando</first><last>Coto-Solano</last></editor>
-      <editor><first>Arturo</first><last>Oncevay</last></editor>
+      <editor id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, New Mexico</address>
       <month>May</month>
@@ -39,7 +39,7 @@
     <paper id="2">
       <title>Does a code-switching dialogue system help users learn conversational fluency in <fixed-case>C</fixed-case>hoctaw?</title>
       <author><first>Jacqueline</first><last>Brixey</last><affiliation>USC Institute for Creative Technologies</affiliation></author>
-      <author><first>David</first><last>Traum</last><affiliation>University of Southern California Institute for Creative Technologies</affiliation></author>
+      <author id="david-traum"><first>David</first><last>Traum</last><affiliation>University of Southern California Institute for Creative Technologies</affiliation></author>
       <pages>8-17</pages>
       <abstract>We investigate the learning outcomes and user response to a chatbot for practicing conversational Choctaw, an endangered American Indigenous language. Conversational fluency is a goal for many language learners, however, for learners of endangered languages in North America, access to fluent speakers may be limited. Chatbots are potentially ideal dialogue partners as this kind of dialogue system fulfills a non-authoritative role by focusing on carrying on a conversation as an equal conversational partner. The goal of the chatbot investigated in this work is to serve as a conversational partner in the absence of a fluent Choctaw-speaking human interlocutor. We investigate the impact of code-switching in the interaction, comparing a bilingual chatbot against a monolingual Choctaw version. We evaluate the systems for user engagement and enjoyment, as well as gains in conversational fluency from interacting with the system.</abstract>
       <url hash="211fd825">2025.americasnlp-1.2</url>
@@ -84,7 +84,7 @@
       <author><first>Paola</first><last>Innes</last><affiliation>National Autonomous University of Mexico (UNAM)</affiliation></author>
       <author><first>Javier</first><last>Santillan</last><affiliation>Honeynet Project</affiliation></author>
       <author><first>Cynthia</first><last>Montaño</last><affiliation>University of Cailfornia, Berkeley</affiliation></author>
-      <author><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last><affiliation>Indiana University</affiliation></author>
       <pages>38-47</pages>
       <abstract>This work presents Py-elotl, a suite of tools and resources in Python for processing text in several indigenous languages spoken in Mexico. These resources include parallel corpora, linguistic taggers/analyzers, and orthographic normalization tools. This work aims to develop essential resources to support language pre-processing and linguistic research, and the future creation of more complete downstream applications that could be useful for the speakers and enhance the visibility of these languages. The current version supports language groups such as Nahuatl, Otomi, Mixtec, and Huave. This project is open-source and freely available for use and collaboration</abstract>
       <url hash="63fb1786">2025.americasnlp-1.5</url>
diff --git a/data/xml/2025.analogyangle.xml b/data/xml/2025.analogyangle.xml
index 6091efef7c..39b7f4a047 100644
--- a/data/xml/2025.analogyangle.xml
+++ b/data/xml/2025.analogyangle.xml
@@ -26,10 +26,10 @@
       <title>Tore-Klose: Record Scorer, Goal Hunter, Machine? Human Association Norms for <fixed-case>G</fixed-case>erman Personal Name Compounds</title>
       <author><first>Annerose</first><last>Eichel</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Tana</first><last>Deeg</last></author>
-      <author><first>Andre</first><last>Blessing</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Milena</first><last>Belosevic</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Sabine</first><last>Arndt-Lappe</last><affiliation>Trier University</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>1-9</pages>
       <abstract>We present a collection of human association norms to German personal name compounds (PNCs) such as “Tore-Klose” (goal-Klose) and corresponding full names (Miroslav Klose), thus providing a novel testbed for PNC evaluation, i.e., analogical vs. contrastive positive vs. negative perception effects. The associations are obtained in an online experiment with German native speakers, analyzed regarding our novel intertwined PNC–person association setup, and accompanied by an LLM synthetic generation approach for augmentation.</abstract>
       <url hash="d3c8b9ec">2025.analogyangle-1.1</url>
@@ -52,7 +52,7 @@
       <author><first>Valerio</first><last>Basile</last><affiliation>University of Turin</affiliation></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Cristina</first><last>Bosco</last><affiliation>University of Turin</affiliation></author>
-      <author><first>Roberto</first><last>Basili</last><affiliation>University of Roma, Tor Vergata</affiliation></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last><affiliation>University of Roma, Tor Vergata</affiliation></author>
       <pages>22-36</pages>
       <abstract>Few-shot learning via in-context learning (ICL) is widely used in NLP, but its effectiveness is highly sensitive to example selection, often leading to unstable performance. To address this, we introduce BacKGen, a framework for generating structured Background Knowledge (BK) as an alternative to instance-based prompting. Our approach leverages Frame Semantics to uncover recurring conceptual patterns across data instances, clustering examples based on shared event structures and semantic roles. These patterns are then synthesized into generalized knowledge statements using a large language model (LLM) and injected into prompts to support contextual reasoning beyond surface-level cues. We apply BacKGen to Sentiment Phrase Classification (SPC), a task where polarity judgments frequently depend on implicit commonsense knowledge. In this setting, BK serves as an abstract representation of prototypical scenarios, enabling schematic generalization to help the model perform analogical reasoning by mapping new inputs onto generalized event structures. Experimental results with Mistral-7B and Llama3-8B demonstrate that BK-based prompting consistently outperforms standard few-shot approaches, achieving up to 29.94% error reduction.</abstract>
       <url hash="0c7841f7">2025.analogyangle-1.3</url>
@@ -75,9 +75,9 @@
     <paper id="5">
       <title>Prompting Metaphoricity: Soft Labeling with Large Language Models in Popular Communication of Science Tweets in <fixed-case>S</fixed-case>panish</title>
       <author><first>Alec</first><last>Sánchez-Montero</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autonoma de Mexico</affiliation></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last><affiliation>Universidad Nacional Autonoma de Mexico</affiliation></author>
       <author><first>Sergio-Luis</first><last>Ojeda-Trueba</last></author>
-      <author><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last><affiliation>Universidad Nacional Autónoma de México</affiliation></author>
       <pages>45-56</pages>
       <abstract>In this paper, we explore how large language models (LLMs) can be used to assign soft labels for metaphoricity in Popular Communication of Science (PCS) tweets written in Spanish. Instead of treating metaphors as a binary yes/no phenomenon, we focus on their graded nature and the variability commonly found in human annotations. Through a combination of prompt design and quantitative evaluation over a stratified sample of our dataset, we show that GPT-4 can assign probabilistic scores not only for general metaphoricity but also for specific metaphor types with consistency (Direct, Indirect, and Personification). The results show that, while LLMs align reasonably well with average human judgments for some categories, capturing the subtle patterns of inter-annotator disagreement remains a challenge. We present a corpus of 3,733 tweets annotated with LLM-generated soft labels, a valuable resource for further metaphor analysis in scientific discourse and figurative language annotation with LLMs.</abstract>
       <url hash="a2ae8aec">2025.analogyangle-1.5</url>
diff --git a/data/xml/2025.argmining.xml b/data/xml/2025.argmining.xml
index 87eef4ad90..83d8c205cc 100644
--- a/data/xml/2025.argmining.xml
+++ b/data/xml/2025.argmining.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 12th Argument mining Workshop</booktitle>
       <editor><first>Elena</first><last>Chistova</last></editor>
-      <editor><first>Philipp</first><last>Cimiano</last></editor>
+      <editor id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></editor>
       <editor><first>Shohreh</first><last>Haddadan</last></editor>
       <editor><first>Gabriella</first><last>Lapesa</last></editor>
       <editor><first>Ramon</first><last>Ruiz-Dolz</last></editor>
@@ -26,7 +26,7 @@
     <paper id="1">
       <title>“The Facts Speak for Themselves”: <fixed-case>GPT</fixed-case> and Fallacy Classification</title>
       <author><first>Erisa</first><last>Bytyqi</last><affiliation>Universität Passau</affiliation></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
       <pages>1-10</pages>
       <abstract>Fallacies are not only part and parcel of human communication, they are also important for generative models in that fallacies can be tailored to self-verify the output they generate. Previous work has shown that fallacy detection and classification is tricky, but the question that still remains is whether the use of theoretical explanations in prompting Large Language Models (LLMs) on the task enhances the performance of the models. In this paper we show that this is not the case: Using the pragma-dialectics approach to fallacies (van Eemeren, 1987), we show that three GPT models struggle with the task. Based on our own PD-oriented dataset of fallacies and an extension of an existing fallacy dataset from Jin et al. (2022), we show that this is not only the case for fallacies “in the wild”, but also for textbook examples of fallacious arguments. Our paper also supports the claim that LLMs generally lag behind in fallacy classification in comparison to smaller-scale neural models.</abstract>
       <url hash="9c0e4136">2025.argmining-1.1</url>
@@ -51,7 +51,7 @@
       <author><first>Davide</first><last>Ceolin</last></author>
       <author><first>Emmanuelle</first><last>Dietz</last><affiliation>Airbus</affiliation></author>
       <author><first>Klara Maximiliane</first><last>Gutekunst</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
       <author><first>Cristián</first><last>Santibáñez</last><affiliation>Universidad Catolica de La Santísima Concepción</affiliation></author>
       <author><first>Jodi</first><last>Schneider</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Jonas</first><last>Scholz</last><affiliation>University of Groningen</affiliation></author>
@@ -79,7 +79,7 @@
       <title>Old but Gold: <fixed-case>LLM</fixed-case>-Based Features and Shallow Learning Methods for Fine-Grained Controversy Analysis in <fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>ube Comments</title>
       <author><first>Davide</first><last>Bassi</last></author>
       <author><first>Erik Bran</first><last>Marino</last></author>
-      <author><first>Renata</first><last>Vieira</last><affiliation>Instituto de Inteligência Artificial na Saúde and Universidade de Evora</affiliation></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last><affiliation>Instituto de Inteligência Artificial na Saúde and Universidade de Evora</affiliation></author>
       <author><first>Martin</first><last>Pereira</last><affiliation>University of Santiago de Compostela</affiliation></author>
       <pages>46-57</pages>
       <abstract>Online discussions can either bridge differences through constructive dialogue or amplify divisions through destructive interactions. paper proposes a computational approach to analyze dialogical relation patterns in YouTube comments, offering a fine-grained framework for controversy detection, enabling also analysis of individual contributions. experiments demonstrate that shallow learning methods, when equipped with these theoretically-grounded features, consistently outperform more complex language models in characterizing discourse quality at both comment-pair and conversation-chain levels.studies confirm that divisive rhetorical techniques serve as strong predictors of destructive communication patterns. work advances understanding of how communicative choices shape online discourse, moving beyond engagement metrics toward nuanced examination of constructive versus destructive dialogue patterns.</abstract>
@@ -125,7 +125,7 @@
       <author><first>Debela</first><last>Gemechu</last></author>
       <author><first>Ramon</first><last>Ruiz-Dolz</last><affiliation>University of Dundee</affiliation></author>
       <author><first>John</first><last>Lawrence</last><affiliation>University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>100-106</pages>
       <abstract>The Open Argument Mining Framework (oAMF) addresses key challenges in argument mining research which still persist despite the field’s impressive growth. Researchers often face difficulties with cross-system comparisons, incompatible representation languages, and limited access to reusable tools. The oAMF introduces a standardised yet flexible architecture that enables seamless component benchmarking, rapid pipeline prototyping using elements from diverse research traditions, and unified evaluation methodologies that preserve theoretical compatibility. By reducing technical overhead, the framework allows researchers to focus on advancing core argument mining capabilities rather than reimplementing infrastructure, fostering greater collaboration at a time when computational reasoning is increasingly vital in the era of large language models.</abstract>
       <url hash="6ddbf1a4">2025.argmining-1.9</url>
@@ -135,7 +135,7 @@
     <paper id="10">
       <title>Argumentative Analysis of Legal Rulings: A Structured Framework Using Bobbitt’s Typology</title>
       <author><first>Carlotta</first><last>Giacchetta</last><affiliation>University of Trento</affiliation></author>
-      <author><first>Raffaella</first><last>Bernardi</last><affiliation>Free University of Bozen Bolzano</affiliation></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last><affiliation>Free University of Bozen Bolzano</affiliation></author>
       <author><first>Barbara</first><last>Montini</last></author>
       <author><first>Jacopo</first><last>Staiano</last><affiliation>University of Trento</affiliation></author>
       <author><first>Serena</first><last>Tomasi</last></author>
@@ -172,7 +172,7 @@
       <author><first>Martin</first><last>Gruber</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
       <author><first>Zlata</first><last>Kikteva</last><affiliation>Universität Passau</affiliation></author>
       <author><first>Ignaz</first><last>Rutter</last><affiliation>Universität Passau</affiliation></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last><affiliation>Universität Passau</affiliation></author>
       <pages>140-146</pages>
       <abstract>Television debates play a key role in shaping public opinion, however, the rapid exchange of viewpoints in these settings often makes it difficult to perceive the underlying nature of the discussion. While there exist several debate visualisation techniques, to the best of our knowledge, none of them emphasise the argumentative dynamics in particular. With DebArgVis, we present a new interactive debate visualisation tool that leverages data annotated with argumentation structures to demonstrate how speaker interactions unfold over time, enabling users to deepen their comprehension of the debate.</abstract>
       <url hash="334b609c">2025.argmining-1.13</url>
@@ -207,7 +207,7 @@
       <title>Stance-aware Definition Generation for Argumentative Texts</title>
       <author><first>Natalia</first><last>Evgrafova</last></author>
       <author><first>Loic</first><last>De Langhe</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>168-180</pages>
       <abstract>Definition generation models trained on dictionary data are generally expected to produce neutral and unbiased output while capturing the contextual nuances. However, previous studies have shown that generated definitions can inherit biases from both the underlying models and the input context. This paper examines the extent to which stance-related bias in argumentative data influences the generated definitions. In particular, we train a model on a slang-based dictionary to explore the feasibility of generating persuasive definitions that concisely reflect opposing parties’ understandings of contested terms. Through this study, we provide new insights into bias propagation in definition generation and its implications for definition generation applications and argument mining.</abstract>
@@ -286,7 +286,7 @@
     <paper id="23">
       <title>Overview of the Critical Questions Generation Shared Task</title>
       <author><first>Blanca</first><last>Calvo Figueras</last><affiliation>HiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>HiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain</affiliation></author>
       <author><first>Maite</first><last>Heredia</last><affiliation>HiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain</affiliation></author>
       <author><first>Jaione</first><last>Bengoetxea</last><affiliation>HiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain</affiliation></author>
       <author><first>Elena</first><last>Cabrio</last><affiliation>University of Côte d’Azur and member of the Inria-I3S research team Wimmics</affiliation></author>
@@ -404,7 +404,7 @@
       <title><fixed-case>COGNAC</fixed-case> at <fixed-case>CQ</fixed-case>s-Gen 2025: Generating Critical Questions with <fixed-case>LLM</fixed-case>-Assisted Prompting and Multiple <fixed-case>RAG</fixed-case> Variants</title>
       <author><first>Azwad Anjum</first><last>Islam</last></author>
       <author><first>Tisa Islam</first><last>Erana</last></author>
-      <author><first>Mark A.</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark A.</first><last>Finlayson</last></author>
       <pages>340-348</pages>
       <abstract>We describe three approaches to solving the Critical Questions Generation Shared Task at ArgMining 2025. The task objective is to automatically generate critical questions that challenge the strength, validity, and credibility of a given argumentative text. The task dataset comprises debate statements (“interventions”) annotated with a list of named argumentation schemes and associated with a set of critical questions (CQs). Our three Retrieval-Augmented Generation (RAG)-based approaches used in-context example selection based on (1) embedding the intervention, (2) embedding the intervention plus manually curated argumentation scheme descriptions as supplementary context, and (3) embedding the intervention plus a selection of associated CQs and argumentation scheme descriptions. We developed the prompt templates through GPT-4o-assisted analysis of patterns in validation data and the task-specific evaluation guideline. All three of our submitted systems outperformed the official baselines (0.44 and 0.53) with automatically computed accuracies of 0.62, 0.58, and 0.61, respectively, on the test data, with our first method securing the 2nd place in the competition (0.63 manual evaluation). Our results highlight the efficacy of LLM-assisted prompt development and RAG-enhanced generation in crafting contextually relevant critical questions for argument analysis.</abstract>
       <url hash="73aa9530">2025.argmining-1.33</url>
@@ -415,7 +415,7 @@
       <title><fixed-case>T</fixed-case>ri<fixed-case>LL</fixed-case>a<fixed-case>M</fixed-case>a at <fixed-case>CQ</fixed-case>s-Gen 2025: A Two-Stage <fixed-case>LLM</fixed-case>-Based System for Critical Question Generation</title>
       <author><first>Frieso</first><last>Turkstra</last></author>
       <author><first>Sara</first><last>Nabhani</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <pages>349-357</pages>
       <abstract>This paper presents a new system for generating critical questions in debates, developed for the Critical Questions Generation shared task. Our two-stage approach, combining generation and classification, utilizes LLaMA 3.1 Instruct models (8B, 70B, 405B) with zero-/few-shot prompting. Evaluations on annotated debate data reveal several key insights: few-shot generation with 405B yielded relatively high-quality questions, achieving a maximum possible punctuation score of 73.5. The 70B model outperformed both smaller and larger variants on the classification part. The classifiers showed a strong bias toward labeling generated questions as Useful, despite limited validation. Further, our system, ranked 6 extsuperscriptth, out-performed baselines by 3%. These findings stress the effectiveness of large-sized models for question generation and medium-sized models for classification, and suggest the need for clearer task definitions within prompts to improve classification accuracy.</abstract>
       <url hash="f1411cb9">2025.argmining-1.34</url>
diff --git a/data/xml/2025.at4ssl.xml b/data/xml/2025.at4ssl.xml
index a7b821bfcd..ae1b7dc1b3 100644
--- a/data/xml/2025.at4ssl.xml
+++ b/data/xml/2025.at4ssl.xml
@@ -57,7 +57,7 @@
     <paper id="4">
       <title><fixed-case>P</fixed-case>a<fixed-case>SC</fixed-case>o1: A Parallel Video-<fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case> <fixed-case>S</fixed-case>wiss <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus in Medical Domain</title>
       <author><first>Bastien</first><last>David</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Irene</first><last>Strasly</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
diff --git a/data/xml/2025.bea.xml b/data/xml/2025.bea.xml
index 71bb4cb0f1..64f7a74245 100644
--- a/data/xml/2025.bea.xml
+++ b/data/xml/2025.bea.xml
@@ -125,7 +125,7 @@
     <paper id="9">
       <title>Adapting <fixed-case>LLM</fixed-case>s for Minimal-edit Grammatical Error Correction</title>
       <author><first>Ryszard</first><last>Staruch</last><affiliation>Adam Mickiewicz University</affiliation></author>
-      <author><first>Filip</first><last>Gralinski</last><affiliation>Adam Mickiewicz University / Snowflake</affiliation></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Gralinski</last><affiliation>Adam Mickiewicz University / Snowflake</affiliation></author>
       <author><first>Daniel</first><last>Dzienisiewicz</last><affiliation>Adam Mickiewicz University</affiliation></author>
       <pages>118-128</pages>
       <abstract>Decoder-only large language models have shown superior performance in the fluency-edit English Grammatical Error Correction, but their adaptation for minimal-edit English GEC is still underexplored. To improve their effectiveness in the minimal-edit approach, we explore the error rate adaptation topic and propose a novel training schedule method. Our experiments set a new state-of-the-art result for a single-model system on the BEA-test set. We also detokenize the most common English GEC datasets to match the natural way of writing text. During the process, we find that there are errors in them. Our experiments analyze whether training on detokenized datasets impacts the results and measure the impact of the usage of the datasets with corrected erroneous examples. To facilitate reproducibility, we have released the source code used to train our models.</abstract>
@@ -138,7 +138,7 @@
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <author><first>Stella Xin</first><last>Yin</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
       <author><first>Dion Hoe-Lian</first><last>Goh</last><affiliation>Nanyang Technological University, Singapore</affiliation></author>
-      <author><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
       <pages>129-143</pages>
       <abstract>While Generative AI has demonstrated strong potential and versatility in content generation, its application to educational contexts presents several challenges. Models often fail to align with curriculum standards and maintain grade-appropriate reading levels consistently. Furthermore, STEM education poses additional challenges in balancing scientific explanations with everyday language when introducing complex and abstract ideas and phenomena to younger students.In this work, we propose COGENT, a curriculum-oriented framework for generating grade-appropriate educational content. We incorporate three curriculum components (science concepts, core ideas, and learning objectives), control readability through length, vocabulary, and sentence complexity, and adopt a “wonder-based” approach to increase student engagement and interest. We conduct a multi-dimensional evaluation via both LLM-as-a-judge and human expert analysis. Experimental results show that COGENT consistently produces grade-appropriate passages that are comparable or superior to human references. Our work establishes a viable approach for scaling adaptive and high-quality learning resources.</abstract>
       <url hash="fec4f48d">2025.bea-1.10</url>
@@ -170,7 +170,7 @@
       <title>Automatic concept extraction for learning domain modeling: A weakly supervised approach using contextualized word embeddings</title>
       <author><first>Kordula</first><last>De Kuthy</last><affiliation>Universität Tübingen</affiliation></author>
       <author><first>Leander</first><last>Girrbach</last><affiliation>Universtät Tübingen</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
       <pages>175-185</pages>
       <abstract>Heterogeneity in student populations poses achallenge in formal education, with adaptivetextbooks offering a potential solution by tai-loring content based on individual learner mod-els. However, creating domain models for text-books typically demands significant manual ef-fort. Recent work by Chau et al. (2021) demon-strated automated concept extraction from dig-ital textbooks, but relied on costly domain-specific manual annotations. This paper in-troduces a novel, scalable method that mini-mizes manual effort by combining contextu-alized word embeddings with weakly super-vised machine learning. Our approach clustersword embeddings from textbooks and identi-fies domain-specific concepts using a machinelearner trained on concept seeds automaticallyextracted from Wikipedia. We evaluate thismethod using 28 economics textbooks, com-paring its performance against a tf-idf baseline,a supervised machine learning baseline, theRAKE keyword extraction method, and humandomain experts. Results demonstrate that ourweakly supervised method effectively balancesaccuracy with reduced annotation effort, offer-ing a practical solution for automated conceptextraction in adaptive learning environments.</abstract>
       <url hash="e851038c">2025.bea-1.13</url>
@@ -267,7 +267,7 @@
       <title>Do <fixed-case>LLM</fixed-case>s Give Psychometrically Plausible Responses in Educational Assessments?</title>
       <author><first>Andreas</first><last>Säuberli</last><affiliation>LMU Munich</affiliation></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>LMU</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>LMU Munich</affiliation></author>
       <pages>266-278</pages>
       <abstract>Knowing how test takers answer items in educational assessments is essential for test development, to evaluate item quality, and to improve test validity. However, this process usually requires extensive pilot studies with human participants. If large language models (LLMs) exhibit human-like response behavior to test items, this could open up the possibility of using them as pilot participants to accelerate test development. In this paper, we evaluate the human-likeness or psychometric plausibility of responses from 18 instruction-tuned LLMs with two publicly available datasets of multiple-choice test items across three subjects: reading, U.S. history, and economics. Our methodology builds on two theoretical frameworks from psychometrics which are commonly used in educational assessment, classical test theory and item response theory. The results show that while larger models are excessively confident, their response distributions can be more human-like when calibrated with temperature scaling. In addition, we find that LLMs tend to correlate better with humans in reading comprehension items compared to other subjects. However, the correlations are not very strong overall, indicating that LLMs should not be used for piloting educational assessments in a zero-shot setting.</abstract>
       <url hash="75605eb0">2025.bea-1.21</url>
@@ -485,7 +485,7 @@
       <author><first>Bashar</first><last>Alhafni</last><affiliation>New York University</affiliation></author>
       <author><first>Kirill</first><last>Chirkunov</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
-      <author><first>Ted</first><last>Briscoe</last><affiliation>MBZUAI</affiliation></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last><affiliation>MBZUAI</affiliation></author>
       <pages>549-563</pages>
       <abstract>Automated Essay Scoring (AES) plays a crucial role in assessing language learners’ writingquality, reducing grading workload, and providing real-time feedback. The lack of annotatedessay datasets inhibits the development of Arabic AES systems. This paper leverages LargeLanguage Models (LLMs) and Transformermodels to generate synthetic Arabic essays forAES. We prompt an LLM to generate essaysacross the Common European Framework ofReference (CEFR) proficiency levels and introduce and compare two approaches to errorinjection. We create a dataset of 3,040 annotated essays with errors injected using our twomethods. Additionally, we develop a BERTbased Arabic AES system calibrated to CEFRlevels. Our experimental results demonstratethe effectiveness of our synthetic dataset in improving Arabic AES performance. We makeour code and data publicly available</abstract>
       <url hash="2c0ae99f">2025.bea-1.40</url>
@@ -581,7 +581,7 @@
     <paper id="48">
       <title>Lessons Learned in Assessing Student Reflections with <fixed-case>LLM</fixed-case>s</title>
       <author><first>Mohamed</first><last>Elaraby</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>672-686</pages>
       <abstract>Advances in Large Language Models (LLMs) have sparked growing interest in their potential as explainable text evaluators. While LLMs have shown promise in assessing machine-generated texts in tasks such as summarization and machine translation, their effectiveness in evaluating human-written content—such as student writing in classroom settings—remains underexplored. In this paper, we investigate LLM-based specificity assessment of student reflections written in response to prompts, using three instruction-tuned models. Our findings indicate that although LLMs may underperform compared to simpler supervised baselines in terms of scoring accuracy, they offer a valuable interpretability advantage. Specifically, LLMs can generate user-friendly explanations that enhance the transparency and usability of automated specificity scoring systems.</abstract>
       <url hash="34f86e62">2025.bea-1.48</url>
@@ -646,7 +646,7 @@
     <paper id="54">
       <title>Improving In-context Learning Example Retrieval for Classroom Discussion Assessment with Re-ranking and Label Ratio Regulation</title>
       <author><first>Nhat</first><last>Tran</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Benjamin</first><last>Pierce</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Richard</first><last>Correnti</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Lindsay Clare</first><last>Matsumura</last><affiliation>University of Pittsburgh</affiliation></author>
@@ -670,7 +670,7 @@
     <paper id="56">
       <title>Assessing Critical Thinking Components in <fixed-case>R</fixed-case>omanian Secondary School Textbooks: A Data Mining Approach to the <fixed-case>ROTEX</fixed-case> Corpus</title>
       <author><first>Madalina</first><last>Chitez</last><affiliation>West University of Timisoara</affiliation></author>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Marius</first><last>Micluta-Campeanu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Ana-Maria</first><last>Bucur</last><affiliation>Interdisciplinary School of Doctoral Studies</affiliation></author>
       <author><first>Roxana</first><last>Rogobete</last><affiliation>West University of Timișoara</affiliation></author>
@@ -697,7 +697,7 @@
     <paper id="58">
       <title>Beyond Linear Digital Reading: An <fixed-case>LLM</fixed-case>-Powered Concept Mapping Approach for Reducing Cognitive Load</title>
       <author><first>Junzhi</first><last>Han</last><affiliation>Emory University</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>805-817</pages>
       <abstract>This paper presents an LLM-powered approach for generating concept maps to enhance digital reading comprehension in higher education. While particularly focused on supporting neurodivergent students with their distinct information processing patterns, this approach benefits all learners facing the cognitive challenges of digital text. We use GPT-4o-mini to extract concepts and relationships from educational texts across ten diverse disciplines using open-domain prompts without predefined categories or relation types, enabling discipline-agnostic extraction. Section-level processing achieved higher precision (83.62%) in concept extraction, while paragraph-level processing demonstrated superior recall (74.51%) in identifying educationally relevant concepts. We implemented an interactive web-based visualization tool https://simplified-cognitext.streamlit.app that transforms extracted concepts into navigable concept maps. User evaluation (n=14) showed that participants experienced a 31.5% reduction in perceived cognitive load when using concept maps, despite spending more time with the visualization (22.6% increase). They also completed comprehension assessments more efficiently (14.1% faster) with comparable accuracy. This work demonstrates that LLM-based concept mapping can significantly reduce cognitive demands while supporting non-linear exploration.</abstract>
       <url hash="77e9b1f1">2025.bea-1.58</url>
@@ -883,7 +883,7 @@
       <title>A Framework for Proficiency-Aligned Grammar Practice in <fixed-case>LLM</fixed-case>-Based Dialogue Systems</title>
       <author><first>Luisa</first><last>Ribeiro-Flucht</last><affiliation>University of Tuebingen</affiliation></author>
       <author><first>Xiaobin</first><last>Chen</last><affiliation>Tübingen Universität</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Leibniz-Institut für Wissensmedien (IWM)</affiliation></author>
       <pages>978-987</pages>
       <abstract>Communicative practice is critical for second language development, yet learners often lack targeted, engaging opportunities to use new grammar structures. While large language models (LLMs) can offer coherent interactions, they are not inherently aligned with pedagogical goals or proficiency levels. In this paper, we explore how LLMs can be integrated into a structured framework for contextually-constrained, grammar-focused interaction, building on an existing goal-oriented dialogue system. Through controlled simulations, we evaluate five LLMs across 75 A2-level tasks under two conditions: (i) grammar-targeted, task-anchored prompting and (ii) the addition of a lightweight post-generation validation pipeline using a grammar annotator.Our findings show that template-based prompting alone substantially increases target-form coverage up to 91.4% for LLaMA 3.1-70B-Instruct, while reducing overly advanced grammar usage. The validation pipeline provides an additional boost in form-focused tasks, raising coverage to 96.3% without significantly degrading appropriateness.</abstract>
       <url hash="e9e3086e">2025.bea-1.74</url>
@@ -1170,7 +1170,7 @@
       <title><fixed-case>NLIP</fixed-case> at <fixed-case>BEA</fixed-case> 2025 Shared Task: Evaluation of Pedagogical Ability of <fixed-case>AI</fixed-case> Tutors</title>
       <author><first>Trishita</first><last>Saha</last><affiliation>IIT Hyderabad</affiliation></author>
       <author><first>Shrenik</first><last>Ganguli</last><affiliation>IIT Hyderabad</affiliation></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>IIT Hyderabad</affiliation></author>
       <pages>1242-1253</pages>
       <abstract>This paper describes the system created for the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-powered Tutors. The task aims to assess how well AI tutors identify and locate errors made by students, provide guidance and ensure actionability, among other features of their responses in educational dialogues. Transformer-based models, especially DeBERTa and RoBERTa, are improved by multitask learning, threshold tweaking, ordinal regression, and oversampling. The efficiency of pedagogically driven training methods and bespoke transformer models for evaluating AI tutor quality is demonstrated by the high performance of their best systems across all evaluation tracks.</abstract>
       <url hash="9a389b04">2025.bea-1.99</url>
diff --git a/data/xml/2025.bionlp.xml b/data/xml/2025.bionlp.xml
index eadf2861b0..9b820873b5 100644
--- a/data/xml/2025.bionlp.xml
+++ b/data/xml/2025.bionlp.xml
@@ -6,7 +6,7 @@
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
       <editor><first>Makoto</first><last>Miwa</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Viena, Austria</address>
       <month>August</month>
@@ -253,7 +253,7 @@
     <paper id="18">
       <title><fixed-case>Q</fixed-case>o<fixed-case>LAS</fixed-case>: A <fixed-case>R</fixed-case>eddit Corpus of Health-Related Quality of Life Aspects of Mental Disorders</title>
       <author><first>Lynn</first><last>Greschner</last><affiliation>University of Bamberg</affiliation></author>
-      <author><first>Amelie</first><last>Wührl</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="amelie-wuhrl"><first>Amelie</first><last>Wührl</last><affiliation>University of Stuttgart</affiliation></author>
       <author><first>Roman</first><last>Klinger</last><affiliation>University of Bamberg</affiliation></author>
       <pages>201-216</pages>
       <abstract>Quality of Life (QoL) refers to a person’s subjective perception of various aspects of their life. For medical practitioners, it is one of the most important concepts for treatment decisions. Therefore, it is essential to understand in which aspects a medical condition affects a patient’s subjective perception of their life. With this paper, we focus on the under-resourced domain of mental health-related QoL, and contribute the first corpus to study and model this concept: We (1) annotate 240 Reddit posts with a set of 11 QoL aspects (such as ‘independence’, ‘mood’, or ‘relationships’) and their sentiment polarity. Based on this novel corpus, we (2) evaluate a pipeline to detect QoL mentions and classify them into aspects using open-domain aspect-based sentiment analysis. We find that users frequently discuss health-related QoL in their posts, focusing primarily on the aspects ‘relationships’ and ‘selfimage’. Our method reliably predicts such mentions and their sentiment, however, detecting fine-grained individual aspects remains challenging. An analysis of a large corpus of automatically labeled data reveals that social media content contains novel aspects pertinent to patients that are not covered by existing QoL taxonomies.</abstract>
@@ -285,7 +285,7 @@
       <author><first>João</first><last>Ruano</last><affiliation>Priberam</affiliation></author>
       <author><first>Gonçalo</first><last>Correia</last><affiliation>Priberam</affiliation></author>
       <author><first>Leonor</first><last>Barreiros</last><affiliation>Priberam</affiliation></author>
-      <author><first>Afonso</first><last>Mendes</last><affiliation>Priberam Informática, SA.</affiliation></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last><affiliation>Priberam Informática, SA.</affiliation></author>
       <pages>225-239</pages>
       <abstract>Biomedical Named Entity Recognition presents significant challenges due to the complexity of biomedical terminology and inconsistencies in annotation across datasets. This paper introduces SRU-NER (Slot-based Recurrent Unit NER), a novel approach designed to handle nested named entities while integrating multiple datasets through an effective multi-task learning strategy. SRU-NER mitigates annotation gaps by dynamically adjusting loss computation to avoid penalizing predictions of entity types absent in a given dataset. Through extensive experiments, including a cross-corpus evaluation and human assessment of the model’s predictions, SRU-NER achieves competitive performance in biomedical and general-domain NER tasks, while improving cross-domain generalization.</abstract>
       <url hash="e7c7ddc6">2025.bionlp-1.20</url>
@@ -346,7 +346,7 @@
     <paper id="25">
       <title>Beyond Citations: Integrating Finding-Based Relations for Improved Biomedical Article Representations</title>
       <author><first>Yuan</first><last>Liang</last><affiliation>Queen Mary University of London</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London and University of Utrecht</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Queen Mary University of London and University of Utrecht</affiliation></author>
       <author><first>Roonak</first><last>Rezvani</last><affiliation>Recursion</affiliation></author>
       <pages>297-306</pages>
       <abstract>High-quality scientific article embeddings are essential for tasks like document retrieval, citation recommendation, and classification. Traditional citation-based approaches assume citations reflect semantic similarity—an assumption that introduces bias and noise. Recent models like SciNCL and SPECTER2 have attempted to refine citation-based representations but still struggle with noisy citation edges and fail to fully leverage textual information. To address these limitations, we propose a hybrid approach that combines Finding-Citation Graphs (FCG) with contrastive learning. Our method improves triplet selection by filtering out less important citations and incorporating finding similarity relations, leading to better semantic relationship capture. Evaluated on the SciRepEval benchmark, our approach consistently outperforms citation-only baselines, showing the value of text-based semantic structures. While we do not surpass state-of-the-art models in most tasks, our results reveal the limitations of purely citation-based embeddings and suggest paths for improvement through enhanced semantic integration and domain-specific adaptations.</abstract>
@@ -358,8 +358,8 @@
     <paper id="26">
       <title>Converting Annotated Clinical Cases into Structured Case Report Forms</title>
       <author><first>Pietro</first><last>Ferrazzi</last><affiliation>University of Padova</affiliation></author>
-      <author><first>Alberto</first><last>Lavelli</last><affiliation>FBK</affiliation></author>
-      <author><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last><affiliation>FBK</affiliation></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last><affiliation>FBK</affiliation></author>
       <pages>307-318</pages>
       <abstract>Case Report Forms (CRFs) are largely used in medical research as they ensure accuracy, reliability, and validity of results in clinical studies. However, publicly available, well-annotated CRF datasets are scarce, limiting the development of CRF slot filling systems able to fill in a CRF from clinical notes. To mitigate the scarcity of CRF datasets, we propose to take advantage of available datasets annotated for information extraction tasks and to convert them into structured CRFs. We present a semi-automatic conversion methodology, which has been applied to the E3C dataset in two languages (English and Italian), resulting in a new, high-quality dataset for CRF slot filling. Through several experiments on the created dataset, we report that slot filling achieves 59.7% for Italian and 67.3% for English on a closed Large Language Models (zero-shot) and worse performances on three families of open-source models, showing that filling CRFs is challenging even for recent state-of-the-art LLMs.</abstract>
       <url hash="bb3e2e37">2025.bionlp-1.26</url>
@@ -385,9 +385,9 @@
       <title>Overcoming Data Scarcity in Named Entity Recognition: Synthetic Data Generation with Large Language Models</title>
       <author><first>An</first><last>Dao</last><affiliation>The University of Tokyo</affiliation></author>
       <author><first>Hiroki</first><last>Teranishi</last><affiliation>RIKEN Center for Advanced Intelligence Project</affiliation></author>
-      <author><first>Yuji</first><last>Matsumoto</last><affiliation>Riken Center for Advanced Intelligence Project</affiliation></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last><affiliation>Riken Center for Advanced Intelligence Project</affiliation></author>
       <author><first>Florian</first><last>Boudin</last><affiliation>Nantes University</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>328-340</pages>
       <abstract>Named Entity Recognition (NER) is crucial for extracting domain-specific entities from text, particularly in biomedical and chemical fields. Developing high-quality NER models in specialized domains is challenging due to the limited availability of annotated data, with manual annotation being a key method of data construction. However, manual annotation is time-consuming and requires domain expertise, making it difficult in specialized domains. Traditional data augmentation (DA) techniques also rely on annotated data to some extent, further limiting their effectiveness. In this paper, we propose a novel approach to synthetic data generation for NER using large language models (LLMs) to generate sentences based solely on a set of example entities. This method simplifies the augmentation process and is effective even with a limited set of entities.We evaluate our approach using BERT-based models on the BC4CHEMD, BC5CDR, and TDMSci datasets, demonstrating that synthetic data significantly improves model performance and robustness, particularly in low-resource settings. This work provides a scalable solution for enhancing NER in specialized domains, overcoming the limitations of manual annotation and traditional augmentation methods.</abstract>
       <url hash="e346e1f9">2025.bionlp-1.28</url>
@@ -538,7 +538,7 @@
     <paper id="3">
       <title>Loyola at <fixed-case>A</fixed-case>rch<fixed-case>EHR</fixed-case>-<fixed-case>QA</fixed-case> 2025: Exploring Unsupervised Attribution of Generated Text: Attention and Clustering-Based Methods</title>
       <author><first>Rohan</first><last>Sethi</last><affiliation>Loyola University Chicago and Stritch School of Medicine</affiliation></author>
-      <author><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital, Harvard Medical School</affiliation></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last><affiliation>Boston Children’s Hospital, Harvard Medical School</affiliation></author>
       <author><first>Majid</first><last>Afshar</last><affiliation>University of Wisconsin-Madison</affiliation></author>
       <author><first>Dmitriy</first><last>Dligach</last><affiliation>Loyola University Chicago</affiliation></author>
       <pages>22-26</pages>
@@ -722,7 +722,7 @@
       <author><first>András</first><last>Szlúka</last><affiliation>University of Szeged</affiliation></author>
       <author><first>Gábor</first><last>Kőrösi</last><affiliation>University of Szeged</affiliation></author>
       <author><first>Zsolt</first><last>Szántó</last><affiliation>University of Szeged</affiliation></author>
-      <author><first>Richárd</first><last>Farkas</last><affiliation>University of Szeged</affiliation></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last><affiliation>University of Szeged</affiliation></author>
       <pages>136-149</pages>
       <abstract>In this paper, we present the SzegedAI team’s submissions to the ArchEHR-QA 2025 shared task. Our approaches include multiple prompting techniques for large language models (LLMs), sentence similarity methods, and traditional feature engineering. We are aiming to explore both modern and traditional solutions to the task. To combine the strengths of these diverse methods, we employed different ensembling strategies.</abstract>
       <url hash="a8298331">2025.bionlp-share.17</url>
@@ -839,7 +839,7 @@
       <author><first>Arshitha</first><last>Basavaraj</last><affiliation>International Institute of Information Technology, Bangalore, India</affiliation></author>
       <author><first>Hugo</first><last>Alatrista-Salas</last><affiliation>De Vinci Research Center, Paris, France</affiliation></author>
       <author><first>Francisco</first><last>Pereira</last><affiliation>National Institute of Mental Health</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <pages>215-231</pages>
       <abstract>In this work, we present our approach to addressing all subtasks of the BioLaySumm 2025 shared task by leveraging prompting and retrieval strategies, as well as multimodal input fusion. Our method integrates: (1) zero-shot and few-shot prompting with large language models (LLMs); (2) semantic similarity-based dynamic few-shot prompting; (3) retrieval-augmented generation (RAG) incorporating biomedical knowledge from the Unified Medical Language System (UMLS); and (4) a multimodal fusion pipeline that combines images and captions using image-text-to-text generation for enriched lay summarization. Our framework enables lightweight adaptation of pretrained LLMs for generating lay summaries from scientific articles and radiology reports. Using modern LLMs, including Llama-3.3-70B-Instruct and GPT-4.1, our 5cNLP team achieved third place in Subtask 1.2 and second place in Subtask 2.1, among all submissions.</abstract>
       <url hash="a6c9f792">2025.bionlp-share.27</url>
diff --git a/data/xml/2025.bsnlp.xml b/data/xml/2025.bsnlp.xml
index f5e0f95c95..aba4a67d66 100644
--- a/data/xml/2025.bsnlp.xml
+++ b/data/xml/2025.bsnlp.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 10th Workshop on Slavic Natural Language Processing (Slavic NLP 2025)</booktitle>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Pavel</first><last>Přibáň</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Roman</first><last>Yangarber</last></editor>
       <editor><first>Michal</first><last>Marcinczuk</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -249,7 +249,7 @@
       <author><first>Chuhan</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Dailin</first><last>Li</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Yanan</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
+      <author><first>Jian</first><last>Wang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Hongfei</first><last>Lin</last><affiliation>Dalian University of Technology</affiliation></author>
       <pages>177-182</pages>
       <abstract>This paper presents our submission to Subtask 2 (multi-label classification of persuasion techniques) of the Shared Task on Detection and Classification of Persuasion Techniques in Slavic Languages at SlavNLP 2025. Our method leverages a teacher–student framework based on large language models (LLMs): a Qwen3 32B teacher model generates natural language explanations for annotated persuasion techniques, and a Qwen2.5 32B student model is fine-tuned to replicate both the teacher’s rationales and the final label predictions. We train our models on the official shared task dataset, supplemented by annotated resources from SemEval 2023 Task 3 and CLEF 2024 Task 3 covering English, Russian, and Polish to improve cross-lingual robustness. Our final system ranks 4th on BG, SI, and HR, and 5th on PL in terms of micro-F1 score among all participating teams.</abstract>
diff --git a/data/xml/2025.bucc.xml b/data/xml/2025.bucc.xml
index 134497b3c1..d87db5b26e 100644
--- a/data/xml/2025.bucc.xml
+++ b/data/xml/2025.bucc.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 18th Workshop on Building and Using Comparable Corpora (BUCC)</booktitle>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <editor><first>Ayla Rigouts</first><last>Terryn</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, UAE</address>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="3">
       <title>Towards Truly Open, Language-Specific, Safe, Factual, and Specialized Large Language Models</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>18</pages>
       <abstract>First, we will argue for the need for fully transparent open-source large language models (LLMs), and we will describe the efforts of MBZUAI’s Institute on Foundation Models (IFM) towards that based on the LLM360 initiative. Second, we will argue for the need for language-specific LLMs, and we will share our experience from building Jais, the world’s leading open Arabic-centric foundation and instruction-tuned large language model, Nanda, our recently released open Hindi LLM, and some other models. Third, we will argue for the need for safe LLMs, and we will present Do-Not-Answer, a dataset for evaluating the guardrails of LLMs, which is at the core of the safety mechanisms of our LLMs. Forth, we will argue for the need for factual LLMs, we will discuss the factuality challenges that LLMs pose. We will then present some recent relevant tools for addressing these challenges developed at MBZUAI: (i) OpenFactCheck, a framework for fact-checking LLM output, for building customized fact-checking systems, and for benchmarking LLMs for factuality, (ii) LM-Polygraph, a tool for predicting an LLM’s uncertainty in its output using cheap and fast uncertainty quantification techniques, and (iii) LLM-DetectAIve, a tool for machine-generated text detection. Finally, we will argue for the need for specialized models, and we will present the zoo of LLMs currently being developed at MBZUAI’s IFM.</abstract>
       <url hash="9e1099ae">2025.bucc-1.3</url>
@@ -59,7 +59,7 @@
       <title><fixed-case>BEIR</fixed-case>-<fixed-case>NL</fixed-case>: Zero-shot Information Retrieval Benchmark for the <fixed-case>D</fixed-case>utch Language</title>
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Nikolay</first><last>Banar</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>36–45</pages>
       <abstract>Zero-shot evaluation of information retrieval (IR) models is often performed using BEIR; a large and heterogeneous benchmark composed of multiple datasets, covering different retrieval tasks across various domains. Although BEIR has become a standard benchmark for the zero-shot setup, its exclusively English content reduces its utility for underrepresented languages in IR, including Dutch. To address this limitation and encourage the development of Dutch IR models, we introduce BEIR-NL by automatically translating the publicly accessible BEIR datasets into Dutch. Using BEIR-NL, we evaluated a wide range of multilingual dense ranking and reranking models, as well as the lexical BM25 method. Our experiments show that BM25 remains a competitive baseline, and is only outperformed by the larger dense models trained for retrieval. When combined with reranking models, BM25 achieves performance on par with the best dense ranking models. In addition, we explored the impact of translation on the data by back-translating a selection of datasets to English, and observed a performance drop for both dense and lexical methods, indicating the limitations of translation for creating benchmarks. BEIR-NL is publicly available on the Hugging Face hub.</abstract>
       <url hash="20700e6b">2025.bucc-1.5</url>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="7">
       <title>The Role of Handling Attributive Nouns in Improving <fixed-case>C</fixed-case>hinese-To-<fixed-case>E</fixed-case>nglish Machine Translation</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Rodolfo Joel</first><last>Zevallos</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
       <author><first>Lisa</first><last>Wang</last></author>
@@ -94,7 +94,7 @@
       <author><first>Borja</first><last>Herce</last></author>
       <author><first>Demian</first><last>Inostroza Améstica</last></author>
       <author><first>Andreas</first><last>Scherbakov</last></author>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <pages>62–72</pages>
       <abstract>Linguistic fieldwork is an important component in language documentation and the creation of comprehensive linguistic corpora. Despite its significance, the process is often lengthy, exhaustive, and time-consuming. This paper presents a novel model that guides a linguist during the fieldwork and accounts for the dynamics of linguist-speaker interactions. We introduce a novel framework that evaluates the efficiency of various sampling strategies for obtaining morphological data and assesses the effectiveness of state-of-the-art neural models in generalising morphological structures. Our experiments highlight two key strategies for improving the efficiency: (1) increasing the diversity of annotated data by uniform sampling among the cells of the paradigm tables, and (2) using model confidence as a guide to enhance positive interaction by providing reliable predictions during annotation.</abstract>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="9">
       <title>Comparable Corpora: Opportunities for New Research Directions</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <pages>73–82</pages>
       <abstract>Most conference papers present new results, but this paper will focus more on opportunities for the audience to make their own contributions. This paper is intended to challenge the community to think more broadly about what we can do with comparable corpora. We will start with a review of the history, and then suggest new directions for future research.</abstract>
       <url hash="97658e06">2025.bucc-1.9</url>
@@ -114,7 +114,7 @@
       <author><first>Manon</first><last>Scholivet</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Louis</first><last>Estève</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <pages>83–98</pages>
       <abstract>The annotation of large text corpora is essential for many tasks. We present here a large automatically annotated corpus for French. This corpus is separated into two parts: the first from BigScience, and the second from HPLT. The annotated documents from HPLT were selected in order to optimise the lexical diversity of the final corpus SELEXINI. An analysis of the impact of this selection was carried out on syntactic diversity, as well as on the quality of the new words resulting from the HPLT part of SELEXINI. We have shown that despite the introduction of interesting new words, the texts extracted from HPLT are very noisy. Furthermore, increasing lexical diversity did not increase syntactic diversity.</abstract>
diff --git a/data/xml/2025.c3nlp.xml b/data/xml/2025.c3nlp.xml
index 302fa122fc..dd849b5177 100644
--- a/data/xml/2025.c3nlp.xml
+++ b/data/xml/2025.c3nlp.xml
@@ -60,7 +60,7 @@
       <title><fixed-case>I</fixed-case>nsp<fixed-case>AI</fixed-case>red: Cross-cultural Inspiration Detection and Analysis in Real and <fixed-case>LLM</fixed-case>-generated Social Media Data</title>
       <author><first>Oana</first><last>Ignat</last><affiliation>Santa Clara University</affiliation></author>
       <author><first>Gayathri Ganesh</first><last>Lakshmy</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>35-49</pages>
       <abstract>Inspiration is linked to various positive outcomes, such as increased creativity, productivity, and happiness. Although inspiration has great potential, there has been limited effort toward identifying content that is inspiring, as opposed to just engaging or positive. Additionally, most research has concentrated on Western data, with little attention paid to other cultures. This work is the first to study cross-cultural inspiration through machine learning methods. We aim to identify and analyze real and AI-generated cross-cultural inspiring posts. To this end, we compile and make publicly available the InspAIred dataset, which consists of 2,000 real inspiring posts, 2,000 real non-inspiring posts, and 2,000 generated inspiring posts evenly distributed across India and the UK. The real posts are sourced from Reddit, while the generated posts are created using the GPT-4 model. Using this dataset, we conduct extensive computational linguistic analyses to (1) compare inspiring content across cultures, (2) compare AI-generated inspiring posts to real inspiring posts, and (3) determine if detection models can accurately distinguish between inspiring content across cultures and data sources.</abstract>
       <url hash="1bd7f580">2025.c3nlp-1.4</url>
@@ -83,7 +83,7 @@
     <paper id="6">
       <title><fixed-case>K</fixed-case>orean Stereotype Content Model: Translating Stereotypes Across Cultures</title>
       <author><first>Michelle YoungJin</first><last>Kim</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>59-70</pages>
       <abstract>To address bias in language models, researchers are leveraging established social psychology research on stereotyping. This interdisciplinary approach uses frameworks like the Stereotype Content Model (SCM) to understand how stereotypes about social groups are formed and perpetuated. The SCM posits that stereotypes are based on two dimensions: warmth (intent to harm) and competence (ability to harm). This framework has been applied in NLP for various tasks, including stereotype identification, bias mitigation, and hate speech detection. While the SCM has been extensively studied in English language models and Western cultural contexts, its applicability as a cross-cultural measure of stereotypes remains an open research question. This paper explores the cross-cultural validity of the SCM by developing a Korean Stereotype Content Model (KoSCM). We create a Korean warmth-competence lexicon through machine translation of existing English lexicons, validated by an expert translator, and utilize this lexicon to develop a labeled training dataset of Korean sentences. This work presents the first extension of SCM lexicons to a non-English language (Korean), aiming to broaden understanding of stereotypes and cultural dynamics.</abstract>
       <url hash="061af696">2025.c3nlp-1.6</url>
@@ -96,7 +96,7 @@
       <author><first>Seogyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Seyoung</first><last>Song</last><affiliation>KAIST</affiliation></author>
       <author><first>Yohan</first><last>Lee</last><affiliation>Electronics and Telecommunications Research Institute</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>71-88</pages>
       <abstract>Content moderation platforms concentrate resources on English content despite serving predominantly non-English speaking users.Also, given the scarcity of native moderators for low-resource languages, non-native moderators must bridge this gap in moderation tasks such as hate speech moderation.Through a user study, we identify that non-native moderators struggle with understanding culturally-specific knowledge, sentiment, and internet culture in the hate speech.To assist non-native moderators, we present LLM-C3MOD, a human-LLM collaborative pipeline with three steps: (1) RAG-enhanced cultural context annotations; (2) initial LLM-based moderation; and (3) targeted human moderation for cases lacking LLM consensus.Evaluated on Korean hate speech dataset with Indonesian and German participants, our system achieves 78% accuracy (surpassing GPT-4o’s 71% baseline) while reducing human workload by 83.6%.In addition, cultural context annotations improved non-native moderator accuracy from 22% to 61%, with humans notably excelling at nuanced tasks where LLMs struggle.Our findings demonstrate that non-native moderators, when properly supported by LLMs, can effectively contribute to cross-cultural hate speech moderation.</abstract>
       <url hash="39f57874">2025.c3nlp-1.7</url>
@@ -117,7 +117,7 @@
       <title>Towards Region-aware Bias Evaluation Metrics</title>
       <author><first>Angana</first><last>Borah</last></author>
       <author><first>Aparna</first><last>Garimella</last><affiliation>Adobe Research</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>108-131</pages>
       <abstract>When exposed to human-generated data, language models are known to learn and amplify societal biases. While previous works introduced metrics that can be used to assess the bias in these models, they rely on assumptions that may not be universally true. For instance, a gender bias dimension commonly used by these metrics is that of family–career, but this may not be the only common bias in certain regions of the world. In this paper, we identify topical differences in gender bias across different regions and propose a region-aware bottom-up approach for bias assessment. Several of our proposed region-aware gender bias dimensions are found to be aligned with the human perception of gender biases in these regions.</abstract>
       <url hash="9918b89a">2025.c3nlp-1.9</url>
@@ -133,8 +133,8 @@
       <author><first>Young Min</first><last>Cho</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Maitreyi</first><last>Redkar</last></author>
       <author><first>Samindara</first><last>Hardikar-Sawant</last><affiliation>Shri Jagdishprasad Jhabarmal Tibrewala University</affiliation></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
       <pages>132-142</pages>
       <abstract>Culture moderates the way individuals perceive and express mental distress. Current understandings of mental health expressions on social media, however, are predominantly derived from WEIRD (Western, Educated, Industrialized, Rich, and Democratic) contexts. To address this gap, we examine mental health posts on Reddit made by individuals geolocated in India, to identify variations in social media language specific to the Indian context compared to users from Western nations. Our experiments reveal significant psychosocial variations in emotions and temporal orientation. This study demonstrates the potential of social media platforms for identifying cross-cultural differences in mental health expressions (e.g. seeking advice in India vs seeking support by Western users). Significant linguistic variations in online mental health-related language emphasize the importance of developing precision-targeted interventions that are culturally appropriate.</abstract>
       <url hash="5a019f51">2025.c3nlp-1.10</url>
@@ -151,7 +151,7 @@
       <author><first>Huzama</first><last>Ahmad</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Na Min</first><last>An</last><affiliation>KAIST</affiliation></author>
       <author><first>James</first><last>Thorne</last><affiliation>KAIST</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>143-154</pages>
       <abstract>In a highly globalized world, it is important for multi-modal large language models (MLLMs) to recognize and respond correctly to mixed-cultural inputs.For example, a model should correctly identify kimchi (Korean food) in an image both when an Asian woman is eating it, as well as an African man is eating it.However, current MLLMs show an over-reliance on the visual features of the person, leading to misclassification of the entities. To examine the robustness of MLLMs to different ethnicity, we introduce MIXCUBE, a cross-cultural bias benchmark, and study elements from five countries and four ethnicities. Our findings reveal that MLLMs achieve both higher accuracy and lower sensitivity to such perturbation for high-resource cultures, but not for low-resource cultures. GPT-4o, the best-performing model overall, shows up to 58% difference in accuracy between the original and perturbed cultural settings in low-resource cultures</abstract>
       <url hash="d78640b2">2025.c3nlp-1.11</url>
diff --git a/data/xml/2025.calcs.xml b/data/xml/2025.calcs.xml
index 41d31e1d76..33679a8af9 100644
--- a/data/xml/2025.calcs.xml
+++ b/data/xml/2025.calcs.xml
@@ -3,14 +3,14 @@
   <volume id="1" ingest-date="2025-04-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Computational Approaches to Linguistic Code-Switching</booktitle>
-      <editor><first>Genta Indra</first><last>Winata</last></editor>
+      <editor id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></editor>
       <editor><first>Sudipta</first><last>Kar</last></editor>
       <editor><first>Marina</first><last>Zhukova</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
       <editor><first>Xi</first><last>Ai</last></editor>
       <editor><first>Injy</first><last>Hamed</last></editor>
       <editor><first>Mahardika Krisna Krisna</first><last>Ihsani</last></editor>
-      <editor><first>Derry Tanti</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></editor>
       <editor><first>Garry</first><last>Kuwanto</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, New Mexico, USA</address>
@@ -31,7 +31,7 @@
       <title><fixed-case>E</fixed-case>uskañol<fixed-case>DS</fixed-case>: A Naturally Sourced Corpus for <fixed-case>B</fixed-case>asque-<fixed-case>S</fixed-case>panish Code-Switching</title>
       <author><first>Maite</first><last>Heredia</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <author><first>Jeremy</first><last>Barnes</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
-      <author><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
       <pages>1-5</pages>
       <abstract>Code-switching (CS) remains a significant challenge in Natural Language Processing (NLP), mainly due a lack of relevant data. In the context of the contact between the Basque and Spanish languages in the north of the Iberian Peninsula, CS frequently occurs in both formal and informal spontaneous interactions. However, resources to analyse this phenomenon and support the development and evaluation of models capable of understanding and generating code-switched language for this language pair are almost non-existent. We introduce a first approach to develop a naturally sourced corpus for Basque-Spanish code-switching. Our methodology consists of identifying CS texts from previously available corpora using language identification models, which are then manually validated to obtain a reliable subset of CS instances. We present the properties of our corpus and make it available under the name EuskañolDS.</abstract>
       <url hash="43d74985">2025.calcs-1.1</url>
diff --git a/data/xml/2025.cgmta.xml b/data/xml/2025.cgmta.xml
index dea0f5a823..e0b9875ddd 100644
--- a/data/xml/2025.cgmta.xml
+++ b/data/xml/2025.cgmta.xml
@@ -20,7 +20,7 @@
     </frontmatter>
     <paper id="1">
       <title>An Annotated Error Corpus for <fixed-case>E</fixed-case>speranto</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>1–8</pages>
       <abstract>This paper presents and evaluates a new multi-genre error corpus for (written) Esperanto, EspEraro, building on both learner, news and internet data and covering both ordinary spelling errors and real-word errors such as grammatical and word choice errors. Because the corpus has been annotated not only for errors, error types and corrections, but also with Constraint Grammar (CG) tags for part-of-speech, inflection, affixation, syntactic function, dependency and semantic class, it allows users to linguistically contextualize errors and to craft and test CG rules aiming at the recognition and/or correction of the various error types covered in the corpus. The resource was originally created for regression-testing a newly developed spell- and grammar checker, and contains about 75,000 tokens ( 4,000 sentences), with 3,330 tokens annotated for one or more errors and a combined correction suggestion. We discuss the different error types and evaluate their weight in the corpus. Where relevant, we explain the role of Constraint Grammar (CG) in the identification and correction of the individual error types.</abstract>
       <url hash="4eb78af7">2025.cgmta-1.1</url>
@@ -45,7 +45,7 @@
     </paper>
     <paper id="4">
       <title>Towards Natural Language Explanations of Constraint Grammar Rules</title>
-      <author><first>Daniel</first><last>Swanson</last></author>
+      <author id="daniel-g-swanson"><first>Daniel</first><last>Swanson</last></author>
       <pages>28–31</pages>
       <abstract>This paper presents a general-purpose parser for static analysis of Constraint Grammar rules (that is, examining only the rules, not potential inputs and outputs) and applies it to the task of translating rules into comprehensible explanations of behavior. An interactive interface for exploring how individual components of each rule contribute to these translations is also presented.</abstract>
       <url hash="cb14e0fb">2025.cgmta-1.4</url>
@@ -90,7 +90,7 @@
     <paper id="9">
       <title><fixed-case>D</fixed-case>ivvunspell—<fixed-case>F</fixed-case>inite-State Spell-Checking and Correction on Modern Platforms</title>
       <author><first>Flammie A</first><last>Pirinen</last></author>
-      <author><first>Sjur Nørstebø</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur Nørstebø</first><last>Moshagen</last></author>
       <pages>59–63</pages>
       <abstract>Spell-checking and correction is one of the key applications of natural language support. Historically, for the biggest, less morphologically complex languages, spell-checking and correction could be implemented by relatively simple means; however, for morphologically complex and low-resource languages, the solutions were often suboptimal. Finite-state methods are the state of the art in rule-based natural language processing and also for spell-checking and correction they have been effectively used. In this article, we show some recent developments of a finite-state spell-checker implementation that works with modern operating systems and platforms.</abstract>
       <url hash="01425cdb">2025.cgmta-1.9</url>
diff --git a/data/xml/2025.chipsal.xml b/data/xml/2025.chipsal.xml
index d4e0cec263..ca523f60be 100644
--- a/data/xml/2025.chipsal.xml
+++ b/data/xml/2025.chipsal.xml
@@ -241,7 +241,7 @@
     <paper id="21">
       <title>Leveraging Machine-Generated Data for Joint Intent Detection and Slot Filling in <fixed-case>B</fixed-case>angla: A Resource-Efficient Approach</title>
       <author><first>A H M Rezaul</first><last>Karim</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>208–216</pages>
       <abstract>Natural Language Understanding (NLU) is crucial for conversational AI, yet low-resource languages lag behind in essential tasks like intent detection and slot-filling. To address this gap, we converted the widely-used English SNIPS dataset to Bangla using LLaMA 3, creating a dataset that captures the linguistic complexities of the language. With this translated dataset for model training, our experimental evaluation compares both independent and joint modeling approaches using transformer architecture. Results demonstrate that a joint approach based on multilingual BERT (mBERT) achieves superior performance, with 97.83% intent accuracy and 91.03% F1 score for slot filling. This work advances NLU capabilities for Bangla and provides insights for developing robust models in other low-resource languages.</abstract>
       <url hash="bdecd15a">2025.chipsal-1.21</url>
diff --git a/data/xml/2025.chum.xml b/data/xml/2025.chum.xml
index 0c913b2399..caa78dfa90 100644
--- a/data/xml/2025.chum.xml
+++ b/data/xml/2025.chum.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-01-24" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Computational Humor (CHum)</booktitle>
-      <editor><first>Christian F.</first><last>Hempelmann</last></editor>
+      <editor id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></editor>
       <editor><first>Julia</first><last>Rayz</last></editor>
       <editor><first>Tiansi</first><last>Dong</last></editor>
       <editor><first>Tristan</first><last>Miller</last></editor>
@@ -60,7 +60,7 @@
       <author><first>Prashant</first><last>Kodali</last></author>
       <author><first>Ashna</first><last>Dua</last></author>
       <author><first>Kapil</first><last>Rajesh Kavitha</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>32–57</pages>
       <abstract>Puns, as a linguistic phenomenon, hold significant importance in both humor and language comprehension. While extensive research has been conducted in the realm of pun generation in English, there exists a notable gap in the exploration of pun generation within code-mixed text, particularly in Hindi-English code-mixed text. This study addresses this gap by offering a computational method specifically designed to create puns in Hindi-English code-mixed text. In our investigation, we delve into three distinct methodologies aimed at pun generation utilizing pun-alternate word pairs. Furthermore, this novel dataset, HECoP, comprising of 2000 human-annotated sentences serves as a foundational resource for training diverse pun detection models. Additionally, we developed a structured pun generation pipeline capable of generating puns from a single input word without relying on predefined word pairs. Through rigorous human evaluations, our study demonstrates the efficacy of our proposed models in generating code-mixed puns. The findings presented herein lay a solid groundwork for future endeavours in pun generation and computational humor within diverse linguistic contexts.</abstract>
       <url hash="11da949b">2025.chum-1.5</url>
diff --git a/data/xml/2025.cl.xml b/data/xml/2025.cl.xml
index 650b92e632..7b40560d7e 100644
--- a/data/xml/2025.cl.xml
+++ b/data/xml/2025.cl.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>MUC</fixed-case>king In, or Fifty Years in Information Extraction</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.1162/coli_a_00547</doi>
       <abstract>I want to thank the ACL for this Lifetime Achievement Award. I am deeply honored to be receiving it. I would also like to thank the students, faculty, and researchers who were members of the Proteus Project during most of my professional lifetime. It was an honor to serve that group.</abstract>
       <pages>7–22</pages>
@@ -33,7 +33,7 @@
       <title>e<fixed-case>RST</fixed-case>: A Signaled Graph Theory of Discourse Relations and Organization</title>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Tatsuya</first><last>Aoyama</last></author>
-      <author><first>Yang Janet</first><last>Liu</last></author>
+      <author id="yang-janet-liu"><first>Yang Janet</first><last>Liu</last></author>
       <author><first>Siyao</first><last>Peng</last></author>
       <author><first>Debopam</first><last>Das</last></author>
       <author><first>Luke</first><last>Gessler</last></author>
@@ -49,7 +49,7 @@
       <author><first>Arnisa</first><last>Fazla</last></author>
       <author><first>Chantal</first><last>Amrhein</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Liane</first><last>Guillou</last></author>
@@ -95,7 +95,7 @@
     <paper id="8">
       <title>A Survey on <fixed-case>LLM</fixed-case>-Generated Text Detection: Necessity, Methods, and Future Directions</title>
       <author><first>Junchao</first><last>Wu</last></author>
-      <author id="shu-yang"><first>Shu</first><last>Yang</last></author>
+      <author><first>Shu</first><last>Yang</last></author>
       <author><first>Runzhe</first><last>Zhan</last></author>
       <author><first>Yulin</first><last>Yuan</last></author>
       <author><first>Lidia Sam</first><last>Chao</last></author>
@@ -169,7 +169,7 @@
       <author><first>Wei</first><last>He</last></author>
       <author><first>Tiago Kramer</first><last>Vieira</last></author>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <doi>10.1162/coli_a_00546</doi>
@@ -238,7 +238,7 @@
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>David</first><last>Jurgens</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <doi>10.1162/coli_a_00556</doi>
       <abstract>Language technologies have advanced substantially, particularly with the introduction of large language models. However, these advancements can exacerbate several issues that models have traditionally faced, including bias, evaluation, and risk. In this perspective piece, we argue that many of these issues share a common core: a lack of awareness of the social factors, interactions, and implications of the social environment in which NLP operates. We call this social awareness. While NLP is improving at addressing linguistic issues, there has been relatively limited progress in incorporating social awareness into models to work in all situations for all users. Integrating social awareness into NLP will improve the naturalness, usefulness, and safety of applications while also opening up new applications. Today, we are only at the start of a new, important era in the field.</abstract>
       <pages>689–703</pages>
diff --git a/data/xml/2025.cl4health.xml b/data/xml/2025.cl4health.xml
index d5f0a927fe..ad920a5ba5 100644
--- a/data/xml/2025.cl4health.xml
+++ b/data/xml/2025.cl4health.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)</booktitle>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Deepak</first><last>Gupta</last></editor>
+      <editor id="deepak-gupta"><first>Deepak</first><last>Gupta</last></editor>
       <editor><first>Paul</first><last>Thompson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, New Mexico</address>
@@ -24,7 +24,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>P</fixed-case>atient<fixed-case>D</fixed-case>x: Merging Large Language Models for Protecting Data-Privacy in Healthcare</title>
-      <author><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last><affiliation>Paul Sabatier University - IRIT</affiliation></author>
       <author><first>Jesus</first><last>Lovon-Melgarejo</last><affiliation>IRIT</affiliation></author>
       <author><first>M’rick</first><last>Robin-Charlet</last><affiliation>Université Paul Sabatier</affiliation></author>
       <author><first>Christine</first><last>Damase-Michel</last><affiliation>Université Paul Sabatier</affiliation></author>
@@ -221,7 +221,7 @@
       <title>Leveraging External Knowledge Bases: Analyzing Presentation Methods and Their Impact on Model Performance</title>
       <author><first>Hui-Syuan</first><last>Yeh</last><affiliation>LISN/CNRS &amp; Université Paris Saclay</affiliation></author>
       <author><first>Thomas</first><last>Lavergne</last><affiliation>LISN/CNRS &amp; Université Paris Saclay</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <pages>193-204</pages>
       <abstract>Integrating external knowledge into large language models has demonstrated potential for performance improvement across a wide range of tasks. This approach is particularly appealing in domain-specific applications, such as in the biomedical field. However, the strategies for effectively presenting external knowledge to these models remain underexplored. This study investigates the impact of different knowledge presentation methods and their influence on model performance. Our results show that inserting knowledge between demonstrations helps the models perform better, and improve smaller LLMs (7B) to perform on par with larger LLMs (175B). Our further investigation indicates that the performance improvement, however, comes more from the effect of additional tokens and positioning than from the relevance of the knowledge.</abstract>
       <url hash="93c9bd12">2025.cl4health-1.16</url>
@@ -234,7 +234,7 @@
       <author><first>Nicolo</first><last>Micheletti</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Lifeng</first><last>Han</last><affiliation>The University of Manchester</affiliation></author>
       <author><first>Warren</first><last>Del-Pinto</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>205-218</pages>
       <url hash="c2ef74b8">2025.cl4health-1.17</url>
       <bibkey>belkadi-etal-2025-lt3</bibkey>
@@ -277,7 +277,7 @@
     <paper id="21">
       <title>Am <fixed-case>I</fixed-case> eligible? Natural Language Inference for Clinical Trial Patient Recruitment: the Patient’s Point of View</title>
       <author><first>Mathilde</first><last>Aguiar</last><affiliation>Université Paris-Saclay, CNRS, Laboratoire Interdisciplinaire des Sciences du Numérique, 91400, Orsay, France</affiliation></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last><affiliation>LISN, CNRS, Université Paris-Saclay</affiliation></author>
       <author><first>Nona</first><last>Naderi</last><affiliation>UniversitÃ© Paris-Saclay</affiliation></author>
       <pages>243-259</pages>
       <abstract>Recruiting patients to participate in clinical trials can be challenging and time-consuming. Usually, participation in a clinical trial is initiated by a healthcare professional and proposed to the patient. Promoting clinical trials directly to patients via online recruitment might help to reach them more efficiently. In this study, we address the case where a patient is initiating their own recruitment process and wants to determine whether they are eligible for a given clinical trial, using their own language to describe their medical profile. To study whether this creates difficulties in the patient-trial matching process, we design a new dataset and task, Natural Language Inference for Patient Recruitment (NLI4PR), in which patient-language profiles must be matched to clinical trials. We create it by adapting the TREC 2022 Clinical Trial Track dataset, which provides patients’ medical profiles, and rephrasing them manually using patient language. We also use the associated clinical trial reports where the patients are either eligible or excluded. We prompt several open-source Large Language Models on our task and achieve from 56.5 to 71.8 of F1 score using patient language, against 64.7 to 73.1 for the same task using medical language. When using patient language, we observe only a small loss in performance for the best model, suggesting that having the patient as a starting point could be adopted to help recruit patients for clinical trials. The corpus and code bases are all freely available on our GitHub and HuggingFace repositories.</abstract>
@@ -339,7 +339,7 @@
       <title>Medication Extraction and Entity Linking using Stacked and Voted Ensembles on <fixed-case>LLM</fixed-case>s</title>
       <author><first>Pablo</first><last>Romero</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Lifeng</first><last>Han</last><affiliation>The University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>303-315</pages>
       <url hash="b280b49c">2025.cl4health-1.26</url>
       <bibkey>romero-etal-2025-medication</bibkey>
@@ -394,7 +394,7 @@
       <author><first>Pablo</first><last>Romero</last><affiliation>Manchester Metropolitan University</affiliation></author>
       <author><first>Libo</first><last>Ren</last><affiliation>University of Manchester, UK</affiliation></author>
       <author><first>Lifeng</first><last>Han</last><affiliation>The University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>340-348</pages>
       <url hash="10a5003d">2025.cl4health-1.30</url>
       <bibkey>romero-etal-2025-manchester</bibkey>
@@ -404,7 +404,7 @@
       <title><fixed-case>MNLP</fixed-case> at <fixed-case>P</fixed-case>er<fixed-case>A</fixed-case>ns<fixed-case>S</fixed-case>umm: A Classifier-Refiner Architecture for Improving the Classification of Consumer Health User Responses</title>
       <author><first>Jooyeon</first><last>Lee</last><affiliation>George Mason University</affiliation></author>
       <author><first>Luan</first><last>Pham</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Özlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
       <pages>349-358</pages>
       <abstract>Community question-answering (CQA) platforms provide a crucial space for users to share experiences, seek medical advice, and exchange health-related information. However, these platforms, by nature of their user-generated content as well as the complexity and subjectivity of natural language, remain a significant challenge for tasks related to the automatic classification of diverse perspectives. The PerAnsSumm shared task involves extracting perspective spans from community users’ answers, classifying them into specific perspective categories (Task A), and then using these perspectives and spans to generate structured summaries (Task B). Our focus is on Task A. To address this challenge, we propose a Classifier-Refiner Architecture (CRA), a two-stage framework designed to enhance classification accuracy. The first stage employs a Classifier to segment user responses into self-contained snippets and assign initial perspective labels along with a binary confidence value. If the classifier is not confident, a secondary Refiner stage is triggered, incorporating retrieval-augmented generation to enhance classification through contextual examples. Our methodology integrates instruction-driven classification, tone definitions, and Chain-of-Thought (CoT) prompting, leading to improved F1 scores compared to single-pass approaches. Experimental evaluations on the Perspective Summarization Dataset (PUMA) demonstrate that our framework improves classification performance by leveraging multi-stage decision-making. Our submission ranked among the top-performing teams, achieving an overall score of 0.6090, with high precision and recall in perspective classification.</abstract>
       <url hash="75eb8d2b">2025.cl4health-1.31</url>
diff --git a/data/xml/2025.climatenlp.xml b/data/xml/2025.climatenlp.xml
index 7f87590b0e..1147de7000 100644
--- a/data/xml/2025.climatenlp.xml
+++ b/data/xml/2025.climatenlp.xml
@@ -34,7 +34,7 @@
       <title>Enhancing Retrieval for <fixed-case>ESGLLM</fixed-case> via <fixed-case>ESG</fixed-case>-<fixed-case>CID</fixed-case>: A Disclosure Content Index Finetuning Dataset for Mapping <fixed-case>GRI</fixed-case> and <fixed-case>ESRS</fixed-case></title>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
       <author><first>Ankit</first><last>Shah</last><affiliation>Accenture</affiliation></author>
-      <author><first>Quan Hung</first><last>Tran</last><affiliation>Facebook</affiliation></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last><affiliation>Facebook</affiliation></author>
       <author><first>Vivek</first><last>Khetan</last><affiliation>Accenture Labs</affiliation></author>
       <author><first>Sukryool</first><last>Kang</last><affiliation>Accenture</affiliation></author>
       <author><first>Ankit</first><last>Mehta</last><affiliation>Accenture</affiliation></author>
@@ -235,7 +235,7 @@
       <author><first>David</first><last>Thulke</last><affiliation>RWTH Aachen University and AppTek</affiliation></author>
       <author><first>Jakob</first><last>Kemmler</last><affiliation>Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
       <author><first>Christian</first><last>Dugast</last><affiliation>NA</affiliation></author>
-      <author><first>Hermann</first><last>Ney</last><affiliation>Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last><affiliation>Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
       <pages>245-259</pages>
       <abstract>Large language models that use retrieval augmented generation have the potential to unlock valuable knowledge for researchers, policymakers, and the public by making long and technical climate-related documents more accessible. While this approach can help alleviate factual hallucinations by relying on retrieved passages as additional context, its effectiveness depends on whether the model’s output remains faithful to these passages. To address this, we explore the automatic assessment of faithfulness of different models in this setting. We then focus on ClimateGPT, a large language model specialised in climate science, to examine which factors in its instruction fine-tuning impact the model’s faithfulness. By excluding unfaithful subsets of the model’s training data, we develop ClimateGPT Faithful+, which achieves an improvement in faithfulness from 30% to 57% in supported atomic claims according to our automatic metric.</abstract>
       <url hash="7d4abfc5">2025.climatenlp-1.17</url>
diff --git a/data/xml/2025.clpsych.xml b/data/xml/2025.clpsych.xml
index 6bda102964..be56c76278 100644
--- a/data/xml/2025.clpsych.xml
+++ b/data/xml/2025.clpsych.xml
@@ -6,7 +6,7 @@
       <editor><first>Ayah</first><last>Zirikly</last></editor>
       <editor><first>Andrew</first><last>Yates</last></editor>
       <editor><first>Bart</first><last>Desmet</last></editor>
-      <editor><first>Molly</first><last>Ireland</last></editor>
+      <editor id="molly-ireland"><first>Molly</first><last>Ireland</last></editor>
       <editor><first>Steven</first><last>Bedrick</last></editor>
       <editor><first>Sean</first><last>MacAvaney</last></editor>
       <editor><first>Kfir</first><last>Bar</last></editor>
@@ -43,7 +43,7 @@
       <author><first>Simon</first><last>Ostermann</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
       <author><first>Patrick</first><last>Gebhard</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
       <author><first>Cord</first><last>Benecke</last><affiliation>Department of Psychology, University of Kassel, Kassel, Germany</affiliation></author>
-      <author><first>Josef</first><last>van Genabith</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
       <author><first>Philipp</first><last>Müller</last><affiliation>German Research Center for Artificial Intelligence</affiliation></author>
       <pages>12-25</pages>
       <abstract>Psychodynamic conflicts are persistent, often unconscious themes that shape a person’s behaviour and experiences. Accurate diagnosis of psychodynamic conflicts is crucial for effective patient treatment and is commonly done via long, manually scored semi-structured interviews. Existing automated solutions for psychiatric diagnosis tend to focus on the recognition of broad disorder categories such as depression, and it is unclear to what extent psychodynamic conflicts which even the patient themselves may not have conscious access to could be automatically recognised from conversation. In this paper, we propose AutoPsyC, the first method for recognising the presence and significance of psychodynamic conflicts from full-length Operationalized Psychodynamic Diagnostics (OPD) interviews using Large Language Models (LLMs). Our approach combines recent advances in parameter-efficient fine-tuning and Retrieval-Augmented Generation (RAG) with a summarisation strategy to effectively process entire 90 minute long conversations. In evaluations on a dataset of 141 diagnostic interviews we show that AutoPsyC consistently outperforms all baselines and ablation conditions on the recognition of four highly relevant psychodynamic conflicts.</abstract>
@@ -85,7 +85,7 @@
       <author><first>Syeda</first><last>Mahwish</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Camilo</first><last>Ruggero</last><affiliation>University of Texas at Dallas</affiliation></author>
       <author><first>Roman</first><last>Kotov</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
       <pages>62-68</pages>
       <abstract>Recent work has suggested detection of cognitive distortions as an impactful task for NLP in the clinical space, but the connection between language-detected distortions and validated mental health outcomes has been elusive. In this work, we evaluate the co-occurrence of (a) 10 distortions derived from language-based detectors trained over two common distortion datasets with (b) 12 mental health outcomes contained within two new language-to-mental-health datasets: DS4UD and iHiTOP. We find higher rates of distortions for those with greater mental health condition severity (ranging from r = 0.16 for thought disorders to r = 0.46 for depressed mood), and that the specific distortions of should statements and fortune telling were associated with a depressed mood and being emotionally drained, respectively. This suggested that language-based assessments of cognitive distortion could play a significant role in detection and monitoring of mental health conditions.</abstract>
       <url hash="8399624d">2025.clpsych-1.5</url>
@@ -110,7 +110,7 @@
       <author><first>Ian</first><last>Apperly</last><affiliation>School of Psychology, University of Birmingham</affiliation></author>
       <author><first>Rory</first><last>Devine</last><affiliation>School of Psychology, University of Birmingham</affiliation></author>
       <author><first>Sanne</first><last>van der Kleij</last><affiliation>School of Psychology, University of Birmingham</affiliation></author>
-      <author><first>Mark</first><last>Lee</last><affiliation>School of Computer Science, University of Birmingham</affiliation></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last><affiliation>School of Computer Science, University of Birmingham</affiliation></author>
       <pages>79-89</pages>
       <abstract>A rigorous psychometric approach is crucial for the accurate measurement of mind-reading abilities. Traditional scoring methods for such tests, which involve lengthy free-text responses, require considerable time and human effort. This study investigates the use of large language models (LLMs) to automate the scoring of psychometric tests. Data were collected from participants aged 13 to 30 years and scored by trained human coders to establish a benchmark. We evaluated multiple LLMs against human assessments, exploring various prompting strate- gies to optimize performance and fine-tuning the models using a subset of the collected data to enhance accuracy. Our results demonstrate that LLMs can assess advanced mind-reading abilities with over 90% accuracy on average. Notably, in most test items, the LLMs achieved higher Kappa agreement with the lead coder than two trained human coders, highlighting their potential to reliably score open-response psychometric tests.</abstract>
       <url hash="cdc13bb9">2025.clpsych-1.7</url>
@@ -121,10 +121,10 @@
       <title>Bigger But Not Better: Small Neural Language Models Outperform <fixed-case>LLM</fixed-case>s in Detection of Thought Disorder</title>
       <author id="changye-li-umn"><first>Changye</first><last>Li</last><affiliation>University of Washington</affiliation></author>
       <author><first>Weizhe</first><last>Xu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota</affiliation></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last><affiliation>University of Minnesota</affiliation></author>
       <author><first>Ellen</first><last>Bradley</last><affiliation>University of California, San Francisco</affiliation></author>
       <author><first>Dror</first><last>Ben-Zeev</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
       <pages>90-105</pages>
       <abstract>Disorganized thinking is a key diagnostic indicator of schizophrenia-spectrum disorders. Recently, clinical estimates of the severity of disorganized thinking have been shown to correlate with measures of how difficult speech transcripts would be for large language models (LLMs) to predict. However, LLMs’ deployment challenges – including privacy concerns, computational and financial costs, and lack of transparency of training data – limit their clinical utility. We investigate whether smaller neural language models can serve as effective alternatives for detecting positive formal thought disorder, using the same sliding window based perplexity measurements that proved effective with larger models. Surprisingly, our results show that smaller models are more sensitive to linguistic differences associated with formal thought disorder than their larger counterparts. Detection capability declines beyond a certain model size and context length, challenging the common assumption of “bigger is better” for LLM-based applications. Our findings generalize across audio diaries and clinical interview speech samples from individuals with psychotic symptoms, suggesting a promising direction for developing efficient, cost-effective, and privacy-preserving screening tools that can be deployed in both clinical and naturalistic settings.</abstract>
       <url hash="bdfa9c75">2025.clpsych-1.8</url>
@@ -150,7 +150,7 @@
       <author><first>Krutika</first><last>Parvatikar</last><affiliation>Rochester Institute of Technology</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <author><first>Ashiqur</first><last>Khudabukhsh</last><affiliation>Rochester Institute of Technology</affiliation></author>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>116-126</pages>
       <abstract>Depression is the most common mental health disorder, and its prevalence increased during the COVID-19 pandemic. As one of the most extensively researched psychological conditions, recent research has increasingly focused on leveraging social media data to enhance traditional methods of depression screening. This paper addresses the growing interest in interdisciplinary research on depression, and aims to support early-career researchers by providing a comprehensive and up-to-date list of datasets for analyzing and predicting depression through social media data. We present an overview of datasets published between 2019 and 2024. We also make the comprehensive list of datasets available online as a continuously updated resource, with the hope that it will facilitate further interdisciplinary research into the linguistic expressions of depression on social media.</abstract>
       <url hash="68638efc">2025.clpsych-1.10</url>
@@ -303,7 +303,7 @@
       <title>Prompt Engineering for Capturing Dynamic Mental Health Self States from Social Media Posts</title>
       <author><first>Callum</first><last>Chan</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Sunveer</first><last>Khunkhun</last><affiliation>University of Ottawa</affiliation></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Juan Antonio</first><last>Lossio-Ventura</last><affiliation>National Institutes of Health</affiliation></author>
       <pages>256-267</pages>
       <abstract>With the advent of modern Computational Linguistic techniques and the growing societal mental health crisis, we contribute to the field of Clinical Psychology by participating in the CLPsych 2025 shared task. This paper describes the methods and results obtained by the uOttawa team’s submission (which included a researcher from the National Institutes of Health in the USA, in addition to three researchers from the University of Ottawa, Canada). The task consists of four subtasks focused on modeling longitudinal changes in social media users’ mental states and generating accurate summaries of these dynamic self-states. Through prompt engineering of a modern large language model (Llama-3.3-70B-Instruct), the uOttawa team placed first, sixth, fifth, and second, respectively, for each subtask, amongst the other submissions. This work demonstrates the capacity of modern large language models to recognize nuances in the analysis of mental states and to generate summaries through carefully crafted prompting.</abstract>
@@ -359,8 +359,8 @@
       <author><first>August Håkan</first><last>Nilsson</last><affiliation>OsloMet</affiliation></author>
       <author><first>Syeda</first><last>Mahwish</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Vasudha</first><last>Varadarajan</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Ryan L.</first><last>Boyd</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="ryan-boyd"><first>Ryan L.</first><last>Boyd</last><affiliation>University of Texas at Dallas</affiliation></author>
       <pages>300-313</pages>
       <abstract>Mental health is not a fixed trait but a dynamic process shaped by the interplay between individual dispositions and situational contexts. Building on interactionist and constructionist psychological theories, we develop interpretable models to predict well-being and identify adaptive and maladaptive self-states in longitudinal social media data. Our approach integrates person-level psychological traits (e.g., resilience, cognitive distortions, implicit motives) with language-inferred situational features derived from the Situational 8 DIAMONDS framework. We compare these theory-grounded features to embeddings from a psychometrically-informed language model that captures temporal and individual-specific patterns. Results show that our principled, theory-driven features provide competitive performance while offering greater interpretability. Qualitative analyses further highlight the psychological coherence of features most predictive of well-being. These findings underscore the value of integrating computational modeling with psychological theory to assess dynamic mental states in contextually sensitive and human-understandable ways.</abstract>
       <url hash="e80fb674">2025.clpsych-1.27</url>
diff --git a/data/xml/2025.clrel.xml b/data/xml/2025.clrel.xml
index 7d9ffcdaf2..d34dfd5851 100644
--- a/data/xml/2025.clrel.xml
+++ b/data/xml/2025.clrel.xml
@@ -6,7 +6,7 @@
       <editor><first>Sane</first><last>Yagi</last></editor>
       <editor><first>Sane</first><last>Yagi</last></editor>
       <editor><first>Majdi</first><last>Sawalha</last></editor>
-      <editor><first>Bayan Abu</first><last>Shawar</last></editor>
+      <editor id="bayan-abu-shawar"><first>Bayan Abu</first><last>Shawar</last></editor>
       <editor><first>Abdallah T.</first><last>AlShdaifat</last></editor>
       <editor><first>Norhan</first><last>Abbas</last></editor>
       <editor><first/><last>Organizers</last></editor>
diff --git a/data/xml/2025.cltw.xml b/data/xml/2025.cltw.xml
index 7b1dbd4fc0..7590697b80 100644
--- a/data/xml/2025.cltw.xml
+++ b/data/xml/2025.cltw.xml
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>An Assessment of Word Separation Practices in <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text Resources and a Universal Method for Tokenising <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text</title>
       <author><first>Adrian</first><last>Doyle</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>1–11</pages>
       <abstract>The quantity of Old Irish text which survives in contemporary manuscripts is relatively small by comparison to what is available for well-resourced modern languages. Moreover, as it is a historical language, no more text will ever be generated by native speakers of Old Irish. This makes the text which has survived particularly valuable, and ideally, all of it would be annotated using a single, common annotation standard, thereby ensuring compatibility between text resources. At present, Old Irish text repositories separate words or sub-word morphemes in accordance with different methodologies, and each uses a different style of lexical annotation. This makes it difficult to utilise content from more than any one repository in NLP applications. This paper provides an assessment of distinctions between existing annotated corpora, showing that the primary point of divergence is at the token level. For this reason, this paper also describes a new method for tokenising Old Irish text. This method can be applied even to diplomatic editions, and has already been utilised in various text resources.</abstract>
       <url hash="e0606e3a">2025.cltw-1.1</url>
@@ -33,7 +33,7 @@
       <author><first>William</first><last>Lamb</last></author>
       <author><first>Dongge</first><last>Han</last></author>
       <author><first>Ondrej</first><last>Klejch</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Peter</first><last>Bell</last></author>
       <pages>12–26</pages>
       <abstract>Advances in large language modelling have disproportionately benefited high-resource languages due to their vastly greater training data reserves. This paper proposes a novel cross-lingual text expansion (XLTE) technique using multilingual large language models (MLLMs) to mitigate data sparsity in low-resource languages. We apply XLTE to the domain of traditional Scottish Gaelic storytelling to generate a training corpus suitable for language modelling, for example as part of an automatic speech recognition system. The effectiveness of this technique is demonstrated using OpenAI’s GPT-4o, with supervised fine-tuning (SFT) providing decreased neologism rates and a 57.2% reduction in perplexity over the baseline model. Despite these promising results, qualitative analyses reveal important stylistic divergences between synthesised and genuine data. Nevertheless, XLTE offers a promising, scalable method for synthesising training sets in other languages and domains, opening avenues for further improvements in low-resource language modelling.</abstract>
@@ -44,7 +44,7 @@
       <title>A Pragmatic Approach to Using Artificial Intelligence and Virtual Reality in Digital Game-Based Language Learning</title>
       <author><first>Monica</first><last>Ward</last></author>
       <author><first>Liang</first><last>Xu</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <pages>27–34</pages>
       <abstract>Computer-Assisted Language Learning (CALL) applications have many benefits for language learning. However, they can be difficult to develop for low-resource languages such as Irish and the other Celtic languages. It can be difficult to assemble the multidisciplinary team needed to develop CALL resources and there are fewer language resources available for the language. This paper provides an overview of a pragmatic approach to using Artificial Intelligence (AI) and Virtual Reality (VR) in developing a Digital Game-Based Language Learning (DGBLL) app for Irish. This pragmatic approach was used to develop Cipher - a DGBLL app for Irish (Xu et al, 2022b) where a number of existing resources including text repositories and NLP tools were used. In this paper the focus is on the incorporation of Artificial Intelligence (AI) technologies including AI image generation, text-to-speech (TTS) and Virtual Reality (VR), in a pedagogically informed manner to support language learning in a way that is both challenging and enjoyable. Cipher has been designed to be language independent and can be adapted for various cohorts of learners and for other languages. Cipher has been played and tested in a number of schools in Dublin and the feedback from teachers and students has been very positive. This paper outlines how AI and VR technologies have been utilised in Cipher and how it could be adapted to other Celtic languages and low-resource languages in general.</abstract>
       <url hash="792bb125">2025.cltw-1.3</url>
diff --git a/data/xml/2025.cmcl.xml b/data/xml/2025.cmcl.xml
index a4f3bea2a8..90bd083177 100644
--- a/data/xml/2025.cmcl.xml
+++ b/data/xml/2025.cmcl.xml
@@ -68,7 +68,7 @@
       <title>Profiling neural grammar induction on morphemically tokenised child-directed speech</title>
       <author><first>Mila</first><last>Marcheva</last></author>
       <author><first>Theresa</first><last>Biberauer</last><affiliation>University of the Western Cape, University of Stellenbosch and University of Cambridge</affiliation></author>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
       <pages>47-54</pages>
       <abstract>We investigate the performance of state-of-the-art (SotA) neural grammar induction (GI) models on a morphemically tokenised English dataset based on the CHILDES treebank (Pearl and Sprouse, 2013). Using implementations from Yang et al. (2021a), we train models and evaluate them with the standard F1 score. We introduce novel evaluation metrics—depth-of-morpheme and sibling-of-morpheme—which measure phenomena around bound morpheme attachment. Our results reveal that models with the highest F1 scores do not necessarily induce linguistically plausible structures for bound morpheme attachment, highlighting a key challenge for cognitively plausible GI.</abstract>
       <url hash="df2c343b">2025.cmcl-1.7</url>
@@ -91,7 +91,7 @@
     <paper id="11">
       <title>Unzipping the Causality of <fixed-case>Z</fixed-case>ipf’s Law and Other Lexical Trade-offs</title>
       <author><first>Amanda</first><last>Doucette</last><affiliation>McGill University, McGill University</affiliation></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last><affiliation>McGill University, Mila and McGill University</affiliation></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last><affiliation>McGill University, Mila and McGill University</affiliation></author>
       <author><first>Morgan</first><last>Sonderegger</last><affiliation>McGill University</affiliation></author>
       <pages>66-76</pages>
       <abstract>There are strong constraints on the structure of a possible lexicon. For example, the negative correlation between word frequency and length known as Zipf’s law, and a negative correlation between word length and phonotactic complexity appear to hold across languages. While lexical trade-offs like these have been examined individually, it is unclear how they interact as a system. In this paper, we propose causal discovery as a method for identifying lexical biases and their interactions in a set of variables. We represent the lexicon as a causal model, and apply the Fast Causal Discovery algorithm (Spirtes et al., 1995) to identify both causal relationships between measured variables and the existence of possible unmeasured confounding variables. We apply this method to lexical data including measures of word length, frequency, phonotactic complexity, and morphological irregularity for 25 languages and find evidence of universal associations involving word length with a high likelihood of involving an unmeasured confounder, suggesting that additional variables need to be measured to determine how they are related. We also find evidence of variation across languages in relationships between the remaining variables, and suggest that given a larger dataset, causal discovery algorithms can be a useful tool in assessing the universality of lexical biases.</abstract>
@@ -116,7 +116,7 @@
       <title>“Is There Anything Else?”: Examining Administrator Influence on Linguistic Features from the Cookie Theft Picture Description Cognitive Test</title>
       <author id="changye-li-umn"><first>Changye</first><last>Li</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhecheng</first><last>Sheng</last></author>
-      <author><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Serguei V. S.</first><last>Pakhomov</last><affiliation>University of Minnesota - Twin Cities</affiliation></author>
       <pages>91-103</pages>
       <abstract>Alzheimer’s Disease (AD) dementia is a progressive neurodegenerative disease that negatively impacts patients’ cognitive ability. Previous studies have demonstrated that changes in naturalistic language samples can be useful for early screening of AD dementia. However, the nature of language deficits often requires test administrators to use various speech elicitation techniques during spontaneous language assessments to obtain enough propositional utterances from dementia patients. This could lead to the “observer’s effect” on the downstream analysis that has not been fully investigated. Our study seeks to quantify the influence of test administrators on linguistic features in dementia assessment with two English corpora the “Cookie Theft” picture description datasets collected at different locations and test administrators show different levels of administrator involvement. Our results show that the level of test administrator involvement significantly impacts observed linguistic features in patient speech. These results suggest that many of significant linguistic features in the downstream classification task may be partially attributable to differences in the test administration practices rather than solely to participants’ cognitive status. The variations in test administrator behavior can lead to systematic biases in linguistic data, potentially confounding research outcomes and clinical assessments. Our study suggests that there is a need for a more standardized test administration protocol in the development of responsible clinical speech analytics frameworks.</abstract>
@@ -187,7 +187,7 @@
     </paper>
     <paper id="23">
       <title>Beyond Binary <fixed-case>A</fixed-case>nimacy: A Multi-Method Investigation of <fixed-case>LM</fixed-case>s’ Sensitivity in <fixed-case>E</fixed-case>nglish Object Relative Clauses</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
+      <author><first>Yue</first><last>Li</last></author>
       <author><first>Yan</first><last>Cong</last><affiliation>Purdue University</affiliation></author>
       <author><first>Elaine J.</first><last>Francis</last><affiliation>Purdue University</affiliation></author>
       <pages>184-196</pages>
@@ -199,7 +199,7 @@
     <paper id="24">
       <title>An Empirical Study of Language Syllabification using Syllabary and Lexical Networks</title>
       <author><first>Rusali</first><last>Saha</last></author>
-      <author><first>Yannick</first><last>Marchand</last><affiliation>Dalhousie University</affiliation></author>
+      <author id="yannick-marchand"><first>Yannick</first><last>Marchand</last><affiliation>Dalhousie University</affiliation></author>
       <pages>197-206</pages>
       <abstract>Language syllabification is the separation of a word into written or spoken syllables. The study of syllabification plays a pivotal role in morphology and there have been previous attempts to study this phenomenon using graphs or networks. Previous approaches have claimed through visual estimation that the degree distribution of language networks follows the Power Law distribution, however, there have not been any empirically grounded metrics to determine the same. In our study, we implement two kinds of language networks, namely, syllabary and lexical networks, and investigate the syllabification of four European languages: English, French, German and Spanish using network analysis and examine their small-world, random and scale-free nature. We additionally empirically prove that contrary to claims in previous works, although the degree distribution of these networks appear to follow a power law distribution, they are actually more in agreement with a log-normal distribution, when a numerically grounded curve-fitting is applied. Finally, we explore how syllabary and lexical networks for the English language change over time using a database of age-of-acquisition rating words. Our analysis further shows that the preferential attachment mechanism appears to be a well-grounded explanation for the degree distribution of the syllabary network.</abstract>
       <url hash="cd5ebcf3">2025.cmcl-1.24</url>
diff --git a/data/xml/2025.coling.xml b/data/xml/2025.coling.xml
index 41069c3376..88190f1b15 100644
--- a/data/xml/2025.coling.xml
+++ b/data/xml/2025.coling.xml
@@ -3,7 +3,7 @@
   <volume id="main" ingest-date="2025-01-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 31st International Conference on Computational Linguistics</booktitle>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Hend</first><last>Al-Khalifa</last></editor>
@@ -166,7 +166,7 @@
       <author><first>Tao</first><last>Ji</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Gui</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>156–187</pages>
       <abstract>Existing evaluations of tool learning primarily focus on validating the alignment of selected tools for large language models (LLMs) with expected outcomes. However, these approaches rely on a limited set of scenarios where answers can be pre-determined. Furthermore, a sole emphasis on outcomes disregards the complex capabilities required for LLMs to effectively use tools. To tackle this issue, we propose ToolEyes, a fine-grained system tailored for the evaluation of the LLMs’ tool learning capabilities in authentic scenarios. The system meticulously examines seven real-world scenarios, analyzing five dimensions crucial to LLMs in tool learning: format alignment, intent comprehension, behavior planning, tool selection, and answer organization. Additionally, ToolEyes incorporates a tool library boasting approximately 600 tools, serving as an intermediary between LLMs and the physical world. Evaluations involving ten LLMs across three categories reveal a preference for specific scenarios and limited cognitive abilities in tool learning. Intriguingly, expanding the model size even exacerbates the hindrance to tool learning. The code and data are available at https://github.com/Junjie-Ye/ToolEyes.</abstract>
       <url hash="28f8c590">2025.coling-main.12</url>
@@ -196,7 +196,7 @@
       <title>Looks can be Deceptive: Distinguishing Repetition Disfluency from Reduplication</title>
       <author><first>Arif A.</first><last>Ahmad</last></author>
       <author><first>Khyathi Gayathri</first><last>Mothika</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>214–229</pages>
       <abstract>Reduplication and repetition, though similar in form, serve distinct linguistic purposes. Reduplication is a deliberate morphological process used to express grammatical, semantic, or pragmatic nuances, while repetition is often unintentional and indicative of disfluency. This paper presents the first large-scale study of reduplication and repetition in speech using computational linguistics. We introduce IndicRedRep, a new publicly available dataset containing Hindi, Telugu, and Marathi text annotated with reduplication and repetition at the word level. We evaluate transformer-based models for multi-class reduplication and repetition token classification, utilizing the Reparandum-Interregnum-Repair structure to distinguish between the two phenomena. Our models achieve macro F1 scores of up to 85.62% in Hindi, 83.95% in Telugu, and 84.82% in Marathi for reduplication-repetition classification.</abstract>
       <url hash="d87b692c">2025.coling-main.15</url>
@@ -234,7 +234,7 @@
       <title>Dynamic Graph Neural <fixed-case>ODE</fixed-case> Network for Multi-modal Emotion Recognition in Conversation</title>
       <author><first>Yuntao</first><last>Shou</last></author>
       <author><first>Tao</first><last>Meng</last></author>
-      <author id="wei-ai"><first>Wei</first><last>Ai</last></author>
+      <author><first>Wei</first><last>Ai</last></author>
       <author><first>Keqin</first><last>Li</last></author>
       <pages>256–268</pages>
       <abstract>Multimodal emotion recognition in conversation (MERC) refers to identifying and classifying human emotional states by combining data from multiple different modalities (e.g., audio, images, text, video, etc.). Specifically, human emotional expressions are often complex and diverse, and these complex emotional expressions can be captured and understood more comprehensively through the fusion of multimodal information. Most existing graph-based multimodal emotion recognition methods can only use shallow GCNs to extract emotion features and fail to capture the temporal dependencies caused by dynamic changes in emotions. To address the above problems, we propose a Dynamic Graph Neural Ordinary Differential Equation Network (DGODE) for multimodal emotion recognition in conversation, which combines the dynamic changes of emotions to capture the temporal dependency of speakers’ emotions. Technically, the key idea of DGODE is to use the graph ODE evolution network to characterize the continuous dynamics of node representations over time and capture temporal dependencies. Extensive experiments on two publicly available multimodal emotion recognition datasets demonstrate that the proposed DGODE model has superior performance compared to various baselines. Furthermore, the proposed DGODE can also alleviate the over-smoothing problem, thereby enabling the construction of a deep GCN network.</abstract>
@@ -263,7 +263,7 @@
       <author><first>Yi</first><last>Fung</last></author>
       <author><first>Hou Pong</first><last>Chan</last></author>
       <author><first>Kevin</first><last>Small</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>281–296</pages>
       <abstract>The increasing demand for personalized interactions with large language models (LLMs) calls for methodologies capable of accurately and efficiently identifying user opinions and preferences. Retrieval augmentation emerges as an effective strategy, as it can accommodate a vast number of users without the costs from fine-tuning. Existing research, however, has largely focused on enhancing the retrieval stage and devoted limited exploration toward optimizing the representation of the database, a crucial aspect for tasks such as personalization. In this work, we examine the problem from a novel angle, focusing on how data can be better represented for more data-efficient retrieval in the context of LLM customization. To tackle this challenge, we introduce Persona-DB, a simple yet effective framework consisting of a hierarchical construction process to improve generalization across task contexts and collaborative refinement to effectively bridge knowledge gaps among users. In the evaluation of response prediction, Persona-DB demonstrates superior context efficiency in maintaining accuracy with a significantly reduced retrieval size, a critical advantage in scenarios with extensive histories or limited context windows. Our experiments also indicate a marked improvement of over 10% under cold-start scenarios, when users have extremely sparse data. Furthermore, our analysis reveals the increasing importance of collaborative knowledge as the retrieval capacity expands.</abstract>
@@ -293,7 +293,7 @@
     </paper>
     <paper id="23">
       <title>Semantic Role Labeling of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank Partitives</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Advait Pravin</first><last>Savant</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
       <pages>324–336</pages>
@@ -351,7 +351,7 @@
     <paper id="28">
       <title><fixed-case>ELITR</fixed-case>-Bench: A Meeting Assistant Benchmark for Long-Context Language Models</title>
       <author><first>Thibaut</first><last>Thonet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Jos</first><last>Rozen</last></author>
       <pages>407–428</pages>
       <abstract>Research on Large Language Models (LLMs) has recently witnessed an increasing interest in extending the models’ context size to better capture dependencies within long documents. While benchmarks have been proposed to assess long-range abilities, existing efforts primarily considered generic tasks that are not necessarily aligned with real-world applications. In contrast, we propose a new benchmark for long-context LLMs focused on a practical meeting assistant scenario in which the long contexts consist of transcripts obtained by automatic speech recognition, presenting unique challenges for LLMs due to the inherent noisiness and oral nature of such data. Our benchmark, ELITR-Bench, augments the existing ELITR corpus by adding 271 manually crafted questions with their ground-truth answers, as well as noisy versions of meeting transcripts altered to target different Word Error Rate levels. Our experiments with 12 long-context LLMs on ELITR-Bench confirm the progress made across successive generations of both proprietary and open models, and point out their discrepancies in terms of robustness to transcript noise. We also provide a thorough analysis of our GPT-4-based evaluation, including insights from a crowdsourcing study. Our findings indicate that while GPT-4’s scores align with human judges, its ability to distinguish beyond three score levels may be limited.</abstract>
@@ -398,7 +398,7 @@
       <author><first>Yihong</first><last>Liu</last></author>
       <author><first>Chunlan</first><last>Ma</last></author>
       <author><first>Haotian</first><last>Ye</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>469–495</pages>
       <abstract>Transliterating related languages that use different scripts into a common script is effective for improving crosslingual transfer in downstream tasks. However, this methodology often makes pretraining a model from scratch unavoidable, as transliteration brings about new subwords not covered in existing multilingual pretrained language models (mPLMs). This is undesirable because it requires a large computation budget. A more promising way is to make full use of available mPLMs. To this end, this paper proposes a simple but effective framework: Transliterate-Merge-Initialize (TransMI). TransMI can create strong baselines for data that is transliterated into a common script by exploiting an existing mPLM and its tokenizer without any training. TransMI has three stages: (a) transliterate the vocabulary of an mPLM into a common script; (b) merge the new vocabulary with the original vocabulary; and (c) initialize the embeddings of the new subwords. We apply TransMI to three strong recent mPLMs. Our experiments demonstrate that TransMI not only preserves the mPLM’s ability to handle non-transliterated data, but also enables it to effectively process transliterated data, thereby facilitating crosslingual transfer across scripts. The results show consistent improvements of 3% to 34% for different mPLMs and tasks. We make our code and models publicly available at <url>https://github.com/cisnlp/TransMI</url>.</abstract>
       <url hash="284ada06">2025.coling-main.32</url>
@@ -408,7 +408,7 @@
       <title>Two-stage Incomplete Utterance Rewriting on Editing Operation</title>
       <author><first>Zhiyu</first><last>Cao</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Yaxin</first><last>Fan</last></author>
       <pages>496–507</pages>
       <abstract>Previous work on Incomplete Utterance Rewriting (IUR) has primarily focused on generating rewritten utterances based solely on dialogue context, ignoring the widespread phenomenon of coreference and ellipsis in dialogues. To address this issue, we propose a novel framework called TEO (Two-stage approach on Editing Operation) for IUR, in which the first stage generates editing operations and the second stage rewrites incomplete utterances utilizing the generated editing operations and the dialogue context. Furthermore, an adversarial perturbation strategy is proposed to mitigate cascading errors and exposure bias caused by the inconsistency between training and inference in the second stage. Experimental results on three IUR datasets show that our TEO outperforms the SOTA models significantly.</abstract>
@@ -483,7 +483,7 @@
     <paper id="38">
       <title>Knowledge Graph Entity Typing with Curriculum Contrastive Learning</title>
       <author><first>Hao</first><last>Wang</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <author><first>Shan</first><last>Jiang</last></author>
       <pages>574–583</pages>
       <abstract>The Knowledge Graph Entity Typing (KGET) task aims to predict missing type annotations for entities in knowledge graphs. Most recent studies only focus on the structural information from an entity’s neighborhood or semantic information from textual representations of entities or relations. In this paper, inspired by curriculum learning and contrastive learning, we propose the CCLET model using the Curriculum Contrastive Learning strategy for KGET, which uses the Pre-trained Language Model (PLM) and the graph model to fuse the entity related semantic and the structural information of the Knowledge Graph (KG) respectively. Our CCLET model consists of two main parts. In the Knowledge Fusion part, we design an Enhanced-MLP architecture to fuse the text of the entity’s description, related triplet, and tuples; In the Curriculum Contrastive Learning part, we define the difficulty of the course by controlling the level of added noise, we aim to accurately learn with curriculum contrastive learning strategy from easy to difficult. Our extensive experiments demonstrate that the CCLET model outperforms recent state-of-the-art models, verifying its effectiveness in the KGET task.</abstract>
@@ -519,7 +519,7 @@
       <author><first>Jian</first><last>Yang</last></author>
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Weixiao</first><last>Zhou</last></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last></author>
+      <author><first>Fei</first><last>Liu</last></author>
       <author><first>Kui</first><last>Wu</last></author>
       <author><first>Xiangyuan</first><last>Guan</last></author>
       <author><first>Tao</first><last>Sun</last></author>
@@ -603,7 +603,7 @@
       <author><first>Zhaoguang</first><last>Long</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <author><first>Aimin</first><last>Zhou</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yang</first><last>Chong</last></author>
       <pages>710–725</pages>
       <abstract>Large Language Models (LLMs) have demonstrated impressive capabilities across a wide range of tasks. However, their proficiency and reliability in the specialized domain of financial data analysis, particularly focusing on data-driven thinking, remain uncertain. To bridge this gap, we introduce FinDABench, a comprehensive benchmark designed to evaluate the financial data analysis capabilities of LLMs within this context. The benchmark comprises 15,200 training instances and 8,900 test instances, all meticulously crafted by human experts. FinDABench assesses LLMs across three dimensions: 1) Core Ability, evaluating the models’ ability to perform financial indicator calculation and corporate sentiment risk assessment; 2) Analytical Ability, determining the models’ ability to quickly comprehend textual information and analyze abnormal financial reports; and 3) Technical Ability, examining the models’ use of technical knowledge to address real-world data analysis challenges involving analysis generation and charts visualization from multiple perspectives. We will release FinDABench, and the evaluation scripts at https://github.com/xxx. FinDABench aims to provide a measure for in-depth analysis of LLM abilities and foster the advancement of LLMs in the field of financial data analysis.</abstract>
@@ -740,7 +740,7 @@
       <author><first>Wenlin</first><last>Zhang</last></author>
       <author><first>Chuhan</first><last>Wu</last></author>
       <author><first>Xiangyang</first><last>Li</last></author>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last></author>
+      <author><first>Yuhao</first><last>Wang</last></author>
       <author><first>Kuicai</first><last>Dong</last></author>
       <author><first>Yichao</first><last>Wang</last></author>
       <author><first>Xinyi</first><last>Dai</last></author>
@@ -820,7 +820,7 @@
       <author><first>Yuanxiang</first><last>Huangfu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Yaxin</first><last>Fan</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>989–999</pages>
       <abstract>Previous work on empathetic response generation mainly focused on utilizing the speaker’s emotions to generate responses. However, the performance of identifying fine-grained emotions is limited, introducing cascading errors to empathetic response generation. Moreover, due to the conflict between the information in the dialogue history and the recognized emotions, previous work often generated general and uninformative responses. To address the above issues, we propose a novel framework NEC (Non-Emotion-Centric empathetic dialogue generation) based on contrastive learning and context-sensitive entity and social commonsense, in which the frequent replies and sentences with incorrect emotions are punished through contrastive learning, thereby improving the empathy, diversity and information of the responses. The experimental results demonstrate that our NEC enhances the quality of empathetic generation and generates more diverse responses in comparison with the state-of-the-art baselines.The code will be available at https://github.com/huangfu170/NEC-empchat</abstract>
       <url hash="2195c112">2025.coling-main.66</url>
@@ -861,7 +861,7 @@
       <author><first>Terry</first><last>Lima Ruas</last></author>
       <author><first>Mohamed</first><last>Abdalla</last></author>
       <author><first>Bela</first><last>Gipp</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>1027–1044</pages>
       <abstract>This study examines the tendency to cite older work across 20 fields of study over 43 years (1980–2023). We put NLP’s propensity to cite older work in the context of these 20 other fields to analyze whether NLP shows similar temporal citation patterns to them over time or whether differences can be observed. Our analysis, based on a dataset of ~240 million papers, reveals a broader scientific trend: many fields have markedly declined in citing older works (e.g., psychology, computer science). The trend is strongest in NLP and ML research (-12.8% and -5.5% in citation age from previous peaks). Our results suggest that citing more recent works is not directly driven by the growth in publication rates (-3.4% across fields; -5.2% in humanities; -5.5% in formal sciences) — even when controlling for an increase in the volume of papers. Our findings raise questions about the scientific community’s engagement with past literature, particularly for NLP, and the potential consequences of neglecting older but relevant research. The data and a demo showcasing our results are publicly available.</abstract>
       <url hash="ef00477c">2025.coling-main.69</url>
@@ -933,7 +933,7 @@
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Ajay</first><last>Patel</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1124–1135</pages>
       <abstract>Recent state-of-the-art authorship attribution methods learn authorship representations of text in a latent, uninterpretable space, which hinders their usability in real-world applications. We propose a novel approach for interpreting learned embeddings by identifying representative points in the latent space and leveraging large language models to generate informative natural language descriptions of the writing style associated with each point. We evaluate the alignment between our interpretable and latent spaces and demonstrate superior prediction agreement over baseline methods. Additionally, we conduct a human evaluation to assess the quality of these style descriptions and validate their utility in explaining the latent space. Finally, we show that human performance on the challenging authorship attribution task improves by +20% on average when aided with explanations from our method.</abstract>
       <url hash="6a56bb6a">2025.coling-main.75</url>
@@ -1051,7 +1051,7 @@
       <author><first>Shaohuan</first><last>Cheng</last></author>
       <author><first>Dingyi</first><last>Zeng</last></author>
       <author><first>Li</first><last>Zhou</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Malu</first><last>Zhang</last></author>
       <author><first>Wenyu</first><last>Chen</last></author>
       <pages>1278–1293</pages>
@@ -1112,7 +1112,7 @@
       <author><first>Ali</first><last>Al-Laith</last></author>
       <author><first>Alexander</first><last>Conroy</last></author>
       <author><first>Jens</first><last>Bjerring-Hansen</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Carsten</first><last>Levisen</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <pages>1353–1364</pages>
@@ -1217,7 +1217,7 @@
       <author><first>Elizabeth M.</first><last>Olson</last></author>
       <author><first>Hemank</first><last>Lamba</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>1475–1495</pages>
       <abstract>Natural Language Processing (NLP) of news articles can play an important role in understanding the dynamics and causes of violent conflict. Despite the availability of datasets categorizing various conflict events, the existing labels often do not cover all of the fine-grained violent conflict event types relevant to areas like the Horn of Africa. In this paper, we introduce a new benchmark dataset Conflict Events in the Horn of Africa region (CEHA) and propose a new task for identifying violent conflict events using online resources with this dataset. The dataset consists of 500 English event descriptions regarding conflict events in the Horn of Africa region with fine-grained event-type definitions that emphasize the cause of the conflict. This dataset categorizes the key types of conflict risk according to specific areas required by stakeholders in the Humanitarian-Peace-Development Nexus. Additionally, we conduct extensive experiments on two tasks supported by this dataset: Event-relevance Classification and Event-type Classification. Our baseline models demonstrate the challenging nature of these tasks and the usefulness of our dataset for model evaluations in low-resource settings.</abstract>
@@ -1254,7 +1254,7 @@
       <author><first>Liangwei</first><last>Yang</last></author>
       <author><first>Chen</first><last>Wang</last></author>
       <author><first>Xiongxiao</first><last>Xu</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <author><first>Kai</first><last>Shu</last></author>
       <pages>1520–1530</pages>
       <abstract>With the emergence of large language models (LLMs) and their ability to perform a variety of tasks, their application in recommender systems (RecSys) has shown promise. However, we are facing significant challenges when deploying LLMs into RecSys, such as limited prompt length, unstructured item information, and un-constrained generation of recommendations, leading to sub-optimal performance. To address these issues, we propose a novel Taxonomy-guided Recommendation (TaxRec) framework to empower LLM with category information in a systematic approach. Specifically, TaxRec features a two-step process: one-time taxonomy categorization and LLM-based recommendation. In the one-time taxonomy categorization phase, we organize and categorize items, ensuring clarity and structure of item information. In the LLM-based recommendation phase, we feed the structured items into LLM prompts, achieving efficient token utilization and controlled feature generation. This enables more accurate, contextually relevant, and zero-shot recommendations without the need for domain-specific fine-tuning. Experimental results demonstrate that TaxRec significantly enhances recommendation quality compared to traditional zero-shot approaches, showcasing its efficacy as a personal recommender with LLMs. Code is available at: https://github.com/yueqingliang1/TaxRec.</abstract>
@@ -1266,7 +1266,7 @@
       <author><first>Shannan</first><last>Liu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Yaxin</first><last>Fan</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1531–1544</pages>
       <abstract>Multi-party dialogue discourse parsing is an important and challenging task in natural language processing (NLP). Previous studies struggled to fully understand the deep semantics of dialogues, especially when dealing with complex topic interleaving and ellipsis. To address the above issues, we propose a novel model DDPE (Dialogue Discourse Parsing with Explanations) to integrate external knowledge from Large Language Models (LLMs), which consists of three components, i.e., explanation generation, structural parsing, and contrastive learning. DDPE employs LLMs to generate explanatory and contrastive information about discourse structure, thereby providing additional reasoning cues that enhance the understanding of dialogue semantics. The experimental results on the two public datasets STAC and Molweni show that our DDPE significantly outperforms the state-of-the-art (SOTA) baselines.</abstract>
       <url hash="0202ff91">2025.coling-main.103</url>
@@ -1326,7 +1326,7 @@
       <title>Improving Explainable Fact-Checking with Claim-Evidence Correlations</title>
       <author><first>Xin</first><last>Tan</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <pages>1600–1612</pages>
       <abstract>Automatic fact-checking systems that employ large language models (LLMs) have achieved human-level performance in combating widespread misinformation. However, current LLM-based fact-checking systems fail to reveal the reasoning principles behind their decision-making for the claim verdict. In this work, we propose Correlation-Enhanced Explainable Fact-Checking (CorXFact), an LLM-based fact-checking system that simulates the reasoning principle of human fact-checkers for evidence-based claim verification: assessing and weighing the correlations between the claim and each piece of evidence. Following this principle, CorXFact enables efficient claim verification and transparent explanation generation. Furthermore, we contribute the CorFEVER test set to comprehensively evaluate the CorXFact system in claim-evidence correlation identification and claim verification in both closed-domain and real-world fact-checking scenarios. Experimental results show that our proposed CorXFact significantly outperforms four strong fact-checking baselines in claim authenticity prediction and verdict explanation.</abstract>
       <url hash="fac97196">2025.coling-main.108</url>
@@ -1403,7 +1403,7 @@
       <author><first>Minjie</first><last>Qiang</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1699–1710</pages>
       <abstract>With the emergence of social media and e-commerce platforms, accurate user profiling has become increasingly vital for recommendation systems and personalized services. Recent studies have focused on generating detailed user profiles by extracting various aspects of user attributes from textual reviews. Nevertheless, these investigations have not fully exploited the potential of the abundant multimodal data at hand. In this study, we propose a novel task called multimodal user profiling. This task emphasizes the utilization of both review texts and their accompanying images to create comprehensive user profiles. By integrating textual and visual data, we leverage their complementary strengths, enabling the generation of more holistic user representations. Additionally, we explore a unified joint training framework with various multimodal training strategies that incorporate users’ historical review texts and images for user profile generation. Our experimental results underscore the significance of multimodal data in enhancing user profile generation and demonstrate the effectiveness of the proposed unified joint training approach.</abstract>
       <url hash="33d6d0de">2025.coling-main.115</url>
@@ -1411,7 +1411,7 @@
     </paper>
     <paper id="116">
       <title>Acquiring Bidirectionality via Large and Small Language Models</title>
-      <author><first>Takumi</first><last>Goto</last></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Goto</last></author>
       <author><first>Hiroyoshi</first><last>Nagao</last></author>
       <author><first>Yuta</first><last>Koreeda</last></author>
       <pages>1711–1717</pages>
@@ -1456,7 +1456,7 @@
       <author><first>Guangjie</first><last>Zeng</last></author>
       <author><first>Xiaoyan</first><last>Yu</last></author>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>1754–1766</pages>
       <abstract>Multimodal sarcasm detection (MSD) is essential for various downstream tasks. Existing MSD methods tend to rely on spurious correlations. These methods often mistakenly prioritize non-essential features yet still make correct predictions, demonstrating poor generalizability beyond training environments. Regarding this phenomenon, this paper undertakes several initiatives. Firstly, we identify two primary causes that lead to the reliance of spurious correlations. Secondly, we address these challenges by proposing a novel method that integrate Multimodal Incongruities via Contrastive Learning (MICL) for multimodal sarcasm detection. Specifically, we first leverage incongruity to drive multi-view learning from three views: token-patch, entity-object, and sentiment. Then, we introduce extensive data augmentation to mitigate the biased learning of the textual modality. Additionally, we construct a test set, SPMSD, which consists potential spurious correlations to evaluate the the model’s generalizability. Experimental results demonstrate the superiority of MICL on benchmark datasets, along with the analyses showcasing MICL’s advancement in mitigating the effect of spurious correlation.</abstract>
       <url hash="4e347ff2">2025.coling-main.119</url>
@@ -1468,7 +1468,7 @@
       <author><first>Valle</first><last>Ruiz-Fernández</last></author>
       <author><first>Júlia</first><last>Falcão</last></author>
       <author><first>Luis</first><last>Vasquez-Reina</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <pages>1767–1784</pages>
       <abstract>In humans, cognitive biases are systematic deviations from rationality in judgment that simplify complex decisions. They typically manifest as a consequence of learned behaviors or limitations on information processing capabilities. Recent work has shown that these biases can percolate through training data and ultimately be learned by language models. We examine different groups of models, factoring in model size and type (base or instructed) for four kinds of cognitive bias: primacy, recency, common token, and majority class bias. We evaluate the performance of each model for each type of bias in different settings using simple and complex variants of datasets. Our results show that some biases have much stronger effects than others, and that task complexity plays a part in eliciting stronger effects for some of these biases as measured by effect size. We show that some cognitive biases such as common token and majority class bias are not straightforward to evaluate, and that, contrary to some of the previous literature, some effects that have been previously classified as common token bias in the literature are actually due to primacy and recency bias.</abstract>
       <url hash="67b3f2d5">2025.coling-main.120</url>
@@ -1479,7 +1479,7 @@
       <author><first>Shalaka</first><last>Satheesh</last></author>
       <author><first>Katharina</first><last>Beckh</last></author>
       <author><first>Katrin</first><last>Klug</last></author>
-      <author><first>Héctor</first><last>Allende-Cid</last></author>
+      <author id="hector-allende-cid"><first>Héctor</first><last>Allende-Cid</last></author>
       <author><first>Sebastian</first><last>Houben</last></author>
       <author><first>Teena</first><last>Hassan</last></author>
       <pages>1785–1801</pages>
@@ -1508,7 +1508,7 @@
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Shichen</first><last>Li</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1813–1823</pages>
       <abstract>Multimodal sentiment analysis for fashion-related social media is essential for understanding how consumers appraise fashion products across platforms like Instagram and Twitter, where both textual and visual elements contribute to sentiment expression. However, a notable challenge in this task is the modality gap, where the different information density between text and images hinders effective sentiment analysis. In this paper, we propose a novel multimodal framework that addresses this challenge by introducing pseudo data generated by a two-stage framework. We further utilize a multimodal fusion approach that efficiently integrates the information from various modalities for sentiment classification of fashion posts. Experiments conducted on a comprehensive dataset demonstrate that our framework significantly outperforms existing unimodal and multimodal baselines, highlighting its effectiveness in bridging the modality gap for more accurate sentiment classification in fashion-related social media posts.</abstract>
       <url hash="67bef597">2025.coling-main.123</url>
@@ -1517,9 +1517,9 @@
     <paper id="124">
       <title>Quality Beyond A Glance: Revealing Large Quality Differences Between Web-Crawled Parallel Corpora</title>
       <author><first>Rik</first><last>van Noord</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Malina</first><last>Chichirau</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <pages>1824–1838</pages>
       <abstract>Parallel corpora play a vital role in advanced multilingual natural language processing tasks, notably in machine translation (MT). The recent emergence of numerous large parallel corpora, often extracted from multilingual documents on the Internet, has expanded the available resources. Nevertheless, the quality of these corpora remains largely unexplored, while there are large differences in how the corpora are constructed. Moreover, how the potential differences affect the performance of neural MT (NMT) systems has also received limited attention. This study addresses this gap by manually and automatically evaluating four well-known publicly available parallel corpora across eleven language pairs. Our findings are quite concerning: all corpora contain a substantial amount of noisy sentence pairs, with CCMatrix and CCAligned having well below of 50% reasonably clean pairs. MaCoCu and ParaCrawl generally have higher quality texts, though around a third of the texts still have clear issues. While corpus size impacts NMT models’ performance, our study highlights the critical role of quality: higher-quality corpora consistently yield better-performing NMT models when controlling for size.</abstract>
@@ -1551,7 +1551,7 @@
     <paper id="127">
       <title>Does Vision Accelerate Hierarchical Generalization in Neural Language Learners?</title>
       <author><first>Tatsuki</first><last>Kuribayashi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1865–1879</pages>
       <abstract>Neural language models (LMs) are arguably less data-efficient than humans from a language acquisition perspective. One fundamental question is why this human–LM gap arises. This study explores the advantage of <i>grounded</i> language acquisition, specifically the impact of visual information — which humans can usually rely on but LMs largely do not have access to during language acquisition — on syntactic generalization in LMs. Our experiments, following the poverty of stimulus paradigm under two scenarios (using artificial vs. naturalistic images), demonstrate that if the alignments between the linguistic and visual components are clear in the input, access to vision data does help with the syntactic generalization of LMs, but if not, visual input does not help. This highlights the need for additional biases or signals, such as mutual gaze, to enhance cross-modal alignment and enable efficient syntactic generalization in multimodal LMs.</abstract>
       <url hash="1138931b">2025.coling-main.127</url>
@@ -1593,7 +1593,7 @@
       <author><first>Zeyuan</first><last>Yang</last></author>
       <author><first>Fangzhou</first><last>Xiong</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>1922–1933</pages>
       <abstract>Due to the limited context window, Large Language Models (LLMs) struggle with processing long contexts. Although fine-tuning can extend the context window, it incurs substantial computation costs. In contrast, recent tuning-free approaches reallocate the attention mechanism or incorporate temporary trainable parameters. In this work, by jointly modeling instance-level generation with a limited context window and learning over sequential data, we rethink the long context generation of LLMs from a continual learning perspective. In practice, we inspect existing representative approaches and analyze their synergy with continual learning strategies. Moreover, we integrate these strategies into current approaches to further boost LLMs’ efficiency in processing long contexts. Comprehensive experiments and analysis confirm the feasibility of continual learning insights for improving long-context processing.</abstract>
       <url hash="51278226">2025.coling-main.131</url>
@@ -1604,7 +1604,7 @@
       <author><first>Hansi</first><last>Wang</last></author>
       <author><first>Yue</first><last>Wang</last></author>
       <author><first>Qiliang</first><last>Liang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>1934–1942</pages>
       <abstract>Word Sense Disambiguation (WSD) is a fundamental task critical for accurate semantic understanding. Conventional training strategies usually only consider predefined senses for target words and learn each of them from relatively limited instances, neglecting the influence of similar ones. To address these problems, we propose the method of Learning to Rank Senses (LTRS) to enhance the task. This method helps a model learn to represent and disambiguate senses from a broadened range of instances via ranking an expanded list of sense definitions. By employing LTRS, our model achieves a SOTA F1 score of 79.6% in Chinese WSD and exhibits robustness in low-resource settings. Moreover, it shows excellent training efficiency, achieving faster convergence than previous methods. This provides a new technical approach to WSD and may also apply to the task for other languages.</abstract>
       <url hash="195c4633">2025.coling-main.132</url>
@@ -1641,7 +1641,7 @@
       <author><first>Tibor</first><last>Bosse</last></author>
       <author><first>Gert-Jan</first><last>De Bruijn</last></author>
       <author><first>Jos A.</first><last>Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>1964–1982</pages>
       <abstract>Motivational Interviewing (MI) is a counseling technique that promotes behavioral change through reflective responses to mirror or refine client statements. While advanced Large Language Models (LLMs) can generate engaging dialogues, challenges remain for applying them in a sensitive context such as MI. This work assesses the potential of LLMs to generate MI reflections via three LLMs: GPT-4, Llama-2, and BLOOM, and explores the effect of dialogue context size and integration of MI strategies for reflection generation by LLMs. We conduct evaluations using both automatic metrics and human judges on four criteria: appropriateness, relevance, engagement, and naturalness, to assess whether these LLMs can accurately generate the nuanced therapeutic communication required in MI. While we demonstrate LLMs’ potential in generating MI reflections comparable to human therapists, content analysis shows that significant challenges remain. By identifying the strengths and limitations of LLMs in generating empathetic and contextually appropriate reflections in MI, this work contributes to the ongoing dialogue in enhancing LLM’s role in therapeutic counseling.</abstract>
       <url hash="40da520b">2025.coling-main.135</url>
@@ -1669,7 +1669,7 @@
       <author><first>Karody Lubna Abdul</first><last>Rahman</last></author>
       <author><first>Santosh</first><last>Kurasa</last></author>
       <author><first>Parag</first><last>Agrawal</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <pages>2003–2025</pages>
       <abstract>Chain-of-thought (CoT) prompting has significantly enhanced the the capability of large language models (LLMs) by structuring their reasoning processes. However, existing methods face critical limitations: handcrafted demonstrations require extensive human expertise, while trigger phrases are prone to inaccuracies. In this paper, we propose the Zero-shot Uncertainty-based Selection (ZEUS) method, a novel approach that improves CoT prompting by utilizing uncertainty estimates to select effective demonstrations without needing access to model parameters. Unlike traditional methods, ZEUS offers high sensitivity in distinguishing between helpful and ineffective questions, ensuring more precise and reliable selection. Our extensive evaluation shows that ZEUS consistently outperforms existing CoT strategies across four challenging reasoning benchmarks, demonstrating its robustness and scalability.</abstract>
       <url hash="14228f02">2025.coling-main.137</url>
@@ -1680,7 +1680,7 @@
       <author><first>Zihao</first><last>Feng</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Wang</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2026–2037</pages>
       <abstract>Large Language Models (LLMs) have demonstrated exceptional performance across a broad spectrum of cross-lingual Natural Language Processing (NLP) tasks. However, previous methods predominantly focus on leveraging parallel corpus to conduct instruction data for continuing pre-training or fine-tuning. They ignored the state of parallel data on the hidden layers of LLMs. In this paper, we demonstrate Word-level Cross-lingual Structure (WCS) of LLM which proves that the word-level embedding on the hidden layers are isomorphic between languages. We find that the hidden states of different languages’ input on the LLMs hidden layers can be aligned with an orthogonal matrix on word-level. We prove this conclusion in both mathematical and downstream task ways on two representative LLM foundations, LLaMA2 and BLOOM. Besides, we propose an Isomorphism-based Data Augmentation (IDA) method to apply the WCS on a downstream cross-lingual task, Bilingual Lexicon Induction (BLI), in both supervised and unsupervised ways. The experiment shows the significant improvement of our proposed method over all the baselines, especially on low-resource languages.</abstract>
       <url hash="862f50e1">2025.coling-main.138</url>
@@ -1692,7 +1692,7 @@
       <author><first>Zhong</first><last>Qian</last></author>
       <author><first>Xiaoxu</first><last>Zhu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>2038–2048</pages>
       <abstract>Document-level event factuality identification (DEFI) assesses the veracity degree to which an event mentioned in a document has happened, which is crucial for many natural language processing tasks. Previous work assesses event factuality by solely relying on the semantic information within a single document, which fails to identify hard cases where the document itself is hallucinative or counterfactual. There is also a pressing need for more suitable data of this kind. To tackle these issues, we construct Factualusion, a novel corpus with hallucination features that can be used not only for DEFI but can also be applied for hallucination evaluation for large language models. We further propose Trucidator, a graph-based framework that constructs intra-document and cross-document graphs and employs a multi-task learning paradigm to acquire more robust node embeddings, leveraging cross-document inference for more accurate identification. Experiments show that our proposed framework outperformed several baselines, demonstrating the effectiveness of our method.</abstract>
       <url hash="d930412c">2025.coling-main.139</url>
@@ -1763,7 +1763,7 @@
       <author><first>Ramon</first><last>Ruiz-Dolz</last></author>
       <author><first>Debela</first><last>Gemechu</last></author>
       <author><first>Zlata</first><last>Kikteva</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>2131–2143</pages>
       <abstract>Traditionally, argument mining research has approached the task of automatic identification of argument structures by using existing definitions of what constitutes an argument, while leaving the equally important matter of what does not qualify as an argument unaddressed. With the ability to distinguish between what is and what is not a natural language argument being at the core of argument mining as a field, it is interesting that no previous work has explored approaches to effectively select non-related propositions (i.e., propositions that are not connected through an argumentative relation, such as support or attack) that improve the data for learning argument mining tasks better. In this paper, we address the question of how to effectively sample non-related propositions from six different argument mining corpora belonging to different domains and encompassing both monologue and dialogue forms of argumentation. To that end, in addition to considering undersampling baselines from previous work, we propose three new sampling strategies relying on context (i.e., short/long) and the semantic similarity between propositions. Our results indicate that using more informed sampling strategies improves the performance, not only when evaluating models on their respective test splits, but also in the case of cross-domain evaluation.</abstract>
       <url hash="73e832f4">2025.coling-main.145</url>
@@ -1776,7 +1776,7 @@
       <author><first>Yiwei</first><last>Wang</last></author>
       <author><first>Baolong</first><last>Bi</last></author>
       <author><first>Jiayi</first><last>Mao</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>2144–2162</pages>
       <abstract>“Jailbreak” is a major safety concern of Large Language Models (LLMs), which occurs when malicious prompts lead LLMs to produce harmful outputs, raising issues about the reliability and safety of LLMs. Therefore, an effective evaluation of jailbreaks is very crucial to develop its mitigation strategies. However, our research reveals that many jailbreaks identified by current evaluations may actually be hallucinations—erroneous outputs that are mistaken for genuine safety breaches. This finding suggests that some perceived vulnerabilities might not represent actual threats, indicating a need for more precise red teaming benchmarks. To address this problem, we propose the Benchmark for reliABilitY and jailBreak haLlUcination Evaluation (BabyBLUE). BabyBLUE introduces a specialized validation framework including various evaluators to enhance existing jailbreak benchmarks, ensuring outputs are useful malicious instructions. Additionally, BabyBLUE presents a new dataset as an augmentation to the existing red teaming benchmarks, specifically addressing hallucinations in jailbreaks, aiming to evaluate the true potential of jailbroken LLM outputs to cause harm to human society.</abstract>
       <url hash="2d897903">2025.coling-main.146</url>
@@ -1843,7 +1843,7 @@
     <paper id="152">
       <title>Leveraging Explicit Reasoning for Inference Integration in Commonsense-Augmented Dialogue Models</title>
       <author><first>Sarah E.</first><last>Finch</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>2222–2235</pages>
       <abstract>Open-domain dialogue systems need to grasp social commonsense to understand and respond effectively to human users. Commonsense-augmented dialogue models have been proposed that aim to infer commonsense knowledge from dialogue contexts in order to improve response quality. However, existing approaches to commonsense-augmented dialogue rely on implicit reasoning to integrate commonsense inferences during response generation. In this study, we explore the impact of explicit reasoning against implicit reasoning over commonsense for dialogue response generation. Our findings demonstrate that separating commonsense reasoning into explicit steps for generating, selecting, and integrating commonsense into responses leads to better dialogue interactions, improving naturalness, engagement, specificity, and overall quality. Subsequent analyses of these findings unveil insights into the effectiveness of various types of commonsense in generating responses and the particular response traits enhanced through explicit reasoning for commonsense integration. Our work advances research in open-domain dialogue by achieving a new state-of-the-art in commonsense-augmented response generation.</abstract>
       <url hash="3562ad3d">2025.coling-main.152</url>
@@ -1892,8 +1892,8 @@
       <author><first>Minzhi</first><last>Li</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
       <author><first>Shumin</first><last>Deng</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>2277–2290</pages>
       <abstract>The acceleration of Large Language Models (LLMs) research has opened up new possibilities for evaluating generated text. Though LLMs serve as scalable and economical evaluators, how reliable these evaluators is still under-explored. Prior research efforts in the meta-evaluation of LLMs as judges limit the prompting of an LLM to a single use to obtain a final evaluation decision. They then compute the agreement between LLMs’ outputs and human labels. This lacks interpretability in understanding the evaluation capability of LLMs. In light of this challenge, we propose DnA-Eval, which breaks down the evaluation process into decomposition and aggregation stages based on pedagogical practices. Our experiments show that it not only provides a more interpretable window for how well LLMs evaluate, but also leads to improvements up to 39.6% for different LLMs on a variety of meta-evaluation benchmarks.</abstract>
@@ -1950,7 +1950,7 @@
       <author><first>Tao</first><last>Feng</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Xiaoxi</first><last>Kang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2351–2369</pages>
       <abstract>Automatically evaluating the quality of responses in dialogue systems is a challenging yet crucial task. Current metrics often fail to align with human judgments, especially when assessing responses that are grammatically correct. To address this issue, we propose a novel metric, called CausalScore, which assesses the relevance of responses by measuring the causal strength between dialogue histories and responses. The causal strength is estimated by utilizing both unconditional dependence and conditional dependencies from dialogue histories to responses. We compare our metric with the existing competitive metrics in terms of their alignment with human judgements. Our experimental results demonstrate that CausalScore significantly surpasses existing state-of-the-art metrics by aligning better with human judgements. Additionally, we collect a dialogue dataset CGDIALOG+ with human-annotated causal relations and a set of pairwise human judgements to facilitate the development of automatic metrics.</abstract>
       <url hash="2ad3f85b">2025.coling-main.161</url>
@@ -2004,7 +2004,7 @@
       <author><first>Haotian</first><last>Ye</last></author>
       <author><first>Chunlan</first><last>Ma</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2417–2433</pages>
       <abstract>Recent studies have shown that post-aligning multilingual pretrained language models (mPLMs) using alignment objectives on both original and transliterated data can improve crosslingual alignment. This improvement further leads to better crosslingual transfer performance. However, it remains unclear how and why a better crosslingual alignment is achieved, as this technique only involves transliterations, and does not use any parallel data. This paper attempts to explicitly evaluate the crosslingual alignment and identify the key elements in transliteration-based approaches that contribute to better performance. For this, we train multiple models under varying setups for two pairs of related languages: (1) Polish and Ukrainian and (2) Hindi and Urdu. To assess alignment, we define four types of similarities based on sentence representations. Our experimental results show that adding transliterations alone improves the overall similarities, even for random sentence pairs. With the help of auxiliary transliteration-based alignment objectives, especially the contrastive objective, the model learns to distinguish matched from random pairs, leading to better crosslingual alignment. However, we also show that better alignment does not always yield better downstream performance, suggesting that further research is needed to clarify the connection between alignment and performance. The code implementation is based on <url>https://github.com/cisnlp/Transliteration-PPA</url>.</abstract>
       <url hash="67e55a3c">2025.coling-main.165</url>
@@ -2014,7 +2014,7 @@
       <title><fixed-case>GL</fixed-case>-<fixed-case>GAN</fixed-case>: Perceiving and Integrating Global and Local Styles for Handwritten Text Generation with Mamba</title>
       <author><first>Yiming</first><last>Wang</last></author>
       <author><first>Hongxi</first><last>Wei</last></author>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last></author>
+      <author><first>Heng</first><last>Wang</last></author>
       <author><first>Shiwen</first><last>Sun</last></author>
       <author><first>Chao</first><last>He</last></author>
       <pages>2434–2444</pages>
@@ -2042,10 +2042,10 @@
       <author><first>Younes</first><last>Samih</last></author>
       <author><first>Kirill</first><last>Chirkunov</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Shantanu</first><last>Godbole</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>2456–2477</pages>
       <abstract>The rapid evolution of Natural Language Processing (NLP) has favoured major languages such as English, leaving a significant gap for many others due to limited resources. This is especially evident in the context of data annotation, a task whose importance cannot be underestimated, but which is time-consuming and costly. Thus, any dataset for resource-poor languages is precious, in particular when it is task-specific. Here, we explore the feasibility of repurposing an existing multilingual dataset for a new NLP task: we repurpose a subset of the BELEBELE dataset (Bandarkar et al., 2023), which was designed for multiple-choice question answering (MCQA), to enable the more practical task of extractive QA (EQA) in the style of machine reading comprehension. We present annotation guidelines and a parallel EQA dataset for English and Modern Standard Arabic (MSA). We also present QA evaluation results for several monolingual and cross-lingual QA pairs including English, MSA, and five Arabic dialects. We aim to help others adapt our approach for the remaining 120 BELEBELE language variants, many of which are deemed under-resourced. We also provide a thorough analysis and share insights to deepen understanding of the challenges and opportunities in NLP task reformulation.</abstract>
@@ -2068,7 +2068,7 @@
       <author><first>Hongde</first><last>Liu</last></author>
       <author><first>Fei</first><last>Gao</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Min</first><last>Peng</last></author>
       <pages>2497–2512</pages>
       <abstract>Emotion recognition in conversations (ERC) has garnered significant attention from the research community. However, due to the complexity of visual scenes and dialogue contextual dependencies in conversations, previous ERC methods fail to handle emotional cues from both visual sources and discourse structures. Furthermore, existing state-of-the-art ERC models are trained and tested separately on each single ERC dataset, not verifying their effectiveness across multiple datasets simultaneously. To address these challenges, this paper proposes an innovative framework for ERC, called Dialogue Scenes Understanding Enhanced Multi-modal Multi-task Tuning (DialogueMMT). More concretely, a novel video-language connector is applied within the large vision-language model for capturing video features effectively. Additionally, we utilize multi-task instruction tuning with a unified ERC dataset to enhance the model’s understanding of multi-modal dialogue scenes and employ a chain-of-thought strategy to improve emotion classification performance. Extensive experimental results on three benchmark ERC datasets indicate that the proposed DialogueMMT framework consistently outperforms existing state-of-the-art approaches in terms of overall performance.</abstract>
@@ -2092,10 +2092,10 @@
     </paper>
     <paper id="172">
       <title>Aligning Large Language Models with Human Opinions through Persona Selection and Value–Belief–Norm Reasoning</title>
-      <author><first>Do Xuan</first><last>Long</last></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last></author>
       <author><first>Kenji</first><last>Kawaguchi</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>2526–2547</pages>
       <abstract>Reasoning and predicting human opinions with large language models (LLMs) is essential yet challenging. Current methods employ role-playing with personae but face two major issues: LLMs are sensitive to even a single irrelevant persona, skewing predictions by up to 30%; and LLMs fail to reason strategically over personae. We propose Chain-of-Opinion (COO), a simple four-step solution modeling which and how to reason with personae, inspired by the Value–Belief–Norm (VBN) theory. COO differentiates between explicit personae (demographics and ideology) and implicit personae (historical opinions), involves: (1) filtering irrelevant attributes from explicit personae; (2) ranking implicit personae into a preferential list for selecting top-k; (3) applying novel VBN reasoning to extract user environmental and personal value, belief, and norm variables for accurate and reliable predictions; and (4) iterating VBN reasoning with progressively larger lists of implicit personae to handle potential persona insufficiency. COO efficiently achieves new state-of-the-art opinion prediction via prompting with only 5 inference calls, improving prior techniques by up to 4%. Notably, fine-tuning LMs with COO’s data results in significantly better opinion-aligned models, by up to 23%.</abstract>
       <url hash="4ee8e607">2025.coling-main.172</url>
@@ -2147,7 +2147,7 @@
       <author><first>Huiyao</first><last>Wang</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Yaxin</first><last>Fan</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>2592–2602</pages>
       <abstract>Previous work on dialogue topic shift detection has primarily focused on shallow local reasoning, overlooking the importance of considering the global historical structure and local details to elucidate the underlying causes of topic shift. To address the above two issues, we introduce the dual-process theory to this task and design a novel Dual-Module Framework DMF (i.e., intuition and reasoning module) for dialogue topic shift detection to emulate this cognitive process. Specifically, the intuition module employs Large Language Models (LLMs) to extract and store the global topic structure of historical dialogue, while the reasoning module introduces a LLM to generate reasoning samples between the response and the most recent topic of historical dialogue, thereby providing local detail explanations for topic shift. Moreover, we distill the dual-module framework into a small generative model to facilitate more precise reasoning. The experimental results on three public datasets show that our DMF outperforms the state-of-the-art baselines.</abstract>
       <url hash="349b11ba">2025.coling-main.177</url>
@@ -2217,7 +2217,7 @@
     </paper>
     <paper id="183">
       <title>Oddballness: universal anomaly detection with language models</title>
-      <author><first>Filip</first><last>Gralinski</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Gralinski</last></author>
       <author><first>Ryszard</first><last>Staruch</last></author>
       <author><first>Krzysztof</first><last>Jurkiewicz</last></author>
       <pages>2683–2689</pages>
@@ -2273,7 +2273,7 @@
       <title>The Gaps between Fine Tuning and In-context Learning in Bias Evaluation and Debiasing</title>
       <author><first>Masahiro</first><last>Kaneko</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2758–2764</pages>
       <abstract>The output tendencies of PLMs vary markedly before and after FT due to the updates to the model parameters. These divergences in output tendencies result in a gap in the social biases of PLMs. For example, there exits a low correlation between intrinsic bias scores of a PLM and its extrinsic bias scores under FT-based debiasing methods. Additionally, applying FT-based debiasing methods to a PLM leads to a decline in performance in downstream tasks. On the other hand, PLMs trained on large datasets can learn without parameter updates via ICL using prompts. ICL induces smaller changes to PLMs compared to FT-based debiasing methods. Therefore, we hypothesize that the gap observed in pre-trained and FT models does not hold true for debiasing methods that use ICL. In this study, we demonstrate that ICL-based debiasing methods show a higher correlation between intrinsic and extrinsic bias scores compared to FT-based methods. Moreover, the performance degradation due to debiasing is also lower in the ICL case compared to that in the FT case.</abstract>
       <url hash="c03c765c">2025.coling-main.187</url>
@@ -2283,7 +2283,7 @@
       <title><fixed-case>LLM</fixed-case> Sensitivity Challenges in Abusive Language Detection: Instruction-Tuned vs. Human Feedback</title>
       <author><first>Yaqi</first><last>Zhang</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>2765–2780</pages>
       <abstract>The capacity of large language models (LLMs) to understand and distinguish socially unacceptable texts enables them to play a promising role in abusive language detection. However, various factors can affect their sensitivity. In this work, we test whether LLMs have an unintended bias in abusive language detection, i.e., whether they predict more or less of a given abusive class than expected in zero-shot settings. Our results show that instruction-tuned LLMs tend to under-predict positive classes, since datasets used for tuning are dominated by the negative class. On the contrary, models fine-tuned with human feedback tend to be overly sensitive. In an exploratory approach to mitigate these issues, we show that label frequency in the prompt helps with the significant over-prediction.</abstract>
       <url hash="b3e3eb1f">2025.coling-main.188</url>
@@ -2315,7 +2315,7 @@
       <title>Topology-of-Question-Decomposition: Enhancing Large Language Models with Information Retrieval for Knowledge-Intensive Tasks</title>
       <author><first>Weijie</first><last>Li</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>2814–2833</pages>
       <abstract>Large language models (LLMs) are increasingly deployed for general problem-solving across various domains yet remain constrained to chaining immediate reasoning steps and depending solely on parametric knowledge. Integrating an information retrieval system directly into the reasoning process of LLMs can improve answer accuracy but might disrupt the natural reasoning sequence. Consequently, LLMs may underperform in complex, knowledge-intensive tasks requiring multiple reasoning steps, extensive real-world knowledge, or critical initial decisions. To overcome these challenges, we introduce a novel framework, Topology-of-Question-Decomposition (ToQD), which activates retrieval only when necessary. Globally, ToQD guides LLMs in constructing a topology graph from the input question, each node representing a sub-question. Locally, ToQD employs self-verify inference to determine whether a sub-question should retrieve relevant documents, necessitate further decomposition, or directly provide an answer. Experiments demonstrate that ToQD achieves superior performance and robustness in complex, knowledge-intensive tasks, significantly enhancing system response efficiency.</abstract>
@@ -2363,7 +2363,7 @@
       <author><first>Zonghan</first><last>Yang</last></author>
       <author><first>Xinrui</first><last>Chen</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>2886–2903</pages>
       <abstract>State-of-the-art Large Multi-Modal Models (LMMs) have demonstrated exceptional capabilities in vision-language tasks. Despite their advanced functionalities, the performances of LMMs are still limited in challenging scenarios that require complex reasoning with multiple levels of visual information. Existing prompting techniques for LMMs focus on either improving textual reasoning or leveraging tools for image preprocessing, lacking a simple and general visual prompting scheme to promote vision-language coordination in LMMs. In this work, we propose SCAFFOLD prompting that scaffolds coordinates to promote vision-language coordination. Specifically, SCAFFOLD overlays a dot matrix within the image as visual information anchors and leverages multi-dimensional coordinates as textual positional references. Extensive experiments on a wide range of challenging vision-language tasks demonstrate the superiority of SCAFFOLD over the textual Chain-of-Thought prompting.</abstract>
       <url hash="4c9b059e">2025.coling-main.195</url>
@@ -2375,7 +2375,7 @@
       <author><first>Jian</first><last>Zhang</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Yuanyuan</first><last>Liang</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Hongwei</first><last>Wang</last></author>
       <pages>2904–2918</pages>
       <abstract>The strong capability of large language models (LLMs) has been applied to information extraction (IE) through either retrieval augmented prompting or instruction tuning (IT). However, the best way to incorporate information with LLMs for IE remains an open question. In this paper, we explore Retrieval Augmented Instruction Tuning (RA-IT) for IE, focusing on the task of open named entity recognition (NER). Specifically, for each training sample, we retrieve semantically similar examples from the training dataset as the context and prepend them to the input of the original instruction. To evaluate our RA-IT approach more thoroughly, we construct a Chinese IT dataset for open NER and evaluate RA-IT in both English and Chinese scenarios. Experimental results verify the effectiveness of RA-IT across various data sizes and in both English and Chinese scenarios. We also conduct thorough studies to explore the impacts of various retrieval strategies in the proposed RA-IT framework.</abstract>
@@ -2421,7 +2421,7 @@
     <paper id="200">
       <title>Towards Understanding Multi-Task Learning (Generalization) of <fixed-case>LLM</fixed-case>s via Detecting and Exploring Task-Specific Neurons</title>
       <author><first>Yongqi</first><last>Leng</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2969–2987</pages>
       <abstract>While large language models (LLMs) have demonstrated superior multi-task capabilities, understanding the learning mechanisms behind this is still a challenging problem. In this paper, we attempt to understand such mechanisms from the perspective of neurons. Specifically, we detect task-sensitive neurons in LLMs via gradient attribution on task-specific data. Through extensive deactivation and fine-tuning experiments, we demonstrate that the detected neurons are highly correlated with the given task, which we term as task-specific neurons. With these identified task-specific neurons, we delve into two common problems in multi-task learning and continuous learning: Generalization and Catastrophic Forgetting. We find that the overlap of task-specific neurons is strongly associated with generalization and specialization across tasks. Interestingly, at certain layers of LLMs, there is a high similarity in the parameters of different task-specific neurons, and such similarity is highly correlated with the generalization performance. Inspired by these findings, we propose a neuron-level continuous fine-tuning method that only fine-tunes the current task-specific neurons during continuous learning, and extensive experiments demonstrate the effectiveness of the proposed method. Our study provides insights into the interpretability of LLMs in multi-task learning.</abstract>
       <url hash="13668b46">2025.coling-main.200</url>
@@ -2432,7 +2432,7 @@
       <author><first>Yuqi</first><last>Ren</last></author>
       <author><first>Renren</first><last>Jin</last></author>
       <author><first>Tongxuan</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>2988–3001</pages>
       <abstract>Large Language Models (LLMs) have demonstrated remarkable abilities in text comprehension and logical reasoning, indicating that the text representations learned by LLMs can facilitate their language processing capabilities. In neuroscience, brain cognitive processing signals are typically utilized to study human language processing. Therefore, it is natural to ask how well the text embeddings from LLMs align with the brain cognitive processing signals, and how training strategies affect the LLM-brain alignment? In this paper, we employ Representational Similarity Analysis (RSA) to measure the alignment between 23 mainstream LLMs and fMRI signals of the brain to evaluate how effectively LLMs simulate cognitive language processing. We empirically investigate the impact of various factors (e.g., pre-training data size, model scaling, alignment training, and prompts) on such LLM-brain alignment. Experimental results indicate that pre-training data size and model scaling are positively correlated with LLM-brain similarity, and alignment training can significantly improve LLM-brain similarity. Explicit prompts contribute to the consistency of LLMs with brain cognitive language processing, while nonsensical noisy prompts may attenuate such alignment. Additionally, the performance of a wide range of LLM evaluations (e.g., MMLU, Chatbot Arena) is highly correlated with the LLM-brain similarity.</abstract>
       <url hash="1b4fed44">2025.coling-main.201</url>
@@ -2488,7 +2488,7 @@
       <title>To Label or Not to Label: Hybrid Active Learning for Neural Machine Translation</title>
       <author><first>Abdul Hameed</first><last>Azeemi</last></author>
       <author><first>Ihsan Ayyub</first><last>Qazi</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <pages>3071–3082</pages>
       <abstract>Active learning (AL) techniques reduce labeling costs for training neural machine translation (NMT) models by selecting smaller representative subsets from unlabeled data for annotation. Diversity sampling techniques select heterogeneous instances, while uncertainty sampling methods select instances with the highest model uncertainty. Both approaches have limitations - diversity methods may extract varied but trivial examples, while uncertainty sampling can yield repetitive, uninformative instances. To bridge this gap, we propose Hybrid Uncertainty and Diversity Sampling (HUDS), an AL strategy for domain adaptation in NMT that combines uncertainty and diversity for sentence selection. HUDS computes uncertainty scores for unlabeled sentences and subsequently stratifies them. It then clusters sentence embeddings within each stratum and computes diversity scores by distance to the centroid. A weighted hybrid score that combines uncertainty and diversity is then used to select the top instances for annotation in each AL iteration. Experiments on multi-domain German-English and French-English datasets demonstrate the better performance of HUDS over other strong AL baselines. We analyze the sentence selection with HUDS and show that it prioritizes diverse instances having high model uncertainty for annotation in early AL iterations.</abstract>
       <url hash="ab546b74">2025.coling-main.206</url>
@@ -2565,7 +2565,7 @@
       <author><first>Changze</first><last>Lv</last></author>
       <author><first>Xiaohua</first><last>Wang</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3158–3178</pages>
       <abstract>The recent surge in jailbreaking attacks has revealed significant vulnerabilities in Large Language Models (LLMs) when exposed to malicious inputs. While various defense strategies have been proposed to mitigate these threats, there has been limited research into the underlying mechanisms that make LLMs vulnerable to such attacks. In this study, we suggest that the self-safeguarding capability of LLMs is linked to specific activity patterns within their representation space. Although these patterns have little impact on the semantic content of the generated text, they play a crucial role in shaping LLM behavior under jailbreaking attacks. Our findings demonstrate that these patterns can be detected with just a few pairs of contrastive queries. Extensive experimentation shows that the robustness of LLMs against jailbreaking can be manipulated by weakening or strengthening these patterns. Further visual analysis provides additional evidence for our conclusions, providing new insights into the jailbreaking phenomenon. These findings highlight the importance of addressing the potential misuse of open-source LLMs within the community.</abstract>
       <url hash="79f47af5">2025.coling-main.212</url>
@@ -2622,7 +2622,7 @@
       <title><fixed-case>COF</fixed-case>: Adaptive Chain of Feedback for Comparative Opinion Quintuple Extraction</title>
       <author><first>Qingting</first><last>Xu</last></author>
       <author><first>Kaisong</first><last>Song</last></author>
-      <author id="chaoqun-liu"><first>Chaoqun</first><last>Liu</last></author>
+      <author><first>Chaoqun</first><last>Liu</last></author>
       <author><first>Yangyang</first><last>Kang</last></author>
       <author><first>Xiabing</first><last>Zhou</last></author>
       <author><first>Jun</first><last>Lin</last></author>
@@ -2701,7 +2701,7 @@
       <author><first>Siyuan</first><last>Wang</last></author>
       <author><first>Zhuohan</first><last>Long</last></author>
       <author><first>Zhihao</first><last>Fan</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>3310–3328</pages>
       <abstract>This paper presents a benchmark self-evolving framework to dynamically evaluate rapidly advancing Large Language Models (LLMs). We utilize a multi-agent system to reframe new evolving instances with high confidence that extend existing benchmarks. Towards a more scalable, robust and fine-grained evaluation, we implement six reframing operations to construct evolving instances testing LLMs against diverse queries, shortcut biases and probing their problem-solving sub-abilities. With this framework, we extend datasets across general and specific tasks, through various iterations. Experimental results show a performance decline in most LLMs against their original results under scalable and robust evaluations, offering a more accurate reflection of model capabilities alongside our fine-grained evaluation. Besides, our framework widens performance discrepancies both between different models and within the same model across various tasks, facilitating more informed model selection for specific tasks. We hope this framework contributes the research community for continuously evolving benchmarks alongside LLM development.</abstract>
@@ -2790,7 +2790,7 @@
     <paper id="231">
       <title>On Evaluation Protocols for Data Augmentation in a Limited Data Scenario</title>
       <author><first>Frédéric</first><last>Piedboeuf</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>3428–3443</pages>
       <abstract>Textual data augmentation (DA) is a prolific field of study where novel techniques to create artificial data are regularly proposed, and that has demonstrated great efficiency on small data settings, at least for text classification tasks. In this paper, we challenge those results, showing that classical data augmentation (which modify sentences) is simply a way of performing better fine-tuning, and that spending more time doing so before applying data augmentation negates its effect. This is a significant contribution as it answers several questions that were left open in recent years, namely : which DA technique performs best (all of them as long as they generate data close enough to the training set, as to not impair training) and why did DA show positive results (facilitates training of network). We further show that zero- and few-shot DA via conversational agents such as ChatGPT or LLama2 can increase performances, confirming that this form of data augmentation is preferable to classical methods.</abstract>
       <url hash="7862cd24">2025.coling-main.231</url>
@@ -2826,7 +2826,7 @@
       <author><first>Alexander</first><last>Johnson</last></author>
       <author><first>Emmy</first><last>Phung</last></author>
       <author><first>Ahmad</first><last>Emami</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>3479–3494</pages>
       <abstract>The rapid expansion of online content has intensified the issue of information redundancy, underscoring the need for solutions that can identify genuinely new information. Despite this challenge, the research community has seen a decline in focus on novelty detection, particularly with the rise of large language models (LLMs). Additionally, previous approaches have relied heavily on human annotation, which is time-consuming, costly, and particularly challenging when annotators must compare a target document against a vast number of historical documents. In this work, we introduce NovAScore (Novelty Evaluation in Atomicity Score), an automated metric for evaluating document-level novelty. NovAScore aggregates the novelty and salience scores of atomic information, providing high interpretability and a detailed analysis of a document’s novelty. With its dynamic weight adjustment scheme, NovAScore offers enhanced flexibility and an additional dimension to assess both the novelty level and the importance of information within a document. Our experiments show that NovAScore strongly correlates with human judgments of novelty, achieving a 0.626 Point-Biserial correlation on the TAP-DLND 1.0 dataset and a 0.920 Pearson correlation on an internal human-annotated dataset.</abstract>
       <url hash="7e28adaa">2025.coling-main.234</url>
@@ -2916,8 +2916,8 @@
     </paper>
     <paper id="242">
       <title>Is Peer-Reviewing Worth the Effort?</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
-      <author><first>Raman</first><last>Chandrasekar</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="raman-chandrasekar"><first>Raman</first><last>Chandrasekar</last></author>
       <author><first>John E.</first><last>Ortega</last></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last></author>
       <pages>3589–3599</pages>
@@ -2931,7 +2931,7 @@
       <author><first>Ryo</first><last>Sato</last></author>
       <author><first>Dai</first><last>Nakashima</last></author>
       <author><first>Takeshi</first><last>Suzuki</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>3600–3612</pages>
       <abstract>Large language models (LLMs) have achieved notable success across various tasks but are hindered by their large size and high computational demands. Post-training pruning (PTP) offers a promising solution by reducing model size through parameter removal while preserving performance. However, current PTP methods perform optimally only within specific sparsity ranges. This paper presents two key findings: (1) Layerwise uniform sparsity is effective at low sparsity, while non-uniform sparsity excels at high levels; (2) Relative importance-based pruning works best at low sparsity, whereas Hessian-based weight reconstruction is superior at high sparsity. We design and conduct experiments to validate these findings. Based on these insights, we introduce OptiPrune, a robust pruning method effective across all sparsity levels. OptiPrune adapts non-uniform sparsity with adaptive deviation and employs a threshold to select the optimal pruning strategy. Empirical results across diverse datasets, architectures, and languages validate its performance and robustness. These findings provide valuable directions for future LLM pruning research. Our code and data are publicly available.</abstract>
       <url hash="550ad76f">2025.coling-main.243</url>
@@ -2973,7 +2973,7 @@
     <paper id="247">
       <title>Multilingual Supervision Improves Semantic Disambiguation of Adpositions</title>
       <author><first>Wesley</first><last>Scivetti</last></author>
-      <author><first>Lauren</first><last>Levine</last></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <pages>3655–3669</pages>
       <abstract>Adpositions display a remarkable amount of ambiguity and flexibility in their meanings, and are used in different ways across languages. We conduct a systematic corpus-based cross-linguistic investigation into the lexical semantics of adpositions, utilizing SNACS (Schneider et al., 2018), an annotation framework with data available in several languages. Our investigation encompasses 5 of these languages: Chinese, English, Gujarati, Hindi, and Japanese. We find substantial distributional differences in adposition semantics, even in comparable corpora. We further train classifiers to disambiguate adpositions in each of our languages. Despite the cross-linguistic differences in adpositional usage, sharing annotated data across languages boosts overall disambiguation performance, leading to the highest published scores on this task for all 5 languages.</abstract>
@@ -3003,7 +3003,7 @@
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author id="yuhao-wang-renmin"><first>Yuhao</first><last>Wang</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -3020,7 +3020,7 @@
       <author><first>Wenxuan</first><last>Zhang</last></author>
       <author><first>Xiaobao</first><last>Wu</last></author>
       <author><first>Boyang</first><last>Li</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <pages>3716–3731</pages>
       <abstract>Large Language Models (LLMs) have demonstrated remarkable performance through supervised fine-tuning or in-context learning using gold labels. However, this paradigm is limited by the availability of gold labels, while in certain scenarios, LLMs may need to perform tasks that are too complex for humans to provide such labels. To tackle this challenge, this study explores whether solely utilizing unlabeled data can elicit strong model capabilities. We propose a new paradigm termed zero-to-strong generalization. We iteratively prompt LLMs to annotate unlabeled data and retain high-quality labels by filtering. Surprisingly, we obverse that this iterative process gradually unlocks LLMs’ potential on downstream tasks. Our experiments on extensive classification and reasoning tasks confirm the effectiveness of our proposed framework. Our analysis indicates that this paradigm is effective for both in-context learning and fine-tuning, and for various model sizes.</abstract>
@@ -3035,7 +3035,7 @@
       <author><first>Abhishek</first><last>Lalwani</last></author>
       <author><first>David</first><last>Koleczek</last></author>
       <author><first>Mukund</first><last>Rungta</last></author>
-      <author><first>Sadid</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid</first><last>Hasan</last></author>
       <author><first>Elita</first><last>Lobo</last></author>
       <pages>3732–3752</pages>
       <abstract>Machine unlearning aims to efficiently eliminate the influence of specific training data, known as the forget set, from the model. However, existing unlearning methods for Large Language Models (LLMs) face a critical challenge: they rely solely on negative feedback to suppress responses related to the forget set, which often results in nonsensical or inconsistent outputs, diminishing model utility and posing potential privacy risks. To address this limitation, we propose a novel approach called Alternate Preference Optimization (AltPO), which combines negative feedback with in-domain positive feedback on the forget set. Additionally, we introduce new evaluation metrics to assess the quality of responses related to the forget set. Extensive experiments show that our approach not only enables effective unlearning but also avoids undesirable model behaviors while maintaining overall model performance.</abstract>
@@ -3118,7 +3118,7 @@
       <author><first>Senbin</first><last>Zhu</last></author>
       <author><first>Hongde</first><last>Liu</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Min</first><last>Peng</last></author>
       <pages>3836–3849</pages>
       <abstract>Entities are important to understanding literary works, which emphasize characters, plots and environment. The research on entity recognition, especially nested entity recognition in the literary domain is still insufficient partly due to insufficient annotated data. To address this issue, we construct the first Genre-oriented Corpus for Entity Recognition in Chinese Web Novels, namely GenWebNovel, comprising 400 chapters totaling 1,214,283 tokens under two genres, XuanHuan (Eastern Fantasy) and History. Based on the corpus, we analyze the distribution of different types of entities, including person, location, and organization. We also compare the nesting patterns of nested entities between GenWebNovel and the English corpus LitBank. Even though both belong to the literary domain, entities in different genres share few overlaps, making genre adaptation of NER (Named Entity Recognition) a hard problem. We propose a novel method that utilizes a pre-trained language model as an In-context learning example retriever to boost the performance of large language models. Our experiments show that this approach significantly enhances entity recognition, matching state-of-the-art (SOTA) models without requiring additional training data. Our code, dataset, and model are available at https://github.com/hjzhao73/GenWebNovel.</abstract>
@@ -3131,7 +3131,7 @@
       <author><first>Yi</first><last>Jing</last></author>
       <author><first>Tong</first><last>Wu</last></author>
       <author><first>Tianhao</first><last>Shen</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qing</first><last>Yang</last></author>
       <pages>3850–3864</pages>
       <abstract>Ensuring the safety of large language models (LLMs) is paramount, yet identifying potential vulnerabilities is challenging. While manual red teaming is effective, it is time-consuming, costly and lacks scalability. Automated red teaming (ART) offers a more cost-effective alternative, automatically generating adversarial prompts to expose LLM vulnerabilities. However, in current ART efforts, a robust framework is absent, which explicitly frames red teaming as an effectively learnable task. To address this gap, we propose Automated Progressive Red Teaming (APRT) as an effectively learnable framework. APRT leverages three core modules: an Intention Expanding LLM that generates diverse initial attack samples, an Intention Hiding LLM that crafts deceptive prompts, and an Evil Maker to manage prompt diversity and filter ineffective samples. The three modules collectively and progressively explore and exploit LLM vulnerabilities through multi-round interactions. In addition to the framework, we further propose a novel indicator, Attack Effectiveness Rate (AER) to mitigate the limitations of existing evaluation metrics. By measuring the likelihood of eliciting unsafe but seemingly helpful responses, AER aligns closely with human evaluations. Extensive experiments with both automatic and human evaluations, demonstrate the effectiveness of ARPT across both open- and closed-source LLMs. Specifically, APRT effectively elicits 54% unsafe yet useful responses from Meta’s Llama-3-8B-Instruct, 50% from GPT-4o (API access), and 39% from Claude-3.5 (API access), showcasing its robust attack capability and transferability across LLMs (especially from open-source LLMs to closed-source LLMs).</abstract>
@@ -3272,9 +3272,9 @@
     <paper id="272">
       <title>A Dual Contrastive Learning Framework for Enhanced Multimodal Conversational Emotion Recognition</title>
       <author><first>Yunhe</first><last>Xie</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Ziyi</first><last>Cao</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Zhenzhou</first><last>Ji</last></author>
       <author><first>Yuanchao</first><last>Liu</last></author>
       <author><first>Lili</first><last>Shan</last></author>
@@ -3358,7 +3358,7 @@
       <author><first>Xinwei</first><last>Wu</last></author>
       <author><first>Renren</first><last>Jin</last></author>
       <author><first>Shaoyang</first><last>Xu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>4130–4148</pages>
       <abstract>Ensuring large language models (LLM) behave consistently with human goals, values, and intentions is crucial for their safety but yet computationally expensive. To reduce the computational cost of alignment training of LLMs, especially for those with a huge number of parameters, and to reutilize learned value alignment, we propose ConTrans, a novel framework that enables weak-to-strong alignment transfer via concept transplantation. From the perspective of representation engineering, ConTrans refines concept vectors in value alignment from a source LLM (usually a weak yet aligned LLM). The refined concept vectors are then reformulated to adapt to the target LLM (usually a strong yet unaligned base LLM) via affine transformation. In the third step, ConTrans transplants the reformulated concept vectors into the residual stream of the target LLM. Experiments demonstrate the successful transplantation of a wide range of aligned concepts from 7B models to 13B and 70B models across multiple LLMs and LLM families. Remarkably, ConTrans even surpasses instruction-tuned models in terms of truthfulness. Experiment results validate the effectiveness of both inter-LLM-family and intra-LLM-family concept transplantation. Our work successfully demonstrates an alternative way to achieve weak-to-strong alignment generalization and control.</abstract>
       <url hash="29af2064">2025.coling-main.279</url>
@@ -3452,7 +3452,7 @@
     <paper id="287">
       <title>Jump To Hyperspace: Comparing <fixed-case>E</fixed-case>uclidean and Hyperbolic Loss Functions for Hierarchical Multi-Label Text Classification</title>
       <author><first>Jens</first><last>Van Nooten</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>4260–4273</pages>
       <abstract>Hierarchical Multi-Label Text Classification (HMTC) is a challenging machine learning task where multiple labels from a hierarchically organized label set are assigned to a single text. In this study, we examine the effectiveness of Euclidean and hyperbolic loss functions to improve the performance of BERT models on HMTC, which very few previous studies have adopted. We critically evaluate label-aware losses as well as contrastive losses in the Euclidean and hyperbolic space, demonstrating that hyperbolic loss functions perform comparably with non-hyperbolic loss functions on four commonly used HMTC datasets in most scenarios. While hyperbolic label-aware losses perform the best on low-level labels, the overall consistency and micro-averaged performance is compromised. Additionally, we find that our contrastive losses are less effective for HMTC when deployed in the hyperbolic space than non-hyperbolic counterparts. Our research highlights that with the right metrics and training objectives, hyperbolic space does not provide any additional benefits compared to Euclidean space for HMTC, thereby prompting a reevaluation of how different geometric spaces are used in other AI applications.</abstract>
       <url hash="aeeb846a">2025.coling-main.287</url>
@@ -3464,8 +3464,8 @@
       <author><first>Osama</first><last>Mohammed Afzal</last></author>
       <author><first>Hawau Olamide</first><last>Toyin</last></author>
       <author><first>Shady</first><last>Shehata</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <pages>4274–4281</pages>
       <abstract>Recent improvements in the quality of the generations by large language models have spurred research into identifying machine-generated text. Such work often presents high-performing detectors. However, humans and machines can produce text in different styles and domains, yet the the performance impact of such on machine generated text detection systems remains unclear. In this paper, we audit the classification performance for detecting machine-generated text by evaluating on texts with varying writing styles. We find that classifiers are highly sensitive to stylistic changes and differences in text complexity, and in some cases degrade entirely to random classifiers. We further find that detection systems are particularly susceptible to misclassify easy-to-read texts while they have high performance for complex texts, leading to concerns about the reliability of detection systems. We recommend that future work attends to stylistic factors and reading difficulty levels of human-written and machine-generated text.</abstract>
       <url hash="f4c821a6">2025.coling-main.288</url>
@@ -3507,7 +3507,7 @@
     <paper id="292">
       <title>Towards Efficient and Robust <fixed-case>VQA</fixed-case>-<fixed-case>NLE</fixed-case> Data Generation with Large Vision-Language Models</title>
       <author><first>Patrick Amadeus</first><last>Irawan</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <pages>4323–4340</pages>
@@ -3552,7 +3552,7 @@
     </paper>
     <paper id="296">
       <title><fixed-case>TEXT</fixed-case>-<fixed-case>CAKE</fixed-case>: Challenging Language Models on Local Text Coherence</title>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Dominique</first><last>Brunato</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
@@ -3619,7 +3619,7 @@
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Hanxing</first><last>Ding</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>4457–4473</pages>
       <abstract>The extensive utilization of large language models (LLMs) underscores the crucial necessity for precise and contemporary knowledge embedded within their intrinsic parameters. Existing research on knowledge editing primarily concentrates on monolingual scenarios, neglecting the complexities presented by multilingual contexts and multi-hop reasoning. To address these challenges, our study introduces MLaKE (Multilingual Language Knowledge Editing), a novel benchmark comprising 4072 multi-hop and 5360 single-hop questions designed to evaluate the adaptability of knowledge editing methods across five languages: English, Chinese, Japanese, French, and German. MLaKE aggregates fact chains from Wikipedia across languages and utilizes LLMs to generate questions and answer. We assessed the effectiveness of current multilingual knowledge editing methods using the MLaKE dataset. Our results show that due to considerable inconsistencies in both multilingual performance and encoding efficiency, these methods struggle to generalize effectively across languages. The accuracy of these methods when editing English is notably higher than for other languages. The experimental results further demonstrate that models encode knowledge and generation capabilities for different languages using distinct parameters, leading to poor cross-lingual transfer performance in current methods. Transfer performance is notably better within the same language family compared to across different families. These findings emphasize the urgent need to improve multilingual knowledge editing methods.</abstract>
       <url hash="ae05353e">2025.coling-main.301</url>
@@ -3749,7 +3749,7 @@
       <author><first>Jing</first><last>Ye</last></author>
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yaping</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>4646–4669</pages>
       <abstract>Large Language Models (LLMs) have demonstrated promising potential in providing empathetic support during interactions. However, their responses often become verbose or overly formulaic, failing to adequately address the diverse emotional support needs of real-world scenarios. To tackle this challenge, we propose an innovative strategy-enhanced role-playing framework, designed to simulate authentic emotional support conversations. Specifically, our approach unfolds in two steps: (1) Strategy-Enhanced Role-Playing Interactions, which involve three pivotal roles—Seeker, Strategy Counselor, and Supporter—engaging in diverse scenarios to emulate real-world interactions and promote a broader range of dialogues; and (2) Emotional Support Agent Training, achieved through fine-tuning LLMs using our specially constructed dataset. Within this framework, we develop the ServeForEmo dataset, comprising an extensive collection of 3.7K+ multi-turn dialogues and 62.8K+ utterances. We further present SweetieChat, an emotional support agent capable of handling diverse open-domain scenarios. Extensive experiments and human evaluations confirm the framework’s effectiveness in enhancing emotional support, highlighting its unique ability to provide more nuanced and tailored assistance.</abstract>
       <url hash="e1b86bb7">2025.coling-main.312</url>
@@ -3766,7 +3766,7 @@
       <author><first>Kailai</first><last>Yang</last></author>
       <author><first>Makoto</first><last>Miwa</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>4670–4688</pages>
       <abstract>We propose ELAINE (EngLish-jApanese-chINesE)-medLLM, a trilingual (English, Japanese, Chinese) large language model adapted for the bio-medical domain based on Llama-3-8B. The training dataset was carefully curated in terms of volume and diversity to adapt to the biomedical domain and endow trilingual capability while preserving the knowledge and abilities of the base model. The training follows 2-stage paths: continued pre-training and supervised fine-tuning (SFT). Our results demonstrate that ELAINE-medLLM exhibits superior trilingual capabilities compared to existing bilingual or multilingual medical LLMs without severely sacrificing the base model’s capability.</abstract>
       <url hash="abcf88a8">2025.coling-main.313</url>
@@ -3810,7 +3810,7 @@
     <paper id="317">
       <title>What’s the most important value? <fixed-case>INVP</fixed-case>: <fixed-case>IN</fixed-case>vestigating the Value Priorities of <fixed-case>LLM</fixed-case>s through Decision-making in Social Scenarios</title>
       <author><first>Xuelin</first><last>Liu</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>4725–4752</pages>
       <abstract>As large language models (LLMs) demonstrate impressive performance in various tasks and are increasingly integrated into the decision-making process, ensuring they align with human values has become crucial. This paper highlights that value priorities—the relative importance of different value—play a pivotal role in the decision-making process. To explore the value priorities in LLMs, this paper introduces INVP, a framework for INvestigating Value Priorities through decision-making in social scenarios. The framework encompasses social scenarios including binary decision-making, covering both individual and collective decision-making contexts, and is based on Schwartz’s value theory for constructing value priorities. Using this framework, we construct a dataset, which contains a total of 1613 scenarios and 3226 decisions across 283 topics. We evaluate seven popular LLMs and the experimental results reveal commonalities in the value priorities across different LLMs, such as an emphasis on Universalism and Benevolence, while Power and Hedonism are typically given lower priority. This study provides fresh insights into understanding and enhancing the moral and value alignment of LLMs when making complex social decisions.</abstract>
@@ -3820,7 +3820,7 @@
     <paper id="318">
       <title><fixed-case>B</fixed-case>asq<fixed-case>BBQ</fixed-case>: A <fixed-case>QA</fixed-case> Benchmark for Assessing Social Biases in <fixed-case>LLM</fixed-case>s for <fixed-case>B</fixed-case>asque, a Low-Resource Language</title>
       <author><first>Muitze</first><last>Zulaika</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <pages>4753–4767</pages>
       <abstract>The rise of pre-trained language models has revolutionized natural language processing (NLP) tasks, but concerns about the propagation of social biases in these models remain, particularly in under-resourced languages like Basque. This paper introduces BasqBBQ, the first benchmark designed to assess social biases in Basque across eight domains, using a multiple-choice question-answering (QA) task. We evaluate various autoregressive large language models (LLMs), including multilingual and those adapted for Basque, to analyze both their accuracy and bias transmission. Our results show that while larger models generally achieve better accuracy, ambiguous cases remain challenging. In terms of bias, larger models exhibit lower negative bias. However, high negative bias persists in specific categories such as Disability Status, Age and Physical Appearance, especially in ambiguous contexts. Conversely, categories such as Sexual Orientation, Gender Identity, and Race/Ethnicity show the least bias in ambiguous contexts. The continual pre-training based adaptation process for Basque has a limited impact on bias when compared with English. This work represents a key step toward creating more ethical LLMs for low-resource languages.</abstract>
       <url hash="b201c4bc">2025.coling-main.318</url>
@@ -3842,7 +3842,7 @@
       <title>Why should only High-Resource-Languages have all the fun? Pivot Based Evaluation in Low Resource Setting</title>
       <author><first>Ananya</first><last>Mukherjee</last></author>
       <author><first>Saumitra</first><last>Yadav</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>4779–4788</pages>
       <abstract>Evaluating machine translation (MT) systems for low-resource languages has long been a challenge due to the limited availability of evaluation metrics and resources. As a result, researchers in this space have relied primarily on lexical-based metrics like BLEU, TER, and ChrF, which lack semantic evaluation. In this first-of-its-kind work, we propose a novel pivot-based evaluation framework that addresses these limitations; after translating low-resource language outputs into a related high-resource language, we leverage advanced neural and embedding-based metrics for more meaningful evaluation. Through a series of experiments using five low-resource languages: Assamese, Manipuri, Kannada, Bhojpuri, and Nepali, we demonstrate how this method extends the coverage of both lexical-based and embedding-based metrics, even for languages not directly supported by advanced metrics. Our results show that the differences between direct and pivot-based evaluation scores are minimal, proving that this approach is a viable and effective solution for evaluating translations in endangered and low-resource languages. This work paves the way for more inclusive, accurate, and scalable MT evaluation for underrepresented languages, marking a significant step forward in this under-explored area of research. The code and data will be made available at https://github.com/AnanyaCoder/PivotBasedEvaluation.</abstract>
       <url hash="db373ddb">2025.coling-main.320</url>
@@ -3926,7 +3926,7 @@
       <title><fixed-case>ACL</fixed-case>-rlg: A Dataset for Reading List Generation</title>
       <author><first>Julien</first><last>Aubert-Béduchaud</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
       <pages>4910–4919</pages>
       <abstract>Familiarizing oneself with a new scientific field and its existing literature can be daunting due to the large amount of available articles. Curated lists of academic references, or reading lists, compiled by experts, offer a structured way to gain a comprehensive overview of a domain or a specific scientific challenge. In this work, we introduce ACL-rlg, the largest open expert-annotated reading list dataset. We also provide multiple baselines for evaluating reading list generation and formally define it as a retrieval task. Our qualitative study highlights that traditional scholarly search engines and indexing methods perform poorly on this task, and GPT-4o, despite showing better results, exhibits signs of potential data contamination.</abstract>
@@ -3950,7 +3950,7 @@
     <paper id="329">
       <title>Extracting structure from an <fixed-case>LLM</fixed-case> - how to improve on surprisal-based models of Human Language Processing</title>
       <author><first>Daphne P.</first><last>Wang</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
       <author><first>Wing-Yee</first><last>Chow</last></author>
       <author><first>Richard</first><last>Breheny</last></author>
@@ -3999,7 +3999,7 @@
       <author><first>Hanjie</first><last>Zhao</last></author>
       <author><first>Yuchen</first><last>Yan</last></author>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Min</first><last>Peng</last></author>
       <pages>4980–4992</pages>
       <abstract>In recent years, fine-grained sentiment analysis in finance has gained significant attention, but the scarcity of entity-level datasets remains a key challenge. To address this, we have constructed the largest English and Chinese financial entity-level sentiment analysis datasets to date. Building on this foundation, we propose a novel two-stage sentiment analysis approach called Self-aware In-context Learning Correction (SILC). The first stage involves fine-tuning a base large language model to generate pseudo-labeled data specific to our task. In the second stage, we train a correction model using a GNN-based example retriever, which is informed by the pseudo-labeled data. This two-stage strategy has allowed us to achieve state-of-the-art performance on the newly constructed datasets, advancing the field of financial sentiment analysis. In a case study, we demonstrate the enhanced practical utility of our data and methods in monitoring the cryptocurrency market. Our datasets and code are available at https://github.com/NLP-Bin/SILC-EFSA.</abstract>
@@ -4032,7 +4032,7 @@
       <title>Evaluating Open-Source <fixed-case>ASR</fixed-case> Systems: Performance Across Diverse Audio Conditions and Error Correction Methods</title>
       <author><first>Saki</first><last>Imai</last></author>
       <author><first>Tahiya</first><last>Chowdhury</last></author>
-      <author><first>Amanda J.</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda J.</first><last>Stent</last></author>
       <pages>5027–5039</pages>
       <abstract>Despite significant advances in automatic speech recognition (ASR) accuracy, challenges remain. Naturally occurring conversation often involves multiple overlapping speakers, of different ages, accents and genders, as well as noisy environments and suboptimal audio recording equipment, all of which reduce ASR accuracy. In this study, we evaluate the accuracy of state of the art open source ASR systems across diverse conversational speech datasets, examining the impact of audio and speaker characteristics on WER. We then explore the potential of ASR ensembling and post-ASR correction methods to improve transcription accuracy. Our findings emphasize the need for robust error correction techniques and of continuing to address demographic biases to enhance ASR performance and inclusivity.</abstract>
       <url hash="e17ab474">2025.coling-main.336</url>
@@ -4131,7 +4131,7 @@
       <title>Refer to the Reference: Reference-focused Synthetic Automatic Post-Editing Data Generation</title>
       <author><first>Sourabh</first><last>Deoghare</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5123–5135</pages>
       <abstract>A prevalent approach to synthetic APE data generation uses source (src) sentences in a parallel corpus to obtain translations (mt) through an MT system and treats corresponding reference (ref) sentences as post-edits (pe). While effective, due to independence between ‘mt’ and ‘pe,’ these translations do not adequately reflect errors to be corrected by a human post-editor. Thus, we introduce a novel and simple yet effective reference-focused synthetic APE data generation technique that uses ‘ref’ instead of src’ sentences to obtain corrupted translations (mt_new). The experimental results across English-German, English-Russian, English-Marathi, English-Hindi, and English-Tamil language pairs demonstrate the superior performance of APE systems trained using the newly generated synthetic data compared to those trained using existing synthetic data. Further, APE models trained using a balanced mix of existing and newly generated synthetic data achieve improvements of 0.37, 0.19, 1.01, 2.42, and 2.60 TER points, respectively. We will release the generated synthetic APE data.</abstract>
       <url hash="6c9f5089">2025.coling-main.344</url>
@@ -4149,7 +4149,7 @@
     </paper>
     <paper id="346">
       <title><fixed-case>MIT</fixed-case>-10<fixed-case>M</fixed-case>: A Large Scale Parallel Corpus of Multilingual Image Translation</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Shaolin</first><last>Zhu</last></author>
       <author><first>Lijie</first><last>Wen</last></author>
       <pages>5154–5167</pages>
@@ -4271,7 +4271,7 @@
       <title>Annotating the <fixed-case>F</fixed-case>rench <fixed-case>W</fixed-case>iktionary with supersenses for large scale lexical analysis: a use case to assess form-meaning relationships within the nominal lexicon</title>
       <author><first>Nicolas</first><last>Angleraud</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>5321–5332</pages>
       <abstract>Many languages lack broad-coverage, semantically annotated lexical resources, which limits empirical research on lexical semantics for these languages. In this paper, we report on how we automatically enriched the French Wiktionnary with general semantic classes, known as supersenses, using a limited amount of manually annotated data. We trained a classifier combining sense definition classification and sense exemplars classification. The resulting resource, with an evaluated supersense accuracy of nearly 85% (92% for hypersenses), is used in a case study illustrating how such an semantically enriched resource can be leveraged to empirically test linguistic hypotheses about the lexicon, on a large scale.</abstract>
       <url hash="91c21e64">2025.coling-main.356</url>
@@ -4323,7 +4323,7 @@
       <author><first>Jia</first><last>Gu</last></author>
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Huawei</first><last>Shen</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>5375–5390</pages>
       <abstract>With the rapid advancement of large language models (LLMs) for handling complex language tasks, an increasing number of studies are employing LLMs as agents to emulate the sequential decision-making processes of humans often represented as Markov decision-making processes (MDPs). The actions in MDPs adhere to specific probability distributions and require iterative sampling. This arouses curiosity regarding the capacity of LLM agents to comprehend probability distributions, thereby guiding the agent’s behavioral decision-making through probabilistic sampling and generating behavioral sequences. To answer the above question, we divide the problem into two main aspects: sequence simulation with explicit probability distribution and sequence simulation with implicit probability distribution. Our analysis indicates that LLM agents can understand probabilities, but they struggle with probability sampling. Their ability to perform probabilistic sampling can be improved to some extent by integrating coding tools, but this level of sampling precision still makes it difficult to simulate human behavior as agents.</abstract>
       <url hash="1293aef3">2025.coling-main.360</url>
@@ -4387,9 +4387,9 @@
       <title><fixed-case>V</fixed-case>eritas<fixed-case>QA</fixed-case>: A Truthfulness Benchmark Aimed at Multilingual Transferability</title>
       <author><first>Javier</first><last>Aula-Blasco</last></author>
       <author><first>Júlia</first><last>Falcão</last></author>
-      <author><first>Susana</first><last>Sotelo</last></author>
+      <author id="susana-sotelo"><first>Susana</first><last>Sotelo</last></author>
       <author><first>Silvia</first><last>Paniagua</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>5463–5474</pages>
       <abstract>As Large Language Models (LLMs) become available in a wider range of domains and applications, evaluating the truthfulness of multilingual LLMs is an issue of increasing relevance. TruthfulQA (Lin et al., 2022) is one of few benchmarks designed to evaluate how models imitate widespread falsehoods. However, it is strongly English-centric and starting to become outdated. We present VeritasQA, a context- and time-independent truthfulness benchmark built with multilingual transferability in mind, and available in Spanish, Catalan, Galician and English. VeritasQA comprises a set of 353 questions and answers inspired by common misconceptions and falsehoods that are not tied to any particular country or recent event. We release VeritasQA under an open license and present the evaluation results of 15 models of various architectures and sizes.</abstract>
@@ -4440,7 +4440,7 @@
       <author><first>Edoardo</first><last>Manino</last></author>
       <author><first>Julia</first><last>Rozanova</last></author>
       <author><first>Lucas</first><last>Cordeiro</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>5515–5529</pages>
       <abstract>This work proposes a novel methodology for measuring compositional behavior in contemporary language embedding models. Specifically, we focus on adjectival modifier phenomena in adjective-noun phrases. In recent years, distributional language representation models have demonstrated great practical success. At the same time, the need for interpretability has elicited questions on their intrinsic properties and capabilities. Crucially, distributional models are often inconsistent when dealing with compositional phenomena in natural language, which has significant implications for their safety and fairness. Despite this, most current research on compositionality is directed towards improving their performance on similarity tasks only. This work takes a different approach, introducing three novel tests of compositional behavior inspired by Montague semantics. Our experimental results indicate that current neural language models do not behave according to the expected linguistic theories. This indicates that current language models may lack the capability to capture the semantic properties we evaluated on limited context, or that linguistic theories from Montagovian tradition may not match the expected capabilities of distributional models.</abstract>
       <url hash="97685251">2025.coling-main.370</url>
@@ -4452,8 +4452,8 @@
       <author><first>Xiangyu</first><last>Lu</last></author>
       <author><first>Wang</first><last>Xu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <pages>5530–5543</pages>
       <abstract>Low-Rank Adaptation (LoRA) is currently the most commonly used Parameter-efficient fine-tuning (PEFT) method. However, it still faces high computational and storage costs to models with billions of parameters. Most previous studies have tackled this issue by using pruning techniques. Nonetheless, these efforts only analyze LoRA parameter features to evaluate their importance, such as parameter count, size, and gradient. In fact, the output of LoRA directly impacts the fine-tuned model. Preliminary experiments indicate that a fraction of LoRA possesses significantly high output values, substantially influencing the layer output. Motivated by the observation, we propose LoRA-drop. Concretely, LoRA-drop evaluates the importance of LoRA based on the LoRA output. Then we retain LoRA for important layers and the other layers share the same LoRA. We conduct abundant experiments with models of different scales on NLU and NLG tasks. Results demonstrate that LoRA-drop can achieve performance comparable to full fine-tuning and LoRA while retaining 50% of the LoRA parameters on average.</abstract>
       <url hash="13ff7701">2025.coling-main.371</url>
@@ -4465,7 +4465,7 @@
       <author><first>Sijie</first><last>Cheng</last></author>
       <author><first>Zixin</first><last>Sun</last></author>
       <author><first>Peng</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>5544–5557</pages>
       <abstract>Symbols such as numerical sequences, chemical formulas, and table delimiters exist widely, playing important roles in symbol-related tasks such as abstract reasoning, chemical property prediction, and tabular question-answering. Compared to tasks based on natural language expressions, large language models (LLMs) have limitations in understanding and reasoning on symbol-based representations, making it difficult for them to handle symbol-related problems. In this paper, we propose symbol-to-language (S2L), a method that converts symbol-based representations to language-based representations, providing valuable information for language models during reasoning. We found that, for both closed-source and open-source LLMs, the capability to solve symbol-related problems can be largely enhanced by incorporating such language-based representations. For example, by employing S2L for GPT-4, there can be substantial improvements of +21.9% and +9.5% accuracy for 1D-ARC and Dyck language tasks, respectively. There is also a consistent improvement in other six general symbol-related tasks such as table understanding and Tweet analysis. We release the GPT logs in https://github.com/THUNLP-MT/symbol2language.</abstract>
       <url hash="503a0864">2025.coling-main.372</url>
@@ -4510,8 +4510,8 @@
       <author><first>Payam</first><last>Karisani</last></author>
       <author><first>Zheng</first><last>Hui</last></author>
       <author><first>Yi</first><last>Fung</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>5607–5628</pages>
       <abstract>Propaganda plays a critical role in shaping public opinion and fueling disinformation. While existing research primarily focuses on identifying propaganda techniques, it lacks the ability to capture the broader motives and the impacts of such content. To address these challenges, we introduce PropaInsight, a conceptual framework grounded in foundational social science research, which systematically dissects propaganda into techniques, arousal appeals, and underlying intent. PropaInsight offers a more granular understanding of how propaganda operates across different contexts. Additionally, we present PropaGaze, a novel dataset that combines human-annotated data with high-quality synthetic data generated through a meticulously designed pipeline. Our experiments show that off-the-shelf LLMs struggle with propaganda analysis, but PropaGaze significantly improves performance. Fine-tuned Llama-7B-Chat achieves 203.4% higher text span IoU in technique identification and 66.2% higher BertScore in appeal analysis compared to 1-shot GPT-4-Turbo. Moreover, PropaGaze complements limited human-annotated data in data-sparse and cross-domain scenarios, demonstrating its potential for comprehensive and generalizable propaganda analysis.</abstract>
@@ -4623,7 +4623,7 @@
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Songming</first><last>Zhang</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>5775–5788</pages>
       <abstract>Multilingual knowledge editing (MKE) aims to simultaneously update factual knowledge across multiple languages within large language models (LLMs). Previous research indicates that the same knowledge across different languages within LLMs exhibits a degree of shareability. However, most existing MKE methods overlook the connections of the same knowledge between different languages, resulting in knowledge conflicts and limited edit performance. To address this issue, we first investigate how LLMs process multilingual factual knowledge and discover that the same factual knowledge in different languages generally activates a shared set of neurons, which we call language-agnostic factual neurons (LAFNs). These neurons represent the same factual knowledge shared across languages and imply the semantic connections among multilingual knowledge. Inspired by this finding, we propose a new MKE method by Locating and Updating Language-Agnostic Factual Neurons (LU-LAFNs) to edit multilingual knowledge simultaneously, which avoids knowledge conflicts and thus improves edit performance. Experimental results on Bi-ZsRE and MzsRE benchmarks demonstrate that our method achieves the best edit performance, indicating the effectiveness and importance of modeling the semantic connections among multilingual knowledge.</abstract>
@@ -4653,7 +4653,7 @@
       <author><first>Tim</first><last>Ombasa</last></author>
       <author><first>Hemank</first><last>Lamba</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last></author>
       <pages>5807–5825</pages>
       <abstract>Online reporting platforms have enabled citizens around the world to collectively share their opinions and report in real time on events impacting their local communities. Systematically organizing (e.g., categorizing by attributes) and geotagging large amounts of crowdsourced information is crucial to ensuring that accurate and meaningful insights can be drawn from this data and used by policy makers to bring about positive change. These tasks, however, typically require extensive manual annotation efforts. In this paper we present Uchaguzi-2022, a dataset of 14k categorized and geotagged citizen reports related to the 2022 Kenyan General Election containing mentions of election-related issues such as official misconduct, vote count irregularities, and acts of violence. We use this dataset to investigate whether language models can assist in scalably categorizing and geotagging reports, thus highlighting its potential application in the AI for Social Good space.</abstract>
@@ -4727,7 +4727,7 @@
       <author><first>Fu</first><last>Zhang</last></author>
       <author><first>Jizheng</first><last>Wan</last></author>
       <author><first>Shuo</first><last>Wang</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>5890–5901</pages>
       <abstract>Entity Alignment (EA) is a critical task in Knowledge Graph (KG) integration, aimed at identifying and matching equivalent entities that represent the same real-world objects. While EA methods based on knowledge representation learning have shown strong performance on synthetic benchmark datasets such as DBP15K, their effectiveness significantly decline in real-world scenarios which often involve data that is highly heterogeneous, incomplete, and domain-specific, as seen in datasets like DOREMUS and AGROLD. Addressing this challenge, we propose DAEA, a novel EA approach with Domain Adaptation that leverages the data characteristics of synthetic benchmarks for improved performance in real-world datasets. DAEA introduces a multi-source KGs selection mechanism and a specialized domain adaptive entity alignment loss function to bridge the gap between real-world data and optimal benchmark data, mitigating the challenges posed by aligning entities across highly heterogeneous KGs. Experimental results demonstrate that DAEA outperforms state-of-the-art models on real-world datasets, achieving a 29.94% improvement in Hits@1 on DOREMUS and a 5.64% improvement on AGROLD. Code is available at https://github.com/yangxiaoxiaoly/DAEA.</abstract>
       <url hash="dd2c725f">2025.coling-main.393</url>
@@ -4738,7 +4738,7 @@
     <paper id="394">
       <title><fixed-case>C</fixed-case>o<fixed-case>P</fixed-case>r<fixed-case>US</fixed-case>: Consistency Preserving Utterance Synthesis towards more realistic benchmark dialogues</title>
       <author><first>Sebastian</first><last>Steindl</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Bernd</first><last>Ludwig</last></author>
       <pages>5902–5917</pages>
       <abstract>Large-scale Wizard-Of-Oz dialogue datasets have enabled the training of deep learning-based dialogue systems. While they are successful as benchmark datasets, they lack certain types of utterances, which would make them more realistic. In this work, we investigate the creation of synthetic communication errors in an automatic pipeline. Based on linguistic theory, we propose and follow a simple error taxonomy. We focus on three types of miscommunications that could happen in real-world dialogues but are underrepresented in the benchmark dataset: misunderstandings, non-understandings and vaguely related questions. Our two-step approach uses a state-of-the-art Large Language Model (LLM) to first create the error and secondly the repairing utterance. We perform Language Model-based evaluation to ensure the quality of the generated utterances. We apply the method to the MultiWOZ dataset and evaluate it both qualitatively and empirically as well as with human judges. Our results indicate that current LLMs can aid in adding post-hoc miscommunications to benchmark datasets as a form of data augmentation. We publish the resulting dataset, in which nearly 1900 dialogues have been modified, as CoPrUS-MultiWOZ to facilitate future work on dialogue systems.</abstract>
@@ -4749,7 +4749,7 @@
       <title><fixed-case>JM</fixed-case>ed<fixed-case>B</fixed-case>ench: A Benchmark for Evaluating <fixed-case>J</fixed-case>apanese Biomedical Large Language Models</title>
       <author><first>Junfeng</first><last>Jiang</last></author>
       <author><first>Jiahao</first><last>Huang</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>5918–5935</pages>
       <abstract>Recent developments in Japanese large language models (LLMs) primarily focus on general domains, with fewer advancements in Japanese biomedical LLMs. One obstacle is the absence of a comprehensive, large-scale benchmark for comparison. Furthermore, the resources for evaluating Japanese biomedical LLMs are insufficient. To advance this field, we propose a new benchmark including eight LLMs across four categories and 20 Japanese biomedical datasets across five tasks. Experimental results indicate that: (1) LLMs with a better understanding of Japanese and richer biomedical knowledge achieve better performance in Japanese biomedical tasks, (2) LLMs that are not mainly designed for Japanese biomedical domains can still perform unexpectedly well, and (3) there is still much room for improving the existing LLMs in certain Japanese biomedical tasks. Moreover, we offer insights that could further enhance development in this field. Our evaluation tools tailored to our benchmark as well as the datasets are publicly available to facilitate future research.</abstract>
       <url hash="637db8f8">2025.coling-main.395</url>
@@ -4761,7 +4761,7 @@
       <author><first>Youri</first><last>Peskine</last></author>
       <author><first>Paolo</first><last>Papotti</last></author>
       <author><first>Riccardo</first><last>Torlone</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <pages>5936–5951</pages>
       <abstract>Tropes — recurring narrative elements like the “smoking gun” or the “veil of secrecy” — are often used in movies to convey familiar patterns. However, they also play a significant role in online communication about societal issues, where they can oversimplify complex matters and deteriorate public discourse. Recognizing these tropes can offer insights into the emotional manipulation and potential bias present in online discussions. This paper addresses the challenge of automatically detecting tropes in social media posts. We define the task, distinguish it from previous work, and create a ground-truth dataset of social media posts related to vaccines and immigration, manually labeled with tropes. Using this dataset, we develop a supervised machine learning technique for multi-label classification, fine-tune a model, and demonstrate its effectiveness experimentally. Our results show that tropes are common across domains and that fine-tuned models can detect them with high accuracy.</abstract>
       <url hash="3d2fcf1e">2025.coling-main.396</url>
@@ -4772,7 +4772,7 @@
       <author><first>Samee</first><last>Arif</last></author>
       <author><first>Aamina Jamal</first><last>Khan</last></author>
       <author><first>Mustafa</first><last>Abbas</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <author><first>Awais</first><last>Athar</last></author>
       <pages>5952–5961</pages>
       <abstract>This paper presents a comprehensive evaluation of Urdu Automatic Speech Recognition (ASR) models. We analyze the performance of three ASR model families: Whisper, MMS, and Seamless-M4T using Word Error Rate (WER), along with a detailed examination of the most frequent wrong words and error types including insertions, deletions, and substitutions. Our analysis is conducted using two types of datasets, read speech and conversational speech. Notably, we present the first conversational speech dataset designed for benchmarking Urdu ASR models. We find that seamless-large outperforms other ASR models on the read speech dataset, while whisper-large performs best on the conversational speech dataset. Furthermore, this evaluation highlights the complexities of assessing ASR models for low-resource languages like Urdu using quantitative metrics alone and emphasizes the need for a robust Urdu text normalization system. Our findings contribute valuable insights for developing robust ASR systems for low-resource languages like Urdu.</abstract>
@@ -4814,8 +4814,8 @@
     </paper>
     <paper id="401">
       <title><fixed-case>PIR</fixed-case>suader: A Persuasive Chatbot for Mitigating Psychological Insulin Resistance in Type-2 Diabetic Patients</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>5997–6013</pages>
       <abstract>Psychological Insulin Resistance (PIR) is described as the reluctance towards initiation and adherence of insulin-based treatments due to psychological barriers in diabetic patients. Though studies have shown that timely initiation with lifestyle changes are known to be crucial in sugar control and prevention of chronic conditions in Type 2 Diabetes (T2D) patients, many patients often have deep-rooted fears and misgivings related to insulin which hinder them from adapting to an insulin-based treatment regimen when recommended by healthcare specialists. Therefore, it is vitally important to address and allay these fallacious beliefs in T2D patients and persuade them to consider insulin as a treatment option. In this paper, we describe the design of PIRsuader, a persuasive chatbot for mitigating PIR in T2D patients. In PIRsuader, we effectively harness the conversation generation capabilities of state-of-the-art Large Language Models via a context-specific persuasive dialog act schema. We design reward functions that capture dialog act preferences for persuading reluctant patients and apply reinforcement learning to learn a dialog act prediction model. Our experiments using a collection of real doctor-diabetic patient conversations indicate that PIRsuader is able to improve the willingness in patients to try insulin as well as address specific concerns they have in an empathetic manner.</abstract>
       <url hash="88c5f6fc">2025.coling-main.401</url>
@@ -4838,8 +4838,8 @@
       <author><first>Zheng</first><last>Yao</last></author>
       <author><first>Tianyi</first><last>Li</last></author>
       <author><first>Liang</first><last>Cheng</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>6024–6038</pages>
       <abstract>Previous benchmarks for evaluating large language models (LLMs) have primarily emphasized quantitative metrics, such as data volume. However, this focus may neglect key qualitative data attributes that can significantly impact the final rankings of LLMs, resulting in unreliable leaderboards. In this paper, we investigate whether current LLM benchmarks adequately consider these data attributes. We specifically examine three attributes: diversity, redundancy, and difficulty. To explore these attributes, we propose a framework with three separate modules, each designed to assess one of the attributes. Using a method that progressively incorporates these attributes, we analyze their influence on the benchmark. Our experimental results reveal a meaningful correlation between LLM rankings on the revised benchmark and the original benchmark when these attributes are accounted for. These findings indicate that existing benchmarks often fail to meet all three criteria, highlighting a lack of consideration for multifaceted data attributes in current evaluation datasets.</abstract>
       <url hash="1f269313">2025.coling-main.403</url>
@@ -4850,7 +4850,7 @@
       <author><first>Bastian</first><last>Bunzeck</last></author>
       <author><first>Daniel</first><last>Duran</last></author>
       <author><first>Leonie</first><last>Schade</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>6039–6048</pages>
       <abstract>Recent work investigates whether LMs learn human-like linguistic generalizations and representations from developmentally plausible amounts of data. Yet, the basic linguistic units processed in these LMs are determined by subword-based tokenization, which limits their validity as models of learning at and below the word level. In this paper, we explore the potential of tokenization-free, phoneme- and grapheme-based language models. We demonstrate that small models based on the Llama architecture can achieve strong linguistic performance on standard syntactic and novel lexical/phonetic benchmarks when trained with character-level vocabularies. We further show that phoneme-based models almost match grapheme-based models in standard tasks and novel evaluations. Our findings suggest a promising direction for creating more linguistically plausible language models that are better suited for computational studies of language acquisition and processing.</abstract>
       <url hash="34c9f5fe">2025.coling-main.404</url>
@@ -4868,7 +4868,7 @@
     <paper id="406">
       <title>Hi-<fixed-case>GEC</fixed-case>: <fixed-case>H</fixed-case>indi Grammar Error Correction in Low Resource Scenario</title>
       <author><first>Ujjwal</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>6063–6075</pages>
       <abstract>Automated Grammatical Error Correction (GEC) has been extensively researched in Natural Language Processing (NLP), primarily focusing on English and other resource-rich languages. This paper shifts the focus to GEC for a scarcely explored low-resource language, specifically Hindi, which presents unique challenges due to its intricate morphology and complex syntax. To address data resource limitations, this work explores various GEC data generation techniques. Our research introduces a carefully extracted and filtered, high-quality dataset, HiWikiEdits, which includes human-edited 8,137 instances sourced from Wikipedia, encompassing 17 diverse grammatical error types, with annotations performed using the ERRANT toolkit. Furthermore, we investigate Round Trip Translation (RTT) using diverse languages for synthetic Hindi GEC data generation, revealing that leveraging high-resource linguistically distant language for error generation outperforms mid-resource linguistically closer languages. Specifically, using English as a pivot language resulted in a 6.25% improvement in GLEU score compared to using Assamese or Marathi. Finally, we also investigate the neural model-based synthetic error-generation technique and show that it achieves comparable performance to other synthetic data generation methods, even in low-resource settings.</abstract>
       <url hash="37504cbe">2025.coling-main.406</url>
@@ -4884,7 +4884,7 @@
       <author><first>Anderson</first><last>Silva Soares</last></author>
       <author><first>Rodrigo</first><last>Freitas Lima</last></author>
       <author><first>Lucas Rafael</first><last>Stefanel Gris</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>6076–6087</pages>
       <abstract>Recently, several public datasets for automatic speech recognition (ASR) in Brazilian Portuguese (BP) have been released, improving ASR systems performance. However, these datasets lack diversity in terms of age groups, regional accents, and education levels. In this paper, we present a new publicly available dataset consisting of 289 life story interviews (365 hours), featuring a broad range of speakers varying in age, education, and regional accents. First, we demonstrated the presence of bias in current BP ASR models concerning education levels and age groups. Second, we showed that our dataset helps mitigate these biases. Additionally, an ASR model trained on our dataset performed better during evaluation on a diverse test set. Finally, the ASR model trained with our dataset was extrinsically evaluated through a topic modeling task that utilized the automatically transcribed output.</abstract>
       <url hash="6ada8b31">2025.coling-main.407</url>
@@ -4928,7 +4928,7 @@
       <author><first>Minjun</first><last>Kim</last></author>
       <author><first>Junghun</first><last>Yuk</last></author>
       <author><first>Haneol</first><last>Jang</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <pages>6150–6168</pages>
       <abstract>We propose the VLR-Bench, a visual question answering (VQA) benchmark for evaluating vision language models (VLMs) based on retrieval augmented generation (RAG). Unlike existing evaluation datasets for external knowledge-based VQA, the proposed VLR-Bench includes five input passages. This allows testing of the ability to determine which passage is useful for answering a given query, a capability lacking in previous research. In this context, we constructed a dataset of 32,000 automatically generated instruction-following examples, which we denote as VLR-IF. This dataset is specifically designed to enhance the RAG capabilities of VLMs by enabling them to learn how to generate appropriate answers based on input passages. We evaluated the validity of the proposed benchmark and training data and verified its performance using the state-of-the-art Llama3-based VLM, the Llava-Llama-3 model. The proposed VLR-Bench and VLR-IF datasets are publicly available online.</abstract>
       <url hash="067de447">2025.coling-main.411</url>
@@ -4954,7 +4954,7 @@
       <author><first>Guillermo</first><last>Marco</last></author>
       <author><first>Jorge</first><last>Carrillo-de-Albornoz</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
-      <author><first>Enrique</first><last>Amigo</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigo</last></author>
       <author><first>Andrés Fernandez</first><last>García</last></author>
       <author><first>Alejandro</first><last>Benito-Santos</last></author>
       <author><first>Adrián</first><last>Ghajari Espinosa</last></author>
@@ -5120,7 +5120,7 @@
       <title>Evaluating Pixel Language Models on Non-Standardized Languages</title>
       <author><first>Alberto</first><last>Muñoz-Ortiz</last></author>
       <author><first>Verena</first><last>Blaschke</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>6412–6419</pages>
       <abstract>We explore the potential of pixel-based models for transfer learning from standard languages to dialects. These models convert text into images that are divided into patches, enabling a continuous vocabulary representation that proves especially useful for out-of-vocabulary words common in dialectal data. Using German as a case study, we compare the performance of pixel-based models to token-based models across various syntactic and semantic tasks. Our results show that pixel-based models outperform token-based models in part-of-speech tagging, dependency parsing and intent detection for zero-shot dialect evaluation by up to 26 percentage points in some scenarios, though not in Standard German. However, pixel-based models fall short in topic classification. These findings emphasize the potential of pixel-based models for handling dialectal data, though further research should be conducted to assess their effectiveness in various linguistic contexts.</abstract>
       <url hash="3c57a863">2025.coling-main.427</url>
@@ -5164,8 +5164,8 @@
     <paper id="431">
       <title>Part-Of-Speech Sensitivity of Routers in Mixture of Experts Models</title>
       <author><first>Elie</first><last>Antoine</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>6467–6474</pages>
       <abstract>This study investigates the behavior of model-integrated routers in Mixture of Experts (MoE) models, focusing on how tokens are routed based on their linguistic features, specifically Part-of-Speech (POS) tags. The goal is to explore across different MoE architectures whether experts specialize in processing tokens with similar linguistic traits. By analyzing token trajectories across experts and layers, we aim to uncover how MoE models handle linguistic information. Findings from six popular MoE models reveal expert specialization for specific POS categories, with routing paths showing high predictive accuracy for POS, highlighting the value of routing paths in characterizing tokens.</abstract>
       <url hash="a2e13b7a">2025.coling-main.431</url>
@@ -5338,8 +5338,8 @@
       <author><first>Zara</first><last>Siddique</last></author>
       <author><first>Hsuvas</first><last>Borkakoty</last></author>
       <author><first>Dimosthenis</first><last>Antypas</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>6692–6704</pages>
       <abstract>Extracting metaphors and analogies from free text requires high-level reasoning abilities such as abstraction and language understanding. Our study focuses on the extraction of the concepts forming metaphoric analogies in literary texts. To this end, we construct a novel dataset in this domain with the help of domain experts. We compare the out-of-the-box ability of recent large language models (LLMs) to structure metaphoric mappings from fragments of texts containing rather explicit proportional analogies. The models are further evaluated on the generation of implicit elements of the analogy, which are indirectly suggested in the texts and inferred by human readers. The competitive results obtained by LLMs in our experiments are encouraging and open up new avenues such as automatically extracting analogies and metaphors from text instead of investing resources in domain experts to manually label data.</abstract>
       <url hash="6549c928">2025.coling-main.448</url>
@@ -5377,7 +5377,7 @@
       <author><first>Meishan</first><last>Zhang</last></author>
       <author><first>Lili</first><last>Shan</last></author>
       <author><first>Yulin</first><last>Wu</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>6748–6761</pages>
       <abstract>Emotion recognition in conversation (ERC), the task of discerning human emotions for each utterance within a conversation, has garnered significant attention in human-computer interaction systems. Previous ERC studies focus on speaker-specific information that predominantly stems from relationships among utterances, which lacks sufficient information around conversations. Recent research in ERC has sought to exploit pre-trained large language models (LLMs) with speaker modelling to comprehend emotional states. Although these methods have achieved the encouraging results, the extracted speaker-specific information struggles to indicate emotional dynamics. In this paper, motivated by the fact that speaker characteristics play a crucial role and LLMs have rich world knowledge, we present LaERC-S, a novel framework that stimulates LLMs to explore speaker characteristics involving the mental state and behavior of interlocutors, for accurate emotion predictions. To endow LLMs with these knowledge information, we adopt the two-stage learning to make the models reason speaker characteristics and track the emotion of the speaker in complex conversation scenarios. Extensive experiments on three benchmark datasets demonstrate the superiority of LaERC-S, reaching the new state-of-the-art.</abstract>
       <url hash="4e6e13c7">2025.coling-main.451</url>
@@ -5386,7 +5386,7 @@
     <paper id="452">
       <title>Analysing Zero-Shot Readability-Controlled Sentence Simplification</title>
       <author><first>Abdullah</first><last>Barayan</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <pages>6762–6781</pages>
       <abstract>Readability-controlled text simplification (RCTS) rewrites texts to lower readability levels while preserving their meaning. RCTS models often depend on parallel corpora with readability annotations on both source and target sides. Such datasets are scarce and difficult to curate, especially at the sentence level. To reduce reliance on parallel data, we explore using instruction-tuned large language models for zero-shot RCTS. Through automatic and manual evaluations, we examine: (1) how different types of contextual information affect a model’s ability to generate sentences with the desired readability, and (2) the trade-off between achieving target readability and preserving meaning. Results show that all tested models struggle to simplify sentences (especially to the lowest levels) due to models’ limitations and characteristics of the source sentences that impede adequate rewriting. Our experiments also highlight the need for better automatic evaluation metrics tailored to RCTS, as standard ones often misinterpret common simplification operations, and inaccurately assess readability and meaning preservation.</abstract>
@@ -5420,8 +5420,8 @@
       <author><first>Beomseok</first><last>Lee</last></author>
       <author><first>Marco</first><last>Gaido</last></author>
       <author><first>Ioan</first><last>Calapodescu</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>6816–6826</pages>
       <abstract>While crowdsourcing is an established solution for facilitating and scaling the collection of speech data, the involvement of non-experts necessitates protocols to ensure final data quality. To reduce the costs of these essential controls, this paper investigates the use of Speech Foundation Models (SFMs) to automate the validation process, examining for the first time the cost/quality trade-off in data acquisition. Experiments conducted on French, German, and Korean data demonstrate that SFM-based validation has the potential to reduce reliance on human validation, resulting in an estimated cost saving of over 40.0% without degrading final data quality. These findings open new opportunities for more efficient, cost-effective, and scalable speech data acquisition.</abstract>
       <url hash="f36e1277">2025.coling-main.455</url>
@@ -5551,7 +5551,7 @@
     </paper>
     <paper id="466">
       <title>Evaluating <fixed-case>LLM</fixed-case>s’ Capability to Identify Lexical Semantic Equivalence: Probing with the Word-in-Context Task</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>6985–6998</pages>
       <abstract>This study proposes a method to evaluate the capability of large language models (LLMs) in identifying lexical semantic equivalence. The Word-in-Context (WiC) task, a benchmark designed to determine whether the meanings of a target word remain identical across different contexts, is employed as a probing task. Experiments are conducted with several LLMs, including proprietary GPT models and open-source models, using zero-shot prompting with adjectives that represent varying levels of semantic equivalence (e.g., “the same”) or inequivalence (e.g., “different”). The fundamental capability to identify lexical semantic equivalence in context is measured using standard accuracy metrics. Consistency across different levels of semantic equivalence is assessed via rank correlation with the expected canonical ranking of precision and recall, reflecting anticipated trends in performance across prompts. The proposed method demonstrates its effectiveness, highlighting the superior capability of GPT-4o, as it consistently outperforms other explored LLMs. Analysis of the WiC dataset, the discriminative properties of adjectives (i.e., their ability to differentiate between levels of semantic equivalence), and linguistic patterns in erroneous cases offer insights into the LLM’s capability and sensitivity. These findings could inform improvements in WiC task performance, although performance enhancement is not the primary focus of this study.</abstract>
       <url hash="ee8433c9">2025.coling-main.466</url>
@@ -5582,7 +5582,7 @@
       <author><first>Connor</first><last>Heaton</last></author>
       <author><first>Shreya</first><last>Ghosh</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Suhang</first><last>Wang</last></author>
       <pages>7035–7054</pages>
       <abstract>We study extractive question-answering in the medical domain (Medical-EQA). This problem has two main challenges: (i) domain specificity, as most AI models lack necessary domain knowledge, and (ii) extraction-based answering style, which restricts most autoregressive LLMs due to potential hallucinations. To handle those challenges, we propose TOP-Training, a target-oriented pre-training paradigm that stands out among all domain adaptation techniques with two desirable features: (i) TOP-Training moves one step further than popular domain-oriented fine-tuning since it not only moves closer to the target domain, but also familiarizes itself with the target dataset, and (ii) it does not assume the existence of a large set of unlabeled instances from the target domain. Specifically, for a target Medical-EQA dataset, we extract its entities and leverage large language models (LLMs) to generate synthetic texts containing those entities; we then demonstrate that pretraining on this synthetic text data yields better performance on the target Medical-EQA benchmarks. Overall, our contributions are threefold: (i) TOP-Training, a new pretraining technique to effectively adapt LLMs to better solve a target problem, (ii) TOP-Training has a wide application scope because it does not require the target problem to have a large set of unlabeled data, and (iii) our experiments highlight the limitations of autoregressive LLMs, emphasizing TOP-Training as a means to unlock the true potential of bidirectional LLMs.</abstract>
@@ -5593,7 +5593,7 @@
       <title>Beyond Discrete Personas: Personality Modeling Through Journal Intensive Conversations</title>
       <author><first>Sayantan</first><last>Pal</last></author>
       <author><first>Souvik</first><last>Das</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>7055–7074</pages>
       <abstract>Large Language Models (LLMs) have significantly improved personalized conversational capabilities. However, existing datasets like Persona Chat, Synthetic Persona Chat, and Blended Skill Talk rely on static, predefined personas. This approach often results in dialogues that fail to capture human personalities’ fluid and evolving nature. To overcome these limitations, we introduce a novel dataset with around 400,000 dialogues and a framework for generating personalized conversations using long-form journal entries from Reddit. Our approach clusters journal entries for each author and filters them by selecting the most representative cluster, ensuring that the retained entries best reflect the author’s personality. We further refine the data by capturing the Big Five personality traits—openness, conscientiousness, extraversion, agreeableness, and neuroticism—ensuring that dialogues authentically reflect an individual’s personality. Using Llama 3 70B, we generate high-quality, personality-rich dialogues grounded in these journal entries. Fine-tuning models on this dataset leads to an 11% improvement in capturing personality traits on average, outperforming existing approaches in generating more coherent and personality-driven dialogues.</abstract>
       <url hash="e3de595d">2025.coling-main.470</url>
@@ -5616,7 +5616,7 @@
       <author><first>Enora</first><last>Rice</last></author>
       <author><first>Luke</first><last>Gessler</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
-      <author><first>Katharina</first><last>von der Wense</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last></author>
       <pages>7087–7098</pages>
       <abstract>Many of the world’s languages have insufficient data to train high-performing general neural machine translation (NMT) models, let alone domain-specific models, and often the only available parallel data are small amounts of religious texts. Hence, domain adaptation (DA) is a crucial issue faced by contemporary NMT and has, so far, been underexplored for low-resource languages. In this paper, we evaluate a set of methods from both low-resource NMT and DA in a realistic setting, in which we aim to translate between a high-resource and a low-resource language with access to only: a) parallel Bible data, b) a bilingual dictionary, and c) a monolingual target-domain corpus in the high-resource language. Our results show that the effectiveness of the tested methods varies, with the simplest one, DALI, being most effective. We follow up with a small human evaluation of DALI, which shows that there is still a need for more careful investigation of how to accomplish DA for low-resource NMT.</abstract>
       <url hash="445fd65f">2025.coling-main.472</url>
@@ -5635,7 +5635,7 @@
       <title>Exploring Language Model Generalization in Low-Resource Extractive <fixed-case>QA</fixed-case></title>
       <author><first>Saptarshi</first><last>Sengupta</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Shreya</first><last>Ghosh</last></author>
       <author><first>Suhang</first><last>Wang</last></author>
       <pages>7106–7126</pages>
@@ -5853,7 +5853,7 @@
     </paper>
     <paper id="493">
       <title>Courtroom-<fixed-case>LLM</fixed-case>: A Legal-Inspired Multi-<fixed-case>LLM</fixed-case> Framework for Resolving Ambiguous Text Classifications</title>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Jeesu</first><last>Jung</last></author>
       <pages>7367–7385</pages>
       <abstract>In this research, we introduce the Courtroom-LLM framework, a novel multi-LLM structure inspired by legal courtroom processes, aiming to enhance decision-making in ambiguous text classification scenarios. Our approach simulates a courtroom setting within LLMs, assigning roles similar to those of prosecutors, defense attorneys, and judges, to facilitate comprehensive analysis of complex textual cases. We demonstrate that this structured multi-LLM setup can significantly improve decision-making accuracy, particularly in ambiguous situations, by harnessing the synergistic effects of diverse LLM arguments. Our evaluations across various text classification tasks show that the Courtroom-LLM framework outperforms both traditional single-LLM classifiers and simpler multi-LLM setups. These results highlight the advantages of our legal-inspired model in improving decision-making for text classification.</abstract>
@@ -5938,7 +5938,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Long</first><last>Bai</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>7484–7496</pages>
       <abstract>Event Relation Extraction (ERE) aims to extract various types of relations between different events within texts. Although Large Language Models (LLMs) have demonstrated impressive capabilities in many natural language processing tasks, existing ERE methods based on LLMs still face three key challenges: (1) <b>Time Inefficiency</b>: The existing pairwise method of combining events and determining their relations is time-consuming for LLMs. (2) <b>Low Coverage</b>: When dealing with numerous events in a document, the limited generation length of fine-tuned LLMs restricts the coverage of their extraction results. (3) <b>Lack of Rationale</b>: Essential rationales concerning the results that could enhance the reasoning ability of the model are overlooked. To address these challenges, we propose LLMERE, an LLM-based approach with rationales for the ERE task. LLMERE transforms ERE into a question-and-answer task that may have multiple answers. By extracting all events related to a specified event at once, LLMERE reduces time complexity from <tex-math>O(n^2)</tex-math> to <tex-math>O(n)</tex-math>, compared to the pairwise method. Subsequently, LLMERE enhances the coverage of extraction results by employing a partitioning strategy that highlights only a portion of the events in the document at a time. In addition to the extracted results, LLMERE is also required to generate corresponding rationales/reasons behind them, in terms of event coreference information or transitive chains of event relations. Experimental results on three widely used datasets show that LLMERE achieves significant improvements over baseline methods.</abstract>
       <url hash="c9090beb">2025.coling-main.500</url>
@@ -6028,7 +6028,7 @@
       <title><fixed-case>DEGAP</fixed-case>: Dual Event-Guided Adaptive Prefixes for Templated-Based Event Argument Extraction with Slot Querying</title>
       <author><first>Guanghui</first><last>Wang</last></author>
       <author><first>Dexi</first><last>Liu</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Qizhi</first><last>Wan</last></author>
       <author><first>Rong</first><last>Hu</last></author>
       <author><first>Xiping</first><last>Liu</last></author>
@@ -6055,7 +6055,7 @@
     <paper id="509">
       <title>Leveraging Large Pre-trained Multilingual Models for High-Quality Speech-to-Text Translation on Industry Scenarios</title>
       <author><first>Marko</first><last>Avila</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <pages>7624–7633</pages>
       <abstract>Speech-to-Text Translation (S2TT) involves converting spoken language from a source language directly into text in a target language. Traditionally, S2TT systems rely on a sequential pipeline that combines Automatic Speech Recognition (ASR) and Machine Translation (MT) models. However, these systems are prone to error propagation and demand substantial resources to develop and train each component independently. Thus, posing a major challenge in industry settings where cost-effective yet highly accurate S2TT solutions are essential. With the increasing availability of multilingual large pre-trained speech models (LPSM), we propose a parameter-efficient framework that integrates one LPSM with a multilingual MT engine. We evaluate the effectiveness of several well-established LPSMs within this framework, focusing on a real-world industry scenario that involves building a system capable of translating between French, English, and Arabic. The results show that high-quality S2TT systems can be built with minimal computational resources, offering an efficient solution for cross-lingual communication.</abstract>
       <url hash="d60d8159">2025.coling-main.509</url>
@@ -6078,7 +6078,7 @@
       <author><first>Yi R.</first><last>Fung</last></author>
       <author><first>Cheng</first><last>Qian</last></author>
       <author><first>Jeonghwan</first><last>Kim</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>7648–7662</pages>
       <abstract>As large language models (LLMs) demonstrate increasingly advanced capabilities, aligning their behaviors with human values and preferences becomes crucial for their wide adoption. While previous research focuses on general alignment to principles such as helpfulness, harmlessness, and honesty, the need to account for individual and diverse preferences has been largely overlooked, potentially undermining customized human experiences. To address this gap, we train LLMs that can “interact to align”, essentially cultivating the meta-skill of LLMs to implicitly infer the unspoken personalized preferences of the current user through multi-turn conversations, and then dynamically align their following behaviors and responses to these inferred preferences. Our approach involves establishing a diverse pool of 3,310 distinct user personas by initially creating seed examples, which are then expanded through iterative self-generation and filtering. Guided by distinct user personas, we leverage multi-LLM collaboration to develop a multi-turn preference dataset containing 3K+ multi-turn conversations in tree structures. Finally, we apply supervised fine-tuning and reinforcement learning to enhance LLMs using this dataset. For evaluation, we establish the ALOE (ALign with custOmized prEferences) benchmark, consisting of 100 carefully selected examples and well-designed metrics to measure the customized alignment performance during conversations. Experimental results demonstrate the effectiveness of our method in enabling dynamic, personalized alignment via interaction. The code and dataset will be made public.</abstract>
@@ -6105,7 +6105,7 @@
       <author><first>Brent</first><last>Milne</last></author>
       <author><first>Tom</first><last>Fischaber</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>7671–7684</pages>
       <abstract>Human tutoring interventions play a crucial role in supporting student learning, improving academic performance, and promoting personal growth. This paper focuses on analyzing mathematics tutoring discourse using talk moves—a framework of dialogue acts grounded in Accountable Talk theory. However, scaling the collection, annotation, and analysis of extensive tutoring dialogues to develop machine learning models is a challenging and resource-intensive task. To address this, we present SAGA22, a compact dataset, and explore various modeling strategies, including dialogue context, speaker information, pretraining datasets, and further fine-tuning. By leveraging existing datasets and models designed for classroom teaching, our results demonstrate that supplementary pretraining on classroom data enhances model performance in tutoring settings, particularly when incorporating longer context and speaker information. Additionally, we conduct extensive ablation studies to underscore the challenges in talk move modeling.</abstract>
       <url hash="af19b49f">2025.coling-main.513</url>
@@ -6114,7 +6114,7 @@
     <paper id="514">
       <title>How to Leverage Digit Embeddings to Represent Numbers?</title>
       <author><first>Jasivan Alex</first><last>Sivakumar</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <pages>7685–7697</pages>
       <abstract>Within numerical reasoning, understanding numbers themselves is still a challenge for existing language models. Simple generalisations, such as solving 100+200 instead of 1+2, can substantially affect model performance (Sivakumar and Moosavi, 2023). Among various techniques, character-level embeddings of numbers have emerged as a promising approach to improve number representation. However, this method has limitations as it leaves the task of aggregating digit representations to the model, which lacks direct supervision for this process. In this paper, we explore the use of mathematical priors to compute aggregated digit embeddings and explicitly incorporate these aggregates into transformer models. This can be achieved either by adding a special token to the input embeddings or by introducing an additional loss function to enhance correct predictions. We evaluate the effectiveness of incorporating this explicit aggregation, analysing its strengths and shortcomings, and discuss future directions to better benefit from this approach. Our methods, while simple, are compatible with any pretrained model, easy to implement, and have been made publicly available.</abstract>
       <url hash="bb2fef65">2025.coling-main.514</url>
@@ -6180,8 +6180,8 @@
     <paper id="520">
       <title>Cross-lingual Evaluation of Multilingual Text Generation</title>
       <author><first>Shamil</first><last>Chollampatt</last></author>
-      <author><first>Minh Quang</first><last>Pham</last></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>7766–7777</pages>
       <abstract>Scaling automatic evaluation of multilingual text generation of LLMs to new tasks, domains, and languages remains a challenge. Traditional evaluation on benchmark datasets carries the risk of reference data leakage in LLM training or involves additional human annotation effort. The alternative strategy of using another LLM as a scorer also faces uncertainty about the ability of this LLM itself to score non-English text. To address these issues, we propose an annotation-free cross-lingual evaluation protocol for multilingual text generation. Given an LLM candidate to be evaluated and a set of non-English inputs for a particular text generation task, our method first generates English references from the translation of the non-English inputs into English. This is done by an LLM that excels in the equivalent English text generation task. The non-English text generated by the LLM candidate is compared against the generated English references using a cross-lingual evaluation metric to assess the ability of the candidate LLM on multilingual text generation. Our protocol shows a high correlation to the reference-based ROUGE metric in four languages on news text summarization. We also evaluate a diverse set of LLMs in over 90 languages with different prompting strategies to study their multilingual generative abilities.</abstract>
@@ -6380,7 +6380,7 @@
       <title>Factual Knowledge Assessment of Language Models Using Distractors</title>
       <author><first>Hichem</first><last>Ammar Khodja</last></author>
       <author><first>Abderrahmane</first><last>Ait gueni ssaid</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Quentin</first><last>Brabant</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
@@ -6405,7 +6405,7 @@
     <paper id="539">
       <title>Summarization of Opinionated Political Documents with Varied Perspectives</title>
       <author><first>Nicholas</first><last>Deas</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>8088–8108</pages>
       <abstract>Global partisan hostility and polarization has increased, and this polarization is heightened around presidential elections. Models capable of generating accurate summaries of diverse perspectives can help reduce such polarization by exposing users to alternative perspectives. In this work, we introduce a novel dataset and task for independently summarizing each political perspective in a set of passages from opinionated news articles. For this task, we propose a framework for evaluating different dimensions of perspective summary performance. We benchmark 11 summarization models and LLMs of varying sizes and architectures through both automatic and human evaluation. While recent models like GPT-4o perform well on this task, we find that all models struggle to generate summaries that are faithful to the intended perspective. Our analysis of summaries focuses on how extraction behavior is impacted by features of the input documents.</abstract>
       <url hash="a4119741">2025.coling-main.539</url>
@@ -6418,7 +6418,7 @@
       <author><first>Ali</first><last>Marashian</last></author>
       <author><first>Jennifer M.</first><last>Ellis</last></author>
       <author><first>Eliana</first><last>Colunga</last></author>
-      <author><first>Katharina</first><last>von der Wense</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>von der Wense</last></author>
       <pages>8109–8120</pages>
       <abstract>To address an important gap in creating children’s stories for vocabulary enrichment, we investigate the automatic evaluation of how well stories convey the semantics of target vocabulary words, a task with substantial implications for generating educational content. We motivate this task, which we call measuring contextual informativeness in children’s stories, and provide a formal task definition as well as a dataset for the task. We further propose a method for automating the task using a large language model (LLM). Our experiments show that our approach reaches a Spearman correlation of 0.4983 with human judgments of informativeness, while the strongest baseline only obtains a correlation of 0.3534. An additional analysis shows that the LLM-based approach is able to generalize to measuring contextual informativeness in adult-directed text, on which it also outperforms all baselines.</abstract>
       <url hash="d9983b28">2025.coling-main.540</url>
@@ -6442,7 +6442,7 @@
       <author><first>Narumi</first><last>Tokunaga</last></author>
       <author><first>Yuki</first><last>Yamagata</last></author>
       <author><first>Kouji</first><last>Kozaki</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>8148–8159</pages>
       <abstract>Automatic biomedical annotation is essential for advancing medical research, diagnosis, and treatment. However, it presents significant challenges, especially when entities are not explicitly mentioned in the text, leading to difficulties in extraction of relevant information. These challenges are intensified by unclear terminology, implicit background knowledge, and the lack of labeled training data. Annotating with a specific ontology adds another layer of complexity, as it requires aligning text with a predefined set of concepts and relationships. Manual annotation is time-consuming and expensive, highlighting the need for automated systems to handle large volumes of biomedical data efficiently. In this paper, we propose an entailment-based zero-shot text classification approach to annotate biomedical text passages using the Homeostasis Imbalance Process (HOIP) ontology. Our method reformulates the annotation task as a multi-class, multi-label classification problem and uses natural language inference to classify text into related HOIP processes. Experimental results show promising performance, especially when processes are not explicitly mentioned, highlighting the effectiveness of our approach for ontological annotation of biomedical literature.</abstract>
       <url hash="743cddd9">2025.coling-main.542</url>
@@ -6489,7 +6489,7 @@
       <author><first>Yifan</first><last>Du</last></author>
       <author><first>Hangyu</first><last>Guo</last></author>
       <author><first>Kun</first><last>Zhou</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
       <author><first>Chuyuan</first><last>Wang</last></author>
       <author><first>Mingchen</first><last>Cai</last></author>
@@ -6505,7 +6505,7 @@
       <author><first>Boyu</first><last>Guan</last></author>
       <author><first>Yining</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>8215–8231</pages>
       <abstract>Current video-guided machine translation (VMT) approaches primarily use coarse-grained visual information, resulting in information redundancy, high computational overhead, and neglect of audio content. Our research demonstrates the significance of fine-grained visual and audio information in VMT from both data and methodological perspectives. From the data perspective, we have developed a large-scale dataset TriFine, the first vision-audio-subtitle tri-modal VMT dataset with annotated multimodal fine-grained tags. Each entry in this dataset not only includes the triples found in traditional VMT datasets but also encompasses seven fine-grained annotation tags derived from visual and audio modalities. From the methodological perspective, we propose a Fine-grained Information-enhanced Approach for Translation (FIAT). Experimental results have shown that, in comparison to traditional coarse-grained methods and text-only models, our fine-grained approach achieves superior performance with lower computational overhead. These findings underscore the pivotal role of fine-grained annotated information in advancing the field of VMT.</abstract>
       <url hash="02ef809f">2025.coling-main.547</url>
@@ -6524,7 +6524,7 @@
     <paper id="549">
       <title><fixed-case>GEAR</fixed-case>: A Simple <fixed-case>GENERATE</fixed-case>, <fixed-case>EMBED</fixed-case>, <fixed-case>AVERAGE</fixed-case> <fixed-case>AND</fixed-case> <fixed-case>RANK</fixed-case> Approach for Unsupervised Reverse Dictionary</title>
       <author><first>Fatemah Yousef</first><last>Almeman</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>8242–8254</pages>
       <abstract>Reverse Dictionary (RD) is the task of obtaining the most relevant word or set of words given a textual description or dictionary definition. Effective RD methods have applications in accessibility, translation or writing support systems. Moreover, in NLP research we find RD to be used to benchmark text encoders at various granularities, as it often requires word, definition and sentence embeddings. In this paper, we propose a simple approach to RD that leverages LLMs in combination with embedding models. Despite its simplicity, this approach outperforms supervised baselines in well studied RD datasets, while also showing less overfitting. We also conduct a number of experiments on different dictionaries and analyze how different styles, registers and target audiences impact the quality of RD systems. We conclude that, on average, untuned embeddings alone fare way below an LLM-only baseline (although they are competitive in highly technical dictionaries), but are crucial for boosting performance in combined methods.</abstract>
       <url hash="b59ae05c">2025.coling-main.549</url>
@@ -6551,7 +6551,7 @@
       <author><first>Xiyang</first><last>Huang</last></author>
       <author><first>Chenkang</first><last>Zhu</last></author>
       <author><first>Min</first><last>Peng</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Yu</first><last>Song</last></author>
       <pages>8272–8284</pages>
       <abstract>Document-level Relation Extraction (DocRE) aims to extract relations from documents. Compared with sentence-level relation extraction, it is necessary to extract long-distance dependencies. Existing methods enhance the output of trained DocRE models either by learning logical rules or by extracting rules from annotated data and then injecting them into the model. However, these approaches can result in suboptimal performance due to incorrect rule set constraints. To mitigate this issue, we propose Context-aware differentiable rule learning or CaDRL for short, a novel differentiable rule-based framework that learns the doc-specific logical rule to avoid generating suboptimal constraints. Specifically, we utilize Transformer-based relation attention to encode document and relation information, thereby learning the contextual information of the relation. We employ a sequence-generated differentiable rule decoder to generate relational probabilistic logic rules at each reasoning step. We also introduce a parameter sharing training mechanism in CaDRL to reconcile the DocRE model and the rule learning module. Extensive experimental results on three DocRE datasets demonstrate that CaDRL outperforms existing rule-based frameworks, significantly improving DocRE performance and making predictions more interpretable and logical.</abstract>
@@ -6671,7 +6671,7 @@
     <paper id="561">
       <title>A Flash in the Pan: Better Prompting Strategies to Deploy Out-of-the-Box <fixed-case>LLM</fixed-case>s as Conversational Recommendation Systems</title>
       <author><first>Gustavo Adolpho</first><last>Lucas de Carvalho</last></author>
-      <author><first>Simon</first><last>Benigeri</last></author>
+      <author id="simon-benigeri"><first>Simon</first><last>Benigeri</last></author>
       <author><first>Jennifer</first><last>Healey</last></author>
       <author><first>Victor</first><last>Bursztyn</last></author>
       <author><first>David</first><last>Demeter</last></author>
@@ -6851,10 +6851,10 @@
       <title>A Chain-of-Task Framework for Instruction Tuning of <fixed-case>LLM</fixed-case>s Based on <fixed-case>C</fixed-case>hinese Grammatical Error Correction</title>
       <author><first>Xinpeng</first><last>Liu</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Wenpeng</first><last>Lu</last></author>
       <pages>8623–8639</pages>
       <abstract>Over-correction is a critical issue for large language models (LLMs) to address Grammatical Error Correction (GEC) task, esp. for Chinese. This paper proposes a Chain-of-Task (CoTask) framework to reduce over-correction. The CoTask framework is applied as multi-task instruction tuning of LLMs by decomposing the process of grammatical error analysis to design auxiliary tasks and adjusting the types and combinations of training tasks. A supervised fine-tuning (SFT) strategy is also presented to enhance the performance of LLMs, together with an algorithm for automatic dataset annotation to avoid additional manual costs. Experimental results demonstrate that our method achieves new state-of-the-art results on both FCGEC (in-domain) and NaCGEC (out-of-domain) test sets.</abstract>
@@ -6867,7 +6867,7 @@
       <author><first>Virginie</first><last>Mouilleron</last></author>
       <author><first>Menel</first><last>Mahamdi</last></author>
       <author><first>Wissam</first><last>Antoun</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>8640–8663</pages>
       <abstract>The proliferation of radical content on online platforms poses significant risks, including inciting violence and spreading extremist ideologies. Despite ongoing research, existing datasets and models often fail to address the complexities of multilingual and diverse data. To bridge this gap, we introduce a publicly available multilingual dataset annotated with radicalization levels, calls for action, and named entities in English, French, and Arabic. This dataset is pseudonymized to protect individual privacy while preserving contextual information. Beyond presenting our freely available dataset, we analyze the annotation process, highlighting biases and disagreements among annotators and their implications for model performance. Additionally, we use synthetic data to investigate the influence of socio-demographic traits on annotation patterns and model predictions. Our work offers a comprehensive examination of the challenges and opportunities in building robust datasets for radical content detection, emphasizing the importance of fairness and transparency in model development. The Counter dataset is available at https://gitlab.inria.fr/ariabi/counter-dataset-public.</abstract>
       <url hash="5474b995">2025.coling-main.578</url>
@@ -7038,7 +7038,7 @@
       <author><first>Qibin</first><last>Wang</last></author>
       <author><first>Xiaolin</first><last>Hu</last></author>
       <author><first>Weikai</first><last>Xu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last></author>
       <author><first>Bin</first><last>Wang</last></author>
       <pages>8841–8857</pages>
@@ -7058,9 +7058,9 @@
       <title>Extending <fixed-case>LLM</fixed-case>s to New Languages: A Case Study of Llama and <fixed-case>P</fixed-case>ersian Adaptation</title>
       <author><first>Samin</first><last>Mahdizadeh Sani</last></author>
       <author><first>Pouya</first><last>Sadeghi</last></author>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>8868–8884</pages>
       <abstract>Large language models (LLMs) have made great progress in classification and text generation tasks. However, they are mainly trained on English data and often struggle with low-resource languages. In this study, we explore adding a new language, i.e., Persian, to Llama (a model with a limited understanding of Persian) using parameter-efficient fine-tuning. We employ a multi-stage approach involving pretraining on monolingual Persian data, aligning representations through bilingual pretraining and instruction datasets, and instruction-tuning with task-specific datasets. We evaluate the model’s performance at each stage on generation and classification tasks. Our findings suggest that incorporating the Persian language, through bilingual data alignment, can enhance classification accuracy for Persian tasks, with no adverse impact and sometimes even improvements on English tasks. Additionally, the results highlight the model’s initial strength as a critical factor when working with limited training data, with cross-lingual alignment offering minimal benefits for the low-resource language. Knowledge transfer from English to Persian has a marginal effect, primarily benefiting simple classification tasks.</abstract>
       <url hash="087605a0">2025.coling-main.594</url>
@@ -7072,7 +7072,7 @@
       <author><first>Saiping</first><last>Guan</last></author>
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>8885–8896</pages>
       <abstract>N-ary Knowledge Graphs (NKGs), where a fact can involve more than two entities, have gained increasing attention. Link Prediction in NKGs (LPN) aims to predict missing elements in facts to facilitate the completion of NKGs. Current LPN methods implicitly operate under a closed-world assumption, meaning that the sets of entities and roles are fixed. These methods focus on predicting missing elements within facts composed of entities and roles seen during training. However, in reality, new facts involving unseen entities and roles frequently emerge, requiring completing these facts. Thus, this paper proposes a new task, Inductive Link Prediction in NKGs (ILPN), which aims to predict missing elements in facts involving unseen entities and roles in emerging NKGs. To address this task, we propose a Meta-learning-based N-ary knowledge Inductive Reasoner (MetaNIR), which employs a graph neural network with meta-learning mechanisms to embed unseen entities and roles adaptively. The obtained embeddings are used to predict missing elements in facts involving unseen elements. Since no existing dataset supports this task, three datasets are constructed to evaluate the effectiveness of MetaNIR. Extensive experimental results demonstrate that MetaNIR consistently outperforms representative models across all datasets.</abstract>
       <url hash="258f19fb">2025.coling-main.595</url>
@@ -7082,7 +7082,7 @@
       <title><fixed-case>Z</fixed-case>ig<fixed-case>Z</fixed-case>ag<fixed-case>KV</fixed-case>: Dynamic <fixed-case>KV</fixed-case> Cache Compression for Long-context Modeling based on Layer Uncertainty</title>
       <author><first>Meizhi</first><last>Zhong</last></author>
       <author><first>Xikai</first><last>Liu</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yikun</first><last>Lei</last></author>
       <author><first>Yan</first><last>Gao</last></author>
       <author><first>Yao</first><last>Hu</last></author>
@@ -7129,7 +7129,7 @@
     <paper id="600">
       <title>Understanding the <fixed-case>R</fixed-case>o<fixed-case>PE</fixed-case> Extensions of Long-Context <fixed-case>LLM</fixed-case>s: An Attention Perspective</title>
       <author><first>Meizhi</first><last>Zhong</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Yikun</first><last>Lei</last></author>
       <author><first>Xikai</first><last>Liu</last></author>
       <author><first>Yan</first><last>Gao</last></author>
@@ -7171,7 +7171,7 @@
       <title>The Only Way is Ethics: A Guide to Ethical Research with Large Language Models</title>
       <author><first>Eddie L.</first><last>Ungless</last></author>
       <author><first>Nikolas</first><last>Vitsakis</last></author>
-      <author><first>Zeerak</first><last>Talat</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last></author>
       <author><first>James</first><last>Garforth</last></author>
       <author><first>Bjorn</first><last>Ross</last></author>
       <author><first>Arno</first><last>Onken</last></author>
@@ -7340,7 +7340,7 @@
       <author><first>Seongtae</first><last>Hong</last></author>
       <author><first>Seungyoon</first><last>Lee</last></author>
       <author><first>Hyeonseok</first><last>Moon</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>9184–9193</pages>
       <abstract>Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models.</abstract>
       <url hash="4d7f1b2e">2025.coling-main.617</url>
@@ -7350,7 +7350,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>STA</fixed-case>: Code-Switched Speech Translation using Aligned Speech-Text Interleaving</title>
       <author><first>Bhavani Shankar</first><last>P S V N</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>9194–9208</pages>
       <abstract>Code-switching is a widely prevalent linguistic phenomenon in multilingual societies like India. Building speech-to-text models for code-switched speech is challenging due to limited availability of datasets. In this work, we focus on the problem of spoken translation (ST) of code-switched speech in Indian languages to English text. We present a new end-to-end model architecture CoSTA that scaffolds on pretrained automatic speech recognition (ASR) and machine translation (MT) modules (that are more widely available for many languages). Speech and ASR text representations are fused using an aligned interleaving scheme and are fed further as input to a pretrained MT module; the whole pipeline is then trained end-to-end for spoken translation using synthetically created ST data. We also release a new evaluation benchmark for code-switched Bengali- English, Hindi-English, Marathi-English and Telugu-English speech to English text. CoSTA significantly outperforms many competitive cascaded and end-to-end multimodal baselines by up to 3.5 BLEU points.</abstract>
       <url hash="71cd8ab4">2025.coling-main.618</url>
@@ -7628,8 +7628,8 @@
     <paper id="640">
       <title>Hands-off Image Editing: Language-guided Editing without any Task-specific Labeling, Masking or even Training</title>
       <author><first>Rodrigo</first><last>Santos</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <author><first>Joao</first><last>Rodrigues</last></author>
       <pages>9546–9565</pages>
       <abstract>Instruction-guided image editing consists in taking an image and an instruction and delivering that image altered according to that instruction. State-of-the-art approaches to this task suffer from the typical scaling up and domain adaptation hindrances related to supervision as they eventually resort to some kind of task-specific labelling, masking or training. We propose a novel approach that does without any such task-specific supervision and offers thus a better potential for improvement. Its assessment demonstrates that it is highly effective, achieving very competitive performance.</abstract>
@@ -7930,7 +7930,7 @@
       <title>Development of Numerical Error Detection Tasks to Analyze the Numerical Capabilities of Language Models</title>
       <author><first>Taku</first><last>Sakamoto</last></author>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>9957–9976</pages>
       <abstract>Numbers are used to describe quantities in various scenarios in daily life; therefore, numerical errors can significantly affect the meaning of the entire sentence, and even a single-letter error can be fatal. Detecting numerical errors often requires a high level of commonsense and is difficult even with the recent large language models (LLMs). In this study, we create a benchmark dataset of numerical error detection that uses automatically generated numerical errors. In our analysis, we classify the numerical errors based on the properties of the errors and investigate the ability of the model from several perspectives, including the error class, error size, and passage domain. The experimental results indicate that GPT-3.5, GPT-4, and Llama-3-Instruct (8B) perform well in the numerical error detection task; however, they are not as accurate as humans. We find that the LLMs misidentified correct numbers as errors more frequently than the humans did. In particular, the analysis demonstrates that the current LLMs still need improvement for detecting numerical errors requiring calculations or extensive prior knowledge.</abstract>
       <url hash="8aac4382">2025.coling-main.666</url>
@@ -8059,7 +8059,7 @@
       <author><first>Robert</first><last>Litschko</last></author>
       <author><first>Oliver</first><last>Kraus</last></author>
       <author><first>Verena</first><last>Blaschke</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>10158–10171</pages>
       <abstract>A large amount of local and culture-specific knowledge (e.g., people, traditions, food) can only be found in documents written in dialects. While there has been extensive research conducted on cross-lingual information retrieval (CLIR), the field of cross-dialect retrieval (CDIR) has received limited attention. Dialect retrieval poses unique challenges due to the limited availability of resources to train retrieval models and the high variability in non-standardized languages. We study these challenges on the example of German dialects and introduce the first German dialect retrieval dataset, dubbed WikiDIR, which consists of seven German dialects extracted from Wikipedia. Using WikiDIR, we demonstrate the weakness of lexical methods in dealing with high lexical variation in dialects. We further show that commonly used CLIR methods such as query translation or zero-shot cross-lingual transfer with multilingual encoders do not transfer well to extremely low-resource setups, motivating the need for resource-lean and dialect-specific retrieval models.</abstract>
       <url hash="bdb8a6b0">2025.coling-main.678</url>
@@ -8114,7 +8114,7 @@
       <author><first>Sarah</first><last>Xuan</last></author>
       <author><first>Jacob</first><last>Jobraeel</last></author>
       <author><first>Anurag</first><last>Kumar</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <author><first>Jad</first><last>Kabbara</last></author>
       <pages>10242–10274</pages>
       <abstract>We focus on enhancing comprehension in small-group recorded conversations, which serve as a medium to bring people together and provide a space for sharing personal stories and experiences on crucial social matters. One way to parse and convey information from these conversations is by sharing highlighted excerpts in subsequent conversations. This can help promote a collective understanding of relevant issues, by highlighting perspectives and experiences to other groups of people who might otherwise be unfamiliar with and thus unable to relate to these experiences. The primary challenge that arises then is that excerpts taken from one conversation and shared in another setting might be missing crucial context or key elements that were previously introduced in the original conversation. This problem is exacerbated when conversations become lengthier and richer in themes and shared experiences. To address this, we explore how Large Language Models (LLMs) can enrich these excerpts by providing socially relevant context. We present approaches for effective contextualization to improve comprehension, readability, and empathy. We show significant improvements in understanding, as assessed through subjective and objective evaluations. While LLMs can offer valuable context, they struggle with capturing key social aspects. We release the Human-annotated Salient Excerpts (HSE) dataset to support future work. Additionally, we show how context-enriched excerpts can provide more focused and comprehensive conversation summaries.</abstract>
@@ -8126,7 +8126,7 @@
       <author><first>Junchao</first><last>Wu</last></author>
       <author><first>Runzhe</first><last>Zhan</last></author>
       <author><first>Derek F.</first><last>Wong</last></author>
-      <author id="shu-yang"><first>Shu</first><last>Yang</last></author>
+      <author><first>Shu</first><last>Yang</last></author>
       <author><first>Xuebo</first><last>Liu</last></author>
       <author><first>Lidia S.</first><last>Chao</last></author>
       <author><first>Min</first><last>Zhang</last></author>
@@ -8203,7 +8203,7 @@
       <author><first>Florian</first><last>Debaene</last></author>
       <author><first>Aaron</first><last>Maladry</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>10367–10374</pages>
       <abstract>This paper explores the effectiveness of two types of transformer models — large generative models and sequence-to-sequence models — for automatically post-correcting Optical Character Recognition (OCR) output in early modern Dutch plays. To address the need for optimally aligned data, we create a parallel dataset based on the OCRed and ground truth versions from the EmDComF corpus using state-of-the-art alignment techniques. By combining character-based and semantic methods, we design and release a qualitative OCR-to-gold parallel dataset, selecting the alignment with the lowest Character Error Rate (CER) for all alignment pairs. We then fine-tune and evaluate five generative models and four sequence-to-sequence models on the OCR post-correction dataset. Results show that sequence-to-sequence models generally outperform generative models in this task, correcting more OCR errors and overgenerating and undergenerating less, with mBART as the best performing system.</abstract>
       <url hash="dab40eb0">2025.coling-main.690</url>
@@ -8281,8 +8281,8 @@
       <author><first>Amogh</first><last>Mannekote</last></author>
       <author><first>Jinseok</first><last>Nam</last></author>
       <author><first>Ziming</first><last>Li</last></author>
-      <author><first>Kristy Elizabeth</first><last>Boyer</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="kristy-boyer"><first>Kristy Elizabeth</first><last>Boyer</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>10449–10459</pages>
       <abstract>Indirect User Requests (IURs), such as “It’s cold in here” instead of “Could you please increase the temperature?” are common in human-human task-oriented dialogue and require world knowledge and pragmatic reasoning from the listener. While large language models (LLMs) can handle these requests effectively, smaller models deployed on virtual assistants often struggle due to resource constraints. Moreover, existing task-oriented dialogue benchmarks lack sufficient examples of complex discourse phenomena such as indirectness. To address this, we propose a set of linguistic criteria along with an LLM-based pipeline for generating realistic IURs to test natural language understanding (NLU) and dialogue state tracking (DST) models before deployment in a new domain. We also release IndirectRequests, a dataset of IURs based on the Schema-Guided Dialogue (SGD) corpus, as a comparative testbed for evaluating the performance of smaller models in handling indirect requests.</abstract>
       <url hash="d2441f1a">2025.coling-main.696</url>
@@ -8324,8 +8324,8 @@
       <author><first>Robiert</first><last>Sepulveda Torres</last></author>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <pages>10491–10519</pages>
       <abstract>The current best practice to measure the performance of base Large Language Models is to establish a multi-task benchmark that covers a range of capabilities of interest. Currently, however, such benchmarks are only available in a few high-resource languages. To address this situation, we present IberoBench, a multilingual, multi-task benchmark for Iberian languages (i.e., Basque, Catalan, Galician, European Spanish and European Portuguese) built on the LM Evaluation Harness framework. The benchmark consists of 62 tasks divided into 179 subtasks. We evaluate 33 existing LLMs on IberoBench on 0- and 5-shot settings. We also explore the issues we encounter when working with the Harness and our approach to solving them to ensure high-quality evaluation.</abstract>
@@ -8346,7 +8346,7 @@
       <author><first>Jincheng</first><last>Cao</last></author>
       <author><first>Bobo</first><last>Li</last></author>
       <author><first>Jiang</first><last>Liu</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>10531–10540</pages>
       <abstract>Entity and relation extraction is a conventional task in the field of information extraction. Existing work primarily focuses on detecting specific relations between entities, often constrained to particular fields and lacking general applicability. In response, we propose a novel task: nominal compound relation extraction (NCRE), which concentrates on abstract and broadly applicable relation extraction between noun phrases. This task diverges significantly from traditional entity and relation extraction in two key respects. Firstly, our task involves general nominal compounds rather than named entities, which are longer and encompass a broader scope, presenting significant challenges for extraction. Secondly, relation extraction in NCRE demands an in-depth understanding of context to detect abstract relations. We manually annotate a high-quality Chinese dataset for the NCRE task and develop a model incorporating the rotary position-enhanced word pair (RoWP) detection schema. Experimental results demonstrate the efficiency of our RoWP model over previous baselines, while the suboptimal F1 scores indicate that NCRE remains a challenging task. Our code and data are available at https://github.com/yeecjc/NCRE.</abstract>
       <url hash="04c2c2cc">2025.coling-main.701</url>
@@ -8421,7 +8421,7 @@
     </paper>
     <paper id="708">
       <title>Understanding Token Probability Encoding in Output Embeddings</title>
-      <author><first>Hakaze</first><last>Cho</last></author>
+      <author id="hakaze-cho"><first>Hakaze</first><last>Cho</last></author>
       <author><first>Yoshihiro</first><last>Sakai</last></author>
       <author><first>Kenshiro</first><last>Tanaka</last></author>
       <author><first>Mariko</first><last>Kato</last></author>
@@ -8436,7 +8436,7 @@
       <author><first>Luyang</first><last>Lin</last></author>
       <author><first>Lingzhi</first><last>Wang</last></author>
       <author><first>Jinsong</first><last>Guo</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>10634–10649</pages>
       <abstract>The pervasive spread of misinformation and disinformation in social media underscores the critical importance of detecting media bias. While robust Large Language Models (LLMs) have emerged as foundational tools for bias prediction, concerns about inherent biases within these models persist. In this work, we investigate the presence and nature of bias within LLMs and its consequential impact on media bias detection. Departing from conventional approaches that focus solely on bias detection in media content, we delve into biases within the LLM systems themselves. Through meticulous examination, we probe whether LLMs exhibit biases, particularly in political bias prediction and text continuation tasks. Additionally, we explore bias across diverse topics, aiming to uncover nuanced variations in bias expression within the LLM framework. Importantly, we propose debiasing strategies, including prompt engineering and model fine-tuning. Extensive analysis of bias tendencies across different LLMs sheds light on the broader landscape of bias propagation in language models. This study advances our understanding of LLM bias, offering critical insights into its implications for bias detection tasks and paving the way for more robust and equitable AI systems</abstract>
       <url hash="963f187e">2025.coling-main.709</url>
@@ -8601,7 +8601,7 @@
       <author><first>Lu</first><last>Xiang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>10877–10890</pages>
       <abstract>Document Image Translation (DIT) aims to translate documents in images from one language to another. It requires visual layouts and textual contents understanding, as well as document coherence capturing. However, current methods often rely on the quality of OCR output, which, particularly in complex-layout scenarios, frequently loses the crucial document coherence, leading to chaotic text. To overcome this problem, we introduce a novel end-to-end network, named Zoom-out DIT (ZoomDIT), inspired by human translation procedures. It jointly accomplishes the multi-level tasks including word positioning, sentence recognition &amp; translation, and document organization, based on a fine-to-coarse zoom-out framework, to progressively realize “chaotic words to coherent document” and improve translation. We further contribute a new large-scale DIT dataset with multi-level fine-grained labels. Extensive experiments on public and our new dataset demonstrate significant improvements in translation quality towards complex-layout document images, offering a robust solution for reorganizing the chaotic OCR outputs to a coherent document translation.</abstract>
       <url hash="23fdde07">2025.coling-main.723</url>
@@ -8632,7 +8632,7 @@
       <author><first>Yunke</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>10902–10923</pages>
       <abstract>Open Named Entity Recognition (NER), which involves identifying arbitrary types of entities from arbitrary domains, remains challenging for Large Language Models (LLMs). Recent studies suggest that fine-tuning LLMs on extensive NER data can boost their performance. However, training directly on existing datasets neglects their inconsistent entity definitions and redundant data, limiting LLMs to dataset-specific learning and hindering out-of-domain adaptation. To address this, we present B2NERD, a compact dataset designed to guide LLMs’ generalization in Open NER under a universal entity taxonomy. B2NERD is refined from 54 existing English and Chinese datasets using a two-step process. First, we detect inconsistent entity definitions across datasets and clarify them by distinguishable label names to construct a universal taxonomy of 400+ entity types. Second, we address redundancy using a data pruning strategy that selects fewer samples with greater category and semantic diversity. Comprehensive evaluation shows that B2NERD significantly enhances LLMs’ Open NER capabilities. Our B2NER models, trained on B2NERD, outperform GPT-4 by 6.8-12.0 F1 points and surpass previous methods in 3 out-of-domain benchmarks across 15 datasets and 6 languages. The data, models, and code are publicly available at https://github.com/UmeanNever/B2NER.</abstract>
       <url hash="36cab819">2025.coling-main.725</url>
@@ -8727,7 +8727,7 @@
       <author><first>Qipeng</first><last>Guo</last></author>
       <author><first>Hang</first><last>Yan</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Dahua</first><last>Lin</last></author>
       <pages>11056–11069</pages>
       <abstract>Large Language Models (LLMs) have shown outstanding breakthroughs in code generation. Recent work improves code LLMs by training on synthetic data generated by some powerful LLMs, which can be challenging to scale due to the dependence on a teacher model and high generation costs. In this paper, we focus on synthesizing code data at scale and propose a <b>Case2Code</b> task by exploiting the expressiveness and correctness of programs. <b>Case2Code</b> is an inductive inference task that aims to infer underlying code implementations by observing input-output examples or program behaviors, By incorporating LLMs to generate program inputs, and executing the program with these inputs to obtain the program outputs, we can synthesize diverse and high-quality <b>Case2Code</b> data at scale for training and evaluating code LLMs. Experimental results show that case-to-code induction is challenging for current representative LLMs if they are untrained. Models trained with <b>Case2Code</b> improve performance not only on distribution case-to-code induction but also various coding-generation tasks, demonstrating the great potential of large-scale synthetic data and inductive learning.</abstract>
@@ -8795,7 +8795,7 @@
       <title><fixed-case>M</fixed-case>ani<fixed-case>T</fixed-case>weet: A New Benchmark for Identifying Manipulation of News on Social Media</title>
       <author><first>Kung-Hsiang</first><last>Huang</last></author>
       <author><first>Hou Pong</first><last>Chan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>11161–11180</pages>
       <abstract>Considerable advancements have been made to tackle the misrepresentation of information derived from reference articles in the domains of fact-checking and faithful summarization. However, an unaddressed aspect remains - the identification of social media posts that manipulate information within associated news articles. This task presents a significant challenge, primarily due to the prevalence of personal opinions in such posts. We present a novel task, identifying manipulation of news on social media, which aims to detect manipulation in social media posts and identify manipulated or inserted information. To study this task, we have proposed a data collection schema and curated a dataset called ManiTweet, consisting of 3.6K pairs of tweets and corresponding articles. Our analysis demonstrates that this task is highly challenging, with large language models (LLMs) yielding unsatisfactory performance. Additionally, we have developed a simple yet effective basic model that outperforms LLMs significantly on the ManiTweet dataset. Finally, we have conducted an exploratory analysis of human-written tweets, unveiling intriguing connections between manipulation and the domain and factuality of news articles, as well as revealing that manipulated sentences are more likely to encapsulate the main story or consequences of a news outlet.</abstract>
@@ -8805,7 +8805,7 @@
     <paper id="740">
       <title>Filter-then-Generate: Large Language Models with Structure-Text Adapter for Knowledge Graph Completion</title>
       <author><first>Ben</first><last>Liu</last></author>
-      <author id="jihai-zhang"><first>Jihai</first><last>Zhang</last></author>
+      <author><first>Jihai</first><last>Zhang</last></author>
       <author><first>Fangquan</first><last>Lin</last></author>
       <author><first>Cheng</first><last>Yang</last></author>
       <author><first>Min</first><last>Peng</last></author>
@@ -8978,7 +8978,7 @@
       <title>Human Interest Framing across Cultures: A Case Study on Climate Change</title>
       <author><first>Gisela</first><last>Vallejo</last></author>
       <author><first>Christine</first><last>de Kock</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <pages>11380–11398</pages>
       <abstract>Human Interest (HI) framing is a narrative strategy that injects news stories with a relatable, emotional angle and a human face to engage the audience. In this study we investigate the use of HI framing across different English-speaking cultures in news articles about climate change. Despite its demonstrated impact on the public’s behaviour and perception of an issue, HI framing has been under-explored in NLP to date. We perform a systematic analysis of HI stories to understand its role in climate change reporting in English-speaking countries from four continents. Our findings reveal key differences in how climate change is portrayed across countries, encompassing aspects such as narrative roles, article polarity, pronoun prevalence, and topics. We also demonstrate that these linguistic aspects boost the performance of fine-tuned pre-trained language models on HI story classification.</abstract>
@@ -8993,7 +8993,7 @@
       <author><first>Georgi N.</first><last>Georgiev</last></author>
       <author><first>Jiahui</first><last>Geng</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>11399–11421</pages>
       <abstract>The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the fac- tual accuracy of their outputs. Difficulties lie in assessing the factuality of free-form responses in open domains. Also, different pa- pers use disparate evaluation benchmarks and measurements, which renders them hard to compare and hampers future progress. To mitigate these issues, we propose OpenFactCheck, a unified framework for building customized automatic fact-checking systems, benchmarking their accuracy, evaluating factuality of LLMs, and verifying claims in a document. OpenFactCheck consists of three modules: (i) CUSTCHECKER allows users to easily customize an automatic fact-checker and verify the factual correctness of documents and claims, (ii) LLMEVAL, a unified evaluation framework assesses LLM’s factuality ability from various perspectives fairly, and (iii) CHECKEREVAL is an extensible solution for gauging the reliability of automatic fact-checkers’ verification results using human-annotated datasets. Data and code are publicly available at https: //github.com/yuxiaw/openfactcheck.</abstract>
       <url hash="7eda9107">2025.coling-main.755</url>
@@ -9001,13 +9001,13 @@
     </paper>
     <paper id="756">
       <title>A Dataset for Expert Reviewer Recommendation with Large Language Models as Zero-shot Rankers</title>
-      <author><first>Vanja M.</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja M.</first><last>Karan</last></author>
       <author><first>Stephen</first><last>McQuistin</last></author>
       <author><first>Ryo</first><last>Yanagida</last></author>
       <author><first>Colin</first><last>Perkins</last></author>
       <author><first>Gareth</first><last>Tyson</last></author>
       <author><first>Ignacio</first><last>Castro</last></author>
-      <author><first>Patrick G.T.</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick G.T.</first><last>Healey</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>11422–11427</pages>
       <abstract>The task of reviewer recommendation is increasingly important, with main techniques utilizing general models of text relevance. However, state of the art (SotA) systems still have relatively high error rates. Two possible reasons for this are: a lack of large datasets and the fact that large language models (LLMs) have not yet been applied. To fill these gaps, we first create a substantial new dataset, in the domain of Internet specification documents; then we introduce the use of LLMs and evaluate their performance. We find that LLMs with prompting can improve on SotA in some cases, but that they are not a cure-all: this task provides a challenging setting for prompt-based methods</abstract>
@@ -9029,7 +9029,7 @@
   <volume id="demos" ingest-date="2025-01-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 31st International Conference on Computational Linguistics: System Demonstrations</booktitle>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Hend</first><last>Al-Khalifa</last></editor>
@@ -9053,8 +9053,8 @@
       <author><first>Truong Dinh</first><last>Do</last></author>
       <author><first>An Hoang</first><last>Trieu</last></author>
       <author><first>Van-Thuy</first><last>Phi</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1–8</pages>
       <abstract>The growing volume of scientific literature in polymer science presents a significant challenge for researchers attempting to extract and annotate domain-specific entities, such as polymer names, material properties, and related information. Manual annotation of these documents is both time-consuming and prone to error due to the complexity of scientific language. To address this, we introduce PolyMinder, an automated support system designed to assist polymer scientists in extracting and annotating polymer-related entities and their relationships from scientific documents. The system utilizes recent advanced Named Entity Recognition (NER) and Relation Extraction (RE) models tailored to the polymer domain. PolyMinder streamlines the annotation process by providing a web-based interface where users can visualize, verify, and refine the extracted information before finalizing the annotations. The system’s source code is made publicly available to facilitate further research and development in this field. Our system can be accessed through the following URL: https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/polyminder</abstract>
       <url hash="c1c7fb43">2025.coling-demos.1</url>
@@ -9092,8 +9092,8 @@
       <author><first>Rui</first><last>Xing</last></author>
       <author><first>Yilin</first><last>Geng</last></author>
       <author><first>Zenan</first><last>Zhai</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>28–36</pages>
       <abstract>We introduce Loki, an open-source tool designed to address the growing problem of misinformation. Loki adopts a human-centered approach, striking a balance between the quality of fact-checking and the cost of human involvement. It decomposes the fact-checking task into a five-step pipeline: breaking down long texts into individual claims, assessing their check-worthiness, generating queries, retrieving evidence, and verifying the claims. Instead of fully automating the claim verification process, provides essential information at each step to assist human judgment, especially for general users such as journalists and content moderators. Moreover, it has been optimized for latency, robustness, and cost efficiency at a commercially usable level. Loki is released under an MIT license and is available on GitHub. We also provide a video presenting the system and its capabilities.</abstract>
       <url hash="a7200688">2025.coling-demos.4</url>
@@ -9120,7 +9120,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>B</fixed-case>eef<fixed-case>B</fixed-case>ot: Harnessing Advanced <fixed-case>LLM</fixed-case> and <fixed-case>RAG</fixed-case> Techniques for Providing Scientific and Technology Solutions to Beef Producers</title>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Carrie-Ann</first><last>Wilson</last></author>
       <author><first>Rachel</first><last>Hay</last></author>
       <author><first>Yvette</first><last>Everingham</last></author>
@@ -9139,7 +9139,7 @@
       <author><first>Libo</first><last>Sun</last></author>
       <author><first>Yihang</first><last>Yang</last></author>
       <author><first>Siming</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>63–82</pages>
       <abstract>We introduce AI-Press, an automated news drafting and polishing system based on multi-agent collaboration and Retrieval-Augmented Generation. We develop a feedback simulation system that generates public responses considering demographic distributions. Demo link: https://youtu.be/TmjfJrbzaRU</abstract>
@@ -9230,7 +9230,7 @@
     <paper id="16">
       <title><fixed-case>GECT</fixed-case>urk <fixed-case>WEB</fixed-case>: An Explainable Online Platform for <fixed-case>T</fixed-case>urkish Grammatical Error Detection and Correction</title>
       <author><first>Ali</first><last>Gebeşçe</last></author>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <pages>163–173</pages>
       <abstract>Sophisticated grammatical error detection/correction tools are available for a small set of languages such as English and Chinese. However, it is not straightforward—if not impossible—to adapt them to morphologically rich languages with complex writing rules like Turkish which has more than 80 million speakers. Even though several tools exist for Turkish, they primarily focus on spelling errors rather than grammatical errors and lack features such as web interfaces, error explanations and feedback mechanisms. To fill this gap, we introduce GECTurk WEB, a light, open-source, and flexible web-based system that can detect and correct the most common forms of Turkish writing errors, such as the misuse of diacritics, compound and foreign words, pronouns, light verbs along with spelling mistakes. Our system provides native speakers and second language learners an easily accessible tool to detect/correct such mistakes and also to learn from their mistakes by showing the explanation for the violated rule(s). The proposed system achieves 88,3 system usability score, and is shown to help learn/remember a grammatical rule (confirmed by 80% of the participants). The GECTurk WEB is available both as an offline tool (https://github.com/GGLAB-KU/gecturkweb) or at www.gecturk.net.</abstract>
       <url hash="11b3df72">2025.coling-demos.16</url>
@@ -9268,7 +9268,7 @@
       <title><fixed-case>C</fixed-case>omp<fixed-case>UGE</fixed-case>-Bench: Comparative Understanding and Generation Evaluation Benchmark for Comparative Question Answering</title>
       <author><first>Ahmad</first><last>Shallouf</last></author>
       <author><first>Irina</first><last>Nikishina</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>189–198</pages>
       <abstract>This paper presents CompUGE, a comprehensive benchmark designed to evaluate Comparative Question Answering (CompQA) systems. The benchmark is structured around four core tasks: Comparative Question Identification, Object and Aspect Identification, Stance Classification, and Answer Generation. It unifies multiple datasets and provides a robust evaluation platform to compare various models across these sub-tasks. We also create additional all-encompassing CompUGE datasets by filtering and merging the existing ones. The benchmark for comparative question answering sub-tasks is designed as a web application available on HuggingFace Spaces: https://huggingface.co/spaces/uhhlt/CompUGE-Bench</abstract>
       <url hash="7ef845c6">2025.coling-demos.19</url>
@@ -9308,7 +9308,7 @@
   <volume id="industry" ingest-date="2025-01-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</booktitle>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <editor><first>Hend</first><last>Al-Khalifa</last></editor>
@@ -9527,7 +9527,7 @@
       <author><first>Anusha</first><last>Bagalkotkar</last></author>
       <author><first>Supriya</first><last>Anand</last></author>
       <author><first>Gabriel</first><last>Arnson</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <author><first>Kenneth</first><last>Joseph</last></author>
       <pages>213–235</pages>
       <abstract>In recent years, there has been significant effort to align large language models with human preferences. This work focuses on developing a chatbot specialized in the real estate domain, with an emphasis on incorporating compliant behavior to ensure it can be used without perpetuating discriminatory practices like steering and redlining, which have historically plagued the real estate industry in the United States. Building on prior work, we present a method for generating a synthetic general instruction-following dataset, along with safety data. Through extensive evaluations and benchmarks, we fine-tuned a llama-3-8B-instruct model and demonstrated that we can enhance it’s performance significantly to match huge closed-source models like GPT-4o while making it safer and more compliant. We open-source the model, data and code to support further development and research in the community</abstract>
@@ -9588,7 +9588,7 @@
       <author><first>Ivan</first><last>Sekulic</last></author>
       <author><first>Filip</first><last>Carevic</last></author>
       <author><first>Nghia</first><last>Khau</last></author>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>Bruna</first><last>Guedes</last></author>
       <author><first>Victor</first><last>Guimaraes</last></author>
       <author><first>Zeyu</first><last>Yang</last></author>
@@ -9663,7 +9663,7 @@
       <author><first>Lukas</first><last>Stappen</last></author>
       <author><first>Phillip</first><last>Schneider</last></author>
       <author><first>Florian</first><last>Matthes</last></author>
-      <author><first>Elisabeth</first><last>Andre</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>Andre</last></author>
       <pages>343–357</pages>
       <abstract>In today’s assistant landscape, personalisation enhances interactions, fosters long-term relationships, and deepens engagement. However, many systems struggle with retaining user preferences, leading to repetitive user requests and disengagement. Furthermore, the unregulated and opaque extraction of user preferences in industry applications raises significant concerns about privacy and trust, especially in regions with stringent regulations like Europe. In response to these challenges, we propose a long-term memory system for voice assistants, structured around predefined categories. This approach leverages Large Language Models to efficiently extract, store, and retrieve preferences within these categories, ensuring both personalisation and transparency. We also introduce a synthetic multi-turn, multi-session conversation dataset (CarMem), grounded in real industry data, tailored to an in-car voice assistant setting. Benchmarked on the dataset, our system achieves an F1-score of .78 to .95 in preference extraction, depending on category granularity. Our maintenance strategy reduces redundant preferences by 95% and contradictory ones by 92%, while the accuracy of optimal retrieval is at .87. Collectively, the results demonstrate the system’s suitability for industrial applications.</abstract>
       <url hash="ffdc1465">2025.coling-industry.29</url>
@@ -9671,7 +9671,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>XTR</fixed-case> meets <fixed-case>C</fixed-case>ol<fixed-case>BERT</fixed-case>v2: Adding <fixed-case>C</fixed-case>ol<fixed-case>BERT</fixed-case>v2 Optimizations to <fixed-case>XTR</fixed-case></title>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Jaydeep</first><last>Sen</last></author>
       <pages>358–365</pages>
       <abstract>XTR (Lee et al., 2023) introduced an efficient multi-vector retrieval method that addresses the limitations of the ColBERT (Khattab and Zaharia, 2020model by simplifying retrieval into a single stage through a modified learning objective. While XTR eliminates the need for multistage retrieval, it doesn’t incorporate the efficiency optimizations from ColBERTv2 (Santhanam et al., 2022, which improve indexing and retrieval speed. In this work, we enhance XTR by integrating ColBERTv2’s optimizations, showing that the combined approach preserves the strengths of both models. This results in a more efficient and scalable solution for multi-vector retrieval, while maintaining XTR’s streamlined retrieval process.</abstract>
@@ -9699,7 +9699,7 @@
       <author><first>Paras</first><last>Sharma</last></author>
       <author><first>Anthony B.</first><last>Sicilia</last></author>
       <author><first>Katherine</first><last>Atwell</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Malihe</first><last>Alikhani</last></author>
       <pages>374–386</pages>
       <abstract>General-purpose automatic speech recognition (ASR) systems do not always perform well in goal-oriented dialogue. Existing ASR correction methods rely on prior user data or named entities. We extend correction to tasks that have no prior user data and exhibit linguistic flexibility such as lexical and syntactic variations. We propose a novel context augmentation with a large language model and a ranking strategy that incorporates contextual information from the dialogue states of a goal-oriented conversational AI and its tasks. Our method ranks (1) n-best ASR hypotheses by their lexical and semantic similarity with context and (2) context by phonetic correspondence with ASR hypotheses. Evaluated in home improvement and cooking domains with real-world users, our method improves recall and F1 of correction by 34% and 16%, respectively, while maintaining precision and false positive rate. Users rated .8-1 point (out of 5) higher when our correction method worked properly, with no decrease due to false positives.</abstract>
@@ -9787,7 +9787,7 @@
     <paper id="40">
       <title><fixed-case>B</fixed-case>ack<fixed-case>MATH</fixed-case>: Towards Backward Reasoning for Solving Math Problems Step by Step</title>
       <author><first>Shaowei</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>466–482</pages>
       <abstract>Large language models (LLMs) have achieved impressive results in reasoning, particularly in multi-step reasoning tasks. However, when faced with more complex mathematical problems, the performance of LLMs drops significantly. To address this issue, in this paper, we propose a backward reasoning dataset, BackMATH-Data. The dataset comprises approximately 14K backward reasoning problems and 100K reasoning steps. It follows a result-oriented approach, to construct backward reasoning problems by swapping the reasoning results with specific solving conditions in the original problems.Additionally, we introduce Backward-reasoning Process-supervision Reward Model (BackPRM) and BackMATH-LLM. BackPRM supervises the quality of the generated backward reasoning problems, while BackMATH-LLM is designed for mathematical reasoning. BackMATH-LLM is fine-tuned and enhanced through reinforcement learning by supervising the quality of backward reasoning problems and by providing feedback on reasoning steps, thereby improving the mathematical reasoning capabilities of LLMs.Extensive experiments demonstrate that our model achieves an accuracy of 68.1% on the GSM8K dataset and 21.9% on the MATH dataset, exceeding the SOTA by 1.6% and 2.1% respectively.</abstract>
       <url hash="68ec6d41">2025.coling-industry.40</url>
@@ -9798,7 +9798,7 @@
       <author><first>Yincen</first><last>Qu</last></author>
       <author><first>Hengyue</first><last>Liu</last></author>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Xiangying</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiangying</first><last>Dai</last></author>
       <author><first>Xiaoou</first><last>Lu</last></author>
       <author><first>Hui</first><last>Zhou</last></author>
       <author><first>Chao</first><last>Ma</last></author>
@@ -9857,7 +9857,7 @@
       <author><first>Elena</first><last>Senger</last></author>
       <author><first>Yuri</first><last>Campbell</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>533–545</pages>
       <abstract>Accurate career path prediction can support many stakeholders, like job seekers, recruiters, HR, and project managers. However, publicly available data and tools for career path prediction are scarce. In this work, we introduce Karrierewege, a comprehensive, publicly available dataset containing over 500k career paths, significantly surpassing the size of previously available datasets. We link the dataset to the ESCO taxonomy to offer a valuable resource for predicting career trajectories. To tackle the problem of free-text inputs typically found in resumes, we enhance it by synthesizing job titles and descriptions resulting in Karrierewege+. This allows for accurate predictions from unstructured data, closely aligning with practical application challenges. We benchmark existing state-of-the-art (SOTA) models on our dataset and a previous benchmark and see increased performance and robustness by synthesizing the data for the free-text use cases.</abstract>
       <url hash="02b216e1">2025.coling-industry.46</url>
@@ -9923,7 +9923,7 @@
     </paper>
     <paper id="51">
       <title><fixed-case>UR</fixed-case>2<fixed-case>N</fixed-case>: Unified Retriever and <fixed-case>R</fixed-case>era<fixed-case>N</fixed-case>ker</title>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Jaydeep</first><last>Sen</last></author>
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Vignesh</first><last>P</last></author>
@@ -9966,7 +9966,7 @@
       <author><first>Aayush</first><last>Bajaj</last></author>
       <author><first>Aaryaman</first><last>Kartha</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>625–643</pages>
       <abstract>Given the ubiquity of charts as a data analysis, visualization, and decision-making tool across industries and sciences, there has been a growing interest in developing pre-trained foundation models as well as general purpose instruction-tuned models for chart understanding and reasoning. However, existing methods suffer crucial drawbacks across two critical axes affecting the performance of chart representation models: they are trained on data generated from underlying data tables of the charts, ignoring the visual trends and patterns in chart images, and use weakly aligned vision-language backbone models for domain-specific training, limiting their generalizability when encountering charts in the wild. We address these important drawbacks and introduce ChartGemma, a novel chart understanding and reasoning model developed over PaliGemma. Rather than relying on underlying data tables, ChartGemma is trained on instruction-tuning data generated directly from chart images, thus capturing both high-level trends and low-level visual information from a diverse set of charts. Our simple approach achieves state-of-the-art results across 5 benchmarks spanning chart summarization, question answering, and fact-checking, and our elaborate qualitative studies on real-world charts show that ChartGemma generates more realistic and factually correct summaries compared to its contemporaries. We release the code, model checkpoints, dataset, and demos at https://github.com/vis-nlp/ChartGemma.</abstract>
       <url hash="c44fb6fb">2025.coling-industry.54</url>
@@ -10014,7 +10014,7 @@
       <author><first>Aleksandr</first><last>Drozd</last></author>
       <author><first>Jordan</first><last>Clive</last></author>
       <author><first>Kshitij</first><last>Gupta</last></author>
-      <author><first>Liangyu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last></author>
       <author><first>Qi</first><last>Sun</last></author>
       <author><first>Ken</first><last>Tsui</last></author>
       <author><first>Nour</first><last>Moustafa-Fahmy</last></author>
@@ -10049,7 +10049,7 @@
       <title>Lightweight Safety Guardrails Using Fine-tuned <fixed-case>BERT</fixed-case> Embeddings</title>
       <author><first>Aaron</first><last>Zheng</last></author>
       <author><first>Mansi</first><last>Rana</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>689–696</pages>
       <abstract>With the recent proliferation of large language models (LLMs), enterprises have been able to rapidly develop proof-of-concepts and prototypes. As a result, there is a growing need to implement robust guardrails that monitor, quantize and control an LLM’s behavior, ensuring that the use is reliable, safe, accurate and also aligned with the users’ expectations. Previous approaches for filtering out inappropriate user prompts or system outputs, such as LlamaGuard and OpenAI’s MOD API, have achieved significant success by fine-tuning existing LLMs. However, using fine-tuned LLMs as guardrails introduces increased latency and higher maintenance costs, which may not be practical or scalable for cost-efficient deployments. We take a different approach, focusing on fine-tuning a lightweight architecture: Sentence-BERT. This method reduces the model size from LlamaGuard’s 7 billion parameters to approximately 67 million, while maintaining comparable performance on the AEGIS safety benchmark.</abstract>
       <url hash="196ece8a">2025.coling-industry.58</url>
@@ -10058,7 +10058,7 @@
     <paper id="59">
       <title>Zero-shot Slot Filling in the Age of <fixed-case>LLM</fixed-case>s for Dialogue Systems</title>
       <author><first>Mansi</first><last>Rana</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
       <author><first>Sindhuja</first><last>Gopalan</last></author>
       <author><first>Maragathamani</first><last>Boothalingam</last></author>
       <pages>697–706</pages>
@@ -10102,8 +10102,8 @@
       <author><first>Cheoneum</first><last>Park</last></author>
       <author><first>Seohyeong</first><last>Jeong</last></author>
       <author><first>Minsang</first><last>Kim</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <pages>760–770</pages>
       <abstract>Recent advances in language models (LMs) has driven progress in information retrieval (IR), effectively extracting semantically relevant information. However, they face challenges in balancing computational costs with deeper query-document interactions. To tackle this, we present two mechanisms: 1) a light and effective multi-vector retrieval with sequence compression vectors, dubbed SCV and 2) coarse-to-fine vector search. The strengths of SCV stems from its application of span compressive vectors for scoring. By employing a non-linear operation to examine every token in the document, we abstract these into a span-level representation. These vectors effectively reduce the document’s dimensional representation, enabling the model to engage comprehensively with tokens across the entire collection of documents, rather than the subset retrieved by Approximate Nearest Neighbor. Therefore, our framework performs a coarse single vector search during the inference stage and conducts a fine-grained multi-vector search end-to-end. This approach effectively reduces the cost required for search. We empirically show that SCV achieves the fastest latency compared to other state-of-the-art models and can obtain competitive performance on both in-domain and out-of-domain benchmark datasets.</abstract>
       <url hash="1851b5d7">2025.coling-industry.63</url>
diff --git a/data/xml/2025.comedi.xml b/data/xml/2025.comedi.xml
index df247c2570..6d3d5bc705 100644
--- a/data/xml/2025.comedi.xml
+++ b/data/xml/2025.comedi.xml
@@ -59,7 +59,7 @@
     <paper id="5">
       <title>Deep-change at <fixed-case>C</fixed-case>o<fixed-case>M</fixed-case>e<fixed-case>D</fixed-case>i: the Cross-Entropy Loss is not All You Need</title>
       <author><first>Mikhail</first><last>Kuklin</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <pages>48–64</pages>
       <abstract>Manual annotation of edges in Diachronic Word Usage Graphs is a critical step in creation of datasets for Lexical Semantic Change Detection tasks, but a very labour-intensive one. Annotators estimate if two senses of an ambiguous word expressed in two usages of this word are related and how. This is a variation of the Word-in-Context (WiC) task with some peculiarities, including diachronic data, an ordinal scale for annotations consisting of 4 values with pre-defined meanings (e.g. homonymy, polysemy), and special attention to the degree of disagreement between annotators which affects the further processing of the graph. CoMeDi is a shared task aiming at automating this annotation process. Participants are asked to predict the median annotation for a pair of usages in the first subtask, and estimate the disagreement between annotators in the second subtask. Together this gives some idea about the distribution of annotations we can get from humans for a given pair of usages. For the first subtask we tried several ways of adapting a binary WiC model to this 4 class problem. We discovered that further fine-tuning the model as a 4 class classifier on the training data of the shared task works significantly worse than thresholding the original binary model. For the second subtask our best results were achieved by building a model that predicts the whole multinomial distribution of annotations and calculating the disagreement from this distribution. Our solutions for both subtasks have outperformed all other participants of the shared task.</abstract>
       <url hash="15da2c23">2025.comedi-1.5</url>
@@ -157,9 +157,9 @@
     <paper id="15">
       <title>Disagreement in Metaphor Annotation of <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>panish Science Tweets</title>
       <author><first>Alec</first><last>Sánchez-Montero</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Sergio-Luis</first><last>Ojeda-Trueba</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <pages>155–164</pages>
       <abstract>Traditional linguistic annotation methods often strive for a gold standard with hard labels as input for natural language processing models, assuming an underlying objective truth for all tasks. However, disagreement among annotators is a common scenario, even for seemingly objective linguistic tasks, and is particularly prominent in figurative language annotation, since multiple valid interpretations can sometimes coexist. This study presents the annotation process for identifying metaphorical tweets within a corpus of 3733 Public Communication of Science texts written in Mexican Spanish, emphasizing inter-annotator disagreement. Using Fleiss’ and Cohen’s Kappa alongside agreement percentages, we evaluated metaphorical language detection through binary classification in three situations: two subsets of the corpus labeled by three different non-expert annotators each, and a subset of disagreement tweets, identified in the non-expert annotation phase, re-labeled by three expert annotators. Our results suggest that expert annotation may improve agreement levels, but does not exclude disagreement, likely due to factors such as the relatively novelty of the genre, the presence of multiple scientific topics, and the blending of specialized and non-specialized discourse. Going further, we propose adopting a learning-from-disagreement approach for capturing diverse annotation perspectives to enhance computational metaphor detection in Mexican Spanish.</abstract>
       <url hash="772ead3b">2025.comedi-1.15</url>
diff --git a/data/xml/2025.computel.xml b/data/xml/2025.computel.xml
index b3d2484eac..12631fc10a 100644
--- a/data/xml/2025.computel.xml
+++ b/data/xml/2025.computel.xml
@@ -6,7 +6,7 @@
       <editor><first>Jordan</first><last>Lachler</last></editor>
       <editor><first>Godfred</first><last>Agyapong</last></editor>
       <editor><first>Antti</first><last>Arppe</last></editor>
-      <editor><first>Sarah</first><last>Moeller</last></editor>
+      <editor id="sarah-moeller"><first>Sarah</first><last>Moeller</last></editor>
       <editor><first>Aditi</first><last>Chaudhary</last></editor>
       <editor><first>Shruti</first><last>Rijhwani</last></editor>
       <editor><first>Daisy</first><last>Rosenblum</last></editor>
@@ -25,7 +25,7 @@
     <paper id="1">
       <title>Formalizing the Morphology of Rromani Adjectives</title>
       <author><first>Masako</first><last>Watabe</last></author>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <pages>1-10</pages>
       <abstract>This paper presents a set of linguistic resources that formalizes the morphological behavior of simple Rromani adjectives. We describe the formalization of the adjectives’ morphology and the implementation with the NooJ linguistic platform of an electronic dictionary associated with a formal morpho-syntactic grammar. We can then apply this set of resources to a corpus to evaluate the resources and automatically annotate adjectival forms in Rromani texts. The final set of resources can then be used to identify each Rromani dialectal variant and can be used as a pedagogical tool to teach Rromani as a second language.</abstract>
       <url hash="f15c0d48">2025.computel-main.1</url>
@@ -34,7 +34,7 @@
     <paper id="2">
       <title>Bilingual Sentence Mining for Low-Resource Languages: a Case Study on Upper and <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian</title>
       <author><first>Shu</first><last>Okabe</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>11-19</pages>
       <abstract>Parallel sentence mining is crucial for down- stream tasks such as Machine Translation, especially for low-resource languages, where such resources are scarce. In this context, we apply a pipeline approach with contextual embeddings on two endangered Slavic languages spoken in Germany, Upper and Lower Sorbian, to evaluate mining quality. To this end, we compare off-the-shelf multilingual language models and word encoders pre-trained on Upper Sorbian to understand their impact on sentence mining. Moreover, to filter out irrelevant pairs, we experiment with a post-processing of mined sentences through an unsupervised word aligner based on word embeddings. We observe the usefulness of additional pre-training in Upper Sorbian, which leads to direct improvements when mining the same language but also its related language, Lower Sorbian.</abstract>
       <url hash="ecd80365">2025.computel-main.2</url>
@@ -108,7 +108,7 @@
     <paper id="9">
       <title>Speech Technologies Datasets for <fixed-case>A</fixed-case>frican Under-Served Languages</title>
       <author><first>Emmanuel</first><last>Ngue Um</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Eliette-Caroline Emilie</first><last>Ngo Tjomb</last></author>
       <author><first>Florus Landry</first><last>Dibengue</last></author>
       <author><first>Blaise-Mathieu</first><last>Banoum Manguele</last></author>
@@ -131,7 +131,7 @@
       <author><first>Joseph</first><last>Lukner</last></author>
       <author><first>Finn</first><last>Verdonk</last></author>
       <author><first>Willem</first><last>de Reuse</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <pages>91-99</pages>
       <abstract>This paper presents work towards a morphological transducer for Hän, a Dene language spoken in Alaska and the Yukon Territory. We present the implementation of several complex morphological features of Dene languages into a morphological transducer, an evaluation of the transducer on corpus data, and a discussion of the future uses of such a transducer towards Hän revitalization efforts.</abstract>
       <url hash="637bc009">2025.computel-main.10</url>
diff --git a/data/xml/2025.conll.xml b/data/xml/2025.conll.xml
index 20e0b078e0..2d7840de6d 100644
--- a/data/xml/2025.conll.xml
+++ b/data/xml/2025.conll.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-07-20" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 29th Conference on Computational Natural Language Learning</booktitle>
-      <editor><first>Gemma</first><last>Boleda</last></editor>
+      <editor id="gemma-boleda"><first>Gemma</first><last>Boleda</last></editor>
       <editor><first>Michael</first><last>Roth</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vienna, Austria</address>
@@ -40,7 +40,7 @@
       <title>Quasi-symbolic Semantic Geometry over Transformer-based Variational <fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>ncoder</title>
       <author><first>Yingji</first><last>Zhang</last></author>
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>12-29</pages>
       <abstract>Formal/symbolic semantics can provide canonical, rigid controllability and interpretability to sentence representations due to their <i>localisation</i> or <i>composition</i> property. How can we deliver such property to the current distributional sentence representations to better control and interpret the generation of language models (LMs)? In this work, we theoretically frame the sentence semantics as the composition of <i>semantic role - word content</i> features and propose the formal semantic geometrical framework. To inject such geometry into Transformer-based LMs (i.e. GPT2), we deploy a supervised Transformer-based Variational AutoEncoder, where the sentence generation can be manipulated and explained over low-dimensional latent Gaussian space. In addition, we propose a new probing algorithm to guide the movement of sentence vectors over such geometry. Experimental results reveal that the formal semantic geometry can potentially deliver better control and interpretation to sentence generation.</abstract>
       <url hash="11cb23e1">2025.conll-1.2</url>
@@ -51,7 +51,7 @@
       <title><fixed-case>L</fixed-case>aw<fixed-case>T</fixed-case>oken: a single token worth more than its constituents</title>
       <author><first>Yu-Hsiang</first><last>Tseng</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
       <author><first>Hsin-Yu</first><last>Chou</last><affiliation>deepq.com</affiliation></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last><affiliation>National Taiwan University</affiliation></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last><affiliation>National Taiwan University</affiliation></author>
       <pages>30-46</pages>
       <abstract>Legal citations require correctly recalling the law references of complex law article names and article numbering, which large language models typically treat as multi-token sequences. Motivated by the form-meaning pair of constructionist approaches, we explore treating these multi-token law references as a single holistic law token and examining the implications for legal citation accuracy and differences in model interpretability. We train and compare two types of models: LawToken models, which encode the legal citations as a single law token, and LawBase models, which treat them as multi-token compounds. The results show that LawToken models outperform LawBase models on legal citation tasks, primarily due to fewer errors in the article numbering components. Further model representation analysis reveals that, while both models achieve comparable semantic representation quality, the multi-token-based LawBase suffers from degraded representations in multistep decoding, leading to more errors. Taken together, these findings suggest that form-meaning pairing can operate in a larger context, and this larger unit may offer advantages in future modeling of legal reasoning. In practice, this approach can significantly reduce the likelihood of hallucinations by anchoring legal citations as discrete, holistic tokens, thereby minimizing the risk of generating nonexistent or incorrect legal references.</abstract>
       <url hash="7bf566eb">2025.conll-1.3</url>
@@ -79,7 +79,7 @@
       <author><first>Ine</first><last>Gevers</last></author>
       <author><first>Victor</first><last>De Marez</last><affiliation>Universiteit Antwerpen</affiliation></author>
       <author><first>Luna</first><last>De Bruyne</last><affiliation>Universiteit Antwerpen</affiliation></author>
-      <author><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last><affiliation>University of Antwerp</affiliation></author>
       <pages>68-80</pages>
       <abstract>In this study, we take a closer look at how Winograd schema challenges can be used to evaluate common sense reasoning in LLMs. Specifically, we evaluate generative models of different sizes on the popular WinoGrande benchmark. We release WinoWhat, a new corpus, in which each instance of the WinoGrande validation set is paraphrased. Additionally, we evaluate the performance on the challenge across five common sense knowledge categories, giving more fine-grained insights on what types of knowledge are more challenging for LLMs. Surprisingly, all models perform significantly worse on WinoWhat, implying that LLM reasoning capabilities are overestimated on WinoGrande. To verify whether this is an effect of benchmark memorization, we match benchmark instances to LLM trainingdata and create two test-suites. We observe that memorization has a minimal effect on model performance on WinoGrande.</abstract>
       <url hash="7bc0af2a">2025.conll-1.5</url>
@@ -94,7 +94,7 @@
       <author><first>Thi-Nhung</first><last>Nguyen</last><affiliation>VinAI Research</affiliation></author>
       <author><first>Hoang</first><last>Ngo</last><affiliation>VinAI Research</affiliation></author>
       <author><first>Dinh</first><last>Phung</last><affiliation>Monash University</affiliation></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Dat Quoc</first><last>Nguyen</last><affiliation>Qualcomm AI Research</affiliation></author>
       <pages>81-92</pages>
       <abstract>Table understanding is key to addressing challenging downstream tasks such as table-based question answering and fact verification. Recent works have focused on leveraging Chain-of-Thought and question decomposition to solve complex questions requiring multiple operations on tables. However, these methods often suffer from a lack of explicit long-term planning and weak inter-step connections, leading to miss constraints within questions. In this paper, we propose leveraging the long-term planning capabilities of large language models (LLMs) to enhance table understanding. Our approach enables the execution of a long-term plan, where the steps are tightly interconnected and serve the ultimate goal, an aspect that methods based on Chain-of-Thought and question decomposition lack. In addition, our method effectively minimizes the inclusion of unnecessary details in the process of solving the next short-term goals, a limitation of methods based on Chain-of-Thought. Extensive experiments demonstrate that our method outperforms strong baselines and achieves state-of-the-art performance on WikiTableQuestions and TabFact datasets.</abstract>
@@ -139,7 +139,7 @@
     <paper id="10">
       <title>Experiential Semantic Information and Brain Alignment: Are Multimodal Models Better than Language Models?</title>
       <author><first>Anna</first><last>Bavaresco</last><affiliation>University of Amsterdam</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <pages>141-155</pages>
       <abstract>A common assumption in Computational Linguistics is that text representations learnt by multimodal models are richer and more human-like than those by language-only models, as they are grounded in images or audio—similar to how human language is grounded in real-world experiences. However, empirical studies checking whether this is true are largely lacking. We address this gap by comparing word representations from contrastive multimodal models vs. language-only ones in the extent to which they capture experiential information—as defined by an existing norm-based ‘experiential model’—and align with human fMRI responses. Our results indicate that, surprisingly, language-only models are superior to multimodal ones in both respects. Additionally, they learn more unique brain-relevant semantic information beyond that shared with the experiential model. Overall, our study highlights the need to develop computational models that better integrate the complementary semantic information provided by multimodal data sources.</abstract>
       <url hash="6958ec00">2025.conll-1.10</url>
@@ -161,7 +161,7 @@
       <title>Do Construction Distributions Shape Formal Language Learning In <fixed-case>G</fixed-case>erman <fixed-case>B</fixed-case>aby<fixed-case>LM</fixed-case>s?</title>
       <author><first>Bastian</first><last>Bunzeck</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Daniel</first><last>Duran</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>169-186</pages>
       <abstract>We analyze the influence of utterance-level construction distributions in German child-directed/child-available speech on the resulting word-level, syntactic and semantic competence (and their underlying learning trajectories) in small LMs, which we train on a novel collection of developmentally plausible language data for German. We find that trajectories are surprisingly robust for markedly different distributions of constructions in the training data, which have little effect on final accuracies and almost no effect on global learning trajectories. While syntax learning benefits from more complex utterances, word-level learning culminates in better scores with more fragmentary utterances. We argue that LMs trained on developmentally plausible data can contribute to debates on how conducive different kinds of linguistic stimuli are to language learning.</abstract>
       <url hash="70f5a4b6">2025.conll-1.12</url>
@@ -192,8 +192,8 @@
     </paper>
     <paper id="15">
       <title>Components of Creativity: Language Model-based Predictors for Clustering and Switching in Verbal Fluency</title>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
-      <author><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Judith</first><last>Sieker</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Özge</first><last>Alacam</last><affiliation>Bielefeld University</affiliation></author>
       <pages>216-232</pages>
@@ -252,7 +252,7 @@
       <title>Polarity inversion operators in <fixed-case>PLM</fixed-case></title>
       <author><first>David</first><last>Kletz</last></author>
       <author><first>Pascal</first><last>Amsili</last><affiliation>Sorbonne Nouvelle (Paris 3)</affiliation></author>
-      <author><first>Marie</first><last>Candito</last><affiliation>Université Paris Cité</affiliation></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last><affiliation>Université Paris Cité</affiliation></author>
       <pages>312-322</pages>
       <abstract>From a linguistic perspective, negation is a unique and inherently compositional operator. In this study, we investigate whether the bert-large-cased Pretrained Language Model (PLM) properly encodes this compositional aspect of negation when embedding a token that falls within the scope of negation.To explore this, we train two external Multi-Layer Perceptrons to modify contextual embeddings in a controlled manner. The goal is to reverse the polarity information encoded in the embedding while preserving all other token-related information. The first MLP, called the Negator, transforms a negative polarity into a positive one, while the second, the Affirmator, performs the reverse transformation.We then conduct a series of evaluations to assess the effectiveness of these operators. Our results indicate that while the Negator/Affirmator is functional, it only partially simulates the negation operator. Specifically, applying it recursively does not allow us to recover the original polarity, suggesting an incomplete representation of negation within the PLM’s embeddings.In addition, a downstream evaluation on the Negated LAMA dataset reveals that the modifications introduced by the Negator/Affirmator lead to a slight improvement in the model’s ability to account for negation in its predictions. However, applying the Negator/Affirmator recursively results in degraded representations, further reinforcing the idea that negation is not fully compositional within PLM embeddings.</abstract>
       <url hash="050b8ee5">2025.conll-1.20</url>
@@ -265,7 +265,7 @@
       <author><first>Kenneth</first><last>Lai</last><affiliation>Brandeis University and Mass General Brigham</affiliation></author>
       <author><first>Abhijnan</first><last>Nath</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last><affiliation>Colorado State University</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <pages>323-333</pages>
       <abstract>Recent developments in aligning Large Language Models (LLMs) with human preferences have significantly enhanced their utility in human-AI collaborative scenarios. However, such approaches often neglect the critical role of “epistemic friction,” or the inherent resistance encountered when updating beliefs in response to new, conflicting, or ambiguous information. In this paper, we define *dynamic epistemic friction* as the resistance to epistemic integration, characterized by the misalignment between an agent’s current belief state and new propositions supported by external evidence. We position this within the framework of Dynamic Epistemic Logic, where friction emerges as nontrivial belief-revision during the interaction. We then present analyses from a situated collaborative task that demonstrate how this model of epistemic friction can effectively predict belief updates in dialogues, and we subsequently discuss how the model of belief alignment as a measure of epistemic resistance or friction can naturally be made more sophisticated to accommodate the complexities of real-world dialogue scenarios.</abstract>
       <url hash="575b0f3c">2025.conll-1.21</url>
@@ -366,7 +366,7 @@
       <title>Lost in Variation? Evaluating <fixed-case>NLI</fixed-case> Performance in <fixed-case>B</fixed-case>asque and <fixed-case>S</fixed-case>panish Geographical Variants</title>
       <author><first>Jaione</first><last>Bengoetxea</last><affiliation>Universidad del País Vasco</affiliation></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>452-468</pages>
       <abstract>In this paper, we evaluate the capacity of current language technologies to understand Basque and Spanish language varieties. We use Natural Language Inference (NLI) as a pivot task and introduce a novel, manually-curated parallel dataset in Basque and Spanish, along with their respective variants. Our empirical analysis of crosslingual and in-context learning experiments using encoder-only and decoder-based Large Language Models (LLMs) shows a performance drop when handling linguistic variation, especially in Basque. Error analysis suggests that this decline is not due to lexical overlap, but rather to the linguistic variation itself. Further ablation experiments indicate that encoder-only models particularly struggle with Western Basque, which aligns with linguistic theory that identifies peripheral dialects (e.g., Western) as more distant from the standard. All data and code are publicly available.</abstract>
       <url hash="7fe80ce8">2025.conll-1.30</url>
@@ -423,7 +423,7 @@
       <title><fixed-case>GCG</fixed-case>-Based Artificial Languages for Evaluating Inductive Biases of Neural Language Models</title>
       <author><first>Nadine</first><last>El-Naggar</last></author>
       <author><first>Tatsuki</first><last>Kuribayashi</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>540-556</pages>
       <abstract>Recent work has investigated whether extant neural language models (LMs) have an inbuilt inductive bias towards the acquisition of attested typologically-frequent grammatical patterns as opposed to infrequent, unattested, or impossible patterns using artificial languages (White and Cotterell, 2021; Kuribayashi et al., 2024). The use of artificial languages facilitates isolation of specific grammatical properties from other factors such as lexical or real-world knowledge, but also risks oversimplification of the problem.In this paper, we examine the use of Generalized Categorial Grammars (GCGs) (Wood, 2014) as a general framework to create artificial languages with a wider range of attested word order patterns, including those where the subject intervenes between verb and object (VSO, OSV) and unbounded dependencies in object relative clauses. In our experiments, we exemplify our approach by extending White and Cotterell (2021) and report some significant differences from existing results.</abstract>
       <url hash="3ba4d0ec">2025.conll-1.35</url>
diff --git a/data/xml/2025.depling.xml b/data/xml/2025.depling.xml
index 19bd4e81f6..929bb8cc4d 100644
--- a/data/xml/2025.depling.xml
+++ b/data/xml/2025.depling.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-07-27" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Eighth International Conference on Dependency Linguistics (Depling, SyntaxFest 2025)</booktitle>
-      <editor><first>Eva</first><last>Hajičová</last></editor>
+      <editor id="eva-hajicova"><first>Eva</first><last>Hajičová</last></editor>
       <editor><first>Sylvain</first><last>Kahane</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ljubljana, Slovenia</address>
@@ -42,7 +42,7 @@
       <author><first>Loic</first><last>De Langhe</last><affiliation>Universiteit Gent</affiliation></author>
       <author><first>Jasper</first><last>Degraeuwe</last><affiliation>Universiteit Gent</affiliation></author>
       <author><first>Melissa</first><last>Farasyn</last><affiliation>Universiteit Gent</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>Universiteit Gent</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>Universiteit Gent</affiliation></author>
       <pages>24-35</pages>
       <abstract>Dependency parsing of non-normative language varieties remains a challenge for modern NLP. While contemporary parsers excel at standardized languages, dialectal variation – especially in function words, conjunctives, and verb clustering – introduces syntactic ambiguity that disrupts traditional parsing approaches. In this paper, we conduct a quantitative evaluation of syntactic dependencies in Southern Dutch dialects, leveraging a standardized dialect corpus to isolate syntactic effects from lexical variation. Using a neural biaffine dependency parser with various mono- and multilingual transformer-based encoders, we benchmark parsing performance on standard Dutch, dialectal data, and mixed training sets. Our results demonstrate that incorporating dialect-specific data significantly enhances parsing accuracy, yet certain syntactic structures remain difficult to resolve, even with dedicated adaptation. These findings highlight the need for more nuanced parsing strategies and improved syntactic modeling for non-normative language varieties.</abstract>
       <url hash="508df547">2025.depling-1.3</url>
@@ -141,7 +141,7 @@
     <paper id="15">
       <title>Periphrastic Verb Forms in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Lenka</first><last>Krippnerová</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
-      <author><first>Daniel</first><last>Zeman</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
       <pages>140-149</pages>
       <abstract>We propose a generalization of the morphological annotation in Universal Dependencies (UD) to phrases spanning multiple words, possibly discontinuous. Our focus area is that of periphrastic tenses, voices and other forms, typically consisting of a non-finite content verb combined with one or more auxiliaries; however, the same approach can be applied to other morphosyntactic constructions. We present a software tool that can detect periphrastic verb forms, extract the relevant morphological features from member words and combine them into new, phrase-level annotation. The tool currently detects periphrastic verb forms in 15 Slavic languages that are represented in UD and it is easily adaptable to other constructions and languages. Both the tool and the processed Slavic data are freely available.</abstract>
       <url hash="7de20104">2025.depling-1.15</url>
diff --git a/data/xml/2025.dmr.xml b/data/xml/2025.dmr.xml
index f0b7c49847..3783779edf 100644
--- a/data/xml/2025.dmr.xml
+++ b/data/xml/2025.dmr.xml
@@ -20,8 +20,8 @@
     <paper id="1">
       <title>Comparing Manual and Automatic <fixed-case>UMR</fixed-case>s for <fixed-case>C</fixed-case>zech and <fixed-case>L</fixed-case>atin</title>
       <author><first>Jan</first><last>Štěpánek</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <author><first>Federica</first><last>Gamba</last></author>
       <author><first>Hana</first><last>Hledíková</last></author>
       <pages>1–12</pages>
@@ -40,7 +40,7 @@
     <paper id="3">
       <title>Boosting a Semantic Parser Using Treebank Trees Automatically Annotated with Unscoped Logical Forms</title>
       <author><first>Miles</first><last>Frank</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>19–29</pages>
       <url hash="8337b554">2025.dmr-1.3</url>
       <bibkey>frank-schubert-2025-boosting</bibkey>
@@ -66,9 +66,9 @@
     <paper id="6">
       <title>Representing <fixed-case>ISO</fixed-case>-Annotated Dynamic Information in <fixed-case>UMR</fixed-case></title>
       <author><first>Kiyong</first><last>Lee</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Alex C.</first><last>Fang</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="alex-chengyu-fang"><first>Alex C.</first><last>Fang</last></author>
       <author><first>Chongwon</first><last>Park</last></author>
       <pages>49–58</pages>
       <url hash="beefefa8">2025.dmr-1.6</url>
diff --git a/data/xml/2025.dravidianlangtech.xml b/data/xml/2025.dravidianlangtech.xml
index ed117c39e2..1861d53d26 100644
--- a/data/xml/2025.dravidianlangtech.xml
+++ b/data/xml/2025.dravidianlangtech.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</booktitle>
       <editor><first>Bharathi Raja</first><last>Chakravarthi</last></editor>
       <editor><first>Ruba</first><last>Priyadharshini</last></editor>
-      <editor><first>Anand Kumar</first><last>Madasamy</last></editor>
+      <editor id="anand-kumar-m"><first>Anand Kumar</first><last>Madasamy</last></editor>
       <editor><first>Sajeetha</first><last>Thavareesan</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <editor><first>Saranya</first><last>Rajiakodi</last></editor>
@@ -274,7 +274,7 @@
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>José Luis</first><last>Oropeza</last></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>133-138</pages>
       <abstract>The increasing prevalence of AI-generated content, including fake product reviews, poses significant challenges in maintaining authenticity and trust in e-commerce systems. While much work has focused on detecting such reviews in high-resource languages, limited attention has been given to low-resource languages like Malayalam and Tamil. This study aims to address this gap by developing a robust framework to identify AI-generated product reviews in these languages. We explore a BERT-based approach for this task. Our methodology involves fine-tuning a BERT-based model specifically on Malayalam and Tamil datasets. The experiments are conducted using labeled datasets that contain a mix of human-written and AI-generated reviews. Performance is evaluated using the macro F1 score. The results show that the BERT-based model achieved a macro F1 score of 0.6394 for Tamil and 0.8849 for Malayalam. Preliminary results indicate that the BERT-based model performs significantly better for Malayalam than for Tamil in terms of the average Macro F1 score, leveraging its ability to capture the complex linguistic features of these languages. Finally, we open the source code of the implementation in the GitHub repository: AI-Generated-Product-Review-Code</abstract>
       <url hash="26d3df77">2025.dravidianlangtech-1.22</url>
@@ -731,7 +731,7 @@
     <paper id="59">
       <title><fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Identification of <fixed-case>AI</fixed-case> Generated Content in <fixed-case>D</fixed-case>ravidian Languages using Transformers</title>
       <author><first>J</first><last>Bhuvana</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <author><first>Rohan</first><last>R</last></author>
       <author><first>Diya</first><last>Seshan</last></author>
       <author><first>Avaneesh</first><last>Koushik</last></author>
@@ -744,7 +744,7 @@
     <paper id="60">
       <title><fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Sentiment Analysis in <fixed-case>D</fixed-case>ravidian Languages using Multilingual <fixed-case>BERT</fixed-case></title>
       <author><first>J</first><last>Bhuvana</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <author><first>Diya</first><last>Seshan</last></author>
       <author><first>Rohan</first><last>R</last></author>
       <author><first>Avaneesh</first><last>Koushik</last></author>
@@ -909,7 +909,7 @@
     </paper>
     <paper id="74">
       <title><fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech2025: <fixed-case>LLM</fixed-case> Based Techniques for Detection of Abusive Text Targeting Women</title>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <author><first>J</first><last>Bhuvana</last></author>
       <author><first>Avaneesh</first><last>Koushik</last></author>
       <author><first>Diya</first><last>Seshan</last></author>
@@ -985,7 +985,7 @@
     <paper id="80">
       <title><fixed-case>SSNT</fixed-case>rio @ <fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Hybrid Approach for Hate Speech Detection in <fixed-case>D</fixed-case>ravidian Languages with Text and Audio Modalities</title>
       <author><first>J</first><last>Bhuvana</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <author><first>Rohan</first><last>R</last></author>
       <author><first>Diya</first><last>Seshan</last></author>
       <author><first>Avaneesh</first><last>Koushik</last></author>
diff --git a/data/xml/2025.evalmg.xml b/data/xml/2025.evalmg.xml
index 43c7635b42..8902cb34cc 100644
--- a/data/xml/2025.evalmg.xml
+++ b/data/xml/2025.evalmg.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the First Workshop of Evaluation of Multi-Modal Generation</booktitle>
       <editor><first>Wei Emma</first><last>Zhang</last></editor>
-      <editor><first>Xiang</first><last>Dai</last></editor>
+      <editor id="xiang-dai"><first>Xiang</first><last>Dai</last></editor>
       <editor><first>Desmond</first><last>Elliot</last></editor>
       <editor><first>Byron</first><last>Fang</last></editor>
       <editor><first>Mongyuan</first><last>Sim</last></editor>
diff --git a/data/xml/2025.fever.xml b/data/xml/2025.fever.xml
index 39514c0c0e..12d808f0c1 100644
--- a/data/xml/2025.fever.xml
+++ b/data/xml/2025.fever.xml
@@ -9,7 +9,7 @@
       <editor><first>Oana</first><last>Cocarascu</last></editor>
       <editor><first>Zhijiang</first><last>Guo</last></editor>
       <editor><first>Arpit</first><last>Mittal</last></editor>
-      <editor><first>Michael</first><last>Schlichtkrull</last></editor>
+      <editor id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></editor>
       <editor><first>James</first><last>Thorne</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -137,8 +137,8 @@
       <author><first>Pengfei</first><last>Yu</last><affiliation>Amazon</affiliation></author>
       <author><first>Chi</first><last>Han</last></author>
       <author><first>Yi R.</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Manling</first><last>Li</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>132-150</pages>
@@ -243,7 +243,7 @@
       <author><first>Asim</first><last>Abbas</last></author>
       <author><first>Mubashir</first><last>Ali</last><affiliation>University of Birmingham</affiliation></author>
       <author><first>Yue</first><last>Feng</last><affiliation>University of Birmingham</affiliation></author>
-      <author><first>Mark G.</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark G.</first><last>Lee</last></author>
       <author><first>Venelin</first><last>Kovatchev</last><affiliation>University of Birmingham</affiliation></author>
       <pages>238-246</pages>
       <abstract>In this paper, we present the system proposed by our team OldJoe, for the 8th edition of the AVeriTeC shared task, as part of the FEVER workshop. The objective of this task is to verify the factuality of real-world claims. Our approach integrates open source large language models, SQL, and in-context learning. We begin with embedding the knowledge store using a pretrained embedding language model then storing the outputs in a SQL database. Subsequently, we prompt an LLM to craft relevant questions based on the input claim, which are then used to guide the retrieval process. We further prompt the LLM to generate answers to the questions and predict the veracity of the original claim. Our system scored 0.49 on the HU-METEOR AVeriTeC score on the dev set and 0.15 on the Ev2R recall on the test set. Due to the time constraint we were unable to conduct additional experiments or further hyperparameter tuning. As a result, we adopted this pipeline configuration centered on the Qwen3-14B-AWQ model as our final submission strategy. The full pipeline is available on GitHub: https://github.com/farahft/OldJoe</abstract>
diff --git a/data/xml/2025.fieldmatters.xml b/data/xml/2025.fieldmatters.xml
index 9fefcf2b0f..8e06629fc6 100644
--- a/data/xml/2025.fieldmatters.xml
+++ b/data/xml/2025.fieldmatters.xml
@@ -27,7 +27,7 @@
       <title>Automatic Phone Alignment of Code-switched <fixed-case>U</fixed-case>rum–<fixed-case>R</fixed-case>ussian Field Data</title>
       <author><first>Emily</first><last>Ahn</last></author>
       <author><first>Eleanor</first><last>Chodroff</last><affiliation>University of Zurich</affiliation></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>1-14</pages>
       <abstract>Code-switching, using multiple languages in a single utterance, is a common means of communication.In the language documentation process, speakers may code-switch between the target language and a language of broader communication; however, how to handle this mixed speech data is not always clearly addressed for speech research and specifically for a corpus phonetics pipeline.This paper investigates best practices for conducting phone-level forced alignment of code-switched field data using the Urum speech dataset from DoReCo. This dataset comprises 117 minutes of narrative utterances, of which 42% contain code-switched Urum–Russian speech.We demonstrate that the inclusion of Russian speech and Russian pretrained acoustic models can aid the alignment of Urum phones.Beyond using boundary alignment precision and accuracy metrics, we also discovered that the method of acoustic modeling impacted a downstream corpus phonetics investigation of code-switched Urum–Russian.</abstract>
       <url hash="1dcd4a49">2025.fieldmatters-1.1</url>
@@ -38,7 +38,7 @@
       <author><first>Maria</first><last>Khelli</last></author>
       <author><first>Samuel</first><last>Cahyawijaya</last><affiliation>Cohere</affiliation></author>
       <author><first>Ayu</first><last>Purwarianti</last><affiliation>Institut Teknologi Bandung</affiliation></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <pages>15-25</pages>
       <abstract>Cross-lingual transfer in natural language processing (NLP) models enhances multilingual performance by leveraging shared linguistic knowledge. However, traditional methods that process all data simultaneously often fail to mimic real-world scenarios, leading to challenges like catastrophic forgetting, where fine-tuning on new tasks degrades performance on previously learned ones. Our study explores this issue in multilingual contexts, focusing on linguistic differences affecting representational learning rather than just model parameters. We experiment with 52 languages using LoRA adapters of varying ranks to evaluate non-shared, partially shared, and fully shared parameters. Our aim is to see if parameter sharing through adapters can mitigate forgetting while preserving prior knowledge. We find that languages using non-Latin scripts are more susceptible to catastrophic forgetting, whereas those written in Latin script facilitate more effective cross-lingual transfer.</abstract>
       <url hash="25e8e394">2025.fieldmatters-1.2</url>
@@ -47,7 +47,7 @@
     <paper id="3">
       <title>Breaking the Transcription Bottleneck: Fine-tuning <fixed-case>ASR</fixed-case> Models for Extremely Low-Resource Fieldwork Languages</title>
       <author><first>Siyu</first><last>Liang</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>26-37</pages>
       <abstract>The development of Automatic Speech Recognition (ASR) has yielded impressive results, but its use in linguistic fieldwork remains limited. Recordings collected in fieldwork contexts present unique challenges, including spontaneous speech, environmental noise, and severely constrained datasets from under-documented languages. In this paper, we benchmark the performance of two fine-tuned multilingual ASR models, MMS and XLS-R, on five typologically diverse low-resource languages with control of training data duration. Our findings show that MMS is best suited when extremely small amounts of training data are available, whereas XLS-R shows parity performance once training data exceed one hour. We provide linguistically grounded analysis for further provide insights towards practical guidelines for field linguists, highlighting reproducible ASR adaptation approaches to mitigate the transcription bottleneck in language documentation.</abstract>
       <url hash="b9881306">2025.fieldmatters-1.3</url>
@@ -79,7 +79,7 @@
       <title>A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukrī <fixed-case>K</fixed-case>urdish</title>
       <author><first>Hiwa</first><last>Asadpour</last><affiliation>Johann Wolfgang Goethe Universität Frankfurt am Main</affiliation></author>
       <author><first>Shu</first><last>Okabe</last><affiliation>Technische Universität München</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>65-75</pages>
       <abstract>Interlinear gloss generation aims to predict linguistic annotations (gloss) for a sentence in a language that is usually under ongoing documentation. Such output is a first draft for the linguist to work with and should reduce the manual workload.This article studies a simple glossing pipeline based on a Conditional Random Field and applies it to a small fieldwork corpus in Mukrī Kurdish, a variety of Central Kurdish.We mainly focus on making the tool as accessible as possible for field linguists, so it can run on standard computers without the need for GPUs. Our pipeline predicts common grammatical patterns robustly and, more generally, frequent combinations of morphemes and glosses. Although more advanced neural models do reach better results, our feature-based system still manages to be competitive and to provide interpretability.To foster further collaboration between field linguistics and NLP, we also provide some recommendations regarding documentation endeavours and release our pipeline code alongside.</abstract>
       <url hash="c1d01d7f">2025.fieldmatters-1.6</url>
diff --git a/data/xml/2025.findings.xml b/data/xml/2025.findings.xml
index 74542969d8..5bd69166e8 100644
--- a/data/xml/2025.findings.xml
+++ b/data/xml/2025.findings.xml
@@ -22,7 +22,7 @@
     </frontmatter>
     <paper id="1">
       <title>From Lazy to Prolific: Tackling Missing Labels in Open Vocabulary Extreme Classification by Positive-Unlabeled Sequence Learning</title>
-      <author><first>Ranran Haoran</first><last>Zhang</last></author>
+      <author id="ranran-haoran-zhang"><first>Ranran Haoran</first><last>Zhang</last></author>
       <author><first>Bensu</first><last>Uçar</last><affiliation>eBay Inc.</affiliation></author>
       <author><first>Soumik</first><last>Dey</last><affiliation>eBay Inc.</affiliation></author>
       <author><first>Hansi</first><last>Wu</last><affiliation>eBay Inc.</affiliation></author>
@@ -101,7 +101,7 @@
       <title><fixed-case>L</fixed-case>aw<fixed-case>I</fixed-case>nstruct: A Resource for Studying Language Model Adaptation to the Legal Domain</title>
       <author><first>Joel</first><last>Niklaus</last><affiliation>Harvey</affiliation></author>
       <author><first>Lucia</first><last>Zheng</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Arya D.</first><last>McCarthy</last><affiliation>Scaled Cognition</affiliation></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last><affiliation>Scaled Cognition</affiliation></author>
       <author><first>Christopher</first><last>Hahn</last><affiliation>X, the moonshot factory</affiliation></author>
       <author><first>Brian M</first><last>Rosen</last><affiliation>Google</affiliation></author>
       <author><first>Peter</first><last>Henderson</last><affiliation>Princeton University</affiliation></author>
@@ -120,7 +120,7 @@
       <author><first>Hao</first><last>Yang</last></author>
       <author><first>Hongyuan</first><last>Lu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xinhua</first><last>Zeng</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Zhang</last><affiliation>facemind</affiliation></author>
       <author><first>Haoran</first><last>Yang</last></author>
       <author><first>Yumeng</first><last>Zhang</last></author>
@@ -284,7 +284,7 @@
       <title><fixed-case>PEMV</fixed-case>: Improving Spatial Distribution for Emotion Recognition in Conversations Using Proximal Emotion Mean Vectors</title>
       <author><first>Chen</first><last>Lin</last></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <pages>345-357</pages>
       <abstract>Emotion Recognition in Conversation (ERC) aims to identify the emotions expressed in each utterance within a dialogue. Existing research primarily focuses on the analysis of contextual structure in dialogue and the interactions between different emotions. Nonetheless, ERC datasets often contain difficult-to-classify samples and suffer from imbalanced label distributions, which pose challenges to the spatial distribution of dialogue features. To tackle this issue, we propose a method that generates Proximal Emotion Mean Vectors (PEMV) based on emotion feature queues to optimize the spatial representation of text features. We design a Center Loss based on PEMVs to pull hard-to-classify samples closer to their respective category centers and employ Angle Loss to maximize the angular separation between different PEMVs. Furthermore, we utilize PEMV as a classifier to better adapt to the spatial structure of dialogue features. Extensive experiments on three widely used benchmark datasets demonstrate that our method achieves state-of-the-art performance and validates its effectiveness in optimizing feature space representations.</abstract>
@@ -353,7 +353,7 @@
       <author><first>Xiaodan</first><last>Liang</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Hong</first><last>Cheng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Qinglin</first><last>Lu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Tencent</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Tencent</affiliation></author>
       <pages>411-426</pages>
       <abstract>Text-to-image (T2I) generation models have significantly advanced in recent years. However, effective interaction with these models is challenging for average users due to the need for specialized prompt engineering knowledge and the inability to perform multi-turn image generation, hindering a dynamic and iterative creation process. Recent attempts have tried to equip Multi-modal Large Language Models (MLLMs) with T2I models to bring the user’s natural language instructions into reality. Hence, the output modality of MLLMs is extended, and the multi-turn generation quality of T2I models is enhanced thanks to the strong multi-modal comprehension ability of MLLMs. However, many of these works face challenges in identifying correct output modalities and generating coherent images accordingly as the number of output modalities increases and the conversations go deeper. Therefore, we propose DialogGen, an effective pipeline to align off-the-shelf MLLMs and T2I models to build a Multi-modal Interactive Dialogue System (MIDS) for multi-turn Text-to-Image generation. It is composed of drawing prompt alignment, careful training data curation, and error correction. Moreover, as the field of MIDS flourishes, comprehensive benchmarks are urgently needed to evaluate MIDS fairly in terms of output modality correctness and multi-modal output coherence. To address this issue, we introduce the Multi-modal Dialogue Benchmark (DialogBen), a comprehensive bilingual benchmark designed to assess the ability of MLLMs to generate accurate and coherent multi-modal content that supports image editing. It contains two evaluation metrics to measure the model’s ability to switch modalities and the coherence of the output images. Our extensive experiments on DialogBen and user study demonstrate the effectiveness of DialogGen in producing correct output modalities and coherent multi-modal outputs compared with other State-of-the-Art models. We hope that DialogBen can contribute to the community for building more powerful MIDS.</abstract>
       <url hash="7d49678c">2025.findings-naacl.25</url>
@@ -455,7 +455,7 @@
     <paper id="33">
       <title>Teaching Large Language Models Number-Focused Headline Generation With Key Element Rationales</title>
       <author><first>Zhen</first><last>Qian</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Xiaofei</first><last>Xu</last></author>
       <author><first>Feng</first><last>Xia</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>533-550</pages>
@@ -467,7 +467,7 @@
     <paper id="34">
       <title>Zero-Shot Strategies for Length-Controllable Summarization</title>
       <author><first>Fabian</first><last>Retkowski</last><affiliation>Karlsruher Institut für Technologie</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>551-572</pages>
       <abstract>Large language models (LLMs) struggle with precise length control, particularly in zero-shot settings. We conduct a comprehensive study evaluating LLMs’ length control capabilities across multiple measures and propose practical methods to improve controllability. Our experiments with LLaMA 3 reveal stark differences in length adherence across measures and highlight inherent biases of the model. To address these challenges, we introduce a set of methods: length approximation, target adjustment, sample filtering, and automated revisions. By combining these methods, we demonstrate substantial improvements in length compliance while maintaining or enhancing summary quality, providing highly effective zero-shot strategies for precise length control without the need for model fine-tuning or architectural changes. With our work, we not only advance our understanding of LLM behavior in controlled text generation but also pave the way for more reliable and adaptable summarization systems in real-world applications.</abstract>
       <url hash="ef02ede1">2025.findings-naacl.34</url>
@@ -595,7 +595,7 @@
     <paper id="44">
       <title>Tethering Broken Themes: Aligning Neural Topic Models with Labels and Authors</title>
       <author><first>Mayank</first><last>Nagda</last></author>
-      <author><first>Phil</first><last>Ostheimer</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
+      <author id="phil-sidney-ostheimer"><first>Phil</first><last>Ostheimer</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
       <author><first>Sophie</first><last>Fellenz</last><affiliation>Universität Kaiserslautern</affiliation></author>
       <pages>740-760</pages>
       <abstract>Topic models are a popular approach for extracting semantic information from large document collections. However, recent studies suggest that the topics generated by these models often do not align well with human intentions. Although metadata such as labels and authorship information are available, it has not yet been effectively incorporated into neural topic models. To address this gap, we introduce FANToM, a novel method to align neural topic models with both labels and authorship information. FANToM allows for the inclusion of this metadata when available, producing interpretable topics and author distributions for each topic. Our approach demonstrates greater expressiveness than conventional topic models by learning the alignment between labels, topics, and authors. Experimental results show that FANToM improves existing models in terms of both topic quality and alignment. Additionally, it identifies author interests and similarities.</abstract>
@@ -607,7 +607,7 @@
       <title>Towards Zero-Shot Multimodal Machine Translation</title>
       <author><first>Matthieu</first><last>Futeral</last></author>
       <author><first>Cordelia</first><last>Schmid</last><affiliation>Google, INRIA and Inria</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
       <pages>761-778</pages>
       <abstract>Current multimodal machine translation (MMT) systems rely on fully supervised data (i.e sentences with their translations and accompanying images), which is costly to collect and prevents the extension of MMT to language pairs with no such data. We propose a method to bypass the need for fully supervised data to train MMT systems, using multimodal English data only. Our method ( ZeroMMT) consists in adapting a strong text-only machine translation (MT) model by training it jointly on two objectives: visually conditioned masked language modelling and the Kullback-Leibler divergence between the original MT and new MMT outputs. We evaluate on standard MMT benchmarks and on CoMMuTE, a contrastive test set designed to evaluate how well models use images to disambiguate translations. ZeroMMT obtains disambiguation results close to state-of-the-art MMT models trained on fully supervised examples. To prove that ZeroMMT generalizes to languages with no fully supervised training data, we extend CoMMuTE to three new languages: Arabic, Russian and Chinese. We also show that we can control the trade-off between disambiguation capabilities and translation fidelity at inference time using classifier-free guidance and without any additional data. Our code, data and trained models are publicly accessible.</abstract>
@@ -617,7 +617,7 @@
     </paper>
     <paper id="46">
       <title>Large-Scale Corpus Construction and Retrieval-Augmented Generation for <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hinese Poetry: New Method and Data Insights</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>South China University of Technology</affiliation></author>
       <author><first>Lan</first><last>Lan</last></author>
       <author><first>Jiahuan</first><last>Cao</last></author>
       <author><first>Hiuyi</first><last>Cheng</last></author>
@@ -646,7 +646,7 @@
     <paper id="48">
       <title>Dialetto, ma Quanto Dialetto? Transcribing and Evaluating Dialects on a Continuum</title>
       <author><first>Ryan Soh-Eun</first><last>Shim</last><affiliation>Ludwig-Maximilians-Universität München, University of Stuttgart, Universität Stuttgart and Institute for Natural Language Processing, University of Stuttgart</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>838-849</pages>
       <abstract>There is increasing interest in looking at dialects in NLP. However, most work to date still treats dialects as discrete categories. For instance, evaluative work in variation-oriented NLP for English often works with Indian English or African-American Venacular English as homogeneous categories, yet even within one variety there is substantial variation. We examine within-dialect variation and show that performance critically varies within categories. We measure speech-to-text performance on Italian dialects, and empirically observe a geographical performance disparity. This disparity correlates substantially (-0.5) with linguistic similarity to the highest performing dialect variety. We cross-examine our results against dialectometry methods, and interpret the performance disparity to be due to a bias towards dialects that are more similar to the standard variety in the speech-to-text model examined. We additionally leverage geostatistical methods to predict zero-shot performance at unseen sites, and find the incorporation of geographical information to substantially improve prediction performance, indicating there to be geographical structure in the performance distribution.</abstract>
       <url hash="2adec0a7">2025.findings-naacl.48</url>
@@ -680,7 +680,7 @@
     <paper id="51">
       <title><fixed-case>LMM</fixed-case>s-Eval: Reality Check on the Evaluation of Large Multimodal Models</title>
       <author><first>Kaichen</first><last>Zhang</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Peiyuan</first><last>Zhang</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Fanyi</first><last>Pu</last></author>
       <author><first>Joshua Adrian</first><last>Cahyono</last></author>
@@ -778,7 +778,7 @@
       <author><first>Jinchao</first><last>Zhang</last></author>
       <author><first>Lixiangfang</first><last>Lixiangfang</last></author>
       <author><first>Lichuanrong</first><last>Lichuanrong</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
       <pages>1033-1044</pages>
       <abstract>Large language models (LLMs) exhibit exceptional performance across a wide range of natural language processing tasks, often relying on lengthy prompts to harness their full capabilities. However, extended prompts can lead to substantial computational overhead and increased hardware demands, limiting the scalability and efficiency of such models. In this paper, we propose DisComp, a two-stage prompt compression framework based on knowledge distillation that combines task-agnostic and task-aware strategies, designed to efficiently compress prompt length without compromising performance.In the first stage, task-agnostic compression is achieved through knowledge distillation, transferring the summarization capabilities of a LLM to a smaller, more efficient model. The distillation process combines cross-entropy loss and keyword matching loss to ensure the smaller model generates concise and informative summaries. In the second stage, sentence-level pruning is applied, where sentences are ranked by relevance to the query, and irrelevant sentences are pruned to retain only task-critical information. We evaluate our method on three benchmark datasets, LongBench , ZeroSCROLLS and NaturalQuestions. The results show that DisComp significantly outperforms previous task-agnostic and task-specific compression approaches, and it is up to 6.56× faster at inference compared to the best token-level compression method.</abstract>
       <url hash="cd154829">2025.findings-naacl.58</url>
@@ -799,7 +799,7 @@
       <title><fixed-case>RAMQA</fixed-case>: A Unified Framework for Retrieval-Augmented Multi-Modal Question Answering</title>
       <author><first>Yang</first><last>Bai</last><affiliation>Facebook</affiliation></author>
       <author><first>Christan</first><last>Grant</last><affiliation>University of Florida</affiliation></author>
-      <author><first>Daisy Zhe</first><last>Wang</last><affiliation>University of Florida</affiliation></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last><affiliation>University of Florida</affiliation></author>
       <pages>1061-1076</pages>
       <abstract>Multi-modal retrieval-augmented Question Answering (MRAQA), integrating text and images, has gained significant attention in information retrieval (IR) and natural language processing (NLP). Traditional ranking methods rely on small encoder-based language models, which are incompatible with modern decoder-based generative large language models (LLMs) that have advanced various NLP tasks. To bridge this gap, we propose RAMQA, a unified framework combining learning-to-rank methods with generative permutation-enhanced ranking techniques. We first train a pointwise multi-modal ranker using LLaVA as the backbone. Then, we apply instruction tuning to train a LLaMA model for re-ranking the top-k documents using an innovative autoregressive multi-task learning approach. Our generative ranking model generates re-ranked document IDs and specific answers from document candidates in various permutations. Experiments on two MRAQA benchmarks, WebQA and MultiModalQA, show significant improvements over strong baselines, highlighting the effectiveness of our approach. Data and code will be made public once the paper is accepted.</abstract>
       <url hash="04f85d0e">2025.findings-naacl.60</url>
@@ -834,7 +834,7 @@
       <author><first>Zhihao</first><last>Yang</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Ling</first><last>Luo</last><affiliation>Dalian University of Technology</affiliation></author>
       <author><first>Hongfei</first><last>Lin</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <pages>1112-1128</pages>
       <abstract>Few-Shot Document-Level Relation Extraction (FSDLRE) aims to develop models capable of generalizing to new categories with minimal support examples. Although Large Language Models (LLMs) demonstrate exceptional In-Context Learning (ICL) capabilities on many few-shot tasks, their performance on FSDLRE tasks remains suboptimal due to the significant gap between the task format and the intrinsic capabilities of language models, coupled with the complexity of ICL prompts for document-level text. To address these challenges, we introduce a novel meta-training approach for LLMs termed Prototype Tuning. We construct simulated episodes using data with relation types that do not overlap with the test corpus, fundamentally enhancing the ICL capabilities of LLMs in FSDLRE through meta-learning. To further enhance the effects of meta-learning, we innovatively integrate the concept of prototype into the fine-tuning process of LLMs. This involves aggregating entity pairs from support documents into prototypes within the prompts and altering the way of determining relation categories to identifying the closest prototype. Experimental results demonstrate that our LLMs trained with this approach outperform all baselines. Our proposed approach markedly improves the ICL capabilities of LLMs in FSDLRE and mitigates the impact of relation semantic discrepancies between the training corpus and the test corpus on model performance.</abstract>
       <url hash="ced75298">2025.findings-naacl.62</url>
@@ -917,7 +917,7 @@
     <paper id="68">
       <title>In-Context Example Selection via Similarity Search Improves Low-Resource Machine Translation</title>
       <author><first>Armel Randy</first><last>Zebaze</last><affiliation>INRIA</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
       <pages>1222-1252</pages>
       <abstract>The ability of generative large language models (LLMs) to perform in-context learning has given rise to a large body of research into how best to prompt models for various natural language processing tasks. In this paper, we focus on machine translation (MT), a task that has been shown to benefit from in-context translation examples. However no systematic studies have been published on how best to select examples, and mixed results have been reported on the usefulness of similarity-based selection over random selection, although these results have mainly been shown for high-resource languages only. We provide a study covering multiple LLMs and in-context example retrieval strategies. Contrarily to previously published results, we find that retrieval based on sentence embedding similarity can improve MT, especially for low-resource language directions, and we also discuss the balance between selection pool diversity and quality. Code and outputs will be made freely available.</abstract>
@@ -1011,7 +1011,7 @@
       <author><first>Terry</first><last>Ruas</last><affiliation>Georg-August Universität Göttingen</affiliation></author>
       <author><first>Andre</first><last>Greiner-Petter</last><affiliation>Georg-August Universität Göttingen</affiliation></author>
       <author><first>Bela</first><last>Gipp</last><affiliation>Georg-August Universität Göttingen</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <author><first>Timo</first><last>Spinde</last></author>
       <pages>1370-1386</pages>
       <abstract>High annotation costs from hiring or crowdsourcing complicate the creation of large, high-quality datasets needed for training reliable text classifiers. Recent research suggests using Large Language Models (LLMs) to automate the annotation process, reducing these costs while maintaining data quality. LLMs have shown promising results in annotating downstream tasks like hate speech detection and political framing. Building on the success in these areas, this study investigates whether LLMs are viable for annotating a complex task of media bias detection and whether a downstream media bias classifier can be trained on such data. We create Annolexical, the first large-scale dataset for media bias classification with over 48k synthetically annotated examples. Our classifier fine-tuned on it surpasses all of the annotator LLMs by 5-9% in Mathew’s Correlation Coefficient (MCC) and performs close to or outperforms the model trained on human-labeled data when evaluated on two media bias benchmark datasets (BABE and BASIL). This study demonstrates how our approach significantly reduces the cost of dataset creation in the media bias domain and, by extension - the development of the classifiers, while our subsequent behavioral stress-testing reveals some of its current limitations and trade-offs.</abstract>
@@ -1049,7 +1049,7 @@
       <author><first>Peinan</first><last>Zhang</last><affiliation>CyberAgent AI Lab</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Hiroya</first><last>Takamura</last><affiliation>AIST, National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>1426-1439</pages>
       <abstract>Effective linguistic choices that attract potential customers play crucial roles in advertising success. This study aims to explore the linguistic features of ad texts that influence human preferences. Although the creation of attractive ad texts is an active area of research, progress in understanding the specific linguistic features that affect attractiveness is hindered by several obstacles. First, human preferences are complex and influenced by multiple factors, including their content, such as brand names, and their linguistic styles, making analysis challenging. Second, publicly available ad text datasets that include human preferences are lacking, such as ad performance metrics and human feedback, which reflect people’s interests. To address these problems, we present AdParaphrase, a paraphrase dataset that contains human preferences for pairs of ad texts that are semantically equivalent but differ in terms of wording and style. This dataset allows for preference analysis that focuses on the differences in linguistic features. Our analysis revealed that ad texts preferred by human judges have higher fluency, longer length, more nouns, and use of bracket symbols. Furthermore, we demonstrate that an ad text-generation model that considers these findings significantly improves the attractiveness of a given text. The dataset is publicly available at: https://github.com/CyberAgentAILab/AdParaphrase.</abstract>
       <url hash="50d0f480">2025.findings-naacl.78</url>
@@ -1114,7 +1114,7 @@
       <author><first>Moritz</first><last>Plenz</last><affiliation>Institute for Computational Linguistics, Heidelberg University, Ruprecht-Karls-Universität Heidelberg</affiliation></author>
       <author><first>Philipp</first><last>Heinisch</last></author>
       <author><first>Janosch</first><last>Gehring</last></author>
-      <author><first>Philipp</first><last>Cimiano</last><affiliation>Bielefeld University and Bielefeld University</affiliation></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last><affiliation>Bielefeld University and Bielefeld University</affiliation></author>
       <author><first>Anette</first><last>Frank</last><affiliation>Ruprecht-Karls-Universität Heidelberg</affiliation></author>
       <pages>1525-1553</pages>
       <abstract>Debating over conflicting issues is a necessary first step towards resolving conflicts. However, intrinsic perspectives of an arguer are difficult to overcome by persuasive argumentation skills. Proceeding from a debate to a deliberative process, where we can identify actionable options for resolving a conflict requires a deeper analysis of arguments and the perspectives they are grounded in - as it is only from there that one can derive mutually agreeable resolution steps. In this work we develop a framework for a deliberative analysis of arguments in a computational argumentation setup. We conduct a fine-grained analysis of perspectivized stances expressed in the arguments of different arguers or stakeholders on a given issue, aiming not only to identify their opposing views, but also shared perspectives arising from their attitudes, values or needs. We formalize this analysis in Perspectivized Stance Vectors that characterize the individual perspectivized stances of all arguers on a given issue. We construct these vectors by determining issue- and argument-specific concepts, and predict an arguer’s stance relative to each of them. The vectors allow us to measure a modulated (dis)agreement between arguers, structured by perspectives, which allows us to identify actionable points for conflict resolution, as a first step towards deliberation.</abstract>
@@ -1141,7 +1141,7 @@
     </paper>
     <paper id="85">
       <title>Does Generative <fixed-case>AI</fixed-case> speak <fixed-case>N</fixed-case>igerian-<fixed-case>P</fixed-case>idgin?: Issues about Representativeness and Bias for Multilingualism in <fixed-case>LLM</fixed-case>s</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>A. Seza</first><last>Doğruöz</last><affiliation>Ghent University</affiliation></author>
       <author><first>Iyanuoluwa</first><last>Shode</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Anuoluwapo</first><last>Aremu</last></author>
@@ -1165,7 +1165,7 @@
     <paper id="87">
       <title>Decoding Dark Matter: Specialized Sparse Autoencoders for Interpreting Rare Concepts in Foundation Models</title>
       <author><first>Aashiq</first><last>Muhamed</last></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Virginia</first><last>Smith</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>1604-1635</pages>
       <abstract>Understanding and mitigating the potential risks associated with foundation models (FMs) hinges on developing effective interpretability methods. Sparse Autoencoders (SAEs) have emerged as a promising tool for disentangling FM representations, but they struggle to capture rare, yet crucial concepts in the data. We introduce Specialized Sparse Autoencoders (SSAEs), designed to illuminate these elusive dark matter features by focusing on specific subdomains. We present a practical recipe for training SSAEs, demonstrating the efficacy of dense retrieval for data selection and the benefits of Tilted Empirical Risk Minimization as a training objective to improve concept recall. Our evaluation of SSAEs on standard metrics, such as downstream perplexity and <tex-math>L_0</tex-math> sparsity, show that they effectively capture subdomain tail concepts, exceeding the capabilities of general-purpose SAEs. We showcase the practical utility of SSAEs in a case study on the Bias in Bios dataset, where SSAEs achieve a 12.5% increase in worst-group classification accuracy over the pretrained general-purpose SAE when applied to remove spurious gender information. SSAEs provide a powerful new lens for peering into the inner workings of FMs in subdomains.</abstract>
@@ -1177,7 +1177,7 @@
       <title><fixed-case>MA</fixed-case>i<fixed-case>DE</fixed-case>-up: Multilingual Deception Detection of <fixed-case>AI</fixed-case>-generated Hotel Reviews</title>
       <author><first>Oana</first><last>Ignat</last><affiliation>Santa Clara University</affiliation></author>
       <author><first>Xiaomeng</first><last>Xu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>1636-1653</pages>
       <abstract>Deceptive reviews are becoming increasingly common, especially given the increase in performance and the prevalence of LLMs. While work to date has addressed the development of models to differentiate between truthful and deceptive human reviews, much less is known about the distinction between real reviews and AI-authored fake reviews. Moreover, most of the research so far has focused primarily on English, with very little work dedicated to other languages. In this paper, we compile and make publicly available the MAiDE-up dataset, consisting of 10,000 real and 10,000 AI-generated fake hotel reviews, balanced across ten languages. Using this dataset, we conduct extensive linguistic analyses to (1) compare the AI fake hotel reviews to real hotel reviews, and (2) identify the factors that influence the deception detection model performance. We explore the effectiveness of several models for deception detection in hotel reviews across three main dimensions: sentiment, location, and language. We find that these dimensions influence how well we can detect AI-generated fake reviews.</abstract>
       <url hash="e6812d65">2025.findings-naacl.88</url>
@@ -1224,7 +1224,7 @@
       <author><first>Adil</first><last>Soubki</last><affiliation>State University of New York at Stony Brook</affiliation></author>
       <author><first>John</first><last>Murzaku</last><affiliation>, State University of New York at Stony Brook</affiliation></author>
       <author><first>Peter</first><last>Zeng</last><affiliation>State University of New York at Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>1701-1708</pages>
       <abstract>The NLP community has broadly focused on text-only approaches of cognitive state tasks, but audio can provide vital missing cues through prosody. We posit that text-to-speech models learn to track aspects of cognitive state in order to produce naturalistic audio, and that the signal audio models implicitly identify is orthogonal to the information that language models exploit. We present Synthetic Audio Data fine-tuning (SAD), a framework where we show that 7 tasks related to cognitive state modeling benefit from multimodal training on both text and zero-shot synthetic audio data from an off-the-shelf TTS system. We show an improvement over the text-only modality when adding synthetic audio data to text-only corpora. Furthermore, on tasks and corpora that do contain gold audio, we show our SAD framework achieves competitive performance with text and synthetic audio compared to text and gold audio.</abstract>
       <url hash="03416ad1">2025.findings-naacl.92</url>
@@ -1276,13 +1276,13 @@
       <author><first>Valentina</first><last>Pyatkin</last></author>
       <author><first>Jacob</first><last>Morrison</last></author>
       <author><first>LJ</first><last>Miranda</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Sachin</first><last>Kumar</last></author>
       <author><first>Tom</first><last>Zick</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>1755-1797</pages>
       <abstract>Reward models (RMs) are at the crux of successfully using RLHF to align pretrained models to human preferences, yet there has been relatively little study that focuses on evaluation of those models. Evaluating reward models presents an opportunity to understand the opaque technologies used for alignment of language models and which values are embedded in them. Resources for reward model training and understanding are sparse in the nascent open-source community around them. To enhance scientific understanding of reward models, we present RewardBench, a benchmark dataset and code-base for evaluation. The RewardBench dataset is a collection of prompt-chosen-rejected trios spanning chat, reasoning, and safety, to benchmark how reward models perform on challenging, structured and out-of-distribution queries. We create specific comparison datasets for RMs that have subtle, but verifiable reasons (e.g. bugs, incorrect facts) why one answer should be preferred to another. On the RewardBench leaderboard, we evaluate RMs trained with a variety of methods, such as the direct MLE training of classifiers and the implicit reward modeling of Direct Preference Optimization (DPO). We present many findings on propensity for refusals, reasoning limitations, and instruction following shortcomings of various reward models towards a better understanding of the RLHF process.</abstract>
@@ -1408,7 +1408,7 @@
     <paper id="106">
       <title><fixed-case>P</fixed-case>roxy<fixed-case>LM</fixed-case>: Predicting Language Model Performance on Multilingual Tasks via Proxy Models</title>
       <author><first>David</first><last>Anugraha</last></author>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Chenyue</first><last>Li</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Patrick Amadeus</first><last>Irawan</last></author>
       <author><first>En-Shiun Annie</first><last>Lee</last></author>
@@ -1470,7 +1470,7 @@
       <author><first>Junzhang</first><last>Liu</last><affiliation>Columbia University</affiliation></author>
       <author><first>Xudong</first><last>Lin</last><affiliation>Columbia University</affiliation></author>
       <author><first>Zhecan</first><last>Wang</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University and Columbia University</affiliation></author>
       <pages>2099-2116</pages>
       <abstract>The task of predicting time and location from images is challenging and requires complex human-like puzzle-solving ability over different clues. In this work, we formalize this ability into core skills and implement them using different modules in an expert pipeline called PuzzleGPT. PuzzleGPT consists of a perceiver to identify visual clues, a reasoner to deduce prediction candidates, a combiner to combinatorially combine information from different clues, a web retriever to get external knowledge if the task can’t be solved locally, and a noise filter for robustness. This results in a zero-shot, interpretable, and robust approach that records state-of-the-art performance on two datasets – TARA and WikiTilo. PuzzleGPT outperforms large VLMs such as BLIP-2, InstructBLIP, LLaVA, and even GPT-4V, as well as automatically generated reasoning pipelines like VisProg, by at least 32% and 38%, respectively. It even rivals or surpasses finetuned models.</abstract>
       <url hash="659312f6">2025.findings-naacl.111</url>
@@ -1505,7 +1505,7 @@
       <title>Is Semantic Chunking Worth the Computational Cost?</title>
       <author><first>Renyi</first><last>Qu</last><affiliation>Vectara</affiliation></author>
       <author><first>Ruixuan</first><last>Tu</last></author>
-      <author><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
+      <author id="forrest-bao"><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
       <pages>2155-2177</pages>
       <abstract>Recent advances in Retrieval-Augmented Generation (RAG) systems have popularized semantic chunking, which aims to improve retrieval performance by dividing documents into semantically coherent segments. Despite its growing adoption, the actual benefits over simpler fixed-size chunking, where documents are split into consecutive, fixed-size segments, remain unclear. This study systematically evaluates the effectiveness of semantic chunking using three common retrieval-related tasks: document retrieval, evidence retrieval, and retrieval-based answer generation. The results show that the computational costs associated with semantic chunking are not justified by consistent performance gains. These findings challenge the previous assumptions about semantic chunking and highlight the need for more efficient chunking strategies in RAG systems.</abstract>
       <url hash="b2f78b28">2025.findings-naacl.114</url>
@@ -1541,7 +1541,7 @@
       <author><first>Lin</first><last>Ai</last></author>
       <author><first>Pengyuan</first><last>Shi</last><affiliation>Columbia University</affiliation></author>
       <author><first>Kaan</first><last>Donbekci</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>2202-2218</pages>
       <url hash="3b9a30e8">2025.findings-naacl.117</url>
       <bibkey>wu-etal-2025-beyond</bibkey>
@@ -1785,7 +1785,7 @@
       <author><first>Jaesung</first><last>Hwang</last></author>
       <author><first>Jingun</first><last>Kwon</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2489-2500</pages>
       <abstract>This study investigates retrieval-augmented summarization by specifically examining the impact of exemplar summary lengths because previous methods have not considered length constraints. We propose a Diverse Length-aware Maximal Marginal Relevance (DL-MMR) algorithm to better control summary lengths. This algorithm combines the query relevance with diverse target lengths in retrieval-augmented summarization. Unlike previous methods that necessitate exhaustive exemplar-exemplar relevance comparisons using MMR, DL-MMR considers the exemplar target length as well and avoids comparing exemplars to each other, thereby reducing computational cost and conserving memory during the construction of an exemplar pool. Experimental results showed the effectiveness of DL-MMR, which considers length diversity, compared to the original MMR algorithm. DL-MMR additionally showed the effectiveness in memory saving of 781,513 times and computational cost reduction of 500,092 times, while maintaining the same level of informativeness.</abstract>
       <url hash="29ff7163">2025.findings-naacl.134</url>
@@ -1801,7 +1801,7 @@
       <author><first>Ke</first><last>Zou</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Ninghao</first><last>Liu</last><affiliation>University of Georgia</affiliation></author>
       <author><first>Yih Chung</first><last>Tham</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Qingyu</first><last>Chen</last><affiliation>Yale University</affiliation></author>
       <pages>2501-2522</pages>
       <abstract>The prevalence of vision-threatening eye diseases is a significant global burden, with many cases remaining undiagnosed or diagnosed too late for effective treatment. Large vision-language models (LVLMs) have the potential to assist in understanding anatomical information, diagnosing eye diseases, and drafting interpretations and follow-up plans, thereby reducing the burden on clinicians and improving access to eye care. However, limited benchmarks are available to assess LVLMs’ performance in ophthalmology-specific applications. In this study, we introduce LMOD, a large-scale multimodal ophthalmology benchmark consisting of 21,993 instances across (1) five ophthalmic imaging modalities: optical coherence tomography, color fundus photographs, scanning laser ophthalmoscopy, lens photographs, and surgical scenes; (2) free-text, demographic, and disease biomarker information; and (3) primary ophthalmology-specific applications such as anatomical information understanding, disease diagnosis, and subgroup analysis. In addition, we benchmarked 13 state-of-the-art LVLM representatives from closed-source, open-source, and medical domains. The results demonstrate a significant performance drop for LVLMs in ophthalmology compared to other domains. Systematic error analysis further identified six major failure modes: misclassification, failure to abstain, inconsistent reasoning, hallucination, assertions without justification, and lack of domain-specific knowledge. In contrast, supervised neural networks specifically trained on these tasks as baselines demonstrated high accuracy. These findings underscore the pressing need for benchmarks in the development and validation of ophthalmology-specific LVLMs.</abstract>
@@ -1874,7 +1874,7 @@
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>2586-2606</pages>
       <abstract>Current research found the issue of Early Answering in large language models (LLMs), where the models already have an answer before generating the Chain-of-Thought (CoT). This phenomenon suggests a potential lack of necessary dependency between the predicted answer and the reasoning process. Consequently, two important questions arise: (1) Is CoT still necessary if the model already has an answer? (2) Can the correctness of the answer serve as valid evidence for the correctness of CoT? To address these questions, we propose a method, namely Chain-of-Probe (CoP), to probe changes in confidence during the model’s reasoning. The probing results show that in a significant number of question-answer cases, CoT appears to be unnecessary, and this necessity correlates with the simplicity of the task, defined by the reasoning steps required. Furthermore, by analyzing patterns in confidence change, we examine the correctness of the model’s reasoning. Our validation reveals that many responses, although correct in their final answer, contain errors in their reasoning process. To this end, we propose a strategic approach based on CoP to prioritize answers with correct reasoning among multiple candidates, thereby bolstering the reliability of the model’s reasoning.</abstract>
       <url hash="dbb1f8f0">2025.findings-naacl.140</url>
@@ -1924,7 +1924,7 @@
       <author><first>Zhe</first><last>Chen</last></author>
       <author><first>Zhe</first><last>Fang</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Yuxiang</first><last>Song</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>2642-2653</pages>
       <abstract>With the scale of Large Language Models(LLMs) and the size of the training data continuing to expand, the computational costs required for training or tuning have significantly increased as well. In this work we propose an efficient and effective Large-Scale Data Compression (LSDC) method to substantially reduce the size of training data and thus enhance the training efficiency without compromising the performance of LLMs through a bifurcated quantization strategy. Specifically, our method first segments the dataset into multiple clusters, significantly reducing the time and memory requirements for data compression. Then, during the second phase of coreset selection, the diversity of samples is ensured by maximizing the submodular gain in order to avoid performance degradation. The comparative experiments showed that the performance of LLMs fine-tuned on a 20% compressed subset of the Alpaca dataset using LSDC outperformed those on the full dataset. Moreover,on a domain-specific instruction dataset of millions of samples, the LLMs fine-tuned on a 10% compressed dataset using LSDC outperformed those on the entire dataset, which dramatically enhances the domain-adaption capabilities of LLMs. This provides a promising potential of LSDC in training bigger LLMs from scratch and supervised fine-tuning as well.</abstract>
       <url hash="b683ff24">2025.findings-naacl.143</url>
@@ -2077,8 +2077,8 @@
       <author><first>Jujia</first><last>Zhao</last></author>
       <author><first>Wenjie</first><last>Wang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Chen</first><last>Xu</last></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>2852-2865</pages>
       <abstract>Large Language Models (LLMs) have showcased their potential in building generative recommendation systems through fine-tuning user behavior data. However, utilizing the user behavior data may pose significant privacy risks like in the traditional recommender models, potentially leading to ethical dilemmas and violations of data protection regulations. To address the privacy concerns, Federated Learning for Recommendation (Fed4Rec) has been identified as a promising solution. However, directly applying Fed4Rec in the LLM context introduces two challenges: 1) exacerbated client performance imbalance, which ultimately impacts the system’s long-term effectiveness, and 2) substantial client resource costs, posing a high demand for clients’ both computational and storage capability to locally train and infer LLMs.To tackle these challenges, we propose a federated framework for LLM-based recommendation (shorted as FELLRec). Generally, FELLRec designs two key strategies. 1) Dynamic balance strategy, which designs dynamic parameter aggregation and learning speed for different clients during training, aiming to ensure relatively balanced performance across clients. 2) Flexible storage strategy, which selectively retains certain sensitive LLM layers on the client side, while offloading other layers to the server, aiming to preserve privacy while saving resources. Specifically, FELLRec flexibly maintains those input and output layers on the client side to ensure the protection of all sensitive information. Experiment results show that FELLRec can achieve a more balanced client performance and improved overall performance in a computational and storage-efficient way while safeguarding user privacy well.</abstract>
       <url hash="99f2ea32">2025.findings-naacl.155</url>
@@ -2094,7 +2094,7 @@
       <author><first>Yichen</first><last>Di</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Lijie</first><last>Wen</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Irwin</first><last>King</last></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <pages>2866-2882</pages>
       <abstract>Watermarking algorithms for large language models (LLMs) have attained high accuracy in detecting LLM-generated text. However, existing methods primarily focus on distinguishing fully watermarked text from non-watermarked text, overlooking real-world scenarios where LLMs generate only small sections within large documents. In this scenario, balancing time complexity and detection performance poses significant challenges. This paper presents WaterSeeker, a novel approach to efficiently detect and locate watermarked segments amid extensive natural text. It first applies an efficient anomaly extraction method to preliminarily locate suspicious watermarked regions. Following this, it conducts a local traversal and performs full-text detection for more precise verification. Theoretical analysis and experimental results demonstrate that WaterSeeker achieves a superior balance between detection accuracy and computational efficiency. Moreover, its localization capability lays the foundation for building interpretable AI detection systems. Our code is available at https://github.com/THU-BPM/WaterSeeker.</abstract>
       <url hash="40b052d2">2025.findings-naacl.156</url>
@@ -2106,7 +2106,7 @@
       <author><first>Chanhee</first><last>Park</last><affiliation>Korea University</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>2883-2900</pages>
       <abstract>Retrieval-Augmented Generation (RAG) has gained prominence as an effective method for enhancing the generative capabilities of Large Language Models (LLMs) through the incorporation of external knowledge. However, the evaluation of RAG systems remains a challenge, due to the intricate interplay between retrieval and generation components. This limitation has resulted in a scarcity of benchmarks that facilitate a detailed, component-specific assessment. In this work, we present MIRAGE, a Question Answering dataset specifically designed for RAG evaluation. MIRAGE consists of 7,560 curated instances mapped to a retrieval pool of 37,800 entries, enabling an efficient and precise evaluation of both retrieval and generation tasks. We also introduce novel evaluation metrics aimed at measuring RAG adaptability, encompassing dimensions such as noise vulnerability, context acceptability, context insensitivity, and context misinterpretation. Through comprehensive experiments across various retriever-LLM configurations, we provide new insights into the optimal alignment of model pairs and the nuanced dynamics within RAG systems. The dataset and evaluation code are publicly available, allowing for seamless integration and customization in diverse research settings.</abstract>
       <url hash="4c966747">2025.findings-naacl.157</url>
@@ -2122,7 +2122,7 @@
       <author><first>Hasan</first><last>Iqbal</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Dhruv</first><last>Sahnan</last></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2901-2914</pages>
       <abstract>Fact-checking long-form text is challenging, and it is therefore common practice to break it down into multiple atomic claims. The typical approach to fact-checking these atomic claims involves retrieving a fixed number of pieces of evidence, followed by a verification step. However, this method is usually not cost-effective, as it underutilizes the verification model’s internal knowledge of the claim and fails to replicate the iterative reasoning process in human search strategies. To address these limitations, we propose FIRE, a novel agent-based framework that integrates evidence retrieval and claim verification in an iterative manner. Specifically, FIRE employs a unified mechanism to decide whether to provide a final answer or generate a subsequent search query, based on its confidence in the current judgment. We compare FIRE with other strong fact-checking frameworks and find that it achieves slightly better performance while reducing large language model (LLM) costs by an average of 7.6 times and search costs by 16.5 times. These results indicate that FIRE holds promise for application in large-scale fact-checking operations.</abstract>
       <url hash="c365b339">2025.findings-naacl.158</url>
@@ -2261,7 +2261,7 @@
     </paper>
     <paper id="168">
       <title>Language Modeling with Editable External Knowledge</title>
-      <author><first>Belinda Z.</first><last>Li</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="belinda-z-li"><first>Belinda Z.</first><last>Li</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Emmy</first><last>Liu</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Alexis</first><last>Ross</last><affiliation>Massachusetts Institute of Technology and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Abbas</first><last>Zeitoun</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
@@ -2370,7 +2370,7 @@
       <author><first>Zora Zhiruo</first><last>Wang</last></author>
       <author><first>Akari</first><last>Asai</last><affiliation>Paul G. Allen School of Computer Science &amp; Engineering, University of Washington</affiliation></author>
       <author><first>Xinyan Velocity</first><last>Yu</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Yiqing</first><last>Xie</last></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Meta AI and Carnegie Mellon University</affiliation></author>
@@ -2675,7 +2675,7 @@
       <author><first>Shimin</first><last>Li</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>3563-3605</pages>
       <abstract>As Artificial General Intelligence (AGI) becomes increasingly integrated into various facets of human life, ensuring the safety and ethical alignment of such systems is paramount. Previous studies primarily focus on single-modality threats, which may not suffice given the integrated and complex nature of cross-modality interactions. We introduce a novel safety alignment challenge called Safe Inputs but Unsafe Output (*SIUO*) to evaluate cross-modality safety alignment. Specifically, it considers cases where single modalities are safe independently but could potentially lead to unsafe or unethical outputs when combined. To empirically investigate this problem, we developed the *SIUO*, a cross-modality benchmark encompassing 9 critical safety domains, such as self-harm, illegal activities, and privacy violations. Our findings reveal substantial safety vulnerabilities in both closed- and open-source LVLMs, such as GPT-4V and LLaVA, underscoring the inadequacy of current models to reliably interpret and respond to complex, real-world scenarios.</abstract>
       <url hash="d24530a3">2025.findings-naacl.198</url>
@@ -2688,7 +2688,7 @@
       <author><first>Seungyoon</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>3606-3620</pages>
       <abstract>Recent advancements in Large Language Models (LLMs) have significantly enhanced interactions between users and models. These advancements concurrently underscore the need for rigorous safety evaluations due to the manifestation of social biases, which can lead to harmful societal impacts. Despite these concerns, existing benchmarks may overlook the intrinsic weaknesses of LLMs, which can generate biased responses even with simple adversarial instructions. To address this critical gap, we introduce a new benchmark, Fairness Benchmark in LLM under Extreme Scenarios (FLEX), designed to test whether LLMs can sustain fairness even when exposed to prompts constructed to induce bias. To thoroughly evaluate the robustness of LLMs, we integrate prompts that amplify potential biases into the fairness assessment. Comparative experiments between FLEX and existing benchmarks demonstrate that traditional evaluations may underestimate the inherent risks in models. This highlights the need for more stringent LLM evaluation benchmarks to guarantee safety and fairness.</abstract>
       <url hash="d8d61ad5">2025.findings-naacl.199</url>
@@ -2916,7 +2916,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Huije</first><last>Lee</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Hoyun</first><last>Song</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Youngjin</first><last>Kwon</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <pages>3895-3911</pages>
       <abstract>Accelerating inference in Large Language Models (LLMs) is critical for real-time interactions, as they have been widely incorporated into real-world services. Speculative decoding, a fully algorithmic solution, has gained attention for improving inference speed by drafting and verifying tokens, thereby generating multiple tokens in a single forward pass. However, current drafting strategies usually require significant fine-tuning or have inconsistent performance across tasks. To address these challenges, we propose Hierarchy Drafting (HD), a novel lossless drafting approach that organizes various token sources into multiple databases in a hierarchical framework based on temporal locality. In the drafting step, HD sequentially accesses multiple databases to obtain draft tokens from the highest to the lowest locality, ensuring consistent acceleration across diverse tasks and minimizing drafting latency. Our experiments on Spec-Bench using LLMs with 7B and 13B parameters demonstrate that HD outperforms existing database drafting methods, achieving robust inference speedups across model sizes, tasks, and temperatures.</abstract>
@@ -2984,8 +2984,8 @@
     <paper id="221">
       <title><fixed-case>XAMPLER</fixed-case>: Learning to Retrieve Cross-Lingual In-Context Examples</title>
       <author><first>Peiqin</first><last>Lin</last><affiliation>Institut für Informatik</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>3968-3977</pages>
       <abstract>Recent studies indicate that leveraging off-the-shelf or fine-tuned retrievers, capable of retrieving relevant in-context examples tailored to the input query, enhances few-shot in-context learning of English. However, adapting these methods to other languages, especially low-resource ones, poses challenges due to the scarcity of cross-lingual retrievers and annotated data. Thus, we introduce XAMPLER: Cross-Lingual Example Retrieval, a method tailored to tackle the challenge of cross-lingual in-context learning using only annotated English data. XAMPLER first trains a retriever based on Glot500, a multilingual small language model, using positive and negative English examples constructed from the predictions of a multilingual large language model, i.e., MaLA500. Leveraging the cross-lingual capacity of the retriever, it can directly retrieve English examples as few-shot examples for in-context learning of target languages. Experiments on two multilingual text classification benchmarks, namely SIB200 with 176 languages and MasakhaNEWS with 16 languages, demonstrate that XAMPLER substantially improves the in-context learning performance across languages.</abstract>
       <url hash="3d4d7436">2025.findings-naacl.221</url>
@@ -2995,7 +2995,7 @@
     <paper id="222">
       <title>Evaluating Cultural and Social Awareness of <fixed-case>LLM</fixed-case> Web Agents</title>
       <author><first>Haoyi</first><last>Qiu</last><affiliation>UCLA Computer Science Department, University of California, Los Angeles</affiliation></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Divyansh</first><last>Agarwal</last><affiliation>Salesforce.com</affiliation></author>
       <author><first>Kung-Hsiang</first><last>Huang</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Sarah</first><last>Tan</last><affiliation>SalesForce.com</affiliation></author>
@@ -3039,8 +3039,8 @@
     <paper id="225">
       <title>A Recipe of Parallel Corpora Exploitation for Multilingual Large Language Models</title>
       <author><first>Peiqin</first><last>Lin</last><affiliation>Institut für Informatik</affiliation></author>
-      <author><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="andre-f-t-martins"><first>Andre</first><last>Martins</last><affiliation>Instituto Superior Técnico and Unbabel</affiliation></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>4038-4050</pages>
       <abstract>Recent studies have highlighted the potential of exploiting parallel corpora to enhance multilingual large language models, improving performance in both bilingual tasks, e.g., machine translation, and general-purpose tasks, e.g., text classification. Building upon these findings, our comprehensive study aims to identify the most effective strategies for leveraging parallel corpora. We investigate the impact of parallel corpora quality and quantity, training objectives, and model size on the performance of multilingual large language models enhanced with parallel corpora across diverse languages and tasks. Our analysis reveals several key insights: (i) filtering noisy translations is essential for effectively exploiting parallel corpora, while language identification and short sentence filtering have little effect; (ii) even a corpus with just 10K parallel sentences can yield results comparable to those obtained from much larger datasets; (iii) employing only the machine translation objective yields the best results among various training objectives and their combinations; (iv) larger multilingual language models benefit more from parallel corpora than smaller models. Our study offers valuable insights into the optimal utilization of parallel corpora to enhance multilingual large language models, extending the generalizability of previous findings from limited languages and tasks to a broader range of scenarios.</abstract>
       <url hash="20ecb65b">2025.findings-naacl.225</url>
@@ -3083,7 +3083,7 @@
       <author><first>Vitalii</first><last>Hirak</last></author>
       <author><first>Badr M.</first><last>Abdullah</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
-      <author><first>Bernd</first><last>Möbius</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Tania</first><last>Avgustinova</last></author>
       <pages>4083-4092</pages>
       <abstract>This study analyzes the attention patterns of fine-tuned encoder-only models based on the BERT architecture (BERT-based models) towards two distinct types of Multiword Expressions (MWEs): idioms and microsyntactic units (MSUs). Idioms present challenges in semantic non-compositionality, whereas MSUs demonstrate unconventional syntactic behavior that does not conform to standard grammatical categorizations. We aim to understand whether fine-tuning BERT-based models on specific tasks influences their attention to MWEs, and how this attention differs between semantic and syntactic tasks. We examine attention scores to MWEs in both pre-trained and fine-tuned BERT-based models. We utilize monolingual models and datasets in six Indo-European languages — English, German, Dutch, Polish, Russian, and Ukrainian. Our results show that fine-tuning significantly influences how models allocate attention to MWEs. Specifically, models fine-tuned on semantic tasks tend to distribute attention to idiomatic expressions more evenly across layers. Models fine-tuned on syntactic tasks show an increase in attention to MSUs in the lower layers, corresponding with syntactic processing requirements.</abstract>
@@ -3108,7 +3108,7 @@
     </paper>
     <paper id="230">
       <title><fixed-case>M</fixed-case>ojo<fixed-case>B</fixed-case>ench: Language Modeling and Benchmarks for Mojo</title>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Joanna C. S.</first><last>Santos</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>4109-4128</pages>
@@ -3272,7 +3272,7 @@
       <author><first>Hao</first><last>Li</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Jingyuan</first><last>Sun</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>4299-4308</pages>
       <abstract>Multi-modal Large Language Models (MLLMs) have achieved remarkable success by integrating visual and textual modalities. However, they incur significant computational overhead due to the large number of vision tokens processed, limiting their practicality in resource-constrained environments. We introduce Language-Guided Vision Token Pruning (LVPruning) for MLLMs, an effective yet simple method that significantly reduces the computational burden while preserving model performance. LVPruning employs cross-attention modules to compute the importance of vision tokens based on their interaction with language tokens, determining which to prune. Importantly, LVPruning can be integrated without modifying the original MLLM parameters, which makes LVPruning simple to apply or remove. Our experiments show that LVPruning can effectively reduce up to 90% of vision tokens by the middle layer of LLaVA-1.5, resulting in a 62.1% decrease in inference Tera Floating-Point Operations Per Second (TFLOPs), with an average performance loss of just 0.45% across nine multi-modal benchmarks.</abstract>
       <url hash="46cfabf6">2025.findings-naacl.242</url>
@@ -3299,7 +3299,7 @@
       <author><first>Xinyuan</first><last>Lu</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Liangming</first><last>Pan</last><affiliation>University of Arizona</affiliation></author>
       <author><first>Yubo</first><last>Ma</last><affiliation>School of Computer Science and Engineering, Nanyang Technological University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
       <pages>4323-4339</pages>
       <abstract>Current Large Language Models (LLMs) exhibit limited ability to understand table structures and to apply precise numerical reasoning, which is crucial for tasks such as table question answering and table-based fact verification. To address these challenges, we introduce our Tool-Augmented Reasoning framework for Tables (TART), which integrates LLMs with specialized tools. TART contains three key components: a table formatter to ensure accurate data representation, a tool maker to develop specific computational tools, and an explanation generator to maintain explainability. We also present the TOOLTAB dataset, a new benchmark designed specifically for training LLMs in table–tool integration. Our experiments indicate that TART achieves substantial improvements over existing methods (e.g., Chain-of-Thought) by improving both the precision of data processing and the clarity of the reasoning process. Notably, TART paired with CodeLlama achieves 90.0% of the accuracy of the closed-sourced LLM GPT-3.5-turbo, highlighting its robustness in diverse real-world scenarios. Both code and data are openly available at https://github.com/XinyuanLu00/TART.</abstract>
@@ -3373,7 +3373,7 @@
       <author><first>ChangSu</first><last>Choi</last></author>
       <author><first>HanGyeol</first><last>Yoo</last></author>
       <author><first>HyeonSeok</first><last>Lim</last><affiliation>Seoul National University of Science and Technology</affiliation></author>
-      <author><first>KyungTae</first><last>Lim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Jungyeul</first><last>Park</last><affiliation>The University of British Columbia</affiliation></author>
       <pages>4412-4426</pages>
       <abstract>This study explores the integration of automated writing evaluation (AWE) and grammatical error correction (GEC) through multitask learning, demonstrating how combining these distinct tasks can enhance performance in both areas. By leveraging a shared learning framework, we show that models trained jointly on AWE and GEC outperform those trained on each task individually. To support this effort, we introduce a dataset specifically designed for multitask learning using AWE and GEC. Our experiments reveal significant synergies between tasks, leading to improvements in both writing assessment accuracy and error correction precision. This research represents a novel approach for optimizing language learning tools by unifying writing evaluation and correction tasks, offering insights into the potential of multitask learning in educational applications.</abstract>
@@ -3398,7 +3398,7 @@
       <title><fixed-case>CDB</fixed-case>: A Unified Framework for Hope Speech Detection Through Counterfactual, Desire and Belief</title>
       <author><first>Tulio Ferreira Leite Da</first><last>Silva</last><affiliation>Universidade de São Paulo</affiliation></author>
       <author><first>Gonzalo Freijedo</first><last>Aduna</last><affiliation>Ecole Normale Supérieure – PSL</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
       <author><first>Alda</first><last>Mari</last><affiliation>CNRS</affiliation></author>
       <author><first>Zongmin</first><last>Li</last></author>
       <author><first>Li</first><last>Yue</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
@@ -3430,7 +3430,7 @@
       <author><first>Zihuiwen</first><last>Ye</last></author>
       <author><first>Fraser David</first><last>Greenlee</last><affiliation>Cohere</affiliation></author>
       <author><first>Max</first><last>Bartolo</last><affiliation>Cohere and University College London</affiliation></author>
-      <author><first>Phil</first><last>Blunsom</last><affiliation>Google, Department of Computer Science, University of Oxford and DeepMind</affiliation></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last><affiliation>Google, Department of Computer Science, University of Oxford and DeepMind</affiliation></author>
       <author><first>Jon Ander</first><last>Campos</last><affiliation>Cohere</affiliation></author>
       <author><first>Matthias</first><last>Gallé</last><affiliation>Cohere</affiliation></author>
       <pages>4506-4520</pages>
@@ -3499,7 +3499,7 @@
       <title><fixed-case>SFMSS</fixed-case>: Service Flow aware Medical Scenario Simulation for Conversational Data Generation</title>
       <author><first>Zhijie</first><last>Bao</last></author>
       <author><first>Qingyun</first><last>Liu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>4586-4604</pages>
       <abstract>Medical-specific Large Language Models (LLMs) have demonstrated impressive performance on medical-related exams and tasks. Despite their success in single-turn question and answering, instruction-tuned LLMs often falter in real-world healthcare applications, highlighting a disconnect between existing instruction datasets and practical contexts. To address this issue, we propose Service Flow aware Medical Scenario Simulation (SFMSS), a simulation framework designed for medical conversational data generation. SFMSS employs three key strategies to ensure the quality of the data generation. the use of Authentic Seed Data ensures alignment of real-world distributions. Diverse Patient Simulation enables simulated patients to exhibit distinct communication styles and complex behavioral logic. Service Flow Control ensures that conversations progress in alignment with medical objectives. We construct a dataset targeting on outpatient reception through SFMSS, named SFMSS-CD. Building on this dataset, we develop a model called SFMSS-Nurse. We conduct both automatic and human evaluations, involving 15 users and 15 clinical experts, to assess the effectiveness of SFMSS. The results demonstrate that SFMSS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical demands.</abstract>
@@ -3710,7 +3710,7 @@
     </paper>
     <paper id="274">
       <title>Jailbreaking with Universal Multi-Prompts</title>
-      <author><first>Yu-Ling</first><last>Hsu</last></author>
+      <author id="yu-ling-una-hsu"><first>Yu-Ling</first><last>Hsu</last></author>
       <author><first>Hsuan</first><last>Su</last></author>
       <author><first>Shang-Tse</first><last>Chen</last><affiliation>National Taiwan University</affiliation></author>
       <pages>4870-4891</pages>
@@ -3800,7 +3800,7 @@
       <author><first>Yuen</first><last>Chen</last></author>
       <author><first>Vethavikashini Chithrra</first><last>Raghuram</last><affiliation>CCC Intelligent Solutions</affiliation></author>
       <author><first>Justus</first><last>Mattern</last><affiliation>Department of Computer Science, ETHZ - ETH Zurich and Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Department of Computer Science, University of Toronto</affiliation></author>
       <pages>4984-5004</pages>
       <abstract>Generated texts from large language models (LLMs) have been shown to exhibit a variety of harmful, human-like biases against various demographics. These findings motivate research efforts aiming to understand and measure such effects. This paper introduces a causal formulation for bias measurement in generative language models. Based on this theoretical foundation, we outline a list of desiderata for designing robust bias benchmarks. We then propose a benchmark called OccuGender, with a bias-measuring procedure to investigate occupational gender bias. We test several state-of-the-art open-source LLMs on OccuGender, including Llama, Mistral, and their instruction-tuned versions. The results show that these models exhibit substantial occupational gender bias. Lastly, we discuss prompting strategies for bias mitigation and an extension of our causal formulation to illustrate the generalizability of our framework.</abstract>
@@ -3854,7 +3854,7 @@
       <title><fixed-case>C</fixed-case>ode<fixed-case>S</fixed-case>im: Multi-Agent Code Generation and Problem Solving through Simulation-Driven Planning and Debugging</title>
       <author><first>Md. Ashraful</first><last>Islam</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
       <author><first>Mohammed Eunus</first><last>Ali</last><affiliation>Bangladesh University of Engineering and Technology</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <pages>5113-5139</pages>
       <url hash="74ff4b22">2025.findings-naacl.285</url>
       <bibkey>islam-etal-2025-codesim</bibkey>
@@ -3900,7 +3900,7 @@
       <title>A Context-Aware Contrastive Learning Framework for Hateful Meme Detection and Segmentation</title>
       <author><first>Xuanyu</first><last>Su</last></author>
       <author><first>Yansong</first><last>Li</last></author>
-      <author><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last><affiliation>University of Ottawa</affiliation></author>
       <author><first>Nathalie</first><last>Japkowicz</last><affiliation>American University</affiliation></author>
       <pages>5201-5215</pages>
       <abstract>Amidst the rise of Large Multimodal Models (LMMs) and their widespread application in generating and interpreting complex content, the risk of propagating biased and harmful memes remains significant. Current safety measures often fail to detect subtly integrated hateful content within “Confounder Memes”. To address this, we introduce HateSieve, a new framework designed to enhance the detection and segmentation of hateful elements in memes. HateSieve features a novel Contrastive Meme Generator that creates semantically correlated memes, a customized triplet dataset for contrastive learning, and an Image-Text Alignment module that produces context-aware embeddings for accurate meme segmentation. Empirical experiments show that HateSieve not only surpasses existing LMMs in performance with fewer trainable parameters but also offers a robust mechanism for precisely identifying and isolating hateful content. Caution: Contains academic discussions of hate speech; viewer discretion advised.</abstract>
@@ -3924,8 +3924,8 @@
     <paper id="291">
       <title>Does Data Contamination Detection Work (Well) for <fixed-case>LLM</fixed-case>s? A Survey and Evaluation on Detection Assumptions</title>
       <author><first>Yujuan</first><last>Fu</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last><affiliation>George Mason University</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Fei</first><last>Xia</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>5235-5256</pages>
       <abstract>Large language models (LLMs) have demonstrated great performance across various benchmarks, showing potential as general-purpose task solvers. However, as LLMs are typically trained on vast amounts of data, a significant concern in their evaluation is data contamination, where overlap between training data and evaluation datasets inflates performance assessments. Multiple approaches have been developed to identify data contamination. These approaches rely on specific assumptions that may not hold universally across different settings. To bridge this gap, we systematically review 50 papers on data contamination detection, categorize the underlying assumptions, and assess whether they have been rigorously validated. We identify and analyze eight categories of assumptions and test three of them as case studies. Our case studies focus on detecting direct, instance-level data contamination, which is also referred to as Membership Inference Attacks (MIA). Our analysis reveals that MIA approaches based on these three assumptions can have similar performance to random guessing, on datasets used in LLM pretraining, suggesting that current LLMs might learn data distributions rather than memorizing individual instances. Meanwhile, MIA can easily fail when there are data distribution shifts between the seen and unseen instances.</abstract>
@@ -3983,7 +3983,7 @@
       <author><first>Pranav</first><last>Shetty</last><affiliation>J.P. Morgan Chase</affiliation></author>
       <author><first>Zhao</first><last>Jin</last></author>
       <author><first>Sameena</first><last>Shah</last><affiliation>J.P. Morgan Chase</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>5326-5346</pages>
       <abstract>Document Visual Question Answering (VQA) models have evolved at an impressive rate over the past few years, coming close to or matching human performance on some benchmarks. We argue that common evaluation metrics used by popular benchmarks do not account for the semantic and multimodal groundedness of a model’s outputs. As a result, hallucinations and major semantic errors are treated the same way as well-grounded outputs, and the evaluation scores do not reflect the reasoning capabilities of the model. In response, we propose a new evaluation methodology that accounts for the groundedness of predictions with regard to the semantic characteristics of the output as well as the multimodal placement of the output within the input document. Our proposed methodology is parameterized in such a way that users can configure the score according to their preferences. We validate our scoring methodology using human judgment and show its potential impact on existing popular leaderboards. Through extensive analyses, we demonstrate that our proposed method produces scores that are a better indicator of a model’s robustness and tends to give higher rewards to better-calibrated answers.</abstract>
       <url hash="f6d41bf5">2025.findings-naacl.295</url>
@@ -4092,7 +4092,7 @@
       <author><first>Ning</first><last>Cheng</last></author>
       <author><first>Zihe</first><last>Liu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jian</first><last>Liu</last><affiliation>University of Science and Technology Beijing</affiliation></author>
       <pages>5484-5498</pages>
       <abstract>In recent years, large language models (LLMs) have made significant progress in knowledge-intensive applications. However, when adapting them to specific domains, we may encounter a multi-stage continuous learning scenario, especially in cases where domain knowledge evolves rapidly.This issue severely limits traditional fine-tuning approaches for LLMs.To overcome this limitation, we propose a new learning paradigm designed specifically for multi-stage continuous learning. This paradigm includes a preference-based learning bias to identify potential knowledge conflicts, as well as a self-distillation-based data augmentation strategy to expand and enrich the training corpus, thereby improving the integration of knowledge-compatible information.In the experiments, we show that our proposed method achieves a significant improvement in accuracy after 7 stages of fine-tuning compared to previous methods, while also demonstrating excellent performance in preserving general knowledge.We have released our code and dataset at Multi-Stage-Learning.</abstract>
@@ -4141,7 +4141,7 @@
       <author><first>Marco</first><last>Dinarelli</last><affiliation>CNRS</affiliation></author>
       <author><first>Raheel</first><last>Qader</last><affiliation>Lingua Custodia</affiliation></author>
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last><affiliation>University of Grenoble-Alpes</affiliation></author>
-      <author><first>Hervé</first><last>Blanchon</last><affiliation>Université Grenoble Alpes</affiliation></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last><affiliation>Université Grenoble Alpes</affiliation></author>
       <pages>5544-5556</pages>
       <abstract>Despite the strong research interest in document-level Machine Translation (MT), the test-sets dedicated to this task are still scarce. The existing test-sets mainly cover topics from the general domain and fall short on specialised domains, such as legal and financial. Also, despite their document-level aspect, they still follow a sentence-level logic that doesn’t allow for including certain linguistic phenomena such as information reorganisation. In this work, we aim to fill this gap by proposing a novel test-set : DOLFIN. The dataset is built from specialised financial documents and it makes a step towards true document-level MT by abandoning the paradigm of perfectly aligned sentences, presenting data in units of sections rather than sentences. The test-set consists of an average of 1950 aligned sections for five language pairs. We present the detailed data collection pipeline that can serve as inspiration for aligning new document-level datasets. We demonstrate the usefulness and the quality of this test-set with the evaluation of a series of models. Our results show that the test-set is able to discriminate between context-sensitive and context-agnostic models and shows the weaknesses when models fail to accurately translate financial texts. The test-set will be made public for the community.</abstract>
       <url hash="96178cc0">2025.findings-naacl.307</url>
@@ -4185,9 +4185,9 @@
       <author><first>Dandan</first><last>Pang</last><affiliation>BFH - Bern University of Applied Sciences</affiliation></author>
       <author><first>Stuti</first><last>Thapa</last><affiliation>University of Tulsa</affiliation></author>
       <author><first>Garrick</first><last>Sherman</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Louis</first><last>Tay</last><affiliation>Purdue University</affiliation></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
       <pages>5586-5600</pages>
       <abstract>While affective expressions on social media have been extensively studied, most research has focused on the Western context. This paper explores cultural differences in affective expressions by comparing valence and arousal on Twitter/X (geolocated to the US) and Sina Weibo (in Mainland China). Using the NRC-VAD lexicon to measure valence and arousal, we identify distinct patterns of emotional expression across both platforms. Our analysis reveals a functional representation between valence and arousal, showing a negative offset in contrast to traditional lab-based findings which suggest a positive offset. Furthermore, we uncover significant cross-cultural differences in arousal, with US users displaying higher emotional intensity than Chinese users, regardless of the valence of the content. Finally, we conduct a comprehensive language analysis correlating n-grams and LDA topics with affective dimensions to deepen our understanding of how language and culture shape emotional expression. These findings contribute to a more nuanced understanding of affective communication across cultural and linguistic contexts on social media.</abstract>
       <url hash="79791c93">2025.findings-naacl.310</url>
@@ -4290,7 +4290,7 @@
       <author><first>Sagnik</first><last>Mukherjee</last></author>
       <author><first>Jeonghwan</first><last>Kim</last></author>
       <author><first>Zhenhailong</first><last>Wang</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>5745-5758</pages>
       <abstract>Despite seemingly performant web agents on the task-completion benchmarks, most existing methods evaluate the agents based on a presupposition: the web navigation task consists of a linear sequence of actions with an end state that marks task completion. In contrast, our work focuses on web navigation for information aggregation, wherein the agent must explore different websites to gather information for a complex query. We consider web information aggregation from two different perspectives: i) Direct API-driven Access relies on a text-only view of the Web, leveraging external tools such as Google Search API to navigate the Web and a scraper to extract website contents. (ii) Interactive Visual Access uses screenshots of the webpages and requires interaction with the browser to navigate and access information. Motivated by these diverse information access settings, we introduce Infogent, a novel modular framework for web information aggregation involving three distinct components: Navigator, Extractor, and Aggregator. Experiments on different information access settings demonstrate that Infogent beats an existing SOTA multi-agent search framework by 7% under Direct API-Driven Access on FRAMES and improves over an existing information-seeking web agent by 4.3% under Interactive Visual Access on AssistantBench.</abstract>
@@ -4431,7 +4431,7 @@
       <author><first>Sonam</first><last>Gupta</last><affiliation>Indian Institute of Technology, Madras</affiliation></author>
       <author><first>Gaurav</first><last>Pandey</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Dinesh</first><last>Raghu</last><affiliation>IBM Research - New Delhi</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>5922-5943</pages>
       <abstract>Retrieval-Augmented Generation (RAG) has emerged as a prominent method for incorporating domain knowledge into Large Language Models (LLMs). While RAG enhances response relevance by incorporating retrieved domain knowledge in the context, retrieval errors can still lead to hallucinations and incorrect answers. To recover from retriever failures, domain knowledge is injected by fine-tuning the model to generate the correct response, even in the case of retrieval errors. However, we observe that without systematic knowledge augmentation, fine-tuned LLMs may memorize new information but still fail to extract relevant domain knowledge, leading to poor performance. In this work, we present a novel framework that significantly enhances the fine-tuning process by augmenting the training data in two ways – context augmentation and knowledge paraphrasing. In context augmentation, we create multiple training samples for a given QA pair by varying the relevance of the retrieved information, teaching the model when to ignore and when to rely on retrieved content. In knowledge paraphrasing, we finetune with multiple answers to the same question, enabling LLMs to better internalize specialized knowledge. To mitigate catastrophic forgetting due to fine-tuning, we add a domain-specific identifier to a question and also utilize a replay buffer containing general QA pairs. Experimental results demonstrate the efficacy of our method over existing techniques, achieving up to 10% relative gain in token-level recall while preserving the LLM’s generalization capabilities.</abstract>
       <url hash="e7572056">2025.findings-naacl.329</url>
@@ -4444,7 +4444,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Seungyoon</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>5944-5964</pages>
       <abstract>Through numerous endeavors, large language models (LLMs) have witnessed significant advancements in their instruction-following capability. However, we discern that LLMs are prone to generate responses to instruction-formatted statements in an instinctive manner, rather than comprehending the underlying user intention reside within the given instructions. We also recognize that the significance of instruction understanding capability is largely overlooked in most of LLM evaluation benchmarks. To ensure more comprehensive evaluation on the instruction understanding capability of LLM, we propose Intention of Instruction (IntInst) benchmark, which primary objective is to distinguish the appropriate instruction that accurately instruct to generate a given context. IntInst presents four instruction candidates and requires LLMs to select one among them. Through extensive experiments with several instruction-tuned LLMs, we reveal that most LLMs struggle to grasp the actual intention concealed in the instruction and thoroughly analyze the factors influencing instruction understanding.</abstract>
       <url hash="723b7c23">2025.findings-naacl.330</url>
@@ -4503,7 +4503,7 @@
       <author><first>Xiaochen</first><last>Wang</last></author>
       <author><first>Junqing</first><last>He</last><affiliation>International Digital Econemy Academy</affiliation></author>
       <author><first>Liang</first><last>Chen</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <author><first>Yiru</first><last>Wang</last></author>
       <author><first>Zhe</first><last>Yang</last><affiliation>Peking University</affiliation></author>
       <author><first>Xiangdi</first><last>Meng</last></author>
@@ -4558,7 +4558,7 @@
       <author><first>Yadong</first><last>Lu</last><affiliation>Microsoft</affiliation></author>
       <author><first>Michael</first><last>Santacroce</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yeyun</first><last>Gong</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Yelong</first><last>Shen</last></author>
       <pages>6090-6107</pages>
       <abstract>Recent advances in large language models (LLMs) have demonstrated potential for LLM agents. To facilitate the training for these agents with both linguistic feedback and non-linguistic reward signals, we introduce Learning through Communication (LTC). We design a universal buffer to store all the feedback, and an iterative pipeline to enable an LLM agent to explore and update its policy in an given environment. To optimize agent interactions for task-specific learning with our universal buffer and pipeline, we introduce diverse communication patterns tailored for both single-agent and multi-agent environments. We evaluate the efficacy of our LTC approach on four diverse datasets: ALFWorld (single-agent), HotpotQA (multi-agent collaboration), Chameleon (multi-agent competition), and GSM8k (multi-agent teacher-student). On these data sets, LTC outperforms the supervised instruction fine-tuning baselines by 3.6% to 12%. These results highlight the versatility and efficiency of LTC in facilitating online adaptation for LLM agents.</abstract>
@@ -4583,7 +4583,7 @@
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Jiahao</first><last>Ying</last></author>
       <author><first>Mahani</first><last>Aljunied</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Shanda Group and Alibaba Group</affiliation></author>
       <pages>6119-6136</pages>
       <abstract>This study introduces two novel benchmarks, SeaExam and SeaBench, designed to evaluate the capabilities of Large Language Models (LLMs) in Southeast Asian (SEA) application scenarios. Unlike existing multilingual datasets primarily derived from English translations, these benchmarks are constructed based on real-world scenarios from SEA regions. SeaExam draws from regional educational exams to form a comprehensive dataset that encompasses subjects such as local history and literature. In contrast, SeaBench is crafted around multi-turn, open-ended tasks that reflect daily interactions within SEA communities. Our evaluations demonstrate that SeaExam and SeaBench more effectively discern LLM performance on SEA language tasks compared to their translated benchmarks. This highlights the importance of using real-world queries to assess the multilingual capabilities of LLMs.</abstract>
@@ -4685,7 +4685,7 @@
       <author><first>Asaf</first><last>Yehudai</last></author>
       <author><first>Dinesh</first><last>Khandelwal</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Dinesh</first><last>Raghu</last><affiliation>IBM Research - New Delhi</affiliation></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>6240-6249</pages>
       <abstract>Fine-tuning Large Language Models (LLMs) on specific datasets is a common practice to improve performance on target tasks. However, this performance gain often leads to overfitting, where the model becomes too specialized in either the task or the characteristics of the training data, resulting in a loss of generalization. This paper introduces Selective Self-to-Supervised Fine-Tuning (S3FT), a fine-tuning approach that achieves better performance than the standard supervised fine-tuning (SFT) while improving generalization.S3FT leverages the existence of multiple valid responses to a query.By utilizing the model’s correct responses, S3FT reduces model specialization during the fine-tuning stage. S3FT first identifies the correct model responses from the training set by deploying an appropriate judge. Then, it fine-tunes the model using the correct model responses and the gold response (or its paraphrase) for the remaining samples.The effectiveness of S3FT is demonstrated through experiments on mathematical reasoning, Python programming and reading comprehension tasks. The results show that standard SFT can lead to an average performance drop of up to 4.4 on multiple benchmarks, such as MMLU and TruthfulQA. In contrast, S3FT reduces this drop by half, i.e. 2.5, indicating better generalization capabilities than SFT while performing significantly better on the fine-tuning tasks.</abstract>
       <url hash="e601902b">2025.findings-naacl.349</url>
@@ -4730,7 +4730,7 @@
       <title>Discrete Diffusion Language Model for Efficient Text Summarization</title>
       <author><first>Do Huu</first><last>Dat</last></author>
       <author><first>Duc Anh</first><last>Do</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Wray</first><last>Buntine</last><affiliation>VinUniversity</affiliation></author>
       <pages>6278-6290</pages>
       <abstract>While diffusion models excel at conditionally generating high-quality images, prior works in discrete diffusion models were not evaluated on conditional long-text generation. This work addresses the limitations of prior discrete diffusion models for conditional long-text generation, particularly in the long abstractive summarization task. Despite faster decoding speeds compared to autoregressive methods, previous discrete diffusion models failed on the abstractive summarization task due to the incompatibility between the backbone architectures and the random noising process. To overcome these challenges, we introduce a novel semantic-aware noising process that enables Transformer backbones to handle long sequences effectively. Additionally, we propose CrossMamba, an adaptation of the Mamba model to the encoder-decoder paradigm, which integrates seamlessly with the random absorbing noising process. Our approaches outperform existing discrete diffusion models on three benchmark summarization datasets: Gigaword, CNN/DailyMail, and Arxiv, while also achieving much faster inference speed compared to autoregressive models.</abstract>
@@ -4900,7 +4900,7 @@
       <author><first>Siyuan</first><last>Wang</last></author>
       <author><first>Shujun</first><last>Liu</last></author>
       <author><first>Yun</first><last>Song</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <pages>6537-6570</pages>
       <abstract>Large Language Models (LLMs) have significantly advanced legal intelligence, but the scarcity of scenario data impedes the progress toward interactive legal scenarios. This paper introduces a Multi-agent Legal Simulation Driver (MASER) to scalably generate synthetic data by simulating interactive legal scenarios. Leveraging real-legal case sources, MASER ensures the consistency of legal attributes between participants and introduces a supervisory mechanism to align participants’ characters and behaviors as well as addressing distractions. A Multi-stage Interactive Legal Evaluation (MILE) benchmark is further constructed to evaluate LLMs’ performance in dynamic legal scenarios. Extensive experiments confirm the effectiveness of our framework.</abstract>
@@ -4994,7 +4994,7 @@
       <author><first>Aarón</first><last>Galiano-Jiménez</last><affiliation>Universidad de Alicante</affiliation></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last><affiliation>Universidad de Alicante</affiliation></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last><affiliation>University of Alicante</affiliation></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universidad de Alicante</affiliation></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last><affiliation>Universidad de Alicante</affiliation></author>
       <pages>6661-6676</pages>
       <abstract>This paper delves into sequence-level knowledge distillation (KD) of multilingual pre-trained translation models. We posit that, beyond the approximated mode obtained via beam search, the whole output distribution of the teacher contains valuable insights for students. We explore the potential of n-best lists from beam search to guide student’s learning and then investigate alternative decoding methods to address observed issues like low variability and under-representation of infrequent tokens. Our research in data-limited scenarios reveals that although sampling methods can slightly compromise the translation quality of the teacher output compared to beam search based methods, they enrich the generated corpora with increased variability and lexical richness, ultimately enhancing student model performance and reducing the gender bias amplification commonly associated with KD.</abstract>
       <url hash="df7cc2fc">2025.findings-naacl.372</url>
@@ -5053,7 +5053,7 @@
       <title>Aligning Black-box Language Models with Human Judgments</title>
       <author><first>Gerrit J.j.</first><last>Van Den Burg</last><affiliation>Amazon</affiliation></author>
       <author><first>Gen</first><last>Suzuki</last><affiliation>Amazon</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Murat</first><last>Sensoy</last></author>
       <pages>6737-6749</pages>
       <abstract>Large language models (LLMs) are increasingly used as automated judges to evaluate recommendation systems, search engines, and other subjective tasks, where relying on human evaluators can be costly, time-consuming, and unscalable. LLMs offer an efficient solution for continuous, automated evaluation. However, since the systems that are built and improved with these judgments are ultimately designed for human use, it is crucial that LLM judgments align closely with human evaluators to ensure such systems remain human-centered. On the other hand, aligning LLM judgments with human evaluators is challenging due to individual variability and biases in human judgments. We propose a simple yet effective framework to align LLM judgments with individual human evaluators or their aggregated judgments, without retraining or fine-tuning the LLM. Our approach learns a linear mapping between the LLM’s outputs and human judgments, achieving over 142% average improvement in agreement across 29 tasks with only a small number of calibration examples used for training. Notably, our method works in zero-shot and few-shot settings, exceeds inter-human agreement on four out of six tasks, and enables smaller LLMs to achieve performance comparable to that of larger models.</abstract>
@@ -5212,7 +5212,7 @@
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Muhammad Hammad Fahim</first><last>Siddiqui</last></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada and University of Ottawa</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>6995-7016</pages>
       <abstract>Eradicating poverty is the first goal in the U.N. Sustainable Development Goals. However, aporophobia – the societal bias against people living in poverty – constitutes a major obstacle to designing, approving and implementing poverty-mitigation policies. This work presents an initial step towards operationalizing the concept of aporophobia to identify and track harmful beliefs and discriminative actions against poor people on social media. In close collaboration with non-profits and governmental organizations, we conduct data collection and exploration. Then we manually annotate a corpus of English tweets from five world regions for the presence of (1) direct expressions of aporophobia, and (2) statements referring to or criticizing aporophobic views or actions of others, to comprehensively characterize the social media discourse related to bias and discrimination against the poor. Based on the annotated data, we devise a taxonomy of categories of aporophobic attitudes and actions expressed through speech on social media. Finally, we train several classifiers and identify the main challenges for automatic detection of aporophobia in social networks. This work paves the way towards identifying, tracking, and mitigating aporophobic views on social media at scale.</abstract>
       <url hash="d88107e0">2025.findings-naacl.388</url>
@@ -5293,7 +5293,7 @@
     </paper>
     <paper id="394">
       <title>Augmented Adversarial Trigger Learning</title>
-      <author><first>Zhe</first><last>Wang</last><affiliation>Amazon</affiliation></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last><affiliation>Amazon</affiliation></author>
       <author><first>Yanjun</first><last>Qi</last><affiliation>Amazon and University of Virginia</affiliation></author>
       <pages>7068-7100</pages>
       <abstract>Gradient optimization-based adversarial attack methods automate the learning of adversarial triggers to generate jailbreak prompts or leak system prompts. In this work, we take a closer look at the optimization objective of adversarial trigger learning and propose ATLA: Adversarial Trigger Learning with Augmented objectives. ATLA improves the negative log-likelihood loss used by previous studies into a weighted loss formulation that encourages the learned adversarial triggers to optimize more towards response format tokens. This enables ATLA to learn an adversarial trigger from just one query-response pair and the learned trigger generalizes well to other similar queries. We further design a variation to augment trigger optimization with an auxiliary loss that suppresses evasive responses. We showcase how to use ATLA to learn adversarial suffixes jailbreaking LLMs and to extract hidden system prompts. Empirically we demonstrate that ATLA consistently outperforms current state-of-the-art techniques, achieving nearly 100% success in attacking while requiring 80% fewer queries. ATLA learned jailbreak suffixes demonstrate high generalization to unseen queries and transfer well to new LLMs.</abstract>
@@ -5347,7 +5347,7 @@
       <author><first>Nicolas</first><last>Hiebel</last><affiliation>Université Paris-Saclay</affiliation></author>
       <author><first>Olivier</first><last>Ferret</last><affiliation>CEA</affiliation></author>
       <author><first>Karën</first><last>Fort</last><affiliation>University of Lorraine</affiliation></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
       <pages>7145-7159</pages>
       <abstract>Healthcare professionals are increasingly including Language Models (LMs) in clinical practice. However, LMs have been shown to exhibit and amplify stereotypical biases that can cause life-threatening harm in a medical context. This study aims to evaluate gender biases in automatically generated clinical cases in French, on ten disorders. Using seven LMs fine-tuned for clinical case generation and an automatic linguistic gender detection tool, we measure the associations between disorders and gender. We unveil that LMs over-generate cases describing male patients, creating synthetic corpora that are not consistent with documented prevalence for these disorders. For instance, when prompts do not specify a gender, LMs generate eight times more clinical cases describing male (vs. female patients) for heart attack. We discuss the ideal synthetic clinical case corpus and establish that explicitly mentioning demographic information in generation instructions appears to be the fairest strategy. In conclusion, we argue that the presence of gender biases in synthetic text raises concerns about LM-induced harm, especially for women and transgender people.</abstract>
       <url hash="641d230e">2025.findings-naacl.398</url>
@@ -5493,7 +5493,7 @@
       <author><first>Guangliang</first><last>Liu</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Yao</first><last>Ma</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
       <author><first>Rongrong</first><last>Wang</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Jiliang</first><last>Tang</last><affiliation>Michigan State University</affiliation></author>
       <pages>7302-7323</pages>
       <abstract>In-Context Learning (ICL) empowers Large Language Models (LLMs) with the ability to learn from a few examples provided in the prompt, enabling downstream generalization without the requirement for gradient updates. Despite encouragingly empirical success, the underlying mechanism of ICL remains unclear. Existing research remains ambiguous with various viewpoints, utilizing intuition-driven and ad-hoc technical solutions to interpret ICL. In this paper, we leverage a data generation perspective to reinterpret recent efforts from a systematic angle, demonstrating the potential broader usage of these popular technical solutions. For a conceptual definition, we rigorously adopt the terms of skill recognition and skill learning. Skill recognition selects one learned data generation function previously seen during pre-training while skill learning can learn new data generation functions from in-context data. Furthermore, we provide insights into the strengths and weaknesses of both abilities, emphasizing their commonalities through the perspective of data generation. This analysis suggests potential directions for future research. The corresponding paper list can be found here.</abstract>
@@ -5506,7 +5506,7 @@
       <author><first>Youngwon</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Seung-won</first><last>Hwang</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Daniel F</first><last>Campos</last><affiliation>Snowflake</affiliation></author>
-      <author><first>Filip</first><last>Graliński</last><affiliation>Snowflake and Adam Mickiewicz University</affiliation></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last><affiliation>Snowflake and Adam Mickiewicz University</affiliation></author>
       <author><first>Zhewei</first><last>Yao</last><affiliation>Snowflake</affiliation></author>
       <author><first>Yuxiong</first><last>He</last><affiliation>Microsoft</affiliation></author>
       <pages>7324-7339</pages>
@@ -5521,7 +5521,7 @@
       <author><first>Aman</first><last>Dalmia</last></author>
       <author><first>Mehran</first><last>Kazemi</last><affiliation>Google</affiliation></author>
       <author><first>Amal</first><last>Zouaq</last><affiliation>Polytechnique Montreal</affiliation></author>
-      <author><first>Christopher</first><last>Pal</last><affiliation>Polytechnique Montreal</affiliation></author>
+      <author id="christopher-pal"><first>Christopher</first><last>Pal</last><affiliation>Polytechnique Montreal</affiliation></author>
       <pages>7340-7356</pages>
       <abstract>Geometry problem-solving demands advanced reasoning abilities to process multimodal inputs and employ mathematical knowledge effectively. Vision-language models (VLMs) have made significant progress in various multimodal tasks. Yet, they still struggle with geometry problems and are significantly limited by their inability to perform mathematical operations not seen during pre-training, such as calculating the cosine of an arbitrary angle, and by difficulties in correctly applying relevant geometry formulas. To overcome these challenges, we present GeoCoder, which leverages modular code-finetuning to generate and execute code using a predefined geometry function library. By executing the code, we achieve accurate and deterministic calculations, contrasting the stochastic nature of autoregressive token prediction, while the function library minimizes errors in formula usage. We also propose a multimodal retrieval-augmented variant of GeoCoder, named RAG-GeoCoder, which incorporates a non-parametric memory module for retrieving functions from the geometry library, thereby reducing reliance on parametric memory. Our modular code-finetuning approach enhances the geometric reasoning capabilities of VLMs, yielding an average improvement of over 16% across various question complexities on the GeomVerse dataset compared to other fine-tuning methods.</abstract>
       <url hash="5f2fda72">2025.findings-naacl.410</url>
@@ -5616,7 +5616,7 @@
     </paper>
     <paper id="418">
       <title>Do Large Language Models Align with Core Mental Health Counseling Competencies?</title>
-      <author><first>Viet Cuong</first><last>Nguyen</last></author>
+      <author id="viet-cuong-nguyen"><first>Viet Cuong</first><last>Nguyen</last></author>
       <author><first>Mohammad</first><last>Taher</last></author>
       <author><first>Dongwan</first><last>Hong</last></author>
       <author><first>Vinicius Konkolics</first><last>Possobom</last></author>
@@ -5734,7 +5734,7 @@
       <author><first>Pranav</first><last>Chitale</last></author>
       <author><first>Khushboo</first><last>Singh</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>7658-7667</pages>
       <abstract>Like most of NLP, models for human-centered NLP tasks—tasks attempting to assess author-level information—predominantly use rep-resentations derived from hidden states of Transformer-based LLMs. However, what component of the LM is used for the representation varies widely. Moreover, there is a need for Human Language Models (HuLMs) that implicitly model the author and provide a user-level hidden state. Here, we systematically evaluate different ways of representing documents and users using different LM and HuLM architectures to predict task outcomes as both dynamically changing states and averaged trait-like user-level attributes of valence, arousal, empathy, and distress. We find that representing documents as an average of the token hidden states performs the best generally. Further, while a user-level hidden state itself is rarely the best representation, we find its inclusion in the model strengthens token or document embeddings used to derive document- and user-level representations resulting in best performances.</abstract>
       <url hash="df014c04">2025.findings-naacl.426</url>
@@ -5808,7 +5808,7 @@
       <author><first>Jotsna</first><last>Gowda</last></author>
       <author><first>Pryce</first><last>Houck</last></author>
       <author><first>Kevin</first><last>Tang</last><affiliation>Heinrich Heine University Düsseldorf and University of Florida</affiliation></author>
-      <author><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last><affiliation>University of Florida</affiliation></author>
       <pages>7744-7756</pages>
       <abstract>African American English (AAE) presents unique challenges in natural language processing (NLP) This research systematically compares the performance of available NLP models—rule-based, transformer-based, and large language models (LLMs)—capable of identifying key grammatical features of AAE, namely Habitual Be and Multiple Negation. These features were selected for their distinct grammatical complexity and frequency of occurrence. The evaluation involved sentence-level binary classification tasks, using both zero-shot and few-shot strategies. The analysis reveals that while LLMs show promise compared to the baseline, they are influenced by biases such as recency and unrelated features in the text such as formality. This study highlights the necessity for improved model training and architectural adjustments to better accommodate AAE’s unique linguistic characteristics. Data and code are available.</abstract>
       <url hash="32ec130e">2025.findings-naacl.431</url>
@@ -5974,7 +5974,7 @@
       <title><fixed-case>UCL</fixed-case>-Bench: A <fixed-case>C</fixed-case>hinese User-Centric Legal Benchmark for Large Language Models</title>
       <author><first>Ruoli</first><last>Gan</last></author>
       <author><first>Duanyu</first><last>Feng</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Zhihang</first><last>Lin</last><affiliation>Westlake Scietrain</affiliation></author>
       <author><first>Haochen</first><last>Jia</last></author>
       <author><first>Hao</first><last>Wang</last><affiliation>Sichuan University</affiliation></author>
@@ -6103,7 +6103,7 @@
       <author><first>Xiang</first><last>Yue</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Tuney</first><last>Zheng</last></author>
       <author><first>Jie</first><last>Huang</last><affiliation>xAI</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <pages>8118-8131</pages>
       <abstract>We introduce SimulBench, a benchmark designed to evaluate large language models (LLMs) across a diverse collection of creative simulation tasks, such as acting as a Linux terminal or playing text games with users. While these simulation tasks serve as effective measures of an LLM’s general intelligence, they are seldom incorporated into existing benchmarks. A major challenge is to develop an evaluation framework for testing different LLMs fairly while preserving the multi-round interactive nature of simulation tasks between users and AI. To tackle this issue, we suggest using a fixed LLM as a user agent to engage with an LLM to collect dialogues first under different tasks. Then, challenging dialogue scripts are extracted for evaluating different target LLMs. To facilitate automatic assessment on SimulBench, GPT-4 is employed as the evaluator, tasked with reviewing the quality of the final response generated by the target LLMs given multi-turn dialogue scripts. Our comprehensive experiments indicate that these creative simulation tasks continue to pose a significant challenge with their unique natures and show the gap between proprietary models and the most advanced open LLMs. For example, GPT-4-turbo outperforms LLaMA-3-70b-Chat on 18.55% more cases.</abstract>
       <url hash="474724f7">2025.findings-naacl.453</url>
@@ -6302,7 +6302,7 @@
       <author><first>Rahmad</first><last>Mahendra</last><affiliation>Royal Melbourne Institute of Technology and Universitas Indonesia</affiliation></author>
       <author><first>Damiano</first><last>Spina</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Lawrence</first><last>Cavedon</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
-      <author><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <pages>8336-8361</pages>
       <abstract>While recent advancements in large language models (LLMs) have enhanced their capabilities to solve mathematical problems, other aspects of numeracy remain underexplored. In this paper, we propose a benchmark to evaluate the ability of language models to perform basic numeracy tasks. We frame numeracy as a Natural Language Inference (NLI) task to assess the models’ ability to understand both numbers and language contexts. We evaluate 49 language models (LMs), including fine-tuned LMs on NLI datasets, instruction-tuned LLMs, and specialized math-LLMs. Our findings reveal three main insights: (1) LLMs only clearly outperform smaller LMs in arithmetic tasks, indicating that mathematical reasoning cannot be generalized to other numeracy skills such as number comparison and normalization; (2) while most language models achieve fair to good accuracy for NLI entailment cases, they still struggle to predict contradiction and neutral cases; and (3) the robustness of language models’ numeracy capabilities needs improvement, particularly in understanding the semantics and pragmatics of numbers in linguistic contexts.</abstract>
       <url hash="0feefebb">2025.findings-naacl.467</url>
@@ -6313,7 +6313,7 @@
       <title>Are Language Models Agnostic to Linguistically Grounded Perturbations? A Case Study of <fixed-case>I</fixed-case>ndic Languages</title>
       <author><first>Poulami</first><last>Ghosh</last><affiliation>Indian Institute of Technology, Bombay</affiliation></author>
       <author><first>Raj</first><last>Dabre</last><affiliation>Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>8362-8396</pages>
       <abstract>Pre-trained language models (PLMs) are known to be susceptible to perturbations to the input text, but existing works do not explicitly focus on linguistically grounded attacks, which are subtle and more prevalent in nature. In this paper, we study whether PLMs are agnostic to linguistically grounded attacks or not. To this end, we offer the first study addressing this, investigating different Indic languages and various downstream tasks. Our findings reveal that although PLMs are susceptible to linguistic perturbations, when compared to non-linguistic attacks, PLMs exhibit a slightly lower susceptibility to linguistic attacks. This highlights that even constrained attacks are effective. Moreover, we investigate the implications of these outcomes across a range of languages, encompassing diverse language families and different scripts.</abstract>
       <url hash="0dc09779">2025.findings-naacl.468</url>
@@ -6485,7 +6485,7 @@
       <author><first>Dongshuo</first><last>Liu</last></author>
       <author><first>Zhijing</first><last>Wu</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Dandan</first><last>Song</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>103-123</pages>
       <abstract>Multi-session personalized dialogue generation is one of the most important topics in open-domain dialogue. It aims to generate responses consistent with the dialogue history and personality information across multiple sessions to engage users’ interest in the dialogue. Recent approaches focusing on history modeling and persona modeling have advanced the development of this field. However, they overlook the importance of dialogue structure in helping large language models (LLMs) understand the dialogue context. Moreover, these methods do not efficiently expand and utilize personality information, reducing the responses’ consistency. In this paper, we propose a Persona-Aware LLM-enAnCEd(PALACE) framework for multi-session personalized dialogue generation. Specifically, the framework consists of three components: a topic-aware memory bank, a persona prompt learning module, and VAE-LoRA. The topic-aware memory bank works by retrieving historical information that possesses a certain dialogue structure and relevant topics. The persona prompt learning module enhances the LLM’s persona-aware capabilities by utilizing a persona commonsense knowledge graph and a query-driven graph neural network. Furthermore, to enhance the generative capabilities of the LLM and obtain more useful prior knowledge, we combine VAE with LoRA to propose VAE-LoRA. Experimental results on the MSC and DuLeMon dataset demonstrate that our framework outperforms the state-of-the-art methods in automatic and human evaluation metrics.</abstract>
       <url hash="57ac7eda">2025.findings-acl.5</url>
@@ -6508,7 +6508,7 @@
       <title><fixed-case>B</fixed-case>ayes<fixed-case>KD</fixed-case>: <fixed-case>B</fixed-case>ayesian Knowledge Distillation for Compact <fixed-case>LLM</fixed-case>s in Constrained Fine-tuning Scenarios</title>
       <author><first>Wei</first><last>Li</last><affiliation>University of Birmingham</affiliation></author>
       <author><first>Lujun</first><last>Li</last></author>
-      <author><first>Mark G.</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark G.</first><last>Lee</last></author>
       <author><first>Shengjie</first><last>Sun</last></author>
       <author><first>Lei</first><last>Zhang</last><affiliation>University of Exeter</affiliation></author>
       <author><first>Wei</first><last>Xue</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
@@ -6597,7 +6597,7 @@
     <paper id="14">
       <title>MFinMeeting: A Multilingual, Multi-Sector, and Multi-Task Financial Meeting Understanding Evaluation Dataset</title>
       <author><first>Jie</first><last>Zhu</last><affiliation>Alibaba Group</affiliation></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Yalong</first><last>Wen</last></author>
       <author><first>Xiandong</first><last>Li</last><affiliation>nanjing university</affiliation></author>
       <author><first>Lifan</first><last>Guo</last></author>
@@ -6652,7 +6652,7 @@
       <author><first>Nicolas</first><last>Stefanovitch</last><affiliation>European Commission</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padua</affiliation></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>302-326</pages>
       <abstract>We introduce a novel multilingual and hierarchical corpus annotated for entity framing and role portrayal in news articles. The dataset uses a unique taxonomy inspired by storytelling elements, comprising 22 fine-grained roles, or archetypes, nested within three main categories: protagonist, antagonist, and innocent. Each archetype is carefully defined, capturing nuanced portrayals of entities such as guardian, martyr, and underdog for protagonists; tyrant, deceiver, and bigot for antagonists; and victim, scapegoat, and exploited for innocents. The dataset includes 1,378 recent news articles in five languages (Bulgarian, English, Hindi, European Portuguese, and Russian) focusing on two critical domains of global significance: the Ukraine-Russia War and Climate Change. Over 5,800 entity mentions have been annotated with role labels. This dataset serves as a valuable resource for research into role portrayal and has broader implications for news analysis. We describe the characteristics of the dataset and the annotation process, and we report evaluation results on fine-tuned state-of-the-art multilingual transformers and hierarchical zero-shot learning using LLMs at the level of a document, a paragraph, and a sentence.</abstract>
       <url hash="0bdbd21f">2025.findings-acl.17</url>
@@ -6744,7 +6744,7 @@
     <paper id="25">
       <title>Verifying the Steps of Deductive Reasoning Chains</title>
       <author><first>Zacchary</first><last>Sadeddine</last></author>
-      <author><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>456-475</pages>
       <abstract>As Large Language Models penetrate everyday life more and more, it becomes essential to measure the correctness of their output. Inthis paper, we propose a novel task: the automatic verification of individual reasoning steps in a logical deductive Chain-of-Thought. Thistask addresses two well-known problems of LLMs, hallucination and incorrect reasoning. We propose a new dataset of logical reasoningchains, in which the individual deduction steps have been manually annotated for soundness, and benchmark several methods on it. We findthat LLMs can detect unsound reasoning steps fairly well, but argue that verification has to be performed by transparent methods instead.We test symbolic methods, but find that they under-perform. We develop a neuro-symbolic baseline called VANESSA that comes closer to the performance of LLMs.</abstract>
       <url hash="3c49e06a">2025.findings-acl.25</url>
@@ -6822,7 +6822,7 @@
       <title><fixed-case>CRPO</fixed-case>: Confidence-Reward Driven Preference Optimization for Machine Translation</title>
       <author><first>Guofeng</first><last>Cui</last></author>
       <author><first>Pichao</first><last>Wang</last><affiliation>Amazon</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Zemian</first><last>Ke</last></author>
       <author><first>Zhu</first><last>Liu</last><affiliation>Amazon Prime Video</affiliation></author>
       <author><first>Vimal</first><last>Bhat</last><affiliation>Amazon</affiliation></author>
@@ -6851,7 +6851,7 @@
       <title><fixed-case>F</fixed-case>lash<fixed-case>B</fixed-case>ack: Efficient Retrieval-Augmented Language Modeling for Fast Inference</title>
       <author><first>Runheng</first><last>Liu</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Xingchen</first><last>Xiao</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Zewen</first><last>Chi</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Zhijing</first><last>Wu</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>595-608</pages>
@@ -7003,9 +7003,9 @@
       <title><fixed-case>C</fixed-case>oin<fixed-case>M</fixed-case>ath: Harnessing the Power of Coding Instruction for Math <fixed-case>LLM</fixed-case></title>
       <author><first>Chengwei</first><last>Wei</last><affiliation>, A*STAR</affiliation></author>
       <author><first>Bin</first><last>Wang</last></author>
-      <author><first>Jung-jae</first><last>Kim</last><affiliation>A*STAR</affiliation></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last><affiliation>A*STAR</affiliation></author>
       <author><first>Guimei</first><last>Liu</last><affiliation>Institute for Infocomm Research, A*STAR</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>786-797</pages>
       <abstract>Large Language Models (LLMs) have shown strong performance in solving mathematical problems, with code-based solutions proving particularly effective. However, the best practice to leverage coding instruction data to enhance mathematical reasoning remains underexplored. This study investigates three key questions: (1) How do different coding styles of mathematical code-based rationales impact LLMs’ learning performance? (2) Can general-domain coding instructions improve performance? (3) How does integrating textual rationales with code-based ones during training enhance mathematical reasoning abilities? Our findings reveal that code-based rationales with concise comments, descriptive naming, and hardcoded solutions are beneficial, while improvements from general-domain coding instructions and textual rationales are relatively minor. Based on these insights, we propose CoinMath, a learning strategy designed to enhance mathematical reasoning by diversifying the coding styles of code-based rationales. CoinMath generates a variety of code-based rationales incorporating concise comments, descriptive naming conventions, and hardcoded solutions. Experimental results demonstrate that CoinMath significantly outperforms its baseline model, MAmmoTH, one of the SOTA math LLMs.</abstract>
       <url hash="d760543d">2025.findings-acl.44</url>
@@ -7017,7 +7017,7 @@
       <author><first>Zain Muhammad</first><last>Mujahid</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Dilshod</first><last>Azizov</last></author>
       <author><first>Maha Tufail</first><last>Agro</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>798-819</pages>
       <abstract>In an age characterized by the proliferation of mis- and disinformation online, it is critical to empower readers to understand the content they are reading. Important efforts in this direction rely on manual or automatic fact-checking, which can be challenging for emerging claims with limited information. Such scenarios can be handled by assessing the reliability and the political bias of the source of the claim, i.e., characterizing entire news outlets rather than individual claims or articles. This is an important but understudied research direction. While prior work has looked into linguistic and social contexts, we do not analyze individual articles or information in social media. Instead, we propose a novel methodology that emulates the criteria that professional fact-checkers use to assess the factuality and political bias of an entire outlet. Specifically, we design a variety of prompts based on these criteria and elicit responses from large language models (LLMs), which we aggregate to make predictions. In addition to demonstrating sizable improvements over strong baselines via extensive experiments with multiple LLMs, we provide an in-depth error analysis of the effect of media popularity and region on model performance. Further, we conduct an ablation study to highlight the key components of our dataset that contribute to these improvements. To facilitate future research, we released our dataset and code.</abstract>
       <url hash="76fa1de3">2025.findings-acl.45</url>
@@ -7026,7 +7026,7 @@
     </paper>
     <paper id="46">
       <title>Structured Discourse Representation for Factual Consistency Verification</title>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last></author>
+      <author><first>Kun</first><last>Zhang</last></author>
       <author><first>Oana</first><last>Balalau</last><affiliation>INRIA</affiliation></author>
       <author><first>Ioana</first><last>Manolescu</last><affiliation>École Polytechnique and Inria</affiliation></author>
       <pages>820-838</pages>
@@ -7102,7 +7102,7 @@
       <author><first>Ke</first><last>Yang</last></author>
       <author><first>Spencer</first><last>Hulsey</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Xin</first><last>Liu</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Volodymyr</first><last>Kindratenko</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <pages>907-926</pages>
       <abstract>Recent advances in language modeling demonstrate the need for high-quality domain-specific training data, especially for tasks that require specialized knowledge. General-purpose models, while versatile, often lack the depth needed for expert-level tasks because of limited domain-specific information. Domain adaptation training can enhance these models, but it demands substantial, high-quality data. To address this, we propose ORBIT, a cost-efficient methodology for curating massive, high-quality domain-specific datasets from noisy web sources, tailored for training specialist large language models. Using astronomy as a primary case study, we refined the 1.3T-token FineWeb-Edu dataset into a high-quality, 10B-token subset focused on astronomy. Fine-tuning LLaMA-3-8B on a 1B-token astronomy subset improved performance on the MMLU astronomy benchmark from 69% to 76% and achieved top results on AstroBench, an astronomy-specific benchmark. Moreover, our model (Orbit-LLaMA) outperformed LLaMA-3-8B-base, with GPT-4o evaluations preferring it in 73% of cases across 1000 astronomy-specific questions. Additionally, we validated ORBIT’s generalizability by applying it to law and medicine, achieving a significant improvement of data quality compared to an unfiltered baseline. We open-source the ORBIT methodology, including the curated datasets, the codebase, and the resulting model.</abstract>
@@ -7324,7 +7324,7 @@
     <paper id="68">
       <title><fixed-case>M</fixed-case>o<fixed-case>RE</fixed-case>: A Mixture of Low-Rank Experts for Adaptive Multi-Task Learning</title>
       <author><first>Dacao</first><last>Zhang</last></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last><affiliation>Hefei University of Technology</affiliation></author>
+      <author><first>Kun</first><last>Zhang</last><affiliation>Hefei University of Technology</affiliation></author>
       <author><first>Shimao</first><last>Chu</last><affiliation>Hefei University of Technology</affiliation></author>
       <author><first>Le</first><last>Wu</last><affiliation>Hefei University of Technology</affiliation></author>
       <author><first>Xin</first><last>Li</last></author>
@@ -7545,7 +7545,7 @@
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
       <author><first>Viet Thanh</first><last>Pham</last><affiliation>Monash University</affiliation></author>
       <author><first>Farhad</first><last>Moghimifar</last><affiliation>Monash University</affiliation></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <pages>1646-1662</pages>
       <abstract>Despite achieving remarkable performance, machine translation (MT) research remains underexplored in terms of translating cultural elements in languages, such as idioms, proverbs, and colloquial expressions. This paper investigates the capability of state-of-the-art neural machine translation (NMT) and large language models (LLMs) in translating proverbs, which are deeply rooted in cultural contexts. We construct a translation dataset of standalone proverbs and proverbs in conversation for four language pairs. Our experiments show that the studied models can achieve good translation between languages with similar cultural backgrounds, and LLMs generally outperform NMT models in proverb translation. Furthermore, we find that current automatic evaluation metrics such as BLEU, CHRF++ and COMET are inadequate for reliably assessing the quality of proverb translation, highlighting the need for more culturally aware evaluation metrics.</abstract>
       <url hash="10baac4f">2025.findings-acl.83</url>
@@ -7554,7 +7554,7 @@
     </paper>
     <paper id="84">
       <title>Towards Efficient <fixed-case>LLM</fixed-case> Grounding for Embodied Multi-Agent Collaboration</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Shixin</first><last>Yang</last></author>
       <author><first>Chenjia</first><last>Bai</last><affiliation>TeleAI, China Telecom</affiliation></author>
       <author><first>Fei</first><last>Wu</last><affiliation>Zhejiang University</affiliation></author>
@@ -7589,7 +7589,7 @@
       <author><first>Xiaoyi</first><last>Bao</last></author>
       <author><first>HaoYuan</first><last>Ma</last></author>
       <author><first>Shoushan</first><last>Li</last><affiliation>Soochow University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>1716-1729</pages>
       <abstract>Retrieval-augmented methods have achieved remarkable advancements in alleviating the hallucination of large language models.Nevertheless, the introduction of external knowledge does not always lead to the expected improvement in model performance, as irrelevant or harmful information present in the retrieved knowledge can compromise the prediction process.To address these challenges, we propose a novel framework aimed at improving model performance by incorporating knowledge filtering and prediction fusion mechanisms.In particular, our approach first employs a perplexity-based annotation method to collect training data.Then, we design four distinct strategies to filter out harmful retrieved knowledge.Finally, we integrate the filtered knowledge to generate the final result via batch-wise predictions.We conduct extensive experiments across multiple discriminative task datasets to evaluate the proposed framework.The results demonstrate that our framework can significantly enhance the performance of models on discriminative tasks.</abstract>
       <url hash="e94c6abe">2025.findings-acl.86</url>
@@ -7601,7 +7601,7 @@
       <author><first>Chong</first><last>Li</last><affiliation>Institute of automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yingzhuo</first><last>Deng</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>1730-1754</pages>
       <abstract>The curse of multilinguality phenomenon is a fundamental problem of multilingual Large Language Models (LLMs), where the competition between massive languages results in inferior performance. It mainly comes from limited capacity and negative transfer between dissimilar languages. To address this issue, we propose a method to dynamically group and scale up the parameters of multilingual LLM while boosting positive transfer among similar languages. Specifically, the model is first tuned on monolingual corpus to determine the parameter deviation in each layer and quantify the similarity between languages. Layers with more deviations are extended to mixture-of-experts layers to reduce competition between languages, where one expert module serves one group of similar languages. Experimental results on 18 to 128 languages show that our method reduces the negative transfer between languages and significantly boosts multilingual performance with fewer parameters. Such language group specialization on experts benefits the new language adaptation and reduces the inference on the previous multilingual knowledge learned.</abstract>
       <url hash="a9d0da58">2025.findings-acl.87</url>
@@ -7613,7 +7613,7 @@
       <author><first>Fangxu</first><last>Yu</last></author>
       <author><first>Junjie</first><last>Guo</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Zhen</first><last>Wu</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last><affiliation>Nanjing University</affiliation></author>
       <pages>1755-1767</pages>
       <abstract>Emotions are fundamental to conversational understanding. While significant advancements have been achieved in conversational emotion recognition and emotional response generation, recognizing the causes of eliciting emotions is less explored. Previous studies have primarily focused on identifying the causes of emotions by understanding verbal contextual utterances, overlooking that non-verbal emotional cues can elicit emotions. To address this issue, we develop an Emotional Contagion Graph Network (ECGN) that simulates the impact of non-verbal implicit emotions on the counterpart’s emotions. To achieve this, we construct a heterogeneous graph that simulates the transmission of non-verbal emotions alongside verbal influences. By applying message passing between nodes, the constructed graph effectively models both the implicit emotional dynamics and explicit verbal interactions. We evaluate ECGN’s performance through extensive experiments on the benchmark datasets and compare it against multiple state-of-the-art models. Experimental results demonstrate the effectiveness of the proposed model. Our code is available at https://github.com/Yu-Fangxu/ECGN.</abstract>
       <url hash="d520e4e1">2025.findings-acl.88</url>
@@ -7709,7 +7709,7 @@
       <title><fixed-case>EXECUTE</fixed-case>: A Multilingual Benchmark for <fixed-case>LLM</fixed-case> Token Understanding</title>
       <author><first>Lukas</first><last>Edman</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>1878-1887</pages>
       <abstract>The CUTE benchmark showed that LLMs struggle with character understanding in English. We extend it to more languages with diverse scripts and writing systems, introducing EXECUTE. Our simplified framework allows easy expansion to any language. Tests across multiple LLMs reveal that challenges in other languages are not always on the character level as in English. Some languages show word-level processing issues, some show no issues at all. We also examine sub-character tasks in Chinese, Japanese, and Korean to assess LLMs’ understanding of character components.</abstract>
       <url hash="36dac09f">2025.findings-acl.95</url>
@@ -7880,7 +7880,7 @@
     <paper id="108">
       <title>Harnessing <fixed-case>PDF</fixed-case> Data for Improving <fixed-case>J</fixed-case>apanese Large Multimodal Models</title>
       <author><first>Jeonghun</first><last>Baek</last><affiliation>The University of Tokyo</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <author><first>Kiyoharu</first><last>Aizawa</last><affiliation>The University of Tokyo, The University of Tokyo and Tokyo University of Science</affiliation></author>
       <pages>2108-2123</pages>
       <abstract>Large Multimodal Models (LMMs) have demonstrated strong performance in English, but their effectiveness in Japanese remains limited due to the lack of high-quality training data. Current Japanese LMMs often rely on translated English datasets, restricting their ability to capture Japan-specific cultural knowledge. To address this, we explore the potential of Japanese PDF data as a training resource, an area that remains largely underutilized. We introduce a fully automated pipeline that leverages pretrained models to extract image-text pairs from PDFs through layout analysis, OCR, and vision-language pairing, removing the need for manual annotation. Additionally, we construct instruction data from extracted image-text pairs to enrich the training data. To evaluate the effectiveness of PDF-derived data, we train Japanese LMMs and assess their performance on the Japanese LMM Benchmark. Our results demonstrate substantial improvements, with performance gains ranging from 2.1% to 13.8% on Heron-Bench. Further analysis highlights the impact of PDF-derived data on various factors, such as model size and language models, reinforcing its value as a multimodal resource for Japanese LMMs.</abstract>
@@ -7891,9 +7891,9 @@
     <paper id="109">
       <title><fixed-case>E</fixed-case>ner<fixed-case>GIZA</fixed-case>r: Leveraging <fixed-case>GIZA</fixed-case>++ for Effective Tokenizer Initialization</title>
       <author><first>Pranaydeep</first><last>Singh</last></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Gorka</first><last>Azkune</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Orphee</first><last>De Clercq</last><affiliation>Ghent University</affiliation></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last><affiliation>Ghent University</affiliation></author>
       <author><first>Els</first><last>Lefever</last><affiliation>Ghent University</affiliation></author>
       <pages>2124-2137</pages>
       <abstract>Continual pre-training has long been considered the default strategy for adapting models to non-English languages, but struggles with initializing new embeddings, particularly for non-Latin scripts. In this work, we propose EnerGIZAr, a novel methodology that improves continual pre-training by leveraging statistical word alignment techniques. Our approach utilizes GIZA++ to construct a subword-level alignment matrix between source (English) and target language tokens. This matrix enables informed initialization of target tokenizer embeddings, which provides a more effective starting point for adaptation. We evaluate EnerGIZAr against state-of-the-art initialization strategies such as OFA and FOCUS across four typologically diverse languages: Hindi, Basque, Arabic and Korean. Experimental results on key NLP tasks – including POS tagging, Sentiment Analysis, NLI, and NER – demonstrate that EnerGIZAr achieves superior monolingual performance while also out-performing all methods for cross-lingual transfer when tested on XNLI. With EnerGIZAr, we propose an intuitive, explainable as well as state-of-the-art initialisation technique for continual pre-training of English models.</abstract>
@@ -8082,7 +8082,7 @@
     </paper>
     <paper id="122">
       <title><fixed-case>MPL</fixed-case>: Multiple Programming Languages with Large Language Models for Information Extraction</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>Hebei University of Technology</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>Hebei University of Technology</affiliation></author>
       <author><first>Gexiang</first><last>Fang</last></author>
       <author><first>Wei</first><last>Ye</last><affiliation>Peking University</affiliation></author>
       <author><first>Zhenghua</first><last>Xu</last></author>
@@ -8156,7 +8156,7 @@
       <author><first>Giuseppe</first><last>Ruggiero</last><affiliation>University of Turin</affiliation></author>
       <author><first>Matteo</first><last>Testa</last></author>
       <author><first>Jurgen Van De</first><last>Walle</last></author>
-      <author><first>Luigi</first><last>Di Caro</last><affiliation>University of Turin, Italy</affiliation></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last><affiliation>University of Turin, Italy</affiliation></author>
       <pages>2494-2504</pages>
       <abstract>Self-supervised learning (SSL) has reduced the reliance on expensive labeling in speech technologies by learning meaningful representations from unannotated data. Since most SSL-based downstream tasks prioritize content information in speech, ideal representations should disentangle content from unwanted variations like speaker characteristics in the SSL representations. However, removing speaker information often degrades other speech components, and existing methods either fail to fully disentangle speaker identity or require resource-intensive models. In this paper, we propose a novel disentanglement method that linearly decomposes SSL representations into speaker-specific and speaker-independent components, effectively generating speaker disentangled representations. Comprehensive experiments show that our approach achieves speaker independence and as such, when applied to content-driven tasks such as voice conversion, our representations yield significant improvements over state-of-the-art methods.</abstract>
       <url hash="e38938e0">2025.findings-acl.127</url>
@@ -8192,7 +8192,7 @@
       <author><first>Yiming</first><last>Du</last></author>
       <author><first>Bin</first><last>Liang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>Singapore University of Technology and Design</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>2535-2556</pages>
       <abstract>The tendency of Large Language Models (LLMs) to generate hallucinations raises concerns regarding their reliability. Therefore, confidence estimations indicating the extent of trustworthiness of the generations become essential. However, current LLM confidence estimations in languages other than English remain underexplored. This paper addresses this gap by introducing a comprehensive investigation of Multilingual Confidence estimation (MlingConf) on LLMs, focusing on both language-agnostic (LA) and language-specific (LS) tasks to explore the performance and language dominance effects of multilingual confidence estimations on different tasks. The benchmark comprises four meticulously checked and human-evaluated high-quality multilingual datasets for LA tasks and one for the LS task tailored to specific social, cultural, and geographical contexts of a language. Our experiments reveal that on LA tasks English exhibits notable linguistic dominance in confidence estimations than other languages, while on LS tasks, using question-related language to prompt LLMs demonstrates better linguistic dominance in multilingual confidence estimations. The phenomena inspire a simple yet effective native-tone prompting strategy by employing language-specific prompts for LS tasks, effectively improving LLMs’ reliability and accuracy in LS scenarios.</abstract>
       <url hash="eca08d81">2025.findings-acl.129</url>
@@ -8269,7 +8269,7 @@
       <author><first>Shihan</first><last>Dou</last></author>
       <author><first>Qinhao</first><last>Chen</last></author>
       <author><first>Zhiheng</first><last>Xi</last></author>
-      <author id="zhihao-zhang"><first>Zhihao</first><last>Zhang</last></author>
+      <author><first>Zhihao</first><last>Zhang</last></author>
       <author><first>Yi</first><last>Dong</last></author>
       <author><first>Zhen</first><last>Wang</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Zhihui</first><last>Fei</last><affiliation>ByteDance Inc.</affiliation></author>
@@ -8278,7 +8278,7 @@
       <author><first>Guojun</first><last>Ma</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>2626-2649</pages>
       <abstract>Process-driven dialogue systems, which operate under strict predefined process constraints, are essential in customer service and equipment maintenance scenarios. Although Large Language Models (LLMs) have shown remarkable progress in dialogue and reasoning, they still struggle to solve these strictly constrained dialogue tasks. To address this challenge, we construct <b>P</b>rocess <b>F</b>low <b>Dial</b>ogue (<b>PFDial</b>) dataset, which contains 12,705 high-quality Chinese dialogue instructions derived from 440 flowcharts containing 5,055 process nodes. Based on PlantUML specification, each UML flowchart is converted into atomic dialogue units i.e., structured five-tuples. Experimental results demonstrate that a 7B model trained with merely 800 samples, and a 0.5B model trained on total data both can surpass 90% accuracy. Additionally, the 8B model can surpass GPT-4o up to 43.88% with an average of 11.00%. We further evaluate models’ performance on challenging backward transitions in process flows and conduct an in-depth analysis of various dataset formats to reveal their impact on model performance in handling decision and sequential branches. The data is released in <url>https://github.com/KongLongGeFDU/PFDial</url>.</abstract>
       <url hash="ff411600">2025.findings-acl.134</url>
@@ -8301,7 +8301,7 @@
     <paper id="136">
       <title>Do Language Models Understand the Cognitive Tasks Given to Them? Investigations with the N-Back Paradigm</title>
       <author><first>Xiaoyang</first><last>Hu</last><affiliation>Brown University</affiliation></author>
-      <author><first>Richard</first><last>Lewis</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
+      <author id="richard-l-lewis"><first>Richard</first><last>Lewis</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <pages>2665-2677</pages>
       <abstract>Cognitive tasks originally developed for humans are now increasingly used to study language models. While applying these tasks is often straightforward, interpreting their results can be challenging. In particular, when a model underperforms, it is often unclear whether this results from a limitation in the cognitive ability being tested or a failure to understand the task itself. A recent study argues that GPT 3.5’s declining performance on 2-back and 3-back tasks reflects a working memory capacity limit similar to humans (Gong et al., 2024). By analyzing a range of open-source language models of varying performance levels on these tasks, we show that the poor performance is due at least in part to a limitation in task comprehension and task set maintenance. We challenge the best-performing model with progressively harder versions of the task (up to 10-back) and experiment with alternative prompting strategies, before analyzing model attentions. Our larger aim is to contribute to the ongoing conversation around refining methodologies for the cognitive evaluation of language models.</abstract>
       <url hash="963776dc">2025.findings-acl.136</url>
@@ -8429,7 +8429,7 @@
       <title>Cross-Lingual Transfer of Debiasing and Detoxification in Multilingual <fixed-case>LLM</fixed-case>s: An Extensive Investigation</title>
       <author><first>Vera</first><last>Neplenbroek</last></author>
       <author><first>Arianna</first><last>Bisazza</last><affiliation>University of Groningen</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <pages>2805-2830</pages>
       <abstract>Recent generative large language models (LLMs) show remarkable performance in non-English languages, but when prompted in those languages they tend to express higher harmful social biases and toxicity levels. Prior work has shown that finetuning on specialized datasets can mitigate this behavior, and doing so in English can transfer to other languages. In this work, we investigate the impact of different finetuning methods on the model’s bias and toxicity, but also on its ability to produce fluent and diverse text. We reduce biases by finetuning on curated non-harmful text, but find only direct preference optimization to be effective for mitigating toxicity. The mitigation caused by applying these methods in English also transfers to non-English languages. We find evidence that the extent to which transfer takes place can be predicted by the amount of data in a given language present in the model’s pretraining data. However, this transfer of bias and toxicity mitigation often comes at the expense of decreased language generation ability in non-English languages, highlighting the importance of developing language-specific bias and toxicity mitigation methods.</abstract>
       <url hash="a70982e0">2025.findings-acl.145</url>
@@ -8451,7 +8451,7 @@
       <author><first>Ximing</first><last>Dong</last></author>
       <author><first>Shaowei</first><last>Wang</last><affiliation>University of Manitoba</affiliation></author>
       <author><first>Dayi</first><last>Lin</last><affiliation>Huawei Technologies Canada Co., Ltd.</affiliation></author>
-      <author><first>Ahmed</first><last>Hassan</last><affiliation>Queen’s University</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last><affiliation>Queen’s University</affiliation></author>
       <pages>2844-2859</pages>
       <abstract>Optimizing Large Language Model (LLM) performance requires well-crafted prompts, but manual prompt engineering is labor-intensive and often ineffective. Automated prompt optimization techniques address this challenge but the major of them rely on randomly selected evaluation subsets, which fail to represent the full dataset, leading to unreliable evaluations and suboptimal prompts. Existing coreset selection methods, designed for LLM benchmarking, are unsuitable for prompt optimization due to challenges in clustering similar samples, high data collection costs, and the unavailability of performance data for new or private datasets. To overcome these issues, we propose IPOMP, an Iterative evaluation data selection approach for effective Prompt Optimization using real time Model Performance. IPOMP is a two-stage approach that selects representative and diverse samples using semantic clustering and boundary analysis, followed by iterative refinement with real-time model performance data to replace redundant samples. Evaluations on two datasets BIG-bench and LIAR, and two models GPT-3.5 and GPT-4o-mini, show that IPOMP improves effectiveness by at least 1.6% to 3.1%, and stability by at least 50% to 55.5% compared with the best baseline across the studied datasets and models, with minimal computational overhead below 1%. Furthermore, the results demonstrate that our real-time performance-guided refinement approach can be universally applied to enhance existing coreset selection methods.</abstract>
       <url hash="8db2321e">2025.findings-acl.147</url>
@@ -8476,7 +8476,7 @@
       <author><first>Felix</first><last>Drinkall</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Stefan</first><last>Zohren</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Michael</first><last>McMahon</last><affiliation>University of Oxford</affiliation></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last><affiliation>University of Oxford</affiliation></author>
       <pages>2889-2904</pages>
       <abstract>Macroeconomic fluctuations and the narratives that shape them form a mutually reinforcing cycle: public discourse can spur behavioural changes leading to economic shifts, which then result in changes in the stories that propagate. We show that shifts in semantic embedding space can be causally linked to real-world market shocks or deviations from the expected market behaviour (sec:market_shocks). Furthermore, we show how partisanship can influence the predictive power of text for market fluctuations and shape reactions to those same shocks. We also provide some evidence that text-based signals are particularly salient during rare events such as COVID-19, highlighting the value of language data as an exogenous variable in economic forecasting. Our findings underscore the bidirectional relationship between news outlets and market shocks, offering a novel empirical approach to studying their effect on each other.</abstract>
       <url hash="d34b7a3d">2025.findings-acl.149</url>
@@ -8557,7 +8557,7 @@
       <title>Explaining Puzzle Solutions in Natural Language: An Exploratory Study on 6x6 Sudoku</title>
       <author><first>Anirudh</first><last>Maiya</last></author>
       <author><first>Razan</first><last>Alghamdi</last><affiliation>King Saud University</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Ashutosh</first><last>Trivedi</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Fabio</first><last>Somenzi</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>3002-3009</pages>
@@ -8602,7 +8602,7 @@
       <author><first>Zhuohan</first><last>Xie</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Chenyang</first><last>Lyu</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Xiuying</first><last>Chen</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Fakhri</first><last>Karray</last><affiliation>University of Waterloo and Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>3047-3059</pages>
       <abstract>The rapid advancement of vision-language models (VLMs) has brought a lot of attention to their safety alignment. However, existing methods have primarily focused on model undersafety, where the model responds to hazardous queries, while neglecting oversafety, where the model refuses to answer safe queries. In this paper, we introduce the concept of safety calibration, which systematically addresses both undersafety and oversafety. Specifically, we present VSCBench, a novel dataset of 3,600 image-text pairs that are visually or textually similar but differ in terms of safety, which is designed to evaluate safety calibration across image-centric and text-centric scenarios. Based on our benchmark, we evaluate safety calibration across eleven widely used VLMs. Our extensive experiments revealed major issues with both undersafety and oversafety. We further investigated four approaches to improve the model’s safety calibration. We found that even though some methods effectively calibrated the models’ safety problems, these methods also lead to the degradation of models’ utility. This trade-off underscores the urgent need for advanced calibration methods, and our benchmark provides a valuable tool for evaluating future approaches.</abstract>
@@ -8640,7 +8640,7 @@
       <author><first>James Xu</first><last>Zhao</last><affiliation>national university of singaore, National University of Singapore</affiliation></author>
       <author><first>Jimmy Z.j.</first><last>Liu</last></author>
       <author><first>Bryan</first><last>Hooi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <pages>3102-3125</pages>
       <abstract>Large language models (LLMs) are widely used for long-form text generation. However, factual errors in the responses would undermine their reliability. Despite growing attention to LLM factuality, the effect of response length on factuality remains underexplored. In this work, we systematically investigate this relationship by first introducing an automatic and bi-level long-form factuality evaluation framework, which achieves high agreement with human annotations while being cost-effective. Using this framework, we conduct controlled experiments and find that longer responses exhibit lower factual precision, confirming the presence of length bias. To explain this phenomenon, we empirically examine three hypotheses: error propagation, long context, and facts exhaustion. Our results reveal that facts exhaustion, where the model gradually exhausts more reliable knowledge, is the primary cause of factual degradation, rather than the other two hypotheses.</abstract>
       <url hash="3ffbad76">2025.findings-acl.161</url>
@@ -8739,7 +8739,7 @@
       <author><first>Haijun</first><last>He</last></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>3221-3235</pages>
       <abstract>Stance detection, which aims to identify public opinion towards specific targets using social media data, is an important yet challenging task. With the increasing number of online debates among social media users, conversational stance detection has become a crucial research area. However, existing conversational stance detection datasets are restricted to a limited set of specific targets, which constrains the effectiveness of stance detection models when encountering a large number of unseen targets in real-world applications. To bridge this gap, we manually curate a large-scale, high-quality zero-shot conversational stance detection dataset, named ZS-CSD, comprising 280 targets across two distinct target types. Leveraging the ZS-CSD dataset, we propose SITPCL, a speaker interaction and target-aware prototypical contrastive learning model, and establish the benchmark performance in the zero-shot setting. Experimental results demonstrate that our proposed SITPCL model achieves state-of-the-art performance in zero-shot conversational stance detection. Notably, the SITPCL model attains only an F1-macro score of 43.81%, highlighting the persistent challenges in zero-shot conversational stance detection.</abstract>
       <url hash="9c1c6de6">2025.findings-acl.168</url>
@@ -8815,7 +8815,7 @@
       <title><i>
           <fixed-case>D</fixed-case>-<fixed-case>GEN</fixed-case></i>: Automatic Distractor Generation and Evaluation for Reliable Assessment of Generative Models</title>
       <author><first>Grace</first><last>Byun</last><affiliation>Emory University</affiliation></author>
-      <author><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last><affiliation>Emory University</affiliation></author>
       <pages>3316-3349</pages>
       <abstract>Evaluating generative models with open-ended generation is challenging due to inconsistencies in response formats. Multiple-choice (MC) evaluation mitigates this issue, but generating high-quality distractors is time-consuming and labor-intensive. We introduce <i>D-GEN</i>, the first open-source distractor generator model that transforms open-ended data into an MC format. To evaluate distractor quality, we propose two novel methods: 1) ranking alignment, ensuring generated distractors retain the discriminatory power of ground-truth distractors, and 2) entropy analysis, comparing model confidence distributions. Our results show that <i>D-GEN</i> preserves ranking consistency (Spearman’s <tex-math>\rho</tex-math> 0.99, Kendall’s <tex-math>\tau</tex-math> 0.94) and closely matches the entropy distribution of ground-truth distractors. Human evaluation further confirms the fluency, coherence, distractiveness, and incorrectness. Our work advances robust and efficient distractor generation with automated evaluation, setting a new standard for MC evaluation.</abstract>
       <url hash="02f8f6f3">2025.findings-acl.174</url>
@@ -8832,7 +8832,7 @@
       <author><first>Qiqiang</first><last>Lin</last><affiliation>Guangdong OPPO Mobile Telecommunications Corp.,Ltd.</affiliation></author>
       <author><first>Jincheng</first><last>Jincheng</last><affiliation>Guangdong OPPO Mobile Telecommunications Corp.,Ltd.</affiliation></author>
       <author><first>Muning</first><last>Wen</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Qiuying</first><last>Peng</last><affiliation>OPPO Research Institute</affiliation></author>
       <author><first>Jun</first><last>Wang</last><affiliation>OPPO Research Institute</affiliation></author>
       <pages>3350-3376</pages>
@@ -8843,14 +8843,14 @@
     </paper>
     <paper id="176">
       <title>Beyond In-Context Learning: Aligning Long-form Generation of Large Language Models via Task-Inherent Attribute Guidelines</title>
-      <author><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Duong Ngoc</first><last>Yen</last></author>
       <author><first>Do Xuan</first><last>Trong</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>3377-3411</pages>
       <abstract>In-context learning (ICL) is an important yet not fully understood ability of pre-trained large language models (LLMs). It can greatly enhance task performance using a few examples, termed demonstrations, without fine-tuning. Although effective in question answering, ICL often underperforms in long-form generation tasks such as summarization. Under appropriately realistic assumptions, we empirically and theoretically show that ICL demonstrations alone are insufficient to teach LLMs the task’s language and format distributions for generation. We argue for explicit exposure to the task distributions and hypothesize that defining them by prompting enhances model performance. To this end, we present LongGuide, which efficiently generates two parallel streams of guidelines capturing task language and format properties: (i) Metric Guidelines (MGs) that instruct models to optimize self-evaluated metrics; and (ii) Output Constraint Guidelines (OCGs) that constrain generation at both token and sentence levels. LongGuide automatically selects the best combination of guidelines, improving both strong open- and closed-source LLMs by over 5% in both zero- and few-shot settings. We show that LongGuide is generalizable, learnable by weak models to enhance strong ones, and integrates synergistically with automatic prompt optimizers.</abstract>
       <url hash="1f23a655">2025.findings-acl.176</url>
@@ -8899,12 +8899,12 @@
       <title>m<fixed-case>OSCAR</fixed-case>: A Large-scale Multilingual and Multimodal Document-level Corpus</title>
       <author><first>Matthieu</first><last>Futeral</last></author>
       <author><first>Armel Randy</first><last>Zebaze</last><affiliation>INRIA</affiliation></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>Julien</first><last>Abadji</last><affiliation>INRIA</affiliation></author>
       <author><first>Rémi</first><last>Lacroix</last><affiliation>Institut du développement et des ressources en informatique scientifique (IDRIS)</affiliation></author>
       <author><first>Cordelia</first><last>Schmid</last><affiliation>Google, INRIA and Inria</affiliation></author>
       <author><first>Rachel</first><last>Bawden</last><affiliation>Inria</affiliation></author>
-      <author><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last><affiliation>Inria</affiliation></author>
       <pages>3461-3494</pages>
       <abstract>Multimodal Large Language Models (mLLMs) are trained on a large amount of text-image data. While most mLLMs are trained on caption-like data only, Alayrac et al. (2022) showed that additionally training them on interleaved sequences of text and images can lead to the emergence of in-context learning capabilities. However, the dataset they used, M3W, is not public and is only in English. There have been attempts to reproduce their results but the released datasets are English-only. In contrast, current multilingual and multimodal datasets are either composed of caption-like only or medium-scale or fully private data. This limits mLLM research for the 7,000 other languages spoken in the world. We therefore introduce mOSCAR, to the best of our knowledge the first large-scale multilingual and multimodal document corpus crawled from the web. It covers 163 languages, 303M documents, 200B tokens and 1.15B images. We carefully conduct a set of filtering and evaluation steps to make sure mOSCAR is sufficiently safe, diverse and of good quality. We additionally train two types of multilingual model to prove the benefits of mOSCAR: (1) a model trained on a subset of mOSCAR and captioning data and (2) a model trained on captioning data only. The model additionally trained on mOSCAR shows a strong boost in few-shot learning performance across various multilingual image-text tasks and benchmarks, confirming previous findings for English-only mLLMs. The dataset will be made publicly accessible under the Creative Commons CC BY 4.0 license.</abstract>
       <url hash="fcf43622">2025.findings-acl.180</url>
@@ -8919,7 +8919,7 @@
       <author><first>Hans Christian</first><last>Farsethås</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Andrey</first><last>Kutuzov</last><affiliation>University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <pages>3495-3541</pages>
       <abstract>This paper introduces NorEval, a new and comprehensive evaluation suite for large-scale standardized benchmarking of Norwegian generative language models (LMs). NorEval consists of 24 high-quality human-created datasets – of which five are created from scratch. In contrast to existing benchmarks for Norwegian, NorEval covers a broad spectrum of task categories targeting Norwegian language understanding and generation, establishes human baselines, and focuses on both of the official written standards of the Norwegian language: Bokmål and Nynorsk. All our datasets and a collection of over 100 human-created prompts are integrated into LM Evaluation Harness, ensuring flexible and reproducible evaluation. We describe the NorEval design and present the results of benchmarking 19 open-source pretrained and instruction-tuned LMs for Norwegian in various scenarios. Our benchmark, evaluation framework, and annotation materials are publicly available.</abstract>
       <url hash="a1cfbfdd">2025.findings-acl.181</url>
@@ -8930,7 +8930,7 @@
       <title>Massively Multilingual Instruction-Following Information Extraction</title>
       <author><first>Thang</first><last>Le</last></author>
       <author><first>Huy Huu</first><last>Nguyen</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Thien Huu</first><last>Nguyen</last><affiliation>University of Oregon</affiliation></author>
       <pages>3542-3585</pages>
       <abstract>The literature on information extraction (IE) has mostly centered around a selected few languages, hindering their applications on multilingual corpora. In this work, we introduce MASSIE - a comprehensive collection for instruction-following multilingual IE that standardizes and unifies 215 manually annotated datasets, covering 96 typologically diverse languages from 18 language families. Based on MASSIE, we conduct empirical studies on few-shot in-context learning and report important factors that either positively or negatively affect LLMs’ performance in multilingual IE, covering 21 LLMs sizing from 0.5B to 72B. Additionally, we introduce LF1 - a structure-aware metric that captures partially matched spans, resolving the conservativeness of standard exact matching scheme which overpenalizes LLMs’ predictions. Overall, our results signify that multilingual IE remains very challenging for existing LLMs, especially on complex tasks involving relations and events. In addition, performance gap is extremely large among high- and low-performing languages, but the group of similar-performing languages largely overlap between different LLMs, suggesting a shared performance bias in current LLMs.</abstract>
@@ -8945,7 +8945,7 @@
       <author><first>Haining</first><last>Wang</last></author>
       <author><first>Fei</first><last>Li</last><affiliation>Wuhan University</affiliation></author>
       <author><first>Chong</first><last>Teng</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>3586-3601</pages>
       <abstract>Previous multimodal sentence representation learning methods have achieved impressive performance. However, most approaches focus on aligning images and text at a coarse level, facing two critical challenges: cross-modal misalignment bias and intra-modal semantic divergence, which significantly degrade sentence representation quality. To address these challenges, we propose DALR (Dual-level Alignment Learning for Multimodal Sentence Representation). For cross-modal alignment, we propose a consistency learning module that softens negative samples and utilizes semantic similarity from an auxiliary task to achieve fine-grained cross-modal alignment. Additionally, we contend that sentence relationships go beyond binary positive-negative labels, exhibiting a more intricate ranking structure. To better capture these relationships and enhance representation quality, we integrate ranking distillation with global intra-modal alignment learning. Comprehensive experiments on semantic textual similarity (STS) and transfer (TR) tasks validate the effectiveness of our approach, consistently demonstrating its superiority over state-of-the-art baselines.</abstract>
       <url hash="a77e929f">2025.findings-acl.183</url>
@@ -8991,7 +8991,7 @@
       <author><first>Jie</first><last>Ma</last></author>
       <author><first>Neha</first><last>Anna John</last></author>
       <author><first>Srikanth</first><last>Doss</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <author><first>Miguel</first><last>Ballesteros</last><affiliation>Oracle</affiliation></author>
       <author><first>Yassine</first><last>Benajiba</last></author>
       <pages>3631-3643</pages>
@@ -9074,7 +9074,7 @@
       <author><first>Debela</first><last>Gemechu</last></author>
       <author><first>Ramon</first><last>Ruiz-Dolz</last><affiliation>University of Dundee</affiliation></author>
       <author><first>Henrike</first><last>Beyer</last><affiliation>University of Dundee</affiliation></author>
-      <author><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last><affiliation>University of Dundee</affiliation></author>
       <pages>3717-3741</pages>
       <abstract>While Large Language Models (LLMs) have demonstrated promising results on a range of reasoning benchmarks—particularly in formal logic, mathematical tasks, and Chain-of-Thought prompting—less is known about their capabilities in unconstrained natural language reasoning. Argumentative reasoning, a form of reasoning naturally expressed in language and central to everyday discourse, presents unique challenges for LLMs due to its reliance on context, implicit assumptions, and value judgments. This paper addresses a gap in the study of reasoning in LLMs by presenting the first large-scale evaluation of their unconstrained natural language reasoning capabilities based on natural language argumentation. The paper offers three contributions: (i) the formalisation of a new strategy designed to evaluate argumentative reasoning in LLMs: argument-component selection; (ii) the creation of the Argument Reasoning Tasks (ART) dataset, a new benchmark for argument-component selection based on argument structures for natural language reasoning; and (iii) an extensive experimental analysis involving four different models, demonstrating the limitations of LLMs on natural language reasoning tasks.</abstract>
       <url hash="647763d7">2025.findings-acl.192</url>
@@ -9153,7 +9153,7 @@
       <author><first>Sharath</first><last>Naganna</last></author>
       <author><first>Saprativa</first><last>Bhattacharjee</last><affiliation>Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay</affiliation></author>
       <author><first>Biplab</first><last>Banerjee</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>3838-3858</pages>
       <abstract>Humblebragging is a phenomenon in which individuals present self-promotional statements under the guise of modesty or complaints. For example, a statement like, “Ugh, I can’t believe I got promoted to lead the entire team. So stressful!”, subtly highlights an achievement while pretending to be complaining. Detecting humblebragging is important for machines to better understand the nuances of human language, especially in tasks like sentiment analysis and intent recognition. However, this topic has not yet been studied in computational linguistics. For the first time, we introduce the task of automatically detecting humblebragging in text. We formalize the task by proposing a 4-tuple definition of humblebragging and evaluate machine learning, deep learning, and large language models (LLMs) on this task, comparing their performance with humans. We also create and release a dataset called HB-24, containing 3,340 humblebrags generated using GPT-4o. Our experiments show that detecting humblebragging is non-trivial, even for humans. Our best model achieves an F1-score of 0.88. This work lays the foundation for further exploration of this nuanced linguistic phenomenon and its integration into broader natural language understanding systems.</abstract>
       <url hash="754f5a21">2025.findings-acl.198</url>
@@ -9217,7 +9217,7 @@
       <author><first>Emily</first><last>Sheng</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Dan</first><last>Vann</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Matthew</first><last>Vogel</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
       <pages>3907-3932</pages>
       <abstract>Representational harms are widely recognized among fairness-related harms caused by generative language systems. However, their definitions are commonly under-specified. We make a theoretical contribution to the specification of representational harms by introducing a framework, grounded in speech act theory (Austin 1962), that conceptualizes representational harms caused by generative language systems as the perlocutionary effects (i.e., real-world impacts) of particular types of illocutionary acts (i.e., system behaviors). Building on this argument and drawing on relevant literature from linguistic anthropology and sociolinguistics, we provide new definitions of stereotyping, demeaning, and erasure. We then use our framework to develop a granular taxonomy of illocutionary acts that cause representational harms, going beyond the high-level taxonomies presented in previous work. We also discuss the ways that our framework and taxonomy can support the development of valid measurement instruments. Finally, we demonstrate the utility of our framework and taxonomy via a case study that engages with recent conceptual debates about what constitutes a representational harm and how such harms should be measured.</abstract>
       <url hash="e2d03c09">2025.findings-acl.202</url>
@@ -9226,7 +9226,7 @@
     </paper>
     <paper id="203">
       <title>Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service <fixed-case>AI</fixed-case> Agents</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Xiangyu</first><last>Peng</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Shilpa</first><last>Bhagavath</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
@@ -9308,9 +9308,9 @@
     <paper id="209">
       <title><fixed-case>MALAMUTE</fixed-case>: A Multilingual, Highly-granular, Template-free, Education-based Probing Dataset</title>
       <author><first>Sagi</first><last>Shaier</last></author>
-      <author><first>George Arthur</first><last>Baker</last><affiliation>University of Utah and University of Colorado Boulder</affiliation></author>
+      <author id="george-baker"><first>George Arthur</first><last>Baker</last><affiliation>University of Utah and University of Colorado Boulder</affiliation></author>
       <author><first>Chiranthan</first><last>Sridhar</last></author>
-      <author><first>Lawrence</first><last>Hunter</last><affiliation>University of Chicago</affiliation></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last><affiliation>University of Chicago</affiliation></author>
       <author><first>Katharina Von Der</first><last>Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
       <pages>4051-4069</pages>
       <abstract>Language models (LMs) have excelled in various broad domains. However, to ensure their safe and effective integration into real-world educational settings, they must demonstrate proficiency in specific, granular areas of knowledge. Existing cloze-style benchmarks, commonly used to evaluate LMs’ knowledge, have three major limitations. They: 1) do not cover the educational domain; 2) typically focus on low-complexity, generic knowledge or broad domains, which do not adequately assess the models’ knowledge in specific subjects; and 3) often rely on templates that can bias model predictions. Here, we introduce MALAMUTE, a multilingual, template-free, and highly granular probing dataset comprising expert-written, peer-reviewed probes from 71 university-level textbooks across three languages (English, Spanish, and Polish). MALAMUTE is the first education-based cloze-style dataset. It covers eight domains, each with up to 14 subdomains, further broken down into concepts and concept-based prompts, totaling 33,361 university curriculum concepts and 116,887 prompts. MALAMUTE’s fine granularity, educational focus, and inclusion of both sentence-level and paragraph-level prompts make it an ideal tool for evaluating LMs’ course-related knowledge. Our evaluation of masked and causal LMs on MALAMUTE shows that despite overall proficiency, they have significant gaps in knowledge when examined closely on specific subjects, hindering their safe use in classrooms and underscoring the need for further development.</abstract>
@@ -9366,7 +9366,7 @@
       <author><first>Yifan.zhang</first><last>Yifan.zhang</last></author>
       <author><first>Hua</first><last>Xu</last></author>
       <author><first>Shuai</first><last>Fan</last><affiliation>AISpeech Ltd</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>4118-4130</pages>
       <abstract>Dialogue text segmentation aims to partition dialogue content into consecutive paragraphs based on themes or logic, enhancing its comprehensibility and manageability. Current text segmentation models, when applied directly to STS (Streaming Text Segmentation), exhibit numerous limitations, such as imbalances in labels that affect the stability of model training, and discrepancies between the model’s training tasks (sentence classification) and the actual text segmentation that limit the model’s segmentation capabilities.To address these challenges, we first implement STS for the first time using a sliding window-based segmentation method. Secondly, we employ two different levels of sliding window-based balanced label strategies to stabilize the training process of the streaming segmentation model and enhance training convergence speed. Finally, by adding a one-dimensional bounding-box regression task for text sequences within the window, we restructure the training approach of STS tasks, shifting from sentence classification to sequence segmentation, thereby aligning the training objectives with the task objectives, which further enhanced the model’s performance. Extensive experimental results demonstrate that our method is robust, controllable, and achieves state-of-the-art performance.</abstract>
       <url hash="ae793aab">2025.findings-acl.213</url>
@@ -9465,7 +9465,7 @@
       <author><first>Zhaocheng</first><last>Du</last></author>
       <author><first>Xiangyang</first><last>Li</last></author>
       <author><first>Yichao</first><last>Wang</last></author>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last></author>
+      <author><first>Yuhao</first><last>Wang</last></author>
       <author><first>Qidong</first><last>Liu</last><affiliation>City University of Hong Kong and Xi’an Jiaotong University</affiliation></author>
       <author><first>Maolin</first><last>Wang</last></author>
       <author><first>Huifeng</first><last>Guo</last></author>
@@ -9497,8 +9497,8 @@
     <paper id="222">
       <title>Corpus Poisoning via Approximate Greedy Gradient Descent</title>
       <author><first>Jinyan</first><last>Su</last><affiliation>Cornell University</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last><affiliation>Cornell University</affiliation></author>
       <pages>4274-4294</pages>
       <abstract>Dense retrievers are widely used in information retrieval and have also been successfully extended to other knowledge intensive areas such as language models, e.g., Retrieval-Augmented Generation (RAG) systems. Unfortunately, they have recently been shown to be vulnerable to corpus poisoning attacks in which a malicious user injects a small fraction of adversarial passages into the retrieval corpus to trick the system into returning these passages among the top-ranked results for a broad set of user queries. Further study is needed to understand the extent to which these attacks could limit the deployment of dense retrievers in real-world applications. In this work, we propose Approximate Greedy Gradient Descent (AGGD), a new attack on dense retrieval systems based on the widely used HotFlip method for efficiently generating adversarial passages. We demonstrate that AGGD can select a higher quality set of token-level perturbations than HotFlip by replacing its random token sampling with a more structured search. Experimentally, we show that our method achieves a high attack success rate on several datasets and using several retrievers, and can generalize to unseen queries and new domains. Notably, our method is extremely effective in attacking the ANCE retrieval model, achieving attack success rates that are 15.24% and 17.44% higher on the NQ and MS MARCO datasets, respectively, compared to HotFlip. Additionally, we demonstrate AGGD’s potential to replace HotFlip in other adversarial attacks, such as knowledge poisoning of RAG systems.</abstract>
       <url hash="338e8263">2025.findings-acl.222</url>
@@ -9554,7 +9554,7 @@
     </paper>
     <paper id="226">
       <title>Fraud-R1 : A Multi-Round Benchmark for Assessing the Robustness of <fixed-case>LLM</fixed-case> Against Augmented Fraud and Phishing Inducements</title>
-      <author id="shu-yang"><first>Shu</first><last>Yang</last></author>
+      <author><first>Shu</first><last>Yang</last></author>
       <author><first>Shenzhe</first><last>Zhu</last></author>
       <author><first>Zeyu</first><last>Wu</last></author>
       <author><first>Keyu</first><last>Wang</last></author>
@@ -9651,7 +9651,7 @@
       <title><fixed-case>CLIX</fixed-case>: Cross-Lingual Explanations of Idiomatic Expressions</title>
       <author><first>Aaron</first><last>Gluck</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Katharina Von Der</first><last>Wense</last><affiliation>Johannes-Gutenberg Universität Mainz, Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>4515-4529</pages>
       <abstract>Automated definition generation systems have been proposed to support vocabulary expansion for language learners. The main barrier to the success of these systems is that learners often struggle to understand definitions due to the presence of potentially unfamiliar words and grammar, particularly when non-standard language is involved. To address these challenges, we propose CLIX, the task of Cross-Lingual explanations of Idiomatic eXpressions. We explore the capabilities of current NLP models for this task, and observe that while it remains challenging, large language models show promise. Finally, we perform a detailed error analysis to highlight the key challenges that need to be addressed before we can reliably incorporate these systems into educational tools.</abstract>
       <url hash="322abd28">2025.findings-acl.233</url>
@@ -9716,7 +9716,7 @@
       <author><first>Daoan</first><last>Zhang</last></author>
       <author><first>Hassan</first><last>Foroosh</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Dong</first><last>Yu</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>4587-4603</pages>
       <abstract>LLMs are ideal for decision-making thanks to their ability to reason over long contexts. However, challenges arise when processing speech transcripts that describe complex scenarios, as they are verbose and include repetition, hedging, and vagueness. E.g., during a company’s earnings call, an executive might project a positive revenue outlook to reassure investors, despite uncertainty regarding future earnings. It is crucial for LLMs to incorporate this uncertainty systematically when making decisions. In this paper, we introduce DeFine, a modular framework that constructs probabilistic factor profiles from complex scenarios. It then integrates these profiles with analogical reasoning, leveraging insights from similar past experiences to guide LLMs in making critical decisions in new situations. Our framework separates the tasks of quantifying uncertainty and incorporating it into LLM decision-making. This approach is particularly useful in areas such as consulting and financial deliberation, where making decisions under uncertainty is vital.</abstract>
       <url hash="023ca35c">2025.findings-acl.238</url>
@@ -9729,9 +9729,9 @@
       <author><first>Emre Can</first><last>Acikgoz</last></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xiusi</first><last>Chen</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Avirup</first><last>Sil</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
-      <author><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last><affiliation>International Business Machines</affiliation></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last><affiliation>University of Illinois at Urbana-Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>4604-4621</pages>
       <abstract>Current Large Language Model (LLM) agents demonstrate strong reasoning and tool use capabilities, but often lack self-awareness, failing to balance these approaches effectively. This imbalance leads to **Tool Overuse**, where models unnecessarily rely on external tools for tasks solvable with parametric knowledge, increasing computational overhead. Inspired by human metacognition, we introduce **SMART** (Strategic Model-Aware Reasoning with Tools), a paradigm that enhances an agent’s self-awareness to optimize task handling and reduce tool overuse. To support this paradigm, we introduce **SMART-ER**, a dataset spanning three domains, where reasoning alternates between parametric knowledge and tool-dependent steps, with each step enriched by rationales explaining when tools are necessary. Through supervised training, we develop **SMARTAgent**, a family of models that dynamically balance parametric knowledge and tool use. Evaluations show that SMARTAgent reduces tool use by 24% while improving performance by over 37%, enabling 7B-scale models to match its 70B counterpart and GPT-4. Additionally, SMARTAgent generalizes to out-of-distribution test data like GSM8K and MINTQA, maintaining accuracy with just one-fifth the tool calls. These highlight the potential of strategic tool use to enhance reasoning, mitigate overuse, and bridge the gap between model size and performance, advancing intelligent and resource-efficient agent designs.</abstract>
@@ -9744,7 +9744,7 @@
       <author><first>Pablo</first><last>Rodríguez</last></author>
       <author><first>Silvia Paniagua</first><last>Suárez</last></author>
       <author><first>Pablo</first><last>Gamallo</last><affiliation>Universidad de Santiago de Compostela</affiliation></author>
-      <author><first>Susana Sotelo</first><last>Docio</last><affiliation>Universidade de Santiago de Compostela</affiliation></author>
+      <author id="susana-sotelo"><first>Susana Sotelo</first><last>Docio</last><affiliation>Universidade de Santiago de Compostela</affiliation></author>
       <pages>4622-4637</pages>
       <abstract>Recent advances in Large Language Models (LLMs) have led to remarkable improvements in language understanding and text generation. However, challenges remain in enhancing their performance for underrepresented languages, ensuring continual learning without catastrophic forgetting, and developing robust evaluation methodologies. This work addresses these issues by investigating the impact of Continued Pretraining (CPT) on multilingual models and proposing a comprehensive evaluation framework for LLMs, focusing on the case of Galician language. Our first contribution explores CPT strategies for languages with limited representation in multilingual models. We analyze how CPT with Galician corpora improves text generation while assessing the trade-offs between linguistic enrichment and task-solving capabilities. Our findings show that CPT with small, high-quality corpora and diverse instructions enhances both task performance and linguistic quality. Our second contribution is a structured evaluation framework based on distinguishing task-based and language-based assessments, leveraging existing and newly developed benchmarks for Galician. Additionally, we contribute new Galician LLMs, datasets for evaluation and instructions, and an evaluation framework.</abstract>
       <url hash="f27d7418">2025.findings-acl.240</url>
@@ -9832,7 +9832,7 @@
       <author><first>Qiao</first><last>Qiao</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Bach</first><last>Nguyen</last></author>
       <author><first>Qing</first><last>Wang</last><affiliation>Iowa State University</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <pages>4789-4807</pages>
       <abstract>Retrieval-augmented generation (RAG) improves Large Language Models (LLMs) by incorporating external information into the response generation process. However, how context-faithful LLMs are and what factors influence LLMs’ context faithfulness remain largely unexplored. In this study, we investigate the impact of memory strength and evidence presentation on LLMs’ receptiveness to external evidence. We quantify the memory strength of LLMs by measuring the divergence in LLMs’ responses to different paraphrases of the same question, which is not considered by previous works. We also generate evidence in various styles to examine LLMs’ behavior. Our results show that for questions with high memory strength, LLMs are more likely to rely on internal memory. Furthermore, presenting paraphrased evidence significantly increases LLMs’ receptiveness compared to simple repetition or adding details. These findings provide key insights for improving retrieval-augmented generation and context-aware LLMs. Our code is available at https://github.com/liyp0095/ContextFaithful.</abstract>
       <url hash="a227978a">2025.findings-acl.247</url>
@@ -9862,7 +9862,7 @@
       <author><first>Can</first><last>Qin</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Haoyi</first><last>Qiu</last><affiliation>UCLA Computer Science Department, University of California, Los Angeles</affiliation></author>
       <author><first>Philippe</first><last>Laban</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>4830-4843</pages>
@@ -9923,7 +9923,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Hoyun</first><last>Song</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>SeungYoon</first><last>Han</last></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>4895-4924</pages>
       <abstract>We introduce EXIT, an extractive context compression framework that enhances both the effectiveness and efficiency of retrieval-augmented generation (RAG) in question answering (QA). Current RAG systems often struggle when retrieval models fail to rank the most relevant documents, leading to the inclusion of more context at the expense of latency and accuracy. While abstractive compression methods can drastically reduce token counts, their token-by-token generation process significantly increases end-to-end latency. Conversely, existing extractive methods reduce the latency but rely on independent, non-adaptive sentence selection, failing to fully utilize contextual information. EXIT addresses these limitations by classifying sentences from retrieved documents—while preserving their contextual dependencies—enabling parallelizable, context-aware extraction that adapts to query complexity and retrieval quality. Our evaluations on both single-hop and multi-hop QA tasks show that EXIT consistently surpasses existing compression methods and even uncompressed baselines in QA accuracy, while also delivering substantial reductions in inference time and token count. By improving both effectiveness and efficiency, EXIT provides a promising direction for developing scalable, high-quality QA solutions in RAG pipelines. Our code is available at https://github.com/ThisIsHwang/EXIT.</abstract>
       <url hash="6e970081">2025.findings-acl.253</url>
@@ -9958,7 +9958,7 @@
       <author><first>Yanhao</first><last>Jia</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Meihuizi</first><last>Jia</last><affiliation>Northwest Normal University Lanzhou</affiliation></author>
       <author><first>Feng</first><last>Yichao</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>4937-4952</pages>
       <abstract>Parameter-efficient fine-tuning (PEFT) can bridge the gap between large language models (LLMs) and downstream tasks. However, PEFT has been proven vulnerable to malicious attacks. Research indicates that poisoned LLMs, even after PEFT, retain the capability to activate internalized backdoors when input samples contain predefined triggers. In this paper, we introduce a novel weak-to-strong unlearning algorithm to defend against backdoor attacks based on feature alignment knowledge distillation, named W2SDefense. Specifically, we first train a small-scale language model through full-parameter fine-tuning to serve as the clean teacher model. Then, this teacher model guides the large-scale poisoned student model in unlearning the backdoor, leveraging PEFT. Theoretical analysis suggests that W2SDefense has the potential to enhance the student model’s ability to unlearn backdoor features, preventing the activation of the backdoor. We conduct comprehensive experiments on three state-of-the-art large language models and several different backdoor attack algorithms. Our empirical results demonstrate the outstanding performance of W2SDefense in defending against backdoor attacks without compromising model performance.</abstract>
       <url hash="b4e3a3c5">2025.findings-acl.255</url>
@@ -9971,7 +9971,7 @@
       <author><first>Guoyin</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Yizhong</first><last>Wang</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Jiwei</first><last>Li</last><affiliation>Zhejiang University</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <author><first>Chen</first><last>Guo</last></author>
       <pages>4953-4967</pages>
       <abstract>Packing, initially utilized in the pre-training phase, is an optimization technique designed to maximize hardware resource efficiency by combining different training sequences to fit the model’s maximum input length. Although it has demonstrated effectiveness during pre-training, there remains a lack of comprehensive analysis for the supervised fine-tuning (SFT) stage on the following points: (1) whether packing can effectively enhance training efficiency while maintaining performance, (2) the suitable size of the model and dataset for fine-tuning with the packing method, and (3) whether packing unrelated or related training samples might cause the model to either excessively disregard or over-rely on the context.In this paper, we perform extensive comparisons between SFT methods using padding and packing, covering SFT datasets ranging from 69K to 1.2M and models from 8B to 70B. This provides the first comprehensive analysis of the advantages and limitations of packing versus padding, as well as practical considerations for implementing packing in various training scenarios. Our analysis covers various benchmarks, including knowledge, reasoning, and coding, as well as GPT-based evaluations, time efficiency, and other fine-tuning parameters. We also open-source our code for fine-tuning and evaluation and provide checkpoints fine-tuned on datasets of different sizes, aiming to advance future research on packing methods.</abstract>
@@ -10067,7 +10067,7 @@
       <author><first>Keping</first><last>Bi</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Wei</first><last>Chen</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>5081-5097</pages>
       <abstract>As large language models (LLMs) become an important way of information access, there have been increasing concerns that LLMs may intensify the spread of unethical content, including implicit bias that hurts certain populations without explicit harmful words. In this paper, we conduct a rigorous evaluation of LLMs’ implicit bias towards certain demographics by attacking them from a psychometric perspective to elicit agreements to biased viewpoints. Inspired by psychometric principles in cognitive and social psychology, we propose three attack approaches, i.e., Disguise, Deception, and Teaching. Incorporating the corresponding attack instructions, we built two benchmarks: (1) a bilingual dataset with biased statements covering four bias types (2.7K instances) for extensive comparative analysis, and (2) BUMBLE, a larger benchmark spanning nine common bias types (12.7K instances) for comprehensive evaluation. Extensive evaluation of popular commercial and open-source LLMs shows that our methods can elicit LLMs’ inner bias more effectively than competitive baselines. Our attack methodology and benchmarks offer an effective means of assessing the ethical risks of LLMs, driving progress toward greater accountability in their development.</abstract>
       <url hash="77fb4221">2025.findings-acl.263</url>
@@ -10214,7 +10214,7 @@
       <author><first>Hao</first><last>Wang</last><affiliation>Google</affiliation></author>
       <author><first>Sicheng</first><last>Zhou</last></author>
       <author><first>Wenbing</first><last>Huang</last><affiliation>Renmin University of China</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>5247-5270</pages>
       <abstract>The rapid advancement of large language models (LLMs) has spurred significant interest in tool learning, where LLMs are augmented with external tools to tackle complex tasks. However, existing tool environments face challenges in balancing stability, scale, and realism, particularly for benchmarking purposes. To address this, we propose MirrorAPI, a novel framework that trains specialized LLMs to accurately simulate real API responses, effectively acting as “mirrors” to tool environments. Using a comprehensive dataset of request-response pairs from 7,000+ APIs, we employ supervised fine-tuning and chain-of-thought reasoning to enhance simulation fidelity. MirrorAPI achieves superior accuracy and stability compared to state-of-the-art methods, as demonstrated by its performance on the newly constructed MirrorAPI-Bench and its integration into StableToolBench.</abstract>
       <url hash="1e7bdf46">2025.findings-acl.273</url>
@@ -10244,7 +10244,7 @@
     </paper>
     <paper id="276">
       <title>Chain of Methodologies: Scaling Test Time Computation without Training</title>
-      <author id="cong-liu"><first>Cong</first><last>Liu</last></author>
+      <author><first>Cong</first><last>Liu</last></author>
       <author><first>Jie</first><last>Wu</last><affiliation>Temple University</affiliation></author>
       <author><first>Weigang</first><last>Wu</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Xu</first><last>Chen</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
@@ -10316,7 +10316,7 @@
       <author><first>Yulan</first><last>He</last><affiliation>King’s College London, University of London</affiliation></author>
       <author><first>Hui</first><last>Wang</last></author>
       <author><first>Yue</first><last>Yu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Bin</first><last>Liang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>5377-5398</pages>
@@ -10366,7 +10366,7 @@
       <author><first>Nan</first><last>Hu</last><affiliation>Southeast University</affiliation></author>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Jeff Z.</first><last>Pan</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>5433-5453</pages>
       <abstract>Existing benchmarks that assess Language Models (LMs) as Language Agents (LAs) for tool use primarily focus on stateless, single-turn interactions or partial evaluations, such as tool selection in a single turn, overlooking the inherent stateful nature of interactions in multi-turn applications. To fulfill this gap, we propose DialogTool, a multi-turn dialogue dataset with stateful tool interactions considering the whole life cycle of tool use, across six key tasks in three stages: 1) <i>tool creation</i>; 2) <i>tool utilization</i>: tool awareness, tool selection, tool execution; and 3) <i>role-consistent response</i>: response generation and role play. Furthermore, we build VirtualMobile – an embodied virtual mobile evaluation environment to simulate API calls and assess the robustness of the created APIs. Taking advantage of these artifacts, we conduct comprehensive evaluation on 13 distinct open- and closed-source LLMs and provide detailed analysis at each stage, revealing that the existing state-of-the-art LLMs still cannot perform well to use tools over long horizons .</abstract>
       <url hash="4c926f70">2025.findings-acl.284</url>
@@ -10416,7 +10416,7 @@
     <paper id="288">
       <title>Multimodal Causal Reasoning Benchmark: Challenging Multimodal Large Language Models to Discern Causal Links Across Modalities</title>
       <author><first>Zhiyuan</first><last>Li</last></author>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last><affiliation>Sony R&amp;D and University of Sydney, University of Sydney</affiliation></author>
+      <author><first>Heng</first><last>Wang</last><affiliation>Sony R&amp;D and University of Sydney, University of Sydney</affiliation></author>
       <author><first>Dongnan</first><last>Liu</last><affiliation>University of Sydney</affiliation></author>
       <author><first>Chaoyi</first><last>Zhang</last><affiliation>The University of Sydney</affiliation></author>
       <author><first>Ao</first><last>Ma</last></author>
@@ -10458,7 +10458,7 @@
       <author><first>Shijue</first><last>Huang</last></author>
       <author><first>Jeff Z.</first><last>Pan</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Zeming</first><last>Liu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>5578-5596</pages>
       <abstract>Inference-time scaling has attracted much attention which significantly enhance the performance of Large Language Models (LLMs) in complex reasoning tasks by increasing the length of Chain-of-Thought. These longer intermediate reasoning rationales embody various meta-reasoning skills in human cognition such as reflection and decomposition, being difficult to create and acquire. In this work, we introduce <i>Self-Reasoning Language Model</i> (SRLM), where the model itself can synthesize longer CoT data and iteratively improve performance through self-training. By incorporating a few demonstration examples (i.e., 1,000 samples) on how to unfold hidden reasoning chains from existing responses, which act as a reasoning catalyst, we demonstrate that SRLM not only enhances the model’s initial performance but also ensures more stable and consistent improvements in subsequent iterations. Our proposed SRLM achieves an average absolute improvement of more than +2.5 points across five reasoning tasks: MMLU, GSM8K, ARC-C, HellaSwag, and BBH on two backbone models. Moreover, it brings more improvements with more times of sampling during inference, such as absolute +7.89 average improvement with 64 sampling times, revealing the in-depth, diverse and creative reasoning paths in SRLM against the strong baseline .</abstract>
       <url hash="e297854c">2025.findings-acl.291</url>
@@ -10470,7 +10470,7 @@
       <author><first>Yongsen</first><last>Zheng</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Mingjie</first><last>Qian</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Guohua</first><last>Wang</last><affiliation>South China Agricultural University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <author><first>Ziliang</first><last>Chen</last></author>
       <author><first>Mingzhi</first><last>Mao</last></author>
       <author><first>Liang</first><last>Lin</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
@@ -10540,7 +10540,7 @@
       <author><first>Jingyan</first><last>Zhou</last></author>
       <author><first>Yipeng</first><last>Zhang</last></author>
       <author><first>Haitao</first><last>Mi</last><affiliation>Tencent AI Lab</affiliation></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>5688-5724</pages>
       <abstract>Large language models (LLMs) often struggle to provide up-to-date information due to their one-time training and the constantly evolving nature of the world. To keep LLMs current, existing approaches typically involve continued pre-training on new documents. However, they frequently face difficulties in extracting stored knowledge. Motivated by the remarkable success of the Feynman Technique in efficient human learning, we introduce Self-Tuning, a learning framework aimed at improving an LLM’s ability to effectively acquire new knowledge from unseen raw documents through self-teaching. Specifically, we develop a Self-Teaching strategy that augments the documents with a set of knowledge-intensive tasks created in a self-supervised manner, focusing on three crucial aspects: memorization, comprehension, and self-reflection. Additionally, we introduce three Wiki-Newpages-2023-QA datasets to facilitate an in-depth analysis of an LLM’s knowledge acquisition ability concerning memorization, extraction, and reasoning. Extensive experimental results on various models, e.g., Llama2-7B reveal that Self-Tuning consistently exhibits superior performance across all knowledge acquisition tasks and excels in preserving previous knowledge.</abstract>
       <url hash="a860eeba">2025.findings-acl.297</url>
@@ -10568,7 +10568,7 @@
     <paper id="299">
       <title>Memory or Reasoning? Explore How <fixed-case>LLM</fixed-case>s Compute Mixed Arithmetic Expressions</title>
       <author><first>Chengzhi</first><last>Li</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Ping</first><last>Jian</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Zhen</first><last>Yang</last></author>
       <author><first>Chenxu</first><last>Wang</last></author>
@@ -10684,9 +10684,9 @@
       <author><first>Hongli</first><last>Zhou</last></author>
       <author><first>Yingqi</first><last>Qu</last></author>
       <author><first>Jing</first><last>Liu</last><affiliation>Baidu</affiliation></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>5880-5895</pages>
       <abstract>Recently, there has been a growing trend of utilizing Large Language Model (LLM) to evaluate the quality of other LLMs. Many studies have fine-tuned judge models based on open-source LLMs for evaluation. While the fine-tuned judge models are claimed to achieve comparable evaluation capability with GPT-4, in this work, we conduct an empirical study of LLM-as-a-Judge. Our findings indicate that although the fine-tuned judge models achieve high performance on in-domain test sets, even surpassing GPT-4, they underperform GPT-4 across several dimensions, including generalizability, fairness and adaptability. We also reveal that the fine-tuned judge model inherently operates as a task-specific classifier, consequently imposing the limitations.</abstract>
       <url hash="dc053da8">2025.findings-acl.306</url>
@@ -10766,7 +10766,7 @@
       <author><first>Xiaoqing</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
       <author><first>Di</first><last>Yin</last></author>
       <author><first>Xing</first><last>Sun</last><affiliation>Tencent YouTu Lab</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>5983-6005</pages>
       <abstract>Role-Playing Agents (RPAs) have shown remarkable performance in various applications, yet they often struggle to recognize and appropriately respond to hard queries that conflict with their role-play knowledge. To investigate RPAs’ performance when faced with different types of conflicting requests, we develop an evaluation benchmark that includes contextual knowledge conflicting requests, parametric knowledge conflicting requests, and non-conflicting requests to assess RPAs’ ability to identify conflicts and refuse to answer appropriately without over-refusing. Through extensive evaluation, we find that most RPAs behave significant performance gaps toward different conflict requests. To elucidate the reasons, we conduct an in-depth representation-level analysis of RPAs under various conflict scenarios. Our findings reveal the existence of rejection regions and direct response regions within the model’s forwarding representation, and thus influence the RPA’s final response behavior. Therefore, we introduce a lightweight representation editing approach that conveniently shifts conflicting requests to the rejection region, thereby enhancing the model’s refusal accuracy. The extensive experiments validate the effectiveness of our editing method, improving RPAs’ refusal ability of conflicting requests while maintaining their general role-playing capabilities.</abstract>
       <url hash="287e69bb">2025.findings-acl.311</url>
@@ -10840,7 +10840,7 @@
       <author><first>Huiqiang</first><last>Jiang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Xufang</first><last>Luo</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Qianhui</first><last>Wu</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
       <author id="dongsheng-li-fudan"><first>Dongsheng</first><last>Li</last><affiliation>Microsoft Research Asia</affiliation></author>
       <author><first>Yuqing</first><last>Yang</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Yongfeng</first><last>Huang</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
@@ -10942,7 +10942,7 @@
       <title>Blessing of Multilinguality: A Systematic Analysis of Multilingual In-Context Learning</title>
       <author><first>Yilei</first><last>Tu</last></author>
       <author><first>Andrew</first><last>Xue</last></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo and Vector Institute</affiliation></author>
       <pages>6213-6248</pages>
       <abstract>While multilingual large language models generally perform adequately, and sometimes even rival English performance on high-resource languages (HRLs), they often significantly underperform on low-resource languages (LRLs). Among several prompting strategies aiming at bridging the gap, multilingual in-context learning (ICL) has been particularly effective when demonstration in target languages is unavailable. However, there lacks a systematic understanding when and why it works well.In this work, we systematically analyze multilingual ICL, using demonstrations in HRLs to enhance cross-lingual transfer. We show that demonstrations in mixed HRLs consistently outperform English-only ones across the board, particularly for tasks written in LRLs. Surprisingly, our ablation study show that the presence of irrelevant non-English sentences in the prompt yields measurable gains, suggesting the effectiveness of multilingual exposure itself. Our results highlight the potential of strategically leveraging multilingual resources to bridge the performance gap for underrepresented languages.</abstract>
       <url hash="8d4ba8c5">2025.findings-acl.323</url>
@@ -10967,7 +10967,7 @@
       <author><first>ZongYu</first><last>Wang</last><affiliation>Meituan</affiliation></author>
       <author><first>Xuezhi</first><last>Cao</last><affiliation>Meituan</affiliation></author>
       <author><first>Xunliang</first><last>Cai</last><affiliation>Meituan</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
       <pages>6279-6299</pages>
       <abstract>Large Language Models (LLMs) have shown impressive capabilities across various tasks but remain vulnerable to meticulously crafted jailbreak attacks. In this paper, we identify a critical safety gap: while LLMs are adept at detecting jailbreak prompts, they often produce unsafe responses when directly processing these inputs. Inspired by this insight, we propose SAGE(Self-Aware Guard Enhancement), a training-free defense strategy designed to align LLMs’ strong safety discrimination performance with their relatively weaker safety generation ability. SAGE consists of two core components: a Discriminative Analysis Module and a Discriminative Response Module, enhancing resilience against sophisticated jailbreak attempts through flexible safety discrimination instructions. Extensive experiments demonstrate SAGE’s effectiveness and robustness across various open-source and closed-source LLMs of different sizes and architectures, achieving an average 99% defense success rate against numerous complex and covert jailbreak methods while maintaining helpfulness on general benchmarks. We further conduct mechanistic interpretability analysis through hidden states and attention distributions, revealing the underlying mechanisms of this detection-generation discrepancy. Our work thus contributes to developing future LLMs with coherent safety awareness and generation behavior. Our code and datasets are publicly available at <url>https://github.com/NJUNLP/SAGE</url>.</abstract>
@@ -10984,7 +10984,7 @@
       <author><first>Arindam</first><last>Mitra</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Spencer</first><last>Whitehead</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>Ohio State University</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <pages>6300-6323</pages>
       <abstract>Recent success in large multimodal models (LMMs) has sparked promising applications of agents capable of autonomously completing complex web tasks. While open-source LMM agents have made significant advances in offline evaluation benchmarks, their performance still falls substantially short of human-level capabilities in more realistic online settings. A key bottleneck is the lack of diverse and large-scale trajectory-level datasets across various domains, which are expensive to collect. In this paper, we address this challenge by developing a scalable recipe to synthesize the largest and most diverse trajectory-level dataset to date, containing over 94K successful multimodal web trajectories, spanning 49K unique URLs, 720K screenshots, and 33M web elements. In particular, we leverage extensive web exploration and refinement to obtain diverse task intents. The average cost is 28 cents per successful trajectory, making it affordable to a wide range of users in the community. Leveraging this dataset, we train Explorer, a multimodal web agent, and demonstrate strong performance on both offline and online web agent benchmarks such as Mind2Web-Live, Multimodal-Mind2Web, and MiniWob++. Additionally, our experiments highlight data scaling as a key driver for improving web agent capabilities. We hope this study makes state-of-the-art LMM-based agent research at a larger scale more accessible.</abstract>
       <url hash="03baa01c">2025.findings-acl.326</url>
@@ -11010,7 +11010,7 @@
       <author><first>Yuhao</first><last>Dan</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <author><first>Qin</first><last>Chen</last><affiliation>East China Normal University</affiliation></author>
-      <author><first>Junfeng</first><last>Tian</last><affiliation>Xiaohongshu</affiliation></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last><affiliation>Xiaohongshu</affiliation></author>
       <author><first>Liang</first><last>He</last><affiliation>East China Normal University</affiliation></author>
       <pages>6342-6362</pages>
       <abstract>Personalized large language models (LLMs) have attracted great attention in many applications, such as emotional support and role-playing. However, existing works primarily focus on modeling explicit character profiles, while ignoring the underlying personality traits that truly shape behaviors and decision-making, hampering the development of more anthropomorphic and psychologically-grounded AI systems. In this paper, we explore the modeling of Big Five personality traits, which is the most widely used trait theory in psychology, and propose P-React, a mixture of experts (MoE)-based personalized LLM. Particularly, we integrate a Personality Specialization Loss (PSL) to better capture individual trait expressions, providing a more nuanced and psychologically grounded personality simulacrum. To facilitate research in this field, we curate OCEAN-Chat, a high-quality, human-verified dataset designed to train LLMs in expressing personality traits across diverse topics. Extensive experiments demonstrate the effectiveness of P-React in maintaining consistent and real personality.</abstract>
@@ -11038,7 +11038,7 @@
     <paper id="330">
       <title>Streamlining the Collaborative Chain of Models into A Single Forward Pass in Generation-Based Tasks</title>
       <author><first>Yuanjie</first><last>Lyu</last></author>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Yuhao</first><last>Chen</last></author>
       <author><first>Yong</first><last>Chen</last></author>
       <author><first>Tong</first><last>Xu</last><affiliation>University of Science and Technology of China</affiliation></author>
@@ -11065,7 +11065,7 @@
       <title>Beyond Reactive Safety: Risk-Aware <fixed-case>LLM</fixed-case> Alignment via Long-Horizon Simulation</title>
       <author><first>Chenkai</first><last>Sun</last><affiliation>University of Illinois Urbana Champaign</affiliation></author>
       <author><first>Denghui</first><last>Zhang</last><affiliation>Stevens Institute of Technology</affiliation></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>6422-6434</pages>
       <abstract>Given the growing influence of language model-based agents on high-stakes societal decisions, from public policy to healthcare, ensuring their beneficial impact requires understanding the far-reaching implications of their suggestions. We propose a proof-of-concept framework that projects how model-generated advice could propagate through societal systems on a macroscopic scale over time, enabling more robust alignment. To assess the long-term safety awareness of language models, we also introduce a dataset of 100 indirect harm scenarios, testing models’ ability to foresee adverse, non-obvious outcomes from seemingly harmless user prompts. Our approach achieves not only over 20% improvement on the new dataset but also an average win rate exceeding 70% against strong baselines on existing safety benchmarks (AdvBench, SafeRLHF, WildGuardMix), suggesting a promising direction for safer agents.</abstract>
@@ -11221,7 +11221,7 @@
       <author><first>Chengcheng</first><last>Han</last></author>
       <author><first>Jinxin</first><last>Shi</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Wenjun</first><last>Cui</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Tsinghua University, Tsinghua University</affiliation></author>
       <author><first>Xingjiao</first><last>Wu</last><affiliation>East China Normal University</affiliation></author>
       <author><first>Jiabao</first><last>Zhao</last><affiliation>Donghua University, Shanghai</affiliation></author>
       <pages>6575-6602</pages>
@@ -11532,7 +11532,7 @@
     <paper id="365">
       <title><fixed-case>K</fixed-case>od<fixed-case>C</fixed-case>ode: A Diverse, Challenging, and Verifiable Synthetic Dataset for Coding</title>
       <author><first>Zhangchen</first><last>Xu</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Yueqin</first><last>Yin</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Mingyuan</first><last>Zhou</last><affiliation>Google and The University of Texas at Austin</affiliation></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
@@ -11632,7 +11632,7 @@
       <author><first>Lu</first><last>Xiang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>7138-7149</pages>
       <abstract>Document Image Translation (DIT), which aims at translating documents in images from source language to the target, plays an important role in Document Intelligence. It requires a comprehensive understanding of document multi-modalities and a focused concentration on relevant textual regions during translation. However, most existing methods usually rely on the vanilla encoder-decoder paradigm, severely losing concentration on key regions that are especially crucial for complex-layout document translation. To tackle this issue, in this paper, we propose a new Query-Response DIT framework (QRDIT). QRDIT reformulates the DIT task into a parallel response/translation process of the multiple queries (i.e., relevant source texts), explicitly centralizing its focus toward the most relevant textual regions to ensure translation accuracy. A novel dynamic aggregation mechanism is also designed to enhance the text semantics in query features toward translation. Extensive experiments in four translation directions on three benchmarks demonstrate its state-of-the-art performance, showing significant translation quality improvements toward whole-page complex-layout document images.</abstract>
       <url hash="7a71df9d">2025.findings-acl.372</url>
@@ -11657,7 +11657,7 @@
     <paper id="374">
       <title>A General Knowledge Injection Framework for <fixed-case>ICD</fixed-case> Coding</title>
       <author><first>Xu</first><last>Zhang</last></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last></author>
+      <author><first>Kun</first><last>Zhang</last></author>
       <author><first>Wenxin</first><last>Ma</last></author>
       <author><first>Rongsheng</first><last>Wang</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Chenxu</first><last>Wu</last></author>
@@ -11917,7 +11917,7 @@
       <author><first>Songyang</first><last>Zhang</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
       <author><first>Dahua</first><last>Lin</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Lijun</first><last>Wu</last><affiliation>Shanghai Artificial Intelligence Laboratory</affiliation></author>
-      <author><first>Gábor</first><last>Prószéky</last><affiliation>Hungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic</affiliation></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last><affiliation>Hungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic</affiliation></author>
       <author><first>Conghui</first><last>He</last><affiliation>Shanghai AI Lab</affiliation></author>
       <pages>7464-7520</pages>
       <abstract>We introduce OpenHuEval, the first benchmark for LLMs focusing on the Hungarian language and specifics. OpenHuEval is constructed from a vast collection of Hungarian-specific materials sourced from multiple origins. In the construction, we incorporated the latest design principles for evaluating LLMs, such as using real user queries from the internet, emphasizing the assessment of LLMs’ generative capabilities, and employing LLM-as-judge to enhance the multidimensionality and accuracy of evaluations. Ultimately, OpenHuEval encompasses eight Hungarian-specific dimensions, featuring five tasks and 3953 questions. Consequently, OpenHuEval provides the comprehensive, in-depth, and scientifically accurate assessment of LLM performance in the context of the Hungarian language and its specifics. We evaluated current mainstream LLMs, including both traditional LLMs and recently developed Large Reasoning Models. The results demonstrate the significant necessity for evaluation and model optimization tailored to the Hungarian language and specifics. We also established the framework for analyzing the thinking processes of LRMs with OpenHuEval, revealing intrinsic patterns and mechanisms of these models in non-English languages, with Hungarian serving as a representative example. We will release OpenHuEval at https://github.com/opendatalab/OpenHuEval .</abstract>
@@ -11955,7 +11955,7 @@
       <title><fixed-case>D</fixed-case>oc<fixed-case>F</fixed-case>usion: A Unified Framework for Document Parsing Tasks</title>
       <author><first>Mingxu</first><last>Chai</last></author>
       <author><first>Ziyu</first><last>Shen</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
+      <author><first>Chong</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Xiao</first><last>Wang</last></author>
       <author><first>Shihan</first><last>Dou</last></author>
@@ -12001,7 +12001,7 @@
       <author><first>Quanyu</first><last>Long</last></author>
       <author><first>Jianda</first><last>Chen</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <author><first>Wenya</first><last>Wang</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Sinno Jialin</first><last>Pan</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>7633-7651</pages>
@@ -12019,7 +12019,7 @@
       <author><first>Feifan</first><last>Song</last><affiliation>Peking University</affiliation></author>
       <author><first>Longhui</first><last>Yu</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>7652-7665</pages>
       <abstract>Long-CoT reasoning combined with reinforcement learning for large language models demonstrates remarkable performance and scalability. However, we observe that the initial policy model could significantly influence the final performance as well as the token efficiency. Additionally, there is a lack of systematic guidelines for obtaining a better initial policy model. To bridge this gap, we initiate a comprehensive investigation by activating the initial model using a variety of datasets with different data volumes and reasoning patterns. Then, we conduct a thorough analysis and comparison of the RL process for different initial models from the perspectives of upper bounds, diversity, and token efficiency, providing a deeper understanding and insight into the long-CoT RL. Based on our empirical results, we propose a systematic guideline and a novel Re-RFT method for constructing a better RL start point. Our experiment results based on the 14B model surpass the DeepSeek-R1-Distill-Qwen-14B by an average of 4.6%, demonstrating our approach’s effectiveness and superiority.</abstract>
       <url hash="e9956ddf">2025.findings-acl.397</url>
@@ -12028,7 +12028,7 @@
     </paper>
     <paper id="398">
       <title>Topic Modeling for Short Texts via Optimal Transport-Based Clustering</title>
-      <author><first>Tu</first><last>Vu</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Manh</first><last>Do</last></author>
       <author><first>Tung</first><last>Nguyen</last><affiliation>Hanoi University of Science and Technology</affiliation></author>
       <author><first>Linh Ngo</first><last>Van</last><affiliation>Hanoi University of Science and Technology</affiliation></author>
@@ -12143,7 +12143,7 @@
     <paper id="406">
       <title>Understanding the Repeat Curse in Large Language Models from a Feature Perspective</title>
       <author><first>Junchi</first><last>Yao</last></author>
-      <author id="shu-yang"><first>Shu</first><last>Yang</last></author>
+      <author><first>Shu</first><last>Yang</last></author>
       <author><first>Jianhua</first><last>Xu</last></author>
       <author><first>Lijie</first><last>Hu</last></author>
       <author><first>Mengdi</first><last>Li</last><affiliation>King Abdullah University of Science and Technology</affiliation></author>
@@ -12159,7 +12159,7 @@
       <author><first>Haneul</first><last>Yoo</last><affiliation>KAIST</affiliation></author>
       <author><first>Cheonbok</first><last>Park</last><affiliation>NAVER</affiliation></author>
       <author><first>Sangdoo</first><last>Yun</last><affiliation>NAVER</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Hwaran</first><last>Lee</last><affiliation>Sogang University</affiliation></author>
       <pages>7816-7836</pages>
       <abstract>Large language models (LLMs) now exhibit near human-level performance in various tasks, but their performance drops drastically after a handful of high-resource languages due to the imbalance in pre-training data. Inspired by the human process of second language acquisition, particularly code-switching—the practice of language alternation in a conversation—we propose code-switching curriculum learning (CSCL) to enhance cross-lingual transfer for LLMs. CSCL mimics the stages of human language learning by progressively training models with a curriculum consisting of 1) token-level code-switching, 2) sentence-level code-switching, and 3) monolingual corpora. Using Qwen 2 as our underlying model, we demonstrate the efficacy of the CSCL in improving language transfer to Korean, achieving significant performance gains compared to monolingual continual pre-training methods. Ablation studies reveal that both token- and sentence-level code-switching significantly enhance cross-lingual transfer and that curriculum learning amplifies these effects. We also extend our findings into various languages, including Japanese (high-resource) and Indonesian (low-resource), and using two additional models (Gemma 2 and Phi 3.5). We further show that CSCL mitigates spurious correlations between language resources and safety alignment, presenting a robust, efficient framework for more equitable language transfer in LLMs. We observe that CSCL is effective for low-resource settings where high-quality, monolingual corpora for language transfer are hardly available.</abstract>
@@ -12212,7 +12212,7 @@
     <paper id="411">
       <title><fixed-case>G</fixed-case>e<fixed-case>NR</fixed-case>e: A <fixed-case>F</fixed-case>rench Gender-Neutral Rewriting System Using Collective Nouns</title>
       <author><first>Enzo</first><last>Doyen</last></author>
-      <author><first>Amalia</first><last>Todirascu</last><affiliation>Université de Strasbourg</affiliation></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last><affiliation>Université de Strasbourg</affiliation></author>
       <pages>7889-7909</pages>
       <abstract>A significant portion of the textual data used in the field of Natural Language Processing (NLP) exhibits gender biases, particularly due to the use of masculine generics (masculine words that are supposed to refer to mixed groups of men and women), which can perpetuate and amplify stereotypes. Gender rewriting, an NLP task that involves automatically detecting and replacing gendered forms with neutral or opposite forms (e.g., from masculine to feminine), can be employed to mitigate these biases. While such systems have been developed in a number of languages (English, Arabic, Portuguese, German, French), automatic use of gender neutralization techniques (as opposed to inclusive or gender-switching techniques) has only been studied for English. This paper presents GeNRe, the very first French gender-neutral rewriting system using collective nouns, which are gender-fixed in French. We introduce a rule-based system (RBS) tailored for the French language alongside two fine-tuned language models trained on data generated by our RBS. We also explore the use of instruct-based models to enhance the performance of our other systems and find that Claude 3 Opus combined with our dictionary achieves results close to our RBS. Through this contribution, we hope to promote the advancement of gender bias mitigation techniques in NLP for French.</abstract>
       <url hash="d90e2023">2025.findings-acl.411</url>
@@ -12284,7 +12284,7 @@
       <title>Think More, Hallucinate Less: Mitigating Hallucinations via Dual Process of Fast and Slow Thinking</title>
       <author><first>Xiaoxue</first><last>Cheng</last></author>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>7979-7990</pages>
       <abstract>Large language models (LLMs) demonstrate exceptional capabilities, yet still face the hallucination issue. Typical text generation approaches adopt an auto-regressive generation without deliberate reasoning, often leading to untrustworthy and factually inaccurate responses. In this paper, we propose HaluSearch, a novel framework that incorporates tree search-based algorithms (e.g., MCTS) to enable an explicit slow thinking generation process for mitigating hallucinations during inference. Specifically, HaluSearch frames text generation as a step-by-step reasoning process, using a self-evaluation reward model to score each generation step and guide the tree search towards the most reliable generation pathway. To balance efficiency and quality, we introduce a hierarchical system switch mechanism, which dynamically switches between fast and slow thinking modes at both instance and step levels. We conduct extensive experiments on both English and Chinese datasets, and the results show that our approach significantly outperforms baseline approaches.</abstract>
@@ -12312,7 +12312,7 @@
       <author><first>Yijin</first><last>Liu</last><affiliation>Wechat AI</affiliation></author>
       <author><first>Fandong</first><last>Meng</last><affiliation>WeChat AI, Tencent Inc.</affiliation></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>8005-8018</pages>
       <abstract>Knowledge Distillation (KD) has emerged as a prominent technique for model compression. However, conventional KD approaches primarily focus on homogeneous architectures with identical tokenizers, constraining their applicability in cross-architecture scenarios. As for the cross-tokenizer KD, the differences in the tokenizers give rise to two fundamental challenges: (1) sequence misalignment caused by divergent tokenization strategies, and (2) mismatched vocabulary size and composition. While existing probability-matching methods attempt to address these issues, their efficacy remains limited due to suboptimal alignment in both the sequence and vocabulary aspects. To overcome these limitations, we propose Contextual Dynamic Mapping (CDM), a novel cross-tokenizer distillation framework that employs contextual information to enhance sequence alignment precision and dynamically improves vocabulary mapping. We evaluated the effectiveness of our approach across five advanced and widely-used model families (<i>i.e,</i>LLama3, Phi3, Gemma2, OPT and Qwen2), which were configured into three distinct teacher-student pairs. Our method shows significant advantages over existing cross-tokenizer distillation baselines across diverse benchmarks, including instruction-following, code generation and math. Notably, our analysis reveals that combining conventional same-tokenizer distillation and cross-tokenizer distillation through CDM yields further performance improvements.</abstract>
@@ -12450,7 +12450,7 @@
       <author><first>Qingyao</first><last>Li</last></author>
       <author><first>Xinyi</first><last>Dai</last></author>
       <author><first>Xiangyang</first><last>Li</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yasheng</first><last>Wang</last></author>
       <author><first>Ruiming</first><last>Tang</last></author>
       <author><first>Yong</first><last>Yu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
@@ -12580,7 +12580,7 @@
       <author><first>Tommaso</first><last>Green</last></author>
       <author><first>Félix</first><last>Gaschi</last><affiliation>Posos</affiliation></author>
       <author><first>Fabian David</first><last>Schmidt</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>Julius-Maximilians-Universität Würzburg</affiliation></author>
       <pages>8342-8369</pages>
       <abstract>With Large Language Models (LLMs) becoming increasingly multilingual, effective knowledge editing (KE) needs to propagate edits across languages. Evaluation of the existing methods for cross-lingual knowledge editing (CKE) is limited both w.r.t. edit effectiveness: benchmarks do not account for entity aliases and use faulty entity translations; as well as robustness: existing work fails to report on downstream generation and task-solving abilities of LLMs after editing. In this work, we aim to (i) maximize the effectiveness of CKE while at the same time (ii) minimizing the extent of downstream model collapse due to the edits. To accurately measure the effectiveness of CKE methods, we introduce BabelEdits, a new CKE benchmark covering 60 languages that combines high-quality multilingual synsets from BabelNet with marker-based translation to ensure entity translation quality. Unlike existing CKE benchmarks, BabelEdits accounts for the rich variety of entity aliases within and across languages. We then propose BabelReFT, a modular CKE approach based on representation fine-tuning (ReFT) which learns entity-scope ReFT modules, applying them to all multilingual aliases at inference. Our experimental results show that not only is BabelReFT more effective in CKE than state-of-the-art methods, but, owing to its modular design, much more robust against downstream model collapse when subjected to many sequential edits.</abstract>
@@ -12660,7 +12660,7 @@
       <author><first>Ying</first><last>Wen</last><affiliation>Shanghai Jiao Tong University</affiliation></author>
       <author><first>Yong</first><last>Yu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Jun</first><last>Wang</last><affiliation>University College London</affiliation></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <pages>8453-8468</pages>
       <abstract>While large language models (LLMs) have significantly advanced mathematical reasoning, Process Reward Models (PRMs) have been developed to evaluate the logical validity of reasoning steps. However, PRMs still struggle with out-of-distribution (OOD) challenges. This paper identifies the OOD issues including step OOD, arising from differences in reasoning patterns across model types and sizes, and question OOD, due to dataset shifts between training and real-world problems. To address these issues, we introduce Retrieval-Augmented Process Reward Model (RetrievalPRM), a novel framework designed to tackle these OOD issues. By utilizing a two-stage retrieval-enhanced mechanism, RetrievalPRM retrieves semantically similar questions and steps for PRM as a warmup to stimulate its potential to judge target steps, improving generalization and reasoning consistency across different models and problem types. Our extensive experiments demonstrate that RetrievalPRM outperforms existing baselines across multiple real-world datasets. Our open-source contributions include a retrieval-enhanced dataset, a tuning framework for PRM training, and the RetreivalPRM model, establishing a new standard for PRM performance.</abstract>
       <url hash="92052639">2025.findings-acl.444</url>
@@ -12893,7 +12893,7 @@
       <author><first>Tingyu</first><last>Qu</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Mingxiao</first><last>Li</last><affiliation>KU Leuven</affiliation></author>
       <author><first>Jesse</first><last>Davis</last><affiliation>KU Leuven</affiliation></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
       <pages>8796-8810</pages>
       <abstract>Efficiently updating multilingual knowledge in large language models (LLMs) without disrupting coherent factual representations across languages remains a significant challenge. While deploying separate editing systems for each language might seem viable, this approach incurs substantial costs due to the need to manage multiple models. A more efficient solution involves integrating knowledge updates across all languages into a unified model. However, sequential edits across languages often lead to destructive parameter interference, significantly degrading multilingual generalization and the accuracy of injected knowledge. To address this issue, we propose LangEdit, a novel null-space constrained framework designed to precisely isolate language-specific knowledge updates. The core innovation of LangEdit lies in its ability to project parameter updates for each language onto the orthogonal complement of other languages’ subspaces. This approach mathematically guarantees update independence while preserving multilingual generalization capabilities. We conduct a comprehensive evaluation across three model architectures, six languages, and four downstream tasks, demonstrating that LangEdit effectively mitigates parameter interference and outperforms existing state-of-the-art editing methods. Our results highlight its potential for enabling efficient and accurate multilingual knowledge updates in LLMs.</abstract>
       <url hash="f9fab66c">2025.findings-acl.460</url>
@@ -12907,7 +12907,7 @@
       <author><first>Juntao</first><last>Li</last></author>
       <author><first>Wanfu</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last><affiliation>Soochow University</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>8811-8831</pages>
       <abstract>As large language models (LLMs) are increasingly applied to complex scientific problem-solving, their effectiveness is often limited by unconscious or failed tool usage. To address this issue, we introduce the Tool-Awareness Training (TAT) method, designed to enhance scientific reasoning. This approach leverages both forward and backward data generation strategies to strengthen the model’s conscious and selective tool utilization in multi-step reasoning tasks. Our method unfolds in three stages: (1) developing tool-knowledge through backward tooluse data generation (2) enhancing tool-awareness in multi-step reasoning by utilizing forward reasoning data, and (3) improving domain adaptability through large-scale domain-specific data for multi-task learning. These three stages progressively establish the foundation for tool learning and scientific reasoning, effectively integrating both, enabling the model to tackle multi-domain scientific tasks while optimizing tool usage. Our experimental results demonstrate that TAT significantly enhances LLM performance in mathematical and scientific reasoning tasks, particularly by improving the model’s tool utilization capabilities, including proactivity and execution success rates.</abstract>
@@ -12969,7 +12969,7 @@
       <author><first>Masahiro</first><last>Kaneko</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <author><first>Youmi</first><last>Ma</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <author><first>Yuki</first><last>Wata</last><affiliation>The University of Tokyo</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <pages>8894-8907</pages>
       <abstract>Large Language Models (LLMs) are trained on large-scale web data, which makes it difficult to grasp the contribution of each text. This poses the risk of leaking inappropriate data such as benchmarks, personal information, and copyrighted texts in the training data. Membership Inference Attacks (MIA), which determine whether a given text is included in the model’s training data, have been attracting attention. Previous studies of MIAs revealed that likelihood-based classification is effective for detecting leaks in LLMs. However, the existing likelihood-based methods cannot be applied to some proprietary models like ChatGPT or Claude 3 because the likelihood for input text is unavailable to the user. In this study, we propose a Sampling-based Pseudo-Likelihood (SPL) method for MIA (SaMIA) that calculates SPL using only the text generated by an LLM to detect leaks. The SaMIA treats the target text as the reference text and multiple outputs from the LLM as text samples, calculates the degree of <tex-math>n</tex-math>-gram match as SPL, and determines the membership of the text in the training data. Even without likelihoods, SaMIA performed on par with existing likelihood-based methods.</abstract>
       <url hash="6cd27a39">2025.findings-acl.465</url>
@@ -13107,7 +13107,7 @@
       <author><first>Shuyue</first><last>Guo</last></author>
       <author><first>Tianyu</first><last>Zheng</last></author>
       <author><first>Jiawei</first><last>Guo</last><affiliation>01.AI</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Haoning</first><last>Wu</last><affiliation>Rhymes AI</affiliation></author>
       <author><first>Xingwei</first><last>Qu</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Jian</first><last>Yang</last><affiliation>Alibaba Group</affiliation></author>
@@ -13129,7 +13129,7 @@
     <paper id="475">
       <title>Debate, Reflect, and Distill: Multi-Agent Feedback with Tree-Structured Preference Optimization for Efficient Language Model Enhancement</title>
       <author><first>Xiaofeng</first><last>Zhou</last><affiliation>Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Lizi</first><last>Liao</last><affiliation>Singapore Management University</affiliation></author>
       <pages>9122-9137</pages>
       <abstract>Large Language Models (LLMs) continue to set new standards in knowledge-intensive and complex reasoning tasks, yet their high computational demands limit widespread adoption. While distilling large models into smaller ones offers a sustainable solution, current techniques—such as static knowledge distillation, resource-intensive reinforcement learning from human feedback, or limited self-reflection—struggle to yield substantial and lasting performance gains. In this paper, we present a novel Debate and Reflect (D&amp;R) framework that orchestrates multi-turn debates between smaller models and stronger teacher models, eliciting actionable feedback (e.g., error analysis, corrective strategies) to guide student models. Further, we introduce Tree-structured Direct Preference Optimization (T-DPO) to efficiently leverage these debate logs, organizing interactions into a hierarchical format for effective training. Empirical evaluations across diverse NLP benchmarks demonstrate that our approach significantly improves smaller-model accuracy, robustness, and generalization, outperforming conventional baselines by a large margin.</abstract>
@@ -13152,7 +13152,7 @@
     </paper>
     <paper id="477">
       <title>Narrative Media Framing in Political Discourse</title>
-      <author><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Lea</first><last>Frermann</last><affiliation>University of Melbourne</affiliation></author>
       <pages>9167-9196</pages>
       <abstract>Narrative frames are a powerful way of conceptualizing and communicating complex, controversial ideas, however automated frame analysis to date has mostly overlooked this framing device. In this paper, we connect elements of narrativity with fundamental aspects of framing, and present a framework which formalizes and operationalizes such aspects. We annotate and release a data set of news articles in the climate change domain, analyze the dominance of narrative frame components across political leanings, and test LLMs in their ability to predict narrative frames and their components. Finally, we apply our framework in an unsupervised way to elicit components of narrative framing in a second domain, the COVID-19 crisis, where our predictions are congruent with prior theoretical work showing the generalizability of our approach.</abstract>
@@ -13454,7 +13454,7 @@
       <title><fixed-case>GIMMICK</fixed-case>: Globally Inclusive Multimodal Multitask Cultural Knowledge Benchmarking</title>
       <author><first>Florian</first><last>Schneider</last></author>
       <author><first>Carolin</first><last>Holtermann</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Anne</first><last>Lauscher</last><affiliation>Universität Hamburg</affiliation></author>
       <pages>9605-9668</pages>
       <abstract>Large Vision-Language Models (LVLMs) have recently gained attention due to their distinctive performance and broad applicability. While it has been previously shown that their efficacy in usage scenarios involving non-Western contexts falls short, existing studies are limited in scope, covering just a narrow range of cultures, focusing exclusively on a small number of cultural aspects, or evaluating a limited selection of models on a single task only. Towards globally inclusive LVLM research, we introduce GIMMICK, an extensive multimodal benchmark designed to assess a broad spectrum of cultural knowledge across 144 countries representing six global macro-regions. GIMMICK comprises six tasks built upon three new datasets that span 728 unique cultural events or facets on which we evaluated 20 LVLMs and 11 LLMs, including five proprietary and 26 open-weight models of all sizes. We systematically examine (1) regional cultural biases, (2) the influence of model size, (3) input modalities, and (4) external cues. Our analyses reveal strong biases toward Western cultures across models and tasks and highlight strong correlations between model size and performance, as well as the effectiveness of multimodal input and external geographic cues. We further find that models have more knowledge of tangible than intangible aspects (e.g., food vs. rituals) and that they excel in recognizing broad cultural origins but struggle with a more nuanced understanding.</abstract>
@@ -13486,7 +13486,7 @@
       <author><first>Fuwen</first><last>Luo</last></author>
       <author><first>Yile</first><last>Wang</last><affiliation>Shenzhen University</affiliation></author>
       <author><first>Peng</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>9686-9704</pages>
       <abstract>Large language models (LLMs) have revolutionized the field of natural language processing, enabling remarkable progress in various tasks. Different from objective tasks such as commonsense reasoning and arithmetic question-answering, the performance of LLMs on subjective tasks is still limited, where the perspective on the specific problem plays crucial roles for better interpreting the context and giving proper response. For example, in certain scenarios, LLMs may perform better when answering from an expert role perspective, potentially eliciting their relevant domain knowledge. In contrast, in some scenarios, LLMs may provide more accurate responses when answering from a third-person standpoint, enabling a more comprehensive understanding of the problem and potentially mitigating inherent biases. In this paper, we propose Reasoning through Perspective Transition (RPT), a method based on in-context learning that enables LLMs to dynamically select among direct, role, and third-person perspectives for the best way to solve corresponding subjective problem. Through extensive experiments on totally 12 subjective tasks by using both closed-source and open-source LLMs including GPT-4, GPT-3.5, Llama-3, and Qwen-2, our method outperforms widely used single fixed perspective based methods such as chain-of-thought prompting and expert prompting, highlights the intricate ways that LLMs can adapt their perspectives to provide nuanced and contextually appropriate responses for different problems.</abstract>
       <url hash="0cd297d9">2025.findings-acl.502</url>
@@ -13499,7 +13499,7 @@
       <author><first>Yuanzhe</first><last>Shen</last></author>
       <author><first>Changze</first><last>Lv</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>9705-9723</pages>
       <abstract>The continuous evolution and enhanced reasoning capabilities of large language models (LLMs) have elevated their role in complex tasks, notably in travel planning, where demand for personalized, high-quality itineraries is rising. However, current benchmarks often rely on unrealistic simulated data, failing to reflect the differences between LLM-generated and real-world itineraries. Existing evaluation metrics, which primarily emphasize constraints, fall short of providing a comprehensive assessment of the overall quality of travel plans. To address these limitations, we introduce TripTailor, a benchmark designed specifically for personalized travel planning in real-world scenarios. This dataset features an extensive collection of over 500,000 real-world points of interest (POIs) and nearly 4,000 diverse travel itineraries, complete with detailed information, providing a more authentic evaluation framework. Experiments show that fewer than 10% of the itineraries generated by the latest state-of-the-art LLMs achieve human-level performance. Moreover, we identify several critical challenges in travel planning, including the feasibility, rationality, and personalized customization of the proposed solutions. We hope that TripTailor will drive the development of travel planning agents capable of understanding and meeting user needs while generating practical itineraries.</abstract>
       <url hash="88641d11">2025.findings-acl.503</url>
@@ -13523,7 +13523,7 @@
       <author><first>Lingwei</first><last>Wei</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
       <author><first>Dou</first><last>Hu</last></author>
       <author><first>Wei</first><last>Zhou</last><affiliation>Institute of Information Engeering</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <author><first>Songlin</first><last>Hu</last></author>
       <pages>9739-9752</pages>
       <abstract>The rapid proliferation of fake news across multiple domains poses significant threats to society. Existing multi-domain detection models typically capture domain-shared semantic features to achieve generalized detection. However, they often fail to generalize well due to poor adaptability, which limits their ability to provide complementary features for detection, especially in data-constrained conditions. To address these challenges, we investigate the propagation-adaptive multi-domain fake news detection paradigm. We propose a novel framework, Structure-adaptive Adversarial Contrastive Learning (StruACL), to adaptively enable structure knowledge transfer between multiple domains. Specifically, we first contrast representations between content-only and propagation-rich data to preserve structural patterns in the shared representation space. Additionally, we design a propagation-guided adversarial training strategy to enhance the diversity of representations. Under the StruACL objective, we leverage a unified Transformer-based and graph-based model to jointly learn transferable semantic and structural features for detection across multiple domains. Experiments on seven fake news datasets demonstrate that StruACL-TGN achieves better multi-domain detection performance on general and data-constrained scenarios, showing the effectiveness and better generalization of StruACL.</abstract>
@@ -13556,8 +13556,8 @@
       <author><first>Daniil</first><last>Orel</last></author>
       <author><first>Zain Muhammad</first><last>Mujahid</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Fajri</first><last>Koto</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>9765-9784</pages>
       <abstract>Large language models (LLMs) are known to have the potential to generate harmful content, posing risks to users. While significant progress has been made in developing taxonomies for LLM risks and safety evaluation prompts, most studies have focused on monolingual contexts, primarily in English. However, language- and region-specific risks in bilingual contexts are often overlooked, and core findings can diverge from those in monolingual settings. In this paper, we introduce Qorǵau, a novel dataset specifically designed for safety evaluation in Kazakh and Russian, reflecting the unique bilingual context in Kazakhstan, where both Kazakh (a low-resource language) and Russian (a high-resource language) are spoken. Experiments with both multilingual and language-specific LLMs reveal notable differences in safety performance, emphasizing the need for tailored, region-specific datasets to ensure the responsible and safe deployment of LLMs in countries like Kazakhstan. Warning: this paper contains example data that may be offensive, harmful, or biased.</abstract>
       <url hash="482e70c1">2025.findings-acl.507</url>
@@ -13600,7 +13600,7 @@
       <author><first>Run</first><last>Chen</last><affiliation>Columbia University</affiliation></author>
       <author><first>Pengyuan</first><last>Shi</last><affiliation>Columbia University</affiliation></author>
       <author><first>Lin</first><last>Ai</last></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <author><first>Natalie</first><last>Schluter</last><affiliation>Technical University of Denmark, Apple and IT University</affiliation></author>
       <pages>9820-9831</pages>
       <abstract>In this paper, we introduce the Akan Cinematic Emotions (AkaCE) dataset, the first multimodal emotion dialogue dataset for an African language, addressing the significant lack of resources for low-resource languages in emotion recognition research. AkaCE, developed for the Akan language, contains 385 emotion-labeled dialogues and 6162 utterances across audio, visual, and textual modalities, along with word-level prosodic prominence annotations. The presence of prosodic labels in this dataset also makes it the first prosodically annotated African language dataset. We demonstrate the quality and utility of AkaCE through experiments using state-of-the-art emotion recognition methods, establishing solid baselines for future research. We hope AkaCE inspires further work on inclusive, linguistically and culturally diverse NLP resources.</abstract>
@@ -13631,7 +13631,7 @@
       <author><first>Chao</first><last>Huang</last></author>
       <author><first>Zonghao</first><last>Guo</last></author>
       <author><first>Zhiyuan</first><last>Liu</last><affiliation>Tsinghua University</affiliation></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Yuhua</first><last>Li</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Ruixuan</first><last>Li</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Maosong</first><last>Sun</last><affiliation>Tsinghua University</affiliation></author>
@@ -13687,7 +13687,7 @@
       <author><first>Minhyuk</first><last>Kim</last></author>
       <author><first>Dongjun</first><last>Kim</last></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>9916-9926</pages>
       <abstract>Automatic Term Extraction (ATE) identifies domain-specific expressions that are crucial for downstream tasks such as machine translation and information retrieval. Although large language models (LLMs) have significantly advanced various NLP tasks, their potential for ATE has scarcely been examined. We propose a retrieval-based prompting strategy that, in the few-shot setting, selects demonstrations according to syntactic rather than semantic similarity. This syntactic retrieval method is domain-agnostic and provides more reliable guidance for capturing term boundaries. We evaluate the approach in both in-domain and cross-domain settings, analyzing how lexical overlap between the query sentence and its retrieved examples affects performance. Experiments on three specialized ATE benchmarks show that syntactic retrieval improves F1-score. These findings highlight the importance of syntactic cues when adapting LLMs to terminology-extraction tasks.</abstract>
       <url hash="956d1701">2025.findings-acl.516</url>
@@ -13777,7 +13777,7 @@
       <author><first>Zhongyu</first><last>Jiang</last></author>
       <author><first>Tianfang</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Zongkai</first><last>Wu</last></author>
-      <author><first>John</first><last>Lee</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Jenq-Neng</first><last>Hwang</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>10045-10056</pages>
@@ -13802,7 +13802,7 @@
       <title>Reasoning Circuits in Language Models: A Mechanistic Interpretation of Syllogistic Inference</title>
       <author><first>Geonhee</first><last>Kim</last></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>10074-10095</pages>
       <abstract>Recent studies on reasoning in language models (LMs) have sparked a debate on whether they can learn systematic inferential principles or merely exploit superficial patterns in the training data. To understand and uncover the mechanisms adopted for formal reasoning in LMs, this paper presents a mechanistic interpretation of syllogistic inference. Specifically, we present a methodology for circuit discovery aimed at interpreting content-independent and formal reasoning mechanisms. Through two distinct intervention methods, we uncover a sufficient and necessary circuit involving middle-term suppression that elucidates how LMs transfer information to derive valid conclusions from premises. Furthermore, we investigate how belief biases manifest in syllogistic inference, finding evidence of partial contamination from additional attention heads responsible for encoding commonsense and contextualized knowledge. Finally, we explore the generalization of the discovered mechanisms across various syllogistic schemes, model sizes and architectures. The identified circuit is sufficient and necessary for syllogistic schemes on which the models achieve high accuracy (<tex-math>\geq</tex-math> 60%), with compatible activation patterns across models of different families. Overall, our findings suggest that LMs learn transferable content-independent reasoning mechanisms, but that, at the same time, such mechanisms do not involve generalizable and abstract logical primitives, being susceptible to contamination by the same world knowledge acquired during pre-training.</abstract>
       <url hash="c0e4dc36">2025.findings-acl.525</url>
@@ -13872,7 +13872,7 @@
       <title><fixed-case>M</fixed-case>ulti<fixed-case>H</fixed-case>oax: A Dataset of Multi-hop False-premise questions</title>
       <author><first>Mohammadamin</first><last>Shafiei</last><affiliation>University of Milan</affiliation></author>
       <author><first>Hamidreza</first><last>Saffari</last><affiliation>Polytechnic Institute of Milan</affiliation></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last><affiliation>University of Sheffield</affiliation></author>
       <pages>10169-10187</pages>
       <abstract>As Large Language Models are increasingly deployed in high-stakes domains, their ability to detect false assumptions and reason critically is crucial for ensuring reliable outputs. False-premise questions (FPQs) serve as an important evaluation method by exposing cases where flawed assumptions lead to incorrect responses. While existing benchmarks focus on single-hop FPQs, real-world reasoning often requires multi-hop inference, where models must verify consistency across multiple reasoning steps rather than relying on surface-level cues. To address this gap, we introduce MultiHoax, a benchmark for evaluating LLMs’ ability to handle false premises in complex, multi-step reasoning tasks. Our dataset spans seven countries and ten diverse knowledge categories, using Wikipedia as the primary knowledge source to enable cross-regional factual reasoning. Experiments reveal that state-of-the-art LLMs struggle to detect false premises across different countries, knowledge categories, and multi-hop reasoning types, highlighting the need for improved false premise detection and more robust multi-hop reasoning capabilities in LLMs.</abstract>
       <url hash="f3532e37">2025.findings-acl.530</url>
@@ -14068,7 +14068,7 @@
       <title>Optimized Text Embedding Models and Benchmarks for <fixed-case>A</fixed-case>mharic Passage Retrieval</title>
       <author><first>Kidist Amde</first><last>Mekonnen</last><affiliation>University of Amsterdam</affiliation></author>
       <author><first>Yosef Worku</first><last>Alemneh</last><affiliation>NeoMatrix Ltd</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>10428-10445</pages>
       <abstract>Neural retrieval methods using transformer-based pre-trained language models have advanced multilingual and cross-lingual retrieval. However, their effectiveness for low-resource, morphologically rich languages such as Amharic remains underexplored due to data scarcity and suboptimal tokenization. We address this gap by introducing Amharic-specific dense retrieval models based on pre-trained Amharic BERT and RoBERTa backbones. Our proposed RoBERTa-Base-Amharic-Embed model (110M parameters) achieves a 17.6% relative improvement in MRR@10 and a 9.86% gain in Recall@10 over the strongest multilingual baseline, Arctic Embed 2.0 (568M parameters). More compact variants, such as RoBERTa-Medium-Amharic-Embed (42M), remain competitive while being over 13<tex-math>\times</tex-math> smaller. Additionally, we train a ColBERT-based late interaction retrieval model that achieves the highest MRR@10 score (0.843) among all evaluated models. We benchmark our proposed models against both sparse and dense retrieval baselines to systematically assess retrieval effectiveness in Amharic. Our analysis highlights key challenges in low-resource settings and underscores the importance of language-specific adaptation. To foster future research in low-resource IR, we publicly release our dataset, codebase, and trained models at https://github.com/kidist-amde/amharic-ir-benchmarks.</abstract>
       <url hash="22d50ec6">2025.findings-acl.543</url>
@@ -14164,7 +14164,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>D</fixed-case>et-M4: Detecting Machine-Generated Code in Multi-Lingual, Multi-Generator and Multi-Domain Settings</title>
       <author><first>Daniil</first><last>Orel</last></author>
       <author><first>Dilshod</first><last>Azizov</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>10570-10593</pages>
       <abstract>Large Language Models (LLMs) have revolutionized code generation, automating programming with remarkable efficiency. However, this has had important consequences for programming skills, ethics, and assessment integrity, thus making the detection of LLM-generated code essential for maintaining accountability and standards. While, there has been some previous research on this problem, it generally lacks domain coverage and robustness, and only covers a small number of programming languages. Here, we aim to bridge this gap. In particular, we propose a framework capable of distinguishing between human-written and LLM-generated program code across multiple programming languages, code generators, and domains. We use a large-scale dataset from renowned platforms and LLM-based code generators, alongside applying rigorous data quality checks, feature engineering, and comparative analysis of traditional machine learning models, pre-trained language models (PLMs), and LLMs for code detection. We perform an evaluation on out-of-domain scenarios, such as detecting authorship and hybrid authorship of generated code and generalizing to unseen models, domains, and programming languages. Our extensive experiments show that our framework effectively distinguishes human-written from LLM-generated program code, setting a new benchmark for the task.</abstract>
       <url hash="149ec2df">2025.findings-acl.550</url>
@@ -14245,7 +14245,7 @@
     </paper>
     <paper id="556">
       <title><fixed-case>LCFO</fixed-case>: Long Context and Long Form Output Dataset and Benchmarking</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <author><first>Pierre</first><last>Andrews</last></author>
       <author><first>Mariano Coria</first><last>Meglioli</last><affiliation>Meta</affiliation></author>
       <author><first>Joy</first><last>Chen</last><affiliation>Georgia Institute of Technology and Facebook</affiliation></author>
@@ -14322,7 +14322,7 @@
       <author><first>Sinan</first><last>Kurtyigit</last></author>
       <author><first>Diego</first><last>Frassinelli</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>10766-10776</pages>
       <abstract>We explore the role of the visual modality and of vision transformers in predicting the compositionality of English noun compounds. Crucially, we contribute a framework to address the challenge of obtaining adequate images that represent non-compositional compounds (such as “couch potato”), making it relevant for any image-based approach targeting figurative language. Our method uses prompting strategies and diffusion models to generate images. Comparing and combining our approach with a state-of-the-art text-based approach reveals complementary contributions regarding features as well as degrees of abstractness in compounds.</abstract>
       <url hash="eadf5912">2025.findings-acl.561</url>
@@ -14334,7 +14334,7 @@
       <author><first>Beiduo</first><last>Chen</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Siyao</first><last>Peng</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Anna</first><last>Korhonen</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>10777-10802</pages>
       <abstract>Disagreement in human labeling is ubiquitous, and can be captured in human judgment distributions (HJDs). Recent research has shown that explanations provide valuable information for understanding human label variation (HLV) and large language models (LLMs) can approximate HJD from a few human-provided label-explanation pairs. However, collecting explanations for every label is still time-consuming. This paper examines whether LLMs can be used to replace humans in generating explanations for approximating HJD. Specifically, we use LLMs as annotators to generate model explanations for a few given human labels. We test ways to obtain and combine these label-explanations with the goal to approximate human judgment distributions. We further compare the resulting human with model-generated explanations, and test automatic and human explanation selection. Our experiments show that LLM explanations are promising for NLI: to estimate HJDs, generated explanations yield comparable results to human’s when provided with human labels. Importantly, our results generalize from datasets with human explanations to i) datasets where they are not available and ii) challenging out-of-distribution test sets.</abstract>
       <url hash="bf68f742">2025.findings-acl.562</url>
@@ -14386,7 +14386,7 @@
       <title>Multi-word Measures: Modeling Semantic Change in Compound Nouns</title>
       <author><first>Chris</first><last>Jenkins</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Filip</first><last>Miletić</last><affiliation>University of Stuttgart</affiliation></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last><affiliation>University of Stuttgart</affiliation></author>
       <pages>10850-10864</pages>
       <abstract>Compound words (e.g. shower thought) provide a multifaceted challenge for diachronic models of semantic change. Datasets describing noun compound semantics tend to describe only the predominant sense of a compound, which is limiting, especially in diachronic settings where senses may shift over time. We create a novel dataset of relatedness judgements of noun compounds in English and German, the first to capture diachronic meaning changes for multi-word expressions without prematurely condensing individual senses into an aggregate value. Furthermore, we introduce a novel, sense-targeting approach for noun compounds that evaluates two contrasting vector representations in their ability to cluster example sentence pairs. Our clustering approach targets both noun compounds and their constituent parts, to model the interdependence of these terms over time. We calculate time-delineated distributions of these clusters and compare them against measures of semantic change aggregated from the human relatedness annotations.</abstract>
       <url hash="64bad9d0">2025.findings-acl.566</url>
@@ -14426,7 +14426,7 @@
     </paper>
     <paper id="569">
       <title>2<fixed-case>M</fixed-case>-<fixed-case>BELEBELE</fixed-case>: Highly Multilingual Speech and <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Comprehension Dataset Download <fixed-case>PDF</fixed-case></title>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <author><first>Bokai</first><last>Yu</last><affiliation>Meta AI</affiliation></author>
       <author><first>Pierre</first><last>Andrews</last></author>
       <author><first>Belen</first><last>Alastruey</last></author>
@@ -14504,7 +14504,7 @@
       <author><first>Wentao</first><last>Shi</last></author>
       <author><first>Zhuo</first><last>Liu</last></author>
       <author><first>Fuli</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>11010-11031</pages>
       <abstract>Multi-Objective Alignment (MOA) aims to align LLMs’ responses with multiple human preference objectives, with Direct Preference Optimization (DPO) emerging as a prominent approach. However, we find that DPO-based MOA approaches suffer from widespread preference conflicts in the data, where different objectives favor different responses. This results in conflicting optimization directions, hindering the optimization on the Pareto Front. To address this, we propose to construct Pareto-optimal responses to resolve preference conflicts. To efficiently obtain and utilize such responses, we propose a self-improving DPO framework that enables LLMs to self-generate and select Pareto-optimal responses for self-supervised preference alignment. Extensive experiments on two datasets demonstrate the superior Pareto Front achieved by our framework compared to various baselines</abstract>
       <url hash="af7e616b">2025.findings-acl.574</url>
@@ -14542,7 +14542,7 @@
     <paper id="577">
       <title>Beyond Browsing: <fixed-case>API</fixed-case>-Based Web Agents</title>
       <author><first>Yueqi</first><last>Song</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
       <author><first>Shuyan</first><last>Zhou</last></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>11066-11085</pages>
@@ -14582,7 +14582,7 @@
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Xinyue</first><last>Wang</last></author>
       <author><first>Xiaojun</first><last>Jia</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Sen</first><last>Su</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
       <pages>11128-11150</pages>
       <abstract>Large Language Models (LLMs) have demonstrated remarkable performance across diverse tasks yet still are vulnerable to external threats, particularly LLM Denial-of-Service (LLM-DoS) attacks. Specifically, LLM-DoS attacks aim to exhaust computational resources and block services. However, existing studies predominantly focus on white-box attacks, leaving black-box scenarios underexplored. In this paper, we introduce Auto-Generation for LLM-DoS (<tex-math>\textbf{AutoDoS}</tex-math>) attack, an automated algorithm designed for black-box LLMs. AutoDoS constructs the DoS Attack Tree and expands the node coverage to achieve effectiveness under black-box conditions. By transferability-driven iterative optimization, AutoDoS could work across different models in one prompt.Furthermore, we reveal that embedding the Length Trojan allows AutoDoS to bypass existing defenses more effectively.Experimental results show that AutoDoS significantly amplifies service response latency by over <tex-math>\textbf{250}\times\uparrow</tex-math>, leading to severe resource consumption in terms of GPU utilization and memory usage. Our work provides a new perspective on LLM-DoS attacks and security defenses.</abstract>
@@ -14647,7 +14647,7 @@
       <author><first>Jiho</first><last>Jin</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Woosung</first><last>Kang</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Junho</first><last>Myung</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>11215-11228</pages>
       <abstract>Measuring social bias in large language models (LLMs) is crucial, but existing bias evaluation methods struggle to assess bias in long-form generation. We propose a Bias Benchmark for Generation (BBG), an adaptation of the Bias Benchmark for QA (BBQ), designed to evaluate social bias in long-form generation by having LLMs generate continuations of story prompts. Building our benchmark in English and Korean, we measure the probability of neutral and biased generations across ten LLMs. We also compare our long-form story generation evaluation results with multiple-choice BBQ evaluation, showing that the two approaches produce inconsistent results.</abstract>
       <url hash="21f5c33f">2025.findings-acl.585</url>
@@ -15065,8 +15065,8 @@
       <title>Fast-and-Frugal Text-Graph Transformers are Effective Link Predictors</title>
       <author><first>Andrei Catalin</first><last>Coman</last></author>
       <author><first>Christos</first><last>Theodoropoulos</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
-      <author><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last><affiliation>KU Leuven, KU Leuven</affiliation></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last><affiliation>Idiap Research Institute</affiliation></author>
       <pages>11828-11841</pages>
       <abstract>We propose Fast-and-Frugal Text-Graph (FnF-TG) Transformers, a Transformer-based framework that unifies textual and structural information for inductive link prediction in text-attributed knowledge graphs. We demonstrate that, by effectively encoding ego-graphs (1-hop neighbourhoods), we can reduce the reliance on resource-intensive textual encoders. This makes the model both fast at training and inference time, as well as frugal in terms of cost. We perform a comprehensive evaluation on three popular datasets and show that FnF-TG can achieve superior performance compared to previous state-of-the-art methods. We also extend inductive learning to a fully inductive setting, where relations don’t rely on transductive (fixed) representations, as in previous work, but are a function of their textual description. Additionally, we introduce new variants of existing datasets, specifically designed to test the performance of models on unseen relations at inference time, thus offering a new test-bench for fully inductive link prediction.</abstract>
       <url hash="739e3d7d">2025.findings-acl.615</url>
@@ -15170,7 +15170,7 @@
       <title>On the Role of Semantic Proto-roles in Semantic Analysis: What do <fixed-case>LLM</fixed-case>s know about agency?</title>
       <author><first>Elizabeth</first><last>Spaulding</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Shafiuddin Rehan</first><last>Ahmed</last></author>
-      <author><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>12027-12048</pages>
       <abstract>Large language models (LLMs) are increasingly used in decision-making contexts, yet their ability to reason over event structure—an important component in the situational awareness needed to make complex decisions—is not well understood. By operationalizing proto-role theory, which characterizes agents via properties such as *instigation* and *volition* and patients via properties such as *change of state*, we examine the ability of LLMs to answer questions that require complex, multi-step event reasoning. Specifically, we investigate the extent to which LLMs capture semantic roles such as “agent” and “patient” through zero-shot prompts, and whether incorporating semantic proto-role labeling (SPRL) context improves semantic role labeling (SRL) performance in a zero-shot setting. We find that, while SPRL context sometimes degrades SRL accuracy in high-performing models (e.g., GPT-4o), it also uncovers an internal consistency between SPRL and SRL predictions that mirrors linguistic theory, and provides evidence that LLMs implicitly encode consistent multi-dimensional event role knowledge. Furthermore, our experiments support prior work showing that LLMs underperform human annotators in complex semantic analysis.</abstract>
       <url hash="b1a02e57">2025.findings-acl.623</url>
@@ -15204,7 +15204,7 @@
       <title><fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>-<fixed-case>IT</fixed-case>: Construction of an aligned <fixed-case>RDF</fixed-case>-<fixed-case>I</fixed-case>talian corpus through Machine Translation techniques</title>
       <author><first>Michael</first><last>Oliverio</last></author>
       <author><first>Pier Felice</first><last>Balestrucci</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last><affiliation>University of Turin</affiliation></author>
       <author><first>Valerio</first><last>Basile</last><affiliation>University of Turin</affiliation></author>
       <pages>12073-12083</pages>
       <abstract>The main goal of this work is the creation of the Italian version of the WebNLG corpus through the application of Neural Machine Translation (NMT) and post-editing with hand-written rules. To achieve this goal, in a first step, several existing NMT models were analysed and compared in order to identify the system with the highest performance on the original corpus. In a second step, after using the best NMT system, we semi-automatically designed and applied a number of rules to refine and improve the quality of the produced resource, creating a new corpus named WebNLG-IT. We used this resource for fine-tuning several LLMs for RDF-to-text tasks. In this way, comparing the performance of LLM-based generators on both Italian and English, we have (1) evaluated the quality of WebNLG-IT with respect to the original English version, (2) released the first fine-tuned LLM-based system for generating Italian from semantic web triples and (3) introduced an Italian version of a modular generation pipeline for RDF-to-text.</abstract>
@@ -15304,7 +15304,7 @@
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yangyifan</first><last>Xu</last><affiliation>University of the Chinese Academy of Sciences</affiliation></author>
       <author><first>Bing</first><last>Liu</last><affiliation>University of Illinois at Chicago</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>12217-12236</pages>
       <abstract>Large Language Models (LLMs) have achieved impressive results across numerous NLP tasks, and fine-tuning them for Machine Translation (MT) has improved their performance. However, vanilla fine-tuning often leads to catastrophic forgetting, compromising the broad general abilities of LLMs and introducing potential security risks. These abilities, which are developed using proprietary and unavailable training data, make simple data replay methods ineffective. To overcome this issue, we propose a novel approach called **Ra**tionale **Dis**tillation. RaDis harnesses the strong generative capabilities of LLMs to create rationales for training data, which are then “replayed” to prevent forgetting. These rationales connect prior knowledge with new tasks, acting as self-distillation targets to regulate the training process. By jointly training on reference translations and self-generated rationales, the model can learn new translation skills while preserving its general abilities across other tasks. Additionally, RaDis provides a fresh perspective on using rationales in the CL field and has the potential to serve as a general continual learning method for a variety of tasks.</abstract>
       <url hash="2d5d26d4">2025.findings-acl.632</url>
@@ -15313,7 +15313,7 @@
     </paper>
     <paper id="633">
       <title>Clarifying Underspecified Discourse Relations in Instructional Texts</title>
-      <author><first>Berfin</first><last>Aktas</last><affiliation>University of Technology Nuremberg</affiliation></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktas</last><affiliation>University of Technology Nuremberg</affiliation></author>
       <author><first>Michael</first><last>Roth</last><affiliation>University of Technology Nuremberg</affiliation></author>
       <pages>12237-12256</pages>
       <abstract>Discourse relations contribute to the structure of a text and can optionally be realized through explicit connectives such as “but” and “while”. But when are these connectives necessary to avoid possible misunderstandings? We investigate this question by first building a corpus of 4,274 text revisions in each of which a connective was explicitly inserted. For a subset of 250 cases, we collect plausibility annotations on other connectives to check whether they would represent suitable alternative relations. The results of this annotation show that several relations are often perceived as plausible in our data. Furthermore, we analyze the extent to which large language models can identify instances with multiple plausible relations as a possible source of misunderstandings. We find that the models predict plausibility of individual connectives with up to 66% accuracy, but they are not reliable in estimating when multiple relations are plausible.</abstract>
@@ -15359,7 +15359,7 @@
       <title><fixed-case>SEA</fixed-case>-<fixed-case>HELM</fixed-case>: <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Holistic Evaluation of Language Models</title>
       <author><first>Yosephine</first><last>Susanto</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Adithya Venkatadri</first><last>Hulagadri</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Jann Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
+      <author id="jann-railey-montalan"><first>Jann Railey</first><last>Montalan</last><affiliation>AI Singapore and Ateneo de Manila University</affiliation></author>
       <author><first>Jian Gang</first><last>Ngui</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Xianbin</first><last>Yong</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Wei Qi</first><last>Leong</last><affiliation>AI Singapore</affiliation></author>
@@ -15379,7 +15379,7 @@
       <author><first>Sen</first><last>Yang</last></author>
       <author><first>Yu</first><last>Bao</last><affiliation>ByteDance Research</affiliation></author>
       <author><first>Shujian</first><last>Huang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Shanbo</first><last>Cheng</last><affiliation>ByteDance Inc.</affiliation></author>
       <pages>12337-12347</pages>
       <abstract>The rise of Large Language Models (LLMs) has reshaped machine translation (MT), but multilingual MT still relies heavily on parallel data for supervised fine-tuning (SFT), facing challenges like data scarcity for low-resource languages and catastrophic forgetting. To address these issues, we propose TRANS-ZERO, a self-play framework that leverages only monolingual data and the intrinsic multilingual knowledge of LLM. TRANS-ZERO combines Genetic Monte-Carlo Tree Search (G-MCTS) with preference optimization, achieving strong translation performance that rivals supervised methods. Experiments demonstrate that this approach not only matches the performance of models trained on large-scale parallel data but also excels in non-English translation directions. Further analysis reveals that G-MCTS itself significantly enhances translation quality by exploring semantically consistent candidates through iterative translations, providing a robust foundation for the framework’s success.</abstract>
@@ -15415,9 +15415,9 @@
       <title>Socratic Style Chain-of-Thoughts Help <fixed-case>LLM</fixed-case>s to be a Better Reasoner</title>
       <author><first>Jiangbo</first><last>Pei</last><affiliation>Beijing University of Post and Telecommunication</affiliation></author>
       <author><first>Peiyu</first><last>Liu</last><affiliation>University of International Business and Economics</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Aidong</first><last>Men</last><affiliation>Beijing University of Posts and Telecommunications</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>University of California, Santa Cruz</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <pages>12384-12395</pages>
       <abstract>Synthetic data generation has emerged as a promising approach to enhance the reasoning capabilities of large language models. However, existing methods remain hindered by high costs—either through expensive API access or additional intermediate training—and are limited in their ability to generalize across different domains. To address these challenges, we propose a multi-agent debate framework based on the Socratic questioning strategy, abbreviated as SoDa. Distinguished from previous methods that prioritize data quantity, we highlight the wisdom of Socratic questioning in augmenting reasoning quality by deepening the thinking process to encourage exploration and broadening it to motivate self-reflection on each question. Combined with our efficient production pipeline, SoDa enables scaling while maintaining affordable costs. We use SoDa to generate diverse datasets for mathematics and code generation tasks with the Qwen2.5-7B-Instruct model, successfully fine-tuning a range of foundation models, from general-purpose ones to OpenAI o1-like ones. For mathematics, the experimental results show that SoDa outperforms the performance of existing datasets at the same scale, achieving improvements ranging from 1.3% to 13.5%. Remarkably, SoDa with 30K examples even surpasses the ScaleQuest dataset with 1000K samples, demonstrating significant efficiency. Our findings highlight the potential of SoDa as a universal, scalable, and cost-effective method for enhancing reasoning capabilities in large models across domains.</abstract>
       <url hash="f7163a0d">2025.findings-acl.640</url>
@@ -15431,7 +15431,7 @@
       <author><first>Laleh</first><last>Jalali</last><affiliation>Amazon</affiliation></author>
       <author><first>Boris N.</first><last>Oreshkin</last></author>
       <author><first>Mohsen</first><last>Bayati</last><affiliation>Stanford University, Stanford University and Stanford University</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>12396-12415</pages>
       <abstract>Large Language Models (LLMs) have shown promise in structured prediction tasks, including regression, but existing approaches primarily focus on point estimates and lack systematic comparison across different methods.We investigate probabilistic regression using LLMs for unstructured inputs, addressing challenging text-to-distribution prediction tasks such as price estimation where both nuanced text understanding and uncertainty quantification are critical.We propose a novel quantile regression approach that enables LLMs to produce full predictive distributions, improving upon traditional point estimates. Through extensive experiments across three diverse price prediction datasets, we demonstrate that a Mistral-7B model fine-tuned with quantile heads significantly outperforms traditional approaches for both point and distributional estimations, as measured by three established metrics each for prediction accuracy and distributional calibration.Our systematic comparison of LLM approaches, model architectures, training approaches, and data scaling reveals that Mistral-7B consistently outperforms encoder architectures, embedding-based methods, and few-shot learning methods.Our experiments also reveal the effectiveness of LLM-assisted label correction in achieving human-level accuracy without systematic bias. Our curated datasets are made available at https://github.com/vnik18/llm-price-quantile-reg/ to support future research.</abstract>
       <url hash="f56205b3">2025.findings-acl.641</url>
@@ -15445,7 +15445,7 @@
       <author><first>Yichi</first><last>Zhang</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Ziqiao</first><last>Ma</last></author>
       <author><first>Wenjie</first><last>Li</last><affiliation>The Hong Kong Polytechnic University, The Hong Kong Polytechnic University</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>12416-12436</pages>
       <abstract>Intelligent tutoring agents powered by large language models (LLMs) have been increasingly explored to deliver personalized knowledge in areas such as language learning and science education. However, their capabilities in guiding users to solve complex real-world tasks remain underexplored. To address this limitation, in this work, we focus on coding tutoring, a challenging problem that requires tutors to proactively guide students towards completing predefined coding tasks. We propose a novel agent workflow, Trace-and-Verify (TRAVER), which combines knowledge tracing to estimate a student’s knowledge state and turn-by-turn verification to ensure effective guidance toward task completion. We introduce DICT, an automatic evaluation protocol that assesses tutor agents using controlled student simulation and code generation tests. Extensive experiments reveal the challenges of coding tutoring and demonstrate that TRAVER achieves a significantly higher success rate. Although we use code tutoring as an example in this paper, our approach can be extended beyond coding, providing valuable insights into advancing tutoring agents for human task learning.</abstract>
       <url hash="b2f082b2">2025.findings-acl.642</url>
@@ -15538,7 +15538,7 @@
     <paper id="648">
       <title>A Tale of Evaluating Factual Consistency: Case Study on Long Document Summarization Evaluation</title>
       <author><first>Yang</first><last>Zhong</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh</affiliation></author>
       <pages>12511-12532</pages>
       <abstract>Ensuring factual consistency in summarization remains a challenge, especially for long-document evaluation. While automated, reference-free evaluation models are essential given the impracticality of large-scale human assessment for lengthy texts, challenges persist in evaluating different systems on how to handle different summary granularities and evolving model generations. In this work, we conduct a systematic study on diverse factual-consistency evaluation systems across four long-document datasets, encompassing summaries generated by models from non-LLMs to proprietary LLMs. Our analysis reveals that fine-grained continuous scores can provide more reliable assessments of different evaluation systems’ capabilities than binary classification. We also examine the relationship between sentence-level and summary-level model performance, highlighting its dependency on dataset characteristics. Moreover, our study reveals that advanced systems can achieve higher recall in error detection for older summaries, yet struggle with false positives and fine-grained error detection. Our analysis and case studies provide further insights into designing robust factuality evaluation systems, which are becoming increasingly in demand as generative models advance rapidly.</abstract>
       <url hash="5b130b1b">2025.findings-acl.648</url>
@@ -15620,7 +15620,7 @@
       <author><first>Ahmad Dawar</first><last>Hakimi</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Ali</first><last>Modarressi</last><affiliation>Center for Information and Language Processing, LMU Munich</affiliation></author>
       <author><first>Philipp</first><last>Wicke</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>12633-12653</pages>
       <abstract>Understanding how large language models (LLMs) acquire and store factual knowledge is crucial for enhancing their interpretability, reliability, and efficiency. In this work, we analyze the evolution of factual knowledge representation in the OLMo-7B model by tracking the roles of its Attention Heads and Feed Forward Networks (FFNs) over training. We classify these components into four roles—general, entity, relation-answer, and fact-answer specific—and examine their stability and transitions. Our results show that LLMs initially depend on broad, general-purpose components, which later specialize as training progresses. Once the model reliably predicts answers, some components are repurposed, suggesting an adaptive learning process. Notably, answer-specific attention heads display the highest turnover, whereas FFNs remain stable, continually refining stored knowledge. These insights offer a mechanistic view of knowledge formation in LLMs and have implications for model pruning, optimization, and transparency.</abstract>
       <url hash="cd99280d">2025.findings-acl.654</url>
@@ -15760,7 +15760,7 @@
       <title>Dual Debiasing for Noisy In-Context Learning for Text Generation</title>
       <author><first>Siqi</first><last>Liang</last></author>
       <author><first>Sumyeong</first><last>Ahn</last><affiliation>KENTECH</affiliation></author>
-      <author><first>Paramveer</first><last>Dhillon</last><affiliation>University of Michigan</affiliation></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer</first><last>Dhillon</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jiayu</first><last>Zhou</last><affiliation>University of Michigan - Ann Arbor and Michigan State University</affiliation></author>
       <pages>12855-12868</pages>
       <abstract>In-context learning (ICL) relies heavily on high-quality demonstrations drawn from large annotated corpora. Existing approaches detect noisy annotations by ranking local perplexities, presuming that noisy samples yield higher perplexities than their clean counterparts. However, this assumption breaks down when the noise ratio is high and many demonstrations are flawed.We re-examine the perplexity-based paradigm for text generation under noisy annotations, highlighting two sources of bias in perplexity: the annotation itself and the domain-specific knowledge inherent in large language models (LLMs). To overcome these biases, we introduce a dual-debiasing framework that uses synthesized neighbors to explicitly correct perplexity estimates, yielding a robust <i>Sample Cleanliness Score</i>. This metric uncovers absolute sample cleanliness regardless of the overall corpus noise level.Extensive experiments demonstrate our method’s superior noise-detection capabilities and show that its final ICL performance is comparable to that of a fully clean demonstration corpus. Moreover, our approach remains robust even when noise ratios are extremely high.</abstract>
@@ -15983,7 +15983,7 @@
       <author><first>Esam</first><last>Ghaleb</last></author>
       <author><first>Bulat</first><last>Khaertdinov</last><affiliation>Maastricht University</affiliation></author>
       <author><first>Asli</first><last>Ozyurek</last><affiliation>mpi for psycholinguistics</affiliation></author>
-      <author><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last><affiliation>University of Amsterdam and University of Amsterdam</affiliation></author>
       <pages>13191-13206</pages>
       <abstract>In face-to-face interaction, we use multiple modalities, including speech and gestures, to communicate information and resolve references to objects. However, how representational co-speech gestures refer to objects remains understudied from a computational perspective. In this work, we address this gap by introducing a multimodal reference resolution task centred on representational gestures, while simultaneously tackling the challenge of learning robust gesture embeddings. We propose a self-supervised pre-training approach to gesture representation learning that grounds body movements in spoken language. Our experiments show that the learned embeddings align with expert annotations and have significant predictive power. Moreover, reference resolution accuracy further improves when (1) using multimodal gesture representations, even when speech is unavailable at inference time, and (2) leveraging dialogue history. Overall, our findings highlight the complementary roles of gesture and speech in reference resolution, offering a step towards more naturalistic models of human-machine interaction.</abstract>
       <url hash="2993f50f">2025.findings-acl.682</url>
@@ -15993,7 +15993,7 @@
     <paper id="683">
       <title>World Knowledge Resolves Some Aspectual Ambiguity</title>
       <author><first>Katarzyna</first><last>Pruś</last></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Adam</first><last>Lopez</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>13207-13220</pages>
       <abstract>Annotating event descriptions with their aspectual features is often seen as a pre-requisite to temporal reasoning. However, a recent study by Pruś et al. (2024) has shown that non-experts’ annotations of the aspectual class of English verb phrases can disagree with both expert linguistic annotations and each another. They hypothesised that people use their world knowledge to tacitly conjure their own contexts, leading to disagreement between them. In this paper, we test that hypothesis by adding context to Pruś et al.’s examples and mirroring their experiment. Our results show that whilst their hypothesis explains some of the disagreement, some examples continue to yield divided responses even with the additional context. Finally, we show that outputs from GPT-4, despite to some degree capturing the aspectual class division, are not an accurate predictor of human answers.</abstract>
@@ -16104,12 +16104,12 @@
     </paper>
     <paper id="692">
       <title><fixed-case>C</fixed-case>ode<fixed-case>S</fixed-case>cientist: End-to-End Semi-Automated Scientific Discovery with Code-based Experimentation</title>
-      <author><first>Peter</first><last>Jansen</last><affiliation>University of Arizona and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last><affiliation>University of Arizona and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Oyvind</first><last>Tafjord</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Marissa</first><last>Radensky</last></author>
       <author><first>Pao</first><last>Siangliulue</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Tom</first><last>Hope</last><affiliation>Hebrew University, Hebrew University of Jerusalem and Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Bodhisattwa Prasad</first><last>Majumder</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Daniel S</first><last>Weld</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -16146,7 +16146,7 @@
     <paper id="695">
       <title>When Detection Fails: The Power of Fine-Tuned Models to Generate Human-Like Social Media Text</title>
       <author><first>Hillary</first><last>Dawkins</last><affiliation>National Research Council Canada</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>13494-13527</pages>
       <abstract>Detecting AI-generated text is a difficult problem to begin with; detecting AI-generated text on social media is made even more difficult due to the short text length and informal, idiosyncratic language of the internet. It is nonetheless important to tackle this problem, as social media represents a significant attack vector in online influence campaigns, which may be bolstered through the use of mass-produced AI-generated posts supporting (or opposing) particular policies, decisions, or events. We approach this problem with the mindset and resources of a reasonably sophisticated threat actor, and create a dataset of 505,159 AI-generated social media posts from a combination of open-source, closed-source, and fine-tuned LLMs, covering 11 different controversial topics. We show that while the posts can be detected under typical research assumptions about knowledge of and access to the generating models, under the more realistic assumption that an attacker will not release their fine-tuned model to the public, detectability drops dramatically. This result is confirmed with a human study. Ablation experiments highlight the vulnerability of various detection algorithms to fine-tuned LLMs. This result has implications across all detection domains, since fine-tuning is a generally applicable and realistic LLM use case.</abstract>
@@ -16182,7 +16182,7 @@
       <author><first>Mian</first><last>Zhang</last></author>
       <author><first>Shuo</first><last>Yan</last><affiliation>University of Texas at Dallas</affiliation></author>
       <author><first>Peilin</first><last>Wu</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <pages>13563-13597</pages>
       <abstract>While large language models (LLMs) have been thoroughly evaluated for deductive and inductive reasoning, their proficiency in holistic rule learning in interactive environments remains less explored. We introduce RULEARN, a novel benchmark to assess the rule-learning abilities of LLM agents in interactive settings. In RULEARN, agents strategically interact with simulated environments to gather observations, discern patterns, and solve complex problems. To enhance the rule-learning capabilities for LLM agents, we propose IDEA, a novel reasoning framework that integrates the process of **I**nduction, **De**duction, and **A**bduction. The IDEA agent generates initial hypotheses from limited observations through abduction, devises plans to validate these hypotheses or leverages them to solve problems via deduction, and refines previous hypotheses through induction, dynamically establishing and applying rules that mimic human rule-learning behaviors. Our evaluation of the IDEA framework, which involves five representative LLMs, demonstrates significant improvements over the baseline. Furthermore, our study with human participants reveals notable discrepancies in rule-learning behaviors between humans and LLMs. We believe our benchmark will serve as a valuable and challenging resource, and IDEA will provide crucial insights for the development of LLM agents capable of human-like rule learning in real-world scenarios. Our code and data have been released at: https://github.com/KaiyuHe998/RULEARN_IDEA.</abstract>
       <url hash="62c0760e">2025.findings-acl.698</url>
@@ -16248,7 +16248,7 @@
       <title><fixed-case>MMI</fixed-case>n<fixed-case>A</fixed-case>: Benchmarking Multihop Multimodal <fixed-case>I</fixed-case>nternet Agents</title>
       <author><first>Shulin</first><last>Tian</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Ziniu</first><last>Zhang</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>Liangyu</first><last>Chen</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
       <author><first>Ziwei</first><last>Liu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>13682-13697</pages>
       <abstract>Autonomous embodied agents live on an Internet of multimedia websites. Can they hop around multimodal websites to complete complex user tasks? Existing benchmarks fail to assess them in a realistic, evolving environment for their embodiment across websites. To answer this question, we present MMInA, a multihop and multimodal benchmark to evaluate the embodied agents for compositional Internet tasks, with several appealing properties: ***1) Evolving real-world multimodal websites.*** Our benchmark uniquely operates on evolving real-world websites, ensuring a high degree of realism and applicability to natural user tasks. Our data includes 1,050 human-written tasks covering various domains such as shopping and travel, with each task requiring the agent to extract multimodal information from web pages as observations autonomously. ***2) Multihop web browsing.*** Our dataset features naturally compositional tasks that require information from or actions on multiple websites to solve, to assess long-range reasoning capabilities on web tasks. ***3) Holistic evaluation.*** We propose a novel protocol for evaluating an agent’s progress in completing multihop tasks. We experiment with both standalone (multimodal) language models and heuristic-based web agents. Extensive experiments demonstrate that while long-chain multihop web tasks are easy for humans, they remain challenging for state-of-the-art web agents. We identify that agents are more likely to fail on the early hops when solving tasks of more hops, which results in lower task success rates. To address this issue, we propose a simple memory augmentation approach replaying past action trajectories to reflect. Our method significantly improves the performance of both the single-hop and multihop web browsing abilities.</abstract>
@@ -16274,7 +16274,7 @@
       <author><first>Tianyi</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Zhaowei</first><last>Wang</last><affiliation>Edinburgh University, University of Edinburgh and Department of Computer Science and Engineering, Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Tianyang</first><last>Liu</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>13714-13730</pages>
       <abstract>LLMs are often claimed to be capable of Natural Language Inference (NLI), which is widely regarded as a cornerstone of more complex forms of reasoning. However, recent works show that LLMs still suffer from hallucinations in NLI due to <i>attestation bias</i>, where LLMs overly rely on propositional memory to build shortcuts. To solve the issue, we design an unsupervised framework to construct counterfactual reasoning data and fine-tune LLMs to reduce attestation bias. To measure bias reduction, we build <i>bias-adversarial</i> variants of NLI datasets with randomly replaced predicates in premises while keeping hypotheses unchanged. Extensive evaluations show that our framework can significantly reduce hallucinations from attestation bias. Then, we further evaluate LLMs fine-tuned with our framework on original NLI datasets and their bias-neutralized versions, where original entities are replaced with randomly sampled ones. Extensive results show that our framework consistently improves inferential performance on both original and bias-neutralized NLI datasets.</abstract>
       <url hash="6ef37906">2025.findings-acl.705</url>
@@ -16284,7 +16284,7 @@
     <paper id="706">
       <title>Dynamic Steering With Episodic Memory For Large Language Models</title>
       <author><first>Van Dai</first><last>Do</last></author>
-      <author><first>Quan Hung</first><last>Tran</last><affiliation>Facebook</affiliation></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last><affiliation>Facebook</affiliation></author>
       <author><first>Svetha</first><last>Venkatesh</last><affiliation>Deakin University</affiliation></author>
       <author><first>Hung</first><last>Le</last><affiliation>Deakin University</affiliation></author>
       <pages>13731-13749</pages>
@@ -16301,7 +16301,7 @@
       <author><first>Laura</first><last>Biester</last><affiliation>Middlebury College</affiliation></author>
       <author><first>Andrew</first><last>Lee</last><affiliation>School of Engineering and Applied Sciences, Harvard University</affiliation></author>
       <author><first>James</first><last>Pennebaker</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>13750-13770</pages>
       <abstract>Large Language Models (LLMs) have been previously explored for mental healthcare training and therapy client simulation, but they still fall short in authentically capturing diverse client traits and psychological conditions. We introduce <b>Eeyore</b> , an 8B model optimized for realistic depression simulation through a structured alignment framework, incorporating expert input at every stage.First, we systematically curate real-world depression-related conversations, extracting depressive traits to guide data filtering and psychological profile construction, and use this dataset to instruction-tune Eeyore for profile adherence. Next, to further enhance realism, Eeyore undergoes iterative preference optimization—first leveraging model-generated preferences and then calibrating with a small set of expert-annotated preferences.Throughout the entire pipeline, we actively collaborate with domain experts, developing interactive interfaces to validate trait extraction and iteratively refine structured psychological profiles for clinically meaningful role-play customization.Despite its smaller model size, the Eeyore depression simulation outperforms GPT-4o with SOTA prompting strategies, both in linguistic authenticity and profile adherence.</abstract>
       <url hash="17e212a3">2025.findings-acl.707</url>
@@ -16338,7 +16338,7 @@
       <author><first>Tuochao</first><last>Chen</last></author>
       <author><first>Nicholas Scott</first><last>Batchelder</last><affiliation>University of Washington</affiliation></author>
       <author><first>Alisa</first><last>Liu</last><affiliation>NVIDIA and University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Shyamnath</first><last>Gollakota</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <pages>13801-13824</pages>
       <abstract>We introduce LlamaPIE, the first real-time proactive assistant designed to enhance human conversations through discreet, concise guidance delivered via hearable devices. Unlike traditional language models that require explicit user invocation, this assistant operates in the background, anticipating user needs without interrupting conversations. We address several challenges, including determining when to respond, crafting concise responses that enhance conversations, leveraging knowledge of the user for context-aware assistance, and real-time, on-device processing. To achieve this, we construct a semi-synthetic dialogue dataset and propose a two-model pipeline: a small model that decides when to respond and a larger model that generates the response. We evaluate our approach on real-world datasets, demonstrating its effectiveness in providing helpful, unobtrusive assistance. User studies with our assistant, implemented on Apple Silicon M2 hardware, show a strong preference for the proactive assistant over both a baseline with no assistance and a reactive AI assistant, highlighting the potential of LlamaPIE to enhance live conversations.</abstract>
@@ -16400,7 +16400,7 @@
       <title><fixed-case>H</fixed-case>i<fixed-case>COT</fixed-case>: Improving Neural Topic Models via Optimal Transport and Contrastive Learning</title>
       <author><first>Hoang Tran</first><last>Vuong</last></author>
       <author><first>Tue</first><last>Le</last></author>
-      <author><first>Tu</first><last>Vu</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Tung</first><last>Nguyen</last><affiliation>Hanoi University of Science and Technology</affiliation></author>
       <author><first>Linh Ngo</first><last>Van</last><affiliation>Hanoi University of Science and Technology</affiliation></author>
       <author><first>Sang</first><last>Dinh</last><affiliation>Hanoi University of Science and Technology</affiliation></author>
@@ -16439,8 +16439,8 @@
       <author><first>Ruqing</first><last>Zhang</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jianming</first><last>Lv</last><affiliation>South China University of Technology</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>13935-13952</pages>
       <abstract>We explore adversarial attacks against retrieval-augmented generation (RAG) systems to identify their vulnerabilities. We focus on generating human-imperceptible adversarial examples and introduce a novel imperceptible retrieve-to-generate attack against RAG. This task aims to find imperceptible perturbations that retrieve a target document, originally excluded from the initial top-k candidate set, in order to influence the final answer generation. To address this task, we propose ReGENT, a reinforcement learning-based framework that tracks interactions between the attacker and the target RAG and continuously refines attack strategies based on relevance-generation-naturalness rewards. Experiments on newly constructed factual and non-factual question-answering benchmarks demonstrate that ReGENT significantly outperforms existing attack methods in misleading RAG systems with small imperceptible text perturbations.</abstract>
       <url hash="69ed1906">2025.findings-acl.717</url>
@@ -16466,7 +16466,7 @@
       <author><first>Zixiong</first><last>Wang</last></author>
       <author><first>Gaoyang</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Chen</first><last>Wang</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
+      <author><first>Wei</first><last>Liu</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <author><first>Kai</first><last>Peng</last><affiliation>Huazhong University of Science and Technology</affiliation></author>
       <pages>13978-13999</pages>
       <abstract>Machine Unlearning (MU) has emerged as a promising solution for removing the influence of data that an owner wishes to unlearn from Large Language Models (LLMs). However, existing MU methods, which require tuning the entire model parameters on the unlearned data with random labels or perturbed gradients, significantly degrade model utility, especially given the difficulty of accessing the original training data. This presents a key challenge: how can we achieve MU using only the unlearned data while preserving model utility?In this paper, we propose NeuMuter, a simple but effective MU method that eliminates the influence of unlearned data from LLMs by modulating the outputs of merely 1% of the neurons in the feed-forward network (FFN) modules within the Transformer blocks, minimizing disruption to the model’s performance. We design a trainable masking scheme that decouples the memorization of different training data within the neurons of LLMs, allowing us to precisely identify and modify neurons associated with the unlearned data. Through comprehensive evaluations on two benchmarks across four different LLMs, we demonstrate that modifying the outputs of a few fraction of the total neurons can effectively achieve MU while preserving the model’s utility across downstream tasks.</abstract>
@@ -16539,7 +16539,7 @@
       <author><first>Chenyang</first><last>Yan</last></author>
       <author><first>Nuo</first><last>Chen</last></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
-      <author><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last><affiliation>Nanjing University</affiliation></author>
       <pages>14077-14094</pages>
       <abstract>Image captioning has been a longstanding challenge in vision-language research. With the rise of LLMs, modern Vision-Language Models (VLMs) generate detailed and comprehensive image descriptions. However, benchmarking the quality of such captions remains unresolved. This paper addresses two key questions: (1) How well do VLMs actually perform on image captioning, particularly compared to humans? We built CapArena, a platform with over 6000 pairwise caption battles and high-quality human preference votes. Our Arena-style evaluation marks a milestone, showing that leading models like GPT-4o achieve or even surpass human performance, while most open-source models lag behind. (2) Can automated metrics reliably assess caption quality? Using human annotations from CapArena, we evaluate traditional and recent captioning metrics, as well as VLM-as-a-Judge. Our analysis reveals that while some metrics (e.g., METEOR) show high caption-level agreement with humans, their systematic biases lead to inconsistencies in model ranking. In contrast, VLM-as-a-Judge demonstrates robust discernment at both the caption and model levels. Building on these insights, we release CapArena-Auto, an accurate and efficient automated benchmark for detailed captioning, achieving 93.4% correlation with human rankings at just $4 per test. All data and evaluation resources have been open-sourced.</abstract>
       <url hash="9508bd14">2025.findings-acl.724</url>
@@ -16733,7 +16733,7 @@
       <author><first>Quang Hieu</first><last>Pham</last><affiliation>Qualcomm Inc, QualComm</affiliation></author>
       <author><first>Thuy Duong</first><last>Nguyen</last><affiliation>VinAI Research</affiliation></author>
       <author><first>Tung</first><last>Pham</last><affiliation>Qualcomm Inc, QualComm</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Dat Quoc</first><last>Nguyen</last><affiliation>Qualcomm AI Research</affiliation></author>
       <pages>14322-14329</pages>
       <abstract>The capabilities of large language models (LLMs) have been enhanced by training on data that reflects human thought processes, such as the Chain-of-Thought format. However, evidence suggests that the conventional scheme of next-word prediction may not fully capture how humans learn to think. Inspired by how humans generalize mathematical reasoning, we propose a new approach named ClozeMath to fine-tune LLMs for mathematical reasoning. Our ClozeMath involves a text-infilling task that predicts masked equations from a given solution, analogous to cloze exercises used in human learning. Experiments on GSM8K, MATH, and GSM-Symbolic show that ClozeMath surpasses the strong baseline Masked Thought in performance and robustness, with two test-time scaling decoding algorithms, Beam Search and Chain-of-Thought decoding. Additionally, we conduct an ablation study to analyze the effects of various architectural and implementation choices on our approach.</abstract>
@@ -16748,7 +16748,7 @@
       <author><first>Fei</first><last>Sun</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Qi</first><last>Cao</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences, China</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>14330-14344</pages>
       <abstract>Text watermarking, which modify tokens to embed watermark, has proven effective in detecting machine-generated texts. Yet its application to low-entropy texts like code and mathematics presents significant challenges. A fair number of tokens in these texts are hardly modifiable without changing the intended meaning, causing statistical measures to falsely indicate the absence of a watermark. Existing research addresses this issue by rely mainly on a limited number of high-entropy tokens, which are considered flexible for modification, and accurately reflecting watermarks. However, their detection accuracy remains suboptimal, as they neglect strong watermark evidences embedded in low entropy tokens modified through watermarking. To overcome this limitation, we introduce Bayes’ Rule derived Watermark Detector (BRWD), which exploit watermark information from every token, by leveraging the posterior probability of watermark’s presence. We theoretically prove the optimality of our method in terms of detection accuracy, and demonstrate its superiority across various datasets, models, and watermark injection strategies. Notably, our method achieves up to 50% and 70% relative improvements in detection accuracy over the best baselines in code generation and math problem-solving tasks, respectively. Our code is available at https://github.com/cczslp/BRWD.</abstract>
       <url hash="da6e7fc3">2025.findings-acl.739</url>
@@ -16862,7 +16862,7 @@
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Yun</first><last>Li</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>14471-14485</pages>
       <abstract>Process supervision, i.e., evaluating each step, is critical for complex large language model (LLM) reasoning and test-time searching with increased inference compute. Existing approaches, represented by process reward models (PRMs), primarily focus on rewarding signals up to the current step, exhibiting a one-directional nature and lacking a mechanism to model the distance to the final target. To address this problem, we draw inspiration from the A* algorithm, which states that an effective supervisory signal should simultaneously consider the incurred cost and the estimated cost for reaching the target. Building on this key insight, we introduce BiRM, a novel process supervision model that not only evaluates the correctness of previous steps but also models the probability of future success. We conduct extensive experiments on mathematical reasoning tasks and demonstrate that BiRM provides more precise evaluations of LLM reasoning steps, achieving an improvement of 3.1% on Gaokao2023 over PRM under the Best-of-N sampling method. Besides, in search-based strategies, BiRM provides more comprehensive guidance and outperforms ORM by 5.0% and PRM by 3.8% respectively on MATH-500.</abstract>
       <url hash="b232b134">2025.findings-acl.747</url>
@@ -16880,7 +16880,7 @@
       <author><first>Yutao</first><last>Zeng</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Xiaolong</first><last>Jin</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>14486-14509</pages>
       <abstract>Empirical evidence indicates that LLMs exhibit spontaneous cross-lingual alignment. However, although LLMs show promising cross-lingual alignment in Information Extraction (IE), a significant imbalance across languages persists, highlighting an underlying deficiency. To address this, we propose KnowCoder-X, a powerful code LLM with advanced cross-lingual and multilingual capabilities for universal IE. Firstly, it standardizes the representation of multilingual schemas using Python classes, ensuring a consistent ontology across different languages. Then, IE across languages is formulated as a unified code generation task. Secondly, we conduct IE cross-lingual alignment instruction tuning on the translated instance prediction task to enhance the model’s cross-lingual transferability. During this phase, we also construct a high-quality and diverse bilingual IE parallel dataset with 257k samples, called ParallelNER, synthesized by our proposed robust three-stage pipeline, with manual annotation to ensure quality. Although without training in 29 unseen languages, KnowCoder-X surpasses ChatGPT by 30.17% and SoTA by 20.03%, thereby demonstrating superior cross-lingual IE capabilities. Comprehensive evaluations on 64 IE benchmarks in Chinese and English under various settings demonstrate that KnowCoder-X significantly enhances cross-lingual IE transfer through boosting the IE alignment. Our code and dataset are available at: https://github.com/ICT-GoKnow/KnowCoder.</abstract>
       <url hash="e8c2213a">2025.findings-acl.748</url>
@@ -16897,7 +16897,7 @@
       <author><first>Jing</first><last>Xiong</last><affiliation>University of Hong Kong</affiliation></author>
       <author><first>Rossella</first><last>Arcucci</last><affiliation>Imperial College London</affiliation></author>
       <author><first>Huaxiu</first><last>Yao</last><affiliation>Department of Computer Science, University of North Carolina at Chapel Hill</affiliation></author>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
+      <author><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
       <pages>14510-14527</pages>
       <abstract>Electrocardiogram (ECG) is the primary non-invasive diagnostic tool for monitoring cardiac conditions and is crucial in assisting clinicians. Recent studies have concentrated on classifying cardiac conditions using ECG data but have overlooked ECG report generation, which is time-consuming and requires clinical expertise. To automate ECG report generation and ensure its versatility, we propose the Multimodal ECG Instruction Tuning (MEIT) framework, the first attempt to tackle ECG report generation with LLMs and multimodal instructions. To facilitate future research, we establish a benchmark to evaluate MEIT with various LLMs backbones across two large-scale ECG datasets. Our approach uniquely aligns the representations of the ECG signal and the report, and we conduct extensive experiments to benchmark MEIT with nine open-source LLMs using more than 800,000 ECG reports. MEIT’s results underscore the superior performance of instruction-tuned LLMs, showcasing their proficiency in quality report generation, zero-shot capabilities, resilience to signal perturbation, and alignment with human expert evaluation. These findings emphasize the efficacy of our MEIT framework and its potential for real-world clinical application.</abstract>
       <url hash="b39da6eb">2025.findings-acl.749</url>
@@ -16957,7 +16957,7 @@
       <author><first>Chang</first><last>Zhou</last></author>
       <author><first>Wen</first><last>Xiao</last><affiliation>Microsoft</affiliation></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <pages>14588-14604</pages>
       <abstract>In recent progress, mathematical verifiers have achieved success in mathematical reasoning tasks by validating the correctness of solutions generated by policy models. However, existing verifiers are trained with binary classification labels, which are not informative enough for the model to accurately assess the solutions. To mitigate the aforementioned insufficiency of binary labels, we introduce step-wise natural language feedback as rationale labels, that is, the correctness of each step and the detailed explanations. In this paper, we propose Math-Minos, a natural language feedback-enhanced verifier by constructing automatically generated training data and a two-stage training paradigm for effective training and efficient inference. Our experiments reveal that a small set of natural language feedback can significantly boost the performance of the verifier in both verification and reinforcement learning and also significantly alleviates the data-demanding problems of the reward model with an over 700% data efficiency improvement.</abstract>
       <url hash="34f62bab">2025.findings-acl.753</url>
@@ -17194,7 +17194,7 @@
       <author><first>Yuang</first><last>Li</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Xiaofeng</first><last>Zhao</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Ming</first><last>Zhu</last></author>
-      <author><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last><affiliation>Soochow University, China</affiliation></author>
       <author><first>Yunfei</first><last>Lu</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Daimeng</first><last>Wei</last></author>
@@ -17280,7 +17280,7 @@
       <author><first>Changze</first><last>Lv</last></author>
       <author><first>Xiaohua</first><last>Wang</last></author>
       <author><first>Xiaoqing</first><last>Zheng</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>15014-15032</pages>
       <abstract>Continual pre-training has demonstrated significant potential in enhancing model performance, particularly in domain-specific scenarios. The most common approach for packing data before continual pre-training involves concatenating input texts and splitting them into fixed-length sequences. While straightforward and efficient, this method often leads to excessive truncation and context discontinuity, which can hinder model performance. To address these issues, we explore the potential of data engineering to enhance continual pre-training, particularly its impact on model performance and efficiency. We propose Seamless Packing (SP), a novel data packing strategy aimed at preserving contextual information and enhancing model performance. Our approach employs a sliding window technique in the first stage that synchronizes overlapping tokens across consecutive sequences, ensuring better continuity and contextual coherence. In the second stage, we adopt a First-Fit-Decreasing algorithm to pack shorter texts into bins slightly larger than the target sequence length, thereby minimizing padding and truncation. Empirical evaluations across various model architectures and corpus domains demonstrate the effectiveness of our method, outperforming baselines in 99% of all settings. Code is available at https://github.com/Infernus-WIND/Seamless-Packing.</abstract>
       <url hash="fd19899b">2025.findings-acl.777</url>
@@ -17392,7 +17392,7 @@
     </paper>
     <paper id="785">
       <title>Position Paper: <fixed-case>M</fixed-case>e<fixed-case>M</fixed-case>o: Towards Language Models with Associative Memory Mechanisms</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last><affiliation>University of Rome Tor Vergata</affiliation></author>
       <author><first>Elena Sofia</first><last>Ruzzetti</last><affiliation>Università degli Studi di Roma Tor Vergata</affiliation></author>
       <author><first>Giancarlo A.</first><last>Xompero</last><affiliation>University of Rome Tor Vergata and Almawave SpA</affiliation></author>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
@@ -17438,7 +17438,7 @@
       <author><first>Peinan</first><last>Zhang</last><affiliation>CyberAgent AI Lab</affiliation></author>
       <author><first>Hidetaka</first><last>Kamigaito</last><affiliation>Nara Institute of Science and Technology</affiliation></author>
       <author><first>Hiroya</first><last>Takamura</last><affiliation>AIST, National Institute of Advanced Industrial Science and Technology</affiliation></author>
-      <author><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last><affiliation>Institute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology</affiliation></author>
       <pages>15212-15230</pages>
       <abstract>Identifying factors that make ad text attractive is essential for advertising success. This study proposes AdParaphrase v2.0, a dataset for ad text paraphrasing, containing human preference data, to enable the analysis of the linguistic factors and to support the development of methods for generating attractive ad texts. Compared with v1.0, this dataset is 20 times larger, comprising 16,460 ad text paraphrase pairs, each annotated with preference data from ten evaluators, thereby enabling a more comprehensive and reliable analysis. Through the experiments, we identified multiple linguistic features of engaging ad texts that were not observed in v1.0 and explored various methods for generating attractive ad texts. Furthermore, our analysis demonstrated the relationships between human preference and ad performance, and highlighted the potential of reference-free metrics based on large language models for evaluating ad text attractiveness.The dataset is publicly available at: https://github.com/CyberAgentAILab/AdParaphrase-v2.0.</abstract>
       <url hash="20f743db">2025.findings-acl.788</url>
@@ -17500,7 +17500,7 @@
     <paper id="793">
       <title>“<fixed-case>I</fixed-case> understand your perspective”: <fixed-case>LLM</fixed-case> Persuasion through the Lens of Communicative Action Theory</title>
       <author><first>Esra</first><last>Dönmez</last><affiliation>Universität Stuttgart</affiliation></author>
-      <author><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last><affiliation>Interchange Forum for Reflecting on Intelligent Systems, University of Stuttgart</affiliation></author>
       <pages>15312-15327</pages>
       <abstract>Large Language Models (LLMs) can generate high-quality arguments, yet their ability to engage in *nuanced and persuasive communicative actions* remains largely unexplored. This work explores the persuasive potential of LLMs through the framework of Jürgen Habermas’ Theory of Communicative Action. It examines whether LLMs express illocutionary intent (i.e., pragmatic functions of language such as conveying knowledge, building trust, or signaling similarity) in ways that are comparable to human communication.We simulate online discussions between opinion holders and LLMs using conversations from the persuasive subreddit *ChangeMyView*. We then compare the likelihood of illocutionary intents in human-written and LLM-generated counter-arguments, specifically those that successfully changed the original poster’s view. We find that all three LLMs effectively convey illocutionary intent — often more so than humans — potentially increasing their anthropomorphism. Further, LLMs craft responses that closely align with the opinion holder’s intent, a strategy strongly associated with opinion change. Finally, crowd-sourced workers find LLM-generated counter-arguments more *agreeable* and consistently prefer them over human-written ones. These findings suggest that LLMs’ persuasive power extends beyond merely generating high-quality arguments. On the contrary, training LLMs with human preferences effectively tunes them to mirror human communication patterns, particularly nuanced communicative actions, potentially increasing individuals’ susceptibility to their influence.</abstract>
       <url hash="c9734b5f">2025.findings-acl.793</url>
@@ -17595,7 +17595,7 @@
     <paper id="800">
       <title><fixed-case>PISCO</fixed-case>: Pretty Simple Compression for Retrieval-Augmented Generation</title>
       <author><first>Maxime</first><last>Louis</last><affiliation>Naver Labs Europe</affiliation></author>
-      <author><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last><affiliation>Naver Labs Europe</affiliation></author>
       <author><first>Stéphane</first><last>Clinchant</last><affiliation>Naver Labs Europe</affiliation></author>
       <pages>15506-15521</pages>
       <abstract>Retrieval-Augmented Generation (RAG) pipelines enhance Large Language Models (LLMs) by retrieving relevant documents, but they face scalability issues due to high inference costs and limited context size. Document compression is a practical solution, but current soft compression methods often suffer from accuracy losses and require extensive pretraining. In this paper, we introduce PISCO, a novel method that achieves a 16x compression rate with minimal accuracy loss (0-3%) across diverse RAG-based question-answering (QA) tasks. Unlike existing approaches, PISCO requires no pretraining or annotated data, relying solely on sequence-level knowledge distillation from document-based questions. With the ability to fine-tune a 7-10B LLM in 24 hours on a single A100 GPU, PISCO offers a highly efficient and scalable solution. We present comprehensive experiments showing that PISCO outperforms existing compression models by 8% in accuracy.</abstract>
@@ -17902,7 +17902,7 @@
     </paper>
     <paper id="823">
       <title>Token-level Preference Self-Alignment Optimization for Multi-style Outline Controllable Generation</title>
-      <author id="zihao-li"><first>Zihao</first><last>Li</last></author>
+      <author><first>Zihao</first><last>Li</last></author>
       <author><first>Xuekong</first><last>Xu</last></author>
       <author><first>Ziyao</first><last>Chen</last></author>
       <author><first>Lixin</first><last>Zou</last><affiliation>Wuhan University</affiliation></author>
@@ -17929,7 +17929,7 @@
       <author><first>Sarthak</first><last>Roy</last><affiliation>Indian Institute of Technology, Kharagpur</affiliation></author>
       <author><first>Martin</first><last>Semmann</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Alexander</first><last>Panchenko</last><affiliation>Skoltech</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Animesh</first><last>Mukherjee</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <pages>16008-16022</pages>
       <abstract>Despite regulations imposed by nations and social media platforms, e.g. (Government of India, 2021; European Parliament and Council of the European Union, 2022), inter alia, hateful content persists as a significant challenge. Existing approaches primarily rely on reactive measures such as blocking or suspending offensive messages, with emerging strategies focusing on proactive measurements like detoxification and counterspeech. In our work, which we call HATEPRISM, we conduct a comprehensive examination of hate speech regulations and strategies from three perspectives: country regulations, social platform policies, and NLP research datasets. Our findings reveal significant inconsistencies in hate speech definitions and moderation practices across jurisdictions and platforms, alongside a lack of alignment with research efforts. Based on these insights, we suggest ideas and research direction for further exploration of a unified framework for automated hate speech moderation incorporating diverse strategies.</abstract>
@@ -17970,7 +17970,7 @@
       <author><first>Constanza</first><last>Fierro</last><affiliation>Copenhagen University</affiliation></author>
       <author><first>Negar</first><last>Foroutan</last><affiliation>School of Computer and Communication Sciences, EPFL - EPF Lausanne</affiliation></author>
       <author><first>Desmond</first><last>Elliott</last><affiliation>Copenhagen University and University of Copenhagen</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>16052-16106</pages>
       <abstract>Large Language Models (LLMs) store and retrieve vast amounts of factual knowledge acquired during pre-training. Prior research has localized and identified mechanisms behind knowledge recall; however, it has only focused on English monolingual models. The question of how these mechanisms generalize to non-English languages and multilingual LLMs remains unexplored. In this paper, we address this gap by conducting a comprehensive analysis of three multilingual LLMs. First, we show that previously identified recall mechanisms in English largely apply to multilingual contexts, with nuances based on language and architecture. Next, through patching intermediate representations, we localize the role of language during recall, finding that subject enrichment is language-independent, while object extraction is language-dependent. Additionally, we discover that the last token representation acts as a Function Vector (FV), encoding both the language of the query and the content to be extracted from the subject. Furthermore, in decoder-only LLMs, FVs compose these two pieces of information in two separate stages. These insights reveal unique mechanisms in multilingual LLMs for recalling information, highlighting the need for new methodologies—such as knowledge evaluation, fact editing, and knowledge acquisition—that are specifically tailored for multilingual LLMs.</abstract>
       <url hash="1ae1a29a">2025.findings-acl.827</url>
@@ -18026,7 +18026,7 @@
       <author><first>Kai</first><last>Lu</last></author>
       <author><first>Zeyu</first><last>Xiong</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Xinwang</first><last>Liu</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>16159-16179</pages>
       <abstract>Using large language models (LLMs) has a potential risk of privacy leakage since the data with sensitive information may be used for fine-tuning the LLMs. Differential privacy (DP) provides theoretical guarantees of privacy protection, but its practical application in LLMs still has the problem of privacy-utility trade-off. Researchers synthesized data with strong generation capabilities closed-source LLMs (i.e., GPT-4) under DP to alleviate this problem, but this method is not so flexible in fitting the given privacy distributions without fine-tuning. Besides, such methods can hardly balance the diversity of synthetic data and its relevance to target privacy data without accessing so much private data. To this end, this paper proposes DPGA-TextSyn, combining general LLMs with genetic algorithm (GA) to produce relevant and diverse synthetic text under DP constraints. First, we integrate the privacy gene (i.e., metadata) to generate better initial samples. Then, to achieve survival of the fittest and avoid homogeneity, we use privacy nearest neighbor voting and similarity suppression to select elite samples. In addition, we expand elite samples via genetic strategies such as mutation, crossover, and generation to expand the search scope of GA. Experiments show that this method significantly improves the performance of the model in downstream tasks while ensuring privacy.</abstract>
       <url hash="2abfcebf">2025.findings-acl.831</url>
@@ -18038,7 +18038,7 @@
       <author><first>Seungyoon</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Seongtae</first><last>Hong</last><affiliation>Korea University</affiliation></author>
       <author><first>Hyeonseok</first><last>Moon</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>16180-16193</pages>
       <abstract>Large Language Models (LLMs) are increasingly incorporating multilingual capabilities, fueling the demand to transfer them into target language-specific models. However, most approaches, which blend the source model’s embedding by replacing the source vocabulary with the target language-specific vocabulary, may constrain expressive capacity in the target language since the source model is predominantly trained on English data. In this paper, we propose Semantic Aware Linear Transfer (SALT), a novel cross-lingual transfer technique that recycles embeddings from target language Pre-trained Language Models (PLMs) to transmit the deep representational strengths of PLM-derived embedding to LLMs. SALT derives unique regression lines based on the similarity in the overlap of the source and target vocabularies to handle each non-overlapping token’s embedding space. Our extensive experiments show that SALT significantly outperforms other transfer methods, achieving lower loss and faster convergence during language adaptation. Notably, SALT achieves remarkable performance in cross-lingual understanding setups compared to other methods. Furthermore, we highlight the scalable use of PLMs to enhance the functionality of contemporary LLMs by conducting experiments with varying architectures.</abstract>
       <url hash="54a044b8">2025.findings-acl.832</url>
@@ -18056,7 +18056,7 @@
       <author><first>Ruiming</first><last>Tang</last></author>
       <author><first>Yong</first><last>Yu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>Jun</first><last>Wang</last><affiliation>University College London</affiliation></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <pages>16194-16204</pages>
       <abstract>To address these limitations, we propose BDC, a novel framework that Boosts reasoning exploration via multi-agent collaboration, Disentangles heterogeneous data into specialized experts, and Customizes solutions through dynamic model composition. BDC integrates a Monte Carlo Tree-of-Agents algorithm, where multiple LLMs mutually verify and refine reasoning paths through reflection-guided pruning, enabling efficient exploration of high-quality solutions. To handle data diversity, we cluster problems by latent semantics, train composable LoRA experts on each cluster, and deploy an input-aware hypernetwork to dynamically merge these experts into tailored solvers. Experiments on APPS and CodeContest benchmarks demonstrate BDC’s superiority: it achieves up to 73.8% accuracy on hard problems, outperforming state-of-the-art methods like LATS and RethinkMCTS by 9–15%. This work lays the groundwork for advancing LLM capabilities in complex reasoning tasks, offering a novel System2-to-System1 solution.</abstract>
       <url hash="5d0761bf">2025.findings-acl.833</url>
@@ -18114,7 +18114,7 @@
       <author><first>Andrea</first><last>Sensi</last></author>
       <author><first>Elisa</first><last>Passone</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last><affiliation>University of Roma, Tor Vergata</affiliation></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last><affiliation>University of Roma, Tor Vergata</affiliation></author>
       <pages>16266-16284</pages>
       <abstract>Grounded natural language understanding in Human-Robot Interaction (HRI) requires integrating linguistic, visual, and world knowledge to ensure effective task execution. We propose an approach that enhances Multi-Modal Large Language Models (MLLMs) with a novel explicit dialogue planning phase, allowing robotic agents to systematically refine their understanding of ambiguous commands through structured clarification steps. This reduces hallucinations and improves task feasibility.To evaluate this approach, we introduce a novel dataset of over 1,100 annotated dialogues in English and Italian, designed for fine-tuning and assessing Multi-Modal models in HRI scenarios. Experimental results show that dialogue planning improves response accuracy and quality, and contributes to cross-lingual generalisation, enabling models trained in one language to transfer effectively to another. To the best of our knowledge, this is the first application of structured, goal-driven, and explicit dialogue planning in Multi-Modal LLMs for grounded interaction.</abstract>
       <url hash="6466b54e">2025.findings-acl.837</url>
@@ -18125,7 +18125,7 @@
       <title><fixed-case>MVL</fixed-case>-<fixed-case>SIB</fixed-case>: A Massively Multilingual Vision-Language Benchmark for Cross-Modal Topical Matching</title>
       <author><first>Fabian David</first><last>Schmidt</last></author>
       <author><first>Florian</first><last>Schneider</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <author><first>Goran</first><last>Glavaš</last><affiliation>Julius-Maximilians-Universität Würzburg</affiliation></author>
       <pages>16285-16312</pages>
       <abstract>Existing multilingual vision-language (VL) benchmarks often only cover a handful of languages. Consequently, evaluations of large vision-language models (LVLMs) predominantly target high-resource languages, underscoring the need for evaluation data for low-resource languages. To address this limitation, we introduce MVL-SIB, a massively multilingual vision-language benchmark that evaluates both cross-modal and text-only topical matching across 205 languages – over 100 more than the most multilingual existing VL benchmarks encompass. We then benchmark a range of of open-weight LVLMs together with GPT-4o(-mini) on MVL-SIB. Our results reveal that LVLMs struggle in cross-modal topic matching in lower-resource languages, performing no better than chance on languages like N’Koo. Our analysis further reveals that VL support in LVLMs declines disproportionately relative to textual support for lower-resource languages, as evidenced by comparison of cross-modal and text-only topical matching performance. We further observe that open-weight LVLMs do not benefit from representing a topic with more than one image, suggesting that these models are not yet fully effective at handling multi-image tasks. By correlating performance on MVL-SIB with other multilingual VL benchmarks, we highlight that MVL-SIB serves as a comprehensive probe of multilingual VL understanding in LVLMs.</abstract>
@@ -18138,7 +18138,7 @@
       <author><first>Yihong</first><last>Tang</last></author>
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Xuefeng</first><last>Bai</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Bo</first><last>Wang</last></author>
       <author><first>Jie</first><last>Liu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
@@ -18240,7 +18240,7 @@
       <author><first>Hui</first><last>Shen</last></author>
       <author><first>Haozhe</first><last>Wang</last><affiliation>INF</affiliation></author>
       <author><first>Kangyu</first><last>Zheng</last><affiliation>Rensselaer Polytechnic Institute</affiliation></author>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
+      <author><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
       <author><first>Rossella</first><last>Arcucci</last><affiliation>Imperial College London</affiliation></author>
       <pages>16448-16460</pages>
       <abstract>Automatic radiology report generation holds significant potential to streamline the labor-intensive process of report writing by radiologists, particularly for 3D radiographs such as CT scans. While CT scans are critical for clinical diagnostics, they remain less explored compared to 2D radiographs. To date, there has been no comprehensive benchmark for 3D radiograph report generation (3DRRG), nor sufficient investigation into the optimal training strategies for Vision Language Models (VLMs) in this context, particularly with respect to vision encoder choices, visual token compression, and model scaling.In this work, we make two three contributions. We curate CT-3DRRG, the largest publicly available 3D CT-report dataset, establishing a robust and diverse benchmark for evaluating VLM performance on 3DRRG. Furthermore, we propose a comprehensive training recipe for building high-performing VLMs for 3DRRG, exploring key factors such as vision encoder pretraining strategies, visual token compression, and the impact of data &amp; model scale. Guided by these findings, we introduce Argus, a state-of-the-art family of VLMs that achieve superior performance across different model sizes and input 3D medical image resolutions, efficiently processing high-resolution 3D images up to 512 × 512 × 256.</abstract>
@@ -18285,7 +18285,7 @@
       <author><first>Pasquale</first><last>Minervini</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
       <author><first>Benjamin I. P.</first><last>Rubinstein</last><affiliation>The University of Melbourne and The University of Melbourne</affiliation></author>
-      <author><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last><affiliation>Google and The University of Melbourne</affiliation></author>
       <pages>16504-16544</pages>
       <abstract>The implications of backdoor attacks on English-centric large language models (LLMs) have been widely examined — such attacks can be achieved by embedding malicious behaviors during training and activated under specific conditions that trigger malicious outputs. Despite the increasing support for multilingual capabilities in open-source and proprietary LLMs, the impact of backdoor attacks on these systems remains largely under-explored. Our research focuses on cross-lingual backdoor attacks against multilingual LLMs, particularly investigating how poisoning the instructiontuning data for one or two languages can affect the outputs for languages whose instructiontuning data were not poisoned. Despite its simplicity, our empirical analysis reveals that our method exhibits remarkable efficacy in models like BLOOM and GPT-4o, with high attack success rates, surpassing 90% in more than 7 out of 12 languages across various scenarios. Our findings also indicate that more powerful models show increased susceptibility to transferable cross-lingual backdoor attacks, which also applies to LLMs predominantly pre-trained on English/Chinese data, such as Llama2, Llama3, Qwen2.5, and Gemma. Moreover, our experiments demonstrate 1) High Transferability: the backdoor mechanism operates successfully in cross lingual response scenarios across 26 languages, achieving an average attack success rate of 99%, and 2) Robustness: the proposed attack remains effective even after defenses are applied. These findings expose critical security vulnerabilities in multilingual LLMs and highlight the urgent need for more robust, targeted defense strategies to address the unique challenges posed by cross-lingual backdoor transfer.</abstract>
       <url hash="4d6d2916">2025.findings-acl.848</url>
@@ -18439,7 +18439,7 @@
       <title><fixed-case>CHARPEVAL</fixed-case>: Benchmarking Large Language Models’ Contextual Reasoning in Knowledge-Grounded Dialogue</title>
       <author><first>Abbas</first><last>Ghaddar</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>David</first><last>Alfonso-Hermelo</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Prasanna</first><last>Parthasarathi</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <pages>16764-16775</pages>
@@ -18467,7 +18467,7 @@
     <paper id="862">
       <title><fixed-case>D</fixed-case>ebate4<fixed-case>MATH</fixed-case>: Multi-Agent Debate for Fine-Grained Reasoning in Math</title>
       <author><first>Shaowei</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>16810-16824</pages>
       <abstract>Large language models (LLMs) have demonstrated impressive performance in reasoning. However, existing data annotation methods usually suffer from high annotation cost and the lack of effective automatic validation. To address these issues, we propose a Fine-grained Multi-Agent Debate framework (FMAD) and MMATH-Data, a dataset created by FMAD, which consists of 46K reasoning steps. By prompting multiple agents to debate, FMAD assesses the contribution of each reasoning step to the final solution, with labels based on the judge’s confidence score and the winner’s position. To facilitate reasoning in math and examine FMAD and MMATH-Data, we further propose two key components: a Multi-Agent Debate Reward Model (MRM) trained on MMATH-Data, which serves as a reward model to provide robust feedback during the optimization process, and MMATH-LLM, a model designed specifically for mathematical reasoning. MMATH-LLM is fine-tuned using reinforcement learning with supervised feedback from MRM, aiming at improving its mathematical reasoning capabilities. Extensive experiments demonstrate that our model achieves 83.4% accuracy on the GSM8K dataset and 45.1% on the MATH dataset, outperforming the state-of-the-art methods by 1.2% and 3.5%, respectively. All data and code will be available soon at GitHub.</abstract>
       <url hash="879126f0">2025.findings-acl.862</url>
@@ -18531,9 +18531,9 @@
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Yang</first><last>Xiang</last></author>
       <author><first>Xuefeng</first><last>Bai</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Yang</first><last>Feng</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>16886-16902</pages>
       <abstract>The remarkable understanding and generation capabilities of large language models (LLMs) have greatly improved translation performance. However, incorrect understanding of the sentence to be translated can degrade translation quality. To address this issue, we proposed a novel Iterative Bilingual Understanding Translation (IBUT) method based on the cross-lingual capabilities of LLMs and the dual characteristics of translation tasks. The cross-lingual capability of LLMs enables the generation of contextual understanding for both the source and target languages separately. Furthermore, the dual characteristics allow IBUT to generate effective cross-lingual feedback, iteratively refining contextual understanding, thereby reducing errors and improving translation performance. Experimental results showed that the proposed IBUT outperforms several strong comparison methods, especially being generalized to multiple domains (e.g., news, commonsense, and cultural translation benchmarks).</abstract>
@@ -18589,11 +18589,11 @@
       <title><fixed-case>R</fixed-case>eflect<fixed-case>E</fixed-case>vo: Improving Meta Introspection of Small <fixed-case>LLM</fixed-case>s by Learning Self-Reflection</title>
       <author><first>Jiaqi</first><last>Li</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <author><first>Xinyi</first><last>Dong</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <author><first>Zhizhuo</first><last>Yang</last></author>
       <author><first>Quansen</first><last>Wang</last><affiliation>Beijing Institute of General Artificial Intelligence</affiliation></author>
       <author><first>Xiaobo</first><last>Wang</last><affiliation>Beijing Institute for General Artificial Intelligence and University of Science and Technology of China</affiliation></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
       <author><first>Zixia</first><last>Jia</last></author>
       <author><first>Zilong</first><last>Zheng</last><affiliation>Beijing Institute for General Artificial Intelligence</affiliation></author>
       <pages>16948-16966</pages>
@@ -18607,7 +18607,7 @@
       <author><first>Shuo</first><last>Yang</last></author>
       <author><first>Caren</first><last>Han</last><affiliation>University of Melbourne, University of Western Australia and University of Sydney</affiliation></author>
       <author><first>Siwen</first><last>Luo</last><affiliation>University of Western Australia</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <pages>16967-16986</pages>
       <abstract>Visual Question Answering (VQA) necessitates models to reason effectively across visual and textual modalities. However, existing Large Vision-Language Models (LVLMs) often fall short in achieving human-like reasoning due to a lack of integrated commonsense knowledge, limiting their robustness and accuracy in real-world scenarios where both explicit facts and implicit understanding are crucial. To address this challenge, we present MAGIC-VQA: Multimodal And Grounded Inference with Commonsense Knowledge, a novel framework designed to enhance multimodal inference by integrating commonsense reasoning. MAGIC-VQA introduces a three-stage process: (1) Explicit Commonsense Knowledge Retrieval from external knowledge graphs, (2) By-Type Commonsense Knowledge Post-Processing to refine contextual relevance, and (3) Implicit Commonsense Knowledge Augmentation using a heterogeneous graph processed by a Graph Neural Network (GNN). These stages collectively enable nuanced, context-aware reasoning without extensive pre-training or intricate prompt tuning.Our MAGIC-VQA significantly improves comprehensive benchmark datasets, surpassing existing models in tasks requiring advanced commonsense reasoning. MAGIC-VQA establishes a robust pathway for integrating commonsense knowledge into VQA, bridging the gap between vision-language inputs and high-level reasoning for improved reliability and contextual accuracy.</abstract>
       <url hash="e9f269d9">2025.findings-acl.872</url>
@@ -18642,7 +18642,7 @@
       <author><first>Leonardo F. R.</first><last>Ribeiro</last><affiliation>Amazon</affiliation></author>
       <author><first>Rexhina</first><last>Blloshmi</last><affiliation>Amazon</affiliation></author>
       <author><first>Christopher</first><last>Davis</last><affiliation>Amazon</affiliation></author>
-      <author><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last><affiliation>Amazon</affiliation></author>
       <pages>17030-17049</pages>
       <abstract>We present GaRAGe, a large RAG benchmark with human-curated long-form answers and annotations of each grounding passage, allowing a fine-grained evaluation of whether LLMs can identify relevant grounding when generating RAG answers. Our benchmark contains 2366 questions of diverse complexity, dynamism, and topics, and includes over 35K annotated passages retrieved from both private document sets and the Web, to reflect real-world RAG use cases. This makes it an ideal test bed to evaluate an LLM’s ability to identify only the relevant information necessary to compose a response, or provide a deflective response when there is insufficient information. Evaluations of multiple state-of-the-art LLMs on GaRAGe show that the models tend to over-summarise rather than (a) ground their answers strictly on the annotated relevant passages (reaching at most a Relevance-Aware Factuality Score of 60%), or (b) deflect when no relevant grounding is available (reaching at most 31% true positive rate in deflections). The F<tex-math>_{1}</tex-math> in attribution to relevant sources is at most 58.9%, and we show that performance is particularly reduced when answering time-sensitive questions and when having to draw knowledge from sparser private grounding sources.</abstract>
       <url hash="fa5eab44">2025.findings-acl.875</url>
@@ -18688,8 +18688,8 @@
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Abdalla Mohamed Salama Sayed</first><last>Moustafa</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xixin</first><last>Wu</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>17091-17105</pages>
       <abstract>Improving context faithfulness in large language models is essential for developing trustworthy retrieval augmented generation systems and mitigating hallucinations, especially in long-form question answering (LFQA) tasks or scenarios involving knowledge conflicts. Existing methods either intervene LLMs only at inference without addressing their inherent limitations or overlook the potential for self-improvement. In this paper, we introduce GenDiE(Generate, Discriminate, Evolve), a novel self-evolving framework that enhances context faithfulness through fine-grained sentence-level optimization. GenDiE combines both generative and discriminative training, equipping LLMs with self-generation and self-scoring capabilities to facilitate iterative self-evolution. This supports both data construction for model alignment and score-guided search during inference. Furthermore, by treating each sentence in a response as an independent optimization unit, GenDiE effectively addresses the limitations of previous approaches that optimize at the holistic answer level, which may miss unfaithful details. Experiments on ASQA (in-domain LFQA) and ConFiQA (out-of-domain counterfactual QA) datasets demonstrate that GenDiE surpasses various baselines in both faithfulness and correctness, and exhibits robust performance for domain adaptation.</abstract>
       <url hash="204a317d">2025.findings-acl.878</url>
@@ -18739,7 +18739,7 @@
     <paper id="882">
       <title>In the <fixed-case>LLM</fixed-case> era, Word Sense Induction remains unsolved</title>
       <author><first>Anna</first><last>Mosolova</last></author>
-      <author><first>Marie</first><last>Candito</last><affiliation>Université Paris Cité</affiliation></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last><affiliation>Université Paris Cité</affiliation></author>
       <author><first>Carlos</first><last>Ramisch</last><affiliation>LIS - Laboratoire d’Informatique et Systèmes and AMU - Aix Marseille University</affiliation></author>
       <pages>17161-17178</pages>
       <abstract>In the absence of sense-annotated data, word sense induction (WSI) is a compelling alternative to word sense disambiguation, particularly in low-resource or domain-specific settings. In this paper, we emphasize methodological problems in current WSI evaluation. We propose an evaluation on a SemCor-derived dataset, respecting the original corpus polysemy and frequency distributions. We assess pre-trained embeddings and clustering algorithms across parts of speech, and propose and evaluate an LLM-based WSI method for English. We evaluate data augmentation sources (LLM-generated, corpus and lexicon), and semi-supervised scenarios using Wiktionary for data augmentation, must-link constraints, number of clusters per lemma.We find that no unsupervised method (whether ours or previous) surpasses the strong “one cluster per lemma” heuristic (1cpl). We also show that (i) results and best systems may vary across POS, (ii) LLMs have troubles performing this task, (iii) data augmentation is beneficial and (iv) capitalizing on Wiktionary does help. It surpasses previous SOTA system on our test set by 3.3%. WSI is not solved, and calls for a better articulation of lexicons and LLMs’ lexical semantics capabilities.</abstract>
@@ -18776,7 +18776,7 @@
       <author><first>Sibo</first><last>Yi</last></author>
       <author><first>Tianshuo</first><last>Cong</last></author>
       <author><first>Xinlei</first><last>He</last><affiliation>The Hong Kong University of Science and Technology</affiliation></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Tsinghua University</affiliation></author>
       <author><first>Jiaxing</first><last>Song</last></author>
       <pages>17221-17234</pages>
       <abstract>Small language models (SLMs) have become increasingly prominent in the deployment on edge devices due to their high efficiency and low computational cost. While researchers continue to advance the capabilities of SLMs through innovative training strategies and model compression techniques, the security risks of SLMs have received considerably less attention compared to large language models (LLMs). To fill this gap, we provide a comprehensive empirical study to evaluate the security performance of 13 state-of-the-art SLMs under various jailbreak attacks. Our experiments demonstrate that most SLMs are quite susceptible to existing jailbreak attacks, while some of them are even vulnerable to direct harmful prompts. To address the safety concerns, we evaluate several representative defense methods and demonstrate their effectiveness in enhancing the security of SLMs. We further analyze the potential security degradation caused by different SLM techniques including architecture compression, quantization, knowledge distillation, and so on. We expect that our research can highlight the security challenges of SLMs and provide valuable insights to future work in developing more robust and secure SLMs.</abstract>
@@ -18799,7 +18799,7 @@
     <paper id="887">
       <title>A Law Reasoning Benchmark for <fixed-case>LLM</fixed-case> with Tree-Organized Structures including Factum Probandum, Evidence and Experiences</title>
       <author><first>Jiaxin</first><last>Shen</last></author>
-      <author><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last><affiliation>Beijing Jiaotong University</affiliation></author>
       <author><first>Huiqi</first><last>Hu</last></author>
       <author><first>Luyi</first><last>Lin</last></author>
       <author><first>Guoyang</first><last>Ma</last></author>
@@ -18829,7 +18829,7 @@
       <title>Stereotype Detection as a Catalyst for Enhanced Bias Detection: A Multi-Task Learning Approach</title>
       <author><first>Aditya</first><last>Tomar</last></author>
       <author><first>Rudra</first><last>Murthy</last><affiliation>IBM India Pvt Ltd</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>17304-17317</pages>
       <abstract>Bias and stereotypes in language models can cause harm, especially in sensitive areas like content moderation and decision-making. This paper addresses bias and stereotype detection by exploring how jointly learning these tasks enhances model performance. We introduce StereoBias, a unique dataset labeled for bias and stereotype detection across five categories: religion, gender, socio-economic status, race, profession, and others, enabling a deeper study of their relationship. Our experiments compare encoder-only models and fine-tuned decoder-only models using QLoRA. While encoder-only models perform well, decoder-only models also show competitive results. Crucially, joint training on bias and stereotype detection significantly improves bias detection compared to training them separately. Additional experiments with sentiment analysis confirm that the improvements stem from the connection between bias and stereotypes, not multi-task learning alone. These findings highlight the value of leveraging stereotype information to build fairer and more effective AI systems.</abstract>
       <url hash="20237e05">2025.findings-acl.889</url>
@@ -18865,7 +18865,7 @@
     <paper id="892">
       <title>Are Dialects Better Prompters? A Case Study on <fixed-case>A</fixed-case>rabic Subjective Text Classification</title>
       <author><first>Leila</first><last>Moudjari</last><affiliation>IRIT</affiliation></author>
-      <author><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last><affiliation>Institut de recherche en informatique de toulouse</affiliation></author>
       <pages>17356-17371</pages>
       <abstract>This paper investigates the effect of dialectal prompting, variations in prompting scrip t and model fine-tuning on subjective classification in Arabic dialects. To this end, we evaluate the performances of 12 widely used open LLMs across four tasks and eight benchmark datasets. Our results reveal that specialized fine-tuned models with Arabic and Arabizi scripts dialectal prompts achieve the best results, which constitutes a novel state of the art in the field.</abstract>
       <url hash="428a5a73">2025.findings-acl.892</url>
@@ -18944,7 +18944,7 @@
     <paper id="898">
       <title>Metaphor and Large Language Models: When Surface Features Matter More than Deep Understanding</title>
       <author><first>Elisa</first><last>Sanchez-Bayona</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>17462-17477</pages>
       <abstract>This paper presents a comprehensive evaluation of the capabilities of Large Language Models (LLMs) in metaphor interpretation across multiple datasets, tasks, and prompt configurations. Although metaphor processing has gained significant attention in Natural Language Processing (NLP), previous research has been limited to single-dataset evaluations and specific task settings, often using artificially constructed data through lexical replacement. We address these limitations by conducting extensive experiments using diverse publicly available datasets with inference and metaphor annotations, focusing on Natural Language Inference (NLI) and Question Answering (QA) tasks. The results indicate that LLMs’ performance is more influenced by features like lexical overlap and sentence length than by metaphorical content, demonstrating that any alleged emergent abilities of LLMs to understand metaphorical language are the result of a combination of surface-level features, in-context learning, and linguistic knowledge. This work provides critical insights into the current capabilities and limitations of LLMs in processing figurative language, highlighting the need for more realistic evaluation frameworks in metaphor interpretation tasks. Data and code publicly available: https://github.com/elisanchez-beep/metaphorLLM</abstract>
       <url hash="10f8fc85">2025.findings-acl.898</url>
@@ -19021,7 +19021,7 @@
       <author><first>Qinglin</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Jiaqing</first><last>Liu</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Qian</first><last>Chen</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>17577-17593</pages>
       <abstract>The video topic segmentation (VTS) task segments videos into intelligible, non-overlapping topics, facilitating efficient comprehension of video content and quick access to specific content. VTS is also critical to various downstream video understanding tasks. Traditional VTS methods using shallow features or unsupervised approaches struggle to accurately discern the nuances of topical transitions. Recently, supervised approaches have achieved superior performance on video action or scene segmentation over unsupervised approaches. In this work, we improve supervised VTS by thoroughly exploring **multimodal fusion** and **multimodal coherence modeling**. Specifically, (1) we enhance multimodal fusion by exploring different architectures using Cross-Attention and Mixture of Experts. (2) To generally strengthen multimodality alignment and fusion, we pre-train and fine-tune the model with multimodal contrastive learning. (3) We propose a new pre-training task tailored for the VTS task, and a novel fine-tuning task for enhancing multimodal coherence modeling for VTS. We evaluate our proposed approaches on educational videos, in the form of lectures, due to the vital role of topic segmentation of educational videos in boosting learning experiences. Additionally, to promote research in VTS, we introduce a large-scale Chinese lecture video dataset to augment the existing English lecture video datasets. Experiments on both English and Chinese lecture datasets demonstrate that our model achieves superior VTS performance compared to competitive unsupervised and supervised baselines.</abstract>
       <url hash="e62afd90">2025.findings-acl.904</url>
@@ -19267,7 +19267,7 @@
       <author><first>Christian</first><last>Moro</last><affiliation>University of Padova</affiliation></author>
       <author><first>Luisa</first><last>Orrù</last></author>
       <author><first>Gian Piero</first><last>Turchi</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padua</affiliation></author>
       <pages>17918-17929</pages>
       <abstract>Persuasion (or propaganda) techniques detection is a relatively novel task in Natural Language Processing (NLP). While there have already been a number of annotation campaigns, they have been based on heuristic guidelines, which have never been thoroughly discussed. Here, we present the first systematic analysis of a complex annotation task -detecting 22 persuasion techniques in memes-, for which we provided continuous expert oversight. The presence of an expert allowed us to critically analyze specific aspects of the annotation process. Among our findings, we show that inter-annotator agreement alone inadequately assessed annotation correctness. We thus define and track different error types, revealing that expert feedback shows varying effectiveness across error categories. This pattern suggests that distinct mechanisms underlie different kinds of misannotations. Based on our findings, we advocate for an expert oversight in annotation tasks and periodic quality audits. As an attempt to reduce the costs for this, we introduce a probabilistic model for optimizing intervention scheduling.</abstract>
@@ -19290,7 +19290,7 @@
       <title><fixed-case>BEDAA</fixed-case>: <fixed-case>B</fixed-case>ayesian Enhanced <fixed-case>D</fixed-case>e<fixed-case>BERT</fixed-case>a for Uncertainty-Aware Authorship Attribution</title>
       <author><first>Iqra</first><last>Zahid</last><affiliation>Imperial College London</affiliation></author>
       <author><first>Youcheng</first><last>Sun</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Manchester</affiliation></author>
-      <author><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last><affiliation>University of Manchester</affiliation></author>
       <pages>17952-17966</pages>
       <abstract>Authorship Attribution (AA) seeks to identify the author of a given text, yet existing methods often struggle with trustworthiness and interpretability, particularly across different domains, languages, and stylistic variations. These challenges arise from the absence of uncertainty quantification and the inability of current models to adapt to diverse authorship tasks. To address these limitations, we introduce BEDAA, a Bayesian-Enhanced DeBERTa framework that integrates Bayesian reasoning with transformer-based language models to enable uncertainty-aware and interpretable authorship attribution. BEDAA achieves up to 19.69% improvement in F1-score across multiple authorship attribution tasks, including binary, multiclass, and dynamic authorship detection. By incorporating confidence ranking, uncertainty decomposition, and probabilistic reasoning, BEDAA improves robustness while offering transparent decision-making processes. Furthermore, BEDAA extends beyond traditional AA by demonstrating its effectiveness in human vs. machine-generated text classification, code authorship detection, and cross-lingual attribution. These advances establish BEDAA as a generalised, interpretable, and adaptable framework for modern authorship attribution challenges.</abstract>
       <url hash="6c1c167c">2025.findings-acl.924</url>
@@ -19301,7 +19301,7 @@
       <title>Benchmarking the Benchmarks: Reproducing Climate-Related <fixed-case>NLP</fixed-case> Tasks</title>
       <author><first>Tom</first><last>Calamai</last></author>
       <author><first>Oana</first><last>Balalau</last><affiliation>INRIA</affiliation></author>
-      <author><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last><affiliation>Telecom Paris</affiliation></author>
       <pages>17967-18009</pages>
       <abstract>Significant efforts have been made in the NLP community to facilitate the automatic analysis of climate-related corpora by tasks such as climate-related topic detection, climate risk classification, question answering over climate topics, and many more. In this work, we perform a reproducibility study on 8 tasks and 29 datasets, testing 6 models. We find that many tasks rely heavily on surface-level keyword patterns rather than deeper semantic or contextual understanding. Moreover, we find that 96% of the datasets contain annotation issues, with 16.6% of the sampled wrong predictions of a zero-shot classifier being actually clear annotation mistakes, and 38.8% being ambiguous examples.These results call into question the reliability of current benchmarks to meaningfully compare models and highlight the need for improved annotation practices. We conclude by outlining actionable recommendations to enhance dataset quality and evaluation robustness.</abstract>
       <url hash="31907cfe">2025.findings-acl.925</url>
@@ -19355,7 +19355,7 @@
     <paper id="929">
       <title><fixed-case>F</fixed-case>act<fixed-case>L</fixed-case>ens: Benchmarking Fine-Grained Fact Verification</title>
       <author><first>Kushan</first><last>Mitra</last><affiliation>Megagon Labs</affiliation></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
+      <author><first>Dan</first><last>Zhang</last><affiliation>Megagon Labs</affiliation></author>
       <author><first>Sajjadur</first><last>Rahman</last></author>
       <author><first>Estevam</first><last>Hruschka</last><affiliation>Megagon Labs, Megagon Labs and Carnegie Mellon University</affiliation></author>
       <pages>18085-18096</pages>
@@ -19600,7 +19600,7 @@
       <author><first>Alexandra</first><last>Chouldechova</last><affiliation>Microsoft and Carnegie Mellon University</affiliation></author>
       <author><first>Jean</first><last>Garcia-Gathright</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Alexandra</first><last>Olteanu</last><affiliation>Research, Microsoft</affiliation></author>
-      <author><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last><affiliation>Microsoft</affiliation></author>
       <pages>18423-18440</pages>
       <abstract>The NLP research community has made publicly available numerous instruments for measuring representational harms caused by large language model (LLM)-based systems. These instruments have taken the form of datasets, metrics, tools, and more. In this paper, we examine the extent to which such instruments meet the needs of practitioners tasked with evaluating LLM-based systems. Via semi-structured interviews with 12 such practitioners, we find that practitioners are often unable to use publicly available instruments for measuring representational harms. We identify two types of challenges. In some cases, instruments are not useful because they do not meaningfully measure what practitioners seek to measure or are otherwise misaligned with practitioner needs. In other cases, instruments-even useful instruments-are not used by practitioners due to practical and institutional barriers impeding their uptake. Drawing on measurement theory and pragmatic measurement, we provide recommendations for addressing these challenges to better meet practitioner needs.</abstract>
       <url hash="6a2d7e88">2025.findings-acl.947</url>
@@ -19611,7 +19611,7 @@
       <title>Mind the (Belief) Gap: Group Identity in the World of <fixed-case>LLM</fixed-case>s</title>
       <author><first>Angana</first><last>Borah</last></author>
       <author><first>Marwa</first><last>Houalla</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>18441-18463</pages>
       <abstract>Social biases and belief-driven behaviors can significantly impact Large Language Models’ (LLMs’) decisions on several tasks. As LLMs are increasingly used in multi-agent systems for societal simulations, their ability to model fundamental group psychological characteristics remains critical yet under-explored. In this study, we present a multi-agent framework that simulates belief congruence, a classical group psychology theory that plays a crucial role in shaping societal interactions and preferences. Our findings reveal that LLMs exhibit amplified belief congruence compared to humans, across diverse contexts. We further investigate the implications of this behavior on two downstream tasks: (1) misinformation dissemination and (2) LLM learning, finding that belief congruence in LLMs increases misinformation dissemination and impedes learning. To mitigate these negative impacts, we propose strategies inspired by: (1) contact hypothesis, (2) accuracy nudges, and (3) global citizenship framework. Our results show that the best strategies reduce misinformation dissemination by up to (37%) and enhance learning by (11%). Bridging social psychology and AI, our work provides insights to navigate real-world interactions using LLMs while addressing belief-driven biases.</abstract>
       <url hash="5dae51d2">2025.findings-acl.948</url>
@@ -19654,7 +19654,7 @@
     <paper id="951">
       <title>Machine Theory of Mind Needs Machine Validation</title>
       <author><first>Adil</first><last>Soubki</last><affiliation>State University of New York at Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>18495-18505</pages>
       <abstract>In the last couple years, there has been a flood of interest in studying the extent to which language models (LMs) have a theory of mind (ToM) — the ability to ascribe mental states to themselves and others. The results provide an unclear picture of the current state of the art, with some finding near-human performance and others near-zero. To make sense of this landscape, we perform a survey of 16 recent studies aimed at measuring ToM in LMs and find that, while almost all perform checks for human identifiable issues, less than half do so for patterns only a machine might exploit. Among those that do perform such validation, which we call machine validation, none identify LMs to exceed human performance. We conclude that the datasets that show high LM performance on ToM tasks are easier than their peers, likely due to the presence of spurious patterns in the data, and we caution against building ToM benchmarks relying solely on human validation of the data.</abstract>
       <url hash="e910efd2">2025.findings-acl.951</url>
@@ -19691,7 +19691,7 @@
       <author><first>Oliver</first><last>Eberle</last><affiliation>Technische Universität Berlin</affiliation></author>
       <author><first>Phillip</first><last>Rust</last></author>
       <author><first>Carina</first><last>Kauf</last></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>18542-18561</pages>
       <abstract>Detecting ambiguity is important for language understanding, including uncertainty estimation, humour detection, and processing garden path sentences. We assess language models’ sensitivity to ambiguity by introducing an adversarial ambiguity dataset that includes syntactic, lexical, and phonological ambiguities along with adversarial variations (e.g., word-order changes, synonym replacements, and random-based alterations). Our findings show that direct prompting fails to robustly identify ambiguity, while linear probes trained on model representations can decode ambiguity with high accuracy, sometimes exceeding 90%. Our results offer insights into the prompting paradigm and how language models encode ambiguity at different layers.</abstract>
       <url hash="29605982">2025.findings-acl.954</url>
@@ -19702,7 +19702,7 @@
       <title>Biases Propagate in Encoder-based Vision-Language Models: A Systematic Analysis From Intrinsic Measures to Zero-shot Retrieval Outcomes</title>
       <author><first>Kshitish</first><last>Ghate</last></author>
       <author><first>Tessa</first><last>Charlesworth</last><affiliation>Kellogg Community College</affiliation></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Aylin</first><last>Caliskan</last><affiliation>University of Washington</affiliation></author>
       <pages>18562-18580</pages>
       <abstract>To build fair AI systems we need to understand how social-group biases intrinsic to foundational encoder-based vision-language models (VLMs) manifest in biases in downstream tasks. In this study, we demonstrate that intrinsic biases in VLM representations systematically “carry over” or propagate into zero-shot retrieval tasks, revealing how deeply rooted biases shape a model’s outputs. We introduce a controlled framework to measure this propagation by correlating (a) intrinsic measures of bias in the representational space with (b) extrinsic measures of bias in zero-shot text-to-image (TTI) and image-to-text (ITT) retrieval. Results show substantial correlations between intrinsic and extrinsic bias, with an average <tex-math>\rho</tex-math> = 0.83 <tex-math>\pm</tex-math> 0.10. This pattern is consistent across 114 analyses, both retrieval directions, six social groups, and three distinct VLMs. Notably, we find that larger/better-performing models exhibit greater bias propagation, a finding that raises concerns given the trend towards increasingly complex AI models. Our framework introduces baseline evaluation tasks to measure the propagation of group and valence signals. Investigations reveal that underrepresented groups experience less robust propagation, further skewing their model-related outcomes.</abstract>
@@ -19720,7 +19720,7 @@
       <author><first>Zhenwei</first><last>Dai</last><affiliation>Amazon</affiliation></author>
       <author><first>Yan</first><last>Han</last><affiliation>Amazon</affiliation></author>
       <author><first>Chen</first><last>Luo</last><affiliation>Amazon</affiliation></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author><first>Zhen</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Suhang</first><last>Wang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Yue</first><last>Xing</last><affiliation>Michigan State University</affiliation></author>
@@ -19768,7 +19768,7 @@
       <author><first>Matthew</first><last>Jagielski</last><affiliation>Google</affiliation></author>
       <author><first>Katherine</first><last>Lee</last><affiliation>Google</affiliation></author>
       <author><first>Niloofar</first><last>Mireshghallah</last></author>
-      <author><first>David A.</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Christopher A.</first><last>Choquette-Choo</last><affiliation>Google DeepMind</affiliation></author>
       <pages>18703-18726</pages>
       <abstract>Due to the sensitive nature of personally identifiable information (PII), its owners may have the authority to control its inclusion or request its removal from large-language model (LLM) training. Beyond this, PII may be added or removed from training datasets due to evolving dataset curation techniques, because they were newly scraped for retraining, or because they were included in a new downstream fine-tuning stage. We find that the amount and ease of PII memorization is a dynamic property of a model that evolves throughout training pipelines and depends on commonly altered design choices. We characterize three such novel phenomena: (1) similar-appearing PII seen later in training can elicit memorization of earlier-seen sequences in what we call assisted memorization, and this is a significant factor (in our settings, up to 1/3); (2) adding PII can increase memorization of other PII; and (3) removing PII can lead to other PII being memorized.</abstract>
@@ -19871,7 +19871,7 @@
       <author><first>Traci</first><last>Hong</last><affiliation>Boston University, Boston University</affiliation></author>
       <author><first>Ika Karlina</first><last>Idris</last><affiliation>Monash University</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
       <pages>18863-18890</pages>
       <abstract>Online discourse is increasingly trapped in a vicious cycle where polarizing language fuelstoxicity and vice versa. Identity, one of the most divisive issues in modern politics, oftenincreases polarization. Yet, prior NLP research has mostly treated toxicity and polarization asseparate problems. In Indonesia, the world’s third-largest democracy, this dynamic threatens democratic discourse, particularly in online spaces. We argue that polarization and toxicity must be studied in relation to each other. To this end, we present a novel multi-label Indonesian dataset annotated for toxicity, polarization, and annotator demographic information. Benchmarking with BERT-base models and large language models (LLMs) reveals that polarization cues improve toxicity classification and vice versa. Including demographic context further enhances polarization classification performance.</abstract>
       <url hash="f2bf027d">2025.findings-acl.966</url>
@@ -19938,10 +19938,10 @@
       <author><first>Nikita</first><last>Soni</last></author>
       <author><first>Syeda</first><last>Mahwish</last></author>
       <author><first>Pranav</first><last>Chitale</last></author>
-      <author><first>Ryan L.</first><last>Boyd</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="ryan-boyd"><first>Ryan L.</first><last>Boyd</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Richard N.</first><last>Rosenthal</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>18955-18973</pages>
       <abstract>Large Language Models (LLMs) are increasingly used in human-centered applications, yet their ability to model diverse psychological constructs is not well understood. In this study, we systematically evaluate a range of Transformer-LMs to predict psychological variables across five major dimensions: affect, substance use, mental health, sociodemographics, and personality. Analyses span three temporal levels—short daily text responses about current affect, text aggregated over two-weeks, and user-level text collected over two years—allowing us to examine how each model’s strengths align with the underlying stability of different constructs. The findings show that mental health signals emerge as the most accurately predicted dimensions (r=0.6) across all temporal scales. At the daily scale, smaller models like DeBERTa and HaRT often performed better, whereas, at longer scales or with greater context, larger model like Llama3-8B performed the best. Also, aggregating text over the entire study period yielded stronger correlations for outcomes, such as age and income. Overall, these results suggest the importance of selecting appropriate model architectures and temporal aggregation techniques based on the stability and nature of the target variable.</abstract>
       <url hash="ae86cf22">2025.findings-acl.971</url>
@@ -20015,7 +20015,7 @@
       <author><first>Kelechi</first><last>Ogueji</last><affiliation>ServiceNow Inc</affiliation></author>
       <author><first>Jimmy</first><last>Lin</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <pages>19048-19095</pages>
       <abstract>Large-scale multilingual evaluations, such as MEGA, often include only a handful of African languages due to the scarcity of high-qualityevaluation data and the limited discoverability of existing African datasets. This lack of representation hinders comprehensive LLM evaluation across a diverse range of languages and tasks. To address these challenges, we introduce AFROBENCH—a multi-task benchmark for evaluating the performance of LLMs across 64 African languages, 15 tasks and 22 datasets. AFROBENCH consists of nine natural language understanding datasets, six text generation datasets, six knowledge and question answering tasks, and one mathematical reasoning task. We present results comparing the performance of prompting LLMs to fine-tuned baselines based on BERT and T5-style models. Our results suggest large gaps in performance between high-resource languages, such as English, and African languages across most tasks; but performance also varies based on the availability of monolingual data resources. Our findings confirm that performance on African languages continues to remain a hurdle for current LLMs, underscoring the need for additional efforts to close this gap.</abstract>
       <url hash="d7073e0b">2025.findings-acl.976</url>
@@ -20048,9 +20048,9 @@
       <author><first>Shadikur</first><last>Rahman</last></author>
       <author><first>Mehrad</first><last>Shahmohammadi</last></author>
       <author><first>Megh</first><last>Thakkar</last></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <author><first>Enamul</first><last>Hoque</last><affiliation>York University</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <pages>19123-19151</pages>
       <abstract>Charts are ubiquitous, as people often use them to analyze data, answer questions, and discover critical insights. However, performing complex analytical tasks with charts requires significant perceptual and cognitive effort. Chart Question Answering (CQA) systems automate this process by enabling models to interpret and reason with visual representations of data. However, existing benchmarks like ChartQA lack real-world diversity and have recently shown performance saturation with modern large vision-language models (LVLMs). To address these limitations, we introduce ChartQAPro, a new benchmark that includes 1,341 charts from 99 diverse sources, spanning various chart types—including infographics and dashboards—and featuring 1,948 questions in various types, such as multiple-choice, conversational, hypothetical, and unanswerable questions, to better reflect real-world challenges. Our evaluations with 21 models show a substantial performance drop for LVLMs on ChartQAPro; e.g., Claude Sonnet 3.5 scores 90.5% on ChartQA but only 55.81% on ChartQAPro, underscoring the complexity of chart reasoning. We complement our findings with detailed error analyses and ablation studies, identifying key challenges and opportunities for advancing LVLMs in chart understanding and reasoning. We release ChartQAPro at https://github.com/vis-nlp/ChartQAPro.</abstract>
       <url hash="8e71994c">2025.findings-acl.978</url>
@@ -20163,7 +20163,7 @@
       <author><first>Linyang</first><last>He</last></author>
       <author><first>Ercong</first><last>Nie</last></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Nima</first><last>Mesgarani</last><affiliation>Columbia University</affiliation></author>
       <author><first>Jonathan</first><last>Brennan</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <pages>19284-19302</pages>
@@ -20252,9 +20252,9 @@
     <paper id="993">
       <title><fixed-case>F</fixed-case>a<fixed-case>V</fixed-case>e: Factored and Verified Search Rationale for Long-form Answer</title>
       <author><first>Jihyuk</first><last>Kim</last><affiliation>LG Corporation</affiliation></author>
-      <author><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last><affiliation>Amazon</affiliation></author>
       <author><first>Seung-won</first><last>Hwang</last><affiliation>Seoul National University</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <pages>19402-19416</pages>
       <abstract>Targeting long-form question-answering, chain-of-query (CoQ) has been studied, integrating chain-of-thought (CoT) with retrieval-augmented generation. CoQ answers the complex question step-by-step, through simpler subquestions (SQs) from which relevant knowledge is retrieved. By doing so, CoQ aims to improve the answer comprehensiveness and verifiability, at the expense of latency. Our first contribution is showing that the chaining often incurs harmful effects on both objectives, and SQs left unverified often fail to answer the given question. Second, we propose a better alternative to CoQ, union-of-query which adopts a factored approach to break the harmful chain. Finally, we propose to verify SQs before answers, by fine-tuning the SQ generator using verified SQs and introducing a selector verifying SQs in test time. Employing vicuna-13b, our approach, denoted by FaVe (short for Factored and Verified search), even outperforms ChatGPT baselines while maintaining efficiency.</abstract>
       <url hash="6cd711cb">2025.findings-acl.993</url>
@@ -20519,7 +20519,7 @@
       <author><first>Li</first><last>Zeng</last></author>
       <author><first>Zeming</first><last>Liu</last></author>
       <author><first>Chong</first><last>Feng</last></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Yuhang</first><last>Guo</last></author>
       <pages>19725-19743</pages>
       <abstract>Model editing aims to correct errors and outdated knowledge in the Large language models (LLMs) with minimal cost. Prior research has proposed a variety of datasets to assess the effectiveness of these model editing methods. However, most existing datasets only require models to output short phrases or sentences, overlooks the widespread existence of document level tasks in the real world, raising doubts about their practical usability. Aimed at addressing this limitation and promoting the application of model editing in real-world scenarios, we propose the task of document-level model editing. To tackle such challenges and enhance model capabilities in practical settings, we introduce DocMEdit, a dataset focused on document-level model editing, characterized by document-level inputs and outputs, extrapolative, and multiple facts within a single edit. We propose a series of evaluation metrics and experiments. The results show that the difficulties in document-level model editing pose challenges for existing model editing methods.</abstract>
@@ -20619,7 +20619,7 @@
     </paper>
     <paper id="1018">
       <title>Core: Robust Factual Precision with Informative Sub-Claim Identification</title>
-      <author><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Jingyu</first><last>Zhang</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Nathaniel</first><last>Weir</last><affiliation>Amazon</affiliation></author>
       <author><first>Seth</first><last>Ebner</last><affiliation>Kensho</affiliation></author>
@@ -20761,7 +20761,7 @@
       <author><first>Hengrui</first><last>Zhang</last></author>
       <author><first>Henry Peng</first><last>Zou</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <author><first>Weizhi</first><last>Zhang</last><affiliation>Amazon and University of Illinois Chicago</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois Chicago</affiliation></author>
       <pages>20027-20041</pages>
       <abstract>Large Language models (LLMs) have achieved encouraging results in tabular data generation. However, existing approaches require fine-tuning, which is computationally expensive. This paper explores an alternative: prompting a fixed LLM with in-context examples. We observe that using randomly selected in-context examples hampers the LLM’s performance, resulting in sub-optimal generation quality. To address this, we propose a novel in-context learning framework: TabGen-ICL, to enhance the in-context learning ability of LLMs for tabular data generation. TabGen-ICL operates iteratively, retrieving a subset of real samples that represent the residual between currently generated samples and true data distributions. This approach serves two purposes: locally, it provides more effective in-context learning examples for the LLM in each iteration; globally, it progressively narrows the gap between generated and real data. Extensive experiments on five real-world tabular datasets demonstrate that TabGen-ICL significantly outperforms the random selection strategy. Specifically, it reduces the error rate by a margin of up to 42.2% on the fidelity metric. We demonstrate for the first time that prompting a fixed LLM can yield high-quality synthetic tabular data.</abstract>
       <url hash="ecc76599">2025.findings-acl.1027</url>
@@ -20831,13 +20831,13 @@
       <author><first>Jon</first><last>Cai</last></author>
       <author><first>Brendan</first><last>King</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <author><first>Peyton</first><last>Cameron</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Miriam</first><last>Eckert</last></author>
       <author><first>Dananjay</first><last>Srinivas</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>George Arthur</first><last>Baker</last><affiliation>University of Utah and University of Colorado Boulder</affiliation></author>
+      <author id="george-baker"><first>George Arthur</first><last>Baker</last><affiliation>University of Utah and University of Colorado Boulder</affiliation></author>
       <author><first>V Kate</first><last>Everson</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Jeffrey</first><last>Flanigan</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <pages>20135-20149</pages>
       <abstract>Understanding the structure of multi-party conversation and the intentions and dialogue acts of each speaker remains a significant challenge in NLP. While a number of corpora annotated using theoretical frameworks of dialogue have been proposed, these typically focus on either utterance-level labeling of speaker intent, missing wider context, or the rhetorical structure of a dialogue, losing fine-grained intents captured in dialogue acts. Recently, the Dependency Dialogue Acts (DDA) framework has been proposed to for modeling both the fine-grained intents of each speaker and the structure of multi-party dialogues. However, there is not yet a corpus annotated with this framework available for the community to study. To address this gap, we introduce a new corpus of 33 dialogues and over 9,000 utterance units, densely annotated using the Dependency Dialogue Acts (DDA) framework.Our dataset spans four genres of multi-party conversations from different modalities: (1) physics classroom discussions, (2) engineering classroom discussions, (3) board game interactions, and (4) written online game chat logs. Each session is doubly annotated and adjudicated to ensure high-quality labeling. We present a description of the dataset and annotation process, an analysis of speaker dynamics enabled by our annotation, and a baseline evaluation of LLMs as DDA parsers. We discuss the implications of this dataset understanding dynamics between speakers and for developing more controllable dialogue agents.</abstract>
@@ -20926,7 +20926,7 @@
       <author><first>Qiunan</first><last>Du</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Xinwang</first><last>Liu</last><affiliation>National University of Defense Technology</affiliation></author>
       <author><first>Minlie</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>20243-20255</pages>
       <abstract>LLMs face privacy risks when handling sensitive data. To ensure privacy, researchers use differential privacy (DP) to provide protection by adding noise during LLM training. However, users may be hesitant to share complete data with LLMs. Researchers follow local DP to sanitize the text on the user side and feed non-sensitive text to LLMs. The sanitization usually uses a fixed non-sensitive token list or a fixed noise distribution, which induces the risk of being attacked or semantic distortion. We argue that the token’s protection level should be adaptively adjusted according to its semantic-based information to balance the privacy-utility trade-off. In this paper, we propose DYNTEXT, an LDP-based Dynamic Text sanitization for privacy-preserving LLM inference, which dynamically constructs semantic-aware adjacency lists of sensitive tokens to sample non-sensitive tokens for perturbation. Specifically, DYNTEXT first develops a semantic-based density modeling under DP to extract each token’s density information. We propose token-level smoothing sensitivity by combining the idea of global sensitivity (GS) and local sensitivity (LS), which dynamically adjusts the noise scale to avoid excessive noise in GS and privacy leakage in LS. Then, we dynamically construct an adjacency list for each sensitive token based on its semantic density information. Finally, we apply the replacement mechanism to sample non-sensitive, semantically similar tokens from the adjacency list to replace sensitive tokens. Experiments show that DYNTEXT excels strong baselines on three datasets.</abstract>
       <url hash="b39a86ec">2025.findings-acl.1038</url>
@@ -20994,7 +20994,7 @@
       <author><first>Dingyu</first><last>Yao</last></author>
       <author><first>Bowen</first><last>Shen</last><affiliation>University of the Chinese Academy of Sciences</affiliation></author>
       <author><first>Zheng</first><last>Lin</last><affiliation>Institute of Information Engineering, Chinese Academy of Sciences</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <author><first>Weiping</first><last>Wang</last><affiliation>IIE</affiliation></author>
@@ -21440,7 +21440,7 @@
     <paper id="1075">
       <title><fixed-case>PM</fixed-case>3-<fixed-case>KIE</fixed-case>: A Probabilistic Multi-Task Meta-Model for Document Key Information Extraction</title>
       <author><first>Birgit</first><last>Kirsch</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
-      <author><first>Héctor</first><last>Allende-Cid</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
+      <author id="hector-allende-cid"><first>Héctor</first><last>Allende-Cid</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
       <author><first>Stefan</first><last>Rueping</last></author>
       <pages>20890-20912</pages>
       <abstract>Key Information Extraction (KIE) from visually rich documents is commonly approached as either fine-grained token classification or coarse-grained entity extraction. While token-level models capture spatial and visual cues, entity-level models better represent logical dependencies and align with real-world use cases.We introduce PM3-KIE, a probabilistic multi-task meta-model that incorporates both fine-grained and coarse-grained models. It serves as a lightweight reasoning layer that jointly predicts entities and all appearances in a document. PM3-KIE incorporates domain-specific schema constraints to enforce logical consistency and integrates large language models for semantic validation, thereby reducing extraction errors.Experiments on two public datasets, DeepForm and FARA, show that PM3-KIE outperforms three state-of-the-art models and a stacked ensemble, achieving a statistically significant 2% improvement in F1 score.</abstract>
@@ -21453,7 +21453,7 @@
       <author><first>Ahmed</first><last>Lekssays</last><affiliation>Hamad Bin Khalifa University</affiliation></author>
       <author><first>Utsav</first><last>Shukla</last></author>
       <author><first>Husrev Taha</first><last>Sencar</last><affiliation>QCRI</affiliation></author>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <pages>20913-20926</pages>
       <abstract>Accurately identifying adversarial techniques in security texts is critical for effective cyber defense. However, existing methods face a fundamental trade-off: they either rely on generic models with limited domain precision or require resource-intensive pipelines that depend on large labeled datasets and task-specific optimizations—such as custom hard-negative mining and denoising—resources rarely available in specialized domains.We propose TechniqueRAG, a domain-specific retrieval-augmented generation (RAG) framework that bridges this gap by integrating off-the-shelf retrievers, instruction-tuned LLMs, and minimal text–technique pairs. Our approach addresses data scarcity by fine-tuning only the generation component on limited in-domain examples, circumventing the need for resource-intensive retrieval training. While conventional RAG mitigates hallucination by coupling retrieval and generation, its reliance on generic retrievers often introduces noisy candidates, limiting domain-specific precision. To address this, we enhance retrieval quality and domain specificity through zero-shot LLM re-ranking, which explicitly aligns retrieved candidates with adversarial techniques.Experiments on multiple security benchmarks demonstrate that TechniqueRAG achieves state-of-the-art performance without extensive task-specific optimizations or labeled data, while comprehensive analysis provides further insights.</abstract>
       <url hash="01d937a1">2025.findings-acl.1076</url>
@@ -21466,8 +21466,8 @@
       <author><first>Zixuan</first><last>Li</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Xiaolong</first><last>Jin</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>20927-20938</pages>
       <abstract>Forecasting over Temporal Knowledge Graphs (TKGs) which predicts future facts based on historical ones has received much attention. Recent studies have introduced Large Language Models (LLMs) for this task to enhance the models’ generalization abilities. However, these models perform forecasting via simultaneously learning two kinds of entangled knowledge in the TKG: (1) general patterns, i.e., invariant temporal structures shared across different scenarios; and (2) scenario information, i.e., factual knowledge engaged in specific scenario, such as entities and relations. As a result, the learning processes of these two kinds of knowledge may interfere with each other, which potentially impact the generalization abilities of the models. To enhance the generalization ability of LLMs on this task, in this paper, we propose a General-to-Specific learning framework (G2S) that disentangles the learning processes of the above two kinds of knowledge. In the general learning stage, we mask the scenario information in different TKGs and convert it into anonymous temporal structures. After training on these structures, the model is able to capture the general patterns across different TKGs. In the specific learning stage, we inject the scenario information into the structures via either in-context learning or fine-tuning modes. Experimental results show that G2S effectively improves the generalization abilities of LLMs.</abstract>
       <url hash="6c6045db">2025.findings-acl.1077</url>
@@ -21550,7 +21550,7 @@
       <author><first>Junru</first><last>Wu</last></author>
       <author><first>Tianhao</first><last>Shen</last></author>
       <author><first>Linxi</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>21031-21050</pages>
       <abstract>Large language models (LLMs) have achieved remarkable progress in autonomous reasoning, evolving from basic text processing to sophisticated multimodal reasoning, a critical capability for general-purpose AI assistants. However, existing benchmarks usually fail to adequately capture the intricate multi-step reasoning demands inherent in real-world scenarios. To bridge this gap, we propose **C²RBench**: a **C**hinese **C**omplex **R**easoning **Bench**mark for evaluating multi-step, multimodal advanced reasoning capability of LLMs. C²RBench comprises 1,115 carefully curated Chinese tasks, which are organized into eight domain-specific subsets, each meticulously designed to mirror real-world challenges. This hierarchical benchmark features three difficulty tiers based on the number of reasoning steps required (average 8.44 steps per task), significantly exceeding existing benchmarks in cognitive complexity. Extensive evaluations of 20 LLMs (including DeepSeek-R1) and 24 multimodal large language models (MLLMs) on C²RBench reveal critical performance gaps: GPT-4.1 achieves only 52.11% accuracy, indicating substantial room for improvement. The dataset and evaluation code are publicly available.</abstract>
       <url hash="84756a18">2025.findings-acl.1083</url>
@@ -21600,7 +21600,7 @@
       <author><first>Amin</first><last>Abolghasemi</last></author>
       <author><first>Leif</first><last>Azzopardi</last><affiliation>University of Strathclyde</affiliation></author>
       <author><first>Seyyed Hadi</first><last>Hashemi</last><affiliation>eBay Inc.</affiliation></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Suzan</first><last>Verberne</last><affiliation>Universiteit Leiden</affiliation></author>
       <pages>21105-21124</pages>
       <abstract>Attributing answers to source documents is an approach used to enhance the verifiability of a model’s output in retrieval-augmented generation (RAG). Prior work has mainly focused on improving and evaluating the attribution quality of large language models (LLMs) in RAG, but this may come at the expense of inducing biases in the attribution of answers. We define and examine two aspects in the evaluation of LLMs in RAG pipelines, namely attribution sensitivity and bias with respect to authorship information. We explicitly inform an LLM about the authors of source documents, instruct it to attribute its answers, and analyze (i) how sensitive the LLM’s output is to the author of source documents, and (ii) whether the LLM exhibits a bias towards human-written or AI-generated source documents. We design an experimental setup in which we use counterfactual evaluation to study three LLMs in terms of their attribution sensitivity and bias in RAG pipelines. Our results show that adding authorship information to source documents can significantly change the attribution quality of LLMs by 3 to 18%. We show that LLMs can have an attribution bias towards explicit human authorship, which can serve as a competing hypothesis for findings of prior work that shows that LLM-generated content may be preferred over human-written contents. Our findings indicate that metadata of source documents can influence LLMs’ trust, and how they attribute their answers. Furthermore, our research highlights attribution bias and sensitivity as a novel aspect of the vulnerability of LLMs.</abstract>
@@ -21613,7 +21613,7 @@
       <author><first>Wen</first><last>Yang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Junhong</first><last>Wu</last><affiliation>University of Chinese Academy of Sciences</affiliation></author>
       <author><first>Chen</first><last>Wang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Jiajun</first><last>Zhang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>21125-21147</pages>
       <abstract>Direct Preference Optimization (DPO) has become a prominent method for aligning Large Language Models (LLMs) with human preferences. While DPO has enabled significant progress in aligning English LLMs, multilingual preference alignment is hampered by data scarcity. To address this, we propose a novel approach that <tex-math>\textit{captures}</tex-math> learned preferences from well-aligned English models by implicit rewards and <tex-math>\textit{transfers}</tex-math> them to other languages through iterative training. Specifically, we derive an implicit reward model from the logits of an English DPO-aligned model and its corresponding reference model. This reward model is then leveraged to annotate preference relations in cross-lingual instruction-following pairs, using English instructions to evaluate multilingual responses. The annotated data is subsequently used for multilingual DPO fine-tuning, facilitating preference knowledge transfer from English to other languages. Fine-tuning Llama3 for two iterations resulted in a 12.72% average improvement in Win Rate and a 5.97% increase in Length Control Win Rate across all training languages on the X-AlpacaEval leaderboard. Our findings demonstrate that leveraging existing English-aligned models can enable efficient and effective multilingual preference alignment, significantly reducing the need for extensive multilingual preference data.</abstract>
@@ -21709,7 +21709,7 @@
       <author><first>Wenjie</first><last>Wang</last><affiliation>University of Science and Technology of China</affiliation></author>
       <author><first>Hong</first><last>Cheng</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Fuli</first><last>Feng</last><affiliation>University of Science and Technology of China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>21258-21277</pages>
       <abstract>Personalizing Large Language Models (LLMs) has become a critical step in facilitating their widespread application to enhance individual life experiences. In pursuit of personalization, distilling key preference information from an individual’s historical data as instructional preference context to customize LLM generation has emerged as a promising direction. However, these methods face a fundamental limitation by overlooking the inter-user comparative analysis, which is essential for identifying the inter-user differences that truly shape preferences. To address this limitation, we propose Difference-aware Personalization Learning (DPL), a novel approach that emphasizes extracting inter-user differences to enhance LLM personalization. DPL strategically selects representative users for comparison and establishes a structured standard to extract meaningful, task-relevant differences for customizing LLM generation. Extensive experiments on real-world datasets demonstrate that DPL significantly enhances LLM personalization. We release our code at <url>https://github.com/SnowCharmQ/DPL</url>.</abstract>
       <url hash="e43f82d5">2025.findings-acl.1095</url>
@@ -21821,7 +21821,7 @@
       <author><first>Yihao</first><last>Ding</last><affiliation>University of Melbourne</affiliation></author>
       <author><first>Gongbo</first><last>Zhang</last><affiliation>Columbia University</affiliation></author>
       <author><first>Chunhua</first><last>Weng</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Weill Cornell Medicine, Cornell University</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Weill Cornell Medicine, Cornell University</affiliation></author>
       <pages>21421-21443</pages>
       <abstract>Evidence-based medicine (EBM) is at the forefront of modern healthcare, emphasizing the use of the best available scientific evidence to guide clinical decisions. Due to the sheer volume and rapid growth of medical literature and the high cost of curation, there is a critical need to investigate Natural Language Processing (NLP) methods to identify, appraise, synthesize, summarize, and disseminate evidence in EBM. This survey presents an in-depth review of 129 research studies on leveraging NLP for EBM, illustrating its pivotal role in enhancing clinical decision-making processes. The paper systematically explores how NLP supports the five fundamental steps of EBM—Ask, Acquire, Appraise, Apply, and Assess. The review not only identifies current limitations within the field but also proposes directions for future research, emphasizing the potential for NLP to revolutionize EBM by refining evidence extraction, evidence synthesis, appraisal, summarization, enhancing data comprehensibility, and facilitating a more efficient clinical workflow.</abstract>
       <url hash="f7aeb547">2025.findings-acl.1103</url>
@@ -21861,7 +21861,7 @@
       <author><first>Zejiang</first><last>He</last></author>
       <author><first>Liu</first><last>Liu</last><affiliation>Suqian University</affiliation></author>
       <author><first>Zhigang</first><last>Sun</last><affiliation>National University of Defense Technology</affiliation></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
+      <author><first>Dongsheng</first><last>Li</last><affiliation>National University of Defense Technology</affiliation></author>
       <pages>21475-21487</pages>
       <abstract>With the emergence of new topics on social media as sources of rumor dissemination, addressing the distribution shifts between source and target domains remains a crucial task in cross-domain rumor detection. Existing feature alignment methods, which aim to reduce the discrepancies between domains, are often susceptible to task interference during training. Additionally, data distribution alignment methods, which rely on existing data to synthesize new training samples, inherently introduce noise. To deal with these challenges, a new cross-domain rumor detection method, MONTROSE, is proposed. It combines LLM-driven Monte Carlo Tree Search (MCTS) data synthesis to generate high-quality synthetic data for the target domain and a domain-sharpness-aware (DSAM) self-refinement approach to train rumor detection models with these synthetic data effectively. Experiments demonstrate the superior performance of MONTROSE in cross-domain rumor detection.</abstract>
       <url hash="84e6972b">2025.findings-acl.1106</url>
@@ -21885,12 +21885,12 @@
     <paper id="1108">
       <title>A Comprehensive Graph Framework for Question Answering with Mode-Seeking Preference Alignment</title>
       <author><first>Quanwei</first><last>Tang</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last><affiliation>Hong Kong Polytechnic University</affiliation></author>
       <author><first>Junshuang</first><last>Wu</last></author>
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Shoushan</first><last>Li</last><affiliation>Soochow University</affiliation></author>
       <author><first>Erik</first><last>Cambria</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last><affiliation>Soochow University, China</affiliation></author>
       <pages>21504-21523</pages>
       <abstract>Recent advancements in retrieval-augmented generation (RAG) have enhanced large language models in question answering by integrating external knowledge. However, challenges persist in achieving global understanding and aligning responses with human ethical and quality preferences. To address these issues, we propose GraphMPA, a comprehensive graph-based framework with mode-seeking preference alignment. Our approach constructs a hierarchical document graph using a general similarity measurement, mimicking human cognitive processes for information understanding and synthesis. Additionally, we introduce mode-seeking preference optimization to better align model outputs with human preferences through probability-matching constraints. Extensive experiments on six datasets demonstrate the effectiveness of our GraphMPA.</abstract>
       <url hash="a6f679ab">2025.findings-acl.1108</url>
@@ -22054,7 +22054,7 @@
       <author><first>Jisu</first><last>Shin</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sukmin</first><last>Cho</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Changgeon</first><last>Ko</last></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>21738-21756</pages>
       <abstract>The detection of mental health problems from social media and the interpretation of these results have been extensively explored. Research has shown that incorporating clinical symptom information into a model enhances domain expertise, improving its detection and interpretation performance. While large language models (LLMs) are shown to be effective for generating explanatory rationales in mental health detection, their substantially big parameter size and high computational cost limit their practicality. Reasoning distillation transfers this ability to smaller language models (SLMs), but inconsistencies in the relevance and domain alignment of LLM-generated rationales pose a challenge. This paper investigates how rationale quality impacts SLM performance in mental health detection and explanation generation. We hypothesize that ensuring high-quality and domain-relevant rationales enhances the distillation. To this end, we propose a framework that selects rationales based on their alignment with expert clinical reasoning. Experiments show that our quality-focused approach significantly enhances SLM performance in both mental disorder detection and rationale generation. This work highlights the importance of rationale quality and offers an insightful framework for knowledge transfer in mental health applications.</abstract>
       <url hash="78ba4fc4">2025.findings-acl.1119</url>
@@ -22064,7 +22064,7 @@
     <paper id="1120">
       <title>Rethinking Table Instruction Tuning</title>
       <author><first>Naihao</first><last>Deng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>21757-21780</pages>
       <abstract>Recent advances in table understanding have focused on instruction-tuning large language models (LLMs) for table-related tasks. However, existing research has overlooked the impact of hyperparameter choices, and also lacks a comprehensive evaluation of the out-of-domain table understanding ability and the general capabilities of these table LLMs. In this paper, we evaluate these abilities in existing table LLMs, and find significant declines in both out-of-domain table understanding and general capabilities as compared to their base models. Through systematic analysis, we show that hyperparameters, such as learning rate, can significantly influence both table-specific and general capabilities. Contrary to the previous table instruction-tuning work, we demonstrate that smaller learning rates and fewer training instances can enhance table understanding while preserving general capabilities. Based on our findings, we introduce TAMA, a TAble LLM instruction-tuned from LLaMA 3.1 8B Instruct, which achieves performance on par with, or surpassing GPT-3.5 and GPT-4 on table tasks, while maintaining strong out-of-domain generalization and general capabilities. Our findings highlight the potential for reduced data annotation costs and more efficient model development through careful hyperparameter selection. We open-source the project and our models.</abstract>
       <url hash="dcbcb501">2025.findings-acl.1120</url>
@@ -22075,7 +22075,7 @@
       <title><fixed-case>C</fixed-case>lini<fixed-case>D</fixed-case>ial: A Naturally Occurring Multimodal Dialogue Dataset for Team Reflection in Action During Clinical Operation</title>
       <author><first>Naihao</first><last>Deng</last></author>
       <author><first>Kapotaksha</first><last>Das</last><affiliation>University of Michigan - Dearborn</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Vitaliy</first><last>Popov</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Mohamed</first><last>Abouelenien</last><affiliation>University of Michigan</affiliation></author>
       <pages>21781-21798</pages>
@@ -22094,7 +22094,7 @@
       <author><first>Zenghao</first><last>Tang</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <author><first>He</first><last>Wang</last></author>
       <author><first>Hanchen</first><last>Xia</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Naihao</first><last>Deng</last></author>
       <pages>21799-21818</pages>
       <abstract>Existing humor datasets and evaluations predominantly focus on English, leaving limited resources for culturally nuanced humor in non-English languages like Chinese. To address this gap, we construct **Chumor**, the first and the largest Chinese humor explanation dataset. **Chumor** is sourced from Ruo Zhi Ba (RZB, 弱智吧), a Chinese Reddit-like platform known for sharing intellectually challenging and culturally specific jokes. We test ten LLMs through direct and chain-of-thought prompting, revealing that **Chumor** poses significant challenges to existing LLMs, with their accuracy slightly above random and far below human. In addition, our analysis highlights that human-annotated humor explanations are significantly better than those generated by GPT-4o and ERNIE4-turbo. We release **Chumor** at https://huggingface.co/datasets/MichiganNLP/Chumor , our project page is at https://github.com/MichiganNLP/Chumor-2.0 , our leaderboard is at https://huggingface.co/spaces/MichiganNLP/Chumor-leaderboard , and our codebase is at https://github.com/MichiganNLP/Chumor-2.0 .</abstract>
@@ -22130,7 +22130,7 @@
       <author><first>Paramita</first><last>Koley</last></author>
       <author><first>Janardan</first><last>Misra</last></author>
       <author><first>Niloy</first><last>Ganguly</last><affiliation>Indian Institute of Technology Kharagpur,</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <pages>21848-21864</pages>
       <abstract>A significant portion of the energy consumed by Large Language Models (LLMs) arises from their inference processes; hence developing energy-efficient methods for inference is crucial. While several techniques exist for inference optimization, output compression remains relatively unexplored, with only a few preliminary efforts addressing this aspect. In this work, we first benchmark 12 decoder-only LLMs across 5 datasets, revealing that these models often produce responses that are substantially longer than necessary. We then conduct a comprehensive quality assessment of LLM responses, formally defining six information categories present in LLM responses. We show that LLMs often tend to include redundant or additional information besides the minimal answer. To address this issue of long responses by LLMs, we explore several simple and intuitive prompt-engineering strategies.Empirical evaluation shows that appropriate prompts targeting length reduction and controlling information content can achieve significant energy optimization between 25-60% by reducing the response length while preserving the quality of LLM responses.</abstract>
       <url hash="b43d7d7b">2025.findings-acl.1125</url>
@@ -22329,7 +22329,7 @@
       <title>Is Large Language Model Performance on Reasoning Tasks Impacted by Different Ways Questions Are Asked?</title>
       <author><first>Seok Hwan</first><last>Song</last></author>
       <author><first>Mohna</first><last>Chakraborty</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
+      <author><first>Qi</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Wallapak</first><last>Tavanapong</last><affiliation>Iowa State University</affiliation></author>
       <pages>22066-22081</pages>
       <abstract>Large Language Models (LLMs) have been evaluated using diverse question types, e.g., multiple-choice, true/false, and short/long answers. This study answers an unexplored question about the impact of different question types on LLM accuracy on reasoning tasks. We investigate the performance of five LLMs on three different types of questions using quantitative and deductive reasoning tasks. The performance metrics include accuracy in the reasoning steps and choosing the final answer. Key Findings: (1) Significant differences exist in LLM performance across different question types. (2) Reasoning accuracy does not necessarily correlate with the final selection accuracy. (3) The number of options and the choice of words, influence LLM performance.</abstract>
@@ -22427,7 +22427,7 @@
       <title>Label-semantics Aware Generative Approach for Domain-Agnostic Multilabel Classification</title>
       <author><first>Subhendu</first><last>Khatuya</last></author>
       <author><first>Shashwat</first><last>Naidu</last></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <author><first>Pawan</first><last>Goyal</last><affiliation>IIT Kharagpur</affiliation></author>
       <author><first>Niloy</first><last>Ganguly</last><affiliation>Indian Institute of Technology Kharagpur,</affiliation></author>
       <pages>22286-22298</pages>
@@ -22468,7 +22468,7 @@
       <author><first>Tianyue</first><last>Ou</last></author>
       <author><first>Houda</first><last>Bouamor</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Department of Computer Science, University of Toronto</affiliation></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>22327-22360</pages>
       <abstract>The field of machine translation has achieved significant advancements, yet domain-specific terminology translation, particularly in AI, remains challenging. This work introduces GIST, a large-scale multilingual AI terminology dataset containing 5K terms extracted from top AI conference papers spanning 2000 to 2023. The terms were translated into Arabic, Chinese, French, Japanese, and Russian using a hybrid framework that combines LLMs for extraction with human expertise for translation. The dataset’s quality was benchmarked against existing resources, demonstrating superior translation accuracy through crowdsourced evaluation. GIST was integrated into translation workflows using post-translation refinement methods that required no retraining, where LLM prompting consistently improved BLEU and COMET scores. A web demonstration on the ACL Anthology platform highlights its practical application, showcasing improved accessibility for non-English speakers. We address a critical gap in AI terminology resources and fosters global inclusivity and collaboration in AI research.</abstract>
       <url hash="97945a76">2025.findings-acl.1148</url>
@@ -22492,7 +22492,7 @@
       <author><first>Emanuele</first><last>La Malfa</last><affiliation>University of Oxford</affiliation></author>
       <author><first>Manuel</first><last>Tonneau</last><affiliation>Oxford Internet Institute, University of Oxford</affiliation></author>
       <author><first>Ashkan</first><last>Kazemi</last><affiliation>Meedan</affiliation></author>
-      <author><first>Scott A.</first><last>Hale</last><affiliation>Meedan, University of Oxford and Alan Turing Institute</affiliation></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last><affiliation>Meedan, University of Oxford and Alan Turing Institute</affiliation></author>
       <pages>22374-22404</pages>
       <abstract>Online misinformation remains a critical challenge, and fact-checkers increasingly rely on claim matching systems that use sentence embedding models to retrieve relevant fact-checks. However, as users interact with claims online, they often introduce edits, and it remains unclear whether current embedding models used in retrieval are robust to such edits. To investigate this, we introduce a perturbation framework that generates valid and natural claim variations, enabling us to assess the robustness of a wide-range of sentence embedding models in a multi-stage retrieval pipeline and evaluate the effectiveness of various mitigation approaches. Our evaluation reveals that standard embedding models exhibit notable performance drops on edited claims, while LLM-distilled embedding models offer improved robustness at a higher computational cost. Although a strong reranker helps to reduce the performance drop, it cannot fully compensate for first-stage retrieval gaps. To address these retrieval gaps, we evaluate train- and inference-time mitigation approaches, demonstrating that they can improve in-domain robustness by up to 17 percentage points and boost out-of-domain generalization by 10 percentage points. Overall, our findings provide practical improvements to claim-matching systems, enabling more reliable fact-checking of evolving misinformation.</abstract>
       <url hash="1e98e793">2025.findings-acl.1150</url>
@@ -22533,7 +22533,7 @@
       <author><first>Francesco</first><last>Ortu</last><affiliation>University of Trieste and Area Science Park</affiliation></author>
       <author><first>Roya</first><last>Ensafi</last><affiliation>University of Michigan Ann Arbor</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Department of Computer Science, University of Toronto</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>22434-22452</pages>
       <abstract>The ability of Natural Language Processing (NLP) methods to categorize text into multiple classes has motivated their use in online content moderation tasks, such as hate speech and fake news detection. However, there is limited understanding of how or why these methods make such decisions, or why certain content is moderated in the first place. To investigate the hidden mechanisms behind content moderation, we explore multiple directions: 1) training classifiers to reverse-engineer content moderation decisions across countries; 2) explaining content moderation decisions by analyzing Shapley values and LLM-guided explanations. Our primary focus is on content moderation decisions made across countries, using pre-existing corpora sampled from the Twitter Stream Grab. Our experiments reveal interesting patterns in censored posts, both across countries and over time. Through human evaluations of LLM-generated explanations across three LLMs, we assess the effectiveness of using LLMs in content moderation. Finally, we discuss potential future directions, as well as the limitations and ethical considerations of this work.</abstract>
       <url hash="9794c730">2025.findings-acl.1153</url>
@@ -22564,8 +22564,8 @@
       <author><first>Kiana</first><last>Avestimehr</last></author>
       <author><first>Katharine</first><last>Butler</last></author>
       <author><first>Yanjun</first><last>Weng</last></author>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
-      <author><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
+      <author><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Salman</first><last>Avestimehr</last><affiliation>University of Southern California</affiliation></author>
       <pages>22473-22487</pages>
       <abstract>Large vision-language models (VLMs) have demonstrated remarkable abilities in understanding everyday content. However, their performance in the domain of art, particularly culturally rich art forms, remains less explored. As a pearl of human wisdom and creativity, art encapsulates complex cultural narratives and symbolism. In this paper, we offer the Pun Rebus Art Dataset, a multimodal dataset for art understanding deeply rooted in traditional Chinese culture. We focus on three primary tasks: identifying salient visual elements, matching elements with their symbolic meanings, and explanations for the conveyed messages. Our evaluation reveals that state-of-the-art VLMs struggle with these tasks, often providing biased and hallucinated explanations and showing limited improvement through in-context learning. By releasing the Pun Rebus Art Dataset, we aim to facilitate the development of VLMs that can better understand and interpret culturally specific content, promoting greater inclusiveness beyond English-based corpora. The dataset and evaluation code are available at [this link](https://github.com/zhang-tuo-pdf/Pun-Rebus-Art-Benchmark).</abstract>
@@ -22601,7 +22601,7 @@
     <paper id="1158">
       <title><fixed-case>GUI</fixed-case> Agents: A Survey</title>
       <author><first>Dang</first><last>Nguyen</last><affiliation>University of Maryland, College Park</affiliation></author>
-      <author id="jian-chen"><first>Jian</first><last>Chen</last></author>
+      <author><first>Jian</first><last>Chen</last></author>
       <author><first>Yu</first><last>Wang</last><affiliation>University of Oregon and Vanderbilt University</affiliation></author>
       <author><first>Gang</first><last>Wu</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Namyong</first><last>Park</last><affiliation>Meta AI</affiliation></author>
@@ -22626,7 +22626,7 @@
       <author><first>Branislav</first><last>Kveton</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Jihyung</first><last>Kil</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Thien Huu</first><last>Nguyen</last><affiliation>University of Oregon</affiliation></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Tianyi</first><last>Zhou</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Ryan A.</first><last>Rossi</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
@@ -22642,7 +22642,7 @@
       <author><first>Wen-wai</first><last>Yim</last></author>
       <author><first>Yujuan</first><last>Fu</last></author>
       <author><first>Zhaoyi</first><last>Sun</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Fei</first><last>Xia</last><affiliation>University of Washington, Seattle</affiliation></author>
       <author><first>Thomas</first><last>Lin</last><affiliation>Microsoft</affiliation></author>
       <pages>22539-22550</pages>
@@ -22667,8 +22667,8 @@
     <paper id="1161">
       <title>Dynamic Knowledge Integration for Evidence-Driven Counter-Argument Generation with Large Language Models</title>
       <author><first>Anar</first><last>Yeginbergen</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last><affiliation>University of the Basque Country</affiliation></author>
       <pages>22568-22584</pages>
       <abstract>This paper investigates the role of dynamic external knowledge integration in improving counter-argument generation using Large Language Models (LLMs). While LLMs have shown promise in argumentative tasks, their tendency to generate lengthy, potentially non-factual responses highlights the need for more controlled and evidence-based approaches. We introduce a reconstructed and manually curated dataset of argument and counter-argument pairs specifically designed to balance argumentative complexity with evaluative feasibility. We also propose a new LLM-as-a-Judge evaluation methodology that shows a stronger correlation with human judgments compared to traditional reference-based metrics. Our experimental results demonstrate that integrating dynamic external knowledge from the web significantly improves the quality of generated counter-arguments, particularly in terms of relatedness, persuasiveness, and factuality. The findings suggest that combining LLMs with real-time external knowledge retrieval offers a promising direction for developing more effective and reliable counter-argumentation systems. Data and code are publicly available: https://github.com/anaryegen/ counter-argument-generation</abstract>
       <url hash="383d61b2">2025.findings-acl.1161</url>
@@ -22677,7 +22677,7 @@
     </paper>
     <paper id="1162">
       <title>Tell, Don’t Show: Leveraging Language Models’ Abstractive Retellings to Model Literary Themes</title>
-      <author><first>Li</first><last>Lucy</last><affiliation>University of California Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>University of California Berkeley</affiliation></author>
       <author><first>Camilla</first><last>Griffiths</last></author>
       <author><first>Sarah</first><last>Levine</last><affiliation>Stanford University</affiliation></author>
       <author><first>Jennifer L</first><last>Eberhardt</last></author>
@@ -22748,7 +22748,7 @@
       <author><first>Puxuan</first><last>Yu</last><affiliation>Snowflake</affiliation></author>
       <author><first>Daniel</first><last>Cohen</last><affiliation>Dataminr</affiliation></author>
       <author><first>Hemank</first><last>Lamba</last><affiliation>Dataminr Inc.</affiliation></author>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
       <author><first>Alejandro</first><last>Jaimes</last><affiliation>Dataminr</affiliation></author>
       <pages>22716-22730</pages>
       <abstract>In search settings, calibrating the scores during the ranking process to quantities such as click-through rates or relevance levels enhances a system’s usefulness and trustworthiness for downstream users. While previous research has improved this notion of calibration for low complexity learning-to-rank models, the larger data demands and parameter count specific to modern neural text rankers produce unique obstacles that hamper the efficacy of methods intended for the learning-to-rank setting.This paper proposes exploiting large language models (LLMs) to provide relevance and uncertainty signals for these neural text rankers to produce scale-calibrated scores through Monte Carlo sampling of natural language explanations (NLEs). Our approach transforms the neural ranking task from ranking textual query-document pairs to ranking corresponding synthesized NLEs. Comprehensive experiments on two popular document ranking datasets show that the NLE-based calibration approach consistently outperforms past calibration methods and LLM-based methods for ranking, calibration, and query performance prediction tasks.</abstract>
@@ -22761,7 +22761,7 @@
       <author><first>Miguel Romero</first><last>Calvo</last><affiliation>University of Minnesota - Twin Cities and Amazon</affiliation></author>
       <author><first>Shuoyang</first><last>Ding</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Corey D</first><last>Barrett</last><affiliation>Oracle</affiliation></author>
-      <author><first>Georgiana</first><last>Dinu</last><affiliation>Amazon</affiliation></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last><affiliation>Amazon</affiliation></author>
       <author><first>George</first><last>Karypis</last><affiliation>University of Minnesota, Minneapolis</affiliation></author>
       <pages>22731-22746</pages>
       <abstract>Dense embeddings are fundamental to modern machine learning systems, powering Retrieval-Augmented Generation (RAG), information retrieval, and representation learning. While instruction-conditioning has become the dominant approach for embedding specialization, its direct application to low-capacity models imposes fundamental representational constraints that limit the performance gains derived from specialization. In this paper, we analyze these limitations and introduce the Mixture of Task Experts (MoTE) transformer block, which leverages task-specialized parameters trained with Task-Aware Contrastive Learning () to enhance the model’s ability to generate specialized embeddings. Empirical results show that MoTE achieves 64% higher performance gains in retrieval datasets (<tex-math>+3.27\rightarrow +5.21</tex-math>) and 43% higher performance gains across all datasets (<tex-math>+1.81\rightarrow 2.60</tex-math>). Critically, these gains are achieved without altering instructions, training data, inference time, or number of active parameters.</abstract>
@@ -22811,7 +22811,7 @@
       <author><first>Navonil</first><last>Majumder</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Deepanway</first><last>Ghosal</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Somak</first><last>Aditya</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>22811-22849</pages>
       <abstract>Recent advancements in Large Language Models (LLMs) have showcased striking results on existing logical reasoning benchmarks, with some models even surpassing human performance. However, the true depth of their competencies and robustness in reasoning tasks remains an open question. To this end, in this paper, we focus on two popular reasoning tasks: arithmetic reasoning and code generation. Particularly, we introduce (i) a general ontology of perturbations for math and coding questions, (ii) a semi-automatic method to apply these perturbations, and (iii) two datasets, GSMore and HumanEval-Core, respectively, of perturbed math and coding problems to probe LLM capabilities in numeric reasoning and coding tasks.Through comprehensive evaluations of both closed-source and open-source LLMs, we show a significant performance drop across all the models against the perturbed questions, suggesting that the current LLMs lack robust problem solving skills and structured reasoning abilities in many areas, as defined by our ontology.</abstract>
@@ -22855,7 +22855,7 @@
       <author><first>Vihang</first><last>Pancholi</last></author>
       <author><first>Jainit Sushil</first><last>Bafna</last></author>
       <author><first>Tejas</first><last>Anvekar</last><affiliation>Arizona State University</affiliation></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>Vivek</first><last>Gupta</last><affiliation>Arizona State University</affiliation></author>
       <pages>22913-22934</pages>
       <abstract>Evaluating tables qualitatively and quantitatively poses a significant challenge, as standard metrics often overlook subtle structural and content-level discrepancies. To address this, we propose a rubric-based evaluation framework that integrates multi-level structural descriptors with fine-grained contextual signals, enabling more precise and consistent table comparison. Building on this, we introduce TabXEval, an eXhaustive and eXplainable two-phase evaluation framework. TabXEval first aligns reference and predicted tables structurally via TabAlign, then performs semantic and syntactic comparison using TabCompare, offering interpretable and granular feedback. We evaluate TabXEval on TabXBench, a diverse, multi-domain benchmark featuring realistic table perturbations and human annotations. A sensitivity-specificity analysis further demonstrates the robustness and explainability of TabXEval across varied table tasks. Code and data are available at https://corallab- asu.github.io/tabxeval/.</abstract>
@@ -23115,7 +23115,7 @@
       <author><first>Siliang</first><last>Qin</last><affiliation>Institute of Information Engineering.CAS</affiliation></author>
       <author><first>Yuanda</first><last>Wang</last></author>
       <author><first>Zhang</first><last>Bolun</last></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Tsinghua University</affiliation></author>
       <pages>23250-23267</pages>
       <abstract>Decompilers are fundamental tools for critical security tasks, from vulnerability discovery to malware analysis, yet their evaluation remains fragmented. Existing approaches primarily focus on syntactic correctness through synthetic micro-benchmarks or subjective human ratings, failing to address real-world requirements for semantic fidelity and analyst usability. We present **DecompileBench**, the first comprehensive framework that enables effective evaluation of decompilers in reverse engineering workflows through three key components: real-world function extraction (comprising 23,400 functions from 130 real-world programs), runtime-aware validation, and automated human-centric assessment using LLM-as-Judge to quantify the effectiveness of decompilers in reverse engineering workflows. Through a systematic comparison between six industrial-strength decompilers and six recent LLM-powered approaches, we demonstrate that LLM-based methods surpass commercial tools in code understandability despite 52.2% lower functionality correctness. These findings highlight the potential of LLM-based approaches to transform human-centric reverse engineering. We open source **DecompileBench** to provide a framework to advance research on decompilers and assist security experts in making informed tool selections based on their specific requirements.</abstract>
       <url hash="b2a13340">2025.findings-acl.1194</url>
@@ -23155,8 +23155,8 @@
       <author><first>Yuetai</first><last>Li</last></author>
       <author><first>Luyao</first><last>Niu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Zhen</first><last>Xiang</last><affiliation>University of Georgia</affiliation></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author><first>Bo</first><last>Li</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>23303-23320</pages>
       <abstract>Emerging large reasoning models (LRMs), such as DeepSeek-R1 models, leverage long chain-of-thought (CoT) reasoning to generate structured intermediate steps, enhancing their reasoning capabilities. However, long CoT does not inherently guarantee safe outputs, potentially leading to harmful consequences such as the introduction of security vulnerabilities in code or the spread of misinformation. Current research on large language model (LLM) safety usually focuses on short-answer responses, overlooking the long CoT style outputs of LRMs. To bridge this gap, we conduct a systematic study of LRM safety. First, we investigate safety evaluators calibrated against human annotations. Using our newly developed metrics, we thoroughly assess the safety of 13 state-of-the-art LRMs on StrongReject and WildJailbreak datasets. Our results show that LRMs are not safe compared to their reasoning advance. Further, we perform a fine-grained analysis of the reasoning trace and final answer. We find that three decoding strategies-ZeroThink, LessThink, and MoreThink-can improve model safety without additional training. However, these strategies either use constrained reasoning traces or incur high inference costs. To better strengthen LRM safety, we introduce SafeChain, the first-of-its-kind safety training dataset in CoT style. We fine-tune two LRMs with SafeChain, showing that it not only enhances model safety but also preserves performance across 6 reasoning benchmarks.</abstract>
@@ -23188,8 +23188,8 @@
       <author><first>Pengfei</first><last>Yu</last><affiliation>Amazon</affiliation></author>
       <author><first>Chi</first><last>Han</last></author>
       <author><first>Yi R.</first><last>Fung</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last><affiliation>University of Illinois, Urbana Champaign</affiliation></author>
       <author><first>Manling</first><last>Li</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Heng</first><last>Ji</last><affiliation>University of Illinois, Urbana-Champaign</affiliation></author>
       <pages>23340-23358</pages>
@@ -23226,7 +23226,7 @@
     <paper id="1202">
       <title>Enhance Multimodal Consistency and Coherence for Text-Image Plan Generation</title>
       <author><first>Xiaoxin</first><last>Lu</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Ranran Haoran</first><last>Zhang</last></author>
+      <author id="ranran-haoran-zhang"><first>Ranran Haoran</first><last>Zhang</last></author>
       <author><first>Yusen</first><last>Zhang</last></author>
       <author><first>Rui</first><last>Zhang</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>23392-23409</pages>
@@ -23376,7 +23376,7 @@
       <author><first>Zhiyuan</first><last>Chen</last></author>
       <author><first>Yang</first><last>Zhao</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Lu</first><last>Xiang</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>23659-23678</pages>
       <abstract>Multimodal Large Language Models (MLLMs) have shown strong performance in document image tasks, especially Optical Character Recognition (OCR). However, they struggle with Document Image Machine Translation (DIMT), which requires handling both cross-modal and cross-lingual challenges. Previous efforts to enhance DIMT capability through Supervised Fine-Tuning (SFT) on the DIMT dataset often result in the forgetting of the model’s existing monolingual abilities, such as OCR. To address these challenges, we introduce a novel fine-tuning paradigm, named Synchronously Self-Reviewing (SSR) its OCR proficiency, inspired by the concept “Bilingual Cognitive Advantage”. Specifically, SSR prompts the model to generate OCR text before producing translation text, which allows the model to leverage its strong monolingual OCR ability while learning to translate text across languages. Comprehensive experiments demonstrate the proposed SSR learning helps mitigate catastrophic forgetting, improving the generalization ability of MLLMs on both OCR and DIMT tasks. The code will be released upon acceptance.</abstract>
@@ -23437,7 +23437,7 @@
       <author><first>Kishan</first><last>Maharaj</last></author>
       <author><first>Sravani</first><last>Gunnu</last></author>
       <author><first>Abhijit</first><last>Mishra</last><affiliation>University of Texas at Austin and Apple</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>23778-23790</pages>
       <abstract>Pragmatics, the ability to infer meaning beyond literal interpretation, is crucial for social cognition and communication. While LLMs have been benchmarked for their pragmatic understanding, improving their performance remains underexplored. Existing methods rely on annotated labels but overlook the reasoning process humans naturally use to interpret implicit meaning. To bridge this gap, we introduce a novel pragmatic dataset <b>ImpliedMeaningPreference</b> that includes <i>explicit reasoning (‘thoughts’)</i> for both correct and incorrect interpretations. Through preference-tuning and supervised fine-tuning, we demonstrate that thought-based learning significantly enhances LLMs’ pragmatic understanding, improving accuracy by 11.12% across model families. We further discuss a transfer-learning study where we evaluate the performance of <i>thought</i>-based training for the other tasks of pragmatics (presupposition, deixis) that are not seen during the training time and observe an improvement of 16.10% compared to <i>label</i> trained models.</abstract>
       <url hash="b4034a63">2025.findings-acl.1218</url>
@@ -23451,7 +23451,7 @@
       <author><first>Zitong</first><last>Zhao</last></author>
       <author><first>Zhongxiang</first><last>Dai</last><affiliation>The Chinese University of Hong Kong, Shenzhen</affiliation></author>
       <author><first>Chuan-Sheng</first><last>Foo</last><affiliation>Centre for Frontier AI Research, A*STAR and Institute for Infocomm Research, A*STAR</affiliation></author>
-      <author><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Bryan Kian Hsiang</first><last>Low</last><affiliation>National University of Singapore</affiliation></author>
       <pages>23791-23824</pages>
       <abstract>The impressive performances of Large Language Models (LLMs) and their immense potential for commercialization have given rise to serious concerns over the Intellectual Property (IP) of their training data. In particular, the synthetic texts generated by LLMs may infringe the IP of the data being used to train the LLMs. To this end, it is imperative to be able to perform source attribution by identifying the data provider who contributed to the generation of a synthetic text by an LLM. In this paper, we show that this problem can be tackled by watermarking, i.e., by enabling an LLM to generate synthetic texts with embedded watermarks that contain information about their source(s). We identify the key properties of such watermarking frameworks (e.g., source attribution accuracy, robustness against adversaries), and propose a source attribution framework that satisfies these key properties due to our algorithmic designs. Our framework enables an LLM to learn an accurate mapping from the generated texts to data providers, which sets the foundation for effective source attribution. Extensive empirical evaluations show that our framework achieves effective source attribution.</abstract>
@@ -23602,8 +23602,8 @@
     </paper>
     <paper id="1229">
       <title><fixed-case>S</fixed-case>cene<fixed-case>G</fixed-case>ram: Conceptualizing and Describing Tangrams in Scene Context</title>
-      <author><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <pages>23976-23992</pages>
       <abstract>Research on reference and naming suggests that humans can come up with very different ways of conceptualizing and referring to the same object, e.g. the same abstract tangram shape can be a “crab”, “sink” or “space ship”. Another common assumption in cognitive science is that scene context fundamentally shapes our visual perception of objects and conceptual expectations. This paper contributes SceneGram, a dataset of human references to tangram shapes placed in different scene contexts, allowing for systematic analyses of the effect of scene context on conceptualization. Based on this data, we analyze references to tangram shapes generated by multimodal LLMs, showing that these models do not account for the richness and variability of conceptualizations found in human references.</abstract>
       <url hash="be84cba9">2025.findings-acl.1229</url>
@@ -23619,7 +23619,7 @@
       <author><first>Yuchen</first><last>Hu</last></author>
       <author><first>Bosheng</first><last>Ding</last></author>
       <author><first>Ruirui</first><last>Chen</last><affiliation>Institute of High Performance Computing, Singapore, A*STAR</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Nanyang Technological University and SalesForce.com</affiliation></author>
       <pages>23993-24010</pages>
       <abstract>Analogical reasoning is a unique ability of humans to address unfamiliar challenges by transferring strategies from relevant past experiences. One key finding in psychology is that compared with irrelevant past experiences, recalling relevant ones can help humans better handle new tasks. Coincidentally, the NLP community has also recently found that self-generating relevant examples in the context can help large language models (LLMs) better solve a given problem than hand-crafted prompts. However, it is yet not clear whether relevance is the key factor eliciting such capability, i.e., can LLMs benefit more from self-generated relevant examples than irrelevant ones? In this work, we systematically explore whether LLMs can truly perform analogical reasoning on a diverse set of reasoning tasks. With extensive experiments and analysis, we show that self-generated random examples can surprisingly achieve comparable or even better performance on certain tasks, e.g., 4% performance boost on GSM8K with random biological examples. We find that the accuracy of self-generated examples is the key factor and subsequently design two novel methods with improved performance and significantly reduced inference costs. Overall, we aim to advance a deeper understanding of LLM analogical reasoning and hope this work stimulates further research in the design of self-generated contexts.</abstract>
       <url hash="994b59cc">2025.findings-acl.1230</url>
@@ -23673,8 +23673,8 @@
       <author><first>Kehai</first><last>Chen</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
       <author><first>Wei</first><last>Wang</last></author>
       <author><first>Xun</first><last>Zhou</last><affiliation>Harbin Institute of Technology (Shenzhen)</affiliation></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <author><first>Min</first><last>Zhang</last><affiliation>Harbin Institute of Technology, Shenzhen</affiliation></author>
       <pages>24068-24084</pages>
       <abstract>Large language models (LLMs) have achieved remarkable performance on knowledge graph question answering (KGQA) tasks by planning and interacting with knowledge graphs. However, existing methods often confuse tool utilization with knowledge reasoning, harming readability of model outputs and giving rise to hallucinatory tool invocations, which hinder the advancement of KGQA. To address this issue, we propose Memory-augmented Query Reconstruction for LLM-based Knowledge Graph Reasoning (MemQ) to decouple LLM from tool invocation tasks using LLM-built query memory. By establishing a memory module with explicit descriptions of query statements, the proposed MemQ facilitates the KGQA process with natural language reasoning and memory-augmented query reconstruction. Meanwhile, we design an effective and readable reasoning to enhance the LLM’s reasoning capability in KGQA. Experimental results that MemQ achieves state-of-the-art performance on widely used benchmarks WebQSP and CWQ.</abstract>
@@ -23698,10 +23698,10 @@
     </paper>
     <paper id="1236">
       <title>Are Multimodal Large Language Models Pragmatically Competent Listeners in Simple Reference Resolution Tasks?</title>
-      <author><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Manar</first><last>Ali</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Larissa</first><last>Koch</last></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>Bielefeld University</affiliation></author>
       <author><first>Hendrik</first><last>Buschmeier</last><affiliation>Universität Bielefeld</affiliation></author>
       <pages>24101-24109</pages>
       <abstract>We investigate the linguistic abilities of multimodal large language models in reference resolution tasks featuring simple yet abstract visual stimuli, such as color patches and color grids. Although the task may not seem challenging for today’s language models, being straightforward for human dyads, we consider it to be a highly relevant probe of the pragmatic capabilities of MLLMs. Our results and analyses indeed suggest that basic pragmatic capabilities, such as context-dependent interpretation of color descriptions, still constitute major challenges for state-of-the-art MLLMs.</abstract>
@@ -23823,7 +23823,7 @@
       <author><first>Neil De La</first><last>Fuente</last><affiliation>Technical University of Munich and Universidad del País Vasco</affiliation></author>
       <author><first>Oscar</first><last>Sainz</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <author><first>Iker</first><last>García-Ferrero</last></author>
-      <author><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last><affiliation>University of the Basque Country (UPV/EHU)</affiliation></author>
       <pages>24248-24262</pages>
       <abstract>Information Extraction (IE) systems are traditionally domain-specific, requiring costlyadaptation that involves expert schema design,data annotation, and model training. WhileLarge Language Models have shown promisein zero-shot IE, performance degrades significantly in unseen domains where label definitions differ. This paper introduces GUIDEX,a novel method that automatically definesdomain-specific schemas, infers guidelines,and generates synthetically labeled instances,allowing for better out-of-domain generalization. Fine-tuning Llama 3.1 with GUIDEXsets a new state-of-the-art across seven zeroshot Named Entity Recognition benchmarks.Models trained with GUIDEX gain up to 7 F1points over previous methods without humanlabeled data, and nearly 2 F1 points higherwhen combined with it. Models trained onGUIDEX demonstrate enhanced comprehension of complex, domain-specific annotationschemas. Code, models, and synthetic datasetsare available at neilus03.github.io/guidex.com</abstract>
       <url hash="45d5137a">2025.findings-acl.1245</url>
@@ -23881,12 +23881,12 @@
     <paper id="1249">
       <title>Full-Step-<fixed-case>DPO</fixed-case>: Self-Supervised Preference Optimization with Step-wise Rewards for Mathematical Reasoning</title>
       <author><first>Huimin</first><last>Xu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Xin</first><last>Mao</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Feng-Lin</first><last>Li</last><affiliation>Shopee</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Wang</first><last>Chen</last></author>
       <author><first>Wei</first><last>Zhang</last><affiliation>sea group</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>24343-24356</pages>
       <abstract>Direct Preference Optimization (DPO) often struggles with long-chain mathematical reasoning. Existing approaches, such as Step-DPO, typically improve this by focusing on the first erroneous step in the reasoning chain. However, they overlook all other steps and rely heavily on humans or GPT-4 to identify erroneous steps. To address these issues, we propose Full-Step-DPO, a novel DPO framework tailored for mathematical reasoning. Instead of optimizing only the first erroneous step, it leverages step-wise rewards from the entire reasoning chain. This is achieved by training a self-supervised process reward model, which automatically scores each step, providing rewards while avoiding reliance on external signals. Furthermore, we introduce a novel step-wise DPO loss, which dynamically updates gradients based on these step-wise rewards. This endows stronger reasoning capabilities to language models. Extensive evaluations on both in-domain and out-of-domain mathematical reasoning benchmarks across various base language models, demonstrate that Full-Step-DPO achieves superior performance compared to state-of-the-art baselines.</abstract>
       <url hash="d1a412a1">2025.findings-acl.1249</url>
@@ -23906,12 +23906,12 @@
     <paper id="1251">
       <title><fixed-case>SCOPE</fixed-case>: Compress Mathematical Reasoning Steps for Efficient Automated Process Annotation</title>
       <author><first>Huimin</first><last>Xu</last><affiliation>Nanyang Technological University</affiliation></author>
-      <author><first>Xin</first><last>Mao</last><affiliation>ByteDance Inc.</affiliation></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last><affiliation>ByteDance Inc.</affiliation></author>
       <author><first>Feng-Lin</first><last>Li</last><affiliation>Shopee</affiliation></author>
       <author><first>Xiaobao</first><last>Wu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Wang</first><last>Chen</last></author>
       <author><first>Wei</first><last>Zhang</last><affiliation>sea group</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>24382-24394</pages>
       <abstract>Process Reward Models (PRMs) have demonstrated promising results in mathematical reasoning, but existing process annotation approaches, whether through human annotations or Monte Carlo simulations, remain computationally expensive. In this paper, we introduce Step COmpression for Process Estimation (SCOPE), a novel compression-based approach that significantly reduces annotation costs. We first translate natural language reasoning steps into code and normalize them through Abstract Syntax Tree, then merge equivalent steps to construct a prefix tree. Unlike simulation-based methods that waste numerous samples on estimation, SCOPE leverages a compression-based prefix tree where each root-to-leaf path serves as a training sample, reducing the complexity from <tex-math>O(NMK)</tex-math> to O(N) We construct a large-scale dataset containing 509K samples with only 5% of the computational resources required by previous methods. Empirical results demonstrate that PRMs trained on our dataset consistently outperform existing automated annotation approaches on both Best-of-N strategy and ProcessBench.</abstract>
       <url hash="03de3634">2025.findings-acl.1251</url>
@@ -23922,7 +23922,7 @@
       <title>Compositional Syntactico-<fixed-case>S</fixed-case>em<fixed-case>B</fixed-case>anking for <fixed-case>E</fixed-case>nglish as a Second or Foreign Language</title>
       <author><first>Wenxi</first><last>Li</last></author>
       <author><first>Xihao</first><last>Wang</last><affiliation>Peking University</affiliation></author>
-      <author><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last><affiliation>University of Cambridge</affiliation></author>
       <pages>24395-24406</pages>
       <abstract>Despite the widespread use of English as a Second or Foreign Language (ESFL), developing syntactico-semantic representations for it is limited — the irregularities in ESFL complicate systematic composition and subsequently the derivation of its semantics.This paper draws on constructivism and proposes a novel Synchronous Hyperedge Replacement Grammar (SHRG)-based constructivist approach to address the challenges. By using constructions as fundamental units, this approach not only accommodates both the idiosyncrasies and the compositional nature of ESFL, but also bridges the gap between literal cues and intended meaning.The feasibility of this constructivist approach is demonstrated using real ESFL data, resulting in a gold-standard, medium-sized syntactico-semantic bank that covers a wide range of ESFL phenomena.</abstract>
       <url hash="4db22a47">2025.findings-acl.1252</url>
@@ -23933,7 +23933,7 @@
       <title>Semantics-aware prompting for translating <fixed-case>NO</fixed-case>tices To <fixed-case>A</fixed-case>ir<fixed-case>M</fixed-case>en</title>
       <author><first>Minal Nitin</first><last>Dani</last><affiliation>Honeywell and Indian Institute of Technology, Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Aishwarya</first><last>Maheswaran</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>Indian Institute of Technology, Hyderabad,</affiliation></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last><affiliation>Indian Institute of Technology, Hyderabad,</affiliation></author>
       <pages>24407-24417</pages>
       <abstract>A NOTAM or NOtice To AirMen is a crucial notice for different aviation stakeholders, particularly flight crews. It delivers essential notifications about abnormal conditions of Aviation System components such as changes to facilities, hazards, service, procedure that are not known far enough in advance to be publicized through other means. NOTAM messages are short, contain acronyms, and look cryptic in most of the cases. Writing and understanding these messages put heavy cognitive load on its end users. In this work, we take up the task of translating NOTAMs into English natural language using LLMs. Since NOTAMs do not adhere to English grammar rules and have their own decoding rules, large language models (LLMs) cannot translate them without effective prompting. In this paper, we develop a framework to come up with effective prompts to achieve the translations. Our approach uses context-aware semantic prompting techniques, paired with domain-specific rules, to improve the accuracy and clarity of translations. The framework is evaluated using comprehensive experiments (6 LLMs of varying sizes, and with 5 different prompting setups for each) and eight evaluation metrics measuring different aspects of the translation. The results demonstrate that our methodology can produce clear translations that accurately convey the information contained in NOTAMs.</abstract>
       <url hash="692d8b10">2025.findings-acl.1253</url>
@@ -23970,7 +23970,7 @@
       <title>Can <fixed-case>VLM</fixed-case>s Actually See and Read? A Survey on Modality Collapse in Vision-Language Models</title>
       <author><first>Mong Yuan</first><last>Sim</last><affiliation>University of Adelaide</affiliation></author>
       <author><first>Wei Emma</first><last>Zhang</last><affiliation>The University of Adelaide</affiliation></author>
-      <author><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last><affiliation>CSIRO</affiliation></author>
       <author><first>Biaoyan</first><last>Fang</last></author>
       <pages>24452-24470</pages>
       <abstract>Vision-language models (VLMs) integrate textual and visual information, enabling the model to process visual inputs and leverage visual information to generate predictions. Such models are demanding for tasks such as visual question answering, image captioning, and visual grounding. However, some recent work found that VLMs often rely heavily on textual information, ignoring visual information, but are still able to achieve competitive performance in vision-language (VL) tasks. This survey reviews modality collapse analysis work to provide insights into the reason for this unintended behavior. It also reviews probing studies for fine-grained vision-language understanding, presenting current findings on information encoded in VL representations and highlighting potential directions for future research.</abstract>
@@ -23984,7 +23984,7 @@
       <author><first>Nihar Ranjan</first><last>Sahoo</last></author>
       <author><first>Rudra</first><last>Murthy</last><affiliation>IBM India Pvt Ltd</affiliation></author>
       <author><first>Swaprava</first><last>Nath</last><affiliation>Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>24471-24496</pages>
       <abstract>While a few high-quality bias benchmark datasets exist to address stereotypes in Language Models (LMs), a notable lack of focus remains on body image stereotypes. To bridge this gap, we propose <tex-math>\textbf{BIStereo}</tex-math>, a suite to uncover LMs’ biases towards people of certain physical appearance characteristics, namely, <tex-math>\textit{skin complexion, body shape, height, attire,}</tex-math> and a <tex-math>\textit{miscellaneous category}</tex-math> including <tex-math>\textit{hair texture, eye color, and more}</tex-math>. Our dataset comprises 40k sentence pairs designed to assess LMs’ biased preference for certain body types. We further include 60k premise-hypothesis pairs designed to comprehensively assess LMs’ preference for fair skin tone. Additionally, we curate 553 tuples consisting of a <tex-math>\textit{body image descriptor, gender, and a stereotypical attribute}</tex-math>, validated by a diverse pool of annotators for physical appearance stereotypes.We propose a metric, <tex-math>\textbf{TriSentBias}</tex-math>, that captures the biased preferences of LMs towards a certain body type over others. Using <tex-math>\textbf{BIStereo}</tex-math>, we assess the presence of body image biases in ten different language models, revealing significant biases in models Muril, XLMR, Llama3, and Gemma. We further evaluate the LMs through downstream NLI and Analogy tasks.Our NLI experiments highlight notable patterns in the LMs that align with the well-documented cognitive bias in humans known as <tex-math>\textbf{\textit{the Halo Effect}}</tex-math>.</abstract>
       <url hash="aafe823d">2025.findings-acl.1257</url>
@@ -24131,7 +24131,7 @@
       <title>Reranking-based Generation for Unbiased Perspective Summarization</title>
       <author><first>Narutatsu</first><last>Ri</last></author>
       <author><first>Nicholas</first><last>Deas</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>24701-24723</pages>
       <abstract>Generating unbiased summaries in real-world settings such as political perspective summarization remains a crucial application of Large Language Models (LLMs). Yet, existing evaluation frameworks rely on traditional metrics for measuring key attributes such as coverage and faithfulness without verifying their applicability, and efforts to develop improved summarizers are still nascent. We address these gaps by (1) identifying reliable metrics for measuring perspective summary quality, and (2) investigating the efficacy of LLM-based methods beyond zero-shot inference. Namely, we build a test set for benchmarking metric reliability using human annotations and show that traditional metrics underperform compared to language model–based metrics, which prove to be strong evaluators. Using these metrics, we show that reranking-based methods yield strong results, and preference tuning with synthetically generated and reranking-labeled data further boosts performance. Our findings aim to contribute to the reliable evaluation and development of perspective summarization methods.</abstract>
       <url hash="335217ac">2025.findings-acl.1268</url>
@@ -24199,7 +24199,7 @@
       <title>Decomposed Opinion Summarization with Verified Aspect-Aware Modules</title>
       <author><first>Miao</first><last>Li</last></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <author><first>Mirella</first><last>Lapata</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <pages>24805-24841</pages>
       <abstract>Opinion summarization plays a key role in deriving meaningful insights from large-scale online reviews. To make the process more explainable and grounded, we propose a domain-agnostic modular approach guided by review aspects (e.g., cleanliness for hotel reviews) which separates the tasks of aspect identification, opinion consolidation, and meta-review synthesis to enable greater transparency and ease of inspection. We conduct extensive experiments across datasets representing scientific research, business, and product domains. Results show that our approach generates more grounded summaries compared to strong baseline models, as verified through automated and human evaluations. Additionally, our modular approach, which incorporates reasoning based on review aspects, produces more informative intermediate outputs than other knowledge-agnostic decomposition approaches. Lastly, we provide empirical results to show that these intermediate outputs can support humans in summarizing opinions from large volumes of reviews.</abstract>
@@ -24442,7 +24442,7 @@
       <author><first>Zili</first><last>Wang</last></author>
       <author><first>Shuigeng</first><last>Zhou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xiangyu</first><last>Zhang</last></author>
-      <author><first>Heung-Yeung</first><last>Shum</last></author>
+      <author id="heung-yeung-shum"><first>Heung-Yeung</first><last>Shum</last></author>
       <pages>25114-25126</pages>
       <abstract>We propose novel attention architectures, Multi-matrix Factorization Attention (MFA) and MFA-Key-Reuse (MFA-KR). Existing variants for standard Multi-Head Attention (MHA), including SOTA methods like MLA, fail to maintain as strong performance under stringent Key-Value cache (KV cache) constraints. MFA enhances model capacity by efficiently scaling up both the number and dimension of attention heads through low-rank matrix factorization in the Query-Key (QK) circuit. Extending MFA, MFA-KR further reduces memory requirements by repurposing the key cache as value through value projection re-parameterization. MFA’s design enables strong model capacity when working under tight KV cache budget, while MFA-KR is suitable for even harsher KV cache limits with minor performance trade-off. Notably, in our extensive and large-scale experiments, the proposed architecture outperforms MLA and performs comparably to MHA, while reducing KV cache usage by up to 56% and 93.7%, respectively.</abstract>
       <url hash="f8ce57d5">2025.findings-acl.1288</url>
@@ -24546,7 +24546,7 @@
       <author><first>Debjyoti</first><last>Mondal</last><affiliation>Samsung</affiliation></author>
       <author><first>Subhadarshi</first><last>Panda</last><affiliation>Samsung</affiliation></author>
       <author><first>Rituraj</first><last>Singh</last><affiliation>Samsung Research and Development Institute - India, Bengaluru</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>25250-25269</pages>
       <abstract>Accurately grounding visual and textual elements within mobile user interfaces (UIs) remains a significant challenge for Vision-Language Models (VLMs). Visual grounding, a critical task in this domain, involves identifying the most relevant UI element or region based on a natural language query—a process that requires both precise perception and context-aware reasoning. In this work, we present - **MoUI**, a light-weight mobile UI understanding model trained on **MoIT**, an instruction-tuning dataset specifically tailored for mobile screen understanding and grounding, designed to bridge the gap between user intent and visual semantics. Complementing this dataset, we also present a human-annotated reasoning benchmark **MoIQ** that rigorously evaluates complex inference capabilities over mobile UIs. To harness these resources effectively, we propose a two-stage training approach that separately addresses perception and reasoning tasks, leading to stronger perception capabilities and improvement in reasoning abilities. Through extensive experiments, we demonstrate that our MoUI models achieve significant gains in accuracy across all perception tasks and _state-of-the-art_ results on public reasoning benchmark **ComplexQA (78%) and our MoIQ (49%)**. We will be open-sourcing our dataset, code, and models to foster further research and innovation in the field.</abstract>
       <url hash="cee1e1a7">2025.findings-acl.1295</url>
@@ -24623,7 +24623,7 @@
       <author><first>Zhangchen</first><last>Xu</last></author>
       <author><first>Fengqing</first><last>Jiang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Luyao</first><last>Niu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Bhaskar</first><last>Ramasubramanian</last><affiliation>Western Washington University</affiliation></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>25366-25394</pages>
@@ -24709,7 +24709,7 @@
       <author><first>Longyue</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Longqin</first><last>Jiang</last><affiliation>Universität Hamburg</affiliation></author>
       <author><first>Xingshan</first><last>Li</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>25507-25522</pages>
       <abstract>Large Language Models (LLMs) achieve remarkable performance through pretraining on extensive data. This enables efficient adaptation to diverse downstream tasks. However, the lack of interpretability in their underlying mechanisms limits the ability to effectively steer LLMs for specific applications. In this work, we investigate the intrinsic mechanisms of LLMs from a cognitive perspective using eye movement measures. Specifically, we analyze the layer-wise correlation between human cognitive indicators and LLM representations. Building on these insights, we propose a heuristic approach for selecting the optimal steering layer to modulate LLM semantics. To this end, we introduce an efficient selective layer intervention based on prominent parameter-efficient fine-tuning methods, which conventionally adjust either all layers or only the final layer. Additionally, we present an implicit layer contrastive intervention during inference to steer LLMs away from toxic outputs. Extensive experiments on natural language understanding, reasoning, and generation tasks, conducted on GPT-2, LLaMa2-7B, and Mixtral-7B, demonstrate the effectiveness and efficiency of our approach. As a model-agnostic framework, it enhances the interpretability of LLMs while improving efficiency for safe deployment.</abstract>
       <url hash="fd0357df">2025.findings-acl.1308</url>
@@ -24735,7 +24735,7 @@
       <author><first>Zihao</first><last>Yu</last></author>
       <author><first>Sam</first><last>Pan</last></author>
       <author><first>Zhun</first><last>Wang</last><affiliation>University of California, Berkeley</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>University of California, Santa Cruz</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>University of California, Santa Cruz</affiliation></author>
       <author><first>Dawn</first><last>Song</last><affiliation>University of California Berkeley</affiliation></author>
       <author><first>Chenguang</first><last>Wang</last><affiliation>Washington University, Saint Louis</affiliation></author>
       <pages>25534-25553</pages>
@@ -24799,7 +24799,7 @@
     <paper id="1315">
       <title><fixed-case>IMPARA</fixed-case>-<fixed-case>GED</fixed-case>: Grammatical Error Detection is Boosting Reference-free Grammatical Error Quality Estimator</title>
       <author><first>Yusuke</first><last>Sakai</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
-      <author><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
+      <author id="takumi-goto"><first>Takumi</first><last>Goto</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
       <pages>25647-25654</pages>
       <abstract>We propose IMPARA-GED, a novel reference-free automatic grammatical error correction (GEC) evaluation method with grammatical error detection (GED) capabilities. We focus on the quality estimator of IMPARA, an existing automatic GEC evaluation method, and construct that of IMPARA-GED using a pre-trained language model with enhanced GED capabilities. Experimental results on SEEDA, a meta-evaluation dataset for automatic GEC evaluation methods, demonstrate that IMPARA-GED achieves the highest correlation with human sentence-level evaluations.</abstract>
@@ -24813,7 +24813,7 @@
       <author><first>Bingbing</first><last>Wen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Bin</first><last>Han</last><affiliation>University of Washington</affiliation></author>
       <author><first>Robert</first><last>Wolfe</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Bill</first><last>Howe</last><affiliation>University of Washington</affiliation></author>
       <pages>25655-25672</pages>
       <abstract>Psychology research has shown that humans are poor at estimating their performance on tasks, tending towards underconfidence on easy tasks and overconfidence on difficult tasks. We examine three LLMs, Llama-3-70B-instruct, Claude-3-Sonnet, and GPT-4o, on a range of QA tasks of varying difficulty, and show that models exhibit subtle differences from human patterns of overconfidence: less sensitive to task difficulty, and when prompted to answer based on different personas—e.g., expert vs layman, or different race, gender, and ages—the models will respond with stereotypically biased confidence estimations even though their underlying answer accuracy remains the same. Based on these observations, we propose Answer-Free Confidence Estimation (AFCE) to improve confidence calibration and LLM interpretability in these settings. AFCE is a self-assessment method that employs two stages of prompting, first eliciting only confidence scores on questions, then asking separately for the answer. Experiments on the MMLU and GPQA datasets spanning subjects and difficulty show that this separation of tasks significantly reduces overconfidence and delivers more human-like sensitivity to task difficulty.</abstract>
@@ -24915,7 +24915,7 @@
     <paper id="1323">
       <title>Just Put a Human in the Loop? Investigating <fixed-case>LLM</fixed-case>-Assisted Annotation for Subjective Tasks</title>
       <author><first>Hope</first><last>Schroeder</last></author>
-      <author><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Jad</first><last>Kabbara</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>25771-25795</pages>
       <abstract>LLM use in annotation is becoming widespread, and given LLMs’ overall promising performance and speed, putting humans in the loop to simply “review” LLM annotations can be tempting. In subjective tasks with multiple plausible answers, this can impact both evaluation of LLM performance, and analysis using these labels in a social science task downstream. In a pre-registered experiment with 350 unique annotators and 7,000 annotations across 4 conditions, 2 models, and 2 datasets, we find that presenting crowdworkers with LLM-generated annotation suggestions did not make them faster annotators, but did improve their self-reported confidence in the task. More importantly, annotators strongly took the LLM suggestions, significantly changing the label distribution compared to the baseline. We show that when these labels created with LLM assistance are used to evaluate LLM performance, reported model performance significantly increases. We show how changes in label distributions as a result of LLM assistance can affect conclusions drawn by analyzing even “human-approved” LLM-annotated datasets. We believe our work underlines the importance of understanding the impact of LLM-assisted annotation on subjective, qualitative tasks, on the creation of gold data for training and testing, and on the evaluation of NLP systems on subjective tasks.</abstract>
@@ -25227,7 +25227,7 @@
       <author><first>Sophia</first><last>Horng</last><affiliation>Columbia University</affiliation></author>
       <author><first>Maximillian</first><last>Chen</last><affiliation>Google</affiliation></author>
       <author><first>Kung-Hsiang</first><last>Huang</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University and Columbia University</affiliation></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last><affiliation>Columbia University and Columbia University</affiliation></author>
       <pages>26239-26256</pages>
       <abstract>Tabular data is used to store information in many real-world systems ranging from finance to healthcare. However, such structured data is often communicated to humans in visually interpretable formats (e.g. charts and textual paragraphs), making it imperative that fact-checking models should be able to reason over multiple pieces of structured evidence presented across different modalities. In this paper, we propose Multi-Document Multi-Modal Table-based Fact Verification (M<tex-math>^{2}</tex-math>-TabFact), a challenging fact verification task that requires jointly reasoning over visual and textual representations of structured data. We design an automatic data generation pipeline that converts existing tabular data into descriptive visual and textual evidence. We then use Large Language Models to generate complex claims that depend on multi-document, multi-modal evidence. In total, we create 8,856 pairs of complex claims and multi-modal evidence through this procedure and systematically evaluate M<tex-math>^{2}</tex-math>-TabFact with a set of strong vision-language models (VLM). We find that existing VLMs have large gaps in fact verification performance compared to humans. Moreover, we find that they are imbalanced when it comes to their ability to handle reason about different modalities, and currently struggle to reason about information extracted from multiple documents.</abstract>
       <url hash="0ec31c3b">2025.findings-acl.1345</url>
@@ -25248,9 +25248,9 @@
     <paper id="1347">
       <title><fixed-case>PLAY</fixed-case>2<fixed-case>PROMPT</fixed-case>: Zero-shot Tool Instruction Optimization for <fixed-case>LLM</fixed-case> Agents via Tool Play</title>
       <author><first>Wei</first><last>Fang</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Kaizhi</first><last>Qian</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Yada</first><last>Zhu</last></author>
       <pages>26274-26290</pages>
       <abstract>Large language models (LLMs) are increasingly integrated with specialized external tools, yet many tasks demand zero-shot tool usage with minimal or noisy documentation. Existing solutions rely on manual rewriting or labeled data for validation, making them inapplicable in true zero-shot settings. To address these challenges, we propose PLAY2PROMPT, an automated framework that systematically “plays” with each tool to explore its input-output behaviors. Through this iterative trial-and-error process, PLAY2PROMPT refines tool documentation and generates usage examples without any labeled data. These examples not only guide LLM inference but also serve as validation to further enhance tool utilization. Extensive experiments on real-world tasks demonstrate that PLAY2PROMPT significantly improves zero-shot tool performance across both open and closed models, offering a scalable and effective solution for domain-specific tool integration.</abstract>
@@ -25277,7 +25277,7 @@
       <author><first>Juhyun</first><last>Oh</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Eunsu</first><last>Kim</last></author>
       <author><first>Hoyun</first><last>Song</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>26312-26332</pages>
       <abstract>Ensuring persona fidelity in large language models (LLMs) is essential for maintaining coherent and engaging human-AI interactions. However, LLMs often exhibit Out-of-Character (OOC) behavior, where generated responses deviate from an assigned persona, leading to inconsistencies that affect model reliability. Existing evaluation methods typically assign single scores to entire responses, struggling to capture subtle persona misalignment, particularly in long-form text generation. To address this limitation, we propose an atomic-level evaluation framework that quantifies persona fidelity at a finer granularity. Our three key metrics measure the degree of persona alignment and consistency within and across generations. Our approach enables a more precise and realistic assessment of persona fidelity by identifying subtle deviations that real users would encounter. Through our experiments, we demonstrate that our framework effectively detects persona inconsistencies that prior methods overlook. By analyzing persona fidelity across diverse tasks and personality types, we reveal how task structure and persona desirability influence model adaptability, highlighting challenges in maintaining consistent persona expression.</abstract>
       <url hash="1bbe3493">2025.findings-acl.1349</url>
@@ -25385,7 +25385,7 @@
       <author><first>Seungone</first><last>Kim</last></author>
       <author><first>Niklas</first><last>Muennighoff</last><affiliation>Stanford University, Contextual AI and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Dongkwan</first><last>Kim</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>26456-26493</pages>
       <abstract>We introduce LLM-as-an-Interviewer, a novel paradigm for evaluating large language models (LLMs). This approach leverages multi-turn interactions where the LLM interviewer actively provides feedback on responses and poses follow-up questions to the evaluated LLM. At the start of the interview, the LLM interviewer dynamically modifies datasets to generate initial questions, mitigating data contamination. We apply the LLM-as-an-Interviewer framework to evaluate six models on the reasoning, factuality and instruction-following tasks. Our results show that the framework effectively provides insights into LLM performance, including the quality of initial responses, adaptability to feedback, and ability to address follow-up queries like clarification or additional knowledge requests. The framework also addresses key limitations of conventional methods like LLM-as-a-Judge, including verbosity bias and inconsistency across runs. Finally, we propose the Interview Report, which aggregates insights from the interview process, providing examples and a comprehensive analysis of the LLM’s strengths and weaknesses. This report offers a detailed snapshot of the model’s real-world applicability.</abstract>
       <url hash="c539fd5f">2025.findings-acl.1357</url>
@@ -25431,7 +25431,7 @@
       <title>Rethinking Prompt-based Debiasing in Large Language Model</title>
       <author><first>Xinyi</first><last>Yang</last></author>
       <author><first>Runzhe</first><last>Zhan</last><affiliation>University of Macau</affiliation></author>
-      <author id="shu-yang"><first>Shu</first><last>Yang</last></author>
+      <author><first>Shu</first><last>Yang</last></author>
       <author><first>Junchao</first><last>Wu</last><affiliation>University of Macau</affiliation></author>
       <author><first>Lidia S.</first><last>Chao</last><affiliation>University of Macau</affiliation></author>
       <author><first>Derek F.</first><last>Wong</last><affiliation>University of Macau</affiliation></author>
@@ -25461,7 +25461,7 @@
       <author><first>Oktie</first><last>Hassanzadeh</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Dharmashankar</first><last>Subramanian</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Sola</first><last>Shirai</last><affiliation>International Business Machines</affiliation></author>
-      <author><first>Alfio</first><last>Gliozzo</last><affiliation>International Business Machines</affiliation></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Debarun</first><last>Bhattacharjya</last><affiliation>International Business Machines</affiliation></author>
       <pages>26569-26583</pages>
       <abstract>Text-to-SQL aims to translate natural language queries into SQL statements, which is practical as it enables anyone to easily retrieve the desired information from databases. Recently, many existing approaches tackle this problem with Large Language Models (LLMs), leveraging their strong capability in understanding user queries and generating corresponding SQL code. Yet, the parametric knowledge in LLMs might be limited to covering all the diverse and domain-specific queries that require grounding in various database schemas, which makes generated SQLs less accurate oftentimes. To tackle this, we propose constructing the knowledge base for text-to-SQL, a foundational source of knowledge, from which we retrieve and generate the necessary knowledge for given queries. In particular, unlike existing approaches that either manually annotate knowledge or generate only a few pieces of knowledge for each query, our knowledge base is comprehensive, which is constructed based on a combination of all the available questions and their associated database schemas along with their relevant knowledge, and can be reused for unseen databases from different datasets and domains. We validate our approach on multiple text-to-SQL datasets, considering both the overlapping and non-overlapping database scenarios, where it outperforms relevant baselines substantially.</abstract>
@@ -25498,7 +25498,7 @@
       <title>From Complexity to Clarity: <fixed-case>AI</fixed-case>/<fixed-case>NLP</fixed-case>’s Role in Regulatory Compliance</title>
       <author><first>Jivitesh</first><last>Jain</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Nivedhitha</first><last>Dhanasekaran</last></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>26629-26641</pages>
       <abstract>Regulatory data compliance is a cornerstone of trust and accountability in critical sectors like finance, healthcare, and technology, yet its complexity poses significant challenges for organizations worldwide. Recent advances in natural language processing, particularly large language models, have demonstrated remarkable capabilities in text analysis and reasoning, offering promising solutions for automating compliance processes. This survey examines the current state of automated data compliance, analyzing key challenges and approaches across problem areas. We identify critical limitations in current datasets and techniques, including issues of adaptability, completeness, and trust. Looking ahead, we propose research directions to address these challenges, emphasizing standardized evaluation frameworks and balanced human-AI collaboration.</abstract>
       <url hash="70bb88dd">2025.findings-acl.1366</url>
@@ -25545,7 +25545,7 @@
     </paper>
     <paper id="1370">
       <title>Standard Quality Criteria Derived from Current <fixed-case>NLP</fixed-case> Evaluations for Guiding Evaluation Design and Grounding Comparability and <fixed-case>AI</fixed-case> Compliance Assessments</title>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Craig</first><last>Thomson</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
       <pages>26685-26715</pages>
@@ -25673,7 +25673,7 @@
       <author><first>Amir Hossein</first><last>Kargaran</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Yihong</first><last>Liu</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>26905-26917</pages>
       <abstract>Several studies have explored the mechanisms of large language models (LLMs) in coding tasks, but most have focused on programming languages (PLs) in a monolingual setting. In this paper, we investigate the relationship between multiple PLs and English in the concept space of LLMs. We perform a few-shot translation task on 21 PL pairs using two Llama-based models. By decoding the embeddings of intermediate layers during this task, we observe that the concept space is closer to English (including PL keywords) and assigns high probabilities to English tokens in the second half of the intermediate layers. We analyze neuron activations for 11 PLs and English, finding that while language-specific neurons are primarily concentrated in the bottom layers, those exclusive to each PL tend to appear in the top layers. For PLs that are highly aligned with multiple other PLs, identifying language-specific neurons is not feasible. These PLs also tend to have a larger keyword set than other PLs and are closer to the model’s concept space regardless of the input/output PL in the translation task. Our findings provide insights into how LLMs internally represent PLs, revealing structural patterns in the model’s concept space. Code is available at https://github.com/cisnlp/code-specific-neurons.</abstract>
       <url hash="9eca3621">2025.findings-acl.1379</url>
@@ -25749,7 +25749,7 @@
       <author><first>Nafiseh</first><last>Nikeghbal</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>Jana</first><last>Diesner</last><affiliation>Technische Universität München</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR, Sorbonne Université &amp; CNRS</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>27001-27023</pages>
       <abstract>English-centric large language models (LLMs) often show strong multilingual capabilities. However, their multilingual performance remains unclear and is under-evaluated for many other languages. Most benchmarks for multilinguality focus on classic NLP tasks or cover a minimal number of languages. We introduce MEXA, a method for assessing the multilingual capabilities of pre-trained English-centric LLMs using parallel sentences, which are available for more languages than existing downstream tasks. MEXA leverages that English-centric LLMs use English as a pivot language in their intermediate layers. MEXA computes the alignment between English and non-English languages using parallel sentences to evaluate the transfer of language understanding from English to other languages. This alignment can be used to estimate model performance in different languages. We conduct controlled experiments using various parallel datasets (FLORES-200 and Bible), models (Llama family, Gemma family, Mistral, and OLMo), and established downstream tasks (Belebele, m-MMLU, and m-ARC). We explore different methods to compute embeddings in decoder-only models. Our results show that MEXA, in its default settings, achieves an average Pearson correlation of 0.90 between its predicted scores and actual task performance across languages. This suggests that MEXA is a reliable method for estimating the multilingual capabilities of English-centric LLMs, providing a clearer understanding of their multilingual potential and the inner workings of LLMs. Leaderboard: https://cis-lmu-mexa.hf.space, Code: https://github.com/cisnlp/MEXA.</abstract>
       <url hash="b43fd047">2025.findings-acl.1385</url>
diff --git a/data/xml/2025.finnlp.xml b/data/xml/2025.finnlp.xml
index 3889e5c08d..f6122c7be1 100644
--- a/data/xml/2025.finnlp.xml
+++ b/data/xml/2025.finnlp.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</booktitle>
       <editor><first>Chung-Chi</first><last>Chen</last></editor>
-      <editor><first>Antonio</first><last>Moreno-Sandoval</last></editor>
+      <editor id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></editor>
       <editor><first>Jimin</first><last>Huang</last></editor>
       <editor><first>Qianqian</first><last>Xie</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
@@ -114,7 +114,7 @@
     <paper id="8">
       <title>Concept-Based <fixed-case>RAG</fixed-case> Models: A High-Accuracy Fact Retrieval Approach</title>
       <author><first>Cheng-Yu</first><last>Lin</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>96–100</pages>
       <abstract>This study introduces a concept-based methodology to optimize Retrieval-Augmented Generation (RAG) tasks by assessing dataset certainty using entropy-based metrics and concept extraction techniques. Unlike traditional methods focused on reducing LLM hallucinations or modifying data structures, this approach evaluates inherent knowledge uncertainty from an LLM perspective. By pre-processing documents with LLMs, the concept-based method significantly enhances precision in tasks demanding high accuracy, such as legal, finance, or formal document responses.</abstract>
       <url hash="7df06c80">2025.finnlp-1.8</url>
@@ -147,7 +147,7 @@
     <paper id="11">
       <title>Forecasting Credit Ratings: A Case Study where Traditional Methods Outperform Generative <fixed-case>LLM</fixed-case>s</title>
       <author><first>Felix</first><last>Drinkall</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <author><first>Stefan</first><last>Zohren</last></author>
       <pages>118–133</pages>
       <abstract>Large Language Models (LLMs) have been shown to perform well for many downstream tasks. Transfer learning can enable LLMs to acquire skills that were not targeted during pre-training. In financial contexts, LLMs can sometimes beat well-established benchmarks. This paper investigates how well LLMs perform at forecasting corporate credit ratings. We show that while LLMs are very good at encoding textual information, traditional methods are still very competitive when it comes to encoding numeric and multimodal data. For our task, current LLMs perform worse than a more traditional XGBoost architecture that combines fundamental and macroeconomic data with high-density text-based embedding features. We investigate the degree to which the text encoding methodology affects performance and interpretability.</abstract>
@@ -214,7 +214,7 @@
       <author><first>Mohamed</first><last>Ettaleb</last></author>
       <author><first>Mouna</first><last>Kamel</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <pages>175–183</pages>
       <abstract>Relation Extraction (RE) is a fundamental task in natural language processing, aimed at deducing semantic relationships between entities in a text. Traditional supervised extraction methods relation extraction methods involve training models to annotate tokens representing entity mentions, followed by predicting the relationship between these entities. However, recent advancements have transformed this task into a sequence-to-sequence problem. This involves converting relationships between entities into target string, which are then generated from the input text. Thus, language models now appear as a solution to this task and have already been used in numerous studies, with various levels of refinement, across different domains. The objective of the present study is to evaluate the contribution of large language models (LLM) to the task of relation extraction in a specific domain (in this case, the economic domain), compared to smaller language models. To do this, we considered as a baseline a model based on the BERT architecture, trained in this domain, and four LLM, namely FinGPT specific to the financial domain, XLNet, ChatGLM, and Llama3, which are generalists. All these models were evaluated on the same extraction task, with zero-shot for the general-purpose LLM, as well as refinements through few-shot learning and fine-tuning. The experiments showedthat the best performance in terms of F-score was achieved with fine-tuned LLM, with Llama3 achieving the highest performance.</abstract>
       <url hash="41613581">2025.finnlp-1.17</url>
@@ -242,7 +242,7 @@
       <title><fixed-case>AMWAL</fixed-case>: Named Entity Recognition for <fixed-case>A</fixed-case>rabic Financial News</title>
       <author><first>Muhammad S.</first><last>Abdo</last></author>
       <author><first>Yash</first><last>Hatekar</last></author>
-      <author><first>Damir</first><last>Cavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last></author>
       <pages>207–213</pages>
       <abstract>Financial Named Entity Recognition (NER) presents a pivotal task in extracting structured information from unstructured financial data, especially when extending its application to languages beyond English. In this paper, we present AMWAL, a named entity recognition system for Arabic financial news. Our approach centered on building a specialized corpus compiled from three major Arabic financial newspapers spanning from 2000 to 2023. Entities were extracted from this corpus using a semi-automatic process that included manual annotation and review to ensure accuracy. The total number of entities identified amounts to 17.1k tokens, distributed across 20 categories, providing a comprehensive coverage of financial entities. To standardize the identified entities, we adopt financial concepts from the Financial Industry Business Ontology (FIBO, 2020), aligning our framework with industry standards. The significance of our work lies not only in the creation of the first customized NER system for Arabic financial data but also in its potential to streamline information extraction processes in the financial domain. Our NER system achieves a Precision score of 96.08, a Recall score of 95.87, and an F1 score of 95.97, which outperforms state-of-the-art general Arabic NER systems as well as other systems for financial NER in other languages.</abstract>
       <url hash="5ba1b293">2025.finnlp-1.20</url>
@@ -251,7 +251,7 @@
     <paper id="21">
       <title>The Financial Document Causality Detection Shared Task (<fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2025)</title>
       <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
-      <author><first>Jordi</first><last>Porta</last></author>
+      <author id="jordi-porta-zamorano"><first>Jordi</first><last>Porta</last></author>
       <author><first>Blanca</first><last>Carbajo-Coronado</last></author>
       <author><first>Yanco</first><last>Torterolo</last></author>
       <author><first>Doaa</first><last>Samy</last></author>
@@ -285,8 +285,8 @@
     <paper id="24">
       <title><fixed-case>CLRG</fixed-case>@<fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal2025: Cause-Effect Extraction in Finance Domain</title>
       <author><first>Vibhavkrishnan</first><last>K S</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>236–241</pages>
       <abstract>This paper presents our work on Cause-Effect information extraction specifically in the financial domain. Cause and effect information is very much needed for expert decision making. Particularly, in the financial domain, the fund managers, financial analysts, etc. need to have the information on cause-effects for their works. Natural Language Processing (NLP) techniques help in the automatic extraction of cause and effect from a given text. In this work, we build various cause-effect text span detection models using pre-trained transformer-based language models and fine tune these models using the data provided by FinCausal 2025 task organizers. We have only used FinCausal 2025 data sets to train our models. No other external data is used. Our ensemble of sequence tagging models based on the Fine-tuned RoBERTa-Large language model achieves SAS score of 0.9604 and Exact match score of 0.7214 for English. Similarly for Spanish we obtain SAS score of 0.9607 and Exact match score of 0.7166. This is our first time participation in the FinCausal 2025 Task.</abstract>
       <url hash="5778b861">2025.finnlp-1.24</url>
diff --git a/data/xml/2025.gebnlp.xml b/data/xml/2025.gebnlp.xml
index 0b67c10527..c4fb1ee11e 100644
--- a/data/xml/2025.gebnlp.xml
+++ b/data/xml/2025.gebnlp.xml
@@ -3,7 +3,7 @@
   <volume id="1" ingest-date="2025-07-21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</booktitle>
-      <editor><first>Agnieszka</first><last>Faleńska</last></editor>
+      <editor id="agnieszka-falenska"><first>Agnieszka</first><last>Faleńska</last></editor>
       <editor><first>Christine</first><last>Basta</last></editor>
       <editor><first>Marta</first><last>Costa-jussà</last></editor>
       <editor><first>Karolina</first><last>Stańczak</last></editor>
@@ -113,7 +113,7 @@
       <author><first>Hadi</first><last>Mohammadi</last></author>
       <author><first>Tina</first><last>Shahedi</last><affiliation>University of Tehran, University of Tehran</affiliation></author>
       <author><first>Pablo</first><last>Mosteiro</last><affiliation>Utrecht University</affiliation></author>
-      <author><first>Massimo</first><last>Poesio</last><affiliation>Utrecht University and Queen Mary, University of London</affiliation></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last><affiliation>Utrecht University and Queen Mary, University of London</affiliation></author>
       <author><first>Ayoub</first><last>Bagheri</last><affiliation>Utrecht University</affiliation></author>
       <author><first>Anastasia</first><last>Giachanou</last><affiliation>Utrecht University</affiliation></author>
       <pages>92-104</pages>
@@ -164,7 +164,7 @@
       <author><first>Shalaka</first><last>Satheesh</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
       <author><first>Katrin</first><last>Klug</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
       <author><first>Katharina</first><last>Beckh</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
-      <author><first>Héctor</first><last>Allende-Cid</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
+      <author id="hector-allende-cid"><first>Héctor</first><last>Allende-Cid</last><affiliation>Fraunhofer Institute IAIS, Fraunhofer IAIS</affiliation></author>
       <author><first>Sebastian</first><last>Houben</last><affiliation>Hochschule Bonn-Rhein-Sieg</affiliation></author>
       <author><first>Teena</first><last>Hassan</last><affiliation>Hochschule Bonn-Rhein-Sieg</affiliation></author>
       <pages>137-148</pages>
@@ -223,7 +223,7 @@
       <title>Bias Attribution in <fixed-case>F</fixed-case>ilipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages</title>
       <author><first>Lance Calvin Lim</first><last>Gamboa</last><affiliation>University of Birmingham and Ateneo de Manila University</affiliation></author>
       <author><first>Yue</first><last>Feng</last><affiliation>University of Birmingham</affiliation></author>
-      <author><first>Mark G.</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark G.</first><last>Lee</last></author>
       <pages>195-205</pages>
       <abstract>Emerging research on bias attribution and interpretability have revealed how tokens contribute to biased behavior in language models processing English texts. We build on this line of inquiry by adapting the information-theoretic bias attribution score metric for implementation on models handling agglutinative languages—particularly Filipino. We then demonstrate the effectiveness of our adapted method by using it on a purely Filipino model and on three multilingual models—one trained on languages worldwide and two on Southeast Asian data. Our results show that Filipino models are driven towards bias by words pertaining to <tex-math>\textit{people}</tex-math>, <tex-math>\textit{objects}</tex-math>, and <tex-math>\textit{relationships}</tex-math>—entity-based themes that stand in contrast to the action-heavy nature of bias-contributing themes in English (i.e., <tex-math>\textit{criminal}</tex-math>, <tex-math>\textit{sexual}</tex-math>, and <tex-math>\textit{prosocial}</tex-math> behaviors). These findings point to differences in how English and non-English models process inputs linked to sociodemographic groups and bias.</abstract>
       <url hash="79b1ac24">2025.gebnlp-1.19</url>
@@ -404,7 +404,7 @@
       <author><first>Carleigh</first><last>Wood</last></author>
       <author><first>Bokai</first><last>Yu</last><affiliation>Meta AI</affiliation></author>
       <author><first>Christophe</first><last>Ropers</last><affiliation>Meta</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>403-426</pages>
       <abstract>In the current landscape of automatic language generation, there is a need to understand, evaluate, and mitigate demographic biases, as existing models are becoming increasingly multilingual. To address this, we present the initial eight languages from the Massive Multilingual Holistic Bias (MMHB) dataset and benchmark consisting of approximately 6 million sentences. The sentences are designed to induce biases towards different groups of people which can yield significant results when using them as a benchmark to test different text generation models. To further scale up in terms of both language coverage and size and to leverage limited human translation, we use systematic approach to independently translate sentence parts. This technique carefully designs a structure to dynamically generate multiple sentence variations and significantly reduces the human translation workload. The translation process has been meticulously conducted to avoid an English-centric perspective and include all necessary morphological variations for languages that require them, improving from the original English HOLISTICBIAS. Finally, we utilize MMHB to report results on gender bias and added toxicity in MT tasks.</abstract>
       <url hash="7a110326">2025.gebnlp-1.35</url>
@@ -416,7 +416,7 @@
       <author><first>Kristin</first><last>Gnadt</last></author>
       <author><first>David</first><last>Thulke</last><affiliation>RWTH Aachen University and AppTek</affiliation></author>
       <author><first>Simone</first><last>Kopeinik</last><affiliation>Know Center GmbH</affiliation></author>
-      <author><first>Ralf</first><last>Schlüter</last><affiliation>AppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last><affiliation>AppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
       <pages>427-450</pages>
       <abstract>In recent years, various methods have been proposed to evaluate gender bias in large language models (LLMs). A key challenge lies in the transferability of bias measurement methods initially developed for the English language when applied to other languages. This work aims to contribute to this research strand by presenting five German datasets for gender bias evaluation in LLMs. The datasets are grounded in well-established concepts of gender bias and are accessible through multiple methodologies. Our findings, reported for eight multilingual LLM models, reveal unique challenges associated with gender bias in German, including the ambiguous interpretation of male occupational terms and the influence of seemingly neutral nouns on gender perception. This work contributes to the understanding of gender bias in LLMs across languages and underscores the necessity for tailored evaluation frameworks.</abstract>
       <url hash="e953dc90">2025.gebnlp-1.37</url>
diff --git a/data/xml/2025.gem.xml b/data/xml/2025.gem.xml
index b7accde96b..f4cb76b95e 100644
--- a/data/xml/2025.gem.xml
+++ b/data/xml/2025.gem.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM²)</booktitle>
       <editor><first>Ofir</first><last>Arviv</last><affiliation>IBM Research</affiliation></editor>
-      <editor><first>Miruna</first><last>Clinciu</last><affiliation>Heriot Watt University</affiliation></editor>
+      <editor id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last><affiliation>Heriot Watt University</affiliation></editor>
       <editor><first>Kaustubh</first><last>Dhole</last><affiliation>Emory University</affiliation></editor>
       <editor><first>Rotem</first><last>Dror</last><affiliation>University of Haifa</affiliation></editor>
       <editor><first>Sebastian</first><last>Gehrmann</last><affiliation>Bloomberg</affiliation></editor>
@@ -66,7 +66,7 @@
       <author><first>Sualeha</first><last>Farid</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Abdul Hameed</first><last>Azeemi</last></author>
       <author><first>Awais</first><last>Athar</last><affiliation>European Bioinformatics Institute - European Molecular Biology Laboratory (EMBL-EBI)</affiliation></author>
-      <author><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last><affiliation>Lahore University of Management Sciences</affiliation></author>
       <pages>30-45</pages>
       <abstract>This paper presents a novel methodology for generating synthetic Preference Optimization (PO) datasets using multi-model workflows. We evaluate the effectiveness and potential of these workflows in automating and enhancing the dataset generation process. PO dataset generation requires two modules: (1) <tex-math>\textit{response evaluation}</tex-math>, and (2) <tex-math>\textit{response generation}</tex-math>. In the <tex-math>\textit{response evaluation}</tex-math> module, the responses from Large Language Models (LLMs) are evaluated and ranked - a task typically carried out by human annotators that we automate using LLMs. We assess the response evaluation module in a 2 step process. In step 1, we assess LLMs as evaluators using three distinct prompting strategies. In step 2, we apply the winning prompting strategy to compare the performance of LLM-as-a-Judge, LLMs-as-a-Jury, and LLM Debate. Our evaluation shows that GPT-4o-as-a-Judge is more consistent across all datasets. For the <tex-math>\textit{response generation}</tex-math> module, we use the identified LLM evaluator configuration and compare different configurations of the LLM Feedback Loop. We use the win rate to determine the best multi-model configuration for generation. Experimenting with various configurations, we find that the LLM Feedback Loop, with Llama as the generator and Gemma as the reviewer, achieves a notable 71.8% and 73.8% win rate over single-model Llama and Gemma, respectively. After identifying the best configurations for both modules, we generate our PO datasets using the above pipeline.</abstract>
       <url hash="2f063e2a">2025.gem-1.4</url>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>HEDS</fixed-case> 3.0: The Human Evaluation Data Sheet Version 3.0</title>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Craig</first><last>Thomson</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
       <pages>60-81</pages>
       <abstract>This paper presents a new version of the Human Evaluation Datasheet (HEDS), numbered 3.0 This update is the result of our experience using HEDS in the context of numerous recent human evaluation experiments, including reproduction studies, and of feedback collected from other researchers. Our main overall goal was to improve clarity, and to enable users to complete the datasheet more consistently and comparably. The HEDS 3.0 package consists of the digital data sheet, documentation, and code for exporting completed data sheets as latex files, all available from the HEDS 3.0 GitHub.</abstract>
@@ -164,7 +164,7 @@
       <title>Evaluating <fixed-case>LLM</fixed-case>s with Multiple Problems at once</title>
       <author><first>Zhengxiang</first><last>Wang</last><affiliation>State University of New York at Stony Brook</affiliation></author>
       <author><first>Jordan</first><last>Kodner</last><affiliation>State University of New York, Stony Brook</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>178-199</pages>
       <abstract>This paper shows the benefits and fruitfulness of evaluating LLMs with multiple problems at once, a paradigm we call multi-problem evaluation (MPE). Unlike conventional single-problem evaluation, where a prompt presents a single problem and expects one specific answer, MPE places multiple problems together in a single prompt and assesses how well an LLM answers all these problems in a single output. Leveraging 6 classification and 12 reasoning benchmarks that already exist, we introduce a new benchmark called ZeMPE (Zero-shot Multi-Problem Evaluation), comprising 53,100 zero-shot multi-problem prompts. We experiment with a total of 13 LLMs from 5 model families on ZeMPE to present a comprehensive and systematic MPE. Our results show that LLMs are capable of handling multiple problems from a single data source as well as handling them separately, but there are conditions this multiple problem handling capability falls short. In addition, we perform in-depth further analyses and explore model-level factors that may enable multiple problem handling capabilities in LLMs. We release our corpus and code to facilitate future research.</abstract>
       <url hash="da2a5018">2025.gem-1.14</url>
@@ -232,7 +232,7 @@
     <paper id="23">
       <title><fixed-case>IRS</fixed-case>um: One Model to Rule Summarization and Retrieval</title>
       <author><first>Sotaro</first><last>Takeshita</last><affiliation>Universität Mannheim</affiliation></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last><affiliation>Universität Mannheim</affiliation></author>
       <author><first>Kai</first><last>Eckert</last><affiliation>Mannheim University of Applied Sciences</affiliation></author>
       <pages>262-275</pages>
       <abstract>Applications that store a large number of documents often have summarization and retrieval functionalities to help users digest large amounts of information efficiently. Currently, such systems need to run two task-specific models, for summarization and retrieval, redundantly on the same set of documents. An efficient approach to amend this redundancy would be to reuse hidden representations produced during the summary generation for retrieval. However, our experiment shows that existing models, including recent large language models, do not produce retrieval-friendly embeddings during summarization due to a lack of a contrastive objective during their training. To this end, we introduce a simple, cost-effective training strategy which integrates a contrastive objective into standard summarization training without requiring additional annotations. We empirically show that our model can perform on par or even outperform in some cases compared to the combination of two task-specific models while improving throughput and FLOPs by up to 17% and 20%, respectively.</abstract>
@@ -294,7 +294,7 @@
     <paper id="30">
       <title>Using <fixed-case>LLM</fixed-case> Judgements for Sanity Checking Results and Reproducibility of Human Evaluations in <fixed-case>NLP</fixed-case></title>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <pages>354-365</pages>
       <abstract>Human-like evaluation by LLMs of NLP systems is currently attracting a lot of interest, and correlations with human reference evaluations are often remarkably strong. However, this is not always the case, for unclear reasons which means that without also meta-evaluating against human evaluations (incurring the very cost automatic evaluation is intended to avoid), we don’t know if an LLM-as-judge evaluation is reliable or not. In this paper, we explore a type of evaluation scenario where this may not matter, because it comes with a built-in reliability check. We apply different LLM-as-judge methods to sets of three comparable human evaluations: (i) an original human evaluation, and (ii) two reproductions of it which produce contradicting reproducibility results. We find that in each case, the different LLM-as-judge methods (i) strongly agree with each other, and (ii) strongly agree with the results of one reproduction, while strongly disagreeing with the other. In combination, we take this to mean that a set of LLMs can be used to sanity check contradictory reproducibility results <i>if</i> the LLMs agree with each other, <i>and</i> the agreement of the LLMs with one set of results, and the disagreement with the other, are both strong.</abstract>
       <url hash="3e13d4a9">2025.gem-1.30</url>
@@ -323,8 +323,8 @@
       <author><first>Kristof</first><last>Varga</last><affiliation>NA</affiliation></author>
       <author><first>Győző Zijian</first><last>Yang</last><affiliation>Hungarian Research Centre for Linguistics</affiliation></author>
       <author><first>Enikő</first><last>Héja</last><affiliation>Hungarian Research Centre for Linguistics</affiliation></author>
-      <author><first>Tamás</first><last>Váradi</last><affiliation>Nyelvtudományi Kutatóközpont</affiliation></author>
-      <author><first>Gábor</first><last>Prószéky</last><affiliation>Hungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic</affiliation></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last><affiliation>Nyelvtudományi Kutatóközpont</affiliation></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last><affiliation>Hungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic</affiliation></author>
       <pages>385-403</pages>
       <abstract>In this study, we introduce the Hungarian Generative Model Evaluation (HuGME) benchmark, a new framework designed to assess the linguistic proficiency of large language models (LLMs) in Hungarian. HuGME evaluates models across a diverse set of linguistic and reasoning skills, including bias, toxicity, faithfulness, relevance, summarization, prompt alignment, readability, spelling, grammaticality, and domain-specific knowledge through tasks like TruthfulQA and MMLU. We applied HuGME to a range of Hungarian LLMs, including those developed in-house as well as several publicly available models that claim Hungarian language proficiency. This paper presents the comparative results of these evaluations, shedding light on the capabilities of current LLMs in processing the Hungarian language. Through our analysis, we aim to both showcase the current state of Hungarian linguistic processing in LLMs and provide a foundational resource for future advancements in the field.</abstract>
       <url hash="a801409e">2025.gem-1.32</url>
@@ -412,7 +412,7 @@
       <title>Event-based evaluation of abstractive news summarization</title>
       <author><first>Huiling</first><last>You</last></author>
       <author><first>Samia</first><last>Touileb</last><affiliation>University of Bergen</affiliation></author>
-      <author><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last><affiliation>Dept. of Informatics, University of Oslo</affiliation></author>
       <author><first>Erik</first><last>Velldal</last><affiliation>University of Oslo</affiliation></author>
       <pages>504-510</pages>
       <abstract>An abstractive summary of a news article contains its most important information in a condensed version. The evaluation of automatically generated summaries by generative language models relies heavily on human-authored summaries as gold references, by calculating overlapping units or similarity scores. News articles report events, and ideally so should the summaries. In this work, we propose to evaluate the quality of abstractive summaries by calculating overlapping events between generated summaries, reference summaries, and the original news articles. We experiment on a richly annotated Norwegian dataset comprising both events annotations and summaries authored by expert human annotators. Our approach provides more insight into the event information contained in the summaries.</abstract>
@@ -435,7 +435,7 @@
       <author><first>Yeon Su</first><last>Park</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sunwoo</first><last>Kim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Shin</first><last>Yoo</last></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Google and Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>522-531</pages>
       <abstract>Evaluating the performance and biases of large language models (LLMs) through role-playing scenarios is becoming increasingly common, as LLMs often exhibit biased behaviors in these contexts. Building on this line of research, we introduce PapersPlease, a benchmark consisting of 3,700 moral dilemmas designed to investigate LLMs’ decision-making in prioritizing various levels of human needs. In our setup, LLMs act as immigration inspectors deciding whether to approve or deny entry based on the short narratives of people. These narratives are constructed using the Existence, Relatedness, and Growth (ERG) theory, which categorizes human needs into three hierarchical levels. Our analysis of six LLMs reveals statistically significant patterns in decision-making, suggesting that LLMs encode implicit preferences. Additionally, our evaluation of the impact of incorporating social identities into the narratives shows varying responsiveness based on both motivational needs and identity cues, with some models exhibiting higher denial rates for marginalized identities. All data is publicly available at https://github.com/yeonsuuuu28/papers-please.</abstract>
       <url hash="73fe3ad7">2025.gem-1.47</url>
@@ -465,7 +465,7 @@
     </paper>
     <paper id="50">
       <title><fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #0729-04: Human Evaluation Reproduction Report for “<fixed-case>M</fixed-case>em<fixed-case>S</fixed-case>um: Extractive Summarization of Long Documents Using Multi-Step Episodic <fixed-case>M</fixed-case>arkov Decision Processes”</title>
-      <author><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
+      <author id="simeon-junker"><first>Simeon</first><last>Junker</last><affiliation>Universität Bielefeld</affiliation></author>
       <pages>561-567</pages>
       <abstract>Human evaluation is indispensable in natural language processing (NLP), as automatic metrics are known to not always align well with human judgments.However, the reproducibility of human evaluations can be problematic since results are susceptible to many factors, the details of which are often missing from the respective works.As part of the ReproHum project, this work aims to reproduce the human evaluation of a single criterion in the paper “MemSum: Extractive Summarization of Long Documents Using Multi-Step Episodic Markov Decision Processes” (Gu et al, 2022).The results of our reproduction differ noticeably from those of the original study. To explain this discrepancy, we discuss differences in the experimental setup, as well as more general characteristics of the selected domain and the generated summaries.</abstract>
       <url hash="ec9e5502">2025.gem-1.50</url>
@@ -523,7 +523,7 @@
       <title><fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #0067-01: A Reproduction of the Evaluation of Cross-Lingual Summarization</title>
       <author><first/><last>Supryadi</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Chuang</first><last>Liu</last><affiliation>National Supercomputing Center in Tianjin</affiliation></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>609-614</pages>
       <abstract>Human evaluation is crucial as it offers a nuanced understanding that automated metrics often miss. By reproducing human evaluation, we can gain a better understanding of the original results. This paper is part of the ReproHum project, where our goal is to reproduce human evaluations from previous studies. We report the reproduction results of the human evaluation of cross-lingual summarization conducted by (CITATION). By comparing the original and reproduction studies, we find that our overall evaluation findings are largely consistent with those of the previous study. However, there are notable differences in evaluation scores between the two studies for certain model outputs. These discrepancies highlight the importance of carefully selecting evaluation methodologies and human annotators.</abstract>
       <url hash="8a15c028">2025.gem-1.56</url>
@@ -563,7 +563,7 @@
     <paper id="60">
       <title>Bridging the <fixed-case>LLM</fixed-case> Accessibility Divide? Performance, Fairness, and Cost of Closed versus Open <fixed-case>LLM</fixed-case>s for Automated Essay Scoring</title>
       <author><first>Kezia</first><last>Oketch</last></author>
-      <author><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Yi</first><last>Yang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Ahmed</first><last>Abbasi</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>655-669</pages>
@@ -623,7 +623,7 @@
     <paper id="66">
       <title><fixed-case>P</fixed-case>ersona<fixed-case>T</fixed-case>win: A Multi-Tier Prompt Conditioning Framework for Generating and Evaluating Personalized Digital Twins</title>
       <author><first>Sihan</first><last>Chen</last><affiliation>CMU, Carnegie Mellon University</affiliation></author>
-      <author><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Yi</first><last>Yang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Ahmed</first><last>Abbasi</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>774-788</pages>
@@ -635,7 +635,7 @@
       <title>Coreference as an indicator of context scope in multimodal narrative</title>
       <author><first>Nikolai</first><last>Ilinykh</last><affiliation>Göteborg University</affiliation></author>
       <author><first>Shalom</first><last>Lappin</last></author>
-      <author><first>Asad B.</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad B.</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Sharid</first><last>Loáiciga</last><affiliation>University of Gothenburg, Sweden</affiliation></author>
       <pages>789-807</pages>
       <abstract>We demonstrate that large multimodal language models differ substantially from humans in the distribution of coreferential expressions in a visual storytelling task. We introduce a number of metrics to quantify the characteristics of coreferential patterns in both human- and machine-written texts. Humans distribute coreferential expressions in a way that maintains consistency across texts and images, interleaving references to different entities in a highly varied way. Machines are less able to track mixed references, despite achieving perceived improvements in generation quality. Materials, metrics, and code for our study are available at https://github.com/GU-CLASP/coreference-context-scope.</abstract>
@@ -656,7 +656,7 @@
       <title><fixed-case>MCQF</fixed-case>ormat<fixed-case>B</fixed-case>ench: Robustness Tests for Multiple-Choice Questions</title>
       <author><first>Hiroo</first><last>Takizawa</last><affiliation>Graduate University for Advanced Studies</affiliation></author>
       <author><first>Saku</first><last>Sugawara</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>824-846</pages>
       <abstract>Multiple-choice questions (MCQs) are often used to evaluate large language models (LLMs). They measure LLMs’ general common sense and reasoning abilities, as well as their knowledge in specific domains such as law and medicine. However, the robustness of LLMs to various question formats in MCQs has not been thoroughly evaluated. While there are studies on the sensitivity of LLMs to input variations, research into their responsiveness to different question formats is still limited. In this study, we propose a method to construct tasks to comprehensively evaluate the robustness against format changes of MCQs by decomposing the answering process into several steps. Using this dataset, we evaluate nine LLMs, such as Llama3-70B and Mixtral-8x7B. We find the lack of robustness to differences in the format of MCQs. It is crucial to consider whether the format of MCQs influences their evaluation scores when assessing LLMs using MCQ datasets.</abstract>
       <url hash="27febfae">2025.gem-1.69</url>
@@ -707,7 +707,7 @@
       <author><first>Luciano Del</first><last>Corro</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Arindam</first><last>Mitra</last><affiliation>Research, Microsoft</affiliation></author>
       <author><first>Tejas Indulal</first><last>Dhamecha</last><affiliation>Adobe Systems</affiliation></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Vishrav</first><last>Chaudhary</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sunayana</first><last>Sitaram</last><affiliation>Microsoft</affiliation></author>
@@ -740,7 +740,7 @@
     <paper id="76">
       <title>An Analysis of Datasets, Metrics and Models in Keyphrase Generation</title>
       <author><first>Florian</first><last>Boudin</last><affiliation>University of Nantes</affiliation></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
       <pages>973-973</pages>
       <abstract>Keyphrase generation refers to the task of producing a set of words or phrases that summarises the content of a document. Continuous efforts have been dedicated to this task over the past few years, spreading across multiple lines of research, such as model architectures, data resources, and use-case scenarios. Yet, the current state of keyphrase generation remains unknown as there has been no attempt to review and analyse previous work. In this paper, we bridge this gap by presenting an analysis of over 50 research papers on keyphrase generation, offering a comprehensive overview of recent progress, limitations, and open challenges. Our findings highlight several critical issues in current evaluation practices, such as the concerning similarity among commonly-used benchmark datasets and inconsistencies in metric calculations leading to overestimated performances. Additionally, we address the limited availability of pre-trained models by releasing a strong PLM-based model for keyphrase generation as an effort to facilitate future research.</abstract>
       <url hash="0dd2486a">2025.gem-1.76</url>
@@ -762,7 +762,7 @@
     </paper>
     <paper id="78">
       <title>The 2025 <fixed-case>R</fixed-case>epro<fixed-case>NLP</fixed-case> Shared Task on Reproducibility of Evaluations in <fixed-case>NLP</fixed-case>: Overview and Results</title>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <author><first>Craig</first><last>Thomson</last><affiliation>Dublin City University and University of Aberdeen</affiliation></author>
       <author><first>Javier</first><last>González Corbelle</last><affiliation>Universidad de Santiago de Compostela</affiliation></author>
       <author><first>Malo</first><last>Ruelle</last></author>
diff --git a/data/xml/2025.genaidetect.xml b/data/xml/2025.genaidetect.xml
index 072bcea900..55741d2462 100644
--- a/data/xml/2025.genaidetect.xml
+++ b/data/xml/2025.genaidetect.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect)</booktitle>
       <editor><first>Firoj</first><last>Alam</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Nizar</first><last>Habash</last></editor>
       <editor><first>Iryna</first><last>Gurevych</last></editor>
       <editor><first>Shammur</first><last>Chowdhury</last></editor>
@@ -68,7 +68,7 @@
     <paper id="5">
       <title>Cross-table Synthetic Tabular Data Detection</title>
       <author><first>G. Charbel N.</first><last>Kindji</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <author><first>Elisa</first><last>Fromont</last></author>
       <author><first>Tanguy</first><last>Urvoy</last></author>
       <pages>78–84</pages>
@@ -80,7 +80,7 @@
       <title>Your Large Language Models are Leaving Fingerprints</title>
       <author><first>Hope Elizabeth</first><last>McGovern</last></author>
       <author><first>Rickard</first><last>Stureborg</last></author>
-      <author><first>Yoshi</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last></author>
       <author><first>Dimitris</first><last>Alikaniotis</last></author>
       <pages>85–95</pages>
       <abstract>It has been shown that fine-tuned transformers and other supervised detectors are effective for distinguishing between human and machine-generated texts in non-adversarial settings, but we find that even simple classifiers on top of n-gram and part-of-speech features can achieve very robust performance on both in- and out-of-domain data. To understand how this is possible, we analyze machine-generated output text in four datasets, finding that LLMs possess unique fingerprints that manifest as slight differences in the frequency of certain lexical and morphosyntactic features. We show how to visualize such fingerprints, describe how they can be used to detect machine-generated text and find that they are even robust across text domains. We find that fingerprints are often persistent across models in the same model family (e.g. 13B parameter LLaMA’s fingerprint is similar to that of 65B parameter LLaMA) and that while a detector trained on text from one model can easily recognize text generated by a model in the same family, it struggles to detect text generated by an unrelated model.</abstract>
@@ -105,7 +105,7 @@
       <author><first>Siddharth</first><last>Mangalik</last></author>
       <author><first>Nikita</first><last>Soni</last></author>
       <author><first>Dave M.</first><last>Markowitz</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>111–119</pages>
       <abstract>In recent years, the proliferation of chatbots like ChatGPT and Claude has led to an increasing volume of AI-generated text. While the text itself is convincingly coherent and human-like, the variety of expressed of human attributes may still be limited. Using theoretical individual differences, the fundamental psychological traits which distinguish people, this study reveals a distinctive characteristic of such content: AI-generations exhibit remarkably limited variation in inferrable psychological traits compared to human-authored texts. We present a review and study across multiple datasets spanning various domains. We find that AI-generated text consistently models the authorship of an “average” human with such little variation that, on aggregate, it is clearly distinguishable from human-written texts using unsupervised methods (i.e., without using ground truth labels). Our results show that (1) fundamental human traits are able to accurately distinguish human- and machine-generated text and (2) current generation capabilities fail to capture a diverse range of human traits</abstract>
       <url hash="54d810d9">2025.genaidetect-1.8</url>
@@ -125,7 +125,7 @@
       <title>Text Graph Neural Networks for Detecting <fixed-case>AI</fixed-case>-Generated Content</title>
       <author><first>Andric</first><last>Valdez-Valenzuela</last></author>
       <author><first>Helena</first><last>Gómez-Adorno</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>134–139</pages>
       <abstract>The widespread availability of Large Language Models (LLMs) such as GPT-4 and Llama-3, among others, has led to a surge in machine-generated content across various platforms, including social media, educational tools, and academic settings. While these models demonstrate remarkable capabilities in generating coherent text, their misuse raises significant concerns. For this reason, detecting machine-generated text has become a pressing need to mitigate these risks. This research proposed a novel classification method combining text-graph representations with Graph Neural Networks (GNNs) and different node feature initialization strategies to distinguish between human-written and machine-generated content. Experimental results demonstrate that the proposed approach outperforms traditional machine learning classifiers, highlighting the effectiveness of integrating structural and semantic relationships in text.</abstract>
       <url hash="457a3b39">2025.genaidetect-1.10</url>
@@ -145,7 +145,7 @@
       <title><fixed-case>DCBU</fixed-case> at <fixed-case>G</fixed-case>en<fixed-case>AI</fixed-case> Detection Task 1: Enhancing Machine-Generated Text Detection with Semantic and Probabilistic Features</title>
       <author><first>Zhaowen</first><last>Zhang</last></author>
       <author><first>Songhao</first><last>Chen</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>150–154</pages>
       <abstract>This paper presents our approach to the MGT Detection Task 1, which focuses on detecting AI-generated content. The objective of this task is to classify texts as either machine-generated or human-written. We participated in Subtask A, which concentrates on English-only texts. We utilized the RoBERTa model for semantic feature extraction and the LLaMA3 model for probabilistic feature analysis. By integrating these features, we aimed to enhance the system’s classification accuracy. Our approach achieved strong results, with an F1 score of 0.7713 on Subtask A, ranking ninth among 36 teams. These results demonstrate the effectiveness of our feature integration strategy.</abstract>
       <url hash="768447a8">2025.genaidetect-1.12</url>
@@ -167,7 +167,7 @@
       <author><first>Abdul Gafar Manuel</first><last>Meque</last></author>
       <author><first>Nisar</first><last>Hussain</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>161–165</pages>
       <abstract>The ever-increasing spread of AI-generated text, driven by the considerable progress in large language models, entails a real problem for all digital platforms: how to ensure con tent authenticity. The team TechExperts(IPN) presents a method for detecting AI-generated content in English and multilingual contexts, using the google/gemma-2b model fine-tuned for COLING 2025 shared task 1 for English and multilingual. Training results show peak F1 scores of 97.63% for English and 97.87% for multilingual detection, highlighting the model’s effectiveness in supporting content integrity across platforms.</abstract>
       <url hash="df0dbca3">2025.genaidetect-1.14</url>
@@ -186,7 +186,7 @@
       <title>Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>G</fixed-case>en<fixed-case>AI</fixed-case> Detection Task 1: Qwen it detect machine-generated text?</title>
       <author><first>Claudiu</first><last>Creanga</last></author>
       <author><first>Teodor-George</first><last>Marchitan</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>173–177</pages>
       <abstract>We explored both masked language models and causal models. For Subtask A, our best model achieved first-place out of 36 teams when looking at F1 Micro (Auxiliary Score) of 0.8333, and second-place when looking at F1 Macro (Main Score) of 0.8301. For causal models, our best model was a fine-tuned version of Qwen and for masked models, our best model was a fine-tuned version of XLM-Roberta-Base.</abstract>
       <url hash="ecab5184">2025.genaidetect-1.16</url>
@@ -442,7 +442,7 @@
       <author><first>Mucahid</first><last>Kutlu</last></author>
       <author><first>Kaan Efe</first><last>Keleş</last></author>
       <author><first>Fatema</first><last>Ahmad</last></author>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <author><first>George</first><last>Mikros</last></author>
       <author><first>Firoj</first><last>Alam</last></author>
       <pages>323–333</pages>
diff --git a/data/xml/2025.genaik.xml b/data/xml/2025.genaik.xml
index b0d194fd97..f2f237d24a 100644
--- a/data/xml/2025.genaik.xml
+++ b/data/xml/2025.genaik.xml
@@ -23,7 +23,7 @@
     <paper id="1">
       <title>Effective Modeling of Generative Framework for Document-level Relational Triple Extraction</title>
       <author><first>Pratik</first><last>Saini</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <pages>1–12</pages>
       <abstract>Document-level relation triple extraction (DocRTE) is a complex task that involves three key sub-tasks: entity mention extraction, entity clustering, and relation triple extraction. Past work has applied discriminative models to address these three sub-tasks, either by training them sequentially in a pipeline fashion or jointly training them. However, while end-to-end discriminative or generative models have proven effective for sentence-level relation triple extraction, they cannot be trivially extended to the document level, as they only handle relation extraction without addressing the remaining two sub-tasks, entity mention extraction or clustering. In this paper, we propose a three-stage generative framework leveraging a pre-trained BART model to address all three tasks required for document-level relation triple extraction. Tested on the widely used DocRED dataset, our approach outperforms previous generative methods and achieves competitive performance against discriminative models.</abstract>
       <url hash="8c3f1730">2025.genaik-1.1</url>
@@ -162,7 +162,7 @@
     <paper id="14">
       <title>Entity Quality Enhancement in Knowledge Graphs through <fixed-case>LLM</fixed-case>-based Question Answering</title>
       <author><first>Morteza</first><last>Kamaladdini Ezzabady</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>136–145</pages>
       <abstract>Most models for triple extraction from texts primarily focus on named entities. However, real-world applications often comprise non-named entities that pose serious challenges for entity linking and disambiguation. We focus on these entities and propose the first LLM-based entity revision framework to improve the quality of extracted triples via a multi-choice question-answering mechanism. When evaluated on two benchmark datasets, our results show a significant improvement, thereby generating more reliable triples for knowledge graphs.</abstract>
       <url hash="3c7ac461">2025.genaik-1.14</url>
diff --git a/data/xml/2025.gitt.xml b/data/xml/2025.gitt.xml
index 380126439b..3240d07bc6 100644
--- a/data/xml/2025.gitt.xml
+++ b/data/xml/2025.gitt.xml
@@ -25,7 +25,7 @@
       <title>Are We Paying Attention to Her? Investigating Gender Disambiguation and Attention in Machine Translation</title>
       <author><first>Chiara</first><last>Manna</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Eva</first><last>Vanmassenhove</last></author>
       <pages>1–16</pages>
       <abstract>While gender bias in modern Neural Machine Translation (NMT) systems has received much attention, the traditional evaluation metrics for these systems do not fully capture the extent to which models integrate contextual gender cues. We propose a novel evaluation metric called Minimal Pair Accuracy (MPA) which measures the reliance of models on gender cues for gender disambiguation. We evaluate a number of NMT models using this metric, we show that they ignore available gender cues in most cases in favour of (statistical) stereotypical gender interpretation. We further show that in anti-stereotypical cases, these models tend to more consistently take male gender cues into account while ignoring the female cues. Finally, we analyze the attention head weights in the encoder component of these models and show that while all models to some extent encode gender information, the male gender cues elicit a more diffused response compared to the more concentrated and specialized responses to female gender cues.</abstract>
@@ -46,7 +46,7 @@
       <title>An <fixed-case>LLM</fixed-case>-as-a-judge Approach for Scalable Gender-Neutral Translation Evaluation</title>
       <author><first>Andrea</first><last>Piergentili</last></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>46–63</pages>
       <abstract>Gender-neutral translation (GNT) aims to avoid expressing the gender of human referents when the source text lacks explicit cues about the gender of those referents. Evaluating GNT automatically is particularly challenging, with current solutions being limited to monolingual classifiers. Such solutions are not ideal because they do not factor in the source sentence and require dedicated data and fine-tuning to scale to new languages. In this work, we address such limitations by investigating the use of large language models (LLMs) as evaluators of GNT. Specifically, we explore two prompting approaches: one in which LLMs generate sentence-level assessments only, and another—akin to a chain-of-thought approach—where they first produce detailed phrase-level annotations before a sentence-level judgment. Through extensive experiments on multiple languages with five models, both open and proprietary, we show that LLMs can serve as evaluators of GNT. Moreover, we find that prompting for phrase-level annotations before sentence-level assessments consistently improves the accuracy of all models, providing a better and more scalable alternative to current solutions.</abstract>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="4">
       <title>Did <fixed-case>I</fixed-case> (she) or <fixed-case>I</fixed-case> (he) buy this? Or rather <fixed-case>I</fixed-case> (she/he)? Towards first-person gender neutral translation by <fixed-case>LLM</fixed-case>s</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <author><first>Anastasiia</first><last>Göldner</last></author>
       <pages>64–73</pages>
diff --git a/data/xml/2025.in2writing.xml b/data/xml/2025.in2writing.xml
index 0398c8a848..2f6e3dcc2a 100644
--- a/data/xml/2025.in2writing.xml
+++ b/data/xml/2025.in2writing.xml
@@ -7,7 +7,7 @@
       <editor><first>Katy</first><last>Gero</last></editor>
       <editor><first>Thiemo</first><last>Wambsganss</last></editor>
       <editor><first>Sarah</first><last>Sterman</last></editor>
-      <editor><first>Ting-Hao</first><last>Huang</last></editor>
+      <editor id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></editor>
       <editor><first>David</first><last>Zhou</last></editor>
       <editor><first>John</first><last>Chung</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -47,7 +47,7 @@
       <author><first>Bashar</first><last>Alhafni</last><affiliation>New York University</affiliation></author>
       <author><first>Chatrine</first><last>Qwaider</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and Chalmers University of Technology</affiliation></author>
       <author><first>Nizar</first><last>Habash</last><affiliation>New York University Abu Dhabi</affiliation></author>
-      <author><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>11-18</pages>
       <abstract>Although Arabic is spoken by over 400 million people, advanced Arabic writing assistance tools remain limited. To address this gap, we present ARWI, a new writing assistant that helps learners improve essay writing in Modern Standard Arabic. ARWI is the first publicly available Arabic writing assistant to include a prompt database for different proficiency levels, an Arabic text editor, state-of-the-art grammatical error detection and correction, and automated essay scoring aligned with the Common European Framework of Reference standards for language attainment (https://arwi.mbzuai.ac.ae/). Moreover, ARWI can be used to gather a growing auto-annotated corpus, facilitating further research on Arabic grammar correction and essay scoring, as well as profiling patterns of errors made by native speakers and non-native learners. A preliminary user study shows that ARWI provides actionable feedback, helping learners identify grammatical gaps, assess language proficiency, and guide improvement.</abstract>
       <url hash="b854702c">2025.in2writing-1.2</url>
diff --git a/data/xml/2025.indonlp.xml b/data/xml/2025.indonlp.xml
index 238d7009c8..eb7e7cbc16 100644
--- a/data/xml/2025.indonlp.xml
+++ b/data/xml/2025.indonlp.xml
@@ -52,7 +52,7 @@
       <title>Evaluating Structural and Linguistic Quality in <fixed-case>U</fixed-case>rdu <fixed-case>DRS</fixed-case> Parsing and Generation through Bidirectional Evaluation</title>
       <author><first>Muhammad Saad</first><last>Amin</last></author>
       <author><first>Luca</first><last>Anselma</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>33–43</pages>
       <abstract>Evaluating Discourse Representation Structure (DRS)-based systems for semantic parsing (Text-to-DRS) and generation (DRS-to-Text) poses unique challenges, particularly in low-resource languages like Urdu. Traditional metrics often fall short, focusing either on structural accuracy or linguistic quality, but rarely capturing both. To address this limitation, we introduce two complementary evaluation methodologies—Parse-Generate (PARS-GEN) and Generate-Parse (GEN-PARS)—designed for a more comprehensive assessment of DRS-based systems. PARS-GEN evaluates the parsing process by converting DRS outputs back to the text, revealing linguistic nuances often missed by structure-focused metrics like SMATCH. Conversely, GEN-PARS assesses text generation by converting generated text into DRS, providing a semantic perspective that complements surface-level metrics such as BLEU, METEOR, and BERTScore. Using the Parallel Meaning Bank (PMB) dataset, we demonstrate our methodology across Urdu, uncovering unique insights into Urdu’s structural and linguistic interplay. Findings show that traditional metrics frequently overlook the complexity of linguistic and semantic fidelity, especially in low-resource languages. Our dual approach offers a robust framework for evaluating DRS-based systems, enhancing semantic parsing and text generation quality.</abstract>
       <url hash="b0b51275">2025.indonlp-1.4</url>
@@ -95,7 +95,7 @@
       <author><first>Aneesh</first><last>Bose</last></author>
       <author><first>Guneet Singh</first><last>Kohli</last></author>
       <author><first>Smruti Smita</first><last>Lenka</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>58–66</pages>
       <abstract>This paper introduces OVQA, the first multimodal dataset designed for visual question-answering (VQA), visual question elicitation (VQE), and multimodal research for the low-resource Odia language. The dataset was created by manually translating 6,149 English question-answer pairs, each associated with 6,149 unique images from the Visual Genome dataset. This effort resulted in 27,809 English-Odia parallel sentences, ensuring a semantic match with the corresponding visual information. Several baseline experiments were conducted on the dataset, including visual question answering and visual question elicitation. The dataset is the first VQA dataset for the low-resource Odia language and will be released for multimodal research purposes and also help researchers extend for other low-resource languages.</abstract>
       <url hash="c05dc867">2025.indonlp-1.7</url>
diff --git a/data/xml/2025.insights.xml b/data/xml/2025.insights.xml
index e2a71c91d4..4fa5098dab 100644
--- a/data/xml/2025.insights.xml
+++ b/data/xml/2025.insights.xml
@@ -25,7 +25,7 @@
     </frontmatter>
     <paper id="1">
       <title>Challenging Assumptions in Learning Generic Text Style Embeddings</title>
-      <author><first>Phil</first><last>Ostheimer</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
+      <author id="phil-sidney-ostheimer"><first>Phil</first><last>Ostheimer</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
       <author><first>Marius</first><last>Kloft</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
       <author><first>Sophie</first><last>Fellenz</last><affiliation>RPTU Kaiserslautern-Landau</affiliation></author>
       <pages>1-6</pages>
@@ -69,9 +69,9 @@
     </paper>
     <paper id="5">
       <title>Do Prevalent Bias Metrics Capture Allocational Harms from <fixed-case>LLM</fixed-case>s?</title>
-      <author><first>Hannah</first><last>Cyberey</last><affiliation>University of Virginia</affiliation></author>
+      <author id="hannah-cyberey"><first>Hannah</first><last>Cyberey</last><affiliation>University of Virginia</affiliation></author>
       <author><first>Yangfeng</first><last>Ji</last><affiliation>University of Virginia</affiliation></author>
-      <author><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last><affiliation>University of Virginia</affiliation></author>
       <pages>34-45</pages>
       <abstract>Allocational harms occur when resources or opportunities are unfairly withheld from specific groups. Many proposed bias measures ignore the discrepancy between predictions, which are what the proposed methods consider, and decisions that are made as a result of those predictions. Our work examines the reliability of current bias metrics in assessing allocational harms arising from predictions of large language models (LLMs). We evaluate their predictive validity and utility for model selection across ten LLMs and two allocation tasks. Our results reveal that commonly-used bias metrics based on average performance gap and distribution distance fail to reliably capture group disparities in allocation outcomes. Our work highlights the need to account for how model predictions are used in decisions, in particular in contexts where they are influenced by how limited resources are allocated.</abstract>
       <url hash="c6276486">2025.insights-1.5</url>
@@ -152,7 +152,7 @@
       <title>Exploring Limitations of <fixed-case>LLM</fixed-case> Capabilities with Multi-Problem Evaluation</title>
       <author><first>Zhengxiang</first><last>Wang</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Jordan</first><last>Kodner</last><affiliation>Stony Brook University</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <pages>121-140</pages>
       <abstract>We propose using prompts made up of multiple problems to evaluate LLM capabilities, an approach we call multi-problem evaluation. We examine 7 LLMs on 4 related task types constructed from 6 existing classification benchmarks. We find that while LLMs can generally perform multiple homogeneous classifications at once (Batch Classification) as well as when they do so separately, they perform significantly worse on two selection tasks that are conceptually equivalent to Batch Classification and involve selecting indices of text falling into each class label, either independently or altogether. We show that such a significant performance drop is due to LLMs’ inability to adequately combine index selection with text classification. Such a drop is surprisingly observed across all LLMs attested, under zero-shot, few-shot, and CoT settings, and even with a novel synthetic dataset, potentially reflecting an inherent capability limitation with modern LLMs.</abstract>
       <url hash="3e5c7094">2025.insights-1.12</url>
diff --git a/data/xml/2025.iwpt.xml b/data/xml/2025.iwpt.xml
index 263bfc42a8..21c52e81e0 100644
--- a/data/xml/2025.iwpt.xml
+++ b/data/xml/2025.iwpt.xml
@@ -49,7 +49,7 @@
     <paper id="4">
       <title>High-Accuracy Transition-Based Constituency Parsing</title>
       <author><first>John</first><last>Bauer</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Christopher D.</first><last>Manning</last><affiliation/></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>26-39</pages>
       <abstract>Constituency parsers have improved markedly in recent years, with the F1 accuracy on the venerable Penn Treebank reaching 96.47, half of the error rate of the first transformer model in 2017. However, while dependency parsing frequently uses transition-based parsers, it is unclear whether transition-based parsing can still provide state-of-the-art results for constituency parsing. Despite promising work by Liu and Zhang in 2017 using an in-order transition-based parser, recent work uses other methods, mainly CKY charts built over LLM encoders. Starting from previous work, we implement self-training and a dynamic oracle to make a language-agnostic transition-based constituency parser. We test on seven languages; using Electra embeddings as the input layer on Penn Treebank, with a self-training dataset built from Wikipedia, our parser achieves a new SOTA F1 of 96.61.</abstract>
       <url hash="abc93c8b">2025.iwpt-1.4</url>
diff --git a/data/xml/2025.iwsds.xml b/data/xml/2025.iwsds.xml
index fa42a965be..105d0061a1 100644
--- a/data/xml/2025.iwsds.xml
+++ b/data/xml/2025.iwsds.xml
@@ -50,7 +50,7 @@
       <author><first>Inass</first><last>Rachidi</last></author>
       <author><first>Anas</first><last>Ezzakri</last></author>
       <author><first>Jaime</first><last>Bellver-Soler</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <pages>20–28</pages>
       <abstract>This paper presents the design, synthetic generation, and automated evaluation of ArtGenEval-GPT++, an advanced dataset for training and fine-tuning conversational agents with artificial awareness capabilities targeting to the art domain. Building on the foundation of a previously released dataset (ArtGenEval-GPT), the new version introduces enhancements for greater personalization (e.g., gender, ethnicity, age, and knowledge) while addressing prior limitations, including low-quality dialogues and hallucinations. The dataset comprises approximately 12,500 dyadic, multi-turn dialogues generated using state-of-the-art large language models (LLMs). These dialogues span diverse museum scenarios, incorporating varied visitor profiles, emotional states, interruptions, and chatbot behaviors. Objective evaluations confirm the dataset’s quality and contextual coherence. Ethical considerations, including biases and hallucinations, are analyzed, with proposed directions for improving the dataset utility. This work contributes to the development of personalized, context-aware conversational agents capable of navigating complex, real-world environments, such as museums, to enhance visitor engagement and satisfaction.</abstract>
       <url hash="23300049">2025.iwsds-1.3</url>
@@ -96,8 +96,8 @@
       <author><first>Xiaocheng</first><last>Yang</last></author>
       <author><first>Emre Can</first><last>Acikgoz</last></author>
       <author><first>Suvodip</first><last>Dey</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>72–102</pages>
       <abstract>Large language model (LLM)-based agents have been increasingly used to interact with external environments (e.g., games, APIs, etc.) and solve tasks. However, current frameworks do not enable these agents to work with users and interact with them to align on the details of their tasks and reach user-defined goals; instead, in ambiguous situations, these agents may make decisions based on assumptions. This work introduces ReSpAct (Reason, Speak, and Act), a novel framework that synergistically combines the essential skills for building task-oriented “conversational” agents. ReSpAct addresses this need for agents, expanding on the ReAct approach. ReSpAct framework enables agents to interpret user instructions, reason about complex tasks, execute appropriate actions and engage in dynamic dialogue to seek guidance, clarify ambiguities, understand user preferences, resolve problems, and use the intermediate feedback and responses of users to update their plans. We evaluated ReSpAct with GPT-4 in environments supporting user interaction, such as task-oriented dialogue (MultiWOZ) and interactive decision-making (Alfworld, WebShop), ReSpAct is flexible enough to incorporate dynamic user feedback and addresses prevalent issues like error propagation and agents getting stuck in reasoning loops. This results in more interpretable, human-like task-solving trajectories than baselines relying solely on reasoning traces. In two interactive decision-making benchmarks, AlfWorld and WebShop, ReSpAct outperforms strong reasoning-only method ReAct by an absolute success rate of 6% and 4%, respectively. In the task-oriented dialogue benchmark MultiWOZ, ReSpAct improved Inform and Success scores by 5.5% and 3%, respectively.</abstract>
       <url hash="f7184b61">2025.iwsds-1.7</url>
@@ -133,7 +133,7 @@
       <author><first>Samuel</first><last>Ramos-Varela</last></author>
       <author><first>Jaime</first><last>Bellver-Soler</last></author>
       <author><first>Marcos</first><last>Estecha-Garitagoitia</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <pages>129–136</pages>
       <abstract>Recent studies suggest that increasing the context window of language models could outperform retrieval-augmented generation (RAG) methods in certain tasks. However, in domains such as art and museums, where information is inherently multimodal, combining images and detailed textual descriptions, this assumption needs closer examination. To explore this, we compare RAG techniques with direct large-context input approaches for answering questions about artworks. Using a dataset of painting images paired with textual information, we develop a synthetic database of question-answer (QA) pairs for evaluating these methods. The focus is on assessing the efficiency and accuracy of RAG in retrieving and using relevant information compared to passing the entire textual context to a language model. Additionally, we experiment with various strategies for segmenting and retrieving text to optimise the RAG pipeline. The results aim to clarify the trade-offs between these approaches and provide valuable insights for interactive systems designed for art and museum contexts.</abstract>
       <url hash="0c135120">2025.iwsds-1.10</url>
@@ -155,7 +155,7 @@
       <author><first>Morgan</first><last>Veyret</last></author>
       <author><first>Miguel</first><last>Couceiro</last></author>
       <author><first>Ondrej</first><last>Dusek</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <pages>143–153</pages>
       <abstract>Large language models (LLMs) gained immense popularity due to their impressive capabilities in unstructured conversations. Empowering LLMs with advanced prompting strategies such as reasoning and acting (ReAct) (Yao et al., 2022) has shown promise in solving complex tasks traditionally requiring reinforcement learning. In this work, we apply the ReAct strategy to guide LLMs performing task-oriented dialogue (TOD). We evaluate ReAct-based LLMs (ReAct-LLMs) both in simulation and with real users. While ReAct-LLMs severely underperform state-of-the-art approaches on success rate in simulation, this difference becomes less pronounced in human evaluation. Moreover, compared to the baseline, humans report higher subjective satisfaction with ReAct-LLM despite its lower success rate, most likely thanks to its natural and confidently phrased responses.</abstract>
       <url hash="1614015d">2025.iwsds-1.12</url>
@@ -165,8 +165,8 @@
       <title>Design of a conversational agent to support people on suicide risk</title>
       <author><first>Mario</first><last>Manso Vázquez</last></author>
       <author><first>José Manuel</first><last>Ramírez Sánchez</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
-      <author><first>Laura</first><last>Docío-Fernández</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docío-Fernández</last></author>
       <author><first>Manuel José</first><last>Fernández-Iglesias</last></author>
       <author><first>Beatriz</first><last>Gómez-Gómez</last></author>
       <author><first>Beatriz</first><last>Pinal</last></author>
@@ -180,7 +180,7 @@
     <paper id="14">
       <title>Optimizing <fixed-case>RAG</fixed-case>: Classifying Queries for Dynamic Processing</title>
       <author><first>Kabir</first><last>Olawore</last></author>
-      <author><first>Michael</first><last>McTear</last></author>
+      <author id="michael-f-mctear"><first>Michael</first><last>McTear</last></author>
       <author><first>Yaxin</first><last>Bi</last></author>
       <author><first>David</first><last>Griol</last></author>
       <pages>160–164</pages>
@@ -206,7 +206,7 @@
       <author><first>Ksenia</first><last>Kharitonova</last></author>
       <author><first>Juan Manuel</first><last>Montero-Martínez</last></author>
       <author><first>David</first><last>Pérez Fernández</last></author>
-      <author><first>Fernando</first><last>Fernández-Martínez</last></author>
+      <author id="fernando-fernandez-martinez"><first>Fernando</first><last>Fernández-Martínez</last></author>
       <pages>172–175</pages>
       <abstract>Conversational AI (ConvAI) systems are gaining growing importance as an alternative for more natural interaction with digital services. In this context, Large Language Models (LLMs) have opened new possibilities for less restricted interaction and richer natural language understanding. However, despite their advanced capabilities, LLMs can pose accuracy and reliability problems, as they sometimes generate factually incorrect or contextually inappropriate content that does not fulfill the regulations or business rules of a specific application domain. In addition, they still do not possess the capability to adjust to users’ needs and preferences, showing emotional awareness, while concurrently adhering to the regulations and limitations of their designated domain. In this paper we present the TrustBoost project, which addresses the challenge of improving trustworthiness of ConvAI from two dimensions: cognition (adaptability, flexibility, compliance, and performance) and affectivity (familiarity, emotional dimension, and perception). The duration of the project is from September 2024 to December 2027.</abstract>
       <url hash="7bc6a9d5">2025.iwsds-1.16</url>
@@ -247,7 +247,7 @@
       <author><first>Santiago Andrés</first><last>Moreno-Acevedo</last></author>
       <author><first>Ander</first><last>González-Docasal</last></author>
       <author><first>Maria Ines</first><last>Torres</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <pages>190–195</pages>
       <abstract>This demo paper presents a prototype of a multilingual, speech-based driver assistant, designed to support both English and Basque languages. The inclusion of Basque—a low-resource language with limited domain-specific training data—marks a significant contribution, as publicly available AI models, including Large Language Models, often underperform for such languages compared to high-resource languages like English. Despite these challenges, our system demonstrates robust performance, successfully understanding user queries and delivering rapid responses in a demanding environment: a car simulator. Notably, the system achieves comparable performance in both English and Basque, showcasing its effectiveness in addressing linguistic disparities in AI-driven applications. A demo of our prototype will be available in the workshop.</abstract>
       <url hash="a645241d">2025.iwsds-1.19</url>
@@ -257,7 +257,7 @@
       <title>Intimebot – A Dialogue Agent for Timekeeping Support</title>
       <author><first>Shoaib</first><last>Khan</last></author>
       <author><first>Alex</first><last>Samani</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <pages>196–199</pages>
       <abstract>This demo paper presents intimebot, an AI-powered timekeeping solution designed to assist with timekeeping. Timekeeping is a fundamental but also overwhelming and complex task in many professional services practices. Our intimebot demo demonstrates how Artificial Intelligence can be utilized to implement a more efficient timekeeping process within a firm. Based on brief work descriptions provided by the timekeeper, intimebot is able to (1) predict the relevant combination of client, matter, and phase, (2) estimate the work effort hours, and (3) rewrite and normalize the provided work description into a compliant narrative. This can save a significant amount of time for busy professionals while ensuring terms of business compliance and best practices.</abstract>
       <url hash="2d1b23af">2025.iwsds-1.20</url>
@@ -293,7 +293,7 @@
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Arantza</first><last>del Pozo</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Ander</first><last>Salaberria</last></author>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Pablo</first><last>Martínez</last></author>
@@ -385,7 +385,7 @@
       <author><first>Jaime</first><last>Bellver-Soler</last></author>
       <author><first>Mario</first><last>Rodriguez-Cantelar</last></author>
       <author><first>Ricardo</first><last>Córdoba</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <pages>284–289</pages>
       <abstract>Recent developments in Multimodal Large Language Models (MLLMs) have provided novel insights into Speech Emotion Recognition (SER). However, combining high-dimensional speech signals with textual tokens can lead to a rapid growth in input tokens, increasing computational costs and inference times. This “token overload” also risks shadowing essential textual cues, affecting the reasoning capabilities of the language model and diluting emotional information crucial to accurate SER. In this paper, we explore different token drop methods that mitigate excessive token counts while preserving both emotional nuances and the core linguistic capabilities of the model. Specifically, we compare various pooling approaches to produce a compact representation. Our preliminary findings suggest that these techniques can reduce computational costs without decreasing SER accuracy.</abstract>
       <url hash="67d503fe">2025.iwsds-1.30</url>
@@ -393,8 +393,8 @@
     </paper>
     <paper id="31">
       <title>Integrating Conversational Entities and Dialogue Histories with Knowledge Graphs and Generative <fixed-case>AI</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>290–298</pages>
       <abstract>Existing methods for storing dialogue history and for tracking mentioned entities in spoken dialogues usually handle these tasks separately. Recent advances in knowledge graphs and generative AI make it possible to integrate them in a framework with a uniform representation for dialogue management. This may help to build more natural and grounded dialogue models that can reduce misunderstanding and lead to more reliable dialogue-based interactions with AI agents. The paper describes ongoing work on this approach.</abstract>
       <url hash="ece16ab2">2025.iwsds-1.31</url>
@@ -405,7 +405,7 @@
       <author><first>Ahmed</first><last>Njifenjou</last></author>
       <author><first>Virgile</first><last>Sucal</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>299–308</pages>
       <abstract>Among the numerous models developed to represent the multifaceted complexity of human personality, particularly in psychology, the Big Five (commonly referred to as ‘OCEAN’, an acronym of its five traits) stands out as a widely used framework. Although personalized chatbots have incorporated this model, existing approaches, such as focusing on individual traits or binary combinations, may not capture the full diversity of human personality. In this study, we propose a five-dimensional vector representation, where each axis corresponds to the degree of presence of an OCEAN trait on a continuous scale from 0 to 1. This representation is designed to enable greater versatility in modeling personality. Application to customer assistance scenarios in French demonstrates that, based on humans-bots as well as bots-bots conversations, assigned personality vectors are distinguishable by both humans and LLMs acting as judges. Both of their subjective evaluations also confirm the measurable impacts of the assigned personality on user experience, agent efficiency, and conversation quality.</abstract>
       <url hash="1dc79343">2025.iwsds-1.32</url>
diff --git a/data/xml/2025.iwslt.xml b/data/xml/2025.iwslt.xml
index dafa1acfc8..b0cf9b1da7 100644
--- a/data/xml/2025.iwslt.xml
+++ b/data/xml/2025.iwslt.xml
@@ -71,9 +71,9 @@
       <author><first>Alessio</first><last>Brutti</last><affiliation>FBK</affiliation></author>
       <author><first>Mauro</first><last>Cettolo</last><affiliation>FBK</affiliation></author>
       <author><first>Roberto</first><last>Gretter</last><affiliation>FBK</affiliation></author>
-      <author><first>Marco</first><last>Matassoni</last><affiliation>FBK</affiliation></author>
+      <author id="marco-matassoni"><first>Marco</first><last>Matassoni</last><affiliation>FBK</affiliation></author>
       <author><first>Mohamed</first><last>Nabih</last><affiliation>FBK</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
       <pages>47-55</pages>
       <abstract>Training large-scale models presents challenges not only in terms of resource requirements but also in terms of their convergence. For this reason, the learning rate (LR) is often decreased when the size of a model is increased. Such a simple solution is not enough in the case of speech-to-text (S2T) trainings, where evolved and more complex variants of the Transformer architecture – e.g., Conformer or Branchformer – are used in light of their better performance. As a workaround, OWSM designed a double linear warmup of the LR, increasing it to a very small value in the first phase before updating it to a higher value in the second phase. While this solution worked well in practice, it was not compared with alternative solutions, nor was the impact on the final performance of different LR warmup schedules studied. This paper fills this gap, revealing that i) large-scale S2T trainings demand a sub-exponential LR warmup, and ii) a higher LR in the warmup phase accelerates initial convergence, but it does not boost final performance.</abstract>
       <url hash="7d107e26">2025.iwslt-1.4</url>
@@ -116,10 +116,10 @@
     <paper id="8">
       <title>Conversational <fixed-case>S</fixed-case>imul<fixed-case>MT</fixed-case>: Efficient Simultaneous Translation with Large Language Models</title>
       <author><first>Minghan</first><last>Wang</last><affiliation>Monash University</affiliation></author>
-      <author><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last><affiliation>Monash University</affiliation></author>
       <author><first>Yuxia</first><last>Wang</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University</affiliation></author>
       <pages>93-105</pages>
       <abstract>Simultaneous machine translation (SimulMT) presents a challenging trade-off between translation quality and latency. Recent studies have shown that LLMs can achieve good performance in SimulMT tasks. However, this often comes at the expense of high inference costs and latency. In this paper, we propose a conversational SimulMT framework to enhance the inference efficiency of LLM-based SimulMT through multi-turn-dialogue-based decoding where source and target chunks interleave in translation history, enabling the reuse of Key-Value cache. To adapt LLMs to the proposed conversational decoding, we create supervised fine-tuning training data by segmenting parallel sentences using an alignment tool and a novel augmentation technique to enhance generalization. Our experiments with Llama2-7b-chat on three SimulMT benchmarks demonstrate that the proposed method empowers the superiority of LLM in translation quality, meanwhile achieving comparable computational latency with specialized SimulMT models.</abstract>
       <url hash="e8ccf2af">2025.iwslt-1.8</url>
@@ -156,7 +156,7 @@
     <paper id="11">
       <title>Prompting <fixed-case>LLM</fixed-case>s: Length Control for Isometric Machine Translation</title>
       <author><first>Dávid</first><last>Javorský</last><affiliation>Charles Univerzity, Faculty of Mathematics and Physics</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <author><first>François</first><last>Yvon</last><affiliation>ISIR CNRS &amp; Sorbonne Université</affiliation></author>
       <pages>119-137</pages>
       <abstract>In this study, we explore the effectiveness of isometric machine translation across multiple language pairs (En<tex-math>o</tex-math>De, En<tex-math>o</tex-math>Fr, and En<tex-math>o</tex-math>Es) under the conditions of the IWSLT Isometric Shared Task 2022. Using eight open-source large language models (LLMs) of varying sizes, we investigate how different prompting strategies, varying numbers of few-shot examples, and demonstration selection influence translation quality and length control. We discover that the phrasing of instructions, when aligned with the properties of the provided demonstrations, plays a crucial role in controlling the output length. Our experiments show that LLMs tend to produce shorter translations only when presented with extreme examples, while isometric demonstrations often lead to the models disregarding length constraints. While few-shot prompting generally enhances translation quality, further improvements are marginal across 5, 10, and 20-shot settings. Finally, considering multiple outputs allows to notably improve overall tradeoff between the length and quality, yielding state-of-the-art performance for some language pairs.</abstract>
@@ -194,7 +194,7 @@
       <author><first>Matthew</first><last>Wiesner</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Dan</first><last>Povey</last><affiliation>Xiaomi, Inc.</affiliation></author>
       <author><first>Leibny</first><last>Paola Garcia Perera</last><affiliation>Johns Hopkins University</affiliation></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>153-164</pages>
       <abstract>Neural transducers (NT) provide an effective framework for speech streaming, demonstrating strong performance in automatic speech recognition (ASR). However, the application of NT to speech translation (ST) remains challenging, as existing approaches struggle with word reordering and performance degradation when jointly modeling ASR and ST, resulting in a gap with attention-based encoder-decoder (AED) models. Existing NT-based ST approaches also suffer from high computational training costs. To address these issues, we propose HENT-SRT (Hierarchical Efficient Neural Transducer for Speech Recognition and Translation), a novel framework that factorizes ASR and translation tasks to better handle reordering. To ensure robust ST while preserving ASR performance, we use self-distillation with CTC consistency regularization. Moreover, we improve computational efficiency by incorporating best practices from ASR transducers, including a down-sampled hierarchical encoder, a stateless predictor, and a pruned transducer loss to reduce training complexity. Finally, we introduce a blank penalty during decoding, reducing deletions and improving translation quality. Our approach is evaluated on three conversational datasets Arabic, Spanish, and Mandarin achieving new state-of-the-art performance among NT models and substantially narrowing the gap with AED-based systems.</abstract>
       <url hash="d962cf87">2025.iwslt-1.14</url>
@@ -205,7 +205,7 @@
       <title><fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Speech Translation and the Curse of Multidialectality</title>
       <author><first>Martin</first><last>Bär</last><affiliation>University of Malta, University of the Basque Country</affiliation></author>
       <author><first>Andrea</first><last>DeMarco</last><affiliation>University of Malta</affiliation></author>
-      <author><first>Gorka</first><last>Labaka</last><affiliation>UPV/EHU</affiliation></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last><affiliation>UPV/EHU</affiliation></author>
       <pages>165-179</pages>
       <abstract>In many languages, non-standardized varieties make the development of NLP models challenging. This paper explores various fine-tuning techniques and data setups for training Swiss German to Standard German speech-to-text translation models. While fine-tuning on all available Swiss German data yields the best results, ASR pre-training lowers performance by 1.48 BLEU points, and jointly training on Swiss and Standard German data reduces it by 2.29 BLEU. Our dialect transfer experiments suggest that an equivalent of the Curse of Multilinguality (Conneau et al., 2020) exists in dialectal speech processing, as training on multiple dialects jointly tends to decrease single-dialect performance. However, introducing small amounts of dialectal variability can improve the performance for low-resource dialects.</abstract>
       <url hash="49fb9534">2025.iwslt-1.15</url>
@@ -229,7 +229,7 @@
       <title><fixed-case>NAVER</fixed-case> <fixed-case>LABS</fixed-case> <fixed-case>E</fixed-case>urope Submission to the Instruction-following Track</title>
       <author><first>Beomseok</first><last>Lee</last><affiliation>University of Trento</affiliation></author>
       <author><first>Marcely</first><last>Zanon Boito</last><affiliation>NAVER LABS Europe</affiliation></author>
-      <author><first>Laurent</first><last>Besacier</last><affiliation>NAVER LABS Europe</affiliation></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last><affiliation>NAVER LABS Europe</affiliation></author>
       <author><first>Ioan</first><last>Calapodescu</last><affiliation>NAVER LABS Europe</affiliation></author>
       <pages>186-200</pages>
       <abstract>In this paper we describe NAVER LABS Europe submission to the instruction-following speech processing short track at IWSLT 2025. We participate in the constrained settings, developing systems that can simultaneously perform ASR, ST, and SQA tasks from English speech input into the following target languages: Chinese, Italian, and German. Our solution leverages two pretrained modules: (1) a speech-to-LLM embedding projector trained using representations from the SeamlessM4T-v2-large speech encoder; and (2) LoRA adapters trained on text data on top of Llama-3.1-8B-Instruct. These modules are jointly loaded and further instruction-tuned for 1K steps on multilingual and multimodal data to form our final system submitted for evaluation.</abstract>
@@ -242,7 +242,7 @@
       <author><first>Debjit</first><last>Dhar</last><affiliation>Jadavpur University</affiliation></author>
       <author><first>Soham</first><last>Lahiri</last><affiliation>Jadavpur University</affiliation></author>
       <author><first>Tapabrata</first><last>Mondal</last><affiliation>Jadavpur University</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
       <pages>201-205</pages>
       <abstract>This paper presents the submission of the Jadavpur University Computer Science and Engineering Natural Language Processing (JU-CSENLP) Laboratory to the International Conference on Spoken Language Translation (IWSLT) 2025 Indic track, addressing the speech-to-text translation task in both English-to-Indic (Bengali, Hindi, Tamil) and Indic-to-English directions. To tackle the challenges posed by low resource Indian languages, we adopt a cascaded approach leveraging state-of-the-art pre-trained models. For English-to-Indic translation, we utilize OpenAI’s Whisper model for Automatic Speech Recognition (ASR), followed by the Meta’s No Language Left Behind (NLLB)-200-distilled-600M model finetuned for Machine Translation (MT). For the reverse direction, we employ the AI4Bharat’s IndicConformer model for ASR and IndicTrans2 finetuned for MT. Our models are fine-tuned on the provided benchmark dataset to better handle the linguistic diversity and domain-specific variations inherent in the data. Evaluation results demonstrate that our cascaded systems achieve competitive performance, with notable BLEU and chrF++ scores across all language pairs. Our findings highlight the effectiveness of combining robust ASR and MT components in a cascaded pipeline, particularly for low-resource and morphologically rich Indian languages.</abstract>
       <url hash="7234e943">2025.iwslt-1.18</url>
@@ -271,7 +271,7 @@
       <author><first>Enes</first><last>Yavuz Ugan</last><affiliation>KIT</affiliation></author>
       <author><first>Tu</first><last>Anh Dinh</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Carlos</first><last>Mullov</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
       <pages>212-221</pages>
       <abstract>This paper presents KIT’s submissions to the IWSLT 2025 low-resource track. We develop both cascaded systems, consisting of Automatic Speech Recognition (ASR) and Machine Translation (MT) models, and end-to-end (E2E) Speech Translation (ST) systems for three language pairs: Bemba, North Levantine Arabic, and Tunisian Arabic into English. Building upon pre-trained models, we fine-tune our systems with different strategies to utilize resources efficiently. This study further explores system enhancement with synthetic data and model regularization. Specifically, we investigate MT-augmented ST by generating translations from ASR data using MT models. For North Levantine, which lacks parallel ST training data, a system trained solely on synthetic data slightly surpasses the cascaded system trained on real data. We also explore augmentation using text-to-speech models by generating synthetic speech from MT data, demonstrating the benefits of synthetic data in improving both ASR and ST performance for Bemba. Additionally, we apply intra-distillation to enhance model performance. Our experiments show that this approach consistently improves results across ASR, MT, and ST tasks, as well as across different pre-trained models. Finally, we apply Minimum Bayes Risk decoding to combine the cascaded and end-to-end systems, achieving an improvement of approximately 1.5 BLEU points.</abstract>
@@ -299,7 +299,7 @@
       <author><first>Thai</first><last>Binh Nguyen</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Seymanur</first><last>Akti</last><affiliation>Karlsruhe Institute of Technology</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>Karlsruhe Institut of Technology</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Carnegie Mellon</affiliation></author>
       <pages>232-244</pages>
       <abstract>In this paper, we present the submissions for the Offline ST and Instruction Following (IF) tracks, where we leverage LLMs to enhance performance across all tasks. For the Offline ST track, we propose a pipeline that employs multiple automatic speech recognition systems, whose outputs are fused using an LLM with document-level context. This is followed by a two-step translation process, incorporating additional contextual refinement step to improve translation quality. For the IF track, we develop an end-to-end model that integrates a speech encoder with an LLM to perform a wide range of instruction-following tasks. We complement it with a final document-level refinement stage to further enhance output quality by using contextual information.</abstract>
       <url hash="221299c7">2025.iwslt-1.22</url>
@@ -312,7 +312,7 @@
       <author><first>Soham</first><last>Chaudhuri</last><affiliation>Jadavpur University</affiliation></author>
       <author><first>Dipanjan</first><last>Saha</last><affiliation>Jadavpur University</affiliation></author>
       <author><first>Dipankar</first><last>Das</last><affiliation>Jadavpur University</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
       <pages>245-251</pages>
       <abstract>Multi-language Speech-to-Text Translation (ST) plays a crucial role in breaking linguistic barriers, particularly in multilingual regions like India. This paper focuses on building a robust ST system for low resource Indian languages, with a special emphasis on Bengali and Tamil. These languages represent the Indo-Aryan and Dravidian families, respectively. The dataset used in this work comprises spoken content from TED Talks and conferences, paired with transcriptions in English and their translations in Bengali and Tamil. Our work specifically addresses the translation of Bengali and Tamil speech to English text, a critical area given the scarcity of annotated speech data. To enhance translation quality and model robustness, we leverage cross-lingual resources and word level translation strategies. The ultimate goal is to develop an end-to-end ST model capable of real-world deployment for under represented languages.</abstract>
       <url hash="4f6df3f6">2025.iwslt-1.23</url>
@@ -324,7 +324,7 @@
       <author><first>Josef</first><last>Jon</last><affiliation>Charles University</affiliation></author>
       <author><first>Waad</first><last>Ben Kheder</last><affiliation>Vocapia Research</affiliation></author>
       <author><first>Andre</first><last>Beyer</last><affiliation>Bielefeld University</affiliation></author>
-      <author><first>Claude</first><last>Barras</last><affiliation>Vocapia Research</affiliation></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last><affiliation>Vocapia Research</affiliation></author>
       <author><first>Jean-Luc</first><last>Gauvain</last><affiliation>CNRS/LIMSI</affiliation></author>
       <pages>252-259</pages>
       <abstract>We present our IWSLT 2025 submission for the low-resource track on North Levantine Arabic to English speech translation, building on our IWSLT 2024 efforts. We retain last year’s cascade ASR architecture that combines a TDNN-F model and a Zipformer for the ASR step. We upgrade the Zipformer to the Zipformer-Large variant (253 M parameters vs. 66 M) to capture richer acoustic representations. For the MT part, to further alleviate data sparsity, we created a crowd-sourced parallel corpus covering five major Arabic dialects (Tunisian, Levantine, Moroccan, Algerian, Egyptian) curated via rigorous qualification and filtering. We show that using crowd-sourced data is feasible in low-resource scenarios as we observe improved automatic evaluation metrics across all dialects. We also experimented with the dataset under a high-resource scenario, where we had access to a large, high-quality Levantine Arabic corpus from LDC. In this setting, adding the crowd-sourced data does not improve the scores on the official validation set anymore. Our final submission scores 20.0 BLEU on the official test set.</abstract>
@@ -374,7 +374,7 @@
     <paper id="28">
       <title><fixed-case>CUNI</fixed-case>-<fixed-case>NL</fixed-case>@<fixed-case>IWSLT</fixed-case> 2025: End-to-end Offline Speech Translation and Instruction Following with <fixed-case>LLM</fixed-case>s</title>
       <author><first>Nam</first><last>Luu</last><affiliation>Charles University</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles University, MFF UFAL</affiliation></author>
       <pages>282-288</pages>
       <abstract>This paper describes the CUNI-NL team’s submission to the IWSLT 2025 Offline Speech Translation and Instruction Following tasks, focusing on transcribing the English audio, and translating the English audio to German text. Our systems follow the end-to-end approach, where each system consists of a pretrained, frozen speech encoder, along with a medium-sized large language model fine-tuned with LoRA on three tasks: 1) transcribing the English audio; 2) directly translating the English audio to German text; and 3) a combination of the above two tasks, i.e. simultaneously transcribing the English audio and translating the English audio to German text.</abstract>
       <url hash="7465d73c">2025.iwslt-1.28</url>
@@ -433,7 +433,7 @@
     <paper id="33">
       <title><fixed-case>SYSTRAN</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2025 Low-resource track</title>
       <author><first>Marko</first><last>Avila</last><affiliation>Systran by Chapsvision</affiliation></author>
-      <author><first>Josep</first><last>Crego</last><affiliation>Systran by Chapsvision</affiliation></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last><affiliation>Systran by Chapsvision</affiliation></author>
       <pages>324-332</pages>
       <abstract>SYSTRAN submitted systems for one language pair in the 2025 Low-Resource Language Track. Our main contribution lies in the tight coupling and light fine-tuning of an ASR encoder (Whisper) with a neural machine translation decoder (NLLB), forming an efficient speech translation pipeline. We present the modeling strategies and optimizations implemented to build a system that, unlike large-scale end-to-end models, performs effectively under constraints of limited training data and computational resources. This approach enables the development of high-quality speech translation in low-resource settings, while ensuring both efficiency and scalability. We also conduct a comparative analysis of our proposed system against various paradigms, including a cascaded Whisper+NLLB setup and direct end-to-end fine-tuning of Whisper.</abstract>
       <url hash="492f8f5c">2025.iwslt-1.33</url>
@@ -458,7 +458,7 @@
       <author><first>Javier</first><last>Iranzo-Sanchez</last><affiliation>AppTek</affiliation></author>
       <author><first>Adrià</first><last>Giménez Pastor</last><affiliation>Universitat de Valencia</affiliation></author>
       <author><first>Jorge</first><last>Civera Saiz</last><affiliation>UPV/MLLP-VRAIN</affiliation></author>
-      <author><first>Alfons</first><last>Juan</last><affiliation>Universitat Politècnica de València</affiliation></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last><affiliation>Universitat Politècnica de València</affiliation></author>
       <pages>340-346</pages>
       <abstract>This work describes the participation of the MLLP-VRAIN research group in the shared task of the IWSLT 2025 Simultaneous Speech Translation track. Our submission addresses the unique challenges of real-time translation of long-form speech by developing a modular cascade system that adapts strong pre-trained models to streaming scenarios. We combine Whisper Large-V3-Turbo for ASR with the multilingual NLLB-3.3B model for MT, implementing lightweight adaptation techniques rather than training new end-to-end models from scratch. Our approach employs document-level adaptation with prefix training to enhance the MT model’s ability to handle incomplete inputs, while incorporating adaptive emission policies including a wait-k strategy and RALCP for managing the translation stream. Specialized buffer management techniques and segmentation strategies ensure coherent translations across long audio sequences. Experimental results on the ACL60/60 dataset demonstrate that our system achieves a favorable balance between translation quality and latency, with a BLEU score of 31.96 and non-computational-aware StreamLAAL latency of 2.94 seconds. Our final model achieves a preliminary score on the official test set (IWSLT25Instruct) of 29.8 BLEU. Our work demonstrates that carefully adapted pre-trained components can create effective simultaneous translation systems for long-form content without requiring extensive in-domain parallel data or specialized end-to-end training.</abstract>
       <url hash="1446bbaf">2025.iwslt-1.35</url>
@@ -571,7 +571,7 @@
       <author><first>Tanel</first><last>Alumäe</last><affiliation>TalTech</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>GMU</affiliation></author>
       <author><first>Luisa</first><last>Bentivogli</last><affiliation>FBK</affiliation></author>
-      <author><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last><affiliation>Charles U.</affiliation></author>
       <author><first>Claudia</first><last>Borg</last><affiliation>U. Malta</affiliation></author>
       <author><first>Fethi</first><last>Bougares</last><affiliation>Elyadata</affiliation></author>
       <author><first>Roldano</first><last>Cattoni</last><affiliation>FBK</affiliation></author>
@@ -596,7 +596,7 @@
       <author><first>Yasmin</first><last>Moslem</last><affiliation>ADAPT Centre</affiliation></author>
       <author><first>Kenton</first><last>Murray</last><affiliation>JHU</affiliation></author>
       <author><first>Satoshi</first><last>Nakamura</last><affiliation>CUHK Shenzhen</affiliation></author>
-      <author><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last><affiliation>FBK</affiliation></author>
       <author><first>Jan</first><last>Niehues</last><affiliation>KIT</affiliation></author>
       <author><first>Atul</first><last>Kr. Ojha</last><affiliation>U. Galway</affiliation></author>
       <author><first>John E.</first><last>Ortega</last><affiliation>Northeastern U.</affiliation></author>
@@ -609,11 +609,11 @@
       <author><first>Nivedita</first><last>Sethiya</last><affiliation>IIT Indore</affiliation></author>
       <author><first>Claytone</first><last>Sikasote</last><affiliation>U. Cape Town</affiliation></author>
       <author><first>Matthias</first><last>Sperber</last><affiliation>Apple</affiliation></author>
-      <author><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last><affiliation>Zoom</affiliation></author>
       <author><first>Katsuhito</first><last>Sudoh</last><affiliation>Nara Women’s U.</affiliation></author>
       <author><first>Brian</first><last>Thompson</last><affiliation>Amazon</affiliation></author>
       <author><first>Marco</first><last>Turchi</last><affiliation>Zoom</affiliation></author>
-      <author><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last><affiliation>CMU</affiliation></author>
       <author><first>Patrick</first><last>Wilken</last><affiliation>AppTek</affiliation></author>
       <author><first>Rodolfo</first><last>Zevallos</last><affiliation>U. Pompeu Fabra</affiliation></author>
       <author><first>Vilém</first><last>Zouhar</last><affiliation>ETH</affiliation></author>
diff --git a/data/xml/2025.knowledgenlp.xml b/data/xml/2025.knowledgenlp.xml
index 0d44a2adff..1607fa4427 100644
--- a/data/xml/2025.knowledgenlp.xml
+++ b/data/xml/2025.knowledgenlp.xml
@@ -9,7 +9,7 @@
       <editor><first>Meng</first><last>Jiang</last></editor>
       <editor><first>Greg</first><last>Durrett</last></editor>
       <editor><first>Hannaneh</first><last>Hajishirzi</last></editor>
-      <editor><first>Luke</first><last>Zettlemoyer</last></editor>
+      <editor id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, New Mexico, USA</address>
       <month>May</month>
@@ -27,7 +27,7 @@
     </frontmatter>
     <paper id="1">
       <title>Entity Retrieval for Answering Entity-Centric Questions</title>
-      <author><first>Hassan</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last><affiliation>Simon Fraser University</affiliation></author>
       <pages>1-17</pages>
       <abstract>The similarity between the question and indexed documents is a key factor in document retrieval for retrieval-augmented question answering. Although this is typically the only method for obtaining the relevant documents, it is not the sole approach when dealing with entity-centric questions. We study Entity Retrieval, an alternative retrieval method, which rather than relying on question-document similarity, depends on the salient entities within the question to identify the retrieval documents. We conduct an in-depth analysis of the performance of both dense and sparse retrieval methods in comparison to Entity Retrieval. Our findings reveal the great potential of entity-driven methods for improving augmentation document retrieval in both accuracy and efficiency.</abstract>
@@ -133,7 +133,7 @@
       <author><first>Suhas</first><last>Suresha</last></author>
       <author><first>Ishita</first><last>Verma</last></author>
       <author><first>Cheng</first><last>Chen</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Michael</first><last>Friedrich</last></author>
       <pages>129-140</pages>
       <abstract>This paper addresses fine-tuning Large Language Models (LLMs) for function calling tasks when real user interaction data is unavailable. In digital content creation tools, where users express their needs through natural language queries that must be mapped to API calls, the lack of real-world task-specific data and privacy constraints for training on it necessitate synthetic data generation. Existing approaches to synthetic data generation fall short in diversity and complexity, failing to replicate real-world data distributions and leading to suboptimal performance after LLM fine-tuning. We present a novel router-based architecture that leverages domain resources like content metadata and structured knowledge graphs, along with text-to-text and vision-to-text language models to generate high-quality synthetic training data. Our architecture’s flexible routing mechanism enables synthetic data generation that matches observed real-world distributions, addressing a fundamental limitation of traditional approaches. Evaluation on a comprehensive set of real user queries demonstrates significant improvements in both function classification accuracy and API parameter selection. Models fine-tuned with our synthetic data consistently outperform traditional approaches, establishing new benchmarks for function calling tasks.</abstract>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="21">
       <title>Chain of Evidences and Evidence to Generate: Prompting for Context Grounded and Retrieval Augmented Reasoning</title>
-      <author><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last><affiliation>Qatar Computing Research Institute</affiliation></author>
       <pages>230-245</pages>
       <abstract>While chain-of-thoughts (CoT) prompting has revolutionized how LLMs perform reasoning tasks, its current methods and variations (e.g, Self-consistency, ReACT, Reflexion, Tree-of-Thoughts (ToT), Cumulative Reasoning (CR) etc.,) suffer from limitations like limited context grounding, hallucination/inconsistent output generation, and iterative sluggishness. To overcome these challenges, we introduce a novel mono/dual-step zero-shot prompting framework built upon two unique strategies <b>Chain of Evidences (CoE)</b> and <b>Evidence to Generate (E2G)</b>. Instead of unverified reasoning claims, our innovative approaches leverage the power of “evidence for decision making” by first focusing exclusively on the thought sequences explicitly mentioned in the context which then serve as extracted evidence, guiding the LLM’s output generation process with greater precision and efficiency. This simple yet potent approach unlocks the full potential of chain-of-thoughts prompting, facilitating faster, more reliable, and contextually aware reasoning in LLMs. Our framework consistently achieves remarkable results across various knowledge-intensive reasoning and generation tasks, surpassing baseline approaches with state-of-the-art LLMs. For instance, (i) on the LogiQA benchmark using GPT-4, CoE achieves a new state-of-the-art accuracy of 53.8%, surpassing CoT by 18%, ToT by 11%, and CR by 9%; (ii) CoE with PaLM-2 outperforms the variable-shot performance of Gemini Ultra by 0.9 F1 points, achieving an F1 score of 83.3 on DROP. We release our prompts and outputs on these benchmarks as a new instruction tuning dataset for future research at <i>Hugging Face</i>.</abstract>
       <url hash="05cd4342">2025.knowledgenlp-1.21</url>
@@ -268,7 +268,7 @@
       <title><fixed-case>PROPEL</fixed-case>: Prompt Optimization with Expert Priors for Small and Medium-sized <fixed-case>LLM</fixed-case>s</title>
       <author><first>Kawin</first><last>Mayilvaghanan</last><affiliation>Observe AI</affiliation></author>
       <author><first>Varun</first><last>Nathan</last><affiliation>Indian Institute of Science, Indian institute of science, Bangalore</affiliation></author>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <pages>272-302</pages>
       <url hash="89f070ce">2025.knowledgenlp-1.25</url>
       <bibkey>mayilvaghanan-etal-2025-propel</bibkey>
@@ -289,7 +289,7 @@
       <title>Can dependency parses facilitate generalization in language models? A case study of cross-lingual relation extraction</title>
       <author><first>Ritam</first><last>Dutt</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shounak</first><last>Sural</last></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>317-337</pages>
       <abstract>In this work, we propose DEPGEN, a framework for evaluating the generalization capabilities of language models on the task of relation extraction, with dependency parses as scaffolds. We use a GNN-based framework that takes dependency parses as input and learns embeddings of entities which are augmented to a baseline multilingual encoder. We also investigate the role of dependency parses when they are included as part of the prompt to LLMs in a zero-shot learning setup. We observe that including off-the-shelf dependency parses can aid relation extraction, with the best performing model having a mild relative improvement of 0.91% and 1.5% in the in-domain and zero-shot setting respectively across two datasets. For the in-context learning setup, we observe an average improvement of 1.67%, with significant gains for low-performing LLMs. We also carry out extensive statistical analysis to investigate how different factors such as the choice of the dependency parser or the nature of the prompt impact performance. We make our code and results publicly available for the research community at https://github.com/ShoRit/multilingual-re.git.</abstract>
       <url hash="41ceb8a9">2025.knowledgenlp-1.27</url>
@@ -300,7 +300,7 @@
       <title>Can dependency parses facilitate generalization in language models? A case study of cross-lingual relation extraction</title>
       <author><first>Ritam</first><last>Dutt</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shounak</first><last>Sural</last></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>338-358</pages>
       <abstract>In this work, we propose DEPGEN, a framework for evaluating the generalization capabilities of language models on the task of relation extraction, with dependency parses as scaffolds. We use a GNN-based framework that takes dependency parses as input and learns embeddings of entities which are augmented to a baseline multilingual encoder. We also investigate the role of dependency parses when they are included as part of the prompt to LLMs in a zero-shot learning setup. We observe that including off-the-shelf dependency parses can aid relation extraction, with the best performing model having a mild relative improvement of 0.91% and 1.5% in the in-domain and zero-shot setting respectively across two datasets. For the in-context learning setup, we observe an average improvement of 1.67%, with significant gains for low-performing LLMs. We also carry out extensive statistical analysis to investigate how different factors such as the choice of the dependency parser or the nature of the prompt impact performance. We make our code and results publicly available for the research community at https://github.com/ShoRit/multilingual-re.git.</abstract>
       <url hash="41ceb8a9">2025.knowledgenlp-1.28</url>
diff --git a/data/xml/2025.knowllm.xml b/data/xml/2025.knowllm.xml
index 70d35bafc2..70351f8273 100644
--- a/data/xml/2025.knowllm.xml
+++ b/data/xml/2025.knowllm.xml
@@ -38,7 +38,7 @@
       <author><first>Soyeong</first><last>Jeong</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Hoyun</first><last>Song</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Huije</first><last>Lee</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>1-13</pages>
       <abstract>The rapid expansion of digital information and knowledge across structured and unstructured sources has heightened the importance of Information Retrieval (IR). While dense retrieval methods have substantially improved semantic matching for general queries, they consistently underperform on queries with explicit temporal constraints–often those containing numerical expressions and time specifiers such as “in 2015.” Existing approaches to Temporal Information Retrieval (TIR) improve temporal reasoning but often suffer from catastrophic forgetting, leading to reduced performance on non-temporal queries. To address this, we propose Time-Specifier Model Merging (TSM), a novel method that enhances temporal retrieval while preserving accuracy on non-temporal queries. TSM trains specialized retrievers for individual time specifiers and merges them into a unified model, enabling precise handling of temporal constraints without compromising non-temporal retrieval. Extensive experiments on both temporal and non-temporal datasets demonstrate that TSM significantly improves performance on temporally constrained queries while maintaining strong results on non-temporal queries, consistently outperforming other training methods. Our code is available at https://github.com/seungyoonee/TSM.</abstract>
       <url hash="fb89c9d4">2025.knowllm-1.1</url>
@@ -133,7 +133,7 @@
       <author><first>Zhaoxin</first><last>Feng</last></author>
       <author><first>Jianfei</first><last>Ma</last></author>
       <author><first>Jiexi</first><last>Xu</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <pages>84-93</pages>
       <abstract>Large language models (LLMs) often demonstrate strong performance by leveraging implicit knowledge acquired during pretraining. Analogical reasoning, which solves new problems by referencing similar known examples, offers a structured way to utilize this knowledge, but can also lead to subtle factual errors and hallucinations. In this work, we investigate whether LLMs can recognize the reliability of their own analogical outputs using black-box uncertainty estimation (UE). We evaluate six UE metrics across two reasoning-intensive tasks: mathematical problem solving (GSM8K) and code generation (Codeforces). Our results show that Kernel Language Entropy (KLE) and Lexical Similarity (LexSim) are the most robust indicators of correctness. Moreover, while analogical prompting increases model confidence over direct prompting, most uncertainty arises during the analogy transfer step. These findings highlight the limitations of analogical knowledge transfer in LLMs and demonstrate the potential of UE methods for detecting hallucinated reasoning in black-box settings.</abstract>
       <url hash="3ce1cd1e">2025.knowllm-1.8</url>
@@ -146,7 +146,7 @@
       <author><first>Jiang</first><last>Futian</last></author>
       <author><first>Yue</first><last>Deng</last></author>
       <author><first>Changyang</first><last>He</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <pages>94-110</pages>
       <abstract>We present Meetalk, a retrieval-augmented and knowledge-adaptive system for generating personalized meeting minutes. Although large language models (LLMs) excel at summarizing, their output often lacks faithfulness and does not reflect user-specific structure and style. Meetalk addresses these issues by integrating ASR-based transcription with LLM generation guided by user-derived knowledge. Specifically, Meetalk maintains and updates three structured databases, Table of Contents, Chapter Allocation, and Writing Style, based on user-uploaded samples and editing feedback. These serve as a dynamic memory that is retrieved during generation to ground the model’s outputs. To further enhance reliability, Meetalk introduces hallucination-aware uncertainty markers that highlight low-confidence segments for user review. In a user study in five real-world meeting scenarios, Meetalk significantly outperforms a strong baseline (iFLYTEK ASR + ChatGPT-4o) in completeness, contextual relevance, and user trust. Our findings underscore the importance of knowledge foundation and feedback-driven adaptation in building trustworthy, personalized LLM systems for high-stakes summarization tasks.</abstract>
       <url hash="fdf22c95">2025.knowllm-1.9</url>
diff --git a/data/xml/2025.l2m2.xml b/data/xml/2025.l2m2.xml
index 324695ced4..881c54d96c 100644
--- a/data/xml/2025.l2m2.xml
+++ b/data/xml/2025.l2m2.xml
@@ -29,7 +29,7 @@
     <paper id="1">
       <title>Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations</title>
       <author><first>Hichem Ammar</first><last>Khodja</last><affiliation>Université d’Aix-Marseille</affiliation></author>
-      <author><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last><affiliation>Académie d’Aix-Marseille</affiliation></author>
       <author><first>Quentin</first><last>Brabant</last><affiliation>Orange-labs</affiliation></author>
       <author><first>Alexis</first><last>Nasr</last><affiliation>Aix Marseille University</affiliation></author>
       <author><first>Gwénolé</first><last>Lecorvé</last><affiliation>Orange</affiliation></author>
@@ -52,7 +52,7 @@
       <title>From Data to Knowledge: Evaluating How Efficiently Language Models Learn Facts</title>
       <author><first>Daniel</first><last>Christoph</last></author>
       <author><first>Max</first><last>Ploner</last><affiliation>Humboldt Universität Berlin</affiliation></author>
-      <author><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <author><first>Alan</first><last>Akbik</last><affiliation>Humboldt Universität Berlin</affiliation></author>
       <pages>29-46</pages>
       <abstract>Sample efficiency is a crucial property of language models with practical implications for training efficiency. In real-world text, information follows a long-tailed distribution. Yet, we expect models to learn and recall frequent and infrequent facts. Sample efficient models are better equipped to handle this challenge of learning and retaining rare information without requiring excessive exposure. This study analyzes multiple models of varying architectures and sizes, all trained on the same pre-training data. By annotating relational facts with their frequencies in the training corpus, we examine how model performance varies with fact frequency. Our findings show that most models perform similarly on high-frequency facts but differ notably on low-frequency facts. This analysis provides new insights into the relationship between model architecture, size, and factual learning efficiency.</abstract>
@@ -131,7 +131,7 @@
       <author><first>Peter</first><last>Carragher</last><affiliation>CMU, Carnegie Mellon University</affiliation></author>
       <author><first>Abhinand</first><last>Jha</last></author>
       <author><first>Raghav</first><last>R</last></author>
-      <author><first>Kathleen M.</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen M.</first><last>Carley</last></author>
       <pages>127-141</pages>
       <abstract>Large Language Models (LLMs) demonstrate remarkable capabilities in question answering (QA), but metrics for assessing their reliance on memorization versus retrieval remain underdeveloped. Moreover, while finetuned models are state-of-the-art on closed-domain tasks, general-purpose models like GPT-4o exhibit strong zero-shot performance. This raises questions about the trade-offs between memorization, generalization, and retrieval. In this work, we analyze the extent to which multimodal retrieval-augmented VLMs memorize training data compared to baseline VLMs. Using the WebQA benchmark, we contrast finetuned models with baseline VLMs on multihop retrieval and question answering, examining the impact of finetuning on data memorization. To quantify memorization in end-to-end retrieval and QA systems, we propose several proxy metrics by investigating instances where QA succeeds despite retrieval failing. In line with existing work, we find that finetuned models rely more heavily on memorization than retrieval-augmented VLMs, and achieve higher accuracy as a result (72% vs 52% on WebQA test set). Finally, we present the first empirical comparison of the parametric effect between text and visual modalities. Here, we find that image-based questions have parametric response rates that are consistently 15-25% higher than for text-based questions in the WebQA dataset. As such, our measures pose a challenge for future work, both to account for differences in model memorization across different modalities and more generally to reconcile memorization and generalization in joint Retrieval-QA tasks.</abstract>
       <url hash="1712ed01">2025.l2m2-1.10</url>
@@ -154,7 +154,7 @@
       <author><first>Alisa</first><last>Stoll</last></author>
       <author><first>Lukas</first><last>Lange</last><affiliation>Robert Bosch GmbH, Bosch</affiliation></author>
       <author><first>Heike</first><last>Adel</last><affiliation>Hochschule der Medien (University of Applied Sciences)</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Jannik</first><last>Strötgen</last><affiliation>Karlsruhe University of Applied Sciences</affiliation></author>
       <pages>150-168</pages>
       <abstract>Adapting large language models (LLMs) to new and diverse knowledge is essential for their lasting effectiveness in real-world applications. This survey provides an overview of state-of-the-art methods for expanding the knowledge of LLMs, focusing on integrating various knowledge types, including factual information, domain expertise, language proficiency, and user preferences. We explore techniques, such as continual learning, model editing, and retrieval-based explicit adaptation, while discussing challenges like knowledge consistency and scalability. Designed as a guide for researchers and practitioners, this survey sheds light on opportunities for advancing LLMs as adaptable and robust knowledge systems.</abstract>
@@ -203,7 +203,7 @@
       <author><first>Santosh</first><last>T.y.s.s</last></author>
       <author><first>Yanai</first><last>Elazar</last><affiliation>Allen Institute for Artificial Intelligence and Department of Computer Science</affiliation></author>
       <author><first>Quirin</first><last>Vogel</last><affiliation>Alpen-Adria Universität Klagenfurt</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Matthias</first><last>Grabmair</last><affiliation>Technische Universität München</affiliation></author>
       <pages>205-226</pages>
       <abstract>Recent works have shown that Large Language Models (LLMs) have a tendency to memorize patterns and biases present in their training data, raising important questions about how such memorized content influences model behavior. One such concern is the emergence of political bias in LLM outputs. In this paper, we investigate the extent to which LLMs’ political leanings reflect memorized patterns from their pretraining corpora. We propose a method to quantitatively evaluate political leanings embedded in the large pretraining corpora. Subsequently we investigate to whom are the LLMs’ political leanings more aligned with, their pretrainig corpora or the surveyed human opinions. As a case study, we focus on probing the political leanings of LLMs in 32 U.S. Supreme Court cases, addressing contentious topics such as abortion and voting rights. Our findings reveal that LLMs strongly reflect the political leanings in their training data, and no strong correlation is observed with their alignment to human opinions as expressed in surveys. These results underscore the importance of responsible curation of training data, and the methodology for auditing the memorization in LLMs to ensure human-AI alignment.</abstract>
diff --git a/data/xml/2025.latechclfl.xml b/data/xml/2025.latechclfl.xml
index accaf5e7e2..506f8b7505 100644
--- a/data/xml/2025.latechclfl.xml
+++ b/data/xml/2025.latechclfl.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 9th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2025)</booktitle>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Yuri</first><last>Bizzoni</last></editor>
       <editor><first>Janis</first><last>Pagel</last></editor>
@@ -219,7 +219,7 @@
     <paper id="19">
       <title>Prompting the Past: Exploring Zero-Shot Learning for Named Entity Recognition in Historical Texts Using Prompt-Answering <fixed-case>LLM</fixed-case>s</title>
       <author><first>Crina</first><last>Tudor</last><affiliation>Stockholm University</affiliation></author>
-      <author><first>Beata</first><last>Megyesi</last><affiliation>uu.se</affiliation></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last><affiliation>uu.se</affiliation></author>
       <author><first>Robert</first><last>Östling</last><affiliation>Department of Linguistics, Stockholm University</affiliation></author>
       <pages>216-226</pages>
       <abstract>This paper investigates the application of prompt-answering Large Language Models (LLMs) for the task of Named Entity Recognition (NER) in historical texts. Historical NER presents unique challenges due to language change through time, spelling variation, limited availability of digitized data (and, in particular, labeled data), and errors introduced by Optical Character Recognition (OCR) and Handwritten Text Recognition (HTR) processes. Leveraging the zero-shot capabilities of prompt-answering LLMs, we address these challenges by prompting the model to extract entities such as persons, locations, organizations, and dates from historical documents. We then conduct an extensive error analysis of the model output in order to identify and address potential weaknesses in the entity recognition process. The results show that, while such models display ability for extracting named entities, their overall performance is lackluster. Our analysis reveals that model performance is significantly affected by hallucinations in the model output, as well as by challenges imposed by the evaluation of NER output.</abstract>
@@ -275,7 +275,7 @@
     <paper id="24">
       <title>Identifying Small Talk in Natural Conversations</title>
       <author><first>Steffen</first><last>Frenzel</last><affiliation>University of Potsdam</affiliation></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last><affiliation>University of Passau</affiliation></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last><affiliation>University of Passau</affiliation></author>
       <pages>272-277</pages>
       <abstract>Small talk is part and parcel of human interaction and is rather employed to communicate values and opinions than pure information. Despite small talk being an omnipresent phenomenon in spoken language, it is difficult to identify: Small talk is situated, i.e., for interpreting a string of words or discourse units, outside references such as the context of the interlocutors and their previous experiences have to be interpreted.In this paper, we present a dataset of natural conversation annotated with a theoretically well-motivated distillation of what constitutes small talk. This dataset comprises of verbatim transcribed public service encounters in German authorities and are the basis for empirical work in administrative policy on how the satisfaction of the citizen manifests itself in the communication with the authorities. We show that statistical models achieve comparable results to those of state-of-the-art LLMs.</abstract>
       <url hash="1e20dca0">2025.latechclfl-1.24</url>
@@ -285,7 +285,7 @@
     <paper id="25">
       <title>Why Novels (Don’t) Break Through: Dynamics of Canonicity in the <fixed-case>D</fixed-case>anish Modern Breakthrough (1870-1900)</title>
       <author><first>Alie</first><last>Lassche</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Feldkamp</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Katrine</first><last>Baunvig</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Center for Humanities Computing, Aarhus University</affiliation></author>
diff --git a/data/xml/2025.law.xml b/data/xml/2025.law.xml
index 6dd0459955..c3652d237b 100644
--- a/data/xml/2025.law.xml
+++ b/data/xml/2025.law.xml
@@ -56,7 +56,7 @@
     </paper>
     <paper id="4">
       <title>Subjectivity in the Annotation of Bridging Anaphora</title>
-      <author><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
       <pages>48-59</pages>
       <abstract>Bridging refers to the associative relationship between inferable entities in a discourse and the antecedents which allow us to understand them, such as understanding what “the door” means with respect to an aforementioned “house”. As identifying associative relations between entities is an inherently subjective task, it is difficult to achieve consistent agreement in the annotation of bridging anaphora and their antecedents. In this paper, we explore the subjectivity involved in the annotation of bridging instances at three levels: anaphor recognition, antecedent resolution, and bridging subtype selection. To do this, we conduct an annotation pilot on the test set of the existing GUM corpus, and propose a newly developed classification system for bridging subtypes, which we compare to previously proposed schemes. Our results suggest that some previous resources are likely to be severely under-annotated. We also find that while agreement on the bridging subtype category was moderate, annotator overlap for exhaustively identifying instances of bridging is low, and that many disagreements resulted from subjective understanding of the entities involved.</abstract>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="5">
       <title>The revision of linguistic annotation in the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies framework: a look at the annotators’ behavior</title>
-      <author><first>Magali Sanches</first><last>Duran</last><affiliation>Universidade de São Paulo</affiliation></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last><affiliation>Universidade de São Paulo</affiliation></author>
       <author><first>Lucelene</first><last>Lopes</last><affiliation>USP - ICMC</affiliation></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last><affiliation>University of São Paulo</affiliation></author>
       <pages>60-69</pages>
@@ -92,7 +92,7 @@
     <paper id="7">
       <title>Another Approach to Agreement Measurement and Prediction with Emotion Annotations</title>
       <author><first>Quanqi</first><last>Du</last><affiliation>LT3, Ghent University</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
       <pages>87-102</pages>
       <abstract>Emotion annotation, as an inherently subjective task, often suffers from significant inter-annotator disagreement when evaluated using traditional metrics like kappa or alpha. These metrics often fall short of capturing the nuanced nature of disagreement, especially in multimodal settings. This study introduces Absolute Annotation Difference (AAD), a novel metric offering a complementary perspective on inter- and intra-annotator agreement across different modalities. Our analysis reveals that AAD not only identifies overall agreement levels but also uncovers fine-grained disagreement patterns across modalities often overlooked by conventional metrics. Furthermore, we propose an AAD-based RMSE variant for predicting annotation disagreement. Through extensive experiments on the large-scale DynaSent corpus, we demonstrate that our approach significantly improves disagreement prediction accuracy, rising from 41.71% to 51.64% and outperforming existing methods. Cross-dataset prediction results suggest good generalization. These findings underscore AAD’s potential to enhance annotation agreement analysis and provide deeper insights into subjective NLP tasks. Future work will investigate its applicability to broader emotion-related tasks and other subjective annotation scenarios.</abstract>
       <url hash="ef51c19f">2025.law-1.7</url>
@@ -127,7 +127,7 @@
       <title>Bootstrapping <fixed-case>UMR</fixed-case>s from <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Scalable Multilingual Annotation</title>
       <author><first>Federica</first><last>Gamba</last><affiliation>Charles University</affiliation></author>
       <author><first>Alexis</first><last>Palmer</last><affiliation>University of Colorado Boulder</affiliation></author>
-      <author><first>Daniel</first><last>Zeman</last><affiliation>Charles University, Faculty of Mathematics and Physics</affiliation></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last><affiliation>Charles University, Faculty of Mathematics and Physics</affiliation></author>
       <pages>126-136</pages>
       <abstract>Uniform Meaning Representation (UMR) is a semantic annotation framework designed to be applicable across typologically diverse languages. However, UMR annotation is a labor-intensive task, requiring significant effort and time especially when no prior annotations are available. In this paper, we present a method for bootstrapping UMR graphs by leveraging Universal Dependencies (UD), one of the most comprehensive multilingual resources, encompassing languages across a wide range of language families. Given UMR’s strong typological and cross-linguistic orientation, UD serves as a particularly suitable starting point for the conversion. We describe and evaluate an approach that automatically derives partial UMR graphs from UD trees, providing annotators with an initial representation to build upon. While UD is not a semantic resource, our method extracts useful structural information that aligns with the UMR formalism, thereby facilitating the annotation process. By leveraging UD’s broad typological coverage, this approach offers a scalable way to support UMR annotation across different languages.</abstract>
       <url hash="2726ac01">2025.law-1.10</url>
@@ -137,7 +137,7 @@
     <paper id="11">
       <title>Classifying <fixed-case>TEI</fixed-case> Encoding for <fixed-case>D</fixed-case>utch<fixed-case>D</fixed-case>ra<fixed-case>C</fixed-case>or with Transformer Models</title>
       <author><first>Florian</first><last>Debaene</last><affiliation>Ghent University</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>LT3, Ghent University</affiliation></author>
       <pages>137-141</pages>
       <abstract>Computational Drama Analysis relies on well-structured textual data, yet many dramatic works remain in need of encoding. The Dutch dramatic tradition is one such an example, with currently 180 plays available in the DraCor database, while many more plays await integration still. To facilitate this process, we propose a semi-automated TEI encoding annotation methodology using transformer encoder language models to classify structural elements in Dutch drama. We fine-tune 4 Dutch models on the DutchDraCor dataset to predict the 9 most relevant labels used in the DraCor TEI encoding, experimenting with 2 model input settings. Our results show that incorporating additional context through beginning-of-sequence (BOS) and end-of-sequence (EOS) tokens greatly improves performance, increasing the average macro F1 score across models from 0.717 to 0.923 (+0.206). Using the best-performing model, we generate silver-standard DraCor labels for EmDComF, an unstructured corpus of early modern Dutch comedies and farces, paving the way for its integration into DutchDraCor after validation.</abstract>
       <url hash="e7440b0b">2025.law-1.11</url>
@@ -148,7 +148,7 @@
       <title>Label Bias in Symbolic Representation of Meaning</title>
       <author><first>Marie</first><last>Mikulová</last><affiliation>Charles University</affiliation></author>
       <author><first>Jan</first><last>Štěpánek</last><affiliation>Charles University in Prague, Faculty of Mathematics and Physics, UFAL</affiliation></author>
-      <author><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
       <pages>142-159</pages>
       <abstract>This paper contributes to the trend of building semantic representations and exploring the relations between a language and the world it represents. We analyse alternative approaches to semantic representation, focusing on methodology of determining meaning categories, their arrangement and granularity, and annotation consistency and reliability. Using the task of semantic classification of circumstantial meanings within the Prague Dependency Treebank framework, we present our principles for analyzing meaning categories. Compared with the discussed projects, the unique aspect of our approach is its focus on how a language, in its structure, reflects reality. We employ a two-level classification: a higher, coarse-grained set of general semantic concepts (defined by questions: where, how, why, etc.) and a fine-grained set of circumstantial meanings based on data-driven analysis, reflecting meanings fixed in the language. We highlight that the inherent vagueness of linguistic meaning is crucial for capturing the limitless variety of the world but it can lead to label biases in datasets. Therefore, besides semantically clear categories, we also use fuzzy meaning categories.</abstract>
       <url hash="1ca89b64">2025.law-1.12</url>
@@ -229,9 +229,9 @@
     </paper>
     <paper id="19">
       <title>Creating Hierarchical Relations in a Multilingual Event-type Ontology</title>
-      <author><first>Zdeňka</first><last>Urešová</last><affiliation>Charles University</affiliation></author>
-      <author><first>Eva</first><last>Fučíková</last><affiliation>Charles University</affiliation></author>
-      <author><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last><affiliation>Charles University</affiliation></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last><affiliation>Charles University</affiliation></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last><affiliation>Charles University</affiliation></author>
       <pages>240-249</pages>
       <abstract>This paper describes the work on hierarchization of the SynSemClass event-type ontology. The original resource has been extended by a hierarchical structure to model specialization and generalization relations between classes that are formally and technically unrelated in the original ontology. The goal is to enable one to use the ontology enriched by the hierarchical concepts for annotation of running texts in symbolic meaning representations, such as UMR or PDT. The hierarchy is in principle built bottom-up, based on existing SSC classes (concepts). This approach differs from other approaches to semantic classes, such as in WordNet or VerbNet. Although the hierarchical relations are similar, the underlying nodes in the hierarchy are not. In this paper, we describe the challenges related to the principles chosen: single-tree constraint and finding features for the definitions of specificity/generality. Also, a pilot inter-annotator experiment is described that shows the difficulty of the hierarchization task.</abstract>
       <url hash="a2c6a32d">2025.law-1.19</url>
@@ -265,7 +265,7 @@
       <title>Variety delights (sometimes) - Annotation differences in morphologically annotated corpora</title>
       <author><first>Andrea</first><last>Dömötör</last><affiliation>ELTE Department of Digital Humanities</affiliation></author>
       <author><first>Balázs</first><last>Indig</last><affiliation>ELTE Faculty of Informatics</affiliation></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last><affiliation>ELTE DH</affiliation></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last><affiliation>ELTE DH</affiliation></author>
       <pages>270-278</pages>
       <abstract>The goal of annotation standards is to ensure consistency across different corpora and languages. But do they succeed? In our paper we experiment with morphologically annotated Hungarian corpora of different sizes (ELTE DH gold standard corpus, NYTK-NerKor, and Szeged Treebank) to assess their compatibility as a merged training corpus for morphological analysis and disambiguation. Our results show that combining any two corpora not only failed to improve the results of the trained tagger but even degraded them due the inconsistent annotations. Further analysis of the annotation differences among the corpora revealed inconsistencies of several sources: different theoretical approach, lack of consensus, and tagset conversion issues.</abstract>
       <url hash="59a66943">2025.law-1.22</url>
@@ -285,7 +285,7 @@
     <paper id="24">
       <title>Illuminating Logical Fallacies with the <fixed-case>CAMPFIRE</fixed-case> Corpus</title>
       <author><first>Austin</first><last>Blodgett</last><affiliation>US Army Research Lab</affiliation></author>
-      <author><first>Claire</first><last>Bonial</last><affiliation>US Army Research Lab</affiliation></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last><affiliation>US Army Research Lab</affiliation></author>
       <author><first>Taylor A.</first><last>Pellegrin</last><affiliation>ARL</affiliation></author>
       <author><first>Melissa</first><last>Torgbi</last><affiliation>University of Bath</affiliation></author>
       <author><first>Harish</first><last>Tayyar Madabushi</last><affiliation>University of Bath</affiliation></author>
@@ -313,7 +313,7 @@
       <author><first>Omar</first><last>Momen</last><affiliation>University of Bielefeld</affiliation></author>
       <author><first>Florian</first><last>Steig</last><affiliation>University of Bielefeld</affiliation></author>
       <author><first>J. Berenike</first><last>Herrmann</last><affiliation>University of Bielefeld</affiliation></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last><affiliation>University of Bielefeld</affiliation></author>
       <pages>308-325</pages>
       <abstract>Descriptions are a central component of literary texts, yet their systematic identification remains a challenge. This work suggests an approach to identifying sentences describing spatial conditions in literary text. It was developed iteratively on German literary text and extended to non-literary text to evaluate its applicability across textual domains. To assess the robustness of the method, we involved both humans and a selection of state-of-the-art Large Language Models (LLMs) in annotating a collection of sentences regarding their descriptiveness and spatiality. We compare the annotations across human annotators and between humans and LLMs. The main contributions of this paper are: (1) a set of annotation guidelines for identifying spatial descriptions in literary texts, (2) a curated dataset of almost 4,700 annotated sentences of which around 500 are spatial descriptions, produced through in-depth discussion and consensus among annotators, and (3) a pilot study of automating the task of spatial description annotation of German texts. We publish the codes and all human and LLM annotations for the public to be used for research purposes only.</abstract>
       <url hash="bbc89fd5">2025.law-1.26</url>
@@ -338,7 +338,7 @@
       <author><first>Nuno</first><last>Guimarães</last><affiliation>FCUP and INESC TEC</affiliation></author>
       <author><first>Rita</first><last>Rb-Silva</last><affiliation>CI-IPOP and Rise-Health</affiliation></author>
       <author><first>Luís Filipe</first><last>Cunha</last><affiliation>University of Porto, University of Minho and INESC TEC</affiliation></author>
-      <author><first>Alípio</first><last>Jorge</last><affiliation>FCUP and INESC TEC</affiliation></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last><affiliation>FCUP and INESC TEC</affiliation></author>
       <pages>332-343</pages>
       <abstract>The development of a robust annotation scheme and corresponding guidelines is crucial for producing annotated datasets that advance both linguistic and computational research. This paper presents a case study that outlines a methodology for designing an annotation scheme and its guidelines, specifically aimed at representing morphosyntactic and semantic information regarding temporal features, as well as medical information in medical reports written in Portuguese. We detail a multi-step process that includes reviewing existing frameworks, conducting an annotation experiment to determine the optimal approach, and designing a model based on these findings. We validated the approach through a pilot experiment where we assessed the reliability and applicability of the annotation scheme and guidelines. In this experiment, two annotators independently annotated a patient’s medical report consisting of six documents using the proposed model, while a curator established the ground truth. The analysis of inter-annotator agreement and the annotation results enabled the identification of sources of human variation and provided insights for further refinement of the annotation scheme and guidelines.</abstract>
       <url hash="1c9172c4">2025.law-1.28</url>
diff --git a/data/xml/2025.llmsec.xml b/data/xml/2025.llmsec.xml
index bdc87afb56..f58c373d1e 100644
--- a/data/xml/2025.llmsec.xml
+++ b/data/xml/2025.llmsec.xml
@@ -11,7 +11,7 @@
       <venue>llmsec</venue>
       <venue>ws</venue>
       <isbn>979-8-89176-279-4</isbn>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <editor><first>Jekaterina</first><last>Novikova</last></editor>
       <editor><first>Muhao</first><last>Chen</last></editor>
     </meta>
@@ -69,7 +69,7 @@
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Yaqian</first><last>Zhou</last><affiliation>Fudan University</affiliation></author>
       <author><first>Xipeng</first><last>Qiu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>26-47</pages>
       <abstract>Recent advancements in model architectures and length extrapolation techniques have significantly extended the context length of large language models (LLMs), paving the way for their application in increasingly complex tasks. However, despite the growing capabilities of long-context LLMs, the safety issues in long-context scenarios remain underexplored. While safety alignment in short context has been widely studied, the safety concerns of long-context LLMs have not been adequately addressed. In this work, we introduce ${textbf{LongSafety}}$, a comprehensive safety alignment dataset for long-context LLMs, containing 10 tasks and 17k samples, with an average length of 40.9k tokens. Our experiments demonstrate that training with LongSafety can enhance long-context safety performance while enhancing short-context safety and preserving general capabilities. Furthermore, we demonstrate that long-context safety does not equal long-context alignment with short-context safety data and LongSafety has generalizing capabilities in context length and long-context safety scenarios.</abstract>
       <url hash="600021fe">2025.llmsec-1.4</url>
@@ -142,7 +142,7 @@
     </paper>
     <paper id="10">
       <title>Fine-Tuning Lowers Safety and Disrupts Evaluation Consistency</title>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Hillary</first><last>Dawkins</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Isar</first><last>Nejadgholi</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
diff --git a/data/xml/2025.lm4uc.xml b/data/xml/2025.lm4uc.xml
index 6d8118eac2..a5f1109111 100644
--- a/data/xml/2025.lm4uc.xml
+++ b/data/xml/2025.lm4uc.xml
@@ -17,7 +17,7 @@
       <editor><first>Duc</first><last>Nguyen</last></editor>
       <editor><first>Angelina</first><last>Wang</last></editor>
       <editor><first>Daniel</first><last>Ho</last></editor>
-      <editor><first>Alice</first><last>Oh</last></editor>
+      <editor id="alice-oh"><first>Alice</first><last>Oh</last></editor>
       <editor><first>Sanmi</first><last>Koyejo</last></editor>
     </meta>
     <frontmatter>
@@ -49,7 +49,7 @@
     <paper id="3">
       <title><fixed-case>ABDUL</fixed-case>: A New Approach to Build Language Models for Dialects Using Formal Language Corpora Only</title>
       <author><first>Yassine</first><last>Toughrai</last><affiliation>Université de Lorraine</affiliation></author>
-      <author><first>Kamel</first><last>Smaïli</last><affiliation>Université de Lorraine</affiliation></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last><affiliation>Université de Lorraine</affiliation></author>
       <author><first>David</first><last>Langlois</last><affiliation>Université de Lorraine</affiliation></author>
       <pages>16-21</pages>
       <abstract>Arabic dialects present major challenges for natural language processing (NLP) due to their diglossic nature, phonetic variability, and the scarcity of resources. To address this, we introduce a phoneme-like transcription approach that enables the training of robust language models for North African Dialects (NADs) using only formal language data, without the need for dialect-specific corpora.Our key insight is that Arabic dialects are highly phonetic, with NADs particularly influenced by European languages. This motivated us to develop a novel approach in which we convert Arabic script into a Latin-based representation, allowing our language model, ABDUL, to benefit from existing Latin-script corpora.Our method demonstrates strong performance in multi-label emotion classification and named entity recognition (NER) across various Arabic dialects. ABDUL achieves results comparable to or better than specialized and multilingual models such as DarijaBERT, DziriBERT, and mBERT. Notably, in the NER task, ABDUL outperforms mBERT by 5% in F1-score for Modern Standard Arabic (MSA), Moroccan, and Algerian Arabic, despite using a vocabulary four times smaller than mBERT.</abstract>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="5">
       <title>Serving the Underserved: Leveraging <fixed-case>BARTB</fixed-case>ahnar Language Model for Bahnaric-<fixed-case>V</fixed-case>ietnamese Translation</title>
-      <author><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh City University of Technology - VNU-HCM</affiliation></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last><affiliation>Ho Chi Minh City University of Technology - VNU-HCM</affiliation></author>
       <author><first>Tran</first><last>Le</last><affiliation>Ho Chi Minh City University of Technology - VNU-HCM</affiliation></author>
       <author><first>Huong</first><last>Nguyen</last><affiliation>Ho Chi Minh City University of Technology - VNU-HCM</affiliation></author>
       <author><first>Quynh</first><last>Vo</last><affiliation>Ho Chi Minh City University of Technology - VNU-HCM</affiliation></author>
diff --git a/data/xml/2025.loreslm.xml b/data/xml/2025.loreslm.xml
index 95272a3132..09629124ee 100644
--- a/data/xml/2025.loreslm.xml
+++ b/data/xml/2025.loreslm.xml
@@ -6,7 +6,7 @@
       <editor><first>Hansi</first><last>Hettiarachchi</last></editor>
       <editor><first>Tharindu</first><last>Ranasinghe</last></editor>
       <editor><first>Paul</first><last>Rayson</last></editor>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Mohamed</first><last>Gaber</last></editor>
       <editor><first>Damith</first><last>Premasiri</last></editor>
       <editor><first>Fiona Anting</first><last>Tan</last></editor>
@@ -49,9 +49,9 @@
       <author><first>Imane</first><last>Momayiz</last></author>
       <author><first>Xuguang</first><last>Ren</last></author>
       <author><first>Eric</first><last>Moulines</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Michalis</first><last>Vazirgiannis</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>9–30</pages>
       <abstract>We introduce Atlas-Chat, the first-ever collection of LLMs specifically developed for dialectal Arabic. Focusing on Moroccan Arabic, also known as Darija, we construct our instruction dataset by consolidating existing Darija language resources, creating novel datasets both manually and synthetically, and translating English instructions with stringent quality control. Atlas-Chat-2B, 9B, and 27B models, fine-tuned on the dataset, exhibit superior ability in following Darija instructions and performing standard NLP tasks. Notably, our models outperform both state-of-the-art and Arabic-specialized LLMs like LLaMa, Jais, and AceGPT, e.g., our 9B model gains a 13% performance boost over a larger 13B model on DarijaMMLU, in our newly introduced evaluation suite for Darija covering both discriminative and generative tasks. Furthermore, we perform an experimental analysis of various fine-tuning strategies and base model choices to determine optimal configurations. All our resources are publicly accessible, and we believe our work offers comprehensive design methodologies of instruction-tuning for low-resource languages, which are often neglected in favor of data-rich languages by contemporary LLMs.</abstract>
       <url hash="ec78512d">2025.loreslm-1.2</url>
@@ -124,7 +124,7 @@
     <paper id="9">
       <title><fixed-case>F</fixed-case>ilipino Benchmarks for Measuring Sexist and Homophobic Bias in Multilingual Language Models from <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sia</title>
       <author><first>Lance Calvin Lim</first><last>Gamboa</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>123–134</pages>
       <abstract>Bias studies on multilingual models confirm the presence of gender-related stereotypes in masked models processing languages with high NLP resources. We expand on this line of research by introducing Filipino CrowS-Pairs and Filipino WinoQueer: benchmarks that assess both sexist and anti-queer biases in pretrained language models (PLMs) handling texts in Filipino, a low-resource language from the Philippines. The benchmarks consist of 7,074 new challenge pairs resulting from our cultural adaptation of English bias evaluation datasets—a process that we document in detail to guide similar forthcoming efforts. We apply the Filipino benchmarks on masked and causal multilingual models, including those pretrained on Southeast Asian data, and find that they contain considerable amounts of bias. We also find that for multilingual models, the extent of bias learned for a particular language is influenced by how much pretraining data in that language a model was exposed to. Our benchmarks and insights can serve as a foundation for future work analyzing and mitigating bias in multilingual models.</abstract>
       <url hash="3794ac85">2025.loreslm-1.9</url>
@@ -264,7 +264,7 @@
       <title>Exploiting Task Reversibility of <fixed-case>DRS</fixed-case> Parsing and Generation: Challenges and Insights from a Multi-lingual Perspective</title>
       <author><first>Muhammad Saad</first><last>Amin</last></author>
       <author><first>Luca</first><last>Anselma</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>268–286</pages>
       <abstract>Semantic parsing and text generation exhibit reversible properties when utilizing Discourse Representation Structures (DRS). However, both processes—text-to-DRS parsing and DRS-to-text generation—are susceptible to errors. In this paper, we exploit the reversible nature of DRS to explore both error propagation, which is commonly seen in pipeline methods, and the less frequently studied potential for error correction. We investigate two pipeline approaches: Parse-Generate-Parse (PGP) and Generate-Parse-Generate (GPG), utilizing pre-trained language models where the output of one model becomes the input for the next. Our evaluation uses the Parallel Meaning Bank dataset, focusing on Urdu as a low-resource language, Italian as a mid-resource language, and English serving as a high-resource baseline. Our analysis highlights that while pipelines are theoretically suited for error correction, they more often propagate errors, with Urdu exhibiting the greatest sensitivity, Italian showing a moderate effect, and English demonstrating the highest stability. This variation highlights the unique challenges faced by low-resource languages in semantic processing tasks. Further, our findings suggest that these pipeline methods support the development of more linguistically balanced datasets, enabling a comprehensive assessment across factors like sentence structure, length, type, polarity, and voice. Our cross-linguistic analysis provides valuable insights into the behavior of DRS processing in low-resource contexts, demonstrating both the potential and limitations of reversible pipeline approaches.</abstract>
       <url hash="b75a4d59">2025.loreslm-1.22</url>
@@ -274,7 +274,7 @@
       <title><fixed-case>BBPOS</fixed-case>: <fixed-case>BERT</fixed-case>-based Part-of-Speech Tagging for <fixed-case>U</fixed-case>zbek</title>
       <author><first>Latofat</first><last>Bobojonova</last></author>
       <author><first>Arofat</first><last>Akhundjanova</last></author>
-      <author><first>Phil Sidney</first><last>Ostheimer</last></author>
+      <author id="phil-sidney-ostheimer"><first>Phil Sidney</first><last>Ostheimer</last></author>
       <author><first>Sophie</first><last>Fellenz</last></author>
       <pages>287–293</pages>
       <abstract>This paper advances NLP research for the low-resource Uzbek language by evaluating two previously untested monolingual Uzbek BERT models on the part-of-speech (POS) tagging task and introducing the first publicly available UPOS-tagged benchmark dataset for Uzbek. Our fine-tuned models achieve 91% average accuracy, outperforming the baseline multi-lingual BERT as well as the rule-based tagger. Notably, these models capture intermediate POS changes through affixes and demonstrate context sensitivity, unlike existing rule-based taggers.</abstract>
@@ -355,7 +355,7 @@
     <paper id="30">
       <title>Controlled Evaluation of Syntactic Knowledge in Multilingual Language Models</title>
       <author><first>Daria</first><last>Kryvosheieva</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>402–413</pages>
       <abstract>Language models (LMs) are capable of acquiring elements of human-like syntactic knowledge. Targeted syntactic evaluation tests have been employed to measure how well they form generalizations about syntactic phenomena in high-resource languages such as English. However, we still lack a thorough understanding of LMs’ capacity for syntactic generalizations in low-resource languages, which are responsible for much of the diversity of syntactic patterns worldwide. In this study, we develop targeted syntactic evaluation tests for three low-resource languages (Basque, Hindi, and Swahili) and use them to evaluate five families of open-access multilingual Transformer LMs. We find that some syntactic tasks prove relatively easy for LMs while others (agreement in sentences containing indirect objects in Basque, agreement across a prepositional phrase in Swahili) are challenging. We additionally uncover issues with publicly available Transformers, including a bias toward the habitual aspect in Hindi in multilingual BERT and underperformance compared to similar-sized models in XGLM-4.5B.</abstract>
       <url hash="ba8c2722">2025.loreslm-1.30</url>
@@ -388,7 +388,7 @@
       <title>When <fixed-case>LLM</fixed-case>s Struggle: Reference-less Translation Evaluation for Low-resource Languages</title>
       <author><first>Archchana</first><last>Sindhujan</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Shenbin</first><last>Qian</last></author>
       <pages>437–459</pages>
       <abstract>This paper investigates the reference-less evaluation of machine translation for low-resource language pairs, known as quality estimation (QE). Segment-level QE is a challenging cross-lingual language understanding task that provides a quality score (0 -100) to the translated output. We comprehensively evaluate large language models (LLMs) in zero/few-shot scenarios and perform instruction fine-tuning using a novel prompt based on annotation guidelines. Our results indicate that prompt-based approaches are outperformed by the encoder-based fine-tuned QE models. Our error analysis reveals tokenization issues, along with errors due to transliteration and named entities, and argues for refinement in LLM pre-training for cross-lingual tasks. We release the data, and models trained publicly for further research.</abstract>
diff --git a/data/xml/2025.loresmt.xml b/data/xml/2025.loresmt.xml
index 5bd3174153..a3cdebd2f4 100644
--- a/data/xml/2025.loresmt.xml
+++ b/data/xml/2025.loresmt.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2025-04-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-hong</first><last>Liu</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
       <editor><first>Flammie</first><last>Pirinen</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
       <editor><first>Xiaobing</first><last>Zhao</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -28,7 +28,7 @@
     <paper id="1">
       <title>Comparative Evaluation of Machine Translation Models Using Human-Translated Social Media Posts as References: Human-Translated Datasets</title>
       <author><first>Shareefa Ahmed</first><last>Al Amer</last></author>
-      <author><first>Mark G.</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark G.</first><last>Lee</last></author>
       <author><first>Phillip</first><last>Smith</last><affiliation>University of Birmingham</affiliation></author>
       <pages>1-9</pages>
       <abstract>Machine translation (MT) of social media text presents unique challenges due to its informal nature, linguistic variations, and rapid evolution of language trends. In this paper, we propose a human-translated English dataset to Arabic, Italian, and Spanish, and a human-translated Arabic dataset to Modern Standard Arabic (MSA) and English. We also perform a comprehensive analysis of three publicly accessible MT models using human translations as a reference. We investigate the impact of social media informality on translation quality by translating the MSA version of the text and comparing BLEU and METEOR scores with the direct translation of the original social media posts. Our findings reveal that MarianMT provides the closest translations to human for Italian and Spanish among the three models, with METEOR scores of 0.583 and 0.640, respectively, while Google Translate provides the closest translations for Arabic, with a METEOR score of 0.354. By comparing the translation of the original social media posts with the MSA version, we confirm that the informality of social media text significantly impacts translation quality, with an increase of 12 percentage points in METEOR scores over the original posts. Additionally, we investigate inter-model alignment and the degree to which the output of these MT models align.</abstract>
@@ -59,7 +59,7 @@
       <title>From Text to Multi-Modal: Advancing Low-Resource-Language Translation through Synthetic Data Generation and Cross-Modal Alignments</title>
       <author><first>Bushi</first><last>Xiao</last></author>
       <author><first>Qian</first><last>Shen</last></author>
-      <author><first>Daisy Zhe</first><last>Wang</last><affiliation>University of Florida</affiliation></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last><affiliation>University of Florida</affiliation></author>
       <pages>24-35</pages>
       <abstract>In this study, we propose a novel paradigm for multi-modal low resource language dataset generation that eliminates dependency on existing parallel multi-modal datasets. Leveraging advances in large image-generation models, we introduce a systematic pipeline that transforms text-only parallel corpora into rich multi-modal translation datasets. We then validate the generated content through human evaluation. We design and implement a new MMT model framework suitable for our new generated dataset. The model contains a verification mechanism with a large language model to ensure consistency between visual content and textual translations. Experimental results across four African low-resource languages with less than 10k training corpus demonstrate significant improvements over NLLB baselines, with average gains of up to 9.8% in BLEU score and 4.3% in METEOR score. Our method shows particular effectiveness in correctly translating concrete objects and contextual elements, suggesting its potential for improving low-resource machine translation through visual grounding.</abstract>
       <url hash="5922b07e">2025.loresmt-1.4</url>
@@ -104,7 +104,7 @@
       <author><first>Junyoung</first><last>Lee</last><affiliation>Home Team Science and Technology Agency</affiliation></author>
       <author><first>Marco</first><last>Cognetta</last><affiliation>Tokyo Institute of Technology, Tokyo Institute of Technology and Google</affiliation></author>
       <author><first>Sangwhan</first><last>Moon</last><affiliation>Google and Tokyo Institute of Technology</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <pages>66-80</pages>
       <abstract>Subword tokenization, where text is represented in an intermediate form between full words and characters, is ubiquitous in modern NLP due to its ability to represent any input sentence with a small vocabulary. However for Korean, where there are 11,172 base characters (*syllables*) in its alphabet, it is difficult to have a vocabulary large enough to succinctly encode text while fitting within parameter-budget constraints. This motivates us to explore an alternative representation for Korean which relies on the decompositional nature of Korean syllables: a syllable can be uniquely decomposed into a sequence of two or three subcharacters (*jamo*), of which there are only 68.Using jamo as the basis for subword tokenization (e.g., byte-pair encoding) leads to shorter tokenized sequences with fewer vocabulary parameters, exposes the model to sub-syllable-level morphological information, and increases the amount of augmentation gained from subword regularization. We evaluate jamo-level subword tokenization on several Korean translation tasks and find that jamo-level subword models consistently outperform syllable- and byte-level models in low-resource and restricted-vocabulary settings.</abstract>
       <url hash="d7613539">2025.loresmt-1.8</url>
@@ -130,8 +130,8 @@
       <author><first>Ryan Andrew</first><last>Chi</last></author>
       <author><first>Lucas</first><last>Huang</last></author>
       <author><first>Ethan A</first><last>Chi</last><affiliation>Hudson River Trading</affiliation></author>
-      <author><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <pages>105-114</pages>
       <abstract>We introduce ModeLing, a novel benchmark of Linguistics Olympiad-style puzzles which tests few-shot reasoning in AI systems. Solving these puzzles necessitates inferring aspects of a language’s grammatical structure from a small number of examples. Such puzzles provide a natural testbed for language models, as they require compositional generalization and few-shot inductive reasoning. Consisting solely of new puzzles written specifically for this work, ModeLing has no risk of appearing in the training data of existing AI systems: this ameliorates the risk of data leakage, a potential confounder for many prior evaluations of reasoning. Evaluating several large open source language models and GPT on our benchmark, we observe non-negligible accuracy, demonstrating few-shot emergent reasoning ability which cannot merely be attributed to shallow memorization. However, imperfect model performance suggests that ModeLing can be used to measure further progress in linguistic reasoning.</abstract>
       <url hash="0c1c115b">2025.loresmt-1.10</url>
@@ -165,7 +165,7 @@
       <title>Low-Resource Transliteration for <fixed-case>R</fixed-case>oman-<fixed-case>U</fixed-case>rdu and <fixed-case>U</fixed-case>rdu Using Transformer-Based Models</title>
       <author><first>Umer</first><last>Butt</last><affiliation>German Research Center for AI and Universität des Saarlandes</affiliation></author>
       <author><first>Stalin</first><last>Varanasi</last></author>
-      <author><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last><affiliation>German Research Center for AI</affiliation></author>
       <pages>144-153</pages>
       <abstract>As the Information Retrieval (IR) field increasingly recognizes the importance of inclusivity, addressing the needs of low-resource languages remains a significant challenge. Transliteration between Urdu and its Romanized form, Roman Urdu, remains underexplored despite the widespread use of both scripts in South Asia. Prior work using RNNs on the Roman-Urdu-Parl dataset showed promising results but suffered from poor domain adaptability and limited evaluation. We propose a transformer-based approach using the m2m100 multilingual translation model, enhanced with masked language modeling (MLM) pretraining and fine-tuning on both Roman-Urdu-Parl and the domain diverse Dakshina dataset. To address previous evaluation flaws, we introduce rigorous dataset splits and assess performance using BLEU, character-level BLEU, and CHRF. Our model achieves strong transliteration performance, with Char-BLEU scores of 96.37 for Urdu→Roman-Urdu and 97.44 for Roman-Urdu→Urdu. These results outperform both RNN baselines and GPT-4o Mini and demonstrate the effectiveness of multilingual transfer learning for low-resource transliteration tasks.</abstract>
       <url hash="b940b564">2025.loresmt-1.13</url>
@@ -179,7 +179,7 @@
       <author><first>Alex</first><last>Miłowski</last></author>
       <author><first>Thom</first><last>Vaughan</last></author>
       <author><first>Sara</first><last>Hincapie-Monsalve</last></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last></author>
       <author><first>Kurt</first><last>Bollacker</last></author>
       <pages>154-160</pages>
       <abstract>The MLCommons Datasets Working Group presents a comprehensive initiative to advance the development and accessibility of artificial intelligence (AI) training and testing resources. This paper introduces three key projects aimed at addressing critical gaps in the AI data ecosystem: the Unsupervised People’s Speech Dataset, containing over 821,000 hours of speech across 89+ languages; a strategic collaboration with Common Crawl to enhance web crawling capabilities for low-resource languages; and a framework for knowledge graph extraction evaluation. By focusing on languages other than English (LOTE) and creating permissively licensed, high-quality datasets, these initiatives aim to democratize AI development and improve model performance across diverse linguistic contexts. This work represents a significant step toward more inclusive and capable AI systems that can serve global communities.</abstract>
@@ -206,7 +206,7 @@
       <author><first>Nadia</first><last>Hajipour</last><affiliation>Institute for Humanities and Cultural Studies</affiliation></author>
       <author><first>Sadra</first><last>Sabouri</last></author>
       <author><first>Ehsaneddin</first><last>Asgari</last><affiliation>Qatar Computing Research Institute and University of California, Berkeley</affiliation></author>
-      <author><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last><affiliation>Sharif University of Technology</affiliation></author>
       <pages>171-182</pages>
       <abstract>The Pahlavi language, aka Middle Persian, is a critical part of Persian cultural and historical heritage which bridges the Old Persian and Modern Persian (Farsi). However, due to its limited digital presence and the scarcity of comprehensive linguistic resources, Pahlavi is at risk of extinction. As an early attempt to preserve this language, this study introduces a framework to translate English text into Pahlavi. Our approach combines grammar-guided term extraction with zero-shot translation, leveraging large language models (LLMs) to generate syntactically and semantically accurate Pahlavi sentences.This framework aims to preserve the Pahlavi language and serves as a model for reviving other endangered languages with similar characteristics. Finally using our framework, we generate a novel dataset of 360 expert-validated parallel English-Pahlavi texts.</abstract>
       <url hash="4edf13c2">2025.loresmt-1.16</url>
diff --git a/data/xml/2025.magmar.xml b/data/xml/2025.magmar.xml
index 8b4a897e0a..8b67945238 100644
--- a/data/xml/2025.magmar.xml
+++ b/data/xml/2025.magmar.xml
@@ -40,7 +40,7 @@
       <author><first>Niloufar</first><last>Baba Ahmadi</last><affiliation>University of Hamburg, Germany</affiliation></author>
       <author><first>Iris</first><last>Vogel</last><affiliation>University of Hamburg, Germany</affiliation></author>
       <author><first>Martin</first><last>Semmann</last><affiliation>University of Hamburg, Germany</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>University of Hamburg, Germany</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>University of Hamburg, Germany</affiliation></author>
       <pages>18-39</pages>
       <abstract>In this paper, we introduce CollEx, an innovative multimodal agentic Retrieval-Augmented Generation (RAG) system designed to enhance interactive exploration of extensive scientific collections. Given the overwhelming volume and inherent complexity of scientific collections, conventional search systems often lack necessary intuitiveness and interactivity, presenting substantial barriers for learners, educators, and researchers. CollEx addresses these limitations by employing state-of-the-art Large Vision-Language Models (LVLMs) as multimodal agents accessible through an intuitive chat interface. By abstracting complex interactions via specialized agents equipped with advanced tools, CollEx facilitates curiosity-driven exploration, significantly simplifying access to diverse scientific collections and records therein. Our system integrates textual and visual modalities, supporting educational scenarios that are helpful for teachers, pupils, students, and researchers by fostering independent exploration as well as scientific excitement and curiosity. Furthermore, CollEx serves the research community by discovering interdisciplinary connections and complementing visual data. We illustrate the effectiveness of our system through a proof-of-concept application containing over 64,000 unique records across 32 collections from a local scientific collection from a public university.</abstract>
       <url hash="1b493ebe">2025.magmar-1.2</url>
@@ -50,7 +50,7 @@
     <paper id="3">
       <title><fixed-case>V</fixed-case>ox<fixed-case>RAG</fixed-case>: A Step Toward Transcription-Free <fixed-case>RAG</fixed-case> Systems in Spoken Question Answering</title>
       <author><first>Zackary</first><last>Rackauckas</last><affiliation>Columbia University</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <pages>40-46</pages>
       <abstract>We introduce VoxRAG, a modular speech-to-speech retrieval-augmented generation system that bypasses transcription to retrieve semantically relevant audio segments directly from spoken queries. VoxRAG employs silence-aware segmentation, speaker diarization, CLAP audio embeddings, and FAISS retrieval using L2-normalized cosine similarity. We construct a 50-query test set recorded as spoken input by a native English speaker. Retrieval quality was evaluated using LLM-as-a-judge annotations. For very relevant segments, cosine similarity achieved a Recall@10 of 0.34. For somewhat relevant segments, Recall@10 rose to 0.60 and nDCG@10 to 0.27, highlighting strong topical alignment. Answer quality was judged on a 0–2 scale across relevance, accuracy, completeness, and precision, with mean scores of 0.84, 0.58, 0.56, and 0.46 respectively. While precision and retrieval quality remain key limitations, VoxRAG shows that transcription-free speech-to-speech retrieval is feasible in RAG systems.</abstract>
       <url hash="e975d524">2025.magmar-1.3</url>
diff --git a/data/xml/2025.mcg.xml b/data/xml/2025.mcg.xml
index 4833ba76f4..f6a344cb81 100644
--- a/data/xml/2025.mcg.xml
+++ b/data/xml/2025.mcg.xml
@@ -7,10 +7,10 @@
       <editor><first>María Estrella</first><last>Vallecillo-Rodríguez</last></editor>
       <editor><first>Irune</first><last>Zubiaga</last></editor>
       <editor><first>Arturo</first><last>Montejo-Ráez</last></editor>
-      <editor><first>Aitor</first><last>Soroa</last></editor>
-      <editor><first>María Teresa</first><last>Martín-Valdivia</last></editor>
+      <editor id="aitor-soroa"><first>Aitor</first><last>Soroa</last></editor>
+      <editor id="m-teresa-martin-valdivia"><first>María Teresa</first><last>Martín-Valdivia</last></editor>
       <editor><first>Marco</first><last>Guerini</last></editor>
-      <editor><first>Rodrigo</first><last>Agerri</last></editor>
+      <editor id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, UAE</address>
       <month>January</month>
diff --git a/data/xml/2025.mtsummit.xml b/data/xml/2025.mtsummit.xml
index de848e3637..f52e4290b1 100644
--- a/data/xml/2025.mtsummit.xml
+++ b/data/xml/2025.mtsummit.xml
@@ -3,11 +3,11 @@
   <volume id="1" ingest-date="2025-08-07" type="proceedings">
     <meta>
       <booktitle>Proceedings of Machine Translation Summit XX: Volume 1</booktitle>
-      <editor><first>Pierrette</first><last>Bouillon</last></editor>
+      <editor id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></editor>
       <editor><first>Johanna</first><last>Gerlach</last></editor>
       <editor><first>Sabrina</first><last>Girletti</last></editor>
       <editor><first>Lise</first><last>Volkart</last></editor>
-      <editor><first>Raphael</first><last>Rubino</last></editor>
+      <editor id="raphael-rubino"><first>Raphael</first><last>Rubino</last></editor>
       <editor><first>Rico</first><last>Sennrich</last></editor>
       <editor><first>Ana C.</first><last>Farinha</last></editor>
       <editor><first>Marco</first><last>Gaido</last></editor>
@@ -82,8 +82,8 @@
     <paper id="6">
       <title>Optimizing the Training Schedule of Multilingual <fixed-case>NMT</fixed-case> using Reinforcement Learning</title>
       <author><first>Alexis</first><last>Allemann</last></author>
-      <author><first>Àlex R.</first><last>Atrio</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="alex-r-atrio"><first>Àlex R.</first><last>Atrio</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>65–80</pages>
       <abstract>Multilingual NMT is a viable solution for translating low-resource languages (LRLs) when data from high-resource languages (HRLs) from the same language family is available. However, the training schedule, i.e. the order of presentation of languages, has an impact on the quality of such systems. Here, in a many-to-one translation setting, we propose to apply two algorithms that use reinforcement learning to optimize the training schedule of NMT: (1) Teacher-Student Curriculum Learning and (2) Deep Q Network. The former uses an exponentially smoothed estimate of the returns of each action based on the loss on monolingual or multilingual development subsets, while the latter estimates rewards using an additional neural network trained from the history of actions selected in different states of the system, together with the rewards received. On a 8-to-1 translation dataset with LRLs and HRLs, our second method improves BLEU and COMET scores with respect to both random selection of monolingual batches and shuffled multilingual batches, by adjusting the number of presentations of LRL vs. HRL batches.</abstract>
       <url hash="085d0d32">2025.mtsummit-1.6</url>
@@ -175,7 +175,7 @@
       <title>Testing <fixed-case>LLM</fixed-case>s’ Capabilities in Annotating Translations Based on an Error Typology Designed for <fixed-case>LSP</fixed-case> Translation: First Experiments with <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case></title>
       <author><first>Joachim</first><last>Minder</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Natalie</first><last>Kübler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kübler</last></author>
       <pages>190–203</pages>
       <abstract>This study investigates the capabilities of large language models (LLMs), specifically ChatGPT, in annotating MT outputs based on an error typology. In contrast to previous work focusing mainly on general language, we explore ChatGPT’s ability to identify and categorise errors in specialised translations. By testing two different prompts and based on a customised error typology, we compare ChatGPT annotations with human expert evaluations of translations produced by DeepL and ChatGPT itself. The results show that, for translations generated by DeepL, recall and precision are quite high. However, the degree of accuracy in error categorisation depends on the prompt’s specific features and its level of detail, ChatGPT performing very well with a detailed prompt. When evaluating its own translations, ChatGPT achieves significantly poorer results, revealing limitations with self-assessment. These results highlight both the potential and the limitations of LLMs for translation evaluation, particularly in specialised domains. Our experiments pave the way for future research on open-source LLMs, which could produce annotations of comparable or even higher quality. In the future, we also aim to test the practical effectiveness of this automated evaluation in the context of translation training, particularly by optimising the process of human evaluation by teachers and by exploring the impact of annotations by LLMs on students’ post-editing and translation learning.</abstract>
       <url hash="16588239">2025.mtsummit-1.15</url>
@@ -221,8 +221,8 @@
     </paper>
     <paper id="20">
       <title>Intrinsic vs. Extrinsic Evaluation of <fixed-case>C</fixed-case>zech Sentence Embeddings: Semantic Relevance Doesn’t Help with <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Petra</first><last>Barančíková</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>265–275</pages>
       <abstract>In this paper, we compare Czech-specific and multilingual sentence embedding models through intrinsic and extrinsic evaluation paradigms. For intrinsic evaluation, we employ Costra, a complex sentence transformation dataset, and several Semantic Textual Similarity (STS) benchmarks to assess the ability of the embeddings to capture linguistic phenomena such as semantic similarity, temporal aspects, and stylistic variations. In the extrinsic evaluation, we fine-tune each embedding model using COMET-based metrics for machine translation evaluation. Our experiments reveal an interesting disconnect: models that excel in intrinsic semantic similarity tests do not consistently yield superior performance on downstream translation evaluation tasks. Conversely, models with seemingly over-smoothed embedding spaces can, through fine-tuning, achieve excellent results. These findings highlight the complex relationship between semantic property probes and downstream task, emphasizing the need for more research into “operationalizable semantics” in sentence embeddings, or more in-depth downstream tasks datasets (here translation evaluation).</abstract>
       <url hash="a03907ed">2025.mtsummit-1.20</url>
@@ -292,7 +292,7 @@
       <author><first>Yash</first><last>Bhaskar</last></author>
       <author><first>Ketaki</first><last>Shetye</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last></author>
       <pages>344–352</pages>
       <abstract>This study addresses the critical challenge of data scarcity in machine translation for Indian languages, particularly given their morphological complexity and limited parallel data. We investigate an effective strategy to maximize the utility of existing data by generating negative samples from positive training instances using a progressive perturbation approach. This is used for aligning the model with preferential data using Kahneman-Tversky Optimization (KTO). Comparing it against traditional Supervised Fine-Tuning (SFT), we demonstrate how generating negative samples and leveraging KTO enhances data efficiency. By creating rejected samples through progressively perturbed translations from the available dataset, we fine-tune the Llama 3.1 Instruct 8B model using QLoRA across 16 language directions, including English, Hindi, Bangla, Tamil, Telugu, and Santali. Our results show that KTO-based preference alignment with progressive perturbation consistently outperforms SFT, achieving significant gains in translation quality with an average BLEU increase of 1.84 to 2.47 and CHRF increase of 2.85 to 4.01 compared to SFT for selected languages, while using the same positive training samples and under similar computational constraints. This highlights the potential of our negative sample generation strategy within KTO, especially in low resource scenarios.</abstract>
@@ -302,7 +302,7 @@
     <paper id="27">
       <title>Leveraging Visual Scene Graph to Enhance Translation Quality in Multimodal Machine Translation</title>
       <author><first>Ali</first><last>Hatami</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>353–364</pages>
       <abstract>Despite significant advancements in Multimodal Machine Translation, understanding and effectively utilising visual scenes within multimodal models remains a complex challenge. Extracting comprehensive and relevant visual features requires extensive and detailed input data to ensure the model accurately captures objects, their attributes, and relationships within a scene. In this paper, we explore using visual scene graphs extracted from images to enhance the performance of translation models. We investigate this approach for integrating Visual Scene Graph information into translation models, focusing on representing this information in a semantic structure rather than relying on raw image data. The performance of our approach was evaluated on the Multi30K dataset for English into German, French, and Czech translations using BLEU, chrF2, TER and COMET metrics. Our results demonstrate that utilising visual scene graph information improves translation performance. Using information on semantic structure can improve the multimodal baseline model, leading to better contextual understanding and translation accuracy.</abstract>
@@ -331,7 +331,7 @@
       <title>Quality Estimation and Post-Editing Using <fixed-case>LLM</fixed-case>s For <fixed-case>I</fixed-case>ndic Languages: How Good Is It?</title>
       <author><first>Anushka</first><last>Singh</last></author>
       <author><first>Aarya</first><last>Pakhale</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <pages>388–398</pages>
       <abstract>Recently, there have been increasing efforts on Quality Estimation (QE) and Post-Editing (PE) using Large Language Models (LLMs) for Machine Translation (MT). However, the focus has mainly been on high resource languages and the approaches either rely on prompting or combining existing QE models with LLMs, instead of single end-to-end systems. In this paper, we investigate the efficacy of end-to-end QE and PE systems for low-resource languages taking 5 Indian languages as a use-case. We augment existing QE data containing multidimentional quality metric (MQM) error annotations with explanations of errors and PEs with the help of proprietary LLMs (GPT-4), following which we fine-tune Gemma-2-9B, an open-source multilingual LLM to perform QE and PE jointly. While our models attain QE capabilities competitive with or surpassing existing models in both referenceful and referenceless settings, we observe that they still struggle with PE. Further investigation reveals that this occurs because our models lack the ability to accurately identify fine-grained errors in the translation, despite being excellent indicators of overall quality. This opens up opportunities for research in end-to-end QE and PE for low-resource languages.</abstract>
@@ -476,7 +476,7 @@
     </paper>
     <paper id="45">
       <title>Improving <fixed-case>MT</fixed-case>-enabled Triage Performance with Multiple <fixed-case>MT</fixed-case> Outputs</title>
-      <author><first>Marianna J.</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna J.</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>592–607</pages>
       <abstract>Recent advances in Machine Translation (MT) quality may motivate adoption in a variety of use cases, but the success of MT deployment depends not only on intrinsic model quality but on how well the model, as deployed, helps users meet the objectives of their use case. This work focuses on a specific triage use case, MT-enabled scanning in intelligence analysis. After describing the use case with its objectives and failure modes, we present a user study to establish a baseline performance level and measure the mitigating effects of a simple intervention, providing additional MT outputs. We find significant improvements in relevance judgment accuracy with outputs from two distinct neural MT models and significant improvements in relevant entity identification with the addition of a rule-based MT. Users also like seeing multiple MT outputs, making it an appealing way to improve MT-enabled scanning performance.</abstract>
@@ -507,15 +507,15 @@
   <volume id="2" ingest-date="2025-08-07" type="proceedings">
     <meta>
       <booktitle>Proceedings of Machine Translation Summit XX: Volume 2</booktitle>
-      <editor><first>Pierrette</first><last>Bouillon</last></editor>
+      <editor id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></editor>
       <editor><first>Johanna</first><last>Gerlach</last></editor>
       <editor><first>Sabrina</first><last>Girletti</last></editor>
       <editor><first>Lise</first><last>Volkart</last></editor>
-      <editor><first>Raphael</first><last>Rubino</last></editor>
+      <editor id="raphael-rubino"><first>Raphael</first><last>Rubino</last></editor>
       <editor><first>Rico</first><last>Sennrich</last></editor>
       <editor><first>Samuel</first><last>Läubli</last></editor>
       <editor><first>Martin</first><last>Volk</last></editor>
-      <editor><first>Miquel</first><last>Esplà-Gomis</last></editor>
+      <editor id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></editor>
       <editor><first>Vincent</first><last>Vandeghinste</last></editor>
       <editor><first>Helena</first><last>Moniz</last></editor>
       <editor><first>Sara</first><last>Szoc</last></editor>
@@ -553,7 +553,7 @@
     </paper>
     <paper id="3">
       <title>Speech-to-Speech Translation Pipelines for Conversations in Low-Resource Languages</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Alexis</first><last>Allemann</last></author>
       <author><first>Teo</first><last>Ferrari</last></author>
       <author><first>Gopal</first><last>Krishnamani</last></author>
@@ -623,7 +623,7 @@
       <title><fixed-case>Z</fixed-case>u<fixed-case>B</fixed-case>idasoa: Participatory Research for the Development of Linguistic Technologies Adapted to the Needs of Migrants in the <fixed-case>B</fixed-case>asque Country</title>
       <author><first>Xabier</first><last>Soto</last></author>
       <author><first>Ander</first><last>Egurtzegi</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Urtzi</first><last>Etxeberria</last></author>
       <pages>75–76</pages>
       <abstract>Recent years have witnessed the development of advanced language technologies, including the use of audio and images as part of multimodal systems. However, these models are not adapted to the specific needs of migrants and Non-Governmental Organizations (NGOs) communicating in multilingual scenarios. In this project, we focus on the situation of migrants arriving in the Basque Country, nearby the western border between Spain and France. For identifying migrants’ needs, we have met with several organisations helping them in different stages, including: sea rescue; primary care in refugee camps and in situ; assistance with asylum demands; other administrative issues; and human rights defence in retention centres. In these interviews, Darija has been identified as the most spoken language among the under-served ones. Considering this, we have started the development of a Machine Translation (MT) system between Basque and Darija (Moroccan Arabic), based on open-source corpora. In this paper, we present the description of the project and the main results of the participatory research developed in the initial stage.</abstract>
@@ -687,7 +687,7 @@
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <pages>89–90</pages>
       <abstract>A significant percentage of the population of Guatemala and Mexico belongs to various Mayan indigenous communities, for whom language barriers lead to social, economic, and digital exclusion. The Mayan languages spoken by these communities remain severely underrepresented in terms of digital resources, which prevents them from leveraging the latest advances in artificial intelligence. This project addresses that problem by means of: 1) the digitisation and release of multiple printed linguistic resources; 2) the development of a high-quality parallel machine translation (MT) evaluation corpus for six Mayan languages. In doing so, we are paving the way for the development of MT systems that will facilitate the access for Mayan speakers to essential services such as healthcare or legal aid. The resources are produced with the essential participation of indigenous communities, whereby native speakers provide the necessary translation services, QA, and linguistic expertise. The project is funded by the Google Academic Research Awards and carried out in collaboration with the Proyecto Lingüístico Francisco Marroquín Foundation in Guatemala.</abstract>
       <url hash="86879e3c">2025.mtsummit-2.15</url>
@@ -696,7 +696,7 @@
     <paper id="16">
       <title><fixed-case>P</fixed-case>ro<fixed-case>M</fixed-case>ut: The Evolution of <fixed-case>NMT</fixed-case> Didactic Tools</title>
       <author><first>Pilar</first><last>Sánchez-Gijón</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <pages>91–92</pages>
       <abstract>Neural Machine Translation intensifies educational challenges in translation technologies. The MultiTraiNMT project developed MutNMT, an open-source, didactic platform for training and evaluating NMT systems. Building upon it, LT-LiDER introduces ProMut which implements three main novel features: migration of the core NMT framework from JoeyNMT to MarianNMT, close integration with OPUS datasets, engines and connectors and the addition of a researcher profile for larger datasets and extended training processes and evaluation.</abstract>
       <url hash="20897031">2025.mtsummit-2.16</url>
@@ -717,7 +717,7 @@
       <title><fixed-case>D</fixed-case>e<fixed-case>MINT</fixed-case>: Automated Language Debriefing for <fixed-case>E</fixed-case>nglish Learners via <fixed-case>AI</fixed-case> Chatbot Analysis of Meeting Transcripts</title>
       <author><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>95–96</pages>
       <abstract>The objective of the DeMINT project is to develop a conversational tutoring system aimed at enhancing non-native English speakers’ language skills through post-meeting analysis of the transcriptions of video conferences in which they have participated. This paper describes the model developed and the results obtained through a human evaluation conducted with learners of English as a second language.</abstract>
@@ -747,17 +747,17 @@
     </paper>
     <paper id="21">
       <title><fixed-case>HPLT</fixed-case>’s Second Data Release</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Mikko</first><last>Aulamo</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
       <author><first>Laurie</first><last>Burchell</last></author>
       <author><first>Pinzhen</first><last>Chen</last></author>
       <author><first>Mariia</first><last>Fedorova</last></author>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Liane</first><last>Guillou</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Erik</first><last>Henriksson</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Veronika</first><last>Laippala</last></author>
@@ -768,11 +768,11 @@
       <author><first>Dayyán</first><last>O’Brien</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>David</first><last>Samuel</last></author>
       <author><first>Pavel</first><last>Stepachev</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <author><first>Jaume</first><last>Zaragoza-Bernabeu</last></author>
       <pages>101–102</pages>
       <abstract>We describe the progress of the High Performance Language Technologies (HPLT) project, a 3-year EU-funded project that started in September 2022. We focus on the up-to-date results on the release of free text datasets derived from web crawls, one of the central objectives of the project. The second release used a revised processing pipeline, and an enlarged set of input crawls. From 4.5 petabytes of web crawls we extracted 7.6T tokens of monolingual text in 193 languages, plus 380 million parallel sentences in 51 language pairs. We also release MultiHPLT, a cross-combination of the parallel data, which produces 1,275 pairs, as well as releasing the containing documents for all parallel sentences in order to enable research in document-level MT. We report changes in the pipeline, analysis and evaluation results for the second parallel data release based on machine translation systems. All datasets are released under a permissive CC0 licence.</abstract>
@@ -788,13 +788,13 @@
       <author><first>Nicolas</first><last>Dahan</last></author>
       <author><first>Manon</first><last>Delorme</last></author>
       <author><first>Mathilde</first><last>Huguin</last></author>
-      <author><first>Natalie</first><last>Kübler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kübler</last></author>
       <author><first>Paul</first><last>Lerner</last></author>
       <author><first>Alexandra</first><last>Mestivier</last></author>
       <author><first>Joachim</first><last>Minder</last></author>
       <author><first>Jean-François</first><last>Nominé</last></author>
       <author><first>Ziqian</first><last>Peng</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Panagiotis</first><last>Tsolakis</last></author>
       <author><first>Lichao</first><last>Zhu</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -807,7 +807,7 @@
       <title>Prompt-based Explainable Quality Estimation for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>alayalam</title>
       <author><first>Archchana</first><last>Sindhujan</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>105–106</pages>
       <abstract>The aim of this project was to curate data for the English-Malayalam language pair for the tasks of Quality Estimation (QE) and Automatic Post-Editing (APE) of Machine Translation. Whilst the primary aim of the project was to create a dataset for a low-resource language pair, we plan to use this dataset to investigate different zero-shot and few-shot prompting strategies including chain-of-thought, towards a unified explainable QE-APE framework.</abstract>
       <url hash="6929c193">2025.mtsummit-2.23</url>
diff --git a/data/xml/2025.mwe.xml b/data/xml/2025.mwe.xml
index 7b5c60b046..21330fc81e 100644
--- a/data/xml/2025.mwe.xml
+++ b/data/xml/2025.mwe.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2025-04-26" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025)</booktitle>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
-      <editor><first>Voula</first><last>Giouli</last></editor>
-      <editor><first>Verginica Barbu</first><last>Mititelu</last></editor>
-      <editor><first>Mathieu</first><last>Constant</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="voula-giouli"><first>Voula</first><last>Giouli</last></editor>
+      <editor id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></editor>
+      <editor id="matthieu-constant"><first>Mathieu</first><last>Constant</last></editor>
       <editor><first>Gražina</first><last>Korvel</last></editor>
       <editor><first>A. Seza</first><last>Doğruöz</last></editor>
       <editor><first>Alexandre</first><last>Rademaker</last></editor>
@@ -49,7 +49,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>VMWE</fixed-case> identification with models trained on <fixed-case>GUD</fixed-case> (a <fixed-case>UD</fixed-case>v.2 treebank of Standard <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek)</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Vivian</first><last>Stamou</last><affiliation>ILSP - “Athena” Research Center</affiliation></author>
       <author><first>Stavros</first><last>Bompolas</last><affiliation>ARCHIMEDES Unit | Athena Research Center</affiliation></author>
       <author><first>Katerina</first><last>Anastasopoulou</last><affiliation>Hellenic American University and University of Athens</affiliation></author>
@@ -93,7 +93,7 @@
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Irina</first><last>Lobzhanidze</last><affiliation>Ilia Chavchavadze State University</affiliation></author>
       <author><first>Rusudan</first><last>Makhachashvili</last><affiliation>Borys Grinchenko Kyiv Metropolitan University and Borys Grinchenko Kyiv Metropolitan University</affiliation></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Aleksandra</first><last>Markovic</last><affiliation>Institute for the Serbian Language of SASA</affiliation></author>
       <author><first>Ivelina</first><last>Stoyanova</last><affiliation>Deaf Studies Institute and Institute for Bulgarian Language, Bulgarian Academy of Sciences</affiliation></author>
       <pages>41-57</pages>
@@ -106,7 +106,7 @@
       <title>A <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese corpus annotated for verbal idioms</title>
       <author><first>David</first><last>Antunes</last><affiliation>INESC-ID Lisboa</affiliation></author>
       <author><first>Jorge</first><last>Baptista</last><affiliation>INESC ID Lisboa and Universidade do Algarve</affiliation></author>
-      <author><first>Nuno J.</first><last>Mamede</last><affiliation>Instituto Superior Técnico and INESC-ID</affiliation></author>
+      <author id="nuno-mamede"><first>Nuno J.</first><last>Mamede</last><affiliation>Instituto Superior Técnico and INESC-ID</affiliation></author>
       <pages>58-66</pages>
       <abstract>This paper presents the construction of VIDiom-PT, a corpus in European Portuguese annotated for verbal idioms (e.g. O Rui bateu a bota, lit.: Rui hit the boot ‘Rui died’). This linguistic resource aims to support the development of systems capable of processing such constructions in this language variety. To assist in the annotation effort, two tools were built. The first allows for the detection of possible instances of verbal idioms in texts, while the second provides a graphical interface for annotating them. This effort culminated in the annotation of a total of 5,178 instances of 747 different verbal idioms in more than 200,000 sentences in European Portuguese. A highly reliable inter-annotator agreement was achieved, using Krippendorff’s alpha for nominal data (0.869) with 5% of the data independently annotated by 3 experts. Part of the annotated corpus is also made publicly available.</abstract>
       <url hash="dbb4a4c7">2025.mwe-1.7</url>
diff --git a/data/xml/2025.naacl.xml b/data/xml/2025.naacl.xml
index 50de4df39b..8e437a5def 100644
--- a/data/xml/2025.naacl.xml
+++ b/data/xml/2025.naacl.xml
@@ -36,7 +36,7 @@
       <title>Benchmarking Distributional Alignment of Large Language Models</title>
       <author><first>Nicole</first><last>Meister</last></author>
       <author><first>Carlos</first><last>Guestrin</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford University</affiliation></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori</first><last>Hashimoto</last><affiliation>Stanford University</affiliation></author>
       <pages>24-49</pages>
       <abstract>Language models (LMs) are increasingly used as simulacra for people, yet their ability to match the distribution of views of a specific demographic group and be <i>distributionally aligned</i> remains uncertain. This notion of distributional alignment is complex, as there is significant variation in the types of attributes that are simulated. Prior works have underexplored the role of three critical variables—the question domain, steering method, and distribution expression method—which motivates our contribution of a benchmark explicitly addressing these dimensions. We construct a dataset expanding beyond political values, create human baselines for this task, and evaluate the extent to which an LM can align with a particular group’s opinion distribution to inform design choices of such simulation systems. Our analysis reveals open problems regarding if, and how, LMs can be used to simulate humans, and that LLMs can more accurately describe the opinion distribution than simulate such distributions.</abstract>
       <url hash="f0c95956">2025.naacl-long.2</url>
@@ -108,7 +108,7 @@
       <author><first>Xiangyan</first><last>Liu</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Bo</first><last>Lan</last></author>
       <author><first>Zhiyuan</first><last>Hu</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Zhicheng</first><last>Zhang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Fei</first><last>Wang</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Michael Qizhe</first><last>Shieh</last><affiliation>National University of Singapore</affiliation></author>
@@ -214,13 +214,13 @@
     </paper>
     <paper id="15">
       <title><fixed-case>LLM</fixed-case>s Are Biased Towards Output Formats! Systematically Evaluating and Mitigating Output Format Bias of <fixed-case>LLM</fixed-case>s</title>
-      <author><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="xuan-long-do"><first>Do Xuan</first><last>Long</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Ngoc-Hai</first><last>Nguyen</last></author>
       <author><first>Tiviatis</first><last>Sim</last><affiliation>National University of Singaore, National University of Singapore</affiliation></author>
       <author><first>Hieu</first><last>Dao</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Kenji</first><last>Kawaguchi</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <author><first>Min-Yen</first><last>Kan</last><affiliation>National University of Singapore</affiliation></author>
       <pages>299-330</pages>
       <abstract>We present the first systematic evaluation examining format bias in performance of large language models (LLMs). Our approach distinguishes between two categories of an evaluation metric under format constraints to reliably and accurately assess performance: one measures performance when format constraints are adhered to, while the other evaluates performance regardless of constraint adherence. We then define a metric for measuring the format bias of LLMs and establish effective strategies to reduce it. Subsequently, we present our empirical format bias evaluation spanning four commonly used categories—multiple-choice question-answer, wrapping, list, and mapping—covering 15 widely-used formats. Our evaluation on eight generation tasks uncovers significant format bias across state-of-the-art LLMs. We further discover that improving the format-instruction following capabilities of LLMs across formats potentially reduces format bias. Based on our evaluation findings, we study prompting and fine-tuning with synthesized format data techniques to mitigate format bias. Our methods successfully reduce the variance in ChatGPT’s performance among wrapping formats from 235.33 to 0.71 (%^2)</abstract>
@@ -301,7 +301,7 @@
     <paper id="22">
       <title><fixed-case>P</fixed-case>eer<fixed-case>QA</fixed-case>: A Scientific Question Answering Dataset from Peer Reviews</title>
       <author><first>Tim</first><last>Baumgärtner</last><affiliation>TU Darmstadt</affiliation></author>
-      <author><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>508-544</pages>
       <abstract>We present PeerQA, a real-world, scientific, document-level Question Answering (QA) dataset. PeerQA questions have been sourced from peer reviews, which contain questions that reviewers raised while thoroughly examining the scientific article. Answers have been annotated by the original authors of each paper. The dataset contains 579 QA pairs from 208 academic articles, with a majority from ML and NLP, as well as a subset of other scientific communities like Geoscience and Public Health.PeerQA supports three critical tasks for developing practical QA systems: Evidence retrieval, unanswerable question classification, and answer generation. We provide a detailed analysis of the collected dataset and conduct experiments establishing baseline systems for all three tasks. Our experiments and analyses reveal the need for decontextualization in document-level retrieval, where we find that even simple decontextualization approaches consistently improve retrieval performance across architectures. On answer generation, PeerQA serves as a challenging benchmark for long-context modeling, as the papers have an average size of 12k tokens.</abstract>
@@ -316,7 +316,7 @@
       <author><first>Xiaoming</first><last>Yu</last><affiliation>, Chinese Academy of Sciences</affiliation></author>
       <author><first>Baolong</first><last>Bi</last></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>545-561</pages>
       <abstract>Large Language Model (LLM) can enhance its credibility and verifiability by generating text with citations. However, existing research on citation generation is predominantly limited to sentence-level statements, neglecting the significance of positional fine-grained citations that can appear anywhere within sentences. To facilitate further exploration of the positional fine-grained citation generation, we propose ALiiCE, the first automatic evaluation framework for this task. Our method employs a dependency tree based approach to parse the sentence-level claim into atomic claims. Then ALiiCE evaluates citation quality using three metrics, including positional fine-grained citation recall, precision, and coefficient of variation of citation positions. We evaluate the positional fine-grained citation generation performance of several LLMs on long-form QA datasets. Our experiments and analyses demonstrate the effectiveness and reasonableness of ALiiCE. We offer our insights into the current advancements and future directions for the positional fine-grained citation generation task.</abstract>
       <url hash="7694d010">2025.naacl-long.23</url>
@@ -488,8 +488,8 @@
       <author><first>Hila</first><last>Gonen</last><affiliation>University of Washington</affiliation></author>
       <author><first>Terra</first><last>Blevins</last><affiliation>Universität Vienna</affiliation></author>
       <author><first>Alisa</first><last>Liu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last><affiliation>University of Washington, Facebook and Meta</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>785-798</pages>
       <abstract>Despite their wide adoption, the biases and unintended behaviors of language models remain poorly understood. In this paper, we identify and characterize a phenomenon never discussed before, which we call semantic leakage, where models leak irrelevant information from the prompt into the generation in unexpected ways. We propose an evaluation setting to detect semantic leakage both by humans and automatically, curate a diverse test suite for diagnosing this behavior, and measure significant semantic leakage in 13 flagship models. We also show that models exhibit semantic leakage in languages besides English and across different settings and generation scenarios. This discovery highlights yet another type of bias in language models that affects their generation patterns and behavior.</abstract>
       <url hash="3a1ffba3">2025.naacl-long.35</url>
@@ -515,7 +515,7 @@
     <paper id="37">
       <title>Familiarity: Better Evaluation of Zero-Shot Named Entity Recognition by Quantifying Label Shifts in Synthetic Training Data</title>
       <author><first>Jonas</first><last>Golde</last></author>
-      <author><first>Patrick</first><last>Haller</last></author>
+      <author id="patrick-haller-zurich"><first>Patrick</first><last>Haller</last></author>
       <author><first>Max</first><last>Ploner</last></author>
       <author><first>Fabio</first><last>Barth</last></author>
       <author><first>Nicolaas</first><last>Jedema</last></author>
@@ -644,7 +644,7 @@
       <title>Babysit A Language Model From Scratch: Interactive Language Learning by Trials and Demonstrations</title>
       <author><first>Ziqiao</first><last>Ma</last></author>
       <author><first>Zekun</first><last>Wang</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>991-1010</pages>
       <abstract>Humans are efficient language learners and inherently social creatures. Our language development is largely shaped by our social interactions, for example, the demonstration and feedback from caregivers. Contrary to human language learning, recent advancements in large language models have primarily adopted a non-interactive training paradigm, and refined pre-trained models through feedback afterward. In this work, we explore how corrective feedback from interactions influences neural language acquisition from scratch through systematically controlled experiments, assessing whether it contributes to word learning efficiency in language models. We introduce a trial-and-demonstration (TnD) learning framework that incorporates three distinct components: student trials, teacher demonstrations, and a reward conditioned on language competence at various developmental stages. Our experiments reveal that the TnD approach accelerates word acquisition for student models of equal and smaller numbers of parameters, and we highlight the significance of both trials and demonstrations. We further show that the teacher’s choices of words influence students’ word-specific learning efficiency, and a practice-makes-perfect effect is evident by a strong correlation between the frequency of words in trials and their respective learning curves. Our findings suggest that interactive language learning, with teacher demonstrations and active trials, can facilitate efficient word learning in language models.</abstract>
       <url hash="c30f1ee8">2025.naacl-long.46</url>
@@ -664,7 +664,7 @@
     </paper>
     <paper id="48">
       <title><fixed-case>LLM</fixed-case>-Human Pipeline for Cultural Grounding of Conversations</title>
-      <author><first>Rajkumar</first><last>Pujari</last><affiliation>Purdue University</affiliation></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University and Purdue University</affiliation></author>
       <pages>1029-1048</pages>
       <abstract>Conversations often adhere to well-understood social norms that vary across cultures. For example, while <i>addressing parents by name</i> is commonplace in the West, it is rare in most Asian cultures. Adherence or violation of such norms often dictates the tenor of conversations. Humans are able to navigate social situations requiring cultural awareness quite adeptly. However, it is a hard task for NLP models.In this paper, we tackle this problem by introducing a <i>Cultural Context Schema</i> for conversations. It comprises (1) conversational information such as emotions, dialogue acts, etc., and (2) cultural information such as social norms, violations, etc. We generate ~110k social norm and violation descriptions for ~23k conversations from Chinese culture using LLMs. We refine them using automated verification strategies which are evaluated against culturally aware human judgements. We organize these descriptions into meaningful structures we call Norm Concepts, using an interactive human-in-loop framework. We ground the norm concepts and the descriptions in conversations using symbolic annotation. Finally, we use the obtained dataset for downstream tasks such as emotion, sentiment, and dialogue act detection. We show that it significantly improves the empirical performance.</abstract>
@@ -682,7 +682,7 @@
       <author><first>Songhai</first><last>Fan</last></author>
       <author><first>Tim</first><last>Dwyer</last><affiliation>Monash University</affiliation></author>
       <author><first>Lay-Ki</first><last>Soon</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>1049-1074</pages>
       <url hash="c6b66927">2025.naacl-long.49</url>
       <bibkey>vo-etal-2025-access</bibkey>
@@ -910,7 +910,7 @@
       <author><first>Skyler</first><last>Wang</last></author>
       <author><first>Adina</first><last>Williams</last><affiliation>FAIR (Meta Platforms Inc.)</affiliation></author>
       <author><first>Levent</first><last>Sagun</last><affiliation>Meta</affiliation></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last><affiliation>Meta</affiliation></author>
       <pages>1454-1468</pages>
       <abstract>Text toxicity detection systems exhibit significant biases, producing disproportionate rates of false positives on samples mentioning demographic groups. But what about toxicity detection in speech? To investigate the extent to which text-based biases are mitigated by speech-based systems, we produce a set of high-quality group annotations for the multilingual MuTOX dataset, and then leverage these annotations to systematically compare speech- and text-based toxicity classifiers. Our findings indicate that access to speech data during inference supports reduced bias against group mentions, particularly for ambiguous and disagreement-inducing samples. Our results also suggest that improving classifiers, rather than transcription pipelines, is more helpful for reducing group bias. We publicly release our annotations and provide recommendations for future toxicity dataset construction.</abstract>
       <url hash="7fec88f0">2025.naacl-long.67</url>
@@ -1144,7 +1144,7 @@
       <author><first>Yuanzhuo</first><last>Wang</last><affiliation>Chinese Academy of Sciences</affiliation></author>
       <author><first>Jie</first><last>Zhang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>1733-1747</pages>
       <abstract>With the expansion of the application of Large Language Models (LLMs), concerns about their safety have grown among researchers. Numerous studies have demonstrated the potential risks of LLMs generating harmful content and have proposed various safety assessment benchmarks to evaluate these risks. However, the evaluation questions in current benchmarks, especially for Chinese, are too straightforward, making them easily rejected by target LLMs, and difficult to update with practical relevance due to their lack of correlation with real-world events. This hinders the effective application of these benchmarks in continuous evaluation tasks. To address these limitations, we propose SafetyQuizzer, a question-generation framework designed to evaluate the safety of LLMs more sustainably in the Chinese context. SafetyQuizzer leverages a finetuned LLM and jailbreaking attack templates to generate subtly offensive questions, which reduces the decline rate. Additionally, by utilizing retrieval-augmented generation, SafetyQuizzer incorporates the latest real-world events into evaluation questions, improving the adaptability of the benchmarks. Our experiments demonstrate that evaluation questions generated by SafetyQuizzer significantly reduce the decline rate compared to other benchmarks while maintaining a comparable attack success rate. Our code is available at https://github.com/zhichao-stone/SafetyQuizzer. Warning: this paper contains examples that may be offensive or upsetting.</abstract>
       <url hash="76bd15e8">2025.naacl-long.85</url>
@@ -1242,7 +1242,7 @@
       <author><first>Shamsuddeen Hassan</first><last>Muhammad</last><affiliation>Imperial College London and Bayero University, Kano-Nigeria</affiliation></author>
       <author><first>Idris</first><last>Abdulmumin</last><affiliation>Ahmadu Bello University</affiliation></author>
       <author><first>Abinew Ali</first><last>Ayele</last><affiliation>Bahir Dar University, Universität Hamburg</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Saminu Mohammad</first><last>Aliyu</last></author>
       <author><first>Paul</first><last>Röttger</last><affiliation>Bocconi University</affiliation></author>
@@ -1319,7 +1319,7 @@
       <title><fixed-case>DAWN</fixed-case>-<fixed-case>ICL</fixed-case>: Strategic Planning of Problem-solving Trajectories for Zero-Shot In-Context Learning</title>
       <author><first>Xinyu</first><last>Tang</last><affiliation>Renmin University of China</affiliation></author>
       <author id="xiaolei-wang-fudan"><first>Xiaolei</first><last>Wang</last><affiliation>Renmin University of China</affiliation></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Ji-Rong</first><last>Wen</last><affiliation>Renmin University of China</affiliation></author>
       <pages>1918-1934</pages>
       <abstract>Zero-shot in-context learning (ZS-ICL) aims to conduct in-context learning (ICL) without using human-annotated demonstrations.Existing ZS-ICL methods either use large language models (LLMs) to generate (input, label) pairs as pseudo-demonstrations or leverage historical pseudo-demonstrations to help solve the current problem.They assume that all problems are from the same task and traverse them in a random order.However, in real-world scenarios, problems usually come from diverse tasks, and only a few belong to the same task.The random traversing order may generate unreliable pseudo-demonstrations and lead to error accumulation.To address this problem, we reformulate ZS-**ICL** as a planning problem and propose a **D**emonstration-**AW**are Mo**N**te Carlo Tree Search (MCTS) approach (DAWN-ICL), which leverages MCTS to strategically plan the problem-solving trajectories for ZS-ICL.In addition, to achieve effective and efficient <tex-math>Q</tex-math> value estimation, we propose a demonstration-aware <tex-math>Q</tex-math>-value function and use it to enhance the selection phase and accelerate the expansion and simulation phases in MCTS.Extensive experiments demonstrate the effectiveness and efficiency of DAWN-ICL on in-domain and cross-domain scenarios, and it even outperforms ICL using human-annotated demonstrations.The code is available at https://github.com/txy77/MCTS4ZSICL.</abstract>
@@ -1359,7 +1359,7 @@
       <author><first>Taylor</first><last>Sorensen</last><affiliation>University of Washington and Brigham Young University</affiliation></author>
       <author><first>Ximing</first><last>Lu</last><affiliation>University of Washington</affiliation></author>
       <author><first>Maria</first><last>Antoniak</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Niloofar</first><last>Mireshghallah</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Computer Science Department, Stanford University and NVIDIA</affiliation></author>
@@ -1413,7 +1413,7 @@
     <paper id="103">
       <title>Discourse-Driven Evaluation: Unveiling Factual Inconsistency in Long Document Summarization</title>
       <author><first>Yang</first><last>Zhong</last><affiliation>University of Pittsburgh</affiliation></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
       <pages>2050-2073</pages>
       <abstract>Detecting factual inconsistency for long document summarization remains challenging, given the complex structure of the source article and long summary length. In this work, we study factual inconsistency errors and connect them with a line of discourse analysis. We find that errors are more common in complex sentences and are associated with several discourse features. We propose a framework that decomposes long texts into discourse-inspired chunks and utilizes discourse information to better aggregate sentence-level scores predicted by NLI models. Our approach shows improved performance on top of different model baselines over several evaluation benchmarks, covering rich domains of texts, focusing on long document summarization. This underscores the significance of incorporating discourse features in developing models for scoring summaries for long document factual inconsistency.</abstract>
       <url hash="2768b311">2025.naacl-long.103</url>
@@ -1447,7 +1447,7 @@
       <title>Uplifting Lower-Income Data: Strategies for Socioeconomic Perspective Shifts in Large Multi-modal Models</title>
       <author><first>Joan</first><last>Nwatu</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Oana</first><last>Ignat</last><affiliation>Santa Clara University</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>2127-2144</pages>
       <abstract>Recent work has demonstrated that the unequal representation of cultures and socioeconomic groups in training data leads to biased Large Multi-modal (LMM) models. To improve LMM model performance on underrepresented data, we propose and evaluate several prompting strategies using non-English, geographic, and socioeconomic attributes. We show that these geographic and socioeconomic integrated prompts favor retrieving topic appearances commonly found in data from low-income households across different countries leading to improved LMM model performance on lower-income data. Our analyses identify and highlight contexts where these strategies yield the most improvements.</abstract>
       <url hash="d32b8108">2025.naacl-long.106</url>
@@ -1535,7 +1535,7 @@
       <author><first>Ivan</first><last>Lazichny</last></author>
       <author><first>Alexander</first><last>Panchenko</last><affiliation>Skoltech</affiliation></author>
       <author><first>Maxim</first><last>Panov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Artem</first><last>Shelmanov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>2246-2262</pages>
       <abstract>Uncertainty quantification (UQ) is a prominent approach for eliciting truthful answers from large language models (LLMs). To date, information-based and consistency-based UQ have been the dominant UQ methods for text generation via LLMs. Density-based methods, despite being very effective for UQ in text classification with encoder-based models, have not been very successful with generative LLMs. In this work, we adapt Mahalanobis Distance (MD) – a well-established UQ technique in classification tasks – for text generation and introduce a new supervised UQ method. Our method extracts token embeddings from multiple layers of LLMs, computes MD scores for each token, and uses linear regression trained on these features to provide robust uncertainty scores. Through extensive experiments on eleven datasets, we demonstrate that our approach substantially improves over existing UQ methods, providing accurate and computationally efficient uncertainty scores for both sequence-level selective generation and claim-level fact-checking tasks. Our method also exhibits strong generalization to out-of-domain data, making it suitable for a wide range of LLM-based applications.</abstract>
@@ -1704,7 +1704,7 @@
       <author><first>Xin</first><last>Wang</last></author>
       <author><first>Che</first><last>Liu</last></author>
       <author><first>Zheda</first><last>Mai</last></author>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
+      <author><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
       <pages>2485-2497</pages>
       <abstract>Long-context Multimodal Large Language Models (MLLMs) that incorporate long text-image and text-video modalities, demand substantial computational resources as their multimodal Key-Value (KV) cache grows with increasing input lengths, challenging memory and time efficiency. For multimodal scenarios, the cross-modal interactions inevitablely increase complexity, and prior methods for KV cache compression, in both text-only and multimodal LLMs, have neglected attention density variations across layers, often adopting uniform or progressive reduction strategis for layer-wise cache allocation. This results in precision loss and suboptimal performance. We propose <b>MEDA</b>, a novel approach specifically designed for the complexities of multimodal settings, dynamically allocating KV cache sizes based on attention entropy to better adapt to multimodal interactions.Through a dynamic multimodal KV cache allocation strategy, MEDA compresses the KV cache, adaptively retains sufficient multimodal information at each layer. Meanwhile, to mitigate the degradation of contextual information due to cache compression, we also integrate KV pairs merging techniques to maintain coherence. MEDA achieves up to <b>72%</b> KV cache memory reduction and <b>2.82</b> faster decoding speeds in some cases, while maintaining or enhancing performance on various multimodal tasks in a long context, including multi-image and long video scenarios.</abstract>
       <url hash="6e12e6d0">2025.naacl-long.125</url>
@@ -1750,7 +1750,7 @@
     <paper id="129">
       <title>No Simple Answer to Data Complexity: An Examination of Instance-Level Complexity Metrics for Classification Tasks</title>
       <author><first>Ryan A.</first><last>Cook</last></author>
-      <author><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Ahmed</first><last>Abbasi</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>2553-2573</pages>
       <abstract>Natural Language Processing research has become increasingly concerned with understanding data quality and complexity at the instance level. Instance-level complexity scores can be used for tasks such as filtering out noisy observations and subsampling informative examples. However, there exists a diverse taxonomy of complexity metrics that can be used for a classification task, making metric selection itself a difficult task. We empirically examine the relationship between these metrics and find that simply storing training loss provides similar complexity rankings as other more computationally intensive techniques. Metric similarity allows us to subsample data with higher aggregate complexity along several metrics using a single a priori available meta-feature. Further, this choice of complexity metric does not impact demographic fairness, even in downstream predictions. Researchers should consider metric availability and similarity, as using the wrong metric or sampling strategy may hurt performance.</abstract>
@@ -1853,7 +1853,7 @@
       <author><first>Kyumin</first><last>Lee</last><affiliation>Worcester Polytechnic Institute</affiliation></author>
       <author><first>Kaize</first><last>Ding</last><affiliation>Northwestern University</affiliation></author>
       <author><first>Zhengyang</first><last>Wang</last><affiliation>Amazon</affiliation></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Jingbo</first><last>Shang</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Xian</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Trishul</first><last>Chilimbi</last><affiliation>Amazon</affiliation></author>
@@ -1881,11 +1881,11 @@
     </paper>
     <paper id="139">
       <title><fixed-case>I</fixed-case>roko<fixed-case>B</fixed-case>ench: A New Benchmark for <fixed-case>A</fixed-case>frican Languages in the Age of Large Language Models</title>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>Jessica</first><last>Ojo</last><affiliation>Lelapa AI</affiliation></author>
       <author><first>Israel Abebe</first><last>Azime</last></author>
       <author><first>Jian Yun</first><last>Zhuang</last></author>
-      <author><first>Jesujoba Oluwadara</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="jesujoba-alabi"><first>Jesujoba Oluwadara</first><last>Alabi</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>University College London, University of London</affiliation></author>
       <author><first>Millicent</first><last>Ochieng</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sara</first><last>Hooker</last><affiliation>Cohere For AI</affiliation></author>
@@ -1916,7 +1916,7 @@
     </paper>
     <paper id="140">
       <title>The Impact of Domain-Specific Terminology on Machine Translation for Finance in <fixed-case>E</fixed-case>uropean Languages</title>
-      <author><first>Arturo</first><last>Oncevay</last><affiliation>J.P. Morgan Chase</affiliation></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last><affiliation>J.P. Morgan Chase</affiliation></author>
       <author><first>Charese</first><last>Smiley</last><affiliation>J.P. Morgan Chase</affiliation></author>
       <author><first>Xiaomo</first><last>Liu</last><affiliation>JP Morgan AI Research</affiliation></author>
       <pages>2758-2775</pages>
@@ -1931,7 +1931,7 @@
       <author><first>Dixuan</first><last>Wang</last></author>
       <author><first>Tianjian</first><last>Li</last><affiliation>Johns Hopkins University</affiliation></author>
       <author><first>Dongwei</first><last>Jiang</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last><affiliation>Whiting School of Engineering</affiliation></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last><affiliation>Whiting School of Engineering</affiliation></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Daniel</first><last>Khashabi</last><affiliation>Johns Hopkins University</affiliation></author>
       <pages>2776-2794</pages>
@@ -2008,7 +2008,7 @@
       <author><first>Ivan</first><last>Kobyzev</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last><affiliation>Advanced Micro Devices</affiliation></author>
       <author><first>Boxing</first><last>Chen</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
-      <author><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last><affiliation>Université de Montréal</affiliation></author>
       <pages>2884-2898</pages>
       <abstract>Recent advancements in Large Language Models (LLMs) have set themselves apart with their exceptional performance in complex language modelling tasks. However, these models are also known for their significant computational and storage requirements, primarily due to the quadratic computation complexity of softmax attention. To mitigate this issue, linear attention has been designed to reduce the quadratic space-time complexity that is inherent in standard transformers. In this work, we embarked on a comprehensive exploration of three key components that substantially impact the performance of the Gated Linear Attention module: feature maps, normalization, and the gating mechanism. We developed a feature mapping function to address some crucial issues that previous suggestions overlooked. Then we offered further rationale for the integration of normalization layers to stabilize the training process. Moreover, we explored the saturation phenomenon of the gating mechanism and augmented it with a refining module. We conducted extensive experiments and showed our architecture outperforms previous Gated Linear Attention mechanisms in extensive tasks including training from scratch and post-linearization with continual pre-training.</abstract>
       <url hash="b240385c">2025.naacl-long.147</url>
@@ -2020,7 +2020,7 @@
       <author><first>Kshitish</first><last>Ghate</last></author>
       <author><first>Isaac</first><last>Slaughter</last><affiliation>University of Washington</affiliation></author>
       <author><first>Kyra</first><last>Wilson</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Aylin</first><last>Caliskan</last><affiliation>University of Washington</affiliation></author>
       <pages>2899-2915</pages>
       <abstract>While recent work has found that vision-language models trained under the Contrastive Language Image Pre-training (CLIP) framework contain intrinsic social biases, the extent to which different upstream pre-training features of the framework relate to these biases, and hence how intrinsic bias and downstream performance are connected has been unclear. In this work, we present the largest comprehensive analysis to-date of how the upstream pre-training factors and downstream performance of CLIP models relate to their intrinsic biases. Studying 131 unique CLIP models, trained on 26 datasets, using 55 architectures, and in a variety of sizes, we evaluate bias in each model using 26 well-established unimodal and cross-modal principled Embedding Association Tests. We find that the choice of pre-training dataset is the most significant upstream predictor of bias, whereas architectural variations have minimal impact. Additionally, datasets curated using sophisticated filtering techniques aimed at enhancing downstream model performance tend to be associated with higher levels of intrinsic bias. Finally, we observe that intrinsic bias is often significantly correlated with downstream performance (<tex-math>0.3 \leq r \leq 0.8</tex-math>), suggesting that models optimized for performance inadvertently learn to amplify representational biases. Comparisons between unimodal and cross-modal association tests reveal that social group bias depends heavily on the modality. Our findings imply that more sophisticated strategies are needed to address intrinsic model bias for vision-language models across the entire model development pipeline.</abstract>
@@ -2076,7 +2076,7 @@
       <author><first>Longju</first><last>Bai</last></author>
       <author><first>Angana</first><last>Borah</last></author>
       <author><first>Oana</first><last>Ignat</last><affiliation>Santa Clara University</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>2970-2993</pages>
       <abstract>Large Multimodal Models (LMMs) exhibit impressive performance across various multimodal tasks. However, their effectiveness in cross-cultural contexts remains limited due to the predominantly Western-centric nature of most data and models. Conversely, multi-agent models have shown significant capability in solving complex tasks. Our study evaluates the collective performance of LMMs in a multi-agent interaction setting for the novel task of cultural image captioning. Our contributions are as follows: (1) We introduce MosAIC, a Multi-Agent framework to enhance cross-cultural Image Captioning using LMMs with distinct cultural personas; (2) We provide a dataset of culturally enriched image captions in English for images from China, India, and Romania across three datasets: GeoDE, GD-VCR, CVQA; (3) We propose a culture-adaptable metric for evaluating cultural information within image captions; and (4) We show that the multi-agent interaction outperforms single-agent models across different metrics, and offer valuable insights for future research.</abstract>
       <url hash="f7fb79f6">2025.naacl-long.152</url>
@@ -2152,7 +2152,7 @@
     <paper id="158">
       <title>A Probabilistic Framework for <fixed-case>LLM</fixed-case> Hallucination Detection via Belief Tree Propagation</title>
       <author><first>Bairu</first><last>Hou</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Jacob</first><last>Andreas</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Shiyu</first><last>Chang</last></author>
       <pages>3076-3099</pages>
@@ -2174,7 +2174,7 @@
     <paper id="160">
       <title>Superlatives in Context: Modeling the Implicit Semantics of Superlatives</title>
       <author><first>Valentina</first><last>Pyatkin</last><affiliation>Allen Institute for Artificial Intelligence and Department of Computer Science</affiliation></author>
-      <author><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <author><first>Ido</first><last>Dagan</last><affiliation>Bar-Ilan University</affiliation></author>
       <author><first>Reut</first><last>Tsarfaty</last><affiliation>Google and Bar-Ilan University, Technion</affiliation></author>
       <pages>3112-3126</pages>
@@ -2258,7 +2258,7 @@
     </paper>
     <paper id="167">
       <title><fixed-case>W</fixed-case>orld<fixed-case>C</fixed-case>uisines: A Massive-Scale Benchmark for Multilingual and Multicultural Visual Question Answering on Global Cuisines</title>
-      <author><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last><affiliation>Capital One</affiliation></author>
       <author><first>Frederikus</first><last>Hudi</last></author>
       <author><first>Patrick Amadeus</first><last>Irawan</last></author>
       <author><first>David</first><last>Anugraha</last></author>
@@ -2300,14 +2300,14 @@
       <author><first>Stephanie Yulia</first><last>Salim</last></author>
       <author><first>Yi</first><last>Zhou</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Yinxuan</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University</affiliation></author>
       <author><first>En-Shiun Annie</first><last>Lee</last></author>
       <author><first>Shogo</first><last>Okada</last></author>
       <author><first>Ayu</first><last>Purwarianti</last><affiliation>Institut Teknologi Bandung</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Taro</first><last>Watanabe</last><affiliation>Nara Institute of Science and Technology, Japan</affiliation></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last><affiliation>Monash University and Boston University</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Chong-Wah</first><last>Ngo</last><affiliation>Singapore Management University</affiliation></author>
       <pages>3242-3264</pages>
       <abstract>Vision Language Models (VLMs) often struggle with culture-specific knowledge, particularly in languages other than English and in underrepresented cultural contexts. To evaluate their understanding of such knowledge, we introduce WorldCuisines, a massive-scale benchmark for multilingual and multicultural, visually grounded language understanding. This benchmark includes a visual question answering (VQA) dataset with text-image pairs across 30 languages and dialects, spanning 9 language families and featuring over 1 million data points, making it the largest multicultural VQA benchmark to date. It includes tasks for identifying dish names and their origins. We provide evaluation datasets in two sizes (12k and 60k instances) alongside a training dataset (1 million instances). Our findings show that while VLMs perform better with correct location context, they struggle with adversarial contexts and predicting specific regional cuisines and languages. To support future research, we release a knowledge base with annotated food entries and images along with the VQA data.</abstract>
@@ -2383,7 +2383,7 @@
       <author><first>Siyan</first><last>Li</last></author>
       <author><first>Vethavikashini Chithrra</first><last>Raghuram</last><affiliation>CCC Intelligent Solutions</affiliation></author>
       <author><first>Omar</first><last>Khattab</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last><affiliation>Columbia University</affiliation></author>
       <author><first>Zhou</first><last>Yu</last><affiliation>Columbia University</affiliation></author>
       <pages>3371-3390</pages>
       <abstract>Users can divulge sensitive information to proprietary LLM providers, raising significant privacy concerns. While open-source models, hosted locally on the user’s machine, alleviate some concerns, models that users can host locally are often less capable than proprietary frontier models. Toward preserving user privacy while retaining the best quality, we propose Privacy-Conscious Delegation, a novel task for chaining API-based and local models. We utilize recent public collections of user-LLM interactions to construct a natural benchmark called PUPA, which contains personally identifiable information (PII). To study potential approaches, we devise PAPILLON, a multi-stage LLM pipeline that uses prompt optimization to address a simpler version of our task. Our best pipeline maintains high response quality for 85.5% of user queries while restricting privacy leakage to only 7.5%. We still leave a large margin to the generation quality of proprietary LLMs for future work.</abstract>
@@ -2395,7 +2395,7 @@
       <title><fixed-case>W</fixed-case>hen2<fixed-case>C</fixed-case>all: When (not) to Call Tools</title>
       <author><first>Hayley</first><last>Ross</last><affiliation>Harvard University, Harvard University</affiliation></author>
       <author><first>Ameya Sunil</first><last>Mahabaleshwarkar</last><affiliation>NVIDIA</affiliation></author>
-      <author><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
+      <author id="yoshi-suhara"><first>Yoshi</first><last>Suhara</last><affiliation>NVIDIA</affiliation></author>
       <pages>3391-3409</pages>
       <abstract>Leveraging external tools is a key feature for modern Language Models (LMs) to expand their capabilities and integrate them into existing systems. However, existing benchmarks primarily focus on the accuracy of tool calling—whether the correct tool is called with the correct parameters—and less on evaluating when LMs should (not) call tools. We develop a new benchmark, When2Call, which evaluates tool-calling decision-making: when to generate a tool call, when to ask follow-up questions and when to admit the question can’t be answered with the tools provided. We find that state-of-the-art tool-calling LMs show significant room for improvement on When2Call, indicating the importance of this benchmark. We also develop a training set for When2Call and leverage the multiple-choice nature of the benchmark to develop a preference optimization training regime, which shows considerably more improvement than traditional fine-tuning. We release the benchmark and training data as well as evaluation scripts.</abstract>
       <url hash="7d33d93a">2025.naacl-long.174</url>
@@ -2405,7 +2405,7 @@
     <paper id="175">
       <title>Mitigating Hallucinated Translations in Large Language Models with Hallucination-focused Preference Optimization</title>
       <author><first>Zilu</first><last>Tang</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last><affiliation>Apple</affiliation></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last><affiliation>Apple</affiliation></author>
       <author><first>Sarthak</first><last>Garg</last><affiliation>Apple</affiliation></author>
       <pages>3410-3433</pages>
       <abstract>Machine Translation (MT) is undergoing a paradigm shift, with systems based on fine-tuned large language models (LLM) becoming increasingly competitive with traditional encoder-decoder models trained specifically for translation tasks. However, LLM-based systems are at a higher risk of generating hallucinations, which can severely undermine user’s trust and safety. Most prior research on hallucination mitigation focuses on traditional MT models, with solutions that involve *post-hoc* mitigation - detecting hallucinated translations and re-translating them. While effective, this approach introduces additional complexity in deploying extra tools in production and also increases latency.To address these limitations, we propose a method that intrinsically learns to mitigate hallucinations during the model training phase. Specifically, we introduce a data creation framework to generate hallucination focused preference datasets. Fine-tuning LLMs on these preference datasets reduces the hallucination rate by an average of 96% across five language pairs, while preserving overall translation quality. In a zero-shot setting our approach reduces hallucinations by 89% on an average across three unseen target languages.</abstract>
@@ -2417,7 +2417,7 @@
       <title>Large Language Models Can Solve Real-World Planning Rigorously with Formal Verification Tools</title>
       <author><first>Yilun</first><last>Hao</last></author>
       <author><first>Yongchao</first><last>Chen</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
+      <author><first>Yang</first><last>Zhang</last><affiliation>International Business Machines</affiliation></author>
       <author><first>Chuchu</first><last>Fan</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>3434-3483</pages>
       <abstract>Large Language Models (LLMs) struggle to directly generate correct plans for complex multi-constraint planning problems, even with self-verification and self-critique. For example, a U.S. domestic travel planning benchmark TravelPlanner was proposed in Xie et al. (2024), where the best LLM OpenAI o1-preview can only find viable travel plans with a 10% success rate given all needed information. In this work, we tackle this by proposing an LLM-based planning framework that formalizes and solves complex multi-constraint planning problems as constrained satisfiability problems, which are further consumed by sound and complete satisfiability solvers. We start with TravelPlanner as the primary use case and show that our framework achieves a success rate of 93.9% and is effective with diverse paraphrased prompts. More importantly, our framework has strong zero-shot generalizability, successfully handling unseen constraints in our newly created unseen international travel dataset and generalizing well to new fundamentally different domains. Moreover, when user input queries are infeasible, our framework can identify the unsatisfiable core, provide failure reasons, and offers personalized modification suggestions. We show that our framework can modify and solve for an average of 81.6% and 91.7% unsatisfiable queries from two datasets and prove with ablations that all key components of our framework are effective and necessary.</abstract>
@@ -2444,7 +2444,7 @@
       <author><first>Bingyang</first><last>Ye</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Xinrui</first><last>Hu</last></author>
       <author><first>Nianwen</first><last>Xue</last><affiliation>Brandeis University</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <pages>3499-3513</pages>
       <abstract>Cross-Document Event Coreference (CDEC) annotation is challenging and difficult to scale, resulting in existing datasets being small and lacking diversity. We introduce a new approach leveraging large language models (LLMs) to decontextualize event mentions, by simplifying the document-level annotation task to sentence pairs with enriched context, enabling the creation of Richer EventCorefBank (RECB), a denser and more expressive dataset annotated at faster speed. Decontextualization has been shown to improve annotation speed without compromising quality and to enhance model performance. Our baseline experiment indicates that systems trained on RECB achieve comparable results on the EventCorefBank(ECB+) test set, showing the high quality of our dataset and its generalizability on other CDEC datasets. In addition, our evaluation shows that the strong baseline models are still struggling with RECB comparing to other CDEC datasets, suggesting that the richness and diversity of RECB present significant challenges to current CDEC systems.</abstract>
       <url hash="e67899c9">2025.naacl-long.178</url>
@@ -2457,7 +2457,7 @@
       <author><first>Tijana</first><last>Zrnic</last><affiliation>Stanford University</affiliation></author>
       <author><first>Cinoo</first><last>Lee</last><affiliation>Stanford University</affiliation></author>
       <author><first>Emmanuel</first><last>Candes</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <pages>3514-3533</pages>
       <abstract>Large language models (LLMs) have shown high agreement with human raters across a variety of tasks, demonstrating potential to ease the challenges of human data collection. In computational social science (CSS), researchers are increasingly leveraging LLM annotations to complement slow and expensive human annotations. Still, guidelines for collecting and using LLM annotations, without compromising the validity of downstream conclusions, remain limited. We introduce Confidence-driven inference: a method that combines LLM annotations and LLM confidence indicators to strategically select which human annotations should be collected, with the goal of producing accurate statistical estimates and provably valid confidence intervals while reducing the number of human annotations needed. Our approach comes with safeguards against LLM annotations of poor quality, guaranteeing that the conclusions will be both valid and no less accurate than if we only relied on human annotations. We demonstrate the effectiveness of Confidence-driven inference over baselines in statistical estimation tasks across three CSS settings—text politeness, stance, and bias—reducing the needed number of human annotations by over 25% in each. Although we use CSS settings for demonstration, Confidence-driven inference can be used to estimate most standard quantities across a broad range of NLP problems.</abstract>
       <url hash="7e7410b7">2025.naacl-long.179</url>
@@ -2482,7 +2482,7 @@
       <author><first>Cheyenne</first><last>Wing</last></author>
       <author><first>María Ximena</first><last>Juárez Huerta</last></author>
       <author><first>Ángeles</first><last>Márquez Hernandez</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>3549-3562</pages>
       <abstract>The development of digital linguistic resources is essential for enhancing the inclusion of indigenous and marginalized languages in the digital domain. Indigenous languages of Mexico, despite representing vast typological diversity and millions of speakers, have largely been overlooked in NLP until recently. In this paper, we present a corpus of audio and annotated transcriptions of Western Sierra Puebla Nahuatl, an endangered variety of Nahuatl spoken in Puebla, Mexico. The data made available in this corpus are useful for ASR, spelling normalization, and word-level language identification. We detail the corpus-creation process, and describe experiments to report benchmark results for each of these important NLP tasks. The corpus audio and text is made freely available.</abstract>
       <url hash="29bb88f8">2025.naacl-long.181</url>
@@ -2636,7 +2636,7 @@
       <author><first>Ruipu</first><last>Luo</last></author>
       <author><first>Jiwen</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Minghui</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>3769-3798</pages>
       <url hash="e512a222">2025.naacl-long.192</url>
@@ -2674,7 +2674,7 @@
     </paper>
     <paper id="195">
       <title>Mamba-Shedder: Post-Transformer Compression for Efficient Selective Structured State Space Models</title>
-      <author><first>Juan Pablo</first><last>Munoz</last><affiliation>Intel</affiliation></author>
+      <author id="juan-pablo-munoz"><first>Juan Pablo</first><last>Munoz</last><affiliation>Intel</affiliation></author>
       <author><first>Jinjie</first><last>Yuan</last><affiliation>Intel</affiliation></author>
       <author><first>Nilesh</first><last>Jain</last><affiliation>Intel Corp</affiliation></author>
       <pages>3851-3863</pages>
@@ -2693,7 +2693,7 @@
       <author><first>Shaun M.</first><last>Eack</last><affiliation>University of Pittsburgh</affiliation></author>
       <author><first>Fei</first><last>Fang</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>William Yang</first><last>Wang</last><affiliation>UC Santa Barbara</affiliation></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <pages>3864-3900</pages>
       <abstract>There is a significant gap between patient needs and available mental health support today. In this paper, we aim to thoroughly examine the potential of using Large Language Models (LLMs) to assist professional psychotherapy. To this end, we propose a new benchmark, CBT-Bench, for the systematic evaluation of cognitive behavioral therapy (CBT) assistance. We include three levels of tasks in CBT-Bench: **I: Basic CBT knowledge acquisition**, with the task of multiple-choice questions; **II: Cognitive model understanding**, with the tasks of cognitive distortion classification, primary core belief classification, and fine-grained core belief classification; **III: Therapeutic response generation**, with the task of generating responses to patient speech in CBT therapy sessions.These tasks encompass key aspects of CBT that could potentially be enhanced through AI assistance, while also outlining a hierarchy of capability requirements, ranging from basic knowledge recitation to engaging in real therapeutic conversations. We evaluated representative LLMs on our benchmark. Experimental results indicate that while LLMs perform well in reciting CBT knowledge, they fall short in complex real-world scenarios requiring deep analysis of patients’ cognitive structures and generating effective responses, suggesting potential future work.</abstract>
       <url hash="c45fed60">2025.naacl-long.196</url>
@@ -2705,7 +2705,7 @@
       <author><first>Eui Jun</first><last>Hwang</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sukmin</first><last>Cho</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Junmyeong</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last><affiliation>Korea Advanced Institute of Science and Technology</affiliation></author>
       <pages>3901-3920</pages>
       <abstract>Gloss-free Sign Language Translation (SLT) converts sign videos into spoken language sentences without relying on glosses, which are the written representations of signs. Recently, Large Language Models (LLMs) have shown remarkable translation performance in gloss-free methods by harnessing their powerful natural language generation capabilities. However, these methods often rely on domain-specific fine-tuning of visual encoders to achieve optimal results. By contrast, we emphasize the importance of capturing the spatial configurations and motion dynamics in sign language. With this in mind, we introduce Spatial and Motion-based Sign Language Translation (SpaMo), a novel LLM-based SLT framework. The core idea of SpaMo is simple yet effective: instead of domain-specific tuning, we use off-the-shelf visual encoders to extract spatial and motion features, which are then input into an LLM along with a language prompt. Additionally, we employ a visual-text alignment process as a lightweight warm-up step before applying SLT supervision. Our experiments demonstrate that SpaMo achieves state-of-the-art performance on three popular datasets—PHOENIX14T, CSL-Daily, and How2Sign—without visual fine-tuning.</abstract>
       <url hash="713e6bf6">2025.naacl-long.197</url>
@@ -2885,7 +2885,7 @@
       <author><first>Yifan</first><last>Song</last></author>
       <author><first>Guoyin</first><last>Wang</last><affiliation>Alibaba Group</affiliation></author>
       <author><first>Sujian</first><last>Li</last><affiliation>Peking University</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <pages>4195-4206</pages>
       <abstract>Current evaluations of large language models (LLMs) often overlook non-determinism, typically focusing on a single output per example. This limits our understanding of LLM performance variability in real-world applications. Our study addresses this issue by exploring key questions about the performance differences between greedy decoding and sampling, identifying benchmarks’ consistency regarding non-determinism, and examining unique model behaviors. Through extensive experiments, we observe that greedy decoding generally outperforms sampling methods for most evaluated tasks. We also observe consistent performance across different LLM sizes and alignment methods, noting that alignment can reduce sampling variance. Moreover, our best-of-N sampling approach demonstrates that smaller LLMs can match or surpass larger models such as GPT-4-Turbo, highlighting the untapped potential of smaller LLMs. This research shows the importance of considering non-determinism in LLM evaluations and provides insights for future LLM development and evaluation.</abstract>
       <url hash="596adcd2">2025.naacl-long.211</url>
@@ -2926,7 +2926,7 @@
       <author><first>Lifeng</first><last>Shang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last><affiliation>Huawei Noah’s Ark Lab</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>4246-4263</pages>
       <abstract>Supervised fine-tuning (SFT) is a common method to enhance the tool calling capabilities of Large Language Models (LLMs), with the training data often being synthesized. The current data synthesis process generally involves sampling a set of tools, formulating a requirement based on these tools, and generating the call statements. However, tools sampled randomly lack relevance, making them difficult to combine and thus reducing the diversity of the data. Additionally, current work overlooks the coherence between turns of dialogues, leading to a gap between the synthesized data and real-world scenarios. To address these issues, we propose a Graph-based Sampling strategy to sample more relevant tool combinations, and a Planned-generation strategy to create plans that guide the synthesis of coherent dialogues. We integrate these two strategies and enable multiple agents to synthesize the dialogue data interactively, resulting in our tool-calling data synthesis pipeline ToolFlow. Data quality assessments demonstrate improvements in the naturalness and coherence of our synthesized dialogues. Finally, we apply SFT on LLaMA-3.1-8B using 8,000 synthetic dialogues generated with ToolFlow. Results show that the model achieves tool-calling performance comparable to or even surpassing GPT-4, while maintaining strong general capabilities.</abstract>
       <url hash="6d1ea147">2025.naacl-long.214</url>
@@ -2964,7 +2964,7 @@
       <author><first>Samiul</first><last>Alam</last></author>
       <author><first>Zhongwei</first><last>Wan</last></author>
       <author><first>Hui</first><last>Shen</last></author>
-      <author id="mi-zhang"><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
+      <author><first>Mi</first><last>Zhang</last><affiliation>The Ohio State University</affiliation></author>
       <pages>4287-4296</pages>
       <abstract>Despite significant advancements, the practical deployment of Large Language Models (LLMs) is often hampered by their immense sizes, highlighting the need for effective compression techniques. Singular Value Decomposition (SVD) emerges as a promising method for compressing LLMs. However, existing SVD-based compression approaches suffer from substantial truncation losses, leading to severe performance degradation in compressed models. In this work, we introduce , a novel SVD-based LLM compression method that optimizes singular value truncation in SVD compression with two key strategies. First, employs dynamic compression ratio allocation to effectively balance the extremely large truncation loss across different layers. Second, it implements loss-optimized weight truncation to ensure that the truncated singular values result in a lower and more stable truncation loss in practice. We evaluate on ten datasets and five models on various scales and demonstrated that outperforms current state-of-the-art methods. The source code is available at <url>https://github.com/AIoT-MLSys-Lab/SVD-LLM</url>.</abstract>
       <url hash="a9fda3d4">2025.naacl-long.217</url>
@@ -2980,8 +2980,8 @@
       <author><first>Zhuohan</first><last>Liu</last><affiliation>, A*STAR</affiliation></author>
       <author id="wenyu-zhang-cornell"><first>Wenyu</first><last>Zhang</last><affiliation>I2R, A*STAR</affiliation></author>
       <author><first>Zhengyuan</first><last>Liu</last><affiliation>I2R</affiliation></author>
-      <author><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last><affiliation>I2R</affiliation></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>4297-4316</pages>
       <abstract>We introduce AudioBench, a universal benchmark designed to evaluate Audio Large Language Models (AudioLLMs). It encompasses 8 distinct tasks and 26 datasets, among which, 7 are newly proposed datasets. The evaluation targets three main aspects: speech understanding, audio scene understanding, and voice understanding (paralinguistic). Despite recent advancements, there lacks a comprehensive benchmark for AudioLLMs on instruction following capabilities conditioned on audio signals. AudioBench addresses this gap by setting up datasets as well as desired evaluation metrics. Besides, we also evaluated the capabilities of five popular models and found that no single model excels consistently across all tasks. We outline the research outlook for AudioLLMs and anticipate that our open-sourced evaluation toolkit, data, and leaderboard will offer a robust testbed for future model developments.</abstract>
       <url hash="e35b34e5">2025.naacl-long.218</url>
@@ -3064,7 +3064,7 @@
       <author><first>Zhangchen</first><last>Xu</last></author>
       <author><first>Fengqing</first><last>Jiang</last><affiliation>University of Washington</affiliation></author>
       <author><first>Luyao</first><last>Niu</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Radha</first><last>Poovendran</last><affiliation>University of Washington, Seattle</affiliation></author>
       <pages>4392-4405</pages>
       <abstract>Instruction tuning has been widely adopted to ensure large language models (LLMs) follow user instructions and engage with users meaningfully. The resulting instruction-following capabilities of LLMs heavily rely on the instruction datasets used for tuning. Recently, synthetic instruction datasets have emerged as an economically viable solution to provide LLMs diverse and high-quality instructions. However, existing approaches typically assume that larger or stronger models are stronger teachers for instruction tuning, and hence simply adopt larger models as response generators to the synthetic instructions. In this paper, we challenge this commonly-adopted assumption. Our extensive experiments across five base models and twenty response generators reveal that larger and stronger models are not necessarily stronger teachers of smaller models. We refer to this phenomenon as the Larger Models’ Paradox. We observe that existing metrics cannot precisely predict the effectiveness of response generators since they ignore the compatibility between teachers and base models being fine-tuned. We thus develop a novel metric, named as Compatibility-Adjusted Reward (CAR) to measure the effectiveness of response generators. Our experiments across five base models demonstrate that CAR outperforms almost all baselines.</abstract>
@@ -3219,7 +3219,7 @@
       <author><first>Vipul</first><last>Gupta</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Candace</first><last>Ross</last><affiliation>Meta</affiliation></author>
       <author><first>David</first><last>Pantoja</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Megan</first><last>Ung</last><affiliation>Facebook AI Research</affiliation></author>
       <author><first>Adina</first><last>Williams</last><affiliation>FAIR (Meta Platforms Inc.)</affiliation></author>
       <pages>4595-4615</pages>
@@ -3337,7 +3337,7 @@
     <paper id="244">
       <title><fixed-case>R</fixed-case>each<fixed-case>A</fixed-case>gent: Enhancing Mobile Agent via Page Reaching and Operation</title>
       <author><first>Qinzhuo</first><last>Wu</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <pages>4760-4775</pages>
@@ -3393,7 +3393,7 @@
       <author><first>Wei</first><last>Ju</last><affiliation>Sichuan University</affiliation></author>
       <author><first>Luchen</first><last>Liu</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last><affiliation>Peking University</affiliation></author>
       <author><first>Ming</first><last>Zhang</last><affiliation>Peking University</affiliation></author>
       <pages>4805-4822</pages>
       <abstract>Large Multimodal Models (LMMs) exhibit impressive cross-modal understanding and reasoning abilities, often assessed through multiple-choice questions (MCQs) that include an image, a question, and several options. However, many benchmarks used for such evaluations suffer from systematic biases. Remarkably, Large Language Models (LLMs) without any visual perception capabilities achieve non-trivial performance, undermining the credibility of these evaluations. To address this issue while maintaining the efficiency of MCQ evaluations, we propose MMEVALPRO, a benchmark designed to avoid Type-I errors through a trilogy evaluation pipeline and more rigorous metrics. For each original question from existing benchmarks, human annotators augment it by creating one perception question and one knowledge anchor question through a meticulous annotation process. MMEVALPRO comprises 2,138 question triplets, totaling 6,414 distinct questions. Two-thirds of these questions are manually labeled by human experts, while the rest are sourced from existing benchmarks (MMMU, ScienceQA, and MathVista). Compared with the existing benchmarks, our experiments with the latest LLMs and LMMs demonstrate that MMEVALPRO is **more challenging** (the best LMM lags behind human performance by 31.73%, compared to an average gap of 8.03% in previous benchmarks) and **more trustworthy** (the best LLM trails the best LMM by 23.09%, whereas the gap for previous benchmarks is just 14.64%). Our in-depth analysis explains the reason for the large performance gap and justifies the trustworthiness of evaluation, underscoring its significant potential for advancing future research.</abstract>
@@ -3417,7 +3417,7 @@
     <paper id="249">
       <title>Analyzing (In)Abilities of <fixed-case>SAE</fixed-case>s via Formal Languages</title>
       <author><first>Abhinav</first><last>Menon</last></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>David</first><last>Krueger</last></author>
       <author><first>Ekdeep Singh</first><last>Lubana</last><affiliation>Harvard University, Harvard University</affiliation></author>
       <pages>4837-4862</pages>
@@ -3536,7 +3536,7 @@
       <author><first>Rong</first><last>Ye</last><affiliation>ByteDance</affiliation></author>
       <author><first>Lei</first><last>Chen</last></author>
       <author><first>Haoyu</first><last>Kuang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <author><first>Zhongyu</first><last>Wei</last><affiliation>Fudan University</affiliation></author>
       <pages>4975-5001</pages>
       <abstract>Large language models (LLMs) are increasingly leveraged to empower autonomous agents to simulate human beings in various fields of behavioral research. However, evaluating their capacity to navigate complex social interactions remains a challenge. Previous studies face limitations due to insufficient scenario diversity, complexity, and a single-perspective focus. To this end, we introduce AgentSense: Benchmarking Social Intelligence of Language Agents through Interactive Scenarios. Drawing on Dramaturgical Theory, AgentSense employs a bottom-up approach to create 1,225 diverse social scenarios constructed from extensive scripts. We evaluate LLM-driven agents through multi-turn interactions, emphasizing both goal completion and implicit reasoning. We analyze goals using ERG theory and conduct comprehensive experiments. Our findings highlight that LLMs struggle with goals in complex social scenarios, especially high-level growth needs, and even GPT-4o requires improvement in private information reasoning.</abstract>
@@ -3651,7 +3651,7 @@
       <author><first>Aryo Pradipta</first><last>Gema</last><affiliation>Anthropic and University of Edinburgh, University of Edinburgh</affiliation></author>
       <author><first>Hongru</first><last>Wang</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Xuanli</first><last>He</last><affiliation>University College London, University of London</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Pasquale</first><last>Minervini</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>5117-5136</pages>
       <abstract>Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. However, their parametric knowledge may conflict with the information provided in the context—this phenomenon, known as <i>context-memory knowledge conflicts</i>, can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. Analysing the internal activations of LLMs, we find that they can internally register the signals of knowledge conflict at mid-layers. Such signals allow us to detect whether a knowledge conflict occurs and use <i>inference-time</i> intervention strategies to resolve it. In this work, we propose SpARE, a <i>training-free</i> representation engineering method that uses pre-trained sparse auto-encoders (SAEs) to control the knowledge selection behaviour of LLMs. SpARE identifies the functional features that control the knowledge selection behaviours and applies them to edit the internal activations of LLMs at inference time. Our experimental results show that SpARE can effectively control the usage of either knowledge source to resolve knowledge conflict in open-domain question-answering tasks, surpassing existing representation engineering methods (+10%) as well as contrastive decoding methods (+15%).</abstract>
@@ -3661,7 +3661,7 @@
     </paper>
     <paper id="265">
       <title><fixed-case>M</fixed-case>o<fixed-case>D</fixed-case>ification: Mixture of Depths Made Easy</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author><first>Chen</first><last>Zhang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Meizhi</first><last>Zhong</last></author>
       <author><first>Qimeng</first><last>Wang</last><affiliation>Xiaohongshu</affiliation></author>
       <author><first>Xuantao</first><last>Lu</last></author>
@@ -3760,7 +3760,7 @@
       <author><first>An</first><last>Zhang</last><affiliation>National University of Singapore</affiliation></author>
       <author><first>Yang</first><last>Deng</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Xiang</first><last>Wang</last><affiliation>University of Science and Technology of China</affiliation></author>
-      <author><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last><affiliation>National University of Singapore</affiliation></author>
       <pages>5259-5276</pages>
       <abstract>Open-domain dialogue systems have seen remarkable advancements with the development of large language models (LLMs). Nonetheless, most existing dialogue systems predominantly focus on brief single-session interactions, neglecting the real-world demands for long-term companionship and personalized interactions with chatbots. Crucial to addressing this real-world need are event summary and persona management, which enable reasoning for appropriate long-term dialogue responses. Recent progress in the human-like cognitive and reasoning capabilities of LLMs suggests that LLM-based agents could significantly enhance automated perception, decision-making, and problem-solving. In response to this potential, we introduce a model-agnostic framework, the Long-term Dialogue Agent (LD-Agent), which incorporates three independently tunable modules dedicated to event perception, persona extraction, and response generation. For the event memory module, long and short-term memory banks are employed to separately focus on historical and ongoing sessions, while a topic-based retrieval mechanism is introduced to enhance the accuracy of memory retrieval. Furthermore, the persona module conducts dynamic persona modeling for both users and agents. The integration of retrieved memories and extracted personas is subsequently fed into the generator to induce appropriate responses. The effectiveness, generality, and cross-domain capabilities of LD-Agent are empirically demonstrated across various illustrative benchmarks, models, and tasks. The code is released at https://github.com/leolee99/LD-Agent.</abstract>
       <url hash="752537ca">2025.naacl-long.272</url>
@@ -3830,7 +3830,7 @@
     </paper>
     <paper id="278">
       <title>Token-based Decision Criteria Are Suboptimal in In-context Learning</title>
-      <author><first>Hakaze</first><last>Cho</last></author>
+      <author id="hakaze-cho"><first>Hakaze</first><last>Cho</last></author>
       <author><first>Yoshihiro</first><last>Sakai</last><affiliation>Japan Advanced Institute of Science and Technology</affiliation></author>
       <author><first>Mariko</first><last>Kato</last></author>
       <author><first>Kenshiro</first><last>Tanaka</last></author>
@@ -3858,7 +3858,7 @@
       <title>Multilingual Machine Translation with Open Large Language Models at Practical Scale: An Empirical Study</title>
       <author><first>Menglong</first><last>Cui</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Pengzhi</first><last>Gao</last><affiliation>Xiaomi Corporation</affiliation></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Jian</first><last>Luan</last><affiliation>Xiaomi Corporation</affiliation></author>
       <author><first>Bin</first><last>Wang</last><affiliation>AI Lab, Xiaomi Inc.</affiliation></author>
       <pages>5420-5443</pages>
@@ -3881,7 +3881,7 @@
     <paper id="282">
       <title>Evaluating Evidence Attribution in Generated Fact Checking Explanations</title>
       <author><first>Rui</first><last>Xing</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and University of Melbourne</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Jey Han</first><last>Lau</last><affiliation>The University of Melbourne</affiliation></author>
       <pages>5475-5496</pages>
       <abstract>Automated fact-checking systems often struggle with trustworthiness, as their generated explanations can include hallucinations. In this work, we explore evidence attribution for fact-checking explanation generation. We introduce a novel evaluation protocol, citation masking and recovery, to assess attribution quality in generated explanations. We implement our protocol using both human annotators and automatic annotators and found that LLM annotation correlates with human annotation, suggesting that attribution assessment can be automated. Finally, our experiments reveal that: (1) the best-performing LLMs still generate explanations that are not always accurate in their attribution; and (2) human-curated evidence is essential for generating better explanations.</abstract>
@@ -3909,7 +3909,7 @@
       <author><first>Georgios</first><last>Chochlakis</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Alexandros</first><last>Potamianos</last><affiliation>Amazon, University of Southern California and National Technical University of Athens</affiliation></author>
       <author><first>Kristina</first><last>Lerman</last><affiliation>University of Southern California and USC Information Sciences Institute</affiliation></author>
-      <author><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
       <pages>5513-5528</pages>
       <abstract>In-context Learning (ICL) has become the primary method for performing natural language tasks with Large Language Models (LLMs). The knowledge acquired during pre-training is crucial for this few-shot capability, providing the model with task priors. However, recent studies have shown that ICL predominantly relies on retrieving task priors rather than “learning” to perform tasks. This limitation is particularly evident in complex subjective domains such as emotion and morality, where priors significantly influence posterior predictions. In this work, we examine whether this is the result of the aggregation used in corresponding datasets, where trying to combine low-agreement, disparate annotations might lead to annotation artifacts that create detrimental noise in the prompt. Moreover, we evaluate the posterior bias towards certain annotators by grounding our study in appropriate, quantitative measures of LLM priors. Our results indicate that aggregation is a confounding factor in the modeling of subjective tasks, and advocate focusing on modeling individuals instead. However, aggregation does not explain the entire gap between ICL and the state of the art, meaning other factors in such tasks also account for the observed phenomena. Finally, by rigorously studying annotator-level labels, we find that it is possible for minority annotators to both better align with LLMs and have their perspectives further amplified.</abstract>
       <url hash="073a7724">2025.naacl-long.284</url>
@@ -3921,8 +3921,8 @@
       <author><first>Yasser</first><last>Ashraf</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Yuxia</first><last>Wang</last></author>
       <author><first>Bin</first><last>Gu</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>5529-5546</pages>
       <abstract>The growing use of large language models (LLMs) has raised concerns regarding their safety. While many studies have focused on English, the safety of LLMs in Arabic, with its linguistic and cultural complexities, remains under-explored. Here, we aim to bridge this gap. In particular, we present an Arab-region-specific safety evaluation dataset consisting of 5,799 questions, including direct attacks, indirect attacks, and harmless requests with sensitive words, adapted to reflect the socio-cultural context of the Arab world. To uncover the impact of different stances in handling sensitive and controversial topics, we propose a dual-perspective evaluation framework. It assesses the LLM responses from both governmental and opposition viewpoints. Experiments over five leading Arabic-centric and multilingual LLMs reveal substantial disparities in their safety performance. This reinforces the need for culturally specific datasets to ensure the responsible deployment of LLMs.</abstract>
       <url hash="2eec3236">2025.naacl-long.285</url>
@@ -4098,7 +4098,7 @@
     </paper>
     <paper id="298">
       <title><fixed-case>V</fixed-case>oice<fixed-case>T</fixed-case>ext<fixed-case>B</fixed-case>lender: Augmenting Large Language Models with Speech Capabilities via Single-Stage Joint Speech-Text Supervised Fine-Tuning</title>
-      <author><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Krishna C</first><last>Puvvada</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Zhehuai</first><last>Chen</last></author>
       <author><first>Piotr</first><last>Zelasko</last><affiliation>NVIDIA</affiliation></author>
@@ -4120,7 +4120,7 @@
       <author><first>Haishan</first><last>Gao</last></author>
       <author><first>Sarah Li</first><last>Chen</last></author>
       <author><first>Dan</first><last>Edelstein</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <author><first>Chen</first><last>Shani</last></author>
       <pages>5803-5817</pages>
       <abstract>Word similarity has many applications to social science and cultural analytics tasks like measuring meaning change over time and making sense of contested terms. Yet traditional similarity methods based on cosine similarity between word embeddings cannot capture the context-dependent, asymmetrical, polysemous nature of semantic similarity. We propose a new measure of similarity, Word Confusion, that reframes semantic similarity in terms of feature-based classification confusion. Word Confusion is inspired by Tversky (1977)’s suggestion that similarity features be chosen dynamically. Here we train a classifier to map contextual embeddings to word identities and use the classifier confusion (the probability of choosing a confounding word c instead of the correct target word t) as a measure of the similarity of c and t. The set of potential confounding words acts as the chosen features. Our method is comparable to cosine similarity in matching human similarity judgments across several datasets (MEN, WirdSim353, and SimLex), and can measure similarity using predetermined features of interest. We demonstrate our model’s ability to make use of dynamic features by applying it to test a hypothesis about changes in the 18th C. meaning of the French word “révolution” from popular to state action during the French Revolution. We hope this reimagining of semantic similarity will inspire the development of new tools that better capture the multi-faceted and dynamic nature of language, advancing the fields of computational social science and cultural analytics and beyond.</abstract>
@@ -4143,7 +4143,7 @@
       <title>Do <fixed-case>RAG</fixed-case> Systems Cover What Matters? Evaluating and Optimizing Responses with Sub-Question Coverage</title>
       <author><first>Kaige</first><last>Xie</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Philippe</first><last>Laban</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Caiming</first><last>Xiong</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
       <pages>5836-5849</pages>
@@ -4193,7 +4193,7 @@
       <author><first>Jamin</first><last>Shin</last><affiliation>NAVER</affiliation></author>
       <author><first>Joel</first><last>Jang</last></author>
       <author><first>Seonghyeon</first><last>Ye</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Sean</first><last>Welleck</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Moontae</first><last>Lee</last><affiliation>LG Corporation and University of Illinois, Chicago</affiliation></author>
@@ -4227,7 +4227,7 @@
     <paper id="305">
       <title>Uncovering Bias in Large Vision-Language Models at Scale with Counterfactuals</title>
       <author><first>Phillip</first><last>Howard</last><affiliation>Intel</affiliation></author>
-      <author><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last><affiliation>National Research Council Canada</affiliation></author>
       <author><first>Anahita</first><last>Bhiwandiwalla</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>5946-5991</pages>
@@ -4282,7 +4282,7 @@
       <author><first>Nasser</first><last>Zalmout</last><affiliation>Amazon</affiliation></author>
       <author><first>Priyanka</first><last>Nigam</last></author>
       <author><first>Bing</first><last>Yin</last><affiliation>Amazon</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <pages>6041-6068</pages>
       <abstract>Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, intrinsic reasoning and planning, and adapting to environmental feedback. Hephaestus-Forge comprises 103B agent-specific data encompassing 76,537 APIs, including both tool documentation to introduce knowledge of API functions and function calling trajectories to strengthen intrinsic reasoning. To explore effective training protocols, we investigate scaling laws to identify the optimal recipe in data mixing ratios. By continual pre-training on Hephaestus-Forge, Hephaestus outperforms small- to medium-scale open-source LLMs and rivals commercial LLMs on three agent benchmarks, demonstrating the effectiveness of our pre-training corpus in enhancing fundamental agentic capabilities and generalization of LLMs to new tasks or environments.</abstract>
       <url hash="18e90239">2025.naacl-long.308</url>
@@ -4497,7 +4497,7 @@
       <title>A Cognitive Evaluation Benchmark of Image Reasoning and Description for Large Vision-Language Models</title>
       <author><first>Xiujie</first><last>Song</last></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <author><first>Chunhao</first><last>Zhang</last></author>
       <author><first>Yanyi</first><last>Chen</last></author>
       <pages>6392-6409</pages>
@@ -4512,7 +4512,7 @@
       <author><first>Jaehyung</first><last>Seo</last></author>
       <author><first>Jaewook</first><last>Lee</last><affiliation>Korea University</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>6410-6422</pages>
       <abstract>Large language models (LLMs) often retain outdated or incorrect information from pre-training, which undermines their reliability. While model editing methods have been developed to address such errors without full re-training, they frequently suffer from knowledge conflicts, where outdated information interferes with new knowledge. In this work, we propose Conflict-free Model Editing (CoME), a novel framework that enhances the accuracy of knowledge updates in LLMs by selectively removing outdated knowledge. CoME leverages unlearning to mitigate knowledge interference, allowing new information to be integrated without compromising relevant linguistic features. Through experiments on GPT-J and LLaMA-3 using Counterfact and ZsRE datasets, we demonstrate that CoME improves both editing accuracy and model reliability when applied to existing editing methods. Our results highlight that the targeted removal of outdated knowledge is crucial for enhancing model editing effectiveness and maintaining the model’s generative performance.</abstract>
       <url hash="420e7e50">2025.naacl-long.325</url>
@@ -4582,7 +4582,7 @@
       <author><first>Cunxiang</first><last>Wang</last></author>
       <author><first>Huimin</first><last>Wang</last><affiliation>Jarvis Research Center, Tencent YouTu Lab</affiliation></author>
       <author><first>Guanhua</first><last>Chen</last><affiliation>Southern University of Science and Technology</affiliation></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <pages>6510-6525</pages>
       <abstract>Previous research has typically concentrated on leveraging the internal knowledge of Large Language Models (LLMs) to answer known questions (i.e., internal reasoning such as generate-then-read). In contrast, for questions that fall outside their known scope, these models rely on external knowledge retrieval to provide accurate responses (i.e., external acting such as retrieve-then-read). However, few previous works consider the <i>compositional questions</i>, which consist of several known and unknown sub-questions, necessitating the dynamic combination of previous two methods (i.e., <i>internal reasoning and external acting</i>) to achieve a better trade-off between effectiveness and efficiency. To this end, we introduce a <b>Self</b> <b>D</b>ivide-and-<b>C</b>onquer (<i>Self-DC</i>) framework, accompanying with the first <b>C</b>ompositional <b>u</b>nknown <b>Q</b>uestion-<b>A</b>nswering dataset (CuQA). This framework enables LLMs to adaptively choose between using internal knowledge and retrieving external knowledge as needed, resulting in a better trade-off between effectiveness and efficiency. Experimental results on two datasets demonstrate that <i>Self-DC</i> can achieve comparable or even better performance with much fewer external calls compared with several strong baselines.</abstract>
       <url hash="b75ae84c">2025.naacl-long.331</url>
@@ -4697,7 +4697,7 @@
       <author><first>Arash</first><last>Yousefi Jordehi</last><affiliation>University of Guilan</affiliation></author>
       <author><first>Mahsa</first><last>Hosseini Khasheh Heyran</last><affiliation>University of Guilan</affiliation></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last><affiliation>University of Guilan</affiliation></author>
-      <author><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last><affiliation>Stony Brook University</affiliation></author>
       <author><first>Cornelia</first><last>Caragea</last><affiliation>University of Illinois at Chicago</affiliation></author>
       <pages>6677-6694</pages>
       <abstract>The rise of Large Language Models (LLMs) has boosted the use of Few-Shot Learning (FSL) methods in natural language processing, achieving acceptable performance even when working with limited training data. The goal of FSL is to effectively utilize a small number of annotated samples in the learning process. However, the performance of FSL suffers when unsuitable support samples are chosen. This problem arises due to the heavy reliance on a limited number of support samples, which hampers consistent performance improvement even when more support samples are added. To address this challenge, we propose an active learning-based instance selection mechanism that identifies effective support instances from the unlabeled pool and can work with different LLMs. Our experiments on five tasks show that our method frequently improves the performance of FSL. We make our implementation available on GitHub.</abstract>
@@ -4714,7 +4714,7 @@
       <author><first>Khoi M.</first><last>Le</last></author>
       <author><first>Nguyen Viet</first><last>Anh</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Feng</first><last>Yichao</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <pages>6695-6708</pages>
       <abstract>Previous research on multimodal entity linking (MEL) has primarily employed contrastive learning as the primary objective. However, using the rest of the batch as negative samples without careful consideration, these studies risk leveraging easy features and potentially overlook essential details that make entities unique. In this work, we propose JD-CCL (Jaccard Distance-based Conditional Contrastive Learning), a novel approach designed to enhance the ability to match multimodal entity linking models. JD-CCL leverages meta-information to select negative samples with similar attributes, making the linking task more challenging and robust. Additionally, to address the limitations caused by the variations within the visual modality among mentions and entities, we introduce a novel method, CVaCPT (Contextual Visual-aid Controllable Patch Transform). It enhances visual representations by incorporating multi-view synthetic images and contextual textual representations to scale and shift patch representations. Experimental results on benchmark MEL datasets demonstrate the strong effectiveness of our approach.</abstract>
       <url hash="a22c4bda">2025.naacl-long.341</url>
@@ -4725,7 +4725,7 @@
       <title><fixed-case>R</fixed-case>esearch<fixed-case>A</fixed-case>gent: Iterative Research Idea Generation over Scientific Literature with Large Language Models</title>
       <author><first>Jinheon</first><last>Baek</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last><affiliation>Microsoft Research</affiliation></author>
-      <author><first>Silviu</first><last>Cucerzan</last><affiliation>Microsoft</affiliation></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sung Ju</first><last>Hwang</last><affiliation>Korea Advanced Institute of Science and Technology and AITRICS</affiliation></author>
       <pages>6709-6738</pages>
       <abstract>The pace of scientific research, vital for improving human life, is complex, slow, and needs specialized expertise. Meanwhile, novel, impactful research often stems from both a deep understanding of prior work, and a cross-pollination of ideas across domains and fields. To enhance the productivity of researchers, we propose ResearchAgent, which leverages the encyclopedic knowledge and linguistic reasoning capabilities of Large Language Models (LLMs) to assist them in their work. This system automatically defines novel problems, proposes methods and designs experiments, while iteratively refining them based on the feedback from collaborative LLM-powered reviewing agents. Specifically, starting with a core scientific paper, ResearchAgent is augmented not only with relevant publications by connecting information over an academic graph but also entities retrieved from a knowledge store derived from shared underlying concepts mined across numerous papers. Then, mimicking a scientific approach to improving ideas with peer discussions, we leverage multiple LLM-based ReviewingAgents that provide reviews and feedback via iterative revision processes. These reviewing agents are instantiated with human preference-aligned LLMs whose criteria for evaluation are elicited from actual human judgments via LLM prompting. We experimentally validate our ResearchAgent on scientific publications across multiple disciplines, showing its effectiveness in generating novel, clear, and valid ideas based on both human and model-based evaluation results. Our initial foray into AI-mediated scientific research has important implications for the development of future systems aimed at supporting researchers in their ideation and operationalization of novel work.</abstract>
@@ -4793,7 +4793,7 @@
     <paper id="348">
       <title><fixed-case>L</fixed-case>ib<fixed-case>E</fixed-case>volution<fixed-case>E</fixed-case>val: A Benchmark and Study for Version-Specific Code Generation</title>
       <author><first>Sachit</first><last>Kuhar</last><affiliation>Amazon</affiliation></author>
-      <author><first>Wasi Uddin</first><last>Ahmad</last><affiliation>NVIDIA</affiliation></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last><affiliation>NVIDIA</affiliation></author>
       <author><first>Zijian</first><last>Wang</last><affiliation>Amazon AWS AI Labs</affiliation></author>
       <author><first>Nihal</first><last>Jain</last><affiliation>Amazon</affiliation></author>
       <author><first>Haifeng</first><last>Qian</last><affiliation>NVIDIA</affiliation></author>
@@ -4828,7 +4828,7 @@
       <author><first>Shaoyang</first><last>Xu</last></author>
       <author><first>Yongqi</first><last>Leng</last></author>
       <author><first>Linhao</first><last>Yu</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <pages>6859-6877</pages>
       <abstract>As large language models (LLMs) become increasingly accessible in many countries, it is essential to align them to serve pluralistic human values across cultures. However, pluralistic culture alignment in LLMs remain an open problem. In this paper, we propose CultureSPA, a Self-Pluralising Culture Alignment framework that allows LLMs to simultaneously align to pluralistic cultures. The framework first generates questions on various culture topics, then yields LLM outputs in response to these generated questions under both culture-aware and culture-unaware settings. By comparing culture-aware/unaware outputs, we are able to detect and collect culture-related instances. These instances are employed to fine-tune LLMs to serve pluralistic cultures in either a culture-joint or culture-specific way. Extensive experiments demonstrate that CultureSPA significantly improves the alignment of LLMs to diverse cultures without compromising general abilities. And further improvements can be achieved if CultureSPA is combined with advanced prompt engineering techniques. Comparisons between culture-joint and culture-specific tuning strategies, along with variations in data quality and quantity, illustrate the robustness of our method. We also explore the mechanisms underlying CultureSPA and the relations between different cultures it reflects.</abstract>
       <url hash="8165a539">2025.naacl-long.350</url>
@@ -4847,7 +4847,7 @@
     <paper id="352">
       <title><fixed-case>D</fixed-case>raw<fixed-case>E</fixed-case>du<fixed-case>M</fixed-case>ath: Evaluating Vision Language Models with Expert-Annotated Students’ Hand-Drawn Math Images</title>
       <author><first>Sami</first><last>Baral</last><affiliation>Worcester Polytechnic Institute</affiliation></author>
-      <author><first>Li</first><last>Lucy</last><affiliation>University of California Berkeley</affiliation></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last><affiliation>University of California Berkeley</affiliation></author>
       <author><first>Ryan</first><last>Knight</last><affiliation>Insource Services, Inc</affiliation></author>
       <author><first>Alice</first><last>Ng</last></author>
       <author><first>Luca</first><last>Soldaini</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -4975,7 +4975,7 @@
       <author><first>Junyi</first><last>Li</last></author>
       <author><first>Ruiyang</first><last>Ren</last></author>
       <author><first>Shijie</first><last>Wang</last></author>
-      <author><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last><affiliation>Renmin University of China</affiliation></author>
       <author><first>Yang</first><last>Song</last><affiliation>BOSS Zhipin</affiliation></author>
       <author><first>Tao</first><last>Zhang</last></author>
       <pages>7064-7074</pages>
@@ -4993,7 +4993,7 @@
       <author><first>Hui</first><last>Wang</last></author>
       <author><first>Xi</first><last>Zeng</last></author>
       <author><first>Xingwei</first><last>Liang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last><affiliation>The Chinese University of Hong Kong</affiliation></author>
       <author><first>Ruifeng</first><last>Xu</last><affiliation>Harbin Institute of Technology</affiliation></author>
       <pages>7075-7092</pages>
       <abstract>Stance detection is critical for understanding the underlying position or attitude expressed toward a topic. Large language models (LLMs) have demonstrated significant advancements across various natural language processing tasks including stance detection, however, their performance in stance detection is limited by biases and spurious correlations inherent due to their data-driven nature. Our statistical experiment reveals that LLMs are prone to generate biased stances due to sentiment-stance spurious correlations and preference towards certain individuals and topics. Furthermore, the results demonstrate a strong negative correlation between stance bias and stance detection performance, underscoring the importance of mitigating bias to enhance the utility of LLMs in stance detection. Therefore, in this paper, we propose a Counterfactual Augmented Calibration Network (FACTUAL), which a novel calibration network is devised to calibrate potential bias in the stance prediction of LLMs. Further, to address the challenge of effectively learning bias representations and the difficulty in the generalizability of debiasing, we construct counterfactual augmented data. This approach enhances the calibration network, facilitating the debiasing and out-of-domain generalization. Experimental results on in-target and zero-shot stance detection tasks show that the proposed FACTUAL can effectively mitigate biases of LLMs, achieving state-of-the-art results.</abstract>
@@ -5022,7 +5022,7 @@
       <author><first>Feifei</first><last>Zhai</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Nanchang</first><last>Cheng</last></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <pages>7116-7131</pages>
       <abstract>Simultaneous Machine Translation (SiMT) generates target translation before receiving the whole source sentence and faces a serious hallucination problem. In contrast, traditional offline machine translation (OMT) models exhibit significantly fewer hallucinations. Motivated by this disparity, we propose Knowledge Distillation for SiMT (KD-SiMT), a simple yet effective method that utilizes the OMT model to mitigate hallucinations in SiMT. Experiments on Zh<tex-math>\rightarrow</tex-math>En and De<tex-math>\rightarrow</tex-math>En tasks demonstrate that KD-SiMT effectively reduces hallucinations and enhances the SiMT performance. Furthermore, we systematically investigate the deficiencies in SiMT models related to serious hallucinations and the effect of KD-SiMT. Specifically, we design targeted tasks and metrics to quantitatively evaluate the components in SiMT models from the perspectives of model structure and knowledge acquisition. Our analyses reveal that inaccurate source representations and imbalanced cross-attention are more likely to occur in SiMT models when generating hallucinations, while KD-SiMT alleviates these issues. Besides, we find that KD-SiMT equips SiMT models with sufficient faithfulness knowledge in training, thus reducing hallucinations.</abstract>
       <url hash="2d3c1d7c">2025.naacl-long.364</url>
@@ -5093,7 +5093,7 @@
       <author><first>Tao</first><last>Ge</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Xun</first><last>Wang</last><affiliation>Microsoft</affiliation></author>
       <author><first>Yan</first><last>Xia</last><affiliation>Research, Microsoft</affiliation></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Furu</first><last>Wei</last><affiliation>Microsoft Research</affiliation></author>
       <pages>7212-7234</pages>
       <abstract>Strategic reasoning is a complex yet essential capability for intelligent agents. It requires Large Language Model (LLM) agents to adapt their strategies dynamically in multi-agent environments. Unlike static reasoning tasks, success in these contexts depends on anticipating other agents’ beliefs and actions while continuously adjusting strategies to achieve individual goals. LLMs and LLM agents often struggle with strategic reasoning due to the absence of a reasoning framework that enables them to dynamically infer others’ perspectives and adapt to changing environments. Inspired by the Level-K framework from game theory and behavioral economics, which extends reasoning from simple reactions to structured strategic depth, we propose a novel framework: “K-Level Reasoning with Large Language Models (K-R).” This framework employs recursive mechanisms to enable LLMs to achieve varying levels of strategic depth, allowing agents to form higher order beliefs—beliefs about others’ beliefs. We validate this framework through rigorous testing on four testbeds: two classical game theory problems and two social intelligence tasks. The results demonstrate the advantages of K-R in strategic reasoning. Our work presents the first recursive implementation of strategic depth in large language models (LLMs). It establishes a foundation for future research into theory of mind and strategic reasoning in LLMs.</abstract>
@@ -5107,7 +5107,7 @@
       <author><first>Danilo</first><last>Carvalho</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Oskar</first><last>Wysocki</last></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>7235-7258</pages>
       <abstract>Syllogistic reasoning is crucial for Natural Language Inference (NLI). This capability is particularly significant in specialized domains such as biomedicine, where it can support automatic evidence interpretation and scientific discovery. This paper presents SylloBio-NLI, a novel framework that leverages external ontologies to systematically instantiate diverse syllogistic arguments for biomedical NLI. We employ SylloBio-NLI to evaluate Large Language Models (LLMs) on identifying valid conclusions and extracting supporting evidence across 28 syllogistic schemes instantiated with human genome pathways. Extensive experiments reveal that biomedical syllogistic reasoning is particularly challenging for zero-shot LLMs, which achieve an average accuracy between 70% on generalized modus ponens and 23% on disjunctive syllogism. At the same time, we found that few-shot prompting can boost the performance of different LLMs, including Gemma (+14%) and LLama-3 (+43%). However, a deeper analysis shows that both techniques exhibit high sensitivity to superficial lexical variations, highlighting a dependency between reliability, models’ architecture, and pre-training regime. Overall, our results indicate that, while in-context examples have the potential to elicit syllogistic reasoning in LLMs, existing models are still far from achieving the robustness and consistency required for safe biomedical NLI applications.</abstract>
       <url hash="ac2c3a97">2025.naacl-long.371</url>
@@ -5129,7 +5129,7 @@
       <author><first>Muhammad Arslan</first><last>Manzoor</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Ruihong</first><last>Zeng</last></author>
       <author><first>Dilshod</first><last>Azizov</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Shangsong</first><last>Liang</last><affiliation>SUN YAT-SEN UNIVERSITY</affiliation></author>
       <pages>7279-7295</pages>
       <abstract>In the current era of rapidly growing digital data, evaluating the political bias and factuality of news outlets has become more important for seeking reliable information online. In this work, we study the classification problem of profiling news media from the lens of political bias and factuality. Traditional profiling methods, such as Pre-trained Language Models (PLMs) and Graph Neural Networks (GNNs) have shown promising results, but they face notable challenges. PLMs focus solely on textual features, causing them to overlook the complex relationships between entities, while GNNs often struggle with media graphs containing disconnected components and insufficient labels. To address these limitations, we propose MediaGraphMind (MGM), an effective solution within a variational Expectation-Maximization (EM) framework. Instead of relying on limited neighboring nodes, MGM leverages features, structural patterns, and label information from globally similar nodes. Such a framework not only enables GNNs to capture long-range dependencies for learning expressive node representations but also enhances PLMs by integrating structural information and therefore improving the performance of both models. The extensive experiments demonstrate the effectiveness of the proposed framework and achieve new state-of-the-art results. Further, we share our repository which contains the dataset, code, and documentation.</abstract>
@@ -5201,7 +5201,7 @@
       <author><first>Renxi</first><last>Wang</last></author>
       <author><first>Xudong</first><last>Han</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Yixuan</first><last>Zhang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Haonan</first><last>Li</last></author>
       <pages>7385-7398</pages>
       <abstract>Interaction trajectories between agents and environments have proven effective in tuning LLMs into task-specific agents. However, constructing these trajectories, especially successful trajectories, is often computationally and time intensive due to the relatively low success rates of even the most advanced LLMs, such as GPT-4 and Claude. Additionally, common training paradigms like supervised fine-tuning (SFT) and reinforcement learning (RL) not only require large volumes of data but also have specific demands regarding the trajectories used. For instance, existing SFT approaches typically utilize only positive examples, limiting their efficiency in low-resource scenarios. To address this, we introduce Negative-Aware Training (NAT), a straightforward yet effective method that leverages both successful and failed trajectories for fine-tuning, maximizing the utility of limited resources. Experimental results demonstrate that NAT consistently surpasses existing methods, including SFT, DPO, and PPO, across various tasks.</abstract>
@@ -5320,7 +5320,7 @@
       <author><first>Sabine N Van Der</first><last>Veer</last><affiliation>University of Manchester</affiliation></author>
       <author><first>Lamiece</first><last>Hassan</last></author>
       <author><first>Chenghua</first><last>Lin</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>7548-7561</pages>
       <abstract>Topic modelling is a pivotal unsupervised machine learning technique for extracting valuable insights from large document collections. Existing neural topic modelling methods often encode contextual information of documents, while ignoring contextual details of candidate centroid words, leading to the inaccurate selection of topic words due to the *contextualization gap*. In parallel, it is found that functional words are frequently selected over topical words. To address these limitations, we introduce **CAST**: **C**orpus-**A**ware **S**elf-similarity Enhanced **T**opic modelling, a novel topic modelling method that builds upon candidate centroid word embeddings contextualized on the dataset, and a novel self-similarity-based method to filter out less meaningful tokens. Inspired by findings in contrastive learning that self-similarities of functional token embeddings in different contexts are much lower than topical tokens, we find self-similarity to be an effective metric to prevent functional words from acting as candidate topic words. Our approach significantly enhances the coherence and diversity of generated topics, as well as the topic model’s ability to handle noisy data. Experiments on news benchmark datasets and one Twitter dataset demonstrate the method’s superiority in generating coherent, diverse topics, and handling noisy data, outperforming strong baselines.</abstract>
       <url hash="093c7016">2025.naacl-long.386</url>
@@ -5330,7 +5330,7 @@
     <paper id="387">
       <title>A Zero-Shot Open-Vocabulary Pipeline for Dialogue Understanding</title>
       <author><first>Abdulfattah</first><last>Safa</last></author>
-      <author><first>Gözde Gül</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last><affiliation>Koç University</affiliation></author>
       <pages>7562-7579</pages>
       <abstract>Dialogue State Tracking (DST) is crucial for understanding user needs and executing appropriate system actions in task-oriented dialogues. Majority of existing DST methods are designed to work within predefined ontologies and assume the availability of gold domain labels, struggling with adapting to new slots values. While Large Language Models (LLMs)-based systems show promising zero-shot DST performance, they either require extensive computational resources or they underperform existing fully-trained systems, limiting their practicality. To address these limitations, we propose a zero-shot, open-vocabulary system that integrates domain classification and DST in a single pipeline. Our approach includes reformulating DST as a question-answering task for less capable models and employing self-refining prompts for more adaptable ones. Our system does not rely on fixed slot values defined in the ontology allowing the system to adapt dynamically. We compare our approach with existing SOTA, and show that it provides up to 20% better Joint Goal Accuracy (JGA) over previous methods on datasets like MultiWOZ 2.1, with up to 90% fewer requests to the LLM API.</abstract>
       <url hash="9badc399">2025.naacl-long.387</url>
@@ -5480,7 +5480,7 @@
       <title>Functional Lexicon in Subword Tokenization</title>
       <author><first>Zachary William</first><last>Hopton</last><affiliation>University of Zurich and University of Zurich</affiliation></author>
       <author><first>Yves</first><last>Scherrer</last><affiliation>University of Oslo</affiliation></author>
-      <author><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardzic</last><affiliation>University of Zurich</affiliation></author>
       <pages>7839-7853</pages>
       <abstract>The distinction between function and content units of the lexicon has been somewhat neglected in recent NLP work, but it could still be useful when working with low-resource languages, and, in particular, to improve cross-lingual transfer. In this paper, we investigate to what extent BPE subword tokenization can be used to identify units of the functional lexicon in a language without any annotated data. We analyze subword tokens in terms of their productivity and attempt to find thresholds that best distinguish function from content tokens. On a sample of seven diverse languages, we find that the best results are obtained with 50 BPE merges. We also show that this subword tokenization setting can be beneficial for the interlinear glossing task.</abstract>
       <url hash="80038ae3">2025.naacl-long.398</url>
@@ -5677,7 +5677,7 @@
     <paper id="414">
       <title>Through the Lens of History: Methods for Analyzing Temporal Variation in Content and Framing of State-run <fixed-case>C</fixed-case>hinese Newspapers</title>
       <author><first>Shijia</first><last>Liu</last><affiliation>Northeastern University</affiliation></author>
-      <author><first>David A.</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last><affiliation>Northeastern University</affiliation></author>
       <pages>8143-8172</pages>
       <abstract>State-run Chinese newspapers are believed to strategically select and frame news articles to align with the shifting political tides of the country. This paper describes methods to quantify these changes in content and framing over time. Looking at more than 50 years of articles from the People’s Daily and Reference News, we analyze differences in name mentions and sentiment in news articles for politicians before and after their deaths, as well as during and not during certain political events. We find significant estimates of difference, reflecting the changes in various aspects of the political environment in China during different time periods. We also apply change point detection methods to identify turning points in time series data of name mentions and sentiment. The identified turning points show a high co-occurrence with crucial political events and deaths of politicians. Furthermore, we utilize topic modeling to analyze the framing choices for articles written in different decades. The changes in frequent topic words are more significant in People’s Daily than in Reference News, which is consistent with the focus shifts of the Chinese central government in history. Finally, by using pre-trained language models to predict masked names in news articles, we analyze the distinctiveness of the language used to report individuals.</abstract>
       <url hash="34a230df">2025.naacl-long.414</url>
@@ -5725,7 +5725,7 @@
     <paper id="418">
       <title><fixed-case>GL</fixed-case>i<fixed-case>REL</fixed-case> - Generalist Model for Zero-Shot Relation Extraction</title>
       <author><first>Jack</first><last>Boylan</last><affiliation>Georgia Institute of Technology</affiliation></author>
-      <author><first>Chris</first><last>Hokamp</last><affiliation>Quantexa</affiliation></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last><affiliation>Quantexa</affiliation></author>
       <author><first>Demian Gholipour</first><last>Ghalandari</last><affiliation>Quantexa</affiliation></author>
       <pages>8230-8245</pages>
       <abstract>We introduce GLiREL, an efficient architecture and training paradigm for zero-shot relation classification. Identifying relationships between entities is a key task in information extraction pipelines. The zero-shot setting for relation extraction, where a taxonomy of relations is not pre-specified, has proven to be particularly challenging because of the computational complexity of inference, and because of the lack of labeled training data with sufficient coverage. Existing approaches rely upon distant supervision using auxiliary models to generate training data for unseen labels, upon very large general-purpose large language models (LLMs), or upon complex pipelines models with multiple inference stages. Inspired by the recent advancements in zero-shot named entity recognition, this paper introduces an approach to efficiently and accurately predict zero-shot relationship labels between multiple entities in a single forward pass. Experiments using the FewRel and WikiZSL benchmarks demonstrate that our approach achieves state-of-the-art results on the zero-shot relation classification task. In addition, we contribute a protocol for synthetically-generating datasets with diverse relation labels.</abstract>
@@ -5738,7 +5738,7 @@
       <author><first>Sachin</first><last>Kumar</last><affiliation>Ohio State University, Columbus</affiliation></author>
       <author><first>Chan Young</first><last>Park</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
-      <author><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>8246-8279</pages>
       <abstract>Conventional algorithms for training language models (LMs) with human feedback rely on preferences that are assumed to account for an “average” user, disregarding subjectivity and finer-grained variations. Recent studies have raised concerns that aggregating such diverse and often contradictory human feedback to finetune models results in generic models that generate outputs not preferred by many user groups, as they tend to average out styles and norms. To address this issue, we draw inspiration from recommendation systems and propose ComPO, a method to personalize preference optimization in LMs by contextualizing the probability distribution of model outputs with the preference provider. Focusing on group-level preferences rather than individuals, we collect and release ComPRed, a question answering dataset with community-level preferences from Reddit. This dataset facilitates studying diversity in preferences without incurring privacy concerns associated with individual feedback. Our experiments reveal that conditioning language models on a community identifier (i.e., subreddit name) during preference tuning substantially enhances model performance. Conversely, replacing this context with random subreddit identifiers significantly diminishes performance, highlighting the effectiveness of our approach in tailoring responses to communities’ preferences.</abstract>
@@ -5812,7 +5812,7 @@
     </paper>
     <paper id="425">
       <title><fixed-case>IHE</fixed-case>val: Evaluating Language Models on Following the Instruction Hierarchy</title>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Shiyang</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Zixuan</first><last>Zhang</last></author>
       <author><first>Xin</first><last>Liu</last><affiliation>Amazon</affiliation></author>
@@ -5857,7 +5857,7 @@
       <author><first>Philip</first><last>Schroeder</last></author>
       <author><first>Nathaniel W.</first><last>Morgan</last></author>
       <author><first>Hongyin</first><last>Luo</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
-      <author><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>8418-8442</pages>
       <abstract>Large language models (LLMs) have shown impressive capabilities across diverse settings, but still struggle as the length and complexity of the context increases. To address this challenge, we propose Thinking Recursively and Dynamically (ThReaD). THREAD frames model generation as a thread of execution that, based on the context, can run to completion or dynamically spawn new threads. By spawning, threads can offload work (e.g., thinking, retrieving information) to child threads, which only return tokens needed for the parent thread to do its work. We apply THREAD in the settings of LLM task solving and question answering, where the dynamic threading allows the model to recursively decompose the given task or question into progressively simpler sub-problems that can be solved by separate child threads. We test THREAD, implemented using a few-shot learning approach, on diverse benchmarks for agent tasks and data-grounded question answering. THREAD achieves state-of-the-art performance with GPT-4 and GPT-3.5 on these benchmarks, including ALFWorld, TextCraft, and WebShop, along with two new benchmarks, DataCommons QA and MIMIC-III ICU QA. In addition, THREAD outperforms existing frameworks by 10% to 50% absolute points with smaller models, including Llama-3-8b and CodeLlama-7b.</abstract>
       <url hash="0720d613">2025.naacl-long.427</url>
@@ -5868,7 +5868,7 @@
       <title><fixed-case>CORG</fixed-case>: Generating Answers from Complex, Interrelated Contexts</title>
       <author><first>Hyunji</first><last>Lee</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Seunghyun</first><last>Yoon</last><affiliation>Adobe Research</affiliation></author>
       <pages>8443-8460</pages>
       <abstract>In a real-world corpus, knowledge frequently recurs across documents but often contains inconsistencies due to ambiguous naming, outdated information, or errors, leading to complex interrelationships between contexts. Previous research has shown that language models struggle with these complexities, typically focusing on single factors in isolation. We classify these relationships into four types: distracting, ambiguous, counterfactual, and duplicated. Our analysis reveals that no single approach effectively addresses all these interrelationships simultaneously. Therefore, we introduce Context Organizer (COrg), a framework that organizes multiple contexts into independently processed groups. This design allows the model to efficiently find all relevant answers while ensuring disambiguation. COrg consists of three key components: a graph constructor, a reranker, and an aggregator. Our results demonstrate that COrg balances performance and efficiency effectively, outperforming existing grouping methods and achieving comparable results to more computationally intensive, single-context approaches.</abstract>
@@ -5980,7 +5980,7 @@
       <author><first>Justin</first><last>Qiu</last></author>
       <author><first>Zachary</first><last>Horvitz</last></author>
       <author><first>Marianna</first><last>Apidianaki</last><affiliation>University of Pennsylvania, University of Pennsylvania</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Chris</first><last>Callison-Burch</last><affiliation>University of Pennsylvania and University of Pennsylvania</affiliation></author>
       <pages>8662-8685</pages>
       <abstract>Style representations aim to embed texts with similar writing styles closely and texts with different styles far apart, regardless of content. However, the contrastive triplets often used for training these representations may vary in both style and content, leading to potential content leakage in the representations. We introduce StyleDistance, a novel approach to training stronger content-independent style embeddings. We use a large language model to create a synthetic dataset of near-exact paraphrases with controlled style variations, and produce positive and negative examples across 40 distinct style features for precise contrastive learning. We assess the quality of our synthetic data and embeddings through human and automatic evaluations. StyleDistance enhances the content-independence of style embeddings, which generalize to real-world benchmarks and outperform leading style representations in downstream applications.</abstract>
@@ -6027,7 +6027,7 @@
       <author><first>Jingfeng</first><last>Yang</last><affiliation>Amazon</affiliation></author>
       <author><first>Mingyu</first><last>Zhao</last></author>
       <author><first>Zhaoyu</first><last>Zhang</last><affiliation>Amazon</affiliation></author>
-      <author><first>Qin</first><last>Lu</last><affiliation>Amazon</affiliation></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last><affiliation>Amazon</affiliation></author>
       <author><first>Kaiwen</first><last>Men</last></author>
       <author><first>Ning</first><last>Xie</last><affiliation>Amazon</affiliation></author>
       <author><first>Huasheng</first><last>Li</last></author>
@@ -6070,7 +6070,7 @@
       <author><first>David</first><last>Wan</last><affiliation>Department of Computer Science, University of North Carolina at Chapel Hill</affiliation></author>
       <author><first>Jesse</first><last>Vig</last><affiliation>Salesforce Research</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <pages>8791-8810</pages>
       <abstract>Large Language Models (LLMs) often exhibit positional bias in long-context settings, under-attending to information in the middle of inputs. We investigate the presence of this bias in long-form summarization, its impact on faithfulness, and various techniques to mitigate this bias. To consistently evaluate faithfulness, we first compile a benchmark of eight human-annotated long-form summarization datasets and perform a meta-evaluation of faithfulness metrics. We show that LLM-based faithfulness metrics, though effective with full-context inputs, remain sensitive to document order, indicating positional bias. Analyzing LLM-generated summaries across six datasets, we find a “U-shaped” trend in faithfulness, where LLMs faithfully summarize the beginning and end of documents but neglect middle content. Perturbing document order similarly reveals models are less faithful when important documents are placed in the middle of the input. We find that this behavior is partly due to shifting focus with context length: as context increases, summaries become less faithful, but beyond a certain length, faithfulness improves as the model focuses on the end. Finally, we experiment with different generation techniques to reduce positional bias and find that prompting techniques effectively direct model attention to specific positions, whereas more sophisticated approaches offer limited improvements. Our data and code will be publicly available.</abstract>
       <url hash="2143347a">2025.naacl-long.442</url>
@@ -6119,7 +6119,7 @@
     <paper id="446">
       <title>Kill two birds with one stone: generalized and robust <fixed-case>AI</fixed-case>-generated text detection via dynamic perturbations</title>
       <author><first>Yinghan</first><last>Zhou</last></author>
-      <author><first>Juan</first><last>Wen</last><affiliation>China Agricultural University</affiliation></author>
+      <author id="wen-juan-hou"><first>Juan</first><last>Wen</last><affiliation>China Agricultural University</affiliation></author>
       <author><first>Wanli</first><last>Peng</last><affiliation>China Agricultural University</affiliation></author>
       <author><first>Xue</first><last>Yiming</last><affiliation>China Agricultural University</affiliation></author>
       <author><first>ZiWei</first><last>Zhang</last></author>
@@ -6137,7 +6137,7 @@
       <author><first>Fangzhi</first><last>Xu</last><affiliation>Xi’an Jiaotong University</affiliation></author>
       <author><first>Jianbing</first><last>Zhang</last><affiliation>Nanjing University</affiliation></author>
       <author><first>Hao</first><last>Zhou</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <pages>8876-8892</pages>
       <abstract>Chain-of-thought (CoT) has proven to improve the reasoning capability of large language models (LLMs). However, due to the complexity of multimodal scenarios and the difficulty in collecting high-quality CoT data, CoT reasoning in multimodal LLMs has been largely overlooked. To this end, we propose a simple yet effective self-training framework, <tex-math>R^3V</tex-math>, which iteratively enhances the model’s Vision-language Reasoning by Reflecting on CoT Rationales. Our framework consists of two interleaved parts: (1) iteratively bootstrapping positive and negative solutions for reasoning datasets, and (2) reflection on rationale for learning from mistakes. Specifically, we introduce the self-refine and self-select losses, enabling the model to refine flawed rationale and derive the correct answer by comparing rationale candidates. Experiments on a wide range of vision-language tasks show that <tex-math>R^3V</tex-math> consistently improves multimodal LLM reasoning, achieving a relative improvement of 23% to 60% over GPT-distilled baselines. Additionally, our approach supports self-reflection on generated solutions, further boosting performance through test-time computation. Our code is available at https://github.com/njucckevin/MM-Self-Improve.</abstract>
       <url hash="a15b7011">2025.naacl-long.447</url>
@@ -6272,7 +6272,7 @@
       <author><first>Atharva</first><last>Naik</last></author>
       <author><first>Marcus</first><last>Alenius</last></author>
       <author><first>Daniel</first><last>Fried</last><affiliation>Meta AI and Carnegie Mellon University</affiliation></author>
-      <author><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <pages>9049-9076</pages>
       <abstract>The task of automated code review has recently gained a lot of attention from the machine learning community. However, current review comment evaluation metrics rely on comparisons with a human-written reference for a given code change (also called a diff ). Furthermore, code review is a one-to-many problem, like generation and summarization, with many “valid reviews” for a diff. Thus, we develop CRScore — a reference-free metric to measure dimensions of review quality like conciseness, comprehensiveness, and relevance. We design CRScore to evaluate reviews in a way that is grounded in claims and potential issues detected in the code by LLMs and static analyzers. We demonstrate that CRScore can produce valid, fine-grained scores of review quality that have the greatest alignment with human judgment among open-source metrics (0.54 Spearman correlation) and are more sensitive than reference-based metrics. We also release a corpus of 2.9k human-annotated review quality scores for machine-generated and GitHub review comments to support the development of automated metrics.</abstract>
       <url hash="258ec8fb">2025.naacl-long.457</url>
@@ -6375,7 +6375,7 @@
       <author><first>Viet Thanh</first><last>Pham</last><affiliation>Monash University</affiliation></author>
       <author><first>Zhuang</first><last>Li</last><affiliation>Royal Melbourne Institute of Technology</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>9207-9228</pages>
       <abstract>Large language models, despite their remarkable success in recent years, still exhibit severe cultural bias. Therefore, in this paper, we introduce CultureInstruct, a large-scale instruction-tuning dataset designed to reduce cultural bias in LLMs. CultureInstruct is constructed with an automatic pipeline, utilizing public web sources and a specialized LLM to generate instruction. Our data comprises 430K instructions, ranging from classic NLP tasks to complex reasoning. CultureInstruct also covers 11 most relevant topics to cultural knowledge, making it highly diverse. Our experiments show that fine-tuning LLMs with CultureInstruct results in consistent improvements across three types of cultural benchmarks, including (i) general cultural knowledge, (ii) human opinions and values, and (iii) linguistic cultural bias. Our best model, Qwen2-Instruct 72B + CultureInstruct, outperforms GPT-4o Mini and GPT-4o with 18.47% and 13.07% average relative improvements on cultural benchmarks.</abstract>
       <url hash="7ab968c8">2025.naacl-long.465</url>
@@ -6387,7 +6387,7 @@
       <author><first>Lovish</first><last>Madaan</last><affiliation>Meta and University College London, University of London</affiliation></author>
       <author><first>David</first><last>Esiobu</last><affiliation>Facebook</affiliation></author>
       <author><first>Pontus</first><last>Stenetorp</last><affiliation>University College London</affiliation></author>
-      <author><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <author><first>Dieuwke</first><last>Hupkes</last><affiliation>Facebook</affiliation></author>
       <pages>9229-9242</pages>
       <abstract>In the recent past, a popular way of evaluating natural language understanding (NLU), was to consider a model’s ability to perform natural language inference (NLI) tasks. In this paper, we investigate if NLI tasks, that are rarely used for LLM evaluation, can still be informative for evaluating LLMs. Focusing on five different NLI benchmarks across six models of different scales, we investigate if they are able to discriminate models of different size and quality and how their accuracies develop during training. Furthermore, we investigate the extent to which the softmax distributions of models align with human distributions in cases where statements are ambiguous or vague. Overall, our results paint a positive picture for the NLI tasks: we find that they are able to discriminate well between models at various stages of training, yet are not (all) saturated. Furthermore, we find that while the similarity of model distributions with human label distributions increases with scale, it is still much higher than the similarity between two populations of humans, making it a potentially interesting statistic to consider.</abstract>
@@ -6446,7 +6446,7 @@
       <author><first>Hao</first><last>Yang</last><affiliation>Monash University</affiliation></author>
       <author><first>Lizhen</first><last>Qu</last><affiliation>Monash University</affiliation></author>
       <author><first>Ehsan</first><last>Shareghi</last><affiliation>Monash University</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <pages>9292-9306</pages>
       <abstract>Large Multimodal Models (LMMs) have demonstrated the ability to interact with humans under real-world conditions by combining Large Language Models (LLMs) and modality encoders to align multimodal information (visual and auditory) with text. However, such models raise new safety challenges of whether models that are safety-aligned on text also exhibit consistent safeguards for multimodal inputs. Despite recent safety-alignment research on vision LMMs, the safety of audio LMMs remains under-explored. In this work, we comprehensively red team the safety of five advanced audio LMMs under three settings: (i) harmful questions in both audio and text formats, (ii) harmful questions in text format accompanied by distracting non-speech audio, and (iii) speech-specific jailbreaks. Our results under these settings demonstrate that open-source audio LMMs suffer an average attack success rate of 69.14% on harmful audio questions, and exhibit safety vulnerabilities when distracted with non-speech audio noise. Our speech-specific jailbreaks on Gemini-1.5-Pro achieve an attack success rate of 70.67% on the harmful query benchmark. We provide insights on what could cause these reported safety-misalignments. Warning: this paper contains offensive examples.</abstract>
       <url hash="02455026">2025.naacl-long.470</url>
@@ -6483,7 +6483,7 @@
       <author><first>Srija</first><last>Mukhopadhyay</last></author>
       <author><first>Abhishek</first><last>Rajgaria</last></author>
       <author><first>Prerana</first><last>Khatiwada</last></author>
-      <author><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last><affiliation>International Institute of Information Technology Hyderabad, India</affiliation></author>
       <author><first>Dan</first><last>Roth</last></author>
       <author><first>Vivek</first><last>Gupta</last><affiliation>Arizona State University</affiliation></author>
       <pages>9348-9378</pages>
@@ -6497,7 +6497,7 @@
       <author><first>Min</first><last>Xiao</last></author>
       <author><first>Junnan</first><last>Zhu</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Feifei</first><last>Zhai</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last><affiliation>Institute of automation, Chinese academy of science, Chinese Academy of Sciences</affiliation></author>
       <author><first>Yu</first><last>Zhou</last><affiliation>Institute of Automation, Chinese Academy of Sciences</affiliation></author>
       <pages>9379-9392</pages>
       <abstract>Existing multimodal summarization approaches struggle with scenarios involving numerous images as input, leading to a heavy load for readers. Summarizing both the input text and numerous images helps readers quickly grasp the key points of multimodal input. This paper introduces a novel task, Numerous Images-Oriented Multimodal Summarization (NIMMS). To benchmark this task, we first construct the dataset based on a public multimodal summarization dataset. Considering that most existing metrics evaluate summaries from a unimodal perspective, we propose a new Multimodal Information evaluation (M-info) method, measuring the differences between the generated summary and the multimodal input. Finally, we compare various summarization methods on NIMMS and analyze associated challenges. Experimental results have shown that M-info correlates more closely with human judgments than five widely used metrics. Meanwhile, existing models struggle with summarizing numerous images. We hope that this research will shed light on the development of multimodal summarization. Furthermore, our code and dataset will be released to the public.</abstract>
@@ -6526,7 +6526,7 @@
       <author><first>Bingzheng</first><last>Gan</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Yufan</first><last>Zhao</last><affiliation>Huawei International Pte. Ltd.</affiliation></author>
       <author><first>Tianyi</first><last>Zhang</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
+      <author><first>Jing</first><last>Huang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Li</first><last>Yusu</last></author>
       <author><first>Shu Xian</first><last>Teo</last></author>
       <author><first>Changwang</first><last>Zhang</last><affiliation>CCF Theoretical Computer Science Technical Committee and OPPO Research Institute</affiliation></author>
@@ -6640,7 +6640,7 @@
       <author id="chaoqun-liu-ntu"><first>Chaoqun</first><last>Liu</last></author>
       <author><first>Wenxuan</first><last>Zhang</last><affiliation>Singapore University of Technology and Design</affiliation></author>
       <author><first>Yiran</first><last>Zhao</last><affiliation>National University of Singapore</affiliation></author>
-      <author><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last><affiliation>Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Shanda Group and Alibaba Group</affiliation></author>
       <pages>9594-9614</pages>
       <abstract>Large language models (LLMs) have demonstrated multilingual capabilities, yet they are mostly English-centric due to the imbalanced training corpora. While prior works have leveraged this bias to enhance multilingual performance through translation, they have been largely limited to natural language processing (NLP) tasks. In this work, we extend the evaluation to real-world user queries and non-English-centric LLMs, offering a broader examination of multilingual performance. Our key contribution lies in demonstrating that while translation into English can boost the performance of English-centric LLMs on NLP tasks, it is not universally optimal. For culture-related tasks that need deep language understanding, prompting in the native language proves more effective as it better captures the nuances of culture and language. Our experiments expose varied behaviors across LLMs and tasks in the multilingual context, underscoring the need for a more comprehensive approach to multilingual evaluation. Therefore, we call for greater efforts in developing and evaluating LLMs that go beyond English-centric paradigms.</abstract>
@@ -6713,7 +6713,7 @@
       <title>Grounding Fallacies Misrepresenting Scientific Publications in Evidence</title>
       <author><first>Max</first><last>Glockner</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <author><first>Yufang</first><last>Hou</last><affiliation>IT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>9732-9767</pages>
       <abstract>Health-related misinformation claims often falsely cite a credible biomedical publication as evidence. These publications only superficially seem to support the false claim, when logical fallacies are applied. In this work, we aim to detect and to highlight such fallacies, which requires assessing the exact content of the misrepresented publications. To achieve this, we introduce MissciPlus, an extension of the fallacy detection dataset Missci. MissciPlus extends Missci by grounding the applied fallacies in real-world passages from misrepresented studies. This creates a realistic test-bed for detecting and verbalizing fallacies under real-world input conditions, and enables new and realistic passage-retrieval tasks. MissciPlus is the first logical fallacy dataset which pairs the real-world misrepresented evidence with incorrect claims, identical to the input to evidence-based fact-checking models. With MissciPlus, we i) benchmark retrieval models in identifying passages that support claims only with fallacious reasoning, ii) evaluate how well LLMs verbalize fallacious reasoning based on misrepresented scientific passages, and iii) assess the effectiveness of fact-checking models in refuting claims that misrepresent biomedical research. Our findings show that current fact-checking models struggle to use misrepresented scientific passages to refute misinformation. Moreover, these passages can mislead LLMs into accepting false claims as true.</abstract>
@@ -6761,7 +6761,7 @@
       <title>Grammar Control in Dialogue Response Generation for Language Learning Chatbots</title>
       <author><first>Dominik</first><last>Glandorf</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
       <author><first>Peng</first><last>Cui</last><affiliation>ETHZ - ETH Zurich</affiliation></author>
-      <author><first>Detmar</first><last>Meurers</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last><affiliation>Eberhard-Karls-Universität Tübingen</affiliation></author>
       <author><first>Mrinmaya</first><last>Sachan</last><affiliation>Swiss Federal Institute of Technology</affiliation></author>
       <pages>9820-9839</pages>
       <abstract>Chatbots based on large language models offer cheap conversation practice opportunities for language learners. However, they are hard to control for linguistic forms that correspond to learners’ current needs, such as grammar. We control grammar in chatbot conversation practice by grounding a dialogue response generation model in a pedagogical repository of grammar skills. We also explore how this control helps learners to produce specific grammar. We comprehensively evaluate prompting, fine-tuning, and decoding strategies for grammar-controlled dialogue response generation. Strategically decoding Llama3 outperforms GPT-3.5 when tolerating minor response quality losses. Our simulation predicts grammar-controlled responses to support grammar acquisition adapted to learner proficiency. Existing language learning chatbots and research on second language acquisition benefit from these affordances. Code available on GitHub.</abstract>
@@ -6817,7 +6817,7 @@
       <author><first>Liang</first><last>Zhu</last></author>
       <author><first>Rui</first><last>Wang</last><affiliation>International Digital Economy Academy, International Digital Economy Academy</affiliation></author>
       <author><first>Xi</first><last>Wang</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last><affiliation>Monash University, Monash University and Monash University</affiliation></author>
       <author><first>Jiaxing</first><last>Zhang</last><affiliation>IDEA</affiliation></author>
       <pages>9902-9921</pages>
       <abstract>Long-term memory is important for chatbots and dialogue systems (DS) to create consistent and human-like conversations, evidenced by numerous developed memory-augmented DS (MADS). To evaluate the effectiveness of such MADS, existing commonly used evaluation metrics, like retrieval accuracy and perplexity (PPL), mainly focus on query-oriented factualness and language quality assessment. However, these metrics often lack practical value. Moreover, the evaluation dimensions are insufficient for human-like assessment in DS. Regarding memory-recalling paradigms, current evaluation schemes only consider passive memory retrieval while ignoring diverse memory recall with rich triggering factors, e.g., emotions and surroundings, which can be essential in emotional support scenarios. To bridge the gap, we construct a novel Memory-Augmented Dialogue Benchmark (MADail-Bench) covering various memory-recalling paradigms based on cognitive science and psychology theories. The benchmark assesses two tasks separately: memory retrieval and memory recognition with the incorporation of both passive and proactive memory recall data. We introduce new scoring criteria to the evaluation, including memory injection, emotion support (ES) proficiency, and intimacy, to comprehensively assess generated responses. Results from cutting-edge embedding models and large language models on this benchmark indicate the potential for further advancement. Extensive testing further reveals correlations between memory injection, ES proficiency, and intimacy.</abstract>
@@ -7009,7 +7009,7 @@
       <author><first>Jie</first><last>He</last></author>
       <author><first>Yijun</first><last>Yang</last><affiliation>Edinburgh University, University of Edinburgh</affiliation></author>
       <author><first>Wanqiu</first><last>Long</last></author>
-      <author><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last><affiliation>Tianjin University</affiliation></author>
       <author><first>Victor</first><last>Gutierrez Basulto</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Jeff Z.</first><last>Pan</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>10219-10244</pages>
@@ -7111,7 +7111,7 @@
     <paper id="521">
       <title>A Grounded Typology of Word Classes</title>
       <author><first>Coleman</first><last>Haley</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last><affiliation>University of Edinburgh</affiliation></author>
       <author><first>Edoardo</first><last>Ponti</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>10380-10399</pages>
       <abstract>In this work, we propose a grounded approach to meaning in language typology. Using images captioned across languages, we can treat the images as an empirical language agnostic representation of meaning, allowing the quantification of language function and semantics. Using principles from information theory, we define “groundedness”, an empirical measure of contextual semantic contentfulness which can be computed using multilingual (vision-and-)language models. As an initial application, we apply this measure to the typology of word classes. We find our measure captures the contentfulness asymmetry between functional (grammatical) and lexical (content) classes across languages, but contradicts the view that functional classes do not convey content. We release a dataset of groundedness scores for 30 languages. Our results suggest that the grounded typology approach can provide quantitative evidence about semantic function in language.</abstract>
@@ -7152,7 +7152,7 @@
       <author><first>Junyoung</first><last>Sung</last><affiliation>Korea University</affiliation></author>
       <author><first>Wonpyo</first><last>Park</last><affiliation>Google</affiliation></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Paul Hongsuck</first><last>Seo</last><affiliation>Korea University</affiliation></author>
+      <author id="hongsuck-seo"><first>Paul Hongsuck</first><last>Seo</last><affiliation>Korea University</affiliation></author>
       <pages>10431-10442</pages>
       <abstract>While large language models (LLMs) excel in generating coherent and contextually rich outputs, their capacity to efficiently handle long-form contexts is limited by fixed-length position embeddings. Additionally, the computational cost of processing long sequences increases quadratically, making it challenging to extend context length. To address these challenges, we propose Long-form Context Injection with Recurrent Compression (LCIRC), a method that enables the efficient processing long-form sequences beyond the model’s length limit through recurrent compression without retraining the entire model. We further introduce query dependent context modeling, which selectively compresses query-relevant information, ensuring that the model retains the most pertinent content. Our empirical results demonstrate that Query Dependent LCIRC (QD-LCIRC) significantly improves LLM’s ability to manage extended contexts, making it well-suited for tasks that require both comprehensive context understanding and query relevance.</abstract>
       <url hash="781649d1">2025.naacl-long.524</url>
@@ -7163,7 +7163,7 @@
       <title>A Template Is All You Meme</title>
       <author><first>Luke</first><last>Bates</last><affiliation>Technische Universität Darmstadt</affiliation></author>
       <author><first>Peter Ebert</first><last>Christensen</last></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
       <pages>10443-10475</pages>
       <abstract>Templatic memes, characterized by a semantic structure adaptable to the creator’s intent, represent a significant yet underexplored area within meme processing literature. With the goal of establishing a new direction for computational meme analysis, here we create a knowledge base composed of more than 5,200 meme templates, information about them, and 54,000 examples of template instances (templatic memes). To investigate the semantic signal of meme templates, we show that we can match memes in datasets to base templates contained in our knowledge base with a distance-based lookup. To demonstrate the power of meme templates, we create TSplit, a method to reorganize datasets, where a template or templatic instance can only appear in either the training or test split. Our re-split datasets enhance general meme knowledge and improve sample efficiency, leading to more robust models. Our examination of meme templates results in state-of-the-art performance for every dataset we consider, paving the way for analysis grounded in templateness.</abstract>
@@ -7209,7 +7209,7 @@
     <paper id="529">
       <title>Evaluating Defeasible Reasoning in <fixed-case>LLM</fixed-case>s with <fixed-case>DEFREASING</fixed-case></title>
       <author><first>Emily</first><last>Allaway</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>10540-10558</pages>
       <url hash="2e1f5e08">2025.naacl-long.529</url>
       <bibkey>allaway-mckeown-2025-evaluating</bibkey>
@@ -7258,7 +7258,7 @@
       <author><first>Xunliang</first><last>Cai</last><affiliation>Meituan</affiliation></author>
       <author><first>Tao</first><last>Gui</last><affiliation>Fudan University</affiliation></author>
       <author><first>Qi</first><last>Zhang</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last><affiliation>Fudan University</affiliation></author>
       <pages>10627-10646</pages>
       <abstract>Self-improvement methods enable large language models (LLMs) to generate solutions themselves and iteratively train on filtered, high-quality rationales. This process proves effective and reduces the reliance on human supervision in LLMs’ reasoning, but the performance soon plateaus. We delve into the process and find that models tend to over-sample on easy queries and under-sample on queries they have yet to master. As iterations proceed, this imbalance in sampling is exacerbated, leading to a long-tail distribution where solutions to difficult queries almost diminish. This phenomenon limits the performance gain of self-improving models. A straightforward solution is brute-force sampling to balance the distribution, which significantly raises computational costs. In this paper, we introduce Guided Self-Improvement (GSI), a strategy aimed at improving the efficiency of sampling challenging heavy-tailed data. It leverages Socratic-style guidance signals to help LLM reasoning with complex queries, reducing the exploration effort and minimizing computational overhead. Experiments on four models across diverse mathematical tasks show that GSI strikes a balance between performance and efficiency, while also being effective on held-out tasks.</abstract>
       <url hash="45966848">2025.naacl-long.533</url>
@@ -7329,7 +7329,7 @@
     <paper id="539">
       <title>Main Predicate and Their Arguments as Explanation Signals For Intent Classification</title>
       <author><first>Sameer</first><last>Pimparkhede</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>10778-10789</pages>
       <abstract>Intent classification is crucial for conversational agents (chatbots), and deep learning models perform well in this area. However, little research has been done on the explainability of intent classification due to the absence of suitable benchmark data. Human annotation of explanation signals in text samples is time-consuming and costly. However, from inspection of data on intent classification, we see that, more often than not, the main verb denotes the action, and the direct object indicates the domain of conversation, serving as explanation signals for intent. This observation enables us to hypothesize that the main predicate in the text utterances, along with the arguments of the main predicate, can serve as explanation signals. Leveraging this, we introduce a new technique to automatically augment text samples from intent classification datasets with word-level explanations. We mark main predicates (primarily verbs) and their arguments (dependency relations) as explanation signals in benchmark intent classification datasets ATIS and SNIPS, creating a unique 21k-instance dataset for explainability. Further, we experiment with deep learning and language models. We observe that models that work well for classification do not perform well in explainability metrics like plausibility and faithfulness. We also observe that guiding models to focus on explanation signals from our dataset during training improves the plausibility Token F1 score by 3-4%, improving the model’s reasoning.</abstract>
       <url hash="be700763">2025.naacl-long.539</url>
@@ -7473,7 +7473,7 @@
       <title>Fine-Grained Transfer Learning for Harmful Content Detection through Label-Specific Soft Prompt Tuning</title>
       <author><first>Faeze</first><last>Ghorbanpour</last></author>
       <author><first>Viktor</first><last>Hangya</last><affiliation>Fraunhofer IIS</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>11047-11061</pages>
       <abstract>The spread of harmful content online is a dynamic issue evolving over time. Existing detection models, reliant on static data, are becoming less effective and generalizable. Developing new models requires sufficient up-to-date data, which is challenging. A potential solution is to combine existing datasets with minimal new data. However, detection tasks vary—some focus on hate speech, offensive, or abusive content, which differ in the intent to harm, while others focus on identifying targets of harmful speech such as racism, sexism, etc.—raising the challenge of handling nuanced class differences. To address these issues, we introduce a novel transfer learning method that leverages class-specific knowledge to enhance harmful content detection. In our approach, we first present label-specific soft prompt tuning, which captures and represents class-level information. Secondly, we propose two approaches to transfer this fine-grained knowledge from source (existing tasks) to target (unseen and new tasks): initializing the target task prompts from source prompts and using an attention mechanism that learns and adjusts attention scores to utilize the most relevant information from source prompts. Experiments demonstrate significant improvements in harmful content detection across English and German datasets, highlighting the effectiveness of label-specific representations and knowledge transfer.</abstract>
       <url hash="5a217457">2025.naacl-long.551</url>
@@ -7517,7 +7517,7 @@
       <title>Latent Factor Models Meets Instructions: Goal-conditioned Latent Factor Discovery without Task Supervision</title>
       <author><first>Zhouhang</first><last>Xie</last><affiliation>University of California, San Diego</affiliation></author>
       <author><first>Tushar</first><last>Khot</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Harshit</first><last>Surana</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Julian</first><last>McAuley</last><affiliation>University of California, San Diego, University of California, San Diego</affiliation></author>
       <author><first>Peter</first><last>Clark</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
@@ -7547,7 +7547,7 @@
       <author><first>Jena D.</first><last>Hwang</last><affiliation>Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Xiang</first><last>Ren</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Nouha</first><last>Dziri</last></author>
-      <author><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last><affiliation>Stanford University</affiliation></author>
       <author><first>Maarten</first><last>Sap</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>11148-11167</pages>
       <abstract>The ability to communicate uncertainty and knowledge limitations is crucial for the safety of large language models (LLMs). Current evaluations of these abilities typically examine the correspondence between model accuracy and its internal probabilities or linguistic outputs. However, evaluation of the uncertainty of LLM communication should also focus on the behaviors of their human interlocutors: how much do users rely on what the LLM says? We introduce an interaction-centered evaluation approach called Rel-A.I. (pronounced “rely”) that quantifies whether and how humans rely on LLMs’ responses, complementing existing calibration evaluations. Through nine user studies with 450 participants, we investigate three crucial aspects that influence user reliance. We show that emphatic expressions of politeness (e.g., “I’m happy to help!”) that precede LLM answers will cause participants to perceive these models as more competent, and in turn, rely 30% more on their generations. Additionally, the context of the interaction, such as the knowledge domain and nature of previous interactions with the LLM, substantially influences user reliance (e.g., users will rely 10% more on LLMs when responding to questions involving calculations). Our results show that calibration and language quality alone are insufficient in informing which LLMs are safely calibrated, and illustrate the need to consider features of the interactional context.</abstract>
@@ -7559,7 +7559,7 @@
       <title>Eliciting Critical Reasoning in Retrieval-Augmented Generation via Contrastive Explanations</title>
       <author><first>Leonardo</first><last>Ranaldi</last></author>
       <author><first>Marco</first><last>Valentino</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last><affiliation>Idiap Research Institute and University of Manchester</affiliation></author>
       <pages>11168-11183</pages>
       <abstract>Retrieval-augmented generation (RAG) have emerged as a critical mechanism in contemporary NLP to support Large Language Models (LLMs) in systematically accessing richer factual context. However, the integration of RAG mechanisms bring its inherent challenges, as LLMs need to integrate potentially noisy contexts. Recent studies have shown that LLMs still struggle to critically analyse RAG-based in-context information, a limitation that may lead to incorrect inferences and hallucinations. In this paper, we investigate how to elicit critical arguments in RAG via contrastive explanations. In particular, we propose Contrastive-RAG (CRAG), a framework that (i) retrieves relevant documents given a query,(ii) selects and exemplifies relevant passages, and (iii) generates explanations that explicitly contrast the relevance of the passages to (iv) support the final answer. We show the impact of C-RAG building contrastive reasoning demonstrations from LLMs to instruct smaller models for retrieval-augmented tasks. Extensive experiments demonstrate that CRAG improves state-of-the-art RAG models while (a) requiring significantly fewer prompts and demonstrations and (b) being robust to perturbations in the retrieved documents.</abstract>
       <url hash="0f4dca48">2025.naacl-long.557</url>
@@ -7668,7 +7668,7 @@
       <title><fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>hart<fixed-case>QA</fixed-case>: Benchmarking Vision-Language Models on Multi-Chart Problems</title>
       <author><first>Zifeng</first><last>Zhu</last></author>
       <author><first>Mengzhao</first><last>Jia</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Lang</first><last>Li</last></author>
       <author><first>Meng</first><last>Jiang</last><affiliation>University of Notre Dame</affiliation></author>
       <pages>11341-11359</pages>
@@ -7680,7 +7680,7 @@
     <paper id="567">
       <title>It Is Not Only the Negative that Deserves Attention! Understanding, Generation &amp; Evaluation of (Positive) Moderation</title>
       <author><first>Iman</first><last>Jundi</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last><affiliation>University of Stuttgart, Universität Stuttgart</affiliation></author>
       <author><first>Carlotta</first><last>Quensel</last></author>
       <author><first>Neele</first><last>Falk</last></author>
       <author><first>Gabriella</first><last>Lapesa</last><affiliation>GESIS – Leibniz Institute for the Social Sciences and Heinrich-Heine University Düsseldorf</affiliation></author>
@@ -7696,8 +7696,8 @@
       <author><first>Khushang</first><last>Zaveri</last></author>
       <author><first>Shreya</first><last>Havaldar</last><affiliation>University of Pennsylvania</affiliation></author>
       <author><first>Soumna</first><last>Nema</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last><affiliation>University of Pennsylvania</affiliation></author>
       <pages>11396-11415</pages>
       <abstract>Shame and pride are social emotions expressed across cultures to motivate and regulate people’s thoughts, feelings, and behaviors. In this paper, we introduce the first cross-cultural dataset of over 10k shame/pride-related expressions with underlying social expectations from ~5.4K Bollywood and Hollywood movies. We examine *how* and *why* shame and pride are expressed across cultures using a blend of psychology-informed language analysis combined with large language models. We find significant cross-cultural differences in shame and pride expression aligning with known cultural tendencies of the USA and India – e.g., in Hollywood, shame-expressions predominantly discuss *self* whereas shame is expressed toward *others* in Bollywood. Women are more sanctioned across cultures and for violating similar social expectations.</abstract>
       <url hash="6cf10788">2025.naacl-long.568</url>
@@ -7722,7 +7722,7 @@
     </paper>
     <paper id="570">
       <title>m<fixed-case>H</fixed-case>uman<fixed-case>E</fixed-case>val - A Multilingual Benchmark to Evaluate Large Language Models for Code Generation</title>
-      <author><first>Nishat</first><last>Raihan</last></author>
+      <author id="nishat-raihan"><first>Nishat</first><last>Raihan</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>11432-11461</pages>
@@ -7765,7 +7765,7 @@
       <author><first>Yundi</first><last>Qian</last><affiliation>Facebook</affiliation></author>
       <author><first>Xuewei</first><last>Wang</last><affiliation>Facebook</affiliation></author>
       <author><first>Suchin</first><last>Gururangan</last><affiliation>Facebook and University of Washington, Seattle</affiliation></author>
-      <author><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Melanie</first><last>Kambadur</last><affiliation>Facebook</affiliation></author>
       <author><first>Dhruv</first><last>Mahajan</last><affiliation>Meta AI</affiliation></author>
       <author><first>Rui</first><last>Hou</last><affiliation>Meta Inc.</affiliation></author>
@@ -7877,7 +7877,7 @@
       <author><first>Ariadni</first><last>Christopoulou</last><affiliation>Verita International School</affiliation></author>
       <author><first>Filippos</first><last>Stamatiou</last><affiliation>Copenhagen University and University of Stellenbosch</affiliation></author>
       <author><first>Anne</first><last>Lauscher</last><affiliation>Universität Hamburg</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>Copenhagen University</affiliation></author>
       <pages>11618-11635</pages>
       <abstract>What ethical concerns, if any, do LLM researchers have? We introduce EthiCon, a corpus of 1,580 ethical concern statements extracted from scientific papers published in the ACL Anthology. We extract ethical concern keywords from the statements and show promising results in automating the concern identification process. Through a survey (<tex-math>N=200</tex-math>), we compare the ethical concerns of the corpus to the concerns listed by the general public and professionals in the field. Finally, we compare our retrieved ethical concerns with existing taxonomies and guidelines pointing to gaps and actionable insights.</abstract>
       <url hash="d859dad6">2025.naacl-long.580</url>
@@ -7954,7 +7954,7 @@
       <title>Is In-Context Learning a Type of Error-Driven Learning? Evidence from the Inverse Frequency Effect in Structural Priming</title>
       <author><first>Zhenghao</first><last>Zhou</last><affiliation>Yale University</affiliation></author>
       <author><first>Robert</first><last>Frank</last><affiliation>Yale University</affiliation></author>
-      <author><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last><affiliation>Yale University</affiliation></author>
       <pages>11712-11725</pages>
       <abstract>Large language models (LLMs) have shown the emergent capability of in-context learning (ICL). One line of research has claimed that ICL is functionally equivalent to gradient descent, a type of error-driven learning mechanism. In this paper, we introduce a new way of diagnosing whether ICL is functionally performing error-driven learning. Our approach is based on the inverse frequency effect (IFE)—a phenomenon in which an agent’s behavior is influenced to a greater degree when presented with improbable examples as compared to more likely ones. The IFE has previously been identified in psycholinguistics where humans exhibit the IFE in the context of structural priming (the tendency for people to produce sentence structures they have encountered recently). In that context, the IFE has been used as evidence that human structural priming must involve error-driven learning mechanisms. In our experiments, we simulated structural priming with ICL and found that LLMs indeed display the IFE, with the effect being stronger in larger models. We conclude that at least in the case we studied, ICL is indeed a type of error-driven learning, supporting the hypothesis that an error signal is implicitly computed in the forward pass during ICL. Our results suggest that both humans and LLMs make use of error-driven processing mechanisms in on-line processing.</abstract>
       <url hash="2ed4f05a">2025.naacl-long.586</url>
@@ -8143,7 +8143,7 @@
       <author><first>Khyati</first><last>Mahajan</last><affiliation>ServiceNow Inc</affiliation></author>
       <author><first>Vikas</first><last>Yadav</last></author>
       <author><first>Julian</first><last>Salazar</last><affiliation>Google DeepMind</affiliation></author>
-      <author><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last><affiliation>University of Illinois, Chicago</affiliation></author>
       <author><first>Masoud</first><last>Hashemi</last><affiliation>ServiceNow Inc</affiliation></author>
       <author><first>Rishabh</first><last>Maheshwary</last><affiliation>ServiceNow</affiliation></author>
       <pages>11975-11994</pages>
@@ -8157,7 +8157,7 @@
       <author><first>Margaret</first><last>Mitchell</last></author>
       <author><first>Giuseppe</first><last>Attanasio</last><affiliation>Instituto de Telecomunicações</affiliation></author>
       <author><first>Ioana</first><last>Baldini</last><affiliation>Bloomberg</affiliation></author>
-      <author><first>Miruna</first><last>Clinciu</last></author>
+      <author id="miruna-clinciu"><first>Miruna</first><last>Clinciu</last></author>
       <author><first>Jordan</first><last>Clive</last><affiliation>Chattermill</affiliation></author>
       <author><first>Pieter</first><last>Delobelle</last></author>
       <author><first>Manan</first><last>Dey</last><affiliation>SalesForce.com</affiliation></author>
@@ -8176,7 +8176,7 @@
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Anaelia</first><last>Ovalle</last><affiliation>University of California, Los Angeles</affiliation></author>
       <author><first>Giada</first><last>Pistilli</last><affiliation>Sorbonne University</affiliation></author>
-      <author><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last><affiliation>Yale University</affiliation></author>
       <author><first>Beatrice</first><last>Savoldi</last></author>
       <author><first>Vipul</first><last>Raheja</last><affiliation>Columbia University, Grammarly and International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Jeremy</first><last>Qin</last><affiliation>Université de Montréal</affiliation></author>
@@ -8204,10 +8204,10 @@
       <author><first>Marcelo</first><last>Viridiano</last></author>
       <author><first>Oskar</first><last>Van Der Wal</last></author>
       <author><first>Adina</first><last>Yakefu</last></author>
-      <author><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last><affiliation>LISN-CNRS / Université Paris Saclay</affiliation></author>
       <author><first>Mike</first><last>Zhang</last></author>
       <author><first>Sydney</first><last>Zink</last><affiliation>KBR</affiliation></author>
-      <author><first>Zeerak</first><last>Talat</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Talat</last><affiliation>University of Edinburgh, University of Edinburgh</affiliation></author>
       <pages>11995-12041</pages>
       <abstract>Large Language Models (LLMs) reproduce and exacerbate the social biases present in their training data, and resources to quantify this issue are limited. While research has attempted to identify and mitigate such biases, most efforts have been concentrated around English, lagging the rapid advancement of LLMs in multilingual settings. In this paper, we introduce a new multilingual parallel dataset SHADES to help address this issue, designed for examining culturally-specific stereotypes that may be learned by LLMs. The dataset includes stereotypes from 20 regions around the world and 16 languages, spanning multiple identity categories subject to discrimination worldwide. We demonstrate its utility in a series of exploratory evaluations for both “base” and “instruction-tuned” language models. Our results suggest that stereotypes are consistently reflected across models and languages, with some languages and models indicating much stronger stereotype biases than others.</abstract>
       <url hash="da5edc9d">2025.naacl-long.600</url>
@@ -8289,7 +8289,7 @@
       <author><first>Emily</first><last>Xiao</last></author>
       <author><first>Uri</first><last>Alon</last><affiliation>Google DeepMind</affiliation></author>
       <author><first>Jonathan</first><last>Berant</last><affiliation>Google and Tel Aviv University</affiliation></author>
-      <author><first>Matthew R.</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last><affiliation>Solventum and School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>12119-12149</pages>
       <abstract>As model context lengths continue to increase, the number of demonstrations that can be provided in-context approaches the size of entire training datasets. We study the behavior of in-context learning (ICL) at this extreme scale on multiple datasets and models. We show that, for many datasets with large label spaces, performance continues to increase with thousands of demonstrations. We contrast this with example retrieval and finetuning: example retrieval shows excellent performance at low context lengths but has diminished gains with more demonstrations; finetuning is more data hungry than ICL but can exceed long-context ICL performance with additional data. We use the ICL setting to study several properties of both in-context learning and long-context models. We show that long-context ICL is less sensitive to random input shuffling than short-context ICL, that grouping of same-label examples negatively impacts performance, and that the performance boosts do not arise from cumulative gain from encoding many examples together. We conclude that long-context ICL can be an effective tool, and may not require long-context attention for encoding the demonstration set at all.</abstract>
@@ -8325,7 +8325,7 @@
     <paper id="608">
       <title>Automatically Discovering How Misogyny is Framed on Social Media</title>
       <author><first>Rakshitha Rao</first><last>Ailneni</last><affiliation>University of Texas at Dallas</affiliation></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last><affiliation>University of Texas at Dallas</affiliation></author>
       <pages>12189-12208</pages>
       <abstract>Misogyny, which is widespread on social media, can be identified not only by recognizing its many forms but also by discovering how misogyny is framed. This paper considers the automatic discovery of misogyny problems and their frames through the Dis-MP&amp;F method, which enables the generation of a data-driven, rich Taxonomy of Misogyny (ToM), offering new insights in the complexity of expressions of misogyny. Furthermore, the Dis-MP&amp;F method, informed by the ToM, is capable of producing very promising results on a misogyny benchmark dataset.</abstract>
       <url hash="1b36de0b">2025.naacl-long.608</url>
@@ -8356,11 +8356,11 @@
       <title><fixed-case>R</fixed-case>e<fixed-case>IFE</fixed-case>: Re-evaluating Instruction-Following Evaluation</title>
       <author><first>Yixin</first><last>Liu</last><affiliation>Yale University</affiliation></author>
       <author><first>Kejian</first><last>Shi</last></author>
-      <author><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last><affiliation>SalesForce.com</affiliation></author>
       <author><first>Yilun</first><last>Zhao</last><affiliation>Yale University</affiliation></author>
       <author><first>PeiFeng</first><last>Wang</last><affiliation>Salesforce AI</affiliation></author>
       <author><first>Chien-Sheng</first><last>Wu</last><affiliation>Salesforce AI</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Arman</first><last>Cohan</last><affiliation>Yale University and Allen Institute for Artificial Intelligence</affiliation></author>
       <pages>12247-12287</pages>
       <abstract>The automatic evaluation of instruction following typically involves using large language models (LLMs) to assess response quality. However, there is a lack of comprehensive evaluation of these LLM-based evaluators across two dimensions: the base LLMs and the evaluation protocols. Therefore, we present a thorough meta-evaluation of instruction following, including 25 base LLMs and 15 recently proposed evaluation protocols, on 4 human-annotated datasets, assessing the evaluation accuracy of the LLM-evaluators. Our evaluation allows us to identify the best-performing base LLMs and evaluation protocols with a high degree of robustness. Moreover, our evaluation reveals key findings: (1) Base LLM performance ranking remains largely consistent across evaluation protocols, with less capable LLMs showing greater improvement from protocol enhancements; (2) Robust evaluation of evaluation protocols requires many base LLMs with varying capability levels, as protocol effectiveness depends on the base LLM used; (3) Evaluation results on different datasets are not always consistent, so a rigorous evaluation requires multiple datasets with distinctive features. We release our meta-evaluation suite ReIFE, which provides the codebase and evaluation result collection for over 500 LLM-evaluators, laying groundwork for future research in instruction-following evaluation.</abstract>
@@ -8420,7 +8420,7 @@
     <paper id="615">
       <title><fixed-case>MICE</fixed-case> for <fixed-case>CAT</fixed-case>s: Model-Internal Confidence Estimation for Calibrating Agents with Tools</title>
       <author><first>Nishant</first><last>Subramani</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Justin</first><last>Svegliato</last><affiliation>University of California, Berkeley and Microsoft</affiliation></author>
       <author><first>Benjamin</first><last>Van Durme</last><affiliation>Microsoft and Johns Hopkins University</affiliation></author>
       <author><first>Yu</first><last>Su</last><affiliation>Ohio State University</affiliation></author>
@@ -8474,7 +8474,7 @@
       <author><first>Sreyan</first><last>Ghosh</last></author>
       <author><first>Utkarsh</first><last>Tyagi</last></author>
       <author><first>Anton Jeran</first><last>Ratnarajah</last></author>
-      <author><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
+      <author id="chandra-kiran-reddy-evuru"><first>Chandra Kiran Reddy</first><last>Evuru</last></author>
       <author><first>Ramani</first><last>Duraiswami</last><affiliation>University of Maryland, College Park</affiliation></author>
       <author><first>Dinesh</first><last>Manocha</last><affiliation>University of Maryland, College Park</affiliation></author>
       <pages>12470-12483</pages>
@@ -8499,7 +8499,7 @@
       <author><first>Mark</first><last>Johnson</last><affiliation>Macquarie University</affiliation></author>
       <author><first>Krishnaram</first><last>Kenthapadi</last><affiliation>Oracle Health AI</affiliation></author>
       <author><first>Don</first><last>Dharmasiri</last><affiliation>Oracle</affiliation></author>
-      <author><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University and Oracle</affiliation></author>
       <pages>12484-12500</pages>
       <abstract>Large language models (LLMs) have shown impressive performance in <i>code</i> understanding and generation, making coding tasks a key focus for researchers due to their practical applications and value as a testbed for LLM evaluation. Data synthesis and filtering techniques have been widely adopted and shown to be highly effective in this context. In this paper, we present a focused survey and taxonomy of these techniques, emphasizing recent advancements. We highlight key challenges, explore future research directions, and offer practical guidance for new researchers entering the field.</abstract>
@@ -8511,7 +8511,7 @@
       <title><fixed-case>P</fixed-case>ara<fixed-case>ICL</fixed-case>: Towards Parallel In-Context Learning</title>
       <author><first>Xingxuan</first><last>Li</last></author>
       <author><first>Xuan-Phi</first><last>Nguyen</last><affiliation>SalesForce.com</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>SalesForce.com and Nanyang Technological University</affiliation></author>
       <author><first>Lidong</first><last>Bing</last><affiliation>Shanda Group and Alibaba Group</affiliation></author>
       <pages>12501-12511</pages>
       <abstract>Large language models (LLMs) have become the norm in natural language processing (NLP), excelling in few-shot in-context learning (ICL) with their remarkable abilities. Nonetheless, the success of ICL largely hinges on the choice of few-shot demonstration examples, making the selection process increasingly crucial. Existing methods have delved into optimizing the quantity and semantic similarity of these examples to improve ICL performances. However, our preliminary experiments indicate that the effectiveness of ICL is limited by the length of the input context. Moreover, varying combinations of few-shot demonstration examples can significantly boost accuracy across different test samples. To address this, we propose a novel method named parallel in-context learning (ParaICL) that effectively utilizes all demonstration examples without exceeding the manageable input context length. ParaICL employs parallel batching to distribute demonstration examples into different batches according to the semantic similarities of the questions in the demonstrations to the test question. It then computes normalized batch semantic scores for each batch. A weighted average semantic objective, constrained by adaptive plausibility, is applied to select the most appropriate tokens. Through extensive experiments, we validate the effectiveness of ParaICL and conduct ablation studies to underscore its design rationale. We further demonstrate that ParaICL can seamlessly integrate with existing methods.</abstract>
@@ -8613,7 +8613,7 @@
       <title>Pipeline Analysis for Developing Instruct <fixed-case>LLM</fixed-case>s in Low-Resource Languages: A Case Study on <fixed-case>B</fixed-case>asque</title>
       <author><first>Ander</first><last>Corral</last><affiliation>Orai NLP Technologies</affiliation></author>
       <author><first>Ixak Sarasua</first><last>Antero</last><affiliation>Universidad del País Vasco</affiliation></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <pages>12636-12655</pages>
       <abstract>Large language models (LLMs) are typically optimized for resource-rich languages like English, exacerbating the gap between high-resource and underrepresented languages. This work presents a detailed analysis of strategies for developing a model capable of following instructions in a low-resource language, specifically Basque, by focusing on three key stages: pre-training, instruction tuning, and alignment with human preferences. Our findings demonstrate that continual pre-training with a high-quality Basque corpus of around 600 million words improves natural language understanding (NLU) of the foundational model by over 12 points. Moreover, instruction tuning and human preference alignment using automatically translated datasets proved highly effective, resulting in a 24-point improvement in instruction-following performance. The resulting models, Llama-eus-8B and Llama-eus-8B-instruct, establish a new state-of-the-art for Basque in the sub-10B parameter category.</abstract>
       <url hash="2a68ad23">2025.naacl-long.629</url>
@@ -8651,7 +8651,7 @@
       <author><first>Paramita</first><last>Koley</last><affiliation>Indian Institute of Technology Kharagpur, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <author><first>Janardan</first><last>Misra</last></author>
       <author><first>Niloy</first><last>Ganguly</last><affiliation>Indian Institute of Technology Kharagpur,</affiliation></author>
-      <author id="saptarshi-ghosh"><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
+      <author><first>Saptarshi</first><last>Ghosh</last><affiliation>Indian Institute of Technology Kharagpur</affiliation></author>
       <pages>12688-12704</pages>
       <abstract>Large language models (LLMs) are increasingly recognized for their exceptional generative capabilities and versatility across various tasks. However, the high inference costs associated with these models have not received adequate attention, particularly when compared to the focus on training costs in existing research. In response to this gap, our study conducts a comprehensive benchmarking of LLM inference energy across a wide range of NLP tasks, where we analyze the impact of different models, tasks, prompts, and system-related factors on inference energy. Specifically, our experiments reveal several interesting insights, including strong correlation of inference energy with output token length and response time. Also, we find that quantization and optimal batch sizes, along with targeted prompt phrases, can significantly reduce energy usage. This study is the first to thoroughly benchmark LLM inference across such a diverse range of aspects, providing insights and offering several recommendations for improving energy efficiency in model deployment.</abstract>
       <url hash="1d94559d">2025.naacl-long.632</url>
@@ -8853,7 +8853,7 @@
       <author><first>Gleb</first><last>Kuzmin</last><affiliation>Artificial Intelligence Research Institute and Institute for Systems Analysis of Russian Academy of Sciences</affiliation></author>
       <author><first>Neemesh</first><last>Yadav</last><affiliation>Singapore Management University</affiliation></author>
       <author><first>Ivan</first><last>Smirnov</last></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <author><first>Artem</first><last>Shelmanov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>95-107</pages>
       <abstract>We propose selective debiasing – an inference-time safety mechanism designed to enhance the overall model quality in terms of prediction performance and fairness, especially in scenarios where retraining the model is impractical. The method draws inspiration from selective classification, where at inference time, predictions with low quality, as indicated by their uncertainty scores, are discarded. In our approach, we identify the potentially biased model predictions and, instead of discarding them, we remove bias from these predictions using LEACE – a post-processing debiasing method. To select problematic predictions, we propose a bias quantification approach based on KL divergence, which achieves better results than standard uncertainty quantification methods. Experiments on text classification datasets with encoder-based classification models demonstrate that selective debiasing helps to reduce the performance gap between post-processing methods and debiasing techniques from the at-training and pre-processing categories.</abstract>
@@ -8885,7 +8885,7 @@
       <author><first>Yebowen</first><last>Hu</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Hassan</first><last>Foroosh</last><affiliation>University of Central Florida</affiliation></author>
       <author><first>Wei</first><last>Jin</last><affiliation>Emory University</affiliation></author>
-      <author id="fei-liu"><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
+      <author><first>Fei</first><last>Liu</last><affiliation>Emory University</affiliation></author>
       <pages>131-141</pages>
       <abstract>Countless decisions shape our lives, and it is crucial to understand the how and why behind them. In this paper, we introduce a new LLM decision-making framework called STRUX, which enhances LLM decision-making by providing structured explanations. These include favorable and adverse facts related to the decision, along with their respective strengths. STRUX begins by distilling lengthy information into a concise table of key facts. It then employs a series of self-reflection steps to determine which of these facts are pivotal, categorizing them as either favorable or adverse in relation to a specific decision. Lastly, we fine-tune an LLM to identify and prioritize these key facts to optimize decision-making. STRUX has been evaluated on the challenging task of forecasting stock investment decisions based on earnings call transcripts and demonstrated superior performance against strong baselines. It enhances decision transparency by allowing users to understand the impact of different factors, representing a meaningful step towards practical decision-making with LLMs.</abstract>
       <url hash="dd9618ae">2025.naacl-short.11</url>
@@ -8995,7 +8995,7 @@
       <author><first>Tianqing</first><last>Fang</last><affiliation>Tencent AI Lab</affiliation></author>
       <author><first>Zhaowei</first><last>Wang</last><affiliation>Edinburgh University, University of Edinburgh and Department of Computer Science and Engineering, Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Yangqiu</first><last>Song</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
-      <author><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last><affiliation>University of Edinburgh</affiliation></author>
       <pages>229-243</pages>
       <abstract>While Large Language Models (LLMs) have showcased remarkable proficiency in reasoning, there is still a concern about hallucinations and unreliable reasoning issues due to semantic associations and superficial logical chains. To evaluate the extent to which LLMs perform robust reasoning instead of relying on superficial logical chains, we propose a new evaluation dataset, the Concept-Reversed Winograd Schema Challenge (CR-WSC), based on the famous Winograd Schema Challenge (WSC) dataset. By simply reversing the concepts to those that are more associated with the wrong answer, we find that the performance of LLMs drops significantly despite the rationale of reasoning remaining the same. Furthermore, we propose Abstraction-of-Thought (AoT), a novel prompt method for recovering adversarial cases to normal cases using conceptual abstraction to improve LLMs’ robustness and consistency in reasoning, as demonstrated by experiments on CR-WSC.</abstract>
       <url hash="61910dd0">2025.naacl-short.20</url>
@@ -9028,7 +9028,7 @@
     <paper id="23">
       <title><fixed-case>C</fixed-case>o<fixed-case>RAG</fixed-case>: Collaborative Retrieval-Augmented Generation</title>
       <author><first>Aashiq</first><last>Muhamed</last></author>
-      <author><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Virginia</first><last>Smith</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>265-276</pages>
       <abstract>Retrieval-Augmented Generation (RAG) models excel in knowledge-intensive tasks, especially under few-shot learning constraints. We introduce CoRAG, a framework extending RAG to collaborative settings, where clients jointly train a shared model using a collaborative passage store. To evaluate CoRAG, we introduce CRAB, a benchmark for collaborative homogeneous open-domain question answering. Our experiments demonstrate that CoRAG consistently outperforms both parametric collaborative learning methods and locally trained RAG models in low-resource scenarios. Further analysis reveals the critical importance of relevant passages within the shared store, the surprising benefits of incorporating irrelevant passages, and the potential for hard negatives to negatively impact performance. This introduces a novel consideration in collaborative RAG: the trade-off between leveraging a collectively enriched knowledge base and the potential risk of incorporating detrimental passages from other clients. Our findings underscore the viability of CoRAG, while also highlighting key design challenges and promising avenues for future research.</abstract>
@@ -9104,7 +9104,7 @@
       <author><first>Tsung-che</first><last>Li</last><affiliation>, Academia Sinica</affiliation></author>
       <author><first>Ho Yin Sam</first><last>Ng</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Hen-Hsen</first><last>Huang</last><affiliation>Institute of Information Science, Academia Sinica</affiliation></author>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last><affiliation>Pennsylvania State University</affiliation></author>
       <pages>342-355</pages>
       <abstract>A language can have different varieties. These varieties can affect the performance of natural language processing (NLP) models, including large language models (LLMs), which are often trained on data from widely spoken varieties. This paper introduces a novel and cost-effective approach to benchmark model performance across language varieties. We argue that international online review platforms,such as Booking.com, can serve as effective data sources for constructing datasets that capture comments in different language varieties from similar real-world scenarios, like reviews for the same hotel with the same rating using the same language (e.g., Mandarin Chinese) but different language varieties (e.g., Taiwan Mandarin, Mainland Mandarin). To prove this concept, we constructed a contextually aligned dataset comprising reviews in Taiwan Mandarin and Mainland Mandarin and tested six LLMs in a sentiment analysis task. Our results show that LLMs consistently underperform in Taiwan Mandarin.</abstract>
       <url hash="c81df266">2025.naacl-short.29</url>
@@ -9130,7 +9130,7 @@
       <author><first>Shaoling</first><last>Jing</last></author>
       <author><first>Jie</first><last>Zhang</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
       <author><first>Huawei</first><last>Shen</last><affiliation>Institute of Computing Technology, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>363-373</pages>
       <url hash="fc9b036a">2025.naacl-short.31</url>
       <bibkey>duan-etal-2025-related</bibkey>
@@ -9187,7 +9187,7 @@
       <author><first>Haotian</first><last>Ye</last><affiliation>Center for Information and Language Processing</affiliation></author>
       <author><first>Renhao</first><last>Pei</last></author>
       <author><first>Ehsaneddin</first><last>Asgari</last><affiliation>Qatar Computing Research Institute and University of California, Berkeley</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>414-439</pages>
       <abstract>While broad-coverage multilingual natural language processing tools have been developed, a significant portion of the world’s over 7000 languages are still neglected. One reason is the lack of evaluation datasets that cover a diverse range of languages, particularly those that are low-resource or endangered. To address this gap, we present a large-scale text classification dataset encompassing 1504 languages many of which have otherwise limited or no annotated data. This dataset is constructed using parallel translations of the Bible. We develop relevant topics, annotate the English data through crowdsourcing and project these annotations onto other languages via aligned verses. We benchmark a range of existing multilingual models on this dataset. We make our dataset and code available to the public.</abstract>
       <url hash="9d8fb766">2025.naacl-short.36</url>
@@ -9209,7 +9209,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>F</fixed-case>aith<fixed-case>B</fixed-case>ench: A Diverse Hallucination Benchmark for Summarization by <fixed-case>M</fixed-case>odern <fixed-case>LLM</fixed-case>s</title>
-      <author><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
+      <author id="forrest-bao"><first>Forrest Sheng</first><last>Bao</last><affiliation>Vectara, Inc.</affiliation></author>
       <author><first>Miaoran</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Renyi</first><last>Qu</last><affiliation>Vectara</affiliation></author>
       <author><first>Ge</first><last>Luo</last><affiliation>Vectara Inc.</affiliation></author>
@@ -9486,7 +9486,7 @@
     <paper id="60">
       <title>Scaling Graph-Based Dependency Parsing with Arc Vectorization and Attention-Based Refinement</title>
       <author><first>Nicolas</first><last>Floquet</last></author>
-      <author><first>Joseph Le</first><last>Roux</last><affiliation>Université Paris 13</affiliation></author>
+      <author id="joseph-le-roux"><first>Joseph Le</first><last>Roux</last><affiliation>Université Paris 13</affiliation></author>
       <author><first>Nadi</first><last>Tomeh</last><affiliation>Université Sorbonne Paris Nord</affiliation></author>
       <author><first>Thierry</first><last>Charnois</last><affiliation>University of Sorbonne Paris Nord (Paris 13)</affiliation></author>
       <pages>722-734</pages>
@@ -9525,7 +9525,7 @@
       <author><first>Katharina</first><last>Hämmerl</last></author>
       <author><first>Tomasz</first><last>Limisiewicz</last><affiliation>Meta and University of Washington</affiliation></author>
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Technical University of Munich</affiliation></author>
       <pages>756-767</pages>
       <abstract>Previous work has considered token overlap, or even similarity of token distributions, as predictors for multilinguality and cross-lingual knowledge transfer in language models. However, these very literal metrics assign large distances to language pairs with different scripts, which can nevertheless show good cross-linguality. This limits the explanatory strength of token overlap for knowledge transfer between language pairs that use distinct scripts or follow different orthographic conventions. In this paper, we propose subword token alignability as a new way to understand the impact and quality of multilingual tokenisation. In particular, this metric predicts multilinguality much better when scripts are disparate and the overlap of literal tokens is low. We analyse this metric in the context of both encoder and decoder models, look at data size as a potential distractor, and discuss how this insight may be applied to multilingual tokenisation in future work. We recommend our subword token alignability metric for identifying optimal language pairs for cross-lingual transfer, as well as to guide the construction of better multilingual tokenisers in the future. We publish our code and reproducibility details.</abstract>
       <url hash="b9c7f03d">2025.naacl-short.63</url>
@@ -9561,7 +9561,7 @@
       <author><first>Youngwon</first><last>Lee</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Seung-won</first><last>Hwang</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Daniel F</first><last>Campos</last><affiliation>Snowflake</affiliation></author>
-      <author><first>Filip</first><last>Graliński</last><affiliation>Snowflake and Adam Mickiewicz University</affiliation></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last><affiliation>Snowflake and Adam Mickiewicz University</affiliation></author>
       <author><first>Zhewei</first><last>Yao</last><affiliation>Snowflake</affiliation></author>
       <author><first>Yuxiong</first><last>He</last><affiliation>Microsoft</affiliation></author>
       <pages>787-796</pages>
@@ -9635,7 +9635,7 @@
       <title>Identifying Power Relations in Conversations using Multi-Agent Social Reasoning</title>
       <author><first>Zhaoqing</first><last>Wu</last><affiliation>Purdue University</affiliation></author>
       <author><first>Dan</first><last>Goldwasser</last><affiliation>Purdue University and Purdue University</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Leora</first><last>Morgenstern</last><affiliation>SRI International</affiliation></author>
       <pages>855-865</pages>
       <abstract>Large language models (LLMs) struggle in social science domains, where critical thinking and human-level inference are crucial. In this work, we propose a multi-agent social reasoning framework that leverages the generative and reasoning capabilities of LLMs to generate and evaluate reasons from multiple perspectives grounded in social science theories, and construct a factor graph for inference. Experimental results on understanding power dynamics in conversations show that our method outperforms standard prompting baselines, demonstrating its potential for tackling hard Computational Social Science (CSS) tasks.</abstract>
@@ -9648,7 +9648,7 @@
       <author><first>Aylin Ece</first><last>Gunal</last></author>
       <author><first>Bowen</first><last>Yi</last></author>
       <author><first>John D.</first><last>Piette</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Veronica</first><last>Perez-Rosas</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <pages>866-872</pages>
       <abstract>Cultural and language factors significantly influence counseling, but Natural Language Processing research has not yet examined whether the findings of conversational analysis for counseling conducted in English apply to other languages. This paper presents a first step towards this direction. We introduce MIDAS (Motivational Interviewing Dataset in Spanish), a counseling dataset created from public video sources that contains expert annotations for counseling reflections and questions. Using this dataset, we explore language-based differences in counselor behavior in English and Spanish and develop classifiers in monolingual and multilingual settings, demonstrating its applications in counselor behavioral coding tasks.</abstract>
@@ -9706,7 +9706,7 @@
       <title>Giving the Old a Fresh Spin: Quality Estimation-Assisted Constrained Decoding for Automatic Post-Editing</title>
       <author><first>Sourabh</first><last>Deoghare</last></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last><affiliation>Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>914-925</pages>
       <abstract>Automatic Post-Editing (APE) systems often struggle with over-correction, where unnecessary modifications are made to a translation, diverging from the principle of minimal editing. In this paper, we propose a novel technique to mitigate over-correction by incorporating word-level Quality Estimation (QE) information during the decoding process. This method is architecture-agnostic, making it adaptable to any APE system, regardless of the underlying model or training approach. Our experiments on English-German, English-Hindi, and English-Marathi language pairs show the proposed approach yields significant improvements over their corresponding baseline APE systems, with TER gains of 0.65, 1.86, and 1.44 points, respectively. These results underscore the complementary relationship between QE and APE tasks and highlight the effectiveness of integrating QE information to reduce over-correction in APE systems.</abstract>
       <url hash="ea8c6bc9">2025.naacl-short.77</url>
@@ -9759,8 +9759,8 @@
       <author><first>Xiaoran</first><last>Liu</last></author>
       <author><first>Julia</first><last>Buffolino</last></author>
       <author><first>Christian C.</first><last>Luhmann</last></author>
-      <author><first>Ryan L.</first><last>Boyd</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="ryan-boyd"><first>Ryan L.</first><last>Boyd</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>966-979</pages>
       <abstract>While NLP models often seek to capture cognitive states via language, the validity of predicted states is determined by comparing them to annotations created without access the cognitive states of the authors. In behavioral sciences, cognitive states are instead measured via experiments. Here, we introduce an experiment-based framework for evaluating language-based cognitive style models against human behavior. We explore the phenomenon of decision making, and its relationship to the linguistic style of an individual talking about a recent decision they made. The participants then follow a classical decision-making experiment that captures their cognitive style, determined by how preferences change during a decision exercise. We find that language features, intended to capture cognitive style, can predict participants’ decision style with moderate-to-high accuracy (AUC 0.8), demonstrating that cognitive style can be partly captured and revealed by discourse patterns.</abstract>
       <url hash="d75966f1">2025.naacl-short.81</url>
@@ -9827,7 +9827,7 @@
       <author><first>Qiang</first><last>Li</last><affiliation>Accenture</affiliation></author>
       <author><first>Mingkun</first><last>Tan</last><affiliation>Universität Bielefeld</affiliation></author>
       <author><first>Xun</first><last>Zhao</last></author>
-      <author id="dan-zhang"><first>Dan</first><last>Zhang</last></author>
+      <author><first>Dan</first><last>Zhang</last></author>
       <author><first>Daoan</first><last>Zhang</last></author>
       <author><first>Shengzhao</first><last>Lei</last><affiliation>EPFL - EPF Lausanne</affiliation></author>
       <author><first>Anderson S.</first><last>Chu</last></author>
@@ -9846,7 +9846,7 @@
       <author><first>Xichou</first><last>Zhu</last></author>
       <author><first>Lei</first><last>Yu</last></author>
       <author><first>Jiafeng</first><last>Guo</last><affiliation>Institute of Computing Technolgy, Chinese Academy of Sciences</affiliation></author>
-      <author><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last><affiliation>Institute of Computing Technology, Chinese Academy</affiliation></author>
       <pages>54-61</pages>
       <abstract>Text2Sql is a task that converts natural language questions into SQL queries. In previous research on LLM fine-tuning, researchers typically input both the entire database schema and the natural language question into the model. This approach has two issues: 1) the model’s context is limited when dealing with a large number of database tables; 2) the question is often related to only a few tables, leading to excessive irrelevant information that distracts the model. To address these issues, we employed pure fine-tuning strategy to reduce redundancy. The model fine-tuned with pure prompts, using prompts that are only 53% of the baseline length, outperforms the baseline (fine-tuned with all tables in the prompt) by 8.2% and 8.6% in Test-suite accuracy (TS) and exact-set-match accuracy (EM), respectively, on the Spider dev set. Under the most refined Spider dev set of prompts, the model achieves TS and EM scores of 73.5% and 75.4%, respectively, approaching state-of-the-art (SOTA) levels. To leverage the capabilities of the model with pure prompts, we applied pure knowledge distillation strategy to transfer its abilities. The distilled student model achieved a 1.9% improvement in TS, while the teacher model’s prompt length was only 23% of that of the student model.</abstract>
       <url hash="41e189e1">2025.naacl-industry.5</url>
@@ -9858,7 +9858,7 @@
       <author><first>Vinay Kumar</first><last>Verma</last><affiliation>Amazon</affiliation></author>
       <author><first>Shreyas Sunil</first><last>Kulkarni</last><affiliation>Amazon</affiliation></author>
       <author><first>Happy</first><last>Mittal</last><affiliation>Amazon</affiliation></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
       <pages>62-69</pages>
       <abstract>Question Answering (QA) and Visual Question Answering (VQA) are well-studied problems in the language and vision domain. One challenging scenario involves multiple sources of information, each of a different modality, where the answer to the question may exist in one or more sources. This scenario contains richer information but is highly complex to handle. In this work, we formulate a novel question-answer generation (QAG) framework in an environment containing multi-source, multimodal information. The answer may belong to any or all sources; therefore, selecting the most prominent answer source or an optimal combination of all sources for a given question is challenging. To address this issue, we propose a question-guided attention mechanism that learns attention across multiple sources and decodes this information for robust and unbiased answer generation. To learn attention within each source, we introduce an explicit alignment between questions and various information sources, which facilitates identifying the most pertinent parts of the source information relative to the question. Scalability in handling diverse questions poses a challenge. We address this by extending our model to a sparse mixture-of-experts (sparse-MoE) framework, enabling it to handle thousands of question types. Experiments on T5 and Flan-T5 using three datasets demonstrate the model’s efficacy, supported by ablation studies.</abstract>
       <url hash="a3d83fb9">2025.naacl-industry.6</url>
@@ -9930,7 +9930,7 @@
       <author><first>Guoqing</first><last>Sun</last><affiliation>China Merchants Bank Credit Card Center</affiliation></author>
       <author><first>Mengchen</first><last>Zhu</last></author>
       <author><first>Haifeng</first><last>Tang</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <pages>129-137</pages>
       <abstract>Designing effective debt collection systems is crucial for improving operational efficiency and reducing costs in the financial industry. However, the challenges of maintaining script diversity, contextual relevance, and coherence make this task particularly difficult. This paper presents a debt collection system based on real debtor-collector data from a major commercial bank. We construct a script library from real-world debt collection conversations, and propose a two-stage retrieval based response system for contextual relevance. Experimental results show that our system improves script diversity, enhances response relevance, and achieves practical deployment efficiency through knowledge distillation. This work offers a scalable and automated solution, providing valuable insights for advancing debt collection practices in real-world applications.</abstract>
@@ -10096,7 +10096,7 @@
       <title><fixed-case>C</fixed-case>haracter<fixed-case>GPT</fixed-case>: A Persona Reconstruction Framework for Role-Playing Agents</title>
       <author><first>Jeiyoon</first><last>Park</last></author>
       <author><first>Chanjun</first><last>Park</last><affiliation>Korea University</affiliation></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>287-303</pages>
       <abstract>The recent introduction of the Assistants API highlights its potential for large language models (LLMs) in role-playing agents (RPA). However, maintaining consistent character personas remains a significant challenge due to variability in information extraction, which frequently omits critical elements such as backstory or interpersonal relationships. To address this limitation, we introduce CharacterGPT, a framework designed to dynamically reconstruct character personas through Character Persona Training (CPT). This approach incrementally updates personas by extracting traits from chapter-wise novel summaries, reflecting the progression of the narrative. Our framework is evaluated through Big Five personality evaluations and creative tasks, in which characters generate original narratives, demonstrating the efficacy of CharacterGPT in preserving persona consistency. The code and results are available at https://github.com/Jeiyoon/charactergpt</abstract>
       <url hash="0b39de60">2025.naacl-industry.24</url>
@@ -10184,7 +10184,7 @@
       <author><first>Lukas</first><last>Fischer</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Yingqiang</first><last>Gao</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Alexa</first><last>Lintner</last><affiliation>ZHAW - Zürcher Hochschule für Angewandte Wissenschaften</affiliation></author>
-      <author><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last><affiliation>University of Zurich</affiliation></author>
       <author><first>Sarah</first><last>Ebling</last><affiliation>University of Zurich</affiliation></author>
       <pages>370-379</pages>
       <abstract>Audio description (AD) is a crucial accessibility service provided to blind persons and persons with visual impairment, designed to convey visual information in acoustic form. Despite recent advancements in multilingual machine translation research, the lack of well-crafted and time-synchronized AD data impedes the development of audio description translation (ADT) systems that address the needs of multilingual countries such as Switzerland. Furthermore, most ADT systems rely on text alone, and it is unclear whether incorporating visual information from video clips improves the quality of ADT outputs.In this work, we introduce SwissADT, an **emerging** ADT system for three main Swiss languages and English, designed for future use by our industry partners. By collecting well-crafted AD data augmented with video clips in German, French, Italian, and English, and leveraging the power of Large Language Models (LLMs), we aim to enhance information accessibility for diverse language populations in Switzerland by automatically translating AD scripts to the desired Swiss language. Our extensive experimental ADT results, composed of both automatic and human evaluations of ADT quality, demonstrate the promising capability of SwissADT for the ADT task. We believe that combining human expertise with the generation power of LLMs can further enhance the performance of ADT systems, ultimately benefiting a larger multilingual target population.</abstract>
@@ -10208,7 +10208,7 @@
     <paper id="33">
       <title><fixed-case>M</fixed-case>ono<fixed-case>TOD</fixed-case>ia: Translating Monologue Requests to Task-Oriented Dialogues</title>
       <author><first>Sebastian</first><last>Steindl</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
-      <author><first>Ulrich</first><last>Schäfer</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last><affiliation>Ostbayerische Technische Hochschule Amberg-Weiden</affiliation></author>
       <author><first>Bernd</first><last>Ludwig</last><affiliation>Universität Regensburg</affiliation></author>
       <pages>390-403</pages>
       <abstract>Data scarcity is one of the main problems when it comes to real-world applications of transformer-based models.This is especially evident for task-oriented dialogue (TOD) systems, which require specialized datasets, that are usually not readily available. This can hinder companies from adding TOD systems to their services.This study therefore investigates a novel approach to sourcing annotated dialogues from existing German monologue material.Focusing on a real-world example, we investigate whether these monologues can be transformed into dialogue formats suitable for training TOD systems.We show the approach with the concrete example of a company specializing in travel bookings via e-mail. We fine-tune state-of-the-art Large Language Models for the task of rewriting e-mails as dialogues and annotating them.To ensure the quality and validity of the generated data, we employ crowd workers to evaluate the dialogues across multiple criteria and to provide gold-standard annotations for the test dataset.We further evaluate the usefulness of the dialogues for training TOD systems.Our evaluation shows that the dialogues and annotations are of high quality and can serve as a valuable starting point for training TOD systems.Finally, we make the annotated dataset publicly available to foster future research.</abstract>
@@ -10221,7 +10221,7 @@
       <author><first>Haoan</first><last>Jin</last></author>
       <author><first>Jiacheng</first><last>Shi</last></author>
       <author><first>Hanhui</first><last>Xu</last><affiliation>Fudan University</affiliation></author>
-      <author><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last><affiliation>University of Texas at Arlington</affiliation></author>
       <author><first>Mengyue</first><last>Wu</last><affiliation>Shanghai Jiaotong University</affiliation></author>
       <pages>404-421</pages>
       <abstract>Large language models (LLMs) demonstrate significant potential in advancing medical applications, yet their capabilities in addressing medical ethics challenges remain underexplored. This paper introduces MedEthicEval, a novel benchmark designed to systematically evaluate LLMs in the domain of medical ethics. Our framework encompasses two key components: knowledge, assessing the models’ grasp of medical ethics principles, and application, focusing on their ability to apply these principles across diverse scenarios. To support this benchmark, we consulted with medical ethics researchers and developed three datasets addressing distinct ethical challenges: blatant violations of medical ethics, priority dilemmas with clear inclinations, and equilibrium dilemmas without obvious resolutions. MedEthicEval serves as a critical tool for understanding LLMs’ ethical reasoning in healthcare, paving the way for their responsible and effective use in medical contexts.</abstract>
@@ -10342,7 +10342,7 @@
       <title><fixed-case>T</fixed-case>urbo<fixed-case>F</fixed-case>uzz<fixed-case>LLM</fixed-case>: Turbocharging Mutation-based Fuzzing for Effectively Jailbreaking Large Language Models in Practice</title>
       <author><first>Aman</first><last>Goel</last><affiliation>Amazon</affiliation></author>
       <author><first>Xian</first><last>Wu</last><affiliation>Amazon</affiliation></author>
-      <author><first>Zhe</first><last>Wang</last><affiliation>Amazon</affiliation></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last><affiliation>Amazon</affiliation></author>
       <author><first>Dmitriy</first><last>Bespalov</last><affiliation>Amazon</affiliation></author>
       <author><first>Yanjun</first><last>Qi</last><affiliation>Amazon and University of Virginia</affiliation></author>
       <pages>523-534</pages>
@@ -10457,7 +10457,7 @@
       <title>Breaking Down Power Barriers in On-Device Streaming <fixed-case>ASR</fixed-case>: Insights and Solutions</title>
       <author><first>Yang</first><last>Li</last><affiliation>Iowa State University</affiliation></author>
       <author><first>Yuan</first><last>Shangguan</last><affiliation>Current: Google</affiliation></author>
-      <author id="yuhao-wang"><first>Yuhao</first><last>Wang</last><affiliation>Facebook</affiliation></author>
+      <author><first>Yuhao</first><last>Wang</last><affiliation>Facebook</affiliation></author>
       <author><first>Liangzhen</first><last>Lai</last><affiliation>Facebook</affiliation></author>
       <author><first>Ernie</first><last>Chang</last><affiliation>Meta AI</affiliation></author>
       <author><first>Changsheng</first><last>Zhao</last><affiliation>Meta Inc.</affiliation></author>
@@ -10474,7 +10474,7 @@
       <author><first>Swapnil</first><last>Gupta</last></author>
       <author><first>Lucas Pereira</first><last>Carlini</last><affiliation>Amazon</affiliation></author>
       <author><first>Prateek</first><last>Sircar</last></author>
-      <author><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
       <pages>627-637</pages>
       <abstract>Language localization is the adaptation of written content to different linguistic and cultural contexts. Ability to localize written content is crucial for global businesses to provide consistent and reliable customer experience across diverse markets. Traditional methods have approached localization as an application of machine translation (MT), but localization requires more than linguistic conversion – content needs to align with the target audience’s cultural norms, linguistic nuances, and technical requirements. This difference is prominent for long-form text, where multiple facts are present in a creative choice of language. We propose a novel prompt approach for Large Languages Models (LLMs), called Break-Ideate-Generate (BrIdGe), for language localization. BrIdGe ‘breaks’ the source content into granular facts, ‘ideates’ an action plan for content creation in the target language by organizing the granular facts, and finally executes the plan to ‘generate’ localized content. This approach emulates the cognitive processes humans employ in writing that begin with identifying important points, followed by brainstorming on how to structure and organize the output. We evaluated the BrIdGe methodology from multiple perspectives, including impact of BrIdGe prompt on different LLMs and performance comparisons with traditional MT models and direct translation through LLMs on public benchmark and proprietary e-commerce datasets. Through human and LLM-based automated evaluations across content in multiple languages, we demonstrate effectiveness of BrIdGe in generating fluent localized content while preserving factual consistency between source and target languages.</abstract>
       <url hash="881d09cf">2025.naacl-industry.51</url>
@@ -10580,7 +10580,7 @@
       <author><first>Long</first><last>Vo-Dang</last></author>
       <author><first>Khai-Nguyen</first><last>Nguyen</last></author>
       <author><first>Truong-Son</first><last>Hy</last><affiliation>University of Alabama at Birmingham</affiliation></author>
-      <author><first>Ralf</first><last>Schlüter</last><affiliation>AppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last><affiliation>AppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen</affiliation></author>
       <pages>724-783</pages>
       <abstract>Spoken Named Entity Recognition (NER) aims to extract named entities from speech and categorise them into types like person, location, organization, etc. In this work, we present *VietMed-NER* - the first spoken NER dataset in the medical domain. To our knowledge, our Vietnamese real-world dataset is the largest spoken NER dataset in the world regarding the number of entity types, featuring 18 distinct types. Furthermore, we present baseline results using various state-of-the-art pre-trained models: encoder-only and sequence-to-sequence; and conduct quantitative and qualitative error analysis. We found that pre-trained multilingual models generally outperform monolingual models on reference text and ASR output and encoders outperform sequence-to-sequence models in NER tasks. By translating the transcripts, the dataset can also be utilised for text NER in the medical domain in other languages than Vietnamese. All code, data and models are publicly available.</abstract>
       <url hash="bf7a80d5">2025.naacl-industry.59</url>
@@ -10619,7 +10619,7 @@
       <author><first>Prasanjit</first><last>Rath</last><affiliation>Microsoft</affiliation></author>
       <author><first>Hari</first><last>Shrawgi</last><affiliation>Microsoft</affiliation></author>
       <author><first>Parag</first><last>Agrawal</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
       <pages>809-821</pages>
       <abstract>This paper analyzes the safety of Large Language Models (LLMs) in interactions with children below age of 18 years. Despite the transformative applications of LLMs in various aspects of children’s lives, such as education and therapy, there remains a significant gap in understanding and mitigating potential content harms specific to this demographic. The study acknowledges the diverse nature of children, often overlooked by standard safety evaluations, and proposes a comprehensive approach to evaluating LLM safety specifically for children. We list down potential risks that children may encounter when using LLM-powered applications. Additionally, we develop Child User Models that reflect the varied personalities and interests of children, informed by literature in child care and psychology. These user models aim to bridge the existing gap in child safety literature across various fields. We utilize Child User Models to evaluate the safety of six state-of-the-art LLMs. Our observations reveal significant safety gaps in LLMs, particularly in categories harmful to children but not adults.</abstract>
       <url hash="9881a003">2025.naacl-industry.62</url>
@@ -10647,7 +10647,7 @@
       <author><first>Steve</first><last>Siu</last><affiliation>Oracle</affiliation></author>
       <author><first>Don</first><last>Dharmasiri</last><affiliation>Oracle</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University and Oracle</affiliation></author>
-      <author><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
       <author><first>Damien</first><last>Hilloulin</last><affiliation>Oracle labs</affiliation></author>
       <author><first>Rhicheek</first><last>Patra</last><affiliation>Oracle</affiliation></author>
       <author><first>Sungpack</first><last>Hong</last><affiliation>Oracle</affiliation></author>
@@ -10918,7 +10918,7 @@
       <title><fixed-case>INSIGHTBUDDY</fixed-case>-<fixed-case>AI</fixed-case>: Medication Extraction and Entity Linking using Pre-Trained Language Models and Ensemble Learning</title>
       <author><first>Pablo</first><last>Romero</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>18-27</pages>
       <abstract>This paper presents our system, InsightBuddy-AI, designed for extracting medication mentions and their associated attributes, and for linking these entities to established clinical terminology resources, including SNOMED-CT, the British National Formulary (BNF), ICD, and the Dictionary of Medicines and Devices (dm+d).To perform medication extraction, we investigated various ensemble learning approaches, including stacked and voting ensembles (using first, average, and max voting methods) built upon eight pre-trained language models (PLMs). These models include general-domain PLMs—BERT, RoBERTa, and RoBERTa-Large—as well as domain-specific models such as BioBERT, BioClinicalBERT, BioMedRoBERTa, ClinicalBERT, and PubMedBERT.The system targets the extraction of drug-related attributes such as adverse drug effects (ADEs), dosage, duration, form, frequency, reason, route, and strength.Experiments conducted on the n2c2-2018 shared task dataset demonstrate that ensemble learning methods outperformed individually fine-tuned models, with notable improvements of 2.43% in Precision and 1.35% in F1-score.We have also developed cross-platform desktop applications for both entity recognition and entity linking, available for Windows and macOS.The InsightBuddy-AI application is freely accessible for research use at <url>https://github.com/HECTA-UoM/InsightBuddy-AI</url>.</abstract>
       <url hash="fcb97dc0">2025.naacl-srw.2</url>
@@ -10989,7 +10989,7 @@
       <author><first>Saar</first><last>Kuzi</last><affiliation>Amazon</affiliation></author>
       <author><first>Giuseppe</first><last>Castellucci</last><affiliation>Amazon</affiliation></author>
       <author><first>Eugene</first><last>Agichtein</last><affiliation>Emory University</affiliation></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>77-91</pages>
       <abstract>In recommender systems, users often seek the best products through indirect, vague, or under-specified queries such as “best shoes for trail running.” These queries, referred to as implicit superlative queries, pose a challenge for standard retrieval and ranking systems due to their lack of explicit attribute mentions and the need for identifying and reasoning over complex attributes. We investigate how Large Language Models (LLMs) can generate implicit attributes for ranking and reason over them to improve product recommendations for such queries. As a first step, we propose a novel four-point schema, called SUPERB, for annotating the best product candidates for superlative queries, paired with LLM-based product annotations. We then empirically evaluate several existing retrieval and ranking approaches on our newly created dataset, providing insights and discussing how to integrate these findings into real-world e-commerce production systems.</abstract>
       <url hash="9b333aa9">2025.naacl-srw.8</url>
@@ -11025,8 +11025,8 @@
     <paper id="11">
       <title><fixed-case>M</fixed-case>ed-<fixed-case>C</fixed-case>o<fixed-case>DE</fixed-case>: Medical Critique based Disagreement Evaluation Framework</title>
       <author><first>Mohit</first><last>Gupta</last></author>
-      <author><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last><affiliation>National Institute of Informatics</affiliation></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last><affiliation>Indraprastha Institute of Information Technology, Delhi</affiliation></author>
       <pages>112-119</pages>
       <abstract>The emergence of large language models (LLMs) has significantly influenced numerous fields, including healthcare, by enhancing the capabilities of automated systems to process and generate human-like text. However, despite their advancements, the reliability and accuracy of LLMs in medical contexts remain critical concerns. Current evaluation methods often lack robustness and fail to provide a comprehensive assessment of LLM performance, leading to potential risks in clinical settings. In this work, we propose Med-CoDE, a specifically designed evaluation framework for medical LLMs to address these challenges. The framework leverages a critique-based approach to quantitatively measure the degree of disagreement between model-generated responses and established medical ground truths. This framework captures both accuracy and reliability in medical settings. The proposed evaluation framework aims to fill the existing gap in LLM assessment by offering a systematic method to evaluate the quality and trustworthiness of medical LLMs. Through extensive experiments and case studies, we illustrate the practicality of our framework in providing a comprehensive and reliable evaluation of medical LLMs.</abstract>
       <url hash="7f70d79d">2025.naacl-srw.11</url>
@@ -11048,7 +11048,7 @@
       <author><first>Ivo</first><last>de Souza Bueno Júnior</last></author>
       <author><first>Haotian</first><last>Ye</last></author>
       <author><first>Axel</first><last>Wisiorek</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>129-141</pages>
       <abstract>This paper presents a federated learning system with differential privacy for hate speech detection, tailored to low-resource languages. By fine-tuning pre-trained language models, ALBERT emerged as the most effective option for balancing performance and privacy. Experiments demonstrated that federated learning with differential privacy performs adequately in low-resource settings, though datasets with fewer than 20 sentences per client struggled due to excessive noise. Balanced datasets and augmenting hateful data with non-hateful examples proved critical for improving model utility. These findings offer a scalable and privacy-conscious framework for integrating hate speech detection into social media platforms and browsers, safeguarding user privacy while addressing online harm.</abstract>
       <url hash="c74cf96b">2025.naacl-srw.13</url>
@@ -11072,7 +11072,7 @@
       <author><first>Shuhei</first><last>Kurita</last><affiliation>National Institute of Informatics and New York University</affiliation></author>
       <author><first>Yusuke</first><last>Oda</last><affiliation>National Institute of Informatics and Nara Institute of Science and Technology</affiliation></author>
       <author><first>Daisuke</first><last>Kawahara</last><affiliation>Waseda University</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <pages>162-170</pages>
       <abstract>CLIP is a foundational model that bridges images and text, widely adopted as a key component in numerous vision-language models.However, the lack of large-scale open Japanese image-text pairs poses a significant barrier to the development of Japanese vision-language models.In this study, we constructed a Japanese image-text pair dataset with 1.5 billion examples using machine translation with open-weight LLMs and pre-trained Japanese CLIP models on the dataset.The performance of the pre-trained models was evaluated across seven benchmark datasets, achieving competitive average scores compared to models of similar size without the need for extensive data curation. However, the results also revealed relatively low performance on tasks specific to Japanese culture, highlighting the limitations of translation-based approaches in capturing cultural nuances. Our dataset, models, and code are publicly available.</abstract>
       <url hash="edb0376f">2025.naacl-srw.15</url>
@@ -11121,7 +11121,7 @@
       <author><first>Dhiman</first><last>Goswami</last><affiliation>George Mason University</affiliation></author>
       <author><first>Marcos</first><last>Zampieri</last><affiliation>George Mason University</affiliation></author>
       <author><first>Kai</first><last>North</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>Athena Research Center and George Mason University</affiliation></author>
       <pages>193-199</pages>
       <abstract>Native Language Identification (NLI) is the task of automatically identifying the native language (L1) of individuals based on their second language (L2) production. The introduction of Large Language Models (LLMs) with billions of parameters has renewed interest in text-based NLI, with new studies exploring LLM-based approaches to NLI on English L2. The capabilities of state-of-the-art LLMs on non-English NLI corpora, however, have not yet been fully evaluated. To fill this important gap, we present the first evaluation of LLMs for multilingual NLI. We evaluated the performance of several LLMs compared to traditional statistical machine learning models and language-specific BERT-based models on NLI corpora in English, Italian, Norwegian, and Portuguese. Our results show that fine-tuned GPT-4 models achieve state-of-the-art NLI performance.</abstract>
@@ -11135,7 +11135,7 @@
       <author><first>Libo</first><last>Ren</last></author>
       <author><first>Nicolo</first><last>Micheletti</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>200-206</pages>
       <abstract>The abundance of medical records holds great promise for enhancing healthcare and advancing biomedical research. However, due to <i>privacy</i> constraints, access to such data is typically limited to internal use.Recent studies have attempted to overcome this challenge by generating synthetic data through Causal Language Modelling. Yet, this approach often fails to ensure patient anonymity and offers limited control over output diversity—unless additional computational cost is introduced.In response, we propose a method for generating synthetic free-text medical records based on <i>Masked Language Modelling</i>. Our approach retains key medical details while introducing variability in the generated texts and reducing the risk of patient re-identification. With a relatively lightweight architecture of approximately 120 million parameters, the system ensures low inference costs.Experimental results show that our method produces high-quality synthetic data, achieving a HIPAA-compliant PHI recall of 96% and a re-identification risk of only 3.5%. Furthermore, downstream evaluations reveal that models trained on the synthetic data perform comparably to those trained on real-world data. Our trained models are publicly available on Github as SynDeidMLM (at <url>https://github.com/SamySam0/SynDeidMLM</url>) (meaning <b>syn</b>thetic and <b>de-id</b>entified data generation using <b>MLM</b>).</abstract>
       <url hash="752dee61">2025.naacl-srw.20</url>
@@ -11155,7 +11155,7 @@
     <paper id="22">
       <title>Linear Relational Decoding of Morphology in Language Models</title>
       <author><first>Eric</first><last>Xia</last></author>
-      <author><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado at Colorado Springs</affiliation></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last><affiliation>University of Colorado at Colorado Springs</affiliation></author>
       <pages>225-235</pages>
       <abstract>A two-part affine approximation has been found to be a good approximation for transformer computations over certain subject-object relations. Adapting the Bigger Analogy Test Set, we show that the linear transformation W s , where s is a middle-layer representation of a subject token and W is derived from model derivatives, can accurately reproduce final object states for many relations. This linear technique achieves 90% faithfulness on morphological relations, with similar findings across languages and models. Our results suggest that some conceptual relationships in language models, such as morphology, are readily interpretable from latent space and are sparsely encoded by cross-layer linear transformations.</abstract>
       <url hash="b23a4d01">2025.naacl-srw.22</url>
@@ -11372,8 +11372,8 @@
     <paper id="41">
       <title>Evaluating Text Style Transfer Evaluation: Are There Any Reliable Metrics?</title>
       <author><first>Sourabrata</first><last>Mukherjee</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
-      <author><first>John Philip</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last><affiliation>University of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India</affiliation></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
       <author><first>Ondrej</first><last>Dusek</last><affiliation>Charles University, Prague</affiliation></author>
       <pages>418-434</pages>
       <abstract>Text style transfer (TST) is the task of transforming a text to reflect a particular style while preserving its original content. Evaluating TSToutputs is a multidimensional challenge, requiring the assessment of style transfer accuracy, content preservation, and naturalness. Us-ing human evaluation is ideal but costly, as is common in other natural language processing (NLP) tasks; however, automatic metrics forTST have not received as much attention as metrics for, e.g., machine translation or summarization. In this paper, we examine both set ofexisting and novel metrics from broader NLP tasks for TST evaluation, focusing on two popular subtasks—sentiment transfer and detoxification—in a multilingual context comprising English, Hindi, and Bengali. By conducting meta-evaluation through correlation with hu-man judgments, we demonstrate the effectiveness of these metrics when used individually and in ensembles. Additionally, we investigatethe potential of large language models (LLMs) as tools for TST evaluation. Our findings highlight newly applied advanced NLP metrics andLLM-based evaluations provide better insights than existing TST metrics. Our oracle ensemble approaches show even more potential.</abstract>
@@ -11585,7 +11585,7 @@
     <paper id="4">
       <title>Knowledge Distillation for Language Models</title>
       <author><first>Yuqiao</first><last>Wen</last><affiliation>University of Alberta</affiliation></author>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Lili</first><last>Mou</last><affiliation>University of Alberta</affiliation></author>
       <pages>25-29</pages>
       <abstract>Knowledge distillation (KD) aims to transfer the knowledge of a teacher (usually a large model) to a student (usually a small one). In this tutorial, our goal is to provide participants with a comprehensive understanding of the techniques and applications of KD for language models. After introducing the basic concepts including intermediate-layer matching and prediction matching, we will present advanced techniques such as reinforcement learning-based KD and multi-teacher distillation. For applications, we will focus on KD for large language models (LLMs), covering topics ranging from LLM sequence compression to LLM self-distillation. The target audience is expected to know the basics of machine learning and NLP, but do not have to be familiar with the details of math derivation and neural models</abstract>
@@ -11597,7 +11597,7 @@
       <title>Adaptation of Large Language Models</title>
       <author><first>Zixuan</first><last>Ke</last><affiliation>Salesforce AI Research</affiliation></author>
       <author><first>Yifei</first><last>Ming</last><affiliation>Salesforce AI Research</affiliation></author>
-      <author><first>Shafiq</first><last>Joty</last><affiliation>Salesforce AI Research</affiliation></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last><affiliation>Salesforce AI Research</affiliation></author>
       <pages>30-37</pages>
       <abstract>This tutorial on adaptation of Large Language Models (LLMs) is designed to address the growing demand for models that go beyond the static capabilities of generic LLMs by providing an overview of dynamic, domain-specific, and task-adaptive LLM adaptation techniques. While general LLMs have demonstrated strong generalization across a variety of tasks, they often struggle to perform well in specialized domains such as finance, healthcare, and code generation for underrepresented languages. Additionally, their static nature limits their ability to evolve with the changing world, and they are often extremely large in size, making them impractical and costly to deploy at scale. As a result, the adaptation of LLMs has drawn much attention since the birth of LLMs and is of core importance, both for industry, which focuses on serving its targeted users, and academia, which can greatly benefit from small but powerful LLMs</abstract>
       <url hash="8658c3b0">2025.naacl-tutorial.5</url>
@@ -11606,11 +11606,11 @@
     </paper>
     <paper id="6">
       <title>Learning Language through Grounding</title>
-      <author><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
+      <author id="freda-shi"><first>Freda</first><last>Shi</last><affiliation>University of Waterloo</affiliation></author>
       <author><first>Ziqiao</first><last>Ma</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jiayuan</first><last>Mao</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <author><first>Parisa</first><last>Kordjamshidi</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last><affiliation>University of Michigan</affiliation></author>
       <pages>38-43</pages>
       <abstract>Grounding has been a long-standing concept in natural language processing (NLP) and computational linguistics (CL). This tutorial provides a historical overview and introduces recent advances in learning language through grounding, with a particular emphasis on the latter. We will begin by tracing the history of grounding and presenting a unified perspective on the term. In Parts II to IV, we will delve into recent progress in learning lexical semantics, syntax, and complex meanings through various forms of grounding. We will conclude by discussing future directions and open challenges, particularly those related to the growing trend of large language models and scaling.</abstract>
       <url hash="6abbf88e">2025.naacl-tutorial.6</url>
@@ -11682,7 +11682,7 @@
       <author><first>Shou-Yi</first><last>Hung</last></author>
       <author><first>Bo-Ting</first><last>Lin</last></author>
       <author><first>En-Shiun Annie</first><last>Lee</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>National Central University</affiliation></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last><affiliation>National Central University</affiliation></author>
       <pages>11-19</pages>
       <abstract>Many endangered languages are at risk of extinction due to barriers in communication and generational gaps that hinder their preservation. A cause for languages becoming endangered is the lack of language educational tools and artificial intelligence (AI) models for these low-resource languages. To address this, we propose the ATAIGI learning app designed with AI-powered models leveraging multimodal generative techniques. Our app offers users a comprehensive learning experience by providing translated phrases and definitions, example sentences, illustrative images, romanized pronunciation, and audio speech to accelerate language learning. ATAIGI is built on five AI models that are rigorously benchmarked individually, with our Transliteration Model achieving state-of-the-art results for Taiwanese Hokkien transliteration. ATAIGI is available for all to learn the endangered language of Taiwanese Hokkien, an endangered language spoken in Taiwan. A human evaluation conducted demonstrates the effectiveness of ATAIGI in improving language proficiency and cultural understanding, supporting its potential for the preservation and education of endangered languages like the Taiwanese Hokkien.</abstract>
       <url hash="55ad7358">2025.naacl-demo.2</url>
@@ -11727,7 +11727,7 @@
       <author><first>Yifan</first><last>Zhu</last></author>
       <author><first>Kenneth</first><last>Lai</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Changsoo</first><last>Jung</last><affiliation>Colorado State University</affiliation></author>
-      <author><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last><affiliation>Brandeis University</affiliation></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last><affiliation>Colorado State University</affiliation></author>
       <pages>40-50</pages>
       <abstract>We present TRACE, a novel system for live *common ground* tracking in situated collaborative tasks. With a focus on fast, real-time performance, TRACE tracks the speech, actions, gestures, and visual attention of participants, uses these multimodal inputs to determine the set of task-relevant propositions that have been raised as the dialogue progresses, and tracks the group’s epistemic position and beliefs toward them as the task unfolds. Amid increased interest in AI systems that can mediate collaborations, TRACE represents an important step forward for agents that can engage with multiparty, multimodal discourse.</abstract>
@@ -11841,9 +11841,9 @@
       <author><first>Shikhar</first><last>Bharadwaj</last><affiliation>School of Computer Science, Carnegie Mellon University</affiliation></author>
       <author><first>Yiwen</first><last>Zhao</last></author>
       <author><first>Samuele</first><last>Cornell</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Xiang</first><last>Yue</last><affiliation>Carnegie Mellon University</affiliation></author>
-      <author><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
+      <author id="chao-han-huck-yang"><first>Chao-Han Huck</first><last>Yang</last><affiliation>NVIDIA Research</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Shinji</first><last>Watanabe</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>116-124</pages>
@@ -11903,7 +11903,7 @@
     <paper id="16">
       <title>Semi-automatic Sequential Sentence Classification in the Discourse Analysis Tool Suite</title>
       <author><first>Tim</first><last>Fischer</last><affiliation>University of Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>151-162</pages>
       <abstract>This paper explores an AI-assisted approach to sequential sentence annotation designed to enhance qualitative data analysis (QDA) workflows within the open-source Discourse Analysis Tool Suite (DATS) developed at our university.We introduce a three-phase Annotation Assistant that leverages the capabilities of large language models (LLMs) to assist researchers during annotation.Based on the number of annotations, the assistant employs zero-shot prompting, few-shot prompting, or fine-tuned models to provide the best suggestions.To evaluate this approach, we construct a benchmark with five diverse datasets.We assess the performance of three prominent open-source LLMs — Llama 3.1, Gemma 2, and Mistral NeMo — and a sequence tagging model based on SentenceTransformers.Our findings demonstrate the effectiveness of our approach, with performance improving as the number of annotated examples increases. Consequently, we implemented the Annotation Assistant within DATS and report the implementation details.With this, we hope to contribute to a novel AI-assisted workflow and further democratize access to AI for qualitative data analysis.</abstract>
       <url hash="93a70ce7">2025.naacl-demo.16</url>
@@ -11914,10 +11914,10 @@
       <title><fixed-case>C</fixed-case>ow<fixed-case>P</fixed-case>ilot: A Framework for Autonomous and Human-Agent Collaborative Web Navigation</title>
       <author><first>Faria</first><last>Huq</last></author>
       <author><first>Zora Zhiruo</first><last>Wang</last></author>
-      <author><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last><affiliation>Carnegie Mellon University</affiliation></author>
       <author><first>Tianyue</first><last>Ou</last></author>
       <author><first>Shuyan</first><last>Zhou</last></author>
-      <author><first>Jeffrey P.</first><last>Bigham</last><affiliation>Apple and Carnegie Mellon University</affiliation></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey P.</first><last>Bigham</last><affiliation>Apple and Carnegie Mellon University</affiliation></author>
       <author><first>Graham</first><last>Neubig</last><affiliation>Carnegie Mellon University</affiliation></author>
       <pages>163-172</pages>
       <abstract>While much work on web agents emphasizes the promise of autonomously performing tasks on behalf of users, in reality, agents often fallshort on complex tasks in real-world contexts and modeling user preference. This presents an opportunity for humans to collaborate with the agent and leverage the agent’s capabilities effectively. We propose CowPilot, a frame- work supporting autonomous as well as human-agent co llaborative w eb navigation, and evaluation across task success and task efficiency. CowPilot reduces the number of steps humans need to perform by allowing agents to propose next steps, while users are able to pause, reject, or take alternative actions. During execution, users can interleave their actions with the agent’s by overriding suggestions or resuming agent control when needed. We conducted case studies on five common websites and found that the human-agent collaborative mode achieves the highest success rate of 95% while requiring humans to perform only 15.2% of the total steps. Even with human interventions during task execution, the agent successfully drives up to half of task success on its own. CowPilot can serve as a useful tool for data collection and agent evaluation across websites, which we believe will enable research in how users and agents can work together. Video demonstrations are available at https://oaishi.github.io/cowpilot.html</abstract>
@@ -11928,7 +11928,7 @@
     <paper id="18">
       <title>e<fixed-case>R</fixed-case>evise+<fixed-case>RF</fixed-case>: A Writing Evaluation System for Assessing Student Essay Revisions and Providing Formative Feedback</title>
       <author><first>Zhexiong</first><last>Liu</last></author>
-      <author><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last><affiliation>University of Pittsburgh, University of Pittsburgh and University of Pittsburgh</affiliation></author>
       <author><first>Elaine L</first><last>Wang</last><affiliation>RAND</affiliation></author>
       <author><first>Tianwen</first><last>Li</last></author>
       <author><first>Mason</first><last>Gobat</last></author>
@@ -11970,7 +11970,7 @@
       <author><first>Zihao</first><last>Lin</last></author>
       <author><first>Zichao</first><last>Wang</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Yuanting</first><last>Pan</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Systems</affiliation></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Ryan A.</first><last>Rossi</last><affiliation>Adobe Research</affiliation></author>
       <author><first>Angela</first><last>Lau</last><affiliation>Adobe Systems</affiliation></author>
       <author><first>Lifu</first><last>Huang</last><affiliation>University of California, Davis</affiliation></author>
@@ -11984,7 +11984,7 @@
     <paper id="21">
       <title><fixed-case>ESP</fixed-case>net-<fixed-case>SDS</fixed-case>: Unified Toolkit and Demo for Spoken Dialogue Systems</title>
       <author><first>Siddhant</first><last>Arora</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Jiatong</first><last>Shi</last></author>
       <author><first>Jinchuan</first><last>Tian</last></author>
       <author><first>William</first><last>Chen</last><affiliation>Carnegie Mellon University</affiliation></author>
@@ -12048,11 +12048,11 @@
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Zhengzhong</first><last>Liu</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Hector Xuguang</first><last>Ren</last></author>
-      <author><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last><affiliation>University of Melbourne and Carnegie Mellon University</affiliation></author>
       <author><first>Iryna</first><last>Gurevych</last><affiliation>Institute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Monojit</first><last>Choudhury</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
-      <author><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence and The University of Melbourne</affiliation></author>
       <pages>268-286</pages>
       <abstract>As large language models (LLMs) continue to evolve, leaderboards play a significant role in steering their development. Existing leaderboards often prioritize model capabilities while overlooking safety concerns, leaving a significant gap in responsible AI development. To address this gap, we introduce Libra-Leaderboard, a comprehensive framework designed to rank LLMs through a balanced evaluation of performance and safety. Combining a dynamic leaderboard with an interactive LLM arena, Libra-Leaderboard encourages the joint optimization of capability and safety. Unlike traditional approaches that average performance and safety metrics, Libra-Leaderboard uses a distance-to-optimal-score method to calculate the overall rankings. This approach incentivizes models to achieve a balance rather than excelling in one dimension at the expense of some other ones. In the first release, Libra-Leaderboard evaluates 26 mainstream LLMs from 14 leading organizations, identifying critical safety challenges even in state-of-the-art models.</abstract>
       <url hash="97837980">2025.naacl-demo.23</url>
@@ -12068,7 +12068,7 @@
       <author><first>Kyuwon</first><last>Kim</last><affiliation>Seoul National University</affiliation></author>
       <author><first>Jin</first><last>Wee</last></author>
       <author><first>Kang</first><last>Miyoung</last></author>
-      <author><first>KyungTae</first><last>Lim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last><affiliation>Korea Advanced Institute of Science &amp; Technology</affiliation></author>
       <author><first>Jungyeul</first><last>Park</last><affiliation>The University of British Columbia</affiliation></author>
       <author><first>Chulwoo</first><last>Park</last><affiliation>Anyang University</affiliation></author>
       <pages>287-294</pages>
@@ -12241,7 +12241,7 @@
       <author><first>Vidhisha</first><last>Balachandran</last><affiliation>Microsoft Research</affiliation></author>
       <author><first>Xiaochuang</first><last>Han</last><affiliation>Facebook</affiliation></author>
       <author><first>Shangbin</first><last>Feng</last><affiliation>University of Washington</affiliation></author>
-      <author><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last><affiliation>University of Washington and Allen Institute for Artificial Intelligence</affiliation></author>
       <author><first>Yulia</first><last>Tsvetkov</last><affiliation>Department of Computer Science, University of Washington</affiliation></author>
       <pages>437-448</pages>
       <abstract>With the widespread consumption of AI-generated content, there has been an increased focus on developing automated tools to verify the factual accuracy of such content. However, prior research and tools developed for fact verification treat it as a binary classification or a linear regression problem. Although this is a useful mechanism as part of automatic guardrails in systems, we argue that such tools lack transparency in the prediction reasoning and diversity in source evidence to provide a trustworthy user experience.We develop FACTS&amp;EVIDENCE—an interactive and transparent tool for user-driven verification of complex text. The tool facilitates the intricate decision-making involved in fact-verification, presenting its users a breakdown of complex input texts to visualize the credibility of individual claims along with explanation of model decisions and attribution to multiple, diverse evidence sources. FACTS&amp;EVIDENCE aims to empower consumers of machine-generated text and give them agency to understand, verify, selectively trust and use such text.</abstract>
@@ -12269,7 +12269,7 @@
       <title><fixed-case>L</fixed-case>3<fixed-case>GO</fixed-case>: Language Agents with Chain-of-3<fixed-case>D</fixed-case>-Thoughts for Generating Unconventional Objects</title>
       <author><first>Yutaro</first><last>Yamada</last><affiliation>Sakana AI</affiliation></author>
       <author><first>Khyathi</first><last>Chandu</last><affiliation>Mistral AI</affiliation></author>
-      <author><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last><affiliation>xAI and University of Washington</affiliation></author>
       <author><first>Jack</first><last>Hessel</last><affiliation>Samaya AI</affiliation></author>
       <author><first>Ilker</first><last>Yildirim</last><affiliation>Yale University</affiliation></author>
       <author><first>Yejin</first><last>Choi</last><affiliation>Computer Science Department, Stanford University and NVIDIA</affiliation></author>
@@ -12285,7 +12285,7 @@
       <author><first>Koki</first><last>Maeda</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <author><first>Issa</first><last>Sugiura</last><affiliation>Kyoto University</affiliation></author>
       <author><first>Shuhei</first><last>Kurita</last><affiliation>National Institute of Informatics and New York University</affiliation></author>
-      <author><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last><affiliation>Institute of Science Tokyo</affiliation></author>
       <author><first>Daisuke</first><last>Kawahara</last><affiliation>Waseda University</affiliation></author>
       <pages>470-484</pages>
       <abstract>To develop high-performing Visual Language Models (VLMs), it is essential to prepare multimodal resources, such as image-text pairs, interleaved data, and instruction data. While multimodal resources for English are abundant, there is a significant lack of corresponding resources for non-English languages, such as Japanese. To address this problem, we take Japanese as a non-English language and propose Japanese multimodal datasets for rapidly developing a Japanese multimodal model. We collect Japanese image-text pairs and interleaved data from web archives and generate Japanese instruction data using an existing large language model and a VLM. Our experimental results show that a VLM trained on these native datasets outperforms those relying on machine-translated content. The resulting VLM, dataset and code used for training is publicly available.</abstract>
@@ -12319,7 +12319,7 @@
       <title><fixed-case>METAPHORSHARE</fixed-case>: A Dynamic Collaborative Repository of Open Metaphor Datasets</title>
       <author><first>Joanne</first><last>Boisson</last></author>
       <author><first>Arif</first><last>Mehmood</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <pages>509-521</pages>
       <abstract>The metaphor studies community has developed numerous valuable labelled corpora in various languages over the years. Many of these resources are not only unknown to the NLP community, but are also often not easily shared among the researchers. Both in human sciences and in NLP, researchers could benefit from a centralised database of labelled resources, easily accessible and unified under an identical format. To facilitate this, we present MetaphorShare, a website to integrate metaphor datasets making them open and accessible. With this effort, our aim is to encourage researchers to share and upload more datasets in any language in order to facilitate metaphor studies and the development of future metaphor processing NLP systems. The website has four main functionalities: upload, download, search and label metaphor datasets. It is accessible at www.metaphorshare.com.</abstract>
       <url hash="6e54c41d">2025.naacl-demo.41</url>
diff --git a/data/xml/2025.nakbanlp.xml b/data/xml/2025.nakbanlp.xml
index 58f7165db1..abd40d34f6 100644
--- a/data/xml/2025.nakbanlp.xml
+++ b/data/xml/2025.nakbanlp.xml
@@ -122,7 +122,7 @@
     <paper id="11">
       <title>The Missing Cause: An Analysis of Causal Attributions in Reporting on <fixed-case>P</fixed-case>alestine</title>
       <author><first>Paulina</first><last>Garcia Corral</last></author>
-      <author><first>Hannah</first><last>Bechara</last></author>
+      <author id="hannah-bechara"><first>Hannah</first><last>Bechara</last></author>
       <author><first>Krishnamoorthy</first><last>Manohara</last></author>
       <author><first>Slava</first><last>Jankin</last></author>
       <pages>103–113</pages>
@@ -158,7 +158,7 @@
       <author><first>Sara</first><last>Nabhani</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
       <author><first>Kurt</first><last>Micallef</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <pages>127–149</pages>
       <abstract>Propaganda significantly shapes public opinion, especially in conflict-driven contexts like the Israeli-Palestinian conflict. This study explores the integration of argumentation features, such as claims, premises, and major claims, into machine learning models to enhance the detection of propaganda techniques in Arabic media. By leveraging datasets annotated with fine-grained propaganda techniques and employing crosslingual and multilingual NLP methods, along with GPT-4-based annotations, we demonstrate consistent performance improvements. A qualitative analysis of Arabic media narratives on the Israeli war on Gaza further reveals the model’s capability to identify diverse rhetorical strategies, offering insights into the dynamics of propaganda. These findings emphasize the potential of combining NLP with argumentation features to foster transparency and informed discourse in politically charged settings.</abstract>
       <url hash="296a082b">2025.nakbanlp-1.14</url>
diff --git a/data/xml/2025.neusymbridge.xml b/data/xml/2025.neusymbridge.xml
index 6b59f5d9b5..480b00ea22 100644
--- a/data/xml/2025.neusymbridge.xml
+++ b/data/xml/2025.neusymbridge.xml
@@ -52,7 +52,7 @@
       <author><first>Shuang</first><last>Xue</last></author>
       <author><first>Fang</first><last>Cai</last></author>
       <author><first>Na</first><last>Ye</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <pages>18–30</pages>
       <abstract>Logical table-to-text generation (LT2T) seeks to produce logically faithful textual descriptions base on tables. Current end-to-end LT2T models, which use descriptions directly as learning objectives, frequently face challenges in maintaining logical faithfulness due to the lack of a reasoning knowledge. Recent research have introduced reasoning knowledge generated by models for LT2T task, but the noise along with it limited its performance. We therefore propose a framework reasoning knowledge filter that leverages the collaboration between large language models and smaller models to filter data points with high-quality reasoning knowledge. This framework aims to provide highly matched table, description and reasoning knowledge triplets for LT2T. The results obtained on LogicNLG database demonstrate that the efficiencies of the method in this paper has achieved optimal performance with a reduced amount of data. Specifically, it enhances SP-Acc by 1.4 points and NLI-Acc by 0.7 points compared to the current state-of-the-art model.</abstract>
       <url hash="8bba27ed">2025.neusymbridge-1.3</url>
@@ -97,7 +97,7 @@
       <author><first>Lianji</first><last>Wang</last></author>
       <author><first>Xiang</first><last>Liu</last></author>
       <author><first>Haifeng</first><last>Chi</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <pages>61–70</pages>
       <abstract>With the continuous growth of multi-modal data on social media platforms, traditional Named Entity Recognition has rendered insufficient for handling contemporary data formats. Consequently, researchers proposed Multi-modal Named Entity Recognition (MNER). Existing studies focus on capturing the visual regions corresponding to entities to assist in entity recognition. However, these approaches still struggle to mitigate interference from visual regions that are irrelevant to the entities. To address this issue, we propose an innovative framework, Visual Cue Refinement in MNER(VCRMNER) using CLIP Prompts, to accurately capture visual cues (object-level visual regions) associated with entities. We leverage prompts to represent the semantic information of entity categories, which helps us assess visual cues and minimize interference from those irrelevant to the entities. Furthermore, we designed an interaction transformer that operates in two stages—first within each modality and then between modalities—to refine visual cues by learning from a frozen image encoder, thereby reducing differences between text and visual modalities. Comprehensive experiments were conducted on two public datasets, Twitter15 and Twitter17. The results and detailed analyses demonstrate that our method exhibits robust and competitive performance.</abstract>
       <url hash="4b25563b">2025.neusymbridge-1.7</url>
@@ -138,7 +138,7 @@
       <title>Generative <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Scalable and Adaptive Frames for Interpretable Knowledge Storage and Retrieval for <fixed-case>LLM</fixed-case>s Powered by <fixed-case>LLM</fixed-case>s</title>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
       <author><first>Taylor</first><last>Hudson</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <pages>107–119</pages>
       <abstract>Frame semantics provides an explanation for how we make use of conceptual frames, which encapsulate background knowledge and associations, to more completely understand the meanings of words within a context. Unfortunately, FrameNet, the only widely available implementation of frame semantics, is limited in both scale and coverage. Therefore, we introduce a novel mechanism for generating task-specific frames using large language models (LLMs), which we call Generative FrameNet. We demonstrate its effectiveness on a task that is highly relevant in the current landscape of LLMs: the interpretable storage and retrieval of factual information. Specifically, Generative Frames enable the extension of Retrieval-Augmented Generation (RAG), providing an interpretable framework for reducing inaccuracies in LLMs. We conduct experiments to demonstrate the effectiveness of this method both in terms of retrieval effectiveness as well as the relevance of the automatically generated frames and frame relations. Expert analysis shows that Generative Frames capture a more suitable level of semantic specificity than the frames from FrameNet. Thus, Generative Frames capture a notion of frame semantics that is closer to Fillmore’s originally intended definition, and offer potential for providing data-driven insights into Frame Semantics theory. Our results also show that this novel mechanism of Frame Semantic-based interpretable retrieval improves RAG for question answering with LLMs—outperforming a GPT-4 based baseline by up to 8 points. We provide open access to our data, including prompts and Generative FrameNet.</abstract>
       <url hash="0c22b836">2025.neusymbridge-1.11</url>
diff --git a/data/xml/2025.nlp4call.xml b/data/xml/2025.nlp4call.xml
index 3a7fe9bcd5..c1219ffa41 100644
--- a/data/xml/2025.nlp4call.xml
+++ b/data/xml/2025.nlp4call.xml
@@ -24,7 +24,7 @@
       <title>The <fixed-case>M</fixed-case>ulti<fixed-case>GEC</fixed-case>-2025 Shared Task on Multilingual Grammatical Error Correction at <fixed-case>NLP</fixed-case>4<fixed-case>CALL</fixed-case></title>
       <author><first>Arianna</first><last>Masciolini</last></author>
       <author><first>Andrew</first><last>Caines</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Joni</first><last>Kruijsbergen</last></author>
       <author><first>Murathan</first><last>Kurfalı</last></author>
       <author><first>Ricardo</first><last>Muñoz Sánchez</last></author>
@@ -56,7 +56,7 @@
     <paper id="4">
       <title>Interpretable Machine Learning for Societal Language Identification: Modeling <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Influences on <fixed-case>P</fixed-case>ortuguese Heritage Language</title>
       <author><first>Soroosh</first><last>Akef</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Patrick</first><last>Rebuschat</last></author>
       <pages>50–62</pages>
@@ -74,7 +74,7 @@
       <title><fixed-case>PIRLS</fixed-case> Category-specific Question Generation for Reading Comprehension</title>
       <author><first>Yin</first><last>Poon</last></author>
       <author><first>Qiong</first><last>Wang</last></author>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <author><first>Yu Yan</first><last>Lam</last></author>
       <author><first>Samuel Kai Wah</first><last>Chu</last></author>
       <pages>72–80</pages>
diff --git a/data/xml/2025.nlp4dh.xml b/data/xml/2025.nlp4dh.xml
index 309627581c..f40a0b65ac 100644
--- a/data/xml/2025.nlp4dh.xml
+++ b/data/xml/2025.nlp4dh.xml
@@ -50,7 +50,7 @@
       <title>Analyzing Large Language Models’ pastiche ability: a case study on a 20th century <fixed-case>R</fixed-case>omanian author</title>
       <author><first>Anca</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Andra-Maria</first><last>Florescu</last><affiliation>University of Bucharest</affiliation></author>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>20-32</pages>
       <abstract>This study evaluated the ability of several Large Language Models (LLMs) to pastiche the literary style of the Romanian 20th century author Mateiu Caragiale, by continuing one of his novels left unfinished upon his death. We assembled a database of novels consisting of six texts by Mateiu Caragiale, including his unfinished one, six texts by Radu Albala, including a continuation of Mateiu’s novel, and six LLM generated novels that try to pastiche it. We compared the LLM generated texts with the continuation by Radu Albala, using various methods. We automatically evaluated the pastiches by standard metrics such as ROUGE, BLEU, and METEOR. We performed stylometric analysis, clustering, and authorship attribution, and a manual analysis. Both computational and manual analysis of the pastiches indicated that LLMs are able to produce fairly qualitative pastiches, without matching the professional writer performance. The study also showed that ML techniques outperformed the more recent DL ones in both clusterization and authorship attribution tasks, probably because the dataset consists of only a few literary archaic texts in Romanian. In addition, linguistically informed features were shown to be competitive compared to automatically extracted features.</abstract>
       <url hash="1d9a8ec5">2025.nlp4dh-1.3</url>
@@ -102,7 +102,7 @@
       <title>The <fixed-case>AI</fixed-case> Co-Ethnographer: How Far Can Automation Take Qualitative Research?</title>
       <author><first>Fabian</first><last>Retkowski</last><affiliation>Germany</affiliation></author>
       <author><first>Andreas</first><last>Sudmann</last><affiliation>Carnegie Mellon</affiliation></author>
-      <author><first>Alexander</first><last>Waibel</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last><affiliation>Massachusetts Institute of Technology</affiliation></author>
       <pages>73-90</pages>
       <abstract>Qualitative research often involves labor-intensive processes that are difficult to scale while preserving analytical depth. This paper introduces The AI Co-Ethnographer (AICoE), a novel end-to-end pipeline developed for qualitative research and designed to move beyond the limitations of simply automating code assignments, offering a more integrated approach. AICoE organizes the entire process, encompassing open coding, code consolidation, code application, and even pattern discovery, leading to a comprehensive analysis of qualitative data.</abstract>
       <url hash="acc8dd4b">2025.nlp4dh-1.8</url>
@@ -165,7 +165,7 @@
     <paper id="13">
       <title>Effects of Publicity and Complexity in Reader Polarization</title>
       <author><first>Yuri</first><last>Bizzoni</last><affiliation>Aarhus University</affiliation></author>
-      <author><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last><affiliation>Aarhus University</affiliation></author>
       <author><first>Kristoffer</first><last>Nielbo</last><affiliation>Aarhus University</affiliation></author>
       <pages>138-150</pages>
       <abstract>We investigate how Goodreads rating distributions reflect variations in audience reception across literary works. By examining a large-scale dataset of novels, we analyze whether metrics such as the entropy or standard deviation of rating distributions correlate with textual features – including perplexity, nominal ratio, and syntactic complexity. These metrics reveal a disagreement continuum: more complex texts – i.e., more cognitively demanding books, with a more canon-like textual profile – generate polarized reader responses, while mainstream works produce more uniform reactions. We compare evaluation patterns across canonical and non-canonical works, bestsellers, and prize-winners, finding that textual complexity drives rating polarization even when controlling for publicity effects. Our findings demonstrate that linguistically unpredictable texts, particularly those with higher nominal density and dependency distance, generate divergent reader evaluations. This challenges conventional literary success metrics and suggests that the shape of rating distributions offers valuable insights beyond average scores. We hope our approach establishes a productive framework for understanding how literary features influence reception and how disagreement metrics can enhance our understanding of public literary judgment.</abstract>
@@ -243,7 +243,7 @@
       <author><first>Lilly</first><last>Brauner</last><affiliation>Mannheim University</affiliation></author>
       <author><first>Florian</first><last>Ertz</last><affiliation>Mannheim University</affiliation></author>
       <author><first>Ines</first><last>Reinig</last><affiliation>Mannheim University</affiliation></author>
-      <author><first>Simone</first><last>Ponzetto</last><affiliation>Mannheim University</affiliation></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last><affiliation>Mannheim University</affiliation></author>
       <pages>232-250</pages>
       <abstract>Due to their availability and ease of use, dictionary-based measures of moral values are a popular tool for text-based analyses of morality that examine human attitudes and behaviour across populations and cultures. In this paper, we revisit the construct validity of different dictionary-based measures of morality in text that have been proposed in the literature. We discuss conceptual challenges for text-based measures of morality and present an annotation experiment where we create a new dataset with human annotations of moral rhetoric in German political manifestos. We compare the results of our human annotations with different measures of moral values, showing that none of them is able to capture the trends observed by trained human coders. Our findings have far-reaching implications for the application of moral dictionaries in the digital humanities.</abstract>
       <url hash="3908658f">2025.nlp4dh-1.20</url>
@@ -318,7 +318,7 @@
       <author><first>Olga</first><last>Kolesnikova</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Liliana</first><last>Chanona Hernandez</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>305-312</pages>
       <abstract>This study examines sentiment analysis in Tamil-English code-mixed texts using advanced transformer-based architectures. The unique linguistic challenges, including mixed grammar, orthographic variability, and phonetic inconsistencies, are addressed. Data limitations and annotation gaps are discussed, highlighting the need for larger datasets. The performance of models such as XLM-RoBERTa, mT5, IndicBERT, and RemBERT is evaluated, with insights into their optimization for low-resource, code-mixed environments.</abstract>
       <url hash="7493ba85">2025.nlp4dh-1.27</url>
@@ -375,7 +375,7 @@
       <title>It’s about What and How you say it: A Corpus with Stance and Sentiment Annotation for <fixed-case>COVID</fixed-case>-19 Vaccines Posts on <fixed-case>X</fixed-case>/<fixed-case>T</fixed-case>witter by <fixed-case>B</fixed-case>razilian Political Elites</title>
       <author><first>Lorena</first><last>Barberia</last><affiliation>University of São Paulo (USP)</affiliation></author>
       <author><first>Pedro</first><last>Schmalz</last><affiliation>University of São Paulo (USP)</affiliation></author>
-      <author><first>Norton</first><last>Trevisan Roman</last><affiliation>University of São Paulo (USP)</affiliation></author>
+      <author id="norton-trevisan-roman"><first>Norton</first><last>Trevisan Roman</last><affiliation>University of São Paulo (USP)</affiliation></author>
       <author><first>Belinda</first><last>Lombard</last><affiliation>University of Birmingham</affiliation></author>
       <author><first>Tatiane</first><last>Moraes de Sousa</last><affiliation>University of the State of Rio de Janeiro</affiliation></author>
       <pages>365-376</pages>
@@ -401,7 +401,7 @@
     <paper id="34">
       <title>Development of <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Lexical Resources, and Two <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks for Diplomatically Edited <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text</title>
       <author><first>Adrian</first><last>Doyle</last><affiliation>Insight Centre for Data Analytics</affiliation></author>
-      <author><first>John</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last><affiliation>National University of Ireland Galway</affiliation></author>
       <pages>393-402</pages>
       <abstract>The quantity and variety of Old Irish text which survives in contemporary manuscripts, those dating from the Old Irish period, is quite small by comparison to what is available for Modern Irish, not to mention better-resourced modern languages. As no native speakers have existed for more than a millennium, no more text will ever be created by native speakers. For these reasons, text surviving in contemporary sources is particularly valuable. Ideally, all such text would be annotated using a single, common standard to ensure compatibility. At present, discrete Old Irish text repositories make use of incompatible annotation styles, few of which are utilised by text resources for other languages. This limits the potential for using text from more than any one resource simultaneously in NLP applications, or as a basis for creating further resources. This paper describes the production of the first Old Irish text resources to be designed specifically to ensure lexical compatibility and interoperability.</abstract>
       <url hash="6828d1a3">2025.nlp4dh-1.34</url>
@@ -562,7 +562,7 @@
     </paper>
     <paper id="48">
       <title>Historical Ink: Exploring Large Language Models for Irony Detection in 19th-Century <fixed-case>S</fixed-case>panish</title>
-      <author><first>Kevin</first><last>Cohen</last><affiliation>Universidad de los Andes</affiliation></author>
+      <author id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last><affiliation>Universidad de los Andes</affiliation></author>
       <author><first>Laura</first><last>Manrique-Gómez</last><affiliation>Universidad de los Andes</affiliation></author>
       <author><first>Ruben</first><last>Manrique</last><affiliation>Universidad de Los Andes</affiliation></author>
       <pages>559-569</pages>
diff --git a/data/xml/2025.nlp4ecology.xml b/data/xml/2025.nlp4ecology.xml
index 17f498f93e..3c7e51f32e 100644
--- a/data/xml/2025.nlp4ecology.xml
+++ b/data/xml/2025.nlp4ecology.xml
@@ -47,7 +47,7 @@
       <author><first>Jennifer</first><last>D’Souza</last></author>
       <author><first>Zachary</first><last>Laubach</last></author>
       <author><first>Tarek Al</first><last>Mustafa</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Robert</first><last>Frühstückl</last></author>
       <author><first>Phyllis</first><last>Illari</last></author>
       <pages>16–23</pages>
@@ -85,7 +85,7 @@
     <paper id="12">
       <title>Entity Linking using <fixed-case>LLM</fixed-case>s for Automated Product Carbon Footprint Estimation</title>
       <author><first>Steffen</first><last>Castle</last></author>
-      <author><first>Julian</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno Schneider</last></author>
       <pages>56–60</pages>
       <abstract>Growing concerns about climate change and sustainability are driving manufacturers to take significant steps toward reducing their carbon footprints. For these manufacturers, a first step towards this goal is to identify the environmental impact of the individual components of their products. We propose a system leveraging large language models (LLMs) to automatically map components from manufacturer Bills of Materials (BOMs) to Life Cycle Assessment (LCA) database entries by using LLMs to expand on available component information. Our approach reduces the need for manual data processing, paving the way for more accessible sustainability practices.</abstract>
       <url hash="2d8c424d">2025.nlp4ecology-1.12</url>
@@ -124,7 +124,7 @@
       <title>Towards Addressing Anthropocentric Bias in Large Language Models</title>
       <author><first>Francesca</first><last>Grasso</last></author>
       <author><first>Stefano</first><last>Locci</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <pages>84–93</pages>
       <abstract>The widespread use of Large Language Models (LLMs), particularly among non-expert users, has raised ethical concerns about the propagation of harmful biases. While much research has addressed social biases, few works, if any, have examined anthropocentric bias in Natural Language Processing (NLP) technology. Anthropocentric language prioritizes human value, framing non-human animals, living entities, and natural elements solely by their utility to humans; a perspective that contributes to the ecological crisis. In this paper, we evaluate anthropocentric bias in OpenAI’s GPT-4o across various target entities, including sentient beings, non-sentient entities, and natural elements. Using prompts eliciting neutral, anthropocentric, and ecocentric perspectives, we analyze the model’s outputs and introduce a manually curated glossary of 424 anthropocentric terms as a resource for future ecocritical research. Our findings reveal a strong anthropocentric bias in the model’s responses, underscoring the need to address human-centered language use in AI-generated text to promote ecological well-being.</abstract>
       <url hash="1bc92c8f">2025.nlp4ecology-1.18</url>
@@ -133,7 +133,7 @@
     <paper id="20">
       <title>Efficient Scientific Full Text Classification: <fixed-case>The</fixed-case> Case of <fixed-case>EICAT</fixed-case> Impact Assessments</title>
       <author><first>Marc Felix</first><last>Brinner</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>94–103</pages>
       <abstract>This study explores strategies for efficiently classifying scientific full texts using both small, BERT-based models and local large language models like Llama-3.1 8B. We focus on developing methods for selecting subsets of input sentences to reduce input size while simultaneously enhancing classification performance. To this end, we compile a novel dataset consisting of full-text scientific papers from the field of invasion biology, specifically addressing the impacts of invasive species. These papers are aligned with publicly available impact assessments created by researchers for the International Union for Conservation of Nature (IUCN). Through extensive experimentation, we demonstrate that various sources like human evidence annotations, LLM-generated annotations or explainability scores can be used to train sentence selection models that improve the performance of both encoder- and decoder-based language models while optimizing efficiency through the reduction in input length, leading to improved results even if compared to models like ModernBERT that are able to handle the complete text as input. Additionally, we find that repeated sampling of shorter inputs proves to be a very effective strategy that, at a slightly increased cost, can further improve classification performance.</abstract>
       <url hash="165cbb25">2025.nlp4ecology-1.20</url>
diff --git a/data/xml/2025.nlp4pi.xml b/data/xml/2025.nlp4pi.xml
index eaf917fba3..36b60bed02 100644
--- a/data/xml/2025.nlp4pi.xml
+++ b/data/xml/2025.nlp4pi.xml
@@ -167,7 +167,7 @@
       <author><first>YuCheng</first><last>Huang</last><affiliation>Boston University</affiliation></author>
       <author><first>Arti</first><last>Ramanathan</last><affiliation>Boston University</affiliation></author>
       <author><first>Margrit</first><last>Betke</last><affiliation>Boston University</affiliation></author>
-      <author><first>Derry</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
+      <author id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last><affiliation>Boston University</affiliation></author>
       <pages>128-143</pages>
       <abstract>TikTok has emerged as a key platform for discussing polarizing topics, including climate change. Despite its growing influence, there is limited research exploring how content features shape emotional alignment between video creators and audience comments, as well as their impact on user engagement. Using a combination of pretrained and fine-tuned textual and visual models, we analyzed 7,110 TikTok videos related to climate change, focusing on content features such as semantic clustering of video transcriptions, visual elements, tonal shifts, and detected emotions. (1) Our findings reveal that positive emotions and videos featuring factual content or vivid environmental visuals exhibit stronger emotional alignment. Furthermore, emotional intensity and tonal coherence in video speech are significant predictors of higher engagement levels, offering new insights into the dynamics of climate change communication on social media. (2) Our preference learning analysis reveals that comment emotions play a dominant role in predicting video shareability, with both positive and negative emotional responses acting as key drivers of content diffusion. We conclude that user engagement—particularly emotional discourse in comments—significantly shapes climate change content shareability.</abstract>
       <url hash="5c291b3f">2025.nlp4pi-1.11</url>
@@ -188,7 +188,7 @@
       <title>Unsupervised Sustainability Report Labeling based on the integration of the <fixed-case>GRI</fixed-case> and <fixed-case>SDG</fixed-case> standards</title>
       <author><first>Seyed Alireza</first><last>Mousavian Anaraki</last><affiliation>Department of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy</affiliation></author>
       <author><first>Danilo</first><last>Croce</last><affiliation>Department of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy</affiliation></author>
-      <author><first>Roberto</first><last>Basili</last><affiliation>Department of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy</affiliation></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last><affiliation>Department of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy</affiliation></author>
       <pages>151-162</pages>
       <abstract>Sustainability reports are key instruments for communicating corporate impact, but their unstructured format and varied content pose challenges for large-scale analysis. This paper presents an unsupervised method to annotate paragraphs from sustainability reports against both the Global Reporting Initiative (GRI) and Sustainable Development Goals (SDG) standards. The approach combines structured metadata from GRI content indexes, official GRI–SDG mappings, and text semantic similarity models to produce weakly supervised annotations at scale. To evaluate the quality of these annotations, we train a multi-label classifier on the automatically labeled data and evaluate it on the trusted OSDG Community Dataset. The results show that our method yields meaningful labels and improves classification performance when combined with human-annotated data. Although preliminary, this work offers a foundation for scalable sustainability analysis and opens future directions toward assessing the credibility and depth of corporate sustainability claims.</abstract>
       <url hash="49847b4b">2025.nlp4pi-1.13</url>
@@ -212,7 +212,7 @@
       <author><first>Jieyu</first><last>Zhao</last><affiliation>University of Southern California</affiliation></author>
       <author><first>Linda X.</first><last>Zou</last><affiliation>University of Maryland</affiliation></author>
       <author><first>Rachel</first><last>Rudinger</last><affiliation>University of Maryland</affiliation></author>
-      <author><first>Hal</first><last>Daumé III</last><affiliation>University of Maryland</affiliation></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last><affiliation>University of Maryland</affiliation></author>
       <pages>175-188</pages>
       <abstract>Multilingual large language models have gained prominence for their proficiency in processing and generating text across languages. Like their monolingual counterparts, multilingual models are likely to pick up on stereotypes and other social biases during training. In this paper, we study a phenomenon we term “stereotype leakage”, which refers to how training a model multilingually may lead to stereotypes expressed in one language showing up in the models’ behavior in another. We propose a measurement framework for stereotype leakage and investigate its effect in English, Russian, Chinese, and Hindi and with GPT-3.5, mT5, and mBERT. Our findings show a noticeable leakage of positive, negative, and nonpolar associations across all languages. We find that GPT-3.5 exhibits the most stereotype leakage of these models, and Hindi is the most susceptible to leakage effects.</abstract>
       <url hash="c4c14090">2025.nlp4pi-1.15</url>
@@ -225,7 +225,7 @@
       <author><first>Ke</first><last>Zhang</last><affiliation>DataMinr, Inc.</affiliation></author>
       <author><first>Hemank</first><last>Lamba</last><affiliation>DataMinr, Inc.</affiliation></author>
       <author><first>Elizabeth M.</first><last>Olson</last><affiliation>DataMinr, Inc.</affiliation></author>
-      <author><first>Joel</first><last>Tetreault</last><affiliation>DataMinr, Inc.</affiliation></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last><affiliation>DataMinr, Inc.</affiliation></author>
       <author><first>Alex</first><last>Jaimes</last><affiliation>DataMinr, Inc.</affiliation></author>
       <pages>189-195</pages>
       <abstract>Publications in the AI for Good space have tended to focus on the research and model development that can support high-impact applications. However, very few AI for Good papers discuss the process of deploying and collaborating with the partner organization, and the resulting real-world impact. In this work, we share details about the close collaboration with a humanitarian-to-humanitarian (H2H) organization and how to not only deploy the AI model in a resource-constrained environment, but also how to maintain it for continuous performance updates, and share key takeaways for practitioners.</abstract>
@@ -238,10 +238,10 @@
       <author><first>Yiwen</first><last>Ding</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Jiarui</first><last>Liu</last><affiliation>CMU</affiliation></author>
       <author><first>Zhiheng</first><last>Lyu</last><affiliation>University of Hong Kong</affiliation></author>
-      <author id="kun-zhang"><first>Kun</first><last>Zhang</last><affiliation>CMU, MBZUAI</affiliation></author>
+      <author><first>Kun</first><last>Zhang</last><affiliation>CMU, MBZUAI</affiliation></author>
       <author><first>Bernhard</first><last>Schölkopf</last><affiliation>Max Planck Institute for Intelligent Systems, Tuebingen, Germany</affiliation></author>
       <author><first>Zhijing</first><last>Jin</last><affiliation>Max Planck Institute for Intelligent Systems, Tuebingen, Germany, University of Toronto, Vector Institute</affiliation></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <pages>196-214</pages>
       <abstract>While several previous studies have analyzed gender bias in research, we are still missing a comprehensive analysis of gender differences in the AI community, covering diverse topics and different development trends. Using the AI Scholar dataset of 78K researchers in the field of AI, we identify several gender differences: (1) Although female researchers tend to have fewer overall citations than males, this citation difference does not hold for all academic-age groups; (2) There exist large gender homophily in co-authorship on AI papers; (3) Female first-authored papers show distinct linguistic styles, such as longer text, more positive emotion words, and more catchy titles than male first-authored papers. Our analysis provides a window into the current demographic trends in our AI community, and encourages more gender equality and diversity in the future.</abstract>
       <url hash="d8647875">2025.nlp4pi-1.17</url>
@@ -266,7 +266,7 @@
       <title>Multi-Task Learning approach to identify sentences with impact and affected location in a disaster news report</title>
       <author><first>Sumanta</first><last>Banerjee</last><affiliation>National Institute of Technology Silchar, India, Siksha ‘O’ Anusandhan Deemed to be University, Bhubaneswar, India</affiliation></author>
       <author><first>Shyamapada</first><last>Mukherjee</last><affiliation>National Institute of Technology Rourkela, India</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>Jadavpur University, Kolkata, India</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>Jadavpur University, Kolkata, India</affiliation></author>
       <pages>229-238</pages>
       <abstract>The first priority of action in the Sendai Framework for Disaster Risk Reduction 2015-2030 advocates the understanding of disaster risk by collecting and processing practical information related to disasters. A smart collection may be the compilation of relevant and summarized news articles focused on some key pieces of information such as disaster event type, geographic location(s), and impacts. In this article, a Multi-Task Learning (MTL) based end-to-end model has been developed to perform three related tasks: sentence classification depending on the presence of (1) relevant locations and (2) impact information to generate a summary,and (3) identification of the causes or event types in disaster news. Each of the three tasks is formulated as a multilabel binary classification problem. The results of the proposed MTL model have been compared with three popular transformer models: BERT, RoBERTa, and ALBERT. It is observed that the proposed model showed better performance scores than the other models in most cases.</abstract>
       <url hash="934d9f10">2025.nlp4pi-1.19</url>
@@ -292,7 +292,7 @@
     <paper id="21">
       <title>Participatory Design for Positive Impact: Behind the Scenes of Three <fixed-case>NLP</fixed-case> Projects</title>
       <author><first>Marianne</first><last>Wilson</last><affiliation>Edinburgh Napier University</affiliation></author>
-      <author><first>David M.</first><last>Howcroft</last><affiliation>University of Aberdeen</affiliation></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last><affiliation>University of Aberdeen</affiliation></author>
       <author><first>Ioannis</first><last>Konstas</last><affiliation>Heriot-Watt University</affiliation></author>
       <author><first>Dimitra</first><last>Gkatzia</last><affiliation>Edinburgh Napier University</affiliation></author>
       <author><first>Gavin</first><last>Abercrombie</last><affiliation>Heriot-Watt University</affiliation></author>
diff --git a/data/xml/2025.nodalida.xml b/data/xml/2025.nodalida.xml
index b1c962c987..d81032e22a 100644
--- a/data/xml/2025.nodalida.xml
+++ b/data/xml/2025.nodalida.xml
@@ -41,7 +41,7 @@
       <title>Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in <fixed-case>Swedish</fixed-case></title>
       <author><first>Elsa</first><last>Andersson</last></author>
       <author><first>Johan</first><last>Falkenjack</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>17–27</pages>
       <abstract>We present results from using Probit models to classify and rank texts of varying complexity from multiple sources. We use multiple linguistic sources including Swedish easy-to-read books and investigate data augmentation and feature regularisation as optimisation methods for text complexity assessment. Multi-Scale and Single Scale Probit models are implemented using different ratios of training data, and then compared. Overall, the findings suggest that the Multi-Scale Probit model is an effective method for classifying text complexity and ranking new texts and could be used to improve the performance on small datasets as well as normalize datasets labelled using different scales.</abstract>
       <url hash="b6560132">2025.nodalida-1.3</url>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="6">
       <title>Transfer-Learning <fixed-case>German</fixed-case> Metaphors Inspired by Second Language Acquisition</title>
-      <author><first>Maria</first><last>Berger</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Berger</last></author>
       <pages>48–54</pages>
       <abstract>A major part of figurative meaning prediction is based on English-language training corpora. One strategy to apply techniques to languages other than English lies in applying transfer learning techniques to correct this imbalance. However, in previous studies we learned that the bilingual representations of current transformer models are incapable of encoding the deep semantic knowledge necessary for a transfer learning step, especially for metaphor prediction. Hence, inspired by second language acquisition, we attempt to improve German metaphor prediction in transfer learning by modifying the context windows of our input samples to align with lower readability indices achieving up to 13% higher F1 score.</abstract>
       <url hash="998260fe">2025.nodalida-1.6</url>
@@ -91,7 +91,7 @@
       <title>Investigating the effectiveness of Data Augmentation and Contrastive Learning for Named Entity Recognition</title>
       <author><first>Noel</first><last>Chia</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>66–79</pages>
       <abstract>Data Augmentation (DA) and Contrastive Learning (CL) are widely used in NLP, but their potential for NER has not yet been investigated in detail. Existing work is mostly limited to zero- and few-shot scenarios where improvements over the baseline are easy to obtain. In this paper, we address this research gap by presenting a systematic evaluation of DA for NER on small, medium-sized and large datasets with coarse and fine-grained labels. We report results for a) DA only, b) DA in combination with supervised contrastive learning, and c) DA with transfer learning. Our results show that DA on its own fails to improve results over the baseline and that supervised CL works better on larger datasets while transfer learning is beneficial if the target dataset is very small. Finally, we investigate how contrastive learning affects the learned representations, based on dimensionality reduction and visualisation techniques, and show that CL mostly helps to separate named entities from non-entities.</abstract>
       <url hash="8008c5c8">2025.nodalida-1.8</url>
@@ -163,7 +163,7 @@
     </paper>
     <paper id="15">
       <title>Modeling Multilayered Complexity in Literary Texts</title>
-      <author><first>Pascale</first><last>Feldkamp</last></author>
+      <author id="pascale-feldkamp"><first>Pascale</first><last>Feldkamp</last></author>
       <author><first>Márton</first><last>Kardos</last></author>
       <author><first>Kristoffer</first><last>Nielbo</last></author>
       <author><first>Yuri</first><last>Bizzoni</last></author>
@@ -218,7 +218,7 @@
       <author><first>Ona de</first><last>Gibert</last></author>
       <author><first>Tommi</first><last>Nieminen</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>201–208</pages>
       <abstract>In this work, we introduce OpusDistillery, a novel framework to streamline the Knowledge Distillation (KD) process of multilingual NMT models. OpusDistillery’s main features are the integration of openly available teacher models from OPUS-MT and Hugging Face, comprehensive multilingual support and robust GPU utilization tracking. We describe the tool in detail and discuss the individual contributions of its pipeline components, demonstrating its flexibility for different use cases. OpusDistillery is open-source and released under a permissive license, aiming to facilitate further research and development in the field of multilingual KD for any sequence-to-sequence task. Our code is available at https://github.com/Helsinki-NLP/OpusDistillery.</abstract>
       <url hash="c1b0ffc9">2025.nodalida-1.20</url>
@@ -228,8 +228,8 @@
       <title>Mind the Gap: <fixed-case>Diverse</fixed-case> <fixed-case>NMT</fixed-case> Models for Resource-Constrained Environments</title>
       <author><first>Ona de</first><last>Gibert</last></author>
       <author><first>Dayyán</first><last>O’Brien</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>209–216</pages>
       <abstract>We present fast Neural Machine Translation models for 17 diverse languages, developed using Sequence-level Knowledge Distillation. Our selected languages span multiple language families and scripts, including low-resource languages. The distilled models achieve comparable performance while being 10x times faster than transformer-base and 35x times faster than transformer-big architectures. Our experiments reveal that teacher model quality and capacity strongly influence the distillation success, as well as the language script. We also explore the effectiveness of multilingual students. We release publicly our code and models in our Github repository: anonymised.</abstract>
       <url hash="5426b449">2025.nodalida-1.21</url>
@@ -239,7 +239,7 @@
       <title>Testing relevant linguistic features in automatic <fixed-case>CEFR</fixed-case> skill level classification for <fixed-case>Icelandic</fixed-case></title>
       <author><first>Isidora</first><last>Glišić</last></author>
       <author><first>Caitlin Laura</first><last>Richter</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <pages>217–222</pages>
       <abstract>This paper explores the use of various linguistic features to develop models for automatic classification of language proficiency on the CEFR scale for Icelandic, a low-resourced and morphologically complex language. We train two classifiers to assess skill level of learner texts. One is used as a baseline and takes in the original unaltered text written by a learner and uses predominantly surface features to assess the level. The other uses both surface and other morphological and lexical features, as well as context vectors from transformer (IceBERT). It takes in both the original and corrected versions of the text and takes into account errors/deviation of the original texts compared to the corrected versions. Both classifiers show promising results, with baseline models achieving between 62.2-67.1% accuracy and dual-version between 75-80.3%.</abstract>
       <url hash="e246d7da">2025.nodalida-1.22</url>
@@ -311,7 +311,7 @@
       <title>Database of <fixed-case>Latvian</fixed-case> Morphemes and Derivational Models: ideas and expected results</title>
       <author><first>Andra</first><last>Kalnača</last></author>
       <author><first>Tatjana</first><last>Pakalne</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
       <pages>279–286</pages>
       <abstract>In this paper, we describe “The Database of Latvian Morphemes and Derivational Models” – a large-scale corpus-based and manually validated database of Latvian derivational morphology currently in development at the University of Latvia. The database contains morpheme-level data – morphemes, incl. morpheme variants (allomorphs), morpheme types, morpheme homonymy/ homography resolu- tion, hierarchical relations between root morphemes, links to word families, and lemma-level data – incl. base form, morphemic segmentation, POS, grammatical features, derivational motivation (incl. compounding), word-family membership. The focus of the database is on providing linguistically accurate comprehensive data as a reliable basis for future work in different fields.</abstract>
       <url hash="f2a5c99b">2025.nodalida-1.29</url>
@@ -319,9 +319,9 @@
     </paper>
     <paper id="30">
       <title>Localizing <fixed-case>AI:</fixed-case> Evaluating Open-Weight Language Models for Languages of <fixed-case>B</fixed-case>altic States</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>287–295</pages>
       <abstract>Although large language models (LLMs) have transformed our expectations of modern language technologies, concerns over data privacy often restrict the use of commercially available LLMs hosted outside of EU jurisdictions. This limits their application in governmental, defense, and other data-sensitive sectors. In this work, we evaluate the extent to which locally deployable open-weight large language models support lesser-spoken languages such as Lithuanian, Latvian, and Estonian. We examine various size and precision variants of the top-performing multilingual open-weight models, Llama 3, Gemma 2, Phi, and NeMo, on machine translation, multiple-choice question answering, and free-form text generation. The results indicate that while certain models like Gemma 2 perform close to the top commercially available models, many LLMs struggle with these languages. Most surprisingly, however, we find that these models, while showing close to state-of-the-art translation performance, are still prone to lexical hallucinations with errors in at least 1 in 20 words for all open-weight multilingual LLMs.</abstract>
       <url hash="6d398edb">2025.nodalida-1.30</url>
@@ -422,8 +422,8 @@
       <author><first>Mikus</first><last>Grasmanis</last></author>
       <author><first>Agute</first><last>Klints</last></author>
       <author><first>Gunta</first><last>Nešpore-Bērzkalne</last></author>
-      <author><first>Pēteris</first><last>Paikens</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="peteris-paikens"><first>Pēteris</first><last>Paikens</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <author><first>Madara</first><last>Stāde</last></author>
       <author><first>Evelīna</first><last>Tauriņa</last></author>
@@ -460,7 +460,7 @@
       <title>A Comparative Study of <fixed-case>PEFT</fixed-case> Methods for Python Code Generation</title>
       <author><first>Johanna</first><last>Männistö</last></author>
       <author><first>Joseph</first><last>Attieh</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>390–396</pages>
       <abstract>Fine-tuning language models incurs high costs in training, inference and storage. Parameter-efficient fine-tuning (PEFT) methods have emerged as a more cost-effective alternative to full fine-tuning. However, limited work has compared different PEFT approaches for tasks like code generation. In this study, we examine the effect of various PEFT training methods on model performance in the task of Python code generation. We fine-tune four model families, ranging from 124M to 7B parameters, using three PEFT approaches alongside standard full fine-tuning. Our findings reveal that the effectiveness of each PEFT method varies with the model size and the corpus used.</abstract>
       <url hash="aef4555d">2025.nodalida-1.42</url>
@@ -472,7 +472,7 @@
       <author><first>Petter</first><last>Mæhlum</last></author>
       <author><first>Victoria Ovedie Chruickshank</first><last>Langø</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>397–407</pages>
       <abstract>This paper introduces a new suite of question answering datasets for Norwegian; NorOpenBookQA, NorCommonSenseQA, NorTruthfulQA, and NRK-Quiz-QA. The data covers a wide range of skills and knowledge domains, including world knowledge, commonsense reasoning, truthfulness, and knowledge about Norway. Covering both of the written standards of Norwegian – Bokmål and Nynorsk – our datasets comprise over 10k question-answer pairs, created by native speakers. We detail our dataset creation approach and present the results of evaluating 11 language models (LMs) in zero- and few-shot regimes. Most LMs perform better in Bokmål than Nynorsk, struggle most with commonsense reasoning, and are often untruthful in generating answers to questions. All our datasets and annotation materials are publicly available.</abstract>
       <url hash="b5e973bc">2025.nodalida-1.43</url>
@@ -481,7 +481,7 @@
     <paper id="44">
       <title>Incorporating Target Fuzzy Matches into Neural Fuzzy Repair</title>
       <author><first>Tommi</first><last>Nieminen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
       <pages>408–418</pages>
       <abstract>Neural fuzzy repair (NFR) is a simple implementation of retrieval-augmented translation (RAT), based on data augmentation. In NFR, a translation database is searched for translation examples where the source sentence is similar to the sentence being translated, and the target side of the example is concatenated with the source sentences. We experiment with introducing retrieval that is based on target similarity to NFR during training. The results of our experiments confirm that including target similarity matches during training supplements source similarity matches and leads to better translations at translation time.</abstract>
@@ -550,10 +550,10 @@
     </paper>
     <paper id="51">
       <title>Evaluating <fixed-case>LLM</fixed-case>-Generated Explanations of Metaphors – A Culture-Sensitive Study of <fixed-case>Danish</fixed-case></title>
-      <author><first>Bolette S.</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette S.</first><last>Pedersen</last></author>
       <author><first>Nathalie</first><last>Sørensen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Ali</first><last>Al-Laith</last></author>
       <pages>470–479</pages>
@@ -630,7 +630,7 @@
       <author><first>Egil</first><last>Rønningstad</last></author>
       <author><first>Lilja Charlotte</first><last>Storset</last></author>
       <author><first>Petter</first><last>Mæhlum</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>537–543</pages>
       <abstract>Sentiment analysis of patient feedback from the public health domain can aid decision makers in evaluating the provided services. The current paper focuses on free-text comments in patient surveys about general practitioners and psychiatric healthcare, annotated with four sentence-level polarity classes - positive, negative, mixed and neutral - while also attempting to alleviate data scarcity by leveraging general-domain sources in the form of reviews. For several different architectures, we compare in-domain and out-of-domain effects, as well as the effects of training joint multi-domain models.</abstract>
@@ -656,7 +656,7 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <author><first>Wilfred</first><last>Østgulen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Aslak Sira</first><last>Myhre</last></author>
       <pages>544–560</pages>
       <abstract>The use of copyrighted materials in training language models raises critical legal and ethical questions. This paper presents a framework for and the results of empirically assessing the impact of publisher-controlled copyrighted corpora on the performance of generative large language models (LLMs) for Norwegian. When evaluated on a diverse set of tasks, we found that adding both books and newspapers to the data mixture of LLMs tend to improve their performance, while the addition of fiction works seems to be detrimental. Our experiments could inform the creation of a compensation scheme for authors whose works contribute to AI development.</abstract>
@@ -678,7 +678,7 @@
       <author><first>David</first><last>Samuel</last></author>
       <author><first>Vladislav</first><last>Mikhailov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Lucas Georges Gabriel</first><last>Charpentier</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
@@ -803,7 +803,7 @@
       <author><first>Samia</first><last>Touileb</last></author>
       <author><first>Vladislav</first><last>Mikhailov</last></author>
       <author><first>Marie Ingeborg</first><last>Kroka</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>729–738</pages>
       <abstract>We introduce a dataset of high-quality human-authored summaries of news articles in Norwegian. The dataset is intended for benchmarking of the abstractive summarisation capabilities of generative language models. Each document in the dataset is provided with three different candidate gold-standard summaries written by native Norwegian speakers and all summaries are provided in both of the written variants of Norwegian – Bokmål and Nynorsk. The paper describes details on the data creation effort as well as an evaluation of existing open LLMs for Norwegian on the dataset. We also provide insights from a manual human evaluation, comparing human-authored to model generated summaries. Our results indicate that the dataset provides a challenging LLM benchmark for Norwegian summarisation capabilities.</abstract>
@@ -826,7 +826,7 @@
       <author><first>Mathias</first><last>Creutz</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>755–766</pages>
       <abstract>Recent work has demonstrated that large language models can often generate fluent and linguistically correct text, adhering to given instructions. However, to what extent can they execute complex instructions requiring knowledge of fundamental linguistic concepts and elaborate semantic reasoning? Our study connects an established linguistic theory of paraphrasing with LLM-based practice to analyze which specific types of paraphrases LLMs can accurately produce and where they still struggle. To this end, we investigate a method of analyzing paraphrases generated by LLMs prompted with a comprehensive set of systematic linguistic instructions. We conduct a case study using GPT-4, which has shown strong performance across various language generation tasks, and we believe that other LLMs may face similar challenges in comparable scenarios. We examine GPT-4 from a linguistic perspective to explore its potential contributions to linguistic research regarding paraphrasing, systematically assessing how accurately the model generates paraphrases that adhere to specified transformation rules. Our results suggest that GPT-4 frequently prioritizes simple lexical or syntactic alternations, often disregarding the transformation guidelines if they overly complicate the primary task.</abstract>
       <url hash="a074d96f">2025.nodalida-1.75</url>
@@ -847,7 +847,7 @@
       <author><first>Socrates</first><last>Vakirtzian</last></author>
       <author><first>Vivian</first><last>Stamou</last></author>
       <author><first>Yannis</first><last>Kazos</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <pages>776–784</pages>
       <abstract>We report on the development of the first treebank and parser for Eastern Cretan in the framework of Universal Dependencies (UD). Eastern Cretan is a living but under-resourced dialect of Modern Greek. We have worked on the transcription of oral material and relied on active annotation and knowledge transfer from GUD, a treebank of Standard Modern Greek. Along with its other phonological and morphosyntactic differences from Standard Modern Greek, Eastern Cretan (and other varieties of Modern Greek) makes heavy use of euphonics and voicing that have not been included in the UD annotation guidelines so far. We have provided annotation guidelines for East Cretan euphonics and voicing and included them in the models. Knowledge transfer from the treebank of Standard Modern Greek to the dialectal models helped to initiate annotation via an active annotation procedure</abstract>
       <url hash="1ea12b4f">2025.nodalida-1.77</url>
@@ -868,7 +868,7 @@
       <author><first>Huiling</first><last>You</last></author>
       <author><first>Samia</first><last>Touileb</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>801–811</pages>
       <abstract>In this work, we approach event extraction from Norwegian news text using a generation-based approach which formulates the task as text-to-structure generation. We present experiments assessing the effect of different modeling configurations and provide an analysis of the model predictions and typical system errors. Finally, we apply our system to a large corpus of raw news texts and analyze the resulting distribution of event structures in a fairly representative snap-shot of the Norwegian news landscape.</abstract>
       <url hash="7d0182e4">2025.nodalida-1.79</url>
diff --git a/data/xml/2025.privatenlp.xml b/data/xml/2025.privatenlp.xml
index 339c5e8c3f..cd73a7afcc 100644
--- a/data/xml/2025.privatenlp.xml
+++ b/data/xml/2025.privatenlp.xml
@@ -85,7 +85,7 @@
       <author><first>Samuel</first><last>Belkadi</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
       <author><first>Warren</first><last>Del-Pinto</last><affiliation>University of Manchester</affiliation></author>
-      <author><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last><affiliation>University of Manchester</affiliation></author>
       <pages>60-74</pages>
       <abstract>Due to the sensitive nature of clinical letters, their use in model training, medical research, and education is limited. This work aims to generate diverse, de-identified, and high-quality synthetic clinical letters to enhance privacy protection. This study explores various pre-trained language models (PLMs) for text masking and generation, employing various masking strategies with a focus on Bio_ClinicalBERT. Both qualitative and quantitative methods are used for evaluation, supplemented by a downstream Named Entity Recognition (NER) task. Our results indicate that encoder-only models outperform encoder-decoder models. General-domain and clinical-domain PLMs exhibit comparable performance when clinical information is preserved. Preserving clinical entities and document structure yields better performance than fine-tuning alone. Masking stopwords enhances text quality, whereas masking nouns or verbs has a negative impact. BERTScore proves to be the most reliable quantitative evaluation metric in our task. Contextual information has minimal impact, indicating that synthetic letters can effectively replace original ones in downstream tasks. Unlike previous studies that focus primarily on reconstructing original letters or training a privacy-detection and substitution model, this project provides a framework for generating diverse clinical letters while embedding privacy detection, enabling sensitive dataset expansion and facilitating the use of real-world clinical data. Our codes and trained models will be publicly available at https://github.com/HECTA-UoM/Synthetic4Health.</abstract>
       <url hash="dbcb8116">2025.privatenlp-main.6</url>
diff --git a/data/xml/2025.quasy.xml b/data/xml/2025.quasy.xml
index 16cc91eece..77d2789e35 100644
--- a/data/xml/2025.quasy.xml
+++ b/data/xml/2025.quasy.xml
@@ -141,7 +141,7 @@
     <paper id="15">
       <title>Do Multilingual Transformers Encode <fixed-case>P</fixed-case>aninian Grammatical Relations? A Layer-wise Probing Study</title>
       <author><first>Akshit</first><last>Kumar</last><affiliation>International Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad</affiliation></author>
-      <author><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last><affiliation>IIIT Hyderabad</affiliation></author>
       <author><first>Parameswari</first><last>Krishnamurthy</last><affiliation>International Institute of Information Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology</affiliation></author>
       <pages>124-130</pages>
       <abstract>Large multilingual transformers such as XLM-RoBERTa achieve impressive performance on diverse NLP benchmarks, but understanding how they internally encode grammatical information remains challenging. This study investigates the encoding of syntactic and morphological information derived from the Paninian grammatical framework—specifically designed for morphologically rich Indian languages—across model layers. Using diagnostic probing, we analyze the hidden representations of frozen XLM-RoBERTa-base, mBERT, and IndicBERT models across seven Indian languages (Hindi, Kannada, Malayalam, Marathi, Telugu, Urdu, Bengali). Probes are trained to predict Paninian dependency relations (by edge probing) and essential morphosyntactic features (UPOS tags, Vibhakti markers). We find that syntactic structure (dependencies) is primarily encoded in the middle-to-upper-middle layers (layers 6–9), while lexical features peak slightly earlier. Although the general layer-wise trends are shared across models, significant variations in absolute probing performance reflect differences in model capacity, pre-training data, and language-specific characteristics. These findings shed light on how theory-specific grammatical information emerges implicitly within multilingual transformer representations trained largely on unstructured raw text.</abstract>
diff --git a/data/xml/2025.queerinai.xml b/data/xml/2025.queerinai.xml
index c80c054641..514d1d00be 100644
--- a/data/xml/2025.queerinai.xml
+++ b/data/xml/2025.queerinai.xml
@@ -49,7 +49,7 @@
       <author><first>Alexandria</first><last>Leto</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <author><first>Juan</first><last>Vásquez</last></author>
       <author><first>Alexis</first><last>Palmer</last><affiliation>University of Colorado at Boulder</affiliation></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last><affiliation>University of Colorado at Boulder</affiliation></author>
       <pages>17-25</pages>
       <abstract>Given the widespread use of LLM-powered conversational agents such as ChatGPT, analyzing the ways people interact with them could provide valuable insights into human behavior. Prior work has shown that these agents are sometimes used in sexual contexts, such as to obtain advice, to role-play as sexual companions, or to generate erotica. While LGBTQ+ acceptance has increased in recent years, dehumanizing practices against minorities continue to prevail. In this paper, we hone in on this and perform an analysis of dehumanizing tendencies toward LGBTQ+ individuals by human users in their sexual interactions with ChatGPT. Through a series of experiments that model various concept vectors associated with distinct shades of dehumanization, we find evidence of the reproduction of harmful stereotypes. However, many user prompts lack indications of dehumanization, suggesting that the use of these agents is a complex and nuanced issue which warrants further investigation.</abstract>
       <url hash="83de7042">2025.queerinai-main.3</url>
diff --git a/data/xml/2025.realm.xml b/data/xml/2025.realm.xml
index af2305651b..6a6f8494d5 100644
--- a/data/xml/2025.realm.xml
+++ b/data/xml/2025.realm.xml
@@ -64,7 +64,7 @@
     <paper id="4">
       <title>A Multi-<fixed-case>AI</fixed-case> Agent System for Autonomous Optimization of Agentic <fixed-case>AI</fixed-case> Solutions via Iterative Refinement and <fixed-case>LLM</fixed-case>-Driven Feedback Loops</title>
       <author><first>Kamer Ali</first><last>Yuksel</last><affiliation>aiXplain, inc.</affiliation></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last><affiliation>Universidade Federal de Minas Gerais</affiliation></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last><affiliation>Universidade Federal de Minas Gerais</affiliation></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last><affiliation>aiXplain</affiliation></author>
       <author><first>Hassan</first><last>Sawaf</last><affiliation>aiXplain</affiliation></author>
       <pages>52-62</pages>
@@ -99,7 +99,7 @@
     </paper>
     <paper id="7">
       <title>Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands</title>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <author><first>Christopher Gordon</first><last>Richardson</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>William</first><last>Gay</last></author>
       <author><first>Benjamin</first><last>Reichman</last></author>
@@ -368,7 +368,7 @@
       <title>From Knowledge to Noise: <fixed-case>CTIM</fixed-case>-Rover and the Pitfalls of Episodic Memory in Software Engineering Agents</title>
       <author><first>Tobias</first><last>Lindenbauer</last><affiliation>Jetbrains</affiliation></author>
       <author><first>Georg</first><last>Groh</last><affiliation>Technical University Munich</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>411-427</pages>
       <abstract>We introduce CTIM-Rover, an AI agent for Software Engineering (SE) built on top of AutoCodeRover (Zhang et al., 2024) that extends agentic reasoning frameworks with an episodic memory, more specifically, a general and repository-level Cross-Task-Instance Memory (CTIM). While existing open-source SE agents mostly rely on ReAct (Yao et al., 2023b), Reflexion (Shinn et al., 2023), or Code-Act (Wang et al., 2024), all of these reasoning and planning frameworks inefficiently discard their long-term memory after a single task instance. As repository-level understanding is pivotal for identifying all locations requiring a patch for fixing a bug, we hypothesize that SE is particularly well positioned to benefit from CTIM. For this, we build on the Experiential Learning (EL) approach ExpeL (Zhao et al., 2024), proposing a Mixture-Of-Experts (MoEs) inspired approach to create both a general-purpose and repository-level CTIM . We find that CTIM-Rover does not outperform AutoCodeRover in any configuration and thus conclude that neither ExpeL nor DoT-Bank (Lingam et al., 2024) scale to real-world SE problems. Our analysis indicates noise introduced by distracting CTIM items or exemplar trajectories as the likely source of the performance degradation.</abstract>
       <url hash="feebfbc7">2025.realm-1.30</url>
diff --git a/data/xml/2025.regnlp.xml b/data/xml/2025.regnlp.xml
index 51edce9a65..3206e6629b 100644
--- a/data/xml/2025.regnlp.xml
+++ b/data/xml/2025.regnlp.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)</booktitle>
       <editor><first>Tuba</first><last>Gokhan</last></editor>
-      <editor id="kexin-wang"><first>Kexin</first><last>Wang</last></editor>
+      <editor><first>Kexin</first><last>Wang</last></editor>
       <editor><first>Iryna</first><last>Gurevych</last></editor>
-      <editor><first>Ted</first><last>Briscoe</last></editor>
+      <editor id="ted-briscoe"><first>Ted</first><last>Briscoe</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, UAE</address>
       <month>January</month>
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>Shared Task <fixed-case>RIRAG</fixed-case>-2025: Regulatory Information Retrieval and Answer Generation</title>
       <author><first>Tuba</first><last>Gokhan</last></author>
-      <author id="kexin-wang"><first>Kexin</first><last>Wang</last></author>
+      <author><first>Kexin</first><last>Wang</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Ted</first><last>Briscoe</last></author>
       <pages>1–4</pages>
@@ -46,7 +46,7 @@
       <author><first>Ehsan</first><last>Lotfi</last></author>
       <author><first>Nikolay</first><last>Banar</last></author>
       <author><first>Nerses</first><last>Yuzbashyan</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>10–21</pages>
       <abstract>Statutory article retrieval plays a crucial role in making legal information more accessible to both laypeople and legal professionals. Multilingual countries like Belgium present unique challenges for retrieval models due to the need for handling legal issues in multiple languages. Building on the Belgian Statutory Article Retrieval Dataset (BSARD) in French, we introduce the bilingual version of this dataset, bBSARD. The dataset contains parallel Belgian statutory articles in both French and Dutch, along with legal questions from BSARD and their Dutch translation. Using bBSARD, we conduct extensive benchmarking of retrieval models available for Dutch and French. Our benchmarking setup includes lexical models, zero-shot dense models, and fine-tuned small foundation models. Our experiments show that BM25 remains a competitive baseline compared to many zero-shot dense models in both languages. We also observe that while proprietary models outperform open alternatives in the zero-shot setting, they can be matched or surpassed by fine-tuning small language-specific models. Our dataset and evaluation code are publicly available.</abstract>
       <url hash="c45f547c">2025.regnlp-1.3</url>
@@ -109,7 +109,7 @@
     <paper id="9">
       <title>Structured Tender Entities Extraction from Complex Tables with Few-short Learning</title>
       <author><first>Asim</first><last>Abbas</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <author><first>Niloofer</first><last>Shanavas</last></author>
       <author><first>Venelin</first><last>Kovatchev</last></author>
       <author><first>Mubashir</first><last>Ali</last></author>
diff --git a/data/xml/2025.repl4nlp.xml b/data/xml/2025.repl4nlp.xml
index 7629de0112..1983738e9c 100644
--- a/data/xml/2025.repl4nlp.xml
+++ b/data/xml/2025.repl4nlp.xml
@@ -7,7 +7,7 @@
       <editor><first>Alexandra</first><last>Chronopoulou</last></editor>
       <editor><first>Xiang Lorraine</first><last>Li</last></editor>
       <editor><first>Bodhisattwa Prasad</first><last>Majumder</last></editor>
-      <editor><first>Freda</first><last>Shi</last></editor>
+      <editor id="freda-shi"><first>Freda</first><last>Shi</last></editor>
       <editor><first>Giorgos</first><last>Vernikos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, NM</address>
@@ -170,7 +170,7 @@
       <title>Large Language Models Are Overparameterized Text Encoders</title>
       <author><first>Thennal D</first><last>K</last></author>
       <author><first>Tim</first><last>Fischer</last><affiliation>University of Hamburg</affiliation></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>170-184</pages>
       <abstract>Large language models (LLMs) demonstrate strong performance as text embedding models when finetuned with supervised contrastive training. However, their large size balloons inference time and memory requirements. In this paper, we show that by pruning the last % layers of an LLM before supervised training for only 1000 steps, we can achieve a proportional reduction in memory and inference time. We evaluate four different state-of-the-art LLMs on text embedding tasks and find that our method can prune up to 30% of layers with negligible impact on performance and up to 80% with only a modest drop. With only three lines of code, our method is easily implemented in any pipeline for transforming LLMs to text encoders. We also propose L3Prune, a novel layer-pruning strategy based on the model’s initial loss that provides two optimal pruning configurations: a large variant with negligible performance loss and a small variant for resource-constrained settings. On average, the large variant prunes 21% of the parameters with a performance drop, and the small variant only suffers from a decrease while pruning 74% of the model. We consider these results strong evidence that LLMs are overparameterized for text embedding tasks, and can be easily pruned.</abstract>
       <url hash="8503f93d">2025.repl4nlp-1.13</url>
diff --git a/data/xml/2025.resourceful.xml b/data/xml/2025.resourceful.xml
index f4d88df932..0852128403 100644
--- a/data/xml/2025.resourceful.xml
+++ b/data/xml/2025.resourceful.xml
@@ -132,7 +132,7 @@
     </paper>
     <paper id="22">
       <title>First Steps in Benchmarking <fixed-case>L</fixed-case>atvian in Large Language Models</title>
-      <author><first>Inguna</first><last>Skadina</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadina</last></author>
       <author><first>Bruno</first><last>Bakanovs</last></author>
       <author><first>Roberts</first><last>Darģis</last></author>
       <pages>86–95</pages>
diff --git a/data/xml/2025.sdp.xml b/data/xml/2025.sdp.xml
index 45881e459b..a8c638c3ed 100644
--- a/data/xml/2025.sdp.xml
+++ b/data/xml/2025.sdp.xml
@@ -8,7 +8,7 @@
       <editor><first>Amanpreet</first><last>Singh</last></editor>
       <editor><first>Aakanksha</first><last>Naik</last></editor>
       <editor><first>Georg</first><last>Rehm</last></editor>
-      <editor><first>Dayne</first><last>Freitag</last></editor>
+      <editor id="dayne-freitag"><first>Dayne</first><last>Freitag</last></editor>
       <editor><first>Dan</first><last>Li</last></editor>
       <editor><first>Sonja</first><last>Schimmler</last></editor>
       <editor><first>Anita</first><last>De Waard</last></editor>
@@ -260,7 +260,7 @@
       <title>Visual Question Answering on Scientific Charts Using Fine-Tuned Vision-Language Models</title>
       <author><first>Florian</first><last>Schleid</last></author>
       <author><first>Jan</first><last>Strich</last></author>
-      <author><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last><affiliation>U Hamburg</affiliation></author>
       <pages>211-220</pages>
       <abstract>Scientific charts often encapsulate the core findings of research papers, making the ability to answer questions about these charts highly valuable. This paper explores recent advancements in scientific chart visual question answering (VQA) enabled by large Vision Language Models (VLMs) and newly curated datasets. As part of the SciVQA shared task from the 5th Workshop on Scholarly Document Processing, we develop and evaluate multimodal Systems capable of answering diverse question types - including multiple-choice, yes/no, unanswerable, and infinite answer set questions - based on chart images extracted from scientific literature. We investigate the effects of zero-shot and one-shot prompting, as well as supervised fine-tuning (SFT), on the performance of Qwen2.5-VL models (7B and 32B variants). We also tried to include more training data from domain-specific datasets (SpiQA and ArXivQA). Our fine-tuned Qwen2.5-VL 32B model achieves a substantial improvement over the GPT-4o-mini baseline and reaches the 4th place in the shared task, highlighting the effectiveness of domain-specific fine-tuning. We published the code for the experiments.</abstract>
       <url hash="a934eff0">2025.sdp-1.19</url>
@@ -353,7 +353,7 @@
     <paper id="27">
       <title><fixed-case>A</fixed-case>lex<fixed-case>UNLP</fixed-case>-<fixed-case>FMT</fixed-case> at <fixed-case>C</fixed-case>limate<fixed-case>C</fixed-case>heck Shared Task: Hybrid Retrieval with Adaptive Similarity Graph-based Reranking for Climate-related Social Media Claims Fact Checking</title>
       <author><first>Mahmoud</first><last>Fathallah</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <author><first>Marwan</first><last>Torki</last><affiliation>Alexandria University</affiliation></author>
       <pages>288-292</pages>
       <abstract>In this paper, we describe our work done in the ClimateCheck shared task at the Scholarly document processing (SDP) workshop, ACL 2025. We focused on subtask 1: Abstracts Retrieval. The task involved retrieving relevant paper abstracts from a large corpus to verify claims made on social media about climate change. We explored various retrieval and ranking techniques, including fine-tuning transformer-based dense retrievers, sparse retrieval methods, and reranking using cross-encoder models. Our final and best-performing system utilizes a hybrid retrieval approach combining BM25 sparse retrieval and a fine-tuned Stella model for dense retrieval, followed by an MSMARCO trained minilm cross-encoder model for ranking. We adapt an iterative graph-based re-ranking approach leveraging a document similarity graph built for the document corpus to dynamically update candidate pool for reranking. This system achieved a score of 0.415 on the final test set for subtask 1, securing 3rd place in the final leader board.</abstract>
diff --git a/data/xml/2025.sealp.xml b/data/xml/2025.sealp.xml
index d25baaf6e5..1a3354d0da 100644
--- a/data/xml/2025.sealp.xml
+++ b/data/xml/2025.sealp.xml
@@ -3,10 +3,10 @@
   <volume id="1" ingest-date="2025-01-25" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Second Workshop in South East Asian Language Processing</booktitle>
-      <editor><first>Derry</first><last>Wijaya</last></editor>
+      <editor id="derry-tanti-wijaya"><first>Derry</first><last>Wijaya</last></editor>
       <editor><first>Alham Fikri</first><last>Aji</last></editor>
       <editor><first>Clara</first><last>Vania</last></editor>
-      <editor><first>Genta Indra</first><last>Winata</last></editor>
+      <editor id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></editor>
       <editor><first>Ayu</first><last>Purwarianti</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Online</address>
diff --git a/data/xml/2025.semeval.xml b/data/xml/2025.semeval.xml
index 2721f208be..ba662fa209 100644
--- a/data/xml/2025.semeval.xml
+++ b/data/xml/2025.semeval.xml
@@ -233,8 +233,8 @@
     <paper id="21">
       <title><fixed-case>G</fixed-case>ate<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Hierarchical Three-Step Prompting for Multilingual Narrative Classification</title>
       <author><first>Iknoor</first><last>Singh</last><affiliation>University of Sheffield, United Kingdom</affiliation></author>
-      <author><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <pages>148-154</pages>
       <abstract>The proliferation of online news and the increasing spread of misinformation necessitate robust methods for automated narrative classification. This paper presents our approach to SemEval 2025 Task 10 Subtask 2, which aims to classify news articles into a predefined two-level taxonomy of main narratives and sub-narratives across multiple languages. We propose Hierarchical Three-Step Prompting (H3Prompt) for multilingual narrative classification. Our methodology follows a three-step prompting strategy, where the model first categorises an article into one of two domains (Ukraine-Russia War or Climate Change), then identifies the most relevant main narratives, and finally assigns sub-narratives. Our approach secured the top position on the English test set among 28 competing teams worldwide. This result highlights the effectiveness of our method in improving narrative classification performance over the baselines.</abstract>
       <url hash="cdb15f43">2025.semeval-1.21</url>
@@ -260,7 +260,7 @@
     <paper id="24">
       <title><fixed-case>UNEDT</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Zero-Shot Narrative Classification</title>
       <author><first>Jesus M.</first><last>Fraile - Hernandez</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
-      <author><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last><affiliation>NLP &amp; IR Group, UNED</affiliation></author>
       <pages>165-173</pages>
       <abstract>In this paper we present our participation in Subtask 2 of SemEval-2025 Task 10, focusing on the identification and classification of narratives in news of multiple languages, on climate change and the Ukraine-Russia war. To address this task, we employed a Zero-Shot approach using a generative Large Language Model without prior training on the dataset. Our classification strategy is based on two steps: first, the system classifies the topic of each news item; subsequently, it identifies the sub-narratives directly at the finer granularity. We present a detailed analysis of the performance of our system compared to the best ranked systems on the leaderboard, highlighting the strengths and limitations of our approach.</abstract>
       <url hash="fb88f5dc">2025.semeval-1.24</url>
@@ -419,7 +419,7 @@
       <author><first>Thanet</first><last>Markchom</last><affiliation>Department of Computer Science, University of Reading</affiliation></author>
       <author><first>Jianfei</first><last>Xu</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
       <author><first>Tong</first><last>Wu</last><affiliation>Formerly at School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
       <pages>271-279</pages>
       <abstract>SemEval-2025 Task 3 (Mu-SHROOM) focuses on detecting hallucinations in content generated by various large language models (LLMs) across multiple languages. This task involves not only identifying the presence of hallucinations but also pinpointing their specific occurrences. To tackle this challenge, this study introduces two methods: modified RefChecker and modified SelfCheckGPT. The modified RefChecker integrates prompt-based factual verification into References, structuring them as claim-based tests rather than single external knowledge sources. The modified SelfCheckGPT ~incorporates external knowledge to overcome its reliance on internal knowledge. In addition, both methods’ original prompt designs are enhanced to identify hallucinated words within LLM-generated texts. Experimental results demonstrate the effectiveness of the approach, achieving a high ranking on the test dataset in detecting hallucinations across various languages, with an average IoU of 0.5310 and an average COR of 0.5669.</abstract>
       <url hash="21c7203c">2025.semeval-1.39</url>
@@ -438,7 +438,7 @@
       <author><first>Thanet</first><last>Markchom</last><affiliation>University of Reading</affiliation></author>
       <author><first>Tong</first><last>Wu</last><affiliation>Previously at School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
       <author><first>Liting</first><last>Huang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>School of Computing, Newcastle University, Newcastle upon Tyne, UK</affiliation></author>
       <pages>288-295</pages>
       <abstract>SemEval-2025 Task 1 focuses on ranking images based on their alignment with a given nominal compound that may carry idiomatic meaning in both English and Brazilian Portuguese. To address this challenge, this work uses generative large language models (LLMs) and multilingual CLIP models to enhance idiomatic compound representations. LLMs generate idiomatic meanings for potentially idiomatic compounds, enriching their semantic interpretation. These meanings are then encoded using multilingual CLIP models, serving as representations for image ranking. Contrastive learning and data augmentation techniques are applied to fine-tune these embeddings for improved performance.Experimental results show that multimodal representations extracted through this method outperformed those based solely on the original nominal compounds. The fine-tuning approach shows promising outcomes but is less effective than using embeddings without fine-tuning.</abstract>
       <url hash="d762c9b0">2025.semeval-1.41</url>
@@ -447,7 +447,7 @@
     <paper id="42">
       <title><fixed-case>N</fixed-case>lp<fixed-case>U</fixed-case>ned at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Beyond Training: A Taxonomy-Guided Approach to Role Classification Using <fixed-case>LLM</fixed-case>s</title>
       <author><first>Alberto</first><last>Caballero</last><affiliation>NLP and IR group at UNED</affiliation></author>
-      <author><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last><affiliation>NLP and IR group at UNED</affiliation></author>
       <author><first>Roberto</first><last>Centeno</last><affiliation>UNED</affiliation></author>
       <pages>296-301</pages>
       <abstract>The paper presents a taxonomy-guided approach to role classification in news articles using Large Language Models (LLMs). Instead of traditional model training, the system employs zero-shot and few-shot prompting strategies, leveraging structured taxonomies and contextual cues for classification. The study evaluates hierarchical and single-step classification approaches, finding that a unified, single-step model with contextual preprocessing achieves the best performance. The research underscores the importance of input structuring and classification strategy in optimizing LLM performance for real-world applications.</abstract>
@@ -506,8 +506,8 @@
       <author><first>Jiyu</first><last>Chen</last><affiliation>CSIRO</affiliation></author>
       <author><first>Necva</first><last>Bölücü</last><affiliation>Csiro</affiliation></author>
       <author><first>Sarvnaz</first><last>Karimi</last><affiliation>CSIRO</affiliation></author>
-      <author><first>Diego</first><last>Molla</last><affiliation>Macquarie University</affiliation></author>
-      <author><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last><affiliation>Macquarie University</affiliation></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last><affiliation>CSIRO</affiliation></author>
       <pages>336-342</pages>
       <abstract>Detecting emotions across different languages is challenging due to the varied and culturally nuanced ways of emotional expressions. The Semeval 2025 Task 11: Bridging the Gap in Text-Based emotion shared task was organised to investigate emotion recognition across different languages. The goal of the task is to implement an emotion recogniser that can identify the basic emotional states that general third-party observers would attribute to an author based on their written text snippet, along with the intensity of those emotions. We report our investigation of various task-adaptation strategies for LLMs in emotion recognition. We show that the most effective method for this task is to fine-tune a pre-trained multilingual LLM for each language.</abstract>
       <url hash="3eb42ec6">2025.semeval-1.48</url>
@@ -562,7 +562,7 @@
       <author><first>Xiangyu</first><last>Wang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Ge</first><last>Shi</last><affiliation>Beijing University of Technology</affiliation></author>
       <author><first>Linmei</first><last>Hu</last><affiliation>School of Computer Science &amp; Technology, Beijing Institute of Technology</affiliation></author>
-      <author><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <author><first>Chong</first><last>Feng</last><affiliation>Beijing Institute of Technology</affiliation></author>
       <pages>373-380</pages>
       <abstract>This paper presents our system for Subtask 10 of Entity Framing, which focuses on assigning one or more hierarchical roles to named entities in news articles. Our approach iteratively refines prompts and utilizes the Entity-Centric Chain of Thought to complete the task. Specifically, to minimize ambiguity in label definitions, we use the model’s predictions as supervisory signals, iteratively refining the category definitions. Furthermore, to minimize the interference of irrelevant information during inference, we incorporate entity-related information into the CoT framework, allowing the model to focus more effectively on entity-centric reasoning. Our system achieved the highest ranking on the leaderboard in the Russian main role classification and the second in English, with an accuracy of 0.8645 and 0.9362, respectively. We discuss the impact of several components of our multilingual classification approach, highlighting their effectiveness.</abstract>
@@ -581,7 +581,7 @@
       <title><fixed-case>BERT</fixed-case>astic at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: State-of-the-Art Accuracy in Coarse-Grained Entity Framing for <fixed-case>H</fixed-case>indi News</title>
       <author><first>Tarek</first><last>Mahmoud</last><affiliation>Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)</affiliation></author>
       <author><first>Zhuohan</first><last>Xie</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <pages>386-396</pages>
       <abstract>We describe our system for SemEval-2025 Task 10 Subtask 1 on coarse-grained entity framing in Hindi news, exploring two complementary strategies. First, we experiment with LLM prompting using GPT-4o, comparing hierarchical multi-step prompting with native single-step prompting for both main and fine-grained role prediction. Second, we conduct an extensive study on fine-tuning XLM-R, analyzing different context granularities (full article, paragraph, or sentence-level entity mentions), monolingual vs. multilingual settings, and main vs. fine-grained role labels. Our best system, trained on fine-grained role annotations across languages using sentence-level context, achieved 43.99% exact match, 56.56 % precision, 47.38% recall, and 51.57% F1-score. Notably, our system set a new state-of-the-art for main role prediction on Hindi news, achieving 78.48 % accuracy - outperforming the next best model at 76.90%, as per the official leaderboard. Our findings highlight effective strategies for entity framing in multilingual and low-resource settings.</abstract>
       <url hash="f5655774">2025.semeval-1.55</url>
@@ -610,7 +610,7 @@
       <title><fixed-case>NCLT</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Enhancing Multilingual, multi-class, and Multi-Label Document Classification via Contrastive Learning Augmented Cascaded <fixed-case>UN</fixed-case>et and Embedding based Approaches</title>
       <author><first>Shu</first><last>Li</last><affiliation>Newcastle University</affiliation></author>
       <author><first>George</first><last>Williamson</last><affiliation>Newcastle University</affiliation></author>
-      <author><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last><affiliation>Newcastle University</affiliation></author>
       <pages>418-423</pages>
       <abstract>The SemEval 2025 Task 10 Subtask2 presents a multi-task multi-label text classification challenge. The task requires systems to classify documents simultaneously across three distinct topics, the Climate Change(CC), the Ukraine Russia War(URW), and others. Several challenge were identified, including the instinct distinct of topics, the imbalance of categories, the insufficient samples, and the different distribution of develop set and test set. To address these challenges, two deep learning model have been implemented. One of the approach is the Contrastive learning augmented Cascaded UNet model(CCU), which employs a cascaded architecture to jointly process all subtasks. This model incorporates an UNet-style architecture to classify embeddings extracted by the base text encoder. A domain adaption method was implemented to facilitate joint learning across different document topics. We address the data insufficiency through contrastive learning and mitigate data imbalance using asymmetric loss function. We also implemented a shallow machine learning model. In this approach, transformer encoder models were applied to extract text embedding from various aspect, then deploy machine learning method to do the classification and compared with the base line. The UNet-style model achieves the highest f1 sample at 0.365 on the test set of 5th place compared with all approaches on leader board. Our source code developed for this paper are available at</abstract>
       <url hash="d30b6309">2025.semeval-1.58</url>
@@ -677,7 +677,7 @@
       <title>Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Few-shot text-based emotion detection</title>
       <author><first>Claudiu</first><last>Creanga</last><affiliation>University of Bucharest</affiliation></author>
       <author><first>Teodor - George</first><last>Marchitan</last><affiliation>University of Bucharest</affiliation></author>
-      <author><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last><affiliation>University of Bucharest</affiliation></author>
       <pages>468-475</pages>
       <abstract>This paper describes the approach of the Unibuc - NLP team in tackling the SemEval 2025 Workshop, Task 11: Bridging the Gap in Text-Based Emotion Detection. We mainly focused on experiments using large language models (Gemini, Qwen, DeepSeek) with either few-shot prompting or fine-tuning. Withour final system, for the multi-label emotion detection track (track A), we got an F1-macro of 0.7546 (26/96 teams) for the English subset, 0.1727 (35/36 teams) for the Portuguese (Mozambican) subset and 0.325 (1/31 teams) for the Emakhuwa subset.</abstract>
       <url hash="b56fb410">2025.semeval-1.65</url>
@@ -802,7 +802,7 @@
     <paper id="77">
       <title><fixed-case>RACAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 7: Efficient adaptation of Large Language Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval</title>
       <author><first>Radu - Gabriel</first><last>Chivereanu</last><affiliation>Phd Candidate RACAI</affiliation></author>
-      <author><first>Dan</first><last>Tufis</last><affiliation>RACAI</affiliation></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last><affiliation>RACAI</affiliation></author>
       <pages>551-557</pages>
       <abstract>The paper details our approach to SemEval 2025 Shared Task 7: Multilingual and Crosslingual Fact-Checked Claim Retrieval.We investigate how large language models (LLMs) designed for general-purpose retrieval via text-embeddings can be adapted for fact-checked claim retrieval across multiple languages, including scenarios where the query and fact-check are in different languages. The experiments involve fine-tuning with a contrastive objective, resulting in notable gains in both accuracy and efficiency over the baseline retrieval model. We evaluate cost-effective techniques such as LoRA and QLoRA and Prompt Tuning.Additionally, we demonstrate the benefits of Matryoshka embeddings in minimizing the memory footprint of stored embeddings, reducing the system requirements for a fact-checking system.</abstract>
       <url hash="f8d45e76">2025.semeval-1.77</url>
@@ -813,8 +813,8 @@
       <author><first>Delia - Iustina</first><last>Grigorita</last><affiliation>Alexandru Ioan Cuza University, Faculty of Computer Science Iasi</affiliation></author>
       <author><first>Tudor - Constantin</first><last>Pricop</last><affiliation>Alexandru Ioan Cuza University</affiliation></author>
       <author><first>Sergio - Alessandro</first><last>Suteu</last><affiliation>“Alexandru Ioan Cuza” University of Iasi</affiliation></author>
-      <author><first>Daniela</first><last>Gifu</last><affiliation>“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science &amp; Romanian Academy - Iasi branch, Institute of Computer Science</affiliation></author>
-      <author><first>Diana</first><last>Trandabat</last><affiliation>University Alexandru Ioan Cuza of Iasi, Romania</affiliation></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last><affiliation>“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science &amp; Romanian Academy - Iasi branch, Institute of Computer Science</affiliation></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last><affiliation>University Alexandru Ioan Cuza of Iasi, Romania</affiliation></author>
       <pages>558-565</pages>
       <abstract>Entity-Aware Machine Translation (EAMT) aims to enhance the accuracy of machine translation (MT) systems in handling named entities, including proper names, domain-specific terms, and structured references. Conventional MT models often struggle to accurately translate these entities, leading to errors that affect comprehension and reliability. In this paper, we present a promising approach for SemEval 2025 Task 2, focusing on improving EAMT in ten target languages. The methodology is based on two complementary strategies: (1) multilingual Named Entity Recognition (NER) and structured knowledge bases for preprocessing and integrating entity translations, and (2) large language models (LLMs) enhanced with optimized prompts and validation mechanisms to improve entity preservation. By combining structured knowledge with neural approaches, this system aims to mitigate entity-related translation errors and enhance the overall performance of MT models. Among the systems that do not use gold information, retrieval-augmented generation (RAG), or fine-tuning, our approach ranked 1st with the second strategy and 3rd with the first strategy.</abstract>
       <url hash="4df45055">2025.semeval-1.78</url>
@@ -843,7 +843,7 @@
       <author><first>Aitana</first><last>Martínez</last><affiliation>University of Alicante</affiliation></author>
       <author><first>Snorre</first><last>Ralund</last><affiliation>University of Copenhagen</affiliation></author>
       <author><first>Elena</first><last>Lloret</last><affiliation>University of Alicante</affiliation></author>
-      <author><first>Paloma</first><last>Moreda Pozo</last><affiliation>University of Alicante</affiliation></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda Pozo</last><affiliation>University of Alicante</affiliation></author>
       <author><first>Armando</first><last>Suárez Cueto</last><affiliation>University of Alicante</affiliation></author>
       <pages>575-583</pages>
       <abstract>This paper describes our approach to address the SemEval-2025 Task 10 subtask 3, which is focused on narrative extraction given news articles with a dominant narrative. We design an external knowledge injection approach to fine-tune a Flan-T5 model so the generated narrative explanations are in line with the dominant narrative determined in each text. We also incorporate pragmatic information in the form of communicative intentions, using them as external knowledge to assist the model. This ensures that the generated texts align more closely with the intended explanations and effectively convey the expected meaning. The results show that our approach ranks 3rd in the task leaderboard (0.7428 in Macro-F1) with concise and effective news explanations. The analyses highlight the importance of adding pragmatic information when training systems to generate adequate narrative extractions.</abstract>
@@ -855,7 +855,7 @@
       <author><first>Milad</first><last>Afshari</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Richard</first><last>Frost</last><affiliation>Michigan State University</affiliation></author>
       <author><first>Samantha</first><last>Kissel</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>584-589</pages>
       <abstract>We tackle the challenge of multi-label emotion detection in short texts, focusing on SemEval-2025 Task 11 Track A. Our approach, RoEmo, combines generative and discriminative models in an ensemble strategy to classify texts into five emotions: anger, fear, joy, sadness, and surprise.The generative model, instruction-finetuned on emotion detection datasets, undergoes additional fine-tuning on the SemEval-2025 Task 11 Track A dataset to enhance its performance for this specific task. Meanwhile, the discriminative model, based on binary classification, offers a straightforward yet effective approach to classification.We review recent advancements in multi-label emotion detection and analyze the task dataset. Our results show that RoEmo ranks among the top-performing systems, demonstrating high accuracy and reliability.</abstract>
       <url hash="507743f6">2025.semeval-1.81</url>
@@ -967,7 +967,7 @@
       <author><first>Fredin</first><last>Vázquez</last><affiliation>Instituto de Investigaciones en Matemáticas Aplicadas y Sistemas</affiliation></author>
       <author><first>Christian</first><last>Luna</last><affiliation>Facultad de Contaduría y Administración</affiliation></author>
       <author><first>Aldair</first><last>Conde</last><affiliation>Facultad de Ciencias</affiliation></author>
-      <author><first>Gerardo</first><last>Sierra</last><affiliation>Instituto de Ingeniería</affiliation></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last><affiliation>Instituto de Ingeniería</affiliation></author>
       <author><first>Helena</first><last>Gómez - Adorno</last><affiliation>Instituto de Investigaciones en Matemáticas Aplicadas y en Sistemas</affiliation></author>
       <author><first>Gemma</first><last>Bel - Enguix</last><affiliation>Instituto de Ingeniería</affiliation></author>
       <pages>657-665</pages>
@@ -979,7 +979,7 @@
       <title><fixed-case>LATE</fixed-case>-<fixed-case>GIL</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Multi-Language Emotion Detection and Intensity Classification Using Transformer Models with Optimized Loss Functions for Imbalanced Data</title>
       <author><first>Jesús</first><last>V á z q u e z - O s o r i o</last><affiliation>Posgrado en Ciencia e Ingeniería de la Computación - UNAM</affiliation></author>
       <author><first>Helena</first><last>Gómez - Adorno</last><affiliation>Instituto de Investigaciones en Matemáticas Aplicadas y en Sistemas - UNAM</affiliation></author>
-      <author><first>Gerardo</first><last>Sierra</last><affiliation>Instituto de Ingeniería - UNAM</affiliation></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last><affiliation>Instituto de Ingeniería - UNAM</affiliation></author>
       <author><first>Vladimir</first><last>Sierra - Casiano</last><affiliation>Facultad de Ciencias - UNAM</affiliation></author>
       <author><first>Diana</first><last>Canchola - Hernández</last><affiliation>Facultad de Ciencias - UNAM</affiliation></author>
       <author><first>José</first><last>Tovar - Cortés</last><affiliation>Facultad de Ciencias - UNAM</affiliation></author>
@@ -1208,7 +1208,7 @@
       <author><first>Xilu</first><last>Cai</last><affiliation>15839566797</affiliation></author>
       <author><first>Yajuan</first><last>Tong</last><affiliation>CCNU</affiliation></author>
       <author><first>Chengzhao</first><last>Wu</last><affiliation>CCNU</affiliation></author>
-      <author id="xin-xu"><first>Xin</first><last>Xu</last><affiliation>ccnu</affiliation></author>
+      <author><first>Xin</first><last>Xu</last><affiliation>ccnu</affiliation></author>
       <author><first>Guanyi</first><last>Chen</last><affiliation>Central China Normal University</affiliation></author>
       <author><first>Tingting</first><last>He</last><affiliation>Central China Normal University</affiliation></author>
       <pages>841-845</pages>
@@ -1314,7 +1314,7 @@
       <author><first>Neris</first><last>Özen</last><affiliation>Wageningen Food Safety Research</affiliation></author>
       <author><first>Bas</first><last>Van Der Velden</last><affiliation>Wageningen Food Safety Research</affiliation></author>
       <author><first>Iris</first><last>Hendrickx</last><affiliation>Centre for Language Studies, Radboud University Nijmegen</affiliation></author>
-      <author><first>Ali</first><last>Hurriyetoglu</last><affiliation>WUR</affiliation></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hurriyetoglu</last><affiliation>WUR</affiliation></author>
       <pages>914-930</pages>
       <abstract>This paper presents our system developed for the SemEval-2025 Task 9: The Food Hazard Detection Challenge. The shared task’s objective is to evaluate explainable classification systems for classifying hazards and products in two levels of granularity from web-collected food recall incident reports. In this work, we propose text augmentation techniques as a way to improve poor performance in minority classes and compare their effect for each category on various transformer and machine learning models. We apply three word-level data augmentation techniques, namely synonym replacement, random word swapping, and contextual word insertion utilizing BERT. The resultsshow that transformer models tend to have a better overall performance. Meanwhile, a statistically significant improvement (P 0.05) was observed in the fine-grained categories when using BERT to compare the baseline model with the three augmented models, which achieved a 6% increase in correct predictions for minority hazard classes. This suggests that targeted augmentation of minority classes can improve the performance of transformer models.</abstract>
       <url hash="2f20fdc4">2025.semeval-1.124</url>
@@ -2001,7 +2001,7 @@
     <paper id="187">
       <title>Atyaephyra at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 4: Low-Rank Negative Preference Optimization</title>
       <author><first>Jan</first><last>Bronec</last><affiliation>Faculty of Mathematics and Physics, Charles University</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
       <pages>1415-1422</pages>
       <abstract>We present a submission to the SemEval 2025 shared task on unlearning sensitive content from LLMs. Our approach employs negative preference optimization using low-rank adaptation. We show that we can utilize this combination to cheaply compute additional regularization terms, which help with unlearning stabilization. The results of our approach significantly exceed the shared task baselines.</abstract>
       <url hash="d58e06f6">2025.semeval-1.187</url>
@@ -2031,7 +2031,7 @@
     <paper id="190">
       <title><fixed-case>COGNAC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Multi-level Narrative Classification with Summarization and Hierarchical Prompting</title>
       <author><first>Azwad Anjum</first><last>Islam</last><affiliation>Florida International University</affiliation></author>
-      <author><first>Mark</first><last>Finlayson</last><affiliation>FIU</affiliation></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last><affiliation>FIU</affiliation></author>
       <pages>1442-1449</pages>
       <abstract>We present our approach to solving the Narrative Classification portion of the Multilingual Characterization and Extraction of Narratives SemEval-2025 challenge (Task 10, Subtask 2). This task is a multi-label, multi-class document classification task, where the classes were defined via natural language titles, descriptions, short examples, and annotator instructions, with only a few (and sometime no) labeled examples for training. Our approach leverages a text-summarization, binary relevance with zero-shot prompts, and hierarchical prompting using Large Language Models (LLM) to identify the narratives and subnarratives in the provided news articles. Notably, we did not use the labeled examples to train the system. Our approach well outperforms the official baseline and achieves an F1 score of 0.55 (narratives) and 0.43 (subnarratives), and placed 2nd in the test-set leaderboard at the system submission deadline. We provide an in-depth analysis of the construction and effectiveness of our approach using both open-source (LLaMA 3.1-8B-Instruct) and proprietary (GPT 4o-mini) Large Language Models under different prompting setups.</abstract>
       <url hash="1486215a">2025.semeval-1.190</url>
@@ -2095,7 +2095,7 @@
       <author><first>Fatima</first><last>Uroosa</last><affiliation>IPN-CIC</affiliation></author>
       <author><first>Tewodros Achamaleh</first><last>Bizuneh</last><affiliation>IPN-CIC</affiliation></author>
       <author><first>Grigori</first><last>Sidorov</last><affiliation>CIC-IPN</affiliation></author>
-      <author><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last><affiliation>Instituto Politécnico Nacional</affiliation></author>
       <pages>1485-1494</pages>
       <abstract>Emotions play a fundamental role in the decision-making process, shaping human actions across diverse disciplines. The extensive usage of emotion intensity detection approaches has generated substantial research interest during the last few years. Efficient multi-label emotion intensity detection remains unsatisfactory even for high-resource languages, with a substantial performance gap among well-resourced and under-resourced languages. Team {textbf{Tewodros}} participated in SemEval-2025 Task 11, Track B, focusing on detecting text-based emotion intensity. Our work involved multi-label emotion intensity detection across three languages: Amharic, English, and Spanish, using the (afro-xlmr-large-76L), (DeBERTa-v3-base), and (BERT-base-Spanish-wwm-uncased) models. The models achieved an average F1 score of 0.6503 for Amharic, 0.5943 for English, and an accuracy score of 0.6228 for Spanish. These results demonstrate the effectiveness of our models in capturing emotion intensity across multiple languages.</abstract>
       <url hash="f3e113aa">2025.semeval-1.196</url>
@@ -2104,7 +2104,7 @@
     <paper id="197">
       <title><fixed-case>S</fixed-case>heffield<fixed-case>GATE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 2: Multi-Stage Reasoning with Knowledge Fusion for Entity Translation</title>
       <author><first>Xinye</first><last>Yang</last><affiliation>The University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>The University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>The University of Sheffield</affiliation></author>
       <author><first>Xingyi</first><last>Song</last><affiliation>The University of Sheffield</affiliation></author>
       <pages>1495-1503</pages>
       <abstract>This paper describes the machine translation system submitted to the SemEval-2025 Entity-Aware Machine Translation Task by the SheffieldGATE Team. We proposed a multi-agent entity-aware machine translation system that operates through three distinct reasoning stages: entity recognition, knowledge enhancement, and translation decision-making. The innovation in our approach lies in leveraging large language models to generate contextually relevant queries during the knowledge enhancement stage, extracting candidate entities and their translations from external knowledge bases. In the final translation decision-making stage, we employ fine-tuned large language models to denoise the retrieved knowledge, selecting the most relevant entity information to ensure accurate translation of the original text. Experimental results demonstrate our system’s effectiveness. In emEval-2025 Task 2, our system ranks first among all systems in Spanish entity translation metrics and third in Italian. For systems that do not use gold standard entity IDs during test set inference, ours achieves the highest overall scores across four language pairs: German, French, Italian, and Spanish.</abstract>
@@ -2115,7 +2115,7 @@
       <title><fixed-case>ITUNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 8: Question-Answering over Tabular Data: A Zero-Shot Approach using <fixed-case>LLM</fixed-case>-Driven Code Generation</title>
       <author><first>Atakan</first><last>Site</last><affiliation>Istanbul Techical University</affiliation></author>
       <author><first>Emre</first><last>Erdemir</last><affiliation>Istanbul Technical University</affiliation></author>
-      <author><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last><affiliation>Istanbul Technical University</affiliation></author>
       <pages>1504-1514</pages>
       <abstract>This paper presents our system for SemEval-2025 Task 8: DataBench, Question-Answeringover Tabular Data. The primary objective ofthis task is to perform question answering ongiven tabular datasets from diverse domains;under two subtasks: DataBench QA (SubtaskI) and DataBench Lite QA (Subtask II). Totackle both subtasks, we developed a zero-shotsolution with a particular emphasis on lever-aging Large Language Model (LLM)-basedcode generation. Specifically, we proposeda Python code generation framework, utiliz-ing state-of-the-art open-source LLMs to gen-erate executable Pandas code via optimizedprompting strategies. Our experiments revealthat different LLMs exhibit varying levels ofeffectiveness in Python code generation. Addi-tionaly, results show that Python code genera-tion achieves superior performance in tabularquestion answering compared to alternative ap-proaches. Although our ranking among zero-shot systems is unknown at the time of this pa-per’s submission, our system achieved eighthplace in Subtask I and sixth place in Subtask IIamong the 30 systems that outperformed thebaseline in the open-source models category.</abstract>
       <url hash="10e7ca66">2025.semeval-1.198</url>
@@ -2134,7 +2134,7 @@
       <title>Ustnlp16 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 9: Improving Model Performance through Imbalance Handling and Focal Loss</title>
       <author><first>Zhuoang</first><last>Cai</last><affiliation>HKUST</affiliation></author>
       <author><first>Zhenghao</first><last>Li</last><affiliation>HKUST</affiliation></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>HKUST</affiliation></author>
+      <author><first>Yang</first><last>Liu</last><affiliation>HKUST</affiliation></author>
       <author><first>Liyuan</first><last>Guo</last><affiliation>HKUST</affiliation></author>
       <author><first>Yangqiu</first><last>Song</last><affiliation>HKUST</affiliation></author>
       <pages>1522-1527</pages>
@@ -2209,7 +2209,7 @@
       <author><first>Frances Adriana</first><last>Laureano De Leon</last><affiliation>University of Birmingham</affiliation></author>
       <author><first>Yixiao</first><last>Wang</last><affiliation>Birmingham University</affiliation></author>
       <author><first>Yue</first><last>Feng</last><affiliation>University of Birmingham</affiliation></author>
-      <author><first>Mark</first><last>Lee</last><affiliation>University of Birmingham</affiliation></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last><affiliation>University of Birmingham</affiliation></author>
       <pages>1570-1576</pages>
       <abstract>Emotion detection in natural language processing is a challenging task due to the complexity of human emotions and linguistic diversity. While significant progress has been made in high-resource languages, emotion detection in low-resource languages remains underexplored. In this work, we address multilingual and cross-lingual emotion detection by leveraging adapter-based fine-tuning with multilingual pre-trained language models. Adapters introduce a small number of trainable parameters while keeping the pre-trained model weights fixed, offering a parameter-efficient approach to adaptation. We experiment with different adapter tuning strategies, including task-only adapters, target-language-ready task adapters, and language-family-based adapters. Our results show that target-language-ready task adapters achieve the best overall performance, particularly for low-resource African languages with our team ranking 7th for Tigrinya, and 8th for Kinyarwanda. In Track C, our system ranked 5th for Oromo, Tigrinya, Kinyarwanda, Amharic, and Igbo. Our approach outperforms large language models in 11 languages and matches their performance in four others, despite using significantly fewer parameters. Furthermore, we find that adapter-based models retain cross-linguistic transfer capabilities while requiring fewer computational resources compared to full fine-tuning for each language.</abstract>
       <url hash="1873bcc2">2025.semeval-1.207</url>
@@ -2221,7 +2221,7 @@
       <author><first>Karla</first><last>Salas-Jimenez</last></author>
       <author><first>Adrián</first><last>Juárez-Pérez</last></author>
       <author><first>Diego</first><last>Hernández-Bustamante</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Helena</first><last>Gómez-Adorno</last></author>
       <pages>1577-1584</pages>
       <abstract>We present MeSSI, a multi-module system applied to SemEval 2025’s task 3: Mu-SHROOM. Our system tags questions in order to obtain semantic relevant terms that are used as information retrieval characteristics. Said characteristics serve as extraction terms for Wikipedia pages that are in turn processed to generate gold standard texts used in a hallucination evaluation system. A PoST-based entity comparison was implemented to contrast the test dataset sentences with the corresponding generated gold standards, wich in turn was the main criteria to tag hallucinations, partitioned in soft labels and hard labels. This method was tested in Spanish and English, finishing 18th and 19th respectively on the IoU based ranking.</abstract>
@@ -2515,7 +2515,7 @@
     <paper id="235">
       <title><fixed-case>M</fixed-case>c<fixed-case>G</fixed-case>ill-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Bridging the Gap in Text-Based Emotion Detection</title>
       <author><first>Vivek</first><last>Verma</last><affiliation>UdeM</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
       <pages>1783-1789</pages>
       <abstract>In this paper, we present the results of our SemEval-2025 Emotion Detection Shared Task Track A which focuses on multi-label emotion detection. Our team’s approach leverages prompting GPT-4o, fine-tuning NLLB- LLM2Vec encoder, and an ensemble of these two approaches to solve Track A. Our ensemble method beats the baseline method that fine-tuned RemBERT encoder in 24 of the 28 languages. Furthermore, our results shows that the average performance is much worse for under-resourced languages in the Afro- Asiatic, Niger-Congo and Austronesia with per- formance scores at 50 F1 points and below.</abstract>
       <url hash="b1893776">2025.semeval-1.235</url>
@@ -2543,7 +2543,7 @@
     </paper>
     <paper id="238">
       <title><fixed-case>W</fixed-case>ikidata-Driven Entity-Aware Translation: Boosting <fixed-case>LLM</fixed-case>s with External Knowledge</title>
-      <author id="lu-xu"><first>Lu</first><last>Xu</last><affiliation>Sapienza NLP Group, Sapienza University of Rome</affiliation></author>
+      <author><first>Lu</first><last>Xu</last><affiliation>Sapienza NLP Group, Sapienza University of Rome</affiliation></author>
       <pages>1802-1809</pages>
       <abstract>This paper presents an entity-aware machine translation system that significantly improves named entity translation by integrating external knowledge from Wikidata with Large Language Models (LLMs). While LLMs demonstrate strong general translation capabilities, they struggle with named entities that require specific cultural or domain knowledge. We address this challenge through two approaches: retrieving multilingual entity representations using gold Wikidata IDs, and employing Relik, an information extraction tool, to automatically detect and link entities without gold annotations. Experiments across multiple language pairs show our system outperforms baselines by up to 63 percentage points in entity translation accuracy (m-ETA) while maintaining high overall translation quality. Our approach ranked 3rd overall and 1st among non-finetuned systems on the SemEval-2025 Task 2 leaderboard. Additionally, we introduced language-specific post-processing further enhances performance, particularly for Traditional Chinese translations.</abstract>
       <url hash="805ab4a7">2025.semeval-1.238</url>
@@ -2858,7 +2858,7 @@
       <title><fixed-case>JU</fixed-case>-<fixed-case>CSE</fixed-case>-<fixed-case>NLP</fixed-case>’25 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 4: Learning to Unlearn <fixed-case>LLM</fixed-case>s</title>
       <author><first>Arkajyoti</first><last>Naskar</last><affiliation>Jadavpur University</affiliation></author>
       <author><first>Dipankar</first><last>Das</last><affiliation>Jadavpur University</affiliation></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last><affiliation>JADAVPUR UNIVERSITY</affiliation></author>
       <pages>2059-2064</pages>
       <abstract>Large Language Models (LLMs) have achieved enormous success recently due to their ability to understand and solve various non-trivial tasks in natural language. However, they have been shown to memorize their training data which, among other concerns, increases the risk of the model regurgitating creative or private content, potentially leading to legal issues for the model developer and/or vendors. Such issues are often discovered post-model training during testing or red teaming. While unlearning has been studied for some time in classification problems, it is still a relatively underdeveloped area of study in LLM research since the latter operates in a potentially unbounded output label space. Specifically, robust evaluation frameworks are lacking to assess the accuracy of these unlearning strategies. In this challenge, we aim to bridge this gap by developing a comprehensive evaluation challenge for unlearning sensitive datasets in LLMs.</abstract>
       <url hash="51f8c93f">2025.semeval-1.267</url>
@@ -3193,7 +3193,7 @@
       <author><first>Pascal</first><last>Gaertner</last><affiliation>Technical University of Munich</affiliation></author>
       <author><first>Quim</first><last>Ribas</last><affiliation>TUM</affiliation></author>
       <author><first>Daryna</first><last>Dementieva</last><affiliation>Technical University of Munich</affiliation></author>
-      <author><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last><affiliation>Ludwig-Maximilians-Universität München</affiliation></author>
       <pages>2283-2296</pages>
       <abstract>As human-machine interactions become increasingly natural through text, accurate emotion recognition is essential. Detecting emotions provides valuable insights across various applications. In this paper, we present our approach for SemEval-2025 Task 11, Track A, which focuses on multi-label text-based detection of perceived emotions. Our system was designed for and tested on English language text. To classify emotions present in text snippets, we initially experimented with traditional techniques such as Logistic Regression, Gradient Boosting, and SVM. We then explored state-of-the-art LLMs (OpenAI o1 and DeepSeek V3) before developing our final system, a fine-tuned Transformer-based model. Our best-performing approach employs an ensemble of fine-tuned DeBERTa-large instances with multiple seeds, optimized using Optuna and StratifiedKFold cross-validation. This approach achieves an F1-score of 0.75, demonstrating promising results with room for further improvement.</abstract>
       <url hash="54c3dd6f">2025.semeval-1.298</url>
@@ -3218,7 +3218,7 @@
       <author><first>Julia S.</first><last>Dollis</last></author>
       <author><first>Daniel M.</first><last>Pedrozo</last></author>
       <author><first>Artur M. A.</first><last>Novais</last></author>
-      <author><first>Diogo F. C.</first><last>Silva</last></author>
+      <author id="diogo-gloria-silva"><first>Diogo F. C.</first><last>Silva</last></author>
       <author><first>Arlindo R.</first><last>Galvão Filho</last></author>
       <pages>2305-2310</pages>
       <abstract>This paper investigates the impact of data quality and processing strategies on emotion recognition in Brazilian Portuguese (PTBR) texts. We focus on data distribution, linguistic context, and augmentation techniques such as translation and synthetic data generation. To evaluate these aspects, we conduct experiments on the PTBR portion of the BRIGHTER dataset, a manually curated multilingual dataset containing nearly 100,000 samples, of which 4,552 are in PTBR. Our study encompasses both multi-label emotion detection (presence/absence classification) and emotion intensity prediction (0 to 3 scale), following the SemEval 2025 Track 11 setup. Results demonstrate that emotion intensity labels enhance model performance after discretization, and that smaller multilingual models can outperform larger ones in low-resource settings. Our official submission ranked 6th, but further refinements improved our ranking to 3rd, trailing the top submission by only 0.047, reinforcing the significance of a data-centric approach in emotion recognition.</abstract>
@@ -3317,10 +3317,10 @@
     <paper id="309">
       <title>Team <fixed-case>ACK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 2: Beyond Word-for-Word Machine Translation for <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Pairs</title>
       <author><first>Daniel</first><last>Lee</last><affiliation>Adobe Inc.</affiliation></author>
-      <author><first>Harsh</first><last>Sharma</last><affiliation>CU Boulder</affiliation></author>
+      <author id="harsh-vardhan-sharma"><first>Harsh</first><last>Sharma</last><affiliation>CU Boulder</affiliation></author>
       <author><first>Jieun</first><last>Han</last><affiliation>KAIST</affiliation></author>
       <author><first>Sunny</first><last>Jeong</last><affiliation>New York University</affiliation></author>
-      <author><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last><affiliation>KAIST</affiliation></author>
       <author><first>Vered</first><last>Shwartz</last><affiliation>UBC</affiliation></author>
       <pages>2376-2388</pages>
       <abstract>Translating knowledge-intensive and entity-rich text between English and Korean requires transcreation to preserve language-specific and cultural nuances beyond literal, phonetic or word-for-word conversion. We evaluate 13 models (LLMs and MT systems) using automatic metrics and human assessment by bilingual annotators. Our findings show LLMs outperform traditional MT systems but struggle with entity translation requiring cultural adaptation. By constructing an error taxonomy, we identify incorrect responses and entity name errors as key issues, with performance varying by entity type and popularity level. This work exposes gaps in automatic evaluation metrics and hope to enable future work in completing culturally-nuanced machine translation.</abstract>
@@ -3465,7 +3465,7 @@
       <author><first>Timothee</first><last>Mickus</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Elaine</first><last>Zosa</last><affiliation>SiloGen</affiliation></author>
       <author><first>Teemu</first><last>Vahtola</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Aman</first><last>Sinha</last><affiliation>University of Lorraine</affiliation></author>
       <author><first>Vincent</first><last>Segonne</last><affiliation>IRISA - UniversitÃ© Bretagne Sud</affiliation></author>
       <author><first>Fernando</first><last>Sanchez - Vega</last><affiliation>Center for Mathematical Research (CIMAT)</affiliation></author>
@@ -3473,7 +3473,7 @@
       <author><first>Jindřich</first><last>Libovický</last><affiliation>Charles Univeristy</affiliation></author>
       <author><first>Jussi</first><last>Karlgren</last><affiliation>Silo AI</affiliation></author>
       <author><first>Shaoxiong</first><last>Ji</last><affiliation>University of Helsinki</affiliation></author>
-      <author><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last><affiliation>Charles University in Prague</affiliation></author>
       <author><first>Liane</first><last>Guillou</last><affiliation>RISE Research Institutes of Sweden</affiliation></author>
       <author><first>Ona</first><last>De Gibert</last><affiliation>University of Helsinki</affiliation></author>
       <author><first>Jaione</first><last>Bengoetxea</last><affiliation>HiTZ Center - Ixa, University of the Basque Country UPV/EHU</affiliation></author>
@@ -3495,7 +3495,7 @@
       <author><first>Juraj</first><last>Podrouzek</last><affiliation>Kempelen Institute of Intelligent Technologies</affiliation></author>
       <author><first>Matúš</first><last>Mesarčík</last><affiliation>Kempelen Institute of Intelligent Technologies</affiliation></author>
       <author><first>Jaroslav</first><last>Kopčan</last><affiliation>Kempelen Institute of Intelligent Technologies</affiliation></author>
-      <author><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last><affiliation>University of Copenhagen</affiliation></author>
       <pages>2498-2511</pages>
       <abstract>The rapid spread of online disinformation presents a global challenge, and machine learning has been widely explored as a potential solution. However, multilingual settings and low-resource languages are often neglected in this field. To address this gap, we conducted a shared task on multilingual claim retrieval at SemEval 2025, aimed at identifying fact-checked claims that match newly encountered claims expressed in social media posts across different languages. The task includes two subtracks: 1) a monolingual track, where social posts and claims are in the same language 2) a crosslingual track, where social posts and claims might be in different languages. A total of 179 participants registered for the task contributing to 52 test submissions. 23 out of 31 teams have submitted their system papers. In this paper, we report the best-performing systems as well as the most common and the most effective approaches across both subtracks. This shared task, along with its dataset and participating systems, provides valuable insights into multilingual claim retrieval and automated fact-checking, supporting future research in this field.</abstract>
       <url hash="445ea2d3">2025.semeval-1.323</url>
@@ -3549,7 +3549,7 @@
       <author><first>Ibrahim Said</first><last>Ahmad</last><affiliation>Northeastern University</affiliation></author>
       <author><first>Nirmal</first><last>Surange</last><affiliation>International Institute of Information Technology Hyderabad</affiliation></author>
       <author><first>Daniela</first><last>Teodorescu</last><affiliation>University of Alberta, LMU Munich</affiliation></author>
-      <author><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
+      <author id="david-ifeoluwa-adelani"><first>David Ifeoluwa</first><last>Adelani</last><affiliation>McGill University / MILA</affiliation></author>
       <author><first>Alham Fikri</first><last>Aji</last><affiliation>MBZUAI</affiliation></author>
       <author><first>Felermino Dario Mario</first><last>Ali</last><affiliation>Lurio University</affiliation></author>
       <author><first>Vladimir</first><last>Araujo</last><affiliation>KU Leuven</affiliation></author>
@@ -3557,7 +3557,7 @@
       <author><first>Oana</first><last>Ignat</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Alexander</first><last>Panchenko</last><affiliation>Skolkovo Institue of Science and Technology</affiliation></author>
       <author><first>Yi</first><last>Zhou</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last><affiliation>National Research Council Canada</affiliation></author>
       <pages>2558-2569</pages>
       <abstract>We present our shared task on text-based emotion detection, covering more than 30 languages from seven distinct language families. These languages are predominantly low-resource and spoken across various continents. The data instances are multi-labeled into six emotional classes, with additional datasets in 11 languages annotated for emotion intensity. Participants were asked to predict labels in three tracks: (a) emotion labels in monolingual settings, (b) emotion intensity scores, and (c) emotion labels in cross-lingual settings.</abstract>
       <url hash="1e7f3b4e">2025.semeval-1.327</url>
@@ -3620,7 +3620,7 @@
       <author><first>Elisa</first><last>Sartori</last><affiliation>University of Padova</affiliation></author>
       <author><first>Nicolas</first><last>Stefanovitch</last><affiliation>Joint Research Centre</affiliation></author>
       <author><first>Zhuohan</first><last>Xie</last><affiliation>MBZUAI</affiliation></author>
-      <author><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last><affiliation>Mohamed bin Zayed University of Artificial Intelligence</affiliation></author>
       <author><first>Giovanni</first><last>Da San Martino</last><affiliation>University of Padova</affiliation></author>
       <pages>2610-2643</pages>
       <abstract>We introduce SemEval-2025 Task 10 on Multilingual Characterization and Extraction of Narratives from Online News, which focuses on the identification and analysis of narratives in online news media. The task is structured into three subtasks: (1) Entity Framing, to identify the roles that relevant entities play within narratives, (2) Narrative Classification, to assign documents fine-grained narratives according to a given, topic-specific taxonomy of narrative labels, and (3) Narrative Extraction, to provide a justification for the dominant narrative of the document. To this end, we analyze news articles across two critical domains, Ukraine-Russia War and Climate Change, in five languages: Bulgarian, English, Hindi, Portuguese, and Russian. This task introduces a novel multilingual and multifaceted framework for studying how online news media construct and disseminate manipulative narratives. By addressing these challenges, our work contributes to the broader effort of detecting, understanding, and mitigating the spread of propaganda and disinformation. The task attracted a lot of interest: 310 teams registered, with 66 submitting official results on the test set.</abstract>
diff --git a/data/xml/2025.sicon.xml b/data/xml/2025.sicon.xml
index 611368b24d..8a907b00f8 100644
--- a/data/xml/2025.sicon.xml
+++ b/data/xml/2025.sicon.xml
@@ -74,7 +74,7 @@
       <title><fixed-case>PROTECT</fixed-case>: Policy-Related Organizational Value Taxonomy for Ethical Compliance and Trust</title>
       <author><first>Avni</first><last>Mittal</last><affiliation>Microsoft</affiliation></author>
       <author><first>Sree</first><last>Hari Nagaralu</last><affiliation>Microsoft</affiliation></author>
-      <author><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last><affiliation>Microsoft</affiliation></author>
       <pages>73-75</pages>
       <abstract>This paper presents PROTECT, a novel policy-driven organizational value taxonomy designed to enhance ethical compliance and trust within organizations. Drawing on established human value systems and leveraging large language models, PROTECT generates values tailored to organizational contexts and clusters them into a refined taxonomy. This taxonomy serves as the basis for creating a comprehensive dataset of compliance scenarios, each linked to specific values and paired with both compliant and non-compliant responses. By systematically varying value emphasis, we illustrate how different LLM personas emerge, reflecting diverse compliance behaviors. The dataset, directly grounded in the taxonomy, enables consistent evaluation and training of LLMs on value-sensitive tasks. While PROTECT offers a robust foundation for aligning AI systems with organizational standards, our experiments also reveal current limitations in model accuracy, highlighting the need for further improvements. Together, the taxonomy and dataset represent complementary, foundational contributions toward value-aligned AI in organizational settings.</abstract>
       <url hash="001437cc">2025.sicon-1.5</url>
@@ -119,7 +119,7 @@
     <paper id="9">
       <title>Steering Conversational Large Language Models for Long Emotional Support Conversations</title>
       <author><first>Navid</first><last>Madani</last><affiliation>State University of New York at Buffalo</affiliation></author>
-      <author><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last><affiliation>State University of New York at Buffalo</affiliation></author>
       <pages>109-123</pages>
       <abstract>In this study, we address the challenge of consistently following emotional support strategies in long conversations by large language models (LLMs). We introduce the Strategy-Relevant Attention (SRA) metric, a model-agnostic measure designed to evaluate the effectiveness of LLMs in adhering to strategic prompts in emotional support contexts. By analyzing conversations within the Emotional Support Conversations dataset (ESConv) using LLaMA models, we demonstrate that SRA is significantly correlated with a model’s ability to sustain the outlined strategy throughout the interactions. Our findings reveal that the application of SRA-informed prompts leads to enhanced strategic adherence, resulting in conversations that more reliably exhibit the desired emotional support strategies over longer conversations. Furthermore, we contribute a comprehensive, multi-branch synthetic conversation dataset for ESConv, featuring a variety of strategy continuations informed by our optimized prompting method. The code and data are publicly available on our Github.</abstract>
       <url hash="de15f3b8">2025.sicon-1.9</url>
diff --git a/data/xml/2025.sigmorphon.xml b/data/xml/2025.sigmorphon.xml
index 8539cb5bf0..9bc83f53ed 100644
--- a/data/xml/2025.sigmorphon.xml
+++ b/data/xml/2025.sigmorphon.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the The 22nd SIGMORPHON workshop on Computational Morphology, Phonology, and Phonetics</booktitle>
       <editor><first>Garrett</first><last>Nicolai</last></editor>
       <editor><first>Eleanor</first><last>Chodroff</last></editor>
-      <editor><first>Frederic</first><last>Mailhot</last></editor>
+      <editor id="frederic-mailhot"><first>Frederic</first><last>Mailhot</last></editor>
       <editor><first>Çağrı</first><last>Çöltekin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Albuquerque, New Mexico, USA</address>
diff --git a/data/xml/2025.sigtyp.xml b/data/xml/2025.sigtyp.xml
index cc2da8a508..dc88c71abb 100644
--- a/data/xml/2025.sigtyp.xml
+++ b/data/xml/2025.sigtyp.xml
@@ -130,7 +130,7 @@
       <author><first>Haotian</first><last>Ye</last><affiliation>Center for Information and Language Processing</affiliation></author>
       <author><first>Jonathan</first><last>Brennan</last><affiliation>University of Michigan - Ann Arbor</affiliation></author>
       <author><first>Helmut</first><last>Schmid</last><affiliation>Center for Information and Language Processing</affiliation></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Nima</first><last>Mesgarani</last><affiliation>Columbia University</affiliation></author>
       <pages>75-81</pages>
       <abstract>In this work, we introduce XCOMPS, a multilingual conceptual minimal pair dataset that covers 17 languages.Using this dataset, we evaluate LLMs’ multilingual conceptual understanding through metalinguistic prompting, direct probability measurement, and neurolinguistic probing. We find that: 1) LLMs exhibit weaker conceptual understanding for low-resource languages, and accuracy varies across languages despite being tested on the same concept sets. 2) LLMs excel at distinguishing concept-property pairs that are visibly different but exhibit a marked performance drop when negative pairs share subtle semantic similarities. 3) More morphologically complex languages yield lower concept understanding scores and require deeper layers for conceptual reasoning.</abstract>
@@ -141,7 +141,7 @@
     <paper id="11">
       <title>Tone in Perspective: A Computational Typological Analysis of Tone Function in <fixed-case>ASR</fixed-case></title>
       <author><first>Siyu</first><last>Liang</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last><affiliation>University of Washington</affiliation></author>
       <pages>82-92</pages>
       <abstract>This study investigates the impact of pitch flattening on automatic speech recognition (ASR) performance across tonal and non-tonal languages. Using vocoder-based signal processing techniques, we created pitch-flattened versions of speech recordings and compared ASR performance against original recordings. Results reveal that tonal languages experience substantially larger performance degradation than non-tonal languages. Analysis of tone confusion matrices shows systematic patterns of misidentification where contour tones collapse toward level tones when pitch information is removed. Calculation of tone’s functional load at syllable and word levels demonstrates that syllable-level functional load strongly predicts ASR vulnerability to pitch flattening, while word-level patterns reflect each language’s morphological structure. These findings illuminate the differential importance of pitch information across languages and suggest that ASR systems for languages with high syllable-level functional load require more robust pitch modeling.</abstract>
       <url hash="c4c76165">2025.sigtyp-1.11</url>
@@ -165,8 +165,8 @@
       <author><first>Daniela</first><last>Goschala</last></author>
       <author><first>Amir Hossein</first><last>Kargaran</last></author>
       <author><first>Yihong</first><last>Liu</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>114-121</pages>
       <abstract>When translating into a low-resource language, a language model can have a tendency to produce translations that are close to the source (e.g., word-by-word translations) due to a lack of rich low-resource training data in pretraining. Thus, the output often is translationese that differs considerably from what native speakers would produce naturally. To remedy this, we synthetically create a training set in which the frequency of a construction unique to the low-resource language is artificially inflated. For the case of Bavarian, we show that, after training, the language model has learned the unique construction and that native speakers judge its output as more natural. Our pilot study suggests that construction-based mitigation of translationese is a promising approach. Code and artifacts are available at <url>https://github.com/cisnlp/BayernGPT</url>.</abstract>
       <url hash="ef5ed912">2025.sigtyp-1.13</url>
diff --git a/data/xml/2025.sumeval.xml b/data/xml/2025.sumeval.xml
index 22327e4cfa..4c83c3066e 100644
--- a/data/xml/2025.sumeval.xml
+++ b/data/xml/2025.sumeval.xml
@@ -30,7 +30,7 @@
       <author><first>Geyu</first><last>Lin</last></author>
       <author><first>Bin</first><last>Wang</last></author>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>12–23</pages>
       <abstract>Multilingual proficiency presents a significant challenge for large language models (LLMs). English-centric models are usually suboptimal in other languages, particularly those that are linguistically distant from English. This performance discrepancy mainly stems from the imbalanced distribution of training data across languages during pre-training and instruction tuning stages. To address this problem, we propose a novel approach called CrossIn, which utilizes a mixed composition of cross-lingual instruction tuning data. Our method leverages the compressed representation shared by various languages to efficiently enhance the model’s task-solving capabilities and multilingual proficiency within a single process. In addition, we introduce a multi-task and multi-faceted benchmark to evaluate the effectiveness of CrossIn. Experimental results demonstrate that our method substantially improves performance across tasks and languages, and we provide extensive insights into the impact of cross-lingual data volume and the integration of translation data on enhancing multilingual consistency and accuracy.</abstract>
       <url hash="6827375e">2025.sumeval-2.2</url>
diff --git a/data/xml/2025.tacl.xml b/data/xml/2025.tacl.xml
index 9933a72cfd..318a3bb5e5 100644
--- a/data/xml/2025.tacl.xml
+++ b/data/xml/2025.tacl.xml
@@ -43,7 +43,7 @@
       <author><first>Mary</first><last>Williamson</last></author>
       <author><first>Gabriel</first><last>Synnaeve</last></author>
       <author><first>Juan</first><last>Pino</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <doi>10.1162/tacl_a_00728</doi>
       <abstract>We introduce SpiRit-LM, a foundation multimodal language model that freely mixes text and speech. Our model is based on a 7B pretrained text language model that we extend to the speech modality by continuously training it on text and speech units. Speech and text sequences are concatenated as a single stream of tokens, and trained with a word-level interleaving method using a small automatically curated speech-text parallel corpus. SpiRit-LM comes in two versions: a Base version that uses speech phonetic units (HuBERT) and an Expressive version that models expressivity using pitch and style units in addition to the phonetic units. For both versions, the text is encoded with subword BPE tokens. The resulting model displays both the semantic abilities of text models and the expressive abilities of speech models. Additionally, we demonstrate that SpiRit-LM can learn new tasks in a few-shot fashion across modalities (i.e., ASR, TTS, Speech Classification). We make available model weights and inference code.1,2</abstract>
@@ -54,9 +54,9 @@
     <paper id="3">
       <title><fixed-case>CLAP</fixed-case>nq: Cohesive Long-form Answers from Passages in Natural Questions for <fixed-case>RAG</fixed-case> systems</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.1162/tacl_a_00729</doi>
       <abstract>Retrieval Augmented Generation (RAG) has become a popular application for large language models. It is preferable that successful RAG systems provide accurate answers that are supported by being grounded in a passage without any hallucinations. While considerable work is required for building a full RAG pipeline, being able to benchmark performance is also necessary. We present CLAPnq, a benchmark Long-form Question Answering dataset for the full RAG pipeline. CLAPnq includes long answers with grounded gold passages from Natural Questions (NQ) and a corpus to perform either retrieval, generation, or the full RAG pipeline. The CLAPnq answers are concise, 3x smaller than the full passage, and cohesive, meaning that the answer is composed fluently, often by integrating multiple pieces of the passage that are not contiguous. RAG models must adapt to these properties to be successful at CLAPnq. We present baseline experiments and analysis for CLAPnq that highlight areas where there is still significant room for improvement in grounded RAG. CLAPnq is publicly available at https://github.com/primeqa/clapnq.</abstract>
       <pages>53–72</pages>
@@ -96,7 +96,7 @@
       <author><first>Vidhisha</first><last>Balachandran</last></author>
       <author><first>Madhur</first><last>Panwar</last></author>
       <author><first>Tianxing</first><last>He</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Navin</first><last>Goyal</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <doi>10.1162/tacl_a_00733</doi>
@@ -125,9 +125,9 @@
       <author><first>Shutong</first><last>Feng</last></author>
       <author><first>Hsien-chin</first><last>Lin</last></author>
       <author><first>Nurul</first><last>Lubis</last></author>
-      <author><first>Benjamin</first><last>Ruppik</last></author>
+      <author id="benjamin-matthias-ruppik"><first>Benjamin</first><last>Ruppik</last></author>
       <author><first>Renato</first><last>Vukovic</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <doi>10.1162/tacl_a_00734</doi>
       <abstract>Supervised neural approaches are hindered by their dependence on large, meticulously annotated datasets, a requirement that is particularly cumbersome for sequential tasks. The quality of annotations tends to deteriorate with the transition from expert-based to crowd-sourced labeling. To address these challenges, we present CAMEL (Confidence-based Acquisition Model for Efficient self-supervised active Learning), a pool-based active learning framework tailored to sequential multi-output problems. CAMEL possesses two core features: (1) it requires expert annotators to label only a fraction of a chosen sequence, and (2) it facilitates self-supervision for the remainder of the sequence. By deploying a label correction mechanism, CAMEL can also be utilized for data cleaning. We evaluate CAMEL on two sequential tasks, with a special emphasis on dialogue belief tracking, a task plagued by the constraints of limited and noisy datasets. Our experiments demonstrate that CAMEL significantly outperforms the baselines in terms of efficiency. Furthermore, the data corrections suggested by our method contribute to an overall improvement in the quality of the resulting datasets.1</abstract>
       <pages>167–187</pages>
@@ -176,8 +176,8 @@
       <author><first>Abdelrahman</first><last>Sadallah</last></author>
       <author><first>Kirill</first><last>Grishchenkov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Maxim</first><last>Panov</last></author>
       <author><first>Artem</first><last>Shelmanov</last></author>
       <doi>10.1162/tacl_a_00737</doi>
@@ -214,7 +214,7 @@
       <author><first>Sara</first><last>Papi</last></author>
       <author><first>Peter</first><last>Polák</last></author>
       <author><first>Dominik</first><last>Macháček</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <doi>10.1162/tacl_a_00740</doi>
       <abstract>Simultaneous speech-to-text translation (SimulST) translates source-language speech into target-language text concurrently with the speaker’s speech, ensuring low latency for better user comprehension. Despite its intended application to unbounded speech, most research has focused on human pre-segmented speech, simplifying the task and overlooking significant challenges. This narrow focus, coupled with widespread terminological inconsistencies, is limiting the applicability of research outcomes to real-world applications, ultimately hindering progress in the field. Our extensive literature review of 110 papers not only reveals these critical issues in current research but also serves as the foundation for our key contributions. We: 1) define the steps and core components of a SimulST system, proposing a standardized terminology and taxonomy; 2) conduct a thorough analysis of community trends; and 3) offer concrete recommendations and future directions to bridge the gaps in existing literature, from evaluation frameworks to system architectures, for advancing the field towards more realistic and effective SimulST solutions.</abstract>
       <pages>281–313</pages>
@@ -301,7 +301,7 @@
     <paper id="21">
       <title>Phonetic Reconstruction of the Consonant System of Middle <fixed-case>C</fixed-case>hinese via Mixed Integer Optimization</title>
       <author><first>Xiaoxi</first><last>Luo</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <doi>10.1162/tacl_a_00742</doi>
       <abstract>This paper is concerned with phonetic reconstruction of the consonant system of Middle Chinese. We propose to cast the problem as a Mixed Integer Programming problem, which is able to automatically explore homophonic information from ancient rhyme dictionaries and phonetic information from modern Chinese dialects, the descendants of Middle Chinese. Numerical evaluation on a wide range of synthetic and real data demonstrates the effectiveness and robustness of the new method. We apply the method to information from Guǎngyùn and 20 modern Chinese dialects to obtain a new phonetic reconstruction result. A linguistically motivated discussion of this result is also provided.1</abstract>
       <pages>424–441</pages>
@@ -341,7 +341,7 @@
       <title>Few-Shot Multilingual Open-Domain <fixed-case>QA</fixed-case> from Five Examples</title>
       <author><first>Fan</first><last>Jiang</last></author>
       <author><first>Tom</first><last>Drummond</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <doi>10.1162/tacl_a_00750</doi>
       <abstract>Recent approaches to multilingual open- domain question answering (MLODQA) have achieved promising results given abundant language-specific training data. However, the considerable annotation cost limits the application of these methods for underrepresented languages. We introduce a few-shot learning approach to synthesize large-scale multilingual data from large language models (LLMs). Our method begins with large-scale self-supervised pre-training using WikiData, followed by training on high-quality synthetic multilingual data generated by prompting LLMs with few-shot supervision. The final model, FsModQA, significantly outperforms existing few-shot and supervised baselines in MLODQA and cross-lingual and monolingual retrieval. We further show our method can be extended for effective zero-shot adaptation to new languages through a cross-lingual prompting strategy with only English-supervised data, making it a general and applicable solution for MLODQA tasks without costly large-scale annotation.</abstract>
       <pages>481–504</pages>
@@ -368,7 +368,7 @@
       <author><first>Chenjun</first><last>Xu</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Bill</first><last>Howe</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <doi>10.1162/tacl_a_00754</doi>
       <abstract>Abstention, the refusal of large language models (LLMs) to provide an answer, is increasingly recognized for its potential to mitigate hallucinations and enhance safety in LLM systems. In this survey, we introduce a framework to examine abstention from three perspectives: the query, the model, and human values. We organize the literature on abstention methods, benchmarks, and evaluation metrics using this framework, and discuss merits and limitations of prior work. We further identify and motivate areas for future research, such as whether abstention can be achieved as a meta-capability that transcends specific tasks or domains, and opportunities to optimize abstention abilities in specific contexts. In doing so, we aim to broaden the scope and impact of abstention methodologies in AI systems.1</abstract>
       <pages>529–556</pages>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="28">
       <title>Exploring Practical Gaps in Using Cross Entropy to Implement Maximum Mutual Information Criterion for Rationalization</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Zhiying</first><last>Deng</last></author>
       <author><first>Zhongyu</first><last>Niu</last></author>
       <author><first>Jun</first><last>Wang</last></author>
@@ -456,7 +456,7 @@
       <author><first>Erick</first><last>Galinkin</last></author>
       <author><first>Yiyi</first><last>Chen</last></author>
       <author><first>Jens Myrup</first><last>Pedersen</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <doi>10.1162/tacl_a_00762</doi>
       <abstract>As NLP models are used by a growing number of end-users, an area of increasing importance is NLP Security (NLPSec): assessing the vulnerability of models to malicious attacks and developing comprehensive countermeasures against them. While work at the intersection of NLP and cybersecurity has the potential to create safer NLP for all, accidental oversights can result in tangible harm (e.g., breaches of privacy or proliferation of malicious models). In this emerging field, however, the research ethics of NLP have not yet faced many of the long-standing conundrums pertinent to cybersecurity, until now. We thus examine contemporary works across NLPSec, and explore their engagement with cybersecurity’s ethical norms. We identify trends across the literature, ultimately finding alarming gaps on topics like harm minimization and responsible disclosure. To alleviate these concerns, we provide concrete recommendations to help NLP researchers navigate this space more ethically, bridging the gap between traditional cybersecurity and NLP ethics, which we frame as “white hat NLP”. The goal of this work is to help cultivate an intentional culture of ethical research for those working in NLP Security.</abstract>
diff --git a/data/xml/2025.tlt.xml b/data/xml/2025.tlt.xml
index 2f83d7fd39..29d2b0aacf 100644
--- a/data/xml/2025.tlt.xml
+++ b/data/xml/2025.tlt.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2025)</booktitle>
       <editor><first>Sarah</first><last>Jablotschkin</last></editor>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
       <editor><first>Heike</first><last>Zinsmeister</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ljubljana, Slovenia</address>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="3">
       <title>Expanding the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew Treebank with Constituency Data</title>
-      <author><first>Daniel G.</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
+      <author id="daniel-g-swanson"><first>Daniel G.</first><last>Swanson</last><affiliation>Indiana University</affiliation></author>
       <pages>23-31</pages>
       <abstract>This paper presents an effort to expand the annotation pipeline for the Ancient Hebrew Universal Dependencies treebank to make use of additional data, resulting in the addition of over 4000 sentences and roughly 100K words to the released treebank. The resulting treebank contains 5500 sentences and 145K words and the incorporation of converted constituency data has resulted in an annotation process which requires manual intervention in only around 15-20% of sentences, even in previously unseen genres.</abstract>
       <url hash="3884021e">2025.tlt-1.3</url>
@@ -52,7 +52,7 @@
     <paper id="4">
       <title>Graph Databases for Fast Queries in <fixed-case>UD</fixed-case> Treebanks</title>
       <author><first>Niklas</first><last>Deworetzki</last><affiliation>Chalmers University of Technology and University of Gothenburg</affiliation></author>
-      <author><first>Peter</first><last>Ljunglöf</last><affiliation>Chalmers University of Technology</affiliation></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last><affiliation>Chalmers University of Technology</affiliation></author>
       <pages>32-43</pages>
       <abstract>We investigate if labeled property graphs, and graph databases, can be an useful and efficient way of encoding UD treebanks, to facilitate searching for complex syntactic phenomena. We give two alternative encodings of UD treebanks into the off-the-shelf graph database Neo4j, and show how to translate syntactic queries into the graph query language Cypher. Our evaluation shows that graph databases can improve query times by several orders of magnitude, compared to existing approaches.</abstract>
       <url hash="256413ca">2025.tlt-1.4</url>
@@ -134,7 +134,7 @@
       <author><first>Emmett</first><last>Strickland</last><affiliation>Institut National des Langues et Civilisations Orientales</affiliation></author>
       <author><first>Bruno</first><last>Guillaume</last><affiliation>INRIA</affiliation></author>
       <author><first>Sylvain</first><last>Kahane</last><affiliation>Université Paris Nanterre</affiliation></author>
-      <author><first>Anne</first><last>Lacheret-Dujour</last><affiliation>Université Paris Nanterre</affiliation></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret-Dujour</last><affiliation>Université Paris Nanterre</affiliation></author>
       <pages>111-118</pages>
       <abstract>This paper presents a new format of the Rhapsodie Treebank, which contains both syntactic and prosodic annotations, offering a comprehensive dataset for the study of spoken French.This integrated format allow us for complex multilevel queries and open the way for the extraction of intonosyntactic studies.</abstract>
       <url hash="62847388">2025.tlt-1.13</url>
@@ -162,7 +162,7 @@
     <paper id="16">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for a low-resource Dardic Language: <fixed-case>T</fixed-case>orwali</title>
       <author><first>Naeem</first><last>Uddin</last><affiliation>Charles University Prague</affiliation></author>
-      <author><first>Daniel</first><last>Zeman</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last><affiliation>Faculty of Mathematics and Physics, Charles University Prague</affiliation></author>
       <pages>140-147</pages>
       <abstract>This paper presents and discusses the linguistic phenomena encountered in the development of the ongoing first ever universal dependency treebank for Torwali the Language. Torwali belongs to the Kohistani sub-group of Dardic Indo-Aryan languages, and is considered an endangered (Moseley, 2010) and indigenous language, which makes it extremely low-resourced in terms of linguistic and computational resources. With the aim of including Torwali in Universal Dependencies (UD) (de Marneffe et al. 2021), we are annotating a diverse set of example sentences for POS tags, features and dependency relations.</abstract>
       <url hash="81a24126">2025.tlt-1.16</url>
diff --git a/data/xml/2025.trl.xml b/data/xml/2025.trl.xml
index 4aca79ca31..824c3b8e92 100644
--- a/data/xml/2025.trl.xml
+++ b/data/xml/2025.trl.xml
@@ -67,7 +67,7 @@
       <author><first>Naihao</first><last>Deng</last></author>
       <author><first>Cunxiang</first><last>Wang</last></author>
       <author><first>Guojiang</first><last>Zhao</last></author>
-      <author><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last><affiliation>University of Michigan</affiliation></author>
       <author><first>Yue</first><last>Zhang</last><affiliation>Westlake University</affiliation></author>
       <pages>34-46</pages>
       <abstract>Large Language Models (LLMs) have demon- strated exceptional performance across diverse tasks. To harness their capabilities for Text- to-SQL, we introduce R3 (Review-Rebuttal- Revision), a consensus-based multi-agent sys- tem for Text-to-SQL tasks. R3 achieves the new state-of-the-art performance of 89.9 on the Spider test set. In the meantime, R3 achieves 61.80 on the Bird development set. R3 out- performs existing single-LLM and multi-agent Text-to-SQL systems by 1.3% to 8.1% on Spi- der and Bird, respectively. Surprisingly, we find that for Llama-3-8B, R3 outperforms chain-of- thought prompting by over 20%, even outper- forming GPT-3.5 on the Spider development set. We open-source our codebase at https: //github.com/1ring2rta/R3.</abstract>
@@ -84,7 +84,7 @@
       <author><first>Thanh</first><last>Vu</last><affiliation>Oracle</affiliation></author>
       <author><first>Don</first><last>Dharmasiri</last><affiliation>Oracle</affiliation></author>
       <author><first>Yuan-Fang</first><last>Li</last><affiliation>Monash University and Oracle</affiliation></author>
-      <author><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last><affiliation>Oracle</affiliation></author>
       <pages>47-55</pages>
       <abstract>Open-weight large language models (LLMs) have significantly advanced performance in the Natural Language to SQL (NL2SQL) task. However, their effectiveness diminishes when dealing with large database schemas, as the context length increases. To address this limitation, we present SQLong, a novel and efficient data augmentation framework designed to enhance LLM performance in long-context scenarios for the NL2SQL task. SQLong generates augmented datasets by extending existing database schemas with additional synthetic CREATE TABLE commands and corresponding data rows, sampled from diverse schemas in the training data. This approach effectively simulates long-context scenarios during finetuning and evaluation. Through experiments on the Spider and BIRD datasets, we demonstrate that LLMs finetuned with SQLong-augmented data significantly outperform those trained on standard datasets. These imply SQLong’s practical implementation and its impact on improving NL2SQL capabilities in real-world settings with complex database schemas.</abstract>
       <url hash="e21d57a9">2025.trl-1.5</url>
@@ -93,7 +93,7 @@
     </paper>
     <paper id="6">
       <title>i<fixed-case>TBLS</fixed-case>: A Dataset of Interactive Conversations Over Tabular Information</title>
-      <author><first>Anirudh</first><last>Sundar</last></author>
+      <author id="anirudh-sundar"><first>Anirudh</first><last>Sundar</last></author>
       <author><first>Christopher Gordon</first><last>Richardson</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Larry</first><last>Heck</last><affiliation>Georgia Institute of Technology</affiliation></author>
       <author><first>Adar</first><last>Avsian</last></author>
@@ -130,7 +130,7 @@
     <paper id="9">
       <title>Ask Me Like <fixed-case>I</fixed-case>’m Human: <fixed-case>LLM</fixed-case>-based Evaluation with For-Human Instructions Correlates Better with Human Evaluations than Human Judges</title>
       <author><first>Rudali</first><last>Huidrom</last></author>
-      <author><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
+      <author id="anja-belz"><first>Anya</first><last>Belz</last><affiliation>Dublin City University</affiliation></author>
       <pages>98-108</pages>
       <abstract>Human evaluation in NLP has high cost and expertise requirements, and instruction-tuned LLMs are increasingly seen as a viable alternative. Reported correlations with human judgements vary across evaluation contexts and prompt types, and it is hard currently to predict if an LLM-as-judge metric will work equally well for new evaluation contexts and prompts, unless human evaluations are also carried out for comparison. Addressing two main factors contributing to this uncertainty, model suitability and prompt engineering, in the work reported in this focused contribution, we test four LLMs and different ways of combining them, in conjunction with a standard approach to prompt formulation, namely using written-for-human instructions verbatim. We meta-evaluate performance against human evaluations on two data-to-text tasks, and eight evaluation measures, also comparing against more conventional LLM prompt formulations. We find that the best LLM (combination)s are excellent predictors of mean human judgements, and are particularly good at content-related evaluation (in contrast to form-related criteria such as Fluency). Moreover, the best LLMs correlate far more strongly with human evaluations than individual human judges across all scenarios.</abstract>
       <url hash="ec9effd6">2025.trl-1.9</url>
@@ -144,7 +144,7 @@
       <author><first>Nils</first><last>Feldhus</last></author>
       <author><first>Raia</first><last>Abu Ahmad</last><affiliation>German Research Center for AI</affiliation></author>
       <author><first>Malte</first><last>Ostendorff</last><affiliation>Deutsche Telekom</affiliation></author>
-      <author><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
+      <author id="pedro-ortiz-suarez"><first>Pedro</first><last>Ortiz Suarez</last><affiliation>Common Crawl Foundation</affiliation></author>
       <author><first>Georg</first><last>Rehm</last><affiliation>Humboldt-Universität zu Berlin and Deutsches Forschungszentrum für Künstliche Intelligenz</affiliation></author>
       <author><first>Sebastian</first><last>Möller</last></author>
       <pages>109-142</pages>
diff --git a/data/xml/2025.trustnlp.xml b/data/xml/2025.trustnlp.xml
index 1407270952..b273f7e7d8 100644
--- a/data/xml/2025.trustnlp.xml
+++ b/data/xml/2025.trustnlp.xml
@@ -83,14 +83,14 @@
     </paper>
     <paper id="4">
       <title>Ambiguity Detection and Uncertainty Calibration for Question Answering with Large Language Models</title>
-      <author><first>Zhengyan</first><last>Shi</last></author>
+      <author id="zhengyan-shi"><first>Zhengyan</first><last>Shi</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last><affiliation>Amazon</affiliation></author>
       <author><first>Simone</first><last>Filice</last><affiliation>Technology Innovation Institute</affiliation></author>
       <author><first>Saar</first><last>Kuzi</last><affiliation>Amazon</affiliation></author>
       <author><first>Elad</first><last>Kravi</last></author>
       <author><first>Eugene</first><last>Agichtein</last><affiliation>Emory University</affiliation></author>
       <author><first>Oleg</first><last>Rokhlenko</last></author>
-      <author><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last><affiliation>Amazon</affiliation></author>
       <pages>41-55</pages>
       <abstract>Large Language Models (LLMs) have demonstrated excellent capabilities in Question Answering (QA) tasks, yet their ability to identify and address ambiguous questions remains underdeveloped. Ambiguities in user queries often lead to inaccurate or misleading answers, undermining user trust in these systems. Despite prior attempts using prompt-based methods, performance has largely been equivalent to random guessing, leaving a significant gap in effective ambiguity detection. To address this, we propose a novel framework for detecting ambiguous questions within LLM-based QA systems. We first prompt an LLM to generate multiple answers to a question, and then analyze them to infer the ambiguity. We propose to use a lightweight Random Forest model, trained on a bootstrapped and shuffled 6-shot examples dataset. Experimental results on ASQA, PACIFIC, and ABG-COQA datasets demonstrate the effectiveness of our approach, with accuracy up to 70.8%. Furthermore, our framework enhances the confidence calibration of LLM outputs, leading to more trustworthy QA systems able to handle complex questions.</abstract>
       <url hash="07ccc700">2025.trustnlp-main.4</url>
@@ -103,7 +103,7 @@
       <author><first>Zhiyu</first><last>Xue</last><affiliation>University of California, Santa Barbara</affiliation></author>
       <author><first>Xitong</first><last>Zhang</last><affiliation>Qualcomm Inc, QualComm</affiliation></author>
       <author><first>Rongrong</first><last>Wang</last><affiliation>Michigan State University</affiliation></author>
-      <author><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last><affiliation>Michigan State University</affiliation></author>
       <pages>56-65</pages>
       <abstract>Self-correction is one of the most amazing emerging capabilities of Large Language Models (LLMs), enabling LLMs to self-modify an inappropriate output given a natural language feedback which describes the problems of that output. Moral self-correction is a post-hoc approach correcting unethical generations without requiring a gradient update, making it both computationally lightweight and capable of preserving the language modeling ability. Previous works have shown that LLMs can self-debias, and it has been reported that small models, i.e., those with less than 22B parameters, are <i>not</i> capable of moral self-correction.However, there is no direct proof as to why such smaller models fall short of moral self-correction, though previous research hypothesizes that larger models are skilled in following instructions and understanding abstract social norms.In this paper, we empirically validate this hypothesis in the context of social stereotyping, through meticulous prompting.Our experimental results indicate that <b>(i)</b> surprisingly, 3.8B LLMs with proper safety alignment fine-tuning can achieve very good moral self-correction performance, highlighting the significant effects of safety alignment; and <b>(ii)</b> small LLMs are indeed weaker than larger-scale models in terms of comprehending social norms and self-explanation through CoT, but all scales of LLMs show bad self-correction performance given unethical instructions.</abstract>
       <url hash="c1e38d24">2025.trustnlp-main.5</url>
@@ -260,7 +260,7 @@
       <author><first>Yi</first><last>Yang</last><affiliation>Hong Kong University of Science and Technology</affiliation></author>
       <author><first>Hanyu</first><last>Duan</last></author>
       <author><first>Ahmed</first><last>Abbasi</last><affiliation>University of Notre Dame</affiliation></author>
-      <author><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last><affiliation>University of Notre Dame</affiliation></author>
       <author><first>Kar Yan</first><last>Tam</last></author>
       <pages>276-290</pages>
       <abstract>Transformer-based pretrained large language models (PLM) such as BERT and GPT have achieved remarkable success in NLP tasks. However, PLMs are prone to encoding stereotypical biases. Although a burgeoning literature has emerged on stereotypical bias mitigation in PLMs, such as work on debiasing gender and racial stereotyping, how such biases manifest and behave internally within PLMs remains largely unknown. Understanding the internal stereotyping mechanisms may allow better assessment of model fairness and guide the development of effective mitigation strategies. In this work, we focus on attention heads, a major component of the Transformer architecture, and propose a bias analysis framework to explore and identify a small set of biased heads that are found to contribute to a PLM’s stereotypical bias. We conduct extensive experiments to validate the existence of these biased heads and to better understand how they behave. We investigate gender and racial bias in the English language in two types of Transformer-based PLMs: the encoder-based BERT model and the decoder-based autoregressive GPT model, LLaMA-2 (7B), and LLaMA-2-Chat (7B). Overall, the results shed light on understanding the bias behavior in pretrained language models.</abstract>
diff --git a/data/xml/2025.udw.xml b/data/xml/2025.udw.xml
index cbb44408c0..8328a0bf0b 100644
--- a/data/xml/2025.udw.xml
+++ b/data/xml/2025.udw.xml
@@ -31,7 +31,7 @@
     <paper id="2">
       <title>Annotation of Relative Forms in the <fixed-case>E</fixed-case>gyptian-<fixed-case>UJ</fixed-case>aen Treebank</title>
       <author><first>Roberto Antonio</first><last>Díaz Hernández</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>11-21</pages>
       <abstract>Relative forms are adjective finite verb forms that can be used in an attributive or a nominal function. They pose a challenge when annotating them according to the Universal Dependencies approach, for they have morphological features of both verbs and adjectives, yet they can also be used syntactically as nouns. The aim of this paper is to discuss the morphosyntactic methodology applied to their annotation in the Egyptian-UJaen treebank.</abstract>
       <url hash="c04146c3">2025.udw-1.2</url>
@@ -57,7 +57,7 @@
     <paper id="5">
       <title>Crossing Dialectal Boundaries: Building a Treebank for the Dialect of Lesbos through Knowledge Transfer from Standard <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek</title>
       <author><first>Stavros</first><last>Bompolas</last><affiliation>ARCHIMEDES Unit | Athena Research Center</affiliation></author>
-      <author><first>Stella</first><last>Markantonatou</last><affiliation>ATHENA RIC</affiliation></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last><affiliation>ATHENA RIC</affiliation></author>
       <author><first>Angela</first><last>Ralli</last><affiliation>Athena Research Center</affiliation></author>
       <author><first>Antonios</first><last>Anastasopoulos</last><affiliation>Athena Research Center</affiliation></author>
       <pages>39-51</pages>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="10">
       <title>Building <fixed-case>UD</fixed-case> Cairo for <fixed-case>O</fixed-case>ld <fixed-case>E</fixed-case>nglish in the Classroom</title>
-      <author><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
+      <author id="lauren-levine"><first>Lauren</first><last>Levine</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Junghyun</first><last>Min</last><affiliation>Georgetown University</affiliation></author>
       <author><first>Amir</first><last>Zeldes</last><affiliation>Georgetown University</affiliation></author>
       <pages>97-104</pages>
@@ -127,7 +127,7 @@
       <author><first>Zubair</first><last>Sanjrani</last><affiliation>Isra University</affiliation></author>
       <author><first>Sarwat</first><last>Qureshi</last><affiliation>U. Manchester</affiliation></author>
       <author><first>Shafi</first><last>Pirzada</last><affiliation>MLtwist</affiliation></author>
-      <author><first>Christopher D.</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
       <author><first>Mutee U</first><last>Rahman</last><affiliation>Isra University</affiliation></author>
       <pages>105-118</pages>
       <abstract>Sindhi is an Indo-Aryan language spoken primarily in Pakistan and India by about 40 million people. Despite this extensive use, it is a low resource language for NLP tasks, with few datasets or pretrained embeddings available. In this work, we explore linguistic challenges for annotating Sindhi in the UD paradigm, such as language-specific analysis of adpositions and verb forms. We use this analysis to present a newly annotated dependency treebank for Universal Dependencies, along with pretrained embeddings and an annotation pipeline specifically for Sindhi annotation.</abstract>
@@ -167,9 +167,9 @@
     </paper>
     <paper id="16">
       <title>Extending the Enhanced <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies – addressing subjects in pro-drop languages</title>
-      <author><first>Magali</first><last>Sanches Duran</last><affiliation>Universidade de São Paulo</affiliation></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last><affiliation>Universidade de São Paulo</affiliation></author>
       <author><first>Elvis A.</first><last>de Souza</last><affiliation>Universidade de São Paulo</affiliation></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last><affiliation>Universidade de São Paulo</affiliation></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last><affiliation>Universidade de São Paulo</affiliation></author>
       <author><first>Adriana Silvina</first><last>Pagano</last><affiliation>Universidade Federal de Minas Gerais, Universidade Federal de Minas Gerais</affiliation></author>
       <author><first>Thiago A. S.</first><last>Pardo</last><affiliation>Universidade de São Paulo</affiliation></author>
       <pages>143-152</pages>
@@ -199,7 +199,7 @@
     </paper>
     <paper id="19">
       <title>Quid verbumst? Applying a definition of word to <fixed-case>L</fixed-case>atin in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last><affiliation>Katholieke Universiteit Leuven</affiliation></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last><affiliation>Katholieke Universiteit Leuven</affiliation></author>
       <pages>174-185</pages>
       <abstract>Words, more specifically “syntactic words”, are at the centre of a dependency-based approach like Universal Dependencies. Nonetheless, its guidelines do not make explicit how such a word should be defined and identified, and so it happens that different treebanks use different standards to this end. To counter this vagueness, the community has been recently discussing a definition put forward in (Haspelmath, 2023) which is not fully uncontroversial. This contribution is a preliminary case study that tries its hand at concretely applying this definition (except for compounds) to Latin in order to gain more insights about its operability and groundedness. This is helped by the spread of Latin over many treebanks, the presence of good linguistic resources to analyse it, and a linguistic type which is probably not fully considered in (Haspelmath, 2023). On the side, this work shows once more the difficulties of turning theoretical definitions into working directives in the realm of linguistic annotation.</abstract>
       <url hash="f84806bf">2025.udw-1.19</url>
diff --git a/data/xml/2025.vardial.xml b/data/xml/2025.vardial.xml
index e2f208fbc4..0bd9e41a5a 100644
--- a/data/xml/2025.vardial.xml
+++ b/data/xml/2025.vardial.xml
@@ -6,8 +6,8 @@
       <editor><first>Yves</first><last>Scherrer</last></editor>
       <editor><first>Tommi</first><last>Jauhiainen</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
-      <editor><first>Jorg</first><last>Tiedemann</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="jorg-tiedemann"><first>Jorg</first><last>Tiedemann</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, UAE</address>
@@ -90,7 +90,7 @@
     <paper id="8">
       <title>Retrieval of Parallelizable Texts Across <fixed-case>C</fixed-case>hurch <fixed-case>S</fixed-case>lavic Variants</title>
       <author><first>Piroska</first><last>Lendvai</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Anna</first><last>Jouravel</last></author>
       <author><first>Achim</first><last>Rabus</last></author>
       <author><first>Elena</first><last>Renje</last></author>
@@ -104,7 +104,7 @@
       <author><first>Anne-Marie</first><last>Lutgen</last></author>
       <author><first>Alistair</first><last>Plum</last></author>
       <author><first>Christoph</first><last>Purschke</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>115–127</pages>
       <abstract>Orthographic variation is very common in Luxembourgish texts due to the absence of a fully-fledged standard variety. Additionally, developing NLP tools for Luxembourgish is a difficult task given the lack of annotated and parallel data, which is exacerbated by ongoing standardization. In this paper, we propose the first sequence-to-sequence normalization models using the ByT5 and mT5 architectures with training data obtained from word-level real-life variation data. We perform a fine-grained, linguistically-motivated evaluation to test byte-based, word-based and pipeline-based models for their strengths and weaknesses in text normalization. We show that our sequence model using real-life variation data is an effective approach for tailor-made normalization in Luxembourgish.</abstract>
       <url hash="c87937f0">2025.vardial-1.9</url>
@@ -114,7 +114,7 @@
       <title>Improving Dialectal Slot and Intent Detection with Auxiliary Tasks: A Multi-Dialectal <fixed-case>B</fixed-case>avarian Case Study</title>
       <author><first>Xaver Maria</first><last>Krückl</last></author>
       <author><first>Verena</first><last>Blaschke</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>128–146</pages>
       <abstract>Reliable slot and intent detection (SID) is crucial in natural language understanding for applications like digital assistants. Encoder-only transformer models fine-tuned on high-resource languages generally perform well on SID. However, they struggle with dialectal data, where no standardized form exists and training data is scarce and costly to produce. We explore zero-shot transfer learning for SID, focusing on multiple Bavarian dialects, for which we release a new dataset for the Munich dialect. We evaluate models trained on auxiliary tasks in Bavarian, and compare joint multi-task learning with intermediate-task training. We also compare three types of auxiliary tasks: token-level syntactic tasks, named entity recognition (NER), and language modelling. We find that the included auxiliary tasks have a more positive effect on slot filling than intent classification (with NER having the most positive effect), and that intermediate-task training yields more consistent performance gains. Our best-performing approach improves intent classification performance on Bavarian dialects by 5.1 and slot filling F1 by 8.4 percentage points.</abstract>
       <url hash="7c592675">2025.vardial-1.10</url>
@@ -135,7 +135,7 @@
       <author><first>Salam</first><last>Khalifa</last></author>
       <author><first>Abdelrahim</first><last>Qaddoumi</last></author>
       <author><first>Jordan</first><last>Kodner</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>157–167</pages>
       <abstract>We investigate learning surface forms from underlying morphological forms for low-resource language varieties. We concentrate on learning explicit rules with the aid of learned syllable structure constraints, which outperforms neural methods on this small data task and provides interpretable output. Evaluating across one relatively high-resource and two related low-resource Arabic dialects, we find that a model trained only on the high-resource dialect achieves decent performance on the low-resource dialects, useful when no low-resource training data is available. The best results are obtained when our system is trained only on the low-resource dialect data without augmentation from the related higher-resource dialect. We discuss the impact of syllable structure constraints and the strengths and weaknesses of data augmentation and transfer learning from a related dialect.</abstract>
       <url hash="94e6b101">2025.vardial-1.12</url>
@@ -145,7 +145,7 @@
       <title>Common Ground, Diverse Roots: The Difficulty of Classifying Common Examples in <fixed-case>S</fixed-case>panish Varieties</title>
       <author><first>Javier A.</first><last>Lopetegui</last></author>
       <author><first>Arij</first><last>Riabi</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>168–181</pages>
       <abstract>Variations in languages across geographic regions or cultures are crucial to address to avoid biases in NLP systems designed for culturally sensitive tasks, such as hate speech detection or dialog with conversational agents. In languages such as Spanish, where varieties can significantly overlap, many examples can be valid across them, which we refer to as common examples. Ignoring these examples may cause misclassifications, reducing model accuracy and fairness. Therefore, accounting for these common examples is essential to improve the robustness and representativeness of NLP systems trained on such data. In this work, we address this problem in the context of Spanish varieties. We use training dynamics to automatically detect common examples or errors in existing Spanish datasets. We demonstrate the efficacy of using predicted label confidence for our Datamaps (CITATION) implementation for the identification of hard-to-classify examples, especially common examples, enhancing model performance in variety identification tasks. Additionally, we introduce a Cuban Spanish Variety Identification dataset with common examples annotations developed to facilitate more accurate detection of Cuban and Caribbean Spanish varieties. To our knowledge, this is the first dataset focused on identifying the Cuban, or any other Caribbean, Spanish variety.</abstract>
       <url hash="80409651">2025.vardial-1.13</url>
@@ -154,8 +154,8 @@
     <paper id="14">
       <title>Add Noise, Tasks, or Layers? <fixed-case>M</fixed-case>ai<fixed-case>NLP</fixed-case> at the <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2025 Shared Task on <fixed-case>N</fixed-case>orwegian Dialectal Slot and Intent Detection</title>
       <author><first>Verena</first><last>Blaschke</last></author>
-      <author><first>Felicia</first><last>Körner</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="felicia-koerner"><first>Felicia</first><last>Körner</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>182–199</pages>
       <abstract>Slot and intent detection (SID) is a classic natural language understanding task. Despite this, research has only more recently begun focusing on SID for dialectal and colloquial varieties. Many approaches for low-resource scenarios have not yet been applied to dialectal SID data, or compared to each other on the same datasets. We participate in the VarDial 2025 shared task on slot and intent detection in Norwegian varieties, and compare multiple set-ups: varying the training data (English, Norwegian, or dialectal Norwegian), injecting character-level noise, training on auxiliary tasks, and applying Layer Swapping, a technique in which layers of models fine-tuned on different datasets are assembled into a model. We find noise injection to be beneficial while the effects of auxiliary tasks are mixed. Though some experimentation was required to successfully assemble a model from layers, it worked surprisingly well; a combination of models trained on English and small amounts of dialectal data produced the most robust slot predictions. Our best models achieve 97.6% intent accuracy and 85.6% slot F1 in the shared task.</abstract>
       <url hash="7e206698">2025.vardial-1.14</url>
diff --git a/data/xml/2025.wacl.xml b/data/xml/2025.wacl.xml
index c99a1bc8f6..cb230735c8 100644
--- a/data/xml/2025.wacl.xml
+++ b/data/xml/2025.wacl.xml
@@ -13,7 +13,7 @@
       <editor><first>Amal</first><last>Haddad Haddad</last></editor>
       <editor><first>Mustafa</first><last>Jarrar</last></editor>
       <editor><first>Mo</first><last>El-Haj</last></editor>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Paul</first><last>Rayson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Abu Dhabi, UAE</address>
@@ -87,7 +87,7 @@
       <title><fixed-case>D</fixed-case>ial2<fixed-case>MSA</fixed-case>-Verified: A Multi-Dialect <fixed-case>A</fixed-case>rabic Social Media Dataset for Neural Machine Translation to <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
       <author><first>Abdullah</first><last>Khered</last></author>
       <author><first>Youcef</first><last>Benkhedda</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>50–62</pages>
       <abstract>Social media has become an essential focus for Natural Language Processing (NLP) research due to its widespread use and unique linguistic characteristics. Normalising social media content, especially for morphologically rich languages like Arabic, remains a complex task due to limited parallel corpora. Arabic encompasses Modern Standard Arabic (MSA) and various regional dialects, collectively termed Dialectal Arabic (DA), which complicates NLP efforts due to their informal nature and variability. This paper presents Dial2MSA-Verified, an extension of the Dial2MSA dataset that includes verified translations for Gulf, Egyptian, Levantine, and Maghrebi dialects. We evaluate the performance of Seq2Seq models on this dataset, highlighting the effectiveness of state-of-the-art models in translating local Arabic dialects. We also provide insights through error analysis and outline future directions for enhancing Seq2Seq models and dataset development. The Dial2MSA-Verified dataset is publicly available to support further research.</abstract>
       <url hash="1664ba8d">2025.wacl-1.6</url>
@@ -115,7 +115,7 @@
       <author><first>Azzedine</first><last>Aftiss</last></author>
       <author><first>Salima</first><last>Lamsiyah</last></author>
       <author><first>Christoph</first><last>Schommer</last></author>
-      <author><first>Said Ouatik</first><last>El Alaoui</last></author>
+      <author id="said-ouatik-el-alaoui"><first>Said Ouatik</first><last>El Alaoui</last></author>
       <pages>77–85</pages>
       <abstract>Moroccan Dialect (MD), or “Darija,” is a primary spoken variant of Arabic in Morocco, yet remains underrepresented in Natural Language Processing (NLP) research, particularly in tasks like summarization. Despite a growing volume of MD textual data online, there is a lack of robust resources and NLP models tailored to handle the unique linguistic challenges posed by MD. In response, we introduce .MA_v2, an expanded version of the GOUD.MA dataset, containing over 50k articles with their titles across 11 categories. This dataset provides a more comprehensive resource for developing summarization models. We evaluate the application of large language models (LLMs) for MD summarization, utilizing both fine-tuning and zero-shot prompting with encoder-decoder and causal LLMs, respectively. Our findings demonstrate that an expanded dataset improves summarization performance and highlights the capabilities of recent LLMs in handling MD text. We open-source our dataset, fine-tuned models, and all experimental code, establishing a foundation for future advancements in MD NLP. We release the code at https://github.com/AzzedineAftiss/Moroccan-Dialect-Summarization.</abstract>
       <url hash="860462d1">2025.wacl-1.9</url>
diff --git a/data/xml/2025.wnu.xml b/data/xml/2025.wnu.xml
index 8129a2601b..14f0393cb9 100644
--- a/data/xml/2025.wnu.xml
+++ b/data/xml/2025.wnu.xml
@@ -81,7 +81,7 @@
     <paper id="11">
       <title><fixed-case>CHATTER</fixed-case>: A character-attribution dataset for narrative understanding</title>
       <author><first>Sabyasachee</first><last>Baruah</last><affiliation>University of Southern California</affiliation></author>
-      <author><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last><affiliation>University of Southern California</affiliation></author>
       <pages>52-63</pages>
       <abstract>Computational narrative understanding studies the identification, description, and interaction of the elements of a narrative: characters, attributes, events, and relations.Narrative research has given considerable attention to defining and classifying character types.However, these character-type taxonomies do not generalize well because they are small, too simple, or specific to a domain.We require robust and reliable benchmarks to test whether narrative models truly understand the nuances of the character’s development in the story.Our work addresses this by curating the CHATTER dataset that labels whether a character portrays some attribute for 88124 character-attribute pairs, encompassing 2998 characters, 12967 attributes and 660 movies.We validate a subset of CHATTER, called CHATTEREVAL, using human annotations to serve as an evaluation benchmark for the character attribution task in movie scripts.CHATTEREVAL also assesses narrative understanding and the long-context modeling capacity of language models.</abstract>
       <url hash="54bb0b29">2025.wnu-1.11</url>
@@ -101,7 +101,7 @@
     <paper id="16">
       <title>Narrative Studio: Visual narrative exploration using <fixed-case>LLM</fixed-case>s and <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo Tree Search</title>
       <author><first>Parsa</first><last>Ghaffari</last><affiliation>Independent researcher</affiliation></author>
-      <author><first>Chris</first><last>Hokamp</last><affiliation>Aylien Ltd.</affiliation></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last><affiliation>Aylien Ltd.</affiliation></author>
       <pages>83-96</pages>
       <abstract>Interactive storytelling benefits from planning and exploring multiple “what if” scenarios. Modern LLMs are useful tools for ideation and exploration, but current chat-based user interfaces restrict users to a single linear flow. To address this limitation, we propose Narrative Studio – a novel in-browser narrative exploration environment featuring a tree-like interface that allows branching exploration from user-defined points in a story. Each branch is extended via iterative LLM inference guided by system and user-defined prompts. Additionally, we employ Monte Carlo Tree Search (MCTS) to automatically expand promising narrative paths based on user-specified criteria, enabling more diverse and robust story development. We also allow users to enhance narrative coherence by grounding the generated text in a graph that represents the actors and environment of the story.</abstract>
       <url hash="af22a37b">2025.wnu-1.16</url>
diff --git a/data/xml/2025.wnut.xml b/data/xml/2025.wnut.xml
index 11127ae978..5752e215b5 100644
--- a/data/xml/2025.wnut.xml
+++ b/data/xml/2025.wnut.xml
@@ -30,7 +30,7 @@
       <author><first>Mark</first><last>Drakesmith</last></author>
       <author><first>Dimosthenis</first><last>Antypas</last></author>
       <author><first>Clare</first><last>Brown</last><affiliation>NA</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Jiao</first><last>Song</last></author>
       <pages>1-9</pages>
       <abstract>Social media offers the potential to provide detection of outbreaks or public health incidents faster than traditional reporting mechanisms. In this paper, we developed and tested a pipeline to produce alerts of influenza-like illness (ILI) using Twitter data. Data was collected from the Twitter API, querying keywords referring to ILI symptoms and geolocated to Wales. Tweets that described first-hand descriptions of symptoms (as opposed to non-personal descriptions) were classified using transformer-based language models specialised on social media (BERTweet and TimeLMs), which were trained on a manually labelled dataset matching the above criteria. After gathering this data, weekly tweet counts were applied to the regression-based Noufaily algorithm to identify exceedances throughout 2022. The algorithm was also applied to counts of ILI-related GP consultations for comparison. Exceedance detection applied to the classified tweet counts produced alerts starting four weeks earlier than by using GP consultation data. These results demonstrate the potential to facilitate advanced preparedness for unexpected increases in healthcare burdens.</abstract>
@@ -43,7 +43,7 @@
       <author><first>Quanqi</first><last>Du</last></author>
       <author><first>Loic</first><last>De Langhe</last></author>
       <author><first>Els</first><last>Lefever</last><affiliation>Ghent University</affiliation></author>
-      <author><first>Veronique</first><last>Hoste</last><affiliation>Universiteit Gent</affiliation></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last><affiliation>Universiteit Gent</affiliation></author>
       <pages>10-15</pages>
       <abstract>This study explores the differences between textual and multimodal sentiment annotations on videos and their impact on transcript-based sentiment modelling. Using the UniC and CH-SIMS datasets which are annotated at both the unimodal and multimodal level, we conducted a statistical analysis and sentiment modelling experiments. Results reveal significant differences between the two annotation types, with textual annotations yielding better performance in sentiment modelling and demonstrating superior generalization ability. These findings highlight the challenges of cross-modality generalization and provide insights for advancing sentiment analysis.</abstract>
       <url hash="119f6eeb">2025.wnut-1.2</url>
@@ -140,7 +140,7 @@
     <paper id="11">
       <title>Automatically Generating <fixed-case>C</fixed-case>hinese Homophone Words to Probe Machine Translation Estimation Systems</title>
       <author><first>Shenbin</first><last>Qian</last></author>
-      <author><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Diptesh</first><last>Kanojia</last><affiliation>University of Surrey</affiliation></author>
       <author><first>Félix</first><last>Do Carmo</last><affiliation>University of Surrey</affiliation></author>
       <pages>97-107</pages>
@@ -172,7 +172,7 @@
     <paper id="14">
       <title><fixed-case>W</fixed-case>ikipedia is Not a Dictionary, Delete! Text Classification as a Proxy for Analysing <fixed-case>W</fixed-case>iki Deletion Discussions</title>
       <author><first>Hsuvas</first><last>Borkakoty</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last><affiliation>Cardiff University and AMPLYFI</affiliation></author>
       <pages>133-142</pages>
       <abstract>Automated content moderation for collaborative knowledge hubs like Wikipedia or Wikidata is an important yet challenging task due to multiple factors. In this paper, we construct a database of discussions happening around articles marked for deletion in several Wikis and in three languages, which we then use to evaluate a range of LMs on different tasks (from predicting the outcome of the discussion to identifying the implicit policy an individual comment might be pointing to). Our results reveal, among others, that discussions leading to deletion are easier to predict, and that, surprisingly, self-produced tags (keep, delete or redirect) don’t always help guiding the classifiers, presumably because of users’ hesitation or deliberation within comments</abstract>
       <url hash="4e6b735d">2025.wnut-1.14</url>
@@ -181,8 +181,8 @@
     </paper>
     <paper id="15">
       <title>From Conversational Speech to Readable Text: Post-Processing Noisy Transcripts in a Low-Resource Setting</title>
-      <author><first>Arturs</first><last>Znotins</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="arturs-znotins"><first>Arturs</first><last>Znotins</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Roberts</first><last>Dargis</last></author>
       <pages>143-148</pages>
       <abstract>We present ongoing research on automatic post-processing approaches to enhance the readability of noisy speech transcripts in low-resource languages, with a focus on conversational speech in Latvian. We compare transformer-based sequence-labeling models and large language models (LLMs) for the standard punctuation and capitalization restoration task, while also considering automatic correction of mispronounced words and disfluency, and partial inverse text normalization. Our results show that very small LLMs (approx. 2B parameters), fine-tuned on a modest text corpus, can achieve near state-of-the-art performance, rivaling orders of magnitude larger LLMs. Additionally, we demonstrate that a fine-tuned Whisper model, leveraging acoustic cues, outperforms text-only systems on challenging conversational data, even for a low-resource language. Error analysis reveals recurring pitfalls in sentence boundary determination and disfluency handling, emphasizing the importance of consistent annotation and domain adaptation for robust post-processing. Our findings highlight the feasibility of developing efficient post-processing solutions that significantly refine ASR output in low-resource settings, while opening new possibilities for editing and formatting speech transcripts beyond mere restoration of punctuation and capitalization.</abstract>
diff --git a/data/xml/2025.woah.xml b/data/xml/2025.woah.xml
index 07b1cd1f52..a64a5ba084 100644
--- a/data/xml/2025.woah.xml
+++ b/data/xml/2025.woah.xml
@@ -7,7 +7,7 @@
       <editor><first>Christine</first><last>de Kock</last></editor>
       <editor><first>Debora</first><last>Nozza</last></editor>
       <editor><first>Flor Miriam</first><last>Plaza-del-Arco</last></editor>
-      <editor><first>Zeerak</first><last>Talat</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Talat</last></editor>
       <editor><first>Francielle</first><last>Vargas</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vienna, Austria</address>
@@ -38,7 +38,7 @@
       <author><first>Dimosthenis</first><last>Antypas</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Indira</first><last>Sen</last><affiliation>University of Mannheim</affiliation></author>
       <author><first>Carla</first><last>Perez Almendros</last><affiliation>Cardiff University</affiliation></author>
-      <author><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last><affiliation>Cardiff University</affiliation></author>
       <author><first>Francesco</first><last>Barbieri</last><affiliation>Meta</affiliation></author>
       <pages>17-31</pages>
       <abstract>The detection of sensitive content in large datasets is crucial for ensuring that shared and analysed data is free from harmful material. However, current moderation tools, such as external APIs, suffer from limitations in customisation, accuracy across diverse sensitive categories, and privacy concerns. Additionally, existing datasets and open-source models focus predominantly on toxic language, leaving gaps in detecting other sensitive categories such as substance abuse or self-harm. In this paper, we put forward a unified dataset tailored for social media content moderation across six sensitive categories: conflictual language, profanity,sexually explicit material, drug-related content, self-harm, and spam. By collecting and annotating data with consistent retrieval strategies and guidelines, we address the shortcomings of previous focalised research. Our analysis demonstrates that fine-tuning large language models (LLMs) on this novel dataset yields significant improvements in detection performance compared to open off-the-shelf models such as LLaMA, and even proprietary OpenAI models, which underperform by 10-15% overall. This limitation is even more pronounced on popular moderation APIs, which cannot be easily tailored to specific sensitive content categories, among others.</abstract>
@@ -82,7 +82,7 @@
       <author><first>Ling</first><last>Sun</last><affiliation>Indiana University</affiliation></author>
       <author><first>Soyoung</first><last>Kim</last><affiliation>Indiana University</affiliation></author>
       <author><first>Xiao</first><last>Dong</last><affiliation>Indiana University</affiliation></author>
-      <author><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last><affiliation>Indiana University</affiliation></author>
       <pages>67-76</pages>
       <abstract>We examine how embedding bias affects hate speech detection by evaluating two debiasing methods—hard-debiasing and soft-debiasing. We analyze stereotype and sentiment associations within the embedding space and assess whether debiased models reduce censorship of marginalized authors while improving detection of hate speech targeting these groups. Our findings highlight how embedding bias propagates into downstream tasks and demonstrates how well different embedding bias metrics can predict bias in hate speech detection.</abstract>
       <url hash="0ed60ba9">2025.woah-1.8</url>
@@ -205,7 +205,7 @@
     <paper id="21">
       <title>Implicit Hate Target Span Detection in Zero- and Few-Shot Settings with Selective Sub-Billion Parameter Models</title>
       <author><first>Hossam</first><last>Boudraa</last><affiliation>Aix-Marseille University</affiliation></author>
-      <author><first>Benoit</first><last>Favre</last><affiliation>LIS, AMU Marseille, France</affiliation></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last><affiliation>LIS, AMU Marseille, France</affiliation></author>
       <author><first>Raquel</first><last>Urena</last><affiliation>SESSTIM, AMU Marseille, France</affiliation></author>
       <pages>228-240</pages>
       <abstract>This work investigates the effectiveness of masked language models (MLMs) and autoregressive language models (LLMs) with fewer than one billion parameters in the detection of implicit hate speech through fine-grained span identification. The evaluation spans zero-shot, few-shot, and full supervision settings across two core benchmarks—SBIC and IHC—and an auxiliary testbed, OffensiveLang.RoBERTa-Large-355M emerges as the strongest zero-shot model, achieving the highest F1 scores of 75.8 (SBIC) and 72.5 (IHC), outperforming larger models like LLaMA 3.2-1B. ModernBERT-125M closely matches this performance with scores of 75.1 and 72.2, demonstrating the advantage of architectural efficiency. Among instruction-tuned models, SmolLM2-135M Instruct and LLaMA 3.2 1B Instruct consistently outperform their non-instructed counterparts, with up to +2.3 F1 gain on SBIC and +1.7 on IHC. Interestingly, the larger SmolLM2-360M Instruct does not outperform the 135M variant, highlighting that model scale does not always correlate with performance in implicit hate detection tasks.Few-shot fine-tuning with SmolLM2-135M Instruct achieves F1 scores of 68.2 (SBIC) and 64.0 (IHC), trailing full-data fine-tuning by only 1.6 and 2.0 points, respectively, with accuracy drops under 0.5 points. This illustrates the promise of compact, instruction-aligned models in data-scarce settings, particularly when optimized with Low-Rank Adaptation (LoRA).Topic-guided error analysis using Latent Dirichlet Allocation (LDA) reveals recurring model failures in ideologically charged or euphemistic discourse. Misclassifications often involve neutral references to identity, politics, or advocacy language, underscoring current limitations in discourse-level inference and sociopragmatic understanding.</abstract>
@@ -227,7 +227,7 @@
       <title>Hostility Detection in <fixed-case>UK</fixed-case> Politics: A Dataset on Online Abuse Targeting <fixed-case>MP</fixed-case>s</title>
       <author><first>Mugdha</first><last>Pandya</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Mali</first><last>Jin</last><affiliation>University of Sheffield</affiliation></author>
-      <author><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last><affiliation>University of Sheffield</affiliation></author>
       <author><first>Diana</first><last>Maynard</last><affiliation>University of Sheffield</affiliation></author>
       <pages>254-266</pages>
       <abstract>Social media platforms, particularly X, enable direct interaction between politicians and constituents but also expose politicians to hostile responses targetting both their governmental role and personal identity. This online hostility can undermine public trust and potentially incite offline violence. While general hostility detection models exist, they lack the specificity needed for political contexts and country-specific issues. We address this gap by creating a dataset of 3,320 English tweets directed at UK Members of Parliament (MPs) over two years, annotated for hostility and targeted identity characteristics (race, gender, religion). Through linguistic and topical analyses, we examine the unique features of UK political discourse and evaluate pre-trained language models and large language models on binary hostility detection and multi-class targeted identity type classification tasks. Our work provides essential data and insights for studying politics-related hostility in the UK.</abstract>
@@ -250,8 +250,8 @@
     <paper id="25">
       <title>Pathways to Radicalisation: On Research for Online Radicalisation in Natural Language Processing and Machine Learning</title>
       <author><first>Zeerak</first><last>Talat</last><affiliation>University of Edinburgh</affiliation></author>
-      <author><first>Michael Sejr</first><last>Schlichtkrull</last><affiliation>University of Cambridge</affiliation></author>
-      <author><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
+      <author id="michael-schlichtkrull"><first>Michael Sejr</first><last>Schlichtkrull</last><affiliation>University of Cambridge</affiliation></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last><affiliation>City, University of London</affiliation></author>
       <author><first>Christine</first><last>De Kock</last><affiliation>University of Melbourne</affiliation></author>
       <pages>276-283</pages>
       <abstract>Online communities play an integral part in communication for communication across the globe. Online communities that are known for extremist content. As a field of surveillance technologies, NLP and other ML fields hold particular promise for monitoring extremist communities that may turn violent.Such communities make use of a wide variety of modalities of communication, including textual posts on specialised fora, memes, videos, and podcasts. Furthermore, such communities undergo rapid linguistic evolution, thus presenting a challenge to machine learning technologies that quickly diverge from the data that are used. In this position, we argue that radicalisation is a nascent area for which machine learning is particularly apt. However, in addressing radicalisation research it is important that avoids falling into the temptation of focusing on prediction. We argue that such communities present a particular avenue for addressing key concerns with machine learning technologies: (1) temporal misalignment of models and (2) aligning and linking content across modalities.</abstract>
@@ -309,7 +309,7 @@
       <author><first>Berk</first><last>Atil</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Vipul</first><last>Gupta</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Sarkar Snigdha Sarathi</first><last>Das</last><affiliation>Pennsylvania State University</affiliation></author>
-      <author><first>Rebecca</first><last>Passonneau</last><affiliation>The Pennsylvania State University</affiliation></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last><affiliation>The Pennsylvania State University</affiliation></author>
       <pages>342-354</pages>
       <abstract>Large language models (LLMs) have become ubiquitous, thus it is important to understand their risks and limitations, such as their propensity to generate harmful output. This includes smaller LLMs, which are important for settings with constrained compute resources, such as edge devices. Detection of LLM harm typically requires human annotation, which is expensive to collect. This work studies two questions: How do smaller LLMs rank regarding generation of harmful content? How well can larger LLMs annotate harmfulness? We prompt three small LLMs to elicit harmful content of various types, such as discriminatory language, offensive content, privacy invasion, or negative influence, and collect human rankings of their outputs. Then, we compare harm annotation from three state-of-the-art large LLMs with each other and with humans. We find that the smaller models differ with respect to harmfulness. We also find that large LLMs show low to moderate agreement with humans.</abstract>
       <url hash="7469732c">2025.woah-1.30</url>
@@ -347,7 +347,7 @@
       <author><first>Robin</first><last>Cooper</last><affiliation>University of Gothenburg</affiliation></author>
       <author><first>Elina</first><last>Lindgren</last><affiliation>Karlstad University</affiliation></author>
       <author><first>Björn</first><last>Rönnerstrand</last><affiliation>University of Gothenburg</affiliation></author>
-      <author><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last><affiliation>University of Gothenburg</affiliation></author>
       <pages>383-395</pages>
       <abstract>A dogwhistle is a communicative act intended to broadcast a message only understood by a select in-group while going unnoticed by others (out-group). We illustrate that political dogwhistle behavior in a more radical community precedes the occurrence of the dogwhistles in a less radical community, but the reverse does not hold. We study two Swedish online communities – Flashback and Familjeliv – which both contain discussions of life and society, with the former having a stronger anti-immigrant subtext. Expressions associated with dogwhistles are substantially more frequent in Flashback than in Familjeliv. We analyze the time series of changes in intensity of three dogwhistle expressions (DWEs), i.e., the strength of association of a DWE and its in-group meaning modeled by Swedish Sentence-BERT, and model the dynamic temporal relationship of intensity in the two communities for the three DWEs using Vector Autoregression (VAR). We show that changes in intensity in Familjeliv are explained by the changes of intensity observed at previous lags in Flashback but not the other way around. This suggests a direction of travel for dogwhistles associated with radical ideologies to less radical contexts.</abstract>
       <url hash="ef9ba3bc">2025.woah-1.34</url>
@@ -420,7 +420,7 @@
       <title>Graph of Attacks with Pruning: Optimizing Stealthy Jailbreak Prompt. Generation for Enhanced <fixed-case>LLM</fixed-case> Content Moderation</title>
       <author><first>Daniel</first><last>Schwarz</last><affiliation/></author>
       <author><first>Dmitriy</first><last>Bespalov</last><affiliation/></author>
-      <author><first>Zhe</first><last>Wang</last><affiliation/></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Ninad</first><last>Kulkarni</last><affiliation/></author>
       <author><first>Yanjun</first><last>Qi</last><affiliation/></author>
       <pages>482-489</pages>
@@ -457,7 +457,7 @@
       <author><first>Sebastian</first><last>Loftus</last><affiliation/></author>
       <author><first>Adrian</first><last>Mülthaler</last><affiliation/></author>
       <author><first>Sanne</first><last>Hoeken</last><affiliation/></author>
-      <author><first>Sina</first><last>Zarrieß</last><affiliation/></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Ozge</first><last>Alacam</last><affiliation/></author>
       <pages>538-547</pages>
       <abstract>Annotator disagreement poses a significant challenge in subjective tasks like hate speech detection. In this paper, we introduce a novel variant of the HateWiC task that explicitly models annotator agreement by estimating the proportion of annotators who classify the meaning of a term as hateful. To tackle this challenge, we explore the use of Llama 3 models fine-tuned through Direct Preference Optimization (DPO). Our experiments show that while LLMs perform well for majority-based hate classification, they struggle with the more complex agreement-aware task. DPO fine-tuning offers improvements, particularly when applied to instruction-tuned models. Yet, our results emphasize the need for improved modeling of subjectivity in hate classification and this study can serve as foundation for future advancements.</abstract>
diff --git a/data/xml/2025.wraicogs.xml b/data/xml/2025.wraicogs.xml
index b7df1b2a68..4b82d0286c 100644
--- a/data/xml/2025.wraicogs.xml
+++ b/data/xml/2025.wraicogs.xml
@@ -22,7 +22,7 @@
       <title>Chain-of-<fixed-case>M</fixed-case>eta<fixed-case>W</fixed-case>riting: Linguistic and Textual Analysis of How Small Language Models Write Young Students Texts</title>
       <author><first>Ioana</first><last>Buhnila</last></author>
       <author><first>Georgeta</first><last>Cislaru</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>1–15</pages>
       <abstract>Large Language Models (LLMs) have been used to generate texts in response to different writing tasks: reports, essays, story telling. However, language models do not have a metarepresentation of the text writing process, nor inherent communication learning needs, comparable to those of young human students. This paper introduces a fine-grained linguistic and textual analysis of multilingual Small Language Models’ (SLMs) writing. With our method, Chain-of-MetaWriting, SLMs can imitate some steps of the human writing process, such as planning and evaluation. We mainly focused on short story and essay writing tasks in French for schoolchildren and undergraduate students respectively. Our results show that SLMs encounter difficulties in assisting young students on sensitive topics such as violence in the schoolyard, and they sometimes use words too complex for the target audience. In particular, the output is quite different from the human produced texts in term of text cohesion and coherence regarding temporal connectors, topic progression, reference.</abstract>
       <url hash="8b4d6a34">2025.wraicogs-1.1</url>
@@ -55,7 +55,7 @@
       <author><first>Florian</first><last>Boudin</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
       <author><first>Nicolas</first><last>Hernandez</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>35–44</pages>
       <abstract>Revision is a crucial step in scientific writing, where authors refine their work to improve clarity, structure, and academic quality. Existing approaches to automated writing assistance often focus on sentence-level revisions, which fail to capture the broader context needed for effective modification. In this paper, we explore the impact of shifting from sentence-level to paragraph-level scope for the task of scientific text revision. The paragraph level definition of the task allows for more meaningful changes, and is guided by detailed revision instructions rather than general ones. To support this task, we introduce ParaRev, the first dataset of revised scientific paragraphs with an evaluation subset manually annotated with revision instructions. Our experiments demonstrate that using detailed instructions significantly improves the quality of automated revisions compared to general approaches, no matter the model or the metric considered.</abstract>
       <url hash="26784592">2025.wraicogs-1.4</url>
diff --git a/data/xml/2025.wsc.xml b/data/xml/2025.wsc.xml
index c948e87acd..0a0355150d 100644
--- a/data/xml/2025.wsc.xml
+++ b/data/xml/2025.wsc.xml
@@ -20,8 +20,8 @@
       <author><first>Bhakti</first><last>Jadhav</last></author>
       <author><first>Himanshu</first><last>Dutta</last></author>
       <author><first>Shruti</first><last>Kanitkar</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1–14</pages>
       <url hash="90a4303a">2025.wsc-csdh.1</url>
       <bibkey>jadhav-etal-2025-introduction</bibkey>
@@ -31,7 +31,7 @@
       <author><first>Jivnesh</first><last>Sandhan</last></author>
       <author><first>Amruta</first><last>Barbadikar</last></author>
       <author><first>Malay</first><last>Maity</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Tushar</first><last>Sandhan</last></author>
       <author><first>Ravi M</first><last>Gupta</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -82,7 +82,7 @@
     <paper id="7">
       <title>Compound Type Identification in <fixed-case>S</fixed-case>anskrit</title>
       <author><first>Sriram</first><last>Krishnan</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Amruta</first><last>Barbadikar</last></author>
       <author><first>T S</first><last>Prasanna Venkatesh</last></author>
       <author><first>Amba</first><last>Kulkarni</last></author>
diff --git a/data/xml/2025.xllm.xml b/data/xml/2025.xllm.xml
index d04de79564..d1ede11c56 100644
--- a/data/xml/2025.xllm.xml
+++ b/data/xml/2025.xllm.xml
@@ -14,7 +14,7 @@
       <editor><first>Meishan</first><last>Zhang</last></editor>
       <editor><first>Wei</first><last>Lu</last></editor>
       <editor><first>N.</first><last>Siddharth</last></editor>
-      <editor><first>Lilja</first><last>Øvrelid</last></editor>
+      <editor id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <editor><first>Yue</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -71,7 +71,7 @@
       <title>Regular-pattern-sensitive <fixed-case>CRF</fixed-case>s for Distant Label Interactions</title>
       <author><first>Sean</first><last>Papay</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>26-35</pages>
       <abstract>While LLMs have grown popular in sequence labeling, linear-chain conditionalrandom fields (CRFs) remain a popular alternativewith the ability to directly model interactions between labels.However, the Markov assumption limits them to interactions between adjacent labels.Weighted finite-state transducers (FSTs), in contrast, can modeldistant label–label interactions, but exact label inference is intractable in general.In this work, we present regular-pattern-sensitiveCRFs (RPCRFs), a method of enriching standardlinear-chain CRFs with the ability to learnlong-distance label interactions through user-specified patterns.This approach allows users to write regular-expressionlabel patterns concisely specifying which types of interactionsthe model should take into account, allowingthe model to learn from data whether and inwhich contexts these patterns occur. The resultcan be interpreted alternatively as a CRF augmented with additional,non-local potentials,or as a finite-state transducer whose structureis defined by a set of easily-interpretable patterns.Critically, exact training and inferenceare tractable for many pattern sets. We detailhow an RPCRF can be automatically constructed from a set of user-specified patterns,and demonstrate the model’s effectiveness ona sequence of three synthetic sequence modeling datasets.</abstract>
       <url hash="d2e4eb26">2025.xllm-1.4</url>
@@ -139,7 +139,7 @@
     <paper id="10">
       <title>Seamlessly Integrating Tree-Based Positional Embeddings into Transformer Models for Source Code Representation</title>
       <author><first>Patryk</first><last>Bartkowiak</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <pages>91-98</pages>
       <abstract>Transformer-based models have demonstrated significant success in various source code representation tasks. Nonetheless, traditional positional embeddings employed by these models inadequately capture the hierarchical structure intrinsic to source code, typically represented as Abstract Syntax Trees (ASTs). To address this, we propose a novel tree-based positional embedding approach that explicitly encodes hierarchical relationships derived from ASTs, including node depth and sibling indices. These hierarchical embeddings are integrated into the transformer architecture, specifically enhancing the CodeBERTa model. We thoroughly evaluate our proposed model through masked language modeling (MLM) pretraining and clone detection fine-tuning tasks. Experimental results indicate that our Tree-Enhanced CodeBERTa consistently surpasses the baseline model in terms of loss, accuracy, F1 score, precision, and recall, emphasizing the importance of incorporating explicit structural information into transformer-based representations of source code.</abstract>
       <url hash="b361a217">2025.xllm-1.10</url>
@@ -201,7 +201,7 @@
       <title>Do we still need Human Annotators? Prompting Large Language Models for Aspect Sentiment Quad Prediction</title>
       <author><first>Nils Constantin</first><last>Hellwig</last></author>
       <author><first>Jakob</first><last>Fehle</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Christian</first><last>Wolff</last></author>
       <pages>153-172</pages>
       <abstract>Aspect sentiment quad prediction (ASQP) facilitates a detailed understanding of opinions expressed in a text by identifying the opinion term, aspect term, aspect category and sentiment polarity for each opinion. However, annotating a full set of training examples to fine-tune models for ASQP is a resource-intensive process. In this study, we explore the capabilities of large language models (LLMs) for zero- and few-shot learning on the ASQP task across five diverse datasets. We report F1 scores almost up to par with those obtained with state-of-the-art fine-tuned models and exceeding previously reported zero- and few-shot performance. In the 20-shot setting on the Rest16 restaurant domain dataset, LLMs achieved an F1 score of 51.54, compared to 60.39 by the best-performing fine-tuned method MVP. Additionally, we report the performance of LLMs in target aspect sentiment detection (TASD), where the F1 scores were close to fine-tuned models, achieving 68.93 on Rest16 in the 30-shot setting, compared to 72.76 with MVP. While human annotators remain essential for achieving optimal performance, LLMs can reduce the need for extensive manual annotation in ASQP tasks.</abstract>
@@ -213,7 +213,7 @@
       <title>Can <fixed-case>LLM</fixed-case>s Interpret and Leverage Structured Linguistic Representations? A Case Study with <fixed-case>AMR</fixed-case>s</title>
       <author><first>Ankush</first><last>Raut</last></author>
       <author><first>Xiaofeng</first><last>Zhu</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <pages>173-185</pages>
       <abstract>This paper evaluates the ability of Large Language Models (LLMs) to leverage contextual information in the form of structured linguistic representations. Specifically, we examine the impact of encoding both short and long contexts using Abstract Meaning Representation (AMR) structures across a diverse set of language tasks. We perform our analysis using 8-bit quantized and instruction-tuned versions of Llama 3.1 (8B), Phi-3, and Mistral 7B. Our results indicate that, for tasks involving short contexts, augmenting the prompt with the AMR of the original language context often degrades the performance of the underlying LLM. However, for tasks that involve long contexts, such as dialogue summarization in the SAMSum dataset, this enhancement improves LLM performance, for example, by increasing the zero-shot cosine similarity score of Llama 3.1 from 66% to 76%. This improvement is more evident in the newer and larger LLMs, but does not extend to the older or smaller ones. In addition, we observe that LLMs can effectively reconstruct the original text from a linearized AMR, achieving a cosine similarity of 81% in the best-case scenario.</abstract>
       <url hash="6c9a4672">2025.xllm-1.16</url>
@@ -322,7 +322,7 @@
       <author><first>Diganta</first><last>Biswas</last></author>
       <author><first>Dipanjan</first><last>Saha</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>283-287</pages>
       <abstract>Event extraction from text is a complex taskthat involves the identification of event triggersand their supporting arguments. Whenapplied to speech, this task becomes evenmore challenging due to the continuous natureof audio signals and the need for robustAutomatic Speech Recognition (ASR). Thispaper proposes an approach that integratesASR with event extraction by utilizing theWhisper model for speech recognition and aText2Event2 Transformer for extracting eventsfrom English audio samples. The Whispermodel is used to generate transcripts from audio,which are then fed into the Text2Event2Transformer to identify event triggers and theirarguments. This approach combines two difficulttasks into one, streamlining the processof extracting structured event information directlyfrom audio. Our approach leverages arobust ASR system (Whisper) followed by aparameter-efficient transformer (Text2Event2fine-tuned via LoRA) to extract structuredevents from raw speech. Unlike prior worktrained on gold textual input, our pipeline istrained end-to-end on noisy ASR outputs. Despitesignificant resource constraints and datanoise, our system ranked first in the ACL 2025XLLM Shared Task II.</abstract>
       <url hash="246bea30">2025.xllm-1.24</url>
diff --git a/data/xml/A00.xml b/data/xml/A00.xml
index d7f83fe7b2..fd36f509dc 100644
--- a/data/xml/A00.xml
+++ b/data/xml/A00.xml
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>Machine Translation of Very Close Languages</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <doi>10.3115/974147.974149</doi>
       <pages>7–12</pages>
       <url hash="22b5712c">A00-1002</url>
@@ -41,7 +41,7 @@
     <paper id="4">
       <title>Automatic construction of parallel <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese corpus for cross-language information retrieval</title>
       <author><first>Jiang</first><last>Chen</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <doi>10.3115/974147.974151</doi>
       <pages>21–28</pages>
       <url hash="3fcb9f3b">A00-1004</url>
@@ -50,8 +50,8 @@
     <paper id="5">
       <title><fixed-case>P</fixed-case>arts<fixed-case>ID</fixed-case>: A Dialogue-Based System for Identifying Parts for Medical Systems</title>
       <author><first>Amit</first><last>Bagga</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>G. Bowden</first><last>Wise</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="g-bowden-wise"><first>G. Bowden</first><last>Wise</last></author>
       <doi>10.3115/974147.974152</doi>
       <pages>29–36</pages>
       <url hash="bfa8f76b">A00-1005</url>
@@ -60,8 +60,8 @@
     <paper id="6">
       <title>Translation using Information on Dialogue Participants</title>
       <author><first>Setsuo</first><last>Yamada</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <doi>10.3115/974147.974153</doi>
       <pages>37–43</pages>
       <url hash="bf07ecae">A00-1006</url>
@@ -69,7 +69,7 @@
     </paper>
     <paper id="7">
       <title>Distilling dialogues - A method using natural dialogue corpora for dialogue systems development</title>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <author><first>Nils</first><last>Dahlback</last></author>
       <doi>10.3115/974147.974154</doi>
       <pages>44–51</pages>
@@ -86,10 +86,10 @@
     </paper>
     <paper id="9">
       <title>A Framework for <fixed-case>MT</fixed-case> and Multilingual <fixed-case>NLG</fixed-case> Systems Based on Uniform Lexico-Structural Processing</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/974147.974156</doi>
       <pages>60–67</pages>
       <url hash="1963aa15">A00-1009</url>
@@ -97,7 +97,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>T</fixed-case>alk’n’<fixed-case>T</fixed-case>ravel: A Conversational System for Air Travel Planning</title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/974147.974157</doi>
       <pages>68–75</pages>
       <url hash="7742678e">A00-1010</url>
@@ -115,7 +115,7 @@
     <paper id="12">
       <title>Experiments on Sentence Boundary Detection</title>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <doi>10.3115/974147.974159</doi>
       <pages>84–89</pages>
       <url hash="1135873f">A00-1012</url>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>MIMIC</fixed-case>: An Adaptive Mixed Initiative Spoken Dialogue System for Information Queries</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <doi>10.3115/974147.974161</doi>
       <pages>97–104</pages>
       <url hash="751fbb75">A00-1014</url>
@@ -142,7 +142,7 @@
     <paper id="15">
       <title><fixed-case>J</fixed-case>avox: A Toolkit for Building Speech-Enabled Applications</title>
       <author><first>Michael S.</first><last>Fulkerson</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <doi>10.3115/974147.974162</doi>
       <pages>105–111</pages>
       <url hash="4978a26a">A00-1015</url>
@@ -150,8 +150,8 @@
     </paper>
     <paper id="16">
       <title>A Compact Architecture for Dialogue Management Based on Scripts and Meta-Outputs</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Frankie</first><last>James</last></author>
       <doi>10.3115/974147.974163</doi>
       <pages>112–118</pages>
@@ -183,7 +183,7 @@
     </paper>
     <paper id="19">
       <title>Unit Completion for a Computer-aided Translation Typing System</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <doi>10.3115/974147.974166</doi>
@@ -193,8 +193,8 @@
     </paper>
     <paper id="20">
       <title>Multilingual Coreference Resolution</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
       <doi>10.3115/974147.974167</doi>
       <pages>142–149</pages>
       <url hash="03ad3972">A00-1020</url>
@@ -202,7 +202,7 @@
     </paper>
     <paper id="21">
       <title>Ranking suspected answers to natural language questions using predictive annotation</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>John</first><last>Prager</last></author>
       <author><first>Valerie</first><last>Samn</last></author>
       <doi>10.3115/974147.974168</doi>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="23">
       <title>A Question Answering System Supported by Information Extraction</title>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <doi>10.3115/974147.974170</doi>
       <pages>166–172</pages>
@@ -239,10 +239,10 @@
     </paper>
     <paper id="25">
       <title>Examining the Role of Statistical and Linguistic Knowledge Sources in a General-Knowledge Question-Answering System</title>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>David</first><last>Pierce</last></author>
-      <author><first>Chris</first><last>Buckley</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
+      <author id="chris-buckley"><first>Chris</first><last>Buckley</last></author>
       <doi>10.3115/974147.974172</doi>
       <pages>180–187</pages>
       <url hash="984cd4ce">A00-1025</url>
@@ -250,9 +250,9 @@
     </paper>
     <paper id="26">
       <title>Extracting Molecular Binding Relationships from Biomedical Text</title>
-      <author><first>Thomas C.</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas C.</first><last>Rindflesch</last></author>
       <author><first>Jayant V.</first><last>Rajan</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <doi>10.3115/974147.974173</doi>
       <pages>188–195</pages>
       <url hash="25ebbc06">A00-1026</url>
@@ -277,7 +277,7 @@
     </paper>
     <paper id="29">
       <title>A Tool for Automated Revision of Grammars for <fixed-case>NLP</fixed-case> Systems</title>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
       <author><first>Wlodek</first><last>Zadrozny</last></author>
       <doi>10.3115/974147.974176</doi>
       <pages>210–217</pages>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="30">
       <title>Aggressive Morphology for Robust Lexical Coverage</title>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <doi>10.3115/974147.974177</doi>
       <pages>218–223</pages>
       <url hash="27ad12f2">A00-1030</url>
@@ -303,7 +303,7 @@
     <paper id="32">
       <title>Language Independent Morphological Analysis</title>
       <author><first>Tatsuo</first><last>Yamashita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/974147.974179</doi>
       <pages>232–238</pages>
       <url hash="c1f20db3">A00-1032</url>
@@ -311,7 +311,7 @@
     </paper>
     <paper id="33">
       <title>A Divide-and-Conquer Strategy for Shallow Parsing of <fixed-case>G</fixed-case>erman Free Texts</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <author><first>Christian</first><last>Braun</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
       <doi>10.3115/974147.974180</doi>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="34">
       <title>A Hybrid Approach for Named Entity and Sub-Type Tagging</title>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <doi>10.3115/974147.974181</doi>
       <pages>247–254</pages>
       <url hash="9e4a73cc">A00-1034</url>
@@ -337,12 +337,12 @@
     </paper>
     <paper id="36">
       <title>Linguistic Knowledge can Improve Information Retrieval</title>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <author><first>Lawrence A.</first><last>Bookman</last></author>
       <author><first>Ann</first><last>Houston</last></author>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <author><first>Paul</first><last>Martin</last></author>
-      <author><first>Stephen</first><last>Green</last></author>
+      <author id="stephen-j-green"><first>Stephen</first><last>Green</last></author>
       <doi>10.3115/974147.974183</doi>
       <pages>262–267</pages>
       <url hash="3dc83a9f">A00-1036</url>
@@ -350,8 +350,8 @@
     </paper>
     <paper id="37">
       <title>Domain-Specific Knowledge Acquisition from Text</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <doi>10.3115/974147.974184</doi>
       <pages>268–275</pages>
@@ -369,7 +369,7 @@
     <paper id="39">
       <title>Unsupervised Discovery of Scenario-Level Patterns for Information Extraction</title>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <doi>10.3115/974147.974186</doi>
       <pages>282–289</pages>
@@ -379,7 +379,7 @@
     <paper id="40">
       <title>Using Corpus-derived Name Lists for Named Entity Recognition</title>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <doi>10.3115/974147.974187</doi>
       <pages>290–295</pages>
       <url hash="0e739dfe">A00-1040</url>
@@ -387,8 +387,8 @@
     </paper>
     <paper id="41">
       <title>Answer Extraction</title>
-      <author><first>Steven</first><last>Abney</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Amit</first><last>Singhal</last></author>
       <doi>10.3115/974147.974188</doi>
       <pages>296–301</pages>
@@ -398,8 +398,8 @@
     <paper id="42">
       <title>Evaluation of Automatically Identified Index Terms for Browsing Electronic Documents</title>
       <author><first>Nina</first><last>Wacholder</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>David K.</first><last>Evans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="david-k-evans"><first>David K.</first><last>Evans</last></author>
       <doi>10.3115/974147.974189</doi>
       <pages>302–309</pages>
       <url hash="298abc72">A00-1042</url>
@@ -407,7 +407,7 @@
     </paper>
     <paper id="43">
       <title>Sentence Reduction for Automatic Text Summarization</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <doi>10.3115/974147.974190</doi>
       <pages>310–315</pages>
       <url hash="3eb652a5">A00-1043</url>
@@ -416,10 +416,10 @@
     <paper id="44">
       <title>Named Entity Extraction from Noisy Input: Speech and <fixed-case>OCR</fixed-case></title>
       <author><first>David</first><last>Miller</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Rebecca</first><last>Stone</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.3115/974147.974191</doi>
       <pages>316–324</pages>
       <url hash="f2baf1a5">A00-1044</url>
@@ -435,8 +435,8 @@
     </paper>
     <paper id="46">
       <title>The Efficiency of Multimodal Interaction for a Map-based Task</title>
-      <author><first>Philip</first><last>Cohen</last></author>
-      <author><first>David</first><last>McGee</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
+      <author id="david-mcgee"><first>David</first><last>McGee</last></author>
       <author><first>Josh</first><last>Clow</last></author>
       <doi>10.3115/974147.974193</doi>
       <pages>331–338</pages>
@@ -457,8 +457,8 @@
     <paper id="1">
       <title>Modelling Grounding and Discourse Obligations Using Update Rules</title>
       <author><first>Colin</first><last>Matheson</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <url hash="3ac86bff">A00-2001</url>
       <bibkey>matheson-etal-2000-modelling</bibkey>
     </paper>
@@ -485,7 +485,7 @@
     </paper>
     <paper id="5">
       <title>Bagging and Boosting a Treebank Parser</title>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <author><first>Eric</first><last>Brill</last></author>
       <url hash="7c513384">A00-2005</url>
       <bibkey>henderson-brill-2000-bagging</bibkey>
@@ -494,22 +494,22 @@
       <title>Encoding information on adjectives in a lexical-semantic net for computational applications</title>
       <author><first>Antonietta</first><last>Alonge</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Adriana</first><last>Roventini</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="42bd0a70">A00-2006</url>
       <bibkey>alonge-etal-2000-encoding</bibkey>
     </paper>
     <paper id="7">
       <title>Noun Phrase Recognition by System Combination</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="6d2be3e8">A00-2007</url>
       <bibkey>tjong-kim-sang-2000-noun</bibkey>
     </paper>
     <paper id="8">
       <title>The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et tagset for frame-semantic and syntactic coding of predicate-argument structure</title>
       <author><first>Christopher</first><last>Johnson</last></author>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
       <url hash="50142ad7">A00-2008</url>
       <bibkey>johnson-fillmore-2000-framenet</bibkey>
     </paper>
@@ -542,7 +542,7 @@
     </paper>
     <paper id="13">
       <title>Morphological Tagging: Data vs. Dictionaries</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <url hash="d219b16d">A00-2013</url>
       <bibkey>hajic-2000-morphological</bibkey>
     </paper>
@@ -561,7 +561,7 @@
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Shigeyuki</first><last>Nishiokayama</last></author>
       <author><first>Masakazu</first><last>Fujio</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="d1218f38">A00-2015</url>
       <bibkey>utsuro-etal-2000-analyzing</bibkey>
     </paper>
@@ -586,7 +586,7 @@
     </paper>
     <paper id="19">
       <title>An Unsupervised Method for Detecting Grammatical Errors</title>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Claudia</first><last>Leacock</last></author>
       <url hash="c7d09535">A00-2019</url>
       <bibkey>chodorow-leacock-2000-unsupervised</bibkey>
@@ -607,56 +607,56 @@
     <paper id="22">
       <title>Ambiguity Packing in Constraint-based Parsing Practical Results</title>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <url hash="993806c8">A00-2022</url>
       <bibkey>oepen-carroll-2000-ambiguity</bibkey>
     </paper>
     <paper id="23">
       <title>Forest-Based Statistical Sentence Generation</title>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <url hash="a9935377">A00-2023</url>
       <bibkey>langkilde-2000-forest</bibkey>
     </paper>
     <paper id="24">
       <title>Cut and Paste Based Text Summarization</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="45d70a5b">A00-2024</url>
       <bibkey>jing-mckeown-2000-cut</bibkey>
     </paper>
     <paper id="25">
       <title>Minimizing Word Error Rate in Textual Summaries of Spoken Language</title>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="686e8e93">A00-2025</url>
       <bibkey>zechner-waibel-2000-minimizing</bibkey>
     </paper>
     <paper id="26">
       <title>Trainable Methods for Surface Natural Language Generation</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="2ef067b5">A00-2026</url>
       <bibkey>ratnaparkhi-2000-trainable</bibkey>
     </paper>
     <paper id="27">
       <title>Evaluating Automatic Dialogue Strategy Adaptation for a Spoken Dialogue System</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <url hash="90d87e62">A00-2027</url>
       <bibkey>chu-carroll-2000-evaluating</bibkey>
     </paper>
     <paper id="28">
       <title>Learning to Predict Problematic Situations in a Spoken Dialogue System: Experiments with <fixed-case>H</fixed-case>ow <fixed-case>M</fixed-case>ay <fixed-case>I</fixed-case> <fixed-case>H</fixed-case>elp <fixed-case>Y</fixed-case>ou?</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <author><first>Jerry</first><last>Wright</last></author>
-      <author><first>Allen</first><last>Gorin</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="allen-l-gorin"><first>Allen</first><last>Gorin</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <url hash="f8a14649">A00-2028</url>
       <bibkey>walker-etal-2000-learning</bibkey>
     </paper>
     <paper id="29">
       <title>Predicting Automatic Speech Recognition Performance Using Prosodic Cues</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Julia B.</first><last>Hirschberg</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="julia-hirschberg"><first>Julia B.</first><last>Hirschberg</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
       <url hash="9843e5ed">A00-2029</url>
       <bibkey>litman-etal-2000-predicting</bibkey>
@@ -664,9 +664,9 @@
     <paper id="30">
       <title>A Novel Use of Statistical Parsing to Extract Information from Text</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="80bca6f4">A00-2030</url>
       <bibkey>miller-etal-2000-novel</bibkey>
     </paper>
@@ -679,20 +679,20 @@
     </paper>
     <paper id="32">
       <title>Mostly-Unsupervised Statistical Segmentation of <fixed-case>J</fixed-case>apanese: Applications to Kanji</title>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
       <author><first>Lillian</first><last>Lee</last></author>
       <url hash="060cf657">A00-2032</url>
       <bibkey>ando-lee-2000-mostly</bibkey>
     </paper>
     <paper id="33">
       <title>Removing Left Recursion from Context-Free Grammars</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="75fe2993">A00-2033</url>
       <bibkey>moore-2000-removing</bibkey>
     </paper>
     <paper id="34">
       <title>Using Semantic Preferences to Identify Verbal Participation in Role Switching Alternations</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <url hash="0839406a">A00-2034</url>
       <bibkey>mccarthy-2000-using</bibkey>
     </paper>
@@ -712,7 +712,7 @@
     <paper id="37">
       <title>Acknowledgments in Human-Computer Interaction</title>
       <author><first>Karen</first><last>Ward</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <url hash="c000280e">A00-2037</url>
       <bibkey>ward-heeman-2000-acknowledgments</bibkey>
     </paper>
@@ -736,7 +736,7 @@
     </paper>
     <paper id="41">
       <title>A Framework for Robust Semantic Interpretation Learning</title>
-      <author><first>Carolyn P.</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rose</last></author>
       <url hash="a4564963">A00-2041</url>
       <bibkey>rose-2000-framework</bibkey>
     </paper>
@@ -792,8 +792,8 @@
     </paper>
     <paper id="5">
       <title>Corpus-Based Syntactic Error Detection Using Syntactic Patterns</title>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <url hash="50f21840">A00-3005</url>
       <bibkey>gojenola-oronoz-2000-corpus</bibkey>
     </paper>
diff --git a/data/xml/A83.xml b/data/xml/A83.xml
index 68e44a6c4b..1c12e24d5c 100644
--- a/data/xml/A83.xml
+++ b/data/xml/A83.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Domain-Independent Natural Language Interfaces: Session Introduction</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/974194.974196</doi>
       <pages>1–2</pages>
       <url hash="c2600950">A83-1001</url>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>TEAM</fixed-case>: A Transportable Natural-Language Interface System</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <doi>10.3115/974194.974201</doi>
       <pages>39–45</pages>
       <url hash="32210052">A83-1006</url>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="9">
       <title>Distinguishing Fact From Opinion and Events From Meta-Events</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
       <doi>10.3115/974194.974205</doi>
       <pages>55–61</pages>
       <url hash="0fba128a">A83-1009</url>
@@ -92,8 +92,8 @@
     </paper>
     <paper id="10">
       <title>Parsing With Logical Variables</title>
-      <author><first>Timothy W.</first><last>Finin</last></author>
-      <author><first>Martha Stone</first><last>Palmer</last></author>
+      <author id="tim-finin"><first>Timothy W.</first><last>Finin</last></author>
+      <author id="martha-palmer"><first>Martha Stone</first><last>Palmer</last></author>
       <doi>10.3115/974194.974206</doi>
       <pages>62–68</pages>
       <url hash="35ac447b">A83-1010</url>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>EXPLORER</fixed-case>: A Natural Language Processing System for Oil Exploration</title>
-      <author><first>Wendy G.</first><last>Lehnert</last></author>
+      <author id="wendy-lehnert"><first>Wendy G.</first><last>Lehnert</last></author>
       <author><first>Steven P.</first><last>Shwartz</last></author>
       <doi>10.3115/974194.974207</doi>
       <pages>69–72</pages>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="14">
       <title>Handling Ill-Formed Input: Session Introduction</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/974194.974211</doi>
       <pages>89–92</pages>
       <url hash="c129ca99">A83-1014</url>
@@ -136,8 +136,8 @@
     </paper>
     <paper id="15">
       <title>The Fitted Parse: 100% Parsing Capability in a Syntactic Grammar of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Karen</first><last>Jensen</last></author>
-      <author><first>George E.</first><last>Heidorn</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
+      <author id="george-e-heidorn"><first>George E.</first><last>Heidorn</last></author>
       <doi>10.3115/974194.974212</doi>
       <pages>93–98</pages>
       <url hash="0cc37a74">A83-1015</url>
@@ -172,7 +172,7 @@
     </paper>
     <paper id="19">
       <title>Specialized Information Extraction: Automatic Chemical Reaction Coding From <fixed-case>E</fixed-case>nglish Descriptions</title>
-      <author><first>Larry H.</first><last>Reeker</last></author>
+      <author id="larry-h-reeker"><first>Larry H.</first><last>Reeker</last></author>
       <author><first>Elena M.</first><last>Chmora</last></author>
       <author><first>Paul E.</first><last>Blower</last></author>
       <doi>10.3115/974194.974217</doi>
@@ -190,9 +190,9 @@
     </paper>
     <paper id="21">
       <title>“Expertness” from Structured Text? <fixed-case>RECONSIDER</fixed-case>: A Diagnostic Prompting Program</title>
-      <author><first>Mark S.</first><last>Tuttle</last></author>
-      <author><first>David D.</first><last>Sherertz</last></author>
-      <author><first>Marsden S.</first><last>Blois</last></author>
+      <author id="mark-s-tuttle"><first>Mark S.</first><last>Tuttle</last></author>
+      <author id="david-d-sherertz"><first>David D.</first><last>Sherertz</last></author>
+      <author id="marsden-s-blois"><first>Marsden S.</first><last>Blois</last></author>
       <author><first>Stuart</first><last>Nelson</last></author>
       <doi>10.3115/974194.974219</doi>
       <pages>124–131</pages>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="24">
       <title>Automatic Representation of the Semantic Relationships Corresponding to a <fixed-case>F</fixed-case>rench Surface Expression</title>
-      <author><first>Gian Piero</first><last>Zarri</last></author>
+      <author id="gian-piero-zarri"><first>Gian Piero</first><last>Zarri</last></author>
       <doi>10.3115/974194.974222</doi>
       <pages>143–147</pages>
       <url hash="4ccd428f">A83-1024</url>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="26">
       <title>Investigating the Possibility of a Microprocessor-Based Machine Translatton System</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <doi>10.3115/974194.974225</doi>
       <pages>149–155</pages>
       <url hash="9bb8bc50">A83-1026</url>
@@ -242,7 +242,7 @@
     </paper>
     <paper id="27">
       <title>An Application of <fixed-case>M</fixed-case>ontague Grammar to <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Machine Translation</title>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <doi>10.3115/974194.974226</doi>
       <pages>156–165</pages>
@@ -259,7 +259,7 @@
     </paper>
     <paper id="29">
       <title>COMPUTER-ASSISTED TRANSLATION SYSTEMS: The Standard Design and A Multi-level Design</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <doi>10.3115/974194.974228</doi>
       <pages>174–177</pages>
       <url hash="f57eb64c">A83-1029</url>
diff --git a/data/xml/A88.xml b/data/xml/A88.xml
index 04339c6c30..eda83db38d 100644
--- a/data/xml/A88.xml
+++ b/data/xml/A88.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>The Multimedia Articulation of Answers in a Natural Language Database Query System</title>
-      <author><first>Susan E.</first><last>Brennan</last></author>
+      <author id="susan-e-brennan"><first>Susan E.</first><last>Brennan</last></author>
       <doi>10.3115/974235.974237</doi>
       <pages>1–8</pages>
       <url hash="a91d68a4">A88-1001</url>
@@ -33,8 +33,8 @@
     </paper>
     <paper id="3">
       <title>An Architecture for Anaphora Resolution</title>
-      <author><first>Elaine</first><last>Rich</last></author>
-      <author><first>Susann</first><last>LuperFoy</last></author>
+      <author id="elaine-rich"><first>Elaine</first><last>Rich</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>LuperFoy</last></author>
       <doi>10.3115/974235.974239</doi>
       <pages>18–24</pages>
       <url hash="3bb7c3e4">A88-1003</url>
@@ -42,7 +42,7 @@
     </paper>
     <paper id="4">
       <title>The <fixed-case>SEMSYN</fixed-case> Generation System: Ingredients, Applications, Prospects</title>
-      <author><first>Dietmar</first><last>Rosner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rosner</last></author>
       <doi>10.3115/974235.974241</doi>
       <pages>25–32</pages>
       <url hash="1181f269">A88-1004</url>
@@ -58,8 +58,8 @@
     </paper>
     <paper id="6">
       <title>From Water to Wine: Generating Natural Language Text From Today’s Applications Programs</title>
-      <author><first>David D.</first><last>McDonald</last></author>
-      <author><first>Marie W.</first><last>Meteer</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
+      <author id="marie-meteer"><first>Marie W.</first><last>Meteer</last></author>
       <doi>10.3115/974235.974243</doi>
       <pages>41–48</pages>
       <url hash="1b2336af">A88-1006</url>
@@ -67,8 +67,8 @@
     </paper>
     <paper id="7">
       <title>Improved Portability and Parsing Through Interactive Acquisition of Semantic Information</title>
-      <author><first>Francois-Michel</first><last>Lang</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="francois-michel-lang"><first>Francois-Michel</first><last>Lang</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <doi>10.3115/974235.974245</doi>
       <pages>49–57</pages>
       <url hash="f20949fa">A88-1007</url>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="9">
       <title>Responding to Semantically Ill-Formed Input</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Ping</first><last>Peng</last></author>
       <doi>10.3115/974235.974247</doi>
       <pages>66–70</pages>
@@ -93,8 +93,8 @@
     </paper>
     <paper id="10">
       <title>Evaluation of a Parallel Chart Parser</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Mahesh</first><last>Chitrao</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="mahesh-v-chitrao"><first>Mahesh</first><last>Chitrao</last></author>
       <doi>10.3115/974235.974248</doi>
       <pages>71–76</pages>
       <url hash="55422c9c">A88-1010</url>
@@ -103,7 +103,7 @@
     <paper id="11">
       <title>Triphone Analysis: A Combined Method for the Correction of Orthographical and Typographical Errors.</title>
       <author><first>Brigitte</first><last>van Berkel</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <doi>10.3115/974235.974250</doi>
       <pages>77–83</pages>
       <url hash="ea3eaeae">A88-1011</url>
@@ -111,7 +111,7 @@
     </paper>
     <paper id="12">
       <title>Creating and Querying Lexical Data Bases</title>
-      <author><first>Mary S.</first><last>Neff</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
       <author><first>Roy J.</first><last>Byrd</last></author>
       <author><first>Omneya A.</first><last>Rizk</last></author>
       <doi>10.3115/974235.974251</doi>
@@ -131,8 +131,8 @@
       <title>Building a Large Thesaurus for Information Retrieval</title>
       <author><first>Edward A.</first><last>Fox</last></author>
       <author><first>J. Terry</first><last>Nutter</last></author>
-      <author><first>Thomas</first><last>Ahlswede</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="thomas-ahlswede"><first>Thomas</first><last>Ahlswede</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <author><first>Judith</first><last>Markowitz</last></author>
       <doi>10.3115/974235.974253</doi>
       <pages>101–108</pages>
@@ -141,9 +141,9 @@
     </paper>
     <paper id="15">
       <title>Application-Specific Issues in Natural Language Interfacer Development for a Diagnostic Expert System</title>
-      <author><first>Karen L.</first><last>Ryan</last></author>
+      <author id="karen-l-ryan"><first>Karen L.</first><last>Ryan</last></author>
       <author><first>Rebecca</first><last>Root</last></author>
-      <author><first>Duane</first><last>Olawsky</last></author>
+      <author id="duane-e-olawsky"><first>Duane</first><last>Olawsky</last></author>
       <doi>10.3115/974235.974255</doi>
       <pages>109–114</pages>
       <url hash="f5bd74cd">A88-1015</url>
@@ -152,7 +152,7 @@
     <paper id="16">
       <title>The <fixed-case>MULTIVOC</fixed-case> Text-to-Speech System</title>
       <author><first>Olivier M.</first><last>Emorine</last></author>
-      <author><first>Pierre M.</first><last>Martin</last></author>
+      <author id="m-patrick-martin"><first>Pierre M.</first><last>Martin</last></author>
       <doi>10.3115/974235.974256</doi>
       <pages>115–120</pages>
       <url hash="1ed287a9">A88-1016</url>
@@ -168,8 +168,8 @@
     </paper>
     <paper id="18">
       <title>Integrating Top-Down and Bottom-Up Strategies in a Text Processing System</title>
-      <author><first>Lisa F.</first><last>Rau</last></author>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa F.</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <doi>10.3115/974235.974259</doi>
       <pages>129–135</pages>
       <url hash="c1e94b0a">A88-1018</url>
@@ -177,7 +177,7 @@
     </paper>
     <paper id="19">
       <title>A Stochastic Parts Program and Noun Phrase Parser for Unrestricted Text</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <doi>10.3115/974235.974260</doi>
       <pages>136–143</pages>
       <url hash="e50f82e4">A88-1019</url>
@@ -185,8 +185,8 @@
     </paper>
     <paper id="20">
       <title>A Tool for Investigating the Synonymy Relation in a Sense Disambiguated Thesaurus</title>
-      <author><first>Martin S.</first><last>Chodorow</last></author>
-      <author><first>Yael</first><last>Ravin</last></author>
+      <author id="martin-chodorow"><first>Martin S.</first><last>Chodorow</last></author>
+      <author id="yael-ravin"><first>Yael</first><last>Ravin</last></author>
       <author><first>Howard E.</first><last>Sachar</last></author>
       <doi>10.3115/974235.974261</doi>
       <pages>144–151</pages>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="21">
       <title>Dictionary Text Entries as a Source of Knowledge for Syntactic and Other Disambiguations</title>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <author><first>Jean-Louis</first><last>Binot</last></author>
       <doi>10.3115/974235.974262</doi>
       <pages>152–159</pages>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="22">
       <title><fixed-case>E</fixed-case>urotra Practical Experience With a Multilingual Machine Translation System Under Development</title>
-      <author><first>Giovanni B.</first><last>Varile</last></author>
+      <author id="giovanni-battista-varile"><first>Giovanni B.</first><last>Varile</last></author>
       <author><first>Peter</first><last>Lau</last></author>
       <doi>10.3115/974235.974264</doi>
       <pages>160–167</pages>
@@ -221,7 +221,7 @@
     </paper>
     <paper id="24">
       <title>Natural Language Interfaces: Present and Future</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <doi>10.3115/974235.974267</doi>
       <pages>176–177</pages>
       <url hash="226ba9d8">A88-1024</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="25">
       <title>Automatically Generating Natural Language Reports in an Office Environment</title>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <author><first>Sunil</first><last>Shende</last></author>
       <doi>10.3115/974235.974269</doi>
       <pages>178–185</pages>
@@ -239,7 +239,7 @@
     <paper id="26">
       <title><fixed-case>LUKE</fixed-case>: An Experiment in the Early Integration of Natural Language Processing</title>
       <author><first>David A.</first><last>Wroblewski</last></author>
-      <author><first>Elaine A.</first><last>Rich</last></author>
+      <author id="elaine-rich"><first>Elaine A.</first><last>Rich</last></author>
       <doi>10.3115/974235.974270</doi>
       <pages>186–194</pages>
       <url hash="780ea545">A88-1026</url>
@@ -248,7 +248,7 @@
     <paper id="27">
       <title>The Experience of Developing a Large-Scale Natural Language Text Processing System: Critique</title>
       <author><first>Stephen D.</first><last>Richardson</last></author>
-      <author><first>Lisa C.</first><last>Braden-Harder</last></author>
+      <author id="lisa-braden-harder"><first>Lisa C.</first><last>Braden-Harder</last></author>
       <doi>10.3115/974235.974271</doi>
       <pages>195–202</pages>
       <url hash="b8a3159a">A88-1027</url>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="28">
       <title>Computational Techniques for Improved Name Search</title>
-      <author><first>Beatrice T.</first><last>Oshika</last></author>
+      <author id="beatrice-oshika"><first>Beatrice T.</first><last>Oshika</last></author>
       <author><first>Filip</first><last>Machi</last></author>
       <author><first>Bruce</first><last>Evans</last></author>
       <author><first>Janet</first><last>Tom</last></author>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="30">
       <title>Finding Clauses in Unrestricted Text by Finitary and Stochastic Methods</title>
-      <author><first>Eva I.</first><last>Ejerhed</last></author>
+      <author id="eva-ejerhed"><first>Eva I.</first><last>Ejerhed</last></author>
       <doi>10.3115/974235.974275</doi>
       <pages>219–227</pages>
       <url hash="894c132e">A88-1030</url>
@@ -291,8 +291,8 @@
     </paper>
     <paper id="32">
       <title>Localizing Expression of Ambiguity</title>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.3115/974235.974278</doi>
       <pages>235–242</pages>
       <url hash="9b7f4b71">A88-1032</url>
diff --git a/data/xml/A92.xml b/data/xml/A92.xml
index 82a30c0f03..ec8bacdb5a 100644
--- a/data/xml/A92.xml
+++ b/data/xml/A92.xml
@@ -15,8 +15,8 @@
     </frontmatter>
     <paper id="1">
       <title>Deriving Database Queries from Logical Forms by Abductive Definition Expansion</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <doi>10.3115/974499.974501</doi>
       <pages>1–8</pages>
       <url hash="36cf445c">A92-1001</url>
@@ -24,9 +24,9 @@
     </paper>
     <paper id="2">
       <title>A Dialog Control Algorithm and Its Performance</title>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <author><first>D. Richard</first><last>Hipp</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <doi>10.3115/974499.974502</doi>
       <pages>9–16</pages>
       <url hash="1429ddb8">A92-1002</url>
@@ -34,8 +34,8 @@
     </paper>
     <paper id="3">
       <title>An Approach to Multilevel Semantics for Applied Systems</title>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <doi>10.3115/974499.974503</doi>
       <pages>17–24</pages>
@@ -44,9 +44,9 @@
     </paper>
     <paper id="4">
       <title>A Parser for Real-Time Speech Synthesis of Conversational Texts</title>
-      <author><first>Joan</first><last>Bachenko</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
       <author><first>Jeffrey</first><last>Daugherty</last></author>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
       <doi>10.3115/974499.974505</doi>
       <pages>25–32</pages>
       <url hash="1d729929">A92-1004</url>
@@ -56,7 +56,7 @@
       <title>Real-time linguistic analysis for continuous speech understanding</title>
       <author><first>Paolo</first><last>Baggia</last></author>
       <author><first>Elisabetta</first><last>Gerbino</last></author>
-      <author><first>Egidio</first><last>Giachin</last></author>
+      <author id="egidio-giachin"><first>Egidio</first><last>Giachin</last></author>
       <author><first>Claudio</first><last>Rullent</last></author>
       <doi>10.3115/974499.974506</doi>
       <pages>33–39</pages>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="6">
       <title>Applied Text Generation</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
       <doi>10.3115/974499.974508</doi>
       <pages>40–47</pages>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="7">
       <title>Automatic Generation of Multimodal Weather Reports from Datasets</title>
-      <author><first>Stephan M.</first><last>Kerpedjiev</last></author>
+      <author id="stephan-m-kerpedjiev"><first>Stephan M.</first><last>Kerpedjiev</last></author>
       <doi>10.3115/974499.974509</doi>
       <pages>48–55</pages>
       <url hash="a788cac6">A92-1007</url>
@@ -91,7 +91,7 @@
     <paper id="9">
       <title>Automatic Generation of On-Line Documentation in the <fixed-case>IDAS</fixed-case> Project</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>John</first><last>Levine</last></author>
       <doi>10.3115/974499.974511</doi>
       <pages>64–71</pages>
@@ -101,7 +101,7 @@
     <paper id="10">
       <title>Integrating Natural Language Components into Graphical Discourse</title>
       <author><first>Stephan</first><last>Dilley</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Ulrich</first><last>Thiel</last></author>
       <author><first>Anne</first><last>Tissen</last></author>
       <doi>10.3115/974499.974512</doi>
@@ -128,9 +128,9 @@
     </paper>
     <paper id="13">
       <title>Computational Lexicons: the Neat Examples and the Odd Exemplars</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <doi>10.3115/974499.974516</doi>
       <pages>96–103</pages>
       <url hash="29def310">A92-1013</url>
@@ -139,9 +139,9 @@
     <paper id="14">
       <title>Automatic Learning for Semantic Collocation</title>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Jeremy J.</first><last>Carroll</last></author>
+      <author id="jeremy-j-carroll"><first>Jeremy J.</first><last>Carroll</last></author>
       <author><first>Sofia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/974499.974517</doi>
       <pages>104–110</pages>
       <url hash="fc43e36e">A92-1014</url>
@@ -183,7 +183,7 @@
     </paper>
     <paper id="18">
       <title>A Practical Part-of-Speech Tagger</title>
-      <author><first>Doug</first><last>Cutting</last></author>
+      <author id="douglass-cutting"><first>Doug</first><last>Cutting</last></author>
       <author><first>Julian</first><last>Kupiec</last></author>
       <author><first>Jan</first><last>Pedersen</last></author>
       <author><first>Penelope</first><last>Sibun</last></author>
@@ -203,12 +203,12 @@
     </paper>
     <paper id="20">
       <title>A Corpus-Based Statistical Approach to Automatic Book Indexing</title>
-      <author><first>Jyun-Sheng</first><last>Chang</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-Sheng</first><last>Chang</last></author>
       <author><first>Tsung-Yih</first><last>Tseng</last></author>
-      <author><first>Sur-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sur-Jin</first><last>Ker</last></author>
       <author><first>Ying</first><last>Cheng</last></author>
-      <author><first>Huey-Chyun</first><last>Chen</last></author>
-      <author><first>Shun-Der</first><last>Cheng</last></author>
+      <author id="huey-chyun-chen"><first>Huey-Chyun</first><last>Chen</last></author>
+      <author id="shun-der-chen"><first>Shun-Der</first><last>Cheng</last></author>
       <author><first>John S.</first><last>Liu</last></author>
       <doi>10.3115/974499.974525</doi>
       <pages>147–151</pages>
@@ -225,8 +225,8 @@
     </paper>
     <paper id="22">
       <title>Evaluating Parsing Strategies Using Standardized Parse Files</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <doi>10.3115/974499.974528</doi>
       <pages>156–161</pages>
@@ -235,8 +235,8 @@
     </paper>
     <paper id="23">
       <title>A Practical Methodology for the Evaluation of Spoken Language Systems</title>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <doi>10.3115/974499.974529</doi>
       <pages>162–169</pages>
       <url hash="d0e9f650">A92-1023</url>
@@ -249,7 +249,7 @@
       <author><first>Steven P.</first><last>Weinstein</last></author>
       <author><first>Alison K.</first><last>Huettner</last></author>
       <author><first>Linda M.</first><last>Schmandt</last></author>
-      <author><first>Irene B.</first><last>Nirenburg</last></author>
+      <author id="irene-nirenburg"><first>Irene B.</first><last>Nirenburg</last></author>
       <doi>10.3115/974499.974531</doi>
       <pages>170–177</pages>
       <url hash="d7cb1e82">A92-1024</url>
@@ -257,7 +257,7 @@
     </paper>
     <paper id="25">
       <title>Joining Statistics with <fixed-case>NLP</fixed-case> for Text Categorization</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <doi>10.3115/974499.974532</doi>
       <pages>178–185</pages>
       <url hash="a1cd7833">A92-1025</url>
@@ -265,9 +265,9 @@
     </paper>
     <paper id="26">
       <title>Robust Processing of Real-World Natural-Language Texts</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <doi>10.3115/974499.974533</doi>
       <pages>186–192</pages>
@@ -276,7 +276,7 @@
     </paper>
     <paper id="27">
       <title>An Efficient Chart-based Algorithm for Partial-Parsing of Unrestricted Texts</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/974499.974534</doi>
       <pages>193–200</pages>
       <url hash="f310301d">A92-1027</url>
@@ -293,9 +293,9 @@
     </paper>
     <paper id="29">
       <title>Compound Nouns in a Unification-Based <fixed-case>MT</fixed-case> System</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Katharina</first><last>Boesefeldt</last></author>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="katharina-boesefeldt"><first>Katharina</first><last>Boesefeldt</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <doi>10.3115/974499.974537</doi>
       <pages>209–215</pages>
       <url hash="a3f7f03f">A92-1029</url>
@@ -303,9 +303,9 @@
     </paper>
     <paper id="30">
       <title><fixed-case>XTAG</fixed-case> - A Graphical Workbench for Developing <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/974499.974538</doi>
       <pages>223–230</pages>
       <url hash="cbe47d5d">A92-1030</url>
@@ -351,7 +351,7 @@
     <paper id="35">
       <title>Practical World Modeling for <fixed-case>NLP</fixed-case> Applications</title>
       <author><first>Lynn</first><last>Carlson</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/974499.974544</doi>
       <pages>235–236</pages>
       <url hash="5ebda3dd">A92-1035</url>
@@ -359,7 +359,7 @@
     </paper>
     <paper id="36">
       <title>Portable Natural Language Generation using <fixed-case>SPOKESMAN</fixed-case></title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <doi>10.3115/974499.974545</doi>
       <pages>237–238</pages>
       <url hash="db9b3b2b">A92-1036</url>
@@ -368,8 +368,8 @@
     <paper id="37">
       <title>A Method of Automatic Hypertext Construction from an Encyclopedic Dictionary of a Specific Field</title>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Masahiko</first><last>Murakami</last></author>
       <doi>10.3115/974499.974546</doi>
       <pages>239–240</pages>
@@ -378,10 +378,10 @@
     </paper>
     <paper id="38">
       <title>Datenbank-<fixed-case>DIALOG</fixed-case> and the Relevance of Habitability</title>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <author><first>Wolfgang</first><last>Heinz</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Ernst</first><last>Buchberger</last></author>
+      <author id="ernst-buchberger"><first>Ernst</first><last>Buchberger</last></author>
       <doi>10.3115/974499.974547</doi>
       <pages>241–242</pages>
       <url hash="f8788aea">A92-1038</url>
@@ -398,11 +398,11 @@
     <paper id="40">
       <title>Dialogue Management for Telephone Information Systems</title>
       <author><first>Scott</first><last>McGlashan</last></author>
-      <author><first>Norman</first><last>Fraser</last></author>
+      <author id="norman-m-fraser"><first>Norman</first><last>Fraser</last></author>
       <author><first>Nigel</first><last>Gilbert</last></author>
       <author><first>Eric</first><last>Bilange</last></author>
       <author><first>Paul</first><last>Heisterkamp</last></author>
-      <author><first>Nick</first><last>Youd</last></author>
+      <author id="nick-j-youd"><first>Nick</first><last>Youd</last></author>
       <doi>10.3115/974499.974549</doi>
       <pages>245–246</pages>
       <url hash="03773252">A92-1040</url>
@@ -436,12 +436,12 @@
     <paper id="44">
       <title><fixed-case>SEISD</fixed-case>: An environment for extraction of Semantic Information from on-line dictionaries</title>
       <author><first>Alicia</first><last>Ageno</last></author>
-      <author><first>Irene</first><last>Castellon</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellon</last></author>
       <author id="m-antonia-marti"><first>M. A.</first><last>Marti</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Francesc</first><last>Ribas</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
-      <author><first>Mariona</first><last>Taule</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="francesc-ribas"><first>Francesc</first><last>Ribas</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taule</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <doi>10.3115/974499.974553</doi>
       <pages>253–254</pages>
@@ -450,7 +450,7 @@
     </paper>
     <paper id="45">
       <title>Multi-Purpose Development and Operation Environments for Natural Language Applications</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Peter</first><last>Shell</last></author>
       <author><first>Ariel</first><last>Cohen</last></author>
       <author><first>Peter</first><last>Cousseau</last></author>
@@ -471,7 +471,7 @@
     </paper>
     <paper id="47">
       <title>Lexical Processing in the <fixed-case>CLARE</fixed-case> System</title>
-      <author><first>David M.</first><last>Carter</last></author>
+      <author id="david-carter"><first>David M.</first><last>Carter</last></author>
       <doi>10.3115/974499.974556</doi>
       <pages>259–260</pages>
       <url hash="02c33d5e">A92-1047</url>
diff --git a/data/xml/A94.xml b/data/xml/A94.xml
index 78e098437b..1add3e8b10 100644
--- a/data/xml/A94.xml
+++ b/data/xml/A94.xml
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>Practical Issues in Automatic Documentation Generation</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Karen</first><last>Kukich</last></author>
       <author><first>James</first><last>Shaw</last></author>
       <doi>10.3115/974358.974361</doi>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="5">
       <title>Machine Translation of Sentences with Fixed Expressions</title>
-      <author><first>Naoto</first><last>Katoh</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Katoh</last></author>
       <author><first>Teruaki</first><last>Aizawa</last></author>
       <doi>10.3115/974358.974366</doi>
       <pages>28–33</pages>
@@ -61,7 +61,7 @@
     <paper id="6">
       <title><fixed-case>T</fixed-case>ermight: Identifying and Translating Technical Terminology</title>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <doi>10.3115/974358.974367</doi>
       <pages>34–40</pages>
       <url hash="6c34def2">A94-1006</url>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="10">
       <title>Improving Language Models by Clustering Training Sentences</title>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <doi>10.3115/974358.974372</doi>
       <pages>59–64</pages>
       <url hash="ed9bd0d1">A94-1010</url>
@@ -112,7 +112,7 @@
     <paper id="12">
       <title>Combination of Symbolic and Statistical Approaches for Grammatical Knowledge Acquisition</title>
       <author><first>Masaki</first><last>Kiyono</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/974358.974375</doi>
       <pages>72–77</pages>
       <url hash="1c9d482a">A94-1012</url>
@@ -120,8 +120,8 @@
     </paper>
     <paper id="13">
       <title>Adaptive Sentence Boundary Disambiguation</title>
-      <author><first>David D.</first><last>Palmer</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <doi>10.3115/974358.974376</doi>
       <pages>78–83</pages>
       <url hash="c7d36369">A94-1013</url>
@@ -147,8 +147,8 @@
     </paper>
     <paper id="16">
       <title>Three Heads are Better than One</title>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/974358.974380</doi>
       <pages>95–100</pages>
       <url hash="bb35223d">A94-1016</url>
@@ -157,7 +157,7 @@
     <paper id="17">
       <title>Real-Time Spoken Language Translation Using Associative Processors</title>
       <author><first>Kozo</first><last>Oi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Osamu</first><last>Furuse</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <author><first>Tetsuya</first><last>Higuchi</last></author>
@@ -185,7 +185,7 @@
     <paper id="20">
       <title>Resolving Anaphora in a Portable Natural Language Front End to Databases</title>
       <author><first>Flavia A.</first><last>Barros</last></author>
-      <author><first>Anne</first><last>DeRoeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>DeRoeck</last></author>
       <doi>10.3115/974358.974386</doi>
       <pages>119–124</pages>
       <url hash="a56c16a1">A94-1020</url>
@@ -193,8 +193,8 @@
     </paper>
     <paper id="21">
       <title>Upholding the Maxim of Relevance during Patient-Centered Activities</title>
-      <author><first>Abigail S.</first><last>Gertner</last></author>
-      <author><first>Bonnie L.</first><last>Webber</last></author>
+      <author id="abigail-s-gertner"><first>Abigail S.</first><last>Gertner</last></author>
+      <author id="bonnie-webber"><first>Bonnie L.</first><last>Webber</last></author>
       <author><first>John R.</first><last>Clarke</last></author>
       <doi>10.3115/974358.974387</doi>
       <pages>125–131</pages>
@@ -203,10 +203,10 @@
     </paper>
     <paper id="22">
       <title>The Delphi Natural Language Understanding System</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/974358.974388</doi>
       <pages>132–137</pages>
       <url hash="7852583c">A94-1022</url>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="25">
       <title>A robust category guesser for <fixed-case>D</fixed-case>utch medical language</title>
-      <author><first>Peter</first><last>Spyns</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
       <doi>10.3115/974358.974392</doi>
       <pages>150–155</pages>
       <url hash="613c114c">A94-1025</url>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="28">
       <title>Robust Text Processing in Automated Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <doi>10.3115/974358.974396</doi>
       <pages>168–173</pages>
       <url hash="2ac5680f">A94-1028</url>
@@ -266,9 +266,9 @@
     </paper>
     <paper id="29">
       <title>Might a semantic lexicon support hypertextual authoring?</title>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Fabrizio</first><last>Grisoli</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <doi>10.3115/974358.974397</doi>
       <pages>174–179</pages>
       <url hash="080050ab">A94-1029</url>
@@ -295,7 +295,7 @@
     <paper id="32">
       <title>Automatic Aquisition of Semantic Attributes for User Defined Words m <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Satoru</first><last>Ikehara</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Akio</first><last>Yokoo</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Yoshie</first><last>Omi</last></author>
@@ -335,7 +335,7 @@
     </paper>
     <paper id="36">
       <title>A Practical Evaluation of an Integrated Translation Tool during a Large Scale Localisation Project</title>
-      <author><first>Reinhard</first><last>Schaler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schaler</last></author>
       <doi>10.3115/974358.974405</doi>
       <pages>192–193</pages>
       <url hash="3dcb2386">A94-1036</url>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="40">
       <title>Multifunction Thesaurus for <fixed-case>R</fixed-case>ussian Word Processing</title>
-      <author><first>Igor A.</first><last>Bolshakov</last></author>
+      <author id="igor-a-bolshakov"><first>Igor A.</first><last>Bolshakov</last></author>
       <doi>10.3115/974358.974409</doi>
       <pages>200–202</pages>
       <url hash="cf8a775a">A94-1040</url>
@@ -381,7 +381,7 @@
     </paper>
     <paper id="41">
       <title>Representing Knowledge for Planning Multisentential Text</title>
-      <author><first>Jose</first><last>Coch</last></author>
+      <author id="jose-coch"><first>Jose</first><last>Coch</last></author>
       <author><first>Raphael</first><last>David</last></author>
       <doi>10.3115/974358.974410</doi>
       <pages>203–204</pages>
@@ -400,7 +400,7 @@
     </paper>
     <paper id="43">
       <title>An Interactive Rewriting Tool for Machine Acceptable Sentences</title>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Kouichi</first><last>Nomura</last></author>
       <author><first>Mariko</first><last>Nakamura</last></author>
       <doi>10.3115/974358.974412</doi>
@@ -410,7 +410,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>TECHDOC</fixed-case>: Multilingual generation of online and offline instructional text</title>
-      <author><first>Dietmar</first><last>Rosner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rosner</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <doi>10.3115/974358.974413</doi>
       <pages>209–210</pages>
@@ -419,7 +419,7 @@
     </paper>
     <paper id="45">
       <title>An Inheritance-based Lexicon for Message Understanding Systems</title>
-      <author><first>Lynne J.</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne J.</first><last>Cahill</last></author>
       <doi>10.3115/974358.974414</doi>
       <pages>211–212</pages>
       <url hash="6bd19666">A94-1045</url>
@@ -427,7 +427,7 @@
     </paper>
     <paper id="46">
       <title>Industrial Applications of Unification Morphology</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
       <doi>10.3115/974358.974415</doi>
       <pages>213–214</pages>
       <url hash="2d2859a0">A94-1046</url>
@@ -435,10 +435,10 @@
     </paper>
     <paper id="47">
       <title>Sublanguage Engineering in the <fixed-case>F</fixed-case>o<fixed-case>G</fixed-case> System</title>
-      <author><first>Richard</first><last>Kittredge</last></author>
-      <author><first>Eli</first><last>Goldberg</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
+      <author id="eli-goldberg"><first>Eli</first><last>Goldberg</last></author>
       <author><first>Myunghee</first><last>Kim</last></author>
-      <author><first>Alain</first><last>Polguere</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguere</last></author>
       <doi>10.3115/974358.974416</doi>
       <pages>215–216</pages>
       <url hash="0e5c150b">A94-1047</url>
diff --git a/data/xml/A97.xml b/data/xml/A97.xml
index 38c38a01d9..bc5b5486bf 100644
--- a/data/xml/A97.xml
+++ b/data/xml/A97.xml
@@ -29,7 +29,7 @@
     <paper id="2">
       <title>Natural Language in Four Spatial Interfaces</title>
       <author><first>Kenneth</first><last>Wauchope</last></author>
-      <author><first>Stephanie</first><last>Everett</last></author>
+      <author id="stephanie-s-everett"><first>Stephanie</first><last>Everett</last></author>
       <author><first>Dennis</first><last>Perzanowski</last></author>
       <author><first>Elaine</first><last>Marsh</last></author>
       <doi>10.3115/974557.974559</doi>
@@ -39,9 +39,9 @@
     </paper>
     <paper id="3">
       <title>High Performance Segmentation of Spontaneous Speech Using Part of Speech and Trigger Word Information</title>
-      <author><first>Marsal</first><last>Gavalda</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavalda</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Gregory</first><last>Aist</last></author>
+      <author id="gregory-aist"><first>Gregory</first><last>Aist</last></author>
       <doi>10.3115/974557.974560</doi>
       <pages>12–15</pages>
       <url hash="84d4e3c2">A97-1003</url>
@@ -50,7 +50,7 @@
     <paper id="4">
       <title>A Maximum Entropy Approach to Identifying Sentence Boundaries</title>
       <author><first>Jeffrey C.</first><last>Reynar</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <doi>10.3115/974557.974561</doi>
       <pages>16–19</pages>
       <url hash="fcb87b16">A97-1004</url>
@@ -58,10 +58,10 @@
     </paper>
     <paper id="5">
       <title><fixed-case>Q</fixed-case>uick<fixed-case>S</fixed-case>et: Multimodal Interaction for Simulation Set-up and Control</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>David</first><last>McGee</last></author>
-      <author><first>Sharon</first><last>Oviatt</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="david-mcgee"><first>David</first><last>McGee</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
       <author><first>Jay</first><last>Pittman</last></author>
       <author><first>Ira</first><last>Smith</last></author>
       <author><first>Liang</first><last>Chen</last></author>
@@ -76,7 +76,7 @@
       <author><first>Stephan</first><last>Busemann</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Abdel Kader</first><last>Diagne</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Judith</first><last>Klein</last></author>
       <author><first>Sven</first><last>Schmeier</last></author>
       <doi>10.3115/974557.974563</doi>
@@ -88,7 +88,7 @@
       <title>Insights into the Dialogue Processing of <fixed-case>VERBMOBIL</fixed-case></title>
       <author><first>Jan</first><last>Alexandersson</last></author>
       <author><first>Norbert</first><last>Reithinger</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <doi>10.3115/974557.974564</doi>
       <pages>33–40</pages>
       <url hash="b576b46d">A97-1007</url>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="8">
       <title>An Evaluation of Strategies for Selective Utterance Verification for Spoken Natural Language Dialog</title>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <doi>10.3115/974557.974565</doi>
       <pages>41–48</pages>
       <url hash="1001bddd">A97-1008</url>
@@ -105,7 +105,7 @@
     <paper id="9">
       <title>Name pronunciation in <fixed-case>G</fixed-case>erman text-to-speech synthesis</title>
       <author><first>Stefanie</first><last>Jannedy</last></author>
-      <author><first>Bernd</first><last>Mobius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Mobius</last></author>
       <doi>10.3115/974557.974566</doi>
       <pages>49–56</pages>
       <url hash="d044e84a">A97-1009</url>
@@ -123,7 +123,7 @@
     <paper id="11">
       <title>A non-projective dependency parser</title>
       <author><first>Pasi</first><last>Tapanainen</last></author>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <doi>10.3115/974557.974568</doi>
       <pages>64–71</pages>
       <url hash="969cc046">A97-1011</url>
@@ -131,7 +131,7 @@
     </paper>
     <paper id="12">
       <title>Incremental Finite-State Parsing</title>
-      <author><first>Salah</first><last>Ait-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Ait-Mokhtar</last></author>
       <author><first>Jean-Pierre</first><last>Chanod</last></author>
       <doi>10.3115/974557.974569</doi>
       <pages>72–79</pages>
@@ -141,7 +141,7 @@
     <paper id="13">
       <title>Developing a hybrid <fixed-case>NP</fixed-case> parser</title>
       <author><first>Atro</first><last>Voutilainen</last></author>
-      <author><first>Lluis</first><last>Padro</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padro</last></author>
       <doi>10.3115/974557.974570</doi>
       <pages>80–87</pages>
       <url hash="357c975d">A97-1013</url>
@@ -177,8 +177,8 @@
     </paper>
     <paper id="17">
       <title>Probabilistic and Rule-Based Tagger of an Inflective Language- a Comparison</title>
-      <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Barbora</first><last>Hladka</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladka</last></author>
       <doi>10.3115/974557.974574</doi>
       <pages>111–118</pages>
       <url hash="80dc747d">A97-1017</url>
@@ -188,7 +188,7 @@
       <title><fixed-case>CS</fixed-case>eg&amp;Tagl.0: A Practical Word Segmenter and <fixed-case>POS</fixed-case> Tagger for <fixed-case>C</fixed-case>hinese Texts</title>
       <author><last>Sun</last><first>Maosong</first></author>
       <author><last>Shen</last><first>Dayang</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/974557.974575</doi>
       <pages>119–126</pages>
       <url hash="82050c57">A97-1018</url>
@@ -207,9 +207,9 @@
       <title>Reading more into Foreign Languages</title>
       <author><first>John</first><last>Nerbonne</last></author>
       <author><first>Lauri</first><last>Karttunen</last></author>
-      <author><first>Elena</first><last>Paskaleva</last></author>
-      <author><first>Gabor</first><last>Proszeky</last></author>
-      <author><first>Tiit</first><last>Roosmaa</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
+      <author id="tiit-roosmaa"><first>Tiit</first><last>Roosmaa</last></author>
       <doi>10.3115/974557.974577</doi>
       <pages>135–138</pages>
       <url hash="ae9bc22c">A97-1020</url>
@@ -217,7 +217,7 @@
     </paper>
     <paper id="21">
       <title>Large-Scale Acquisition of <fixed-case>LCS</fixed-case>-Based Lexicons for Foreign Language Tutoring</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <doi>10.3115/974557.974578</doi>
       <pages>139–146</pages>
       <url hash="28d9841d">A97-1021</url>
@@ -225,9 +225,9 @@
     </paper>
     <paper id="22">
       <title>A Prototype of a Grammar Checker for <fixed-case>C</fixed-case>zech</title>
-      <author><first>Tomáš</first><last>Holan</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
-      <author><first>Martin</first><last>Plátek</last></author>
+      <author id="tomas-holan"><first>Tomáš</first><last>Holan</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Plátek</last></author>
       <doi>10.3115/974557.974579</doi>
       <pages>147–154</pages>
       <url hash="3f0ab444">A97-1022</url>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="23">
       <title>Techniques for Accelerating a Grammar-Checker</title>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <doi>10.3115/974557.974580</doi>
       <pages>155–158</pages>
       <url hash="8151d0a3">A97-1023</url>
@@ -251,8 +251,8 @@
     </paper>
     <paper id="25">
       <title>Contextual Spelling Correction Using Latent Semantic Analysis</title>
-      <author><first>Michael P.</first><last>Jones</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="michael-jones"><first>Michael P.</first><last>Jones</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <doi>10.3115/974557.974582</doi>
       <pages>166–173</pages>
       <url hash="e397ac76">A97-1025</url>
@@ -263,7 +263,7 @@
       <author><first>Jill</first><last>Burstein</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Chi</first><last>Lu</last></author>
-      <author><first>Randy M.</first><last>Kaplan</last></author>
+      <author id="randy-m-kaplan"><first>Randy M.</first><last>Kaplan</last></author>
       <doi>10.3115/974557.974583</doi>
       <pages>174–181</pages>
       <url hash="783b920d">A97-1026</url>
@@ -283,8 +283,8 @@
     </paper>
     <paper id="28">
       <title>A Statistical Profile of the Named Entity Task</title>
-      <author><first>David D.</first><last>Palmer</last></author>
-      <author><first>David S.</first><last>Day</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
+      <author id="david-day"><first>David S.</first><last>Day</last></author>
       <doi>10.3115/974557.974585</doi>
       <pages>190–193</pages>
       <url hash="439f98ec">A97-1028</url>
@@ -292,10 +292,10 @@
     </paper>
     <paper id="29">
       <title><fixed-case>N</fixed-case>ymble: a High-Performance Learning Name-finder</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.3115/974557.974586</doi>
       <pages>194–201</pages>
       <url hash="654292e5">A97-1029</url>
@@ -304,7 +304,7 @@
     <paper id="30">
       <title>Disambiguation of Proper Names in Text</title>
       <author><first>Nina</first><last>Wacholder</last></author>
-      <author><first>Yael</first><last>Ravin</last></author>
+      <author id="yael-ravin"><first>Yael</first><last>Ravin</last></author>
       <author><first>Misook</first><last>Choi</last></author>
       <doi>10.3115/974557.974587</doi>
       <pages>202–208</pages>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="31">
       <title>An Information Extraction Core System for Real World <fixed-case>G</fixed-case>erman Text Processing</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <author><first>Rolf</first><last>Backofen</last></author>
       <author><first>Judith</first><last>Baur</last></author>
       <author><first>Markus</first><last>Becker</last></author>
@@ -334,8 +334,8 @@
     </paper>
     <paper id="33">
       <title>Building a Generation Knowledge Source using <fixed-case>I</fixed-case>nternet-Accessible Newswire</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/974557.974590</doi>
       <pages>221–228</pages>
       <url hash="b2c4f41c">A97-1033</url>
@@ -345,7 +345,7 @@
       <title>Using <fixed-case>SGML</fixed-case> as a Basis for Data-Intensive <fixed-case>NLP</fixed-case></title>
       <author><first>David</first><last>McKelvie</last></author>
       <author><first>Chris</first><last>Brew</last></author>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <doi>10.3115/974557.974591</doi>
       <pages>229–236</pages>
       <url hash="68d6e385">A97-1034</url>
@@ -353,10 +353,10 @@
     </paper>
     <paper id="35">
       <title>Software Infrastructure for Natural Language Processing</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/974557.974592</doi>
       <pages>237–244</pages>
       <url hash="d5ab886d">A97-1035</url>
@@ -364,7 +364,7 @@
     </paper>
     <paper id="36">
       <title>An Open Distributed Architecture for Reuse and Integration of Heterogeneous <fixed-case>NLP</fixed-case> Components</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <author><first>Mark</first><last>Casper</last></author>
       <author><first>Nigel</first><last>Sharples</last></author>
       <doi>10.3115/974557.974593</doi>
@@ -374,8 +374,8 @@
     </paper>
     <paper id="37">
       <title>Customizable Descriptions of Object-Oriented Models</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <doi>10.3115/974557.974594</doi>
       <pages>253–256</pages>
@@ -384,7 +384,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>C</fixed-case>ogent<fixed-case>H</fixed-case>elp: <fixed-case>NLG</fixed-case> meets <fixed-case>SE</fixed-case> in a tool for authoring dynamically generated on-line help</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>David E.</first><last>Caldwell</last></author>
       <doi>10.3115/974557.974595</doi>
       <pages>257–264</pages>
@@ -393,7 +393,7 @@
     </paper>
     <paper id="39">
       <title>A Fast and Portable Realizer for Text Generation Systems</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
       <author><first>Owen</first><last>Rainbow</last></author>
       <doi>10.3115/974557.974596</doi>
       <pages>265–268</pages>
@@ -402,12 +402,12 @@
     </paper>
     <paper id="40">
       <title>Multilingual Generation and Summarization of Job Adverts: the <fixed-case>TREE</fixed-case> Project</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Bill</first><last>Black</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Torbjorn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjorn</first><last>Lager</last></author>
       <author><first>Annarosa</first><last>Multari</last></author>
-      <author><first>Luca</first><last>Gilardoni</last></author>
+      <author id="luca-gilardoni"><first>Luca</first><last>Gilardoni</last></author>
       <author><first>Jeremy</first><last>Ellman</last></author>
       <author><first>Alex</first><last>Rogers</last></author>
       <doi>10.3115/974557.974597</doi>
@@ -417,7 +417,7 @@
     </paper>
     <paper id="41">
       <title>Language Generation for Multimedia Healthcare Briefings</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <author><first>Desmond A.</first><last>Jordan</last></author>
       <author><first>Shimei</first><last>Pan</last></author>
       <author><first>James</first><last>Shaw</last></author>
@@ -429,8 +429,8 @@
     </paper>
     <paper id="42">
       <title>Identifying Topics by Position</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.3115/974557.974599</doi>
       <pages>283–290</pages>
       <url hash="bda9b64c">A97-1042</url>
@@ -440,7 +440,7 @@
       <title>An Automatic Extraction of Key Paragraphs Based on Context Dependency</title>
       <author><first>Fumiyo</first><last>Fukumoto</last></author>
       <author><first>Yoshimi</first><last>Suzuki</last></author>
-      <author><first>Jun’ichi</first><last>Fukumoto</last></author>
+      <author id="junichi-fukumoto"><first>Jun’ichi</first><last>Fukumoto</last></author>
       <doi>10.3115/974557.974600</doi>
       <pages>291–298</pages>
       <url hash="e8eb6407">A97-1043</url>
@@ -448,7 +448,7 @@
     </paper>
     <paper id="44">
       <title>Building Effective Queries In Natural Language Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Fang</first><last>Lin</last></author>
       <author><first>Jose</first><last>Perez-Carballo</last></author>
       <author><first>Jin</first><last>Wang</last></author>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="45">
       <title>Construction and Visualization of Key Term Hierarchies</title>
-      <author><first>Joe</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe</first><last>Zhou</last></author>
       <author><first>Troy</first><last>Tanner</last></author>
       <doi>10.3115/974557.974602</doi>
       <pages>307–311</pages>
@@ -468,7 +468,7 @@
     </paper>
     <paper id="46">
       <title>Fast Statistical Parsing of Noun Phrases for Document Indexing</title>
-      <author><first>Chengxiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last></author>
       <doi>10.3115/974557.974603</doi>
       <pages>312–319</pages>
       <url hash="b1fb4ac5">A97-1046</url>
@@ -486,9 +486,9 @@
       <title>An Interactive Translation Support Facility for Non-Professional Users</title>
       <author><first>Kiyoshi</first><last>Yamabana</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kenji</first><last>Satoh</last></author>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <author><first>Shinko</first><last>Tamura</last></author>
       <doi>10.3115/974557.974605</doi>
       <pages>324–331</pages>
@@ -516,12 +516,12 @@
     </paper>
     <paper id="51">
       <title>Mixed-Initiative Development of Language Processing Systems</title>
-      <author><first>David</first><last>Day</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>Robyn</first><last>Kozierok</last></author>
-      <author><first>Patricia</first><last>Robinson</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="patricia-robinson"><first>Patricia</first><last>Robinson</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <doi>10.3115/974557.974608</doi>
       <pages>348–355</pages>
       <url hash="1d49a2a5">A97-1051</url>
@@ -529,8 +529,8 @@
     </paper>
     <paper id="52">
       <title>Automatic Extraction of Subcategorization from Corpora</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <doi>10.3115/974557.974609</doi>
       <pages>356–363</pages>
       <url hash="96251f94">A97-1052</url>
@@ -539,7 +539,7 @@
     <paper id="53">
       <title>Learning Probabilistic Subcategorization Preference by Identifying Case Dependencies and Optimal Noun Class Generalization Level</title>
       <author><first>Takehito</first><last>Utsuro</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/974557.974610</doi>
       <pages>364–371</pages>
       <url hash="71a3054d">A97-1053</url>
@@ -557,7 +557,7 @@
     <paper id="55">
       <title>Automatic Selection of Class Labels from a Thesaurus for an Effective Semantic Tagging of Corpora.</title>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <doi>10.3115/974557.974612</doi>
       <pages>380–387</pages>
       <url hash="f607d2e2">A97-1055</url>
@@ -566,8 +566,8 @@
     <paper id="56">
       <title>Sequential Model Selection for Word Sense Disambiguation</title>
       <author><first>Ted</first><last>Pedersen</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <doi>10.3115/974557.974613</doi>
       <pages>388–395</pages>
       <url hash="94ec0d7b">A97-1056</url>
@@ -598,7 +598,7 @@
     </paper>
     <paper id="2">
       <title>An Efficient Two Stage Approach to Robust Language Interpretation</title>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
       <doi>10.3115/974281.974284</doi>
       <pages>3–4</pages>
       <url hash="e6af1d10">A97-2002</url>
@@ -615,7 +615,7 @@
     <paper id="4">
       <title>Duke’s Trainable Information and Meaning Extraction System (Duke <fixed-case>TIMES</fixed-case>)</title>
       <author><first>Amit</first><last>Bagga</last></author>
-      <author><first>Joyce Yue</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Yue</first><last>Chai</last></author>
       <doi>10.3115/974281.974286</doi>
       <pages>7–8</pages>
       <url hash="fd8cd5ff">A97-2004</url>
@@ -623,7 +623,7 @@
     </paper>
     <paper id="5">
       <title>Dependency parser demo</title>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <doi>10.3115/974281.974287</doi>
       <pages>9–10</pages>
@@ -632,8 +632,8 @@
     </paper>
     <paper id="6">
       <title>An Improvement in the Selection Process of Machine Translation Using Inductive Learning with Genetic Algorithms</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshikazu</first><last>Miyanaga</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <doi>10.3115/974281.974288</doi>
@@ -643,7 +643,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>CIRCSIM</fixed-case>-Tutor: An Intelligent Tutoring System Using Natural Language Dialogue</title>
-      <author><first>Martha W.</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha W.</first><last>Evens</last></author>
       <author><first>Ru-Charn</first><last>Chang</last></author>
       <author><first>Yoon Hee</first><last>Lee</last></author>
       <author><first>Leem Seop</first><last>Shim</last></author>
@@ -696,10 +696,10 @@
     <paper id="13">
       <title><fixed-case>EAGLE</fixed-case>: An Extensible Architecture for General Linguistic Engineering</title>
       <author><first>Breck</first><last>Baldwin</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>Jeffrey C.</first><last>Reynar</last></author>
       <author><first>Michael</first><last>Niv</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <doi>10.3115/974281.974295</doi>
       <pages>23–23</pages>
       <url hash="478daadb">A97-2013</url>
@@ -707,8 +707,8 @@
     </paper>
     <paper id="14">
       <title>An <fixed-case>E</fixed-case>nglish Grammar Checker as a Writing Aid for Students of <fixed-case>E</fixed-case>nglish as a Second Language</title>
-      <author><first>Jong C.</first><last>Park</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Clay</first><last>Washburn</last></author>
       <doi>10.3115/974281.974296</doi>
       <pages>24–24</pages>
@@ -718,7 +718,7 @@
     <paper id="15">
       <title><fixed-case>CATMORF</fixed-case>: Multi two-level steps for <fixed-case>C</fixed-case>atalan morphology</title>
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Angels</first><last>Egea</last></author>
+      <author id="angels-egea"><first>Angels</first><last>Egea</last></author>
       <author><first>Antoni</first><last>Tuells</last></author>
       <doi>10.3115/974281.974297</doi>
       <pages>25–26</pages>
@@ -738,10 +738,10 @@
     </paper>
     <paper id="17">
       <title><fixed-case>GATE</fixed-case> - a General Architecture for Text Engineering</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/974281.974299</doi>
       <pages>29–30</pages>
       <url hash="50d26265">A97-2017</url>
@@ -757,8 +757,8 @@
     </paper>
     <paper id="19">
       <title><fixed-case>NL</fixed-case> Assistant: A Toolkit for Developing Natural Language: Applications</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
       <author><first>Ahmed</first><last>Bouzid</last></author>
       <author><first>Li</first><last>Li</last></author>
       <doi>10.3115/974281.974301</doi>
@@ -768,8 +768,8 @@
     </paper>
     <paper id="20">
       <title>Using <fixed-case>BBN</fixed-case> <fixed-case>VALAD</fixed-case>: Speech at the Logistics Anchor Desk</title>
-      <author><first>Madeline</first><last>Bates</last></author>
-      <author><first>Rusty</first><last>Bobrow</last></author>
+      <author id="madeleine-bates"><first>Madeline</first><last>Bates</last></author>
+      <author id="robert-bobrow"><first>Rusty</first><last>Bobrow</last></author>
       <doi>10.3115/974281.974303</doi>
       <pages>35–35</pages>
       <url hash="d15c6600">A97-2020</url>
@@ -777,7 +777,7 @@
     </paper>
     <paper id="21">
       <title>A Spoken Language Interface to a Virtual Reality System (Video)</title>
-      <author><first>Stephanie S.</first><last>Everett</last></author>
+      <author id="stephanie-s-everett"><first>Stephanie S.</first><last>Everett</last></author>
       <author><first>Kenneth</first><last>Wauchope</last></author>
       <author><first>Manuel A.</first><last>Pfirez</last></author>
       <doi>10.3115/974281.974304</doi>
diff --git a/data/xml/C00.xml b/data/xml/C00.xml
index 07bbb909d1..50131a2852 100644
--- a/data/xml/C00.xml
+++ b/data/xml/C00.xml
@@ -29,7 +29,7 @@
     <paper id="2">
       <title>Learning Word Clusters from Data Types</title>
       <author><first>Paolo</first><last>Allegrini</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <url hash="a9d59a81">C00-1002</url>
       <bibkey>allegrini-etal-2000-learning</bibkey>
@@ -44,7 +44,7 @@
     <paper id="4">
       <title>Extended Models and Tools for High-performance Part-of-speech</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="65fd4663">C00-1004</url>
       <bibkey>asahara-matsumoto-2000-extended</bibkey>
     </paper>
@@ -57,15 +57,15 @@
     </paper>
     <paper id="6">
       <title>The Effects of Word Order and Segmentation on Translation Retrieval Performance</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <url hash="95a9995d">C00-1006</url>
       <bibkey>baldwin-tanaka-2000-effects</bibkey>
     </paper>
     <paper id="7">
       <title>Exploiting a Probabilistic Hierarchical Model for Generation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="d6ba1692">C00-1007</url>
       <bibkey>bangalore-rambow-2000-exploiting</bibkey>
     </paper>
@@ -78,8 +78,8 @@
     </paper>
     <paper id="9">
       <title>Combination of N-Grams and Stochastic Context-Free Grammars for Language Modeling</title>
-      <author><first>Jose-Miguel</first><last>Benedi</last></author>
-      <author><first>Joan-Andreu</first><last>Sanchez</last></author>
+      <author id="jose-miguel-benedi"><first>Jose-Miguel</first><last>Benedi</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sanchez</last></author>
       <url hash="0592430d">C00-1009</url>
       <bibkey>benedi-sanchez-2000-combination</bibkey>
     </paper>
@@ -97,16 +97,16 @@
     </paper>
     <paper id="12">
       <title>The effects of analysing cohesion on document summarisation</title>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
-      <author><first>Mary S.</first><last>Neff</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
       <url hash="5596526f">C00-1012</url>
       <bibkey>boguraev-neff-2000-effects</bibkey>
     </paper>
     <paper id="13">
       <title>Creating a Universal Networking Language Module within an Advanced <fixed-case>NLP</fixed-case> System</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <author><first>Nadezhda</first><last>Frid</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <author><first>Leonid</first><last>Kreidlin</last></author>
       <author><first>Irina</first><last>Sagalova</last></author>
       <author><first>Victor</first><last>Sizov</last></author>
@@ -128,14 +128,14 @@
     </paper>
     <paper id="16">
       <title>Binding Constraints as Instructions of Binding Machines</title>
-      <author><first>Antonio</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
       <url hash="c6452738">C00-1016</url>
       <bibkey>branco-2000-binding</bibkey>
     </paper>
     <paper id="17">
       <title>Probabilistic Parsing and Psychological Plausibility</title>
       <author><first>Thorsten</first><last>Brants</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <url hash="e84caee6">C00-1017</url>
       <bibkey>brants-crocker-2000-probabilistic</bibkey>
     </paper>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="19">
       <title>Automated Generalization of Translation Examples</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="4c53aed4">C00-1019</url>
       <bibkey>brown-2000-automated</bibkey>
     </paper>
@@ -160,8 +160,8 @@
     <paper id="21">
       <title>Tagging of very large corpora: Topic-Focus Articulation</title>
       <author><first>Eva</first><last>Buranova</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="cf4a32b2">C00-1021</url>
       <bibkey>buranova-etal-2000-tagging</bibkey>
     </paper>
@@ -174,7 +174,7 @@
     <paper id="23">
       <title>Word Sense Disambiguation of Adjectives Using Probabilistic Networks</title>
       <author><first>Gerald</first><last>Chao</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <url hash="2e9e26eb">C00-1023</url>
       <bibkey>chao-dyer-2000-word</bibkey>
     </paper>
@@ -195,14 +195,14 @@
     </paper>
     <paper id="26">
       <title>Automatic Semantic Classification for <fixed-case>C</fixed-case>hinese Unknown Compound Nouns</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Chao-jan</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="chao-jan-chen"><first>Chao-jan</first><last>Chen</last></author>
       <url hash="1ab73518">C00-1026</url>
       <bibkey>chen-chen-2000-automatic</bibkey>
     </paper>
     <paper id="27">
       <title>Empirical Estimates of Adaptation: The chance of Two Noriegas is closer to p/2 than p2</title>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <url hash="d6aad517">C00-1027</url>
       <bibkey>church-2000-empirical</bibkey>
     </paper>
@@ -216,7 +216,7 @@
     <paper id="29">
       <title>A Class-based Probabilistic approach to Structural Disambiguation</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <url hash="156de037">C00-1029</url>
       <bibkey>clark-weir-2000-class</bibkey>
     </paper>
@@ -224,14 +224,14 @@
       <title>Extracting the Names of Genes and Gene Products with a Hidden <fixed-case>M</fixed-case>arkov Model</title>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Chikashi</first><last>Nobata</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="db9a746e">C00-1030</url>
       <bibkey>collier-etal-2000-extracting</bibkey>
     </paper>
     <paper id="31">
       <title>An Empirical Investigation of the Relation Between Discourse Structure and Co-Reference</title>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
       <url hash="1eff3b2d">C00-1031</url>
@@ -239,20 +239,20 @@
     </paper>
     <paper id="32">
       <title>Morphological Rule Induction for Terminology Acquisition</title>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <url hash="67d31eb7">C00-1032</url>
       <bibkey>daille-2000-morphological</bibkey>
     </paper>
     <paper id="33">
       <title>Authoring Multimedia Documents using <fixed-case>WYSIWYM</fixed-case> Editing</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <url hash="5a07ec55">C00-1033</url>
       <bibkey>van-deemter-power-2000-authoring</bibkey>
     </paper>
     <paper id="34">
       <title>Theory Refinement and Natural Language Learning</title>
-      <author><first>Herve</first><last>Dejean</last></author>
+      <author id="herve-dejean"><first>Herve</first><last>Dejean</last></author>
       <url hash="a9a0aaa3">C00-1034</url>
       <bibkey>dejean-2000-theory</bibkey>
     </paper>
@@ -265,7 +265,7 @@
     <paper id="36">
       <title><fixed-case>XML</fixed-case> and Multilingual Document Authoring: Convergent Trends</title>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Veronika</first><last>Lux</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
       <url hash="57979539">C00-1036</url>
       <bibkey>dymetman-etal-2000-xml</bibkey>
@@ -279,13 +279,13 @@
     </paper>
     <paper id="38">
       <title>Directional Constraint Evaluation in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <url hash="c749035a">C00-1038</url>
       <bibkey>eisner-2000-directional</bibkey>
     </paper>
     <paper id="39">
       <title>Boosting Variant Recognition with Light Semantics</title>
-      <author><first>Cecile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cecile</first><last>Fabre</last></author>
       <author><first>Christian</first><last>Jacquemin</last></author>
       <url hash="49dce005">C00-1039</url>
       <bibkey>fabre-jacquemin-2000-boosting</bibkey>
@@ -299,7 +299,7 @@
     </paper>
     <paper id="41">
       <title>Deletions and their reconstruction in tectogrammatical syntactic tagging of very large corpora</title>
-      <author><first>Eva</first><last>Hajicová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicová</last></author>
       <author><first>Markéta</first><last>Ceplová</last></author>
       <url hash="922d49b0">C00-1041</url>
       <bibkey>hajicova-ceplova-2000-deletions</bibkey>
@@ -314,30 +314,30 @@
     </paper>
     <paper id="43">
       <title>Experiments with Open-Domain Textual Question Answering</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
-      <author><first>Marius A.</first><last>Pasca</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="marius-pasca"><first>Marius A.</first><last>Pasca</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
       <url hash="eb6dd106">C00-1043</url>
       <bibkey>harabagiu-etal-2000-experiments</bibkey>
     </paper>
     <paper id="44">
       <title>Effects of Adjective Orientation and Gradability on Sentence Subjectivity</title>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
       <url hash="deffa634">C00-1044</url>
       <bibkey>hatzivassiloglou-wiebe-2000-effects</bibkey>
     </paper>
     <paper id="45">
       <title>Pronominalization revisited</title>
-      <author><first>Renate</first><last>Henschel</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
       <author><first>Hua</first><last>Cheng</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <url hash="4b82c555">C00-1045</url>
       <bibkey>henschel-etal-2000-pronominalization</bibkey>
     </paper>
     <paper id="46">
       <title>Automatic Refinement of a <fixed-case>POS</fixed-case> Tagger Using a Reliable Parser and Plain Text Corpora</title>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Kenji</first><last>Ono</last></author>
       <author><first>Yumiko</first><last>Yoshimura</last></author>
       <url hash="42a73336">C00-1046</url>
@@ -347,15 +347,15 @@
       <title>A Method of Measuring Term Representativeness - Baseline Method Using Co-occurrence Distribution</title>
       <author><first>Tom</first><last>Hisamitsu</last></author>
       <author><first>Yoshiki</first><last>Niwa</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="7e02d255">C00-1047</url>
       <bibkey>hisamitsu-etal-2000-method</bibkey>
     </paper>
     <paper id="48">
       <title>A Rule Induction Approach to Modeling Regional Pronunciation Variation</title>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="166d551d">C00-1048</url>
       <bibkey>hoste-etal-2000-rule</bibkey>
     </paper>
@@ -371,7 +371,7 @@
       <author><first>Yumi</first><last>Ichimura</last></author>
       <author><first>Yoshimi</first><last>Saito</last></author>
       <author><first>Kazuhiro</first><last>Kimura</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <url hash="2d0a9266">C00-1050</url>
       <bibkey>ichimura-etal-2000-kana</bibkey>
     </paper>
@@ -391,27 +391,27 @@
     </paper>
     <paper id="53">
       <title>Deixis and Conjunction in Multimodal Systems</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <url hash="b37fd16f">C00-1053</url>
       <bibkey>johnston-2000-deixis</bibkey>
     </paper>
     <paper id="54">
       <title>Finite-state Multimodal Parsing and Understanding</title>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <url hash="af301fde">C00-1054</url>
       <bibkey>johnston-bangalore-2000-finite</bibkey>
     </paper>
     <paper id="55">
       <title>Toward a Scoring Function for Quality-Driven Machine Translation</title>
-      <author><first>Douglas A.</first><last>Jones</last></author>
+      <author id="douglas-jones"><first>Douglas A.</first><last>Jones</last></author>
       <author><first>Gregory M.</first><last>Rusk</last></author>
       <url hash="66069025">C00-1055</url>
       <bibkey>jones-rusk-2000-toward</bibkey>
     </paper>
     <paper id="56">
       <title>An <fixed-case>E</fixed-case>nglish to <fixed-case>K</fixed-case>orean Transliteration Model of Extended <fixed-case>M</fixed-case>arkov Window</title>
-      <author><first>Sung Young</first><last>Jung</last></author>
+      <author id="sung-young-jung"><first>Sung Young</first><last>Jung</last></author>
       <author><first>SungLim</first><last>Hong</last></author>
       <author><first>Eunok</first><last>Paek</last></author>
       <url hash="ba96049a">C00-1056</url>
@@ -429,7 +429,7 @@
       <title>Automatic Thesaurus Generation through Multiple Filtering</title>
       <author><first>Kyo</first><last>Kageura</last></author>
       <author><first>Keita</first><last>Tsuji</last></author>
-      <author><first>Akiko N.</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko N.</first><last>Aizawa</last></author>
       <url hash="50f2e27e">C00-1058</url>
       <bibkey>kageura-etal-2000-automatic</bibkey>
     </paper>
@@ -447,21 +447,21 @@
       <author><first>Hiroshi</first><last>Kanayama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Yutaka</first><last>Mitsuishi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="bd55e832">C00-1060</url>
       <bibkey>kanayama-etal-2000-hybrid</bibkey>
     </paper>
     <paper id="61">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Transliteration using Multiple Unbounded Overlapping Phoneme Chunks</title>
       <author><first>In-Ho</first><last>Kang</last></author>
-      <author><first>GilChang</first><last>Kim</last></author>
+      <author id="gil-chang-kim"><first>GilChang</first><last>Kim</last></author>
       <url hash="08f18162">C00-1061</url>
       <bibkey>kang-kim-2000-english</bibkey>
     </paper>
     <paper id="62">
       <title><fixed-case>LFG</fixed-case> Generation Produces Context-free Languages</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="d5e465b8">C00-1062</url>
       <bibkey>kaplan-wedekind-2000-lfg</bibkey>
     </paper>
@@ -490,7 +490,7 @@
     <paper id="66">
       <title>Automatic Text Categorization by Unsupervised Learning</title>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="850d7480">C00-1066</url>
       <bibkey>ko-seo-2000-automatic</bibkey>
     </paper>
@@ -510,11 +510,11 @@
     </paper>
     <paper id="69">
       <title>Multilinguality in a Text Generation System For Three <fixed-case>S</fixed-case>lavic Languages</title>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>John</first><last>Bateman</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
-      <author><first>Hana</first><last>Skoumalova</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalova</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Lena</first><last>Sokolova</last></author>
       <author><first>Tony</first><last>Hartley</last></author>
@@ -526,8 +526,8 @@
     <paper id="70">
       <title>Lexicalized Hidden <fixed-case>M</fixed-case>arkov Models for Part-of-Speech Tagging</title>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <url hash="c498aee5">C00-1070</url>
       <bibkey>lee-etal-2000-lexicalized</bibkey>
     </paper>
@@ -539,17 +539,17 @@
     </paper>
     <paper id="72">
       <title>The Automated Acquisition of Topic Signatures for Text Summarization</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="43de9913">C00-1072</url>
       <bibkey>lin-hovy-2000-automated</bibkey>
     </paper>
     <paper id="73">
       <title>Automatic Optimization of Dialogue Management</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Michael S.</first><last>Kearns</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="michael-s-kearns"><first>Michael S.</first><last>Kearns</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="f768b8b4">C00-1073</url>
       <bibkey>litman-etal-2000-automatic</bibkey>
     </paper>
@@ -565,7 +565,7 @@
     <paper id="75">
       <title>Application of Analogical Modelling to Example Based Machine Translation</title>
       <author><first>Christos</first><last>Malavazosi</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <url hash="051050fa">C00-1075</url>
       <bibkey>malavazosi-piperidis-2000-application</bibkey>
     </paper>
@@ -584,15 +584,15 @@
     </paper>
     <paper id="78">
       <title>Chart-Based Transfer Rule Application in Machine Translation</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="bf0e68b0">C00-1078</url>
       <bibkey>meyers-etal-2000-chart</bibkey>
     </paper>
     <paper id="79">
       <title>Representation and Recognition Method for Multi-Word Translation Units in <fixed-case>K</fixed-case>orean-to-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case> System</title>
-      <author><first>Kyonghi</first><last>Moon</last></author>
+      <author id="kyong-hi-moon"><first>Kyonghi</first><last>Moon</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="3001a540">C00-1079</url>
       <bibkey>moon-lee-2000-representation</bibkey>
@@ -624,7 +624,7 @@
     </paper>
     <paper id="83">
       <title>Taking Account of the User’s View in 3<fixed-case>D</fixed-case> Multimodal Instruction Dialogue</title>
-      <author><first>Yukiko I.</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko I.</first><last>Nakano</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Hisashi</first><last>Ohara</last></author>
       <url hash="a71cdfdc">C00-1083</url>
@@ -657,7 +657,7 @@
     </frontmatter>
     <paper id="86">
       <title>Informed Parsing for Coordination with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <author><first>Hyung Joon</first><last>Cho</last></author>
       <url hash="95e04674">C00-2086</url>
       <bibkey>park-cho-2000-informed</bibkey>
@@ -730,12 +730,12 @@
     </paper>
     <paper id="97">
       <title>Compiling Language Models from a Linguistically Motivated Unification Grammar</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Frankie</first><last>James</last></author>
       <author><first>Elizabeth Owen</first><last>Bratt</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
       <url hash="18bac63d">C00-2097</url>
       <bibkey>rayner-etal-2000-compiling</bibkey>
     </paper>
@@ -754,7 +754,7 @@
     <paper id="100">
       <title>Automatic Extraction of Subcategorization Frames for <fixed-case>C</fixed-case>zech</title>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <url hash="2daebd50">C00-2100</url>
       <bibkey>sarkar-zeman-2000-automatic</bibkey>
     </paper>
@@ -781,14 +781,14 @@
     <paper id="104">
       <title>Experiments in Automated Lexicon Building for Text Searching</title>
       <author><first>Barry</first><last>Schiffman</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="c29f02df">C00-2104</url>
       <bibkey>schiffman-mckeown-2000-experiments</bibkey>
     </paper>
     <paper id="105">
       <title>Robust <fixed-case>G</fixed-case>erman Noun Chunking With a Probabilistic Context-Free Grammar</title>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <url hash="d6bb8cdb">C00-2105</url>
       <bibkey>schmid-schulte-im-walde-2000-robust</bibkey>
     </paper>
@@ -801,13 +801,13 @@
     <paper id="107">
       <title>Realizing Expressions of Doubt in Collaborative Dialogue</title>
       <author><first>Leah</first><last>Schroeder</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="0bf55d83">C00-2107</url>
       <bibkey>schroeder-carberry-2000-realizing</bibkey>
     </paper>
     <paper id="108">
       <title>Clustering Verbs Semantically According to their Alternation Behaviour</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <url hash="cbdb3742">C00-2108</url>
       <bibkey>schulte-im-walde-2000-clustering</bibkey>
     </paper>
@@ -827,8 +827,8 @@
     </paper>
     <paper id="111">
       <title>On <fixed-case>UNL</fixed-case> as the future “html of the linguistic content” &amp; the reuse of existing <fixed-case>NLP</fixed-case> components in <fixed-case>UNL</fixed-case>-related applications with the example of a <fixed-case>UNL</fixed-case>-<fixed-case>F</fixed-case>rench deconverter</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="f1909b96">C00-2111</url>
       <bibkey>serasset-boitet-2000-unl</bibkey>
     </paper>
@@ -855,8 +855,8 @@
     </paper>
     <paper id="115">
       <title>Extracting semantic clusters from the alignment of definitions</title>
-      <author><first>Gerardo</first><last>Sierra</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <url hash="299773da">C00-2115</url>
       <bibkey>sierra-mcnaught-2000-extracting</bibkey>
     </paper>
@@ -894,7 +894,7 @@
     <paper id="120">
       <title>Matching a tone-based and tune-based approach to <fixed-case>E</fixed-case>nglish intonation for concept-to-speech generation</title>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>Catherine I.</first><last>Watson</last></author>
+      <author id="catherine-i-watson"><first>Catherine I.</first><last>Watson</last></author>
       <author><first>Cecile</first><last>Pereira</last></author>
       <url hash="c5cef21f">C00-2120</url>
       <bibkey>teich-etal-2000-matching</bibkey>
@@ -902,8 +902,8 @@
     <paper id="121">
       <title>Automatic Extraction of Semantic Relations from Specialized Corpora</title>
       <author><first>Aristomenis</first><last>Thanopoulos</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <url hash="837a425c">C00-2121</url>
       <bibkey>thanopoulos-etal-2000-automatic</bibkey>
     </paper>
@@ -916,16 +916,16 @@
     </paper>
     <paper id="123">
       <title>Word Re-ordering and <fixed-case>DP</fixed-case>-based Search in Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="910c9af9">C00-2123</url>
       <bibkey>tillmann-ney-2000-word</bibkey>
     </paper>
     <paper id="124">
       <title>Applying System Combination to Base Noun Phrase Identification</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Herve</first><last>Dejean</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="herve-dejean"><first>Herve</first><last>Dejean</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Yuval</first><last>Krymolowski</last></author>
       <author><first>Vasin</first><last>Punyakanok</last></author>
@@ -951,7 +951,7 @@
     </paper>
     <paper id="127">
       <title>Toward the “At-a-glance” Summary: Phrase-representation Summarization Method</title>
-      <author><first>Yoshihiro</first><last>Ueda</last></author>
+      <author id="yoshihiro-ueda"><first>Yoshihiro</first><last>Ueda</last></author>
       <author><first>Mamiko</first><last>Oka</last></author>
       <author><first>Takahiro</first><last>Koyama</last></author>
       <author><first>Tadanobu</first><last>Miyauchi</last></author>
@@ -969,14 +969,14 @@
     <paper id="129">
       <title>Multi-Topic Multi-Document Summarization</title>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="b88b7434">C00-2129</url>
       <bibkey>utiyama-hasida-2000-multi</bibkey>
     </paper>
     <paper id="130">
       <title>Corpus-based Development and Evaluation of a System for Processing Definite Descriptions</title>
-      <author><first>Renata</first><last>Vieira</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <url hash="807b42fa">C00-2130</url>
       <bibkey>vieira-poesio-2000-corpus</bibkey>
     </paper>
@@ -997,14 +997,14 @@
     <paper id="133">
       <title>Prosody and the Resolution of Pronominal Anaphora</title>
       <author><first>Maria</first><last>Wolters</last></author>
-      <author><first>Donna K.</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
       <url hash="eddf195d">C00-2133</url>
       <bibkey>wolters-byron-2000-prosody</bibkey>
     </paper>
     <paper id="134">
       <title>Lexicalized Tree Automata-based Grammars for Translating Conversational Texts</title>
       <author><first>Kiyoshi</first><last>Yamabana</last></author>
-      <author><first>Shinichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Shinichi</first><last>Ando</last></author>
       <author><first>Kiyomi</first><last>Mimura</last></author>
       <url hash="b77cb00c">C00-2134</url>
       <bibkey>yamabana-etal-2000-lexicalized</bibkey>
@@ -1012,14 +1012,14 @@
     <paper id="135">
       <title>Acquisition of Phrase-level Bilingual Correspondence using Dependency Structure</title>
       <author><first>Kaoru</first><last>Yamamoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="9afcdcb2">C00-2135</url>
       <bibkey>yamamoto-matsumoto-2000-acquisition</bibkey>
     </paper>
     <paper id="136">
       <title>Automatic Acquisition of Domain Knowledge for Information Extraction</title>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <author><first>Silja</first><last>Huttunen</last></author>
       <url hash="d725cbcd">C00-2136</url>
@@ -1027,7 +1027,7 @@
     </paper>
     <paper id="137">
       <title>More accurate tests for the statistical significance of result differences</title>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <url hash="75ca2f1b">C00-2137</url>
       <bibkey>yeh-2000-accurate</bibkey>
     </paper>
@@ -1041,14 +1041,14 @@
     </paper>
     <paper id="139">
       <title><fixed-case>ABL</fixed-case>: Alignment-Based Learning</title>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <url hash="2fa1f935">C00-2139</url>
       <bibkey>van-zaanen-2000-abl</bibkey>
     </paper>
     <paper id="140">
       <title><fixed-case>DIASUMM</fixed-case>: Flexible Summarization of Spontaneous Dialogues in Unrestricted Domains</title>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="e46cd13e">C00-2140</url>
       <bibkey>zechner-waibel-2000-diasumm</bibkey>
     </paper>
@@ -1062,13 +1062,13 @@
       <title>Rapid Development of Translation Tools: Application to <fixed-case>P</fixed-case>ersian and <fixed-case>T</fixed-case>urkish</title>
       <author><first>Jan W.</first><last>Amtrup</last></author>
       <author><first>Karine</first><last>Megerdoomian</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="c407979d">C00-2142</url>
       <bibkey>amtrup-etal-2000-rapid</bibkey>
     </paper>
     <paper id="143">
       <title>Dependency Treebank for <fixed-case>R</fixed-case>ussian: Concept, Tools, Types of Information</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <author><first>Svetlana</first><last>Grigorieva</last></author>
       <author><first>Nikolai</first><last>Grigoriev</last></author>
       <author><first>Leonid</first><last>Kreidlin</last></author>
@@ -1078,7 +1078,7 @@
     </paper>
     <paper id="144">
       <title>Thistle and Interarbora</title>
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <url hash="e0f58434">C00-2144</url>
       <bibkey>calder-2000-thistle</bibkey>
     </paper>
@@ -1090,25 +1090,25 @@
     </paper>
     <paper id="146">
       <title>Structural disambiguation of morpho-syntactic categorial parsing for <fixed-case>K</fixed-case>orean</title>
-      <author><first>Jeongwon</first><last>Cha</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeongwon</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <url hash="c83d6c1e">C00-2146</url>
       <bibkey>cha-lee-2000-structural</bibkey>
     </paper>
     <paper id="147">
       <title>The Week at a Glance - Cross-language Cross-document Information Extraction and Translation</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <author><first>Yevgeny</first><last>Ludovik</last></author>
       <author><first>Hugo</first><last>Molina-Salgado</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="5b7b11ae">C00-2147</url>
       <bibkey>cowie-etal-2000-week</bibkey>
     </paper>
     <paper id="148">
       <title>Integrating compositional semantics into a verb lexicon</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="9ab3e3d9">C00-2148</url>
       <bibkey>dang-etal-2000-integrating</bibkey>
     </paper>
@@ -1121,15 +1121,15 @@
     </paper>
     <paper id="150">
       <title>Language Identification in Unknown Signals</title>
-      <author><first>John</first><last>Elliott</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="john-elliott"><first>John</first><last>Elliott</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Bill</first><last>Whyte</last></author>
       <url hash="eece5f40">C00-2150</url>
       <bibkey>elliott-etal-2000-language</bibkey>
     </paper>
     <paper id="151">
       <title>An Experiment On Incremental Analysis Using Robust Parsing Techniques</title>
-      <author><first>Kilian</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <author><first>Horia F.</first><last>Pop</last></author>
       <author><first>Ingo</first><last>Schroder</last></author>
@@ -1138,7 +1138,7 @@
     </paper>
     <paper id="152">
       <title>An Integrated Architecture for Example-Based Machine Translation</title>
-      <author><first>Alexander</first><last>Franz</last></author>
+      <author id="alexander-franz"><first>Alexander</first><last>Franz</last></author>
       <author><first>Keiko</first><last>Horiguchi</last></author>
       <author><first>Lei</first><last>Duan</last></author>
       <author><first>Doris</first><last>Ecker</last></author>
@@ -1157,14 +1157,14 @@
     <paper id="154">
       <title><fixed-case>W</fixed-case>eb<fixed-case>DIPLOMAT</fixed-case>: A Web-Based Interactive Machine Translation System</title>
       <author><first>Christopher</first><last>Hogan</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <url hash="19be1634">C00-2154</url>
       <bibkey>hogan-frederking-2000-webdiplomat</bibkey>
     </paper>
     <paper id="155">
       <title>An <fixed-case>HPSG</fixed-case>-to-<fixed-case>CFG</fixed-case> Approximation of <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <url hash="78e8042d">C00-2155</url>
       <bibkey>kiefer-etal-2000-hpsg</bibkey>
@@ -1172,7 +1172,7 @@
     <paper id="156">
       <title>Decision-Tree based Error Correction for Statistical Phrase Break Prediction in <fixed-case>K</fixed-case>orean</title>
       <author><first>Byeongchang</first><last>Kim</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <url hash="1c42f160">C00-2156</url>
       <bibkey>kim-lee-2000-decision</bibkey>
     </paper>
@@ -1192,7 +1192,7 @@
     </paper>
     <paper id="159">
       <title>A Bootstrapping Method for Extracting Bilingual Text Pairs</title>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Raymond</first><last>Flournoy</last></author>
       <author><first>Stefan</first><last>Kaufmann</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
@@ -1201,7 +1201,7 @@
     </paper>
     <paper id="160">
       <title>Producing More Readable Extracts by Revising Them</title>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="76899727">C00-2160</url>
       <bibkey>okumura-2000-producing</bibkey>
     </paper>
@@ -1214,15 +1214,15 @@
     </paper>
     <paper id="162">
       <title>Improving <fixed-case>SMT</fixed-case> quality with morpho-syntactic analysis</title>
-      <author><first>Sonja</first><last>Nießen</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sonja-niessen"><first>Sonja</first><last>Nießen</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="602c0469">C00-2162</url>
       <bibkey>niessen-ney-2000-improving</bibkey>
     </paper>
     <paper id="163">
       <title>A Comparison of Alignment Models for Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="558e2be0">C00-2163</url>
       <bibkey>och-ney-2000-comparison</bibkey>
     </paper>
@@ -1237,8 +1237,8 @@
       <title><fixed-case>KCAT</fixed-case>: A <fixed-case>K</fixed-case>orean Corpus Annotating Tool Minimizing Human Intervention</title>
       <author><first>Won-He</first><last>Ryu</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Heui-Seok</first><last>Lim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="heui-seok-lim"><first>Heui-Seok</first><last>Lim</last></author>
       <url hash="6f83a4b0">C00-2165</url>
       <bibkey>ryu-etal-2000-kcat</bibkey>
     </paper>
@@ -1258,7 +1258,7 @@
     <paper id="168">
       <title>Acquisition of a Language Computational Model for <fixed-case>NLP</fixed-case></title>
       <author><first>Svetlana</first><last>Sheremetyeva</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="8b58a15a">C00-2168</url>
       <bibkey>sheremetyeva-nirenburg-2000-acquisition</bibkey>
     </paper>
@@ -1286,7 +1286,7 @@
     <paper id="171">
       <title>Incorporating Metaphonemes in a Multilingual Lexicon</title>
       <author><first>Carole</first><last>Tiberius</last></author>
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <url hash="9e8447d6">C00-2171</url>
       <bibkey>tiberius-cahill-2000-incorporating</bibkey>
     </paper>
@@ -1301,7 +1301,7 @@
       <title><fixed-case>XMLT</fixed-case>rans: a <fixed-case>J</fixed-case>ava-based <fixed-case>XML</fixed-case> Transformation Language for Structured Data</title>
       <author><first>Derek</first><last>Walker</last></author>
       <author><first>Dominique</first><last>Petitpierre</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <url hash="14f8ab68">C00-2173</url>
       <bibkey>walker-etal-2000-xmltrans</bibkey>
     </paper>
@@ -1309,13 +1309,13 @@
       <title><fixed-case>C</fixed-case>hinese Generation in a Spoken Dialogue Translation System</title>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Taiyi</first><last>Huang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="c7b99286">C00-2174</url>
       <bibkey>wu-etal-2000-chinese</bibkey>
     </paper>
     <paper id="175">
       <title>Comparing two trainable grammatical relations finders</title>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <url hash="f36a2c61">C00-2175</url>
       <bibkey>yeh-2000-comparing</bibkey>
     </paper>
diff --git a/data/xml/C02.xml b/data/xml/C02.xml
index 6be28ef8d8..cf632bff82 100644
--- a/data/xml/C02.xml
+++ b/data/xml/C02.xml
@@ -20,16 +20,16 @@
     </paper>
     <paper id="2">
       <title>A Cheap and Fast Way to Build Useful Translation Lexicons</title>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <url hash="ee31dc7e">C02-1002</url>
       <bibkey>tufis-2002-cheap</bibkey>
     </paper>
     <paper id="3">
       <title>Learning <fixed-case>C</fixed-case>hinese Bracketing Knowledge Based on a Bilingual Language Model</title>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <url hash="b72b0dc1">C02-1003</url>
       <bibkey>lu-etal-2002-learning</bibkey>
     </paper>
@@ -78,7 +78,7 @@
       <author><first>Wei</first><last>Wang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Jin-Xia</first><last>Huang</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <url hash="1545f3fd">C02-1010</url>
       <bibkey>wang-etal-2002-structure</bibkey>
     </paper>
@@ -95,22 +95,22 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Lei</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="48c63ed6">C02-1012</url>
       <bibkey>sun-etal-2002-chinese</bibkey>
     </paper>
     <paper id="13">
       <title>High Precision Extraction of Grammatical Relations</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="a193cba9">C02-1013</url>
       <bibkey>carroll-briscoe-2002-high</bibkey>
     </paper>
     <paper id="14">
       <title>Semiautomatic Labelling of Semantic Features</title>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Aingeru</first><last>Mayor</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="21fdcbc2">C02-1014</url>
       <bibkey>diaz-de-ilarraza-etal-2002-semiautomatic</bibkey>
     </paper>
@@ -128,8 +128,8 @@
     </paper>
     <paper id="17">
       <title>Corpus-based Generation of Numeral Classifier using Phrase Alignment</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <url hash="1e606d32">C02-1017</url>
       <bibkey>paul-etal-2002-corpus</bibkey>
@@ -144,7 +144,7 @@
       <title>The Grammatical Function Analysis between <fixed-case>K</fixed-case>orean Adnoun Clause and Noun Phrase by Using Support Vector Machines</title>
       <author><first>Songwook</first><last>Lee</last></author>
       <author><first>Tae-Yeoub</first><last>Jang</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="889d0f13">C02-1019</url>
       <bibkey>lee-etal-2002-grammatical</bibkey>
     </paper>
@@ -157,8 +157,8 @@
     </paper>
     <paper id="21">
       <title>(Semi-)Automatic Detection of Errors in <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-Tagged Corpora</title>
-      <author><first>Pavel</first><last>Kvĕtoň</last></author>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="pavel-kveton"><first>Pavel</first><last>Kvĕtoň</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="9c59e0e5">C02-1021</url>
       <bibkey>kveton-oliva-2002-semi</bibkey>
     </paper>
@@ -189,20 +189,20 @@
     </paper>
     <paper id="26">
       <title>The Effectiveness of Dictionary and Web-Based Answer Reranking</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <url hash="9d1337f9">C02-1026</url>
       <bibkey>lin-2002-effectiveness</bibkey>
     </paper>
     <paper id="27">
       <title>Shallow Language Processing Architecture for <fixed-case>B</fixed-case>ulgarian</title>
-      <author><first>Hristo</first><last>Tanev</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="132f27d0">C02-1027</url>
       <bibkey>tanev-mitkov-2002-shallow</bibkey>
     </paper>
     <paper id="28">
       <title>Parsing Mildly Context-Sensitive Languages with Thread Automata</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <url hash="e4994882">C02-1028</url>
       <bibkey>villemonte-de-la-clergerie-2002-parsing</bibkey>
     </paper>
@@ -214,24 +214,24 @@
     </paper>
     <paper id="30">
       <title>The Simple Core and the Complex Periphery of Natural Language - a Formal and a Computational View</title>
-      <author><first>Petr</first><last>Sgall</last></author>
-      <author><first>Alena</first><last>Bŏhmová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
+      <author id="alena-bohmova"><first>Alena</first><last>Bŏhmová</last></author>
       <url hash="b8bb4cf9">C02-1030</url>
       <bibkey>sgall-bohmova-2002-simple</bibkey>
     </paper>
     <paper id="31">
       <title>The <fixed-case>C</fixed-case>hinese Aspect System and its Semantic Interpretation</title>
       <author><first>Guowen</first><last>Yang</last></author>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <url hash="9f054ee4">C02-1031</url>
       <bibkey>yang-bateman-2002-chinese</bibkey>
     </paper>
     <paper id="32">
       <title>Improving Alignment Quality in Statistical Machine Translation Using Context-dependent Maximum Entropy Models</title>
-      <author><first>Ismael García</first><last>Varea</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael García</first><last>Varea</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <url hash="ae5ce136">C02-1032</url>
       <bibkey>varea-etal-2002-improving</bibkey>
     </paper>
@@ -250,17 +250,17 @@
     </paper>
     <paper id="35">
       <title>Semantics-based Representation for Multimodal Interpretation in Conversational Systems</title>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <url hash="52f18c29">C02-1035</url>
       <bibkey>chai-2002-semantics</bibkey>
     </paper>
     <paper id="36">
       <title><fixed-case>E</fixed-case>xtraposition: A Case Study in <fixed-case>G</fixed-case>erman Sentence Realization</title>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Zhu</first><last>Zhang</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <url hash="0c5f3fa8">C02-1036</url>
       <bibkey>gamon-etal-2002-extraposition</bibkey>
     </paper>
@@ -279,7 +279,7 @@
     </paper>
     <paper id="39">
       <title>Instance Based Learning with Automatic Feature Selection Applied to Word Sense Disambiguation</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <url hash="c68eee4f">C02-1039</url>
       <bibkey>mihalcea-2002-instance</bibkey>
     </paper>
@@ -308,9 +308,9 @@
     </paper>
     <paper id="42">
       <title>Using Knowledge to Facilitate Factoid Answer Pinpointing</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <url hash="60a0d2a9">C02-1042</url>
       <bibkey>hovy-etal-2002-using</bibkey>
@@ -324,20 +324,20 @@
     <paper id="44">
       <title>Considerations of Linking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with <fixed-case>MRD</fixed-case></title>
       <author><first>Changhua</first><last>Yang</last></author>
-      <author><first>Sue J.</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
       <url hash="349cca20">C02-1044</url>
       <bibkey>yang-ker-2002-considerations</bibkey>
     </paper>
     <paper id="45">
       <title>A Method of Cluster-Based Indexing of Textual Data</title>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <url hash="1e098519">C02-1045</url>
       <bibkey>aizawa-2002-method</bibkey>
     </paper>
     <paper id="46">
       <title>Translation Selection through Source Word Sense Disambiguation and Target Word Selection</title>
       <author><first>Hyun Ah</first><last>Lee</last></author>
-      <author><first>Gil Chang</first><last>Kim</last></author>
+      <author id="gil-chang-kim"><first>Gil Chang</first><last>Kim</last></author>
       <url hash="94cb972f">C02-1046</url>
       <bibkey>lee-kim-2002-translation</bibkey>
     </paper>
@@ -358,15 +358,15 @@
     </paper>
     <paper id="49">
       <title>Unknown Word Extraction for <fixed-case>C</fixed-case>hinese Documents</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <url hash="468af7bc">C02-1049</url>
       <bibkey>chen-ma-2002-unknown</bibkey>
     </paper>
     <paper id="50">
       <title>Bidirectional Decoding for Statistical Machine Translation</title>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="1f91158b">C02-1050</url>
       <bibkey>watanabe-sumita-2002-bidirectional</bibkey>
     </paper>
@@ -391,7 +391,7 @@
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Hideki</first><last>Isozaki</last></author>
       <author><first>Eisaku</first><last>Maeda</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="97247392">C02-1053</url>
       <bibkey>hirao-etal-2002-extracting</bibkey>
     </paper>
@@ -406,7 +406,7 @@
       <title>Covering Ambiguity Resolution in <fixed-case>C</fixed-case>hinese Word Segmentation Based on Contextual Information</title>
       <author><first>Xiao</first><last>Luo</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <url hash="367e76b7">C02-1055</url>
       <bibkey>luo-etal-2002-covering</bibkey>
     </paper>
@@ -419,9 +419,9 @@
     </paper>
     <paper id="57">
       <title>An Automatic Evaluation Method for Localization Oriented Lexicalised <fixed-case>EBMT</fixed-case> System</title>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <url hash="a54766d3">C02-1057</url>
@@ -504,20 +504,20 @@
     </paper>
     <paper id="68">
       <title>Learning Grammars for Different Parsing Tasks by Partition Search</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <url hash="e0b79999">C02-1068</url>
       <bibkey>belz-2002-learning</bibkey>
     </paper>
     <paper id="69">
       <title>Effective Structural Inference for Large <fixed-case>XML</fixed-case> Documents</title>
       <author><first>Jason</first><last>Sankey</last></author>
-      <author><first>Raymond K.</first><last>Wong</last></author>
+      <author id="raymond-wong"><first>Raymond K.</first><last>Wong</last></author>
       <url hash="b20f7dac">C02-1069</url>
       <bibkey>sankey-wong-2002-effective</bibkey>
     </paper>
     <paper id="70">
       <title>Inducing Information Extraction Systems for New Languages via Cross-language Projection</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Charles</first><last>Schafer</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="93196d85">C02-1070</url>
@@ -525,7 +525,7 @@
     </paper>
     <paper id="71">
       <title>Integrating Shallow Linguistic Processing into a Unification-based <fixed-case>S</fixed-case>panish Grammar</title>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <url hash="d03738d3">C02-1071</url>
       <bibkey>marimon-2002-integrating</bibkey>
     </paper>
@@ -540,7 +540,7 @@
     <paper id="73">
       <title>Meta-evaluation of Summaries in a Cross-lingual Environment using Content-based Metrics</title>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Wai</first><last>Lam</last></author>
       <url hash="5083b764">C02-1073</url>
@@ -549,14 +549,14 @@
     <paper id="74">
       <title>Text Categorization using Feature Projections</title>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="f0ec04ca">C02-1074</url>
       <bibkey>ko-seo-2002-text</bibkey>
     </paper>
     <paper id="75">
       <title>A Novel Disambiguation Method for Unification-Based Grammars Using Probabilistic Context-Free Approximations</title>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Detlef</first><last>Prescher</last></author>
       <url hash="713177be">C02-1075</url>
       <bibkey>kiefer-etal-2002-novel</bibkey>
@@ -565,7 +565,7 @@
       <title>Using Language and Translation Models to Select the Best among Outputs from Multiple <fixed-case>MT</fixed-case> Systems</title>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="e2d892af">C02-1076</url>
       <bibkey>akiba-etal-2002-using</bibkey>
     </paper>
@@ -587,14 +587,14 @@
     <paper id="79">
       <title>Best Analysis Selection in Inflectional Languages</title>
       <author><first>Aleš</first><last>Horák</last></author>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <url hash="660963fb">C02-1079</url>
       <bibkey>horak-smrz-2002-best</bibkey>
     </paper>
     <paper id="80">
       <title>An Agent-based Approach to <fixed-case>C</fixed-case>hinese Named Entity Recognition</title>
       <author><first>Shiren</first><last>Ye</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Jimin</first><last>Liu</last></author>
       <url hash="71303642">C02-1080</url>
       <bibkey>ye-etal-2002-agent</bibkey>
@@ -608,7 +608,7 @@
     <paper id="82">
       <title>Topic Detection Based on Dialogue History</title>
       <author><first>Takayuki</first><last>Nakata</last></author>
-      <author><first>Shinichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Shinichi</first><last>Ando</last></author>
       <author><first>Akitoshi</first><last>Okumura</last></author>
       <url hash="d82b993b">C02-1082</url>
       <bibkey>nakata-etal-2002-topic</bibkey>
@@ -617,8 +617,8 @@
       <title>A Methodology for Terminology-based Knowledge Acquisition and Integration</title>
       <author><first>Hideki</first><last>Mima</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
-      <author><first>Jun-Ichi</first><last>Tsujii</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
+      <author id="junichi-tsujii"><first>Jun-Ichi</first><last>Tsujii</last></author>
       <url hash="54f5f1ef">C02-1083</url>
       <bibkey>mima-etal-2002-methodology</bibkey>
     </paper>
@@ -639,9 +639,9 @@
     </paper>
     <paper id="86">
       <title>Implicit Ambiguity Resolution Using Incremental Clustering in <fixed-case>K</fixed-case>orean-to-<fixed-case>E</fixed-case>nglish Cross-Language Information Retrieval</title>
-      <author><first>Kyung-Soon</first><last>Lee</last></author>
+      <author id="kyung-soon-lee"><first>Kyung-Soon</first><last>Lee</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="e3b02c5e">C02-1086</url>
       <bibkey>lee-etal-2002-implicit</bibkey>
     </paper>
@@ -656,14 +656,14 @@
       <title>Unsupervised Named Entity Classification Models and their Ensembles</title>
       <author><first>Jae-Ho</first><last>Kim</last></author>
       <author><first>In-Ho</first><last>Kang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="60a98313">C02-1088</url>
       <bibkey>kim-etal-2002-unsupervised</bibkey>
     </paper>
     <paper id="89">
       <title>Applying an <fixed-case>NVEF</fixed-case> Word-Pair Identifier to the <fixed-case>C</fixed-case>hinese Syllable-to-Word Conversion Problem</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="0dd3e436">C02-1089</url>
       <bibkey>tsai-hsu-2002-applying</bibkey>
     </paper>
@@ -676,7 +676,7 @@
     </paper>
     <paper id="91">
       <title>Using Syntactic Analysis to Increase Efficiency in Visualizing Text Collections</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <author><first>Ivan</first><last>Petroff</last></author>
       <author><first>Gerold</first><last>Schneider</last></author>
@@ -712,16 +712,16 @@
     </paper>
     <paper id="96">
       <title>Wordform- and Class-based Prediction of the Components of <fixed-case>G</fixed-case>erman Nominal Compounds in an <fixed-case>AAC</fixed-case> System</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="90d0177c">C02-1096</url>
       <bibkey>baroni-etal-2002-wordform</bibkey>
     </paper>
     <paper id="97">
       <title>Word Sense Disambiguation using Static and Dynamic Sense Vectors</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="1af85e1c">C02-1097</url>
       <bibkey>oh-choi-2002-word</bibkey>
     </paper>
@@ -735,8 +735,8 @@
     </paper>
     <paper id="99">
       <title>An <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Transliteration Model Using Pronunciation and Contextual Rules</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="d33b6b28">C02-1099</url>
       <bibkey>oh-choi-2002-english</bibkey>
     </paper>
@@ -744,20 +744,20 @@
       <title>Lenient Default Unification for Robust Processing within Unification Based Grammar Formalisms</title>
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun-Ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-Ichi</first><last>Tsujii</last></author>
       <url hash="0f7c8836">C02-1100</url>
       <bibkey>ninomiya-etal-2002-lenient</bibkey>
     </paper>
     <paper id="101">
       <title>Detecting Errors in Corpora Using Support Vector Machines</title>
       <author><first>Tetsuji</first><last>Nakagawa</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="e31d005e">C02-1101</url>
       <bibkey>nakagawa-matsumoto-2002-detecting</bibkey>
     </paper>
     <paper id="102">
       <title>Learning How to Answer Questions Using Trivia Games</title>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <url hash="87c2097f">C02-1102</url>
       <bibkey>mann-2002-learning</bibkey>
     </paper>
@@ -765,7 +765,7 @@
       <title>Automatic Text Categorization using the Importance of Sentences</title>
       <author><first>Youngjoong</first><last>Ko</last></author>
       <author><first>Jinwoo</first><last>Park</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="ca4633b9">C02-1103</url>
       <bibkey>ko-etal-2002-automatic</bibkey>
     </paper>
@@ -773,14 +773,14 @@
       <title>From Shallow to Deep Parsing Using Constraint Satisfaction</title>
       <author><first>Jean-Marie</first><last>Balfourier</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
-      <author><first>Tristan</first><last>van Rullen</last></author>
+      <author id="tristan-vanrullen"><first>Tristan</first><last>van Rullen</last></author>
       <url hash="06842f2d">C02-1104</url>
       <bibkey>balfourier-etal-2002-shallow</bibkey>
     </paper>
     <paper id="105">
       <title>From Trees to Predicate-argument Structures</title>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <url hash="4039af43">C02-1105</url>
       <bibkey>liakata-pulman-2002-trees</bibkey>
     </paper>
@@ -831,9 +831,9 @@
     </paper>
     <paper id="112">
       <title>Syntactic Features for High Precision Word Sense Disambiguation</title>
-      <author><first>David</first><last>Martínez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="54b742b2">C02-1112</url>
       <bibkey>martinez-etal-2002-syntactic</bibkey>
     </paper>
@@ -854,8 +854,8 @@
     </paper>
     <paper id="115">
       <title>A Maximum Entropy-based Word Sense Disambiguation System</title>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <url hash="8f9bb9fa">C02-1115</url>
       <bibkey>suarez-palomar-2002-maximum</bibkey>
     </paper>
@@ -868,7 +868,7 @@
     </paper>
     <paper id="117">
       <title>Extension of <fixed-case>Z</fixed-case>ipf’s Law to Words and Phrases</title>
-      <author><first>Le Quan</first><last>Ha</last></author>
+      <author id="le-quan-ha"><first>Le Quan</first><last>Ha</last></author>
       <author id="elvira-i-sicilia-garcia"><first>E. I.</first><last>Sicilia-Garcia</last></author>
       <author><first>Ji</first><last>Ming</last></author>
       <author id="francis-j-smith"><first>F. J.</first><last>Smith</last></author>
@@ -877,7 +877,7 @@
     </paper>
     <paper id="118">
       <title>Can Subcategorization Help a Statistical Dependency Parser?</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <url hash="17087fa3">C02-1118</url>
       <bibkey>zeman-2002-subcategorization</bibkey>
     </paper>
@@ -911,7 +911,7 @@
     <paper id="123">
       <title>Natural Language Interpretations for Heterogeneous Database Access</title>
       <author><first>Hodong</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <url hash="eaee80f8">C02-1123</url>
       <bibkey>lee-park-2002-natural</bibkey>
     </paper>
@@ -932,14 +932,14 @@
     <paper id="126">
       <title>Recovering Latent Information in Treebanks</title>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <url hash="c8c554d8">C02-1126</url>
       <bibkey>chiang-bikel-2002-recovering</bibkey>
     </paper>
     <paper id="127">
       <title>Location Normalization for Information Extraction</title>
-      <author><first>Huifeng</first><last>Li</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="huifeng-li"><first>Huifeng</first><last>Li</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <url hash="5400a0c0">C02-1127</url>
@@ -953,22 +953,22 @@
     </paper>
     <paper id="129">
       <title>Decision Trees as Explicit Domain Term Definitions</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <url hash="2e2697cc">C02-1129</url>
       <bibkey>basili-etal-2002-decision</bibkey>
     </paper>
     <paper id="130">
       <title>Fine Grained Classification of Named Entities</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="0424f3d9">C02-1130</url>
       <bibkey>fleischman-hovy-2002-fine</bibkey>
     </paper>
     <paper id="131">
       <title>Annotating Topological Fields and Chunks - and Revising <fixed-case>POS</fixed-case> Tags at the Same Time</title>
-      <author><first>Frank Henrik</first><last>Müller</last></author>
+      <author id="frank-henrik-muller"><first>Frank Henrik</first><last>Müller</last></author>
       <author><first>Tylman</first><last>Ule</last></author>
       <url hash="1cd666fa">C02-1131</url>
       <bibkey>muller-ule-2002-annotating</bibkey>
@@ -988,7 +988,7 @@
     </paper>
     <paper id="134">
       <title>Bootstrapping Bilingual Data using Consensus Translation for a Multilingual Instant Messaging System</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Vanessa</first><last>Murdock</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <url hash="2d2313c7">C02-1134</url>
@@ -996,7 +996,7 @@
     </paper>
     <paper id="135">
       <title>Non-Sentential Utterances: Grammar and Dialogue Dynamics in Corpus Annotation</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <url hash="8f04f91f">C02-1135</url>
       <bibkey>fernandez-ginzburg-2002-non</bibkey>
@@ -1013,30 +1013,30 @@
     <paper id="137">
       <title>A New Probabilistic Model for Title Generation</title>
       <author><first>Rong</first><last>Jin</last></author>
-      <author><first>Alexander G.</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander G.</first><last>Hauptmann</last></author>
       <url hash="0868a210">C02-1137</url>
       <bibkey>jin-hauptmann-2002-new</bibkey>
     </paper>
     <paper id="138">
       <title>Towards Automatic Generation of Natural Language Generation Systems</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="6da3f971">C02-1138</url>
       <bibkey>chen-etal-2002-towards</bibkey>
     </paper>
     <paper id="139">
       <title>Identifying Anaphoric and Non-Anaphoric Noun Phrases to Improve Coreference Resolution</title>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <url hash="581fee04">C02-1139</url>
       <bibkey>ng-cardie-2002-identifying</bibkey>
     </paper>
     <paper id="140">
       <title>Bringing the Dictionary to the User: The <fixed-case>FOKS</fixed-case> System</title>
       <author><first>Slaven</first><last>Bilac</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <url hash="9a27a4da">C02-1140</url>
       <bibkey>bilac-etal-2002-bringing</bibkey>
@@ -1044,7 +1044,7 @@
     <paper id="141">
       <title>A Complete Integrated <fixed-case>NLG</fixed-case> System Using <fixed-case>AI</fixed-case> and <fixed-case>NLU</fixed-case> Tools</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <url hash="851f63e1">C02-1141</url>
       <bibkey>danlos-el-ghali-2002-complete</bibkey>
     </paper>
@@ -1052,15 +1052,15 @@
       <title>Automatic Glossary Extraction: Beyond Terminology Identification</title>
       <author><first>Youngja</first><last>Park</last></author>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
       <url hash="c490ec47">C02-1142</url>
       <bibkey>park-etal-2002-automatic</bibkey>
     </paper>
     <paper id="143">
       <title>Simple Features for <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Ching-yi</first><last>Chia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Fu-Dong</first><last>Chiou</last></author>
       <url hash="8c3822af">C02-1143</url>
       <bibkey>dang-etal-2002-simple</bibkey>
@@ -1076,7 +1076,7 @@
       <title>Building a Large-Scale Annotated <fixed-case>C</fixed-case>hinese Corpus</title>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Fu-Dong</first><last>Chiou</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="516c13b6">C02-1145</url>
       <bibkey>xue-etal-2002-building</bibkey>
     </paper>
@@ -1097,7 +1097,7 @@
     <paper id="148">
       <title>Investigating the Relationship between Word Segmentation Performance and Retrieval Performance in <fixed-case>C</fixed-case>hinese <fixed-case>IR</fixed-case></title>
       <author><first>Fuchun</first><last>Peng</last></author>
-      <author><first>Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Xiangji</first><last>Huang</last></author>
       <author><first>Dale</first><last>Schuurmans</last></author>
       <author><first>Nick</first><last>Cercone</last></author>
       <url hash="3125040d">C02-1148</url>
@@ -1105,7 +1105,7 @@
     </paper>
     <paper id="149">
       <title>Entering Text with a Four-Button Device</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Yusuke</first><last>Inutsuka</last></author>
       <author><first>Masato</first><last>Takeichi</last></author>
       <url hash="35e6bdf1">C02-1149</url>
@@ -1121,7 +1121,7 @@
     <paper id="151">
       <title>Probabilistic Reasoning for Entity &amp; Relation Recognition</title>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <url hash="aea09b30">C02-1151</url>
       <bibkey>roth-yih-2002-probabilistic</bibkey>
     </paper>
@@ -1130,13 +1130,13 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
       <author><first>Ryosuke</first><last>Ito</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <url hash="5e46c5b7">C02-1152</url>
       <bibkey>komatani-etal-2002-efficient</bibkey>
     </paper>
     <paper id="153">
       <title>Generating the <fixed-case>XTAG</fixed-case> <fixed-case>E</fixed-case>nglish Grammar Using Metarules</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <url hash="1fcb2bfc">C02-1153</url>
       <bibkey>prolo-2002-generating</bibkey>
     </paper>
@@ -1144,7 +1144,7 @@
       <title>Unsupervised Learning of Generalized Names</title>
       <author><first>Roman</first><last>Yangarber</last></author>
       <author><first>Winston</first><last>Lin</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="d3da32d9">C02-1154</url>
       <bibkey>yangarber-etal-2002-unsupervised</bibkey>
     </paper>
@@ -1158,8 +1158,8 @@
     <paper id="156">
       <title>Putting Frames in Perspective</title>
       <author><first>Nancy</first><last>Chang</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
-      <author><first>Miriam R.L.</first><last>Petruck</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R.L.</first><last>Petruck</last></author>
       <url hash="659065ae">C02-1156</url>
       <bibkey>chang-etal-2002-putting</bibkey>
     </paper>
@@ -1171,8 +1171,8 @@
     </paper>
     <paper id="158">
       <title>Study of Practical Effectiveness for Machine Translation Using Recursive Chain-link-type Learning</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <url hash="bed51662">C02-1158</url>
@@ -1224,7 +1224,7 @@
     <paper id="164">
       <title>Language Model Adaptation with Additional Text Generated by Machine Translation</title>
       <author><first>Hideharu</first><last>Nakajima</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <url hash="ff94bfae">C02-1164</url>
       <bibkey>nakajima-etal-2002-language</bibkey>
@@ -1233,21 +1233,21 @@
       <title>Complexity of Event Structure in <fixed-case>IE</fixed-case> Scenarios</title>
       <author><first>Silja</first><last>Huttunen</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="b0d29eba">C02-1165</url>
       <bibkey>huttunen-etal-2002-complexity</bibkey>
     </paper>
     <paper id="166">
       <title>An Approach Based on Multilingual Thesauri and Model Combination for Bilingual Lexicon Extraction</title>
-      <author><first>Hervé</first><last>Déjean</last></author>
-      <author><first>Éric</first><last>Gaussier</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
+      <author id="eric-gaussier"><first>Éric</first><last>Gaussier</last></author>
       <author><first>Fatiha</first><last>Sadat</last></author>
       <url hash="da1e67df">C02-1166</url>
       <bibkey>dejean-etal-2002-approach</bibkey>
     </paper>
     <paper id="167">
       <title>Lexical Chains for Question Answering</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Adrian</first><last>Novischi</last></author>
       <url hash="d36b5d61">C02-1167</url>
       <bibkey>moldovan-novischi-2002-lexical</bibkey>
@@ -1255,21 +1255,21 @@
     <paper id="168">
       <title>Maximum Entropy Models for Word Sense Disambiguation</title>
       <author><first>Gerald</first><last>Chao</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <url hash="8bca50e9">C02-1168</url>
       <bibkey>chao-dyer-2002-maximum</bibkey>
     </paper>
     <paper id="169">
       <title>Open-Domain Voice-Activated Question Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Joe</first><last>Picone</last></author>
       <url hash="ba30ecad">C02-1169</url>
       <bibkey>harabagiu-etal-2002-open</bibkey>
     </paper>
     <paper id="170">
       <title>A Pattern-based Analyzer for <fixed-case>F</fixed-case>rench in the Context of Spoken Language Translation: First Prototype and Evaluation</title>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <url hash="39a2d79c">C02-1170</url>
       <bibkey>blanchon-2002-pattern</bibkey>
     </paper>
@@ -1301,7 +1301,7 @@
     </paper>
     <paper id="3">
       <title>Searching the Web by Voice</title>
-      <author><first>Alexander</first><last>Franz</last></author>
+      <author id="alexander-franz"><first>Alexander</first><last>Franz</last></author>
       <author><first>Brian</first><last>Milch</last></author>
       <url hash="c99c790e">C02-2003</url>
       <bibkey>franz-milch-2002-searching</bibkey>
@@ -1323,7 +1323,7 @@
       <title>An Annotation System for Enhancing Quality of Natural Language Processing</title>
       <author><first>Hideo</first><last>Watanabe</last></author>
       <author><first>Katashi</first><last>Nagao</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <author><first>Arendse</first><last>Bernth</last></author>
       <url hash="53a9a9e1">C02-2006</url>
       <bibkey>watanabe-etal-2002-annotation</bibkey>
@@ -1352,7 +1352,7 @@
     </paper>
     <paper id="10">
       <title>The Performance of a Grammar Checker with Deviant Language Input</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Pia</first><last>Lane</last></author>
       <url hash="7f327a82">C02-2010</url>
@@ -1362,7 +1362,7 @@
       <title>Semantic Case Role Detection for Information Extraction</title>
       <author><first>Rik</first><last>De Busser</last></author>
       <author><first>Roxana</first><last>Angheluta</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <url hash="9405c40d">C02-2011</url>
       <bibkey>de-busser-etal-2002-semantic</bibkey>
     </paper>
@@ -1377,22 +1377,22 @@
     <paper id="13">
       <title><fixed-case>SOAT</fixed-case>: A Semi-Automatic Domain Ontology Acquisition Tool from <fixed-case>C</fixed-case>hinese Corpus</title>
       <author><first>Shih-Hung</first><last>Wu</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="ab37935e">C02-2013</url>
       <bibkey>wu-hsu-2002-soat</bibkey>
     </paper>
     <paper id="14">
       <title>Recognition Assistance - Treating Errors in Texts Acquired from Various Recognition Processes</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <author><first>Mátyás</first><last>Naszódi</last></author>
-      <author><first>Balázs</first><last>Kis</last></author>
+      <author id="balazs-kis"><first>Balázs</first><last>Kis</last></author>
       <url hash="a6bfa10b">C02-2014</url>
       <bibkey>proszeky-etal-2002-recognition</bibkey>
     </paper>
     <paper id="15">
       <title>Context-Sensitive Electronic Dictionaries</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
-      <author><first>Balázs</first><last>Kis</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
+      <author id="balazs-kis"><first>Balázs</first><last>Kis</last></author>
       <url hash="c402bc46">C02-2015</url>
       <bibkey>proszeky-kis-2002-context</bibkey>
     </paper>
@@ -1406,13 +1406,13 @@
       <title>Robust Interpretation of User Requests for Text Retrieval in a Multimodal Environment</title>
       <author><first>Alexandra</first><last>Klein</last></author>
       <author><first>Estela</first><last>Puig-Waldmüller</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="320a2856">C02-2017</url>
       <bibkey>klein-etal-2002-robust</bibkey>
     </paper>
     <paper id="18">
       <title>An <fixed-case>XML</fixed-case>-based Document Suite</title>
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <author><first>Manuela</first><last>Kunze</last></author>
       <url hash="096ccbea">C02-2018</url>
       <bibkey>rosner-kunze-2002-xml</bibkey>
@@ -1430,22 +1430,22 @@
     <paper id="20">
       <title>Looking for Candidate Translational Equivalents in Specialized, Comparable Corpora</title>
       <author><first>Yun-Chuang</first><last>Chiao</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <url hash="25b1d332">C02-2020</url>
       <bibkey>chiao-zweigenbaum-2002-looking</bibkey>
     </paper>
     <paper id="21">
       <title>Reasoning in Metaphor Understanding: The <fixed-case>ATT</fixed-case>-Meta Approach and System</title>
-      <author><first>John</first><last>Barnden</last></author>
-      <author><first>Sheila</first><last>Glasbey</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>Alan</first><last>Wallington</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
+      <author id="sheila-r-glasbey"><first>Sheila</first><last>Glasbey</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="alan-wallington"><first>Alan</first><last>Wallington</last></author>
       <url hash="b59f318b">C02-2021</url>
       <bibkey>barnden-etal-2002-reasoning</bibkey>
     </paper>
     <paper id="22">
       <title>Cross Linguistic Phoneme Correspondences</title>
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
       <url hash="eee5045b">C02-2022</url>
       <bibkey>cahill-tiberius-2002-cross</bibkey>
@@ -1456,7 +1456,7 @@
       <author><first>Nathan</first><last>Carr</last></author>
       <author><first>Greg</first><last>Kamei</last></author>
       <author><first>Mikyung</first><last>Kim</last></author>
-      <author><first>Michael J.</first><last>Pan</last></author>
+      <author id="michael-j-pan"><first>Michael J.</first><last>Pan</last></author>
       <author><first>Chris</first><last>Salvador</last></author>
       <author><first>Yasuyo</first><last>Sawaki</last></author>
       <url hash="ace12222">C02-2023</url>
@@ -1466,7 +1466,7 @@
       <title>An Indexing Scheme for Typed Feature Structures</title>
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Takaki</first><last>Makino</last></author>
-      <author><first>Jun-Ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-Ichi</first><last>Tsujii</last></author>
       <url hash="c2e22b64">C02-2024</url>
       <bibkey>ninomiya-etal-2002-indexing</bibkey>
     </paper>
@@ -1474,20 +1474,20 @@
       <title>The <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Redwoods Treebank: Motivation and Preliminary Applications</title>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Thorsten</first><last>Brants</last></author>
       <url hash="603e4d8c">C02-2025</url>
       <bibkey>oepen-etal-2002-lingo</bibkey>
     </paper>
     <paper id="26">
       <title>Creating a Finite-State Parser with Application Semantics</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Tahir</first><last>Butt</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <url hash="c4f8a60a">C02-2026</url>
       <bibkey>rambow-etal-2002-creating</bibkey>
     </paper>
@@ -1501,7 +1501,7 @@
       <title><fixed-case>C</fixed-case>hinese Syntactic Parsing Based on Extended <fixed-case>GLR</fixed-case> Parsing Algorithm with <fixed-case>PCFG</fixed-case>*</title>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Bo</first><last>Xu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="426710d4">C02-2028</url>
       <bibkey>zhang-etal-2002-chinese</bibkey>
     </paper>
diff --git a/data/xml/C04.xml b/data/xml/C04.xml
index 248ce810d9..e8b8b878ae 100644
--- a/data/xml/C04.xml
+++ b/data/xml/C04.xml
@@ -35,7 +35,7 @@
     </paper>
     <paper id="4">
       <title>Discriminative Hidden <fixed-case>M</fixed-case>arkov Modeling with Long State Dependence using a k<fixed-case>NN</fixed-case> Ensemble</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <pages>22–28</pages>
       <url hash="9da9c5b9">C04-1004</url>
       <bibkey>zhou-2004-discriminative</bibkey>
@@ -52,7 +52,7 @@
       <title>Improved Word Alignment Using a Symmetric Lexicon Model</title>
       <author><first>Richard</first><last>Zens</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>36–42</pages>
       <url hash="7fafc4ed">C04-1006</url>
       <bibkey>zens-etal-2004-improved</bibkey>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="14">
       <title>Modeling of Long Distance Context Dependency</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <pages>92–98</pages>
       <url hash="7f828ede">C04-1014</url>
       <bibkey>zhou-2004-modeling</bibkey>
@@ -123,7 +123,7 @@
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>99–105</pages>
       <url hash="507c7905">C04-1015</url>
       <bibkey>imamura-etal-2004-example</bibkey>
@@ -132,7 +132,7 @@
       <title>Extending <fixed-case>MT</fixed-case> evaluation tools with translation complexity metrics</title>
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Debbie</first><last>Elliott</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <pages>106–112</pages>
       <url hash="c67f2f6e">C04-1016</url>
       <bibkey>babych-etal-2004-extending</bibkey>
@@ -140,25 +140,25 @@
     <paper id="17">
       <title>Splitting Input Sentence for Machine Translation Using Language Model with Sentence Similarity</title>
       <author><first>Takao</first><last>Doi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>113–119</pages>
       <url hash="187cf917">C04-1017</url>
       <bibkey>doi-sumita-2004-splitting</bibkey>
     </paper>
     <paper id="18">
       <title>Playing the Telephone Game: Determining the Hierarchical Structure of Perspective and Speech Expressions</title>
-      <author><first>Eric</first><last>Breck</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="eric-breck"><first>Eric</first><last>Breck</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>120–126</pages>
       <url hash="60231d2a">C04-1018</url>
       <bibkey>breck-cardie-2004-playing</bibkey>
     </paper>
     <paper id="19">
       <title>The Queen’s Agents: Using Collaborating Object-Based Dialogue Agents in the Queen’s Communicator</title>
-      <author><first>Ian</first><last>O’Neill</last></author>
-      <author><first>Philip</first><last>Hanna</last></author>
+      <author id="ian-m-oneill"><first>Ian</first><last>O’Neill</last></author>
+      <author id="philip-hanna"><first>Philip</first><last>Hanna</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
-      <author><first>Michael</first><last>McTear</last></author>
+      <author id="michael-f-mctear"><first>Michael</first><last>McTear</last></author>
       <pages>127–133</pages>
       <url hash="29d847c8">C04-1019</url>
       <bibkey>oneill-etal-2004-queens</bibkey>
@@ -194,7 +194,7 @@
       <title>A Support System for Revising Titles to Stimulate the Lay Reader’s Interest in Technical Achievements</title>
       <author><first>Yasuko</first><last>Senda</last></author>
       <author><first>Yasusi</first><last>Sinohara</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>155–161</pages>
       <url hash="87d1e152">C04-1023</url>
       <bibkey>senda-etal-2004-support</bibkey>
@@ -209,7 +209,7 @@
     <paper id="25">
       <title>A Grammar Formalism and Parser for Linearization-based <fixed-case>HPSG</fixed-case></title>
       <author><first>Michael W.</first><last>Daniels</last></author>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <pages>169–175</pages>
       <url hash="d0f5a03e">C04-1025</url>
       <bibkey>daniels-meurers-2004-grammar</bibkey>
@@ -229,14 +229,14 @@
     <paper id="27">
       <title>Learning theories from text</title>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>183–190</pages>
       <url hash="40ea9214">C04-1027</url>
       <bibkey>liakata-pulman-2004-learning</bibkey>
     </paper>
     <paper id="28">
       <title>Generalizing Dimensionality in <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>191–197</pages>
       <url hash="b65b5a86">C04-1028</url>
@@ -254,16 +254,16 @@
     <paper id="30">
       <title>Reordering Constraints for Phrase-Based Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>205–211</pages>
       <url hash="094b823b">C04-1030</url>
       <bibkey>zens-etal-2004-reordering</bibkey>
     </paper>
     <paper id="31">
       <title>Word to word alignment strategies</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>212–218</pages>
       <url hash="e6c23666">C04-1031</url>
       <bibkey>tiedemann-2004-word</bibkey>
@@ -272,7 +272,7 @@
       <title>Symmetric Word Alignments for Statistical Machine Translation</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>219–225</pages>
       <url hash="b196d626">C04-1032</url>
       <bibkey>matusov-etal-2004-symmetric</bibkey>
@@ -281,8 +281,8 @@
       <title>An <fixed-case>NP</fixed-case>-Cluster Based Approach to Coreference Resolution</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>226–232</pages>
       <url hash="7ea3b223">C04-1033</url>
       <bibkey>yang-etal-2004-np</bibkey>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="35">
       <title>Classifying Ellipsis in Dialogue: A Machine Learning Approach</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <author><first>Shalom</first><last>Lappin</last></author>
       <pages>240–246</pages>
@@ -305,7 +305,7 @@
     </paper>
     <paper id="36">
       <title>Feature Vector Quality and Distributional Similarity</title>
-      <author><first>Maayan</first><last>Geffet</last></author>
+      <author id="maayan-geffet"><first>Maayan</first><last>Geffet</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>247–253</pages>
       <url hash="7128e9d9">C04-1036</url>
@@ -328,7 +328,7 @@
     </paper>
     <paper id="39">
       <title>Annotating Grammatical Functions for <fixed-case>G</fixed-case>erman Using Finite-State Cascades</title>
-      <author><first>Frank Henrik</first><last>Müller</last></author>
+      <author id="frank-henrik-muller"><first>Frank Henrik</first><last>Müller</last></author>
       <pages>268–274</pages>
       <url hash="0ff24f6a">C04-1039</url>
       <bibkey>muller-2004-annotating</bibkey>
@@ -345,16 +345,16 @@
     <paper id="41">
       <title>The Importance of Supertagging for Wide-Coverage <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>282–288</pages>
       <url hash="7ae24d7f">C04-1041</url>
       <bibkey>clark-curran-2004-importance</bibkey>
     </paper>
     <paper id="42">
       <title>Condition of Projectivity in the Underlying Dependency Structures</title>
-      <author><first>Katerina</first><last>Veselá</last></author>
-      <author><first>Jiri</first><last>Havelka</last></author>
-      <author><first>Eva</first><last>Hajicová</last></author>
+      <author id="katerina-vesela"><first>Katerina</first><last>Veselá</last></author>
+      <author id="jiri-havelka"><first>Jiri</first><last>Havelka</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicová</last></author>
       <pages>289–295</pages>
       <url hash="48c3fe49">C04-1042</url>
       <bibkey>vesela-etal-2004-condition</bibkey>
@@ -369,7 +369,7 @@
     </paper>
     <paper id="44">
       <title>Polarization and abstraction of grammatical formalisms as methods for lexical disambiguation</title>
-      <author><first>Guillaume</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillaume</first><last>Bonfante</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
       <pages>303–309</pages>
@@ -378,8 +378,8 @@
     </paper>
     <paper id="45">
       <title>Improving Word Alignment Quality using Morpho-syntactic Information</title>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <pages>310–314</pages>
       <url hash="f6be39df">C04-1045</url>
       <bibkey>ney-popovic-2004-improving</bibkey>
@@ -390,7 +390,7 @@
       <author><first>Erin</first><last>Fitzgerald</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Simona</first><last>Gandrabur</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Alex</first><last>Kulesza</last></author>
       <author><first>Alberto</first><last>Sanchis</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
@@ -401,10 +401,10 @@
     <paper id="47">
       <title>Using a Mixture of N-Best Lists from Multiple <fixed-case>MT</fixed-case> Systems in Rank-Sum-Based Confidence Measure for <fixed-case>MT</fixed-case> Outputs</title>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>322–328</pages>
       <url hash="2b04e6df">C04-1047</url>
       <bibkey>akiba-etal-2004-using</bibkey>
@@ -413,7 +413,7 @@
       <title>Generating Discourse Structures for Written Text</title>
       <author><first>Huong</first><last>Le Thanh</last></author>
       <author><first>Geetha</first><last>Abeysinghe</last></author>
-      <author><first>Christian</first><last>Huyck</last></author>
+      <author id="christian-huyck"><first>Christian</first><last>Huyck</last></author>
       <pages>329–335</pages>
       <url hash="6ba6cef7">C04-1048</url>
       <bibkey>le-thanh-etal-2004-generating</bibkey>
@@ -421,7 +421,7 @@
     <paper id="49">
       <title>Talking robots with <fixed-case>L</fixed-case>ego <fixed-case>M</fixed-case>ind<fixed-case>S</fixed-case>torms</title>
       <author><first>Alexander</first><last>Koller</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <pages>336–342</pages>
       <url hash="c9a7115a">C04-1049</url>
       <bibkey>koller-kruijff-2004-talking</bibkey>
@@ -436,7 +436,7 @@
     </paper>
     <paper id="51">
       <title>Unsupervised Construction of Large Paraphrase Corpora: Exploiting Massively Parallel News Sources</title>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <pages>350–356</pages>
@@ -445,7 +445,7 @@
     </paper>
     <paper id="52">
       <title>Toward Unsupervised Whole-Corpus Tagging</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <pages>357–363</pages>
       <url hash="215cd7c0">C04-1052</url>
       <bibkey>freitag-2004-toward</bibkey>
@@ -454,7 +454,7 @@
       <title>Evaluating Cross-Language Annotation Transfer in the <fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or Corpus</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Pamela</first><last>Forner</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>364–370</pages>
       <url hash="70963a01">C04-1053</url>
       <bibkey>bentivogli-etal-2004-evaluating</bibkey>
@@ -469,8 +469,8 @@
     </paper>
     <paper id="55">
       <title>Skeletons in the parser: Using a shallow parser to improve deep parsing</title>
-      <author><first>Mary</first><last>Swift</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <pages>383–389</pages>
       <url hash="bfed6e26">C04-1055</url>
@@ -504,7 +504,7 @@
       <title>Language Model Adaptation for Statistical Machine Translation via Structured Query Models</title>
       <author><first>Bing</first><last>Zhao</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>411–417</pages>
       <url hash="d555f03d">C04-1059</url>
       <bibkey>zhao-etal-2004-language</bibkey>
@@ -563,7 +563,7 @@
     <paper id="66">
       <title><fixed-case>J</fixed-case>apanese Unknown Word Identification by Character-based Chunking</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>459–465</pages>
       <url hash="9582cd69">C04-1066</url>
       <bibkey>asahara-matsumoto-2004-japanese</bibkey>
@@ -585,8 +585,8 @@
     </paper>
     <paper id="69">
       <title>Document Re-ranking Based on Automatically Acquired Key Terms in <fixed-case>C</fixed-case>hinese Information Retrieval</title>
-      <author><first>Lingpeng</first><last>Yang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="lingpeng-yang"><first>Lingpeng</first><last>Yang</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Li</first><last>Tang</last></author>
       <pages>480–486</pages>
       <url hash="dc2e5da5">C04-1069</url>
@@ -611,8 +611,8 @@
     </paper>
     <paper id="72">
       <title><fixed-case>ORANGE</fixed-case>: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <pages>501–507</pages>
       <url hash="62e57a1e">C04-1072</url>
       <bibkey>lin-och-2004-orange</bibkey>
@@ -620,7 +620,7 @@
     <paper id="73">
       <title>Improving a Statistical <fixed-case>MT</fixed-case> System with Automatically Learned Rewrite Patterns</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <pages>508–514</pages>
       <url hash="adf27934">C04-1073</url>
       <bibkey>xia-mccord-2004-improving</bibkey>
@@ -634,7 +634,7 @@
     </paper>
     <paper id="75">
       <title>A High-Performance Coreference Resolution System using a Constraint-based Multi-Agent Strategy</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <pages>522–528</pages>
       <url hash="b3994a8f">C04-1075</url>
@@ -652,7 +652,7 @@
       <title>Corpus and Evaluation Measures for Multiple Document Summarization with Multiple Sources</title>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Takahiro</first><last>Fukusima</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Chikashi</first><last>Nobata</last></author>
       <author><first>Hidetsugu</first><last>Nanba</last></author>
       <pages>535–541</pages>
@@ -662,7 +662,7 @@
     <paper id="78">
       <title>Cascading Use of Soft and Hard Matching Pattern Rules for Weakly Supervised Information Extraction</title>
       <author><first>Jing</first><last>Xiao</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Hang</first><last>Cui</last></author>
       <pages>542–548</pages>
       <url hash="0f297eed">C04-1078</url>
@@ -671,7 +671,7 @@
     <paper id="79">
       <title>Generating Overview Summaries of Ongoing Email Thread Discussions</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>549–555</pages>
       <url hash="2f6b44a2">C04-1079</url>
       <bibkey>wan-mckeown-2004-generating</bibkey>
@@ -679,7 +679,7 @@
     <paper id="80">
       <title>Part-of-Speech Tagging in Context</title>
       <author><first>Michele</first><last>Banko</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>556–561</pages>
       <url hash="91dbc9d0">C04-1080</url>
       <bibkey>banko-moore-2004-part</bibkey>
@@ -687,7 +687,7 @@
     <paper id="81">
       <title><fixed-case>C</fixed-case>hinese Segmentation and New Word Detection using Conditional Random Fields</title>
       <author><first>Fuchun</first><last>Peng</last></author>
-      <author><first>Fangfang</first><last>Feng</last></author>
+      <author id="fangfang-feng"><first>Fangfang</first><last>Feng</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>562–568</pages>
       <url hash="e7447d55">C04-1081</url>
@@ -696,7 +696,7 @@
     <paper id="82">
       <title>Tagging with Hidden <fixed-case>M</fixed-case>arkov Models Using Ambiguous Tags</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frédéric</first><last>Bechét</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Bechét</last></author>
       <author><first>Alexandra</first><last>Volanschi</last></author>
       <pages>569–575</pages>
       <url hash="bb33a20c">C04-1082</url>
@@ -705,14 +705,14 @@
     <paper id="83">
       <title>Browsing Help for a Faster Retrieval</title>
       <author><first>Eric</first><last>Crestan</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>576–582</pages>
       <url hash="09389b0d">C04-1083</url>
       <bibkey>crestan-de-loupy-2004-browsing</bibkey>
     </paper>
     <paper id="84">
       <title>Incremental Topic Representations</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>583–589</pages>
       <url hash="5b399ee7">C04-1084</url>
       <bibkey>harabagiu-2004-incremental</bibkey>
@@ -734,9 +734,9 @@
     </paper>
     <paper id="87">
       <title>Enhancing automatic term recognition through recognition of variation</title>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <pages>604–610</pages>
       <url hash="ee4f464d">C04-1087</url>
       <bibkey>nenadic-etal-2004-enhancing</bibkey>
@@ -765,8 +765,8 @@
     </paper>
     <paper id="91">
       <title>An Algorithmic Framework for Solving the Decoding Problem in Statistical Machine Translation</title>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
-      <author><first>Tanveer A.</first><last>Faruquie</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer A.</first><last>Faruquie</last></author>
       <author><first>Hemanta K.</first><last>Maji</last></author>
       <pages>631–637</pages>
       <url hash="2393881b">C04-1091</url>
@@ -789,10 +789,10 @@
     </paper>
     <paper id="94">
       <title>Using syntactic information to extract relevant terms for multi-document summarization</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Víctor</first><last>Peinado</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="victor-peinado"><first>Víctor</first><last>Peinado</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>652–658</pages>
       <url hash="b733dbd9">C04-1094</url>
@@ -818,12 +818,12 @@
     </paper>
     <paper id="97">
       <title>Linguistically Informed Statistical Models of Constituent Structure for Ordering in Sentence Realization</title>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
-      <author><first>David</first><last>Rojas</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
+      <author id="david-m-rojas"><first>David</first><last>Rojas</last></author>
       <author><first>Martine</first><last>Smets</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>673–679</pages>
       <url hash="ed652b0e">C04-1097</url>
       <bibkey>ringger-etal-2004-linguistically</bibkey>
@@ -833,7 +833,7 @@
       <author><first>Jun-lin</first><last>Zhang</last></author>
       <author><first>Le</first><last>Sun</last></author>
       <author><first>Wei-min</first><last>Qu</last></author>
-      <author><first>Yu-fang</first><last>Sun</last></author>
+      <author id="yufang-sun"><first>Yu-fang</first><last>Sun</last></author>
       <pages>680–685</pages>
       <url hash="ce2d42c4">C04-1098</url>
       <bibkey>zhang-etal-2004-trigger</bibkey>
@@ -847,8 +847,8 @@
     </paper>
     <paper id="100">
       <title>Question Answering Based on Semantic Structures</title>
-      <author><first>Srini</first><last>Narayanan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>693–701</pages>
       <url hash="35ac801b">C04-1100</url>
       <bibkey>narayanan-harabagiu-2004-question</bibkey>
@@ -857,7 +857,7 @@
       <title>Combining Linguistic Features with Weighted <fixed-case>B</fixed-case>ayesian Classifier for Temporal Reference Processing</title>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Chunfa</first><last>Yuan</last></author>
       <pages>702–708</pages>
       <url hash="9bbd15fa">C04-1101</url>
@@ -884,8 +884,8 @@
     <paper id="104">
       <title>Subcategorization Acquisition and Evaluation for <fixed-case>C</fixed-case>hinese Verbs</title>
       <author><first>Xiwu</first><last>Han</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Haoliang</first><last>Qi</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="haoliang-qi"><first>Haoliang</first><last>Qi</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <pages>723–728</pages>
       <url hash="7dacd8b4">C04-1104</url>
@@ -907,7 +907,7 @@
     </paper>
     <paper id="107">
       <title>Probabilistic Sentence Reduction Using Support Vector Machines</title>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <author><first>Susumu</first><last>Horiguchi</last></author>
       <author><first>Bao Tu</first><last>Ho</last></author>
@@ -918,7 +918,7 @@
     </paper>
     <paper id="108">
       <title>Improving Chronological Sentence Ordering by Precedence Relation</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yutaka</first><last>Matsuo</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <pages>750–756</pages>
@@ -928,8 +928,8 @@
     <paper id="109">
       <title>Discriminative Slot Detection Using Kernel Methods</title>
       <author><first>Shubin</first><last>Zhao</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>757–763</pages>
       <url hash="deec7d19">C04-1109</url>
       <bibkey>zhao-etal-2004-discriminative</bibkey>
@@ -946,7 +946,7 @@
       <title>Towards Terascale Semantic Acquisition</title>
       <author><first>Patrick</first><last>Pantel</last></author>
       <author><first>Deepak</first><last>Ravichandran</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>771–777</pages>
       <url hash="285804b6">C04-1111</url>
       <bibkey>pantel-etal-2004-towards</bibkey>
@@ -968,8 +968,8 @@
     <paper id="114">
       <title>Improving Statistical Machine Translation in the Medical Domain using the Unified Medical Language system</title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>792–798</pages>
       <url hash="0ba6c050">C04-1114</url>
       <bibkey>eck-etal-2004-improving</bibkey>
@@ -993,8 +993,8 @@
     </paper>
     <paper id="117">
       <title>Cognate Mapping - A Heuristic Strategy for the Semi-Supervised Acquisition of a <fixed-case>S</fixed-case>panish Lexicon from a <fixed-case>P</fixed-case>ortuguese Seed Lexicon</title>
-      <author><first>Stefan</first><last>Schulz</last></author>
-      <author><first>Kornel</first><last>Markó</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
+      <author id="kornel-marko"><first>Kornel</first><last>Markó</last></author>
       <author><first>Eduardo</first><last>Sbrissia</last></author>
       <author><first>Percy</first><last>Nohama</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
@@ -1016,7 +1016,7 @@
     <paper id="119">
       <title>Back Transliteration from <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish using Target <fixed-case>E</fixed-case>nglish Context</title>
       <author><first>Isao</first><last>Goto</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>827–833</pages>
@@ -1055,7 +1055,7 @@
     <paper id="124">
       <title>Detecting Multiword Verbs in the <fixed-case>E</fixed-case>nglish Sublanguage of <fixed-case>MEDLINE</fixed-case> Abstracts</title>
       <author><first>Chun</first><last>Xiao</last></author>
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <pages>861–867</pages>
       <url hash="3c05fba8">C04-1124</url>
       <bibkey>xiao-rosner-2004-detecting</bibkey>
@@ -1079,7 +1079,7 @@
       <title>Cross-lingual Information Extraction System Evaluation</title>
       <author><first>Kiyoshi</first><last>Sudo</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>882–888</pages>
       <url hash="264858bb">C04-1127</url>
       <bibkey>sudo-etal-2004-cross</bibkey>
@@ -1087,7 +1087,7 @@
     <paper id="128">
       <title>Detection of Question-Answer Pairs in Email Conversations</title>
       <author><first>Lokesh</first><last>Shrestha</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>889–895</pages>
       <url hash="3470b7d6">C04-1128</url>
       <bibkey>shrestha-mckeown-2004-detection</bibkey>
@@ -1096,7 +1096,7 @@
       <title>Syntactic Simplification for Improving Content Selection in Multi-Document Summarization</title>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>896–902</pages>
       <url hash="09083e59">C04-1129</url>
       <bibkey>siddharthan-etal-2004-syntactic</bibkey>
@@ -1104,7 +1104,7 @@
     <paper id="130">
       <title>Trajectory Based Word Sense Disambiguation</title>
       <author><first>Xiaojie</first><last>Wang</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>903–909</pages>
       <url hash="3fe28dfb">C04-1130</url>
       <bibkey>wang-matsumoto-2004-trajectory</bibkey>
@@ -1126,7 +1126,7 @@
     </paper>
     <paper id="133">
       <title>Automated Induction of Sense in Context</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <pages>924–930</pages>
@@ -1159,7 +1159,7 @@
     <paper id="137">
       <title>Identification of Confusable Drug Names: A New Approach and Evaluation Methodology</title>
       <author><first>Grzegorz</first><last>Kondrak</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>952–958</pages>
       <url hash="f522fefe">C04-1137</url>
       <bibkey>kondrak-dorr-2004-identification</bibkey>
@@ -1177,7 +1177,7 @@
     </paper>
     <paper id="139">
       <title>Linguistic profiling of texts for the purpose of language verification</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <pages>966–972</pages>
       <url hash="ace2e4ec">C04-1139</url>
@@ -1212,7 +1212,7 @@
       <title><fixed-case>FASIL</fixed-case> Email Summarisation System</title>
       <author><first>Angelo</first><last>Dalli</last></author>
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>994–1000</pages>
       <url hash="af9f8d27">C04-1143</url>
       <bibkey>dalli-etal-2004-fasil</bibkey>
@@ -1229,9 +1229,9 @@
       <title>Morpheme-based Derivation of Bipolar Semantic Orientation of <fixed-case>C</fixed-case>hinese Words</title>
       <author><first>Raymond W.M.</first><last>Yuen</last></author>
       <author><first>Terence Y.W.</first><last>Chan</last></author>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><first>O.Y.</first><last>Kwong</last></author>
-      <author><first>Benjamin K.Y.</first><last>Tsou</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="olivia-o-y-kwong"><first>O.Y.</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.Y.</first><last>Tsou</last></author>
       <pages>1008–1014</pages>
       <url hash="5e95f1ca">C04-1145</url>
       <bibkey>yuen-etal-2004-morpheme</bibkey>
@@ -1239,23 +1239,23 @@
     <paper id="146">
       <title>Characterising Measures of Lexical Distributional Similarity</title>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>1015–1021</pages>
       <url hash="4a6aece9">C04-1146</url>
       <bibkey>weeds-etal-2004-characterising</bibkey>
     </paper>
     <paper id="147">
       <title>Fast Computation of Lexical Affinity Models</title>
-      <author><first>Egidio</first><last>Terra</last></author>
-      <author><first>Charles L. A.</first><last>Clarke</last></author>
+      <author id="egidio-l-terra"><first>Egidio</first><last>Terra</last></author>
+      <author id="charles-l-a-clarke"><first>Charles L. A.</first><last>Clarke</last></author>
       <pages>1022–1028</pages>
       <url hash="8876c4b5">C04-1147</url>
       <bibkey>terra-clarke-2004-fast</bibkey>
     </paper>
     <paper id="148">
       <title>Online Generic Editing of Heterogeneous Dictionary Entries in Papillon Project</title>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <author><first>David</first><last>Thevenin</last></author>
       <pages>1029–1035</pages>
       <url hash="7dfdcae2">C04-1148</url>
@@ -1266,8 +1266,8 @@
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Kohei</first><last>Hino</last></author>
       <author><first>Mitsuhiro</first><last>Kida</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>1036–1042</pages>
       <url hash="2097428d">C04-1149</url>
       <bibkey>utsuro-etal-2004-integrating</bibkey>
@@ -1275,7 +1275,7 @@
     <paper id="150">
       <title>Quantitative and Qualitative Evaluation of the <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn Ontology Learning System</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Francesca</first><last>Neri</last></author>
       <pages>1043–1050</pages>
@@ -1302,10 +1302,10 @@
     </paper>
     <paper id="153">
       <title>Learning <fixed-case>G</fixed-case>reek Verb Complements: Addressing the Class Imbalance</title>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <pages>1065–1071</pages>
       <url hash="8051a536">C04-1153</url>
       <bibkey>kermanidis-etal-2004-learning</bibkey>
@@ -1322,16 +1322,16 @@
     <paper id="155">
       <title>A Flexible Example Annotation Schema: Translation Corresponding Tree Representation</title>
       <author><first>Fai</first><last>Wong</last></author>
-      <author><first>Dong Cheng</first><last>Hu</last></author>
-      <author><first>Yu Hang</first><last>Mao</last></author>
-      <author><first>Ming Chui</first><last>Dong</last></author>
+      <author id="dong-cheng-hu"><first>Dong Cheng</first><last>Hu</last></author>
+      <author id="yu-hang-mao"><first>Yu Hang</first><last>Mao</last></author>
+      <author id="ming-chui-dong"><first>Ming Chui</first><last>Dong</last></author>
       <pages>1079–1085</pages>
       <url hash="3eda6f16">C04-1155</url>
       <bibkey>wong-etal-2004-flexible</bibkey>
     </paper>
     <paper id="156">
       <title>Knowledge Intensive Word Alignment with <fixed-case>KNOWA</fixed-case></title>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <pages>1086–1092</pages>
       <url hash="662932a0">C04-1156</url>
@@ -1349,7 +1349,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Teruhisa</first><last>Misu</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>1100–1106</pages>
       <url hash="4475dfff">C04-1158</url>
       <bibkey>komatani-etal-2004-efficient</bibkey>
@@ -1373,7 +1373,7 @@
     </paper>
     <paper id="161">
       <title>Acquisition of Semantic Classes for Adjectives from Distributional Evidence</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <author><first>Eloi</first><last>Batlle</last></author>
       <pages>1119–1125</pages>
@@ -1382,7 +1382,7 @@
     </paper>
     <paper id="162">
       <title><fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank on Semantic Networks, with Application to Word Sense Disambiguation</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Paul</first><last>Tarau</last></author>
       <author><first>Elizabeth</first><last>Figa</last></author>
       <pages>1126–1132</pages>
@@ -1391,9 +1391,9 @@
     </paper>
     <paper id="163">
       <title>A Semantic-based Approach to Interoperabiltity of Classification Hierarchies: Evaluation of Linguistic Techniques</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <pages>1133–1139</pages>
       <url hash="ca587f94">C04-1163</url>
       <bibkey>magnini-etal-2004-semantic</bibkey>
@@ -1403,7 +1403,7 @@
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Ming-Jun</first><last>Chen</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>1140–1146</pages>
       <url hash="88d05ad9">C04-1164</url>
       <bibkey>yeh-etal-2004-automated</bibkey>
@@ -1438,10 +1438,10 @@
       <title>A Unified Approach in Speech-to-Speech Translation: Integrating Features of Speech recognition and Machine Translation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Frank</first><last>Soong</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="frank-k-soong"><first>Frank</first><last>Soong</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Wai Kit</first><last>Lo</last></author>
+      <author id="wai-kit-lo"><first>Wai Kit</first><last>Lo</last></author>
       <pages>1168–1174</pages>
       <url hash="82b556ec">C04-1168</url>
       <bibkey>zhang-etal-2004-unified</bibkey>
@@ -1455,8 +1455,8 @@
     </paper>
     <paper id="170">
       <title>Lexicalisation strategies in cooperative question-answering systems</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>1179–1185</pages>
       <url hash="0f79ee2c">C04-1170</url>
       <bibkey>benamara-saint-dizier-2004-lexicalisation</bibkey>
@@ -1512,19 +1512,19 @@
     </paper>
     <paper id="177">
       <title>Automatic Identification of Infrequent Word Senses</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>1220–1226</pages>
       <url hash="bc7639cc">C04-1177</url>
       <bibkey>mccarthy-etal-2004-automatic</bibkey>
     </paper>
     <paper id="178">
       <title>Semiautomatic Extension of <fixed-case>C</fixed-case>ore<fixed-case>N</fixed-case>et using a Bootstrapping Mechanism on Corpus-based Co-occurrences</title>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Sa-Im</first><last>Shin</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="saim-shin"><first>Sa-Im</first><last>Shin</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>1227–1232</pages>
       <url hash="48bd2cd6">C04-1178</url>
       <bibkey>biemann-etal-2004-semiautomatic</bibkey>
@@ -1533,7 +1533,7 @@
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-based Semantic Parsing using Maximum Entropy Models</title>
       <author><first>Namhee</first><last>Kwon</last></author>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1233–1239</pages>
       <url hash="36bc9291">C04-1179</url>
       <bibkey>kwon-etal-2004-framenet</bibkey>
@@ -1542,8 +1542,8 @@
       <title>Wide-Coverage Semantic Representations from a <fixed-case>CCG</fixed-case> Parser</title>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
       <pages>1240–1246</pages>
       <url hash="93b0bf8e">C04-1180</url>
@@ -1563,14 +1563,14 @@
       <author><first>Andreas</first><last>Hagen</last></author>
       <author><first>Nicholas</first><last>Romanyshyn</last></author>
       <author><first>Sean</first><last>Martin</last></author>
-      <author><first>Bryan</first><last>Pellom</last></author>
+      <author id="bryan-pellom"><first>Bryan</first><last>Pellom</last></author>
       <pages>1254–1260</pages>
       <url hash="c02339de">C04-1182</url>
       <bibkey>lee-etal-2004-analysis</bibkey>
     </paper>
     <paper id="183">
       <title>Combining clues for lexical level aligning using the Null hypothesis approach</title>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
       <pages>1261–1264</pages>
       <url hash="44948042">C04-1183</url>
@@ -1578,8 +1578,8 @@
     </paper>
     <paper id="184">
       <title>Comparing Semantically Related Sentences: The Case of Paraphrase Versus Subsumption</title>
-      <author><first>Jahna</first><last>Otterbacher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1265–1268</pages>
       <url hash="f7ad45ad">C04-1184</url>
       <bibkey>otterbacher-radev-2004-comparing</bibkey>
@@ -1593,7 +1593,7 @@
     </paper>
     <paper id="186">
       <title>Semantic Role Labeling Using Dependency Trees</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
       <pages>1273–1276</pages>
       <url hash="0a264991">C04-1186</url>
       <bibkey>hacioglu-2004-semantic</bibkey>
@@ -1601,7 +1601,7 @@
     <paper id="187">
       <title>Web-based List Question Answering</title>
       <author><first>Hui</first><last>Yang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1277–1283</pages>
       <url hash="303f4610">C04-1187</url>
       <bibkey>yang-chua-2004-web</bibkey>
@@ -1610,20 +1610,20 @@
       <title>Information Extraction for Question Answering: Improving Recall Through Syntactic Patterns</title>
       <author><first>Valentin</first><last>Jijkoun</last></author>
       <author><first>Jori</first><last>Mur</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>1284–1290</pages>
       <url hash="4309ce60">C04-1188</url>
       <bibkey>jijkoun-etal-2004-information</bibkey>
     </paper>
     <paper id="189">
       <title><fixed-case>HITIQA</fixed-case>: Towards Analytical Question Answering</title>
-      <author><first>Sharon</first><last>Small</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sean</first><last>Ryan</last></author>
       <author><first>Robert</first><last>Salkin</last></author>
       <author><first>Nobuyuki</first><last>Shimizu</last></author>
-      <author><first>Paul</first><last>Kantor</last></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last></author>
       <author><first>Diane</first><last>Kelly</last></author>
       <author><first>Robert</first><last>Rittman</last></author>
       <author><first>Nina</first><last>Wacholder</last></author>
@@ -1644,7 +1644,7 @@
       <title>Inferring parts of speech for lexical mappings via the <fixed-case>C</fixed-case>yc <fixed-case>KB</fixed-case></title>
       <author><first>Tom</first><last>O’Hara</last></author>
       <author><first>Stefano</first><last>Bertolo</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <author><first>Bjørn</first><last>Aldag</last></author>
       <author><first>Jon</first><last>Curtis</last></author>
       <author><first>Kathy</first><last>Panton</last></author>
@@ -1656,9 +1656,9 @@
     </paper>
     <paper id="192">
       <title>Fine-Grained Word Sense Disambiguation Based on Parallel Corpora, Word Alignment, Word Clustering and Aligned Wordnets</title>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>1312–1318</pages>
       <url hash="77901735">C04-1192</url>
       <bibkey>tufis-etal-2004-fine</bibkey>
@@ -1684,7 +1684,7 @@
       <title>Creative Discovery in Lexical Ontologies</title>
       <author><first>Tony</first><last>Veale</last></author>
       <author><first>Nuno</first><last>Seco</last></author>
-      <author><first>Jer</first><last>Hayes</last></author>
+      <author id="jer-hayes"><first>Jer</first><last>Hayes</last></author>
       <pages>1333–1338</pages>
       <url hash="0b2f415b">C04-1195</url>
       <bibkey>veale-etal-2004-creative</bibkey>
@@ -1702,7 +1702,7 @@
       <title>Semantic Role Labeling Via Integer Linear Programming Inference</title>
       <author><first>Vasin</first><last>Punyakanok</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Dav</first><last>Zimak</last></author>
       <pages>1346–1352</pages>
       <url hash="86fcae59">C04-1197</url>
@@ -1727,7 +1727,7 @@
     <paper id="200">
       <title>Determining the Sentiment of Opinions</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1367–1373</pages>
       <url hash="23a0a13a">C04-1200</url>
       <bibkey>kim-hovy-2004-determining</bibkey>
@@ -1736,8 +1736,8 @@
       <title>A Language Independent Method for Question Classification</title>
       <author><first>Thamar</first><last>Solorio</last></author>
       <author><first>Manuel</first><last>Pérez-Coutiño</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
-      <author><first>Luis</first><last>Villaseñor-Pineda</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="luis-villasenor-pineda"><first>Luis</first><last>Villaseñor-Pineda</last></author>
       <author><first>Aurelio</first><last>López-López</last></author>
       <pages>1374–1380</pages>
       <url hash="1a54c124">C04-1201</url>
@@ -1758,7 +1758,7 @@
     <paper id="203">
       <title>A Natural Language Processing Infrastructure for <fixed-case>T</fixed-case>urkish</title>
       <author><first>A. C. Cem</first><last>Say</last></author>
-      <author><first>Ozlem</first><last>Cetinoglu</last></author>
+      <author id="ozlem-cetinoglu"><first>Ozlem</first><last>Cetinoglu</last></author>
       <author><first>Seniz</first><last>Demir</last></author>
       <author><first>Faith</first><last>Ögun</last></author>
       <pages>1385–1391</pages>
@@ -1768,7 +1768,7 @@
     <paper id="204">
       <title>Deep Linguistic Analysis for the Accurate Identification of Predicate-Argument Relations</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1392–1398</pages>
       <url hash="7e0d5062">C04-1204</url>
       <bibkey>miyao-tsujii-2004-deep</bibkey>
diff --git a/data/xml/C08.xml b/data/xml/C08.xml
index 0d8406e8cd..2d15526e30 100644
--- a/data/xml/C08.xml
+++ b/data/xml/C08.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 22nd International Conference on Computational Linguistics (Coling 2008)</booktitle>
-      <editor><first>Donia</first><last>Scott</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
       <editor><first>Hans</first><last>Uszkoreit</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
@@ -19,7 +19,7 @@
       <title>Two-Phased Event Relation Acquisition: Coupling the Relation-Oriented and Argument-Oriented Approaches</title>
       <author><first>Shuya</first><last>Abe</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1–8</pages>
       <url hash="5c84b857">C08-1001</url>
       <bibkey>abe-etal-2008-two</bibkey>
@@ -35,8 +35,8 @@
     </paper>
     <paper id="3">
       <title>On Robustness and Domain Adaptation using <fixed-case>SVD</fixed-case> for Word Sense Disambiguation</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <pages>17–24</pages>
       <url hash="33d523d5">C08-1003</url>
       <bibkey>agirre-lopez-de-lacalle-2008-robustness</bibkey>
@@ -50,17 +50,17 @@
     </paper>
     <paper id="5">
       <title>Improving Alignments for Better Confusion Networks for Combining Machine Translation Systems</title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
       <author><first>Jing</first><last>Zheng</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>33–40</pages>
       <url hash="b51ed9bb">C08-1005</url>
       <bibkey>ayan-etal-2008-improving</bibkey>
     </paper>
     <paper id="6">
       <title>Verification and Implementation of Language-Based Deception Indicators in Civil and Criminal Narratives</title>
-      <author><first>Joan</first><last>Bachenko</last></author>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
       <author><first>Michael</first><last>Schonwetter</last></author>
       <pages>41–48</pages>
       <url hash="7dc3192c">C08-1006</url>
@@ -68,8 +68,8 @@
     </paper>
     <paper id="7">
       <title>Enhancing Multilingual Latent Semantic Analysis with Term Alignment Information</title>
-      <author><first>Brett W.</first><last>Bader</last></author>
-      <author><first>Peter A.</first><last>Chew</last></author>
+      <author id="brett-w-bader"><first>Brett W.</first><last>Bader</last></author>
+      <author id="peter-a-chew"><first>Peter A.</first><last>Chew</last></author>
       <pages>49–56</pages>
       <url hash="5b32f9ff">C08-1007</url>
       <bibkey>bader-chew-2008-enhancing</bibkey>
@@ -116,7 +116,7 @@
     <paper id="13">
       <title><fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>etric: An Automatic Evaluation Metric for Paraphrasing</title>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>97–104</pages>
       <url hash="80458032">C08-1013</url>
@@ -126,7 +126,7 @@
       <title>Regenerating Hypotheses for Statistical Machine Translation</title>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>105–112</pages>
       <url hash="99f2527d">C08-1014</url>
@@ -146,15 +146,15 @@
       <author><first>Bin</first><last>Chen</last></author>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>121–128</pages>
       <url hash="fef5c264">C08-1016</url>
       <bibkey>chen-etal-2008-anaphora</bibkey>
     </paper>
     <paper id="17">
       <title>Latent Morpho-Semantic Analysis: Multilingual Information Retrieval with Character N-Grams and Mutual Information</title>
-      <author><first>Peter A.</first><last>Chew</last></author>
-      <author><first>Brett W.</first><last>Bader</last></author>
+      <author id="peter-a-chew"><first>Peter A.</first><last>Chew</last></author>
+      <author id="brett-w-bader"><first>Brett W.</first><last>Bader</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <pages>129–136</pages>
       <url hash="95cea052">C08-1017</url>
@@ -162,7 +162,7 @@
     </paper>
     <paper id="18">
       <title>Sentence Compression Beyond Word Deletion</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>137–144</pages>
       <url hash="c0f0c7da">C08-1018</url>
@@ -170,8 +170,8 @@
     </paper>
     <paper id="19">
       <title>Mind the Gap: Dangers of Divorcing Evaluations of Summary Content from Linguistic Quality</title>
-      <author><first>John M.</first><last>Conroy</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>145–152</pages>
       <url hash="ed486916">C08-1019</url>
       <bibkey>conroy-dang-2008-mind</bibkey>
@@ -179,10 +179,10 @@
     <paper id="20">
       <title>Hybrid Processing for Grammar and Style Checking</title>
       <author><first>Berthold</first><last>Crysmann</last></author>
-      <author><first>Nuria</first><last>Bertomeu</last></author>
+      <author id="nuria-bertomeu"><first>Nuria</first><last>Bertomeu</last></author>
       <author><first>Peter</first><last>Adolphs</last></author>
-      <author><first>Daniel</first><last>Flickinger</last></author>
-      <author><first>Tina</first><last>Klüwer</last></author>
+      <author id="dan-flickinger"><first>Daniel</first><last>Flickinger</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Klüwer</last></author>
       <pages>153–160</pages>
       <url hash="b3aff8f7">C08-1020</url>
       <bibkey>crysmann-etal-2008-hybrid</bibkey>
@@ -190,7 +190,7 @@
     <paper id="21">
       <title><fixed-case>K</fixed-case>now<fixed-case>N</fixed-case>et: Building a Large Net of Knowledge from the Web</title>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>161–168</pages>
       <url hash="af21b0a0">C08-1021</url>
       <bibkey>cuadros-rigau-2008-knownet</bibkey>
@@ -198,7 +198,7 @@
     <paper id="22">
       <title>A Classifier-Based Approach to Preposition and Determiner Error Correction in <fixed-case>L</fixed-case>2 <fixed-case>E</fixed-case>nglish</title>
       <author><first>Rachele</first><last>De Felice</last></author>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
       <pages>169–176</pages>
       <url hash="865fbd5e">C08-1022</url>
       <bibkey>de-felice-pulman-2008-classifier</bibkey>
@@ -206,8 +206,8 @@
     <paper id="23">
       <title>Pedagogically Useful Extractive Summaries for Science Education</title>
       <author><first>Sebastian</first><last>de la Chica</last></author>
-      <author><first>Faisal</first><last>Ahmad</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="faisal-ahmad"><first>Faisal</first><last>Ahmad</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>177–184</pages>
       <url hash="8af55d09">C08-1023</url>
@@ -217,7 +217,7 @@
       <title>Looking for Trouble</title>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <pages>185–192</pages>
       <url hash="fc3f7a16">C08-1024</url>
       <bibkey>de-saeger-etal-2008-looking</bibkey>
@@ -253,7 +253,7 @@
     <paper id="29">
       <title>A Probabilistic Model for Measuring Grammaticality and Similarity of Automatically Generated Paraphrases of Predicate Phrases</title>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>225–232</pages>
       <url hash="a592612b">C08-1029</url>
       <bibkey>fujita-sato-2008-probabilistic</bibkey>
@@ -285,15 +285,15 @@
     <paper id="33">
       <title>Statistical Anaphora Resolution in Biomedical Texts</title>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>257–264</pages>
       <url hash="ad6e9b53">C08-1033</url>
       <bibkey>gasperin-briscoe-2008-statistical</bibkey>
     </paper>
     <paper id="34">
       <title>Instance-Based Ontology Population Exploiting Named-Entity Substitution</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>265–272</pages>
       <url hash="ec4cbda0">C08-1034</url>
       <bibkey>giuliano-gliozzo-2008-instance</bibkey>
@@ -301,7 +301,7 @@
     <paper id="35">
       <title>Measuring Topic Homogeneity and its Application to Dictionary-Based Word Sense Disambiguation</title>
       <author><first>Ann</first><last>Gledson</last></author>
-      <author><first>John</first><last>Keane</last></author>
+      <author id="john-keane"><first>John</first><last>Keane</last></author>
       <pages>273–280</pages>
       <url hash="f056e0bb">C08-1035</url>
       <bibkey>gledson-keane-2008-measuring</bibkey>
@@ -309,7 +309,7 @@
     <paper id="36">
       <title>Using Web-Search Results to Measure Word-Group Similarity</title>
       <author><first>Ann</first><last>Gledson</last></author>
-      <author><first>John</first><last>Keane</last></author>
+      <author id="john-keane"><first>John</first><last>Keane</last></author>
       <pages>281–288</pages>
       <url hash="fec9f99a">C08-1036</url>
       <bibkey>gledson-keane-2008-using</bibkey>
@@ -323,8 +323,8 @@
     </paper>
     <paper id="38">
       <title>Dependency-Based N-Gram Models for General Purpose Sentence Realisation</title>
-      <author><first>Yuqing</first><last>Guo</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <pages>297–304</pages>
       <url hash="fd02eef6">C08-1038</url>
@@ -332,7 +332,7 @@
     </paper>
     <paper id="39">
       <title>Homotopy-Based Semi-Supervised Hidden <fixed-case>M</fixed-case>arkov Models for Sequence Labeling</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>305–312</pages>
       <url hash="6653f349">C08-1039</url>
@@ -340,13 +340,13 @@
     </paper>
     <paper id="40">
       <title>Tracking the Dynamic Evolution of Participants Salience in a Discussion</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Anthony</first><last>Fader</last></author>
       <author><first>Michael H.</first><last>Crespin</last></author>
-      <author><first>Kevin M.</first><last>Quinn</last></author>
+      <author id="kevin-m-quinn"><first>Kevin M.</first><last>Quinn</last></author>
       <author><first>Burt L.</first><last>Monroe</last></author>
       <author><first>Michael</first><last>Colaresi</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>313–320</pages>
       <url hash="b5fbc0b8">C08-1040</url>
       <bibkey>hassan-etal-2008-tracking</bibkey>
@@ -399,7 +399,7 @@
       <title><fixed-case>J</fixed-case>apanese Dependency Parsing Using a Tournament Model</title>
       <author><first>Masakazu</first><last>Iwatate</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>361–368</pages>
       <url hash="12e82b05">C08-1046</url>
       <bibkey>iwatate-etal-2008-japanese</bibkey>
@@ -471,8 +471,8 @@
     <paper id="55">
       <title>Generation of Referring Expressions: Managing Structural Ambiguities</title>
       <author><first>Imtiaz Hussain</first><last>Khan</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <pages>433–440</pages>
       <url hash="8b8c479f">C08-1055</url>
       <bibkey>khan-etal-2008-generation</bibkey>
@@ -481,7 +481,7 @@
       <title>Normalizing <fixed-case>SMS</fixed-case>: are Two Metaphors Better than One ?</title>
       <author><first>Catherine</first><last>Kobus</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>441–448</pages>
       <url hash="46b7cdc2">C08-1056</url>
       <bibkey>kobus-etal-2008-normalizing</bibkey>
@@ -497,8 +497,8 @@
     </paper>
     <paper id="58">
       <title>Extending a Thesaurus with Words from Pan-<fixed-case>C</fixed-case>hinese Sources</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>457–464</pages>
       <url hash="91a4291e">C08-1058</url>
       <bibkey>kwong-tsou-2008-extending</bibkey>
@@ -506,7 +506,7 @@
     <paper id="59">
       <title>Stopping Criteria for Active Learning of Named Entity Recognition</title>
       <author><first>Florian</first><last>Laws</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>465–472</pages>
       <url hash="5b0ab212">C08-1059</url>
       <bibkey>laws-schutze-2008-stopping</bibkey>
@@ -535,7 +535,7 @@
       <title><fixed-case>PNR</fixed-case>2: Ranking Sentences with Positive and Negative Reinforcement for Query-Oriented Update Summarization</title>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Furu</first><last>Wei</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yanxiang</first><last>He</last></author>
       <pages>489–496</pages>
       <url hash="e95ab378">C08-1062</url>
@@ -546,7 +546,7 @@
       <author><first>Yuanjie</first><last>Liu</last></author>
       <author><first>Shasha</first><last>Li</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Dingyi</first><last>Han</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <pages>497–504</pages>
@@ -563,7 +563,7 @@
     <paper id="65">
       <title>Authorship Attribution and Verification with Many Authors and Limited Data</title>
       <author><first>Kim</first><last>Luyckx</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>513–520</pages>
       <url hash="61405852">C08-1065</url>
       <bibkey>luyckx-daelemans-2008-authorship</bibkey>
@@ -571,7 +571,7 @@
     <paper id="66">
       <title>Modeling Semantic Containment and Exclusion in Natural Language Inference</title>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>521–528</pages>
       <url hash="b30981f7">C08-1066</url>
       <bibkey>maccartney-manning-2008-modeling</bibkey>
@@ -580,16 +580,16 @@
       <title>Linguistically-Based Sub-Sentential Alignment for Terminology Extraction from a Bilingual Automotive Corpus</title>
       <author><first>Lieve</first><last>Macken</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>529–536</pages>
       <url hash="7d4ccf94">C08-1067</url>
       <bibkey>macken-etal-2008-linguistically</bibkey>
     </paper>
     <paper id="68">
       <title><fixed-case>H</fixed-case>indi <fixed-case>U</fixed-case>rdu Machine Transliteration using Finite-State Transducers</title>
-      <author><first>M G Abbas</first><last>Malik</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="m-g-abbas-malik"><first>M G Abbas</first><last>Malik</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>537–544</pages>
       <url hash="dd3558fe">C08-1068</url>
       <bibkey>malik-etal-2008-hindi</bibkey>
@@ -597,14 +597,14 @@
     <paper id="69">
       <title>Comparative Parser Performance Analysis across Grammar Frameworks through Automatic Tree Conversion using Synchronous Grammars</title>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>545–552</pages>
       <url hash="71cae06f">C08-1069</url>
       <bibkey>matsuzaki-tsujii-2008-comparative</bibkey>
     </paper>
     <paper id="70">
       <title>What’s the Date? High Accuracy Interpretation of Weekday Names</title>
-      <author><first>Pawel</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Pawel</first><last>Mazur</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>553–560</pages>
       <url hash="bd6ed5c1">C08-1070</url>
@@ -621,7 +621,7 @@
     </paper>
     <paper id="72">
       <title>A Syntactic Time-Series Model for Parsing Fluent and Disfluent Speech</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>569–576</pages>
       <url hash="943df079">C08-1072</url>
@@ -630,7 +630,7 @@
     <paper id="73">
       <title>Applying Discourse Analysis and Data Mining Methods to Spoken <fixed-case>OSCE</fixed-case> Assessments</title>
       <author><first>Meladel</first><last>Mistica</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Marisa</first><last>Cordella</last></author>
       <author><first>Simon</first><last>Musgrave</last></author>
       <pages>577–584</pages>
@@ -639,7 +639,7 @@
     </paper>
     <paper id="74">
       <title>Random Restarts in Minimum Error Rate Training for Statistical Machine Translation</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>585–592</pages>
       <url hash="6704573b">C08-1074</url>
@@ -666,9 +666,9 @@
     </paper>
     <paper id="77">
       <title>Detecting Multiple Facets of an Event using Graph-Based Unsupervised Methods</title>
-      <author><first>Pradeep</first><last>Muthukrishnan</last></author>
+      <author id="pradeep-muthukrishnan"><first>Pradeep</first><last>Muthukrishnan</last></author>
       <author><first>Joshua</first><last>Gerrish</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>609–616</pages>
       <url hash="d1a2fd52">C08-1077</url>
       <bibkey>muthukrishnan-etal-2008-detecting</bibkey>
@@ -693,10 +693,10 @@
     <paper id="80">
       <title>Computer Aided Correction and Extension of a Syntactic Wide-Coverage Lexicon</title>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Miguel A.</first><last>Molinero</last></author>
       <author><first>Jacques</first><last>Farré</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>633–640</pages>
       <url hash="85eff49c">C08-1080</url>
       <bibkey>nicolas-etal-2008-computer</bibkey>
@@ -704,8 +704,8 @@
     <paper id="81">
       <title>Parsing the <fixed-case>S</fixed-case>yn<fixed-case>T</fixed-case>ag<fixed-case>R</fixed-case>us Treebank of <fixed-case>R</fixed-case>ussian</title>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Igor M.</first><last>Boguslavsky</last></author>
-      <author><first>Leonid L.</first><last>Iomdin</last></author>
+      <author id="igor-boguslavsky"><first>Igor M.</first><last>Boguslavsky</last></author>
+      <author id="leonid-iomdin"><first>Leonid L.</first><last>Iomdin</last></author>
       <pages>641–648</pages>
       <url hash="e6b6db56">C08-1081</url>
       <bibkey>nivre-etal-2008-parsing</bibkey>
@@ -720,16 +720,16 @@
     </paper>
     <paper id="83">
       <title>A Discriminative Alignment Model for Abbreviation Recognition</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>657–664</pages>
       <url hash="1c535023">C08-1083</url>
       <bibkey>okazaki-etal-2008-discriminative</bibkey>
     </paper>
     <paper id="84">
       <title>Semantic Role Assignment for Event Nominalisations by Leveraging Verbal Data</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <pages>665–672</pages>
@@ -755,17 +755,17 @@
     <paper id="87">
       <title>Scientific Paper Summarization Using Citation Summary Networks</title>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>689–696</pages>
       <url hash="42b76882">C08-1087</url>
       <bibkey>qazvinian-radev-2008-scientific</bibkey>
     </paper>
     <paper id="88">
       <title>Exploiting Constituent Dependencies for Tree Kernel-Based Semantic Relation Extraction</title>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Peide</first><last>Qian</last></author>
       <pages>697–704</pages>
       <url hash="550acc67">C08-1088</url>
@@ -782,9 +782,9 @@
     </paper>
     <paper id="90">
       <title>Almost Flat Functional Semantics for Speech Translation</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <pages>713–720</pages>
       <url hash="9c343225">C08-1090</url>
@@ -809,7 +809,7 @@
       <title>Translating Queries into Snippets for Improved Query Expansion</title>
       <author><first>Stefan</first><last>Riezler</last></author>
       <author><first>Yi</first><last>Liu</last></author>
-      <author><first>Alexander</first><last>Vasserman</last></author>
+      <author id="alexander-vasserman"><first>Alexander</first><last>Vasserman</last></author>
       <pages>737–744</pages>
       <url hash="6f2366ec">C08-1093</url>
       <bibkey>riezler-etal-2008-translating</bibkey>
@@ -825,7 +825,7 @@
     <paper id="95">
       <title>Shift-Reduce Dependency <fixed-case>DAG</fixed-case> Parsing</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>753–760</pages>
       <url hash="609180f4">C08-1095</url>
       <bibkey>sagae-tsujii-2008-shift</bibkey>
@@ -835,7 +835,7 @@
       <author><first>Yutaka</first><last>Sasaki</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
       <author><first>Philip</first><last>Cotter</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>761–768</pages>
       <url hash="3e8fd7c4">C08-1096</url>
@@ -861,8 +861,8 @@
     <paper id="99">
       <title>Toward a Psycholinguistically-Motivated Model of Language Processing</title>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <pages>785–792</pages>
       <url hash="4b0b0e6d">C08-1099</url>
@@ -882,7 +882,7 @@
     <paper id="101">
       <title>Discourse Level Opinion Interpretation</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <pages>801–808</pages>
       <url hash="e838937e">C08-1101</url>
@@ -892,7 +892,7 @@
       <title>Acquiring Sense Tagged Examples using Relevance Feedback</title>
       <author><first>Mark</first><last>Stevenson</last></author>
       <author><first>Yikun</first><last>Guo</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>809–816</pages>
       <url hash="92c1b024">C08-1102</url>
       <bibkey>stevenson-etal-2008-acquiring</bibkey>
@@ -900,7 +900,7 @@
     <paper id="103">
       <title>Topic Identification for Fine-Grained Opinion Analysis</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>817–824</pages>
       <url hash="656bb2ab">C08-1103</url>
       <bibkey>stoyanov-cardie-2008-topic</bibkey>
@@ -915,7 +915,7 @@
     </paper>
     <paper id="105">
       <title>Prediction of Maximal Projection for Semantic Role Labeling</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <pages>833–840</pages>
@@ -928,7 +928,7 @@
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <author><first>Daisuke</first><last>Okanohara</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>841–848</pages>
       <url hash="143b79fc">C08-1106</url>
       <bibkey>sun-etal-2008-modeling</bibkey>
@@ -944,15 +944,15 @@
     <paper id="108">
       <title>Experiments with Reasoning for Temporal Relations between Events</title>
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Munirathnam</first><last>Srikanth</last></author>
+      <author id="munirathnam-srikanth"><first>Munirathnam</first><last>Srikanth</last></author>
       <pages>857–864</pages>
       <url hash="991ce583">C08-1108</url>
       <bibkey>tatu-srikanth-2008-experiments</bibkey>
     </paper>
     <paper id="109">
       <title>The Ups and Downs of Preposition Error Detection in <fixed-case>ESL</fixed-case> Writing</title>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>865–872</pages>
       <url hash="6e9ebc67">C08-1109</url>
       <bibkey>tetreault-chodorow-2008-ups</bibkey>
@@ -960,7 +960,7 @@
     <paper id="110">
       <title>A Framework for Identifying Textual Redundancy</title>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>873–880</pages>
       <url hash="295acbc9">C08-1110</url>
       <bibkey>thadani-mckeown-2008-framework</bibkey>
@@ -969,7 +969,7 @@
       <title>Emotion Classification Using Massive Examples Extracted from the Web</title>
       <author><first>Ryoko</first><last>Tokuhisa</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>881–888</pages>
       <url hash="5298a9d6">C08-1111</url>
       <bibkey>tokuhisa-etal-2008-emotion</bibkey>
@@ -977,7 +977,7 @@
     <paper id="112">
       <title>Relational-Realizational Parsing</title>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>889–896</pages>
       <url hash="49f21188">C08-1112</url>
       <bibkey>tsarfaty-simaan-2008-relational</bibkey>
@@ -988,14 +988,14 @@
       <author><first>Hisashi</first><last>Kashima</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
       <author><first>Hiroki</first><last>Oda</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>897–904</pages>
       <url hash="6c5fcd58">C08-1113</url>
       <bibkey>tsuboi-etal-2008-training</bibkey>
     </paper>
     <paper id="114">
       <title>A Uniform Approach to Analogies, Synonyms, Antonyms, and Associations</title>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <pages>905–912</pages>
       <url hash="a8a7c429">C08-1114</url>
       <bibkey>turney-2008-uniform</bibkey>
@@ -1005,7 +1005,7 @@
       <author><first>Nicola</first><last>Ueffing</last></author>
       <author><first>Jens</first><last>Stephan</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Roland</first><last>Kuhn</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
@@ -1018,21 +1018,21 @@
       <title>Class-Driven Attribute Extraction</title>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Ting</first><last>Qian</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>921–928</pages>
       <url hash="f9342997">C08-1116</url>
       <bibkey>van-durme-etal-2008-class</bibkey>
     </paper>
     <paper id="117">
       <title>Using Three Way Data for Word Sense Discrimination</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>929–936</pages>
       <url hash="d66bcaca">C08-1117</url>
       <bibkey>van-de-cruys-2008-using</bibkey>
     </paper>
     <paper id="118">
       <title>Source Language Markers in <fixed-case>EUROPARL</fixed-case> Translations</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <pages>937–944</pages>
       <url hash="1afa69fb">C08-1118</url>
       <bibkey>van-halteren-2008-source</bibkey>
@@ -1048,9 +1048,9 @@
     <paper id="120">
       <title>Using Syntactic Information for Improving Why-Question Answering</title>
       <author><first>Suzan</first><last>Verberne</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Peter-Arno</first><last>Coppen</last></author>
+      <author id="peter-arno-coppen"><first>Peter-Arno</first><last>Coppen</last></author>
       <pages>953–960</pages>
       <url hash="79ac272a">C08-1120</url>
       <bibkey>verberne-etal-2008-using</bibkey>
@@ -1059,7 +1059,7 @@
       <title>Coreference Systems Based on Kernels Methods</title>
       <author><first>Yannick</first><last>Versley</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <pages>961–968</pages>
       <url hash="2a53bc9e">C08-1121</url>
@@ -1075,7 +1075,7 @@
     </paper>
     <paper id="123">
       <title>Investigating the Portability of Corpus-Derived Cue Phrases for Dialogue Act Classification</title>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>977–984</pages>
       <url hash="9f14ee37">C08-1123</url>
@@ -1083,7 +1083,7 @@
     </paper>
     <paper id="124">
       <title>Extractive Summarization Using Supervised and Semi-Supervised Learning</title>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Mingli</first><last>Wu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <pages>985–992</pages>
@@ -1094,7 +1094,7 @@
       <title>Domain Adaptation for Statistical Machine Translation with Domain Dictionary and Monolingual Corpora</title>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>993–1000</pages>
       <url hash="8ebe6045">C08-1125</url>
       <bibkey>wu-etal-2008-domain</bibkey>
@@ -1108,9 +1108,9 @@
     </paper>
     <paper id="127">
       <title>Linguistically Annotated <fixed-case>BTG</fixed-case> for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>1009–1016</pages>
       <url hash="dd7f83b2">C08-1127</url>
@@ -1121,7 +1121,7 @@
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1017–1024</pages>
       <url hash="a378d309">C08-1128</url>
       <bibkey>xu-etal-2008-bayesian</bibkey>
@@ -1129,8 +1129,8 @@
     <paper id="129">
       <title>Switching to Real-Time Tasks in Multi-Tasking Dialogue</title>
       <author><first>Fan</first><last>Yang</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>Andrew</first><last>Kun</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="andrew-l-kun"><first>Andrew</first><last>Kun</last></author>
       <pages>1025–1032</pages>
       <url hash="4c99018f">C08-1129</url>
       <bibkey>yang-etal-2008-switching</bibkey>
@@ -1138,8 +1138,8 @@
     <paper id="130">
       <title><fixed-case>C</fixed-case>hinese Term Extraction Using Minimal Resources</title>
       <author><first>Yuhang</first><last>Yang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1033–1040</pages>
       <url hash="1899c15f">C08-1130</url>
       <bibkey>yang-etal-2008-chinese</bibkey>
@@ -1147,7 +1147,7 @@
     <paper id="131">
       <title>Measuring and Predicting Orthographic Associations: Modelling the Similarity of <fixed-case>J</fixed-case>apanese Kanji</title>
       <author><first>Lars</first><last>Yencken</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1041–1048</pages>
       <url hash="3a674e8e">C08-1131</url>
       <bibkey>yencken-baldwin-2008-measuring</bibkey>
@@ -1163,9 +1163,9 @@
     </paper>
     <paper id="133">
       <title><fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: Corpus Cleanup of Mistaken Agreement Using Word Sense Disambiguation</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1057–1064</pages>
       <url hash="c347eba8">C08-1133</url>
       <bibkey>yu-etal-2008-ontonotes</bibkey>
@@ -1181,7 +1181,7 @@
     <paper id="135">
       <title>Automatic Seed Word Selection for Unsupervised Sentiment Classification of <fixed-case>C</fixed-case>hinese Text</title>
       <author><first>Taras</first><last>Zagibalov</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>1073–1080</pages>
       <url hash="d33eaf54">C08-1135</url>
       <bibkey>zagibalov-carroll-2008-automatic</bibkey>
@@ -1198,7 +1198,7 @@
     <paper id="137">
       <title>Sentence Type Based Reordering Model for Statistical Machine Translation</title>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <pages>1089–1096</pages>
       <url hash="05e1b115">C08-1137</url>
@@ -1209,7 +1209,7 @@
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Hongfei</first><last>Jiang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>1097–1104</pages>
       <url hash="ba5490cd">C08-1138</url>
@@ -1238,7 +1238,7 @@
       <author><first>Shujie</first><last>Liu</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1121–1128</pages>
       <url hash="9792a7b9">C08-1141</url>
       <bibkey>zhou-etal-2008-diagnostic</bibkey>
@@ -1247,7 +1247,7 @@
       <title>Multi-Criteria-Based Strategy to Stop Active Learning for Data Annotation</title>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <author><first>Huizhen</first><last>Wang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1129–1136</pages>
       <url hash="8a51d8ce">C08-1142</url>
       <bibkey>zhu-etal-2008-multi</bibkey>
@@ -1257,7 +1257,7 @@
       <author><first>Jingbo</first><last>Zhu</last></author>
       <author><first>Huizhen</first><last>Wang</last></author>
       <author><first>Tianshun</first><last>Yao</last></author>
-      <author><first>Benjamin K</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K</first><last>Tsou</last></author>
       <pages>1137–1144</pages>
       <url hash="bb1bd5d1">C08-1143</url>
       <bibkey>zhu-etal-2008-active</bibkey>
@@ -1266,7 +1266,7 @@
       <title>A Systematic Comparison of Phrase-Based, Hierarchical and Syntax-Augmented Statistical <fixed-case>MT</fixed-case></title>
       <author><first>Andreas</first><last>Zollmann</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>Jay</first><last>Ponte</last></author>
       <pages>1145–1152</pages>
       <url hash="c6699290">C08-1144</url>
@@ -1284,7 +1284,7 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>Coling 2008: Companion volume: Posters</booktitle>
-      <editor><first>Donia</first><last>Scott</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
       <editor><first>Hans</first><last>Uszkoreit</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
@@ -1298,16 +1298,16 @@
     </frontmatter>
     <paper id="1">
       <title>Metaphor in Textual Entailment</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>3–6</pages>
       <url hash="a12ef0a1">C08-2001</url>
       <bibkey>agerri-2008-metaphor</bibkey>
     </paper>
     <paper id="2">
       <title>Distilling Opinion in Discourse: A Preliminary Study</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Yvette Yannick</first><last>Mathieu</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="yannick-mathieu"><first>Yvette Yannick</first><last>Mathieu</last></author>
       <pages>7–10</pages>
       <url hash="c0370a6c">C08-2002</url>
       <bibkey>asher-etal-2008-distilling</bibkey>
@@ -1324,7 +1324,7 @@
     <paper id="4">
       <title>The Power of Negative Thinking: Exploiting Label Disagreement in the <fixed-case>M</fixed-case>in-cut Classification Framework</title>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Lillian</first><last>Lee</last></author>
       <pages>15–18</pages>
       <url hash="343f0cf0">C08-2004</url>
@@ -1333,7 +1333,7 @@
     <paper id="5">
       <title>Phrasal Segmentation Models for Statistical Machine Translation</title>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>19–22</pages>
       <url hash="27812dfa">C08-2005</url>
@@ -1342,8 +1342,8 @@
     <paper id="6">
       <title>A Scalable <fixed-case>MMR</fixed-case> Approach to Sentence Scoring for Multi-Document Update Summarization</title>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>23–26</pages>
       <url hash="04417c54">C08-2006</url>
       <bibkey>boudin-etal-2008-scalable</bibkey>
@@ -1353,17 +1353,17 @@
       <author><first>Debasri</first><last>Chakrabarti</last></author>
       <author><first>Hemang</first><last>Mandalia</last></author>
       <author><first>Ritwik</first><last>Priya</last></author>
-      <author><first>Vaijayanthi</first><last>Sarma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="vaijayanthi-m-sarma"><first>Vaijayanthi</first><last>Sarma</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>27–30</pages>
       <url hash="de6dbe0e">C08-2007</url>
       <bibkey>chakrabarti-etal-2008-hindi</bibkey>
     </paper>
     <paper id="8">
       <title>Detecting Erroneous Uses of Complex Postpositions in an Agglutinative Language</title>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>31–34</pages>
       <url hash="1c7bd6ee">C08-2008</url>
       <bibkey>diaz-de-ilarraza-etal-2008-detecting</bibkey>
@@ -1378,8 +1378,8 @@
     </paper>
     <paper id="10">
       <title>The Impact of Reference Quality on Automatic <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <pages>39–42</pages>
       <url hash="87b2a2c0">C08-2010</url>
       <bibkey>hamon-mostefa-2008-impact</bibkey>
@@ -1388,22 +1388,22 @@
       <title>Word Sense Disambiguation for All Words using Tree-Structured Conditional Random Fields</title>
       <author><first>Jun</first><last>Hatori</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>43–46</pages>
       <url hash="3f3d5af6">C08-2011</url>
       <bibkey>hatori-etal-2008-word</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>ILP</fixed-case>-based Conceptual Analysis for <fixed-case>C</fixed-case>hinese <fixed-case>NP</fixed-case>s</title>
-      <author><first>Paul D.</first><last>Ji</last></author>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="paul-d-ji"><first>Paul D.</first><last>Ji</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
       <pages>47–50</pages>
       <url hash="22d6de09">C08-2012</url>
       <bibkey>ji-pulman-2008-ilp</bibkey>
     </paper>
     <paper id="13">
       <title>Scaling up Analogical Learning</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>51–54</pages>
       <url hash="0b6f9867">C08-2013</url>
@@ -1428,7 +1428,7 @@
     <paper id="16">
       <title>Exact Inference for Multi-label Classification using Sparse Graphical Models</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>63–66</pages>
       <url hash="e86d7f56">C08-2016</url>
       <bibkey>miyao-tsujii-2008-exact</bibkey>
@@ -1436,9 +1436,9 @@
     <paper id="17">
       <title>Modelling Multilinguality in Ontologies</title>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
-      <author><first>Guadalupe</first><last>Aguado de Cea</last></author>
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe</first><last>Aguado de Cea</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <pages>67–70</pages>
       <url hash="8012d767">C08-2017</url>
       <bibkey>montiel-ponsoda-etal-2008-modelling</bibkey>
@@ -1461,10 +1461,10 @@
     </paper>
     <paper id="20">
       <title>Generation under Space Constraints</title>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Nathalie</first><last>Colineau</last></author>
       <author><first>Andrew</first><last>Lampert</last></author>
-      <author><first>Joan</first><last>Giralt Duran</last></author>
+      <author id="joan-giralt-duran"><first>Joan</first><last>Giralt Duran</last></author>
       <pages>79–82</pages>
       <url hash="29b3eaf0">C08-2020</url>
       <bibkey>paris-etal-2008-generation</bibkey>
@@ -1486,7 +1486,7 @@
       <author><first>Hena</first><last>Mehta</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>87–90</pages>
       <url hash="19dc64e2">C08-2022</url>
       <bibkey>pitler-etal-2008-easily</bibkey>
@@ -1494,29 +1494,29 @@
     <paper id="23">
       <title>Rank Distance as a Stylistic Similarity</title>
       <author><first>Marius</first><last>Popescu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>91–94</pages>
       <url hash="f6bfd982">C08-2023</url>
       <bibkey>popescu-dinu-2008-rank</bibkey>
     </paper>
     <paper id="24">
       <title>Integrating Motion Predicate Classes with Spatial and Temporal Annotations</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Jessica L.</first><last>Moszkowicz</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jessica-moszkowicz"><first>Jessica L.</first><last>Moszkowicz</last></author>
       <pages>95–98</pages>
       <url hash="b352926b">C08-2024</url>
       <bibkey>pustejovsky-moszkowicz-2008-integrating</bibkey>
     </paper>
     <paper id="25">
       <title>On the Weak Generative Capacity of Weighted Context-free Grammars</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>99–102</pages>
       <url hash="7df6ed18">C08-2025</url>
       <bibkey>sogaard-2008-weak</bibkey>
     </paper>
     <paper id="26">
       <title>Range Concatenation Grammars for Translation</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>103–106</pages>
       <url hash="db177125">C08-2026</url>
       <bibkey>sogaard-2008-range</bibkey>
@@ -1524,7 +1524,7 @@
     <paper id="27">
       <title>Comparative Evaluation of <fixed-case>A</fixed-case>rabic Language Morphological Analysers and Stemmers</title>
       <author><first>Majdi</first><last>Sawalha</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>107–110</pages>
       <url hash="f490df06">C08-2027</url>
       <bibkey>sawalha-atwell-2008-comparative</bibkey>
@@ -1532,7 +1532,7 @@
     <paper id="28">
       <title>A Complete and Modestly Funny System for Generating and Performing <fixed-case>J</fixed-case>apanese Stand-Up Comedy</title>
       <author><first>Jonas</first><last>Sjöbergh</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>111–114</pages>
       <url hash="7a397ff0">C08-2028</url>
       <bibkey>sjobergh-araki-2008-complete</bibkey>
@@ -1569,8 +1569,8 @@
     <paper id="32">
       <title>Building a Bilingual Lexicon Using Phrase-based Statistical Machine Translation via a Pivot Language</title>
       <author><first>Takashi</first><last>Tsunakawa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>127–130</pages>
       <url hash="21516b4e">C08-2032</url>
       <bibkey>tsunakawa-etal-2008-building</bibkey>
@@ -1605,7 +1605,7 @@
     <meta>
       <booktitle>Coling 2008: Companion volume: Demonstrations</booktitle>
       <editor><first>Allan</first><last>Ramsay</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -1622,7 +1622,7 @@
       <author><first>Jakub</first><last>Piskorski</last></author>
       <author><first>Bruno</first><last>Pouliquen</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
       <pages>145–148</pages>
       <url hash="85fc93b7">C08-3001</url>
@@ -1631,7 +1631,7 @@
     <paper id="2">
       <title>A Grammar Checking System for <fixed-case>P</fixed-case>unjabi</title>
       <author><first>Mandeep Singh</first><last>Gill</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>149–152</pages>
       <url hash="54769f67">C08-3002</url>
       <bibkey>gill-lehal-2008-grammar</bibkey>
@@ -1639,7 +1639,7 @@
     <paper id="3">
       <title>A Toolchain for Grammarians</title>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Jonathan</first><last>Marchand</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
       <author><first>Karën</first><last>Fort</last></author>
@@ -1651,7 +1651,7 @@
     <paper id="4">
       <title>A <fixed-case>P</fixed-case>unjabi To <fixed-case>H</fixed-case>indi Machine Translation System</title>
       <author><first>Gurpreet Singh</first><last>Josan</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>157–160</pages>
       <url hash="3bd35c1f">C08-3004</url>
       <bibkey>josan-lehal-2008-punjabi</bibkey>
@@ -1660,19 +1660,19 @@
       <title>“Build Your Own” Spoken Dialogue Systems: Automatically Generating <fixed-case>ISU</fixed-case> Dialogue Systems from Business User Resources</title>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>161–164</pages>
       <url hash="09689bdc">C08-3005</url>
       <bibkey>lemon-etal-2008-build</bibkey>
     </paper>
     <paper id="6">
       <title>Multilingual Mobile-Phone Translation Services for World Travelers</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
-      <author><first>Tohru</first><last>Shimizu</last></author>
+      <author id="tohru-shimizu"><first>Tohru</first><last>Shimizu</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>165–168</pages>
       <url hash="5566b37a">C08-3006</url>
@@ -1681,7 +1681,7 @@
     <paper id="7">
       <title>Multilingual Assistant for Medical Diagnosing and Drug Prescription Based on Category Ranking</title>
       <author><first>Fernando</first><last>Ruiz-Rico</last></author>
-      <author><first>Jose-Luis</first><last>Vicedo</last></author>
+      <author id="jose-luis-vicedo"><first>Jose-Luis</first><last>Vicedo</last></author>
       <author><first>María-Consuelo</first><last>Rubio-Sánchez</last></author>
       <pages>169–172</pages>
       <url hash="23c5cd8b">C08-3007</url>
@@ -1690,12 +1690,12 @@
     <paper id="8">
       <title>Entailment-based Question Answering for Structured Data</title>
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
       <author><first>Shiyan</first><last>Ou</last></author>
-      <author><first>Oscar</first><last>Ferrandez</last></author>
+      <author id="oscar-ferrandez"><first>Oscar</first><last>Ferrandez</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>173–176</pages>
       <url hash="04c0bac7">C08-3008</url>
       <bibkey>sacaleanu-etal-2008-entailment</bibkey>
@@ -1703,7 +1703,7 @@
     <paper id="9">
       <title>Shahmukhi to Gurmukhi Transliteration System</title>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Virinder S</first><last>Kalra</last></author>
       <pages>177–180</pages>
       <url hash="c715e449">C08-3009</url>
@@ -1726,7 +1726,7 @@
     <paper id="12">
       <title>Temporal Processing with the <fixed-case>TARSQI</fixed-case> Toolkit</title>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>189–192</pages>
       <url hash="471e9432">C08-3012</url>
       <bibkey>verhagen-pustejovsky-2008-temporal</bibkey>
diff --git a/data/xml/C10.xml b/data/xml/C10.xml
index e2147b043e..1c0358d93d 100644
--- a/data/xml/C10.xml
+++ b/data/xml/C10.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)</booktitle>
       <url hash="2f1887af">C10-1</url>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
-      <editor><first>Dan</first><last>Jurafsky</last></editor>
+      <editor id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
       <address>Beijing, China</address>
       <month>August</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Testing <fixed-case>SDRT</fixed-case>’s Right Frontier</title>
       <author><first>Stergos</first><last>Afantenos</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>1–9</pages>
       <url hash="6b3ba1f1">C10-1001</url>
       <bibkey>afantenos-asher-2010-testing</bibkey>
@@ -36,7 +36,7 @@
       <title>Robust Measurement and Comparison of Context Similarity for Finding Translation Pairs</title>
       <author><first>Daniel</first><last>Andrade</last></author>
       <author><first>Tetsuya</first><last>Nasukawa</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>19–27</pages>
       <url hash="78df96be">C10-1003</url>
       <bibkey>andrade-etal-2010-robust</bibkey>
@@ -44,8 +44,8 @@
     <paper id="4">
       <title>Multilingual Subjectivity: Are More Languages Better?</title>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>28–36</pages>
       <url hash="ebf01455">C10-1004</url>
       <bibkey>banea-etal-2010-multilingual</bibkey>
@@ -54,15 +54,15 @@
       <title>Plagiarism Detection across Distant Language Pairs</title>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>37–45</pages>
       <url hash="92433e5d">C10-1005</url>
       <bibkey>barron-cedeno-etal-2010-plagiarism</bibkey>
     </paper>
     <paper id="6">
       <title>Automatic Detection of Non-deverbal Event Nouns for Quick Lexicon Production</title>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Maria</first><last>Coll</last></author>
       <author><first>Gabriela</first><last>Resnik</last></author>
       <pages>46–52</pages>
@@ -88,7 +88,7 @@
     <paper id="9">
       <title>Fluency Constraints for Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding of Statistical Machine Translation Lattices</title>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>71–79</pages>
       <url hash="580579fd">C10-1009</url>
@@ -96,8 +96,8 @@
     </paper>
     <paper id="10">
       <title>Self-Annotation for fine-grained geospatial relation extraction</title>
-      <author><first>Andre</first><last>Blessing</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>80–88</pages>
       <url hash="0a16cae6">C10-1010</url>
       <bibkey>blessing-schutze-2010-self</bibkey>
@@ -122,7 +122,7 @@
     <paper id="13">
       <title>Towards an optimal weighting of context words based on distance</title>
       <author><first>Bernard</first><last>Brosseau-Villeneuve</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
       <pages>107–115</pages>
       <url hash="29bf96f6">C10-1013</url>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="15">
       <title>A Utility-Driven Approach to Question Ranking in Social <fixed-case>QA</fixed-case></title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <author><first>Yunfeng</first><last>Huang</last></author>
       <pages>125–133</pages>
       <url hash="41218112">C10-1015</url>
@@ -190,7 +190,7 @@
     <paper id="21">
       <title>Emotion Cause Detection with Linguistic Constructions</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>179–187</pages>
@@ -201,7 +201,7 @@
       <title>A Twin-Candidate Based Approach for Event Pronoun Resolution using Composite Kernel</title>
       <author><first>Bin</first><last>Chen</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>188–196</pages>
       <url hash="dae02d0f">C10-1022</url>
       <bibkey>chen-etal-2010-twin</bibkey>
@@ -217,7 +217,7 @@
     <paper id="24">
       <title>Simplicity is Better: Revisiting Single Kernel <fixed-case>PPI</fixed-case> Extraction</title>
       <author><first>Sung-Pil</first><last>Choi</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>206–214</pages>
       <url hash="eb7db4f7">C10-1024</url>
       <bibkey>choi-myaeng-2010-simplicity</bibkey>
@@ -226,13 +226,13 @@
       <title>An ontology-driven system for detecting global health events</title>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Reiko</first><last>Matsuda Goodwin</last></author>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Son</first><last>Doan</last></author>
       <author><first>Ai</first><last>Kawazoe</last></author>
       <author><first>Mike</first><last>Conway</last></author>
       <author><first>Asanee</first><last>Kawtrakul</last></author>
       <author><first>Koichi</first><last>Takeuchi</last></author>
-      <author><first>Dinh</first><last>Dien</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
       <pages>215–222</pages>
       <url hash="06905104">C10-1025</url>
       <bibkey>collier-etal-2010-ontology</bibkey>
@@ -247,7 +247,7 @@
     </paper>
     <paper id="27">
       <title>Local lexical adaptation in Machine Translation through triangulation: <fixed-case>SMT</fixed-case> helping <fixed-case>SMT</fixed-case></title>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>232–240</pages>
@@ -291,7 +291,7 @@
       <author><first>Paul</first><last>McNamee</last></author>
       <author><first>Delip</first><last>Rao</last></author>
       <author><first>Adam</first><last>Gerber</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>277–285</pages>
       <url hash="bc65d6d3">C10-1032</url>
       <bibkey>dredze-etal-2010-entity</bibkey>
@@ -310,7 +310,7 @@
       <author><first>Long</first><last>Jiang</last></author>
       <author><first>Tao</first><last>Qin</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Heung-Yeung</first><last>Shum</last></author>
+      <author id="heung-yeung-shum"><first>Heung-Yeung</first><last>Shum</last></author>
       <pages>295–303</pages>
       <url hash="411c7f05">C10-1034</url>
       <bibkey>duan-etal-2010-empirical</bibkey>
@@ -352,7 +352,7 @@
     <paper id="39">
       <title><fixed-case>O</fixed-case>pinosis: A Graph Based Approach to Abstractive Summarization of Highly Redundant Opinions</title>
       <author><first>Kavita</first><last>Ganesan</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <pages>340–348</pages>
       <url hash="b2d2bba1">C10-1039</url>
@@ -361,8 +361,8 @@
     <paper id="40">
       <title><fixed-case>EMDC</fixed-case>: A Semi-supervised Approach for Word Alignment</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Francisco</first><last>Guzman</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>349–357</pages>
       <url hash="f0cc99ee">C10-1040</url>
       <bibkey>gao-etal-2010-emdc</bibkey>
@@ -397,7 +397,7 @@
       <title>Comparing Language Similarity across Genetic and Typologically-Based Groupings</title>
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>385–393</pages>
       <url hash="da4b7b5a">C10-1044</url>
       <bibkey>georgi-etal-2010-comparing</bibkey>
@@ -405,16 +405,16 @@
     <paper id="45">
       <title>Better <fixed-case>A</fixed-case>rabic Parsing: Baselines, Evaluations, and Analysis</title>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>394–402</pages>
       <url hash="36aa2037">C10-1045</url>
       <bibkey>green-manning-2010-better</bibkey>
     </paper>
     <paper id="46">
       <title>Paraphrase Alignment for Synonym Evidence Discovery</title>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
-      <author><first>João Paulo</first><last>Cordeiro</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="joao-paulo-cordeiro"><first>João Paulo</first><last>Cordeiro</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Rumen</first><last>Moraliyski</last></author>
       <author><first>Pavel</first><last>Brazdil</last></author>
       <pages>403–411</pages>
@@ -431,7 +431,7 @@
     </paper>
     <paper id="48">
       <title>Detection of Simple Plagiarism in Computer Science Papers</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Aharon</first><last>Tayeb</last></author>
       <author><first>Natan</first><last>Ben-Dror</last></author>
       <pages>421–429</pages>
@@ -440,7 +440,7 @@
     </paper>
     <paper id="49">
       <title>A Structured Vector Space Model for Hidden Attribute Meaning in Adjective-Noun Phrases</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>430–438</pages>
       <url hash="cd88582a">C10-1049</url>
@@ -461,7 +461,7 @@
       <title>A Novel Reordering Model Based on Multi-layer Phrase for Statistical Machine Translation</title>
       <author><first>Yanqing</first><last>He</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Huilin</first><last>Wang</last></author>
       <pages>447–455</pages>
       <url hash="1d2d2bdd">C10-1051</url>
@@ -470,7 +470,7 @@
     <paper id="52">
       <title>Standardizing Wordnets in the <fixed-case>ISO</fixed-case> Standard <fixed-case>LMF</fixed-case>: <fixed-case>W</fixed-case>ordnet-<fixed-case>LMF</fixed-case> for <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et</title>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>456–464</pages>
       <url hash="cd8b0ad4">C10-1052</url>
       <bibkey>henrich-hinrichs-2010-standardizing</bibkey>
@@ -488,7 +488,7 @@
       <author><first>Gumwon</first><last>Hong</last></author>
       <author><first>Chi-Ho</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>474–482</pages>
       <url hash="fd5af248">C10-1054</url>
       <bibkey>hong-etal-2010-empirical</bibkey>
@@ -497,7 +497,7 @@
       <title>Enhancing Cross Document Coreference of Web Documents with Context Similarity and Very Large Scale Text Categorization</title>
       <author><first>Jian</first><last>Huang</last></author>
       <author><first>Pucktada</first><last>Treeratpituk</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>C. Lee</first><last>Giles</last></author>
       <pages>483–491</pages>
       <url hash="4a09be16">C10-1055</url>
@@ -553,24 +553,24 @@
     </paper>
     <paper id="62">
       <title>Learning to Predict Readability using Diverse Linguistic Features</title>
-      <author><first>Rohit</first><last>Kate</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
       <author><first>Martin</first><last>Franz</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>546–554</pages>
       <url hash="c4dfb630">C10-1062</url>
       <bibkey>kate-etal-2010-learning</bibkey>
     </paper>
     <paper id="63">
       <title>Value for Money: Balancing Annotation Effort, Lexicon Building and Accuracy for Multilingual <fixed-case>WSD</fixed-case></title>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Saurabh</first><last>Sohoney</last></author>
       <author><first>Anup</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>555–563</pages>
       <url hash="38d8b7ce">C10-1063</url>
       <bibkey>khapra-etal-2010-value</bibkey>
@@ -580,7 +580,7 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Minwoo</first><last>Jeong</last></author>
       <author><first>Jonghoon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>564–571</pages>
       <url hash="ba2a8378">C10-1064</url>
       <bibkey>kim-etal-2010-cross</bibkey>
@@ -588,7 +588,7 @@
     <paper id="65">
       <title>Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>572–580</pages>
       <url hash="3bbbe632">C10-1065</url>
@@ -598,7 +598,7 @@
       <title>Improving the Quality of Text Understanding by Delaying Ambiguity Resolution</title>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Bruce</first><last>Porter</last></author>
+      <author id="bruce-porter"><first>Bruce</first><last>Porter</last></author>
       <pages>581–589</pages>
       <url hash="45e89de5">C10-1066</url>
       <bibkey>kim-etal-2010-improving</bibkey>
@@ -615,9 +615,9 @@
     <paper id="68">
       <title>Dependency-driven Anaphoricity Determination for Coreference Resolution</title>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>599–607</pages>
       <url hash="487f98a8">C10-1068</url>
       <bibkey>kong-etal-2010-dependency</bibkey>
@@ -635,7 +635,7 @@
     <paper id="70">
       <title>Revisiting Context-based Projection Methods for Term-Translation Spotting in Comparable Corpora</title>
       <author><first>Audrey</first><last>Laroche</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>617–625</pages>
       <url hash="e66309c7">C10-1070</url>
       <bibkey>laroche-langlais-2010-revisiting</bibkey>
@@ -652,18 +652,18 @@
     <paper id="72">
       <title>Sentiment Classification and Polarity Shifting</title>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Sophia Y. M.</first><last>Lee</last></author>
+      <author id="sophia-y-m-lee"><first>Sophia Y. M.</first><last>Lee</last></author>
       <author><first>Ying</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>635–643</pages>
       <url hash="b71a3d75">C10-1072</url>
       <bibkey>li-etal-2010-sentiment</bibkey>
     </paper>
     <paper id="73">
       <title>Improving Corpus Comparability for Bilingual Lexicon Extraction from Comparable Corpora</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author><first>Bo</first><last>Li</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>644–652</pages>
       <url hash="0419e7e4">C10-1073</url>
       <bibkey>li-gaussier-2010-improving</bibkey>
@@ -674,7 +674,7 @@
       <author><first>Chao</first><last>Han</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
       <author><first>Xiaoyan</first><last>Zhu</last></author>
-      <author><first>Ying-Ju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Ying-Ju</first><last>Xia</last></author>
       <author><first>Shu</first><last>Zhang</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <pages>653–661</pages>
@@ -693,10 +693,10 @@
     </paper>
     <paper id="76">
       <title>Learning the Scope of Negation via Shallow Semantic Parsing</title>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>671–679</pages>
       <url hash="de6c6667">C10-1076</url>
       <bibkey>li-etal-2010-learning</bibkey>
@@ -704,7 +704,7 @@
     <paper id="77">
       <title>Filtered Ranking for Bootstrapping in Event Extraction</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>680–688</pages>
       <url hash="d3348472">C10-1077</url>
       <bibkey>liao-grishman-2010-filtered</bibkey>
@@ -727,7 +727,7 @@
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Long</first><last>Jiang</last></author>
       <author><first>Zhongyang</first><last>Xiong</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>698–706</pages>
       <url hash="179520e7">C10-1079</url>
       <bibkey>liu-etal-2010-semantic</bibkey>
@@ -750,9 +750,9 @@
     </paper>
     <paper id="82">
       <title><fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Events Recognition and Classification: Learning <fixed-case>CRF</fixed-case> Models with Semantic Roles</title>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
-      <author><first>Borja</first><last>Navarro-Colorado</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro-Colorado</last></author>
       <pages>725–733</pages>
       <url hash="6446c582">C10-1082</url>
       <bibkey>llorens-etal-2010-timeml</bibkey>
@@ -762,14 +762,14 @@
       <author><first>Yue</first><last>Lu</last></author>
       <author><first>Huizhong</first><last>Duan</last></author>
       <author><first>Hongning</first><last>Wang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>734–742</pages>
       <url hash="61fb3088">C10-1083</url>
       <bibkey>lu-etal-2010-exploiting</bibkey>
     </paper>
     <paper id="84">
       <title>Enhancing Morphological Alignment for Translating Highly Inflected Languages</title>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>743–751</pages>
       <url hash="a72b7667">C10-1084</url>
@@ -778,7 +778,7 @@
     <paper id="85">
       <title>Automatic analysis of semantic similarity in comparable text through syntactic tree matching</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>752–760</pages>
       <url hash="edc8b87d">C10-1085</url>
       <bibkey>marsi-krahmer-2010-automatic</bibkey>
@@ -808,7 +808,7 @@
       <author><first>Makoto</first><last>Miwa</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Tadayoshi</first><last>Hara</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>779–787</pages>
       <url hash="108801d3">C10-1088</url>
       <bibkey>miwa-etal-2010-evaluating</bibkey>
@@ -816,9 +816,9 @@
     <paper id="89">
       <title>Entity-Focused Sentence Simplification for Relation Extraction</title>
       <author><first>Makoto</first><last>Miwa</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>788–796</pages>
       <url hash="5c5b7989">C10-1089</url>
       <bibkey>miwa-etal-2010-entity</bibkey>
@@ -827,7 +827,7 @@
       <title>Using Cross-Lingual Projections to Generate Semantic Role Labeled Annotated Corpus for <fixed-case>U</fixed-case>rdu - A Resource Poor Language</title>
       <author><first>Smruthi</first><last>Mukund</last></author>
       <author><first>Debanjan</first><last>Ghosh</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>797–805</pages>
       <url hash="502906d4">C10-1090</url>
       <bibkey>mukund-etal-2010-using</bibkey>
@@ -843,9 +843,9 @@
     </paper>
     <paper id="92">
       <title>Nonparametric Word Segmentation for Machine Translation</title>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>815–823</pages>
       <url hash="bf4bb79f">C10-1092</url>
       <bibkey>nguyen-etal-2010-nonparametric</bibkey>
@@ -870,7 +870,7 @@
     </paper>
     <paper id="95">
       <title>Co-<fixed-case>STAR</fixed-case>: A Co-training Style Algorithm for Hyponymy Relation Acquisition from Structured and Unstructured Text</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Ichiro</first><last>Yamada</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
@@ -880,8 +880,8 @@
     </paper>
     <paper id="96">
       <title>Simple and Efficient Algorithm for Approximate Dictionary Matching</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>851–859</pages>
       <url hash="2c7d4d90">C10-1096</url>
       <bibkey>okazaki-tsujii-2010-simple</bibkey>
@@ -898,7 +898,7 @@
     <paper id="98">
       <title>Text Summarization of <fixed-case>T</fixed-case>urkish Texts using Latent Semantic Analysis</title>
       <author><first>Makbule</first><last>Ozsoy</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <author><first>Ferda</first><last>Alpaslan</last></author>
       <pages>869–876</pages>
       <url hash="fd9369a5">C10-1098</url>
@@ -917,7 +917,7 @@
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Shane</first><last>Bergsma</last></author>
       <author><first>Dekang</first><last>Lin</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>886–894</pages>
       <url hash="d38dacc7">C10-1100</url>
       <bibkey>pitler-etal-2010-using</bibkey>
@@ -925,8 +925,8 @@
     <paper id="101">
       <title>Citation Summarization Through Keyphrase Extraction</title>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>895–903</pages>
       <url hash="1c4d4260">C10-1101</url>
       <bibkey>qazvinian-etal-2010-citation</bibkey>
@@ -935,8 +935,8 @@
       <title>2<fixed-case>D</fixed-case> Trie for Fast Parsing</title>
       <author><first>Xian</first><last>Qian</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>904–912</pages>
       <url hash="861b379b">C10-1102</url>
       <bibkey>qian-etal-2010-2d</bibkey>
@@ -996,7 +996,7 @@
     <paper id="109">
       <title>A Multiple-Domain Ontology Builder</title>
       <author><first>Sara</first><last>Salem</last></author>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
       <pages>967–975</pages>
       <url hash="f8c04f31">C10-1109</url>
       <bibkey>salem-abdelrahman-2010-multiple</bibkey>
@@ -1064,12 +1064,12 @@
     </paper>
     <paper id="117">
       <title>Modeling Socio-Cultural Phenomena in Discourse</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
-      <author><first>Jennifer</first><last>Stromer-Galley</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="jennifer-stromer-galley"><first>Jennifer</first><last>Stromer-Galley</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <pages>1038–1046</pages>
       <url hash="29dfee0f">C10-1117</url>
       <bibkey>strzalkowski-etal-2010-modeling</bibkey>
@@ -1078,7 +1078,7 @@
       <title>Discriminative Induction of Sub-Tree Alignment using Limited Labeled Data</title>
       <author><first>Jun</first><last>Sun</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>1047–1055</pages>
       <url hash="4c104e4d">C10-1118</url>
       <bibkey>sun-etal-2010-discriminative-induction</bibkey>
@@ -1095,7 +1095,7 @@
     </paper>
     <paper id="120">
       <title>Semi-supervised dependency parsing using generalized tri-training</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Christian</first><last>Rishøj</last></author>
       <pages>1065–1073</pages>
       <url hash="d2c42f20">C10-1120</url>
@@ -1113,7 +1113,7 @@
     <paper id="122">
       <title><fixed-case>C</fixed-case>hinese <fixed-case>CCG</fixed-case>bank: extracting <fixed-case>CCG</fixed-case> derivations from the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank</title>
       <author><first>Daniel</first><last>Tse</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>1083–1091</pages>
       <url hash="23e229ae">C10-1122</url>
       <bibkey>tse-curran-2010-chinese</bibkey>
@@ -1133,7 +1133,7 @@
       <title>Large Scale Parallel Document Mining for Machine Translation</title>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
       <author><first>Jay</first><last>Ponte</last></author>
-      <author><first>Ashok</first><last>Popat</last></author>
+      <author id="ashok-popat"><first>Ashok</first><last>Popat</last></author>
       <author><first>Moshe</first><last>Dubiner</last></author>
       <pages>1101–1109</pages>
       <url hash="3db00789">C10-1124</url>
@@ -1150,10 +1150,10 @@
     <paper id="126">
       <title>Syntax Based Reordering with Automatically Derived Rules for Improved Statistical Machine Translation</title>
       <author><first>Karthik</first><last>Visweswariah</last></author>
-      <author><first>Jiri</first><last>Navratil</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jiri-navratil"><first>Jiri</first><last>Navratil</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Vijil</first><last>Chenthamarakshan</last></author>
-      <author><first>Nandakishore</first><last>Kambhatla</last></author>
+      <author id="nanda-kambhatla"><first>Nandakishore</first><last>Kambhatla</last></author>
       <pages>1119–1127</pages>
       <url hash="5bf63424">C10-1126</url>
       <bibkey>visweswariah-etal-2010-syntax</bibkey>
@@ -1177,7 +1177,7 @@
     <paper id="129">
       <title>“Got You!”: Automatic Vandalism Detection in <fixed-case>W</fixed-case>ikipedia with Web-based Shallow Syntactic-Semantic Modeling</title>
       <author><first>William Yang</first><last>Wang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1146–1154</pages>
       <url hash="a1895abf">C10-1129</url>
       <bibkey>wang-mckeown-2010-got</bibkey>
@@ -1185,7 +1185,7 @@
     <paper id="130">
       <title>Exploiting Salient Patterns for Question Detection and Question Retrieval in Community-based Question Answering</title>
       <author><first>Kai</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1155–1163</pages>
       <url hash="ef44b16a">C10-1130</url>
       <bibkey>wang-chua-2010-exploiting</bibkey>
@@ -1193,7 +1193,7 @@
     <paper id="131">
       <title>Probabilistic Tree-Edit Models with Structured Latent Variables for Textual Entailment and Question Answering</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1164–1172</pages>
       <url hash="a74878ec">C10-1131</url>
       <bibkey>wang-manning-2010-probabilistic</bibkey>
@@ -1201,7 +1201,7 @@
     <paper id="132">
       <title>A Character-Based Joint Model for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>1173–1181</pages>
       <url hash="1f1d5cef">C10-1132</url>
@@ -1246,7 +1246,7 @@
     <paper id="137">
       <title>A Methodology for Automatic Identification of Nocuous Ambiguity</title>
       <author><first>Hui</first><last>Yang</last></author>
-      <author><first>Anne</first><last>de Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>de Roeck</last></author>
       <author><first>Alistair</first><last>Willis</last></author>
       <author><first>Bashar</first><last>Nuseibeh</last></author>
       <pages>1218–1226</pages>
@@ -1279,7 +1279,7 @@
     </paper>
     <paper id="141">
       <title>Discriminative Training for Near-Synonym Substitution</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Hsiu-Min</first><last>Shih</last></author>
       <author><first>Yu-Ling</first><last>Lai</last></author>
       <author><first>Jui-Feng</first><last>Yeh</last></author>
@@ -1290,7 +1290,7 @@
     </paper>
     <paper id="142">
       <title>Estimating Linear Models for Compositional Distributional Semantics</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
       <author><first>Francesca</first><last>Fallucchi</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
@@ -1310,9 +1310,9 @@
     </paper>
     <paper id="144">
       <title>Forest-guided Supertagger Training</title>
-      <author><first>Yao-zhong</first><last>Zhang</last></author>
+      <author id="yao-zhong-zhang"><first>Yao-zhong</first><last>Zhang</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1281–1289</pages>
       <url hash="8cd5e4aa">C10-1144</url>
       <bibkey>zhang-etal-2010-forest</bibkey>
@@ -1321,8 +1321,8 @@
       <title>Entity Linking Leveraging Automatically Generated Annotation</title>
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Wen Ting</first><last>Wang</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="wen-ting-wang"><first>Wen Ting</first><last>Wang</last></author>
       <pages>1290–1298</pages>
       <url hash="c453daf6">C10-1145</url>
       <bibkey>zhang-etal-2010-entity</bibkey>
@@ -1394,7 +1394,7 @@
     <paper id="153">
       <title>A Minimum Error Weighting Combination Strategy for <fixed-case>C</fixed-case>hinese Semantic Role Labeling</title>
       <author><first>Tao</first><last>Zhuang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1362–1370</pages>
       <url hash="8370b15b">C10-1153</url>
       <bibkey>zhuang-zong-2010-minimum</bibkey>
@@ -1410,7 +1410,7 @@
     </paper>
     <paper id="155">
       <title>Syntactic Scope Resolution in Uncertainty Analysis</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <pages>1379–1387</pages>
@@ -1423,7 +1423,7 @@
       <booktitle>Coling 2010: Posters</booktitle>
       <url hash="7f430717">C10-2</url>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
-      <editor><first>Dan</first><last>Jurafsky</last></editor>
+      <editor id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
       <address>Beijing, China</address>
       <month>August</month>
@@ -1444,8 +1444,8 @@
     </paper>
     <paper id="2">
       <title>Document Expansion Based on <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Robust <fixed-case>IR</fixed-case></title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Xabier</first><last>Arregi</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="xabier-arregi"><first>Xabier</first><last>Arregi</last></author>
       <author><first>Arantxa</first><last>Otegi</last></author>
       <pages>9–17</pages>
       <url hash="63b0b098">C10-2002</url>
@@ -1464,10 +1464,10 @@
     </paper>
     <paper id="4">
       <title>Going Beyond Traditional <fixed-case>QA</fixed-case> Systems: Challenges and Keys in Opinion Question Answering</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>27–35</pages>
       <url hash="b558ed07">C10-2004</url>
       <bibkey>balahur-etal-2010-going</bibkey>
@@ -1482,7 +1482,7 @@
     </paper>
     <paper id="6">
       <title>Benchmarking for syntax-based sentential inference</title>
-      <author><first>Paul</first><last>Bedaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bedaride</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>45–53</pages>
       <url hash="754468a8">C10-2006</url>
@@ -1498,7 +1498,7 @@
     <paper id="8">
       <title>A Formal Scheme for Multimodal Grammars</title>
       <author><first>Philippe</first><last>Blache</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>63–71</pages>
       <url hash="5ce24706">C10-2008</url>
       <bibkey>blache-prevot-2010-formal</bibkey>
@@ -1507,7 +1507,7 @@
       <title>Composition of Semantic Relations: Model and Applications</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
       <author><first>Hakki C.</first><last>Cankaya</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>72–80</pages>
       <url hash="86774dd1">C10-2009</url>
       <bibkey>blanco-etal-2010-composition</bibkey>
@@ -1515,7 +1515,7 @@
     <paper id="10">
       <title>Improved Unsupervised Sentence Alignment for Symmetrical and Asymmetrical Parallel Corpora</title>
       <author><first>Fabienne</first><last>Braune</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>81–89</pages>
       <url hash="cef936fd">C10-2010</url>
       <bibkey>braune-fraser-2010-improved</bibkey>
@@ -1532,26 +1532,26 @@
     <paper id="12">
       <title>Toward Qualitative Evaluation of Textual Entailment Systems</title>
       <author><first>Elena</first><last>Cabrio</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>99–107</pages>
       <url hash="620e08a2">C10-2012</url>
       <bibkey>cabrio-magnini-2010-toward</bibkey>
     </paper>
     <paper id="13">
       <title>Benchmarking of Statistical Dependency Parsers for <fixed-case>F</fixed-case>rench</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
-      <author><first>Enrique</first><last>Henestroza Anguiano</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique</first><last>Henestroza Anguiano</last></author>
       <pages>108–116</pages>
       <url hash="e3dd02ea">C10-2013</url>
       <bibkey>candito-etal-2010-benchmarking</bibkey>
     </paper>
     <paper id="14">
       <title>Tree Topological Features for Unlexicalized Parsing</title>
-      <author><first>Samuel W. K.</first><last>Chan</last></author>
-      <author><first>Lawrence Y. L.</first><last>Cheung</last></author>
-      <author><first>Mickey W. C.</first><last>Chong</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W. K.</first><last>Chan</last></author>
+      <author id="lawrence-y-l-cheung"><first>Lawrence Y. L.</first><last>Cheung</last></author>
+      <author id="mickey-w-c-chong"><first>Mickey W. C.</first><last>Chong</last></author>
       <pages>117–125</pages>
       <url hash="ced02b08">C10-2014</url>
       <bibkey>chan-etal-2010-tree</bibkey>
@@ -1559,7 +1559,7 @@
     <paper id="15">
       <title>Improving Graph-based Dependency Parsing with Decision History</title>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>126–134</pages>
@@ -1587,7 +1587,7 @@
     <paper id="18">
       <title>Acquisition of Unknown Word Paradigms for Large-Scale Grammars</title>
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>153–161</pages>
       <url hash="4b68fec1">C10-2018</url>
       <bibkey>cholakov-van-noord-2010-acquisition</bibkey>
@@ -1604,14 +1604,14 @@
     <paper id="20">
       <title>Exploiting Paraphrases and Deferred Sense Commitment to Interpret Questions more Reliably</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <pages>171–179</pages>
       <url hash="842cd6fc">C10-2020</url>
       <bibkey>clark-harrison-2010-exploiting</bibkey>
     </paper>
     <paper id="21">
       <title>Two Methods for Extending Hierarchical Rules from the Bilingual Chart Parsing</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>180–188</pages>
       <url hash="2ff2295a">C10-2021</url>
@@ -1620,15 +1620,15 @@
     <paper id="22">
       <title>Unsupervised cleansing of noisy text</title>
       <author><first>Danish</first><last>Contractor</last></author>
-      <author><first>Tanveer A.</first><last>Faruquie</last></author>
-      <author><first>L. Venkata</first><last>Subramaniam</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer A.</first><last>Faruquie</last></author>
+      <author id="l-venkata-subramaniam"><first>L. Venkata</first><last>Subramaniam</last></author>
       <pages>189–196</pages>
       <url hash="3afe2dbf">C10-2022</url>
       <bibkey>contractor-etal-2010-unsupervised</bibkey>
     </paper>
     <paper id="23">
       <title>Improving Reordering with Linguistically Informed Bilingual n-grams</title>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>197–205</pages>
       <url hash="4af5521c">C10-2023</url>
@@ -1648,7 +1648,7 @@
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>214–222</pages>
       <url hash="44735fd9">C10-2025</url>
       <bibkey>cui-etal-2010-hybrid</bibkey>
@@ -1657,8 +1657,8 @@
       <title>Global Ranking via Data Fusion</title>
       <author><first>Hong-Jie</first><last>Dai</last></author>
       <author><first>Po-Ting</first><last>Lai</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>223–231</pages>
       <url hash="7c06d920">C10-2026</url>
       <bibkey>dai-etal-2010-global</bibkey>
@@ -1666,7 +1666,7 @@
     <paper id="27">
       <title>Topic-Based <fixed-case>B</fixed-case>engali Opinion Summarization</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>232–240</pages>
       <url hash="a4206aec">C10-2027</url>
       <bibkey>das-bandyopadhyay-2010-topic</bibkey>
@@ -1682,7 +1682,7 @@
     </paper>
     <paper id="29">
       <title>Topic Models for Meaning Similarity in Context</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>250–258</pages>
       <url hash="a7de6274">C10-2029</url>
@@ -1699,7 +1699,7 @@
     <paper id="31">
       <title>Exploring the Data-Driven Prediction of Prepositions in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Anas</first><last>Elghafari</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Holger</first><last>Wunsch</last></author>
       <pages>267–275</pages>
       <url hash="eab6a8a4">C10-2031</url>
@@ -1710,7 +1710,7 @@
       <author><first>Lijun</first><last>Feng</last></author>
       <author><first>Martin</first><last>Jansche</last></author>
       <author><first>Matt</first><last>Huenerfauth</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <pages>276–284</pages>
       <url hash="a0c76aec">C10-2032</url>
       <bibkey>feng-etal-2010-comparison</bibkey>
@@ -1730,8 +1730,8 @@
       <author><first>Yanhui</first><last>Feng</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Zhenxiang</first><last>Yan</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>294–302</pages>
       <url hash="3d87ba5b">C10-2034</url>
       <bibkey>feng-etal-2010-novel</bibkey>
@@ -1739,7 +1739,7 @@
     <paper id="35">
       <title>Building Systematic Reviews Using Automatic Text Classification Techniques</title>
       <author><first>Oana</first><last>Frunza</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Stan</first><last>Matwin</last></author>
       <pages>303–311</pages>
       <url hash="0ef323fa">C10-2035</url>
@@ -1756,8 +1756,8 @@
     <paper id="37">
       <title>Monolingual Distributional Profiles for Word Substitution in Machine Translation</title>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>320–328</pages>
       <url hash="795547fe">C10-2037</url>
       <bibkey>gangadharaiah-etal-2010-monolingual</bibkey>
@@ -1784,8 +1784,8 @@
       <title>Verbs are where all the action lies: Experiences of Shallow Parsing of a Morphologically Rich Language</title>
       <author><first>Harshada</first><last>Gune</last></author>
       <author><first>Mugdha</first><last>Bapat</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>347–355</pages>
       <url hash="35b99266">C10-2040</url>
       <bibkey>gune-etal-2010-verbs</bibkey>
@@ -1810,7 +1810,7 @@
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>374–382</pages>
       <url hash="fee3f981">C10-2043</url>
       <bibkey>he-etal-2010-integrating</bibkey>
@@ -1877,10 +1877,10 @@
     <paper id="50">
       <title>Negative Feedback: The Forsaken Nature Available for Re-ranking</title>
       <author><first>Yu</first><last>Hong</last></author>
-      <author><first>Qing-qing</first><last>Cai</last></author>
+      <author id="qingqing-cai"><first>Qing-qing</first><last>Cai</last></author>
       <author><first>Song</first><last>Hua</last></author>
-      <author><first>Jian-min</first><last>Yao</last></author>
-      <author><first>Qiao-ming</first><last>Zhu</last></author>
+      <author id="jianmin-yao"><first>Jian-min</first><last>Yao</last></author>
+      <author id="qiaoming-zhu"><first>Qiao-ming</first><last>Zhu</last></author>
       <pages>436–444</pages>
       <url hash="4b20418a">C10-2050</url>
       <bibkey>hong-etal-2010-negative</bibkey>
@@ -1889,7 +1889,7 @@
       <title>Morphological Analysis Can Improve a <fixed-case>CCG</fixed-case> Parser for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Matthew</first><last>Honnibal</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>445–453</pages>
       <url hash="e0917a27">C10-2051</url>
       <bibkey>honnibal-etal-2010-morphological</bibkey>
@@ -1898,7 +1898,7 @@
       <title>What’s in a Preposition? Dimensions of Sense Disambiguation for an Interesting Word Class</title>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>454–462</pages>
       <url hash="5fe007e2">C10-2052</url>
       <bibkey>hovy-etal-2010-whats</bibkey>
@@ -1913,7 +1913,7 @@
     </paper>
     <paper id="54">
       <title>Mining Large-scale Comparable Corpora from <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish News Collections</title>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Lian</first><last>Zhao</last></author>
       <author><first>Lishuang</first><last>Li</last></author>
       <author><first>Haitao</first><last>Yu</last></author>
@@ -1954,7 +1954,7 @@
     <paper id="59">
       <title>Effective Constituent Projection across Languages</title>
       <author><first>Wenbin</first><last>Jiang</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>516–524</pages>
@@ -1982,7 +1982,7 @@
     <paper id="62">
       <title>Generative Alignment and Semantic Parsing for Learning from Ambiguous Supervision</title>
       <author><first>Joohyun</first><last>Kim</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>543–551</pages>
       <url hash="a9a26d8d">C10-2062</url>
       <bibkey>kim-mooney-2010-generative</bibkey>
@@ -2009,7 +2009,7 @@
     </paper>
     <paper id="65">
       <title>Using Syntactic and Semantic based Relations for Dialogue Act Recognition</title>
-      <author><first>Tina</first><last>Klüwer</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Klüwer</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <pages>570–578</pages>
@@ -2023,15 +2023,15 @@
       <author><first>Mikio</first><last>Nakano</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>579–587</pages>
       <url hash="bd17c2c7">C10-2066</url>
       <bibkey>komatani-etal-2010-automatic</bibkey>
     </paper>
     <paper id="67">
       <title><fixed-case>DL</fixed-case> Meet <fixed-case>FL</fixed-case>: A Bidirectional Mapping between Ontologies and Linguistic Knowledge</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <pages>588–596</pages>
       <url hash="37a30aeb">C10-2067</url>
       <bibkey>krieger-schafer-2010-dl</bibkey>
@@ -2049,7 +2049,7 @@
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>David</first><last>Newman</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>605–613</pages>
       <url hash="53a95db0">C10-2069</url>
       <bibkey>lau-etal-2010-best</bibkey>
@@ -2061,19 +2061,19 @@
       <author><first>Beate</first><last>Dorow</last></author>
       <author><first>Christian</first><last>Scheible</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>614–622</pages>
       <url hash="e78b0ca9">C10-2070</url>
       <bibkey>laws-etal-2010-linguistically</bibkey>
     </paper>
     <paper id="71">
       <title>A Post-processing Approach to Statistical Word Alignment Reflecting Alignment Tendency between Part-of-speeches</title>
-      <author id="jae-hee-lee"><first>Jae-Hee</first><last>Lee</last></author>
+      <author><first>Jae-Hee</first><last>Lee</last></author>
       <author><first>Seung-Wook</first><last>Lee</last></author>
       <author><first>Gumwon</first><last>Hong</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
       <author><first>Sang-Bum</first><last>Kim</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>623–629</pages>
       <url hash="416fdbaa">C10-2071</url>
       <bibkey>lee-etal-2010-post</bibkey>
@@ -2091,8 +2091,8 @@
     </paper>
     <paper id="73">
       <title><fixed-case>EM</fixed-case>-based Hybrid Model for Bilingual Terminology Extraction from Comparable Corpora</title>
-      <author><first>Lianhau</first><last>Lee</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="lianhau-lee"><first>Lianhau</first><last>Lee</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>639–646</pages>
@@ -2101,8 +2101,8 @@
     </paper>
     <paper id="74">
       <title>Text Mining for Automatic Image Tagging</title>
-      <author><first>Chee Wee</first><last>Leong</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Samer</first><last>Hassan</last></author>
       <pages>647–655</pages>
       <url hash="1089c4db">C10-2074</url>
@@ -2112,8 +2112,8 @@
       <title>Unsupervised Discriminative Language Model Training for Machine Translation using Simulated Confusion Sets</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Ziyuan</first><last>Wang</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>656–664</pages>
       <url hash="2b72907f">C10-2075</url>
       <bibkey>li-etal-2010-unsupervised</bibkey>
@@ -2121,9 +2121,9 @@
     <paper id="76">
       <title>Combining Constituent and Dependency Syntactic Views for <fixed-case>C</fixed-case>hinese Semantic Role Labeling</title>
       <author><first>Shiqi</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <author><first>Hanjing</first><last>Li</last></author>
       <pages>665–673</pages>
       <url hash="2381660d">C10-2076</url>
@@ -2161,10 +2161,10 @@
     <paper id="80">
       <title>Reexamination on Potential for Personalization in Web Search</title>
       <author><first>Daren</first><last>Li</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>HaoLiang</first><last>Qi</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="haoliang-qi"><first>HaoLiang</first><last>Qi</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>701–709</pages>
       <url hash="7ad8d52a">C10-2080</url>
       <bibkey>li-etal-2010-reexamination</bibkey>
@@ -2180,9 +2180,9 @@
     </paper>
     <paper id="82">
       <title><fixed-case>T</fixed-case>ibetan Number Identification Based on Classification of Number Components in <fixed-case>T</fixed-case>ibetan Word Segmentation</title>
-      <author><first>Huidan</first><last>Liu</last></author>
-      <author><first>Weina</first><last>Zhao</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
+      <author id="weina-zhao"><first>Weina</first><last>Zhao</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <author><first>Li</first><last>Jiang</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <author><first>Yeping</first><last>He</last></author>
@@ -2215,9 +2215,9 @@
     <paper id="85">
       <title>Visually and Phonologically Similar Characters in Incorrect Simplified <fixed-case>C</fixed-case>hinese Words</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Min-Hua</first><last>Lai</last></author>
+      <author id="min-hua-lai"><first>Min-Hua</first><last>Lai</last></author>
       <author><first>Yi-Hsuan</first><last>Chuang</last></author>
-      <author><first>Chia-Ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-Ying</first><last>Lee</last></author>
       <pages>739–747</pages>
       <url hash="d89a7101">C10-2085</url>
       <bibkey>liu-etal-2010-visually</bibkey>
@@ -2226,9 +2226,9 @@
       <title>Head-modifier Relation based Non-lexical Reordering Model for Phrase-Based Translation</title>
       <author><first>Shui</first><last>Liu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>748–756</pages>
       <url hash="1a187582">C10-2086</url>
       <bibkey>liu-etal-2010-head</bibkey>
@@ -2236,9 +2236,9 @@
     <paper id="87">
       <title>Dependency-Driven Feature-based Learning for Extracting Protein-Protein Interactions from Biomedical Text</title>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>Longhua</first><last>Qian</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>757–765</pages>
       <url hash="92a03f40">C10-2087</url>
       <bibkey>liu-etal-2010-dependency</bibkey>
@@ -2271,9 +2271,9 @@
     </paper>
     <paper id="91">
       <title>Finite-state Scriptural Translation</title>
-      <author><first>M. G. Abbas</first><last>Malik</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="m-g-abbas-malik"><first>M. G. Abbas</first><last>Malik</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>791–800</pages>
       <url hash="0d2c98d2">C10-2091</url>
       <bibkey>malik-etal-2010-finite</bibkey>
@@ -2298,8 +2298,8 @@
       <title>Instance Sense Induction from Attribute Sets</title>
       <author><first>Ricardo</first><last>Martin-Brualla</last></author>
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Marius</first><last>Pasca</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Enrique</first><last>Robledo-Arnuncio</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <pages>819–827</pages>
@@ -2336,7 +2336,7 @@
       <title>Imbalanced Classification Using Dictionary-based Prototypes and Hierarchical Decision Rules for Entity Sense Disambiguation</title>
       <author><first>Tingting</first><last>Mu</last></author>
       <author><first>Xinglong</first><last>Wang</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>851–859</pages>
       <url hash="f3dcd600">C10-2098</url>
@@ -2345,7 +2345,7 @@
     <paper id="99">
       <title>A Vector Space Model for Subjectivity Classification in <fixed-case>U</fixed-case>rdu aided by Co-Training</title>
       <author><first>Smruthi</first><last>Mukund</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>860–868</pages>
       <url hash="09ea9017">C10-2099</url>
       <bibkey>mukund-srihari-2010-vector</bibkey>
@@ -2404,7 +2404,7 @@
       <title>A Study on Position Information in Document Summarization</title>
       <author><first>You</first><last>Ouyang</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Renxian</first><last>Zhang</last></author>
       <pages>919–927</pages>
       <url hash="d3f72abe">C10-2106</url>
@@ -2421,7 +2421,7 @@
     <paper id="108">
       <title>Word Space Modeling for Measuring Semantic Specificity in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Ching-Fen</first><last>Pan</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>937–945</pages>
       <url hash="86387ac4">C10-2108</url>
       <bibkey>pan-hsieh-2010-word</bibkey>
@@ -2429,14 +2429,14 @@
     <paper id="109">
       <title><fixed-case>MT</fixed-case> Error Detection for Cross-Lingual Question Answering</title>
       <author><first>Kristen</first><last>Parton</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>946–954</pages>
       <url hash="30dd5414">C10-2109</url>
       <bibkey>parton-mckeown-2010-mt</bibkey>
     </paper>
     <paper id="110">
       <title>The Role of Queries in Ranking Labeled Instances Extracted from Text</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>955–962</pages>
       <url hash="85bc3f13">C10-2110</url>
       <bibkey>pasca-2010-role</bibkey>
@@ -2460,8 +2460,8 @@
     </paper>
     <paper id="113">
       <title>Filling Knowledge Gaps in Text for Machine Reading</title>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>979–987</pages>
       <url hash="8094f6b7">C10-2113</url>
       <bibkey>penas-hovy-2010-filling</bibkey>
@@ -2494,8 +2494,8 @@
     <paper id="117">
       <title>Automatic Committed Belief Tagging</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1014–1022</pages>
       <url hash="c8a08a9b">C10-2117</url>
       <bibkey>prabhakaran-etal-2010-automatic</bibkey>
@@ -2503,16 +2503,16 @@
     <paper id="118">
       <title>Realization of Discourse Relations by Other Means: Alternative Lexicalizations</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>1023–1031</pages>
       <url hash="4e7febba">C10-2118</url>
       <bibkey>prasad-etal-2010-realization</bibkey>
     </paper>
     <paper id="119">
       <title>Designing Agreement Features for Realization Ranking</title>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>1032–1040</pages>
       <url hash="48af228f">C10-2119</url>
       <bibkey>rajkumar-white-2010-designing</bibkey>
@@ -2521,7 +2521,7 @@
       <title>Web-based and combined language models: a case study on noun compound identification</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>1041–1049</pages>
       <url hash="0ee966a4">C10-2120</url>
       <bibkey>ramisch-etal-2010-web</bibkey>
@@ -2538,10 +2538,10 @@
     <paper id="122">
       <title>Multilingual Summarization Evaluation without Human Models</title>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
       <author><first>Eric</first><last>SanJuan</last></author>
-      <author><first>Patricia</first><last>Velázquez-Morales</last></author>
+      <author id="patricia-velazquez-morales"><first>Patricia</first><last>Velázquez-Morales</last></author>
       <pages>1059–1067</pages>
       <url hash="567fa3f7">C10-2122</url>
       <bibkey>saggion-etal-2010-multilingual</bibkey>
@@ -2549,15 +2549,15 @@
     <paper id="123">
       <title>Argument Optionality in the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix</title>
       <author><first>Safiyyah</first><last>Saleem</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>1068–1076</pages>
       <url hash="fec3df61">C10-2123</url>
       <bibkey>saleem-bender-2010-argument</bibkey>
     </paper>
     <paper id="124">
       <title>Log-linear weight optimisation via <fixed-case>B</fixed-case>ayesian Adaptation in Statistical Machine Translation</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>1077–1085</pages>
       <url hash="35ad3ad2">C10-2124</url>
       <bibkey>sanchis-trilles-casacuberta-2010-log</bibkey>
@@ -2565,18 +2565,18 @@
     <paper id="125">
       <title>A Global Relaxation Labeling Approach to Coreference Resolution</title>
       <author><first>Emili</first><last>Sapena</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>1086–1094</pages>
       <url hash="28056590">C10-2125</url>
       <bibkey>sapena-etal-2010-global</bibkey>
     </paper>
     <paper id="126">
       <title>“Expresses-an-opinion-about”: using corpus statistics in an information extraction approach to opinion mining</title>
-      <author><first>Asad B.</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad B.</first><last>Sayeed</last></author>
       <author><first>Hieu C.</first><last>Nguyen</last></author>
       <author><first>Timothy J.</first><last>Meyer</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>1095–1103</pages>
       <url hash="68fc8870">C10-2126</url>
       <bibkey>sayeed-etal-2010-expresses</bibkey>
@@ -2586,7 +2586,7 @@
       <author><first>Christian</first><last>Scheible</last></author>
       <author><first>Florian</first><last>Laws</last></author>
       <author><first>Lukas</first><last>Michelbacher</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1104–1112</pages>
       <url hash="59d81e97">C10-2127</url>
       <bibkey>scheible-etal-2010-sentiment</bibkey>
@@ -2602,7 +2602,7 @@
       <title>Informed ways of improving data-driven dependency parsing for <fixed-case>G</fixed-case>erman</title>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>1122–1130</pages>
       <url hash="49de4198">C10-2129</url>
@@ -2632,7 +2632,7 @@
     <paper id="132">
       <title>Towards Automatic Building of Document Keywords</title>
       <author><first>Joaquim</first><last>Silva</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <pages>1149–1157</pages>
       <url hash="6ee5237a">C10-2132</url>
       <bibkey>silva-lopes-2010-towards</bibkey>
@@ -2641,7 +2641,7 @@
       <title>Shallow Information Extraction from Medical Forum Data</title>
       <author><first>Parikshit</first><last>Sondhi</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
       <pages>1158–1166</pages>
       <url hash="3a57d8e5">C10-2133</url>
@@ -2670,7 +2670,7 @@
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Hongmei</first><last>Zhao</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>1185–1193</pages>
       <url hash="2364733e">C10-2136</url>
@@ -2679,7 +2679,7 @@
     <paper id="137">
       <title>Semi-supervised Semantic Pattern Discovery with Guidance from Unsupervised Pattern Clusters</title>
       <author><first>Ang</first><last>Sun</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>1194–1202</pages>
       <url hash="1550fe8d">C10-2137</url>
       <bibkey>sun-grishman-2010-semi</bibkey>
@@ -2688,16 +2688,16 @@
       <title>Utilizing Variability of Time and Term Content, within and across Users in Session Detection</title>
       <author><first>Shuqi</first><last>Sun</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Haoliang</first><last>Qi</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="haoliang-qi"><first>Haoliang</first><last>Qi</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1203–1210</pages>
       <url hash="ceadc172">C10-2138</url>
       <bibkey>sun-etal-2010-utilizing</bibkey>
     </paper>
     <paper id="139">
       <title>Word-based and Character-based Word Segmentation Models: Comparison and Combination</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>1211–1219</pages>
       <url hash="b07372f5">C10-2139</url>
       <bibkey>sun-2010-word</bibkey>
@@ -2705,8 +2705,8 @@
     <paper id="140">
       <title>Confidence Measures for Error Discrimination in an Interactive Predictive Parsing Framework</title>
       <author><first>Ricardo</first><last>Sánchez-Sáez</last></author>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
-      <author><first>José Miguel</first><last>Benedí</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedí</last></author>
       <pages>1220–1228</pages>
       <url hash="d73fbaeb">C10-2140</url>
       <bibkey>sanchez-saez-etal-2010-confidence</bibkey>
@@ -2714,7 +2714,7 @@
     <paper id="141">
       <title>Learning Web Query Patterns for Imitating <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Shohei</first><last>Tanaka</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <pages>1229–1237</pages>
       <url hash="859c5830">C10-2141</url>
@@ -2769,7 +2769,7 @@
       <title><fixed-case>U</fixed-case>rdu and <fixed-case>H</fixed-case>indi: Translation and sharing of linguistic resources</title>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <author><first>Vijil</first><last>Chenthamarakshan</last></author>
-      <author><first>Nandakishore</first><last>Kambhatla</last></author>
+      <author id="nanda-kambhatla"><first>Nandakishore</first><last>Kambhatla</last></author>
       <pages>1283–1291</pages>
       <url hash="ce1ee279">C10-2147</url>
       <bibkey>visweswariah-etal-2010-urdu</bibkey>
@@ -2777,7 +2777,7 @@
     <paper id="148">
       <title>Phrase Structure Parsing with Dependency Structure</title>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1292–1300</pages>
       <url hash="0b98dd95">C10-2148</url>
       <bibkey>wang-zong-2010-phrase</bibkey>
@@ -2785,16 +2785,16 @@
     <paper id="149">
       <title>Automatic Generation of Semantic Fields for Annotating Web Images</title>
       <author><first>Gang</first><last>Wang</last></author>
-      <author><first>Tat Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat Seng</first><last>Chua</last></author>
       <author><first>Chong-Wah</first><last>Ngo</last></author>
-      <author><first>Yong Cheng</first><last>Wang</last></author>
+      <author id="yongcheng-wang"><first>Yong Cheng</first><last>Wang</last></author>
       <pages>1301–1309</pages>
       <url hash="95ab24f0">C10-2149</url>
       <bibkey>wang-etal-2010-automatic-generation</bibkey>
     </paper>
     <paper id="150">
       <title>Automatic Extraction of Cue Phrases for Cross-Corpus Dialogue Act Classification</title>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>Michael</first><last>Ferguson</last></author>
       <pages>1310–1317</pages>
       <url hash="92b7760f">C10-2150</url>
@@ -2816,7 +2816,7 @@
       <title><fixed-case>MIEA</fixed-case>: a Mutual Iterative Enhancement Approach for Cross-Domain Sentiment Classification</title>
       <author><first>Qiong</first><last>Wu</last></author>
       <author><first>Songbo</first><last>Tan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <author><first>Miyi</first><last>Duan</last></author>
       <pages>1327–1335</pages>
       <url hash="3284ae83">C10-2152</url>
@@ -2825,7 +2825,7 @@
     <paper id="153">
       <title>Exploring the Use of Word Relation Features for Sentiment Classification</title>
       <author><first>Rui</first><last>Xia</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1336–1344</pages>
       <url hash="9c2f9cea">C10-2153</url>
       <bibkey>xia-zong-2010-exploring</bibkey>
@@ -2904,14 +2904,14 @@
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Xiangli</first><last>Wang</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>1417–1425</pages>
       <url hash="d8f2a57c">C10-2162</url>
       <bibkey>yu-etal-2010-semi</bibkey>
     </paper>
     <paper id="163">
       <title>Cross-Lingual Induction for Deep Broad-Coverage Syntax: A Case Study on <fixed-case>G</fixed-case>erman Participles</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Christian</first><last>Rohrer</last></author>
@@ -2963,10 +2963,10 @@
     <paper id="168">
       <title>Chart Pruning for Fast Lexicalised-Grammar Parsing</title>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Byung-Gyu</first><last>Ahn</last></author>
+      <author id="byung-gyu-ahn"><first>Byung-Gyu</first><last>Ahn</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Curt</first><last>Van Wyk</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
       <pages>1471–1479</pages>
       <url hash="487bce99">C10-2168</url>
@@ -2983,7 +2983,7 @@
       <title>Sentence Ordering with Event-Enriched Semantics and Two-Layered Clustering for Multi-Document News Summarization</title>
       <author><first>Renxian</first><last>Zhang</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>1489–1497</pages>
       <url hash="9b8bac76">C10-2170</url>
       <bibkey>zhang-etal-2010-sentence</bibkey>
@@ -2999,12 +2999,12 @@
     </paper>
     <paper id="172">
       <title>Predicting Discourse Connectives for Implicit Discourse Relation Recognition</title>
-      <author><first>Zhi-Min</first><last>Zhou</last></author>
+      <author id="zhi-min-zhou"><first>Zhi-Min</first><last>Zhou</last></author>
       <author><first>Yu</first><last>Xu</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>1507–1514</pages>
       <url hash="97566a88">C10-2172</url>
       <bibkey>zhou-etal-2010-predicting</bibkey>
@@ -3021,7 +3021,7 @@
     <paper id="174">
       <title>Dual-Space Re-ranking Model for Document Retrieval</title>
       <author><first>Dong</first><last>Zhou</last></author>
-      <author><first>Seamus</first><last>Lawless</last></author>
+      <author id="seamus-lawless"><first>Seamus</first><last>Lawless</last></author>
       <author><first>Jinming</first><last>Min</last></author>
       <author><first>Vincent</first><last>Wade</last></author>
       <pages>1524–1532</pages>
@@ -3031,10 +3031,10 @@
     <paper id="175">
       <title>All in Strings: a Powerful String-based Automatic <fixed-case>MT</fixed-case> Evaluation Metric with Multiple Granularities</title>
       <author><first>Junguo</first><last>Zhu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Bo</first><last>Wang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1533–1540</pages>
       <url hash="393089e0">C10-2175</url>
       <bibkey>zhu-etal-2010-strings</bibkey>
@@ -3096,7 +3096,7 @@
     <paper id="2">
       <title><fixed-case>P</fixed-case>y<fixed-case>CWN</fixed-case>: a Python Module for <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet</title>
       <author><first>Yueh-Cheng</first><last>Wu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>5–8</pages>
       <url hash="11d17469">C10-3002</url>
       <bibkey>wu-hsieh-2010-pycwn</bibkey>
@@ -3105,7 +3105,7 @@
       <title>Annotation Tool for Discourse in <fixed-case>PDT</fixed-case></title>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Lucie</first><last>Mladová</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>9–12</pages>
       <url hash="21f4462f">C10-3003</url>
       <bibkey>mirovsky-etal-2010-annotation-tool</bibkey>
@@ -3121,7 +3121,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>H</fixed-case>ave2eat: a Restaurant Finder with Review Summarization for Mobile Phones</title>
-      <author><first>Giuseppe</first><last>Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Fabbrizio</last></author>
       <author><first>Narendra</first><last>Gupta</last></author>
       <author><first>Sveva</first><last>Besana</last></author>
       <author><first>Premkumar</first><last>Mani</last></author>
@@ -3168,7 +3168,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>P</fixed-case>an<fixed-case>L</fixed-case>ex and <fixed-case>LEXTRACT</fixed-case>: Translating all Words of all Languages of the World</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jonathan</first><last>Pool</last></author>
       <author><first>Susan</first><last>Colowick</last></author>
       <pages>37–40</pages>
@@ -3187,8 +3187,8 @@
       <author><first>Wei-Te</first><last>Chen</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>You-Shan</first><last>Chung</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="you-shan-chung"><first>You-Shan</first><last>Chung</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>45–48</pages>
       <url hash="e919edbd">C10-3012</url>
       <bibkey>chen-etal-2010-e</bibkey>
@@ -3214,7 +3214,7 @@
       <title>Multiword Expressions in the wild? The mwetoolkit comes in handy</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>57–60</pages>
       <url hash="72488d54">C10-3015</url>
       <bibkey>ramisch-etal-2010-multiword</bibkey>
diff --git a/data/xml/C12.xml b/data/xml/C12.xml
index 8c0ddfc2de..d1b5f725c8 100644
--- a/data/xml/C12.xml
+++ b/data/xml/C12.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2012</booktitle>
       <url hash="82ada411">C12-1</url>
       <editor><first>Martin</first><last>Kay</last></editor>
-      <editor><first>Christian</first><last>Boitet</last></editor>
+      <editor id="christian-boitet"><first>Christian</first><last>Boitet</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -20,7 +20,7 @@
       <title>Multi-Dimensional Feature Merger for Question Answering</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>J. William</first><last>Murdock</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Adam</first><last>Lally</last></author>
       <author><first>Aditya</first><last>Kalyanpur</last></author>
       <pages>1–16</pages>
@@ -40,8 +40,8 @@
     </paper>
     <paper id="3">
       <title>Automatic Detection of Point of View Differences in <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Cathleen</first><last>Kantner</last></author>
       <pages>33–50</pages>
       <url hash="14517a8b">C12-1003</url>
@@ -50,14 +50,14 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>peed<fixed-case>R</fixed-case>ead: A Fast Named Entity Recognition Pipeline</title>
       <author><first>Rami</first><last>Al-Rfou’</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>51–66</pages>
       <url hash="82cec8cd">C12-1004</url>
       <bibkey>al-rfou-skiena-2012-speedread</bibkey>
     </paper>
     <paper id="5">
       <title>Experiments with Term Translation</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>67–82</pages>
@@ -69,7 +69,7 @@
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Younes</first><last>Samih</last></author>
       <author><first>Khaled</first><last>Shaalan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>83–96</pages>
       <url hash="e3d12097">C12-1006</url>
       <bibkey>attia-etal-2012-floating</bibkey>
@@ -77,8 +77,8 @@
     <paper id="7">
       <title>Contribution of Complex Lexical Information to Solve Syntactic Ambiguity in <fixed-case>B</fixed-case>asque</title>
       <author><first>Aitziber</first><last>Atutxa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>97–114</pages>
       <url hash="c925a415">C12-1007</url>
       <bibkey>atutxa-etal-2012-contribution</bibkey>
@@ -103,10 +103,10 @@
     <paper id="10">
       <title>Translation Quality-Based Supplementary Data Selection by Incremental Update of Translation Models</title>
       <author><first>Pratyush</first><last>Banerjee</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>149–166</pages>
       <url hash="50da84ca">C12-1010</url>
       <bibkey>banerjee-etal-2012-translation</bibkey>
@@ -133,18 +133,18 @@
       <author><first>Luciano</first><last>Barbosa</last></author>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
       <author><first>Mahsa</first><last>Yarmohammadi</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>201–214</pages>
       <url hash="2df38664">C12-1013</url>
       <bibkey>barbosa-etal-2012-harvesting</bibkey>
     </paper>
     <paper id="14">
       <title>An Evaluation of Statistical Post-Editing Systems Applied to <fixed-case>RBMT</fixed-case> and <fixed-case>SMT</fixed-case> Systems</title>
-      <author><first>Hanna</first><last>Béchara</last></author>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Béchara</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>215–230</pages>
       <url hash="99309b5b">C12-1014</url>
       <bibkey>bechara-etal-2012-evaluation</bibkey>
@@ -152,19 +152,19 @@
     <paper id="15">
       <title><fixed-case>P</fixed-case>rague <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank 2.5 – a Revisited Version of <fixed-case>PDT</fixed-case> 2.0</title>
       <author><first>Eduard</first><last>Bejček</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <author><first>Jan</first><last>Popelka</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>231–246</pages>
       <url hash="f40bc527">C12-1015</url>
       <bibkey>bejcek-etal-2012-prague</bibkey>
     </paper>
     <paper id="16">
       <title>Deriving a Lexicon for a Precision Grammar from Language Documentation Resources: A Case Study of <fixed-case>C</fixed-case>hintang</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Robert</first><last>Schikowski</last></author>
       <author><first>Balthasar</first><last>Bickel</last></author>
       <pages>247–262</pages>
@@ -173,7 +173,7 @@
     </paper>
     <paper id="17">
       <title>Quantifying Semantics using Complex Network Analysis</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Stefanie</first><last>Roos</last></author>
       <author><first>Karsten</first><last>Weihe</last></author>
       <pages>263–278</pages>
@@ -183,7 +183,7 @@
     <paper id="18">
       <title>Improvements to Training an <fixed-case>RNN</fixed-case> parser</title>
       <author><first>Richard</first><last>Billingsley</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>279–294</pages>
       <url hash="d2ae0538">C12-1018</url>
       <bibkey>billingsley-curran-2012-improvements</bibkey>
@@ -205,17 +205,17 @@
       <author><first>Hien</first><last>Nguyen</last></author>
       <author><first>Nirwan</first><last>Sharma</last></author>
       <author><first>Anne-Marie</first><last>Robinson</last></author>
-      <author><first>Elaine</first><last>O’Mahony</last></author>
+      <author id="elaine-omahony"><first>Elaine</first><last>O’Mahony</last></author>
       <author><first>Ben</first><last>Darvill</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>René</first><last>van der Wal</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="rene-van-der-wal"><first>René</first><last>van der Wal</last></author>
       <pages>311–324</pages>
       <url hash="8406b412">C12-1020</url>
       <bibkey>blake-etal-2012-natural</bibkey>
     </paper>
     <paper id="21">
       <title>Studying the Effect of Input Size for <fixed-case>B</fixed-case>ayesian Word Segmentation on the <fixed-case>P</fixed-case>rovidence Corpus</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Katherine</first><last>Demuth</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>325–340</pages>
@@ -225,8 +225,8 @@
     <paper id="22">
       <title><fixed-case>B</fixed-case>ayesian Language Modelling of <fixed-case>G</fixed-case>erman Compounds</title>
       <author><first>Jan A.</first><last>Botha</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>341–356</pages>
       <url hash="efe161b3">C12-1022</url>
       <bibkey>botha-etal-2012-bayesian</bibkey>
@@ -235,7 +235,7 @@
       <title>Can <fixed-case>S</fixed-case>panish Be Simpler? <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>i<fixed-case>S</fixed-case>: Lexical Simplification for <fixed-case>S</fixed-case>panish</title>
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Luz</first><last>Rello</last></author>
-      <author><first>Biljana</first><last>Drndarevic</last></author>
+      <author id="biljana-drndarevic"><first>Biljana</first><last>Drndarevic</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>357–374</pages>
       <url hash="ff470518">C12-1023</url>
@@ -262,9 +262,9 @@
       <title>Identifying <fixed-case>U</fixed-case>rdu Complex Predication via Bigram Extraction</title>
       <author><first>Miriam</first><last>Butt</last></author>
       <author><first>Tina</first><last>Bögel</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Sebastian</first><last>Sulger</last></author>
-      <author><first>Tafseer</first><last>Ahmed</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed</last></author>
       <pages>409–424</pages>
       <url hash="a749049e">C12-1026</url>
       <bibkey>butt-etal-2012-identifying</bibkey>
@@ -272,7 +272,7 @@
     <paper id="27">
       <title>Native Language Identification using Recurring <tex-math>n</tex-math>-grams – Investigating Abstraction and Domain Dependence</title>
       <author><first>Serhiy</first><last>Bykh</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>425–440</pages>
       <url hash="2681d314">C12-1027</url>
       <bibkey>bykh-meurers-2012-native</bibkey>
@@ -281,7 +281,7 @@
       <title>Analysis and Enhancement of Wikification for Microblogs with Context Expansion</title>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Lev-Arie</first><last>Ratinov</last></author>
+      <author id="lev-ratinov"><first>Lev-Arie</first><last>Ratinov</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
       <author><first>Hongzhao</first><last>Huang</last></author>
       <pages>441–456</pages>
@@ -291,7 +291,7 @@
     <paper id="29">
       <title>On the Effectiveness of using Sentence Compression Models for Query-Focused Multi-Document Summarization</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>457–474</pages>
       <url hash="0e8ce9ba">C12-1029</url>
       <bibkey>chali-hasan-2012-effectiveness</bibkey>
@@ -299,14 +299,14 @@
     <paper id="30">
       <title>Towards Automatic Topical Question Generation</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>475–492</pages>
       <url hash="73017474">C12-1030</url>
       <bibkey>chali-hasan-2012-towards</bibkey>
     </paper>
     <paper id="31">
       <title>Adjective Deletion for Linguistic Steganography and Secret Sharing</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>493–510</pages>
       <url hash="3cd5b844">C12-1031</url>
@@ -314,7 +314,7 @@
     </paper>
     <paper id="32">
       <title>The Secret’s in the Word Order: Text-to-Text Generation for Linguistic Steganography</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>511–528</pages>
       <url hash="663fb489">C12-1032</url>
@@ -345,7 +345,7 @@
       <author><first>Liqiang</first><last>Nie</last></author>
       <author><first>Xia</first><last>Hu</last></author>
       <author><first>Xiangyu</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Xiaoming</first><last>Zhang</last></author>
       <pages>561–576</pages>
       <url hash="6eedf432">C12-1035</url>
@@ -362,17 +362,17 @@
     <paper id="37">
       <title>Extraction of <fixed-case>R</fixed-case>ussian Sentiment Lexicon for Product Meta-Domain</title>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>593–610</pages>
       <url hash="c0e7dc55">C12-1037</url>
       <bibkey>chetviorkin-loukachevitch-2012-extraction</bibkey>
     </paper>
     <paper id="38">
       <title>Problems in Evaluating Grammatical Error Detection Systems</title>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
       <author><first>Ross</first><last>Israel</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>611–628</pages>
       <url hash="81663569">C12-1038</url>
       <bibkey>chodorow-etal-2012-problems</bibkey>
@@ -387,12 +387,12 @@
     <paper id="40">
       <title>A Hybrid Approach to Finding Phenotype Candidates in Genetic Texts</title>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Mai-Vu</first><last>Tran</last></author>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="mai-vu-tran"><first>Mai-Vu</first><last>Tran</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Anika</first><last>Oellrich</last></author>
       <author><first>Ai</first><last>Kawazoe</last></author>
       <author><first>Martin</first><last>Hall-May</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>647–662</pages>
       <url hash="b9569028">C12-1040</url>
       <bibkey>collier-etal-2012-hybrid</bibkey>
@@ -411,7 +411,7 @@
       <author><first>Bob</first><last>Coyne</last></author>
       <author><first>Alex</first><last>Klapheke</last></author>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Daniel</first><last>Bauer</last></author>
       <pages>679–694</pages>
       <url hash="7725f9da">C12-1042</url>
@@ -435,7 +435,7 @@
     </paper>
     <paper id="45">
       <title>A Computational Cognitive Model for Semantic Sub-Network Extraction from Natural Language Queries</title>
-      <author><first>Suman</first><last>Deb Roy</last></author>
+      <author id="deb-roy"><first>Suman</first><last>Deb Roy</last></author>
       <author><first>Wenjun</first><last>Zeng</last></author>
       <pages>727–744</pages>
       <url hash="03b7ba55">C12-1045</url>
@@ -444,7 +444,7 @@
     <paper id="46">
       <title>Extraction of Domain-Specific Bilingual Lexicon from Comparable Corpora: Compositional Translation and Ranking</title>
       <author><first>Estelle</first><last>Delpech</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Claire</first><last>Lemaire</last></author>
       <pages>745–762</pages>
@@ -457,7 +457,7 @@
       <author><first>Zhumin</first><last>Chen</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Heung-Yeung</first><last>Shum</last></author>
+      <author id="heung-yeung-shum"><first>Heung-Yeung</first><last>Shum</last></author>
       <pages>763–780</pages>
       <url hash="93e55a74">C12-1047</url>
       <bibkey>duan-etal-2012-twitter</bibkey>
@@ -497,7 +497,7 @@
     </paper>
     <paper id="52">
       <title>Stacking of Dependency and Phrase Structure Parsers</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>849–866</pages>
       <url hash="1d419e77">C12-1052</url>
@@ -506,8 +506,8 @@
     <paper id="53">
       <title>Semantic Cohesion Model for Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Minwei</first><last>Feng</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>867–878</pages>
       <url hash="9d562f8a">C12-1053</url>
       <bibkey>feng-etal-2012-semantic</bibkey>
@@ -515,10 +515,10 @@
     <paper id="54">
       <title>Comparing Taxonomies for Organising Collections of Documents</title>
       <author><first>Samuel</first><last>Fernando</last></author>
-      <author><first>Mark</first><last>Hall</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="mark-hall"><first>Mark</first><last>Hall</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>879–894</pages>
       <url hash="3a672370">C12-1054</url>
@@ -527,15 +527,15 @@
     <paper id="55">
       <title>Modeling the Complexity of Manual Annotation Tasks: a Grid of Analysis</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>895–910</pages>
       <url hash="f342fbca">C12-1055</url>
       <bibkey>fort-etal-2012-modeling</bibkey>
     </paper>
     <paper id="56">
       <title>Extractive Multi-Document Summarization with Integer Linear Programming and Support Vector Regression</title>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Gerasimos</first><last>Lampouras</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>911–926</pages>
@@ -553,8 +553,8 @@
     </paper>
     <paper id="58">
       <title>Structured Term Recognition in Medical Text</title>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>943–958</pages>
       <url hash="6ec2622d">C12-1058</url>
       <bibkey>glass-gliozzo-2012-structured</bibkey>
@@ -597,8 +597,8 @@
     </paper>
     <paper id="63">
       <title>Understanding the Performance of Statistical <fixed-case>MT</fixed-case> Systems: A Linear Regression Framework</title>
-      <author><first>Francisco</first><last>Guzman</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1029–1044</pages>
       <url hash="5caa4984">C12-1063</url>
       <bibkey>guzman-vogel-2012-understanding</bibkey>
@@ -607,7 +607,7 @@
       <title>Geolocation Prediction in Social Media Data by Finding Location Indicative Words</title>
       <author><first>Bo</first><last>Han</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1045–1062</pages>
       <url hash="f184f16f">C12-1064</url>
       <bibkey>han-etal-2012-geolocation</bibkey>
@@ -616,7 +616,7 @@
       <title>Readability Classification for <fixed-case>G</fixed-case>erman using Lexical, Syntactic, and Morphological Features</title>
       <author><first>Julia</first><last>Hancke</last></author>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>1063–1080</pages>
       <url hash="b79ccac7">C12-1065</url>
       <bibkey>hancke-etal-2012-readability</bibkey>
@@ -626,7 +626,7 @@
       <author><first>Kazuo</first><last>Hara</last></author>
       <author><first>Ikumi</first><last>Suzuki</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1081–1096</pages>
       <url hash="78b7fed5">C12-1066</url>
       <bibkey>hara-etal-2012-walk</bibkey>
@@ -653,8 +653,8 @@
     <paper id="69">
       <title>Bridging the Gap between Intrinsic and Perceived Relevance in Snippet Generation</title>
       <author><first>Jing</first><last>He</last></author>
-      <author><first>Pablo</first><last>Duboue</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>1129–1146</pages>
       <url hash="c34d67b8">C12-1069</url>
       <bibkey>he-etal-2012-bridging</bibkey>
@@ -682,7 +682,7 @@
       <title>Statistical Method of Building Dialect Language Models for <fixed-case>ASR</fixed-case> Systems</title>
       <author><first>Naoki</first><last>Hirayama</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>1179–1194</pages>
       <url hash="a3ae2153">C12-1072</url>
       <bibkey>hirayama-etal-2012-statistical</bibkey>
@@ -692,7 +692,7 @@
       <author><first>Martin</first><last>Holub</last></author>
       <author><first>Vincent</first><last>Kríž</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>1195–1210</pages>
       <url hash="46e16dbf">C12-1073</url>
       <bibkey>holub-etal-2012-tailored</bibkey>
@@ -700,7 +700,7 @@
     <paper id="74">
       <title>Method Mention Extraction from Scientific Research Papers</title>
       <author><first>Hospice</first><last>Houngbo</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>1211–1222</pages>
       <url hash="47b5ae0d">C12-1074</url>
       <bibkey>houngbo-mercer-2012-method</bibkey>
@@ -708,7 +708,7 @@
     <paper id="75">
       <title>Context-Enhanced Personalized Social Summarization</title>
       <author><first>Po</first><last>Hu</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <author><first>Yujing</first><last>Guo</last></author>
       <pages>1223–1238</pages>
@@ -727,7 +727,7 @@
       <author><first>Jiawei</first><last>Han</last></author>
       <author><first>Alice</first><last>Leung</last></author>
       <author><first>John</first><last>Hancock</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>1239–1256</pages>
       <url hash="431d1c89">C12-1076</url>
       <bibkey>huang-etal-2012-tweet</bibkey>
@@ -736,7 +736,7 @@
       <title>Improved <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Induction with Boundary Words and <fixed-case>B</fixed-case>ayesian Inference</title>
       <author><first>Yun</first><last>Huang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>1257–1274</pages>
       <url hash="36bb6598">C12-1077</url>
       <bibkey>huang-etal-2012-improved</bibkey>
@@ -754,7 +754,7 @@
       <author><first>Naoya</first><last>Inoue</last></author>
       <author><first>Ekaterina</first><last>Ovchinnikova</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <pages>1291–1308</pages>
       <url hash="18c7e5ea">C12-1079</url>
       <bibkey>inoue-etal-2012-coreference</bibkey>
@@ -779,14 +779,14 @@
     <paper id="82">
       <title>Towards a Generic and Flexible Citation Classifier Based on a Faceted Classification Scheme</title>
       <author><first>Charles</first><last>Jochim</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1343–1358</pages>
       <url hash="96634dd4">C12-1082</url>
       <bibkey>jochim-schutze-2012-towards</bibkey>
     </paper>
     <paper id="83">
       <title>Semantics-Based Machine Translation with Hyperedge Replacement Grammars</title>
-      <author><first>Bevan</first><last>Jones</last></author>
+      <author id="bevan-jones"><first>Bevan</first><last>Jones</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Daniel</first><last>Bauer</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
@@ -806,9 +806,9 @@
     </paper>
     <paper id="85">
       <title>Improving Topic Classification for Highly Inflective Languages</title>
-      <author><first>Jurgita</first><last>Kapociute-Dzikiene</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapociute-Dzikiene</last></author>
       <author><first>Frederik</first><last>Vaassen</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Algis</first><last>Krupavičius</last></author>
       <pages>1393–1410</pages>
       <url hash="d705bdbf">C12-1085</url>
@@ -819,7 +819,7 @@
       <author><first>Hiroaki</first><last>Kawasaki</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1411–1424</pages>
       <url hash="9b591b70">C12-1086</url>
       <bibkey>kawasaki-etal-2012-generating</bibkey>
@@ -827,7 +827,7 @@
     <paper id="87">
       <title>A Machine Learning Approach for Phenotype Name Recognition</title>
       <author><first>Maryam</first><last>Khordad</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Peter</first><last>Rogan</last></author>
       <pages>1425–1440</pages>
       <url hash="1ae4fd7c">C12-1087</url>
@@ -835,10 +835,10 @@
     </paper>
     <paper id="88">
       <title>Improving <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Parse Reranking with Dependency Grammar Features</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Dominick</first><last>Ng</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>1441–1458</pages>
       <url hash="27fbf5f5">C12-1088</url>
       <bibkey>kim-etal-2012-improving</bibkey>
@@ -855,7 +855,7 @@
     <paper id="90">
       <title>Exploring Local and Global Semantic Information for Event Pronoun Resolution</title>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1475–1488</pages>
       <url hash="6545cfb0">C12-1090</url>
       <bibkey>kong-zhou-2012-exploring</bibkey>
@@ -864,8 +864,8 @@
       <title>Semantic Processing of Compounds in <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Amba</first><last>Kulkarni</last></author>
       <author><first>Soma</first><last>Paul</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Anil</first><last>Kumar</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="anil-kumar-nelakanti"><first>Anil</first><last>Kumar</last></author>
       <author><first>Nitesh</first><last>Surtani</last></author>
       <pages>1489–1502</pages>
       <url hash="a8c47654">C12-1091</url>
@@ -874,8 +874,8 @@
     <paper id="92">
       <title>Unsupervised <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Opinion Word Translation using Dependency Distance and Feature-Opinion Association Weight</title>
       <author><first>Guo-Hau</first><last>Lai</last></author>
-      <author><first>Ying-Mei</first><last>Guo</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="ying-mei-guo"><first>Ying-Mei</first><last>Guo</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>1503–1518</pages>
       <url hash="e9aba04e">C12-1092</url>
       <bibkey>lai-etal-2012-unsupervised</bibkey>
@@ -884,7 +884,7 @@
       <title>On-line Trend Analysis with Topic Models: #twitter Trends Detection Topic Model Online</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1519–1534</pages>
       <url hash="30e2b15f">C12-1093</url>
       <bibkey>lau-etal-2012-line</bibkey>
@@ -899,7 +899,7 @@
     </paper>
     <paper id="95">
       <title>Evaluating Different Methods for Automatically Collecting Large General Corpora for <fixed-case>B</fixed-case>asque from the Web</title>
-      <author><first>Igor</first><last>Leturia</last></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last></author>
       <pages>1553–1570</pages>
       <url hash="91bf9def">C12-1095</url>
       <bibkey>leturia-2012-evaluating</bibkey>
@@ -908,7 +908,7 @@
       <title>Approximate Sentence Retrieval for Scalable and Efficient Example-Based Machine Translation</title>
       <author><first>Johannes</first><last>Leveling</last></author>
       <author><first>Debasis</first><last>Ganguly</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Gareth</first><last>Jones</last></author>
       <pages>1571–1586</pages>
       <url hash="29f6a442">C12-1096</url>
@@ -928,7 +928,7 @@
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Xun</first><last>Wang</last></author>
       <author><first>Ye</first><last>Tian</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>1603–1618</pages>
       <url hash="aa30a239">C12-1098</url>
       <bibkey>li-etal-2012-update</bibkey>
@@ -936,7 +936,7 @@
     <paper id="99">
       <title>Employing Morphological Structures and Sememes for <fixed-case>C</fixed-case>hinese Event Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1619–1634</pages>
       <url hash="6c0d85db">C12-1099</url>
       <bibkey>li-zhou-2012-employing</bibkey>
@@ -944,9 +944,9 @@
     <paper id="100">
       <title>Joint Modeling of Trigger Identification and Event Type Determination in <fixed-case>C</fixed-case>hinese Event Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Hongjun</first><last>Diao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1635–1652</pages>
       <url hash="91bebd27">C12-1100</url>
       <bibkey>li-etal-2012-joint</bibkey>
@@ -955,7 +955,7 @@
       <title>Integrating Surface and Abstract Features for Robust Cross-Domain <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Xiaoqing</first><last>Li</last></author>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>1653–1670</pages>
       <url hash="d5bb055d">C12-1101</url>
@@ -1012,7 +1012,7 @@
       <title>Recognizing Personal Characteristics of Readers using Eye-Movements and Text Features</title>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
       <author><first>Tadayoshi</first><last>Hara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1747–1762</pages>
       <url hash="b8a48ab0">C12-1107</url>
       <bibkey>martinez-gomez-etal-2012-recognizing</bibkey>
@@ -1028,7 +1028,7 @@
     <paper id="109">
       <title>Using Distributional Similarity for Lexical Expansion in Knowledge-based Word Sense Disambiguation</title>
       <author><first>Tristan</first><last>Miller</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>1781–1796</pages>
@@ -1038,7 +1038,7 @@
     <paper id="110">
       <title>Revising the Compositional Method for Terminology Acquisition from Comparable Corpora</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>1797–1810</pages>
       <url hash="6ca9b804">C12-1110</url>
       <bibkey>morin-daille-2012-revising</bibkey>
@@ -1063,7 +1063,7 @@
     <paper id="113">
       <title>Sentiment Analysis in <fixed-case>T</fixed-case>witter with Lightweight Discourse Analysis</title>
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1847–1864</pages>
       <url hash="a8467f22">C12-1113</url>
       <bibkey>mukherjee-bhattacharyya-2012-sentiment</bibkey>
@@ -1071,7 +1071,7 @@
     <paper id="114">
       <title><fixed-case>Y</fixed-case>ou<fixed-case>C</fixed-case>at: Weakly Supervised <fixed-case>Y</fixed-case>outube Video Categorization System from Meta Data &amp; User Comments using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et &amp; <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1865–1882</pages>
       <url hash="56f4c249">C12-1114</url>
       <bibkey>mukherjee-bhattacharyya-2012-youcat</bibkey>
@@ -1081,7 +1081,7 @@
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>1883–1900</pages>
       <url hash="dd595a07">C12-1115</url>
       <erratum id="1" hash="8edae19f">C12-1115e1</erratum>
@@ -1091,7 +1091,7 @@
       <title>Incremental Learning of Affix Segmentation</title>
       <author><first>Wondwossen</first><last>Mulugeta</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
-      <author><first>Baye</first><last>Yimam</last></author>
+      <author id="baye-yimam-mekonnen"><first>Baye</first><last>Yimam</last></author>
       <pages>1901–1914</pages>
       <url hash="c4e904ec">C12-1116</url>
       <bibkey>mulugeta-etal-2012-incremental</bibkey>
@@ -1107,8 +1107,8 @@
     <paper id="118">
       <title>Learning Effective and Interpretable Semantic Models using Non-Negative Sparse Embedding</title>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1933–1950</pages>
       <url hash="eb0b3339">C12-1118</url>
       <bibkey>murphy-etal-2012-learning</bibkey>
@@ -1116,7 +1116,7 @@
     <paper id="119">
       <title>Combining <fixed-case>W</fixed-case>ordnet and Morphosyntactic Information in Terminology Clustering</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <pages>1951–1962</pages>
       <url hash="e95cc602">C12-1119</url>
       <bibkey>mykowiecka-marciniak-2012-combining</bibkey>
@@ -1131,9 +1131,9 @@
     </paper>
     <paper id="121">
       <title>Optimizing for Sentence-Level <fixed-case>BLEU</fixed-case>+1 Yields Short Translations</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Francisco</first><last>Guzman</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1979–1994</pages>
       <url hash="54069403">C12-1121</url>
       <bibkey>nakov-etal-2012-optimizing</bibkey>
@@ -1142,7 +1142,7 @@
       <title>Grammarless Parsing for Joint Inference</title>
       <author><first>Jason</first><last>Naradowsky</last></author>
       <author><first>Tim</first><last>Vieira</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>1995–2010</pages>
       <url hash="0565a2be">C12-1122</url>
       <bibkey>naradowsky-etal-2012-grammarless</bibkey>
@@ -1165,7 +1165,7 @@
     </paper>
     <paper id="125">
       <title>A Comparison of Syntactic Reordering Methods for <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Machine Translation</title>
-      <author><first>Jiří</first><last>Navrátil</last></author>
+      <author id="jiri-navratil"><first>Jiří</first><last>Navrátil</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <pages>2043–2058</pages>
@@ -1185,25 +1185,25 @@
       <author><first>David</first><last>Newman</last></author>
       <author><first>Nagendra</first><last>Koilada</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2077–2092</pages>
       <url hash="cc533b0c">C12-1127</url>
       <bibkey>newman-etal-2012-bayesian</bibkey>
     </paper>
     <paper id="128">
       <title>Exploiting Category-Specific Information for Multi-Document Summarization</title>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Praveen</first><last>Bysani</last></author>
       <author><first>Ziheng</first><last>Lin</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>2093–2108</pages>
       <url hash="6ee461e5">C12-1128</url>
       <bibkey>ng-etal-2012-exploiting</bibkey>
     </paper>
     <paper id="129">
       <title>Improved Temporal Relation Classification using Dependency Parses and Selective Crowdsourced Annotations</title>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>2109–2124</pages>
       <url hash="3fa66be8">C12-1129</url>
@@ -1212,7 +1212,7 @@
     <paper id="130">
       <title>Accurate Unbounded Dependency Recovery using Generalized Categorial Grammars</title>
       <author><first>Luan</first><last>Nguyen</last></author>
-      <author><first>Marten</first><last>Van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>Van Schijndel</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>2125–2140</pages>
       <url hash="8e129d3e">C12-1130</url>
@@ -1220,10 +1220,10 @@
     </paper>
     <paper id="131">
       <title><fixed-case>T</fixed-case>ibetan Base Noun Phrase Identification Framework Based on <fixed-case>C</fixed-case>hinese-<fixed-case>T</fixed-case>ibetan Sentence Aligned Corpus</title>
-      <author><first>Ming Hua</first><last>Nuo</last></author>
-      <author><first>Hui Dan</first><last>Liu</last></author>
-      <author><first>Wei Na</first><last>Zhao</last></author>
-      <author><first>Long Long</first><last>Ma</last></author>
+      <author id="minghua-nuo"><first>Ming Hua</first><last>Nuo</last></author>
+      <author id="huidan-liu"><first>Hui Dan</first><last>Liu</last></author>
+      <author id="weina-zhao"><first>Wei Na</first><last>Zhao</last></author>
+      <author id="longlong-ma"><first>Long Long</first><last>Ma</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <author><first>Zhi Ming</first><last>Ding</last></author>
       <pages>2141–2158</pages>
@@ -1240,7 +1240,7 @@
     </paper>
     <paper id="133">
       <title>Attribute Extraction from Conjectural Queries</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>2177–2190</pages>
       <url hash="d060e985">C12-1133</url>
       <bibkey>pasca-2012-attribute</bibkey>
@@ -1257,7 +1257,7 @@
       <title>Simple and Effective Parameter Tuning for Domain Adaptation of Statistical Machine Translation</title>
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>2209–2224</pages>
       <url hash="83146960">C12-1135</url>
       <bibkey>pecina-etal-2012-simple</bibkey>
@@ -1267,7 +1267,7 @@
       <author><first>Yulong</first><last>Pei</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Qifeng</first><last>Fan</last></author>
-      <author><first>Lian’en</first><last>Huang</last></author>
+      <author id="lianen-huang"><first>Lian’en</first><last>Huang</last></author>
       <pages>2225–2242</pages>
       <url hash="4a507689">C12-1136</url>
       <bibkey>pei-etal-2012-supervised</bibkey>
@@ -1275,7 +1275,7 @@
     <paper id="137">
       <title>Collective Search for Concept Disambiguation</title>
       <author><first>Anja</first><last>Pilz</last></author>
-      <author><first>Gerhard</first><last>Paaß</last></author>
+      <author id="gerhard-paass"><first>Gerhard</first><last>Paaß</last></author>
       <pages>2243–2258</pages>
       <url hash="d741d24b">C12-1137</url>
       <bibkey>pilz-paass-2012-collective</bibkey>
@@ -1283,18 +1283,18 @@
     <paper id="138">
       <title>Who’s (Really) the Boss? Perception of Situational Power in Written Interactions</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>2259–2274</pages>
       <url hash="c8122cfc">C12-1138</url>
       <bibkey>prabhakaran-etal-2012-whos</bibkey>
     </paper>
     <paper id="139">
       <title>Bilingual Lexicon Construction from Comparable Corpora via Dependency Mapping</title>
-      <author><first>Longhua</first><last>Qian</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>2275–2290</pages>
       <url hash="e414b128">C12-1139</url>
       <bibkey>qian-etal-2012-bilingual</bibkey>
@@ -1340,7 +1340,7 @@
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Tomoya</first><last>Mizumoto</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>2357–2374</pages>
       <url hash="dad18b67">C12-1144</url>
       <bibkey>sakaguchi-etal-2012-joint</bibkey>
@@ -1351,7 +1351,7 @@
       <author><first>Rohit</first><last>Prasad</last></author>
       <author><first>Shiv</first><last>Vitaladevuni</last></author>
       <author><first>Maciej</first><last>Pacula</last></author>
-      <author><first>Michael</first><last>Crystal</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
       <author><first>Brian</first><last>Marx</last></author>
       <author><first>Denise</first><last>Sloan</last></author>
       <author><first>Jennifer</first><last>Vasterling</last></author>
@@ -1363,9 +1363,9 @@
     <paper id="146">
       <title>Ant Colony Algorithm for the Unsupervised Word Sense Disambiguation of Texts: Comparison and Evaluation</title>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>2389–2404</pages>
       <url hash="04f9314a">C12-1146</url>
       <bibkey>schwab-etal-2012-ant</bibkey>
@@ -1381,18 +1381,18 @@
     </paper>
     <paper id="148">
       <title>Improving Supervised Sense Disambiguation with Web-Scale Selectors</title>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Fernando</first><last>Gomez</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>2423–2440</pages>
       <url hash="9209bd0d">C12-1148</url>
       <bibkey>schwartz-etal-2012-improving</bibkey>
     </paper>
     <paper id="149">
       <title>The <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ocial <fixed-case>M</fixed-case>edia <fixed-case>B</fixed-case>ank: a Treebank of Noisy User Generated Content</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoit</first><last>Sagot</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoit</first><last>Sagot</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Virginie</first><last>Mouilleron</last></author>
       <author><first>Vanessa</first><last>Combet</last></author>
       <pages>2441–2458</pages>
@@ -1402,14 +1402,14 @@
     <paper id="150">
       <title>Initial Explorations on using <fixed-case>CRF</fixed-case>s for <fixed-case>T</fixed-case>urkish Named Entity Recognition</title>
       <author><first>Gökhan Akın</first><last>Şeker</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>2459–2474</pages>
       <url hash="eba2c770">C12-1150</url>
       <bibkey>seker-eryigit-2012-initial</bibkey>
     </paper>
     <paper id="151">
       <title>Differential Evolution Based Feature Selection and Classifier Ensemble for Named Entity Recognition</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
       <pages>2475–2490</pages>
@@ -1419,8 +1419,8 @@
     <paper id="152">
       <title>Noun Group and Verb Group Identification for <fixed-case>H</fixed-case>indi</title>
       <author><first>Smriti</first><last>Singh</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
-      <author><first>Vaijayanthi M.</first><last>Sarma</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
+      <author id="vaijayanthi-m-sarma"><first>Vaijayanthi M.</first><last>Sarma</last></author>
       <pages>2491–2506</pages>
       <url hash="6d764dd7">C12-1152</url>
       <bibkey>singh-etal-2012-noun</bibkey>
@@ -1429,7 +1429,7 @@
       <title>Named Entity Recognition System for <fixed-case>U</fixed-case>rdu</title>
       <author><first>UmrinderPal</first><last>Singh</last></author>
       <author><first>Vishal</first><last>Goyal</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>2507–2518</pages>
       <url hash="b75a61d2">C12-1153</url>
       <bibkey>singh-etal-2012-named</bibkey>
@@ -1437,19 +1437,19 @@
     <paper id="154">
       <title>Easy-first Coreference Resolution</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>2519–2534</pages>
       <url hash="cace45a1">C12-1154</url>
       <bibkey>stoyanov-eisner-2012-easy</bibkey>
     </paper>
     <paper id="155">
       <title>Modeling Leadership and Influence in Multi-party Online Discourse</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
       <author><first>Jenny</first><last>Stromer-Galley</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>Umit</first><last>Boz</last></author>
       <author><first>Veena</first><last>Ravishankar</last></author>
       <author><first>Xiaoai</first><last>Ren</last></author>
@@ -1479,10 +1479,10 @@
     </paper>
     <paper id="158">
       <title>Native Tongues, Lost and Found: Resources and Empirical Evaluations in Native Language Identification</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Daniel</first><last>Blanchard</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>2585–2602</pages>
       <url hash="280e626b">C12-1158</url>
       <revision id="1" href="C12-1158v1" hash="60313d27"/>
@@ -1500,7 +1500,7 @@
     </paper>
     <paper id="160">
       <title>Efficient Discrimination Between Closely Related Languages</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>2619–2634</pages>
       <url hash="5dbefbb5">C12-1160</url>
@@ -1524,7 +1524,7 @@
     </paper>
     <paper id="163">
       <title>Implicitness of Discourse Relations</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>2669–2684</pages>
       <url hash="6e7e1f29">C12-1163</url>
@@ -1532,16 +1532,16 @@
     </paper>
     <paper id="164">
       <title>Combining Statistical Translation Techniques for Cross-Language Information Retrieval</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>2685–2702</pages>
       <url hash="955ef36b">C12-1164</url>
       <bibkey>ture-etal-2012-combining</bibkey>
     </paper>
     <paper id="165">
       <title>Multi-way Tensor Factorization for Unsupervised Lexical Acquisition</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
@@ -1552,7 +1552,7 @@
     <paper id="166">
       <title>Sub-corpora Sampling with an Application to Bilingual Lexicon Extraction</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>2721–2738</pages>
       <url hash="3371f893">C12-1166</url>
       <bibkey>vulic-moens-2012-sub</bibkey>
@@ -1561,7 +1561,7 @@
       <title>The Utility of Discourse Structure in Identifying Resolved Threads in Technical User Forums</title>
       <author><first>Li</first><last>Wang</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2739–2756</pages>
       <url hash="caf3fc3a">C12-1167</url>
       <bibkey>wang-etal-2012-utility</bibkey>
@@ -1579,7 +1579,7 @@
     <paper id="169">
       <title><fixed-case>C</fixed-case>hinese Evaluative Information Analysis</title>
       <author><first>Yiou</first><last>Wang</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Takuya</first><last>Kawada</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>2773–2788</pages>
@@ -1588,7 +1588,7 @@
     </paper>
     <paper id="170">
       <title>Harnessing the <fixed-case>CRF</fixed-case> Complexity with Domain-Specific Constraints. The Case of Morphosyntactic Tagging of a Highly Inflected Language</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>2789–2804</pages>
       <url hash="5a4922b2">C12-1170</url>
       <bibkey>waszczuk-2012-harnessing</bibkey>
@@ -1598,7 +1598,7 @@
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Junta</first><last>Mizuno</last></author>
       <author><first>Eric</first><last>Nichols</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>2805–2820</pages>
       <url hash="ee11f1b2">C12-1171</url>
@@ -1621,7 +1621,7 @@
       <author><first>Hitoshi</first><last>Yamamoto</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>2835–2850</pages>
       <url hash="cf78b459">C12-1173</url>
       <bibkey>wu-etal-2012-factored</bibkey>
@@ -1646,7 +1646,7 @@
     <paper id="176">
       <title>Unsupervised Discriminative Induction of Synchronous Grammar for Machine Translation</title>
       <author><first>Xinyan</first><last>Xiao</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
@@ -1658,8 +1658,8 @@
       <title>Paraphrasing for Style</title>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
       <pages>2899–2914</pages>
       <url hash="243a7545">C12-1177</url>
@@ -1676,9 +1676,9 @@
     <paper id="179">
       <title><fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Event Extraction in <fixed-case>P</fixed-case>ersian Text</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-sani</last></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
-      <author><first>Mahbaneh</first><last>Eshaghzadeh</last></author>
+      <author id="mahbaneh-eshaghzadeh-torbati"><first>Mahbaneh</first><last>Eshaghzadeh</last></author>
       <pages>2931–2944</pages>
       <url hash="e7231307">C12-1179</url>
       <bibkey>yaghoobzadeh-etal-2012-iso</bibkey>
@@ -1700,7 +1700,7 @@
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Lifu</first><last>Huang</last></author>
       <author><first>Yulong</first><last>Pei</last></author>
-      <author><first>Lian’en</first><last>Huang</last></author>
+      <author id="lianen-huang"><first>Lian’en</first><last>Huang</last></author>
       <pages>2961–2976</pages>
       <url hash="6060822c">C12-1181</url>
       <bibkey>yin-etal-2012-relationlistwise</bibkey>
@@ -1710,7 +1710,7 @@
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Yulong</first><last>Pei</last></author>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Lian’en</first><last>Huang</last></author>
+      <author id="lianen-huang"><first>Lian’en</first><last>Huang</last></author>
       <pages>2977–2992</pages>
       <url hash="e7d0ce19">C12-1182</url>
       <bibkey>yin-etal-2012-senttopic</bibkey>
@@ -1737,7 +1737,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3019–3036</pages>
       <url hash="5df5f580">C12-1185</url>
       <bibkey>zhai-etal-2012-machine</bibkey>
@@ -1747,7 +1747,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3037–3054</pages>
       <url hash="f6253c20">C12-1186</url>
       <bibkey>zhai-etal-2012-tree</bibkey>
@@ -1777,21 +1777,21 @@
       <title>A Lazy Learning Model for Entity Linking using Query-Specific Information</title>
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3089–3104</pages>
       <url hash="2d7a99a4">C12-1189</url>
       <bibkey>zhang-etal-2012-lazy</bibkey>
     </paper>
     <paper id="190">
       <title>The Use of Dependency Relation Graph to Enhance the Term Weighting in Question Retrieval</title>
-      <author><first>Weinan</first><last>Zhang</last></author>
-      <author><first>Zhaoyan</first><last>Ming</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
+      <author id="zhaoyan-ming"><first>Zhaoyan</first><last>Ming</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
       <author><first>Liqiang</first><last>Nie</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>3105–3120</pages>
       <url hash="5a200ff7">C12-1190</url>
       <bibkey>zhang-etal-2012-use</bibkey>
@@ -1799,7 +1799,7 @@
     <paper id="191">
       <title>Long-Tail Distributions and Unsupervised Learning of Morphology</title>
       <author><first>Qiuye</first><last>Zhao</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <pages>3121–3136</pages>
       <url hash="9d1736d8">C12-1191</url>
       <bibkey>zhao-marcus-2012-long</bibkey>
@@ -1837,7 +1837,7 @@
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2012: Posters</booktitle>
       <url hash="f68383fc">C12-2</url>
       <editor><first>Martin</first><last>Kay</last></editor>
-      <editor><first>Christian</first><last>Boitet</last></editor>
+      <editor id="christian-boitet"><first>Christian</first><last>Boitet</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -1850,7 +1850,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>K</fixed-case>-Best Spanning Tree Dependency Parsing With Verb Valency Lexicon Reranking</title>
-      <author><first>Zeljko</first><last>Agic</last></author>
+      <author id="zeljko-agic"><first>Zeljko</first><last>Agic</last></author>
       <pages>1–12</pages>
       <url hash="003db939">C12-2001</url>
       <bibkey>agic-2012-k</bibkey>
@@ -1867,7 +1867,7 @@
       <title>Automatic Bilingual Phrase Extraction from Comparable Corpora</title>
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Yang</first><last>Feng</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>23–32</pages>
       <url hash="00ffa5d4">C12-2003</url>
       <bibkey>aker-etal-2012-automatic</bibkey>
@@ -1882,14 +1882,14 @@
     <paper id="5">
       <title>Mapping <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia into the Named Entities Taxonomy</title>
       <author><first>Fahd</first><last>Alotaibi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>43–52</pages>
       <url hash="f866b20a">C12-2005</url>
       <bibkey>alotaibi-lee-2012-mapping</bibkey>
     </paper>
     <paper id="6">
       <title>Probabilistic Refinement Algorithms for the Generation of Referring Expressions</title>
-      <author><first>Romina</first><last>Altamirano</last></author>
+      <author id="romina-altamirano"><first>Romina</first><last>Altamirano</last></author>
       <author><first>Carlos</first><last>Areces</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
       <pages>53–62</pages>
@@ -1905,9 +1905,9 @@
     </paper>
     <paper id="8">
       <title>Cross-Lingual Sentiment Analysis for <fixed-case>I</fixed-case>ndian Languages using Linked <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets</title>
-      <author><first>Balamurali</first><last>A.R.</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>A.R.</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>73–82</pages>
       <url hash="1228211b">C12-2008</url>
       <bibkey>a-r-etal-2012-cross</bibkey>
@@ -1923,7 +1923,7 @@
     </paper>
     <paper id="10">
       <title>Collocation Extraction using Parallel Corpus</title>
-      <author><first>Kavosh</first><last>Asadi Atui</last></author>
+      <author id="kavosh-asadi-atui"><first>Kavosh</first><last>Asadi Atui</last></author>
       <author><first>Heshaam</first><last>Faili</last></author>
       <author><first>Kaveh</first><last>Assadi Atuie</last></author>
       <pages>93–102</pages>
@@ -1936,7 +1936,7 @@
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Younes</first><last>Samih</last></author>
       <author><first>Khaled</first><last>Shaalan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>103–112</pages>
       <url hash="4905b2f6">C12-2011</url>
       <bibkey>attia-etal-2012-improved</bibkey>
@@ -1990,7 +1990,7 @@
     </paper>
     <paper id="18">
       <title>Does Similarity Matter? The Case of Answer Extraction from Technical Discussion Forums</title>
-      <author><first>Rose</first><last>Catherine</last></author>
+      <author id="rose-catherine-kanjirathinkal"><first>Rose</first><last>Catherine</last></author>
       <author><first>Amit</first><last>Singh</last></author>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
       <author><first>Dinesh</first><last>Raghu</last></author>
@@ -2018,7 +2018,7 @@
     <paper id="21">
       <title>Impact of Less Skewed Distributions on Efficiency and Effectiveness of Biomedical Relation Extraction</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>205–216</pages>
       <url hash="4211202c">C12-2021</url>
       <bibkey>chowdhury-lavelli-2012-impact</bibkey>
@@ -2026,7 +2026,7 @@
     <paper id="22">
       <title>Lattice Rescoring for Speech Recognition using Large Scale Distributed Language Models</title>
       <author><first>Euisok</first><last>Chung</last></author>
-      <author><first>Hyung-Bae</first><last>Jeon</last></author>
+      <author id="hyung-bae-jeon"><first>Hyung-Bae</first><last>Jeon</last></author>
       <author><first>Jeon-Gue</first><last>Park</last></author>
       <author><first>Yun-Keun</first><last>Lee</last></author>
       <pages>217–224</pages>
@@ -2037,7 +2037,7 @@
       <title>Morphological Analyzer for Affix Stacking Languages: A Case Study of <fixed-case>M</fixed-case>arathi</title>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Archana</first><last>Amberkar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>225–234</pages>
       <url hash="a034145e">C12-2023</url>
       <bibkey>dabre-etal-2012-morphological</bibkey>
@@ -2054,15 +2054,15 @@
     <paper id="25">
       <title>Coreference Clustering using Column Generation</title>
       <author><first>Jan</first><last>De Belder</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>245–254</pages>
       <url hash="f38355e0">C12-2025</url>
       <bibkey>de-belder-moens-2012-coreference</bibkey>
     </paper>
     <paper id="26">
       <title>Metric Learning for Graph-Based Domain Adaptation</title>
-      <author><first>Paramveer</first><last>Dhillon</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer</first><last>Dhillon</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Koby</first><last>Crammer</last></author>
       <pages>255–264</pages>
       <url hash="47c47fc3">C12-2026</url>
@@ -2072,7 +2072,7 @@
       <title>Automatic Hashtag Recommendation for Microblogs using Topic-Specific Translation Model</title>
       <author><first>Zhuoye</first><last>Ding</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>265–274</pages>
       <url hash="bb105e86">C12-2027</url>
       <bibkey>ding-etal-2012-automatic</bibkey>
@@ -2087,15 +2087,15 @@
     <paper id="29">
       <title>Token Level Identification of Linguistic Code Switching</title>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>287–296</pages>
       <url hash="97914419">C12-2029</url>
       <bibkey>elfardy-diab-2012-token</bibkey>
     </paper>
     <paper id="30">
       <title>Parenthetical Classification for Information Extraction</title>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <pages>297–308</pages>
       <url hash="dfb02c96">C12-2030</url>
       <bibkey>el-maarouf-villaneau-2012-parenthetical</bibkey>
@@ -2113,8 +2113,8 @@
     </paper>
     <paper id="32">
       <title>Dealing with Input Noise in Statistical Machine Translation</title>
-      <author><first>Lluis</first><last>Formiga</last></author>
-      <author><first>Jose A. R.</first><last>Fonollosa</last></author>
+      <author id="lluis-formiga"><first>Lluis</first><last>Formiga</last></author>
+      <author id="jose-a-r-fonollosa"><first>Jose A. R.</first><last>Fonollosa</last></author>
       <pages>319–328</pages>
       <url hash="adda23bd">C12-2032</url>
       <bibkey>formiga-fonollosa-2012-dealing</bibkey>
@@ -2134,7 +2134,7 @@
       <author><first>Utpal</first><last>Garain</last></author>
       <author><first>Arjun</first><last>Das</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>339–348</pages>
       <url hash="518d02cd">C12-2034</url>
       <bibkey>garain-etal-2012-leveraging</bibkey>
@@ -2163,7 +2163,7 @@
       <title>Improving Dependency Parsing with Interlinear Glossed Text and Syntactic Projection</title>
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>371–380</pages>
       <url hash="91b96ff1">C12-2037</url>
       <bibkey>georgi-etal-2012-improving</bibkey>
@@ -2194,10 +2194,10 @@
     </paper>
     <paper id="41">
       <title>Classifier-Based Tense Model for <fixed-case>SMT</fixed-case></title>
-      <author><first>ZhengXian</first><last>Gong</last></author>
+      <author id="zhengxian-gong"><first>ZhengXian</first><last>Gong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>ChewLim</first><last>Tan</last></author>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>ChewLim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <pages>411–420</pages>
       <url hash="a617afb3">C12-2041</url>
       <bibkey>gong-etal-2012-classifier</bibkey>
@@ -2248,7 +2248,7 @@
     <paper id="47">
       <title>Verb Temporality Analysis using <fixed-case>R</fixed-case>eichenbach’s Tense System</title>
       <author><first>André</first><last>Horie</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <pages>471–482</pages>
       <url hash="a0dea7f3">C12-2047</url>
@@ -2264,9 +2264,9 @@
     </paper>
     <paper id="49">
       <title>Comparing Word Relatedness Measures Based on <fixed-case>G</fixed-case>oogle <tex-math>n</tex-math>-grams</title>
-      <author><first>Aminul</first><last>Islam</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <pages>495–506</pages>
       <url hash="ab98a39d">C12-2049</url>
       <bibkey>islam-etal-2012-comparing</bibkey>
@@ -2274,7 +2274,7 @@
     <paper id="50">
       <title>Two-Stage Bootstrapping for Anaphora Resolution</title>
       <author><first>Balaji</first><last>Jagan</last></author>
-      <author><first>T V</first><last>Geetha</last></author>
+      <author id="t-v-geetha"><first>T V</first><last>Geetha</last></author>
       <author><first>Ranjani</first><last>Parthasarathi</last></author>
       <pages>507–516</pages>
       <url hash="3f50ec8d">C12-2050</url>
@@ -2282,15 +2282,15 @@
     </paper>
     <paper id="51">
       <title>Explorations in the Speakers’ Interaction Experience and Self-Assessments</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>517–526</pages>
       <url hash="646c7bdc">C12-2051</url>
       <bibkey>jokinen-2012-explorations</bibkey>
     </paper>
     <paper id="52">
       <title>Multimodal Signals and Holistic Interaction Structuring</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>527–538</pages>
       <url hash="32f8fd44">C12-2052</url>
       <bibkey>jokinen-wilcock-2012-multimodal</bibkey>
@@ -2299,9 +2299,9 @@
       <title>New Insights from Coarse Word Sense Disambiguation in the Crowd</title>
       <author><first>Adam</first><last>Kapelner</last></author>
       <author><first>Krishna</first><last>Kaliannan</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Dean</first><last>Foster</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
       <pages>539–548</pages>
       <url hash="b6024d86">C12-2053</url>
       <bibkey>kapelner-etal-2012-new</bibkey>
@@ -2309,8 +2309,8 @@
     <paper id="54">
       <title>A Unified Sentence Space for Categorical Distributional-Compositional Semantics: Theory and Experiments</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>549–558</pages>
       <url hash="bef672cb">C12-2054</url>
       <bibkey>kartsaklis-etal-2012-unified</bibkey>
@@ -2326,7 +2326,7 @@
     <paper id="56">
       <title>Classification of Inconsistent Sentiment Words using Syntactic Constructions</title>
       <author><first>Wiltrud</first><last>Kessler</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>569–578</pages>
       <url hash="1e21c4f7">C12-2056</url>
       <bibkey>kessler-schutze-2012-classification</bibkey>
@@ -2358,14 +2358,14 @@
     <paper id="60">
       <title>Decoder-based Discriminative Training of Phrase Segmentation for Statistical Machine Translation</title>
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>611–620</pages>
       <url hash="683ee4ee">C12-2060</url>
       <bibkey>lee-rim-2012-decoder</bibkey>
     </paper>
     <paper id="61">
       <title>Glimpses of <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hina from Classical <fixed-case>C</fixed-case>hinese Poems</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Tak-sum</first><last>Wong</last></author>
       <pages>621–632</pages>
       <url hash="5c100e8d">C12-2061</url>
@@ -2373,7 +2373,7 @@
     </paper>
     <paper id="62">
       <title>Conversion between Scripts of <fixed-case>P</fixed-case>unjabi: Beyond Simple Transliteration</title>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
       <pages>633–642</pages>
       <url hash="67e8f197">C12-2062</url>
@@ -2381,7 +2381,7 @@
     </paper>
     <paper id="63">
       <title>Development of a Complete <fixed-case>U</fixed-case>rdu-<fixed-case>H</fixed-case>indi Transliteration System</title>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
       <pages>643–652</pages>
       <url hash="1fb9dc0a">C12-2063</url>
@@ -2399,8 +2399,8 @@
     <paper id="65">
       <title>Phrase-Based Evaluation for Machine Translation</title>
       <author><first>Liangyou</first><last>Li</last></author>
-      <author><first>Zhengxian</first><last>Gong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>663–672</pages>
       <url hash="70dcbefc">C12-2065</url>
       <bibkey>li-etal-2012-phrase</bibkey>
@@ -2417,7 +2417,7 @@
     <paper id="67">
       <title>Active Learning for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>683–692</pages>
       <url hash="5df5f51b">C12-2067</url>
@@ -2448,7 +2448,7 @@
       <author><first>Nadi</first><last>Tomeh</last></author>
       <author><first>Guang</first><last>Xiang</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>713–722</pages>
       <url hash="09a81fac">C12-2070</url>
       <bibkey>ling-etal-2012-improving</bibkey>
@@ -2456,7 +2456,7 @@
     <paper id="71">
       <title>Expected Error Minimization with Ultraconservative Update for <fixed-case>SMT</fixed-case></title>
       <author><first>Lemao</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
@@ -2475,7 +2475,7 @@
     </paper>
     <paper id="73">
       <title>Unsupervised Domain Adaptation for Joint Segmentation and <fixed-case>POS</fixed-case>-Tagging</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <pages>745–754</pages>
       <url hash="5a741575">C12-2073</url>
@@ -2528,12 +2528,12 @@
       <author><first>Yann</first><last>Mathet</last></author>
       <author><first>Antoine</first><last>Widlöcher</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Claire</first><last>François</last></author>
+      <author id="claire-francois"><first>Claire</first><last>François</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>809–818</pages>
       <url hash="36e8d409">C12-2079</url>
       <bibkey>mathet-etal-2012-manual</bibkey>
@@ -2586,7 +2586,7 @@
       <author><first>Yuta</first><last>Hayashibe</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>863–872</pages>
       <url hash="ff38133b">C12-2084</url>
       <bibkey>mizumoto-etal-2012-effect</bibkey>
@@ -2603,7 +2603,7 @@
     <paper id="86">
       <title>Relational Structures and Models for Coreference Resolution</title>
       <author><first>Truc-Vien T.</first><last>Nguyen</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>883–892</pages>
       <url hash="2ca160c2">C12-2086</url>
       <bibkey>nguyen-poesio-2012-relational</bibkey>
@@ -2620,7 +2620,7 @@
     </paper>
     <paper id="88">
       <title>Lexical Categories for Improved Parsing of Web Data</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Arne</first><last>Skjærholt</last></author>
       <pages>903–912</pages>
       <url hash="56afa0c2">C12-2088</url>
@@ -2628,8 +2628,8 @@
     </paper>
     <paper id="89">
       <title>Text-To-Speech for Languages without an Orthography</title>
-      <author><first>Sukhada</first><last>Palkar</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="sukhada"><first>Sukhada</first><last>Palkar</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Alok</first><last>Parlikar</last></author>
       <pages>913–922</pages>
       <url hash="077df2bf">C12-2089</url>
@@ -2640,7 +2640,7 @@
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Khumbar</first><last>Debbarma</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>923–932</pages>
       <url hash="52d8b3fb">C12-2090</url>
       <bibkey>patra-etal-2012-part</bibkey>
@@ -2650,7 +2650,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>933–942</pages>
       <url hash="07a94826">C12-2091</url>
       <bibkey>peitz-etal-2012-forced</bibkey>
@@ -2668,7 +2668,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Feng</first><last>Ji</last></author>
       <author><first>Jiayi</first><last>Zhao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>951–964</pages>
       <url hash="49b81fd2">C12-2093</url>
       <bibkey>qiu-etal-2012-joint</bibkey>
@@ -2728,7 +2728,7 @@
       <title>Using Qualia Information to Identify Lexical Semantic Classes in an Unsupervised Clustering Task</title>
       <author><first>Lauren</first><last>Romeo</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>1029–1038</pages>
       <url hash="8e786c5b">C12-2100</url>
       <bibkey>romeo-etal-2012-using</bibkey>
@@ -2738,7 +2738,7 @@
       <author><first>Ewa</first><last>Rudnicka</last></author>
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>1039–1048</pages>
       <url hash="4097e83f">C12-2101</url>
       <bibkey>rudnicka-etal-2012-strategy</bibkey>
@@ -2749,7 +2749,7 @@
       <author><first>Donghyeon</first><last>Lee</last></author>
       <author><first>Injae</first><last>Lee</last></author>
       <author><first>Sangdo</first><last>Han</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <author><first>Myungjae</first><last>Kim</last></author>
       <author><first>Kyungduk</first><last>Kim</last></author>
       <pages>1049–1058</pages>
@@ -2758,7 +2758,7 @@
     </paper>
     <paper id="103">
       <title>A Fully Coreference-annotated Corpus of Scholarly Papers from the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
       <author><first>Jörg</first><last>Steffen</last></author>
       <pages>1059–1070</pages>
@@ -2776,7 +2776,7 @@
     <paper id="105">
       <title>Data-driven Dependency Parsing With Empty Heads</title>
       <author><first>Wolfgang</first><last>Seeker</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
@@ -2786,7 +2786,7 @@
     </paper>
     <paper id="106">
       <title>Extension of <fixed-case>TSVM</fixed-case> to Multi-Class and Hierarchical Text Classification Problems With General Losses</title>
-      <author><first>Sathiya Keerthi</first><last>Selvaraj</last></author>
+      <author id="sathiya-keerthi-selvaraj"><first>Sathiya Keerthi</first><last>Selvaraj</last></author>
       <author><first>Sundararajan</first><last>Sellamanickam</last></author>
       <author><first>Shirish</first><last>Shevade</last></author>
       <pages>1091–1100</pages>
@@ -2795,7 +2795,7 @@
     </paper>
     <paper id="107">
       <title>Calculation of Phrase Probabilities for Statistical Machine Translation by using Belief Functions</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Simon</first><last>Petitrenaud</last></author>
       <pages>1101–1110</pages>
       <url hash="b3dfa4be">C12-2107</url>
@@ -2804,8 +2804,8 @@
     <paper id="108">
       <title>Sense and Reference Disambiguation in <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Hui</first><last>Shen</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1111–1120</pages>
       <url hash="4384f9ad">C12-2108</url>
       <bibkey>shen-etal-2012-sense</bibkey>
@@ -2813,7 +2813,7 @@
     <paper id="109">
       <title>Unsupervised Metaphor Paraphrasing using a Vector Space Model</title>
       <author><first>Ekaterina</first><last>Shutova</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>1121–1130</pages>
       <url hash="a98d97e1">C12-2109</url>
@@ -2842,7 +2842,7 @@
       <author><first>Yvonne</first><last>Skalban</last></author>
       <author><first>Le An</first><last>Ha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>1151–1160</pages>
       <url hash="6e971525">C12-2112</url>
       <bibkey>skalban-etal-2012-automatic</bibkey>
@@ -2851,22 +2851,22 @@
       <title>A More Cohesive Summarizer</title>
       <author><first>Christian</first><last>Smith</last></author>
       <author><first>Henrik</first><last>Danielsson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>1161–1170</pages>
       <url hash="0d7411f5">C12-2113</url>
       <bibkey>smith-etal-2012-cohesive</bibkey>
     </paper>
     <paper id="114">
       <title>Robust Learning in Random Subspaces: Equipping <fixed-case>NLP</fixed-case> for <fixed-case>OOV</fixed-case> Effects</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>1171–1180</pages>
       <url hash="fa0f0e13">C12-2114</url>
       <bibkey>sogaard-johannsen-2012-robust</bibkey>
     </paper>
     <paper id="115">
       <title>An Empirical Etudy of Non-Lexical Extensions to Delexicalized Transfer</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Julie</first><last>Wulff</last></author>
       <pages>1181–1190</pages>
       <url hash="b5f69da2">C12-2115</url>
@@ -2877,7 +2877,7 @@
       <author><first>Yan</first><last>Song</last></author>
       <author><first>Prescott</first><last>Klassen</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>1191–1200</pages>
       <url hash="8e2c6691">C12-2116</url>
       <bibkey>song-etal-2012-entropy</bibkey>
@@ -2894,7 +2894,7 @@
     <paper id="118">
       <title>Acquiring and Generalizing Causal Inference Rules from Deverbal Noun Constructions</title>
       <author><first>Shohei</first><last>Tanaka</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <pages>1209–1218</pages>
       <url hash="e70510a8">C12-2118</url>
@@ -2903,7 +2903,7 @@
     <paper id="119">
       <title>Advertising Legality Recognition</title>
       <author><first>Yi-jie</first><last>Tang</last></author>
-      <author><first>Cong-kai</first><last>Lin</last></author>
+      <author id="cong-kai-lin"><first>Cong-kai</first><last>Lin</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>1219–1228</pages>
       <url hash="d94327a8">C12-2119</url>
@@ -2913,7 +2913,7 @@
       <title>A Joint Phrasal and Dependency Model for Paraphrase Alignment</title>
       <author><first>Kapil</first><last>Thadani</last></author>
       <author><first>Scott</first><last>Martin</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>1229–1238</pages>
       <url hash="4e45f641">C12-2120</url>
       <bibkey>thadani-etal-2012-joint</bibkey>
@@ -2931,7 +2931,7 @@
       <author><first>Zhaopeng</first><last>Tu</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
       <pages>1249–1260</pages>
@@ -2948,9 +2948,9 @@
     </paper>
     <paper id="124">
       <title>Automatic Extraction of Polar Adjectives for the Creation of Polarity Lexicons</title>
-      <author><first>Silvia</first><last>Vázquez</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="silvia-vazquez"><first>Silvia</first><last>Vázquez</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <pages>1271–1280</pages>
       <url hash="f0caebbd">C12-2124</url>
@@ -3000,8 +3000,8 @@
     <paper id="130">
       <title>A Unified Framework for Discourse Argument Identification via Shallow Semantic Parsing</title>
       <author><first>Fan</first><last>Xu</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1331–1340</pages>
       <url hash="ebbb21c4">C12-2130</url>
       <bibkey>xu-etal-2012-unified</bibkey>
@@ -3063,7 +3063,7 @@
     <paper id="137">
       <title><fixed-case>C</fixed-case>hinese Word Sense Disambiguation based on Context Expansion</title>
       <author><last>Yang</last><first>Zhizhuo</first></author>
-      <author><last>Huang</last><first>Heyan</first></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>1401–1408</pages>
       <url hash="5f43762f">C12-2137</url>
       <bibkey>yang-huang-2012-chinese</bibkey>
@@ -3074,7 +3074,7 @@
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1409–1418</pages>
       <url hash="126abddb">C12-2138</url>
       <bibkey>zhou-etal-2012-cross</bibkey>
@@ -3085,7 +3085,7 @@
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2012: Demonstration Papers</booktitle>
       <url hash="9c0a779c">C12-3</url>
       <editor><first>Martin</first><last>Kay</last></editor>
-      <editor><first>Christian</first><last>Boitet</last></editor>
+      <editor id="christian-boitet"><first>Christian</first><last>Boitet</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -3115,7 +3115,7 @@
       <title>Keyphrase Extraction in Scientific Articles: A Supervised Approach</title>
       <author><first>Pinaki</first><last>Bhaskar</last></author>
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>17–24</pages>
       <url hash="a47acefe">C12-3003</url>
       <bibkey>bhaskar-etal-2012-keyphrase</bibkey>
@@ -3146,8 +3146,8 @@
     </paper>
     <paper id="7">
       <title>Word Root Finder: a Morphological Segmentor Based on <fixed-case>CRF</fixed-case></title>
-      <author><first>Joseph Z</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="joseph-z-chang"><first>Joseph Z</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>51–58</pages>
       <url hash="d982f2a8">C12-3007</url>
       <bibkey>chang-chang-2012-word</bibkey>
@@ -3172,15 +3172,15 @@
     <paper id="10">
       <title><fixed-case>D</fixed-case>om<fixed-case>E</fixed-case>x: Extraction of Sentiment Lexicons for Domains and Meta-Domains</title>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>77–86</pages>
       <url hash="b2fd2277">C12-3010</url>
       <bibkey>chetviorkin-loukachevitch-2012-domex</bibkey>
     </paper>
     <paper id="11">
       <title>On the <fixed-case>R</fixed-case>omanian Rhyme Detection</title>
-      <author><first>Alina</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>87–94</pages>
       <url hash="40c75f53">C12-3011</url>
       <bibkey>ciobanu-dinu-2012-romanian</bibkey>
@@ -3188,7 +3188,7 @@
     <paper id="12">
       <title>Hierarchical Dialogue Policy Learning using Flexible State Transitions and Linear Function Approximation</title>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Nina</first><last>Dethlefs</last></author>
       <pages>95–102</pages>
       <url hash="b4cddd93">C12-3012</url>
@@ -3197,8 +3197,8 @@
     <paper id="13">
       <title>Automated Paradigm Selection for <fixed-case>FSA</fixed-case> based <fixed-case>K</fixed-case>onkani Verb Morphological Analyzer</title>
       <author><first>Shilpa</first><last>Desai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>103–110</pages>
       <url hash="e0a9f303">C12-3013</url>
       <bibkey>desai-etal-2012-automated</bibkey>
@@ -3214,16 +3214,16 @@
     </paper>
     <paper id="15">
       <title>Dealing with the Grey Sheep of the <fixed-case>R</fixed-case>omanian Gender System, the Neuter</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Maria</first><last>Sulea</last></author>
+      <author id="octavia-maria-sulea"><first>Maria</first><last>Sulea</last></author>
       <pages>119–124</pages>
       <url hash="7a155d8d">C12-3015</url>
       <bibkey>dinu-etal-2012-dealing</bibkey>
     </paper>
     <paper id="16">
       <title>Authorial Studies using Ranked Lexical Features</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Sergiu</first><last>Nisioi</last></author>
       <pages>125–130</pages>
       <url hash="bfa7ce52">C12-3016</url>
@@ -3232,8 +3232,8 @@
     <paper id="17">
       <title><fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest: a Treebank Exploitation Tool for non <fixed-case>NLP</fixed-case>-Specialists</title>
       <author><first>Achille</first><last>Falaise</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
-      <author><first>Agnès</first><last>Tutin</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
       <author><first>David</first><last>Rouquet</last></author>
       <pages>131–140</pages>
       <url hash="f7563b7b">C12-3017</url>
@@ -3243,7 +3243,7 @@
       <title>An In-Context and Collaborative Software Localisation Model</title>
       <author><first>Amel</first><last>Fraisse</last></author>
       <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
       <pages>141–146</pages>
       <url hash="75cc74cc">C12-3018</url>
       <bibkey>fraisse-etal-2012-context</bibkey>
@@ -3296,7 +3296,7 @@
     <paper id="24">
       <title>Automatic <fixed-case>P</fixed-case>unjabi Text Extractive Summarization System</title>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Gurpreet</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet</first><last>Lehal</last></author>
       <pages>191–198</pages>
       <url hash="e146d9f2">C12-3024</url>
       <bibkey>gupta-lehal-2012-automatic</bibkey>
@@ -3304,7 +3304,7 @@
     <paper id="25">
       <title>Complete Pre Processing Phase of <fixed-case>P</fixed-case>unjabi Text Extractive Summarization System</title>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Gurpreet</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet</first><last>Lehal</last></author>
       <pages>199–206</pages>
       <url hash="2ef807d7">C12-3025</url>
       <bibkey>gupta-lehal-2012-complete</bibkey>
@@ -3337,7 +3337,7 @@
     </paper>
     <paper id="29">
       <title>Modeling <fixed-case>P</fixed-case>ollyanna Phenomena in <fixed-case>C</fixed-case>hinese Sentiment Analysis</title>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Ho-Cheng</first><last>Yu</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>231–238</pages>
@@ -3349,7 +3349,7 @@
       <author><first>Salil</first><last>Joshi</last></author>
       <author><first>Arindam</first><last>Chatterjee</last></author>
       <author><first>Arun Karthikeyan</first><last>Karra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>239–246</pages>
       <url hash="622d0ab2">C12-3030</url>
       <bibkey>joshi-etal-2012-eating</bibkey>
@@ -3357,8 +3357,8 @@
     <paper id="31">
       <title><fixed-case>I</fixed-case> Can Sense It: a Comprehensive Online System for <fixed-case>WSD</fixed-case></title>
       <author><first>Salil</first><last>Joshi</last></author>
-      <author><first>Mitesh M</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>247–254</pages>
       <url hash="06e822dd">C12-3031</url>
       <bibkey>joshi-etal-2012-sense</bibkey>
@@ -3367,7 +3367,7 @@
       <title>Collaborative Computer-Assisted Translation Applied to Pedagogical Documents and Literary Works</title>
       <author><first>Ruslan</first><last>Kalitvianski</last></author>
       <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
       <pages>255–260</pages>
       <url hash="8c54b938">C12-3032</url>
       <bibkey>kalitvianski-etal-2012-collaborative</bibkey>
@@ -3377,7 +3377,7 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Arindam</first><last>Chatterjee</last></author>
       <author><first>Salil</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>261–266</pages>
       <url hash="132046c6">C12-3033</url>
       <bibkey>kanojia-etal-2012-discrimination</bibkey>
@@ -3386,7 +3386,7 @@
       <title>Rule Based <fixed-case>U</fixed-case>rdu Stemmer</title>
       <author><first>Rohit</first><last>Kansal</last></author>
       <author><first>Vishal</first><last>Goyal</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>267–276</pages>
       <url hash="4c90af85">C12-3034</url>
       <bibkey>kansal-etal-2012-rule</bibkey>
@@ -3416,18 +3416,18 @@
     </paper>
     <paper id="38">
       <title>Open Information Extraction for <fixed-case>SOV</fixed-case> Language Based on Entity-Predicate Pair Detection</title>
-      <author><first>Woong-Ki</first><last>Lee</last></author>
-      <author><first>Yeon-Su</first><last>Lee</last></author>
+      <author id="woong-ki-lee"><first>Woong-Ki</first><last>Lee</last></author>
+      <author id="yeon-su-lee"><first>Yeon-Su</first><last>Lee</last></author>
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
-      <author><first>Won-Ho</first><last>Ryu</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="won-ho-ryu"><first>Won-Ho</first><last>Ryu</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>305–312</pages>
       <url hash="a0d00edf">C12-3038</url>
       <bibkey>lee-etal-2012-open</bibkey>
     </paper>
     <paper id="39">
       <title>An Omni-Font <fixed-case>G</fixed-case>urmukhi to <fixed-case>S</fixed-case>hahmukhi Transliteration System</title>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
       <author><first>Savleen Kaur</first><last>Chowdhary</last></author>
       <pages>313–320</pages>
@@ -3446,11 +3446,11 @@
     </paper>
     <paper id="41">
       <title>Recognition of Named-Event Passages in News Articles</title>
-      <author><first>Luis</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luis</first><last>Marujo</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>João P.</first><last>Neto</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="joao-p-neto"><first>João P.</first><last>Neto</last></author>
       <author><first>David</first><last>Matos</last></author>
       <pages>329–336</pages>
       <url hash="2e870574">C12-3041</url>
@@ -3458,8 +3458,8 @@
     </paper>
     <paper id="42">
       <title>Nonparametric Model for <fixed-case>I</fixed-case>nupiaq Word Segmentation</title>
-      <author><first>Thuy Linh</first><last>Nguyen</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="thuylinh-nguyen"><first>Thuy Linh</first><last>Nguyen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>337–344</pages>
       <url hash="23abb8df">C12-3042</url>
       <bibkey>nguyen-vogel-2012-nonparametric</bibkey>
@@ -3474,7 +3474,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-<fixed-case>PT</fixed-case>: An Open <fixed-case>B</fixed-case>razilian <fixed-case>W</fixed-case>ordnet for Reasoning</title>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <pages>353–360</pages>
@@ -3523,8 +3523,8 @@
     </paper>
     <paper id="50">
       <title>Open source multi-platform <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> for <fixed-case>NLP</fixed-case></title>
-      <author><first>Max</first><last>Silberztein</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <pages>401–408</pages>
       <url hash="dbb8f033">C12-3050</url>
@@ -3533,7 +3533,7 @@
     <paper id="51">
       <title><fixed-case>P</fixed-case>unjabi Text-To-Speech Synthesis System</title>
       <author><first>Parminder</first><last>Singh</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>409–416</pages>
       <url hash="efa1bbb3">C12-3051</url>
       <bibkey>singh-lehal-2012-punjabi</bibkey>
@@ -3556,7 +3556,7 @@
     <paper id="54">
       <title><fixed-case>C</fixed-case>entral and <fixed-case>S</fixed-case>outh-<fixed-case>E</fixed-case>ast <fixed-case>E</fixed-case>uropean Resources in <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case></title>
       <author><first>Marko</first><last>Tadić</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <pages>431–438</pages>
       <url hash="afeba9f1">C12-3054</url>
       <bibkey>tadic-varadi-2012-central</bibkey>
@@ -3572,7 +3572,7 @@
     </paper>
     <paper id="56">
       <title>Visualization on Financial Terms via Risk Ranking from Financial Reports</title>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
       <pages>447–452</pages>
       <url hash="9c6e54dd">C12-3056</url>
@@ -3582,7 +3582,7 @@
       <title><fixed-case>UNL</fixed-case> Explorer</title>
       <author><first>Hiroshi</first><last>Uchida</last></author>
       <author><first>Meiying</first><last>Zhu</last></author>
-      <author><first>Md. Anwarus Salam</first><last>Khan</last></author>
+      <author id="md-anwarus-salam-khan"><first>Md. Anwarus Salam</first><last>Khan</last></author>
       <pages>453–458</pages>
       <url hash="c1a1d815">C12-3057</url>
       <bibkey>uchida-etal-2012-unl</bibkey>
@@ -3598,8 +3598,8 @@
     <paper id="59">
       <title>Generating Questions from Web Community Contents</title>
       <author><first>Baoxun</first><last>Wang</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <author><first>Deyuan</first><last>Zhang</last></author>
       <pages>467–474</pages>
@@ -3608,10 +3608,10 @@
     </paper>
     <paper id="60">
       <title>Demo of i<fixed-case>MAG</fixed-case> Possibilities: <fixed-case>MT</fixed-case>-postediting, Translation Quality Evaluation, Parallel Corpus Production</title>
-      <author><first>Ling Xiao</first><last>Wang</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ling-xiao-wang"><first>Ling Xiao</first><last>Wang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Valerie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valerie</first><last>Bellynck</last></author>
       <pages>475–482</pages>
       <url hash="6a547e6a">C12-3060</url>
       <bibkey>wang-etal-2012-demo</bibkey>
@@ -3625,7 +3625,7 @@
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>483–492</pages>
       <url hash="c9b8f38a">C12-3061</url>
       <bibkey>wuebker-etal-2012-jane</bibkey>
@@ -3648,7 +3648,7 @@
     </paper>
     <paper id="64">
       <title>Developing and Evaluating a Computer-Assisted Near-Synonym Learning System</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Kai-Hsiang</first><last>Hsu</last></author>
       <pages>509–516</pages>
       <url hash="849b9539">C12-3064</url>
@@ -3664,7 +3664,7 @@
     </paper>
     <paper id="66">
       <title><fixed-case>SMR</fixed-case>-<fixed-case>C</fixed-case>mp: Square-Mean-Root Approach to Comparison of Monolingual Contrastive Corpora</title>
-      <author><first>HuaRui</first><last>Zhang</last></author>
+      <author id="huarui-zhang"><first>HuaRui</first><last>Zhang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Francesca</first><last>Quattri</last></author>
       <pages>527–534</pages>
diff --git a/data/xml/C14.xml b/data/xml/C14.xml
index b3dc3ceee8..f0efe196cc 100644
--- a/data/xml/C14.xml
+++ b/data/xml/C14.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2014, the 25th International Conference on Computational Linguistics: Technical Papers</booktitle>
       <url hash="3bf4f103">C14-1</url>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
-      <editor><first>Jan</first><last>Hajic</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajic</last></editor>
       <publisher>Dublin City University and Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>August</month>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Learning from 26 Languages: Program Management and Science in the <fixed-case>B</fixed-case>abel Program</title>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>1</pages>
       <url hash="548e4dba">C14-1001</url>
       <bibkey>harper-2014-learning</bibkey>
@@ -34,7 +34,7 @@
     <paper id="3">
       <title>Cross-lingual Coreference Resolution of Pronouns</title>
       <author><first>Michal</first><last>Novák</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>14–24</pages>
       <url hash="87ee789e">C14-1003</url>
       <bibkey>novak-zabokrtsky-2014-cross</bibkey>
@@ -51,7 +51,7 @@
     <paper id="5">
       <title>Hierarchical Topical Segmentation with Affinity Propagation</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>37–47</pages>
       <url hash="c470f80e">C14-1005</url>
       <bibkey>kazantseva-szpakowicz-2014-hierarchical</bibkey>
@@ -75,8 +75,8 @@
     </paper>
     <paper id="8">
       <title>Deep Convolutional Neural Networks for Sentiment Analysis of Short Texts</title>
-      <author><first>Cícero</first><last>dos Santos</last></author>
-      <author><first>Maíra</first><last>Gatti</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
+      <author id="maira-gatti"><first>Maíra</first><last>Gatti</last></author>
       <pages>69–78</pages>
       <url hash="30c8cf6b">C14-1008</url>
       <bibkey>dos-santos-gatti-2014-deep</bibkey>
@@ -84,7 +84,7 @@
     <paper id="9">
       <title>Joint Inference and Disambiguation of Implicit Sentiments via Implicature Constraints</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Yoonjung</first><last>Choi</last></author>
       <pages>79–88</pages>
       <url hash="dcfbcdb1">C14-1009</url>
@@ -104,8 +104,8 @@
       <title>Multi-Objective Search Results Clustering</title>
       <author><first>Sudipta</first><last>Acharya</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>99–108</pages>
       <url hash="d7f39793">C14-1011</url>
       <bibkey>acharya-etal-2014-multi</bibkey>
@@ -162,7 +162,7 @@
     </paper>
     <paper id="17">
       <title>Learning Task-specific Bilexical Embeddings</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
       <author><first>Ariadna</first><last>Quattoni</last></author>
       <pages>161–171</pages>
@@ -183,7 +183,7 @@
     <paper id="19">
       <title>Political Tendency Identification in <fixed-case>T</fixed-case>witter using Sentiment Analysis Techniques</title>
       <author><first>Ferran</first><last>Pla</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <pages>183–192</pages>
       <url hash="df912dd2">C14-1019</url>
       <bibkey>pla-hurtado-2014-political</bibkey>
@@ -191,12 +191,12 @@
     <paper id="20">
       <title>A Study of using Syntactic and Semantic Structures for Concept Segmentation and Labeling</title>
       <author><first>Iman</first><last>Saleh</last></author>
-      <author><first>Scott</first><last>Cyphers</last></author>
+      <author id="scott-cyphers"><first>Scott</first><last>Cyphers</last></author>
       <author><first>Jim</first><last>Glass</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>193–202</pages>
       <url hash="22cfe64a">C14-1020</url>
       <bibkey>saleh-etal-2014-study</bibkey>
@@ -206,7 +206,7 @@
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Xuyang</first><last>Sun</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>203–212</pages>
       <url hash="3e14e997">C14-1021</url>
       <bibkey>zhang-etal-2014-time</bibkey>
@@ -278,7 +278,7 @@
     <paper id="29">
       <title>Unsupervised Training Set Generation for Automatic Acquisition of Technical Terminology in Patents</title>
       <author><first>Alex</first><last>Judea</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Soeren</first><last>Bruegmann</last></author>
       <pages>290–300</pages>
       <url hash="977c3f96">C14-1029</url>
@@ -287,7 +287,7 @@
     <paper id="30">
       <title>A Data Driven Approach for Person Name Disambiguation in Web Search Results</title>
       <author><first>Agustín D.</first><last>Delgado</last></author>
-      <author><first>Raquel</first><last>Martínez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martínez</last></author>
       <author><first>Víctor</first><last>Fresno</last></author>
       <author><first>Soto</first><last>Montalvo</last></author>
       <pages>301–310</pages>
@@ -297,15 +297,15 @@
     <paper id="31">
       <title>Picking the Amateur’s Mind - Predicting Chess Player Strength from Game Annotations</title>
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>311–321</pages>
       <url hash="f2bb1cc5">C14-1031</url>
       <bibkey>scheible-schutze-2014-picking</bibkey>
     </paper>
     <paper id="32">
       <title><fixed-case>Z</fixed-case>ipf’s Law and Statistical Data on <fixed-case>M</fixed-case>odern <fixed-case>T</fixed-case>ibetan</title>
-      <author><first>Huidan</first><last>Liu</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <pages>322–333</pages>
       <url hash="83c0c436">C14-1032</url>
@@ -315,7 +315,7 @@
       <title>Simple or Complex? Assessing the readability of <fixed-case>B</fixed-case>asque Texts</title>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
       <author><first>María Jesús</first><last>Aranzabe</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <author><first>Haritz</first><last>Salaberri</last></author>
       <pages>334–344</pages>
       <url hash="4196842d">C14-1033</url>
@@ -332,7 +332,7 @@
     </paper>
     <paper id="35">
       <title>Inducing Word Sense with Automatically Learned Hidden Concepts</title>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Wenzhe</first><last>Pei</last></author>
       <author><first>Miaohong</first><last>Chen</last></author>
       <pages>355–364</pages>
@@ -358,7 +358,7 @@
       <title>Author Verification Using Common N-Gram Profiles of Text Documents</title>
       <author><first>Magdalena</first><last>Jankowska</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
-      <author><first>Vlado</first><last>Kešelj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Kešelj</last></author>
       <pages>387–397</pages>
       <url hash="80eb891c">C14-1038</url>
       <bibkey>jankowska-etal-2014-author</bibkey>
@@ -366,7 +366,7 @@
     <paper id="39">
       <title>Dynamically Integrating Cross-Domain Translation Memory into Phrase-Based Machine Translation during Decoding</title>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>398–408</pages>
       <url hash="763bb5a9">C14-1039</url>
@@ -374,9 +374,9 @@
     </paper>
     <paper id="40">
       <title>Machine Translation Quality Estimation Across Domains</title>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>409–420</pages>
       <url hash="4c766c5b">C14-1040</url>
       <bibkey>c-de-souza-etal-2014-machine</bibkey>
@@ -386,7 +386,7 @@
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>421–432</pages>
       <url hash="9958a996">C14-1041</url>
       <bibkey>durrani-etal-2014-investigating</bibkey>
@@ -401,7 +401,7 @@
     <paper id="43">
       <title>Fourteen Light Tasks for comparing Analogical and Phrase-based Machine Translation</title>
       <author><first>Rafik</first><last>Rhouma</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>444–454</pages>
       <url hash="c13a6169">C14-1043</url>
       <bibkey>rhouma-langlais-2014-fourteen</bibkey>
@@ -419,9 +419,9 @@
     </paper>
     <paper id="45">
       <title>The annotation of the Central Unit in Rhetorical Structure Trees: A Key Step in Annotating Rhetorical Relations</title>
-      <author><first>Mikel</first><last>Iruskieta</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
+      <author id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
       <pages>466–475</pages>
       <url hash="111f579b">C14-1045</url>
       <bibkey>iruskieta-etal-2014-annotation</bibkey>
@@ -429,7 +429,7 @@
     <paper id="46">
       <title>Measuring Lexical Cohesion: Beyond Word Repetition</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>476–485</pages>
       <url hash="5a5cc1ef">C14-1046</url>
       <bibkey>kazantseva-szpakowicz-2014-measuring</bibkey>
@@ -437,8 +437,8 @@
     <paper id="47">
       <title>Fast Tweet Retrieval with Compact Binary Codes</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>486–496</pages>
       <url hash="ac2bb2d9">C14-1047</url>
       <bibkey>guo-etal-2014-fast</bibkey>
@@ -457,7 +457,7 @@
       <title>Using unmarked contexts in nominal lexical semantic classification</title>
       <author><first>Lauren</first><last>Romeo</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>508–519</pages>
       <url hash="b573ef4f">C14-1049</url>
       <bibkey>romeo-etal-2014-using</bibkey>
@@ -467,7 +467,7 @@
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Hanxiao</first><last>Shi</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>520–529</pages>
       <url hash="f62868c9">C14-1050</url>
       <bibkey>wang-etal-2014-skill</bibkey>
@@ -484,7 +484,7 @@
     </paper>
     <paper id="52">
       <title>An <fixed-case>LR</fixed-case>-inspired generalized lexicalized phrase structure parser</title>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>541–552</pages>
       <url hash="911c61c3">C14-1052</url>
       <bibkey>crabbe-2014-lr</bibkey>
@@ -501,8 +501,8 @@
     </paper>
     <paper id="54">
       <title>Biber Redux: Reconsidering Dimensions of Variation in <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Songqiao</first><last>Su</last></author>
       <author><first>Jesse</first><last>Stuart</last></author>
       <pages>565–576</pages>
@@ -521,7 +521,7 @@
     <paper id="56">
       <title>Enforcing Topic Diversity in a Document Recommender for Conversations</title>
       <author><first>Maryam</first><last>Habibi</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>588–599</pages>
       <url hash="0097664c">C14-1056</url>
       <bibkey>habibi-popescu-belis-2014-enforcing</bibkey>
@@ -529,9 +529,9 @@
     <paper id="57">
       <title>Identifying Important Features for Graph Retrieval</title>
       <author><first>Zhuo</first><last>Li</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <author><first>Hui</first><last>Fang</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>600–609</pages>
       <url hash="9314d2ab">C14-1057</url>
       <bibkey>li-etal-2014-identifying</bibkey>
@@ -564,7 +564,7 @@
     </paper>
     <paper id="61">
       <title>Unsupervised Coreference Resolution by Utilizing the Most Informative Relations</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>644–655</pages>
       <url hash="8aa73c9b">C14-1061</url>
@@ -573,7 +573,7 @@
     <paper id="62">
       <title>Knowledge Sharing via Social Login: Exploiting Microblogging Service for Warming up Social Question Answering Websites</title>
       <author><first>Yang</first><last>Xiao</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Kun</first><last>Wang</last></author>
       <author><first>Zhen</first><last>Xiao</last></author>
       <pages>656–666</pages>
@@ -604,7 +604,7 @@
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Ya</first><last>Guo</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>688–697</pages>
       <url hash="d6fee090">C14-1065</url>
       <bibkey>gong-etal-2014-generative</bibkey>
@@ -612,7 +612,7 @@
     <paper id="66">
       <title>Inducing Latent Semantic Relations for Structured Distributional Semantics</title>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>698–708</pages>
       <url hash="66c00a02">C14-1066</url>
       <bibkey>jauhar-hovy-2014-inducing</bibkey>
@@ -629,7 +629,7 @@
     <paper id="68">
       <title>Towards Syntax-aware Compositional Distributional Semantic Models</title>
       <author><first>Lorenzo</first><last>Ferrone</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>721–730</pages>
       <url hash="7dcf2ce6">C14-1068</url>
       <bibkey>ferrone-zanzotto-2014-towards</bibkey>
@@ -664,15 +664,15 @@
     <paper id="72">
       <title>docrep: A lightweight and efficient document representation framework</title>
       <author><first>Tim</first><last>Dawborn</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>762–771</pages>
       <url hash="45b794aa">C14-1072</url>
       <bibkey>dawborn-curran-2014-docrep</bibkey>
     </paper>
     <paper id="73">
       <title>Why Implementation Matters: Evaluation of an Open-source Constraint Grammar Parser</title>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>772–780</pages>
       <url hash="660d22fe">C14-1073</url>
@@ -732,7 +732,7 @@
     </paper>
     <paper id="80">
       <title>Identifying Emotion Labels from Psychiatric Social Texts Using Independent Component Analysis</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chun-Yuan</first><last>Ho</last></author>
       <pages>837–847</pages>
       <url hash="7c8a6c8d">C14-1080</url>
@@ -767,14 +767,14 @@
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
       <author><first>Rohan</first><last>Ramanath</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>884–894</pages>
       <url hash="07f06cf0">C14-1084</url>
       <bibkey>liu-etal-2014-step</bibkey>
     </paper>
     <paper id="85">
       <title>An Off-the-shelf Approach to Authorship Attribution</title>
-      <author><first>Jamal A.</first><last>Nasir</last></author>
+      <author id="jamal-a-nasir"><first>Jamal A.</first><last>Nasir</last></author>
       <author><first>Nico</first><last>Görnitz</last></author>
       <author><first>Ulf</first><last>Brefeld</last></author>
       <pages>895–904</pages>
@@ -828,7 +828,7 @@
       <title>Lexical Chaining for Measuring Discourse Coherence Quality in Test-taker Essays</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Jill</first><last>Burstein</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>950–961</pages>
       <url hash="6e6b9787">C14-1090</url>
       <bibkey>somasundaran-etal-2014-lexical</bibkey>
@@ -847,7 +847,7 @@
       <title>A Framework for Translating <fixed-case>SMS</fixed-case> Messages</title>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Ron</first><last>Shacham</last></author>
       <pages>974–983</pages>
       <url hash="0f2e90af">C14-1092</url>
@@ -856,7 +856,7 @@
     <paper id="93">
       <title>A Hybrid Approach to Features Representation for Fine-grained <fixed-case>A</fixed-case>rabic Named Entity Recognition</title>
       <author><first>Fahd</first><last>Alotaibi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>984–995</pages>
       <url hash="85ec8ba0">C14-1093</url>
       <bibkey>alotaibi-lee-2014-hybrid</bibkey>
@@ -864,7 +864,7 @@
     <paper id="94">
       <title>Prior-informed Distant Supervision for Temporal Evidence Classification</title>
       <author><first>Ridho</first><last>Reinanda</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>996–1006</pages>
       <url hash="ae0ce260">C14-1094</url>
       <bibkey>reinanda-de-rijke-2014-prior</bibkey>
@@ -892,7 +892,7 @@
       <title>Inclusive yet Selective: Supervised Distributional Hypernymy Detection</title>
       <author><first>Stephen</first><last>Roller</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>1025–1036</pages>
       <url hash="21c48893">C14-1097</url>
       <bibkey>roller-etal-2014-inclusive</bibkey>
@@ -910,7 +910,7 @@
     <paper id="99">
       <title>What good are ‘Nominalkomposita’ for ‘noun compounds’: Multilingual Extraction and Structure Analysis of Nominal Compositions using Linguistic Restrictors</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>1047–1058</pages>
       <url hash="dbebfc39">C14-1099</url>
       <bibkey>ziering-van-der-plas-2014-good</bibkey>
@@ -925,16 +925,16 @@
       <author><first>Laleh</first><last>Roostapour</last></author>
       <author><first>Jordan</first><last>Bender</last></author>
       <author><first>Abhimanu</first><last>Kumar</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Mandy</first><last>Simons</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1059–1070</pages>
       <url hash="b39f7848">C14-1100</url>
       <bibkey>bhatia-etal-2014-automatic</bibkey>
     </paper>
     <paper id="101">
       <title>Argument structure of adverbial derivatives in <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <pages>1071–1080</pages>
       <url hash="d4825063">C14-1101</url>
       <bibkey>boguslavsky-2014-argument</bibkey>
@@ -942,7 +942,7 @@
     <paper id="102">
       <title>Active Learning in Noisy Conditions for Spoken Language Understanding</title>
       <author><first>Hossein</first><last>Hadian</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <pages>1081–1090</pages>
       <url hash="814f3117">C14-1102</url>
       <bibkey>hadian-sameti-2014-active</bibkey>
@@ -980,7 +980,7 @@
       <author><first>Koichi</first><last>Akabe</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>1124–1132</pages>
       <url hash="1890d06a">C14-1106</url>
@@ -1002,7 +1002,7 @@
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1144–1153</pages>
       <url hash="52f451b7">C14-1108</url>
       <bibkey>cao-etal-2014-lexicalized</bibkey>
@@ -1011,14 +1011,14 @@
       <title>Automatic Corpus Expansion for <fixed-case>C</fixed-case>hinese Word Segmentation by Exploiting the Redundancy of Web Information</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>ChaoChao</first><last>Huang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1154–1164</pages>
       <url hash="e6c7291c">C14-1109</url>
       <bibkey>qiu-etal-2014-automatic</bibkey>
     </paper>
     <paper id="110">
       <title>Fast High-Accuracy Part-of-Speech Tagging by Independent Classifiers</title>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>1165–1176</pages>
       <url hash="672baa7f">C14-1110</url>
       <bibkey>moore-2014-fast</bibkey>
@@ -1037,7 +1037,7 @@
       <title><fixed-case>J</fixed-case>apanese Word Reordering Integrated with Dependency Parsing</title>
       <author><first>Kazushi</first><last>Yoshida</last></author>
       <author><first>Tomohiro</first><last>Ohno</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>1186–1196</pages>
       <url hash="1ff3b83a">C14-1112</url>
@@ -1055,7 +1055,7 @@
       <title>Ranking Multidocument Event Descriptions for Building Thematic Timelines</title>
       <author><first>Kiem-Hieu</first><last>Nguyen</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Veronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Veronique</first><last>Moriceau</last></author>
       <pages>1208–1217</pages>
       <url hash="7e6d1720">C14-1114</url>
       <bibkey>nguyen-etal-2014-ranking</bibkey>
@@ -1066,7 +1066,7 @@
       <author><first>Subhashini</first><last>Venugopalan</last></author>
       <author><first>Sergio</first><last>Guadarrama</last></author>
       <author><first>Kate</first><last>Saenko</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1218–1227</pages>
       <url hash="43b797bb">C14-1115</url>
       <bibkey>thomason-etal-2014-integrating</bibkey>
@@ -1075,7 +1075,7 @@
       <title>Cross-Topic Authorship Attribution: Will Out-Of-Topic Data Help?</title>
       <author><first>Upendra</first><last>Sapkota</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>1228–1237</pages>
@@ -1084,7 +1084,7 @@
     </paper>
     <paper id="117">
       <title>Online Gaming for Crowd-sourcing Phrase-equivalents</title>
-      <author><first>A.</first><last>Kumaran</last></author>
+      <author id="a-kumaran"><first>A.</first><last>Kumaran</last></author>
       <author><first>Melissa</first><last>Densmore</last></author>
       <author><first>Shaishav</first><last>Kumar</last></author>
       <pages>1238–1247</pages>
@@ -1119,7 +1119,7 @@
     </paper>
     <paper id="121">
       <title>Global Methods for Cross-lingual Semantic Role and Predicate Labelling</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Chenhua</first><last>Chen</last></author>
       <pages>1279–1290</pages>
@@ -1137,14 +1137,14 @@
     <paper id="123">
       <title>Unsupervised Word Sense Induction using Distributional Statistics</title>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1302–1310</pages>
       <url hash="7c8e1e9b">C14-1123</url>
       <bibkey>goyal-hovy-2014-unsupervised</bibkey>
     </paper>
     <paper id="124">
       <title>Group based Self Training for <fixed-case>E</fixed-case>-Commerce Product Record Linkage</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Yuexin</first><last>Wu</last></author>
       <author><first>Hongfei</first><last>Yan</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
@@ -1208,18 +1208,18 @@
     </paper>
     <paper id="131">
       <title>From neighborhood to parenthood: the advantages of dependency representation over bigrams in Brown clustering</title>
-      <author><first>Simon</first><last>Šuster</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1382–1391</pages>
       <url hash="3a3f9667">C14-1131</url>
       <bibkey>suster-van-noord-2014-neighborhood</bibkey>
     </paper>
     <paper id="132">
       <title>An Empirical Evaluation of Automatic Conversion from Constituency to Dependency in <fixed-case>H</fixed-case>ungarian</title>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Zsolt</first><last>Szántó</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>1392–1401</pages>
       <url hash="3b0b4fba">C14-1132</url>
       <bibkey>simko-etal-2014-empirical</bibkey>
@@ -1237,7 +1237,7 @@
     <paper id="134">
       <title>Modeling Newswire Events using Neural Networks for Anomaly Detection</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1414–1422</pages>
       <url hash="0166e0b9">C14-1134</url>
       <bibkey>dasigi-hovy-2014-modeling</bibkey>
@@ -1248,8 +1248,8 @@
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <pages>1423–1434</pages>
       <url hash="ddeb4732">C14-1135</url>
       <bibkey>sano-etal-2014-million</bibkey>
@@ -1258,14 +1258,14 @@
       <title>Combining Supervised and Unsupervised Parsing for Distributional Similarity</title>
       <author><first>Martin</first><last>Riedl</last></author>
       <author><first>Irina</first><last>Alles</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1435–1446</pages>
       <url hash="359299d9">C14-1136</url>
       <bibkey>riedl-etal-2014-combining</bibkey>
     </paper>
     <paper id="137">
       <title>A <fixed-case>M</fixed-case>arkovian approach to distributional semantics with application to semantic compositionality</title>
-      <author><first>Édouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Édouard</first><last>Grave</last></author>
       <author><first>Guillaume</first><last>Obozinski</last></author>
       <author><first>Francis</first><last>Bach</last></author>
       <pages>1447–1456</pages>
@@ -1276,7 +1276,7 @@
       <title>A Beam-Search Decoder for Disfluency Detection</title>
       <author><first>Xuancong</first><last>Wang</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Khe Chai</first><last>Sim</last></author>
+      <author id="khe-chai-sim"><first>Khe Chai</first><last>Sim</last></author>
       <pages>1457–1467</pages>
       <url hash="661f0e04">C14-1138</url>
       <bibkey>wang-etal-2014-beam</bibkey>
@@ -1327,7 +1327,7 @@
     <paper id="144">
       <title>3arif: A Corpus of Modern Standard and <fixed-case>E</fixed-case>gyptian <fixed-case>A</fixed-case>rabic Tweets Annotated for Epistemic Modality Using Interactive Crowdsourcing</title>
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Jana</first><last>Diesner</last></author>
       <pages>1521–1532</pages>
       <url hash="8e1b4925">C14-1144</url>
@@ -1337,7 +1337,7 @@
       <title>Empirical Analysis of Aggregation Methods for Collective Annotation</title>
       <author><first>Ciyang</first><last>Qing</last></author>
       <author><first>Ulle</first><last>Endriss</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Justin</first><last>Kruger</last></author>
       <pages>1533–1542</pages>
       <url hash="6255233b">C14-1145</url>
@@ -1353,7 +1353,7 @@
     <paper id="147">
       <title>Collective Named Entity Disambiguation using Graph Ranking and Clique Partitioning Approaches</title>
       <author><first>Ayman</first><last>Alhelbawy</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>1544–1555</pages>
       <url hash="523c9f24">C14-1147</url>
       <bibkey>alhelbawy-gaizauskas-2014-collective</bibkey>
@@ -1375,7 +1375,7 @@
       <author><first>Chi</first><last>Wang</last></author>
       <author><first>Shi</first><last>Zhi</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Malik</first><last>Magdon-Ismail</last></author>
       <pages>1567–1578</pages>
       <url hash="5e5982ba">C14-1149</url>
@@ -1393,7 +1393,7 @@
       <title>An Enhanced <fixed-case>L</fixed-case>esk Word Sense Disambiguation Algorithm through a Distributional Semantic Model</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Annalina</first><last>Caputo</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>1591–1600</pages>
       <url hash="b6ae1732">C14-1151</url>
       <bibkey>basile-etal-2014-enhanced</bibkey>
@@ -1401,7 +1401,7 @@
     <paper id="152">
       <title>Word Sense Induction Using Lexical Chain based Hypergraph Model</title>
       <author><first>Tao</first><last>Qian</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Mingyao</first><last>Zhang</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <author><first>Congling</first><last>Xia</last></author>
@@ -1422,8 +1422,8 @@
       <title>Novel Word-sense Identification</title>
       <author><first>Paul</first><last>Cook</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1624–1635</pages>
       <url hash="4e48184b">C14-1154</url>
       <bibkey>cook-etal-2014-novel</bibkey>
@@ -1431,7 +1431,7 @@
     <paper id="155">
       <title>Learning to Summarise Related Sentences</title>
       <author><first>Emmanouil</first><last>Tzouridis</last></author>
-      <author><first>Jamal</first><last>Nasir</last></author>
+      <author id="jamal-a-nasir"><first>Jamal</first><last>Nasir</last></author>
       <author><first>Ulf</first><last>Brefeld</last></author>
       <pages>1636–1647</pages>
       <url hash="f00a9555">C14-1155</url>
@@ -1453,7 +1453,7 @@
       <title>Query-Focused Opinion Summarization for User-Generated Content</title>
       <author><first>Lu</first><last>Wang</last></author>
       <author><first>Hema</first><last>Raghavan</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <pages>1660–1669</pages>
       <url hash="a8821d74">C14-1157</url>
@@ -1491,7 +1491,7 @@
       <author><first>Takuya</first><last>Hiraoka</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>1706–1717</pages>
       <url hash="f7a4b2f8">C14-1161</url>
@@ -1500,18 +1500,18 @@
     <paper id="162">
       <title>Towards multimodal modeling of physicians’ diagnostic confidence and self-awareness using medical narratives</title>
       <author><first>Joseph</first><last>Bullard</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Qi</first><last>Yu</last></author>
       <author><first>Pengcheng</first><last>Shi</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>1718–1727</pages>
       <url hash="16ec6e98">C14-1162</url>
       <bibkey>bullard-etal-2014-towards</bibkey>
     </paper>
     <paper id="163">
       <title>Towards Semantic Validation of a Derivational Lexicon</title>
-      <author><first>Britta</first><last>Zeller</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="britta-zeller"><first>Britta</first><last>Zeller</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>1728–1739</pages>
       <url hash="278f63ed">C14-1163</url>
@@ -1520,7 +1520,7 @@
     <paper id="164">
       <title>Detecting Learner Errors in the Choice of Content Words Using Compositional Distributional Semantics</title>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>1740–1751</pages>
       <url hash="ecec9dea">C14-1164</url>
       <bibkey>kochmar-briscoe-2014-detecting</bibkey>
@@ -1556,10 +1556,10 @@
     </paper>
     <paper id="168">
       <title>Adapting taggers to <fixed-case>T</fixed-case>witter with not-so-distant supervision</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1783–1792</pages>
       <url hash="1f8197e2">C14-1168</url>
       <bibkey>plank-etal-2014-adapting</bibkey>
@@ -1567,14 +1567,14 @@
     <paper id="169">
       <title>Interpolated <fixed-case>D</fixed-case>irichlet Class Language Model for Speech Recognition Incorporating Long-distance N-grams</title>
       <author><first>Md. Akmal</first><last>Haidar</last></author>
-      <author><first>Douglas</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas</first><last>O’Shaughnessy</last></author>
       <pages>1793–1802</pages>
       <url hash="9791c4e2">C14-1169</url>
       <bibkey>haidar-oshaughnessy-2014-interpolated</bibkey>
     </paper>
     <paper id="170">
       <title>Situated Incremental Natural Language Understanding using a Multimodal, Linguistically-driven Update Model</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Spyros</first><last>Kousidis</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>1803–1812</pages>
@@ -1583,9 +1583,9 @@
     </paper>
     <paper id="171">
       <title>Quality Estimation for Automatic Speech Recognition</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <pages>1813–1823</pages>
       <url hash="4456a8e8">C14-1171</url>
@@ -1593,9 +1593,9 @@
     </paper>
     <paper id="172">
       <title>A Generic Anaphora Resolution Engine for <fixed-case>I</fixed-case>ndian Languages</title>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
       <pages>1824–1833</pages>
       <url hash="94d2edb8">C14-1172</url>
       <bibkey>lalitha-devi-etal-2014-generic</bibkey>
@@ -1617,7 +1617,7 @@
     </paper>
     <paper id="175">
       <title>Rediscovering Annotation Projection for Cross-Lingual Parser Induction</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>1854–1864</pages>
       <url hash="0baffd79">C14-1175</url>
       <bibkey>tiedemann-2014-rediscovering</bibkey>
@@ -1626,16 +1626,16 @@
       <title>Synchronous Constituent Context Model for Inducing Bilingual Synchronous Structures</title>
       <author><first>Xiangyu</first><last>Duan</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1865–1874</pages>
       <url hash="6ffa80f8">C14-1176</url>
       <bibkey>duan-etal-2014-synchronous</bibkey>
     </paper>
     <paper id="177">
       <title>Syntactic Parsing and Compound Recognition via Dual Decomposition: Application to <fixed-case>F</fixed-case>rench</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>1875–1885</pages>
       <url hash="cb3d1de4">C14-1177</url>
       <bibkey>le-roux-etal-2014-syntactic</bibkey>
@@ -1681,7 +1681,7 @@
     <paper id="182">
       <title>Latent Domain Translation Models in Mix-of-Domains Haystack</title>
       <author><first>Hoang</first><last>Cuong</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>1928–1939</pages>
       <url hash="20d91c47">C14-1182</url>
       <bibkey>cuong-simaan-2014-latent</bibkey>
@@ -1699,7 +1699,7 @@
       <author><first>Dolf</first><last>Trieschnigg</last></author>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
       <author><first>Rilana</first><last>Gravel</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Theo</first><last>Meder</last></author>
       <author><first>Franciska</first><last>de Jong</last></author>
       <pages>1950–1961</pages>
@@ -1709,7 +1709,7 @@
     <paper id="185">
       <title>Exploring Syntactic Features for Native Language Identification: A Variationist Perspective on Feature Encoding and Ensemble Optimization</title>
       <author><first>Serhiy</first><last>Bykh</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>1962–1973</pages>
       <url hash="b39043a3">C14-1185</url>
       <bibkey>bykh-meurers-2014-exploring</bibkey>
@@ -1726,14 +1726,14 @@
     <paper id="187">
       <title>Empirical analysis of exploiting review helpfulness for extractive summarization of online reviews</title>
       <author><first>Wenting</first><last>Xiong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1985–1995</pages>
       <url hash="8c03c619">C14-1187</url>
       <bibkey>xiong-litman-2014-empirical</bibkey>
     </paper>
     <paper id="188">
       <title>Lexico-syntactic text simplification and compression with typed dependencies</title>
-      <author><first>Mandya</first><last>Angrosh</last></author>
+      <author id="angrosh-mandya"><first>Mandya</first><last>Angrosh</last></author>
       <author><first>Tadashi</first><last>Nomoto</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <pages>1996–2006</pages>
@@ -1794,7 +1794,7 @@
       <author><first>Rasoul</first><last>Kaljahi</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>2052–2063</pages>
       <url hash="4e47d27e">C14-1194</url>
       <bibkey>kaljahi-etal-2014-quality</bibkey>
@@ -1802,7 +1802,7 @@
     <paper id="195">
       <title>Effective Incorporation of Source Syntax into Hierarchical Phrase-based Translation</title>
       <author><first>Tong</first><last>Xiao</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>2064–2074</pages>
@@ -1838,7 +1838,7 @@
     </paper>
     <paper id="199">
       <title>Exploring Fine-grained Entity Type Constraints for Distantly Supervised Relation Extraction</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Kang</first><last>Liu</last></author>
       <author><first>Liheng</first><last>Xu</last></author>
       <author><first>Jun</first><last>Zhao</last></author>
@@ -1856,7 +1856,7 @@
     <paper id="201">
       <title>Limited memory incremental coreference resolution</title>
       <author><first>Kellie</first><last>Webster</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>2129–2139</pages>
       <url hash="b162b297">C14-1201</url>
       <bibkey>webster-curran-2014-limited</bibkey>
@@ -1880,8 +1880,8 @@
     <paper id="204">
       <title>Employing Event Inference to Improve Semi-Supervised <fixed-case>C</fixed-case>hinese Event Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2161–2171</pages>
       <url hash="e5f204fc">C14-1204</url>
       <bibkey>li-etal-2014-employing</bibkey>
@@ -1898,7 +1898,7 @@
       <title>Unsupervised extraction of semantic relations using discourse cues</title>
       <author><first>Juliette</first><last>Conrath</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>2184–2194</pages>
       <url hash="a61bc53b">C14-1206</url>
@@ -1923,7 +1923,7 @@
     <paper id="209">
       <title>Augment Dependency-to-String Translation with Fixed and Floating Structures</title>
       <author><first>Jun</first><last>Xie</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>2217–2226</pages>
       <url hash="ac72430d">C14-1209</url>
@@ -1934,14 +1934,14 @@
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2227–2236</pages>
       <url hash="9f8cb5aa">C14-1210</url>
       <bibkey>cao-etal-2014-soft</bibkey>
     </paper>
     <paper id="211">
       <title>Using Spreading Activation to Evaluate and Improve Ontologies</title>
-      <author><first>Rónan</first><last>Mac an tSaoir</last></author>
+      <author id="ronan-mac-an-tsaoir"><first>Rónan</first><last>Mac an tSaoir</last></author>
       <pages>2237–2248</pages>
       <url hash="84fc6cff">C14-1211</url>
       <bibkey>mac-an-tsaoir-2014-using</bibkey>
@@ -1951,7 +1951,7 @@
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Daoud</first><last>Clarke</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Bill</first><last>Keller</last></author>
       <pages>2249–2259</pages>
       <url hash="743e9a0a">C14-1212</url>
@@ -1960,10 +1960,10 @@
     <paper id="213">
       <title>“One Entity per Discourse” and “One Entity per Collocation” Improve Named-Entity Disambiguation</title>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Bernardo</first><last>Cabaleiro</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>2260–2269</pages>
       <url hash="dbd6375b">C14-1213</url>
       <bibkey>barrena-etal-2014-one</bibkey>
@@ -2018,7 +2018,7 @@
       <title>Unsupervised Word Segmentation in Context</title>
       <author><first>Gabriel</first><last>Synnaeve</last></author>
       <author><first>Isabelle</first><last>Dautriche</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>2326–2334</pages>
@@ -2040,7 +2040,7 @@
       <title>A context-based model for Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Andrea</first><last>Vanzo</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>2345–2354</pages>
       <url hash="e93a88f5">C14-1221</url>
       <bibkey>vanzo-etal-2014-context</bibkey>
@@ -2075,14 +2075,14 @@
       <title>Claims on demand – an initial demonstration of a system for automatic detection and polarity identification of context dependent claims in massive corpora</title>
       <author><first>Noam</first><last>Slonim</last></author>
       <author><first>Ehud</first><last>Aharoni</last></author>
-      <author><first>Carlos</first><last>Alzate</last></author>
+      <author id="carlos-alzate"><first>Carlos</first><last>Alzate</last></author>
       <author><first>Roy</first><last>Bar-Haim</last></author>
       <author><first>Yonatan</first><last>Bilu</last></author>
       <author><first>Lena</first><last>Dankin</last></author>
       <author><first>Iris</first><last>Eiron</last></author>
       <author><first>Daniel</first><last>Hershcovich</last></author>
       <author><first>Shay</first><last>Hummel</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Tamar</first><last>Lavee</last></author>
       <author><first>Ran</first><last>Levy</last></author>
       <author><first>Paul</first><last>Matchen</last></author>
@@ -2099,11 +2099,11 @@
     </paper>
     <paper id="3">
       <title>Copa 2014 <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: a frame-based trilingual electronic dictionary for the Football World Cup</title>
-      <author><first>Tiago T.</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago T.</first><last>Torrent</last></author>
       <author><first>Maria Margarida M.</first><last>Salomão</last></author>
       <author><first>Fernanda C. A.</first><last>Campos</last></author>
       <author><first>Regina M. M.</first><last>Braga</last></author>
-      <author><first>Ely E. S.</first><last>Matos</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely E. S.</first><last>Matos</last></author>
       <author><first>Maucha A.</first><last>Gamonal</last></author>
       <author><first>Julia A.</first><last>Gonçalves</last></author>
       <author><first>Bruno C. P.</first><last>Souza</last></author>
@@ -2134,7 +2134,7 @@
     </paper>
     <paper id="6">
       <title>Processing Discourse in Dislog on the <fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop Platform</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>25–28</pages>
       <url hash="fc5f6916">C14-2006</url>
       <bibkey>saint-dizier-2014-processing</bibkey>
@@ -2209,10 +2209,10 @@
     <paper id="15">
       <title>A Sentence Judgment System for Grammatical Error Detection</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Kuei-Ching</first><last>Lee</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>67–70</pages>
       <url hash="78bbd5a7">C14-2015</url>
@@ -2269,7 +2269,7 @@
       <title>What or Who is Multilingual <fixed-case>W</fixed-case>atson?</title>
       <author><first>Keith</first><last>Cortis</last></author>
       <author><first>Urvesh</first><last>Bhowan</last></author>
-      <author><first>Ronan</first><last>Mac an tSaoir</last></author>
+      <author id="ronan-mac-an-tsaoir"><first>Ronan</first><last>Mac an tSaoir</last></author>
       <author><first>D.J.</first><last>McCloskey</last></author>
       <author><first>Mikhail</first><last>Sogrin</last></author>
       <author><first>Ross</first><last>Cadogan</last></author>
@@ -2310,7 +2310,7 @@
     <paper id="25">
       <title>Method51 for Mining Insight from Social Media Datasets</title>
       <author><first>Simon</first><last>Wibberley</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
       <pages>115–119</pages>
       <url hash="a1b5bb42">C14-2025</url>
@@ -2318,9 +2318,9 @@
     </paper>
     <paper id="26">
       <title><fixed-case>MT</fixed-case>-<fixed-case>EQ</fixed-case>u<fixed-case>A</fixed-case>l: a Toolkit for Human Assessment of Machine Translation Output</title>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Mohammad Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>Mohammad Amin</first><last>Farajian</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>120–123</pages>
       <url hash="fe1253c6">C14-2026</url>
@@ -2330,7 +2330,7 @@
       <title><fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: user-driven development of the <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case> corpus interfaces</title>
       <author><first>Martin</first><last>Reynaert</last></author>
       <author><first>Matje</first><last>van de Camp</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <pages>124–128</pages>
       <url hash="9169eae6">C14-2027</url>
       <bibkey>reynaert-etal-2014-opensonar</bibkey>
@@ -2340,7 +2340,7 @@
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Marco</first><last>Trombetti</last></author>
       <author><first>Alessandro</first><last>Cattelan</last></author>
@@ -2350,7 +2350,7 @@
       <author><first>Alberto</first><last>Massidda</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Frederic</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
@@ -2377,8 +2377,8 @@
     </frontmatter>
     <paper id="1">
       <title>Biomedical/Clinical <fixed-case>NLP</fixed-case></title>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
-      <author><first>Meliha</first><last>Yetişgen</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetişgen</last></author>
       <author><first>Amber</first><last>Stubbs</last></author>
       <pages>1–2</pages>
       <url hash="8ef0e867">C14-3001</url>
@@ -2386,7 +2386,7 @@
     </paper>
     <paper id="2">
       <title>Using Neural Networks for Modeling and Representing Natural Languages</title>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <pages>3–4</pages>
       <url hash="94f948cd">C14-3002</url>
       <bibkey>mikolov-2014-using</bibkey>
@@ -2401,7 +2401,7 @@
     </paper>
     <paper id="4">
       <title>Automated Grammatical Error Correction for Language Learners</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Claudia</first><last>Leacock</last></author>
       <pages>8–10</pages>
       <url hash="707cae00">C14-3004</url>
@@ -2409,8 +2409,8 @@
     </paper>
     <paper id="5">
       <title>Selection Bias, Label Bias, and Bias in Ground Truth</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>11–13</pages>
       <url hash="cb509721">C14-3005</url>
diff --git a/data/xml/C16.xml b/data/xml/C16.xml
index c185fc9e1b..153bac8a50 100644
--- a/data/xml/C16.xml
+++ b/data/xml/C16.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2016, the 26th International Conference on Computational Linguistics: Technical Papers</booktitle>
       <url hash="ac8faccf">C16-1</url>
-      <editor><first>Yuji</first><last>Matsumoto</last></editor>
+      <editor id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></editor>
       <editor><first>Rashmi</first><last>Prasad</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Boosting for Efficient Model Selection for Syntactic Parsing</title>
       <author><first>Rachel</first><last>Bawden</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>1–11</pages>
       <url hash="ceb9e60f">C16-1001</url>
       <abstract>We present an efficient model selection method using boosting for transition-based constituency parsing. It is designed for exploring a high-dimensional search space, defined by a large set of feature templates, as for example is typically the case when parsing morphologically rich languages. Our method removes the need to manually define heuristic constraints, which are often imposed in current state-of-the-art selection methods. Our experiments for French show that the method is more efficient and is also capable of producing compact, state-of-the-art models.</abstract>
@@ -38,7 +38,7 @@
     </paper>
     <paper id="3">
       <title>Grammar induction from (lots of) words alone</title>
-      <author><first>John K</first><last>Pate</last></author>
+      <author id="john-k-pate"><first>John K</first><last>Pate</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>23–32</pages>
       <url hash="0620e685">C16-1003</url>
@@ -64,7 +64,7 @@
       <author><first>Noriki</first><last>Nishida</last></author>
       <author><first>Raphael</first><last>Shu</last></author>
       <author><first>Yo</first><last>Ehara</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Hideki</first><last>Nakayama</last></author>
       <pages>44–52</pages>
@@ -76,7 +76,7 @@
       <title>An Improved Phrase-based Approach to Annotating and Summarizing Student Course Responses</title>
       <author><first>Wencan</first><last>Luo</last></author>
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>53–63</pages>
       <url hash="54ba9b30">C16-1006</url>
       <abstract>Teaching large classes remains a great challenge, primarily because it is difficult to attend to all the student needs in a timely manner. Automatic text summarization systems can be leveraged to summarize the student feedback, submitted immediately after each lecture, but it is left to be discovered what makes a good summary for student responses. In this work we explore a new methodology that effectively extracts summary phrases from the student responses. Each phrase is tagged with the number of students who raise the issue. The phrases are evaluated along two dimensions: with respect to text content, they should be informative and well-formed, measured by the ROUGE metric; additionally, they shall attend to the most pressing student needs, measured by a newly proposed metric. This work is enabled by a phrase-based annotation and highlighting scheme, which is new to the summarization task. The phrase-based framework allows us to summarize the student responses into a set of bullet points and present to the instructor promptly.</abstract>
@@ -114,8 +114,8 @@
     </paper>
     <paper id="10">
       <title>Expanding wordnets to new languages with multilingual sense disambiguation</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>97–108</pages>
       <url hash="4a35380f">C16-1010</url>
@@ -125,7 +125,7 @@
     <paper id="11">
       <title>A Correlational Encoder Decoder Architecture for Pivot Based Sequence Generation</title>
       <author><first>Amrita</first><last>Saha</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
       <author><first>Janarthanan</first><last>Rajendran</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
@@ -147,7 +147,7 @@
     <paper id="13">
       <title>Improving historical spelling normalization with bi-directional <fixed-case>LSTM</fixed-case>s and multi-task learning</title>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>131–139</pages>
       <url hash="2c4b69d0">C16-1013</url>
       <abstract>Natural-language processing of historical documents is complicated by the abundance of variant spellings and lack of annotated data. A common approach is to normalize the spelling of historical words to modern forms. We explore the suitability of a deep neural network architecture for this task, particularly a deep bi-LSTM network applied on a character level. Our model compares well to previously established normalization algorithms when evaluated on a diverse set of texts from Early New High German. We show that multi-task learning with additional normalization data can improve our model’s performance further.</abstract>
@@ -200,7 +200,7 @@
       <author><first>Daniel</first><last>Clothiaux</last></author>
       <author><first>Emily</first><last>Tagtow</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>181–191</pages>
       <url hash="039a926f">C16-1018</url>
       <abstract>Languages with rich morphology often introduce sparsity in language processing tasks. While morphological analyzers can reduce this sparsity by providing morpheme-level analyses for words, they will often introduce ambiguity by returning multiple analyses for the same surface form. The problem of disambiguating between these morphological parses is further complicated by the fact that a correct parse for a word is not only be dependent on the surface form but also on other words in its context. In this paper, we present a language-agnostic approach to morphological disambiguation. We address the problem of using context in morphological disambiguation by presenting several LSTM-based neural architectures that encode long-range surface-level and analysis-level contextual dependencies. We applied our approach to Turkish, Russian, and Arabic to compare effectiveness across languages, matching state-of-the-art results in two of the three languages. Our results also demonstrate that while context plays a role in learning how to disambiguate, the type and amount of context needed varies between languages.</abstract>
@@ -218,7 +218,7 @@
       <title>An Empirical Exploration of Skip Connections for Sequential Tagging</title>
       <author><first>Huijia</first><last>Wu</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>203–212</pages>
       <url hash="6b79e911">C16-1020</url>
       <abstract>In this paper, we empirically explore the effects of various kinds of skip connections in stacked bidirectional LSTMs for sequential tagging. We investigate three kinds of skip connections connecting to LSTM cells: (a) skip connections to the gates, (b) skip connections to the internal states and (c) skip connections to the cell outputs. We present comprehensive experiments showing that skip connections to cell outputs outperform the remaining two. Furthermore, we observe that using gated identity functions as skip mappings works pretty well. Based on this novel skip connections, we successfully train deep stacked bidirectional LSTM models and obtain state-of-the-art results on CCG supertagging and comparable results on POS tagging.</abstract>
@@ -266,13 +266,13 @@
     </paper>
     <paper id="25">
       <title>Exploiting Sentence and Context Representations in Deep Neural Models for Spoken Language Understanding</title>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>258–267</pages>
       <url hash="ec7f6192">C16-1025</url>
       <abstract>This paper presents a deep learning architecture for the semantic decoder component of a Statistical Spoken Dialogue System. In a slot-filling dialogue, the semantic decoder predicts the dialogue act and a set of slot-value pairs from a set of n-best hypotheses returned by the Automatic Speech Recognition. Most current models for spoken language understanding assume (i) word-aligned semantic annotations as in sequence taggers and (ii) delexicalisation, or a mapping of input words to domain-specific concepts using heuristics that try to capture morphological variation but that do not scale to other domains nor to language variation (e.g., morphology, synonyms, paraphrasing ). In this work the semantic decoder is trained using unaligned semantic annotations and it uses distributed semantic representation learning to overcome the limitations of explicit delexicalisation. The proposed architecture uses a convolutional neural network for the sentence representation and a long-short term memory network for the context representation. Results are presented for the publicly available DSTC2 corpus and an In-car corpus which is similar to DSTC2 but has a significantly higher word error rate (WER).</abstract>
@@ -331,7 +331,7 @@
     <paper id="31">
       <title>A Word Labeling Approach to <fixed-case>T</fixed-case>hai Sentence Boundary Detection and <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Nina</first><last>Zhou</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Nattadaporn</first><last>Lertcheva</last></author>
       <author><first>Xuancong</first><last>Wang</last></author>
       <pages>319–327</pages>
@@ -372,7 +372,7 @@
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>358–368</pages>
       <url hash="a258b91b">C16-1035</url>
       <abstract>In the context of natural language processing, representation learning has emerged as a newly active research subject because of its excellent performance in many applications. Learning representations of words is a pioneering study in this school of research. However, paragraph (or sentence and document) embedding learning is more suitable/reasonable for some tasks, such as sentiment classification and document summarization. Nevertheless, as far as we are aware, there is only a dearth of research focusing on launching unsupervised paragraph embedding methods. Classic paragraph embedding methods infer the representation of a given paragraph by considering all of the words occurring in the paragraph. Consequently, those stop or function words that occur frequently may mislead the embedding learning process to produce a misty paragraph representation. Motivated by these observations, our major contributions are twofold. First, we propose a novel unsupervised paragraph embedding method, named the essence vector (EV) model, which aims at not only distilling the most representative information from a paragraph but also excluding the general background information to produce a more informative low-dimensional vector representation for the paragraph. We evaluate the proposed EV model on benchmark sentiment classification and multi-document summarization tasks. The experimental results demonstrate the effectiveness and applicability of the proposed embedding method. Second, in view of the increasing importance of spoken content processing, an extension of the EV model, named the denoising essence vector (D-EV) model, is proposed. The D-EV model not only inherits the advantages of the EV model but also can infer a more robust representation for a given spoken paragraph against imperfect speech recognition. The utility of the D-EV model is evaluated on a spoken document summarization task, confirming the effectiveness of the proposed embedding method in relation to several well-practiced and state-of-the-art summarization methods.</abstract>
@@ -381,10 +381,10 @@
     <paper id="36">
       <title>Continuous Expressive Speaking Styles Synthesis based on <fixed-case>CVSM</fixed-case> and <fixed-case>MR</fixed-case>-<fixed-case>HMM</fixed-case></title>
       <author><first>Jaime</first><last>Lorenzo-Trueba</last></author>
-      <author><first>Roberto</first><last>Barra-Chicote</last></author>
-      <author><first>Ascension</first><last>Gallardo-Antolin</last></author>
+      <author id="roberto-barra-chicote"><first>Roberto</first><last>Barra-Chicote</last></author>
+      <author id="ascension-gallardo-antolin"><first>Ascension</first><last>Gallardo-Antolin</last></author>
       <author><first>Junichi</first><last>Yamagishi</last></author>
-      <author><first>Juan M.</first><last>Montero</last></author>
+      <author id="juan-m-montero"><first>Juan M.</first><last>Montero</last></author>
       <pages>369–376</pages>
       <url hash="b34034d4">C16-1036</url>
       <abstract>This paper introduces a continuous system capable of automatically producing the most adequate speaking style to synthesize a desired target text. This is done thanks to a joint modeling of the acoustic and lexical parameters of the speaker models by adapting the CVSM projection of the training texts using MR-HMM techniques. As such, we consider that as long as sufficient variety in the training data is available, we should be able to model a continuous lexical space into a continuous acoustic space. The proposed continuous automatic text to speech system was evaluated by means of a perceptual evaluation in order to compare them with traditional approaches to the task. The system proved to be capable of conveying the correct expressiveness (average adequacy of 3.6) with an expressive strength comparable to oracle traditional expressive speech synthesis (average of 3.6) although with a drop in speech quality mainly due to the semi-continuous nature of the data (average quality of 2.9). This means that the proposed system is capable of improving traditional neutral systems without requiring any additional user interaction.</abstract>
@@ -404,7 +404,7 @@
       <title>Frustratingly Easy Neural Domain Adaptation</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>387–396</pages>
       <url hash="4bcd3487">C16-1038</url>
       <abstract>Popular techniques for domain adaptation such as the feature augmentation method of Daumé III (2009) have mostly been considered for sparse binary-valued features, but not for dense real-valued features such as those used in neural networks. In this paper, we describe simple neural extensions of these techniques. First, we propose a natural generalization of the feature augmentation method that uses K + 1 LSTMs where one model captures global patterns across all K domains and the remaining K models capture domain-specific information. Second, we propose a novel application of the framework for learning shared structures by Ando and Zhang (2005) to domain adaptation, and also provide a neural extension of their approach. In experiments on slot tagging over 17 domains, our methods give clear performance improvement over Daumé III (2009) applied on feature-rich CRFs.</abstract>
@@ -412,10 +412,10 @@
     </paper>
     <paper id="39">
       <title>A House United: Bridging the Script and Lexical Barrier between <fixed-case>H</fixed-case>indi and <fixed-case>U</fixed-case>rdu</title>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
-      <author><first>Irshad A.</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="irshad-bhat"><first>Irshad A.</first><last>Bhat</last></author>
       <author><first>Naman</first><last>Jain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>397–408</pages>
       <url hash="f8fba6f2">C16-1039</url>
       <abstract>In Computational Linguistics, Hindi and Urdu are not viewed as a monolithic entity and have received separate attention with respect to their text processing. From part-of-speech tagging to machine translation, models are separately trained for both Hindi and Urdu despite the fact that they represent the same language. The reasons mainly are their divergent literary vocabularies and separate orthographies, and probably also their political status and the social perception that they are two separate languages. In this article, we propose a simple but efficient approach to bridge the lexical and orthographic differences between Hindi and Urdu texts. With respect to text processing, addressing the differences between the Hindi and Urdu texts would be beneficial in the following ways: (a) instead of training separate models, their individual resources can be augmented to train single, unified models for better generalization, and (b) their individual text processing applications can be used interchangeably under varied resource conditions. To remove the script barrier, we learn accurate statistical transliteration models which use sentence-level decoding to resolve word ambiguity. Similarly, we learn cross-register word embeddings from the harmonized Hindi and Urdu corpora to nullify their lexical divergences. As a proof of the concept, we evaluate our approach on the Hindi and Urdu dependency parsing under two scenarios: (a) resource sharing, and (b) resource augmentation. We demonstrate that a neural network-based dependency parser trained on augmented, harmonized Hindi and Urdu resources performs significantly better than the parsing models trained separately on the individual resources. We also show that we can achieve near state-of-the-art results when the parsers are used interchangeably.</abstract>
@@ -425,7 +425,7 @@
       <title>Deeper syntax for better semantic parsing</title>
       <author><first>Olivier</first><last>Michalon</last></author>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>409–420</pages>
       <url hash="8343c42c">C16-1040</url>
@@ -443,7 +443,7 @@
     </paper>
     <paper id="42">
       <title>Promoting multiword expressions in <fixed-case>A</fixed-case>* <fixed-case>TAG</fixed-case> parsing</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <pages>429–439</pages>
@@ -454,8 +454,8 @@
     <paper id="43">
       <title>Incrementally Learning a Dependency Parser to Support Language Documentation in Field Linguistics</title>
       <author><first>Morgan</first><last>Ulinski</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>440–449</pages>
       <url hash="684c81d2">C16-1043</url>
       <abstract>We present experiments in incrementally learning a dependency parser. The parser will be used in the WordsEye Linguistics Tools (WELT) (Ulinski et al., 2014) which supports field linguists documenting a language’s syntax and semantics. Our goal is to make syntactic annotation faster for field linguists. We have created a new parallel corpus of descriptions of spatial relations and motion events, based on pictures and video clips used by field linguists for elicitation of language from native speaker informants. We collected descriptions for each picture and video from native speakers in English, Spanish, German, and Egyptian Arabic. We compare the performance of MSTParser (McDonald et al., 2006) and MaltParser (Nivre et al., 2006) when trained on small amounts of this data. We find that MaltParser achieves the best performance. We also present the results of experiments using the parser to assist with annotation. We find that even when the parser is trained on a single sentence from the corpus, annotation time significantly decreases.</abstract>
@@ -465,7 +465,7 @@
       <title>Inducing Multilingual Text Analysis Tools Using Bidirectional Recurrent Neural Networks</title>
       <author><first>Othman</first><last>Zennaki</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>450–460</pages>
       <url hash="8e4d4a9e">C16-1044</url>
       <abstract>This work focuses on the development of linguistic analysis tools for resource-poor languages. We use a parallel corpus to produce a multilingual word representation based only on sentence level alignment. This representation is combined with the annotated source side (resource-rich language) of the parallel corpus to train text analysis tools for resource-poor languages. Our approach is based on Recurrent Neural Networks (RNN) and has the following advantages: (a) it does not use word alignment information, (b) it does not assume any knowledge about foreign languages, which makes it applicable to a wide range of resource-poor languages, (c) it provides truly multilingual taggers. In a previous study, we proposed a method based on Simple RNN to automatically induce a Part-Of-Speech (POS) tagger. In this paper, we propose an improvement of our neural model. We investigate the Bidirectional RNN and the inclusion of external information (for instance low level information from Part-Of-Speech tags) in the RNN to train a more complex tagger (for instance, a multilingual super sense tagger). We demonstrate the validity and genericity of our method by using parallel corpora (obtained by manual or automatic translation). Our experiments are conducted to induce cross-lingual POS and super sense taggers.</abstract>
@@ -488,7 +488,7 @@
       <title>Determining the Multiword Expression Inventory of a Surprise Language</title>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>471–481</pages>
       <url hash="4b1f94b5">C16-1046</url>
       <abstract>Much previous research on multiword expressions (MWEs) has focused on the token- and type-level tasks of MWE identification and extraction, respectively. Such studies typically target known prevalent MWE types in a given language. This paper describes the first attempt to learn the MWE inventory of a “surprise” language for which we have no explicit prior knowledge of MWE patterns, certainly no annotated MWE data, and not even a parallel corpus. Our proposed model is trained on a treebank with MWE relations of a source language, and can be applied to the monolingual corpus of the surprise language to identify its MWE construction types.</abstract>
@@ -497,9 +497,9 @@
     <paper id="47">
       <title>A Hybrid Deep Learning Architecture for Sentiment Analysis</title>
       <author><first>Md Shad</first><last>Akhtar</last></author>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>482–493</pages>
       <url hash="6422d277">C16-1047</url>
       <abstract>In this paper, we propose a novel hybrid deep learning archtecture which is highly efficient for sentiment analysis in resource-poor languages. We learn sentiment embedded vectors from the Convolutional Neural Network (CNN). These are augmented to a set of optimized features selected through a multi-objective optimization (MOO) framework. The sentiment augmented optimized vector obtained at the end is used for the training of SVM for sentiment classification. We evaluate our proposed approach for coarse-grained (i.e. sentence level) as well as fine-grained (i.e. aspect level) sentiment analysis on four Hindi datasets covering varying domains. In order to show that our proposed method is generic in nature we also evaluate it on two benchmark English datasets. Evaluation shows that the results of the proposed method are consistent across all the datasets and often outperforms the state-of-art systems. To the best of our knowledge, this is the very first attempt where such a deep learning model is used for less-resourced languages such as Hindi.</abstract>
@@ -509,7 +509,7 @@
       <title>Word Segmentation in <fixed-case>S</fixed-case>anskrit Using Path Constrained Random Walks</title>
       <author><first>Amrith</first><last>Krishna</last></author>
       <author><first>Bishal</first><last>Santra</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Sasi Prasanth</first><last>Bandaru</last></author>
       <author><first>Bhumi</first><last>Faldu</last></author>
       <author><first>Yajuvendra</first><last>Singh</last></author>
@@ -533,7 +533,7 @@
       <title>Appraising <fixed-case>UMLS</fixed-case> Coverage for Summarizing Medical Evidence</title>
       <author><first>Elaheh</first><last>ShafieiBavani</last></author>
       <author><first>Mohammad</first><last>Ebrahimi</last></author>
-      <author><first>Raymond</first><last>Wong</last></author>
+      <author id="raymond-wong"><first>Raymond</first><last>Wong</last></author>
       <author><first>Fang</first><last>Chen</last></author>
       <pages>513–524</pages>
       <url hash="e53f6085">C16-1050</url>
@@ -625,7 +625,7 @@
     </paper>
     <paper id="59">
       <title>Keystroke dynamics as signal for shallow syntactic parsing</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>609–619</pages>
       <url hash="0a679b98">C16-1059</url>
       <abstract>Keystroke dynamics have been extensively used in psycholinguistic and writing research to gain insights into cognitive processing. But do keystroke logs contain actual signal that can be used to learn better natural language processing models? We postulate that keystroke dynamics contain information about syntactic structure that can inform shallow syntactic parsing. To test this hypothesis, we explore labels derived from keystroke logs as auxiliary task in a multi-task bidirectional Long Short-Term Memory (bi-LSTM). Our results show promising results on two shallow syntactic parsing tasks, chunking and CCG supertagging. Our model is simple, has the advantage that data can come from distinct sources, and produces models that are significantly better than models trained on the text annotations alone.</abstract>
@@ -681,7 +681,7 @@
     <paper id="65">
       <title>Identifying Cross-Cultural Differences in Word Usage</title>
       <author><first>Aparna</first><last>Garimella</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>James</first><last>Pennebaker</last></author>
       <pages>674–683</pages>
       <url hash="b01effca">C16-1065</url>
@@ -721,7 +721,7 @@
     </paper>
     <paper id="69">
       <title>Understanding the Lexical Simplification Needs of Non-Native Speakers of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>717–727</pages>
       <url hash="950abf59">C16-1069</url>
@@ -742,7 +742,7 @@
     <paper id="71">
       <title>Advancing Linguistic Features and Insights by Label-informed Feature Grouping: An Exploration in the Context of Native Language Identification</title>
       <author><first>Serhiy</first><last>Bykh</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>739–749</pages>
       <url hash="52afbbfe">C16-1071</url>
       <abstract>We propose a hierarchical clustering approach designed to group linguistic features for supervised machine learning that is inspired by variationist linguistics. The method makes it possible to abstract away from the individual feature occurrences by grouping features together that behave alike with respect to the target class, thus providing a new, more general perspective on the data. On the one hand, it reduces data sparsity, leading to quantitative performance gains. On the other, it supports the formation and evaluation of hypotheses about individual choices of linguistic structures. We explore the method using features based on verb subcategorization information and evaluate the approach in the context of the Native Language Identification (NLI) task.</abstract>
@@ -750,10 +750,10 @@
     </paper>
     <paper id="72">
       <title>Modeling Diachronic Change in Scientific Writing with Information Density</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Stefania</first><last>Degaetano-Ortlieb</last></author>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>750–761</pages>
       <url hash="88dd14d4">C16-1072</url>
       <abstract>Previous linguistic research on scientific writing has shown that language use in the scientific domain varies considerably in register and style over time. In this paper we investigate the introduction of information theory inspired features to study long term diachronic change on three levels: lexis, part-of-speech and syntax. Our approach is based on distinguishing between sentences from 19th and 20th century scientific abstracts using supervised classification models. To the best of our knowledge, the introduction of information theoretic features to this task is novel. We show that these features outperform more traditional features, such as token or character n-grams, while leading to more compact models. We present a detailed analysis of feature informativeness in order to gain a better understanding of diachronic change on different linguistic levels.</abstract>
@@ -772,7 +772,7 @@
     <paper id="74">
       <title>Machine Learning for Metrical Analysis of <fixed-case>E</fixed-case>nglish Poetry</title>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>772–781</pages>
       <url hash="37c119e2">C16-1074</url>
@@ -827,7 +827,7 @@
       <title>Automatic Extraction of Learner Errors in <fixed-case>ESL</fixed-case> Sentences Using Linguistically Enhanced Alignments</title>
       <author><first>Mariano</first><last>Felice</last></author>
       <author><first>Christopher</first><last>Bryant</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>825–835</pages>
       <url hash="6dbf0708">C16-1079</url>
       <abstract>We propose a new method of automatically extracting learner errors from parallel English as a Second Language (ESL) sentences in an effort to regularise annotation formats and reduce inconsistencies. Specifically, given an original and corrected sentence, our method first uses a linguistically enhanced alignment algorithm to determine the most likely mappings between tokens, and secondly employs a rule-based function to decide which alignments should be merged. Our method beats all previous approaches on the tested datasets, achieving state-of-the-art results for automatic error extraction.</abstract>
@@ -844,7 +844,7 @@
     </paper>
     <paper id="81">
       <title>How Regular is <fixed-case>J</fixed-case>apanese Loanword Adaptation? A Computational Study</title>
-      <author><first>Lingshuang</first><last>Mao</last></author>
+      <author id="lingshuang-jack-mao"><first>Lingshuang</first><last>Mao</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>847–856</pages>
       <url hash="2f9f6b3e">C16-1081</url>
@@ -854,11 +854,11 @@
     <paper id="82">
       <title>Using Linguistic Data for <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish Verb-Noun Combination Identification</title>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
-      <author><first>Itziar</first><last>Aduriz</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
+      <author id="itziar-aduriz"><first>Itziar</first><last>Aduriz</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>857–867</pages>
       <url hash="6f70310d">C16-1082</url>
       <abstract>We present a linguistic analysis of a set of English and Spanish verb+noun combinations (VNCs), and a method to use this information to improve VNC identification. Firstly, a sample of frequent VNCs are analysed in-depth and tagged along lexico-semantic and morphosyntactic dimensions, obtaining satisfactory inter-annotator agreement scores. Then, a VNC identification experiment is undertaken, where the analysed linguistic data is combined with chunking information and syntactic dependencies. A comparison between the results of the experiment and the results obtained by a basic detection method shows that VNC identification can be greatly improved by using linguistic information, as a large number of additional occurrences are detected with high precision.</abstract>
@@ -867,8 +867,8 @@
     <paper id="83">
       <title>Analyzing Gender Bias in Student Evaluations</title>
       <author><first>Andamlak</first><last>Terkik</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
       <author><first>Scott</first><last>Franklin</last></author>
       <pages>868–876</pages>
@@ -900,7 +900,7 @@
     <paper id="86">
       <title>Extending the Use of <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Unsupervised Morphological Segmentation of Unseen Languages</title>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Tianchun</first><last>Yang</last></author>
       <pages>900–910</pages>
       <url hash="caafa004">C16-1086</url>
@@ -930,7 +930,7 @@
       <author><first>Yunli</first><last>Wang</last></author>
       <author><first>Yong</first><last>Jin</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>932–942</pages>
       <url hash="a9e0f892">C16-1089</url>
       <abstract>The goal of keyphrase extraction is to automatically identify the most salient phrases from documents. The technique has a wide range of applications such as rendering a quick glimpse of a document, or extracting key content for further use. While previous work often assumes keyphrases are a static property of a given documents, in many applications, the appropriate set of keyphrases that should be extracted depends on the set of documents that are being considered together. In particular, good keyphrases should not only accurately describe the content of a document, but also reveal what discriminates it from the other documents. In this paper, we study this problem of extracting discriminative keyphrases. In particularly, we propose to use the hierarchical semantic structure between candidate keyphrases to promote keyphrases that have the right level of specificity to clearly distinguish the target document from others. We show that such knowledge can be used to construct better discriminative keyphrase extraction systems that do not assume a static, fixed set of keyphrases for a document. We show how this helps identify key expertise of authors from their papers, as well as competencies covered by online courses within different domains.</abstract>
@@ -941,7 +941,7 @@
       <author><first>Haoran</first><last>Huang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yeyun</first><last>Gong</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>943–952</pages>
       <url hash="3c9a156d">C16-1090</url>
       <abstract>On microblogging services, people usually use hashtags to mark microblogs, which have a specific theme or content, making them easier for users to find. Hence, how to automatically recommend hashtags for microblogs has received much attention in recent years. Previous deep neural network-based hashtag recommendation approaches converted the task into a multi-class classification problem. However, most of these methods only took the microblog itself into consideration. Motivated by the intuition that the history of users should impact the recommendation procedure, in this work, we extend end-to-end memory networks to perform this task. We incorporate the histories of users into the external memory and introduce a hierarchical attention mechanism to select more appropriate histories. To train and evaluate the proposed method, we also construct a dataset based on microblogs collected from Twitter. Experimental results demonstrate that the proposed methods can significantly outperform state-of-the-art methods. By incorporating the hierarchical attention mechanism, the relative improvement in the proposed method over the state-of-the-art method is around 67.9% in the F1-score.</abstract>
@@ -951,7 +951,7 @@
       <title>Automatic Labelling of Topics with Neural Embeddings</title>
       <author><first>Shraey</first><last>Bhatia</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>953–963</pages>
       <url hash="008dacc6">C16-1091</url>
       <abstract>Topics generated by topic models are typically represented as list of terms. To reduce the cognitive overhead of interpreting these topics for end-users, we propose labelling a topic with a succinct phrase that summarises its theme or idea. Using Wikipedia document titles as label candidates, we compute neural embeddings for documents and words to select the most relevant labels for topics. Comparing to a state-of-the-art topic labelling system, our methodology is simpler, more efficient and finds better topic labels.</abstract>
@@ -964,7 +964,7 @@
       <author><first>Lifeng</first><last>Jin</last></author>
       <author><first>Victoria</first><last>Krakovna</last></author>
       <author><first>Finale</first><last>Doshi-Velez</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <pages>964–975</pages>
@@ -974,7 +974,7 @@
     </paper>
     <paper id="93">
       <title>‘Calling on the classical phone’: a distributional model of adjective-noun errors in learners’ <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
       <pages>976–986</pages>
       <url hash="7f34bc9d">C16-1093</url>
@@ -983,10 +983,10 @@
     </paper>
     <paper id="94">
       <title>Are Cohesive Features Relevant for Text Readability Evaluation?</title>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Delphine</first><last>Bernhard</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
       <pages>987–997</pages>
       <url hash="50cc2d68">C16-1094</url>
@@ -997,10 +997,10 @@
       <title>Named Entity Recognition for Linguistic Rapid Response in Low-Resource Languages: <fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish and <fixed-case>T</fixed-case>ajik</title>
       <author><first>Patrick</first><last>Littell</last></author>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
-      <author><first>Alexa</first><last>Little</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
+      <author id="alexa-n-little"><first>Alexa</first><last>Little</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>998–1006</pages>
       <url hash="a4754377">C16-1095</url>
       <abstract>This paper describes our construction of named-entity recognition (NER) systems in two Western Iranian languages, Sorani Kurdish and Tajik, as a part of a pilot study of “Linguistic Rapid Response” to potential emergency humanitarian relief situations. In the absence of large annotated corpora, parallel corpora, treebanks, bilingual lexica, etc., we found the following to be effective: exploiting distributional regularities in monolingual data, projecting information across closely related languages, and utilizing human linguist judgments. We show promising results on both a four-month exercise in Sorani and a two-day exercise in Tajik, achieved with minimal annotation costs.</abstract>
@@ -1047,7 +1047,7 @@
     </paper>
     <paper id="100">
       <title><fixed-case>C</fixed-case>hinese Poetry Generation with Planning based Neural Network</title>
-      <author><first>Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Zhe</first><last>Wang</last></author>
       <author><first>Wei</first><last>He</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haiyang</first><last>Wu</last></author>
@@ -1062,7 +1062,7 @@
     <paper id="101">
       <title>Predicting sentential semantic compatibility for aggregation in text-to-text generation</title>
       <author><first>Victor</first><last>Chenal</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>1061–1070</pages>
       <url hash="ca1845cd">C16-1101</url>
       <abstract>We examine the task of aggregation in the context of text-to-text generation. We introduce a new aggregation task which frames the process as grouping input sentence fragments into clusters that are to be expressed as a single output sentence. We extract datasets for this task from a corpus using an automatic extraction process. Based on the results of a user study, we develop two gold-standard clusterings and corresponding evaluation methods for each dataset. We present a hierarchical clustering framework for predicting aggregation decisions on this task, which outperforms several baselines and can serve as a reference in future work.</abstract>
@@ -1071,7 +1071,7 @@
     <paper id="102">
       <title>Sequential Clustering and Contextual Importance Measures for Incremental Update Summarization</title>
       <author><first>Markus</first><last>Zopf</last></author>
-      <author><first>Eneldo</first><last>Loza Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo</first><last>Loza Mencía</last></author>
       <author><first>Johannes</first><last>Fürnkranz</last></author>
       <pages>1071–1082</pages>
       <url hash="577cb754">C16-1102</url>
@@ -1082,7 +1082,7 @@
       <title>Natural Language Generation through Character-based <fixed-case>RNN</fixed-case>s with Finite-state Prior Knowledge</title>
       <author><first>Raghav</first><last>Goyal</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <pages>1083–1092</pages>
       <url hash="c295cfc8">C16-1103</url>
       <abstract>Recently Wen et al. (2015) have proposed a Recurrent Neural Network (RNN) approach to the generation of utterances from dialog acts, and shown that although their model requires less effort to develop than a rule-based system, it is able to improve certain aspects of the utterances, in particular their naturalness. However their system employs generation at the word-level, which requires one to pre-process the data by substituting named entities with placeholders. This pre-processing prevents the model from handling some contextual effects and from managing multiple occurrences of the same attribute. Our approach uses a character-level model, which unlike the word-level model makes it possible to learn to “copy” information from the dialog act to the target without having to pre-process the input. In order to avoid generating non-words and inventing information not present in the input, we propose a method for incorporating prior knowledge into the RNN in the form of a weighted finite-state automaton over character sequences. Automatic and human evaluations show improved performance over baselines on several evaluation criteria.</abstract>
@@ -1153,11 +1153,11 @@
     </paper>
     <paper id="110">
       <title><fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary: Augmenting <fixed-case>METEOR</fixed-case> using Vector Representations or Lexical Resources?</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Alexandre</first><last>Bérard</last></author>
       <author><first>Zied</first><last>Elloumi</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>1159–1168</pages>
       <url hash="d1fa88d3">C16-1110</url>
       <abstract>This paper presents an approach combining lexico-semantic resources and distributed representations of words applied to the evaluation in machine translation (MT). This study is made through the enrichment of a well-known MT evaluation metric: METEOR. METEOR enables an approximate match (synonymy or morphological similarity) between an automatic and a reference translation. Our experiments are made in the framework of the Metrics task of WMT 2014. We show that distributed representations are a good alternative to lexico-semanticresources for MT evaluation and they can even bring interesting additional information. The augmented versions of METEOR, using vector representations, are made available on our Github page.</abstract>
@@ -1165,8 +1165,8 @@
     </paper>
     <paper id="111">
       <title>Broad <fixed-case>T</fixed-case>witter Corpus: A Diverse Named Entity Recognition Resource</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
       <pages>1169–1179</pages>
       <url hash="7eec74a9">C16-1111</url>
@@ -1196,7 +1196,7 @@
       <title>Leveraging Multilingual Training for Limited Resource Event Extraction</title>
       <author><first>Andrew</first><last>Hsi</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Ruochen</first><last>Xu</last></author>
       <pages>1201–1210</pages>
       <url hash="0705b778">C16-1114</url>
@@ -1206,7 +1206,7 @@
     <paper id="115">
       <title><fixed-case>LILI</fixed-case>: A Simple Language Independent Approach for Language Identification</title>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1211–1219</pages>
       <url hash="0e9dd297">C16-1115</url>
       <abstract>We introduce a generic Language Independent Framework for Linguistic Code Switch Point Detection. The system uses characters level 5-grams and word level unigram language models to train a conditional random fields (CRF) model for classifying input words into various languages. We test our proposed framework and compare it to the state-of-the-art published systems on standard data sets from several language pairs: English-Spanish, Nepali-English, English-Hindi, Arabizi (Refers to Arabic written using the Latin/Roman script)-English, Arabic-Engari (Refers to English written using Arabic script), Modern Standard Arabic(MSA)-Egyptian, Levantine-MSA, Gulf-MSA, one more English-Spanish, and one more MSA-EGY. The overall weighted average F-score of each language pair are 96.4%, 97.3%, 98.0%, 97.0%, 98.9%, 86.3%, 88.2%, 90.6%, 95.2%, and 85.0% respectively. The results show that our approach despite its simplicity, either outperforms or performs at comparable levels to state-of-the-art published systems.</abstract>
@@ -1215,7 +1215,7 @@
     <paper id="116">
       <title>High Accuracy Rule-based Question Classification using Question Syntax and Semantics</title>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>1220–1230</pages>
       <url hash="166ec01a">C16-1116</url>
       <abstract>We present in this paper a purely rule-based system for Question Classification which we divide into two parts: The first is the extraction of relevant words from a question by use of its structure, and the second is the classification of questions based on rules that associate these words to Concepts. We achieve an accuracy of 97.2%, close to a 6 point improvement over the previous State of the Art of 91.6%. Additionally, we believe that machine learning algorithms can be applied on top of this method to further improve accuracy.</abstract>
@@ -1238,7 +1238,7 @@
     <paper id="118">
       <title>Semantically Motivated <fixed-case>H</fixed-case>ebrew Verb-Noun Multi-Word Expressions Identification</title>
       <author><first>Chaya</first><last>Liebeskind</last></author>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <pages>1242–1253</pages>
       <url hash="4b5bcc04">C16-1118</url>
       <abstract>Identification of Multi-Word Expressions (MWEs) lies at the heart of many natural language processing applications. In this research, we deal with a particular type of Hebrew MWEs, Verb-Noun MWEs (VN-MWEs), which combine a verb and a noun with or without other words. Most prior work on MWEs classification focused on linguistic and statistical information. In this paper, we claim that it is essential to utilize semantic information. To this end, we propose a semantically motivated indicator for classifying VN-MWE and define features that are related to various semantic spaces and combine them as features in a supervised classification framework. We empirically demonstrate that our semantic feature set yields better performance than the common linguistic and statistical feature sets and that combining semantic features contributes to the VN-MWEs identification task.</abstract>
@@ -1269,7 +1269,7 @@
       <title>Facing the most difficult case of Semantic Role Labeling: A collaboration of word embeddings and co-training</title>
       <author><first>Quynh Ngoc Thi</first><last>Do</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1275–1284</pages>
       <url hash="eb3d8ca1">C16-1121</url>
       <abstract>We present a successful collaboration of word embeddings and co-training to tackle in the most difficult test case of semantic role labeling: predicting out-of-domain and unseen semantic frames. Despite the fact that co-training is a successful traditional semi-supervised method, its application in SRL is very limited especially when a huge amount of labeled data is available. In this work, co-training is used together with word embeddings to improve the performance of a system trained on a large training dataset. We also introduce a semantic role labeling system with a simple learning architecture and effective inference that is easily adaptable to semi-supervised settings with new training data and/or new features. On the out-of-domain testing set of the standard benchmark CoNLL 2009 data our simple approach achieves high performance and improves state-of-the-art results.</abstract>
@@ -1277,8 +1277,8 @@
     </paper>
     <paper id="122">
       <title>Predictability of Distributional Semantics in Derivational Word Formation</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>1285–1296</pages>
@@ -1301,7 +1301,7 @@
     <paper id="124">
       <title>From phonemes to images: levels of representation in a recurrent neural model of visually-grounded language learning</title>
       <author><first>Lieke</first><last>Gelderloos</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>1309–1319</pages>
       <url hash="20e6b8f9">C16-1124</url>
       <abstract>We present a model of visually-grounded language learning based on stacked gated recurrent neural networks which learns to predict visual features given an image description in the form of a sequence of phonemes. The learning task resembles that faced by human language learners who need to discover both structure and meaning from noisy and ambiguous data across modalities. We show that our model indeed learns to predict features of the visual context given phonetically transcribed image descriptions, and show that it represents linguistic information in a hierarchy of levels: lower layers in the stack are comparatively more sensitive to form, whereas higher layers are more sensitive to meaning.</abstract>
@@ -1311,7 +1311,7 @@
       <title>Linguistic features for <fixed-case>H</fixed-case>indi light verb construction identification</title>
       <author><first>Ashwini</first><last>Vaidya</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1320–1329</pages>
       <url hash="02c329a0">C16-1125</url>
       <abstract>Light verb constructions (LVC) in Hindi are highly productive. If we can distinguish a case such as nirnay lenaa ‘decision take; decide’ from an ordinary verb-argument combination kaagaz lenaa ‘paper take; take (a) paper’,it has been shown to aid NLP applications such as parsing (Begum et al., 2011) and machine translation (Pal et al., 2011). In this paper, we propose an LVC identification system using language specific features for Hindi which shows an improvement over previous work(Begum et al., 2011). To build our system, we carry out a linguistic analysis of Hindi LVCs using Hindi Treebank annotations and propose two new features that are aimed at capturing the diversity of Hindi LVCs in the corpus. We find that our model performs robustly across a diverse range of LVCs and our results underscore the importance of semantic features, which is in keeping with the findings for English. Our error analysis also demonstrates that our classifier can be used to further refine LVC annotations in the Hindi Treebank and make them more consistent across the board.</abstract>
@@ -1321,7 +1321,7 @@
       <title>Cross-lingual Transfer of Correlations between Parts of Speech and Gaze Features</title>
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Frank</first><last>Keller</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1330–1339</pages>
       <url hash="c9525ae8">C16-1126</url>
       <abstract>Several recent studies have shown that eye movements during reading provide information about grammatical and syntactic processing, which can assist the induction of NLP models. All these studies have been limited to English, however. This study shows that gaze and part of speech (PoS) correlations largely transfer across English and French. This means that we can replicate previous studies on gaze-based PoS tagging for French, but also that we can use English gaze data to assist the induction of French NLP models.</abstract>
@@ -1349,7 +1349,7 @@
     <paper id="129">
       <title>Dynamic Generative model for Diachronic Sense Emergence Detection</title>
       <author><first>Martin</first><last>Emms</last></author>
-      <author><first>Arun Kumar</first><last>Jayapal</last></author>
+      <author id="arun-kumar-jayapal"><first>Arun Kumar</first><last>Jayapal</last></author>
       <pages>1362–1373</pages>
       <url hash="c0a41874">C16-1129</url>
       <abstract>As time passes words can acquire meanings they did not previously have, such as the ‘twitter post’ usage of ‘tweet’. We address how this can be detected from time-stamped raw text. We propose a generative model with senses dependent on times and context words dependent on senses but otherwise eternal, and a Gibbs sampler for estimation. We obtain promising parameter estimates for positive (resp. negative) cases of known sense emergence (resp non-emergence) and adapt the ‘pseudo-word’ technique (Schutze, 1992) to give a novel further evaluation via ‘pseudo-neologisms’. The question of ground-truth is also addressed and a technique proposed to locate an emergence date for evaluation purposes.</abstract>
@@ -1380,7 +1380,7 @@
     </paper>
     <paper id="132">
       <title>Machine Translation Evaluation for <fixed-case>A</fixed-case>rabic using Morphologically-enriched Embeddings</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Houda</first><last>Bouamor</last></author>
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
@@ -1401,7 +1401,7 @@
     <paper id="134">
       <title>Phrase-based Machine Translation using Multiple Preordering Candidates</title>
       <author><first>Yusuke</first><last>Oda</last></author>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Tetsuji</first><last>Nakagawa</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <pages>1419–1428</pages>
@@ -1413,8 +1413,8 @@
       <title>Hand in Glove: Deep Feature Fusion Network Architectures for Answer Quality Prediction in Community Question Answering</title>
       <author><first>Sai Praneeth</first><last>Suggu</last></author>
       <author><first>Kushwanth</first><last>Naga Goutham</last></author>
-      <author><first>Manoj K.</first><last>Chinnakotla</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj K.</first><last>Chinnakotla</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>1429–1440</pages>
       <url hash="55394a5d">C16-1135</url>
       <abstract>Community Question Answering (cQA) forums have become a popular medium for soliciting direct answers to specific questions of users from experts or other experienced users on a given topic. However, for a given question, users sometimes have to sift through a large number of low-quality or irrelevant answers to find out the answer which satisfies their information need. To alleviate this, the problem of Answer Quality Prediction (AQP) aims to predict the quality of an answer posted in response to a forum question. Current AQP systems either learn models using - a) various hand-crafted features (HCF) or b) Deep Learning (DL) techniques which automatically learn the required feature representations. In this paper, we propose a novel approach for AQP known as - “Deep Feature Fusion Network (DFFN)” which combines the advantages of both hand-crafted features and deep learning based systems. Given a question-answer pair along with its metadata, the DFFN architecture independently - a) learns features from the Deep Neural Network (DNN) and b) computes hand-crafted features using various external resources and then combines them using a fully connected neural network trained to predict the final answer quality. DFFN is end-end differentiable and trained as a single system. We propose two different DFFN architectures which vary mainly in the way they model the input question/answer pair - DFFN-CNN uses a Convolutional Neural Network (CNN) and DFFN-BLNA uses a Bi-directional LSTM with Neural Attention (BLNA). Both these proposed variants of DFFN (DFFN-CNN and DFFN-BLNA) achieve state-of-the-art performance on the standard SemEval-2015 and SemEval-2016 benchmark datasets and outperforms baseline approaches which individually employ either HCF or DL based techniques alone.</abstract>
@@ -1424,7 +1424,7 @@
       <title>Learning Event Expressions via Bilingual Structure Projection</title>
       <author><first>Fangyuan</first><last>Li</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>1441–1450</pages>
       <url hash="49e762e2">C16-1136</url>
@@ -1434,8 +1434,8 @@
     <paper id="137">
       <title>Global Inference to <fixed-case>C</fixed-case>hinese Temporal Relation Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
       <pages>1451–1460</pages>
       <url hash="92f644b5">C16-1137</url>
@@ -1498,7 +1498,7 @@
     <paper id="143">
       <title>An Unsupervised Multi-Document Summarization Framework Based on Neural Document Model</title>
       <author><first>Shulei</first><last>Ma</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <author><first>Yunlun</first><last>Yang</last></author>
       <pages>1514–1523</pages>
       <url hash="bf8a18d5">C16-1143</url>
@@ -1510,7 +1510,7 @@
       <author><first>Maximilian</first><last>Schwenger</last></author>
       <author><first>Álvaro</first><last>Torralba</last></author>
       <author><first>Joerg</first><last>Hoffmann</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>1524–1534</pages>
       <url hash="cd90266f">C16-1144</url>
@@ -1542,7 +1542,7 @@
       <title>On the Impact of Seed Words on Sentiment Polarity Lexicon Induction</title>
       <author><first>Dame</first><last>Jovanoski</last></author>
       <author><first>Veno</first><last>Pachovski</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1557–1567</pages>
       <url hash="a6ca1cb6">C16-1147</url>
       <abstract>Sentiment polarity lexicons are key resources for sentiment analysis, and researchers have invested a lot of efforts in their manual creation. However, there has been a recent shift towards automatically extracted lexicons, which are orders of magnitude larger and perform much better. These lexicons are typically mined using bootstrapping, starting from very few seed words whose polarity is given, e.g., 50-60 words, and sometimes even just 5-6. Here we demonstrate that much higher-quality lexicons can be built by starting with hundreds of words and phrases as seeds, especially when they are in-domain. Thus, we combine (i) mid-sized high-quality manually crafted lexicons as seeds and (ii) bootstrapping, in order to build large-scale lexicons.</abstract>
@@ -1553,7 +1553,7 @@
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Brian</first><last>Riordan</last></author>
       <author><first>Binod</first><last>Gyawali</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <pages>1568–1578</pages>
       <url hash="fbcc78f8">C16-1148</url>
       <abstract>This work investigates whether the development of ideas in writing can be captured by graph properties derived from the text. Focusing on student essays, we represent the essay as a graph, and encode a variety of graph properties including PageRank as features for modeling essay scores related to quality of development. We demonstrate that our approach improves on a state-of-the-art system on the task of holistic scoring of persuasive essays and on the task of scoring narrative essays along the development dimension.</abstract>
@@ -1607,7 +1607,7 @@
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Sophia</first><last>Lee</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1624–1634</pages>
       <url hash="9f488b7b">C16-1153</url>
       <abstract>Emotions in code-switching text can be expressed in either monolingual or bilingual forms. However, relatively little research has emphasized on code-switching text. In this paper, we propose a Bilingual Attention Network (BAN) model to aggregate the monolingual and bilingual informative words to form vectors from the document representation, and integrate the attention vectors to predict the emotion. The experiments show that the effectiveness of the proposed model. Visualization of the attention layers illustrates that the model selects qualitatively informative words.</abstract>
@@ -1634,7 +1634,7 @@
     </paper>
     <paper id="156">
       <title>Predicting the Evocation Relation between Lexicalized Concepts</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>1657–1668</pages>
       <url hash="9f34e43e">C16-1156</url>
       <abstract>Evocation is a directed yet weighted semantic relationship between lexicalized concepts. Although evocation relations are considered potentially useful in several semantic NLP tasks, the prediction of the evocation relation between an arbitrary pair of concepts remains difficult, since evocation relationships cover a broader range of semantic relations rooted in human perception and experience. This paper presents a supervised learning approach to predict the strength (by regression) and to determine the directionality (by classification) of the evocation relation that might hold between a pair of lexicalized concepts. Empirical results that were obtained by investigating useful features are shown, indicating that a combination of the proposed features largely outperformed individual baselines, and also suggesting that semantic relational vectors computed from existing semantic vectors for lexicalized concepts were indeed effective for both the prediction of strength and the determination of directionality.</abstract>
@@ -1642,7 +1642,7 @@
     </paper>
     <paper id="157">
       <title>Collecting and Exploring Everyday Language for Predicting Psycholinguistic Properties of Words</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1669–1679</pages>
       <url hash="3ad75c8e">C16-1157</url>
@@ -1652,7 +1652,7 @@
     <paper id="158">
       <title>Using Argument Mining to Assess the Argumentation Quality of Essays</title>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>1680–1691</pages>
       <url hash="21817333">C16-1158</url>
@@ -1685,7 +1685,7 @@
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Lei</first><last>Sha</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>1715–1724</pages>
@@ -1713,7 +1713,7 @@
       <author><first>Wei-Ning</first><last>Hsu</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>1734–1745</pages>
       <url hash="92bedb46">C16-1163</url>
       <abstract>In real-world data, e.g., from Web forums, text is often contaminated with redundant or irrelevant content, which leads to introducing noise in machine learning algorithms. In this paper, we apply Long Short-Term Memory networks with an attention mechanism, which can select important parts of text for the task of similar question retrieval from community Question Answering (cQA) forums. In particular, we use the attention weights for both selecting entire sentences and their subparts, i.e., word/chunk, from shallow syntactic trees. More interestingly, we apply tree kernels to the filtered text representations, thus exploiting the implicit features of the subtree space for learning question reranking. Our results show that the attention-based pruning allows for achieving the top position in the cQA challenge of SemEval 2016, with a relatively large gap from the other participants while greatly decreasing running time.</abstract>
@@ -1725,7 +1725,7 @@
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1746–1756</pages>
       <url hash="ac7b162b">C16-1164</url>
       <abstract>This work focuses on answering single-relation factoid questions over Freebase. Each question can acquire the answer from a single fact of form (subject, predicate, object) in Freebase. This task, simple question answering (SimpleQA), can be addressed via a two-step pipeline: entity linking and fact selection. In fact selection, we match the subject entity in a fact candidate with the entity mention in the question by a character-level convolutional neural network (char-CNN), and match the predicate in that fact with the question by a word-level CNN (word-CNN). This work makes two main contributions. (i) A simple and effective entity linker over Freebase is proposed. Our entity linker outperforms the state-of-the-art entity linker over SimpleQA task. (ii) A novel attentive maxpooling is stacked over word-CNN, so that the predicate representation can be matched with the predicate-focused question representation more effectively. Experiments show that our system sets new state-of-the-art in this task.</abstract>
@@ -1746,8 +1746,8 @@
       <author><first>Georgios</first><last>Balikas</last></author>
       <author><first>Hesam</first><last>Amoualian</last></author>
       <author><first>Marianne</first><last>Clausel</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Massih R.</first><last>Amini</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="massih-r-amini"><first>Massih R.</first><last>Amini</last></author>
       <pages>1767–1776</pages>
       <url hash="c9dde94e">C16-1166</url>
       <abstract>The exchangeability assumption in topic models like Latent Dirichlet Allocation (LDA) often results in inferring inconsistent topics for the words of text spans like noun-phrases, which are usually expected to be topically coherent. We propose copulaLDA, that extends LDA by integrating part of the text structure to the model and relaxes the conditional independence assumption between the word-specific latent topics given the per-document topic distributions. To this end, we assume that the words of text spans like noun-phrases are topically bound and we model this dependence with copulas. We demonstrate empirically the effectiveness of copulaLDA on both intrinsic and extrinsic evaluation tasks on several publicly available corpora.</abstract>
@@ -1768,7 +1768,7 @@
     <paper id="168">
       <title>Semantic Annotation Aggregation with Conditional Crowdsourcing Models and Word Embeddings</title>
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <pages>1787–1796</pages>
       <url hash="3f91b174">C16-1168</url>
@@ -1778,8 +1778,8 @@
     <paper id="169">
       <title>Interactive-Predictive Machine Translation based on Syntactic Constraints of Prefix</title>
       <author><first>Na</first><last>Ye</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <pages>1797–1806</pages>
       <url hash="1cabc977">C16-1169</url>
       <abstract>Interactive-predictive machine translation (IPMT) is a translation mode which combines machine translation technology and human behaviours. In the IPMT system, the utilization of the prefix greatly affects the interaction efficiency. However, state-of-the-art methods filter translation hypotheses mainly according to their matching results with the prefix on character level, and the advantage of the prefix is not fully developed. Focusing on this problem, this paper mines the deep constraints of prefix on syntactic level to improve the performance of IPMT systems. Two syntactic subtree matching rules based on phrase structure grammar are proposed to filter the translation hypotheses more strictly. Experimental results on LDC Chinese-English corpora show that the proposed method outperforms state-of-the-art phrase-based IPMT system while keeping comparable decoding speed.</abstract>
@@ -1799,7 +1799,7 @@
     <paper id="171">
       <title>A Distribution-based Model to Learn Bilingual Word Embeddings</title>
       <author><first>Hailong</first><last>Cao</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Shu</first><last>Zhang</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <pages>1818–1827</pages>
@@ -1812,7 +1812,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>1828–1836</pages>
       <url hash="b28e0ebe">C16-1172</url>
       <abstract>Recently, the development of neural machine translation (NMT) has significantly improved the translation quality of automatic machine translation. While most sentences are more accurate and fluent than translations by statistical machine translation (SMT)-based systems, in some cases, the NMT system produces translations that have a completely different meaning. This is especially the case when rare words occur. When using statistical machine translation, it has already been shown that significant gains can be achieved by simplifying the input in a preprocessing step. A commonly used example is the pre-reordering approach. In this work, we used phrase-based machine translation to pre-translate the input into the target language. Then a neural machine translation system generates the final hypothesis using the pre-translation. Thereby, we use either only the output of the phrase-based machine translation (PBMT) system or a combination of the PBMT output and the source sentence. We evaluate the technique on the English to German translation task. Using this approach we are able to outperform the PBMT system as well as the baseline neural MT system by up to 2 BLEU points. We analyzed the influence of the quality of the initial system on the final result.</abstract>
@@ -1878,8 +1878,8 @@
     <paper id="179">
       <title>Multi-view and multi-task training of <fixed-case>RST</fixed-case> discourse parsers</title>
       <author><first>Chloé</first><last>Braud</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1903–1913</pages>
       <url hash="28fd2c76">C16-1179</url>
       <abstract>We experiment with different ways of training LSTM networks to predict RST discourse trees. The main challenge for RST discourse parsing is the limited amounts of training data. We combat this by regularizing our models using task supervision from related tasks as well as alternative views on discourse structures. We show that a simple LSTM sequential discourse parser takes advantage of this multi-view and multi-task framework with 12-15% error reductions over our baseline (depending on the metric) and results that rival more complex state-of-the-art parsers.</abstract>
@@ -1906,7 +1906,7 @@
     </paper>
     <paper id="182">
       <title>Representation and Learning of Temporal Relations</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>1937–1948</pages>
       <url hash="ff4b64ce">C16-1182</url>
       <abstract>Determining the relative order of events and times described in text is an important problem in natural language processing. It is also a difficult one: general state-of-the-art performance has been stuck at a relatively low ceiling for years. We investigate the representation of temporal relations, and empirically evaluate the effect that various temporal relation representations have on machine learning performance. While machine learning performance decreases with increased representational expressiveness, not all representation simplifications have equal impact.</abstract>
@@ -1928,7 +1928,7 @@
       <author><first>Kento</first><last>Watanabe</last></author>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
       <author><first>Naho</first><last>Orita</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Satoru</first><last>Fukayama</last></author>
       <author><first>Tomoyasu</first><last>Nakano</last></author>
@@ -1952,7 +1952,7 @@
       <title>Multimodal Mood Classification - A Case Study of Differences in <fixed-case>H</fixed-case>indi and Western Songs</title>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>1980–1989</pages>
       <url hash="63dade67">C16-1186</url>
       <abstract>Music information retrieval has emerged as a mainstream research area in the past two decades. Experiments on music mood classification have been performed mainly on Western music based on audio, lyrics and a combination of both. Unfortunately, due to the scarcity of digitalized resources, Indian music fares poorly in music mood retrieval research. In this paper, we identified the mood taxonomy and prepared multimodal mood annotated datasets for Hindi and Western songs. We identified important audio and lyric features using correlation based feature selection technique. Finally, we developed mood classification systems using Support Vector Machines and Feed Forward Neural Networks based on the features collected from audio, lyrics, and a combination of both. The best performing multimodal systems achieved F-measures of 75.1 and 83.5 for classifying the moods of the Hindi and Western songs respectively using Feed Forward Neural Networks. A comparative analysis indicates that the selected features work well for mood classification of the Western songs and produces better results as compared to the mood classification systems for Hindi songs.</abstract>
@@ -1984,7 +1984,7 @@
       <title>Dialogue Act Classification in Domain-Independent Conversations Using a Deep Recurrent Neural Network</title>
       <author><first>Hamed</first><last>Khanpour</last></author>
       <author><first>Nishitha</first><last>Guntakandla</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>2012–2021</pages>
       <url hash="e0a0e63b">C16-1189</url>
       <abstract>In this study, we applied a deep LSTM structure to classify dialogue acts (DAs) in open-domain conversations. We found that the word embeddings parameters, dropout regularization, decay rate and number of layers are the parameters that have the largest effect on the final system accuracy. Using the findings of these experiments, we trained a deep LSTM network that outperforms the state-of-the-art on the Switchboard corpus by 3.11%, and MRDA by 2.2%.</abstract>
@@ -1993,7 +1993,7 @@
     <paper id="190">
       <title>Non-sentential Question Resolution using Sequence to Sequence Learning</title>
       <author><first>Vineet</first><last>Kumar</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>2022–2031</pages>
       <url hash="2680cdeb">C16-1190</url>
       <abstract>An interactive Question Answering (QA) system frequently encounters non-sentential (incomplete) questions. These non-sentential questions may not make sense to the system when a user asks them without the context of conversation. The system thus needs to take into account the conversation context to process the question. In this work, we present a recurrent neural network (RNN) based encoder decoder network that can generate a complete (intended) question, given an incomplete question and conversation context. RNN encoder decoder networks have been show to work well when trained on a parallel corpus with millions of sentences, however it is extremely hard to obtain conversation data of this magnitude. We therefore propose to decompose the original problem into two separate simplified problems where each problem focuses on an abstraction. Specifically, we train a semantic sequence model to learn semantic patterns, and a syntactic sequence model to learn linguistic patterns. We further combine syntactic and semantic sequence models to generate an ensemble model. Our model achieves a BLEU score of 30.15 as compared to 18.54 using a standard RNN encoder decoder model.</abstract>
@@ -2024,7 +2024,7 @@
       <title>Domainless Adaptation by Constrained Decoding on a Schema Lattice</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>2051–2060</pages>
       <url hash="24aa256b">C16-1193</url>
       <abstract>In many applications such as personal digital assistants, there is a constant need for new domains to increase the system’s coverage of user queries. A conventional approach is to learn a separate model every time a new domain is introduced. This approach is slow, inefficient, and a bottleneck for scaling to a large number of domains. In this paper, we introduce a framework that allows us to have a single model that can handle all domains: including unknown domains that may be created in the future as long as they are covered in the master schema. The key idea is to remove the need for distinguishing domains by explicitly predicting the schema of queries. Given permitted schema of a query, we perform constrained decoding on a lattice of slot sequences allowed under the schema. The proposed model achieves competitive and often superior performance over the conventional model trained separately per domain.</abstract>
@@ -2057,7 +2057,7 @@
       <author><first>Jonas</first><last>Wacker</last></author>
       <author><first>Stefan</first><last>Radomski</last></author>
       <author><first>Max</first><last>Mühlhäuser</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>2082–2091</pages>
       <url hash="06526a01">C16-1196</url>
       <abstract>We present Ambient Search, an open source system for displaying and retrieving relevant documents in real time for speech input. The system works ambiently, that is, it unobstructively listens to speech streams in the background, identifies keywords and keyphrases for query construction and continuously serves relevant documents from its index. Query terms are ranked with Word2Vec and TF-IDF and are continuously updated to allow for ongoing querying of a document collection. The retrieved documents, in our case Wikipedia articles, are visualized in real time in a browser interface. Our evaluation shows that Ambient Search compares favorably to another implicit information retrieval system on speech streams. Furthermore, we extrinsically evaluate multiword keyphrase generation, showing positive impact for manual transcriptions.</abstract>
@@ -2067,8 +2067,8 @@
       <title>Semi-supervised Gender Classification with Joint Textual and Social Modeling</title>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Bin</first><last>Dai</last></author>
-      <author><first>Zhengxian</first><last>Gong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2092–2100</pages>
       <url hash="b2d26e77">C16-1197</url>
       <abstract>In gender classification, labeled data is often limited while unlabeled data is ample. This motivates semi-supervised learning for gender classification to improve the performance by exploring the knowledge in both labeled and unlabeled data. In this paper, we propose a semi-supervised approach to gender classification by leveraging textual features and a specific kind of indirect links among the users which we call “same-interest” links. Specifically, we propose a factor graph, namely Textual and Social Factor Graph (TSFG), to model both the textual and the “same-interest” link information. Empirical studies demonstrate the effectiveness of the proposed approach to semi-supervised gender classification.</abstract>
@@ -2089,7 +2089,7 @@
       <author><first>Dong</first><last>Zhang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2112–2121</pages>
       <url hash="24747ca5">C16-1199</url>
       <abstract>Textual information is of critical importance for automatic user classification in social media. However, most previous studies model textual features in a single perspective while the text in a user homepage typically possesses different styles of text, such as original message and comment from others. In this paper, we propose a novel approach, namely ensemble LSTM, to user classification by incorporating multiple textual perspectives. Specifically, our approach first learns a LSTM representation with a LSTM recurrent neural network and then presents a joint learning method to integrating all naturally-divided textual perspectives. Empirical studies on two basic user classification tasks, i.e., gender classification and age classification, demonstrate the effectiveness of the proposed approach to user classification with multiple textual perspectives.</abstract>
@@ -2129,9 +2129,9 @@
     <paper id="203">
       <title>Improving Statistical Machine Translation with Selectional Preferences</title>
       <author><first>Haiqing</first><last>Tang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <pages>2154–2163</pages>
       <url hash="90c10f10">C16-1203</url>
       <abstract>Long-distance semantic dependencies are crucial for lexical choice in statistical machine translation. In this paper, we study semantic dependencies between verbs and their arguments by modeling selectional preferences in the context of machine translation. We incorporate preferences that verbs impose on subjects and objects into translation. In addition, bilingual selectional preferences between source-side verbs and target-side arguments are also investigated. Our experiments on Chinese-to-English translation tasks with large-scale training data demonstrate that statistical machine translation using verbal selectional preferences can achieve statistically significant improvements over a state-of-the-art baseline.</abstract>
@@ -2140,7 +2140,7 @@
     <paper id="204">
       <title>Hierarchical Permutation Complexity for Word Order Evaluation</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>2164–2173</pages>
       <url hash="f5fb87f1">C16-1204</url>
       <abstract>Existing approaches for evaluating word order in machine translation work with metrics computed directly over a permutation of word positions in system output relative to a reference translation. However, every permutation factorizes into a permutation tree (PET) built of primal permutations, i.e., atomic units that do not factorize any further. In this paper we explore the idea that permutations factorizing into (on average) shorter primal permutations should represent simpler ordering as well. Consequently, we contribute Permutation Complexity, a class of metrics over PETs and their extension to forests, and define tight metrics, a sub-class of metrics implementing this idea. Subsequently we define example tight metrics and empirically test them in word order evaluation. Experiments on the WMT13 data sets for ten language pairs show that a tight metric is more often than not better than the baselines.</abstract>
@@ -2159,7 +2159,7 @@
     </paper>
     <paper id="206">
       <title>Get Semantic With Me! The Usefulness of Different Feature Types for Short-Answer Grading</title>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <pages>2186–2195</pages>
       <url hash="a9e3ceb0">C16-1206</url>
       <abstract>Automated short-answer grading is key to help close the automation loop for large-scale, computerised testing in education. A wide range of features on different levels of linguistic processing has been proposed so far. We investigate the relative importance of the different types of features across a range of standard corpora (both from a language skill and content assessment context, in English and in German). We find that features on the lexical, text similarity and dependency level often suffice to approximate full-model performance. Features derived from semantic processing particularly benefit the linguistically more varied answers in content assessment corpora.</abstract>
@@ -2170,9 +2170,9 @@
       <author><first>Terra</first><last>Blevins</last></author>
       <author><first>Robert</first><last>Kwiatkowski</last></author>
       <author><first>Jamie</first><last>MacBeth</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Desmond</first><last>Patton</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>2196–2206</pages>
       <url hash="162cc9f2">C16-1207</url>
       <abstract>Violence is a serious problems for cities like Chicago and has been exacerbated by the use of social media by gang-involved youths for taunting rival gangs. We present a corpus of tweets from a young and powerful female gang member and her communicators, which we have annotated with discourse intention, using a deep read to understand how and what triggered conversations to escalate into aggression. We use this corpus to develop a part-of-speech tagger and phrase table for the variant of English that is used and a classifier for identifying tweets that express grieving and aggression.</abstract>
@@ -2192,7 +2192,7 @@
     <paper id="209">
       <title>Data-driven learning of symbolic constraints for a log-linear model in a phonological setting</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>2217–2226</pages>
       <url hash="fa7ee037">C16-1209</url>
       <abstract>We propose a non-parametric Bayesian model for learning and weighting symbolically-defined constraints to populate a log-linear model. The model jointly infers a vector of binary constraint values for each candidate output and likely definitions for these constraints, combining observations of the output classes with a (potentially infinite) grammar over potential constraint definitions. We present results on a small morphophonological system, English regular plurals, as a test case. The inferred constraints, based on a grammar of articulatory features, perform as well as theoretically-defined constraints on both observed and novel forms of English regular plurals. The learned constraint values and definitions also closely resemble standard constraints defined within phonological theory.</abstract>
@@ -2235,7 +2235,7 @@
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Paweł</first><last>Kędzia</last></author>
       <pages>2259–2268</pages>
       <url hash="f002e586">C16-1213</url>
@@ -2245,7 +2245,7 @@
     <paper id="214">
       <title>Time-Independent and Language-Independent Extraction of Multiword Expressions From <fixed-case>T</fixed-case>witter</title>
       <author><first>Nikhil</first><last>Londhe</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <author><first>Vishrawas</first><last>Gopalakrishnan</last></author>
       <pages>2269–2278</pages>
       <url hash="7c91af13">C16-1214</url>
@@ -2279,7 +2279,7 @@
       <author><first>Amit</first><last>Gupta</last></author>
       <author><first>Francesco</first><last>Piccinno</last></author>
       <author><first>Mikhail</first><last>Kozhevnikov</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
       <pages>2300–2309</pages>
       <url hash="be23f62d">C16-1217</url>
@@ -2289,10 +2289,10 @@
     <paper id="218">
       <title>Joint Learning of Local and Global Features for Entity Linking via Neural Networks</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Nicolas</first><last>Fauceglia</last></author>
+      <author id="nicolas-r-fauceglia"><first>Nicolas</first><last>Fauceglia</last></author>
       <author><first>Mariano</first><last>Rodriguez Muro</last></author>
       <author><first>Oktie</first><last>Hassanzadeh</last></author>
-      <author><first>Alfio</first><last>Massimiliano Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Massimiliano Gliozzo</last></author>
       <author><first>Mohammad</first><last>Sadoghi</last></author>
       <pages>2310–2320</pages>
       <url hash="ab59c7be">C16-1218</url>
@@ -2301,7 +2301,7 @@
     </paper>
     <paper id="219">
       <title>Structured Aspect Extraction</title>
-      <author><first>Omer</first><last>Gunes</last></author>
+      <author id="omer-farukhan-gunes"><first>Omer</first><last>Gunes</last></author>
       <author><first>Tim</first><last>Furche</last></author>
       <author><first>Giorgio</first><last>Orsi</last></author>
       <pages>2321–2332</pages>
@@ -2440,7 +2440,7 @@
     <paper id="233">
       <title>Targeted Sentiment to Understand Student Comments</title>
       <author><first>Charles</first><last>Welch</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2471–2481</pages>
       <url hash="cb72ea8e">C16-1233</url>
       <abstract>We address the task of targeted sentiment as a means of understanding the sentiment that students hold toward courses and instructors, as expressed by students in their comments. We introduce a new dataset consisting of student comments annotated for targeted sentiment and describe a system that can both identify the courses and instructors mentioned in student comments, as well as label the students’ sentiment toward those entities. Through several comparative evaluations, we show that our system outperforms previous work on a similar task.</abstract>
@@ -2450,7 +2450,7 @@
       <title>Towards Sub-Word Level Compositions for Sentiment Analysis of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Text</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Ameya</first><last>Prabhu</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>2482–2491</pages>
       <url hash="363492a6">C16-1234</url>
@@ -2461,7 +2461,7 @@
       <title>Distance Metric Learning for Aspect Phrase Grouping</title>
       <author><first>Shufeng</first><last>Xiong</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Yinxia</first><last>Lou</last></author>
       <pages>2492–2502</pages>
       <url hash="8caa4c1d">C16-1235</url>
@@ -2474,7 +2474,7 @@
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Zhao</first><last>Yan</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2503–2514</pages>
       <url hash="a85442f3">C16-1236</url>
       <abstract>WebQuestions and SimpleQuestions are two benchmark data-sets commonly used in recent knowledge-based question answering (KBQA) work. Most questions in them are ‘simple’ questions which can be answered based on a single relation in the knowledge base. Such data-sets lack the capability of evaluating KBQA systems on complicated questions. Motivated by this issue, we release a new data-set, namely ComplexQuestions, aiming to measure the quality of KBQA systems on ‘multi-constraint’ questions which require multiple knowledge base relations to get the answer. Beside, we propose a novel systematic KBQA approach to solve multi-constraint questions. Compared to state-of-the-art methods, our approach not only obtains comparable results on the two existing benchmark data-sets, but also achieves significant improvements on the ComplexQuestions.</abstract>
@@ -2494,7 +2494,7 @@
     <paper id="238">
       <title>Attention-Based Convolutional Neural Network for Semantic Relation Extraction</title>
       <author><first>Yatian</first><last>Shen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2526–2536</pages>
       <url hash="09d05d17">C16-1238</url>
       <abstract>Nowadays, neural networks play an important role in the task of relation classification. In this paper, we propose a novel attention-based convolutional neural network architecture for this task. Our model makes full use of word embedding, part-of-speech tag embedding and position embedding information. Word level attention mechanism is able to better determine which parts of the sentence are most influential with respect to the two entities of interest. This architecture enables learning some important features from task-specific labeled data, forgoing the need for external knowledge such as explicit dependency structures. Experiments on the SemEval-2010 Task 8 benchmark dataset show that our model achieves better performances than several state-of-the-art neural network models and can achieve a competitive performance just with minimal feature engineering.</abstract>
@@ -2503,7 +2503,7 @@
     <paper id="239">
       <title>Table Filling Multi-Task Recurrent Neural Network for Joint Entity and Relation Extraction</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Bernt</first><last>Andrassy</last></author>
       <pages>2537–2547</pages>
       <url hash="f194eaba">C16-1239</url>
@@ -2513,7 +2513,7 @@
     <paper id="240">
       <title>Bilingual Autoencoders with Global Descriptors for Modeling Parallel Sentences</title>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Hong</first><last>Duan</last></author>
       <author><first>Min</first><last>Zhang</last></author>
@@ -2525,8 +2525,8 @@
     <paper id="241">
       <title>Multi-Engine and Multi-Alignment Based Automatic Post-Editing and its Impact on Translation Productivity</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>2559–2570</pages>
       <url hash="5fc34b82">C16-1241</url>
       <abstract>In this paper we combine two strands of machine translation (MT) research: automatic post-editing (APE) and multi-engine (system combination) MT. APE systems learn a target-language-side second stage MT system from the data produced by human corrected output of a first stage MT system, to improve the output of the first stage MT in what is essentially a sequential MT system combination architecture. At the same time, there is a rich research literature on parallel MT system combination where the same input is fed to multiple engines and the best output is selected or smaller sections of the outputs are combined to obtain improved translation output. In the paper we show that parallel system combination in the APE stage of a sequential MT-APE combination yields substantial translation improvements both measured in terms of automatic evaluation metrics as well as in terms of productivity improvements measured in a post-editing experiment. We also show that system combination on the level of APE alignments yields further improvements. Overall our APE system yields statistically significant improvement of 5.9% relative BLEU over a strong baseline (English–Italian Google MT) and 21.76% productivity increase in a human post-editing experiment with professional translators.</abstract>
@@ -2568,7 +2568,7 @@
       <title>Training Data Enrichment for Infrequent Discourse Relations</title>
       <author><first>Kailang</first><last>Jiang</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>2603–2614</pages>
       <url hash="8e9cd4fd">C16-1245</url>
       <abstract>Discourse parsing is a popular technique widely used in text understanding, sentiment analysis and other NLP tasks. However, for most discourse parsers, the performance varies significantly across different discourse relations. In this paper, we first validate the underfitting hypothesis, i.e., the less frequent a relation is in the training data, the poorer the performance on that relation. We then explore how to increase the number of positive training instances, without resorting to manually creating additional labeled data. We propose a training data enrichment framework that relies on co-training of two different discourse parsers on unlabeled documents. Importantly, we show that co-training alone is not sufficient. The framework requires a filtering step to ensure that only “good quality” unlabeled documents can be used for enrichment and re-training. We propose and evaluate two ways to perform the filtering. The first is to use an agreement score between the two parsers. The second is to use only the confidence score of the faster parser. Our empirical results show that agreement score can help to boost the performance on infrequent relations, and that the confidence score is a viable approximation of the agreement score for infrequent relations.</abstract>
@@ -2577,8 +2577,8 @@
     <paper id="246">
       <title>Inferring Discourse Relations from <fixed-case>PDTB</fixed-case>-style Discourse Labels for Argumentative Revision Classification</title>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Katherine</first><last>Forbes Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Katherine</first><last>Forbes Riley</last></author>
       <pages>2615–2624</pages>
       <url hash="73020678">C16-1246</url>
       <abstract>Penn Discourse Treebank (PDTB)-style annotation focuses on labeling local discourse relations between text spans and typically ignores larger discourse contexts. In this paper we propose two approaches to infer discourse relations in a paragraph-level context from annotated PDTB labels. We investigate the utility of inferring such discourse information using the task of revision classification. Experimental results demonstrate that the inferred information can significantly improve classification performance compared to baselines, not only when PDTB annotation comes from humans but also from automatic parsers.</abstract>
@@ -2588,7 +2588,7 @@
       <title>Capturing Pragmatic Knowledge in Article Usage Prediction using <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Jad</first><last>Kabbara</last></author>
       <author><first>Yulan</first><last>Feng</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>2625–2634</pages>
       <url hash="0538c49e">C16-1247</url>
       <abstract>We examine the potential of recurrent neural networks for handling pragmatic inferences involving complex contextual cues for the task of article usage prediction. We train and compare several variants of Long Short-Term Memory (LSTM) networks with an attention mechanism. Our model outperforms a previous state-of-the-art system, achieving up to 96.63% accuracy on the WSJ/PTB corpus. In addition, we perform a series of analyses to understand the impact of various model choices. We find that the gain in performance can be attributed to the ability of LSTMs to pick up on contextual cues, both local and further away in distance, and that the model is able to solve cases involving reasoning about coreference and synonymy. We also show how the attention mechanism contributes to the interpretability of the model’s effectiveness.</abstract>
@@ -2607,7 +2607,7 @@
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Jian</first><last>Xu</last></author>
       <author><first>Dong</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2647–2655</pages>
       <url hash="54eae163">C16-1249</url>
       <abstract>In the literature, various supervised learning approaches have been adopted to address the task of reader emotion classification. However, the classification performance greatly suffers when the size of the labeled data is limited. In this paper, we propose a two-view label propagation approach to semi-supervised reader emotion classification by exploiting two views, namely source text and response text in a label propagation algorithm. Specifically, our approach depends on two word-document bipartite graphs to model the relationship among the samples in the two views respectively. Besides, the two bipartite graphs are integrated by linking each source text sample with its corresponding response text sample via a length-sensitive transition probability. In this way, our two-view label propagation approach to semi-supervised reader emotion classification largely alleviates the reliance on the strong sufficiency and independence assumptions of the two views, as required in co-training. Empirical evaluation demonstrates the effectiveness of our two-view label propagation approach to semi-supervised reader emotion classification.</abstract>
@@ -2628,7 +2628,7 @@
       <author><first>Erik</first><last>Cambria</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Rajiv</first><last>Bajpai</last></author>
-      <author><first>Bjoern</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Bjoern</first><last>Schuller</last></author>
       <pages>2666–2677</pages>
       <url hash="99d6922e">C16-1251</url>
       <abstract>An important difference between traditional AI systems and human intelligence is the human ability to harness commonsense knowledge gleaned from a lifetime of learning and experience to make informed decisions. This allows humans to adapt easily to novel situations where AI fails catastrophically due to a lack of situation-specific rules and generalization capabilities. Commonsense knowledge also provides background information that enables humans to successfully operate in social situations where such knowledge is typically assumed. Since commonsense consists of information that humans take for granted, gathering it is an extremely difficult task. Previous versions of SenticNet were focused on collecting this kind of knowledge for sentiment analysis but they were heavily limited by their inability to generalize. SenticNet 4 overcomes such limitations by leveraging on conceptual primitives automatically generated by means of hierarchical clustering and dimensionality reduction.</abstract>
@@ -2661,7 +2661,7 @@
     <paper id="254">
       <title>Neural-based Noise Filtering from Word Embeddings</title>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>2699–2707</pages>
       <url hash="5b8ecf62">C16-1254</url>
@@ -2692,7 +2692,7 @@
       <title><fixed-case>M</fixed-case>onday mornings are my fave :) #not Exploring the Automatic Recognition of Irony in <fixed-case>E</fixed-case>nglish tweets</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>2730–2739</pages>
       <url hash="529fb69d">C16-1257</url>
       <abstract>Recognising and understanding irony is crucial for the improvement natural language processing tasks including sentiment analysis. In this study, we describe the construction of an English Twitter corpus and its annotation for irony based on a newly developed fine-grained annotation scheme. We also explore the feasibility of automatic irony recognition by exploiting a varied set of features including lexical, syntactic, sentiment and semantic (Word2Vec) information. Experiments on a held-out test set show that our irony classifier benefits from this combined information, yielding an F1-score of 67.66%. When explicit hashtag information like #irony is included in the data, the system even obtains an F1-score of 92.77%. A qualitative analysis of the output reveals that recognising irony that results from a polarity clash appears to be (much) more feasible than recognising other forms of ironic utterances (e.g., descriptions of situational irony).</abstract>
@@ -2729,7 +2729,7 @@
     <paper id="261">
       <title>Learning Succinct Models: Pipelined Compression with <fixed-case>L</fixed-case>1-Regularization, Hashing, <fixed-case>E</fixed-case>lias-<fixed-case>F</fixed-case>ano Indices, and Quantization</title>
       <author><first>Hajime</first><last>Senuma</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>2774–2784</pages>
       <url hash="fa446965">C16-1261</url>
       <abstract>The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are <tex-math>m</tex-math> features in total but only <tex-math>n = o(\sqrt{m})</tex-math> features are required to distinguish examples, with <tex-math>\Omega(\log m)</tex-math> training examples and reasonable settings, it is possible to obtain a good model in a <i>succinct</i> representation using <tex-math>n \log_2 \frac{m}{n} + o(m)</tex-math> bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias–Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 <i>kilo</i>bytes without notable loss of accuracy.</abstract>
@@ -2756,7 +2756,7 @@
     <paper id="264">
       <title>Is an Image Worth More than a Thousand Words? On the Fine-Grain Semantic Differences between Visual and Linguistic Representations</title>
       <author><first>Guillem</first><last>Collell</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>2807–2817</pages>
       <url hash="9e93a623">C16-1264</url>
       <abstract>Human concept representations are often grounded with visual information, yet some aspects of meaning cannot be visually represented or are better described with language. Thus, vision and language provide complementary information that, properly combined, can potentially yield more complete concept representations. Recently, state-of-the-art distributional semantic models and convolutional neural networks have achieved great success in representing linguistic and visual knowledge respectively. In this paper, we compare both, visual and linguistic representations in their ability to capture different types of fine-grain semantic knowledge—or attributes—of concepts. Humans often describe objects using attributes, that is, properties such as shape, color or functionality, which often transcend the linguistic and visual modalities. In our setting, we evaluate how well attributes can be predicted by using the unimodal representations as inputs. We are interested in first, finding out whether attributes are generally better captured by either the vision or by the language modality; and second, if none of them is clearly superior (as we hypothesize), what type of attributes or semantic knowledge are better encoded from each modality. Ultimately, our study sheds light on the potential of combining visual and textual representations.</abstract>
@@ -2776,7 +2776,7 @@
       <author><first>Naoya</first><last>Inoue</last></author>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
       <author><first>Masayuki</first><last>Ono</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>2829–2838</pages>
       <url hash="a016e6a4">C16-1266</url>
@@ -2795,7 +2795,7 @@
     <paper id="268">
       <title>Distributional Inclusion Hypothesis for Tensor-based Composition</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>2849–2860</pages>
       <url hash="7bece169">C16-1268</url>
       <abstract>According to the distributional inclusion hypothesis, entailment between words can be measured via the feature inclusions of their distributional vectors. In recent work, we showed how this hypothesis can be extended from words to phrases and sentences in the setting of compositional distributional semantics. This paper focuses on inclusion properties of tensors; its main contribution is a theoretical and experimental analysis of how feature inclusion works in different concrete models of verb tensors. We present results for relational, Frobenius, projective, and holistic methods and compare them to the simple vector addition, multiplication, min, and max models. The degrees of entailment thus obtained are evaluated via a variety of existing word-based measures, such as Weed’s and Clarke’s, KL-divergence, APinc, balAPinc, and two of our previously proposed metrics at the phrase/sentence level. We perform experiments on three entailment datasets, investigating which version of tensor-based composition achieves the highest performance when combined with the sentence-level measures.</abstract>
@@ -2814,7 +2814,7 @@
     <paper id="270">
       <title>Reading and Thinking: Re-read <fixed-case>LSTM</fixed-case> Unit for Textual Entailment Recognition</title>
       <author><first>Lei</first><last>Sha</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <pages>2870–2879</pages>
@@ -2866,7 +2866,7 @@
     <paper id="275">
       <title>Neural Paraphrase Generation with Stacked Residual <fixed-case>LSTM</fixed-case> Networks</title>
       <author><first>Aaditya</first><last>Prakash</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Kathy</first><last>Lee</last></author>
       <author><first>Vivek</first><last>Datla</last></author>
       <author><first>Ashequl</first><last>Qadir</last></author>
@@ -2892,7 +2892,7 @@
       <title>Keyphrase Annotation with Graph Co-Ranking</title>
       <author><first>Adrien</first><last>Bougouin</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>2945–2955</pages>
       <url hash="cabb0209">C16-1277</url>
       <abstract>Keyphrase annotation is the task of identifying textual units that represent the main content of a document. Keyphrase annotation is either carried out by extracting the most important phrases from a document, keyphrase extraction, or by assigning entries from a controlled domain-specific vocabulary, keyphrase assignment. Assignment methods are generally more reliable. They provide better-formed keyphrases, as well as keyphrases that do not occur in the document. But they are often silent on the contrary of extraction methods that do not depend on manually built resources. This paper proposes a new method to perform both keyphrase extraction and keyphrase assignment in an integrated and mutual reinforcing manner. Experiments have been carried out on datasets covering different domains of humanities and social sciences. They show statistically significant improvements compared to both keyphrase extraction and keyphrase assignment state-of-the art methods.</abstract>
@@ -2900,7 +2900,7 @@
     </paper>
     <paper id="278">
       <title>What’s in an Explanation? Characterizing Knowledge and Inference Requirements for Elementary Science Exams</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Peter</first><last>Clark</last></author>
@@ -2911,7 +2911,7 @@
     </paper>
     <paper id="279">
       <title>“All <fixed-case>I</fixed-case> know about politics is what <fixed-case>I</fixed-case> read in <fixed-case>T</fixed-case>witter”: Weakly Supervised Models for Extracting Politicians’ Stances From <fixed-case>T</fixed-case>witter</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>2966–2977</pages>
       <url hash="80e385b8">C16-1279</url>
@@ -2932,7 +2932,7 @@
       <title>Political News Sentiment Analysis for Under-resourced Languages</title>
       <author><first>Patrik F.</first><last>Bakken</last></author>
       <author><first>Terje A.</first><last>Bratlie</last></author>
-      <author><first>Cristina</first><last>Marco</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Marco</last></author>
       <author><first>Jon Atle</first><last>Gulla</last></author>
       <pages>2989–2996</pages>
       <url hash="16f70ee5">C16-1281</url>
@@ -2944,7 +2944,7 @@
       <author><first>Jeffrey</first><last>Lund</last></author>
       <author><first>Paul</first><last>Felt</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <pages>2997–3006</pages>
       <url hash="17aaaf4c">C16-1282</url>
       <abstract>Probabilistic models are a useful means for analyzing large text corpora. Integrating such models with human interaction enables many new use cases. However, adding human interaction to probabilistic models requires inference algorithms which are both fast and accurate. We explore the use of Iterated Conditional Modes as a fast alternative to Gibbs sampling or variational EM. We demonstrate superior performance both in run time and model quality on three different models of text including a DP Mixture of Multinomials for web search result clustering, the Interactive Topic Model, and M OM R ESP , a multinomial crowdsourcing model.</abstract>
@@ -2991,7 +2991,7 @@
       <title>Crowdsourcing Complex Language Resources: Playing to Annotate Dependency Syntax</title>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Nicolas</first><last>Lefebvre</last></author>
+      <author id="nicolas-lefebvre"><first>Nicolas</first><last>Lefebvre</last></author>
       <pages>3041–3052</pages>
       <url hash="c59812b7">C16-1286</url>
       <abstract>This article presents the results we obtained on a complex annotation task (that of dependency syntax) using a specifically designed Game with a Purpose, ZombiLingo. We show that with suitable mechanisms (decomposition of the task, training of the players and regular control of the annotation quality during the game), it is possible to obtain annotations whose quality is significantly higher than that obtainable with a parser, provided that enough players participate. The source code of the game and the resulting annotated corpora (for French) are freely available.</abstract>
@@ -3000,7 +3000,7 @@
     <paper id="287">
       <title>Borrow a Little from your Rich Cousin: Using Embeddings and Polarities of <fixed-case>E</fixed-case>nglish Words for Multilingual Sentiment Classification</title>
       <author><first>Prerana</first><last>Singhal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3053–3062</pages>
       <url hash="517dc620">C16-1287</url>
       <abstract>In this paper, we provide a solution to multilingual sentiment classification using deep learning. Given input text in a language, we use word translation into English and then the embeddings of these English words to train a classifier. This projection into the English space plus word embeddings gives a simple and uniform framework for multilingual sentiment analysis. A novel idea is augmentation of the training data with polar words, appearing in these sentences, along with their polarities. This approach leads to a performance gain of 7-10% over traditional classifiers on many languages, irrespective of text genre, despite the scarcity of resources in most languages.</abstract>
@@ -3021,7 +3021,7 @@
       <title>Convolution-Enhanced Bilingual Recursive Neural Network for Bilingual Semantic Modeling</title>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Ruochen</first><last>Li</last></author>
       <author><first>Jianmin</first><last>Yin</last></author>
       <pages>3071–3081</pages>
@@ -3036,7 +3036,7 @@
       <author><first>Nan</first><last>Yang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last></author>
       <pages>3082–3092</pages>
       <url hash="25ff15fe">C16-1290</url>
       <abstract>In neural machine translation, the attention mechanism facilitates the translation process by producing a soft alignment between the source sentence and the target sentence. However, without dedicated distortion and fertility models seen in traditional SMT systems, the learned alignment may not be accurate, which can lead to low translation quality. In this paper, we propose two novel models to improve attention-based neural machine translation. We propose a recurrent attention mechanism as an implicit distortion model, and a fertility conditioned decoder as an implicit fertility model. We conduct experiments on large-scale Chinese–English translation tasks. The results show that our models significantly improve both the alignment and translation quality compared to the original attention mechanism and several other variations.</abstract>
@@ -3047,7 +3047,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>3093–3102</pages>
       <url hash="323fff7a">C16-1291</url>
       <abstract>The attention mechanism is appealing for neural machine translation, since it is able to dynamically encode a source sentence by generating a alignment between a target word and source words. Unfortunately, it has been proved to be worse than conventional alignment models in alignment accuracy. In this paper, we analyze and explain this issue from the point view of reordering, and propose a supervised attention which is learned with guidance from conventional alignment models. Experiments on two Chinese-to-English translation tasks show that the supervised attention mechanism yields better alignments leading to substantial gains over the standard attention based NMT.</abstract>
@@ -3058,8 +3058,8 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>3103–3113</pages>
       <url hash="261c3e60">C16-1292</url>
       <abstract>Evaluating the quality of output from language processing systems such as machine translation or speech recognition is an essential step in ensuring that they are sufficient for practical use. However, depending on the practical requirements, evaluation approaches can differ strongly. Often, reference-based evaluation measures (such as BLEU or WER) are appealing because they are cheap and allow rapid quantitative comparison. On the other hand, practitioners often focus on manual evaluation because they must deal with frequently changing domains and quality standards requested by customers, for which reference-based evaluation is insufficient or not possible due to missing in-domain reference data (Harris et al., 2016). In this paper, we attempt to bridge this gap by proposing a framework for lightly supervised quality estimation. We collect manually annotated scores for a small number of segments in a test corpus or document, and combine them with automatically predicted quality scores for the remaining segments to predict an overall quality estimate. An evaluation shows that our framework estimates quality more reliably than using fully automatic quality estimation approaches, while keeping annotation effort low by not requiring full references to be available for the particular domain.</abstract>
@@ -3068,9 +3068,9 @@
     <paper id="293">
       <title>Improving Translation Selection with Supersenses</title>
       <author><first>Haiqing</first><last>Tang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>3114–3123</pages>
       <url hash="84021b52">C16-1293</url>
       <abstract>Selecting appropriate translations for source words with multiple meanings still remains a challenge for statistical machine translation (SMT). One reason for this is that most SMT systems are not good at detecting the proper sense for a polysemic word when it appears in different contexts. In this paper, we adopt a supersense tagging method to annotate source words with coarse-grained ontological concepts. In order to enable the system to choose an appropriate translation for a word or phrase according to the annotated supersense of the word or phrase, we propose two translation models with supersense knowledge: a maximum entropy based model and a supersense embedding model. The effectiveness of our proposed models is validated on a large-scale English-to-Spanish translation task. Results indicate that our method can significantly improve translation quality via correctly conveying the meaning of the source language to the target language.</abstract>
@@ -3079,7 +3079,7 @@
     <paper id="294">
       <title>Is all that Glitters in Machine Translation Quality Estimation really Gold?</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Meghan</first><last>Dowling</last></author>
       <author><first>Maria</first><last>Eskevich</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
@@ -3093,9 +3093,9 @@
       <title>Connecting Phrase based Statistical Machine Translation Adaptation</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>3135–3145</pages>
       <url hash="80a79dc9">C16-1295</url>
       <abstract>Although more additional corpora are now available for Statistical Machine Translation (SMT), only the ones which belong to the same or similar domains of the original corpus can indeed enhance SMT performance directly. A series of SMT adaptation methods have been proposed to select these similar-domain data, and most of them focus on sentence selection. In comparison, phrase is a smaller and more fine grained unit for data selection, therefore we propose a straightforward and efficient connecting phrase based adaptation method, which is applied to both bilingual phrase pair and monolingual n-gram adaptation. The proposed method is evaluated on IWSLT/NIST data sets, and the results show that phrase based SMT performances are significantly improved (up to +1.6 in comparison with phrase based SMT baseline system and +0.9 in comparison with existing methods).</abstract>
@@ -3123,7 +3123,7 @@
       <title>Universal Reordering via Linguistic Typology</title>
       <author><first>Joachim</first><last>Daiber</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>3167–3176</pages>
       <url hash="dfb87538">C16-1298</url>
       <abstract>In this paper we explore the novel idea of building a single universal reordering model from English to a large number of target languages. To build this model we exploit typological features of word order for a large number of target languages together with source (English) syntactic features and we train this model on a single combined parallel corpus representing all (22) involved language pairs. We contribute experimental evidence for the usefulness of linguistically defined typological features for building such a model. When the universal reordering model is used for preordering followed by monotone translation (no reordering inside the decoder), our experiments show that this pipeline gives comparable or improved translation performance with a phrase-based baseline for a large number of language pairs (12 out of 22) from diverse language families.</abstract>
@@ -3133,7 +3133,7 @@
       <title>A Deep Fusion Model for Domain Adaptation in Phrase-based <fixed-case>MT</fixed-case></title>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <pages>3177–3187</pages>
       <url hash="cb6b9b5a">C16-1299</url>
@@ -3166,7 +3166,7 @@
       <title>Improving Word Alignment of Rare Words with Word Embeddings</title>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
       <author><first>Heshaam</first><last>Faili</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3209–3215</pages>
       <url hash="06ff945a">C16-1302</url>
       <abstract>We address the problem of inducing word alignment for language pairs by developing an unsupervised model with the capability of getting applied to other generative alignment models. We approach the task by: i)proposing a new alignment model based on the IBM alignment model 1 that uses vector representation of words, and ii)examining the use of similar source words to overcome the problem of rare source words and improving the alignments. We apply our method to English-French corpora and run the experiments with different sizes of sentence pairs. Our results show competitive performance against the baseline and in some cases improve the results up to 6.9% in terms of precision.</abstract>
@@ -3174,7 +3174,7 @@
     </paper>
     <paper id="303">
       <title>Measuring the Information Content of Financial News</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Zhiyang</first><last>Teng</last></author>
       <author><first>Zahn</first><last>Bozanic</last></author>
@@ -3187,8 +3187,8 @@
     <paper id="304">
       <title>Automatic Generation and Classification of Minimal Meaningful Propositions in Educational Systems</title>
       <author><first>Andreea</first><last>Godea</last></author>
-      <author><first>Florin</first><last>Bulgarov</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="florin-bulgarov"><first>Florin</first><last>Bulgarov</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>3226–3236</pages>
       <url hash="05e8b114">C16-1304</url>
       <abstract>Truly effective and practical educational systems will only be achievable when they have the ability to fully recognize deep relationships between a learner’s interpretation of a subject and the desired conceptual understanding. In this paper, we take important steps in this direction by introducing a new representation of sentences – Minimal Meaningful Propositions (MMPs), which will allow us to significantly improve the mapping between a learner’s answer and the ideal response. Using this technique, we make significant progress towards highly scalable and domain independent educational systems, that will be able to operate without human intervention. Even though this is a new task, we show very good results both for the extraction of MMPs and for classification with respect to their importance.</abstract>
@@ -3209,7 +3209,7 @@
     <paper id="306">
       <title>Textual complexity as a predictor of difficulty of listening items in language proficiency tests</title>
       <author><first>Anastassia</first><last>Loukina</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Jennifer</first><last>Sakano</last></author>
       <author><first>Youhua</first><last>Wei</last></author>
       <author><first>Kathy</first><last>Sheehan</last></author>
@@ -3222,7 +3222,7 @@
       <title>The Construction of a <fixed-case>C</fixed-case>hinese Collocational Knowledge Resource and Its Application for Second Language Acquisition</title>
       <author><first>Renfen</first><last>Hu</last></author>
       <author><first>Jiayong</first><last>Chen</last></author>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <pages>3254–3263</pages>
       <url hash="aa7ae39a">C16-1307</url>
       <abstract>The appropriate use of collocations is a challenge for second language acquisition. However, high quality and easily accessible Chinese collocation resources are not available for both teachers and students. This paper presents the design and construction of a large scale resource of Chinese collocational knowledge, and a web-based application (OCCA, Online Chinese Collocation Assistant) which offers free and convenient collocation search service to end users. We define and classify collocations based on practical language acquisition needs and utilize a syntax based method to extract nine types of collocations. Totally 37 extraction rules are compiled with word, POS and dependency relation features, 1,750,000 collocations are extracted from a corpus for L2 learning and complementary Wikipedia data, and OCCA is implemented based on these extracted collocations. By comparing OCCA with two traditional collocation dictionaries, we find OCCA has higher entry coverage and collocation quantity, and our method achieves quite low error rate at less than 5%. We also discuss how to apply collocational knowledge to grammatical error detection and demonstrate comparable performance to the best results in 2015 NLP-TEA CGED shared task. The preliminary experiment shows that the collocation knowledge is helpful in detecting all the four types of grammatical errors.</abstract>
@@ -3243,7 +3243,7 @@
       <title>Event Detection with Burst Information Networks</title>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Lei</first><last>Cui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>3276–3286</pages>
@@ -3256,7 +3256,7 @@
       <author><first>Suyang</first><last>Zhu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3287–3297</pages>
       <url hash="31e3f3d9">C16-1310</url>
       <abstract>Machine learning-based methods have obtained great progress on emotion classification. However, in most previous studies, the models are learned based on a single corpus which often suffers from insufficient labeled data. In this paper, we propose a corpus fusion approach to address emotion classification across two corpora which use different emotion taxonomies. The objective of this approach is to utilize the annotated data from one corpus to help the emotion classification on another corpus. An Integer Linear Programming (ILP) optimization is proposed to refine the classification results. Empirical studies show the effectiveness of the proposed approach to corpus fusion for emotion classification.</abstract>
@@ -3306,11 +3306,11 @@
     <paper id="315">
       <title>A Novel Fast Framework for Topic Labeling Based on Similarity-preserved Hashing</title>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Yi-Jing</first><last>Hao</last></author>
+      <author id="yi-jing-zhao"><first>Yi-Jing</first><last>Hao</last></author>
       <author><first>Qiang</first><last>Zhou</last></author>
       <author><first>Wen-Qing</first><last>Yuan</last></author>
       <author><first>Liner</first><last>Yang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>3339–3348</pages>
       <url hash="0688b66c">C16-1315</url>
       <abstract>Recently, topic modeling has been widely applied in data mining due to its powerful ability. A common, major challenge in applying such topic models to other tasks is to accurately interpret the meaning of each topic. Topic labeling, as a major interpreting method, has attracted significant attention recently. However, most of previous works only focus on the effectiveness of topic labeling, and less attention has been paid to quickly creating good topic descriptors; meanwhile, it’s hard to assign labels for new emerging topics by using most of existing methods. To solve the problems above, in this paper, we propose a novel fast topic labeling framework that casts the labeling problem as a k-nearest neighbor (KNN) search problem in a probability vector set. Our experimental results show that the proposed sequential interleaving method based on locality sensitive hashing (LSH) technology is efficient in boosting the comparison speed among probability distributions, and the proposed framework can generate meaningful labels to interpret topics, including new emerging topics.</abstract>
@@ -3362,13 +3362,13 @@
     </paper>
     <paper id="320">
       <title><fixed-case>OCR</fixed-case>++: A Robust Framework For Information Extraction from Scholarly Articles</title>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <author><first>Barnopriyo</first><last>Barua</last></author>
       <author><first>Priyank</first><last>Palod</last></author>
       <author><first>Manvi</first><last>Garg</last></author>
       <author><first>Sidhartha</first><last>Satapathy</last></author>
       <author><first>Samuel</first><last>Bushi</last></author>
-      <author><first>Kumar</first><last>Ayush</last></author>
+      <author id="ayush-kumar"><first>Kumar</first><last>Ayush</last></author>
       <author><first>Krishna</first><last>Sai Rohith</last></author>
       <author><first>Tulasi</first><last>Gamidi</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -3390,7 +3390,7 @@
     <paper id="322">
       <title><fixed-case>T</fixed-case>weet<fixed-case>G</fixed-case>eo - A Tool for Collecting, Processing and Analysing Geo-encoded Linguistic Data</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Curdin</first><last>Derungs</last></author>
       <pages>3412–3421</pages>
       <url hash="04250121">C16-1322</url>
@@ -3399,8 +3399,8 @@
     </paper>
     <paper id="323">
       <title>Extending <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Fine-Grained Collocational Information via Supervised Distributional Learning</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Sara</first><last>Rodríguez-Fernández</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
@@ -3411,7 +3411,7 @@
     </paper>
     <paper id="324">
       <title>A News Editorial Corpus for Mining Argumentation Strategies</title>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Johannes</first><last>Kiesel</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
@@ -3424,11 +3424,11 @@
     <paper id="325">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Memduh</first><last>Gokirmak</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gokirmak</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>3444–3454</pages>
       <url hash="8bc5997c">C16-1325</url>
       <abstract>The Universal Dependencies (UD) project was conceived after the substantial recent interest in unifying annotation schemes across languages. With its own annotation principles and abstract inventory for parts of speech, morphosyntactic features and dependency relations, UD aims to facilitate multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. This paper presents the Turkish IMST-UD Treebank, the first Turkish treebank to be in a UD release. The IMST-UD Treebank was automatically converted from the IMST Treebank, which was also recently released. We describe this conversion procedure in detail, complete with mapping tables. We also present our evaluation of the parsing performances of both versions of the IMST Treebank. Our findings suggest that the UD framework is at least as viable for Turkish as the original annotation framework of the IMST Treebank.</abstract>
@@ -3438,7 +3438,7 @@
       <title>Creating Resources for Dialectal <fixed-case>A</fixed-case>rabic from a Single Annotation: A Case Study on <fixed-case>E</fixed-case>gyptian and <fixed-case>L</fixed-case>evantine</title>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Arfath</first><last>Pasha</last></author>
       <pages>3455–3465</pages>
       <url hash="71df5f6c">C16-1326</url>
@@ -3457,12 +3457,12 @@
     </paper>
     <paper id="328">
       <title><fixed-case>P</fixed-case>an<fixed-case>P</fixed-case>hon: A Resource for Mapping <fixed-case>IPA</fixed-case> Segments to Articulatory Feature Vectors</title>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
       <author><first>Akash</first><last>Bharadwaj</last></author>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>3475–3484</pages>
       <url hash="26aa7074">C16-1328</url>
       <abstract>This paper contributes to a growing body of evidence that—when coupled with appropriate machine-learning techniques–linguistically motivated, information-rich representations can outperform one-hot encodings of linguistic data. In particular, we show that phonological features outperform character-based models. PanPhon is a database relating over 5,000 IPA segments to 21 subsegmental articulatory features. We show that this database boosts performance in various NER-related tasks. Phonologically aware, neural CRF models built on PanPhon features are able to perform better on monolingual Spanish and Turkish NER tasks that character-based models. They have also been shown to work well in transfer models (as between Uzbek and Turkish). PanPhon features also contribute measurably to Orthography-to-IPA conversion tasks.</abstract>
@@ -3484,7 +3484,7 @@
     <paper id="330">
       <title>More is not always better: balancing sense distributions for all-words Word Sense Disambiguation</title>
       <author><first>Marten</first><last>Postma</last></author>
-      <author><first>Ruben</first><last>Izquierdo Bevia</last></author>
+      <author id="ruben-izquierdo"><first>Ruben</first><last>Izquierdo Bevia</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>3496–3506</pages>
       <url hash="2722d59b">C16-1330</url>
@@ -3514,7 +3514,7 @@
     <paper id="333">
       <title>Semantic Tagging with Deep Residual Networks</title>
       <author><first>Johannes</first><last>Bjerva</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>3531–3541</pages>
       <url hash="73ee1da8">C16-1333</url>
@@ -3535,7 +3535,7 @@
       <title><fixed-case>R</fixed-case>eddit Temporal N-gram Corpus and its Applications on Paraphrase and Semantic Similarity in Social Media using a Topic-based Latent Semantic Analysis</title>
       <author><first>Anh</first><last>Dang</last></author>
       <author><first>Abidalrahman</first><last>Moh’d</last></author>
-      <author><first>Aminul</first><last>Islam</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
       <author><first>Rosane</first><last>Minghim</last></author>
       <author><first>Michael</first><last>Smit</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
@@ -3581,12 +3581,12 @@
     <paper id="1">
       <title>An Interactive System for Exploring Community Question Answering Forums</title>
       <author><first>Enamul</first><last>Hoque</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Salvatore</first><last>Romeo</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <pages>1–5</pages>
@@ -3605,7 +3605,7 @@
     </paper>
     <paper id="3">
       <title>A Reading Environment for Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chun Yin</first><last>Lam</last></author>
       <author><first>Shu</first><last>Jiang</last></author>
       <pages>11–15</pages>
@@ -3636,13 +3636,13 @@
       <title>‘<fixed-case>B</fixed-case>on<fixed-case>T</fixed-case>en’ – Corpus Concordance System for ‘<fixed-case>NINJAL</fixed-case> Web <fixed-case>J</fixed-case>apanese Corpus’</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
       <author><first>Kazuya</first><last>Kawahara</last></author>
-      <author><first>Yuya</first><last>Takei</last></author>
+      <author id="yuka-takei"><first>Yuya</first><last>Takei</last></author>
       <author><first>Hideto</first><last>Masuoka</last></author>
       <author><first>Yasuko</first><last>Ohba</last></author>
       <author><first>Yuki</first><last>Torii</last></author>
       <author><first>Toru</first><last>Morii</last></author>
       <author><first>Yuki</first><last>Tanaka</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <author><first>Sachi</first><last>Kato</last></author>
       <author><first>Hikari</first><last>Konishi</last></author>
       <pages>25–29</pages>
@@ -3655,7 +3655,7 @@
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>30–34</pages>
       <url hash="0a158b09">C16-2007</url>
       <abstract>Simultaneous interpretation allows people to communicate spontaneously across language boundaries, but such services are prohibitively expensive for the general public. This paper presents a fully automatic simultaneous interpretation system to address this problem. Though the development is still at an early stage, the system is capable of keeping up with the fastest of the TED speakers while at the same time delivering high-quality translations. We believe that the system will become an effective tool for facilitating cross-lingual communication in the future.</abstract>
@@ -3664,11 +3664,11 @@
     <paper id="8">
       <title><fixed-case>M</fixed-case>u<fixed-case>TUAL</fixed-case>: A Controlled Authoring Support System Enabling Contextual Machine Translation</title>
       <author><first>Rei</first><last>Miyata</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>35–39</pages>
       <url hash="fd995628">C16-2008</url>
       <abstract>The paper introduces a web-based authoring support system, MuTUAL, which aims to help writers create multilingual texts. The highlighted feature of the system is that it enables machine translation (MT) to generate outputs appropriate to their functional context within the target document. Our system is operational online, implementing core mechanisms for document structuring and controlled writing. These include a topic template and a controlled language authoring assistant, linked to our statistical MT system.</abstract>
@@ -3676,9 +3676,9 @@
     </paper>
     <paper id="9">
       <title>Joint search in a bilingual valency lexicon and an annotated corpus</title>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>40–44</pages>
       <url hash="8a506104">C16-2009</url>
       <abstract>In this paper and the associated system demo, we present an advanced search system that allows to perform a joint search over a (bilingual) valency lexicon and a correspondingly annotated linked parallel corpus. This search tool has been developed on the basis of the Prague Czech-English Dependency Treebank, but its ideas are applicable in principle to any bilingual parallel corpus that is annotated for dependencies and valency (i.e., predicate-argument structure), and where verbs are linked to appropriate entries in an associated valency lexicon. Our online search tool consolidates more search interfaces into one, providing expanded structured search capability and a more efficient advanced way to search, allowing users to search for verb pairs, verbal argument pairs, their surface realization as recorded in the lexicon, or for their surface form actually appearing in the linked parallel corpus. The search system is currently under development, and is replacing our current search tool available at <url>http://lindat.mff.cuni.cz/services/CzEngVallex</url>, which could search the lexicon but the queries cannot take advantage of the underlying corpus nor use the additional surface form information from the lexicon(s). The system is available as open source.</abstract>
@@ -3698,7 +3698,7 @@
     <paper id="11">
       <title>Demonstration of <fixed-case>C</fixed-case>ha<fixed-case>K</fixed-case>i.<fixed-case>NET</fixed-case> – beyond the corpus search system</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Toshio</first><last>Morita</last></author>
       <pages>49–53</pages>
       <url hash="3f90c4ec">C16-2011</url>
@@ -3708,7 +3708,7 @@
     <paper id="12">
       <title><fixed-case>V</fixed-case>ox<fixed-case>S</fixed-case>im: A Visual Platform for Modeling Motion Language</title>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>54–58</pages>
       <url hash="27205d45">C16-2012</url>
       <abstract>Much existing work in text-to-scene generation focuses on generating static scenes. By introducing a focus on motion verbs, we integrate dynamic semantics into a rich formal model of events to generate animations in real time that correlate with human conceptions of the event described. This paper presents a working system that generates these animated scenes over a test set, discussing challenges encountered and describing the solutions implemented.</abstract>
@@ -3753,7 +3753,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>A</fixed-case>nita: An Intelligent Text Adaptation Tool</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>79–83</pages>
       <url hash="5afc3230">C16-2017</url>
@@ -3771,7 +3771,7 @@
     </paper>
     <paper id="19">
       <title>On-line Multilingual Linguistic Services</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <pages>89–92</pages>
@@ -3781,7 +3781,7 @@
     </paper>
     <paper id="20">
       <title>A Customizable Editor for Text Simplification</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Wenlong</first><last>Zhao</last></author>
       <author><first>Wenxiu</first><last>Xie</last></author>
       <pages>93–97</pages>
@@ -3792,10 +3792,10 @@
     <paper id="21">
       <title><fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og Online: A Web-based <fixed-case>CAT</fixed-case> Tool for Distributed Translation with Data Capture for <fixed-case>APE</fixed-case> and Translation Process Research</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>98–102</pages>
       <url hash="eb2a5b25">C16-2021</url>
       <abstract>We present a free web-based CAT tool called CATaLog Online which provides a novel and user-friendly online CAT environment for post-editors/translators. The goal is to support distributed translation, reduce post-editing time and effort, improve the post-editing experience and capture data for incremental MT/APE (automatic post-editing) and translation process research. The tool supports individual as well as batch mode file translation and provides translations from three engines – translation memory (TM), MT and APE. TM suggestions are color coded to accelerate the post-editing task. The users can integrate their personal TM/MT outputs. The tool remotely monitors and records post-editing activities generating an extensive range of post-editing logs.</abstract>
@@ -3835,8 +3835,8 @@
     </paper>
     <paper id="25">
       <title>What topic do you want to hear about? A bilingual talking robot using <fixed-case>E</fixed-case>nglish and <fixed-case>J</fixed-case>apanese <fixed-case>W</fixed-case>ikipedias</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <pages>116–120</pages>
       <url hash="ae735c43">C16-2025</url>
@@ -3847,8 +3847,8 @@
       <title>Annotating Discourse Relations with the <fixed-case>PDTB</fixed-case> Annotator</title>
       <author><first>Alan</first><last>Lee</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>121–125</pages>
       <url hash="09f15c9a">C16-2026</url>
       <abstract>The PDTB Annotator is a tool for annotating and adjudicating discourse relations based on the annotation framework of the Penn Discourse TreeBank (PDTB). This demo describes the benefits of using the PDTB Annotator, gives an overview of the PDTB Framework and discusses the tool’s features, setup requirements and how it can also be used for adjudication.</abstract>
@@ -3858,7 +3858,7 @@
       <title>Opinion Retrieval Systems using Tweet-external Factors</title>
       <author><first>Yoon-Sung</first><last>Kim</last></author>
       <author><first>Young-In</first><last>Song</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>126–130</pages>
       <url hash="1d9ba93c">C16-2027</url>
       <abstract>Opinion mining is a natural language processing technique which extracts subjective information from natural language text. To estimate an opinion about a query in large data collection, an opinion retrieval system that retrieves subjective and relevant information about the query can be useful. We present an opinion retrieval system that retrieves subjective and query-relevant tweets from Twitter, which is a useful source of obtaining real-time opinions. Our system outperforms previous opinion retrieval systems, and it further provides subjective information about Twitter authors and hashtags to describe their subjective tendencies.</abstract>
@@ -3866,10 +3866,10 @@
     </paper>
     <paper id="28">
       <title><fixed-case>T</fixed-case>ext<fixed-case>P</fixed-case>ro-<fixed-case>AL</fixed-case>: An Active Learning Platform for Flexible and Efficient Production of Training Data for <fixed-case>NLP</fixed-case> Tasks</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
       <author><first>Mohammed R. H.</first><last>Qwaider</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <pages>131–135</pages>
       <url hash="164e1dc0">C16-2028</url>
       <abstract>This paper presents TextPro-AL (Active Learning for Text Processing), a platform where human annotators can efficiently work to produce high quality training data for new domains and new languages exploiting Active Learning methodologies. TextPro-AL is a web-based application integrating four components: a machine learning based NLP pipeline, an annotation editor for task definition and text annotations, an incremental re-training procedure based on active learning selection from a large pool of unannotated data, and a graphical visualization of the learning status of the system.</abstract>
@@ -3878,7 +3878,7 @@
     <paper id="29">
       <title><fixed-case>S</fixed-case>ide<fixed-case>N</fixed-case>oter: Scholarly Paper Browsing System based on <fixed-case>PDF</fixed-case> Restructuring and Text Annotation</title>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>136–140</pages>
       <url hash="dafff903">C16-2029</url>
       <abstract>In this paper, we discuss our ongoing efforts to construct a scientific paper browsing system that helps users to read and understand advanced technical content distributed in PDF. Since PDF is a format specifically designed for printing, layout and logical structures of documents are indistinguishably embedded in the file. It requires much effort to extract natural language text from PDF files, and reversely, display semantic annotations produced by NLP tools on the original page layout. In our browsing system, we tackle these issues caused by the gap between printable document and plain text. Our system provides ways to extract natural language sentences from PDF files together with their logical structures, and also to map arbitrary textual spans to their corresponding regions on page images. We setup a demonstration system using papers published in ACL anthology and demonstrate the enhanced search and refined recommendation functions which we plan to make widely available to NLP researchers.</abstract>
@@ -3889,7 +3889,7 @@
       <author><first>Shih-Ming</first><last>Wang</last></author>
       <author><first>Chun-Hui Scott</first><last>Lee</last></author>
       <author><first>Yu-Chun</first><last>Lo</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>141–145</pages>
       <url hash="f07a09b6">C16-2030</url>
@@ -3929,10 +3929,10 @@
     <paper id="34">
       <title>The Open Framework for Developing Knowledge Base And Question Answering System</title>
       <author><first>Jiseong</first><last>Kim</last></author>
-      <author><first>GyuHyeon</first><last>Choi</last></author>
+      <author id="gyuhyeon-choi"><first>GyuHyeon</first><last>Choi</last></author>
       <author><first>Jung-Uk</first><last>Kim</last></author>
-      <author><first>Eun-Kyung</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="eun-kyung-kim"><first>Eun-Kyung</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>161–165</pages>
       <url hash="1d64557c">C16-2034</url>
       <abstract>Developing a question answering (QA) system is a task of implementing and integrating modules of different technologies and evaluating an integrated whole system, which inevitably goes with a collaboration among experts of different domains. For supporting a easy collaboration, this demonstration presents the open framework that aims to support developing a QA system in collaborative and intuitive ways. The demonstration also shows the QA system developed by our novel framework.</abstract>
@@ -3944,7 +3944,7 @@
       <author><first>Hao-Chun</first><last>Peng</last></author>
       <author><first>Mei-Cih</first><last>Yeh</last></author>
       <author><first>Peng-Yu</first><last>Chen</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>166–169</pages>
       <url hash="f6907cd0">C16-2035</url>
       <abstract>This paper shows the great potential of incorporating different approaches to help writing. Not only did they solve different kinds of writing problems, but also they complement and reinforce each other to be a complete and effective solution. Despite the extensive and multifaceted feedback and suggestion, writing is not all about syntactically or lexically well-written. It involves contents, structure, the certain understanding of the background, and many other factors to compose a rich, organized and sophisticated text. (e.g., conventional structure and idioms in academic writing). There is still a long way to go to accomplish the ultimate goal. We envision the future of writing to be a joyful experience with the help of instantaneous suggestion and constructive feedback.</abstract>
@@ -3955,7 +3955,7 @@
       <author><first>Christina</first><last>Niklaus</last></author>
       <author><first>Bernhard</first><last>Bermeitinger</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>170–174</pages>
       <url hash="cf3b69ed">C16-2036</url>
       <abstract>We present a text simplification approach that is directed at improving the performance of state-of-the-art Open Relation Extraction (RE) systems. As syntactically complex sentences often pose a challenge for current Open RE approaches, we have developed a simplification framework that performs a pre-processing step by taking a single sentence as input and using a set of syntactic-based transformation rules to create a textual input that is easier to process for subsequently applied Open RE systems.</abstract>
@@ -3964,8 +3964,8 @@
     <paper id="37">
       <title><fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Expansion Based on Projection of <fixed-case>J</fixed-case>apanese <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Jeong-uk</first><last>Kim</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>175–179</pages>
       <url hash="72a86649">C16-2037</url>
       <abstract>FrameNet project has begun from Berkeley in 1997, and is now supported in several countries reflecting characteristics of each language. The work for generating Korean FrameNet was already done by converting annotated English sentences into Korean with trained translators. However, high cost of frame-preservation and error revision was a huge burden on further expansion of FrameNet. This study makes use of linguistic similarity between Japanese and Korean to increase Korean FrameNet corpus with low cost. We also suggest adapting PubAnnotation and Korean-friendly valence patterns to FrameNet for increased accessibility.</abstract>
@@ -3987,7 +3987,7 @@
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
       <author><first>Jun-Seok</first><last>Kim</last></author>
       <author><first>Joong-Hwi</first><last>Shin</last></author>
-      <author><first>Jaesong</first><last>Lee</last></author>
+      <author id="jaesong-lee"><first>Jaesong</first><last>Lee</last></author>
       <author><first>Ying-Xiu</first><last>Quan</last></author>
       <author><first>Young-Seob</first><last>Jeong</last></author>
       <pages>185–188</pages>
@@ -4015,7 +4015,7 @@
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Ming</first><last>Liao</last></author>
       <author><first>Jia</first><last>Zhu</last></author>
-      <author><first>Kam-fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-fai</first><last>Wong</last></author>
       <pages>194–197</pages>
       <url hash="e90a35ab">C16-2041</url>
       <abstract>We present a system called ACE for Automatic Colloquialism and Errors detection for written Chinese. ACE is based on the combination of N-gram model and rule-base model. Although it focuses on detecting colloquial Cantonese (a dialect of Chinese) at the current stage, it can be extended to detect other dialects. We chose Cantonese becauase it has many interesting properties, such as unique grammar system and huge colloquial terms, that turn the detection task extremely challenging. We conducted experiments using real data and synthetic data. The results indicated that ACE is highly reliable and effective.</abstract>
@@ -4031,8 +4031,8 @@
     </paper>
     <paper id="43">
       <title><fixed-case>MAGES</fixed-case>: A Multilingual Angle-integrated Grouping-based Entity Summarization System</title>
-      <author><first>Eun-kyung</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="eun-kyung-kim"><first>Eun-kyung</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>203–207</pages>
       <url hash="97a139c0">C16-2043</url>
       <abstract>This demo presents MAGES (multilingual angle-integrated grouping-based entity summarization), an entity summarization system for a large knowledge base such as DBpedia based on a entity-group-bound ranking in a single integrated entity space across multiple language-specific editions. MAGES offers a multilingual angle-integrated space model, which has the advantage of overcoming missing semantic tags (i.e., categories) caused by biases in different language communities, and can contribute to the creation of entity groups that are well-formed and more stable than the monolingual condition within it. MAGES can help people quickly identify the essential points of the entities when they search or browse a large volume of entity-centric data. Evaluation results on the same experimental data demonstrate that our system produces a better summary compared with other representative DBpedia entity summarization methods.</abstract>
@@ -4063,7 +4063,7 @@
       <author><first>Mónica</first><last>Domínguez</last></author>
       <author><first>Iván</first><last>Latorre</last></author>
       <author><first>Mireia</first><last>Farrús</last></author>
-      <author><first>Joan</first><last>Codina-Filbà</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filbà</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>218–222</pages>
       <url hash="9565a530">C16-2046</url>
@@ -4097,7 +4097,7 @@
       <author><first>Jonas</first><last>Wacker</last></author>
       <author><first>Stefan</first><last>Radomski</last></author>
       <author><first>Max</first><last>Mühlhäuser</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>233–237</pages>
       <url hash="43d44fb8">C16-2049</url>
       <abstract>In this demonstration paper we describe Ambient Search, a system that displays and retrieves documents in real time based on speech input. The system operates continuously in ambient mode, i.e. it generates speech transcriptions and identifies main keywords and keyphrases, while also querying its index to display relevant documents without explicit query. Without user intervention, the results are dynamically updated; users can choose to interact with the system at any time, employing a conversation protocol that is enriched with the ambient information gathered continuously. Our evaluation shows that Ambient Search outperforms another implicit speech-based information retrieval system. Ambient search is available as open source software.</abstract>
@@ -4123,7 +4123,7 @@
     <paper id="52">
       <title>Towards Non-projective High-Order Dependency Parser</title>
       <author><first>Wenjing</first><last>Fang</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <author><first>Yizhong</first><last>Wang</last></author>
       <author><first>Jia</first><last>Tan</last></author>
       <pages>248–252</pages>
@@ -4156,7 +4156,7 @@
       <author><first>Junta</first><last>Mizuno</last></author>
       <author><first>Masahiro</first><last>Tanaka</last></author>
       <author><first>Kiyonori</first><last>Ohtake</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
@@ -4259,7 +4259,7 @@
     </paper>
     <paper id="64">
       <title><fixed-case>K</fixed-case>yoto-<fixed-case>NMT</fixed-case>: a Neural Machine Translation implementation in Chainer</title>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <pages>307–311</pages>
       <url hash="1a3f4b8e">C16-2064</url>
       <abstract>We present Kyoto-NMT, an open-source implementation of the Neural Machine Translation paradigm. This implementation is done in Python and Chainer, an easy-to-use Deep Learning Framework.</abstract>
@@ -4271,7 +4271,7 @@
       <booktitle>Proceedings of <fixed-case>COLING</fixed-case> 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="ce026f3a">C16-3</url>
       <editor><first>Marcello</first><last>Federico</last></editor>
-      <editor><first>Akiko</first><last>Aizawa</last></editor>
+      <editor id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -4284,7 +4284,7 @@
     </frontmatter>
     <paper id="1">
       <title>Compositional Distributional Models of Meaning</title>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
       <pages>1–4</pages>
       <url hash="011f9dc2">C16-3001</url>
@@ -4312,8 +4312,8 @@
     </paper>
     <paper id="4">
       <title>Quality Estimation for Language Output Applications</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>14–17</pages>
       <url hash="94844985">C16-3004</url>
@@ -4331,7 +4331,7 @@
     <paper id="6">
       <title>Succinct Data Structures for <fixed-case>NLP</fixed-case>-at-Scale</title>
       <author><first>Matthias</first><last>Petri</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>20–21</pages>
       <url hash="6409f91f">C16-3006</url>
       <abstract>Succinct data structures involve the use of novel data structures, compression technologies, and other mechanisms to allow data to be stored in extremely small memory or disk footprints, while still allowing for efficient access to the underlying data. They have successfully been applied in areas such as Information Retrieval and Bioinformatics to create highly compressible in-memory search indexes which provide efficient search functionality over datasets which traditionally could only be processed using external memory data structures. Modern technologies in this space are not well known within the NLP community, but have the potential to revolutionise NLP, particularly the application to ‘big data’ in the form of terabyte and larger corpora. This tutorial will present a practical introduction to the most important succinct data structures, tools, and applications with the intent of providing the researchers with a jump-start into this domain. The focus of this tutorial will be efficient text processing utilising space efficient representations of suffix arrays, suffix trees and searchable integer compression schemes with specific applications of succinct data structures to common NLP tasks such as <tex-math>n</tex-math>-gram language modelling.</abstract>
@@ -4339,7 +4339,7 @@
     </paper>
     <paper id="7">
       <title>The Role of <fixed-case>W</fixed-case>ikipedia in Text Analysis and Retrieval</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>22</pages>
       <url hash="77b523bd">C16-3007</url>
       <abstract>This tutorial examines the characteristics, advantages and limitations of Wikipedia relative to other existing, human-curated resources of knowledge; derivative resources, created by converting semi-structured content in Wikipedia into structured data; the role of Wikipedia and its derivatives in text analysis; and the role of Wikipedia and its derivatives in enhancing information retrieval.</abstract>
diff --git a/data/xml/C18.xml b/data/xml/C18.xml
index e00720247f..60f0dd14eb 100644
--- a/data/xml/C18.xml
+++ b/data/xml/C18.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 27th International Conference on Computational Linguistics</booktitle>
       <url hash="8cb7fe8f">C18-1</url>
-      <editor><first>Emily M.</first><last>Bender</last></editor>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="emily-m-bender"><first>Emily M.</first><last>Bender</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <editor><first>Pierre</first><last>Isabelle</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
@@ -21,7 +21,7 @@
       <title>A New Approach to <fixed-case>A</fixed-case>nimacy Detection</title>
       <author><first>Labiba</first><last>Jahan</last></author>
       <author><first>Geeticka</first><last>Chauhan</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>1–12</pages>
       <abstract>Animacy is a necessary property for a referent to be an agent, and thus animacy detection is useful for a variety of natural language processing tasks, including word sense disambiguation, co-reference resolution, semantic role labeling, and others. Prior work treated animacy as a word-level property, and has developed statistical classifiers to classify words as either animate or inanimate. We discuss why this approach to the problem is ill-posed, and present a new approach based on classifying the animacy of co-reference chains. We show that simple voting approaches to inferring the animacy of a chain from its constituent words perform relatively poorly, and then present a hybrid system merging supervised machine learning (ML) and a small number of hand-built rules to compute the animacy of referring expressions and co-reference chains. This method achieves state of the art performance. The supervised ML component leverages features such as word embeddings over referring expressions, parts of speech, and grammatical and semantic roles. The rules take into consideration parts of speech and the hypernymy structure encoded in WordNet. The system achieves an F1 of 0.88 for classifying the animacy of referring expressions, which is comparable to state of the art results for classifying the animacy of words, and achieves an F1 of 0.75 for classifying the animacy of coreference chains themselves. We release our training and test dataset, which includes 142 texts (all narratives) comprising 156,154 words, 34,698 referring expressions, and 10,941 co-reference chains. We test the method on a subset of the OntoNotes dataset, showing using manual sampling that animacy classification is 90% +/- 2% accurate for coreference chains, and 92% +/- 1% for referring expressions. The data also contains 46 folktales, which present an interesting challenge because they often involve characters who are members of traditionally inanimate classes (e.g., stoves that walk, trees that talk). We show that our system is able to detect the animacy of these unusual referents with an F1 of 0.95.</abstract>
       <url hash="c73a3256">C18-1001</url>
@@ -31,7 +31,7 @@
       <title>Zero Pronoun Resolution with Attention-based Neural Network</title>
       <author><first>Qingyu</first><last>Yin</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>13–23</pages>
@@ -42,7 +42,7 @@
     <paper id="3">
       <title>They Exist! Introducing Plural Mentions to Coreference Resolution and Entity Linking</title>
       <author><first>Ethan</first><last>Zhou</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>24–34</pages>
       <abstract>This paper analyzes arguably the most challenging yet under-explored aspect of resolution tasks such as coreference resolution and entity linking, that is the resolution of plural mentions. Unlike singular mentions each of which represents one entity, plural mentions stand for multiple entities. To tackle this aspect, we take the character identification corpus from the SemEval 2018 shared task that consists of entity annotation for singular mentions, and expand it by adding annotation for plural mentions. We then introduce a novel coreference resolution algorithm that selectively creates clusters to handle both singular and plural mentions, and also a deep learning-based entity linking model that jointly handles both types of mentions through multi-task learning. Adjusted evaluation metrics are proposed for these tasks as well to handle the uniqueness of plural mentions. Our experiments show that the new coreference resolution and entity linking models significantly outperform traditional models designed only for singular mentions. To the best of our knowledge, this is the first time that plural mentions are thoroughly analyzed for these two resolution tasks.</abstract>
       <url hash="cd80314c">C18-1003</url>
@@ -60,9 +60,9 @@
     <paper id="5">
       <title>Unsupervised Morphology Learning with Statistical Paradigms</title>
       <author><first>Hongzhi</first><last>Xu</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>44–54</pages>
       <abstract>This paper describes an unsupervised model for morphological segmentation that exploits the notion of paradigms, which are sets of morphological categories (e.g., suffixes) that can be applied to a homogeneous set of words (e.g., nouns or verbs). Our algorithm identifies statistically reliable paradigms from the morphological segmentation result of a probabilistic model, and chooses reliable suffixes from them. The new suffixes can be fed back iteratively to improve the accuracy of the probabilistic model. Finally, the unreliable paradigms are subjected to pruning to eliminate unreliable morphological relations between words. The paradigm-based algorithm significantly improves segmentation accuracy. Our method achieves start-of-the-art results on experiments using the Morpho-Challenge data, including English, Turkish, and Finnish.</abstract>
       <url hash="cfd3af7b">C18-1005</url>
@@ -72,8 +72,8 @@
       <title>Challenges of language technologies for the indigenous languages of the <fixed-case>A</fixed-case>mericas</title>
       <author><first>Manuel</first><last>Mager</last></author>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
-      <author><first>Ivan</first><last>Meza-Ruiz</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza-Ruiz</last></author>
       <pages>55–69</pages>
       <abstract>Indigenous languages of the American continent are highly diverse. However, they have received little attention from the technological perspective. In this paper, we review the research, the digital resources and the available NLP systems that focus on these languages. We present the main challenges and research questions that arise when distant languages and low-resource scenarios are faced. We would like to encourage NLP research in linguistically rich and diverse areas like the Americas.</abstract>
       <url hash="f90d45a1">C18-1006</url>
@@ -87,7 +87,7 @@
       <author><first>Ruochen</first><last>Xu</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>70–82</pages>
       <abstract>The use of machine learning for NLP generally requires resources for training. Tasks performed in a low-resource language usually rely on labeled data in another, typically resource-rich, language. However, there might not be enough labeled data even in a resource-rich language such as English. In such cases, one approach is to use a hand-crafted approach that utilizes only a small bilingual dictionary with minimal manual verification to create distantly supervised data. Another is to explore typical machine learning techniques, for example adversarial training of bilingual word representations. We find that in event-type detection task—the task to classify [parts of] documents into a fixed set of labels—they give about the same performance. We explore ways in which the two methods can be complementary and also see how to best utilize a limited budget for manual annotation to maximize performance gain.</abstract>
       <url hash="63fd2cde">C18-1007</url>
@@ -165,8 +165,8 @@
       <author><first>Yang</first><last>Xu</last></author>
       <author><first>Huibin</first><last>Ruan</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>177–189</pages>
       <abstract>Event relation recognition is a challenging language processing task. It is required to determine the relation class of a pair of query events, such as causality, under the condition that there isn’t any reliable clue for use. We follow the traditional statistical approach in this paper, speculating the relation class of the target events based on the relation-class distributions on the similar events. There is minimal supervision used during the speculation process. In particular, we incorporate image processing into the acquisition of similar event instances, including the utilization of images for visually representing event scenes, and the use of the neural network based image matching for approximate calculation between events. We test our method on the ACE-R2 corpus and compared our model with the fully-supervised neural network models. Experimental results show that we achieve a comparable performance to CNN while slightly better than LSTM.</abstract>
       <url hash="d873af13">C18-1015</url>
@@ -187,7 +187,7 @@
       <author><first>Vivek</first><last>Kulkarni</last></author>
       <author><first>Yingtao</first><last>Tian</last></author>
       <author><first>Parth</first><last>Dandiwala</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <pages>202–212</pages>
       <abstract>We present domain independent models to date documents based only on neologism usage patterns. Our models capture patterns of neologism usage over time to date texts, provide insights into temporal locality of word usage over a span of 150 years, and generalize to various domains like News, Fiction, and Non-Fiction with competitive performance. Quite intriguingly, we show that by modeling only the distribution of usage counts over neologisms (the model being agnostic of the particular words themselves), we achieve competitive performance using several orders of magnitude fewer features (only 200 input features) compared to state of the art models some of which use 200K features.</abstract>
       <url hash="e9721fe5">C18-1017</url>
@@ -197,7 +197,7 @@
       <title>Neural Math Word Problem Solver with Reinforcement Learning</title>
       <author><first>Danqing</first><last>Huang</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Jian</first><last>Yin</last></author>
       <pages>213–223</pages>
       <abstract>Sequence-to-sequence model has been applied to solve math word problems. The model takes math problem descriptions as input and generates equations as output. The advantage of sequence-to-sequence model requires no feature engineering and can generate equations that do not exist in training data. However, our experimental analysis reveals that this model suffers from two shortcomings: (1) generate spurious numbers; (2) generate numbers at wrong positions. In this paper, we propose incorporating copy and alignment mechanism to the sequence-to-sequence model (namely CASS) to address these shortcomings. To train our model, we apply reinforcement learning to directly optimize the solution accuracy. It overcomes the “train-test discrepancy” issue of maximum likelihood estimation, which uses the surrogate objective of maximizing equation likelihood during training while the evaluation metric is solution accuracy (non-differentiable) at test time. Furthermore, to explore the effectiveness of our neural model, we use our model output as a feature and incorporate it into the feature-based model. Experimental results show that (1) The copy and alignment mechanism is effective to address the two issues; (2) Reinforcement learning leads to better performance than maximum likelihood on this task; (3) Our neural model is complementary to the feature-based model and their combination significantly outperforms the state-of-the-art results.</abstract>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="19">
       <title>Personalizing Lexical Simplification</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
       <pages>224–232</pages>
       <abstract>A lexical simplification (LS) system aims to substitute complex words with simple words in a text, while preserving its meaning and grammaticality. Despite individual users’ differences in vocabulary knowledge, current systems do not consider these variations; rather, they are trained to find one optimal substitution or ranked list of substitutions for all users. We evaluate the performance of a state-of-the-art LS system on individual learners of English at different proficiency levels, and measure the benefits of using complex word identification (CWI) models to personalize the system. Experimental results show that even a simple personalized CWI model, based on graded vocabulary lists, can help the system avoid some unnecessary simplifications and produce more readable output.</abstract>
@@ -225,8 +225,8 @@
     <paper id="21">
       <title><fixed-case>L</fixed-case>exi: A tool for adaptive, personalized text simplification</title>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>245–258</pages>
       <abstract>Most previous research in text simplification has aimed to develop generic solutions, assuming very homogeneous target audiences with consistent intra-group simplification needs. We argue that this assumption does not hold, and that instead we need to develop simplification systems that adapt to the individual needs of specific users. As a first step towards personalized simplification, we propose a framework for adaptive lexical simplification and introduce Lexi, a free open-source and easily extensible tool for adaptive, personalized text simplification. Lexi is easily installed as a browser extension, enabling easy access to the service for its users.</abstract>
       <url hash="09496d0a">C18-1021</url>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="25">
       <title>Joint Learning from Labeled and Unlabeled Data for Information Retrieval</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Ping</first><last>Cheng</last></author>
       <author><first>Le</first><last>Jia</last></author>
       <pages>293–302</pages>
@@ -275,8 +275,8 @@
     </paper>
     <paper id="26">
       <title>Modeling the Readability of <fixed-case>G</fixed-case>erman Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation</title>
-      <author><first>Zarah</first><last>Weiß</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiß</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>303–317</pages>
       <abstract>We analyze two novel data sets of German educational media texts targeting adults and children. The analysis is based on 400 automatically extracted measures of linguistic complexity from a wide range of linguistic domains. We show that both data sets exhibit broad linguistic adaptation to the target audience, which generalizes across both data sets. Our most successful binary classification model for German readability robustly shows high accuracy between 89.4%–98.9% for both data sets. To our knowledge, this comprehensive German readability model is the first for which robust cross-corpus performance has been shown. The research also contributes resources for German readability assessment that are externally validated as successful for different target audiences: we compiled a new corpus of German news broadcast subtitles, the Tagesschau/Logo corpus, and crawled a GEO/GEOlino corpus substantially enlarging the data compiled by Hancke et al. 2012.</abstract>
       <url hash="0903f946">C18-1026</url>
@@ -284,7 +284,7 @@
     </paper>
     <paper id="27">
       <title>Automatic Assessment of Conceptual Text Complexity Using Knowledge Graphs</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Ioana</first><last>Hulpuş</last></author>
       <pages>318–330</pages>
       <abstract>Complexity of texts is usually assessed only at the lexical and syntactic levels. Although it is known that conceptual complexity plays a significant role in text understanding, no attempts have been made at assessing it automatically. We propose to automatically estimate the conceptual complexity of texts by exploiting a number of graph-based measures on a large knowledge base. By using a high-quality language learners corpus for English, we show that graph-based measures of individual text concepts, as well as the way they relate to each other in the knowledge graph, have a high discriminative power when distinguishing between two versions of the same text. Furthermore, when used as features in a binary classification task aiming to choose the simpler of two versions of the same text, our measures achieve high performance even in a default setup.</abstract>
@@ -294,7 +294,7 @@
     <paper id="28">
       <title><fixed-case>P</fixed-case>ar4<fixed-case>S</fixed-case>im – Adaptive Paraphrasing for Text Simplification</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>331–342</pages>
       <abstract>Learning from a real-world data stream and continuously updating the model without explicit supervision is a new challenge for NLP applications with machine learning components. In this work, we have developed an adaptive learning system for text simplification, which improves the underlying learning-to-rank model from usage data, i.e. how users have employed the system for the task of simplification. Our experimental result shows that, over a period of time, the performance of the embedded paraphrase ranking model increases steadily improving from a score of 62.88% up to 75.70% based on the NDCG@10 evaluation metrics. To our knowledge, this is the first study where an NLP component is adaptively improved through usage.</abstract>
       <url hash="6aacb636">C18-1028</url>
@@ -346,8 +346,8 @@
     <paper id="33">
       <title>Authorship Identification for Literary Book Recommendations</title>
       <author><first>Haifa</first><last>Alharthi</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>390–400</pages>
       <abstract>Book recommender systems can help promote the practice of reading for pleasure, which has been declining in recent years. One factor that influences reading preferences is writing style. We propose a system that recommends books after learning their authors’ style. To our knowledge, this is the first work that applies the information learned by an author-identification model to book recommendations. We evaluated the system according to a top-k recommendation scenario. Our system gives better accuracy when compared with many state-of-the-art methods. We also conducted a qualitative analysis by checking if similar books/authors were annotated similarly by experts.</abstract>
       <url hash="4c06ee62">C18-1033</url>
@@ -356,8 +356,8 @@
     <paper id="34">
       <title>A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Sidney Evaldo</first><last>Leal</last></author>
-      <author><first>Magali Sanches</first><last>Duran</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>401–413</pages>
       <abstract>Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task. In this paper, we generate a nontrivial sentence corpus in Portuguese. We evaluate three scenarios for building it, taking advantage of a parallel corpus of simplification, in which each sentence triplet is aligned and has simplification operations annotated, being ideal for justifying possible mistakes of future methods. The best scenario of our corpus PorSimplesSent is composed of 4,888 pairs, which is bigger than a similar corpus for English; all the three versions of it are publicly available. We created four baselines for PorSimplesSent and made available a pairwise ranking method, using 17 linguistic and psycholinguistic features, which correctly identifies the ranking of sentence pairs with an accuracy of 74.2%.</abstract>
       <url hash="f524aafe">C18-1034</url>
@@ -366,7 +366,7 @@
     <paper id="35">
       <title>Adopting the Word-Pair-Dependency-Triplets with Individual Comparison for Natural Language Inference</title>
       <author><first>Qianlong</first><last>Du</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>414–425</pages>
       <abstract>This paper proposes to perform natural language inference with Word-Pair-Dependency-Triplets. Most previous DNN-based approaches either ignore syntactic dependency among words, or directly use tree-LSTM to generate sentence representation with irrelevant information. To overcome the problems mentioned above, we adopt Word-Pair-Dependency-Triplets to improve alignment and inference judgment. To be specific, instead of comparing each triplet from one passage with the merged information of another passage, we first propose to perform comparison directly between the triplets of the given passage-pair to make the judgement more interpretable. Experimental results show that the performance of our approach is better than most of the approaches that use tree structures, and is comparable to other state-of-the-art approaches.</abstract>
@@ -380,7 +380,7 @@
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Nan</first><last>Du</last></author>
       <author><first>Min</first><last>Yang</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
       <author><first>Ying</first><last>Shen</last></author>
       <pages>426–436</pages>
       <abstract>Distantly supervised relation extraction greatly reduces human efforts in extracting relational facts from unstructured texts. However, it suffers from noisy labeling problem, which can degrade its performance. Meanwhile, the useful information expressed in knowledge graph is still underutilized in the state-of-the-art methods for distantly supervised relation extraction. In the light of these challenges, we propose CORD, a novelCOopeRativeDenoising framework, which consists two base networks leveraging text corpus and knowledge graph respectively, and a cooperative module involving their mutual learning by the adaptive bi-directional knowledge distillation and dynamic ensemble with noisy-varying instances. Experimental results on a real-world dataset demonstrate that the proposed method reduces the noisy labels and achieves substantial improvement over the state-of-the-art methods.</abstract>
@@ -392,7 +392,7 @@
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Zengzhuang</first><last>Xu</last></author>
       <author><first>Yu</first><last>Hong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>437–448</pages>
       <abstract>Relation Classification aims to classify the semantic relationship between two marked entities in a given sentence. It plays a vital role in a variety of natural language processing applications. Most existing methods focus on exploiting mono-lingual data, e.g., in English, due to the lack of annotated data in other languages. In this paper, we come up with a feature adaptation approach for cross-lingual relation classification, which employs a generative adversarial network (GAN) to transfer feature representations from one language with rich annotated data to another language with scarce annotated data. Such a feature adaptation approach enables feature imitation via the competition between a relation classification network and a rival discriminator. Experimental results on the ACE 2005 multilingual training corpus, treating English as the source language and Chinese the target, demonstrate the effectiveness of our proposed approach, yielding an improvement of 5.7% over the state-of-the-art.</abstract>
       <url hash="93c8b887">C18-1037</url>
@@ -419,7 +419,7 @@
     </paper>
     <paper id="40">
       <title>Interpretation of Implicit Conditions in Database Search Dialogues</title>
-      <author><first>Shunya</first><last>Fukunaga</last></author>
+      <author id="shun-ya-fukunaga"><first>Shunya</first><last>Fukunaga</last></author>
       <author><first>Hitoshi</first><last>Nishikawa</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Hikaru</first><last>Yokono</last></author>
@@ -443,10 +443,10 @@
     </paper>
     <paper id="42">
       <title>Can Taxonomy Help? Improving Semantic Question Matching using Question Taxonomy</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
-      <author><first>Rajkumar</first><last>Pujari</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Anutosh</first><last>Maitra</last></author>
       <author><first>Tom</first><last>Jain</last></author>
       <author><first>Shubhashis</first><last>Sengupta</last></author>
@@ -471,8 +471,8 @@
       <title>Employing Text Matching Network to Recognise Nuclearity in <fixed-case>C</fixed-case>hinese Discourse</title>
       <author><first>Sheng</first><last>Xu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>525–535</pages>
       <abstract>The task of nuclearity recognition in Chinese discourse remains challenging due to the demand for more deep semantic information. In this paper, we propose a novel text matching network (TMN) that encodes the discourse units and the paragraphs by combining Bi-LSTM and CNN to capture both global dependency information and local n-gram information. Moreover, it introduces three components of text matching, the Cosine, Bilinear and Single Layer Network, to incorporate various similarities and interactions among the discourse units. Experimental results on the Chinese Discourse TreeBank show that our proposed TMN model significantly outperforms various strong baselines in both micro-F1 and macro-F1.</abstract>
       <url hash="14d5373f">C18-1044</url>
@@ -483,8 +483,8 @@
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Feng</first><last>Jiang</last></author>
       <author><first>Yi</first><last>Zhou</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>536–546</pages>
       <abstract>Discourse parsing is a challenging task and plays a critical role in discourse analysis. This paper focus on the macro level discourse structure analysis, which has been less studied in the previous researches. We explore a macro discourse structure presentation schema to present the macro level discourse structure, and propose a corresponding corpus, named Macro Chinese Discourse Treebank. On these bases, we concentrate on two tasks of macro discourse structure analysis, including structure identification and nuclearity recognition. In order to reduce the error transmission between the associated tasks, we adopt a joint model of the two tasks, and an Integer Linear Programming approach is proposed to achieve global optimization with various kinds of constraints.</abstract>
       <url hash="8fcba371">C18-1045</url>
@@ -535,9 +535,9 @@
     <paper id="50">
       <title>Modeling Coherence for Neural Machine Translation with Dynamic and Topic Caches</title>
       <author><first>Shaohui</first><last>Kuang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>596–606</pages>
       <abstract>Sentences in a well-formed text are connected to each other via various links to form the cohesive structure of the text. Current neural machine translation (NMT) systems translate a text in a conventional sentence-by-sentence fashion, ignoring such cross-sentence links and dependencies. This may lead to generate an incoherent target text for a coherent source text. In order to handle this issue, we propose a cache-based approach to modeling coherence for neural machine translation by capturing contextual information either from recently translated sentences or the entire document. Particularly, we explore two types of caches: a dynamic cache, which stores words from the best translation hypotheses of preceding sentences, and a topic cache, which maintains a set of target-side topical words that are semantically related to the document to be translated. On this basis, we build a new layer to score target words in these two caches with a cache-based neural model. Here the estimated probabilities from the cache-based neural model are combined with NMT probabilities into the final word prediction probabilities via a gating mechanism. Finally, the proposed cache-based neural model is trained jointly with NMT system in an end-to-end manner. Experiments and analysis presented in this paper demonstrate that the proposed cache-based model achieves substantial improvements over several state-of-the-art SMT and NMT baselines.</abstract>
       <url hash="82cd70d3">C18-1050</url>
@@ -546,7 +546,7 @@
     <paper id="51">
       <title>Fusing Recency into Neural Machine Translation with an Inter-Sentence Gate Model</title>
       <author><first>Shaohui</first><last>Kuang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>607–617</pages>
       <abstract>Neural machine translation (NMT) systems are usually trained on a large amount of bilingual sentence pairs and translate one sentence at a time, ignoring inter-sentence information. This may make the translation of a sentence ambiguous or even inconsistent with the translations of neighboring sentences. In order to handle this issue, we propose an inter-sentence gate model that uses the same encoder to encode two adjacent sentences and controls the amount of information flowing from the preceding sentence to the translation of the current sentence with an inter-sentence gate. In this way, our proposed model can capture the connection between sentences and fuse recency from neighboring sentences into neural machine translation. On several NIST Chinese-English translation tasks, our experiments demonstrate that the proposed inter-sentence gate model achieves substantial improvements over the baseline.</abstract>
       <url hash="8383712d">C18-1051</url>
@@ -675,7 +675,7 @@
     <paper id="63">
       <title>Multi-task dialog act and sentiment recognition on Mastodon</title>
       <author><first>Christophe</first><last>Cerisara</last></author>
-      <author><first>Somayeh</first><last>Jafaritazehjani</last></author>
+      <author id="somayeh-jafaritazehjani"><first>Somayeh</first><last>Jafaritazehjani</last></author>
       <author><first>Adedayo</first><last>Oluokun</last></author>
       <author><first>Hoa T.</first><last>Le</last></author>
       <pages>745–754</pages>
@@ -720,7 +720,7 @@
       <title>Dynamic Feature Selection with Attention in Incremental Parsing</title>
       <author><first>Ryosuke</first><last>Kohita</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>785–794</pages>
       <abstract>One main challenge for incremental transition-based parsers, when future inputs are invisible, is to extract good features from a limited local context. In this work, we present a simple technique to maximally utilize the local features with an attention mechanism, which works as context- dependent dynamic feature selection. Our model learns, for example, which tokens should a parser focus on, to decide the next action. Our multilingual experiment shows its effectiveness across many languages. We also present an experiment with augmented test dataset and demon- strate it helps to understand the model’s behavior on locally ambiguous points.</abstract>
       <url hash="65b1ba05">C18-1067</url>
@@ -751,7 +751,7 @@
       <title>Projecting Embeddings for Domain Adaption: Joint Modeling of Sentiment Analysis in Diverse Domains</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>818–830</pages>
       <abstract>Domain adaptation for sentiment analysis is challenging due to the fact that supervised classifiers are very sensitive to changes in domain. The two most prominent approaches to this problem are structural correspondence learning and autoencoders. However, they either require long training times or suffer greatly on highly divergent domains. Inspired by recent advances in cross-lingual sentiment analysis, we provide a novel perspective and cast the domain adaptation problem as an embedding projection task. Our model takes as input two mono-domain embedding spaces and learns to project them to a bi-domain space, which is jointly optimized to (1) project across domains and to (2) predict sentiment. We perform domain adaptation experiments on 20 source-target domain pairs for sentiment classification and report novel state-of-the-art results on 11 domain pairs, including the Amazon domain adaptation datasets and SemEval 2013 and 2016 datasets. Our analysis shows that our model performs comparably to state-of-the-art approaches on domains that are similar, while performing significantly better on highly divergent domains. Our code is available at <url>https://github.com/jbarnesspain/domain_blse</url></abstract>
       <url hash="a04f2374">C18-1070</url>
@@ -799,7 +799,7 @@
       <author><first>Yicheng</first><last>Zou</last></author>
       <author><first>Tao</first><last>Gui</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>868–877</pages>
       <abstract>Attention mechanisms have been leveraged for sentiment classification tasks because not all words have the same importance. However, most existing attention models did not take full advantage of sentiment lexicons, which provide rich sentiment information and play a critical role in sentiment analysis. To achieve the above target, in this work, we propose a novel lexicon-based supervised attention model (LBSA), which allows a recurrent neural network to focus on the sentiment content, thus generating sentiment-informative representations. Compared with general attention models, our model has better interpretability and less noise. Experimental results on three large-scale sentiment classification datasets showed that the proposed method outperforms previous methods.</abstract>
       <url hash="88282198">C18-1074</url>
@@ -829,7 +829,7 @@
       <title>Summarization Evaluation in the Absence of Human Model Summaries Using the Compositionality of Word Embeddings</title>
       <author><first>Elaheh</first><last>ShafieiBavani</last></author>
       <author><first>Mohammad</first><last>Ebrahimi</last></author>
-      <author><first>Raymond</first><last>Wong</last></author>
+      <author id="raymond-wong"><first>Raymond</first><last>Wong</last></author>
       <author><first>Fang</first><last>Chen</last></author>
       <pages>905–914</pages>
       <abstract>We present a new summary evaluation approach that does not require human model summaries. Our approach exploits the compositional capabilities of corpus-based and lexical resource-based word embeddings to develop the features reflecting coverage, diversity, informativeness, and coherence of summaries. The features are then used to train a learning model for predicting the summary content quality in the absence of gold models. We evaluate the proposed metric in replicating the human assigned scores for summarization systems and summaries on data from query-focused and update summarization tasks in TAC 2008 and 2009. The results show that our feature combination provides reliable estimates of summary content quality when model summaries are not available.</abstract>
@@ -838,10 +838,10 @@
     </paper>
     <paper id="78">
       <title>A review of <fixed-case>S</fixed-case>panish corpora annotated with negation</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>915–924</pages>
       <abstract>The availability of corpora annotated with negation information is essential to develop negation processing systems in any language. However, there is a lack of these corpora even for languages like English, and when there are corpora available they are small and the annotations are not always compatible across corpora. In this paper we review the existing corpora annotated with negation in Spanish with the purpose of first, gathering the information to make it available for other researchers and, second, analyzing how compatible are the corpora and how has the linguistic phenomenon been addressed. Our final aim is to develop a supervised negation processing system for Spanish, for which we need training and test data. Our analysis shows that it will not be possible to merge the small corpora existing for Spanish due to lack of compatibility in the annotations.</abstract>
       <url hash="cbba5f03">C18-1078</url>
@@ -851,7 +851,7 @@
       <title>Document-level Multi-aspect Sentiment Classification by Jointly Modeling Users, Aspects, and Overall Ratings</title>
       <author><first>Junjie</first><last>Li</last></author>
       <author><first>Haitong</first><last>Yang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>925–936</pages>
       <abstract>Document-level multi-aspect sentiment classification aims to predict user’s sentiment polarities for different aspects of a product in a review. Existing approaches mainly focus on text information. However, the authors (i.e. users) and overall ratings of reviews are ignored, both of which are proved to be significant on interpreting the sentiments of different aspects in this paper. Therefore, we propose a model called Hierarchical User Aspect Rating Network (HUARN) to consider user preference and overall ratings jointly. Specifically, HUARN adopts a hierarchical architecture to encode word, sentence, and document level information. Then, user attention and aspect attention are introduced into building sentence and document level representation. The document representation is combined with user and overall rating information to predict aspect ratings of a review. Diverse aspects are treated differently and a multi-task framework is adopted. Empirical results on two real-world datasets show that HUARN achieves state-of-the-art performances.</abstract>
       <url hash="96aa0a94">C18-1079</url>
@@ -880,7 +880,7 @@
       <title>Evaluating the text quality, human likeness and tailoring component of <fixed-case>PASS</fixed-case>: A <fixed-case>D</fixed-case>utch data-to-text system for soccer</title>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Bart</first><last>Verduijn</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>962–972</pages>
       <abstract>We present an evaluation of PASS, a data-to-text system that generates Dutch soccer reports from match statistics which are automatically tailored towards fans of one club or the other. The evaluation in this paper consists of two studies. An intrinsic human-based evaluation of the system’s output is described in the first study. In this study it was found that compared to human-written texts, computer-generated texts were rated slightly lower on style-related text components (fluency and clarity) and slightly higher in terms of the correctness of given information. Furthermore, results from the first study showed that tailoring was accurately recognized in most cases, and that participants struggled with correctly identifying whether a text was written by a human or computer. The second study investigated if tailoring affects perceived text quality, for which no results were garnered. This lack of results might be due to negative preconceptions about computer-generated texts which were found in the first study.</abstract>
@@ -891,7 +891,7 @@
       <title>Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension</title>
       <author><first>Mao</first><last>Nakanishi</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>973–983</pages>
       <abstract>Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning. One of the problematic presumptions with current MRC technologies is that each question is assumed to be answerable by looking at a given text passage. However, to realize human-like language comprehension ability, a machine should also be able to distinguish not-answerable questions (NAQs) from answerable questions. To develop this functionality, a dataset incorporating hard-to-detect NAQs is vital; however, its manual construction would be expensive. This paper proposes a dataset creation method that alters an existing MRC dataset, the Stanford Question Answering Dataset, and describes the resulting dataset. The value of this dataset is likely to increase if each NAQ in the dataset is properly classified with the difficulty of identifying it as an NAQ. This difficulty level would allow researchers to evaluate a machine’s NAQ detection performance more precisely. Therefore, we propose a method for automatically assigning difficulty level labels, which measures the similarity between a question and the target text passage. Our NAQ detection experiments demonstrate that the resulting dataset, having difficulty level annotations, is valid and potentially useful in the development of advanced MRC models.</abstract>
       <url hash="d93c869c">C18-1083</url>
@@ -900,8 +900,8 @@
     <paper id="84">
       <title>Style Obfuscation by Invariance</title>
       <author><first>Chris</first><last>Emmery</last></author>
-      <author><first>Enrique</first><last>Manjavacas Arevalo</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas Arevalo</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>984–996</pages>
       <abstract>The task of obfuscating writing style using sequence models has previously been investigated under the framework of obfuscation-by-transfer, where the input text is explicitly rewritten in another style. A side effect of this framework are the frequent major alterations to the semantic content of the input. In this work, we propose obfuscation-by-invariance, and investigate to what extent models trained to be explicitly style-invariant preserve semantics. We evaluate our architectures in parallel and non-parallel settings, and compare automatic and human evaluations on the obfuscated sentences. Our experiments show that the performance of a style classifier can be reduced to chance level, while the output is evaluated to be of equal quality to models applying style-transfer. Additionally, human evaluation indicates a trade-off between the level of obfuscation and the observed quality of the output in terms of meaning preservation and grammaticality.</abstract>
       <url hash="37791826">C18-1084</url>
@@ -911,7 +911,7 @@
       <title>Encoding Sentiment Information into Word Vectors for Sentiment Analysis</title>
       <author><first>Zhe</first><last>Ye</last></author>
       <author><first>Fang</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>997–1007</pages>
       <abstract>General-purpose pre-trained word embeddings have become a mainstay of natural language processing, and more recently, methods have been proposed to encode external knowledge into word embeddings to benefit specific downstream tasks. The goal of this paper is to encode sentiment knowledge into pre-trained word vectors to improve the performance of sentiment analysis. Our proposed method is based on a convolutional neural network (CNN) and an external sentiment lexicon. Experiments on four popular sentiment analysis datasets show that this method improves the accuracy of sentiment analysis compared to a number of benchmark methods.</abstract>
       <url hash="6ad54d1d">C18-1085</url>
@@ -929,7 +929,7 @@
     </paper>
     <paper id="87">
       <title>Towards a Language for Natural Language Treebank Transductions</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <pages>1022–1032</pages>
       <abstract>This paper describes a transduction language suitable for natural language treebank transformations and motivates its application to tasks that have been used and described in the literature. The language, which is the basis for a tree transduction tool allows for clean, precise and concise description of what has been very confusingly, ambiguously, and incompletely textually described in the literature also allowing easy non-hard-coded implementation. We also aim at getting feedback from the NLP community to eventually converge to a de facto standard for such transduction language.</abstract>
       <url hash="3dcda489">C18-1087</url>
@@ -957,7 +957,7 @@
     <paper id="90">
       <title>Enhancing General Sentiment Lexicons for Domain-Specific Use</title>
       <author><first>Tim</first><last>Kreutz</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1056–1064</pages>
       <abstract>Lexicon based methods for sentiment analysis rely on high quality polarity lexicons. In recent years, automatic methods for inducing lexicons have increased the viability of lexicon based methods for polarity classification. SentProp is a framework for inducing domain-specific polarities from word embeddings. We elaborate on SentProp by evaluating its use for enhancing DuOMan, a general-purpose lexicon, for use in the political domain. By adding only top sentiment bearing words from the vocabulary and applying small polarity shifts in the general-purpose lexicon, we increase accuracy in an in-domain classification task. The enhanced lexicon performs worse than the original lexicon in an out-domain task, showing that the words we added and the polarity shifts we applied are domain-specific and do not translate well to an out-domain setting.</abstract>
       <url hash="c98a2576">C18-1090</url>
@@ -1041,7 +1041,7 @@
       <title>Multilevel Heuristics for Rationale-Based Entity Relation Classification in Sentences</title>
       <author><first>Shiou Tian</first><last>Hsu</last></author>
       <author><first>Mandar</first><last>Chaudhary</last></author>
-      <author><first>Nagiza</first><last>Samatova</last></author>
+      <author id="nagiza-samatova"><first>Nagiza</first><last>Samatova</last></author>
       <pages>1145–1155</pages>
       <abstract>Rationale-based models provide a unique way to provide justifiable results for relation classification models by identifying rationales (key words and phrases that a person can use to justify the relation in the sentence) during the process. However, existing generative networks used to extract rationales come with a trade-off between extracting diversified rationales and achieving good classification results. In this paper, we propose a multilevel heuristic approach to regulate rationale extraction to avoid extracting monotonous rationales without compromising classification performance. In our model, rationale selection is regularized by a semi-supervised process and features from different levels: word, syntax, sentence, and corpus. We evaluate our approach on the SemEval 2010 dataset that includes 19 relation classes and the quality of extracted rationales with our manually-labeled rationales. Experiments show a significant improvement in classification performance and a 20% gain in rationale interpretability compared to state-of-the-art approaches.</abstract>
       <url hash="65c8ed57">C18-1098</url>
@@ -1096,7 +1096,7 @@
     <paper id="103">
       <title>Adversarial Domain Adaptation for Variational Neural Language Generation in Dialogue Systems</title>
       <author><first>Van-Khanh</first><last>Tran</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>1205–1217</pages>
       <abstract>Domain Adaptation arises when we aim at learning from source domain a model that can perform acceptably well on a different target domain. It is especially crucial for Natural Language Generation (NLG) in Spoken Dialogue Systems when there are sufficient annotated data in the source domain, but there is a limited labeled data in the target domain. How to effectively utilize as much of existing abilities from source domains is a crucial issue in domain adaptation. In this paper, we propose an adversarial training procedure to train a Variational encoder-decoder based language generator via multiple adaptation steps. In this procedure, a model is first trained on a source domain data and then fine-tuned on a small set of target domain utterances under the guidance of two proposed critics. Experimental results show that the proposed method can effectively leverage the existing knowledge in the source domain to adapt to another related domain by using only a small amount of in-domain data.</abstract>
       <url hash="caae9279">C18-1103</url>
@@ -1108,8 +1108,8 @@
       <author><first>Tim</first><last>Baumgärtner</last></author>
       <author><first>Aashish</first><last>Venkatesh</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Raquel</first><last>Fernandez</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last></author>
       <pages>1218–1233</pages>
       <abstract>Our goal is to explore how the abilities brought in by a dialogue manager can be included in end-to-end visually grounded conversational agents. We make initial steps towards this general goal by augmenting a task-oriented visual dialogue model with a decision-making component that decides whether to ask a follow-up question to identify a target referent in an image, or to stop the conversation to make a guess. Our analyses show that adding a decision making component produces dialogues that are less repetitive and that include fewer unnecessary questions, thus potentially leading to more efficient and less unnatural interactions.</abstract>
       <url hash="9524607b">C18-1104</url>
@@ -1130,7 +1130,7 @@
       <title>Dialogue-act-driven Conversation Model : An Experimental Study</title>
       <author id="harshit-kumar"><first>Harshit</first><last>Kumar</last></author>
       <author><first>Arvind</first><last>Agarwal</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>1246–1256</pages>
       <abstract>The utility of additional semantic information for the task of next utterance selection in an automated dialogue system is the focus of study in this paper. In particular, we show that additional information available in the form of dialogue acts –when used along with context given in the form of dialogue history– improves the performance irrespective of the underlying model being generative or discriminative. In order to show the model agnostic behavior of dialogue acts, we experiment with several well-known models such as sequence-to-sequence encoder-decoder model, hierarchical encoder-decoder model, and Siamese-based models with and without hierarchy; and show that in all models, incorporating dialogue acts improves the performance by a significant margin. We, furthermore, propose a novel way of encoding dialogue act information, and use it along with hierarchical encoder to build a model that can use the sequential dialogue act information in a natural way. Our proposed model achieves an MRR of about 84.8% for the task of next utterance selection on a newly introduced Daily Dialogue dataset, and outperform the baseline models. We also provide a detailed analysis of results including key insights that explain the improvement in MRR because of dialog act information.</abstract>
       <url hash="28918276">C18-1106</url>
@@ -1166,7 +1166,7 @@
       <author><first>Meng</first><last>Zou</last></author>
       <author><first>Xihan</first><last>Li</last></author>
       <author><first>Haokun</first><last>Liu</last></author>
-      <author><first>Zhihong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhihong</first><last>Deng</last></author>
       <pages>1281–1291</pages>
       <abstract>Neural encoder-decoder models have been widely applied to conversational response generation, which is a research hot spot in recent years. However, conventional neural encoder-decoder models tend to generate commonplace responses like “I don’t know” regardless of what the input is. In this paper, we analyze this problem from a new perspective: latent vectors. Based on it, we propose an easy-to-extend learning framework named MEMD (Multi-Encoder to Multi-Decoder), in which an auxiliary encoder and an auxiliary decoder are introduced to provide necessary training guidance without resorting to extra data or complicating network’s inner structure. Experimental results demonstrate that our method effectively improve the quality of generated responses according to automatic metrics and human evaluations, yielding more diverse and smooth replies.</abstract>
       <url hash="4d27f5d5">C18-1109</url>
@@ -1226,7 +1226,7 @@
       <title>Local String Transduction as Sequence Labeling</title>
       <author><first>Joana</first><last>Ribeiro</last></author>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
       <pages>1360–1371</pages>
       <abstract>We show that the general problem of string transduction can be reduced to the problem of sequence labeling. While character deletion and insertions are allowed in string transduction, they do not exist in sequence labeling. We show how to overcome this difference. Our approach can be used with any sequence labeling algorithm and it works best for problems in which string transduction imposes a strong notion of locality (no long range dependencies). We experiment with spelling correction for social media, OCR correction, and morphological inflection, and we see that it behaves better than seq2seq models and yields state-of-the-art results in several cases.</abstract>
@@ -1244,7 +1244,7 @@
     <paper id="117">
       <title>Diachronic word embeddings and semantic shifts: a survey</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Terrence</first><last>Szymanski</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>1384–1397</pages>
@@ -1271,7 +1271,7 @@
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Mingqi</first><last>Jiang</last></author>
       <author><first>Hanqian</first><last>Wu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1410–1420</pages>
       <abstract>In realistic scenarios, a user profiling model (e.g., gender classification or age regression) learned from one social media might perform rather poorly when tested on another social media due to the different data distributions in the two media. In this paper, we address cross-media user profiling by bridging the knowledge between the source and target media with a uniform user embedding learning approach. In our approach, we first construct a cross-media user-word network to capture the relationship among users through the textual information and a modified cross-media user-user network to capture the relationship among users through the social information. Then, we learn user embedding by jointly learning the heterogeneous network composed of above two networks. Finally, we train a classification (or regression) model with the obtained user embeddings as input to perform user profiling. Empirical studies demonstrate the effectiveness of the proposed approach to two cross-media user profiling tasks, i.e., cross-media gender classification and cross-media age regression.</abstract>
       <url hash="c232e02e">C18-1119</url>
@@ -1280,7 +1280,7 @@
     <paper id="120">
       <title>Incorporating Syntactic Uncertainty in Neural Machine Translation with a Forest-to-Sequence Model</title>
       <author><first>Poorya</first><last>Zaremoodi</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1421–1429</pages>
       <abstract>Incorporating syntactic information in Neural Machine Translation (NMT) can lead to better reorderings, particularly useful when the language pairs are syntactically highly divergent or when the training bitext is not large. Previous work on using syntactic information, provided by top-1 parse trees generated by (inevitably error-prone) parsers, has been promising. In this paper, we propose a forest-to-sequence NMT model to make use of exponentially many parse trees of the source sentence to compensate for the parser errors. Our method represents the collection of parse trees as a packed forest, and learns a neural transducer to translate from the input forest to the target sentence. Experiments on English to German, Chinese and Farsi translation tasks show the superiority of our approach over the sequence-to-sequence and tree-to-sequence neural translation models.</abstract>
       <url hash="0d88ff63">C18-1120</url>
@@ -1291,7 +1291,7 @@
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Junnan</first><last>Zhu</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1430–1441</pages>
       <abstract>In this paper, we investigate the sentence summarization task that produces a summary from a source sentence. Neural sequence-to-sequence models have gained considerable success for this task, while most existing approaches only focus on improving the informativeness of the summary, which ignore the correctness, i.e., the summary should not contain unrelated information with respect to the source sentence. We argue that correctness is an essential requirement for summarization systems. Considering a correct summary is semantically entailed by the source sentence, we incorporate entailment knowledge into abstractive summarization models. We propose an entailment-aware encoder under multi-task framework (i.e., summarization generation and entailment recognition) and an entailment-aware decoder by entailment Reward Augmented Maximum Likelihood (RAML) training. Experiment results demonstrate that our models significantly outperform baselines from the aspects of informativeness and correctness.</abstract>
       <url hash="91aa5ada">C18-1121</url>
@@ -1300,7 +1300,7 @@
     <paper id="122">
       <title>Extracting Parallel Sentences with Bidirectional Recurrent Neural Networks to Improve Machine Translation</title>
       <author><first>Francis</first><last>Grégoire</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>1442–1453</pages>
       <abstract>Parallel sentence extraction is a task addressing the data sparsity problem found in multilingual natural language processing applications. We propose a bidirectional recurrent neural network based approach to extract parallel sentences from collections of multilingual texts. Our experiments with noisy parallel corpora show that we can achieve promising results against a competitive baseline by removing the need of specific feature engineering or additional external resources. To justify the utility of our approach, we extract sentence pairs from Wikipedia articles to train machine translation systems and show significant improvements in translation performance.</abstract>
       <url hash="e02faa64">C18-1122</url>
@@ -1310,7 +1310,7 @@
       <title>Fast and Accurate Reordering with <fixed-case>ITG</fixed-case> Transition <fixed-case>RNN</fixed-case></title>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Axel</first><last>Ng</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>1454–1463</pages>
       <abstract>Attention-based sequence-to-sequence neural network models learn to jointly align and translate. The quadratic-time attention mechanism is powerful as it is capable of handling arbitrary long-distance reordering, but computationally expensive. In this paper, towards making neural translation both accurate and efficient, we follow the traditional pre-reordering approach to decouple reordering from translation. We add a reordering RNN that shares the input encoder with the decoder. The RNNs are trained jointly with a multi-task loss function and applied sequentially at inference time. The task of the reordering model is to predict the permutation of the input words following the target language word order. After reordering, the attention in the decoder becomes more peaked and monotonic. For reordering, we adopt the Inversion Transduction Grammars (ITG) and propose a transition system to parse input to trees for reordering. We harness the ITG transition system with RNN. With the modeling power of RNN, we achieve superior reordering accuracy without any feature engineering. In experiments, we apply the model to the task of text normalization. Compared to a strong baseline of attention-based RNN, our ITG RNN re-ordering model can reach the same reordering accuracy with only 1/10 of the training data and is 2.5x faster in decoding.</abstract>
       <url hash="b47d22ab">C18-1123</url>
@@ -1322,7 +1322,7 @@
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Zhixing</first><last>Tan</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Chao</first><last>Bian</last></author>
       <pages>1464–1473</pages>
       <abstract>Neural machine translation with source-side attention have achieved remarkable performance. however, there has been little work exploring to attend to the target-side which can potentially enhance the memory capbility of NMT. We reformulate a Decoding History Enhanced Attention mechanism (DHEA) to render NMT model better at selecting both source-side and target-side information. DHA enables dynamic control of the ratios at which source and target contexts contribute to the generation of target words, offering a way to weakly induce structure relations among both source and target tokens. It also allows training errors to be directly back-propagated through short-cut connections and effectively alleviates the gradient vanishing problem. The empirical study on Chinese-English translation shows that our model with proper configuration can improve by 0:9 BLEU upon Transformer and the best reported results in the dataset. On WMT14 English-German task and a larger WMT14 English-French task, our model achieves comparable results with the state-of-the-art.</abstract>
@@ -1391,8 +1391,8 @@
     </paper>
     <paper id="130">
       <title>User-Level Race and Ethnicity Predictors from <fixed-case>T</fixed-case>witter Text</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>1534–1545</pages>
       <abstract>User demographic inference from social media text has the potential to improve a range of downstream applications, including real-time passive polling or quantifying demographic bias. This study focuses on developing models for user-level race and ethnicity prediction. We introduce a data set of users who self-report their race/ethnicity through a survey, in contrast to previous approaches that use distantly supervised data or perceived labels. We develop predictive models from text which accurately predict the membership of a user to the four largest racial and ethnic groups with up to .884 AUC and make these available to the research community.</abstract>
       <url hash="9d816ad0">C18-1130</url>
@@ -1402,7 +1402,7 @@
       <title>Multi-Source Multi-Class Fake News Detection</title>
       <author><first>Hamid</first><last>Karimi</last></author>
       <author><first>Proteek</first><last>Roy</last></author>
-      <author><first>Sari</first><last>Saba-Sadiya</last></author>
+      <author id="sari-saba-sadiya"><first>Sari</first><last>Saba-Sadiya</last></author>
       <author><first>Jiliang</first><last>Tang</last></author>
       <pages>1546–1557</pages>
       <abstract>Fake news spreading through media outlets poses a real threat to the trustworthiness of information and detecting fake news has attracted increasing attention in recent years. Fake news is typically written intentionally to mislead readers, which determines that fake news detection merely based on news content is tremendously challenging. Meanwhile, fake news could contain true evidence to mock true news and presents different degrees of fakeness, which further exacerbates the detection difficulty. On the other hand, the spread of fake news produces various types of data from different perspectives. These multiple sources provide rich contextual information about fake news and offer unprecedented opportunities for advanced fake news detection. In this paper, we study fake news detection with different degrees of fakeness by integrating multiple sources. In particular, we introduce approaches to combine information from multiple sources and to discriminate between different degrees of fakeness, and propose a Multi-source Multi-class Fake news Detection framework MMFD, which combines automated feature extraction, multi-source fusion and automated degrees of fakeness detection into a coherent and interpretable model. Experimental results on the real-world data demonstrate the effectiveness of the proposed framework and extensive experiments are further conducted to understand the working of the proposed framework.</abstract>
@@ -1424,7 +1424,7 @@
       <author><first>Nurendra</first><last>Choudhary</last></author>
       <author><first>Rajat</first><last>Singh</last></author>
       <author><first>Vijjini</first><last>Anvesh Rao</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>1570–1577</pages>
       <abstract>In this paper, we leverage social media platforms such as twitter for developing corpus across multiple languages. The corpus creation methodology is applicable for resource-scarce languages provided the speakers of that particular language are active users on social media platforms. We present an approach to extract social media microblogs such as tweets (Twitter). In this paper, we create corpus for multilingual sentiment analysis and emoji prediction in Hindi, Bengali and Telugu. Further, we perform and analyze multiple NLP tasks utilizing the corpus to get interesting observations.</abstract>
       <url hash="0f11e9e7">C18-1133</url>
@@ -1442,7 +1442,7 @@
     <paper id="135">
       <title>The Road to Success: Assessing the Fate of Linguistic Innovations in Online Communities</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>1591–1603</pages>
       <abstract>We investigate the birth and diffusion of lexical innovations in a large dataset of online social communities. We build on sociolinguistic theories and focus on the relation between the spread of a novel term and the social role of the individuals who use it, uncovering characteristics of innovators and adopters. Finally, we perform a prediction task that allows us to anticipate whether an innovation will successfully spread within a community.</abstract>
       <url hash="4974aff2">C18-1135</url>
@@ -1450,8 +1450,8 @@
     </paper>
     <paper id="136">
       <title>Ab Initio: Automatic <fixed-case>L</fixed-case>atin Proto-word Reconstruction</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>1604–1614</pages>
       <abstract>Proto-word reconstruction is central to the study of language evolution. It consists of recreating the words in an ancient language from its modern daughter languages. In this paper we investigate automatic word form reconstruction for Latin proto-words. Having modern word forms in multiple Romance languages (French, Italian, Spanish, Portuguese and Romanian), we infer the form of their common Latin ancestors. Our approach relies on the regularities that occurred when the Latin words entered the modern languages. We leverage information from all modern languages, building an ensemble system for proto-word reconstruction. We use conditional random fields for sequence labeling, but we conduct preliminary experiments with recurrent neural networks as well. We apply our method on multiple datasets, showing that our method improves on previous results, having also the advantage of requiring less input data, which is essential in historical linguistics, where resources are generally scarce.</abstract>
       <url hash="4aca1ad2">C18-1136</url>
@@ -1459,7 +1459,7 @@
     </paper>
     <paper id="137">
       <title>A Computational Model for the Linguistic Notion of Morphological Paradigm</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Ling</first><last>Liu</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>1615–1626</pages>
@@ -1532,7 +1532,7 @@
     <paper id="144">
       <title>Learning from Measurements in Crowdsourcing Models: Inferring Ground Truth from Diverse Annotation Types</title>
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <pages>1694–1704</pages>
@@ -1601,7 +1601,7 @@
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Siyuan</first><last>Wang</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1763–1774</pages>
       <abstract>Visual Question Generation (VQG) aims to ask natural questions about an image automatically. Existing research focus on training model to fit the annotated data set that makes it indifferent from other language generation tasks. We argue that natural questions need to have two specific attributes from the perspectives of content and linguistic respectively, namely, natural and human-written. Inspired by the setting of discriminator in adversarial learning, we propose two discriminators, one for each attribute, to enhance the training. We then use the reinforcement learning framework to incorporate scores from the two discriminators as the reward to guide the training of the question generator. Experimental results on a benchmark VQG dataset show the effectiveness and robustness of our model compared to some state-of-the-art models in terms of both automatic and human evaluation metrics.</abstract>
       <url hash="66726489">C18-1150</url>
@@ -1652,8 +1652,8 @@
       <title>Treat us like the sequences we are: Prepositional Paraphrasing of Noun Compounds using <fixed-case>LSTM</fixed-case></title>
       <author><first>Girishkumar</first><last>Ponkiya</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>1827–1836</pages>
       <abstract>Interpreting noun compounds is a challenging task. It involves uncovering the underlying predicate which is dropped in the formation of the compound. In most cases, this predicate is of the form VERB+PREP. It has been observed that uncovering the preposition is a significant step towards uncovering the predicate. In this paper, we attempt to paraphrase noun compounds using prepositions. We consider noun compounds and their corresponding prepositional paraphrases as parallelly aligned sequences of words. This enables us to adapt different architectures from cross-lingual embedding literature. We choose the architecture where we create representations of both noun compound (source sequence) and its corresponding prepositional paraphrase (target sequence), such that their sim- ilarity is high. We use LSTMs to learn these representations. We use these representations to decide the correct preposition. Our experiments show that this approach performs considerably well on different datasets of noun compounds that are manually annotated with prepositions.</abstract>
       <url hash="8db544a6">C18-1155</url>
@@ -1666,7 +1666,7 @@
       <author><first>Sruthi</first><last>Gorantla</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
       <author><first>Roger</first><last>Zimmermann</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1837–1848</pages>
       <abstract>The literature in automated sarcasm detection has mainly focused on lexical-, syntactic- and semantic-level analysis of text. However, a sarcastic sentence can be expressed with contextual presumptions, background and commonsense knowledge. In this paper, we propose a ContextuAl SarCasm DEtector (CASCADE), which adopts a hybrid approach of both content- and context-driven modeling for sarcasm detection in online social media discussions. For the latter, CASCADE aims at extracting contextual information from the discourse of a discussion thread. Also, since the sarcastic nature and form of expression can vary from person to person, CASCADE utilizes user embeddings that encode stylometric and personality features of users. When used along with content-based feature extractors such as convolutional neural networks, we see a significant boost in the classification performance on a large Reddit corpus.</abstract>
       <url hash="f8650474">C18-1156</url>
@@ -1718,7 +1718,7 @@
     <paper id="161">
       <title>Robust Lexical Features for Improved Neural Network Named-Entity Recognition</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>1896–1907</pages>
       <abstract>Neural network approaches to Named-Entity Recognition reduce the need for carefully hand-crafted features. While some features do remain in state-of-the-art systems, lexical features have been mostly discarded, with the exception of gazetteers. In this work, we show that this is unfair: lexical features are actually quite useful. We propose to embed words and entity types into a low-dimensional vector space we train from annotated data produced by distant supervision thanks to Wikipedia. From this, we compute — offline — a feature vector representing each word. When used with a vanilla recurrent neural network model, this representation yields substantial improvements. We establish a new state-of-the-art F1 score of 87.95 on ONTONOTES 5.0, while matching state-of-the-art performance with a F1 score of 91.73 on the over-studied CONLL-2003 dataset.</abstract>
       <url hash="a6ffd855">C18-1161</url>
@@ -1783,7 +1783,7 @@
     <paper id="167">
       <title>Genre Identification and the Compositional Effect of Genre in Literature</title>
       <author><first>Joseph</first><last>Worsham</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>1963–1973</pages>
       <abstract>Recent advances in Natural Language Processing are finding ways to place an emphasis on the hierarchical nature of text instead of representing language as a flat sequence or unordered collection of words or letters. A human reader must capture multiple levels of abstraction and meaning in order to formulate an understanding of a document. In this paper, we address the problem of developing approaches which are capable of working with extremely large and complex literary documents to perform Genre Identification. The task is to assign the literary classification to a full-length book belonging to a corpus of literature, where the works on average are well over 200,000 words long and genre is an abstract thematic concept. We introduce the Gutenberg Dataset for Genre Identification. Additionally, we present a study on how current deep learning models compare to traditional methods for this task. The results are presented as a baseline along with findings on how using an ensemble of chapters can significantly improve results in deep learning methods. The motivation behind the ensemble of chapters method is discussed as the compositionality of subtexts which make up a larger work and contribute to the overall genre.</abstract>
       <url hash="e32490e4">C18-1167</url>
@@ -1793,7 +1793,7 @@
       <title>Transfer Learning for Entity Recognition of Novel Classes</title>
       <author><first>Juan Diego</first><last>Rodriguez</last></author>
       <author><first>Adam</first><last>Caldwell</last></author>
-      <author><first>Alexander</first><last>Liu</last></author>
+      <author id="alex-liu"><first>Alexander</first><last>Liu</last></author>
       <pages>1974–1985</pages>
       <abstract>In this reproduction paper, we replicate and extend several past studies on transfer learning for entity recognition. In particular, we are interested in entity recognition problems where the class labels in the source and target domains are different. Our work is the first direct comparison of these previously published approaches in this problem setting. In addition, we perform experiments on seven new source/target corpus pairs, nearly doubling the total number of corpus pairs that have been studied in all past work combined. Our results empirically demonstrate when each of the published approaches tends to do well. In particular, simpler approaches often work best when there is very little labeled target data, while neural transfer approaches tend to do better when there is more labeled target data.</abstract>
       <url hash="f4d6c8cd">C18-1168</url>
@@ -1834,7 +1834,7 @@
     <paper id="172">
       <title>Task-oriented Word Embedding for Text Classification</title>
       <author><first>Qian</first><last>Liu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yang</first><last>Gao</last></author>
       <author><first>Xiaochi</first><last>Wei</last></author>
       <author><first>Yuxin</first><last>Tian</last></author>
@@ -1849,7 +1849,7 @@
       <author><first>Jianyu</first><last>Zhao</last></author>
       <author><first>Zhiqiang</first><last>Zhan</last></author>
       <author><first>Qichuan</first><last>Yang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Changjian</first><last>Hu</last></author>
       <author><first>Zhensheng</first><last>Li</last></author>
       <author><first>Liuxin</first><last>Zhang</last></author>
@@ -1897,7 +1897,7 @@
     <paper id="177">
       <title>Improving Named Entity Recognition by Jointly Learning to Disambiguate Morphological Tags</title>
       <author><first>Onur</first><last>Güngör</last></author>
-      <author><first>Suzan</first><last>Uskudarli</last></author>
+      <author id="suzan-uskudarli"><first>Suzan</first><last>Uskudarli</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
       <pages>2082–2092</pages>
       <abstract>Previous studies have shown that linguistic features of a word such as possession, genitive or other grammatical cases can be employed in word representations of a named entity recognition (NER) tagger to improve the performance for morphologically rich languages. However, these taggers require external morphological disambiguation (MD) tools to function which are hard to obtain or non-existent for many languages. In this work, we propose a model which alleviates the need for such disambiguators by jointly learning NER and MD taggers in languages for which one can provide a list of candidate morphological analyses. We show that this can be done independent of the morphological annotation schemes, which differ among languages. Our experiments employing three different model architectures that join these two tasks show that joint learning improves NER performance. Furthermore, the morphological disambiguator’s performance is shown to be competitive.</abstract>
@@ -1917,7 +1917,7 @@
     </paper>
     <paper id="179">
       <title>An Analysis of Annotated Corpora for Emotion Classification in Text</title>
-      <author><first>Laura-Ana-Maria</first><last>Bostan</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura-Ana-Maria</first><last>Bostan</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>2104–2119</pages>
       <abstract>Several datasets have been annotated and published for classification of emotions. They differ in several ways: (1) the use of different annotation schemata (e. g., discrete label sets, including joy, anger, fear, or sadness or continuous values including valence, or arousal), (2) the domain, and, (3) the file formats. This leads to several research gaps: supervised models often only use a limited set of available resources. Additionally, no previous work has compared emotion corpora in a systematic manner. We aim at contributing to this situation with a survey of the datasets, and aggregate them in a common file format with a common annotation schema. Based on this aggregation, we perform the first cross-corpus classification experiments in the spirit of future research enabled by this paper, in order to gain insight and a better understanding of differences of models inferred from the data. This work also simplifies the choice of the most appropriate resources for developing a model for a novel domain. One result from our analysis is that a subset of corpora is better classified with models trained on a different corpus. For none of the corpora, training on all data altogether is better than using a subselection of the resources. Our unified corpus is available at <url>http://www.ims.uni-stuttgart.de/data/unifyemotion</url>.</abstract>
@@ -1936,8 +1936,8 @@
     </paper>
     <paper id="181">
       <title>A Review on Deep Learning Techniques Applied to Answer Selection</title>
-      <author><first>Tuan Manh</first><last>Lai</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan Manh</first><last>Lai</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>2132–2144</pages>
       <abstract>Given a question and a set of candidate answers, answer selection is the task of identifying which of the candidates answers the question correctly. It is an important problem in natural language processing, with applications in many areas. Recently, many deep learning based methods have been proposed for the task. They produce impressive performance without relying on any feature engineering or expensive external resources. In this paper, we aim to provide a comprehensive review on deep learning methods applied to answer selection.</abstract>
@@ -1955,7 +1955,7 @@
     </paper>
     <paper id="183">
       <title>Distantly Supervised <fixed-case>NER</fixed-case> with Partial Annotation Learning and Reinforcement Learning</title>
-      <author><first>Yaosheng</first><last>Yang</last></author>
+      <author id="yaosheng-yang"><first>Yaosheng</first><last>Yang</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
       <author><first>Zhenghua</first><last>Li</last></author>
       <author><first>Zhengqiu</first><last>He</last></author>
@@ -2011,8 +2011,8 @@
       <title>Aspect-based summarization of pros and cons in unstructured product reviews</title>
       <author><first>Florian</first><last>Kunneman</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>2219–2229</pages>
       <abstract>We developed three systems for generating pros and cons summaries of product reviews. Automating this task eases the writing of product reviews, and offers readers quick access to the most important information. We compared SynPat, a system based on syntactic phrases selected on the basis of valence scores, against a neural-network-based system trained to map bag-of-words representations of reviews directly to pros and cons, and the same neural system trained on clusters of word-embedding encodings of similar pros and cons. We evaluated the systems in two ways: first on held-out reviews with gold-standard pros and cons, and second by asking human annotators to rate the systems’ output on relevance and completeness. In the second evaluation, the gold-standard pros and cons were assessed along with the system output. We find that the human-generated summaries are not deemed as significantly more relevant or complete than the SynPat systems; the latter are scored higher than the human-generated summaries on a precision metric. The neural approaches yield a lower performance in the human assessment, and are outperformed by the baseline.</abstract>
       <url hash="1723f526">C18-1188</url>
@@ -2085,7 +2085,7 @@
       <title><fixed-case>G</fixed-case>raphene: Semantically-Linked Propositions in Open Information Extraction</title>
       <author><first>Matthias</first><last>Cetto</last></author>
       <author><first>Christina</first><last>Niklaus</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>2300–2311</pages>
       <abstract>We present an Open Information Extraction (IE) approach that uses a two-layered transformation stage consisting of a clausal disembedding layer and a phrasal disembedding layer, together with rhetorical relation identification. In that way, we convert sentences that present a complex linguistic structure into simplified, syntactically sound sentences, from which we can extract propositions that are represented in a two-layered hierarchy in the form of core relational tuples and accompanying contextual information which are semantically linked via rhetorical relations. In a comparative evaluation, we demonstrate that our reference implementation Graphene outperforms state-of-the-art Open IE systems in the construction of correct n-ary predicate-argument structures. Moreover, we show that existing Open IE approaches can benefit from the transformation process of our framework.</abstract>
@@ -2116,7 +2116,7 @@
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>2340–2353</pages>
       <abstract>Natural language inference (NLI) is the task of determining if a natural language hypothesis can be inferred from a given premise in a justifiable manner. NLI was proposed as a benchmark task for natural language understanding. Existing models perform well at standard datasets for NLI, achieving impressive results across different genres of text. However, the extent to which these models understand the semantic content of sentences is unclear. In this work, we propose an evaluation methodology consisting of automatically constructed “stress tests” that allow us to examine whether systems have the ability to make real inferential decisions. Our evaluation of six sentence-encoder models on these stress tests reveals strengths and weaknesses of these models with respect to challenging linguistic phenomena, and suggests important directions for future work in this area.</abstract>
@@ -2128,11 +2128,11 @@
       <author><first>Hoa Trong</first><last>Vu</last></author>
       <author><first>Claudio</first><last>Greco</last></author>
       <author><first>Aliia</first><last>Erofeeva</last></author>
-      <author><first>Somayeh</first><last>Jafaritazehjan</last></author>
+      <author id="somayeh-jafaritazehjani"><first>Somayeh</first><last>Jafaritazehjan</last></author>
       <author><first>Guido</first><last>Linders</last></author>
       <author><first>Marc</first><last>Tanti</last></author>
       <author><first>Alberto</first><last>Testoni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>2354–2368</pages>
       <abstract>Capturing semantic relations between sentences, such as entailment, is a long-standing challenge for computational semantics. Logic-based models analyse entailment in terms of possible worlds (interpretations, or situations) where a premise P entails a hypothesis H iff in all worlds where P is true, H is also true. Statistical models view this relationship probabilistically, addressing it in terms of whether a human would likely infer H from P. In this paper, we wish to bridge these two perspectives, by arguing for a visually-grounded version of the Textual Entailment task. Specifically, we ask whether models can perform better if, in addition to P and H, there is also an image (corresponding to the relevant “world” or “situation”). We use a multimodal version of the SNLI dataset (Bowman et al., 2015) and we compare “blind” and visually-augmented models of textual entailment. We show that visual information is beneficial, but we also conduct an in-depth error analysis that reveals that current multimodal models are not performing “grounding” in an optimal fashion.</abstract>
@@ -2143,7 +2143,7 @@
       <title>Recurrent One-Hop Predictions for Reasoning over Knowledge Graphs</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2369–2378</pages>
       <abstract>Large scale knowledge graphs (KGs) such as Freebase are generally incomplete. Reasoning over multi-hop (mh) KG paths is thus an important capability that is needed for question answering or other NLP tasks that require knowledge about the world. mh-KG reasoning includes diverse scenarios, e.g., given a head entity and a relation path, predict the tail entity; or given two entities connected by some relation paths, predict the unknown relation between them. We present ROPs, recurrent one-hop predictors, that predict entities at each step of mh-KB paths by using recurrent neural networks and vector representations of entities and relations, with two benefits: (i) modeling mh-paths of arbitrary lengths while updating the entity and relation representations by the training signal at each step; (ii) handling different types of mh-KG reasoning in a unified framework. Our models show state-of-the-art for two important multi-hop KG reasoning tasks: Knowledge Base Completion and Path Query Answering.</abstract>
       <url hash="8b98a76c">C18-1200</url>
@@ -2178,8 +2178,8 @@
       <title>Stance Detection with Hierarchical Attention Network</title>
       <author><first>Qingying</first><last>Sun</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2399–2409</pages>
       <abstract>Stance detection aims to assign a stance label (for or against) to a post toward a specific target. Recently, there is a growing interest in using neural models to detect stance of documents. Most of these works model the sequence of words to learn document representation. However, much linguistic information, such as polarity and arguments of the document, is correlated with the stance of the document, and can inspire us to explore the stance. Hence, we present a neural model to fully employ various linguistic information to construct the document representation. In addition, since the influences of different linguistic information are different, we propose a hierarchical attention network to weigh the importance of various linguistic information, and learn the mutual attention between the document and the linguistic information. The experimental results on two datasets demonstrate the effectiveness of the proposed hierarchical attention neural model.</abstract>
       <url hash="724ab80e">C18-1203</url>
@@ -2198,7 +2198,7 @@
     <paper id="205">
       <title>Retrofitting Distributional Embeddings to Knowledge Graphs with Functional Relations</title>
       <author><first>Ben</first><last>Lengerich</last></author>
-      <author><first>Andrew</first><last>Maas</last></author>
+      <author id="andrew-maas"><first>Andrew</first><last>Maas</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>2423–2436</pages>
       <abstract>Knowledge graphs are a versatile framework to encode richly structured data relationships, but it can be challenging to combine these graphs with unstructured data. Methods for retrofitting pre-trained entity representations to the structure of a knowledge graph typically assume that entities are embedded in a connected space and that relations imply similarity. However, useful knowledge graphs often contain diverse entities and relations (with potentially disjoint underlying corpora) which do not accord with these assumptions. To overcome these limitations, we present Functional Retrofitting, a framework that generalizes current retrofitting methods by explicitly modeling pairwise relations. Our framework can directly incorporate a variety of pairwise penalty functions previously developed for knowledge graph completion. Further, it allows users to encode, learn, and extract information about relation semantics. We present both linear and neural instantiations of the framework. Functional Retrofitting significantly outperforms existing retrofitting methods on complex knowledge graphs and loses no accuracy on simpler graphs (in which relations do imply similarity). Finally, we demonstrate the utility of the framework by predicting new drug–disease treatment pairs in a large, complex health knowledge graph.</abstract>
@@ -2207,7 +2207,7 @@
     </paper>
     <paper id="206">
       <title>Context-Sensitive Generation of Open-Domain Conversational Responses</title>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yiming</first><last>Cui</last></author>
       <author><first>Yifa</first><last>Wang</last></author>
       <author><first>Qingfu</first><last>Zhu</last></author>
@@ -2233,10 +2233,10 @@
     </paper>
     <paper id="208">
       <title>Synonymy in Bilingual Context: The <fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng<fixed-case>C</fixed-case>lass Lexicon</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>2456–2469</pages>
       <abstract>This paper describes CzEngClass, a bilingual lexical resource being built to investigate verbal synonymy in bilingual context and to relate semantic roles common to one synonym class to verb arguments (verb valency). In addition, the resource is linked to existing resources with the same of a similar aim: English and Czech WordNet, FrameNet, PropBank, VerbNet (SemLink), and valency lexicons for Czech and English (PDT-Vallex, Vallex, and EngVallex). There are several goals of this work and resource: (a) to provide gold standard data for automatic experiments in the future (such as automatic discovery of synonym classes, word sense disambiguation, assignment of classes to occurrences of verbs in text, coreferential linking of verb and event arguments in text, etc.), (b) to build a core (bilingual) lexicon linked to existing resources, for comparative studies and possibly for training automatic tools, and (c) to enrich the annotation of a parallel treebank, the Prague Czech English Dependency Treebank, which so far contained valency annotation but has not linked synonymous senses of verbs together. The method used for extracting the synonym classes is a semi-automatic process with a substantial amount of manual work during filtering, role assignment to classes and individual Class members’ arguments, and linking to the external lexical resources. We present the first version with 200 classes (about 1800 verbs) and evaluate interannotator agreement using several metrics.</abstract>
       <url hash="486ec8d4">C18-1208</url>
@@ -2257,7 +2257,7 @@
       <author><first>Andrew</first><last>Matteson</last></author>
       <author><first>Chanhee</first><last>Lee</last></author>
       <author><first>Youngbum</first><last>Kim</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>2482–2492</pages>
       <abstract>Due to the fact that Korean is a highly agglutinative, character-rich language, previous work on Korean morphological analysis typically employs the use of sub-character features known as graphemes or otherwise utilizes comprehensive prior linguistic knowledge (i.e., a dictionary of known morphological transformation forms, or actions). These models have been created with the assumption that character-level, dictionary-less morphological analysis was intractable due to the number of actions required. We present, in this study, a multi-stage action-based model that can perform morphological transformation and part-of-speech tagging using arbitrary units of input and apply it to the case of character-level Korean morphological analysis. Among models that do not employ prior linguistic knowledge, we achieve state-of-the-art word and sentence-level tagging accuracy with the Sejong Korean corpus using our proposed data-driven Bi-LSTM model.</abstract>
       <url hash="7210992c">C18-1210</url>
@@ -2276,7 +2276,7 @@
     <paper id="212">
       <title>Real-time Change Point Detection using On-line Topic Models</title>
       <author><first>Yunli</first><last>Wang</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>2505–2515</pages>
       <abstract>Detecting changes within an unfolding event in real time from news articles or social media enables to react promptly to serious issues in public safety, public health or natural disasters. In this study, we use on-line Latent Dirichlet Allocation (LDA) to model shifts in topics, and apply on-line change point detection (CPD) algorithms to detect when significant changes happen. We describe an on-line Bayesian change point detection algorithm that we use to detect topic changes from on-line LDA output. Extensive experiments on social media data and news articles show the benefits of on-line LDA versus standard LDA, and of on-line change point detection compared to off-line algorithms. This yields F-scores up to 52% on the detection of significant real-life changes from these document streams.</abstract>
       <url hash="c80417c9">C18-1212</url>
@@ -2313,7 +2313,7 @@
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Xiaozhong</first><last>Liu</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2540–2550</pages>
       <abstract>Question-Answer (QA) matching is a fundamental task in the Natural Language Processing community. In this paper, we first build a novel QA matching corpus with informal text which is collected from a product reviewing website. Then, we propose a novel QA matching approach, namely One vs. Many Matching, which aims to address the novel scenario where one question sentence often has an answer with multiple sentences. Furthermore, we improve our matching approach by employing both word-level and sentence-level attentions for solving the noisy problem in the informal text. Empirical studies demonstrate the effectiveness of the proposed approach to question-answer matching.</abstract>
       <url hash="d2d8900c">C18-1215</url>
@@ -2333,7 +2333,7 @@
     <paper id="217">
       <title><fixed-case>U</fixed-case>rdu Word Segmentation using Conditional Random Fields (<fixed-case>CRF</fixed-case>s)</title>
       <author><first>Haris</first><last>Bin Zia</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <author><first>Awais</first><last>Athar</last></author>
       <pages>2562–2569</pages>
       <abstract>State-of-the-art Natural Language Processing algorithms rely heavily on efficient word segmentation. Urdu is amongst languages for which word segmentation is a complex task as it exhibits space omission as well as space insertion issues. This is partly due to the Arabic script which although cursive in nature, consists of characters that have inherent joining and non-joining attributes regardless of word boundary. This paper presents a word segmentation system for Urdu which uses a Conditional Random Field sequence modeler with orthographic, linguistic and morphological features. Our proposed model automatically learns to predict white space as word boundary as well as Zero Width Non-Joiner (ZWNJ) as sub-word boundary. Using a manually annotated corpus, our model achieves F1 score of 0.97 for word boundary identification and 0.85 for sub-word boundary identification tasks. We have made our code and corpus publicly available to make our results reproducible.</abstract>
@@ -2342,9 +2342,9 @@
     </paper>
     <paper id="218">
       <title><fixed-case>R</fixed-case>e<fixed-case>S</fixed-case>yf: a <fixed-case>F</fixed-case>rench lexicon with ranked synonyms</title>
-      <author><first>Mokhtar B.</first><last>Billami</last></author>
+      <author id="mokhtar-b-billami"><first>Mokhtar B.</first><last>Billami</last></author>
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>2570–2581</pages>
       <abstract>In this article, we present ReSyf, a lexical resource of monolingual synonyms ranked according to their difficulty to be read and understood by native learners of French. The synonyms come from an existing lexical network and they have been semantically disambiguated and refined. A ranking algorithm, based on a wide range of linguistic features and validated through an evaluation campaign with human annotators, automatically sorts the synonyms corresponding to a given word sense by reading difficulty. ReSyf is freely available and will be integrated into a web platform for reading assistance. It can also be applied to perform lexical simplification of French texts.</abstract>
       <url hash="75e06e6e">C18-1218</url>
@@ -2364,7 +2364,7 @@
     <paper id="220">
       <title>Learning Multilingual Topics from Incomparable Corpora</title>
       <author><first>Shudong</first><last>Hao</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>2595–2609</pages>
       <abstract>Multilingual topic models enable crosslingual tasks by extracting consistent topics from multilingual corpora. Most models require parallel or comparable training corpora, which limits their ability to generalize. In this paper, we first demystify the knowledge transfer mechanism behind multilingual topic models by defining an alternative but equivalent formulation. Based on this analysis, we then relax the assumption of training data required by most existing models, creating a model that only requires a dictionary for training. Experiments show that our new method effectively learns coherent multilingual topics from partially and fully incomparable corpora with limited amounts of dictionary resources.</abstract>
       <url hash="153ccaad">C18-1220</url>
@@ -2406,9 +2406,9 @@
     <paper id="224">
       <title>Automatically Extracting Qualia Relations for the Rich Event Ontology</title>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>2644–2652</pages>
       <abstract>Commonsense, real-world knowledge about the events that entities or “things in the world” are typically involved in, as well as part-whole relationships, is valuable for allowing computational systems to draw everyday inferences about the world. Here, we focus on automatically extracting information about (1) the events that typically bring about certain entities (origins), (2) the events that are the typical functions of entities, and (3) part-whole relationships in entities. These correspond to the agentive, telic and constitutive qualia central to the Generative Lexicon. We describe our motivations and methods for extracting these qualia relations from the Suggested Upper Merged Ontology (SUMO) and show that human annotators overwhelmingly find the information extracted to be reasonable. Because ontologies provide a way of structuring this information and making it accessible to agents and computational systems generally, efforts are underway to incorporate the extracted information to an ontology hub of Natural Language Processing semantic role labeling resources, the Rich Event Ontology.</abstract>
       <url hash="2812d910">C18-1224</url>
@@ -2416,7 +2416,7 @@
     </paper>
     <paper id="225">
       <title><fixed-case>S</fixed-case>e<fixed-case>V</fixed-case>e<fixed-case>N</fixed-case>: Augmenting Word Embeddings with Unsupervised Relation Vectors</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>2653–2665</pages>
       <abstract>We present SeVeN (Semantic Vector Networks), a hybrid resource that encodes relationships between words in the form of a graph. Different from traditional semantic networks, these relations are represented as vectors in a continuous vector space. We propose a simple pipeline for learning such relation vectors, which is based on word vector averaging in combination with an ad hoc autoencoder. We show that by explicitly encoding relational information in a dedicated vector space we can capture aspects of word meaning that are complementary to what is captured by word embeddings. For example, by examining clusters of relation vectors, we observe that relational similarities can be identified at a more abstract level than with traditional word vector differences. Finally, we test the effectiveness of semantic vector networks in two tasks: measuring word similarity and neural text categorization. SeVeN is available at bitbucket.org/luisespinosa/seven.</abstract>
@@ -2426,7 +2426,7 @@
     <paper id="226">
       <title>Evaluation of Unsupervised Compositional Representations</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>2666–2677</pages>
       <abstract>We evaluated various compositional models, from bag-of-words representations to compositional RNN-based models, on several extrinsic supervised and unsupervised evaluation benchmarks. Our results confirm that weighted vector averaging can outperform context-sensitive models in most benchmarks, but structural features encoded in RNN models can also be useful in certain classification tasks. We analyzed some of the evaluation datasets to identify the aspects of meaning they measure and the characteristics of the various models that explain their performance variance.</abstract>
       <url hash="3a390ea4">C18-1226</url>
@@ -2435,7 +2435,7 @@
     <paper id="227">
       <title>Using Formulaic Expressions in Writing Assistance Systems</title>
       <author><first>Kenichi</first><last>Iwatsuki</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>2678–2689</pages>
       <abstract>Formulaic expressions (FEs) used in scholarly papers, such as ‘there has been little discussion about’, are helpful for non-native English speakers. However, it is time-consuming for users to manually search for an appropriate expression every time they want to consult FE dictionaries. For this reason, we tackle the task of semantic searches of FE dictionaries. At the start of our research, we identified two salient difficulties in this task. First, the paucity of example sentences in existing FE dictionaries results in a shortage of context information, which is necessary for acquiring semantic representation of FEs. Second, while a semantic category label is assigned to each FE in many FE dictionaries, it is difficult to predict the labels from user input, forcing users to manually designate the semantic category when searching. To address these difficulties, we propose a new framework for semantic searches of FEs and propose a new method to leverage both existing dictionaries and domain sentence corpora. Further, we expand an existing FE dictionary to consider building a more comprehensive and domain-specific FE dictionary and to verify the effectiveness of our method.</abstract>
       <url hash="b3becdfe">C18-1227</url>
@@ -2488,7 +2488,7 @@
       <author><first>Jingjing</first><last>Gong</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Shaojing</first><last>Wang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2742–2752</pages>
       <abstract>While much progress has been made in how to encode a text sequence into a sequence of vectors, less attention has been paid to how to aggregate these preceding vectors (outputs of RNN/CNN) into fixed-size encoding vector. Usually, a simple max or average pooling is used, which is a bottom-up and passive way of aggregation and lack of guidance by task information. In this paper, we propose an aggregation mechanism to obtain a fixed-size encoding with a dynamic routing policy. The dynamic routing policy is dynamically deciding that what and how much information need be transferred from each word to the final encoding of the text sequence. Following the work of Capsule Network, we design two dynamic routing policies to aggregate the outputs of RNN/CNN encoding layer into a final encoding vector. Compared to the other aggregation methods, dynamic routing can refine the messages according to the state of final encoding vector. Experimental results on five text classification tasks show that our method outperforms other aggregating models by a significant margin. Related source code is released on our github page. Related source code is released on our github page.</abstract>
       <url hash="218e91b0">C18-1232</url>
@@ -2532,7 +2532,7 @@
       <author><first>Josep</first><last>Carmona</last></author>
       <author><first>Henrik</first><last>Leopold</last></author>
       <author><first>Jan</first><last>Mendling</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>2791–2801</pages>
       <abstract>The Business Process Management (BPM) field focuses in the coordination of labor so that organizational processes are smoothly executed in a way that products and services are properly delivered. At the same time, NLP has reached a maturity level that enables its widespread application in many contexts, thanks to publicly available frameworks. In this position paper, we show how NLP has potential in raising the benefits of BPM practices at different levels. Instead of being exhaustive, we show selected key challenges were a successful application of NLP techniques would facilitate the automation of particular tasks that nowadays require a significant effort to accomplish. Finally, we report on applications that consider both the process perspective and its enhancement through NLP.</abstract>
       <url hash="55755a7e">C18-1236</url>
@@ -2543,7 +2543,7 @@
       <author><first>Tirthankar</first><last>Ghosal</last></author>
       <author><first>Vignesh</first><last>Edithal</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>George</first><last>Tsatsaronis</last></author>
       <author><first>Srinivasa Satya Sameer Kumar</first><last>Chivukula</last></author>
       <pages>2802–2813</pages>
@@ -2565,7 +2565,7 @@
       <author><first>Junwen</first><last>Duan</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Xiao</first><last>Ding</last></author>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>2823–2833</pages>
       <abstract>Texts from the Internet serve as important data sources for financial market modeling. Early statistical approaches rely on manually defined features to capture lexical, sentiment and event information, which suffers from feature sparsity. Recent work has considered learning dense representations for news titles and abstracts. Compared to news titles, full documents can contain more potentially helpful information, but also noise compared to events and sentences, which has been less investigated in previous work. To fill this gap, we propose a novel target-specific abstract-guided news document representation model. The model uses a target-sensitive representation of the news abstract to weigh sentences in the news content, so as to select and combine the most informative sentences for market modeling. Results show that document representations can give better performance for estimating cumulative abnormal returns of companies when compared to titles and abstracts. Our model is especially effective when it used to combine information from multiple document sources compared to the sentence-level baselines.</abstract>
@@ -2594,7 +2594,7 @@
       <title>Towards a unified framework for bilingual terminology extraction of single-word and multi-word terms</title>
       <author><first>Jingshu</first><last>Liu</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Peña</first><last>Saldarriaga</last></author>
+      <author id="sebastian-pena-saldarriaga"><first>Peña</first><last>Saldarriaga</last></author>
       <pages>2855–2866</pages>
       <abstract>Extracting a bilingual terminology for multi-word terms from comparable corpora has not been widely researched. In this work we propose a unified framework for aligning bilingual terms independently of the term lengths. We also introduce some enhancements to the context-based and the neural network based approaches. Our experiments show the effectiveness of our enhancements of previous works and the system can be adapted in specialized domains.</abstract>
       <url hash="afe75288">C18-1242</url>
@@ -2622,7 +2622,7 @@
     </paper>
     <paper id="245">
       <title>Emotion Representation Mapping for Automatic Lexicon Construction (Mostly) Performs on Human Level</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>2892–2904</pages>
       <abstract>Emotion Representation Mapping (ERM) has the goal to convert existing emotion ratings from one representation format into another one, e.g., mapping Valence-Arousal-Dominance annotations for words or sentences into Ekman’s Basic Emotions and vice versa. ERM can thus not only be considered as an alternative to Word Emotion Induction (WEI) techniques for automatic emotion lexicon construction but may also help mitigate problems that come from the proliferation of emotion representation formats in recent years. We propose a new neural network approach to ERM that not only outperforms the previous state-of-the-art. Equally important, we present a refined evaluation methodology and gather strong evidence that our model yields results which are (almost) as reliable as human annotations, even in cross-lingual settings. Based on these results we generate new emotion ratings for 13 typologically diverse languages and claim that they have near-gold quality, at least.</abstract>
@@ -2632,7 +2632,7 @@
     <paper id="246">
       <title>Emotion Detection and Classification in a Multigenre Corpus with Joint Multi-Task Deep Learning</title>
       <author><first>Shabnam</first><last>Tafreshi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>2905–2913</pages>
       <abstract>Detection and classification of emotion categories expressed by a sentence is a challenging task due to subjectivity of emotion. To date, most of the models are trained and evaluated on single genre and when used to predict emotion in different genre their performance drops by a large margin. To address the issue of robustness, we model the problem within a joint multi-task learning framework. We train this model with a multigenre emotion corpus to predict emotions across various genre. Each genre is represented as a separate task, we use soft parameter shared layers across the various tasks. our experimental results show that this model improves the results across the various genres, compared to a single genre training in the same neural net architecture.</abstract>
       <url hash="b3e41017">C18-1246</url>
@@ -2642,7 +2642,7 @@
       <title>How emotional are you? Neural Architectures for Emotion Intensity Prediction in Microblogs</title>
       <author><first>Devang</first><last>Kulshreshtha</last></author>
       <author id="pranav-goel-umd"><first>Pranav</first><last>Goel</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>2914–2926</pages>
       <abstract>Social media based micro-blogging sites like Twitter have become a common source of real-time information (impacting organizations and their strategies, and are used for expressing emotions and opinions. Automated analysis of such content therefore rises in importance. To this end, we explore the viability of using deep neural networks on the specific task of emotion intensity prediction in tweets. We propose a neural architecture combining convolutional and fully connected layers in a non-sequential manner - done for the first time in context of natural language based tasks. Combined with lexicon-based features along with transfer learning, our model achieves state-of-the-art performance, outperforming the previous system by 0.044 or 4.4% Pearson correlation on the WASSA’17 EmoInt shared task dataset. We investigate the performance of deep multi-task learning models trained for all emotions at once in a unified architecture and get encouraging results. Experiments performed on evaluating correlation between emotion pairs offer interesting insights into the relationship between them.</abstract>
       <url hash="27aed75d">C18-1247</url>
@@ -2652,7 +2652,7 @@
       <title>Expressively vulgar: The socio-dynamics of vulgarity and its effects on sentiment analysis in social media</title>
       <author><first>Isabel</first><last>Cachola</last></author>
       <author><first>Eric</first><last>Holgate</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <pages>2927–2938</pages>
       <abstract>Vulgarity is a common linguistic expression and is used to perform several linguistic functions. Understanding their usage can aid both linguistic and psychological phenomena as well as benefit downstream natural language processing applications such as sentiment analysis. This study performs a large-scale, data-driven empirical analysis of vulgar words using social media data. We analyze the socio-cultural and pragmatic aspects of vulgarity using tweets from users with known demographics. Further, we collect sentiment ratings for vulgar tweets to study the relationship between the use of vulgar words and perceived sentiment and show that explicitly modeling vulgar words can boost sentiment analysis performance.</abstract>
@@ -2745,7 +2745,7 @@
     <paper id="257">
       <title>Adaptive Weighting for Neural Machine Translation</title>
       <author><first>Yachao</first><last>Li</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>3038–3048</pages>
       <abstract>In the popular sequence to sequence (seq2seq) neural machine translation (NMT), there exist many weighted sum models (WSMs), each of which takes a set of input and generates one output. However, the weights in a WSM are independent of each other and fixed for all inputs, suggesting that by ignoring different needs of inputs, the WSM lacks effective control on the influence of each input. In this paper, we propose adaptive weighting for WSMs to control the contribution of each input. Specifically, we apply adaptive weighting for both GRU and the output state in NMT. Experimentation on Chinese-to-English translation and English-to-German translation demonstrates that the proposed adaptive weighting is able to much improve translation accuracy by achieving significant improvement of 1.49 and 0.92 BLEU points for the two translation tasks. Moreover, we discuss in-depth on what type of information is encoded in the encoder and how information influences the generation of target words in the decoder.</abstract>
@@ -2775,7 +2775,7 @@
     </paper>
     <paper id="260">
       <title>An Empirical Investigation of Error Types in <fixed-case>V</fixed-case>ietnamese Parsing</title>
-      <author><first>Quy</first><last>Nguyen</last></author>
+      <author id="quy-nguyen"><first>Quy</first><last>Nguyen</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
       <author><first>Nhung</first><last>Nguyen</last></author>
@@ -2795,15 +2795,15 @@
     </paper>
     <paper id="262">
       <title>Parallel Corpora for bi-lingual <fixed-case>E</fixed-case>nglish-<fixed-case>E</fixed-case>thiopian Languages Statistical Machine Translation</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
       <author><first>Michael</first><last>Melese</last></author>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
       <author><first>Million</first><last>Meshesha</last></author>
       <author><first>Solomon</first><last>Atinafu</last></author>
       <author><first>Wondwossen</first><last>Mulugeta</last></author>
       <author><first>Yaregal</first><last>Assabie</last></author>
       <author><first>Hafte</first><last>Abera</last></author>
-      <author><first>Binyam</first><last>Ephrem</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam</first><last>Ephrem</last></author>
       <author><first>Tewodros</first><last>Abebe</last></author>
       <author><first>Wondimagegnhue</first><last>Tsegaye</last></author>
       <author><first>Amanuel</first><last>Lemma</last></author>
@@ -2845,7 +2845,7 @@
     <paper id="266">
       <title>deep<fixed-case>Q</fixed-case>uest: A Framework for Neural-based Quality Estimation</title>
       <author><first>Julia</first><last>Ive</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3146–3157</pages>
       <abstract>Predicting Machine Translation (MT) quality can help in many practical tasks such as MT post-editing. The performance of Quality Estimation (QE) methods has drastically improved recently with the introduction of neural approaches to the problem. However, thus far neural approaches have only been designed for word and sentence-level prediction. We present a neural framework that is able to accommodate neural QE approaches at these fine-grained levels and generalize them to the level of documents. We test the framework with two sentence-level neural QE approaches: a state of the art approach that requires extensive pre-training, and a new light-weight approach that we propose, which employs basic encoders. Our approach is significantly faster and yields performance improvements for a range of document-level quality estimation tasks. To our knowledge, this is the first neural architecture for document-level QE. In addition, for the first time we apply QE models to the output of both statistical and neural MT systems for a series of European languages and highlight the new challenges resulting from the use of neural MT.</abstract>
@@ -2856,7 +2856,7 @@
       <title>Butterfly Effects in Frame Semantic Parsing: impact of data processing on model ranking</title>
       <author><first>Alexandre</first><last>Kabbach</last></author>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>3158–3169</pages>
       <abstract>Knowing the state-of-the-art for a particular task is an essential component of any computational linguistics investigation. But can we be truly confident that the current state-of-the-art is indeed the best performing model? In this paper, we study the case of frame semantic parsing, a well-established task with multiple shared datasets. We show that in spite of all the care taken to provide a standard evaluation resource, small variations in data processing can have dramatic consequences for ranking parser performance. This leads us to propose an open-source standardized processing pipeline, which can be shared and reused for robust model comparison.</abstract>
       <url hash="84547765">C18-1267</url>
@@ -2874,7 +2874,7 @@
     <paper id="269">
       <title>Sentence Weighting for Neural Machine Translation Domain Adaptation</title>
       <author><first>Shiqi</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>3181–3190</pages>
       <abstract>In this paper, we propose a new sentence weighting method for the domain adaptation of neural machine translation. We introduce a domain similarity metric to evaluate the relevance between a sentence and an available entire domain dataset. The similarity of each sentence to the target domain is calculated with various methods. The computed similarity is then integrated into the training objective to weight sentences. The adaptation results on both IWSLT Chinese-English TED task and a task with only synthetic training parallel data show that our sentence weighting method is able to achieve an significant improvement over strong baselines.</abstract>
       <url hash="244d75a7">C18-1269</url>
@@ -2905,7 +2905,7 @@
       <title>Revisiting the Hierarchical Multiscale <fixed-case>LSTM</fixed-case></title>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Marc-Alexandre</first><last>Côté</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>3215–3227</pages>
       <abstract>Hierarchical Multiscale LSTM (Chung et. al., 2016) is a state-of-the-art language model that learns interpretable structure from character-level input. Such models can provide fertile ground for (cognitive) computational linguistics studies. However, the high complexity of the architecture, training and implementations might hinder its applicability. We provide a detailed reproduction and ablation study of the architecture, shedding light on some of the potential caveats of re-purposing complex deep-learning architectures. We further show that simplifying certain aspects of the architecture can in fact improve its performance. We also investigate the linguistic units (segments) learned by various levels of the model, and argue that their quality does not correlate with the overall performance of the model on language modeling.</abstract>
@@ -2917,7 +2917,7 @@
       <author><first>Chanhee</first><last>Lee</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Dongyub</first><last>Lee</last></author>
-      <author><first>Heuiseok</first><last>Lim</last></author>
+      <author id="heui-seok-lim"><first>Heuiseok</first><last>Lim</last></author>
       <pages>3228–3239</pages>
       <abstract>Generating character-level features is an important step for achieving good results in various natural language processing tasks. To alleviate the need for human labor in generating hand-crafted features, methods that utilize neural architectures such as Convolutional Neural Network (CNN) or Recurrent Neural Network (RNN) to automatically extract such features have been proposed and have shown great results. However, CNN generates position-independent features, and RNN is slow since it needs to process the characters sequentially. In this paper, we propose a novel method of using a densely connected network to automatically extract character-level features. The proposed method does not require any language or task specific assumptions, and shows robustness and effectiveness while being faster than CNN- or RNN-based methods. Evaluating this method on three sequence labeling tasks - slot tagging, Part-of-Speech (POS) tagging, and Named-Entity Recognition (NER) - we obtain state-of-the-art performance with a 96.62 F1-score and 97.73% accuracy on slot tagging and POS tagging, respectively, and comparable performance to the state-of-the-art 91.13 F1-score on NER.</abstract>
       <url hash="98450a6e">C18-1273</url>
@@ -2929,7 +2929,7 @@
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>3240–3250</pages>
       <abstract>This study proposes a new neural machine translation (NMT) model based on the encoder-decoder model that incorporates named entity (NE) tags of source-language sentences. Conventional NMT models have two problems enumerated as follows: (i) they tend to have difficulty in translating words with multiple meanings because of the high ambiguity, and (ii) these models’abilitytotranslatecompoundwordsseemschallengingbecausetheencoderreceivesaword, a part of the compound word, at each time step. To alleviate these problems, the encoder of the proposed model encodes the input word on the basis of its NE tag at each time step, which could reduce the ambiguity of the input word. Furthermore,the encoder introduces a chunk-level LSTM layer over a word-level LSTM layer and hierarchically encodes a source-language sentence to capture a compound NE as a chunk on the basis of the NE tags. We evaluate the proposed model on an English-to-Japanese translation task with the ASPEC, and English-to-Bulgarian and English-to-Romanian translation tasks with the Europarl corpus. The evaluation results show that the proposed model achieves up to 3.11 point improvement in BLEU.</abstract>
       <url hash="80f6054d">C18-1274</url>
@@ -2976,8 +2976,8 @@
     <paper id="278">
       <title>Integrating Question Classification and Deep Learning for improved Answer Selection</title>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <pages>3283–3294</pages>
       <abstract>We present a system for Answer Selection that integrates fine-grained Question Classification with a Deep Learning model designed for Answer Selection. We detail the necessary changes to the Question Classification taxonomy and system, the creation of a new Entity Identification system and methods of highlighting entities to achieve this objective. Our experiments show that Question Classes are a strong signal to Deep Learning models for Answer Selection, and enable us to outperform the current state of the art in all variations of our experiments except one. In the best configuration, our MRR and MAP scores outperform the current state of the art by between 3 and 5 points on both versions of the TREC Answer Selection test set, a standard dataset for this task.</abstract>
       <url hash="163a08ce">C18-1278</url>
@@ -2990,7 +2990,7 @@
       <author><first>Min</first><last>Yang</last></author>
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Nan</first><last>Du</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
       <author><first>Kai</first><last>Lei</last></author>
       <pages>3295–3305</pages>
       <abstract>Answer selection is an important but challenging task. Significant progresses have been made in domains where a large amount of labeled training data is available. However, obtaining rich annotated data is a time-consuming and expensive process, creating a substantial barrier for applying answer selection models to a new domain which has limited labeled data. In this paper, we propose Knowledge-aware Attentive Network (KAN), a transfer learning framework for cross-domain answer selection, which uses the knowledge base as a bridge to enable knowledge transfer from the source domain to the target domains. Specifically, we design a knowledge module to integrate the knowledge-based representational learning into answer selection models. The learned knowledge-based representations are shared by source and target domains, which not only leverages large amounts of cross-domain data, but also benefits from a regularization effect that leads to more general representations to help tasks in new domains. To verify the effectiveness of our model, we use SQuAD-T dataset as the source domain and three other datasets (i.e., Yahoo QA, TREC QA and InsuranceQA) as the target domains. The experimental results demonstrate that KAN has remarkable applicability and generality, and consistently outperforms the strong competitors by a noticeable margin for cross-domain answer selection.</abstract>
@@ -3041,7 +3041,7 @@
       <author><first>Sebastian</first><last>Dungs</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Norbert</first><last>Fuhr</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>3360–3370</pages>
       <abstract>Prior manual studies of rumours suggested that crowd stance can give insights into the actual rumour veracity. Even though numerous studies of automatic veracity classification of social media rumours have been carried out, none explored the effectiveness of leveraging crowd stance to determine veracity. We use stance as an additional feature to those commonly used in earlier studies. We also model the veracity of a rumour using variants of Hidden Markov Models (HMM) and the collective stance information. This paper demonstrates that HMMs that use stance and tweets’ times as the only features for modelling true and false rumours achieve F1 scores in the range of 80%, outperforming those approaches where stance is used jointly with content and user based features.</abstract>
       <url hash="913e37df">C18-1284</url>
@@ -3061,7 +3061,7 @@
       <title>Predicting Stances from Social Media Posts using Factorization Machines</title>
       <author><first>Akira</first><last>Sasaki</last></author>
       <author><first>Kazuaki</first><last>Hanawa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>3381–3390</pages>
       <abstract>Social media provide platforms to express, discuss, and shape opinions about events and issues in the real world. An important step to analyze the discussions on social media and to assist in healthy decision-making is stance detection. This paper presents an approach to detect the stance of a user toward a topic based on their stances toward other topics and the social media posts of the user. We apply factorization machines, a widely used method in item recommendation, to model user preferences toward topics from the social media data. The experimental results demonstrate that users’ posts are useful to model topic preferences and therefore predict stances of silent users.</abstract>
@@ -3073,7 +3073,7 @@
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <author><first>Bennett</first><last>Kleinberg</last></author>
       <author><first>Alexandra</first><last>Lefevre</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>3391–3401</pages>
       <abstract>The proliferation of misleading information in everyday access media outlets such as social media feeds, news blogs, and online newspapers have made it challenging to identify trustworthy news sources, thus increasing the need for computational tools able to provide insights into the reliability of online content. In this paper, we focus on the automatic identification of fake content in online news. Our contribution is twofold. First, we introduce two novel datasets for the task of fake news detection, covering seven different news domains. We describe the collection, annotation, and validation process in detail and present several exploratory analyses on the identification of linguistic differences in fake and legitimate news content. Second, we conduct a set of learning experiments to build accurate fake news detectors, and show that we can achieve accuracies of up to 76%. In addition, we provide comparative analyses of the automatic and manual identification of fake news.</abstract>
       <url hash="f46ceb64">C18-1287</url>
@@ -3094,7 +3094,7 @@
       <author><first>Paul</first><last>Groth</last></author>
       <author><first>Mike</first><last>Lauruhn</last></author>
       <author><first>Antony</first><last>Scerri</last></author>
-      <author><first>Ron</first><last>Daniel Jr.</last></author>
+      <author id="ron-daniel-jr"><first>Ron</first><last>Daniel Jr.</last></author>
       <pages>3414–3423</pages>
       <abstract>Open Information Extraction (OIE) is the task of the unsupervised creation of structured information from text. OIE is often used as a starting point for a number of downstream tasks including knowledge base construction, relation extraction, and question answering. While OIE methods are targeted at being domain independent, they have been evaluated primarily on newspaper, encyclopedic or general web text. In this article, we evaluate the performance of OIE on scientific texts originating from 10 different disciplines. To do so, we use two state-of-the-art OIE systems using a crowd-sourcing approach. We find that OIE systems perform significantly worse on scientific text than encyclopedic text. We also provide an error analysis and suggest areas of work to reduce errors. Our corpus of sentences and judgments are made available.</abstract>
       <url hash="a954d92b">C18-1289</url>
@@ -3102,7 +3102,7 @@
     </paper>
     <paper id="290">
       <title>Simple Algorithms For Sentiment Analysis On Sentiment Rich, Data Poor Domains.</title>
-      <author><first>Prathusha</first><last>K Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>K Sarma</last></author>
       <author><first>William</first><last>Sethares</last></author>
       <pages>3424–3435</pages>
       <abstract>Standard word embedding algorithms learn vector representations from large corpora of text documents in an unsupervised fashion. However, the quality of word embeddings learned from these algorithms is affected by the size of training data sets. Thus, applications of these algorithms in domains with only moderate amounts of available data is limited. In this paper we introduce an algorithm that learns word embeddings jointly with a classifier. Our algorithm is called SWESA (Supervised Word Embeddings for Sentiment Analysis). SWESA leverages document label information to learn vector representations of words from a modest corpus of text documents by solving an optimization problem that minimizes a cost function with respect to both word embeddings and the weight vector used for classification. Experiments on several real world data sets show that SWESA has superior performance on domains with limited data, when compared to previously suggested approaches to word embeddings and sentiment analysis tasks.</abstract>
@@ -3112,7 +3112,7 @@
     <paper id="291">
       <title>Word-Level Loss Extensions for Neural Temporal Relation Classification</title>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>3436–3447</pages>
       <abstract>Unsupervised pre-trained word embeddings are used effectively for many tasks in natural language processing to leverage unlabeled textual data. Often these embeddings are either used as initializations or as fixed word representations for task-specific classification models. In this work, we extend our classification model’s task loss with an unsupervised auxiliary loss on the word-embedding level of the model. This is to ensure that the learned word representations contain both task-specific features, learned from the supervised loss component, and more general features learned from the unsupervised loss component. We evaluate our approach on the task of temporal relation extraction, in particular, narrative containment relation extraction from clinical records, and show that continued training of the embeddings on the unsupervised objective together with the task objective gives better task-specific embeddings, and results in an improvement over the state of the art on the THYME dataset, using only a general-domain part-of-speech tagger as linguistic resource.</abstract>
       <url hash="92b1cb4c">C18-1291</url>
@@ -3121,7 +3121,7 @@
     <paper id="292">
       <title>Personalized Text Retrieval for Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language</title>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>3448–3455</pages>
       <abstract>This paper describes a personalized text retrieval algorithm that helps language learners select the most suitable reading material in terms of vocabulary complexity. The user first rates their knowledge of a small set of words, chosen by a graph-based active learning model. The system trains a complex word identification model on this set, and then applies the model to find texts that contain the desired proportion of new, challenging, and familiar vocabulary. In an evaluation on learners of Chinese as a foreign language, we show that this algorithm is effective in identifying simpler texts for low-proficiency learners, and more challenging ones for high-proficiency learners.</abstract>
       <url hash="64ad1f0e">C18-1292</url>
@@ -3130,7 +3130,7 @@
     <paper id="293">
       <title>Punctuation as Native Language Interference</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>3456–3466</pages>
       <abstract>In this paper, we describe experiments designed to explore and evaluate the impact of punctuation marks on the task of native language identification. Punctuation is specific to each language, and is part of the indicators that overtly represent the manner in which each language organizes and conveys information. Our experiments are organized in various set-ups: the usual multi-class classification for individual languages, also considering classification by language groups, across different proficiency levels, topics and even cross-corpus. The results support our hypothesis that punctuation marks are persistent and robust indicators of the native language of the author, which do not diminish in influence even when a high proficiency level in a non-native language is achieved.</abstract>
@@ -3141,7 +3141,7 @@
       <title>Investigating Productive and Receptive Knowledge: A Profile for Second Language Learning</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>3467–3478</pages>
       <abstract>The literature frequently addresses the differences in receptive and productive vocabulary, but grammar is often left unacknowledged in second language acquisition studies. In this paper, we used two corpora to investigate the divergences in the behavior of pedagogically relevant grammatical structures in reception and production texts. We further improved the divergence scores observed in this investigation by setting a polarity to them that indicates whether there is overuse or underuse of a grammatical structure by language learners. This led to the compilation of a language profile that was later combined with vocabulary and readability features for classifying reception and production texts in three classes: beginner, intermediate, and advanced. The results of the automatic classification task in both production (0.872 of F-measure) and reception (0.942 of F-measure) were comparable to the current state of the art. We also attempted to automatically attribute a score to texts produced by learners, and the correlation results were encouraging, but there is still a good amount of room for improvement in this task. The developed language profile will serve as input for a system that helps language learners to activate more of their passive knowledge in writing texts.</abstract>
       <url hash="45c4d62f">C18-1294</url>
@@ -3163,8 +3163,8 @@
       <author><first>Sheng</first><last>Xu</last></author>
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3493–3504</pages>
       <abstract>In view of the differences between the annotations of micro and macro discourse rela-tionships, this paper describes the relevant experiments on the construction of the Macro Chinese Discourse Treebank (MCDTB), a higher-level Chinese discourse corpus. Fol-lowing RST (Rhetorical Structure Theory), we annotate the macro discourse information, including discourse structure, nuclearity and relationship, and the additional discourse information, including topic sentences, lead and abstract, to make the macro discourse annotation more objective and accurate. Finally, we annotated 720 articles with a Kappa value greater than 0.6. Preliminary experiments on this corpus verify the computability of MCDTB.</abstract>
       <url hash="2966ec85">C18-1296</url>
@@ -3183,7 +3183,7 @@
     </paper>
     <paper id="298">
       <title>Bridging resolution: Task definition, corpus resources and rule-based experiments</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Arndt</first><last>Riester</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>3516–3528</pages>
@@ -3208,7 +3208,7 @@
       <title><fixed-case>ISO</fixed-case>-Standard Domain-Independent Dialogue Act Tagging for Conversational Agents</title>
       <author><first>Stefano</first><last>Mezza</last></author>
       <author><first>Alessandra</first><last>Cervone</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuliano</first><last>Tortoreto</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>3539–3551</pages>
@@ -3265,8 +3265,8 @@
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Liang</first><last>Zhao</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3597–3607</pages>
       <abstract>To deploy a spoken language understanding (SLU) model to a new language, language transferring is desired to avoid the trouble of acquiring and labeling a new big SLU corpus. An SLU corpus is a monolingual corpus with domain/intent/slot labels. Translating the original SLU corpus into the target language is an attractive strategy. However, SLU corpora consist of plenty of semantic labels (slots), which general-purpose translators cannot handle well, not to mention additional culture differences. This paper focuses on the language transferring task given a small in-domain parallel SLU corpus. The in-domain parallel corpus can be used as the first adaptation on the general translator. But more importantly, we show how to use reinforcement learning (RL) to further adapt the adapted translator, where translated sentences with more proper slot tags receive higher rewards. Our reward is derived from the source input sentence exclusively, unlike reward via actor-critical methods or computing reward with a ground truth target sentence. Hence we can adapt the translator the second time, using the big monolingual SLU corpus from the source language. We evaluate our approach on Chinese to English language transferring for SLU systems. The experimental results show that the generated English SLU corpus via adaptation and reinforcement learning gives us over 97% in the slot F1 score and over 84% accuracy in domain classification. It demonstrates the effectiveness of the proposed language transferring method. Compared with naive translation, our proposed method improves domain classification accuracy by relatively 22%, and the slot filling F1 score by relatively more than 71%.</abstract>
       <url hash="ece1d29b">C18-1305</url>
@@ -3288,7 +3288,7 @@
     <paper id="307">
       <title>Adaptive Multi-Task Transfer Learning for <fixed-case>C</fixed-case>hinese Word Segmentation in Medical Text</title>
       <author><first>Junjie</first><last>Xing</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <author><first>Shaodian</first><last>Zhang</last></author>
       <pages>3619–3630</pages>
       <abstract>Chinese word segmentation (CWS) trained from open source corpus faces dramatic performance drop when dealing with domain text, especially for a domain with lots of special terms and diverse writing styles, such as the biomedical domain. However, building domain-specific CWS requires extremely high annotation cost. In this paper, we propose an approach by exploiting domain-invariant knowledge from high resource to low resource domains. Extensive experiments show that our model achieves consistently higher accuracy than the single-task CWS and other transfer learning baselines, especially when there is a large disparity between source and target domains.</abstract>
@@ -3309,7 +3309,7 @@
       <title>Graph Based Decoding for Event Sequencing and Coreference Resolution</title>
       <author><first>Zhengzhong</first><last>Liu</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3645–3657</pages>
       <abstract>Events in text documents are interrelated in complex ways. In this paper, we study two types of relation: Event Coreference and Event Sequencing. We show that the popular tree-like decoding structure for automated Event Coreference is not suitable for Event Sequencing. To this end, we propose a graph-based decoding algorithm that is applicable to both tasks. The new decoding algorithm supports flexible feature sets for both tasks. Empirically, our event coreference system has achieved state-of-the-art performance on the TAC-KBP 2015 event coreference task and our event sequencing system beats a strong temporal-based, oracle-informed baseline. We discuss the challenges of studying these event relations.</abstract>
       <url hash="86b41796">C18-1309</url>
@@ -3320,7 +3320,7 @@
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Ruud</first><last>Koolen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>3658–3669</pages>
       <abstract>We present a corpus of spoken Dutch image descriptions, paired with two sets of eye-tracking data: Free viewing, where participants look at images without any particular purpose, and Description viewing, where we track eye movements while participants produce spoken descriptions of the images they are viewing. This paper describes the data collection procedure and the corpus itself, and provides an initial analysis of self-corrections in image descriptions. We also present two studies showing the potential of this data. Though these studies mainly serve as an example, we do find two interesting results: (1) the eye-tracking data for the description viewing task is more coherent than for the free-viewing task; (2) variation in image descriptions (also called ‘image specificity’; Jas and Parikh, 2015) is only moderately correlated across different languages. Our corpus can be used to gain a deeper understanding of the image description task, particularly how visual attention is correlated with the image description process.</abstract>
       <url hash="d41b1609">C18-1310</url>
@@ -3328,7 +3328,7 @@
     </paper>
     <paper id="311">
       <title>Narrative Schema Stability in News Text</title>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <author><first>Anthony</first><last>Davis</last></author>
       <pages>3670–3680</pages>
       <abstract>We investigate the stability of narrative schemas (Chambers and Jurafsky, 2009) automatically induced from a news corpus, representing recurring narratives in a corpus. If such techniques produce meaningful results, we should expect that small changes to the corpus will result in only small changes to the induced schemas. We describe experiments involving successive ablation of a corpus and cross-validation at each stage of ablation, on schemas generated by three different techniques over a general news corpus and topically-specific subcorpora. We also develop a method for evaluating the similarity between sets of narrative schemas, and thus the stability of the schema induction algorithms. This stability analysis affirms the heterogeneous/homogeneous document category hypothesis first presented in Simonson and Davis (2016), whose technique is problematically limited. Additionally, increased ablation leads to increasing stability, so the smaller the remaining corpus, the more stable schema generation appears to be. We surmise that as a corpus grows larger, novel and more varied narratives continue to appear and stability declines, though at some point this decline levels off as new additions to the corpus consist essentially of “more of the same.”</abstract>
@@ -3338,7 +3338,7 @@
     <paper id="312">
       <title><fixed-case>NIPS</fixed-case> Conversational Intelligence Challenge 2017 Winner System: Skill-based Conversational Agent with Supervised Dialog Manager</title>
       <author><first>Idris</first><last>Yusupov</last></author>
-      <author><first>Yurii</first><last>Kuratov</last></author>
+      <author id="yurii-kuratov"><first>Yurii</first><last>Kuratov</last></author>
       <pages>3681–3692</pages>
       <abstract>We present bot#1337: a dialog system developed for the 1st NIPS Conversational Intelligence Challenge 2017 (ConvAI). The aim of the competition was to implement a bot capable of conversing with humans based on a given passage of text. To enable conversation, we implemented a set of skills for our bot, including chit-chat, topic detection, text summarization, question answering and question generation. The system has been trained in a supervised setting using a dialogue manager to select an appropriate skill for generating a response. The latter allows a developer to focus on the skill implementation rather than the finite state machine based dialog manager. The proposed system bot#1337 won the competition with an average dialogue quality score of 2.78 out of 5 given by human evaluators. Source code and trained models for the bot#1337 are available on GitHub.</abstract>
       <url hash="6ca8c07c">C18-1312</url>
@@ -3351,7 +3351,7 @@
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>3693–3702</pages>
       <abstract>There are few corpora that endeavor to represent the semantic content of entire documents. We present a corpus that accomplishes one way of capturing document level semantics, by annotating coreference and similar phenomena (bridging and implicit roles) on top of gold Abstract Meaning Representations of sentence-level semantics. We present a new corpus of this annotation, with analysis of its quality, alongside a plausible baseline for comparison. It is hoped that this Multi-Sentence AMR corpus (MS-AMR) may become a feasible method for developing rich representations of document meaning, useful for tasks such as information extraction and question answering.</abstract>
       <url hash="2f0f230e">C18-1313</url>
@@ -3364,7 +3364,7 @@
       <author><first>Xiangkun</first><last>Hu</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3703–3714</pages>
       <abstract>In this paper, we investigate the issue of persuasiveness evaluation for argumentative comments. Most of the existing research explores different text features of reply comments on word level and ignores interactions between participants. In general, viewpoints are usually expressed by multiple arguments and exchanged on argument level. To better model the process of dialogical argumentation, we propose a novel co-attention mechanism based neural network to capture the interactions between participants on argument level. Experimental results on a publicly available dataset show that the proposed model significantly outperforms some state-of-the-art methods for persuasiveness evaluation. Further analysis reveals that attention weights computed in our model are able to extract interactive argument pairs from the original post and the reply.</abstract>
       <url hash="fdd16bc0">C18-1314</url>
@@ -3372,7 +3372,7 @@
     </paper>
     <paper id="315">
       <title>Learning Visually-Grounded Semantics from Contrastive Adversarial Samples</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Jiayuan</first><last>Mao</last></author>
       <author><first>Tete</first><last>Xiao</last></author>
       <author><first>Yuning</first><last>Jiang</last></author>
@@ -3409,7 +3409,7 @@
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Roxanne</first><last>El Baff</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Maria</first><last>Skeppstedt</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>3753–3765</pages>
@@ -3422,7 +3422,7 @@
       <author><first>Suman</first><last>Banerjee</last></author>
       <author><first>Nikita</first><last>Moghe</last></author>
       <author><first>Siddhartha</first><last>Arora</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>3766–3780</pages>
       <abstract>There is an increasing demand for goal-oriented conversation systems which can assist users in various day-to-day activities such as booking tickets, restaurant reservations, shopping, etc. Most of the existing datasets for building such conversation systems focus on monolingual conversations and there is hardly any work on multilingual and/or code-mixed conversations. Such datasets and systems thus do not cater to the multilingual regions of the world, such as India, where it is very common for people to speak more than one language and seamlessly switch between them resulting in code-mixed conversations. For example, a Hindi speaking user looking to book a restaurant would typically ask, “Kya tum is restaurant mein ek table book karne mein meri help karoge?” (“Can you help me in booking a table at this restaurant?”). To facilitate the development of such code-mixed conversation models, we build a goal-oriented dialog dataset containing code-mixed conversations. Specifically, we take the text from the DSTC2 restaurant reservation dataset and create code-mixed versions of it in Hindi-English, Bengali-English, Gujarati-English and Tamil-English. We also establish initial baselines on this dataset using existing state of the art models. This dataset along with our baseline implementations will be made publicly available for research purposes.</abstract>
       <url hash="3d5a59e9">C18-1319</url>
@@ -3497,7 +3497,7 @@
       <title>A Survey on Open Information Extraction</title>
       <author><first>Christina</first><last>Niklaus</last></author>
       <author><first>Matthias</first><last>Cetto</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>3866–3878</pages>
       <abstract>We provide a detailed overview of the various approaches that were proposed to date to solve the task of Open Information Extraction. We present the major challenges that such systems face, show the evolution of the suggested approaches over time and depict the specific issues they address. In addition, we provide a critique of the commonly applied evaluation procedures for assessing the performance of Open IE systems and highlight some directions for future work.</abstract>
@@ -3563,7 +3563,7 @@
     </frontmatter>
     <paper id="1">
       <title>Abbreviation Expander - a Web-based System for Easy Reading of Technical Documents</title>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>1–4</pages>
       <abstract>Abbreviations and acronyms are a part of textual communication in most domains. However, abbreviations are not necessarily defined in documents that employ them. Understanding all abbreviations used in a given document often requires extensive knowledge of the target domain and the ability to disambiguate based on context. This creates considerable entry barriers to newcomers and difficulties in automated document processing. Existing abbreviation expansion systems or tools require substantial technical knowledge for set up or make strong assumptions which limit their use in practice. Here, we present Abbreviation Expander, a system that builds on state of the art methods for identification of abbreviations, acronyms and their definitions and a novel disambiguator for abbreviation expansion in an easily accessible web-based solution.</abstract>
@@ -3585,7 +3585,7 @@
     <paper id="3">
       <title><fixed-case>J</fixed-case>e<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>: Interleaving Semantics and Emotions in a Web Service for the Exploration of Language Change Phenomena</title>
       <author><first>Johannes</first><last>Hellrich</last></author>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>10–14</pages>
       <abstract>We here introduce a substantially extended version of JeSemE, an interactive website for visually exploring computationally derived time-variant information on word meanings and lexical emotions assembled from five large diachronic text corpora. JeSemE is designed for scholars in the (digital) humanities as an alternative to consulting manually compiled, printed dictionaries for such information (if available at all). This tool uniquely combines state-of-the-art distributional semantics with a nuanced model of human emotions, two information streams we deem beneficial for a data-driven interpretation of texts in the humanities.</abstract>
@@ -3607,11 +3607,11 @@
     <paper id="5">
       <title>A <fixed-case>K</fixed-case>orean Knowledge Extraction System for Enriching a <fixed-case>KB</fixed-case>ox</title>
       <author><first>Sangha</first><last>Nam</last></author>
-      <author><first>Eun-kyung</first><last>Kim</last></author>
+      <author id="eun-kyung-kim"><first>Eun-kyung</first><last>Kim</last></author>
       <author><first>Jiho</first><last>Kim</last></author>
       <author><first>Yoosung</first><last>Jung</last></author>
       <author><first>Kijong</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>20–24</pages>
       <abstract>The increased demand for structured knowledge has created considerable interest in knowledge extraction from natural language sentences. This study presents a new Korean knowledge extraction system and web interface for enriching a KBox knowledge base that expands based on the Korean DBpedia. The aim is to create an endpoint where knowledge can be extracted and added to KBox anytime and anywhere.</abstract>
       <url hash="45432aaa">C18-2005</url>
@@ -3657,7 +3657,7 @@
     <paper id="10">
       <title><fixed-case>CRST</fixed-case>: a Claim Retrieval System in <fixed-case>T</fixed-case>witter</title>
       <author><first>Wenjia</first><last>Ma</last></author>
-      <author><first>WenHan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>WenHan</first><last>Chao</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <pages>43–47</pages>
@@ -3667,10 +3667,10 @@
     </paper>
     <paper id="11">
       <title>Utilizing Graph Measure to Deduce Omitted Entities in Paragraphs</title>
-      <author><first>Eun-kyung</first><last>Kim</last></author>
+      <author id="eun-kyung-kim"><first>Eun-kyung</first><last>Kim</last></author>
       <author><first>Kijong</first><last>Han</last></author>
       <author><first>Jiho</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>48–52</pages>
       <abstract>This demo deals with the problem of capturing omitted arguments in relation extraction given a proper knowledge base for entities of interest. This paper introduces the concept of a salient entity and use this information to deduce omitted entities in the paragraph which allows improving the relation extraction quality. The main idea to compute salient entities is to construct a graph on the given information (by identifying the entities but without parsing it), rank it with standard graph measures and embed it in the context of the sentences.</abstract>
       <url hash="bbc86423">C18-2011</url>
@@ -3678,7 +3678,7 @@
     </paper>
     <paper id="12">
       <title>Transparent, Efficient, and Robust Word Embedding Access with <fixed-case>WOMBAT</fixed-case></title>
-      <author><first>Mark-Christoph</first><last>Müller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Müller</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>53–57</pages>
       <abstract>We present WOMBAT, a Python tool which supports NLP practitioners in accessing word embeddings from code. WOMBAT addresses common research problems, including unified access, scaling, and robust and reproducible preprocessing. Code that uses WOMBAT for accessing word embeddings is not only cleaner, more readable, and easier to reuse, but also much more efficient than code using standard in-memory methods: a Python script using WOMBAT for evaluating seven large word embedding collections (8.7M embedding vectors in total) on a simple SemEval sentence similarity task involving 250 raw sentence pairs completes in under ten seconds end-to-end on a standard notebook computer.</abstract>
@@ -3717,8 +3717,8 @@
     </paper>
     <paper id="15">
       <title>Simulating Language Evolution: a Tool for Historical Linguistics</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>68–72</pages>
       <abstract>Language change across space and time is one of the main concerns in historical linguistics. In this paper, we develop a language evolution simulator: a web-based tool for word form production to assist in historical linguistics, in studying the evolution of the languages. Given a word in a source language, the system automatically predicts how the word evolves in a target language. The method that we propose is language-agnostic and does not use any external knowledge, except for the training word pairs.</abstract>
       <url hash="71c8145e">C18-2015</url>
@@ -3751,7 +3751,7 @@
       <author><first>Yu-Chun</first><last>Lo</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
       <author><first>Chingyu</first><last>Yang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>82–85</pages>
       <abstract>This paper presents a grammatical error correction (GEC) system that provides corrective feedback for essays. We apply the sequence-to-sequence model, which is frequently used in machine translation and text summarization, to this GEC task. The model is trained by EF-Cambridge Open Language Database (EFCAMDAT), a large learner corpus annotated with grammatical errors and corrections. Evaluation shows that our system achieves competitive performance on a number of publicly available testsets.</abstract>
       <url hash="139f7b65">C18-2018</url>
@@ -3772,12 +3772,12 @@
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Thai-Son</first><last>Nguyen</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Thomas</first><last>Zenkel</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>89–93</pages>
       <abstract>In today’s globalized world we have the ability to communicate with people across the world. However, in many situations the language barrier still presents a major issue. For example, many foreign students coming to KIT to study are initially unable to follow a lecture in German. Therefore, we offer an automatic simultaneous interpretation service for students. To fulfill this task, we have developed a low-latency translation system that is adapted to lectures and covers several language pairs. While the switch from traditional Statistical Machine Translation to Neural Machine Translation (NMT) significantly improved performance, to integrate NMT into the speech translation framework required several adjustments. We have addressed the run-time constraints and different types of input. Furthermore, we utilized one-shot learning to easily add new topic-specific terms to the system. Besides better performance, NMT also enabled us increase our covered languages through multilingual NMT. % Combining these techniques, we are able to provide an adapted speech translation system for several European languages.</abstract>
       <url hash="478c0f53">C18-2020</url>
@@ -3787,7 +3787,7 @@
       <title><fixed-case>G</fixed-case>raphene: a Context-Preserving Open Information Extraction System</title>
       <author><first>Matthias</first><last>Cetto</last></author>
       <author><first>Christina</first><last>Niklaus</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>94–98</pages>
       <abstract>We introduce Graphene, an Open IE system whose goal is to generate accurate, meaningful and complete propositions that may facilitate a variety of downstream semantic applications. For this purpose, we transform syntactically complex input sentences into clean, compact structures in the form of core facts and accompanying contexts, while identifying the rhetorical relations that hold between them in order to maintain their semantic relationship. In that way, we preserve the context of the relational tuples extracted from a source sentence, generating a novel lightweight semantic representation for Open IE that enhances the expressiveness of the extracted propositions.</abstract>
@@ -3799,7 +3799,7 @@
       <author><first>Shang-Chien</first><last>Cheng</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
       <author><first>Chingyu</first><last>Yang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>99–102</pages>
       <abstract>In this paper, we present a system, LanguageNet, which can help second language learners to search for different meanings and usages of a word. We disambiguate word senses based on the pairs of an English word and its corresponding Chinese translations in a parallel corpus, UM-Corpus. The process involved performing word alignment, learning vector space representations of words and training a classifier to distinguish words into groups of senses. LanguageNet directly shows the definition of a sense, bilingual synonyms and sense relevant examples.</abstract>
       <url hash="fa858738">C18-2022</url>
@@ -3882,7 +3882,7 @@
     <paper id="29">
       <title><fixed-case>WARP</fixed-case>-Text: a Web-Based Tool for Annotating Relationships between Pairs of Texts</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <author><first>Maria</first><last>Salamó</last></author>
       <pages>132–136</pages>
       <abstract>We present WARP-Text, an open-source web-based tool for annotating relationships between pairs of texts. WARP-Text supports multi-layer annotation and custom definitions of inter-textual and intra-textual relationships. Annotation can be performed at different granularity levels (such as sentences, phrases, or tokens). WARP-Text has an intuitive user-friendly interface both for project managers and annotators. WARP-Text fills a gap in the currently available NLP toolbox, as open-source alternatives for annotation of pairs of text are not readily available. WARP-Text has already been used in several annotation tasks and can be of interest to the researchers working in the areas of Paraphrasing, Entailment, Simplification, and Summarization, among others.</abstract>
@@ -3914,7 +3914,7 @@
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Hai</first><last>Ye</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
-      <author><first>WenHan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>WenHan</first><last>Chao</last></author>
       <author><first>Wenjia</first><last>Ma</last></author>
       <pages>146–151</pages>
       <abstract>This paper proposes a neural based system to solve the essential interpretability problem existing in text classification, especially in charge prediction task. First, we use a deep reinforcement learning method to extract rationales which mean short, readable and decisive snippets from input text. Then a rationale augmented classification model is proposed to elevate the prediction accuracy. Naturally, the extracted rationales serve as the introspection explanation for the prediction result of the model, enhancing the transparency of the model. Experimental results demonstrate that our system is able to extract readable rationales in a high consistency with manual annotation and is comparable with the attention model in prediction accuracy.</abstract>
@@ -3926,7 +3926,7 @@
       <author><first>Shehroze</first><last>Khan</last></author>
       <author><first>Jihyun</first><last>Kim</last></author>
       <author><first>Tarik</first><last>Zulfikarpasic</last></author>
-      <author><first>Peter</first><last>Chen</last></author>
+      <author id="yuanzhu-peter-chen"><first>Peter</first><last>Chen</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>152–156</pages>
       <abstract>We present Qutr (Query Translator), a smart cross-lingual communication application for the travel domain. Qutr is a real-time messaging app that automatically translates conversations while supporting keyword-to-sentence matching. Qutr relies on querying a database that holds commonly used pre-translated travel-domain phrases and phrase templates in different languages with the use of keywords. The query matching supports paraphrases, incomplete keywords and some input spelling errors. The application addresses common cross-lingual communication issues such as translation accuracy, speed, privacy, and personalization.</abstract>
@@ -3948,7 +3948,7 @@
       <author><first>Quynh Ngoc Thi</first><last>Do</last></author>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
       <author><first>Geert</first><last>Heyman</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>161–165</pages>
       <abstract>This paper presents a flexible and open source framework for deep semantic role labeling. We aim at facilitating easy exploration of model structures for multiple languages with different characteristics. It provides flexibility in its model construction in terms of word representation, sequence representation, output modeling, and inference styles and comes with clear output visualization. The framework is available under the Apache 2.0 license.</abstract>
       <url hash="62d78af4">C18-2035</url>
@@ -3959,8 +3959,8 @@
     <meta>
       <booktitle>Proceedings of the 27th International Conference on Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="8151c298">C18-3</url>
-      <editor><first>Donia</first><last>Scott</last></editor>
-      <editor><first>Marilyn</first><last>Walker</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
+      <editor id="marilyn-walker"><first>Marilyn</first><last>Walker</last></editor>
       <editor><first>Pascale</first><last>Fung</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
@@ -3976,7 +3976,7 @@
       <title><fixed-case>NLP</fixed-case> for Conversations: Sentiment, Summarization, and Group Dynamics</title>
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>1–4</pages>
       <url hash="761b7fcf">C18-3001</url>
       <bibkey>murray-etal-2018-nlp</bibkey>
@@ -3984,16 +3984,16 @@
     <paper id="2">
       <title>Practical Parsing for Downstream Applications</title>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>5–7</pages>
       <url hash="a6c92599">C18-3002</url>
       <bibkey>dakota-kubler-2018-practical</bibkey>
     </paper>
     <paper id="3">
       <title>Frame Semantics across Languages: Towards a Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <pages>9–12</pages>
       <url hash="d6f4254c">C18-3003</url>
@@ -4001,7 +4001,7 @@
     </paper>
     <paper id="4">
       <title>Deep <fixed-case>B</fixed-case>ayesian Learning and Understanding</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>13–18</pages>
       <url hash="d3facb87">C18-3004</url>
       <attachment type="presentation" hash="8dcafeb5">C18-3004.Presentation.pdf</attachment>
@@ -4009,7 +4009,7 @@
     </paper>
     <paper id="5">
       <title>Data-Driven Text Simplification</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>19–23</pages>
       <url hash="b1cd24a7">C18-3005</url>
@@ -4020,7 +4020,7 @@
       <title>Deep Learning for Dialogue Systems</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <pages>25–31</pages>
       <url hash="51199b42">C18-3006</url>
       <bibkey>chen-etal-2018-deep</bibkey>
diff --git a/data/xml/C65.xml b/data/xml/C65.xml
index f57493da41..9ad9cf37a3 100644
--- a/data/xml/C65.xml
+++ b/data/xml/C65.xml
@@ -91,7 +91,7 @@
     </paper>
     <paper id="20">
       <title>Endocentric Constructions and the <fixed-case>C</fixed-case>ocke Parsing Logic</title>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <url hash="2ef9d7a6">C65-1020</url>
       <bibkey>robinson-1965-endocentric</bibkey>
     </paper>
@@ -104,19 +104,19 @@
     <paper id="22">
       <title>Sentence Generation by Semantic Concordance</title>
       <author><first>Toshiyuki</first><last>Sakai</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="f32bf352">C65-1022</url>
       <bibkey>sakai-nagao-1965-sentence</bibkey>
     </paper>
     <paper id="24">
       <title>Generation, Production, and Translation</title>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="ba49f996">C65-1024</url>
       <bibkey>sgall-1965-generation</bibkey>
     </paper>
     <paper id="25">
       <title>On a Certain Distribution of Semantic Units</title>
-      <author><first>Wojciech</first><last>Skalmowski</last></author>
+      <author id="wojciech-skalmowski"><first>Wojciech</first><last>Skalmowski</last></author>
       <url hash="938d8e1b">C65-1025</url>
       <bibkey>skalmowski-1965-certain</bibkey>
     </paper>
diff --git a/data/xml/C67.xml b/data/xml/C67.xml
index 882a2b0f41..d92bffdf08 100644
--- a/data/xml/C67.xml
+++ b/data/xml/C67.xml
@@ -21,7 +21,7 @@
       <title>An evaluation of the usefulness of machine translations produced at the National Physical Laboratory, Teddington, with a summary of the translation methods</title>
       <author><first>J.</first><last>McDaniel</last></author>
       <author><first>W.L.</first><last>Price</last></author>
-      <author><first>A.J.M.</first><last>Szanser</last></author>
+      <author id="a-j-m-szanser"><first>A.J.M.</first><last>Szanser</last></author>
       <author><first>D.M.</first><last>Yates</last></author>
       <url hash="7a04e5ac">C67-1002</url>
       <bibkey>mcdaniel-etal-1967-evaluation</bibkey>
@@ -54,7 +54,7 @@
     <paper id="7">
       <title>Transformational Decomposition: A Simple Description of an Algorithm for Transformational Analysis of <fixed-case>E</fixed-case>nglish Sentences</title>
       <author><first>Danuta</first><last>Hiz</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="66efb98a">C67-1007</url>
       <bibkey>hiz-joshi-1967-transformational</bibkey>
     </paper>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="15">
       <title>Methods for Obtaining Corresponding Phrase Structure and Dependency Grammars</title>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <url hash="d58e3c5f">C67-1015</url>
       <bibkey>robinson-1967-methods</bibkey>
     </paper>
diff --git a/data/xml/C69.xml b/data/xml/C69.xml
index 7e936e650c..29a87b3f65 100644
--- a/data/xml/C69.xml
+++ b/data/xml/C69.xml
@@ -34,7 +34,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Conceptual Dependency Parser for Natural Language</title>
-      <author><first>Roger C.</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger C.</first><last>Schank</last></author>
       <author><first>Larry</first><last>Tesler</last></author>
       <url hash="e3c815c9">C69-0201</url>
       <bibkey>schank-tesler-1969-conceptual</bibkey>
@@ -93,7 +93,7 @@
     <paper id="1">
       <title>An Application of Computer Programming to the Reconstruction of a Proto-Language</title>
       <author><first>Stanton P.</first><last>Durham</last></author>
-      <author><first>David Ellis</first><last>Rogers</last></author>
+      <author id="david-ellis"><first>David Ellis</first><last>Rogers</last></author>
       <url hash="bde0db3a">C69-0501</url>
       <bibkey>durham-rogers-1969-application</bibkey>
     </paper>
@@ -131,7 +131,7 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic error-correction in natural languages</title>
-      <author><first>A.J.</first><last>Szanser</last></author>
+      <author id="a-j-m-szanser"><first>A.J.</first><last>Szanser</last></author>
       <url hash="abb52f41">C69-0701</url>
       <bibkey>szanser-1969-automatic</bibkey>
     </paper>
@@ -150,7 +150,7 @@
     </frontmatter>
     <paper id="1">
       <title>Interactive Semantic Analysis of <fixed-case>E</fixed-case>nglish Paragraphs</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="a3e352ff">C69-0801</url>
       <bibkey>wilks-1969-interactive</bibkey>
     </paper>
@@ -169,7 +169,7 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic Simulation of Historical Change</title>
-      <author><first>Raoul N.</first><last>Smith</last></author>
+      <author id="raoul-n-smith"><first>Raoul N.</first><last>Smith</last></author>
       <url hash="218c407d">C69-0901</url>
       <bibkey>smith-1969-automatic</bibkey>
     </paper>
@@ -791,7 +791,7 @@
     </frontmatter>
     <paper id="1">
       <title>Linguistics and Automated Language Processing</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
       <url hash="f5959334">C69-4101</url>
       <bibkey>montgomery-1969-linguistics</bibkey>
     </paper>
@@ -907,7 +907,7 @@
     </frontmatter>
     <paper id="1">
       <title>Properties of Formal Grammars With Mixed Type of Rules and Their Linguistic Relevance</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="943ef9b0">C69-4701</url>
       <bibkey>joshi-1969-properties</bibkey>
     </paper>
@@ -1066,7 +1066,7 @@
       <author><first>Andre</first><last>Dugas</last></author>
       <author><first>Myrna</first><last>Gopnik</last></author>
       <author><first>Brian</first><last>Harris</last></author>
-      <author><first>Jean-Pierre</first><last>Paillet</last></author>
+      <author id="jean-pierre-paillet"><first>Jean-Pierre</first><last>Paillet</last></author>
       <url hash="f4b82dd2">C69-5501</url>
       <bibkey>dugas-etal-1969-le</bibkey>
     </paper>
@@ -1236,7 +1236,7 @@
     </paper>
     <paper id="7">
       <title>On Semantics of Some Verbal Categories in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <url hash="9e8de426">C69-6207</url>
       <bibkey>hajicova-1969-semantics</bibkey>
     </paper>
@@ -1262,7 +1262,7 @@
     </paper>
     <paper id="11">
       <title>Machine Transcoding</title>
-      <author><first>T. R.</first><last>Hofmann</last></author>
+      <author id="th-r-hofmann"><first>T. R.</first><last>Hofmann</last></author>
       <author><first>Brian</first><last>Harris</last></author>
       <url hash="0ac40018">C69-6211</url>
       <bibkey>hofmann-harris-1969-machine</bibkey>
@@ -1351,7 +1351,7 @@
       <author><first>H.</first><last>Eggers</last></author>
       <author><first>A.</first><last>Rothkegel-Schramm</last></author>
       <author id="wolfgang-klein"><first>W.</first><last>Klein</last></author>
-      <author><first>H-J.</first><last>Weber</last></author>
+      <author id="heinz-j-weber"><first>H-J.</first><last>Weber</last></author>
       <author id="harald-h-zimmermann"><first>H.</first><last>Zimmermann</last></author>
       <url hash="2f55ece1">C69-6501</url>
       <bibkey>eggers-etal-1969-diskontinuierliche</bibkey>
@@ -1447,7 +1447,7 @@
     </paper>
     <paper id="4">
       <title>Project <fixed-case>DOC</fixed-case></title>
-      <author><first>William S-Y.</first><last>Wang</last></author>
+      <author id="william-s-y-wang"><first>William S-Y.</first><last>Wang</last></author>
       <url hash="e85f00b9">C69-6904</url>
       <bibkey>wang-1969-project</bibkey>
     </paper>
@@ -1485,7 +1485,7 @@
     </frontmatter>
     <paper id="1">
       <title>Project <fixed-case>DOC</fixed-case>: Its Methodological Basis</title>
-      <author><first>William S-Y.</first><last>Wang</last></author>
+      <author id="william-s-y-wang"><first>William S-Y.</first><last>Wang</last></author>
       <url hash="b1861808">C69-7101</url>
       <bibkey>wang-1969-project-doc</bibkey>
     </paper>
diff --git a/data/xml/C73.xml b/data/xml/C73.xml
index f32b38bcc8..4f8824883e 100644
--- a/data/xml/C73.xml
+++ b/data/xml/C73.xml
@@ -25,7 +25,7 @@
     </paper>
     <paper id="3">
       <title>Un Modele Mathematique D’analyse Transformationnelle Selon <fixed-case>Z</fixed-case>. <fixed-case>S</fixed-case>. <fixed-case>H</fixed-case>arris</title>
-      <author><first>Jean Pierre</first><last>Descles</last></author>
+      <author id="jean-pierre-descles"><first>Jean Pierre</first><last>Descles</last></author>
       <url hash="269fc156">C73-1003</url>
       <bibkey>descles-1973-un</bibkey>
     </paper>
@@ -56,7 +56,7 @@
     </paper>
     <paper id="8">
       <title>Working With the Interactive Version of the <fixed-case>T.G.T.</fixed-case>-System of <fixed-case>J</fixed-case>oyce <fixed-case>F</fixed-case>riedman</title>
-      <author><first>Istvan</first><last>Batori</last></author>
+      <author id="istvan-batori"><first>Istvan</first><last>Batori</last></author>
       <url hash="12adfb09">C73-1008</url>
       <bibkey>batori-1973-working</bibkey>
     </paper>
@@ -84,7 +84,7 @@
     <paper id="12">
       <title>Problems in Computerized Historical Linguistics: The <fixed-case>O</fixed-case>ld <fixed-case>C</fixed-case>ornish Lexicon</title>
       <author><first>Enrico</first><last>Campanile</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="363a543d">C73-1012</url>
       <bibkey>campanile-zampolli-1973-problems</bibkey>
     </paper>
@@ -170,13 +170,13 @@
     </paper>
     <paper id="26">
       <title>A La Recherche D’un Modele De Derivation En <fixed-case>I</fixed-case>talien</title>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <url hash="3e42f048">C73-1026</url>
       <bibkey>prodanof-1973-la</bibkey>
     </paper>
     <paper id="27">
       <title>An <fixed-case>E</fixed-case>nglish Dictionary for Computerized Syntactic and Semantic Processing Systems</title>
-      <author><first>Raoul N.</first><last>Smith</last></author>
+      <author id="raoul-n-smith"><first>Raoul N.</first><last>Smith</last></author>
       <author><first>Edward</first><last>Maxwell</last></author>
       <url hash="ff70ad3e">C73-1027</url>
       <bibkey>smith-maxwell-1973-english</bibkey>
@@ -238,9 +238,9 @@
     </paper>
     <paper id="5">
       <title>Working on the <fixed-case>I</fixed-case>talian Machine Dictionary: A Semantic Approach</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Laura</first><last>Pecchia</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="1bd16d9a">C73-2005</url>
       <bibkey>calzolari-etal-1973-working</bibkey>
     </paper>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="12">
       <title>Automatic Pattern Recognition Applied to Semantic Problems</title>
-      <author><first>R. G.</first><last>Piotrowski</last></author>
+      <author id="r-piotrowski"><first>R. G.</first><last>Piotrowski</last></author>
       <author><first>I. V.</first><last>Palibina</last></author>
       <url hash="29667485">C73-2012</url>
       <bibkey>piotrowski-palibina-1973-automatic</bibkey>
@@ -308,7 +308,7 @@
     </paper>
     <paper id="15">
       <title>Towards Computer Systems for Conversing in <fixed-case>P</fixed-case>olish</title>
-      <author><first>Janusz Stanislaw</first><last>Bien</last></author>
+      <author id="janusz-stanislaw-bien"><first>Janusz Stanislaw</first><last>Bien</last></author>
       <url hash="8d9732ef">C73-2015</url>
       <bibkey>bien-1973-towards</bibkey>
     </paper>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="23">
       <title>Segmentation of <fixed-case>F</fixed-case>rench Sentences</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <author><first>Ebbe</first><last>Spang-Hanssen</last></author>
       <url hash="332be255">C73-2023</url>
       <bibkey>maegaard-spang-hanssen-1973-segmentation</bibkey>
@@ -375,7 +375,7 @@
     </paper>
     <paper id="25">
       <title>An Application Du Systeme <fixed-case>A.T.E.F.</fixed-case> A L’analyse Morphologique De Textes Russes</title>
-      <author><first>Nicolas</first><last>Nedobejkine</last></author>
+      <author id="nicolas-nedobejkine"><first>Nicolas</first><last>Nedobejkine</last></author>
       <url hash="c22aca0f">C73-2025</url>
       <bibkey>nedobejkine-1973-application</bibkey>
     </paper>
@@ -387,7 +387,7 @@
     </paper>
     <paper id="27">
       <title>Computational Linguistics and Linguistic Theory</title>
-      <author><first>Jean Pierre</first><last>Paillet</last></author>
+      <author id="jean-pierre-paillet"><first>Jean Pierre</first><last>Paillet</last></author>
       <url hash="0826b86c">C73-2027</url>
       <bibkey>paillet-1973-computational</bibkey>
     </paper>
@@ -399,14 +399,14 @@
     </paper>
     <paper id="29">
       <title>Analyse Automatique De Textes Par Un Systeme D’etats Finis</title>
-      <author><first>Maurice</first><last>Quezel-Ambrunaz</last></author>
-      <author><first>Pierre</first><last>Guillaume</last></author>
+      <author id="maurice-quezel-ambrunaz"><first>Maurice</first><last>Quezel-Ambrunaz</last></author>
+      <author id="pierre-guillaume"><first>Pierre</first><last>Guillaume</last></author>
       <url hash="b4142bcd">C73-2029</url>
       <bibkey>quezel-ambrunaz-guillaume-1973-analyse</bibkey>
     </paper>
     <paper id="30">
       <title>On Using Semantic Data in Automatic Syntactic Analysis</title>
-      <author><first>Morris</first><last>Salkoff</last></author>
+      <author id="morris-salkoff"><first>Morris</first><last>Salkoff</last></author>
       <url hash="806a4ad1">C73-2030</url>
       <bibkey>salkoff-1973-using</bibkey>
     </paper>
@@ -418,7 +418,7 @@
     </paper>
     <paper id="32">
       <title>The Automatically Built Up Homograph Dictionary a Component of a Dynamic Lexical System</title>
-      <author><first>Heinz J.</first><last>Weber</last></author>
+      <author id="heinz-j-weber"><first>Heinz J.</first><last>Weber</last></author>
       <url hash="cd6368d4">C73-2032</url>
       <bibkey>weber-1973-automatically</bibkey>
     </paper>
diff --git a/data/xml/C80.xml b/data/xml/C80.xml
index c4beb8a23d..1d1154d3b3 100644
--- a/data/xml/C80.xml
+++ b/data/xml/C80.xml
@@ -29,7 +29,7 @@
     </paper>
     <paper id="3">
       <title>A Syntax Parser Based on the Case Dependency Grammar and Its Efficiency</title>
-      <author><first>Toru</first><last>Hitaka</last></author>
+      <author id="toru-hitaka"><first>Toru</first><last>Hitaka</last></author>
       <author><first>Sho</first><last>Yoshida</last></author>
       <url hash="2ae25061">C80-1003</url>
       <bibkey>hitaka-yoshida-1980-syntax</bibkey>
@@ -54,14 +54,14 @@
     </paper>
     <paper id="7">
       <title>A Context-Free Grammar of <fixed-case>F</fixed-case>rench</title>
-      <author><first>Morris</first><last>Salkoff</last></author>
+      <author id="morris-salkoff"><first>Morris</first><last>Salkoff</last></author>
       <url hash="e688670a">C80-1007</url>
       <bibkey>salkoff-1980-context</bibkey>
     </paper>
     <paper id="8">
       <title>A Rule-Based Approach to Ill-Formed Input</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <url hash="5cd877ba">C80-1008</url>
       <bibkey>sondheimer-weischedel-1980-rule</bibkey>
     </paper>
@@ -79,8 +79,8 @@
     </paper>
     <paper id="11">
       <title>Linguistic Meaning and Knowledge Representation in Automatic Understanding of Natural Language</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="f49bf655">C80-1011</url>
       <bibkey>hajicova-sgall-1980-linguistic</bibkey>
     </paper>
@@ -92,7 +92,7 @@
     </paper>
     <paper id="13">
       <title>Hierarchical Meaning Representation and Analysis of Natural Language Documents</title>
-      <author><first>Toyo-aki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyo-aki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <url hash="5816008c">C80-1013</url>
       <bibkey>nishida-doshita-1980-hierarchical</bibkey>
@@ -195,13 +195,13 @@
     </paper>
     <paper id="29">
       <title>Embedded Sublanguages and Natural Language Processing</title>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <url hash="f24cb971">C80-1029</url>
       <bibkey>kittredge-1980-embedded</bibkey>
     </paper>
     <paper id="30">
       <title>Adaptation of <fixed-case>M</fixed-case>ontague Grammar to the Requirements of Question-Answering</title>
-      <author><first>S.P.J.</first><last>Landsbergen</last></author>
+      <author id="jan-landsbergen"><first>S.P.J.</first><last>Landsbergen</last></author>
       <url hash="b3d1cde6">C80-1030</url>
       <bibkey>landsbergen-1980-adaptation</bibkey>
     </paper>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="34">
       <title>On the Derivation of a Conversational Maxim</title>
-      <author><first>Th. R.</first><last>Hofmann</last></author>
+      <author id="th-r-hofmann"><first>Th. R.</first><last>Hofmann</last></author>
       <url hash="987d409c">C80-1034</url>
       <bibkey>hofmann-1980-derivation</bibkey>
     </paper>
@@ -257,7 +257,7 @@
     <paper id="38">
       <title>Linguistic Error Correction of <fixed-case>J</fixed-case>apanese Sentences</title>
       <author><first>Tsutomu</first><last>Kawada</last></author>
-      <author><first>Shin-ya</first><last>Amano</last></author>
+      <author id="shin-ya-amano"><first>Shin-ya</first><last>Amano</last></author>
       <author><first>Kunio</first><last>Sakai</last></author>
       <url hash="9b3d7afb">C80-1038</url>
       <bibkey>kawada-etal-1980-linguistic</bibkey>
@@ -322,7 +322,7 @@
     </paper>
     <paper id="48">
       <title>Automatic Compilation of <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Concordances</title>
-      <author><first>Syunsuke</first><last>Uemura</last></author>
+      <author id="shunsuke-uemura"><first>Syunsuke</first><last>Uemura</last></author>
       <author><first>Yasuo</first><last>Sugawara</last></author>
       <author><first>Mantaro J.</first><last>Hashimoto</last></author>
       <author><first>Akihiro</first><last>Furuya</last></author>
@@ -345,15 +345,15 @@
     </paper>
     <paper id="51">
       <title>Parsing Free Word Order Languages in <fixed-case>P</fixed-case>rolog</title>
-      <author><first>Janusz Stanislaw</first><last>Bien</last></author>
+      <author id="janusz-stanislaw-bien"><first>Janusz Stanislaw</first><last>Bien</last></author>
       <author><first>Krystyna</first><last>Laus-Maczyniska</last></author>
-      <author><first>Stanislaw</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stanislaw</first><last>Szpakowicz</last></author>
       <url hash="1dddc711">C80-1051</url>
       <bibkey>bien-etal-1980-parsing</bibkey>
     </paper>
     <paper id="52">
       <title>Parsing Against Lexical Ambiguity</title>
-      <author><first>Rob</first><last>Milne</last></author>
+      <author id="robert-milne"><first>Rob</first><last>Milne</last></author>
       <url hash="52db22bd">C80-1052</url>
       <bibkey>milne-1980-parsing</bibkey>
     </paper>
@@ -372,7 +372,7 @@
     <paper id="55">
       <title>Active Schemata and Their Role in Semantic Parsing</title>
       <author><first>Joachim H.</first><last>Laubsch</last></author>
-      <author><first>Dietmar F.</first><last>Roesner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar F.</first><last>Roesner</last></author>
       <url hash="d725bab3">C80-1055</url>
       <bibkey>laubsch-roesner-1980-active</bibkey>
     </paper>
@@ -432,9 +432,9 @@
     </paper>
     <paper id="64">
       <title><fixed-case>ITS</fixed-case>: Interactive Translation System</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <author><first>Melvin R.</first><last>Smith</last></author>
-      <author><first>Jill</first><last>Peterson</last></author>
+      <author id="j-peterson"><first>Jill</first><last>Peterson</last></author>
       <url hash="aec6d177">C80-1064</url>
       <bibkey>melby-etal-1980-interactive</bibkey>
     </paper>
@@ -494,7 +494,7 @@
     <paper id="72">
       <title>The Impatient Tutor: An Integrated Language Understanding System</title>
       <author><first>Brian</first><last>Phillips</last></author>
-      <author><first>James</first><last>Hendler</last></author>
+      <author id="james-hendler"><first>James</first><last>Hendler</last></author>
       <url hash="fd3730e3">C80-1072</url>
       <bibkey>phillips-hendler-1980-impatient</bibkey>
     </paper>
@@ -513,7 +513,7 @@
     </paper>
     <paper id="75">
       <title>Conjunctions and Modularity in Language Analysis Procedures</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="17d43fde">C80-1075</url>
       <bibkey>grishman-1980-conjunctions</bibkey>
     </paper>
@@ -526,7 +526,7 @@
     <paper id="77">
       <title>Une Experience Pratique D’utilisation De L’analyse Linguistique En Recherche D’information: Bilan &amp; Perspectives</title>
       <author><first>Ernest</first><last>Grandjean</last></author>
-      <author><first>Gerard</first><last>Veillon</last></author>
+      <author id="gerard-veillon"><first>Gerard</first><last>Veillon</last></author>
       <url hash="30502524">C80-1077</url>
       <bibkey>grandjean-veillon-1980-une</bibkey>
     </paper>
@@ -575,7 +575,7 @@
     </paper>
     <paper id="84">
       <title>Natürlichsprachige <fixed-case>P</fixed-case>roblembeschreibung als ein <fixed-case>V</fixed-case>erfahren für den Bürgernahen <fixed-case>Z</fixed-case>ugang zu <fixed-case>D</fixed-case>okumentationssystemen</title>
-      <author><first>Harald H.</first><last>Zimmermann</last></author>
+      <author id="harald-h-zimmermann"><first>Harald H.</first><last>Zimmermann</last></author>
       <url hash="db6ad35f">C80-1084</url>
       <language>deu</language>
       <bibkey>zimmermann-1980-naturlichsprachige</bibkey>
@@ -612,7 +612,7 @@
     </paper>
     <paper id="90">
       <title>A Method to Reduce Large Number of Concordances</title>
-      <author><first>Maria</first><last>Pozzi</last></author>
+      <author id="maria-pozzi"><first>Maria</first><last>Pozzi</last></author>
       <author><first>Javier</first><last>Becerra</last></author>
       <author><first>Jaime</first><last>Rangel</last></author>
       <author><first>Luis Fernando</first><last>Lara</last></author>
diff --git a/data/xml/C82.xml b/data/xml/C82.xml
index e0388aa739..8d1f1ace4b 100644
--- a/data/xml/C82.xml
+++ b/data/xml/C82.xml
@@ -99,34 +99,34 @@
     </paper>
     <paper id="14">
       <title>Natural Language Interfaces Using Limited Semantic Information</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Carol</first><last>Friedman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="carol-friedman"><first>Carol</first><last>Friedman</last></author>
       <url hash="1f90b0be">C82-1014</url>
       <bibkey>grishman-etal-1982-natural</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>DIALOGIC</fixed-case>: A Core Natural-Language Processing System</title>
-      <author><first>Barbara</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara</first><last>Grosz</last></author>
       <author><first>Norman</first><last>Haas</last></author>
-      <author><first>Gary</first><last>Hendrix</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="gary-g-hendrix"><first>Gary</first><last>Hendrix</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <author><first>Paul</first><last>Martin</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Jane</first><last>Robinson</last></author>
-      <author><first>Stanley</first><last>Rosenschein</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="jane-j-robinson"><first>Jane</first><last>Robinson</last></author>
+      <author id="stanley-j-rosenschein"><first>Stanley</first><last>Rosenschein</last></author>
       <url hash="1dae45e1">C82-1015</url>
       <bibkey>grosz-etal-1982-dialogic</bibkey>
     </paper>
     <paper id="16">
       <title>Referential Nets With Attributes</title>
-      <author><first>Christopher U.</first><last>Habel</last></author>
+      <author id="christopher-habel"><first>Christopher U.</first><last>Habel</last></author>
       <url hash="f68db3db">C82-1016</url>
       <bibkey>habel-1982-referential</bibkey>
     </paper>
     <paper id="17">
       <title>The Role of the Hierarchy of Activation in the Process of Natural Language Understanding</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <author><first>Jarka</first><last>Vrbova</last></author>
       <url hash="64772be1">C82-1017</url>
       <bibkey>hajicova-vrbova-1982-role</bibkey>
@@ -139,13 +139,13 @@
     </paper>
     <paper id="19">
       <title>An Experimental Parser</title>
-      <author><first>Anna Sagvall</first><last>Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sagvall</first><last>Hein</last></author>
       <url hash="7a453ee7">C82-1019</url>
       <bibkey>hein-1982-experimental</bibkey>
     </paper>
     <paper id="20">
       <title>Natural Language Access to Structured Text</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>Donald E.</first><last>Walker</last></author>
       <author><first>Robert A.</first><last>Amsler</last></author>
       <url hash="7143f61c">C82-1020</url>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="21">
       <title>A Multilayered Approach to the Handling of Word Formation</title>
-      <author><first>Wolfgang</first><last>Hoeppner</last></author>
+      <author id="wolfgang-hoeppner"><first>Wolfgang</first><last>Hoeppner</last></author>
       <url hash="cfb0d09e">C82-1021</url>
       <bibkey>hoeppner-1982-multilayered</bibkey>
     </paper>
@@ -165,14 +165,14 @@
     </paper>
     <paper id="23">
       <title>Processing of Sentences With Intra-Sentential Code-Switching</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="58dba9e5">C82-1023</url>
       <bibkey>joshi-1982-processing</bibkey>
     </paper>
     <paper id="24">
       <title>Incremental Sentence Generation: Implications for the Structure of a Syntactic Processor</title>
       <author><first>Gerard</first><last>Kempen</last></author>
-      <author><first>Edward</first><last>Hoenkamp</last></author>
+      <author id="edward-hoenkamp"><first>Edward</first><last>Hoenkamp</last></author>
       <url hash="99821263">C82-1024</url>
       <bibkey>kempen-hoenkamp-1982-incremental</bibkey>
     </paper>
@@ -196,13 +196,13 @@
     </paper>
     <paper id="28">
       <title>Machine Translation Based on Logically Isomorphic <fixed-case>M</fixed-case>ontague Grammars</title>
-      <author><first>Jan</first><last>Landsbergen</last></author>
+      <author id="jan-landsbergen"><first>Jan</first><last>Landsbergen</last></author>
       <url hash="be141f07">C82-1028</url>
       <bibkey>landsbergen-1982-machine</bibkey>
     </paper>
     <paper id="29">
       <title>Conversion of a <fixed-case>F</fixed-case>rench Surface Expression Into Its Semantic Representation According to the <fixed-case>RESEDA</fixed-case> Metalanguage</title>
-      <author><first>Jacqueline</first><last>Leon</last></author>
+      <author id="jacqueline-leon"><first>Jacqueline</first><last>Leon</last></author>
       <url hash="ac871686">C82-1029</url>
       <bibkey>leon-1982-conversion</bibkey>
     </paper>
@@ -214,14 +214,14 @@
     </paper>
     <paper id="31">
       <title>The Anatomy of a Systemic Choice</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <url hash="3ea4884d">C82-1031</url>
       <bibkey>mann-1982-anatomy</bibkey>
     </paper>
     <paper id="32">
       <title>Analysis and Processing of Compact Text</title>
       <author><first>Elaine</first><last>Marsh</last></author>
-      <author><first>Naomi</first><last>Sager</last></author>
+      <author id="naomi-sager"><first>Naomi</first><last>Sager</last></author>
       <url hash="ddeecbb7">C82-1032</url>
       <bibkey>marsh-sager-1982-analysis</bibkey>
     </paper>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="34">
       <title>Multi-Level Translation Aids in a Distributed System</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <url hash="1f61d1b9">C82-1034</url>
       <bibkey>melby-1982-multi</bibkey>
     </paper>
@@ -267,8 +267,8 @@
     </paper>
     <paper id="39">
       <title>An <fixed-case>E</fixed-case>nglish <fixed-case>J</fixed-case>apanese Machine Translation System of the Titles of Scientific and Engineering Papers</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <author><first>Koji</first><last>Yada</last></author>
       <author><first>Toshihiro</first><last>Kakimoto</last></author>
       <url hash="cd0608c7">C82-1039</url>
@@ -276,8 +276,8 @@
     </paper>
     <paper id="40">
       <title>Parser Which Learns the Application Order of Rewriting Rules</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
       <url hash="3997f1ea">C82-1040</url>
       <bibkey>nagao-nakamura-1982-parser</bibkey>
     </paper>
@@ -303,7 +303,7 @@
     </paper>
     <paper id="44">
       <title>An <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Machine Translation System Based on Formal Semantics of Natural Language</title>
-      <author><first>Toyo-aki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyo-aki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <url hash="a82300b9">C82-1044</url>
       <bibkey>nishida-doshita-1982-english</bibkey>
@@ -326,7 +326,7 @@
     </paper>
     <paper id="47">
       <title>Random Generation of <fixed-case>C</fixed-case>zech Sentences</title>
-      <author><first>Jarmila</first><last>Panevova</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevova</last></author>
       <url hash="53f495ed">C82-1047</url>
       <bibkey>panevova-1982-random</bibkey>
     </paper>
@@ -339,13 +339,13 @@
     <paper id="49">
       <title>A Message-Passing Control Structure for Text Understanding</title>
       <author><first>Brian</first><last>Phillips</last></author>
-      <author><first>James A.</first><last>Hendler</last></author>
+      <author id="james-hendler"><first>James A.</first><last>Hendler</last></author>
       <url hash="d8817c37">C82-1049</url>
       <bibkey>phillips-hendler-1982-message</bibkey>
     </paper>
     <paper id="50">
       <title>Composition of Translation Schemes with <fixed-case>D</fixed-case>-Trees</title>
-      <author><first>Martin</first><last>Platek</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Platek</last></author>
       <url hash="4f242f3e">C82-1050</url>
       <bibkey>platek-1982-composition</bibkey>
     </paper>
@@ -357,7 +357,7 @@
     </paper>
     <paper id="52">
       <title>Formalization of Argumentation Structures in Newspaper Texts</title>
-      <author><first>Dietmar F.</first><last>Roesner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar F.</first><last>Roesner</last></author>
       <author><first>Joachim H.</first><last>Laubsch</last></author>
       <url hash="c2fe8f75">C82-1052</url>
       <bibkey>roesner-laubsch-1982-formalization</bibkey>
@@ -392,21 +392,21 @@
       <title>Knowledge Representation and Machine Translation</title>
       <author><first>Susumu</first><last>Sawai</last></author>
       <author><first>Hiromichi</first><last>Fukushima</last></author>
-      <author><first>Masakatsu</first><last>Sugimoto</last></author>
+      <author id="masakatsu-sugimoto"><first>Masakatsu</first><last>Sugimoto</last></author>
       <author><first>Naoya</first><last>Ukai</last></author>
       <url hash="dea432e7">C82-1057</url>
       <bibkey>sawai-etal-1982-knowledge</bibkey>
     </paper>
     <paper id="58">
       <title>Natural Language Understanding and the Perspectives of Question Answering</title>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="5e99fa12">C82-1058</url>
       <bibkey>sgall-1982-natural</bibkey>
     </paper>
     <paper id="59">
       <title>Parsing <fixed-case>G</fixed-case>erman</title>
       <author><first>Ingeborg</first><last>Steinacker</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="8a751e57">C82-1059</url>
       <bibkey>steinacker-trost-1982-parsing</bibkey>
     </paper>
@@ -424,7 +424,7 @@
     </paper>
     <paper id="62">
       <title>The Transfer Phase In an <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Translation System</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="f42ef8fe">C82-1062</url>
       <bibkey>tsujii-1982-transfer</bibkey>
     </paper>
@@ -450,8 +450,8 @@
     </paper>
     <paper id="66">
       <title>Taking the Initiative in Natural Language Data Base Interactions: Justifying Why</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <url hash="894082cd">C82-1066</url>
       <bibkey>webber-joshi-1982-taking</bibkey>
     </paper>
@@ -459,7 +459,7 @@
       <title>Man-Assisted Machine Construction of a Semantic Dictionary for Natural Language Processing</title>
       <author><first>Sho</first><last>Yoshida</last></author>
       <author><first>Hiroaki</first><last>Tsurumaru</last></author>
-      <author><first>Tooru</first><last>Hitaka</last></author>
+      <author id="toru-hitaka"><first>Tooru</first><last>Hitaka</last></author>
       <url hash="5d60d1ef">C82-1067</url>
       <bibkey>yoshida-etal-1982-man</bibkey>
     </paper>
@@ -517,7 +517,7 @@
     </paper>
     <paper id="5">
       <title>Subordinate Clauses and Belief - Domains in Verbal Information Processing</title>
-      <author><first>Istvan</first><last>Batori</last></author>
+      <author id="istvan-batori"><first>Istvan</first><last>Batori</last></author>
       <url hash="8b3d99a8">C82-2005</url>
       <bibkey>batori-1982-subordinate</bibkey>
     </paper>
@@ -535,14 +535,14 @@
     </paper>
     <paper id="8">
       <title>Toward a Parsing Method for Free Word Order Languages</title>
-      <author><first>Janusz S.</first><last>Bień</last></author>
-      <author><first>Stanisław</first><last>Szpakowicz</last></author>
+      <author id="janusz-stanislaw-bien"><first>Janusz S.</first><last>Bień</last></author>
+      <author id="stan-szpakowicz"><first>Stanisław</first><last>Szpakowicz</last></author>
       <url hash="7714c920">C82-2008</url>
       <bibkey>bien-szpakowicz-1982-toward</bibkey>
     </paper>
     <paper id="9">
       <title>Developing the <fixed-case>COMMENTATOR</fixed-case>, A Computer System Simulating Verbal Production</title>
-      <author><first>Milan</first><last>Bily</last></author>
+      <author id="milan-bily"><first>Milan</first><last>Bily</last></author>
       <author><first>Bengt</first><last>Sigurd</last></author>
       <url hash="332c7721">C82-2009</url>
       <bibkey>bily-sigurd-1982-developing</bibkey>
@@ -567,7 +567,7 @@
     </paper>
     <paper id="13">
       <title>Towards the Organization of Lexical Definitions on a Database Structure</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <url hash="bc752d7d">C82-2013</url>
       <bibkey>calzolari-1982-towards</bibkey>
     </paper>
@@ -620,21 +620,21 @@
     </paper>
     <paper id="21">
       <title>Merging - The Art of Representing Different Levels of Sentence Structure in a Single Analysis Tree</title>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <url hash="c6d54241">C82-2021</url>
       <bibkey>van-eynde-1982-merging</bibkey>
     </paper>
     <paper id="22">
       <title>Revising an <fixed-case>ATN</fixed-case> Parser</title>
       <author><first>Giacomo</first><last>Ferrari</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <url hash="c066a66c">C82-2022</url>
       <bibkey>ferrari-prodanof-1982-revising</bibkey>
     </paper>
     <paper id="23">
       <title>Collocational Grammar as a Model for Human-Computer Interaction</title>
       <author><first>W. Randolph</first><last>Ford</last></author>
-      <author><first>Raoul N.</first><last>Smith</last></author>
+      <author id="raoul-n-smith"><first>Raoul N.</first><last>Smith</last></author>
       <url hash="8f9d213b">C82-2023</url>
       <bibkey>ford-smith-1982-collocational</bibkey>
     </paper>
@@ -673,13 +673,13 @@
     </paper>
     <paper id="29">
       <title>Constraints on Noun Phrase Conjunction: A Domain-Independent Mechanism</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="7ee8b87e">C82-2029</url>
       <bibkey>hirschman-1982-constraints</bibkey>
     </paper>
     <paper id="30">
       <title>Why There Must Be a Semantic Representation (Over and Above Any Cognitive Network)</title>
-      <author><first>Th. R.</first><last>Hofmann</last></author>
+      <author id="th-r-hofmann"><first>Th. R.</first><last>Hofmann</last></author>
       <url hash="81a6049c">C82-2030</url>
       <bibkey>hofmann-1982-must</bibkey>
     </paper>
@@ -691,8 +691,8 @@
     </paper>
     <paper id="32">
       <title>Inferencing and Search for an Answer in <fixed-case>TIBAQ</fixed-case></title>
-      <author><first>Petr</first><last>Jirku</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="petr-jirku"><first>Petr</first><last>Jirku</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <url hash="11b76c6c">C82-2032</url>
       <bibkey>jirku-hajic-1982-inferencing</bibkey>
     </paper>
@@ -711,7 +711,7 @@
     </paper>
     <paper id="35">
       <title>Syntactic Privilege</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
       <url hash="ce9d4e3e">C82-2035</url>
       <bibkey>kac-1982-syntactic</bibkey>
     </paper>
@@ -725,7 +725,7 @@
       <title>A Procedure of an Automatic Grapheme-to-Phoneme Transfornation of <fixed-case>G</fixed-case>erman</title>
       <author><first>Sabine</first><last>Koch</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
-      <author><first>Ingrid</first><last>Starke</last></author>
+      <author id="ingrid-starke"><first>Ingrid</first><last>Starke</last></author>
       <url hash="856ac97a">C82-2037</url>
       <bibkey>koch-etal-1982-procedure</bibkey>
     </paper>
@@ -743,7 +743,7 @@
     </paper>
     <paper id="40">
       <title>Natural Language Data Base Access With <fixed-case>PEARL</fixed-case></title>
-      <author><first>Wendy</first><last>Lehnert</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
       <author><first>Steve</first><last>Shwartz</last></author>
       <url hash="bd9ce3db">C82-2040</url>
       <bibkey>lehnert-shwartz-1982-natural</bibkey>
@@ -752,7 +752,7 @@
       <title>Reference Resolution and Semantic Coherence</title>
       <author><first>Elisabeth</first><last>Leinfellner</last></author>
       <author><first>Ingeborg</first><last>Steinacker</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="ff47e6ac">C82-2041</url>
       <bibkey>leinfellner-etal-1982-reference</bibkey>
     </paper>
@@ -770,7 +770,7 @@
     </paper>
     <paper id="44">
       <title>Meaning Negotiation in Dialogue</title>
-      <author><first>Barbara</first><last>Lewandowska</last></author>
+      <author id="barbara-lewandowska-tomaszyk"><first>Barbara</first><last>Lewandowska</last></author>
       <url hash="45b87861">C82-2044</url>
       <bibkey>lewandowska-1982-meaning</bibkey>
     </paper>
@@ -783,7 +783,7 @@
     </paper>
     <paper id="46">
       <title>The Transfer of Finite Verb Forms in a Machine Translation System</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="aeba7cf5">C82-2046</url>
       <bibkey>maegaard-1982-transfer</bibkey>
     </paper>
@@ -826,10 +826,10 @@
     </paper>
     <paper id="53">
       <title>A Formal Procedure for <fixed-case>B</fixed-case>ulgarian Word Form Generation</title>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <url hash="9842644a">C82-2053</url>
       <bibkey>paskaleva-bulgaria-1982-formal</bibkey>
-      <abstract/>
+      <abstract></abstract>
     </paper>
     <paper id="54">
       <title>On an Approach for Designing Linguistic Processors</title>
@@ -878,7 +878,7 @@
     </paper>
     <paper id="61">
       <title>Adverbs and Semantic Inferences</title>
-      <author><first>Madis</first><last>Saluveer</last></author>
+      <author id="madis-saluveer"><first>Madis</first><last>Saluveer</last></author>
       <url hash="d1717bb3">C82-2061</url>
       <bibkey>saluveer-1982-adverbs</bibkey>
     </paper>
diff --git a/data/xml/C86.xml b/data/xml/C86.xml
index 8393e0255d..30edb578d2 100644
--- a/data/xml/C86.xml
+++ b/data/xml/C86.xml
@@ -45,7 +45,7 @@
     </paper>
     <paper id="6">
       <title>User Models: The Problem of Disparity</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="b76bc681">C86-1006</url>
       <bibkey>carberry-1986-user</bibkey>
     </paper>
@@ -58,13 +58,13 @@
     <paper id="8">
       <title>A Two-Level Dialogue Representation</title>
       <author><first>Giacomo</first><last>Ferrari</last></author>
-      <author><first>Ronan</first><last>Reilly</last></author>
+      <author id="ronan-g-reilly"><first>Ronan</first><last>Reilly</last></author>
       <url hash="3a7058fe">C86-1008</url>
       <bibkey>ferrari-reilly-1986-two</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>INTERFACILE</fixed-case>: Linguistic Coverage and Query Reformulation</title>
-      <author><first>Yvette</first><last>Mathieu</last></author>
+      <author id="yannick-mathieu"><first>Yvette</first><last>Mathieu</last></author>
       <author><first>Paul</first><last>Sabatier</last></author>
       <url hash="9cf713dd">C86-1009</url>
       <bibkey>mathieu-sabatier-1986-interfacile</bibkey>
@@ -84,19 +84,19 @@
     </paper>
     <paper id="12">
       <title>Particle Homonymy and Machine Translation</title>
-      <author><first>Karoly</first><last>Fabricz</last></author>
+      <author id="karoly-fabricz"><first>Karoly</first><last>Fabricz</last></author>
       <url hash="94323ebb">C86-1012</url>
       <bibkey>fabricz-1986-particle</bibkey>
     </paper>
     <paper id="13">
       <title>Plurals, Cardinalities, and Structures of Determination</title>
-      <author><first>Christopher U.</first><last>Habel</last></author>
+      <author id="christopher-habel"><first>Christopher U.</first><last>Habel</last></author>
       <url hash="daead4af">C86-1013</url>
       <bibkey>habel-1986-plurals</bibkey>
     </paper>
     <paper id="14">
       <title>Processing Word Order Variation Within a Modified <fixed-case>ID/LP</fixed-case> Framework</title>
-      <author><first>Pradip</first><last>Dey</last></author>
+      <author id="pradip-dey"><first>Pradip</first><last>Dey</last></author>
       <url hash="53fa86ee">C86-1014</url>
       <bibkey>dey-1986-processing</bibkey>
     </paper>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="18">
       <title>Conditioned Unification for Natural Language Processing</title>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="be4216e7">C86-1018</url>
       <bibkey>hasida-1986-conditioned</bibkey>
     </paper>
@@ -140,13 +140,13 @@
     <paper id="21">
       <title>The Transfer Phase of the <fixed-case>M</fixed-case>u Machine Translation System</title>
       <author><first>Hakoto</first><last>Nagao</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="6f460cff">C86-1021</url>
       <bibkey>nagao-tsujii-1986-transfer</bibkey>
     </paper>
     <paper id="22">
       <title>Lexical Transfer: A Missing Element in Linguistics Theories</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <url hash="2c4aa512">C86-1022</url>
       <bibkey>melby-1986-lexical</bibkey>
     </paper>
@@ -172,7 +172,7 @@
     </paper>
     <paper id="26">
       <title>The need for <fixed-case>MT</fixed-case>-oriented versions of Case and Valency in <fixed-case>MT</fixed-case></title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <url hash="44615e05">C86-1026</url>
       <bibkey>somers-1986-need</bibkey>
     </paper>
@@ -191,29 +191,29 @@
     </paper>
     <paper id="29">
       <title>Solutions for Problems of <fixed-case>MT</fixed-case> Parser - Methods Used in <fixed-case>M</fixed-case>u-Machine Translation Project -</title>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="0ad378f3">C86-1029</url>
       <bibkey>nakamura-etal-1986-solutions</bibkey>
     </paper>
     <paper id="30">
       <title>Strategies and Heuristics in the Analysis of a Natural Language in Machine Translation</title>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <url hash="dc25c99a">C86-1030</url>
       <bibkey>yusoff-1986-strategies</bibkey>
     </paper>
     <paper id="31">
       <title>Parsing in Parallel</title>
       <author><first>Xiuming</first><last>Huang</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <url hash="bc14061c">C86-1031</url>
       <bibkey>huang-guthrie-1986-parsing</bibkey>
     </paper>
     <paper id="32">
       <title>COMPUTATIONAL COMPARATIVE STUDIES ON <fixed-case>R</fixed-case>OMANCE LAGUAGES: A linguistic comparison of lexicon-grammars</title>
       <author><first>Annibale</first><last>Elia</last></author>
-      <author><first>Yvette</first><last>Mathieu</last></author>
+      <author id="yannick-mathieu"><first>Yvette</first><last>Mathieu</last></author>
       <url hash="ad06fb9d">C86-1032</url>
       <bibkey>elia-mathieu-1986-computational</bibkey>
     </paper>
@@ -225,40 +225,40 @@
     </paper>
     <paper id="34">
       <title>Parsing Without (Much) Phrase Structure</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <url hash="1643fe78">C86-1034</url>
       <bibkey>kac-manaster-ramer-1986-parsing</bibkey>
     </paper>
     <paper id="35">
       <title>Reconnaissance-Attack Parsing</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
       <author><first>Tom</first><last>Rindflesch</last></author>
-      <author><first>Karen L.</first><last>Ryan</last></author>
+      <author id="karen-l-ryan"><first>Karen L.</first><last>Ryan</last></author>
       <url hash="bcf06799">C86-1035</url>
       <bibkey>kac-etal-1986-reconnaissance</bibkey>
     </paper>
     <paper id="36">
       <title>Natural Language Interfaces - Ready for Commercial Success?</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <url hash="f96e684c">C86-1036</url>
       <bibkey>wahlster-1986-natural</bibkey>
     </paper>
     <paper id="37">
       <title>Requirements for Robust Natural Language Interfaces: The <fixed-case>L</fixed-case>anguage<fixed-case>C</fixed-case>raft and <fixed-case>XCALIBUR</fixed-case> experiences</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="99da0cb3">C86-1037</url>
       <bibkey>carbonell-1986-requirements</bibkey>
     </paper>
     <paper id="38">
       <title><fixed-case>Q&amp;A</fixed-case>: Already a Success?</title>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <url hash="db751013">C86-1038</url>
       <bibkey>hendrix-1986-q</bibkey>
     </paper>
     <paper id="39">
       <title>The Commercial Application of: Natural Language Interfaces</title>
-      <author><first>Harry</first><last>Tennant</last></author>
+      <author id="harry-tennant"><first>Harry</first><last>Tennant</last></author>
       <url hash="fd366cca">C86-1039</url>
       <bibkey>tennant-1986-commercial</bibkey>
     </paper>
@@ -277,23 +277,23 @@
     </paper>
     <paper id="42">
       <title>Linking Propositions</title>
-      <author><first>D. S.</first><last>Bree</last></author>
-      <author><first>R. A.</first><last>Smit</last></author>
+      <author id="d-s-bree"><first>D. S.</first><last>Bree</last></author>
+      <author id="r-a-smit"><first>R. A.</first><last>Smit</last></author>
       <url hash="8faa3a6e">C86-1042</url>
       <bibkey>bree-smit-1986-linking</bibkey>
     </paper>
     <paper id="43">
       <title>Discourse and Cohesion in Expository Text</title>
-      <author><first>Allen B.</first><last>Tucker</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="allen-b-tucker"><first>Allen B.</first><last>Tucker</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
       <url hash="82de5575">C86-1043</url>
       <bibkey>tucker-etal-1986-discourse</bibkey>
     </paper>
     <paper id="44">
       <title>Degrees of Understanding</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="431ec26f">C86-1044</url>
       <bibkey>hajicova-sgall-1986-degrees</bibkey>
     </paper>
@@ -319,9 +319,9 @@
     </paper>
     <paper id="48">
       <title>Tree Adjoining and Head Wrapping</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David J.</first><last>Weir</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="0ea4e33f">C86-1048</url>
       <bibkey>vijay-shanker-etal-1986-tree</bibkey>
     </paper>
@@ -334,7 +334,7 @@
     </paper>
     <paper id="50">
       <title>A Simple Reconstruction of <fixed-case>GPSG</fixed-case></title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <url hash="5171a567">C86-1050</url>
       <bibkey>shieber-1986-simple</bibkey>
     </paper>
@@ -353,8 +353,8 @@
     </paper>
     <paper id="53">
       <title>Conceptual Lexicon Using an Object-Oriented Language</title>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
-      <author><first>Kenji</first><last>Hanakata</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="kenji-hanakata"><first>Kenji</first><last>Hanakata</last></author>
       <url hash="fd853580">C86-1053</url>
       <bibkey>yokoyama-hanakata-1986-conceptual</bibkey>
     </paper>
@@ -396,7 +396,7 @@
     <paper id="59">
       <title>Disambiguation and Language Acquisition through the Phrasal Lexicon</title>
       <author><first>Uri</first><last>Zernik</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <url hash="fc2158c9">C86-1059</url>
       <bibkey>zernik-dyer-1986-disambiguation</bibkey>
     </paper>
@@ -461,13 +461,13 @@
     </paper>
     <paper id="68">
       <title>A Compression Technique for <fixed-case>A</fixed-case>rabic Dictionaries: The Affix Analysis</title>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <url hash="f986327c">C86-1068</url>
       <bibkey>ben-hamadou-1986-compression</bibkey>
     </paper>
     <paper id="69">
       <title>Machine Learning of Morphological Rules by Generalization and Analogy</title>
-      <author><first>Klaus</first><last>Wothke</last></author>
+      <author id="klaus-wothke"><first>Klaus</first><last>Wothke</last></author>
       <url hash="9eac2c7e">C86-1069</url>
       <bibkey>wothke-1986-machine</bibkey>
     </paper>
@@ -489,7 +489,7 @@
     </paper>
     <paper id="72">
       <title>Generating Semantic Structures in <fixed-case>EUROTRA-D</fixed-case></title>
-      <author><first>Erich</first><last>Steiner</last></author>
+      <author id="erich-h-steiner"><first>Erich</first><last>Steiner</last></author>
       <url hash="cc6e1936">C86-1072</url>
       <bibkey>steiner-1986-generating</bibkey>
     </paper>
@@ -513,7 +513,7 @@
     </paper>
     <paper id="76">
       <title><fixed-case>NARA</fixed-case>: A Two-way Simultaneous Interpretation System between <fixed-case>K</fixed-case>orean and <fixed-case>J</fixed-case>apanese -A methodological study-</title>
-      <author><first>Hee Sung</first><last>Chung</last></author>
+      <author id="hee-sung-chung"><first>Hee Sung</first><last>Chung</last></author>
       <author><first>Tosiyasu L.</first><last>Kunii</last></author>
       <url hash="0069b57b">C86-1076</url>
       <bibkey>chung-kunii-1986-nara</bibkey>
@@ -536,7 +536,7 @@
     </paper>
     <paper id="79">
       <title>A Metric for Computational Analysis of Meaning: Toward an Applied Theory of Linguistic Semantics</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
       <url hash="e18e0990">C86-1079</url>
       <bibkey>nirenburg-raskin-1986-metric</bibkey>
@@ -550,7 +550,7 @@
     <paper id="81">
       <title>A Logical Formalism for the Representation of Determiners</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Paolo</first><last>Pogliano</last></author>
       <author><first>Pietro</first><last>Torasso</last></author>
       <author><first>Francesco</first><last>Urbano</last></author>
@@ -559,7 +559,7 @@
     </paper>
     <paper id="82">
       <title>A Compositional Semantics for Directional Modifiers - Locative Case Reopened -</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <url hash="3cf3e1b0">C86-1082</url>
       <bibkey>hinrichs-1986-compositional</bibkey>
     </paper>
@@ -583,19 +583,19 @@
       <author><first>Norbert</first><last>Reithinger</last></author>
       <author><first>Dagmar</first><last>Schmauks</last></author>
       <author><first>Karin</first><last>Harbusch</last></author>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <url hash="dc3ea189">C86-1085</url>
       <bibkey>kobsa-etal-1986-combining</bibkey>
     </paper>
     <paper id="86">
       <title>An Approach to Non-Singular Terms in Discourse</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="2a1c6bd6">C86-1086</url>
       <bibkey>strzalkowski-1986-approach</bibkey>
     </paper>
     <paper id="87">
       <title>Processing Clinical Narratives in <fixed-case>H</fixed-case>ungarian</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
       <url hash="08f2129d">C86-1087</url>
       <bibkey>proszeky-1986-processing</bibkey>
     </paper>
@@ -615,7 +615,7 @@
     </paper>
     <paper id="90">
       <title>On the Use of Term Associations in Automatic Information Retrieval</title>
-      <author><first>Gerard</first><last>Salton</last></author>
+      <author id="gerard-salton"><first>Gerard</first><last>Salton</last></author>
       <url hash="56da2b1c">C86-1090</url>
       <bibkey>salton-1986-use</bibkey>
     </paper>
@@ -635,7 +635,7 @@
     </paper>
     <paper id="93">
       <title><fixed-case>SCSL</fixed-case>: a linguistic specification language for <fixed-case>MT</fixed-case></title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="a68bf9c2">C86-1093</url>
       <bibkey>zajac-1986-scsl</bibkey>
     </paper>
@@ -717,7 +717,7 @@
     <paper id="105">
       <title>An Attempt to Automatic Thesaurus Construction From an Ordinary <fixed-case>J</fixed-case>apanese Language Dictionary</title>
       <author><first>Hiroaki</first><last>Tsurumaru</last></author>
-      <author><first>Toru</first><last>Hitaka</last></author>
+      <author id="toru-hitaka"><first>Toru</first><last>Hitaka</last></author>
       <author><first>Sho</first><last>Yoshida</last></author>
       <url hash="8e0159dd">C86-1105</url>
       <bibkey>tsurumaru-etal-1986-attempt</bibkey>
@@ -738,7 +738,7 @@
     </paper>
     <paper id="108">
       <title>User Specification of Syntactic Case Frames in <fixed-case>TELI</fixed-case>, A Transportable, User-Customized Natural Language Processor</title>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <url hash="fc387582">C86-1108</url>
       <bibkey>ballard-1986-user</bibkey>
     </paper>
@@ -765,7 +765,7 @@
     </paper>
     <paper id="112">
       <title>Generalized Memory Manipulating Actions for Parsing Natural Language</title>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <author><first>Giacomo</first><last>Ferrari</last></author>
       <url hash="a94a5e8c">C86-1112</url>
       <bibkey>prodanof-ferrari-1986-generalized</bibkey>
@@ -778,20 +778,20 @@
     </paper>
     <paper id="114">
       <title>The Treatment of Movement-Rules in a <fixed-case>LFG</fixed-case>-Parser</title>
-      <author><first>Hans-Ulrich</first><last>Block</last></author>
+      <author id="hans-ulrich-block"><first>Hans-Ulrich</first><last>Block</last></author>
       <author><first>Hans</first><last>Haugeneder</last></author>
       <url hash="c58a2ec2">C86-1114</url>
       <bibkey>block-haugeneder-1986-treatment</bibkey>
     </paper>
     <paper id="115">
       <title>A Concept of Derivation for <fixed-case>LFG</fixed-case></title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="6c93035c">C86-1115</url>
       <bibkey>wedekind-1986-concept</bibkey>
     </paper>
     <paper id="116">
       <title>Incremental Construction of <fixed-case>C</fixed-case>- and <fixed-case>F</fixed-case>-Structure in a <fixed-case>LFG</fixed-case>-Parser</title>
-      <author><first>Hans-Ulrich</first><last>Block</last></author>
+      <author id="hans-ulrich-block"><first>Hans-Ulrich</first><last>Block</last></author>
       <author><first>Rudolf</first><last>Hunze</last></author>
       <url hash="fdc3b76c">C86-1116</url>
       <bibkey>block-hunze-1986-incremental</bibkey>
@@ -836,7 +836,7 @@
     </paper>
     <paper id="123">
       <title>Pragmatic Considerations in Man-Machine Discourse</title>
-      <author><first>Walther</first><last>v. Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></author>
       <url hash="c52aaabe">C86-1123</url>
       <bibkey>v-hahn-1986-pragmatic</bibkey>
     </paper>
@@ -850,41 +850,41 @@
     </paper>
     <paper id="125">
       <title>On Formalizations of <fixed-case>M</fixed-case>arcus’ Parser</title>
-      <author><first>R.</first><last>Nozohoor-Farshi</last></author>
+      <author id="r-nozohoor-farshi"><first>R.</first><last>Nozohoor-Farshi</last></author>
       <url hash="ca7d87f8">C86-1125</url>
       <bibkey>nozohoor-farshi-1986-formalizations</bibkey>
     </paper>
     <paper id="126">
       <title>A Grammar Used for Parsing and Generation</title>
       <author><first>Jean-Marie</first><last>Lancel</last></author>
-      <author><first>Francois</first><last>Rousselot</last></author>
-      <author><first>Nathalie</first><last>Simonin</last></author>
+      <author id="francois-rousselot"><first>Francois</first><last>Rousselot</last></author>
+      <author id="nathalie-simonin"><first>Nathalie</first><last>Simonin</last></author>
       <url hash="d7ee8e38">C86-1126</url>
       <bibkey>lancel-etal-1986-grammar</bibkey>
     </paper>
     <paper id="127">
       <title><fixed-case>BUILDRS</fixed-case>: An Implementation of <fixed-case>DR</fixed-case> Theory and <fixed-case>LFG</fixed-case></title>
       <author><first>Hajime</first><last>Wada</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <url hash="e2a91593">C86-1127</url>
       <bibkey>wada-asher-1986-buildrs</bibkey>
     </paper>
     <paper id="128">
       <title>A <fixed-case>PROLOG</fixed-case> Implementation of <fixed-case>G</fixed-case>overnment-<fixed-case>B</fixed-case>inding <fixed-case>T</fixed-case>heory</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="f73ac617">C86-1128</url>
       <bibkey>kuhns-1986-prolog</bibkey>
     </paper>
     <paper id="129">
       <title>A <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar System in <fixed-case>P</fixed-case>rolog</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
-      <author><first>Jochen</first><last>Dorre</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dorre</last></author>
       <url hash="e7d72282">C86-1129</url>
       <bibkey>eisele-dorre-1986-lexical</bibkey>
     </paper>
     <paper id="130">
       <title>Knowledge Structures for Natural Language Generation</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="92876c91">C86-1130</url>
       <bibkey>jacobs-1986-knowledge</bibkey>
     </paper>
@@ -920,30 +920,30 @@
     </paper>
     <paper id="135">
       <title>Generating Natural Language Text in a Dialog System</title>
-      <author><first>Mare</first><last>Koit</last></author>
-      <author><first>Madis</first><last>Saluveer</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
+      <author id="madis-saluveer"><first>Madis</first><last>Saluveer</last></author>
       <url hash="1ebc98f8">C86-1135</url>
       <bibkey>koit-saluveer-1986-generating</bibkey>
     </paper>
     <paper id="136">
       <title>Generating <fixed-case>E</fixed-case>nglish Paraphrases From Formal Relational Calculus Expressions</title>
-      <author><first>A.N.</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>A.N.</first><last>De Roeck</last></author>
       <author><first>B.G.T.</first><last>Lowden</last></author>
       <url hash="9f99740c">C86-1136</url>
       <bibkey>de-roeck-lowden-1986-generating</bibkey>
     </paper>
     <paper id="137">
       <title>The computational complexity of sentence derivation in functional unification grammar</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <url hash="f6550ff1">C86-1137</url>
       <bibkey>ritchie-1986-computational</bibkey>
     </paper>
     <paper id="138">
       <title>Parsing Spoken Language: a Semantic Caseframe Approach</title>
       <author><first>Philip J.</first><last>Hayes</last></author>
-      <author><first>Alexander G.</first><last>Hauptmann</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander G.</first><last>Hauptmann</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="c32f99a1">C86-1138</url>
       <bibkey>hayes-etal-1986-parsing</bibkey>
     </paper>
@@ -963,7 +963,7 @@
     <paper id="141">
       <title>Synthesis of Spoken Messages from Semantic Representations (Semantic-Representation-to-Speech System)</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Eric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Eric</first><last>Laporte</last></author>
       <author><first>Francoise</first><last>Emerard</last></author>
       <url hash="abdf74b7">C86-1141</url>
       <bibkey>danlos-etal-1986-synthesis</bibkey>
@@ -1013,22 +1013,22 @@
     </paper>
     <paper id="148">
       <title>On Knowledge-Based Machine Translation</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Allen</first><last>Tucker</last></author>
+      <author id="allen-b-tucker"><first>Allen</first><last>Tucker</last></author>
       <url hash="3066030c">C86-1148</url>
       <bibkey>nirenburg-etal-1986-knowledge</bibkey>
     </paper>
     <paper id="149">
       <title>Another Stride Towards Knowledge-Based Machine Translation</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="4a76d97a">C86-1149</url>
       <bibkey>tomita-carbonell-1986-another</bibkey>
     </paper>
     <paper id="150">
       <title><fixed-case>E</fixed-case>nglish - <fixed-case>M</fixed-case>alay Translation System: A Laboratory Prototype</title>
-      <author><last>Tong</last><first>Loong-Cheong</first></author>
+      <author id="loong-cheong-tong"><first>Loong-Cheong</first><last>Tong</last></author>
       <url hash="748ae067">C86-1150</url>
       <bibkey>tong-1986-english</bibkey>
     </paper>
@@ -1055,13 +1055,13 @@
     </paper>
     <paper id="154">
       <title>When <fixed-case>M</fixed-case>ariko talks to <fixed-case>S</fixed-case>iegfried - Experiences from a <fixed-case>J</fixed-case>apanese/<fixed-case>G</fixed-case>erman Machine Translation Project-</title>
-      <author><first>Dietmar</first><last>Rosner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rosner</last></author>
       <url hash="fb62d3bb">C86-1154</url>
       <bibkey>rosner-1986-mariko</bibkey>
     </paper>
     <paper id="155">
       <title>Future Directions of Machine Translation</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="98819309">C86-1155</url>
       <bibkey>tsujii-1986-future</bibkey>
     </paper>
diff --git a/data/xml/C88.xml b/data/xml/C88.xml
index 63d0313ed5..84dfed51ac 100644
--- a/data/xml/C88.xml
+++ b/data/xml/C88.xml
@@ -18,7 +18,7 @@
     </paper>
     <paper id="2">
       <title>Parsing <fixed-case>F</fixed-case>rench with <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar: some linguistic accounts</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <url hash="5a686843">C88-1002</url>
       <bibkey>abeille-1988-parsing</bibkey>
     </paper>
@@ -36,21 +36,21 @@
     </paper>
     <paper id="5">
       <title>Efficiency Considerations for <fixed-case>LFG</fixed-case>-Parsers - Incremental and Table-Lookup Techniques</title>
-      <author><first>Istvan</first><last>Batori</last></author>
+      <author id="istvan-batori"><first>Istvan</first><last>Batori</last></author>
       <author><first>Stefan</first><last>Marok</last></author>
       <url hash="25fc46fc">C88-1005</url>
       <bibkey>batori-marok-1988-efficiency</bibkey>
     </paper>
     <paper id="6">
       <title>Morphology with Two-Level Rules and Negative Rule Features</title>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <url hash="b18c169b">C88-1006</url>
       <bibkey>bear-1988-morphology</bibkey>
     </paper>
     <paper id="7">
       <title>Machine Translation Using Isomorphic <fixed-case>UCG</fixed-case>s</title>
       <author><first>John L.</first><last>Beaven</last></author>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <url hash="ba85fb39">C88-1007</url>
       <bibkey>beaven-whitelock-1988-machine</bibkey>
     </paper>
@@ -71,8 +71,8 @@
     <paper id="10">
       <title>Some Problems of Machine Translation Between Closely Related Languages</title>
       <author><first>Alevtina</first><last>Bemova</last></author>
-      <author><first>Karel</first><last>Oli̊va</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oli̊va</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <url hash="6ea835d5">C88-1010</url>
       <bibkey>bemova-etal-1988-problems</bibkey>
     </paper>
@@ -85,9 +85,9 @@
     </paper>
     <paper id="12">
       <title>Software Support for Practical Grammar Development</title>
-      <author><first>Bran</first><last>Boguraev</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="branimir-boguraev"><first>Bran</first><last>Boguraev</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <url hash="b3c3595b">C88-1012</url>
       <bibkey>boguraev-etal-1988-software</bibkey>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="14">
       <title>Co-Ordinative Ellipsis in <fixed-case>R</fixed-case>ussian Texts: Problems of Description and Restoration</title>
-      <author><first>Igor A.</first><last>Bolshakov</last></author>
+      <author id="igor-a-bolshakov"><first>Igor A.</first><last>Bolshakov</last></author>
       <url hash="a852e51a">C88-1014</url>
       <bibkey>bolshakov-1988-co</bibkey>
     </paper>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="18">
       <title>Unification Categorial Grammar: A Concise, Extendable Grammar for Natural Language Processing</title>
-      <author><first>Jonathan</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jonathan</first><last>Calder</last></author>
       <author><first>Ewan</first><last>Klein</last></author>
       <author><first>Henk</first><last>Zeevat</last></author>
       <url hash="b045fac5">C88-1018</url>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="19">
       <title>Acquisition of Semantic Information From an On-Line Dictionary</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Eugenio</first><last>Picchi</last></author>
       <url hash="8b576522">C88-1019</url>
       <bibkey>calzolari-picchi-1988-acquisition</bibkey>
@@ -153,8 +153,8 @@
     </paper>
     <paper id="21">
       <title>Anaphora Resolution: A Multi-Strategy Approach</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="449daf13">C88-1021</url>
       <bibkey>carbonell-brown-1988-anaphora</bibkey>
     </paper>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="23">
       <title>Unification and Transduction in Computational Phonology</title>
-      <author><first>Julie</first><last>Carson</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson</last></author>
       <url hash="c1674c7b">C88-1023</url>
       <bibkey>carson-1988-unification</bibkey>
     </paper>
@@ -200,14 +200,14 @@
     </paper>
     <paper id="28">
       <title><fixed-case>GRAFON</fixed-case>: A Grapheme-to-Phoneme Conversion System for <fixed-case>D</fixed-case>utch</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="08e79bcf">C88-1028</url>
       <bibkey>daelemans-1988-grafon</bibkey>
     </paper>
     <paper id="29">
       <title>Morphology and cross dependencies in the synthesis of personal pronouns in <fixed-case>R</fixed-case>omance languages</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Fiametta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiametta</first><last>Namer</last></author>
       <url hash="96339078">C88-1029</url>
       <bibkey>danlos-namer-1988-morphology</bibkey>
     </paper>
@@ -221,7 +221,7 @@
     </paper>
     <paper id="31">
       <title>Stylistic Grammars in Language Translation</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <url hash="74e5aa5a">C88-1031</url>
       <bibkey>dimarco-hirst-1988-stylistic</bibkey>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="34">
       <title>Knowledge integration in a robust and efficient morpho-syntactic analyzer for <fixed-case>F</fixed-case>rench</title>
-      <author><first>Louisette</first><last>Emirkanian</last></author>
+      <author id="louisette-emirkanian"><first>Louisette</first><last>Emirkanian</last></author>
       <author><first>Lorne H.</first><last>Bouchard</last></author>
       <url hash="a006379c">C88-1034</url>
       <bibkey>emirkanian-bouchard-1988-knowledge</bibkey>
@@ -266,7 +266,7 @@
     <paper id="38">
       <title>Sequencing in a Connectionist Model of Language Processing</title>
       <author><first>Michael</first><last>Gasser</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <url hash="c8cbe740">C88-1038</url>
       <bibkey>gasser-dyer-1988-sequencing</bibkey>
     </paper>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="45">
       <title>Formal Morphology</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <url hash="1602a03f">C88-1045</url>
       <bibkey>hajic-1988-formal</bibkey>
     </paper>
@@ -330,7 +330,7 @@
     </paper>
     <paper id="47">
       <title>A Cognitive Account of Unbounded Dependency</title>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="bfd02434">C88-1047</url>
       <bibkey>hasida-1988-cognitive</bibkey>
     </paper>
@@ -377,34 +377,34 @@
     </paper>
     <paper id="54">
       <title>Achieving Bidirectionality</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="5aa0759d">C88-1054</url>
       <bibkey>jacobs-1988-achieving</bibkey>
     </paper>
     <paper id="55">
       <title><fixed-case>C</fixed-case>oncretion: Assumption-Based Understanding</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="285f8284">C88-1055</url>
       <bibkey>jacobs-1988-concretion</bibkey>
     </paper>
     <paper id="56">
       <title>Locally Governed Trees and Dependecncy Parsing</title>
-      <author><first>Harri</first><last>Jäppinen</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
       <author><first>Eero</first><last>Lassila</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <url hash="6450b16a">C88-1056</url>
       <bibkey>jappinen-etal-1988-locally</bibkey>
     </paper>
     <paper id="57">
       <title>Issues in Relating Syntax and Semantics</title>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="db5cda2e">C88-1057</url>
       <bibkey>jurafsky-1988-issues</bibkey>
     </paper>
     <paper id="58">
       <title>Coordination in Reconnaissance-Attack Parsing</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
-      <author><first>Thomas C.</first><last>Rindflesch</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas C.</first><last>Rindflesch</last></author>
       <url hash="11aef2a7">C88-1058</url>
       <bibkey>kac-rindflesch-1988-coordination</bibkey>
     </paper>
@@ -417,15 +417,15 @@
     </paper>
     <paper id="60">
       <title>An Algorithm for Functional Uncertainty</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <url hash="6cbf2a23">C88-1060</url>
       <bibkey>kaplan-maxwell-iii-1988-algorithm</bibkey>
     </paper>
     <paper id="61">
       <title>Constituent Coordination in <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <url hash="469055ab">C88-1061</url>
       <bibkey>kaplan-maxwell-iii-1988-constituent</bibkey>
     </paper>
@@ -438,7 +438,7 @@
     </paper>
     <paper id="63">
       <title>An Experimental Parser for Systemic Grammars</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <url hash="788eaf33">C88-1063</url>
       <bibkey>kasper-1988-experimental</bibkey>
     </paper>
@@ -470,14 +470,14 @@
     </paper>
     <paper id="68">
       <title>Constructing a Model of Dialog</title>
-      <author><first>Mare</first><last>Koit</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
       <url hash="515d236e">C88-1068</url>
       <bibkey>koit-1988-constructing</bibkey>
     </paper>
     <paper id="69">
       <title>Complexity, Two-Level Morphology and <fixed-case>F</fixed-case>innish</title>
       <author><first>Kimmo</first><last>Koskenniemi</last></author>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <url hash="96db7a61">C88-1069</url>
       <bibkey>koskenniemi-church-1988-complexity</bibkey>
     </paper>
@@ -486,7 +486,7 @@
       <author><first>Ikuo</first><last>Kudo</last></author>
       <author><first>Hideya</first><last>Koshino</last></author>
       <author><first>Moonkyung</first><last>Chung</last></author>
-      <author><first>Tsuyosi</first><last>Morimoto</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyosi</first><last>Morimoto</last></author>
       <url hash="442fa8ad">C88-1070</url>
       <bibkey>kudo-etal-1988-schema</bibkey>
     </paper>
@@ -498,7 +498,7 @@
     </paper>
     <paper id="72">
       <title>A News Analysis System</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="b1f0d210">C88-1072</url>
       <bibkey>kuhns-1988-news</bibkey>
     </paper>
@@ -512,7 +512,7 @@
       <title><fixed-case>SAGE</fixed-case> - a Sentence Parsing and Generation System</title>
       <author><first>Jean-Marie</first><last>Lancel</last></author>
       <author><first>Miyo</first><last>Otani</last></author>
-      <author><first>Nathalie</first><last>Simonin</last></author>
+      <author id="nathalie-simonin"><first>Nathalie</first><last>Simonin</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <url hash="e60e6d4d">C88-1074</url>
       <bibkey>lancel-etal-1988-sage</bibkey>
@@ -531,21 +531,21 @@
     </paper>
     <paper id="77">
       <title>Interpretation of Noun Phrases in Intensional Contexts</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Paolo</first><last>Terenziani</last></author>
       <url hash="6cc3d7ec">C88-1077</url>
       <bibkey>lesmo-terenziani-1988-interpretation</bibkey>
     </paper>
     <paper id="78">
       <title>Inheritance in Hierarchical Relational Structures</title>
-      <author><first>Derek P.</first><last>Long</last></author>
-      <author><first>Roberto</first><last>Garigliano</last></author>
+      <author id="derek-long"><first>Derek P.</first><last>Long</last></author>
+      <author id="roberto-garigliano"><first>Roberto</first><last>Garigliano</last></author>
       <url hash="2cdcb817">C88-1078</url>
       <bibkey>long-garigliano-1988-inheritance</bibkey>
     </paper>
     <paper id="79">
       <title>Designing and testing linguistic development phases in machine translation project</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="d9f14d0a">C88-1079</url>
       <bibkey>maegaard-1988-designing</bibkey>
     </paper>
@@ -557,13 +557,13 @@
     </paper>
     <paper id="81">
       <title>Representing Regularities in the Metaphoric Lexicon</title>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <url hash="40ebffe8">C88-1081</url>
       <bibkey>martin-1988-representing</bibkey>
     </paper>
     <paper id="82">
       <title>Linguistic Processing Using a Dependency Structure Grammar for Speech Recognition and Understanding</title>
-      <author><first>Sho-ichi</first><last>Matsunaga</last></author>
+      <author id="shoichi-matsunaga"><first>Sho-ichi</first><last>Matsunaga</last></author>
       <author><first>Masaki</first><last>Kohda</last></author>
       <url hash="273c34ae">C88-1082</url>
       <bibkey>matsunaga-kohda-1988-linguistic</bibkey>
@@ -587,7 +587,7 @@
     </frontmatter>
     <paper id="84">
       <title>Lexical Transfer: Between a Source Rock and a Hard Target</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <url hash="2abce9f4">C88-2084</url>
       <bibkey>melby-1988-lexical</bibkey>
     </paper>
@@ -599,7 +599,7 @@
     </paper>
     <paper id="86">
       <title>Solving Some Persistent Presupposition Problems</title>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <url hash="8cb11b4d">C88-2086</url>
       <bibkey>mercer-1988-solving</bibkey>
     </paper>
@@ -611,7 +611,7 @@
     </paper>
     <paper id="88">
       <title>Strategies for Effective Paraphrasing</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <author><first>Varda</first><last>Shaked</last></author>
       <url hash="c21f192b">C88-2088</url>
       <bibkey>meteer-shaked-1988-strategies</bibkey>
@@ -631,37 +631,37 @@
     </paper>
     <paper id="91">
       <title><fixed-case>PANEL</fixed-case>: Language Engineering: The Real Bottle Neck of Natural Language Processing</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="976ea221">C88-2091</url>
       <bibkey>nagao-1988-panel</bibkey>
     </paper>
     <paper id="92">
       <title>Why Computational Grammarians Can Be Skeptical About Existing Linguistic Theories</title>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <url hash="82d5f452">C88-2092</url>
       <bibkey>jensen-1988-computational</bibkey>
     </paper>
     <paper id="93">
       <title>Why Implementors of Practical <fixed-case>NLP</fixed-case> Systems Can not Wait for Linguistic Theories Remarks and Theses</title>
-      <author><first>Dietmar</first><last>Roesner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Roesner</last></author>
       <url hash="c4bfc530">C88-2093</url>
       <bibkey>roesner-1988-implementors</bibkey>
     </paper>
     <paper id="94">
       <title>Reasons Why We Use Dependency Grammar</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <url hash="80ebd034">C88-2094</url>
       <bibkey>hajicova-1988-reasons</bibkey>
     </paper>
     <paper id="95">
       <title>Reasons why <fixed-case>I</fixed-case> do not care grammar formalism</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="d63ed159">C88-2095</url>
       <bibkey>tsujii-1988-reasons</bibkey>
     </paper>
     <paper id="96">
       <title>“Linguistic” Sentences and “Real” Sentences</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="c0511b1c">C88-2096</url>
       <bibkey>tomita-1988-linguistic</bibkey>
     </paper>
@@ -674,8 +674,8 @@
     </paper>
     <paper id="98">
       <title>Extraction of Semantic Information from an Ordinary <fixed-case>E</fixed-case>nglish Dictionary and its Evaluation</title>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="b891de34">C88-2098</url>
       <bibkey>nakamura-nagao-1988-extraction</bibkey>
     </paper>
@@ -688,8 +688,8 @@
     </paper>
     <paper id="100">
       <title>A Framework for Lexical Selection in Natural Language Generation</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Irene</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="irene-nirenburg"><first>Irene</first><last>Nirenburg</last></author>
       <url hash="b7c6db03">C88-2100</url>
       <bibkey>nirenburg-nirenburg-1988-framework</bibkey>
     </paper>
@@ -704,7 +704,7 @@
     </paper>
     <paper id="102">
       <title>Maintaining Consistency and Plausibility in Integrated Natural Language Understanding</title>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Xuemin</first><last>Liu</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <author><first>Atsushi</first><last>Yamada</last></author>
@@ -715,20 +715,20 @@
       <title>Parsing with look-ahead in real-time on-line translation system</title>
       <author><first>Hiroyasu</first><last>Nogami</last></author>
       <author><first>Yumiko</first><last>Yoshimura</last></author>
-      <author><first>Shin-ya</first><last>Amano</last></author>
+      <author id="shin-ya-amano"><first>Shin-ya</first><last>Amano</last></author>
       <url hash="58357b70">C88-2103</url>
       <bibkey>nogami-etal-1988-parsing</bibkey>
     </paper>
     <paper id="104">
       <title>Syntactic Functions in <fixed-case>GPSG</fixed-case></title>
-      <author><first>Karel</first><last>Oli̊va</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oli̊va</last></author>
       <url hash="bdf4f141">C88-2104</url>
       <bibkey>oliva-1988-syntactic</bibkey>
     </paper>
     <paper id="105">
       <title>List Automata With Syntactically Structured Output</title>
-      <author><first>Karel</first><last>Oli̊va</last></author>
-      <author><first>Martin</first><last>Platek</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oli̊va</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Platek</last></author>
       <url hash="606febaf">C88-2105</url>
       <bibkey>oliva-platek-1988-list</bibkey>
     </paper>
@@ -748,7 +748,7 @@
     </paper>
     <paper id="108">
       <title>New Dependency Based Specification of Underlying Representations of Sentences</title>
-      <author><first>Vladimir</first><last>Petkevic</last></author>
+      <author id="vladimir-petkevic"><first>Vladimir</first><last>Petkevic</last></author>
       <url hash="bf59da56">C88-2108</url>
       <bibkey>petkevic-1988-new</bibkey>
     </paper>
@@ -760,14 +760,14 @@
     </paper>
     <paper id="110">
       <title>On The Semantic Interpretation of Nominals</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Peter G.</first><last>Anick</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="peter-anick"><first>Peter G.</first><last>Anick</last></author>
       <url hash="5d3bccd1">C88-2110</url>
       <bibkey>pustejovsky-anick-1988-semantic</bibkey>
     </paper>
     <paper id="111">
       <title>Using a Logic Grammar to Learn a Lexicon</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Asa</first><last>Hugosson</last></author>
       <author><first>Goran</first><last>Hagert</last></author>
       <url hash="c6787c48">C88-2111</url>
@@ -778,22 +778,22 @@
       <author><first>Walter</first><last>Read</last></author>
       <author><first>Alex</first><last>Quilici</last></author>
       <author><first>John</first><last>Reeves</last></author>
-      <author><first>Michael</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael</first><last>Dyer</last></author>
       <url hash="5c13a911">C88-2112</url>
       <bibkey>read-etal-1988-evaluating</bibkey>
     </paper>
     <paper id="113">
       <title>Parallel Intersection and Serial Composition of Finite State Transducers</title>
-      <author><first>Mike</first><last>Reape</last></author>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <url hash="d6a25395">C88-2113</url>
       <bibkey>reape-thompson-1988-parallel</bibkey>
     </paper>
     <paper id="114">
       <title>Framework for a Model of Dialogue</title>
-      <author><first>Ronan</first><last>Reilly</last></author>
+      <author id="ronan-g-reilly"><first>Ronan</first><last>Reilly</last></author>
       <author><first>Giacomo</first><last>Ferrari</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <url hash="f174de75">C88-2114</url>
       <bibkey>reilly-etal-1988-framework</bibkey>
     </paper>
@@ -811,27 +811,27 @@
     </paper>
     <paper id="117">
       <title>Default Logic, Natural Language and Generalized Quantifiers</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="08343267">C88-2117</url>
       <bibkey>saint-dizier-1988-default</bibkey>
     </paper>
     <paper id="118">
       <title>Parsing Noisy Sentences</title>
       <author><first>Hiroaki</first><last>Saito</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="b69f4f3c">C88-2118</url>
       <bibkey>saito-tomita-1988-parsing</bibkey>
     </paper>
     <paper id="119">
       <title>A New Strategy for Providing Definitions In Task-Oriented Dialogues</title>
       <author><first>Margaret H.</first><last>Sarner</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="ac002d08">C88-2119</url>
       <bibkey>sarner-carberry-1988-new</bibkey>
     </paper>
     <paper id="120">
       <title>An Augmented Context Free Grammar for Discourse</title>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <author><first>Livia</first><last>Polanyi</last></author>
       <url hash="683ea854">C88-2120</url>
       <bibkey>scha-polanyi-1988-augmented</bibkey>
@@ -839,8 +839,8 @@
     <paper id="121">
       <title>Parsing Strategies with ‘Lexicalized’ Grammars: Application to <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Anne</first><last>Abeille</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="7a412ba8">C88-2121</url>
       <bibkey>schabes-etal-1988-parsing</bibkey>
     </paper>
@@ -861,7 +861,7 @@
       <title><fixed-case>PANEL</fixed-case> Parallel Processing in Computational Linguistics</title>
       <author><first>Helmut</first><last>Schnelle</last></author>
       <author><first>Garry</first><last>Cottrell</last></author>
-      <author><first>Paradip</first><last>Dey</last></author>
+      <author id="pradip-dey"><first>Paradip</first><last>Dey</last></author>
       <author><first>Peter A.</first><last>Reich</last></author>
       <author><first>Lokendra</first><last>Shastri</last></author>
       <url hash="0c41d0e6">C88-2124</url>
@@ -887,7 +887,7 @@
     </paper>
     <paper id="128">
       <title>A Uniform Architecture for Parsing and Generation</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <url hash="9efbf5ea">C88-2128</url>
       <bibkey>shieber-1988-uniform</bibkey>
     </paper>
@@ -903,13 +903,13 @@
       <title>Directing the Generation of Living Space Descriptions</title>
       <author><first>Penelope</first><last>Sibun</last></author>
       <author><first>Alison K.</first><last>Huettner</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="0a0db3a3">C88-2130</url>
       <bibkey>sibun-etal-1988-directing</bibkey>
     </paper>
     <paper id="131">
       <title>On the Semantics of Focus Phenomena in <fixed-case>E</fixed-case>urotra</title>
-      <author><first>Erich H.</first><last>Steiner</last></author>
+      <author id="erich-h-steiner"><first>Erich H.</first><last>Steiner</last></author>
       <author><first>Jutta</first><last>Winter-Thielen</last></author>
       <url hash="2258ba64">C88-2131</url>
       <bibkey>steiner-winter-thielen-1988-semantics</bibkey>
@@ -937,7 +937,7 @@
     </paper>
     <paper id="135">
       <title>A Computer Readability Formula of <fixed-case>J</fixed-case>apanese Texts for Machine Scoring</title>
-      <author><last>Tateisi</last><first>Yuka</first></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><last>Ono</last><first>Yoshihiko</first></author>
       <author><last>Yamada</last><first>Hisao</first></author>
       <url hash="3e8d09c3">C88-2135</url>
@@ -955,44 +955,44 @@
     <paper id="137">
       <title>Application of the Direct Memory Access paradigm to natural language interlaces to knowledge-based systems</title>
       <author><first>Hideto</first><last>Tomabechi</last></author>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="b71c4a56">C88-2137</url>
       <bibkey>tomabechi-tomita-1988-application</bibkey>
     </paper>
     <paper id="138">
       <title>Combining Lexicon-Driven Parsing and Phrase-Structure-Based Parsing</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="3ad06cfc">C88-2138</url>
       <bibkey>tomita-1988-combining</bibkey>
     </paper>
     <paper id="139">
       <title>Linguistic Contributions to Text-to-Speech Computer Prorgrams for <fixed-case>F</fixed-case>rench</title>
       <author><first>Pierre</first><last>Trescases</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <url hash="7c48121d">C88-2139</url>
       <bibkey>trescases-crocker-1988-linguistic</bibkey>
     </paper>
     <paper id="140">
       <title>On the Interaction of Syntax and Semantics in a Syntactically Guided Caseframe Parser</title>
-      <author><first>Harald</first><last>Trost</last></author>
-      <author><first>Ernst</first><last>Buchberger</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
+      <author id="ernst-buchberger"><first>Ernst</first><last>Buchberger</last></author>
       <author><first>Wolfgang</first><last>Heinz</last></author>
       <url hash="12fec04a">C88-2140</url>
       <bibkey>trost-etal-1988-interaction</bibkey>
     </paper>
     <paper id="141">
       <title>How to Get Preferred Readings in Natural Language Analysis</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <author><first>Yukiyoshi</first><last>Muto</last></author>
       <author><first>Yuuji</first><last>Ikeda</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="6c4ef7a4">C88-2141</url>
       <bibkey>tsujii-etal-1988-get</bibkey>
     </paper>
     <paper id="142">
       <title>Dialogue Translation vs. Text Translation</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="7bd30e1e">C88-2142</url>
       <bibkey>tsujii-nagao-1988-dialogue</bibkey>
     </paper>
@@ -1005,7 +1005,7 @@
     </paper>
     <paper id="144">
       <title>The Analysis of Tense and Aspect in <fixed-case>E</fixed-case>urotra</title>
-      <author><first>Frank</first><last>van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>van Eynde</last></author>
       <url hash="3acb83c7">C88-2144</url>
       <bibkey>van-eynde-1988-analysis</bibkey>
     </paper>
@@ -1017,13 +1017,13 @@
     </paper>
     <paper id="146">
       <title>Morphosyntactic correction in natural language interfaces</title>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <url hash="25de7cea">C88-2146</url>
       <bibkey>veronis-1988-morphosyntactic</bibkey>
     </paper>
     <paper id="147">
       <title>Feature Structures Based <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author id="aravind-joshi"><first>A.K.</first><last>Joshi</last></author>
       <url hash="9123b500">C88-2147</url>
       <bibkey>vijay-shanker-joshi-1988-feature</bibkey>
@@ -1036,13 +1036,13 @@
     </paper>
     <paper id="149">
       <title>Issues in Word Choice</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="c2f4a65f">C88-2149</url>
       <bibkey>ward-1988-issues</bibkey>
     </paper>
     <paper id="150">
       <title>Generation as Structure Driven Derivation</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="8bfde909">C88-2150</url>
       <bibkey>wedekind-1988-generation</bibkey>
     </paper>
@@ -1060,9 +1060,9 @@
     </paper>
     <paper id="153">
       <title>Machine Tractable Dictionaries as Tools and Resources for Natural Language Processing</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Dan</first><last>Fass</last></author>
-      <author><first>Cheng-ming</first><last>Guo</last></author>
+      <author id="cheng-ming-guo"><first>Cheng-ming</first><last>Guo</last></author>
       <author><first>James E.</first><last>McDonald</last></author>
       <author><first>Tony</first><last>Plate</last></author>
       <author><first>Brian M.</first><last>Slator</last></author>
@@ -1077,15 +1077,15 @@
     </paper>
     <paper id="155">
       <title>Machine Translation for Monolinguals</title>
-      <author><first>Mary McGee</first><last>Wood</last></author>
-      <author><first>Brian J.</first><last>Chandler</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
+      <author id="brian-j-chandler"><first>Brian J.</first><last>Chandler</last></author>
       <url hash="d5400f67">C88-2155</url>
       <bibkey>wood-chandler-1988-machine</bibkey>
     </paper>
     <paper id="156">
       <title>Figuring out Most Plausible Interpretation from Spatial Descriptions</title>
       <author><first>Atsushi</first><last>Yamada</last></author>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <url hash="4d88c4ac">C88-2156</url>
       <bibkey>yamada-etal-1988-figuring</bibkey>
@@ -1106,13 +1106,13 @@
     </paper>
     <paper id="159">
       <title>Identifying Zero Pronouns in <fixed-case>J</fixed-case>apanese Dialogue</title>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <url hash="2e9b2a3b">C88-2159</url>
       <bibkey>yoshimoto-1988-identifying</bibkey>
     </paper>
     <paper id="160">
       <title>Interactive Translation: a new approach</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="b643accc">C88-2160</url>
       <bibkey>zajac-1988-interactive</bibkey>
     </paper>
@@ -1153,7 +1153,7 @@
     </paper>
     <paper id="166">
       <title><fixed-case>COMPLEX</fixed-case>: A Computational Lexicon for Natural Language Systems</title>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <url hash="c80e68a3">C88-2166</url>
       <bibkey>klavans-1988-complex</bibkey>
     </paper>
diff --git a/data/xml/C90.xml b/data/xml/C90.xml
index 7e8670f2d8..9dc7efe592 100644
--- a/data/xml/C90.xml
+++ b/data/xml/C90.xml
@@ -18,7 +18,7 @@
     <paper id="2">
       <title>Design of a Hybrid Deterministic Parser</title>
       <author><first>Kanaan A.</first><last>Faisal</last></author>
-      <author><first>Stan C.</first><last>Kwasny</last></author>
+      <author id="stan-c-kwasny"><first>Stan C.</first><last>Kwasny</last></author>
       <url hash="49f0d625">C90-1002</url>
       <bibkey>faisal-kwasny-1990-design</bibkey>
     </paper>
@@ -38,7 +38,7 @@
     <paper id="5">
       <title>Tagging for Learning: Collecting Thematic Relations from Corpus</title>
       <author><first>Uri</first><last>Zernik</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
       <url hash="13c74a47">C90-1005</url>
       <bibkey>zernik-jacobs-1990-tagging</bibkey>
     </paper>
@@ -94,13 +94,13 @@
     </paper>
     <paper id="12">
       <title>The Generalized <fixed-case>LR</fixed-case> Parser/Compiler V8-4: A Software Package for Practical <fixed-case>NL</fixed-case> Projects</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <url hash="9dc566e8">C90-1012</url>
       <bibkey>tomita-1990-generalized</bibkey>
     </paper>
     <paper id="13">
       <title>Generation for Dialogue Translation Using Typed Feature Structure Unification</title>
-      <author><first>Yoshihiro</first><last>Ueda</last></author>
+      <author id="yoshihiro-ueda"><first>Yoshihiro</first><last>Ueda</last></author>
       <author><first>Kiyoshi</first><last>Kogure</last></author>
       <url hash="0ed96840">C90-1013</url>
       <bibkey>ueda-kogure-1990-generation</bibkey>
@@ -125,7 +125,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>STS</fixed-case>: An Experimental Sentence Translation System</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="d86b5f44">C90-1017</url>
       <bibkey>wehrli-1990-sts</bibkey>
     </paper>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="19">
       <title>Deep Sentence Understanding in a Restricted Domain</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Marc</first><last>Cavazza</last></author>
       <url hash="c6ddbb1d">C90-1019</url>
       <bibkey>zweigenbaum-cavazza-1990-deep</bibkey>
@@ -207,14 +207,14 @@
     </paper>
     <paper id="2">
       <title>An Application of Lexical Semantics to Knowledge Acquisition from Corpora</title>
-      <author><first>Peter</first><last>Anick</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="345e2ad6">C90-2002</url>
       <bibkey>anick-pustejovsky-1990-application</bibkey>
     </paper>
     <paper id="3">
       <title>Finding Translation Equivalents: An Application of Grammatical Metaphor</title>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <url hash="481ac45e">C90-2003</url>
       <bibkey>bateman-1990-finding</bibkey>
     </paper>
@@ -234,22 +234,22 @@
     </paper>
     <paper id="6">
       <title>Towards Personal <fixed-case>MT</fixed-case>: general design, dialogue structure, potential role of speech</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="1a69e71f">C90-2006</url>
       <bibkey>boitet-1990-towards</bibkey>
     </paper>
     <paper id="7">
       <title>Lexical Ambiguity and The Role of Knowledge Representation in Lexicon Design</title>
-      <author><first>Branimir</first><last>Boguraev</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="45c19169">C90-2007</url>
       <bibkey>boguraev-pustejovsky-1990-lexical</bibkey>
     </paper>
     <paper id="8">
       <title>Enjoy the Paper: Lexicology</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Bran</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Bran</first><last>Boguraev</last></author>
       <url hash="2443157b">C90-2008</url>
       <bibkey>briscoe-etal-1990-enjoy</bibkey>
     </paper>
@@ -261,7 +261,7 @@
     </paper>
     <paper id="10">
       <title>Information-based Case Grammar</title>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <url hash="c86e6022">C90-2010</url>
       <bibkey>chen-huang-1990-information</bibkey>
@@ -269,8 +269,8 @@
     <paper id="11">
       <title>An Augmented Chart Data Structure with Efficient Word Lattice Parsing Scheme In Speech Recognition Applications</title>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>K. J.</first><last>Chen</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="keh-jiann-chen"><first>K. J.</first><last>Chen</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <url hash="9c2d5195">C90-2011</url>
       <bibkey>chien-etal-1990-augmented</bibkey>
     </paper>
@@ -283,7 +283,7 @@
     <paper id="13">
       <title>Modeling syntactic constraints on anaphoric binding</title>
       <author><first>Mary</first><last>Dalrymple</last></author>
-      <author><first>John</first><last>Maxwell</last></author>
+      <author id="john-t-maxwell-iii"><first>John</first><last>Maxwell</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <url hash="ff338f39">C90-2013</url>
       <bibkey>dalrymple-etal-1990-modeling</bibkey>
@@ -303,7 +303,7 @@
     <paper id="16">
       <title>Integrating Stress and Intonation into a Concept-to-Speech System</title>
       <author><first>Georg</first><last>Dorffner</last></author>
-      <author><first>Ernst</first><last>Buchberger</last></author>
+      <author id="ernst-buchberger"><first>Ernst</first><last>Buchberger</last></author>
       <author><first>Markus</first><last>Kommenda</last></author>
       <url hash="f0112fc8">C90-2016</url>
       <bibkey>dorffner-etal-1990-integrating</bibkey>
@@ -316,14 +316,14 @@
     </paper>
     <paper id="18">
       <title>Feature Logic with Disjunctive Unification</title>
-      <author><first>Jochen</first><last>Dorre</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dorre</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <url hash="240d1502">C90-2018</url>
       <bibkey>dorre-eisele-1990-feature</bibkey>
     </paper>
     <paper id="19">
       <title>Generating <fixed-case>F</fixed-case>rench with a Reversible Unification Grammar</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <url hash="b62143a7">C90-2019</url>
       <bibkey>estival-1990-generating</bibkey>
     </paper>
@@ -348,7 +348,7 @@
     </paper>
     <paper id="23">
       <title>“Translation Great Problem” - On the Problem of Inserting Articles When Translating From <fixed-case>R</fixed-case>ussian Into <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Barbara</first><last>Gawronska-Werngren</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska-Werngren</last></author>
       <url hash="1c8b73b9">C90-2023</url>
       <bibkey>gawronska-werngren-1990-translation</bibkey>
     </paper>
@@ -363,7 +363,7 @@
     <paper id="25">
       <title>Functor-Driven Natural Language Generation with Categorial-Unification Grammars</title>
       <author><first>Dale</first><last>Gerdemann</last></author>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <url hash="d6f6ae01">C90-2025</url>
       <bibkey>gerdemann-hinrichs-1990-functor</bibkey>
     </paper>
@@ -376,7 +376,7 @@
     </paper>
     <paper id="27">
       <title>A Linguistic Theory of Robustness</title>
-      <author><first>Sebastian</first><last>Goeser</last></author>
+      <author id="sebastian-goeser"><first>Sebastian</first><last>Goeser</last></author>
       <url hash="bf313d72">C90-2027</url>
       <bibkey>goeser-1990-linguistic</bibkey>
     </paper>
@@ -416,27 +416,27 @@
     </paper>
     <paper id="33">
       <title>A Bottom-up Generation for Principle-based Grammars Using Constraint Propagation</title>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <url hash="69b9ea03">C90-2033</url>
       <bibkey>ishizaki-1990-bottom</bibkey>
     </paper>
     <paper id="34">
       <title>To Parse or Not to Parse: Relation-Driven Text Skimming</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="be5f5613">C90-2034</url>
       <bibkey>jacobs-1990-parse</bibkey>
     </paper>
     <paper id="35">
       <title>Representing and Integrating Linguistic Knowledge</title>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="86c75166">C90-2035</url>
       <bibkey>jurafsky-1990-representing</bibkey>
     </paper>
     <paper id="36">
       <title>A Spelling Correction Program Based on a Noisy Channel Model</title>
       <author><first>Mark D.</first><last>Kernighan</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
-      <author><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
       <url hash="93698224">C90-2036</url>
       <bibkey>kernighan-etal-1990-spelling</bibkey>
     </paper>
@@ -486,8 +486,8 @@
     </paper>
     <paper id="44">
       <title>Disambiguating Cue Phrases in Text and Speech</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="0f76cee0">C90-2044</url>
       <bibkey>litman-hirschberg-1990-disambiguating</bibkey>
     </paper>
@@ -499,7 +499,7 @@
     </paper>
     <paper id="46">
       <title>Tenets for an Interlingual Representation Definite <fixed-case>NP</fixed-case>s</title>
-      <author><first>Montserrat</first><last>Meya</last></author>
+      <author id="montserrat-meya"><first>Montserrat</first><last>Meya</last></author>
       <url hash="9ee77d70">C90-2046</url>
       <bibkey>meya-1990-tenets</bibkey>
     </paper>
@@ -512,8 +512,8 @@
     <paper id="48">
       <title>The Generation of High-Level Structure for Extended Explanations</title>
       <author><first>David J.</first><last>Mooney</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="84297518">C90-2048</url>
       <bibkey>mooney-etal-1990-generation</bibkey>
     </paper>
@@ -525,7 +525,7 @@
     </paper>
     <paper id="50">
       <title>A Head-Driven Approach to Incremental and Parallel Generation of Syntactic Structures</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <author><first>Wolfgang</first><last>Finkler</last></author>
       <url hash="e2cc4c9e">C90-2050</url>
       <bibkey>neumann-finkler-1990-head</bibkey>
@@ -538,7 +538,7 @@
     </paper>
     <paper id="52">
       <title>Reversible Unification Based Machine Translation</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="d544f941">C90-2052</url>
       <bibkey>van-noord-1990-reversible</bibkey>
     </paper>
@@ -581,13 +581,13 @@
     </paper>
     <paper id="59">
       <title>Gapping and Frame Semantics: A fresh look from a cognitive perspective</title>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <url hash="807de5ae">C90-2059</url>
       <bibkey>stolcke-1990-gapping</bibkey>
     </paper>
     <paper id="60">
       <title>How to Invert a Natural Language Parser Into an Efficient Generator: An Algorithm for Logic Grammars</title>
-      <author><first>Tomek</first><last>Strzalkowskl</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowskl</last></author>
       <url hash="f27a7508">C90-2060</url>
       <bibkey>strzalkowskl-1990-invert</bibkey>
     </paper>
@@ -599,7 +599,7 @@
     </paper>
     <paper id="62">
       <title>An Explanation Facility for a Grammar Writing System</title>
-      <author><first>Loong Cheong</first><last>Tong</last></author>
+      <author id="loong-cheong-tong"><first>Loong Cheong</first><last>Tong</last></author>
       <url hash="88375041">C90-2062</url>
       <bibkey>tong-1990-explanation</bibkey>
     </paper>
@@ -611,7 +611,7 @@
     </paper>
     <paper id="64">
       <title>The application of two-level morphology to non-concatenative <fixed-case>G</fixed-case>erman morphology</title>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="881f9eef">C90-2064</url>
       <bibkey>trost-1990-application</bibkey>
     </paper>
@@ -623,27 +623,27 @@
     </paper>
     <paper id="66">
       <title>Why Human Translators Still Sleep in Peace? (Four Engineering and Linguistic Gaps in Nlp)</title>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="d316e322">C90-2066</url>
       <bibkey>velardi-1990-human</bibkey>
     </paper>
     <paper id="67">
       <title>Word Sense Disambiguation with Very Large Neural Networks Extracted from Machine Readable Dictionaries</title>
-      <author><first>Jean</first><last>Veronis</last></author>
-      <author><first>Nancy M.</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
+      <author id="nancy-ide"><first>Nancy M.</first><last>Ide</last></author>
       <url hash="d4ecd917">C90-2067</url>
       <bibkey>veronis-ide-1990-word</bibkey>
     </paper>
     <paper id="68">
       <title>Free Adjuncts in Natural Language Instructions</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <url hash="47a5cc72">C90-2068</url>
       <bibkey>webber-di-eugenio-1990-free</bibkey>
     </paper>
     <paper id="69">
       <title>Identifying Subjective Characters in Narrative</title>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
       <url hash="0c200530">C90-2069</url>
       <bibkey>wiebe-1990-identifying</bibkey>
     </paper>
@@ -668,14 +668,14 @@
     </paper>
     <paper id="73">
       <title>Generation of Synthes Is Programs in Robra (Ariane) From String-Tree Correspondence Grammars (Or a Strategy for Synthesis in Machine Translation)</title>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <url hash="4afafe25">C90-2073</url>
       <bibkey>yusoff-1990-generation</bibkey>
     </paper>
     <paper id="74">
       <title>Morphological Analysis and Synthesis by Automated Discovery and Acquisition of Linguistic Rules</title>
       <author><first>Byoung-Tak</first><last>Zhang</last></author>
-      <author><first>Yung-Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung-Taek</first><last>Kim</last></author>
       <url hash="8f6a0a53">C90-2074</url>
       <bibkey>zhang-kim-1990-morphological</bibkey>
     </paper>
@@ -692,9 +692,9 @@
     </frontmatter>
     <paper id="1">
       <title>Using Lexicalized Tags for Machine Translation</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="a942cf2a">C90-3001</url>
       <bibkey>abeille-etal-1990-using</bibkey>
     </paper>
@@ -707,26 +707,26 @@
     </paper>
     <paper id="3">
       <title>Backwards Phonology</title>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <url hash="1b23a9b2">C90-3003</url>
       <bibkey>bear-1990-backwards</bibkey>
     </paper>
     <paper id="4">
       <title>Phonological Processing of Speech Variants</title>
-      <author><first>Julle</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julle</first><last>Carson-Berndsen</last></author>
       <url hash="1d6f4015">C90-3004</url>
       <bibkey>carson-berndsen-1990-phonological</bibkey>
     </paper>
     <paper id="5">
       <title>A Karaka Based Approach to Parsing of <fixed-case>I</fixed-case>ndian Languages</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <url hash="9949cdf9">C90-3005</url>
       <bibkey>bharati-sangal-1990-karaka</bibkey>
     </paper>
     <paper id="6">
       <title>Towards Personal <fixed-case>MT</fixed-case>: general design, dialogue structure, potential role of speech</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="9d433bc1">C90-3006</url>
       <bibkey>boitet-1990-towards-personal</bibkey>
     </paper>
@@ -738,19 +738,19 @@
     </paper>
     <paper id="8">
       <title>Human-Computer Interaction for Semantic Disambiguation</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="ce4b6863">C90-3008</url>
       <bibkey>brown-1990-human</bibkey>
     </paper>
     <paper id="9">
       <title>Syllable-based Morphology</title>
-      <author><first>Lynne J.</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne J.</first><last>Cahill</last></author>
       <url hash="a051431a">C90-3009</url>
       <bibkey>cahill-1990-syllable</bibkey>
     </paper>
     <paper id="10">
       <title>Acquisition of Lexical Information from a Large Textual <fixed-case>I</fixed-case>talian Corpus</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Remo</first><last>Bindi</last></author>
       <url hash="36e5b5d4">C90-3010</url>
       <bibkey>calzolari-bindi-1990-acquisition</bibkey>
@@ -771,13 +771,13 @@
     </paper>
     <paper id="13">
       <title>Efficient Disjunctive Unification for Bottom-Up Parsing</title>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <url hash="4154a7ee">C90-3013</url>
       <bibkey>carter-1990-efficient</bibkey>
     </paper>
     <paper id="14">
       <title>A Phonological Knowledge Base System Using Unification-based Formalism - A Case Study of <fixed-case>K</fixed-case>orean Phonology</title>
-      <author><first>Hee-Sung</first><last>Chung</last></author>
+      <author id="hee-sung-chung"><first>Hee-Sung</first><last>Chung</last></author>
       <url hash="c051187b">C90-3014</url>
       <bibkey>chung-1990-phonological</bibkey>
     </paper>
@@ -789,7 +789,7 @@
     </paper>
     <paper id="16">
       <title>Structured Meanings in Computational Linguistics</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <url hash="ae09ef0f">C90-3016</url>
       <bibkey>van-deemter-1990-structured</bibkey>
     </paper>
@@ -804,16 +804,16 @@
     <paper id="18">
       <title>Generating Connectives</title>
       <author><first>Michael</first><last>Elhadad</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="2240fbae">C90-3018</url>
       <bibkey>elhadad-mckeown-1990-generating</bibkey>
     </paper>
     <paper id="19">
       <title>Organizing linguistic knowledge for multilingual generation</title>
-      <author><first>Martin</first><last>Emele</last></author>
+      <author id="martin-c-emele"><first>Martin</first><last>Emele</last></author>
       <author><first>Ulrich</first><last>Held</last></author>
       <author><first>Stefan</first><last>Momma</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="b5e86d98">C90-3019</url>
       <bibkey>emele-etal-1990-organizing</bibkey>
     </paper>
@@ -832,14 +832,14 @@
     <paper id="22">
       <title>A Computational Approach to Binding Theory</title>
       <author><first>Alessandra</first><last>Giorgi</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <url hash="42cfec8e">C90-3022</url>
       <bibkey>giorgi-etal-1990-computational</bibkey>
     </paper>
     <paper id="23">
       <title>Causal and Temporal Text Analysis: The Role of the Domain Model</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Tomasz</first><last>Ksiezyk</last></author>
       <url hash="c38b3394">C90-3023</url>
       <bibkey>grishman-ksiezyk-1990-causal</bibkey>
@@ -852,38 +852,38 @@
     </paper>
     <paper id="25">
       <title>Is there content in empty heads?</title>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Brian M.</first><last>Slator</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <url hash="446afcea">C90-3025</url>
       <bibkey>guthrie-etal-1990-content</bibkey>
     </paper>
     <paper id="26">
       <title>Hierarchy of Salience and Discourse Analysis and Production</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <author><first>Petr</first><last>Kubon</last></author>
-      <author><first>Vladlslav</first><last>Kubon</last></author>
+      <author id="vladislav-kubon"><first>Vladlslav</first><last>Kubon</last></author>
       <url hash="d6817d63">C90-3026</url>
       <bibkey>hajicova-etal-1990-hierarchy</bibkey>
     </paper>
     <paper id="27">
       <title>A Constraint-Based Approach to Linguistic Performance</title>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="9f03a271">C90-3027</url>
       <bibkey>hasida-1990-constraint</bibkey>
     </paper>
     <paper id="28">
       <title>Translation by Abduction</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
       <url hash="3a94954a">C90-3028</url>
       <bibkey>hobbs-kameyama-1990-translation</bibkey>
     </paper>
     <paper id="29">
       <title>Two Principles of Parse Preference</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <url hash="825c19ab">C90-3029</url>
       <bibkey>hobbs-bear-1990-two</bibkey>
     </paper>
@@ -895,8 +895,8 @@
     </paper>
     <paper id="31">
       <title>The <fixed-case>BICORD</fixed-case> System Combining Lexical Information from Bilingual Corpora and Machine Readable Dictionaries</title>
-      <author><first>Judith</first><last>Klavans</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <url hash="b8a05597">C90-3031</url>
       <bibkey>klavans-tzoukermann-1990-bicord</bibkey>
     </paper>
@@ -908,7 +908,7 @@
     </paper>
     <paper id="33">
       <title>When Something Is Missing: Ellipsis, Coordination and the Chart</title>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Oliviero</first><last>Stock</last></author>
       <url hash="38d15130">C90-3033</url>
       <bibkey>lavelli-stock-1990-something</bibkey>
@@ -921,7 +921,7 @@
     </paper>
     <paper id="35">
       <title>Expressive Power of Grammatical Formalisms</title>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <author><first>Wlodek</first><last>Zadrozny</last></author>
       <url hash="6163f812">C90-3035</url>
       <bibkey>manaster-ramer-zadrozny-1990-expressive</bibkey>
@@ -950,7 +950,7 @@
     <paper id="39">
       <title>Meaning Representation and Text Planning</title>
       <author><first>Christine</first><last>Defrise</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="bdecb5aa">C90-3039</url>
       <bibkey>defrise-nirenburg-1990-meaning</bibkey>
     </paper>
@@ -964,7 +964,7 @@
     <paper id="41">
       <title>Predicting Co-Occurrence Restrictions by Using Semantic Classifications in the Lexicon</title>
       <author><first>Elena V.</first><last>Paducheva</last></author>
-      <author><first>Ekaterina V.</first><last>Rakhilina</last></author>
+      <author id="ekaterina-v-rakhilina"><first>Ekaterina V.</first><last>Rakhilina</last></author>
       <url hash="0fe07c35">C90-3041</url>
       <bibkey>paducheva-rakhilina-1990-predicting</bibkey>
     </paper>
@@ -976,20 +976,20 @@
     </paper>
     <paper id="43">
       <title>Automatic translation of support verb constructions</title>
-      <author><first>Morris</first><last>Salkoff</last></author>
+      <author id="morris-salkoff"><first>Morris</first><last>Salkoff</last></author>
       <url hash="1e9e2226">C90-3043</url>
       <bibkey>salkoff-1990-automatic</bibkey>
     </paper>
     <paper id="44">
       <title>Toward Memory-based Translation</title>
-      <author><first>Satoshi</first><last>Sato</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="05eee314">C90-3044</url>
       <bibkey>sato-nagao-1990-toward</bibkey>
     </paper>
     <paper id="45">
       <title>Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="c113f42d">C90-3045</url>
       <bibkey>shieber-schabes-1990-synchronous</bibkey>
@@ -1008,16 +1008,16 @@
     </paper>
     <paper id="48">
       <title>Machine Translation without a source text</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <author><first>Danny</first><last>Jones</last></author>
       <url hash="1745d9e3">C90-3048</url>
       <bibkey>somers-etal-1990-machine</bibkey>
     </paper>
     <paper id="49">
       <title>A Finite-State Morphological Processor for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
-      <author><first>Mark Y.</first><last>Liberman</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="mark-liberman"><first>Mark Y.</first><last>Liberman</last></author>
       <url hash="d5a1dde1">C90-3049</url>
       <bibkey>tzoukermann-liberman-1990-finite</bibkey>
     </paper>
@@ -1029,14 +1029,14 @@
     </paper>
     <paper id="51">
       <title>Incremental Parsing and Reason Maintenance</title>
-      <author><first>Mats</first><last>Wiren</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wiren</last></author>
       <url hash="ab4108c7">C90-3051</url>
       <bibkey>wiren-1990-incremental</bibkey>
     </paper>
     <paper id="52">
       <title>Typed Unification Grammars</title>
-      <author><first>Martin C.</first><last>Emele</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="a5ebdc77">C90-3052</url>
       <bibkey>emele-zajac-1990-typed</bibkey>
     </paper>
@@ -1048,7 +1048,7 @@
     </paper>
     <paper id="54">
       <title>The Self-Extending Lexicon: Off-Line and On-Line Defaulting of Lexical Information in the <fixed-case>METAL</fixed-case> Machine Translation System</title>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <author><first>Maarten</first><last>Lemmons</last></author>
       <url hash="784a0c32">C90-3054</url>
       <bibkey>adriaens-lemmons-1990-self</bibkey>
@@ -1059,7 +1059,7 @@
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Noriyoshi</first><last>Uratani</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Sumio</first><last>Nakase</last></author>
       <author><first>Norikazu</first><last>Aruga</last></author>
       <author><first>Takeo</first><last>Matsuda</last></author>
@@ -1069,20 +1069,20 @@
     <paper id="56">
       <title>Syntactic Description of Free Word Order Languages</title>
       <author><first>Tania</first><last>Avgustinova</last></author>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="31577b7c">C90-3056</url>
       <bibkey>avgustinova-oliva-1990-syntactic</bibkey>
     </paper>
     <paper id="57">
       <title><fixed-case>C</fixed-case>zech-to-<fixed-case>R</fixed-case>ussian Transducing Dictionary</title>
       <author><first>Alla</first><last>Bemova</last></author>
-      <author><first>Vladislav</first><last>Kubon</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
       <url hash="87ef6846">C90-3057</url>
       <bibkey>bemova-kubon-1990-czech</bibkey>
     </paper>
     <paper id="58">
       <title>A Large <fixed-case>R</fixed-case>ussian Morphological Vocabulary for Ibm Compatibles and Methods of Its Compression</title>
-      <author><first>Igor A.</first><last>Bolshakov</last></author>
+      <author id="igor-a-bolshakov"><first>Igor A.</first><last>Bolshakov</last></author>
       <url hash="b901aa40">C90-3058</url>
       <bibkey>bolshakov-1990-large</bibkey>
     </paper>
@@ -1129,7 +1129,7 @@
     </paper>
     <paper id="64">
       <title>A message processing system with object-centered semantics</title>
-      <author><first>Jean-Francois</first><last>Delannoy</last></author>
+      <author id="jean-francois-delannoy"><first>Jean-Francois</first><last>Delannoy</last></author>
       <url hash="ac0e1f35">C90-3064</url>
       <bibkey>delannoy-1990-message</bibkey>
     </paper>
@@ -1159,7 +1159,7 @@
     </paper>
     <paper id="69">
       <title>An Integrated System for Morphological Analysis of the <fixed-case>S</fixed-case>lovene Language</title>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
       <author><first>Peter</first><last>Tancig</last></author>
       <url hash="4d6e28e0">C90-3069</url>
       <bibkey>erjavec-tancig-1990-integrated</bibkey>
@@ -1172,14 +1172,14 @@
     </paper>
     <paper id="71">
       <title>Information Extraction and Semantic Constraints</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="ff4d2436">C90-3071</url>
       <bibkey>grishman-sterling-1990-information</bibkey>
     </paper>
     <paper id="72">
       <title>Spelling-checking for Highly Inflective Languages</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Janus</first><last>Drozd</last></author>
       <url hash="0793c0a7">C90-3072</url>
       <bibkey>hajic-drozd-1990-spelling</bibkey>
@@ -1216,8 +1216,8 @@
     </paper>
     <paper id="77">
       <title>The <fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: A Software Foundation for Intelligent Text Processing</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
-      <author><first>Lisa F.</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa F.</first><last>Rau</last></author>
       <url hash="61763744">C90-3077</url>
       <bibkey>jacobs-rau-1990-ge</bibkey>
     </paper>
@@ -1229,7 +1229,7 @@
     </paper>
     <paper id="79">
       <title>Intelligent Handling of Weather Forecasts</title>
-      <author><first>Stephan</first><last>Kerpedjiev</last></author>
+      <author id="stephan-m-kerpedjiev"><first>Stephan</first><last>Kerpedjiev</last></author>
       <author><first>Veska</first><last>Noncheva</last></author>
       <url hash="c1d363e2">C90-3079</url>
       <bibkey>kerpedjiev-noncheva-1990-intelligent</bibkey>
@@ -1262,19 +1262,19 @@
     </paper>
     <paper id="84">
       <title>A <fixed-case>PARLOG</fixed-case> Implementation of Government-Binding Theory</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="33a45431">C90-3084</url>
       <bibkey>kuhns-1990-parlog</bibkey>
     </paper>
     <paper id="85">
       <title>Automatic Indexing and Government-Binding Theory</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="c1fec63a">C90-3085</url>
       <bibkey>kuhns-1990-automatic</bibkey>
     </paper>
     <paper id="86">
       <title>“The first million is hardest to get”: Building a Large Tagged Corpus as Automatically as Possible</title>
-      <author><first>Gunnel</first><last>Kallgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Kallgren</last></author>
       <url hash="83fafcd0">C90-3086</url>
       <bibkey>kallgren-1990-first</bibkey>
     </paper>
@@ -1295,10 +1295,10 @@
     </paper>
     <paper id="89">
       <title>Applying Natural Language Processing Techniques to Augmentative Communication Systems</title>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Patrick</first><last>Demasco</last></author>
-      <author><first>Mark</first><last>Jones</last></author>
-      <author><first>Christopher</first><last>Pennington</last></author>
+      <author id="mark-jones"><first>Mark</first><last>Jones</last></author>
+      <author id="christopher-pennington"><first>Christopher</first><last>Pennington</last></author>
       <author><first>Charles</first><last>Rowe</last></author>
       <url hash="3a9c857b">C90-3089</url>
       <bibkey>mccoy-etal-1990-applying</bibkey>
@@ -1344,7 +1344,7 @@
     </paper>
     <paper id="96">
       <title>Simple Parser for an Hpsg-Style Grammar Implemented in <fixed-case>P</fixed-case>rolog</title>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="a5e60e7a">C90-3096</url>
       <bibkey>oliva-1990-simple</bibkey>
     </paper>
@@ -1376,7 +1376,7 @@
     </paper>
     <paper id="101">
       <title>Pilot Implementation of a Bilingual Knowledge Bank</title>
-      <author><first>Victor</first><last>Sadler</last></author>
+      <author id="victor-sadler"><first>Victor</first><last>Sadler</last></author>
       <author><first>Ronald</first><last>Vendelmans</last></author>
       <url hash="15f15683">C90-3101</url>
       <bibkey>sadler-vendelmans-1990-pilot</bibkey>
@@ -1384,16 +1384,16 @@
     <paper id="102">
       <title>A Mechanism for ellipsis resolution in dialogued systems</title>
       <author id="arantza-diaz-de-ilarraza"><first>A.</first><last>Diaz de Ilarraza Sanchez</last></author>
-      <author><first>H.</first><last>Rodriguez Hontoria</last></author>
+      <author id="h-rodriguez-hontoria"><first>H.</first><last>Rodriguez Hontoria</last></author>
       <author><first>F.</first><last>Maillo Verdejo</last></author>
       <url hash="70f0fc31">C90-3102</url>
       <bibkey>diaz-de-ilarraza-sanchez-etal-1990-mechanism</bibkey>
     </paper>
     <paper id="103">
       <title><fixed-case>MORPHO</fixed-case>-<fixed-case>ASSISTANT</fixed-case>: The Proper Treatment of Morphological Knowledge</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <url hash="f4957ec3">C90-3103</url>
       <bibkey>simov-etal-1990-morpho</bibkey>
     </paper>
diff --git a/data/xml/C92.xml b/data/xml/C92.xml
index 8f412da235..6d933358ee 100644
--- a/data/xml/C92.xml
+++ b/data/xml/C92.xml
@@ -29,13 +29,13 @@
     </paper>
     <paper id="4">
       <title>The scientific programme of <fixed-case>COLING</fixed-case>-92</title>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="2065c3f6">C92-1004</url>
       <bibkey>zampolli-1992-scientific</bibkey>
     </paper>
     <paper id="5">
       <title>About these proceedings</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="b0056929">C92-1005</url>
       <bibkey>boitet-1992-proceedings</bibkey>
     </paper>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="9">
       <title>Feature Structure Based Semantic Head Driven Generation</title>
-      <author><first>Gen-ichiro</first><last>Kikui</last></author>
+      <author id="gen-ichiro-kikui"><first>Gen-ichiro</first><last>Kikui</last></author>
       <url hash="cb6ca43c">C92-1009</url>
       <bibkey>kikui-1992-feature</bibkey>
     </paper>
@@ -87,13 +87,13 @@
     </paper>
     <paper id="13">
       <title>Synchronous <fixed-case>TAG</fixed-case>s and <fixed-case>F</fixed-case>rench Pronominal Clitics</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <url hash="9a00e37d">C92-1013</url>
       <bibkey>abeille-1992-synchronous</bibkey>
     </paper>
     <paper id="14">
       <title>A High-level Morphological Description Language Exploiting Inflectional Paradigms</title>
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <author><first>Suzanne</first><last>Artemieff</last></author>
       <url hash="2ac40a59">C92-1014</url>
       <bibkey>anick-artemieff-1992-high</bibkey>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="17">
       <title>Trace &amp; Unification Grammar</title>
-      <author><first>Hans Ulrich</first><last>Block</last></author>
+      <author id="hans-ulrich-block"><first>Hans Ulrich</first><last>Block</last></author>
       <author><first>Stefanie</first><last>Schachtl</last></author>
       <url hash="82e61f86">C92-1017</url>
       <bibkey>block-schachtl-1992-trace</bibkey>
@@ -125,7 +125,7 @@
     </paper>
     <paper id="19">
       <title>Word Identification for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Sentences</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Shing-Huan</first><last>Liu</last></author>
       <url hash="9d506fe3">C92-1019</url>
       <bibkey>chen-liu-1992-word</bibkey>
@@ -144,14 +144,14 @@
     </paper>
     <paper id="22">
       <title>Chart Parsing of Robust Grammars</title>
-      <author><first>Sebastian</first><last>Goeser</last></author>
+      <author id="sebastian-goeser"><first>Sebastian</first><last>Goeser</last></author>
       <url hash="b3c8c5e8">C92-1022</url>
       <bibkey>goeser-1992-chart</bibkey>
     </paper>
     <paper id="23">
       <title>Stock of Shared Knowledge - A Tool for Solving Pronominal Anaphora</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Vladislav</first><last>Kubon</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
       <author><first>Petr</first><last>Kubon</last></author>
       <url hash="70c3f807">C92-1023</url>
       <bibkey>hajicova-etal-1992-stock</bibkey>
@@ -165,7 +165,7 @@
     <paper id="25">
       <title>Two-Level Morphology with Composition</title>
       <author><first>Lauri</first><last>Karttunen</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <url hash="09d15943">C92-1025</url>
       <bibkey>karttunen-etal-1992-two</bibkey>
@@ -193,7 +193,7 @@
     <paper id="29">
       <title>Dynamic Programming Method for Analyzing Conjunctive Structures in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="0dd6f626">C92-1029</url>
       <bibkey>kurohashi-nagao-1992-dynamic</bibkey>
     </paper>
@@ -205,7 +205,7 @@
     </paper>
     <paper id="31">
       <title>The Proper Treatment of Word Order in Hpsg</title>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="4438c7ac">C92-1031</url>
       <bibkey>oliva-1992-proper</bibkey>
     </paper>
@@ -217,13 +217,13 @@
     </paper>
     <paper id="33">
       <title><fixed-case>TTP</fixed-case>: A Fast and Robust Parser for Natural Language</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="9d0a9d58">C92-1033</url>
       <bibkey>strzalkowski-1992-ttp</bibkey>
     </paper>
     <paper id="34">
       <title>Structure Sharing in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="aee81bc2">C92-1034</url>
       <bibkey>vijay-shanker-schabes-1992-structure</bibkey>
@@ -262,7 +262,7 @@
     </paper>
     <paper id="40">
       <title>Conceptual Structures and <fixed-case>CCG</fixed-case>: Linking Theory and Incorporated Argument Adjuncts</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <url hash="5e4bd64c">C92-1040</url>
       <bibkey>white-1992-conceptual</bibkey>
     </paper>
@@ -287,14 +287,14 @@
     <paper id="44">
       <title>An Acquisition Model for both Choosing and Resolving Anaphora in Conjoined <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Sentences</title>
       <author><first>Benjamin L.</first><last>Chen</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <url hash="77e0f2f2">C92-1044</url>
       <bibkey>chen-soo-1992-acquisition</bibkey>
     </paper>
     <paper id="45">
       <title>Aspect-Switching and Subordination: the Role of It-Clefts in Discourse</title>
       <author><first>Judy</first><last>Delin</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <url hash="d5a553dc">C92-1045</url>
       <bibkey>delin-oberlander-1992-aspect</bibkey>
     </paper>
@@ -320,7 +320,7 @@
     <paper id="49">
       <title>Using Linguistic, World, and Contextual Knowledge in a Plan Recognition Model of Dialogue</title>
       <author><first>Lynn</first><last>Lambert</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="00dd6e08">C92-1049</url>
       <bibkey>lambert-carberry-1992-using</bibkey>
     </paper>
@@ -338,8 +338,8 @@
     </paper>
     <paper id="52">
       <title>Temporal Structure of Discourse</title>
-      <author><first>Irene Pimenta</first><last>Rodrigues</last></author>
-      <author><first>Jose Gabriel P.</first><last>Lopes</last></author>
+      <author id="irene-rodrigues"><first>Irene Pimenta</first><last>Rodrigues</last></author>
+      <author id="gabriel-lopes"><first>Jose Gabriel P.</first><last>Lopes</last></author>
       <url hash="e174feb4">C92-1052</url>
       <bibkey>rodrigues-lopes-1992-temporal</bibkey>
     </paper>
@@ -351,13 +351,13 @@
     </paper>
     <paper id="54">
       <title>Redundancy in Collaborative Dialogue</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="cb597cd0">C92-1054</url>
       <bibkey>walker-1992-redundancy</bibkey>
     </paper>
     <paper id="55">
       <title>Syntactic Ambiguity Resolution Using A Discrimination and Robustness Oriented Adaptive Learning Algorithm</title>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Yi-Chung</first><last>Lin</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="a4a5417f">C92-1055</url>
@@ -365,9 +365,9 @@
     </paper>
     <paper id="56">
       <title>Lexical Disambiguation using Simulated Annealing</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <author><first>Joe</first><last>Guthrie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <url hash="ab2dcd96">C92-1056</url>
       <bibkey>cowie-etal-1992-lexical-disambiguation</bibkey>
     </paper>
@@ -407,7 +407,7 @@
     <paper id="62">
       <title>A Chart-based Method of <fixed-case>ID</fixed-case>/<fixed-case>LP</fixed-case> Parsing with Generalized Discrimination Networks</title>
       <author><first>Surapant</first><last>Meknavin</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <url hash="cc947831">C92-1062</url>
       <bibkey>meknavin-etal-1992-chart</bibkey>
@@ -464,7 +464,7 @@
     <paper id="69">
       <title>A Linear Least Squares Fit Mapping Method for Information Retrieval From Natural Language Texts</title>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Christopher G.</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher G.</first><last>Chute</last></author>
       <url hash="66b2676b">C92-2069</url>
       <bibkey>yang-chute-1992-linear</bibkey>
     </paper>
@@ -479,7 +479,7 @@
       <author><first>Alain</first><last>Berrendonner</last></author>
       <author><first>Mounia</first><last>Fredj</last></author>
       <author><first>Flavio</first><last>Oquendo</last></author>
-      <author><first>Jacques</first><last>Rouault</last></author>
+      <author id="jacques-rouault"><first>Jacques</first><last>Rouault</last></author>
       <url hash="c6720243">C92-2071</url>
       <bibkey>berrendonner-etal-1992-un</bibkey>
     </paper>
@@ -506,7 +506,7 @@
     </paper>
     <paper id="75">
       <title><fixed-case>T</fixed-case>alisman: Un Systeme Multi-Agents Gouverne Par Des Lois Linguistiques Pour Le Traitement De La Langue Naturelle</title>
-      <author><first>Marie-Helene</first><last>Stefanini</last></author>
+      <author id="marie-helene-stefanini"><first>Marie-Helene</first><last>Stefanini</last></author>
       <author><first>Alain</first><last>Berrendonner</last></author>
       <author><first>Genevieve</first><last>Lallich</last></author>
       <author><first>Flavio</first><last>Oquendo</last></author>
@@ -515,7 +515,7 @@
     </paper>
     <paper id="76">
       <title>Disjunctive Feature Structures as Hypergraphs</title>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <url hash="c0a651db">C92-2076</url>
       <bibkey>veronis-1992-disjunctive</bibkey>
     </paper>
@@ -525,7 +525,7 @@
       <author><first>L.</first><last>Kogan</last></author>
       <author><first>W.</first><last>Kwitakowski</last></author>
       <author><first>R.</first><last>Minvaleev</last></author>
-      <author><first>R.</first><last>Piotrowski</last></author>
+      <author id="r-piotrowski"><first>R.</first><last>Piotrowski</last></author>
       <author><first>V.</first><last>Shumovsky</last></author>
       <author><first>E.</first><last>Tioun</last></author>
       <author><first>Yu.</first><last>Tovmach</last></author>
@@ -549,28 +549,28 @@
     </paper>
     <paper id="80">
       <title>Translation Ambiguity Resolution Based on Text Corpora of Source and Target Languages</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <url hash="41c147bf">C92-2080</url>
       <bibkey>doi-muraki-1992-translation</bibkey>
     </paper>
     <paper id="81">
       <title>The Automatic Creation of Lexical Entries for a Multilingual <fixed-case>MT</fixed-case> System</title>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="05c9de9e">C92-2081</url>
       <bibkey>farwell-etal-1992-automatic</bibkey>
     </paper>
     <paper id="82">
       <title>Automatic Acquisition of Hyponyms from Large Text Corpora</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <url hash="1d98249a">C92-2082</url>
       <bibkey>hearst-1992-automatic</bibkey>
     </paper>
     <paper id="83">
       <title>Structural Patterns vs. String Patterns for Extracting Semantic Information from Dictionaries</title>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <url hash="aecf418c">C92-2083</url>
       <bibkey>montemagni-vanderwende-1992-structural</bibkey>
@@ -578,8 +578,8 @@
     <paper id="84">
       <title>Derivation of Underlying Valency Frames From a Learner’s Dictionary</title>
       <author><first>Alexandr</first><last>Rosen</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <url hash="779552cf">C92-2084</url>
       <bibkey>rosen-etal-1992-derivation</bibkey>
     </paper>
@@ -587,8 +587,8 @@
       <title>Linguistic Knowledge Generator</title>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <author><first>Sofia</first><last>Ananiadou</last></author>
-      <author><first>Jeremy J.</first><last>Carroll</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="jeremy-j-carroll"><first>Jeremy J.</first><last>Carroll</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="d6c2ac03">C92-2085</url>
       <bibkey>sekine-etal-1992-linguistic</bibkey>
     </paper>
@@ -605,7 +605,7 @@
       <title>Logical Form of Hierarchical Relation on Verbs and Extracting it from Definition Sentences in a <fixed-case>J</fixed-case>apanese Dictionary</title>
       <author><first>Yoichi</first><last>Tomiura</last></author>
       <author><first>Teigo</first><last>Nakamura</last></author>
-      <author><first>Toru</first><last>Hitaka</last></author>
+      <author id="toru-hitaka"><first>Toru</first><last>Hitaka</last></author>
       <author><first>Sho</first><last>Yoshida</last></author>
       <url hash="bce2d41d">C92-2087</url>
       <bibkey>tomiura-etal-1992-logical</bibkey>
@@ -613,21 +613,21 @@
     <paper id="88">
       <title>Lexical Knowledge Acquisition from Bilingual Corpora</title>
       <author><first>Takehito</first><last>Utsuro</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="6a73f0af">C92-2088</url>
       <bibkey>utsuro-etal-1992-lexical</bibkey>
     </paper>
     <paper id="89">
       <title>A Feature-Based Model for Lexical Databases</title>
-      <author><first>Jean</first><last>Veronis</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="6a3c07b0">C92-2089</url>
       <bibkey>veronis-ide-1992-feature</bibkey>
     </paper>
     <paper id="90">
       <title>From Cogram to Alcogram: Toward a Controlled <fixed-case>E</fixed-case>nglish Grammar Checker</title>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <author><first>Dirk</first><last>Schreurs</last></author>
       <url hash="26f369b2">C92-2090</url>
       <bibkey>adriaens-schreurs-1992-cogram</bibkey>
@@ -652,14 +652,14 @@
     </paper>
     <paper id="94">
       <title>Parameterization of the Interlingua in Machine Translation</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <url hash="2d7acc2a">C92-2094</url>
       <bibkey>dorr-1992-parameterization</bibkey>
     </paper>
     <paper id="95">
       <title>Isolating Cross-linguistic Parsing Complexity with a Principles-and-Parameters Parser: A Case Study of <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Sandiway</first><last>Fong</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <url hash="45255df0">C92-2095</url>
       <bibkey>fong-berwick-1992-isolating</bibkey>
     </paper>
@@ -678,20 +678,20 @@
     </paper>
     <paper id="98">
       <title>Aspect - A Problem for <fixed-case>MT</fixed-case></title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <url hash="ec816e93">C92-2098</url>
       <bibkey>gawronska-1992-aspect</bibkey>
     </paper>
     <paper id="99">
       <title>Acquisition of Selectional Patterns</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="e0ea9458">C92-2099</url>
       <bibkey>grishman-sterling-1992-acquisition</bibkey>
     </paper>
     <paper id="100">
       <title>A Three-level Revision Model for Improving <fixed-case>J</fixed-case>apanese Bad-styled Expressions</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <url hash="0071a61b">C92-2100</url>
       <bibkey>hayashi-1992-three</bibkey>
     </paper>
@@ -707,7 +707,7 @@
       <title>Interaction between Structural Changes in Machine Translation</title>
       <author><first>Satoshi</first><last>Kinoshita</last></author>
       <author id="john-phillips"><first>John</first><last>Phillips</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="18569b2c">C92-2102</url>
       <bibkey>kinoshita-etal-1992-interaction-structural</bibkey>
     </paper>
@@ -727,8 +727,8 @@
     </paper>
     <paper id="105">
       <title>Self-Monitoring with Reversible Grammars</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="cddf377a">C92-2105</url>
       <bibkey>neumann-van-noord-1992-self</bibkey>
     </paper>
@@ -748,7 +748,7 @@
     </paper>
     <paper id="108">
       <title>Preventing False Temporal Implicatures: Interactive Defaults for Text Generation</title>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <url hash="a81206f4">C92-2108</url>
       <bibkey>oberlander-lascarides-1992-preventing</bibkey>
@@ -771,7 +771,7 @@
     <paper id="111">
       <title>Explanatory Text Planning in Logic Based Systems</title>
       <author><first>Clarisse Sieckenius</first><last>de Souza</last></author>
-      <author><first>Maria das Gracas</first><last>Volpe</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Gracas</first><last>Volpe</last></author>
       <url hash="25370221">C92-2111</url>
       <bibkey>de-souza-volpe-1992-explanatory</bibkey>
     </paper>
@@ -792,8 +792,8 @@
     </paper>
     <paper id="114">
       <title>Lexical choice in context: generating procedural texts</title>
-      <author><first>Agnes</first><last>Tutin</last></author>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="agnes-tutin"><first>Agnes</first><last>Tutin</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <url hash="7d3699d0">C92-2114</url>
       <bibkey>tutin-kittredge-1992-lexical</bibkey>
     </paper>
@@ -838,13 +838,13 @@
     <paper id="121">
       <title>Semantic Network Array Processor as a Massively Parallel Computing Platform for High Performance and Large-Scale Natural Language Processing</title>
       <author><first>Hiroaki</first><last>Kitano</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <url hash="a98fae81">C92-2121</url>
       <bibkey>kitano-moldovan-1992-semantic</bibkey>
     </paper>
     <paper id="122">
       <title>A Case Study of Natural Language Customisation: The Practical Effects of World Knowledge</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <author><first>Andrew L.</first><last>Nelson</last></author>
       <author><first>Phil</first><last>Stenton</last></author>
       <url hash="dafec7a0">C92-2122</url>
@@ -852,7 +852,7 @@
     </paper>
     <paper id="123">
       <title>Towards Computer-Aided Linguistic Engineering</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="13e92d16">C92-2123</url>
       <bibkey>zajac-1992-towards</bibkey>
     </paper>
@@ -898,7 +898,7 @@
     </paper>
     <paper id="129">
       <title>The Ips System</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="a25bc1b3">C92-3129</url>
       <bibkey>wehrli-1992-ips</bibkey>
     </paper>
@@ -911,21 +911,21 @@
     </paper>
     <paper id="131">
       <title>Causal ambiguity in Natural Language: conceptual representation of ‘parce que/because’ and ‘puisque/since’</title>
-      <author><first>Adeline</first><last>Nazarenko-Perrin</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko-Perrin</last></author>
       <url hash="83fc42b9">C92-3131</url>
       <bibkey>nazarenko-perrin-1992-causal</bibkey>
     </paper>
     <paper id="132">
       <title>Surface and Deep Cases</title>
-      <author><first>Jarmila</first><last>Panevová</last></author>
-      <author><first>Hana</first><last>Skoumalova</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalova</last></author>
       <url hash="8cb6e5ae">C92-3132</url>
       <bibkey>panevova-skoumalova-1992-surface</bibkey>
     </paper>
     <paper id="133">
       <title>An Integrated Syntactic and Semantic System for Natural Language Understanding</title>
-      <author><first>Frederique</first><last>Segond</last></author>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <url hash="12072667">C92-3133</url>
       <bibkey>segond-jensen-1992-integrated</bibkey>
     </paper>
@@ -950,25 +950,25 @@
     </paper>
     <paper id="137">
       <title>Attitude Emergence - An Effective Interpretation Scheme for Persuasive Discourse</title>
-      <author><first>Horng-Jyh P.</first><last>Wu</last></author>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="horng-jyh-paul-wu"><first>Horng-Jyh P.</first><last>Wu</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <url hash="f8603a28">C92-3137</url>
       <bibkey>wu-lytinen-1992-attitude</bibkey>
     </paper>
     <paper id="138">
       <title>The Nondirectional Representation of Systemic Functional Grammars and Semantics as Typed Feature Structures</title>
-      <author><first>John A.</first><last>Bateman</last></author>
-      <author><first>Martin</first><last>Emele</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
+      <author id="martin-c-emele"><first>Martin</first><last>Emele</last></author>
       <author><first>Stefan</first><last>Momma</last></author>
       <url hash="931b69b9">C92-3138</url>
       <bibkey>bateman-etal-1992-nondirectional</bibkey>
     </paper>
     <paper id="139">
       <title>A Statistical Approach to Machine Aided Translation of Terminology <fixed-case>B</fixed-case>anks</title>
-      <author><first>Jyun-Sheng</first><last>Chang</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-Sheng</first><last>Chang</last></author>
       <author><first>Andrew</first><last>Chang</last></author>
       <author><first>Tsuey-Fen</first><last>Lin</last></author>
-      <author><first>Sur-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sur-Jin</first><last>Ker</last></author>
       <url hash="e11ae395">C92-3139</url>
       <bibkey>chang-etal-1992-statistical</bibkey>
     </paper>
@@ -998,7 +998,7 @@
     <paper id="143">
       <title>Coupling an Automatic Dictation System With a Grammar Checker</title>
       <author><first>Jean-Pierre</first><last>Chanod</last></author>
-      <author><first>Marc</first><last>El-Beze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Beze</last></author>
       <author><first>Sylvie</first><last>Guillemin-Lanne</last></author>
       <url hash="0227a8a4">C92-3143</url>
       <bibkey>chanod-etal-1992-coupling</bibkey>
@@ -1037,7 +1037,7 @@
     <paper id="148">
       <title>Multilinguisation d’un editeur de documents structures. Application a un dictionnaire trilingue</title>
       <author><first>Huy Khanh</first><last>Phan</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <url hash="a886bee2">C92-3148</url>
       <bibkey>phan-boitet-1992-multilinguisation</bibkey>
     </paper>
@@ -1083,7 +1083,7 @@
     <paper id="155">
       <title><fixed-case>JDII</fixed-case>: Parsing <fixed-case>I</fixed-case>talian with a Robust Constraint Grammar</title>
       <author><first>Andrea</first><last>Bolioli</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Giovanni</first><last>Malnati</last></author>
       <url hash="775fd8c1">C92-3155</url>
       <bibkey>bolioli-etal-1992-jdii</bibkey>
@@ -1092,7 +1092,7 @@
       <title>Parsing and Case Analysis in <fixed-case>TANKA</fixed-case></title>
       <author><first>Terry</first><last>Copeck</last></author>
       <author><first>Sylvain</first><last>Delisle</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <url hash="c82f3228">C92-3156</url>
       <bibkey>copeck-etal-1992-parsing</bibkey>
     </paper>
@@ -1115,7 +1115,7 @@
     </paper>
     <paper id="159">
       <title>Generation of Informative Texts with Style</title>
-      <author><first>Stephan M.</first><last>Kerpedjiev</last></author>
+      <author id="stephan-m-kerpedjiev"><first>Stephan M.</first><last>Kerpedjiev</last></author>
       <url hash="ecfa9bfc">C92-3159</url>
       <bibkey>kerpedjiev-1992-generation</bibkey>
     </paper>
@@ -1138,27 +1138,27 @@
     </paper>
     <paper id="162">
       <title>A Knowledge-based Machine-aided System for <fixed-case>C</fixed-case>hinese Text Abstraction</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>Hing-cheung</first><last>Ho</last></author>
-      <author><first>Tom Bong-yeung</first><last>Lai</last></author>
-      <author><first>Caesar Suen</first><last>Lun</last></author>
-      <author><first>Hing-lung</first><last>Lin</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="hing-cheung-ho"><first>Hing-cheung</first><last>Ho</last></author>
+      <author id="tom-bong-yeung-lai"><first>Tom Bong-yeung</first><last>Lai</last></author>
+      <author id="suen-caesar-lun"><first>Caesar Suen</first><last>Lun</last></author>
+      <author id="hing-lung-lin"><first>Hing-lung</first><last>Lin</last></author>
       <url hash="78c604da">C92-3162</url>
       <bibkey>tsou-etal-1992-knowledge</bibkey>
     </paper>
     <paper id="163">
       <title>Interaction Between Lexicon and Image: Linguistic Specifications of Animation</title>
       <author><first>Maryvonne</first><last>Abraham</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <url hash="57b175de">C92-3163</url>
       <bibkey>abraham-descles-1992-interaction</bibkey>
     </paper>
     <paper id="164">
       <title>A Spoken Language Translation System: <fixed-case>SL-TRANS</fixed-case>2</title>
-      <author><first>Tsuyoshi</first><last>Morimoto</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyoshi</first><last>Morimoto</last></author>
       <author><first>Masami</first><last>Suzuki</last></author>
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
-      <author><first>Gen’ichiro</first><last>Kikui</last></author>
+      <author id="gen-ichiro-kikui"><first>Gen’ichiro</first><last>Kikui</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
       <url hash="cd9c200d">C92-3164</url>
@@ -1186,7 +1186,7 @@
     </paper>
     <paper id="168">
       <title>The <fixed-case>KANT</fixed-case> System: Fast, Accurate, High-Quality Translation in Practical Domains</title>
-      <author><first>Eric H.</first><last>Nyberg III</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg III</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <url hash="6694efe1">C92-3168</url>
       <bibkey>nyberg-iii-mitamura-1992-kant</bibkey>
@@ -1209,7 +1209,7 @@
     </frontmatter>
     <paper id="170">
       <title>The Assignment of Grammatical Relations in Natural Language Processing</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <url hash="ff0ca814">C92-4170</url>
       <bibkey>lesmo-lombardo-1992-assignment</bibkey>
@@ -1228,8 +1228,8 @@
     </paper>
     <paper id="173">
       <title>Tokenization as the Initial Phase in <fixed-case>NLP</fixed-case></title>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="22792f2d">C92-4173</url>
       <bibkey>webster-kit-1992-tokenization</bibkey>
     </paper>
@@ -1242,7 +1242,7 @@
     </paper>
     <paper id="175">
       <title>Embedding <fixed-case>DRT</fixed-case> in a Situation Theoretic Framework</title>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <url hash="38e632f8">C92-4175</url>
       <bibkey>black-1992-embedding</bibkey>
     </paper>
@@ -1254,8 +1254,8 @@
     </paper>
     <paper id="177">
       <title>Degrees of Stativity: The Lexical Representation of Verb Aspect</title>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <url hash="7f56bb1e">C92-4177</url>
       <bibkey>klavans-chodorow-1992-degrees</bibkey>
     </paper>
@@ -1267,20 +1267,20 @@
     </paper>
     <paper id="179">
       <title>An Alternative to Deep Case for Representing Relational Information</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="4d02e385">C92-4179</url>
       <bibkey>ward-1992-alternative</bibkey>
     </paper>
     <paper id="180">
       <title>Preferred Argument Structure for Discourse Understanding</title>
-      <author><first>Ka-Wai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Ka-Wai</first><last>Chui</last></author>
       <url hash="e7d3427c">C92-4180</url>
       <bibkey>chui-1992-preferred</bibkey>
     </paper>
     <paper id="181">
       <title>On the Interpretation of Natural Language Instructions</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <url hash="d6a4baba">C92-4181</url>
       <bibkey>di-eugenio-white-1992-interpretation</bibkey>
     </paper>
@@ -1306,14 +1306,14 @@
     </paper>
     <paper id="185">
       <title>Unifying Disjunctive Feature Structures</title>
-      <author><first>Lena</first><last>Stromback</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Stromback</last></author>
       <url hash="e658830a">C92-4185</url>
       <bibkey>stromback-1992-unifying</bibkey>
     </paper>
     <paper id="186">
       <title>Ebl²: An Approach to Automatic Lexical Acquisition</title>
       <author><first>Lars</first><last>Asker</last></author>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <author><first>Christer</first><last>Samuelsson</last></author>
       <url hash="32666972">C92-4186</url>
       <bibkey>asker-etal-1992-ebl2</bibkey>
@@ -1332,22 +1332,22 @@
     </paper>
     <paper id="188">
       <title>Converting Large On-Line Valency Dictionaries for <fixed-case>NLP</fixed-case> Applications: From Proton Descriptions to Metal Frames</title>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <author><first>Gert</first><last>De Braekeleer</last></author>
       <url hash="d2fef18c">C92-4188</url>
       <bibkey>adriaens-de-braekeleer-1992-converting</bibkey>
     </paper>
     <paper id="189">
       <title>Genus Disambiguation: A Study in Weighted Preference</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <url hash="c3071075">C92-4189</url>
       <bibkey>bruce-guthrie-1992-genus</bibkey>
     </paper>
     <paper id="190">
       <title>Can Computers Handle Adverbs?</title>
       <author><first>Sumali Pin-Ngern</first><last>Conlon</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="9da425f3">C92-4190</url>
       <bibkey>conlon-evens-1992-computers</bibkey>
     </paper>
@@ -1379,7 +1379,7 @@
     <paper id="194">
       <title>A <fixed-case>C</fixed-case>hinese Corpus for Linguistic Research</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <url hash="d18496eb">C92-4194</url>
       <bibkey>huang-chen-1992-chinese</bibkey>
     </paper>
@@ -1407,7 +1407,7 @@
     </paper>
     <paper id="198">
       <title>A Solution for the Problem of Interactive Disambiguation</title>
-      <author><first>Herve</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Herve</first><last>Blanchon</last></author>
       <url hash="80e16d18">C92-4198</url>
       <bibkey>blanchon-1992-solution</bibkey>
     </paper>
@@ -1421,7 +1421,7 @@
     </paper>
     <paper id="200">
       <title>Knowledge Extraction From Texts by Sintesi</title>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <author><first>Paolo</first><last>Campia</last></author>
       <author><first>Alberto</first><last>Colognese</last></author>
       <url hash="ec51be0e">C92-4200</url>
@@ -1438,33 +1438,33 @@
     <paper id="202">
       <title>Hierarchical Lexical Structure and Interpretive Mapping in Machine Translation</title>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric H.</first><last>Nyberg III</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg III</last></author>
       <url hash="aefb388a">C92-4202</url>
       <bibkey>mitamura-nyberg-iii-1992-hierarchical</bibkey>
     </paper>
     <paper id="203">
       <title><fixed-case>CTM</fixed-case>: An Example-Based Translation Aid System</title>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <url hash="d55fec2a">C92-4203</url>
       <bibkey>sato-1992-ctm</bibkey>
     </paper>
     <paper id="204">
       <title>Applying and Improving the Restriction Grammar Approach for <fixed-case>D</fixed-case>utch Patient Discharge Summaries</title>
-      <author><first>Peter</first><last>Spyns</last></author>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <url hash="f6ae467a">C92-4204</url>
       <bibkey>spyns-adriaens-1992-applying</bibkey>
     </paper>
     <paper id="205">
       <title>Event Relations at the Phonetics/Phonology Interface</title>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Dafydd</first><last>Gibbon</last></author>
       <url hash="6713385d">C92-4205</url>
       <bibkey>carson-berndsen-gibbon-1992-event</bibkey>
     </paper>
     <paper id="206">
       <title>Multimodal Database Query</title>
-      <author><first>Nicholas J.</first><last>Haddock</last></author>
+      <author id="nicholas-j-haddock"><first>Nicholas J.</first><last>Haddock</last></author>
       <url hash="8cf84f66">C92-4206</url>
       <bibkey>haddock-1992-multimodal</bibkey>
     </paper>
@@ -1473,7 +1473,7 @@
       <author><first>Atsushi</first><last>Yamada</last></author>
       <author><first>Tadashi</first><last>Yamamoto</last></author>
       <author><first>Hisashi</first><last>Ikeda</last></author>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <url hash="35dd57aa">C92-4207</url>
       <bibkey>yamada-etal-1992-reconstructing</bibkey>
@@ -1496,7 +1496,7 @@
     <paper id="210">
       <title>Semantic dictionary viewed as a lexical database</title>
       <author><first>Elena V.</first><last>Paducheva</last></author>
-      <author><first>Ekaterina V.</first><last>Rakhilina</last></author>
+      <author id="ekaterina-v-rakhilina"><first>Ekaterina V.</first><last>Rakhilina</last></author>
       <author><first>Marina V.</first><last>Filipenko</last></author>
       <url hash="5983a47c">C92-4210</url>
       <bibkey>paducheva-etal-1992-semantic</bibkey>
@@ -1504,7 +1504,7 @@
     <paper id="211">
       <title>Knowledge Acquisition and <fixed-case>C</fixed-case>hinese Parsing Based on Corpus</title>
       <author><last>Yuan</last><first>Chunfa</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><last>Pan</last><first>Shimei</first></author>
       <url hash="bf8b7ebd">C92-4211</url>
       <bibkey>yuan-etal-1992-knowledge</bibkey>
@@ -1524,7 +1524,7 @@
     <paper id="214">
       <title>Marking and Tagging a Computerized Corpus</title>
       <author><last>Eriksson</last><first>Gunnar</first></author>
-      <author><last>Kallgren</last><first>Gunnel</first></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Kallgren</last></author>
       <url hash="a057c1b6">C92-4214</url>
       <bibkey>eriksson-kallgren-1992-marking</bibkey>
     </paper>
diff --git a/data/xml/C94.xml b/data/xml/C94.xml
index aa4712f396..3e9917cb61 100644
--- a/data/xml/C94.xml
+++ b/data/xml/C94.xml
@@ -16,7 +16,7 @@
       <title>Improvement in Customizability Using Translation Templates</title>
       <author><first>Satoshi</first><last>Kinoshita</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <url hash="0bf819a4">C94-1001</url>
       <bibkey>kinoshita-etal-1994-improvement</bibkey>
     </paper>
@@ -36,7 +36,7 @@
     </paper>
     <paper id="4">
       <title>Interpreting Compounds for Machine Translation</title>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <author><first>Anders</first><last>Nordner</last></author>
       <author><first>Christer</first><last>Johansson</last></author>
       <author><first>Caroline</first><last>Willners</last></author>
@@ -47,7 +47,7 @@
       <title>Towards Machine Translation Using Contextual Information</title>
       <author><first>Tim</first><last>Cornish</last></author>
       <author><first>Kimikazu</first><last>Fujita</last></author>
-      <author><first>Ryochi</first><last>Sugimura</last></author>
+      <author id="ryochi-sugimura"><first>Ryochi</first><last>Sugimura</last></author>
       <url hash="331364e2">C94-1005</url>
       <bibkey>cornish-etal-1994-towards</bibkey>
     </paper>
@@ -80,7 +80,7 @@
     <paper id="9">
       <title>Building an <fixed-case>MT</fixed-case> Dictionary From Parallel Texts Based on Linguistic and Statistical Information</title>
       <author><first>Akira</first><last>Kumano</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <url hash="3537d1c9">C94-1009</url>
       <bibkey>kumano-hirakawa-1994-building</bibkey>
     </paper>
@@ -102,27 +102,27 @@
     </paper>
     <paper id="12">
       <title>Coping With Ambiguity in a Large-Scale Machine Translation System</title>
-      <author><first>Kathryn L.</first><last>Baker</last></author>
-      <author><first>Alexander M.</first><last>Franz</last></author>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="kathryn-baker"><first>Kathryn L.</first><last>Baker</last></author>
+      <author id="alexander-franz"><first>Alexander M.</first><last>Franz</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric H.</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg</last></author>
       <url hash="f8dcf781">C94-1012</url>
       <bibkey>baker-etal-1994-coping</bibkey>
     </paper>
     <paper id="13">
       <title>Evaluation Metrics for Knowledge-Based Machine Translation</title>
-      <author><first>Eric H.</first><last>Nyberg, 3rd</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg, 3rd</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <url hash="4a160436">C94-1013</url>
       <bibkey>nyberg-3rd-etal-1994-evaluation</bibkey>
     </paper>
     <paper id="14">
       <title>A Matching Technique in Example-Based Machine Translation</title>
-      <author><first>Lambros</first><last>Cranias</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
-      <author><first>Stelios</first><last>Piperdis</last></author>
+      <author id="lambros-cranias"><first>Lambros</first><last>Cranias</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperdis</last></author>
       <url hash="3c8c8279">C94-1014</url>
       <bibkey>cranias-etal-1994-matching</bibkey>
     </paper>
@@ -136,40 +136,40 @@
     <paper id="16">
       <title>The <fixed-case>J</fixed-case>a<fixed-case>RAP</fixed-case> Experimental System of <fixed-case>J</fixed-case>apanese-<fixed-case>R</fixed-case>ussian Automatic Translation</title>
       <author><first>Larisa S.</first><last>Modina</last></author>
-      <author><first>Zoya M.</first><last>Shalyapina</last></author>
+      <author id="zoya-m-shalyapina"><first>Zoya M.</first><last>Shalyapina</last></author>
       <url hash="0fb20da2">C94-1016</url>
       <bibkey>modina-shalyapina-1994-jarap</bibkey>
     </paper>
     <paper id="17">
       <title>Perspectives of <fixed-case>DBMT</fixed-case> for monolingual authors on the basis of <fixed-case>LIDIA</fixed-case>-1, an implemented mock-up</title>
-      <author><first>Herve</first><last>Blanchon</last></author>
+      <author id="herve-blanchon"><first>Herve</first><last>Blanchon</last></author>
       <url hash="d8eed1dd">C94-1017</url>
       <bibkey>blanchon-1994-perspectives</bibkey>
     </paper>
     <paper id="18">
       <title>Modals as a Problem for <fixed-case>MT</fixed-case></title>
       <author><first>Bengt</first><last>Sigurd</last></author>
-      <author><first>Barbara</first><last>Gawronska</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawronska</last></author>
       <url hash="cf479038">C94-1018</url>
       <bibkey>sigurd-gawronska-1994-modals</bibkey>
     </paper>
     <paper id="19">
       <title>Two Types of Adaptive <fixed-case>MT</fixed-case> Environments</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="85fa73aa">C94-1019</url>
       <bibkey>nirenburg-etal-1994-two</bibkey>
     </paper>
     <paper id="20">
       <title>An <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Machine Translator: <fixed-case>MATES/EK</fixed-case></title>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Seungmi</first><last>Lee</last></author>
       <author><first>Hiongun</first><last>Kim</last></author>
-      <author><first>Deok-bong</first><last>Kim</last></author>
-      <author><first>Cheoljung</first><last>Kweon</last></author>
-      <author><first>Gilchang</first><last>Kim</last></author>
+      <author id="deok-bong-kim"><first>Deok-bong</first><last>Kim</last></author>
+      <author id="cheol-jung-kweon"><first>Cheoljung</first><last>Kweon</last></author>
+      <author id="gil-chang-kim"><first>Gilchang</first><last>Kim</last></author>
       <url hash="9209d9ad">C94-1020</url>
       <bibkey>choi-etal-1994-english</bibkey>
     </paper>
@@ -181,7 +181,7 @@
     </paper>
     <paper id="22">
       <title>Morphology with a Null-Interface</title>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
       <url hash="8b21f114">C94-1022</url>
       <bibkey>trost-matiasek-1994-morphology</bibkey>
@@ -189,27 +189,27 @@
     <paper id="23">
       <title><fixed-case>AUTOMATIC</fixed-case> <fixed-case>MODEL</fixed-case> <fixed-case>REFINEMENT</fixed-case> - with an application to tagging</title>
       <author><first>Yi-Chung</first><last>Lin</last></author>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="f2a3f683">C94-1023</url>
       <bibkey>lin-etal-1994-automatic</bibkey>
     </paper>
     <paper id="24">
       <title>Disambiguation of Super Parts of Speech (or Supertags): Almost Parsing</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <url hash="76f09008">C94-1024</url>
       <bibkey>joshi-srinivas-1994-disambiguation</bibkey>
     </paper>
     <paper id="25">
       <title>Probabilistic Tagging With Feature Structures</title>
-      <author><first>Andre</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>Andre</first><last>Kempe</last></author>
       <url hash="9bb10ca3">C94-1025</url>
       <bibkey>kempe-1994-probabilistic</bibkey>
     </paper>
     <paper id="26">
       <title>A Part-of-Speech-Based Alignment Algorithm</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <url hash="d492a290">C94-1026</url>
       <bibkey>chen-chen-1994-part</bibkey>
@@ -223,13 +223,13 @@
     <paper id="28">
       <title>The Rumors System of <fixed-case>R</fixed-case>ussian Synthesis</title>
       <author><first>Max I.</first><last>Kanovich</last></author>
-      <author><first>Zoyn M.</first><last>Shalyapina</last></author>
+      <author id="zoya-m-shalyapina"><first>Zoyn M.</first><last>Shalyapina</last></author>
       <url hash="908d04a3">C94-1028</url>
       <bibkey>kanovich-shalyapina-1994-rumors</bibkey>
     </paper>
     <paper id="29">
       <title>MULTI-TAPE TWO-LEVEL MORPHOLOGY: A Case Study in <fixed-case>S</fixed-case>emitic Non-linear Morphology</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <url hash="7cb3c1c7">C94-1029</url>
       <bibkey>kiraz-1994-multi</bibkey>
     </paper>
@@ -270,14 +270,14 @@
     <paper id="35">
       <title>Syllable-Based Model for the <fixed-case>K</fixed-case>orean Morphology</title>
       <author><first>Seung-Shik</first><last>Kang</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <url hash="0a73eda1">C94-1035</url>
       <bibkey>kang-kim-1994-syllable</bibkey>
     </paper>
     <paper id="36">
       <title>Segmenting a Sentence Into Morphemes Using Statistic Information Between Words</title>
       <author><first>Shilm</first><last>Nobesawa</last></author>
-      <author><first>Junya</first><last>Tsutsumi</last></author>
+      <author id="junya-tsutsumi"><first>Junya</first><last>Tsutsumi</last></author>
       <author><first>Tomoaki</first><last>Nitta</last></author>
       <author><first>Kotaro</first><last>Ono</last></author>
       <author><first>Sun Da</first><last>Jiang</last></author>
@@ -295,14 +295,14 @@
     <paper id="38">
       <title>AN ARCHITECTURE FOR A UNIVERSAL LEXICON: A Case Study on Shared Syntactic Information in <fixed-case>J</fixed-case>apanese, <fixed-case>H</fixed-case>indi, <fixed-case>B</fixed-case>engali, <fixed-case>G</fixed-case>reek, and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Naoyuki</first><last>Nomura</last></author>
-      <author><first>Douglas A.</first><last>Jones</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="douglas-jones"><first>Douglas A.</first><last>Jones</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <url hash="5b726544">C94-1038</url>
       <bibkey>nomura-etal-1994-architecture</bibkey>
     </paper>
     <paper id="39">
       <title>Adjuncts and the Processing of Lexical Rules</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <url hash="57089335">C94-1039</url>
       <bibkey>van-noord-bouma-1994-adjuncts</bibkey>
@@ -328,9 +328,9 @@
     </paper>
     <paper id="42">
       <title>Comlex Syntax: Building a Computational Lexicon</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <url hash="7eda937b">C94-1042</url>
       <bibkey>grishman-etal-1994-comlex</bibkey>
     </paper>
@@ -342,7 +342,7 @@
     </paper>
     <paper id="44">
       <title>lnterlinguai Lexical Organisation for Multilingual Lexical Databases in <fixed-case>NADIA</fixed-case></title>
-      <author><first>Gilles</first><last>Serasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Serasset</last></author>
       <url hash="20c7907b">C94-1044</url>
       <bibkey>serasset-1994-lnterlinguai</bibkey>
     </paper>
@@ -362,13 +362,13 @@
     </paper>
     <paper id="47">
       <title>Logic Compression of Dictionaries for Multilingual Spelling Checkers</title>
-      <author><first>Boubaker</first><last>Meddeb Hamrouni</last></author>
+      <author id="boubaker-meddeb-hamrouni"><first>Boubaker</first><last>Meddeb Hamrouni</last></author>
       <url hash="7971d356">C94-1047</url>
       <bibkey>meddeb-hamrouni-1994-logic</bibkey>
     </paper>
     <paper id="48">
       <title>Construction of a Bilingual Dictionary Intermediated by a Third Language</title>
-      <author><first>Kumiko</first><last>Tanaka</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka</last></author>
       <author><first>Kyoji</first><last>Umemura</last></author>
       <url hash="31d5f0bf">C94-1048</url>
       <bibkey>tanaka-umemura-1994-construction</bibkey>
@@ -396,9 +396,9 @@
     <paper id="52">
       <title><fixed-case>TGE</fixed-case>: Tlinks Generation Environment</title>
       <author><first>Alicia</first><last>Ageno</last></author>
-      <author><first>Francesc</first><last>Ribas</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="francesc-ribas"><first>Francesc</first><last>Ribas</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
       <author><first>Anna</first><last>Samiotou</last></author>
       <url hash="b33b81a9">C94-1052</url>
       <bibkey>ageno-etal-1994-tge</bibkey>
@@ -418,7 +418,7 @@
     </paper>
     <paper id="55">
       <title>Generating Multilingual Documents from a Knowledge Base The <fixed-case>TECHDOC</fixed-case> Project</title>
-      <author><first>Dietmar</first><last>Rosner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rosner</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <url hash="fd4244fd">C94-1055</url>
       <bibkey>rosner-stede-1994-generating</bibkey>
@@ -433,15 +433,15 @@
     </paper>
     <paper id="57">
       <title>The Correct Place of Lexical Semantics in Interlingual <fixed-case>MT</fixed-case></title>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="0d59b4d4">C94-1057</url>
       <bibkey>levin-nirenburg-1994-correct</bibkey>
     </paper>
     <paper id="58">
       <title>Default Handling in Incremental Generation</title>
       <author><first>Karin</first><last>Harbusch</last></author>
-      <author><first>Gen-ichiro</first><last>Kikui</last></author>
+      <author id="gen-ichiro-kikui"><first>Gen-ichiro</first><last>Kikui</last></author>
       <author><first>Anne</first><last>Kilger</last></author>
       <url hash="602c2dec">C94-1058</url>
       <bibkey>harbusch-etal-1994-default</bibkey>
@@ -523,7 +523,7 @@
     </paper>
     <paper id="70">
       <title>The “Whiteboard” Architecture: A Way to Integrate Heterogeneous Components of <fixed-case>NLP</fixed-case> Systems</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Mark</first><last>Seligman</last></author>
       <url hash="dd29cae9">C94-1070</url>
       <bibkey>boitet-seligman-1994-whiteboard</bibkey>
@@ -540,12 +540,12 @@
       <author><first>Rolf</first><last>Backofen</last></author>
       <author><first>Stephan</first><last>Busemann</last></author>
       <author><first>Abdel Kader</first><last>Diagne</last></author>
-      <author><first>Elizabeth A.</first><last>Hinkelman</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="elizabeth-a-hinkelman"><first>Elizabeth A.</first><last>Hinkelman</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Klaus</first><last>Netter</last></author>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Stephen P.</first><last>Spackman</last></author>
       <url hash="4146e43d">C94-1072</url>
@@ -554,7 +554,7 @@
     <paper id="73">
       <title>A Corpus-Based Learning Technique for Building A Self-Extensible Parser</title>
       <author><first>Rey-Long</first><last>Liu</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <url hash="60b1fdee">C94-1073</url>
       <bibkey>liu-soo-1994-corpus</bibkey>
     </paper>
@@ -568,20 +568,20 @@
     </paper>
     <paper id="75">
       <title>A Modular Architecture for Constraint-Based Parsing</title>
-      <author><first>Francois</first><last>Barthelemy</last></author>
+      <author id="francois-barthelemy"><first>Francois</first><last>Barthelemy</last></author>
       <author><first>Francois</first><last>Rouaix</last></author>
       <url hash="9da7d63f">C94-1075</url>
       <bibkey>barthelemy-rouaix-1994-modular</bibkey>
     </paper>
     <paper id="76">
       <title>Minimal Change and Bounded Incremental Parsing</title>
-      <author><first>Mats</first><last>Wiren</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wiren</last></author>
       <url hash="5bc878e0">C94-1076</url>
       <bibkey>wiren-1994-minimal</bibkey>
     </paper>
     <paper id="77">
       <title>Emergent Parsing and Generation with Generalized Chart</title>
-      <author><last>Hasida</last><first>Koiti</first></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="6e80abdc">C94-1077</url>
       <bibkey>hasida-1994-emergent</bibkey>
     </paper>
@@ -615,7 +615,7 @@
     <paper id="82">
       <title><fixed-case>LHIP</fixed-case>: Extended <fixed-case>DCG</fixed-case>s for Configurable Robust Parsing</title>
       <author><first>Afzal</first><last>Ballim</last></author>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <url hash="56a36367">C94-1082</url>
       <bibkey>ballim-russell-1994-lhip</bibkey>
     </paper>
@@ -627,41 +627,41 @@
     </paper>
     <paper id="84">
       <title>Towards Automatic Extraction of Monolingual and Bilingual Terminology</title>
-      <author><first>Beatrice</first><last>Daille</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Jean-Marc</first><last>Lange</last></author>
       <url hash="5e78ec1f">C94-1084</url>
       <bibkey>daille-etal-1994-towards</bibkey>
     </paper>
     <paper id="85">
       <title><fixed-case>F</fixed-case>ax: An Alternative to <fixed-case>SGML</fixed-case></title>
-      <author><first>Kenneth W.</first><last>Church</last></author>
-      <author><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
       <author><first>Jonathan I.</first><last>Helfman</last></author>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <url hash="3c6c987b">C94-1085</url>
       <bibkey>church-etal-1994-fax</bibkey>
     </paper>
     <paper id="86">
       <title>Referring to World Objects With Text and Pictures</title>
-      <author><first>Elisabeth</first><last>Andre</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>Andre</last></author>
       <author><first>Thomas</first><last>Rist</last></author>
       <url hash="662e1860">C94-1086</url>
       <bibkey>andre-rist-1994-referring</bibkey>
     </paper>
     <paper id="87">
       <title>A Two-Level Morphological Analysis of <fixed-case>K</fixed-case>orean</title>
-      <author><first>Deok-Bong</first><last>Kim</last></author>
-      <author><first>Sung-Jin</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
-      <author><first>Gil-Chang</first><last>Kim</last></author>
+      <author id="deok-bong-kim"><first>Deok-Bong</first><last>Kim</last></author>
+      <author id="sungjin-lee"><first>Sung-Jin</first><last>Lee</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
+      <author id="gil-chang-kim"><first>Gil-Chang</first><last>Kim</last></author>
       <url hash="1cf80036">C94-1087</url>
       <bibkey>kim-etal-1994-two</bibkey>
     </paper>
     <paper id="88">
       <title>Character-based Collocation for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Yun-yan</first><last>Yang</last></author>
       <url hash="a3b54bff">C94-1088</url>
       <bibkey>huang-etal-1994-character</bibkey>
@@ -692,7 +692,7 @@
     </paper>
     <paper id="92">
       <title>Annotating 200 Million Words: The Bank of <fixed-case>E</fixed-case>nglish Project</title>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <url hash="ce4bf162">C94-1092</url>
       <bibkey>jarvinen-1994-annotating</bibkey>
     </paper>
@@ -700,32 +700,32 @@
       <title>Restructuring Tagged Corpora with Morpheme Adjustment Rules</title>
       <author><first>Toshihisa</first><last>Tashiro</last></author>
       <author><first>Noriyoshi</first><last>Uratani</last></author>
-      <author><first>Tsuyoshi</first><last>Morimoto</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyoshi</first><last>Morimoto</last></author>
       <url hash="6817644e">C94-1093</url>
       <bibkey>tashiro-etal-1994-restructuring</bibkey>
     </paper>
     <paper id="94">
       <title>Encoding standards for large text resources: The <fixed-case>T</fixed-case>ext <fixed-case>E</fixed-case>ncoding <fixed-case>I</fixed-case>nitiative</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="c5d830bd">C94-1094</url>
       <bibkey>ide-1994-encoding</bibkey>
     </paper>
     <paper id="95">
       <title><fixed-case>INTEX</fixed-case>: A Corpus Processing System</title>
-      <author><first>Max D.</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max D.</first><last>Silberztein</last></author>
       <url hash="4188403b">C94-1095</url>
       <bibkey>silberztein-1994-intex</bibkey>
     </paper>
     <paper id="96">
       <title>An <fixed-case>IBM</fixed-case>-<fixed-case>PC</fixed-case> Environment for <fixed-case>C</fixed-case>hinese Corpus Analysis</title>
-      <author><first>Robert Wing Pong</first><last>Luk</last></author>
+      <author id="robert-wing-pong-luk"><first>Robert Wing Pong</first><last>Luk</last></author>
       <url hash="b5b41f38">C94-1096</url>
       <bibkey>luk-1994-ibm</bibkey>
     </paper>
     <paper id="97">
       <title><fixed-case>MULTEXT</fixed-case>: Multilingual Text Tools and Corpora</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <url hash="f10286cf">C94-1097</url>
       <bibkey>ide-veronis-1994-multext</bibkey>
     </paper>
@@ -740,21 +740,21 @@
     <paper id="99">
       <title>A Tool for Collecting Domain Dependent Sortal Constraints From Corpora</title>
       <author><first>Francois</first><last>Andry</last></author>
-      <author><first>Mark</first><last>Gawron</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="jean-mark-gawron"><first>Mark</first><last>Gawron</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <url hash="1b140a41">C94-1099</url>
       <bibkey>andry-etal-1994-tool</bibkey>
     </paper>
     <paper id="100">
       <title>Building a Lexical Domain Map From Text Corpora</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="57c78aa7">C94-1100</url>
       <bibkey>strzalkowski-1994-building</bibkey>
     </paper>
     <paper id="101">
       <title>A New Method of N-gram Statistics for Large Number of n and Automatic Extraction of Words and Phrases from Large Text Data of <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
       <url hash="972f3dbe">C94-1101</url>
       <bibkey>nagao-mori-1994-new</bibkey>
@@ -776,7 +776,7 @@
     <paper id="104">
       <title>Syntactic Analysis of Natural Language Using Linguistic Rules and Corpus-Based Patterns</title>
       <author><first>Pasi</first><last>Tapanainen</last></author>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <url hash="46015d91">C94-1104</url>
       <bibkey>tapanainen-jarvinen-1994-syntactic</bibkey>
     </paper>
@@ -793,7 +793,7 @@
     </frontmatter>
     <paper id="105">
       <title>Word Sense Acquisition for Multilingual Text Interpretation</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="faaa5457">C94-2105</url>
       <bibkey>jacobs-1994-word</bibkey>
     </paper>
@@ -821,7 +821,7 @@
     </paper>
     <paper id="109">
       <title>The Nature of Near-Synonymic Relations</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <url hash="6e1806cf">C94-2109</url>
       <bibkey>dimarco-1994-nature</bibkey>
     </paper>
@@ -835,20 +835,20 @@
     </paper>
     <paper id="111">
       <title>Building a <fixed-case>W</fixed-case>indows-Based Bilingual Functional Semantic Processor</title>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <url hash="062d90de">C94-2111</url>
       <bibkey>webster-1994-building</bibkey>
     </paper>
     <paper id="112">
       <title>On the Proper Role of Coercion in Semantic Typing</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="8fa388af">C94-2112</url>
       <bibkey>pustejovsky-bouillon-1994-proper</bibkey>
     </paper>
     <paper id="113">
       <title>Word Sense Ambiguation: Clustering Related Senses</title>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <url hash="922ec456">C94-2113</url>
       <bibkey>dolan-1994-word</bibkey>
     </paper>
@@ -874,7 +874,7 @@
     <paper id="117">
       <title>An Empirical Study on the Generation of Zero Anaphors in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Ching-Long</first><last>Yeh</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="2c6fb3bf">C94-2117</url>
       <bibkey>yeh-mellish-1994-empirical</bibkey>
     </paper>
@@ -886,7 +886,7 @@
     </paper>
     <paper id="119">
       <title>Generalizing Automatically Generated Selectional Patterns</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="2523124e">C94-2119</url>
       <bibkey>grishman-sterling-1994-generalizing</bibkey>
@@ -900,7 +900,7 @@
     </paper>
     <paper id="121">
       <title>Word Sense Disambiguation and Text Segmentation Based on Lexical Cohesion</title>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Takeo</first><last>Honda</last></author>
       <url hash="a23b3df6">C94-2121</url>
       <bibkey>okumura-honda-1994-word</bibkey>
@@ -908,19 +908,19 @@
     <paper id="122">
       <title>Automatic Recognition of Verbal Polysemy</title>
       <author><first>Fumiyo</first><last>Fukumoto</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="db236441">C94-2122</url>
       <bibkey>fukumoto-tsujii-1994-automatic</bibkey>
     </paper>
     <paper id="123">
       <title>An Experiment on Learning Appropriate Selectional Restrictions From a Parsed Corpus</title>
-      <author><first>Francesc Ribas</first><last>Framis</last></author>
+      <author id="francesc-ribas"><first>Francesc Ribas</first><last>Framis</last></author>
       <url hash="796a31ef">C94-2123</url>
       <bibkey>framis-1994-experiment</bibkey>
     </paper>
     <paper id="124">
       <title>A Discrete Model of Degree Concept in Natural Language</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <url hash="580b49f2">C94-2124</url>
       <bibkey>kamei-muraki-1994-discrete</bibkey>
@@ -945,8 +945,8 @@
     </paper>
     <paper id="128">
       <title>The Merged Upper Model: A Linguistic Ontology for <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Renate</first><last>Henschel</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <url hash="f3de6adc">C94-2128</url>
       <bibkey>henschel-bateman-1994-merged</bibkey>
     </paper>
@@ -965,13 +965,13 @@
     </paper>
     <paper id="131">
       <title><fixed-case>HPSG</fixed-case> Lexicon Without Lexical Rules</title>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="494bdd04">C94-2131</url>
       <bibkey>oliva-1994-hpsg</bibkey>
     </paper>
     <paper id="132">
       <title>A Lexicon of Distributed Noun Representations Constructed by Taxonomic Traversal</title>
-      <author><first>Richard F.E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F.E.</first><last>Sutcliffe</last></author>
       <author><first>Donie</first><last>O’Sullivan</last></author>
       <author><first>Fergus</first><last>Meharg</last></author>
       <url hash="321972c0">C94-2132</url>
@@ -988,13 +988,13 @@
     <paper id="134">
       <title>Hypothesis Selection in Grammar Acquisition</title>
       <author><first>Masaki</first><last>Kiyono</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="e4d44669">C94-2134</url>
       <bibkey>kiyono-tsujii-1994-hypothesis</bibkey>
     </paper>
     <paper id="135">
       <title>Achieving Flexibility in Unification Formalisms</title>
-      <author><first>Lena</first><last>Stromback</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Stromback</last></author>
       <url hash="f28fa508">C94-2135</url>
       <bibkey>stromback-1994-achieving</bibkey>
     </paper>
@@ -1003,7 +1003,7 @@
       <author><first>Kazunori</first><last>Muraki</last></author>
       <author><first>Susumu</first><last>Akamine</last></author>
       <author><first>Kenji</first><last>Satoh</last></author>
-      <author><first>Sinichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Sinichi</first><last>Ando</last></author>
       <url hash="d5195384">C94-2136</url>
       <bibkey>muraki-etal-1994-twp</bibkey>
     </paper>
@@ -1016,7 +1016,7 @@
     <paper id="138">
       <title>A Reestimation Algorithm for Probabilistic ttecursive Transition Network</title>
       <author><first>Young S.</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="51d63170">C94-2138</url>
       <bibkey>han-choi-1994-reestimation</bibkey>
     </paper>
@@ -1054,36 +1054,36 @@
     </paper>
     <paper id="144">
       <title><fixed-case>TDL</fixed-case>-A Type Description Language for Constraint-Based Grammars</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
-      <author><first>Ulrich</first><last>Schafer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schafer</last></author>
       <url hash="2a35fcdf">C94-2144</url>
       <bibkey>krieger-schafer-1994-tdl</bibkey>
     </paper>
     <paper id="145">
       <title>On the Portability of Complex Constraint-Based Grammars</title>
-      <author><first>C.J.</first><last>Rupp</last></author>
+      <author id="c-j-rupp"><first>C.J.</first><last>Rupp</last></author>
       <author><first>Rod</first><last>Johnson</last></author>
       <url hash="5d82745a">C94-2145</url>
       <bibkey>rupp-johnson-1994-portability</bibkey>
     </paper>
     <paper id="146">
       <title>A Grammar Based Approach to a Grammar Checking of Free Word Order Languages</title>
-      <author><first>Vladislav</first><last>Kubon</last></author>
-      <author><first>Martin</first><last>Platek</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Platek</last></author>
       <url hash="a1ba1726">C94-2146</url>
       <bibkey>kubon-platek-1994-grammar</bibkey>
     </paper>
     <paper id="147">
       <title>Table-driven Neural Syntactic Analysis of Spoken <fixed-case>K</fixed-case>orean</title>
       <author><first>Wonll</first><last>Lee</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="bf189dd8">C94-2147</url>
       <bibkey>lee-etal-1994-table</bibkey>
     </paper>
     <paper id="148">
       <title>Universal Guides and Finiteness and Symmetry of Grammar Processing Algorithms</title>
-      <author><first>Miroslav</first><last>Martinović</last></author>
+      <author id="miroslav-martinovic"><first>Miroslav</first><last>Martinović</last></author>
       <url hash="4b698c1d">C94-2148</url>
       <bibkey>martinovic-1994-universal</bibkey>
     </paper>
@@ -1091,8 +1091,8 @@
       <title><fixed-case>XTAG</fixed-case> System - A Wide Coverage Grammar for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Christy</first><last>Doran</last></author>
       <author><first>Dania</first><last>Egedi</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <author><first>Martin</first><last>Zaidel</last></author>
       <url hash="74ba555d">C94-2149</url>
       <bibkey>doran-etal-1994-xtag</bibkey>
@@ -1113,14 +1113,14 @@
     <paper id="152">
       <title>Hypothesis Scoring over Theta Grids Information in Parsing <fixed-case>C</fixed-case>hinese Sentences with Serial Verb Constructions</title>
       <author><first>Koong H. C.</first><last>Lin</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <url hash="5d5e710f">C94-2152</url>
       <bibkey>lin-soo-1994-hypothesis</bibkey>
     </paper>
     <paper id="153">
       <title>An Efficient Syntactic Tagging Tool for Corpora</title>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="26abe856">C94-2153</url>
       <bibkey>zhou-huang-1994-efficient</bibkey>
     </paper>
@@ -1140,20 +1140,20 @@
     </paper>
     <paper id="156">
       <title>Machine-Readable Dictionaries in Text-to-Speech Systems</title>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <url hash="4d05a0ca">C94-2156</url>
       <bibkey>klavans-tzoukermann-1994-machine</bibkey>
     </paper>
     <paper id="157">
       <title>Issues in Text-to-Speech for <fixed-case>F</fixed-case>rench</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <url hash="d4e12b25">C94-2157</url>
       <bibkey>tzoukermann-1994-issues</bibkey>
     </paper>
     <paper id="158">
       <title><fixed-case>CHATR</fixed-case>: a generic speech synthesis system</title>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Paul</first><last>Taylor</last></author>
       <url hash="99d911cf">C94-2158</url>
       <bibkey>black-taylor-1994-chatr</bibkey>
@@ -1188,7 +1188,7 @@
     </paper>
     <paper id="163">
       <title>Phonological Derivation in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <url hash="77d2561b">C94-2163</url>
       <bibkey>ellison-1994-phonological</bibkey>
     </paper>
@@ -1209,7 +1209,7 @@
     <paper id="166">
       <title>A <fixed-case>D</fixed-case>utch to <fixed-case>SQL</fixed-case> database interface using Generalized Quantifier Theory</title>
       <author><first>Dirk</first><last>Speelman</last></author>
-      <author><first>Geert</first><last>Adriaens</last></author>
+      <author id="geert-adriaens"><first>Geert</first><last>Adriaens</last></author>
       <url hash="9536bb56">C94-2166</url>
       <bibkey>speelman-adriaens-1994-dutch</bibkey>
     </paper>
@@ -1222,7 +1222,7 @@
     <paper id="168">
       <title>Knowledge Extraction from Texts: a method for extracting predicate-argument structures from texts</title>
       <author><first>Florence</first><last>Pugeault</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <author><first>Marie-Gaelle</first><last>Monteil</last></author>
       <url hash="5b8a6df4">C94-2168</url>
       <bibkey>pugeault-etal-1994-knowledge</bibkey>
@@ -1232,7 +1232,7 @@
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Mitsutaka</first><last>Matsumoto</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="07e96c35">C94-2169</url>
       <bibkey>utsuro-etal-1994-thesaurus</bibkey>
     </paper>
@@ -1252,7 +1252,7 @@
     </paper>
     <paper id="172">
       <title>Document Classification by Machine:Theory and Practice</title>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Elbert</first><last>Walker</last></author>
       <url hash="ab425e00">C94-2172</url>
       <bibkey>guthrie-walker-1994-document</bibkey>
@@ -1268,7 +1268,7 @@
     <paper id="174">
       <title>Recognizing Text Genres With Simple Metrics Using Discriminant Analysis</title>
       <author><first>Jussi</first><last>Karlgren</last></author>
-      <author><first>Douglass</first><last>Cutting</last></author>
+      <author id="douglass-cutting"><first>Douglass</first><last>Cutting</last></author>
       <url hash="09c1067b">C94-2174</url>
       <bibkey>karlgren-cutting-1994-recognizing</bibkey>
     </paper>
@@ -1277,8 +1277,8 @@
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Hiroshi</first><last>Ikeda</last></author>
       <author><first>Masaya</first><last>Yamane</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="6966ca7b">C94-2175</url>
       <bibkey>utsuro-etal-1994-bilingual</bibkey>
     </paper>
@@ -1299,7 +1299,7 @@
     <paper id="178">
       <title>K-vec: A New Approach for Aligning Parallel Texts</title>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <url hash="37dd6903">C94-2178</url>
       <bibkey>fung-church-1994-k</bibkey>
     </paper>
@@ -1324,27 +1324,27 @@
     </paper>
     <paper id="182">
       <title>Collaboration on Reference to Objects That Are Not Mutually Known</title>
-      <author><first>Philip G.</first><last>Edmonds</last></author>
+      <author id="philip-edmonds"><first>Philip G.</first><last>Edmonds</last></author>
       <url hash="676ecfcf">C94-2182</url>
       <bibkey>edmonds-1994-collaboration</bibkey>
     </paper>
     <paper id="183">
       <title>Automatic Detection of Discourse Structure by Checking Surface Information in Sentences</title>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="333d5553">C94-2183</url>
       <bibkey>kurohashi-nagao-1994-automatic</bibkey>
     </paper>
     <paper id="184">
       <title>Extending <fixed-case>DRT</fixed-case> With a Focusing Mechanism for Pronominal Anaphora and Ellipsis Resolution</title>
       <author><first>Jose</first><last>Abracos</last></author>
-      <author><first>Jose Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Jose Gabriel</first><last>Lopes</last></author>
       <url hash="7cff8d3e">C94-2184</url>
       <bibkey>abracos-lopes-1994-extending</bibkey>
     </paper>
     <paper id="185">
       <title>Reference Resolution Using Semantic Patterns in <fixed-case>J</fixed-case>apanese Newspaper Articles</title>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <url hash="db0d6093">C94-2185</url>
       <bibkey>wakao-1994-reference</bibkey>
     </paper>
@@ -1384,14 +1384,14 @@
     </paper>
     <paper id="191">
       <title>An Integrated Model for Anaphora Resolution</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="166cab8c">C94-2191</url>
       <bibkey>mitkov-1994-integrated</bibkey>
     </paper>
     <paper id="192">
       <title>Breaking Down Rhetorical Relations for the purpose of Analysing Discourse Structures</title>
-      <author><first>Jun’ichi</first><last>Fukumoto</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-fukumoto"><first>Jun’ichi</first><last>Fukumoto</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="93f8eef7">C94-2192</url>
       <bibkey>fukumoto-tsujii-1994-breaking</bibkey>
     </paper>
@@ -1403,7 +1403,7 @@
     </paper>
     <paper id="194">
       <title>Communicating With Multiple Agents</title>
-      <author><first>Elizabeth A.</first><last>Hinkelman</last></author>
+      <author id="elizabeth-a-hinkelman"><first>Elizabeth A.</first><last>Hinkelman</last></author>
       <author><first>Stephen P.</first><last>Spackman</last></author>
       <url hash="c812ce1c">C94-2194</url>
       <bibkey>hinkelman-spackman-1994-communicating</bibkey>
@@ -1417,7 +1417,7 @@
     </paper>
     <paper id="196">
       <title>Discourse and Deliberation: Testing a Collaborative Strategy</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="2c355a85">C94-2196</url>
       <bibkey>walker-1994-discourse</bibkey>
     </paper>
@@ -1442,9 +1442,9 @@
     </paper>
     <paper id="200">
       <title>The Evolution of Machine-Tractable Dictionaries</title>
-      <author><first>Cheng-ming</first><last>Guo</last></author>
-      <author><first>Chang-ning</first><last>Huang</last></author>
-      <author><first>Jun-ping</first><last>Gong</last></author>
+      <author id="cheng-ming-guo"><first>Cheng-ming</first><last>Guo</last></author>
+      <author id="changning-huang"><first>Chang-ning</first><last>Huang</last></author>
+      <author id="junping-gong"><first>Jun-ping</first><last>Gong</last></author>
       <author><first>Jin</first><last>Li</last></author>
       <url hash="1016f992">C94-2200</url>
       <bibkey>guo-etal-1994-evolution</bibkey>
@@ -1477,8 +1477,8 @@
     </paper>
     <paper id="205">
       <title><fixed-case>P</fixed-case>ortuguese Analysis with <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Karin Christine</first><last>Kipper</last></author>
-      <author><first>Vera Lucia Strube</first><last>de Lima</last></author>
+      <author id="karin-kipper"><first>Karin Christine</first><last>Kipper</last></author>
+      <author id="vera-lucia-strube-de-lima"><first>Vera Lucia Strube</first><last>de Lima</last></author>
       <url hash="f1808a42">C94-2205</url>
       <bibkey>kipper-de-lima-1994-portuguese</bibkey>
     </paper>
@@ -1497,9 +1497,9 @@
     </paper>
     <paper id="208">
       <title>Humor-Based Applications</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
       <author><first>Miklos</first><last>Pal</last></author>
-      <author><first>Laszlo</first><last>Tihanyi</last></author>
+      <author id="laszlo-tihanyi"><first>Laszlo</first><last>Tihanyi</last></author>
       <url hash="fcf66ad2">C94-2208</url>
       <bibkey>proszeky-etal-1994-humor</bibkey>
     </paper>
@@ -1530,9 +1530,9 @@
     <paper id="212">
       <title><fixed-case>NL</fixed-case> Understanding with a Grammar of Constructions</title>
       <author><first>Wlodek</first><last>Zadrozny</last></author>
-      <author><first>Marcin</first><last>Szummer</last></author>
+      <author id="marcin-szummer"><first>Marcin</first><last>Szummer</last></author>
       <author><first>Stanislaw</first><last>Jarecki</last></author>
-      <author><first>David E.</first><last>Johnson</last></author>
+      <author id="david-e-johnson"><first>David E.</first><last>Johnson</last></author>
       <author><first>Leora</first><last>Morgenstern</last></author>
       <url hash="3bfc976f">C94-2212</url>
       <bibkey>zadrozny-etal-1994-nl</bibkey>
diff --git a/data/xml/C96.xml b/data/xml/C96.xml
index aa0c3f614e..146bfe89aa 100644
--- a/data/xml/C96.xml
+++ b/data/xml/C96.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Discovering the Sounds of Discourse Structure Extended Abstract</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <url hash="3f466b4f">C96-1001</url>
       <bibkey>grosz-1996-discovering</bibkey>
     </paper>
@@ -38,8 +38,8 @@
     </paper>
     <paper id="5">
       <title>Word Sense Disambiguation using Conceptual Density</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="e704454f">C96-1005</url>
       <bibkey>agirre-rigau-1996-word</bibkey>
     </paper>
@@ -95,34 +95,34 @@
     </paper>
     <paper id="13">
       <title>Concept clustering and knowledge integration from a children’s dictionary</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <author><first>Fred</first><last>Popowich</last></author>
       <url hash="8aeaea57">C96-1013</url>
       <bibkey>barriere-popowich-1996-concept</bibkey>
     </paper>
     <paper id="14">
       <title>Integrating Syntactic and Prosodic Information for the Efficient Detection of Empty Categories</title>
-      <author><first>Anton</first><last>Batliner</last></author>
+      <author id="anton-batliner"><first>Anton</first><last>Batliner</last></author>
       <author><first>Anke</first><last>Feldhaus</last></author>
       <author><first>Stefan</first><last>Geifiler</last></author>
       <author><first>Andreas</first><last>Kieflling</last></author>
       <author><first>Tibor</first><last>Kiss</last></author>
       <author><first>Ralf</first><last>Kompe</last></author>
-      <author><first>Elmar</first><last>Noth</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Noth</last></author>
       <url hash="2f0f89bb">C96-1014</url>
       <bibkey>batliner-etal-1996-integrating</bibkey>
     </paper>
     <paper id="15">
       <title>Monotonic Paradigmatic Schemata in <fixed-case>I</fixed-case>talian Verb Inflection</title>
       <author><first>Vito</first><last>Pirrelli</last></author>
-      <author><first>Marco</first><last>Battista</last></author>
+      <author id="marco-battista"><first>Marco</first><last>Battista</last></author>
       <url hash="37302471">C96-1015</url>
       <bibkey>pirrelli-battista-1996-monotonic</bibkey>
     </paper>
     <paper id="16">
       <title>Measuring Semantic Coverage</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Kavi</first><last>Mahesh</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <url hash="eef03b14">C96-1016</url>
       <bibkey>nirenburg-etal-1996-measuring</bibkey>
@@ -135,7 +135,7 @@
     </paper>
     <paper id="18">
       <title>Unsupervised Discovery of Phonological Categories through Supervised Learning of Morphological Rules</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Peter</first><last>Berck</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
       <url hash="73d2037e">C96-1018</url>
@@ -150,10 +150,10 @@
     </paper>
     <paper id="20">
       <title>Beyond Skeleton Parsing: Producing a Comprehensive Large-Scale General-<fixed-case>E</fixed-case>nglish Treebank With Full Grammatical Analysis</title>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Stephen</first><last>Eubank</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>David</first><last>Magerman</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="david-m-magerman"><first>David</first><last>Magerman</last></author>
       <author><first>Roger</first><last>Garside</last></author>
       <author><first>Geoffrey</first><last>Leech</last></author>
       <url hash="88f32ce6">C96-1020</url>
@@ -162,13 +162,13 @@
     <paper id="21">
       <title>Anaphora for Everyone: Pronominal Anaphora Resolution without a Parser</title>
       <author><first>Christopher</first><last>Kennedy</last></author>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <url hash="8b7f9a2c">C96-1021</url>
       <bibkey>kennedy-boguraev-1996-anaphora</bibkey>
     </paper>
     <paper id="22">
       <title>Theory and practice of ambiguity labelling with a view to interactive disambiguation in text and speech <fixed-case>MT</fixed-case></title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
       <url hash="b1e9b4ce">C96-1022</url>
       <bibkey>boitet-tomokiyo-1996-theory</bibkey>
@@ -184,11 +184,11 @@
     <paper id="24">
       <title>Compositional Semantics in Verbmobil</title>
       <author><first>Johan</first><last>Bos</last></author>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <author><first>Christian</first><last>Lieske</last></author>
       <author><first>Yoshiki</first><last>Mori</last></author>
       <author><first>Manfred</first><last>Pinkal</last></author>
-      <author><first>Karsten</first><last>Worm</last></author>
+      <author id="karsten-l-worm"><first>Karsten</first><last>Worm</last></author>
       <url hash="5f779b14">C96-1024</url>
       <bibkey>bos-etal-1996-compositional</bibkey>
     </paper>
@@ -196,19 +196,19 @@
       <title>Processing Metonymy- a Domain-Model Heuristic Graph Traversal Approach</title>
       <author><first>Jacques</first><last>Bouaud</last></author>
       <author><first>Bruno</first><last>Bachimont</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <url hash="c6a019f1">C96-1025</url>
       <bibkey>bouaud-etal-1996-processing</bibkey>
     </paper>
     <paper id="26">
       <title>Mental State Adjectives: the Perspective of <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="dccdd382">C96-1026</url>
       <bibkey>bouillon-1996-mental</bibkey>
     </paper>
     <paper id="27">
       <title>Branching Split Obliqueness at the Syntax-Semantics Interface</title>
-      <author><first>Antonio H.</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio H.</first><last>Branco</last></author>
       <url hash="e9b5924e">C96-1027</url>
       <bibkey>branco-1996-branching</bibkey>
     </paper>
@@ -223,21 +223,21 @@
     <paper id="29">
       <title>Lexical Rules: What are they?</title>
       <author><first>Andrew</first><last>Bredenkamp</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Louisa</first><last>Sadler</last></author>
       <url hash="9df82e41">C96-1029</url>
       <bibkey>bredenkamp-etal-1996-lexical</bibkey>
     </paper>
     <paper id="30">
       <title>Example-Based Machine Translation in the Pangloss System</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="0554bcaa">C96-1030</url>
       <bibkey>brown-1996-example</bibkey>
     </paper>
     <paper id="31">
       <title><fixed-case>G</fixed-case>ram<fixed-case>C</fixed-case>heck: A Grammar and Style Checker</title>
       <author><first>Flora Ramírez</first><last>Bustamante</last></author>
-      <author><first>Fernando Sánchez</first><last>León</last></author>
+      <author id="fernando-sanchez-leon"><first>Fernando Sánchez</first><last>León</last></author>
       <url hash="07eb96ce">C96-1031</url>
       <bibkey>bustamante-leon-1996-gramcheck</bibkey>
     </paper>
@@ -252,13 +252,13 @@
     <paper id="33">
       <title><fixed-case>F</fixed-case>eas<fixed-case>P</fixed-case>ar - A Feature Structure Parser Learning to Parse Spoken Language</title>
       <author><first>Finn Dag</first><last>Buo</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="b83ff873">C96-1033</url>
       <bibkey>buo-waibel-1996-feaspar</bibkey>
     </paper>
     <paper id="34">
       <title>A principle-based hierarchical representation of <fixed-case>LTAG</fixed-case>s</title>
-      <author><first>Marie-Helene</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Helene</first><last>Candito</last></author>
       <url hash="130d310d">C96-1034</url>
       <bibkey>candito-1996-principle</bibkey>
     </paper>
@@ -278,14 +278,14 @@
     </paper>
     <paper id="37">
       <title>Aligning More Words with High Precision for Small Bilingual Corpora</title>
-      <author><first>Sur-Jin</first><last>Ker</last></author>
-      <author><first>Jason J. S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sur-Jin</first><last>Ker</last></author>
+      <author id="jason-s-chang"><first>Jason J. S.</first><last>Chang</last></author>
       <url hash="6e306c8d">C96-1037</url>
       <bibkey>ker-chang-1996-aligning</bibkey>
     </paper>
     <paper id="38">
       <title>A Rule-Based and <fixed-case>MT</fixed-case>-Oriented Approach to Prepositional Phrase Attachment</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <url hash="03771555">C96-1038</url>
       <bibkey>chen-chen-1996-rule</bibkey>
@@ -301,15 +301,15 @@
       <title>Bilingual Knowledge Acquisition from <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish Parallel Corpus Using Alignment</title>
       <author><first>Jung H.</first><last>Shin</last></author>
       <author><first>Young S.</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="e39f12ff">C96-1040</url>
       <bibkey>shin-etal-1996-bilingual</bibkey>
     </paper>
     <paper id="41">
       <title><fixed-case>M</fixed-case>arkov random field based <fixed-case>E</fixed-case>nglish Part-Of-Speech tagging system</title>
-      <author><first>Sung-Young</first><last>Jung</last></author>
+      <author id="sung-young-jung"><first>Sung-Young</first><last>Jung</last></author>
       <author><first>Young C.</first><last>Park</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Youngwhan</first><last>Kim</last></author>
       <url hash="9ea060a9">C96-1041</url>
       <bibkey>jung-etal-1996-markov</bibkey>
@@ -322,7 +322,7 @@
     </paper>
     <paper id="43">
       <title>Evaluating and comparing three text-production techniques</title>
-      <author><first>Jose</first><last>Coch</last></author>
+      <author id="jose-coch"><first>Jose</first><last>Coch</last></author>
       <url hash="7b56d383">C96-1043</url>
       <bibkey>coch-1996-evaluating</bibkey>
     </paper>
@@ -335,22 +335,22 @@
     </paper>
     <paper id="45">
       <title>Direct and Underspecified Interpretations of <fixed-case>LFG</fixed-case> f-structures</title>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Dick</first><last>Crouch</last></author>
       <url hash="764fb926">C96-1045</url>
       <bibkey>van-genabith-crouch-1996-direct</bibkey>
     </paper>
     <paper id="46">
       <title>Pronouncing Text by Analogy</title>
-      <author><first>Robert I.</first><last>Damper</last></author>
+      <author id="robert-i-damper"><first>Robert I.</first><last>Damper</last></author>
       <author><first>John EG.</first><last>Eastmond</last></author>
       <url hash="b9e4d62c">C96-1046</url>
       <bibkey>damper-eastmond-1996-pronouncing</bibkey>
     </paper>
     <paper id="47">
       <title>Finite-state phrase parsing by rule sequences</title>
-      <author><first>Marc</first><last>Vilain</last></author>
-      <author><first>David</first><last>Day</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <url hash="8ecc0797">C96-1047</url>
       <bibkey>vilain-day-1996-finite</bibkey>
     </paper>
@@ -372,17 +372,17 @@
     <paper id="50">
       <title>Language-Specific Mappings from Semantics to Syntax</title>
       <author><first>Judy</first><last>Delin</last></author>
-      <author><first>Donia R.</first><last>Scott</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="donia-scott"><first>Donia R.</first><last>Scott</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="129fd94a">C96-1050</url>
       <bibkey>delin-etal-1996-language</bibkey>
     </paper>
     <paper id="51">
       <title>Segmentation and Labelling of <fixed-case>S</fixed-case>lovenian Diphone Inventories</title>
-      <author><first>Jerneja</first><last>Gros</last></author>
+      <author id="jerneja-gros"><first>Jerneja</first><last>Gros</last></author>
       <author><first>Ivo</first><last>Ipsic</last></author>
-      <author><first>Simon</first><last>Dobrisek</last></author>
-      <author><first>France</first><last>Mihelic</last></author>
+      <author id="simon-dobrisek"><first>Simon</first><last>Dobrisek</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelic</last></author>
       <author><first>Nikola</first><last>Pavesic</last></author>
       <url hash="ad2c3f73">C96-1051</url>
       <bibkey>gros-etal-1996-segmentation</bibkey>
@@ -396,31 +396,31 @@
     </paper>
     <paper id="53">
       <title>Lexical Information for Determining <fixed-case>J</fixed-case>apanese Unbounded Dependency</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
-      <author><first>Shin’ichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shin’ichi</first><last>Doi</last></author>
       <url hash="1a0ab729">C96-1053</url>
       <bibkey>kamei-etal-1996-lexical</bibkey>
     </paper>
     <paper id="54">
       <title>Semantic-based Transfer</title>
       <author><first>Michael</first><last>Dorna</last></author>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <url hash="63c29e36">C96-1054</url>
       <bibkey>dorna-emele-1996-semantic</bibkey>
     </paper>
     <paper id="55">
       <title>Role of Word Sense Disalnbiguation in Lexical Acquisition: Predicting Semantics from Syntactic Cues</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Doug</first><last>Jones</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="douglas-jones"><first>Doug</first><last>Jones</last></author>
       <url hash="41238a9a">C96-1055</url>
       <bibkey>dorr-jones-1996-role</bibkey>
     </paper>
     <paper id="56">
       <title><fixed-case>G</fixed-case>RICE INCORPORATED: Cooperativity in Spoken Dialogue</title>
-      <author><first>Laila</first><last>Dybkjaer</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Hans</first><last>Dybkjaer</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjaer</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjaer</last></author>
       <url hash="b5443137">C96-1056</url>
       <bibkey>dybkjaer-etal-1996-grice</bibkey>
     </paper>
@@ -432,7 +432,7 @@
     </paper>
     <paper id="58">
       <title>Three New Probabilistic Models for Dependency Parsing: An Exploration</title>
-      <author><first>Jason M.</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason M.</first><last>Eisner</last></author>
       <url hash="ed12e57f">C96-1058</url>
       <bibkey>eisner-1996-three</bibkey>
     </paper>
@@ -452,14 +452,14 @@
     <paper id="61">
       <title>Using Discourse Predictions for Ambiguity Resolution</title>
       <author><first>Yan</first><last>Qu</last></author>
-      <author><first>Carolyn P.</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rose</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <url hash="13dcc782">C96-1061</url>
       <bibkey>qu-etal-1996-using</bibkey>
     </paper>
     <paper id="62">
       <title>Interpretation of Nominal Compounds: Combining Domain-Independent and Domain-Specific Information</title>
-      <author><first>Cecile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cecile</first><last>Fabre</last></author>
       <url hash="75cc325c">C96-1062</url>
       <bibkey>fabre-1996-interpretation</bibkey>
     </paper>
@@ -471,7 +471,7 @@
     </paper>
     <paper id="64">
       <title>Resolving syntactic ambiguities with lexico-semantic patterns: an analogy-based approach</title>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Stefano</first><last>Federici</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <url hash="94619bde">C96-1064</url>
@@ -523,9 +523,9 @@
     </paper>
     <paper id="71">
       <title>Evaluation of an Algorithm for the Recognition and Classification of Proper Names</title>
-      <author><first>Takahiro</first><last>Wakao</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="a1388827">C96-1071</url>
       <bibkey>wakao-etal-1996-evaluation</bibkey>
     </paper>
@@ -552,12 +552,12 @@
     </paper>
     <paper id="75">
       <title>Multi-lingual Translation of Spontaneously Spoken Language in a Limited Domain</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Marsal</first><last>Gavalda</last></author>
-      <author><first>Laura</first><last>Mayfield</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavalda</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura</first><last>Mayfield</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="3c1cabea">C96-1075</url>
       <bibkey>lavie-etal-1996-multi</bibkey>
     </paper>
@@ -569,39 +569,39 @@
     </paper>
     <paper id="77">
       <title>Compiling a Partition-Based Two-Level Formalism</title>
-      <author><first>Edmund</first><last>Grimley-Evans</last></author>
-      <author><first>George Anton</first><last>Kiraz</last></author>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="edmund-grimley-evans"><first>Edmund</first><last>Grimley-Evans</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
       <url hash="6d03534c">C96-1077</url>
       <bibkey>grimley-evans-etal-1996-compiling</bibkey>
     </paper>
     <paper id="78">
       <title>Alignment of Shared Forests for Bilingual Corpora</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="856f95f3">C96-1078</url>
       <bibkey>meyers-etal-1996-alignment</bibkey>
     </paper>
     <paper id="79">
       <title><fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onference- 6: A Brief History</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="9cd936d8">C96-1079</url>
       <bibkey>grishman-sundheim-1996-message</bibkey>
     </paper>
     <paper id="80">
       <title>The Influence of Tagging on the Classification of Lexical Complements</title>
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="4a57e11c">C96-1080</url>
       <bibkey>macleod-etal-1996-influence</bibkey>
     </paper>
     <paper id="81">
       <title>A Sign Expansion Approach to Dynamic, Multi-Purpose Lexicons</title>
       <author><first>Jon Atle</first><last>Gulla</last></author>
-      <author><first>Sjur Nørstebø</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur Nørstebø</first><last>Moshagen</last></author>
       <url hash="685caa24">C96-1081</url>
       <bibkey>gulla-moshagen-1996-sign</bibkey>
     </paper>
@@ -616,9 +616,9 @@
     </paper>
     <paper id="83">
       <title>Symbolic word clustering for medium-size corpora</title>
-      <author><first>Benoit</first><last>Habert</last></author>
+      <author id="benoit-habert"><first>Benoit</first><last>Habert</last></author>
       <author><first>Elie</first><last>Naulleau</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <url hash="b07eb2af">C96-1083</url>
       <bibkey>habert-etal-1996-symbolic</bibkey>
     </paper>
@@ -639,7 +639,7 @@
     </paper>
     <paper id="86">
       <title>Inherited Feature-based Similarity Measure Based on Large Semantic Hierarchy and Large Text Corpus</title>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Zhonghui</first><last>Xu</last></author>
       <author><first>Kenneth</first><last>Haase</last></author>
       <url hash="fc271592">C96-1086</url>
@@ -669,7 +669,7 @@
     </paper>
     <paper id="90">
       <title>Issues in Communication Game</title>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="7cae7715">C96-1090</url>
       <bibkey>hasida-1996-issues</bibkey>
     </paper>
@@ -681,7 +681,7 @@
     </paper>
     <paper id="92">
       <title>Applying Lexical Rules Under Subsumption</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <author><first>Tsuneko</first><last>Nakazawa</last></author>
       <url hash="da6690f1">C96-1092</url>
       <bibkey>hinrichs-nakazawa-1996-applying</bibkey>
@@ -702,7 +702,7 @@
     <paper id="95">
       <title>Towards a More Careful Evaluation of Broad Coverage Parsing Systems</title>
       <author><first>Wide R.</first><last>Hogenhout</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="536ced17">C96-1095</url>
       <bibkey>hogenhout-matsumoto-1996-towards</bibkey>
     </paper>
@@ -715,7 +715,7 @@
     <paper id="97">
       <title>A Statistical Method for Extracting Uninterrupted and Interrupted Collocations from Very Large Corpora</title>
       <author><first>Satoru</first><last>Ikehara</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Hajime</first><last>Uchino</last></author>
       <url hash="576ab8a0">C96-1097</url>
       <bibkey>ikehara-etal-1996-statistical</bibkey>
@@ -733,7 +733,7 @@
     </frontmatter>
     <paper id="98">
       <title>Extraction of Lexical Translations from Non-Aligned Corpora</title>
-      <author><first>Kumiko</first><last>Tanaka</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka</last></author>
       <author><first>Hideya</first><last>Iwasaki</last></author>
       <url hash="7b011ebb">C96-2098</url>
       <bibkey>tanaka-iwasaki-1996-extraction</bibkey>
@@ -741,7 +741,7 @@
     <paper id="99">
       <title>Segmenting Sentences into Linky Strings Using <fixed-case>D</fixed-case>-bigram Statistics</title>
       <author><first>Shiho</first><last>Nobesawa</last></author>
-      <author><first>Junya</first><last>Tsutsumi</last></author>
+      <author id="junya-tsutsumi"><first>Junya</first><last>Tsutsumi</last></author>
       <author><first>Sun Da</first><last>Jiang</last></author>
       <author><first>Tomohisa</first><last>Sano</last></author>
       <author><first>Kengo</first><last>Sato</last></author>
@@ -757,7 +757,7 @@
     </paper>
     <paper id="101">
       <title>Goal Formulation based on Communicative Principles</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <url hash="e92046f9">C96-2101</url>
       <bibkey>jokinen-1996-goal</bibkey>
     </paper>
@@ -770,7 +770,7 @@
     <paper id="103">
       <title>Coordination in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars: Formalization and Implementation</title>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <url hash="0693c747">C96-2103</url>
       <bibkey>sarkar-joshi-1996-coordination</bibkey>
     </paper>
@@ -782,22 +782,22 @@
     </paper>
     <paper id="105">
       <title>Parallel Replacement in Finite State Calculus</title>
-      <author><first>Andre</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>Andre</first><last>Kempe</last></author>
       <author><first>Lauri</first><last>Karttunen</last></author>
       <url hash="0bfa1f49">C96-2105</url>
       <bibkey>kempe-karttunen-1996-parallel</bibkey>
     </paper>
     <paper id="106">
       <title>Modularizing Codescriptive Grammars for Efficient Parsing</title>
-      <author><first>Walter</first><last>Kasper</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <url hash="23dccac0">C96-2106</url>
       <bibkey>kasper-krieger-1996-modularizing</bibkey>
     </paper>
     <paper id="107">
       <title>Statistical Method of Recognizing Local Cohesion</title>
-      <author><first>Naoto</first><last>Katoh</last></author>
-      <author><first>Tsuyoshi</first><last>Morimoto</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Katoh</last></author>
+      <author id="tsuyoshi-morimoto"><first>Tsuyoshi</first><last>Morimoto</last></author>
       <url hash="faa39bd2">C96-2107</url>
       <bibkey>katoh-morimoto-1996-statistical</bibkey>
     </paper>
@@ -828,7 +828,7 @@
     </paper>
     <paper id="112">
       <title>Computing Prosodic Morphology</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <url hash="617fdbe3">C96-2112</url>
       <bibkey>kiraz-1996-computing</bibkey>
     </paper>
@@ -840,7 +840,7 @@
     </paper>
     <paper id="114">
       <title>Linguistic Indeterminacy as a Source of Errors in Tagging</title>
-      <author><first>Gunnel</first><last>Kallgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Kallgren</last></author>
       <url hash="5fe5db0c">C96-2114</url>
       <bibkey>kallgren-1996-linguistic</bibkey>
     </paper>
@@ -848,14 +848,14 @@
       <title>Arguments desperately seeking Interpretation: Parsing <fixed-case>G</fixed-case>erman Infinitives</title>
       <author><first>Christopher</first><last>Laenzlinger</last></author>
       <author><first>Martin S.</first><last>Ulmann</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="be3f4f22">C96-2115</url>
       <bibkey>laenzlinger-etal-1996-arguments</bibkey>
     </paper>
     <paper id="116">
       <title>A Generalized Reconstruction Algorithm for Ellipsis Resolution</title>
       <author><first>Shalom</first><last>Lappin</last></author>
-      <author><first>Hsue-Hueh</first><last>Shih</last></author>
+      <author id="hsue-hueh-shih"><first>Hsue-Hueh</first><last>Shih</last></author>
       <url hash="b17299da">C96-2116</url>
       <bibkey>lappin-shih-1996-generalized</bibkey>
     </paper>
@@ -867,17 +867,17 @@
     </paper>
     <paper id="118">
       <title>An ascription-based approach to Speech Acts</title>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="f4fe2564">C96-2118</url>
       <bibkey>lee-wilks-1996-ascription</bibkey>
     </paper>
     <paper id="119">
       <title>Automatic <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Text Translation of Telegraphic Messages in a Limited Domain</title>
-      <author><first>Clifford</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
       <author><first>Dinesh</first><last>Tummala</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="b047a2a8">C96-2119</url>
       <bibkey>weinstein-etal-1996-automatic</bibkey>
     </paper>
@@ -887,11 +887,11 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Sylvie</first><last>Regnier-Prost</last></author>
       <author><first>Klaus</first><last>Netter</last></author>
-      <author><first>Veronika</first><last>Lux</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux</last></author>
       <author><first>Judith</first><last>Klein</last></author>
       <author><first>Kirsten</first><last>Falkedal</last></author>
       <author><first>Frederik</first><last>Fouvry</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Eva</first><last>Dauphin</last></author>
       <author><first>Herve</first><last>Compagnion</last></author>
       <author><first>Judith</first><last>Baur</last></author>
@@ -903,14 +903,14 @@
     <paper id="121">
       <title>Saussurian analogy: a theoretical account and its application</title>
       <author><first>Yves</first><last>Lepage</last></author>
-      <author><last>Ando</last><first>Shin-ichi</first></author>
+      <author id="shinichi-ando"><first>Shin-ichi</first><last>Ando</last></author>
       <url hash="16440909">C96-2121</url>
       <bibkey>lepage-ando-1996-saussurian</bibkey>
     </paper>
     <paper id="122">
       <title>An <fixed-case>E</fixed-case>arley-type recognizer for dependency grammar</title>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <url hash="048883e3">C96-2122</url>
       <bibkey>lombardo-lesmo-1996-earley</bibkey>
     </paper>
@@ -922,7 +922,7 @@
     </paper>
     <paper id="124">
       <title>Building Knowledge Bases for the Generation of Software Documentation</title>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Keith</first><last>Vander Linden</last></author>
       <url hash="5e3eeae3">C96-2124</url>
       <bibkey>paris-vander-linden-1996-building</bibkey>
@@ -944,14 +944,14 @@
     <paper id="127">
       <title>An <fixed-case>HPSG</fixed-case>-Based Generator for <fixed-case>G</fixed-case>erman An Experiment in the Reusability of Linguistic Resources</title>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="ec614d34">C96-2127</url>
       <bibkey>matiasek-trost-1996-hpsg</bibkey>
     </paper>
     <paper id="128">
       <title>Reversible delayed lexical choice in a bidirectional framework</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="7749d653">C96-2128</url>
       <bibkey>wilcock-matsumoto-1996-reversible</bibkey>
     </paper>
@@ -991,13 +991,13 @@
       <author><first>Yasuhiko</first><last>Watanabe</last></author>
       <author><first>Masaki</first><last>Murata</last></author>
       <author><first>Masahito</first><last>Takeuchi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="e7715e68">C96-2134</url>
       <bibkey>watanabe-etal-1996-document</bibkey>
     </paper>
     <paper id="135">
       <title>Yet Another Paper about Partial Verb Phrase Fronting in <fixed-case>G</fixed-case>erman</title>
-      <author><first>Stefan</first><last>Muller</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Muller</last></author>
       <url hash="2904ea09">C96-2135</url>
       <bibkey>muller-1996-yet</bibkey>
     </paper>
@@ -1010,7 +1010,7 @@
     <paper id="137">
       <title>Anaphora Resolution of <fixed-case>J</fixed-case>apanese Zero Pronouns with Deictic Reference</title>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <url hash="10e37693">C96-2137</url>
       <bibkey>nakaiwa-shirai-1996-anaphora</bibkey>
     </paper>
@@ -1035,16 +1035,16 @@
     </paper>
     <paper id="141">
       <title><fixed-case>HMM</fixed-case>-Based Word Alignment in Statistical Translation</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <url hash="c9ba82c3">C96-2141</url>
       <bibkey>vogel-etal-1996-hmm</bibkey>
     </paper>
     <paper id="142">
       <title>Adjectival Modification in Text Meaning Representation</title>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="ad05cd21">C96-2142</url>
       <bibkey>raskin-nirenburg-1996-adjectival</bibkey>
     </paper>
@@ -1075,14 +1075,14 @@
     </paper>
     <paper id="147">
       <title>Zero Pronoun Resolution in <fixed-case>J</fixed-case>apanese Discourse Based on Centering Theory</title>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Kouji</first><last>Tamura</last></author>
       <url hash="c8c0a628">C96-2147</url>
       <bibkey>okumura-tamura-1996-zero</bibkey>
     </paper>
     <paper id="148">
       <title><fixed-case>POS</fixed-case> Tagging Using Relaxation Labelling</title>
-      <author><first>Lluis</first><last>Padro</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padro</last></author>
       <url hash="f4319ef2">C96-2148</url>
       <bibkey>padro-1996-pos</bibkey>
     </paper>
@@ -1137,7 +1137,7 @@
     </paper>
     <paper id="157">
       <title>A Self-Learning Universal Concept Spotter</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Jin</first><last>Wang</last></author>
       <url hash="42093f73">C96-2157</url>
       <bibkey>strzalkowski-wang-1996-self</bibkey>
@@ -1157,7 +1157,7 @@
     <paper id="160">
       <title>Computing Phrasal-signs in <fixed-case>HPSG</fixed-case> prior to Parsing</title>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="3c1a4e36">C96-2160</url>
       <bibkey>torisawa-tsujii-1996-computing</bibkey>
     </paper>
@@ -1187,7 +1187,7 @@
     </paper>
     <paper id="165">
       <title>On Inference-Based Procedures for Lexical Disambiguation</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="2e9c36ee">C96-2165</url>
       <bibkey>wedekind-1996-inference</bibkey>
     </paper>
@@ -1205,8 +1205,8 @@
     </paper>
     <paper id="168">
       <title>“Is Speech Language?”</title>
-      <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="joseph-mariani"><first>Joseph</first><last>Mariani</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <url hash="e8d7b3ab">C96-2168</url>
       <bibkey>mariani-krauwer-1996-speech</bibkey>
     </paper>
@@ -1218,7 +1218,7 @@
     </paper>
     <paper id="170">
       <title>Evaluation of <fixed-case>NLP</fixed-case> systems</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="08df2a6c">C96-2170</url>
       <bibkey>maegaard-1996-evaluation</bibkey>
     </paper>
@@ -1242,7 +1242,7 @@
     </paper>
     <paper id="174">
       <title><fixed-case>CALL</fixed-case>: The Potential of Lingware and the Use of Empirical Linguistic Data</title>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <url hash="ec3c2f8a">C96-2174</url>
       <bibkey>tufis-1996-call</bibkey>
     </paper>
@@ -1261,14 +1261,14 @@
     <paper id="177">
       <title><fixed-case>NL</fixed-case> Domain Explanations in Knowledge Based <fixed-case>MAT</fixed-case></title>
       <author><first>Galia</first><last>Angelova</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <url hash="c59c083f">C96-2177</url>
       <bibkey>angelova-bontcheva-1996-nl</bibkey>
     </paper>
     <paper id="178">
       <title>Machine Translation Method Using Inductive Learning with Genetic Algorithms</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <url hash="b680ebda">C96-2178</url>
@@ -1276,7 +1276,7 @@
     </paper>
     <paper id="179">
       <title>The implementation of a computational grammar of <fixed-case>F</fixed-case>rench using the Grammar Development Environment</title>
-      <author><first>Louisette</first><last>Emirkanian</last></author>
+      <author id="louisette-emirkanian"><first>Louisette</first><last>Emirkanian</last></author>
       <author><first>Lyne</first><last>Da Sylva</last></author>
       <author><first>Lorne H.</first><last>Bouchard</last></author>
       <url hash="d28ca1cf">C96-2179</url>
@@ -1293,14 +1293,14 @@
     </paper>
     <paper id="181">
       <title><fixed-case>NKRL</fixed-case>, a Knowledge Representation Language for Narrative Natural Language Processing</title>
-      <author><first>Gian Piero</first><last>Zarri</last></author>
+      <author id="gian-piero-zarri"><first>Gian Piero</first><last>Zarri</last></author>
       <url hash="85c9fc20">C96-2181</url>
       <bibkey>zarri-1996-nkrl</bibkey>
     </paper>
     <paper id="182">
       <title>Formal Description of Multi-Word Lexemes with the Finite-State Formalism <fixed-case>IDAREX</fixed-case></title>
       <author><first>Elisabeth</first><last>Breidt</last></author>
-      <author><first>Frederique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
       <author><first>Giuseppe</first><last>Valetto</last></author>
       <url hash="c41bac8b">C96-2182</url>
       <bibkey>breidt-etal-1996-formal</bibkey>
@@ -1308,23 +1308,23 @@
     <paper id="183">
       <title>Motivations and Methods for Text Simplification</title>
       <author id="raman-chandrasekar"><first>R.</first><last>Chandrasekar</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <url hash="362c0018">C96-2183</url>
       <bibkey>chandrasekar-etal-1996-motivations</bibkey>
     </paper>
     <paper id="184">
       <title>Segmentation Standard for <fixed-case>C</fixed-case>hinese Natural Language Processing</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
-      <author><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
       <url hash="259ffd10">C96-2184</url>
       <bibkey>huang-etal-1996-segmentation</bibkey>
     </paper>
     <paper id="185">
       <title><fixed-case>K</fixed-case>orean Language Engineering: Current Status of the Information Platform</title>
       <author><last>Kim</last><first>Seongyong</first></author>
-      <author><last>Choi</last><first>Key-Sun</first></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="b4f37ce6">C96-2185</url>
       <bibkey>kim-choi-1996-korean</bibkey>
     </paper>
@@ -1338,16 +1338,16 @@
     </paper>
     <paper id="187">
       <title><fixed-case>GATE</fixed-case>-a General Architecture for Text Engineering</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Robert J.</first><last>Gaizauskas</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-gaizauskas"><first>Robert J.</first><last>Gaizauskas</last></author>
       <url hash="0090489a">C96-2187</url>
       <bibkey>cunningham-etal-1996-gate</bibkey>
     </paper>
     <paper id="188">
       <title>Corpus-based annotated test set for Machine Translation evaluation by an Industrial User</title>
       <author><first>Eva</first><last>Dauphin</last></author>
-      <author><first>Veronika</first><last>Lux</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux</last></author>
       <url hash="e23522c6">C96-2188</url>
       <bibkey>dauphin-lux-1996-corpus</bibkey>
     </paper>
@@ -1368,7 +1368,7 @@
     <paper id="191">
       <title>Spoken-Language Translation Method Using Examples</title>
       <author><first>Hitoshi</first><last>Iida</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Osamu</first><last>Furuse</last></author>
       <url hash="e561154b">C96-2191</url>
       <bibkey>iida-etal-1996-spoken</bibkey>
@@ -1376,10 +1376,10 @@
     <paper id="192">
       <title>Tagging Spoken Language Using Written Language Statistics</title>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Leif</first><last>Gronqvist</last></author>
+      <author id="leif-groenqvist"><first>Leif</first><last>Gronqvist</last></author>
       <author><first>Malin</first><last>Gustafsson</last></author>
       <author><first>TorbjSrn</first><last>Lager</last></author>
-      <author><first>Sylvana</first><last>Sofkova</last></author>
+      <author id="sylvana-sofkova-hashemi"><first>Sylvana</first><last>Sofkova</last></author>
       <url hash="8354e67a">C96-2192</url>
       <bibkey>nivre-etal-1996-tagging</bibkey>
     </paper>
@@ -1393,7 +1393,7 @@
     <paper id="194">
       <title>A Gradual Refinement Model for A Robust <fixed-case>T</fixed-case>hai Morphological Analyzer</title>
       <author><first>Asanee</first><last>Kawtrakul</last></author>
-      <author><first>Chalatip</first><last>Thumkanon</last></author>
+      <author id="chalathip-thumkanon"><first>Chalatip</first><last>Thumkanon</last></author>
       <author><first>Thitima</first><last>Jamjanya</last></author>
       <author><first>Parinee</first><last>Muangyunnan</last></author>
       <author><first>Kritsada</first><last>Poolwan</last></author>
@@ -1440,7 +1440,7 @@
     </paper>
     <paper id="200">
       <title><fixed-case>C</fixed-case>hinese String Searching Using the <fixed-case>KMP</fixed-case> Algorithm</title>
-      <author><first>Robert W.P.</first><last>Luk</last></author>
+      <author id="robert-w-p-luk"><first>Robert W.P.</first><last>Luk</last></author>
       <url hash="f61c0d81">C96-2200</url>
       <bibkey>luk-1996-chinese</bibkey>
     </paper>
@@ -1448,26 +1448,26 @@
       <title><fixed-case>P</fixed-case>a<fixed-case>T</fixed-case>rans- A Patent Translation System</title>
       <author><first>Bjarne</first><last>Orsnes</last></author>
       <author><first>Bradley</first><last>Music</last></author>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <url hash="b913349f">C96-2201</url>
       <bibkey>orsnes-etal-1996-patrans</bibkey>
     </paper>
     <paper id="202">
       <title>Word Extraction from Corpora and Its Part-of-Speech Estimation Using Distributional Analysis</title>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="e2a503af">C96-2202</url>
       <bibkey>mori-nagao-1996-word</bibkey>
     </paper>
     <paper id="203">
       <title>Morphological Analyzer as Syntactic Parser</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <url hash="199eebe9">C96-2203</url>
       <bibkey>proszeky-1996-morphological</bibkey>
     </paper>
     <paper id="204">
       <title>Constructing Verb Semantic Classes for <fixed-case>F</fixed-case>rench: Methods and Evaluation</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="264b5d64">C96-2204</url>
       <bibkey>saint-dizier-1996-constructing</bibkey>
     </paper>
@@ -1489,7 +1489,7 @@
     <paper id="207">
       <title>How the Linguistic Negation Can Have an Effect in Object-Based Knowledge Representation Model</title>
       <author><first>Lahcene</first><last>Si Ameur</last></author>
-      <author><first>Jacques</first><last>Rouault</last></author>
+      <author id="jacques-rouault"><first>Jacques</first><last>Rouault</last></author>
       <url hash="a184ee10">C96-2207</url>
       <bibkey>si-ameur-rouault-1996-linguistic</bibkey>
     </paper>
@@ -1502,13 +1502,13 @@
     </paper>
     <paper id="209">
       <title>A tagger/lemmatiser for <fixed-case>D</fixed-case>utch medical language</title>
-      <author><first>Peter</first><last>Spyns</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
       <url hash="61b8ab6b">C96-2209</url>
       <bibkey>spyns-1996-tagger</bibkey>
     </paper>
     <paper id="210">
       <title>A Distributed Architecture for Text Analysis in <fixed-case>F</fixed-case>rench: An Application to Complex Linguistic Phenomena Processing</title>
-      <author><first>Marie-Helene</first><last>Stefanini</last></author>
+      <author id="marie-helene-stefanini"><first>Marie-Helene</first><last>Stefanini</last></author>
       <author><first>Karine</first><last>Warren</last></author>
       <url hash="f2438ee9">C96-2210</url>
       <bibkey>stefanini-warren-1996-distributed</bibkey>
@@ -1538,7 +1538,7 @@
     </paper>
     <paper id="215">
       <title>Computational Complexity of Probabilistic Disambiguation by means of Tree-Grammars</title>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <url hash="ca653252">C96-2215</url>
       <bibkey>simaan-1996-computational</bibkey>
     </paper>
diff --git a/data/xml/C98.xml b/data/xml/C98.xml
index 3d188c5890..72755bfb86 100644
--- a/data/xml/C98.xml
+++ b/data/xml/C98.xml
@@ -13,7 +13,7 @@
     <paper id="1">
       <title>A Quasi-Dependency Model for Structural Analysis it of <fixed-case>C</fixed-case>hinese <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case>s</title>
       <author><last>Zhao</last><first>Jun</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="8e8728ee">C98-1001</url>
       <bibkey>zhao-huang-1998-quasi</bibkey>
     </paper>
@@ -25,9 +25,9 @@
     </paper>
     <paper id="3">
       <title>Towards a single proposal in spelling correction</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <author><first>Atro</first><last>Voutilainen</last></author>
       <url hash="64f4f260">C98-1003</url>
       <bibkey>agirre-etal-1998-towards</bibkey>
@@ -42,15 +42,15 @@
     </paper>
     <paper id="5">
       <title>Parole et traduction automatique: le module de reconnaissance <fixed-case>RAPHAEL</fixed-case></title>
-      <author><first>Mohammad</first><last>Akbar</last></author>
+      <author id="mohammad-akbar"><first>Mohammad</first><last>Akbar</last></author>
       <author><first>Jean</first><last>Caelen</last></author>
       <url hash="fc6b7889">C98-1005</url>
       <bibkey>akbar-caelen-1998-parole</bibkey>
     </paper>
     <paper id="6">
       <title>Automatic Acquisition of Hierarchical Transduction Models for Machine Translation</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Shona</first><last>Douglas</last></author>
       <url hash="aa6cf28b">C98-1006</url>
       <bibkey>alshawi-etal-1998-automatic</bibkey>
@@ -87,9 +87,9 @@
     </paper>
     <paper id="11">
       <title>Evaluating a Focus-Based Approach to Anaphora Resolution</title>
-      <author><first>Saliha</first><last>Azzam</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <url hash="fe39536a">C98-1011</url>
       <bibkey>azzam-etal-1998-evaluating</bibkey>
     </paper>
@@ -102,9 +102,9 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>B</fixed-case>erkeley <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Project</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>John B.</first><last>Lowe</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
       <url hash="395613de">C98-1013</url>
       <bibkey>baker-etal-1998-berkeley</bibkey>
     </paper>
@@ -118,13 +118,13 @@
     <paper id="15">
       <title>Semi-Automatic Recognition of Noun Modifier Relationships</title>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <url hash="0bb1e67d">C98-1015</url>
       <bibkey>barker-szpakowicz-1998-semi</bibkey>
     </paper>
     <paper id="16">
       <title><fixed-case>R</fixed-case>edundancy: helping semantic disambiguation</title>
-      <author><first>Caroline</first><last>Barriere</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barriere</last></author>
       <url hash="9c37a99a">C98-1016</url>
       <bibkey>barriere-1998-redundancy</bibkey>
     </paper>
@@ -151,9 +151,9 @@
     </paper>
     <paper id="20">
       <title>Trigger-Pair Predictors in Parsing and Tagging</title>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <url hash="9e1dda68">C98-1020</url>
       <bibkey>black-etal-1998-trigger</bibkey>
     </paper>
@@ -166,7 +166,7 @@
     <paper id="22">
       <title>A Probabilistic Corpus-Driven Model for Lexical-Functional Analysis</title>
       <author><first>Rens</first><last>Bod</last></author>
-      <author><first>Ronald</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald</first><last>Kaplan</last></author>
       <url hash="2c5e8e2d">C98-1022</url>
       <bibkey>bod-kaplan-1998-probabilistic</bibkey>
     </paper>
@@ -174,22 +174,22 @@
       <title>Anchoring Floating Quantifiers in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Daniela</first><last>Kurz</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <url hash="45a0a7af">C98-1023</url>
       <bibkey>bond-etal-1998-anchoring</bibkey>
     </paper>
     <paper id="24">
       <title>Managing information at linguistic interfaces</title>
       <author><first>Johan</first><last>Bos</last></author>
-      <author><first>C.J.</first><last>Rupp</last></author>
-      <author><first>Bianka</first><last>Buschbeck-Wolf</last></author>
+      <author id="c-j-rupp"><first>C.J.</first><last>Rupp</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck-Wolf</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <url hash="57a47371">C98-1024</url>
       <bibkey>bos-etal-1998-managing</bibkey>
     </paper>
     <paper id="25">
       <title>Deriving the Predicate-Argument Structure for a Free Word Order Language</title>
-      <author><first>Cem</first><last>Bozsahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
       <url hash="0256599a">C98-1025</url>
       <bibkey>bozsahin-1998-deriving</bibkey>
     </paper>
@@ -201,16 +201,16 @@
     </paper>
     <paper id="27">
       <title>The Logical Structure of Binding</title>
-      <author><first>Antonio</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
       <url hash="f8df2927">C98-1027</url>
       <bibkey>branco-1998-logical</bibkey>
     </paper>
     <paper id="28">
       <title>Beyond N-Grams: Can Linguistic Sophistication Improve Language Modeling?</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
-      <author><first>Lidia</first><last>Mangu</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
+      <author id="lidia-mangu"><first>Lidia</first><last>Mangu</last></author>
       <url hash="ff271377">C98-1028</url>
       <bibkey>brill-etal-1998-beyond</bibkey>
     </paper>
@@ -229,9 +229,9 @@
     </paper>
     <paper id="31">
       <title>Named Entity Scoring for Speech Input</title>
-      <author><first>John D.</first><last>Burger</last></author>
-      <author><first>David</first><last>Palmer</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
+      <author id="david-d-palmer"><first>David</first><last>Palmer</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="d3a0cb63">C98-1031</url>
       <bibkey>burger-etal-1998-named</bibkey>
     </paper>
@@ -241,29 +241,29 @@
       <author><first>Karen</first><last>Kukich</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Chi</first><last>Lu</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Lisa</first><last>Braden-Harder</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="lisa-braden-harder"><first>Lisa</first><last>Braden-Harder</last></author>
       <author><first>Mary Dee</first><last>Harris</last></author>
       <url hash="e4f8940a">C98-1032</url>
       <bibkey>burstein-etal-1998-automated</bibkey>
     </paper>
     <paper id="33">
       <title>Building Parallel <fixed-case>LTAG</fixed-case> for <fixed-case>F</fixed-case>rench and <fixed-case>I</fixed-case>talian</title>
-      <author><first>Marie-Helene</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Helene</first><last>Candito</last></author>
       <url hash="f51572d9">C98-1033</url>
       <bibkey>candito-1998-building</bibkey>
     </paper>
     <paper id="34">
       <title>Error-Driven Pruning of Treebank Grammars for Base Noun Phrase Identification</title>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>David</first><last>Pierce</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
       <url hash="7a814c99">C98-1034</url>
       <bibkey>cardie-pierce-1998-error</bibkey>
     </paper>
     <paper id="35">
       <title>Exploiting Syntactic Structure for Language Modeling</title>
       <author><first>Ciprian</first><last>Chelba</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <url hash="1f033091">C98-1035</url>
       <bibkey>chelba-jelinek-1998-exploiting</bibkey>
     </paper>
@@ -278,14 +278,14 @@
     </paper>
     <paper id="37">
       <title>A Concept-based Adaptive Approach to Word Sense Disambiguation</title>
-      <author><first>Jen Nan</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jen-nan-chen"><first>Jen Nan</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="ce8123be">C98-1037</url>
       <bibkey>chen-chang-1998-concept</bibkey>
     </paper>
     <paper id="38">
       <title><fixed-case>PAT</fixed-case>-Trees with the Deletion Function as the Learning Device for Linguistic Patterns</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Wen</first><last>Tsuei</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
       <url hash="d2f71165">C98-1038</url>
@@ -294,15 +294,15 @@
     <paper id="39">
       <title>Hybrid Approaches to Improvement of Translation Quality in Web-based <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation</title>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
-      <author><first>Han-Min</first><last>Jung</last></author>
+      <author id="han-min-jung"><first>Han-Min</first><last>Jung</last></author>
       <author><first>Jun-Sik</first><last>Park</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="c042dcee">C98-1039</url>
       <bibkey>choi-etal-1998-hybrid</bibkey>
     </paper>
     <paper id="40">
       <title>Dialogue Management in Vector-Based Call Routing</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <url hash="4bd70a9b">C98-1040</url>
       <bibkey>chu-carroll-carpenter-1998-dialogue</bibkey>
@@ -310,7 +310,7 @@
     <paper id="41">
       <title>Machine Translation vs. Dictionary Term Translation - a Comparison for <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese News Article Alignment</title>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
       <url hash="d57588bf">C98-1041</url>
       <bibkey>collier-etal-1998-machine</bibkey>
@@ -319,7 +319,7 @@
       <title>An Experiment in Hybrid Dictionary and Statistical Sentence Alignment</title>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Kenji</first><last>Ono</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <url hash="7c6d6827">C98-1042</url>
       <bibkey>collier-etal-1998-experiment</bibkey>
     </paper>
@@ -332,8 +332,8 @@
     <paper id="44">
       <title>Veins Theory: A Model of Global Discourse Cohesion and Coherence</title>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <url hash="33b1dc0c">C98-1044</url>
       <bibkey>cristea-etal-1998-veins</bibkey>
     </paper>
@@ -341,15 +341,15 @@
       <title>Automatic Semantic Tagging of Unknown Proper Names</title>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Danilo</first><last>Luzi</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="cdb706df">C98-1045</url>
       <bibkey>cucchiarelli-etal-1998-automatic</bibkey>
     </paper>
     <paper id="46">
       <title>Investigating regular sense extensions based on intersective Levin classes</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <url hash="9129135e">C98-1046</url>
       <bibkey>dang-etal-1998-investigating</bibkey>
@@ -372,28 +372,28 @@
     <paper id="49">
       <title>Multext-East: Parallel and Comparable Corpora and Lexicons for Six Central and <fixed-case>E</fixed-case>astern <fixed-case>E</fixed-case>uropean Languages</title>
       <author><first>Ludmila</first><last>Dimitrova</last></author>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Heiki Jaan</first><last>Kaalep</last></author>
-      <author><first>Vladimir</first><last>Petkevic</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki Jaan</first><last>Kaalep</last></author>
+      <author id="vladimir-petkevic"><first>Vladimir</first><last>Petkevic</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <url hash="7e6c28b1">C98-1049</url>
       <bibkey>dimitrova-etal-1998-multext</bibkey>
     </paper>
     <paper id="50">
       <title>Error Driven Word Sense Disambiguation</title>
-      <author><first>Luca</first><last>Dini</last></author>
-      <author><first>Vittorio</first><last>Di Tomaso</last></author>
-      <author><first>Frederique</first><last>Segond</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
+      <author id="vittorio-di-tomaso"><first>Vittorio</first><last>Di Tomaso</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
       <url hash="ebb719a1">C98-1050</url>
       <bibkey>dini-etal-1998-error</bibkey>
     </paper>
     <paper id="51">
       <title>An Empirical Investigation of Proposals in Collaborative Dialogues</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
-      <author><first>Richmond H.</first><last>Thomason</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
+      <author id="richmond-h-thomason"><first>Richmond H.</first><last>Thomason</last></author>
       <url hash="4e1bf51f">C98-1051</url>
       <bibkey>di-eugenio-etal-1998-empirical</bibkey>
     </paper>
@@ -405,8 +405,8 @@
     </paper>
     <paper id="53">
       <title>A Text Input Front-end Processor as an Information Access Platform</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kiyoshi</first><last>Yamabana</last></author>
       <url hash="aa636aa2">C98-1053</url>
       <bibkey>doi-etal-1998-text</bibkey>
@@ -415,8 +415,8 @@
       <title>Syntactic and Semantic Transfer with <fixed-case>F</fixed-case>-Structures</title>
       <author><first>Michael</first><last>Dorna</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <url hash="76bbdda1">C98-1054</url>
       <bibkey>dorna-etal-1998-syntactic</bibkey>
     </paper>
@@ -438,21 +438,21 @@
     <paper id="57">
       <title>Spelling Correction Using Context</title>
       <author><first>Mohammad Ali</first><last>Elmi</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="a0f9bcaa">C98-1057</url>
       <bibkey>elmi-evens-1998-spelling</bibkey>
     </paper>
     <paper id="58">
       <title>Ambiguity Preserving Machine Translation using Packed Representations</title>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <url hash="e2162fcc">C98-1058</url>
       <bibkey>emele-dorna-1998-ambiguity</bibkey>
     </paper>
     <paper id="59">
       <title>A structure-sharing parser for lexicalized grammars</title>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <url hash="4eac942d">C98-1059</url>
       <bibkey>evans-weir-1998-structure</bibkey>
     </paper>
@@ -490,7 +490,7 @@
     </paper>
     <paper id="64">
       <title>Toward General-Purpose Learning for Information Extraction</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <url hash="d4b39be0">C98-1064</url>
       <bibkey>freitag-1998-toward</bibkey>
     </paper>
@@ -524,7 +524,7 @@
     </paper>
     <paper id="69">
       <title>Semantic-Head Based Resolution of Scopal Ambiguities</title>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <url hash="416ed0da">C98-1069</url>
       <bibkey>gamback-bos-1998-semantic</bibkey>
@@ -533,20 +533,20 @@
       <title>Vers l’utilisation des methodes formelles pour le developpement de linguiciels</title>
       <author><first>Bilel</first><last>Gargouri</last></author>
       <author><first>Mohamed</first><last>Jmaiel</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <url hash="ee5663b0">C98-1070</url>
       <bibkey>gargouri-etal-1998-vers</bibkey>
     </paper>
     <paper id="71">
       <title>Flow Network Models for Word Alignment and Terminology Extraction from Bilingual Corpora</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <url hash="7795a2d1">C98-1071</url>
       <bibkey>gaussier-1998-flow</bibkey>
     </paper>
     <paper id="72">
       <title>Growing Semantic Grammars</title>
-      <author><first>Marsal</first><last>Gavaldà</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavaldà</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="6e2a95d7">C98-1072</url>
       <bibkey>gavalda-waibel-1998-growing</bibkey>
     </paper>
@@ -558,7 +558,7 @@
     </paper>
     <paper id="74">
       <title>Efficient Linear Logic Meaning Assembly</title>
-      <author><first>Vineet</first><last>Gupta</last></author>
+      <author id="vineet-gupta"><first>Vineet</first><last>Gupta</last></author>
       <author><first>John</first><last>Lamping</last></author>
       <url hash="b70a2fe8">C98-1074</url>
       <bibkey>gupta-lamping-1998-efficient</bibkey>
@@ -579,23 +579,23 @@
     </paper>
     <paper id="77">
       <title>Tagging Inflective Languages: Prediction of Morphological Categories for a Rich, Structured Tagset</title>
-      <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Barbora</first><last>Hladka</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladka</last></author>
       <url hash="de348ab5">C98-1077</url>
       <bibkey>hajic-hladka-1998-tagging</bibkey>
     </paper>
     <paper id="78">
       <title>Improving Data Driven Wordclass Tagging by System Combination</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="f4669f95">C98-1078</url>
       <bibkey>van-halteren-etal-1998-improving</bibkey>
     </paper>
     <paper id="79">
       <title>A step towards the detection of semantic variants of terms in technical documents</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Cecile</first><last>Gros</last></author>
       <url hash="7c35ec13">C98-1079</url>
       <bibkey>hamon-etal-1998-step</bibkey>
@@ -603,7 +603,7 @@
     <paper id="80">
       <title>Using Decision Trees to Construct a Practical Parser</title>
       <author><first>Masahiko</first><last>Haruno</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Yoshifumi</first><last>Ooyama</last></author>
       <url hash="45698966">C98-1080</url>
       <bibkey>haruno-etal-1998-using</bibkey>
@@ -611,7 +611,7 @@
     <paper id="81">
       <title>Integrating Text Plans for Conciseness and Coherence</title>
       <author><first>Terrence</first><last>Harvey</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="f02fe744">C98-1081</url>
       <bibkey>harvey-carberry-1998-integrating</bibkey>
     </paper>
@@ -632,7 +632,7 @@
     </paper>
     <paper id="84">
       <title>A Connectionist Architecture for Learning to Parse</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Peter</first><last>Lane</last></author>
       <url hash="9ecb40fc">C98-1084</url>
       <bibkey>henderson-lane-1998-connectionist</bibkey>
@@ -651,8 +651,8 @@
     </paper>
     <paper id="87">
       <title>Long Distance Pronominalisation and Global Focus</title>
-      <author><first>Janet</first><last>Hitzeman</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <url hash="be93acd3">C98-1087</url>
       <bibkey>hitzeman-poesio-1998-long</bibkey>
     </paper>
@@ -664,7 +664,7 @@
     </paper>
     <paper id="89">
       <title>Terminological variation, a means of identifying research topics from texts</title>
-      <author><first>Fidelia</first><last>Ibekwe-Sanjuan</last></author>
+      <author id="fidelia-ibekwe-sanjuan"><first>Fidelia</first><last>Ibekwe-Sanjuan</last></author>
       <url hash="f3cae6d0">C98-1089</url>
       <bibkey>ibekwe-sanjuan-1998-terminological</bibkey>
     </paper>
@@ -685,15 +685,15 @@
     </paper>
     <paper id="92">
       <title>Exploring the Characteristics of Multi-Party Dialogues</title>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <author><first>Tsuneaki</first><last>Kato</last></author>
       <url hash="26da6ba5">C98-1092</url>
       <bibkey>ishizaki-kato-1998-exploring</bibkey>
     </paper>
     <paper id="93">
       <title>Robust Interaction through Partial Interpretation and Dialogue Management</title>
-      <author><first>Arne</first><last>Jonsson</last></author>
-      <author><first>Lena</first><last>Stromback</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Stromback</last></author>
       <url hash="90276407">C98-1093</url>
       <bibkey>jonsson-stromback-1998-robust</bibkey>
     </paper>
@@ -705,23 +705,23 @@
     </paper>
     <paper id="95">
       <title>Combining a <fixed-case>C</fixed-case>hinese Thesaurus with a <fixed-case>C</fixed-case>hinese Dictionary</title>
-      <author><last>Ji</last><first>Donghong</first></author>
-      <author><last>Gong</last><first>Junping</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="junping-gong"><first>Junping</first><last>Gong</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="ce5fb2d1">C98-1095</url>
       <bibkey>ji-etal-1998-combining</bibkey>
     </paper>
     <paper id="96">
       <title>Combining Multiple, Large-Scale Resources in a Reusable Lexicon for Natural Language Generation</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="ca0956e3">C98-1096</url>
       <bibkey>jing-mckeown-1998-combining</bibkey>
     </paper>
     <paper id="97">
       <title>Text Segmentation Using Reiteration and Collocation</title>
-      <author><first>Amanda C.</first><last>Jobbins</last></author>
-      <author><first>Lindsay J.</first><last>Evett</last></author>
+      <author id="amanda-c-jobbins"><first>Amanda C.</first><last>Jobbins</last></author>
+      <author id="lindsay-j-evett"><first>Lindsay J.</first><last>Evett</last></author>
       <url hash="434dd527">C98-1097</url>
       <bibkey>jobbins-evett-1998-text</bibkey>
     </paper>
@@ -733,13 +733,13 @@
     </paper>
     <paper id="99">
       <title>Unification-based Multimodal Parsing</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <url hash="0e57575c">C98-1099</url>
       <bibkey>johnston-1998-unification</bibkey>
     </paper>
     <paper id="100">
       <title>Context Management with Topics for Spoken Dialogue Systems</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Akio</first><last>Yokoo</last></author>
       <url hash="cedded11">C98-1100</url>
@@ -755,31 +755,31 @@
       <title>Pseudo-Projectivity: A Polynomially Parsable Non-Projective Dependency Grammar</title>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="fe86aac6">C98-1102</url>
       <bibkey>kahane-etal-1998-pseudo</bibkey>
     </paper>
     <paper id="103">
       <title>A Method for Correcting Errors in Speech Recognition Using the Statistical Features of Character Co-occurrence</title>
       <author><first>Satoshi</first><last>Kaki</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <url hash="ea36b9dc">C98-1103</url>
       <bibkey>kaki-etal-1998-method</bibkey>
     </paper>
     <paper id="104">
       <title>Use of Mutual Information Based Character Clusters in Dictionary-less Morphological Analysis of <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Yasuhiro</first><last>Kawata</last></author>
       <author><first>Yumiko</first><last>Kinjo</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Ezra W.</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra W.</first><last>Black</last></author>
       <url hash="fa8b5c7d">C98-1104</url>
       <bibkey>kashioka-etal-1998-use</bibkey>
     </paper>
     <paper id="105">
       <title>Know When to Hold ’Em: Shuffling Deterministically in a Parser for Nonconcatenative Grammars</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <author><first>Mike</first><last>Calcagno</last></author>
       <author><first>Paul C.</first><last>Davis</last></author>
       <url hash="9b40b3d7">C98-1105</url>
@@ -795,21 +795,21 @@
       <title>Unlimited Vocabulary Grapheme to Phoneme Conversion for <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case></title>
       <author><first>Byeongchang</first><last>Kim</last></author>
       <author><first>WonIl</first><last>Lee</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="65bad547">C98-1107</url>
       <bibkey>kim-etal-1998-unlimited</bibkey>
     </paper>
     <paper id="108">
       <title>Role of Verbs in Document Analysis</title>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <url hash="18e46624">C98-1108</url>
       <bibkey>klavans-kan-1998-role</bibkey>
     </paper>
     <paper id="109">
       <title>A Flexible Example-Based Parser Based on the <fixed-case>SSTC</fixed-case></title>
-      <author><first>Mosleh Hmoud</first><last>Al-Adhaileh</last></author>
+      <author id="mosleh-hmoud-al-adhaileh"><first>Mosleh Hmoud</first><last>Al-Adhaileh</last></author>
       <author><last>Tang</last><first>Enya Kong</first></author>
       <url hash="f25caba3">C98-1109</url>
       <bibkey>al-adhaileh-tang-1998-flexible</bibkey>
@@ -827,37 +827,37 @@
       <title>Compacting the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Grammar</title>
       <author><first>Alexander</first><last>Krotov</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="e5caf614">C98-1111</url>
       <bibkey>krotov-etal-1998-compacting</bibkey>
     </paper>
     <paper id="112">
       <title>Generation that Exploits Corpus-Based Statistical Knowledge</title>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <url hash="c7b9b4ee">C98-1112</url>
       <bibkey>langkilde-knight-1998-generation</bibkey>
     </paper>
     <paper id="113">
       <title>Methods and Practical Issues in Evaluating Alignment Techniques</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <url hash="a9085f35">C98-1113</url>
       <bibkey>langlais-etal-1998-methods</bibkey>
     </paper>
     <paper id="114">
       <title>A Framework for Customizable Generation of Hypertext Presentations</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="01e7e044">C98-1114</url>
       <bibkey>lavoie-rambow-1998-framework</bibkey>
     </paper>
     <paper id="115">
       <title>Automatic Acquisition of Language Model based on Head-Dependent Relation between Words</title>
       <author><first>Seungmi</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="f8239c2e">C98-1115</url>
       <bibkey>lee-choi-1998-automatic</bibkey>
     </paper>
@@ -869,7 +869,7 @@
     </paper>
     <paper id="117">
       <title>Characterizing and Recognizing Spoken Corrections in Human-Computer Dialogue</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <url hash="a783b229">C98-1117</url>
       <bibkey>levow-1998-characterizing</bibkey>
     </paper>
@@ -901,18 +901,18 @@
     </paper>
     <paper id="120">
       <title>Identifying Syntactic Role of Antecedent in <fixed-case>K</fixed-case>orean Relative Clause Using Corpus and Thesaurus Information</title>
-      <author><first>Hui-Feng</first><last>Li</last></author>
+      <author id="huifeng-li"><first>Hui-Feng</first><last>Li</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <url hash="784fa3ae">C98-2120</url>
       <bibkey>li-etal-1998-identifying</bibkey>
     </paper>
     <paper id="121">
       <title>A Test Environment for Natural Language Understanding Systems</title>
       <author><first>Li</first><last>Li</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
       <author><first>Dongdong</first><last>Chen</last></author>
       <url hash="ecfccda3">C98-2121</url>
       <bibkey>li-etal-1998-test</bibkey>
@@ -932,26 +932,26 @@
     </paper>
     <paper id="124">
       <title>Evaluating Response Strategies in a Web-Based Spoken Dialogue Agent</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="f624721d">C98-2124</url>
       <bibkey>litman-etal-1998-evaluating</bibkey>
     </paper>
     <paper id="125">
       <title>Formal aspects and parsing issues of dependency theory</title>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <url hash="7661b260">C98-2125</url>
       <bibkey>lombardo-lesmo-1998-formal</bibkey>
     </paper>
     <paper id="126">
       <title>An Architecture for Dialogue Management, Context Tracking, and Pragmatic Adaptation in Spoken Dialogue Systems</title>
-      <author><first>Susann</first><last>LuperFoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>LuperFoy</last></author>
       <author><first>Dan</first><last>Loehr</last></author>
       <author><first>David</first><last>Duff</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <author><first>Lisa</first><last>Harper</last></author>
       <url hash="a4c84555">C98-2126</url>
       <bibkey>luperfoy-etal-1998-architecture</bibkey>
@@ -968,29 +968,29 @@
       <author><last>Makino</last><first>Takaki</first></author>
       <author><last>Yoshida</last><first>Minoru</first></author>
       <author><last>Torisawa</last><first>Kentaro</first></author>
-      <author><last>Tsujii</last><first>Jun’ichi</first></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="24a76acb">C98-2128</url>
       <bibkey>makino-etal-1998-lilfes</bibkey>
     </paper>
     <paper id="129">
       <title>Bitext Correspondences through Rich Mark-up</title>
-      <author><first>Raquel</first><last>Martinez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martinez</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <url hash="0b021fde">C98-2129</url>
       <bibkey>martinez-etal-1998-bitext</bibkey>
     </paper>
     <paper id="130">
       <title>Discourse Cues for Broadcast News Segmentation</title>
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <url hash="8fa031e8">C98-2130</url>
       <bibkey>maybury-1998-discourse</bibkey>
     </paper>
     <paper id="131">
       <title>Confmnation in Multimodal Systems</title>
-      <author><first>David R.</first><last>McGee</last></author>
-      <author><first>Philip R.</first><last>Cohen</last></author>
-      <author><first>Sharon</first><last>Oviatt</last></author>
+      <author id="david-mcgee"><first>David R.</first><last>McGee</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
       <url hash="9e011f79">C98-2131</url>
       <bibkey>mcgee-etal-1998-confmnation</bibkey>
     </paper>
@@ -1011,11 +1011,11 @@
     </paper>
     <paper id="134">
       <title>Deriving Transfer Rules from Dominance-Preserving Alignments</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
       <url hash="3654a66a">C98-2134</url>
       <bibkey>meyers-etal-1998-deriving</bibkey>
     </paper>
@@ -1042,7 +1042,7 @@
     </paper>
     <paper id="138">
       <title>Robust pronoun resolution with limited knowledge</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="27f3c5a2">C98-2138</url>
       <bibkey>mitkov-1998-robust</bibkey>
     </paper>
@@ -1050,7 +1050,7 @@
       <title><fixed-case>HPSG</fixed-case>-Style Underspecified <fixed-case>J</fixed-case>apanese Grammar with Wide Coverage</title>
       <author><last>Mitsuishi</last><first>Yutaka</first></author>
       <author><last>Torisawa</last><first>Kentaro</first></author>
-      <author><last>Tsujii</last><first>Jun’ichi</first></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="07f761c0">C98-2139</url>
       <bibkey>mitsuishi-etal-1998-hpsg</bibkey>
     </paper>
@@ -1058,7 +1058,7 @@
       <title>Text Segmentation with Multiple Surface Linguistic Cues</title>
       <author><last>Mochizuki</last><first>Hajime</first></author>
       <author><last>Honda</last><first>Takeo</first></author>
-      <author><last>Okumura</last><first>Manabu</first></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="8ad36434">C98-2140</url>
       <bibkey>mochizuki-etal-1998-text</bibkey>
     </paper>
@@ -1071,14 +1071,14 @@
     <paper id="142">
       <title>Dynamic compilation of weighted context-free grammars</title>
       <author><first>Mehryar</first><last>Mohri</last></author>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <url hash="6e5fe572">C98-2142</url>
       <bibkey>mohri-pereira-1998-dynamic</bibkey>
     </paper>
     <paper id="143">
       <title>A Stochastic Language Model using Dependency and Its Improvement by Word Clustering</title>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="eee3ae93">C98-2143</url>
       <bibkey>mori-nagao-1998-stochastic</bibkey>
     </paper>
@@ -1092,14 +1092,14 @@
     <paper id="145">
       <title>An Estimate of Referent of Noun Phrases in <fixed-case>J</fixed-case>apanese Sentences</title>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="169eedb6">C98-2145</url>
       <bibkey>murata-nagao-1998-estimate</bibkey>
     </paper>
     <paper id="146">
       <title>Automatic Text Summarization Based on the Global Document Annotation</title>
       <author><first>Katashi</first><last>Nagao</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="b9935494">C98-2146</url>
       <bibkey>nagao-hasida-1998-automatic</bibkey>
     </paper>
@@ -1127,7 +1127,7 @@
     </paper>
     <paper id="150">
       <title>Constituent-based Accent Prediction</title>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
       <url hash="ae710e26">C98-2150</url>
       <bibkey>nakatani-1998-constituent</bibkey>
     </paper>
@@ -1158,21 +1158,21 @@
       <title>An Efficient Parallel Substrate for Typed Feature Structures on Shared Memory Parallel Machines</title>
       <author><last>Ninomiya</last><first>Takashi</first></author>
       <author><last>Torisawa</last><first>Kentaro</first></author>
-      <author><last>Tsujii</last><first>Jun’ichi</first></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="e3086788">C98-2154</url>
       <bibkey>ninomiya-etal-1998-efficient</bibkey>
     </paper>
     <paper id="155">
       <title>Universal Grammar and Lexis for Quick Ramp-Up of <fixed-case>MT</fixed-case> Systems</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
       <url hash="374cae04">C98-2155</url>
       <bibkey>nirenburg-raskin-1998-universal</bibkey>
     </paper>
     <paper id="156">
       <title>Integration of Large-Scale Linguistic Resources in a Natural Language Understanding System</title>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Li</first><last>Li</last></author>
       <author><first>Katharine P.</first><last>Beals</last></author>
       <url hash="c2e588b1">C98-2156</url>
@@ -1180,7 +1180,7 @@
     </paper>
     <paper id="157">
       <title>Improving Statistical Natural Language Translation with Categories and Rules</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Hans</first><last>Weber</last></author>
       <url hash="4a294a0e">C98-2157</url>
       <bibkey>och-weber-1998-improving</bibkey>
@@ -1188,28 +1188,28 @@
     <paper id="158">
       <title>Recognition of the Coherence Relation between Te-linked Clauses</title>
       <author><first>Akira</first><last>Oishi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="121f407a">C98-2158</url>
       <bibkey>oishi-matsumoto-1998-recognition</bibkey>
     </paper>
     <paper id="159">
       <title>On the Evaluation and Comparison of Taggers: the Effect of Noise in Testing Corpora.</title>
-      <author><first>Lluis</first><last>Padro</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padro</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <url hash="ea73cd3a">C98-2159</url>
       <bibkey>padro-marquez-1998-evaluation</bibkey>
     </paper>
     <paper id="160">
       <title>Learning Intonation Rules for Concept to Speech Generation</title>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="f678a766">C98-2160</url>
       <bibkey>pan-mckeown-1998-learning</bibkey>
     </paper>
     <paper id="161">
       <title>Possessive Pronominal Anaphor Resolution in <fixed-case>P</fixed-case>ortuguese Written Texts</title>
-      <author><first>Ivandre</first><last>Paraboni</last></author>
-      <author><first>Vera Lucia Strube</first><last>de Lima</last></author>
+      <author id="ivandre-paraboni"><first>Ivandre</first><last>Paraboni</last></author>
+      <author id="vera-lucia-strube-de-lima"><first>Vera Lucia Strube</first><last>de Lima</last></author>
       <url hash="4e0cb97e">C98-2161</url>
       <bibkey>paraboni-de-lima-1998-possessive</bibkey>
     </paper>
@@ -1218,7 +1218,7 @@
       <author><first>Junsik</first><last>Park</last></author>
       <author><first>Jung-Goo</first><last>Kang</last></author>
       <author><first>Wook</first><last>Hur</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="234e85ea">C98-2162</url>
       <bibkey>park-etal-1998-machine</bibkey>
     </paper>
@@ -1247,13 +1247,13 @@
       <author><first>Hannes</first><last>Pirker</last></author>
       <author><first>Georg</first><last>Niklfeld</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="7cc8253a">C98-2166</url>
       <bibkey>pirker-etal-1998-information</bibkey>
     </paper>
     <paper id="167">
       <title>Reference Resolution beyond Coreference: a Conceptual Frame and its Application</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Gerard</first><last>Sabah</last></author>
       <url hash="bea41710">C98-2167</url>
@@ -1262,14 +1262,14 @@
     <paper id="168">
       <title>Multilingual authoring using feedback texts</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="b2e1e0fb">C98-2168</url>
       <bibkey>power-scott-1998-multilingual</bibkey>
     </paper>
     <paper id="169">
       <title>Practical Glossing by Prioritised Tiling</title>
       <author><first>Victor</first><last>Poznanski</last></author>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <author><first>Jan</first><last>IJdens</last></author>
       <author><first>Steffan</first><last>Corley</last></author>
       <url hash="33b8372f">C98-2169</url>
@@ -1277,19 +1277,19 @@
     </paper>
     <paper id="170">
       <title>An Intelligent Multi-Dictionary Environment</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
       <url hash="9e2bf546">C98-2170</url>
       <bibkey>proszeky-1998-intelligent</bibkey>
     </paper>
     <paper id="171">
       <title>Learning Correlations between Linguistic Indicators and Semantic Constraints: Reuse of Context-Dependent Descriptions of Entities</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <url hash="e6d3cdd1">C98-2171</url>
       <bibkey>radev-1998-learning</bibkey>
     </paper>
     <paper id="172">
       <title>Statistical Models for Unsupervised Prepositional Phrase Attachment</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="dbd01cde">C98-2172</url>
       <bibkey>ratnaparkhi-1998-statistical</bibkey>
     </paper>
@@ -1302,24 +1302,24 @@
     </paper>
     <paper id="174">
       <title>Generating the Structure of Argument</title>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Derek</first><last>Long</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="derek-long"><first>Derek</first><last>Long</last></author>
       <url hash="f7434be4">C98-2174</url>
       <bibkey>reed-long-1998-generating</bibkey>
     </paper>
     <paper id="175">
       <title><fixed-case>M</fixed-case>ind<fixed-case>N</fixed-case>et: acquiring and structuring semantic information from text</title>
       <author><first>Stephen D.</first><last>Richardson</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <url hash="b49c5e6b">C98-2175</url>
       <bibkey>richardson-etal-1998-mindnet</bibkey>
     </paper>
     <paper id="176">
       <title>Building Accurate Semantic Taxonomies Monolingual <fixed-case>MRD</fixed-case>s</title>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <url hash="a77bcd5a">C98-2176</url>
       <bibkey>rigau-etal-1998-building</bibkey>
     </paper>
@@ -1339,14 +1339,14 @@
     <paper id="179">
       <title>How Verb Subcategorization Frequencies Are Affected By Corpus Choice</title>
       <author><first>Douglas</first><last>Roland</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="91e85412">C98-2179</url>
       <bibkey>roland-jurafsky-1998-verb</bibkey>
     </paper>
     <paper id="180">
       <title>An Interactive Domain Independent Approach to Robust Dialogue Interpretation</title>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
-      <author><first>Lori S.</first><last>Levin</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="lori-levin"><first>Lori S.</first><last>Levin</last></author>
       <url hash="c709dc6f">C98-2180</url>
       <bibkey>rose-levin-1998-interactive</bibkey>
     </paper>
@@ -1359,15 +1359,15 @@
     </paper>
     <paper id="182">
       <title>A <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Perspective for Adjectival Modification</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="a24569a6">C98-2182</url>
       <bibkey>saint-dizier-1998-generative</bibkey>
     </paper>
     <paper id="183">
       <title>Dialogue Act Tagging with Transformation-Based Learning</title>
-      <author><first>Ken</first><last>Samuel</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="ken-samuel"><first>Ken</first><last>Samuel</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <url hash="85188c54">C98-2183</url>
       <bibkey>samuel-etal-1998-dialogue</bibkey>
     </paper>
@@ -1425,15 +1425,15 @@
     <paper id="191">
       <title>Recognizing Syntactic Errors in the Writing of Second Language Learners</title>
       <author><first>David</first><last>Schneider</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="0008b0ee">C98-2191</url>
       <bibkey>schneider-mccoy-1998-recognizing</bibkey>
     </paper>
     <paper id="192">
       <title>Transforming Lattices into Non-deterministic Automata with Optional Null Arcs</title>
       <author><first>Mark</first><last>Seligman</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Boubaker</first><last>Meddeb-Hamrouni</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="boubaker-meddeb-hamrouni"><first>Boubaker</first><last>Meddeb-Hamrouni</last></author>
       <url hash="95b7ed8f">C98-2192</url>
       <bibkey>seligman-etal-1998-transforming</bibkey>
     </paper>
@@ -1451,14 +1451,14 @@
     </paper>
     <paper id="195">
       <title>Similarity metrics for aligning children’s articulation data</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <url hash="ee2efa44">C98-2195</url>
       <bibkey>somers-1998-similarity</bibkey>
     </paper>
     <paper id="196">
       <title>A Connectionist Approach to Prepositional Phrase Attachment for Real World Texts</title>
       <author><first>Josep M.</first><last>Sopena</last></author>
-      <author><first>Agusti</first><last>LLoberas</last></author>
+      <author id="agusti-lloberas"><first>Agusti</first><last>LLoberas</last></author>
       <author><first>Joan L.</first><last>Moliner</last></author>
       <url hash="575c5ab8">C98-2196</url>
       <bibkey>sopena-etal-1998-connectionist</bibkey>
@@ -1485,9 +1485,9 @@
     </paper>
     <paper id="200">
       <title>Summarization-based Query Expansion in Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Bowden</first><last>Wise</last></author>
+      <author id="g-bowden-wise"><first>Bowden</first><last>Wise</last></author>
       <url hash="7ec30f97">C98-2200</url>
       <bibkey>strzalkowski-etal-1998-summarization</bibkey>
     </paper>
@@ -1495,7 +1495,7 @@
       <title><fixed-case>C</fixed-case>hinese Word Segmentation without Using Lexicon and Hand-crafted Training Data</title>
       <author><last>Sun</last><first>Maosong</first></author>
       <author><last>Shen</last><first>Dayang</first></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <url hash="b64a1df8">C98-2201</url>
       <bibkey>sun-etal-1998-chinese</bibkey>
     </paper>
@@ -1509,15 +1509,15 @@
     </paper>
     <paper id="203">
       <title>Tagging <fixed-case>E</fixed-case>nglish by Path Voting Constraints</title>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <url hash="abd97694">C98-2203</url>
       <bibkey>tur-oflazer-1998-tagging</bibkey>
     </paper>
     <paper id="204">
       <title>Reactive Content Selection in the Generation of Real-time Soccer Commentary</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Itsuki</first><last>Noda</last></author>
       <url hash="73556f99">C98-2204</url>
       <bibkey>tanaka-ishii-etal-1998-reactive</bibkey>
@@ -1526,7 +1526,7 @@
       <title>Idiomatic object usage and support verbs</title>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <author><first>Jussi</first><last>Piitulainen</last></author>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <url hash="d4dbb682">C98-2205</url>
       <bibkey>tapanainen-etal-1998-idiomatic</bibkey>
     </paper>
@@ -1554,7 +1554,7 @@
       <title>General-to-Specific Model Selection for Subcategorization Preference</title>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Takashi</first><last>Miyata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="4ddd019a">C98-2209</url>
       <bibkey>utsuro-etal-1998-general</bibkey>
     </paper>
@@ -1568,20 +1568,20 @@
       <title>The Computational Lexical Semantics of Syntagmatic Relations</title>
       <author><first>Evelyne</first><last>Viegas</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="e13c945a">C98-2211</url>
       <bibkey>viegas-etal-1998-computational</bibkey>
     </paper>
     <paper id="212">
       <title>A tabular interpretation of a class of 2-Stack Automata</title>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Miguel</first><last>Alonso Pardo</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel</first><last>Alonso Pardo</last></author>
       <url hash="9c5722c2">C98-2212</url>
       <bibkey>villemonte-de-la-clergerie-alonso-pardo-1998-tabular</bibkey>
     </paper>
     <paper id="213">
       <title>Project for production of closed-caption <fixed-case>TV</fixed-case> programs for the hearing impaired</title>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <author><first>Eiji</first><last>Sawamura</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Ichiro</first><last>Maruyama</last></author>
@@ -1590,23 +1590,23 @@
     </paper>
     <paper id="214">
       <title>Learning Optimal Dialogue Strategies: A Case Study of a Spoken Dialogue Agent for Email</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <author><first>Jeanne C.</first><last>Fromer</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <url hash="632d9305">C98-2214</url>
       <bibkey>walker-etal-1998-learning</bibkey>
     </paper>
     <paper id="215">
       <title>Automatic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese name transliteration for development of multilingual resources</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cornelia Maria</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Cornelia Maria</first><last>Verspoor</last></author>
       <url hash="bedb9188">C98-2215</url>
       <bibkey>wan-verspoor-1998-automatic</bibkey>
     </paper>
     <paper id="216">
       <title>Modeling with Structures in Statistical Machine Translation</title>
       <author><first>Ye-Yi</first><last>Wang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="f0e2ab11">C98-2216</url>
       <bibkey>wang-waibel-1998-modeling</bibkey>
     </paper>
@@ -1626,7 +1626,7 @@
     <paper id="219">
       <title>Diagram Understanding Using Integration of Layout Information and Textual Information</title>
       <author><first>Yasuhiko</first><last>Watanabe</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="f2803112">C98-2219</url>
       <bibkey>watanabe-nagao-1998-diagram</bibkey>
     </paper>
@@ -1635,33 +1635,33 @@
       <author><first>Yasuhiko</first><last>Watanabe</last></author>
       <author><first>Yoshihiro</first><last>Okada</last></author>
       <author><first>Kengo</first><last>Kaneji</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="9a0cf3b1">C98-2220</url>
       <bibkey>watanabe-etal-1998-aligning</bibkey>
     </paper>
     <paper id="221">
       <title>Translating Idioms</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="bb0472ea">C98-2221</url>
       <bibkey>wehrli-1998-translating</bibkey>
     </paper>
     <paper id="222">
       <title>Head-Driven Generation with <fixed-case>HPSG</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="99f24675">C98-2222</url>
       <bibkey>wilcock-matsumoto-1998-head</bibkey>
     </paper>
     <paper id="223">
       <title>Word Sense Disambiguation using Optimised Combinations of Knowledge Sources</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <url hash="92f9f197">C98-2223</url>
       <bibkey>wilks-stevenson-1998-word</bibkey>
     </paper>
     <paper id="224">
       <title>A Model for Robust Processing of Spontaneous Speech by Integrating Viable Fragments</title>
-      <author><first>Karsten L.</first><last>Worm</last></author>
+      <author id="karsten-l-worm"><first>Karsten L.</first><last>Worm</last></author>
       <url hash="10ce4e77">C98-2224</url>
       <bibkey>worm-1998-model</bibkey>
     </paper>
@@ -1693,20 +1693,20 @@
     <paper id="228">
       <title>Feasibility Study for Ellipsis Resolution in Dialogues by Machine-Learning Technique</title>
       <author><last>Yamamoto</last><first>Kazuhide</first></author>
-      <author><last>Sumita</last><first>Eiichiro</first></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="452b946c">C98-2228</url>
       <bibkey>yamamoto-sumita-1998-feasibility</bibkey>
     </paper>
     <paper id="229">
       <title>Some Properties of Preposition and Subordinate Conjunction Attachments</title>
-      <author><first>Alexander S.</first><last>Yeh</last></author>
-      <author><first>Marc B.</first><last>Vilain</last></author>
+      <author id="alexander-yeh"><first>Alexander S.</first><last>Yeh</last></author>
+      <author id="marc-vilain"><first>Marc B.</first><last>Vilain</last></author>
       <url hash="91c1b936">C98-2229</url>
       <bibkey>yeh-vilain-1998-properties</bibkey>
     </paper>
     <paper id="230">
       <title>Evaluation of Importance of Sentences based on Connectivity to Title</title>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Toshiyuki</first><last>Okunishi</last></author>
       <author><first>Takahiro</first><last>Yamaji</last></author>
       <author><first>Yoji</first><last>Fukumochi</last></author>
@@ -1722,7 +1722,7 @@
     <paper id="232">
       <title>Using Chunk Based Partial Parsing of Spontaneous Speech in Unrestricted Domains for Reducing Word Error Rate in Speech Recognition</title>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="58ad8043">C98-2232</url>
       <bibkey>zechner-waibel-1998-using</bibkey>
     </paper>
@@ -1734,14 +1734,14 @@
     </paper>
     <paper id="234">
       <title>Word Association and <fixed-case>MI</fixed-case>-Trigger-based Language Modeling</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>KimTeng</first><last>Lua</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="kim-teng-lua"><first>KimTeng</first><last>Lua</last></author>
       <url hash="e59f7e3f">C98-2234</url>
       <bibkey>zhou-lua-1998-word</bibkey>
     </paper>
     <paper id="235">
       <title>Discovering Phonotactic Finite-State Automata by Genetic Search</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <url hash="6fd1eb6c">C98-2235</url>
       <bibkey>belz-1998-discovering</bibkey>
     </paper>
@@ -1772,7 +1772,7 @@
     </paper>
     <paper id="240">
       <title>Bridging the Gap between Dictionary and Thesaurus</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="4b3aaa91">C98-2240</url>
       <bibkey>kwong-1998-bridging</bibkey>
     </paper>
@@ -1784,7 +1784,7 @@
     </paper>
     <paper id="242">
       <title>Detecting Verbal Participation in Diathesis Alternations</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <url hash="6c8bb98a">C98-2242</url>
       <bibkey>mccarthy-korhonen-1998-detecting</bibkey>
diff --git a/data/xml/D07.xml b/data/xml/D07.xml
index f6e784e4fd..a77d37c63f 100644
--- a/data/xml/D07.xml
+++ b/data/xml/D07.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (<fixed-case>EMNLP</fixed-case>-<fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>)</booktitle>
       <url hash="42b5b355">D07-1</url>
-      <editor><first>Jason</first><last>Eisner</last></editor>
+      <editor id="jason-eisner"><first>Jason</first><last>Eisner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -35,7 +35,7 @@
     <paper id="3">
       <title>What is the <fixed-case>J</fixed-case>eopardy Model? A Quasi-Synchronous Grammar for <fixed-case>QA</fixed-case></title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>22–32</pages>
       <url hash="61f31b39">D07-1003</url>
@@ -46,7 +46,7 @@
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Xinhui</first><last>Hu</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>33–41</pages>
       <url hash="f4962410">D07-1004</url>
       <bibkey>wu-etal-2007-learning</bibkey>
@@ -54,7 +54,7 @@
     <paper id="5">
       <title>Improving Word Alignment with Bridge Languages</title>
       <author><first>Shankar</first><last>Kumar</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
       <author><first>Wolfgang</first><last>Macherey</last></author>
       <pages>42–50</pages>
       <url hash="af3d54bd">D07-1005</url>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="6">
       <title>Getting the Structure Right for Word Alignment: <fixed-case>LEAF</fixed-case></title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>51–60</pages>
       <url hash="4d574d04">D07-1006</url>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="8">
       <title>Large Margin Synchronous Generation and its Application to Sentence Compression</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>73–82</pages>
       <url hash="a3cef414">D07-1008</url>
@@ -95,8 +95,8 @@
     </paper>
     <paper id="10">
       <title>Automatically Identifying the Arguments of Discourse Connectives</title>
-      <author><first>Ben</first><last>Wellner</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>92–101</pages>
       <url hash="8ca91a0f">D07-1010</url>
       <bibkey>wellner-pustejovsky-2007-automatically</bibkey>
@@ -104,7 +104,7 @@
     <paper id="11">
       <title>Incremental Generation of Plural Descriptions: Similarity and Partitioning</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>102–111</pages>
       <url hash="f2b986a1">D07-1011</url>
       <bibkey>gatt-van-deemter-2007-incremental</bibkey>
@@ -113,7 +113,7 @@
       <title>A Comparative Evaluation of Deep and Shallow Approaches to the Automatic Detection of Common Grammatical Errors</title>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>112–121</pages>
       <url hash="b0166ada">D07-1012</url>
       <bibkey>wagner-etal-2007-comparative</bibkey>
@@ -128,8 +128,8 @@
     </paper>
     <paper id="14">
       <title>Probabilistic Models of Nonprojective Dependency Trees</title>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>132–140</pages>
       <url hash="049ef69f">D07-1014</url>
       <bibkey>smith-smith-2007-probabilistic</bibkey>
@@ -139,14 +139,14 @@
       <author><first>Terry</first><last>Koo</last></author>
       <author><first>Amir</first><last>Globerson</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>141–150</pages>
       <url hash="bb2cbc6e">D07-1015</url>
       <bibkey>koo-etal-2007-structured</bibkey>
     </paper>
     <paper id="16">
       <title>Using Foreign Inclusion Detection to Improve Parsing Performance</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Amit</first><last>Dubey</last></author>
       <author><first>Frank</first><last>Keller</last></author>
       <pages>151–160</pages>
@@ -157,15 +157,15 @@
       <title><fixed-case>LEDIR</fixed-case>: An Unsupervised Algorithm for Learning Directionality of Inference Rules</title>
       <author><first>Rahul</first><last>Bhagat</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>161–170</pages>
       <url hash="1a0901dc">D07-1017</url>
       <bibkey>bhagat-etal-2007-ledir</bibkey>
     </paper>
     <paper id="18">
       <title>Modelling Polysemy in Adjective Classes by Multi-Label Classification</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <pages>171–180</pages>
       <url hash="d1e996c6">D07-1018</url>
@@ -183,14 +183,14 @@
     <paper id="20">
       <title>Towards Robust Unsupervised Personal Name Disambiguation</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>190–198</pages>
       <url hash="d5a6e324">D07-1020</url>
       <bibkey>chen-martin-2007-towards</bibkey>
     </paper>
     <paper id="21">
       <title>Compressing Trigram Language Models With <fixed-case>G</fixed-case>olomb Coding</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Ted</first><last>Hart</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>199–207</pages>
@@ -199,8 +199,8 @@
     </paper>
     <paper id="22">
       <title>Joint Morphological and Syntactic Disambiguation</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>208–217</pages>
       <url hash="f85e8972">D07-1022</url>
       <bibkey>cohen-smith-2007-joint</bibkey>
@@ -215,16 +215,16 @@
     </paper>
     <paper id="24">
       <title>Semi-Supervised Classification for Extracting Protein Interaction Sentences using Dependency Parsing</title>
-      <author><first>Güneş</first><last>Erkan</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>228–237</pages>
       <url hash="55bc1649">D07-1024</url>
       <bibkey>erkan-etal-2007-semi</bibkey>
     </paper>
     <paper id="25">
       <title>A Sequence Alignment Model Based on the Averaged Perceptron</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <pages>238–247</pages>
       <url hash="1f683a3d">D07-1025</url>
@@ -232,17 +232,17 @@
     </paper>
     <paper id="26">
       <title>Instance Based Lexical Entailment for Ontology Population</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>248–256</pages>
       <url hash="74dfe1f4">D07-1026</url>
       <bibkey>giuliano-gliozzo-2007-instance</bibkey>
     </paper>
     <paper id="27">
       <title>Recovering Non-Local Dependencies for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Yuqing</first><last>Guo</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>257–266</pages>
       <url hash="8f568af2">D07-1027</url>
       <bibkey>guo-etal-2007-recovering</bibkey>
@@ -252,7 +252,7 @@
       <author><first>Deirdre</first><last>Hogan</last></author>
       <author><first>Conor</first><last>Cafferkey</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>267–276</pages>
       <url hash="13025cfc">D07-1028</url>
       <bibkey>hogan-etal-2007-exploiting</bibkey>
@@ -291,7 +291,7 @@
     </paper>
     <paper id="33">
       <title>A New Perceptron Algorithm for Sequence Labeling with Non-Local Features</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>315–324</pages>
       <url hash="1a7b698f">D07-1033</url>
@@ -299,8 +299,8 @@
     </paper>
     <paper id="34">
       <title>Extending a Thesaurus in the Pan-<fixed-case>C</fixed-case>hinese Context</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>325–333</pages>
       <url hash="ddf5ea23">D07-1034</url>
       <bibkey>kwong-tsou-2007-extending</bibkey>
@@ -309,7 +309,7 @@
       <title>Low-Quality Product Review Detection in Opinion Summarization</title>
       <author><first>Jingjing</first><last>Liu</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Yalou</first><last>Huang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>334–342</pages>
@@ -318,7 +318,7 @@
     </paper>
     <paper id="36">
       <title>Improving Statistical Machine Translation Performance by Training Data Selection and Optimization</title>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Jin</first><last>Huang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>343–350</pages>
@@ -327,8 +327,8 @@
     </paper>
     <paper id="37">
       <title>Topic Segmentation with Hybrid Document Indexing</title>
-      <author><first>Irina</first><last>Matveeva</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>351–359</pages>
       <url hash="c29a668f">D07-1037</url>
       <bibkey>matveeva-levow-2007-topic</bibkey>
@@ -343,16 +343,16 @@
     </paper>
     <paper id="39">
       <title>Detecting Compositionality of Verb-Object Combinations using Selectional Preferences</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>369–379</pages>
       <url hash="07d6bc5a">D07-1039</url>
       <bibkey>mccarthy-etal-2007-detecting</bibkey>
     </paper>
     <paper id="40">
       <title>Explorations in Automatic Book Summarization</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Hakan</first><last>Ceylan</last></author>
       <pages>380–389</pages>
       <url hash="d2e0a2b2">D07-1040</url>
@@ -368,8 +368,8 @@
     </paper>
     <paper id="42">
       <title>Flexible, Corpus-Based Modelling of Human Plausibility Judgements</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
       <pages>400–409</pages>
       <url hash="c335b13b">D07-1042</url>
@@ -378,7 +378,7 @@
     <paper id="43">
       <title><fixed-case>V</fixed-case>-Measure: A Conditional Entropy-Based External Cluster Evaluation Measure</title>
       <author><first>Andrew</first><last>Rosenberg</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>410–420</pages>
       <url hash="ce7c97f8">D07-1043</url>
       <bibkey>rosenberg-hirschberg-2007-v</bibkey>
@@ -394,8 +394,8 @@
     <paper id="45">
       <title>Smooth Bilingual <tex-math>N</tex-math>-Gram Translation</title>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Jose A.</first><last>R. Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>Jose A.</first><last>R. Fonollosa</last></author>
       <pages>430–438</pages>
       <url hash="09f67541">D07-1045</url>
       <bibkey>schwenk-etal-2007-smooth</bibkey>
@@ -420,7 +420,7 @@
     <paper id="48">
       <title>Automatic Identification of Important Segments and Expressions for Mining of Business-Oriented Conversations at Contact Centers</title>
       <author><first>Hironori</first><last>Takeuchi</last></author>
-      <author><first>L Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L Venkata</first><last>Subramaniam</last></author>
       <author><first>Tetsuya</first><last>Nasukawa</last></author>
       <author><first>Shourya</first><last>Roy</last></author>
       <pages>458–467</pages>
@@ -439,7 +439,7 @@
       <title>Word Sense Disambiguation Incorporating Lexical and Structural Semantic Information</title>
       <author><first>Takaaki</first><last>Tanaka</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Sanae</first><last>Fujita</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <pages>477–485</pages>
@@ -472,8 +472,8 @@
     </paper>
     <paper id="54">
       <title>Bilingual Cluster Based Models for Statistical Machine Translation</title>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>514–523</pages>
       <url hash="fa51d123">D07-1054</url>
       <bibkey>yamamoto-sumita-2007-bilingual</bibkey>
@@ -481,8 +481,8 @@
     <paper id="55">
       <title>A Systematic Comparison of Training Criteria for Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>524–532</pages>
       <url hash="68ce926e">D07-1055</url>
       <bibkey>zens-etal-2007-systematic</bibkey>
@@ -516,14 +516,14 @@
       <title>Generating Lexical Analogies Using Dependency Relations</title>
       <author><first>Andy</first><last>Chiu</last></author>
       <author><first>Pascal</first><last>Poupart</last></author>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <pages>561–570</pages>
       <url hash="7431cd86">D07-1059</url>
       <bibkey>chiu-etal-2007-generating</bibkey>
     </paper>
     <paper id="60">
       <title>Cross-Lingual Distributional Profiles of Concepts for Measuring Semantic Distance</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
@@ -551,7 +551,7 @@
       <title><fixed-case>J</fixed-case>apanese Dependency Analysis Using the Ancestor-Descendant Relation</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>600–609</pages>
       <url hash="9b2988a6">D07-1063</url>
       <bibkey>tamura-etal-2007-japanese</bibkey>
@@ -567,7 +567,7 @@
     <paper id="65">
       <title>Recovery of Empty Nodes in Parse Structures</title>
       <author><first>Denis</first><last>Filimonov</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>620–629</pages>
       <url hash="b37e7443">D07-1065</url>
       <bibkey>filimonov-harper-2007-recovery</bibkey>
@@ -575,7 +575,7 @@
     <paper id="66">
       <title>Treebank Annotation Schemes and Parser Evaluation for <fixed-case>G</fixed-case>erman</title>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>630–639</pages>
       <url hash="95e8b6e6">D07-1066</url>
       <bibkey>rehbein-van-genabith-2007-treebank</bibkey>
@@ -585,7 +585,7 @@
       <author><first>Qinfeng</first><last>Shi</last></author>
       <author><first>Yasemin</first><last>Altun</last></author>
       <author><first>Alex</first><last>Smola</last></author>
-      <author><first>S.V.N.</first><last>Vishwanathan</last></author>
+      <author id="s-v-n-vishwanathan"><first>S.V.N.</first><last>Vishwanathan</last></author>
       <pages>640–648</pages>
       <url hash="ca77b324">D07-1067</url>
       <bibkey>shi-etal-2007-semi</bibkey>
@@ -594,7 +594,7 @@
       <title>A Graph-Based Approach to Named Entity Categorization in <fixed-case>W</fixed-case>ikipedia Using Conditional Random Fields</title>
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>649–657</pages>
       <url hash="3b5d4368">D07-1068</url>
       <bibkey>watanabe-etal-2007-graph</bibkey>
@@ -602,10 +602,10 @@
     <paper id="69">
       <title><fixed-case>M</fixed-case>aven<fixed-case>R</fixed-case>ank: Identifying Influential Members of the <fixed-case>US</fixed-case> Senate Using Lexical Centrality</title>
       <author><first>Anthony</first><last>Fader</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Michael H.</first><last>Crespin</last></author>
       <author><first>Burt L.</first><last>Monroe</last></author>
-      <author><first>Kevin M.</first><last>Quinn</last></author>
+      <author id="kevin-m-quinn"><first>Kevin M.</first><last>Quinn</last></author>
       <author><first>Michael</first><last>Colaresi</last></author>
       <pages>658–666</pages>
       <url hash="0155081d">D07-1069</url>
@@ -613,7 +613,7 @@
     </paper>
     <paper id="70">
       <title>Bootstrapping Feature-Rich Dependency Parsers with Entropic Priors</title>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <author><first>Jason</first><last>Eisner</last></author>
       <pages>667–677</pages>
       <url hash="793ff171">D07-1070</url>
@@ -621,8 +621,8 @@
     </paper>
     <paper id="71">
       <title>Online Learning of Relaxed <fixed-case>CCG</fixed-case> Grammars for Parsing to Logical Form</title>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>678–687</pages>
       <url hash="89e852de">D07-1071</url>
       <bibkey>zettlemoyer-collins-2007-online</bibkey>
@@ -631,7 +631,7 @@
       <title>The Infinite <fixed-case>PCFG</fixed-case> Using Hierarchical <fixed-case>D</fixed-case>irichlet Processes</title>
       <author><first>Percy</first><last>Liang</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Michael</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael</first><last>Jordan</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>688–697</pages>
       <url hash="dae70883">D07-1072</url>
@@ -639,7 +639,7 @@
     </paper>
     <paper id="73">
       <title>Exploiting <fixed-case>W</fixed-case>ikipedia as External Knowledge for Named Entity Recognition</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>698–707</pages>
       <url hash="b83ecb92">D07-1073</url>
@@ -647,7 +647,7 @@
     </paper>
     <paper id="74">
       <title>Large-Scale Named Entity Disambiguation Based on <fixed-case>W</fixed-case>ikipedia Data</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <pages>708–716</pages>
       <url hash="60d130c4">D07-1074</url>
       <bibkey>cucerzan-2007-large</bibkey>
@@ -655,17 +655,17 @@
     <paper id="75">
       <title>Effective Information Extraction with Semantic Affinity Patterns and Relevant Regions</title>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>717–727</pages>
       <url hash="b28e20a8">D07-1075</url>
       <bibkey>patwardhan-riloff-2007-effective</bibkey>
     </paper>
     <paper id="76">
       <title>Tree Kernel-Based Relation Extraction with Context-Sensitive Structured Parse Tree Information</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Dong Hong</first><last>Ji</last></author>
-      <author><first>QiaoMing</first><last>Zhu</last></author>
+      <author id="donghong-ji"><first>Dong Hong</first><last>Ji</last></author>
+      <author id="qiaoming-zhu"><first>QiaoMing</first><last>Zhu</last></author>
       <pages>728–736</pages>
       <url hash="69f01c2c">D07-1076</url>
       <bibkey>zhou-etal-2007-tree</bibkey>
@@ -673,7 +673,7 @@
     <paper id="77">
       <title><fixed-case>C</fixed-case>hinese Syntactic Reordering for Statistical Machine Translation</title>
       <author><first>Chao</first><last>Wang</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>737–745</pages>
       <url hash="c19d49cc">D07-1077</url>
@@ -719,7 +719,7 @@
     <paper id="82">
       <title>Active Learning for Word Sense Disambiguation with Methods for Addressing the Class Imbalance Problem</title>
       <author><first>Jingbo</first><last>Zhu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>783–790</pages>
       <url hash="3634dc47">D07-1082</url>
       <bibkey>zhu-hovy-2007-active</bibkey>
@@ -769,16 +769,16 @@
       <title>Extracting Data Records from Unstructured Biomedical Full Text</title>
       <author><first>Donghui</first><last>Feng</last></author>
       <author><first>Gully</first><last>Burns</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>837–846</pages>
       <url hash="bd9483a1">D07-1088</url>
       <bibkey>feng-etal-2007-extracting</bibkey>
     </paper>
     <paper id="89">
       <title>Multiple Alignment of Citation Sentences with Conditional Random Fields and Posterior Decoding</title>
-      <author><first>Ariel</first><last>Schwartz</last></author>
+      <author id="ariel-schwartz"><first>Ariel</first><last>Schwartz</last></author>
       <author><first>Anna</first><last>Divoli</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>847–857</pages>
       <url hash="d002cfe4">D07-1089</url>
       <bibkey>schwartz-etal-2007-multiple</bibkey>
@@ -786,9 +786,9 @@
     <paper id="90">
       <title>Large Language Models in Machine Translation</title>
       <author><first>Thorsten</first><last>Brants</last></author>
-      <author><first>Ashok C.</first><last>Popat</last></author>
+      <author id="ashok-popat"><first>Ashok C.</first><last>Popat</last></author>
       <author><first>Peng</first><last>Xu</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
       <author><first>Jeffrey</first><last>Dean</last></author>
       <pages>858–867</pages>
       <url hash="1c9a0843">D07-1090</url>
@@ -804,7 +804,7 @@
     </paper>
     <paper id="92">
       <title>Translating Unknown Words by Analogical Learning</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
       <pages>877–886</pages>
       <url hash="e0676536">D07-1092</url>
@@ -812,9 +812,9 @@
     </paper>
     <paper id="93">
       <title>A Probabilistic Approach to Diachronic Phonology</title>
-      <author><first>Alexandre</first><last>Bouchard</last></author>
+      <author id="alexandre-bouchard-cote"><first>Alexandre</first><last>Bouchard</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Thomas</first><last>Griffiths</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas</first><last>Griffiths</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>887–896</pages>
       <url hash="fba0489e">D07-1093</url>
@@ -840,7 +840,7 @@
       <title>The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2007 Shared Task on Dependency Parsing</title>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Johan</first><last>Hall</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
       <author><first>Jens</first><last>Nilsson</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
@@ -854,8 +854,8 @@
       <author><first>Johan</first><last>Hall</last></author>
       <author><first>Jens</first><last>Nilsson</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Mattias</first><last>Nilsson</last></author>
       <author><first>Markus</first><last>Saers</last></author>
       <pages>933–939</pages>
@@ -874,7 +874,7 @@
     <paper id="99">
       <title>Fast and Robust Multilingual Dependency Parsing with a Generative Latent Variable Model</title>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>947–951</pages>
       <url hash="16e36429">D07-1099</url>
       <bibkey>titov-henderson-2007-fast</bibkey>
@@ -895,9 +895,9 @@
     </paper>
     <paper id="102">
       <title>Log-Linear Models of Non-Projective Trees, <tex-math>k</tex-math>-best <fixed-case>MST</fixed-case> Parsing and Tree-Ranking</title>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>962–966</pages>
       <url hash="1ee7a376">D07-1102</url>
       <bibkey>hall-etal-2007-log</bibkey>
@@ -922,15 +922,15 @@
     <paper id="105">
       <title>An Empirical Study on Computing Consensus Translations from Multiple Machine Translation Systems</title>
       <author><first>Wolfgang</first><last>Macherey</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
       <pages>986–995</pages>
       <url hash="108def0f">D07-1105</url>
       <bibkey>macherey-och-2007-empirical</bibkey>
     </paper>
     <paper id="106">
       <title>Learning to Find <fixed-case>E</fixed-case>nglish to <fixed-case>C</fixed-case>hinese Transliterations on the Web</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>996–1004</pages>
       <url hash="d8e89a2b">D07-1106</url>
       <bibkey>wu-chang-2007-learning</bibkey>
@@ -939,15 +939,15 @@
       <title>Learning to Merge Word Senses</title>
       <author><first>Rion</first><last>Snow</last></author>
       <author><first>Sushant</first><last>Prakash</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <pages>1005–1014</pages>
       <url hash="9cf7ed29">D07-1107</url>
       <bibkey>snow-etal-2007-learning</bibkey>
     </paper>
     <paper id="108">
       <title>Improving Word Sense Disambiguation Using Topic Features</title>
-      <author><first>Junfu</first><last>Cai</last></author>
+      <author id="jun-fu-cai"><first>Junfu</first><last>Cai</last></author>
       <author><first>Wee Sun</first><last>Lee</last></author>
       <author><first>Yee Whye</first><last>Teh</last></author>
       <pages>1015–1023</pages>
@@ -957,8 +957,8 @@
     <paper id="109">
       <title>A Topic Model for Word Sense Disambiguation</title>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>David</first><last>Blei</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>1024–1033</pages>
       <url hash="abd5ce40">D07-1109</url>
       <bibkey>boyd-graber-etal-2007-topic</bibkey>
@@ -977,7 +977,7 @@
     <paper id="111">
       <title>Dependency Parsing and Domain Adaptation with <fixed-case>LR</fixed-case> Models and Parser Ensembles</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1044–1050</pages>
       <url hash="8ffe3287">D07-1111</url>
       <bibkey>sagae-tsujii-2007-dependency</bibkey>
@@ -986,9 +986,9 @@
       <title>Frustratingly Hard Domain Adaptation for Dependency Parsing</title>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>John</first><last>Blitzer</last></author>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <pages>1051–1055</pages>
       <url hash="1f9c55e9">D07-1112</url>
@@ -997,7 +997,7 @@
     <paper id="113">
       <title><fixed-case>C</fixed-case>rystal: Analyzing Predictive Opinions on the Web</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1056–1064</pages>
       <url hash="85c67937">D07-1113</url>
       <bibkey>kim-hovy-2007-crystal</bibkey>
@@ -1006,7 +1006,7 @@
       <title>Extracting Aspect-Evaluation and Aspect-Of Relations in Opinion Mining</title>
       <author><first>Nozomi</first><last>Kobayashi</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1065–1074</pages>
       <url hash="7464c85f">D07-1114</url>
       <bibkey>kobayashi-etal-2007-extracting</bibkey>
@@ -1023,9 +1023,9 @@
       <title>Determining Case in <fixed-case>A</fixed-case>rabic: Learning Complex Linguistic Behavior Requires Complex Linguistic Features</title>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Ryan</first><last>Gabbard</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <pages>1084–1092</pages>
       <url hash="38915e04">D07-1116</url>
       <bibkey>habash-etal-2007-determining</bibkey>
@@ -1033,17 +1033,17 @@
     <paper id="117">
       <title><fixed-case>M</fixed-case>andarin Part-of-Speech Tagging and Discriminative Reranking</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>1093–1102</pages>
       <url hash="e990b190">D07-1117</url>
       <bibkey>huang-etal-2007-mandarin</bibkey>
     </paper>
     <paper id="118">
       <title>Building Domain-Specific Taggers without Annotated (Domain) Data</title>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Manabu</first><last>Torii</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>1103–1111</pages>
       <url hash="6ae70e4c">D07-1118</url>
       <bibkey>miller-etal-2007-building</bibkey>
@@ -1061,7 +1061,7 @@
     </paper>
     <paper id="120">
       <title>Hybrid Ways to Improve Domain Independence in an <fixed-case>ML</fixed-case> Dependency Parser</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>1119–1123</pages>
       <url hash="e652c4fc">D07-1120</url>
       <bibkey>bick-2007-hybrid</bibkey>
@@ -1069,7 +1069,7 @@
     <paper id="121">
       <title>A Constraint Satisfaction Approach to Dependency Parsing</title>
       <author><first>Sander</first><last>Canisius</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>1124–1128</pages>
       <url hash="e51e0d13">D07-1121</url>
       <bibkey>canisius-tjong-kim-sang-2007-constraint</bibkey>
@@ -1093,7 +1093,7 @@
     </paper>
     <paper id="124">
       <title>Online Learning for Deterministic Dependency Parsing</title>
-      <author><first>Prashanth Reddy</first><last>Mannem</last></author>
+      <author id="prashanth-mannem"><first>Prashanth Reddy</first><last>Mannem</last></author>
       <pages>1139–1143</pages>
       <url hash="61934b0c">D07-1124</url>
       <bibkey>mannem-2007-online</bibkey>
@@ -1107,9 +1107,9 @@
     </paper>
     <paper id="126">
       <title>A Multilingual Dependency Analysis System Using Online Passive-Aggressive Learning</title>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
       <author><first>Xuan-Hieu</first><last>Phan</last></author>
       <pages>1149–1155</pages>
       <url hash="f569b775">D07-1126</url>
@@ -1144,7 +1144,7 @@
     <paper id="130">
       <title>Adapting the <fixed-case>RASP</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>07 Domain-Adaptation Task</title>
       <author><first>Rebecca</first><last>Watson</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>1170–1174</pages>
       <url hash="eb3e066d">D07-1130</url>
       <bibkey>watson-briscoe-2007-adapting</bibkey>
diff --git a/data/xml/D08.xml b/data/xml/D08.xml
index 37e8c8967a..05ff9b4305 100644
--- a/data/xml/D08.xml
+++ b/data/xml/D08.xml
@@ -20,7 +20,7 @@
       <title>Revealing the Structure of Medical Dictations with Conditional Random Fields</title>
       <author><first>Jeremy</first><last>Jancsary</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <pages>1–10</pages>
       <url hash="86214a19">D08-1001</url>
       <bibkey>jancsary-etal-2008-revealing</bibkey>
@@ -28,7 +28,7 @@
     <paper id="2">
       <title>It’s a Contradiction – no, it’s not: <fixed-case>A</fixed-case> Case Study using Functional Relations</title>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Doug</first><last>Downey</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>11–20</pages>
@@ -48,8 +48,8 @@
     </paper>
     <paper id="4">
       <title>Modeling Annotators: <fixed-case>A</fixed-case> Generative Approach to Learning from Annotator Rationales</title>
-      <author><first>Omar</first><last>Zaidan</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="omar-zaidan"><first>Omar</first><last>Zaidan</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>31–40</pages>
       <url hash="f65ad667">D08-1004</url>
       <bibkey>zaidan-eisner-2008-modeling</bibkey>
@@ -90,7 +90,7 @@
       <title>Scaling Textual Inference to the Web</title>
       <author><first>Stefan</first><last>Schoenmackers</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>79–88</pages>
       <url hash="f92428ad">D08-1009</url>
       <bibkey>schoenmackers-etal-2008-scaling</bibkey>
@@ -111,7 +111,7 @@
       <author><first>Mei</first><last>Yang</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Patrick</first><last>Nguyen</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>98–107</pages>
       <url hash="6af94a27">D08-1011</url>
       <bibkey>he-etal-2008-indirect</bibkey>
@@ -137,8 +137,8 @@
     <paper id="14">
       <title>Multilingual Subjectivity Analysis Using Machine Translation</title>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Samer</first><last>Hassan</last></author>
       <pages>127–135</pages>
       <url hash="251f0af5">D08-1014</url>
@@ -154,8 +154,8 @@
     </paper>
     <paper id="16">
       <title>Dependency Parsing by Belief Propagation</title>
-      <author><first>David</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>145–156</pages>
       <url hash="cf4484c4">D08-1016</url>
       <bibkey>smith-eisner-2008-dependency</bibkey>
@@ -164,8 +164,8 @@
       <title>Stacking Dependency Parsers</title>
       <author><first>André Filipe</first><last>Torres Martins</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <pages>157–166</pages>
       <url hash="7457fbd7">D08-1017</url>
       <bibkey>torres-martins-etal-2008-stacking</bibkey>
@@ -174,7 +174,7 @@
       <title>Better Binarization for the <fixed-case>CKY</fixed-case> Parsing</title>
       <author><first>Xinying</first><last>Song</last></author>
       <author><first>Shilin</first><last>Ding</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>167–176</pages>
       <url hash="8d1b7c72">D08-1018</url>
       <bibkey>song-etal-2008-better</bibkey>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="23">
       <title>Probabilistic Inference for Machine Translation</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>215–223</pages>
       <url hash="477dd7d2">D08-1023</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="25">
       <title>A Noisy-Channel Model of Human Sentence Comprehension under Uncertain Input</title>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>234–243</pages>
       <url hash="aa8e91ba">D08-1025</url>
       <bibkey>levy-2008-noisy</bibkey>
@@ -237,7 +237,7 @@
     <paper id="26">
       <title>Incorporating Temporal and Semantic Information with Eye Gaze for Automatic Word Acquisition in Multimodal Conversational Systems</title>
       <author><first>Shaolin</first><last>Qu</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>244–253</pages>
       <url hash="d06b535f">D08-1026</url>
       <bibkey>qu-chai-2008-incorporating</bibkey>
@@ -246,16 +246,16 @@
       <title>Cheap and Fast – But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks</title>
       <author><first>Rion</first><last>Snow</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <pages>254–263</pages>
       <url hash="b040f58e">D08-1027</url>
       <bibkey>snow-etal-2008-cheap</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>H</fixed-case>ot<fixed-case>S</fixed-case>pots: <fixed-case>V</fixed-case>isualizing Edits to a Text</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>264–273</pages>
       <url hash="133d61e0">D08-1028</url>
       <bibkey>bangalore-smith-2008-hotspots</bibkey>
@@ -271,7 +271,7 @@
     <paper id="30">
       <title><fixed-case>A</fixed-case>rabic Named Entity Recognition using Optimized Feature Sets</title>
       <author><first>Yassine</first><last>Benajiba</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>284–293</pages>
       <url hash="e3a11eae">D08-1030</url>
@@ -288,7 +288,7 @@
     <paper id="32">
       <title>Selecting Sentences for Answering Complex Questions</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>304–313</pages>
       <url hash="2903d86a">D08-1032</url>
       <bibkey>chali-joty-2008-selecting</bibkey>
@@ -296,7 +296,7 @@
     <paper id="33">
       <title>Sampling Alignment Structure under a <fixed-case>B</fixed-case>ayesian Translation Model</title>
       <author><first>John</first><last>DeNero</last></author>
-      <author><first>Alexandre</first><last>Bouchard-Côté</last></author>
+      <author id="alexandre-bouchard-cote"><first>Alexandre</first><last>Bouchard-Côté</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>314–323</pages>
       <url hash="98f24645">D08-1033</url>
@@ -305,7 +305,7 @@
     <paper id="34">
       <title>Improving <fixed-case>C</fixed-case>hinese Semantic Role Classification with Hierarchical Feature Selection Strategy</title>
       <author><first>Weiwei</first><last>Ding</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>324–333</pages>
       <url hash="55ab6970">D08-1034</url>
       <bibkey>ding-chang-2008-improving</bibkey>
@@ -337,17 +337,17 @@
     <paper id="38">
       <title>Studying the History of Ideas Using Topic Models</title>
       <author><first>David</first><last>Hall</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>363–371</pages>
       <url hash="8ab25c09">D08-1038</url>
       <bibkey>hall-etal-2008-studying</bibkey>
     </paper>
     <paper id="39">
       <title>Triplet Lexicon Models for Statistical Machine Translation</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
       <pages>372–381</pages>
       <url hash="31f9c442">D08-1039</url>
@@ -357,7 +357,7 @@
       <title>A Casual Conversation System Using Modality and Word Associations Retrieved from the Web</title>
       <author><first>Shinsuke</first><last>Higuchi</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>382–390</pages>
       <url hash="555b6e9d">D08-1040</url>
       <bibkey>higuchi-etal-2008-casual</bibkey>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="42">
       <title>A Dependency-based Word Subsequence Kernel</title>
-      <author><first>Rohit</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
       <pages>400–409</pages>
       <url hash="6edf3fc3">D08-1042</url>
       <bibkey>kate-2008-dependency</bibkey>
@@ -383,7 +383,7 @@
       <author><first>Jung-Tae</first><last>Lee</last></author>
       <author><first>Sang-Bum</first><last>Kim</last></author>
       <author><first>Young-In</first><last>Song</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>410–418</pages>
       <url hash="25e97ff2">D08-1043</url>
       <bibkey>lee-etal-2008-bridging</bibkey>
@@ -406,17 +406,17 @@
     <paper id="46">
       <title>Legal Docket Classification: <fixed-case>W</fixed-case>here Machine Learning Stumbles</title>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>438–446</pages>
       <url hash="0a282c1d">D08-1046</url>
       <bibkey>nallapati-manning-2008-legal</bibkey>
     </paper>
     <paper id="47">
       <title>A Discriminative Candidate Generator for String Transformations</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>447–456</pages>
       <url hash="426c9959">D08-1047</url>
       <bibkey>okazaki-etal-2008-discriminative-candidate</bibkey>
@@ -425,7 +425,7 @@
       <title>Automatic induction of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et lexical units</title>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Diego</first><last>De Cao</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Michael</first><last>Roth</last></author>
       <pages>457–465</pages>
@@ -435,7 +435,7 @@
     <paper id="49">
       <title>Multimodal Subjectivity Analysis of Multiparty Conversation</title>
       <author><first>Stephan</first><last>Raaijmakers</last></author>
-      <author><first>Khiet</first><last>Truong</last></author>
+      <author id="khiet-p-truong"><first>Khiet</first><last>Truong</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <pages>466–474</pages>
       <url hash="0ee17484">D08-1049</url>
@@ -451,11 +451,11 @@
     </paper>
     <paper id="51">
       <title>Improving Interactive Machine Translation via Mouse Actions</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <author><first>Hieu</first><last>Hoang</last></author>
       <pages>485–494</pages>
       <url hash="90bebd55">D08-1051</url>
@@ -464,7 +464,7 @@
     <paper id="52">
       <title><fixed-case>LTAG</fixed-case> Dependency Parsing with Bidirectional Incremental Construction</title>
       <author><first>Libin</first><last>Shen</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>495–504</pages>
       <url hash="9d736260">D08-1052</url>
       <bibkey>shen-joshi-2008-ltag</bibkey>
@@ -514,7 +514,7 @@
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>543–552</pages>
       <url hash="eccb1c37">D08-1057</url>
       <bibkey>wan-etal-2008-seed</bibkey>
@@ -537,16 +537,16 @@
     <paper id="60">
       <title>Generalizing Local and Non-Local Word-Reordering Patterns for Syntax-Based Machine Translation</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Yaser</first><last>Al-onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-onaizan</last></author>
       <pages>572–581</pages>
       <url hash="535c727c">D08-1060</url>
       <bibkey>zhao-al-onaizan-2008-generalizing</bibkey>
     </paper>
     <paper id="61">
       <title>Weakly-Supervised Acquisition of Labeled Class Instances using Graph Random Walks</title>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <author><first>Rahul</first><last>Bhagat</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
@@ -565,7 +565,7 @@
     <paper id="63">
       <title>Mention Detection Crossing the Language Barrier</title>
       <author><first>Imed</first><last>Zitouni</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>600–609</pages>
       <url hash="4138a64d">D08-1063</url>
       <bibkey>zitouni-florian-2008-mention</bibkey>
@@ -582,9 +582,9 @@
     </paper>
     <paper id="65">
       <title>Lattice <fixed-case>M</fixed-case>inimum <fixed-case>B</fixed-case>ayes-<fixed-case>R</fixed-case>isk Decoding for Statistical Machine Translation</title>
-      <author><first>Roy</first><last>Tromble</last></author>
+      <author id="roy-tromble"><first>Roy</first><last>Tromble</last></author>
       <author><first>Shankar</first><last>Kumar</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>Wolfgang</first><last>Macherey</last></author>
       <pages>620–629</pages>
       <url hash="4920b561">D08-1065</url>
@@ -593,7 +593,7 @@
     <paper id="66">
       <title>Phrase Translation Probabilities with <fixed-case>ITG</fixed-case> Priors and Smoothing as Learning Objective</title>
       <author><first>Markos</first><last>Mylonakis</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>630–639</pages>
       <url hash="00bb02f9">D08-1066</url>
       <bibkey>mylonakis-simaan-2008-phrase</bibkey>
@@ -623,14 +623,14 @@
     </paper>
     <paper id="70">
       <title>Learning with Probabilistic Features for Improved Pipeline Models</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>670–679</pages>
       <url hash="d703d648">D08-1070</url>
       <bibkey>bunescu-2008-learning</bibkey>
     </paper>
     <paper id="71">
       <title>Cross-Task Knowledge-Constrained Self Training</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>680–688</pages>
       <url hash="86e2af66">D08-1071</url>
       <bibkey>daume-iii-2008-cross</bibkey>
@@ -645,8 +645,8 @@
     </paper>
     <paper id="73">
       <title>Jointly Combining Implicit Constraints Improves Temporal Ordering</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>698–706</pages>
       <url hash="00bb5f49">D08-1073</url>
       <bibkey>chambers-jurafsky-2008-jointly</bibkey>
@@ -662,7 +662,7 @@
       <title>Learning the Scope of Negation in Biomedical Texts</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Anthony</first><last>Liekens</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>715–724</pages>
       <url hash="1afaeba5">D08-1075</url>
       <bibkey>morante-etal-2008-learning</bibkey>
@@ -670,7 +670,7 @@
     <paper id="76">
       <title>Lattice-based Minimum Error Rate Training for Statistical Machine Translation</title>
       <author><first>Wolfgang</first><last>Macherey</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>Ignacio</first><last>Thayer</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
       <pages>725–734</pages>
@@ -703,7 +703,7 @@
     </paper>
     <paper id="80">
       <title>Topic-Driven Multi-Document Summarization with Encyclopedic Knowledge and Spreading Activation</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <pages>763–772</pages>
       <url hash="7ad98268">D08-1080</url>
       <bibkey>nastase-2008-topic</bibkey>
@@ -721,7 +721,7 @@
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <author><first>Wee Sun</first><last>Lee</last></author>
-      <author><first>Luke S.</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke S.</first><last>Zettlemoyer</last></author>
       <pages>783–792</pages>
       <url hash="d8b7f34d">D08-1082</url>
       <bibkey>lu-etal-2008-generative</bibkey>
@@ -729,7 +729,7 @@
     <paper id="83">
       <title>Learning with Compositional Semantics as Structural Inference for Subsentential Sentiment Analysis</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>793–801</pages>
       <url hash="21915d80">D08-1083</url>
       <bibkey>choi-cardie-2008-learning</bibkey>
@@ -738,7 +738,7 @@
       <title>A Phrase-Based Alignment Model for Natural Language Inference</title>
       <author><first>Bill</first><last>MacCartney</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>802–811</pages>
       <url hash="c4883a89">D08-1084</url>
       <bibkey>maccartney-etal-2008-phrase</bibkey>
@@ -763,8 +763,8 @@
     </paper>
     <paper id="87">
       <title><fixed-case>N</fixed-case>-gram Weighting: <fixed-case>R</fixed-case>educing Training Data Mismatch in Cross-Domain Language Model Estimation</title>
-      <author><first>Bo-June Paul</first><last>Hsu</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="bo-june-paul-hsu"><first>Bo-June Paul</first><last>Hsu</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>829–838</pages>
       <url hash="3a8cbb12">D08-1087</url>
       <bibkey>hsu-glass-2008-n</bibkey>
@@ -773,7 +773,7 @@
       <title>Complexity of Finding the <fixed-case>BLEU</fixed-case>-optimal Hypothesis in a Confusion Network</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>839–847</pages>
       <url hash="41b67ee3">D08-1088</url>
       <bibkey>leusch-etal-2008-complexity</bibkey>
@@ -781,16 +781,16 @@
     <paper id="89">
       <title>A Simple and Effective Hierarchical Phrase Reordering Model</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>848–856</pages>
       <url hash="259da807">D08-1089</url>
       <bibkey>galley-manning-2008-simple</bibkey>
     </paper>
     <paper id="90">
       <title>Language and Translation Model Adaptation using Comparable Corpora</title>
-      <author><first>Matthew</first><last>Snover</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>857–866</pages>
       <url hash="defdd8f0">D08-1090</url>
       <bibkey>snover-etal-2008-language</bibkey>
@@ -823,7 +823,7 @@
     <paper id="94">
       <title>A Structured Vector Space Model for Word Meaning in Context</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>897–906</pages>
       <url hash="39c1accf">D08-1094</url>
       <bibkey>erk-pado-2008-structured</bibkey>
@@ -831,14 +831,14 @@
     <paper id="95">
       <title>Learning Graph Walk Based Similarity Measures for Parsed Text</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>907–916</pages>
       <url hash="0fb70380">D08-1095</url>
       <bibkey>minkov-cohen-2008-learning</bibkey>
     </paper>
     <paper id="96">
       <title>A Graph-theoretic Model of Lexical Syntactic Acquisition</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Michael</first><last>Walsh</last></author>
       <pages>917–926</pages>
       <url hash="d3d2e004">D08-1096</url>
@@ -864,10 +864,10 @@
     </paper>
     <paper id="99">
       <title>Automatic Set Expansion for List Question Answering</title>
-      <author><first>Richard C.</first><last>Wang</last></author>
+      <author id="richard-c-wang"><first>Richard C.</first><last>Wang</last></author>
       <author><first>Nico</first><last>Schlaefer</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>947–954</pages>
       <url hash="8eb71dcf">D08-1099</url>
       <bibkey>wang-etal-2008-automatic</bibkey>
@@ -875,7 +875,7 @@
     <paper id="100">
       <title>Acquiring Domain-Specific Dialog Information from Task-Oriented Human-Human Interaction through an Unsupervised Learning</title>
       <author><first>Ananlada</first><last>Chotimongkol</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>955–964</pages>
       <url hash="1849ffea">D08-1100</url>
       <bibkey>chotimongkol-rudnicky-2008-acquiring</bibkey>
@@ -897,8 +897,8 @@
     </paper>
     <paper id="103">
       <title>Computing Word-Pair Antonymy</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>982–991</pages>
       <url hash="c5381392">D08-1103</url>
@@ -924,9 +924,9 @@
     <paper id="106">
       <title>Graph-based Analysis of Semantic Drift in <fixed-case>E</fixed-case>spresso-like Bootstrapping Algorithms</title>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1011–1020</pages>
       <url hash="97513976">D08-1106</url>
       <bibkey>komachi-etal-2008-graph</bibkey>
@@ -987,16 +987,16 @@
     <paper id="113">
       <title>Latent-Variable Modeling of String Transductions with Finite-State Methods</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>Jason</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-smith"><first>Jason</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1080–1089</pages>
       <url hash="4d7a654e">D08-1113</url>
       <bibkey>dreyer-etal-2008-latent</bibkey>
     </paper>
     <paper id="114">
       <title>Soft-Supervised Learning for Text Classification</title>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>1090–1099</pages>
       <url hash="05a3d1f5">D08-1114</url>
       <bibkey>subramanya-bilmes-2008-soft</bibkey>
diff --git a/data/xml/D09.xml b/data/xml/D09.xml
index 955abf17af..d854fc39ee 100644
--- a/data/xml/D09.xml
+++ b/data/xml/D09.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="50a09540">D09-1</url>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
       <month>August</month>
@@ -35,7 +35,7 @@
     <paper id="3">
       <title>Semi-supervised Semantic Role Labeling Using the <fixed-case>L</fixed-case>atent <fixed-case>W</fixed-case>ords <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel</title>
       <author><first>Koen</first><last>Deschacht</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>21–29</pages>
       <url hash="094aae62">D09-1003</url>
       <bibkey>deschacht-moens-2009-semi</bibkey>
@@ -44,7 +44,7 @@
       <title>Semantic Dependency Parsing of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank and <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank: An Efficient Integrated Approach via a Large-scale Feature Selection</title>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>30–39</pages>
       <url hash="afff6a4b">D09-1004</url>
       <bibkey>zhao-etal-2009-semantic</bibkey>
@@ -52,14 +52,14 @@
     <paper id="5">
       <title>First- and Second-Order Expectation Semirings with Applications to Minimum-Risk Training on Translation Forests</title>
       <author><first>Zhifei</first><last>Li</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>40–51</pages>
       <url hash="dc3b16b4">D09-1005</url>
       <bibkey>li-eisner-2009-first</bibkey>
     </paper>
     <paper id="6">
       <title>Feasibility of Human-in-the-loop Minimum Error Rate Training</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>52–61</pages>
       <url hash="a35fa173">D09-1006</url>
@@ -79,7 +79,7 @@
       <author><first>Jinxi</first><last>Xu</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>72–80</pages>
       <url hash="25fa53b5">D09-1008</url>
       <bibkey>shen-etal-2009-effective</bibkey>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="10">
       <title>Efficient kernels for sentence pair classification</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Lorenzo</first><last>Dell’Arciprete</last></author>
       <pages>91–100</pages>
       <url hash="2614ca27">D09-1010</url>
@@ -104,7 +104,7 @@
     <paper id="11">
       <title>Graphical Models over Multiple Strings</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>101–110</pages>
       <url hash="e97557b1">D09-1011</url>
       <bibkey>dreyer-eisner-2009-graphical</bibkey>
@@ -120,9 +120,9 @@
     <paper id="13">
       <title>A Rich Feature Vector for Protein-Protein Interaction Extraction from Multiple Corpora</title>
       <author><first>Makoto</first><last>Miwa</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>121–130</pages>
       <url hash="57092d97">D09-1013</url>
       <bibkey>miwa-etal-2009-rich</bibkey>
@@ -137,8 +137,8 @@
     </paper>
     <paper id="15">
       <title>Nested Named Entity Recognition</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>141–150</pages>
       <url hash="a372ea90">D09-1015</url>
       <bibkey>finkel-manning-2009-nested</bibkey>
@@ -146,7 +146,7 @@
     <paper id="16">
       <title>A Unified Model of Phrasal and Sentential Evidence for Information Extraction</title>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>151–160</pages>
       <url hash="9be90dda">D09-1016</url>
       <bibkey>patwardhan-riloff-2009-unified</bibkey>
@@ -154,7 +154,7 @@
     <paper id="17">
       <title>Review Sentiment Scoring via a Parse-and-Paraphrase Paradigm</title>
       <author><first>Jingjing</first><last>Liu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>161–169</pages>
       <url hash="038863c0">D09-1017</url>
       <bibkey>liu-seneff-2009-review</bibkey>
@@ -163,7 +163,7 @@
       <title>Supervised and Unsupervised Methods in Employing Discourse Relations for Improving Opinion Polarity Classification</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Galileo</first><last>Namata</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
       <pages>170–179</pages>
       <url hash="2f1f5582">D09-1018</url>
@@ -181,7 +181,7 @@
     <paper id="20">
       <title>Subjectivity Word Sense Disambiguation</title>
       <author><first>Cem</first><last>Akkaya</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Rada</first><last>Mihalcea</last></author>
       <pages>190–199</pages>
       <url hash="dc570d48">D09-1020</url>
@@ -190,7 +190,7 @@
     <paper id="21">
       <title>Non-Projective Parsing for Statistical Machine Translation</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>200–209</pages>
       <url hash="c6a3c373">D09-1021</url>
       <bibkey>carreras-collins-2009-non</bibkey>
@@ -198,8 +198,8 @@
     <paper id="22">
       <title>Extending Statistical Machine Translation with Discriminative and Trigger-Based Lexicon Models</title>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>210–218</pages>
       <url hash="f1804243">D09-1022</url>
       <bibkey>mauser-etal-2009-extending</bibkey>
@@ -207,7 +207,7 @@
     <paper id="23">
       <title>Feature-Rich Translation by Quasi-Synchronous Lattice Parsing</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>219–228</pages>
       <url hash="1f87c516">D09-1023</url>
       <bibkey>gimpel-smith-2009-feature</bibkey>
@@ -232,7 +232,7 @@
       <author><first>Daniel</first><last>Ramage</last></author>
       <author><first>David</first><last>Hall</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>248–256</pages>
       <url hash="1b429135">D09-1026</url>
       <bibkey>ramage-etal-2009-labeled</bibkey>
@@ -258,7 +258,7 @@
     <paper id="29">
       <title><fixed-case>W</fixed-case>ikipedia as Frame Information Repository</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <pages>276–285</pages>
       <url hash="c2b8b2c8">D09-1029</url>
       <bibkey>tonelli-giuliano-2009-wikipedia</bibkey>
@@ -307,8 +307,8 @@
     <paper id="35">
       <title>It’s Not You, it’s Me: Detecting Flirting and its Misperception in Speed-Dates</title>
       <author><first>Rajesh</first><last>Ranganath</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Dan</first><last>McFarland</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-mcfarland"><first>Dan</first><last>McFarland</last></author>
       <pages>334–342</pages>
       <url hash="28379385">D09-1035</url>
       <bibkey>ranganath-etal-2009-detecting</bibkey>
@@ -324,8 +324,8 @@
     </paper>
     <paper id="37">
       <title>A <fixed-case>B</fixed-case>ayesian Model of Syntax-Directed Tree to String Grammar Induction</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>352–361</pages>
       <url hash="729c7db1">D09-1037</url>
       <bibkey>cohn-blunsom-2009-bayesian</bibkey>
@@ -378,8 +378,8 @@
     </paper>
     <paper id="43">
       <title>Perceptron Reranking for <fixed-case>CCG</fixed-case> Realization</title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>410–419</pages>
       <url hash="05d7192f">D09-1043</url>
       <bibkey>white-rajkumar-2009-perceptron</bibkey>
@@ -402,7 +402,7 @@
     <paper id="46">
       <title>Graded Word Sense Assignment</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>440–449</pages>
       <url hash="26e6197e">D09-1046</url>
       <bibkey>erk-mccarthy-2009-graded</bibkey>
@@ -418,10 +418,10 @@
     </paper>
     <paper id="48">
       <title>Projecting Parameters for Multilingual Word Sense Disambiguation</title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Sapan</first><last>Shah</last></author>
       <author><first>Piyush</first><last>Kedia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>459–467</pages>
       <url hash="6c97ba83">D09-1048</url>
       <bibkey>khapra-etal-2009-projecting</bibkey>
@@ -438,8 +438,8 @@
       <title>Acquiring Translation Equivalences of Multiword Expressions by Normalized Correlation Frequencies</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
       <author><first>Jia-Ming</first><last>You</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>478–486</pages>
       <url hash="0a905bdd">D09-1050</url>
       <bibkey>bai-etal-2009-acquiring</bibkey>
@@ -481,14 +481,14 @@
       <title>A Structural Support Vector Method for Extracting Contexts and Answers of Questions from Online Forums</title>
       <author><first>Wen-Yun</first><last>Yang</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>514–523</pages>
       <url hash="157a80e6">D09-1054</url>
       <bibkey>yang-etal-2009-structural</bibkey>
     </paper>
     <paper id="55">
       <title>Mining Search Engine Clickthrough Log for Matching N-gram Features</title>
-      <author><first>Huihsin</first><last>Tseng</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
       <author><first>Longbin</first><last>Chen</last></author>
       <author><first>Fan</first><last>Li</last></author>
       <author><first>Ziming</first><last>Zhuang</last></author>
@@ -501,7 +501,7 @@
     <paper id="56">
       <title>The role of named entities in <fixed-case>W</fixed-case>eb <fixed-case>P</fixed-case>eople <fixed-case>S</fixed-case>earch</title>
       <author><first>Javier</first><last>Artiles</last></author>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <pages>534–542</pages>
       <url hash="dd8f8a8b">D09-1056</url>
@@ -521,7 +521,7 @@
       <author><first>Jun</first><last>Suzuki</last></author>
       <author><first>Hideki</first><last>Isozaki</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>551–560</pages>
       <url hash="ef552a04">D09-1058</url>
       <bibkey>suzuki-etal-2009-empirical</bibkey>
@@ -536,7 +536,7 @@
     <paper id="60">
       <title>Improving Dependency Parsing with Subtrees from Auto-Parsed Data</title>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>570–579</pages>
@@ -554,16 +554,16 @@
     <paper id="62">
       <title>Adapting a Polarity Lexicon using Integer Linear Programming for Domain-Specific Sentiment Classification</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>590–598</pages>
       <url hash="5ae1ca83">D09-1062</url>
       <bibkey>choi-cardie-2009-adapting</bibkey>
     </paper>
     <paper id="63">
       <title>Generating High-Coverage Semantic Orientation Lexicons From Overtly Marked Words and a Thesaurus</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Cody</first><last>Dunne</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>599–608</pages>
       <url hash="2b2a7110">D09-1063</url>
       <bibkey>mohammad-etal-2009-generating</bibkey>
@@ -581,8 +581,8 @@
     <paper id="65">
       <title><fixed-case>EEG</fixed-case> responds to conceptual stimuli and corpus semantics</title>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>619–627</pages>
       <url hash="15481897">D09-1065</url>
       <bibkey>murphy-etal-2009-eeg</bibkey>
@@ -616,7 +616,7 @@
     </paper>
     <paper id="69">
       <title>Can <fixed-case>C</fixed-case>hinese Phonemes Improve Machine Transliteration?: A Comparative Study of <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Transliteration Models</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>658–667</pages>
@@ -644,7 +644,7 @@
     <paper id="72">
       <title>A Simple Unsupervised Learner for <fixed-case>POS</fixed-case> Disambiguation Rules Given Only a Minimal Lexicon</title>
       <author><first>Qiuye</first><last>Zhao</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <pages>688–697</pages>
       <url hash="790a6021">D09-1072</url>
       <bibkey>zhao-marcus-2009-simple</bibkey>
@@ -660,7 +660,7 @@
     <paper id="74">
       <title>Discriminative Corpus Weight Estimation for Machine Translation</title>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Antti-Veikko I.</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko I.</first><last>Rosti</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <pages>708–717</pages>
       <url hash="90857958">D09-1074</url>
@@ -684,14 +684,14 @@
     </paper>
     <paper id="77">
       <title>Word Buffering Models for Improved Speech Repair Parsing</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <pages>737–745</pages>
       <url hash="6d5e8dc5">D09-1077</url>
       <bibkey>miller-2009-word</bibkey>
     </paper>
     <paper id="78">
       <title>Less is More: Significance-Based <fixed-case>N</fixed-case>-gram Selection for Smaller, Better Language Models</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>746–755</pages>
       <url hash="92047763">D09-1078</url>
@@ -708,8 +708,8 @@
     <paper id="80">
       <title>Integrating sentence- and word-level error identification for disfluency correction</title>
       <author><first>Erin</first><last>Fitzgerald</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>765–774</pages>
       <url hash="9781f473">D09-1080</url>
       <bibkey>fitzgerald-etal-2009-integrating</bibkey>
@@ -717,7 +717,7 @@
     <paper id="81">
       <title>Estimating Semantic Distance Using Soft Semantic Constraints in Knowledge-Source – Corpus Hybrid Models</title>
       <author><first>Yuval</first><last>Marton</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>775–783</pages>
       <url hash="aa781187">D09-1081</url>
@@ -733,7 +733,7 @@
     </paper>
     <paper id="83">
       <title>Learning Term-weighting Functions for Similarity Measures</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>793–802</pages>
       <url hash="bc1bf555">D09-1083</url>
       <attachment type="presentation" hash="de69050c">D09-1083.Presentation.pptx</attachment>
@@ -752,15 +752,15 @@
       <title>Unbounded Dependency Recovery for Parser Evaluation</title>
       <author><first>Laura</first><last>Rimell</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>813–821</pages>
       <url hash="af4d132c">D09-1085</url>
       <bibkey>rimell-etal-2009-unbounded</bibkey>
     </paper>
     <paper id="86">
       <title>Parser Adaptation and Projection with Quasi-Synchronous Grammar Features</title>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>822–831</pages>
       <url hash="2fb61361">D09-1086</url>
       <bibkey>smith-eisner-2009-parser</bibkey>
@@ -768,7 +768,7 @@
     <paper id="87">
       <title>Self-Training <fixed-case>PCFG</fixed-case> Grammars with Latent Annotations Across Languages</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>832–841</pages>
       <url hash="35ac5c8f">D09-1087</url>
       <bibkey>huang-harper-2009-self</bibkey>
@@ -776,8 +776,8 @@
     <paper id="88">
       <title>An Alternative to Head-Driven Approaches for Parsing a (Relatively) Free Word-Order Language</title>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <pages>842–851</pages>
       <url hash="0b9e25d1">D09-1088</url>
       <bibkey>tsarfaty-etal-2009-alternative</bibkey>
@@ -792,8 +792,8 @@
     </paper>
     <paper id="90">
       <title>Bilingual dictionary generation for low-resourced language pairs</title>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <pages>862–870</pages>
       <url hash="b498b568">D09-1090</url>
       <bibkey>varga-yokoyama-2009-bilingual</bibkey>
@@ -801,7 +801,7 @@
     <paper id="91">
       <title>Multilingual Spectral Clustering Using Document Similarity Propagation</title>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>871–879</pages>
       <url hash="f05d395d">D09-1091</url>
       <bibkey>yogatama-tanaka-ishii-2009-multilingual</bibkey>
@@ -809,9 +809,9 @@
     <paper id="92">
       <title>Polylingual Topic Models</title>
       <author><first>David</first><last>Mimno</last></author>
-      <author><first>Hanna M.</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna M.</first><last>Wallach</last></author>
       <author><first>Jason</first><last>Naradowsky</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>880–889</pages>
       <url hash="cbc51e38">D09-1092</url>
@@ -821,7 +821,7 @@
       <title>Using the <fixed-case>W</fixed-case>eb for Language Independent Spellchecking and Autocorrection</title>
       <author><first>Casey</first><last>Whitelaw</last></author>
       <author><first>Ben</first><last>Hutchinson</last></author>
-      <author><first>Grace Y</first><last>Chung</last></author>
+      <author id="grace-chung"><first>Grace Y</first><last>Chung</last></author>
       <author><first>Ged</first><last>Ellis</last></author>
       <pages>890–899</pages>
       <url hash="ea8527ec">D09-1093</url>
@@ -838,7 +838,7 @@
     </paper>
     <paper id="95">
       <title>Combining Collocations, Lexical and Encyclopedic Knowledge for Metonymy Resolution</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>910–918</pages>
       <url hash="1a65f9ca">D09-1095</url>
@@ -848,7 +848,7 @@
       <title>Segmenting Email Message Text into Zones</title>
       <author><first>Andrew</first><last>Lampert</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>919–928</pages>
       <url hash="773b07ed">D09-1096</url>
       <bibkey>lampert-etal-2009-segmenting</bibkey>
@@ -857,7 +857,7 @@
       <title>Hypernym Discovery Based on Distributional Similarity and Hierarchical Structures</title>
       <author><first>Ichiro</first><last>Yamada</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kow</first><last>Kuroda</last></author>
       <author><first>Masaki</first><last>Murata</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
@@ -880,9 +880,9 @@
     </paper>
     <paper id="99">
       <title>Toward Completeness in Concept Extraction and Classification</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>948–957</pages>
       <url hash="335258ca">D09-1099</url>
       <bibkey>hovy-etal-2009-toward</bibkey>
@@ -907,7 +907,7 @@
     </paper>
     <paper id="102">
       <title>Global Learning of Noun Phrase Anaphoricity in Coreference Resolution via Label Propagation</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Fang</first><last>Kong</last></author>
       <pages>978–986</pages>
       <url hash="b7f88d42">D09-1102</url>
@@ -916,8 +916,8 @@
     <paper id="103">
       <title>Employing the Centering Theory in Pronoun Resolution from the Semantic Perspective</title>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>987–996</pages>
       <url hash="0535b590">D09-1103</url>
       <bibkey>kong-etal-2009-employing</bibkey>
@@ -931,8 +931,8 @@
     </paper>
     <paper id="105">
       <title>Learning Linear Ordering Problems for Better Translation</title>
-      <author><first>Roy</first><last>Tromble</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="roy-tromble"><first>Roy</first><last>Tromble</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1007–1016</pages>
       <url hash="98ae66f5">D09-1105</url>
       <bibkey>tromble-eisner-2009-learning</bibkey>
@@ -959,7 +959,7 @@
       <author><first>Hui</first><last>Zhang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>1037–1045</pages>
       <url hash="ceec0edd">D09-1108</url>
       <bibkey>zhang-etal-2009-fast</bibkey>
@@ -968,7 +968,7 @@
       <title><fixed-case>G</fixed-case>azpacho and summer rash: lexical relationships from temporal patterns of web search queries</title>
       <author><first>Enrique</first><last>Alfonseca</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>1046–1055</pages>
       <url hash="425b23e7">D09-1109</url>
       <bibkey>alfonseca-etal-2009-gazpacho</bibkey>
@@ -1027,7 +1027,7 @@
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <pages>1105–1113</pages>
       <url hash="68017bec">D09-1115</url>
       <bibkey>feng-etal-2009-lattice</bibkey>
@@ -1035,7 +1035,7 @@
     <paper id="116">
       <title>A Joint Language Model With Fine-grain Syntactic Tags</title>
       <author><first>Denis</first><last>Filimonov</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>1114–1123</pages>
       <url hash="d4ec7a4d">D09-1116</url>
       <bibkey>filimonov-harper-2009-joint</bibkey>
@@ -1043,7 +1043,7 @@
     <paper id="117">
       <title>Bidirectional Phrase-based Statistical Machine Translation</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1124–1132</pages>
       <url hash="a0d72b0c">D09-1117</url>
       <bibkey>finch-sumita-2009-bidirectional</bibkey>
@@ -1052,7 +1052,7 @@
       <title>Real-time decision detection in multi-party dialogue</title>
       <author><first>Matthew</first><last>Frampton</last></author>
       <author><first>Jia</first><last>Huang</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>1133–1141</pages>
       <url hash="4f54281f">D09-1118</url>
@@ -1078,7 +1078,7 @@
       <title>Descriptive and Empirical Approaches to Capturing Underlying Dependencies among Parsing Errors</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1162–1171</pages>
       <url hash="21afbcd3">D09-1121</url>
       <bibkey>hara-etal-2009-descriptive</bibkey>
@@ -1090,15 +1090,15 @@
       <author><first>Kow</first><last>Kuroda</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <pages>1172–1181</pages>
       <url hash="f02164a5">D09-1122</url>
       <bibkey>hashimoto-etal-2009-large</bibkey>
     </paper>
     <paper id="123">
       <title>A Syntactified Direct Translation Model with Linear-time Decoding</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>1182–1191</pages>
       <url hash="37d544ff">D09-1123</url>
@@ -1123,7 +1123,7 @@
     <paper id="126">
       <title>Fully Lexicalising <fixed-case>CCG</fixed-case>bank with Hat Categories</title>
       <author><first>Matthew</first><last>Honnibal</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>1212–1221</pages>
       <url hash="00a16f97">D09-1126</url>
       <bibkey>honnibal-curran-2009-fully</bibkey>
@@ -1149,8 +1149,8 @@
     </paper>
     <paper id="129">
       <title>Real-Word Spelling Correction using <fixed-case>G</fixed-case>oogle <fixed-case>W</fixed-case>eb 1<fixed-case>T</fixed-case> 3-grams</title>
-      <author><first>Aminul</first><last>Islam</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>1241–1249</pages>
       <url hash="4276174f">D09-1129</url>
       <bibkey>islam-inkpen-2009-real</bibkey>
@@ -1158,8 +1158,8 @@
     <paper id="130">
       <title>Semi-supervised Speech Act Recognition in Emails and Forums</title>
       <author><first>Minwoo</first><last>Jeong</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>1250–1259</pages>
       <url hash="1442a6a6">D09-1130</url>
       <bibkey>jeong-etal-2009-semi</bibkey>
@@ -1167,7 +1167,7 @@
     <paper id="131">
       <title>Using Morphological and Syntactic Structures for <fixed-case>C</fixed-case>hinese Opinion Analysis</title>
       <author><first>Lun-Wei</first><last>Ku</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>1260–1269</pages>
       <url hash="36927b2d">D09-1131</url>
@@ -1183,10 +1183,10 @@
     </paper>
     <paper id="133">
       <title>Improving Nominal <fixed-case>SRL</fixed-case> in <fixed-case>C</fixed-case>hinese Language with Verbal <fixed-case>SRL</fixed-case> Information and Automatic Predicate Recognition</title>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Peide</first><last>Qian</last></author>
       <pages>1280–1288</pages>
       <url hash="9c8fcb51">D09-1133</url>
@@ -1201,7 +1201,7 @@
     </paper>
     <paper id="135">
       <title>Refining Grammars for Parsing with Hierarchical Semantic Knowledge</title>
-      <author><first>Xiaojun</first><last>Lin</last></author>
+      <author id="xiaojun-lin"><first>Xiaojun</first><last>Lin</last></author>
       <author><first>Yang</first><last>Fan</last></author>
       <author><first>Meng</first><last>Zhang</last></author>
       <author><first>Xihong</first><last>Wu</last></author>
@@ -1230,14 +1230,14 @@
     <paper id="138">
       <title>Supervised Learning of a Probabilistic Lexicon of Verb Semantic Classes</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1328–1337</pages>
       <url hash="5d52a49a">D09-1138</url>
       <bibkey>miyao-tsujii-2009-supervised</bibkey>
     </paper>
     <paper id="139">
       <title>A Study on the Semantic Relatedness of Query and Document Terms in Information Retrieval</title>
-      <author><first>Christof</first><last>Müller</last></author>
+      <author id="christof-muller"><first>Christof</first><last>Müller</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>1338–1347</pages>
       <url hash="46f138ff">D09-1139</url>
@@ -1253,7 +1253,7 @@
     </paper>
     <paper id="141">
       <title>Improved Statistical Machine Translation for Resource-Poor Languages Using Related Resource-Rich Languages</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>1358–1367</pages>
       <url hash="4916a0bc">D09-1141</url>
@@ -1261,7 +1261,7 @@
     </paper>
     <paper id="142">
       <title>What’s in a name? <fixed-case>I</fixed-case>n some languages, grammatical gender</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Marius</first><last>Popescu</last></author>
       <pages>1368–1377</pages>
       <url hash="3556c8ff">D09-1142</url>
@@ -1287,16 +1287,16 @@
     </paper>
     <paper id="145">
       <title>Detecting Speculations and their Scopes in Scientific Text</title>
-      <author><first>Arzucan</first><last>Özgür</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>1398–1407</pages>
       <url hash="4f8de553">D09-1145</url>
       <bibkey>ozgur-radev-2009-detecting</bibkey>
     </paper>
     <paper id="146">
       <title>Cross-Cultural Analysis of Blogs and Forums with Mixed-Collection Topic Models</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>1408–1417</pages>
       <url hash="d323b597">D09-1146</url>
       <bibkey>paul-girju-2009-cross</bibkey>
@@ -1313,17 +1313,17 @@
     <paper id="148">
       <title>Using Word-Sense Disambiguation Methods to Classify Web Queries by Intent</title>
       <author><first>Emily</first><last>Pitler</last></author>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <pages>1428–1436</pages>
       <url hash="7b9ced21">D09-1148</url>
       <bibkey>pitler-church-2009-using</bibkey>
     </paper>
     <paper id="149">
       <title>Semi-Supervised Learning for Semantic Relation Classification using Stratified Sampling Strategy</title>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1437–1445</pages>
       <url hash="6b0f2271">D09-1149</url>
       <bibkey>qian-etal-2009-semi</bibkey>
@@ -1354,7 +1354,7 @@
     </paper>
     <paper id="153">
       <title><fixed-case>C</fixed-case>hinese Semantic Role Labeling with Shallow Parsing</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Meng</first><last>Wang</last></author>
       <author><first>Xin</first><last>Wang</last></author>
@@ -1375,15 +1375,15 @@
       <title>Towards Domain-Independent Argumentative Zoning: Evidence from Chemistry and Computational Linguistics</title>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Colin</first><last>Batchelor</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
       <pages>1493–1502</pages>
       <url hash="91970f40">D09-1155</url>
       <bibkey>teufel-etal-2009-towards</bibkey>
     </paper>
     <paper id="156">
       <title>Character-level Analysis of Semi-Structured Documents for Set Expansion</title>
-      <author><first>Richard C.</first><last>Wang</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="richard-c-wang"><first>Richard C.</first><last>Wang</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>1503–1512</pages>
       <url hash="5b247dc1">D09-1156</url>
       <bibkey>wang-cohen-2009-character</bibkey>
@@ -1391,7 +1391,7 @@
     <paper id="157">
       <title>Classifying Relations for Biomedical Named Entity Disambiguation</title>
       <author><first>Xinglong</first><last>Wang</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>1513–1522</pages>
       <url hash="37a3b3dd">D09-1157</url>
@@ -1411,8 +1411,8 @@
       <title>Phrase Dependency Parsing for Opinion Mining</title>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>1533–1541</pages>
       <url hash="4c940924">D09-1159</url>
       <bibkey>wu-etal-2009-phrase</bibkey>
@@ -1429,7 +1429,7 @@
       <title>K-Best Combination of Syntactic Parsers</title>
       <author><first>Hui</first><last>Zhang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>1552–1560</pages>
       <url hash="ace15c9b">D09-1161</url>
diff --git a/data/xml/D10.xml b/data/xml/D10.xml
index 0dcb90b209..931312a1b4 100644
--- a/data/xml/D10.xml
+++ b/data/xml/D10.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="26daf779">D10-1</url>
       <editor><first>Hang</first><last>Li</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Cambridge, MA</address>
       <month>October</month>
@@ -18,9 +18,9 @@
     </frontmatter>
     <paper id="1">
       <title>On Dual Decomposition and Linear Programming Relaxations for Natural Language Processing</title>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <author><first>David</first><last>Sontag</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Tommi</first><last>Jaakkola</last></author>
       <pages>1–11</pages>
       <url hash="667e3a39">D10-1001</url>
@@ -29,7 +29,7 @@
     <paper id="2">
       <title>Self-Training with Products of Latent Variable Grammars</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <pages>12–22</pages>
       <url hash="70461427">D10-1002</url>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Utilizing Extra-Sentential Context for Parsing</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>23–33</pages>
       <url hash="bc28da15">D10-1003</url>
@@ -45,9 +45,9 @@
     </paper>
     <paper id="4">
       <title>Turbo Parsers: Dependency Parsing by Approximate Variational Inference</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Pedro</first><last>Aguiar</last></author>
       <author><first>Mário</first><last>Figueiredo</last></author>
       <pages>34–44</pages>
@@ -64,7 +64,7 @@
     </paper>
     <paper id="6">
       <title>Jointly Modeling Aspects and Opinions with a <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt-<fixed-case>LDA</fixed-case> Hybrid</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
       <author><first>Hongfei</first><last>Yan</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
@@ -74,9 +74,9 @@
     </paper>
     <paper id="7">
       <title>Summarizing Contrastive Viewpoints in Opinionated Text</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>66–76</pages>
       <url hash="cdfdb3dc">D10-1007</url>
       <bibkey>paul-etal-2010-summarizing</bibkey>
@@ -84,8 +84,8 @@
     <paper id="8">
       <title>Automatically Producing Plot Unit Representations for Narrative Text</title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>77–86</pages>
       <url hash="065693a1">D10-1008</url>
       <bibkey>goyal-etal-2010-automatically</bibkey>
@@ -93,8 +93,8 @@
     <paper id="9">
       <title>Handling Noisy Queries in Cross Language <fixed-case>FAQ</fixed-case> Retrieval</title>
       <author><first>Danish</first><last>Contractor</last></author>
-      <author><first>Govind</first><last>Kothari</last></author>
-      <author><first>Tanveer</first><last>Faruquie</last></author>
+      <author id="govind-kothari"><first>Govind</first><last>Kothari</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer</first><last>Faruquie</last></author>
       <author id="l-venkata-subramaniam"><first>L. V.</first><last>Subramaniam</last></author>
       <author><first>Sumit</first><last>Negi</last></author>
       <pages>87–96</pages>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="10">
       <title>Learning the Relative Usefulness of Questions in Community <fixed-case>QA</fixed-case></title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <author><first>Yunfeng</first><last>Huang</last></author>
       <pages>97–107</pages>
       <url hash="a3c2b9d6">D10-1010</url>
@@ -112,7 +112,7 @@
     <paper id="11">
       <title>Positional Language Models for Clinical Information Retrieval</title>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Martin</first><last>Dawes</last></author>
       <pages>108–115</pages>
       <url hash="c8df20ef">D10-1011</url>
@@ -141,7 +141,7 @@
     <paper id="14">
       <title>Soft Syntactic Constraints for Hierarchical Phrase-Based Translation Using Latent Syntactic Distributions</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>138–147</pages>
       <url hash="ffb5e6b5">D10-1014</url>
@@ -149,8 +149,8 @@
     </paper>
     <paper id="15">
       <title>A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically Rich Languages</title>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>148–157</pages>
       <url hash="c5695fa4">D10-1015</url>
@@ -160,14 +160,14 @@
       <title>“Poetic” Statistical Machine Translation: Rhyme and Meter</title>
       <author><first>Dmitriy</first><last>Genzel</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>158–166</pages>
       <url hash="123b751b">D10-1016</url>
       <bibkey>genzel-etal-2010-poetic</bibkey>
     </paper>
     <paper id="17">
       <title>Efficient Graph-Based Semi-Supervised Learning of Structured Tagging Models</title>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <pages>167–176</pages>
@@ -187,8 +187,8 @@
       <author><first>Xian</first><last>Qian</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>187–195</pages>
       <url hash="47b4e52d">D10-1019</url>
       <bibkey>qian-etal-2010-joint</bibkey>
@@ -214,7 +214,7 @@
       <title>Negative Training Data Can be Harmful to Text Classification</title>
       <author><first>Xiao-Li</first><last>Li</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>218–228</pages>
       <url hash="7f27c700">D10-1022</url>
       <bibkey>li-etal-2010-negative</bibkey>
@@ -232,16 +232,16 @@
       <title>Evaluating Models of Latent Document Semantics in the Presence of <fixed-case>OCR</fixed-case> Errors</title>
       <author><first>Daniel</first><last>Walker</last></author>
       <author><first>William B.</first><last>Lund</last></author>
-      <author><first>Eric K.</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric K.</first><last>Ringger</last></author>
       <pages>240–250</pages>
       <url hash="ad27be56">D10-1024</url>
       <bibkey>walker-etal-2010-evaluating</bibkey>
     </paper>
     <paper id="25">
       <title>Translingual Document Representations from Discriminative Projections</title>
-      <author><first>John</first><last>Platt</last></author>
+      <author id="john-c-platt"><first>John</first><last>Platt</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>251–261</pages>
       <url hash="0caed77d">D10-1025</url>
       <bibkey>platt-etal-2010-translingual</bibkey>
@@ -284,7 +284,7 @@
     <paper id="30">
       <title>Joint Inference for Bilingual Semantic Role Labeling</title>
       <author><first>Tao</first><last>Zhuang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>304–314</pages>
       <url hash="bcf21233">D10-1030</url>
       <bibkey>zhuang-zong-2010-joint</bibkey>
@@ -292,7 +292,7 @@
     <paper id="31">
       <title>Automatic Discovery of Manner Relations and its Applications</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>315–324</pages>
       <url hash="0b187903">D10-1031</url>
       <bibkey>blanco-moldovan-2010-automatic</bibkey>
@@ -307,9 +307,9 @@
     </paper>
     <paper id="33">
       <title>Improving Mention Detection Robustness to Noisy Input</title>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>John</first><last>Pitrelli</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="john-f-pitrelli"><first>John</first><last>Pitrelli</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <pages>335–345</pages>
       <url hash="74f0f68a">D10-1033</url>
@@ -317,8 +317,8 @@
     </paper>
     <paper id="34">
       <title>Clustering-Based Stratified Seed Sampling for Semi-Supervised Relation Classification</title>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>346–355</pages>
       <url hash="f0a8e5be">D10-1034</url>
       <bibkey>qian-zhou-2010-clustering</bibkey>
@@ -351,10 +351,10 @@
     </paper>
     <paper id="38">
       <title>Exploiting Conversation Structure in Unsupervised Topic Segmentation for Emails</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <author><first>Gabriel</first><last>Murray</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <pages>388–398</pages>
       <url hash="1211500d">D10-1038</url>
       <bibkey>joty-etal-2010-exploiting</bibkey>
@@ -410,7 +410,7 @@
     <paper id="44">
       <title>Discriminative Instance Weighting for Domain Adaptation in Statistical Machine Translation</title>
       <author><first>George</first><last>Foster</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Roland</first><last>Kuhn</last></author>
       <pages>451–459</pages>
       <url hash="f65630f7">D10-1044</url>
@@ -421,7 +421,7 @@
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Aren</first><last>Jansen</last></author>
       <author><first>Glen</first><last>Coppersmith</last></author>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <pages>460–470</pages>
       <url hash="377558a5">D10-1045</url>
       <bibkey>dredze-etal-2010-nlp</bibkey>
@@ -429,7 +429,7 @@
     <paper id="46">
       <title>Fusing Eye Gaze with Speech Recognition Hypotheses to Resolve Exophoric References in Situated Dialogue</title>
       <author><first>Zahar</first><last>Prasov</last></author>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <pages>471–481</pages>
       <url hash="5c3d8bda">D10-1046</url>
       <bibkey>prasov-chai-2010-fusing</bibkey>
@@ -437,8 +437,8 @@
     <paper id="47">
       <title>Multi-Document Summarization Using <fixed-case>A</fixed-case>* Search and Discriminative Learning</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>482–491</pages>
       <url hash="ff2b66a1">D10-1047</url>
       <bibkey>aker-etal-2010-multi</bibkey>
@@ -448,10 +448,10 @@
       <author><first>Karthik</first><last>Raghunathan</last></author>
       <author><first>Heeyoung</first><last>Lee</last></author>
       <author><first>Sudarshan</first><last>Rangarajan</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>492–501</pages>
       <url hash="d076a87b">D10-1048</url>
       <bibkey>raghunathan-etal-2010-multi</bibkey>
@@ -486,7 +486,7 @@
     <paper id="52">
       <title>Discriminative Word Alignment with a Function Word Reordering Model</title>
       <author><first>Hendra</first><last>Setiawan</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>534–544</pages>
       <url hash="049cb314">D10-1052</url>
@@ -494,7 +494,7 @@
     </paper>
     <paper id="53">
       <title>Hierarchical Phrase-Based Translation Grammars Extracted from Alignment Posterior Probabilities</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Juan</first><last>Pino</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>545–554</pages>
@@ -513,8 +513,8 @@
     <paper id="55">
       <title>Further Meta-Evaluation of Broad-Coverage Surface Realization</title>
       <author><first>Dominic</first><last>Espinosa</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Shoshana</first><last>Berleant</last></author>
       <pages>564–574</pages>
       <url hash="0a724785">D10-1055</url>
@@ -523,8 +523,8 @@
     <paper id="56">
       <title>Two Decades of Unsupervised <fixed-case>POS</fixed-case> Induction: How Far Have We Come?</title>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>575–584</pages>
       <url hash="efc56424">D10-1056</url>
       <bibkey>christodoulopoulos-etal-2010-two</bibkey>
@@ -533,7 +533,7 @@
       <title>We’re Not in <fixed-case>K</fixed-case>ansas Anymore: Detecting Domain Changes in Streams</title>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Tim</first><last>Oates</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <pages>585–595</pages>
       <url hash="eef58e4a">D10-1057</url>
       <bibkey>dredze-etal-2010-kansas</bibkey>
@@ -560,7 +560,7 @@
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
       <author><first>Jinxi</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>616–625</pages>
       <url hash="576eb3ac">D10-1060</url>
       <bibkey>shen-etal-2010-statistical</bibkey>
@@ -569,8 +569,8 @@
       <title>Discriminative Sample Selection for Statistical Machine Translation</title>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>David</first><last>Stallard</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>626–635</pages>
       <url hash="041859c4">D10-1061</url>
       <bibkey>ananthakrishnan-etal-2010-discriminative</bibkey>
@@ -601,7 +601,7 @@
     <paper id="65">
       <title>Combining Unsupervised and Supervised Alignments for <fixed-case>MT</fixed-case>: An Empirical Study</title>
       <author><first>Jinxi</first><last>Xu</last></author>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <pages>667–673</pages>
       <url hash="02349cd0">D10-1065</url>
       <bibkey>xu-rosti-2010-combining</bibkey>
@@ -624,7 +624,7 @@
     <paper id="68">
       <title>Unsupervised Parse Selection for <fixed-case>HPSG</fixed-case></title>
       <author><first>Rebecca</first><last>Dridan</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>694–704</pages>
       <url hash="e3617e1b">D10-1068</url>
       <bibkey>dridan-baldwin-2010-unsupervised</bibkey>
@@ -632,19 +632,19 @@
     <paper id="69">
       <title>Uptraining for Accurate Deterministic Question Parsing</title>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <pages>705–713</pages>
       <url hash="929a9e3a">D10-1069</url>
       <bibkey>petrov-etal-2010-uptraining</bibkey>
     </paper>
     <paper id="70">
       <title>A Unified Framework for Scope Learning via Simplified Shallow Semantic Parsing</title>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>714–724</pages>
       <url hash="9a5feb34">D10-1070</url>
       <bibkey>zhu-etal-2010-unified</bibkey>
@@ -652,19 +652,19 @@
     <paper id="71">
       <title>A New Approach to Lexical Disambiguation of <fixed-case>A</fixed-case>rabic Text</title>
       <author><first>Rushin</first><last>Shah</last></author>
-      <author><first>Paramveer S.</first><last>Dhillon</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
-      <author><first>Dean</first><last>Foster</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer S.</first><last>Dhillon</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>725–735</pages>
       <url hash="d862f192">D10-1071</url>
       <bibkey>shah-etal-2010-new</bibkey>
     </paper>
     <paper id="72">
       <title>What a Parser Can Learn from a Semantic Role Labeler and Vice Versa</title>
-      <author><first>Stephen</first><last>Boxwell</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="stephen-boxwell"><first>Stephen</first><last>Boxwell</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <pages>736–744</pages>
       <url hash="c37a8b3c">D10-1072</url>
@@ -672,7 +672,7 @@
     </paper>
     <paper id="73">
       <title>Word Sense Induction &amp; Disambiguation Using Hierarchical Random Graphs</title>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>745–755</pages>
       <url hash="6e2f3170">D10-1073</url>
@@ -680,8 +680,8 @@
     </paper>
     <paper id="74">
       <title>Towards Conversation Entailment: An Empirical Investigation</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>756–766</pages>
       <url hash="98d6e621">D10-1074</url>
       <bibkey>zhang-chai-2010-towards</bibkey>
@@ -697,7 +697,7 @@
     </paper>
     <paper id="76">
       <title>Training Continuous Space Language Models: Some Practical Issues</title>
-      <author><first>Hai Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai Son</first><last>Le</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -707,7 +707,7 @@
     </paper>
     <paper id="77">
       <title>Enhancing Domain Portability of <fixed-case>C</fixed-case>hinese Segmentation Model Using Chi-Square Statistics and Bootstrapping</title>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Dongxu</first><last>Han</last></author>
       <pages>789–798</pages>
       <url hash="765db39f">D10-1077</url>
@@ -730,7 +730,7 @@
       <author><first>Kristian</first><last>Heal</last></author>
       <author><first>Deryle</first><last>Lonsdale</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <pages>810–820</pages>
       <url hash="496aaf75">D10-1079</url>
       <bibkey>mcclanahan-etal-2010-probabilistic</bibkey>
@@ -739,7 +739,7 @@
       <title>Lessons Learned in Part-of-Speech Tagging of Conversational Speech</title>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>821–831</pages>
       <url hash="961b8fdb">D10-1080</url>
       <bibkey>eidelman-etal-2010-lessons</bibkey>
@@ -748,7 +748,7 @@
       <title>An Efficient Algorithm for Unsupervised Word Segmentation with Branching Entropy and <fixed-case>MDL</fixed-case></title>
       <author><first>Valentin</first><last>Zhikov</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>832–842</pages>
       <url hash="35269341">D10-1081</url>
       <bibkey>zhikov-etal-2010-efficient</bibkey>
@@ -763,7 +763,7 @@
     </paper>
     <paper id="83">
       <title>Simple Type-Level Unsupervised <fixed-case>POS</fixed-case> Tagging</title>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Aria</first><last>Haghighi</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>853–861</pages>
@@ -774,7 +774,7 @@
       <title>Classifying Dialogue Acts in One-on-One Live Chats</title>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Lawrence</first><last>Cavedon</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>862–871</pages>
       <url hash="ae42a896">D10-1084</url>
       <bibkey>kim-etal-2010-classifying</bibkey>
@@ -783,7 +783,7 @@
       <title>Resolving Event Noun Phrases to Their Verbal Mentions</title>
       <author><first>Bin</first><last>Chen</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>872–881</pages>
       <url hash="291776ba">D10-1085</url>
       <bibkey>chen-etal-2010-resolving</bibkey>
@@ -791,7 +791,7 @@
     <paper id="86">
       <title>A Tree Kernel-Based Unified Framework for <fixed-case>C</fixed-case>hinese Zero Anaphora Resolution</title>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>882–891</pages>
       <url hash="3c055eb2">D10-1086</url>
       <bibkey>kong-zhou-2010-tree</bibkey>
@@ -808,14 +808,14 @@
     <paper id="88">
       <title>Using Unknown Word Techniques to Learn Known Words</title>
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>902–912</pages>
       <url hash="1731a8fe">D10-1088</url>
       <bibkey>cholakov-van-noord-2010-using</bibkey>
     </paper>
     <paper id="89">
       <title><fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>ars: A New Corpus for Research on Temporal Expressions</title>
-      <author><first>Pawel</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Pawel</first><last>Mazur</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>913–922</pages>
       <url hash="92a856c2">D10-1089</url>
@@ -854,7 +854,7 @@
       <title>An Approach of Generating Personalized Views from Normalized Electronic Dictionaries : A Practical Experiment on <fixed-case>A</fixed-case>rabic Language</title>
       <author><first>Aida</first><last>Khemakhem</last></author>
       <author><first>Bilel</first><last>Gargouri</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>953–960</pages>
       <url hash="fe574c7b">D10-1093</url>
       <bibkey>khemakhem-etal-2010-approach</bibkey>
@@ -896,7 +896,7 @@
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Rajasekar</first><last>Krishnamurthy</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Shivakumar</first><last>Vaithyanathan</last></author>
       <pages>1002–1012</pages>
       <url hash="d5ad1315">D10-1098</url>
@@ -914,7 +914,7 @@
     <paper id="100">
       <title>Automatic Detection and Classification of Social Events</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1024–1034</pages>
       <url hash="8883f01b">D10-1100</url>
       <bibkey>agarwal-rambow-2010-automatic</bibkey>
@@ -931,7 +931,7 @@
       <title>Multi-Level Structured Models for Document-Level Sentiment Classification</title>
       <author><first>Ainur</first><last>Yessenalina</last></author>
       <author><first>Yisong</first><last>Yue</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1046–1056</pages>
       <url hash="513fb16f">D10-1102</url>
       <bibkey>yessenalina-etal-2010-multi</bibkey>
@@ -939,7 +939,7 @@
     <paper id="103">
       <title>Cross Language Text Classification by Model Translation and Semi-Supervised Learning</title>
       <author><first>Lei</first><last>Shi</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Mingjun</first><last>Tian</last></author>
       <pages>1057–1067</pages>
       <url hash="f78baca2">D10-1103</url>
@@ -970,7 +970,7 @@
       <author><first>Stefan</first><last>Schoenmackers</last></author>
       <author><first>Jesse</first><last>Davis</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>1088–1098</pages>
       <url hash="af94fd65">D10-1106</url>
       <bibkey>schoenmackers-etal-2010-learning</bibkey>
@@ -986,7 +986,7 @@
     <paper id="108">
       <title>A Semi-Supervised Method to Learn and Construct Taxonomies Using the Web</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1110–1118</pages>
       <url hash="96ec7e5e">D10-1108</url>
       <bibkey>kozareva-hovy-2010-semi</bibkey>
@@ -1016,7 +1016,7 @@
     <paper id="111">
       <title>Staying Informed: Supervised and Semi-Supervised Multi-View Topical Analysis of Ideological Perspective</title>
       <author><first>Amr</first><last>Ahmed</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1140–1150</pages>
       <url hash="37735b81">D10-1111</url>
       <bibkey>ahmed-xing-2010-staying</bibkey>
@@ -1024,14 +1024,14 @@
     <paper id="112">
       <title>Word-Based Dialect Identification with Georeferenced Rules</title>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1151–1161</pages>
       <url hash="c98c9ae6">D10-1112</url>
       <bibkey>scherrer-rambow-2010-word</bibkey>
     </paper>
     <paper id="113">
       <title>Measuring Distributional Similarity in Context</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1162–1172</pages>
       <url hash="df556dcf">D10-1113</url>
@@ -1040,14 +1040,14 @@
     <paper id="114">
       <title>A Mixture Model with Sharing for Lexical Semantics</title>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1173–1182</pages>
       <url hash="3cdab0fb">D10-1114</url>
       <bibkey>reisinger-mooney-2010-mixture</bibkey>
     </paper>
     <paper id="115">
       <title>Nouns are Vectors, Adjectives are Matrices: Representing Adjective-Noun Constructions in Semantic Space</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
       <pages>1183–1193</pages>
       <url hash="33554719">D10-1115</url>
@@ -1055,7 +1055,7 @@
     </paper>
     <paper id="116">
       <title>Practical Linguistic Steganography Using Contextual Synonym Substitution and Vertex Colour Coding</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>1194–1203</pages>
       <url hash="0b6bf292">D10-1116</url>
@@ -1063,8 +1063,8 @@
     </paper>
     <paper id="117">
       <title>Unsupervised Induction of Tree Substitution Grammars for Dependency Parsing</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1204–1213</pages>
       <url hash="3b9d49b3">D10-1117</url>
       <bibkey>blunsom-cohn-2010-unsupervised</bibkey>
@@ -1079,9 +1079,9 @@
     <paper id="119">
       <title>Inducing Probabilistic <fixed-case>CCG</fixed-case> Grammars from Logical Form with Higher-Order Unification</title>
       <author><first>Tom</first><last>Kwiatkowksi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1223–1233</pages>
       <url hash="2a58285b">D10-1119</url>
       <bibkey>kwiatkowksi-etal-2010-inducing</bibkey>
@@ -1098,16 +1098,16 @@
     </paper>
     <paper id="121">
       <title>What’s with the Attitude? Identifying Sentences with Attitude in Online Discussions</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1245–1255</pages>
       <url hash="4472c34f">D10-1121</url>
       <bibkey>hassan-etal-2010-whats</bibkey>
     </paper>
     <paper id="122">
       <title>Hashing-Based Approaches to Spelling Correction of Personal Names</title>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
       <author><first>Shaishav</first><last>Kumar</last></author>
       <pages>1256–1265</pages>
       <url hash="10d23f42">D10-1122</url>
@@ -1126,8 +1126,8 @@
       <title>A Latent Variable Model for Geographic Lexical Variation</title>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <pages>1277–1287</pages>
       <url hash="727b69b0">D10-1124</url>
       <bibkey>eisenstein-etal-2010-latent</bibkey>
@@ -1135,8 +1135,8 @@
     <paper id="125">
       <title>Dual Decomposition for Parsing with Non-Projective Head Automata</title>
       <author><first>Terry</first><last>Koo</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Tommi</first><last>Jaakkola</last></author>
       <author><first>David</first><last>Sontag</last></author>
       <pages>1288–1298</pages>
diff --git a/data/xml/D11.xml b/data/xml/D11.xml
index ed927e2356..7340a80683 100644
--- a/data/xml/D11.xml
+++ b/data/xml/D11.xml
@@ -30,7 +30,7 @@
       <author><first>Marco</first><last>Lui</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>13–25</pages>
       <url hash="df3033d6">D11-1002</url>
       <bibkey>wang-etal-2011-predicting</bibkey>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Exact Decoding of Phrase-Based Translation Models through <fixed-case>L</fixed-case>agrangian Relaxation</title>
       <author><first>Yin-Wen</first><last>Chang</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>26–37</pages>
       <url hash="f3d46548">D11-1003</url>
       <attachment hash="6d88fd81">D11-1003.Attachment.pdf</attachment>
@@ -54,9 +54,9 @@
     </paper>
     <paper id="5">
       <title>Unsupervised Structure Prediction with Non-Parallel Multilingual Guidance</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>50–61</pages>
       <url hash="a1b4c2b5">D11-1005</url>
       <bibkey>cohen-etal-2011-unsupervised</bibkey>
@@ -65,7 +65,7 @@
       <title>Multi-Source Transfer of Delexicalized Dependency Parsers</title>
       <author><first>Ryan</first><last>McDonald</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>62–72</pages>
       <url hash="96e841d8">D11-1006</url>
       <bibkey>mcdonald-etal-2011-multi</bibkey>
@@ -73,7 +73,7 @@
     <paper id="7">
       <title><fixed-case>SMT</fixed-case> Helps Bitext Dependency Parsing</title>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
@@ -133,7 +133,7 @@
       <author><first>Zheng-Jun</first><last>Zha</last></author>
       <author><first>Meng</first><last>Wang</last></author>
       <author><first>Kai</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>140–150</pages>
       <url hash="0f823177">D11-1013</url>
       <attachment hash="fe2a2cc8">D11-1013.Attachment.zip</attachment>
@@ -143,9 +143,9 @@
       <title>Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions</title>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Jeffrey</first><last>Pennington</last></author>
-      <author><first>Eric H.</first><last>Huang</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="eric-h-huang"><first>Eric H.</first><last>Huang</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>151–161</pages>
       <url hash="f7745003">D11-1014</url>
       <attachment hash="644120c1">D11-1014.Attachment.pdf</attachment>
@@ -157,7 +157,7 @@
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>162–171</pages>
       <url hash="62ee553e">D11-1015</url>
       <bibkey>zhou-etal-2011-unsupervised</bibkey>
@@ -165,17 +165,17 @@
     <paper id="16">
       <title>Compositional Matrix-Space Models for Sentiment Analysis</title>
       <author><first>Ainur</first><last>Yessenalina</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>172–182</pages>
       <url hash="a0c14e85">D11-1016</url>
       <bibkey>yessenalina-cardie-2011-compositional</bibkey>
     </paper>
     <paper id="17">
       <title>Training a Parser for Machine Translation Reordering</title>
-      <author><first>Jason</first><last>Katz-Brown</last></author>
+      <author id="jason-katz-brown"><first>Jason</first><last>Katz-Brown</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>David</first><last>Talbot</last></author>
       <author><first>Hiroshi</first><last>Ichikawa</last></author>
       <author><first>Masakazu</first><last>Seno</last></author>
@@ -196,7 +196,7 @@
       <title>Augmenting String-to-Tree Translation Models with Fuzzy Use of Source-side Syntax</title>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Feifei</first><last>Zhai</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>204–215</pages>
       <url hash="70b80541">D11-1019</url>
       <bibkey>zhang-etal-2011-augmenting</bibkey>
@@ -213,7 +213,7 @@
     <paper id="21">
       <title><fixed-case>B</fixed-case>ayesian Checking for Topic Models</title>
       <author><first>David</first><last>Mimno</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <pages>227–237</pages>
       <url hash="1f6bc491">D11-1021</url>
       <attachment hash="46ba2e1a">D11-1021.Attachment.zip</attachment>
@@ -221,8 +221,8 @@
     </paper>
     <paper id="22">
       <title>Dual Decomposition with Many Overlapping Components</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Mário</first><last>Figueiredo</last></author>
       <author><first>Pedro</first><last>Aguiar</last></author>
       <pages>238–249</pages>
@@ -233,7 +233,7 @@
     <paper id="23">
       <title>Approximate Scalable Bounded Space Sketch for Large Data <fixed-case>NLP</fixed-case></title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>250–261</pages>
       <url hash="e784a790">D11-1023</url>
       <bibkey>goyal-daume-iii-2011-approximate</bibkey>
@@ -241,7 +241,7 @@
     <paper id="24">
       <title>Optimizing Semantic Coherence in Topic Models</title>
       <author><first>David</first><last>Mimno</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Edmund</first><last>Talley</last></author>
       <author><first>Miriam</first><last>Leenders</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
@@ -261,7 +261,7 @@
     <paper id="26">
       <title>Linear Text Segmentation Using Affinity Propagation</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>284–293</pages>
       <url hash="01073651">D11-1026</url>
       <attachment hash="689ff05b">D11-1026.Attachment.gz</attachment>
@@ -297,7 +297,7 @@
     <paper id="30">
       <title>Universal Morphological Analysis using Structured Nearest Neighbor Prediction</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Benjamin</first><last>Snyder</last></author>
       <pages>322–332</pages>
       <url hash="b9abe658">D11-1030</url>
@@ -358,8 +358,8 @@
     </paper>
     <paper id="37">
       <title>Parser Evaluation over Local and Non-Local Deep Dependencies in a Large Corpus</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <pages>397–408</pages>
@@ -378,7 +378,7 @@
     <paper id="39">
       <title>Bootstrapping Semantic Parsers from Conversations</title>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>421–432</pages>
       <url hash="0243437a">D11-1039</url>
       <bibkey>artzi-zettlemoyer-2011-bootstrapping</bibkey>
@@ -399,7 +399,7 @@
       <title>Corpus-Guided Sentence Generation of Natural Images</title>
       <author><first>Yezhou</first><last>Yang</last></author>
       <author><first>Ching</first><last>Teo</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Yiannis</first><last>Aloimonos</last></author>
       <pages>444–454</pages>
       <url hash="71c15535">D11-1041</url>
@@ -407,9 +407,9 @@
     </paper>
     <paper id="42">
       <title>Corroborating Text Evaluation Results with Heterogeneous Measures</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>455–466</pages>
       <url hash="a60b7080">D11-1042</url>
@@ -417,10 +417,10 @@
     </paper>
     <paper id="43">
       <title>Ranking Human and Machine Summarization Systems</title>
-      <author><first>Peter</first><last>Rankel</last></author>
-      <author><first>John</first><last>Conroy</last></author>
+      <author id="peter-a-rankel"><first>Peter</first><last>Rankel</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
       <author><first>Eric</first><last>Slud</last></author>
-      <author><first>Dianne</first><last>O’Leary</last></author>
+      <author id="dianne-p-oleary"><first>Dianne</first><last>O’Leary</last></author>
       <pages>467–473</pages>
       <url hash="5555c342">D11-1043</url>
       <bibkey>rankel-etal-2011-ranking</bibkey>
@@ -428,7 +428,7 @@
     <paper id="44">
       <title>Quasi-Synchronous Phrase Dependency Grammars for Machine Translation</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>474–485</pages>
       <url hash="988052ff">D11-1044</url>
       <bibkey>gimpel-smith-2011-quasi</bibkey>
@@ -436,10 +436,10 @@
     <paper id="45">
       <title>A Word Reordering Model for Improved Machine Translation</title>
       <author><first>Karthik</first><last>Visweswariah</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Ankur</first><last>Gandhe</last></author>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
-      <author><first>Jiri</first><last>Navratil</last></author>
+      <author id="jiri-navratil"><first>Jiri</first><last>Navratil</last></author>
       <pages>486–496</pages>
       <url hash="39806143">D11-1045</url>
       <bibkey>visweswariah-etal-2011-word</bibkey>
@@ -456,7 +456,7 @@
     </paper>
     <paper id="47">
       <title>Efficient retrieval of tree translation examples for Syntax-Based Machine Translation</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>508–518</pages>
       <url hash="1fea9670">D11-1047</url>
@@ -465,7 +465,7 @@
     <paper id="48">
       <title>A generative model for unsupervised discovery of relations and argument classes from clinical texts</title>
       <author><first>Bryan</first><last>Rink</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>519–528</pages>
       <url hash="43d81e7a">D11-1048</url>
       <bibkey>rink-harabagiu-2011-generative</bibkey>
@@ -473,15 +473,15 @@
     <paper id="49">
       <title>Random Walk Inference and Learning in A Large Scale Knowledge Base</title>
       <author><first>Ni</first><last>Lao</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>529–539</pages>
       <url hash="3c41d3b3">D11-1049</url>
       <bibkey>lao-etal-2011-random</bibkey>
     </paper>
     <paper id="50">
       <title>Exploring Supervised <fixed-case>LDA</fixed-case> Models for Assigning Attributes to Adjective-Noun Phrases</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>540–551</pages>
       <url hash="cd28d1f9">D11-1050</url>
@@ -491,7 +491,7 @@
     <paper id="51">
       <title>Semantic Topic Models: Combining Word Distributional Statistics and Dictionary Definitions</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>552–561</pages>
       <url hash="7c2d1653">D11-1051</url>
       <bibkey>guo-diab-2011-semantic</bibkey>
@@ -517,7 +517,7 @@
       <title>Data-Driven Response Generation in Social Media</title>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <pages>583–593</pages>
       <url hash="c099d773">D11-1054</url>
       <attachment hash="d1cc1e5e">D11-1054.Attachment.zip</attachment>
@@ -528,9 +528,9 @@
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Michael</first><last>Heilman</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Bryan R.</first><last>Routledge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="bryan-r-routledge"><first>Bryan R.</first><last>Routledge</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>594–604</pages>
       <url hash="f13063eb">D11-1055</url>
       <bibkey>yogatama-etal-2011-predicting</bibkey>
@@ -546,7 +546,7 @@
     <paper id="57">
       <title>Discovering Morphological Paradigms from Plain Text Using a <fixed-case>D</fixed-case>irichlet Process Mixture Model</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>616–627</pages>
       <url hash="0dc33ac6">D11-1057</url>
       <attachment hash="693e67b3">D11-1057.Attachment.zip</attachment>
@@ -555,7 +555,7 @@
     <paper id="58">
       <title>Multilayer Sequence Labeling</title>
       <author><first>Ai</first><last>Azuma</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>628–637</pages>
       <url hash="a8b750f5">D11-1058</url>
       <bibkey>azuma-matsumoto-2011-multilayer</bibkey>
@@ -563,8 +563,8 @@
     <paper id="59">
       <title>A <fixed-case>B</fixed-case>ayesian Mixture Model for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Induction Using Multiple Features</title>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>638–647</pages>
       <url hash="576326ac">D11-1059</url>
       <bibkey>christodoulopoulos-etal-2011-bayesian</bibkey>
@@ -572,14 +572,14 @@
     <paper id="60">
       <title>Large-Scale Noun Compound Interpretation Using Bootstrapping and the Web as a Corpus</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>648–658</pages>
       <url hash="7c9d90ac">D11-1060</url>
       <bibkey>kim-nakov-2011-large</bibkey>
     </paper>
     <paper id="61">
       <title>Linguistic Redundancy in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Kostas</first><last>Tsioutsiouliklis</last></author>
       <pages>659–669</pages>
@@ -588,7 +588,7 @@
     </paper>
     <paper id="62">
       <title>Divide and Conquer: Crowdsourcing the Creation of Cross-Lingual Textual Entailment Corpora</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
@@ -599,7 +599,7 @@
     </paper>
     <paper id="63">
       <title>Literal and Metaphorical Sense Identification through Concrete and Abstract Context</title>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <author><first>Yair</first><last>Neuman</last></author>
       <author><first>Dan</first><last>Assaf</last></author>
       <author><first>Yohai</first><last>Cohen</last></author>
@@ -610,7 +610,7 @@
     <paper id="64">
       <title>Syntactic Decision Tree <fixed-case>LM</fixed-case>s: Random Selection or Intelligent Design?</title>
       <author><first>Denis</first><last>Filimonov</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>691–699</pages>
       <url hash="c74e9a1f">D11-1064</url>
       <bibkey>filimonov-harper-2011-syntactic</bibkey>
@@ -626,7 +626,7 @@
     <paper id="66">
       <title>Using Syntactic and Semantic Structural Kernels for Classifying Definition Questions in Jeopardy!</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
       <author><first>James</first><last>Fan</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
@@ -637,16 +637,16 @@
     <paper id="67">
       <title>Multiword Expression Identification with Tree Substitution Grammars: A Parsing tour de force with <fixed-case>F</fixed-case>rench</title>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>725–735</pages>
       <url hash="c0ed19a2">D11-1067</url>
       <bibkey>green-etal-2011-multiword</bibkey>
     </paper>
     <paper id="68">
       <title>Modelling Discourse Relations for <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Amal</first><last>Al-Saif</last></author>
+      <author id="amal-al-saif"><first>Amal</first><last>Al-Saif</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <pages>736–747</pages>
       <url hash="83555821">D11-1068</url>
@@ -655,14 +655,14 @@
     <paper id="69">
       <title>Classifying Sentences as Speech Acts in Message Board Posts</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>748–758</pages>
       <url hash="c01edcb0">D11-1069</url>
       <bibkey>qadir-riloff-2011-classifying</bibkey>
     </paper>
     <paper id="70">
       <title>Learning Local Content Shift Detectors from Document-level Information</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>759–770</pages>
       <url hash="61a59b2d">D11-1070</url>
       <attachment hash="b4e3416b">D11-1070.Attachment.zip</attachment>
@@ -697,7 +697,7 @@
       <author><first>Alok</first><last>Kothari</last></author>
       <author><first>Martin</first><last>Forst</last></author>
       <author><first>Christina</first><last>Lioma</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>793–803</pages>
       <url hash="1a45a990">D11-1073</url>
       <bibkey>michelbacher-etal-2011-cascaded</bibkey>
@@ -714,7 +714,7 @@
       <title>Unsupervised Information Extraction with Distributional Prior Knowledge</title>
       <author><first>Cane Wing-ki</first><last>Leung</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
-      <author><first>Kian Ming A.</first><last>Chai</last></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last></author>
       <author><first>Hai Leong</first><last>Chieu</last></author>
       <author><first>Loo-Nin</first><last>Teow</last></author>
       <pages>814–824</pages>
@@ -726,11 +726,11 @@
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Masaaki</first><last>Tsuchida</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Ichiro</first><last>Yamada</last></author>
-      <author><first>Jong Hoon</first><last>Oh</last></author>
-      <author><first>Istvan</first><last>Varga</last></author>
+      <author id="jong-hoon-oh"><first>Jong Hoon</first><last>Oh</last></author>
+      <author id="istvan-varga"><first>Istvan</first><last>Varga</last></author>
       <author><first>Yulan</first><last>Yan</last></author>
       <pages>825–835</pages>
       <url hash="a8c7f38c">D11-1076</url>
@@ -746,7 +746,7 @@
     </paper>
     <paper id="78">
       <title>Analyzing Methods for Improving Precision of Pivot Based Bilingual Dictionaries</title>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Iker</first><last>Manterola</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
       <pages>846–856</pages>
@@ -781,11 +781,11 @@
     </paper>
     <paper id="82">
       <title>A Correction Model for Word Alignments</title>
-      <author><first>J. Scott</first><last>McCarley</last></author>
+      <author id="j-scott-mccarley"><first>J. Scott</first><last>McCarley</last></author>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
-      <author><first>Jian-ming</first><last>Xu</last></author>
+      <author id="jian-ming-xu"><first>Jian-ming</first><last>Xu</last></author>
       <pages>889–898</pages>
       <url hash="ffa3d02b">D11-1082</url>
       <bibkey>mccarley-etal-2011-correction</bibkey>
@@ -793,16 +793,16 @@
     <paper id="83">
       <title>Heuristic Search for Non-Bottom-Up Tree Structure Prediction</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>899–908</pages>
       <url hash="cd0341e3">D11-1083</url>
       <bibkey>gesmundo-henderson-2011-heuristic</bibkey>
     </paper>
     <paper id="84">
       <title>Cache-based Document-level Statistical Machine Translation</title>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>909–919</pages>
       <url hash="92c8837b">D11-1084</url>
       <bibkey>gong-etal-2011-cache</bibkey>
@@ -811,8 +811,8 @@
       <title>Minimum Imputed-Risk: Unsupervised Discriminative Training for Machine Translation</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Ziyuan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>920–929</pages>
       <url hash="688de4be">D11-1085</url>
@@ -821,8 +821,8 @@
     <paper id="86">
       <title>Improving Bilingual Projections via Sparse Covariance Matrices</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Abhijit</first><last>Bhole</last></author>
       <pages>930–940</pages>
       <url hash="24021f70">D11-1086</url>
@@ -854,7 +854,7 @@
     </paper>
     <paper id="90">
       <title>Enhancing <fixed-case>C</fixed-case>hinese Word Segmentation Using Unlabeled Data</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Jia</first><last>Xu</last></author>
       <pages>970–979</pages>
       <url hash="2ff972c9">D11-1090</url>
@@ -863,7 +863,7 @@
     <paper id="91">
       <title>Unsupervised Learning of Selectional Restrictions and Detection of Argument Coercions</title>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>980–990</pages>
       <url hash="ae987a75">D11-1091</url>
       <bibkey>roberts-harabagiu-2011-unsupervised</bibkey>
@@ -872,7 +872,7 @@
       <title>Harnessing different knowledge sources to measure semantic relatedness under a uniform model</title>
       <author><first>Ziqi</first><last>Zhang</last></author>
       <author><first>Anna Lisa</first><last>Gentile</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <pages>991–1002</pages>
       <url hash="48717438">D11-1092</url>
       <bibkey>zhang-etal-2011-harnessing</bibkey>
@@ -887,7 +887,7 @@
     </paper>
     <paper id="94">
       <title>Latent Vector Weighting for Word Meaning in Context</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>1012–1022</pages>
@@ -907,7 +907,7 @@
       <title>Structured Lexical Similarity via Convolution Kernels on Dependency Trees</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>1034–1046</pages>
       <url hash="a619699e">D11-1096</url>
       <bibkey>croce-etal-2011-structured</bibkey>
@@ -923,7 +923,7 @@
     <paper id="98">
       <title>Lexical Co-occurrence, Statistical Significance, and Word Association</title>
       <author><first>Dipak L.</first><last>Chaudhari</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
       <author><first>Srivatsan</first><last>Laxman</last></author>
       <pages>1058–1068</pages>
       <url hash="6204f9d4">D11-1098</url>
@@ -939,9 +939,9 @@
     </paper>
     <paper id="100">
       <title>Harnessing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Supervised Sentiment Classification</title>
-      <author><first>Balamurali</first><last>AR</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>AR</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1081–1091</pages>
       <url hash="edb5cbed">D11-1100</url>
       <bibkey>ar-etal-2011-harnessing</bibkey>
@@ -958,7 +958,7 @@
     <paper id="102">
       <title>Hypotheses Selection Criteria in a Reranking Framework for Spoken Language Understanding</title>
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>1104–1115</pages>
       <url hash="55746888">D11-1102</url>
       <bibkey>dinarelli-rosset-2011-hypotheses</bibkey>
@@ -966,8 +966,8 @@
     <paper id="103">
       <title>A Fast Re-scoring Strategy to Capture Long-Distance Dependencies</title>
       <author><first>Anoop</first><last>Deoras</last></author>
-      <author><first>Tomáš</first><last>Mikolov</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="tomas-mikolov"><first>Tomáš</first><last>Mikolov</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>1116–1127</pages>
       <url hash="3967dda6">D11-1103</url>
       <bibkey>deoras-etal-2011-fast</bibkey>
@@ -976,7 +976,7 @@
       <title>Efficient Subsampling for Training Complex Language Models</title>
       <author><first>Puyang</first><last>Xu</last></author>
       <author><first>Asela</first><last>Gunawardana</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>1128–1136</pages>
       <url hash="cd5342ca">D11-1104</url>
       <bibkey>xu-etal-2011-efficient</bibkey>
@@ -1034,7 +1034,7 @@
       <title>Relaxed Cross-lingual Projection of Constituent Syntax</title>
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <pages>1192–1201</pages>
       <url hash="57c92409">D11-1110</url>
       <bibkey>jiang-etal-2011-relaxed</bibkey>
@@ -1042,7 +1042,7 @@
     <paper id="111">
       <title>Computing Logical Form on Regulatory Texts</title>
       <author><first>Nikhil</first><last>Dinesh</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Insup</first><last>Lee</last></author>
       <pages>1202–1212</pages>
       <url hash="d4c8967c">D11-1111</url>
@@ -1058,15 +1058,15 @@
     </paper>
     <paper id="113">
       <title>Parse Correction with Specialized Models for Difficult Attachment Types</title>
-      <author><first>Enrique</first><last>Henestroza Anguiano</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique</first><last>Henestroza Anguiano</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>1222–1233</pages>
       <url hash="1518e465">D11-1113</url>
       <bibkey>henestroza-anguiano-candito-2011-parse</bibkey>
     </paper>
     <paper id="114">
       <title>Exact Inference for Generative Probabilistic Non-Projective Dependency Parsing</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>1234–1245</pages>
@@ -1076,7 +1076,7 @@
     <paper id="115">
       <title>Semi-supervised <fixed-case>CCG</fixed-case> Lexicon Extension</title>
       <author><first>Emily</first><last>Thomforde</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1246–1256</pages>
       <url hash="fcf7fcc3">D11-1115</url>
       <bibkey>thomforde-steedman-2011-semi</bibkey>
@@ -1084,7 +1084,7 @@
     <paper id="116">
       <title>A Fast, Accurate, Non-Projective, Semantically-Enriched Parser</title>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1257–1268</pages>
       <url hash="e77dad2e">D11-1116</url>
       <attachment hash="84525784">D11-1116.Attachment.zip</attachment>
@@ -1092,19 +1092,19 @@
     </paper>
     <paper id="117">
       <title>Lateen <fixed-case>EM</fixed-case>: Unsupervised Training with Multiple Objectives, Applied to Dependency Grammar Induction</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>1269–1280</pages>
       <url hash="21868915">D11-1117</url>
       <bibkey>spitkovsky-etal-2011-lateen</bibkey>
     </paper>
     <paper id="118">
       <title>Unsupervised Dependency Parsing without Gold Part-of-Speech Tags</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Angel X.</first><last>Chang</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="angel-chang"><first>Angel X.</first><last>Chang</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>1281–1290</pages>
       <url hash="063347eb">D11-1118</url>
       <bibkey>spitkovsky-etal-2011-unsupervised</bibkey>
@@ -1112,9 +1112,9 @@
     <paper id="119">
       <title>Exploiting Syntactic and Distributional Information for Spelling Correction with Web-Scale N-gram Models</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Le</first><last>Zhao</last></author>
       <pages>1291–1300</pages>
       <url hash="a9a88fa7">D11-1119</url>
@@ -1122,8 +1122,8 @@
     </paper>
     <paper id="120">
       <title>Discriminating Gender on <fixed-case>T</fixed-case>witter</title>
-      <author><first>John D.</first><last>Burger</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <author><first>George</first><last>Kim</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
       <pages>1301–1309</pages>
@@ -1154,8 +1154,8 @@
       <title>Structural Opinion Mining for Graph-based Sentiment Representation</title>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>1332–1341</pages>
       <url hash="75b15b79">D11-1123</url>
       <bibkey>wu-etal-2011-structural</bibkey>
@@ -1163,7 +1163,7 @@
     <paper id="124">
       <title>Summarize What You Are Interested In: An Optimization Framework for Interactive Personalized Summarization</title>
       <author><first>Rui</first><last>Yan</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
       <pages>1342–1351</pages>
       <url hash="f07097e8">D11-1124</url>
@@ -1182,7 +1182,7 @@
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
       <author><first>David</first><last>Talbot</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
       <pages>1363–1372</pages>
       <url hash="a0b140e9">D11-1126</url>
@@ -1193,8 +1193,8 @@
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>1373–1383</pages>
       <url hash="46e7a949">D11-1127</url>
       <bibkey>iglesias-etal-2011-hierarchical</bibkey>
@@ -1213,8 +1213,8 @@
     </paper>
     <paper id="129">
       <title>Experimental Support for a Categorical Compositional Distributional Model of Meaning</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>1394–1404</pages>
       <url hash="cbf2e84b">D11-1129</url>
       <attachment hash="5111127c">D11-1129.Attachment.zip</attachment>
@@ -1223,15 +1223,15 @@
     <paper id="130">
       <title>Cross-Cutting Models of Lexical Semantics</title>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1405–1415</pages>
       <url hash="33695b46">D11-1130</url>
       <bibkey>reisinger-mooney-2011-cross</bibkey>
     </paper>
     <paper id="131">
       <title>Reducing Grounded Learning Tasks To Grammatical Inference</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
-      <author><first>Bevan K.</first><last>Jones</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="bevan-jones"><first>Bevan K.</first><last>Jones</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>1416–1425</pages>
       <url hash="f35afa75">D11-1131</url>
@@ -1251,12 +1251,12 @@
     <paper id="133">
       <title>Extreme Extraction – Machine Reading in a Week</title>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <author><first>Ryan</first><last>Gabbard</last></author>
       <author><first>Gary</first><last>Kratkiewicz</last></author>
       <author><first>Nicolas</first><last>Ward</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>1437–1446</pages>
       <url hash="c2c5c74b">D11-1133</url>
       <bibkey>freedman-etal-2011-extreme</bibkey>
@@ -1265,7 +1265,7 @@
       <title>Discovering Relations between Noun Categories</title>
       <author><first>Thahir</first><last>Mohamed</last></author>
       <author><first>Estevam</first><last>Hruschka</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1447–1455</pages>
       <url hash="b0eaef5a">D11-1134</url>
       <bibkey>mohamed-etal-2011-discovering</bibkey>
@@ -1292,16 +1292,16 @@
       <author><first>Katsuhiko</first><last>Hayashi</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1479–1488</pages>
       <url hash="a50c4971">D11-1137</url>
       <bibkey>hayashi-etal-2011-third</bibkey>
     </paper>
     <paper id="138">
       <title>Training dependency parsers by jointly optimizing multiple objectives</title>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
-      <author><first>Jason</first><last>Katz-Brown</last></author>
+      <author id="jason-katz-brown"><first>Jason</first><last>Katz-Brown</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
       <pages>1489–1499</pages>
       <url hash="5351400f">D11-1138</url>
@@ -1309,8 +1309,8 @@
     </paper>
     <paper id="139">
       <title>Structured Sparsity in Structured Prediction</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <author><first>Mário</first><last>Figueiredo</last></author>
       <author><first>Pedro</first><last>Aguiar</last></author>
       <pages>1500–1511</pages>
@@ -1320,9 +1320,9 @@
     <paper id="140">
       <title>Lexical Generalization in <fixed-case>CCG</fixed-case> Grammar Induction for Semantic Parsing</title>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1512–1523</pages>
       <url hash="135eb6bb">D11-1140</url>
       <bibkey>kwiatkowski-etal-2011-lexical</bibkey>
@@ -1340,7 +1340,7 @@
     <paper id="142">
       <title>Identifying Relations for Open Information Extraction</title>
       <author><first>Anthony</first><last>Fader</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>1535–1545</pages>
       <url hash="41cd11e8">D11-1142</url>
@@ -1350,7 +1350,7 @@
       <title>Active Learning with <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Florian</first><last>Laws</last></author>
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1546–1556</pages>
       <url hash="aaedb0a5">D11-1143</url>
       <bibkey>laws-etal-2011-active</bibkey>
@@ -1385,7 +1385,7 @@
       <title>Rumor has it: Identifying Misinformation in Microblogs</title>
       <author><first>Vahed</first><last>Qazvinian</last></author>
       <author><first>Emily</first><last>Rosengren</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Qiaozhu</first><last>Mei</last></author>
       <pages>1589–1599</pages>
       <url hash="1f29a2d4">D11-1147</url>
diff --git a/data/xml/D12.xml b/data/xml/D12.xml
index bf7f45b808..e52c44e8b4 100644
--- a/data/xml/D12.xml
+++ b/data/xml/D12.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning</booktitle>
       <url hash="48b1fda1">D12-1</url>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>James</first><last>Henderson</last></editor>
-      <editor><first>Marius</first><last>Paşca</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="james-henderson"><first>James</first><last>Henderson</last></editor>
+      <editor id="marius-pasca"><first>Marius</first><last>Paşca</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
       <month>July</month>
@@ -29,7 +29,7 @@
     <paper id="2">
       <title>Regularized Interlingual Projections: Evaluation on Multilingual Transliteration</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>12–23</pages>
       <url hash="997111b8">D12-1002</url>
       <bibkey>jagarlamudi-daume-iii-2012-regularized</bibkey>
@@ -38,7 +38,7 @@
       <title>Bilingual Lexicon Extraction from Comparable Corpora Using Label Propagation</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>24–36</pages>
       <url hash="bfb8a48e">D12-1003</url>
       <bibkey>tamura-etal-2012-bilingual</bibkey>
@@ -64,9 +64,9 @@
     </paper>
     <paper id="6">
       <title>Detecting Subgroups in Online Discussions by Modeling Positive and Negative Relations among Participants</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>59–70</pages>
       <url hash="3d54061d">D12-1006</url>
       <bibkey>hassan-etal-2012-detecting</bibkey>
@@ -75,7 +75,7 @@
       <title>Generative Goal-Driven User Simulation for Dialog Management</title>
       <author><first>Aciel</first><last>Eshky</last></author>
       <author><first>Ben</first><last>Allison</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>71–81</pages>
       <url hash="c4e0ff89">D12-1007</url>
       <bibkey>eshky-etal-2012-generative</bibkey>
@@ -83,7 +83,7 @@
     <paper id="8">
       <title>Optimising Incremental Dialogue Decisions Using Information Density for Interactive Systems</title>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>82–93</pages>
@@ -92,7 +92,7 @@
     </paper>
     <paper id="9">
       <title>Mixed Membership <fixed-case>M</fixed-case>arkov Models for Unsupervised Conversation Modeling</title>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>94–104</pages>
       <url hash="bca0410a">D12-1009</url>
       <bibkey>paul-2012-mixed</bibkey>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="11">
       <title>Linking Named Entities to Any Database</title>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Ernest</first><last>Cronin</last></author>
       <author><first>Penghai</first><last>Nie</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
@@ -125,7 +125,7 @@
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Rajasekar</first><last>Krishnamurthy</last></author>
       <author><first>Ankush</first><last>Dharkar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>128–138</pages>
       <url hash="973ac07a">D12-1012</url>
       <bibkey>nagesh-etal-2012-towards</bibkey>
@@ -134,8 +134,8 @@
       <title>Active Learning for Imbalanced Sentiment Classification</title>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Shengfeng</first><last>Ju</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Xiaojun</first><last>Li</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="xiaojun-lin"><first>Xiaojun</first><last>Li</last></author>
       <pages>139–148</pages>
       <url hash="03a479ea">D12-1013</url>
       <bibkey>li-etal-2012-active-learning</bibkey>
@@ -169,7 +169,7 @@
     </paper>
     <paper id="17">
       <title>Local and Global Context for Supervised and Unsupervised Metonymy Resolution</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Alex</first><last>Judea</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <author><first>Michael</first><last>Strube</last></author>
@@ -190,11 +190,11 @@
     </paper>
     <paper id="19">
       <title>Spectral Dependency Parsing with Latent Variables</title>
-      <author><first>Paramveer</first><last>Dhillon</last></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer</first><last>Dhillon</last></author>
       <author><first>Jordan</first><last>Rodu</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Dean</first><last>Foster</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>205–213</pages>
       <url hash="037550c3">D12-1019</url>
       <bibkey>dhillon-etal-2012-spectral</bibkey>
@@ -211,8 +211,8 @@
     <paper id="21">
       <title>A <fixed-case>B</fixed-case>ayesian Model for Learning <fixed-case>SCFG</fixed-case>s with Discontiguous Rules</title>
       <author><first>Abby</first><last>Levenberg</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>223–232</pages>
       <url hash="c39bbe44">D12-1021</url>
       <bibkey>levenberg-etal-2012-bayesian</bibkey>
@@ -227,8 +227,8 @@
     </paper>
     <paper id="23">
       <title>Minimal Dependency Length in Realization Ranking</title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>244–255</pages>
       <url hash="41053fa4">D12-1023</url>
       <attachment type="attachment" hash="b1e66c7a">D12-1023.Attachment.zip</attachment>
@@ -252,10 +252,10 @@
     </paper>
     <paper id="26">
       <title>N-gram-based Tense Models for Statistical Machine Translation</title>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>276–285</pages>
       <url hash="40612411">D12-1026</url>
       <bibkey>gong-etal-2012-n</bibkey>
@@ -263,7 +263,7 @@
     <paper id="27">
       <title>Source Language Adaptation for Resource-Poor Machine Translation</title>
       <author><first>Pidong</first><last>Wang</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>286–296</pages>
       <url hash="88e1f713">D12-1027</url>
@@ -272,7 +272,7 @@
     <paper id="28">
       <title>Exploiting Reducibility in Unsupervised Dependency Parsing</title>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>297–307</pages>
       <url hash="c50fc381">D12-1028</url>
       <bibkey>marecek-zabokrtsky-2012-exploiting</bibkey>
@@ -306,7 +306,7 @@
     <paper id="32">
       <title>Name Phylogeny: A Generative Model of String Variation</title>
       <author><first>Nicholas</first><last>Andrews</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>344–355</pages>
       <url hash="87959446">D12-1032</url>
@@ -317,8 +317,8 @@
     <paper id="33">
       <title>Syntactic Surprisal Affects Spoken Word Duration in Conversational Contexts</title>
       <author><first>Vera</first><last>Demberg</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
-      <author><first>Philip</first><last>Gorinski</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
+      <author id="philip-gorinski"><first>Philip</first><last>Gorinski</last></author>
       <author><first>Nikolaos</first><last>Engonopoulos</last></author>
       <pages>356–367</pages>
       <url hash="f6d0410d">D12-1033</url>
@@ -327,12 +327,12 @@
     </paper>
     <paper id="34">
       <title>Why Question Answering using Sentiment Analysis and Word Classes</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Takuya</first><last>Kawada</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Yiou</first><last>Wang</last></author>
       <pages>368–378</pages>
       <url hash="a051d48d">D12-1034</url>
@@ -355,7 +355,7 @@
       <title>Answering Opinion Questions on Products by Exploiting Hierarchical Organization of Consumer Reviews</title>
       <author><first>Jianxing</first><last>Yu</last></author>
       <author><first>Zheng-Jun</first><last>Zha</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>391–401</pages>
       <url hash="57a1337a">D12-1036</url>
       <attachment type="attachment" hash="bf80ff4b">D12-1036.Attachment.zip</attachment>
@@ -366,7 +366,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
       <pages>402–411</pages>
@@ -378,7 +378,7 @@
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <pages>412–420</pages>
       <url hash="9c19d35d">D12-1038</url>
       <bibkey>jiang-etal-2012-iterative</bibkey>
@@ -387,7 +387,7 @@
       <title>Automatically Constructing a Normalisation Dictionary for Microblogs</title>
       <author><first>Bo</first><last>Han</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>421–432</pages>
       <url hash="01977689">D12-1039</url>
       <bibkey>han-etal-2012-automatically</bibkey>
@@ -395,7 +395,7 @@
     <paper id="40">
       <title>Unsupervised <fixed-case>PCFG</fixed-case> Induction for Grounded Language Learning with Highly Ambiguous Supervision</title>
       <author><first>Joohyun</first><last>Kim</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>433–444</pages>
       <url hash="32bc5343">D12-1040</url>
       <bibkey>kim-mooney-2012-unsupervised</bibkey>
@@ -414,7 +414,7 @@
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Julie</first><last>Tibshirani</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>455–465</pages>
       <url hash="55b4ca85">D12-1042</url>
       <bibkey>surdeanu-etal-2012-multi</bibkey>
@@ -422,7 +422,7 @@
     <paper id="43">
       <title>An “<fixed-case>AI</fixed-case> readability” Formula for <fixed-case>F</fixed-case>rench as a Foreign Language</title>
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>466–477</pages>
       <url hash="80de54d7">D12-1043</url>
       <bibkey>francois-fairon-2012-ai</bibkey>
@@ -431,7 +431,7 @@
       <title>Dynamic Programming for Higher Order Parsing of Gap-Minding Trees</title>
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Sampath</first><last>Kannan</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <pages>478–488</pages>
       <url hash="1b5222a3">D12-1044</url>
       <bibkey>pitler-etal-2012-dynamic</bibkey>
@@ -440,9 +440,9 @@
       <title>Joint Entity and Event Coreference Resolution across Documents</title>
       <author><first>Heeyoung</first><last>Lee</last></author>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>489–500</pages>
       <url hash="b4a85db5">D12-1045</url>
       <attachment type="attachment" hash="5bb342c3">D12-1045.Attachment.zip</attachment>
@@ -471,7 +471,7 @@
       <award>ACL 2022 10-Year Test of Time</award>
       <author><first/><last>Mausam</last></author>
       <author><first>Michael</first><last>Schmitz</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Robert</first><last>Bart</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>523–534</pages>
@@ -518,7 +518,7 @@
       <author><first>Vincent</first><last>Van Asch</last></author>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Paolo</first><last>Frasconi</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Luc</first><last>De Raedt</last></author>
       <pages>579–589</pages>
       <url hash="6b8dab97">D12-1053</url>
@@ -527,7 +527,7 @@
     </paper>
     <paper id="54">
       <title>Lyrics, Music, and Emotions</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>590–599</pages>
       <url hash="45cd8bd9">D12-1054</url>
@@ -535,7 +535,7 @@
     </paper>
     <paper id="55">
       <title>Assessment of <fixed-case>ESL</fixed-case> Learners’ Syntactic Competence Based on Similarity Measures</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Suma</first><last>Bhat</last></author>
       <pages>600–608</pages>
       <url hash="49e43e07">D12-1055</url>
@@ -554,8 +554,8 @@
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <pages>619–630</pages>
       <url hash="29259666">D12-1057</url>
       <attachment type="attachment" hash="3d585298">D12-1057.Attachment.pdf</attachment>
@@ -573,7 +573,7 @@
     <paper id="59">
       <title>Concurrent Acquisition of Word Meaning and Lexical Categories</title>
       <author><first>Afra</first><last>Alishahi</last></author>
-      <author><first>Grzegorz</first><last>Chrupala</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupala</last></author>
       <pages>643–654</pages>
       <url hash="0efced7a">D12-1059</url>
       <bibkey>alishahi-chrupala-2012-concurrent</bibkey>
@@ -607,9 +607,9 @@
     </paper>
     <paper id="63">
       <title>Three Dependency-and-Boundary Models for Grammar Induction</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>688–698</pages>
       <url hash="4b93e2d3">D12-1063</url>
       <bibkey>spitkovsky-etal-2012-three</bibkey>
@@ -645,7 +645,7 @@
     <paper id="67">
       <title>Parse, Price and <fixed-case>C</fixed-case>ut—<fixed-case>D</fixed-case>elayed Column and Row Generation for Graph Based Parsers</title>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>732–743</pages>
       <url hash="0d4e9a1b">D12-1067</url>
@@ -657,7 +657,7 @@
       <author><first>Qi</first><last>Mao</last></author>
       <author><first>Qiao Liang</first><last>Xiang</last></author>
       <author><first>Ivor Wai-Hung</first><last>Tsang</last></author>
-      <author><first>Kian Ming Adam</first><last>Chai</last></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming Adam</first><last>Chai</last></author>
       <author><first>Hai Leong</first><last>Chieu</last></author>
       <pages>744–753</pages>
       <url hash="033d2e4c">D12-1068</url>
@@ -666,7 +666,7 @@
     <paper id="69">
       <title>Weakly Supervised Training of Semantic Parsers</title>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>754–765</pages>
       <url hash="2929f9f3">D12-1069</url>
       <bibkey>krishnamurthy-mitchell-2012-weakly</bibkey>
@@ -689,9 +689,9 @@
     </paper>
     <paper id="72">
       <title>A Sequence Labelling Approach to Quote Attribution</title>
-      <author><first>Timothy</first><last>O’Keefe</last></author>
+      <author id="tim-okeefe"><first>Timothy</first><last>O’Keefe</last></author>
       <author><first>Silvia</first><last>Pareti</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Irena</first><last>Koprinska</last></author>
       <author><first>Matthew</first><last>Honnibal</last></author>
       <pages>790–799</pages>
@@ -701,8 +701,8 @@
     <paper id="73">
       <title><fixed-case>SSHLDA</fixed-case>: A Semi-Supervised Hierarchical Topic Model</title>
       <author><first>Xian-Ling</first><last>Mao</last></author>
-      <author><first>Zhao-Yan</first><last>Ming</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="zhaoyan-ming"><first>Zhao-Yan</first><last>Ming</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Si</first><last>Li</last></author>
       <author><first>Hongfei</first><last>Yan</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
@@ -714,7 +714,7 @@
       <title>Improving <fixed-case>NLP</fixed-case> through Marginalization of Hidden Syntactic Structure</title>
       <author><first>Jason</first><last>Naradowsky</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>810–820</pages>
       <url hash="c8956e64">D12-1074</url>
       <bibkey>naradowsky-etal-2012-improving</bibkey>
@@ -768,7 +768,7 @@
     <paper id="80">
       <title>Learning Constraints for Consistent Timeline Extraction</title>
       <author><first>David</first><last>McClosky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>873–882</pages>
       <url hash="819250af">D12-1080</url>
       <bibkey>mcclosky-manning-2012-learning</bibkey>
@@ -794,9 +794,9 @@
     </paper>
     <paper id="83">
       <title>A Novel Discriminative Framework for Sentence-Level Discourse Analysis</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>904–915</pages>
       <url hash="c2720af8">D12-1083</url>
       <bibkey>joty-etal-2012-novel</bibkey>
@@ -816,7 +816,7 @@
       <author><first>Anders</first><last>Björkelund</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
-      <author><first>Sina</first><last>Zarriess</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarriess</last></author>
       <pages>928–939</pages>
       <url hash="5239b9d8">D12-1085</url>
       <bibkey>bohnet-etal-2012-generating</bibkey>
@@ -843,9 +843,9 @@
     <paper id="88">
       <title>Entropy-based Pruning for Phrase-based Machine Translation</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>962–971</pages>
       <url hash="b74a0aef">D12-1088</url>
       <bibkey>ling-etal-2012-entropy</bibkey>
@@ -862,7 +862,7 @@
     <paper id="90">
       <title>Probabilistic Finite State Machines for Regression-based <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>984–994</pages>
       <url hash="91058226">D12-1090</url>
       <bibkey>wang-manning-2012-probabilistic</bibkey>
@@ -879,8 +879,8 @@
     <paper id="92">
       <title>Employing Compositional Semantics and Discourse Consistency in <fixed-case>C</fixed-case>hinese Event Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Libin</first><last>Hou</last></author>
       <pages>1006–1016</pages>
       <url hash="c82ea5b6">D12-1092</url>
@@ -889,9 +889,9 @@
     <paper id="93">
       <title>Reading The Web with Learned Syntactic-Semantic Inference Rules</title>
       <author><first>Ni</first><last>Lao</last></author>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>1017–1026</pages>
       <url hash="9105128e">D12-1093</url>
       <bibkey>lao-etal-2012-reading</bibkey>
@@ -900,15 +900,15 @@
       <title>Ensemble Semantics for Large-scale Unsupervised Relation Extraction</title>
       <author><first>Bonan</first><last>Min</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>1027–1037</pages>
       <url hash="0ce4d2ba">D12-1094</url>
       <bibkey>min-etal-2012-ensemble</bibkey>
     </paper>
     <paper id="95">
       <title>Forest Reranking through Subtree Ranking</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>1038–1047</pages>
       <url hash="1a1caa16">D12-1095</url>
@@ -918,7 +918,7 @@
       <title>Parser Showdown at the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet Corral: An Empirical Investigation of Error Types in Parser Output</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>David</first><last>Hall</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>1048–1059</pages>
       <url hash="d34cfbcb">D12-1096</url>
@@ -927,8 +927,8 @@
     </paper>
     <paper id="97">
       <title>Extending Machine Translation Evaluation Metrics with Lexical Cohesion to Document Level</title>
-      <author><first>Billy T. M.</first><last>Wong</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="billy-t-m-wong"><first>Billy T. M.</first><last>Wong</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>1060–1068</pages>
       <url hash="f1d09dfc">D12-1097</url>
       <bibkey>wong-kit-2012-extending</bibkey>
@@ -936,7 +936,7 @@
     <paper id="98">
       <title>Fast Large-Scale Approximate Graph Construction for <fixed-case>NLP</fixed-case></title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Raul</first><last>Guerra</last></author>
       <pages>1069–1080</pages>
       <url hash="f501293e">D12-1098</url>
@@ -955,7 +955,7 @@
     <paper id="100">
       <title>Sketch Algorithms for Estimating Point Queries in <fixed-case>NLP</fixed-case></title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Graham</first><last>Cormode</last></author>
       <pages>1093–1103</pages>
       <url hash="9d9a3244">D12-1100</url>
@@ -990,9 +990,9 @@
     </paper>
     <paper id="104">
       <title><fixed-case>PATTY</fixed-case>: A Taxonomy of Relational Patterns with Semantic Types</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
       <author><first>Gerhard</first><last>Weikum</last></author>
-      <author><first>Fabian</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></author>
       <pages>1135–1145</pages>
       <url hash="ffadcf15">D12-1104</url>
       <bibkey>nakashole-etal-2012-patty</bibkey>
@@ -1018,7 +1018,7 @@
       <title>Language Model Rest Costs and Space-Efficient Storage</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>1169–1178</pages>
       <url hash="d5508130">D12-1107</url>
       <bibkey>heafield-etal-2012-language</bibkey>
@@ -1027,7 +1027,7 @@
       <title>Document-Wide Decoding for Phrase-Based Statistical Machine Translation</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>1179–1190</pages>
       <url hash="e84c0f55">D12-1108</url>
       <bibkey>hardmeier-etal-2012-document</bibkey>
@@ -1037,7 +1037,7 @@
       <author><first>Yang</first><last>Feng</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1191–1200</pages>
       <url hash="b94d448b">D12-1109</url>
       <bibkey>feng-etal-2012-left</bibkey>
@@ -1046,17 +1046,17 @@
       <title>Semantic Compositionality through Recursive Matrix-Vector Spaces</title>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Brody</first><last>Huval</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <pages>1201–1211</pages>
       <url hash="934c8dda">D12-1110</url>
       <bibkey>socher-etal-2012-semantic</bibkey>
     </paper>
     <paper id="111">
       <title>Polarity Inducing Latent Semantic Analysis</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
-      <author><first>John</first><last>Platt</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="john-c-platt"><first>John</first><last>Platt</last></author>
       <pages>1212–1222</pages>
       <url hash="be8bb328">D12-1111</url>
       <attachment type="presentation" hash="579263c6">D12-1111.Presentation.pptx</attachment>
@@ -1064,8 +1064,8 @@
     </paper>
     <paper id="112">
       <title>First Order vs. Higher Order Modification in Distributional Semantics</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Miquel</first><last>Cornudella</last></author>
       <author><first>Louise</first><last>McNally</last></author>
       <pages>1223–1233</pages>
@@ -1074,7 +1074,7 @@
     </paper>
     <paper id="113">
       <title>Learning-based Multi-Sieve Co-reference Resolution with Knowledge</title>
-      <author><first>Lev</first><last>Ratinov</last></author>
+      <author id="lev-ratinov"><first>Lev</first><last>Ratinov</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>1234–1244</pages>
       <url hash="74f027f8">D12-1113</url>
@@ -1084,7 +1084,7 @@
       <title>Joint Learning for Coreference Resolution with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
       <author><first>Yang</first><last>Song</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Houfeng</first><last>Wang</last></author>
       <pages>1245–1254</pages>
@@ -1118,7 +1118,7 @@
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Brianna</first><last>Satinoff</last></author>
       <author><first>He</first><last>He</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1290–1301</pages>
       <url hash="4094b9fb">D12-1118</url>
       <attachment type="attachment" hash="7432e33e">D12-1118.Attachment.pdf</attachment>
@@ -1128,8 +1128,8 @@
       <title>Multi-Domain Learning: When Do Domains Matter?</title>
       <author><first>Mahesh</first><last>Joshi</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1302–1312</pages>
       <url hash="3b06ff97">D12-1119</url>
       <bibkey>joshi-etal-2012-multi</bibkey>
@@ -1153,7 +1153,7 @@
     <paper id="122">
       <title>Extracting Opinion Expressions with semi-<fixed-case>M</fixed-case>arkov Conditional Random Fields</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1335–1345</pages>
       <url hash="f9bfe3e5">D12-1122</url>
       <bibkey>yang-cardie-2012-extracting</bibkey>
@@ -1171,9 +1171,9 @@
       <title>Word Salad: Relating Food Prices and Descriptions</title>
       <author><first>Victor</first><last>Chahuneau</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Bryan R.</first><last>Routledge</last></author>
+      <author id="bryan-r-routledge"><first>Bryan R.</first><last>Routledge</last></author>
       <author><first>Lily</first><last>Scherlis</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1357–1367</pages>
       <url hash="b5e5fd79">D12-1124</url>
       <attachment type="attachment" hash="a7891144">D12-1124.Attachment.pdf</attachment>
@@ -1196,7 +1196,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Shu</first><last>Zhang</last></author>
       <author><first>Feng</first><last>Ji</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1379–1388</pages>
       <url hash="886bbeee">D12-1126</url>
       <bibkey>zhao-etal-2012-part</bibkey>
@@ -1204,7 +1204,7 @@
     <paper id="127">
       <title><fixed-case>W</fixed-case>iki-ly Supervised Part-of-Speech Tagging</title>
       <author><first>Shen</first><last>Li</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <pages>1389–1398</pages>
       <url hash="16bed77d">D12-1127</url>
@@ -1213,7 +1213,7 @@
     <paper id="128">
       <title>Joining Forces Pays Off: Multilingual Joint Word Sense Disambiguation</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>1399–1410</pages>
       <url hash="81ae3652">D12-1128</url>
       <bibkey>navigli-ponzetto-2012-joining</bibkey>
@@ -1236,9 +1236,9 @@
     </paper>
     <paper id="131">
       <title>Improved Parsing and <fixed-case>POS</fixed-case> Tagging Using Inter-Sentence Consistency Constraints</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Roi</first><last>Reichart</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Amir</first><last>Globerson</last></author>
       <pages>1434–1444</pages>
       <url hash="777caf90">D12-1131</url>
@@ -1247,7 +1247,7 @@
     <paper id="132">
       <title>Unified Dependency Parsing of <fixed-case>C</fixed-case>hinese Morphological and Syntactic Structures</title>
       <author><first>Zhongguo</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1445–1454</pages>
       <url hash="3383f3f2">D12-1132</url>
       <bibkey>li-zhou-2012-unified</bibkey>
@@ -1262,7 +1262,7 @@
     </paper>
     <paper id="134">
       <title>Identifying Event-related Bursts via Social Media Activities</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Baihan</first><last>Shu</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
       <author><first>Yang</first><last>Song</last></author>
@@ -1292,7 +1292,7 @@
       <author><first>Stephen</first><last>Roller</last></author>
       <author><first>Michael</first><last>Speriosu</last></author>
       <author><first>Sarat</first><last>Rallapalli</last></author>
-      <author><first>Benjamin</first><last>Wing</last></author>
+      <author id="benjamin-wing"><first>Benjamin</first><last>Wing</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>1500–1510</pages>
       <url hash="17e10437">D12-1137</url>
@@ -1302,7 +1302,7 @@
       <title>A Discriminative Model for Query Spelling Correction with Latent Structural <fixed-case>SVM</fixed-case></title>
       <author><first>Huizhong</first><last>Duan</last></author>
       <author><first>Yanen</first><last>Li</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>1511–1521</pages>
       <url hash="211bc6a8">D12-1138</url>
diff --git a/data/xml/D13.xml b/data/xml/D13.xml
index ec25152c12..25e4a12011 100644
--- a/data/xml/D13.xml
+++ b/data/xml/D13.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="9d85386b">D13-1</url>
       <editor><first>David</first><last>Yarowsky</last></editor>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <editor><first>Karen</first><last>Livescu</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
@@ -22,7 +22,7 @@
     <paper id="1">
       <title>Event-Based Time Label Propagation for Automatic Dating of News Articles</title>
       <author><first>Tao</first><last>Ge</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>1–11</pages>
@@ -31,13 +31,13 @@
     </paper>
     <paper id="2">
       <title>Exploiting Discourse Analysis for Article-Wide Temporal Classification</title>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Ziheng</first><last>Lin</last></author>
       <author><first>Wei</first><last>Feng</last></author>
       <author><first>Bin</first><last>Chen</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>12–23</pages>
       <url hash="dc3773f6">D13-1002</url>
       <bibkey>ng-etal-2013-exploiting</bibkey>
@@ -54,7 +54,7 @@
       <title>Exploring the Utility of Joint Morphological and Syntactic Learning from Child-directed Speech</title>
       <author><first>Stella</first><last>Frank</last></author>
       <author><first>Frank</first><last>Keller</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>30–41</pages>
       <url hash="51dcfa16">D13-1004</url>
       <bibkey>frank-etal-2013-exploring</bibkey>
@@ -62,8 +62,8 @@
     <paper id="5">
       <title>A Joint Learning Model of Word Segmentation, Lexical Acquisition, and Phonetic Variability</title>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <author><first>Frank</first><last>Wood</last></author>
       <pages>42–54</pages>
       <url hash="4af19a9e">D13-1005</url>
@@ -73,9 +73,9 @@
     <paper id="6">
       <title><fixed-case>A</fixed-case>nimacy Detection with Voting Models</title>
       <author><first>Joshua</first><last>Moore</last></author>
-      <author><first>Christopher J.C.</first><last>Burges</last></author>
+      <author id="christopher-j-c-burges"><first>Christopher J.C.</first><last>Burges</last></author>
       <author><first>Erin</first><last>Renshaw</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>55–60</pages>
       <url hash="c4b4592b">D13-1006</url>
       <bibkey>moore-etal-2013-animacy</bibkey>
@@ -91,8 +91,8 @@
     <paper id="8">
       <title>Paraphrasing 4 Microblog Normalization</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>73–84</pages>
       <url hash="e979add6">D13-1008</url>
@@ -102,8 +102,8 @@
       <title>Question Difficulty Estimation in Community Question Answering Services</title>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Quan</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
       <pages>85–90</pages>
       <url hash="3fb87a15">D13-1009</url>
       <bibkey>liu-etal-2013-question</bibkey>
@@ -112,8 +112,8 @@
       <title>Measuring Ideological Proportions in Political Speeches</title>
       <author><first>Yanchuan</first><last>Sim</last></author>
       <author><first>Brice D. L.</first><last>Acree</last></author>
-      <author><first>Justin H.</first><last>Gross</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="justin-h-gross"><first>Justin H.</first><last>Gross</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>91–101</pages>
       <url hash="fa07ebd5">D13-1010</url>
       <attachment type="attachment" hash="6de84322">D13-1010.Attachment.pdf</attachment>
@@ -140,7 +140,7 @@
     <paper id="13">
       <title>Joint Parsing and Disfluency Detection in Linear Time</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>124–129</pages>
       <url hash="d15b4f2b">D13-1013</url>
       <bibkey>rasooli-tetreault-2013-joint</bibkey>
@@ -150,16 +150,16 @@
       <author><first>Masashi</first><last>Tsubaki</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>130–140</pages>
       <url hash="eaf6f021">D13-1014</url>
       <bibkey>tsubaki-etal-2013-modeling</bibkey>
     </paper>
     <paper id="15">
       <title>Studying the Recursive Behaviour of Adjectival Modification with Compositional Distributional Semantics</title>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>141–151</pages>
       <url hash="99f6b741">D13-1015</url>
       <bibkey>vecchi-etal-2013-studying</bibkey>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="17">
       <title>Appropriately Incorporating Statistical Significance in <fixed-case>PMI</fixed-case></title>
-      <author><first>Om P.</first><last>Damani</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
       <author><first>Shweta</first><last>Ghonge</last></author>
       <pages>163–169</pages>
       <url hash="fd58e309">D13-1017</url>
@@ -191,9 +191,9 @@
     </paper>
     <paper id="19">
       <title>Joint Learning of Phonetic Units and Word Pronunciations for <fixed-case>ASR</fixed-case></title>
-      <author><first>Chia-ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-ying</first><last>Lee</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>182–192</pages>
       <url hash="6c6d5b0d">D13-1019</url>
       <bibkey>lee-etal-2013-joint</bibkey>
@@ -201,7 +201,7 @@
     <paper id="20">
       <title><fixed-case>MCT</fixed-case>est: A Challenge Dataset for the Open-Domain Machine Comprehension of Text</title>
       <author><first>Matthew</first><last>Richardson</last></author>
-      <author><first>Christopher J.C.</first><last>Burges</last></author>
+      <author id="christopher-j-c-burges"><first>Christopher J.C.</first><last>Burges</last></author>
       <author><first>Erin</first><last>Renshaw</last></author>
       <pages>193–203</pages>
       <url hash="141e98d9">D13-1020</url>
@@ -218,9 +218,9 @@
     </paper>
     <paper id="22">
       <title>Optimal Beam Search for Machine Translation</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Yin-Wen</first><last>Chang</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>210–221</pages>
       <url hash="2fcefa88">D13-1022</url>
       <bibkey>rush-etal-2013-optimal</bibkey>
@@ -237,7 +237,7 @@
     </paper>
     <paper id="24">
       <title>Structured Penalties for Log-Linear Language Models</title>
-      <author><first>Anil Kumar</first><last>Nelakanti</last></author>
+      <author id="anil-kumar-nelakanti"><first>Anil Kumar</first><last>Nelakanti</last></author>
       <author><first>Cédric</first><last>Archambeau</last></author>
       <author><first>Julien</first><last>Mairal</last></author>
       <author><first>Francis</first><last>Bach</last></author>
@@ -249,10 +249,10 @@
     </paper>
     <paper id="25">
       <title>Interactive Machine Translation using Hierarchical Translation Models</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>José-Miguel</first><last>Benedí</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>244–254</pages>
       <url hash="c5c66289">D13-1025</url>
       <bibkey>gonzalez-rubio-etal-2013-interactive</bibkey>
@@ -260,7 +260,7 @@
     <paper id="26">
       <title>Max-Margin Synchronous Grammar Induction for Machine Translation</title>
       <author><first>Xinyan</first><last>Xiao</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>255–264</pages>
       <url hash="b9057198">D13-1026</url>
       <bibkey>xiao-xiong-2013-max</bibkey>
@@ -286,8 +286,8 @@
       <title>Joint Coreference Resolution and Named-Entity Linking with Multi-Pass Sieves</title>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <author><first>Leila</first><last>Zilles</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>289–299</pages>
       <url hash="ec52b04f">D13-1029</url>
       <bibkey>hajishirzi-etal-2013-joint</bibkey>
@@ -306,16 +306,16 @@
       <author><first>Longkai</first><last>Zhang</last></author>
       <author><first>Houfeng</first><last>Wang</last></author>
       <author><first>Xu</first><last>Sun</last></author>
-      <author><first>Mairgup</first><last>Mansur</last></author>
+      <author id="mairgup-mansur"><first>Mairgup</first><last>Mansur</last></author>
       <pages>311–321</pages>
       <url hash="c37e0e82">D13-1031</url>
       <bibkey>zhang-etal-2013-exploring</bibkey>
     </paper>
     <paper id="32">
       <title>Efficient Higher-Order <fixed-case>CRF</fixed-case>s for Morphological Tagging</title>
-      <author><first>Thomas</first><last>Mueller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>322–332</pages>
       <url hash="b4e9cb57">D13-1032</url>
       <attachment type="attachment" hash="b350787a">D13-1032.Attachment.tgz</attachment>
@@ -332,16 +332,16 @@
     <paper id="34">
       <title><fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Learning Non-Concatenative Morphology</title>
       <author><first>Jan A.</first><last>Botha</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>345–356</pages>
       <url hash="cebf4f7b">D13-1034</url>
       <bibkey>botha-blunsom-2013-adaptor</bibkey>
     </paper>
     <paper id="35">
       <title>Grounding Strategic Conversation: Using Negotiation Dialogues to Predict Trades in a Win-Lose Game</title>
-      <author><first>Anaïs</first><last>Cadilhac</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anaïs</first><last>Cadilhac</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <pages>357–368</pages>
       <url hash="ce47d6db">D13-1035</url>
@@ -353,7 +353,7 @@
       <author><first>Elahe</first><last>Rahimtoroghi</last></author>
       <author><first>Larissa</first><last>Munishkina</last></author>
       <author><first>Reid</first><last>Swanson</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <pages>369–379</pages>
       <url hash="40fa8bc5">D13-1036</url>
       <bibkey>hu-etal-2013-unsupervised</bibkey>
@@ -361,7 +361,7 @@
     <paper id="37">
       <title>Latent Anaphora Resolution for Cross-Lingual Pronoun Prediction</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>380–391</pages>
       <url hash="17259f06">D13-1037</url>
@@ -372,21 +372,21 @@
       <author><first>Rui</first><last>Fang</last></author>
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Lanbo</first><last>She</last></author>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <pages>392–402</pages>
       <url hash="9832d99b">D13-1038</url>
       <bibkey>fang-etal-2013-towards</bibkey>
     </paper>
     <paper id="39">
       <title>Open-Domain Fine-Grained Class Extraction from Web Search Queries</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>403–414</pages>
       <url hash="67725b1f">D13-1039</url>
       <bibkey>pasca-2013-open</bibkey>
     </paper>
     <paper id="40">
       <title>Unsupervised Relation Extraction with General Domain Knowledge</title>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>415–425</pages>
       <url hash="4b84bc55">D13-1040</url>
@@ -407,7 +407,7 @@
     <paper id="42">
       <title>Joint Bootstrapping of Corpus Annotations and Entity Types</title>
       <author><first>Hrushikesh</first><last>Mohapatra</last></author>
-      <author><first>Siddhanth</first><last>Jain</last></author>
+      <author id="siddharth-jain"><first>Siddhanth</first><last>Jain</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
       <pages>436–446</pages>
       <url hash="78b608ed">D13-1042</url>
@@ -446,7 +446,7 @@
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Adrian</first><last>Popescu</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>479–489</pages>
       <url hash="c1b19b48">D13-1046</url>
       <bibkey>bouamor-etal-2013-building</bibkey>
@@ -485,7 +485,7 @@
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>524–534</pages>
       <url hash="ece41418">D13-1050</url>
       <bibkey>zhu-etal-2013-improving</bibkey>
@@ -504,7 +504,7 @@
     </paper>
     <paper id="52">
       <title>Flexible and Efficient Hypergraph Interactions for Joint Hierarchical and Forest-to-String Decoding</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>545–555</pages>
@@ -598,7 +598,7 @@
       <title>Joint <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging on Heterogeneous Annotated Corpora with Multiple Task Learning</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Jiayi</first><last>Zhao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>658–668</pages>
       <url hash="6ee6c03a">D13-1062</url>
       <bibkey>qiu-etal-2013-joint</bibkey>
@@ -608,7 +608,7 @@
       <author><first>Jimmy</first><last>Dubuisson</last></author>
       <author><first>Jean-Pierre</first><last>Eckmann</last></author>
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>669–680</pages>
       <url hash="6563a70f">D13-1063</url>
       <attachment type="attachment" hash="ffb8541d">D13-1063.Attachment.zip</attachment>
@@ -617,7 +617,7 @@
     <paper id="64">
       <title>Unsupervised Induction of Cross-Lingual Semantic Relations</title>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>681–692</pages>
       <url hash="0fa67f02">D13-1064</url>
       <bibkey>lewis-steedman-2013-unsupervised</bibkey>
@@ -628,7 +628,7 @@
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Motoki</first><last>Sano</last></author>
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <pages>693–703</pages>
@@ -637,7 +637,7 @@
     </paper>
     <paper id="66">
       <title>Sarcasm as Contrast between a Positive Sentiment and Negative Situation</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Ashequl</first><last>Qadir</last></author>
       <author><first>Prafulla</first><last>Surve</last></author>
       <author><first>Lalindra</first><last>De Silva</last></author>
@@ -652,7 +652,7 @@
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>715–725</pages>
       <url hash="6b5e13d4">D13-1067</url>
       <bibkey>wang-etal-2013-collective</bibkey>
@@ -660,7 +660,7 @@
     <paper id="68">
       <title>Optimized Event Storyline Generation based on Mixture-Event-Aspect Model</title>
       <author><first>Lifu</first><last>Huang</last></author>
-      <author><first>Lian’en</first><last>Huang</last></author>
+      <author id="lianen-huang"><first>Lian’en</first><last>Huang</last></author>
       <pages>726–735</pages>
       <url hash="122ca971">D13-1068</url>
       <bibkey>huang-huang-2013-optimized</bibkey>
@@ -678,8 +678,8 @@
       <author><first>Maria</first><last>Liakata</last></author>
       <author><first>Simon</first><last>Dobnik</last></author>
       <author><first>Shyamasree</first><last>Saha</last></author>
-      <author><first>Colin</first><last>Batchelor</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>747–757</pages>
       <url hash="97ae9d87">D13-1070</url>
       <bibkey>liakata-etal-2013-discourse</bibkey>
@@ -697,7 +697,7 @@
       <title>Exploiting Language Models for Visual Recognition</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
       <author><first>Jasper</first><last>Uijlings</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>769–779</pages>
       <url hash="25c82b76">D13-1072</url>
       <bibkey>le-etal-2013-exploiting</bibkey>
@@ -706,7 +706,7 @@
       <title>Mining Scientific Terms and their Definitions: A Study of the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology</title>
       <author><first>Yiping</first><last>Jin</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Xiangnan</first><last>He</last></author>
       <pages>780–790</pages>
       <url hash="f6d1cf18">D13-1073</url>
@@ -724,7 +724,7 @@
     <paper id="75">
       <title>With Blinkers on: Robust Prediction of Eye Movements across Readers</title>
       <author><first>Franz</first><last>Matthies</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>803–807</pages>
       <url hash="364c135e">D13-1075</url>
       <bibkey>matthies-sogaard-2013-blinkers</bibkey>
@@ -732,7 +732,7 @@
     <paper id="76">
       <title>Using Paraphrases and Lexical Semantics to Improve the Accuracy and the Robustness of Supervised Models in Situated Dialogue Systems</title>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <pages>808–813</pages>
       <url hash="90765aa3">D13-1076</url>
       <bibkey>gardent-rojas-barahona-2013-using</bibkey>
@@ -757,7 +757,7 @@
       <title>Rule-Based Information Extraction is Dead! Long Live Rule-Based Information Extraction Systems!</title>
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Frederick R.</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick R.</first><last>Reiss</last></author>
       <pages>827–832</pages>
       <url hash="458ca18d">D13-1079</url>
       <bibkey>chiticariu-etal-2013-rule</bibkey>
@@ -765,9 +765,9 @@
     <paper id="80">
       <title>Improving Learning and Inference in a Large Knowledge-Base using Latent Syntactic Cues</title>
       <author><first>Matt</first><last>Gardner</last></author>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Bryan</first><last>Kisiel</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>833–838</pages>
       <url hash="5959f501">D13-1080</url>
       <bibkey>gardner-etal-2013-improving</bibkey>
@@ -785,9 +785,9 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Isao</first><last>Goto</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>845–850</pages>
       <url hash="52a274a4">D13-1082</url>
       <bibkey>wang-etal-2013-converting</bibkey>
@@ -840,8 +840,8 @@
     </paper>
     <paper id="88">
       <title><fixed-case>R</fixed-case>ussian Stress Prediction using Maximum Entropy Ranking</title>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>879–883</pages>
       <url hash="11f08526">D13-1088</url>
       <bibkey>hall-sproat-2013-russian</bibkey>
@@ -849,7 +849,7 @@
     <paper id="89">
       <title>Scaling to Large³ Data: An Efficient and Effective Method to Compute Distributional Thesauri</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>884–890</pages>
       <url hash="6870d1a5">D13-1089</url>
       <bibkey>riedl-biemann-2013-scaling</bibkey>
@@ -869,7 +869,7 @@
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Daling</first><last>Wang</last></author>
       <author><first>Ge</first><last>Yu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>897–902</pages>
       <url hash="df72a289">D13-1091</url>
       <bibkey>feng-etal-2013-twitter</bibkey>
@@ -897,7 +897,7 @@
     <paper id="94">
       <title>Predicting the Presence of Discourse Connectives</title>
       <author><first>Gary</first><last>Patterson</last></author>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <pages>914–923</pages>
       <url hash="177e79d3">D13-1094</url>
       <bibkey>patterson-kehler-2013-predicting</bibkey>
@@ -927,7 +927,7 @@
       <author><first>Jin</first><last>Qian</last></author>
       <author><first>Huan</first><last>Chen</last></author>
       <author><first>Jihua</first><last>Kang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>946–957</pages>
       <url hash="87408767">D13-1097</url>
       <bibkey>zhang-etal-2013-discourse</bibkey>
@@ -935,7 +935,7 @@
     <paper id="98">
       <title>Building Event Threads out of Multiple News Articles</title>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <pages>958–967</pages>
       <url hash="64972f82">D13-1098</url>
       <bibkey>tannier-moriceau-2013-building</bibkey>
@@ -943,16 +943,16 @@
     <paper id="99">
       <title>Tree Kernel-based Negation and Speculation Scope Detection with Structured Syntactic Parse Features</title>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>968–976</pages>
       <url hash="922aeb17">D13-1099</url>
       <bibkey>zou-etal-2013-tree</bibkey>
     </paper>
     <paper id="100">
       <title>A temporal model of text periodicities using <fixed-case>G</fixed-case>aussian Processes</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>977–988</pages>
       <url hash="b2bf58ac">D13-1100</url>
       <attachment type="poster" hash="68c5a785">D13-1100.Poster.pdf</attachment>
@@ -961,9 +961,9 @@
     <paper id="101">
       <title>Automatically Detecting and Attributing Indirect Quotations</title>
       <author><first>Silvia</first><last>Pareti</last></author>
-      <author><first>Tim</first><last>O’Keefe</last></author>
+      <author id="tim-okeefe"><first>Tim</first><last>O’Keefe</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Irena</first><last>Koprinska</last></author>
       <pages>989–999</pages>
       <url hash="a3658397">D13-1101</url>
@@ -971,7 +971,7 @@
     </paper>
     <paper id="102">
       <title>Identifying Web Search Query Reformulation using Concept based Matching</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <pages>1000–1010</pages>
       <url hash="ddc96bab">D13-1102</url>
       <bibkey>hassan-2013-identifying</bibkey>
@@ -1001,7 +1001,7 @@
       <title>Automatic Extraction of Morphological Lexicons from Morphologically Annotated Corpora</title>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1032–1043</pages>
       <url hash="b2de3744">D13-1105</url>
       <bibkey>eskander-etal-2013-automatic</bibkey>
@@ -1011,7 +1011,7 @@
       <author><first>Michael</first><last>Auli</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>1044–1054</pages>
       <url hash="91158796">D13-1106</url>
       <bibkey>auli-etal-2013-joint</bibkey>
@@ -1033,7 +1033,7 @@
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Linfeng</first><last>Song</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>1066–1076</pages>
       <url hash="28b1f085">D13-1108</url>
@@ -1043,7 +1043,7 @@
       <title>Monolingual Marginal Matching for Translation Model Adaptation</title>
       <author><first>Ann</first><last>Irvine</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1077–1088</pages>
       <url hash="5a0041ab">D13-1109</url>
       <bibkey>irvine-etal-2013-monolingual</bibkey>
@@ -1051,7 +1051,7 @@
     <paper id="110">
       <title>Efficient Left-to-Right Hierarchical Phrase-Based Translation with Improved Reordering</title>
       <author><first>Maryam</first><last>Siahbani</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>1089–1099</pages>
       <url hash="21055039">D13-1110</url>
@@ -1061,7 +1061,7 @@
       <title>A Systematic Exploration of Diversity in Machine Translation</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Dhruv</first><last>Batra</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Gregory</first><last>Shakhnarovich</last></author>
       <pages>1100–1111</pages>
       <url hash="5da674ec">D13-1111</url>
@@ -1098,7 +1098,7 @@
     <paper id="115">
       <title>A Multimodal <fixed-case>LDA</fixed-case> Model integrating Textual, Cognitive and Visual Modalities</title>
       <author><first>Stephen</first><last>Roller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>1146–1157</pages>
       <url hash="980a4379">D13-1115</url>
       <attachment type="attachment" hash="57bdd5b5">D13-1115.Attachment.zip</attachment>
@@ -1106,7 +1106,7 @@
     </paper>
     <paper id="116">
       <title>Combining <fixed-case>PCFG</fixed-case>-<fixed-case>LA</fixed-case> Models with Dual Decomposition: A Case Study with Function Labels and Binarization</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <pages>1158–1169</pages>
@@ -1115,11 +1115,11 @@
     </paper>
     <paper id="117">
       <title>Feature Noising for Log-Linear Structured Prediction</title>
-      <author><first>Sida</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Stefan</first><last>Wager</last></author>
+      <author id="stefan-wagner"><first>Stefan</first><last>Wager</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1170–1179</pages>
       <url hash="cefbc952">D13-1117</url>
       <bibkey>wang-etal-2013-feature</bibkey>
@@ -1153,7 +1153,7 @@
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1213–1223</pages>
       <url hash="cb7f0a3b">D13-1121</url>
       <bibkey>sasano-etal-2013-automatic</bibkey>
@@ -1170,7 +1170,7 @@
     <paper id="123">
       <title>A Semantically Enhanced Approach to Determine Textual Similarity</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>1235–1245</pages>
       <url hash="35f3f573">D13-1123</url>
       <revision id="1" href="D13-1123v1" hash="3feedbee"/>
@@ -1198,7 +1198,7 @@
     <paper id="126">
       <title>Simulating Early-Termination Search for Verbose Spoken Queries</title>
       <author><first>Jerome</first><last>White</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <author><first>Nitendra</first><last>Rajput</last></author>
       <author><first>Marion</first><last>Zalk</last></author>
       <pages>1270–1280</pages>
@@ -1233,8 +1233,8 @@
     </paper>
     <paper id="130">
       <title>Leveraging Lexical Cohesion and Disruption for Topic Segmentation</title>
-      <author><first>Anca-Roxana</first><last>Şimon</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="anca-roxana-simon"><first>Anca-Roxana</first><last>Şimon</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>1314–1324</pages>
       <url hash="c8b2af73">D13-1130</url>
@@ -1253,7 +1253,7 @@
     <paper id="132">
       <title>Mining New Business Opportunities: Identifying Trend related Products by Leveraging Commercial Intents from Microblogs</title>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Haitian</first><last>Wei</last></author>
       <author><first>Hongfei</first><last>Yan</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
@@ -1313,7 +1313,7 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Felix</first><last>Rietig</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1377–1381</pages>
       <url hash="6d99794a">D13-1138</url>
       <bibkey>wuebker-etal-2013-improving</bibkey>
@@ -1333,7 +1333,7 @@
       <title>Decoding with Large-Scale Neural Language Models Improves Translation</title>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Yinggong</first><last>Zhao</last></author>
-      <author><first>Victoria</first><last>Fossum</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>1387–1392</pages>
       <url hash="d7408d0e">D13-1140</url>
@@ -1344,7 +1344,7 @@
       <author><first>Will Y.</first><last>Zou</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1393–1398</pages>
       <url hash="b6bd9cc4">D13-1141</url>
       <bibkey>zou-etal-2013-bilingual</bibkey>
@@ -1352,7 +1352,7 @@
     <paper id="142">
       <title>Application of Localized Similarity for Web Documents</title>
       <author><first>Peter</first><last>Reberšek</last></author>
-      <author><first>Mateja</first><last>Verlič</last></author>
+      <author id="mateja-verlic"><first>Mateja</first><last>Verlič</last></author>
       <pages>1399–1404</pages>
       <url hash="2718123a">D13-1142</url>
       <bibkey>rebersek-verlic-2013-application</bibkey>
@@ -1369,7 +1369,7 @@
       <title>A Walk-Based Semantically Enriched Tree Kernel Over Distributed Word Representations</title>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1411–1416</pages>
       <url hash="71306b81">D13-1144</url>
       <bibkey>srivastava-etal-2013-walk</bibkey>
@@ -1377,7 +1377,7 @@
     <paper id="145">
       <title>Automatic Idiom Identification in <fixed-case>W</fixed-case>iktionary</title>
       <author><first>Grace</first><last>Muzny</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1417–1421</pages>
       <url hash="e930c42a">D13-1145</url>
       <bibkey>muzny-zettlemoyer-2013-automatic</bibkey>
@@ -1386,7 +1386,7 @@
       <title><fixed-case>E</fixed-case>lephant: Sequence Labeling for Word and Sentence Segmentation</title>
       <author><first>Kilian</first><last>Evang</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>1422–1426</pages>
       <url hash="6beb877d">D13-1146</url>
@@ -1411,8 +1411,8 @@
     <paper id="149">
       <title>The <fixed-case>V</fixed-case>erb<fixed-case>C</fixed-case>orner Project: Toward an Empirically-Based Semantic Decomposition of Verbs</title>
       <author><first>Joshua K.</first><last>Hartshorne</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1438–1442</pages>
       <url hash="9ff01197">D13-1149</url>
       <bibkey>hartshorne-etal-2013-verbcorner</bibkey>
@@ -1438,8 +1438,8 @@
     <paper id="152">
       <title>Dynamic Feature Selection for Dependency Parsing</title>
       <author><first>He</first><last>He</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1455–1464</pages>
       <url hash="8f044517">D13-1152</url>
       <bibkey>he-etal-2013-dynamic</bibkey>
@@ -1454,10 +1454,10 @@
     </paper>
     <paper id="154">
       <title>Using Crowdsourcing to get Representations based on Regular Expressions</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Hector</first><last>Martinez</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Hector</first><last>Martinez</last></author>
       <author><first>Jakob</first><last>Elming</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>1476–1480</pages>
       <url hash="03a44dd1">D13-1154</url>
       <bibkey>sogaard-etal-2013-using</bibkey>
@@ -1491,7 +1491,7 @@
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Yasuhisa</first><last>Yoshida</last></author>
       <author><first>Masaaki</first><last>Nishino</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>1515–1520</pages>
       <url hash="333c4714">D13-1158</url>
@@ -1501,7 +1501,7 @@
       <title>A Hierarchical Entity-Based Approach to Structuralize User Generated Content in Social Media: A Case of <fixed-case>Y</fixed-case>ahoo! <fixed-case>A</fixed-case>nswers</title>
       <author><first>Baichuan</first><last>Li</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Irwin</first><last>King</last></author>
       <author><first>Michael R.</first><last>Lyu</last></author>
       <pages>1521–1532</pages>
@@ -1524,7 +1524,7 @@
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1545–1556</pages>
       <url hash="12cff0a6">D13-1161</url>
       <bibkey>kwiatkowski-etal-2013-scaling</bibkey>
@@ -1532,17 +1532,17 @@
     <paper id="162">
       <title>Classifying Message Board Posts with an Extracted Lexicon of Patient Attributes</title>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1557–1562</pages>
       <url hash="0d1eba3d">D13-1162</url>
       <bibkey>huang-riloff-2013-classifying</bibkey>
     </paper>
     <paper id="163">
       <title>Lexical Chain Based Cohesion Models for Document-Level Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yang</first><last>Ding</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>1563–1573</pages>
       <url hash="63eddbfd">D13-1163</url>
       <bibkey>xiong-etal-2013-lexical</bibkey>
@@ -1550,7 +1550,7 @@
     <paper id="164">
       <title>A Convex Alternative to <fixed-case>IBM</fixed-case> Model 2</title>
       <author><first>Andrei</first><last>Simion</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Cliff</first><last>Stein</last></author>
       <pages>1574–1583</pages>
       <url hash="b66093d5">D13-1164</url>
@@ -1558,7 +1558,7 @@
     </paper>
     <paper id="165">
       <title>Pair Language Models for Deriving Alternative Pronunciations and Spellings from Pronunciation Dictionaries</title>
-      <author><first>Russell</first><last>Beckley</last></author>
+      <author id="russell-beckley"><first>Russell</first><last>Beckley</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>1584–1589</pages>
       <url hash="f9d8e78b">D13-1165</url>
@@ -1567,7 +1567,7 @@
     <paper id="166">
       <title>Prior Disambiguation of Word Tensors for Constructing Sentence Vectors</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>1590–1601</pages>
       <url hash="52cbc063">D13-1166</url>
       <bibkey>kartsaklis-sadrzadeh-2013-prior</bibkey>
@@ -1575,8 +1575,8 @@
     <paper id="167">
       <title>Multi-Relational Latent Semantic Analysis</title>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>1602–1612</pages>
       <url hash="fc5bc333">D13-1167</url>
       <attachment type="presentation" hash="e5c6caaa">D13-1167.Presentation.pptx</attachment>
@@ -1585,7 +1585,7 @@
     <paper id="168">
       <title>A Study on Bootstrapping Bilingual Vector Spaces from Non-Parallel Data (and Nothing Else)</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1613–1624</pages>
       <url hash="3eee06c7">D13-1168</url>
       <bibkey>vulic-moens-2013-study</bibkey>
@@ -1593,7 +1593,7 @@
     <paper id="169">
       <title>Deriving Adjectival Scales from Continuous Space Word Representations</title>
       <author><first>Joo-Kyung</first><last>Kim</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>1625–1630</pages>
       <url hash="868932ad">D13-1169</url>
       <bibkey>kim-de-marneffe-2013-deriving</bibkey>
@@ -1604,8 +1604,8 @@
       <author><first>Alex</first><last>Perelygin</last></author>
       <author><first>Jean</first><last>Wu</last></author>
       <author><first>Jason</first><last>Chuang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>1631–1642</pages>
       <url hash="713031ae">D13-1170</url>
@@ -1647,15 +1647,15 @@
       <title>Translating into Morphologically Rich Languages with Synthetic Phrases</title>
       <author><first>Victor</first><last>Chahuneau</last></author>
       <author><first>Eva</first><last>Schlinger</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1677–1687</pages>
       <url hash="cad4440e">D13-1174</url>
       <bibkey>chahuneau-etal-2013-translating</bibkey>
     </paper>
     <paper id="175">
       <title>Boosting Cross-Language Retrieval by Learning Bilingual Phrase Associations from Relevance Rankings</title>
-      <author><first>Artem</first><last>Sokokov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokokov</last></author>
       <author><first>Laura</first><last>Jehl</last></author>
       <author><first>Felix</first><last>Hieber</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
@@ -1666,7 +1666,7 @@
     <paper id="176">
       <title>Recurrent Continuous Translation Models</title>
       <author><first>Nal</first><last>Kalchbrenner</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>1700–1709</pages>
       <url hash="f8238913">D13-1176</url>
       <bibkey>kalchbrenner-blunsom-2013-recurrent</bibkey>
@@ -1679,7 +1679,7 @@
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Justin</first><last>Lewis</last></author>
       <author><first>Brittany</first><last>Harding</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1710–1720</pages>
       <url hash="7b7af517">D13-1177</url>
       <attachment type="attachment" hash="1378dab4">D13-1177.Attachment.zip</attachment>
@@ -1688,7 +1688,7 @@
     <paper id="178">
       <title>Generating Coherent Event Schemas at Scale</title>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first/><last>Mausam</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>1721–1731</pages>
@@ -1697,8 +1697,8 @@
     </paper>
     <paper id="179">
       <title>Orthonormal Explicit Topic Analysis for Cross-Lingual Document Matching</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>1732–1740</pages>
       <url hash="83a90763">D13-1179</url>
@@ -1714,7 +1714,7 @@
     </paper>
     <paper id="181">
       <title>Success with Style: Using Writing Style to Predict the Success of Novels</title>
-      <author><first>Vikas</first><last>Ganjigunte Ashok</last></author>
+      <author id="vikas-ganjigunte-ashok"><first>Vikas</first><last>Ganjigunte Ashok</last></author>
       <author><first>Song</first><last>Feng</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>1753–1764</pages>
@@ -1723,7 +1723,7 @@
     </paper>
     <paper id="182">
       <title>A Generative Joint, Additive, Sequential Model of Topics and Speech Acts in Patient-Doctor Communication</title>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Thomas A.</first><last>Trikalinos</last></author>
       <author><first>M. Barton</first><last>Laws</last></author>
       <author><first>Ira B.</first><last>Wilson</last></author>
@@ -1735,7 +1735,7 @@
     <paper id="183">
       <title>Harvesting Parallel News Streams to Generate Paraphrases of Event Relations</title>
       <author><first>Congle</first><last>Zhang</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>1776–1786</pages>
       <url hash="1c7c3e14">D13-1183</url>
       <bibkey>zhang-weld-2013-harvesting</bibkey>
@@ -1750,7 +1750,7 @@
     </paper>
     <paper id="185">
       <title>Event Schema Induction with a Probabilistic Entity-Driven Model</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>1797–1807</pages>
       <url hash="8b735309">D13-1185</url>
       <bibkey>chambers-2013-event</bibkey>
@@ -1793,7 +1793,7 @@
       <title>Detecting Promotional Content in <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Shruti</first><last>Bhosale</last></author>
       <author><first>Heath</first><last>Vinicombe</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1851–1857</pages>
       <url hash="4dd9a30a">D13-1190</url>
       <bibkey>bhosale-etal-2013-detecting</bibkey>
@@ -1804,7 +1804,7 @@
       <author><first>Minghui</first><last>Qiu</last></author>
       <author><first>Yanchuan</first><last>Sim</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1858–1868</pages>
       <url hash="7724c8bc">D13-1191</url>
       <attachment type="attachment" hash="f81f55dc">D13-1191.Attachment.pdf</attachment>
@@ -1848,8 +1848,8 @@
     <paper id="196">
       <title>Fish Transporters and Miracle Homes: How Compositional Distributional Semantics can Help <fixed-case>NP</fixed-case> Parsing</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1908–1913</pages>
       <url hash="05952f0e">D13-1196</url>
       <attachment type="attachment" hash="8b0b1df5">D13-1196.Attachment.zip</attachment>
@@ -1859,7 +1859,7 @@
       <title>Learning Distributions over Logical Forms for Referring Expression Generation</title>
       <author><first>Nicholas</first><last>FitzGerald</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1914–1925</pages>
       <url hash="171237d0">D13-1197</url>
       <bibkey>fitzgerald-etal-2013-learning</bibkey>
@@ -1877,7 +1877,7 @@
       <title>Identifying Manipulated Offerings on Review Portals</title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1933–1942</pages>
       <url hash="7c15e940">D13-1199</url>
       <bibkey>li-etal-2013-identifying</bibkey>
@@ -1885,7 +1885,7 @@
     <paper id="200">
       <title>Well-Argued Recommendation: Adaptive Models Based on Words in Recommender Systems</title>
       <author><first>Julien</first><last>Gaillard</last></author>
-      <author><first>Marc</first><last>El-Beze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Beze</last></author>
       <author><first>Eitan</first><last>Altman</last></author>
       <author><first>Emmanuel</first><last>Ethis</last></author>
       <pages>1943–1947</pages>
@@ -1905,11 +1905,11 @@
     </paper>
     <paper id="202">
       <title>Of Words, Eyes and Brains: Correlating Image-Based Distributional Semantic Models with Neural Representations of Concepts</title>
-      <author><first>Andrew J.</first><last>Anderson</last></author>
+      <author id="andrew-j-anderson"><first>Andrew J.</first><last>Anderson</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
       <author><first>Ulisse</first><last>Bordignon</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1960–1970</pages>
       <url hash="90c3d14a">D13-1202</url>
       <bibkey>anderson-etal-2013-words</bibkey>
@@ -1924,9 +1924,9 @@
     </paper>
     <paper id="204">
       <title>Breaking Out of Local Optima with Count Transforms and Model Recombination: A Study in Grammar Induction</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>1983–1995</pages>
       <url hash="bb447dab">D13-1204</url>
       <bibkey>spitkovsky-etal-2013-breaking</bibkey>
diff --git a/data/xml/D14.xml b/data/xml/D14.xml
index 5301063142..e6e3972f7a 100644
--- a/data/xml/D14.xml
+++ b/data/xml/D14.xml
@@ -6,7 +6,7 @@
       <url hash="8e5c36f8">D14-1</url>
       <editor><first>Alessandro</first><last>Moschitti</last></editor>
       <editor><first>Bo</first><last>Pang</last></editor>
-      <editor><first>Walter</first><last>Daelemans</last></editor>
+      <editor id="walter-daelemans"><first>Walter</first><last>Daelemans</last></editor>
       <doi>10.3115/v1/D14-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Doha, Qatar</address>
@@ -20,7 +20,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: <fixed-case>IBM</fixed-case> Cognitive Computing - An <fixed-case>NLP</fixed-case> Renaissance!</title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>1</pages>
       <url hash="7a83d7a1">D14-1001</url>
       <doi>10.3115/v1/D14-1001</doi>
@@ -43,7 +43,7 @@
       <author><first>Martin</first><last>Sundermeyer</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>14–25</pages>
       <url hash="5b988965">D14-1003</url>
       <doi>10.3115/v1/D14-1003</doi>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="4">
       <title>A Neural Network Approach to Selectional Preference Acquisition</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>26–35</pages>
       <url hash="d067c6e8">D14-1004</url>
       <doi>10.3115/v1/D14-1004</doi>
@@ -92,7 +92,7 @@
       <title>A Constituent-Based Approach to Argument Labeling with Joint Inference in Discourse Parsing</title>
       <author><first>Fang</first><last>Kong</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>68–77</pages>
       <url hash="4e148b8b">D14-1008</url>
       <doi>10.3115/v1/D14-1008</doi>
@@ -140,7 +140,7 @@
     <paper id="13">
       <title>Combining Punctuation and Disfluency Prediction: An Empirical Study</title>
       <author><first>Xuancong</first><last>Wang</last></author>
-      <author><first>Khe Chai</first><last>Sim</last></author>
+      <author id="khe-chai-sim"><first>Khe Chai</first><last>Sim</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>121–130</pages>
       <url hash="81c76879">D14-1013</url>
@@ -150,7 +150,7 @@
     <paper id="14">
       <title>Submodularity for Data Selection in Machine Translation</title>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>131–141</pages>
       <url hash="0ae28064">D14-1014</url>
       <doi>10.3115/v1/D14-1014</doi>
@@ -187,7 +187,7 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>153–158</pages>
       <url hash="3d9c0525">D14-1017</url>
       <doi>10.3115/v1/D14-1017</doi>
@@ -206,7 +206,7 @@
       <title>Syntax-Augmented Machine Translation using Syntax-Label Clustering</title>
       <author><first>Hideya</first><last>Mino</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>165–171</pages>
       <url hash="68e3784f">D14-1019</url>
       <doi>10.3115/v1/D14-1019</doi>
@@ -215,7 +215,7 @@
     <paper id="20">
       <title>Testing for Significance of Increased Correlation with Human Judgment</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>172–176</pages>
       <url hash="cf958feb">D14-1020</url>
       <doi>10.3115/v1/D14-1020</doi>
@@ -237,7 +237,7 @@
       <title>Learning Hierarchical Translation Spans</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>183–188</pages>
       <url hash="87b57e48">D14-1022</url>
@@ -248,9 +248,9 @@
       <title>Neural Network Based Bilingual Language Model Growing for Statistical Machine Translation</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>189–195</pages>
       <url hash="0faefd58">D14-1023</url>
       <doi>10.3115/v1/D14-1023</doi>
@@ -268,7 +268,7 @@
     <paper id="25">
       <title>Fitting Sentence Level Translation Evaluation with Many Dense Features</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>202–206</pages>
       <url hash="bdbb3e04">D14-1025</url>
       <doi>10.3115/v1/D14-1025</doi>
@@ -287,11 +287,11 @@
     </paper>
     <paper id="27">
       <title>Learning to Differentiate Better from Worse Translations</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Massimo</first><last>Nicosia</last></author>
       <pages>214–220</pages>
       <url hash="9195cbe9">D14-1027</url>
@@ -309,7 +309,7 @@
     </paper>
     <paper id="29">
       <title>Reordering Model for Forest-to-String Machine Translation</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
       <pages>227–232</pages>
       <url hash="0b46b2e3">D14-1029</url>
       <doi>10.3115/v1/D14-1029</doi>
@@ -320,7 +320,7 @@
       <author><first>Leila</first><last>Wehbe</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>233–243</pages>
       <url hash="dd04b784">D14-1030</url>
       <doi>10.3115/v1/D14-1030</doi>
@@ -388,8 +388,8 @@
     </paper>
     <paper id="37">
       <title>A Graph-based Approach for Contextual Text Normalization</title>
-      <author><first>Cagil</first><last>Sönmez</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="cagil-sonmez"><first>Cagil</first><last>Sönmez</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>313–324</pages>
       <url hash="ce462378">D14-1037</url>
       <doi>10.3115/v1/D14-1037</doi>
@@ -408,7 +408,7 @@
     </paper>
     <paper id="39">
       <title>Hierarchical Discriminative Classification for Text-Based Geolocation</title>
-      <author><first>Benjamin</first><last>Wing</last></author>
+      <author id="benjamin-wing"><first>Benjamin</first><last>Wing</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>336–348</pages>
       <url hash="f9fceeed">D14-1039</url>
@@ -418,7 +418,7 @@
     <paper id="40">
       <title>Probabilistic Models of Cross-Lingual Semantic Similarity in Context Based on Latent Cross-Lingual Concepts Induced from Comparable Data</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>349–362</pages>
       <url hash="fc637c86">D14-1040</url>
       <doi>10.3115/v1/D14-1040</doi>
@@ -428,7 +428,7 @@
     <paper id="41">
       <title>Multi-Predicate Semantic Role Labeling</title>
       <author><first>Haitong</first><last>Yang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>363–373</pages>
       <url hash="4b70c4dd">D14-1041</url>
       <doi>10.3115/v1/D14-1041</doi>
@@ -457,9 +457,9 @@
     <paper id="44">
       <title>Incorporating Vector Space Similarity in Random Walk Inference over Knowledge Bases</title>
       <author><first>Matt</first><last>Gardner</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>397–406</pages>
       <url hash="c9b5c5e9">D14-1044</url>
       <doi>10.3115/v1/D14-1044</doi>
@@ -485,7 +485,7 @@
     </paper>
     <paper id="47">
       <title>Nothing like Good Old Frequency: Studying Context Filters for Distributional Thesauri</title>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
@@ -509,7 +509,7 @@
       <title>A Shortest-path Method for Arc-factored Semantic Role Labeling</title>
       <author><first>Xavier</first><last>Lluís</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>430–435</pages>
       <url hash="c04ad2f6">D14-1049</url>
       <doi>10.3115/v1/D14-1049</doi>
@@ -519,9 +519,9 @@
       <title>Semantic Kernels for Semantic Parsing</title>
       <author><first>Iman</first><last>Saleh</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>436–442</pages>
       <url hash="09273aee">D14-1050</url>
       <doi>10.3115/v1/D14-1050</doi>
@@ -532,9 +532,9 @@
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Mohamed</first><last>Bouallegue</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <author><first>Driss</first><last>Matrouf</last></author>
-      <author><first>Renato</first><last>de Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>de Mori</last></author>
       <pages>443–454</pages>
       <url hash="d1b4d0eb">D14-1051</url>
       <doi>10.3115/v1/D14-1051</doi>
@@ -543,7 +543,7 @@
     <paper id="52">
       <title>Explaining the Stars: Weighted Multiple-Instance Learning for Aspect-Based Sentiment Analysis</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>455–466</pages>
       <url hash="d62994fe">D14-1052</url>
       <doi>10.3115/v1/D14-1052</doi>
@@ -552,7 +552,7 @@
     <paper id="53">
       <title>Sentiment Analysis on the People’s Daily</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>467–476</pages>
       <url hash="fb0f6ffb">D14-1053</url>
       <doi>10.3115/v1/D14-1053</doi>
@@ -575,7 +575,7 @@
     <paper id="55">
       <title>Positive Unlabeled Learning for Deceptive Reviews Detection</title>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Hongbin</first><last>Zhang</last></author>
       <pages>488–498</pages>
       <url hash="27280c10">D14-1055</url>
@@ -614,7 +614,7 @@
     <paper id="59">
       <title><fixed-case>N</fixed-case>atural<fixed-case>LI</fixed-case>: Natural Logic Inference for Common Sense Reasoning</title>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>534–545</pages>
       <url hash="b6f5e4ef">D14-1059</url>
       <doi>10.3115/v1/D14-1059</doi>
@@ -623,7 +623,7 @@
     <paper id="60">
       <title>Modeling Term Translation for Document-informed Machine Translation</title>
       <author><first>Fandong</first><last>Meng</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>546–556</pages>
@@ -644,7 +644,7 @@
     <paper id="62">
       <title>Latent Domain Phrase-based Models for Adaptation</title>
       <author><first>Hoang</first><last>Cuong</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>566–576</pages>
       <url hash="ca82beee">D14-1062</url>
       <doi>10.3115/v1/D14-1062</doi>
@@ -652,7 +652,7 @@
     </paper>
     <paper id="63">
       <title>Translation Rules with Right-Hand Side Lattices</title>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>577–588</pages>
       <url hash="a4afe625">D14-1063</url>
@@ -661,7 +661,7 @@
     </paper>
     <paper id="64">
       <title>Learning to Translate: A Query-Specific Combination Approach for Cross-Lingual Information Retrieval</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <pages>589–599</pages>
       <url hash="42deba48">D14-1064</url>
@@ -681,8 +681,8 @@
     <paper id="66">
       <title>Lexical Substitution for the Medical Domain</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>610–614</pages>
       <url hash="1b7a993f">D14-1066</url>
       <doi>10.3115/v1/D14-1066</doi>
@@ -710,7 +710,7 @@
     </paper>
     <paper id="69">
       <title>Non-linear Mapping for Improved Identification of 1300+ Languages</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <pages>627–632</pages>
       <url hash="428c4a7b">D14-1069</url>
       <doi>10.3115/v1/D14-1069</doi>
@@ -722,7 +722,7 @@
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Leonardo</first><last>Claudino</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>633–644</pages>
       <url hash="f4a4bcf8">D14-1070</url>
       <doi>10.3115/v1/D14-1070</doi>
@@ -733,7 +733,7 @@
       <author><first>Min-Chul</first><last>Yang</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>645–650</pages>
       <url hash="d6165a6d">D14-1071</url>
       <doi>10.3115/v1/D14-1071</doi>
@@ -772,7 +772,7 @@
     <paper id="75">
       <title>Fear the <fixed-case>REAPER</fixed-case>: A System for Automatic Multi-Document Summarization with Reinforcement Learning</title>
       <author><first>Cody</first><last>Rioux</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Yllias</first><last>Chali</last></author>
       <pages>681–690</pages>
       <url hash="81402040">D14-1075</url>
@@ -794,7 +794,7 @@
     <paper id="77">
       <title>Analyzing Stemming Approaches for <fixed-case>T</fixed-case>urkish Multi-Document Summarization</title>
       <author><first>Muhammed Yavuz</first><last>Nuzumlalı</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>702–706</pages>
       <url hash="5fc20186">D14-1077</url>
       <doi>10.3115/v1/D14-1077</doi>
@@ -812,7 +812,7 @@
       <title>Evaluating Neural Word Representations in Tensor-Based Compositional Settings</title>
       <author><first>Dmitrijs</first><last>Milajevs</last></author>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>708–719</pages>
       <url hash="35d792ee">D14-1079</url>
@@ -821,8 +821,8 @@
     </paper>
     <paper id="80">
       <title>Opinion Mining with Deep Recurrent Neural Networks</title>
-      <author><first>Ozan</first><last>İrsoy</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>İrsoy</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>720–728</pages>
       <url hash="cc06b2c6">D14-1080</url>
       <doi>10.3115/v1/D14-1080</doi>
@@ -840,7 +840,7 @@
     <paper id="82">
       <title>A Fast and Accurate Dependency Parser using Neural Networks</title>
       <author><first>Danqi</first><last>Chen</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>740–750</pages>
       <url hash="890021f9">D14-1082</url>
       <doi>10.3115/v1/D14-1082</doi>
@@ -866,7 +866,7 @@
     </paper>
     <paper id="85">
       <title>Unsupervised Sentence Enhancement for Automatic Summarization</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>775–786</pages>
       <url hash="72eff50d">D14-1085</url>
@@ -879,7 +879,7 @@
       <author><first>Sahar</first><last>Kazemzadeh</last></author>
       <author><first>Vicente</first><last>Ordonez</last></author>
       <author><first>Mark</first><last>Matten</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <pages>787–798</pages>
       <url hash="eb6fab86">D14-1086</url>
       <doi>10.3115/v1/D14-1086</doi>
@@ -889,7 +889,7 @@
       <title>Unsupervised Template Mining for Semantic Category Understanding</title>
       <author><first>Lei</first><last>Shi</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Yi-Dong</first><last>Shen</last></author>
       <author><first>Yong</first><last>Rui</last></author>
       <pages>799–809</pages>
@@ -899,9 +899,9 @@
     </paper>
     <paper id="88">
       <title>Taxonomy Construction Using Syntactic Contextual Evidence</title>
-      <author><last>Luu</last> <first>Anh Tuan</first></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
-      <author><first>See Kiong</first><last>Ng</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
+      <author id="see-kiong-ng"><first>See Kiong</first><last>Ng</last></author>
       <pages>810–819</pages>
       <url hash="d8db9290">D14-1088</url>
       <doi>10.3115/v1/D14-1088</doi>
@@ -911,7 +911,7 @@
       <title>Analysing recall loss in named entity slot filling</title>
       <author><first>Glen</first><last>Pink</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>820–830</pages>
       <url hash="5c27a656">D14-1089</url>
       <doi>10.3115/v1/D14-1089</doi>
@@ -930,7 +930,7 @@
     </paper>
     <paper id="91">
       <title>Syllable weight encodes mostly the same information for <fixed-case>E</fixed-case>nglish word segmentation as dictionary stress</title>
-      <author><first>John K</first><last>Pate</last></author>
+      <author id="john-k-pate"><first>John K</first><last>Pate</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>844–853</pages>
       <url hash="4e91ba04">D14-1091</url>
@@ -940,7 +940,7 @@
     <paper id="92">
       <title>A Joint Model for Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Miaohong</first><last>Chen</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Wenzhe</first><last>Pei</last></author>
       <pages>854–863</pages>
       <url hash="4f34c14a">D14-1092</url>
@@ -973,7 +973,7 @@
       <title>Morphological Segmentation for Keyword Spotting</title>
       <author><first>Karthik</first><last>Narasimhan</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Stavros</first><last>Tsakalidis</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>880–885</pages>
@@ -983,9 +983,9 @@
     </paper>
     <paper id="96">
       <title>What Can We Get From 1000 Tokens? A Case Study of Multilingual <fixed-case>POS</fixed-case> Tagging For Resource-Poor Languages</title>
-      <author><first>Long</first><last>Duong</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>886–897</pages>
@@ -1026,8 +1026,8 @@
     <paper id="100">
       <title>Ambiguity Resolution for Vt-N Structures in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>928–937</pages>
       <url hash="f66dd360">D14-1100</url>
       <doi>10.3115/v1/D14-1100</doi>
@@ -1053,11 +1053,11 @@
     </paper>
     <paper id="103">
       <title>Dependency parsing with latent refinements of part-of-speech tags</title>
-      <author><first>Thomas</first><last>Mueller</last></author>
-      <author><first>Richard</first><last>Farkas</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
+      <author id="richard-farkas"><first>Richard</first><last>Farkas</last></author>
       <author><first>Alex</first><last>Judea</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>963–967</pages>
       <url hash="a0730908">D14-1103</url>
       <doi>10.3115/v1/D14-1103</doi>
@@ -1065,9 +1065,9 @@
     </paper>
     <paper id="104">
       <title>Importance weighting and unsupervised domain adaptation of <fixed-case>POS</fixed-case> taggers: a negative result</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>968–973</pages>
       <url hash="300862d0">D14-1104</url>
       <doi>10.3115/v1/D14-1104</doi>
@@ -1098,7 +1098,7 @@
     <paper id="107">
       <title><fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing with a Supertag-factored Model</title>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>990–1000</pages>
       <url hash="38e7b441">D14-1107</url>
       <doi>10.3115/v1/D14-1107</doi>
@@ -1110,8 +1110,8 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1001–1012</pages>
       <url hash="36023420">D14-1108</url>
       <doi>10.3115/v1/D14-1108</doi>
@@ -1142,7 +1142,7 @@
     <paper id="111">
       <title>Reducing Dimensions of Tensors in Type-Driven Distributional Semantics</title>
       <author><first>Tamara</first><last>Polajnar</last></author>
-      <author><first>Luana</first><last>Fǎgǎrǎşan</last></author>
+      <author id="luana-fagarasan"><first>Luana</first><last>Fǎgǎrǎşan</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>1036–1046</pages>
       <url hash="aaaeb6dd">D14-1111</url>
@@ -1151,8 +1151,8 @@
     </paper>
     <paper id="112">
       <title>An Etymological Approach to Cross-Language Orthographic Similarity. Application on <fixed-case>R</fixed-case>omanian</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>1047–1058</pages>
       <url hash="fea35fc7">D14-1112</url>
       <doi>10.3115/v1/D14-1112</doi>
@@ -1180,7 +1180,7 @@
     </paper>
     <paper id="115">
       <title>Queries as a Source of Lexicalized Commonsense Knowledge</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>1081–1091</pages>
       <url hash="b05df581">D14-1115</url>
       <doi>10.3115/v1/D14-1115</doi>
@@ -1247,11 +1247,11 @@
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Gregory</first><last>Park</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Margaret</first><last>Kern</last></author>
+      <author id="margaret-kern"><first>Margaret</first><last>Kern</last></author>
       <author><first>David</first><last>Stillwell</last></author>
       <author><first>Michal</first><last>Kosinski</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Hansen Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen Andrew</first><last>Schwartz</last></author>
       <pages>1146–1151</pages>
       <url hash="23eabc2c">D14-1121</url>
       <doi>10.3115/v1/D14-1121</doi>
@@ -1263,7 +1263,7 @@
       <author><first>William Yang</first><last>Wang</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
       <author><first>Kathryn</first><last>Mazaitis</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>1152–1158</pages>
       <url hash="c486a076">D14-1122</url>
       <doi>10.3115/v1/D14-1122</doi>
@@ -1278,7 +1278,7 @@
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Dongqing</first><last>Yang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1159–1168</pages>
       <url hash="6e724eef">D14-1123</url>
       <doi>10.3115/v1/D14-1123</doi>
@@ -1288,7 +1288,7 @@
       <title>Detecting Disagreement in Conversations using Pseudo-Monologic Rhetorical Structure</title>
       <author><first>Kelsey</first><last>Allen</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>1169–1180</pages>
       <url hash="e1cec49b">D14-1124</url>
       <doi>10.3115/v1/D14-1124</doi>
@@ -1297,7 +1297,7 @@
     <paper id="125">
       <title>+/-<fixed-case>E</fixed-case>ffect<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Sense-level Lexicon Acquisition for Opinion Inference</title>
       <author><first>Yoonjung</first><last>Choi</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>1181–1191</pages>
       <url hash="a958705d">D14-1125</url>
       <doi>10.3115/v1/D14-1125</doi>
@@ -1315,7 +1315,7 @@
     <paper id="127">
       <title>Learning Emotion Indicators from Tweets: Hashtags, Hashtag Patterns, and Phrases</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1203–1209</pages>
       <url hash="d4b406ae">D14-1127</url>
       <doi>10.3115/v1/D14-1127</doi>
@@ -1324,7 +1324,7 @@
     <paper id="128">
       <title>Fine-Grained Contextual Predictions for Hard Sentiment Words</title>
       <author><first>Sebastian</first><last>Ebert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1210–1215</pages>
       <url hash="bccd4d06">D14-1128</url>
       <doi>10.3115/v1/D14-1128</doi>
@@ -1338,7 +1338,7 @@
       <author><first>Jun</first><last>Lu</last></author>
       <author><first>Jun</first><last>Lang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>1216–1224</pages>
       <url hash="3e087482">D14-1129</url>
       <doi>10.3115/v1/D14-1129</doi>
@@ -1347,11 +1347,11 @@
     <paper id="130">
       <title>Human Effort and Machine Learnability in Computer Aided Translation</title>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <author><first>Jason</first><last>Chuang</last></author>
       <author><first>Jeffrey</first><last>Heer</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1225–1236</pages>
       <url hash="f723e887">D14-1130</url>
       <doi>10.3115/v1/D14-1130</doi>
@@ -1401,7 +1401,7 @@
       <title>Morpho-syntactic Lexical Generalization for <fixed-case>CCG</fixed-case> Semantic Parsing</title>
       <author><first>Adrienne</first><last>Wang</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1284–1295</pages>
       <url hash="7125fcaa">D14-1135</url>
       <doi>10.3115/v1/D14-1135</doi>
@@ -1453,7 +1453,7 @@
       <author><first>He</first><last>He</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>John</first><last>Morgan</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1342–1352</pages>
       <url hash="dd788b4a">D14-1140</url>
       <doi>10.3115/v1/D14-1140</doi>
@@ -1493,7 +1493,7 @@
     </paper>
     <paper id="144">
       <title>Language Transfer Hypotheses with Linear <fixed-case>SVM</fixed-case> Weights</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>1385–1390</pages>
       <url hash="0a098564">D14-1144</url>
@@ -1503,7 +1503,7 @@
     <paper id="145">
       <title>Predicting Dialect Variation in Immigrant Contexts Using Light Verb Constructions</title>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1391–1395</pages>
       <url hash="b081c0aa">D14-1145</url>
       <doi>10.3115/v1/D14-1145</doi>
@@ -1514,7 +1514,7 @@
       <author><first>A-Yeong</first><last>Kim</last></author>
       <author><first>Hyun-Je</first><last>Song</last></author>
       <author><first>Seong-Bae</first><last>Park</last></author>
-      <author><first>Sang-Jo</first><last>Lee</last></author>
+      <author id="sang-jo-lee"><first>Sang-Jo</first><last>Lee</last></author>
       <pages>1396–1404</pages>
       <url hash="8a96bd6f">D14-1146</url>
       <doi>10.3115/v1/D14-1146</doi>
@@ -1553,9 +1553,9 @@
     <paper id="150">
       <title>Citation-Enhanced Keyphrase Extraction from Research Papers: A Supervised Approach</title>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Florin Adrian</first><last>Bulgarov</last></author>
+      <author id="florin-bulgarov"><first>Florin Adrian</first><last>Bulgarov</last></author>
       <author><first>Andreea</first><last>Godea</last></author>
-      <author><first>Sujatha</first><last>Das Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha</first><last>Das Gollapalli</last></author>
       <pages>1435–1446</pages>
       <url hash="eb230f8d">D14-1150</url>
       <doi>10.3115/v1/D14-1150</doi>
@@ -1564,7 +1564,7 @@
     <paper id="151">
       <title>Using Mined Coreference Chains as a Resource for a Semantic Task</title>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1447–1452</pages>
       <url hash="955c17be">D14-1151</url>
       <doi>10.3115/v1/D14-1151</doi>
@@ -1573,7 +1573,7 @@
     </paper>
     <paper id="152">
       <title>Financial Keyword Expansion via Continuous Word Vector Representations</title>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
       <pages>1453–1458</pages>
       <url hash="4695fec0">D14-1152</url>
@@ -1618,8 +1618,8 @@
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <author><first>Ea-Ee</first><last>Jan</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>1474–1480</pages>
       <url hash="eab5415c">D14-1156</url>
@@ -1630,7 +1630,7 @@
       <title>Staying on Topic: An Indicator of Power in Political Debates</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Ashima</first><last>Arora</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1481–1486</pages>
       <url hash="49821eeb">D14-1157</url>
       <doi>10.3115/v1/D14-1157</doi>
@@ -1638,10 +1638,10 @@
     </paper>
     <paper id="158">
       <title>Language Modeling with Power Low Rank Ensembles</title>
-      <author><first>Ankur P.</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur P.</first><last>Parikh</last></author>
       <author><first>Avneesh</first><last>Saluja</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1487–1498</pages>
       <url hash="0e34ca43">D14-1158</url>
       <doi>10.3115/v1/D14-1158</doi>
@@ -1657,7 +1657,7 @@
       <author><first>Brittany</first><last>Harding</last></author>
       <author><first>Brad</first><last>Huang</last></author>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1499–1510</pages>
       <url hash="393b3847">D14-1159</url>
       <doi>10.3115/v1/D14-1159</doi>
@@ -1669,7 +1669,7 @@
     <paper id="160">
       <title><fixed-case>S</fixed-case>ensicon: An Automatically Constructed Sensorial Lexicon</title>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>1511–1521</pages>
       <url hash="4cfd5f53">D14-1160</url>
@@ -1680,8 +1680,8 @@
       <title>Word Semantic Representations using <fixed-case>B</fixed-case>ayesian Probabilistic Tensor Factorization</title>
       <author><first>Jingwei</first><last>Zhang</last></author>
       <author><first>Jeremy</first><last>Salwen</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>1522–1531</pages>
       <url hash="9cda4b3d">D14-1161</url>
       <doi>10.3115/v1/D14-1161</doi>
@@ -1691,7 +1691,7 @@
       <title><fixed-case>G</fixed-case>lo<fixed-case>V</fixed-case>e: Global Vectors for Word Representation</title>
       <author><first>Jeffrey</first><last>Pennington</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1532–1543</pages>
       <url hash="ccc02ed3">D14-1162</url>
       <doi>10.3115/v1/D14-1162</doi>
@@ -1713,7 +1713,7 @@
       <author><first>Gabor</first><last>Angeli</last></author>
       <author><first>Julie</first><last>Tibshirani</last></author>
       <author><first>Jean</first><last>Wu</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1556–1567</pages>
       <url hash="2d3fbb57">D14-1164</url>
       <doi>10.3115/v1/D14-1164</doi>
@@ -1723,9 +1723,9 @@
     <paper id="165">
       <title>Typed Tensor Decomposition of Knowledge Bases for Relation Extraction</title>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>1568–1579</pages>
       <url hash="2b639b63">D14-1165</url>
       <doi>10.3115/v1/D14-1165</doi>
@@ -1734,7 +1734,7 @@
     </paper>
     <paper id="166">
       <title>A convex relaxation for weakly supervised relation extraction</title>
-      <author><first>Édouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Édouard</first><last>Grave</last></author>
       <pages>1580–1590</pages>
       <url hash="6417a09b">D14-1166</url>
       <doi>10.3115/v1/D14-1166</doi>
@@ -1757,7 +1757,7 @@
       <author><first>Shima</first><last>Gerani</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <author><first>Bita</first><last>Nejat</last></author>
       <pages>1602–1613</pages>
       <url hash="5ae2b2dc">D14-1168</url>
@@ -1797,7 +1797,7 @@
     <paper id="172">
       <title>Assessing the Impact of Translation Errors on Machine Translation Quality with Mixed-effects Models</title>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1643–1653</pages>
@@ -1810,7 +1810,7 @@
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1654–1664</pages>
       <url hash="8011188f">D14-1173</url>
       <doi>10.3115/v1/D14-1173</doi>
@@ -1823,7 +1823,7 @@
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1665–1675</pages>
       <url hash="73c112fe">D14-1174</url>
       <doi>10.3115/v1/D14-1174</doi>
@@ -1831,7 +1831,7 @@
     </paper>
     <paper id="175">
       <title>Word Translation Prediction for Morphologically Rich Languages with Bilingual Neural Networks</title>
-      <author><first>Ke M.</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke M.</first><last>Tran</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>1676–1688</pages>
@@ -1852,7 +1852,7 @@
       <title>Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora</title>
       <author><first>Georgios</first><last>Kontonatsios</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>1701–1712</pages>
       <url hash="14337aa2">D14-1177</url>
@@ -1861,7 +1861,7 @@
     </paper>
     <paper id="178">
       <title>Random Manhattan Integer Indexing: Incremental <fixed-case>L</fixed-case>1 Normed Vector Space Construction</title>
-      <author><first>Behrang</first><last>Q. Zadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>Q. Zadeh</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>1713–1723</pages>
       <url hash="04a0aa4a">D14-1178</url>
@@ -1924,7 +1924,7 @@
       <title>Improved Decipherment of Homophonic Ciphers</title>
       <author><first>Malte</first><last>Nuhn</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1764–1768</pages>
       <url hash="3a03374d">D14-1184</url>
       <doi>10.3115/v1/D14-1184</doi>
@@ -1975,7 +1975,7 @@
       <title>Detecting Non-compositional <fixed-case>MWE</fixed-case> Components using <fixed-case>W</fixed-case>iktionary</title>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1792–1797</pages>
       <url hash="575e0538">D14-1189</url>
       <doi>10.3115/v1/D14-1189</doi>
@@ -1983,8 +1983,8 @@
     </paper>
     <paper id="190">
       <title>Joint Emotion Analysis via Multi-task <fixed-case>G</fixed-case>aussian Processes</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1798–1803</pages>
       <url hash="192cce36">D14-1190</url>
@@ -2034,7 +2034,7 @@
     </paper>
     <paper id="195">
       <title>Joint Decoding of Tree Transduction Models for Sentence Compression</title>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <author><first>Jianguo</first><last>Xiao</last></author>
       <pages>1828–1833</pages>
@@ -2064,7 +2064,7 @@
     </paper>
     <paper id="198">
       <title>Constructing Information Networks Using One Single Model</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Sujian</first><last>Li</last></author>
@@ -2075,7 +2075,7 @@
     </paper>
     <paper id="199">
       <title>Event Role Extraction using Domain-Relevant Word Representations</title>
-      <author><first>Emanuela</first><last>Boroş</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boroş</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
@@ -2116,8 +2116,8 @@
       <title>Type-Aware Distantly Supervised Relation Extraction with Linked Arguments</title>
       <author><first>Mitchell</first><last>Koch</last></author>
       <author><first>John</first><last>Gilmer</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>1891–1901</pages>
       <url hash="356bff17">D14-1203</url>
       <doi>10.3115/v1/D14-1203</doi>
@@ -2155,9 +2155,9 @@
     </paper>
     <paper id="207">
       <title><fixed-case>CTP</fixed-case>s: Contextual Temporal Profiles for Time Scoping Facts using State Change Detection</title>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>1930–1936</pages>
       <url hash="581fea2e">D14-1207</url>
       <doi>10.3115/v1/D14-1207</doi>
@@ -2166,7 +2166,7 @@
     <paper id="208">
       <title>Noisy Or-based model for Relation Extraction using Distant Supervision</title>
       <author><first>Ajay</first><last>Nagesh</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <pages>1937–1941</pages>
       <url hash="f01e3ed3">D14-1208</url>
@@ -2185,8 +2185,8 @@
     <paper id="210">
       <title>Latent-Variable Synchronous <fixed-case>CFG</fixed-case>s for Hierarchical Translation</title>
       <author><first>Avneesh</first><last>Saluja</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1953–1964</pages>
       <url hash="211f320f">D14-1210</url>
       <doi>10.3115/v1/D14-1210</doi>
@@ -2196,7 +2196,7 @@
       <title>Gender and Power: How Gender and Gender Environment Affect Manifestations of Power</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Emily E.</first><last>Reid</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1965–1976</pages>
       <url hash="02c2886e">D14-1211</url>
       <doi>10.3115/v1/D14-1211</doi>
@@ -2215,8 +2215,8 @@
     <paper id="213">
       <title>Self-disclosure topic model for classifying and analyzing <fixed-case>T</fixed-case>witter conversations</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1986–1996</pages>
       <url hash="e5fbf574">D14-1213</url>
       <doi>10.3115/v1/D14-1213</doi>
@@ -2227,8 +2227,8 @@
       <title>Major Life Event Extraction from <fixed-case>T</fixed-case>witter based on Congratulations/Condolences Speech Acts</title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1997–2007</pages>
       <url hash="befe93d2">D14-1214</url>
       <doi>10.3115/v1/D14-1214</doi>
@@ -2256,9 +2256,9 @@
     </paper>
     <paper id="217">
       <title>Learning Spatial Knowledge for Text to 3<fixed-case>D</fixed-case> Scene Generation</title>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Manolis</first><last>Savva</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2028–2038</pages>
       <url hash="1e998749">D14-1217</url>
       <doi>10.3115/v1/D14-1217</doi>
@@ -2267,7 +2267,7 @@
     <paper id="218">
       <title>A Model of Coherence Based on Distributed Sentence Representation</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2039–2048</pages>
       <url hash="090bbf68">D14-1218</url>
       <doi>10.3115/v1/D14-1218</doi>
@@ -2275,7 +2275,7 @@
     </paper>
     <paper id="219">
       <title>Discriminative Reranking of Discourse Parses Using Tree Kernels</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>2049–2060</pages>
       <url hash="706139c0">D14-1219</url>
@@ -2286,7 +2286,7 @@
       <title>Recursive Deep Models for Discourse Parsing</title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Rumeng</first><last>Li</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2061–2069</pages>
       <url hash="8869ba8d">D14-1220</url>
       <doi>10.3115/v1/D14-1220</doi>
@@ -2315,8 +2315,8 @@
       <title>Resolving Referring Expressions in Conversational Dialogs for Natural User Interfaces</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <author><first>Zhaleh</first><last>Feizollahi</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>2094–2104</pages>
       <url hash="c2c79931">D14-1223</url>
       <doi>10.3115/v1/D14-1223</doi>
@@ -2328,7 +2328,7 @@
       <author><first>Wenhe</first><last>Feng</last></author>
       <author><first>Jing</first><last>Sun</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2105–2114</pages>
       <url hash="0ac672d0">D14-1224</url>
       <doi>10.3115/v1/D14-1224</doi>
@@ -2339,8 +2339,8 @@
       <title>Prune-and-Score: Learning for Greedy Coreference Resolution</title>
       <author><first>Chao</first><last>Ma</last></author>
       <author><first>Janardhan Rao</first><last>Doppa</last></author>
-      <author><first>J. Walker</first><last>Orr</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="j-walker-orr"><first>J. Walker</first><last>Orr</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <author><first>Xiaoli</first><last>Fern</last></author>
       <author><first>Tom</first><last>Dietterich</last></author>
       <author><first>Prasad</first><last>Tadepalli</last></author>
@@ -2374,7 +2374,7 @@
     </meta>
     <paper id="1">
       <title>Sentiment Analysis of Social Media Texts</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <abstract>Automatically detecting sentiment of product reviews, blogs, tweets, and SMS messages has attracted extensive interest from both the academia and industry. It has a number of applications, including: tracking sentiment towards products, movies, politicians, etc.; improving customer relation models; detecting happiness and well-being; and improving automatic dialogue systems. In this tutorial, we will describe how you can create a state-of-the-art sentiment analysis system, with a focus on social media posts.
 
@@ -2401,7 +2401,7 @@ The content of the tutorial will be divided into four blocks of 45 minutes each,
       <title>Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars</title>
       <author><first>Yoav</first><last>Artzi</last></author>
       <author><first>Nicholas</first><last>Fitzgerald</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <abstract>Semantic parsers map natural language sentences to formal representations of their underlying meaning. Building accurate semantic parsers without prohibitive engineering costs is a long-standing, open research problem.
 
 The tutorial will describe general principles for building semantic parsers. The presentation will be divided into two main parts: learning and modeling. In the learning part, we will describe a unified approach for learning Combinatory Categorial Grammar (CCG) semantic parsers, that induces both a CCG lexicon and the parameters of a parsing model. The approach learns from data with labeled meaning representations, as well as from more easily gathered weak supervision. It also enables grounded learning where the semantic parser is used in an interactive environment, for example to read and execute instructions. The modeling section will include best practices for grammar design and choice of semantic representation. We will motivate our use of lambda calculus as a language for building and representing meaning with examples from several domains.
@@ -2411,7 +2411,7 @@ The ideas we will discuss are widely applicable. The semantic modeling approach,
     </paper>
     <paper id="4">
       <title>Linear Programming Decoders in Natural Language Processing: From Integer Programming to Message Passing and Dual Decomposition</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <abstract>This tutorial will cover the theory and practice of linear programming decoders. This class of decoders encompasses a variety of techniques that have enjoyed great success in devising structured models for natural language processing (NLP). Along the tutorial, we provide a unified view of different algorithms and modeling techniques, including belief propagation, dual decomposition, integer linear programming, Markov logic, and constrained conditional models. Various applications in NLP will serve as a motivation.
 
 There is a long string of work using integer linear programming (ILP) formulations in NLP, for example in semantic role labeling, machine translation, summarization, dependency parsing, coreference resolution, and opinion mining, to name just a few. At the heart of these approaches is the ability to encode logic and budget constraints (common in NLP and information retrieval) as linear inequalities. Thanks to general purpose solvers (such as Gurobi, CPLEX, or GLPK), the practitioner can abstract away from the decoding algorithm and focus on developing a powerful model. A disadvantage, however, is that general solvers do not scale well to large problem instances, since they fail to exploit the structure of the problem.
@@ -2451,7 +2451,7 @@ This second part will focus mostly on the construction of embeddings for multi-r
     </paper>
     <paper id="7">
       <title>Natural Language Processing of <fixed-case>A</fixed-case>rabic and its Dialects</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <abstract>This tutorial introduces the different challenges and current solutions to the automatic processing of Arabic and its dialects. The tutorial has two parts: First, we present a discussion of generic issues relevant to Arabic NLP and detail dialectal linguistic issues and the challenges they pose for NLP. In the second part, we review the state-of-the-art in Arabic processing covering several enabling technologies and applications, e.g., dialect identification, morphological processing (analysis, disambiguation, tokenization, POS tagging), parsing, and machine translation.</abstract>
       <bibkey>diab-habash-2014-natural</bibkey>
diff --git a/data/xml/D15.xml b/data/xml/D15.xml
index 2f1951e02e..8c35eb4f71 100644
--- a/data/xml/D15.xml
+++ b/data/xml/D15.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="ee9dc22b">D15-1</url>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <editor><first>Jian</first><last>Su</last></editor>
       <doi>10.18653/v1/D15-1</doi>
@@ -33,9 +33,9 @@
     <paper id="2">
       <title>Distributional vectors encode referential attributes</title>
       <author><first>Abhijeet</first><last>Gupta</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>12–21</pages>
       <url hash="83e6adb1">D15-1002</url>
       <doi>10.18653/v1/D15-1002</doi>
@@ -44,8 +44,8 @@
     </paper>
     <paper id="3">
       <title>Building a shared world: mapping distributional to model-theoretic semantic spaces</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <pages>22–32</pages>
       <url hash="fde993b2">D15-1003</url>
       <doi>10.18653/v1/D15-1003</doi>
@@ -66,7 +66,7 @@
     <paper id="5">
       <title>Reordering Grammar Induction</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>44–54</pages>
       <url hash="24f3e40c">D15-1005</url>
       <doi>10.18653/v1/D15-1005</doi>
@@ -79,7 +79,7 @@
       <author><first>Alvin</first><last>Grissom II</last></author>
       <author><first>John</first><last>Morgan</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>55–64</pages>
       <url hash="fc82cce8">D15-1006</url>
       <doi>10.18653/v1/D15-1006</doi>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="7">
       <title>Identifying Political Sentiment between Nation States with Social Media</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Victor</first><last>Bowen</last></author>
       <author><first>Ethan</first><last>Genco</last></author>
       <author><first>Xisen</first><last>Tian</last></author>
@@ -104,7 +104,7 @@
     <paper id="8">
       <title>Open Extraction of Fine-Grained Political Statements</title>
       <author><first>David</first><last>Bamman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>76–85</pages>
       <url hash="e951c352">D15-1008</url>
       <doi>10.18653/v1/D15-1008</doi>
@@ -125,8 +125,8 @@
     </paper>
     <paper id="10">
       <title>Semantic Annotation for Microblog Topics Using <fixed-case>W</fixed-case>ikipedia Temporal Information</title>
-      <author><first>Tuan</first><last>Tran</last></author>
-      <author><first>Nam Khanh</first><last>Tran</last></author>
+      <author id="tuan-tran"><first>Tuan</first><last>Tran</last></author>
+      <author id="nam-khanh-tran"><first>Nam Khanh</first><last>Tran</last></author>
       <author><first>Asmelash</first><last>Teka Hadgu</last></author>
       <author><first>Robert</first><last>Jäschke</last></author>
       <pages>97–106</pages>
@@ -138,7 +138,7 @@
     <paper id="11">
       <title>System Combination for Multi-document Summarization</title>
       <author><first>Kai</first><last>Hong</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>107–117</pages>
       <url hash="a78f6874">D15-1011</url>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="12">
       <title>Phrase-based Compressive Cross-Language Summarization</title>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <author><first>Jianguo</first><last>Xiao</last></author>
       <pages>118–127</pages>
@@ -167,7 +167,7 @@
     <paper id="14">
       <title>Indicative Tweet Generation: An Extractive Summarization Problem?</title>
       <author><first>Priya</first><last>Sidhaye</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>138–147</pages>
       <url hash="94387f40">D15-1014</url>
       <doi>10.18653/v1/D15-1014</doi>
@@ -196,7 +196,7 @@
       <title>Monotone Submodularity in Opinion Summaries</title>
       <author><first>Jayanth</first><last>Jayanth</last></author>
       <author><first>Jayaprakash</first><last>Sundararaj</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>169–178</pages>
       <url hash="b32c8ee3">D15-1017</url>
       <doi>10.18653/v1/D15-1017</doi>
@@ -205,7 +205,7 @@
     <paper id="18">
       <title>Joint Prediction for Entity/Event-Level Sentiment Analysis using Probabilistic Soft Logic Models</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>179–189</pages>
       <url hash="aecdaec7">D15-1018</url>
       <doi>10.18653/v1/D15-1018</doi>
@@ -214,8 +214,8 @@
     <paper id="19">
       <title>Learning to Recognize Affective Polarity in Similes</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>190–200</pages>
       <url hash="072ccf91">D15-1019</url>
       <doi>10.18653/v1/D15-1019</doi>
@@ -226,7 +226,7 @@
       <author><first>Tongtao</first><last>Zhang</last></author>
       <author><first>Hongzhi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>201–206</pages>
       <url hash="25b2f199">D15-1020</url>
       <doi>10.18653/v1/D15-1020</doi>
@@ -236,7 +236,7 @@
       <title>A Survey of Current Datasets for Vision and Language Research</title>
       <author><first>Francis</first><last>Ferraro</last></author>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <author><first>Jacob</first><last>Devlin</last></author>
       <author><first>Michel</first><last>Galley</last></author>
@@ -253,7 +253,7 @@
       <author><first>Ying</first><last>Lu</last></author>
       <author><first>Emmanuel</first><last>Dellandrea</last></author>
       <author><first>Francesc</first><last>Moreno-Noguer</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>214–220</pages>
       <url hash="31eba99a">D15-1022</url>
       <doi>10.18653/v1/D15-1022</doi>
@@ -263,7 +263,7 @@
     <paper id="23">
       <title>On A Strictly Convex <fixed-case>IBM</fixed-case> Model 1</title>
       <author><first>Andrei</first><last>Simion</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Cliff</first><last>Stein</last></author>
       <pages>221–226</pages>
       <url hash="b9d6bbfb">D15-1023</url>
@@ -302,7 +302,7 @@
     <paper id="27">
       <title>A Model of Zero-Shot Learning of Spoken Language Understanding</title>
       <author><first>Majid</first><last>Yazdani</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>244–249</pages>
       <url hash="8dab3184">D15-1027</url>
       <doi>10.18653/v1/D15-1027</doi>
@@ -312,8 +312,8 @@
       <title>Modeling Tweet Arrival Times using Log-<fixed-case>G</fixed-case>aussian Cox Processes</title>
       <author><first>Michal</first><last>Lukasik</last></author>
       <author><first>P. K.</first><last>Srijith</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>250–255</pages>
       <url hash="ed1a9159">D15-1028</url>
       <doi>10.18653/v1/D15-1028</doi>
@@ -365,7 +365,7 @@
     <paper id="33">
       <title>Learning Better Embeddings for Rare Words Using Distributional Representations</title>
       <author><first>Irina</first><last>Sergienya</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>280–285</pages>
       <url hash="298d0de8">D15-1033</url>
       <doi>10.18653/v1/D15-1033</doi>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="34">
       <title>Composing Relationships with Translations</title>
-      <author><first>Alberto</first><last>García-Durán</last></author>
+      <author id="alberto-garcia-duran"><first>Alberto</first><last>García-Durán</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
       <author><first>Nicolas</first><last>Usunier</last></author>
       <pages>286–290</pages>
@@ -417,7 +417,7 @@
     <paper id="38">
       <title>Traversing Knowledge Graphs in Vector Space</title>
       <author><first>Kelvin</first><last>Guu</last></author>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <pages>318–327</pages>
       <url hash="6cc29c5c">D15-1038</url>
@@ -428,7 +428,7 @@
     <paper id="39">
       <title>Density-Driven Cross-Lingual Transfer of Dependency Parsers</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>328–338</pages>
       <url hash="98a8951c">D15-1039</url>
       <doi>10.18653/v1/D15-1039</doi>
@@ -437,8 +437,8 @@
     </paper>
     <paper id="40">
       <title>A Neural Network Model for Low-Resource <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing</title>
-      <author><first>Long</first><last>Duong</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>339–348</pages>
@@ -450,8 +450,8 @@
     <paper id="41">
       <title>Improved Transition-based Parsing by Modeling Characters instead of Words with <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>349–359</pages>
       <url hash="8011576d">D15-1041</url>
       <doi>10.18653/v1/D15-1041</doi>
@@ -463,7 +463,7 @@
       <author><first>Katja</first><last>Filippova</last></author>
       <author><first>Enrique</first><last>Alfonseca</last></author>
       <author><first>Carlos A.</first><last>Colmenares</last></author>
-      <author><first>Lukasz</first><last>Kaiser</last></author>
+      <author id="lukasz-kaiser"><first>Lukasz</first><last>Kaiser</last></author>
       <author><first>Oriol</first><last>Vinyals</last></author>
       <pages>360–368</pages>
       <url hash="a18c3efe">D15-1042</url>
@@ -483,7 +483,7 @@
     </paper>
     <paper id="44">
       <title>A Neural Attention Model for Abstractive Sentence Summarization</title>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <author><first>Sumit</first><last>Chopra</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <pages>379–389</pages>
@@ -506,7 +506,7 @@
       <title>Hashtag Recommendation Using <fixed-case>D</fixed-case>irichlet Process Mixture Models Incorporating Types of Hashtags</title>
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>401–410</pages>
       <url hash="371ec4ca">D15-1046</url>
       <doi>10.18653/v1/D15-1046</doi>
@@ -538,7 +538,7 @@
     <paper id="49">
       <title>Flexible Domain Adaptation for Automated Essay Scoring Using Correlated Linear Regression</title>
       <author><first>Peter</first><last>Phandi</last></author>
-      <author><first>Kian Ming A.</first><last>Chai</last></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>431–439</pages>
       <url hash="e8d9b0eb">D15-1049</url>
@@ -549,8 +549,8 @@
       <title>Show Me Your Evidence - an Automatic Method for Context Dependent Evidence Detection</title>
       <author><first>Ruty</first><last>Rinott</last></author>
       <author><first>Lena</first><last>Dankin</last></author>
-      <author><first>Carlos</first><last>Alzate Perez</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="carlos-alzate"><first>Carlos</first><last>Alzate Perez</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Ehud</first><last>Aharoni</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
       <pages>440–450</pages>
@@ -560,7 +560,7 @@
     </paper>
     <paper id="51">
       <title>Spelling Correction of User Search Queries through Statistical Machine Translation</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Carmen</first><last>Heger</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
       <pages>451–460</pages>
@@ -590,7 +590,7 @@
     </paper>
     <paper id="54">
       <title>Joint Embedding of Query and Ad by Leveraging Implicit Feedback</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Yifan</first><last>Hu</last></author>
       <pages>482–491</pages>
       <url hash="27783140">D15-1054</url>
@@ -601,7 +601,7 @@
       <title>Automatic Extraction of Time Expressions Accross Domains in <fixed-case>F</fixed-case>rench Narratives</title>
       <author><first>Mike Donald</first><last>Tapi Nzali</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>492–498</pages>
       <url hash="eded8472">D15-1055</url>
       <doi>10.18653/v1/D15-1055</doi>
@@ -611,7 +611,7 @@
       <title>Semi-Supervised Bootstrapping of Relationship Extractors with Distributional Semantics</title>
       <author><first>David S.</first><last>Batista</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <pages>499–504</pages>
       <url hash="cf80175c">D15-1056</url>
       <doi>10.18653/v1/D15-1056</doi>
@@ -631,7 +631,7 @@
       <title>Named entity recognition with document-specific <fixed-case>KB</fixed-case> tag gazetteers</title>
       <author><first>Will</first><last>Radford</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>512–517</pages>
       <url hash="e341a3b3">D15-1058</url>
       <doi>10.18653/v1/D15-1058</doi>
@@ -639,9 +639,9 @@
     </paper>
     <paper id="59">
       <title>“A Spousal Relation Begins with a Deletion of engage and Ends with an Addition of divorce”: Learning State Changing Verbs from <fixed-case>W</fixed-case>ikipedia Revision History</title>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>518–523</pages>
       <url hash="ea973de1">D15-1059</url>
       <doi>10.18653/v1/D15-1059</doi>
@@ -651,8 +651,8 @@
       <title>Improving Distant Supervision for Information Extraction Using Label Propagation Through Lists</title>
       <author><first>Lidong</first><last>Bing</last></author>
       <author><first>Sneha</first><last>Chaudhari</last></author>
-      <author><first>Richard</first><last>Wang</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="richard-c-wang"><first>Richard</first><last>Wang</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>524–529</pages>
       <url hash="e94d7dac">D15-1060</url>
       <doi>10.18653/v1/D15-1060</doi>
@@ -663,7 +663,7 @@
     <paper id="61">
       <title>An Entity-centric Approach for Overcoming Knowledge Graph Sparsity</title>
       <author><first>Manjunath</first><last>Hegde</last></author>
-      <author><first>Partha P.</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha P.</first><last>Talukdar</last></author>
       <pages>530–535</pages>
       <url hash="26b3f20f">D15-1061</url>
       <doi>10.18653/v1/D15-1061</doi>
@@ -702,7 +702,7 @@
       <title>Inferring Binary Relation Schemas for Open Information Extraction</title>
       <author><first>Kangqi</first><last>Luo</last></author>
       <author><first>Xusheng</first><last>Luo</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>555–560</pages>
       <url hash="40d5e0a8">D15-1065</url>
       <doi>10.18653/v1/D15-1065</doi>
@@ -715,7 +715,7 @@
       <author><first>Zhiwei</first><last>Zhang</last></author>
       <author><first>Lejian</first><last>Liao</last></author>
       <author><first>Luo</first><last>Si</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>561–566</pages>
       <url hash="ef88f27a">D15-1066</url>
       <doi>10.18653/v1/D15-1066</doi>
@@ -723,7 +723,7 @@
     </paper>
     <paper id="67">
       <title>Online Sentence Novelty Scoring for Topical Document Streams</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>567–572</pages>
       <url hash="20db8e46">D15-1067</url>
       <doi>10.18653/v1/D15-1067</doi>
@@ -731,13 +731,13 @@
     </paper>
     <paper id="68">
       <title>Global Thread-level Inference for Comment Classification in Community Question Answering</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>573–578</pages>
       <url hash="f5bca324">D15-1068</url>
       <doi>10.18653/v1/D15-1068</doi>
@@ -805,10 +805,10 @@
     </paper>
     <paper id="75">
       <title>A large annotated corpus for learning natural language inference</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Gabor</first><last>Angeli</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>632–642</pages>
       <url hash="e65921bb">D15-1075</url>
       <doi>10.18653/v1/D15-1075</doi>
@@ -819,7 +819,7 @@
       <title>Question-Answer Driven Semantic Role Labeling: Using Natural Language to Annotate Natural Language</title>
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>643–653</pages>
       <url hash="8bbb57ad">D15-1076</url>
       <doi>10.18653/v1/D15-1076</doi>
@@ -845,7 +845,7 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Xiaobin</first><last>Wang</last></author>
       <author><first>Yadong</first><last>Chen</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Tongtao</first><last>Zhang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>665–675</pages>
@@ -908,7 +908,7 @@
     <paper id="83">
       <title>Corpus-level Fine-grained Entity Typing Using Contextual Information</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>715–725</pages>
       <url hash="bc771170">D15-1083</url>
       <doi>10.18653/v1/D15-1083</doi>
@@ -917,7 +917,7 @@
     <paper id="84">
       <title>Knowledge Base Unification via Sense Embeddings and Disambiguation</title>
       <author><first>Claudio</first><last>Delli Bovi</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>726–736</pages>
       <url hash="6eda428d">D15-1084</url>
@@ -928,7 +928,7 @@
       <title>Open-Domain Name Error Detection using a Multi-Task <fixed-case>RNN</fixed-case></title>
       <author><first>Hao</first><last>Cheng</last></author>
       <author><first>Hao</first><last>Fang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>737–746</pages>
       <url hash="e7389499">D15-1085</url>
       <doi>10.18653/v1/D15-1085</doi>
@@ -1002,7 +1002,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Chenxi</first><last>Zhu</last></author>
       <author><first>Shiyu</first><last>Wu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>793–798</pages>
       <url hash="2e4ff754">D15-1092</url>
       <doi>10.18653/v1/D15-1092</doi>
@@ -1075,7 +1075,7 @@
       <author><first>Li</first><last>Li</last></author>
       <author><first>Houfeng</first><last>Wang</last></author>
       <author><first>Xu</first><last>Sun</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Shi</first><last>Zhao</last></author>
       <author><first>Lei</first><last>Sha</last></author>
       <pages>835–839</pages>
@@ -1127,7 +1127,7 @@
       <title>Joint Entity Recognition and Disambiguation</title>
       <author><first>Gang</first><last>Luo</last></author>
       <author><first>Xiaojiang</first><last>Huang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Zaiqing</first><last>Nie</last></author>
       <pages>879–888</pages>
       <url hash="817614dd">D15-1104</url>
@@ -1150,7 +1150,7 @@
       <title>Hierarchical Recurrent Neural Network for Document Modeling</title>
       <author><first>Rui</first><last>Lin</last></author>
       <author><first>Shujie</first><last>Liu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Sheng</first><last>Li</last></author>
@@ -1174,7 +1174,7 @@
       <title>Dual Decomposition Inference for Graphical Models over Strings</title>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>917–927</pages>
       <url hash="d3d2f40f">D15-1108</url>
       <doi>10.18653/v1/D15-1108</doi>
@@ -1185,7 +1185,7 @@
       <title>Discourse parsing for multi-party chat dialogues</title>
       <author><first>Stergos</first><last>Afantenos</last></author>
       <author><first>Eric</first><last>Kow</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Jérémy</first><last>Perret</last></author>
       <pages>928–937</pages>
       <url hash="8c2a3232">D15-1109</url>
@@ -1207,7 +1207,7 @@
     </paper>
     <paper id="111">
       <title>Feature-Rich Two-Stage Logistic Regression for Monolingual Alignment</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>949–959</pages>
@@ -1241,9 +1241,9 @@
     </paper>
     <paper id="114">
       <title>Mise en Place: Unsupervised Interpretation of Instructional Recipes</title>
-      <author><first>Chloé</first><last>Kiddon</last></author>
+      <author id="chloe-kiddon"><first>Chloé</first><last>Kiddon</last></author>
       <author><first>Ganesa Thandavam</first><last>Ponnuraj</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>982–992</pages>
       <url hash="84bfad22">D15-1114</url>
@@ -1253,7 +1253,7 @@
     <paper id="115">
       <title>Semantic Framework for Comparison Structures in Natural Language</title>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>993–1002</pages>
       <url hash="d03a36de">D15-1115</url>
       <doi>10.18653/v1/D15-1115</doi>
@@ -1271,9 +1271,9 @@
     </paper>
     <paper id="117">
       <title>Incorporating Trustiness and Collective Synonym/Contrastive Evidence into Taxonomy Construction</title>
-      <author><last>Luu</last> <first>Anh Tuan</first></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
-      <author><first>See Kiong</first><last>Ng</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
+      <author id="see-kiong-ng"><first>See Kiong</first><last>Ng</last></author>
       <pages>1013–1022</pages>
       <url hash="3df224cf">D15-1117</url>
       <doi>10.18653/v1/D15-1117</doi>
@@ -1292,7 +1292,7 @@
       <title>Improving fast_align by Reordering</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1034–1039</pages>
       <url hash="16256dea">D15-1119</url>
       <doi>10.18653/v1/D15-1119</doi>
@@ -1309,7 +1309,7 @@
     </paper>
     <paper id="121">
       <title>A Discriminative Training Procedure for Continuous Translation Models</title>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>1046–1052</pages>
@@ -1319,8 +1319,8 @@
     </paper>
     <paper id="122">
       <title>System Combination for Machine Translation through Paraphrasing</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1053–1058</pages>
       <url hash="ed5b6895">D15-1122</url>
       <doi>10.18653/v1/D15-1122</doi>
@@ -1339,8 +1339,8 @@
     <paper id="124">
       <title><fixed-case>R</fixed-case>e<fixed-case>V</fixed-case>al: A Simple and Effective Machine Translation Evaluation Metric Based on Recurrent Neural Networks</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1066–1072</pages>
       <url hash="ea02cea6">D15-1124</url>
       <doi>10.18653/v1/D15-1124</doi>
@@ -1373,8 +1373,8 @@
       <author><first>Evangelos</first><last>Papalexakis</last></author>
       <author><first>Christos</first><last>Faloutsos</last></author>
       <author><first>Nikos</first><last>Sidiropoulos</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
-      <author><first>Partha P.</first><last>Talukdar</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
+      <author id="partha-talukdar"><first>Partha P.</first><last>Talukdar</last></author>
       <author><first>Xiao</first><last>Fu</last></author>
       <pages>1084–1088</pages>
       <url hash="4993379a">D15-1127</url>
@@ -1386,7 +1386,7 @@
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1089–1094</pages>
       <url hash="1571eb55">D15-1128</url>
       <doi>10.18653/v1/D15-1128</doi>
@@ -1396,7 +1396,7 @@
       <title>Rule Selection with Soft Syntactic Features for String-to-Tree Statistical Machine Translation</title>
       <author><first>Fabienne</first><last>Braune</last></author>
       <author><first>Nina</first><last>Seemann</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1095–1101</pages>
       <url hash="2cdbe842">D15-1129</url>
       <doi>10.18653/v1/D15-1129</doi>
@@ -1428,7 +1428,7 @@
     <paper id="132">
       <title>The Overall Markedness of Discourse Relations</title>
       <author><first>Lifeng</first><last>Jin</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>1114–1119</pages>
       <url hash="0c23e589">D15-1132</url>
       <doi>10.18653/v1/D15-1132</doi>
@@ -1437,7 +1437,7 @@
     <paper id="133">
       <title>Experiments in Open Domain Deception Detection</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1120–1125</pages>
       <url hash="685b13cd">D15-1133</url>
       <doi>10.18653/v1/D15-1133</doi>
@@ -1446,7 +1446,7 @@
     <paper id="134">
       <title>A model of rapid phonotactic generalization</title>
       <author><first>Tal</first><last>Linzen</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
       <pages>1126–1131</pages>
       <url hash="64fee224">D15-1134</url>
       <doi>10.18653/v1/D15-1134</doi>
@@ -1456,7 +1456,7 @@
       <title>Automatically Solving Number Word Problems by Semantic Parsing and Reasoning</title>
       <author><first>Shuming</first><last>Shi</last></author>
       <author><first>Yuehui</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Xiaojiang</first><last>Liu</last></author>
       <author><first>Yong</first><last>Rui</last></author>
       <pages>1132–1142</pages>
@@ -1520,7 +1520,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Chenxi</first><last>Zhu</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1197–1206</pages>
       <url hash="9246e823">D15-1141</url>
       <doi>10.18653/v1/D15-1141</doi>
@@ -1542,8 +1542,8 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1217–1227</pages>
       <url hash="8ba591c9">D15-1143</url>
       <doi>10.18653/v1/D15-1143</doi>
@@ -1563,7 +1563,7 @@
     <paper id="145">
       <title>Graph-Based Collective Lexical Selection for Statistical Machine Translation</title>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Xianpei</first><last>Han</last></author>
       <author><first>Junfeng</first><last>Yao</last></author>
@@ -1575,7 +1575,7 @@
     <paper id="146">
       <title>Bilingual Correspondence Recursive Autoencoder for Statistical Machine Translation</title>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Biao</first><last>Zhang</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Junfeng</first><last>Yao</last></author>
@@ -1587,12 +1587,12 @@
     </paper>
     <paper id="147">
       <title>How to Avoid Unwanted Pregnancies: Domain Adaptation using Neural Network Models</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Kamla</first><last>Al-Mannai</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1259–1270</pages>
       <url hash="ff09a572">D15-1147</url>
       <doi>10.18653/v1/D15-1147</doi>
@@ -1621,7 +1621,7 @@
       <title>Part-of-speech Taggers for Low-resource Languages using <fixed-case>CCA</fixed-case> Features</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Benjamin</first><last>Snyder</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>1292–1302</pages>
       <url hash="43e0783f">D15-1150</url>
       <doi>10.18653/v1/D15-1150</doi>
@@ -1629,7 +1629,7 @@
     </paper>
     <paper id="151">
       <title>An Improved Tag Dictionary for Faster Part-of-Speech Tagging</title>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>1303–1308</pages>
       <url hash="81c17674">D15-1151</url>
       <doi>10.18653/v1/D15-1151</doi>
@@ -1657,7 +1657,7 @@
     <paper id="154">
       <title>Efficient Inner-to-outer Greedy Algorithm for Higher-order Labeled Dependency Parsing</title>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1322–1328</pages>
       <url hash="38d794de">D15-1154</url>
       <doi>10.18653/v1/D15-1154</doi>
@@ -1667,7 +1667,7 @@
       <title>Online Updating of Word Representations for Part-of-Speech Tagging</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Tobias</first><last>Schnabel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1329–1334</pages>
       <url hash="794d0e30">D15-1155</url>
       <doi>10.18653/v1/D15-1155</doi>
@@ -1690,7 +1690,7 @@
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Corentin</first><last>Ribeyre</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>1341–1347</pages>
       <url hash="74a143ab">D15-1157</url>
       <doi>10.18653/v1/D15-1157</doi>
@@ -1732,8 +1732,8 @@
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Silvio</first><last>Amir</last></author>
       <author><first>Ramón</first><last>Fermandez</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <pages>1367–1372</pages>
@@ -1764,7 +1764,7 @@
     <paper id="164">
       <title>Learning Semantic Representations for Nonterminals in Hierarchical Phrase-Based Translation</title>
       <author><first>Xing</first><last>Wang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>1391–1400</pages>
       <url hash="b89eb37a">D15-1164</url>
@@ -1778,7 +1778,7 @@
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1401–1411</pages>
       <url hash="56b0806b">D15-1165</url>
       <doi>10.18653/v1/D15-1165</doi>
@@ -1787,9 +1787,9 @@
     </paper>
     <paper id="166">
       <title>Effective Approaches to Attention-based Neural Machine Translation</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Hieu</first><last>Pham</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1412–1421</pages>
       <url hash="c25bc213">D15-1166</url>
       <doi>10.18653/v1/D15-1166</doi>
@@ -1811,8 +1811,8 @@
     <paper id="168">
       <title>Fine-grained Opinion Mining with Recurrent Neural Networks and Word Embeddings</title>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>1433–1443</pages>
       <url hash="eeb36582">D15-1168</url>
       <doi>10.18653/v1/D15-1168</doi>
@@ -1823,7 +1823,7 @@
       <title>Joint <fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing and Semantic Role Labelling</title>
       <author><first>Mike</first><last>Lewis</last></author>
       <author><first>Luheng</first><last>He</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1444–1454</pages>
       <url hash="9ea29091">D15-1169</url>
       <doi>10.18653/v1/D15-1169</doi>
@@ -1832,10 +1832,10 @@
     </paper>
     <paper id="170">
       <title>Improving Semantic Parsing with Enriched Synchronous Context-Free Grammar</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
       <author><first>Wei</first><last>Lu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1455–1465</pages>
       <url hash="25779077">D15-1170</url>
       <doi>10.18653/v1/D15-1170</doi>
@@ -1871,7 +1871,7 @@
     <paper id="173">
       <title>Efficient and Expressive Knowledge Base Completion Using Subgraph Feature Extraction</title>
       <author><first>Matt</first><last>Gardner</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1488–1498</pages>
       <url hash="7bf24bc4">D15-1173</url>
       <doi>10.18653/v1/D15-1173</doi>
@@ -1895,8 +1895,8 @@
     <paper id="175">
       <title>A Utility Model of Authors in the Scientific Community</title>
       <author><first>Yanchuan</first><last>Sim</last></author>
-      <author><first>Bryan</first><last>Routledge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="bryan-r-routledge"><first>Bryan</first><last>Routledge</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1510–1519</pages>
       <url hash="43f989d5">D15-1175</url>
       <doi>10.18653/v1/D15-1175</doi>
@@ -1905,12 +1905,12 @@
     <paper id="176">
       <title>Finding Function in Form: Compositional Character Models for Open Vocabulary Word Representation</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <author><first>Ramón</first><last>Fermandez</last></author>
       <author><first>Silvio</first><last>Amir</last></author>
-      <author><first>Luís</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
       <pages>1520–1530</pages>
       <url hash="b1b61d65">D15-1176</url>
@@ -1930,7 +1930,7 @@
     <paper id="178">
       <title>Conversation Trees: A Grammar Model for Topic Structure in Forums</title>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1543–1553</pages>
       <url hash="7230ff66">D15-1178</url>
       <doi>10.18653/v1/D15-1178</doi>
@@ -2001,7 +2001,7 @@
       <title>Recognizing Textual Entailment Using Probabilistic Inference</title>
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Tingsong</first><last>Jiang</last></author>
       <pages>1620–1625</pages>
@@ -2013,7 +2013,7 @@
       <title><fixed-case>C</fixed-case>hinese Semantic Role Labeling with Bidirectional Recurrent Neural Networks</title>
       <author><first>Zhen</first><last>Wang</last></author>
       <author><first>Tingsong</first><last>Jiang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>1626–1631</pages>
       <url hash="8b459ff0">D15-1186</url>
@@ -2023,8 +2023,8 @@
     <paper id="187">
       <title>Unsupervised Negation Focus Identification with Word-Topic Graph Model</title>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1632–1636</pages>
       <url hash="5d0b2a23">D15-1187</url>
       <doi>10.18653/v1/D15-1187</doi>
@@ -2043,7 +2043,7 @@
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1643–1648</pages>
       <url hash="ea2630c0">D15-1189</url>
       <doi>10.18653/v1/D15-1189</doi>
@@ -2054,7 +2054,7 @@
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <pages>1649–1655</pages>
       <url hash="f07b70be">D15-1190</url>
       <doi>10.18653/v1/D15-1190</doi>
@@ -2074,7 +2074,7 @@
     <paper id="192">
       <title>Learning to Identify the Best Contexts for Knowledge-based <fixed-case>WSD</fixed-case></title>
       <author><first>Evgenia</first><last>Wasserman Pritsker</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Einat</first><last>Minkov</last></author>
       <pages>1662–1667</pages>
       <url hash="53a8036a">D15-1192</url>
@@ -2128,7 +2128,7 @@
       <title>A Strong Lexical Matching Method for the Machine Comprehension Test</title>
       <author><first>Ellery</first><last>Smith</last></author>
       <author><first>Nicola</first><last>Greco</last></author>
-      <author><first>Matko</first><last>Bošnjak</last></author>
+      <author id="matko-bosnjak"><first>Matko</first><last>Bošnjak</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
       <pages>1693–1698</pages>
       <url hash="dabecfec">D15-1197</url>
@@ -2139,7 +2139,7 @@
       <title>Broad-coverage <fixed-case>CCG</fixed-case> Semantic Parsing with <fixed-case>AMR</fixed-case></title>
       <author><first>Yoav</first><last>Artzi</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1699–1710</pages>
       <url hash="25042c27">D15-1198</url>
       <doi>10.18653/v1/D15-1198</doi>
@@ -2150,11 +2150,11 @@
     <paper id="199">
       <title>Semantically Conditioned <fixed-case>LSTM</fixed-case>-based Natural Language Generation for Spoken Dialogue Systems</title>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>1711–1721</pages>
       <url hash="20dec577">D15-1199</url>
       <doi>10.18653/v1/D15-1199</doi>
@@ -2165,7 +2165,7 @@
     <paper id="200">
       <title>Do Multi-Sense Embeddings Improve Natural Language Understanding?</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1722–1732</pages>
       <url hash="88e42d59">D15-1200</url>
       <doi>10.18653/v1/D15-1200</doi>
@@ -2176,7 +2176,7 @@
       <title>Learning Semantic Composition to Detect Non-compositionality of Multiword Expressions</title>
       <author><first>Majid</first><last>Yazdani</last></author>
       <author><first>Meghdad</first><last>Farahmand</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>1733–1742</pages>
       <url hash="8cef93b1">D15-1201</url>
       <doi>10.18653/v1/D15-1201</doi>
@@ -2218,7 +2218,7 @@
     </paper>
     <paper id="205">
       <title>Improved Relation Extraction with Feature-Rich Compositional Embedding Models</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>1774–1784</pages>
@@ -2269,7 +2269,7 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1817–1827</pages>
       <url hash="ed4d7acf">D15-1209</url>
       <doi>10.18653/v1/D15-1209</doi>
@@ -2295,7 +2295,7 @@
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
       <author><first>Yafeng</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>1837–1846</pages>
       <url hash="1dfe46e8">D15-1211</url>
       <doi>10.18653/v1/D15-1211</doi>
@@ -2303,7 +2303,7 @@
     </paper>
     <paper id="212">
       <title>Multilingual discriminative lexicalized phrase structure parsing</title>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>1847–1856</pages>
       <url hash="a7e42efb">D15-1212</url>
       <doi>10.18653/v1/D15-1212</doi>
@@ -2321,7 +2321,7 @@
     <paper id="214">
       <title>Diversity in Spectral Learning for Natural Language Parsing</title>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1868–1878</pages>
       <url hash="422564f9">D15-1214</url>
       <doi>10.18653/v1/D15-1214</doi>
@@ -2333,7 +2333,7 @@
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Chenxi</first><last>Zhu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1879–1889</pages>
       <url hash="e073e5b5">D15-1215</url>
       <doi>10.18653/v1/D15-1215</doi>
@@ -2343,7 +2343,7 @@
       <title>Turn-taking phenomena in incremental dialogue systems</title>
       <author><first>Hatim</first><last>Khouzaimi</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>1890–1895</pages>
       <url hash="c4501f48">D15-1216</url>
       <doi>10.18653/v1/D15-1216</doi>
@@ -2367,8 +2367,8 @@
       <author><first>Gaurav</first><last>Kumar</last></author>
       <author><first>Graeme</first><last>Blackwood</last></author>
       <author><first>Jan</first><last>Trmal</last></author>
-      <author><first>Daniel</first><last>Povey</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="daniel-povey"><first>Daniel</first><last>Povey</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>1902–1907</pages>
       <url hash="cfc8cd0c">D15-1218</url>
       <doi>10.18653/v1/D15-1218</doi>
@@ -2386,7 +2386,7 @@
       <title>Concept-based Summarization using Integer Linear Programming: From Concept Pruning to Multiple Optimal Solutions</title>
       <author><first>Florian</first><last>Boudin</last></author>
       <author><first>Hugo</first><last>Mougard</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>1914–1918</pages>
       <url hash="ed4ae6ba">D15-1220</url>
       <doi>10.18653/v1/D15-1220</doi>
@@ -2404,7 +2404,7 @@
     </paper>
     <paper id="222">
       <title>Better Summarization Evaluation with Word Embeddings for <fixed-case>ROUGE</fixed-case></title>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Viktoria</first><last>Abrecht</last></author>
       <pages>1925–1930</pages>
       <url hash="9a03136e">D15-1222</url>
@@ -2455,7 +2455,7 @@
     <paper id="227">
       <title>Summarizing Student Responses to Reflection Prompts</title>
       <author><first>Wencan</first><last>Luo</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1955–1960</pages>
       <url hash="8b6f7739">D15-1227</url>
       <doi>10.18653/v1/D15-1227</doi>
@@ -2466,7 +2466,7 @@
       <title>Extractive Summarization by Maximizing Semantic Volume</title>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1961–1966</pages>
       <url hash="6bc14e68">D15-1228</url>
       <doi>10.18653/v1/D15-1228</doi>
@@ -2485,7 +2485,7 @@
     <paper id="230">
       <title>Discourse Planning with an N-gram Model of Relations</title>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1973–1977</pages>
       <url hash="b401e51d">D15-1230</url>
       <doi>10.18653/v1/D15-1230</doi>
@@ -2526,7 +2526,7 @@
       <author><first>Anil</first><last>Ramakrishna</last></author>
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
       <author><first>Elizabeth</first><last>Staruk</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>1996–2001</pages>
       <url hash="b5c78bb2">D15-1234</url>
       <doi>10.18653/v1/D15-1234</doi>
@@ -2535,7 +2535,7 @@
     </paper>
     <paper id="235">
       <title><fixed-case>EMNLP</fixed-case> versus <fixed-case>ACL</fixed-case>: Analyzing <fixed-case>NLP</fixed-case> research over time</title>
-      <author><first>Sujatha Das</first><last>Gollapalli</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></author>
       <author><first>Xiaoli</first><last>Li</last></author>
       <pages>2002–2006</pages>
       <url hash="91591e00">D15-1235</url>
@@ -2557,8 +2557,8 @@
     <paper id="237">
       <title><fixed-case>W</fixed-case>iki<fixed-case>QA</fixed-case>: A Challenge Dataset for Open-Domain Question Answering</title>
       <author><first>Yi</first><last>Yang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>2013–2018</pages>
       <url hash="3d6bea37">D15-1237</url>
       <doi>10.18653/v1/D15-1237</doi>
@@ -2581,7 +2581,7 @@
       <author><first>Aaron</first><last>Jaech</last></author>
       <author><first>Victoria</first><last>Zayats</last></author>
       <author><first>Hao</first><last>Fang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>2026–2031</pages>
       <url hash="d4bb274d">D15-1239</url>
@@ -2592,7 +2592,7 @@
     <paper id="240">
       <title>What Your Username Says About You</title>
       <author><first>Aaron</first><last>Jaech</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>2032–2037</pages>
       <url hash="543abf0d">D15-1240</url>
       <doi>10.18653/v1/D15-1240</doi>
@@ -2603,7 +2603,7 @@
       <title>Knowledge Base Inference using Bridging Entities</title>
       <author><first>Bhushan</first><last>Kotnis</last></author>
       <author><first>Pradeep</first><last>Bansal</last></author>
-      <author><first>Partha P.</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha P.</first><last>Talukdar</last></author>
       <pages>2038–2043</pages>
       <url hash="3535c140">D15-1241</url>
       <doi>10.18653/v1/D15-1241</doi>
@@ -2627,7 +2627,7 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Guillaume</first><last>Lample</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>2049–2054</pages>
       <url hash="e61f78da">D15-1243</url>
       <doi>10.18653/v1/D15-1243</doi>
@@ -2648,9 +2648,9 @@
     </paper>
     <paper id="245">
       <title>Any-language frame-semantic parsing</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2062–2066</pages>
       <url hash="ab2a69f6">D15-1245</url>
       <doi>10.18653/v1/D15-1245</doi>
@@ -2701,7 +2701,7 @@
       <title>A Binarized Neural Network Joint Model for Machine Translation</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>2094–2099</pages>
@@ -2714,7 +2714,7 @@
       <title><fixed-case>B</fixed-case>ayesian Optimization of Text Representations</title>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2100–2105</pages>
       <url hash="88e535f1">D15-1251</url>
       <doi>10.18653/v1/D15-1251</doi>
@@ -2776,7 +2776,7 @@
     <paper id="257">
       <title>Modeling Reportable Events as Turning Points in Narrative</title>
       <author><first>Jessica</first><last>Ouyang</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>2149–2158</pages>
       <url hash="f95928f1">D15-1257</url>
       <doi>10.18653/v1/D15-1257</doi>
@@ -2797,7 +2797,7 @@
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Baolin</first><last>Peng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2168–2178</pages>
       <url hash="c81a8876">D15-1259</url>
       <doi>10.18653/v1/D15-1259</doi>
@@ -2808,7 +2808,7 @@
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <pages>2179–2189</pages>
       <url hash="32c1771e">D15-1260</url>
@@ -2819,7 +2819,7 @@
       <title>Estimation of Discourse Segmentation Labels from Crowd Data</title>
       <author><first>Ziheng</first><last>Huang</last></author>
       <author><first>Jialu</first><last>Zhong</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>2190–2200</pages>
       <url hash="6ce30614">D15-1261</url>
       <doi>10.18653/v1/D15-1261</doi>
@@ -2857,7 +2857,7 @@
     <paper id="265">
       <title>Wikification of Concept Mentions within Spoken Dialogues Using Domain Constraints from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>2225–2229</pages>
       <url hash="584f6ef8">D15-1265</url>
@@ -2868,7 +2868,7 @@
       <title>Shallow Convolutional Neural Network for Implicit Discourse Relation Recognition</title>
       <author><first>Biao</first><last>Zhang</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Yaojie</first><last>Lu</last></author>
       <author><first>Hong</first><last>Duan</last></author>
       <author><first>Junfeng</first><last>Yao</last></author>
@@ -2892,7 +2892,7 @@
       <author><first>Ryuichiro</first><last>Higashinaka</last></author>
       <author><first>Masahiro</first><last>Mizukami</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Hiroshi</first><last>Tsukahara</last></author>
       <author><first>Yuka</first><last>Kobayashi</last></author>
       <pages>2243–2248</pages>
@@ -2929,7 +2929,7 @@
       <title>Adapting Coreference Resolution for Narrative Processing</title>
       <author><first>Quynh Ngoc Thi</first><last>Do</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>2262–2267</pages>
       <url hash="d29f45ca">D15-1271</url>
       <doi>10.18653/v1/D15-1271</doi>
@@ -2937,10 +2937,10 @@
     </paper>
     <paper id="272">
       <title>Joint Lemmatization and Morphological Tagging with Lemming</title>
-      <author><first>Thomas</first><last>Müller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2268–2274</pages>
       <url hash="f2c94087">D15-1272</url>
       <doi>10.18653/v1/D15-1272</doi>
@@ -2950,7 +2950,7 @@
     <paper id="273">
       <title>Transducer Disambiguation with Sparse Topological Features</title>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>2275–2280</pages>
       <url hash="ca693204">D15-1273</url>
@@ -2960,7 +2960,7 @@
     <paper id="274">
       <title><fixed-case>A</fixed-case>rabic Diacritization with Recurrent Neural Networks</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>2281–2285</pages>
       <url hash="029da697">D15-1274</url>
       <doi>10.18653/v1/D15-1274</doi>
@@ -2998,9 +2998,9 @@
     <paper id="278">
       <title>When Are Tree Structures Necessary for Deep Learning of Representations?</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Thang</first><last>Luong</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2304–2314</pages>
       <url hash="1fbba188">D15-1278</url>
       <doi>10.18653/v1/D15-1278</doi>
@@ -3027,7 +3027,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Xinchi</first><last>Chen</last></author>
       <author><first>Shiyu</first><last>Wu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2326–2335</pages>
       <url hash="bd7bb0d5">D15-1280</url>
       <doi>10.18653/v1/D15-1280</doi>
@@ -3038,7 +3038,7 @@
       <title>Verbal and Nonverbal Clues for Real-life Deception Detection</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <author><first>Mohamed</first><last>Abouelenien</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Yao</first><last>Xiao</last></author>
       <author><first>CJ</first><last>Linton</last></author>
       <author><first>Mihai</first><last>Burzo</last></author>
@@ -3061,8 +3061,8 @@
     <paper id="283">
       <title>Co-Training for Topic Classification of Scholarly Data</title>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Florin</first><last>Bulgarov</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="florin-bulgarov"><first>Florin</first><last>Bulgarov</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2357–2366</pages>
       <url hash="5ea5842b">D15-1283</url>
       <doi>10.18653/v1/D15-1283</doi>
@@ -3072,9 +3072,9 @@
     <paper id="284">
       <title>Humor Recognition and Humor Anchor Extraction</title>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2367–2376</pages>
       <url hash="7f2499bd">D15-1284</url>
       <doi>10.18653/v1/D15-1284</doi>
@@ -3098,8 +3098,8 @@
     </paper>
     <paper id="286">
       <title>A Dynamic Programming Algorithm for Computing N-gram Posteriors from Lattices</title>
-      <author><first>Doğan</first><last>Can</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="dogan-can"><first>Doğan</first><last>Can</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>2388–2397</pages>
       <url hash="65c8201c">D15-1286</url>
       <doi>10.18653/v1/D15-1286</doi>
@@ -3120,8 +3120,8 @@
       <title>Compact, Efficient and Unlimited Capacity: Language Modeling with Compressed Suffix Trees</title>
       <author><first>Ehsan</first><last>Shareghi</last></author>
       <author><first>Matthias</first><last>Petri</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2409–2418</pages>
       <url hash="710cd2fd">D15-1288</url>
       <doi>10.18653/v1/D15-1288</doi>
@@ -3133,7 +3133,7 @@
       <title><fixed-case>ERSOM</fixed-case>: A Structural Ontology Matching Approach Using Automatically Learned Entity Representation</title>
       <author><first>Chuncheng</first><last>Xiang</last></author>
       <author><first>Tingsong</first><last>Jiang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>2419–2429</pages>
       <url hash="9988108f">D15-1289</url>
@@ -3143,7 +3143,7 @@
     <paper id="290">
       <title>A Single Word is not Enough: Ranking Multiword Expressions Using Distributional Semantics</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>2430–2440</pages>
       <url hash="57ea39b6">D15-1290</url>
       <doi>10.18653/v1/D15-1290</doi>
@@ -3206,7 +3206,7 @@
     <paper id="297">
       <title><fixed-case>JEAM</fixed-case>: A Novel Model for Cross-Domain Sentiment Classification Based on Emotion Analysis</title>
       <author><first>Kun-Hu</first><last>Luo</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <author><first>Hongliang</first><last>Yu</last></author>
       <author><first>Liang-Chen</first><last>Wei</last></author>
       <pages>2503–2508</pages>
@@ -3238,7 +3238,7 @@
       <author><first>Raksha</first><last>Sharma</last></author>
       <author><first>Mohit</first><last>Gupta</last></author>
       <author><first>Astha</first><last>Agarwal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2520–2526</pages>
       <url hash="8821ab0c">D15-1300</url>
       <doi>10.18653/v1/D15-1300</doi>
@@ -3256,11 +3256,11 @@
     </paper>
     <paper id="302">
       <title>A Multi-lingual Annotated Dataset for Aspect-Oriented Opinion Mining</title>
-      <author><first>Salud M.</first><last>Jiménez Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud M.</first><last>Jiménez Zafra</last></author>
       <author><first>Giacomo</first><last>Berardi</last></author>
       <author><first>Andrea</first><last>Esuli</last></author>
       <author><first>Diego</first><last>Marcheggiani</last></author>
-      <author><first>María Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="m-teresa-martin-valdivia"><first>María Teresa</first><last>Martín-Valdivia</last></author>
       <author><first>Alejandro</first><last>Moreo Fernández</last></author>
       <pages>2533–2538</pages>
       <url hash="b9aa0917">D15-1302</url>
@@ -3271,7 +3271,7 @@
       <title>Deep Convolutional Neural Network Textual Features and Multiple Kernel Learning for Utterance-level Multimodal Sentiment Analysis</title>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>2539–2544</pages>
       <url hash="d458309e">D15-1303</url>
       <doi>10.18653/v1/D15-1303</doi>
@@ -3280,7 +3280,7 @@
     <paper id="304">
       <title><fixed-case>SLSA</fixed-case>: A Sentiment Lexicon for <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>2545–2550</pages>
       <url hash="4ef8c651">D15-1304</url>
       <doi>10.18653/v1/D15-1304</doi>
@@ -3310,7 +3310,7 @@
     <paper id="307">
       <title>Detection of Steganographic Techniques on <fixed-case>T</fixed-case>witter</title>
       <author><first>Alex</first><last>Wilson</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Andrew</first><last>Ker</last></author>
       <pages>2564–2569</pages>
       <url hash="d0bf77b2">D15-1307</url>
@@ -3332,8 +3332,8 @@
     <paper id="309">
       <title>An Analysis of Domestic Abuse Discourse on <fixed-case>R</fixed-case>eddit</title>
       <author><first>Nicolas</first><last>Schrading</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
-      <author><first>Ray</first><last>Ptucha</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="raymond-ptucha"><first>Ray</first><last>Ptucha</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
       <pages>2577–2583</pages>
       <url hash="bc09c37e">D15-1309</url>
@@ -3353,8 +3353,8 @@
     <paper id="311">
       <title>Classifying Tweet Level Judgements of Rumours in Social Media</title>
       <author><first>Michal</first><last>Lukasik</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>2590–2595</pages>
       <url hash="02cfec57">D15-1311</url>
       <doi>10.18653/v1/D15-1311</doi>
@@ -3404,7 +3404,7 @@ The major aim of the tutorial is to provide NLP researchers with an introduction
       <title>Transparent Machine Learning for Information Extraction: State-of-the-Art and the Future</title>
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <abstract>The rise of Big Data analytics over unstructured text has led to renewed interest in information extraction (IE). These applications need effective IE as a first step towards solving end-to-end real world problems (e.g. biology, medicine, finance, media and entertainment, etc). Much recent NLP research has focused on addressing specific IE problems using a pipeline of multiple machine learning techniques. This approach requires an analyst with the expertise to answer questions such as: “What ML techniques should I combine to solve this problem?”; “What features will be useful for the composite pipeline?”; and “Why is my model giving the wrong answer on this document?”. The need for this expertise creates problems in real world applications. It is very difficult in practice to find an analyst who both understands the real world problem and has deep knowledge of applied machine learning. As a result, the real impact by current IE research does not match up to the abundant opportunities available.
 
 In this tutorial, we introduce the concept of transparent machine learning. A transparent ML technique is one that:
@@ -3418,16 +3418,16 @@ The tutorial is aimed at IE researchers in both the academic and industry commun
     </paper>
     <paper id="4">
       <title>Knowledge Acquisition for Web Search</title>
-      <author><first>Marius</first><last>Pasca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
       <abstract>The identification of textual items, or documents, that best match a user’s information need, as expressed in search queries, forms the core functionality of information retrieval systems. Well-known challenges are associated with understanding the intent behind user queries; and, more importantly, with matching inherently-ambiguous queries to documents that may employ lexically different phrases to convey the same meaning. The conversion of semi-structured content from Wikipedia and other resources into structured data produces knowledge potentially more suitable to database-style queries and, ideally, to use in information retrieval. In parallel, the availability of textual documents on the Web enables an aggressive push towards the automatic acquisition of various types of knowledge from text. Methods developed under the umbrella of open-domain information extraction acquire open-domain classes of instances and relations from Web text. The methods operate over unstructured or semi-structured text available within collections of Web documents, or over relatively more intriguing streams of anonymized search queries. Some of the methods import the automatically-extracted data into human-generated resources, or otherwise exploit existing human-generated resources. In both cases, the goal is to expand the coverage of the initial resources, thus providing information about more of the topics that people in general, and Web search users in particular, may be interested in.</abstract>
       <bibkey>pasca-2015-knowledge</bibkey>
     </paper>
     <paper id="5">
       <title>Learning Semantic Relations from Text</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <abstract>Every non-trivial text describes interactions and relations between people, institutions, activities, events and so on. What we know about the world consists in large part of such relations, and that knowledge contributes to the understanding of what texts refer to. Newly found relations can in turn become part of this knowledge that is stored for future use.
 
 To grasp a text’s semantic content, an automatic system must be able to recognize relations in texts and reason about them. This may be done by applying and updating previously acquired knowledge. We focus here in particular on semantic relations which describe the interactions among nouns and compact noun phrases, and we present such relations from both a theoretical and a practical perspective. The theoretical exploration sketches the historical path which has brought us to the contemporary view and interpretation of semantic relations. We discuss a wide range of relation inventories proposed by linguists and by language processing people. Such inventories vary by domain, granularity and suitability for downstream applications.
@@ -3438,7 +3438,7 @@ On the practical side, we investigate the recognition and acquisition of relatio
     <paper id="6">
       <title>Applications of Social Media Text Analysis</title>
       <author><first>Atefeh</first><last>Farzindar</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <abstract>Analyzing social media texts is a complex problem that becomes difficult to address using traditional Natural Language Processing (NLP) methods. Our tutorial focuses on presenting new methods for NLP tasks and applications that work on noisy and informal texts, such as the ones from social media.
 
 Automatic processing of large collections of social media texts is important because they contain a lot of useful information, due to the in-creasing popularity of all types of social media. Use of social media and messaging apps grew 203 percent year-on-year in 2013, with overall app use rising 115 percent over the same period, as reported by Statista, citing data from Flurry Analytics. This growth means that 1.61 billion people are now active in social media around the world and this is expected to advance to 2 billion users in 2016, led by India. The research shows that consumers are now spending daily 5.6 hours on digital media including social media and mo-bile internet usage.
@@ -3448,7 +3448,7 @@ At the heart of this interest is the ability for users to create and share conte
     </paper>
     <paper id="7">
       <title>Robust Semantic Analysis of Multiword Expressions with <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <abstract>This tutorial will give participants a solid understanding of the linguistic features of multiword expressions (MWEs), focusing on the semantics of such expressions and their importance for natural language processing and language technology, with particular attention to the way that FrameNet (framenet.icsi.berkeley.edu) handles this wide spread phenomenon. Our target audience includes researchers and practitioners of language technology, not necessarily experts in MWEs or knowledgeable about FrameNet, who are interested in NLP tasks that involve or could benefit from considering MWEs as a pervasive phenomenon in human language and communication.
 
@@ -3461,8 +3461,8 @@ While native speakers use these forms with ease, the treatment and interpretatio
     </paper>
     <paper id="8">
       <title>Computational Analysis of Affect and Emotion in Language</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <abstract>Computational linguistics has witnessed a surge of interest in approaches to emotion and affect analysis, tackling problems that extend beyond sentiment analysis in depth and complexity. This area involves basic emotions (such as joy, sadness, and fear) as well as any of the hundreds of other emotions humans are capable of (such as optimism, frustration, and guilt), expanding into affective conditions, experiences, and activities. Leveraging linguistic data for computational affect and emotion inference enables opportunities to address a range of affect-related tasks, problems, and non-invasive applications that capture aspects essential to the human condition and individuals’ cognitive processes. These efforts enable and facilitate human-centered computing experiences, as demonstrated by applications across clinical, socio-political, artistic, educational, and commercial domains. Efforts to computationally detect, characterize, and generate emotions or affect-related phenomena respond equally to technological needs for personalized, micro-level analytics and broad-coverage, macro-level inference, and they have involved both small and massive amounts of data.
 
 While this is an exciting area with numerous opportunities for members of the ACL community, a major obstacle is its intersection with other investigatory traditions, necessitating knowledge transfer. This tutorial comprehensively integrates relevant concepts and frameworks from linguistics, cognitive science, affective computing, and computational linguistics in order to equip researchers and practitioners with the adequate background and knowledge to work effectively on problems and tasks either directly involving, or benefiting from having an understanding of, affect and emotion analysis.
diff --git a/data/xml/D16.xml b/data/xml/D16.xml
index cf24e6a389..75dd3e594d 100644
--- a/data/xml/D16.xml
+++ b/data/xml/D16.xml
@@ -63,9 +63,9 @@
       <title>Distinguishing Past, On-going, and Future Events: The <fixed-case>E</fixed-case>vent<fixed-case>S</fixed-case>tatus Corpus</title>
       <author><first>Ruihong</first><last>Huang</last></author>
       <author><first>Ignacio</first><last>Cases</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>44–54</pages>
       <url hash="60bd307a">D16-1005</url>
       <doi>10.18653/v1/D16-1005</doi>
@@ -76,7 +76,7 @@
       <title>Nested Propositions in Open Information Extraction</title>
       <author><first>Nikita</first><last>Bhutani</last></author>
       <author><first>H. V.</first><last>Jagadish</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>55–64</pages>
       <url hash="fef228d4">D16-1006</url>
       <doi>10.18653/v1/D16-1006</doi>
@@ -88,7 +88,7 @@
       <author><first>Yunlun</first><last>Yang</last></author>
       <author><first>Yunhai</first><last>Tong</last></author>
       <author><first>Shulei</first><last>Ma</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <pages>65–74</pages>
       <url hash="a0efb0e6">D16-1007</url>
       <doi>10.18653/v1/D16-1007</doi>
@@ -118,7 +118,7 @@
     <paper id="10">
       <title>Comparing Computational Cognitive Models of Generalization in a Language Acquisition Task</title>
       <author><first>Libby</first><last>Barak</last></author>
-      <author><first>Adele E.</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele E.</first><last>Goldberg</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <pages>96–106</pages>
       <url hash="65279e11">D16-1010</url>
@@ -141,7 +141,7 @@
       <title>Deep Multi-Task Learning with Shared Memory for Text Classification</title>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>118–127</pages>
       <url hash="3fdc1576">D16-1012</url>
       <doi>10.18653/v1/D16-1012</doi>
@@ -166,7 +166,7 @@
       <title>Creating Causal Embeddings for Question Answering with Minimal Supervision</title>
       <author><first>Rebecca</first><last>Sharp</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Michael</first><last>Hammond</last></author>
       <pages>138–148</pages>
@@ -203,7 +203,7 @@
       <title>Event participant modelling with neural networks</title>
       <author><first>Ottokar</first><last>Tilk</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <author><first>Stefan</first><last>Thater</last></author>
       <pages>171–182</pages>
@@ -306,8 +306,8 @@
     <paper id="26">
       <title>Zero-Resource Translation with Multi-Lingual Neural Machine Translation</title>
       <author><first>Orhan</first><last>Firat</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
-      <author><first>Yaser</first><last>Al-onaizan</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-onaizan</last></author>
       <author><first>Fatos T.</first><last>Yarman Vural</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>268–277</pages>
@@ -331,9 +331,9 @@
     <paper id="28">
       <title>Semi-Supervised Learning of Sequence Models with Method of Moments</title>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>287–296</pages>
       <url hash="c1b9feae">D16-1028</url>
       <doi>10.18653/v1/D16-1028</doi>
@@ -345,7 +345,7 @@
       <author><first>Shyam</first><last>Upadhyay</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>297–306</pages>
       <url hash="c6b1c69b">D16-1029</url>
       <doi>10.18653/v1/D16-1029</doi>
@@ -366,7 +366,7 @@
     <paper id="31">
       <title>Language as a Latent Variable: Discrete Generative Models for Sentence Compression</title>
       <author><first>Yishu</first><last>Miao</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>319–328</pages>
       <url hash="86c59ce0">D16-1031</url>
       <doi>10.18653/v1/D16-1031</doi>
@@ -375,8 +375,8 @@
     </paper>
     <paper id="32">
       <title>Globally Coherent Text Generation with Neural Checklist Models</title>
-      <author><first>Chloé</first><last>Kiddon</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="chloe-kiddon"><first>Chloé</first><last>Kiddon</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>329–339</pages>
       <url hash="27f421d9">D16-1032</url>
@@ -388,7 +388,7 @@
       <title>A Dataset and Evaluation Metrics for Abstractive Compression of Sentences and Short Paragraphs</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Ke M.</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke M.</first><last>Tran</last></author>
       <author><first>Saleema</first><last>Amershi</last></author>
       <pages>340–350</pages>
       <url hash="c1cdeba7">D16-1033</url>
@@ -410,9 +410,9 @@
     </paper>
     <paper id="35">
       <title>Discourse Parsing with Attention-based Hierarchical Neural Networks</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Tianshi</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>362–371</pages>
       <url hash="1b7c678c">D16-1035</url>
       <doi>10.18653/v1/D16-1035</doi>
@@ -436,7 +436,7 @@
     <paper id="37">
       <title>Variational Neural Discourse Relation Recognizer</title>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Rongrong</first><last>Ji</last></author>
@@ -460,10 +460,10 @@
     </paper>
     <paper id="39">
       <title>Learning Term Embeddings for Taxonomic Relation Identification Using Dynamic Weighting Neural Network</title>
-      <author><last>Luu</last> <first>Anh Tuan</first></author>
-      <author><first>Yi</first><last>Tay</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
+      <author><last>Tay</last><first>Yi</first></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
-      <author><first>See Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See Kiong</first><last>Ng</last></author>
       <pages>403–413</pages>
       <url hash="01cc066c">D16-1039</url>
       <doi>10.18653/v1/D16-1039</doi>
@@ -473,7 +473,7 @@
       <title>Relation Schema Induction using Tensor Factorization with Side Information</title>
       <author><first>Madhav</first><last>Nimishakavi</last></author>
       <author><first>Uday Singh</first><last>Saini</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>414–423</pages>
       <url hash="792585fa">D16-1040</url>
       <doi>10.18653/v1/D16-1040</doi>
@@ -481,8 +481,8 @@
     </paper>
     <paper id="41">
       <title>Supervised Distributional Hypernym Discovery via Domain Adaptation</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Claudio</first><last>Delli Bovi</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>424–435</pages>
@@ -501,7 +501,7 @@
     <paper id="43">
       <title>Comparing Data Sources and Architectures for Deep Visual Representation Learning in Semantics</title>
       <author><first>Douwe</first><last>Kiela</last></author>
-      <author><first>Anita Lilla</first><last>Verő</last></author>
+      <author id="anita-lilla-vero"><first>Anita Lilla</first><last>Verő</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>447–456</pages>
       <url hash="0af384f2">D16-1043</url>
@@ -559,7 +559,7 @@
       <title>Automatic Cross-Lingual Similarization of Dependency Grammars for Tree-based Machine Translation</title>
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Wen</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Rangjia</first><last>Cai</last></author>
       <pages>501–510</pages>
       <url hash="6b037471">D16-1048</url>
@@ -581,7 +581,7 @@
     <paper id="50">
       <title>Variational Neural Machine Translation</title>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Hong</first><last>Duan</last></author>
       <author><first>Min</first><last>Zhang</last></author>
@@ -593,7 +593,7 @@
     <paper id="51">
       <title>Towards a Convex <fixed-case>HMM</fixed-case> Surrogate for Word Alignment</title>
       <author><first>Andrei</first><last>Simion</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Cliff</first><last>Stein</last></author>
       <pages>531–540</pages>
       <url hash="75c60288">D16-1051</url>
@@ -640,7 +640,7 @@
     </paper>
     <paper id="55">
       <title>Learning to Translate for Multilingual Question Answering</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <pages>573–584</pages>
       <url hash="c5f70158">D16-1055</url>
@@ -662,7 +662,7 @@
       <author><first>William L.</first><last>Hamilton</last></author>
       <author><first>Kevin</first><last>Clark</last></author>
       <author><first>Jure</first><last>Leskovec</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>595–605</pages>
       <url hash="d9039f73">D16-1057</url>
       <doi>10.18653/v1/D16-1057</doi>
@@ -713,7 +713,7 @@
     </paper>
     <paper id="62">
       <title>Building an Evaluation Scale using Item Response Theory</title>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Hao</first><last>Wu</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>648–657</pages>
@@ -727,7 +727,7 @@
       <author><first>Hyokun</first><last>Yun</last></author>
       <author><first>Pinar</first><last>Yanardag</last></author>
       <author><first>Shin</first><last>Matsushima</last></author>
-      <author><first>S. V. N.</first><last>Vishwanathan</last></author>
+      <author id="s-v-n-vishwanathan"><first>S. V. N.</first><last>Vishwanathan</last></author>
       <pages>658–668</pages>
       <url hash="e5c580a8">D16-1063</url>
       <doi>10.18653/v1/D16-1063</doi>
@@ -769,7 +769,7 @@
     <paper id="67">
       <title>Enhanced Personalized Search using Social Data</title>
       <author><first>Dong</first><last>Zhou</last></author>
-      <author><first>Séamus</first><last>Lawless</last></author>
+      <author id="seamus-lawless"><first>Séamus</first><last>Lawless</last></author>
       <author><first>Xuan</first><last>Wu</last></author>
       <author><first>Wenyu</first><last>Zhao</last></author>
       <author><first>Jianxun</first><last>Liu</last></author>
@@ -794,7 +794,7 @@
       <author><first>Jin</first><last>Qian</last></author>
       <author><first>Ya</first><last>Guo</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>721–730</pages>
       <url hash="8f1d8630">D16-1069</url>
       <doi>10.18653/v1/D16-1069</doi>
@@ -813,8 +813,8 @@
     <paper id="71">
       <title><fixed-case>LAMB</fixed-case>: A Good Shepherd of Morphologically Rich Languages</title>
       <author><first>Sebastian</first><last>Ebert</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>742–752</pages>
       <url hash="1f8f5298">D16-1071</url>
       <doi>10.18653/v1/D16-1071</doi>
@@ -856,7 +856,7 @@
       <title>News Stream Summarization using Burst Information Networks</title>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Lei</first><last>Cui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
@@ -868,8 +868,8 @@
     <paper id="76">
       <title>Rationale-Augmented Convolutional Neural Networks for Text Classification</title>
       <author><first>Ye</first><last>Zhang</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>795–804</pages>
       <url hash="3b1118be">D16-1076</url>
       <doi>10.18653/v1/D16-1076</doi>
@@ -878,7 +878,7 @@
     <paper id="77">
       <title>Transferring User Interests Across Websites with Unstructured Text for Cold-Start Recommendation</title>
       <author><first>Yu-Yang</first><last>Huang</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>805–814</pages>
       <url hash="b35ac042">D16-1077</url>
       <doi>10.18653/v1/D16-1077</doi>
@@ -888,8 +888,8 @@
       <title>Speculation and Negation Scope Detection via Convolutional Neural Networks</title>
       <author><first>Zhong</first><last>Qian</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
       <author><first>Wei</first><last>Luo</last></author>
       <pages>815–825</pages>
@@ -902,7 +902,7 @@
       <title>Analyzing Linguistic Knowledge in Sequential Model of Sentence</title>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>826–835</pages>
       <url hash="adec8b2f">D16-1079</url>
       <doi>10.18653/v1/D16-1079</doi>
@@ -913,7 +913,7 @@
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yang</first><last>Wang</last></author>
       <author><first>Yeyun</first><last>Gong</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>836–845</pages>
       <url hash="874bace7">D16-1080</url>
       <doi>10.18653/v1/D16-1080</doi>
@@ -933,7 +933,7 @@
     </paper>
     <paper id="82">
       <title>Structured prediction models for <fixed-case>RNN</fixed-case> based sequence labeling in clinical text</title>
-      <author><first>Abhyuday</first><last>Jagannatha</last></author>
+      <author id="abhyuday-jagannatha"><first>Abhyuday</first><last>Jagannatha</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>856–865</pages>
       <url hash="7326691e">D16-1082</url>
@@ -954,9 +954,9 @@
     <paper id="84">
       <title>Stance Detection with Bidirectional Conditional Encoding</title>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>876–885</pages>
       <url hash="87141af1">D16-1084</url>
       <doi>10.18653/v1/D16-1084</doi>
@@ -965,7 +965,7 @@
     <paper id="85">
       <title>Modeling Skip-Grams for Event Detection with Convolutional Neural Networks</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>886–891</pages>
       <url hash="bd126d05">D16-1085</url>
       <doi>10.18653/v1/D16-1085</doi>
@@ -988,7 +988,7 @@
       <author><first>Gabriela</first><last>Ferraro</last></author>
       <author><first>Liyuan</first><last>Zhou</last></author>
       <author><first>Weiwei</first><last>Hou</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>899–905</pages>
       <url hash="1e038ba1">D16-1087</url>
       <doi>10.18653/v1/D16-1087</doi>
@@ -1063,8 +1063,8 @@
     <paper id="94">
       <title>Richer Interpolative Smoothing Based on Modified <fixed-case>K</fixed-case>neser-<fixed-case>N</fixed-case>ey Language Modeling</title>
       <author><first>Ehsan</first><last>Shareghi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>944–949</pages>
       <url hash="a8785352">D16-1094</url>
       <doi>10.18653/v1/D16-1094</doi>
@@ -1084,9 +1084,9 @@
     <paper id="96">
       <title>Coverage Embedding Models for Neural Machine Translation</title>
       <author><first>Haitao</first><last>Mi</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <pages>955–960</pages>
       <url hash="2f93b7ff">D16-1096</url>
       <doi>10.18653/v1/D16-1096</doi>
@@ -1094,9 +1094,9 @@
     </paper>
     <paper id="97">
       <title>Neural Morphological Analysis: Encoding-Decoding Canonical Segments</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>961–967</pages>
       <url hash="9034db59">D16-1097</url>
       <doi>10.18653/v1/D16-1097</doi>
@@ -1169,8 +1169,8 @@
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Vaibhav</first><last>Tripathi</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>1006–1011</pages>
       <url hash="e16ffae6">D16-1104</url>
       <doi>10.18653/v1/D16-1104</doi>
@@ -1210,7 +1210,7 @@
     <paper id="108">
       <title>Characterizing the Language of Online Communities and its Relation to Community Reception</title>
       <author><first>Trang</first><last>Tran</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>1030–1035</pages>
       <url hash="a42ea618">D16-1108</url>
       <doi>10.18653/v1/D16-1108</doi>
@@ -1220,7 +1220,7 @@
       <title>Joint Transition-based Dependency Parsing and Disfluency Detection for Automatic Speech Recognition Texts</title>
       <author><first>Masashi</first><last>Yoshikawa</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1036–1041</pages>
       <url hash="9d078577">D16-1109</url>
       <doi>10.18653/v1/D16-1109</doi>
@@ -1252,7 +1252,7 @@
       <title>Neural Headline Generation on <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation</title>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Jun</first><last>Suzuki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>1054–1059</pages>
@@ -1294,10 +1294,10 @@
       <title>Semantic Parsing with Semi-Supervised Sequential Autoencoders</title>
       <author><first>Tomáš</first><last>Kočiský</last></author>
       <author><first>Gábor</first><last>Melis</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
       <pages>1078–1087</pages>
       <url hash="46c03803">D16-1116</url>
@@ -1365,9 +1365,9 @@
     <paper id="122">
       <title>Detecting and Characterizing Events</title>
       <author><first>Allison</first><last>Chaney</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Matthew</first><last>Connelly</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <pages>1142–1152</pages>
       <url hash="b2669e91">D16-1122</url>
       <doi>10.18653/v1/D16-1122</doi>
@@ -1377,9 +1377,9 @@
     </paper>
     <paper id="123">
       <title>Convolutional Neural Network Language Models</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
-      <author><first>German</first><last>Kruszewski</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>1153–1162</pages>
       <url hash="0201341d">D16-1123</url>
       <doi>10.18653/v1/D16-1123</doi>
@@ -1389,7 +1389,7 @@
     <paper id="124">
       <title>Generalizing and Hybridizing Count-based and Neural Language Models</title>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1163–1172</pages>
       <url hash="02a05668">D16-1124</url>
       <doi>10.18653/v1/D16-1124</doi>
@@ -1423,7 +1423,7 @@
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Will</first><last>Monroe</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>1192–1202</pages>
@@ -1465,7 +1465,7 @@
     </paper>
     <paper id="131">
       <title>Antecedent Selection for Sluicing: Structure and Content</title>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <author><first>Daniel</first><last>Hardt</last></author>
       <pages>1234–1243</pages>
       <url hash="c0ab1a5b">D16-1131</url>
@@ -1477,7 +1477,7 @@
       <title>Intra-Sentential Subject Zero Anaphora Resolution using Multi-Column Convolutional Neural Network</title>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Canasai</first><last>Kruengkrai</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <pages>1244–1254</pages>
@@ -1490,7 +1490,7 @@
       <title>An Unsupervised Probability Model for Speech-to-Translation Alignment of Low-Resource Languages</title>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <pages>1255–1263</pages>
       <url hash="adf17778">D16-1133</url>
       <doi>10.18653/v1/D16-1133</doi>
@@ -1501,7 +1501,7 @@
       <title><fixed-case>HUME</fixed-case>: Human <fixed-case>UCCA</fixed-case>-Based Evaluation of Machine Translation</title>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Omri</first><last>Abend</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <pages>1264–1274</pages>
       <url hash="b3b343dd">D16-1134</url>
@@ -1512,7 +1512,7 @@
     <paper id="135">
       <title>Improving Multilingual Named Entity Recognition with <fixed-case>W</fixed-case>ikipedia Entity Type Mapping</title>
       <author><first>Jian</first><last>Ni</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>1275–1284</pages>
       <url hash="9604d3c2">D16-1135</url>
       <doi>10.18653/v1/D16-1135</doi>
@@ -1521,11 +1521,11 @@
     </paper>
     <paper id="136">
       <title>Learning Crosslingual Word Embeddings without Bilingual Corpora</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Hiroshi</first><last>Kanayama</last></author>
       <author><first>Tengfei</first><last>Ma</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1285–1295</pages>
       <url hash="8563953f">D16-1136</url>
       <doi>10.18653/v1/D16-1136</doi>
@@ -1535,7 +1535,7 @@
     <paper id="137">
       <title>Sequence-to-Sequence Learning as Beam-Search Optimization</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <pages>1296–1306</pages>
       <url hash="4d255b4f">D16-1137</url>
       <doi>10.18653/v1/D16-1137</doi>
@@ -1546,7 +1546,7 @@
       <title>Online Segment to Segment Neural Transduction</title>
       <author><first>Lei</first><last>Yu</last></author>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>1307–1316</pages>
       <url hash="6af43eae">D16-1138</url>
       <doi>10.18653/v1/D16-1138</doi>
@@ -1556,7 +1556,7 @@
     <paper id="139">
       <title>Sequence-Level Knowledge Distillation</title>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <pages>1317–1327</pages>
       <url hash="d154a4f0">D16-1139</url>
       <doi>10.18653/v1/D16-1139</doi>
@@ -1569,7 +1569,7 @@
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1328–1338</pages>
       <url hash="b6b19027">D16-1140</url>
       <doi>10.18653/v1/D16-1140</doi>
@@ -1637,7 +1637,7 @@
     <paper id="146">
       <title>Lifted Rule Injection for Relation Embeddings</title>
       <author><first>Thomas</first><last>Demeester</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>1389–1399</pages>
       <url hash="fba0e36b">D16-1146</url>
@@ -1662,9 +1662,9 @@
     <paper id="148">
       <title>Analyzing Framing through the Casts of Characters in the News</title>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Justin</first><last>Gross</last></author>
-      <author><first>Amber</first><last>Boydstun</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="justin-h-gross"><first>Justin</first><last>Gross</last></author>
+      <author id="amber-boydstun"><first>Amber</first><last>Boydstun</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1410–1420</pages>
       <url hash="dc2e786e">D16-1148</url>
       <doi>10.18653/v1/D16-1148</doi>
@@ -1673,7 +1673,7 @@
     </paper>
     <paper id="149">
       <title>The Teams Corpus and Entrainment in Multi-Party Spoken Dialogues</title>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Susannah</first><last>Paletz</last></author>
       <author><first>Zahra</first><last>Rahimi</last></author>
       <author><first>Stefani</first><last>Allegretti</last></author>
@@ -1720,9 +1720,9 @@
     <paper id="153">
       <title>Phonologically Aware Neural Model for Named Entity Recognition in Low Resource Transfer Settings</title>
       <author><first>Akash</first><last>Bharadwaj</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>1462–1472</pages>
       <url hash="462d1301">D16-1153</url>
       <doi>10.18653/v1/D16-1153</doi>
@@ -1743,11 +1743,11 @@
       <title>Jointly Learning Grounded Task Structures from Language Instruction and Visual Demonstration</title>
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Shaohua</first><last>Yang</last></author>
-      <author><first>Sari</first><last>Saba-Sadiya</last></author>
+      <author id="sari-saba-sadiya"><first>Sari</first><last>Saba-Sadiya</last></author>
       <author><first>Nishant</first><last>Shukla</last></author>
       <author><first>Yunzhong</first><last>He</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1482–1492</pages>
       <url hash="99041726">D16-1155</url>
       <doi>10.18653/v1/D16-1155</doi>
@@ -1802,7 +1802,7 @@
     <paper id="160">
       <title>Exploiting Source-side Monolingual Data in Neural Machine Translation</title>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1535–1545</pages>
       <url hash="9ca63a92">D16-1160</url>
       <doi>10.18653/v1/D16-1160</doi>
@@ -1840,7 +1840,7 @@
     </paper>
     <paper id="164">
       <title><fixed-case>M</fixed-case>ix<fixed-case>KM</fixed-case>eans: Clustering Question-Answer Archives</title>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>1576–1585</pages>
       <url hash="f72c6219">D16-1164</url>
       <doi>10.18653/v1/D16-1164</doi>
@@ -1848,9 +1848,9 @@
     </paper>
     <paper id="165">
       <title>It Takes Three to Tango: Triangulation Approach to Answer Ranking in Community Question Answering</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>1586–1597</pages>
       <url hash="618f570c">D16-1165</url>
       <doi>10.18653/v1/D16-1165</doi>
@@ -1877,9 +1877,9 @@
     </paper>
     <paper id="168">
       <title>A Theme-Rewriting Approach for Generating Algebra Word Problems</title>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>1617–1628</pages>
       <url hash="bcdc7759">D16-1168</url>
@@ -1901,7 +1901,7 @@
       <author><first>Lin</first><last>Gui</last></author>
       <author><first>Dongyin</first><last>Wu</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <pages>1639–1649</pages>
       <url hash="30d15455">D16-1170</url>
@@ -1925,7 +1925,7 @@
       <author><first>Jiacheng</first><last>Xu</last></author>
       <author><first>Danlu</first><last>Chen</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1660–1669</pages>
       <url hash="a99750f8">D16-1172</url>
       <doi>10.18653/v1/D16-1172</doi>
@@ -1936,7 +1936,7 @@
       <author><first>Zhiting</first><last>Hu</last></author>
       <author><first>Zichao</first><last>Yang</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1670–1679</pages>
       <url hash="347327c0">D16-1173</url>
       <doi>10.18653/v1/D16-1173</doi>
@@ -1957,7 +1957,7 @@
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>1691–1702</pages>
       <url hash="8f7d66b6">D16-1175</url>
       <doi>10.18653/v1/D16-1175</doi>
@@ -1969,7 +1969,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Jifan</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1703–1712</pages>
       <url hash="90d0bc13">D16-1176</url>
       <doi>10.18653/v1/D16-1176</doi>
@@ -1993,8 +1993,8 @@
     <paper id="178">
       <title><fixed-case>F</fixed-case>riends with Motives: Using Text to Infer Influence on <fixed-case>SCOTUS</fixed-case></title>
       <author><first>Yanchuan</first><last>Sim</last></author>
-      <author><first>Bryan</first><last>Routledge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="bryan-r-routledge"><first>Bryan</first><last>Routledge</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1724–1733</pages>
       <url hash="6f372bfb">D16-1178</url>
       <doi>10.18653/v1/D16-1178</doi>
@@ -2004,7 +2004,7 @@
     <paper id="179">
       <title>Verb Phrase Ellipsis Resolution Using Discriminative and Margin-Infused Algorithms</title>
       <author><first>Kian</first><last>Kenyon-Dean</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Doina</first><last>Precup</last></author>
       <pages>1734–1743</pages>
       <url hash="8e3f2f35">D16-1179</url>
@@ -2016,8 +2016,8 @@
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1744–1753</pages>
       <url hash="769e2730">D16-1180</url>
       <doi>10.18653/v1/D16-1180</doi>
@@ -2044,7 +2044,7 @@
     </paper>
     <paper id="183">
       <title>Neural Shift-Reduce <fixed-case>CCG</fixed-case> Semantic Parsing</title>
-      <author><first>Dipendra Kumar</first><last>Misra</last></author>
+      <author id="dipendra-misra"><first>Dipendra Kumar</first><last>Misra</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>1775–1786</pages>
       <url hash="95cb6a19">D16-1183</url>
@@ -2108,7 +2108,7 @@
     <paper id="189">
       <title>Deep Reinforcement Learning with a Combinatorial Action Space for Predicting Popular <fixed-case>R</fixed-case>eddit Threads</title>
       <author><first>Ji</first><last>He</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Jianshu</first><last>Chen</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
@@ -2121,7 +2121,7 @@
     </paper>
     <paper id="190">
       <title>Non-Literal Text Reuse in Historical Texts: An Approach to Identify Reuse Transformations and its Application to <fixed-case>B</fixed-case>ible Reuse</title>
-      <author><first>Maria</first><last>Moritz</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Moritz</last></author>
       <author><first>Andreas</first><last>Wiederhold</last></author>
       <author><first>Barbara</first><last>Pavlek</last></author>
       <author><first>Yuri</first><last>Bizzoni</last></author>
@@ -2165,7 +2165,7 @@
       <title>Non-uniform Language Detection in Technical Writing</title>
       <author><first>Weibo</first><last>Wang</last></author>
       <author><first>Abidalrahman</first><last>Moh’d</last></author>
-      <author><first>Aminul</first><last>Islam</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
       <author><first>Axel</first><last>Soto</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
       <pages>1892–1900</pages>
@@ -2186,7 +2186,7 @@
     <paper id="196">
       <title>Orthographic Syllable as basic unit for <fixed-case>SMT</fixed-case> between Related Languages</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1912–1917</pages>
       <url hash="fc88db95">D16-1196</url>
       <doi>10.18653/v1/D16-1196</doi>
@@ -2239,7 +2239,7 @@
     <paper id="201">
       <title>Combining Supervised and Unsupervised Enembles for Knowledge Base Population</title>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1943–1948</pages>
       <url hash="41143854">D16-1201</url>
       <doi>10.18653/v1/D16-1201</doi>
@@ -2248,9 +2248,9 @@
     <paper id="202">
       <title>Character Sequence Models for Colorful Words</title>
       <author><first>Kazuya</first><last>Kawakami</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Bryan</first><last>Routledge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="bryan-r-routledge"><first>Bryan</first><last>Routledge</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1949–1954</pages>
       <url hash="1bfbc04f">D16-1202</url>
       <doi>10.18653/v1/D16-1202</doi>
@@ -2271,7 +2271,7 @@
       <title>Improving <fixed-case>LSTM</fixed-case>-based Video Description with Linguistic Knowledge Mined from Text</title>
       <author><first>Subhashini</first><last>Venugopalan</last></author>
       <author><first>Lisa Anne</first><last>Hendricks</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Kate</first><last>Saenko</last></author>
       <pages>1961–1966</pages>
       <url hash="e9b726b2">D16-1204</url>
@@ -2294,7 +2294,7 @@
       <title>Speed-Accuracy Tradeoffs in Tagging with Variable-Order <fixed-case>CRF</fixed-case>s and Structured Sparsity</title>
       <author><first>Tim</first><last>Vieira</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1973–1978</pages>
       <url hash="b8c18fbd">D16-1206</url>
       <doi>10.18653/v1/D16-1206</doi>
@@ -2303,8 +2303,8 @@
     <paper id="207">
       <title>Learning Robust Representations of Text</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1979–1985</pages>
       <url hash="8c33ab04">D16-1207</url>
       <doi>10.18653/v1/D16-1207</doi>
@@ -2332,8 +2332,8 @@
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1998–2004</pages>
       <url hash="db7e6e73">D16-1210</url>
       <doi>10.18653/v1/D16-1210</doi>
@@ -2343,8 +2343,8 @@
       <title>Training with Exploration Improves a Greedy Stack <fixed-case>LSTM</fixed-case> Parser</title>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2005–2010</pages>
       <url hash="4846f970">D16-1211</url>
       <doi>10.18653/v1/D16-1211</doi>
@@ -2354,7 +2354,7 @@
       <title>Capturing Argument Relationship for <fixed-case>C</fixed-case>hinese Semantic Role Labeling</title>
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Tingsong</first><last>Jiang</last></author>
       <pages>2011–2016</pages>
@@ -2378,7 +2378,7 @@
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>John</first><last>Blitzer</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>2022–2027</pages>
       <url hash="e9a7c24b">D16-1214</url>
       <doi>10.18653/v1/D16-1214</doi>
@@ -2408,10 +2408,10 @@
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Rishi</first><last>Solanki</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>2042–2047</pages>
       <url hash="eb671782">D16-1217</url>
       <doi>10.18653/v1/D16-1217</doi>
@@ -2429,9 +2429,9 @@
     <paper id="219">
       <title>Using Syntactic and Semantic Context to Explore Psychodemographic Differences in Self-reference</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
-      <author><first>H Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H Andrew</first><last>Schwartz</last></author>
       <pages>2054–2059</pages>
       <url hash="965ec5d8">D16-1219</url>
       <doi>10.18653/v1/D16-1219</doi>
@@ -2439,7 +2439,7 @@
     </paper>
     <paper id="220">
       <title>Learning to Identify Metaphors from a Corpus of Proverbs</title>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
@@ -2452,7 +2452,7 @@
       <title>An Embedding Model for Predicting Roll-Call Votes</title>
       <author><first>Peter</first><last>Kraft</last></author>
       <author><first>Hirsh</first><last>Jain</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <pages>2066–2070</pages>
       <url hash="65cadf86">D16-1221</url>
       <doi>10.18653/v1/D16-1221</doi>
@@ -2462,7 +2462,7 @@
       <title>Natural Language Model Re-usability for Scaling to Different Domains</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Alexandre</first><last>Rochette</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>2071–2076</pages>
       <url hash="70ddce6b">D16-1222</url>
       <doi>10.18653/v1/D16-1222</doi>
@@ -2528,7 +2528,7 @@
       <title>There’s No Comparison: Reference-less Evaluation Metrics in Grammatical Error Correction</title>
       <author><first>Courtney</first><last>Napoles</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>2109–2115</pages>
       <url hash="c3d294ee">D16-1228</url>
       <doi>10.18653/v1/D16-1228</doi>
@@ -2538,7 +2538,7 @@
       <title>Cultural Shift or Linguistic Drift? Comparing Two Computational Measures of Semantic Change</title>
       <author><first>William L.</first><last>Hamilton</last></author>
       <author><first>Jure</first><last>Leskovec</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>2116–2121</pages>
       <url hash="8bd2cefe">D16-1229</url>
       <doi>10.18653/v1/D16-1229</doi>
@@ -2548,7 +2548,7 @@
       <title>How <fixed-case>NOT</fixed-case> To Evaluate Your Dialogue System: An Empirical Study of Unsupervised Evaluation Metrics for Dialogue Response Generation</title>
       <author><first>Chia-Wei</first><last>Liu</last></author>
       <author><first>Ryan</first><last>Lowe</last></author>
-      <author><first>Iulian</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian</first><last>Serban</last></author>
       <author><first>Mike</first><last>Noseworthy</last></author>
       <author><first>Laurent</first><last>Charlin</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
@@ -2584,13 +2584,13 @@
     <paper id="233">
       <title>Conditional Generation and Snapshot Learning in Neural Dialogue Systems</title>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>David</first><last>Vandyke</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>2153–2162</pages>
       <url hash="7e5b103c">D16-1233</url>
       <doi>10.18653/v1/D16-1233</doi>
@@ -2680,7 +2680,7 @@
     </paper>
     <paper id="241">
       <title>Who did What: A Large-Scale Person-Centered Cloze Dataset</title>
-      <author><first>Takeshi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takeshi</first><last>Onishi</last></author>
       <author><first>Hai</first><last>Wang</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
@@ -2708,7 +2708,7 @@
     <paper id="243">
       <title>Learning to Generate Compositional Color Descriptions</title>
       <author><first>Will</first><last>Monroe</last></author>
-      <author><first>Noah D.</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah D.</first><last>Goodman</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>2243–2248</pages>
       <url hash="4c1225b1">D16-1243</url>
@@ -2718,7 +2718,7 @@
     </paper>
     <paper id="244">
       <title>A Decomposable Attention Model for Natural Language Inference</title>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Oscar</first><last>Täckström</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
@@ -2731,7 +2731,7 @@
     <paper id="245">
       <title>Deep Reinforcement Learning for Mention-Ranking Coreference Models</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2256–2262</pages>
       <url hash="700be1b5">D16-1245</url>
       <doi>10.18653/v1/D16-1245</doi>
@@ -2776,7 +2776,7 @@
       <title>Supervised Attentions for Neural Machine Translation</title>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <pages>2283–2288</pages>
       <url hash="5b69e3b6">D16-1249</url>
       <doi>10.18653/v1/D16-1249</doi>
@@ -2786,8 +2786,8 @@
     <paper id="250">
       <title>Learning principled bilingual mappings of word embeddings while preserving monolingual invariance</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>2289–2294</pages>
       <url hash="66631314">D16-1250</url>
       <doi>10.18653/v1/D16-1250</doi>
@@ -2831,7 +2831,7 @@
       <title>Transition-Based Dependency Parsing with Heuristic Backtracking</title>
       <author><first>Jacob</first><last>Buckman</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>2313–2318</pages>
       <url hash="e7bcfb76">D16-1254</url>
       <doi>10.18653/v1/D16-1254</doi>
@@ -2841,8 +2841,8 @@
     <paper id="255">
       <title>Word Ordering Without Syntax</title>
       <author><first>Allen</first><last>Schmaltz</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>2319–2324</pages>
       <url hash="df7eefb8">D16-1255</url>
       <doi>10.18653/v1/D16-1255</doi>
@@ -2853,7 +2853,7 @@
       <title>Morphological Segmentation Inside-Out</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Arun</first><last>Kumar</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2325–2330</pages>
       <url hash="683c4b7e">D16-1256</url>
       <doi>10.18653/v1/D16-1256</doi>
@@ -2877,7 +2877,7 @@
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2337–2342</pages>
       <url hash="97528f74">D16-1258</url>
       <doi>10.18653/v1/D16-1258</doi>
@@ -2901,7 +2901,7 @@
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>2350–2354</pages>
       <url hash="e63d5538">D16-1260</url>
@@ -2925,7 +2925,7 @@
       <title>Global Neural <fixed-case>CCG</fixed-case> Parsing with Optimality Guarantees</title>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2366–2376</pages>
       <url hash="75432e53">D16-1262</url>
       <doi>10.18653/v1/D16-1262</doi>
@@ -2936,7 +2936,7 @@
       <title>Learning a Lexicon and Translation Model from Phoneme Lattices</title>
       <author><first>Oliver</first><last>Adams</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Quoc Truong</first><last>Do</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
@@ -2972,7 +2972,7 @@
     </meta>
     <paper id="1">
       <title>Practical Neural Networks for <fixed-case>NLP</fixed-case>: From Theory to Code</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <abstract>This tutorial aims to bring NLP researchers up to speed with the current techniques in deep learning and neural networks, and show them how they can turn their ideas into practical implementations. We will start with simple classification models (logistic regression and multilayer perceptrons) and cover more advanced patterns that come up in NLP such as recurrent networks for sequence tagging and prediction problems, structured networks (e.g., compositional architectures based on syntax trees), structured output spaces (sequences and trees), attention for sequence-to-sequence transduction, and feature induction for complex algorithm states. A particular emphasis will be on learning to represent complex objects as recursive compositions of simpler objects. This representation will reflect characterize standard objects in NLP, such as the composition of characters and morphemes into words, and words into sentences and documents. In addition, new opportunities such as learning to embed "algorithm states" such as those used in transition-based parsing and other sequential structured prediction models (for which effective features may be difficult to engineer by hand) will be covered.
@@ -3014,7 +3014,7 @@ The content of the tutorial is divided into 3 sections of 1 hour each. We assume
     </paper>
     <paper id="5">
       <title>Continuous Vector Spaces for Cross-language <fixed-case>NLP</fixed-case> Applications</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <abstract>The mathematical metaphor offered by the geometric concept of distance in vector spaces with respect to semantics and meaning has been proven to be useful in many monolingual natural language processing applications. There is also some recent and strong evidence that this paradigm can also be useful in the cross-language setting. In this tutorial, we present and discuss some of the most recent advances on exploiting the vector space model paradigm in specific cross-language natural language processing applications, along with a comprehensive review of the theoretical background behind them.
 
 First, the tutorial introduces some fundamental concepts of distributional semantics and vector space models. More specifically, the concepts of distributional hypothesis and term-document matrices are revised, followed by a brief discussion on linear and non-linear dimensionality reduction techniques and their implications to the parallel distributed approach to semantic cognition. Next, some classical examples of using vector space models in monolingual natural language processing applications are presented. Specific examples in the areas of information retrieval, related term identification and semantic compositionality are described.
diff --git a/data/xml/D17.xml b/data/xml/D17.xml
index b7a427eda4..7182f5e617 100644
--- a/data/xml/D17.xml
+++ b/data/xml/D17.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="6382c144">D17-1</url>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <editor><first>Rebecca</first><last>Hwa</last></editor>
       <editor><first>Sebastian</first><last>Riedel</last></editor>
       <doi>10.18653/v1/D17-1</doi>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Monolingual Phrase Alignment on Parse Forests</title>
       <author><first>Yuki</first><last>Arase</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>1–11</pages>
       <url hash="fb6e14e5">D17-1001</url>
       <doi>10.18653/v1/D17-1001</doi>
@@ -46,7 +46,7 @@
       <title>Quasi-Second-Order Parsing for 1-Endpoint-Crossing, Pagenumber-2 Graphs</title>
       <author><first>Junjie</first><last>Cao</last></author>
       <author><first>Sheng</first><last>Huang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>24–34</pages>
       <url hash="804175ee">D17-1003</url>
@@ -62,7 +62,7 @@
       <author><first>Victor</first><last>Zhong</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>35–45</pages>
       <url hash="97b57cba">D17-1004</url>
       <doi>10.18653/v1/D17-1004</doi>
@@ -91,7 +91,7 @@
       <title>Integrating Order Information and Event Relation for Script Event Prediction</title>
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <pages>57–67</pages>
       <url hash="a9a8bab1">D17-1006</url>
       <doi>10.18653/v1/D17-1006</doi>
@@ -130,7 +130,7 @@
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Oscar</first><last>Täckström</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>89–101</pages>
       <url hash="254916a6">D17-1009</url>
@@ -156,7 +156,7 @@
     <paper id="11">
       <title>Past, Present, Future: A Computational Investigation of the Typology of Tense in 1000 Languages</title>
       <author><first>Ehsaneddin</first><last>Asgari</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>113–124</pages>
       <url hash="2e4a9657">D17-1011</url>
       <doi>10.18653/v1/D17-1011</doi>
@@ -181,8 +181,8 @@
       <author><first>Rongxiang</first><last>Weng</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Zaixiang</first><last>Zheng</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>136–145</pages>
       <url hash="469dce79">D17-1013</url>
       <doi>10.18653/v1/D17-1013</doi>
@@ -192,8 +192,8 @@
     <paper id="14">
       <title>Towards Decoding as Continuous Optimisation in Neural Machine Translation</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>146–156</pages>
       <url hash="dc97ea3c">D17-1014</url>
       <doi>10.18653/v1/D17-1014</doi>
@@ -216,8 +216,8 @@
     <paper id="16">
       <title>Continuous Representation of Location for Geolocation and Lexical Dialectology using Mixture Density Networks</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>167–176</pages>
       <url hash="36cba80b">D17-1016</url>
       <doi>10.18653/v1/D17-1016</doi>
@@ -241,7 +241,7 @@
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>188–197</pages>
       <url hash="2f8cf056">D17-1018</url>
       <doi>10.18653/v1/D17-1018</doi>
@@ -252,7 +252,7 @@
     <paper id="19">
       <title>Neural Net Models of Open-domain Discourse Coherence</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>198–209</pages>
       <url hash="f865769d">D17-1019</url>
       <doi>10.18653/v1/D17-1019</doi>
@@ -265,7 +265,7 @@
       <author><first>Kexiang</first><last>Wang</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>210–220</pages>
       <url hash="56c66f70">D17-1020</url>
       <doi>10.18653/v1/D17-1020</doi>
@@ -290,7 +290,7 @@
       <title>Hierarchical Embeddings for Hypernymy Detection and Directionality</title>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>233–243</pages>
       <url hash="37e84236">D17-1022</url>
@@ -364,7 +364,7 @@
       <author><first>Avijit</first><last>Vajpayee</last></author>
       <author><first>Arjit</first><last>Srivastava</last></author>
       <author><first>Madan Gopal</first><last>Jhanwar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>292–297</pages>
       <url hash="863b8040">D17-1028</url>
       <doi>10.18653/v1/D17-1028</doi>
@@ -377,7 +377,7 @@
       <title>Exploiting Word Internal Structures for Generic <fixed-case>C</fixed-case>hinese Sentence Representation</title>
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>298–303</pages>
       <url hash="be18d45c">D17-1029</url>
       <doi>10.18653/v1/D17-1029</doi>
@@ -386,8 +386,8 @@
     </paper>
     <paper id="30">
       <title>High-risk learning: acquiring new word vectors from tiny data</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>304–309</pages>
       <url hash="b9c6b33b">D17-1030</url>
       <doi>10.18653/v1/D17-1030</doi>
@@ -398,7 +398,7 @@
     <paper id="31">
       <title>Word Embeddings based on Fixed-Size Ordinally Forgetting Encoding</title>
       <author><first>Joseph</first><last>Sanu</last></author>
-      <author><first>Mingbin</first><last>Xu</last></author>
+      <author id="mingbin-xu"><first>Mingbin</first><last>Xu</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
       <author><first>Quan</first><last>Liu</last></author>
       <pages>310–315</pages>
@@ -452,7 +452,7 @@
     </paper>
     <paper id="36">
       <title>Learning What’s Easy: Fully Differentiable Neural Easy-First Taggers</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Julia</first><last>Kreutzer</last></author>
       <pages>349–362</pages>
       <url hash="ac26e4e2">D17-1036</url>
@@ -476,7 +476,7 @@
     <paper id="38">
       <title>Learning to select data for transfer learning with <fixed-case>B</fixed-case>ayesian Optimization</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>372–382</pages>
       <url hash="21a63e8b">D17-1038</url>
       <doi>10.18653/v1/D17-1038</doi>
@@ -486,8 +486,8 @@
     <paper id="39">
       <title>Unsupervised Pretraining for Sequence to Sequence Learning</title>
       <author><first>Prajit</first><last>Ramachandran</last></author>
-      <author><first>Peter</first><last>Liu</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="peter-j-liu"><first>Peter</first><last>Liu</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>383–391</pages>
       <url hash="b1a05e5b">D17-1039</url>
       <doi>10.18653/v1/D17-1039</doi>
@@ -499,7 +499,7 @@
       <title>Efficient Attention using a Fixed-Size Memory Representation</title>
       <author><first>Denny</first><last>Britz</last></author>
       <author><first>Melody</first><last>Guan</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <pages>392–400</pages>
       <url hash="13001a28">D17-1040</url>
       <doi>10.18653/v1/D17-1040</doi>
@@ -510,7 +510,7 @@
       <title>Rotated Word Vector Representations and their Interpretability</title>
       <author><first>Sungjoon</first><last>Park</last></author>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>401–411</pages>
       <url hash="48b7ae10">D17-1041</url>
       <doi>10.18653/v1/D17-1041</doi>
@@ -530,7 +530,7 @@
     </paper>
     <paper id="43">
       <title>Piecewise Latent Variables for Neural Variational Text Processing</title>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
       <author><first>Alexander G.</first><last>Ororbia</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
       <author><first>Aaron</first><last>Courville</last></author>
@@ -589,7 +589,7 @@
     <paper id="48">
       <title>A Cognition Based Attention Model for Sentiment Analysis</title>
       <author><first>Yunfei</first><last>Long</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
       <author><first>Minglei</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
@@ -623,7 +623,7 @@
     <paper id="51">
       <title>Identifying Humor in Reviews using Background Text Sources</title>
       <author><first>Alex</first><last>Morales</last></author>
-      <author><first>Chengxiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last></author>
       <pages>492–501</pages>
       <url hash="b525e393">D17-1051</url>
       <doi>10.18653/v1/D17-1051</doi>
@@ -675,7 +675,7 @@
     </paper>
     <paper id="56">
       <title>Refining Word Embeddings for Sentiment Analysis</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Jin</first><last>Wang</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
@@ -691,7 +691,7 @@
       <author><first>Abhishek</first><last>Kumar</last></author>
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>540–546</pages>
       <url hash="d4749953">D17-1057</url>
       <doi>10.18653/v1/D17-1057</doi>
@@ -703,7 +703,7 @@
       <author><first>Raksha</first><last>Sharma</last></author>
       <author><first>Arpan</first><last>Somani</last></author>
       <author><first>Lakshya</first><last>Kumar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>547–552</pages>
       <url hash="7119f261">D17-1058</url>
       <doi>10.18653/v1/D17-1058</doi>
@@ -713,7 +713,7 @@
     <paper id="59">
       <title>Sentiment Lexicon Expansion Based on Neural <fixed-case>PU</fixed-case> Learning, Double Dictionary Lookup, and Polarity Association</title>
       <author><first>Yasheng</first><last>Wang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <pages>553–563</pages>
       <url hash="148a230e">D17-1059</url>
@@ -764,7 +764,7 @@
       <title>Learning how to Active Learn: A Deep Reinforcement Learning Approach</title>
       <author><first>Meng</first><last>Fang</last></author>
       <author><first>Yuan</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>595–605</pages>
       <url hash="4a60e58a">D17-1063</url>
       <doi>10.18653/v1/D17-1063</doi>
@@ -776,7 +776,7 @@
       <title>Split and Rephrase</title>
       <author><first>Shashi</first><last>Narayan</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Anastasia</first><last>Shimorina</last></author>
       <pages>606–616</pages>
       <url hash="a1b30c7b">D17-1064</url>
@@ -788,9 +788,9 @@
     <paper id="65">
       <title>Neural Response Generation via <fixed-case>GAN</fixed-case> with an Approximate Embedding Layer</title>
       <author><first>Zhen</first><last>Xu</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Baoxun</first><last>Wang</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <author><first>Zhuoran</first><last>Wang</last></author>
       <author><first>Chao</first><last>Qi</last></author>
@@ -962,8 +962,8 @@
       <author><first>Zhenting</first><last>Yu</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>760–766</pages>
       <url hash="0c297858">D17-1079</url>
       <doi>10.18653/v1/D17-1079</doi>
@@ -983,7 +983,7 @@
       <title>From Textbooks to Knowledge: A Case Study in Harvesting Axiomatic Knowledge from Textbooks to Solve Geometry Problems</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Kumar</first><last>Dubey</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>773–784</pages>
       <url hash="0f3330f2">D17-1081</url>
       <doi>10.18653/v1/D17-1081</doi>
@@ -996,7 +996,7 @@
       <author><first>Qizhe</first><last>Xie</last></author>
       <author><first>Hanxiao</first><last>Liu</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>785–794</pages>
       <url hash="d700ef56">D17-1082</url>
       <doi>10.18653/v1/D17-1082</doi>
@@ -1024,7 +1024,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Learning Fine-Grained Expressions to Solve Math Word Problems</title>
       <author><first>Danqing</first><last>Huang</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Jian</first><last>Yin</last></author>
       <pages>805–814</pages>
       <url hash="01aca850">D17-1084</url>
@@ -1038,7 +1038,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Wei</first><last>Wei</last></author>
       <author><first>Zi</first><last>Yang</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>815–824</pages>
       <url hash="9dcddde4">D17-1085</url>
       <doi>10.18653/v1/D17-1085</doi>
@@ -1050,7 +1050,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Teng</first><last>Long</last></author>
       <author><first>Emmanuel</first><last>Bengio</last></author>
       <author><first>Ryan</first><last>Lowe</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Doina</first><last>Precup</last></author>
       <pages>825–834</pages>
       <url hash="af762223">D17-1086</url>
@@ -1083,7 +1083,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="89">
       <title>Latent Space Embedding for Retrieval in Question-Answer Archives</title>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <author><first>Dinesh</first><last>Garg</last></author>
       <author><first>Shirish</first><last>Shevade</last></author>
       <pages>855–865</pages>
@@ -1210,7 +1210,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="100">
       <title>Deriving continous grounded meaning representations from referentially structured multimodal contexts</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>959–965</pages>
       <url hash="3c1a88ce">D17-1100</url>
@@ -1222,7 +1222,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Hierarchically-Attentive <fixed-case>RNN</fixed-case> for Album Summarization and Storytelling</title>
       <author><first>Licheng</first><last>Yu</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <pages>966–971</pages>
       <url hash="29b57454">D17-1101</url>
       <doi>10.18653/v1/D17-1101</doi>
@@ -1235,7 +1235,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Cheng-Yang</first><last>Fu</last></author>
       <author><first>Joon</first><last>Lee</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Alexander</first><last>Berg</last></author>
+      <author id="alexander-berg"><first>Alexander</first><last>Berg</last></author>
       <pages>972–978</pages>
       <url hash="777cba37">D17-1102</url>
       <doi>10.18653/v1/D17-1102</doi>
@@ -1258,7 +1258,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Evaluating Hierarchies of Verb Argument Structure with Hierarchical Clustering</title>
       <author><first>Jesse</first><last>Mu</last></author>
       <author><first>Joshua K.</first><last>Hartshorne</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
       <pages>986–991</pages>
       <url hash="ac27fd91">D17-1104</url>
       <doi>10.18653/v1/D17-1104</doi>
@@ -1278,7 +1278,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="106">
       <title>Mapping Instructions and Visual Observations to Actions with Reinforcement Learning</title>
-      <author><first>Dipendra</first><last>Misra</last></author>
+      <author id="dipendra-misra"><first>Dipendra</first><last>Misra</last></author>
       <author><first>John</first><last>Langford</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>1004–1015</pages>
@@ -1290,7 +1290,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="107">
       <title>An analysis of eye-movements during reading for the detection of mild cognitive impairment</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Kristina</first><last>Lundholm Fors</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <author><first>Arto</first><last>Nordlund</last></author>
@@ -1317,7 +1317,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Arun</first><last>Chaganty</last></author>
       <author><first>Ashwin</first><last>Paranjape</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1038–1048</pages>
       <url hash="40418b55">D17-1109</url>
       <doi>10.18653/v1/D17-1109</doi>
@@ -1341,7 +1341,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="111">
       <title>Globally Normalized Reader</title>
       <author><first>Jonathan</first><last>Raiman</last></author>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <pages>1059–1069</pages>
       <url hash="d2a93032">D17-1111</url>
       <doi>10.18653/v1/D17-1111</doi>
@@ -1379,7 +1379,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Junnan</first><last>Zhu</last></author>
       <author><first>Cong</first><last>Ma</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1092–1102</pages>
       <url hash="eed7fb9d">D17-1114</url>
       <doi>10.18653/v1/D17-1114</doi>
@@ -1446,7 +1446,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Youngseo</first><last>Son</last></author>
       <author><first>Vivek</first><last>Kulkarni</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>1146–1155</pages>
       <url hash="687aada8">D17-1119</url>
       <doi>10.18653/v1/D17-1119</doi>
@@ -1481,7 +1481,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Inter-Weighted Alignment Network for Sentence Pair Modeling</title>
       <author><first>Gehui</first><last>Shen</last></author>
       <author><first>Yunlun</first><last>Yang</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <pages>1179–1189</pages>
       <url hash="bc12eb4e">D17-1122</url>
       <doi>10.18653/v1/D17-1122</doi>
@@ -1504,7 +1504,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Kaiyu</first><last>Qian</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1204–1213</pages>
       <url hash="91f0111b">D17-1124</url>
       <doi>10.18653/v1/D17-1124</doi>
@@ -1548,7 +1548,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="128">
       <title>A Joint Sequential and Relational Model for Frame-Semantic Parsing</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1247–1256</pages>
       <url hash="9c67e032">D17-1128</url>
       <doi>10.18653/v1/D17-1128</doi>
@@ -1568,7 +1568,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="130">
       <title><fixed-case>AMR</fixed-case> Parsing using Stack-<fixed-case>LSTM</fixed-case>s</title>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>1269–1275</pages>
       <url hash="62b6e9b8">D17-1130</url>
       <doi>10.18653/v1/D17-1130</doi>
@@ -1609,10 +1609,10 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="134">
       <title>Multi-task Attention-based Neural Networks for Implicit Discourse Relationship Representation and Identification</title>
-      <author><first>Man</first><last>Lan</last></author>
-      <author><first>Jianxiang</first><last>Wang</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
+      <author id="jianxiang-wang"><first>Jianxiang</first><last>Wang</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <pages>1299–1308</pages>
       <url hash="26ee37d3">D17-1134</url>
@@ -1624,7 +1624,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title><fixed-case>C</fixed-case>hinese Zero Pronoun Resolution with Deep Memory Network</title>
       <author><first>Qingyu</first><last>Yin</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>1309–1318</pages>
       <url hash="7e6e2148">D17-1135</url>
@@ -1636,7 +1636,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>How much progress have we made on <fixed-case>RST</fixed-case> discourse parsing? A replication study of recent results on the <fixed-case>RST</fixed-case>-<fixed-case>DT</fixed-case></title>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>1319–1324</pages>
       <url hash="6eceaa47">D17-1136</url>
       <doi>10.18653/v1/D17-1136</doi>
@@ -1657,7 +1657,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="138">
       <title>Revisiting Selectional Preferences for Coreference Resolution</title>
       <author><first>Benjamin</first><last>Heinzerling</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>1332–1339</pages>
       <url hash="b8b21ece">D17-1138</url>
@@ -1669,7 +1669,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Learning to Rank Semantic Coherence for Topic Segmentation</title>
       <author><first>Liang</first><last>Wang</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Houfeng</first><last>Wang</last></author>
       <pages>1340–1344</pages>
       <url hash="d7d27652">D17-1139</url>
@@ -1692,7 +1692,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="141">
       <title>Patterns of Argumentation Strategies across Topics</title>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
       <author><first>Benno</first><last>Stein</last></author>
@@ -1744,7 +1744,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>1380–1389</pages>
       <url hash="eaeee3cd">D17-1145</url>
       <doi>10.18653/v1/D17-1145</doi>
@@ -1792,7 +1792,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Translating Phrases in Neural Machine Translation</title>
       <author><first>Xing</first><last>Wang</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>1421–1431</pages>
       <url hash="b8d1226e">D17-1149</url>
@@ -1817,8 +1817,8 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Massive Exploration of Neural Machine Translation Architectures</title>
       <author><first>Denny</first><last>Britz</last></author>
       <author><first>Anna</first><last>Goldie</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>1442–1451</pages>
       <url hash="8401dbaf">D17-1151</url>
       <doi>10.18653/v1/D17-1151</doi>
@@ -1827,7 +1827,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="152">
       <title>Learning Translations via Matrix Completion</title>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Brendan</first><last>Callahan</last></author>
       <author><first>John</first><last>Hewitt</last></author>
       <author><first>Jie</first><last>Gao</last></author>
@@ -1843,7 +1843,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="153">
       <title>Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback</title>
       <author><first>Khanh</first><last>Nguyen</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <pages>1464–1474</pages>
       <url hash="bb80ddbe">D17-1153</url>
@@ -1871,7 +1871,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1482–1488</pages>
       <url hash="50e05506">D17-1155</url>
       <doi>10.18653/v1/D17-1155</doi>
@@ -1880,7 +1880,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="156">
       <title>Regularization techniques for fine-tuning in neural machine translation</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
@@ -1893,7 +1893,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="157">
       <title>Source-Side Left-to-Right or Target-Side Left-to-Right? An Empirical Comparison of Two Phrase-Based Decoding Algorithms</title>
       <author><first>Yin-Wen</first><last>Chang</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>1495–1499</pages>
       <url hash="e7dc2c67">D17-1157</url>
       <doi>10.18653/v1/D17-1157</doi>
@@ -1938,7 +1938,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Joint Concept Learning and Semantic Parsing from Natural Language Explanations</title>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Igor</first><last>Labutov</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1527–1536</pages>
       <url hash="6f44042d">D17-1161</url>
       <doi>10.18653/v1/D17-1161</doi>
@@ -2000,7 +2000,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>“i have a feeling trump will win..................”: Forecasting Winners and Losers from User Predictions on <fixed-case>T</fixed-case>witter</title>
       <author><first>Sandesh</first><last>Swamy</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>1583–1592</pages>
       <url hash="cf55823e">D17-1166</url>
       <doi>10.18653/v1/D17-1166</doi>
@@ -2014,7 +2014,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Jiannan</first><last>Hu</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Jiachen</first><last>Du</last></author>
       <pages>1593–1602</pages>
       <url hash="f3cb9d1b">D17-1167</url>
@@ -2039,7 +2039,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Using millions of emoji occurrences to learn any-domain representations for detecting sentiment, emotion and sarcasm</title>
       <author><first>Bjarke</first><last>Felbo</last></author>
       <author><first>Alan</first><last>Mislove</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Iyad</first><last>Rahwan</last></author>
       <author><first>Sune</first><last>Lehmann</last></author>
       <pages>1615–1625</pages>
@@ -2075,7 +2075,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="172">
       <title>Efficient Discontinuous Phrase-Structure Parsing via the Generalized Maximum Spanning Arborescence</title>
       <author><first>Caio</first><last>Corro</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Mathieu</first><last>Lacroix</last></author>
       <pages>1644–1654</pages>
       <url hash="f1f1935b">D17-1172</url>
@@ -2166,8 +2166,8 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title><fixed-case>TAG</fixed-case> Parsing with Neural Networks and Vector Representations of Supertags</title>
       <author><first>Jungo</first><last>Kasai</last></author>
       <author><first>Robert</first><last>Frank</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>1712–1722</pages>
       <url hash="187ef873">D17-1180</url>
@@ -2178,7 +2178,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="181">
       <title>Global Normalization of Convolutional Neural Networks for Joint Entity and Relation Classification</title>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1723–1729</pages>
       <url hash="bf65a4aa">D17-1181</url>
       <doi>10.18653/v1/D17-1181</doi>
@@ -2199,7 +2199,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="183">
       <title><fixed-case>KGE</fixed-case>val: Accuracy Estimation of Automatically Constructed Knowledge Graphs</title>
       <author><first>Prakhar</first><last>Ojha</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1741–1750</pages>
       <url hash="8d7af69a">D17-1183</url>
       <doi>10.18653/v1/D17-1183</doi>
@@ -2221,7 +2221,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="185">
       <title>Dual Tensor Model for Detecting Asymmetric Lexico-Semantic Relations</title>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>1757–1767</pages>
       <url hash="cc987191">D17-1185</url>
       <doi>10.18653/v1/D17-1185</doi>
@@ -2266,7 +2266,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>A Soft-label Method for Noise-tolerant Distantly Supervised Relation Extraction</title>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Kexiang</first><last>Wang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>1790–1795</pages>
       <url hash="68a32110">D17-1189</url>
@@ -2276,7 +2276,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="190">
       <title>A Sequential Model for Classifying Temporal Relations between Intra-Sentence Events</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>1796–1802</pages>
       <url hash="805113de">D17-1190</url>
@@ -2321,7 +2321,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Temporal dynamics of semantic relations in word embeddings: an application to predicting armed conflict participants</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>1824–1829</pages>
       <url hash="78cf3941">D17-1194</url>
       <doi>10.18653/v1/D17-1194</doi>
@@ -2334,7 +2334,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Chenhao</first><last>Tan</last></author>
       <author><first>Sebastian</first><last>Martschat</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1830–1839</pages>
       <url hash="db765a4f">D17-1195</url>
       <doi>10.18653/v1/D17-1195</doi>
@@ -2344,7 +2344,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="196">
       <title>Towards Quantum Language Models</title>
       <author><first>Ivano</first><last>Basile</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>1840–1849</pages>
       <url hash="71f5c338">D17-1196</url>
       <doi>10.18653/v1/D17-1196</doi>
@@ -2355,8 +2355,8 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="197">
       <title>Reference-Aware Language Models</title>
       <author><first>Zichao</first><last>Yang</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <pages>1850–1859</pages>
       <url hash="eae1fceb">D17-1197</url>
@@ -2381,7 +2381,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Zhenisbek</first><last>Assylbekov</last></author>
       <author><first>Rustem</first><last>Takhanov</last></author>
       <author><first>Bagdat</first><last>Myrzakhmetov</last></author>
-      <author><first>Jonathan N.</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan N.</first><last>Washington</last></author>
       <pages>1866–1872</pages>
       <url hash="48d92aff">D17-1199</url>
       <doi>10.18653/v1/D17-1199</doi>
@@ -2440,7 +2440,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="204">
       <title>Finding Patterns in Noisy Crowds: Regression-based Annotation Aggregation for Crowdsourced Data</title>
       <author><first>Natalie</first><last>Parde</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>1907–1912</pages>
       <url hash="5e151bd9">D17-1204</url>
       <doi>10.18653/v1/D17-1204</doi>
@@ -2504,7 +2504,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Ivan</first><last>Titov</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Diego</first><last>Marcheggiani</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>1957–1967</pages>
       <url hash="5304424a">D17-1209</url>
       <doi>10.18653/v1/D17-1209</doi>
@@ -2515,7 +2515,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Trainable Greedy Decoding for Neural Machine Translation</title>
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>1968–1978</pages>
       <url hash="0350ca50">D17-1210</url>
       <doi>10.18653/v1/D17-1210</doi>
@@ -2552,7 +2552,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Aaron</first><last>Halfaker</last></author>
       <author><first>Robert</first><last>Kraut</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2000–2010</pages>
       <url hash="b88901f8">D17-1213</url>
       <doi>10.18653/v1/D17-1213</doi>
@@ -2626,7 +2626,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="219">
       <title>Identifying Where to Focus in Reading Comprehension for Neural Question Generation</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>2067–2073</pages>
       <url hash="10705c18">D17-1219</url>
       <doi>10.18653/v1/D17-1219</doi>
@@ -2717,7 +2717,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="226">
       <title>Event Coreference Resolution by Iteratively Unfolding Inter-dependencies among Events</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>2124–2133</pages>
       <url hash="0e6ec874">D17-1226</url>
@@ -2741,7 +2741,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Di</first><last>Wang</last></author>
       <author><first>Nebojsa</first><last>Jojic</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>2140–2150</pages>
       <url hash="d0770995">D17-1228</url>
       <doi>10.18653/v1/D17-1228</doi>
@@ -2751,9 +2751,9 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="229">
       <title>Preserving Distributional Information in Dialogue Act Classification</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>2151–2156</pages>
       <url hash="71a2b4de">D17-1229</url>
       <doi>10.18653/v1/D17-1229</doi>
@@ -2767,7 +2767,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Tianlin</first><last>Shi</last></author>
       <author><first>Sébastien</first><last>Jean</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>2157–2169</pages>
       <url hash="808f1fe8">D17-1230</url>
       <doi>10.18653/v1/D17-1230</doi>
@@ -2791,7 +2791,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Michael</first><last>Yoder</last></author>
       <author><first>Hyeju</first><last>Jang</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>2179–2189</pages>
       <url hash="a37d555c">D17-1232</url>
       <doi>10.18653/v1/D17-1232</doi>
@@ -2858,8 +2858,8 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Lihong</first><last>Li</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2231–2240</pages>
       <url hash="582dee6e">D17-1237</url>
       <doi>10.18653/v1/D17-1237</doi>
@@ -2883,8 +2883,8 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="239">
       <title>Challenges in Data-to-Document Generation</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>2253–2263</pages>
       <url hash="07257479">D17-1239</url>
       <doi>10.18653/v1/D17-1239</doi>
@@ -2923,7 +2923,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Demographic-aware word associations</title>
       <author><first>Aparna</first><last>Garimella</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2285–2295</pages>
       <url hash="01cd3db0">D17-1242</url>
       <doi>10.18653/v1/D17-1242</doi>
@@ -2934,7 +2934,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>A Factored Neural Network Model for Characterizing Online Discussions in Vector Space</title>
       <author><first>Hao</first><last>Cheng</last></author>
       <author><first>Hao</first><last>Fang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>2296–2306</pages>
       <url hash="54404d9d">D17-1243</url>
       <doi>10.18653/v1/D17-1243</doi>
@@ -2967,7 +2967,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Tatsuya</first><last>Aoki</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2323–2328</pages>
       <url hash="d55519bd">D17-1246</url>
       <doi>10.18653/v1/D17-1246</doi>
@@ -2990,9 +2990,9 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="248">
       <title>Controlling Human Perception of Basic User Traits</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Sharath</first><last>Chandra Guntuku</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath</first><last>Chandra Guntuku</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>2335–2341</pages>
       <url hash="d56efd18">D17-1248</url>
       <doi>10.18653/v1/D17-1248</doi>
@@ -3005,7 +3005,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Clément</first><last>Gautrais</last></author>
       <author><first>Peggy</first><last>Cellier</last></author>
       <author><first>René</first><last>Quiniou</last></author>
-      <author><first>Alexandre</first><last>Termier</last></author>
+      <author id="alexandre-termier"><first>Alexandre</first><last>Termier</last></author>
       <pages>2342–2347</pages>
       <url hash="ee786dfb">D17-1249</url>
       <doi>10.18653/v1/D17-1249</doi>
@@ -3014,12 +3014,12 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     </paper>
     <paper id="250">
       <title>Assessing Objective Recommendation Quality through Political Forecasting</title>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
       <author><first>Michael</first><last>Bishop</last></author>
       <author><first>Philip</first><last>Tetlock</last></author>
       <author><first>Barbara</first><last>Mellers</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>2348–2357</pages>
       <url hash="18a39d74">D17-1250</url>
       <doi>10.18653/v1/D17-1250</doi>
@@ -3041,7 +3041,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Maximum Margin Reward Networks for Learning from Explicit and Implicit Supervision</title>
       <author><first>Haoruo</first><last>Peng</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>2368–2378</pages>
       <url hash="cd651c13">D17-1252</url>
       <doi>10.18653/v1/D17-1252</doi>
@@ -3080,7 +3080,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
     <paper id="255">
       <title>Repeat before Forgetting: Spaced Repetition for Efficient and Effective Training of Neural Networks</title>
       <author><first>Hadi</first><last>Amiri</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>2401–2410</pages>
       <url hash="db50717e">D17-1255</url>
@@ -3095,7 +3095,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Haoran</first><last>Huang</last></author>
       <author><first>Minlong</first><last>Peng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2411–2420</pages>
       <url hash="82f7b344">D17-1256</url>
       <doi>10.18653/v1/D17-1256</doi>
@@ -3122,7 +3122,7 @@ and the code is available at <url>https://github.com/qizhex/RACE_AR_baselines</u
       <title>Does syntax help discourse segmentation? Not so much</title>
       <author><first>Chloé</first><last>Braud</last></author>
       <author><first>Ophélie</first><last>Lacroix</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2432–2442</pages>
       <url hash="19465f59">D17-1258</url>
       <doi>10.18653/v1/D17-1258</doi>
@@ -3176,7 +3176,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Further Investigation into Reference Bias in Monolingual Evaluation of Machine Translation</title>
       <author><first>Qingsong</first><last>Ma</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>2476–2485</pages>
       <url hash="f6cdaaeb">D17-1262</url>
@@ -3198,7 +3198,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="264">
       <title>Knowledge Distillation for Bilingual Dictionary Induction</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
       <author><first>Raphael</first><last>Flauger</last></author>
       <pages>2497–2506</pages>
       <url hash="f924fbb3">D17-1264</url>
@@ -3283,7 +3283,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="272">
       <title>Counterfactual Learning from Bandit Feedback under Deterministic Logging : A Case Study in Statistical Machine Translation</title>
       <author><first>Carolin</first><last>Lawrence</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <pages>2566–2576</pages>
       <url hash="1f6e6226">D17-1272</url>
@@ -3306,9 +3306,9 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="274">
       <title>Improving Slot Filling Performance with Attentive Neural Networks on Dependency Structures</title>
       <author><first>Lifu</first><last>Huang</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>2588–2597</pages>
       <url hash="8dac77f1">D17-1274</url>
       <doi>10.18653/v1/D17-1274</doi>
@@ -3368,7 +3368,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="279">
       <title>Scientific Information Extraction with Semi-supervised Neural Tagging</title>
       <author><first>Yi</first><last>Luan</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>2641–2651</pages>
       <url hash="7e281770">D17-1279</url>
@@ -3393,7 +3393,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Speeding up Reinforcement Learning-based Information Extraction Training using Asynchronous Methods</title>
       <author><first>Aditya</first><last>Sharma</last></author>
       <author><first>Zarana</first><last>Parekh</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>2658–2663</pages>
       <url hash="b7217e06">D17-1281</url>
       <doi>10.18653/v1/D17-1281</doi>
@@ -3407,7 +3407,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Ruo-Ping</first><last>Dong</last></author>
       <author><first>Yu-Siang</first><last>Wang</last></author>
       <author><first>Ju-Chieh</first><last>Chou</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>2664–2669</pages>
       <url hash="cdaab090">D17-1282</url>
       <doi>10.18653/v1/D17-1282</doi>
@@ -3444,7 +3444,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Kris</first><last>Ganjam</last></author>
       <author><first>Navendu</first><last>Jain</last></author>
       <author><first>Jessica</first><last>Lundin</last></author>
-      <author><first>Ryen</first><last>White</last></author>
+      <author id="ryen-white"><first>Ryen</first><last>White</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>2691–2701</pages>
       <url hash="7e4aefd2">D17-1285</url>
@@ -3454,7 +3454,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="286">
       <title>Word Etymology as Native Language Interference</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>2702–2707</pages>
       <url hash="0d166e3a">D17-1286</url>
@@ -3465,7 +3465,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="287">
       <title>A Simpler and More Generalizable Story Detector using Verb and Character Features</title>
       <author><first>Joshua</first><last>Eisenberg</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>2708–2715</pages>
       <url hash="cb437665">D17-1287</url>
       <doi>10.18653/v1/D17-1287</doi>
@@ -3525,7 +3525,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Varun</first><last>Gangal</last></author>
       <author><first>Ang</first><last>Lu</last></author>
       <author><first>Zheng</first><last>Chen</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2758–2767</pages>
       <url hash="66bc6001">D17-1292</url>
       <doi>10.18653/v1/D17-1292</doi>
@@ -3547,7 +3547,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="294">
       <title>Identifying the Provision of Choices in Privacy Policy Text</title>
-      <author><first>Kanthashree</first><last>Mysore Sathyendra</last></author>
+      <author id="kanthashree-mysore-sathyendra"><first>Kanthashree</first><last>Mysore Sathyendra</last></author>
       <author><first>Shomir</first><last>Wilson</last></author>
       <author><first>Florian</first><last>Schaub</last></author>
       <author><first>Sebastian</first><last>Zimmeck</last></author>
@@ -3588,7 +3588,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Neural Sequence-Labelling Models for Grammatical Error Correction</title>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last></author>
       <pages>2795–2806</pages>
       <url hash="b529d5a7">D17-1297</url>
@@ -3600,8 +3600,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Adapting Sequence Models for Sentence Correction</title>
       <author><first>Allen</first><last>Schmaltz</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>2807–2813</pages>
       <url hash="9778d1d5">D17-1298</url>
       <doi>10.18653/v1/D17-1298</doi>
@@ -3612,7 +3612,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="299">
       <title>A Study of Style in Machine Translation: Controlling the Formality of Machine Translation Output</title>
       <author><first>Xing</first><last>Niu</last></author>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>2814–2819</pages>
       <url hash="5e4dbe7f">D17-1299</url>
@@ -3649,8 +3649,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Cross-Lingual Transfer Learning for <fixed-case>POS</fixed-case> Tagging without Cross-Lingual Resources</title>
       <author><first>Joo-Kyung</first><last>Kim</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>2832–2838</pages>
       <url hash="9a5b9637">D17-1302</url>
       <doi>10.18653/v1/D17-1302</doi>
@@ -3678,8 +3678,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2846–2852</pages>
       <url hash="ad81e73e">D17-1304</url>
       <doi>10.18653/v1/D17-1304</doi>
@@ -3702,8 +3702,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="306">
       <title>Sequence Effects in Crowdsourced Annotations</title>
       <author><first>Nitika</first><last>Mathur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2860–2865</pages>
       <url hash="adb4e529">D17-1306</url>
       <doi>10.18653/v1/D17-1306</doi>
@@ -3713,7 +3713,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="307">
       <title>No Need to Pay Attention: Simple Recurrent Neural Networks Work!</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Oliver</first><last>Jojic</last></author>
       <pages>2866–2872</pages>
       <url hash="7919429f">D17-1307</url>
@@ -3788,7 +3788,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="313">
       <title>Learning what to read: Focused machine reading</title>
       <author><first>Enrique</first><last>Noriega-Atala</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Clayton</first><last>Morrison</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>2905–2910</pages>
@@ -3814,8 +3814,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Varun</first><last>Gangal</last></author>
       <author><first>Harsh</first><last>Jhamtani</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>2917–2922</pages>
       <url hash="9e451c92">D17-1315</url>
       <doi>10.18653/v1/D17-1315</doi>
@@ -3826,8 +3826,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="316">
       <title>Using Automated Metaphor Identification to Aid in Detection and Prediction of First-Episode Schizophrenia</title>
-      <author><first>E. Darío</first><last>Gutiérrez</last></author>
-      <author><first>Guillermo</first><last>Cecchi</last></author>
+      <author id="e-dario-gutierrez"><first>E. Darío</first><last>Gutiérrez</last></author>
+      <author id="guillermo-a-cecchi"><first>Guillermo</first><last>Cecchi</last></author>
       <author><first>Cheryl</first><last>Corcoran</last></author>
       <author><first>Philip</first><last>Corlett</last></author>
       <pages>2923–2930</pages>
@@ -3855,7 +3855,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title>Topic-Based Agreement and Disagreement in <fixed-case>US</fixed-case> Electoral Manifestos</title>
       <author><first>Stefano</first><last>Menini</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <pages>2938–2944</pages>
       <url hash="f996fb87">D17-1318</url>
@@ -3929,7 +3929,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <url hash="a842145f">D17-2</url>
       <editor><first>Lucia</first><last>Specia</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
-      <editor><first>Michael</first><last>Paul</last></editor>
+      <editor id="michael-paul"><first>Michael</first><last>Paul</last></editor>
       <doi>10.18653/v1/D17-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -3967,9 +3967,9 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="3">
       <title>An Analysis and Visualization Tool for Case Study Learning of Linguistic Concepts</title>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
-      <author><first>Benjamin</first><last>Meyers</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="benjamin-s-meyers"><first>Benjamin</first><last>Meyers</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>13–18</pages>
       <url hash="e456c56c">D17-2003</url>
       <doi>10.18653/v1/D17-2003</doi>
@@ -4062,11 +4062,11 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="10">
       <title><fixed-case>DLATK</fixed-case>: Differential Language Analysis <fixed-case>T</fixed-case>ool<fixed-case>K</fixed-case>it</title>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Patrick</first><last>Crutchley</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
       <pages>55–60</pages>
       <url hash="d01c1f8b">D17-2010</url>
@@ -4100,7 +4100,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <title><fixed-case>M</fixed-case>ood<fixed-case>S</fixed-case>wipe: A Soft Keyboard that Suggests <fixed-case>M</fixed-case>essage<fixed-case>B</fixed-case>ased on User-Specified Emotions</title>
       <author><first>Chieh-Yang</first><last>Huang</last></author>
       <author><first>Tristan</first><last>Labetoulle</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Yi-Pei</first><last>Chen</last></author>
       <author><first>Hung-Chen</first><last>Chen</last></author>
       <author><first>Vallari</first><last>Srivastava</last></author>
@@ -4130,7 +4130,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     <paper id="15">
       <title><fixed-case>H</fixed-case>eidel<fixed-case>P</fixed-case>lace: An Extensible Framework for Geoparsing</title>
       <author><first>Ludwig</first><last>Richter</last></author>
-      <author><first>Johanna</first><last>Geiß</last></author>
+      <author id="johanna-geiss"><first>Johanna</first><last>Geiß</last></author>
       <author><first>Andreas</first><last>Spitz</last></author>
       <author><first>Michael</first><last>Gertz</last></author>
       <pages>85–90</pages>
@@ -4146,8 +4146,8 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
       <author><first>Eugen</first><last>Ruppert</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Dmitry</first><last>Ustalov</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>91–96</pages>
       <url hash="42a54bd4">D17-2016</url>
       <doi>10.18653/v1/D17-2016</doi>
@@ -4201,7 +4201,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="21">
       <title>Interactive Visualization and Manipulation of Attention-based Neural Machine Translation</title>
-      <author><first>Jaesong</first><last>Lee</last></author>
+      <author id="jaesong-lee"><first>Jaesong</first><last>Lee</last></author>
       <author><first>Joong-Hwi</first><last>Shin</last></author>
       <author><first>Jun-Seok</first><last>Kim</last></author>
       <pages>121–126</pages>
@@ -4224,7 +4224,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </meta>
     <paper id="1">
       <title>Acquisition, Representation and Usage of Conceptual Hierarchies</title>
-      <author><first>Marius</first><last>Pasca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
       <url hash="9f1d40ea">D17-3001</url>
       <abstract>Through subsumption and instantiation, individual instances (“artificial intelligence”, “the spotted pig”) otherwise spanning a wide range of domains can be brought together and organized under conceptual hierarchies. The hierarchies connect more specific concepts (“computer science subfields”, “gastropubs”) to more general concepts (“academic disciplines”, “restaurants”) through IsA relations. Explicit or implicit properties applicable to, and defining, more general concepts are inherited by their more specific concepts, down to the instances connected to the lower parts of the hierarchies. Subsumption represents a crisp, universally-applicable principle towards consistently representing IsA relations in any knowledge resource. Yet knowledge resources often exhibit significant differences in their scope, representation choices and intended usage, to cause significant differences in their expected usage and impact on various tasks.
 
@@ -4233,7 +4233,7 @@ and <i>efficiency</i> of on-line policy optimization compared to other companion
     </paper>
     <paper id="2">
       <title>Computational Sarcasm</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
       <url hash="e2c784db">D17-3002</url>
       <abstract>Sarcasm is a form of verbal irony that is intended to express contempt or ridicule. Motivated by challenges posed by sarcastic text to sentiment analysis, computational approaches to sarcasm have witnessed a growing interest at NLP forums in the past decade. Computational sarcasm refers to automatic approaches pertaining to sarcasm. The tutorial will provide a bird’s-eye view of the research in computational sarcasm for text, while focusing on significant milestones.
@@ -4300,7 +4300,7 @@ The material associated with this tutorial will be available at the tutorial web
     <paper id="7">
       <title>Cross-Lingual Word Representations: Induction and Evaluation</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <url hash="7346a164">D17-3007</url>
       <abstract>In recent past, NLP as a field has seen tremendous utility of distributional word vector representations as features in downstream tasks. The fact that these word vectors can be trained on unlabeled monolingual corpora of a language makes them an inexpensive resource in NLP. With the increasing use of monolingual word vectors, there is a need for word vectors that can be used as efficiently across multiple languages as monolingually. Therefore, learning bilingual and multilingual word embeddings/vectors is currently an important research topic. These vectors offer an elegant and language-pair independent way to represent content across different languages.
diff --git a/data/xml/D18.xml b/data/xml/D18.xml
index 8f6b6858aa..a0b0a813c9 100644
--- a/data/xml/D18.xml
+++ b/data/xml/D18.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="67e20a5b">D18-1</url>
-      <editor><first>Ellen</first><last>Riloff</last></editor>
+      <editor id="ellen-riloff"><first>Ellen</first><last>Riloff</last></editor>
       <editor><first>David</first><last>Chiang</last></editor>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
       <month>October-November</month>
@@ -22,7 +22,7 @@
       <title>Privacy-preserving Neural Representations of Text</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1–10</pages>
       <url hash="16709530">D18-1001</url>
       <abstract>This article deals with adversarial attacks towards deep learning systems for Natural Language Processing (NLP), in the context of privacy protection. We study a specific type of attack: an attacker eavesdrops on the hidden representations of a neural text classifier and tries to recover information about the input text. Such scenario may arise in situations when the computation of a neural network is shared across multiple devices, e.g. some hidden representation is computed by a user’s device and sent to a cloud-based model. We measure the privacy of a hidden representation by the ability of an attacker to predict accurately specific private information from it and characterize the tradeoff between the privacy and the utility of neural representations. Finally, we propose several defense methods based on modified training objectives and show that they improve the privacy of neural representations.</abstract>
@@ -76,7 +76,7 @@
       <author><first>Desmond</first><last>Patton</last></author>
       <author><first>William</first><last>Frey</last></author>
       <author><first>Chris</first><last>Kedzie</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>46–56</pages>
       <url hash="d9a88bc9">D18-1005</url>
       <attachment type="attachment" hash="75559092">D18-1005.Attachment.zip</attachment>
@@ -88,9 +88,9 @@
     <paper id="6">
       <title>Reasoning about Actions and State Changes by Injecting Commonsense Knowledge</title>
       <author><first>Niket</first><last>Tandon</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Joel</first><last>Grus</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>57–66</pages>
@@ -120,8 +120,8 @@
       <title>Textual Analogy Parsing: What’s Shared and What’s Compared among Analogous Facts</title>
       <author><first>Matthew</first><last>Lamm</last></author>
       <author><first>Arun</first><last>Chaganty</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <pages>82–92</pages>
       <url hash="0dda676f">D18-1008</url>
@@ -159,7 +159,7 @@
       <title>Associative Multichannel Autoencoder for Multimodal Word Representation</title>
       <author><first>Shaonan</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>115–124</pages>
       <url hash="0dbd5914">D18-1011</url>
       <abstract>In this paper we address the problem of learning multimodal word representations by integrating textual, visual and auditory inputs. Inspired by the re-constructive and associative nature of human memory, we propose a novel associative multichannel autoencoder (AMA). Our model first learns the associations between textual and perceptual modalities, so as to predict the missing perceptual information of concepts. Then the textual and predicted perceptual representations are fused through reconstructing their original and associated embeddings. Using a gating mechanism our model assigns different weights to each modality according to the different concepts. Results on six benchmark concept similarity tests show that the proposed method significantly outperforms strong unimodal baselines and state-of-the-art multimodal models.</abstract>
@@ -213,7 +213,7 @@
       <author><first>Xinpeng</first><last>Chen</last></author>
       <author><first>Lin</first><last>Ma</last></author>
       <author><first>Zequn</first><last>Jie</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>162–171</pages>
       <url hash="c040d892">D18-1015</url>
       <abstract>We introduce an effective and efficient method that grounds (i.e., localizes) natural sentences in long, untrimmed video sequences. Specifically, a novel Temporal GroundNet (TGN) is proposed to temporally capture the evolving fine-grained frame-by-word interactions between video and sentence. TGN sequentially scores a set of temporal candidates ended at each frame based on the exploited frame-by-word interactions, and finally grounds the segment corresponding to the sentence. Unlike traditional methods treating the overlapping segments separately in a sliding window fashion, TGN aggregates the historical information and generates the final grounding result in one single pass. We extensively evaluate our proposed TGN on three public datasets with significant improvements over the state-of-the-arts. We further show the consistent effectiveness and efficiency of TGN through an ablation study and a runtime test.</abstract>
@@ -251,7 +251,7 @@
     </paper>
     <paper id="18">
       <title>Using Linguistic Features to Improve the Generalization Capability of Neural Coreference Resolvers</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>193–203</pages>
       <url hash="06e1286a">D18-1018</url>
@@ -328,7 +328,7 @@
     <paper id="24">
       <title>Unsupervised Multilingual Word Embeddings</title>
       <author><first>Xilun</first><last>Chen</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>261–270</pages>
       <url hash="484da67d">D18-1024</url>
       <abstract>Multilingual Word Embeddings (MWEs) represent words from multiple languages in a single distributional vector space. Unsupervised MWE (UMWE) methods acquire multilingual embeddings without cross-lingual supervision, which is a significant advantage over traditional supervised approaches and opens many new possibilities for low-resource languages. Prior art for learning UMWEs, however, merely relies on a number of independently trained Unsupervised Bilingual Word Embeddings (UBWEs) to obtain multilingual embeddings. These methods fail to leverage the interdependencies that exist among many languages. To address this shortcoming, we propose a fully unsupervised framework for learning MWEs that directly exploits the relations between all language pairs. Our model substantially outperforms previous approaches in the experiments on multilingual word translation and cross-lingual word similarity. In addition, our model even beats supervised approaches trained with cross-lingual resources.</abstract>
@@ -360,9 +360,9 @@
     </paper>
     <paper id="27">
       <title>Improving Cross-Lingual Word Embeddings by Meeting in the Middle</title>
-      <author><first>Yerai</first><last>Doval</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>294–304</pages>
       <url hash="bad30c50">D18-1027</url>
@@ -400,7 +400,7 @@
       <title>A Fast, Compact, Accurate Model for Language Identification of Codemixed Text</title>
       <author><first>Yuan</first><last>Zhang</last></author>
       <author><first>Jason</first><last>Riesa</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Anton</first><last>Bakalov</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <author><first>David</first><last>Weiss</last></author>
@@ -455,8 +455,8 @@
       <author><first>Jiateng</first><last>Xie</last></author>
       <author><first>Zhilin</first><last>Yang</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>369–379</pages>
       <url hash="fbea1626">D18-1034</url>
       <abstract>For languages with no annotated resources, unsupervised transfer of natural language processing models such as named-entity recognition (NER) from resource-rich languages would be an appealing capability. However, differences in words and word order across languages make it a challenging problem. To improve mapping of lexical items across languages, we propose a method that finds translations based on bilingual word embeddings. To improve robustness to word order differences, we propose to use self-attention, which allows for a degree of flexibility with respect to word order. We demonstrate that these methods achieve state-of-the-art or competitive NER performance on commonly tested languages under a cross-lingual setting, with much lower resource requirements than past approaches. We also evaluate the challenges of applying these methods to Uyghur, a low-resource language.</abstract>
@@ -466,9 +466,9 @@
     <paper id="35">
       <title>A Stable and Effective Learning Strategy for Trainable Greedy Decoding</title>
       <author><first>Yun</first><last>Chen</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>380–390</pages>
       <url hash="ddee07be">D18-1035</url>
       <abstract>Beam search is a widely used approximate search strategy for neural network decoders, and it generally outperforms simple greedy decoding on tasks like machine translation. However, this improvement comes at substantial computational cost. In this paper, we propose a flexible new method that allows us to reap nearly the full benefits of beam search with nearly no additional computational cost. The method revolves around a small neural network actor that is trained to observe and manipulate the hidden state of a previously-trained decoder. To train this actor network, we introduce the use of a pseudo-parallel corpus built using the output of beam search on a base model, ranked by a target quality metric like BLEU. Our method is inspired by earlier work on this problem, but requires no reinforcement learning, and can be trained reliably on a range of models. Experiments on three parallel corpora and three architectures show that the method yields substantial improvements in translation quality and speed over each base system.</abstract>
@@ -480,7 +480,7 @@
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Zhongjun</first><last>He</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <pages>391–400</pages>
       <url hash="d31a1c9d">D18-1036</url>
@@ -491,7 +491,7 @@
     <paper id="37">
       <title>Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing</title>
       <author><first>Jetic</first><last>Gū</last></author>
-      <author><first>Hassan S.</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan S.</first><last>Shavarani</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>401–413</pages>
       <url hash="cd7c1b29">D18-1037</url>
@@ -520,7 +520,7 @@
       <author><first>Emmanouil Antonios</first><last>Platanios</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>425–435</pages>
       <url hash="89dcba72">D18-1039</url>
       <abstract>We propose a simple modification to existing neural machine translation (NMT) models that enables using a single universal model to translate between multiple languages while allowing for language specific parameterization, and that can also be used for domain adaptation. Our approach requires no changes to the model architecture of a standard NMT system, but instead introduces a new component, the contextual parameter generator (CPG), that generates the parameters of the system (e.g., weights in a neural network). This parameter generator accepts source and target language embeddings as input, and generates the parameters for the encoder and the decoder, respectively. The rest of the model remains unchanged and is shared across all languages. We show how this simple modification enables the system to use monolingual data for training and also perform zero-shot translation. We further show it is able to surpass state-of-the-art performance for both the IWSLT-15 and IWSLT-17 datasets and that the learned language embeddings are able to uncover interesting relationships between languages.</abstract>
@@ -557,7 +557,7 @@
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>458–468</pages>
       <url hash="d3490b13">D18-1042</url>
       <abstract>We introduce a novel discriminative latent-variable model for the task of bilingual lexicon induction. Our model combines the bipartite matching dictionary prior of Haghighi et al. (2008) with a state-of-the-art embedding-based approach. To train the model, we derive an efficient Viterbi EM algorithm. We provide empirical improvements on six language pairs under two metrics and show that the prior theoretically and empirically helps to mitigate the hubness problem. We also demonstrate how previous work may be viewed as a similarly fashioned latent-variable model, albeit with a different prior.</abstract>
@@ -610,7 +610,7 @@
     </paper>
     <paper id="47">
       <title><fixed-case>NORMA</fixed-case>: Neighborhood Sensitive Maps for Multilingual Word Embeddings</title>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>512–522</pages>
       <url hash="24df5af6">D18-1047</url>
       <abstract>Inducing multilingual word embeddings by learning a linear map between embedding spaces of different languages achieves remarkable accuracy on related languages. However, accuracy drops substantially when translating between distant languages. Given that languages exhibit differences in vocabulary, grammar, written form, or syntax, one would expect that embedding spaces of different languages have different structures especially for distant languages. With the goal of capturing such differences, we propose a method for learning neighborhood sensitive maps, NORMA. Our experiments show that NORMA outperforms current state-of-the-art methods for word translation between distant languages.</abstract>
@@ -657,7 +657,7 @@
     <paper id="51">
       <title><fixed-case>S</fixed-case>imple<fixed-case>Q</fixed-case>uestions Nearly Solved: A New Upperbound and Baseline Approach</title>
       <author><first>Michael</first><last>Petrochuk</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>554–558</pages>
       <url hash="5dcb4b79">D18-1051</url>
       <abstract>The SimpleQuestions dataset is one of the most commonly used benchmarks for studying single-relation factoid questions. In this paper, we present new evidence that this benchmark can be nearly solved by standard methods. First, we show that ambiguity in the data bounds performance at 83.4%; many questions have more than one equally plausible interpretation. Second, we introduce a baseline that sets a new state-of-the-art performance level at 78.1% accuracy, despite using standard methods. Finally, we report an empirical analysis showing that the upperbound is loose; roughly a quarter of the remaining errors are also not resolvable from the linguistic signal. Together, these results suggest that the SimpleQuestions dataset is nearly solved.</abstract>
@@ -669,7 +669,7 @@
       <title>Phrase-Indexed Question Answering: A New Challenge for Scalable Document Comprehension</title>
       <author><first>Minjoon</first><last>Seo</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Ali</first><last>Farhadi</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>559–564</pages>
@@ -695,7 +695,7 @@
     </paper>
     <paper id="54">
       <title>Cut to the Chase: A Context Zoom-in Network for Reading Comprehension</title>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Seunghak</first><last>Yu</last></author>
       <author><first>Seohyun</first><last>Back</last></author>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
@@ -722,7 +722,7 @@
       <title>Why is unsupervised alignment of <fixed-case>E</fixed-case>nglish embeddings from different algorithms so hard?</title>
       <author><first>Mareike</first><last>Hartmann</last></author>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>582–586</pages>
       <url hash="b30f4eb4">D18-1056</url>
       <abstract>This paper presents a challenge to the community: Generative adversarial networks (GANs) can perfectly align independent English word embeddings induced using the same algorithm, based on distributional information alone; but fails to do so, for two different embeddings algorithms. Why is that? We believe understanding why, is key to understand both modern word embedding algorithms and the limitations and instability dynamics of GANs. This paper shows that (a) in all these cases, where alignment fails, there exists a linear transform between the two embeddings (so algorithm biases do not lead to non-linear differences), and (b) similar effects can not easily be obtained by varying hyper-parameters. One plausible suggestion based on our initial experiments is that the differences in the inductive biases of the embedding algorithms lead to an optimization landscape that is riddled with local optima, leading to a very small basin of convergence, but we present this more as a challenge paper than a technical contribution.</abstract>
@@ -770,7 +770,7 @@
       <author><first>Ge</first><last>Gao</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>607–613</pages>
       <url hash="f3e54110">D18-1060</url>
       <abstract>We present end-to-end neural models for detecting metaphorical word use in context. We show that relatively standard BiLSTM models which operate on complete sentences work well in this setting, in comparison to previous work that used more restricted forms of linguistic context. These models establish a new state-of-the-art on existing verb metaphor detection benchmarks, and show strong performance on jointly predicting the metaphoricity of all words in a running text.</abstract>
@@ -780,8 +780,8 @@
     </paper>
     <paper id="61">
       <title>Distant Supervision from Disparate Sources for Low-Resource Part-of-Speech Tagging</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>614–620</pages>
       <url hash="be94bece">D18-1061</url>
       <abstract>a cross-lingual neural part-of-speech tagger that learns from disparate sources of distant supervision, and realistically scales to hundreds of low-resource languages. The model exploits annotation projection, instance selection, tag dictionaries, morphological lexicons, and distributed representations, all in a uniform framework. The approach is simple, yet surprisingly effective, resulting in a new state of the art without access to any gold annotated data.</abstract>
@@ -854,7 +854,7 @@
     <paper id="67">
       <title>Exploring Optimism and Pessimism in <fixed-case>T</fixed-case>witter Using Deep Learning</title>
       <author><first>Cornelia</first><last>Caragea</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Bogdan</first><last>Dumitru</last></author>
       <pages>652–658</pages>
       <url hash="619157dd">D18-1067</url>
@@ -965,7 +965,7 @@
       <title>A Dataset for Document Grounded Conversations</title>
       <author><first>Kangyan</first><last>Zhou</last></author>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>708–713</pages>
       <url hash="14f70050">D18-1076</url>
       <attachment type="attachment" hash="30c71711">D18-1076.Attachment.zip</attachment>
@@ -978,7 +978,7 @@
       <author><first>Seonghan</first><last>Ryu</last></author>
       <author><first>Sangjun</first><last>Koo</last></author>
       <author><first>Hwanjo</first><last>Yu</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>714–718</pages>
       <url hash="db128c1c">D18-1077</url>
       <abstract>The main goal of this paper is to develop out-of-domain (OOD) detection for dialog systems. We propose to use only in-domain (IND) sentences to build a generative adversarial network (GAN) of which the discriminator generates low scores for OOD sentences. To improve basic GANs, we apply feature matching loss in the discriminator, use domain-category analysis as an additional task in the discriminator, and remove the biases in the generator. Thereby, we reduce the huge effort of collecting OOD sentences for training OOD detection. For evaluation, we experimented OOD detection on a multi-domain dialog system. The experimental results showed the proposed method was most accurate compared to the existing methods.</abstract>
@@ -1008,9 +1008,9 @@
       <author><first>Yang</first><last>Xu</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Huibin</first><last>Ruan</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>725–731</pages>
       <url hash="afe49e15">D18-1079</url>
       <abstract>We tackle discourse-level relation recognition, a problem of determining semantic relations between text spans. Implicit relation recognition is challenging due to the lack of explicit relational clues. The increasingly popular neural network techniques have been proven effective for semantic encoding, whereby widely employed to boost semantic relation discrimination. However, learning to predict semantic relations at a deep level heavily relies on a great deal of training data, but the scale of the publicly available data in this field is limited. In this paper, we follow Rutherford and Xue (2015) to expand the training data set using the corpus of explicitly-related arguments, by arbitrarily dropping the overtly presented discourse connectives. On the basis, we carry out an experiment of sampling, in which a simple active learning approach is used, so as to take the informative instances for data expansion. The goal is to verify whether the selective use of external data not only reduces the time consumption of retraining but also ensures a better system performance. Using the expanded training data, we retrain a convolutional neural network (CNN) based classifer which is a simplified version of Qin et al. (2016)’s stacking gated relation recognizer. Experimental results show that expanding the training set with small-scale carefully-selected external data yields substantial performance gain, with the improvements of about 4% for accuracy and 3.6% for F-score. This allows a weak classifier to achieve a comparable performance against the state-of-the-art systems.</abstract>
@@ -1068,7 +1068,7 @@
     <paper id="84">
       <title>Training for Diversity in Image Paragraph Captioning</title>
       <author><first>Luke</first><last>Melas-Kyriazi</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>George</first><last>Han</last></author>
       <pages>757–761</pages>
       <url hash="e0c3ab82">D18-1084</url>
@@ -1080,7 +1080,7 @@
       <title>A Graph-theoretic Summary Evaluation for <fixed-case>ROUGE</fixed-case></title>
       <author><first>Elaheh</first><last>ShafieiBavani</last></author>
       <author><first>Mohammad</first><last>Ebrahimi</last></author>
-      <author><first>Raymond</first><last>Wong</last></author>
+      <author id="raymond-wong"><first>Raymond</first><last>Wong</last></author>
       <author><first>Fang</first><last>Chen</last></author>
       <pages>762–767</pages>
       <url hash="50e73544">D18-1085</url>
@@ -1127,8 +1127,8 @@
     </paper>
     <paper id="89">
       <title>On the Abstractiveness of Neural Document Summarization</title>
-      <author><first>Fangfang</first><last>Zhang</last></author>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="fang-fang-zhang"><first>Fangfang</first><last>Zhang</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Rui</first><last>Yan</last></author>
       <pages>785–790</pages>
       <url hash="4ef0e3fa">D18-1089</url>
@@ -1141,7 +1141,7 @@
       <author><first>Yucheng</first><last>Wang</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>791–797</pages>
       <url hash="cb77fb65">D18-1090</url>
       <abstract>Automatic essay scoring (AES) is the task of assigning grades to essays without human interference. Existing systems for AES are typically trained to predict the score of each single essay at a time without considering the rating schema. In order to address this issue, we propose a reinforcement learning framework for essay scoring that incorporates quadratic weighted kappa as guidance to optimize the scoring system. Experiment results on benchmark datasets show the effectiveness of our framework.</abstract>
@@ -1185,7 +1185,7 @@
       <title>A Hierarchical Neural Attention-based Text Classifier</title>
       <author><first>Koustuv</first><last>Sinha</last></author>
       <author><first>Yue</first><last>Dong</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Derek</first><last>Ruths</last></author>
       <pages>817–823</pages>
       <url hash="e819e4c5">D18-1094</url>
@@ -1233,7 +1233,7 @@
       <title>Topic Intrusion for Automatic Topic Model Evaluation</title>
       <author><first>Shraey</first><last>Bhatia</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>844–849</pages>
       <url hash="c65dbfa2">D18-1098</url>
       <attachment type="attachment" hash="8e006ac2">D18-1098.Attachment.pdf</attachment>
@@ -1270,7 +1270,7 @@
       <title>Improving Unsupervised Word-by-Word Translation with Language Model and Denoising Autoencoder</title>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Jiahui</first><last>Geng</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>862–868</pages>
       <url hash="181724fe">D18-1101</url>
       <abstract>Unsupervised learning of cross-lingual word embedding offers elegant matching of words across languages, but has fundamental limitations in translating sentences. In this paper, we propose simple yet effective methods to improve word-by-word translation of cross-lingual embeddings, using only monolingual corpora but without any back-translation. We integrate a language model for context-aware search, and use a novel denoising autoencoder to handle reordering. Our system surpasses state-of-the-art unsupervised translation systems without costly iterative training. We also analyze the effect of vocabulary size and denoising type on the translation performance, which provides better understanding of learning the cross-lingual word embedding and its usage in translation.</abstract>
@@ -1350,8 +1350,8 @@
     <paper id="108">
       <title>Towards Dynamic Computation Graphs via Sparse Latent Structure</title>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>905–911</pages>
       <url hash="488270fa">D18-1108</url>
       <attachment type="attachment" hash="d92b09c3">D18-1108.Attachment.zip</attachment>
@@ -1442,7 +1442,7 @@
     <paper id="115">
       <title>Conversational Decision-Making Model for Predicting the King’s Decision in the Annals of the <fixed-case>J</fixed-case>oseon Dynasty</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>956–961</pages>
       <url hash="a7b7681f">D18-1115</url>
       <abstract>Styles of leaders when they make decisions in groups vary, and the different styles affect the performance of the group. To understand the key words and speakers associated with decisions, we initially formalize the problem as one of predicting leaders’ decisions from discussion with group members. As a dataset, we introduce conversational meeting records from a historical corpus, and develop a hierarchical RNN structure with attention and pre-trained speaker embedding in the form of a, Conversational Decision Making Model (CDMM). The CDMM outperforms other baselines to predict leaders’ final decisions from the data. We explain why CDMM works better than other methods by showing the key words and speakers discovered from the attentions as evidence.</abstract>
@@ -1491,7 +1491,7 @@
     <paper id="119">
       <title>How agents see things: On visual representations in an emergent language game</title>
       <author><first>Diane</first><last>Bouchacourt</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>981–985</pages>
       <url hash="01f6cbbc">D18-1119</url>
       <abstract>There is growing interest in the language developed by agents interacting in emergent-communication settings. Earlier studies have focused on the agents’ symbol usage, rather than on their representation of visual input. In this paper, we consider the referential games of Lazaridou et al. (2017), and investigate the representations the agents develop during their evolving interaction. We find that the agents establish successful communication by inducing visual representations that almost perfectly align with each other, but, surprisingly, do not capture the conceptual properties of the objects depicted in the input images. We conclude that, if we care about developing language-like communication systems, we must pay more attention to the visual semantics agents associate to the symbols they use.</abstract>
@@ -1529,7 +1529,7 @@
     </paper>
     <paper id="122">
       <title>Event Detection with Neural Networks: A Rigorous Empirical Evaluation</title>
-      <author><first>Walker</first><last>Orr</last></author>
+      <author id="j-walker-orr"><first>Walker</first><last>Orr</last></author>
       <author><first>Prasad</first><last>Tadepalli</last></author>
       <author><first>Xiaoli</first><last>Fern</last></author>
       <pages>999–1004</pages>
@@ -1571,7 +1571,7 @@
       <author><first>Boliang</first><last>Zhang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Lejian</first><last>Liao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>1018–1023</pages>
       <url hash="e0f6e08e">D18-1125</url>
       <abstract>Relation Extraction suffers from dramatical performance decrease when training a model on one genre and directly applying it to a new genre, due to the distinct feature distributions. Previous studies address this problem by discovering a shared space across genres using manually crafted features, which requires great human effort. To effectively automate this process, we design a genre-separation network, which applies two encoders, one genre-independent and one genre-shared, to explicitly extract genre-specific and genre-agnostic features. Then we train a relation classifier using the genre-agnostic features on the source genre and directly apply to the target genre. Experiment results on three distinct genres of the ACE dataset show that our approach achieves up to 6.1% absolute F1-score gain compared to previous methods. By incorporating a set of external linguistic features, our approach outperforms the state-of-the-art by 1.7% absolute F1 gain. We make all programs of our model publicly available for research purpose</abstract>
@@ -1593,7 +1593,7 @@
       <author><first>Shaobo</first><last>Liu</last></author>
       <author><first>Rui</first><last>Cheng</last></author>
       <author><first>Xiaoming</first><last>Yu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>1030–1035</pages>
       <url hash="49430900">D18-1127</url>
       <abstract>The task of event detection involves identifying and categorizing event triggers. Contextual information has been shown effective on the task. However, existing methods which utilize contextual information only process the context once. We argue that the context can be better exploited by processing the context multiple times, allowing the model to perform complex reasoning and to generate better context representation, thus improving the overall performance. Meanwhile, dynamic memory network (DMN) has demonstrated promising capability in capturing contextual information and has been applied successfully to various tasks. In light of the multi-hop mechanism of the DMN to model the context, we propose the trigger detection dynamic memory network (TD-DMN) to tackle the event detection problem. We performed a five-fold cross-validation on the ACE-2005 dataset and experimental results show that the multi-hop mechanism does improve the performance and the proposed model achieves best F1 score compared to the state-of-the-art methods.</abstract>
@@ -1629,7 +1629,7 @@
       <title>Entity Tracking Improves Cloze-style Reading Comprehension</title>
       <author><first>Luong</first><last>Hoang</last></author>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>1049–1055</pages>
       <url hash="58538c37">D18-1130</url>
       <attachment type="attachment" hash="e3e8f95d">D18-1130.Attachment.pdf</attachment>
@@ -1643,7 +1643,7 @@
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Salvatore</first><last>Romeo</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1056–1063</pages>
       <url hash="bdcde475">D18-1131</url>
       <abstract>We address the problem of detecting duplicate questions in forums, which is an important step towards automating the process of answering new questions. As finding and annotating such potential duplicates manually is very tedious and costly, automatic methods based on machine learning are a viable alternative. However, many forums do not have annotated data, i.e., questions labeled by experts as duplicates, and thus a promising solution is to use domain adaptation from another forum that has such annotations. Here we focus on adversarial domain adaptation, deriving important findings about when it performs well and what properties of the domains are important in this regard. Our experiments with StackExchange data show an average improvement of 5.6% over the best baseline across multiple pairs of domains.</abstract>
@@ -1688,7 +1688,7 @@
     <paper id="135">
       <title>Improving the results of string kernels in sentiment analysis and <fixed-case>A</fixed-case>rabic dialect identification by adapting them to your test set</title>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
-      <author><first>Andrei M.</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei M.</first><last>Butnaru</last></author>
       <pages>1084–1090</pages>
       <url hash="a2b9d13d">D18-1135</url>
       <abstract>Recently, string kernels have obtained state-of-the-art results in various text classification tasks such as Arabic dialect identification or native language identification. In this paper, we apply two simple yet effective transductive learning approaches to further improve the results of string kernels. The first approach is based on interpreting the pairwise string kernel similarities between samples in the training set and samples in the test set as features. Our second approach is a simple self-training method based on two learning iterations. In the first iteration, a classifier is trained on the training set and tested on the test set, as usual. In the second iteration, a number of test samples (to which the classifier associated higher confidence scores) are added to the training set for another round of training. However, the ground-truth labels of the added test samples are not necessary. Instead, we use the labels predicted by the classifier in the first training iteration. By adapting string kernels to the test set, we report significantly better accuracy rates in English polarity classification and Arabic dialect identification.</abstract>
@@ -1698,7 +1698,7 @@
     <paper id="136">
       <title>Parameterized Convolutional Neural Networks for Aspect Level Sentiment Classification</title>
       <author><first>Binxuan</first><last>Huang</last></author>
-      <author><first>Kathleen</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last></author>
       <pages>1091–1096</pages>
       <url hash="2dbc20aa">D18-1136</url>
       <abstract>We introduce a novel parameterized convolutional neural network for aspect level sentiment classification. Using parameterized filters and parameterized gates, we incorporate aspect information into convolutional neural networks (CNN). Experiments demonstrate that our parameterized filters and parameterized gates effectively capture the aspect-specific features, and our CNN-based models achieve excellent results on SemEval 2014 datasets.</abstract>
@@ -1708,7 +1708,7 @@
     <paper id="137">
       <title>Improving Multi-label Emotion Classification via Sentiment Classification with Dual Attention Transfer Network</title>
       <author><first>Jianfei</first><last>Yu</last></author>
-      <author><first>Luís</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
       <author><first>Pradeep</first><last>Karuturi</last></author>
       <author><first>William</first><last>Brendel</last></author>
@@ -1795,7 +1795,7 @@
       <author><first>Yasuhide</first><last>Miura</last></author>
       <author><first>Motoki</first><last>Taniguchi</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
       <pages>1139–1145</pages>
       <url hash="c6d3fffd">D18-1144</url>
@@ -1808,9 +1808,9 @@
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
       <author><first>Kokil</first><last>Jaidka</last></author>
       <author><first>Laura</first><last>Smith</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>1146–1152</pages>
       <url hash="84d65b94">D18-1145</url>
       <attachment type="attachment" hash="434f0d2a">D18-1145.Attachment.pdf</attachment>
@@ -1840,11 +1840,11 @@
     <paper id="148">
       <title>The Remarkable Benefit of User-Level Aggregation for Lexical-based Population-Level Predictions</title>
       <author><first>Salvatore</first><last>Giorgi</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
       <author><first>Daniel</first><last>Rieman</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>1167–1172</pages>
       <url hash="85e049d6">D18-1148</url>
       <attachment type="attachment" hash="2ef58675">D18-1148.Attachment.zip</attachment>
@@ -1895,7 +1895,7 @@
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1203–1214</pages>
       <url hash="0da699a8">D18-1152</url>
       <attachment type="attachment" hash="5cd08c01">D18-1152.Attachment.pdf</attachment>
@@ -1924,7 +1924,7 @@
       <author><first>Zhengzhong</first><last>Liu</last></author>
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1226–1236</pages>
       <url hash="33b77f88">D18-1154</url>
       <abstract>Identifying the salience (i.e. importance) of discourse units is an important task in language understanding. While events play important roles in text documents, little research exists on analyzing their saliency status. This paper empirically studies Event Salience and proposes two salience detection models based on discourse relations. The first is a feature based salience model that incorporates cohesion among discourse units. The second is a neural model that captures more complex interactions between discourse units. In our new large-scale event salience corpus, both methods significantly outperform the strong frequency baseline, while our neural model further improves the feature based one by a large margin. Our analyses demonstrate that our neural model captures interesting connections between salience and discourse unit relations (e.g., scripts and frame structures).</abstract>
@@ -1935,7 +1935,7 @@
     <paper id="155">
       <title>Temporal Information Extraction by Predicting Relative Time-lines</title>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1237–1246</pages>
       <url hash="e21450eb">D18-1155</url>
       <abstract>The current leading paradigm for temporal information extraction from text consists of three phases: (1) recognition of events and temporal expressions, (2) recognition of temporal relations among them, and (3) time-line construction from the temporal relations. In contrast to the first two phases, the last phase, time-line construction, received little attention and is the focus of this work. In this paper, we propose a new method to construct a linear time-line from a set of (extracted) temporal relations. But more importantly, we propose a novel paradigm in which we directly predict start and end-points for events from the text, constituting a time-line without going through the intermediate step of prediction of temporal relations as in earlier work. Within this paradigm, we propose two models that predict in linear complexity, and a new training loss using TimeML-style annotations, yielding promising results.</abstract>
@@ -1947,7 +1947,7 @@
       <title>Jointly Multiple Events Extraction via Attention-based Graph Information Aggregation</title>
       <author><first>Xiao</first><last>Liu</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>1247–1256</pages>
       <url hash="d29b02ba">D18-1156</url>
       <abstract>Event extraction is of practical utility in natural language processing. In the real world, it is a common phenomenon that multiple events existing in the same sentence, where extracting them are more difficult than extracting a single event. Previous works on modeling the associations between events by sequential modeling methods suffer a lot from the low efficiency in capturing very long-range dependencies. In this paper, we propose a novel Jointly Multiple Events Extraction (JMEE) framework to jointly extract multiple event triggers and arguments by introducing syntactic shortcut arcs to enhance information flow and attention-based graph convolution networks to model graph information. The experiment results demonstrate that our proposed framework achieves competitive results compared with state-of-the-art methods.</abstract>
@@ -1961,7 +1961,7 @@
       <author><first>Rishabh</first><last>Joshi</last></author>
       <author><first>Sai Suman</first><last>Prayaga</last></author>
       <author><first>Chiranjib</first><last>Bhattacharyya</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1257–1266</pages>
       <url hash="58bd95d0">D18-1157</url>
       <attachment type="attachment" hash="9bd3e6dc">D18-1157.Attachment.pdf</attachment>
@@ -2036,7 +2036,7 @@
     <paper id="163">
       <title>Synthetic Data Made to Order: The Case of Parsing</title>
       <author><first>Dingquan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1325–1337</pages>
       <url hash="371fa8fe">D18-1163</url>
       <attachment type="attachment" hash="648d65ae">D18-1163.Attachment.pdf</attachment>
@@ -2063,7 +2063,7 @@
       <title>Learning a Policy for Opportunistic Active Learning</title>
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
       <author><first>Peter</first><last>Stone</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1347–1357</pages>
       <url hash="209e24b5">D18-1165</url>
       <attachment type="attachment" hash="f2a2aee7">D18-1165.Attachment.tgz</attachment>
@@ -2091,7 +2091,7 @@
       <author><first>Jie</first><last>Lei</last></author>
       <author><first>Licheng</first><last>Yu</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <pages>1369–1379</pages>
       <url hash="ca0fa61e">D18-1167</url>
       <attachment type="attachment" hash="eba7c8e8">D18-1167.Attachment.pdf</attachment>
@@ -2136,7 +2136,7 @@
       <author><first>Zexue</first><last>He</last></author>
       <author><first>Qiaolin</first><last>Xia</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>1402–1411</pages>
       <url hash="4babff5b">D18-1170</url>
       <abstract>The goal of Word Sense Disambiguation (WSD) is to identify the correct meaning of a word in the particular context. Traditional supervised methods only use labeled data (context), while missing rich lexical knowledge such as the gloss which defines the meaning of a word sense. Recent studies have shown that incorporating glosses into neural networks for WSD has made significant improvement. However, the previous models usually build the context representation and gloss representation separately. In this paper, we find that the learning for the context and gloss representation can benefit from each other. Gloss can help to highlight the important words in the context, thus building a better context representation. Context can also help to locate the key words in the gloss of the correct word sense. Therefore, we introduce a co-attention mechanism to generate co-dependent representations for the context and gloss. Furthermore, in order to capture both word-level and sentence-level information, we extend the attention mechanism in a hierarchical fashion. Experimental results show that our model achieves the state-of-the-art results on several standard English all-words WSD test datasets.</abstract>
@@ -2168,7 +2168,7 @@
       <title>Memory, Show the Way: Memory Based Few Shot Word Representation Learning</title>
       <author><first>Jingyuan</first><last>Sun</last></author>
       <author><first>Shaonan</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1435–1444</pages>
       <url hash="6bf961a2">D18-1173</url>
       <abstract>Distributional semantic models (DSMs) generally require sufficient examples for a word to learn a high quality representation. This is in stark contrast with human who can guess the meaning of a word from one or a few referents only. In this paper, we propose Mem2Vec, a memory based embedding learning method capable of acquiring high quality word representations from fairly limited context. Our method directly adapts the representations produced by a DSM with a longterm memory to guide its guess of a novel word. Based on a pre-trained embedding space, the proposed method delivers impressive performance on two challenging few-shot word similarity tasks. Embeddings learned with our method also lead to considerable improvements over strong baselines on NER and sentiment classification.</abstract>
@@ -2235,10 +2235,10 @@
     </paper>
     <paper id="179">
       <title>Dissecting Contextual Word Embeddings: Architecture and Representation</title>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Mark</first><last>Neumann</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>1499–1509</pages>
       <url hash="5fb6739e">D18-1179</url>
       <attachment type="attachment" hash="4ec10fd7">D18-1179.Attachment.pdf</attachment>
@@ -2308,7 +2308,7 @@
     <paper id="185">
       <title>Compare, Compress and Propagate: Enhancing Neural Architectures with Alignment Factorization for Natural Language Inference</title>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
       <pages>1565–1575</pages>
       <url hash="0763009c">D18-1185</url>
@@ -2322,7 +2322,7 @@
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Xinchi</first><last>Chen</last></author>
       <author><first>Dong</first><last>Liang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1576–1585</pages>
       <url hash="00eee20c">D18-1186</url>
       <abstract>Attention-based neural models have achieved great success in natural language inference (NLI). In this paper, we propose the Convolutional Interaction Network (CIN), a general model to capture the interaction between two sentences, which can be an alternative to the attention mechanism for NLI. Specifically, CIN encodes one sentence with the filters dynamically generated based on another sentence. Since the filters may be designed to have various numbers and sizes, CIN can capture more complicated interaction patterns. Experiments on three large datasets demonstrate CIN’s efficacy.</abstract>
@@ -2386,7 +2386,7 @@
       <title>A Span Selection Model for Semantic Role Labeling</title>
       <author><first>Hiroki</first><last>Ouchi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1630–1642</pages>
       <url hash="9c877a18">D18-1191</url>
       <abstract>We present a simple and accurate span-based model for semantic role labeling (SRL). Our model directly takes into account all possible argument spans and scores them for each label. At decoding time, we greedily select higher scoring labeled spans. One advantage of our model is to allow us to design and use span-level features, that are difficult to use in token-based BIO tagging approaches. Experimental results demonstrate that our ensemble model achieves the state-of-the-art results, 87.4 F1 and 87.0 F1 on the CoNLL-2005 and 2012 datasets, respectively.</abstract>
@@ -2398,7 +2398,7 @@
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Alvin</first><last>Cheung</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1643–1652</pages>
       <url hash="e14d9fb6">D18-1192</url>
       <abstract>Source code is rarely written in isolation. It depends significantly on the programmatic context, such as the class that the code would reside in. To study this phenomenon, we introduce the task of generating class member functions given English documentation and the programmatic context provided by the rest of the class. This task is challenging because the desired code can vary greatly depending on the functionality the class provides (e.g., a sort function may or may not be available when we are asked to “return the smallest element” in a particular member variable list). We introduce CONCODE, a new large dataset with over 100,000 examples consisting of Java classes from online code repositories, and develop a new encoder-decoder architecture that models the interaction between the method documentation and the class environment. We also present a detailed error analysis suggesting that there is significant room for future work on this task.</abstract>
@@ -2413,7 +2413,7 @@
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Dongxu</first><last>Wang</last></author>
       <author><first>Zifan</first><last>Li</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1653–1663</pages>
       <url hash="466eef9d">D18-1193</url>
       <abstract>Most existing studies in text-to-SQL tasks do not require generating complex SQL queries with multiple clauses or sub-queries, and generalizing to new, unseen databases. In this paper we propose SyntaxSQLNet, a syntax tree network to address the complex and cross-domain text-to-SQL generation task. SyntaxSQLNet employs a SQL specific syntax tree-based decoder with SQL generation path history and table-aware column attention encoders. We evaluate SyntaxSQLNet on a new large-scale text-to-SQL corpus containing databases with multiple tables and complex SQL queries containing multiple SQL clauses and nested queries. We use a database split setting where databases in the test set are unseen during training. Experimental results show that SyntaxSQLNet can handle a significantly greater number of complex SQL examples than prior work, outperforming the previous state-of-the-art model by 9.5% in exact matching accuracy. To our knowledge, we are the first to study this complex text-to-SQL task. Our task and models with the latest updates are available at <url>https://yale-lily.github.io/seq2sql/spider</url>.</abstract>
@@ -2438,7 +2438,7 @@
       <title>Learning to Learn Semantic Parsers from Natural Language Supervision</title>
       <author><first>Igor</first><last>Labutov</last></author>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1676–1690</pages>
       <url hash="e1ff0fbb">D18-1195</url>
       <abstract>As humans, we often rely on language to learn language. For example, when corrected in a conversation, we may learn from that correction, over time improving our language fluency. Inspired by this observation, we propose a learning algorithm for training semantic parsers from supervision (feedback) expressed in natural language. Our algorithm learns a semantic parser from users’ corrections such as “no, what I really meant was before his job, not after”, by also simultaneously learning to parse this natural language feedback in order to leverage it as a form of supervision. Unlike supervision with gold-standard logical forms, our method does not require the user to be familiar with the underlying logical formalism, and unlike supervision from denotation, it does not require the user to know the correct answer to their query. This makes our learning algorithm naturally scalable in settings where existing conversational logs are available and can be leveraged as training data. We construct a novel dataset of natural language feedback in a conversational setting, and show that our method is effective at learning a semantic parser from such natural language supervision.</abstract>
@@ -2448,8 +2448,8 @@
     <paper id="196">
       <title><fixed-case>D</fixed-case>eep<fixed-case>C</fixed-case>x: A transition-based approach for shallow semantic parsing with complex constructional triggers</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>1691–1701</pages>
       <url hash="338da631">D18-1196</url>
       <attachment type="attachment" hash="95a84055">D18-1196.Attachment.zip</attachment>
@@ -2575,7 +2575,7 @@
     <paper id="206">
       <title>Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization</title>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1797–1807</pages>
       <url hash="bb8c5b2b">D18-1206</url>
@@ -2600,8 +2600,8 @@
     <paper id="208">
       <title>Content Selection in Deep Learning Models of Summarization</title>
       <author><first>Chris</first><last>Kedzie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1818–1828</pages>
       <url hash="ec38c396">D18-1208</url>
       <revision id="1" href="D18-1208v1" hash="6f8f065b"/>
@@ -2653,7 +2653,7 @@
     </paper>
     <paper id="212">
       <title>Learning Neural Representation for <fixed-case>CLIR</fixed-case> with Adversarial Framework</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Ping</first><last>Cheng</last></author>
       <pages>1861–1870</pages>
       <url hash="19c2e0c3">D18-1212</url>
@@ -2666,7 +2666,7 @@
       <title><fixed-case>AD</fixed-case>3: Attentive Deep Document Dater</title>
       <author><first>Swayambhu Nath</first><last>Ray</last></author>
       <author><first>Shib Sankar</first><last>Dasgupta</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1871–1880</pages>
       <url hash="7cbf3f83">D18-1213</url>
       <abstract>Knowledge of the creation date of documents facilitates several tasks such as summarization, event extraction, temporally focused information extraction etc. Unfortunately, for most of the documents on the Web, the time-stamp metadata is either missing or can’t be trusted. Thus, predicting creation time from document content itself is an important task. In this paper, we propose Attentive Deep Document Dater (AD3), an attention-based neural document dating system which utilizes both context and temporal information in documents in a flexible and principled manner. We perform extensive experimentation on multiple real-world datasets to demonstrate the effectiveness of AD3 over neural and non-neural baselines.</abstract>
@@ -2713,9 +2713,9 @@
     <paper id="217">
       <title>Semi-Supervised Sequence Modeling with Cross-View Training</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>1914–1925</pages>
       <url hash="9f7e2cc1">D18-1217</url>
       <attachment type="attachment" hash="4a4ef4b3">D18-1217.Attachment.zip</attachment>
@@ -2728,9 +2728,9 @@
       <title>A Probabilistic Annotation Model for Crowdsourcing Coreference</title>
       <author><first>Silviu</first><last>Paun</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>1926–1937</pages>
       <url hash="be35bd86">D18-1218</url>
       <attachment type="attachment" hash="e51276e5">D18-1218.Attachment.zip</attachment>
@@ -2753,7 +2753,7 @@
       <author><first>Noelia</first><last>De La Cruz</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>1949–1958</pages>
       <url hash="f65dae02">D18-1220</url>
       <attachment type="attachment" hash="9a78ffc2">D18-1220.Attachment.pdf</attachment>
@@ -2815,7 +2815,7 @@
       <title><fixed-case>H</fixed-case>y<fixed-case>TE</fixed-case>: Hyperplane-based Temporally aware Knowledge Graph Embedding</title>
       <author><first>Shib Sankar</first><last>Dasgupta</last></author>
       <author><first>Swayambhu Nath</first><last>Ray</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>2001–2011</pages>
       <url hash="fc9c8132">D18-1225</url>
       <abstract>Knowledge Graph (KG) embedding has emerged as an active area of research resulting in the development of several KG embedding methods. Relational facts in KG often show temporal dynamics, e.g., the fact (Cristiano_Ronaldo, playsFor, Manchester_United) is valid only from 2003 to 2009. Most of the existing KG embedding methods ignore this temporal dimension while learning embeddings of the KG elements. In this paper, we propose HyTE, a temporally aware KG embedding method which explicitly incorporates time in the entity-relation space by associating each timestamp with a corresponding hyperplane. HyTE not only performs KG inference using temporal guidance, but also predicts temporal scopes for relational facts with missing time annotations. Through extensive experimentation on temporal datasets extracted from real-world KGs, we demonstrate the effectiveness of our model over both traditional as well as temporal KG embedding methods.</abstract>
@@ -2824,7 +2824,7 @@
     </paper>
     <paper id="226">
       <title>Neural Adaptation Layers for Cross-domain Named Entity Recognition</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <pages>2012–2022</pages>
       <url hash="b063c080">D18-1226</url>
@@ -2903,7 +2903,7 @@
       <author><first>Yuxing</first><last>Peng</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <author><first>Nan</first><last>Yang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>2077–2086</pages>
@@ -2918,7 +2918,7 @@
       <author><first>Max</first><last>Bartolo</last></author>
       <author><first>Patrick</first><last>Lewis</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Mike</first><last>Sheldon</last></author>
       <author><first>Guillaume</first><last>Bouchard</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
@@ -2970,7 +2970,7 @@
       <title><fixed-case>M</fixed-case>emo<fixed-case>R</fixed-case>eader: Large-Scale Reading Comprehension through Neural Memory Controller</title>
       <author><first>Seohyun</first><last>Back</last></author>
       <author><first>Seunghak</first><last>Yu</last></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Jihie</first><last>Kim</last></author>
       <author><first>Jaegul</first><last>Choo</last></author>
       <pages>2131–2140</pages>
@@ -2983,7 +2983,7 @@
     <paper id="238">
       <title>Multi-Granular Sequence Encoding via Dilated Compositional Units for Reading Comprehension</title>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
       <pages>2141–2151</pages>
       <url hash="528fcad8">D18-1238</url>
@@ -3018,16 +3018,16 @@
       <author><first>He</first><last>He</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <author><first>Mark</first><last>Yatskar</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2174–2184</pages>
       <url hash="d68e67cd">D18-1241</url>
       <attachment type="attachment" hash="2997369e">D18-1241.Attachment.zip</attachment>
+      <attachment hash="28e0467a" type="poster">D18-1241.Poster.pdf</attachment>
       <abstract>We present QuAC, a dataset for Question Answering in Context that contains 14K information-seeking QA dialogs (100K questions in total). The dialogs involve two crowd workers: (1) a student who poses a sequence of freeform questions to learn as much as possible about a hidden Wikipedia text, and (2) a teacher who answers the questions by providing short excerpts from the text. QuAC introduces challenges not found in existing machine comprehension datasets: its questions are often more open-ended, unanswerable, or only meaningful within the dialog context, as we show in a detailed qualitative evaluation. We also report results for a number of reference models, including a recently state-of-the-art reading comprehension architecture extended to model dialog context. Our best model underperforms humans by 20 F1, suggesting that there is significant room for future work on this data. Dataset, baseline, and leaderboard available at <url>http://quac.ai</url>.</abstract>
       <doi>10.18653/v1/D18-1241</doi>
-      <attachment type="poster" hash="28e0467a">D18-1241.Poster.pdf</attachment>
       <bibkey>choi-etal-2018-quac</bibkey>
     </paper>
     <paper id="242">
@@ -3035,7 +3035,7 @@
       <author><first>Kangqi</first><last>Luo</last></author>
       <author><first>Fengli</first><last>Lin</last></author>
       <author><first>Xusheng</first><last>Luo</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>2185–2194</pages>
       <url hash="f7829f8d">D18-1242</url>
       <abstract>Answering complex questions that involve multiple entities and multiple relations using a standard knowledge base is an open and challenging task. Most existing KBQA approaches focus on simpler questions and do not work very well on complex questions because they were not able to simultaneously represent the question and the corresponding complex query structure. In this work, we encode such complex query structure into a uniform vector representation, and thus successfully capture the interactions between individual semantic components within a complex question. This approach consistently outperforms existing methods on complex questions while staying competitive on simple questions.</abstract>
@@ -3058,7 +3058,7 @@
       <title>Graph Convolution over Pruned Dependency Trees Improves Relation Extraction</title>
       <author><first>Yuhao</first><last>Zhang</last></author>
       <author><first>Peng</first><last>Qi</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2205–2215</pages>
       <url hash="67edbf50">D18-1244</url>
       <attachment type="attachment" hash="20aa4f04">D18-1244.Attachment.pdf</attachment>
@@ -3069,7 +3069,7 @@
     <paper id="245">
       <title>Multi-Level Structured Self-Attentions for Distantly Supervised Relation Extraction</title>
       <author><first>Jinhua</first><last>Du</last></author>
-      <author><first>Jingguang</first><last>Han</last></author>
+      <author id="jingguang-han"><first>Jingguang</first><last>Han</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Dadong</first><last>Wan</last></author>
       <pages>2216–2225</pages>
@@ -3123,7 +3123,7 @@
       <title>Extracting Entities and Relations with Joint Minimum Risk Training</title>
       <author><first>Changzhi</first><last>Sun</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Shiliang</first><last>Sun</last></author>
       <author><first>Wenting</first><last>Wang</last></author>
       <author><first>Kuang-Chih</first><last>Lee</last></author>
@@ -3137,7 +3137,7 @@
     </paper>
     <paper id="250">
       <title>Large-scale Exploration of Neural Relation Classification Architectures</title>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Duy-Cat</first><last>Can</last></author>
       <author><first>Sinh T.</first><last>Vu</last></author>
       <author><first>Thanh Hai</first><last>Dang</last></author>
@@ -3206,7 +3206,7 @@
       <author><first>Nikita</first><last>Moghe</last></author>
       <author><first>Siddhartha</first><last>Arora</last></author>
       <author><first>Suman</first><last>Banerjee</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>2322–2332</pages>
       <url hash="902e095f">D18-1255</url>
       <attachment type="attachment" hash="d648b72e">D18-1255.Attachment.zip</attachment>
@@ -3234,7 +3234,7 @@
       <author><first>Qizhe</first><last>Xie</last></author>
       <author><first>Guokun</first><last>Lai</last></author>
       <author><first>Zihang</first><last>Dai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2344–2356</pages>
       <url hash="c7b294be">D18-1257</url>
       <abstract>Cloze tests are widely adopted in language exams to evaluate students’ language proficiency. In this paper, we propose the first large-scale human-created cloze test dataset CLOTH, containing questions used in middle-school and high-school language exams. With missing blanks carefully created by teachers and candidate choices purposely designed to be nuanced, CLOTH requires a deeper language understanding and a wider attention span than previously automatically-generated cloze datasets. We test the performance of dedicatedly designed baseline models including a language model trained on the One Billion Word Corpus and show humans outperform them by a significant margin. We investigate the source of the performance gap, trace model deficiencies to some distinct properties of CLOTH, and identify the limited ability of comprehending the long-term context to be the key bottleneck.</abstract>
@@ -3261,9 +3261,9 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Saizheng</first><last>Zhang</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2369–2380</pages>
       <url hash="85540589">D18-1259</url>
       <attachment type="attachment" hash="f1aa8928">D18-1259.Attachment.pdf</attachment>
@@ -3356,17 +3356,17 @@
     </paper>
     <paper id="266">
       <title>Policy Shaping and Generalized Update Equations for Semantic Parsing from Denotations</title>
-      <author><first>Dipendra</first><last>Misra</last></author>
+      <author id="dipendra-misra"><first>Dipendra</first><last>Misra</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>2442–2452</pages>
       <url hash="02d47d13">D18-1266</url>
       <attachment type="attachment" hash="c7217091">D18-1266.Attachment.pdf</attachment>
+      <attachment hash="bf850576" type="presentation">D18-1266.Presentation.pdf</attachment>
       <abstract>Semantic parsing from denotations faces two key challenges in model training: (1) given only the denotations (e.g., answers), search for good candidate semantic parses, and (2) choose the best model update algorithm. We propose effective and general solutions to each of them. Using policy shaping, we bias the search procedure towards semantic parses that are more compatible to the text, which provide better supervision signals for training. In addition, we propose an update equation that generalizes three different families of learning algorithms, which enables fast model exploration. When experimented on a recently proposed sequential question answering dataset, our framework leads to a new state-of-the-art model that outperforms previous work by 5.0% absolute on exact match accuracy.</abstract>
       <video href="D18-1266.mp4"/>
       <doi>10.18653/v1/D18-1266</doi>
-      <attachment type="presentation" hash="bf850576">D18-1266.Presentation.pdf</attachment>
       <bibkey>misra-etal-2018-policy</bibkey>
     </paper>
     <paper id="267">
@@ -3402,7 +3402,7 @@
       <author><first>Ruty</first><last>Rinott</last></author>
       <author><first>Guillaume</first><last>Lample</last></author>
       <author><first>Adina</first><last>Williams</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <pages>2475–2485</pages>
@@ -3431,7 +3431,7 @@
       <author><first>Qing</first><last>Dou</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Lei</first><last>Cui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
@@ -3451,7 +3451,7 @@
       <author><first>Liang</first><last>Yang</last></author>
       <author><first>Kan</first><last>Xu</last></author>
       <author><first>Zhihao</first><last>Yang</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Shaowu</first><last>Zhang</last></author>
       <author><first>Bo</first><last>Xu</last></author>
       <author><first>Dongyu</first><last>Zhang</last></author>
@@ -3492,7 +3492,7 @@
       <author><first>Minlong</first><last>Peng</last></author>
       <author><first>Di</first><last>Liang</last></author>
       <author><first>Keyu</first><last>Ding</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2540–2549</pages>
       <url hash="bf54447d">D18-1275</url>
       <abstract>Part-of-Speech (POS) tagging for Twitter has received considerable attention in recent years. Because most POS tagging methods are based on supervised models, they usually require a large amount of labeled data for training. However, the existing labeled datasets for Twitter are much smaller than those for newswire text. Hence, to help POS tagging for Twitter, most domain adaptation methods try to leverage newswire datasets by learning the shared features between the two domains. However, from a linguistic perspective, Twitter users not only tend to mimic the formal expressions of traditional media, like news, but they also appear to be developing linguistically informal styles. Therefore, POS tagging for the formal Twitter context can be learned together with the newswire dataset, while POS tagging for the informal Twitter context should be learned separately. To achieve this task, in this work, we propose a hypernetwork-based method to generate different parameters to separately model contexts with different expression styles. Experimental results on three different datasets show that our approach achieves better performance than state-of-the-art methods in most cases.</abstract>
@@ -3505,8 +3505,8 @@
       <author><first>Bishal</first><last>Santra</last></author>
       <author><first>Sasi Prasanth</first><last>Bandaru</last></author>
       <author><first>Gaurav</first><last>Sahu</last></author>
-      <author><first>Vishnu Dutt</first><last>Sharma</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="vishnu-dutt-sharma"><first>Vishnu Dutt</first><last>Sharma</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>2550–2561</pages>
       <url hash="7bb868b7">D18-1276</url>
@@ -3544,7 +3544,7 @@
       <author><first>Yingwei</first><last>Xin</last></author>
       <author><first>Ethan</first><last>Hart</last></author>
       <author><first>Vibhuti</first><last>Mahajan</last></author>
-      <author><first>Jean-David</first><last>Ruvini</last></author>
+      <author id="jean-david-ruvini"><first>Jean-David</first><last>Ruvini</last></author>
       <pages>2584–2593</pages>
       <url hash="c5690666">D18-1279</url>
       <abstract>Character-based neural models have recently proven very useful for many NLP tasks. However, there is a gap of sophistication between methods for learning representations of sentences and words. While, most character models for learning representations of sentences are deep and complex, models for learning representations of words are shallow and simple. Also, in spite of considerable research on learning character embeddings, it is still not clear which kind of architecture is the best for capturing character-to-word representations. To address these questions, we first investigate the gaps between methods for learning word and sentence representations. We conduct detailed experiments and comparisons on different state-of-the-art convolutional models, and also investigate the advantages and disadvantages of their constituents. Furthermore, we propose IntNet, a funnel-shaped wide convolutional neural architecture with no down-sampling for learning representations of the internal structure of words by composing their characters from limited, supervised training corpora. We evaluate our proposed model on six sequence labeling datasets, including named entity recognition, part-of-speech tagging, and syntactic chunking. Our in-depth analysis shows that IntNet significantly outperforms other character embedding models and obtains new state-of-the-art performance without relying on any external knowledge or resources.</abstract>
@@ -3555,7 +3555,7 @@
       <title><fixed-case>ICON</fixed-case>: Interactive Conversational Memory Network for Multimodal Emotion Detection</title>
       <author><first>Devamanyu</first><last>Hazarika</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
       <author><first>Roger</first><last>Zimmermann</last></author>
       <pages>2594–2604</pages>
@@ -3589,8 +3589,8 @@
       <title>Commonsense Justification for Action Explanation</title>
       <author><first>Shaohua</first><last>Yang</last></author>
       <author><first>Qiaozi</first><last>Gao</last></author>
-      <author><first>Sari</first><last>Sadiya</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="sari-saba-sadiya"><first>Sari</first><last>Sadiya</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>2627–2637</pages>
       <url hash="4c4df699">D18-1283</url>
       <abstract>To enable collaboration and communication between humans and agents, this paper investigates learning to acquire commonsense evidence for action justification. In particular, we have developed an approach based on the generative Conditional Variational Autoencoder(CVAE) that models object relations/attributes of the world as latent variables and jointly learns a performer that predicts actions and an explainer that gathers commonsense evidence to justify the action. Our empirical results have shown that, compared to a typical attention-based model, CVAE achieves significantly higher performance in both action prediction and justification. A human subject study further shows that the commonsense evidence gathered by CVAE can be communicated to humans to achieve a significantly higher common ground between humans and agents.</abstract>
@@ -3601,7 +3601,7 @@
       <title>Learning Personas from Dialogue with Attentive Memory Networks</title>
       <author><first>Eric</first><last>Chu</last></author>
       <author><first>Prashanth</first><last>Vijayaraghavan</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>2638–2646</pages>
       <url hash="797edb8d">D18-1284</url>
       <attachment type="attachment" hash="c651dc5c">D18-1284.Attachment.zip</attachment>
@@ -3640,8 +3640,8 @@
     </paper>
     <paper id="287">
       <title>Mapping Instructions to Actions in 3<fixed-case>D</fixed-case> Environments with Visual Goal Prediction</title>
-      <author><first>Dipendra</first><last>Misra</last></author>
-      <author><first>Andrew</first><last>Bennett</last></author>
+      <author id="dipendra-misra"><first>Dipendra</first><last>Misra</last></author>
+      <author id="andrew-bennett"><first>Andrew</first><last>Bennett</last></author>
       <author><first>Valts</first><last>Blukis</last></author>
       <author><first>Eyvind</first><last>Niklasson</last></author>
       <author><first>Max</first><last>Shatkhin</last></author>
@@ -3704,7 +3704,7 @@
       <title>Depth-bounding is effective: Improvements and evaluation of unsupervised <fixed-case>PCFG</fixed-case> induction</title>
       <author><first>Lifeng</first><last>Jin</last></author>
       <author><first>Finale</first><last>Doshi-Velez</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <pages>2721–2731</pages>
@@ -3752,7 +3752,7 @@
       <author><first>Wayne</first><last>Xiong</last></author>
       <author><first>Lingfeng</first><last>Wu</last></author>
       <author><first>Jun</first><last>Zhang</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>2764–2768</pages>
       <url hash="1b61603c">D18-1296</url>
       <abstract>We propose to generalize language models for conversational speech recognition to allow them to operate across utterance boundaries and speaker changes, thereby capturing conversation-level phenomena such as adjacency pairs, lexical entrainment, and topical coherence. The model consists of a long-short-term memory (LSTM) recurrent network that reads the entire word-level history of a conversation, as well as information about turn taking and speaker overlap, in order to predict each next word. The model is applied in a rescoring framework, where the word history prior to the current utterance is approximated with preliminary recognition results. In experiments in the conversational telephone speech domain (Switchboard) we find that such a model gives substantial perplexity reductions over a standard LSTM-LM with utterance scope, as well as improvements in word error rate.</abstract>
@@ -3777,7 +3777,7 @@
     </paper>
     <paper id="298">
       <title>Training Millions of Personalized Dialogue Agents</title>
-      <author><first>Pierre-Emmanuel</first><last>Mazaré</last></author>
+      <author id="pierre-emmanuel-mazare"><first>Pierre-Emmanuel</first><last>Mazaré</last></author>
       <author><first>Samuel</first><last>Humeau</last></author>
       <author><first>Martin</first><last>Raison</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
@@ -3905,11 +3905,11 @@
     </paper>
     <paper id="308">
       <title>Structured Multi-Label Biomedical Text Tagging via Attentive Neural Tree Decoding</title>
-      <author><first>Gaurav</first><last>Singh</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav</first><last>Singh</last></author>
       <author><first>James</first><last>Thomas</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
       <author><first>John</first><last>Shawe-Taylor</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>2837–2842</pages>
       <url hash="6fcb378d">D18-1308</url>
       <attachment type="attachment" hash="20327ddf">D18-1308.Attachment.pdf</attachment>
@@ -3933,7 +3933,7 @@
       <title>Evaluating the Utility of Hand-crafted Features in Sequence Labelling</title>
       <author><first>Minghao</first><last>Wu</last></author>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2850–2856</pages>
       <url hash="02f9b953">D18-1310</url>
       <abstract>Conventional wisdom is that hand-crafted features are redundant for deep learning models, as they already learn adequate representations of text automatically from corpora. In this work, we test this claim by proposing a new method for exploiting handcrafted features as part of a novel hybrid learning approach, incorporating a feature auto-encoder loss component. We evaluate on the task of named entity recognition (NER), where we show that including manual features for part-of-speech, word shapes and gazetteers can improve the performance of a neural CRF model. We obtain a F 1 of 91.89 for the CoNLL-2003 English shared task, which significantly outperforms a collection of highly competitive baseline models. We also present an ablation study showing the importance of auto-encoding, over using features as either inputs or outputs alone, and moreover, show including the autoencoder components reduces training requirements to 60%, while retaining the same predictive accuracy.</abstract>
@@ -3944,8 +3944,8 @@
     <paper id="311">
       <title>Improved Dependency Parsing using Implicit Word Connections Learned from Unlabeled Data</title>
       <author><first>Wenhui</first><last>Wang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
-      <author><first>Mairgup</first><last>Mansur</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
+      <author id="mairgup-mansur"><first>Mairgup</first><last>Mansur</last></author>
       <pages>2857–2863</pages>
       <url hash="0fa11117">D18-1311</url>
       <abstract>Pre-trained word embeddings and language model have been shown useful in a lot of tasks. However, both of them cannot directly capture word connections in a sentence, which is important for dependency parsing given its goal is to establish dependency relations between words. In this paper, we propose to implicitly capture word connections from unlabeled data by a word ordering model with self-attention mechanism. Experiments show that these implicit word connections do improve our parsing model. Furthermore, by combining with a pre-trained language model, our model gets state-of-the-art performance on the English PTB dataset, achieving 96.35% UAS and 95.25% LAS.</abstract>
@@ -3989,7 +3989,7 @@
     </paper>
     <paper id="315">
       <title>An Encoder-Decoder Approach to the Paradigm Cell Filling Problem</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>2883–2889</pages>
       <url hash="6ac95f40">D18-1315</url>
@@ -4029,7 +4029,7 @@
     <paper id="318">
       <title>Deep <fixed-case>B</fixed-case>ayesian Active Learning for Natural Language Processing: Results of a Large-Scale Empirical Study</title>
       <author><first>Aditya</first><last>Siddhant</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>2904–2909</pages>
       <url hash="9718d321">D18-1318</url>
       <abstract>Several recent papers investigate Active Learning (AL) for mitigating the data dependence of deep learning for natural language processing. However, the applicability of AL to real-world problems remains an open question. While in supervised learning, practitioners can try many different methods, evaluating each against a validation set before selecting a model, AL affords no such luxury. Over the course of one AL run, an agent annotates its dataset exhausting its labeling budget. Thus, given a new task, we have no opportunity to compare models and acquisition functions. This paper provides a large-scale empirical study of deep active learning, addressing multiple tasks and, for each, multiple datasets, multiple models, and a full suite of acquisition functions. We find that across all settings, Bayesian active learning by disagreement, using uncertainty estimates provided either by Dropout or Bayes-by-Backprop significantly improves over i.i.d. baselines and usually outperforms classic uncertainty sampling.</abstract>
@@ -4052,7 +4052,7 @@
       <title>Multimodal neural pronunciation modeling for spoken languages with logographic origin</title>
       <author><first>Minh</first><last>Nguyen</last></author>
       <author><first>Gia H.</first><last>Ngo</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>2916–2922</pages>
       <url hash="79c12afc">D18-1320</url>
       <attachment type="attachment" hash="2f66daab">D18-1320.Attachment.zip</attachment>
@@ -4085,7 +4085,7 @@
       <title>How to represent a word and predict it, too: Improving tied architectures for language modelling</title>
       <author><first>Kristina</first><last>Gulordava</last></author>
       <author><first>Laura</first><last>Aina</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>2936–2941</pages>
       <url hash="9536bb47">D18-1323</url>
       <attachment type="attachment" hash="7f751dbf">D18-1323.Attachment.pdf</attachment>
@@ -4107,10 +4107,10 @@
     </paper>
     <paper id="325">
       <title>Document-Level Neural Machine Translation with Hierarchical Attention Networks</title>
-      <author><first>Lesly</first><last>Miculicich</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last></author>
       <author><first>Dhananjay</first><last>Ram</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>2947–2954</pages>
       <url hash="b88e81b6">D18-1325</url>
       <attachment type="attachment" hash="912a5d76">D18-1325.Attachment.pdf</attachment>
@@ -4124,7 +4124,7 @@
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jingfang</first><last>Xu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>2955–2960</pages>
       <url hash="e6e1e760">D18-1326</url>
       <abstract>Due to the benefits of model compactness, multilingual translation (including many-to-one, many-to-many and one-to-many) based on a universal encoder-decoder architecture attracts more and more attention. However, previous studies show that one-to-many translation based on this framework cannot perform on par with the individually trained models. In this work, we introduce three strategies to improve one-to-many multilingual translation by balancing the shared and unique features. Within the architecture of one decoder for all target languages, we first exploit the use of unique initial states for different target languages. Then, we employ language-dependent positional embeddings. Finally and especially, we propose to divide the hidden cells of the decoder into shared and language-dependent ones. The extensive experiments demonstrate that our proposed methods can obtain remarkable improvements over the strong baselines. Moreover, our strategies can achieve comparable or even better performance than the individually trained translation models.</abstract>
@@ -4143,8 +4143,8 @@
     </paper>
     <paper id="328">
       <title>Fixing Translation Divergences in Parallel Corpora for Neural <fixed-case>MT</fixed-case></title>
-      <author><first>MinhQuang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>MinhQuang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>2967–2973</pages>
@@ -4166,9 +4166,9 @@
       <title>Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion</title>
       <author><first>Armand</first><last>Joulin</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <author><first>Hervé</first><last>Jégou</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <pages>2979–2984</pages>
       <url hash="6ceba9b0">D18-1330</url>
       <attachment type="attachment" hash="c4209512">D18-1330.Attachment.zip</attachment>
@@ -4228,7 +4228,7 @@
       <title>Towards Two-Dimensional Sequence to Sequence Model in Neural Machine Translation</title>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Christopher</first><last>Brix</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>3009–3015</pages>
       <url hash="a51d096d">D18-1335</url>
       <abstract>This work investigates an alternative model for neural machine translation (NMT) and proposes a novel architecture, where we employ a multi-dimensional long short-term memory (MDLSTM) for translation modelling. In the state-of-the-art methods, source and target sentences are treated as one-dimensional sequences over time, while we view translation as a two-dimensional (2D) mapping using an MDLSTM layer to define the correspondence between source and target words. We extend beyond the current sequence to sequence backbone NMT models to a 2D structure in which the source and target sentences are aligned with each other in a 2D grid. Our proposed topology shows consistent improvements over attention-based sequence to sequence model on two WMT 2017 tasks, German&lt;-&gt;English.</abstract>
@@ -4238,7 +4238,7 @@
     <paper id="336">
       <title>End-to-End Non-Autoregressive Neural Machine Translation with Connectionist Temporal Classification</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <pages>3016–3021</pages>
       <url hash="37fc4db3">D18-1336</url>
       <abstract>Autoregressive decoding is the only part of sequence-to-sequence models that prevents them from massive parallelization at inference time. Non-autoregressive models enable the decoder to generate all output symbols independently in parallel. We present a novel non-autoregressive architecture based on connectionist temporal classification and evaluate it on the task of neural machine translation. Unlike other non-autoregressive methods which operate in several steps, our model can be trained end-to-end. We conduct experiments on the WMT English-Romanian and English-German datasets. Our models achieve a significant speedup over the autoregressive models, keeping the translation quality comparable to other non-autoregressive models.</abstract>
@@ -4260,7 +4260,7 @@
     <paper id="338">
       <title>Training Deeper Neural Machine Translation Models with Transparent Attention</title>
       <author><first>Ankur</first><last>Bapna</last></author>
-      <author><first>Mia</first><last>Chen</last></author>
+      <author id="mia-xu-chen"><first>Mia</first><last>Chen</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <author><first>Yuan</first><last>Cao</last></author>
       <author><first>Yonghui</first><last>Wu</last></author>
@@ -4282,8 +4282,8 @@
     </paper>
     <paper id="340">
       <title>Encoding Gated Translation Memory into Neural Machine Translation</title>
-      <author id="qian-cao"><first>Qian</first><last>Cao</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author><first>Qian</first><last>Cao</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>3042–3047</pages>
       <url hash="cc592760">D18-1340</url>
       <abstract>Translation memories (TM) facilitate human translators to reuse existing repetitive translation fragments. In this paper, we propose a novel method to combine the strengths of both TM and neural machine translation (NMT) for high-quality translation. We treat the target translation of a TM match as an additional reference input and encode it into NMT with an extra encoder. A gating mechanism is further used to balance the impact of the TM match on the NMT decoder. Experiment results on the UN corpus demonstrate that when fuzzy matches are higher than 50%, the quality of NMT translation can be significantly improved by over 10 BLEU points.</abstract>
@@ -4292,8 +4292,8 @@
     </paper>
     <paper id="341">
       <title>Automatic Post-Editing of Machine Translation: A Neural Programmer-Interpreter Approach</title>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3048–3053</pages>
       <url hash="6306021e">D18-1341</url>
       <abstract>Automated Post-Editing (PE) is the task of automatically correct common and repetitive errors found in machine translation (MT) output. In this paper, we present a neural programmer-interpreter approach to this task, resembling the way that human perform post-editing using discrete edit operations, wich we refer to as programs. Our model outperforms previous neural models for inducing PE programs on the WMT17 APE task for German-English up to +1 BLEU score and -0.7 TER scores.</abstract>
@@ -4314,7 +4314,7 @@
     <paper id="343">
       <title>Multi-Multi-View Learning: Multilingual and Multi-Representation Entity Typing</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3060–3066</pages>
       <url hash="2b3df4de">D18-1343</url>
       <attachment type="attachment" hash="ced6de9c">D18-1343.Attachment.zip</attachment>
@@ -4372,7 +4372,7 @@
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Xiaohui</first><last>Yan</last></author>
       <author><first>Yi</first><last>Chang</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>3090–3099</pages>
       <url hash="2e1163e4">D18-1348</url>
       <abstract>User intent detection plays a critical role in question-answering and dialog systems. Most previous works treat intent detection as a classification problem where utterances are labeled with predefined intents. However, it is labor-intensive and time-consuming to label users’ utterances as intents are diversely expressed and novel intents will continually be involved. Instead, we study the zero-shot intent detection problem, which aims to detect emerging user intents where no labeled utterances are currently available. We propose two capsule-based architectures: IntentCapsNet that extracts semantic features from utterances and aggregates them to discriminate existing intents, and IntentCapsNet-ZSL which gives IntentCapsNet the zero-shot learning ability to discriminate emerging intents via knowledge transfer from existing intents. Experiments on two real-world datasets show that our model not only can better discriminate diversely expressed existing intents, but is also able to discriminate emerging intents when no labeled utterances are available.</abstract>
@@ -4477,8 +4477,8 @@
     <paper id="356">
       <title>Learning Neural Templates for Text Generation</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>3174–3187</pages>
       <url hash="1df55ba3">D18-1356</url>
       <abstract>While neural, encoder-decoder models have had significant empirical success in text generation, there remain several unaddressed problems with this style of generation. Encoder-decoder models are largely (a) uninterpretable, and (b) difficult to control in terms of their phrasing or content. This work proposes a neural generation system using a hidden semi-markov model (HSMM) decoder, which learns latent, discrete templates jointly with learning to generate. We show that this model learns useful templates, and that these templates make generation both more interpretable and controllable. Furthermore, we show that this approach scales to real data sets and achieves strong performance nearing that of encoder-decoder text generation models.</abstract>
@@ -4529,7 +4529,7 @@
       <title>Multi-Task Identification of Entities, Relations, and Coreference for Scientific Knowledge Graph Construction</title>
       <author><first>Yi</first><last>Luan</last></author>
       <author><first>Luheng</first><last>He</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>3219–3232</pages>
       <url hash="f9745a65">D18-1360</url>
@@ -4557,7 +4557,7 @@
     </paper>
     <paper id="362">
       <title>Multi-Hop Knowledge Graph Reasoning with Reward Shaping</title>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <pages>3243–3253</pages>
@@ -4570,8 +4570,8 @@
     </paper>
     <paper id="363">
       <title>Neural Transductive Learning and Beyond: Morphological Generation in the Minimal-Resource Setting</title>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3254–3264</pages>
       <url hash="82c87703">D18-1363</url>
       <attachment type="attachment" hash="3988a92c">D18-1363.Attachment.zip</attachment>
@@ -4609,10 +4609,10 @@
       <title>Adapting Word Embeddings to New Languages with Morphological and Phonological Subword Representations</title>
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Chunting</first><last>Zhou</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>3285–3295</pages>
       <url hash="9fc611e7">D18-1366</url>
       <attachment type="attachment" hash="1b5eb308">D18-1366.Attachment.zip</attachment>
@@ -4625,7 +4625,7 @@
       <title>A Computational Exploration of Exaggeration</title>
       <author><first>Enrica</first><last>Troiano</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
       <pages>3296–3304</pages>
       <url hash="7d00d8e6">D18-1367</url>
@@ -4646,7 +4646,7 @@
     <paper id="369">
       <title>Hierarchical <fixed-case>D</fixed-case>irichlet <fixed-case>G</fixed-case>aussian Marked <fixed-case>H</fixed-case>awkes Process for Narrative Reconstruction in Continuous Time Domain</title>
       <author><first>Yeon</first><last>Seonwoo</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <author><first>Sungjoon</first><last>Park</last></author>
       <pages>3316–3325</pages>
       <url hash="15ff88b8">D18-1369</url>
@@ -4658,7 +4658,7 @@
       <title>Investigating the Role of Argumentation in the Rhetorical Analysis of Scientific Publications with Neural Multi-Task Learning Models</title>
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Kai</first><last>Eckert</last></author>
       <pages>3326–3338</pages>
       <url hash="0f6c5c91">D18-1370</url>
@@ -4681,7 +4681,7 @@
       <title>Causal Explanation Analysis on Social Media</title>
       <author><first>Youngseo</first><last>Son</last></author>
       <author><first>Nipun</first><last>Bayas</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>3350–3359</pages>
       <url hash="30b2cdee">D18-1372</url>
       <attachment type="attachment" hash="01a94c37">D18-1372.Attachment.zip</attachment>
@@ -4715,8 +4715,8 @@
     <paper id="375">
       <title>A Genre-Aware Attention Model to Improve the Likability Prediction of Books</title>
       <author><first>Suraj</first><last>Maharjan</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>3381–3391</pages>
       <url hash="63bc6e39">D18-1375</url>
@@ -4739,7 +4739,7 @@
       <title><fixed-case>IARM</fixed-case>: Inter-Aspect Relation Modeling with Memory Networks in Aspect-Based Sentiment Analysis</title>
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <author><first>Md. Shad</first><last>Akhtar</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
@@ -4752,7 +4752,7 @@
     <paper id="378">
       <title><fixed-case>L</fixed-case>imbic: Author-Based Sentiment Aspect Modeling Regularized with Word Embeddings and Discourse Relations</title>
       <author><first>Zhe</first><last>Zhang</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <pages>3412–3422</pages>
       <url hash="e91e489f">D18-1378</url>
       <abstract>We propose Limbic, an unsupervised probabilistic model that addresses the problem of discovering aspects and sentiments and associating them with authors of opinionated texts. Limbic combines three ideas, incorporating authors, discourse relations, and word embeddings. For discourse relations, Limbic adopts a generative process regularized by a Markov Random Field. To promote words with high semantic similarity into the same topic, Limbic captures semantic regularities from word embeddings via a generalized Pólya Urn process. We demonstrate that Limbic (1) discovers aspects associated with sentiments with high lexical diversity; (2) outperforms state-of-the-art models by a substantial margin in topic cohesion and sentiment classification.</abstract>
@@ -4784,7 +4784,7 @@
     <paper id="381">
       <title>Attentive Gated Lexicon Reader with Contrastive Contextual Co-Attention for Sentiment Classification</title>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <pages>3443–3453</pages>
@@ -4800,7 +4800,7 @@
       <author><first>Dushyant</first><last>Chauhan</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3454–3466</pages>
       <url hash="fbb49caf">D18-1382</url>
       <abstract>Multi-modal sentiment analysis offers various challenges, one being the effective combination of different input modalities, namely text, visual and acoustic. In this paper, we propose a recurrent neural network based multi-modal attention framework that leverages the contextual information for utterance-level sentiment prediction. The proposed approach applies attention on multi-modal multi-utterance representations and tries to learn the contributing features amongst them. We evaluate our proposed approach on two multi-modal sentiment analysis benchmark datasets, viz. CMU Multi-modal Opinion-level Sentiment Intensity (CMU-MOSI) corpus and the recently released CMU Multi-modal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) corpus. Evaluation results show the effectiveness of our proposed approach with the accuracies of 82.31% and 79.80% for the MOSI and MOSEI datasets, respectively. These are approximately 2 and 1 points performance improvement over the state-of-the-art models for the datasets.</abstract>
@@ -4824,10 +4824,10 @@
       <title><fixed-case>E</fixed-case>xt<fixed-case>RA</fixed-case>: Extracting Prominent Review Aspects from Customer Feedback</title>
       <author><first>Zhiyi</first><last>Luo</last></author>
       <author><first>Shanshan</first><last>Huang</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Hanyuan</first><last>Shi</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>3477–3486</pages>
       <url hash="aa446773">D18-1384</url>
       <abstract>Many existing systems for analyzing and summarizing customer reviews about products or service are based on a number of prominent review aspects. Conventionally, the prominent review aspects of a product type are determined manually. This costly approach cannot scale to large and cross-domain services such as Amazon.com, Taobao.com or Yelp.com where there are a large number of product types and new products emerge almost every day. In this paper, we propose a novel framework, for extracting the most prominent aspects of a given product type from textual reviews. The proposed framework, ExtRA, extracts K most prominent aspect terms or phrases which do not overlap semantically automatically without supervision. Extensive experiments show that ExtRA is effective and achieves the state-of-the-art performance on a dataset consisting of different product types.</abstract>
@@ -4870,7 +4870,7 @@
       <title>Multi-view Models for Political Ideology Detection of News Articles</title>
       <author><first>Vivek</first><last>Kulkarni</last></author>
       <author><first>Junting</first><last>Ye</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>3518–3527</pages>
       <url hash="0c9a6481">D18-1388</url>
@@ -4883,8 +4883,8 @@
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Dimitar</first><last>Alexandrov</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>3528–3539</pages>
       <url hash="b654b880">D18-1389</url>
       <abstract>We present a study on predicting the factuality of reporting and bias of news media. While previous work has focused on studying the veracity of claims or documents, here we are interested in characterizing entire news media. This is an under-studied, but arguably important research problem, both in its own right and as a prior for fact-checking systems. We experiment with a large list of news websites and with a rich set of features derived from (i) a sample of articles from the target news media, (ii) its Wikipedia page, (iii) its Twitter account, (iv) the structure of its URL, and (v) information about the Web traffic it attracts. The experimental results show sizable performance gains over the baseline, and reveal the importance of each feature type.</abstract>
@@ -4920,7 +4920,7 @@
     <paper id="392">
       <title>Residualized Factor Adaptation for Community Social Media Prediction Tasks</title>
       <author><first>Mohammadzaman</first><last>Zamani</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Veronica</first><last>Lynn</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
@@ -4936,7 +4936,7 @@
       <author><first>Doron</first><last>Kliger</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <author><first>Jennifer</first><last>Pan</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>3570–3580</pages>
       <url hash="76ca3a63">D18-1393</url>
@@ -5005,7 +5005,7 @@
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Yong</first><last>Wang</last></author>
       <author><first>Yun</first><last>Chen</last></author>
-      <author><first>Victor O. K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O. K.</first><last>Li</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>3622–3631</pages>
       <url hash="d89d9237">D18-1398</url>
@@ -5017,8 +5017,8 @@
     <paper id="399">
       <title>Unsupervised Statistical Machine Translation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>3632–3642</pages>
       <url hash="c7c2e722">D18-1399</url>
       <abstract>While modern machine translation has relied on large parallel corpora, a recent line of work has managed to train Neural Machine Translation (NMT) systems from monolingual corpora only (Artetxe et al., 2018c; Lample et al., 2018). Despite the potential of this approach for low-resource settings, existing systems are far behind their supervised counterparts, limiting their practical interest. In this paper, we propose an alternative approach based on phrase-based Statistical Machine Translation (SMT) that significantly closes the gap with supervised systems. Our method profits from the modular architecture of SMT: we first induce a phrase table from monolingual corpora through cross-lingual embedding mappings, combine it with an n-gram language model, and fine-tune hyperparameters through an unsupervised MERT variant. In addition, iterative backtranslation improves results further, yielding, for instance, 14.08 and 26.22 BLEU points in WMT 2014 English-German and English-French, respectively, an improvement of more than 7-10 BLEU points over previous unsupervised systems, and closing the gap with supervised SMT (Moses trained on Europarl) down to 2-5 BLEU points. Our implementation is available at <url>https://github.com/artetxem/monoses</url>.</abstract>
@@ -5050,7 +5050,7 @@
       <author><first>Xiaozhong</first><last>Liu</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3654–3663</pages>
       <url hash="a0529583">D18-1401</url>
       <abstract>In an e-commerce environment, user-oriented question-answering (QA) text pair could carry rich sentiment information. In this study, we propose a novel task/method to address QA sentiment analysis. In particular, we create a high-quality annotated corpus with specially-designed annotation guidelines for QA-style sentiment classification. On the basis, we propose a three-stage hierarchical matching network to explore deep sentiment information in a QA text pair. First, we segment both the question and answer text into sentences and construct a number of [Q-sentence, A-sentence] units in each QA text pair. Then, by leveraging a QA bidirectional matching layer, the proposed approach can learn the matching vectors of each [Q-sentence, A-sentence] unit. Finally, we characterize the importance of the generated matching vectors via a self-matching attention layer. Experimental results, comparing with a number of state-of-the-art baselines, demonstrate the impressive effectiveness of the proposed approach for QA-style sentiment classification.</abstract>
@@ -5103,7 +5103,7 @@
     <paper id="405">
       <title>Noise Contrastive Estimation and Negative Sampling for Conditional Models: Consistency and Statistical Efficiency</title>
       <author><first>Zhuang</first><last>Ma</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>3698–3707</pages>
       <url hash="dfabf192">D18-1405</url>
       <attachment type="attachment" hash="4f84098c">D18-1405.Attachment.pdf</attachment>
@@ -5160,7 +5160,7 @@
       <author><first>Yikang</first><last>Shen</last></author>
       <author><first>Eric</first><last>Crawford</last></author>
       <author><first>Herke</first><last>van Hoof</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3739–3748</pages>
       <url hash="8201e19d">D18-1409</url>
       <attachment type="attachment" hash="190e9519">D18-1409.Attachment.pdf</attachment>
@@ -5183,10 +5183,10 @@
     <paper id="411">
       <title>Learning Latent Semantic Annotations for Grounding Natural Language to Structured Data</title>
       <author><first>Guanghui</first><last>Qin</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
       <author><first>Xuening</first><last>Wang</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3761–3771</pages>
       <url hash="699690bf">D18-1411</url>
       <attachment type="attachment" hash="bbc06b8e">D18-1411.Attachment.pdf</attachment>
@@ -5200,9 +5200,9 @@
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3772–3782</pages>
       <url hash="f1038a53">D18-1412</url>
       <attachment type="attachment" hash="a8278c60">D18-1412.Attachment.zip</attachment>
@@ -5216,7 +5216,7 @@
       <author><first>Noah</first><last>Weber</last></author>
       <author><first>Leena</first><last>Shekhar</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>3783–3792</pages>
       <url hash="9e1a587d">D18-1413</url>
       <abstract>Scripts define knowledge about how everyday scenarios (such as going to a restaurant) are expected to unfold. One of the challenges to learning scripts is the hierarchical nature of the knowledge. For example, a suspect arrested might plead innocent or guilty, and a very different track of events is then expected to happen. To capture this type of information, we propose an autoencoder model with a latent space defined by a hierarchy of categorical variables. We utilize a recently proposed vector quantization based approach, which allows continuous embeddings to be associated with each latent variable value. This permits the decoder to softly decide what portions of the latent hierarchy to condition on by attending over the value embeddings for a given setting. Our model effectively encodes and generates scripts, outperforming a recent language modeling-based method on several standard tasks, and allowing the autoencoder model to achieve substantially lower perplexity scores compared to the previous language modeling-based method.</abstract>
@@ -5227,9 +5227,9 @@
     <paper id="414">
       <title>Semantic Role Labeling for Learner <fixed-case>C</fixed-case>hinese: the Importance of Syntactic Parsing and <fixed-case>L</fixed-case>2-<fixed-case>L</fixed-case>1 Parallel Data</title>
       <author><first>Zi</first><last>Lin</last></author>
-      <author><first>Yuguang</first><last>Duan</last></author>
+      <author id="yuguang-duan"><first>Yuguang</first><last>Duan</last></author>
       <author><first>Yuanyuan</first><last>Zhao</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>3793–3802</pages>
       <url hash="c4999b6a">D18-1414</url>
@@ -5243,8 +5243,8 @@
       <author><first>Weikang</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Han</first><last>Zhang</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Zhifei</first><last>Li</last></author>
       <pages>3803–3812</pages>
       <url hash="3100682d">D18-1415</url>
@@ -5292,7 +5292,7 @@
     <paper id="419">
       <title><fixed-case>A</fixed-case>ir<fixed-case>D</fixed-case>ialogue: An Environment for Goal-Oriented Dialogue Research</title>
       <author><first>Wei</first><last>Wei</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Andrew</first><last>Dai</last></author>
       <author><first>Jia</first><last>Li</last></author>
       <pages>3844–3854</pages>
@@ -5331,9 +5331,9 @@
       <title>Operation-guided Neural Networks for High Fidelity Data-To-Text Generation</title>
       <author><first>Feng</first><last>Nie</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
       <author><first>Rong</first><last>Pan</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3879–3889</pages>
       <url hash="94f588fb">D18-1422</url>
       <abstract>Recent neural models for data-to-text generation are mostly based on data-driven end-to-end training over encoder-decoder networks. Even though the generated texts are mostly fluent and informative, they often generate descriptions that are not consistent with the input structured data. This is a critical issue especially in domains that require inference or calculations over raw data. In this paper, we attempt to improve the fidelity of neural data-to-text generation by utilizing pre-executed symbolic operations. We propose a framework called Operation-guided Attention-based sequence-to-sequence network (OpAtt), with a specifically designed gating mechanism as well as a quantization module for operation results to utilize information from pre-executed operations. Experiments on two sports datasets show our proposed method clearly improves the fidelity of the generated texts to the input structured data.</abstract>
@@ -5380,7 +5380,7 @@
       <author><first>Qingning</first><last>Yao</last></author>
       <author><first>Shanelle</first><last>Roman</last></author>
       <author><first>Zilin</first><last>Zhang</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>3911–3921</pages>
       <url hash="4d45a09f">D18-1425</url>
       <attachment type="attachment" hash="87f61a88">D18-1425.Attachment.zip</attachment>
@@ -5428,7 +5428,7 @@
     <paper id="429">
       <title>Towards a Better Metric for Evaluating Question Generation Systems</title>
       <author><first>Preksha</first><last>Nema</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>3950–3959</pages>
       <url hash="927b3062">D18-1429</url>
       <abstract>There has always been criticism for using <tex-math>n</tex-math>-gram based similarity metrics, such as BLEU, NIST, <i>etc</i>, for evaluating the performance of NLG systems. However, these metrics continue to remain popular and are recently being used for evaluating the performance of systems which automatically generate questions from documents, knowledge graphs, images, <i>etc</i>. Given the rising interest in such automatic question generation (AQG) systems, it is important to objectively examine whether these metrics are suitable for this task. In particular, it is important to verify whether such metrics used for evaluating AQG systems focus on <i>answerability</i> of the generated question by preferring questions which contain all relevant information such as question type (Wh-types), entities, relations, <i>etc</i>. In this work, we show that current automatic evaluation metrics based on <tex-math>n</tex-math>-gram similarity do not always correlate well with human judgments about <i>answerability</i> of a question. To alleviate this problem and as a first step towards better evaluation metrics for AQG, we introduce a scoring function to capture <i>answerability</i> and show that when this scoring function is integrated with existing metrics, they correlate significantly better with human judgments. The scripts and data developed as a part of this work are made publicly available.</abstract>
@@ -5453,7 +5453,7 @@
       <author><first>Ashutosh</first><last>Baheti</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>3970–3980</pages>
       <url hash="e53a61f6">D18-1431</url>
       <abstract>Neural conversation models tend to generate safe, generic responses for most inputs. This is due to the limitations of likelihood-based decoding objectives in generation tasks with diverse outputs, such as conversation. To address this challenge, we propose a simple yet effective approach for incorporating side information in the form of distributional constraints over the generated responses. We propose two constraints that help generate more content rich responses that are based on a model of syntax and topics (Griffiths et al., 2005) and semantic similarity (Arora et al., 2016). We evaluate our approach against a variety of competitive baselines, using both automatic metrics and human judgments, showing that our proposed approach generates responses that are much less generic without sacrificing plausibility. A working demo of our code can be found at <url>https://github.com/abaheti95/DC-NeuralConversation</url>.</abstract>
@@ -5478,8 +5478,8 @@
       <author><first>Spencer</first><last>Whitehead</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>3992–4001</pages>
       <url hash="5afdcad9">D18-1433</url>
       <abstract>Most previous efforts toward video captioning focus on generating generic descriptions, such as, “A man is talking.” We collect a news video dataset to generate enriched descriptions that include important background knowledge, such as named entities and related events, which allows the user to fully understand the video content. We develop an approach that uses video meta-data to retrieve topically related news documents for a video and extracts the events and named entities from these documents. Then, given the video as well as the extracted events and entities, we generate a description using a Knowledge-aware Video Description network. The model learns to incorporate entities found in the topically related documents into the description via an entity pointer network and the generation procedure is guided by the event and entity types from the topically related documents through a knowledge gate, which is a gating mechanism added to the model’s decoder that takes a one-hot vector of these types. We evaluate our approach on the new dataset of news videos we have collected, establishing the first benchmark for this dataset as well as proposing a new metric to evaluate these descriptions.</abstract>
@@ -5505,7 +5505,7 @@
       <author><first>Spencer</first><last>Whitehead</last></author>
       <author><first>Lifu</first><last>Huang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <pages>4013–4023</pages>
       <url hash="7d35d96a">D18-1435</url>
       <abstract>Current image captioning approaches generate descriptions which lack specific information, such as named entities that are involved in the images. In this paper we propose a new task which aims to generate informative image captions, given images and hashtags as input. We propose a simple but effective approach to tackle this problem. We first train a convolutional neural networks - long short term memory networks (CNN-LSTM) model to generate a template caption based on the input image. Then we use a knowledge graph based collective inference algorithm to fill in the template with specific named entities retrieved via the hashtags. Experiments on a new benchmark dataset collected from Flickr show that our model generates news-style image descriptions with much richer information. Our model outperforms unimodal baselines significantly with various evaluation metrics.</abstract>
@@ -5600,7 +5600,7 @@
       <title>Bottom-Up Abstractive Summarization</title>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>4098–4109</pages>
       <url hash="70450f24">D18-1443</url>
       <abstract>Neural summarization produces outputs that are fluent and readable, but which can be poor at content selection, for instance often copying full sentences from the source document. This work explores the use of data-efficient content selectors to over-determine phrases in a source document that should be part of the summary. We use this selector as a bottom-up attention step to constrain the model to likely phrases. We show that this approach improves the ability to compress text, while still generating fluent summaries. This two-step process is both simpler and higher performing than other end-to-end content selection models, leading to significant improvements on ROUGE for both the CNN-DM and NYT corpus. Furthermore, the content selector can be trained with as little as 1,000 sentences making it easy to transfer a trained summarizer to a new domain.</abstract>
@@ -5611,7 +5611,7 @@
       <title>Controlling Length in Abstractive Summarization Using a Convolutional Neural Network</title>
       <author><first>Yizhu</first><last>Liu</last></author>
       <author><first>Zhiyi</first><last>Luo</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>4110–4119</pages>
       <url hash="39d26cd6">D18-1444</url>
       <abstract>Convolutional neural networks (CNNs) have met great success in abstractive summarization, but they cannot effectively generate summaries of desired lengths. Because generated summaries are used in difference scenarios which may have space or length constraints, the ability to control the summary length in abstractive summarization is an important problem. In this paper, we propose an approach to constrain the summary length by extending a convolutional sequence to sequence model. The results show that this approach generates high-quality summaries with user defined length, and outperforms the baselines consistently in terms of ROUGE score, length variations and semantic similarity.</abstract>
@@ -5658,7 +5658,7 @@
       <author><first>Tianshang</first><last>Liu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>4154–4164</pages>
       <url hash="5448f81f">D18-1448</url>
       <attachment type="attachment" hash="add3e5f4">D18-1448.Attachment.zip</attachment>
@@ -5701,9 +5701,9 @@
     </paper>
     <paper id="452">
       <title>Joint Multitask Learning for Community Question Answering Using Task-Specific Embeddings</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>4196–4207</pages>
       <url hash="91af0c00">D18-1452</url>
       <abstract>We address jointly two important tasks for Question Answering in community forums: given a new question, (i) find related existing questions, and (ii) find relevant answers to this new question. We further use an auxiliary task to complement the previous two, i.e., (iii) find good answers with respect to the thread question in a question-comment thread. We use deep neural networks (DNNs) to learn meaningful task-specific embeddings, which we then incorporate into a conditional random field (CRF) model for the multitask setting, performing joint learning over a complex graph structure. While DNNs alone achieve competitive results when trained to produce the embeddings, the CRF, which makes use of the embeddings and the dependencies between the tasks, improves the results significantly and consistently across a variety of evaluation metrics, thus showing the complementarity of DNNs and structured learning.</abstract>
@@ -5716,7 +5716,7 @@
       <author><first>Saku</first><last>Sugawara</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>4208–4219</pages>
       <url hash="e164a29c">D18-1453</url>
       <abstract>A challenge in creating a dataset for machine reading comprehension (MRC) is to collect questions that require a sophisticated understanding of language to answer beyond using superficial cues. In this work, we investigate what makes questions easier across recent 12 MRC datasets with three question styles (answer extraction, description, and multiple choice). We propose to employ simple heuristics to split each dataset into easy and hard subsets and examine the performance of two baseline models for each of the subsets. We then manually annotate questions sampled from each subset with both validity and requisite reasoning skills to investigate which skills explain the difference between easy and hard questions. From this study, we observed that (i) the baseline performances for the hard subsets remarkably degrade compared to those of entire datasets, (ii) hard questions require knowledge inference and multiple-sentence reasoning in comparison with easy questions, and (iii) multiple-choice questions tend to require a broader range of reasoning skills than answer extraction and description questions. These results suggest that one might overestimate recent advances in MRC.</abstract>
@@ -5744,7 +5744,7 @@
       <author><first>Manzil</first><last>Zaheer</last></author>
       <author><first>Kathryn</first><last>Mazaitis</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>4231–4242</pages>
       <url hash="7ac458ba">D18-1455</url>
       <abstract>Open Domain Question Answering (QA) is evolving from complex pipelined systems to end-to-end deep neural networks. Specialized neural models have been developed for extracting answers from either text alone or Knowledge Bases (KBs) alone. In this paper we look at a more practical setting, namely QA over the combination of a KB and entity-linked text, which is appropriate when an incomplete KB is available with a large text corpus. Building on recent advances in graph representation learning we propose a novel model, GRAFT-Net, for extracting answers from a question-specific subgraph containing text and KB entities and relations. We construct a suite of benchmark tasks for this problem, varying the difficulty of questions, the amount of training data, and KB completeness. We show that GRAFT-Net is competitive with the state-of-the-art when tested using either KBs or text alone, and vastly outperforms existing methods in the combined setting.</abstract>
@@ -5781,7 +5781,7 @@
       <title>Why Self-Attention? A Targeted Evaluation of Neural Machine Translation Architectures</title>
       <author><first>Gongbo</first><last>Tang</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>4263–4272</pages>
       <url hash="fe1f9b90">D18-1458</url>
@@ -5793,7 +5793,7 @@
     <paper id="459">
       <title>Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks</title>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Qian</first><last>Lin</last></author>
       <author><first>Huiji</first><last>Zhang</last></author>
@@ -5891,7 +5891,7 @@
       <title>Getting to “Hearer-old”: Charting Referring Expressions Across Time</title>
       <author><first>Ieva</first><last>Staliūnaitė</last></author>
       <author><first>Hannah</first><last>Rohde</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Annie</first><last>Louis</last></author>
       <pages>4350–4359</pages>
       <url hash="bc342646">D18-1466</url>
@@ -5949,7 +5949,7 @@
       <title>Why Swear? Analyzing and Inferring the Intentions of Vulgar Expressions</title>
       <author><first>Eric</first><last>Holgate</last></author>
       <author><first>Isabel</first><last>Cachola</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <pages>4405–4414</pages>
       <url hash="776c9b27">D18-1471</url>
@@ -5985,7 +5985,7 @@
     <paper id="474">
       <title>Speed Reading: Learning to Read <fixed-case>F</fixed-case>or<fixed-case>B</fixed-case>ackward via Shuttle</title>
       <author><first>Tsu-Jui</first><last>Fu</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>4439–4448</pages>
       <url hash="bf6b32b4">D18-1474</url>
       <abstract>We present LSTM-Shuttle, which applies human speed reading techniques to natural language processing tasks for accurate and efficient comprehension. In contrast to previous work, LSTM-Shuttle not only reads shuttling forward but also goes back. Shuttling forward enables high efficiency, and going backward gives the model a chance to recover lost information, ensuring better prediction. We evaluate LSTM-Shuttle on sentiment analysis, news classification, and cloze on IMDB, Rotten Tomatoes, AG, and Children’s Book Test datasets. We show that LSTM-Shuttle predicts both better and more quickly. To demonstrate how LSTM-Shuttle actually behaves, we also analyze the shuttling operation and present a case study.</abstract>
@@ -6025,7 +6025,7 @@
       <title>Simple Recurrent Units for Highly Parallelizable Recurrence</title>
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <author><first>Hui</first><last>Dai</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <pages>4470–4481</pages>
@@ -6054,7 +6054,7 @@
     <paper id="479">
       <title>Co-Stack Residual Affinity Networks with Multi-level Attention Refinement for Matching Text Sequences</title>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
       <pages>4492–4502</pages>
       <url hash="21bf2e3c">D18-1479</url>
@@ -6076,7 +6076,7 @@
       <title>Learning Universal Sentence Representations with Mean-Max Attention Autoencoder</title>
       <author><first>Minghua</first><last>Zhang</last></author>
       <author><first>Yunfang</first><last>Wu</last></author>
-      <author><first>Weikang</first><last>Li</last></author>
+      <author id="weigang-li"><first>Weikang</first><last>Li</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <pages>4514–4523</pages>
       <url hash="34168a24">D18-1481</url>
@@ -6093,7 +6093,7 @@
       <author><first>Avinash</first><last>Balakrishnan</last></author>
       <author><first>Pin-Yu</first><last>Chen</last></author>
       <author><first>Pradeep</first><last>Ravikumar</last></author>
-      <author><first>Michael J.</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael J.</first><last>Witbrock</last></author>
       <pages>4524–4534</pages>
       <url hash="ca029237">D18-1482</url>
       <attachment type="attachment" hash="82f8fe42">D18-1482.Attachment.zip</attachment>
@@ -6104,9 +6104,9 @@
     <paper id="483">
       <title>Multilingual Clustering of Streaming News</title>
       <author><first>Sebastião</first><last>Miranda</last></author>
-      <author><first>Artūrs</first><last>Znotiņš</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="arturs-znotins"><first>Artūrs</first><last>Znotiņš</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <pages>4535–4544</pages>
       <url hash="367c080b">D18-1483</url>
       <abstract>Clustering news across languages enables efficient media monitoring by aggregating articles from multilingual sources into coherent stories. Doing so in an online setting allows scalable processing of massive news streams. To this end, we describe a novel method for clustering an incoming stream of multilingual documents into monolingual and crosslingual clusters. Unlike typical clustering approaches that report results on datasets with a small and known number of labels, we tackle the problem of discovering an ever growing number of cluster labels in an online fashion, using real news datasets in multiple languages. In our formulation, the monolingual clusters group together documents while the crosslingual clusters group together monolingual clusters, one per language that appears in the stream. Our method is simple to implement, computationally efficient and produces state-of-the-art results on datasets in German, English and Spanish.</abstract>
@@ -6198,7 +6198,7 @@
     <paper id="491">
       <title>Pyramidal Recurrent Unit for Language Modeling</title>
       <author><first>Sachin</first><last>Mehta</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Mohammad</first><last>Rastegari</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>4620–4630</pages>
@@ -6209,7 +6209,7 @@
     </paper>
     <paper id="492">
       <title>On Tree-Based Neural Sentence Modeling</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Jiaze</first><last>Chen</last></author>
       <author><first>Lei</first><last>Li</last></author>
@@ -6279,8 +6279,8 @@
       <author><first>Sarthak</first><last>Jain</last></author>
       <author><first>Edward</first><last>Banner</last></author>
       <author><first>Jan-Willem</first><last>van de Meent</last></author>
-      <author><first>Iain J.</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain J.</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>4683–4693</pages>
       <url hash="7116b988">D18-1497</url>
       <attachment type="attachment" hash="96ed59f2">D18-1497.Attachment.pdf</attachment>
@@ -6301,7 +6301,7 @@
     </paper>
     <paper id="499">
       <title>A Neural Model of Adaptation in Reading</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>4704–4710</pages>
       <url hash="8b02f759">D18-1499</url>
@@ -6313,7 +6313,7 @@
     </paper>
     <paper id="500">
       <title>Understanding Deep Learning Performance through an Examination of Test Set Difficulty: A Psychometric Case Study</title>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Hao</first><last>Wu</last></author>
       <author><first>Tsendsuren</first><last>Munkhdalai</last></author>
       <author><first>Hong</first><last>Yu</last></author>
@@ -6342,7 +6342,7 @@
     <paper id="502">
       <title>Dual Fixed-Size Ordinally Forgetting Encoding (<fixed-case>FOFE</fixed-case>) for Competitive Neural Language Models</title>
       <author><first>Sedtawut</first><last>Watcharawittayakul</last></author>
-      <author><first>Mingbin</first><last>Xu</last></author>
+      <author id="mingbin-xu"><first>Mingbin</first><last>Xu</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
       <pages>4725–4730</pages>
       <url hash="ecbbd574">D18-1502</url>
@@ -6353,7 +6353,7 @@
     </paper>
     <paper id="503">
       <title>The Importance of Being Recurrent for Modeling Hierarchical Structure</title>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>4731–4736</pages>
@@ -6404,10 +6404,10 @@
     </paper>
     <paper id="507">
       <title>Modeling Empathy and Distress in Reaction to News Stories</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
       <author><first>Barry</first><last>Slaff</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>4758–4765</pages>
       <url hash="b84d8729">D18-1507</url>
@@ -6419,8 +6419,8 @@
     <paper id="508">
       <title>Interpretable Emoji Prediction via Label-Wise Attention <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>4766–4771</pages>
@@ -6461,7 +6461,7 @@
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>4785–4790</pages>
       <url hash="2fbcd7c1">D18-1511</url>
@@ -6513,9 +6513,9 @@
     </paper>
     <paper id="515">
       <title>A strong baseline for question relevancy ranking</title>
-      <author><first>Ana</first><last>Gonzalez</last></author>
+      <author id="ana-gonzalez-ledesma"><first>Ana</first><last>Gonzalez</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4810–4815</pages>
       <url hash="49d9bdb0">D18-1515</url>
       <abstract>The best systems at the SemEval-16 and SemEval-17 community question answering shared tasks – a task that amounts to question relevancy ranking – involve complex pipelines and manual feature engineering. Despite this, many of these still fail at beating the IR baseline, i.e., the rankings provided by Google’s search engine. We present a strong baseline for question relevancy ranking by training a simple multi-task feed forward network on a bag of 14 distance measures for the input question pair. This baseline model, which is fast to train and uses only language-independent features, outperforms the best shared task systems on the task of retrieving relevant previously asked questions.</abstract>
@@ -6525,7 +6525,7 @@
     </paper>
     <paper id="516">
       <title>Learning Sequence Encoders for Temporal Knowledge Graph Completion</title>
-      <author><first>Alberto</first><last>García-Durán</last></author>
+      <author id="alberto-garcia-duran"><first>Alberto</first><last>García-Durán</last></author>
       <author><first>Sebastijan</first><last>Dumančić</last></author>
       <author><first>Mathias</first><last>Niepert</last></author>
       <pages>4816–4821</pages>
@@ -6537,7 +6537,7 @@
     </paper>
     <paper id="517">
       <title>Similar but not the Same: Word Sense Disambiguation Improves Event Detection via Neural Representation Matching</title>
-      <author><first>Weiyi</first><last>Lu</last></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Lu</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <pages>4822–4828</pages>
       <url hash="e7585bb9">D18-1517</url>
@@ -6562,7 +6562,7 @@
       <author><first>Hong-You</first><last>Chen</last></author>
       <author><first>Cheng-Syuan</first><last>Lee</last></author>
       <author><first>Keng-Te</first><last>Liao</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>4834–4839</pages>
       <url hash="2f94193b">D18-1519</url>
       <abstract>Lexicon relation extraction given distributional representation of words is an important topic in NLP. We observe that the state-of-the-art projection-based methods cannot be generalized to handle unseen hypernyms. We propose to analyze it in the perspective of pollution and construct the corresponding indicator to measure it. We propose a word relation autoencoder (WRAE) model to address the challenge. Experiments on several hypernym-like lexicon datasets show that our model outperforms the competitors significantly.</abstract>
@@ -6618,7 +6618,7 @@
     </paper>
     <paper id="524">
       <title><fixed-case>I</fixed-case>nfer<fixed-case>L</fixed-case>ite: Simple Universal Sentence Representations from Natural Language Inference Data</title>
-      <author><first>Jamie</first><last>Kiros</last></author>
+      <author id="jamie-kiros"><first>Jamie</first><last>Kiros</last></author>
       <author><first>William</first><last>Chan</last></author>
       <pages>4868–4874</pages>
       <url hash="0689d556">D18-1524</url>
@@ -6669,8 +6669,8 @@
       <title>Classifying Referential and Non-referential It Using Gaze</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
       <author><first>Le An</first><last>Ha</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>4896–4901</pages>
       <url hash="93692ed8">D18-1528</url>
       <abstract>When processing a text, humans and machines must disambiguate between different uses of the pronoun it, including non-referential, nominal anaphoric or clause anaphoric ones. In this paper we use eye-tracking data to learn how humans perform this disambiguation and use this knowledge to improve the automatic classification of it. We show that by using gaze data and a POS-tagger we are able to significantly outperform a common baseline and classify between three categories of it with an accuracy comparable to that of linguistic-based approaches. In addition, the discriminatory power of specific gaze features informs the way humans process the pronoun, which, to the best of our knowledge, has not been explored using data from a natural reading task.</abstract>
@@ -6705,7 +6705,7 @@
     <paper id="531">
       <title>Unsupervised Neural Word Segmentation for <fixed-case>C</fixed-case>hinese via Segmental Language Modeling</title>
       <author><first>Zhiqing</first><last>Sun</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <pages>4915–4920</pages>
       <url hash="8dbd5a45">D18-1531</url>
       <abstract>Previous traditional approaches to unsupervised Chinese word segmentation (CWS) can be roughly classified into discriminative and generative models. The former uses the carefully designed goodness measures for candidate segmentation, while the latter focuses on finding the optimal segmentation of the highest generative probability. However, while there exists a trivial way to extend the discriminative models into neural version by using neural language models, those of generative ones are non-trivial. In this paper, we propose the segmental language models (SLMs) for CWS. Our approach explicitly focuses on the segmental nature of Chinese, as well as preserves several properties of language models. In SLMs, a context encoder encodes the previous context and a segment decoder generates each segment incrementally. As far as we know, we are the first to propose a neural model for unsupervised CWS and achieve competitive performance to the state-of-the-art statistical models on four different datasets from SIGHAN 2005 bakeoff.</abstract>
@@ -6717,7 +6717,7 @@
       <author><first>Daniel</first><last>Kondratyuk</last></author>
       <author><first>Tomáš</first><last>Gavenčiak</last></author>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>4921–4928</pages>
       <url hash="09483f27">D18-1532</url>
       <attachment type="attachment" hash="741ad562">D18-1532.Attachment.zip</attachment>
@@ -6728,7 +6728,7 @@
     <paper id="533">
       <title>Recovering Missing Characters in Old <fixed-case>H</fixed-case>awaiian Writing</title>
       <author><first>Brendan</first><last>Shillingford</last></author>
-      <author><first>Oiwi</first><last>Parker Jones</last></author>
+      <author id="oiwi-parker-jones"><first>Oiwi</first><last>Parker Jones</last></author>
       <pages>4929–4934</pages>
       <url hash="ee1f402b">D18-1533</url>
       <attachment type="attachment" hash="1ae36c24">D18-1533.Attachment.pdf</attachment>
@@ -6787,9 +6787,9 @@
     </paper>
     <paper id="538">
       <title>Towards Semi-Supervised Learning for Deep Semantic Role Labeling</title>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last></author>
       <author><first>Jay Yoon</first><last>Lee</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>4958–4963</pages>
       <url hash="de164f85">D18-1538</url>
       <attachment type="attachment" hash="4dc5bf7e">D18-1538.Attachment.pdf</attachment>
@@ -6802,7 +6802,7 @@
       <author><first>James</first><last>Ferguson</last></author>
       <author><first>Janara</first><last>Christensen</last></author>
       <author><first>Edward</first><last>Li</last></author>
-      <author><first>Edgar</first><last>Gonzàlez</last></author>
+      <author id="edgar-gonzalez-pellicer"><first>Edgar</first><last>Gonzàlez</last></author>
       <pages>4964–4969</pages>
       <url hash="c8df40e0">D18-1539</url>
       <abstract>When the semantics of a sentence are not representable in a semantic parser’s output schema, parsing will inevitably fail. Detection of these instances is commonly treated as an out-of-domain classification problem. However, there is also a more subtle scenario in which the test data is drawn from the same domain. In addition to formalizing this problem of domain-adjacency, we present a comparison of various baselines that could be used to solve it. We also propose a new simple sentence representation that emphasizes words which are unexpected. This approach improves the performance of a downstream semantic parser run on in-domain and domain-adjacent instances.</abstract>
@@ -6836,7 +6836,7 @@
     <paper id="542">
       <title>Modeling Input Uncertainty in Neural Network Dependency Parsing</title>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>4984–4991</pages>
       <url hash="3fba5307">D18-1542</url>
       <attachment type="attachment" hash="426c1a8b">D18-1542.Attachment.pdf</attachment>
@@ -6849,12 +6849,12 @@
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>4992–4997</pages>
       <url hash="acc741c2">D18-1543</url>
       <attachment type="attachment" hash="3aa1858a">D18-1543.Attachment.pdf</attachment>
-      <abstract>Previous work has suggested that parameter sharing between transition-based neural dependency parsers for related languages can lead to better performance, but there is no consensus on what parameters to share. We present an evaluation of 27 different parameter sharing strategies across 10 languages, representing five pairs of related languages, each pair from a different language family. We find that sharing transition classifier parameters always helps, whereas the usefulness of sharing word and/or character LSTM parameters varies. Based on this result, we propose an architecture where the transition classifier is shared, and the sharing of word and character parameters is controlled by a parameter that can be tuned on validation data. This model is linguistically motivated and obtains significant improvements over a monolingually trained baseline. We also find that sharing transition classifier parameters helps when training a parser on unrelated language pairs, but we find that, in the case of unrelated languages, sharing too many parameters does not help.</abstract>
       <attachment type="poster" hash="7d484185">D18-1543.Poster.pdf</attachment>
+      <abstract>Previous work has suggested that parameter sharing between transition-based neural dependency parsers for related languages can lead to better performance, but there is no consensus on what parameters to share. We present an evaluation of 27 different parameter sharing strategies across 10 languages, representing five pairs of related languages, each pair from a different language family. We find that sharing transition classifier parameters always helps, whereas the usefulness of sharing word and/or character LSTM parameters varies. Based on this result, we propose an architecture where the transition classifier is shared, and the sharing of word and character parameters is controlled by a parameter that can be tuned on validation data. This model is linguistically motivated and obtains significant improvements over a monolingually trained baseline. We also find that sharing transition classifier parameters helps when training a parser on unrelated language pairs, but we find that, in the case of unrelated languages, sharing too many parameters does not help.</abstract>
       <doi>10.18653/v1/D18-1543</doi>
       <bibkey>de-lhoneux-etal-2018-parameter</bibkey>
     </paper>
@@ -6862,7 +6862,7 @@
       <title>Grammar Induction with Neural Language Models: An Unusual Replication</title>
       <author><first>Phu Mon</first><last>Htut</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>4998–5003</pages>
       <url hash="c3ef8822">D18-1544</url>
       <attachment type="attachment" hash="7fc47f98">D18-1544.Attachment.zip</attachment>
@@ -6872,8 +6872,8 @@
     </paper>
     <paper id="545">
       <title>Data Augmentation via Dependency Tree Morphing for Low-Resource Languages</title>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>5004–5009</pages>
       <url hash="26b9ff10">D18-1545</url>
       <abstract>Neural NLP systems achieve high scores in the presence of sizable training dataset. Lack of such datasets leads to poor system performances in the case low-resource languages. We present two simple text augmentation techniques using dependency trees, inspired from image processing. We “crop” sentences by removing dependency links, and we “rotate” sentences by moving the tree fragments around the root. We apply these techniques to augment the training sets of low-resource languages in Universal Dependencies project. We implement a character-level sequence tagging model and evaluate the augmented datasets on part-of-speech tagging task. We show that crop and rotate provides improvements over the models trained with non-augmented data for majority of the languages, especially for languages with rich case marking systems.</abstract>
@@ -6883,7 +6883,7 @@
     <paper id="546">
       <title>How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks</title>
       <author><first>Divyansh</first><last>Kaushik</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>5010–5015</pages>
       <url hash="b37e408a">D18-1546</url>
       <abstract>Many recent papers address reading comprehension, where examples consist of (question, passage, answer) tuples. Presumably, a model must combine information from both questions and passages to predict corresponding answers. However, despite intense interest in the topic, with hundreds of published papers vying for leaderboard dominance, basic questions about the difficulty of many popular benchmarks remain unanswered. In this paper, we establish sensible baselines for the bAbI, SQuAD, CBT, CNN, and Who-did-What datasets, finding that question- and passage-only models often perform surprisingly well. On 14 out of 20 bAbI tasks, passage-only models achieve greater than 50% accuracy, sometimes matching the full model. Interestingly, while CBT provides 20-sentence passages, only the last is needed for accurate prediction. By comparison, SQuAD and CNN appear better-constructed.</abstract>
@@ -6899,7 +6899,7 @@
       <author><first>Iñigo</first><last>Casanueva</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>Osman</first><last>Ramadan</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>5016–5026</pages>
       <url hash="22a4ee89">D18-1547</url>
       <attachment type="attachment" hash="ab7f5f03">D18-1547.Attachment.pdf</attachment>
@@ -6958,7 +6958,7 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>ynta<fixed-case>V</fixed-case>iz: Visualizing Voice Queries through a Syntax-Driven Hierarchical Ontology</title>
       <author><first>Md Iftekhar</first><last>Tanveer</last></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <pages>1–6</pages>
       <url hash="dd7e0e3f">D18-2001</url>
       <abstract>This paper describes SyntaViz, a visualization interface specifically designed for analyzing natural-language queries that were created by users of a voice-enabled product. SyntaViz provides a platform for browsing the ontology of user queries from a syntax-driven perspective, providing quick access to high-impact failure points of the existing intent understanding system and evidence for data-driven decisions in the development cycle. A case study on Xfinity X1 (a voice-enabled entertainment platform from Comcast) reveals that SyntaViz helps developers identify multiple action items in a short amount of time without any special training. SyntaViz has been open-sourced for the benefit of the community.</abstract>
@@ -6980,8 +6980,8 @@
       <author><first>Longxu</first><last>Dou</last></author>
       <author><first>Guanghui</first><last>Qin</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>13–18</pages>
       <url hash="84c4208b">D18-2003</url>
       <abstract>Data2Text Studio is a platform for automated text generation from structured data. It is equipped with a Semi-HMMs model to extract high-quality templates and corresponding trigger conditions from parallel data automatically, which improves the interactivity and interpretability of the generated text. In addition, several easy-to-use tools are provided for developers to edit templates of pre-trained models, and APIs are released for developers to call the pre-trained model to generate texts in third-party applications. We conduct experiments on RotoWire datasets for template extraction and text generation. The results show that our model achieves improvements on both tasks.</abstract>
@@ -7020,7 +7020,7 @@
     <paper id="6">
       <title>An Interactive Web-Interface for Visualizing the Inner Workings of the Question Answering <fixed-case>LSTM</fixed-case></title>
       <author><first>Ekaterina</first><last>Loginova</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>30–35</pages>
       <url hash="b4f4b7fa">D18-2006</url>
       <abstract>We present a visualisation tool which aims to illuminate the inner workings of an LSTM model for question answering. It plots heatmaps of neurons’ firings and allows a user to check the dependency between neurons and manual features. The system possesses an interactive web-interface and can be adapted to other models and domains.</abstract>
@@ -7044,9 +7044,9 @@
     <paper id="8">
       <title><fixed-case>DERE</fixed-case>: A Task and Domain-Independent Slot Filling Framework for Declarative Relation Extraction</title>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Laura Ana Maria</first><last>Bostan</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Bostan</last></author>
       <author><first>Sean</first><last>Papay</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>42–47</pages>
       <url hash="711cb4bb">D18-2008</url>
@@ -7057,7 +7057,7 @@
     <paper id="9">
       <title>Demonstrating <fixed-case>P</fixed-case>ar4<fixed-case>S</fixed-case>em - A Semantic Writing Aid with Adaptive Paraphrasing</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>48–53</pages>
       <url hash="c81ea894">D18-2009</url>
       <abstract>In this paper, we present Par4Sem, a semantic writing aid tool based on adaptive paraphrasing. Unlike many annotation tools that are primarily used to collect training examples, Par4Sem is integrated into a real word application, in this case a writing aid tool, in order to collect training examples from usage data. Par4Sem is a tool, which supports an adaptive, iterative, and interactive process where the underlying machine learning models are updated for each iteration using new training examples from usage data. After motivating the use of ever-learning tools in NLP applications, we evaluate Par4Sem by adopting it to a text simplification task through mere usage.</abstract>
@@ -7094,7 +7094,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>S</fixed-case>entence<fixed-case>P</fixed-case>iece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing</title>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>John</first><last>Richardson</last></author>
       <pages>66–71</pages>
       <url hash="df57cd89">D18-2012</url>
@@ -7119,7 +7119,7 @@
       <title>A Multilingual Information Extraction Pipeline for Investigative Journalism</title>
       <author><first>Gregor</first><last>Wiedemann</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>78–83</pages>
       <url hash="a2b3d191">D18-2014</url>
       <abstract>We introduce an advanced information extraction pipeline to automatically process very large collections of unstructured textual data for the purpose of investigative journalism. The pipeline serves as a new input processor for the upcoming major release of our New/s/leak 2.0 software, which we develop in cooperation with a large German news organization. The use case is that journalists receive a large collection of files up to several Gigabytes containing unknown contents. Collections may originate either from official disclosures of documents, e.g. Freedom of Information Act requests, or unofficial data leaks.</abstract>
@@ -7130,7 +7130,7 @@
       <title>Sisyphus, a Workflow Manager Designed for Machine Translation and Automatic Speech Recognition</title>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Eugen</first><last>Beck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>84–89</pages>
       <url hash="b1102c48">D18-2015</url>
       <abstract>Training and testing many possible parameters or model architectures of state-of-the-art machine translation or automatic speech recognition system is a cumbersome task. They usually require a long pipeline of commands reaching from pre-processing the training data to post-processing and evaluating the output.</abstract>
@@ -7171,12 +7171,12 @@
       <author><first>Rajarshi</first><last>Das</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Maria</first><last>Chang</last></author>
-      <author><first>Achille</first><last>Fokoue</last></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue</last></author>
       <author><first>Pavan</first><last>Kapanipathi</last></author>
       <author><first>Nicholas</first><last>Mattei</last></author>
       <author><first>Ryan</first><last>Musa</last></author>
       <author><first>Kartik</first><last>Talamadupula</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>102–107</pages>
       <url hash="68fea77f">D18-2018</url>
       <abstract>Recent work introduces the AI2 Reasoning Challenge (ARC) and the associated ARC dataset that partitions open domain, complex science questions into an Easy Set and a Challenge Set. That work includes an analysis of 100 questions with respect to the types of knowledge and reasoning required to answer them. However, it does not include clear definitions of these types, nor does it offer information about the quality of the labels or the annotation process used. In this paper, we introduce a novel interface for human annotation of science question-answer pairs with their respective knowledge and reasoning types, in order that the classification of new questions may be improved. We build on the classification schema proposed by prior work on the ARC dataset, and evaluate the effectiveness of our interface with a preliminary study involving 10 participants.</abstract>
@@ -7219,7 +7219,7 @@
       <title>Integrating Knowledge-Supported Search into the <fixed-case>INCE</fixed-case>p<fixed-case>TION</fixed-case> Annotation Platform</title>
       <author><first>Beto</first><last>Boullosa</last></author>
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
-      <author><first>Naveen</first><last>Kumar</last></author>
+      <author id="naveen-kumar-laskari"><first>Naveen</first><last>Kumar</last></author>
       <author><first>Jan-Christoph</first><last>Klie</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>127–132</pages>
@@ -7232,7 +7232,7 @@
       <title><fixed-case>C</fixed-case>yton<fixed-case>MT</fixed-case>: an Efficient Neural Machine Translation Open-source Toolkit Implemented in <fixed-case>C</fixed-case>++</title>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>133–138</pages>
       <url hash="d8acb7c6">D18-2023</url>
       <abstract>This paper presents an open-source neural machine translation toolkit named CytonMT. The toolkit is built from scratch only using C++ and NVIDIA’s GPU-accelerated libraries. The toolkit features training efficiency, code simplicity and translation quality. Benchmarks show that cytonMT accelerates the training speed by 64.5% to 110.8% on neural networks of various sizes, and achieves competitive translation quality.</abstract>
@@ -7258,7 +7258,7 @@
       <title><fixed-case>LIA</fixed-case>: A Natural Language Programmable Personal Assistant</title>
       <author><first>Igor</first><last>Labutov</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>145–150</pages>
       <url hash="ae61e82a">D18-2025</url>
       <abstract>We present LIA, an intelligent personal assistant that can be programmed using natural language. Our system demonstrates multiple competencies towards learning from human-like interactions. These include the ability to be taught reusable conditional procedures, the ability to be taught new knowledge about the world (concepts in an ontology) and the ability to be taught how to ground that knowledge in a set of sensors and effectors. Building such a system highlights design questions regarding the overall architecture that such an agent should have, as well as questions about parsing and grounding language in situational contexts. We outline key properties of this architecture, and demonstrate a prototype that embodies them in the form of a personal assistant on an Android device.</abstract>
@@ -7357,7 +7357,7 @@
     </paper>
     <paper id="4">
       <title>Deep Latent Variable Models of Natural Language</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
       <author><first>Sam</first><last>Wiseman</last></author>
       <abstract>The proposed tutorial will cover deep latent variable models both in the case where exact inference over the latent variables is tractable and when it is not. The former case includes neural extensions of unsupervised tagging and parsing models. Our discussion of the latter case, where inference cannot be performed tractably, will restrict itself to continuous latent variables. In particular, we will discuss recent developments both in neural variational inference (e.g., relating to Variational Auto-encoders) and in implicit density modeling (e.g., relating to Generative Adversarial Networks). We will highlight the challenges of applying these families of methods to NLP problems, and discuss recent successes and best practices.</abstract>
@@ -7368,7 +7368,7 @@
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Minjoon</first><last>Seo</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <abstract>Standardized tests have recently been proposed as replacements to the Turing test as a driver for progress in AI (Clark, 2015). These include tests on understanding passages and stories and answering questions about them (Richardson et al., 2013; Rajpurkar et al., 2016a, inter alia), science question answering (Schoenick et al., 2016, inter alia), algebra word problems (Kushman et al., 2014, inter alia), geometry problems (Seo et al., 2015; Sachan et al., 2016), visual question answering (Antol et al., 2015), etc. Many of these tests require sophisticated understanding of the world, aiming to push the boundaries of AI. For this tutorial, we broadly categorize these tests into two categories: open domain tests such as reading comprehensions and elementary school tests where the goal is to find the support for an answer from the student curriculum, and closed domain tests such as intermediate level math and science tests (algebra, geometry, Newtonian physics problems, etc.). Unlike open domain tests, closed domain tests require the system to have significant domain knowledge and reasoning capabilities. For example, geometry questions typically involve a number of geometry primitives (lines, quadrilaterals, circles, etc) and require students to use axioms and theorems of geometry (Pythagoras theorem, alternating angles, etc) to solve them. These closed domains often have a formal logical basis and the question can be mapped to a formal language by semantic parsing. The formal question representation can then provided as an input to an expert system to solve the question.</abstract>
       <bibkey>sachan-etal-2018-standardized</bibkey>
     </paper>
diff --git a/data/xml/D19.xml b/data/xml/D19.xml
index b4aaa11c5d..430f55e0a7 100644
--- a/data/xml/D19.xml
+++ b/data/xml/D19.xml
@@ -45,8 +45,8 @@
     <paper id="3">
       <title>Practical Obstacles to Deploying Active Learning</title>
       <author><first>David</first><last>Lowell</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>21–30</pages>
       <abstract>Active learning (AL) is a widely-used training strategy for maximizing predictive performance subject to a fixed annotation budget. In AL, one iteratively selects training examples for annotation, often those for which the current model is most uncertain (by some measure). The hope is that active sampling leads to better performance than would be achieved under independent and identically distributed (i.i.d.) random samples. While AL has shown promise in retrospective evaluations, these studies often ignore practical obstacles to its use. In this paper, we show that while AL may provide benefits when used with specific models and for particular domains, the benefits of current approaches do not generalize reliably across models and tasks. This is problematic because in practice, one does not have the opportunity to explore and compare alternative AL strategies. Moreover, AL couples the training dataset with the model used to guide its acquisition. We find that subsequently training a successor model with an actively-acquired dataset does not consistently outperform training on i.i.d. sampled data. Our findings raise the question of whether the downsides inherent to AL are worth the modest and inconsistent performance gains it tends to afford.</abstract>
       <url hash="37e1ac60">D19-1003</url>
@@ -66,13 +66,13 @@
     </paper>
     <paper id="5">
       <title>Knowledge Enhanced Contextual Word Representations</title>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Mark</first><last>Neumann</last></author>
       <author><first>Robert</first><last>Logan</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Vidur</first><last>Joshi</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>43–54</pages>
       <abstract>Contextual word representations, typically trained on unstructured, unlabeled text, do not contain any explicit grounding to real world entities and are often unable to remember facts about those entities. We propose a general method to embed multiple knowledge bases (KBs) into large scale models, and thereby enhance their representations with structured, human-curated knowledge. For each KB, we first use an integrated entity linker to retrieve relevant entity embeddings, then update contextual word representations via a form of word-to-entity attention. In contrast to previous approaches, the entity linkers and self-supervised language modeling objective are jointly trained end-to-end in a multitask setting that combines a small amount of entity linking supervision with a large amount of raw text. After integrating WordNet and a subset of Wikipedia into BERT, the knowledge enhanced BERT (KnowBert) demonstrates improved perplexity, ability to recall facts as measured in a probing task and downstream performance on relationship extraction, entity typing, and word sense disambiguation. KnowBert’s runtime is comparable to BERT’s and it scales to large KBs.</abstract>
       <url hash="81d8db15">D19-1005</url>
@@ -94,7 +94,7 @@
       <author><first>Philippa</first><last>Shoemark</last></author>
       <author><first>Farhana Ferdousi</first><last>Liza</last></author>
       <author><first>Dong</first><last>Nguyen</last></author>
-      <author><first>Scott A.</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott A.</first><last>Hale</last></author>
       <author><first>Barbara</first><last>McGillivray</last></author>
       <pages>66–76</pages>
       <abstract>Word embeddings are increasingly used for the automatic detection of semantic change; yet, a robust evaluation and systematic comparison of the choices involved has been lacking. We propose a new evaluation framework for semantic change detection and find that (i) using the whole time series is preferable over only comparing between the first and last time points; (ii) independently trained and aligned embeddings perform better than continuously trained embeddings for long time periods; and (iii) that the reference point for comparison matters. We also present an analysis of the changes detected on a large Twitter dataset spanning 5.5 years.</abstract>
@@ -198,7 +198,7 @@
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Niyati</first><last>Chhaya</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>154–164</pages>
       <abstract>Emotion recognition in conversation (ERC) has received much attention, lately, from researchers due to its potential widespread applications in diverse areas, such as health-care, education, and human resources. In this paper, we present Dialogue Graph Convolutional Network (DialogueGCN), a graph neural network based approach to ERC. We leverage self and inter-speaker dependency of the interlocutors to model conversational context for emotion recognition. Through the graph network, DialogueGCN addresses context propagation issues present in the current RNN-based methods. We empirically show that this method alleviates such issues, while outperforming the current state of the art on a number of benchmark emotion classification datasets.</abstract>
       <url hash="29eb7474">D19-1015</url>
@@ -291,7 +291,7 @@
       <author><first>Pengfei</first><last>Li</last></author>
       <author><first>Kezhi</first><last>Mao</last></author>
       <author><first>Xuefeng</first><last>Yang</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <pages>229–239</pages>
       <abstract>While attention mechanisms have been proven to be effective in many NLP tasks, majority of them are data-driven. We propose a novel knowledge-attention encoder which incorporates prior knowledge from external lexical resources into deep neural networks for relation extraction task. Furthermore, we present three effective ways of integrating knowledge-attention with self-attention to maximize the utilization of both knowledge and data. The proposed relation extraction system is end-to-end and fully attention-based. Experiment results show that the proposed knowledge-attention mechanism has complementary strengths with self-attention, and our integrated models outperform existing CNN, RNN, and self-attention based models. State-of-the-art performance is achieved on TACRED, a complex and large-scale relation extraction dataset.</abstract>
       <url hash="61db8449">D19-1022</url>
@@ -401,9 +401,9 @@
       <author><first>Di</first><last>Lu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Jonathan</first><last>May</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>313–325</pages>
       <abstract>The identification of complex semantic structures such as events and entity relations, already a challenging Information Extraction task, is doubly difficult from sources written in under-resourced and under-annotated languages. We investigate the suitability of cross-lingual structure transfer techniques for these tasks. We exploit relation- and event-relevant language-universal features, leveraging both symbolic (including part-of-speech and dependency path) and distributional (including type representation and contextualized representation) information. By representing all entity mentions, event triggers, and contexts into this complex and structured multilingual common space, using graph convolutional networks, we can train a relation or event extractor from source language annotations and apply it to the target language. Extensive experiments on cross-lingual relation and event transfer among English, Chinese, and Arabic demonstrate that our approach achieves performance comparable to state-of-the-art supervised models trained on up to 3,000 manually annotated mentions: up to 62.6% F-score for Relation Extraction, and 63.1% F-score for Event Argument Role Labeling. The event argument role labeling model transferred from English to Chinese achieves similar performance as the model trained from Chinese. We thus find that language-universal symbolic and distributional representations are complementary for cross-lingual structure transfer.</abstract>
       <url hash="c427a095">D19-1030</url>
@@ -481,7 +481,7 @@
       <title><fixed-case>C</fixed-case>a<fixed-case>R</fixed-case>e: Open Knowledge Graph Embeddings</title>
       <author><first>Swapnil</first><last>Gupta</last></author>
       <author><first>Sreyash</first><last>Kenkre</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>378–388</pages>
       <abstract>Open Information Extraction (OpenIE) methods are effective at extracting (noun phrase, relation phrase, noun phrase) triples from text, e.g., (Barack Obama, took birth in, Honolulu). Organization of such triples in the form of a graph with noun phrases (NPs) as nodes and relation phrases (RPs) as edges results in the construction of Open Knowledge Graphs (OpenKGs). In order to use such OpenKGs in downstream tasks, it is often desirable to learn embeddings of the NPs and RPs present in the graph. Even though several Knowledge Graph (KG) embedding methods have been recently proposed, all of those methods have targeted Ontological KGs, as opposed to OpenKGs. Straightforward application of existing Ontological KG embedding methods to OpenKGs is challenging, as unlike Ontological KGs, OpenKGs are not canonicalized, i.e., a real-world entity may be represented using multiple nodes in the OpenKG, with each node corresponding to a different NP referring to the entity. For example, nodes with labels Barack Obama, Obama, and President Obama may refer to the same real-world entity Barack Obama. Even though canonicalization of OpenKGs has received some attention lately, output of such methods has not been used to improve OpenKG embed- dings. We fill this gap in the paper and propose Canonicalization-infused Representations (CaRe) for OpenKGs. Through extensive experiments, we observe that CaRe enables existing models to adapt to the challenges in OpenKGs and achieve substantial improvements for the link prediction task.</abstract>
       <url hash="d4ad6949">D19-1036</url>
@@ -501,7 +501,7 @@
     <paper id="38">
       <title>Neural Cross-Lingual Relation Extraction Based on Bilingual Word Embedding Mapping</title>
       <author><first>Jian</first><last>Ni</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>399–409</pages>
       <abstract>Relation extraction (RE) seeks to detect and classify semantic relationships between entities, which provides useful information for many NLP applications. Since the state-of-the-art RE models require large amounts of manually annotated data and language-specific resources to achieve high accuracy, it is very challenging to transfer an RE model of a resource-rich language to a resource-poor language. In this paper, we propose a new approach for cross-lingual RE model transfer based on bilingual word embedding mapping. It projects word embeddings from a target language to a source language, so that a well-trained source-language neural network RE model can be directly applied to the target language. Experiment results show that the proposed approach achieves very good performance for a number of target languages on both in-house and open datasets, using a small bilingual dictionary with only 1K word pairs.</abstract>
       <url hash="d1e74e25">D19-1038</url>
@@ -648,7 +648,7 @@
     <paper id="50">
       <title>Linking artificial and human neural representations of language</title>
       <author><first>Jon</first><last>Gauthier</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>529–539</pages>
       <abstract>What information from an act of sentence understanding is robustly represented in the human brain? We investigate this question by comparing sentence encoding models on a brain decoding task, where the sentence that an experimental participant has seen must be predicted from the fMRI signal evoked by the sentence. We take a pre-trained BERT architecture as a baseline sentence encoding model and fine-tune it on a variety of natural language understanding (NLU) tasks, asking which lead to improvements in brain-decoding performance. We find that none of the sentence encoding tasks tested yield significant increases in brain decoding performance. Through further task ablations and representational analyses, we find that tasks which produce syntax-light representations yield significant improvements in brain decoding performance. Our results constrain the space of NLU models that could best account for human neural representations of language, but also suggest limits on the possibility of decoding fine-grained syntactic information from fMRI human neuroimaging.</abstract>
       <url hash="f312b0b6">D19-1050</url>
@@ -672,10 +672,10 @@
     </paper>
     <paper id="52">
       <title>Neural data-to-text generation: A comparison between pipeline and end-to-end architectures</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>552–562</pages>
       <abstract>Traditionally, most data-to-text applications have been designed using a modular pipeline architecture, in which non-linguistic input data is converted into natural language through several intermediate transformations. By contrast, recent neural models for data-to-text generation have been proposed as end-to-end approaches, where the non-linguistic input is rendered in natural language with much less explicit intermediate representations in between. This study introduces a systematic comparison between neural pipeline and end-to-end data-to-text approaches for the generation of text from RDF triples. Both architectures were implemented making use of the encoder-decoder Gated-Recurrent Units (GRU) and Transformer, two state-of-the art deep learning methods. Automatic and human evaluations together with a qualitative analysis suggest that having explicit intermediate steps in the generation process results in better texts than the ones generated by end-to-end approaches. Moreover, the pipeline models generalize better to unseen inputs. Data and code are publicly available.</abstract>
       <url hash="a13e0f76">D19-1052</url>
@@ -800,7 +800,7 @@
       <author><first>Saachi</first><last>Jain</last></author>
       <author><first>Samuel</first><last>Humeau</last></author>
       <author><first>Emily</first><last>Dinan</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <author><first>Arthur</first><last>Szlam</last></author>
       <author><first>Jason</first><last>Weston</last></author>
@@ -814,7 +814,7 @@
     <paper id="63">
       <title>Help, Anna! Visual Navigation with Natural Multimodal Assistance via Retrospective Curiosity-Encouraging Imitation Learning</title>
       <author><first>Khanh</first><last>Nguyen</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>684–695</pages>
       <abstract>Mobile agents that can leverage help from humans can potentially accomplish more complex tasks than they could entirely on their own. We develop “Help, Anna!” (HANNA), an interactive photo-realistic simulator in which an agent fulfills object-finding tasks by requesting and interpreting natural language-and-vision assistance. An agent solving tasks in a HANNA environment can leverage simulated human assistants, called ANNA (Automatic Natural Navigation Assistants), which, upon request, provide natural language and visual instructions to direct the agent towards the goals. To address the HANNA problem, we develop a memory-augmented neural agent that hierarchically models multiple levels of decision-making, and an imitation learning algorithm that teaches the agent to avoid repeating past mistakes while simultaneously predicting its own chances of making future progress. Empirically, our approach is able to ask for help more effectively than competitive baselines and, thus, attains higher task success rate on both previously seen and previously unseen environments.</abstract>
       <url hash="41a30a7b">D19-1063</url>
@@ -936,7 +936,7 @@
     <paper id="73">
       <title>Improving Back-Translation with Uncertainty-based Confidence Estimation</title>
       <author><first>Shuo</first><last>Wang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <author><first>Huanbo</first><last>Luan</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
@@ -952,7 +952,7 @@
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Zhixing</first><last>Tan</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <pages>803–812</pages>
       <abstract>In this study, we first investigate a novel capsule network with dynamic routing for linear time Neural Machine Translation (NMT), referred as CapsNMT. CapsNMT uses an aggregation mechanism to map the source sentence into a matrix with pre-determined size, and then applys a deep LSTM network to decode the target sequence from the source representation. Unlike the previous work (CITATION) to store the source sentence with a passive and bottom-up way, the dynamic routing policy encodes the source sentence with an iterative process to decide the credit attribution between nodes from lower and higher layers. CapsNMT has two core properties: it runs in time that is linear in the length of the sequences and provides a more flexible way to aggregate the part-whole information of the source sentence. On WMT14 English-German task and a larger WMT14 English-French task, CapsNMT achieves comparable results with the Transformer system. To the best of our knowledge, this is the first work that capsule networks have been empirically investigated for sequence to sequence problems.</abstract>
@@ -992,7 +992,7 @@
     <paper id="78">
       <title>Iterative Dual Domain Adaptation for Neural Machine Translation</title>
       <author><first>Jiali</first><last>Zeng</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <author><first>Yubing</first><last>Ge</last></author>
       <author><first>Yaojie</first><last>Lu</last></author>
@@ -1023,7 +1023,7 @@
       <author><first>Petre</first><last>Petrov</last></author>
       <author><first>Pavel</first><last>Petrushkov</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>866–876</pages>
       <abstract>We present effective pre-training strategies for neural machine translation (NMT) using parallel corpora involving a pivot language, i.e., source-pivot and pivot-target, leading to a significant improvement in source-target translation. We propose three methods to increase the relation among source, pivot, and target languages in the pre-training: 1) step-wise training of a single model for different language pairs, 2) additional adapter component to smoothly connect pre-trained encoder and decoder, and 3) cross-lingual encoder training via autoencoding of the pivot language. Our methods greatly outperform multilingual models up to +2.6% BLEU in WMT 2019 French-German and German-Czech tasks. We show that our improvements are valid also in zero-shot/zero-resource scenarios.</abstract>
       <url hash="fa27d683">D19-1080</url>
@@ -1094,8 +1094,8 @@
       <author><first>Zaixiang</first><last>Zheng</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>931–941</pages>
       <abstract>Previous studies have shown that neural machine translation (NMT) models can benefit from explicitly modeling translated () and untranslated () source contents as recurrent states (CITATION). However, this less interpretable recurrent process hinders its power to model the dynamic updating of and contents during decoding. In this paper, we propose to model the <i>dynamic principles</i> by explicitly separating source words into groups of translated and untranslated contents through parts-to-wholes assignment. The assignment is learned through a novel variant of routing-by-agreement mechanism (CITATION), namely <i>Guided Dynamic Routing</i>, where the translating status at each decoding step <i>guides</i> the routing process to assign each source word to its associated group (i.e., translated or untranslated content) represented by a capsule, enabling translation to be made from holistic context. Experiments show that our approach achieves substantial improvements over both Rnmt and Transformer by producing more adequate translations. Extensive analysis demonstrates that our method is highly interpretable, which is able to recognize the translated and untranslated contents as expected.</abstract>
       <url hash="c8fde544">D19-1086</url>
@@ -1107,9 +1107,9 @@
       <title>Revisit Automatic Error Detection for Wrong and Missing Translation – A Supervised Approach</title>
       <author><first>Wenqiang</first><last>Lei</last></author>
       <author><first>Weiwen</first><last>Xu</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Yuanxin</first><last>Xiang</last></author>
-      <author><first>Tat Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat Seng</first><last>Chua</last></author>
       <pages>942–952</pages>
       <abstract>While achieving great fluency, current machine translation (MT) techniques are bottle-necked by adequacy issues. To have a closer study of these issues and accelerate model development, we propose automatic detecting adequacy errors in MT hypothesis for MT model evaluation. To do that, we annotate missing and wrong translations, the two most prevalent issues for current neural machine translation model, in 15000 Chinese-English translation pairs. We build a supervised alignment model for translation error detection (AlignDet) based on a simple Alignment Triangle strategy to set the benchmark for automatic error detection task. We also discuss the difficulties of this task and the benefits of this task for existing evaluation metrics.</abstract>
       <url hash="cc33f800">D19-1087</url>
@@ -1150,7 +1150,7 @@
       <title>Don’t Forget the Long Tail! A Comprehensive Analysis of Morphological Generalization in Bilingual Lexicon Induction</title>
       <author><first>Paula</first><last>Czarnowska</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <pages>974–983</pages>
@@ -1185,7 +1185,7 @@
       <title>Hierarchical Pointer Net Parsing</title>
       <author><first>Linlin</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Lin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Simeng</first><last>Han</last></author>
       <author><first>Lidong</first><last>Bing</last></author>
       <pages>1007–1017</pages>
@@ -1210,7 +1210,7 @@
       <author><first>Zuyi</first><last>Bao</last></author>
       <author><first>Rui</first><last>Huang</last></author>
       <author><first>Chen</first><last>Li</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>1028–1039</pages>
       <abstract>Previous work on cross-lingual sequence labeling tasks either requires parallel data or bridges the two languages through word-by-word matching. Such requirements and assumptions are infeasible for most languages, especially for languages with large linguistic distances, e.g., English and Chinese. In this work, we propose a Multilingual Language Model with deep semantic Alignment (MLMA) to generate language-independent representations for cross-lingual sequence labeling. Our methods require only monolingual corpora with no bilingual resources at all and take advantage of deep contextualized representations. Experimental results show that our approach achieves new state-of-the-art NER and POS performance across European languages, and is also effective on distant language pairs such as English and Chinese.</abstract>
       <url hash="458a2386">D19-1095</url>
@@ -1225,7 +1225,7 @@
       <author><first>Minlong</first><last>Peng</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1040–1050</pages>
       <abstract>Recurrent neural networks (RNN) used for Chinese named entity recognition (NER) that sequentially track character and word information have achieved great success. However, the characteristic of chain structure and the lack of global semantics determine that RNN-based models are vulnerable to word ambiguities. In this work, we try to alleviate this problem by introducing a lexicon-based graph neural network with global semantics, in which lexicon knowledge is used to connect characters to capture the local composition, while a global relay node can capture global sentence semantics and long-range dependency. Based on the multiple graph-based interactions among characters, potential words, and the whole-sentence semantics, word ambiguities can be effectively tackled. Experiments on four NER datasets show that the proposed model achieves significant improvements against other baseline models.</abstract>
       <url hash="103f2ea3">D19-1096</url>
@@ -1239,7 +1239,7 @@
       <author><first>Jinchao</first><last>Zhang</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <pages>1051–1060</pages>
       <abstract>Spoken Language Understanding (SLU) mainly involves two tasks, intent detection and slot filling, which are generally modeled jointly in existing works. However, most existing models fail to fully utilize cooccurrence relations between slots and intents, which restricts their potential performance. To address this issue, in this paper we propose a novel Collaborative Memory Network (CM-Net) based on the well-designed block, named CM-block. The CM-block firstly captures slot-specific and intent-specific features from memories in a collaborative manner, and then uses these enriched features to enhance local context representations, based on which the sequential information flow leads to more specific (slot and intent) global utterance representations. Through stacking multiple CM-blocks, our CM-Net is able to alternately perform information exchange among specific memories, local contexts and the global utterance, and thus incrementally enriches each other. We evaluate the CM-Net on two standard benchmarks (ATIS and SNIPS) and a self-collected corpus (CAIS). Experimental results show that the CM-Net achieves the state-of-the-art results on the ATIS and SNIPS in most of criteria, and significantly outperforms the baseline models on the CAIS. Additionally, we make the CAIS dataset publicly available for the research community.</abstract>
       <url hash="8387639d">D19-1097</url>
@@ -1262,7 +1262,7 @@
     <paper id="99">
       <title>Semantic Role Labeling with Iterative Structure Refinement</title>
       <author><first>Chunchuan</first><last>Lyu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>1071–1082</pages>
       <abstract>Modern state-of-the-art Semantic Role Labeling (SRL) methods rely on expressive sentence encoders (e.g., multi-layer LSTMs) but tend to model only local (if any) interactions between individual argument labeling decisions. This contrasts with earlier work and also with the intuition that the labels of individual arguments are strongly interdependent. We model interactions between argument labeling decisions through iterative refinement. Starting with an output produced by a factorized model, we iteratively refine it using a refinement network. Instead of modeling arbitrary interactions among roles and words, we encode prior knowledge about the SRL problem by designing a restricted network architecture capturing non-local interactions. This modeling choice prevents overfitting and results in an effective model, outperforming strong factorized baseline models on all 7 CoNLL-2009 languages, and achieving state-of-the-art results on 5 of them, including English.</abstract>
@@ -1275,7 +1275,7 @@
       <title>Entity Projection via Machine Translation for Cross-Lingual <fixed-case>NER</fixed-case></title>
       <author><first>Alankar</first><last>Jain</last></author>
       <author><first>Bhargavi</first><last>Paranjape</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>1083–1092</pages>
       <abstract>Although over 100 languages are supported by strong off-the-shelf machine translation systems, only a subset of them possess large annotated corpora for named entity recognition. Motivated by this fact, we leverage machine translation to improve annotation-projection approaches to cross-lingual named entity recognition. We propose a system that improves over prior entity-projection methods by: (a) leveraging machine translation systems twice: first for translating sentences and subsequently for translating entities; (b) matching entities based on orthographic and phonetic similarity; and (c) identifying matches based on distributional statistics derived from the dataset. Our approach improves upon current state-of-the-art methods for cross-lingual named entity recognition on 5 diverse languages by an average of 4.1 points. Further, our method achieves state-of-the-art F_1 scores for Armenian, outperforming even a monolingual model trained on Armenian source data.</abstract>
       <url hash="6e6e818b">D19-1100</url>
@@ -1298,7 +1298,7 @@
       <title>A systematic comparison of methods for low-resource dependency parsing on genuinely low-resource languages</title>
       <author><first>Clara</first><last>Vania</last></author>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <pages>1105–1116</pages>
       <abstract>Parsers are available for only a handful of the world’s languages, since they require lots of training data. How far can we get with just a small amount of training data? We systematically compare a set of simple strategies for improving low-resource parsers: data augmentation, which has not been tested before; cross-lingual training; and transliteration. Experimenting on three typologically diverse low-resource languages—North Sámi, Galician, and Kazah—We find that (1) when only the low-resource treebank is available, data augmentation is very helpful; (2) when a related high-resource treebank is available, cross-lingual training is helpful and complements data augmentation; and (3) when the high-resource treebank uses a different writing system, transliteration into a shared orthographic spaces is also very helpful.</abstract>
@@ -1324,7 +1324,7 @@
       <author><first>Zhichu</first><last>Lu</last></author>
       <author><first>Forough</first><last>Arabshahi</last></author>
       <author><first>Igor</first><last>Labutov</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1129–1139</pages>
       <abstract>Computing devices have recently become capable of interacting with their end users via natural language. However, they can only operate within a limited “supported” domain of discourse and fail drastically when faced with an out-of-domain utterance, mainly due to the limitations of their semantic parser. In this paper, we propose a semantic parser that generalizes to out-of-domain examples by learning a general strategy for parsing an unseen utterance through adapting the logical forms of seen utterances, instead of learning to generate a logical form from scratch. Our parser maintains a memory consisting of a representative subset of the seen utterances paired with their logical forms. Given an unseen utterance, our parser works by looking up a similar utterance from the memory and adapting its logical form until it fits the unseen utterance. Moreover, we present a data generation strategy for constructing utterance-logical form pairs from different domains. Our results show an improvement of up to 68.8% on one-shot parsing under two different evaluation settings compared to the baselines.</abstract>
       <url hash="cf449d43">D19-1104</url>
@@ -1347,7 +1347,7 @@
     </paper>
     <paper id="106">
       <title>Variable beam search for generative neural parsing and its relevance for the analysis of neuro-imaging signal</title>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <author><first>Murielle</first><last>Fabre</last></author>
       <author><first>Christophe</first><last>Pallier</last></author>
       <pages>1150–1160</pages>
@@ -1370,7 +1370,7 @@
     </paper>
     <paper id="108">
       <title>Robust Text Classifier on Test-Time Budgets</title>
-      <author><first>Md Rizwan</first><last>Parvez</last></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last></author>
       <author><first>Tolga</first><last>Bolukbasi</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Venkatesh</first><last>Saligrama</last></author>
@@ -1385,7 +1385,7 @@
       <title>Commonsense Knowledge Mining from Pretrained Models</title>
       <author><first>Joe</first><last>Davison</last></author>
       <author><first>Joshua</first><last>Feldman</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>1173–1178</pages>
       <abstract>Inferring commonsense knowledge is a key challenge in machine learning. Due to the sparsity of training data, previous work has shown that supervised methods for commonsense knowledge mining underperform when evaluated on novel data. In this work, we develop a method for generating commonsense knowledge using a large, pre-trained bidirectional language model. By transforming relational triples into masked sentences, we can use this model to rank a triple’s validity by the estimated pointwise mutual information between the two entities. Since we do not update the weights of the bidirectional model, our approach is not biased by the coverage of any one commonsense knowledge base. Though we do worse on a held-out test set than models explicitly trained on a corresponding training set, our approach outperforms these methods when mining commonsense knowledge from new sources, suggesting that our unsupervised technique generalizes better than current supervised approaches.</abstract>
       <url hash="4c585805">D19-1109</url>
@@ -1398,7 +1398,7 @@
       <author><first>Jesse</first><last>Dodge</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1179–1184</pages>
       <abstract>Neural models for NLP typically use large numbers of parameters to reach state-of-the-art performance, which can lead to excessive memory usage and increased runtime. We present a structure learning method for learning sparse, parameter-efficient NLP models. Our method applies group lasso to rational RNNs (Peng et al., 2018), a family of models that is closely connected to weighted finite-state automata (WFSAs). We take advantage of rational RNNs’ natural grouping of the weights, so the group lasso penalty directly removes WFSA states, substantially reducing the number of parameters in the model. Our experiments on a number of sentiment analysis datasets, using both GloVe and BERT embeddings, show that our approach learns neural structures which have fewer parameters without sacrificing performance relative to parameter-rich baselines. Our method also highlights the interpretable properties of rational RNNs. We show that sparsifying such models makes them easier to visualize, and we present models that rely exclusively on as few as three WFSAs after pruning more than 90% of the weights. We publicly release our code.</abstract>
       <url hash="9f42ed8e">D19-1110</url>
@@ -1409,7 +1409,7 @@
     <paper id="111">
       <title>Analytical Methods for Interpretable Ultradense Word Embeddings</title>
       <author><first>Philipp</first><last>Dufter</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1185–1191</pages>
       <abstract>Word embeddings are useful for a wide variety of tasks, but they lack interpretability. By rotating word spaces, interpretable dimensions can be identified while preserving the information contained in the embeddings without any loss. In this work, we investigate three methods for making word spaces interpretable by rotation: Densifier (Rothe et al., 2016), linear SVMs and DensRay, a new method we propose. In contrast to Densifier, DensRay can be computed in closed form, is hyperparameter-free and thus more robust than Densifier. We evaluate the three methods on lexicon induction and set-based word analogy. In addition we provide qualitative insights as to how interpretable word spaces can be used for removing gender bias from embeddings.</abstract>
       <url hash="018bb981">D19-1111</url>
@@ -1458,7 +1458,7 @@
       <title>Neural Linguistic Steganography</title>
       <author><first>Zachary</first><last>Ziegler</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>1210–1215</pages>
       <abstract>Whereas traditional cryptography encrypts a secret message into an unintelligible form, steganography conceals that communication is taking place by encoding a secret message into a cover signal. Language is a particularly pragmatic cover signal due to its benign occurrence and independence from any one medium. Traditionally, linguistic steganography systems encode secret messages in existing text via synonym substitution or word order rearrangements. Advances in neural language models enable previously impractical generation-based techniques. We propose a steganography technique based on arithmetic coding with large-scale neural language models. We find that our approach can generate realistic looking cover sentences as evaluated by humans, while at the same time preserving security by matching the cover message distribution with the language model distribution.</abstract>
       <url hash="8eca7995">D19-1115</url>
@@ -1480,7 +1480,7 @@
     <paper id="117">
       <title>Attention Optimization for Abstractive Document Summarization</title>
       <author><first>Min</first><last>Gui</last></author>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Zhenglu</first><last>Yang</last></author>
       <pages>1222–1228</pages>
@@ -1498,7 +1498,7 @@
       <author><first>Chen</first><last>Qiu</last></author>
       <author><first>Anders</first><last>Sandholm</last></author>
       <author><first>Michael</first><last>Ringaard</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1229–1235</pages>
       <abstract>Unresolved coreference is a bottleneck for relation extraction, and high-quality coreference resolvers may produce an output that makes it a lot easier to extract knowledge triples. We show how to improve coreference resolvers by forwarding their input to a relation extraction system and reward the resolvers for producing triples that are found in knowledge bases. Since relation extraction systems can rely on different forms of supervision and be biased in different ways, we obtain the best performance, improving over the state of the art, using multi-task reinforcement learning.</abstract>
       <url hash="698872a0">D19-1118</url>
@@ -1547,9 +1547,9 @@
       <title>Towards Extracting Medical Family History from Natural Language Interactions: A New Dataset and Baselines</title>
       <author><first>Mahmoud</first><last>Azab</last></author>
       <author><first>Stephane</first><last>Dadian</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Larry</first><last>An</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1255–1260</pages>
       <abstract>We introduce a new dataset consisting of natural language interactions annotated with medical family histories, obtained during interactions with a genetic counselor and through crowdsourcing, following a questionnaire created by experts in the domain. We describe the data collection process and the annotations performed by medical professionals, including illness and personal attributes (name, age, gender, family relationships) for the patient and their family members. An initial system that performs argument identification and relation extraction shows promising results – average F-score of 0.87 on complex sentences on the targeted relations.</abstract>
       <url hash="1aae95b4">D19-1122</url>
@@ -1560,7 +1560,7 @@
       <title>Multi-task Learning for Natural Language Generation in Task-Oriented Dialogue</title>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>1261–1266</pages>
       <abstract>In task-oriented dialogues, Natural Language Generation (NLG) is the final yet crucial step to produce user-facing system utterances. The result of NLG is directly related to the perceived quality and usability of a dialogue system. While most existing systems provide semantically correct responses given goals to present, they struggle to match the variation and fluency in the human language. In this paper, we propose a novel multi-task learning framework, NLG-LM, for natural language generation. In addition to generating high-quality responses conveying the required information, it also explicitly targets for naturalness in generated responses via an unconditioned language model. This can significantly improve the learning of style and variation in human language. Empirical results show that this multi-task learning framework outperforms previous models across multiple datasets. For example, it improves the previous best BLEU score on the E2E-NLG dataset by 2.2%, and on the Laptop dataset by 6.1%.</abstract>
       <url hash="66959070">D19-1123</url>
@@ -1610,7 +1610,7 @@
       <author><first>Arshit</first><last>Gupta</last></author>
       <author><first>Peng</first><last>Zhang</last></author>
       <author><first>Garima</first><last>Lalwani</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1285–1290</pages>
       <abstract>Natural Language Understanding (NLU) is a core component of dialog systems. It typically involves two tasks - Intent Classification (IC) and Slot Labeling (SL), which are then followed by a dialogue management (DM) component. Such NLU systems cater to utterances in isolation, thus pushing the problem of context management to DM. However, contextual information is critical to the correct prediction of intents in a conversation. Prior work on contextual NLU has been limited in terms of the types of contextual signals used and the understanding of their impact on the model. In this work, we propose a context-aware self-attentive NLU (CASA-NLU) model that uses multiple signals over a variable context window, such as previous intents, slots, dialog acts and utterances, in addition to the current user utterance. CASA-NLU outperforms a recurrent contextual NLU baseline on two conversational datasets, yielding a gain of up to 7% on the IC task. Moreover, a non-contextual variant of CASA-NLU achieves state-of-the-art performance on standard public datasets - SNIPS and ATIS.</abstract>
       <url hash="97e14081">D19-1127</url>
@@ -1637,7 +1637,7 @@
       <author><first>Zihan</first><last>Liu</last></author>
       <author><first>Jamin</first><last>Shin</last></author>
       <author><first>Yan</first><last>Xu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -1663,7 +1663,7 @@
       <title>An Evaluation Dataset for Intent Classification and Out-of-Scope Prediction</title>
       <author><first>Stefan</first><last>Larson</last></author>
       <author><first>Anish</first><last>Mahendran</last></author>
-      <author><first>Joseph J.</first><last>Peper</last></author>
+      <author id="joseph-j-peper"><first>Joseph J.</first><last>Peper</last></author>
       <author><first>Christopher</first><last>Clarke</last></author>
       <author><first>Andrew</first><last>Lee</last></author>
       <author><first>Parker</first><last>Hill</last></author>
@@ -1693,7 +1693,7 @@
       <title>uniblock: Scoring and Filtering Corpus with <fixed-case>U</fixed-case>nicode Block Information</title>
       <author><first>Yingbo</first><last>Gao</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1324–1329</pages>
       <abstract>The preprocessing pipelines in Natural Language Processing usually involve a step of removing sentences consisted of illegal characters. The definition of illegal characters and the specific removal strategy depend on the task, language, domain, etc, which often lead to tiresome and repetitive scripting of rules. In this paper, we introduce a simple statistical method, uniblock, to overcome this problem. For each sentence, uniblock generates a fixed-size feature vector using Unicode block information of the characters. A Gaussian mixture model is then estimated on some clean corpus using variational inference. The learned model can then be used to score sentences and filter corpus. We present experimental results on Sentiment Analysis, Language Modeling and Machine Translation, and show the simplicity and effectiveness of our method.</abstract>
       <url hash="78dc81df">D19-1133</url>
@@ -1763,7 +1763,7 @@
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1361–1367</pages>
       <abstract>In the Transformer network architecture, positional embeddings are used to encode order dependencies into the input representation. However, this input representation only involves static order dependencies based on discrete numerical information, that is, are independent of word content. To address this issue, this work proposes a recurrent positional embedding approach based on word vector. In this approach, these recurrent positional embeddings are learned by a recurrent neural network, encoding word content-based order dependencies into the input representation. They are then integrated into the existing multi-head self-attention model as independent heads or part of each head. The experimental results revealed that the proposed approach improved translation performance over that of the state-of-the-art Transformer baseline in WMT’14 English-to-German and NIST Chinese-to-English translation tasks.</abstract>
       <url hash="fd8a55af">D19-1139</url>
@@ -1774,7 +1774,7 @@
       <title>Machine Translation for Machines: the Sentiment Classification Use Case</title>
       <author><first>Amirhossein</first><last>Tebbifakhr</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1368–1374</pages>
       <abstract>We propose a neural machine translation (NMT) approach that, instead of pursuing adequacy and fluency (“human-oriented” quality criteria), aims to generate translations that are best suited as input to a natural language processing component designed for a specific downstream task (a “machine-oriented” criterion). Towards this objective, we present a reinforcement learning technique based on a new candidate sampling strategy, which exploits the results obtained on the downstream task as weak feedback. Experiments in sentiment classification of Twitter data in German and Italian show that feeding an English classifier with “machine-oriented” translations significantly improves its performance. Classification results outperform those obtained with translations produced by general-purpose NMT models as well as by an approach based on reinforcement learning. Moreover, our results on both languages approximate the classification accuracy computed on gold standard English tweets.</abstract>
@@ -1906,7 +1906,7 @@
       <title>Efficient Convolutional Neural Networks for Diacritic Restoration</title>
       <author><first>Sawsan</first><last>Alqahtani</last></author>
       <author><first>Ajay</first><last>Mishra</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1442–1448</pages>
       <abstract>Diacritic restoration has gained importance with the growing need for machines to understand written texts. The task is typically modeled as a sequence labeling problem and currently Bidirectional Long Short Term Memory (BiLSTM) models provide state-of-the-art results. Recently, Bai et al. (2018) show the advantages of Temporal Convolutional Neural Networks (TCN) over Recurrent Neural Networks (RNN) for sequence modeling in terms of performance and computational resources. As diacritic restoration benefits from both previous as well as subsequent timesteps, we further apply and evaluate a variant of TCN, Acausal TCN (A-TCN), which incorporates context from both directions (previous and future) rather than strictly incorporating previous context as in the case of TCN. A-TCN yields significant improvement over TCN for diacritization in three different languages: Arabic, Yoruba, and Vietnamese. Furthermore, A-TCN and BiLSTM have comparable performance, making A-TCN an efficient alternative over BiLSTM since convolutions can be trained in parallel. A-TCN is significantly faster than BiLSTM at inference time (270% 334% improvement in the amount of text diacritized per minute).</abstract>
       <url hash="ddaebe53">D19-1151</url>
@@ -1941,7 +1941,7 @@
       <title>Multi-Head Attention with Diversity for Learning Grounded Multilingual Multimodal Representations</title>
       <author><first>Po-Yao</first><last>Huang</last></author>
       <author><first>Xiaojun</first><last>Chang</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>1461–1467</pages>
       <abstract>With the aim of promoting and understanding the multilingual version of image search, we leverage visual object detection and propose a model with diverse multi-head attention to learn grounded multilingual multimodal representations. Specifically, our model attends to different types of textual semantics in two languages and visual objects for fine-grained alignments between sentences and images. We introduce a new objective function which explicitly encourages attention diversity to learn an improved visual-semantic embedding space. We evaluate our model in the German-Image and English-Image matching tasks on the Multi30K dataset, and in the Semantic Textual Similarity task with the English descriptions of visual content. Results show that our model yields a significant performance gain over other methods in all of the three tasks.</abstract>
       <url hash="dc72cb55">D19-1154</url>
@@ -1991,7 +1991,7 @@
       <title>Grounding learning of modifier dynamics: An application to color naming</title>
       <author><first>Xudong</first><last>Han</last></author>
       <author><first>Philip</first><last>Schulz</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1488–1493</pages>
       <abstract>Grounding is crucial for natural language understanding. An important subtask is to understand modified color expressions, such as “light blue”. We present a model of color modifiers that, compared with previous additive models in RGB space, learns more complex transformations. In addition, we present a model that operates in the HSV color space. We show that certain adjectives are better modeled in that space. To account for all modifiers, we train a hard ensemble model that selects a color space depending on the modifier-color pair. Experimental results show significant and consistent improvements compared to the state-of-the-art baseline model.</abstract>
       <url hash="1d44f8bc">D19-1158</url>
@@ -2006,7 +2006,7 @@
       <author><first>Yonatan</first><last>Bisk</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>1494–1499</pages>
       <abstract>Core to the vision-and-language navigation (VLN) challenge is building robust instruction representations and action decoding schemes, which can generalize well to previously unseen instructions and environments. In this paper, we report two simple but highly effective methods to address these challenges and lead to a new state-of-the-art performance. First, we adapt large-scale pretrained language models to learn text representations that generalize better to previously unseen instructions. Second, we propose a stochastic sampling scheme to reduce the considerable gap between the expert actions in training and sampled actions in test, so that the agent can learn to correct its own mistakes during long sequential action decoding. Combining the two techniques, we achieve a new state of the art on the Room-to-Room benchmark with 6% absolute gain over the previous best result (47% -&gt; 53%) on the Success Rate weighted by Path Length metric.</abstract>
@@ -2059,7 +2059,7 @@
       <author><first>Karishma</first><last>Mandyam</last></author>
       <author><first>Rushin</first><last>Shah</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1520–1526</pages>
       <abstract>We propose a semantic parser for parsing compositional utterances into Task Oriented Parse (TOP), a tree representation that has intents and slots as labels of nesting tree nodes. Our parser is span-based: it scores labels of the tree nodes covering each token span independently, but then decodes a valid tree globally. In contrast to previous sequence decoding approaches and other span-based parsers, we (1) improve the training speed by removing the need to run the decoder at training time; and (2) introduce edge scores, which model relations between parent and child labels, to mitigate the independence assumption between node labels and improve accuracy. Our best parser outperforms previous methods on the TOP dataset of mixed-domain task-oriented utterances in both accuracy and training speed.</abstract>
       <url hash="1bacda3a">D19-1163</url>
@@ -2119,8 +2119,8 @@
       <title>Hierarchical Modeling of Global Context for Document-Level Neural Machine Translation</title>
       <author><first>Xin</first><last>Tan</last></author>
       <author><first>Longyin</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1576–1585</pages>
       <abstract>Document-level machine translation (MT) remains challenging due to the difficulty in efficiently using document context for translation. In this paper, we propose a hierarchical model to learn the global context for document-level neural machine translation (NMT). This is done through a sentence encoder to capture intra-sentence dependencies and a document encoder to model document-level inter-sentence consistency and coherence. With this hierarchical architecture, we feedback the extracted global document context to each word in a top-down fashion to distinguish different translations of a word according to its specific surrounding context. In addition, since large-scale in-domain document-level parallel corpora are usually unavailable, we use a two-step training strategy to take advantage of a large-scale corpus with out-of-domain parallel sentence pairs and a small-scale corpus with in-domain parallel document pairs to achieve the domain adaptability. Experimental results on several benchmark corpora show that our proposed model can significantly improve document-level translation performance over several strong NMT baselines.</abstract>
       <url hash="86b41714">D19-1168</url>
@@ -2146,7 +2146,7 @@
       <author><first>Minghao</first><last>Hu</last></author>
       <author><first>Yuxing</first><last>Peng</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>1596–1606</pages>
       <abstract>Rapid progress has been made in the field of reading comprehension and question answering, where several systems have achieved human parity in some simplified settings. However, the performance of these models degrades significantly when they are applied to more realistic scenarios, such as answers involve various types, multiple text strings are correct answers, or discrete reasoning abilities are required. In this paper, we introduce the Multi-Type Multi-Span Network (MTMSN), a neural reading comprehension model that combines a multi-type answer predictor designed to support various answer types (e.g., span, count, negation, and arithmetic expression) with a multi-span extraction method for dynamically producing one or multiple text spans. In addition, an arithmetic expression reranking mechanism is proposed to rank expression candidates for further confirming the prediction. Experiments show that our model achieves 79.9 F1 on the DROP hidden test set, creating new state-of-the-art results. Source code (<url>https://github.com/huminghao16/MTMSN</url>) is released to facilitate future work.</abstract>
       <url hash="418fd78d">D19-1170</url>
@@ -2157,7 +2157,7 @@
     <paper id="171">
       <title>Neural Duplicate Question Detection without Labeled Training Data</title>
       <author><first>Andreas</first><last>Rücklé</last></author>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>1607–1617</pages>
       <abstract>Supervised training of neural models to duplicate question detection in community Question Answering (CQA) requires large amounts of labeled question pairs, which can be costly to obtain. To minimize this cost, recent works thus often used alternative methods, e.g., adversarial domain adaptation. In this work, we propose two novel methods—weak supervision using the title and body of a question, and the automatic generation of duplicate questions—and show that both can achieve improved performances even though they do not require any labeled data. We provide a comparison of popular training strategies and show that our proposed approaches are more effective in many cases because they can utilize larger amounts of data from the CQA forums. Finally, we show that weak supervision with question title and body information is also an effective method to train CQA answer selection models without direct answer supervision.</abstract>
@@ -2185,7 +2185,7 @@
     <paper id="173">
       <title>Multi-View Domain Adapted Sentence Embeddings for Low-Resource Unsupervised Duplicate Question Detection</title>
       <author><first>Nina</first><last>Poerner</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1630–1641</pages>
       <abstract>We address the problem of Duplicate Question Detection (DQD) in low-resource domain-specific Community Question Answering forums. Our multi-view framework MV-DASE combines an ensemble of sentence encoders via Generalized Canonical Correlation Analysis, using unlabeled data only. In our experiments, the ensemble includes generic and domain-specific averaged word embeddings, domain-finetuned BERT and the Universal Sentence Encoder. We evaluate MV-DASE on the CQADupStack corpus and on additional low-resource Stack Exchange forums. Combining the strengths of different encoders, we significantly outperform BM25, all single-view systems as well as a recent supervised domain-adversarial DQD method.</abstract>
       <url hash="adf81a15">D19-1173</url>
@@ -2212,7 +2212,7 @@
     <paper id="175">
       <title>The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets</title>
       <author><first>Charuta</first><last>Pethe</last></author>
-      <author><first>Steve</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steve</first><last>Skiena</last></author>
       <pages>1653–1663</pages>
       <abstract>The sequence of documents produced by any given author varies in style and content, but some documents are more typical or representative of the source than others. We quantify the extent to which a given short text is characteristic of a specific person, using a dataset of tweets from fifteen celebrities. Such analysis is useful for generating excerpts of high-volume Twitter profiles, and understanding how representativeness relates to tweet popularity. We first consider the related task of binary author detection (is x the author of text T?), and report a test accuracy of 90.37% for the best of five approaches to this problem. We then use these models to compute characterization scores among all of an author’s texts. A user study shows human evaluators agree with our characterization model for all 15 celebrities in our dataset, each with p-value &lt; 0.05. We use these classifiers to show surprisingly strong correlations between characterization scores and the popularity of the associated texts. Indeed, we demonstrate a statistically significant correlation between this score and tweet popularity (likes/replies/retweets) for 13 of the 15 celebrities in our study.</abstract>
       <url hash="37121add">D19-1175</url>
@@ -2258,7 +2258,7 @@
       <title>(Male, Bachelor) and (Female, <fixed-case>P</fixed-case>h.<fixed-case>D</fixed-case>) have different connotations: Parallelly Annotated Stylistic Language Dataset with Multiple Personas</title>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1696–1706</pages>
       <abstract>Stylistic variation in text needs to be studied with different aspects including the writer’s personal traits, interpersonal relations, rhetoric, and more. Despite recent attempts on computational modeling of the variation, the lack of parallel corpora of style language makes it difficult to systematically control the stylistic change as well as evaluate such models. We release PASTEL, the parallel and annotated stylistic language dataset, that contains ~41K parallel sentences (8.3K parallel stories) annotated across different personas. Each persona has different styles in conjunction: gender, age, country, political view, education, ethnic, and time-of-writing. The dataset is collected from human annotators with solid control of input denotation: not only preserving original meaning between text, but promoting stylistic diversity to annotators. We test the dataset on two interesting applications of style language, where PASTEL helps design appropriate experiment and evaluation. First, in predicting a target style (e.g., male or female in gender) given a text, multiple styles of PASTEL make other external style variables controlled (or fixed), which is a more accurate experimental design. Second, a simple supervised model with our parallel text outperforms the unsupervised models using nonparallel text in style transfer. Our dataset is publicly available.</abstract>
       <url hash="e5460084">D19-1179</url>
@@ -2295,9 +2295,9 @@
     </paper>
     <paper id="182">
       <title>Deep Ordinal Regression for Pledge Specificity Prediction</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1729–1740</pages>
       <abstract>Many pledges are made in the course of an election campaign, forming important corpora for political analysis of campaign strategy and governmental accountability. At present, there are no publicly available annotated datasets of pledges, and most political analyses rely on manual annotations. In this paper we collate a novel dataset of manifestos from eleven Australian federal election cycles, with over 12,000 sentences annotated with specificity (e.g., rhetorical vs detailed pledge) on a fine-grained scale. We propose deep ordinal regression approaches for specificity prediction, under both supervised and semi-supervised settings, and provide empirical results demonstrating the effectiveness of the proposed techniques over several baseline approaches. We analyze the utility of pledge specificity modeling across a spectrum of policy issues in performing ideology prediction, and further provide qualitative analysis in terms of capturing party-specific issue salience across election cycles.</abstract>
       <url hash="0689b7ce">D19-1182</url>
@@ -2307,7 +2307,7 @@
     <paper id="183">
       <title>Data-Efficient Goal-Oriented Conversation with Dialogue Knowledge Transfer Networks</title>
       <author><first>Igor</first><last>Shalyminov</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Arash</first><last>Eshghi</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>1741–1751</pages>
@@ -2332,7 +2332,7 @@
       <author><first>Weikang</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Qian</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Zhifei</first><last>Li</last></author>
       <pages>1762–1771</pages>
       <abstract>Identity fraud detection is of great importance in many real-world scenarios such as the financial industry. However, few studies addressed this problem before. In this paper, we focus on identity fraud detection in loan applications and propose to solve this problem with a novel interactive dialogue system which consists of two modules. One is the knowledge graph (KG) constructor organizing the personal information for each loan applicant. The other is structured dialogue management that can dynamically generate a series of questions based on the personal KG to ask the applicants and determine their identity states. We also present a heuristic user simulator based on problem analysis to evaluate our method. Experiments have shown that the trainable dialogue system can effectively detect fraudsters, and achieve higher recognition accuracy compared with rule-based systems. Furthermore, our learned dialogue strategies are interpretable and flexible, which can help promote real-world applications.</abstract>
@@ -2354,7 +2354,7 @@
     <paper id="187">
       <title>Knowledge Aware Conversation Generation with Explainable Reasoning over Augmented Graphs</title>
       <author><first>Zhibin</first><last>Liu</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <pages>1782–1792</pages>
@@ -2396,11 +2396,11 @@
       <title>Structuring Latent Spaces for Stylized Response Generation</title>
       <author><first>Xiang</first><last>Gao</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>1814–1823</pages>
       <abstract>Generating responses in a targeted style is a useful yet challenging task, especially in the absence of parallel data. With limited data, existing methods tend to generate responses that are either less stylized or less context-relevant. We propose StyleFusion, which bridges conversation modeling and non-parallel style transfer by sharing a structured latent space. This structure allows the system to generate stylized relevant responses by sampling in the neighborhood of the conversation model prediction, and continuously control the style level. We demonstrate this method using dialogues from Reddit data and two sets of sentences with distinct styles (arXiv and Sherlock Holmes novels). Automatic and human evaluation show that, without sacrificing appropriateness, the system generates responses of the targeted style and outperforms competitive baselines.</abstract>
       <url hash="e5bac854">D19-1190</url>
@@ -2501,8 +2501,8 @@
       <author><first>Jun</first><last>Gao</last></author>
       <author><first>Wei</first><last>Bi</last></author>
       <author><first>Xiaojiang</first><last>Liu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
       <pages>1898–1908</pages>
       <abstract>Neural conversation models such as encoder-decoder models are easy to generate bland and generic responses. Some researchers propose to use the conditional variational autoencoder (CVAE) which maximizes the lower bound on the conditional log-likelihood on a continuous latent variable. With different sampled latent variables, the model is expected to generate diverse responses. Although the CVAE-based models have shown tremendous potential, their improvement of generating high-quality responses is still unsatisfactory. In this paper, we introduce a discrete latent variable with an explicit semantic meaning to improve the CVAE on short-text conversation. A major advantage of our model is that we can exploit the semantic distance between the latent variables to maintain good diversity between the sampled latent variables. Accordingly, we propose a two-stage sampling approach to enable efficient diverse variable selection from a large latent space assumed in the short-text conversation task. Experimental results indicate that our model outperforms various kinds of generation models under both automatic and human evaluations and generates more diverse and informative responses.</abstract>
@@ -2558,13 +2558,13 @@
     <paper id="202">
       <title>Variational Hierarchical User-based Conversation Model</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1941–1950</pages>
       <abstract>Generating appropriate conversation responses requires careful modeling of the utterances and speakers together. Some recent approaches to response generation model both the utterances and the speakers, but these approaches tend to generate responses that are overly tailored to the speakers. To overcome this limitation, we propose a new model with a stochastic variable designed to capture the speaker information and deliver it to the conversational context. An important part of this model is the network of speakers in which each speaker is connected to one or more conversational partner, and this network is then used to model the speakers better. To test whether our model generates more appropriate conversation responses, we build a new conversation corpus containing approximately 27,000 speakers and 770,000 conversations. With this corpus, we run experiments of generating conversational responses and compare our model with other state-of-the-art models. By automatic evaluation metrics and human evaluation, we show that our model outperforms other models in generating appropriate responses. An additional advantage of our model is that it generates better responses for various new user scenarios, for example when one of the speakers is a known user in our corpus but the partner is a new user. For replicability, we make available all our code and data.</abstract>
       <url hash="60a1fa96">D19-1202</url>
       <attachment hash="4bbb4c6c">D19-1202.Attachment.pdf</attachment>
-      <doi>10.18653/v1/D19-1202</doi>
       <attachment type="poster" hash="c14726ff">D19-1202.Poster.pdf</attachment>
+      <doi>10.18653/v1/D19-1202</doi>
       <bibkey>bak-oh-2019-variational</bibkey>
     </paper>
     <paper id="203">
@@ -2572,7 +2572,7 @@
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Anusha</first><last>Balakrishnan</last></author>
       <author><first>Pararth</first><last>Shah</last></author>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Y-Lan</first><last>Boureau</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <pages>1951–1961</pages>
@@ -2590,7 +2590,7 @@
       <author><first>Suyi</first><last>Li</last></author>
       <author><first>Eric</first><last>Xue</last></author>
       <author><first>Bo</first><last>Pang</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Yi Chern</first><last>Tan</last></author>
       <author><first>Tianze</first><last>Shi</last></author>
       <author><first>Zihan</first><last>Li</last></author>
@@ -2598,7 +2598,7 @@
       <author><first>Michihiro</first><last>Yasunaga</last></author>
       <author><first>Sungrok</first><last>Shim</last></author>
       <author><first>Tao</first><last>Chen</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Zifan</first><last>Li</last></author>
       <author><first>Luyao</first><last>Chen</last></author>
       <author><first>Yuwen</first><last>Zhang</last></author>
@@ -2606,8 +2606,8 @@
       <author><first>Vincent</first><last>Zhang</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1962–1979</pages>
       <abstract>We present CoSQL, a corpus for building cross-domain, general-purpose database (DB) querying dialogue systems. It consists of 30k+ turns plus 10k+ annotated SQL queries, obtained from a Wizard-of-Oz (WOZ) collection of 3k dialogues querying 200 complex DBs spanning 138 domains. Each dialogue simulates a real-world DB query scenario with a crowd worker as a user exploring the DB and a SQL expert retrieving answers with SQL, clarifying ambiguous questions, or otherwise informing of unanswerable questions. When user questions are answerable by SQL, the expert describes the SQL and execution results to the user, hence maintaining a natural interaction flow. CoSQL introduces new challenges compared to existing task-oriented dialogue datasets: (1) the dialogue states are grounded in SQL, a domain-independent executable representation, instead of domain-specific slot value pairs, and (2) because testing is done on unseen databases, success requires generalizing to new domains. CoSQL includes three tasks: SQL-grounded dialogue state tracking, response generation from query results, and user dialogue act prediction. We evaluate a set of strong baselines for each task and show that CoSQL presents significant challenges for future research. The dataset, baselines, and leaderboard will be released at <url>https://yale-lily.github.io/cosql</url>.</abstract>
       <url hash="06a139de">D19-1204</url>
@@ -2618,7 +2618,7 @@
       <title>A Practical Dialogue-Act-Driven Conversation Model for Multi-Turn Response Selection</title>
       <author id="harshit-kumar"><first>Harshit</first><last>Kumar</last></author>
       <author><first>Arvind</first><last>Agarwal</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>1980–1989</pages>
       <abstract>Dialogue Acts play an important role in conversation modeling. Research has shown the utility of dialogue acts for the response selection task, however, the underlying assumption is that the dialogue acts are readily available, which is impractical, as dialogue acts are rarely available for new conversations. This paper proposes an end-to-end multi-task model for conversation modeling, which is optimized for two tasks, dialogue act prediction and response selection, with the latter being the task of interest. It proposes a novel way of combining the predicted dialogue acts of context and response with the context (previous utterances) and response (follow-up utterance) in a crossway fashion, such that, it achieves at par performance for the response selection task compared to the model that uses actual dialogue acts. Through experiments on two well known datasets, we demonstrate that the multi-task model not only improves the accuracy of the dialogue act prediction task but also improves the MRR for the response selection task. Also, the cross-stitching of dialogue acts of context and response with the context and response is better than using either one of them individually.</abstract>
       <url hash="699a940c">D19-1205</url>
@@ -2706,7 +2706,7 @@
       <author><first>Nikos</first><last>Papasarantopoulos</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>2057–2067</pages>
       <abstract>Multi-view learning algorithms are powerful representation learning tools, often exploited in the context of multimodal problems. However, for problems requiring inference at the token-level of a sequence (that is, a separate prediction must be made for every time step), it is often the case that single-view systems are used, or that more than one views are fused in a simple manner. We describe an incremental neural architecture paired with a novel training objective for incremental inference. The network operates on multi-view data. We demonstrate the effectiveness of our approach on the problem of predicting perpetrators in crime drama series, for which our model significantly outperforms previous work and strong baselines. Moreover, we introduce two tasks, crime case and speaker type tagging, that contribute to movie understanding and demonstrate the effectiveness of our model on them.</abstract>
       <url hash="c409458b">D19-1212</url>
@@ -2745,7 +2745,7 @@
       <author><first>Simon</first><last>Vandenhende</last></author>
       <author><first>Dusan</first><last>Grujicic</last></author>
       <author><first>Luc</first><last>Van Gool</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>2088–2098</pages>
       <abstract>A long-term goal of artificial intelligence is to have an agent execute commands communicated through natural language. In many cases the commands are grounded in a visual environment shared by the human who gives the command and the agent. Execution of the command then requires mapping the command into the physical visual space, after which the appropriate action can be taken. In this paper we consider the former. Or more specifically, we consider the problem in an autonomous driving setting, where a passenger requests an action that can be associated with an object found in a street scene. Our work presents the Talk2Car dataset, which is the first object referral dataset that contains commands written in natural language for self-driving cars. We provide a detailed comparison with related datasets such as ReferIt, RefCOCO, RefCOCO+, RefCOCOg, Cityscape-Ref and CLEVR-Ref. Additionally, we include a performance analysis using strong state-of-the-art models. The results show that the proposed object referral task is a challenging one for which the models show promising results but still require additional research in natural language processing, computer vision and the intersection of these fields. The dataset can be found on our website: <url>http://macchina-ai.eu/</url></abstract>
       <url hash="4e2cdcd9">D19-1215</url>
@@ -2756,7 +2756,7 @@
     <paper id="216">
       <title>Fact-Checking Meets Fauxtography: Verifying Claims About Images</title>
       <author><first>Dimitrina</first><last>Zlatkova</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <pages>2099–2108</pages>
       <abstract>The recent explosion of false claims in social media and on the Web in general has given rise to a lot of manual fact-checking initiatives. Unfortunately, the number of claims that need to be fact-checked is several orders of magnitude larger than what humans can handle manually. Thus, there has been a lot of research aiming at automating the process. Interestingly, previous work has largely ignored the growing number of claims about images. This is despite the fact that visual imagery is more influential than text and naturally appears alongside fake news. Here we aim at bridging this gap. In particular, we create a new dataset for this problem, and we explore a variety of features modeling the claim, the image, and the relationship between the claim and the image. The evaluation results show sizable improvements over the baseline. We release our dataset, hoping to enable further research on fact-checking claims about images.</abstract>
@@ -2801,7 +2801,7 @@
       <title>Fusion of Detected Objects in Text for Visual Question Answering</title>
       <author><first>Chris</first><last>Alberti</last></author>
       <author><first>Jeffrey</first><last>Ling</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>David</first><last>Reitter</last></author>
       <pages>2131–2140</pages>
       <abstract>To advance models of multimodal context, we introduce a simple yet powerful neural architecture for data that combines vision and natural language. The “Bounding Boxes in Text Transformer” (B2T2) also leverages referential information binding words to portions of the image in a single unified architecture. B2T2 is highly effective on the Visual Commonsense Reasoning benchmark, achieving a new state-of-the-art with a 25% relative reduction in error rate compared to published baselines and obtaining the best performance to date on the public leaderboard (as of May 22, 2019). A detailed ablation analysis shows that the early integration of the visual features into the text analysis is key to the effectiveness of the new architecture. A reference implementation of our models is provided.</abstract>
@@ -2854,7 +2854,7 @@
       <title>Adaptively Sparse Transformers</title>
       <author><first>Gonçalo M.</first><last>Correia</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>2174–2184</pages>
       <abstract>Attention mechanisms have become ubiquitous in NLP. Recent architectures, notably the Transformer, learn powerful context-aware word representations through layered, multi-headed attention. The multiple heads learn diverse types of word relationships. However, with standard softmax attention, all attention heads are dense, assigning a non-zero weight to all context words. In this work, we introduce the adaptively sparse Transformer, wherein attention heads have flexible, context-dependent sparsity patterns. This sparsity is accomplished by replacing softmax with alpha-entmax: a differentiable generalization of softmax that allows low-scoring words to receive precisely zero weight. Moreover, we derive a method to automatically learn the alpha parameter – which controls the shape and sparsity of alpha-entmax – allowing attention heads to choose between focused or spread-out behavior. Our adaptively sparse Transformer improves interpretability and head diversity when compared to softmax Transformers on machine translation datasets. Findings of the quantitative and qualitative analysis of our approach include that heads in different layers learn different sparsity preferences and tend to be more diverse in their attention distributions than softmax Transformers. Furthermore, at no cost in accuracy, sparsity in attention heads helps to uncover different head specializations.</abstract>
       <url hash="76c7e96c">D19-1223</url>
@@ -2868,7 +2868,7 @@
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Dallas</first><last>Card</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2185–2194</pages>
       <abstract>Research in natural language processing proceeds, in part, by demonstrating that new models achieve superior performance (e.g., accuracy) on held-out test data, compared to previous results. In this paper, we demonstrate that test-set performance scores alone are insufficient for drawing accurate conclusions about which model performs best. We argue for reporting additional details, especially performance on validation data obtained during model development. We present a novel technique for doing so: expected validation performance of the best-found model as a function of computation budget (i.e., the number of hyperparameter search trials or the overall training time). Using our approach, we find multiple recent model comparisons where authors would have reached a different conclusion if they had used more (or less) computation. Our approach also allows us to estimate the amount of computation required to obtain a given accuracy; applying it to several recently published results yields massive variation across papers, from hours to weeks. We conclude with a set of best practices for reporting experimental results which allow for robust future comparisons, and provide code to allow researchers to use our technique.</abstract>
       <url hash="a00b17ca">D19-1224</url>
@@ -2928,7 +2928,7 @@
     </paper>
     <paper id="229">
       <title>Modeling Color Terminology Across Thousands of Languages</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <author><first>Aaron</first><last>Mueller</last></author>
       <author><first>William</first><last>Watson</last></author>
@@ -2945,9 +2945,9 @@
       <author><first>Longxiang</first><last>Shen</last></author>
       <author><first>Bowei</first><last>Zou</last></author>
       <author><first>Yu</first><last>Hong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <pages>2251–2261</pages>
       <abstract>Negation is a universal but complicated linguistic phenomenon, which has received considerable attention from the NLP community over the last decade, since a negated statement often carries both an explicit negative focus and implicit positive meanings. For the sake of understanding a negated statement, it is critical to precisely detect the negative focus in context. However, how to capture contextual information for negative focus detection is still an open challenge. To well address this, we come up with an attention-based neural network to model contextual information. In particular, we introduce a framework which consists of a Bidirectional Long Short-Term Memory (BiLSTM) neural network and a Conditional Random Fields (CRF) layer to effectively encode the order information and the long-range context dependency in a sentence. Moreover, we design two types of attention mechanisms, word-level contextual attention and topic-level contextual attention, to take advantage of contextual information across sentences from both the word perspective and the topic perspective, respectively. Experimental results on the SEM’12 shared task corpus show that our approach achieves the best performance on negative focus detection, yielding an absolute improvement of 2.11% over the state-of-the-art. This demonstrates the great effectiveness of the two types of contextual attention mechanisms.</abstract>
       <url hash="b7226b2a">D19-1230</url>
@@ -2957,8 +2957,8 @@
     <paper id="231">
       <title>A Unified Neural Coherence Model</title>
       <author><first>Han Cheol</first><last>Moon</last></author>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Chi</first><last>Xu</last></author>
       <pages>2262–2272</pages>
       <abstract>Recently, neural approaches to coherence modeling have achieved state-of-the-art results in several evaluation tasks. However, we show that most of these models often fail on harder tasks with more realistic application scenarios. In particular, the existing models underperform on tasks that require the model to be sensitive to local contexts such as candidate ranking in conversational dialogue and in machine translation. In this paper, we propose a unified coherence model that incorporates sentence grammar, inter-sentence coherence relations, and global coherence patterns into a common neural framework. With extensive experiments on local and global discrimination tasks, we demonstrate that our proposed model outperforms existing models by a good margin, and establish a new state-of-the-art.</abstract>
@@ -2996,7 +2996,7 @@
       <author><first>Sonia</first><last>Badene</last></author>
       <author><first>Kate</first><last>Thompson</last></author>
       <author><first>Jean-Pierre</first><last>Lorré</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>2296–2305</pages>
       <abstract>This paper provides a detailed comparison of a data programming approach with (i) off-the-shelf, state-of-the-art deep learning architectures that optimize their representations (BERT) and (ii) handcrafted-feature approaches previously used in the discourse analysis literature. We compare these approaches on the task of learning discourse structure for multi-party dialogue. The data programming paradigm offered by the Snorkel framework allows a user to label training data using expert-composed heuristics, which are then transformed via the “generative step” into probability distributions of the class labels given the data. We show that on our task the generative model outperforms both deep learning architectures as well as more traditional ML approaches when learning discourse structure—it even outperforms the combination of deep learning methods and hand-crafted features. We also implement several strategies for “decoding” our generative model output in order to improve our results. We conclude that weak supervision methods hold great promise as a means for creating and improving data sets for discourse structure.</abstract>
       <url hash="8c619117">D19-1234</url>
@@ -3017,7 +3017,7 @@
       <title>The Myth of Double-Blind Review Revisited: <fixed-case>ACL</fixed-case> vs. <fixed-case>EMNLP</fixed-case></title>
       <author><first>Cornelia</first><last>Caragea</last></author>
       <author><first>Ana</first><last>Uban</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>2317–2327</pages>
       <abstract>The review and selection process for scientific paper publication is essential for the quality of scholarly publications in a scientific field. The double-blind review system, which enforces author anonymity during the review period, is widely used by prestigious conferences and journals to ensure the integrity of this process. Although the notion of anonymity in the double-blind review has been questioned before, the availability of full text paper collections brings new opportunities for exploring the question: Is the double-blind review process really double-blind? We study this question on the ACL and EMNLP paper collections and present an analysis on how well deep learning techniques can infer the authors of a paper. Specifically, we explore Convolutional Neural Networks trained on various aspects of a paper, e.g., content, style features, and references, to understand the extent to which we can infer the authors of a paper and what aspects contribute the most. Our results show that the authors of a paper can be inferred with accuracy as high as 87% on ACL and 78% on EMNLP for the top 100 most prolific authors.</abstract>
       <url hash="89bfee27">D19-1236</url>
@@ -3095,7 +3095,7 @@
       <title><fixed-case>P</fixed-case>ull<fixed-case>N</fixed-case>et: Open Domain Question Answering with Iterative Retrieval on Knowledge Bases and Text</title>
       <author><first>Haitian</first><last>Sun</last></author>
       <author><first>Tania</first><last>Bedrax-Weiss</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>2380–2390</pages>
       <abstract>We consider open-domain question answering (QA) where answers are drawn from either a corpus, a knowledge base (KB), or a combination of both of these. We focus on a setting in which a corpus is supplemented with a large but incomplete KB, and on questions that require non-trivial (e.g., “multi-hop”) reasoning. We describe PullNet, an integrated framework for (1) learning what to retrieve and (2) reasoning with this heterogeneous information to find the best answer. PullNet uses an <fixed-case>iterative</fixed-case> process to construct a question-specific subgraph that contains information relevant to the question. In each iteration, a graph convolutional network (graph CNN) is used to identify subgraph nodes that should be expanded using retrieval (or “pull”) operations on the corpus and/or KB. After the subgraph is complete, another graph CNN is used to extract the answer from the subgraph. This retrieve-and-reason process allows us to answer multi-hop questions using large KBs and corpora. PullNet is weakly supervised, requiring question-answer pairs but not gold inference paths. Experimentally PullNet improves over the prior state-of-the art, and in the setting where a corpus is used with incomplete KB these improvements are often dramatic. PullNet is also often superior to prior systems in a KB-only setting or a text-only setting.</abstract>
       <url hash="cd514d9e">D19-1242</url>
@@ -3187,7 +3187,7 @@
     <paper id="249">
       <title><fixed-case>B</fixed-case>i<fixed-case>P</fixed-case>a<fixed-case>R</fixed-case>: A Bilingual Parallel Dataset for Multilingual and Cross-lingual Reading Comprehension on Novels</title>
       <author><first>Yimin</first><last>Jing</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Zhen</first><last>Yan</last></author>
       <pages>2452–2462</pages>
       <abstract>This paper presents BiPaR, a bilingual parallel novel-style machine reading comprehension (MRC) dataset, developed to support multilingual and cross-lingual reading comprehension. The biggest difference between BiPaR and existing reading comprehension datasets is that each triple (Passage, Question, Answer) in BiPaR is written parallelly in two languages. We collect 3,667 bilingual parallel paragraphs from Chinese and English novels, from which we construct 14,668 parallel question-answer pairs via crowdsourced workers following a strict quality control procedure. We analyze BiPaR in depth and find that BiPaR offers good diversification in prefixes of questions, answer types and relationships between questions and passages. We also observe that answering questions of novels requires reading comprehension skills of coreference resolution, multi-sentence reasoning, and understanding of implicit causality, etc. With BiPaR, we build monolingual, multilingual, and cross-lingual MRC baseline models. Even for the relatively simple monolingual MRC on this dataset, experiments show that a strong BERT baseline is over 30 points behind human in terms of both EM and F1 score, indicating that BiPaR provides a challenging testbed for monolingual, multilingual and cross-lingual MRC on novels. The dataset is available at <url>https://multinlp.github.io/BiPaR/</url>.</abstract>
@@ -3198,7 +3198,7 @@
     <paper id="250">
       <title>Language Models as Knowledge Bases?</title>
       <author><first>Fabio</first><last>Petroni</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <author><first>Patrick</first><last>Lewis</last></author>
       <author><first>Anton</first><last>Bakhtin</last></author>
@@ -3315,7 +3315,7 @@
       <author><first>Qiao</first><last>Jin</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Zhengping</first><last>Liu</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Xinghua</first><last>Lu</last></author>
       <pages>2567–2577</pages>
       <abstract>We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA instances. Each PubMedQA instance is composed of (1) a question which is either an existing research article title or derived from one, (2) a context which is the corresponding abstract without its conclusion, (3) a long answer, which is the conclusion of the abstract and, presumably, answers the research question, and (4) a yes/no/maybe answer which summarizes the conclusion. PubMedQA is the first QA dataset where reasoning over biomedical research texts, especially their quantitative contents, is required to answer the questions. Our best performing model, multi-phase fine-tuning of BioBERT with long answer bag-of-word statistics as additional supervision, achieves 68.1% accuracy, compared to single human performance of 78.0% accuracy and majority-baseline of 55.2% accuracy, leaving much room for improvement. PubMedQA is publicly available at <url>https://pubmedqa.github.io</url>.</abstract>
@@ -3340,7 +3340,7 @@
       <author><first>Xiaowen</first><last>Lin</last></author>
       <author><first>Leo</first><last>Mehr</last></author>
       <author><first>Zijian</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2590–2602</pages>
       <abstract>It is challenging for current one-step retrieve-and-read question answering (QA) systems to answer questions like “Which novel by the author of ‘Armada’ will be adapted as a feature film by Steven Spielberg?” because the question seldom contains retrievable clues about the missing entity (here, the author). Answering such a question requires multi-hop reasoning where one must gather information about the missing entity (or facts) to proceed with further reasoning. We present GoldEn (Gold Entity) Retriever, which iterates between reading context and retrieving more supporting documents to answer open-domain multi-hop questions. Instead of using opaque and computationally expensive neural retrieval models, GoldEn Retriever generates natural language search queries given the question and available context, and leverages off-the-shelf information retrieval systems to query for missing entities. This allows GoldEn Retriever to scale up efficiently for open-domain multi-hop reasoning while maintaining interpretability. We evaluate GoldEn Retriever on the recently proposed open-domain multi-hop QA dataset, HotpotQA, and demonstrate that it outperforms the best previously published model despite not using pretrained language models such as BERT.</abstract>
       <url hash="737e9c7d">D19-1261</url>
@@ -3374,7 +3374,7 @@
     </paper>
     <paper id="264">
       <title>Incorporating Graph Attention Mechanism into Knowledge Graph Reasoning Based on Deep Reinforcement Learning</title>
-      <author id="heng-wang"><first>Heng</first><last>Wang</last></author>
+      <author><first>Heng</first><last>Wang</last></author>
       <author><first>Shuangyin</first><last>Li</last></author>
       <author><first>Rong</first><last>Pan</last></author>
       <author><first>Mingzhi</first><last>Mao</last></author>
@@ -3410,7 +3410,7 @@
       <title>Original Semantics-Oriented Attention and Deep Fusion Network for Sentence Matching</title>
       <author><first>Mingtong</first><last>Liu</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <pages>2652–2661</pages>
       <abstract>Sentence matching is a key issue in natural language inference and paraphrase identification. Despite the recent progress on multi-layered neural network with cross sentence attention, one sentence learns attention to the intermediate representations of another sentence, which are propagated from preceding layers and therefore are uncertain and unstable for matching, particularly at the risk of error propagation. In this paper, we present an original semantics-oriented attention and deep fusion network (OSOA-DFN) for sentence matching. Unlike existing models, each attention layer of OSOA-DFN is oriented to the original semantic representation of another sentence, which captures the relevant information from a fixed matching target. The multiple attention layers allow one sentence to repeatedly read the important information of another sentence for better matching. We then additionally design deep fusion to propagate the attention information at each matching layer. At last, we introduce a self-attention mechanism to capture global context to enhance attention-aware representation within each sentence. Experiment results on three sentence matching benchmark datasets SNLI, SciTail and Quora show that OSOA-DFN has the ability to model sentence matching more precisely.</abstract>
@@ -3462,7 +3462,7 @@
       <author><first>Di</first><last>Liang</last></author>
       <author><first>Fubao</first><last>Zhang</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2692–2700</pages>
       <abstract>Natural language inference aims to predict whether a premise sentence can infer another hypothesis sentence. Existing methods typically have framed the reasoning problem as a semantic matching task. The both sentences are encoded and interacted symmetrically and in parallel. However, in the process of reasoning, the role of the two sentences is obviously different, and the sentence pairs for NLI are asymmetrical corpora. In this paper, we propose an asynchronous deep interaction network (ADIN) to complete the task. ADIN is a neural network structure stacked with multiple inference sub-layers, and each sub-layer consists of two local inference modules in an asymmetrical manner. Different from previous methods, this model deconstructs the reasoning process and implements the asynchronous and multi-step reasoning. Experiment results show that ADIN achieves competitive performance and outperforms strong baselines on three popular benchmarks: SNLI, MultiNLI, and SciTail.</abstract>
       <url hash="6d52b8f0">D19-1271</url>
@@ -3499,7 +3499,7 @@
       <author><first>Lei</first><last>Hou</last></author>
       <author><first>Jiaxin</first><last>Shi</last></author>
       <author><first>Juanzi</first><last>Li</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>2723–2732</pages>
       <abstract>Entity alignment aims at integrating complementary knowledge graphs (KGs) from different sources or languages, which may benefit many knowledge-driven applications. It is challenging due to the heterogeneity of KGs and limited seed alignments. In this paper, we propose a semi-supervised entity alignment method by joint Knowledge Embedding model and Cross-Graph model (KECG). It can make better use of seed alignments to propagate over the entire graphs with KG-based constraints. Specifically, as for the knowledge embedding model, we utilize TransE to implicitly complete two KGs towards consistency and learn relational constraints between entities. As for the cross-graph model, we extend Graph Attention Network (GAT) with projection constraint to robustly encode graphs, and two KGs share the same GAT to transfer structural knowledge as well as to ignore unimportant neighbors for alignment via attention mechanism. Results on publicly available datasets as well as further analysis demonstrate the effectiveness of KECG. Our codes can be found in https: //github.com/THU-KEG/KECG.</abstract>
       <url hash="4a52dd90">D19-1274</url>
@@ -3520,7 +3520,7 @@
     <paper id="276">
       <title>Specializing Word Embeddings (for Parsing) by Information Bottleneck</title>
       <author><first>Xiang Lisa</first><last>Li</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>2744–2754</pages>
       <abstract>Pre-trained word embeddings like ELMo and BERT contain rich syntactic and semantic information, resulting in state-of-the-art performance on various tasks. We propose a very fast variational information bottleneck (VIB) method to nonlinearly compress these embeddings, keeping only the information that helps a discriminative parser. We compress each word embedding to either a discrete tag or a continuous vector. In the discrete version, our automatically compressed tags form an alternative tag set: we show experimentally that our tags capture most of the information in traditional POS tag annotations, but our tag sequences can be parsed more accurately at the same level of tag granularity. In the continuous version, we show experimentally that moderately compressing the word embeddings by our method yields a more accurate parser in 8 of 9 languages, unlike simple dimensionality reduction.</abstract>
       <url hash="97f7139a">D19-1276</url>
@@ -3574,7 +3574,7 @@
       <author><first>Marc-Alexandre</first><last>Côté</last></author>
       <author><first>Jie</first><last>Fu</last></author>
       <author><first>Zhouhan</first><last>Lin</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <pages>2796–2813</pages>
@@ -3598,7 +3598,7 @@
     </paper>
     <paper id="282">
       <title><fixed-case>K</fixed-case>ag<fixed-case>N</fixed-case>et: Knowledge-Aware Graph Networks for Commonsense Reasoning</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Xinyue</first><last>Chen</last></author>
       <author><first>Jamin</first><last>Chen</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
@@ -3627,7 +3627,7 @@
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2851–2864</pages>
       <abstract>Many question answering (QA) tasks only provide weak supervision for how the answer should be computed. For example, TriviaQA answers are entities that can be mentioned multiple times in supporting documents, while DROP answers can be computed by deriving many different equations from numbers in the reference text. In this paper, we show it is possible to convert such tasks into discrete latent variable learning problems with a precomputed, task-specific set of possible solutions (e.g. different mentions or equations) that contains one correct option. We then develop a hard EM learning scheme that computes gradients relative to the most likely solution at each update. Despite its simplicity, we show that this approach significantly outperforms previous methods on six QA tasks, including absolute gains of 2–10%, and achieves the state-of-the-art on five of them. Using hard updates instead of maximizing marginal likelihood is key to these results as it encourages the model to find the one correct answer, which we show through detailed qualitative analysis.</abstract>
       <url hash="ffa534ed">D19-1284</url>
@@ -3637,7 +3637,7 @@
     <paper id="285">
       <title>Is the Red Square Big? <fixed-case>MAL</fixed-case>e<fixed-case>V</fixed-case>i<fixed-case>C</fixed-case>: Modeling Adjectives Leveraging Visual Contexts</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>2865–2876</pages>
       <abstract>This work aims at modeling how the meaning of gradable adjectives of size (‘big’, ‘small’) can be learned from visually-grounded contexts. Inspired by cognitive and linguistic evidence showing that the use of these expressions relies on setting a threshold that is dependent on a specific context, we investigate the ability of multi-modal models in assessing whether an object is ‘big’ or ‘small’ in a given visual scene. In contrast with the standard computational approach that simplistically treats gradable adjectives as ‘fixed’ attributes, we pose the problem as relational: to be successful, a model has to consider the full visual context. By means of four main tasks, we show that state-of-the-art models (but not a relatively strong baseline) can learn the function subtending the meaning of size adjectives, though their performance is found to decrease while moving from simple to more complex tasks. Crucially, models fail in developing abstract representations of gradable adjectives that can be used compositionally.</abstract>
       <url hash="79266a97">D19-1285</url>
@@ -3661,7 +3661,7 @@
       <author><first>Anhad</first><last>Mohananey</last></author>
       <author><first>Phu Mon</first><last>Htut</last></author>
       <author><first>Paloma</first><last>Jeretic</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>2877–2887</pages>
       <abstract>Though state-of-the-art sentence representation models can perform tasks requiring significant knowledge of grammar, it is an open question how best to evaluate their grammatical knowledge. We explore five experimental methods inspired by prior work evaluating pretrained sentence representation models. We use a single linguistic phenomenon, negative polarity item (NPI) licensing, as a case study for our experiments. NPIs like any are grammatical only if they appear in a licensing environment like negation (Sue doesn’t have any cats vs. *Sue has any cats). This phenomenon is challenging because of the variety of NPI licensing environments that exist. We introduce an artificially generated dataset that manipulates key features of NPI licensing for the experiments. We find that BERT has significant knowledge of these features, but its success varies widely across different experimental methods. We conclude that a variety of methods is necessary to reveal all relevant aspects of a model’s grammatical knowledge in a given domain.</abstract>
       <url hash="697a8501">D19-1286</url>
@@ -3674,7 +3674,7 @@
       <author><first>Aixiu</first><last>An</last></author>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Ethan</first><last>Wilcox</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>2888–2899</pages>
       <abstract>Neural language models have achieved state-of-the-art performances on many NLP tasks, and recently have been shown to learn a number of hierarchically-sensitive syntactic dependencies between individual words. However, equally important for language processing is the ability to combine words into phrasal constituents, and use constituent-level features to drive downstream expectations. Here we investigate neural models’ ability to represent constituent-level features, using coordinated noun phrases as a case study. We assess whether different neural language models trained on English and French represent phrase-level number and gender features, and use those features to drive downstream expectations. Our results suggest that models use a linear combination of NP constituent number to drive CoordNP/verb number agreement. This behavior is highly regular and even sensitive to local syntactic context, however it differs crucially from observed human behavior. Models have less success with gender agreement. Models trained on large corpora perform best, and there is no obvious advantage for models trained using explicit syntactic supervision.</abstract>
       <url hash="e0f707bc">D19-1287</url>
@@ -3726,7 +3726,7 @@
       <author><first>Tuhin</first><last>Chakrabarty</last></author>
       <author><first>Christopher</first><last>Hidey</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <author><first>Alyssa</first><last>Hwang</last></author>
       <pages>2933–2943</pages>
       <abstract>Argumentation is a type of discourse where speakers try to persuade their audience about the reasonableness of a claim by presenting supportive arguments. Most work in argument mining has focused on modeling arguments in monologues. We propose a computational model for argument mining in online persuasive discussion forums that brings together the micro-level (argument as product) and macro-level (argument as process) models of argumentation. Fundamentally, this approach relies on identifying relations between components of arguments in a discussion thread. Our approach for relation prediction uses contextual information in terms of fine-tuning a pre-trained language model and leveraging discourse relations based on Rhetorical Structure Theory. We additionally propose a candidate selection method to automatically predict what parts of one’s argument will be targeted by other participants in the discussion. Our models obtain significant improvements compared to recent state-of-the-art approaches using pointer networks and a pre-trained language model.</abstract>
@@ -3752,7 +3752,7 @@
       <author><first>Wonsuk</first><last>Yang</last></author>
       <author><first>Seungwon</first><last>Yoon</last></author>
       <author><first>Ada</first><last>Carpenter</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>2954–2963</pages>
       <abstract>Annotation quality control is a critical aspect for building reliable corpora through linguistic annotation. In this study, we present a simple but powerful quality control method using two-step reason selection. We gathered sentential annotations of local acceptance and three related attributes through a crowdsourcing platform. For each attribute, the reason for the choice of the attribute value is selected in a two-step manner. The options given for reason selection were designed to facilitate the detection of a nonsensical reason selection. We assume that a sentential annotation that contains a nonsensical reason is less reliable than the one without such reason. Our method, based solely on this assumption, is found to retain the annotations with satisfactory quality out of the entire annotations mixed with those of low quality.</abstract>
       <url hash="4e049941">D19-1293</url>
@@ -3763,9 +3763,9 @@
     <paper id="294">
       <title>Evaluating Pronominal Anaphora in Machine Translation: An Evaluation Measure and a Test Suite</title>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>2964–2975</pages>
       <abstract>The ongoing neural revolution in machine translation has made it easier to model larger contexts beyond the sentence-level, which can potentially help resolve some discourse-level ambiguities such as pronominal anaphora, thus enabling better translations. Unfortunately, even when the resulting improvements are seen as substantial by humans, they remain virtually unnoticed by traditional automatic evaluation measures like BLEU, as only a few words end up being affected. Thus, specialized evaluation measures are needed. With this aim in mind, we contribute an extensive, targeted dataset that can be used as a test suite for pronoun translation, covering multiple source languages and different pronoun errors drawn from real system translations, for English. We further propose an evaluation measure to differentiate good and bad pronoun translations. We also conduct a user study to report correlations with human judgments.</abstract>
       <url hash="bdb5eb6c">D19-1294</url>
@@ -3797,7 +3797,7 @@
       <author><first>Junjie</first><last>Li</last></author>
       <author><first>Xuepeng</first><last>Wang</last></author>
       <author><first>Dawei</first><last>Yin</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3000–3010</pages>
       <abstract>Review summarization aims to generate a condensed summary for a review or multiple reviews. Existing review summarization systems mainly generate summary only based on review content and neglect the authors’ attributes (e.g., gender, age, and occupation). In fact, when summarizing a review, users with different attributes usually pay attention to specific aspects and have their own word-using habits or writing styles. Therefore, we propose an Attribute-aware Sequence Network (ASN) to take the aforementioned users’ characteristics into account, which includes three modules: an attribute encoder encodes the attribute preferences over the words; an attribute-aware review encoder adopts an attribute-based selective mechanism to select the important information of a review; and an attribute-aware summary decoder incorporates attribute embedding and attribute-specific word-using habits into word prediction. To validate our model, we collect a new dataset TripAtt, comprising 495,440 attribute-review-summary triplets with three kinds of attribute information: gender, age, and travel status. Extensive experiments show that ASN achieves state-of-the-art performance on review summarization in both auto-metric ROUGE and human evaluation.</abstract>
       <url hash="65ecadec">D19-1297</url>
@@ -3822,7 +3822,7 @@
       <author><first>Xiaocheng</first><last>Feng</last></author>
       <author><first>Feng</first><last>Jiang</last></author>
       <author><first>Bing</first><last>Qin</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>3022–3032</pages>
       <abstract>Recent neural models for data-to-text generation rely on massive parallel pairs of data and text to learn the writing knowledge. They often assume that writing knowledge can be acquired from the training data alone. However, when people are writing, they not only rely on the data but also consider related knowledge. In this paper, we enhance neural data-to-text models with external knowledge in a simple but effective way to improve the fidelity of generated text. Besides relying on parallel data and text as in previous work, our model attends to relevant external knowledge, encoded as a temporary memory, and combines this knowledge with the context representation of data before generating words. This allows the model to infer relevant facts which are not explicitly stated in the data table from an external knowledge source. Experimental results on twenty-one Wikipedia infobox-to-text datasets show our model, KBAtt, consistently improves a state-of-the-art model on most of the datasets. In addition, to quantify when and why external knowledge is effective, we design a metric, KBGain, which shows a strong correlation with the observed performance boost. This result demonstrates the relevance of external knowledge and sparseness of original data are the main factors affecting system performance.</abstract>
       <url hash="39823ccc">D19-1299</url>
@@ -3866,7 +3866,7 @@
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Shaonan</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3054–3064</pages>
       <abstract>Cross-lingual summarization (CLS) is the task to produce a summary in one particular language for a source document in a different language. Existing methods simply divide this task into two steps: summarization and translation, leading to the problem of error propagation. To handle that, we present an end-to-end CLS framework, which we refer to as Neural Cross-Lingual Summarization (NCLS), for the first time. Moreover, we propose to further improve NCLS by incorporating two related tasks, monolingual summarization and machine translation, into the training process of CLS under multi-task learning. Due to the lack of supervised CLS data, we propose a round-trip translation strategy to acquire two high-quality large-scale CLS datasets based on existing monolingual summarization datasets. Experimental results have shown that our NCLS achieves remarkable improvement over traditional pipeline methods on both English-to-Chinese and Chinese-to-English CLS human-corrected test sets. In addition, NCLS with multi-task learning can further significantly improve the quality of generated summaries. We make our dataset and code publicly available here: <url>http://www.nlpr.ia.ac.cn/cip/dataset.htm</url>.</abstract>
       <url hash="9560f001">D19-1302</url>
@@ -3889,7 +3889,7 @@
       <title>Concept Pointer Network for Abstractive Summarization</title>
       <author><first>Wenbo</first><last>Wang</last></author>
       <author><first>Yang</first><last>Gao</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yuxiang</first><last>Zhou</last></author>
       <pages>3076–3085</pages>
       <abstract>A quality abstractive summary should not only copy salient source texts as summaries but should also tend to generate new conceptual words to express concrete details. Inspired by the popular pointer generator sequence-to-sequence model, this paper presents a concept pointer network for improving these aspects of abstractive summarization. The network leverages knowledge-based, context-aware conceptualizations to derive an extended set of candidate concepts. The model then points to the most appropriate choice using both the concept set and original source text. This joint approach generates abstractive summaries with higher-level semantic concepts. The training model is also optimized in a way that adapts to different data, which is based on a novel method of distant-supervised learning guided by reference summaries and testing set. Overall, the proposed approach provides statistically significant improvements over several state-of-the-art models on both the DUC-2004 and Gigaword datasets. A human evaluation of the model’s abstractive abilities also supports the quality of the summaries produced within this framework.</abstract>
@@ -3990,7 +3990,7 @@
     <paper id="312">
       <title>Referring Expression Generation Using Entity Profiles</title>
       <author><first>Meng</first><last>Cao</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3163–3172</pages>
       <abstract>Referring Expression Generation (REG) is the task of generating contextually appropriate references to entities. A limitation of existing REG systems is that they rely on entity-specific supervised training, which means that they cannot handle entities not seen during training. In this study, we address this in two ways. First, we propose task setups in which we specifically test a REG system’s ability to generalize to entities not seen during training. Second, we propose a profile-based deep neural network model, ProfileREG, which encodes both the local context and an external profile of the entity to generate reference realizations. Our model generates tokens by learning to choose between generating pronouns, generating from a fixed vocabulary, or copying a word from the profile. We evaluate our model on three different splits of the WebNLG dataset, and show that it outperforms competitive baselines in all settings according to automatic and human evaluations.</abstract>
       <url hash="436b8a12">D19-1312</url>
@@ -4001,7 +4001,7 @@
       <title>Exploring Diverse Expressions for Paraphrase Generation</title>
       <author><first>Lihua</first><last>Qian</last></author>
       <author><first>Lin</first><last>Qiu</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <pages>3173–3182</pages>
@@ -4062,7 +4062,7 @@
     <paper id="318">
       <title>Deep Copycat Networks for Text-to-Text Generation</title>
       <author><first>Julia</first><last>Ive</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3227–3236</pages>
       <abstract>Most text-to-text generation tasks, for example text summarisation and text simplification, require copying words from the input to the output. We introduce Copycat, a transformer-based pointer network for such tasks which obtains competitive results in abstractive text summarisation and generates more abstractive summaries. We propose a further extension of this architecture for automatic post-editing, where generation is conditioned over two inputs (source language and machine translation), and the model is capable of deciding where to copy information from. This approach achieves competitive performance when compared to state-of-the-art automated post-editing systems. More importantly, we show that it addresses a well-known limitation of automatic post-editing - overcorrecting translations - and that our novel mechanism for copying source language words improves the results.</abstract>
@@ -4148,7 +4148,7 @@
       <author><first>Zhe</first><last>Gan</last></author>
       <author><first>Yu</first><last>Cheng</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Ming-Ting</first><last>Sun</last></author>
       <pages>3304–3313</pages>
       <abstract>Text style transfer without parallel data has achieved some practical success. However, in the scenario where less data is available, these methods may yield poor performance. In this paper, we examine domain adaptation for text style transfer to leverage massively available data from other domains. These data may demonstrate domain shift, which impedes the benefits of utilizing such data for training. To address this challenge, we propose simple yet effective domain adaptive text style transfer models, enabling domain-adaptive information exchange. The proposed models presumably learn from the source domain to: (i) distinguish stylized information and generic content information; (ii) maximally preserve content information; and (iii) adaptively transfer the styles in a domain-aware manner. We evaluate the proposed models on two style transfer tasks (sentiment and formality) over multiple target domains where only limited non-parallel data is available. Extensive experiments demonstrate the effectiveness of the proposed model compared to the baselines.</abstract>
@@ -4161,9 +4161,9 @@
       <title>Let’s Ask Again: Refine Network for Automatic Question Generation</title>
       <author><first>Preksha</first><last>Nema</last></author>
       <author><first>Akash Kumar</first><last>Mohankumar</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Balaji Vasan</first><last>Srinivasan</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
       <pages>3314–3323</pages>
       <abstract>In this work, we focus on the task of Automatic Question Generation (AQG) where given a passage and an answer the task is to generate the corresponding question. It is desired that the generated question should be (i) grammatically correct (ii) answerable from the passage and (iii) specific to the given answer. An analysis of existing AQG models shows that they produce questions which do not adhere to one or more of <fixed-case>the above-mentioned qualities</fixed-case>. In particular, the generated questions look like an incomplete draft of the desired question with a clear scope for refinement. <fixed-case>To alleviate this shortcoming</fixed-case>, we propose a method which tries to mimic the human process of generating questions by first creating an initial draft and then refining it. More specifically, we propose Refine Network (RefNet) which contains two decoders. The second decoder uses a dual attention network which pays attention to both (i) the original passage and (ii) the question (initial draft) generated by the first decoder. In effect, it refines the question generated by the first decoder, thereby making it more correct and complete. We evaluate RefNet on three datasets, <i>viz.</i>, SQuAD, HOTPOT-QA, and DROP, and show that it outperforms existing state-of-the-art methods by 7-16% on all of these datasets. Lastly, we show that we can improve the quality of the second decoder on specific metrics, such as, fluency and answerability by explicitly rewarding revisions that improve on the corresponding metric during training. The code has been made publicly available .</abstract>
       <url hash="98c0da59">D19-1326</url>
@@ -4176,7 +4176,7 @@
       <author><first>Taehee</first><last>Jung</last></author>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Lucas</first><last>Mentch</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3324–3335</pages>
       <abstract>Despite the recent developments on neural summarization systems, the underlying logic behind the improvements from the systems and its corpus-dependency remains largely unexplored. Position of sentences in the original text, for example, is a well known bias for news summarization. Following in the spirit of the claim that summarization is a combination of sub-functions, we define three sub-aspects of summarization: position, importance, and diversity and conduct an extensive analysis of the biases of each sub-aspect with respect to the domain of nine different summarization corpora (e.g., news, academic papers, meeting minutes, movie script, books, posts). We find that while position exhibits substantial bias in news articles, this is not the case, for example, with academic papers and meeting minutes. Furthermore, our empirical study shows that different types of summarization systems (e.g., neural-based) are composed of different degrees of the sub-aspects. Our study provides useful lessons regarding consideration of underlying sub-aspects when collecting a new summarization dataset or developing a new system.</abstract>
       <url hash="8adbf959">D19-1327</url>
@@ -4188,7 +4188,7 @@
       <title>Lost in Evaluation: Misleading Benchmarks for Bilingual Dictionary Induction</title>
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Mareike</first><last>Hartmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3336–3341</pages>
       <abstract>The task of bilingual dictionary induction (BDI) is commonly used for intrinsic evaluation of cross-lingual word embeddings. The largest dataset for BDI was generated automatically, so its quality is dubious. We study the composition and quality of the test sets for five diverse languages from this dataset, with concerning findings: (1) a quarter of the data consists of proper nouns, which can be hardly indicative of BDI performance, and (2) there are pervasive gaps in the gold-standard targets. These issues appear to affect the ranking between cross-lingual embedding systems on individual languages, and the overall degree to which the systems differ in performance. With proper nouns removed from the data, the margin between the top two systems included in the study grows from 3.4% to 17.2%. Manual verification of the predictions, on the other hand, reveals that gaps in the gold standard targets artificially inflate the margin between the two systems on English to Bulgarian BDI from 0.1% to 6.7%. We thus suggest that future research either avoids drawing conclusions from quantitative results on this BDI dataset, or accompanies such evaluation with rigorous error analysis.</abstract>
       <url hash="4c3e3607">D19-1328</url>
@@ -4198,9 +4198,9 @@
     </paper>
     <paper id="329">
       <title>Towards Realistic Practices In Low-Resource Natural Language Processing: The Development Set</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>3342–3349</pages>
       <abstract>Development sets are impractical to obtain for real low-resource languages, since using all available data for training is often more effective. However, development sets are widely used in research papers that purport to deal with low-resource natural language processing (NLP). Here, we aim to answer the following questions: Does using a development set for early stopping in the low-resource setting influence results as compared to a more realistic alternative, where the number of training epochs is tuned on development languages? And does it lead to overestimation or underestimation of performance? We repeat multiple experiments from recent work on neural models for low-resource NLP and compare results for models obtained by training with and without development sets. On average over languages, absolute accuracy differs by up to 1.4%. However, for some languages and tasks, differences are as big as 18.0% accuracy. Our results highlight the importance of realistic experimental setups in the publication of low-resource NLP research results.</abstract>
       <url hash="51ca80c6">D19-1329</url>
@@ -4214,7 +4214,7 @@
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Yuchen</first><last>Liu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>3350–3355</pages>
       <abstract>In this paper, we introduce a novel interactive approach to translate a source language into two different languages simultaneously and interactively. Specifically, the generation of one language relies on not only previously generated outputs by itself, but also the outputs predicted in the other language. Experimental results on IWSLT and WMT datasets demonstrate that our method can obtain significant improvements over both conventional Neural Machine Translation (NMT) model and multilingual NMT model.</abstract>
       <url hash="3bd64156">D19-1330</url>
@@ -4275,7 +4275,7 @@
       <author><first>Ali</first><last>Emami</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3382–3387</pages>
       <abstract>Recent studies have significantly improved the state-of-the-art on common-sense reasoning (CSR) benchmarks like the Winograd Schema Challenge (WSC) and SWAG. The question we ask in this paper is whether improved performance on these benchmarks represents genuine progress towards common-sense-enabled systems. We make case studies of both benchmarks and design protocols that clarify and qualify the results of previous work by analyzing threats to the validity of previous experimental designs. Our protocols account for several properties prevalent in common-sense benchmarks including size limitations, structural regularities, and variable instance difficulty.</abstract>
       <url hash="671b987a">D19-1335</url>
@@ -4289,7 +4289,7 @@
       <author><first>Shunyao</first><last>Li</last></author>
       <author><first>Pengcheng</first><last>Yang</last></author>
       <author><first>Lei</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Xu</first><last>Sun</last></author>
       <pages>3388–3393</pages>
@@ -4325,7 +4325,7 @@
       <title>The Woman Worked as a Babysitter: On Biases in Language Generation</title>
       <author><first>Emily</first><last>Sheng</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Premkumar</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Premkumar</first><last>Natarajan</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>3407–3412</pages>
       <abstract>We present a systematic study of biases in natural language generation (NLG) by analyzing text generated from prompts that contain mentions of different demographic groups. In this work, we introduce the notion of the regard towards a demographic, use the varying levels of regard towards different demographics as a defining metric for bias in NLG, and analyze the extent to which sentiment scores are a relevant proxy metric for regard. To this end, we collect strategically-generated text from language models and manually annotate the text with both sentiment and regard scores. Additionally, we build an automatic regard classifier through transfer learning, so that we can analyze biases in unseen text. Together, these methods reveal the extent of the biased nature of language model generations. Our analysis provides a study of biases in NLG, bias metrics and correlated human judgments, and empirical evidence on the usefulness of our annotated dataset.</abstract>
@@ -4376,7 +4376,7 @@
     <paper id="343">
       <title>Investigating Dynamic Routing in Tree-Structured <fixed-case>LSTM</fixed-case> for Sentiment Analysis</title>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>3432–3437</pages>
@@ -4413,7 +4413,7 @@
       <title>Semantic Relatedness Based Re-ranker for Text Spotting</title>
       <author><first>Ahmed</first><last>Sabir</last></author>
       <author><first>Francesc</first><last>Moreno</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>3451–3457</pages>
       <abstract>Applications such as textual entailment, plagiarism detection or document clustering rely on the notion of semantic similarity, and are usually approached with dimension reduction techniques like LDA or with embedding-based neural approaches. We present a scenario where semantic similarity is not enough, and we devise a neural approach to learn semantic relatedness. The scenario is text spotting in the wild, where a text in an image (e.g. street sign, advertisement or bus destination) must be identified and recognized. Our goal is to improve the performance of vision systems by leveraging semantic information. Our rationale is that the text to be spotted is often related to the image context in which it appears (word pairs such as Delta-airplane, or quarters-parking are not similar, but are clearly related). We show how learning a word-to-word or word-to-sentence relatedness score can improve the performance of text spotting systems up to 2.9 points, outperforming other measures in a benchmark dataset.</abstract>
       <url hash="15fb9294">D19-1346</url>
@@ -4444,7 +4444,7 @@
     <paper id="349">
       <title>Evaluating Topic Quality with Posterior Variability</title>
       <author><first>Linzi</first><last>Xing</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <pages>3471–3477</pages>
       <abstract>Probabilistic topic models such as latent Dirichlet allocation (LDA) are popularly used with Bayesian inference methods such as Gibbs sampling to learn posterior distributions over topic model parameters. We derive a novel measure of LDA topic quality using the variability of the posterior distributions. Compared to several existing baselines for automatic topic evaluation, the proposed metric achieves state-of-the-art correlations with human judgments of topic quality in experiments on three corpora. We additionally demonstrate that topic quality estimation can be further improved using a supervised estimator that combines multiple metrics.</abstract>
@@ -4495,7 +4495,7 @@
       <author><first>Daniel</first><last>Cohen</last></author>
       <author><first>Yen-Chieh</first><last>Lien</last></author>
       <author><first>Pratik</first><last>Mehta</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <author><first>Scott</first><last>Miller</last></author>
       <pages>3497–3502</pages>
       <abstract>When performing cross-language information retrieval (CLIR) for lower-resourced languages, a common approach is to retrieve over the output of machine translation (MT). However, there is no established guidance on how to optimize the resulting MT-IR system. In this paper, we examine the relationship between the performance of MT systems and both neural and term frequency-based IR models to identify how CLIR performance can be best predicted from MT quality. We explore performance at varying amounts of MT training data, byte pair encoding (BPE) merge operations, and across two IR collections and retrieval models. We find that the choice of IR collection can substantially affect the predictive power of MT tuning decisions and evaluation, potentially introducing dissociations between MT-only and overall CLIR performance.</abstract>
@@ -4518,7 +4518,7 @@
       <author><first>Luyao</first><last>Huang</last></author>
       <author><first>Chi</first><last>Sun</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3509–3514</pages>
       <abstract>Word Sense Disambiguation (WSD) aims to find the exact sense of an ambiguous word in a particular context. Traditional supervised methods rarely take into consideration the lexical resources like WordNet, which are widely utilized in knowledge-based methods. Recent studies have shown the effectiveness of incorporating gloss (sense definition) into neural networks for WSD. However, compared with traditional word expert supervised methods, they have not achieved much improvement. In this paper, we focus on how to better leverage gloss knowledge in a supervised neural WSD system. We construct context-gloss pairs and propose three BERT based models for WSD. We fine-tune the pre-trained BERT model and achieve new state-of-the-art results on WSD task.</abstract>
       <url hash="dd2b49e0">D19-1355</url>
@@ -4577,7 +4577,7 @@
     </paper>
     <paper id="360">
       <title>Hierarchical Meta-Embeddings for Code-Switching Named Entity Recognition</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Jamin</first><last>Shin</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
@@ -4643,7 +4643,7 @@
       <author><first>Yu</first><last>Wu</last></author>
       <author><first>Lili</first><last>Mou</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <pages>3573–3578</pages>
       <abstract>Formality text style transfer plays an important role in various NLP applications, such as non-native speaker assistants and child education. Early studies normalize informal sentences with rules, before statistical and neural models become a prevailing method in the field. While a rule-based system is still a common preprocessing step for formality style transfer in the neural era, it could introduce noise if we use the rules in a naive way such as data preprocessing. To mitigate this problem, we study how to harness rules into a state-of-the-art neural network that is typically pretrained on massive corpora. We propose three fine-tuning methods in this paper and achieve a new state-of-the-art on benchmark datasets</abstract>
       <url hash="2556b673">D19-1365</url>
@@ -4656,7 +4656,7 @@
       <author><first>Yi-Te</first><last>Hong</last></author>
       <author><first>Hong-You</first><last>Chen</last></author>
       <author><first>Chi-Jen</first><last>Lu</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>3579–3584</pages>
       <abstract>The objective of non-parallel text style transfer, or controllable text generation, is to alter specific attributes (e.g. sentiment, mood, tense, politeness, etc) of a given text while preserving its remaining attributes and content. Generative adversarial network (GAN) is a popular model to ensure the transferred sentences are realistic and have the desired target styles. However, training GAN often suffers from mode collapse problem, which causes that the transferred text is little related to the original text. In this paper, we propose a new GAN model with a word-level conditional architecture and a two-phase training procedure. By using a style-related condition architecture before generating a word, our model is able to maintain style-unrelated words while changing the others. By separating the training procedure into reconstruction and transfer phases, our model is able to learn a proper text generation process, which further improves the content preservation. We test our model on polarity sentiment transfer and multiple-attribute transfer tasks. The empirical results show that our model achieves comparable evaluation scores in both transfer accuracy and fluency but significantly outperforms other state-of-the-art models in content compatibility on three real-world datasets.</abstract>
       <url hash="31912278">D19-1366</url>
@@ -4774,7 +4774,7 @@
       <title><fixed-case>P</fixed-case>a<fixed-case>LM</fixed-case>: A Hybrid Parser and Language Model</title>
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3644–3651</pages>
       <abstract>We present PaLM, a hybrid parser and neural language model. Building on an RNN language model, PaLM adds an attention layer over text spans in the left context. An unsupervised constituency parser can be derived from its attention weights, using a greedy decoding algorithm. We evaluate PaLM on language modeling, and empirically show that it outperforms strong baselines. If syntactic annotations are available, the attention component can be trained in a supervised manner, providing syntactically-informed representations of the context, and further improving language modeling performance.</abstract>
       <url hash="dd7535c6">D19-1376</url>
@@ -4821,7 +4821,7 @@
       <title>Efficient Sentence Embedding using Discrete Cosine Transform</title>
       <author><first>Nada</first><last>Almarwani</last></author>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>3672–3678</pages>
       <abstract>Vector averaging remains one of the most popular sentence embedding methods in spite of its obvious disregard for syntactic structure. While more complex sequential or convolutional networks potentially yield superior classification performance, the improvements in classification accuracy are typically mediocre compared to the simple vector averaging. As an efficient alternative, we propose the use of discrete cosine transform (DCT) to compress word sequences in an order-preserving manner. The lower order DCT coefficients represent the overall feature patterns in sentences, which results in suitable embeddings for tasks that could benefit from syntactic features. Our results in semantic probing tasks demonstrate that DCT embeddings indeed preserve more syntactic information compared with vector averaging. With practically equivalent complexity, the model yields better overall performance in downstream classification tasks that correlate with syntactic features, which illustrates the capacity of DCT to preserve word order information.</abstract>
       <url hash="b38f106a">D19-1380</url>
@@ -4856,8 +4856,8 @@
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Iz</first><last>Beltagy</last></author>
       <author><first>Daniel</first><last>King</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
-      <author><first>Dan</first><last>Weld</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="daniel-s-weld"><first>Dan</first><last>Weld</last></author>
       <pages>3693–3699</pages>
       <abstract>As a step toward better document-level understanding, we explore classification of a sequence of sentences into their corresponding categories, a task that requires understanding sentences in context of the document. Recent successful models for this task have used hierarchical models to contextualize sentence representations, and Conditional Random Fields (CRFs) to incorporate dependencies between subsequent labels. In this work, we show that pretrained language models, BERT (Devlin et al., 2018) in particular, can be used for this task to capture contextual dependencies without the need for hierarchical encoding nor a CRF. Specifically, we construct a joint sentence representation that allows BERT Transformer layers to directly utilize contextual information from all words in all sentences. Our approach achieves state-of-the-art results on four datasets, including a new dataset of structured scientific abstracts.</abstract>
       <url hash="b3240e06">D19-1383</url>
@@ -5226,7 +5226,7 @@
       <author><first>Sahil</first><last>Garg</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
       <author><first>Greg</first><last>Ver Steeg</last></author>
-      <author><first>Guillermo</first><last>Cecchi</last></author>
+      <author id="guillermo-a-cecchi"><first>Guillermo</first><last>Cecchi</last></author>
       <pages>4026–4036</pages>
       <abstract>Recently, kernelized locality sensitive hashcodes have been successfully employed as representations of natural language text, especially showing high relevance to biomedical relation extraction tasks. In this paper, we propose to optimize the hashcode representations in a nearly unsupervised manner, in which we only use data points, but not their class labels, for learning. The optimized hashcode representations are then fed to a supervised classifi er following the prior work. This nearly unsupervised approach allows fine-grained optimization of each hash function, which is particularly suitable for building hashcode representations generalizing from a training set to a test set. We empirically evaluate the proposed approach for biomedical relation extraction tasks, obtaining significant accuracy improvements w.r.t. state-of-the-art supervised and semi-supervised approaches.</abstract>
       <url hash="b2fb50ee">D19-1414</url>
@@ -5237,7 +5237,7 @@
       <title>Auditing Deep Learning processes through Kernel-based Explanatory Models</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Daniele</first><last>Rossini</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>4037–4046</pages>
       <abstract>While NLP systems become more pervasive, their accountability gains value as a focal point of effort. Epistemological opaqueness of nonlinear learning methods, such as deep learning models, can be a major drawback for their adoptions. In this paper, we discuss the application of Layerwise Relevance Propagation over a linguistically motivated neural architecture, the Kernel-based Deep Architecture, in order to trace back connections between linguistic properties of input instances and system decisions. Such connections then guide the construction of argumentations on network’s inferences, i.e., explanations based on real examples, semantically related to the input. We propose here a methodology to evaluate the transparency and coherence of analogy-based explanations modeling an audit stage for the system. Quantitative analysis on two semantic tasks, i.e., question classification and semantic role labeling, show that the explanatory capabilities (native in KDAs) are effective and they pave the way to more complex argumentation methods.</abstract>
       <url hash="5269f898">D19-1415</url>
@@ -5271,7 +5271,7 @@
       <title>Don’t Take the Easy Way Out: Ensemble Based Methods for Avoiding Known Dataset Biases</title>
       <author><first>Christopher</first><last>Clark</last></author>
       <author><first>Mark</first><last>Yatskar</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>4069–4082</pages>
       <abstract>State-of-the-art models often make use of superficial patterns in the data that do not generalize well to out-of-domain or adversarial settings. For example, textual entailment models often learn that particular key words imply entailment, irrespective of context, and visual question answering models learn to predict prototypical answers, without considering evidence in the image. In this paper, we show that if we have prior knowledge of such biases, we can train a model to be more robust to domain shift. Our method has two stages: we (1) train a naive model that makes predictions exclusively based on dataset biases, and (2) train a robust model as part of an ensemble with the naive one in order to encourage it to focus on other patterns in the data that are more likely to generalize. Experiments on five datasets with out-of-domain test sets show significantly improved robustness in all settings, including a 12 point gain on a changing priors visual question answering dataset and a 9 point gain on an adversarial question answering test set.</abstract>
       <url hash="46211f3c">D19-1418</url>
@@ -5283,7 +5283,7 @@
       <author><first>Po-Sen</first><last>Huang</last></author>
       <author><first>Robert</first><last>Stanforth</last></author>
       <author><first>Johannes</first><last>Welbl</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Sven</first><last>Gowal</last></author>
       <author><first>Krishnamurthy</first><last>Dvijotham</last></author>
@@ -5299,7 +5299,7 @@
       <title>Rethinking Cooperative Rationalization: Introspective Extraction and Complement Control</title>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Shiyu</first><last>Chang</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Tommi</first><last>Jaakkola</last></author>
       <pages>4094–4103</pages>
       <abstract>Selective rationalization has become a common mechanism to ensure that predictive models reveal how they use any available features. The selection may be soft or hard, and identifies a subset of input features relevant for prediction. The setup can be viewed as a co-operate game between the selector (aka rationale generator) and the predictor making use of only the selected features. The co-operative setting may, however, be compromised for two reasons. First, the generator typically has no direct access to the outcome it aims to justify, resulting in poor performance. Second, there’s typically no control exerted on the information left outside the selection. We revise the overall co-operative framework to address these challenges. We introduce an introspective model which explicitly predicts and incorporates the outcome into the selection process. Moreover, we explicitly control the rationale complement via an adversary so as not to leave any useful information out of the selection. We show that the two complementary mechanisms maintain both high predictive accuracy and lead to comprehensive rationales.</abstract>
@@ -5311,7 +5311,7 @@
     <paper id="421">
       <title>Experimenting with Power Divergences for Language Modeling</title>
       <author><first>Matthieu</first><last>Labeau</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>4104–4114</pages>
       <abstract>Neural language models are usually trained using Maximum-Likelihood Estimation (MLE). The corresponding objective function for MLE is derived from the Kullback-Leibler (KL) divergence between the empirical probability distribution representing the data and the parametric probability distribution output by the model. However, the word frequency discrepancies in natural language make performance extremely uneven: while the perplexity is usually very low for frequent words, it is especially difficult to predict rare words. In this paper, we experiment with several families (alpha, beta and gamma) of power divergences, generalized from the KL divergence, for learning language models with an objective different than standard MLE. Intuitively, these divergences should affect the way the probability mass is spread during learning, notably by prioritizing performances on high or low-frequency words. In addition, we implement and experiment with various sampling-based objectives, where the computation of the output layer is only done on a small subset of the vocabulary. They are derived as power generalizations of a softmax approximated via Importance Sampling, and Noise Contrastive Estimation, for accelerated learning. Our experiments on the Penn Treebank and Wikitext-2 show that these power divergences can indeed be used to prioritize learning on the frequent or rare words, and lead to general performance improvements in the case of sampling-based learning.</abstract>
       <url hash="5f3695c0">D19-1421</url>
@@ -5359,7 +5359,7 @@
       <title>Topics to Avoid: Demoting Latent Confounds in Text Classification</title>
       <author><first>Sachin</first><last>Kumar</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>4153–4163</pages>
       <abstract>Despite impressive performance on many text classification tasks, deep neural networks tend to learn frequent superficial patterns that are specific to the training data and do not always generalize well. In this work, we observe this limitation with respect to the task of <i>native language identification</i>. We find that standard text classifiers which perform well on the test set end up learning topical features which are confounds of the prediction task (e.g., if the input text mentions Sweden, the classifier predicts that the author’s native language is Swedish). We propose a method that represents the latent topical confounds and a model which “unlearns” confounding features by predicting both the label of the input text and the confound; but we train the two predictors adversarially in an alternating fashion to learn a text representation that predicts the correct label but is less prone to using information about the confound. We show that this model generalizes better and learns features that are indicative of the writing style rather than the content.</abstract>
@@ -5371,7 +5371,7 @@
       <title>Learning to Ask for Conversational Machine Learning</title>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Igor</first><last>Labutov</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>4164–4174</pages>
       <abstract>Natural language has recently been explored as a new medium of supervision for training machine learning models. Here, we explore learning classification tasks using language in a conversational setting – where the automated learner does not simply receive language input from a teacher, but can proactively engage the teacher by asking questions. We present a reinforcement learning framework, where the learner’s actions correspond to question types and the reward for asking a question is based on how the teacher’s response changes performance of the resulting machine learning model on the learning task. In this framework, learning good question-asking strategies corresponds to asking sequences of questions that maximize the cumulative (discounted) reward, and hence quickly lead to effective classifiers. Empirical analysis across three domains shows that learned question-asking strategies expedite classifier training by asking appropriate questions at different points in the learning process. The approach allows learning classifiers from a blend of strategies, including learning from observations, explanations and clarifications.</abstract>
       <url hash="6e53e90c">D19-1426</url>
@@ -5406,8 +5406,8 @@
       <title>Fine-grained Knowledge Fusion for Sequence Labeling Domain Adaptation</title>
       <author><first>Huiyun</first><last>Yang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>4197–4206</pages>
       <abstract>In sequence labeling, previous domain adaptation methods focus on the adaptation from the source domain to the entire target domain without considering the diversity of individual target domain samples, which may lead to negative transfer results for certain samples. Besides, an important characteristic of sequence labeling tasks is that different elements within a given sample may also have diverse domain relevance, which requires further consideration. To take the multi-level domain relevance discrepancy into account, in this paper, we propose a fine-grained knowledge fusion model with the domain relevance modeling scheme to control the balance between learning from the target domain data and learning from the source domain model. Experiments on three sequence labeling tasks show that our fine-grained knowledge fusion model outperforms strong baselines and other state-of-the-art sequence labeling domain adaptation methods.</abstract>
       <url hash="cbb37400">D19-1429</url>
@@ -5446,7 +5446,7 @@
       <title>Distributionally Robust Language Modeling</title>
       <author><first>Yonatan</first><last>Oren</last></author>
       <author><first>Shiori</first><last>Sagawa</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <pages>4227–4237</pages>
       <abstract>Language models are generally trained on data spanning a wide range of topics (e.g., news, reviews, fiction), but they might be applied to an a priori unknown target distribution (e.g., restaurant reviews). In this paper, we first show that training on text outside the test distribution can degrade test performance when using standard maximum likelihood (MLE) training. To remedy this without the knowledge of the test distribution, we propose an approach which trains a model that performs well over a wide range of potential test distributions. In particular, we derive a new distributionally robust optimization (DRO) procedure which minimizes the loss of the model over the worst-case mixture of topics with sufficient overlap with the training distribution. Our approach, called topic conditional value at risk (topic CVaR), obtains a 5.5 point perplexity reduction over MLE when the language models are trained on a mixture of Yelp reviews and news and tested only on reviews.</abstract>
@@ -5467,7 +5467,7 @@
     </paper>
     <paper id="434">
       <title>Learning Latent Parameters without Human Response Patterns: Item Response Theory with Artificial Crowds</title>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Hao</first><last>Wu</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>4249–4259</pages>
@@ -5509,7 +5509,7 @@
       <author><first>Chunting</first><last>Zhou</last></author>
       <author><first>Xian</first><last>Li</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>4282–4292</pages>
       <abstract>Most sequence-to-sequence (seq2seq) models are autoregressive; they generate each token by conditioning on previously generated tokens. In contrast, non-autoregressive seq2seq models generate all tokens in one pass, which leads to increased efficiency through parallel processing on hardware such as GPUs. However, directly modeling the joint distribution of all tokens simultaneously is challenging, and even with increasingly complex model structures accuracy lags significantly behind autoregressive models. In this paper, we propose a simple, efficient, and effective model for non-autoregressive sequence generation using latent variable models. Specifically, we turn to generative flow, an elegant technique to model complex distributions using neural networks, and design several layers of flow tailored for modeling the conditional density of sequential latent variables. We evaluate this model on three neural machine translation (NMT) benchmark datasets, achieving comparable performance with state-of-the-art non-autoregressive NMT models and almost constant decoding time w.r.t the sequence length.</abstract>
       <url hash="199e8b4d">D19-1437</url>
@@ -5536,7 +5536,7 @@
       <author><first>Oana-Maria</first><last>Camburu</last></author>
       <author><first>Ana-Maria</first><last>Cretu</last></author>
       <author><first>Yordan</first><last>Yordanov</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Thomas</first><last>Lukasiewicz</last></author>
       <pages>4303–4312</pages>
       <abstract>Pronoun resolution is a major area of natural language understanding. However, large-scale training sets are still scarce, since manually labelling data is costly. In this work, we introduce WikiCREM (Wikipedia CoREferences Masked) a large-scale, yet accurate dataset of pronoun disambiguation instances. We use a language-model-based approach for pronoun resolution in combination with our WikiCREM dataset. We compare a series of models on a collection of diverse and challenging coreference resolution problems, where we match or outperform previous state-of-the-art approaches on 6 out of 7 datasets, such as GAP, DPR, WNLI, PDP, WinoBias, and WinoGender. We release our model to be used off-the-shelf for solving pronoun disambiguation.</abstract>
@@ -5548,7 +5548,7 @@
     <paper id="440">
       <title>Identifying and Explaining Discriminative Attributes</title>
       <author><first>Armins</first><last>Stepanjans</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>4313–4322</pages>
       <abstract>Identifying what is at the center of the meaning of a word and what discriminates it from other words is a fundamental natural language inference task. This paper describes an explicit word vector representation model (WVM) to support the identification of discriminative attributes. A core contribution of the paper is a quantitative and qualitative comparative analysis of different types of data sources and Knowledge Bases in the construction of explainable and explicit WVMs: (i) knowledge graphs built from dictionary definitions, (ii) entity-attribute-relationships graphs derived from images and (iii) commonsense knowledge graphs. Using a detailed quantitative and qualitative analysis, we demonstrate that these data sources have complementary semantic aspects, supporting the creation of explicit semantic vector spaces. The explicit vector spaces are evaluated using the task of discriminative attribute identification, showing comparable performance to the state-of-the-art systems in the task (F1-score = 0.69), while delivering full model transparency and explainability.</abstract>
       <url hash="de678589">D19-1440</url>
@@ -5668,7 +5668,7 @@
     <paper id="450">
       <title>Weakly-Supervised Concept-based Adversarial Learning for Cross-lingual Word Embeddings</title>
       <author><first>Haozhou</first><last>Wang</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>4419–4430</pages>
       <abstract>Distributed representations of words which map each word to a continuous vector have proven useful in capturing important linguistic information not only in a single language but also across different languages. Current unsupervised adversarial approaches show that it is possible to build a mapping matrix that aligns two sets of monolingual word embeddings without high quality parallel data, such as a dictionary or a sentence-aligned corpus. However, without an additional step of refinement, the preliminary mapping learnt by these methods is unsatisfactory, leading to poor performance for typologically distant languages. In this paper, we propose a weakly-supervised adversarial training method to overcome this limitation, based on the intuition that mapping across languages is better done at the concept level than at the word level. We propose a concept-based adversarial training method which improves the performance of previous unsupervised adversarial methods for most languages, and especially for typologically distant language pairs.</abstract>
@@ -5693,8 +5693,8 @@
     <paper id="452">
       <title>Contrastive Language Adaptation for Cross-Lingual Stance Detection</title>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>4442–4452</pages>
       <abstract>We study cross-lingual stance detection, which aims to leverage labeled data in one language to identify the relative perspective (or stance) of a given document with respect to a claim in a different target language. In particular, we introduce a novel contrastive language adaptation approach applied to memory networks, which ensures accurate alignment of stances in the source and target languages, and can effectively deal with the challenge of limited labeled data in the target language. The evaluation results on public benchmark datasets and comparison against current state-of-the-art approaches demonstrate the effectiveness of our approach.</abstract>
       <url hash="008bff5c">D19-1452</url>
@@ -5752,10 +5752,10 @@
     </paper>
     <paper id="457">
       <title>Everything Happens for a Reason: Discovering the Purpose of Actions in Procedural Text</title>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>4496–4505</pages>
       <abstract>Our goal is to better comprehend procedural text, e.g., a paragraph about photosynthesis, by not only predicting what happens, but *why* some actions need to happen before others. Our approach builds on a prior process comprehension framework for predicting actions’ effects, to also identify subsequent steps that those effects enable. We present our new model (XPAD) that biases effect predictions towards those that (1) explain more of the actions in the paragraph and (2) are more plausible with respect to background knowledge. We also extend an existing benchmark dataset for procedural text comprehension, ProPara, by adding the new task of explaining actions by predicting their dependencies. We find that XPAD significantly outperforms prior systems on this task, while maintaining the performance on the original task in ProPara. The dataset is available at <url>http://data.allenai.org/propara</url></abstract>
@@ -5787,7 +5787,7 @@
       <author><first>Daniel</first><last>Duckworth</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
       <author><first>Amit</first><last>Dubey</last></author>
-      <author><first>Kyu-Young</first><last>Kim</last></author>
+      <author id="kyuyoung-kim"><first>Kyu-Young</first><last>Kim</last></author>
       <author><first>Andy</first><last>Cedilnik</last></author>
       <pages>4516–4525</pages>
       <abstract>A significant barrier to progress in data-driven approaches to building dialog systems is the lack of high quality, goal-oriented conversational data. To help satisfy this elementary requirement, we introduce the initial release of the Taskmaster-1 dataset which includes 13,215 task-based dialogs comprising six domains. Two procedures were used to create this collection, each with unique advantages. The first involves a two-person, spoken “Wizard of Oz” (WOz) approach in which trained agents and crowdsourced workers interact to complete the task while the second is “self-dialog” in which crowdsourced workers write the entire dialog themselves. We do not restrict the workers to detailed scripts or to a small knowledge base and hence we observe that our dataset contains more realistic and diverse conversations in comparison to existing datasets. We offer several baseline models including state of the art neural seq2seq architectures with benchmark performance as well as qualitative human evaluations. Dialogs are labeled with API calls and arguments, a simple and cost effective approach which avoids the requirement of complex annotation schema. The layer of abstraction between the dialog model and the service provider API allows for a given model to interact with multiple services that provide similar functionally. Finally, the dataset will evoke interest in written vs. spoken language, discourse patterns, error handling and other linguistic phenomena related to dialog system research, development and design.</abstract>
@@ -5803,7 +5803,7 @@
       <author><first>Brigi</first><last>Fodor</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <author><first>Adel</first><last>Youssef</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>4526–4536</pages>
       <abstract>The need for high-quality, large-scale, goal-oriented dialogue datasets continues to grow as virtual assistants become increasingly wide-spread. However, publicly available datasets useful for this area are limited either in their size, linguistic diversity, domain coverage, or annotation granularity. In this paper, we present strategies toward curating and annotating large scale goal oriented dialogue data. We introduce the MultiDoGO dataset to overcome these limitations. With a total of over 81K dialogues harvested across six domains, MultiDoGO is over 8 times the size of MultiWOZ, the other largest comparable dialogue dataset currently available to the public. Over 54K of these harvested conversations are annotated for intent classes and slot labels. We adopt a Wizard-of-Oz approach wherein a crowd-sourced worker (the “customer”) is paired with a trained annotator (the “agent”). The data curation process was controlled via biases to ensure a diversity in dialogue flows following variable dialogue policies. We provide distinct class label tags for agents vs. customer utterances, along with applicable slot labels. We also compare and contrast our strategies on annotation granularity, i.e. turn vs. sentence level. Furthermore, we compare and contrast annotations curated by leveraging professional annotators vs the crowd. We believe our strategies for eliciting and annotating such a dialogue dataset scales across modalities and domains and potentially languages in the future. To demonstrate the efficacy of our devised strategies we establish neural baselines for classification on the agent and customer utterances as well as slot labeling for each domain.</abstract>
       <url hash="4c03d59e">D19-1460</url>
@@ -5827,8 +5827,8 @@
     <paper id="462">
       <title><fixed-case>GECOR</fixed-case>: An End-to-End Generative Ellipsis and Co-reference Resolution Model for Task-Oriented Dialogue</title>
       <author><first>Jun</first><last>Quan</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Changjian</first><last>Hu</last></author>
       <pages>4547–4557</pages>
       <abstract>Ellipsis and co-reference are common and ubiquitous especially in multi-turn dialogues. In this paper, we treat the resolution of ellipsis and co-reference in dialogue as a problem of generating omitted or referred expressions from the dialogue context. We therefore propose a unified end-to-end Generative Ellipsis and CO-reference Resolution model (GECOR) in the context of dialogue. The model can generate a new pragmatically complete user utterance by alternating the generation and copy mode for each user utterance. A multi-task learning framework is further proposed to integrate the GECOR into an end-to-end task-oriented dialogue. In order to train both the GECOR and the multi-task learning framework, we manually construct a new dataset on the basis of the public dataset CamRest676 with both ellipsis and co-reference annotation. On this dataset, intrinsic evaluations on the resolution of ellipsis and co-reference show that the GECOR model significantly outperforms the sequence-to-sequence (seq2seq) baseline model in terms of EM, BLEU and F1 while extrinsic evaluations on the downstream dialogue task demonstrate that our multi-task learning framework with GECOR achieves a higher success rate of task completion than TSCP, a state-of-the-art end-to-end task-oriented dialogue model.</abstract>
@@ -5851,7 +5851,7 @@
     </paper>
     <paper id="464">
       <title>Aspect-based Sentiment Classification with Aspect-specific Graph Convolutional Networks</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Qiuchi</first><last>Li</last></author>
       <author><first>Dawei</first><last>Song</last></author>
       <pages>4568–4578</pages>
@@ -5867,7 +5867,7 @@
       <author><first>Haisong</first><last>Zhang</last></author>
       <author><first>Lingzhi</first><last>Wang</last></author>
       <author><first>Xixin</first><last>Wu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>4579–4589</pages>
       <abstract>Aspect words, indicating opinion targets, are essential in expressing and understanding human opinions. To identify aspects, most previous efforts focus on using sequence tagging models trained on human-annotated data. This work studies unsupervised aspect extraction and explores how words appear in global context (on sentence level) and local context (conveyed by neighboring words). We propose a novel neural model, capable of coupling global and local representation to discover aspect words. Experimental results on two benchmarks, laptop and restaurant reviews, show that our model significantly outperforms the state-of-the-art models from previous studies evaluated with varying metrics. Analysis on model output show our ability to learn meaningful and coherent aspect representations. We further investigate how words distribute in global and local context, and find that aspect and non-aspect words do exhibit different context, interpreting our superiority in unsupervised aspect extraction.</abstract>
       <url hash="050ec476">D19-1465</url>
@@ -5923,7 +5923,7 @@
       <author><first>Jonah</first><last>Lubin</last></author>
       <author><first>Karan</first><last>Sikka</last></author>
       <author><first>Xiao</first><last>Lin</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Ajay</first><last>Divakaran</last></author>
       <pages>4622–4632</pages>
       <abstract>Computing author intent from multimodal data like Instagram posts requires modeling a complex relationship between text and image. For example, a caption might evoke an ironic contrast with the image, so neither caption nor image is a mere transcript of the other. Instead they combine—via what has been called meaning multiplication (Bateman et al.)- to create a new meaning that has a more complex relation to the literal meanings of text and image. Here we introduce a multimodal dataset of 1299 Instagram posts labeled for three orthogonal taxonomies: the authorial intent behind the image-caption pair, the contextual relationship between the literal meanings of the image and caption, and the semiotic relationship between the signified meanings of the image and caption. We build a baseline deep multimodal classifier to validate the taxonomy, showing that employing both text and image improves intent detection by 9.6 compared to using only the image modality, demonstrating the commonality of non-intersective meaning multiplication. The gain with multimodality is greatest when the image and caption diverge semiotically. Our dataset offers a new resource for the study of the rich meanings that result from pairing text and image.</abstract>
@@ -5936,7 +5936,7 @@
       <author><first>Xingshan</first><last>Zeng</last></author>
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>4633–4643</pages>
       <abstract>The prevalent use of social media leads to a vast amount of online conversations being produced on a daily basis. It presents a concrete challenge for individuals to better discover and engage in social media discussions. In this paper, we present a novel framework to automatically recommend conversations to users based on their prior conversation behaviors. Built on neural collaborative filtering, our model explores deep semantic features that measure how a user’s preferences match an ongoing conversation’s context. Furthermore, to identify salient characteristics from interleaving user interactions, our model incorporates graph-structured networks, where both replying relations and temporal features are encoded as conversation context. Experimental results on two large-scale datasets collected from Twitter and Reddit show that our model yields better performance than previous state-of-the-art models, which only utilize lexical features and ignore past user interactions in the conversations.</abstract>
       <url hash="1e19cd21">D19-1470</url>
@@ -6024,8 +6024,8 @@
       <title>You Shall Know a User by the Company It Keeps: Dynamic Representations for Social Media Users in <fixed-case>NLP</fixed-case></title>
       <author><first>Marco</first><last>Del Tredici</last></author>
       <author><first>Diego</first><last>Marcheggiani</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>4707–4717</pages>
       <abstract>Information about individuals can help to better understand what they say, particularly in social media where texts are short. Current approaches to modelling social media users pay attention to their social connections, but exploit this information in a static way, treating all connections uniformly. This ignores the fact, well known in sociolinguistics, that an individual may be part of several communities which are not equally relevant in all communicative situations. We present a model based on Graph Attention Networks that captures this observation. It dynamically explores the social graph of a user, computes a user representation given the most relevant connections for a target task, and combines it with linguistic information to make a prediction. We apply our model to three different tasks, evaluate it against alternative models, and analyse the results extensively, showing that it significantly outperforms other current methods.</abstract>
       <url hash="b09fd31f">D19-1477</url>
@@ -6049,7 +6049,7 @@
     <paper id="480">
       <title>A Hierarchical Location Prediction Neural Network for <fixed-case>T</fixed-case>witter User Geolocation</title>
       <author><first>Binxuan</first><last>Huang</last></author>
-      <author><first>Kathleen</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last></author>
       <pages>4732–4742</pages>
       <abstract>Accurate estimation of user location is important for many online services. Previous neural network based methods largely ignore the hierarchical structure among locations. In this paper, we propose a hierarchical location prediction neural network for Twitter user geolocation. Our model first predicts the home country for a user, then uses the country result to guide the city-level prediction. In addition, we employ a character-aware word embedding layer to overcome the noisy information in tweets. With the feature fusion layer, our model can accommodate various feature combinations and achieves state-of-the-art results over three commonly used benchmarks under different feature settings. It not only improves the prediction accuracy but also greatly reduces the mean error distance.</abstract>
       <url hash="013dd8c4">D19-1480</url>
@@ -6085,7 +6085,7 @@
       <author><first>Yanda</first><last>Chen</last></author>
       <author><first>Desmond</first><last>Patton</last></author>
       <author><first>Charlotte</first><last>Selous</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>4765–4775</pages>
       <abstract>Gang-involved youth in cities such as Chicago sometimes post on social media to express their aggression towards rival gangs and previous research has demonstrated that a deep learning approach can predict aggression and loss in posts. To address the possibility of bias in this sensitive application, we developed an approach to systematically interpret the state of the art model. We found, surprisingly, that it frequently bases its predictions on stop words such as “a” or “on”, an approach that could harm social media users who have no aggressive intentions. To tackle this bias, domain experts annotated the rationales, highlighting words that explain why a tweet is labeled as “aggression”. These new annotations enable us to quantitatively measure how justified the model predictions are, and build models that drastically reduce bias. Our study shows that in high stake scenarios, accuracy alone cannot guarantee a good system and we need new evaluation methods.</abstract>
       <url hash="29372d20">D19-1483</url>
@@ -6134,7 +6134,7 @@
     <paper id="487">
       <title>Domain Adaptation for Person-Job Fit with Transferable Deep Global Match Network</title>
       <author><first>Shuqing</first><last>Bian</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Yang</first><last>Song</last></author>
       <author><first>Tao</first><last>Zhang</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
@@ -6171,7 +6171,7 @@
       <author><first>Kristina</first><last>Gligorić</last></author>
       <author><first>Sean</first><last>Kross</last></author>
       <author><first>Michelle</first><last>Mazurek</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>4831–4842</pages>
       <abstract>The readability of a digital text can influence people’s ability to learn new things about a range topics from digital resources (e.g., Wikipedia, WebMD). Readability also impacts search rankings, and is used to evaluate the performance of NLP systems. Despite this, we lack a thorough understanding of how to validly measure readability at scale, especially for domain-specific texts. In this work, we present a comparison of the validity of well-known readability measures and introduce a novel approach, Smart Cloze, which is designed to address shortcomings of existing measures. We compare these approaches across four different corpora: crowdworker-generated stories, Wikipedia articles, security and privacy advice, and health information. On these corpora, we evaluate the convergent and content validity of each measure, and detail tradeoffs in score precision, domain-specificity, and participant burden. These results provide a foundation for more accurate readability measurements and better evaluation of new natural-language-processing systems and tools.</abstract>
       <url hash="49d175fd">D19-1489</url>
@@ -6269,7 +6269,7 @@
     <paper id="497">
       <title>A Neural Citation Count Prediction Model based on Peer Review Text</title>
       <author><first>Siqing</first><last>Li</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Eddy Jing</first><last>Yin</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <pages>4914–4924</pages>
@@ -6307,7 +6307,7 @@
     <paper id="500">
       <title>Question Answering for Privacy Policies: Combining Computational and Legal Perspectives</title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Shomir</first><last>Wilson</last></author>
       <author><first>Thomas</first><last>Norton</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
@@ -6562,7 +6562,7 @@
       <author><first>Jiateng</first><last>Xie</last></author>
       <author><first>Zaid</first><last>Sheikh</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>5164–5174</pages>
       <abstract>Most state-of-the-art models for named entity recognition (NER) rely on the availability of large amounts of labeled data, making them challenging to extend to new, lower-resourced languages. However, there are now many proposed solutions to this problem involving either cross-lingual transfer learning, which learns from other highly resourced languages, or active learning, which efficiently selects effective training data based on model predictions. In this paper, we ask the question: given this recent progress, and some amount of human annotation, what is the most effective method for efficiently creating high-quality entity recognizers in under-resourced languages? Based on extensive experimentation using both simulated and real human annotation, we settle on a recipe of starting with a cross-lingual transferred model, then performing targeted annotation of only uncertain entity spans in the target language, minimizing annotator effort. Results demonstrate that cross-lingual transfer is a powerful tool when very little data can be annotated, but an entity-targeted annotation strategy can achieve competitive accuracy quickly, with just one-tenth of training data.</abstract>
       <url hash="af45f211">D19-1520</url>
@@ -6785,11 +6785,11 @@
       <author><first>Heyang</first><last>Er</last></author>
       <author><first>Sungrok</first><last>Shim</last></author>
       <author><first>Eric</first><last>Xue</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Tianze</first><last>Shi</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>5338–5349</pages>
       <abstract>We focus on the cross-domain context-dependent text-to-SQL generation task. Based on the observation that adjacent natural language questions are often linguistically dependent and their corresponding SQL queries tend to overlap, we utilize the interaction history by editing the previous predicted query to improve the generation quality. Our editing mechanism views SQL as sequences and reuses generation results at the token level in a simple manner. It is flexible to change individual tokens and robust to error propagation. Furthermore, to deal with complex table structures in different domains, we employ an utterance-table encoder and a table-aware decoder to incorporate the context of the user utterance and the table schema. We evaluate our approach on the SParC dataset and demonstrate the benefit of editing compared with the state-of-the-art baselines which generate SQL from scratch. Our code is available at <url>https://github.com/ryanzhumich/sparc_atis_pytorch</url>.</abstract>
       <url hash="ac1e8afc">D19-1537</url>
@@ -6812,7 +6812,7 @@
       <author><first>Alexei</first><last>Baevski</last></author>
       <author><first>Sergey</first><last>Edunov</last></author>
       <author><first>Yinhan</first><last>Liu</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Michael</first><last>Auli</last></author>
       <pages>5360–5369</pages>
       <abstract>We present a new approach for pretraining a bi-directional transformer model that provides significant performance gains across a variety of language understanding problems. Our model solves a cloze-style word reconstruction task, where each word is ablated and must be predicted given the rest of the text. Experiments demonstrate large performance gains on GLUE and new state of the art results on NER as well as constituency parsing benchmarks, consistent with BERT. We also present a detailed analysis of a number of factors that contribute to effective pretraining, including data domain and size, model capacity, and variations on the cloze objective.</abstract>
@@ -6848,7 +6848,7 @@
     <paper id="542">
       <title>Transfer Fine-Tuning: A <fixed-case>BERT</fixed-case> Case Study</title>
       <author><first>Yuki</first><last>Arase</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>5393–5404</pages>
       <abstract>A semantic equivalence assessment is defined as a task that assesses semantic equivalence in a sentence pair by binary judgment (i.e., paraphrase identification) or grading (i.e., semantic textual similarity measurement). It constitutes a set of tasks crucial for research on natural language understanding. Recently, BERT realized a breakthrough in sentence representation learning (Devlin et al., 2019), which is broadly transferable to various NLP tasks. While BERT’s performance improves by increasing its model size, the required computational power is an obstacle preventing practical applications from adopting the technology. Herein, we propose to inject phrasal paraphrase relations into BERT in order to generate suitable representations for semantic equivalence assessment instead of increasing the model size. Experiments on standard natural language understanding tasks confirm that our method effectively improves a smaller BERT model while maintaining the model size. The generated model exhibits superior performance compared to a larger BERT model on semantic equivalence assessment tasks. Furthermore, it achieves larger performance gains on tasks with limited training datasets for fine-tuning, which is a property desirable for transfer learning.</abstract>
       <url hash="778c76fc">D19-1542</url>
@@ -6883,7 +6883,7 @@
       <title>Learning Programmatic Idioms for Scalable Semantic Parsing</title>
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Alvin</first><last>Cheung</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>5426–5435</pages>
       <abstract>Programmers typically organize executable source code using high-level coding patterns or idiomatic structures such as nested loops, exception handlers and recursive blocks, rather than as individual code tokens. In contrast, state of the art (SOTA) semantic parsers still map natural language instructions to source code by building the code syntax tree one node at a time. In this paper, we introduce an iterative method to extract code idioms from large source code corpora by repeatedly collapsing most-frequent depth-2 subtrees of their syntax trees, and train semantic parsers to apply these idioms during decoding. Applying idiom-based decoding on a recent context-dependent semantic parsing task improves the SOTA by 2.2% BLEU score while reducing training time by more than 50%. This improved speed enables us to scale up the model by training on an extended training set that is 5<tex-math>\times</tex-math> larger, to further move up the SOTA by an additional 2.3% BLEU and 0.9% exact match. Finally, idioms also significantly improve accuracy of semantic parsing to SQL on the ATIS-SQL dataset, when training data is limited.</abstract>
       <url hash="3c430f87">D19-1545</url>
@@ -6895,7 +6895,7 @@
       <title><fixed-case>J</fixed-case>u<fixed-case>IC</fixed-case>e: A Large Scale Distantly Supervised Dataset for Open Domain Context-based Code Generation</title>
       <author><first>Rajas</first><last>Agashe</last></author>
       <author><first>Srinivasan</first><last>Iyer</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>5436–5446</pages>
       <abstract>Interactive programming with interleaved code snippet cells and natural language markdown is recently gaining popularity in the form of Jupyter notebooks, which accelerate prototyping and collaboration. To study code generation conditioned on a long context history, we present JuICe, a corpus of 1.5 million examples with a curated test set of 3.7K instances based on online programming assignments. Compared with existing contextual code generation datasets, JuICe provides refined human-curated data, open-domain code, and an order of magnitude more training data. Using JuICe, we train models for two tasks: (1) generation of the API call sequence in a code cell, and (2) full code cell generation, both conditioned on the NL-Code history up to a particular code cell. Experiments using current baseline code generation models show that both context and distant supervision aid in generation, and that the dataset is challenging for current systems.</abstract>
       <url hash="e9eeebcd">D19-1546</url>
@@ -6907,7 +6907,7 @@
       <author><first>Ziyu</first><last>Yao</last></author>
       <author><first>Yu</first><last>Su</last></author>
       <author><first>Huan</first><last>Sun</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>5447–5458</pages>
       <abstract>As a promising paradigm, interactive semantic parsing has shown to improve both semantic parsing accuracy and user confidence in the results. In this paper, we propose a new, unified formulation of the interactive semantic parsing problem, where the goal is to design a model-based intelligent agent. The agent maintains its own state as the current predicted semantic parse, decides whether and where human intervention is needed, and generates a clarification question in natural language. A key part of the agent is a world model: it takes a percept (either an initial question or subsequent feedback from the user) and transitions to a new state. We then propose a simple yet remarkably effective instantiation of our framework, demonstrated on two text-to-SQL datasets (WikiSQL and Spider) with different state-of-the-art base semantic parsers. Compared to an existing interactive semantic parsing approach that treats the base parser as a black box, our approach solicits less user feedback but yields higher run-time accuracy.</abstract>
       <url hash="7d29600c">D19-1547</url>
@@ -6918,11 +6918,11 @@
     <paper id="548">
       <title>Modeling Graph Structure in Transformer for Better <fixed-case>AMR</fixed-case>-to-Text Generation</title>
       <author><first>Jie</first><last>Zhu</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
-      <author><first>Longhua</first><last>Qian</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>5459–5468</pages>
       <abstract>Recent studies on AMR-to-text generation often formalize the task as a sequence-to-sequence (seq2seq) learning problem by converting an Abstract Meaning Representation (AMR) graph into a word sequences. Graph structures are further modeled into the seq2seq framework in order to utilize the structural information in the AMR graphs. However, previous approaches only consider the relations between directly connected concepts while ignoring the rich structure in AMR graphs. In this paper we eliminate such a strong limitation and propose a novel structure-aware self-attention approach to better model the relations between indirectly connected concepts in the state-of-the-art seq2seq model, i.e. the Transformer. In particular, a few different methods are explored to learn structural representations between two concepts. Experimental results on English AMR benchmark datasets show that our approach significantly outperforms the state-of-the-art with 29.66 and 31.82 BLEU scores on LDC2015E86 and LDC2017T10, respectively. To the best of our knowledge, these are the best results achieved so far by supervised models on the benchmarks.</abstract>
       <url hash="5c6aeba3">D19-1548</url>
@@ -6932,7 +6932,7 @@
     <paper id="549">
       <title>Syntax-Aware Aspect Level Sentiment Classification with Graph Attention Networks</title>
       <author><first>Binxuan</first><last>Huang</last></author>
-      <author><first>Kathleen</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last></author>
       <pages>5469–5477</pages>
       <abstract>Aspect level sentiment classification aims to identify the sentiment expressed towards an aspect given a context sentence. Previous neural network based methods largely ignore the syntax structure in one sentence. In this paper, we propose a novel target-dependent graph attention network (TD-GAT) for aspect level sentiment classification, which explicitly utilizes the dependency relationship among words. Using the dependency graph, it propagates sentiment features directly from the syntactic context of an aspect target. In our experiments, we show our method outperforms multiple baselines with GloVe embeddings. We also demonstrate that using BERT representations further substantially boosts the performance.</abstract>
       <url hash="18605ce6">D19-1549</url>
@@ -6970,7 +6970,7 @@
       <author><first>Xiabing</first><last>Zhou</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>5499–5507</pages>
       <abstract>There have been a recent line of works to automatically predict the emotions of posts in social media. Existing approaches consider the posts individually and predict their emotions independently. Different from previous researches, we explore the dependence among relevant posts via the authors’ backgrounds, since the authors with similar backgrounds, e.g., gender, location, tend to express similar emotions. However, such personal attributes are not easy to obtain in most social media websites, and it is hard to capture attributes-aware words to connect similar people. Accordingly, we propose a Neural Personal Discrimination (NPD) approach to address above challenges by determining personal attributes from posts, and connecting relevant posts with similar attributes to jointly learn their emotions. In particular, we employ adversarial discriminators to determine the personal attributes, with attention mechanisms to aggregate attributes-aware words. In this way, social correlationship among different posts can be better addressed. Experimental results show the usefulness of personal attributes, and the effectiveness of our proposed NPD approach in capturing such personal attributes with significant gains over the state-of-the-art models.</abstract>
@@ -7010,7 +7010,7 @@
     <paper id="555">
       <title>Leveraging Structural and Semantic Correspondence for Attribute-Oriented Aspect Sentiment Discovery</title>
       <author><first>Zhe</first><last>Zhang</last></author>
-      <author><first>Munindar</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar</first><last>Singh</last></author>
       <pages>5528–5538</pages>
       <abstract>Opinionated text often involves attributes such as authorship and location that influence the sentiments expressed for different aspects. We posit that structural and semantic correspondence is both prevalent in opinionated text, especially when associated with attributes, and crucial in accurately revealing its latent aspect and sentiment structure. However, it is not recognized by existing approaches. We propose Trait, an unsupervised probabilistic model that discovers aspects and sentiments from text and associates them with different attributes. To this end, Trait infers and leverages structural and semantic correspondence using a Markov Random Field. We show empirically that by incorporating attributes explicitly Trait significantly outperforms state-of-the-art baselines both by generating attribute profiles that accord with our intuitions, as shown via visualization, and yielding topics of greater semantic cohesion.</abstract>
       <url hash="212ade5e">D19-1555</url>
@@ -7033,7 +7033,7 @@
     </paper>
     <paper id="557">
       <title>Shallow Domain Adaptive Embeddings for Sentiment Analysis</title>
-      <author><first>Prathusha</first><last>K Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>K Sarma</last></author>
       <author><first>Yingyu</first><last>Liang</last></author>
       <author><first>William</first><last>Sethares</last></author>
       <pages>5549–5558</pages>
@@ -7062,7 +7062,7 @@
       <author><first>Yunlong</first><last>Liang</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jinchao</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>5569–5580</pages>
@@ -7080,7 +7080,7 @@
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Xiaozhong</first><last>Liu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>5581–5590</pages>
       <abstract>Recently, neural networks have shown promising results on Document-level Aspect Sentiment Classification (DASC). However, these approaches often offer little transparency w.r.t. their inner working mechanisms and lack interpretability. In this paper, to simulating the steps of analyzing aspect sentiment in a document by human beings, we propose a new Hierarchical Reinforcement Learning (HRL) approach to DASC. This approach incorporates clause selection and word selection strategies to tackle the data noise problem in the task of DASC. First, a high-level policy is proposed to select aspect-relevant clauses and discard noisy clauses. Then, a low-level policy is proposed to select sentiment-relevant words and discard noisy words inside the selected clauses. Finally, a sentiment rating predictor is designed to provide reward signals to guide both clause and word selection. Experimental results demonstrate the impressive effectiveness of the proposed approach to DASC over the state-of-the-art baselines.</abstract>
       <url hash="5db02d1c">D19-1560</url>
@@ -7109,7 +7109,7 @@
     </paper>
     <paper id="562">
       <title>Rethinking Attribute Representation and Injection for Sentiment Classification</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <pages>5602–5613</pages>
       <abstract>Text attributes, such as user and product information in product reviews, have been used to improve the performance of sentiment classification models. The de facto standard method is to incorporate them as additional biases in the attention mechanism, and more performance gains are achieved by extending the model architecture. In this paper, we show that the above method is the least effective way to represent and inject attributes. To demonstrate this hypothesis, unlike previous models with complicated architectures, we limit our base model to a simple BiLSTM with attention classifier, and instead focus on how and where the attributes should be incorporated in the model. We propose to represent attributes as chunk-wise importance weight matrices and consider four locations in the model (i.e., embedding, encoding, attention, classifier) to inject attributes. Experiments show that our proposed method achieves significant improvements over the standard approach and that attention mechanism is the worst location to inject attributes, contradicting prior work. We also outperform the state-of-the-art despite our use of a simple base model. Finally, we show that these representations transfer well to other tasks. Model implementation and datasets are released here: <url>https://github.com/rktamplayo/CHIM</url>.</abstract>
       <url hash="b2258426">D19-1562</url>
@@ -7156,7 +7156,7 @@
       <author><first>Seunghak</first><last>Yu</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Rostislav</first><last>Petrov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>5636–5646</pages>
       <abstract>Propaganda aims at influencing people’s mindset with the purpose of advancing a specific agenda. Previous work has addressed propaganda detection at document level, typically labelling all articles from a propagandistic news outlet as propaganda. Such noisy gold labels inevitably affect the quality of any learning system trained on them. A further issue with most existing systems is the lack of explainability. To overcome these limitations, we propose a novel task: performing fine-grained analysis of texts by detecting all fragments that contain propaganda techniques as well as their type. In particular, we create a corpus of news articles manually annotated at fragment level with eighteen propaganda techniques and propose a suitable evaluation measure. We further design a novel multi-granularity neural network, and we show that it outperforms several strong BERT-based baselines.</abstract>
       <url hash="7f40ed17">D19-1565</url>
@@ -7169,7 +7169,7 @@
       <author><first>Dushyant Singh</first><last>Chauhan</last></author>
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5647–5657</pages>
       <abstract>In recent times, multi-modal analysis has been an emerging and highly sought-after field at the intersection of natural language processing, computer vision, and speech processing. The prime objective of such studies is to leverage the diversified information, (e.g., textual, acoustic and visual), for learning a model. The effective interaction among these modalities often leads to a better system in terms of performance. In this paper, we introduce a recurrent neural network based approach for the multi-modal sentiment and emotion analysis. The proposed model learns the inter-modal interaction among the participating modalities through an auto-encoder mechanism. We employ a context-aware attention module to exploit the correspondence among the neighboring utterances. We evaluate our proposed approach for five standard multi-modal affect analysis datasets. Experimental results suggest the efficacy of the proposed model for both sentiment and emotion analysis over various existing state-of-the-art systems.</abstract>
       <url hash="adc288bb">D19-1566</url>
@@ -7190,7 +7190,7 @@
       <title>The Role of Pragmatic and Discourse Context in Determining Argument Impact</title>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>5668–5678</pages>
       <abstract>Research in the social sciences and psychology has shown that the persuasiveness of an argument depends not only the language employed, but also on attributes of the source/communicator, the audience, and the appropriateness and strength of the argument’s claims given the pragmatic and discourse context of the argument. Among these characteristics of persuasive arguments, prior work in NLP does not explicitly investigate the effect of the pragmatic and discourse context when determining argument quality. This paper presents a new dataset to initiate the study of this aspect of argumentation: it consists of a diverse collection of arguments covering 741 controversial topics and comprising over 47,000 claims. We further propose predictive models that incorporate the pragmatic and discourse context of argumentative claims and show that they outperform models that rely only on claim-specific linguistic features for predicting the perceived impact of individual claims within a particular line of argument.</abstract>
       <url hash="789c9e1b">D19-1568</url>
@@ -7216,7 +7216,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Guoping</first><last>Huang</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>5689–5695</pages>
       <abstract>Many Data Augmentation (DA) methods have been proposed for neural machine translation. Existing works measure the superiority of DA methods in terms of their performance on a specific test set, but we find that some DA methods do not exhibit consistent improvements across translation tasks. Based on the observation, this paper makes an initial attempt to answer a fundamental question: what benefits, which are consistent across different methods and tasks, does DA in general obtain? Inspired by recent theoretic advances in deep learning, the paper understands DA from two perspectives towards the generalization ability of a model: input sensitivity and prediction margin, which are defined independent of specific test set thereby may lead to findings with relatively low variance. Extensive experiments show that relatively consistent benefits across five DA methods and four translation tasks are achieved regarding both perspectives.</abstract>
       <url hash="2d5428fc">D19-1570</url>
@@ -7310,7 +7310,7 @@
       <author><first>Adina</first><last>Williams</last></author>
       <author><first>Damian</first><last>Blasi</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <pages>5734–5739</pages>
       <abstract>Many of the world’s languages employ grammatical gender on the lexeme. For instance, in Spanish, house “casa” is feminine, whereas the word for paper “papel” is masculine. To a speaker of a genderless language, this categorization seems to exist with neither rhyme nor reason. But, is the association of nouns to gender classes truly arbitrary? In this work, we present the first large-scale investigation of the arbitrariness of gender assignment that uses canonical correlation analysis as a method for correlating the gender of inanimate nouns with their lexical semantic meaning. We find that the gender systems of 18 languages exhibit a significant correlation with an externally grounded definition of lexical semantics.</abstract>
@@ -7332,7 +7332,7 @@
     <paper id="579">
       <title>Automatically Inferring Gender Associations from Language</title>
       <author><first>Serina</first><last>Chang</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>5746–5752</pages>
       <abstract>In this paper, we pose the question: do people talk about women and men in different ways? We introduce two datasets and a novel integration of approaches for automatically inferring gender associations from language, discovering coherent word clusters, and labeling the clusters for the semantic concepts they represent. The datasets allow us to compare how people write about women and men in two different settings – one set draws from celebrity news and the other from student reviews of computer science professors. We demonstrate that there are large-scale differences in the ways that people talk about women and men and that these differences vary across domains. Human evaluations show that our methods significantly outperform strong baselines.</abstract>
       <url hash="666c3ee4">D19-1579</url>
@@ -7376,7 +7376,7 @@
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Xiangbin</first><last>Meng</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>5766–5770</pages>
       <abstract>Syntactic relations are broadly used in many NLP tasks. For event detection, syntactic relation representations based on dependency tree can better capture the interrelations between candidate trigger words and related entities than sentence representations. But, existing studies only use first-order syntactic relations (i.e., the arcs) in dependency trees to identify trigger words. For this reason, this paper proposes a new method for event detection, which uses a dependency tree based graph convolution network with aggregative attention to explicitly model and aggregate multi-order syntactic representations in sentences. Experimental comparison with state-of-the-art baselines shows the superiority of the proposed method.</abstract>
       <url hash="2c76f42d">D19-1582</url>
@@ -7442,7 +7442,7 @@
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Kengo</first><last>Nakamura</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>5797–5802</pages>
       <abstract>Rhetorical Structure Theory (RST) parsing is crucial for many downstream NLP tasks that require a discourse structure for a text. Most of the previous RST parsers have been based on supervised learning approaches. That is, they require an annotated corpus of sufficient size and quality, and heavily rely on the language and domain dependent corpus. In this paper, we present two language-independent unsupervised RST parsing methods based on dynamic programming. The first one builds the optimal tree in terms of a dissimilarity score function that is defined for splitting a text span into smaller ones. The second builds the optimal tree in terms of a similarity score function that is defined for merging two adjacent spans into a large one. Experimental results on English and German RST treebanks showed that our parser based on span merging achieved the best score, around 0.8 F<tex-math>_1</tex-math> score, which is close to the scores of the previous supervised parsers.</abstract>
@@ -7454,8 +7454,8 @@
       <title><fixed-case>BERT</fixed-case> for Coreference Resolution: Baselines and Analysis</title>
       <author><first>Mandar</first><last>Joshi</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>5803–5808</pages>
       <abstract>We apply BERT to coreference resolution, achieving a new state of the art on the GAP (+11.5 F1) and OntoNotes (+3.9 F1) benchmarks. A qualitative analysis of model predictions indicates that, compared to ELMo and BERT-base, BERT-large is particularly better at distinguishing between related but distinct entities (e.g., President and CEO), but that there is still room for improvement in modeling document-level context, conversations, and mention paraphrasing. We will release all code and trained models upon publication.</abstract>
       <url hash="3f77a000">D19-1588</url>
@@ -7465,7 +7465,7 @@
     <paper id="589">
       <title>Linguistic Versus Latent Relations for Modeling Coherent Flow in Paragraphs</title>
       <author><first>Dongyeop</first><last>Kang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5809–5815</pages>
       <abstract>Generating a long, coherent text such as a paragraph requires a high-level control of different levels of relations between sentences (e.g., tense, coreference). We call such a logical connection between sentences as a (paragraph) flow. In order to produce a coherent flow of text, we explore two forms of intersentential relations in a paragraph: one is a human-created linguistical relation that forms a structure (e.g., discourse tree) and the other is a relation from latent representation learned from the sentences themselves. Our two proposed models incorporate each form of relations into document-level language models: the former is a supervised model that jointly learns a language model as well as discourse relation prediction, and the latter is an unsupervised model that is hierarchically conditioned by a recurrent neural network (RNN) over the latent information. Our proposed models with both forms of relations outperform the baselines in partially conditioned paragraph generation task. Our codes and data are publicly available.</abstract>
       <url hash="730db76e">D19-1589</url>
@@ -7478,7 +7478,7 @@
       <author><first>Kazuma</first><last>Kadowaki</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <pages>5816–5822</pages>
       <abstract>We propose new BERT-based methods for recognizing event causality such as “smoke cigarettes” –&gt; “die of lung cancer” written in web texts. In our methods, we grasp each annotator’s policy by training multiple classifiers, each of which predicts the labels given by a single annotator, and combine the resulting classifiers’ outputs to predict the final labels determined by majority vote. Furthermore, we investigate the effect of supplying background knowledge to our classifiers. Since BERT models are pre-trained with a large corpus, some sort of background knowledge for event causality may be learned during pre-training. Our experiments with a Japanese dataset suggest that this is actually the case: Performance improved when we pre-trained the BERT models with web texts containing a large number of event causalities instead of Wikipedia articles or randomly sampled web texts. However, this effect was limited. Therefore, we further improved performance by simply adding texts related to an input causality candidate as background knowledge to the input of the BERT models. We believe these findings indicate a promising future research direction.</abstract>
@@ -7499,7 +7499,7 @@
     </paper>
     <paper id="592">
       <title>Quantity doesn’t buy quality syntax with neural language models</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Aaron</first><last>Mueller</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>5831–5837</pages>
@@ -7515,7 +7515,7 @@
       <author><first>Artur</first><last>Kulmizev</last></author>
       <author><first>Felix</first><last>Hill</last></author>
       <author><first>Daniel M.</first><last>Low</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>5838–5845</pages>
       <abstract>Representational Similarity Analysis (RSA) is a technique developed by neuroscientists for comparing activity patterns of different measurement modalities (e.g., fMRI, electrophysiology, behavior). As a framework, RSA has several advantages over existing approaches to interpretation of language encoders based on probing or diagnostic classification: namely, it does not require large training samples, is not prone to overfitting, and it enables a more transparent comparison between the representational geometries of different models and modalities. We demonstrate the utility of RSA by establishing a previously unknown correspondence between widely-employed pretrained language encoders and human processing difficulty via eye-tracking data, showcasing its potential in the interpretability toolbox for neural models.</abstract>
       <url hash="eb4083cc">D19-1593</url>
@@ -7525,10 +7525,10 @@
     </paper>
     <paper id="594">
       <title>Text Genre and Training Data Size in Human-like Parsing</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
       <pages>5846–5852</pages>
       <abstract>Domain-specific training typically makes NLP systems work better. We show that this extends to cognitive modeling as well by relating the states of a neural phrase-structure parser to electrophysiological measures from human participants. These measures were recorded as participants listened to a spoken recitation of the same literary text that was supplied as input to the neural parser. Given more training data, the system derives a better cognitive model — but only when the training examples come from the same textual genre. This finding is consistent with the idea that humans adapt syntactic expectations to particular genres during language comprehension (Kaan and Chun, 2018; Branigan and Pickering, 2017).</abstract>
@@ -7569,7 +7569,7 @@
       <author><first>Yuang</first><last>Wei</last></author>
       <author><first>Gong</first><last>Cheng</last></author>
       <author><first>Lin</first><last>Zhou</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Yuzhong</first><last>Qu</last></author>
       <pages>5866–5871</pages>
       <abstract>Scenario-based question answering (SQA) has attracted increasing research attention. It typically requires retrieving and integrating knowledge from multiple sources, and applying general knowledge to a specific case described by a scenario. SQA widely exists in the medical, geography, and legal domains—both in practice and in the exams. In this paper, we introduce the GeoSQA dataset. It consists of 1,981 scenarios and 4,110 multiple-choice questions in the geography domain at high school level, where diagrams (e.g., maps, charts) have been manually annotated with natural language descriptions to benefit NLP research. Benchmark results on a variety of state-of-the-art methods for question answering, textual entailment, and reading comprehension demonstrate the unique challenges presented by SQA for future research.</abstract>
@@ -7648,7 +7648,7 @@
     </paper>
     <paper id="603">
       <title>Answering Conversational Questions on Structured Data without Logical Forms</title>
-      <author><first>Thomas</first><last>Mueller</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
       <author><first>Francesco</first><last>Piccinno</last></author>
       <author><first>Peter</first><last>Shaw</last></author>
       <author><first>Massimo</first><last>Nicosia</last></author>
@@ -7687,9 +7687,9 @@
     <paper id="606">
       <title><fixed-case>Q</fixed-case>uoref: A Reading Comprehension Dataset with Questions Requiring Coreferential Reasoning</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Ana</first><last>Marasović</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <pages>5925–5932</pages>
       <abstract>Machine comprehension of texts longer than a single sentence often requires coreference resolution. However, most current reading comprehension benchmarks do not contain complex coreferential phenomena and hence fail to evaluate the ability of models to resolve coreference. We present a new crowdsourced dataset containing more than 24K span-selection questions that require resolving coreference among entities in over 4.7K English paragraphs from Wikipedia. Obtaining questions focused on such phenomena is challenging, because it is hard to avoid lexical cues that shortcut complex reasoning. We deal with this issue by using a strong baseline model as an adversary in the crowdsourcing loop, which helps crowdworkers avoid writing questions with exploitable surface cues. We show that state-of-the-art reading comprehension models perform significantly worse than humans on this benchmark—the best model performance is 70.5 F1, while the estimated human performance is 93.4 F1.</abstract>
@@ -7736,9 +7736,9 @@
     </paper>
     <paper id="610">
       <title>A Gated Self-attention Memory Network for Answer Selection</title>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Daisuke</first><last>Kihara</last></author>
       <pages>5953–5959</pages>
       <abstract>Answer selection is an important research problem, with applications in many areas. Previous deep learning based approaches for the task mainly adopt the Compare-Aggregate architecture that performs word-level comparison followed by aggregation. In this work, we take a departure from the popular Compare-Aggregate architecture, and instead, propose a new gated self-attention memory network for the task. Combined with a simple transfer learning technique from a large-scale online corpus, our model outperforms previous methods by a large margin, achieving new state-of-the-art results on two standard answer selection datasets: TrecQA and WikiQA.</abstract>
@@ -7784,7 +7784,7 @@
     <paper id="614">
       <title>Generating Highly Relevant Questions</title>
       <author><first>Jiazuo</first><last>Qiu</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>5983–5987</pages>
       <abstract>The neural seq2seq based question generation (QG) is prone to generating generic and undiversified questions that are poorly relevant to the given passage and target answer. In this paper, we propose two methods to address the issue. (1) By a partial copy mechanism, we prioritize words that are morphologically close to words in the input passage when generating questions; (2) By a QA-based reranker, from the n-best list of question candidates, we select questions that are preferred by both the QA and QG model. Experiments and analyses demonstrate that the proposed two methods substantially improve the relevance of generated questions to passages and answers.</abstract>
       <url hash="f345c434">D19-1614</url>
@@ -7852,7 +7852,7 @@
       <title>Countering the Effects of Lead Bias in News Summarization via Multi-Stage Training and Auxiliary Losses</title>
       <author><first>Matt</first><last>Grenander</last></author>
       <author><first>Yue</first><last>Dong</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Annie</first><last>Louis</last></author>
       <pages>6019–6024</pages>
       <abstract>Sentence position is a strong feature for news summarization, since the lead often (but not always) summarizes the key points of the article. In this paper, we show that recent neural systems excessively exploit this trend, which although powerful for many inputs, is also detrimental when summarizing documents where important content should be extracted from later parts of the article. We propose two techniques to make systems sensitive to the importance of content in different parts of the article. The first technique employs ‘unbiased’ data; i.e., randomly shuffled sentences of the source document, to pretrain the model. The second technique uses an auxiliary ROUGE-based loss that encourages the model to distribute importance scores throughout a document by mimicking sentence-level ROUGE scores on the training data. We show that these techniques significantly improve the performance of a competitive reinforcement learning based extractive system, with the auxiliary loss being more powerful than pretraining.</abstract>
@@ -7864,8 +7864,8 @@
     <paper id="621">
       <title>Learning Rhyming Constraints using Structured Adversaries</title>
       <author><first>Harsh</first><last>Jhamtani</last></author>
-      <author><first>Sanket Vaibhav</first><last>Mehta</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Sanket Vaibhav</first><last>Mehta</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>6025–6031</pages>
       <abstract>Existing recurrent neural language models often fail to capture higher-level structure present in text: for example, rhyming patterns present in poetry. Much prior work on poetry generation uses manually defined constraints which are satisfied during decoding using either specialized decoding procedures or rejection sampling. The rhyming constraints themselves are typically not learned by the generator. We propose an alternate approach that uses a structured discriminator to learn a poetry generator that directly captures rhyming constraints in a generative adversarial setup. By causing the discriminator to compare poems based only on a learned similarity matrix of pairs of line ending words, the proposed approach is able to successfully learn rhyming patterns in two different English poetry datasets (Sonnet and Limerick) without explicitly being provided with any phonetic information</abstract>
@@ -7959,7 +7959,7 @@
     <paper id="629">
       <title><fixed-case>WIQA</fixed-case>: A dataset for “What if...” reasoning over procedural text</title>
       <author><first>Niket</first><last>Tandon</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
@@ -7973,7 +7973,7 @@
     <paper id="630">
       <title>Evaluating <fixed-case>BERT</fixed-case> for natural language inference: A case study on the <fixed-case>C</fixed-case>ommitment<fixed-case>B</fixed-case>ank</title>
       <author><first>Nanjiang</first><last>Jiang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>6086–6091</pages>
       <abstract>Natural language inference (NLI) datasets (e.g., MultiNLI) were collected by soliciting hypotheses for a given premise from annotators. Such data collection led to annotation artifacts: systems can identify the premise-hypothesis relationship without observing the premise (e.g., negation in hypothesis being indicative of contradiction). We address this problem by recasting the CommitmentBank for NLI, which contains items involving reasoning over the extent to which a speaker is committed to complements of clause-embedding verbs under entailment-canceling environments (conditional, negation, modal and question). Instead of being constructed to stand in certain relationships with the premise, hypotheses in the recast CommitmentBank are the complements of the clause-embedding verb in each premise, leading to no annotation artifacts in the hypothesis. A state-of-the-art BERT-based model performs well on the CommitmentBank with 85% F1. However analysis of model behavior shows that the BERT models still do not capture the full complexity of pragmatic reasoning, nor encode some of the linguistic generalizations, highlighting room for improvement.</abstract>
       <url hash="752b0a5a">D19-1630</url>
@@ -7997,7 +7997,7 @@
     </paper>
     <paper id="632">
       <title>The <fixed-case>FLORES</fixed-case> Evaluation Datasets for Low-Resource Machine Translation: <fixed-case>N</fixed-case>epali–<fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>inhala–<fixed-case>E</fixed-case>nglish</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Peng-Jen</first><last>Chen</last></author>
       <author><first>Myle</first><last>Ott</last></author>
       <author><first>Juan</first><last>Pino</last></author>
@@ -8018,7 +8018,7 @@
       <author><first>Marjan</first><last>Ghazvininejad</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Yinhan</first><last>Liu</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>6112–6121</pages>
       <abstract>Most machine translation systems generate text autoregressively from left to right. We, instead, use a masked language modeling objective to train a model to predict any subset of the target words, conditioned on both the input text and a partially masked target translation. This approach allows for efficient iterative decoding, where we first predict all of the target words non-autoregressively, and then repeatedly mask out and regenerate the subset of words that the model is least confident about. By applying this strategy for a constant number of iterations, our model improves state-of-the-art performance levels for non-autoregressive and parallel decoding translation models by over 4 BLEU on average. It is also able to reach within about 1 BLEU point of a typical left-to-right transformer model, while decoding significantly faster.</abstract>
       <url hash="1c166c44">D19-1633</url>
@@ -8028,7 +8028,7 @@
     <paper id="634">
       <title>Learning to Copy for Automatic Post-Editing</title>
       <author><first>Xuancheng</first><last>Huang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Huanbo</first><last>Luan</last></author>
       <author><first>Jingfang</first><last>Xu</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
@@ -8042,7 +8042,7 @@
       <title>Exploring Human Gender Stereotypes with Word Association Test</title>
       <author><first>Yupei</first><last>Du</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>6133–6143</pages>
       <abstract>Word embeddings have been widely used to study gender stereotypes in texts. One key problem regarding existing bias scores is to evaluate their validities: do they really reflect true bias levels? For a small set of words (e.g. occupations), we can rely on human annotations or external data. However, for most words, evaluating the correctness of them is still an open problem. In this work, we utilize word association test, which contains rich types of word connections annotated by human participants, to explore how gender stereotypes spread within our minds. Specifically, we use random walk on word association graph to derive bias scores for a large amount of words. Experiments show that these bias scores correlate well with bias in the real world. More importantly, comparing with word-embedding-based bias scores, it provides a different perspective on gender stereotypes in words.</abstract>
       <url hash="9d10fbee">D19-1635</url>
@@ -8078,8 +8078,8 @@
     </paper>
     <paper id="638">
       <title>Set to Ordered Text: Generating Discharge Instructions from Medical Billing Codes</title>
-      <author><first>Litton</first><last>J Kurisinkel</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>6165–6175</pages>
       <abstract>We present set to ordered text, a natural language generation task applied to automatically generating discharge instructions from admission ICD (International Classification of Diseases) codes. This task differs from other natural language generation tasks in the following ways: (1) The input is a set of identifiable entities (ICD codes) where the relations between individual entity are not explicitly specified. (2) The output text is not a narrative description (e.g. news articles) composed from the input. Rather, inferences are made from the input (symptoms specified in ICD codes) to generate the output (instructions). (3) There is an optimal order in which each sentence (instruction) should appear in the output. Unlike most other tasks, neither the input (ICD codes) nor their corresponding symptoms appear in the output, so the ordering of the output instructions needs to be learned in an unsupervised fashion. Based on clinical intuition, we hypothesize that each instruction in the output is mapped to a subset of ICD codes specified in the input. We propose a neural architecture that jointly models (a) subset selection: choosing relevant subsets from a set of input entities; (b) content ordering: learning the order of instructions; and (c) text generation: representing the instructions corresponding to the selected subsets in natural language. In addition, we penalize redundancy during beam search to improve tractability for long text generation. Our model outperforms baseline models in BLEU scores and human evaluation. We plan to extend this work to other tasks such as recipe generation from ingredients.</abstract>
       <url hash="3e5675db">D19-1638</url>
@@ -8322,7 +8322,7 @@
     </paper>
     <paper id="658">
       <title>A Robust Self-Learning Framework for Cross-Lingual Text Classification</title>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <pages>6306–6310</pages>
       <abstract>Based on massive amounts of data, recent pretrained contextual representation models have made significant strides in advancing a number of different English NLP tasks. However, for other languages, relevant training data may be lacking, while state-of-the-art deep learning methods are known to be data-hungry. In this paper, we present an elegantly simple robust self-learning framework to include unlabeled non-English samples in the fine-tuning process of pretrained multilingual representation models. We leverage a multilingual model’s own predictions on unlabeled non-English data in order to obtain additional information that can be used during further fine-tuning. Compared with original multilingual models and other cross-lingual classification models, we observe significant gains in effectiveness on document and sentiment classification for a range of diverse languages.</abstract>
@@ -8343,7 +8343,7 @@
       <title>Label Embedding using Hierarchical Structure of Labels for <fixed-case>T</fixed-case>witter Classification</title>
       <author><first>Taro</first><last>Miyazaki</last></author>
       <author><first>Kiminobu</first><last>Makino</last></author>
-      <author><first>Yuka</first><last>Takei</last></author>
+      <author id="yuka-takei"><first>Yuka</first><last>Takei</last></author>
       <author><first>Hiroki</first><last>Okamoto</last></author>
       <author><first>Jun</first><last>Goto</last></author>
       <pages>6317–6322</pages>
@@ -8370,7 +8370,7 @@
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Yanai</first><last>Elazar</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>6330–6335</pages>
       <abstract>Elazar and Goldberg (2018) showed that protected attributes can be extracted from the representations of a debiased neural network for mention detection at above-chance levels, by evaluating a diagnostic classifier on a held-out subsample of the data it was trained on. We revisit their experiments and conduct a series of follow-up experiments showing that, in fact, the diagnostic classifier generalizes poorly to both new in-domain samples and new domains, indicating that it relies on correlations specific to their particular data sample. We further show that a diagnostic classifier trained on the biased baseline neural network also does not generalize to new samples. In other words, the biases detected in Elazar and Goldberg (2018) seem restricted to their particular data sample, and would therefore not bias the decisions of the model on new samples, whether in-domain or out-of-domain. In light of this, we discuss better methodologies for detecting bias in our models.</abstract>
       <url hash="683afcbf">D19-1662</url>
@@ -8395,7 +8395,7 @@
       <author><first>Marshall</first><last>White</last></author>
       <author><first>Eva</first><last>Sharma</last></author>
       <author><first>Ruisi</first><last>Su</last></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <author><first>Lu</first><last>Wang</last></author>
       <pages>6343–6349</pages>
@@ -8570,7 +8570,7 @@
       <title>Using Clinical Notes with Time Series Data for <fixed-case>ICU</fixed-case> Management</title>
       <author><first>Swaraj</first><last>Khadanga</last></author>
       <author><first>Karan</first><last>Aggarwal</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Jaideep</first><last>Srivastava</last></author>
       <pages>6432–6437</pages>
       <abstract>Monitoring patients in ICU is a challenging and high-cost task. Hence, predicting the condition of patients during their ICU stay can help provide better acute care and plan the hospital’s resources. There has been continuous progress in machine learning research for ICU management, and most of this work has focused on using time series signals recorded by ICU instruments. In our work, we show that adding clinical notes as another modality improves the performance of the model for three benchmark tasks: in-hospital mortality prediction, modeling decompensation, and length of stay forecasting that play an important role in ICU management. While the time-series data is measured at regular intervals, doctor notes are charted at irregular times, making it challenging to model them together. We propose a method to model them jointly, achieving considerable improvement across benchmark tasks over baseline time-series model.</abstract>
@@ -8582,7 +8582,7 @@
       <title>Spelling-Aware Construction of Macaronic Texts for Teaching Foreign-Language Vocabulary</title>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>6438–6443</pages>
       <abstract>We present a machine foreign-language teacher that modifies text in a student’s native language (L1) by replacing some word tokens with glosses in a foreign language (L2), in such a way that the student can acquire L2 vocabulary simply by reading the resulting macaronic text. The machine teacher uses no supervised data from human students. Instead, to guide the machine teacher’s choice of which words to replace, we equip a cloze language model with a training procedure that can incrementally learn representations for novel words, and use this model as a proxy for the word guessing and learning ability of real human students. We use Mechanical Turk to evaluate two variants of the student model: (i) one that generates a representation for a novel word using only surrounding context and (ii) an extension that also uses the spelling of the novel word.</abstract>
       <url hash="453492fa">D19-1679</url>
@@ -8663,7 +8663,7 @@ In this hands-on tutorial, we take a closer look at the challenges from these co
       <title>Processing and Understanding Mixed Language Data</title>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Anirudh</first><last>Srinivasan</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <abstract>Multilingual communities exhibit code-mixing, that is, mixing of two or more socially stable languages in a single conversation, sometimes even in a single utterance. This phenomenon has been widely studied by linguists and interaction scientists in the spoken language of such communities. However, with the prevalence of social media and other informal interactive platforms, code-switching is now also ubiquitously observed in user-generated text. As multilingual communities are more the norm from a global perspective, it becomes essential that code-switched text and speech are adequately handled by language technologies and NUIs.
 
 Code-mixing is extremely prevalent in all multilingual societies. Current studies have shown that as much as 20% of user generated content from some geographies, like South Asia, parts of Europe, and Singapore, are code-mixed. Thus, it is very important to handle code-mixed content as a part of NLP systems and applications for these geographies.
@@ -8717,7 +8717,7 @@ As a gentle start, we will briefly introduce the background of deep learning bas
       <title>Graph-based Deep Learning in Natural Language Processing</title>
       <author><first>Shikhar</first><last>Vashishth</last></author>
       <author><first>Naganand</first><last>Yadati</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <abstract>This tutorial aims to introduce recent advances in graph-based deep learning techniques such as Graph Convolutional Networks (GCNs) for Natural Language Processing (NLP). It provides a brief introduction to deep learning methods on non-Euclidean domains such as graphs and justifies their relevance in NLP. It then covers recent advances in applying graph-based deep learning methods for various NLP tasks, such as semantic role labeling, machine translation, relationship extraction, and many more.</abstract>
       <bibkey>vashishth-etal-2019-graph</bibkey>
     </paper>
@@ -8736,7 +8736,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <meta>
       <booktitle>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</booktitle>
       <url hash="e8f8d220">D19-3</url>
-      <editor><first>Sebastian</first><last>Padó</last></editor>
+      <editor id="sebastian-pado"><first>Sebastian</first><last>Padó</last></editor>
       <editor><first>Ruihong</first><last>Huang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong, China</address>
@@ -8804,7 +8804,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="5">
       <title>Automatic Taxonomy Induction and Expansion</title>
       <author><first>Nicolas Rodolfo</first><last>Fauceglia</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Sarthak</first><last>Dash</last></author>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
       <author><first>Nandana</first><last>Mihindukulasooriya</last></author>
@@ -8821,12 +8821,12 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Andrzej</first><last>Sakrajda</last></author>
       <author><first>Anthony</first><last>Ferritto</last></author>
       <author><first>Lin</first><last>Pan</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <author><first>J. William</first><last>Murdock</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <pages>31–36</pages>
       <abstract>This paper introduces a novel orchestration framework, called CFO (Computation Flow Orchestrator), for building, experimenting with, and deploying interactive NLP (Natural Language Processing) and IR (Information Retrieval) systems to production environments. We then demonstrate a question answering system built using this framework which incorporates state-of-the-art BERT based MRC (Machine Reading Com- prehension) with IR components to enable end-to-end answer retrieval. Results from the demo system are shown to be high quality in both academic and industry domain specific settings. Finally, we discuss best practices when (pre-)training BERT based MRC models for production systems. Screencast links: - Short video (&lt; 3 min): http: //ibm.biz/gaama_demo - Supplementary long video (&lt; 13 min): <url>http://ibm.biz/gaama_cfo_demo</url></abstract>
       <url hash="f2774ca3">D19-3006</url>
@@ -8852,7 +8852,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Liang-Hsin</first><last>Shen</last></author>
       <author><first>Pei-Lun</first><last>Tai</last></author>
       <author><first>Chao-Chung</first><last>Wu</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>43–48</pages>
       <abstract>An acrostic is a form of writing that the first token of each line (or other recurring features in the text) forms a meaningful sequence. In this paper we present a generalized acrostic generation system that can hide certain message in a flexible pattern specified by the users. Different from previous works that focus on rule-based solutions, here we adopt a neural- based sequence-to-sequence model to achieve this goal. Besides acrostic, users are also allowed to specify the rhyme and length of the output sequences. Based on our knowledge, this is the first neural-based natural language generation system that demonstrates the capability of performing micro-level control over output sentences.</abstract>
       <url hash="28d1322b">D19-3008</url>
@@ -8863,7 +8863,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title><fixed-case>EASSE</fixed-case>: Easier Automatic Sentence Simplification Evaluation</title>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <author><first>Louis</first><last>Martin</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>49–54</pages>
       <abstract>We introduce EASSE, a Python package aiming to facilitate and standardise automatic evaluation and comparison of Sentence Simplification (SS) systems. EASSE provides a single access point to a broad range of evaluation resources: standard automatic metrics for assessing SS outputs (e.g. SARI), word-level accuracy scores for certain simplification transformations, reference-independent quality estimation features (e.g. compression ratio), and standard test data for SS evaluation (e.g. TurkCorpus). Finally, EASSE generates easy-to-visualise reports on the various metrics and features above and on how a particular SS output fares against reference simplifications. Through experiments, we show that these functionalities allow for better comparison and understanding of the performance of SS systems.</abstract>
@@ -8876,7 +8876,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Eugene</first><last>Kharitonov</last></author>
       <author><first>Rahma</first><last>Chaabouni</last></author>
       <author><first>Diane</first><last>Bouchacourt</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>55–60</pages>
       <abstract>There is renewed interest in simulating language emergence among deep neural agents that communicate to jointly solve a task, spurred by the practical aim to develop language-enabled interactive AIs, as well as by theoretical questions about the evolution of human language. However, optimizing deep architectures connected by a discrete communication channel (such as that in which language emerges) is technically challenging. We introduce EGG, a toolkit that greatly simplifies the implementation of emergent-language communication games. EGG’s modular design provides a set of building blocks that the user can combine to create new games, easily navigating the optimization and architecture space. We hope that the tool will lower the technical barrier, and encourage researchers from various backgrounds to do original work in this exciting area.</abstract>
       <url hash="971efee0">D19-3010</url>
@@ -8952,7 +8952,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="15">
       <title><fixed-case>HARE</fixed-case>: a Flexible Highlighting Annotator for Ranking and Exploration</title>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>85–90</pages>
       <abstract>Exploration and analysis of potential data sources is a significant challenge in the application of NLP techniques to novel information domains. We describe HARE, a system for highlighting relevant information in document collections to support ranking and triage, which provides tools for post-processing and qualitative analysis for model development and tuning. We apply HARE to the use case of narrative descriptions of mobility information in clinical data, and demonstrate its utility in comparing candidate embedding features. We provide a web-based interface for annotation visualization and document ranking, with a modular backend to support interoperability with existing annotation tools. Our system is available online at <url>https://github.com/OSU-slatelab/HARE</url>.</abstract>
       <url hash="cb70474c">D19-3015</url>
@@ -8988,7 +8988,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="18">
       <title><fixed-case>INMT</fixed-case>: Interactive Neural Machine Translation Prediction</title>
       <author><first>Sebastin</first><last>Santy</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <pages>103–108</pages>
@@ -9012,7 +9012,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title>Journalist-in-the-Loop: Continuous Learning as a Service for Rumour Analysis</title>
       <author><first>Twin</first><last>Karmakharm</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>115–120</pages>
       <abstract>Automatically identifying rumours in social media and assessing their veracity is an important task with downstream applications in journalism. A significant challenge is how to keep rumour analysis tools up-to-date as new information becomes available for particular rumours that spread in a social network. This paper presents a novel open-source web-based rumour analysis tool that can continuous learn from journalists. The system features a rumour annotation service that allows journalists to easily provide feedback for a given social media post through a web-based interface. The feedback allows the system to improve an underlying state-of-the-art neural network-based rumour classification model. The system can be easily integrated as a service into existing tools and platforms used by journalists using a REST API.</abstract>
       <url hash="8dd18a3a">D19-3020</url>
@@ -9021,7 +9021,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     </paper>
     <paper id="21">
       <title><fixed-case>LIDA</fixed-case>: Lightweight Interactive Dialogue Annotator</title>
-      <author><first>Edward</first><last>Collins</last></author>
+      <author id="edward-collins"><first>Edward</first><last>Collins</last></author>
       <author><first>Nikolai</first><last>Rozanov</last></author>
       <author><first>Bingbing</first><last>Zhang</last></author>
       <pages>121–126</pages>
@@ -9033,7 +9033,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="22">
       <title><fixed-case>LINSPECTOR</fixed-case> <fixed-case>WEB</fixed-case>: A Multilingual Probing Suite for Word Representations</title>
       <author><first>Max</first><last>Eichler</last></author>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>127–132</pages>
       <abstract>We present LINSPECTOR WEB , an open source multilingual inspector to analyze word representations. Our system provides researchers working in low-resource settings with an easily accessible web based probing tool to gain quick insights into their word embeddings especially outside of the English language. To do this we employ 16 simple linguistic probing tasks such as gender, case marking, and tense for a diverse set of 28 languages. We support probing of static word embeddings along with pretrained AllenNLP models that are commonly used for NLP downstream tasks such as named entity recognition, natural language inference and dependency parsing. The results are visualized in a polar chart and also provided as a table. LINSPECTOR WEB is available as an offline tool or at <url>https://linspector.ukp.informatik.tu-darmstadt.de</url>.</abstract>
@@ -9084,7 +9084,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="26">
       <title>Multilingual, Multi-scale and Multi-layer Visualization of Intermediate Representations</title>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Elora</first><last>Lacroux</last></author>
       <author><first>Pere-Pau</first><last>Vázquez</last></author>
       <pages>151–156</pages>
@@ -9097,7 +9097,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title><fixed-case>MY</fixed-case>-<fixed-case>AKKHARA</fixed-case>: A <fixed-case>R</fixed-case>omanization-based <fixed-case>B</fixed-case>urmese (<fixed-case>M</fixed-case>yanmar) Input Method</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>157–162</pages>
       <abstract>MY-AKKHARA is a method used to input Burmese texts encoded in the Unicode standard, based on commonly accepted Latin transcription. By using this method, arbitrary Burmese strings can be accurately inputted with 26 lowercase Latin letters. Meanwhile, the 26 uppercase Latin letters are designed as shortcuts of lowercase letter sequences. The frequency of Burmese characters is considered in MY-AKKHARA to realize an efficient keystroke distribution on a QWERTY keyboard. Given that the Unicode standard has not been extensively used in digitization of Burmese, we hope that MY-AKKHARA can contribute to the widespread use of Unicode in Myanmar and can provide a platform for smart input methods for Burmese in the future. An implementation of MY-AKKHARA running in Windows is released at <url>http://www2.nict.go.jp/astrec-att/member/ding/my-akkhara.html</url></abstract>
       <url hash="02dd073d">D19-3027</url>
@@ -9181,7 +9181,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="33">
       <title><fixed-case>R</fixed-case>edcoat: A Collaborative Annotation Tool for Hierarchical Entity Typing</title>
       <author><first>Michael</first><last>Stewart</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Rachel</first><last>Cardell-Oliver</last></author>
       <pages>193–198</pages>
       <abstract>We introduce Redcoat, a web-based annotation tool that supports collaborative hierarchical entity typing. As an annotation tool, Redcoat also facilitates knowledge elicitation by allowing the creation and continuous refinement of concept hierarchies during annotation. It aims to minimise not only annotation time but the time it takes for project creators to set up and distribute projects to annotators. Projects created using the web-based interface can be rapidly distributed to a list of email addresses. Redcoat handles the propagation of documents amongst annotators and automatically scales the annotation workload depending on the number of active annotators. In this paper we discuss these key features and outline Redcoat’s system architecture. We also highlight Redcoat’s unique benefits over existing annotation tools via a qualitative comparison.</abstract>
@@ -9193,7 +9193,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title><fixed-case>SEAGLE</fixed-case>: A Platform for Comparative Evaluation of Semantic Encoders for Information Retrieval</title>
       <author><first>Fabian David</first><last>Schmidt</last></author>
       <author><first>Markus</first><last>Dietsche</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>199–204</pages>
       <abstract>We introduce Seagle, a platform for comparative evaluation of semantic text encoding models on information retrieval (IR) tasks. Seagle implements (1) word embedding aggregators, which represent texts as algebraic aggregations of pretrained word embeddings and (2) pretrained semantic encoders, and allows for their comparative evaluation on arbitrary (monolingual and cross-lingual) IR collections. We benchmark Seagle’s models on monolingual document retrieval and cross-lingual sentence retrieval. Seagle functionality can be exploited via an easy-to-use web interface and its modular backend (micro-service architecture) can easily be extended with additional semantic search models.</abstract>
@@ -9267,8 +9267,8 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>223–228</pages>
       <abstract>We introduce Tanbih, a news aggregator with intelligent analysis tools to help readers understanding what’s behind a news story. Our system displays news grouped into events and generates media profiles that show the general factuality of reporting, the degree of propagandistic content, hyper-partisanship, leading political ideology, general frame of reporting, and stance with respect to various claims and topics of a news outlet. In addition, we automatically analyse each article to detect whether it is propagandistic and to determine its stance with respect to a number of controversial topics.</abstract>
       <url hash="c1126412">D19-3038</url>
@@ -9291,7 +9291,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="40">
       <title><fixed-case>T</fixed-case>ell<fixed-case>M</fixed-case>e<fixed-case>W</fixed-case>hy: Learning to Explain Corrective Feedback for Second Language Learners</title>
       <author><first>Yi-Huei</first><last>Lai</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>235–240</pages>
       <abstract>We present a writing prototype feedback system, TellMeWhy, to provide explanations of errors in submitted essays. In our approach, the sentence with corrections is analyzed to identify error types and problem words, aimed at customizing explanations based on the context of the error. The method involves learning the relation of errors and problem words, generating common feedback patterns, and extracting grammar patterns, collocations and example sentences. At run-time, a sentence with corrections is classified, and the problem word and template are identified to provide detailed explanations. Preliminary evaluation shows that the method has potential to improve existing commercial writing services.</abstract>
       <url hash="03597ee8">D19-3040</url>
@@ -9303,7 +9303,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Zhe</first><last>Zhao</last></author>
       <author><first>Hui</first><last>Chen</last></author>
       <author><first>Jinbin</first><last>Zhang</last></author>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Tao</first><last>Liu</last></author>
       <author><first>Wei</first><last>Lu</last></author>
       <author><first>Xi</first><last>Chen</last></author>
@@ -9363,7 +9363,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <editor><first>Alberto</first><last>Barrón-Cedeño</last></editor>
       <editor><first>Chris</first><last>Brew</last></editor>
       <editor><first>Chris</first><last>Leberknight</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong, China</address>
       <month>November</month>
@@ -9393,7 +9393,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title>Detecting context abusiveness using hierarchical deep learning</title>
       <author><first>Ju-Hyoung</first><last>Lee</last></author>
       <author><first>Jun-U</first><last>Park</last></author>
-      <author><first>Jeong-Won</first><last>Cha</last></author>
+      <author id="jeong-won-cha"><first>Jeong-Won</first><last>Cha</last></author>
       <author><first>Yo-Sub</first><last>Han</last></author>
       <pages>10–19</pages>
       <abstract>Abusive text is a serious problem in social media and causes many issues among users as the number of users and the content volume increase. There are several attempts for detecting or preventing abusive text effectively. One simple yet effective approach is to use an abusive lexicon and determine the existence of an abusive word in text. This approach works well even when an abusive word is obfuscated. On the other hand, it is still a challenging problem to determine abusiveness in a text having no explicit abusive words. Especially, it is hard to identify sarcasm or offensiveness in context without any abusive words. We tackle this problem using an ensemble deep learning model. Our model consists of two parts of extracting local features and global features, which are crucial for identifying implicit abusiveness in context level. We evaluate our model using three benchmark data. Our model outperforms all the previous models for detecting abusiveness in a text data without abusive words. Furthermore, we combine our model and an abusive lexicon method. The experimental results show that our model has at least 4% better performance compared with the previous approaches for identifying text abusiveness in case of with/without abusive words.</abstract>
@@ -9417,7 +9417,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title>Identifying Nuances in Fake News vs. Satire: Using Semantic and Linguistic Cues</title>
       <author><first>Or</first><last>Levi</last></author>
       <author><first>Pedram</first><last>Hosseini</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>David</first><last>Broniatowski</last></author>
       <pages>31–35</pages>
       <abstract>The blurry line between nefarious fake news and protected-speech satire has been a notorious struggle for social media platforms. Further to the efforts of reducing exposure to misinformation on social media, purveyors of fake news have begun to masquerade as satire sites to avoid being demoted. In this work, we address the challenge of automatically classifying fake news versus satire. Previous work have studied whether fake news and satire can be distinguished based on language differences. Contrary to fake news, satire stories are usually humorous and carry some political or social message. We hypothesize that these nuances could be identified using semantic and linguistic cues. Consequently, we train a machine learning method using semantic representation, with a state-of-the-art contextual language model, and with linguistic features based on textual coherence metrics. Empirical evaluation attests to the merits of our approach compared to the language-based baseline and sheds light on the nuances between fake news and satire. As avenues for future work, we consider studying additional linguistic features related to the humor aspect, and enriching the data with current news events, to help identify a political or social message.</abstract>
@@ -9456,7 +9456,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <title>Generating Sentential Arguments from Diverse Perspectives on Controversial Topic</title>
       <author><first>ChaeHun</first><last>Park</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>56–65</pages>
       <abstract>Considering diverse aspects of an argumentative issue is an essential step for mitigating a biased opinion and making reasonable decisions. A related generation model can produce flexible results that cover a wide range of topics, compared to the retrieval-based method that may show unstable performance for unseen data. In this paper, we study the problem of generating sentential arguments from multiple perspectives, and propose a neural method to address this problem. Our model, ArgDiver (Argument generation model from diverse perspectives), in a way a conversational system, successfully generates high-quality sentential arguments. At the same time, the automatically generated arguments by our model show a higher diversity than those generated by any other baseline models. We believe that our work provides evidence for the potential of a good generation model in providing diverse perspectives on a controversial topic.</abstract>
       <url hash="cf19916d">D19-5007</url>
@@ -9478,7 +9478,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
     <paper id="9">
       <title>Unraveling the Search Space of Abusive Language in <fixed-case>W</fixed-case>ikipedia with Dynamic Lexicon Acquisition</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Benno</first><last>Stein</last></author>
@@ -9514,7 +9514,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r
       <author><first>Khushbu</first><last>Saxena</last></author>
       <author><first>Usama</first><last>Yaseen</last></author>
       <author><first>Thomas</first><last>Runkler</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>92–97</pages>
       <abstract>This paper describes our system (MIC-CIS) details and results of participation in the fine grained propaganda detection shared task 2019. To address the tasks of sentence (SLC) and fragment level (FLC) propaganda detection, we explore different neural architectures (e.g., CNN, LSTM-CRF and BERT) and extract linguistic (e.g., part-of-speech, named entity, readability, sentiment, emotion, etc.), layout and topical features. Specifically, we have designed multi-granularity and multi-tasking neural architectures to jointly perform both the sentence and fragment level propaganda detection. Additionally, we investigate different ensemble schemes such as majority-voting, relax-voting, etc. to boost overall system performance. Compared to the other participating systems, our submissions are ranked 3rd and 4th in FLC and SLC tasks, respectively.</abstract>
       <url hash="adcea830">D19-5012</url>
@@ -9661,7 +9661,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <booktitle>Proceedings of the Second Workshop on Economics and Natural Language Processing</booktitle>
       <url hash="ce97fb4f">D19-51</url>
       <editor><first>Udo</first><last>Hahn</last></editor>
-      <editor><first>Véronique</first><last>Hoste</last></editor>
+      <editor id="veronique-hoste"><first>Véronique</first><last>Hoste</last></editor>
       <editor><first>Zhu</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong</address>
@@ -9678,7 +9678,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Berke</first><last>Oral</last></author>
       <author><first>Erdem</first><last>Emekligil</last></author>
       <author><first>Seçil</first><last>Arslan</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>1–9</pages>
       <abstract>In order to automate banking processes (e.g. payments, money transfers, foreign trade), we need to extract banking transactions from different types of mediums such as faxes, e-mails, and scanners. Banking orders may be considered as complex documents since they contain quite complex relations compared to traditional datasets used in relation extraction research. In this paper, we present our method to extract intersentential, nested and complex relations from banking orders, and introduce a relation extraction method based on maximal clique factorization technique. We demonstrate 11% error reduction over previous methods.</abstract>
       <url hash="1faa1911">D19-5101</url>
@@ -9687,7 +9687,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="2">
       <title>Financial Event Extraction Using <fixed-case>W</fixed-case>ikipedia-Based Weak Supervision</title>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Ariel</first><last>Gera</last></author>
       <author><first>Orith</first><last>Toledo-Ronen</last></author>
       <author><first>Alon</first><last>Halfon</last></author>
@@ -9704,7 +9704,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="3">
       <title>A Time Series Analysis of Emotional Loading in Central Bank Statements</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Simon</first><last>Junker</last></author>
       <author><first>Thore</first><last>Schlaak</last></author>
       <author><first>Claus</first><last>Michelsen</last></author>
@@ -9795,7 +9795,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <editor><first>Anoop</first><last>Kunchukuttan</last></editor>
       <editor><first>Nobushige</first><last>Doi</last></editor>
       <editor><first>Yusuke</first><last>Oda</last></editor>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Shantipriya</first><last>Parida</last></editor>
       <editor><first>Isao</first><last>Goto</last></editor>
       <editor><first>Hidaya</first><last>Mino</last></editor>
@@ -9846,7 +9846,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Controlling <fixed-case>J</fixed-case>apanese Honorifics in <fixed-case>E</fixed-case>nglish-to-<fixed-case>J</fixed-case>apanese Neural Machine Translation</title>
       <author><first>Weston</first><last>Feely</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>45–53</pages>
       <abstract>In the Japanese language different levels of honorific speech are used to convey respect, deference, humility, formality and social distance. In this paper, we present a method for controlling the level of formality of Japanese output in English-to-Japanese neural machine translation (NMT). By using heuristics to identify honorific verb forms, we classify Japanese sentences as being one of three levels of informal, polite, or formal speech in parallel text. The English source side is marked with a feature that identifies the level of honorific speech present in the Japanese target side. We use this parallel text to train an English-Japanese NMT model capable of producing Japanese translations in different honorific speech styles for the same English input sentence.</abstract>
       <url hash="54be80e3">D19-5203</url>
@@ -9870,7 +9870,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Rohit Pratap</first><last>Singh</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>62–67</pages>
       <abstract>With the widespread use of Machine Trans-lation (MT) techniques, attempt to minimizecommunication gap among people from di-verse linguistic backgrounds. We have par-ticipated in Workshop on Asian Transla-tion 2019 (WAT2019) multi-modal translationtask. There are three types of submissiontrack namely, multi-modal translation, Hindi-only image captioning and text-only transla-tion for English to Hindi translation. The mainchallenge is to provide a precise MT output. The multi-modal concept incorporates textualand visual features in the translation task. Inthis work, multi-modal translation track re-lies on pre-trained convolutional neural net-works (CNN) with Visual Geometry Grouphaving 19 layered (VGG19) to extract imagefeatures and attention-based Neural MachineTranslation (NMT) system for translation. The merge-model of recurrent neural network(RNN) and CNN is used for the Hindi-onlyimage captioning. The text-only translationtrack is based on the transformer model of theNMT system. The official results evaluated atWAT2019 translation task, which shows thatour multi-modal NMT system achieved Bilin-gual Evaluation Understudy (BLEU) score20.37, Rank-based Intuitive Bilingual Eval-uation Score (RIBES) 0.642838, Adequacy-Fluency Metrics (AMFM) score 0.668260 forchallenge test data and BLEU score 40.55,RIBES 0.760080, AMFM score 0.770860 forevaluation test data in English to Hindi multi-modal translation respectively.</abstract>
       <url hash="e61089a9">D19-5205</url>
@@ -9885,7 +9885,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>68–75</pages>
       <abstract>This paper presents the NICT’s supervised and unsupervised machine translation systems for the WAT2019 Myanmar-English and Khmer-English translation tasks. For all the translation directions, we built state-of-the-art supervised neural (NMT) and statistical (SMT) machine translation systems, using monolingual data cleaned and normalized. Our combination of NMT and SMT performed among the best systems for the four translation directions. We also investigated the feasibility of unsupervised machine translation for low-resource and distant language pairs and confirmed observations of previous work showing that unsupervised MT is still largely unable to deal with them.</abstract>
       <url hash="10aa721f">D19-5206</url>
@@ -9895,7 +9895,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="7">
       <title><fixed-case>NICT</fixed-case>’s participation to <fixed-case>WAT</fixed-case> 2019: Multilingualism and Multi-step Fine-Tuning for Low Resource <fixed-case>NMT</fixed-case></title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>76–80</pages>
       <abstract>In this paper we describe our submissions to WAT 2019 for the following tasks: English–Tamil translation and Russian–Japanese translation. Our team,“NICT-5”, focused on multilingual domain adaptation and back-translation for Russian–Japanese translation and on simple fine-tuning for English–Tamil translation . We noted that multi-stage fine tuning is essential in leveraging the power of multilingualism for an extremely low-resource language like Russian–Japanese. Furthermore, we can improve the performance of such a low-resource language pair by exploiting a small but in-domain monolingual corpus via back-translation. We managed to obtain second rank in both tasks for all translation directions.</abstract>
       <url hash="2a47ecac">D19-5207</url>
@@ -9925,7 +9925,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>90–93</pages>
       <abstract>This paper presents the NICT’s participation (team ID: NICT) in the 6th Workshop on Asian Translation (WAT-2019) shared translation task, specifically Myanmar (Burmese) - English task in both translation directions. We built neural machine translation (NMT) systems for these tasks. Our NMT systems were trained with language model pretraining. Back-translation technology is adopted to NMT. Our NMT systems rank the third in English-to-Myanmar and the second in Myanmar-to-English according to BLEU score.</abstract>
       <url hash="76ef3e67">D19-5209</url>
@@ -10012,7 +10012,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="16">
       <title><fixed-case>LTRC</fixed-case>-<fixed-case>MT</fixed-case> Simple &amp; Effective <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Neural Machine Translation Systems at <fixed-case>WAT</fixed-case> 2019</title>
       <author><first>Vikrant</first><last>Goyal</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>137–140</pages>
       <abstract>This paper describes the Neural Machine Translation systems of IIIT-Hyderabad (LTRC-MT) for WAT 2019 Hindi-English shared task. We experimented with both Recurrent Neural Networks &amp; Transformer architectures. We also show the results of our experiments of training NMT models using additional data via backtranslation.</abstract>
       <url hash="78324b89">D19-5216</url>
@@ -10022,7 +10022,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="17">
       <title>Long Warm-up and Self-Training: Training Strategies of <fixed-case>NICT</fixed-case>-2 <fixed-case>NMT</fixed-case> System at <fixed-case>WAT</fixed-case>-2019</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>141–146</pages>
       <abstract>This paper describes the NICT-2 neural machine translation system at the 6th Workshop on Asian Translation. This system employs the standard Transformer model but features the following two characteristics. One is the long warm-up strategy, which performs a longer warm-up of the learning rate at the start of the training than conventional approaches. Another is that the system introduces self-training approaches based on multiple back-translations generated by sampling. We participated in three tasks—ASPEC.en-ja, ASPEC.ja-en, and TDDC.ja-en—using this system.</abstract>
       <url hash="da03dc5d">D19-5217</url>
@@ -10077,7 +10077,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="22">
       <title><fixed-case>NLPRL</fixed-case> at <fixed-case>WAT</fixed-case>2019: Transformer-based <fixed-case>T</fixed-case>amil – <fixed-case>E</fixed-case>nglish Indic Task Neural Machine Translation System</title>
       <author><first>Amit</first><last>Kumar</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>171–174</pages>
       <abstract>This paper describes the Machine Translation system for Tamil-English Indic Task organized at WAT 2019. We use Transformer- based architecture for Neural Machine Translation.</abstract>
       <url hash="8f17b056">D19-5222</url>
@@ -10098,8 +10098,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="24">
       <title><fixed-case>WAT</fixed-case>2019: <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Translation on <fixed-case>H</fixed-case>indi Visual Genome Dataset</title>
       <author><first>Loitongbam</first><last>Sanayai Meetei</last></author>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>181–188</pages>
       <abstract>A multimodal translation is a task of translating a source language to a target language with the help of a parallel text corpus paired with images that represent the contextual details of the text. In this paper, we carried out an extensive comparison to evaluate the benefits of using a multimodal approach on translating text in English to a low resource language, Hindi as a part of WAT2019 shared task. We carried out the translation of English to Hindi in three separate tasks with both the evaluation and challenge dataset. First, by using only the parallel text corpora, then through an image caption generation approach and, finally with the multimodal approach. Our experiment shows a significant improvement in the result with the multimodal approach than the other approach.</abstract>
       <url hash="58e24e01">D19-5224</url>
@@ -10110,8 +10110,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title><fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WAT</fixed-case> 2019: <fixed-case>R</fixed-case>ussian-<fixed-case>J</fixed-case>apanese News Commentary task</title>
       <author><first>Jitao</first><last>Xu</last></author>
       <author><first>TuAnh</first><last>Nguyen</last></author>
-      <author><first>MinhQuang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>MinhQuang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>189–194</pages>
       <abstract>This paper describes Systran’s submissions to WAT 2019 Russian-Japanese News Commentary task. A challenging translation task due to the extremely low resources available and the distance of the language pair. We have used the neural Transformer architecture learned over the provided resources and we carried out synthetic data generation experiments which aim at alleviating the data scarcity problem. Results indicate the suitability of the data augmentation experiments, enabling our systems to rank first according to automatic evaluations.</abstract>
@@ -10134,7 +10134,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Sentiment Aware Neural Machine Translation</title>
       <author><first>Chenglei</first><last>Si</last></author>
       <author><first>Kui</first><last>Wu</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>200–206</pages>
       <abstract>Sentiment ambiguous lexicons refer to words where their polarity depends strongly on con- text. As such, when the context is absent, their translations or their embedded sentence ends up (incorrectly) being dependent on the training data. While neural machine translation (NMT) has achieved great progress in recent years, most systems aim to produce one single correct translation for a given source sentence. We investigate the translation variation in two sentiment scenarios. We perform experiments to study the preservation of sentiment during translation with three different methods that we propose. We conducted tests with both sentiment and non-sentiment bearing contexts to examine the effectiveness of our methods. We show that NMT can generate both positive- and negative-valent translations of a source sentence, based on a given input sentiment label. Empirical evaluations show that our valence-sensitive embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq) baseline, both in terms of BLEU score and ambiguous word translation accuracy in test, given non-sentiment bearing contexts.</abstract>
@@ -10146,8 +10146,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Overcoming the Rare Word Problem for low-resource language pairs in Neural Machine Translation</title>
       <author><first>Thi-Vinh</first><last>Ngo</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>207–214</pages>
       <abstract>Among the six challenges of neural machine translation (NMT) coined by (Koehn and Knowles, 2017), rare-word problem is considered the most severe one, especially in translation of low-resource languages. In this paper, we propose three solutions to address the rare words in neural machine translation systems. First, we enhance source context to predict the target words by connecting directly the source embeddings to the output of the attention component in NMT. Second, we propose an algorithm to learn morphology of unknown words for English in supervised way in order to minimize the adverse effect of rare-word problem. Finally, we exploit synonymous relation from the WordNet to overcome out-of-vocabulary (OOV) problem of NMT. We evaluate our approaches on two low-resource language pairs: English-Vietnamese and Japanese-Vietnamese. In our experiments, we have achieved significant improvements of up to roughly +1.0 BLEU points in both language pairs.</abstract>
       <url hash="e18d8b9a">D19-5228</url>
@@ -10174,7 +10174,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <url hash="ef851e40">D19-53</url>
       <editor><first>Dmitry</first><last>Ustalov</last></editor>
       <editor><first>Swapna</first><last>Somasundaran</last></editor>
-      <editor><first>Peter</first><last>Jansen</last></editor>
+      <editor id="peter-jansen"><first>Peter</first><last>Jansen</last></editor>
       <editor><first>Goran</first><last>Glavaš</last></editor>
       <editor><first>Martin</first><last>Riedl</last></editor>
       <editor><first>Mihai</first><last>Surdeanu</last></editor>
@@ -10226,9 +10226,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="4">
       <title>Neural Speech Translation using Lattice Transformations and Graph Networks</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>26–31</pages>
       <abstract>Speech translation systems usually follow a pipeline approach, using word lattices as an intermediate representation. However, previous work assume access to the original transcriptions used to train the ASR system, which can limit applicability in real scenarios. In this work we propose an approach for speech translation through lattice transformations and neural models based on graph networks. Experimental results show that our approach reaches competitive performance without relying on transcriptions, while also being orders of magnitude faster than previous work.</abstract>
       <url hash="ca483c8e">D19-5304</url>
@@ -10251,7 +10251,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Mokanarangan</first><last>Thayaparan</last></author>
       <author><first>Marco</first><last>Valentino</last></author>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>42–51</pages>
       <abstract>Recent advances in reading comprehension have resulted in models that surpass human performance when the answer is contained in a single, continuous passage of text. However, complex Question Answering (QA) typically requires multi-hop reasoning - i.e. the integration of supporting facts from different sources, to infer the correct answer. This paper proposes Document Graph Network (DGN), a message passing architecture for the identification of supporting facts over a graph-structured representation of text. The evaluation on HotpotQA shows that DGN obtains competitive results when compared to a reading comprehension baseline operating on raw text, confirming the relevance of structured representations for supporting multi-hop reasoning.</abstract>
       <url hash="48e86752">D19-5306</url>
@@ -10351,7 +10351,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="15">
       <title>Evaluating Research Novelty Detection: Counterfactual Approaches</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <author><first>Min</first><last>Song</last></author>
       <pages>124–133</pages>
@@ -10364,7 +10364,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Do Sentence Interactions Matter? Leveraging Sentence Level Representations for Fake News Classification</title>
       <author><first>Vaibhav</first><last>Vaibhav</last></author>
       <author><first>Raghuram</first><last>Mandyam</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>134–139</pages>
       <abstract>The rising growth of fake news and misleading information through online media outlets demands an automatic method for detecting such news articles. Of the few limited works which differentiate between trusted vs other types of news article (satire, propaganda, hoax), none of them model sentence interactions within a document. We observe an interesting pattern in the way sentences interact with each other across different kind of news articles. To capture this kind of information for long news articles, we propose a graph neural network-based model which does away with the need of feature engineering for fine grained fake news classification. Through experiments, we show that our proposed method beats strong neural baselines and achieves state-of-the-art accuracy on existing datasets. Moreover, we establish the generalizability of our model by evaluating its performance in out-of-domain scenarios. Code is available at <url>https://github.com/MysteryVaibhav/fake_news_semantics</url>.</abstract>
       <url hash="88c966be">D19-5316</url>
@@ -10406,7 +10406,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Jie</first><last>Chen</last></author>
       <author><first>Maria</first><last>Chang</last></author>
       <author><first>Lingfei</first><last>Wu</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>159–163</pages>
       <abstract>Semantic parsing is a fundamental problem in natural language understanding, as it involves the mapping of natural language to structured forms such as executable queries or logic-like knowledge representations. Existing deep learning approaches for semantic parsing have shown promise on a variety of benchmark data sets, particularly on text-to-SQL parsing. However, most text-to-SQL parsers do not generalize to unseen data sets in different domains. In this paper, we propose a new cross-domain learning scheme to perform text-to-SQL translation and demonstrate its use on Spider, a large-scale cross-domain text-to-SQL data set. We improve upon a state-of-the-art Spider model, SyntaxSQLNet, by constructing a graph of column names for all databases and using graph neural networks to compute their embeddings. The resulting embeddings offer better cross-domain representations and SQL queries, as evidenced by substantial improvement on the Spider data set compared to SyntaxSQLNet.</abstract>
       <url hash="0beb708a">D19-5319</url>
@@ -10427,7 +10427,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="21">
       <title>Node Embeddings for Graph Merging: Case of Knowledge Graph Construction</title>
       <author><first>Ida</first><last>Szubert</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>172–176</pages>
       <abstract>Combining two graphs requires merging the nodes which are counterparts of each other. In this process errors occur, resulting in incorrect merging or incorrect failure to merge. We find a high prevalence of such errors when using AskNET, an algorithm for building Knowledge Graphs from text corpora. AskNET node matching method uses string similarity, which we propose to replace with vector embedding similarity. We explore graph-based and word-based embedding models and show an overall error reduction of from 56% to 23.6%, with a reduction of over a half in both types of incorrect node matching.</abstract>
       <url hash="35af9257">D19-5321</url>
@@ -10437,7 +10437,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="22">
       <title><fixed-case>DB</fixed-case>ee: A Database for Creating and Managing Knowledge Graphs and Embeddings</title>
       <author><first>Viktor</first><last>Schlegel</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>177–185</pages>
       <abstract>This paper describes DBee, a database to support the construction of data-intensive AI applications. DBee provides a unique data model which operates jointly over large-scale knowledge graphs (KGs) and embedding vector spaces (VSs). This model supports queries which exploit the semantic properties of both types of representations (KGs and VSs). Additionally, DBee aims to facilitate the construction of KGs and VSs, by providing a library of generators, which can be used to create, integrate and transform data into KGs and VSs.</abstract>
       <url hash="f9978e52">D19-5322</url>
@@ -10460,7 +10460,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <booktitle>Proceedings of the 2nd Workshop on New Frontiers in Summarization</booktitle>
       <url hash="f86f0bd9">D19-54</url>
       <editor><first>Lu</first><last>Wang</last></editor>
-      <editor><first>Jackie Chi Kit</first><last>Cheung</last></editor>
+      <editor id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></editor>
       <editor><first>Giuseppe</first><last>Carenini</last></editor>
       <editor id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -10476,8 +10476,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="1">
       <title>Answering Naturally: Factoid to Full length Answer Generation</title>
       <author><first>Vaishali</first><last>Pal</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Irshad</first><last>Bhat</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
       <pages>1–9</pages>
       <abstract>In recent years, the task of Question Answering over passages, also pitched as a reading comprehension, has evolved into a very active research area. A reading comprehension system extracts a span of text, comprising of named entities, dates, small phrases, etc., which serve as the answer to a given question. However, these spans of text would result in an unnatural reading experience in a conversational system. Usually, dialogue systems solve this issue by using template-based language generation. These systems, though adequate for a domain specific task, are too restrictive and predefined for a domain independent system. In order to present the user with a more conversational experience, we propose a pointer generator based full-length answer generator which can be used with most QA systems. Our system generates a full length answer given a question and the extracted factoid/span answer without relying on the passage from where the answer was extracted. We also present a dataset of 315000 question, factoid answer and full length answer triples. We have evaluated our system using ROUGE-1,2,L and BLEU and achieved 74.05 BLEU score and 86.25 Rogue-L score.</abstract>
       <url hash="be124353">D19-5401</url>
@@ -10581,7 +10581,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Danqing</first><last>Wang</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>80–89</pages>
       <abstract>In this paper, we take stock of the current state of summarization datasets and explore how different factors of datasets influence the generalization behaviour of neural extractive summarization models. Specifically, we first propose several properties of datasets, which matter for the generalization of summarization models. Then we build the connection between priors residing in datasets and model designs, analyzing how different properties of datasets influence the choices of model structure design and training methods. Finally, by taking a typical dataset as an example, we rethink the process of the model design based on the experience of the above analysis. We demonstrate that when we have a deep understanding of the characteristics of datasets, a simple approach can bring significant improvements to the existing state-of-the-art model.</abstract>
       <url hash="17289290">D19-5410</url>
@@ -10591,7 +10591,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="11">
       <title><fixed-case>G</fixed-case>lobal <fixed-case>V</fixed-case>oices: Crossing Borders in Automatic News Summarization</title>
       <author><first>Khanh</first><last>Nguyen</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>90–97</pages>
       <abstract>We construct Global Voices, a multilingual dataset for evaluating cross-lingual summarization methods. We extract social-network descriptions of Global Voices news articles to cheaply collect evaluation data for into-English and from-English summarization in 15 languages. Especially, for the into-English summarization task, we crowd-source a high-quality evaluation dataset based on guidelines that emphasize accuracy, coverage, and understandability. To ensure the quality of this dataset, we collect human ratings to filter out bad summaries, and conduct a survey on humans, which shows that the remaining summaries are preferred over the social-network summaries. We study the effect of translation quality in cross-lingual summarization, comparing a translate-then-summarize approach with several baselines. Our results highlight the limitations of the ROUGE metric that are overlooked in monolingual summarization.</abstract>
       <url hash="954c7661">D19-5411</url>
@@ -10631,7 +10631,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Abram</first><last>Handler</last></author>
       <author><first>Premkumar</first><last>Ganeshkumar</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Mohamed</first><last>AlTantawy</last></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>AlTantawy</last></author>
       <pages>111–115</pages>
       <abstract>Concept maps are visual summaries, structured as directed graphs: important concepts from a dataset are displayed as vertexes, and edges between vertexes show natural language descriptions of the relationships between the concepts on the map. Thus far, preliminary attempts at automatically creating concept maps have focused on building static summaries. However, in interactive settings, users will need to dynamically investigate particular relationships between pairs of concepts. For instance, a historian using a concept map browser might decide to investigate the relationship between two politicians in a news archive. We present a model which responds to such queries by returning one or more short, importance-ranked, natural language descriptions of the relationship between two requested concepts, for display in a visual interface. Our model is trained on a new public dataset, collected for this task.</abstract>
       <url hash="1eea8602">D19-5414</url>
@@ -10641,7 +10641,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="15">
       <title>Exploiting Discourse-Level Segmentation for Extractive Summarization</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>116–121</pages>
       <abstract>Extractive summarization selects and concatenates the most essential text spans in a document. Most, if not all, neural approaches use sentences as the elementary unit to select content for summarization. However, semantic segments containing supplementary information or descriptive details are often nonessential in the generated summaries. In this work, we propose to exploit discourse-level segmentation as a finer-grained means to more precisely pinpoint the core content in a document. We investigate how the sub-sentential segmentation improves extractive summarization performance when content selection is modeled through two basic neural network architectures and a deep bi-directional transformer. Experiment results on the CNN/Daily Mail dataset show that discourse-level segmentation is effective in both cases. In particular, we achieve state-of-the-art performance when discourse-level segmentation is combined with our adapted contextual representation model.</abstract>
       <url hash="a750a407">D19-5415</url>
@@ -10655,7 +10655,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <url hash="ccc8e3df">D19-55</url>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <editor><first>Afshin</first><last>Rahimi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong, China</address>
@@ -10682,7 +10682,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="2">
       <title>Formality Style Transfer for Noisy, User-generated Conversations: Extracting Labeled, Parallel Data from Unlabeled Corpora</title>
       <author><first>Isak</first><last>Czeresnia Etinger</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>11–16</pages>
       <abstract>Typical datasets used for style transfer in NLP contain aligned pairs of two opposite extremes of a style. As each existing dataset is sourced from a specific domain and context, most use cases will have a sizable mismatch from the vocabulary and sentence structures of any dataset available. This reduces the performance of the style transfer, and is particularly significant for noisy, user-generated text. To solve this problem, we show a technique to derive a dataset of aligned pairs (style-agnostic vs stylistic sentences) from an unlabeled corpus by using an auxiliary dataset, allowing for in-domain training. We test the technique with the Yahoo Formality Dataset and 6 novel datasets we produced, which consist of scripts from 5 popular TV-shows (Friends, Futurama, Seinfeld, Southpark, Stargate SG-1) and the Slate Star Codex online forum. We gather 1080 human evaluations, which show that our method produces a sizable change in formality while maintaining fluency and context; and that it considerably outperforms OpenNMT’s Seq2Seq model directly trained on the Yahoo Formality Dataset. Additionally, we publish the full pipeline code and our novel datasets.</abstract>
       <url hash="a15b8d08">D19-5502</url>
@@ -10703,8 +10703,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="4">
       <title>Personalizing Grammatical Error Correction: Adaptation to Proficiency Level and <fixed-case>L</fixed-case>1</title>
-      <author><first>Maria</first><last>Nadejde</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>27–33</pages>
       <abstract>Grammar error correction (GEC) systems have become ubiquitous in a variety of software applications, and have started to approach human-level performance for some datasets. However, very little is known about how to efficiently personalize these systems to the user’s characteristics, such as their proficiency level and first language, or to emerging domains of text. We present the first results on adapting a general purpose neural GEC system to both the proficiency level and the first language of a writer, using only a few thousand annotated sentences. Our study is the broadest of its kind, covering five proficiency levels and twelve different languages, and comparing three different adaptation scenarios: adapting to the proficiency level only, to the first language only, or to both aspects simultaneously. We show that tailoring to both scenarios achieves the largest performance improvement (3.6 F0.5) relative to a strong baseline.</abstract>
       <url hash="0897e58b">D19-5504</url>
@@ -10738,7 +10738,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="7">
       <title>Character-Based Models for Adversarial Phone Extraction: Preventing Human Sex Trafficking</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Timothy</first><last>Forman</last></author>
       <author><first>Catherine</first><last>Griswold</last></author>
       <author><first>Kevin</first><last>Lu</last></author>
@@ -10786,7 +10786,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Vinayak</first><last>Athavale</last></author>
       <author><first>Aayush</first><last>Naik</last></author>
       <author><first>Rajas</first><last>Vanjape</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>84–93</pages>
       <abstract>We introduce the task of algorithm class prediction for programming word problems. A programming word problem is a problem written in natural language, which can be solved using an algorithm or a program. We define classes of various programming word problems which correspond to the class of algorithms required to solve the problem. We present four new datasets for this task, two multiclass datasets with 550 and 1159 problems each and two multilabel datasets having 3737 and 3960 problems each. We pose the problem as a text classification problem and train neural network and non-neural network based models on this task. Our best performing classifier gets an accuracy of 62.7 percent for the multiclass case on the five class classification dataset, Codeforces Multiclass-5 (CFMC5). We also do some human-level analysis and compare human performance with that of our text classification models. Our best classifier has an accuracy only 9 percent lower than that of a human on this task. To the best of our knowledge, these are the first reported results on such a task. We make our code and datasets publicly available.</abstract>
       <url hash="80bb0005">D19-5511</url>
@@ -10797,7 +10797,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Automatic identification of writers’ intentions: Comparing different methods for predicting relationship goals in online dating profile texts</title>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Tess</first><last>van der Zanden</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Maria</first><last>Mos</last></author>
       <author><first>Alexander</first><last>Schouten</last></author>
       <pages>94–100</pages>
@@ -10810,7 +10810,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="13">
       <title>Contextualized Word Representations from Distant Supervision with and for <fixed-case>NER</fixed-case></title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>101–108</pages>
       <abstract>We describe a special type of deep contextualized word representation that is learned from distant supervision annotations and dedicated to named entity recognition. Our extensive experiments on 7 datasets show systematic gains across all domains over strong baselines, and demonstrate that our representation is complementary to previously proposed embeddings. We report new state-of-the-art results on CONLL and ONTONOTES datasets.</abstract>
       <url hash="58d90f09">D19-5513</url>
@@ -10943,7 +10943,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="25">
       <title>Modelling Uncertainty in Collaborative Document Quality Assessment</title>
       <author><first>Aili</first><last>Shen</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
       <author><first>Timothy</first><last>Baldwin</last></author>
@@ -10955,7 +10955,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="26">
       <title>Conceptualisation and Annotation of Drug Nonadherence Information for Knowledge Extraction from Patient-Generated Texts</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Richard</first><last>Hoile</last></author>
       <author><first>Elizabeth</first><last>Ford</last></author>
       <author><first>Azam</first><last>Mullick</last></author>
@@ -10970,7 +10970,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Shirley Anugrah</first><last>Hayati</last></author>
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Naoki</first><last>Otani</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>212–216</pages>
       <abstract>Irony detection is an important task with applications in identification of online abuse and harassment. With the ubiquitous use of non-verbal cues such as emojis in social media, in this work we aim to study the role of these structures in irony detection. Since the existing irony detection datasets have &lt;10% ironic tweets with emoji, classifiers trained on them are insensitive to emojis. We propose an automated pipeline for creating a more balanced dataset.</abstract>
       <url hash="7b83516c">D19-5527</url>
@@ -11011,7 +11011,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Robustness to Capitalization Errors in Named Entity Recognition</title>
       <author><first>Sravan</first><last>Bodapati</last></author>
       <author><first>Hyokun</first><last>Yun</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>237–242</pages>
       <abstract>Robustness to capitalization errors is a highly desirable characteristic of named entity recognizers, yet we find standard models for the task are surprisingly brittle to such noise. Existing methods to improve robustness to the noise completely discard given orthographic information, which significantly degrades their performance on well-formed text. We propose a simple alternative approach based on data augmentation, which allows the model to learn to utilize or ignore orthographic information depending on its usefulness in the context. It achieves competitive robustness to capitalization errors while making negligible compromise to its performance on well-formed text and significantly improving generalization power on noisy user-generated text. Our experiments clearly and consistently validate our claim across different types of machine learning models, languages, and dataset sizes.</abstract>
       <url hash="2763f3bb">D19-5531</url>
@@ -11054,7 +11054,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Incremental processing of noisy user utterances in the spoken language understanding task</title>
       <author><first>Stefan</first><last>Constantin</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>265–274</pages>
       <abstract>The state-of-the-art neural network architectures make it possible to create spoken language understanding systems with high quality and fast processing time. One major challenge for real-world applications is the high latency of these systems caused by triggered actions with high executions times. If an action can be separated into subactions, the reaction time of the systems can be improved through incremental processing of the user utterance and starting subactions while the utterance is still being uttered. In this work, we present a model-agnostic method to achieve high quality in processing incrementally produced partial utterances. Based on clean and noisy versions of the ATIS dataset, we show how to create datasets with our method to create low-latency natural language understanding components. We get improvements of up to 47.91 absolute percentage points in the metric F1-score.</abstract>
       <url hash="aac81e90">D19-5535</url>
@@ -11065,8 +11065,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="36">
       <title>Benefits of Data Augmentation for <fixed-case>NMT</fixed-case>-based Text Normalization of User-Generated Content</title>
       <author><first>Claudia</first><last>Matos Veliz</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>275–285</pages>
       <abstract>One of the most persistent characteristics of written user-generated content (UGC) is the use of non-standard words. This characteristic contributes to an increased difficulty to automatically process and analyze UGC. Text normalization is the task of transforming lexical variants to their canonical forms and is often used as a pre-processing step for conventional NLP tasks in order to overcome the performance drop that NLP systems experience when applied to UGC. In this work, we follow a Neural Machine Translation approach to text normalization. To train such an encoder-decoder model, large parallel training corpora of sentence pairs are required. However, obtaining large data sets with UGC and their normalized version is not trivial, especially for languages other than English. In this paper, we explore how to overcome this data bottleneck for Dutch, a low-resource language. We start off with a small publicly available parallel Dutch data set comprising three UGC genres and compare two different approaches. The first is to manually normalize and add training data, a money and time-consuming task. The second approach is a set of data augmentation techniques which increase data size by converting existing resources into synthesized non-standard forms. Our results reveal that, while the different approaches yield similar results regarding the normalization issues in the test set, they also introduce a large amount of over-normalizations.</abstract>
       <url hash="27d47c70">D19-5536</url>
@@ -11086,7 +11086,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="38">
       <title>Towards Automated Semantic Role Labelling of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Mixed Tweets</title>
       <author><first>Riya</first><last>Pal</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>291–296</pages>
       <abstract>We present a system for automating Semantic Role Labelling of Hindi-English code-mixed tweets. We explore the issues posed by noisy, user generated code-mixed social media data. We also compare the individual effect of various linguistic features used in our system. Our proposed model is a 2-step system for automated labelling which gives an overall accuracy of 84% for Argument Classification, marking a 10% increase over the existing rule-based baseline model. This is the first attempt at building a statistical Semantic Role Labeller for Hindi-English code-mixed data, to the best of our knowledge.</abstract>
       <url hash="8d3e18fe">D19-5538</url>
@@ -11096,8 +11096,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="39">
       <title>Enhancing <fixed-case>BERT</fixed-case> for Lexical Normalization</title>
       <author><first>Benjamin</first><last>Muller</last></author>
-      <author><first>Benoit</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoit</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>297–306</pages>
       <abstract>Language model-based pre-trained representations have become ubiquitous in natural language processing. They have been shown to significantly improve the performance of neural models on a great variety of tasks. However, it remains unclear how useful those general models can be in handling non-canonical text. In this article, focusing on User Generated Content (UGC), we study the ability of BERT to perform lexical normalisation. Our contribution is simple: by framing lexical normalisation as a token prediction task, by enhancing its architecture and by carefully fine-tuning it, we show that BERT can be a competitive lexical normalisation model without the need of any UGC resources aside from 3,000 training sentences. To the best of our knowledge, it is the first work done in adapting and analysing the ability of this model to handle noisy UGC data.</abstract>
       <url hash="de693c3c">D19-5539</url>
@@ -11120,7 +11120,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Improving Multi-label Emotion Classification by Integrating both General and Domain-specific Knowledge</title>
       <author><first>Wenhao</first><last>Ying</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>316–321</pages>
       <abstract>Deep learning based general language models have achieved state-of-the-art results in many popular tasks such as sentiment analysis and QA tasks. Text in domains like social media has its own salient characteristics. Domain knowledge should be helpful in domain relevant tasks. In this work, we devise a simple method to obtain domain knowledge and further propose a method to integrate domain knowledge with general knowledge based on deep language models to improve performance of emotion classification. Experiments on Twitter data show that even though a deep language model fine-tuned by a target domain data has attained comparable results to that of previous state-of-the-art models, this fine-tuned model can still benefit from our extracted domain knowledge to obtain more improvement. This highlights the importance of making use of domain knowledge in domain-specific applications.</abstract>
       <url hash="a5ad553d">D19-5541</url>
@@ -11228,7 +11228,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Yeon</first><last>Seonwoo</last></author>
       <author><first>Sungjoon</first><last>Park</last></author>
       <author><first>Dongkwan</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>387–396</pages>
       <abstract>Additive compositionality of word embedding models has been studied from empirical and theoretical perspectives. Existing research on justifying additive compositionality of existing word embedding models requires a rather strong assumption of uniform word distribution. In this paper, we relax that assumption and propose more realistic conditions for proving additive compositionality, and we develop a novel word and sub-word embedding model that satisfies additive compositionality under those conditions. We then empirically show our model’s improved semantic representation performance on word similarity and noisy sentence similarity.</abstract>
       <url hash="f9422e43">D19-5551</url>
@@ -11250,7 +11250,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="53">
       <title>Phonetic Normalization for Machine Translation of User Generated Content</title>
       <author><first>José Carlos</first><last>Rosales Núñez</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <pages>407–416</pages>
       <abstract>We present an approach to correct noisy User Generated Content (UGC) in French aiming to produce a pretreatement pipeline to improve Machine Translation for this kind of non-canonical corpora. In order to do so, we have implemented a character-based neural model phonetizer to produce IPA pronunciations of words. In this way, we intend to correct grammar, vocabulary and accentuation errors often present in noisy UGC corpora. Our method leverages on the fact that some errors are due to confusion induced by words with similar pronunciation which can be corrected using a phonetic look-up table to produce normalization candidates. These potential corrections are then encoded in a lattice and ranked using a language model to output the most probable corrected phrase. Compare to using other phonetizers, our method boosts a transformer-based machine translation system on UGC.</abstract>
@@ -11321,7 +11321,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <editor><first>Andrew</first><last>Finch</last></editor>
       <editor><first>Hiroaki</first><last>Hayashi</last></editor>
       <editor><first>Ioannis</first><last>Konstas</last></editor>
-      <editor><first>Thang</first><last>Luong</last></editor>
+      <editor id="minh-thang-luong"><first>Thang</first><last>Luong</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Yusuke</first><last>Oda</last></editor>
       <editor><first>Katsuhito</first><last>Sudoh</last></editor>
@@ -11364,7 +11364,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="3">
       <title>Recycling a Pre-trained <fixed-case>BERT</fixed-case> Encoder for Neural Machine Translation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>23–31</pages>
       <abstract>In this paper, a pre-trained Bidirectional Encoder Representations from Transformers (BERT) model is applied to Transformer-based neural machine translation (NMT). In contrast to monolingual tasks, the number of unlearned model parameters in an NMT decoder is as huge as the number of learned parameters in the BERT model. To train all the models appropriately, we employ two-stage optimization, which first trains only the unlearned parameters by freezing the BERT model, and then fine-tunes all the sub-models. In our experiments, stable two-stage optimization was achieved, in contrast the BLEU scores of direct fine-tuning were extremely low. Consequently, the BLEU scores of the proposed method were better than those of the Transformer base model and the same model without pre-training. Additionally, we confirmed that NMT with the BERT encoder is more effective in low-resource settings.</abstract>
       <url hash="ea922aaa">D19-5603</url>
@@ -11377,7 +11377,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Yizhe</first><last>Zhang</last></author>
       <author><first>Sudha</first><last>Rao</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>32–43</pages>
       <abstract>Ambiguous user queries in search engines result in the retrieval of documents that often span multiple topics. One potential solution is for the search engine to generate multiple refined queries, each of which relates to a subset of the documents spanning the same topic. A preliminary step towards this goal is to generate a question that captures common concepts of multiple documents. We propose a new task of generating common question from multiple documents and present simple variant of an existing multi-source encoder-decoder framework, called the Multi-Source Question Generator (MSQG). We first train an RNN-based single encoder-decoder generator from (single document, question) pairs. At test time, given multiple documents, the Distribute step of our MSQG model predicts target word distributions for each document using the trained model. The Aggregate step aggregates these distributions to generate a common question. This simple yet effective strategy significantly outperforms several existing baseline models applied to the new task when evaluated using automated metrics and human judgments on the MS-MARCO-QA dataset.</abstract>
       <url hash="3eb39132">D19-5604</url>
@@ -11503,7 +11503,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="15">
       <title>Enhanced Transformer Model for Data-to-Text Generation</title>
       <author><first>Li</first><last>Gong</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>148–156</pages>
       <abstract>Neural models have recently shown significant progress on data-to-text generation tasks in which descriptive texts are generated conditioned on database records. In this work, we present a new Transformer-based data-to-text generation model which learns content selection and summary generation in an end-to-end fashion. We introduce two extensions to the baseline transformer model: First, we modify the latent representation of the input, which helps to significantly improve the content correctness of the output summary; Second, we include an additional learning objective that accounts for content selection modelling. In addition, we propose two data augmentation methods that succeed to further improve performance of the resulting generation models. Evaluation experiments show that our final model outperforms current state-of-the-art systems as measured by different metrics: BLEU, content selection precision and content ordering. We made publicly available the transformer extension presented in this paper.</abstract>
@@ -11539,7 +11539,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="18">
       <title>Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation</title>
       <author><first>Poorya</first><last>Zaremoodi</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>177–186</pages>
       <abstract>Neural Machine Translation (NMT), a data-hungry technology, suffers from the lack of bilingual data in low-resource scenarios. Multitask learning (MTL) can alleviate this issue by injecting inductive biases into NMT, using auxiliary syntactic and semantic tasks. However, an effective <i>training schedule</i> is required to balance the importance of tasks to get the best use of the training signal. The role of training schedule becomes even more crucial in <i>biased-MTL</i> where the goal is to improve one (or a subset) of tasks the most, e.g. translation quality. Current approaches for biased-MTL are based on brittle <i>hand-engineered</i> heuristics that require trial and error, and should be (re-)designed for each learning scenario. To the best of our knowledge, ours is the first work on <i>adaptively</i> and <i>dynamically</i> changing the training schedule in biased-MTL. We propose a rigorous approach for automatically reweighing the training data of the main and auxiliary tasks throughout the training process based on their contributions to the generalisability of the main NMT task. Our experiments on translating from English to Vietnamese/Turkish/Spanish show improvements of up to +1.2 BLEU points, compared to strong baselines. Additionally, our analyses shed light on the dynamic of needs throughout the training of NMT: from syntax to semantic.</abstract>
       <url hash="70388bd8">D19-5618</url>
@@ -11550,7 +11550,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>On the Importance of Word Boundaries in Character-level Neural Machine Translation</title>
       <author><first>Duygu</first><last>Ataman</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>187–193</pages>
@@ -11620,7 +11620,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Auto-Sizing the Transformer Network: Improving Speed, Efficiency, and Performance for Low-Resource Machine Translation</title>
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>Jeffery</first><last>Kinnison</last></author>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>Walter</first><last>Scheirer</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>231–240</pages>
@@ -11652,7 +11652,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="28">
       <title>Monash University’s Submissions to the <fixed-case>WNGT</fixed-case> 2019 Document Translation Task</title>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>256–261</pages>
       <abstract>We describe the work of Monash University for the shared task of Rotowire document translation organised by the 3rd Workshop on Neural Generation and Translation (WNGT 2019). We submitted systems for both directions of the English-German language pair. Our main focus is on employing an established document-level neural machine translation model for this task. We achieve a BLEU score of 39.83 (41.46 BLEU per WNGT evaluation) for En-De and 45.06 (47.39 BLEU per WNGT evaluation) for De-En translation directions on the Rotowire test set. All experiments conducted in the process are also described.</abstract>
       <url hash="344d3463">D19-5628</url>
@@ -11662,7 +11662,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="29">
       <title><fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WNGT</fixed-case> 2019: <fixed-case>DGT</fixed-case> Task</title>
       <author><first>Li</first><last>Gong</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>262–267</pages>
       <abstract>This paper describes SYSTRAN participation to the Document-level Generation and Trans- lation (DGT) Shared Task of the 3rd Workshop on Neural Generation and Translation (WNGT 2019). We participate for the first time using a Transformer network enhanced with modified input embeddings and optimising an additional objective function that considers content selection. The network takes in structured data of basketball games and outputs a summary of the game in natural language.</abstract>
@@ -11686,7 +11686,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Fahimeh</first><last>Saleh</last></author>
       <author><first>Alexandre</first><last>Berard</last></author>
       <author><first>Ioan</first><last>Calapodescu</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>273–279</pages>
       <abstract>Recently, neural models led to significant improvements in both machine translation (MT) and natural language generation tasks (NLG). However, generation of long descriptive summaries conditioned on structured data remains an open challenge. Likewise, MT that goes beyond sentence-level context is still an open issue (e.g., document-level MT or MT with metadata). To address these challenges, we propose to leverage data from both tasks and do transfer learning between MT, NLG, and MT with source-side metadata (MT+NLG). First, we train document-based MT systems with large amounts of parallel data. Then, we adapt these models to pure NLG and MT+NLG tasks by fine-tuning with smaller amounts of domain-specific data. This end-to-end NLG approach, without data selection and planning, outperforms the previous state of the art on the Rotowire NLG task. We participated to the “Document Generation and Translation” task at WNGT 2019, and ranked first in all tracks.</abstract>
       <url hash="82d4a484">D19-5631</url>
@@ -11697,7 +11697,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>From Research to Production and Back: Ludicrously Fast Neural Machine Translation</title>
       <author><first>Young Jin</first><last>Kim</last></author>
       <author><first>Marcin</first><last>Junczys-Dowmunt</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Alham</first><last>Fikri Aji</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
@@ -11710,9 +11710,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="33">
       <title>Selecting, Planning, and Rewriting: A Modular Approach for Data-to-Document Generation and Translation</title>
-      <author><first>Lesly</first><last>Miculicich</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last></author>
       <author><first>Marc</first><last>Marone</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>289–296</pages>
       <abstract>In this paper, we report our system submissions to all 6 tracks of the WNGT 2019 shared task on Document-Level Generation and Translation. The objective is to generate a textual document from either structured data: generation task, or a document in a different language: translation task. For the translation task, we focused on adapting a large scale system trained on WMT data by fine tuning it on the RotoWire data. For the generation task, we participated with two systems based on a selection and planning model followed by (a) a simple language model generation, and (b) a GPT-2 pre-trained language model approach. The selection and planning module chooses a subset of table records in order, and the language models produce text given such a subset.</abstract>
       <url hash="1004e778">D19-5633</url>
@@ -11751,12 +11751,12 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </frontmatter>
     <paper id="1">
       <title><fixed-case>P</fixed-case>harma<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>: Pharmacological Substances, Compounds and proteins Named Entity Recognition track</title>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <author><first>Ander</first><last>Intxaurrondo</last></author>
       <author><first>Obdulia</first><last>Rabal</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <pages>1–10</pages>
       <abstract>One of the biomedical entity types of relevance for medicine or biosciences are chemical compounds and drugs. The correct detection these entities is critical for other text mining applications building on them, such as adverse drug-reaction detection, medication-related fake news or drug-target extraction. Although a significant effort was made to detect mentions of drugs/chemicals in English texts, so far only very limited attempts were made to recognize them in medical documents in other languages. Taking into account the growing amount of medical publications and clinical records written in Spanish, we have organized the first shared task on detecting drug and chemical entities in Spanish medical documents. Additionally, we included a clinical concept-indexing sub-track asking teams to return SNOMED-CT identifiers related to drugs/chemicals for a collection of documents. For this task, named PharmaCoNER, we generated annotation guidelines together with a corpus of 1,000 manually annotated clinical case studies. A total of 22 teams participated in the sub-track 1, (77 system runs), and 7 teams in the sub-track 2 (19 system runs). Top scoring teams used sophisticated deep learning approaches yielding very competitive results with F-measures above 0.91. These results indicate that there is a real interest in promoting biomedical text mining efforts beyond English. We foresee that the PharmaCoNER annotation guidelines, corpus and participant systems will foster the development of new resources for clinical and biomedical text mining systems of Spanish medical data.</abstract>
       <url hash="3ef99a3b">D19-5701</url>
@@ -11787,9 +11787,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title><fixed-case>I</fixed-case>xa<fixed-case>M</fixed-case>ed at <fixed-case>P</fixed-case>harmaco<fixed-case>NER</fixed-case> Challenge 2019</title>
       <author><first>Xabier</first><last>Lahuerta</last></author>
       <author><first>Iakes</first><last>Goenaga</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <author><first>Aitziber</first><last>Atutxa Salazar</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>21–25</pages>
       <abstract>The aim of this paper is to present our approach (IxaMed) in the PharmacoNER 2019 task. The task consists of identifying chemical, drug, and gene/protein mentions from clinical case studies written in Spanish. The evaluation of the task is divided in two scenarios: one corresponding to the detection of named entities and one corresponding to the indexation of named entities that have been previously identified. In order to identify named entities we have made use of a Bi-LSTM with a CRF on top in combination with different types of word embeddings. We have achieved our best result (86.81 F-Score) combining pretrained word embeddings of Wikipedia and Electronic Health Records (50M words) with contextual string embeddings of Wikipedia and Electronic Health Records. On the other hand, for the indexation of the named entities we have used the Levenshtein distance obtaining a 85.34 F-Score as our best result.</abstract>
       <url hash="bda466ac">D19-5704</url>
@@ -11954,8 +11954,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Using Snomed to recognize and index chemical and drug mentions.</title>
       <author><first>Pilar</first><last>López Úbeda</last></author>
       <author><first>Manuel Carlos</first><last>Díaz Galiano</last></author>
-      <author><first>L. Alfonso</first><last>Urena Lopez</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena Lopez</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <pages>115–120</pages>
       <abstract>In this paper we describe a new named entity extraction system. Our work proposes a system for the identification and annotation of drug names in Spanish biomedical texts based on machine learning and deep learning models. Subsequently, a standardized code using Snomed is assigned to these drugs, for this purpose, Natural Language Processing tools and techniques have been used, and a dictionary of different sources of information has been built. The results are promising, we obtain 78% in F1 score on the first sub-track and in the second task we map with Snomed correctly 72% of the found entities.</abstract>
       <url hash="8b3387f5">D19-5718</url>
@@ -11968,7 +11968,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Louise</first><last>Deléger</last></author>
       <author><first>Estelle</first><last>Chaix</last></author>
       <author><first>Mouhamadou</first><last>Ba</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>121–131</pages>
       <abstract>This paper presents the fourth edition of the Bacteria Biotope task at BioNLP Open Shared Tasks 2019. The task focuses on the extraction of the locations and phenotypes of microorganisms from PubMed abstracts and full-text excerpts, and the characterization of these entities with respect to reference knowledge sources (NCBI taxonomy, OntoBiotope ontology). The task is motivated by the importance of the knowledge on biodiversity for fundamental research and applications in microbiology. The paper describes the different proposed subtasks, the corpus characteristics, and the challenge organization. We also provide an analysis of the results obtained by participants, and inspect the evolution of the results since the last edition in 2016.</abstract>
       <url hash="657e1a4c">D19-5719</url>
@@ -11979,7 +11979,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Linguistically Informed Relation Extraction and Neural Architectures for Nested Named Entity Recognition in <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>OST</fixed-case> 2019</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Usama</first><last>Yaseen</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>132–142</pages>
       <abstract>Named Entity Recognition (NER) and Relation Extraction (RE) are essential tools in distilling knowledge from biomedical literature. This paper presents our findings from participating in BioNLP Shared Tasks 2019. We addressed Named Entity Recognition including nested entities extraction, Entity Normalization and Relation Extraction. Our proposed approach of Named Entities can be generalized to different languages and we have shown it’s effectiveness for English and Spanish text. We investigated linguistic features, hybrid loss including ranking and Conditional Random Fields (CRF), multi-task objective and token level ensembling strategy to improve NER. We employed dictionary based fuzzy and semantic search to perform Entity Normalization. Finally, our RE system employed Support Vector Machine (SVM) with linguistic features. Our NER submission (team:MIC-CIS) ranked first in BB-2019 norm+NER task with standard error rate (SER) of 0.7159 and showed competitive performance on PharmaCo NER task with F1-score of 0.8662. Our RE system ranked first in the SeeDev-binary Relation Extraction Task with F1-score of 0.3738.</abstract>
       <url hash="d1ee7d5f">D19-5720</url>
@@ -12003,7 +12003,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title><fixed-case>BOUN</fixed-case>-<fixed-case>ISIK</fixed-case> Participation: An Unsupervised Approach for the Named Entity Normalization and Relation Extraction of Bacteria Biotopes</title>
       <author><first>İlknur</first><last>Karadeniz</last></author>
       <author><first>Ömer Faruk</first><last>Tuna</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>150–157</pages>
       <abstract>This paper presents our participation to the Bacteria Biotope Task of the BioNLP Shared Task 2019. Our participation includes two systems for the two subtasks of the Bacteria Biotope Task: the normalization of entities (BB-norm) and the identification of the relations between the entities given a biomedical text (BB-rel). For the normalization of entities, we utilized word embeddings and syntactic re-ranking. For the relation extraction task, pre-defined rules are used. Although both approaches are unsupervised, in the sense that they do not need any labeled data, they achieved promising results. Especially, for the BB-norm task, the results have shown that the proposed method performs as good as deep learning based methods, which require labeled data.</abstract>
       <url hash="4dc34a55">D19-5722</url>
@@ -12016,7 +12016,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Fei</first><last>Li</last></author>
       <author><first>Ming</first><last>Cheng</last></author>
       <author><first>Hong</first><last>Yu</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>158–167</pages>
       <abstract>abstract In this article, we describe our approach for the Bacteria Biotopes relation extraction (BB-rel) subtask in the BioNLP Shared Task 2019. This task aims to promote the development of text mining systems that extract relationships between Microorganism, Habitat and Phenotype entities. In this paper, we propose a novel approach for dependency graph construction based on lexical chains, so one dependency graph can represent one or multiple sentences. After that, we propose a neural network model which consists of the bidirectional long short-term memories and an attention graph convolution neural network to learn relation extraction features from the graph. Our approach is able to extract both intra- and inter-sentence relations, and meanwhile utilize syntax information. The results show that our approach achieved the best F1 (66.3%) in the official evaluation participated by 7 teams.</abstract>
       <url hash="db2a5465">D19-5723</url>
@@ -12035,14 +12035,14 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="25">
       <title><fixed-case>CRAFT</fixed-case> Shared Tasks 2019 Overview — Integrated Structure, Semantics, and Coreference</title>
-      <author><first>William</first><last>Baumgartner</last></author>
+      <author id="william-a-baumgartner-jr"><first>William</first><last>Baumgartner</last></author>
       <author><first>Michael</first><last>Bada</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
-      <author><first>Negacy</first><last>Hailu</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="negacy-hailu"><first>Negacy</first><last>Hailu</last></author>
       <author><first>Harrison</first><last>Pielke-Lombardo</last></author>
       <author><first>Michael</first><last>Regan</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>174–184</pages>
       <abstract>As part of the BioNLP Open Shared Tasks 2019, the CRAFT Shared Tasks 2019 provides a platform to gauge the state of the art for three fundamental language processing tasks — dependency parse construction, coreference resolution, and ontology concept identification — over full-text biomedical articles. The structural annotation task requires the automatic generation of dependency parses for each sentence of an article given only the article text. The coreference resolution task focuses on linking coreferring base noun phrase mentions into chains using the symmetrical and transitive identity relation. The ontology concept annotation task involves the identification of concept mentions within text using the classes of ten distinct ontologies in the biomedical domain, both unmodified and augmented with extension classes. This paper provides an overview of each task, including descriptions of the data provided to participants and the evaluation metrics used, and discusses participant results relative to baseline performances for each of the three tasks.</abstract>
       <url hash="ab617d72">D19-5725</url>
@@ -12102,7 +12102,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title><fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>OST</fixed-case> 2019 <fixed-case>RD</fixed-case>o<fixed-case>C</fixed-case> Tasks: Multi-grain Neural Relevance Ranking Using Topics and Attention Based Query-Document-Sentence Interactions</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Yatin</first><last>Chaudhary</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>227–236</pages>
       <abstract>This paper presents our system details and results of participation in the RDoC Tasks of BioNLP-OST 2019. Research Domain Criteria (RDoC) construct is a multi-dimensional and broad framework to describe mental health disorders by combining knowledge from genomics to behaviour. Non-availability of RDoC labelled dataset and tedious labelling process hinders the use of RDoC framework to reach its full potential in Biomedical research community and Healthcare industry. Therefore, Task-1 aims at retrieval and ranking of PubMed abstracts relevant to a given RDoC construct and Task-2 aims at extraction of the most relevant sentence from a given PubMed abstract. We investigate (1) attention based supervised neural topic model and SVM for retrieval and ranking of PubMed abstracts and, further utilize BM25 and other relevance measures for re-ranking, (2) supervised and unsupervised sentence ranking models utilizing multi-view representations comprising of query-aware attention-based sentence representation (QAR), bag-of-words (BoW) and TF-IDF. Our best systems achieved 1st rank and scored 0.86 mAP and 0.58 macro average accuracy in Task-1 and Task-2 respectively.</abstract>
       <url hash="7d183a49">D19-5730</url>
@@ -12162,10 +12162,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="3">
       <title><fixed-case>CALOR</fixed-case>-<fixed-case>QUEST</fixed-case> : generating a training corpus for Machine Reading Comprehension models from shallow semantic annotations</title>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Cindy</first><last>Aloui</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Frederic</first><last>Herledan</last></author>
@@ -12182,7 +12182,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Jianshu</first><last>Chen</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <pages>27–37</pages>
       <abstract>We focus on multiple-choice question answering (QA) tasks in subject areas such as science, where we require both broad background knowledge and the facts from the given subject-area reference corpus. In this work, we explore simple yet effective methods for exploiting two sources of external knowledge for subject-area QA. The first enriches the original subject-area reference corpus with relevant text snippets extracted from an open-domain resource (i.e., Wikipedia) that cover potentially ambiguous concepts in the question and answer options. As in other QA research, the second method simply increases the amount of training data by appending additional in-domain subject-area instances. Experiments on three challenging multiple-choice science QA tasks (i.e., ARC-Easy, ARC-Challenge, and OpenBookQA) demonstrate the effectiveness of our methods: in comparison to the previous state-of-the-art, we obtain absolute gains in accuracy of up to 8.1%, 13.0%, and 12.8%, respectively. While we observe consistent gains when we introduce knowledge from Wikipedia, we find that employing additional QA training instances is not uniformly helpful: performance degrades when the added instances exhibit a higher level of difficulty than the original training data. As one of the first studies on exploiting unstructured external knowledge for subject-area QA, we hope our methods, observations, and discussion of the exposed limitations may shed light on further developments in the area.</abstract>
@@ -12252,7 +12252,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Towards Answer-unaware Conversational Question Generation</title>
       <author><first>Mao</first><last>Nakanishi</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>63–71</pages>
       <abstract>Conversational question generation is a novel area of NLP research which has a range of potential applications. This paper is first to presents a framework for conversational question generation that is unaware of the corresponding answers. To properly generate a question coherent to the grounding text and the current conversation history, the proposed framework first locates the focus of a question in the text passage, and then identifies the question pattern that leads the sequential generation of the words in a question. The experiments using the CoQA dataset demonstrate that the quality of generated questions greatly improves if the question foci and the question patterns are correctly identified. In addition, it was shown that the question foci, even estimated with a reasonable accuracy, could contribute to the quality improvement. These results established that our research direction may be promising, but at the same time revealed that the identification of question patterns is a challenging issue, and it has to be largely refined to achieve a better quality in the end-to-end automatic question generation.</abstract>
       <url hash="7600060c">D19-5809</url>
@@ -12263,7 +12263,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Cross-Task Knowledge Transfer for Query-Based Text Summarization</title>
       <author><first>Elozino</first><last>Egonmwan</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <pages>72–77</pages>
       <abstract>We demonstrate the viability of knowledge transfer between two related tasks: machine reading comprehension (MRC) and query-based text summarization. Using an MRC model trained on the SQuAD1.1 dataset as a core system component, we first build an extractive query-based summarizer. For better precision, this summarizer also compresses the output of the MRC model using a novel sentence compression technique. We further leverage pre-trained machine translation systems to abstract our extracted summaries. Our models achieve state-of-the-art results on the publicly available CNN/Daily Mail and Debatepedia datasets, and can serve as simple yet powerful baselines for future systems. We also hope that these results will encourage research on transfer learning from large MRC corpora to query-based summarization.</abstract>
       <url hash="d4f33d49">D19-5810</url>
@@ -12276,7 +12276,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Lea</first><last>Frermann</last></author>
       <author><first>Diego</first><last>Marcheggiani</last></author>
       <author><first>Roi</first><last>Blanco</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>78–85</pages>
       <abstract>We present a system for answering questions based on the full text of books (BookQA), which first selects book passages given a question at hand, and then uses a memory network to reason and predict an answer. To improve generalization, we pretrain our memory network using artificial questions generated from book sentences. We experiment with the recently published NarrativeQA corpus, on the subset of Who questions, which expect book characters as answers. We experimentally show that BERT-based retrieval and pretraining improve over baseline results significantly. At the same time, we confirm that NarrativeQA is a highly challenging data set, and that there is need for novel research in order to achieve high-precision BookQA results. We analyze some of the bottlenecks of the current approach, and we argue that more research is needed on text representation, retrieval of relevant passages, and reasoning, including commonsense knowledge.</abstract>
       <url hash="6b2e8617">D19-5811</url>
@@ -12370,7 +12370,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>James</first><last>Route</last></author>
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Yixuan</first><last>Geng</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>125–136</pages>
       <abstract>The field of question answering (QA) has seen rapid growth in new tasks and modeling approaches in recent years. Large scale datasets and focus on challenging linguistic phenomena have driven development in neural models, some of which have achieved parity with human performance in limited cases. However, an examination of state-of-the-art model output reveals that a gap remains in reasoning ability compared to a human, and performance tends to degrade when models are exposed to less-constrained tasks. We are interested in more clearly defining the strengths and limitations of leading models across diverse QA challenges, intending to help future researchers with identifying pathways to generalizable performance. We conduct extensive qualitative and quantitative analyses on the results of four models across four datasets and relate common errors to model capabilities. We also illustrate limitations in the datasets we examine and discuss a way forward for achieving generalizable models and datasets that broadly test QA capabilities.</abstract>
       <url hash="688cbe26">D19-5818</url>
@@ -12417,7 +12417,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Let Me Know What to Ask: Interrogative-Word-Aware Question Generation</title>
       <author><first>Junmo</first><last>Kang</last></author>
       <author><first>Haritz</first><last>Puerto San Roman</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>163–171</pages>
       <abstract>Question Generation (QG) is a Natural Language Processing (NLP) task that aids advances in Question Answering (QA) and conversational assistants. Existing models focus on generating a question based on a text and possibly the answer to the generated question. They need to determine the type of interrogative word to be generated while having to pay attention to the grammar and vocabulary of the question. In this work, we propose Interrogative-Word-Aware Question Generation (IWAQG), a pipelined system composed of two modules: an interrogative word classifier and a QG model. The first module predicts the interrogative word that is provided to the second module to create the question. Owing to an increased recall of deciding the interrogative words to be used for the generated questions, the proposed model achieves new state-of-the-art results on the task of QG in SQuAD, improving from 46.58 to 47.69 in BLEU-1, 17.55 to 18.53 in BLEU-4, 21.24 to 22.33 in METEOR, and from 44.53 to 46.94 in ROUGE-L.</abstract>
       <url hash="9d7c1911">D19-5822</url>
@@ -12448,7 +12448,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="25">
       <title>Question Answering Using Hierarchical Attention on Top of <fixed-case>BERT</fixed-case> Features</title>
       <author><first>Reham</first><last>Osama</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
       <pages>191–195</pages>
       <abstract>The model submitted works as follows. When supplied a question and a passage it makes use of the BERT embedding along with the hierarchical attention model which consists of 2 parts, the co-attention and the self-attention, to locate a continuous span of the passage that is the answer to the question.</abstract>
@@ -12471,7 +12471,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Generalizing Question Answering System with Pre-trained Language Model Fine-tuning</title>
       <author><first>Dan</first><last>Su</last></author>
       <author><first>Yan</first><last>Xu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Hyeondey</first><last>Kim</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
@@ -12549,8 +12549,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="3">
       <title>Leveraging syntactic parsing to improve event annotation matching</title>
       <author><first>Camiel</first><last>Colruyt</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>15–23</pages>
       <abstract>Detecting event mentions is the first step in event extraction from text and annotating them is a notoriously difficult task. Evaluating annotator consistency is crucial when building datasets for mention detection. When event mentions are allowed to cover many tokens, annotators may disagree on their span, which means that overlapping annotations may then refer to the same event or to different events. This paper explores different fuzzy-matching functions which aim to resolve this ambiguity. The functions extract the sets of syntactic heads present in the annotations, use the Dice coefficient to measure the similarity between sets and return a judgment based on a given threshold. The functions are tested against the judgment of a human evaluator and a comparison is made between sets of tokens and sets of syntactic heads. The best-performing function is a head-based function that is found to agree with the human evaluator in 89% of cases.</abstract>
       <url hash="50b879c5">D19-5903</url>
@@ -12587,7 +12587,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Computer Assisted Annotation of Tension Development in <fixed-case>TED</fixed-case> Talks through Crowdsourcing</title>
       <author><first>Seungwon</first><last>Yoon</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>39–47</pages>
       <abstract>We propose a method of machine-assisted annotation for the identification of tension development, annotating whether the tension is increasing, decreasing, or staying unchanged. We use a neural network based prediction model, whose predicted results are given to the annotators as initial values for the options that they are asked to choose. By presenting such initial values to the annotators, the annotation task becomes an evaluation task where the annotators inspect whether or not the predicted results are correct. To demonstrate the effectiveness of our method, we performed the annotation task in both in-house and crowdsourced environments. For the crowdsourced environment, we compared the annotation results with and without our method of machine-assisted annotation. We find that the results with our method showed a higher agreement to the gold standard than those without, though our method had little effect at reducing the time for annotation. Our codes for the experiment are made publicly available.</abstract>
       <url hash="2cbe5165">D19-5906</url>
@@ -12653,7 +12653,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Jonathan</first><last>Francis</last></author>
       <author><first>Quanyang</first><last>Lu</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Alessandro</first><last>Oltramari</last></author>
       <pages>22–32</pages>
       <abstract>Non-extractive commonsense QA remains a challenging AI task, as it requires systems to reason about, synthesize, and gather disparate pieces of information, in order to generate responses to queries. Recent approaches on such tasks show increased performance, only when models are either pre-trained with additional information or when domain-specific heuristics are used, without any special consideration regarding the knowledge resource type. In this paper, we perform a survey of recent commonsense QA methods and we provide a systematic analysis of popular knowledge resources and knowledge-integration methods, across benchmarks from multiple commonsense datasets. Our results and analysis show that attention-based injection seems to be a preferable choice for knowledge integration and that the degree of domain overlap, between knowledge bases and datasets, plays a crucial role in determining model success.</abstract>
@@ -12677,7 +12677,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="5">
       <title>Commonsense about Human Senses: Labeled Data Collection Processes</title>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <pages>43–52</pages>
       <abstract>We consider the problem of extracting from text commonsense knowledge pertaining to human senses such as sound and smell. First, we consider the problem of recognizing mentions of human senses in text. Our contribution is a method for acquiring labeled data. Experiments show the effectiveness of our proposed data labeling approach when used with standard machine learning models on the task of sense recognition in text. Second, we propose to extract novel, common sense relationships pertaining to sense perception concepts. Our contribution is a process for generating labeled data by leveraging large corpora and crowdsourcing questionnaires.</abstract>
       <url hash="aa36c389">D19-6005</url>
@@ -12687,7 +12687,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="6">
       <title>Extracting Common Inference Patterns from Semi-Structured Explanations</title>
       <author><first>Sebastian</first><last>Thiem</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>53–65</pages>
       <abstract>Complex questions often require combining multiple facts to correctly answer, particularly when generating detailed explanations for why those answers are correct. Combining multiple facts to answer questions is often modeled as a “multi-hop” graph traversal problem, where a given solver must find a series of interconnected facts in a knowledge graph that, taken together, answer the question and explain the reasoning behind that answer. Multi-hop inference currently suffers from semantic drift, or the tendency for chains of reasoning to “drift”’ to unrelated topics, and this semantic drift greatly limits the number of facts that can be combined in both free text or knowledge base inference. In this work we present our effort to mitigate semantic drift by extracting large high-confidence multi-hop inference patterns, generated by abstracting large-scale explanatory structure from a corpus of detailed explanations. We represent these inference patterns as sets of generalized constraints over sentences represented as rows in a knowledge base of semi-structured tables. We present a prototype tool for identifying common inference patterns from corpora of semi-structured explanations, and use it to successfully extract 67 inference patterns from a “matter” subset of standardized elementary science exam questions that span scientific and world knowledge.</abstract>
       <url hash="d46b9ecf">D19-6006</url>
@@ -12790,7 +12790,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Can a Gorilla Ride a Camel? Learning Semantic Plausibility from Text</title>
       <author><first>Ian</first><last>Porada</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>123–129</pages>
       <abstract>Modeling semantic plausibility requires commonsense knowledge about the world and has been used as a testbed for exploring various knowledge representations. Previous work has focused specifically on modeling physical plausibility and shown that distributional methods fail when tested in a supervised setting. At the same time, distributional models, namely large pretrained language models, have led to improved results for many natural language understanding tasks. In this work, we show that these pretrained language models are in fact effective at modeling physical plausibility in the supervised setting. We therefore present the more difficult problem of learning to model physical plausibility directly from text. We create a training set by extracting attested events from a large corpus, and we provide a baseline for training on these attested events in a self-supervised manner and testing on a physical plausibility task. We believe results could be further improved by injecting explicit commonsense knowledge into a distributional model.</abstract>
       <url hash="1adba42c">D19-6015</url>
@@ -12817,7 +12817,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <editor><first>Colin</first><last>Cherry</last></editor>
       <editor><first>Greg</first><last>Durrett</last></editor>
       <editor><first>George</first><last>Foster</last></editor>
-      <editor><first>Reza</first><last>Haffari</last></editor>
+      <editor id="gholamreza-haffari"><first>Reza</first><last>Haffari</last></editor>
       <editor><first>Shahram</first><last>Khadivi</last></editor>
       <editor><first>Nanyun</first><last>Peng</last></editor>
       <editor><first>Xiang</first><last>Ren</last></editor>
@@ -12941,7 +12941,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Empirical Evaluation of Active Learning Techniques for Neural <fixed-case>MT</fixed-case></title>
       <author><first>Xiangkai</first><last>Zeng</last></author>
       <author><first>Sarthak</first><last>Garg</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Udhyakumar</first><last>Nallasamy</last></author>
       <author><first>Matthias</first><last>Paulik</last></author>
       <pages>84–93</pages>
@@ -12965,7 +12965,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Few-Shot and Zero-Shot Learning for Historical Text Normalization</title>
       <author><first>Marcel</first><last>Bollmann</last></author>
       <author><first>Natalia</first><last>Korchagina</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>104–114</pages>
       <abstract>Historical text normalization often relies on small training datasets. Recent work has shown that multi-task learning can lead to significant improvements by exploiting synergies with related datasets, but there has been no systematic study of different multi-task learning architectures. This paper evaluates 63 multi-task learning configurations for sequence-to-sequence-based historical text normalization across ten datasets from eight languages, using autoencoding, grapheme-to-phoneme mapping, and lemmatization as auxiliary tasks. We observe consistent, significant improvements across languages when training data for the target task is limited, but minimal or no improvements when training data is abundant. We also show that zero-shot learning outperforms the simple, but relatively strong, identity baseline.</abstract>
       <url hash="4845f825">D19-6112</url>
@@ -13075,7 +13075,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Steven</first><last>Hillis</last></author>
       <author><first>Isak</first><last>Czeresnia Etinger</last></author>
       <author><first>Han</first><last>Zhang</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>192–201</pages>
       <abstract>Grapheme-to-phoneme conversion (g2p) is the task of predicting the pronunciation of words from their orthographic representation. His- torically, g2p systems were transition- or rule- based, making generalization beyond a mono- lingual (high resource) domain impractical. Recently, neural architectures have enabled multilingual systems to generalize widely; however, all systems to date have been trained only on spelling-pronunciation pairs. We hy- pothesize that the sequences of IPA characters used to represent pronunciation do not capture its full nuance, especially when cleaned to fa- cilitate machine learning. We leverage audio data as an auxiliary modality in a multi-task training process to learn a more optimal inter- mediate representation of source graphemes; this is the first multimodal model proposed for multilingual g2p. Our approach is highly ef- fective: on our in-domain test set, our mul- timodal model reduces phoneme error rate to 2.46%, a more than 65% decrease compared to our implementation of a unimodal spelling- pronunciation model—which itself achieves state-of-the-art results on the Wiktionary test set. The advantages of the multimodal model generalize to wholly unseen languages, reduc- ing phoneme error rate on our out-of-domain test set to 6.39% from the unimodal 8.21%, a more than 20% relative decrease. Further- more, our training and test sets are composed primarily of low-resource languages, demon- strating that our multimodal approach remains useful when training data are constrained.</abstract>
       <url hash="1e32c49e">D19-6121</url>
@@ -13095,9 +13095,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="23">
       <title>Neural Unsupervised Parsing Beyond <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Anhad</first><last>Mohananey</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>209–218</pages>
       <abstract>Recently, neural network models which automatically infer syntactic structure from raw text have started to achieve promising results. However, earlier work on unsupervised parsing shows large performance differences between non-neural models trained on corpora in different languages, even for comparable amounts of data. With that in mind, we train instances of the PRPN architecture (Shen et al., 2018)—one of these unsupervised neural network parsers—for Arabic, Chinese, English, and German. We find that (i) the model strongly outperforms trivial baselines and, thus, acquires at least some parsing ability for all languages; (ii) good hyperparameter values seem to be universal; (iii) how the model benefits from larger training set sizes depends on the corpus, with the model achieving the largest performance gains when increasing the number of sentences from 2,500 to 12,500 for English. In addition, we show that, by sharing parameters between the related languages German and English, we can improve the model’s unsupervised parsing F1 score by up to 4% in the low-resource setting.</abstract>
@@ -13108,9 +13108,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="24">
       <title>Reevaluating Argument Component Extraction in Low Resource Settings</title>
       <author><first>Anirudh</first><last>Joshi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Richard</first><last>Sinnott</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>219–224</pages>
       <abstract>Argument component extraction is a challenging and complex high-level semantic extraction task. As such, it is both expensive to annotate (meaning training data is limited and low-resource by nature), and hard for current-generation deep learning methods to model. In this paper, we reevaluate the performance of state-of-the-art approaches in both single- and multi-task learning settings using combinations of character-level, GloVe, ELMo, and BERT encodings using standard BiLSTM-CRF encoders. We use evaluation metrics that are more consistent with evaluation practice in named entity recognition to understand how well current baselines address this challenge and compare their performance to lower-level semantic tasks such as CoNLL named entity recognition. We find that performance utilizing various pre-trained representations and training methodologies often leaves a lot to be desired as it currently stands, and suggest future pathways for improvement.</abstract>
       <url hash="d2824ce9">D19-6124</url>
@@ -13121,7 +13121,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Reinforcement-based denoising of distantly supervised <fixed-case>NER</fixed-case> with partial annotation</title>
       <author><first>Farhad</first><last>Nooralahzadeh</last></author>
       <author><first>Jan Tore</first><last>Lønning</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>225–233</pages>
       <abstract>Existing named entity recognition (NER) systems rely on large amounts of human-labeled data for supervision. However, obtaining large-scale annotated data is challenging particularly in specific domains like health-care, e-commerce and so on. Given the availability of domain specific knowledge resources, (e.g., ontologies, dictionaries), distant supervision is a solution to generate automatically labeled training data to reduce human effort. The outcome of distant supervision for NER, however, is often noisy. False positive and false negative instances are the main issues that reduce performance on this kind of auto-generated data. In this paper, we explore distant supervision in a supervised setup. We adopt a technique of partial annotation to address false negative cases and implement a reinforcement learning strategy with a neural network policy to identify false positive instances. Our results establish a new state-of-the-art on four benchmark datasets taken from different domains and different languages. We then go on to show that our model reduces the amount of manually annotated data required to perform NER in a new domain.</abstract>
       <url hash="17dfb382">D19-6125</url>
@@ -13153,7 +13153,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="28">
       <title>Transductive Auxiliary Task Self-Training for Neural Multi-Task Models</title>
       <author><first>Johannes</first><last>Bjerva</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>253–258</pages>
       <abstract>Multi-task learning and self-training are two common ways to improve a machine learning model’s performance in settings with limited training data. Drawing heavily on ideas from those two approaches, we suggest transductive auxiliary task self-training: training a multi-task model on (i) a combination of main and auxiliary task training data, and (ii) test instances with auxiliary task labels which a single-task version of the model has previously generated. We perform extensive experiments on 86 combinations of languages and tasks. Our results are that, on average, transductive auxiliary task self-training improves absolute accuracy by up to 9.56% over the pure multi-task model for dependency relation tagging and by up to 13.03% for semantic tagging.</abstract>
@@ -13180,7 +13180,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Cezar</first><last>Sas</last></author>
       <author><first>Rahul</first><last>Aralikatte</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>265–274</pages>
       <abstract>Although the vast majority of knowledge bases (KBs) are heavily biased towards English, Wikipedias do cover very different topics in different languages. Exploiting this, we introduce a new multilingual dataset (X-WikiRE), framing relation extraction as a multilingual machine reading problem. We show that by leveraging this resource it is possible to robustly transfer models cross-lingually and that multilingual support significantly improves (zero-shot) relation extraction, enabling the population of low-resourced KBs from their well-populated counterparts.</abstract>
       <url hash="460ce040">D19-6130</url>
@@ -13199,7 +13199,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="32">
       <title>Zero-shot Dependency Parsing with Pre-trained Multilingual Sentence Representations</title>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <pages>281–288</pages>
       <abstract>We investigate whether off-the-shelf deep bidirectional sentence representations (Devlin et al., 2019) trained on a massively multilingual corpus (multilingual BERT) enable the development of an unsupervised universal dependency parser. This approach only leverages a mix of monolingual corpora in many languages and does not require any translation data making it applicable to low-resource languages. In our experiments we outperform the best CoNLL 2018 language-specific systems in all of the shared task’s six truly low-resource languages while using a single system. However, we also find that (i) parsing accuracy still varies dramatically when changing the training languages and (ii) in some target languages zero-shot transfer fails under all tested conditions, raising concerns on the ‘universality’ of the whole approach.</abstract>
@@ -13213,10 +13213,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <booktitle>Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019)</booktitle>
       <url hash="c4a11d6c">D19-62</url>
       <editor><first>Eben</first><last>Holderness</last></editor>
-      <editor><first>Antonio</first><last>Jimeno Yepes</last></editor>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Fabio</first><last>Rinaldi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong</address>
@@ -13231,10 +13231,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="1">
       <title>Cross-document coreference: An approach to capturing coreference without context</title>
       <author><first>Kristin</first><last>Wright-Bettner</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <author><first>Piet</first><last>de Groen</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>1–10</pages>
       <abstract>This paper discusses a cross-document coreference annotation schema that was developed to further automatic extraction of timelines in the clinical domain. Lexical senses and coreference choices are determined largely by context, but cross-document work requires reasoning across contexts that are not necessarily coherent. We found that an annotation approach that relies less on context-guided annotator intuitions and more on schematic rules was most effective in creating meaningful and consistent cross-document relations.</abstract>
       <url hash="2b754296">D19-6201</url>
@@ -13297,7 +13297,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="7">
       <title>Experiments with ad hoc ambiguous abbreviation expansion</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <pages>44–53</pages>
       <abstract>The paper addresses experiments to expand ad hoc ambiguous abbreviations in medical notes on the basis of morphologically annotated texts, without using additional domain resources. We work on Polish data but the described approaches can be used for other languages too. We test two methods to select candidates for word abbreviation expansions. The first one automatically selects all words in text which might be an expansion of an abbreviation according to the language rules. The second method uses clustering of abbreviation occurrences to select representative elements which are manually annotated to determine lists of potential expansions. We then train a classifier to assign expansions to abbreviations based on three training sets: automatically obtained, consisting of manual annotation, and concatenation of the two previous ones. The results obtained for the manually annotated training data significantly outperform automatically obtained training data. Adding the automatically obtained training data to the manually annotated data improves the results, in particular for less frequent abbreviations. In this context the proposed a priori data driven selection of possible extensions turned out to be crucial.</abstract>
       <url hash="93ce997a">D19-6207</url>
@@ -13307,7 +13307,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="8">
       <title>Multi-Task, Multi-Channel, Multi-Input Learning for Mental Illness Detection using Social Media Text</title>
       <author><first>Prasadith</first><last>Kirinde Gamaarachchige</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>54–64</pages>
       <abstract>We investigate the impact of using emotional patterns identified by the clinical practitioners and computational linguists to enhance the prediction capabilities of a mental illness detection (in our case depression and post-traumatic stress disorder) model built using a deep neural network architecture. Over the years, deep learning methods have been successfully used in natural language processing tasks, including a few in the domain of mental illness and suicide ideation detection. We illustrate the effectiveness of using multi-task learning with a multi-channel convolutional neural network as the shared representation and use additional inputs identified by researchers as indicatives in detecting mental disorders to enhance the model predictability. Given the limited amount of unstructured data available for training, we managed to obtain a task-specific AUC higher than 0.90. In comparison to methods such as multi-class classification, we identified multi-task learning with multi-channel convolution neural network and multiple-inputs to be effective in detecting mental disorders.</abstract>
       <url hash="3adacf2a">D19-6208</url>
@@ -13371,7 +13371,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="13">
       <title><fixed-case>D</fixed-case>readdit: A <fixed-case>R</fixed-case>eddit Dataset for Stress Analysis in Social Media</title>
       <author><first>Elsbeth</first><last>Turcan</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>97–107</pages>
       <abstract>Stress is a nigh-universal human experience, particularly in the online world. While stress can be a motivator, too much stress is associated with many negative health outcomes, making its identification useful across a range of domains. However, existing computational research typically only studies stress in domains such as speech, or in short genres such as Twitter. We present Dreaddit, a new text corpus of lengthy multi-domain social media data for the identification of stress. Our dataset consists of 190K posts from five different categories of Reddit communities; we additionally label 3.5K total segments taken from 3K posts using Amazon Mechanical Turk. We present preliminary supervised learning methods for identifying stress, both neural and traditional, and analyze the complexity and diversity of the data and characteristics of each category.</abstract>
       <url hash="d29e4afc">D19-6213</url>
@@ -13431,7 +13431,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="18">
       <title>Writing habits and telltale neighbors: analyzing clinical concept usage patterns with sublanguage embeddings</title>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>146–156</pages>
       <abstract>Natural language processing techniques are being applied to increasingly diverse types of electronic health records, and can benefit from in-depth understanding of the distinguishing characteristics of medical document types. We present a method for characterizing the usage patterns of clinical concepts among different document types, in order to capture semantic differences beyond the lexical level. By training concept embeddings on clinical documents of different types and measuring the differences in their nearest neighborhood structures, we are able to measure divergences in concept usage while correcting for noise in embedding learning. Experiments on the MIMIC-III corpus demonstrate that our approach captures clinically-relevant differences in concept usage and provides an intuitive way to explore semantic characteristics of clinical document collections.</abstract>
       <url hash="52b0c9e9">D19-6218</url>
@@ -13441,7 +13441,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="19">
       <title>Recognizing <fixed-case>UMLS</fixed-case> Semantic Types with Deep Learning</title>
       <author><first>Isar</first><last>Nejadgholi</last></author>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Berry</first><last>De Bruijn</last></author>
       <author><first>Muqun</first><last>Li</last></author>
       <author><first>Astha</first><last>LaPlante</last></author>
@@ -13486,7 +13486,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <booktitle>Proceedings of the 2nd Workshop on Multilingual Surface Realisation (MSR 2019)</booktitle>
       <url hash="38285244">D19-63</url>
       <editor><first>Simon</first><last>Mille</last></editor>
-      <editor><first>Anja</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
       <editor><first>Bernd</first><last>Bohnet</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
@@ -13518,7 +13518,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="2">
       <title>Learning to Order Graph Elements with Application to Multilingual Surface Realization</title>
       <author><first>Wenchao</first><last>Du</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>18–24</pages>
       <abstract>Recent advances in deep learning have shown promises in solving complex combinatorial optimization problems, such as sorting variable-sized sequences. In this work, we take a step further and tackle the problem of ordering the elements of sequences that come with graph structures. Our solution adopts an encoder-decoder framework, in which the encoder is a graph neural network that learns the representation for each element, and the decoder predicts the ordering of each local neighborhood of the graph in turn. We apply our framework to multilingual surface realization, which is the task of ordering and completing sentences with their dependency parses given but without the ordering of words. Experiments show that our approach is much better for this task than prior works that do not consider graph structures. We participated in 2019 Surface Realization Shared Task (SR’19), and we ranked second out of 14 teams while outperforming those teams below by a large margin.</abstract>
       <url hash="14ac1921">D19-6302</url>
@@ -13559,7 +13559,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="6">
       <title><fixed-case>IMS</fixed-case>ur<fixed-case>R</fixed-case>eal: <fixed-case>IMS</fixed-case> at the Surface Realization Shared Task 2019</title>
       <author><first>Xiang</first><last>Yu</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Marina</first><last>Haid</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
@@ -13571,8 +13571,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="7">
       <title>Surface Realization Shared Task 2019 (<fixed-case>MSR</fixed-case>19): The Team 6 Approach</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>59–62</pages>
       <abstract>This study describes the approach developed by the Tilburg University team to the shallow track of the Multilingual Surface Realization Shared Task 2019 (SR’19) (Mille et al., 2019). Based on Ferreira et al. (2017) and on our 2018 submission Ferreira et al. (2018), the approach generates texts by first preprocessing an input dependency tree into an ordered linearized string, which is then realized using a rule-based and a statistical machine translation (SMT) model. This year our submission is able to realize texts in the 11 languages proposed for the task, different from our last year submission, which covered only 6 Indo-European languages. The model is publicly available.</abstract>
       <url hash="2a9a52d4">D19-6307</url>
@@ -13594,10 +13594,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="9">
       <title>The <fixed-case>OSU</fixed-case>/<fixed-case>F</fixed-case>acebook Realizer for <fixed-case>SRST</fixed-case> 2019: <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Inflection and Serialized <fixed-case>T</fixed-case>ree2<fixed-case>T</fixed-case>ree Linearization</title>
       <author><first>Kartikeya</first><last>Upasani</last></author>
-      <author><first>David</first><last>King</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
       <author><first>Jinfeng</first><last>Rao</last></author>
       <author><first>Anusha</first><last>Balakrishnan</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>68–74</pages>
       <abstract>We describe our exploratory system for the shallow surface realization task, which combines morphological inflection using character sequence-to-sequence models with a baseline linearizer that implements a tree-to-tree model using sequence-to-sequence models on serialized trees. Results for morphological inflection were competitive across languages. Due to time constraints, we could only submit complete results (including linearization) for English. Preliminary linearization results were decent, with a small benefit from reranking to prefer valid output trees, but inadequate control over the words in the output led to poor quality on longer sentences.</abstract>
       <url hash="b1ada9d6">D19-6309</url>
@@ -13617,7 +13617,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="11">
       <title>The <fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o Realizer at <fixed-case>SRST</fixed-case>’19: Learning to Rank and Deep Morphology Prediction for Multilingual Surface Realization</title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
       <pages>81–87</pages>
       <abstract>We describe the system presented at the SR’19 shared task by the DipInfoUnito team. Our approach is based on supervised machine learning. In particular, we divide the SR task into two independent subtasks, namely word order prediction and morphology inflection prediction. Two neural networks with different architectures run on the same input structure, each producing a partial output which is recombined in the final step in order to produce the predicted surface form. This work is a direct successor of the architecture presented at SR’19.</abstract>
@@ -13637,7 +13637,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="13">
       <title>Back-Translation as Strategy to Tackle the Lack of Corpus in Natural Language Generation from Semantic Representations</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>94–103</pages>
@@ -13654,7 +13654,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <editor><first>Aditya</first><last>Mogadala</last></editor>
       <editor><first>Dietrich</first><last>Klakow</last></editor>
       <editor><first>Sandro</first><last>Pezzelle</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong, China</address>
       <month>November</month>
@@ -13670,7 +13670,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Vardaan</first><last>Pahuja</last></author>
       <author><first>Jie</first><last>Fu</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
-      <author><first>Christopher</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Christopher</first><last>Pal</last></author>
       <pages>1–10</pages>
       <abstract>Neural Module Networks, originally proposed for the task of visual question answering, are a class of neural network architectures that involve human-specified neural modules, each designed for a specific form of reasoning. In current formulations of such networks only the parameters of the neural modules and/or the order of their execution is learned. In this work, we further expand this approach and also learn the underlying internal structure of modules in terms of the ordering and combination of simple and elementary arithmetic operators. We utilize a minimum amount of prior knowledge from the human-specified neural modules in the form of different input types and arithmetic operators used in these modules. Our results show that one is indeed able to simultaneously learn both internal module structure and module sequencing without extra supervisory signals for module execution sequencing. With this approach, we report performance comparable to models using hand-designed modules. In addition, we do a analysis of sensitivity of the learned modules w.r.t. the arithmetic operations and infer the analytical expressions of the learned modules.</abstract>
       <url hash="c801ac23">D19-6401</url>
@@ -13693,7 +13693,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="3">
       <title>Big Generalizations with Small Data: Exploring the Role of Training Samples in Learning Adjectives of Size</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>18–23</pages>
       <abstract>In this paper, we experiment with a recently proposed visual reasoning task dealing with quantities – modeling the multimodal, contextually-dependent meaning of size adjectives (‘big’, ‘small’) – and explore the impact of varying the training data on the learning behavior of a state-of-art system. In previous work, models have been shown to fail in generalizing to unseen adjective-noun combinations. Here, we investigate whether, and to what extent, seeing some of these cases during training helps a model understand the rule subtending the task, i.e., that being big implies being not small, and vice versa. We show that relatively few examples are enough to understand this relationship, and that developing a specific, mutually exclusive representation of size adjectives is beneficial to the task.</abstract>
       <url hash="cf9f5923">D19-6403</url>
@@ -13703,7 +13703,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="4">
       <title><fixed-case>E</fixed-case>igencharacter: An Embedding of <fixed-case>C</fixed-case>hinese Character Orthography</title>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>24–28</pages>
       <abstract>Chinese characters are unique in its logographic nature, which inherently encodes world knowledge through thousands of years evolution. This paper proposes an embedding approach, namely eigencharacter (EC) space, which helps NLP application easily access the knowledge encoded in Chinese orthography. These EC representations are automatically extracted, encode both structural and radical information, and easily integrate with other computational models. We built EC representations of 5,000 Chinese characters, investigated orthography knowledge encoded in ECs, and demonstrated how these ECs identified visually similar characters with both structural and radical information.</abstract>
       <url hash="561a4c09">D19-6404</url>
@@ -13713,8 +13713,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="5">
       <title>On the Role of Scene Graphs in Image Captioning</title>
       <author><first>Dalin</first><last>Wang</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>29–34</pages>
       <abstract>Scene graphs represent semantic information in images, which can help image captioning system to produce more descriptive outputs versus using only the image as context. Recent captioning approaches rely on ad-hoc approaches to obtain graphs for images. However, those graphs introduce noise and it is unclear the effect of parser errors on captioning accuracy. In this work, we investigate to what extent scene graphs can help image captioning. Our results show that a state-of-the-art scene graph parser can boost performance almost as much as the ground truth graphs, showing that the bottleneck currently resides more on the captioning models than on the performance of the scene graph parser.</abstract>
       <url hash="c33730ea">D19-6405</url>
@@ -13723,7 +13723,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="6">
       <title>Understanding the Effect of Textual Adversaries in Multimodal Machine Translation</title>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
       <pages>35–40</pages>
       <abstract>It is assumed that multimodal machine translation systems are better than text-only systems at translating phrases that have a direct correspondence in the image. This assumption has been challenged in experiments demonstrating that state-of-the-art multimodal systems perform equally well in the presence of randomly selected images, but, more recently, it has been shown that masking entities from the source language sentence during training can help to overcome this problem. In this paper, we conduct experiments with both visual and textual adversaries in order to understand the role of incorrect textual inputs to such systems. Our results show that when the source language sentence contains mistakes, multimodal translation systems do not leverage the additional visual signal to produce the correct translation. We also find that the degradation of translation performance caused by textual adversaries is significantly higher than by visual adversaries.</abstract>
@@ -13752,7 +13752,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="8">
       <title>At a Glance: The Impact of Gaze Aggregation Views on Syntactic Tagging</title>
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>51–61</pages>
       <abstract>Readers’ eye movements used as part of the training signal have been shown to improve performance in a wide range of Natural Language Processing (NLP) tasks. Previous work uses gaze data either at the type level or at the token level and mostly from a single eye-tracking corpus. In this paper, we analyze type vs token-level integration options with eye tracking data from two corpora to inform two syntactic sequence labeling problems: binary phrase chunking and part-of-speech tagging. We show that using globally-aggregated measures that capture the central tendency or variability of gaze data is more beneficial than proposed local views which retain individual participant information. While gaze data is informative for supervised POS tagging, which complements previous findings on unsupervised POS induction, almost no improvement is obtained for binary phrase chunking, except for a single specific setup. Hence, caution is warranted when using gaze data as signal for NLP, as no single view is robust over tasks, modeling choice and gaze corpus.</abstract>
       <url hash="262e845d">D19-6408</url>
@@ -13779,10 +13779,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)</booktitle>
       <url hash="e612d975">D19-65</url>
-      <editor><first>Andrei</first><last>Popescu-Belis</last></editor>
+      <editor id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></editor>
       <editor><first>Sharid</first><last>Loáiciga</last></editor>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
-      <editor><first>Deyi</first><last>Xiong</last></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hong Kong, China</address>
       <month>November</month>
@@ -13797,7 +13797,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Analysing Coreference in Transformer Outputs</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1–12</pages>
       <abstract>We analyse coreference phenomena in three neural machine translation systems trained with different data settings with or without access to explicit intra- and cross-sentential anaphoric information. We compare system performance on two different genres: news and TED talks. To do this, we manually annotate (the possibly incorrect) coreference chains in the MT outputs and evaluate the coreference chain translations. We define an error typology that aims to go further than pronoun translation adequacy and includes types such as incorrect word selection or missing words. The features of coreference chains in automatic translations are also compared to those of the source texts and human translations. The analysis shows stronger potential translationese effects in machine translated outputs than in human translations.</abstract>
       <url hash="bf4cf622">D19-6501</url>
@@ -13806,7 +13806,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     </paper>
     <paper id="2">
       <title>Context-Aware Neural Machine Translation Decoding</title>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Carles</first><last>Creus</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>13–23</pages>
@@ -13819,7 +13819,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>When and Why is Document-level Context Useful in Neural Machine Translation?</title>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Duc Thanh</first><last>Tran</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>24–34</pages>
       <abstract>Document-level context has received lots of attention for compensating neural machine translation (NMT) of isolated sentences. However, recent advances in document-level NMT focus on sophisticated integration of the context, explaining its improvement with only a few selected examples or targeted test sets. We extensively quantify the causes of improvements by a document-level model in general test sets, clarifying the limit of the usefulness of document-level context in NMT. We show that most of the improvements are not interpretable as utilizing the context. We also show that a minimal encoding is sufficient for the context modeling and very long context is not helpful for NMT.</abstract>
       <url hash="8095d1ed">D19-6503</url>
@@ -13842,7 +13842,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Takumi</first><last>Ohtani</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>45–50</pages>
       <abstract>We present neural machine translation models for translating a sentence in a text by using a graph-based encoder which can consider coreference relations provided within the text explicitly. The graph-based encoder can dynamically encode the source text without attending to all tokens in the text. In experiments, our proposed models provide statistically significant improvement to the previous approach of at most 0.9 points in the BLEU score on the OpenSubtitle2018 English-to-Japanese data set. Experimental results also show that the graph-based encoder can handle a longer text well, compared with the previous approach.</abstract>
       <url hash="669cf81c">D19-6505</url>
@@ -13852,7 +13852,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="6">
       <title>Analysing concatenation approaches to document-level <fixed-case>NMT</fixed-case> in two different domains</title>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <pages>51–61</pages>
       <abstract>In this paper, we investigate how different aspects of discourse context affect the performance of recent neural MT systems. We describe two popular datasets covering news and movie subtitles and we provide a thorough analysis of the distribution of various document-level features in their domains. Furthermore, we train a set of context-aware MT models on both datasets and propose a comparative evaluation scheme that contrasts coherent context with artificially scrambled documents and absent context, arguing that the impact of discourse-aware MT models will become visible in this way. Our results show that the models are indeed affected by the manipulation of the test data, providing a different view on document-level translation quality than absolute sentence-level scores.</abstract>
@@ -13909,7 +13909,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <author><first>Wei</first><last>Fang</last></author>
       <author><first>Moin</first><last>Nadeem</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>13–19</pages>
       <abstract>We present a multi-task learning model that leverages large amount of textual information from existing datasets to improve stance prediction. In particular, we utilize multiple NLP tasks under both unsupervised and supervised settings for the target stance prediction task. Our model obtains state-of-the-art performance on a public benchmark dataset, Fake News Challenge, outperforming current approaches by a wide margin.</abstract>
       <url hash="a123bec3">D19-6603</url>
@@ -14037,7 +14037,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="15">
       <title><fixed-case>FEVER</fixed-case> Breaker’s Run of Team <fixed-case>N</fixed-case>b<fixed-case>A</fixed-case>uz<fixed-case>D</fixed-case>r<fixed-case>L</fixed-case>qg</title>
       <author><first>Youngwoo</first><last>Kim</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>99–104</pages>
       <abstract>We describe our submission for the Breaker phase of the second Fact Extraction and VERification (FEVER) Shared Task. Our adversarial data can be explained by two perspectives. First, we aimed at testing model’s ability to retrieve evidence, when appropriate query terms could not be easily generated from the claim. Second, we test model’s ability to precisely understand the implications of the texts, which we expect to be rare in FEVER 1.0 dataset. Overall, we suggested six types of adversarial attacks. The evaluation on the submitted systems showed that the systems were only able get both the evidence and label correct in 20% of the data. We also demonstrate our adversarial run analysis in the data development process.</abstract>
       <url hash="10ac9e95">D19-6615</url>
@@ -14047,7 +14047,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
     <paper id="16">
       <title>Team <fixed-case>DOMLIN</fixed-case>: Exploiting Evidence Enhancement for the <fixed-case>FEVER</fixed-case> Shared Task</title>
       <author><first>Dominik</first><last>Stammbach</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>105–109</pages>
       <abstract>This paper contains our system description for the second Fact Extraction and VERification (FEVER) challenge. We propose a two-staged sentence selection strategy to account for examples in the dataset where evidence is not only conditioned on the claim, but also on previously retrieved evidence. We use a publicly available document retrieval module and have fine-tuned BERT checkpoints for sentence se- lection and as the entailment classifier. We report a FEVER score of 68.46% on the blind testset.</abstract>
       <url hash="bde3121a">D19-6616</url>
@@ -14058,8 +14058,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system.</revision>
       <title>Team <fixed-case>GPLSI</fixed-case>. Approach for automated fact checking</title>
       <author><first>Aimée</first><last>Alonso-Reina</last></author>
       <author><first>Robiert</first><last>Sepúlveda-Torres</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>110–114</pages>
       <abstract>Fever Shared 2.0 Task is a challenge meant for developing automated fact checking systems. Our approach for the Fever 2.0 is based on a previous proposal developed by Team Athene UKP TU Darmstadt. Our proposal modifies the sentence retrieval phase, using statement extraction and representation in the form of triplets (subject, object, action). Triplets are extracted from the claim and compare to triplets extracted from Wikipedia articles using semantic similarity. Our results are satisfactory but there is room for improvement.</abstract>
       <url hash="3173f18f">D19-6617</url>
diff --git a/data/xml/E03.xml b/data/xml/E03.xml
index 9ab4b76168..6399a151f6 100644
--- a/data/xml/E03.xml
+++ b/data/xml/E03.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>10th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
       <editor><first>Ann</first><last>Copestake</last></editor>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Budapest, Hungary</address>
       <year>2003</year>
@@ -16,13 +16,13 @@
     </frontmatter>
     <paper id="1">
       <title>Multilingual Access to Large Spoken Archives (Invited talk)</title>
-      <author><first>Doug</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Doug</first><last>Oard</last></author>
       <url hash="cdb8c573">E03-1001</url>
       <bibkey>oard-2003-multilingual</bibkey>
     </paper>
     <paper id="2">
       <title>Neural Network Probability Estimation for Broad Coverage Parsing</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <url hash="969a1a3b">E03-1002</url>
       <bibkey>henderson-2003-neural</bibkey>
     </paper>
@@ -35,9 +35,9 @@
     </paper>
     <paper id="4">
       <title><fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Dependency Tree-based Machine Translation</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
-      <author><first>Jan</first><last>Cuřín</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
+      <author id="jan-curin"><first>Jan</first><last>Cuřín</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
       <url hash="7156d610">E03-1004</url>
       <bibkey>cmejrek-etal-2003-czech</bibkey>
     </paper>
@@ -51,21 +51,21 @@
       <title><fixed-case>F</fixed-case>rench Amalgam: a quick adaptation of a sentence realization system to <fixed-case>F</fixed-case>rench</title>
       <author><first>Martine</first><last>Smets</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <url hash="e5b97fe2">E03-1006</url>
       <bibkey>smets-etal-2003-french</bibkey>
     </paper>
     <paper id="7">
       <title>Using <fixed-case>POS</fixed-case> Information for <fixed-case>SMT</fixed-case> into Morphologically Rich Languages</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="7326e8c5">E03-1007</url>
       <bibkey>ueffing-ney-2003-using</bibkey>
     </paper>
     <paper id="8">
       <title>Bootstrapping statistical parsers from small datasets</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
@@ -96,21 +96,21 @@
     <paper id="11">
       <title>Mining Web Sites Using Unsupervised Adaptive Information Extraction</title>
       <author><first>Alexiei</first><last>Dingli</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <author><first>David</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="49611066">E03-1011</url>
       <bibkey>dingli-etal-2003-mining</bibkey>
     </paper>
     <paper id="12">
       <title>Annotated <fixed-case>H</fixed-case>ungarian National Corpus</title>
-      <author><first>Zoltán</first><last>Alexin</last></author>
+      <author id="zoltan-alexin"><first>Zoltán</first><last>Alexin</last></author>
       <author><first>János</first><last>Csirik</last></author>
-      <author><first>Tibor</first><last>Gyimóthy</last></author>
+      <author id="tibor-gyimothy"><first>Tibor</first><last>Gyimóthy</last></author>
       <author><first>Károly</first><last>Bibok</last></author>
       <author><first>Csaba</first><last>Hatvani</last></author>
-      <author><first>Gábor</first><last>Prószéky</last></author>
-      <author><first>László</first><last>Tihanyi</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
+      <author id="laszlo-tihanyi"><first>László</first><last>Tihanyi</last></author>
       <url hash="a8a1a2ca">E03-1012</url>
       <bibkey>alexin-etal-2003-annotated</bibkey>
     </paper>
@@ -124,8 +124,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>A</fixed-case>rabic Syntactic Trees: from Constituency to Dependency</title>
-      <author><first>Zdenek</first><last>Zabokrtsky</last></author>
-      <author><first>Otakar</first><last>Smrz</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdenek</first><last>Zabokrtsky</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrz</last></author>
       <url hash="c44ab446">E03-1014</url>
       <bibkey>zabokrtsky-smrz-2003-arabic</bibkey>
     </paper>
@@ -150,7 +150,7 @@
       <bibkey>poibeau-etal-2003-multilingual</bibkey>
     </paper>
     <paper id="16">
-      <author><first>Serge A.</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge A.</first><last>Yablonsky</last></author>
       <title>The Corpora Management System Based on <fixed-case>J</fixed-case>ava and Oracle Technologies</title>
       <url hash="e78aac20">E03-1016</url>
       <bibkey>yablonsky-2003-corpora</bibkey>
@@ -164,7 +164,7 @@
     <paper id="18">
       <title>Beyond Lexical Units: Enriching <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets with Phrasets</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <url hash="8b581deb">E03-1018</url>
       <bibkey>bentivogli-pianta-2003-beyond</bibkey>
     </paper>
@@ -183,7 +183,7 @@
     </paper>
     <paper id="21">
       <title>Summarizing Neonatal Time Series Data</title>
-      <author><first>Somayajulu G.</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu G.</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Jim</first><last>Hunter</last></author>
       <author><first>Jin</first><last>Yu</last></author>
@@ -194,7 +194,7 @@
       <title>Creating a multilingual collocations dictionary from large text corpora</title>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="c190824e">E03-1022</url>
       <bibkey>nerima-etal-2003-creating</bibkey>
     </paper>
@@ -224,7 +224,7 @@
     </paper>
     <paper id="26">
       <title>Combining Clues for Word Alignment</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="03b87d72">E03-1026</url>
       <bibkey>tiedemann-2003-combining</bibkey>
     </paper>
@@ -244,8 +244,8 @@
     <paper id="29">
       <title>Automatic Construction of Machine Translation Knowledge Using Translation Literalness</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="800066ee">E03-1029</url>
       <bibkey>imamura-etal-2003-automatic</bibkey>
     </paper>
@@ -264,15 +264,15 @@
     </paper>
     <paper id="32">
       <title>Efficient Search for Interactive Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="e34e68c0">E03-1032</url>
       <bibkey>och-etal-2003-efficient</bibkey>
     </paper>
     <paper id="33">
       <title>Rigid grammars in the Associative-Commutative <fixed-case>L</fixed-case>ambek Calculus are not learnable</title>
-      <author><first>Christophe Costa</first><last>Florencio</last></author>
+      <author id="christophe-costa-florencio"><first>Christophe Costa</first><last>Florencio</last></author>
       <url hash="ab0dc175">E03-1033</url>
       <bibkey>florencio-2003-rigid</bibkey>
     </paper>
@@ -285,28 +285,28 @@
     </paper>
     <paper id="35">
       <title>Learning Translations of Named-Entity Phrases from Parallel Corpora</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="eb303551">E03-1035</url>
       <bibkey>moore-2003-learning</bibkey>
     </paper>
     <paper id="36">
       <title>Multi-<fixed-case>M</fixed-case>odal <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <url hash="ef71b8e7">E03-1036</url>
       <bibkey>baldridge-kruijff-2003-multi</bibkey>
     </paper>
     <paper id="37">
       <title>Experiments on the Choice of Features for Learning Verb Classes</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <url hash="c0a8473a">E03-1037</url>
       <bibkey>schulte-im-walde-2003-experiments</bibkey>
     </paper>
     <paper id="38">
       <title>Named Entity Recognition For <fixed-case>C</fixed-case>atalan Using Only <fixed-case>S</fixed-case>panish Resources and Unlabelled Data</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <url hash="48c34c87">E03-1038</url>
       <bibkey>carreras-etal-2003-named</bibkey>
     </paper>
@@ -340,8 +340,8 @@
     </paper>
     <paper id="43">
       <title>Contents and evaluation of the first <fixed-case>S</fixed-case>lovenian-<fixed-case>G</fixed-case>erman online dictionary</title>
-      <author><first>Birte</first><last>Lönneker</last></author>
-      <author><first>Primoz</first><last>Jakopin</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker</last></author>
+      <author id="primoz-jakopin"><first>Primoz</first><last>Jakopin</last></author>
       <url hash="3e8e1140">E03-1043</url>
       <bibkey>lonneker-jakopin-2003-contents</bibkey>
     </paper>
@@ -367,19 +367,19 @@
       <title>Lexicalized Grammar Acquisition</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Takashi</first><last>Ninomiya</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="18588299">E03-1047</url>
       <bibkey>miyao-etal-2003-lexicalized</bibkey>
     </paper>
     <paper id="48">
       <title>A corpus-centered approach to spoken language translation</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
       <author><first>Takao</first><last>Doi</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <url hash="1419f9d0">E03-1048</url>
       <bibkey>sumita-etal-2003-corpus</bibkey>
@@ -392,7 +392,7 @@
     </paper>
     <paper id="50">
       <title>Using Noisy Biligual Data for Statistical Machine Translation</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="40aad9c3">E03-1050</url>
       <bibkey>vogel-2003-using</bibkey>
     </paper>
@@ -400,7 +400,7 @@
       <title>Learning <fixed-case>PP</fixed-case> attachment for filtering prosodic phrasing</title>
       <author><first>Olga</first><last>van Herwijnen</last></author>
       <author><first>Jacques</first><last>Terken</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
       <url hash="9f212765">E03-1051</url>
       <bibkey>van-herwijnen-etal-2003-learning</bibkey>
@@ -408,7 +408,7 @@
     <paper id="52">
       <title>Constraint Based Integration of Deep and Shallow Parsing Techniques</title>
       <author><first>Michael</first><last>Daum</last></author>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <url hash="cd338cde">E03-1052</url>
       <bibkey>daum-etal-2003-constraint</bibkey>
@@ -417,14 +417,14 @@
       <title>Language Independent Authorship Attribution with Character Level N-Grams</title>
       <author><first>Fuchun</first><last>Peng</last></author>
       <author><first>Dale</first><last>Schuurmans</last></author>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <author><first>Shaojun</first><last>Wang</last></author>
       <url hash="affe3fa3">E03-1053</url>
       <bibkey>peng-etal-2003-language</bibkey>
     </paper>
     <paper id="54">
       <title>Information Structure in Topological Dependency Grammar</title>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
       <url hash="c49d7046">E03-1054</url>
       <bibkey>kruijff-duchier-2003-information</bibkey>
@@ -433,8 +433,8 @@
       <title>Comparison of Alignment Templates and Maximum Entropy Models for <fixed-case>NLP</fixed-case></title>
       <author><first>Oliver</first><last>Bender</last></author>
       <author><first>Klaus</first><last>Macherey</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="fa16c47b">E03-1055</url>
       <bibkey>bender-etal-2003-comparison</bibkey>
     </paper>
@@ -451,9 +451,9 @@
     </paper>
     <paper id="57">
       <title>Producing Contextually Appropriate Intonation is an Information-State Based Dialogue System</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <author><first>Stina</first><last>Ericsson</last></author>
-      <author><first>Kepa J.</first><last>Rodríguez</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa J.</first><last>Rodríguez</last></author>
       <author><first>Elena</first><last>Karagjosova</last></author>
       <url hash="3ab29505">E03-1057</url>
       <bibkey>kruijff-korbayova-etal-2003-producing</bibkey>
@@ -474,8 +474,8 @@
     </paper>
     <paper id="60">
       <title><fixed-case>WEBCOOP</fixed-case>: A Cooperative Question Answering System on the Web</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Patrick</first><last>Saint Dizier</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint Dizier</last></author>
       <url hash="76a5a945">E03-1060</url>
       <bibkey>benamara-saint-dizier-2003-webcoop</bibkey>
     </paper>
@@ -483,7 +483,7 @@
       <title>Automatic Acquisition of Script Knowledge from a Text Collection</title>
       <author><first>Toshiaki</first><last>Fujiki</last></author>
       <author><first>Hidetsugu</first><last>Nanba</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="6f633a23">E03-1061</url>
       <bibkey>fujiki-etal-2003-automatic</bibkey>
     </paper>
@@ -495,15 +495,15 @@
     </paper>
     <paper id="63">
       <title>The Corpora Management System Based on <fixed-case>J</fixed-case>ava and Oracle Technologies</title>
-      <author><first>Serge A.</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge A.</first><last>Yablonsky</last></author>
       <url hash="c7bb0686">E03-1063</url>
       <bibkey>yablonsky-2003-corpora-management</bibkey>
     </paper>
     <paper id="64">
       <title>How to build a <fixed-case>QA</fixed-case> system in your back-garden: application for <fixed-case>R</fixed-case>omanian</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Doina</first><last>Tatar</last></author>
-      <author><first>Gabriela</first><last>Şerban</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="doina-tatar"><first>Doina</first><last>Tatar</last></author>
+      <author id="gabriela-serban"><first>Gabriela</first><last>Şerban</last></author>
       <author><first>Dana</first><last>Lupsa</last></author>
       <author><first>Adrian</first><last>Oneţ</last></author>
       <url hash="089c58a5">E03-1064</url>
@@ -513,31 +513,31 @@
       <title><fixed-case>NLP</fixed-case> for Indexing and Retrieval of Captioned Photographs</title>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Katerina</first><last>Pastra</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="c006e326">E03-1065</url>
       <bibkey>saggion-etal-2003-nlp</bibkey>
     </paper>
     <paper id="66">
       <title><fixed-case>CAST</fixed-case>: A computer-aided summarisation tool</title>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Laura</first><last>Hasler</last></author>
       <url hash="3fed88f8">E03-1066</url>
       <bibkey>orasan-etal-2003-cast</bibkey>
     </paper>
     <paper id="67">
       <title>Domain-transcending mappings in a system for metaphorical reasoning</title>
-      <author><first>John A.</first><last>Barnden</last></author>
-      <author><first>Sheila R.</first><last>Glasbey</last></author>
-      <author><first>Mark G.</first><last>Lee</last></author>
-      <author><first>Alan M.</first><last>Wallington</last></author>
+      <author id="john-barnden"><first>John A.</first><last>Barnden</last></author>
+      <author id="sheila-r-glasbey"><first>Sheila R.</first><last>Glasbey</last></author>
+      <author id="mark-lee"><first>Mark G.</first><last>Lee</last></author>
+      <author id="alan-wallington"><first>Alan M.</first><last>Wallington</last></author>
       <url hash="0f7978b2">E03-1067</url>
       <bibkey>barnden-etal-2003-domain</bibkey>
     </paper>
     <paper id="68">
       <title>Detecting Errors in Part-of-Speech Annotation</title>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <url hash="79e37723">E03-1068</url>
       <bibkey>dickinson-meurers-2003-detecting</bibkey>
     </paper>
@@ -552,21 +552,21 @@
     <paper id="70">
       <title><fixed-case>QUALIFIER</fixed-case>: Question Answering by Lexical Fabric and External Resources</title>
       <author><first>Hui</first><last>Yang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <url hash="bb731f81">E03-1070</url>
       <bibkey>yang-chua-2003-qualifier</bibkey>
     </paper>
     <paper id="71">
       <title>Investigating <fixed-case>GIS</fixed-case> and Smoothing for Maximum Entropy Taggers</title>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <url hash="b8e9d8a5">E03-1071</url>
       <bibkey>curran-clark-2003-investigating</bibkey>
     </paper>
     <paper id="72">
       <title>The Role of Initiative in Tutorial Dialogue</title>
-      <author><first>Mark G.</first><last>Core</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="mark-g-core"><first>Mark G.</first><last>Core</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Claus</first><last>Zinn</last></author>
       <url hash="84fba852">E03-1072</url>
       <bibkey>core-etal-2003-role</bibkey>
@@ -586,14 +586,14 @@
     </paper>
     <paper id="75">
       <title>Targeted Help for Spoken Dialogue Systems</title>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
-      <author><first>Ellen</first><last>Campana</last></author>
+      <author id="ellen-campana"><first>Ellen</first><last>Campana</last></author>
       <author><first>Laura</first><last>Hiatt</last></author>
-      <author><first>Gregory</first><last>Aist</last></author>
-      <author><first>James</first><last>Hieronymus</last></author>
+      <author id="gregory-aist"><first>Gregory</first><last>Aist</last></author>
+      <author id="james-hieronymus"><first>James</first><last>Hieronymus</last></author>
       <author><first>Alexander</first><last>Gruenstein</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="ce17ce7f">E03-1075</url>
       <bibkey>hockey-etal-2003-targeted</bibkey>
     </paper>
@@ -606,8 +606,8 @@
     </paper>
     <paper id="77">
       <title>An Integrated Term-Based Corpus Query System</title>
-      <author><first>Irena</first><last>Spasic</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Kostas</first><last>Manios</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <url hash="baaa84a4">E03-1077</url>
@@ -615,8 +615,8 @@
     </paper>
     <paper id="78">
       <title>Transparent combination of rule-based and data-driven approaches in speech understanding</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <url hash="d30df541">E03-1078</url>
       <bibkey>rayner-hockey-2003-transparent</bibkey>
     </paper>
@@ -635,8 +635,8 @@
     </paper>
     <paper id="81">
       <title>Categorial Fluidity in <fixed-case>C</fixed-case>hinese and its Implications for Part-of-speech Tagging</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <url hash="c2d697dc">E03-1081</url>
       <bibkey>kwong-tsou-2003-categorial</bibkey>
     </paper>
@@ -664,7 +664,7 @@
       <title>Creating a multilingual collocations dictionary from large text corpora</title>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="98072ee7">E03-1083</url>
       <bibkey>nerima-etal-2003-creating-multilingual</bibkey>
     </paper>
@@ -679,9 +679,9 @@
       <title><fixed-case>PEAS</fixed-case>, the first instantiation of a comparative framework for evaluating parsers of <fixed-case>F</fixed-case>rench</title>
       <author><first>Véronique</first><last>Gendner</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Michèle</first><last>Jardino</last></author>
+      <author id="michele-jardino"><first>Michèle</first><last>Jardino</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <url hash="5e1dcb83">E03-1085</url>
@@ -719,7 +719,7 @@
     </frontmatter>
     <paper id="1">
       <title>Talking through Procedures: An Intelligent Space Station Procedure Assistant</title>
-      <author><first>Greg</first><last>Aist</last></author>
+      <author id="gregory-aist"><first>Greg</first><last>Aist</last></author>
       <author id="john-dowding"><first>J.</first><last>Dowding</last></author>
       <author id="beth-ann-hockey"><first>B. A.</first><last>Hockey</last></author>
       <author id="manny-rayner"><first>M.</first><last>Rayner</last></author>
@@ -754,16 +754,16 @@
     </paper>
     <paper id="4">
       <title>A dialogue system with contextually appropriate spoken output intonation</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <author><first>Elena</first><last>Karagjosova</last></author>
-      <author><first>Kepa Joseba</first><last>Rodriguez</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa Joseba</first><last>Rodriguez</last></author>
       <author><first>Stina</first><last>Ericsson</last></author>
       <url hash="cc8bd876">E03-2004</url>
       <bibkey>kruijff-korbayova-etal-2003-dialogue</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>WYSIWYM</fixed-case> - building user interfaces with natural language feedback</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <url hash="1c583a1f">E03-2005</url>
       <bibkey>evans-power-2003-wysiwym</bibkey>
@@ -778,7 +778,7 @@
     <paper id="7">
       <title><fixed-case>WASPBENCH</fixed-case>: a lexicographer’s workbench incorporating state-of-the-art word sense disambiguation</title>
       <author><first>Adam</first><last>Kilgarriff</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Michael</first><last>Rundell</last></author>
       <author><first>David</first><last>Tugwell</last></author>
@@ -794,25 +794,25 @@
     <paper id="9">
       <title>Multilingual adaptations of a reusable information extraction tool</title>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <url hash="d3dc4911">E03-2009</url>
       <bibkey>maynard-etal-2003-multilingual</bibkey>
     </paper>
     <paper id="10">
       <title>An Open-Source Environment for Compiling Typed Unification Grammars into Speech Recognisers</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="86aa4ac1">E03-2010</url>
       <bibkey>rayner-etal-2003-open</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>AGORA</fixed-case>. Multilingual Multiplatform Architecture for the development of Natural Language Voice Services</title>
-      <author><first>Jose</first><last>Relaño-Gil</last></author>
-      <author><first>Mari Carmen</first><last>Rodriguez-Gancedo</last></author>
+      <author id="jose-relano-gil"><first>Jose</first><last>Relaño-Gil</last></author>
+      <author id="mari-carmen-rodriguez-gancedo"><first>Mari Carmen</first><last>Rodriguez-Gancedo</last></author>
       <author><first>Luis</first><last>Villarrubia</last></author>
-      <author><first>Luis Hernández</first><last>Gomez</last></author>
+      <author id="luis-hernandez"><first>Luis Hernández</first><last>Gomez</last></author>
       <url hash="df6f3a4b">E03-2011</url>
       <bibkey>relano-gil-etal-2003-agora</bibkey>
     </paper>
@@ -827,8 +827,8 @@
     <paper id="13">
       <title>Robust Generic and Query-based Summarization</title>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <url hash="dbac9422">E03-2013</url>
       <bibkey>saggion-etal-2003-robust</bibkey>
     </paper>
@@ -836,22 +836,22 @@
       <title>Event-Coreference across Multiple, Multi-lingual Sources in the Mumis Project</title>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Jan</first><last>Kuper</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Marco</first><last>Puts</last></author>
-      <author><first>Eduard</first><last>Hoenkamp</last></author>
+      <author id="edward-hoenkamp"><first>Eduard</first><last>Hoenkamp</last></author>
       <author><first>Franciska</first><last>de Jong</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="f89c878a">E03-2014</url>
       <bibkey>saggion-etal-2003-event</bibkey>
     </paper>
     <paper id="15">
       <title>Development of Corpora within the <fixed-case>CL</fixed-case>a<fixed-case>RK</fixed-case> System: The <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank Project Experience</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Simov</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Krasimira</first><last>Ivanova</last></author>
+      <author id="krasimira-ivanova"><first>Krasimira</first><last>Ivanova</last></author>
       <author><first>Ilko</first><last>Grigorov</last></author>
       <author><first>Hristo</first><last>Ganev</last></author>
       <url hash="48d55250">E03-2015</url>
@@ -859,7 +859,7 @@
     </paper>
     <paper id="16">
       <title>Integrating Natural Language Generation with <fixed-case>XML</fixed-case> Web Technology</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <url hash="ccb397d6">E03-2016</url>
       <bibkey>wilcock-2003-integrating</bibkey>
     </paper>
@@ -887,15 +887,15 @@
     </paper>
     <paper id="2">
       <title>Cohesion and coherence for Automatic Summarization</title>
-      <author><first>Laura</first><last>Alonso i Alemany</last></author>
-      <author><first>Maria</first><last>Fuentes Fort</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso i Alemany</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes Fort</last></author>
       <url hash="81ce7481">E03-3002</url>
       <bibkey>alonso-i-alemany-fuentes-fort-2003-cohesion</bibkey>
     </paper>
     <paper id="3">
       <title>Clustering Adjectives for Class Discovery</title>
-      <author><first>Gemma</first><last>Boleda Torrent</last></author>
-      <author><first>Laura</first><last>Alonso i Alemany</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda Torrent</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso i Alemany</last></author>
       <url hash="411c3965">E03-3003</url>
       <bibkey>boleda-torrent-alonso-i-alemany-2003-clustering</bibkey>
     </paper>
@@ -908,7 +908,7 @@
     </paper>
     <paper id="5">
       <title>A Dynamic Logic Formalisation of the Dialogue Gameboard</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <url hash="6e56444f">E03-3005</url>
       <bibkey>fernandez-2003-dynamic</bibkey>
     </paper>
diff --git a/data/xml/E06.xml b/data/xml/E06.xml
index 3842f51ea0..845090ae01 100644
--- a/data/xml/E06.xml
+++ b/data/xml/E06.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>11th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Diana</first><last>McCarthy</last></editor>
+      <editor id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></editor>
       <editor><first>Shuly</first><last>Wintner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Trento, Italy</address>
@@ -24,23 +24,23 @@
     </paper>
     <paper id="2">
       <title>Using Encyclopedic Knowledge for Named entity Disambiguation</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>9–16</pages>
       <url hash="f4e8de55">E06-1002</url>
       <bibkey>bunescu-pasca-2006-using</bibkey>
     </paper>
     <paper id="3">
       <title>Weakly Supervised Approaches for Ontology Population</title>
-      <author><first>Hristo</first><last>Tanev</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>17–24</pages>
       <url hash="9e286a4e">E06-1003</url>
       <bibkey>tanev-magnini-2006-weakly</bibkey>
     </paper>
     <paper id="4">
       <title>Computational Complexity of Statistical Machine Translation</title>
-      <author><first>Raghavendra Udupa</first><last>U.</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra Udupa</first><last>U.</last></author>
       <author><first>Hemanta K.</first><last>Maji</last></author>
       <pages>25–32</pages>
       <url hash="08d88745">E06-1004</url>
@@ -50,7 +50,7 @@
       <title>Computing Consensus Translation for Multiple Machine Translation Systems Using Enhanced Hypothesis Alignment</title>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>33–40</pages>
       <url hash="1c704d3d">E06-1005</url>
       <bibkey>matusov-etal-2006-computing</bibkey>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="7">
       <title>Automatic Detection of Nonreferential It in Spoken Multi-Party Dialog</title>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <pages>49–56</pages>
       <url hash="08f4a9a2">E06-1007</url>
       <bibkey>muller-2006-automatic</bibkey>
@@ -80,7 +80,7 @@
     <paper id="9">
       <title>Information Presentation in Spoken Dialogue Systems</title>
       <author><first>Vera</first><last>Demberg</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>65–72</pages>
       <url hash="916fa644">E06-1009</url>
       <bibkey>demberg-moore-2006-information</bibkey>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="12">
       <title>Statistical Dependency Parsing for <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>89–96</pages>
       <url hash="4f0b4541">E06-1012</url>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="13">
       <title>Generalized Hebbian Algorithm for Incremental Singular Value Decomposition in Natural Language Processing</title>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <pages>97–104</pages>
       <url hash="ec4eb6c4">E06-1013</url>
       <bibkey>gorrell-2006-generalized</bibkey>
@@ -118,7 +118,7 @@
     <paper id="14">
       <title>Improving Probabilistic Latent Semantic Analysis with Principal Component Analysis</title>
       <author><first>Ayman</first><last>Farahat</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <pages>105–112</pages>
       <url hash="03852195">E06-1014</url>
       <bibkey>farahat-chen-2006-improving</bibkey>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="16">
       <title>Determining Word Sense Dominance Using a Thesaurus</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>121–128</pages>
       <url hash="1d6649f4">E06-1016</url>
@@ -162,10 +162,10 @@
     </paper>
     <paper id="20">
       <title>Improved Lexical Alignment by Combining Multiple Reified Alignments</title>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <pages>153–160</pages>
       <url hash="d6fb601f">E06-1020</url>
       <bibkey>tufis-etal-2006-improved</bibkey>
@@ -173,7 +173,7 @@
     <paper id="21">
       <title>Towards Robust Context-Sensitive Sentence Alignment for Monolingual Corpora</title>
       <author><first>Rani</first><last>Nelken</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>1611–168</pages>
       <url hash="dce9d0aa">E06-1021</url>
       <bibkey>nelken-shieber-2006-towards</bibkey>
@@ -198,7 +198,7 @@
     <paper id="24">
       <title>Keeping the Initiative: An Empirically-Motivated Approach to Predicting User-Initiated Dialogue Contribution in <fixed-case>HCI</fixed-case></title>
       <author><first>Kerstin</first><last>Fischer</last></author>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <pages>185–192</pages>
       <url hash="efae95da">E06-1024</url>
       <bibkey>fischer-bateman-2006-keeping</bibkey>
@@ -215,7 +215,7 @@
       <title>Latent Variable Models for Semantic Orientations of Phrases</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <author><first>Takashi</first><last>Inui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>201–208</pages>
       <url hash="4dae61fb">E06-1026</url>
       <bibkey>takamura-etal-2006-latent</bibkey>
@@ -231,7 +231,7 @@
     <paper id="28">
       <title>A Figure of Merit for the Evaluation of Web-Corpus Randomness</title>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>217–224</pages>
       <url hash="b6d03e30">E06-1028</url>
       <bibkey>ciaramita-baroni-2006-figure</bibkey>
@@ -241,7 +241,7 @@
       <author><first>Xavier</first><last>Robitaille</last></author>
       <author><first>Yasuhiro</first><last>Sasaki</last></author>
       <author><first>Masatsugu</first><last>Tonoike</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <pages>225–232</pages>
       <url hash="4b80983f">E06-1029</url>
@@ -250,7 +250,7 @@
     <paper id="30">
       <title>Web Text Corpus for Natural Language Processing</title>
       <author><first>Vinci</first><last>Liu</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>233–240</pages>
       <url hash="fff7baf1">E06-1030</url>
       <bibkey>liu-curran-2006-web</bibkey>
@@ -259,7 +259,7 @@
       <title><fixed-case>CDER</fixed-case>: Efficient <fixed-case>MT</fixed-case> Evaluation Using Block Movements</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>241–248</pages>
       <url hash="768218ef">E06-1031</url>
       <bibkey>leusch-etal-2006-cder</bibkey>
@@ -277,7 +277,7 @@
       <title>Adaptive Transformation-Based Learning for Improving Dictionary Tagging</title>
       <author><first>Burcu</first><last>Karagol-Ayan</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>257–264</pages>
       <url hash="71782440">E06-1033</url>
       <bibkey>karagol-ayan-etal-2006-adaptive</bibkey>
@@ -292,7 +292,7 @@
     <paper id="35">
       <title>Automatic Segmentation of Multiparty Dialogue</title>
       <author><first>Pei-Yun</first><last>Hsueh</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <pages>273–280</pages>
       <url hash="45794097">E06-1035</url>
@@ -308,8 +308,8 @@
     </paper>
     <paper id="37">
       <title>Using Reinforcement Learning to Build a Better Model of Dialogue State</title>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>289–296</pages>
       <url hash="dc86a108">E06-1037</url>
       <bibkey>tetreault-litman-2006-using</bibkey>
@@ -324,7 +324,7 @@
     <paper id="39">
       <title>Multi-Document Summarization of Evaluative Text</title>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Adam</first><last>Pauls</last></author>
       <pages>305–312</pages>
       <url hash="19eab3f7">E06-1039</url>
@@ -332,7 +332,7 @@
     </paper>
     <paper id="40">
       <title>Comparing Automatic and Human Evaluation of <fixed-case>NLG</fixed-case> Systems</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>313–320</pages>
       <url hash="fafd2f60">E06-1040</url>
@@ -363,8 +363,8 @@
     </paper>
     <paper id="44">
       <title>Modelling Semantic Role Pausibility in Human Sentence Processing</title>
-      <author><first>Ulrike</first><last>Padó</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <author><first>Frank</first><last>Keller</last></author>
       <pages>345–352</pages>
       <url hash="9d657ab8">E06-1044</url>
@@ -372,16 +372,16 @@
     </paper>
     <paper id="45">
       <title>Data-Driven Generation of Emphatic Facial Displays</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>353–360</pages>
       <url hash="499de0aa">E06-1045</url>
       <bibkey>foster-oberlander-2006-data</bibkey>
     </paper>
     <paper id="46">
       <title>Edit Machines for Robust Multimodal Language Processing</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <pages>361–368</pages>
       <url hash="47c07e06">E06-1046</url>
       <bibkey>bangalore-johnston-2006-edit</bibkey>
@@ -389,9 +389,9 @@
     <paper id="47">
       <title>Parsing <fixed-case>A</fixed-case>rabic Dialects</title>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Safiullah</first><last>Shareef</last></author>
       <pages>369–376</pages>
       <url hash="69079c1c">E06-1047</url>
@@ -399,7 +399,7 @@
     </paper>
     <paper id="48">
       <title>Unifying Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars and Tree Transducers via Bimorphisms</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>377–384</pages>
       <url hash="32604ad0">E06-1048</url>
       <bibkey>shieber-2006-unifying</bibkey>
@@ -423,20 +423,20 @@
     </paper>
     <paper id="51">
       <title>Exploiting Shallow Linguistic Information for Relation Extraction from Biomedical Literature</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
       <pages>401–408</pages>
       <url hash="6145664a">E06-1051</url>
       <bibkey>giuliano-etal-2006-exploiting</bibkey>
     </paper>
     <paper id="52">
       <title>Investigating a Generic Paraphrase-Based Approach for Relation Extraction</title>
-      <author><first>Lorenza</first><last>Romano</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <author><first>Idan</first><last>Szpektor</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>409–416</pages>
       <url hash="cbfc3f71">E06-1052</url>
       <bibkey>romano-etal-2006-investigating</bibkey>
@@ -454,7 +454,7 @@
     </frontmatter>
     <paper id="1">
       <title>Large Linguistically-Processed Web Corpora for Multiple Languages</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <pages>87–90</pages>
       <url hash="d5b25a69">E06-2001</url>
@@ -490,8 +490,8 @@
     <paper id="5">
       <title><fixed-case>XMG</fixed-case> - An Expressive Formalism for Describing Tree-Based Grammars</title>
       <author><first>Yannick</first><last>Parmentier</last></author>
-      <author><first>Joseph Le</first><last>Roux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="joseph-le-roux"><first>Joseph Le</first><last>Roux</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>103–106</pages>
       <url hash="8583b8b1">E06-2005</url>
       <bibkey>parmentier-etal-2006-xmg</bibkey>
@@ -523,7 +523,7 @@
       <title>An <fixed-case>ISU</fixed-case> Dialogue System Exhibiting Reinforcement Learning of Dialogue Policies: Generic Slot-Filling in the <fixed-case>TALK</fixed-case> In-car System</title>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Matthew</first><last>Stuttle</last></author>
       <pages>119–122</pages>
       <url hash="42e8c84e">E06-2009</url>
@@ -533,11 +533,11 @@
       <title>Generating and Visualizing a Soccer Knowledge Base</title>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <author><first>Thomas</first><last>Eigner</last></author>
-      <author><first>Greg</first><last>Gul-rajani</last></author>
+      <author id="greg-gul-rajani"><first>Greg</first><last>Gul-rajani</last></author>
       <author><first>Alexander</first><last>Schutz</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <author><first>Nicolas</first><last>Weber</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Günter</first><last>Ladwig</last></author>
       <author><first>Matthias</first><last>Mantel</last></author>
       <author><first>Honggang</first><last>Zhu</last></author>
@@ -547,7 +547,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>E</fixed-case>sfinge — a Question Answering System in the Web using the Web</title>
-      <author><first>Luís Fernando</first><last>Costa</last></author>
+      <author id="luis-fernando-costa"><first>Luís Fernando</first><last>Costa</last></author>
       <pages>127–130</pages>
       <url hash="6ff53c9d">E06-2011</url>
       <bibkey>costa-2006-esfinge</bibkey>
@@ -557,12 +557,12 @@
       <author><first>Conrad</first><last>Chang</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
       <author><first>John</first><last>Gibson</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <author><first>Suzi</first><last>Lubar</last></author>
       <author><first>Justin</first><last>Palmer</last></author>
       <author><first>Sean</first><last>Munson</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
-      <author><first>Benjamin</first><last>Wellner</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
+      <author id="ben-wellner"><first>Benjamin</first><last>Wellner</last></author>
       <pages>131–134</pages>
       <url hash="2466b26a">E06-2012</url>
       <bibkey>chang-etal-2006-maytag</bibkey>
@@ -581,14 +581,14 @@
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Olga</first><last>Mudraya</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <pages>139–142</pages>
       <url hash="5c71ed70">E06-2014</url>
       <bibkey>sharoff-etal-2006-assist</bibkey>
     </paper>
     <paper id="15">
       <title>Semantic Role Labeling for Coreference Resolution</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>143–146</pages>
       <url hash="c8dc779e">E06-2015</url>
@@ -596,15 +596,15 @@
     </paper>
     <paper id="16">
       <title>The <fixed-case>GOD</fixed-case> model</title>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <pages>147–150</pages>
       <url hash="c40bd354">E06-2016</url>
       <bibkey>gliozzo-2006-god</bibkey>
     </paper>
     <paper id="17">
       <title>Computing Term Translation Probabilities with Generalized Latent Semantic Analysis</title>
-      <author><first>Irina</first><last>Matveeva</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>151–154</pages>
       <url hash="630b7b4e">E06-2017</url>
       <bibkey>matveeva-levow-2006-computing</bibkey>
@@ -618,7 +618,7 @@
     </paper>
     <paper id="19">
       <title>Classifying Biological Full-Text Articles for Multi-Database Curation</title>
-      <author><first>Wen-Juan</first><last>Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen-Juan</first><last>Hou</last></author>
       <author><first>Chih</first><last>Lee</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>159–162</pages>
@@ -628,9 +628,9 @@
     <paper id="20">
       <title>Generating Spatio-Temporal Descriptions in Pollen Forecasts</title>
       <author><first>Ross</first><last>Turner</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Ian P</first><last>Davy</last></author>
+      <author id="ian-p-davy"><first>Ian P</first><last>Davy</last></author>
       <pages>163–166</pages>
       <url hash="bbe14b88">E06-2020</url>
       <bibkey>turner-etal-2006-generating</bibkey>
@@ -659,8 +659,8 @@
     </paper>
     <paper id="24">
       <title>A Suite of Shallow Processing Tools for <fixed-case>P</fixed-case>ortuguese: <fixed-case>LX</fixed-case>-Suite</title>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <pages>179–182</pages>
       <url hash="f0f2c786">E06-2024</url>
       <bibkey>branco-silva-2006-suite</bibkey>
@@ -688,7 +688,7 @@
     </paper>
     <paper id="28">
       <title><fixed-case>B</fixed-case>ayesian Network, a Model for <fixed-case>NLP</fixed-case>?</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <pages>195–198</pages>
       <url hash="f1504eb0">E06-2028</url>
       <bibkey>weissenbacher-2006-bayesian</bibkey>
@@ -704,9 +704,9 @@
     <paper id="30">
       <title>Developments in Affect Detection in <fixed-case>E</fixed-case>-drama</title>
       <author id="li-zhang-birmingham"><first>Li</first><last>Zhang</last></author>
-      <author><first>John A.</first><last>Barnden</last></author>
-      <author><first>Robert J.</first><last>Hendley</last></author>
-      <author><first>Alan M.</first><last>Wallington</last></author>
+      <author id="john-barnden"><first>John A.</first><last>Barnden</last></author>
+      <author id="robert-j-hendley"><first>Robert J.</first><last>Hendley</last></author>
+      <author id="alan-wallington"><first>Alan M.</first><last>Wallington</last></author>
       <pages>203–206</pages>
       <url hash="4eb3a0b3">E06-2030</url>
       <bibkey>zhang-etal-2006-developments</bibkey>
@@ -715,7 +715,7 @@
       <title>Why Are They Excited? Identifying and Explaining Spikes in Blog Mood Levels</title>
       <author><first>Krisztian</first><last>Balog</last></author>
       <author><first>Gilad</first><last>Mishne</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>207–210</pages>
       <url hash="1f8024d5">E06-2031</url>
       <bibkey>balog-etal-2006-excited</bibkey>
@@ -782,7 +782,7 @@
     </paper>
     <paper id="8">
       <title>Towards Robust <fixed-case>A</fixed-case>nimacy Classification Using Morphosyntactic Distributional Features</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>47–54</pages>
       <url hash="e9b9a1cf">E06-3008</url>
       <bibkey>ovrelid-2006-towards</bibkey>
diff --git a/data/xml/E09.xml b/data/xml/E09.xml
index 2bf60f8324..dee778e71e 100644
--- a/data/xml/E09.xml
+++ b/data/xml/E09.xml
@@ -43,23 +43,23 @@
       <title>Contextual Phrase-Level Polarity Analysis Using Lexical Affect Scoring and Syntactic <fixed-case>N</fixed-case>-Grams</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Fadi</first><last>Biadsy</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>24–32</pages>
       <url hash="9834a084">E09-1004</url>
       <bibkey>agarwal-etal-2009-contextual</bibkey>
     </paper>
     <paper id="5">
       <title>Personalizing <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank for Word Sense Disambiguation</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>33–41</pages>
       <url hash="c47d5782">E09-1005</url>
       <bibkey>agirre-soroa-2009-personalizing</bibkey>
     </paper>
     <paper id="6">
       <title>Supervised Domain Adaption for <fixed-case>WSD</fixed-case></title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <pages>42–50</pages>
       <url hash="d26069eb">E09-1006</url>
       <bibkey>agirre-lopez-de-lacalle-2009-supervised</bibkey>
@@ -99,15 +99,15 @@
       <title>Syntactic Phrase Reordering for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation</title>
       <author><first>Ibrahim</first><last>Badr</last></author>
       <author><first>Rabih</first><last>Zbib</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>86–93</pages>
       <url hash="04d5ad94">E09-1011</url>
       <bibkey>badr-etal-2009-syntactic</bibkey>
     </paper>
     <paper id="12">
       <title>Incremental Parsing Models for Dialog Task Structure</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>94–102</pages>
       <url hash="1adddf8f">E09-1012</url>
       <bibkey>bangalore-stent-2009-incremental</bibkey>
@@ -132,7 +132,7 @@
       <title>Large-Coverage Root Lexicon Extraction for <fixed-case>H</fixed-case>indi</title>
       <author><first>Cohan Sujay</first><last>Carlos</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <pages>121–129</pages>
       <url hash="02c72dc7">E09-1015</url>
       <bibkey>carlos-etal-2009-large</bibkey>
@@ -171,7 +171,7 @@
     </paper>
     <paper id="20">
       <title>An Alignment Algorithm Using Belief Propagation and a Structure-Based Distortion Model</title>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>166–174</pages>
       <url hash="2484de1a">E09-1020</url>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="25">
       <title>Inference Rules and their Application to Recognizing Textual Entailment</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <pages>211–219</pages>
       <url hash="a6f12855">E09-1025</url>
@@ -228,7 +228,7 @@
     <paper id="27">
       <title>Cognitively Motivated Features for Readability Assessment</title>
       <author><first>Lijun</first><last>Feng</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <author><first>Matt</first><last>Huenerfauth</last></author>
       <pages>229–237</pages>
       <url hash="b0edadbf">E09-1027</url>
@@ -237,7 +237,7 @@
     <paper id="28">
       <title>Effects of Word Confusion Networks on Voice Search</title>
       <author><first>Junlan</first><last>Feng</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>238–245</pages>
       <url hash="1b56ec46">E09-1028</url>
       <bibkey>feng-bangalore-2009-effects</bibkey>
@@ -255,8 +255,8 @@
     <paper id="30">
       <title>Reconstructing False Start Errors in Spontaneous Speech Text</title>
       <author><first>Erin</first><last>Fitzgerald</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <pages>255–263</pages>
       <url hash="12008935">E09-1030</url>
       <bibkey>fitzgerald-etal-2009-reconstructing</bibkey>
@@ -272,9 +272,9 @@
     <paper id="32">
       <title>Who is “You”? Combining Linguistic and Gaze Features to Resolve Second-Person References in Dialogue</title>
       <author><first>Matthew</first><last>Frampton</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
-      <author><first>Mario</first><last>Christoudias</last></author>
+      <author id="c-mario-christoudias"><first>Mario</first><last>Christoudias</last></author>
       <author><first>Trevor</first><last>Darrell</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>273–281</pages>
@@ -283,9 +283,9 @@
     </paper>
     <paper id="33">
       <title>Rich Bitext Projection Features for Parse Reranking</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Renjing</first><last>Wang</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>282–290</pages>
       <url hash="dc65db37">E09-1033</url>
       <bibkey>fraser-etal-2009-rich</bibkey>
@@ -293,8 +293,8 @@
     <paper id="34">
       <title>Parsing Mildly Non-Projective Dependency Structures</title>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>291–299</pages>
       <url hash="d019bcd6">E09-1034</url>
       <bibkey>gomez-rodriguez-etal-2009-parsing</bibkey>
@@ -317,7 +317,7 @@
     <paper id="37">
       <title>Cube Summing, Approximate Inference with Non-Local Features, and Dynamic Programming without Semirings</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>318–326</pages>
       <url hash="b6403bde">E09-1037</url>
       <bibkey>gimpel-smith-2009-cube</bibkey>
@@ -334,7 +334,7 @@
     </paper>
     <paper id="39">
       <title>Person Identification from Text and Speech Genre Samples</title>
-      <author><first>Jade</first><last>Goldstein-Stewart</last></author>
+      <author id="jade-goldstein"><first>Jade</first><last>Goldstein-Stewart</last></author>
       <author><first>Ransom</first><last>Winder</last></author>
       <author><first>Roberta</first><last>Sabin</last></author>
       <pages>336–344</pages>
@@ -343,13 +343,13 @@
     </paper>
     <paper id="40">
       <title>End-to-End Evaluation in Simultaneous Translation</title>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Christian</first><last>Fügen</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Muntsin</first><last>Kolss</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <pages>345–353</pages>
       <url hash="2b7b0569">E09-1040</url>
       <bibkey>hamon-etal-2009-end</bibkey>
@@ -382,8 +382,8 @@
     <paper id="44">
       <title>Rule Filtering by Pattern for Efficient Hierarchical Translation</title>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
-      <author><first>Eduardo R.</first><last>Banga</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo R.</first><last>Banga</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>380–388</pages>
       <url hash="0b6d9a65">E09-1044</url>
@@ -391,9 +391,9 @@
     </paper>
     <paper id="45">
       <title>An Empirical Study on Class-Based Word Sense Disambiguation</title>
-      <author><first>Rubén</first><last>Izquierdo</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="ruben-izquierdo"><first>Rubén</first><last>Izquierdo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>389–397</pages>
       <url hash="209b1957">E09-1045</url>
       <bibkey>izquierdo-etal-2009-empirical</bibkey>
@@ -408,8 +408,8 @@
     </paper>
     <paper id="47">
       <title>Parsing Coordinations</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Wolfgang</first><last>Maier</last></author>
       <author><first>Eva</first><last>Klett</last></author>
       <pages>406–414</pages>
@@ -427,7 +427,7 @@
     <paper id="49">
       <title><fixed-case>N</fixed-case>-Gram-Based Statistical Machine Translation versus Syntax Augmented Machine Translation: Comparison and System Combination</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>424–432</pages>
       <url hash="7b371609">E09-1049</url>
       <bibkey>khalilov-fonollosa-2009-n</bibkey>
@@ -466,7 +466,7 @@
     </paper>
     <paper id="54">
       <title>Lattice Parsing to Integrate Speech Recognition and Rule-Based Machine Translation</title>
-      <author><first>Selçuk</first><last>Köprü</last></author>
+      <author id="selcuk-kopru"><first>Selçuk</first><last>Köprü</last></author>
       <author><first>Adnan</first><last>Yazıcı</last></author>
       <pages>469–477</pages>
       <url hash="450a6f85">E09-1054</url>
@@ -482,9 +482,9 @@
     </paper>
     <paper id="56">
       <title>Improvements in Analogical Learning: Application to Translating Multi-Terms of the Medical Domain</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>487–495</pages>
       <url hash="652e08ba">E09-1056</url>
       <bibkey>langlais-etal-2009-improvements</bibkey>
@@ -493,7 +493,7 @@
       <title>Language-Independent Bilingual Terminology Extraction from a Multilingual Parallel Corpus</title>
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>496–504</pages>
       <url hash="4f717a51">E09-1057</url>
       <bibkey>lefever-etal-2009-language</bibkey>
@@ -559,7 +559,7 @@
     <paper id="65">
       <title>Text-to-Text Semantic Similarity for Automatic Short Answer Grading</title>
       <author><first>Michael</first><last>Mohler</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>567–575</pages>
       <url hash="1dd81d12">E09-1065</url>
       <bibkey>mohler-mihalcea-2009-text</bibkey>
@@ -601,7 +601,7 @@
       <title>Analysing <fixed-case>W</fixed-case>ikipedia and Gold-Standard Corpora for <fixed-case>NER</fixed-case> Training</title>
       <author><first>Joel</first><last>Nothman</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>612–620</pages>
       <url hash="6e2bd782">E09-1070</url>
       <bibkey>nothman-etal-2009-analysing</bibkey>
@@ -616,14 +616,14 @@
     </paper>
     <paper id="72">
       <title>Empirical Evaluations of <fixed-case>A</fixed-case>nimacy Annotation</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>630–638</pages>
       <url hash="bfe77050">E09-1072</url>
       <bibkey>ovrelid-2009-empirical</bibkey>
     </paper>
     <paper id="73">
       <title>Outclassing <fixed-case>W</fixed-case>ikipedia in Open-Domain Information Extraction: Weakly-Supervised Acquisition of Attributes over Conceptual Hierarchies</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>639–647</pages>
       <url hash="9e09e275">E09-1073</url>
       <bibkey>pasca-2009-outclassing</bibkey>
@@ -696,7 +696,7 @@
     <paper id="82">
       <title>Word Lattices for Multi-Source Translation</title>
       <author><first>Josh</first><last>Schroeder</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>719–727</pages>
       <url hash="86e4205e">E09-1082</url>
@@ -706,10 +706,10 @@
       <title>Frequency Matters: Pitch Accents and Information Status</title>
       <author><first>Katrin</first><last>Schweitzer</last></author>
       <author><first>Michael</first><last>Walsh</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Arndt</first><last>Riester</last></author>
       <author><first>Antje</first><last>Schweitzer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>728–736</pages>
       <url hash="86c2ba87">E09-1083</url>
       <bibkey>schweitzer-etal-2009-frequency</bibkey>
@@ -743,8 +743,8 @@
     </paper>
     <paper id="87">
       <title>Semi-Supervised Training for the Averaged Perceptron <fixed-case>POS</fixed-case> Tagger</title>
-      <author><first>Drahomíra “johanka”</first><last>Spoustová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Drahomíra “johanka”</first><last>Spoustová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jan</first><last>Raab</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
       <pages>763–771</pages>
@@ -754,7 +754,7 @@
     <paper id="88">
       <title>Sequential Labeling with Latent Variables: An Exact Inference Algorithm and its Efficient Approximation</title>
       <author><first>Xu</first><last>Sun</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>772–780</pages>
       <url hash="5456cf92">E09-1088</url>
       <bibkey>sun-tsujii-2009-sequential</bibkey>
@@ -762,7 +762,7 @@
     <paper id="89">
       <title>Text Summarization Model Based on Maximum Coverage Problem and its Variant</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>781–789</pages>
       <url hash="fbd64082">E09-1089</url>
       <bibkey>takamura-okumura-2009-text</bibkey>
@@ -770,7 +770,7 @@
     <paper id="90">
       <title>Fast Full Parsing by Linear-Chain Conditional Random Fields</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>790–798</pages>
       <url hash="24348007">E09-1090</url>
@@ -778,9 +778,9 @@
     </paper>
     <paper id="91">
       <title><fixed-case>MINT</fixed-case>: A Method for Effective and Scalable Mining of Named Entity Transliterations from Large Comparable Corpora</title>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
-      <author><first>K</first><last>Saravanan</last></author>
-      <author><first>A</first><last>Kumaran</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="k-saravanan"><first>K</first><last>Saravanan</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
       <pages>799–807</pages>
       <url hash="775fcd10">E09-1091</url>
@@ -790,14 +790,14 @@
       <title>Deriving Generalized Knowledge from Corpora Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Abstraction</title>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Phillip</first><last>Michalak</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>808–816</pages>
       <url hash="436dcc8d">E09-1092</url>
       <bibkey>van-durme-etal-2009-deriving</bibkey>
     </paper>
     <paper id="93">
       <title>Learning Efficient Parsing</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>817–825</pages>
       <url hash="b260381f">E09-1093</url>
       <bibkey>van-noord-2009-learning</bibkey>
@@ -806,7 +806,7 @@
       <title>A Robust and Extensible Exemplar-Based Model of Thematic Fit</title>
       <author><first>Bram</first><last>Vandekerckhove</last></author>
       <author><first>Dominiek</first><last>Sandra</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>826–834</pages>
       <url hash="3318df7d">E09-1094</url>
       <bibkey>vandekerckhove-etal-2009-robust</bibkey>
@@ -822,8 +822,8 @@
     </paper>
     <paper id="96">
       <title>Feature-Based Method for Document Alignment in Comparable News Corpora</title>
-      <author><first>Thuy</first><last>Vu</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>843–851</pages>
       <url hash="473e10d8">E09-1096</url>
@@ -834,7 +834,7 @@
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>852–860</pages>
       <url hash="95439864">E09-1097</url>
       <bibkey>wan-etal-2009-improving</bibkey>
@@ -849,7 +849,7 @@
     <paper id="99">
       <title>Language <fixed-case>ID</fixed-case> in the Context of Harvesting Language Data off the Web</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Hoifung</first><last>Poon</last></author>
       <pages>870–878</pages>
       <url hash="256f7579">E09-1099</url>
@@ -916,7 +916,7 @@
     </paper>
     <paper id="5">
       <title>An Open-Source Natural Language Generator for <fixed-case>OWL</fixed-case> Ontologies and its Use in Protege and Second Life</title>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>George</first><last>Karakatsiotis</last></author>
       <author><first>Gerasimos</first><last>Lampouras</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
@@ -939,8 +939,8 @@
     <paper id="7">
       <title>A Comparison of Clausal Coordinate Ellipsis in <fixed-case>E</fixed-case>stonian and <fixed-case>G</fixed-case>erman: Remarkably Similar Elision Rules Allow a Language-Independent Ellipsis-Generation Module</title>
       <author><first>Karin</first><last>Harbusch</last></author>
-      <author><first>Mare</first><last>Koit</last></author>
-      <author><first>Haldur</first><last>Õim</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
+      <author id="haldur-oim"><first>Haldur</first><last>Õim</last></author>
       <pages>25–28</pages>
       <url hash="a6a9c15e">E09-2007</url>
       <bibkey>harbusch-etal-2009-comparison</bibkey>
@@ -955,11 +955,11 @@
     <paper id="9">
       <title>The Software Architecture for the First Challenge on Generating Instructions in Virtual Environments</title>
       <author><first>Alexander</first><last>Koller</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
       <pages>33–36</pages>
       <url hash="6584d6f5">E09-2009</url>
@@ -969,7 +969,7 @@
       <title>Adaptive Natural Language Interaction</title>
       <author><first>Stasinos</first><last>Konstantopoulos</last></author>
       <author><first>Athanasios</first><last>Tegos</last></author>
-      <author><first>Dimitrios</first><last>Bilidas</last></author>
+      <author id="dimitrios-bilidas"><first>Dimitrios</first><last>Bilidas</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <author><first>Gerasimos</first><last>Lampouras</last></author>
       <author><first>Colin</first><last>Matheson</last></author>
@@ -981,7 +981,7 @@
     </paper>
     <paper id="11">
       <title>Parsing, Projecting &amp; Prototypes: Repurposing Linguistic Data on the Web</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>41–44</pages>
       <url hash="77b96620">E09-2011</url>
@@ -1023,7 +1023,7 @@
       <title>Three <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Tools Powered by a Biological Lexicon</title>
       <author><first>Yutaka</first><last>Sasaki</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>61–64</pages>
       <url hash="d7b21683">E09-2016</url>
@@ -1032,7 +1032,7 @@
     <paper id="17">
       <title>A Mobile Health and Fitness Companion Demonstrator</title>
       <author><first>Olov</first><last>Ståhl</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Markku</first><last>Turunen</last></author>
       <author><first>Jaakko</first><last>Hakulinen</last></author>
       <pages>65–68</pages>
@@ -1080,14 +1080,14 @@
     </paper>
     <paper id="4">
       <title>Finding Word Substitutions Using a Distributional Similarity Baseline and Immediate Context Overlap</title>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <pages>28–36</pages>
       <url hash="e8c0a3ac">E09-3004</url>
       <bibkey>herbelot-2009-finding</bibkey>
     </paper>
     <paper id="5">
       <title>Structural Correspondence Learning for Parse Disambiguation</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>37–45</pages>
       <url hash="0f770e70">E09-3005</url>
       <bibkey>plank-2009-structural</bibkey>
@@ -1123,7 +1123,7 @@
     </paper>
     <paper id="10">
       <title>Aligning Medical Domain Ontologies for Clinical Query Extraction</title>
-      <author><first>Pinar</first><last>Wennerberg</last></author>
+      <author id="pinar-oezden-wennerberg"><first>Pinar</first><last>Wennerberg</last></author>
       <pages>79–87</pages>
       <url hash="48d820ed">E09-3010</url>
       <bibkey>wennerberg-2009-aligning</bibkey>
diff --git a/data/xml/E12.xml b/data/xml/E12.xml
index eed5d478d3..a3fab2e8db 100644
--- a/data/xml/E12.xml
+++ b/data/xml/E12.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 13th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="538b1414">E12-1</url>
-      <editor><first>Walter</first><last>Daelemans</last></editor>
+      <editor id="walter-daelemans"><first>Walter</first><last>Daelemans</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -40,8 +40,8 @@
     </paper>
     <paper id="4">
       <title>Entailment above the word level in distributional semantics</title>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Ngoc-Quynh</first><last>Do</last></author>
       <author><first>Chung-chieh</first><last>Shan</last></author>
       <pages>23–32</pages>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="5">
       <title>Evaluating Distributional Models of Semantics for Syntactically Invariant Inference</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>33–43</pages>
       <url hash="6b8e1e5d">E12-1005</url>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="7">
       <title>Dependency Parsing of <fixed-case>H</fixed-case>ungarian: Baseline Results and Challenges</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>55–65</pages>
@@ -93,7 +93,7 @@
     </paper>
     <paper id="10">
       <title>Answer Sentence Retrieval by Matching Dependency Paths acquired from Question/Answer Sentence Pairs</title>
-      <author><first>Michael</first><last>Kaisser</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
       <pages>88–98</pages>
       <url hash="d71ad7c6">E12-1010</url>
       <bibkey>kaisser-2012-answer</bibkey>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="13">
       <title>Computing Lattice <fixed-case>BLEU</fixed-case> Oracle Scores for Machine Translation</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>120–129</pages>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="15">
       <title>Character-Based Pivot Translation for Under-Resourced Languages and Domains</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>141–151</pages>
       <url hash="46c02aad">E12-1015</url>
       <bibkey>tiedemann-2012-character</bibkey>
@@ -145,10 +145,10 @@
     <paper id="16">
       <title>Does more data always yield better translations?</title>
       <author><first>Guillem</first><last>Gascó</last></author>
-      <author><first>Martha-Alicia</first><last>Rocha</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="martha-alicia-rocha"><first>Martha-Alicia</first><last>Rocha</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>152–161</pages>
       <url hash="d1be8b83">E12-1016</url>
       <bibkey>gasco-etal-2012-data</bibkey>
@@ -159,7 +159,7 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Rishav</first><last>Bhowmick</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>162–173</pages>
       <url hash="6bf3efdc">E12-1017</url>
       <bibkey>mohit-etal-2012-recall</bibkey>
@@ -167,7 +167,7 @@
     <paper id="18">
       <title>Tree Representations in Probabilistic Models for Extended Named Entities Detection</title>
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>174–184</pages>
       <url hash="bc5c22c5">E12-1018</url>
       <bibkey>dinarelli-rosset-2012-tree</bibkey>
@@ -176,9 +176,9 @@
       <title>When Did that Happen? — Linking Events and Relations to Timestamps</title>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>James</first><last>Fan</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Christopher</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Christopher</first><last>Welty</last></author>
       <pages>185–193</pages>
       <url hash="5cecbe7c">E12-1019</url>
       <bibkey>hovy-etal-2012-happen</bibkey>
@@ -186,7 +186,7 @@
     <paper id="20">
       <title>Compensating for Annotation Errors in Training a Relation Extractor</title>
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>194–203</pages>
       <url hash="71c591bf">E12-1020</url>
       <bibkey>min-grishman-2012-compensating</bibkey>
@@ -194,8 +194,8 @@
     <paper id="21">
       <title>Incorporating Lexical Priors into Topic Models</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
       <pages>204–213</pages>
       <url hash="7d40336b">E12-1021</url>
       <bibkey>jagarlamudi-etal-2012-incorporating</bibkey>
@@ -220,18 +220,18 @@
     <paper id="24">
       <title>A Probabilistic Model of Syntactic and Semantic Acquisition from Child-Directed Utterances and their Meanings</title>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>234–244</pages>
       <url hash="3492fd1e">E12-1024</url>
       <bibkey>kwiatkowski-etal-2012-probabilistic</bibkey>
     </paper>
     <paper id="25">
       <title>Active learning for interactive machine translation</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>245–254</pages>
       <url hash="b6deea82">E12-1025</url>
       <bibkey>gonzalez-rubio-etal-2012-active</bibkey>
@@ -248,7 +248,7 @@
     <paper id="27">
       <title>Aspectual Type and Temporal Relation Classification</title>
       <author><first>Francisco</first><last>Costa</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>266–275</pages>
       <url hash="0a077a61">E12-1027</url>
       <bibkey>costa-branco-2012-aspectual</bibkey>
@@ -256,7 +256,7 @@
     <paper id="28">
       <title>Automatic generation of short informative sentiment summaries</title>
       <author><first>Andrea</first><last>Glaser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>276–285</pages>
       <url hash="14fcf59f">E12-1028</url>
       <bibkey>glaser-schutze-2012-automatic</bibkey>
@@ -264,7 +264,7 @@
     <paper id="29">
       <title>Bootstrapped Training of Event Extraction Classifiers</title>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>286–295</pages>
       <url hash="18b88539">E12-1029</url>
       <bibkey>huang-riloff-2012-bootstrapped</bibkey>
@@ -272,7 +272,7 @@
     <paper id="30">
       <title>Bootstrapping Events and Relations from Text</title>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>296–305</pages>
       <url hash="1ae58fec">E12-1030</url>
       <bibkey>liu-strzalkowski-2012-bootstrapping</bibkey>
@@ -280,7 +280,7 @@
     <paper id="31">
       <title><fixed-case>CL</fixed-case>ex: A Lexicon for Exploring Color, Concept and Emotion Associations in Language</title>
       <author><first>Svitlana</first><last>Volkova</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <pages>306–314</pages>
       <url hash="cfb6dc97">E12-1031</url>
@@ -307,7 +307,7 @@
       <author><first>Bram</first><last>Jans</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie Francine</first><last>Moens</last></author>
       <pages>336–344</pages>
       <url hash="b2b98c97">E12-1034</url>
       <bibkey>jans-etal-2012-skip</bibkey>
@@ -346,7 +346,7 @@
     <paper id="39">
       <title><fixed-case>W</fixed-case>eb<fixed-case>CAG</fixed-case>e – A Web-Harvested Corpus Annotated with <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Senses</title>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Tatiana</first><last>Vodolazova</last></author>
       <pages>387–396</pages>
       <url hash="e0b83a44">E12-1039</url>
@@ -362,7 +362,7 @@
     <paper id="41">
       <title>Lexical surprisal as a general predictor of reading time</title>
       <author><first>Irene</first><last>Fernandez Monsalve</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <author><first>Gabriella</first><last>Vigliocco</last></author>
       <pages>398–408</pages>
       <url hash="a52350b6">E12-1041</url>
@@ -381,7 +381,7 @@
     <paper id="43">
       <title>Combining Tree Structures, Flat Features and Patterns for Biomedical Relation Extraction</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>420–429</pages>
       <url hash="165f8a1f">E12-1043</url>
       <bibkey>chowdhury-lavelli-2012-combining</bibkey>
@@ -390,7 +390,7 @@
       <title>Coordination Structure Analysis using Dual Decomposition</title>
       <author><first>Atsushi</first><last>Hanamoto</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>430–438</pages>
       <url hash="5a3b3dee">E12-1044</url>
       <bibkey>hanamoto-etal-2012-coordination</bibkey>
@@ -406,7 +406,7 @@
     <paper id="46">
       <title>Detecting Highly Confident Word Translations from Comparable Corpora without Any Prior Knowledge</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>449–459</pages>
       <url hash="4a5a7ab7">E12-1046</url>
       <bibkey>vulic-moens-2012-detecting</bibkey>
@@ -420,10 +420,10 @@
     </paper>
     <paper id="48">
       <title>Evaluating language understanding accuracy with respect to objective outcomes in a dialogue system</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <author><first>Peter</first><last>Bell</last></author>
       <author><first>Amy</first><last>Isard</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>471–481</pages>
       <url hash="c09f9e56">E12-1048</url>
       <bibkey>dzikovska-etal-2012-evaluating</bibkey>
@@ -440,9 +440,9 @@
       <title>Feature-Rich Part-of-speech Tagging for Morphologically Complex Languages: Application to <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Georgi</first><last>Georgiev</last></author>
       <author><first>Valentin</first><last>Zhikov</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>492–502</pages>
       <url hash="e8efa64d">E12-1050</url>
       <bibkey>georgiev-etal-2012-feature</bibkey>
@@ -450,7 +450,7 @@
     <paper id="51">
       <title>Instance-Driven Attachment of Semantic Annotations over Conceptual Hierarchies</title>
       <author><first>Janara</first><last>Christensen</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>503–513</pages>
       <url hash="b8e9fb1b">E12-1051</url>
       <bibkey>christensen-pasca-2012-instance</bibkey>
@@ -467,9 +467,9 @@
     </paper>
     <paper id="53">
       <title>Learning How to Conjugate the <fixed-case>R</fixed-case>omanian Verb. Rules for Regular and Partially Irregular Verbs</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Octavia-Maria</first><last>Sulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Sulea</last></author>
       <pages>524–528</pages>
       <url hash="1e24dd0c">E12-1053</url>
       <bibkey>dinu-etal-2012-learning</bibkey>
@@ -499,9 +499,9 @@
     <paper id="57">
       <title>The effect of domain and text type on text prediction quality</title>
       <author><first>Suzan</first><last>Verberne</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <pages>561–569</pages>
       <url hash="53e626ea">E12-1057</url>
       <bibkey>verberne-etal-2012-effect</bibkey>
@@ -531,16 +531,16 @@
       <title>Word Sense Induction for Novel Sense Detection</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>David</first><last>Newman</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>591–601</pages>
       <url hash="128e6a1a">E12-1060</url>
       <bibkey>lau-etal-2012-word</bibkey>
     </paper>
     <paper id="61">
       <title>Learning Language from Perceptual Context</title>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>602</pages>
       <url hash="a9336830">E12-1061</url>
       <bibkey>mooney-2012-learning</bibkey>
@@ -548,14 +548,14 @@
     <paper id="62">
       <title>Learning for Microblogs with Distant Supervision: Political Forecasting with <fixed-case>T</fixed-case>witter</title>
       <author><first>Micol</first><last>Marchetti-Bowick</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>603–612</pages>
       <url hash="afdef3b2">E12-1062</url>
       <bibkey>marchetti-bowick-chambers-2012-learning</bibkey>
     </paper>
     <paper id="63">
       <title>Learning from evolving data streams: online triage of bug reports</title>
-      <author><first>Grzegorz</first><last>Chrupala</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupala</last></author>
       <pages>613–622</pages>
       <url hash="00dbf547">E12-1063</url>
       <bibkey>chrupala-2012-learning</bibkey>
@@ -563,7 +563,7 @@
     <paper id="64">
       <title>Towards a model of formal and informal address in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>623–633</pages>
       <url hash="9c549661">E12-1064</url>
       <bibkey>faruqui-pado-2012-towards</bibkey>
@@ -593,7 +593,7 @@
     </paper>
     <paper id="68">
       <title>Modeling Inflection and Word-Formation in <fixed-case>SMT</fixed-case></title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Fabienne</first><last>Cap</last></author>
@@ -613,14 +613,14 @@
       <title>Framework of Semantic Role Assignment based on Extended Lexical Conceptual Structure: Comparison with <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>686–695</pages>
       <url hash="d1e04c35">E12-1070</url>
       <bibkey>matsubayashi-etal-2012-framework</bibkey>
     </paper>
     <paper id="71">
       <title>Unsupervised Detection of Downward-Entailing Operators By Maximizing Classification Certainty</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>696–705</pages>
       <url hash="2f51a4c1">E12-1071</url>
@@ -630,7 +630,7 @@
       <title><fixed-case>E</fixed-case>lliphant: Improved Automatic Detection of Zero Subjects and Impersonal Constructions in <fixed-case>S</fixed-case>panish</title>
       <author><first>Luz</first><last>Rello</last></author>
       <author><first>Ricardo</first><last>Baeza-Yates</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>706–715</pages>
       <url hash="3f60d126">E12-1072</url>
       <bibkey>rello-etal-2012-elliphant</bibkey>
@@ -647,7 +647,7 @@
     <paper id="74">
       <title>Determining the placement of <fixed-case>G</fixed-case>erman verbs in <fixed-case>E</fixed-case>nglish–to–<fixed-case>G</fixed-case>erman <fixed-case>SMT</fixed-case></title>
       <author><first>Anita</first><last>Gojun</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>726–735</pages>
       <url hash="e71ac0a1">E12-1074</url>
       <bibkey>gojun-fraser-2012-determining</bibkey>
@@ -671,9 +671,9 @@
       <author><first>Karl</first><last>Stratos</last></author>
       <author><first>Xufeng</first><last>Han</last></author>
       <author><first>Alyssa</first><last>Mensch</last></author>
-      <author><first>Alex</first><last>Berg</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="alexander-berg"><first>Alex</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>747–756</pages>
       <url hash="ac1a7260">E12-1076</url>
       <bibkey>mitchell-etal-2012-midge</bibkey>
@@ -688,7 +688,7 @@
     </paper>
     <paper id="78">
       <title>To what extent does sentence-internal realisation reflect discourse context? A study on word order</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>767–776</pages>
@@ -710,7 +710,7 @@
       <author><first>John</first><last>McDonough</last></author>
       <author><first>Gahgene</first><last>Gweon</last></author>
       <author><first>Bhiksha</first><last>Raj</last></author>
-      <author><first>Carolyn</first><last>Penstein Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein Rosé</last></author>
       <pages>787–797</pages>
       <url hash="85f84329">E12-1080</url>
       <bibkey>jain-etal-2012-unsupervised</bibkey>
@@ -735,7 +735,7 @@
     </paper>
     <paper id="83">
       <title>Structural and Topical Dimensions in Multi-Task Patent Translation</title>
-      <author><first>Katharina</first><last>Waeschle</last></author>
+      <author id="katharina-waschle"><first>Katharina</first><last>Waeschle</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <pages>818–828</pages>
       <url hash="3e9db535">E12-1083</url>
@@ -762,7 +762,7 @@
     <meta>
       <booktitle>Proceedings of the Demonstrations at the 13th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="768cefe8">E12-2</url>
-      <editor><first>Frédérique</first><last>Segond</last></editor>
+      <editor id="frederique-segond"><first>Frédérique</first><last>Segond</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -777,7 +777,7 @@
       <title>Language Resources Factory: case study on the acquisition of Translation Memories</title>
       <author><first>Marc</first><last>Poch</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>1–5</pages>
       <url hash="a9eb3198">E12-2001</url>
       <bibkey>poch-etal-2012-language</bibkey>
@@ -785,7 +785,7 @@
     <paper id="2">
       <title>Harnessing <fixed-case>NLP</fixed-case> Techniques in the Processes of Multilingual Content Management</title>
       <author><first>Anelia</first><last>Belogay</last></author>
-      <author><first>Diman</first><last>Karagyozov</last></author>
+      <author id="diman-karagyozov"><first>Diman</first><last>Karagyozov</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Cristina</first><last>Vertan</last></author>
       <author><first>Adam</first><last>Przepiórkowski</last></author>
@@ -799,23 +799,23 @@
       <title>Collaborative Machine Translation Service for Scientific texts</title>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Florian</first><last>Zipser</last></author>
       <author><first>Patrice</first><last>Lopez</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <pages>11–15</pages>
       <url hash="6ab2fc20">E12-2003</url>
       <bibkey>lambert-etal-2012-collaborative</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>T</fixed-case>rans<fixed-case>A</fixed-case>head: A Writing Assistant for <fixed-case>CAT</fixed-case> and <fixed-case>CALL</fixed-case></title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
-      <author><first>Mei-hua</first><last>Chen</last></author>
-      <author><first>Hung-ting</first><last>Hsieh</last></author>
-      <author><first>Ting-hui</first><last>Kao</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
+      <author id="mei-hua-chen"><first>Mei-hua</first><last>Chen</last></author>
+      <author id="hung-ting-hsieh"><first>Hung-ting</first><last>Hsieh</last></author>
+      <author id="ting-hui-kao"><first>Ting-hui</first><last>Kao</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>16–19</pages>
       <url hash="ecd1c075">E12-2004</url>
       <bibkey>huang-etal-2012-transahead</bibkey>
@@ -826,7 +826,7 @@
       <author><first>Henri</first><last>Leisma</last></author>
       <author><first>Monika</first><last>Machunik</last></author>
       <author><first>Tuomo</first><last>Kakkonen</last></author>
-      <author><first>Jean-Luc</first><last>LeBrun</last></author>
+      <author id="jean-luc-lebrun"><first>Jean-Luc</first><last>LeBrun</last></author>
       <pages>20–24</pages>
       <url hash="94ef4d93">E12-2005</url>
       <bibkey>kinnunen-etal-2012-swan</bibkey>
@@ -839,7 +839,7 @@
       <author><first>Brett</first><last>Crawley</last></author>
       <author><first>Stefano</first><last>Bucci</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Erik</first><last>Van der Goot</last></author>
+      <author id="erik-van-der-goot"><first>Erik</first><last>Van der Goot</last></author>
       <pages>25–30</pages>
       <url hash="9da42bdd">E12-2006</url>
       <bibkey>turchi-etal-2012-onts</bibkey>
@@ -855,7 +855,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>F</fixed-case>olheador: browsing through <fixed-case>P</fixed-case>ortuguese semantic relations</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Hernani</first><last>Costa</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <pages>35–40</pages>
@@ -866,14 +866,14 @@
       <title>A Computer Assisted Speech Transcription System</title>
       <author><first>Alejandro</first><last>Revuelta-Martínez</last></author>
       <author><first>Luis</first><last>Rodríguez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
       <pages>41–45</pages>
       <url hash="be8f4196">E12-2009</url>
       <bibkey>revuelta-martinez-etal-2012-computer</bibkey>
     </paper>
     <paper id="10">
       <title>A Statistical Spoken Dialogue System using Complex User Goals and Value Directed Compression</title>
-      <author><first>Paul A.</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul A.</first><last>Crook</last></author>
       <author><first>Zhuoran</first><last>Wang</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
@@ -909,7 +909,7 @@
     </paper>
     <paper id="14">
       <title>A Support Platform for Event Detection using Social Intelligence</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <author><first>Bo</first><last>Han</last></author>
       <author><first>Aaron</first><last>Harwood</last></author>
@@ -922,7 +922,7 @@
     <paper id="15">
       <title><fixed-case>NERD</fixed-case>: A Framework for Unifying Named Entity Recognition and Disambiguation Extraction Tools</title>
       <author><first>Giuseppe</first><last>Rizzo</last></author>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <pages>73–76</pages>
       <url hash="744d60c8">E12-2015</url>
       <bibkey>rizzo-troncy-2012-nerd</bibkey>
@@ -951,7 +951,7 @@
       <author><first>Marco</first><last>Trevisan</last></author>
       <author><first>Eduard</first><last>Barbu</last></author>
       <author><first>Igor</first><last>Barsanti</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Nikolaos</first><last>Lagos</last></author>
       <author><first>Frédérique</first><last>Segond</last></author>
       <author><first>Mathieu</first><last>Rhulmann</last></author>
@@ -965,7 +965,7 @@
       <author><first>Valerio</first><last>Basile</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Kilian</first><last>Evang</last></author>
-      <author><first>Noortje</first><last>Venhuizen</last></author>
+      <author id="noortje-venhuizen"><first>Noortje</first><last>Venhuizen</last></author>
       <pages>92–96</pages>
       <url hash="4435225c">E12-2019</url>
       <bibkey>basile-etal-2012-platform</bibkey>
@@ -985,7 +985,7 @@
       <author><first>Goran</first><last>Topić</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>102–107</pages>
       <url hash="cd9f82b2">E12-2021</url>
       <bibkey>stenetorp-etal-2012-brat</bibkey>
@@ -1024,7 +1024,7 @@
     </paper>
     <paper id="3">
       <title>A Comparative Study of Reinforcement Learning Techniques on Dialogue Management</title>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <pages>22–31</pages>
       <url hash="cf49fd85">E12-3003</url>
       <bibkey>papangelis-2012-comparative</bibkey>
@@ -1039,7 +1039,7 @@
     <paper id="5">
       <title>What’s in a Name? Entity Type Variation across Two Biomedical Subdomains</title>
       <author><first>Claudiu</first><last>Mihăilă</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <pages>38–45</pages>
       <url hash="a1606754">E12-3005</url>
       <bibkey>mihaila-batista-navarro-2012-whats</bibkey>
diff --git a/data/xml/E14.xml b/data/xml/E14.xml
index f91513ac6a..3ee4a37017 100644
--- a/data/xml/E14.xml
+++ b/data/xml/E14.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 14th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="11dc9f36">E14-1</url>
       <editor><first>Shuly</first><last>Wintner</last></editor>
-      <editor><first>Sharon</first><last>Goldwater</last></editor>
+      <editor id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></editor>
       <editor><first>Stefan</first><last>Riezler</last></editor>
       <doi>10.3115/v1/E14-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>Undirected Machine Translation with Discriminative Reinforcement Learning</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>10–19</pages>
       <url hash="84496d94">E14-1002</url>
       <doi>10.3115/v1/E14-1002</doi>
@@ -51,7 +51,7 @@
       <title>Maximizing Component Quality in Bilingual Word-Aligned Segmentations</title>
       <author><first>Spyros</first><last>Martzoukos</last></author>
       <author><first>Christof</first><last>Monz</last></author>
-      <author><first>Christophe</first><last>Costa Florêncio</last></author>
+      <author id="christophe-costa-florencio"><first>Christophe</first><last>Costa Florêncio</last></author>
       <pages>30–38</pages>
       <url hash="8b735957">E14-1004</url>
       <doi>10.3115/v1/E14-1004</doi>
@@ -60,8 +60,8 @@
     <paper id="5">
       <title>A Joint Model for Quotation Attribution and Coreference Resolution</title>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
-      <author><first>Miguel B.</first><last>Almeida</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="miguel-b-almeida"><first>Miguel B.</first><last>Almeida</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>39–48</pages>
       <url hash="4db6e7fd">E14-1005</url>
       <doi>10.3115/v1/E14-1005</doi>
@@ -80,9 +80,9 @@
     <paper id="7">
       <title>Inducing Example-based Semantic Frames from a Massive Amount of Verb Uses</title>
       <author><first>Daisuke</first><last>Kawahara</last></author>
-      <author><first>Daniel</first><last>Peterson</last></author>
+      <author id="daniel-peterson"><first>Daniel</first><last>Peterson</last></author>
       <author><first>Octavian</first><last>Popescu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>58–67</pages>
       <url hash="1a5f15eb">E14-1007</url>
       <doi>10.3115/v1/E14-1007</doi>
@@ -110,8 +110,8 @@
     <paper id="10">
       <title>Simple, Robust and (almost) Unsupervised Generation of Polarity Lexicons for Multiple Languages</title>
       <author><first>Iñaki</first><last>San Vicente</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>88–97</pages>
       <url hash="9d4bb2b1">E14-1010</url>
       <doi>10.3115/v1/E14-1010</doi>
@@ -131,7 +131,7 @@
       <author><first>A. Seza</first><last>Doğruöz</last></author>
       <author><first>Phani</first><last>Gadde</last></author>
       <author><first>David</first><last>Adamson</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>107–115</pages>
       <url hash="b828624a">E14-1012</url>
       <doi>10.3115/v1/E14-1012</doi>
@@ -140,7 +140,7 @@
     <paper id="13">
       <title>Modelling the Lexicon in Unsupervised Part of Speech Induction</title>
       <author><first>Gregory</first><last>Dubbin</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>116–125</pages>
       <url hash="f2a178c8">E14-1013</url>
       <doi>10.3115/v1/E14-1013</doi>
@@ -151,7 +151,7 @@
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>126–134</pages>
       <url hash="7755dbf7">E14-1014</url>
       <doi>10.3115/v1/E14-1014</doi>
@@ -160,7 +160,7 @@
     <paper id="15">
       <title>Special Techniques for Constituent Parsing of Morphologically Rich Languages</title>
       <author><first>Zsolt</first><last>Szántó</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>135–144</pages>
       <url hash="bb2af691">E14-1015</url>
       <doi>10.3115/v1/E14-1015</doi>
@@ -169,7 +169,7 @@
     <paper id="16">
       <title>Leveraging Verb-Argument Structures to Infer Semantic Relations</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>145–154</pages>
       <url hash="9e75f53b">E14-1016</url>
       <doi>10.3115/v1/E14-1016</doi>
@@ -178,7 +178,7 @@
     <paper id="17">
       <title>Structured and Unstructured Cache Models for <fixed-case>SMT</fixed-case> Domain Adaptation</title>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>155–163</pages>
       <url hash="a1376db6">E14-1017</url>
       <doi>10.3115/v1/E14-1017</doi>
@@ -239,7 +239,7 @@
       <author><first>Sriramkumar</first><last>Balasubramanian</last></author>
       <author><first>Anup</first><last>Kotalwar</last></author>
       <author><first>Jiehan</first><last>Zheng</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>211–219</pages>
       <url hash="dd379ec0">E14-1023</url>
       <doi>10.3115/v1/E14-1023</doi>
@@ -248,7 +248,7 @@
     <paper id="24">
       <title>Statistical Script Learning with Multi-Argument Events</title>
       <author><first>Karl</first><last>Pichotta</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>220–229</pages>
       <url hash="db57dfd1">E14-1024</url>
       <doi>10.3115/v1/E14-1024</doi>
@@ -266,7 +266,7 @@
     <paper id="26">
       <title>Source-side Preordering for Translation using Logistic Regression and Depth-first Branch-and-Bound Search</title>
       <author><first>Laura</first><last>Jehl</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Mark</first><last>Hopkins</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>239–248</pages>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="28">
       <title>Word Ordering with Phrase-Based Grammars</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Marcus</first><last>Tomalin</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>259–268</pages>
@@ -296,8 +296,8 @@
     <paper id="29">
       <title>Iterative Constrained Clustering for Subjectivity Word Sense Disambiguation</title>
       <author><first>Cem</first><last>Akkaya</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>269–278</pages>
       <url hash="54ae97ee">E14-1029</url>
       <doi>10.3115/v1/E14-1029</doi>
@@ -306,7 +306,7 @@
     <paper id="30">
       <title>Identifying fake <fixed-case>A</fixed-case>mazon reviews as learning from crowds</title>
       <author><first>Tommaso</first><last>Fornaciari</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>279–287</pages>
       <url hash="e2a22ec9">E14-1030</url>
       <doi>10.3115/v1/E14-1030</doi>
@@ -315,7 +315,7 @@
     <paper id="31">
       <title>Assessing the relative reading level of sentence pairs for text simplification</title>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>288–297</pages>
       <url hash="663705d0">E14-1031</url>
       <doi>10.3115/v1/E14-1031</doi>
@@ -343,7 +343,7 @@
     <paper id="34">
       <title>Using idiolects and sociolects to improve word prediction</title>
       <author><first>Wessel</first><last>Stoop</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>318–327</pages>
       <url hash="3bb40d85">E14-1034</url>
       <doi>10.3115/v1/E14-1034</doi>
@@ -352,7 +352,7 @@
     <paper id="35">
       <title>Dynamic Topic Adaptation for Phrase-based <fixed-case>MT</fixed-case></title>
       <author><first>Eva</first><last>Hasler</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <pages>328–337</pages>
@@ -391,7 +391,7 @@
     <paper id="39">
       <title>Fast Statistical Parsing with Parallel Multiple Context-Free Grammars</title>
       <author><first>Krasimir</first><last>Angelov</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>368–376</pages>
       <url hash="a54d1c92">E14-1039</url>
       <doi>10.3115/v1/E14-1039</doi>
@@ -400,7 +400,7 @@
     <paper id="40">
       <title>Sentiment Propagation via Implicature Constraints</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>377–385</pages>
       <url hash="a0675867">E14-1040</url>
       <doi>10.3115/v1/E14-1040</doi>
@@ -408,7 +408,7 @@
     </paper>
     <paper id="41">
       <title>Acquisition of Noncontiguous Class Attributes from Web Search Queries</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>386–394</pages>
       <url hash="9e038b7c">E14-1041</url>
       <doi>10.3115/v1/E14-1041</doi>
@@ -417,8 +417,8 @@
     <paper id="42">
       <title>Learning from Post-Editing: Online Model Adaptation for Statistical Machine Translation</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>395–404</pages>
       <url hash="0b23ed82">E14-1042</url>
       <doi>10.3115/v1/E14-1042</doi>
@@ -428,8 +428,8 @@
       <title>Predicting and Characterising User Impact on <fixed-case>T</fixed-case>witter</title>
       <author><first>Vasileios</first><last>Lampos</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>405–413</pages>
       <url hash="8a23f212">E14-1043</url>
       <doi>10.3115/v1/E14-1043</doi>
@@ -458,8 +458,8 @@
     <paper id="46">
       <title>Improving the Lexical Function Composition Model with Pathwise Optimized Elastic-Net Regression</title>
       <author><first>Jiming</first><last>Li</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <pages>434–442</pages>
       <url hash="0009df29">E14-1046</url>
       <doi>10.3115/v1/E14-1046</doi>
@@ -468,7 +468,7 @@
     <paper id="47">
       <title>Is Machine Translation Getting Better over Time?</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Alistair</first><last>Moffat</last></author>
       <author><first>Justin</first><last>Zobel</last></author>
       <pages>443–451</pages>
@@ -479,7 +479,7 @@
     <paper id="48">
       <title>Learning Dictionaries for Named Entity Recognition using Minimal Supervision</title>
       <author><first>Arvind</first><last>Neelakantan</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>452–461</pages>
       <url hash="15a331cc">E14-1048</url>
       <doi>10.3115/v1/E14-1048</doi>
@@ -488,7 +488,7 @@
     <paper id="49">
       <title>Improving Vector Space Word Representations Using Multilingual Correlation</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>462–471</pages>
       <url hash="c8b8a8a8">E14-1049</url>
       <doi>10.3115/v1/E14-1049</doi>
@@ -498,7 +498,7 @@
       <title>Using Distributional Similarity of Multi-way Translations to Predict Multiword Expression Compositionality</title>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>472–481</pages>
       <url hash="a79925da">E14-1050</url>
       <doi>10.3115/v1/E14-1050</doi>
@@ -553,7 +553,7 @@
       <title>Machine Reading Tea Leaves: Automatically Evaluating Topic Coherence and Topic Model Quality</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>David</first><last>Newman</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>530–539</pages>
       <url hash="d7f09a77">E14-1056</url>
       <doi>10.3115/v1/E14-1056</doi>
@@ -563,7 +563,7 @@
       <title>What Substitutes Tell Us - Analysis of an “All-Words” Lexical Substitution Corpus</title>
       <author><first>Gerhard</first><last>Kremer</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Stefan</first><last>Thater</last></author>
       <pages>540–549</pages>
       <url hash="76acc630">E14-1057</url>
@@ -573,8 +573,8 @@
     <paper id="58">
       <title>Weighted <fixed-case>K</fixed-case>rippendorff’s alpha is a more reliable metrics for multi-coders ordinal annotations: experimental studies on emotion, opinion and coreference annotation</title>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <pages>550–559</pages>
       <url hash="604f8436">E14-1058</url>
       <doi>10.3115/v1/E14-1058</doi>
@@ -603,7 +603,7 @@
     <paper id="61">
       <title>How to Produce Unseen Teddy Bears: Improved Morphological Processing of Compounds in <fixed-case>SMT</fixed-case></title>
       <author><first>Fabienne</first><last>Cap</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <pages>579–587</pages>
@@ -625,7 +625,7 @@
     <paper id="63">
       <title>Applying the semantics of negation to <fixed-case>SMT</fixed-case> through n-best list re-ranking</title>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>598–606</pages>
       <url hash="2e83bfe0">E14-1063</url>
       <doi>10.3115/v1/E14-1063</doi>
@@ -644,7 +644,7 @@
       <title>Augmenting Translation Models with Simulated Acoustic Confusions for Improved Spoken Language Translation</title>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Florian</first><last>Metze</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>616–625</pages>
       <url hash="dbf3c8d4">E14-1065</url>
       <doi>10.3115/v1/E14-1065</doi>
@@ -655,7 +655,7 @@
       <author><first>Aciel</first><last>Eshky</last></author>
       <author><first>Ben</first><last>Allison</last></author>
       <author><first>Subramanian</first><last>Ramamoorthy</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>626–635</pages>
       <url hash="d989d848">E14-1066</url>
       <doi>10.3115/v1/E14-1066</doi>
@@ -682,7 +682,7 @@
     <paper id="69">
       <title>“<fixed-case>I</fixed-case> Object!” Modeling Latent Pragmatic Effects in Courtroom Dialogues</title>
       <author><first>Dan</first><last>Goldwasser</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>655–663</pages>
       <url hash="d10e3e41">E14-1069</url>
       <doi>10.3115/v1/E14-1069</doi>
@@ -731,7 +731,7 @@
       <title>Cluster-based Prediction of User Ratings for Stylistic Surface Realisation</title>
       <author><first>Nina</first><last>Dethlefs</last></author>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>702–711</pages>
@@ -751,7 +751,7 @@
     <paper id="76">
       <title>Hybrid text simplification using synchronous dependency grammars with hand-written and automatically harvested rules</title>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <pages>722–731</pages>
       <url hash="28dc515f">E14-1076</url>
       <doi>10.3115/v1/E14-1076</doi>
@@ -768,9 +768,9 @@
     </paper>
     <paper id="78">
       <title>Learning part-of-speech taggers with inter-annotator agreement loss</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>742–751</pages>
       <url hash="5d4ff563">E14-1078</url>
       <doi>10.3115/v1/E14-1078</doi>
@@ -797,7 +797,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ITU</fixed-case> <fixed-case>T</fixed-case>urkish <fixed-case>NLP</fixed-case> Web Service</title>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>1–4</pages>
       <url hash="ded05c04">E14-2001</url>
       <doi>10.3115/v1/E14-2001</doi>
@@ -805,9 +805,9 @@
     </paper>
     <paper id="2">
       <title>Multilingual, Efficient and Easy <fixed-case>NLP</fixed-case> Processing with <fixed-case>IXA</fixed-case> Pipeline</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Josu</first><last>Bermudez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>5–8</pages>
       <url hash="6d935da1">E14-2002</url>
       <doi>10.3115/v1/E14-2002</doi>
@@ -816,12 +816,12 @@
     <paper id="3">
       <title><fixed-case>XL</fixed-case>ike Project Language Analysis Services</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Lei</first><last>Zhang</last></author>
       <author><first>Achim</first><last>Rettinger</last></author>
       <author><first>Zhixing</first><last>Li</last></author>
       <author><first>Esteban</first><last>García-Cuesta</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Božo</first><last>Bekavac</last></author>
       <author><first>Blaz</first><last>Fortuna</last></author>
       <author><first>Tadej</first><last>Štajner</last></author>
@@ -863,20 +863,20 @@
     </paper>
     <paper id="7">
       <title><fixed-case>CASMACAT</fixed-case>: A Computer-assisted Translation Workbench</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Robin</first><last>Hill</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="robin-l-hill"><first>Robin</first><last>Hill</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Luis</first><last>Leiva</last></author>
+      <author id="luis-a-leiva"><first>Luis</first><last>Leiva</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Herve</first><last>Saint-Amand</last></author>
-      <author><first>Germán</first><last>Sanchis Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="herve-saint-amand"><first>Herve</first><last>Saint-Amand</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis Trilles</last></author>
       <author><first>Chara</first><last>Tsoukala</last></author>
       <pages>25–28</pages>
       <url hash="96b3fcfa">E14-2007</url>
@@ -887,7 +887,7 @@
       <title><fixed-case>J</fixed-case>ane: Open Source Machine Translation System Combination</title>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>29–32</pages>
       <url hash="f5ced058">E14-2008</url>
       <doi>10.3115/v1/E14-2008</doi>
@@ -896,8 +896,8 @@
     <paper id="9">
       <title><fixed-case>CHISPA</fixed-case> on the <fixed-case>GO</fixed-case>: A mobile <fixed-case>C</fixed-case>hinese-<fixed-case>S</fixed-case>panish translation service for travellers in trouble</title>
       <author><first>Jordi</first><last>Centelles</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>33–36</pages>
       <url hash="cddb7516">E14-2009</url>
       <doi>10.3115/v1/E14-2009</doi>
@@ -925,8 +925,8 @@
     </paper>
     <paper id="12">
       <title>The New Thot Toolkit for Fully-Automatic and Interactive Statistical Machine Translation</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>45–48</pages>
       <url hash="75360b82">E14-2012</url>
       <doi>10.3115/v1/E14-2012</doi>
@@ -946,9 +946,9 @@
       <title>Finding Terms in Corpora for Many Languages with the <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine</title>
       <author><first>Miloš</first><last>Jakubíček</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
-      <author><first>Vojtěch</first><last>Kovář</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
-      <author><first>Vít</first><last>Suchomel</last></author>
+      <author id="vojtech-kovar"><first>Vojtěch</first><last>Kovář</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
+      <author id="vit-suchomel"><first>Vít</first><last>Suchomel</last></author>
       <pages>53–56</pages>
       <url hash="9605bedd">E14-2014</url>
       <doi>10.3115/v1/E14-2014</doi>
@@ -968,9 +968,9 @@
     </paper>
     <paper id="16">
       <title><fixed-case>DKIE</fixed-case>: Open Source Information Extraction for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Camilla Vilhelmsen</first><last>Field</last></author>
-      <author><first>Kenneth S.</first><last>Bøgh</last></author>
+      <author id="kenneth-s-bogh"><first>Kenneth S.</first><last>Bøgh</last></author>
       <pages>61–64</pages>
       <url hash="34bfe3b6">E14-2016</url>
       <doi>10.3115/v1/E14-2016</doi>
@@ -980,8 +980,8 @@
       <title>Event Extraction for <fixed-case>B</fixed-case>alkan Languages</title>
       <author><first>Vanni</first><last>Zavarella</last></author>
       <author><first>Dilek</first><last>Küçük</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
-      <author><first>Ali</first><last>Hürriyetoğlu</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoğlu</last></author>
       <pages>65–68</pages>
       <url hash="a7fa5e7a">E14-2017</url>
       <doi>10.3115/v1/E14-2017</doi>
@@ -998,7 +998,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>SPARSAR</fixed-case>: An Expressive Poetry Reader</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Anton Maria</first><last>Prati</last></author>
       <pages>73–76</pages>
       <url hash="d809adac">E14-2019</url>
@@ -1023,8 +1023,8 @@
     </paper>
     <paper id="21">
       <title>Answering List Questions using Web as a corpus</title>
-      <author><first>Patrícia</first><last>Gonçalves</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="patricia-goncalves"><first>Patrícia</first><last>Gonçalves</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>81–84</pages>
       <url hash="bab675ef">E14-2021</url>
       <doi>10.3115/v1/E14-2021</doi>
@@ -1045,7 +1045,7 @@
       <title><fixed-case>R</fixed-case>elation<fixed-case>F</fixed-case>actory: A Fast, Modular and Effective System for Knowledge Base Population</title>
       <author><first>Benjamin</first><last>Roth</last></author>
       <author><first>Tassilo</first><last>Barth</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Martin</first><last>Gropp</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>89–92</pages>
@@ -1063,9 +1063,9 @@
     </paper>
     <paper id="25">
       <title>The <fixed-case>GATE</fixed-case> Crowdsourcing Plugin: Crowdsourcing Annotated Corpora Made Easy</title>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Samantha</first><last>Alexander-Eames</last></author>
       <pages>97–100</pages>
       <url hash="9a7b4e6f">E14-2025</url>
@@ -1145,7 +1145,7 @@
     </paper>
     <paper id="6">
       <title>Resolving Coreferent and Associative Noun Phrases in Scientific Text</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <pages>45–55</pages>
       <url hash="ab496728">E14-3006</url>
@@ -1197,8 +1197,8 @@
     <paper id="12">
       <title>Automatic Creation of <fixed-case>A</fixed-case>rabic Named Entity Annotated Corpus Using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Maha</first><last>Althobaiti</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>106–115</pages>
       <url hash="e315377f">E14-3012</url>
       <doi>10.3115/v1/E14-3012</doi>
@@ -1220,7 +1220,7 @@
       <url hash="8f3f95fb">E14-4</url>
       <editor><first>Shuly</first><last>Wintner</last></editor>
       <editor><first>Stefan</first><last>Riezler</last></editor>
-      <editor><first>Sharon</first><last>Goldwater</last></editor>
+      <editor id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></editor>
       <doi>10.3115/v1/E14-4</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
@@ -1234,8 +1234,8 @@
     </frontmatter>
     <paper id="1">
       <title>Easy Web Search Results Clustering: When Baselines Can Reach State-of-the-Art Algorithms</title>
-      <author><first>Jose G.</first><last>Moreno</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>1–5</pages>
       <url hash="9e793257">E14-4001</url>
       <doi>10.3115/v1/E14-4001</doi>
@@ -1244,8 +1244,8 @@
     <paper id="2">
       <title>Propagation Strategies for Building Temporal Ontologies</title>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <author><first>Yann</first><last>Mathet</last></author>
       <pages>6–11</pages>
       <url hash="5463bad7">E14-4002</url>
@@ -1254,9 +1254,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>hinese Open Relation Extraction for Knowledge Acquisition</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Shu-Yen</first><last>Lin</last></author>
+      <author id="shu-yen-lin"><first>Shu-Yen</first><last>Lin</last></author>
       <author><first>Bo-Shun</first><last>Liao</last></author>
       <author><first>Mei-Jun</first><last>Liu</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
@@ -1271,8 +1271,8 @@
       <title>Temporal Text Ranking and Automatic Dating of Texts</title>
       <author><first>Vlad</first><last>Niculae</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <pages>17–21</pages>
       <url hash="89e7f2cb">E14-4004</url>
       <doi>10.3115/v1/E14-4004</doi>
@@ -1290,7 +1290,7 @@
     <paper id="6">
       <title>Projecting the Knowledge Graph to Syntactic Parsing</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>28–32</pages>
       <url hash="b86b9e3a">E14-4006</url>
       <doi>10.3115/v1/E14-4006</doi>
@@ -1309,8 +1309,8 @@
       <title>Chasing Hypernyms in Vector Spaces with Entropy</title>
       <author><first>Enrico</first><last>Santus</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>38–42</pages>
       <url hash="0d60cca9">E14-4008</url>
       <doi>10.3115/v1/E14-4008</doi>
@@ -1320,7 +1320,7 @@
       <title>Tight Integration of Speech Disfluency Removal into <fixed-case>SMT</fixed-case></title>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>43–47</pages>
       <url hash="cebef8f8">E14-4009</url>
       <doi>10.3115/v1/E14-4009</doi>
@@ -1329,7 +1329,7 @@
     <paper id="10">
       <title>Non-Monotonic Parsing of Fluent Umm <fixed-case>I</fixed-case> mean Disfluent Sentences</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>48–53</pages>
       <url hash="0ffd9bd7">E14-4010</url>
       <doi>10.3115/v1/E14-4010</doi>
@@ -1337,7 +1337,7 @@
     </paper>
     <paper id="11">
       <title>Lightly-Supervised Word Sense Translation Error Detection for an Interactive Conversational Spoken Language Translation System</title>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
       <pages>54–58</pages>
@@ -1357,9 +1357,9 @@
     </paper>
     <paper id="13">
       <title>Predicting <fixed-case>R</fixed-case>omanian Stress Assignment</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <pages>64–68</pages>
       <url hash="19cd1378">E14-4013</url>
       <doi>10.3115/v1/E14-4013</doi>
@@ -1367,8 +1367,8 @@
     </paper>
     <paper id="14">
       <title>Passive-Aggressive Sequence Labeling with Discriminative Post-Editing for Recognising Person Entities in Tweets</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>69–73</pages>
       <url hash="7445688f">E14-4014</url>
       <doi>10.3115/v1/E14-4014</doi>
@@ -1378,9 +1378,9 @@
     <paper id="15">
       <title>Accelerated Estimation of Conditional Random Fields using a Pseudo-Likelihood-inspired Perceptron Variant</title>
       <author><first>Teemu</first><last>Ruokolainen</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
-      <author><first>Krister</first><last>Linden</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Linden</last></author>
       <pages>74–78</pages>
       <url hash="ba8661fd">E14-4015</url>
       <doi>10.3115/v1/E14-4015</doi>
@@ -1407,8 +1407,8 @@
     </paper>
     <paper id="18">
       <title>Inference of Phrase-Based Translation Models via Minimum Description Length</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>90–94</pages>
       <url hash="5a9f22b1">E14-4018</url>
       <doi>10.3115/v1/E14-4018</doi>
@@ -1416,7 +1416,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>C</fixed-case>hinese Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>95–99</pages>
       <url hash="9044bf98">E14-4019</url>
@@ -1445,7 +1445,7 @@
       <title>Using a Random Forest Classifier to Compile Bilingual Dictionaries of Technical Terms from Comparable Corpora</title>
       <author><first>Georgios</first><last>Kontonatsios</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>111–116</pages>
       <url hash="cda0debb">E14-4022</url>
@@ -1475,7 +1475,7 @@
       <author><first>Hoa Trong</first><last>Vu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>128–132</pages>
       <url hash="2d42020f">E14-4025</url>
@@ -1527,7 +1527,7 @@
       <title>Improving Dependency Parsers with Supertags</title>
       <author><first>Hiroki</first><last>Ouchi</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>154–158</pages>
       <url hash="5ec43b90">E14-4030</url>
       <doi>10.3115/v1/E14-4030</doi>
@@ -1537,7 +1537,7 @@
       <title>Improving Dependency Parsers using <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>159–163</pages>
       <url hash="c442ac5f">E14-4031</url>
       <doi>10.3115/v1/E14-4031</doi>
@@ -1555,7 +1555,7 @@
     </paper>
     <paper id="33">
       <title>Data Driven Language Transfer Hypotheses</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>169–173</pages>
       <url hash="88454e50">E14-4033</url>
@@ -1566,7 +1566,7 @@
       <title>Simple and Effective Approach for Consistent Training of Hierarchical Phrase-based Translation Models</title>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>174–179</pages>
       <url hash="0cf2e9ae">E14-4034</url>
       <doi>10.3115/v1/E14-4034</doi>
@@ -1575,7 +1575,7 @@
     <paper id="35">
       <title>Some Experiments with a Convex <fixed-case>IBM</fixed-case> Model 2</title>
       <author><first>Andrei</first><last>Simion</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Cliff</first><last>Stein</last></author>
       <pages>180–184</pages>
       <url hash="e9d3d65f">E14-4035</url>
@@ -1584,8 +1584,8 @@
     </paper>
     <paper id="36">
       <title>Active Learning for Post-Editing Based Incrementally Retrained <fixed-case>MT</fixed-case></title>
-      <author><first>Aswarth Abhilash</first><last>Dara</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth Abhilash</first><last>Dara</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>John</first><last>Judge</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
@@ -1598,7 +1598,7 @@
       <title>Analysis and Prediction of Unalignable Words in Parallel Text</title>
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>190–194</pages>
       <url hash="f5747b9b">E14-4037</url>
       <doi>10.3115/v1/E14-4037</doi>
@@ -1616,7 +1616,7 @@
     <paper id="39">
       <title>Multi-Domain Sentiment Relevance Classification with Automatic Representation Learning</title>
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>200–204</pages>
       <url hash="b059ea3c">E14-4039</url>
       <doi>10.3115/v1/E14-4039</doi>
@@ -1625,7 +1625,7 @@
     <paper id="40">
       <title>A New Entity Salience Task with Millions of Training Examples</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <pages>205–209</pages>
       <url hash="cb9e128e">E14-4040</url>
       <doi>10.3115/v1/E14-4040</doi>
@@ -1634,7 +1634,7 @@
     <paper id="41">
       <title>Finding middle ground? Multi-objective Natural Language Generation from time-series data</title>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>210–214</pages>
       <url hash="281d9111">E14-4041</url>
@@ -1645,7 +1645,7 @@
       <title>One Sense per Tweeter ... and Other Lexical Semantic Tales of <fixed-case>T</fixed-case>witter</title>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>215–220</pages>
       <url hash="cdea6a7b">E14-4042</url>
       <doi>10.3115/v1/E14-4042</doi>
@@ -1662,7 +1662,7 @@
     <paper id="44">
       <title>Crowdsourcing Annotation of Non-Local Semantic Roles</title>
       <author><first>Parvin Sadat</first><last>Feizabadi</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>226–230</pages>
       <url hash="1673358d">E14-4044</url>
       <doi>10.3115/v1/E14-4044</doi>
diff --git a/data/xml/E17.xml b/data/xml/E17.xml
index f3ff94ffe2..1d16f35471 100644
--- a/data/xml/E17.xml
+++ b/data/xml/E17.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 15th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics: Volume 1, Long Papers</booktitle>
       <url hash="5a53410a">E17-1</url>
       <editor><first>Mirella</first><last>Lapata</last></editor>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
       <editor><first>Alexander</first><last>Koller</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Exploring Different Dimensions of Attention for Uncertainty Detection</title>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>22–34</pages>
       <url hash="d8c66eaa">E17-1003</url>
       <abstract>Neural networks with attention have proven effective for many natural language processing tasks. In this paper, we develop attention mechanisms for uncertainty detection. In particular, we generalize standardly used attention mechanisms by introducing external attention and sequence-preserving attention. These novel architectures differ from standard approaches in that they use external resources to compute attention weights and preserve sequence information. We compare them to other configurations along different dimensions of attention. Our novel architectures set the new state of the art on a Wikipedia benchmark dataset and perform similar to the state-of-the-art model on a biomedical benchmark which uses a large set of linguistic features.</abstract>
@@ -57,8 +57,8 @@
     </paper>
     <paper id="5">
       <title>When is multitask learning effective? Semantic sequence prediction under varying data conditions</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>44–53</pages>
       <url hash="d6595583">E17-1005</url>
       <abstract>Multitask learning has been applied successfully to a range of tasks, mostly morphosyntactic. However, little is known on <i>when</i> MTL works and whether there are data characteristics that help to determine the success of MTL. In this paper we evaluate a range of semantic sequence labeling tasks in a MTL setup. We examine different auxiliary task configurations, amongst which a novel setup, and correlate their impact to data-dependent conditions. Our results show that MTL is not always effective, because significant improvements are obtained only for 1 out of 5 tasks. When successful, auxiliary tasks with compact and more uniform label distributions are preferable.</abstract>
@@ -66,10 +66,10 @@
     </paper>
     <paper id="6">
       <title>Learning Compositionality Functions on Word Embeddings for Modelling Attribute Meaning in Adjective-Noun Phrases</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Fabian</first><last>Kaupmann</last></author>
       <author><first>Soufian</first><last>Jebbara</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>54–64</pages>
       <url hash="4797bd38">E17-1006</url>
       <abstract>Word embeddings have been shown to be highly effective in a variety of lexical semantic tasks. They tend to capture meaningful relational similarities between individual words, at the expense of lacking the capabilty of making the underlying semantic relation explicit. In this paper, we investigate the attribute relation that often holds between the constituents of adjective-noun phrases. We use CBOW word embeddings to represent word meaning and learn a compositionality function that combines the individual constituents into a phrase representation, thus capturing the compositional attribute meaning. The resulting embedding model, while being fully interpretable, outperforms count-based distributional vector space models that are tailored to attribute meaning in the two tasks of attribute selection and phrase similarity prediction. Moreover, as the model captures a generalized layer of attribute meaning, it bears the potential to be used for predictions over various attribute inventories without re-training.</abstract>
@@ -88,7 +88,7 @@
     <paper id="8">
       <title>Distinguishing Antonyms and Synonyms in a Pattern-based Neural Network</title>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>76–85</pages>
       <url hash="6d017ffe">E17-1008</url>
@@ -100,8 +100,8 @@
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Eugen</first><last>Ruppert</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>86–98</pages>
       <url hash="d2414a23">E17-1009</url>
       <abstract>The current trend in NLP is the use of highly opaque models, e.g. neural networks and word embeddings. While these models yield state-of-the-art results on a range of tasks, their drawback is poor interpretability. On the example of word sense induction and disambiguation (WSID), we show that it is possible to develop an interpretable model that matches the state-of-the-art models in accuracy. Namely, we present an unsupervised, knowledge-free WSID approach, which is interpretable at three levels: word sense inventory, sense feature representations, and disambiguation procedure. Experiments show that our model performs on par with state-of-the-art word sense embeddings and other unsupervised systems while offering the possibility to justify its decisions in human-readable form.</abstract>
@@ -110,7 +110,7 @@
     <paper id="10">
       <title>Word Sense Disambiguation: A Unified Evaluation Framework and Empirical Comparison</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>99–110</pages>
       <url hash="51adaa36">E17-1010</url>
@@ -231,8 +231,8 @@
     </paper>
     <paper id="21">
       <title>Cross-Lingual Dependency Parsing with Late Decoding for Truly Low-Resource Languages</title>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>220–229</pages>
       <url hash="46dc313f">E17-1021</url>
       <abstract>In cross-lingual dependency annotation projection, information is often lost during transfer because of early decoding. We present an end-to-end graph-based neural network dependency parser that can be trained to reproduce matrices of edge scores, which can be directly projected across word alignments. We show that our approach to cross-lingual dependency parsing is not only simpler, but also achieves an absolute improvement of 2.25% averaged across 10 languages compared to the previous state of the art.</abstract>
@@ -240,10 +240,10 @@
     </paper>
     <paper id="22">
       <title>Parsing <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies without training</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>230–240</pages>
       <url hash="90db81de">E17-1022</url>
       <abstract>We present UDP, the first training-free parser for Universal Dependencies (UD). Our algorithm is based on PageRank and a small set of specific dependency head rules. UDP features two-step decoding to guarantee that function words are attached as leaf nodes. The parser requires no training, and it is competitive with a delexicalized transfer system. UDP offers a linguistically sound unsupervised alternative to cross-lingual parsing for UD. The parser has very few parameters and distinctly robust to domain change across languages.</abstract>
@@ -273,8 +273,8 @@
     <paper id="25">
       <title>Exploring the Impact of Pragmatic Phenomena on Irony Detection in Tweets: A Multilingual Corpus Study</title>
       <author><first>Jihen</first><last>Karoui</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Viviana</first><last>Patti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
@@ -307,7 +307,7 @@
       <title>Cross-lingual <fixed-case>RST</fixed-case> Discourse Parsing</title>
       <author><first>Chloé</first><last>Braud</last></author>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>292–304</pages>
       <url hash="10ca80de">E17-1028</url>
       <abstract>Discourse parsing is an integral part of understanding information flow and argumentative structure in documents. Most previous research has focused on inducing and evaluating models from the English RST Discourse Treebank. However, discourse treebanks for other languages exist, including Spanish, German, Basque, Dutch and Brazilian Portuguese. The treebanks share the same underlying linguistic theory, but differ slightly in the way documents are annotated. In this paper, we present (a) a new discourse parser which is simpler, yet competitive (significantly better on 2/3 metrics) to state of the art for English, (b) a harmonization of discourse treebanks across languages, enabling us to present (c) what to the best of our knowledge are the first experiments on cross-lingual discourse parsing.</abstract>
@@ -326,7 +326,7 @@
       <title>Sentence Segmentation in Narrative Transcripts from Neuropsychological Tests using Recurrent Convolutional Neural Networks</title>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Christopher</first><last>Shulby</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>315–325</pages>
       <url hash="b3ab9d33">E17-1030</url>
       <abstract>Automated discourse analysis tools based on Natural Language Processing (NLP) aiming at the diagnosis of language-impairing dementias generally extract several textual metrics of narrative transcripts. However, the absence of sentence boundary segmentation in the transcripts prevents the direct application of NLP methods which rely on these marks in order to function properly, such as taggers and parsers. We present the first steps taken towards automatic neuropsychological evaluation based on narrative discourse analysis, presenting a new automatic sentence segmentation method for impaired speech. Our model uses recurrent convolutional neural networks with prosodic, Part of Speech (PoS) features, and word embeddings. It was evaluated intrinsically on impaired, spontaneous speech as well as normal, prepared speech and presents better results for healthy elderly (CTL) (F1 = 0.74) and Mild Cognitive Impairment (MCI) patients (F1 = 0.70) than the Conditional Random Fields method (F1 = 0.55 and 0.53, respectively) used in the same context of our study. The results suggest that our model is robust for impaired speech and can be used in automated discourse analysis tools to differentiate narratives produced by MCI and CTL.</abstract>
@@ -344,7 +344,7 @@
     <paper id="32">
       <title>From Segmentation to Analyses: a Probabilistic Model for Unsupervised Morphology Induction</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>337–346</pages>
       <url hash="cc8fa385">E17-1032</url>
       <abstract>A major motivation for unsupervised morphological analysis is to reduce the sparse data problem in under-resourced languages. Most previous work focus on segmenting surface forms into their constituent morphs (taking: tak +ing), but surface form segmentation does not solve the sparse data problem as the analyses of take and taking are not connected to each other. We present a system that adapts the MorphoChains system (Narasimhan et al., 2015) to provide morphological analyses that aim to abstract over spelling differences in functionally similar morphs. This results in analyses that are not compelled to use all the orthographic material of a word (stopping: stop +ing) or limited to only that material (acidified: acid +ify +ed). On average across six typologically varied languages our system has a similar or better F-score on EMMA (a measure of underlying morpheme accuracy) than three strong baselines; moreover, the total number of distinct morphemes identified by our system is on average 12.8% lower than for Morfessor (Virpioja et al., 2013), a state-of-the-art surface segmentation system.</abstract>
@@ -353,7 +353,7 @@
     <paper id="33">
       <title>Creating <fixed-case>POS</fixed-case> Tagging and Dependency Parsing Experts via Topic Modeling</title>
       <author><first>Atreyee</first><last>Mukherjee</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Matthias</first><last>Scheutz</last></author>
       <pages>347–355</pages>
       <url hash="9c1ab824">E17-1033</url>
@@ -363,9 +363,9 @@
     <paper id="34">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Morphology for <fixed-case>H</fixed-case>ungarian - and on the Price of Universality</title>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Katalin</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin</first><last>Simkó</last></author>
       <author><first>Zsolt</first><last>Szántó</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>356–365</pages>
       <url hash="c7bdab0a">E17-1034</url>
       <abstract>In this paper, we present how the principles of universal dependencies and morphology have been adapted to Hungarian. We report the most challenging grammatical phenomena and our solutions to those. On the basis of the adapted guidelines, we have converted and manually corrected 1,800 sentences from the Szeged Treebank to universal dependency format. We also introduce experiments on this manually annotated corpus for evaluating automatic conversion and the added value of language-specific, i.e. non-universal, annotations. Our results reveal that converting to universal dependencies is not necessarily trivial, moreover, using language-specific morphological features may have an impact on overall performance.</abstract>
@@ -384,10 +384,10 @@
     </paper>
     <paper id="36">
       <title>Generating Natural Language Question-Answer Pairs from a Knowledge Graph Using a <fixed-case>RNN</fixed-case> Based Question Generation Model</title>
-      <author><first>Sathish</first><last>Reddy</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish</first><last>Reddy</last></author>
       <author><first>Dinesh</first><last>Raghu</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>376–385</pages>
       <url hash="52616ccd">E17-1036</url>
       <abstract>In recent years, knowledge graphs such as Freebase that capture facts about entities and relationships between them have been used actively for answering factoid questions. In this paper, we explore the problem of automatically generating question answer pairs from a given knowledge graph. The generated question answer (QA) pairs can be used in several downstream applications. For example, they could be used for training better QA systems. To generate such QA pairs, we first extract a set of keywords from entities and relationships expressed in a triple stored in the knowledge graph. From each such set, we use a subset of keywords to generate a natural language question that has a unique answer. We treat this subset of keywords as a sequence and propose a sequence to sequence model using RNN to generate a natural language question from it. Our RNN based model generates QA pairs with an accuracy of 33.61 percent and performs 110.47 percent (relative) better than a state-of-the-art template based method for generating natural language question from keywords. We also do an extrinsic evaluation by using the generated QA pairs to train a QA system and observe that the F1-score of the QA system improves by 5.5 percent (relative) when using automatically generated QA pairs in addition to manually generated QA pairs available for training.</abstract>
@@ -415,9 +415,9 @@
     </paper>
     <paper id="39">
       <title>Efficient Benchmarking of <fixed-case>NLP</fixed-case> <fixed-case>API</fixed-case>s using Multi-armed Bandits</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Tuan Dung</first><last>Tran</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="tuan-tran"><first>Tuan Dung</first><last>Tran</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>408–416</pages>
       <url hash="6cf4d183">E17-1039</url>
       <abstract>Comparing NLP systems to select the best one for a task of interest, such as named entity recognition, is critical for practitioners and researchers. A rigorous approach involves setting up a hypothesis testing scenario using the performance of the systems on query documents. However, often the hypothesis testing approach needs to send a lot of document queries to the systems, which can be problematic. In this paper, we present an effective alternative based on the multi-armed bandit (MAB). We propose a hierarchical generative model to represent the uncertainty in the performance measures of the competing systems, to be used by Thompson Sampling to solve the resulting MAB. Experimental results on both synthetic and real data show that our approach requires significantly fewer queries compared to the standard benchmarking technique to identify the best system according to F-measure.</abstract>
@@ -427,7 +427,7 @@
       <title>Character-Word <fixed-case>LSTM</fixed-case> Language Models</title>
       <author><first>Lyan</first><last>Verwimp</last></author>
       <author><first>Joris</first><last>Pelemans</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <pages>417–427</pages>
       <url hash="9fcd655d">E17-1040</url>
@@ -436,9 +436,9 @@
     </paper>
     <paper id="41">
       <title>A Hierarchical Neural Model for Learning Sequences of Dialogue Acts</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>428–437</pages>
       <url hash="95b5f0a8">E17-1041</url>
       <abstract>We propose a novel hierarchical Recurrent Neural Network (RNN) for learning sequences of Dialogue Acts (DAs). The input in this task is a sequence of utterances (i.e., conversational contributions) comprising a sequence of tokens, and the output is a sequence of DA labels (one label per utterance). Our model leverages the hierarchical nature of dialogue data by using two nested RNNs that capture long-range dependencies at the dialogue level and the utterance level. This model is combined with an attention mechanism that focuses on salient tokens in utterances. Our experimental results show that our model outperforms strong baselines on two popular datasets, Switchboard and MapTask; and our detailed empirical analysis highlights the impact of each aspect of our model.</abstract>
@@ -449,11 +449,11 @@
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
       <author><first>David</first><last>Vandyke</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>438–449</pages>
       <url hash="9d3e4f48">E17-1042</url>
       <abstract>Teaching machines to accomplish tasks by conversing naturally with humans is challenging. Currently, developing task-oriented dialogue systems requires creating multiple components and typically this involves either a large amount of handcrafting, or acquiring costly labelled datasets to solve a statistical learning problem for each component. In this work we introduce a neural network-based text-in, text-out end-to-end trainable goal-oriented dialogue system along with a new way of collecting dialogue data based on a novel pipe-lined Wizard-of-Oz framework. This approach allows us to develop dialogue systems easily and without making too many assumptions about the task at hand. The results show that the model can converse with human subjects naturally whilst helping them to accomplish tasks in a restaurant search domain.</abstract>
@@ -464,8 +464,8 @@
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Michael</first><last>Seltzer</last></author>
       <author><first>Y.C.</first><last>Ju</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>450–459</pages>
       <url hash="579f467c">E17-1043</url>
       <abstract>In this paper we tackle a unique and important problem of extracting a structured order from the conversation a customer has with an order taker at a restaurant. This is motivated by an actual system under development to assist in the order taking process. We develop a sequence-to-sequence model that is able to map from unstructured conversational input to the structured form that is conveyed to the kitchen and appears on the customer receipt. This problem is critically different from other tasks like machine translation where sequence-to-sequence models have been used: the input includes two sides of a conversation; the output is highly structured; and logical manipulations must be performed, for example when the customer changes his mind while ordering. We present a novel sequence-to-sequence model that incorporates a special attention-memory gating mechanism and conversational role markers. The proposed model improves performance over both a phrase-based machine translation approach and a standard sequence-to-sequence model.</abstract>
@@ -475,8 +475,8 @@
       <title>A Two-stage Sieve Approach for Quote Attribution</title>
       <author><first>Grace</first><last>Muzny</last></author>
       <author><first>Michael</first><last>Fang</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>460–470</pages>
       <url hash="4515b09e">E17-1044</url>
       <abstract>We present a deterministic sieve-based system for attributing quotations in literary text and a new dataset: QuoteLi3. Quote attribution, determining who said what in a given text, is important for tasks like creating dialogue systems, and in newer areas like computational literary studies, where it creates opportunities to analyze novels at scale rather than only a few at a time. We release QuoteLi3, which contains more than 6,000 annotations linking quotes to speaker mentions and quotes to speaker entities, and introduce a new algorithm for quote attribution. Our two-stage algorithm first links quotes to mentions, then mentions to entities. Using two stages encapsulates difficult sub-problems and improves system performance. The modular design allows us to tune for overall performance or higher precision, which is useful for many real-world use cases. Our system achieves an average F-score of 87.5 across three novels, outperforming previous systems, and can be tuned for precision of 90.4 at a recall of 65.1.</abstract>
@@ -486,7 +486,7 @@
       <title>Out-of-domain <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Semantic Role Labeling</title>
       <author><first>Silvana</first><last>Hartmann</last></author>
       <author><first>Ilia</first><last>Kuznetsov</last></author>
-      <author><first>Teresa</first><last>Martin</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Teresa</first><last>Martin</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>471–482</pages>
       <url hash="fc190b2d">E17-1045</url>
@@ -516,8 +516,8 @@
     <paper id="48">
       <title>An Extensive Empirical Evaluation of Character-Based Morphological Tagging for 14 Languages</title>
       <author><first>Georg</first><last>Heigold</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>505–513</pages>
       <url hash="34950c22">E17-1048</url>
       <abstract>This paper investigates neural character-based morphological tagging for languages with complex morphology and large tag sets. Character-based approaches are attractive as they can handle rarely- and unseen words gracefully. We evaluate on 14 languages and observe consistent gains over a state-of-the-art morphological tagger across all languages except for English and French, where we match the state-of-the-art. We compare two architectures for computing character-based word vectors using recurrent (RNN) and convolutional (CNN) nets. We show that the CNN based approach performs slightly worse and less consistently than the RNN based approach. Small but systematic gains are observed when combining the two architectures by ensembling.</abstract>
@@ -525,9 +525,9 @@
     </paper>
     <paper id="49">
       <title>Neural Multi-Source Morphological Reinflection</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>514–524</pages>
       <url hash="a7b17f05">E17-1049</url>
       <abstract>We explore the task of multi-source morphological reinflection, which generalizes the standard, single-source version. The input consists of (i) a target tag and (ii) multiple pairs of source form and source tag for a lemma. The motivation is that it is beneficial to have access to more than one source form since different source forms can provide complementary information, e.g., different stems. We further present a novel extension to the encoder-decoder recurrent neural architecture, consisting of multiple encoders, to better solve the task. We show that our new architecture outperforms single-source reinflection models and publish our dataset for multi-source morphological reinflection to facilitate future research.</abstract>
@@ -535,9 +535,9 @@
     </paper>
     <paper id="50">
       <title>Online Automatic Post-editing for <fixed-case>MT</fixed-case> in a Multi-Domain Translation Environment</title>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Gebremedhen</first><last>Gebremelak</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>525–535</pages>
       <url hash="bc02472a">E17-1050</url>
@@ -547,7 +547,7 @@
     <paper id="51">
       <title>An Incremental Parser for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation</title>
       <author><first>Marco</first><last>Damonte</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>536–546</pages>
       <url hash="244b81fb">E17-1051</url>
@@ -558,7 +558,7 @@
       <title>Integrated Learning of Dialog Strategies and Semantic Parsing</title>
       <author><first>Aishwarya</first><last>Padmakumar</last></author>
       <author><first>Jesse</first><last>Thomason</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>547–557</pages>
       <url hash="7b235d12">E17-1052</url>
       <abstract>Natural language understanding and dialog management are two integral components of interactive dialog systems. Previous research has used machine learning techniques to individually optimize these components, with different forms of direct and indirect supervision. We present an approach to integrate the learning of both a dialog strategy using reinforcement learning, and a semantic parser for robust natural language understanding, using only natural dialog interaction for supervision. Experimental results on a simulated task of robot instruction demonstrate that joint learning of both components improves dialog performance over learning either of these components alone.</abstract>
@@ -567,7 +567,7 @@
     <paper id="53">
       <title>Unsupervised <fixed-case>AMR</fixed-case>-Dependency Parse Alignment</title>
       <author><first>Wei-Te</first><last>Chen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>558–567</pages>
       <url hash="8d5f2231">E17-1053</url>
       <abstract>In this paper, we introduce an Abstract Meaning Representation (AMR) to Dependency Parse aligner. Alignment is a preliminary step for AMR parsing, and our aligner improves current AMR parser performance. Our aligner involves several different features, including named entity tags and semantic role labels, and uses Expectation-Maximization training. Results show that our aligner reaches an 87.1% F-Score score with the experimental data, and enhances AMR parsing.</abstract>
@@ -586,7 +586,7 @@
     <paper id="55">
       <title>Multi-level Representations for Fine-Grained Typing of Knowledge Base Entities</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>578–589</pages>
       <url hash="b7e7b5b8">E17-1055</url>
       <abstract>Entities are essential elements of natural language. In this paper, we present methods for learning multi-level representations of entities on three complementary levels: character (character patterns in entity names extracted, e.g., by neural networks), word (embeddings of words in entity names) and entity (entity embeddings). We investigate state-of-the-art learning methods on each level and find large differences, e.g., for deep learning models, traditional ngram features and the subword model of fasttext (Bojanowski et al., 2016) on the character level; for word2vec (Mikolov et al., 2013) on the word level; and for the order-aware model wang2vec (Ling et al., 2015a) on the entity level. We confirm experimentally that each level of representation contributes complementary information and a joint representation of all three levels improves the existing embedding based baseline for fine-grained entity typing by a large margin. Additionally, we show that adding information from entity descriptions further improves multi-level representations of entities.</abstract>
@@ -596,8 +596,8 @@
       <title>The <fixed-case>C</fixed-case>ontrast<fixed-case>M</fixed-case>edium Algorithm: Taxonomy Induction From Noisy Knowledge Graphs With Just A Few Links</title>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>590–600</pages>
       <url hash="e2a5a0bb">E17-1056</url>
       <abstract>In this paper, we present ContrastMedium, an algorithm that transforms noisy semantic networks into full-fledged, clean taxonomies. ContrastMedium is able to identify the embedded taxonomy structure from a noisy knowledge graph without explicit human supervision such as, for instance, a set of manually selected input root and leaf concepts. This is achieved by leveraging structural information from a companion reference taxonomy, to which the input knowledge graph is linked (either automatically or manually). When used in conjunction with methods for hypernym acquisition and knowledge base linking, our methodology provides a complete solution for end-to-end taxonomy induction. We conduct experiments using automatically acquired knowledge graphs, as well as a SemEval benchmark, and show that our method is able to achieve high performance on the task of taxonomy induction.</abstract>
@@ -650,7 +650,7 @@
       <title>Transition-Based Deep Input Linearization</title>
       <author><first>Ratish</first><last>Puduppully</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>643–654</pages>
       <url hash="c182746b">E17-1061</url>
       <abstract>Traditional methods for deep NLG adopt pipeline approaches comprising stages such as constructing syntactic input, predicting function words, linearizing the syntactic input and generating the surface forms. Though easier to visualize, pipeline approaches suffer from error propagation. In addition, information available across modules cannot be leveraged by all modules. We construct a transition-based model to jointly perform linearization, function word prediction and morphological generation, which considerably improves upon the accuracy compared to a pipelined baseline system. On a standard deep input linearization shared task, our system achieves the best results reported so far.</abstract>
@@ -658,8 +658,8 @@
     </paper>
     <paper id="62">
       <title>Generating flexible proper name references in text: Data, models and evaluation</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>655–664</pages>
       <url hash="5053ae0f">E17-1062</url>
@@ -688,7 +688,7 @@
     <paper id="65">
       <title>Noisy-context surprisal as a human sentence processing cost model</title>
       <author><first>Richard</first><last>Futrell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>688–698</pages>
       <url hash="11906a5e">E17-1065</url>
       <abstract>We use the noisy-channel theory of human sentence comprehension to develop an incremental processing cost model that unifies and extends key features of expectation-based and memory-based models. In this model, which we call noisy-context surprisal, the processing cost of a word is the surprisal of the word given a noisy representation of the preceding context. We show that this model accounts for an outstanding puzzle in sentence comprehension, language-dependent structural forgetting effects (Gibson and Thomas, 1999; Vasishth et al., 2010; Frank et al., 2016), which are previously not well modeled by either expectation-based or memory-based approaches. Additionally, we show that this model derives and generalizes locality effects (Gibson, 1998; Demberg and Keller, 2008), a signature prediction of memory-based models. We give corpus-based evidence for a key assumption in this derivation.</abstract>
@@ -697,7 +697,7 @@
     <paper id="66">
       <title>Task-Specific Attentive Pooling of Phrase Alignments Contributes to Sentence Matching</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>699–709</pages>
       <url hash="4c9d276d">E17-1066</url>
       <abstract>This work studies comparatively two typical sentence matching tasks: textual entailment (TE) and answer selection (AS), observing that weaker phrase alignments are more critical in TE, while stronger phrase alignments deserve more attention in AS. The key to reach this observation lies in phrase detection, phrase representation, phrase alignment, and more importantly how to connect those aligned phrases of different matching degrees with the final classifier. Prior work (i) has limitations in phrase generation and representation, or (ii) conducts alignment at word and phrase levels by handcrafted features or (iii) utilizes a single framework of alignment without considering the characteristics of specific tasks, which limits the framework’s effectiveness across tasks. We propose an architecture based on Gated Recurrent Unit that supports (i) representation learning of phrases of arbitrary granularity and (ii) task-specific attentive pooling of phrase alignments between two sentences. Experimental results on TE and AS match our observation and show the effectiveness of our approach.</abstract>
@@ -730,7 +730,7 @@
       <author><first>Samiksha</first><last>Gupta</last></author>
       <author><first>Anupam</first><last>Jamatia</last></author>
       <author><first>Upendra</first><last>Kumar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Amitava</first><last>Das</last></author>
       <pages>731–741</pages>
       <url hash="e21872f3">E17-1069</url>
@@ -739,10 +739,10 @@
     </paper>
     <paper id="70">
       <title>Argument Strength is in the Eye of the Beholder: Audience Effects in Persuasion</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <pages>742–753</pages>
       <url hash="9dcb34e6">E17-1070</url>
       <abstract>Americans spend about a third of their time online, with many participating in online conversations on social and political issues. We hypothesize that social media arguments on such issues may be more engaging and persuasive than traditional media summaries, and that particular types of people may be more or less convinced by particular styles of argument, e.g. emotional arguments may resonate with some personalities while factual arguments resonate with others. We report a set of experiments testing at large scale how audience variables interact with argument style to affect the persuasiveness of an argument, an under-researched topic within natural language processing. We show that belief change is affected by personality factors, with conscientious, open and agreeable people being more convinced by emotional arguments.</abstract>
@@ -761,7 +761,7 @@
     <paper id="72">
       <title>A Strong Baseline for Learning Cross-Lingual Word Embeddings from Sentence Alignments</title>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <pages>765–774</pages>
       <url hash="cc355432">E17-1072</url>
@@ -779,7 +779,7 @@
     </paper>
     <paper id="74">
       <title>Nonsymbolic Text Representation</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>785–796</pages>
       <url hash="c9e16ee2">E17-1074</url>
       <abstract>We introduce the first generic text representation model that is completely nonsymbolic, i.e., it does not require the availability of a segmentation or tokenization method that attempts to identify words or other symbolic units in text. This applies to training the parameters of the model on a training corpus as well as to applying it when computing the representation of a new text. We show that our model performs better than prior work on an information extraction and a text denoising task.</abstract>
@@ -808,8 +808,8 @@
     <paper id="77">
       <title>End-to-end Relation Extraction using Neural Networks and <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks</title>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>818–827</pages>
       <url hash="7f2043a3">E17-1077</url>
       <abstract>End-to-end relation extraction refers to identifying boundaries of entity mentions, entity types of these mentions and appropriate semantic relation for each pair of mentions. Traditionally, separate predictive models were trained for each of these tasks and were used in a “pipeline” fashion where output of one model is fed as input to another. But it was observed that addressing some of these tasks jointly results in better performance. We propose a single, joint neural network based model to carry out all the three tasks of boundary identification, entity type classification and relation type classification. This model is referred to as “All Word Pairs” model (AWP-NN) as it assigns an appropriate label to each word pair in a given sentence for performing end-to-end relation extraction. We also propose to refine output of the AWP-NN model by using inference in Markov Logic Networks (MLN) so that additional domain knowledge can be effectively incorporated. We demonstrate effectiveness of our approach by achieving better end-to-end relation extraction performance than all 4 previous joint modelling approaches, on the standard dataset of ACE 2004.</abstract>
@@ -819,7 +819,7 @@
       <title>Trust, but Verify! Better Entity Linking through Automatic Verification</title>
       <author><first>Benjamin</first><last>Heinzerling</last></author>
       <author><first>Michael</first><last>Strube</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>828–838</pages>
       <url hash="d5f5781e">E17-1078</url>
       <abstract>We introduce automatic verification as a post-processing step for entity linking (EL). The proposed method trusts EL system results collectively, by assuming entity mentions are mostly linked correctly, in order to create a semantic profile of the given text using geospatial and temporal information, as well as fine-grained entity types. This profile is then used to automatically verify each linked mention individually, i.e., to predict whether it has been linked correctly or not. Verification allows leveraging a rich set of global and pairwise features that would be prohibitively expensive for EL systems employing global inference. Evaluation shows consistent improvements across datasets and systems. In particular, when applied to state-of-the-art systems, our method yields an absolute improvement in linking performance of up to 1.7 F1 on AIDA/CoNLL’03 and up to 2.4 F1 on the English TAC KBP 2015 TEDL dataset.</abstract>
@@ -877,11 +877,11 @@
     </paper>
     <paper id="84">
       <title>Multilingual Training of Crosslingual Word Embeddings</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Hiroshi</first><last>Kanayama</last></author>
       <author><first>Tengfei</first><last>Ma</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>894–904</pages>
       <url hash="749c13f3">E17-1084</url>
       <abstract>Crosslingual word embeddings represent lexical items from different languages using the same vector space, enabling crosslingual transfer. Most prior work constructs embeddings for a pair of languages, with English on one side. We investigate methods for building high quality crosslingual word embeddings for many languages in a unified vector space. In this way, we can exploit and combine strength of many languages. We obtained high performance on bilingual lexicon induction, monolingual similarity and crosslingual document classification tasks.</abstract>
@@ -890,7 +890,7 @@
     <paper id="85">
       <title>Building Lexical Vector Representations from Concept Definitions</title>
       <author><first>Danilo</first><last>Silva de Carvalho</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>905–915</pages>
       <url hash="c54fcb75">E17-1085</url>
       <abstract>The use of distributional language representations have opened new paths in solving a variety of NLP problems. However, alternative approaches can take advantage of information unavailable through pure statistical means. This paper presents a method for building vector representations from meaning unit blocks called concept definitions, which are obtained by extracting information from a curated linguistic resource (Wiktionary). The representations obtained in this way can be compared through conventional cosine similarity and are also interpretable by humans. Evaluation was conducted in semantic similarity and relatedness test sets, with results indicating a performance comparable to other methods based on single linguistic resource extraction. The results also indicate noticeable performance gains when combining distributional similarity scores with the ones obtained using this approach. Additionally, a discussion on the proposed method’s shortcomings is provided in the analysis of error cases.</abstract>
@@ -898,7 +898,7 @@
     </paper>
     <paper id="86">
       <title><fixed-case>S</fixed-case>hotgun<fixed-case>WSD</fixed-case>: An unsupervised algorithm for global word sense disambiguation inspired by <fixed-case>DNA</fixed-case> sequencing</title>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <author><first>Florentina</first><last>Hristea</last></author>
       <pages>916–926</pages>
@@ -909,7 +909,7 @@
     <paper id="87">
       <title><fixed-case>L</fixed-case>anide<fixed-case>NN</fixed-case>: Multilingual Language Identification on Character Window</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>927–936</pages>
       <url hash="a1feec36">E17-1087</url>
       <abstract>In language identification, a common first step in natural language processing, we want to automatically determine the language of some input text. Monolingual language identification assumes that the given document is written in one language. In multilingual language identification, the document is usually in two or three languages and we just want their names. We aim one step further and propose a method for textual language identification where languages can change arbitrarily and the goal is to identify the spans of each of the languages. Our method is based on Bidirectional Recurrent Neural Networks and it performs well in monolingual and multilingual language identification tasks on six datasets covering 131 languages. The method keeps the accuracy also for short documents and across domains, so it is ideal for off-the-shelf use without preparation of training data.</abstract>
@@ -921,7 +921,7 @@
       <author><first>Adam</first><last>Makarucha</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>937–947</pages>
       <url hash="c4c89420">E17-1088</url>
       <abstract>Most languages have no established writing system and minimal written records. However, textual data is essential for natural language processing, and particularly important for training language models to support speech recognition. Even in cases where text data is missing, there are some languages for which bilingual lexicons are available, since creating lexicons is a fundamental task of documentary linguistics. We investigate the use of such lexicons to improve language models when textual training data is limited to as few as a thousand sentences. The method involves learning cross-lingual word embeddings as a preliminary step in training monolingual language models. Results across a number of languages show that language models are improved by this pre-training. Application to Yongning Na, a threatened language, highlights challenges in deploying the approach in real low-resource environments.</abstract>
@@ -931,7 +931,7 @@
       <title>Consistent Translation of Repeated Nouns using Syntactic and Semantic Cues</title>
       <author><first>Xiao</first><last>Pu</last></author>
       <author><first>Laura</first><last>Mascarell</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>948–957</pages>
       <url hash="ad8e85f3">E17-1089</url>
       <abstract>We propose a method to decide whether two occurrences of the same noun in a source text should be translated consistently, i.e. using the same noun in the target text as well. We train and test classifiers that predict consistent translations based on lexical, syntactic, and semantic features. We first evaluate the accuracy of our classifiers intrinsically, in terms of the accuracy of consistency predictions, over a subset of the UN Corpus. Then, we also evaluate them in combination with phrase-based statistical MT systems for Chinese-to-English and German-to-English. We compare the automatic post-editing of noun translations with the re-ranking of the translation hypotheses based on the classifiers’ output, and also use these methods in combination. This improves over the baseline and closes up to 50% of the gap in BLEU scores between the baseline and an oracle classifier.</abstract>
@@ -939,7 +939,7 @@
     </paper>
     <paper id="90">
       <title>Psycholinguistic Models of Sentence Processing Improve Sentence Readability Ranking</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>958–968</pages>
       <url hash="854173db">E17-1090</url>
@@ -951,7 +951,7 @@
       <author><first>Pradipto</first><last>Das</last></author>
       <author><first>Yandi</first><last>Xia</last></author>
       <author><first>Aaron</first><last>Levine</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <author><first>Ankur</first><last>Datta</last></author>
       <pages>969–979</pages>
       <url hash="f2a2e944">E17-1091</url>
@@ -974,7 +974,7 @@
       <author><first>Georgios</first><last>Kontonatsios</last></author>
       <author><first>Tingting</first><last>Mu</last></author>
       <author><first>John Y.</first><last>Goulermas</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>991–1001</pages>
       <url hash="662b298b">E17-1093</url>
@@ -984,7 +984,7 @@
     <paper id="94">
       <title><fixed-case>SMART</fixed-case>ies: Sentiment Models for <fixed-case>A</fixed-case>rabic Target entities</title>
       <author><first>Noura</first><last>Farra</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>1002–1013</pages>
       <url hash="5d10f530">E17-1094</url>
       <abstract>We consider entity-level sentiment analysis in Arabic, a morphologically rich language with increasing resources. We present a system that is applied to complex posts written in response to Arabic newspaper articles. Our goal is to identify important entity “targets” within the post along with the polarity expressed about each target. We achieve significant improvements over multiple baselines, demonstrating that the use of specific morphological representations improves the performance of identifying both important targets and their sentiment, and that the use of distributional semantic clusters further boosts performances for these representations, especially when richer linguistic resources are not available.</abstract>
@@ -992,7 +992,7 @@
     </paper>
     <paper id="95">
       <title>Exploring Convolutional Neural Networks for Sentiment Analysis of <fixed-case>S</fixed-case>panish tweets</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Antonio</first><last>Quirós</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>1014–1022</pages>
@@ -1003,7 +1003,7 @@
     <paper id="96">
       <title>Contextual Bidirectional Long Short-Term Memory Recurrent Neural Network Language Models: A Generative Approach to Sentiment Analysis</title>
       <author><first>Amr</first><last>Mousa</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>1023–1032</pages>
       <url hash="2b8e170b">E17-1096</url>
       <abstract>Traditional learning-based approaches to sentiment analysis of written text use the concept of bag-of-words or bag-of-n-grams, where a document is viewed as a set of terms or short combinations of terms disregarding grammar rules or word order. Novel approaches de-emphasize this concept and view the problem as a sequence classification problem. In this context, recurrent neural networks (RNNs) have achieved significant success. The idea is to use RNNs as discriminative binary classifiers to predict a positive or negative sentiment label at every word position then perform a type of pooling to get a sentence-level polarity. Here, we investigate a novel generative approach in which a separate probability distribution is estimated for every sentiment using language models (LMs) based on long short-term memory (LSTM) RNNs. We introduce a novel type of LM using a modified version of bidirectional LSTM (BLSTM) called contextual BLSTM (cBLSTM), where the probability of a word is estimated based on its full left and right contexts. Our approach is compared with a BLSTM binary classifier. Significant improvements are observed in classifying the IMDB movie review dataset. Further improvements are achieved via model combination.</abstract>
@@ -1013,7 +1013,7 @@
       <title>Large-scale Opinion Relation Extraction with Distantly Supervised Neural Network</title>
       <author><first>Changzhi</first><last>Sun</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Shiliang</first><last>Sun</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <pages>1033–1043</pages>
@@ -1035,7 +1035,7 @@
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>1053–1062</pages>
       <url hash="92d57ae8">E17-1099</url>
       <abstract>Translating in real-time, a.k.a.simultaneous translation, outputs translation words before the input sentence ends, which is a challenging problem for conventional machine translation methods. We propose a neural machine translation (NMT) framework for simultaneous translation in which an agent learns to make decisions on when to translate from the interaction with a pre-trained NMT environment. To trade off quality and delay, we extensively explore various targets for delay and design a method for beam-search applicable in the simultaneous MT setting. Experiments against state-of-the-art baselines on two language pairs demonstrate the efficacy of the proposed framework both quantitatively and qualitatively.</abstract>
@@ -1044,7 +1044,7 @@
     <paper id="100">
       <title>A Multifaceted Evaluation of Neural versus Phrase-Based Machine Translation for 9 Language Directions</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <pages>1063–1073</pages>
       <url hash="de089a13">E17-1100</url>
       <abstract>We aim to shed light on the strengths and weaknesses of the newly introduced neural machine translation paradigm. To that end, we conduct a multifaceted evaluation in which we compare outputs produced by state-of-the-art neural machine translation and phrase-based machine translation systems for 9 language directions across a number of dimensions. Specifically, we measure the similarity of the outputs, their fluency and amount of reordering, the effect of sentence length and performance across different error categories. We find out that translations produced by neural machine translation systems are considerably different, more fluent and more accurate in terms of word order compared to those produced by phrase-based systems. Neural machine translation systems are also more accurate at producing inflected forms, but they perform poorly when translating very long sentences.</abstract>
@@ -1067,7 +1067,7 @@
       <title>Bilingual Lexicon Induction by Learning to Combine Word-Level and Character-Level Representations</title>
       <author><first>Geert</first><last>Heyman</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1085–1095</pages>
       <url hash="b315519e">E17-1102</url>
       <abstract>We study the problem of bilingual lexicon induction (BLI) in a setting where some translation resources are available, but unknown translations are sought for certain, possibly domain-specific terminology. We frame BLI as a classification problem for which we design a neural network based classification architecture composed of recurrent long short-term memory and deep feed forward networks. The results show that word- and character-level representations each improve state-of-the-art results for BLI, and the best results are obtained by exploiting the synergy between these word- and character-level representations in the classification model.</abstract>
@@ -1109,7 +1109,7 @@
     <paper id="106">
       <title>Predicting Counselor Behaviors in Motivational Interviewing Encounters</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
       <author><first>Lawrence</first><last>An</last></author>
@@ -1122,7 +1122,7 @@
     </paper>
     <paper id="107">
       <title>Authorship Attribution Using Text Distortion</title>
-      <author><first>Efstathios</first><last>Stamatatos</last></author>
+      <author id="efstathios-stamatatos"><first>Efstathios</first><last>Stamatatos</last></author>
       <pages>1138–1149</pages>
       <url hash="6cedf6ed">E17-1107</url>
       <abstract>Authorship attribution is associated with important applications in forensics and humanities research. A crucial point in this field is to quantify the personal style of writing, ideally in a way that is not affected by changes in topic or genre. In this paper, we present a novel method that enhances authorship attribution effectiveness by introducing a text distortion step before extracting stylometric measures. The proposed method attempts to mask topic-specific information that is not related to the personal style of authors. Based on experiments on two main tasks in authorship attribution, closed-set attribution and authorship verification, we demonstrate that the proposed approach can enhance existing methods especially under cross-topic conditions, where the training and test corpora do not match in topic.</abstract>
@@ -1131,7 +1131,7 @@
     <paper id="108">
       <title>Structured Learning for Temporal Relation Extraction from Clinical Records</title>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1150–1158</pages>
       <url hash="b5d807cf">E17-1108</url>
       <abstract>We propose a scalable structured learning model that jointly predicts temporal relations between events and temporal expressions (TLINKS), and the relation between these events and the document creation time (DCTR). We employ a structured perceptron, together with integer linear programming constraints for document-level inference during training and prediction to exploit relational properties of temporality, together with global learning of the relations at the document level. Moreover, this study gives insights in the results of integrating constraints for temporal relation extraction when using structured learning and prediction. Our best system outperforms the state-of-the art on both the CONTAINS TLINK task, and the DCTR task.</abstract>
@@ -1142,7 +1142,7 @@
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1159–1170</pages>
       <url hash="180339e4">E17-1109</url>
       <abstract>Text mining has drawn significant attention in recent past due to the rapid growth in biomedical and clinical records. Entity extraction is one of the fundamental components for biomedical text mining. In this paper, we propose a novel approach of feature selection for entity extraction that exploits the concept of deep learning and Particle Swarm Optimization (PSO). The system utilizes word embedding features along with several other features extracted by studying the properties of the datasets. We obtain an interesting observation that compact word embedding features as determined by PSO are more effective compared to the entire word embedding feature set for entity extraction. The proposed system is evaluated on three benchmark biomedical datasets such as GENIA, GENETAG, and AiMed. The effectiveness of the proposed approach is evident with significant performance gains over the baseline models as well as the other existing systems. We observe improvements of 7.86%, 5.27% and 7.25% F-measure points over the baseline models for GENIA, GENETAG, and AiMed dataset respectively.</abstract>
@@ -1161,7 +1161,7 @@
       <title>Noise Mitigation for Neural Entity Typing and Relation Extraction</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1183–1194</pages>
       <url hash="d91e363c">E17-1111</url>
       <abstract>In this paper, we address two different types of noise in information extraction models: noise from distant supervision and noise from pipeline input features. Our target tasks are entity typing and relation extraction. For the first noise type, we introduce multi-instance multi-label learning algorithms using neural network models, and apply them to fine-grained entity typing for the first time. Our model outperforms the state-of-the-art supervised approach which uses global embeddings of entities. For the second noise type, we propose ways to improve the integration of noisy entity type predictions into relation extraction. Our experiments show that probabilistic predictions are more robust than discrete predictions and that joint training of the two tasks performs best.</abstract>
@@ -1191,8 +1191,8 @@
       <title>A Multi-task Approach to Predict Likability of Books</title>
       <author><first>Suraj</first><last>Maharjan</last></author>
       <author><first>John</first><last>Arevalo</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>1217–1227</pages>
       <url hash="ffe9394f">E17-1114</url>
@@ -1214,7 +1214,7 @@
       <author><first>Debnil</first><last>Sur</last></author>
       <author><first>Luke</first><last>Shrimpton</last></author>
       <author><first>Iain</first><last>Murray</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1239–1248</pages>
       <url hash="30f63e7f">E17-1116</url>
       <abstract>Political surveys have indicated a relationship between a sense of Scottish identity and voting decisions in the 2014 Scottish Independence Referendum. Identity is often reflected in language use, suggesting the intuitive hypothesis that individuals who support Scottish independence are more likely to use distinctively Scottish words than those who oppose it. In the first large-scale study of sociolinguistic variation on social media in the UK, we identify distinctively Scottish terms in a data-driven way, and find that these terms are indeed used at a higher rate by users of pro-independence hashtags than by users of anti-independence hashtags. However, we also find that in general people are less likely to use distinctively Scottish words in tweets with referendum-related hashtags than in their general Twitter activity. We attribute this difference to style shifting relative to audience, aligning with previous work showing that Twitter users tend to use fewer local variants when addressing a broader audience.</abstract>
@@ -1225,9 +1225,9 @@
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1249–1258</pages>
       <url hash="c5bf3168">E17-1117</url>
       <abstract>Recurrent neural network grammars (RNNG) are a recently proposed probablistic generative modeling family for natural language. They show state-of-the-art language modeling and parsing performance. We investigate what information they learn, from a linguistic perspective, through various ablations to the model and the data, and by augmenting the model with an attention mechanism (GA-RNNG) to enable closer inspection. We find that explicit modeling of composition is crucial for achieving the best performance. Through the attention mechanism, we find that headedness plays a central role in phrasal representation (with the model’s latent attention largely agreeing with predictions made by hand-crafted head rules, albeit with some important differences). By training grammars without nonterminal labels, we find that phrasal representations depend minimally on nonterminals, providing support for the endocentricity hypothesis.</abstract>
@@ -1236,7 +1236,7 @@
     <paper id="118">
       <title>Incremental Discontinuous Phrase Structure Parsing with the <fixed-case>GAP</fixed-case> Transition</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>1259–1270</pages>
       <url hash="e5679dd8">E17-1118</url>
       <abstract>This article introduces a novel transition system for discontinuous lexicalized constituent parsing called SR-GAP. It is an extension of the shift-reduce algorithm with an additional gap transition. Evaluation on two German treebanks shows that SR-GAP outperforms the previous best transition-based discontinuous parser (Maier, 2015) by a large margin (it is notably twice as accurate on the prediction of discontinuous constituents), and is competitive with the state of the art (Fernández-González and Martins, 2015). As a side contribution, we adapt span features (Hall et al., 2014) to discontinuous parsing.</abstract>
@@ -1259,7 +1259,7 @@
       <booktitle>Proceedings of the 15th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics: Volume 2, Short Papers</booktitle>
       <url hash="aa8810b7">E17-2</url>
       <editor><first>Mirella</first><last>Lapata</last></editor>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
       <editor><first>Alexander</first><last>Koller</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
@@ -1275,7 +1275,7 @@
       <title>Multilingual Back-and-Forth Conversion between Content and Function Head for Easy Dependency Parsing</title>
       <author><first>Ryosuke</first><last>Kohita</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1–7</pages>
       <url hash="6d54bba4">E17-2001</url>
       <abstract>Universal Dependencies (UD) is becoming a standard annotation scheme cross-linguistically, but it is argued that this scheme centering on content words is harder to parse than the conventional one centering on function words. To improve the parsability of UD, we propose a back-and-forth conversion algorithm, in which we preprocess the training treebank to increase parsability, and reconvert the parser outputs to follow the UD scheme as a postprocess. We show that this technique consistently improves LAS across languages even with a state-of-the-art parser, in particular on core dependency arcs such as nominal modifier. We also provide an in-depth analysis to understand why our method increases parsability.</abstract>
@@ -1284,11 +1284,11 @@
     <paper id="2">
       <title><fixed-case>URIEL</fixed-case> and lang2vec: Representing languages as typological, geographical, and phylogenetic vectors</title>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Ke</first><last>Lin</last></author>
       <author><first>Katherine</first><last>Kairis</last></author>
       <author><first>Carlisle</first><last>Turner</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>8–14</pages>
       <url hash="0482031a">E17-2002</url>
       <abstract>We introduce the URIEL knowledge base for massively multilingual NLP and the lang2vec utility, which provides information-rich vector identifications of languages drawn from typological, geographical, and phylogenetic databases and normalized to have straightforward and consistent formats, naming, and semantics. The goal of URIEL and lang2vec is to enable multilingual NLP, especially on less-resourced languages and make possible types of experiments (especially but not exclusively related to NLP tasks) that are otherwise difficult or impossible due to the sparsity and incommensurability of the data sources. lang2vec vectors have been shown to reduce perplexity in multilingual language modeling, when compared to one-hot language identification vectors.</abstract>
@@ -1306,8 +1306,8 @@
     <paper id="4">
       <title>Robust Training under Linguistic Adversity</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>21–27</pages>
       <url hash="6f7e9d02">E17-2004</url>
       <abstract>Deep neural networks have achieved remarkable results across many language processing tasks, however they have been shown to be susceptible to overfitting and highly sensitive to noise, including adversarial attacks. In this work, we propose a linguistically-motivated approach for training robust models based on exposing the model to corrupted text examples at training time. We consider several flavours of linguistically plausible corruption, include lexical semantic and syntactic methods. Empirically, we evaluate our method with a convolutional neural model across a range of sentiment analysis datasets. Compared with a baseline and the dropout method, our method achieves better overall performance.</abstract>
@@ -1316,7 +1316,7 @@
     <paper id="5">
       <title>Using <fixed-case>T</fixed-case>witter Language to Predict the Real Estate Market</title>
       <author><first>Mohammadzaman</first><last>Zamani</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>28–33</pages>
       <url hash="5730cc74">E17-2005</url>
       <abstract>We explore whether social media can provide a window into community real estate -foreclosure rates and price changes- beyond that of traditional economic and demographic variables. We find language use in Twitter not only predicts real estate outcomes as well as traditional variables across counties, but that including Twitter language in traditional models leads to a significant improvement (e.g. from Pearson r = :50 to r = :59 for price changes). We overcome the challenge of the relative sparsity and noise in Twitter language variables by showing that training on the residual error of the traditional models leads to more accurate overall assessments. Finally, we discover that it is Twitter language related to business (e.g. ‘company’, ‘marketing’) and technology (e.g. ‘technology’, ‘internet’), among others, that yield predictive power over economics.</abstract>
@@ -1324,7 +1324,7 @@
     </paper>
     <paper id="6">
       <title>Lexical Simplification with Neural Ranking</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>34–40</pages>
       <url hash="8c2e55aa">E17-2006</url>
@@ -1343,7 +1343,7 @@
       <title>Crowd-Sourced Iterative Annotation for Narrative Summarization Corpora</title>
       <author><first>Jessica</first><last>Ouyang</last></author>
       <author><first>Serina</first><last>Chang</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>46–51</pages>
       <url hash="8ae8c040">E17-2008</url>
       <abstract>We present an iterative annotation process for producing aligned, parallel corpora of abstractive and extractive summaries for narrative. Our approach uses a combination of trained annotators and crowd-sourcing, allowing us to elicit human-generated summaries and alignments quickly and at low cost. We use crowd-sourcing to annotate aligned phrases with the text-to-text generation techniques needed to transform each phrase into the other. We apply this process to a corpus of 476 personal narratives, which we make available on the Web.</abstract>
@@ -1364,7 +1364,7 @@
       <title>Detecting negation scope is easy, except when it isn’t</title>
       <author><first>Federico</first><last>Fancellu</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Hangfeng</first><last>He</last></author>
       <pages>58–63</pages>
       <url hash="54797b3f">E17-2010</url>
@@ -1394,9 +1394,9 @@
     </paper>
     <paper id="13">
       <title>Instances and concepts in distributional space</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Abhijeet</first><last>Gupta</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>79–85</pages>
       <url hash="622aa9da">E17-2013</url>
       <abstract>Instances (“Mozart”) are ontologically distinct from concepts or classes (“composer”). Natural language encompasses both, but instances have received comparatively little attention in distributional semantics. Our results show that instances and concepts differ in their distributional properties. We also establish that instantiation detection (“Mozart – composer”) is generally easier than hypernymy detection (“chemist – scientist”), and that results on the influence of input representation do not transfer from hyponymy to instantiation.</abstract>
@@ -1404,7 +1404,7 @@
     </paper>
     <paper id="14">
       <title>Is this a Child, a Girl or a Car? Exploring the Contribution of Distributional Similarity to Learning Referential Word Meanings</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>86–91</pages>
       <url hash="ce10e0e2">E17-2014</url>
@@ -1456,8 +1456,8 @@
       <title>Context-Aware Prediction of Derivational Word-forms</title>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>118–124</pages>
       <url hash="e41bf20d">E17-2019</url>
       <abstract>Derivational morphology is a fundamental and complex characteristic of language. In this paper we propose a new task of predicting the derivational form of a given base-form lemma that is appropriate for a given context. We present an encoder-decoder style neural network to produce a derived form character-by-character, based on its corresponding character-level representation of the base form and the context. We demonstrate that our model is able to generate valid context-sensitive derivations from known base forms, but is less accurate under lexicon agnostic setting.</abstract>
@@ -1486,7 +1486,7 @@
     <paper id="22">
       <title>A Computational Analysis of the Language of Drug Addiction</title>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>136–142</pages>
       <url hash="0310e0a5">E17-2022</url>
       <abstract>We present a computational analysis of the language of drug users when talking about their drug experiences. We introduce a new dataset of over 4,000 descriptions of experiences reported by users of four main drug types, and show that we can predict with an F1-score of up to 88% the drug behind a certain experience. We also perform an analysis of the dominant psycholinguistic processes and dominant emotions associated with each drug type, which sheds light on the characteristics of drug users.</abstract>
@@ -1522,7 +1522,7 @@
     <paper id="26">
       <title>Identifying beneficial task relations for multi-task learning in deep neural networks</title>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>164–169</pages>
       <url hash="0da6d2d4">E17-2026</url>
       <abstract>Multi-task learning (MTL) in deep neural networks for NLP has recently received increasing interest due to some compelling benefits, including its potential to efficiently regularize models and to reduce the need for labeled data. While it has brought significant improvements in a number of NLP tasks, mixed results have been reported, and little is known about the conditions under which MTL leads to gains in NLP. This paper sheds light on the specific task relations that can lead to gains from MTL models over single-task setups.</abstract>
@@ -1541,7 +1541,7 @@
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>175–181</pages>
       <url hash="f8e2bbcd">E17-2028</url>
       <abstract>The popular skip-gram model induces word embeddings by exploiting the signal from word-context coocurrence. We offer a new interpretation of skip-gram based on exponential family PCA-a form of matrix factorization to generalize the skip-gram model to tensor factorization. In turn, this lets us train embeddings through richer higher-order coocurrences, e.g., triples that include positional information (to incorporate syntax) or morphological information (to share parameters across related words). We experiment on 40 languages and show our model improves upon skip-gram.</abstract>
@@ -1610,7 +1610,7 @@
       <title>Morphological Analysis of the <fixed-case>D</fixed-case>ravidian Language Family</title>
       <author><first>Arun</first><last>Kumar</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Antoni</first><last>Oliver</last></author>
       <pages>217–222</pages>
       <url hash="390f4b39">E17-2035</url>
@@ -1619,7 +1619,7 @@
     </paper>
     <paper id="36">
       <title><fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>omains: Large-Scale Domain Labeling of Lexical Resources</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>223–228</pages>
       <url hash="73376b61">E17-2036</url>
@@ -1630,7 +1630,7 @@
       <title><fixed-case>JFLEG</fixed-case>: A Fluency Corpus and Benchmark for Grammatical Error Correction</title>
       <author><first>Courtney</first><last>Napoles</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>229–234</pages>
       <url hash="7a69e260">E17-2037</url>
       <abstract>We present a new parallel corpus, JHU FLuency-Extended GUG corpus (JFLEG) for developing and evaluating grammatical error correction (GEC). Unlike other corpora, it represents a broad range of language proficiency levels and uses holistic fluency edits to not only correct grammatical errors but also make the original text more native sounding. We describe the types of corrections made and benchmark four leading GEC systems on this corpus, identifying specific areas in which they do well and how they can improve. JFLEG fulfills the need for a new gold standard to properly assess the current state of GEC.</abstract>
@@ -1665,9 +1665,9 @@
     </paper>
     <paper id="40">
       <title>Cross-lingual tagger evaluation without test data</title>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>248–253</pages>
       <url hash="ffeb845f">E17-2040</url>
       <abstract>We address the challenge of cross-lingual POS tagger evaluation in absence of manually annotated test data. We put forth and evaluate two dictionary-based metrics. On the tasks of accuracy prediction and system ranking, we reveal that these metrics are reliable enough to approximate test set-based evaluation, and at the same time lean enough to support assessment for truly low-resource languages.</abstract>
@@ -1677,7 +1677,7 @@
       <title>Legal <fixed-case>NERC</fixed-case> with ontologies, <fixed-case>W</fixed-case>ikipedia and curriculum learning</title>
       <author><first>Cristian</first><last>Cardellino</last></author>
       <author><first>Milagro</first><last>Teruel</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <pages>254–259</pages>
       <url hash="de74019e">E17-2041</url>
@@ -1686,7 +1686,7 @@
     </paper>
     <paper id="42">
       <title>The Content Types Dataset: a New Resource to Explore Semantic and Functional Characteristics of Texts</title>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
@@ -1718,9 +1718,9 @@
     </paper>
     <paper id="45">
       <title>Neural vs. Phrase-Based Machine Translation in a Multi-Domain Scenario</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>280–284</pages>
@@ -1749,7 +1749,7 @@
     <paper id="48">
       <title>To Sing like a Mockingbird</title>
       <author><first>Lorenzo</first><last>Gatti</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Oliviero</first><last>Stock</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>298–304</pages>
@@ -1771,7 +1771,7 @@
       <author><first>Daniël</first><last>de Kok</last></author>
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Corina</first><last>Dima</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>311–317</pages>
       <url hash="3e96f603">E17-2050</url>
       <abstract>Prepostitional phrase (PP) attachment is a well known challenge to parsing. In this paper, we combine the insights of different works, namely: (1) treating PP attachment as a classification task with an arbitrary number of attachment candidates; (2) using auxiliary distributions to augment the data beyond the hand-annotated training set; (3) using topological fields to get information about the distribution of PP attachment throughout clauses and (4) using state-of-the-art techniques such as word embeddings and neural networks. We show that jointly using these techniques leads to substantial improvements. We also conduct a qualitative analysis to gauge where the ceiling of the task is in a realistic setup.</abstract>
@@ -1789,10 +1789,10 @@
     </paper>
     <paper id="52">
       <title>Joining Hands: Exploiting Monolingual Treebanks for Parsing of Code-mixing Data</title>
-      <author><first>Irshad</first><last>Bhat</last></author>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>324–330</pages>
       <url hash="e2770613">E17-2052</url>
       <abstract>In this paper, we propose efficient and less resource-intensive strategies for parsing of code-mixed data. These strategies are not constrained by in-domain annotations, rather they leverage pre-existing monolingual annotated resources for training. We show that these methods can produce significantly better results as compared to an informed baseline. Due to lack of an evaluation set for code-mixed structures, we also present a data set of 450 Hindi and English code-mixed tweets of Hindi multilingual speakers for evaluation.</abstract>
@@ -1801,7 +1801,7 @@
     <paper id="53">
       <title>Multilingual Lexicalized Constituency Parsing with Word-Level Auxiliary Tasks</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>331–336</pages>
       <url hash="a436d2b1">E17-2053</url>
       <abstract>We introduce a constituency parser based on a bi-LSTM encoder adapted from recent work (Cross and Huang, 2016b; Kiperwasser and Goldberg, 2016), which can incorporate a lower level character biLSTM (Ballesteros et al., 2015; Plank et al., 2016). We model two important interfaces of constituency parsing with auxiliary tasks supervised at the word level: (i) part-of-speech (POS) and morphological tagging, (ii) functional label prediction. On the SPMRL dataset, our parser obtains above state-of-the-art results on constituency parsing without requiring either predicted POS or morphological tags, and outputs labelled dependency trees.</abstract>
@@ -1811,7 +1811,7 @@
       <title>Be Precise or Fuzzy: Learning the Meaning of Cardinals and Quantifiers from Vision</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Marco</first><last>Marelli</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>337–342</pages>
       <url hash="8c596dd9">E17-2054</url>
       <abstract>People can refer to quantities in a visual scene by using either exact cardinals (e.g. one, two, three) or natural language quantifiers (e.g. few, most, all). In humans, these two processes underlie fairly different cognitive and neural mechanisms. Inspired by this evidence, the present study proposes two models for learning the objective meaning of cardinals and quantifiers from visual scenes containing multiple objects. We show that a model capitalizing on a ‘fuzzy’ measure of similarity is effective for learning quantifiers, whereas the learning of exact cardinals is better accomplished when information about number is provided.</abstract>
@@ -1829,10 +1829,10 @@
     <paper id="56">
       <title>Neural Automatic Post-Editing Using Prior Alignment and Reranking</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>349–355</pages>
       <url hash="7847b5b8">E17-2056</url>
       <abstract>We present a second-stage machine translation (MT) system based on a neural machine translation (NMT) approach to automatic post-editing (APE) that improves the translation quality provided by a first-stage MT system. Our APE system (APE_Sym) is an extended version of an attention based NMT model with bilingual symmetry employing bidirectional models, mt–pe and pe–mt. APE translations produced by our system show statistically significant improvements over the first-stage MT, phrase-based APE and the best reported score on the WMT 2016 APE dataset by a previous neural APE system. Re-ranking (APE_Rerank) of the n-best translations from the phrase-based APE and APE_Sym systems provides further substantial improvements over the symmetric neural APE model. Human evaluation confirms that the APE_Rerank generated PE translations improve on the previous best neural APE system at WMT 2016.</abstract>
@@ -1842,10 +1842,10 @@
       <title>Improving Evaluation of Document-level Machine Translation Quality Estimation</title>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Qingsong</first><last>Ma</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Carla</first><last>Parra</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>356–361</pages>
       <url hash="d91571f4">E17-2057</url>
       <abstract>Meaningful conclusions about the relative performance of NLP systems are only possible if the gold standard employed in a given evaluation is both valid and reliable. In this paper, we explore the validity of human annotations currently employed in the evaluation of document-level quality estimation for machine translation (MT). We demonstrate the degree to which MT system rankings are dependent on weights employed in the construction of the gold standard, before proposing direct human assessment as a valid alternative. Experiments show direct assessment (DA) scores for documents to be highly reliable, achieving a correlation of above 0.9 in a self-replication experiment, in addition to a substantial estimated cost reduction through quality controlled crowd-sourcing. The original gold standard based on post-edits incurs a 10–20 times greater cost than DA.</abstract>
@@ -1854,7 +1854,7 @@
     <paper id="58">
       <title>Neural Machine Translation by Minimising the <fixed-case>B</fixed-case>ayes-risk with Respect to Syntactic Translation Lattices</title>
       <author><first>Felix</first><last>Stahlberg</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>362–368</pages>
@@ -1866,8 +1866,8 @@
       <title>Producing Unseen Morphological Variants in Statistical Machine Translation</title>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>369–375</pages>
       <url hash="d18b23be">E17-2059</url>
       <abstract>Translating into morphologically rich languages is difficult. Although the coverage of lemmas may be reasonable, many morphological variants cannot be learned from the training data. We present a statistical translation system that is able to produce these inflected word forms. Different from most previous work, we do not separate morphological prediction from lexical choice into two consecutive steps. Our approach is novel in that it is integrated in decoding and takes advantage of context information from both the source language and the target language sides.</abstract>
@@ -1886,7 +1886,7 @@
       <author><first>Zichao</first><last>Yang</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Alex</first><last>Smola</last></author>
       <pages>383–387</pages>
       <url hash="cf0130ae">E17-2061</url>
@@ -1931,7 +1931,7 @@
     <paper id="66">
       <title>Using Word Embedding for Cross-Language Plagiarism Detection</title>
       <author><first>Jérémy</first><last>Ferrero</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Frédéric</first><last>Agnès</last></author>
       <pages>415–421</pages>
@@ -1952,9 +1952,9 @@
     <paper id="68">
       <title>Bag of Tricks for Efficient Text Classification</title>
       <author><first>Armand</first><last>Joulin</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <pages>427–431</pages>
       <url hash="0e826023">E17-2068</url>
       <abstract>This paper explores a simple and efficient baseline for text classification. Our experiments show that our fast text classifier fastText is often on par with deep learning classifiers in terms of accuracy, and many orders of magnitude faster for training and evaluation. We can train fastText on more than one billion words in less than ten minutes using a standard multicore CPU, and classify half a million sentences among 312K classes in less than a minute.</abstract>
@@ -1975,7 +1975,7 @@
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>437–442</pages>
       <url hash="71fe0ec8">E17-2070</url>
       <abstract>Measuring topic quality is essential for scoring the learned topics and their subsequent use in Information Retrieval and Text classification. To measure quality of Latent Dirichlet Allocation (LDA) based topics learned from text, we propose a novel approach based on grouping of topic words into buckets (TBuckets). A single large bucket signifies a single coherent theme, in turn indicating high topic coherence. TBuckets uses word embeddings of topic words and employs singular value decomposition (SVD) and Integer Linear Programming based optimization to create coherent word buckets. TBuckets outperforms the state-of-the-art techniques when evaluated using 3 publicly available datasets and on another one proposed in this paper.</abstract>
@@ -1986,7 +1986,7 @@
       <author><first>Shiou Tian</first><last>Hsu</last></author>
       <author><first>Changsung</first><last>Moon</last></author>
       <author><first>Paul</first><last>Jones</last></author>
-      <author><first>Nagiza</first><last>Samatova</last></author>
+      <author id="nagiza-samatova"><first>Nagiza</first><last>Samatova</last></author>
       <pages>443–449</pages>
       <url hash="1e235c3a">E17-2071</url>
       <abstract>The success of sentence classification often depends on understanding both the syntactic and semantic properties of word-phrases. Recent progress on this task has been based on exploiting the grammatical structure of sentences but often this structure is difficult to parse and noisy. In this paper, we propose a structure-independent ‘Gated Representation Alignment’ (GRA) model that blends a phrase-focused Convolutional Neural Network (CNN) approach with sequence-oriented Recurrent Neural Network (RNN). Our novel alignment mechanism allows the RNN to selectively include phrase information in a word-by-word sentence representation, and to do this without awareness of the syntactic structure. An empirical evaluation of GRA shows higher prediction accuracy (up to 4.6%) of fine-grained sentiment ratings, when compared to other structure-independent baselines. We also show comparable results to several structure-dependent methods. Finally, we analyzed the effect of our alignment mechanism and found that this is critical to the effectiveness of the CNN-RNN hybrid.</abstract>
@@ -2027,7 +2027,7 @@
     <paper id="75">
       <title>A Copy-Augmented Sequence-to-Sequence Architecture Gives Good Performance on Task-Oriented Dialogue</title>
       <author><first>Mihail</first><last>Eric</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>468–473</pages>
       <url hash="6e9ec070">E17-2075</url>
       <abstract>Task-oriented dialogue focuses on conversational agents that participate in dialogues with user goals on domain-specific topics. In contrast to chatbots, which simply seek to sustain open-ended meaningful discourse, existing task-oriented agents usually explicitly model user intent and belief states. This paper examines bypassing such an explicit representation by depending on a latent neural embedding of state and learning selective attention to dialogue history together with copying to incorporate relevant prior context. We complement recent work by showing the effectiveness of simple sequence-to-sequence neural architectures with a copy mechanism. Our model outperforms more complex memory-augmented models by 7% in per-response generation and is on par with the current state-of-the-art on DSTC2, a real-world task-oriented dialogue dataset.</abstract>
@@ -2038,7 +2038,7 @@
       <author><first>Sameer</first><last>Bansal</last></author>
       <author><first>Herman</first><last>Kamper</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>474–479</pages>
       <url hash="fb120902">E17-2076</url>
       <abstract>We explore the problem of translating speech to text in low-resource scenarios where neither automatic speech recognition (ASR) nor machine translation (MT) are available, but we have training data in the form of audio paired with text translations. We present the first system for this problem applied to a realistic multi-speaker dataset, the CALLHOME Spanish-English speech translation corpus. Our approach uses unsupervised term discovery (UTD) to cluster repeated patterns in the audio, creating a pseudotext, which we pair with translations to create a parallel text and train a simple bag-of-words MT model. We identify the challenges faced by the system, finding that the difficulty of cross-speaker UTD results in low recall, but that our system is still able to correctly translate some content words in test data.</abstract>
@@ -2091,7 +2091,7 @@
       <title>Efficient, Compositional, Order-sensitive n-gram Embeddings</title>
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Pushpendre</first><last>Rastogi</last></author>
-      <author><first>M. Patrick</first><last>Martin</last></author>
+      <author id="m-patrick-martin"><first>M. Patrick</first><last>Martin</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>503–508</pages>
       <url hash="6a3da8bf">E17-2081</url>
@@ -2101,7 +2101,7 @@
     <paper id="82">
       <title>Integrating Semantic Knowledge into Lexical Embeddings Based on Information Content Measurement</title>
       <author><first>Hsin-Yang</first><last>Wang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>509–515</pages>
       <url hash="f143f7e4">E17-2082</url>
       <abstract>Distributional word representations are widely used in NLP tasks. These representations are based on an assumption that words with a similar context tend to have a similar meaning. To improve the quality of the context-based embeddings, many researches have explored how to make full use of existing lexical resources. In this paper, we argue that while we incorporate the prior knowledge with context-based embeddings, words with different occurrences should be treated differently. Therefore, we propose to rely on the measurement of information content to control the degree of applying prior knowledge into context-based embeddings - different words would have different learning rates when adjusting their embeddings. In the result, we demonstrate that our embeddings get significant improvements on two different tasks: Word Similarity and Analogical Reasoning.</abstract>
@@ -2110,7 +2110,7 @@
     <paper id="83">
       <title>Improving Neural Knowledge Base Completion with Cross-Lingual Projections</title>
       <author><first>Patrick</first><last>Klein</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>516–522</pages>
       <url hash="3518bd5d">E17-2083</url>
@@ -2132,7 +2132,7 @@
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>529–534</pages>
       <url hash="b6430d35">E17-2085</url>
       <abstract>Non-compositional phrases such as <i>red herring</i> and weakly compositional phrases such as <i>spelling bee</i> are an integral part of natural language (Sag, 2002). They are also the phrases that are difficult, or even impossible, for good compositional distributional models of semantics. Compositionality detection therefore provides a good testbed for compositional methods. We compare an integrated compositional distributional approach, using sparse high dimensional representations, with the ad-hoc compositional approach of applying simple composition operations to state-of-the-art neural embeddings.</abstract>
@@ -2141,7 +2141,7 @@
     <paper id="86">
       <title>Applying Multi-Sense Embeddings for <fixed-case>G</fixed-case>erman Verbs to Determine Semantic Relatedness and to Detect Non-Literal Language</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>535–542</pages>
       <url hash="1c6d8693">E17-2086</url>
       <abstract>Up to date, the majority of computational models still determines the semantic relatedness between words (or larger linguistic units) on the type level. In this paper, we compare and extend multi-sense embeddings, in order to model and utilise word senses on the token level. We focus on the challenging class of complex verbs, and evaluate the model variants on various semantic tasks: semantic classification; predicting compositionality; and detecting non-literal language usage. While there is no overall best model, all models significantly outperform a word2vec single-sense skip baseline, thus demonstrating the need to distinguish between word senses in a distributional semantic model.</abstract>
@@ -2150,8 +2150,8 @@
     <paper id="87">
       <title>Negative Sampling Improves Hypernymy Extraction Based on Projection Learning</title>
       <author><first>Dmitry</first><last>Ustalov</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>543–550</pages>
       <url hash="56b46ff5">E17-2087</url>
@@ -2161,7 +2161,7 @@
     <paper id="88">
       <title>A Dataset for Multi-Target Stance Detection</title>
       <author><first>Parinaz</first><last>Sobhani</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <pages>551–557</pages>
       <url hash="d2fae70b">E17-2088</url>
@@ -2181,8 +2181,8 @@
     <paper id="90">
       <title>Predicting Emotional Word Ratings using Distributional Representations and Signed Clustering</title>
       <author><first>João</first><last>Sedoc</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>564–571</pages>
       <url hash="e5e3dda5">E17-2090</url>
       <abstract>Inferring the emotional content of words is important for text-based sentiment analysis, dialogue systems and psycholinguistics, but word ratings are expensive to collect at scale and across languages or domains. We develop a method that automatically extends word-level ratings to unrated words using signed clustering of vector space word representations along with affect ratings. We use our method to determine a word’s valence and arousal, which determine its position on the circumplex model of affect, the most popular dimensional model of emotion. Our method achieves superior out-of-sample word rating prediction on both affective dimensions across three different languages when compared to state-of-the-art word similarity based methods. Our method can assist building word ratings for new languages and improve downstream tasks such as sentiment analysis and emotion detection.</abstract>
@@ -2199,7 +2199,7 @@
     </paper>
     <paper id="92">
       <title><fixed-case>E</fixed-case>mo<fixed-case>B</fixed-case>ank: Studying the Impact of Annotation Perspective and Representation Format on Dimensional Emotion Analysis</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>578–585</pages>
       <url hash="c5365d3a">E17-2092</url>
@@ -2218,7 +2218,7 @@
     <paper id="94">
       <title>Ranking Convolutional Recurrent Neural Networks for Purchase Stage Identification on Imbalanced <fixed-case>T</fixed-case>witter Data</title>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
       <pages>592–598</pages>
       <url hash="28daf44d">E17-2094</url>
@@ -2238,7 +2238,7 @@
     <paper id="96">
       <title>Reranking Translation Candidates Produced by Several Bilingual Word Similarity Sources</title>
       <author><first>Laurent</first><last>Jakubina</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>605–611</pages>
       <url hash="7bd5e73c">E17-2096</url>
       <abstract>We investigate the reranking of the output of several distributional approaches on the Bilingual Lexicon Induction task. We show that reranking an n-best list produced by any of those approaches leads to very substantial improvements. We further demonstrate that combining several n-best lists by reranking is an effective way of further boosting performance.</abstract>
@@ -2265,9 +2265,9 @@
     </paper>
     <paper id="99">
       <title>Addressing Problems across Linguistic Levels in <fixed-case>SMT</fixed-case>: Combining Approaches to Model Morphology, Syntax and Lexical Choice</title>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>625–630</pages>
       <url hash="c012935f">E17-2099</url>
       <abstract>Many errors in phrase-based SMT can be attributed to problems on three linguistic levels: morphological complexity in the target language, structural differences and lexical choice. We explore combinations of linguistically motivated approaches to address these problems in English-to-German SMT and show that they are complementary to one another, but also that the popular verbal pre-ordering can cause problems on the morphological and lexical level. A discriminative classifier can overcome these problems, in particular when enriching standard lexical features with features geared towards verbal inflection.</abstract>
@@ -2275,9 +2275,9 @@
     </paper>
     <paper id="100">
       <title>Machine Translation of <fixed-case>S</fixed-case>panish Personal and Possessive Pronouns Using Anaphora Probabilities</title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Annette</first><last>Rios Gonzales</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios Gonzales</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <pages>631–636</pages>
       <url hash="0b7f627f">E17-2100</url>
@@ -2300,7 +2300,7 @@
     <paper id="102">
       <title>Continuous multilinguality with language vectors</title>
       <author><first>Robert</first><last>Östling</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>644–649</pages>
       <url hash="e687c26d">E17-2102</url>
       <abstract>Most existing models for multilingual natural language processing (NLP) treat language as a discrete category, and make predictions for either one language or the other. In contrast, we propose using continuous vector representations of language. We show that these can be learned efficiently with a character-based neural language model, and used to improve inference about language varieties not seen during training. In experiments with 1303 Bible translations into 990 different languages, we empirically explore the capacity of multilingual language models, and also show that the language vectors capture genetic relationships between languages.</abstract>
@@ -2310,7 +2310,7 @@
       <title>Unsupervised Training for Large Vocabulary Translation Using Sparse Lexicon and Word Classes</title>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>650–656</pages>
       <url hash="aa00ab22">E17-2103</url>
       <abstract>We address for the first time unsupervised training for a translation task with hundreds of thousands of vocabulary words. We scale up the expectation-maximization (EM) algorithm to learn a large translation table without any parallel text or seed lexicon. First, we solve the memory bottleneck and enforce the sparsity with a simple thresholding scheme for the lexicon. Second, we initialize the lexicon training with word classes, which efficiently boosts the performance. Our methods produced promising results on two large-scale unsupervised translation tasks.</abstract>
@@ -2318,7 +2318,7 @@
     </paper>
     <paper id="104">
       <title>Co-reference Resolution of Elided Subjects and Possessive Pronouns in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Statistical Machine Translation</title>
-      <author><first>Annette</first><last>Rios Gonzales</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios Gonzales</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <pages>657–662</pages>
       <url hash="a696c26b">E17-2104</url>
@@ -2330,7 +2330,7 @@
       <author><first>Yandi</first><last>Xia</last></author>
       <author><first>Aaron</first><last>Levine</last></author>
       <author><first>Pradipto</first><last>Das</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <author><first>Keiji</first><last>Shinzato</last></author>
       <author><first>Ankur</first><last>Datta</last></author>
       <pages>663–668</pages>
@@ -2342,8 +2342,8 @@
       <title>Convolutional Neural Networks for Authorship Attribution of Short Texts</title>
       <author><first>Prasha</first><last>Shrestha</last></author>
       <author><first>Sebastian</first><last>Sierra</last></author>
-      <author><first>Fabio</first><last>González</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>669–674</pages>
@@ -2356,7 +2356,7 @@
       <author><first>Yinfei</first><last>Yang</last></author>
       <author><first>Cen</first><last>Chen</last></author>
       <author><first>Minghui</first><last>Qiu</last></author>
-      <author><first>Forrest</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last></author>
       <pages>675–680</pages>
       <url hash="4b848305">E17-2107</url>
       <abstract>Aspect extraction abstracts the common properties of objects from corpora discussing them, such as reviews of products. Recent work on aspect extraction is leveraging the hierarchical relationship between products and their categories. However, such effort focuses on the aspects of child categories but ignores those from parent categories. Hence, we propose an LDA-based generative topic model inducing the two-layer categorical information (CAT-LDA), to balance the aspects of both a parent category and its child categories. Our hypothesis is that child categories inherit aspects from parent categories, controlled by the hierarchy between them. Experimental results on 5 categories of Amazon.com products show that both common aspects of parent category and the individual aspects of sub-categories can be extracted to align well with the common sense. We further evaluate the manually extracted aspects of 16 products, resulting in an average hit rate of 79.10%.</abstract>
@@ -2364,7 +2364,7 @@
     </paper>
     <paper id="108">
       <title>On the Relevance of Syntactic and Discourse Features for Author Profiling and Identification</title>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>681–687</pages>
       <url hash="59224b48">E17-2108</url>
@@ -2375,7 +2375,7 @@
       <title>Unsupervised Cross-Lingual Scaling of Political Texts</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>688–693</pages>
       <url hash="73a998cc">E17-2109</url>
       <abstract>Political text scaling aims to linearly order parties and politicians across political dimensions (e.g., left-to-right ideology) based on textual content (e.g., politician speeches or party manifestos). Existing models scale texts based on relative word usage and cannot be used for cross-lingual analyses. Additionally, there is little quantitative evidence that the output of these models correlates with common political dimensions like left-to-right orientation. Experimental results show that the semantically-informed scaling models better predict the party positions than the existing word-based models in two different political dimensions. Furthermore, the proposed models exhibit no drop in performance in the cross-lingual compared to monolingual setting.</abstract>
@@ -2393,10 +2393,10 @@
     </paper>
     <paper id="111">
       <title>Multimodal Topic Labelling</title>
-      <author><first>Ionut</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut</first><last>Sorodoc</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>701–706</pages>
       <url hash="937a9d0f">E17-2111</url>
       <abstract>Topics generated by topic models are typically presented as a list of topic terms. Automatic topic labelling is the task of generating a succinct label that summarises the theme or subject of a topic, with the intention of reducing the cognitive load of end-users when interpreting these topics. Traditionally, topic label systems focus on a single label modality, e.g. textual labels. In this work we propose a multimodal approach to topic labelling using a simple feedforward neural network. Given a topic and a candidate image or textual label, our method automatically generates a rating for the label, relative to the topic. Experiments show that this multimodal approach outperforms single-modality topic labelling systems.</abstract>
@@ -2405,7 +2405,7 @@
     <paper id="112">
       <title>Detecting (Un)Important Content for Single-Document News Summarization</title>
       <author><first>Yinfei</first><last>Yang</last></author>
-      <author><first>Forrest</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>707–712</pages>
       <url hash="28c45c10">E17-2112</url>
@@ -2454,7 +2454,7 @@
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>739–745</pages>
       <url hash="bbda1185">E17-2117</url>
       <abstract>In this paper, we present a method for temporal relation extraction from clinical narratives in French and in English. We experiment on two comparable corpora, the MERLOT corpus and the THYME corpus, and show that a common approach can be used for both languages.</abstract>
@@ -2463,7 +2463,7 @@
     <paper id="118">
       <title>Neural Temporal Relation Extraction</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Chen</first><last>Lin</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -2476,7 +2476,7 @@
       <title>End-to-End Trainable Attentive Decoder for Hierarchical Entity Classification</title>
       <author><first>Sanjeev Kumar</first><last>Karn</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>752–758</pages>
       <url hash="10d48bea">E17-2119</url>
       <abstract>We address fine-grained entity classification and propose a novel attention-based recurrent neural network (RNN) encoder-decoder that generates paths in the type hierarchy and can be trained end-to-end. We show that our model performs better on fine-grained entity classification than prior work that relies on flat or local classifiers that do not directly model hierarchical structure.</abstract>
@@ -2497,8 +2497,8 @@
     <meta>
       <booktitle>Proceedings of the Software Demonstrations of the 15th Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="e7754cd9">E17-3</url>
-      <editor><first>André</first><last>Martins</last></editor>
-      <editor><first>Anselmo</first><last>Peñas</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
+      <editor id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
       <month>April</month>
@@ -2511,7 +2511,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>COVER</fixed-case>: Covering the Semantically Tractable Questions</title>
-      <author><first>Michael</first><last>Minock</last></author>
+      <author id="michael-minock"><first>Michael</first><last>Minock</last></author>
       <pages>1–4</pages>
       <url hash="15a9b77e">E17-3001</url>
       <abstract>In semantic parsing, natural language questions map to expressions in a meaning representation language (MRL) over some fixed vocabulary of predicates. To do this reliably, one must guarantee that for a wide class of natural language questions (the so called semantically tractable questions), correct interpretations are always in the mapped set of possibilities. In this demonstration, we introduce the system COVER which significantly clarifies, revises and extends the basic notion of semantic tractability. COVER achieves coverage of 89% while the earlier PRECISE system achieved coverage of 77% on the well known GeoQuery corpus. Like PRECISE, COVER requires only a simple domain lexicon and integrates off-the-shelf syntactic parsers. Beyond PRECISE, COVER also integrates off-the-shelf theorem provers to provide more accurate results. COVER is written in Python and uses the NLTK.</abstract>
@@ -2526,10 +2526,10 @@
       <author><first>Renlong</first><last>Ai</last></author>
       <author><first>Stephan</first><last>Busemann</last></author>
       <author><first>Jon</first><last>Dehdari</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Georg</first><last>Heigold</last></author>
       <author><first>Nils</first><last>Rethmeier</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Sven</first><last>Schmeier</last></author>
       <author><first>Philippe</first><last>Thomas</last></author>
       <author><first>He</first><last>Wang</last></author>
@@ -2551,7 +2551,7 @@
       <title><fixed-case>WAT</fixed-case>-<fixed-case>SL</fixed-case>: A Customizable Web Annotation Tool for Segment Labeling</title>
       <author><first>Johannes</first><last>Kiesel</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>13–16</pages>
       <url hash="08698d8a">E17-3004</url>
@@ -2605,8 +2605,8 @@
     </paper>
     <paper id="9">
       <title><fixed-case>CASSANDRA</fixed-case>: A multipurpose configurable voice-enabled human-computer-interface</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Sonia</first><last>Pipa</last></author>
       <pages>33–36</pages>
       <url hash="f53bafdb">E17-3009</url>
@@ -2662,7 +2662,7 @@
     </paper>
     <paper id="15">
       <title>The ar<fixed-case>T</fixed-case>ext prototype: An automatic system for writing specialized texts</title>
-      <author><first>Iria</first><last>da Cunha</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
       <author><first>M. Amor</first><last>Montané</last></author>
       <author><first>Luis</first><last>Hysa</last></author>
       <pages>57–60</pages>
@@ -2680,7 +2680,7 @@
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Hamdy</first><last>Mubarak</last></author>
       <author><first>Ahmed</first><last>Ali</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>61–64</pages>
       <url hash="735856f5">E17-3016</url>
       <abstract>This paper presents QCRI’s Arabic-to-English live speech translation system. It features modern web technologies to capture live audio, and broadcasts Arabic transcriptions and English translations simultaneously. Our Kaldi-based ASR system uses the Time Delay Neural Network (TDNN) architecture, while our Machine Translation (MT) system uses both phrase-based and neural frameworks. Although our neural MT system is slower than the phrase-based system, it produces significantly better translations and is memory efficient. The demo is available at <url>https://st.qcri.org/demos/livetranslation</url>.</abstract>
@@ -2696,9 +2696,9 @@
       <author><first>Julian</first><last>Hitschler</last></author>
       <author><first>Marcin</first><last>Junczys-Dowmunt</last></author>
       <author><first>Samuel</first><last>Läubli</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Jozef</first><last>Mokry</last></author>
-      <author><first>Maria</first><last>Nădejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
       <pages>65–68</pages>
       <url hash="488e8266">E17-3017</url>
       <abstract>We present Nematus, a toolkit for Neural Machine Translation. The toolkit prioritizes high translation accuracy, usability, and extensibility. Nematus has been used to build top-performing submissions to shared translation tasks at WMT and IWSLT, and has been used to train systems for production environments.</abstract>
@@ -2718,7 +2718,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>L</fixed-case>ingmotif: Sentiment Analysis for the Digital Humanities</title>
-      <author><first>Antonio</first><last>Moreno-Ortiz</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno-Ortiz</last></author>
       <pages>73–76</pages>
       <url hash="a49328f7">E17-3019</url>
       <abstract>Lingmotif is a lexicon-based, linguistically-motivated, user-friendly, GUI-enabled, multi-platform, Sentiment Analysis desktop application. Lingmotif can perform SA on any type of input texts, regardless of their length and topic. The analysis is based on the identification of sentiment-laden words and phrases contained in the application’s rich core lexicons, and employs context rules to account for sentiment shifters. It offers easy-to-interpret visual representations of quantitative data (text polarity, sentiment intensity, sentiment profile), as well as a detailed, qualitative analysis of the text in terms of its sentiment. Lingmotif can also take user-provided plugin lexicons in order to account for domain-specific sentiment expression. Lingmotif currently analyzes English and Spanish texts.</abstract>
@@ -2727,7 +2727,7 @@
     <paper id="20">
       <title><fixed-case>RAMBLE</fixed-case> <fixed-case>ON</fixed-case>: Tracing Movements of Popular Historical Figures</title>
       <author><first>Stefano</first><last>Menini</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
       <author><first>Enrico</first><last>Bignotti</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
@@ -2817,7 +2817,7 @@
       <title>The <fixed-case>SUMMA</fixed-case> Platform Prototype</title>
       <author><first>Renars</first><last>Liepins</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <author><first>Susanne</first><last>Weber</last></author>
@@ -2827,11 +2827,11 @@
       <author><first>Ondřej</first><last>Klejch</last></author>
       <author><first>Peter</first><last>Bell</last></author>
       <author><first>Alexandros</first><last>Lazaridis</last></author>
-      <author><first>Alfonso</first><last>Mendes</last></author>
+      <author id="alfonso-mendes"><first>Alfonso</first><last>Mendes</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
-      <author><first>Pedro</first><last>Balage</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Balage</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Tomasz</first><last>Dwojak</last></author>
       <author><first>Philip N.</first><last>Garner</last></author>
       <author><first>Andreas</first><last>Giefer</last></author>
@@ -2840,8 +2840,8 @@
       <author><first>David</first><last>Nogueira</last></author>
       <author><first>Ahmed</first><last>Ali</last></author>
       <author><first>Sebastião</first><last>Miranda</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Lesly</first><last>Miculicich Werlen</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich Werlen</last></author>
       <author><first>Nikos</first><last>Papasarantopoulos</last></author>
       <author><first>Abiola</first><last>Obamuyide</last></author>
       <author><first>Clive</first><last>Jones</last></author>
@@ -2857,7 +2857,7 @@
       <author><first>Sameer</first><last>Khurana</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>David</first><last>Sheppey</last></author>
       <author><first>Chris</first><last>Hernon</last></author>
       <author><first>Jeff</first><last>Mitchell</last></author>
@@ -2874,7 +2874,7 @@
       <editor><first>Florian</first><last>Kunneman</last></editor>
       <editor><first>Uxoa</first><last>Iñurrieta</last></editor>
       <editor><first>John J.</first><last>Camilleri</last></editor>
-      <editor><first>Mariona Coll</first><last>Ardanuy</last></editor>
+      <editor id="mariona-coll-ardanuy"><first>Mariona Coll</first><last>Ardanuy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
       <month>April</month>
@@ -2913,7 +2913,7 @@
     <paper id="4">
       <title>Discourse Relations and Conjoined <fixed-case>VP</fixed-case>s: Automated Sense Recognition</title>
       <author><first>Valentina</first><last>Pyatkin</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>33–42</pages>
       <url hash="7f3904e3">E17-4004</url>
       <abstract>Sense classification of discourse relations is a sub-task of shallow discourse parsing. Discourse relations can occur both across sentences (<i>inter-sentential</i>) and within sentences (<i>intra-sentential</i>), and more than one discourse relation can hold between the same units. Using a newly available corpus of discourse-annotated intra-sentential conjoined verb phrases, we demonstrate a sequential classification pipeline for their multi-label sense classification. We assess the importance of each feature used in the classification, the feature scope, and what is lost in moving from gold standard manual parses to the output of an off-the-shelf parser.</abstract>
@@ -2942,7 +2942,7 @@
     <paper id="7">
       <title>Automatic Extraction of News Values from Headline Text</title>
       <author><first>Alicja</first><last>Piotrkowicz</last></author>
-      <author><first>Vania</first><last>Dimitrova</last></author>
+      <author id="vania-dimitrova"><first>Vania</first><last>Dimitrova</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <pages>64–74</pages>
       <url hash="b29fc8dd">E17-4007</url>
@@ -2989,7 +2989,7 @@
       <title>Evaluating the Reliability and Interaction of Recursively Used Feature Classes for Terminology Extraction</title>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>113–121</pages>
       <url hash="d198457e">E17-4012</url>
       <abstract>Feature design and selection is a crucial aspect when treating terminology extraction as a machine learning classification problem. We designed feature classes which characterize different properties of terms based on distributions, and propose a new feature class for components of term candidates. By using random forests, we infer optimal features which are later used to build decision tree classifiers. We evaluate our method using the ACL RD-TEC dataset. We demonstrate the importance of the novel feature class for downgrading termhood which exploits properties of term components. Furthermore, our classification suggests that the identification of reliable term candidates should be performed successively, rather than just once.</abstract>
@@ -3010,9 +3010,9 @@
     <paper id="1">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <url hash="4dacb20e">E17-5001</url>
       <abstract>Universal Dependencies (UD) is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages. This tutorial gives an introduction to the UD framework and resources, from basic design principles to annotation guidelines and existing treebanks. We also discuss tools for developing and exploiting UD treebanks and survey applications of UD in NLP and linguistics.</abstract>
       <bibkey>nivre-etal-2017-universal</bibkey>
@@ -3065,7 +3065,7 @@ The goal of this tutorial is to introduce the computational framework to broader
     </paper>
     <paper id="6">
       <title>Building Multimodal Simulations for Natural Language</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <url hash="13ce8c26">E17-5006</url>
       <abstract>In this tutorial, we introduce a computational framework and modeling language (VoxML) for composing multimodal simulations of natural language expressions within a 3D simulation environment (VoxSim). We demonstrate how to construct voxemes, which are visual object representations of linguistic entities. We also show how to compose events and actions over these objects, within a restricted domain of dynamics. This gives us the building blocks to simulate narratives of multiple events or participate in a multimodal dialogue with synthetic agents in the simulation environment. To our knowledge, this is the first time such material has been presented as a tutorial within the CL community.
diff --git a/data/xml/E83.xml b/data/xml/E83.xml
index d49e44fb0b..b580459990 100644
--- a/data/xml/E83.xml
+++ b/data/xml/E83.xml
@@ -34,14 +34,14 @@
     </paper>
     <paper id="4">
       <title>Structure of Sentence and Inferencing in Question Answering</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="2e6e9f6a">E83-1004</url>
       <bibkey>hajicova-sgall-1983-structure</bibkey>
     </paper>
     <paper id="5">
       <title>A Phonological Processor for <fixed-case>I</fixed-case>talian</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <url hash="5cacb841">E83-1005</url>
       <bibkey>delmonte-1983-phonological</bibkey>
     </paper>
@@ -61,10 +61,10 @@
     </paper>
     <paper id="8">
       <title>Knowledge Engineering Approach to Morphological Analysis</title>
-      <author><first>Harri</first><last>Jäppinen</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
-      <author><first>Esa</first><last>Nelimarkka</last></author>
-      <author><first>Matti</first><last>Ylilammi</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
+      <author id="esa-nelimarkka"><first>Esa</first><last>Nelimarkka</last></author>
+      <author id="matti-ylilammi"><first>Matti</first><last>Ylilammi</last></author>
       <url hash="eb94e766">E83-1008</url>
       <bibkey>jappinen-etal-1983-knowledge</bibkey>
     </paper>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="10">
       <title>Extended Access to the Left Context in an <fixed-case>ATN</fixed-case> Parser</title>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <author><first>Giacomo</first><last>Ferrari</last></author>
       <url hash="9c95cdcf">E83-1010</url>
       <bibkey>prodanof-ferrari-1983-extended</bibkey>
@@ -108,20 +108,20 @@
     </paper>
     <paper id="15">
       <title>The Generation of Term Definitions From an On-Line Terminological Thesaurus</title>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <url hash="e2a10d89">E83-1015</url>
       <bibkey>mcnaught-1983-generation</bibkey>
     </paper>
     <paper id="16">
       <title>Relating Syntax and Semantics: The Syntactico-Semantic Lexicon of the System <fixed-case>VIE-LANG</fixed-case></title>
       <author><first>Ingeborg</first><last>Steinacker</last></author>
-      <author><first>Ernst</first><last>Buchberger</last></author>
+      <author id="ernst-buchberger"><first>Ernst</first><last>Buchberger</last></author>
       <url hash="a0bf570b">E83-1016</url>
       <bibkey>steinacker-buchberger-1983-relating</bibkey>
     </paper>
     <paper id="17">
       <title>An Island Parsing Interpreter for the Full Augmented Transition Network Formalism</title>
-      <author><first>John A.</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John A.</first><last>Carroll</last></author>
       <url hash="75575fe1">E83-1017</url>
       <bibkey>carroll-1983-island</bibkey>
     </paper>
@@ -142,15 +142,15 @@
     </paper>
     <paper id="20">
       <title>A Flexible Natural Language Parser Based on a Two-Level Representation of Syntax</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Pietro</first><last>Torasso</last></author>
       <url hash="c2b82474">E83-1020</url>
       <bibkey>lesmo-torasso-1983-flexible</bibkey>
     </paper>
     <paper id="21">
       <title>An Approach to Natural Language in the <fixed-case>SI-N</fixed-case>ets Paradigm</title>
-      <author><first>Amedeo</first><last>Cappelli</last></author>
-      <author><first>Lorenzo</first><last>Moretti</last></author>
+      <author id="amedeo-cappelli"><first>Amedeo</first><last>Cappelli</last></author>
+      <author id="lorenzo-moretti"><first>Lorenzo</first><last>Moretti</last></author>
       <url hash="4e6304e7">E83-1021</url>
       <bibkey>cappelli-moretti-1983-approach</bibkey>
     </paper>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="25">
       <title>Rules for Pronominalization</title>
-      <author><first>Franz</first><last>Guenthner</last></author>
+      <author id="franz-guenthner"><first>Franz</first><last>Guenthner</last></author>
       <author><first>Hubert</first><last>Lehmann</last></author>
       <url hash="04f1a0a9">E83-1025</url>
       <bibkey>guenthner-lehmann-1983-rules</bibkey>
@@ -192,20 +192,20 @@
     </paper>
     <paper id="27">
       <title>Systemic Grammar in Computation: The <fixed-case>N</fixed-case>igel Case</title>
-      <author><first>Christian M.I.M.</first><last>Matthiessen</last></author>
+      <author id="christian-m-i-m-matthiessen"><first>Christian M.I.M.</first><last>Matthiessen</last></author>
       <url hash="715a068e">E83-1027</url>
       <bibkey>matthiessen-1983-systemic</bibkey>
     </paper>
     <paper id="28">
       <title>Inquiry Semantics: A Functional Semantics of Natural Language Grammar</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <url hash="f20d6936">E83-1028</url>
       <bibkey>mann-1983-inquiry</bibkey>
     </paper>
     <paper id="29">
       <title>Natural Language Input for Scene Generation</title>
-      <author><first>Giovanni</first><last>Adorni</last></author>
-      <author><first>Mauro</first><last>Di Manzo</last></author>
+      <author id="giovanni-adorni"><first>Giovanni</first><last>Adorni</last></author>
+      <author id="mauro-di-manzo"><first>Mauro</first><last>Di Manzo</last></author>
       <url hash="ea341571">E83-1029</url>
       <bibkey>adorni-di-manzo-1983-natural</bibkey>
     </paper>
@@ -218,7 +218,7 @@
     <paper id="31">
       <title>Case Role Filling as a Side Effect of Visual Search</title>
       <author><first>Heinz</first><last>Marburger</last></author>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <url hash="13a0f220">E83-1031</url>
       <bibkey>marburger-wahlster-1983-case</bibkey>
     </paper>
diff --git a/data/xml/E85.xml b/data/xml/E85.xml
index 73f16d12c4..d7d4abd412 100644
--- a/data/xml/E85.xml
+++ b/data/xml/E85.xml
@@ -40,8 +40,8 @@
     </paper>
     <paper id="5">
       <title>The Specification of Time Meaning for Machine Translation</title>
-      <author><first>Frank</first><last>van Eynde</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>van Eynde</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <author><first>Fons</first><last>Maes</last></author>
       <url hash="b11e3ccb">E85-1005</url>
       <bibkey>van-eynde-etal-1985-specification</bibkey>
@@ -79,9 +79,9 @@
     </paper>
     <paper id="11">
       <title>Various Representations of Text Proposed for <fixed-case>E</fixed-case>urotra</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Nelson</first><last>Verastegui</last></author>
-      <author><first>Daniel</first><last>Bachut</last></author>
+      <author id="daniel-bachut"><first>Daniel</first><last>Bachut</last></author>
       <url hash="aceccdfb">E85-1011</url>
       <bibkey>boitet-etal-1985-various</bibkey>
     </paper>
@@ -93,7 +93,7 @@
     </paper>
     <paper id="13">
       <title>Right Attachment and Preference Semantics .</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="16a2a750">E85-1013</url>
       <bibkey>wilks-1985-right</bibkey>
     </paper>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="17">
       <title>On the Representation of Query Term Relations by Soft <fixed-case>B</fixed-case>oolean Operators</title>
-      <author><first>Gerard</first><last>Salton</last></author>
+      <author id="gerard-salton"><first>Gerard</first><last>Salton</last></author>
       <url hash="38c6a81b">E85-1017</url>
       <bibkey>salton-1985-representation</bibkey>
     </paper>
@@ -138,13 +138,13 @@
     </paper>
     <paper id="20">
       <title>Parsing Difficulties &amp; Phonological Processing in <fixed-case>I</fixed-case>talian</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <url hash="de469f7f">E85-1020</url>
       <bibkey>delmonte-1985-parsing</bibkey>
     </paper>
     <paper id="21">
       <title>Design and Implementation of a Lexical Data Base</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <url hash="953e071a">E85-1021</url>
       <bibkey>wehrli-1985-design</bibkey>
     </paper>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="23">
       <title>A Probabilistic Approach to Grammatical Analysis of Written <fixed-case>E</fixed-case>nglish by Computer.</title>
-      <author><first>Andrew David</first><last>Beale</last></author>
+      <author id="andrew-david-beale"><first>Andrew David</first><last>Beale</last></author>
       <url hash="b53b767d">E85-1023</url>
       <bibkey>beale-1985-probabilistic</bibkey>
     </paper>
@@ -170,9 +170,9 @@
     </paper>
     <paper id="25">
       <title>Towards a Dictionary Support Environment for Realtime Parsing</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Bran</first><last>Boguraev</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="branimir-boguraev"><first>Bran</first><last>Boguraev</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="de90fc3d">E85-1025</url>
       <bibkey>alshawi-etal-1985-towards</bibkey>
     </paper>
@@ -184,14 +184,14 @@
     </paper>
     <paper id="27">
       <title>A Computational Theory of Prose Style for Natural Language Generation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
-      <author><first>James D.</first><last>Pustejovsky</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
+      <author id="james-pustejovsky"><first>James D.</first><last>Pustejovsky</last></author>
       <url hash="a918e566">E85-1027</url>
       <bibkey>mcdonald-pustejovsky-1985-computational</bibkey>
     </paper>
     <paper id="28">
       <title>An <fixed-case>E</fixed-case>nglish Generator for a Case-Labelled Dependency Representation</title>
-      <author><first>John Irving</first><last>Tait</last></author>
+      <author id="john-tait"><first>John Irving</first><last>Tait</last></author>
       <url hash="e790bc73">E85-1028</url>
       <bibkey>tait-1985-english</bibkey>
     </paper>
@@ -221,8 +221,8 @@
     </paper>
     <paper id="32">
       <title>Non Standard Uses of If</title>
-      <author><first>D.S.</first><last>Bree</last></author>
-      <author><first>R.A.</first><last>Smit</last></author>
+      <author id="d-s-bree"><first>D.S.</first><last>Bree</last></author>
+      <author id="r-a-smit"><first>R.A.</first><last>Smit</last></author>
       <url hash="8bbbc82e">E85-1032</url>
       <bibkey>bree-smit-1985-non</bibkey>
     </paper>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="37">
       <title>A Problem Solving Approach to Generating Text From Systemic Grammars</title>
-      <author><first>Terry</first><last>Patten</last></author>
+      <author id="terry-patten"><first>Terry</first><last>Patten</last></author>
       <url hash="6c06182e">E85-1037</url>
       <bibkey>patten-1985-problem</bibkey>
     </paper>
@@ -271,8 +271,8 @@
     </paper>
     <paper id="39">
       <title>Towards an Automatic Identification of Topic and Focus</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="9f37b2a6">E85-1039</url>
       <bibkey>hajicova-sgall-1985-towards</bibkey>
     </paper>
diff --git a/data/xml/E87.xml b/data/xml/E87.xml
index d4af99ca22..b49a881988 100644
--- a/data/xml/E87.xml
+++ b/data/xml/E87.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Third Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
       <month>April</month>
@@ -28,10 +28,10 @@
     </paper>
     <paper id="3">
       <title>Formalisms for Morphographemic Description</title>
-      <author><first>Alan</first><last>Black</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <author><first>Steve</first><last>Pulman</last></author>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <url hash="daa27693">E87-1003</url>
       <bibkey>black-etal-1987-formalisms</bibkey>
     </paper>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="7">
       <title>How to Detect Grammatical Errors in a Text Without Parsing It</title>
-      <author><first>Eric Steven</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric Steven</first><last>Atwell</last></author>
       <url hash="faa2d14f">E87-1007</url>
       <bibkey>atwell-1987-detect</bibkey>
     </paper>
@@ -78,16 +78,16 @@
     </paper>
     <paper id="10">
       <title>Pattern Recognition Applied to the Acquisition of a Grammatical Classification System From Unrestricted <fixed-case>E</fixed-case>nglish Text</title>
-      <author><first>Eric Steven</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric Steven</first><last>Atwell</last></author>
       <author><first>Nicos Frixou</first><last>Drakos</last></author>
       <url hash="0122887b">E87-1010</url>
       <bibkey>atwell-drakos-1987-pattern</bibkey>
     </paper>
     <paper id="11">
       <title>A Multi-Purpose Interface to an On-line Dictionary</title>
-      <author><first>Branimir</first><last>Boguraev</last></author>
-      <author><first>David</first><last>Carter</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="6a03d1e7">E87-1011</url>
       <bibkey>boguraev-etal-1987-multi</bibkey>
     </paper>
@@ -128,38 +128,38 @@
     </paper>
     <paper id="17">
       <title>Dictionary Organization for Machine Translation: The Experience and Implications of the <fixed-case>UMIST</fixed-case> <fixed-case>J</fixed-case>apanese Project</title>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <author><first>Elaine</first><last>Pollard</last></author>
-      <author><first>Heather</first><last>Horsfall</last></author>
-      <author><first>Natsuko</first><last>Holden</last></author>
-      <author><first>Brian</first><last>Chandler</last></author>
-      <author><first>Jeremy</first><last>Carroll</last></author>
+      <author id="heather-horsfall"><first>Heather</first><last>Horsfall</last></author>
+      <author id="natsuko-holden"><first>Natsuko</first><last>Holden</last></author>
+      <author id="brian-j-chandler"><first>Brian</first><last>Chandler</last></author>
+      <author id="jeremy-j-carroll"><first>Jeremy</first><last>Carroll</last></author>
       <url hash="d9a65b02">E87-1017</url>
       <bibkey>wood-etal-1987-dictionary</bibkey>
     </paper>
     <paper id="18">
       <title>Machine Translation, Linguistics, and Interlingua</title>
-      <author><first>Petr</first><last>Sgall</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <url hash="c73adc80">E87-1018</url>
       <bibkey>sgall-panevova-1987-machine</bibkey>
     </paper>
     <paper id="19">
       <title>Fail-Soft (“Emergency”) Measures in a Production-Oriented <fixed-case>MT</fixed-case> System</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <author><first>Zdenek</first><last>Kirschner</last></author>
       <url hash="3efc15fd">E87-1019</url>
       <bibkey>hajicova-kirschner-1987-fail</bibkey>
     </paper>
     <paper id="20">
       <title><fixed-case>REFTEX</fixed-case> - A Context-Based Translation Aid</title>
-      <author><first>Poul Soren</first><last>Kjaersgaard</last></author>
+      <author id="poul-soren-kjaersgaard"><first>Poul Soren</first><last>Kjaersgaard</last></author>
       <url hash="e85278f9">E87-1020</url>
       <bibkey>kjaersgaard-1987-reftex</bibkey>
     </paper>
     <paper id="21">
       <title><fixed-case>RUSLAN</fixed-case> - An <fixed-case>MT</fixed-case> System Between Closely Related Languages</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <url hash="a91d2afa">E87-1021</url>
       <bibkey>hajic-1987-ruslan</bibkey>
     </paper>
@@ -167,17 +167,17 @@
       <title>Subgrammars, Rule Classes and Control in the <fixed-case>R</fixed-case>osetta Translation System</title>
       <author><first>Lisette</first><last>Appelo</last></author>
       <author><first>Carel</first><last>Fellinger</last></author>
-      <author><first>Jan</first><last>Landsbergen</last></author>
+      <author id="jan-landsbergen"><first>Jan</first><last>Landsbergen</last></author>
       <url hash="9c616086">E87-1022</url>
       <bibkey>appelo-etal-1987-subgrammars</bibkey>
     </paper>
     <paper id="23">
       <title>A Model for Preference</title>
       <author><first>Dominique</first><last>Petitpierre</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <author><first>Doug</first><last>Arnold</last></author>
-      <author><first>Giovanni B.</first><last>Varile</last></author>
+      <author id="giovanni-battista-varile"><first>Giovanni B.</first><last>Varile</last></author>
       <url hash="8871eaee">E87-1023</url>
       <bibkey>petitpierre-etal-1987-model</bibkey>
     </paper>
@@ -201,7 +201,7 @@
     </paper>
     <paper id="27">
       <title>String-Tree Correspondence Grammar: A Declarative Grammar Formalism for Defining the Correspondence Between Strings of Terms and Tree Structures</title>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <url hash="9bb72a5a">E87-1027</url>
       <bibkey>yusoff-1987-string</bibkey>
     </paper>
@@ -230,14 +230,14 @@
       <title>Planning for Problem Formulation in Advice-Giving Dialogue</title>
       <author><first>Paul</first><last>Decitre</last></author>
       <author><first>Thomas</first><last>Grossi</last></author>
-      <author><first>Cléo</first><last>Jullien</last></author>
+      <author id="cleo-jullien"><first>Cléo</first><last>Jullien</last></author>
       <author><first>Jean-Philippe</first><last>Solvay</last></author>
       <url hash="1b165de3">E87-1031</url>
       <bibkey>decitre-etal-1987-planning</bibkey>
     </paper>
     <paper id="32">
       <title>Modeling Extemporaneous Elaboration</title>
-      <author><first>Marie A.</first><last>Bienkowski</last></author>
+      <author id="marie-a-bienkowski"><first>Marie A.</first><last>Bienkowski</last></author>
       <url hash="17c7ccb9">E87-1032</url>
       <bibkey>bienkowski-1987-modeling</bibkey>
     </paper>
@@ -246,13 +246,13 @@
       <author><first>Massimo</first><last>Marino</last></author>
       <author><first>Antonella</first><last>Spiezio</last></author>
       <author><first>Giacomo</first><last>Ferrari</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <url hash="517bd22f">E87-1033</url>
       <bibkey>marino-etal-1987-efficient</bibkey>
     </paper>
     <paper id="34">
       <title>Discontinuous Constituents in Trees, Rules, and Parsing</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Jan</first><last>Thesingh</last></author>
       <author><first>Ko</first><last>van der Sloot</last></author>
       <url hash="a248090d">E87-1034</url>
@@ -260,7 +260,7 @@
     </paper>
     <paper id="35">
       <title>Deterministic Parsing and Unbounded Dependencies</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="1fa8ce10">E87-1035</url>
       <bibkey>briscoe-1987-deterministic</bibkey>
     </paper>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="37">
       <title>A Comparison of Rule-Invocation Strategies in Context-Free Chart Parsing</title>
-      <author><first>Mats</first><last>Wiren</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wiren</last></author>
       <url hash="02f304e2">E87-1037</url>
       <bibkey>wiren-1987-comparison</bibkey>
     </paper>
@@ -287,21 +287,21 @@
     </paper>
     <paper id="39">
       <title>Acquisition of Conceptual Data Models from Natural Language Descriptions</title>
-      <author><first>William J.</first><last>Black</last></author>
+      <author id="william-j-black"><first>William J.</first><last>Black</last></author>
       <url hash="be72dda5">E87-1039</url>
       <bibkey>black-1987-acquisition</bibkey>
     </paper>
     <paper id="40">
       <title>A Structured Representation of Word-Senses for Semantic Analysis.</title>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="37fc7c6e">E87-1040</url>
       <bibkey>pazienza-velardi-1987-structured</bibkey>
     </paper>
     <paper id="41">
       <title>Situations and Prepositional Phrases</title>
       <author><first>Erik</first><last>Colban</last></author>
-      <author><first>Jens Erik</first><last>Fenstad</last></author>
+      <author id="jens-erik-fenstad"><first>Jens Erik</first><last>Fenstad</last></author>
       <url hash="ee7c4d61">E87-1041</url>
       <bibkey>colban-fenstad-1987-situations</bibkey>
     </paper>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="43">
       <title>Iteration, Habituality and Verb Form Semantics</title>
-      <author><first>Frank</first><last>van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>van Eynde</last></author>
       <url hash="57c95bbf">E87-1043</url>
       <bibkey>van-eynde-1987-iteration</bibkey>
     </paper>
diff --git a/data/xml/E89.xml b/data/xml/E89.xml
index 79f0ac95c0..a7d7c37e49 100644
--- a/data/xml/E89.xml
+++ b/data/xml/E89.xml
@@ -3,8 +3,8 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Fourth Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Harold</first><last>Somers</last></editor>
-      <editor><first>Mary</first><last>McGee Wood</last></editor>
+      <editor id="harold-somers"><first>Harold</first><last>Somers</last></editor>
+      <editor id="mary-mcgee-wood"><first>Mary</first><last>McGee Wood</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Manchester, England</address>
       <month>April</month>
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Parsing Idioms in Lexicalized <fixed-case>TAG</fixed-case>s</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="3878bd88">E89-1001</url>
       <bibkey>abeille-schabes-1989-parsing</bibkey>
@@ -44,32 +44,32 @@
     </paper>
     <paper id="5">
       <title>A Metaplan Model for Problem-Solving Discourse</title>
-      <author><first>Lance A.</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance A.</first><last>Ramshaw</last></author>
       <url hash="5779f527">E89-1005</url>
       <bibkey>ramshaw-1989-metaplan</bibkey>
     </paper>
     <paper id="6">
       <title>Tenses as Anaphora</title>
       <author><first>Kurt</first><last>Eberle</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
       <url hash="e53e5381">E89-1006</url>
       <bibkey>eberle-kasper-1989-tenses</bibkey>
     </paper>
     <paper id="7">
       <title>On the Generative Power of Two-Level Morphological Rules</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <url hash="3d65afc4">E89-1007</url>
       <bibkey>ritchie-1989-generative</bibkey>
     </paper>
     <paper id="8">
       <title>Paradigmatic Morphology</title>
-      <author><first>Jonathan</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jonathan</first><last>Calder</last></author>
       <url hash="3679017c">E89-1008</url>
       <bibkey>calder-1989-paradigmatic</bibkey>
     </paper>
     <paper id="9">
       <title>Inference in <fixed-case>DATR</fixed-case></title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Gerald</first><last>Gazdar</last></author>
       <url hash="2b5436d2">E89-1009</url>
       <bibkey>evans-gazdar-1989-inference</bibkey>
@@ -78,33 +78,33 @@
       <title>Ambiguity Resolution in the <fixed-case>DMTRANS</fixed-case> <fixed-case>PLUS</fixed-case></title>
       <author><first>Hiroaki</first><last>Kitano</last></author>
       <author><first>Hideto</first><last>Tomabechi</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="b3e1cd32">E89-1010</url>
       <bibkey>kitano-etal-1989-ambiguity</bibkey>
     </paper>
     <paper id="11">
       <title>The Organization of the <fixed-case>R</fixed-case>osetta Grammars</title>
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <url hash="3ee48e75">E89-1011</url>
       <bibkey>odijk-1989-organization</bibkey>
     </paper>
     <paper id="12">
       <title>Programming in Logic with Constraints for Natural Language Processing</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="14384bec">E89-1012</url>
       <bibkey>saint-dizier-1989-programming</bibkey>
     </paper>
     <paper id="13">
       <title><fixed-case>JPSG</fixed-case> Parser on Constraint Logic Programming</title>
       <author><first>Hirosi</first><last>Tuda</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Hidetosi</first><last>Sirai</last></author>
       <url hash="d2a8ddda">E89-1013</url>
       <bibkey>tuda-etal-1989-jpsg</bibkey>
     </paper>
     <paper id="14">
       <title>A logical treatment of semi-free word order and bounded discontinuous constituency</title>
-      <author><first>Mike</first><last>Reape</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
       <url hash="c534912a">E89-1014</url>
       <bibkey>reape-1989-logical</bibkey>
     </paper>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="16">
       <title>User studies and the design of Natural Language Systems</title>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <author><first>Phil</first><last>Stenton</last></author>
       <url hash="042532a3">E89-1016</url>
       <bibkey>whittaker-stenton-1989-user</bibkey>
@@ -138,19 +138,19 @@
     </paper>
     <paper id="19">
       <title>Lexical Acquisition in the Core Language Engine</title>
-      <author><first>David M.</first><last>Carter</last></author>
+      <author id="david-carter"><first>David M.</first><last>Carter</last></author>
       <url hash="b006d428">E89-1019</url>
       <bibkey>carter-1989-lexical</bibkey>
     </paper>
     <paper id="20">
       <title>It Would Be Much Easier If <fixed-case>WENT</fixed-case> Were <fixed-case>GOED</fixed-case></title>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <url hash="854c9bea">E89-1020</url>
       <bibkey>tufis-1989-much</bibkey>
     </paper>
     <paper id="21">
       <title>Plan Revision in Person-Machine Dialogue</title>
-      <author><first>Cleo</first><last>Jullien</last></author>
+      <author id="cleo-jullien"><first>Cleo</first><last>Jullien</last></author>
       <author><first>Jean-Charles</first><last>Marty</last></author>
       <url hash="718ad410">E89-1021</url>
       <bibkey>jullien-marty-1989-plan</bibkey>
@@ -158,7 +158,7 @@
     <paper id="22">
       <title>Remarks on Plural Anaphora</title>
       <author><first>Carola</first><last>Eschenbach</last></author>
-      <author><first>Christopher</first><last>Habel</last></author>
+      <author id="christopher-habel"><first>Christopher</first><last>Habel</last></author>
       <author><first>Michael</first><last>Herweg</last></author>
       <author><first>Klaus</first><last>Rehkamper</last></author>
       <url hash="d539a67a">E89-1022</url>
@@ -166,16 +166,16 @@
     </paper>
     <paper id="23">
       <title>Enhancing Explanation Coherence With Rhetorical Strategies</title>
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <url hash="d449ee72">E89-1023</url>
       <bibkey>maybury-1989-enhancing</bibkey>
     </paper>
     <paper id="24">
       <title>Expressing generalizations in unification-based grammar formalisms</title>
       <author><first>Marc</first><last>Moens</last></author>
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <author><first>Ewan</first><last>Klein</last></author>
-      <author><first>Mike</first><last>Reape</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
       <author><first>Henk</first><last>Zeevat</last></author>
       <url hash="0789505d">E89-1024</url>
       <bibkey>moens-etal-1989-expressing</bibkey>
@@ -219,27 +219,27 @@
     </paper>
     <paper id="31">
       <title>Subject Erasing and Pronominalization in <fixed-case>I</fixed-case>talian Text Generation</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <url hash="106ffaa4">E89-1031</url>
       <bibkey>namer-1989-subject</bibkey>
     </paper>
     <paper id="32">
       <title>An Algorithm for Generation in Unification Categorial Grammar</title>
-      <author><first>Jonathan</first><last>Calder</last></author>
-      <author><first>Mike</first><last>Reape</last></author>
+      <author id="jo-calder"><first>Jonathan</first><last>Calder</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
       <author><first>Henk</first><last>Zeevat</last></author>
       <url hash="46f04e11">E89-1032</url>
       <bibkey>calder-etal-1989-algorithm</bibkey>
     </paper>
     <paper id="33">
       <title>Interactive Incremental Chart Parsing</title>
-      <author><first>Mats</first><last>Wiren</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wiren</last></author>
       <url hash="f871e8c4">E89-1033</url>
       <bibkey>wiren-1989-interactive</bibkey>
     </paper>
     <paper id="34">
       <title><fixed-case>F</fixed-case>rench Order Without Order</title>
-      <author><first>Gabriel G.</first><last>Bes</last></author>
+      <author id="gabriel-g-bes"><first>Gabriel G.</first><last>Bes</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <url hash="d5053bce">E89-1034</url>
       <bibkey>bes-gardent-1989-french</bibkey>
@@ -248,63 +248,63 @@
       <title>The Syntactic Regularity of <fixed-case>E</fixed-case>nglish Noun Phrases</title>
       <author><first>Lita</first><last>Taylor</last></author>
       <author><first>Claire</first><last>Grover</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="63624cd7">E89-1035</url>
       <bibkey>taylor-etal-1989-syntactic</bibkey>
     </paper>
     <paper id="36">
       <title>A Descriptive Framework for Translating Speaker’s Meaning</title>
-      <author><first>Masako</first><last>Kume</last></author>
+      <author id="masako-kume"><first>Masako</first><last>Kume</last></author>
       <author><first>Gayle K.</first><last>Sato</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <url hash="2ac6e55f">E89-1036</url>
       <bibkey>kume-etal-1989-descriptive</bibkey>
     </paper>
     <paper id="37">
       <title>Translation by Structural Correspondences</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <author><first>Klaus</first><last>Netter</last></author>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <url hash="1205f296">E89-1037</url>
       <bibkey>kaplan-etal-1989-translation</bibkey>
     </paper>
     <paper id="38">
       <title>A New View on the Process of Translation</title>
-      <author><first>John A.</first><last>Bateman</last></author>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <author><first>Jorg F. L.</first><last>Schutz</last></author>
-      <author><first>Erich H.</first><last>Steiner</last></author>
+      <author id="erich-h-steiner"><first>Erich H.</first><last>Steiner</last></author>
       <url hash="9652d8f6">E89-1038</url>
       <bibkey>bateman-etal-1989-new</bibkey>
     </paper>
     <paper id="39">
       <title>Empirical Studies of Discourse Representations for Natural Language Interfaces</title>
       <author><first>Nils</first><last>Dählback</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <url hash="ac776240">E89-1039</url>
       <bibkey>dahlback-jonsson-1989-empirical</bibkey>
     </paper>
     <paper id="40">
       <title>An Approach to Sentence-Level Anaphora in Machine Translation</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Joke</first><last>Dorrepaal</last></author>
       <author><first>Doug</first><last>Arnold</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <author><first>Louisa</first><last>Sadler</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <url hash="30646912">E89-1040</url>
       <bibkey>van-noord-etal-1989-approach</bibkey>
     </paper>
     <paper id="41">
       <title>Situation Semantics and Machine Translation</title>
-      <author><first>C.J.</first><last>Rupp</last></author>
+      <author id="c-j-rupp"><first>C.J.</first><last>Rupp</last></author>
       <url hash="085bcbe9">E89-1041</url>
       <bibkey>rupp-1989-situation</bibkey>
     </paper>
     <paper id="42">
       <title>On Formalisms and Analysis, Generation and Synthesis in Machine Translation</title>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <url hash="9fbd9a2e">E89-1042</url>
       <bibkey>yusoff-1989-formalisms</bibkey>
     </paper>
diff --git a/data/xml/E91.xml b/data/xml/E91.xml
index 3c0e7d020c..9b6e58a727 100644
--- a/data/xml/E91.xml
+++ b/data/xml/E91.xml
@@ -29,8 +29,8 @@
     </paper>
     <paper id="3">
       <title>Designing Illustrated Texts: How Language Production Is Influenced by Graphics Generation</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
-      <author><first>Elisabeth</first><last>Andre</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>Andre</last></author>
       <author><first>Winfried</first><last>Graf</last></author>
       <author><first>Thomas</first><last>Rist</last></author>
       <url hash="573eae87">E91-1003</url>
@@ -39,21 +39,21 @@
     <paper id="4">
       <title><fixed-case>P</fixed-case>earl: A Probabilistic Chart Parser</title>
       <author><first>David M.</first><last>Magerrnan</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <url hash="f901d679">E91-1004</url>
       <bibkey>magerrnan-marcus-1991-pearl</bibkey>
     </paper>
     <paper id="5">
       <title>Long-Distance Scrambling and <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
       <author><first>Tilman</first><last>Becker</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="14bbe661">E91-1005</url>
       <bibkey>becker-etal-1991-long</bibkey>
     </paper>
     <paper id="6">
       <title>Bidirectional Parsing of <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <url hash="84cba3cf">E91-1006</url>
       <bibkey>lavelli-satta-1991-bidirectional</bibkey>
@@ -66,7 +66,7 @@
     </paper>
     <paper id="8">
       <title>Indexing and a Referential Dependencies Within Binding Theory: Computational Framework</title>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <url hash="a834ece7">E91-1008</url>
       <bibkey>pianesi-1991-indexing</bibkey>
     </paper>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="11">
       <title>Processing Language with Logical Types and Active Constraints</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="f8ebbe8f">E91-1011</url>
       <bibkey>saint-dizier-1991-processing</bibkey>
     </paper>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="14">
       <title>What Sort of Trees Do We Speak? A Computational Model of the Syntax-Prosody Interface in <fixed-case>T</fixed-case>okyo <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <url hash="351287d5">E91-1014</url>
       <bibkey>whitelock-1991-sort</bibkey>
     </paper>
@@ -121,14 +121,14 @@
     </paper>
     <paper id="17">
       <title>A Unified Management and Processing of Word-Forms, Idioms and Analytical Compounds</title>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Octav</first><last>Popescu</last></author>
       <url hash="d3994261">E91-1017</url>
       <bibkey>tufis-popescu-1991-unified</bibkey>
     </paper>
     <paper id="18">
       <title>Analysis of Unknown Words through Morphological Decomposition</title>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Joke</first><last>van de Plassche</last></author>
       <author><first>Briony</first><last>Williams</last></author>
       <url hash="1b9bf1a3">E91-1018</url>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="25">
       <title>Parsing without lexicon: the <fixed-case>M</fixed-case>or<fixed-case>P</fixed-case> system</title>
-      <author><first>Gunnel</first><last>Kallgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Kallgren</last></author>
       <url hash="270d066f">E91-1025</url>
       <bibkey>kallgren-1991-parsing</bibkey>
     </paper>
@@ -196,7 +196,7 @@
     <paper id="28">
       <title>Generating Referring Expressions Involving Relations</title>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Nicholas</first><last>Haddock</last></author>
+      <author id="nicholas-j-haddock"><first>Nicholas</first><last>Haddock</last></author>
       <url hash="5cf89cb9">E91-1028</url>
       <bibkey>dale-haddock-1991-generating</bibkey>
     </paper>
@@ -209,7 +209,7 @@
     <paper id="30">
       <title>The Formal and Processing Models of <fixed-case>CLG</fixed-case></title>
       <author><first>Luis</first><last>Damas</last></author>
-      <author><first>Giovanni B.</first><last>Varile</last></author>
+      <author id="giovanni-battista-varile"><first>Giovanni B.</first><last>Varile</last></author>
       <url hash="9dc93cfc">E91-1030</url>
       <bibkey>damas-varile-1991-formal</bibkey>
     </paper>
@@ -221,7 +221,7 @@
     </paper>
     <paper id="32">
       <title>Multiple Interpreters in a Principle-Based Model of Sentence Processing</title>
-      <author><first>Matthew W.</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew W.</first><last>Crocker</last></author>
       <url hash="181d71b1">E91-1032</url>
       <bibkey>crocker-1991-multiple</bibkey>
     </paper>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="36">
       <title>Classical Logics for Attribute-Value Languages</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="f60261fb">E91-1036</url>
       <bibkey>wedekind-1991-classical</bibkey>
     </paper>
@@ -272,14 +272,14 @@
     </paper>
     <paper id="40">
       <title>An Assessment of Semantic Information Automatically Extracted From Machine Readable Dictionaries</title>
-      <author><first>Jean</first><last>Veronis</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="fb6017bd">E91-1040</url>
       <bibkey>veronis-ide-1991-assessment</bibkey>
     </paper>
     <paper id="41">
       <title>A Dialogue Manager Using Initiative-Response Units and Distributed Control</title>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <url hash="097a4ad1">E91-1041</url>
       <bibkey>jonsson-1991-dialogue</bibkey>
     </paper>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="43">
       <title>A Bidirectional Model for Natural Language Processing</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <url hash="1df68a72">E91-1043</url>
       <bibkey>neumann-1991-bidirectional</bibkey>
     </paper>
@@ -304,7 +304,7 @@
     </paper>
     <paper id="45">
       <title>Helpful Answers to Modal and Hypothetical Questions</title>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <author><first>Richard</first><last>Ball</last></author>
       <author><first>Keith</first><last>Brown</last></author>
       <author><first>Chris</first><last>Fox</last></author>
@@ -323,18 +323,18 @@
     </paper>
     <paper id="47">
       <title>Limits of a Sentence Based Procedural Approach for Aspect Choice in <fixed-case>G</fixed-case>erman-<fixed-case>R</fixed-case>ussian <fixed-case>MT</fixed-case></title>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
-      <author><first>Renate</first><last>Henschel</last></author>
-      <author><first>Iris</first><last>Hoser</last></author>
-      <author><first>Gerda</first><last>Klimonow</last></author>
-      <author><first>Andreas</first><last>Kustner</last></author>
-      <author><first>Ingrid</first><last>Starke</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
+      <author id="iris-hoser"><first>Iris</first><last>Hoser</last></author>
+      <author id="gerda-klimonow"><first>Gerda</first><last>Klimonow</last></author>
+      <author id="andreas-kustner"><first>Andreas</first><last>Kustner</last></author>
+      <author id="ingrid-starke"><first>Ingrid</first><last>Starke</last></author>
       <url hash="4ead2ec1">E91-1047</url>
       <bibkey>buschbeck-etal-1991-limits</bibkey>
     </paper>
     <paper id="48">
       <title>Lexical Transfer based on bilingual signs: Towards interaction during transfer</title>
-      <author><first>Jun-ich</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ich</first><last>Tsujii</last></author>
       <author><first>Kimikazu</first><last>Fujita</last></author>
       <url hash="0fe68917">E91-1048</url>
       <bibkey>tsujii-fujita-1991-lexical</bibkey>
@@ -349,17 +349,17 @@
     </paper>
     <paper id="50">
       <title>A Language for the Statement of Binary Relations over Feature Structures</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <author><first>Afzal</first><last>Ballim</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
-      <author><first>Susan</first><last>Warwick-Armstrong</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick-Armstrong</last></author>
       <url hash="0c2cb75e">E91-1050</url>
       <bibkey>russell-etal-1991-language</bibkey>
     </paper>
     <paper id="51">
       <title>Structural Non-Correspondence in Translation</title>
       <author><first>Louisa</first><last>Sadler</last></author>
-      <author><first>Henry S.</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
       <url hash="747fe2f5">E91-1051</url>
       <bibkey>sadler-thompson-1991-structural</bibkey>
     </paper>
diff --git a/data/xml/E93.xml b/data/xml/E93.xml
index 2406cc2f40..412779744d 100644
--- a/data/xml/E93.xml
+++ b/data/xml/E93.xml
@@ -3,9 +3,9 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Sixth Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Steven</first><last>Krauwer</last></editor>
-      <editor><first>Michael</first><last>Moortgat</last></editor>
-      <editor><first>Louis</first><last>des Tombe</last></editor>
+      <editor id="steven-krauwer"><first>Steven</first><last>Krauwer</last></editor>
+      <editor id="michael-moortgat"><first>Michael</first><last>Moortgat</last></editor>
+      <editor id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Utrecht, The Netherlands</address>
       <month>April</month>
@@ -33,8 +33,8 @@
       <title>Experiments in Reusability of Grammatical Resources</title>
       <author><first>Doug</first><last>Arnold</last></author>
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Stefan</first><last>Momma</last></author>
       <author><first>Louisa</first><last>Sadler</last></author>
       <author><first>Paul</first><last>Schmidt</last></author>
@@ -64,8 +64,8 @@
     </paper>
     <paper id="7">
       <title>Data-Oriented Methods for Grapheme-to-Phoneme Conversion</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="0d6715a6">E93-1007</url>
       <bibkey>van-den-bosch-daelemans-1993-data</bibkey>
     </paper>
@@ -86,7 +86,7 @@
     <paper id="10">
       <title>Head-driven Parsing for Lexicalist Grammars: Experimental Results</title>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="2829a3eb">E93-1010</url>
       <bibkey>bouma-van-noord-1993-head</bibkey>
     </paper>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="12">
       <title>Morphonology in the Lexicon</title>
-      <author><first>Lynne J.</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne J.</first><last>Cahill</last></author>
       <url hash="5e618f2c">E93-1012</url>
       <bibkey>cahill-1993-morphonology</bibkey>
     </paper>
@@ -142,28 +142,28 @@
     </paper>
     <paper id="19">
       <title>Rule-based Acquisition and Maintenance of Lexical and Semantic Knowledge</title>
-      <author><first>Donna M.</first><last>Gates</last></author>
+      <author id="donna-gates"><first>Donna M.</first><last>Gates</last></author>
       <author><first>Peter</first><last>Shell</last></author>
       <url hash="9fcf0130">E93-1019</url>
       <bibkey>gates-shell-1993-rule</bibkey>
     </paper>
     <paper id="20">
       <title>A Computational Treatment of Sentence-Final ‘then’</title>
-      <author><first>Sheila</first><last>Glasbey</last></author>
+      <author id="sheila-r-glasbey"><first>Sheila</first><last>Glasbey</last></author>
       <url hash="06d257c2">E93-1020</url>
       <bibkey>glasbey-1993-computational</bibkey>
     </paper>
     <paper id="21">
       <title>Towards a proper treatment of coercion phenomena</title>
-      <author><first>Daniele</first><last>Godard</last></author>
+      <author id="daniele-godard"><first>Daniele</first><last>Godard</last></author>
       <author><first>Jacques</first><last>Jayez</last></author>
       <url hash="1b50b94e">E93-1021</url>
       <bibkey>godard-jayez-1993-towards</bibkey>
     </paper>
     <paper id="22">
       <title>Identifying Topic and Focus by an Automatic Procedure</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <author><first>Hana</first><last>Skonmalovla</last></author>
       <url hash="b4762bef">E93-1022</url>
       <bibkey>hajicova-etal-1993-identifying</bibkey>
@@ -176,13 +176,13 @@
     </paper>
     <paper id="24">
       <title>Restriction and Correspondence-based Translation</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="c9bc90f9">E93-1024</url>
       <bibkey>kaplan-1993-restriction</bibkey>
     </paper>
     <paper id="25">
       <title>A Discourse Copying Algorithm for Ellipsis and Anaphora Resolution</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <url hash="dd5debbb">E93-1025</url>
       <bibkey>kehler-1993-discourse</bibkey>
     </paper>
@@ -195,7 +195,7 @@
     <paper id="27">
       <title>Linguistic Knowledge Acquisition from Parsing Failures</title>
       <author><first>Masaki</first><last>Kiyono</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="14e75653">E93-1027</url>
       <bibkey>kiyono-tsujii-1993-linguistic</bibkey>
     </paper>
@@ -214,14 +214,14 @@
     <paper id="30">
       <title>A Semantics and Pragmatics for the Pluperfect</title>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <url hash="ad383a01">E93-1030</url>
       <bibkey>lascarides-asher-1993-semantics</bibkey>
     </paper>
     <paper id="31">
       <title>Temporal Connectives in a Discourse Context</title>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <url hash="44bb4869">E93-1031</url>
       <bibkey>lascarides-oberlander-1993-temporal</bibkey>
     </paper>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="33">
       <title>Abductive Explanation of Dialogue Misunderstandings</title>
-      <author><first>Susan</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan</first><last>McRoy</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <url hash="09ccca16">E93-1033</url>
       <bibkey>mcroy-hirst-1993-abductive</bibkey>
@@ -274,7 +274,7 @@
     <paper id="39">
       <title>Generating Contextually Appropriate Intonation</title>
       <author><first>Scott</first><last>Prevost</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <url hash="5b661907">E93-1039</url>
       <bibkey>prevost-steedman-1993-generating</bibkey>
     </paper>
@@ -300,7 +300,7 @@
     </paper>
     <paper id="43">
       <title>Coping With Derivation in a Morphological Component</title>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="d4747b0c">E93-1043</url>
       <bibkey>trost-1993-coping</bibkey>
     </paper>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="45">
       <title>The Use of Shared Forests in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Parsing</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <url hash="d78784f9">E93-1045</url>
       <bibkey>vijay-shanker-1993-use</bibkey>
     </paper>
@@ -325,14 +325,14 @@
     </paper>
     <paper id="47">
       <title>Type-Driven Semantic Interpretation of f-Structures</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="be7a478f">E93-1047</url>
       <bibkey>wedekind-kaplan-1993-type</bibkey>
     </paper>
     <paper id="48">
       <title>Delimitedness and Trajectory-of-Motion Events</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <url hash="4b088d70">E93-1048</url>
       <bibkey>white-1993-delimitedness</bibkey>
     </paper>
@@ -349,7 +349,7 @@
     </paper>
     <paper id="51">
       <title>Lexical Disambiguation Using Constraint Handling In <fixed-case>P</fixed-case>rolog (<fixed-case>CHIP</fixed-case>)</title>
-      <author><first>George C.</first><last>Demetriou</last></author>
+      <author id="george-demetriou"><first>George C.</first><last>Demetriou</last></author>
       <url hash="acccdf1e">E93-1051</url>
       <bibkey>demetriou-1993-lexical</bibkey>
     </paper>
@@ -401,8 +401,8 @@
     <paper id="58">
       <title>Undestanding Stories in Different Languages with <fixed-case>GETA</fixed-case>-<fixed-case>RUN</fixed-case></title>
       <author><first>Dario</first><last>Bianchi</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <url hash="9243b642">E93-1058</url>
       <bibkey>bianchi-etal-1993-undestanding</bibkey>
     </paper>
@@ -415,28 +415,28 @@
     </paper>
     <paper id="60">
       <title>Long Sentence Analysis by Domain-Specific Pattern Grammar</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
-      <author><first>Shinichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shinichiro</first><last>Kamei</last></author>
       <author><first>Kiyoshi</first><last>Yamabana</last></author>
       <url hash="3d488b09">E93-1060</url>
       <bibkey>doi-etal-1993-long</bibkey>
     </paper>
     <paper id="61">
       <title>Knowledge acquisition for a constrained speech system using <fixed-case>W</fixed-case>o<fixed-case>Z</fixed-case></title>
-      <author><first>Laila</first><last>Dybkjær</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Hans</first><last>Dybkjær</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjær</last></author>
       <url hash="55a8ae8a">E93-1061</url>
       <bibkey>dybkjaer-etal-1993-knowledge</bibkey>
     </paper>
     <paper id="62">
       <title>The <fixed-case>PANGLOSS MARK I</fixed-case> <fixed-case>MAT</fixed-case> system</title>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Ariel</first><last>Cohen</last></author>
       <author><first>Dean</first><last>Grannes</last></author>
       <author><first>Peter</first><last>Cousseau</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="16182368">E93-1062</url>
       <bibkey>frederking-etal-1993-pangloss</bibkey>
     </paper>
@@ -450,7 +450,7 @@
     </paper>
     <paper id="64">
       <title>The Linguistic Annotation System of the <fixed-case>S</fixed-case>tockholm - <fixed-case>U</fixed-case>meå <fixed-case>C</fixed-case>orpus Project</title>
-      <author><first>Gunnel</first><last>Källgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Källgren</last></author>
       <author><first>Gunnar</first><last>Eriksson</last></author>
       <url hash="0c43900c">E93-1064</url>
       <bibkey>kallgren-eriksson-1993-linguistic</bibkey>
@@ -459,7 +459,7 @@
       <title><fixed-case>INSYST</fixed-case>: An Automatic Inserter System for Hierarchical Lexica</title>
       <author><first>Marc</first><last>Light</last></author>
       <author><first>Sabine</first><last>Reinhard</last></author>
-      <author><first>Marie</first><last>Boyle-Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Boyle-Hinrichs</last></author>
       <url hash="08a9baf6">E93-1065</url>
       <bibkey>light-etal-1993-insyst</bibkey>
     </paper>
@@ -471,14 +471,14 @@
     </paper>
     <paper id="67">
       <title><fixed-case>H</fixed-case>elyette: Inflectional Thesaurus for Agglutinative Languages</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
-      <author><first>Laszlo</first><last>Tihanyi</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
+      <author id="laszlo-tihanyi"><first>Laszlo</first><last>Tihanyi</last></author>
       <url hash="86372d32">E93-1067</url>
       <bibkey>proszeky-tihanyi-1993-helyette</bibkey>
     </paper>
     <paper id="68">
       <title>Natural Language Front-Ends to Databases: Design and the Customisation Bottleneck</title>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <url hash="4219dd4c">E93-1068</url>
       <bibkey>de-roeck-1993-natural</bibkey>
     </paper>
@@ -491,7 +491,7 @@
     </paper>
     <paper id="70">
       <title><fixed-case>ITS</fixed-case>-2 : an interactive personal translation system</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Mira</first><last>Ramluckun</last></author>
       <url hash="54b0ffc6">E93-1070</url>
       <bibkey>wehrli-ramluckun-1993-2</bibkey>
diff --git a/data/xml/E95.xml b/data/xml/E95.xml
index 70fa23067b..ede5da8f83 100644
--- a/data/xml/E95.xml
+++ b/data/xml/E95.xml
@@ -3,8 +3,8 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Seventh Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Steven P.</first><last>Abney</last></editor>
-      <editor><first>Erhard W.</first><last>Hinrichs</last></editor>
+      <editor id="steven-abney"><first>Steven P.</first><last>Abney</last></editor>
+      <editor id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>March</month>
@@ -44,7 +44,7 @@
     </paper>
     <paper id="5">
       <title>The Semantics of Resource Sharing in <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <author><first>Mary</first><last>Dalrymple</last></author>
       <author><first>John</first><last>Lamping</last></author>
       <author><first>Vijay</first><last>Saraswat</last></author>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="7">
       <title>Some Remarks on the Decidability of the Generation Problem in <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-Style Unification Grammars</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
       <url hash="3117aef3">E95-1007</url>
       <bibkey>wedekind-1995-remarks</bibkey>
     </paper>
@@ -68,7 +68,7 @@
       <title>Collocation Map for Overcoming Data Sparseness</title>
       <author><first>Moonjoo</first><last>Kim</last></author>
       <author><first>Young S.</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="bbddfadf">E95-1008</url>
       <bibkey>kim-etal-1995-collocation</bibkey>
     </paper>
@@ -80,16 +80,16 @@
     </paper>
     <paper id="10">
       <title>Text Alignment in the Real World: Improving Alignments of Noisy Translations Using Common Lexical Features, String Matching Strategies and N-Gram Comparisons</title>
-      <author><first>Mark W.</first><last>Davis</last></author>
-      <author><first>Ted E.</first><last>Dunning</last></author>
-      <author><first>William C.</first><last>Ogden</last></author>
+      <author id="mark-w-davis"><first>Mark W.</first><last>Davis</last></author>
+      <author id="ted-e-dunning"><first>Ted E.</first><last>Dunning</last></author>
+      <author id="william-c-ogden"><first>William C.</first><last>Ogden</last></author>
       <url hash="1b9b5f0d">E95-1010</url>
       <bibkey>davis-etal-1995-text</bibkey>
     </paper>
     <paper id="11">
       <title>A Tractable Extension of Linear Indexed Grammars</title>
       <author><first>Bill</first><last>Keller</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <url hash="cc47c70c">E95-1011</url>
       <bibkey>keller-weir-1995-tractable</bibkey>
     </paper>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="16">
       <title>On Learning more Appropriate Selectional Restrictions</title>
-      <author><first>Francesc</first><last>Ribas</last></author>
+      <author id="francesc-ribas"><first>Francesc</first><last>Ribas</last></author>
       <url hash="e3f3a57c">E95-1016</url>
       <bibkey>ribas-1995-learning</bibkey>
     </paper>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="20">
       <title>Distributional Part-of-Speech Tagging</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <url hash="d30511c5">E95-1020</url>
       <bibkey>schutze-1995-distributional</bibkey>
     </paper>
@@ -171,7 +171,7 @@
       <title>Off-line Optimization for <fixed-case>E</fixed-case>arley-style <fixed-case>HPSG</fixed-case> Processing</title>
       <author><first>Guido</first><last>Minnen</last></author>
       <author><first>Dale</first><last>Gerdemann</last></author>
-      <author><first>Thilo</first><last>Gotz</last></author>
+      <author id="thilo-gotz"><first>Thilo</first><last>Gotz</last></author>
       <url hash="4d9bdcc5">E95-1024</url>
       <bibkey>minnen-etal-1995-line</bibkey>
     </paper>
@@ -184,7 +184,7 @@
     <paper id="26">
       <title>A Robust and Efficient Three-Layered Dialogue Component for a Speech-to-Speech Translation System</title>
       <author><first>Jan</first><last>Alexandersson</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Norbert</first><last>Reithinger</last></author>
       <url hash="a321e346">E95-1026</url>
       <bibkey>alexandersson-etal-1995-robust</bibkey>
@@ -198,14 +198,14 @@
     </paper>
     <paper id="28">
       <title>Rapid Development of Morphological Descriptions for Full Language Processing Systems</title>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <url hash="c3ceee52">E95-1028</url>
       <bibkey>carter-1995-rapid</bibkey>
     </paper>
     <paper id="29">
       <title>Specifying a shallow grammatical representation for parsing purposes</title>
       <author><first>Atro</first><last>Voutilainen</last></author>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <url hash="fa70841e">E95-1029</url>
       <bibkey>voutilainen-jarvinen-1995-specifying</bibkey>
     </paper>
@@ -219,9 +219,9 @@
     <paper id="31">
       <title>A Robust Parser Based on Syntactic Information</title>
       <author><first>Kong Joo</first><last>Lee</last></author>
-      <author><first>Cheol Jung</first><last>Kweon</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
-      <author><first>Gil Chang</first><last>Kim</last></author>
+      <author id="cheol-jung-kweon"><first>Cheol Jung</first><last>Kweon</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
+      <author id="gil-chang-kim"><first>Gil Chang</first><last>Kim</last></author>
       <url hash="a7592cfa">E95-1031</url>
       <bibkey>lee-etal-1995-robust</bibkey>
     </paper>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="35">
       <title>Algorithms for Analysing the Temporal Structure of Discourse</title>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <author><first>Marc</first><last>Moens</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <url hash="cf991e0a">E95-1035</url>
@@ -261,7 +261,7 @@
     </paper>
     <paper id="37">
       <title>Topic Identification in Discourse</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <url hash="8a6cf6a3">E95-1037</url>
       <bibkey>chen-1995-topic</bibkey>
     </paper>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="41">
       <title>An Algorithm to Co-Ordinate Anaphora Resolution and <fixed-case>PPS</fixed-case> Disambiguation Process</title>
-      <author><first>Saliha</first><last>Azzam</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
       <url hash="422d46b9">E95-1041</url>
       <bibkey>azzam-1995-algorithm</bibkey>
     </paper>
diff --git a/data/xml/E99.xml b/data/xml/E99.xml
index 93e24b9e06..cb3650583c 100644
--- a/data/xml/E99.xml
+++ b/data/xml/E99.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Ninth Conference of the <fixed-case>E</fixed-case>uropean Chapter of the Association for Computational Linguistics</booktitle>
-      <editor><first>Henry S.</first><last>Thompson</last></editor>
+      <editor id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></editor>
       <editor><first>Alex</first><last>Lascarides</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Bergen, Norway</address>
@@ -41,8 +41,8 @@
     </paper>
     <paper id="4">
       <title>An Object-Oriented Approach to the Design of Dialogue Management Functionality</title>
-      <author><first>Ian M.</first><last>O’Neill</last></author>
-      <author><first>Michael F.</first><last>McTear</last></author>
+      <author id="ian-m-oneill"><first>Ian M.</first><last>O’Neill</last></author>
+      <author id="michael-f-mctear"><first>Michael F.</first><last>McTear</last></author>
       <pages>23–29</pages>
       <url hash="b5dd112e">E99-1004</url>
       <bibkey>oneill-mctear-1999-object</bibkey>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="10">
       <title>An Efficient Method for Determining Bilingual Word Classes</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <pages>71–76</pages>
       <url hash="cff39963">E99-1010</url>
       <bibkey>och-1999-efficient</bibkey>
@@ -98,9 +98,9 @@
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>David</first><last>House</last></author>
       <author><first>Gary</first><last>Klein</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Therese</first><last>Firmin</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="therese-firmin"><first>Therese</first><last>Firmin</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <pages>77–85</pages>
       <url hash="02ff8e68">E99-1011</url>
       <bibkey>mani-etal-1999-tipster</bibkey>
@@ -114,7 +114,7 @@
     </paper>
     <paper id="13">
       <title>Complementing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with <fixed-case>R</fixed-case>oget’s and Corpus-based Thesauri for Information Retrieval</title>
-      <author><first>Rila</first><last>Mandala</last></author>
+      <author id="rila-mandala"><first>Rila</first><last>Mandala</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <pages>94–101</pages>
@@ -124,8 +124,8 @@
     <paper id="14">
       <title>Full Text Parsing using Cascades of Rules: an Information Extraction Perspective</title>
       <pages>102–109</pages>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <url hash="18a460e1">E99-1014</url>
       <bibkey>ciravegna-lavelli-1999-full</bibkey>
     </paper>
@@ -148,7 +148,7 @@
     <paper id="17">
       <title>Transducers from Rewrite Rules with Backreferences</title>
       <author><first>Dale</first><last>Gerdemann</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>126–133</pages>
       <url hash="8a65f114">E99-1017</url>
       <bibkey>gerdemann-van-noord-1999-transducers</bibkey>
@@ -156,7 +156,7 @@
     <paper id="18">
       <title><fixed-case>POS</fixed-case> Disambiguation and Unknown Word Guessing with Decision Trees</title>
       <author><first>Giorgos S.</first><last>Orphanos</last></author>
-      <author><first>Dimitris N.</first><last>Christodoulakis</last></author>
+      <author id="dimitris-christodoulakis"><first>Dimitris N.</first><last>Christodoulakis</last></author>
       <pages>134–141</pages>
       <url hash="dd5c0592">E99-1018</url>
       <bibkey>orphanos-christodoulakis-1999-pos</bibkey>
@@ -171,9 +171,9 @@
     </paper>
     <paper id="20">
       <title>Tabular Algorithms for <fixed-case>TAG</fixed-case> Parsing</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
-      <author><first>David</first><last>Cabrero</last></author>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="david-cabrero-souto"><first>David</first><last>Cabrero</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
       <author><first>Manuel</first><last>Vilares</last></author>
       <pages>150–157</pages>
       <url hash="342bdb0a">E99-1020</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="23">
       <title>Representing Text Chunks</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <author><first>Jorn</first><last>Veenstra</last></author>
       <pages>173–179</pages>
       <url hash="88c7ac0d">E99-1023</url>
@@ -213,7 +213,7 @@
     <paper id="25">
       <title>New Models for Improving Supertag Disambiguation</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>188–195</pages>
       <url hash="e8826473">E99-1025</url>
       <bibkey>chen-bangalore-1999-new</bibkey>
@@ -244,26 +244,26 @@
     </paper>
     <paper id="29">
       <title>Parsing with an Extended Domain of Locality</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <author><first>Olga</first><last>Shaumyan</last></author>
       <author><first>Martine</first><last>Smets</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>217–224</pages>
       <url hash="6becd100">E99-1029</url>
       <bibkey>carroll-etal-1999-parsing</bibkey>
     </paper>
     <paper id="30">
       <title>The Development of Lexical Resources for Information Extraction from Text Combining <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>D</fixed-case>ewey Decimal Classification</title>
-      <author><first>Gabriela</first><last>Cavaglia</last></author>
+      <author id="gabriela-cavaglia"><first>Gabriela</first><last>Cavaglia</last></author>
       <pages>225–228</pages>
       <url hash="01301eac">E99-1030</url>
       <bibkey>cavaglia-1999-development</bibkey>
     </paper>
     <paper id="31">
       <title>A Flexible Architecture for Reference Resolution</title>
-      <author><first>Donna K.</first><last>Byron</last></author>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
       <pages>229–232</pages>
       <url hash="b18247aa">E99-1031</url>
       <bibkey>byron-tetreault-1999-flexible</bibkey>
@@ -277,7 +277,7 @@
     </paper>
     <paper id="33">
       <title>Investigating <fixed-case>NLG</fixed-case> Architectures: taking style into consideration</title>
-      <author><first>Daniel S.</first><last>Paiva</last></author>
+      <author id="daniel-paiva"><first>Daniel S.</first><last>Paiva</last></author>
       <pages>237–240</pages>
       <url hash="de5acbd3">E99-1033</url>
       <bibkey>paiva-1999-investigating</bibkey>
@@ -292,7 +292,7 @@
     <paper id="35">
       <title>A Cascaded Finite-State Parser for Syntactic Analysis of <fixed-case>S</fixed-case>wedish</title>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Sofie Johansson</first><last>Kokkinakis</last></author>
+      <author id="sofie-johansson-kokkinakis"><first>Sofie Johansson</first><last>Kokkinakis</last></author>
       <pages>245–248</pages>
       <url hash="2ce4043d">E99-1035</url>
       <bibkey>kokkinakis-kokkinakis-1999-cascaded</bibkey>
@@ -346,12 +346,12 @@
     </paper>
     <paper id="42">
       <title>Simplifying Text for Language-Impaired Readers</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Guido</first><last>Minnen</last></author>
       <author><first>Darren</first><last>Pearce</last></author>
       <author><first>Yvonne</first><last>Canning</last></author>
       <author><first>Siobhan</first><last>Devlin</last></author>
-      <author><first>John</first><last>Tait</last></author>
+      <author id="john-tait"><first>John</first><last>Tait</last></author>
       <pages>269–270</pages>
       <url hash="0f4fc99f">E99-1042</url>
       <bibkey>carroll-etal-1999-simplifying</bibkey>
@@ -359,15 +359,15 @@
     <paper id="43">
       <title>The <fixed-case>GENIA</fixed-case> project: corpus-based knowledge acquisition and information extraction from genome research papers</title>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Hyun Seok</first><last>Park</last></author>
+      <author id="hyun-seok-park"><first>Hyun Seok</first><last>Park</last></author>
       <author><first>Norihiro</first><last>Ogata</last></author>
-      <author><first>Yuka</first><last>Tateishi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateishi</last></author>
       <author><first>Chikashi</first><last>Nobata</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Tateshi</first><last>Sekimizu</last></author>
       <author><first>Hisao</first><last>Imai</last></author>
       <author><first>Katsutoshi</first><last>Ibushi</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <pages>271–272</pages>
       <url hash="a7711440">E99-1043</url>
       <bibkey>collier-etal-1999-genia</bibkey>
@@ -395,7 +395,7 @@
     </paper>
     <paper id="47">
       <title><fixed-case>u-TBL</fixed-case> Lite: A Small, Extendible Transformation-Based Learner</title>
-      <author><first>Torbjorn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjorn</first><last>Lager</last></author>
       <pages>279–280</pages>
       <url hash="a7439812">E99-1047</url>
       <bibkey>lager-1999-u</bibkey>
@@ -425,11 +425,11 @@
     </paper>
     <paper id="51">
       <title>Robust and Flexible Mixed-Initiative Dialogue for Telephone Services</title>
-      <author><first>Jose</first><last>Relano Gil</last></author>
-      <author><first>Daniel</first><last>Tapias</last></author>
+      <author id="jose-relano-gil"><first>Jose</first><last>Relano Gil</last></author>
+      <author id="daniel-tapias"><first>Daniel</first><last>Tapias</last></author>
       <author><first>Maria C.</first><last>Gancedo</last></author>
-      <author><first>Marcela</first><last>Charfuelan</last></author>
-      <author><first>Luis A.</first><last>Hernandez</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelan</last></author>
+      <author id="luis-hernandez"><first>Luis A.</first><last>Hernandez</last></author>
       <pages>287–290</pages>
       <url hash="dec58162">E99-1051</url>
       <bibkey>relano-gil-etal-1999-robust</bibkey>
diff --git a/data/xml/F12.xml b/data/xml/F12.xml
index a898ce6eb7..1c25796fb8 100644
--- a/data/xml/F12.xml
+++ b/data/xml/F12.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 1: JEP</booktitle>
       <url hash="b143a9ff">F12-1</url>
-      <editor><first>Laurent</first><last>Besacier</last></editor>
+      <editor id="laurent-besacier"><first>Laurent</first><last>Besacier</last></editor>
       <editor><first>Benjamin</first><last>Lecouteux</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>La structuration prosodique et les relations syntaxe/ prosodie dans le discours politique (Prosodic Structuring and the Syntax-Prosody Relationship in Political Speech) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Ingo</first><last>Feldhausen</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <pages>9–16</pages>
       <url hash="70a831bb">F12-1002</url>
       <bibkey>feldhausen-delais-roussarie-2012-la</bibkey>
@@ -62,7 +62,7 @@
       <author><first>Luciana</first><last>Mendonça-Alves</last></author>
       <author><first>Robert</first><last>Espesser</last></author>
       <author><first>Alain</first><last>Ghio</last></author>
-      <author><first>Céline</first><last>de Looze</last></author>
+      <author id="celine-de-looze"><first>Céline</first><last>de Looze</last></author>
       <author><first>César</first><last>Reis</last></author>
       <pages>41–48</pages>
       <url hash="597a211d">F12-1006</url>
@@ -73,7 +73,7 @@
       <author><first>Julien</first><last>Fayolle</last></author>
       <author><first>Fabienne</first><last>Moreau</last></author>
       <author><first>Christian</first><last>Raymond</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <pages>49–56</pages>
       <url hash="1503a42d">F12-1007</url>
       <bibkey>fayolle-etal-2012-automates</bibkey>
@@ -178,7 +178,7 @@
     <paper id="18">
       <title>Détection d’émotions dans la voix de patients en interaction avec un agent conversationnel animé (Emotions detection in the voice of patients interacting with an animated conversational agent) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Clément</first><last>Chastagnol</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <pages>137–144</pages>
       <url hash="a4d53407">F12-1018</url>
       <bibkey>chastagnol-devillers-2012-detection</bibkey>
@@ -220,14 +220,14 @@
     </paper>
     <paper id="23">
       <title>Prosodie multimodale. Les enchères chantées aux Etats-Unis (Multimodal Prosody. The auction chant in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Gaëlle</first><last>Ferré</last></author>
+      <author id="gaelle-ferre"><first>Gaëlle</first><last>Ferré</last></author>
       <pages>177–184</pages>
       <url hash="d27c11aa">F12-1023</url>
       <bibkey>ferre-2012-prosodie</bibkey>
     </paper>
     <paper id="24">
       <title>Un cadre expérimental pour les Sciences de la Parole (An experimental framework for speech sciences) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <pages>185–192</pages>
       <url hash="8b9bfa60">F12-1024</url>
       <bibkey>adda-2012-un</bibkey>
@@ -271,7 +271,7 @@
     <paper id="29">
       <title>Coordinations spatio-temporelles dans les suites ab(b)i en arabe marocain (Spatio-temporal coordinations in <fixed-case>M</fixed-case>oroccan <fixed-case>A</fixed-case>rabic ab(bi) sequences) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Chakir</first><last>Zeroual</last></author>
-      <author><first>Philip</first><last>Hoole</last></author>
+      <author id="philip-hoole"><first>Philip</first><last>Hoole</last></author>
       <author><first>Diamantis</first><last>Gafos</last></author>
       <author><first>John</first><last>Esling</last></author>
       <pages>225–232</pages>
@@ -302,7 +302,7 @@
       <title>Les ajustements laryngaux en français (Laryngeal adjustments in <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Rachid</first><last>Ridouane</last></author>
       <author><first>Nicolas</first><last>Audibert</last></author>
-      <author><first>Van Minh</first><last>Nguyen</last></author>
+      <author id="van-minh-nguyen"><first>Van Minh</first><last>Nguyen</last></author>
       <pages>249–256</pages>
       <url hash="300d08aa">F12-1032</url>
       <bibkey>ridouane-etal-2012-les</bibkey>
@@ -321,7 +321,7 @@
     <paper id="34">
       <title>La Prosodie des énoncés interrogatifs en français langue seconde (The prosody of questions in <fixed-case>F</fixed-case>rench as <fixed-case>L</fixed-case>2) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Fabian</first><last>Santiago Vargas</last></author>
-      <author><first>Élisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Élisabeth</first><last>Delais-Roussarie</last></author>
       <pages>265–272</pages>
       <url hash="5def25be">F12-1034</url>
       <bibkey>santiago-vargas-delais-roussarie-2012-la</bibkey>
@@ -329,7 +329,7 @@
     <paper id="35">
       <title>Extraction de mots clefs dans des vidéos Web par Analyse Latente de <fixed-case>D</fixed-case>irichlet (<fixed-case>LDA</fixed-case>-based tagging of Web videos) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Mohamed</first><last>Morchid</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>273–280</pages>
       <url hash="9ec9b8c9">F12-1035</url>
       <bibkey>morchid-linares-2012-extraction</bibkey>
@@ -337,7 +337,7 @@
     <paper id="36">
       <title>Impact du Comportement Social d’un Robot sur les Emotions de l’Utilisateur : une Expérience Perceptive (Impact of the Social Behaviour of a Robot on the User’s Emotions: a Perceptive Experiment) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Agnes</first><last>Delaborde</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <pages>281–288</pages>
       <url hash="dce42f4e">F12-1036</url>
       <bibkey>delaborde-devillers-2012-impact</bibkey>
@@ -356,7 +356,7 @@
       <author><first>Anthony</first><last>Larcher</last></author>
       <author><first>Pierre-Michel</first><last>Bousquet</last></author>
       <author><first>Driss</first><last>Matrouf</last></author>
-      <author><first>Jean-Francois</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></author>
       <pages>297–304</pages>
       <url hash="d249bbd6">F12-1038</url>
       <bibkey>larcher-etal-2012-analyse</bibkey>
@@ -382,7 +382,7 @@
     </paper>
     <paper id="41">
       <title>Vers un mesure automatique de l’adaptation prosodique en interaction conversationnelle (Automatic measurement of prosodic accommodation in conversational interaction) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Céline</first><last>De Looze</last></author>
+      <author id="celine-de-looze"><first>Céline</first><last>De Looze</last></author>
       <author><first>Stefan</first><last>Scherer</last></author>
       <author><first>Brian</first><last>Vaughan</last></author>
       <author><first>Nick</first><last>Campbell</last></author>
@@ -393,8 +393,8 @@
     <paper id="42">
       <title>Une comparaison de la déclinaison de F0 entre le français et l’allemand journalistiques (F0-declination : a comparison between <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman journalistic speech) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Carolin</first><last>Schmid</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>329–336</pages>
       <url hash="cee0fbd2">F12-1042</url>
       <bibkey>schmid-etal-2012-une</bibkey>
@@ -495,7 +495,7 @@
     <paper id="53">
       <title>L’identification du locuteur : 20 ans de témoignage dans les cours de Justice. Le cas du <fixed-case>LIPSADON</fixed-case> &lt;&lt; laboratoire indépendant de police scientifique &gt;&gt; (Forensic speaker identification: 20 years of scientific testimonies in courts of Justice. The case of <fixed-case>LIPSADON</fixed-case> “forensics independent laboratory”) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Louis-Jean</first><last>Boë</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <pages>417–424</pages>
       <url hash="115b199d">F12-1053</url>
       <bibkey>boe-bonastre-2012-lidentification</bibkey>
@@ -505,7 +505,7 @@
       <author><first>Juliette</first><last>Kahn</last></author>
       <author><first>Nicolas</first><last>Scheffer</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <pages>425–432</pages>
       <url hash="2ee6ddfd">F12-1054</url>
       <bibkey>kahn-etal-2012-verification</bibkey>
@@ -597,7 +597,7 @@
     <paper id="64">
       <title>Codage échelonnable à granularité fine de la parole : Application au codeur G.729 (Fine granularity scalable speech coding: Application to the G.729 coder) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Mouloud</first><last>Djamah</last></author>
-      <author><first>Douglas</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas</first><last>O’Shaughnessy</last></author>
       <pages>505–512</pages>
       <url hash="1e9b27fb">F12-1064</url>
       <bibkey>djamah-oshaughnessy-2012-codage</bibkey>
@@ -643,26 +643,26 @@
     </paper>
     <paper id="69">
       <title>La liaison dans la parole spontanée familière : explorations semi-automatiques de grands corpus (<fixed-case>F</fixed-case>rench Liaison in casual speech : automatic and manual investigations) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Cécile</first><last>Fougeron</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <pages>545–552</pages>
       <url hash="acb2e8c7">F12-1069</url>
       <bibkey>adda-decker-etal-2012-la</bibkey>
     </paper>
     <paper id="70">
       <title>Percol0 - un système multimodal de détection de personnes dans des documents vidéo (Percol0 - A multimodal person detection system in video documents) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Remi</first><last>Auguste</last></author>
       <author><first>Stephane</first><last>Ayache</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Corinne</first><last>Fredouille</last></author>
       <author><first>Christophe</first><last>Levy</last></author>
-      <author><first>Georges</first><last>Linares</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linares</last></author>
       <author><first>Jean</first><last>Martinet</last></author>
       <pages>553–560</pages>
       <url hash="e4ab0ec5">F12-1070</url>
@@ -679,16 +679,16 @@
     </paper>
     <paper id="72">
       <title>Évaluation segmentale du système de synthèse <fixed-case>HTS</fixed-case> pour le français (Segmental evaluation of <fixed-case>HTS</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Sébastien</first><last>Le Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien</first><last>Le Maguer</last></author>
       <author><first>Nelly</first><last>Barbot</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <pages>569–576</pages>
       <url hash="9df92395">F12-1072</url>
       <bibkey>le-maguer-etal-2012-evaluation</bibkey>
     </paper>
     <paper id="73">
       <title>Lire les tons sur les lèvres : perception(s) visuelle(s) des tons lexicaux en chinois mandarin (Read the tones on the lips : visual perception(s) of lexical tones in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Grégory</first><last>Roulet-Guiot</last></author>
+      <author id="gregory-roulet-guiot"><first>Grégory</first><last>Roulet-Guiot</last></author>
       <author><first>Corine</first><last>Astésano</last></author>
       <pages>577–584</pages>
       <url hash="fdc092a2">F12-1073</url>
@@ -719,7 +719,7 @@
     <paper id="76">
       <title><fixed-case>PROSOTRAN</fixed-case> : un système d’annotation symbolique des faits prosodiques pour les données non-standards (<fixed-case>PROSOTRAN</fixed-case> : an tool that provides a symbolic representation of the prosodic events in non-standard data) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Katarina</first><last>Bartkova</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Fabian</first><last>Santiago Vargas</last></author>
       <pages>601–608</pages>
       <url hash="39c1966e">F12-1076</url>
@@ -727,7 +727,7 @@
     </paper>
     <paper id="77">
       <title>Questions corses : peut-on mettre en évidence un transfert prosodique du corse vers le français ? (<fixed-case>C</fixed-case>orsican questions: is there a prosodic transfer from <fixed-case>C</fixed-case>orsican to <fixed-case>F</fixed-case>rench?) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Albert</first><last>Rilliard</last></author>
       <author><first>Paolo</first><last>Mairano</last></author>
       <author><first>Jean-Pierre</first><last>Lai</last></author>
@@ -747,7 +747,7 @@
     <paper id="79">
       <title>Allongements vocaliques en français de <fixed-case>B</fixed-case>elgique : approche expérimentale et perceptive (Vowel lengthening in <fixed-case>B</fixed-case>elgium <fixed-case>F</fixed-case>rench: an experimental and perceptual approach) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Alice</first><last>Bardiaux</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <pages>625–632</pages>
       <url hash="705f72ac">F12-1079</url>
       <bibkey>bardiaux-boula-de-mareuil-2012-allongements</bibkey>
@@ -772,8 +772,8 @@
     </paper>
     <paper id="82">
       <title>Comparaison de parole journalistique et de parole spontanée : analyses de séquences entre pauses (Comparison of journalistic and spontaneous speech: analysis of sequences between pauses) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Cedric</first><last>Gendrot</last></author>
-      <author><first>Martine</first><last>Adda-decker</last></author>
+      <author id="cedric-gendrot"><first>Cedric</first><last>Gendrot</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-decker</last></author>
       <author><first>Carolin</first><last>Schmid</last></author>
       <pages>649–656</pages>
       <url hash="ce809690">F12-1082</url>
@@ -790,7 +790,7 @@
     </paper>
     <paper id="84">
       <title>Mise au point d’un paradigme de perturbation motrice pour l’étude de la perception de la parole (Defining a motor perturbation paradigm for speech perception studies) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Ali Hadian</first><last>Cefidekhanie</last></author>
+      <author id="ali-hadian-cefidekhanie"><first>Ali Hadian</first><last>Cefidekhanie</last></author>
       <author><first>Christophe</first><last>Savariaux</last></author>
       <author><first>Marc</first><last>Sato</last></author>
       <author><first>Jean-Luc</first><last>Schwartz</last></author>
@@ -829,7 +829,7 @@
       <title>Prédiction de l’indexabilité d’une transcription (Prediction of transcription indexability) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Grégory</first><last>Senay</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>697–705</pages>
       <url hash="dcb5570c">F12-1088</url>
       <bibkey>senay-etal-2012-prediction</bibkey>
@@ -868,9 +868,9 @@
     </paper>
     <paper id="92">
       <title>Vers une annotation automatique de corpus audio pour la synthèse de parole (Towards Fully Automatic Annotation of Audio Books for Text-To-Speech (<fixed-case>TTS</fixed-case>) Synthesis) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Olivier</first><last>Boëffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boëffard</last></author>
       <author><first>Laure</first><last>Charonnat</last></author>
-      <author><first>Sébastien</first><last>Le Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien</first><last>Le Maguer</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
       <author><first>Gaëlle</first><last>Vidal</last></author>
       <pages>731–738</pages>
@@ -920,15 +920,15 @@
     </paper>
     <paper id="98">
       <title>Robustesse et portabilités multilingue et multi-domaines des systèmes de compréhension de la parole : les corpus du projet <fixed-case>P</fixed-case>ort<fixed-case>M</fixed-case>edia (Robustness and portability of spoken language understanding systems among languages and domains : the <fixed-case>PORTMEDIA</fixed-case> project) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Laurent</first><last>Besacier</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Lina</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas-Barahona</last></author>
       <pages>779–786</pages>
       <url hash="d6dedbe0">F12-1098</url>
       <bibkey>lefevre-etal-2012-robustesse</bibkey>
@@ -947,9 +947,9 @@
       <title>Avancées dans le domaine de la transcription automatique par décodage guidé (Improvements on driven decoding system combination) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Mickael</first><last>Rouvier</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>795–802</pages>
       <url hash="2bd41241">F12-1100</url>
       <bibkey>bougares-etal-2012-avancees</bibkey>
@@ -967,7 +967,7 @@
     <paper id="102">
       <title>Détection et caractérisation des régions d’erreurs dans des transcriptions de contenus multimédia : application à la recherche des noms de personnes (Error region detection and characterization in transcriptions of multimedia documents : application to person name search) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>811–818</pages>
       <url hash="aff5441c">F12-1102</url>
@@ -1038,9 +1038,9 @@
     <meta>
       <booktitle>Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 2: TALN</booktitle>
       <url hash="40f5ba5d">F12-2</url>
-      <editor><first>Georges</first><last>Antoniadis</last></editor>
-      <editor><first>Hervé</first><last>Blanchon</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="georges-antoniadis"><first>Georges</first><last>Antoniadis</last></editor>
+      <editor id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1063,7 +1063,7 @@
     <paper id="2">
       <title>Extraction d’information automatique en domaine médical par projection inter-langue : vers un passage à l’échelle (Automatic Information Extraction in the Medical Domain by Cross-Lingual Projection) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <pages>15–28</pages>
       <url hash="ebeee667">F12-2002</url>
@@ -1080,7 +1080,7 @@
     </paper>
     <paper id="4">
       <title>Traitement automatique sur corpus de récits de voyages pyrénéens : Une analyse syntaxique, sémantique et temporelle (Processing of a Pyrenees Travel Novels Corpus : a Syntactical, Semantical and Temporal Analysis.) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <author><first>Richard</first><last>Moot</last></author>
       <author><first>Christian</first><last>Retoré</last></author>
       <author><first>Noémie-Fleur</first><last>Sandillon-Rezer</last></author>
@@ -1090,7 +1090,7 @@
     </paper>
     <paper id="5">
       <title>La reconnaissance des mots composés à l’épreuve de l’analyse syntaxique et vice-versa : évaluation de deux stratégies discriminantes (Recognition of Compound Words Tested against Parsing and Vice-versa : Evaluation of Two Discriminative Approaches) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Anthony</first><last>Sigogne</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <pages>57–70</pages>
@@ -1117,7 +1117,7 @@
       <title><fixed-case>TCOF</fixed-case>-<fixed-case>POS</fixed-case> : un corpus libre de français parlé annoté en morphosyntaxe (<fixed-case>TCOF</fixed-case>-<fixed-case>POS</fixed-case> : A Freely Available <fixed-case>POS</fixed-case>-Tagged Corpus of Spoken <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Christophe</first><last>Benzitoun</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>99–112</pages>
       <url hash="75c80846">F12-2008</url>
       <bibkey>benzitoun-etal-2012-tcof</bibkey>
@@ -1133,7 +1133,7 @@
     </paper>
     <paper id="10">
       <title>Utilisation de la translittération arabe pour l’amélioration de l’alignement de mots à partir de corpus parallèles français-arabe (Using <fixed-case>A</fixed-case>rabic Transliteration to Improve Word Alignment from <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>rabic Parallel Corpora) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <pages>127–140</pages>
       <url hash="a0e2c557">F12-2010</url>
@@ -1142,14 +1142,14 @@
     <paper id="11">
       <title>Compositionnalité et contextes issus de corpus comparables pour la traduction terminologique (Compositionality and Context for Bilingual Lexicon Extraction from Comparable Corpora) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>141–154</pages>
       <url hash="d0529486">F12-2011</url>
       <bibkey>morin-daille-2012-compositionnalite</bibkey>
     </paper>
     <paper id="12">
       <title>Raffinement du Lexique des Verbes Français (Resource Refining : &lt;&lt; Les Verbes Français &gt;&gt;) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Paul</first><last>Bédaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bédaride</last></author>
       <pages>155–168</pages>
       <url hash="2636ff0f">F12-2012</url>
       <bibkey>bedaride-2012-raffinement</bibkey>
@@ -1157,16 +1157,16 @@
     <paper id="13">
       <title>Étude des manifestations de la relation de méronymie dans une ressource distributionnelle (Study of Meronymy in a Distribution-Based Lexical Resource) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>François</first><last>Morlane-Hondère</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <pages>169–182</pages>
       <url hash="7eaa31d7">F12-2013</url>
       <bibkey>morlane-hondere-fabre-2012-etude</bibkey>
     </paper>
     <paper id="14">
       <title>Un critère de cohésion thématique fondé sur un graphe de cooccurrences (Topical Cohesion using Graph Random Walks) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Clément</first><last>de Groc</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>de Groc</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>183–195</pages>
       <url hash="5e920115">F12-2014</url>
       <bibkey>de-groc-etal-2012-un</bibkey>
@@ -1194,9 +1194,9 @@
     <paper id="17">
       <title>Étude comparative entre trois approches de résumé automatique de documents arabes (Comparative Study of Three Approaches to Automatic Summarization of <fixed-case>A</fixed-case>rabic Documents) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Iskandar</first><last>Keskes</last></author>
-      <author><first>Mohamed Mahdi</first><last>Boudabous</last></author>
+      <author id="mohamed-mahdi-boudabous"><first>Mohamed Mahdi</first><last>Boudabous</last></author>
       <author><first>Mohamed Hédi</first><last>Maaloul</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>225–238</pages>
       <url hash="5c8b4bd6">F12-2017</url>
       <bibkey>keskes-etal-2012-etude</bibkey>
@@ -1255,8 +1255,8 @@
     </paper>
     <paper id="24">
       <title>Le corpus Sequoia : annotation syntaxique et exploitation pour l’adaptation d’analyseur par pont lexical (The Sequoia Corpus : Syntactic Annotation and Use for a Parser Lexical Domain Adaptation Method) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>321–334</pages>
       <url hash="095a6794">F12-2024</url>
       <bibkey>candito-seddah-2012-le</bibkey>
@@ -1264,17 +1264,17 @@
     <paper id="25">
       <title><fixed-case>ACOLAD</fixed-case> Plateforme pour l’édition collaborative dépendancielle (<fixed-case>ACOLAD</fixed-case>: platform for collaborative dependency annotation) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <pages>335–342</pages>
       <url hash="da0ea992">F12-2025</url>
       <bibkey>brunet-manquat-goulian-2012-acolad</bibkey>
     </paper>
     <paper id="26">
       <title>Extraction de préférences à partir de dialogues de négociation (Towards Preference Extraction From Negotiation Dialogues) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Anaïs</first><last>Cadilhac</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anaïs</first><last>Cadilhac</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Mohamadou</first><last>Seck</last></author>
       <pages>343–350</pages>
       <url hash="80e100a6">F12-2026</url>
@@ -1282,8 +1282,8 @@
     </paper>
     <paper id="27">
       <title>Détection de conflits dans les communautés épistémiques en ligne (Conflicts detection in online epistemic communities) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <author><first>Dominique</first><last>Fréard</last></author>
       <author><first>Françoise</first><last>Détienne</last></author>
       <author><first>Michael</first><last>Baker</last></author>
@@ -1296,9 +1296,9 @@
       <title>Quel est l’apport de la détection d’entités nommées pour l’extraction d’information en domaine restreint ? (What is the contribution of named entities detection for information extraction in restricted domain ?) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Camille</first><last>Dutrey</last></author>
       <author><first>Chloé</first><last>Clavel</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>359–366</pages>
       <url hash="0bb12870">F12-2028</url>
       <bibkey>dutrey-etal-2012-quel</bibkey>
@@ -1314,7 +1314,7 @@
     <paper id="30">
       <title>Méthodologie d’exploration de corpus et de formalisation de règles grammaticales pour les langues des signes (Methodology for corpus exploration and grammatical rule building in Sign Language) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <pages>375–382</pages>
       <url hash="d6f7c45b">F12-2030</url>
       <bibkey>filhol-braffort-2012-methodologie</bibkey>
@@ -1337,7 +1337,7 @@
     </paper>
     <paper id="33">
       <title>Le Lexicoscope : un outil pour l’étude de profils combinatoires et l’extraction de constructions lexico-syntaxiques (The Lexicoscope : an integrated tool for combinatoric profles observation and lexico-syntactic constructs extraction) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <author><first>Sascha</first><last>Diwersy</last></author>
       <pages>399–406</pages>
       <url hash="c1a05f40">F12-2033</url>
@@ -1372,7 +1372,7 @@
       <title>Apprentissage automatique d’un chunker pour le français (Machine Learning of a chunker for <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Isabelle</first><last>Tellier</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <author><first>Arnaud</first><last>Courmet</last></author>
       <author><first>Mathieu</first><last>Martinet</last></author>
       <pages>431–438</pages>
@@ -1400,7 +1400,7 @@
       <title>La reconnaissance automatique de la fonction des pronoms démonstratifs en langue arabe (Automatic recognition of demonstrative pronouns function in <fixed-case>A</fixed-case>rabic) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Yacine</first><last>Ben Yahia</last></author>
       <author><first>Souha</first><last>Mezghani Hammami</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>455–462</pages>
       <url hash="b600697f">F12-2040</url>
       <bibkey>ben-yahia-etal-2012-la</bibkey>
@@ -1426,7 +1426,7 @@
     <paper id="43">
       <title>Combinaison de ressources générales pour une contextualisation implicite de requêtes (Query Contextualization and Reformulation by Combining External Corpora) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Romain</first><last>Deveaud</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>479–486</pages>
       <url hash="dea976e4">F12-2043</url>
       <bibkey>deveaud-bellot-2012-combinaison</bibkey>
@@ -1434,7 +1434,7 @@
     <paper id="44">
       <title>Repérage des entités nommées pour l’arabe : adaptation non-supervisée et combinaison de systèmes (Named Entity Recognition for <fixed-case>A</fixed-case>rabic : Unsupervised adaptation and Systems combination) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Souhir</first><last>Gahbiche-Braham</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>487–494</pages>
@@ -1443,7 +1443,7 @@
     </paper>
     <paper id="45">
       <title>Propagation de polarités dans des familles de mots : impact de la morphologie dans la construction d’un lexique pour l’analyse de sentiments (Spreading Polarities among Word Families: Impact of Morphology on Building a Lexicon for Sentiment Analysis) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Caroline</first><last>Brun</last></author>
       <pages>495–502</pages>
       <url hash="3186ca29">F12-2045</url>
@@ -1453,7 +1453,7 @@
       <title>Transitions thématiques : Annotation d’un corpus journalistique et premières analyses (Manual thematic annotation of a journalistic corpus : first observations and evaluation) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Alexandre</first><last>Labadié</last></author>
       <author><first>Patrice</first><last>Enjalbert</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <pages>503–510</pages>
       <url hash="e9463e6b">F12-2046</url>
       <bibkey>labadie-etal-2012-transitions</bibkey>
@@ -1478,17 +1478,17 @@
     </paper>
     <paper id="49">
       <title>Post-édition statistique pour l’adaptation aux domaines de spécialité en traduction automatique (Statistical Post-Editing of Machine Translation for Domain Adaptation) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>527–534</pages>
       <url hash="36018ff9">F12-2049</url>
       <bibkey>rubino-etal-2012-post</bibkey>
     </paper>
     <paper id="50">
       <title>Annotation référentielle du Corpus Arboré de <fixed-case>P</fixed-case>aris 7 en entités nommées (Referential named entity annotation of the <fixed-case>P</fixed-case>aris 7 <fixed-case>F</fixed-case>rench <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Marion</first><last>Richard</last></author>
       <author><first>Rosa</first><last>Stern</last></author>
       <pages>535–542</pages>
@@ -1497,7 +1497,7 @@
     </paper>
     <paper id="51">
       <title>Utilisation des fonctions de croyance pour l’estimation de paramètres en traduction automatique (Feature calculation for Statistical Machine Translation by using belief functions) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Simon</first><last>Petitrenaud</last></author>
       <pages>543–550</pages>
       <url hash="82c4f3f5">F12-2051</url>
@@ -1514,7 +1514,7 @@
     </paper>
     <paper id="53">
       <title>Enjeux méthodologiques, linguistiques et informatiques pour le traitement du français écrit des sourds (Methodological, linguistic and computational challenges for processing written <fixed-case>F</fixed-case>rench of deaf people) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Tristan</first><last>Vanrullen</last></author>
+      <author id="tristan-vanrullen"><first>Tristan</first><last>Vanrullen</last></author>
       <author><first>Leïla</first><last>Boutora</last></author>
       <author><first>Jean</first><last>Dagron</last></author>
       <pages>559–566</pages>
@@ -1528,7 +1528,7 @@
       <url hash="08f43382">F12-3</url>
       <editor><first>Jorge Mauricio</first><last>Molina Mejia</last></editor>
       <editor><first>Didier</first><last>Schwab</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1562,7 +1562,7 @@
     </paper>
     <paper id="4">
       <title>Integrating lexical, syntactic and system-based features to improve Word Confidence Estimation in <fixed-case>SMT</fixed-case></title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
       <pages>43–56</pages>
       <url hash="34b50b4a">F12-3004</url>
       <bibkey>luong-2012-integrating</bibkey>
@@ -1598,7 +1598,7 @@
     </paper>
     <paper id="9">
       <title>Création d’un multi-arbre à partir d’un texte balisé : l’exemple de l’annotation d’un corpus d’oral spontané (Creating a Multi-Tree from a Tagged Text : Annotating Spoken <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Julie</first><last>Belião</last></author>
+      <author id="julie-beliao"><first>Julie</first><last>Belião</last></author>
       <pages>109–122</pages>
       <url hash="6716db5c">F12-3009</url>
       <bibkey>beliao-2012-creation</bibkey>
@@ -1727,11 +1727,11 @@
     <meta>
       <booktitle>Proceedings of the Joint Conference <fixed-case>JEP</fixed-case>-<fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2012, volume 4: Invited Conferences</booktitle>
       <url hash="4ca45c80">F12-4</url>
-      <editor><first>Laurent</first><last>Besacier</last></editor>
-      <editor><first>Hervé</first><last>Blanchon</last></editor>
+      <editor id="laurent-besacier"><first>Laurent</first><last>Besacier</last></editor>
+      <editor id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></editor>
       <editor><first>Marie-Paule</first><last>Jacques</last></editor>
       <editor><first>Nathalie</first><last>Vallée</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1751,7 +1751,7 @@
     </paper>
     <paper id="2">
       <title>Tensions entre théorie et pratique dans les systèmes de <fixed-case>TAL</fixed-case>. Étude historique et épistémologique (Tensions Between Theory and Practice in <fixed-case>NLP</fixed-case> Systems. Historic and Epistemological Study) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Jacqueline</first><last>Léon</last></author>
+      <author id="jacqueline-leon"><first>Jacqueline</first><last>Léon</last></author>
       <pages>3</pages>
       <url hash="7d4f33e2">F12-4002</url>
       <bibkey>leon-2012-tensions</bibkey>
@@ -1775,9 +1775,9 @@
     <meta>
       <booktitle>Proceedings of the Joint Conference <fixed-case>JEP</fixed-case>-<fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2012, volume 5: Software Demonstrations</booktitle>
       <url hash="e76a3c04">F12-5</url>
-      <editor><first>Laurent</first><last>Besacier</last></editor>
-      <editor><first>Hervé</first><last>Blanchon</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="laurent-besacier"><first>Laurent</first><last>Besacier</last></editor>
+      <editor id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1791,7 +1791,7 @@
     <paper id="1">
       <title><fixed-case>G</fixed-case>rew : un outil de réécriture de graphes pour le <fixed-case>TAL</fixed-case> (<fixed-case>G</fixed-case>rew: a Graph Rewriting Tool for <fixed-case>NLP</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Guillame</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillame</first><last>Bonfante</last></author>
       <author><first>Paul</first><last>Masson</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
@@ -1801,7 +1801,7 @@
     </paper>
     <paper id="2">
       <title>Interfaces de navigation dans des contenus audio et vidéo (Navigation interfaces through audio and video contents) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>3–4</pages>
       <url hash="55c490e2">F12-5002</url>
       <bibkey>damnati-2012-interfaces</bibkey>
@@ -1857,7 +1857,7 @@
       <author><first>Atef</first><last>Ben-Youssef</last></author>
       <author><first>Pierre</first><last>Badin</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Frédéric</first><last>Eliséi</last></author>
+      <author id="frederic-elisei"><first>Frédéric</first><last>Eliséi</last></author>
       <pages>17–18</pages>
       <url hash="836b13e0">F12-5009</url>
       <bibkey>hueber-etal-2012-vizart3d</bibkey>
@@ -1865,7 +1865,7 @@
     <paper id="10">
       <title><fixed-case>ROC</fixed-case>me! : logiciel pour l’enregistrement et la gestion de corpus oraux (<fixed-case>ROC</fixed-case>me!: software for the recording and management of oral corpora) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Emmanuel</first><last>Ferragne</last></author>
-      <author><first>Sébastien</first><last>Flavier</last></author>
+      <author id="sebastien-flavier"><first>Sébastien</first><last>Flavier</last></author>
       <author><first>Christian</first><last>Fressard</last></author>
       <pages>19–20</pages>
       <url hash="d4569508">F12-5010</url>
diff --git a/data/xml/F13.xml b/data/xml/F13.xml
index a3cabc74e1..c9b6d8ba3f 100644
--- a/data/xml/F13.xml
+++ b/data/xml/F13.xml
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Using <fixed-case>POMDP</fixed-case>s for Topic-Focused <fixed-case>M</fixed-case>ulti-<fixed-case>D</fixed-case>ocument Summarization (L’utilisation des <fixed-case>POMDP</fixed-case> pour les résumés multi-documents orientés par une thématique) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Mustapha</first><last>Mojahid</last></author>
       <pages>33-47</pages>
       <url hash="3646b1b7">F13-1003</url>
@@ -52,7 +52,7 @@
     <paper id="5">
       <title>Grouping of terms based on linguistic and semantic regularities in a cross-lingual context (Groupement de termes basé sur des régularités linguistiques et sémantiques dans un contexte cross-langue) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Marie</first><last>Dupuch</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>62-75</pages>
       <url hash="247b0492">F13-1005</url>
@@ -61,7 +61,7 @@
     <paper id="6">
       <title><fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case>, an improved, extended and evaluated automatic <fixed-case>F</fixed-case>rench translation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et (<fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case> : amélioration, extension et évaluation d’une traduction française automatique de <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Quentin</first><last>Pradet</last></author>
-      <author><first>Jeanne</first><last>Baguenier-Desormeaux</last></author>
+      <author id="jeanne-baguenier-desormeaux"><first>Jeanne</first><last>Baguenier-Desormeaux</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <pages>76-89</pages>
@@ -71,8 +71,8 @@
     <paper id="7">
       <title>Discriminative statistical approaches for multilingual speech understanding (Approches statistiques discriminantes pour l’interprétation sémantique multilingue de la parole) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>90-103</pages>
       <url hash="5641c466">F13-1007</url>
       <bibkey>jabaian-etal-2013-discriminative</bibkey>
@@ -122,8 +122,8 @@
     </paper>
     <paper id="13">
       <title>Towards a treebank of spoken <fixed-case>F</fixed-case>rench (Vers un treebank du français parlé) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Anne</first><last>Abeillé</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>174-187</pages>
       <url hash="a063816c">F13-1013</url>
       <bibkey>abeille-crabbe-2013-towards</bibkey>
@@ -138,8 +138,8 @@
     </paper>
     <paper id="15">
       <title>A probabilistic segment model combining lexical cohesion and disruption for topic segmentation (Un modèle segmental probabiliste combinant cohésion lexicale et rupture lexicale pour la segmentation thématique) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Anca</first><last>Simon</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="anca-roxana-simon"><first>Anca</first><last>Simon</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <pages>202-214</pages>
       <url hash="2a2902ab">F13-1015</url>
@@ -194,7 +194,7 @@
     <paper id="22">
       <title>The constitution of an <fixed-case>A</fixed-case>rabic semantic resource from a multilingual aligned corpus (Constitution d’une ressource sémantique arabe à partir de corpus multilingue aligné) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Authoul Abdul</first><last>Hay</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>299-312</pages>
       <url hash="1b741e50">F13-1022</url>
       <bibkey>hay-kraif-2013-constitution</bibkey>
@@ -202,7 +202,7 @@
     <paper id="23">
       <title>Identification, Alignment, and Tranlsation of Relational Adjectives from Comparable Corpora (Identification, alignement, et traductions des adjectifs relationnels en corpus comparables) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Rima</first><last>Harastani</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <pages>313-326</pages>
       <url hash="a293a726">F13-1023</url>
@@ -212,7 +212,7 @@
       <title>(Utilisation de la similarité sémantique pour l’extraction de lexiques bilingues à partir de corpus comparables) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>327-338</pages>
       <url hash="92674e21">F13-1024</url>
       <bibkey>bouamor-etal-2013-utilisation</bibkey>
@@ -268,7 +268,7 @@
     </paper>
     <paper id="30">
       <title>Dynamic extension of a <fixed-case>F</fixed-case>rench morphological lexicon based a text stream (Extension dynamique de lexiques morphologiques pour le français à partir d’un flux textuel) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
       <author><first>Virginie</first><last>Mouilleron</last></author>
       <author><first>Marion</first><last>Baranes</last></author>
@@ -289,8 +289,8 @@
     <paper id="32">
       <title>Segmenting <fixed-case>A</fixed-case>rabic Texts into Elementary Discourse Units (Segmentation de textes arabes en unités discursives minimales) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Iskandar</first><last>Keskes</last></author>
-      <author><first>Farah</first><last>Beanamara</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Beanamara</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>435-449</pages>
       <url hash="c541872a">F13-1032</url>
       <bibkey>keskes-etal-2013-segmenting</bibkey>
@@ -307,8 +307,8 @@
     <paper id="34">
       <title>Semantic Annotation in Specific Domains with rich Ontologies (Annotation sémantique pour des domaines spécialisés et des ontologies riches) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Yue</first><last>Ma</last></author>
-      <author><first>François</first><last>Lévy</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="francois-levy"><first>François</first><last>Lévy</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>464-478</pages>
       <url hash="9a9b32c4">F13-1034</url>
       <bibkey>ma-etal-2013-semantic</bibkey>
@@ -316,8 +316,8 @@
     <paper id="35">
       <title>Web pages segmentation for document selection in Question Answering (Pré-segmentation de pages web et sélection de documents pertinents en Questions-Réponses) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Nicolas</first><last>Foucault</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <pages>479-492</pages>
       <url hash="1af10cf4">F13-1035</url>
       <bibkey>foucault-etal-2013-web</bibkey>
@@ -357,8 +357,8 @@
     </paper>
     <paper id="2">
       <title>Similarities induced by a comparability mapping : meaning and utility in the context of the clustering of comparable texts (Similarités induites par mesure de comparabilité : signification et utilité pour le clustering et l’alignement de textes comparables) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Pierre-Francois</first><last>Marteau</last></author>
-      <author><first>Gildas</first><last>Ménier</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-Francois</first><last>Marteau</last></author>
+      <author id="gildas-menier"><first>Gildas</first><last>Ménier</last></author>
       <pages>515-522</pages>
       <url hash="70970fb3">F13-2002</url>
       <bibkey>marteau-menier-2013-similarities</bibkey>
@@ -374,7 +374,7 @@
     <paper id="4">
       <title>Driven Decoding for machine translation (Vers un décodage guidé pour la traduction automatique) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>531-538</pages>
       <url hash="c9a6abdb">F13-2004</url>
       <bibkey>lecouteux-besacier-2013-driven</bibkey>
@@ -383,7 +383,7 @@
       <title>Can lightweight pre-editing rules improve statistical <fixed-case>MT</fixed-case> of forum content? (La La préédition avec des règles peu coûteuses, utile pour la <fixed-case>TA</fixed-case> statistique des forums ?) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Victoria</first><last>Porro</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sabine</first><last>Lehmann</last></author>
       <pages>539-546</pages>
       <url hash="1503ffef">F13-2005</url>
@@ -401,28 +401,28 @@
     <paper id="7">
       <title><fixed-case>ANCOR</fixed-case>, the first large <fixed-case>F</fixed-case>rench speaking corpus of conversational speech annotated in coreference to be freely available (<fixed-case>ANCOR</fixed-case>, premier corpus de français parlé d’envergure annoté en coréférence et distribué librement) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Judith</first><last>Muzerelle</last></author>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Emmanuel</first><last>Schang</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <pages>555-563</pages>
       <url hash="3fee1121">F13-2007</url>
       <bibkey>muzerelle-etal-2013-ancor</bibkey>
     </paper>
     <paper id="8">
       <title>Multilingual Compound Splitting (Segmentation Multilingue des Mots Composés) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Elizaveta</first><last>Loginova-Clouet</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="elizaveta-loginova-clouet"><first>Elizaveta</first><last>Loginova-Clouet</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>564-571</pages>
       <url hash="a6b2bc71">F13-2008</url>
       <bibkey>loginova-clouet-daille-2013-multilingual</bibkey>
     </paper>
     <paper id="9">
       <title>Complex terminologies management - the case of acronyms (Gestion des terminologies riches : l’exemple des acronymes) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <pages>572-579</pages>
       <url hash="3fb70d0e">F13-2009</url>
       <bibkey>zhang-mangeot-2013-complex</bibkey>
@@ -430,7 +430,7 @@
     <paper id="10">
       <title>N-gram Language Models and <fixed-case>POS</fixed-case> Distribution for the Identification of <fixed-case>S</fixed-case>panish Varieties (Ngrammes et Traits Morphosyntaxiques pour la Identification de Variétés de l’Espagnol) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
       <author><first>Sascha</first><last>Diwersy</last></author>
       <pages>580-587</pages>
       <url hash="baa39a21">F13-2010</url>
@@ -439,7 +439,7 @@
     <paper id="11">
       <title>Improving Minor Opinion Polarity Classification with Named Entity Analysis (L’apport des Entités Nommées pour la classification des opinions minoritaires) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Amel</first><last>Fraisse</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <pages>588-595</pages>
       <url hash="5784dc11">F13-2011</url>
@@ -447,7 +447,7 @@
     </paper>
     <paper id="12">
       <title>Lexical access via a simple co-occurrence network (Trouver les mots dans un simple réseau de co-occurrences) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Michael</first><last>Zock</last></author>
       <pages>596-603</pages>
       <url hash="7c3db63a">F13-2012</url>
@@ -462,7 +462,7 @@
     </paper>
     <paper id="14">
       <title>Semantic annotation influence on coreference detection using perceptron approach (Influence des annotations sémantiques sur un système de détection de coréférence à base de perceptron multi-couches) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
       <author><first>Ludovic</first><last>Jean-Louis</last></author>
       <pages>612-619</pages>
@@ -488,14 +488,14 @@
     <paper id="17">
       <title>Localizing toponyms in topographic map titles (Repérer des toponymes dans des titres de cartes topographiques) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Catherine</first><last>Dominguès</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <pages>636-642</pages>
       <url hash="49df66e5">F13-2017</url>
       <bibkey>domingues-eshkol-taravella-2013-localizing</bibkey>
     </paper>
     <paper id="18">
       <title>Extraction of temporal relations between clinical events in clinical documents (Extraction des relations temporelles entre événements médicaux dans des comptes rendus hospitaliers) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>643-650</pages>
       <url hash="e410697e">F13-2018</url>
@@ -528,7 +528,7 @@
     </paper>
     <paper id="22">
       <title>Converting dependencies for syntactic analysis of <fixed-case>F</fixed-case>rench into <fixed-case>PASSAGE</fixed-case> functional relations (Convertir des analyses syntaxiques en dépendances vers les relations fonctionnelles <fixed-case>PASSAGE</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Munshi</first><last>Asadullah</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <pages>675-682</pages>
@@ -546,8 +546,8 @@
       <title>Lexicons from Comparable Corpora for Multilingual Information Retrieval (Lexiques de corpus comparables et recherche d’information multilingue) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Frederik</first><last>Cailliau</last></author>
       <author><first>Ariane</first><last>Cavet</last></author>
-      <author><first>Clément</first><last>De Groc</last></author>
-      <author><first>Claude</first><last>De Loupy</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>De Groc</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>De Loupy</last></author>
       <pages>691-698</pages>
       <url hash="5a45cd5a">F13-2024</url>
       <bibkey>cailliau-etal-2013-lexicons</bibkey>
@@ -562,9 +562,9 @@
     </paper>
     <paper id="26">
       <title><fixed-case>S</fixed-case>eg<fixed-case>CV</fixed-case> : Eficient parsing of résumés with analysis and correction of errors (<fixed-case>S</fixed-case>eg<fixed-case>CV</fixed-case> : traitement efficace de <fixed-case>CV</fixed-case> avec analyse et correction d’erreurs) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>707-714</pages>
       <url hash="4abe27a6">F13-2026</url>
       <bibkey>cabrera-diego-etal-2013-segcv</bibkey>
@@ -572,8 +572,8 @@
     <paper id="27">
       <title>Search and usage of named conceptual entities in a categorisazion task (Recherche et utilisation d’entités nommées conceptuelles dans une tâche de catégorisation) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Jean-Valère</first><last>Cossu</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <pages>715-722</pages>
       <url hash="80c48cef">F13-2027</url>
       <bibkey>cossu-etal-2013-search</bibkey>
@@ -581,7 +581,7 @@
     <paper id="28">
       <title>A corpus of post-edited translations (Un corpus d’erreurs de traduction) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Natalia</first><last>Segal</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>723-730</pages>
@@ -590,17 +590,17 @@
     </paper>
     <paper id="29">
       <title>An evaluation summary method based on combination of automatic and textual complexity metrics (Une méthode d’évaluation des résumés basée sur la combinaison de métriques automatiques et de complexité textuelle) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Samira Walha</first><last>Ellouze</last></author>
+      <author id="samira-ellouze"><first>Samira Walha</first><last>Ellouze</last></author>
       <author><first>Maher</first><last>Jaoua</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>731-738</pages>
       <url hash="cbb68cef">F13-2029</url>
       <bibkey>ellouze-etal-2013-evaluation</bibkey>
     </paper>
     <paper id="30">
       <title>An iterative topic segmentation algorithm with intra-content term weighting (Segmentation thématique : processus itératif de pondération intra-contenu) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>739-746</pages>
       <url hash="c479019c">F13-2030</url>
@@ -612,7 +612,7 @@
       <author><first>Hubert</first><last>Naets</last></author>
       <author><first>Laetitia</first><last>Brouwers</last></author>
       <author><first>Pavel</first><last>Romanov</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>747-754</pages>
       <url hash="bafd8f3c">F13-2031</url>
       <bibkey>panchenko-etal-2013-search</bibkey>
@@ -620,7 +620,7 @@
     <paper id="32">
       <title>An extended morphological analyzer of <fixed-case>G</fixed-case>erman handling verbal forms with separated separable particles (Un analyseur morphologique étendu de l’allemand traitant les formes verbales à particule séparée) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Jean-Philippe</first><last>Guilbaud</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Vincent</first><last>Berment</last></author>
       <pages>755-763</pages>
       <url hash="f1c1f961">F13-2032</url>
@@ -637,7 +637,7 @@
     <paper id="34">
       <title>Anaphora Resolution Applied to Collocation Identification: A Preliminary Evaluation (Résolution d’anaphores appliquée aux collocations: une évaluation préliminaire) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>772-778</pages>
       <url hash="648cf47e">F13-2034</url>
       <bibkey>nerima-wehrli-2013-anaphora</bibkey>
@@ -645,7 +645,7 @@
     <paper id="35">
       <title>Help enrich a terminological repository : proposals and experiments (Aide à l’enrichissement d’un référentiel terminologique : propositions et expérimentations) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Thibault</first><last>Mondary</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Haïfa</first><last>Zargayouna</last></author>
       <author><first>Sabine</first><last>Barreaux</last></author>
       <pages>779-786</pages>
@@ -714,7 +714,7 @@
     <paper id="6">
       <title>An Interface for Validating and Evaluating Thematic Timelines (Une interface pour la validation et l’évaluation de chronologies thématiques) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Erwan Le</first><last>Flem</last></author>
       <pages>797-798</pages>
       <url hash="b0282ac1">F13-3006</url>
@@ -730,8 +730,8 @@
     </paper>
     <paper id="8">
       <title>i<fixed-case>MAG</fixed-case> : <fixed-case>MT</fixed-case>-postediting, translation quality evaluation and parallel corpus production (i<fixed-case>MAG</fixed-case> : post-édition, évaluation de qualité de <fixed-case>TA</fixed-case> et production d’un corpus parallèle) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Lingxiao</first><last>Wang</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ling-xiao-wang"><first>Lingxiao</first><last>Wang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <pages>801-802</pages>
       <url hash="62393b4d">F13-3008</url>
       <bibkey>wang-zhang-2013-imag</bibkey>
@@ -752,9 +752,9 @@
     </paper>
     <paper id="11">
       <title>Apopsis Demonstrator for Tweet Analysis (Démonstrateur Apopsis pour l’analyse des tweets) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Sebastián Peña</first><last>Saldarriaga</last></author>
+      <author id="sebastian-pena-saldarriaga"><first>Sebastián Peña</first><last>Saldarriaga</last></author>
       <author><first>Damien</first><last>Vintache</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>807-808</pages>
       <url hash="247983ea">F13-3011</url>
       <bibkey>saldarriaga-etal-2013-apopsis</bibkey>
@@ -769,7 +769,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite - Terminological Alignment from Comparable Corpora (<fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite alignement terminologique à partir de corpus comparables) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Rima</first><last>Harastani</last></author>
       <pages>812-813</pages>
       <url hash="310c15bc">F13-3013</url>
@@ -793,7 +793,7 @@
     </frontmatter>
     <paper id="1">
       <title>Improving Translation to Morphologically Rich Languages (Améliorer la traduction des langages morphologiquement riches) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1-1</pages>
       <url hash="b9f097ce">F13-4001</url>
       <bibkey>fraser-2013-improving</bibkey>
@@ -889,7 +889,7 @@
     </paper>
     <paper id="10">
       <title>A linguistic approach for knowledge extraction from an <fixed-case>A</fixed-case>rabic text (Une approche linguistique pour l’extraction des connaissances dans un texte arabe) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <pages>124-137</pages>
       <url hash="d5731d98">F13-5010</url>
       <bibkey>saadane-2013-linguistic</bibkey>
diff --git a/data/xml/F14.xml b/data/xml/F14.xml
index c2acf9712b..273308ed7a 100644
--- a/data/xml/F14.xml
+++ b/data/xml/F14.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of TALN 2014 (Volume 1: Long Papers)</booktitle>
       <url hash="580e03b4">F14-1</url>
       <editor><first>Philippe</first><last>Blache</last></editor>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
       <editor><first>Brigitte</first><last>Bigi</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -31,14 +31,14 @@
       <title>The impact of domains for Keyphrase extraction (Influence des domaines de spécialité dans l’extraction de termes-clés) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Adrien</first><last>Bougouin</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>13-24</pages>
       <url hash="12a454b6">F14-1002</url>
       <bibkey>bougouin-etal-2014-impact</bibkey>
     </paper>
     <paper id="3">
       <title>Event Role Labelling using a Neural Network Model (Étiquetage en rôles événementiels fondé sur l’utilisation d’un modèle neuronal) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Emanuela</first><last>Boroş</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boroş</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
@@ -49,14 +49,14 @@
     <paper id="4">
       <title>Using distributed word representations for robust semantic role labeling (Utilisation de représentations de mots pour l’étiquetage de rôles sémantiques suivant <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>William</first><last>Léchelle</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>36-45</pages>
       <url hash="8a5b1993">F14-1004</url>
       <bibkey>lechelle-langlais-2014-using</bibkey>
     </paper>
     <paper id="5">
       <title>Cross-lingual Word Sense Disambiguation for Predicate Labelling of <fixed-case>F</fixed-case>rench</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>46-55</pages>
       <url hash="fedb9af6">F14-1005</url>
@@ -71,24 +71,24 @@
     </paper>
     <paper id="7">
       <title>Playing with parsers (Jouer avec des analyseurs syntaxiques) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>67-78</pages>
       <url hash="a9a36e57">F14-1007</url>
       <bibkey>villemonte-de-la-clergerie-2014-playing</bibkey>
     </paper>
     <paper id="8">
       <title>Principles of Lexical Network Systemic Modeling (Principes de modélisation systémique des réseaux lexicaux) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>79-90</pages>
       <url hash="c8952e5c">F14-1008</url>
       <bibkey>polguere-2014-principles</bibkey>
     </paper>
     <paper id="9">
       <title>A model to predict lexical complexity and to grade words (Un modèle pour prédire la complexité lexicale et graduer les mots) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Delphine</first><last>Bernhard</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>91-102</pages>
       <url hash="5e897923">F14-1009</url>
       <bibkey>gala-etal-2014-model</bibkey>
@@ -115,7 +115,7 @@
     <paper id="12">
       <title>Can we chunk well with bad <fixed-case>POS</fixed-case> labels? (Peut-on bien chunker avec de mauvaises étiquettes <fixed-case>POS</fixed-case> ?) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Isabelle</first><last>Tellier</last></author>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Yoann</first><last>Dupont</last></author>
       <author><first>Ilaine</first><last>Wang</last></author>
       <pages>125-136</pages>
@@ -125,7 +125,7 @@
     <paper id="13">
       <title>Analogy-based Text Normalization : the case of unknowns words (Normalisation de textes par analogie: le cas des mots inconnus) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Marion</first><last>Baranes</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>137-148</pages>
       <url hash="1bcf86c4">F14-1013</url>
       <bibkey>baranes-sagot-2014-analogy</bibkey>
@@ -133,8 +133,8 @@
     <paper id="14">
       <title>An evaluation of various methods for adjective-nouns composition (Une évaluation approfondie de différentes méthodes de compositionalité sémantique) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Antoine</first><last>Bride</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
-      <author><first>Nicolas</first><last>Asher</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="nicholas-asher"><first>Nicolas</first><last>Asher</last></author>
       <pages>149-160</pages>
       <url hash="5aea422a">F14-1014</url>
       <bibkey>bride-etal-2014-evaluation</bibkey>
@@ -170,7 +170,7 @@
       <title>A simple approach to make dialogue systems incremental (Vers une approche simplifiée pour introduire le caractère incrémental dans les systèmes de dialogue) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Hatim</first><last>Khouzaimi</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>196-207</pages>
       <url hash="bda6a75b">F14-1018</url>
       <bibkey>khouzaimi-etal-2014-simple</bibkey>
@@ -178,7 +178,7 @@
     <paper id="19">
       <title>The Démonette Lexical Database: between Constructional Semantics and Word Formation (La base lexicale Démonette : entre sémantique constructionnelle et morphologie dérivationnelle) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>208-219</pages>
       <url hash="50d3b062">F14-1019</url>
       <bibkey>hathout-namer-2014-demonette-lexical</bibkey>
@@ -195,7 +195,7 @@
     <paper id="21">
       <title>Reducing data sparsity by generalising distributional contexts: application to specialised texts (Réduction de la dispersion des données par généralisation des contextes distributionnels : application aux textes de spécialité) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Amandine</first><last>Périnet</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>232-243</pages>
       <url hash="e7c1ef74">F14-1021</url>
       <bibkey>perinet-hamon-2014-reducing</bibkey>
@@ -204,7 +204,7 @@
       <title>Unsupervised extraction of semantic relations (Extraction non supervisée de relations sémantiques lexicales) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Juliette</first><last>Conrath</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>244-255</pages>
       <url hash="75a5d267">F14-1022</url>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="23">
       <title>Comparison of scheduling methods for the learning rate of neural network language models (Modèles de langue neuronaux: une comparaison de plusieurs stratégies d’apprentissage) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>256-267</pages>
@@ -222,7 +222,7 @@
     <paper id="24">
       <title>Study of the impact of proper name transliteration on the performance of word alignment in <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>rabic parallel corpora (Etude de l’impact de la translittération de noms propres sur la qualité de l’alignement de mots à partir de corpus parallèles français-arabe) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <pages>268-279</pages>
       <url hash="638305aa">F14-1024</url>
       <bibkey>semmar-saadane-2014-study</bibkey>
@@ -230,7 +230,7 @@
     <paper id="25">
       <title>Topic Adaptation for the Automatic Translation of News Articles (Adaptation thématique pour la traduction automatique de dépêches de presse) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Souhir</first><last>Gahbiche-Braham</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>280-291</pages>
       <url hash="5ebcb504">F14-1025</url>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="27">
       <title>Detection and Analysis of Paraphrastic Reformulations in Spoken Corpora (Repérage et analyse de la reformulation paraphrastique dans les corpus oraux) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Iris</first><last>Eshkol-Taravella</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol-Taravella</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>304-315</pages>
       <url hash="f87e5348">F14-1027</url>
@@ -257,7 +257,7 @@
       <author><first>Raja</first><last>Ayed</last></author>
       <author><first>Ibrahim</first><last>Bounhas</last></author>
       <author><first>Bilel</first><last>Elayeb</last></author>
-      <author><first>Narjès</first><last>Bellamine Ben Saoud</last></author>
+      <author id="narjes-bellamine-ben-saoud"><first>Narjès</first><last>Bellamine Ben Saoud</last></author>
       <author><first>Fabrice</first><last>Evrard</last></author>
       <pages>316-327</pages>
       <url hash="b3a4da40">F14-1028</url>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="29">
       <title>A discriminative parser of the <fixed-case>LR</fixed-case> family for phrase structure parsing (Un analyseur discriminant de la famille <fixed-case>LR</fixed-case> pour l’analyse en constituants) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>328-339</pages>
       <url hash="e509325a">F14-1029</url>
       <bibkey>crabbe-2014-discriminative</bibkey>
@@ -290,8 +290,8 @@
     </paper>
     <paper id="32">
       <title>Semantic Annotation and Terminology Validation in full scientific articles in Social Sciences and Humanities (Annotation sémantique et validation terminologique en texte intégral en <fixed-case>SHS</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Mokhtar-Boumedyen</first><last>Billami</last></author>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="mokhtar-b-billami"><first>Mokhtar-Boumedyen</first><last>Billami</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Evelyne</first><last>Jacquey</last></author>
       <author><first>Laurence</first><last>Kister</last></author>
       <pages>363-376</pages>
@@ -303,7 +303,7 @@
       <author><first>Charlotte</first><last>Roze</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <author><first>Dominique</first><last>Legallois</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <author><first>Mathilde</first><last>Salles</last></author>
       <pages>377-388</pages>
       <url hash="558b190d">F14-1033</url>
@@ -315,7 +315,7 @@
       <booktitle>Proceedings of TALN 2014 (Volume 2: Short Papers)</booktitle>
       <url hash="e6510b52">F14-2</url>
       <editor><first>Philippe</first><last>Blache</last></editor>
-      <editor><first>Frédéric</first><last>Béchet</last></editor>
+      <editor id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></editor>
       <editor><first>Brigitte</first><last>Bigi</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -329,7 +329,7 @@
     </frontmatter>
     <paper id="1">
       <title>Machine translation for litterature: a pilot study (Traduction automatisée d’une oeuvre littéraire: une étude pilote) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>389-394</pages>
       <url hash="85d88065">F14-2001</url>
       <bibkey>besacier-2014-machine</bibkey>
@@ -346,8 +346,8 @@
     <paper id="3">
       <title>On-going Cooperative Research towards Developing Economy-Oriented <fixed-case>C</fixed-case>hinese-<fixed-case>F</fixed-case>rench <fixed-case>SMT</fixed-case> Systems with a New <fixed-case>SMT</fixed-case> Framework</title>
       <author><first>Yidong</first><last>Chen</last></author>
-      <author><first>Lingxiao</first><last>Wang</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="ling-xiao-wang"><first>Lingxiao</first><last>Wang</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Xiaodong</first><last>Shi</last></author>
       <pages>401-406</pages>
       <url hash="6fdb5774">F14-2003</url>
@@ -356,7 +356,7 @@
     <paper id="4">
       <title>Automatic Term Extraction Combining Different Information (Extraction automatique de termes combinant différentes informations) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Juan Antonio</first><last>Lossio-Ventura</last></author>
-      <author><first>Clement</first><last>Jonquet</last></author>
+      <author id="clement-jonquet"><first>Clement</first><last>Jonquet</last></author>
       <author><first>Mathieu</first><last>Roche</last></author>
       <author><first>Maguelonne</first><last>Teisseire</last></author>
       <pages>407-412</pages>
@@ -366,21 +366,21 @@
     <paper id="5">
       <title>Automated Analysis for Stem Spaces: the case of <fixed-case>F</fixed-case>rench verbs (Analyse automatique d’espaces thématiques) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Gilles</first><last>Boyé</last></author>
-      <author><first>Anna</first><last>Kupść</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
       <pages>413-418</pages>
       <url hash="0905a632">F14-2005</url>
       <bibkey>boye-kupsc-2014-automated</bibkey>
     </paper>
     <paper id="6">
       <title>Extraction and representation of support verb constructions in <fixed-case>S</fixed-case>panish (Extraction et représentation des constructions à verbe support en espagnol) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Sandra</first><last>Milena Castellanos Páez</last></author>
+      <author id="sandra-milena-castellanos-paez"><first>Sandra</first><last>Milena Castellanos Páez</last></author>
       <pages>419-424</pages>
       <url hash="9e69ffa2">F14-2006</url>
       <bibkey>milena-castellanos-paez-2014-extraction</bibkey>
     </paper>
     <paper id="7">
       <title>Sub-categorization in ‘pour’ and lexical syntax (Sous-catégorisation en pour et syntaxe lexicale) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <author><first>Margot</first><last>Colinet</last></author>
       <pages>425-430</pages>
@@ -399,7 +399,7 @@
     </paper>
     <paper id="9">
       <title>Named Entity Recognition and Correction in <fixed-case>OCR</fixed-case>ized Corpora (Détection et correction automatique d’entités nommées dans des corpus <fixed-case>OCR</fixed-case>isés) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Kata</first><last>Gábor</last></author>
       <pages>437-442</pages>
       <url hash="5489ef98">F14-2009</url>
@@ -416,7 +416,7 @@
     <paper id="11">
       <title>User evaluation of a multiple answer extraction system on the Web (Évaluation d’un système d’extraction de réponses multiples sur le Web par comparaison à des humains) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Mathieu-Henri</first><last>Falco</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <pages>449-454</pages>
       <url hash="59756d3d">F14-2011</url>
@@ -444,7 +444,7 @@
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Laetitia</first><last>Brouwers</last></author>
       <author><first>Hubert</first><last>Naets</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>467-472</pages>
       <url hash="1905f526">F14-2014</url>
       <bibkey>francois-etal-2014-amesure</bibkey>
@@ -467,7 +467,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>KNG</fixed-case>: a Tool for Writing Easily Transducer Cascades (<fixed-case>KNG</fixed-case>: un outil pour l’écriture facile de cascades de transducteurs) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>485-490</pages>
       <url hash="23701205">F14-2017</url>
       <bibkey>barthelemy-2014-kng</bibkey>
@@ -500,8 +500,8 @@
     <paper id="21">
       <title>Impact of the nature and size of the training set on performance in the automatic detection of named entities (Impact de la nature et de la taille des corpus d’apprentissage sur les performances dans la détection automatique des entités nommées) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Anaïs</first><last>Ollagnier</last></author>
-      <author><first>Sébastien</first><last>Fournier</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="sebastien-fournier"><first>Sébastien</first><last>Fournier</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Frédéric</first><last>Béchet</last></author>
       <pages>511-516</pages>
       <url hash="00b244bf">F14-2021</url>
@@ -535,8 +535,8 @@
     <paper id="25">
       <title>Supporting Sign Languages Exploratory Linguistics with an Automatization of the Annotation Process (Vers un traitement automatique en soutien d’une linguistique exploratoire des Langues des Signes) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Rémi</first><last>Dubot</last></author>
-      <author><first>Arturo</first><last>Curiel</last></author>
-      <author><first>Christophe</first><last>Collet</last></author>
+      <author id="arturo-curiel"><first>Arturo</first><last>Curiel</last></author>
+      <author id="christophe-collet"><first>Christophe</first><last>Collet</last></author>
       <pages>537-542</pages>
       <url hash="e40688f3">F14-2025</url>
       <bibkey>dubot-etal-2014-supporting</bibkey>
@@ -555,7 +555,7 @@
       <author><first>Rémi</first><last>Bois</last></author>
       <author><first>Johannes</first><last>Leveling</last></author>
       <author><first>Lorraine</first><last>Goeuriot</last></author>
-      <author><first>Gareth J. F.</first><last>Jones</last></author>
+      <author id="gareth-j-f-jones"><first>Gareth J. F.</first><last>Jones</last></author>
       <author><first>Liadh</first><last>Kelly</last></author>
       <pages>550-555</pages>
       <url hash="5e8a7580">F14-2027</url>
@@ -571,13 +571,13 @@
     </paper>
     <paper id="29">
       <title>Tense and Time Annotations : a Contribution to <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Improvement (Annotation de la temporalité en corpus : contribution à l’amélioration de la norme <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Emmanuel</first><last>Schang</last></author>
       <author><first>Lotfi</first><last>Abouda</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <pages>562-567</pages>
       <url hash="9209429f">F14-2029</url>
       <bibkey>lefeuvre-etal-2014-tense</bibkey>
@@ -585,7 +585,7 @@
     <paper id="30">
       <title>Automatic identification of document sections for designing a <fixed-case>F</fixed-case>rench clinical corpus (Identification automatique de zones dans des documents pour la constitution d’un corpus médical en français) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>568-573</pages>
       <url hash="529b2474">F14-2030</url>
       <bibkey>deleger-neveol-2014-automatic</bibkey>
@@ -593,11 +593,11 @@
     <paper id="31">
       <title>Annotation scheme for deep dependency syntax of <fixed-case>F</fixed-case>rench (Un schéma d’annotation en dépendances syntaxiques profondes pour le français) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Guy</first><last>Perrier</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Corentin</first><last>Ribeyre</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>574-579</pages>
       <url hash="d14c58de">F14-2031</url>
       <bibkey>perrier-etal-2014-annotation</bibkey>
@@ -616,7 +616,7 @@
     </paper>
     <paper id="33">
       <title>Integrating lexicographic examples in a lexical network (Intégration relationnelle des exemples lexicographiques dans un réseau lexical) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Veronika</first><last>Lux-Pogodalla</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux-Pogodalla</last></author>
       <pages>586-591</pages>
       <url hash="a0570084">F14-2033</url>
       <bibkey>lux-pogodalla-2014-integrating</bibkey>
@@ -624,7 +624,7 @@
     <paper id="34">
       <title>Colors of People (Les couleurs des gens) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <author><first>Virginie</first><last>Zampa</last></author>
       <pages>592-597</pages>
       <url hash="74de49fd">F14-2034</url>
@@ -635,8 +635,8 @@
       <author><first>Mohammad</first><last>Nasiruddin</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>598-603</pages>
       <url hash="232373a1">F14-2035</url>
       <bibkey>nasiruddin-etal-2014-word</bibkey>
@@ -653,7 +653,7 @@
     <meta>
       <booktitle>Proceedings of TALN 2014 (Volume 3: System Demonstrations)</booktitle>
       <url hash="1e148727">F14-3</url>
-      <editor><first>Grégoire</first><last>de Montcheuil</last></editor>
+      <editor id="gregoire-moreau-de-montcheuil"><first>Grégoire</first><last>de Montcheuil</last></editor>
       <editor><first>Brigitte</first><last>Bigi</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -739,7 +739,7 @@
       <title><fixed-case>D</fixed-case>icta<fixed-case>N</fixed-case>um: a dialogue system for numbers dictation (<fixed-case>D</fixed-case>icta<fixed-case>N</fixed-case>um : système de dialogue incrémental pour la dictée de numéros.) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Hatim</first><last>Khouzaimi</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>23-25</pages>
       <url hash="644583b1">F14-3010</url>
       <bibkey>khouzaimi-etal-2014-dictanum</bibkey>
@@ -759,7 +759,7 @@
       <author><first>Hajar</first><last>Falih</last></author>
       <author><first>Christine</first><last>Chardenon</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefevre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefevre</last></author>
       <pages>28-29</pages>
       <url hash="4f3fc06b">F14-3012</url>
       <bibkey>ekeinhor-komi-etal-2014-enia</bibkey>
@@ -788,7 +788,7 @@
     <meta>
       <booktitle>Proceedings of TALN 2014 (Volume 4: RECITAL - Student Research Workshop)</booktitle>
       <url hash="c6334c8a">F14-4</url>
-      <editor><first>Núria</first><last>Gala</last></editor>
+      <editor id="nuria-gala"><first>Núria</first><last>Gala</last></editor>
       <editor><first>Klim</first><last>Peshkov</last></editor>
       <editor><first>Brigitte</first><last>Bigi</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
@@ -845,7 +845,7 @@
     </paper>
     <paper id="7">
       <title>Description of structures of time (in <fixed-case>F</fixed-case>rench sign language) based on a formal grammar (Une description des structures de la durée en Langue des Signes Française à partir d’une grammaire formelle) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Mohamed</first><last>Hadjadj</last></author>
+      <author id="mohamed-nassime-hadjadj"><first>Mohamed</first><last>Hadjadj</last></author>
       <pages>71-80</pages>
       <url hash="2c83edb8">F14-4007</url>
       <bibkey>hadjadj-2014-description</bibkey>
@@ -866,7 +866,7 @@
     </paper>
     <paper id="10">
       <title>Extraction methods for automatic summarization of spoken conversations from call centers (Méthodes par extraction pour le résumé automatique de conversations parlées provenant de centres d’appels) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Jérémy</first><last>Trione</last></author>
+      <author id="jeremy-trione"><first>Jérémy</first><last>Trione</last></author>
       <pages>104-111</pages>
       <url hash="680e51dd">F14-4010</url>
       <bibkey>trione-2014-extraction</bibkey>
diff --git a/data/xml/H01.xml b/data/xml/H01.xml
index 6c9cb68d6e..1abfff7cfb 100644
--- a/data/xml/H01.xml
+++ b/data/xml/H01.xml
@@ -12,21 +12,21 @@
     <paper id="1">
       <title>Activity detection for information access to oral communication</title>
       <author><first>Klaus</first><last>Ries</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="19f623c9">H01-1001</url>
       <bibkey>ries-waibel-2001-activity</bibkey>
     </paper>
     <paper id="2">
       <title>Adapting an Example-Based Translation System to <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Robert E.</first><last>Frederking</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
       <url hash="154c85f4">H01-1002</url>
       <bibkey>zhang-etal-2001-adapting</bibkey>
     </paper>
     <paper id="3">
       <title>Advances in meeting recognition</title>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Hua</first><last>Yu</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Yue</first><last>Pan</last></author>
@@ -41,10 +41,10 @@
     <paper id="4">
       <title>Amount of Information Presented in a Complex List: Effects on User Performance</title>
       <author><first>Dawn</first><last>Dutton</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Selina</first><last>Chu</last></author>
       <author><first>James</first><last>Hubbell</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <url hash="a50e3f9b">H01-1004</url>
       <bibkey>dutton-etal-2001-amount</bibkey>
     </paper>
@@ -60,17 +60,17 @@
     <paper id="6">
       <title>Answering What-Is Questions by Virtual Annotation</title>
       <author><first>John</first><last>Prager</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Krzysztof</first><last>Czuba</last></author>
       <url hash="51656267">H01-1006</url>
       <bibkey>prager-etal-2001-answering</bibkey>
     </paper>
     <paper id="7">
       <title>Architecture and Design Considerations in <fixed-case>NESPOLE</fixed-case>!: a Speech Translation System for <fixed-case>E</fixed-case>-commerce Applications</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Chad</first><last>Langley</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <author><first>Gianni</first><last>Lazzari</last></author>
       <author><first>Paolo</first><last>Coletti</last></author>
       <author><first>Loredana</first><last>Taddei</last></author>
@@ -80,7 +80,7 @@
     </paper>
     <paper id="8">
       <title>Assigning Belief Scores to Names in Queries</title>
-      <author><first>Christopher</first><last>Dozier</last></author>
+      <author id="christopher-dozier"><first>Christopher</first><last>Dozier</last></author>
       <url hash="fccde144">H01-1008</url>
       <bibkey>dozier-2001-assigning</bibkey>
     </paper>
@@ -88,13 +88,13 @@
       <title>Automatic Pattern Acquisition for <fixed-case>J</fixed-case>apanese Information Extraction</title>
       <author><first>Kiyoshi</first><last>Sudo</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="4c965540">H01-1009</url>
       <bibkey>sudo-etal-2001-automatic</bibkey>
     </paper>
     <paper id="10">
       <title>Automatic Predicate Argument Analysis of the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <author><first>Scott</first><last>Cotton</last></author>
       <url hash="94580f3d">H01-1010</url>
@@ -103,28 +103,28 @@
     <paper id="11">
       <title>Automatic Title Generation for Spoken Broadcast News</title>
       <author><first>Rong</first><last>Jin</last></author>
-      <author><first>Alexander G.</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander G.</first><last>Hauptmann</last></author>
       <url hash="e2ab0b49">H01-1011</url>
       <bibkey>jin-hauptmann-2001-automatic</bibkey>
     </paper>
     <paper id="12">
       <title>A Conversational Interface for Online Shopping</title>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Veronika</first><last>Horvath</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
-      <author><first>Margo</first><last>Stys-Budzikowska</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="margo-stys-budzikowska"><first>Margo</first><last>Stys-Budzikowska</last></author>
       <url hash="bae6a081">H01-1012</url>
       <bibkey>chai-etal-2001-conversational</bibkey>
     </paper>
     <paper id="13">
       <title>Conversational Sales Assistant for Online Shopping</title>
-      <author><first>Margo</first><last>Budzikowska</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="margo-stys-budzikowska"><first>Margo</first><last>Budzikowska</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Sunil</first><last>Govindappa</last></author>
       <author><first>Veronika</first><last>Horvath</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <author><first>Wlodek</first><last>Zadrozny</last></author>
       <url hash="e35e5e4b">H01-1013</url>
       <bibkey>budzikowska-etal-2001-conversational</bibkey>
@@ -132,20 +132,20 @@
     <paper id="14">
       <title>Converting Dependency Structures to Phrase Structures</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="e56b3503">H01-1014</url>
       <bibkey>xia-palmer-2001-converting</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>DATE</fixed-case>: A Dialogue Act Tagging Scheme for Evaluation of Spoken Dialogue Systems</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <url hash="e0c6bc19">H01-1015</url>
       <bibkey>walker-passonneau-2001-date</bibkey>
     </paper>
     <paper id="16">
       <title>Development of the <fixed-case>HRL</fixed-case> Route Navigation Dialogue System</title>
-      <author><first>Robert</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert</first><last>Belvin</last></author>
       <author><first>Ron</first><last>Burns</last></author>
       <author><first>Cheryl</first><last>Hein</last></author>
       <url hash="c980238f">H01-1016</url>
@@ -153,21 +153,21 @@
     </paper>
     <paper id="17">
       <title>Dialogue Interaction with the <fixed-case>DARPA</fixed-case> Communicator Infrastructure: The Development of Useful Software</title>
-      <author><first>Samuel</first><last>Bayer</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="samuel-bayer"><first>Samuel</first><last>Bayer</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>Bryan</first><last>George</last></author>
       <url hash="6b7c6b5e">H01-1017</url>
       <bibkey>bayer-etal-2001-dialogue</bibkey>
     </paper>
     <paper id="18">
       <title>Domain Portability in Speech-to-Speech Translation</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Chad</first><last>Langley</last></author>
       <author><first>Benjamin</first><last>Han</last></author>
       <author><first>Alicia</first><last>Tribble</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <url hash="0f73a116">H01-1018</url>
@@ -193,40 +193,40 @@
     <paper id="21">
       <title>Evaluating Question-Answering Techniques in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Xiaoyan</first><last>Li</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <url hash="03b100be">H01-1021</url>
       <bibkey>li-croft-2001-evaluating</bibkey>
     </paper>
     <paper id="22">
       <title>An Evaluation Corpus For Temporal Summarization</title>
-      <author><first>Vikash</first><last>Khandelwal</last></author>
+      <author id="vikash-khandelwal"><first>Vikash</first><last>Khandelwal</last></author>
       <author><first>Rahul</first><last>Gupta</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <url hash="296e1ee2">H01-1022</url>
       <bibkey>khandelwal-etal-2001-evaluation</bibkey>
     </paper>
     <paper id="23">
       <title>Evaluation Results for the Talk’n’Travel System</title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="be8ad6bf">H01-1023</url>
       <bibkey>stallard-2001-evaluation</bibkey>
     </paper>
     <paper id="24">
       <title>Experiments in Multi-Modal Automatic Content Extraction</title>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <author><first>Sergey</first><last>Bratus</last></author>
       <author><first>Scott</first><last>Miller</last></author>
       <author><first>Rebecca</first><last>Stone</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Alex</first><last>Zamanian</last></author>
       <url hash="b55f088a">H01-1024</url>
       <bibkey>ramshaw-etal-2001-experiments</bibkey>
     </paper>
     <paper id="25">
       <title>Exploring Speech-Enabled Dialogue with the Galaxy Communicator Infrastructure</title>
-      <author><first>Samuel</first><last>Bayer</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="samuel-bayer"><first>Samuel</first><last>Bayer</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>Bryan</first><last>George</last></author>
       <url hash="f671c767">H01-1025</url>
       <bibkey>bayer-etal-2001-exploring</bibkey>
@@ -235,7 +235,7 @@
       <title>Facilitating Treebank Annotation Using a Statistical Parser</title>
       <author><first>Fu-Dong</first><last>Chiou</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="9b539c24">H01-1026</url>
       <bibkey>chiou-etal-2001-facilitating</bibkey>
     </paper>
@@ -243,8 +243,8 @@
       <title><fixed-case>F</fixed-case>act<fixed-case>B</fixed-case>rowser Demonstration</title>
       <author><first>Scott</first><last>Miller</last></author>
       <author><first>Sergey</first><last>Bratus</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Alex</first><last>Zamanian</last></author>
       <url hash="93c9c085">H01-1027</url>
       <bibkey>miller-etal-2001-factbrowser</bibkey>
@@ -252,16 +252,16 @@
     <paper id="28">
       <title>Finding Errors Automatically in Semantically Tagged Dialogues</title>
       <author><first>John</first><last>Aberdeen</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
-      <author><first>Laurie</first><last>Damianos</last></author>
-      <author><first>Samuel</first><last>Bayer</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
+      <author id="laurie-damianos"><first>Laurie</first><last>Damianos</last></author>
+      <author id="samuel-bayer"><first>Samuel</first><last>Bayer</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="8fccda20">H01-1028</url>
       <bibkey>aberdeen-etal-2001-finding</bibkey>
     </paper>
     <paper id="29">
       <title>Fine-Grained Hidden <fixed-case>M</fixed-case>arkov Modeling for Broadcast-News Story Segmentation</title>
-      <author><first>Warren</first><last>Greiff</last></author>
+      <author id="warren-greiff"><first>Warren</first><last>Greiff</last></author>
       <author><first>Alex</first><last>Morgan</last></author>
       <author><first>Randall</first><last>Fish</last></author>
       <author><first>Marc</first><last>Richards</last></author>
@@ -281,29 +281,29 @@
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>George</first><last>Wilson</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="57d34561">H01-1031</url>
       <bibkey>mani-etal-2001-guidelines</bibkey>
     </paper>
     <paper id="32">
       <title>Hypothesis Selection and Resolution in the Mercury Flight Reservation System</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
       <url hash="d782d711">H01-1032</url>
       <bibkey>seneff-polifroni-2001-hypothesis</bibkey>
     </paper>
     <paper id="33">
       <title>Improved Cross-Language Retrieval using Backoff Translation</title>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
-      <author><first>Gina</first><last>Levow</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
+      <author id="gina-anne-levow"><first>Gina</first><last>Levow</last></author>
       <url hash="8a6a46bf">H01-1033</url>
       <bibkey>resnik-etal-2001-improved</bibkey>
     </paper>
     <paper id="34">
       <title>Improving Information Extraction by Modeling Errors in Speech Recognizer Output</title>
-      <author><first>David D.</first><last>Palmer</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <url hash="bd22ede5">H01-1034</url>
       <bibkey>palmer-ostendorf-2001-improving</bibkey>
     </paper>
@@ -332,44 +332,44 @@
     </paper>
     <paper id="38">
       <title>Integrated Feasibility Experiment for Bio-Security: <fixed-case>IFE</fixed-case>-Bio, A <fixed-case>TIDES</fixed-case> Demonstration</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Kris</first><last>Concepcion</last></author>
-      <author><first>Laurie</first><last>Damianos</last></author>
-      <author><first>David</first><last>Day</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="kristian-concepcion"><first>Kris</first><last>Concepcion</last></author>
+      <author id="laurie-damianos"><first>Laurie</first><last>Damianos</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <author><first>John</first><last>Delmore</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
       <author><first>John</first><last>Griffith</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <author><first>Jeff</first><last>Kurtz</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>Scott</first><last>Mardis</last></author>
       <author><first>Tom</first><last>McEntee</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <author><first>Beverly</first><last>Nunam</last></author>
       <author><first>Jay</first><last>Ponte</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <author><first>George</first><last>Wilson</last></author>
-      <author><first>Alex</first><last>Yeh</last></author>
+      <author id="alexander-yeh"><first>Alex</first><last>Yeh</last></author>
       <url hash="66759d35">H01-1038</url>
       <bibkey>hirschman-etal-2001-integrated</bibkey>
     </paper>
     <paper id="39">
       <title>Integrated Information Management: An Interactive, Extensible Architecture for Information Retrieval</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Hal</first><last>Daume</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume</last></author>
       <url hash="9ceedca3">H01-1039</url>
       <bibkey>nyberg-daume-2001-integrated</bibkey>
     </paper>
     <paper id="40">
       <title>Intelligent Access to Text: Integrating Information Extraction Technology into Text Browsers</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Patrick</first><last>Herring</last></author>
       <author><first>Michael</first><last>Oakes</last></author>
       <author><first>Michelline</first><last>Beaulieu</last></author>
       <author><first>Peter</first><last>Willett</last></author>
       <author><first>Helene</first><last>Fowkes</last></author>
-      <author id="anna-jonsson"><first>Anna</first><last>Jonsson</last></author>
+      <author><first>Anna</first><last>Jonsson</last></author>
       <url hash="739ba4cf">H01-1040</url>
       <bibkey>gaizauskas-etal-2001-intelligent</bibkey>
     </paper>
@@ -377,14 +377,14 @@
       <title>Interlingua-Based Broad-Coverage <fixed-case>K</fixed-case>orean-to-<fixed-case>E</fixed-case>nglish Translation in <fixed-case>CCLINC</fixed-case></title>
       <author><first>Young-Suk</first><last>Lee</last></author>
       <author><first>Wu Sok</first><last>Yi</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="fe2b165e">H01-1041</url>
       <bibkey>lee-etal-2001-interlingua</bibkey>
     </paper>
     <paper id="42">
       <title>Is That Your Final Answer?</title>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <url hash="208cf0c7">H01-1042</url>
       <bibkey>reeder-2001-final</bibkey>
     </paper>
@@ -397,7 +397,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>J</fixed-case>apanese Text Input System With Digits</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Yusuke</first><last>Inutsuka</last></author>
       <author><first>Masato</first><last>Takeichi</last></author>
       <url hash="a610889c">H01-1044</url>
@@ -429,7 +429,7 @@
       <author><first>Martin</first><last>Westphal</last></author>
       <author><first>Mike</first><last>Schneider</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="7ccbdc4a">H01-1048</url>
       <bibkey>fugen-etal-2001-lingwear</bibkey>
     </paper>
@@ -442,15 +442,15 @@
     </paper>
     <paper id="50">
       <title><fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish Information: Investigating Translingual Speech Retrieval</title>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Wai-Kit</first><last>Lo</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="wai-kit-lo"><first>Wai-Kit</first><last>Lo</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Patrick</first><last>Shone</last></author>
       <author><first>Karen</first><last>Tang</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Jianqiang</first><last>Wang</last></author>
       <url hash="700074d5">H01-1050</url>
       <bibkey>meng-etal-2001-mandarin</bibkey>
@@ -464,8 +464,8 @@
       <author><first>David</first><last>Gelbart</last></author>
       <author><first>Adam</first><last>Janin</last></author>
       <author><first>Thilo</first><last>Pfau</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <url hash="67385154">H01-1051</url>
       <bibkey>morgan-etal-2001-meeting</bibkey>
     </paper>
@@ -480,35 +480,35 @@
       <title>Monitoring the News: a <fixed-case>TDT</fixed-case> demonstration system</title>
       <author><first>David</first><last>Frey</last></author>
       <author><first>Rahul</first><last>Gupta</last></author>
-      <author><first>Vikas</first><last>Khandelwal</last></author>
+      <author id="vikash-khandelwal"><first>Vikas</first><last>Khandelwal</last></author>
       <author><first>Victor</first><last>Lavrenko</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <url hash="4e9406c0">H01-1053</url>
       <bibkey>frey-etal-2001-monitoring</bibkey>
     </paper>
     <paper id="54">
       <title>Multidocument Summarization via Information Extraction</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>David</first><last>Pierce</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
       <author><first>Kiri</first><last>Wagstaff</last></author>
       <url hash="9b3bb855">H01-1054</url>
       <bibkey>white-etal-2001-multidocument</bibkey>
     </paper>
     <paper id="55">
       <title>Natural Language Generation in Dialog Systems</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <url hash="434cfe23">H01-1055</url>
       <bibkey>rambow-etal-2001-natural</bibkey>
     </paper>
     <paper id="56">
       <title><fixed-case>N</fixed-case>ews<fixed-case>I</fixed-case>n<fixed-case>E</fixed-case>ssence: A System For Domain-Independent, Real-Time News Clustering and Multi-Document Summarization</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Sasha</first><last>Blair-Goldensohn</last></author>
       <author><first>Zhu</first><last>Zhang</last></author>
       <author><first>Revathi Sundara</first><last>Raghavan</last></author>
@@ -524,24 +524,24 @@
     </paper>
     <paper id="58">
       <title>On Combining Language Models: Oracle Approach</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="02842715">H01-1058</url>
       <bibkey>hacioglu-ward-2001-combining</bibkey>
     </paper>
     <paper id="59">
       <title>Portability Issues for Speech Recognition Technologies</title>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Fabrice</first><last>Lefevre</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefevre</last></author>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <url hash="739b9f49">H01-1059</url>
       <bibkey>lamel-etal-2001-portability</bibkey>
     </paper>
     <paper id="60">
       <title>Rapidly Retargetable Interactive Translingual Retrieval</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <url hash="cb038ce9">H01-1060</url>
       <bibkey>levow-etal-2001-rapidly</bibkey>
@@ -570,20 +570,20 @@
     <paper id="63">
       <title>Scalability and Portability of a Belief Network-based Dialog Model for Different Application Domains</title>
       <author><first>Carmen</first><last>Wai</last></author>
-      <author><first>Helen M.</first><last>Meng</last></author>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <url hash="ad4b2774">H01-1063</url>
       <bibkey>wai-etal-2001-scalability</bibkey>
     </paper>
     <paper id="64">
       <title><fixed-case>SCANM</fixed-case>ail: Audio Navigation in the Voicemail Domain</title>
       <author><first>Michiel</first><last>Bacchiani</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Aaron</first><last>Rosenberg</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <author><first>Phil</first><last>Isenhour</last></author>
-      <author><first>Mark</first><last>Jones</last></author>
+      <author id="mark-jones"><first>Mark</first><last>Jones</last></author>
       <author><first>Litza</first><last>Stark</last></author>
       <author><first>Gary</first><last>Zamchick</last></author>
       <url hash="e7842727">H01-1064</url>
@@ -592,14 +592,14 @@
     <paper id="65">
       <title>Sentence Ordering in Multidocument Summarization</title>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Noemie</first><last>Elhadad</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="d7df3cd5">H01-1065</url>
       <bibkey>barzilay-etal-2001-sentence</bibkey>
     </paper>
     <paper id="66">
       <title>A Server for Real-Time Event Tracking in News</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <url hash="e884d263">H01-1066</url>
       <bibkey>brown-2001-server</bibkey>
     </paper>
@@ -619,10 +619,10 @@
     </paper>
     <paper id="69">
       <title>Toward Semantics-Based Answer Pinpointing</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Laurie</first><last>Gerber</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <url hash="ac0b8f13">H01-1069</url>
       <bibkey>hovy-etal-2001-toward</bibkey>
@@ -639,15 +639,15 @@
       <title>Towards Automatic Sign Translation</title>
       <author><first>Jie</first><last>Yang</last></author>
       <author><first>Jiang</first><last>Gao</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="f5f1e861">H01-1071</url>
       <bibkey>yang-etal-2001-towards</bibkey>
     </paper>
     <paper id="72">
       <title><fixed-case>T</fixed-case>ü<fixed-case>SBL</fixed-case>: A Similarity-Based Chunk Parser for Robust Syntactic Processing</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <url hash="146f20f4">H01-1072</url>
       <bibkey>kubler-hinrichs-2001-tusbl</bibkey>
     </paper>
@@ -667,13 +667,13 @@
     <paper id="74">
       <title>The Use of Dynamic Segment Scoring for Language-Independent Question Answering</title>
       <author><first>Daniel</first><last>Pack</last></author>
-      <author><first>Clifford</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
       <url hash="60919d42">H01-1074</url>
       <bibkey>pack-weinstein-2001-use</bibkey>
     </paper>
     <paper id="75">
       <title>Using Speech and Language Technology to Coach Reading</title>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <author><first>Luc</first><last>Julia</last></author>
       <url hash="2a1ec955">H01-1075</url>
       <bibkey>price-julia-2001-using</bibkey>
diff --git a/data/xml/H05.xml b/data/xml/H05.xml
index 0409e337cd..bcc4f10e8b 100644
--- a/data/xml/H05.xml
+++ b/data/xml/H05.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="2584aebb">H05-1</url>
-      <editor><first>Raymond</first><last>Mooney</last></editor>
+      <editor id="raymond-mooney"><first>Raymond</first><last>Mooney</last></editor>
       <editor><first>Chris</first><last>Brew</last></editor>
       <editor><first>Lee-Feng</first><last>Chien</last></editor>
       <editor><first>Katrin</first><last>Kirchhoff</last></editor>
@@ -22,18 +22,18 @@
     <paper id="1">
       <title>Improving <fixed-case>LSA</fixed-case>-based Summarization with Anaphora Resolution</title>
       <author><first>Josef</first><last>Steinberger</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Olivia</first><last>Sanchez-Graillet</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="olivia-sanchez-graillet"><first>Olivia</first><last>Sanchez-Graillet</last></author>
       <pages>1–8</pages>
       <url hash="6059dec4">H05-1001</url>
       <bibkey>steinberger-etal-2005-improving</bibkey>
     </paper>
     <paper id="2">
       <title>Data-driven Approaches for Information Structure Identification</title>
-      <author><first>Oana</first><last>Postolache</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="oana-postolache"><first>Oana</first><last>Postolache</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <pages>9–16</pages>
       <url hash="ae8be0fd">H05-1002</url>
       <bibkey>postolache-etal-2005-data</bibkey>
@@ -42,14 +42,14 @@
       <title>Using Semantic Relations to Refine Coreference Decisions</title>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>David</first><last>Westbrook</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>17–24</pages>
       <url hash="e2125815">H05-1003</url>
       <bibkey>ji-etal-2005-using</bibkey>
     </paper>
     <paper id="4">
       <title>On Coreference Resolution Performance Metrics</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <pages>25–32</pages>
       <url hash="e8b1b4bb">H05-1004</url>
       <bibkey>luo-2005-coreference</bibkey>
@@ -57,7 +57,7 @@
     <paper id="5">
       <title>Improving Multilingual Summarization: Using Redundancy in the Input to Correct <fixed-case>MT</fixed-case> errors</title>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>33–40</pages>
       <url hash="a6e5d298">H05-1005</url>
       <bibkey>siddharthan-mckeown-2005-improving</bibkey>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="7">
       <title>Semantic Similarity for Detecting Recognition Errors in Automatic Speech Transcripts</title>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Alain</first><last>Désilets</last></author>
       <pages>49–56</pages>
       <url hash="c02592e0">H05-1007</url>
@@ -88,8 +88,8 @@
     </paper>
     <paper id="9">
       <title><fixed-case>N</fixed-case>eur<fixed-case>A</fixed-case>lign: Combining Word Alignments Using Neural Networks</title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>65–72</pages>
       <url hash="0c1653a0">H05-1009</url>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="11">
       <title>A Discriminative Framework for Bilingual Word Alignment</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>81–88</pages>
       <url hash="154820ac">H05-1011</url>
       <bibkey>moore-2005-discriminative</bibkey>
@@ -114,14 +114,14 @@
     <paper id="12">
       <title>A Maximum Entropy Word Aligner for <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>89–96</pages>
       <url hash="fb82b7d0">H05-1012</url>
       <bibkey>ittycheriah-roukos-2005-maximum</bibkey>
     </paper>
     <paper id="13">
       <title>A Large-Scale Exploration of Effective Global Features for a Joint Entity Detection and Tracking Model</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>97–104</pages>
       <url hash="d28da2cc">H05-1013</url>
@@ -146,14 +146,14 @@
     <paper id="16">
       <title>Using Names and Topics for New Event Detection</title>
       <author><first>Giridhar</first><last>Kumaran</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>121–128</pages>
       <url hash="c9eabced">H05-1016</url>
       <bibkey>kumaran-allan-2005-using</bibkey>
     </paper>
     <paper id="17">
       <title>Investigating Unsupervised Learning for Text Categorization Bootstrapping</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>129–136</pages>
@@ -162,7 +162,7 @@
     </paper>
     <paper id="18">
       <title>Speeding up Training with Tree Kernels for Node Relation Labeling</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>137–144</pages>
       <url hash="715b315b">H05-1018</url>
@@ -171,7 +171,7 @@
     <paper id="19">
       <title>Kernel-based Approach for Automatic Evaluation of Natural Language Generation Technologies: Application to Automatic Summarization</title>
       <author><first>Tsutomu</first><last>Hirao</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Hideki</first><last>Isozaki</last></author>
       <pages>145–152</pages>
       <url hash="48699b66">H05-1019</url>
@@ -212,8 +212,8 @@
     </paper>
     <paper id="24">
       <title>Alignment Link Projection Using Transformation-Based Learning</title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>185–192</pages>
       <url hash="fd77699f">H05-1024</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="28">
       <title>A Salience Driven Approach to Robust Input Interpretation in Multimodal Conversational Systems</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <author><first>Shaolin</first><last>Qu</last></author>
       <pages>217–224</pages>
       <url hash="2cae611c">H05-1028</url>
@@ -256,19 +256,19 @@
     </paper>
     <paper id="29">
       <title>Error Handling in the <fixed-case>R</fixed-case>aven<fixed-case>C</fixed-case>law Dialog Management Architecture</title>
-      <author><first>Dan</first><last>Bohus</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>225–232</pages>
       <url hash="9a974e65">H05-1029</url>
       <bibkey>bohus-rudnicky-2005-error</bibkey>
     </paper>
     <paper id="30">
       <title>Effective Use of Prosody in Parsing Conversational Speech</title>
-      <author><first>Jeremy G.</first><last>Kahn</last></author>
+      <author id="jeremy-g-kahn"><first>Jeremy G.</first><last>Kahn</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>233–240</pages>
       <url hash="ac328f1e">H05-1030</url>
       <bibkey>kahn-etal-2005-effective</bibkey>
@@ -277,7 +277,7 @@
       <title>Automatically Learning Cognitive Status for Multi-Document Summarization of Newswire</title>
       <author><first>Ani</first><last>Nenkova</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>241–248</pages>
       <url hash="55703d37">H05-1031</url>
       <bibkey>nenkova-etal-2005-automatically</bibkey>
@@ -308,16 +308,16 @@
     <paper id="35">
       <title><fixed-case>PP</fixed-case>-attachment Disambiguation using Large Context</title>
       <author><first>Marian</first><last>Olteanu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>273–280</pages>
       <url hash="d6ee1380">H05-1035</url>
       <bibkey>olteanu-moldovan-2005-pp</bibkey>
     </paper>
     <paper id="36">
       <title>Compiling Comp Ling: Weighted Dynamic Programming and the <fixed-case>D</fixed-case>yna Language</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Eric</first><last>Goldlust</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>281–290</pages>
       <url hash="1f5b0e0f">H05-1036</url>
       <bibkey>eisner-etal-2005-compiling</bibkey>
@@ -332,7 +332,7 @@
     </paper>
     <paper id="38">
       <title>Using Question Series to Evaluate Question Answering System Effectiveness</title>
-      <author><first>Ellen</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen</first><last>Voorhees</last></author>
       <pages>299–306</pages>
       <url hash="0ea175cb">H05-1038</url>
       <bibkey>voorhees-2005-using</bibkey>
@@ -340,7 +340,7 @@
     <paper id="39">
       <title>Combining Deep Linguistics Analysis and Surface Pattern Learning: A Hybrid Approach to <fixed-case>C</fixed-case>hinese Definitional Question Answering</title>
       <author><first>Fuchun</first><last>Peng</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Ana</first><last>Licuanan</last></author>
       <author><first>Jinxi</first><last>Xu</last></author>
       <pages>307–314</pages>
@@ -350,7 +350,7 @@
     <paper id="40">
       <title>Enhanced Answer Type Inference from Questions using Sequential Models</title>
       <author><first>Vijay</first><last>Krishnan</last></author>
-      <author><first>Sujatha</first><last>Das</last></author>
+      <author id="sujatha-das-gollapalli"><first>Sujatha</first><last>Das</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
       <pages>315–322</pages>
       <url hash="0aa513b2">H05-1040</url>
@@ -359,7 +359,7 @@
     <paper id="41">
       <title>A Practically Unsupervised Learning Method to Identify Single-Snippet Answers to Definition Questions on the Web</title>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <pages>323–330</pages>
       <url hash="4adce254">H05-1041</url>
       <bibkey>androutsopoulos-galanis-2005-practically</bibkey>
@@ -383,7 +383,7 @@
     <paper id="44">
       <title>Recognizing Contextual Polarity in Phrase-Level Sentiment Analysis</title>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Paul</first><last>Hoffmann</last></author>
       <pages>347–354</pages>
       <url hash="38854139">H05-1044</url>
@@ -392,8 +392,8 @@
     <paper id="45">
       <title>Identifying Sources of Opinions with Conditional Random Fields and Extraction Patterns</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
       <pages>355–362</pages>
       <url hash="44dfcf21">H05-1045</url>
@@ -410,14 +410,14 @@
     <paper id="47">
       <title>A Semantic Approach to Recognizing Textual Entailment</title>
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>371–378</pages>
       <url hash="79f02c51">H05-1047</url>
       <bibkey>tatu-moldovan-2005-semantic</bibkey>
     </paper>
     <paper id="48">
       <title>Detection of Entity Mentions Occuring in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Text</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
       <author><first>Benjamin</first><last>Douglas</last></author>
       <author><first>Ying</first><last>Chen</last></author>
       <pages>379–386</pages>
@@ -427,15 +427,15 @@
     <paper id="49">
       <title>Robust Textual Inference via Graph Matching</title>
       <author><first>Aria</first><last>Haghighi</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>387–394</pages>
       <url hash="9994f6aa">H05-1049</url>
       <bibkey>haghighi-etal-2005-robust</bibkey>
     </paper>
     <paper id="50">
       <title>Bootstrapping Without the Boot</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
       <pages>395–402</pages>
       <url hash="049497e9">H05-1050</url>
@@ -450,7 +450,7 @@
     </paper>
     <paper id="52">
       <title>Unsupervised Large-Vocabulary Word Sense Disambiguation with Graph-based Algorithms for Sequence Data Labeling</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>411–418</pages>
       <url hash="1fe6cbcd">H05-1052</url>
       <bibkey>mihalcea-2005-unsupervised</bibkey>
@@ -458,8 +458,8 @@
     <paper id="53">
       <title>Domain-Specific Sense Distributions and Predominant Sense Acquisition</title>
       <author><first>Rob</first><last>Koeling</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>419–426</pages>
       <url hash="0d8e4206">H05-1053</url>
       <bibkey>koeling-etal-2005-domain</bibkey>
@@ -484,8 +484,8 @@
     <paper id="56">
       <title>Extracting Personal Names from Email: Applying Named Entity Recognition to Informal Text</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>Richard C.</first><last>Wang</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="richard-c-wang"><first>Richard C.</first><last>Wang</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>443–450</pages>
       <url hash="084c787d">H05-1056</url>
       <bibkey>minkov-etal-2005-extracting</bibkey>
@@ -493,7 +493,7 @@
     <paper id="57">
       <title>Matching Inconsistently Spelled Names in Automatic Speech Recognizer Output for Information Retrieval</title>
       <author><first>Hema</first><last>Raghavan</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>451–458</pages>
       <url hash="389d9d95">H05-1057</url>
       <bibkey>raghavan-allan-2005-matching</bibkey>
@@ -501,7 +501,7 @@
     <paper id="58">
       <title>Part-of-Speech Tagging using Virtual Evidence and Negative Training</title>
       <author><first>Sheila M.</first><last>Reynolds</last></author>
-      <author><first>Jeff A.</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff A.</first><last>Bilmes</last></author>
       <pages>459–466</pages>
       <url hash="b676e533">H05-1058</url>
       <bibkey>reynolds-bilmes-2005-part</bibkey>
@@ -509,16 +509,16 @@
     <paper id="59">
       <title>Bidirectional Inference with the Easiest-First Strategy for Tagging Sequence Data</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>467–474</pages>
       <url hash="4e14ccb1">H05-1059</url>
       <bibkey>tsuruoka-tsujii-2005-bidirectional</bibkey>
     </paper>
     <paper id="60">
       <title>Context-Based Morphological Disambiguation with Random Fields</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Roy W.</first><last>Tromble</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="roy-tromble"><first>Roy W.</first><last>Tromble</last></author>
       <pages>475–482</pages>
       <url hash="d80f745c">H05-1060</url>
       <bibkey>smith-etal-2005-context</bibkey>
@@ -526,17 +526,17 @@
     <paper id="61">
       <title>Mining Key Phrase Translations from Web Corpora</title>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>483–490</pages>
       <url hash="b8708872">H05-1061</url>
       <bibkey>huang-etal-2005-mining</bibkey>
     </paper>
     <paper id="62">
       <title>Robust Named Entity Extraction from Large Spoken Archives</title>
-      <author><first>Benoît</first><last>Favre</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Pascal</first><last>Nocéra</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="pascal-nocera"><first>Pascal</first><last>Nocéra</last></author>
       <pages>491–498</pages>
       <url hash="efd008f3">H05-1062</url>
       <bibkey>favre-etal-2005-robust</bibkey>
@@ -553,7 +553,7 @@
     <paper id="64">
       <title>Hidden-Variable Models for Discriminative Reranking</title>
       <author><first>Terry</first><last>Koo</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>507–514</pages>
       <url hash="258598f5">H05-1064</url>
       <bibkey>koo-collins-2005-hidden</bibkey>
@@ -570,14 +570,14 @@
       <author><first>Ryan</first><last>McDonald</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <author><first>Kiril</first><last>Ribarov</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>523–530</pages>
       <url hash="1e250687">H05-1066</url>
       <bibkey>mcdonald-etal-2005-non</bibkey>
     </paper>
     <paper id="67">
       <title>Making Computers Laugh: Investigations in Automatic Humor Recognition</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>531–538</pages>
       <url hash="f1829822">H05-1067</url>
@@ -586,7 +586,7 @@
     <paper id="68">
       <title>Optimizing to Arbitrary <fixed-case>NLP</fixed-case> Metrics using Ensemble Selection</title>
       <author><first>Art</first><last>Munson</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Rich</first><last>Caruana</last></author>
       <pages>539–546</pages>
       <url hash="bbb51110">H05-1068</url>
@@ -595,7 +595,7 @@
     <paper id="69">
       <title>Word Sense Disambiguation Using Sense Examples Automatically Acquired from a Second Language</title>
       <author><first>Xinglong</first><last>Wang</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>547–554</pages>
       <url hash="e4171854">H05-1069</url>
       <bibkey>wang-carroll-2005-word</bibkey>
@@ -609,9 +609,9 @@
     </paper>
     <paper id="71">
       <title><fixed-case>K</fixed-case>now<fixed-case>I</fixed-case>t<fixed-case>N</fixed-case>ow: Fast, Scalable Information Extraction from the Web</title>
-      <author><first>Michael J.</first><last>Cafarella</last></author>
+      <author id="michael-j-cafarella"><first>Michael J.</first><last>Cafarella</last></author>
       <author><first>Doug</first><last>Downey</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>563–570</pages>
       <url hash="b0d9e1d6">H05-1071</url>
@@ -619,7 +619,7 @@
     </paper>
     <paper id="72">
       <title>A Cost-Benefit Analysis of Hybrid Phone-Manner Representations for <fixed-case>ASR</fixed-case></title>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>C. Anton</first><last>Rytting</last></author>
       <pages>571–578</pages>
       <url hash="286efa04">H05-1072</url>
@@ -627,9 +627,9 @@
     </paper>
     <paper id="73">
       <title>Emotions from Text: Machine Learning for Text-based Emotion Prediction</title>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>579–586</pages>
       <url hash="cc3f8ffa">H05-1073</url>
       <bibkey>alm-etal-2005-emotions</bibkey>
@@ -645,7 +645,7 @@
     <paper id="75">
       <title>Handling Biographical Questions with Implicature</title>
       <author><first>Donghui</first><last>Feng</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>596–603</pages>
       <url hash="726048e2">H05-1075</url>
       <bibkey>feng-hovy-2005-handling</bibkey>
@@ -653,16 +653,16 @@
     <paper id="76">
       <title>The Use of Metadata, Web-derived Answer Patterns and Passage Context to Improve Reading Comprehension Performance</title>
       <author><first>Yongping</first><last>Du</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>604–611</pages>
       <url hash="1acdd5e7">H05-1076</url>
       <bibkey>du-etal-2005-use</bibkey>
     </paper>
     <paper id="77">
       <title>Identifying Semantic Relations and Functional Properties of Human Verb Associations</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Alissa</first><last>Melinger</last></author>
       <pages>612–619</pages>
       <url hash="7f7c46c0">H05-1077</url>
@@ -671,7 +671,7 @@
     <paper id="78">
       <title>Accurate Function Parsing</title>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <pages>620–627</pages>
       <url hash="18c58133">H05-1078</url>
       <bibkey>merlo-musillo-2005-accurate</bibkey>
@@ -694,29 +694,29 @@
     </paper>
     <paper id="81">
       <title>A Robust Combination Strategy for Semantic Role Labeling</title>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Pere</first><last>Comas</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="pere-comas"><first>Pere</first><last>Comas</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>644–651</pages>
       <url hash="e0e42f91">H05-1081</url>
       <bibkey>marquez-etal-2005-robust</bibkey>
     </paper>
     <paper id="82">
       <title>A Methodology for Extrinsically Evaluating Information Extraction Performance</title>
-      <author><first>Michael</first><last>Crystal</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
       <author><first>Alex</first><last>Baron</last></author>
       <author><first>Katherine</first><last>Godfrey</last></author>
       <author><first>Linnea</first><last>Micciulla</last></author>
       <author><first>Yvette</first><last>Tenney</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>652–659</pages>
       <url hash="1a04ba90">H05-1082</url>
       <bibkey>crystal-etal-2005-methodology</bibkey>
     </paper>
     <paper id="83">
       <title>Multi-Lingual Coreference Resolution With Syntactic Features</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <pages>660–667</pages>
       <url hash="fdcde985">H05-1083</url>
@@ -725,14 +725,14 @@
     <paper id="84">
       <title>Analyzing Models for Semantic Role Assignment using Confusability</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>668–675</pages>
       <url hash="53c04823">H05-1084</url>
       <bibkey>erk-pado-2005-analyzing</bibkey>
     </paper>
     <paper id="85">
       <title>Improving Statistical <fixed-case>MT</fixed-case> through Morphological Analysis</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>David</first><last>McClosky</last></author>
       <pages>676–683</pages>
       <url hash="47ca696e">H05-1085</url>
@@ -741,7 +741,7 @@
     <paper id="86">
       <title>A Translation Model for Sentence Retrieval</title>
       <author><first>Vanessa</first><last>Murdock</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <pages>684–691</pages>
       <url hash="1ede369d">H05-1086</url>
       <bibkey>murdock-croft-2005-translation</bibkey>
@@ -755,10 +755,10 @@
     </paper>
     <paper id="88">
       <title><fixed-case>E</fixed-case>vita: A Robust Event Recognizer For <fixed-case>QA</fixed-case> Systems</title>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Robert</first><last>Knippen</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>700–707</pages>
       <url hash="c372aad9">H05-1088</url>
       <bibkey>sauri-etal-2005-evita</bibkey>
@@ -766,7 +766,7 @@
     <paper id="89">
       <title>Using Sketches to Estimate Associations</title>
       <author><first>Ping</first><last>Li</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <pages>708–715</pages>
       <url hash="0e47b31f">H05-1089</url>
       <bibkey>li-church-2005-using</bibkey>
@@ -774,14 +774,14 @@
     <paper id="90">
       <title>Context and Learning in Novelty Detection</title>
       <author><first>Barry</first><last>Schiffman</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>716–723</pages>
       <url hash="2900b49c">H05-1090</url>
       <bibkey>schiffman-mckeown-2005-context</bibkey>
     </paper>
     <paper id="91">
       <title>A Shortest Path Dependency Kernel for Relation Extraction</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <author><first>Raymond</first><last>Mooney</last></author>
       <pages>724–731</pages>
       <url hash="43f900b7">H05-1091</url>
@@ -790,16 +790,16 @@
     <paper id="92">
       <title>Multi-way Relation Classification: Application to Protein-Protein Interactions</title>
       <author><first>Barbara</first><last>Rosario</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>732–739</pages>
       <url hash="f5363b94">H05-1092</url>
       <bibkey>rosario-hearst-2005-multi</bibkey>
     </paper>
     <paper id="93">
       <title><fixed-case>BLANC</fixed-case>: Learning Evaluation Metrics for <fixed-case>MT</fixed-case></title>
-      <author><first>Lucian</first><last>Lita</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian</first><last>Lita</last></author>
       <author><first>Monica</first><last>Rogati</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>740–747</pages>
       <url hash="946bab65">H05-1093</url>
       <bibkey>lita-etal-2005-blanc</bibkey>
@@ -818,10 +818,10 @@
       <author><first>Nicola</first><last>Cancedda</last></author>
       <author><first>Bruno</first><last>Cavestro</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
       <pages>755–762</pages>
       <url hash="0775cdc8">H05-1095</url>
@@ -830,7 +830,7 @@
     <paper id="96">
       <title>Word-Level Confidence Estimation for Machine Translation using Phrase-Based Translation Models</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>763–770</pages>
       <url hash="890a16b0">H05-1096</url>
       <bibkey>ueffing-ney-2005-word</bibkey>
@@ -869,7 +869,7 @@
     <paper id="100">
       <title>Morphology and Reranking for the Statistical Parsing of <fixed-case>S</fixed-case>panish</title>
       <author><first>Brooke</first><last>Cowan</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>795–802</pages>
       <url hash="0ae22d0a">H05-1100</url>
       <bibkey>cowan-collins-2005-morphology</bibkey>
@@ -885,7 +885,7 @@
     <paper id="102">
       <title>Incremental <fixed-case>LTAG</fixed-case> Parsing</title>
       <author><first>Libin</first><last>Shen</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>811–818</pages>
       <url hash="2c45525d">H05-1102</url>
       <bibkey>shen-joshi-2005-incremental</bibkey>
@@ -910,8 +910,8 @@
     </paper>
     <paper id="105">
       <title>Using the Web as an Implicit Training Set: Application to Structural Ambiguity Resolution</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>835–842</pages>
       <url hash="fb813d9a">H05-1105</url>
       <bibkey>nakov-hearst-2005-using</bibkey>
@@ -934,7 +934,7 @@
     </paper>
     <paper id="108">
       <title>Cross-linguistic Projection of Role-Semantic Information</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>859–866</pages>
       <url hash="9cc3bd4d">H05-1108</url>
@@ -957,7 +957,7 @@
     </paper>
     <paper id="111">
       <title>Exploiting a Verb Lexicon in Automatic Semantic Role Labelling</title>
-      <author><first>Robert</first><last>Swier</last></author>
+      <author id="robert-s-swier"><first>Robert</first><last>Swier</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <pages>883–890</pages>
       <url hash="6ac9f01d">H05-1111</url>
@@ -965,8 +965,8 @@
     </paper>
     <paper id="112">
       <title>A Semantic Scattering Model for the Automatic Interpretation of Genitives</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Adriana</first><last>Badulescu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Badulescu</last></author>
       <pages>891–898</pages>
       <url hash="11545e70">H05-1112</url>
       <bibkey>moldovan-badulescu-2005-semantic</bibkey>
@@ -974,25 +974,25 @@
     <paper id="113">
       <title>Measuring the Relative Compositionality of Verb-Noun (<fixed-case>V</fixed-case>-N) Collocations by Integrating Features</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>899–906</pages>
       <url hash="28d8c9f6">H05-1113</url>
       <bibkey>venkatapathy-joshi-2005-measuring</bibkey>
     </paper>
     <paper id="114">
       <title>A Semi-Supervised Feature Clustering Algorithm with Application to Word Sense Disambiguation</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>907–914</pages>
       <url hash="2e0b7e39">H05-1114</url>
       <bibkey>niu-etal-2005-semi</bibkey>
     </paper>
     <paper id="115">
       <title>Using Random Walks for Question-focused Sentence Retrieval</title>
-      <author><first>Jahna</first><last>Otterbacher</last></author>
-      <author><first>Güneş</first><last>Erkan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>915–922</pages>
       <url hash="faae17f6">H05-1115</url>
       <bibkey>otterbacher-etal-2005-using</bibkey>
@@ -1000,8 +1000,8 @@
     <paper id="116">
       <title>Multi-Perspective Question Answering Using the <fixed-case>O</fixed-case>p<fixed-case>QA</fixed-case> Corpus</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>923–930</pages>
       <url hash="3d3500fc">H05-1116</url>
       <bibkey>stoyanov-etal-2005-multi</bibkey>
@@ -1016,7 +1016,7 @@
     </paper>
     <paper id="118">
       <title>Integrating Linguistic Knowledge in Passage Retrieval for Question Answering</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>939–946</pages>
       <url hash="f5f32323">H05-1118</url>
       <bibkey>tiedemann-2005-integrating</bibkey>
@@ -1050,7 +1050,7 @@
     </paper>
     <paper id="122">
       <title>An Orthonormal Basis for Topic Segmentation in Tutorial Dialogue</title>
-      <author><first>Andrew</first><last>Olney</last></author>
+      <author id="andrew-olney"><first>Andrew</first><last>Olney</last></author>
       <author><first>Zhiqiang</first><last>Cai</last></author>
       <pages>971–978</pages>
       <url hash="8fe6671d">H05-1122</url>
@@ -1076,13 +1076,13 @@
     </paper>
     <paper id="125">
       <title>The Vocal Joystick: A Voice-Based Human-Computer Interface for Individuals with Motor Impairments</title>
-      <author><first>Jeff A.</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff A.</first><last>Bilmes</last></author>
       <author><first>Xiao</first><last>Li</last></author>
       <author><first>Jonathan</first><last>Malkin</last></author>
       <author><first>Kelley</first><last>Kilanski</last></author>
       <author><first>Richard</first><last>Wright</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
-      <author><first>Amar</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amar</first><last>Subramanya</last></author>
       <author><first>Susumu</first><last>Harada</last></author>
       <author><first>James</first><last>Landay</last></author>
       <author><first>Patricia</first><last>Dowden</last></author>
@@ -1102,7 +1102,7 @@
     <paper id="127">
       <title>Learning Mixed Initiative Dialog Strategies By Using Reinforcement Learning On Both Conversants</title>
       <author><first>Michael</first><last>English</last></author>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <pages>1011–1018</pages>
       <url hash="a6ed71d3">H05-1127</url>
       <bibkey>english-heeman-2005-learning</bibkey>
@@ -1112,7 +1112,7 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>HLT</fixed-case>/<fixed-case>EMNLP</fixed-case> 2005 Interactive Demonstrations</booktitle>
       <url hash="68f7f99e">H05-2</url>
-      <editor><first>Donna</first><last>Byron</last></editor>
+      <editor id="donna-byron"><first>Donna</first><last>Byron</last></editor>
       <editor><first>Anand</first><last>Venkataraman</last></editor>
       <editor><first>Dell</first><last>Zhang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1128,7 +1128,7 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic Detection of Translation Errors: The State of the Art</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Ngoc Tran</first><last>Nguyen</last></author>
       <pages>1</pages>
@@ -1149,8 +1149,8 @@
       <title><fixed-case>C</fixed-case>lassummary: Introducing Discussion Summarization to Online Classrooms</title>
       <author><first>Liang</first><last>Zhou</last></author>
       <author><first>Erin</first><last>Shaw</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>4–5</pages>
       <url hash="223822b6">H05-2003</url>
       <bibkey>zhou-etal-2005-classummary</bibkey>
@@ -1160,7 +1160,7 @@
       <author><first>Vasin</first><last>Punyakanok</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <author><first>Mark</first><last>Sammons</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>6–7</pages>
       <url hash="46c49276">H05-2004</url>
       <bibkey>punyakanok-etal-2005-demonstrating</bibkey>
@@ -1177,7 +1177,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case>: a Linguistic Annotation System for Corpus Processing</title>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <pages>10–11</pages>
       <url hash="6a4744a6">H05-2006</url>
       <bibkey>silberztein-2005-nooj</bibkey>
@@ -1192,7 +1192,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>P</fixed-case>rague Dependency Treebank as an Exercise Book of <fixed-case>C</fixed-case>zech</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Ondřej</first><last>Kučera</last></author>
       <pages>14–15</pages>
       <url hash="2c8f5b63">H05-2008</url>
@@ -1217,7 +1217,7 @@
     <paper id="11">
       <title><fixed-case>D</fixed-case>ialogue<fixed-case>V</fixed-case>iew: an Annotation Tool for Dialogue</title>
       <author><first>Fan</first><last>Yang</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <pages>20–21</pages>
       <url hash="d88e451f">H05-2011</url>
       <bibkey>yang-heeman-2005-dialogueview</bibkey>
@@ -1240,7 +1240,7 @@
       <author><first>Danilo</first><last>Mirkovic</last></author>
       <author><first>Ben</first><last>Bei</last></author>
       <author><first>Heather</first><last>Pon-Barry</last></author>
-      <author><first>Harry</first><last>Bratt</last></author>
+      <author id="harry-bratt"><first>Harry</first><last>Bratt</last></author>
       <author><first>Hua</first><last>Cheng</last></author>
       <author><first>Hauke</first><last>Schmidt</last></author>
       <author><first>Rohit</first><last>Mishra</last></author>
@@ -1258,13 +1258,13 @@
     </paper>
     <paper id="14">
       <title><fixed-case>J</fixed-case>apanese Speech Understanding using Grammar Specialization</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
       <pages>26–27</pages>
@@ -1273,9 +1273,9 @@
     </paper>
     <paper id="15">
       <title>The <fixed-case>M</fixed-case><fixed-case>I</fixed-case><fixed-case>T</fixed-case> Spoken Lecture Processing Project</title>
-      <author><first>James R.</first><last>Glass</last></author>
-      <author><first>Timothy J.</first><last>Hazen</last></author>
-      <author><first>D. Scott</first><last>Cyphers</last></author>
+      <author id="james-glass"><first>James R.</first><last>Glass</last></author>
+      <author id="timothy-j-hazen"><first>Timothy J.</first><last>Hazen</last></author>
+      <author id="scott-cyphers"><first>D. Scott</first><last>Cyphers</last></author>
       <author><first>Ken</first><last>Schutte</last></author>
       <author><first>Alex</first><last>Park</last></author>
       <pages>28–29</pages>
@@ -1306,10 +1306,10 @@
       <author><first>Paul</first><last>Hoffmann</last></author>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Jason</first><last>Kessler</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
       <pages>34–35</pages>
       <url hash="4470028b">H05-2018</url>
@@ -1319,7 +1319,7 @@
       <title><fixed-case>P</fixed-case><fixed-case>O</fixed-case><fixed-case>S</fixed-case><fixed-case>B</fixed-case><fixed-case>I</fixed-case><fixed-case>O</fixed-case><fixed-case>T</fixed-case><fixed-case>M</fixed-case>/<fixed-case>W</fixed-case>: A Development Workbench for Machine Learning Oriented Biomedical Text Mining System</title>
       <author><first>Kyungduk</first><last>Kim</last></author>
       <author><first>Yu</first><last>Song</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>36–37</pages>
       <url hash="a44f48a4">H05-2019</url>
       <bibkey>kim-etal-2005-posbiotm</bibkey>
diff --git a/data/xml/H86.xml b/data/xml/H86.xml
index 7ca746ff88..3993166a64 100644
--- a/data/xml/H86.xml
+++ b/data/xml/H86.xml
@@ -12,109 +12,109 @@
     </frontmatter>
     <paper id="1">
       <title>Research and Development in Natural Language Processing at <fixed-case>BBN</fixed-case> <fixed-case>L</fixed-case>aboratories in the <fixed-case>S</fixed-case>trategic <fixed-case>C</fixed-case>omputing <fixed-case>P</fixed-case>rogram</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <author><first>Edward</first><last>Walker</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Andrew</first><last>Haas</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="andrew-haas"><first>Andrew</first><last>Haas</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Varda</first><last>Shaked</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="8b0ef01f">H86-1001</url>
       <bibkey>weischedel-etal-1986-research</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>PROTEUS</fixed-case> and <fixed-case>PUNDIT</fixed-case>: <fixed-case>RESEARCH</fixed-case> <fixed-case>IN</fixed-case> <fixed-case>TEXT</fixed-case> <fixed-case>UNDERSTANDING</fixed-case> at the <fixed-case>D</fixed-case>epartment of <fixed-case>C</fixed-case>omputer <fixed-case>S</fixed-case>cience, <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity and <fixed-case>S</fixed-case>ystem <fixed-case>D</fixed-case>evelopment <fixed-case>C</fixed-case>orporation -- A <fixed-case>B</fixed-case>urroughs Company</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="93cc266b">H86-1002</url>
       <bibkey>grishman-hirschman-1986-proteus</bibkey>
     </paper>
     <paper id="3">
       <title>Overview of the <fixed-case>TACITUS</fixed-case> Project</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="9c5a3c46">H86-1003</url>
       <bibkey>hobbs-1986-overview</bibkey>
     </paper>
     <paper id="4">
       <title>The <fixed-case>C</fixed-case>ounselor Project at the <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <author><first>James D.</first><last>Pustejowky</last></author>
       <url hash="8671b833">H86-1004</url>
       <bibkey>mcdonald-pustejowky-1986-counselor</bibkey>
     </paper>
     <paper id="5">
       <title>Research in Natural Language Processing</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
-      <author><first>Dale</first><last>Miller</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
+      <author id="dale-a-miller"><first>Dale</first><last>Miller</last></author>
       <author><first>Lokendra</first><last>Shastri</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="d7497b85">H86-1005</url>
       <bibkey>joshi-etal-1986-research</bibkey>
     </paper>
     <paper id="6">
       <title>Text Generation for Strategic Computing</title>
-      <author><first>William</first><last>Mann</last></author>
-      <author><first>Norman</first><last>Sondheimer</last></author>
+      <author id="william-c-mann"><first>William</first><last>Mann</last></author>
+      <author id="norman-k-sondheimer"><first>Norman</first><last>Sondheimer</last></author>
       <url hash="c029c475">H86-1006</url>
       <bibkey>mann-sondheimer-1986-text</bibkey>
     </paper>
     <paper id="7">
       <title>Out of the Laboratory: A Case Study with the <fixed-case>IRUS</fixed-case> Natural Language Interface</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <author><first>Edward</first><last>Walker</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
       <author><first>Jos</first><last>de Bruin</last></author>
       <author><first>Kimberle</first><last>Koile</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Varda</first><last>Shaked</last></author>
       <url hash="3b35705d">H86-1007</url>
       <bibkey>weischedel-etal-1986-laboratory</bibkey>
     </paper>
     <paper id="8">
       <title>A Terminological Simplification Transformation for Natural Language Question-Answering Systems</title>
-      <author><first>David G.</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David G.</first><last>Stallard</last></author>
       <url hash="0895e859">H86-1008</url>
       <bibkey>stallard-1986-terminological</bibkey>
     </paper>
     <paper id="9">
       <title>Model-based Analysis of Messages about Equipment</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Tomasz</first><last>Ksiezyk</last></author>
-      <author><first>Ngo Thanh</first><last>Nhan</last></author>
+      <author id="ngo-thanh-nhan"><first>Ngo Thanh</first><last>Nhan</last></author>
       <url hash="e807717b">H86-1009</url>
       <bibkey>grishman-etal-1986-model</bibkey>
     </paper>
     <paper id="10">
       <title>An Equipment Model and Its Role in the Interpretation of Nominal Compounds</title>
       <author><first>Tomasz</first><last>Ksiezyk</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="e84316e0">H86-1010</url>
       <bibkey>ksiezyk-grishman-1986-equipment</bibkey>
     </paper>
     <paper id="11">
       <title>Recovering Implicit Information</title>
-      <author><first>Martha S.</first><last>Palmer</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="martha-palmer"><first>Martha S.</first><last>Palmer</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Rebecca J.</first><last>Schiffman</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Marcia</first><last>Linebarger</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia</first><last>Linebarger</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="71dde8c7">H86-1011</url>
       <bibkey>palmer-etal-1986-recovering</bibkey>
     </paper>
     <paper id="12">
       <title>Focusing and Reference Resolution in <fixed-case>PUNDIT</fixed-case></title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="27da1d9a">H86-1012</url>
       <bibkey>dahl-1986-focusing</bibkey>
     </paper>
     <paper id="13">
       <title>Commonsense Metaphysics and Lexical Semantics</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>William</first><last>Croft</last></author>
       <author><first>Todd</first><last>Davies</last></author>
       <author><first>Douglas</first><last>Edwards</last></author>
@@ -124,35 +124,35 @@
     </paper>
     <paper id="14">
       <title>Multi-Level Description Directed Generation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="acb1c958">H86-1014</url>
       <bibkey>mcdonald-1986-multi</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>TAG</fixed-case>’s as a Grammatical Formalism for Generation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
-      <author><first>James D.</first><last>Pustejovsky</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
+      <author id="james-pustejovsky"><first>James D.</first><last>Pustejovsky</last></author>
       <url hash="975386b6">H86-1015</url>
       <bibkey>mcdonald-pustejovsky-1986-tags</bibkey>
     </paper>
     <paper id="16">
       <title>Hypotheticals as Heuristic Device</title>
       <author><first>Edwina L.</first><last>Rissland</last></author>
-      <author><first>Kevin D.</first><last>Ashley</last></author>
+      <author id="kevin-d-ashley"><first>Kevin D.</first><last>Ashley</last></author>
       <url hash="50f750fe">H86-1016</url>
       <bibkey>rissland-ashley-1986-hypotheticals</bibkey>
     </paper>
     <paper id="17">
       <title>Living Up to Expectations: Computing Expert Responses</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <url hash="307ae820">H86-1017</url>
       <bibkey>joshi-etal-1986-living</bibkey>
     </paper>
     <paper id="18">
       <title>The Role of Perspective in Responding to Property Misconceptions</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="9b33e3a0">H86-1018</url>
       <bibkey>mccoy-1986-role</bibkey>
     </paper>
@@ -164,21 +164,21 @@
     </paper>
     <paper id="20">
       <title>Some Computational Properties of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>K.</first><last>Vijay-Shankar</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shankar</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="ccc35e0e">H86-1020</url>
       <bibkey>vijay-shankar-joshi-1986-computational</bibkey>
     </paper>
     <paper id="21">
       <title><fixed-case>GUMS</fixed-case>₁ : A General User Modeling System</title>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>David</first><last>Drager</last></author>
       <url hash="15eb6fa6">H86-1021</url>
       <bibkey>finin-drager-1986-gums1</bibkey>
     </paper>
     <paper id="22">
       <title>A Logical-Form and Knowledge-Base Design for Natural Language Generation</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <author><first>Bernhard</first><last>Nebel</last></author>
       <url hash="b7147134">H86-1022</url>
       <bibkey>sondheimer-nebel-1986-logical</bibkey>
@@ -191,7 +191,7 @@
     </paper>
     <paper id="24">
       <title>Assertions from Discourse Structure</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <author><first>Sandra A.</first><last>Thompson</last></author>
       <url hash="0d41a0dc">H86-1024</url>
       <bibkey>mann-thompson-1986-assertions</bibkey>
diff --git a/data/xml/H89.xml b/data/xml/H89.xml
index f1ec812317..861bfeb302 100644
--- a/data/xml/H89.xml
+++ b/data/xml/H89.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop</title>
-      <author><first>Lynette</first><last>Hirshman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirshman</last></author>
       <url hash="013e0838">H89-1001</url>
       <bibkey>hirshman-1989-overview</bibkey>
     </paper>
@@ -42,20 +42,20 @@
     </paper>
     <paper id="6">
       <title>Research in Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="30d3d38a">H89-1006</url>
       <bibkey>makhoul-schwartz-1989-research</bibkey>
     </paper>
     <paper id="7">
       <title>Integrating Speech and Natural Language</title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="3821ef15">H89-1007</url>
       <bibkey>roukos-1989-integrating</bibkey>
     </paper>
     <paper id="8">
       <title>Rapid Porting of the <fixed-case>P</fixed-case>arlance™ Natural Language Interface</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="21c09bc0">H89-1008</url>
       <bibkey>bates-1989-rapid</bibkey>
     </paper>
@@ -67,44 +67,44 @@
     </paper>
     <paper id="10">
       <title>The <fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> Continuous Speech Recognition System</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Chris</first><last>Barry</last></author>
-      <author><first>Yen-Lu</first><last>Chow</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="chris-barry"><first>Chris</first><last>Barry</last></author>
+      <author id="yen-lu-chow"><first>Yen-Lu</first><last>Chow</last></author>
       <author><first>Alan</first><last>Deft</last></author>
       <author><first>Ming-Whei</first><last>Feng</last></author>
-      <author><first>Owen</first><last>Kimball</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="owen-kimball"><first>Owen</first><last>Kimball</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <author><first>Jeffrey</first><last>Vandegrift</last></author>
       <url hash="c2446189">H89-1010</url>
       <bibkey>schwartz-etal-1989-bbn</bibkey>
     </paper>
     <paper id="11">
       <title>Speaker Adaptation from Limited Training in the <fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> Speech Recognition System</title>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <author><first>Ming-Whei</first><last>Feng</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="efa248f5">H89-1011</url>
       <bibkey>kubala-etal-1989-speaker</bibkey>
     </paper>
     <paper id="12">
       <title>The <fixed-case>BBN</fixed-case> Spoken Language System</title>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Yen-Lu</first><last>Chow</last></author>
-      <author><first>Andrew</first><last>Haas</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="yen-lu-chow"><first>Yen-Lu</first><last>Chow</last></author>
+      <author id="andrew-haas"><first>Andrew</first><last>Haas</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="804064f9">H89-1012</url>
       <bibkey>boisen-etal-1989-bbn</bibkey>
     </paper>
     <paper id="13">
       <title>Portability in the <fixed-case>J</fixed-case>anus Natural Language Interface</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <url hash="3a4ae9ce">H89-1013</url>
       <bibkey>weischedel-etal-1989-portability</bibkey>
     </paper>
@@ -116,34 +116,34 @@
     </paper>
     <paper id="15">
       <title>The design of voice-driven interfaces</title>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="46fc781f">H89-1015</url>
       <bibkey>rudnicky-1989-design</bibkey>
     </paper>
     <paper id="16">
       <title>Recent Progress in the <fixed-case>S</fixed-case>phinx Speech Recognition System</title>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <url hash="509722fc">H89-1016</url>
       <bibkey>lee-etal-1989-recent</bibkey>
     </paper>
     <paper id="17">
       <title>The <fixed-case>MINDS</fixed-case> System: Using Context and Dialog to Enhance Speech Recognition</title>
-      <author><first>Sheryl R.</first><last>Young</last></author>
+      <author id="sheryl-young"><first>Sheryl R.</first><last>Young</last></author>
       <url hash="007ffd6d">H89-1017</url>
       <bibkey>young-1989-minds</bibkey>
     </paper>
     <paper id="18">
       <title>Understanding Spontaneous Speech</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="ab778db9">H89-1018</url>
       <bibkey>ward-1989-understanding</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>D</fixed-case>ragon</title>
-      <author><first>Janet M.</first><last>Baker</last></author>
-      <author><first>James K.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
+      <author id="james-baker"><first>James K.</first><last>Baker</last></author>
       <url hash="aec8792b">H89-1019</url>
       <bibkey>baker-baker-1989-dragon</bibkey>
     </paper>
@@ -155,20 +155,20 @@
     </paper>
     <paper id="21">
       <title>The <fixed-case>P</fixed-case>enman Language Generation Project</title>
-      <author><first>William C.</first><last>Mann</last></author>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <url hash="2cbbea1e">H89-1021</url>
       <bibkey>mann-hovy-1989-penman</bibkey>
     </paper>
     <paper id="22">
       <title>A Flexible Interface for Linking Applications to <fixed-case>P</fixed-case>enman’s Sentence Generator</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <url hash="e095f697">H89-1022</url>
       <bibkey>kasper-1989-flexible</bibkey>
     </paper>
     <paper id="23">
       <title>Robust Speech Recognition</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="8c7c3d9b">H89-1023</url>
       <bibkey>weinstein-1989-robust</bibkey>
     </paper>
@@ -180,35 +180,35 @@
     </paper>
     <paper id="25">
       <title>Acoustic-Phonetics Based Speech Recognition</title>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="9b6ed79b">H89-1025</url>
       <bibkey>zue-1989-acoustic</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>TINA</fixed-case>: A Probabilistic Syntactic Parser for Speech Understanding Systems</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="47bbbf4e">H89-1026</url>
       <bibkey>seneff-1989-tina</bibkey>
     </paper>
     <paper id="27">
       <title>The <fixed-case>MIT</fixed-case> <fixed-case>SUMMIT</fixed-case> Speech Recognition System: A Progress Report</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="802464c8">H89-1027</url>
       <bibkey>zue-etal-1989-mit</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>N</fixed-case>ATIONAL <fixed-case>I</fixed-case>NSTITUTE OF <fixed-case>S</fixed-case>TANDARDS AND <fixed-case>T</fixed-case>ECHNOLOGY (<fixed-case>NIST</fixed-case>) (Formerly <fixed-case>N</fixed-case>ational <fixed-case>B</fixed-case>ureau of <fixed-case>S</fixed-case>tandards)</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="f3d1b342">H89-1028</url>
       <bibkey>pallett-1989-national</bibkey>
     </paper>
     <paper id="29">
       <title><fixed-case>N</fixed-case>ew <fixed-case>M</fixed-case>exico <fixed-case>S</fixed-case>tate <fixed-case>U</fixed-case>niversity <fixed-case>C</fixed-case>omputing <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratory</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Afzal</first><last>Ballim</last></author>
       <author><first>Roger</first><last>Hartley</last></author>
       <url hash="024f1ed0">H89-1029</url>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>N</fixed-case>aval <fixed-case>O</fixed-case>cean <fixed-case>S</fixed-case>ystems <fixed-case>C</fixed-case>enter</title>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="1710b136">H89-1030</url>
       <bibkey>sundheim-1989-naval</bibkey>
     </paper>
@@ -229,38 +229,38 @@
     </paper>
     <paper id="32">
       <title>Plans for a Task-Oriented Evaluation of Natural Language Understanding Systems</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="9c9b128e">H89-1032</url>
       <bibkey>sundheim-1989-plans</bibkey>
     </paper>
     <paper id="33">
       <title>Natural Language Understanding</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="b7234db1">H89-1033</url>
       <bibkey>grishman-1989-natural</bibkey>
     </paper>
     <paper id="34">
       <title>Analyzing Telegraphic Messages</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="cbd1d870">H89-1034</url>
       <bibkey>grishman-sterling-1989-analyzing</bibkey>
     </paper>
     <paper id="35">
       <title>Natural Language Research</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="cf179465">H89-1035</url>
       <bibkey>joshi-etal-1989-natural</bibkey>
     </paper>
     <paper id="36">
       <title>Lexicalized <fixed-case>TAG</fixed-case>s, Parsing and Lexicons</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <author><first>Kathleen</first><last>Bishop</last></author>
       <author><first>Sharon</first><last>Cote</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="49101dfe">H89-1036</url>
       <bibkey>abeille-etal-1989-lexicalized</bibkey>
@@ -268,13 +268,13 @@
     <paper id="37">
       <title>Elements of a Computational Model of Cooperative Response Generation</title>
       <author><first>Brant A.</first><last>Cheikes</last></author>
-      <author><first>Bonnie L.</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie L.</first><last>Webber</last></author>
       <url hash="a63034f7">H89-1037</url>
       <bibkey>cheikes-webber-1989-elements</bibkey>
     </paper>
     <paper id="38">
       <title>Intonation and Syntax in Spoken Language Systems</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <url hash="78c723e2">H89-1038</url>
       <bibkey>steedman-1989-intonation</bibkey>
     </paper>
@@ -293,26 +293,26 @@
     <paper id="41">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational, Speech Recognition Program, <fixed-case>M</fixed-case>enlo <fixed-case>P</fixed-case>ark, <fixed-case>CA</fixed-case></title>
       <author><first>Jared</first><last>Bemstein</last></author>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="b565edf9">H89-1041</url>
       <bibkey>bemstein-murveit-1989-sri</bibkey>
     </paper>
     <paper id="42">
       <title><fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case> System</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>Michael</first><last>Cohen</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <author><first>Gay</first><last>Baldwin</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <author><first>Jared</first><last>Bernstein</last></author>
       <url hash="4fcc99ee">H89-1042</url>
       <bibkey>murveit-etal-1989-sris</bibkey>
     </paper>
     <paper id="43">
       <title>Integrating Speech and Natural-Language Processing</title>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="f9459ef7">H89-1043</url>
       <bibkey>moore-etal-1989-integrating</bibkey>
     </paper>
@@ -325,14 +325,14 @@
     </paper>
     <paper id="45">
       <title>Chart Parsing of Stochastic Spoken Language Models</title>
-      <author><first>Charles</first><last>Hemphill</last></author>
+      <author id="charles-t-hemphill"><first>Charles</first><last>Hemphill</last></author>
       <author><first>Joseph</first><last>Picone</last></author>
       <url hash="a7ae6e8a">H89-1045</url>
       <bibkey>hemphill-picone-1989-chart</bibkey>
     </paper>
     <paper id="46">
       <title>Initial Draft Guidelines for the Development of the Next-Generation Spoken Language Systems Speech Research Database</title>
-      <author><first>George R.</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George R.</first><last>Doddington</last></author>
       <url hash="8d271293">H89-1046</url>
       <bibkey>doddington-1989-initial</bibkey>
     </paper>
@@ -344,28 +344,28 @@
     </paper>
     <paper id="48">
       <title>Natural Language Understanding: Integrating Syntax, Semantics, and Discourse.</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="538e0376">H89-1048</url>
       <bibkey>hirschman-palmer-1989-natural</bibkey>
     </paper>
     <paper id="49">
       <title>Analyzing Explicitly-Structured Discourse in a Limited Domain: Trouble and Failure Reports</title>
-      <author><first>Catherine N.</first><last>Ball</last></author>
+      <author id="catherine-n-ball"><first>Catherine N.</first><last>Ball</last></author>
       <url hash="f549d6a9">H89-1049</url>
       <bibkey>ball-1989-analyzing</bibkey>
     </paper>
     <paper id="50">
       <title>Reducing Search by Partitioning the Word Network</title>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="1c1e6d6d">H89-1050</url>
       <bibkey>dowding-1989-reducing</bibkey>
     </paper>
     <paper id="51">
       <title>Porting <fixed-case>PUNDIT</fixed-case> to the Resource Management Domain</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Francois-Michel</first><last>Lang</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="francois-michel-lang"><first>Francois-Michel</first><last>Lang</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <author><first>Carl</first><last>Weir</last></author>
       <url hash="e83e0713">H89-1051</url>
       <bibkey>hirschman-etal-1989-porting</bibkey>
@@ -373,13 +373,13 @@
     <paper id="52">
       <title>Analysis and Symbolic Processing of Unrestricted Speech</title>
       <author><first>M. Margaret</first><last>Withgott</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="72da89f5">H89-1052</url>
       <bibkey>withgott-kaplan-1989-analysis</bibkey>
     </paper>
     <paper id="53">
       <title>Automatic Discovery of Contextual Factors Describing Phonological Variation</title>
-      <author><first>Francine R.</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine R.</first><last>Chen</last></author>
       <author><first>Jeff</first><last>Shrager</last></author>
       <url hash="399bb2f8">H89-1053</url>
       <bibkey>chen-shrager-1989-automatic</bibkey>
@@ -403,7 +403,7 @@
     </frontmatter>
     <paper id="1">
       <title>Report on Session <fixed-case>I</fixed-case>: Prosodic Aids to Speech Recognition</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="6f3dbbba">H89-2001</url>
       <bibkey>hirschman-1989-report</bibkey>
     </paper>
@@ -423,7 +423,7 @@
     </paper>
     <paper id="4">
       <title>Distinguishing Questions by Contour Speech Recognition Tasks</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="0c53c89e">H89-2004</url>
       <bibkey>hirschberg-1989-distinguishing</bibkey>
     </paper>
@@ -435,65 +435,65 @@
     </paper>
     <paper id="6">
       <title>Unification-Based Semantic Interpretation in the <fixed-case>BBN</fixed-case> Spoken Language System</title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="9bd0faaf">H89-2006</url>
       <bibkey>stallard-1989-unification</bibkey>
     </paper>
     <paper id="7">
       <title>Modelling Non-verbal Sounds for Speech Recognition</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="447259ae">H89-2007</url>
       <bibkey>ward-1989-modelling</bibkey>
     </paper>
     <paper id="8">
       <title>The <fixed-case>VOYAGER</fixed-case> Speech Understanding System: A Progress Report</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="5aefca77">H89-2008</url>
       <bibkey>zue-etal-1989-voyager</bibkey>
     </paper>
     <paper id="9">
       <title>Answers and Questions: Processing Messages and Queries</title>
-      <author><first>Catherine N.</first><last>Ball</last></author>
-      <author><first>Deborah</first><last>Dahl</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="catherine-n-ball"><first>Catherine N.</first><last>Ball</last></author>
+      <author id="deborah-a-dahl"><first>Deborah</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>Carl</first><last>Weir</last></author>
-      <author><first>Marcia</first><last>Linebarger</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia</first><last>Linebarger</last></author>
       <url hash="5694fd35">H89-2009</url>
       <bibkey>ball-etal-1989-answers</bibkey>
     </paper>
     <paper id="10">
       <title>Natural Language <fixed-case>I</fixed-case></title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <url hash="c2aa8f71">H89-2010</url>
       <bibkey>webber-1989-natural</bibkey>
     </paper>
     <paper id="11">
       <title>Preference Semantics for Message Understanding</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="bf4461e0">H89-2011</url>
       <bibkey>grishman-sterling-1989-preference</bibkey>
     </paper>
     <paper id="12">
       <title>Parsing, Word Associations and Typical Predicate-Argument Relations</title>
-      <author><first>Kenneth</first><last>Church</last></author>
-      <author><first>William</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <url hash="d50b5bf5">H89-2012</url>
       <bibkey>church-etal-1989-parsing</bibkey>
     </paper>
     <paper id="13">
       <title>Enhanced <fixed-case>G</fixed-case>ood-<fixed-case>T</fixed-case>uring and <fixed-case>C</fixed-case>at-<fixed-case>C</fixed-case>al: Two New Methods for Estimating Probabilities of <fixed-case>E</fixed-case>nglish Bigrams (abbreviated version)</title>
-      <author><first>Kenneth W.</first><last>Church</last></author>
-      <author><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
       <url hash="7ae282f9">H89-2013</url>
       <bibkey>church-gale-1989-enhanced</bibkey>
     </paper>
@@ -505,101 +505,101 @@
     </paper>
     <paper id="15">
       <title>New Possibilities in Machine Translation</title>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <url hash="bc0cfb65">H89-2015</url>
       <bibkey>hovy-1989-new</bibkey>
     </paper>
     <paper id="16">
       <title>Data Collection And Evaluation</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="ce3a9832">H89-2016</url>
       <bibkey>pallett-1989-data</bibkey>
     </paper>
     <paper id="17">
       <title>Data Collection and Analysis in the Air Travel Planning Domain</title>
       <author><first>Jacqueline C.</first><last>Kowtko</last></author>
-      <author><first>Patti J.</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti J.</first><last>Price</last></author>
       <url hash="3e5a0bcc">H89-2017</url>
       <bibkey>kowtko-price-1989-data</bibkey>
     </paper>
     <paper id="18">
       <title>The Collection and Preliminary Analysis of a Spontaneous Speech Database</title>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <author><first>Nancy</first><last>Daly</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Michal</first><last>Soclof</last></author>
       <url hash="3f08e5cf">H89-2018</url>
       <bibkey>zue-etal-1989-collection</bibkey>
     </paper>
     <paper id="19">
       <title>A Proposal for <fixed-case>SLS</fixed-case> Evaluation</title>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="5f28d6ea">H89-2019</url>
       <bibkey>boisen-etal-1989-proposal</bibkey>
     </paper>
     <paper id="20">
       <title>A Simple Statistical Class Grammar for Measuring Speech Recognition Performance</title>
       <author><first>Alan</first><last>Derr</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="6876a8f1">H89-2020</url>
       <bibkey>derr-schwartz-1989-simple</bibkey>
     </paper>
     <paper id="21">
       <title>Evaluating spoken language interaction</title>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <author><first>Michelle</first><last>Sakamoto</last></author>
-      <author><first>Joseph H.</first><last>Polifroni</last></author>
+      <author id="joseph-polifroni"><first>Joseph H.</first><last>Polifroni</last></author>
       <url hash="be39596d">H89-2021</url>
       <bibkey>rudnicky-etal-1989-evaluating</bibkey>
     </paper>
     <paper id="22">
       <title>Preliminary Evaluation of the <fixed-case>VOYAGER</fixed-case> Spoken Language System</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="58f2d3d4">H89-2022</url>
       <bibkey>zue-etal-1989-preliminary</bibkey>
     </paper>
     <paper id="23">
       <title>Data Collection and Evaluation <fixed-case>II</fixed-case></title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="10296b26">H89-2023</url>
       <bibkey>grishman-1989-data</bibkey>
     </paper>
     <paper id="24">
       <title>Text on Tap: the <fixed-case>ACL</fixed-case>/<fixed-case>DCI</fixed-case></title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <url hash="9da2a313">H89-2024</url>
       <bibkey>liberman-1989-text</bibkey>
     </paper>
     <paper id="25">
       <title>Spoken Language Systems <fixed-case>II</fixed-case></title>
-      <author><first>Richard</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard</first><last>Stern</last></author>
       <url hash="746e75d0">H89-2025</url>
       <bibkey>stern-1989-spoken</bibkey>
     </paper>
     <paper id="26">
       <title>A Stack Decoder for Continous Speech Recognition</title>
-      <author><first>Dean G.</first><last>Sturtevant</last></author>
+      <author id="dean-sturtevant"><first>Dean G.</first><last>Sturtevant</last></author>
       <url hash="98b254a0">H89-2026</url>
       <bibkey>sturtevant-1989-stack</bibkey>
     </paper>
     <paper id="27">
       <title>The <fixed-case>N</fixed-case>-Best Algorithm: Efficient Procedure for Finding Top <fixed-case>N</fixed-case> Sentence Hypotheses</title>
-      <author><first>Yen-Lu</first><last>Chow</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="yen-lu-chow"><first>Yen-Lu</first><last>Chow</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="367750cc">H89-2027</url>
       <bibkey>chow-schwartz-1989-n</bibkey>
     </paper>
@@ -611,13 +611,13 @@
     </paper>
     <paper id="29">
       <title>Summary of Session 7 – Natural Language (Part 2)</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="23398824">H89-2029</url>
       <bibkey>bates-1989-summary</bibkey>
     </paper>
     <paper id="30">
       <title>Belief Ascription and Model Generative Reasoning: joining two paradigms to a robust parser of messages.</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Roger</first><last>Hartley</last></author>
       <url hash="a55ef460">H89-2030</url>
       <bibkey>wilks-hartley-1989-belief</bibkey>
@@ -625,60 +625,60 @@
     <paper id="31">
       <title>Porting to New Domains Using the Learner</title>
       <author><first>Robert J. P.</first><last>Ingna</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <url hash="f98d62ff">H89-2031</url>
       <bibkey>ingna-ramshaw-1989-porting</bibkey>
     </paper>
     <paper id="32">
       <title>Overview: Continuous Speech Recognition <fixed-case>I</fixed-case></title>
-      <author><first>Janet M.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
       <url hash="6828519f">H89-2032</url>
       <bibkey>baker-1989-overview</bibkey>
     </paper>
     <paper id="33">
       <title>Improved <fixed-case>HMM</fixed-case> Models for High Performance Speech Recognition</title>
-      <author><first>Steve</first><last>Austin</last></author>
-      <author><first>Chris</first><last>Barry</last></author>
-      <author><first>Yen-Lu</first><last>Chow</last></author>
+      <author id="steve-austin"><first>Steve</first><last>Austin</last></author>
+      <author id="chris-barry"><first>Chris</first><last>Barry</last></author>
+      <author id="yen-lu-chow"><first>Yen-Lu</first><last>Chow</last></author>
       <author><first>Man</first><last>Derr</last></author>
-      <author><first>Owen</first><last>Kimball</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Paul</first><last>Placeway</last></author>
+      <author id="owen-kimball"><first>Owen</first><last>Kimball</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="paul-placeway"><first>Paul</first><last>Placeway</last></author>
       <author><first>William</first><last>Russell</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>George</first><last>Yu</last></author>
       <url hash="4eed3fbf">H89-2033</url>
       <bibkey>austin-etal-1989-improved</bibkey>
     </paper>
     <paper id="34">
       <title>Speaker Adaptation Using Multiple Reference Speakers</title>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Chris</first><last>Barry</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="chris-barry"><first>Chris</first><last>Barry</last></author>
       <url hash="f92f68fa">H89-2034</url>
       <bibkey>kubala-etal-1989-speaker-adaptation</bibkey>
     </paper>
     <paper id="35">
       <title>Automatic Detection Of New Words In A Large Vocabulary Continuous Speech Recognition System</title>
       <author><first>Ayman</first><last>Asadi</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="18d09d45">H89-2035</url>
       <bibkey>asadi-etal-1989-automatic</bibkey>
     </paper>
     <paper id="36">
       <title>Automatic New Word Acquisition: Spelling from Acoustics</title>
-      <author><first>Fil</first><last>Alleva</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="fil-alleva"><first>Fil</first><last>Alleva</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="ccbfd3b4">H89-2036</url>
       <bibkey>alleva-lee-1989-automatic</bibkey>
     </paper>
     <paper id="37">
       <title>Towards Speech Recognition Without Vocabulary-Specific Training</title>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
-      <author><first>Robert</first><last>Weide</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="robert-weide"><first>Robert</first><last>Weide</last></author>
       <url hash="744fd5fb">H89-2037</url>
       <bibkey>hon-etal-1989-towards</bibkey>
     </paper>
@@ -693,9 +693,9 @@
     <paper id="39">
       <title>Acoustic Modeling of Subword Units for Large Vocabulary Speaker Independent Speech Recognition</title>
       <author><first>Chin-Hui</first><last>Lee</last></author>
-      <author><first>Lawrence R.</first><last>Rabiner</last></author>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
-      <author><first>Jay G.</first><last>Wilpon</last></author>
+      <author id="lawrence-r-rabiner"><first>Lawrence R.</first><last>Rabiner</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="jay-wilpon"><first>Jay G.</first><last>Wilpon</last></author>
       <url hash="c98a93c5">H89-2039</url>
       <bibkey>lee-etal-1989-acoustic</bibkey>
     </paper>
@@ -714,27 +714,27 @@
     </paper>
     <paper id="42">
       <title>Summary of Session 9 - Future Plans</title>
-      <author><first>Clifford</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
       <url hash="131c172d">H89-2042</url>
       <bibkey>weinstein-1989-summary</bibkey>
     </paper>
     <paper id="43">
       <title>SUMMARY OF SESSION 10 - Continous Speech Recognition <fixed-case>II</fixed-case></title>
-      <author><first>George</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George</first><last>Doddington</last></author>
       <url hash="eb77f07b">H89-2043</url>
       <bibkey>doddington-1989-summary</bibkey>
     </paper>
     <paper id="44">
       <title>Acoustical Pre-Processing for Robust Speech Recognition</title>
-      <author><first>Richard M.</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard M.</first><last>Stern</last></author>
       <author><first>Alejandro</first><last>Acero</last></author>
       <url hash="294b5ec1">H89-2044</url>
       <bibkey>stern-acero-1989-acoustical</bibkey>
     </paper>
     <paper id="45">
       <title>Spectral Estimation for Noise Robust Speech Recognition</title>
-      <author><first>Adoram</first><last>Erell</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="adoram-erell"><first>Adoram</first><last>Erell</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="fd4611f0">H89-2045</url>
       <bibkey>erell-weintraub-1989-spectral</bibkey>
     </paper>
@@ -756,7 +756,7 @@
     </paper>
     <paper id="48">
       <title>Some Applications of Tree-based Modelling to Speech and Language</title>
-      <author><first>Michael D.</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael D.</first><last>Riley</last></author>
       <url hash="ea4cb8c4">H89-2048</url>
       <bibkey>riley-1989-applications</bibkey>
     </paper>
@@ -764,33 +764,33 @@
       <title>Speech Recognition in Parallel</title>
       <author><first>Salvatore J.</first><last>Stolfo</last></author>
       <author><first>Zvi</first><last>Galil</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Russell</first><last>Mills</last></author>
       <url hash="42a99088">H89-2049</url>
       <bibkey>stolfo-etal-1989-speech</bibkey>
     </paper>
     <paper id="50">
       <title>Contextually-Based Data-Derived Pronunciation Networks for Automatic Speech Recognition</title>
-      <author><first>Francine R.</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine R.</first><last>Chen</last></author>
       <url hash="7cd1229e">H89-2050</url>
       <bibkey>chen-1989-contextually</bibkey>
     </paper>
     <paper id="51">
       <title>Session 11 Natural Language <fixed-case>III</fixed-case></title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <url hash="bec19076">H89-2051</url>
       <bibkey>church-1989-session</bibkey>
     </paper>
     <paper id="52">
       <title>Using Structural Constraints for Speech Act Interpretation</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Elizabeth</first><last>Hinkelman</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="elizabeth-a-hinkelman"><first>Elizabeth</first><last>Hinkelman</last></author>
       <url hash="d7fec7f1">H89-2052</url>
       <bibkey>allen-hinkelman-1989-using</bibkey>
     </paper>
     <paper id="53">
       <title>An Evaluation of Lexicalization in Parsing</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="6d124748">H89-2053</url>
       <bibkey>joshi-schabes-1989-evaluation</bibkey>
@@ -807,33 +807,33 @@
     </paper>
     <paper id="55">
       <title>Coordinating Text and Graphics in Explanation Generation</title>
-      <author><first>Steven K.</first><last>Feiner</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="steven-feiner"><first>Steven K.</first><last>Feiner</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="61da449a">H89-2055</url>
       <bibkey>feiner-mckeown-1989-coordinating</bibkey>
     </paper>
     <paper id="56">
       <title>Summary of Session on Hardware for Spoken Language Demonstrations</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="132d4d99">H89-2056</url>
       <bibkey>schwartz-1989-summary</bibkey>
     </paper>
     <paper id="57">
       <title>Research and Development in Natural Language Understanding</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="e803188c">H89-2057</url>
       <bibkey>weischedel-1989-research</bibkey>
     </paper>
     <paper id="58">
       <title>Research in Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="eb13b264">H89-2058</url>
       <bibkey>makhoul-schwartz-1989-research-continuous</bibkey>
     </paper>
     <paper id="59">
       <title>Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="9a7278b2">H89-2059</url>
       <bibkey>makhoul-1989-spoken</bibkey>
     </paper>
@@ -845,15 +845,15 @@
     </paper>
     <paper id="61">
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="1bf7098c">H89-2061</url>
       <bibkey>ostendorf-price-1989-evaluating</bibkey>
     </paper>
     <paper id="62">
       <title>Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="ccc443c7">H89-2062</url>
       <bibkey>ostendorf-rohlicek-1989-segment</bibkey>
     </paper>
@@ -871,32 +871,32 @@
     </paper>
     <paper id="65">
       <title>The Current Status of the Penman Language Generation System</title>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <url hash="9494298a">H89-2065</url>
       <bibkey>hovy-1989-current</bibkey>
     </paper>
     <paper id="66">
       <title>Research and Development for Spoken Language Systems</title>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="3be99eef">H89-2066</url>
       <bibkey>zue-1989-research</bibkey>
     </paper>
     <paper id="67">
       <title>Robust Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="86e3f37a">H89-2067</url>
       <bibkey>weinstein-paul-1989-robust</bibkey>
     </paper>
     <paper id="68">
       <title>Establishing Performance Baselines for Text Understanding Systems</title>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="c8f8547c">H89-2068</url>
       <bibkey>sundheim-1989-establishing</bibkey>
     </paper>
     <paper id="69">
       <title>Robust Natural Language Analysis</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="6c7648cd">H89-2069</url>
       <bibkey>grishman-1989-robust</bibkey>
     </paper>
@@ -914,8 +914,8 @@
     </paper>
     <paper id="71">
       <title>Natural Language, Knowledge Representation and Discourse</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <url hash="087c08f7">H89-2071</url>
       <bibkey>allen-schubert-1989-natural</bibkey>
     </paper>
@@ -927,21 +927,21 @@
     </paper>
     <paper id="73">
       <title>Integration of Speech and Natural Language Understanding for Spoken Language Systems (<fixed-case>SLS</fixed-case>)</title>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="5f19af6a">H89-2073</url>
       <bibkey>price-1989-integration</bibkey>
     </paper>
     <paper id="74">
       <title>Real-Time Speech Recognition Systems</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="1f005548">H89-2074</url>
       <bibkey>murveit-1989-real</bibkey>
     </paper>
     <paper id="75">
       <title><fixed-case>TACITUS</fixed-case>: A Message Understanding System</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Mark</first><last>Stickel</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <url hash="fd4e9ef8">H89-2075</url>
@@ -950,28 +950,28 @@
     <paper id="76">
       <title>Analysis and Symbolic Processing of Unrestricted Speech</title>
       <author><first>M. Margaret</first><last>Withgott</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="6d567d2e">H89-2076</url>
       <bibkey>withgott-kaplan-1989-analysis-symbolic</bibkey>
     </paper>
     <paper id="77">
       <title>White Paper on Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Fred</first><last>Jelinek</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="frederick-jelinek"><first>Fred</first><last>Jelinek</last></author>
       <author><first>Larry</first><last>Rabiner</last></author>
-      <author><first>Clifford</first><last>Weinstein</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="1a565c1b">H89-2077</url>
       <bibkey>makhoul-etal-1989-white</bibkey>
     </paper>
     <paper id="78">
       <title>White Paper on Natural Language Processing</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Barbara</first><last>Grosz</last></author>
-      <author><first>Wendy</first><last>Lehnert</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Raymond</first><last>Perrault</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="barbara-j-grosz"><first>Barbara</first><last>Grosz</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="c-raymond-perrault"><first>Raymond</first><last>Perrault</last></author>
       <author><first>Robert</first><last>Wilensky</last></author>
       <url hash="f7db6097">H89-2078</url>
       <bibkey>weischedel-etal-1989-white</bibkey>
diff --git a/data/xml/H90.xml b/data/xml/H90.xml
index cdd6869221..2544ff6cbe 100644
--- a/data/xml/H90.xml
+++ b/data/xml/H90.xml
@@ -12,26 +12,26 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the Third <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop</title>
-      <author><first>Richard M.</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard M.</first><last>Stern</last></author>
       <url hash="bd14bb6c">H90-1001</url>
       <bibkey>stern-1990-overview</bibkey>
     </paper>
     <paper id="2">
       <title>Session 1: Spoken Language Systems <fixed-case>I</fixed-case></title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="6eb65b93">H90-1002</url>
       <bibkey>ward-1990-session</bibkey>
     </paper>
     <paper id="3">
       <title>Efficient, High-Performance Algorithms for N-Best Search</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Steve</first><last>Austin</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="steve-austin"><first>Steve</first><last>Austin</last></author>
       <url hash="1059d877">H90-1003</url>
       <bibkey>schwartz-austin-1990-efficient</bibkey>
     </paper>
     <paper id="4">
       <title>A <fixed-case>T</fixed-case>ree.<fixed-case>T</fixed-case>rellis Based Fast Search for Finding the N Best Sentence Hypotheses in Continuous Speech Recognition</title>
-      <author><first>Frank K.</first><last>Soong</last></author>
+      <author id="frank-k-soong"><first>Frank K.</first><last>Soong</last></author>
       <author><first>Eng-Fong</first><last>Huang</last></author>
       <url hash="63b06f5a">H90-1004</url>
       <bibkey>soong-huang-1990-tree</bibkey>
@@ -53,48 +53,48 @@
     </paper>
     <paper id="7">
       <title>Session 2: Natural Language <fixed-case>I</fixed-case></title>
-      <author><first>Damaris M.</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris M.</first><last>Ayuso</last></author>
       <url hash="40c967be">H90-1007</url>
       <bibkey>ayuso-1990-session</bibkey>
     </paper>
     <paper id="8">
       <title>Picking Reference Events from Tense A Formal, Implement able Theory of <fixed-case>E</fixed-case>nglish Tense-Aspect Semantics Trees:</title>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <author><first>Chung Hee</first><last>Hwang</last></author>
       <url hash="d3cdd088">H90-1008</url>
       <bibkey>schubert-hwang-1990-picking</bibkey>
     </paper>
     <paper id="9">
       <title>Interactive Multimedia Explanation for Equipment Maintenance and Repair</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Steven</first><last>Feiner</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="steven-feiner"><first>Steven</first><last>Feiner</last></author>
       <url hash="c43e92fa">H90-1009</url>
       <bibkey>mckeown-feiner-1990-interactive</bibkey>
     </paper>
     <paper id="10">
       <title>Two Recent Developments in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars: Semantics and Efficient Processing</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="30d6b3c5">H90-1010</url>
       <bibkey>schabes-joshi-1990-two</bibkey>
     </paper>
     <paper id="11">
       <title>Performing Integrated Syntactic and Semantic Parsing Using Classification</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <url hash="aa332b7d">H90-1011</url>
       <bibkey>kasper-hovy-1990-performing</bibkey>
     </paper>
     <paper id="12">
       <title>Making Abduction More Efficient</title>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="af71ccc0">H90-1012</url>
       <bibkey>appelt-hobbs-1990-making</bibkey>
     </paper>
     <paper id="13">
       <title>Session 3: Natural Language Evaluation</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="f86b426a">H90-1013</url>
       <bibkey>hirschman-1990-session</bibkey>
     </paper>
@@ -112,10 +112,10 @@
     </paper>
     <paper id="16">
       <title>Toward a Real-Time Spoken Language System Using Commercial Hardware</title>
-      <author><first>Steve</first><last>Austin</last></author>
+      <author id="steve-austin"><first>Steve</first><last>Austin</last></author>
       <author><first>Pat</first><last>Peterson</last></author>
-      <author><first>Paul</first><last>Placeway</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="paul-placeway"><first>Paul</first><last>Placeway</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Jeff</first><last>Vandergrift</last></author>
       <url hash="04c246e1">H90-1016</url>
       <bibkey>austin-etal-1990-toward</bibkey>
@@ -123,10 +123,10 @@
     <paper id="17">
       <title>The Dragon Continuous Speech Recognition System: A Real-Time Implementation</title>
       <author><first>Paul</first><last>Bamberg</last></author>
-      <author><first>Yen-lu</first><last>Chow</last></author>
-      <author><first>Laurence</first><last>Gillick</last></author>
+      <author id="yen-lu-chow"><first>Yen-lu</first><last>Chow</last></author>
+      <author id="laurence-gillick"><first>Laurence</first><last>Gillick</last></author>
       <author><first>Robert</first><last>Roth</last></author>
-      <author><first>Dean</first><last>Sturtevant</last></author>
+      <author id="dean-sturtevant"><first>Dean</first><last>Sturtevant</last></author>
       <url hash="85595366">H90-1017</url>
       <bibkey>bamberg-etal-1990-dragon</bibkey>
     </paper>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="19">
       <title>Session 5: Overview of the <fixed-case>ATIS</fixed-case> System</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="d82b7aa8">H90-1019</url>
       <bibkey>pallett-1990-session</bibkey>
     </paper>
@@ -159,27 +159,27 @@
     </paper>
     <paper id="21">
       <title>The <fixed-case>ATIS</fixed-case> Spoken Language Systems Pilot Corpus</title>
-      <author><first>Charles T.</first><last>Hemphill</last></author>
+      <author id="charles-t-hemphill"><first>Charles T.</first><last>Hemphill</last></author>
       <author><first>John J.</first><last>Godfrey</last></author>
-      <author><first>George R.</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George R.</first><last>Doddington</last></author>
       <url hash="a5cc47cc">H90-1021</url>
       <bibkey>hemphill-etal-1990-atis</bibkey>
     </paper>
     <paper id="22">
       <title>Developing an Evaluation Methodology for Spoken Language Systems</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="cf8bd65d">H90-1022</url>
       <bibkey>bates-etal-1990-developing</bibkey>
     </paper>
     <paper id="23">
       <title>Beyond Class A: A Proposal for Automatic Evaluation of Discourse</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Donald P.</first><last>McKay</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
       <url hash="fc553f5b">H90-1023</url>
       <bibkey>hirschman-etal-1990-beyond</bibkey>
     </paper>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="25">
       <title>Session 6: <fixed-case>ATIS</fixed-case> Site Reports and General Discussion</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="2ef227dc">H90-1025</url>
       <bibkey>pallett-1990-session-6</bibkey>
     </paper>
@@ -210,19 +210,19 @@
     </paper>
     <paper id="27">
       <title>The <fixed-case>CMU</fixed-case> Air Travel Information Service: Understanding Spontaneous Speech</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="03a6c050">H90-1027</url>
       <bibkey>ward-1990-cmu</bibkey>
     </paper>
     <paper id="28">
       <title>Preliminary <fixed-case>ATIS</fixed-case> Development at <fixed-case>MIT</fixed-case></title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="f49d0c8a">H90-1028</url>
       <bibkey>zue-etal-1990-preliminary</bibkey>
     </paper>
@@ -239,29 +239,29 @@
     </paper>
     <paper id="30">
       <title>Management and Evaluation of Interactive Dialog in the Air Travel Domain</title>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Donald P.</first><last>McKay</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
-      <author><first>David</first><last>Magerman</last></author>
-      <author><first>Catherine N.</first><last>Ball</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="david-m-magerman"><first>David</first><last>Magerman</last></author>
+      <author id="catherine-n-ball"><first>Catherine N.</first><last>Ball</last></author>
       <url hash="02b55c46">H90-1030</url>
       <bibkey>norton-etal-1990-management</bibkey>
     </paper>
     <paper id="31">
       <title><fixed-case>SRI</fixed-case>’s Experience with the <fixed-case>ATIS</fixed-case> Evaluation</title>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Mary</first><last>Dalrymple</last></author>
-      <author><first>Douglas</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas</first><last>Moran</last></author>
       <url hash="9fe335f1">H90-1031</url>
       <bibkey>moore-etal-1990-sris</bibkey>
     </paper>
     <paper id="32">
       <title>Session 7: Speech Recognition <fixed-case>I</fixed-case></title>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="18ee2ecc">H90-1032</url>
       <bibkey>weintraub-1990-session</bibkey>
     </paper>
@@ -274,20 +274,20 @@
     <paper id="34">
       <title>Towards Environment-Independent Spoken Language Systems</title>
       <author><first>Alejandro</first><last>Acero</last></author>
-      <author><first>Richard M.</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard M.</first><last>Stern</last></author>
       <url hash="fa5e9b98">H90-1034</url>
       <bibkey>acero-stern-1990-towards</bibkey>
     </paper>
     <paper id="35">
       <title>Phoneme-in-Context Modeling for Dragon’s Continuous Speech Recognizer</title>
       <author><first>Paul</first><last>Bamberg</last></author>
-      <author><first>Laurence</first><last>Gillick</last></author>
+      <author id="laurence-gillick"><first>Laurence</first><last>Gillick</last></author>
       <url hash="ec3d8153">H90-1035</url>
       <bibkey>bamberg-gillick-1990-phoneme</bibkey>
     </paper>
     <paper id="36">
       <title>A Rapid Match Algorithm for Continuous Speech Recognition</title>
-      <author><first>Laurence S.</first><last>Gillick</last></author>
+      <author id="laurence-gillick"><first>Laurence S.</first><last>Gillick</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <url hash="6eb3eed3">H90-1036</url>
       <bibkey>gillick-roth-1990-rapid</bibkey>
@@ -309,7 +309,7 @@
       <author id="robert-l-mercer"><first>R. L.</first><last>Mercer</last></author>
       <author id="bernard-merialdo"><first>B.</first><last>Merialdo</last></author>
       <author><first>D.</first><last>Nahamoo</last></author>
-      <author><first>M. A.</first><last>Picheny</last></author>
+      <author id="m-a-picheny"><first>M. A.</first><last>Picheny</last></author>
       <author><first>J.</first><last>Powell</last></author>
       <url hash="5005a02b">H90-1038</url>
       <bibkey>bahl-etal-1990-automatic</bibkey>
@@ -317,8 +317,8 @@
     <paper id="39">
       <title>On the Interaction Between True Source, Training, and Testing Language Models</title>
       <author><first>Douglas B.</first><last>Paul</last></author>
-      <author><first>James K.</first><last>Baker</last></author>
-      <author><first>Janet M.</first><last>Baker</last></author>
+      <author id="james-baker"><first>James K.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
       <url hash="63ab61bb">H90-1039</url>
       <bibkey>paul-etal-1990-interaction</bibkey>
     </paper>
@@ -338,61 +338,61 @@
     </paper>
     <paper id="42">
       <title>Session 8: Spoken Language Systems <fixed-case>II</fixed-case></title>
-      <author><first>Charles T.</first><last>Hemphill</last></author>
+      <author id="charles-t-hemphill"><first>Charles T.</first><last>Hemphill</last></author>
       <url hash="f648811a">H90-1042</url>
       <bibkey>hemphill-1990-session</bibkey>
     </paper>
     <paper id="43">
       <title>Recent Progress on the <fixed-case>VOYAGER</fixed-case> System</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
       <author><first>Michael</first><last>McCandless</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="637ef403">H90-1043</url>
       <bibkey>zue-etal-1990-recent</bibkey>
     </paper>
     <paper id="44">
       <title>Training and Evaluation of a Spoken Language Understanding System</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
-      <author><first>David</first><last>Magerman</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="david-m-magerman"><first>David</first><last>Magerman</last></author>
       <author><first>Nghi</first><last>Nguyen</last></author>
-      <author><first>Catherine N.</first><last>Ball</last></author>
+      <author id="catherine-n-ball"><first>Catherine N.</first><last>Ball</last></author>
       <url hash="7465d536">H90-1044</url>
       <bibkey>dahl-etal-1990-training</bibkey>
     </paper>
     <paper id="45">
       <title>A Comparison of Speech and Typed Input</title>
-      <author><first>Alexander G.</first><last>Hauptmann</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander G.</first><last>Hauptmann</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="21745f20">H90-1045</url>
       <bibkey>hauptmann-rudnicky-1990-comparison</bibkey>
     </paper>
     <paper id="46">
       <title>The design of a spoken language interface</title>
       <author><first>Jean-Michel</first><last>Lunati</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="f36d07e9">H90-1046</url>
       <bibkey>lunati-rudnicky-1990-design</bibkey>
     </paper>
     <paper id="47">
       <title>Syntactic and Semantic Knowledge in the <fixed-case>DELPHI</fixed-case> Unification Grammar</title>
       <author id="robert-bobrow"><first>R.</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="90b2e02a">H90-1047</url>
       <bibkey>bobrow-etal-1990-syntactic</bibkey>
     </paper>
     <paper id="48">
       <title>On Deftly Introducing Procedural Elements into Unification Parsing</title>
       <author id="robert-bobrow"><first>R.</first><last>Bobrow</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <url hash="ee6d6cf9">H90-1048</url>
       <bibkey>bobrow-ramshaw-1990-deftly</bibkey>
     </paper>
@@ -405,28 +405,28 @@
     </paper>
     <paper id="50">
       <title>Session 9: Automatic Acquisition of Linguistic Structure</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="41fb08bb">H90-1050</url>
       <bibkey>marcus-1990-session</bibkey>
     </paper>
     <paper id="51">
       <title>Using Explanation-Based Learning to Increase Performance in a Large-Scale <fixed-case>NL</fixed-case> Query System</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Christer</first><last>Samuelsson</last></author>
       <url hash="e2b06237">H90-1051</url>
       <bibkey>rayner-samuelsson-1990-using</bibkey>
     </paper>
     <paper id="52">
       <title>Structural Ambiguity and Lexical Relations</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <author><first>Mats</first><last>Rooth</last></author>
       <url hash="bfa1c063">H90-1052</url>
       <bibkey>hindle-rooth-1990-structural</bibkey>
     </paper>
     <paper id="53">
       <title>Statistical Parsing of Messages</title>
-      <author><first>Mahesh V.</first><last>Chitrao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="mahesh-v-chitrao"><first>Mahesh V.</first><last>Chitrao</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="e883498d">H90-1053</url>
       <bibkey>chitrao-grishman-1990-statistical</bibkey>
     </paper>
@@ -441,28 +441,28 @@
     <paper id="55">
       <title>Deducing Linguistic Structure from the Statistics of Large Corpora</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>David</first><last>Magerman</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="david-m-magerman"><first>David</first><last>Magerman</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <url hash="89e7002b">H90-1055</url>
       <bibkey>brill-etal-1990-deducing</bibkey>
     </paper>
     <paper id="56">
       <title>Poor Estimates of Context are Worse than None</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <url hash="3bbf1cbb">H90-1056</url>
       <bibkey>gale-church-1990-poor</bibkey>
     </paper>
     <paper id="57">
       <title>Representation Quality in Text Classification: An Introduction and Experiment</title>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <url hash="5f38cc22">H90-1057</url>
       <bibkey>lewis-1990-representation</bibkey>
     </paper>
     <paper id="58">
       <title>Session 10: Evaluation of Systems on the Resource Management Database</title>
-      <author><first>George</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George</first><last>Doddington</last></author>
       <url hash="1affd63b">H90-1058</url>
       <bibkey>doddington-1990-session</bibkey>
     </paper>
@@ -476,15 +476,15 @@
     </paper>
     <paper id="60">
       <title>A New Paradigm for Speaker-Independent Training and Speaker Adaptation</title>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="a75c185d">H90-1060</url>
       <bibkey>kubala-schwartz-1990-new</bibkey>
     </paper>
     <paper id="61">
       <title>Implementation Aspects of Large Vocabulary Recognition Based on Intraword and Interword Phonetic Units</title>
       <author id="roberto-pieraccini"><first>R.</first><last>Pieraccini</last></author>
-      <author><first>C. H.</first><last>Lee</last></author>
+      <author id="c-h-lee"><first>C. H.</first><last>Lee</last></author>
       <author id="egidio-giachin"><first>E.</first><last>Giachin</last></author>
       <author id="lawrence-r-rabiner"><first>L. R.</first><last>Rabiner</last></author>
       <url hash="de8e23bb">H90-1061</url>
@@ -492,7 +492,7 @@
     </paper>
     <paper id="62">
       <title>Improved Acoustic Modeling for Continuous Speech Recognition</title>
-      <author><first>C.-H.</first><last>Lee</last></author>
+      <author id="c-h-lee"><first>C.-H.</first><last>Lee</last></author>
       <author id="egidio-giachin"><first>E.</first><last>Giachin</last></author>
       <author id="lawrence-r-rabiner"><first>L. R.</first><last>Rabiner</last></author>
       <author id="roberto-pieraccini"><first>R.</first><last>Pieraccini</last></author>
@@ -502,12 +502,12 @@
     </paper>
     <paper id="63">
       <title>Improved Hidden <fixed-case>M</fixed-case>arkov Modeling for Speaker-Independent Continuous Speech Recognition</title>
-      <author><first>Xuedong</first><last>Huang</last></author>
-      <author><first>Fil</first><last>Alleva</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
+      <author id="fil-alleva"><first>Fil</first><last>Alleva</last></author>
       <author><first>Satoru</first><last>Hayamizu</last></author>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="b7bc0d59">H90-1063</url>
       <bibkey>huang-etal-1990-improved</bibkey>
     </paper>
@@ -519,8 +519,8 @@
     </paper>
     <paper id="65">
       <title>Training Set Issues in <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case> Speech Recognition System</title>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <author><first>Mike</first><last>Cohen</last></author>
       <url hash="c4a9f42f">H90-1065</url>
       <bibkey>murveit-etal-1990-training</bibkey>
@@ -537,35 +537,35 @@
       <author><first>Mark T.</first><last>Anikst</last></author>
       <author><first>William S.</first><last>Meisel</last></author>
       <author><first>Matthew C.</first><last>Soares</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="ce90ca40">H90-1067</url>
       <bibkey>anikst-etal-1990-experiments</bibkey>
     </paper>
     <paper id="68">
       <title>Session 11: Natural Language <fixed-case>II</fixed-case></title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="56618306">H90-1068</url>
       <bibkey>dahl-1990-session</bibkey>
     </paper>
     <paper id="69">
       <title>Towards Understanding Text with a Very Large Vocabulary</title>
-      <author><first>Damaris</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
       <author id="robert-bobrow"><first>R.</first><last>Bobrow</last></author>
       <author><first>Dawn</first><last>MacLaughlin</last></author>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Rich</first><last>Schwartz</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="richard-schwartz"><first>Rich</first><last>Schwartz</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="6276df6a">H90-1069</url>
       <bibkey>ayuso-etal-1990-towards</bibkey>
     </paper>
     <paper id="70">
       <title>Generic Text Processing: A Progress Report</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
-      <author><first>George R.</first><last>Krupka</last></author>
-      <author><first>Susan W.</first><last>McRoy</last></author>
-      <author><first>Lisa F.</first><last>Rau</last></author>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="george-krupka"><first>George R.</first><last>Krupka</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
+      <author id="lisa-rau"><first>Lisa F.</first><last>Rau</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <author><first>Uri</first><last>Zernik</last></author>
       <url hash="b059391f">H90-1070</url>
       <bibkey>jacobs-etal-1990-generic</bibkey>
@@ -578,35 +578,35 @@
     </paper>
     <paper id="72">
       <title>Machine Translation Again?</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="9784196f">H90-1072</url>
       <bibkey>wilks-etal-1990-machine</bibkey>
     </paper>
     <paper id="73">
       <title>Session 12: Speech Recognition <fixed-case>II</fixed-case></title>
-      <author><first>Jordan</first><last>Cohen</last></author>
+      <author id="jordan-cohen"><first>Jordan</first><last>Cohen</last></author>
       <url hash="22bf9adf">H90-1073</url>
       <bibkey>cohen-1990-session</bibkey>
     </paper>
     <paper id="74">
       <title>Recent Progress on the <fixed-case>SUMMIT</fixed-case> System</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="5e56bc4d">H90-1074</url>
       <bibkey>zue-etal-1990-recent-progress</bibkey>
     </paper>
     <paper id="75">
       <title>Spoken Letter Recognition</title>
-      <author><first>Ronald</first><last>Cole</last></author>
+      <author id="ronald-cole"><first>Ronald</first><last>Cole</last></author>
       <author><first>Mark</first><last>Fanty</last></author>
       <url hash="26b125b9">H90-1075</url>
       <bibkey>cole-fanty-1990-spoken</bibkey>
@@ -623,41 +623,41 @@
     </paper>
     <paper id="77">
       <title>Recent Results from the <fixed-case>ARM</fixed-case> Continuous Speech Recognition Project</title>
-      <author><first>Martin</first><last>Russell</last></author>
+      <author id="martin-russell"><first>Martin</first><last>Russell</last></author>
       <author><first>Keith</first><last>Ponting</last></author>
       <url hash="8980c758">H90-1077</url>
       <bibkey>russell-ponting-1990-recent</bibkey>
     </paper>
     <paper id="78">
       <title>Adaptive Natural Language Processing</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="a94f65cb">H90-1078</url>
       <bibkey>weischedel-1990-adaptive</bibkey>
     </paper>
     <paper id="79">
       <title>Research in Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="d61d7387">H90-1079</url>
       <bibkey>makhoul-schwartz-1990-research</bibkey>
     </paper>
     <paper id="80">
       <title>Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="f62daa2b">H90-1080</url>
       <bibkey>makhoul-1990-spoken</bibkey>
     </paper>
     <paper id="81">
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="2e6a8d85">H90-1081</url>
       <bibkey>ostendorf-price-1990-evaluating</bibkey>
     </paper>
     <paper id="82">
       <title>Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="321fb9a0">H90-1082</url>
       <bibkey>ostendorf-rohlicek-1990-segment</bibkey>
     </paper>
@@ -681,14 +681,14 @@
     </paper>
     <paper id="86">
       <title>Interactive Multimedia Explanation for Equipment Maintenance and Repair</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Steven</first><last>Feiner</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="steven-feiner"><first>Steven</first><last>Feiner</last></author>
       <url hash="8cd6ac70">H90-1086</url>
       <bibkey>mckeown-feiner-1990-interactive-multimedia</bibkey>
     </paper>
     <paper id="87">
       <title>Large Vocabulary Speech Recognition Prototype</title>
-      <author><first>Janet M.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
       <url hash="0f31b768">H90-1087</url>
       <bibkey>baker-1990-large</bibkey>
     </paper>
@@ -700,38 +700,38 @@
     </paper>
     <paper id="89">
       <title>Robust Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="b1087740">H90-1089</url>
       <bibkey>weinstein-paul-1990-robust</bibkey>
     </paper>
     <paper id="90">
       <title>Research and Development for Spoken Language Systems</title>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="d6d6314f">H90-1090</url>
       <bibkey>zue-1990-research</bibkey>
     </paper>
     <paper id="91">
       <title><fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: <fixed-case>SLS</fixed-case> Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="c6a922f6">H90-1091</url>
       <bibkey>pallett-1990-nist</bibkey>
     </paper>
     <paper id="92">
       <title>Extending the Scope of Text Understanding Systems Evaluation</title>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="ef1b8643">H90-1092</url>
       <bibkey>sundheim-1990-extending</bibkey>
     </paper>
     <paper id="93">
       <title>PROGRESS REPORT: Active Knowledge Structures in Natural Language Understanding</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="c17831d2">H90-1093</url>
       <bibkey>wilks-1990-progress</bibkey>
     </paper>
     <paper id="94">
       <title>Research in Text Processing: Creating Robust and Portable Systems</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="1f24907f">H90-1094</url>
       <bibkey>grishman-1990-research</bibkey>
     </paper>
@@ -743,44 +743,44 @@
     </paper>
     <paper id="96">
       <title>A Real-Time Spoken-Language System Interactive Problem-Solving</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="d5df4426">H90-1096</url>
       <bibkey>price-moore-1990-real</bibkey>
     </paper>
     <paper id="97">
       <title>Real-Time Speech Recognition Systems</title>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="be73e4da">H90-1097</url>
       <bibkey>murveit-weintraub-1990-real</bibkey>
     </paper>
     <paper id="98">
       <title>Project Summary: Linguistic Knowledge Sources for Spoken Language Understanding</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Deborah</first><last>Dahl</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="deborah-a-dahl"><first>Deborah</first><last>Dahl</last></author>
       <url hash="40e9cebd">H90-1098</url>
       <bibkey>hirschman-dahl-1990-project</bibkey>
     </paper>
     <paper id="99">
       <title>Natural Language Research</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="2e2e74fe">H90-1099</url>
       <bibkey>joshi-etal-1990-natural</bibkey>
     </paper>
     <paper id="100">
       <title>Very Large Annotated Database of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="62175baa">H90-1100</url>
       <bibkey>marcus-1990-large</bibkey>
     </paper>
     <paper id="101">
       <title>Natural Language, Knowledge Representation, and Discourse</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <url hash="744f89e1">H90-1101</url>
       <bibkey>allen-schubert-1990-natural</bibkey>
     </paper>
@@ -791,7 +791,7 @@
     </paper>
     <paper id="103">
       <title>Opportunities for Advanced Speech Processing in Military Computer-Based Systems*</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="a0415d7b">H90-1103</url>
       <bibkey>weinstein-1990-opportunities</bibkey>
     </paper>
diff --git a/data/xml/H91.xml b/data/xml/H91.xml
index 26c1e31a4d..56ed944aca 100644
--- a/data/xml/H91.xml
+++ b/data/xml/H91.xml
@@ -12,20 +12,20 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the Fourth <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop</title>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="70466e2e">H91-1001</url>
       <bibkey>price-1991-overview</bibkey>
     </paper>
     <paper id="2">
       <title>Session 1: Speech and Natural Language Efforts in the <fixed-case>U. S.</fixed-case> and Abroad</title>
-      <author><first>Mark Y.</first><last>Liberman</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mark-liberman"><first>Mark Y.</first><last>Liberman</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="34262baa">H91-1002</url>
       <bibkey>liberman-price-1991-session</bibkey>
     </paper>
     <paper id="3">
       <title>The <fixed-case>ESPRIT</fixed-case> Project <fixed-case>POLYGLOT</fixed-case></title>
-      <author><first>Louis</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Louis</first><last>Boves</last></author>
       <url hash="5a658387">H91-1003</url>
       <bibkey>boves-1991-esprit</bibkey>
     </paper>
@@ -54,20 +54,20 @@
     </paper>
     <paper id="8">
       <title>Session 2: <fixed-case>DARPA</fixed-case> Resource Management and <fixed-case>ATIS</fixed-case> Benchmark Test Poster Session</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="632d2f44">H91-1008</url>
       <bibkey>pallett-1991-session</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems Resource Management Benchmark Results <fixed-case>F</fixed-case>ebruary 1991</title>
-      <author><first>James</first><last>Baker</last></author>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="james-baker"><first>James</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Pard</first><last>Bamberg</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <author><first>Francesco</first><last>Scattone</last></author>
-      <author><first>Dean</first><last>Sturtevant</last></author>
+      <author id="dean-sturtevant"><first>Dean</first><last>Sturtevant</last></author>
       <author><first>Ousmane</first><last>Ba</last></author>
       <author><first>Richard</first><last>Benedict</last></author>
       <url hash="f6ebffa2">H91-1009</url>
@@ -81,9 +81,9 @@
     </paper>
     <paper id="11">
       <title>Modelling Context Dependency in Acoustic-Phonetic and Lexical Representations</title>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="38cd5729">H91-1011</url>
       <bibkey>phillips-etal-1991-modelling</bibkey>
     </paper>
@@ -111,35 +111,35 @@
     </paper>
     <paper id="14">
       <title>Development and Preliminary Evaluation of the <fixed-case>MIT</fixed-case> <fixed-case>ATIS</fixed-case> System</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>David</first><last>Goddeau</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Hong</first><last>Leung</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="hong-leung"><first>Hong</first><last>Leung</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="7c112aa1">H91-1014</url>
       <bibkey>seneff-etal-1991-development</bibkey>
     </paper>
     <paper id="15">
       <title>Speech Recognition in <fixed-case>SRI</fixed-case>’s Resource Management and <fixed-case>ATIS</fixed-case> Systems</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>John</first><last>Butzberger</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="9c3e6ed8">H91-1015</url>
       <bibkey>murveit-etal-1991-speech</bibkey>
     </paper>
     <paper id="16">
       <title>Evaluation of the <fixed-case>CMU</fixed-case> <fixed-case>ATIS</fixed-case> System</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="767ad1d9">H91-1016</url>
       <bibkey>ward-1991-evaluation</bibkey>
     </paper>
     <paper id="17">
       <title>Using Semantics to Correct Parser Output for <fixed-case>ATIS</fixed-case> Utterances</title>
-      <author><first>Sheryl</first><last>Young</last></author>
+      <author id="sheryl-young"><first>Sheryl</first><last>Young</last></author>
       <url hash="573d0fec">H91-1017</url>
       <bibkey>young-1991-using</bibkey>
     </paper>
@@ -159,13 +159,13 @@
     </paper>
     <paper id="19">
       <title>A Textual processor to handle <fixed-case>ATIS</fixed-case> queries</title>
-      <author><first>Douglas</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas</first><last>O’Shaughnessy</last></author>
       <url hash="c44b7d20">H91-1019</url>
       <bibkey>oshaughnessy-1991-textual</bibkey>
     </paper>
     <paper id="20">
       <title>Stochastic Representation of Conceptual Structure in the <fixed-case>ATIS</fixed-case> Task</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <author><first>Esther</first><last>Levin</last></author>
       <author><first>Chin-Hui</first><last>Lee</last></author>
       <url hash="9042bdff">H91-1020</url>
@@ -173,45 +173,45 @@
     </paper>
     <paper id="21">
       <title>Augmented Role Filling Capabilities for Semantic Interpretation of Spoken Language</title>
-      <author><first>Lewis</first><last>Norton</last></author>
-      <author><first>Marcia</first><last>Linebarger</last></author>
-      <author><first>Deborah</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia</first><last>Linebarger</last></author>
+      <author id="deborah-a-dahl"><first>Deborah</first><last>Dahl</last></author>
       <author><first>Nghi</first><last>Nguyen</last></author>
       <url hash="ede41e3f">H91-1021</url>
       <bibkey>norton-etal-1991-augmented</bibkey>
     </paper>
     <paper id="22">
       <title>The Use of a Commercial Natural Language Interface in the <fixed-case>ATIS</fixed-case> Task</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <url hash="012cd4a3">H91-1022</url>
       <bibkey>tzoukermann-1991-use</bibkey>
     </paper>
     <paper id="23">
       <title>Session 3: Machine Translation</title>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="0f85d800">H91-1023</url>
       <bibkey>carbonell-1991-session</bibkey>
     </paper>
     <paper id="24">
       <title>Machine Translation Using Abductive Inference</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
       <url hash="deb2e50e">H91-1024</url>
       <bibkey>hobbs-kameyama-1991-machine</bibkey>
     </paper>
     <paper id="25">
       <title>A Statistical Approach to Sense Disambiguation in Machine Translation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <url hash="c1949fff">H91-1025</url>
       <bibkey>brown-etal-1991-statistical</bibkey>
     </paper>
     <paper id="26">
       <title>Identifying Word Correspondences in Parallel Texts</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <url hash="ed4a295c">H91-1026</url>
       <bibkey>gale-church-1991-identifying</bibkey>
     </paper>
@@ -244,64 +244,64 @@
     </paper>
     <paper id="31">
       <title>Signal Representation Attribute Extraction and the Use Distinctive Features for Phonetic Classification</title>
-      <author><first>Helen M.</first><last>Meng</last></author>
-      <author><first>Victor W.</first><last>Zue</last></author>
-      <author><first>Hong C.</first><last>Leung</last></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
+      <author id="hong-leung"><first>Hong C.</first><last>Leung</last></author>
       <url hash="25d399fe">H91-1031</url>
       <bibkey>meng-etal-1991-signal</bibkey>
     </paper>
     <paper id="32">
       <title>Session 5: Natural Language <fixed-case>I</fixed-case></title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <url hash="2f420bfb">H91-1032</url>
       <bibkey>allen-1991-session</bibkey>
     </paper>
     <paper id="33">
       <title>The Mapping Unit Approach to Subcategorization</title>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="bbdfe64f">H91-1033</url>
       <bibkey>bobrow-etal-1991-mapping</bibkey>
     </paper>
     <paper id="34">
       <title>A Template Matcher for Robust <fixed-case>NL</fixed-case> Interpretation</title>
-      <author><first>Eric</first><last>Jackson</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="eric-jackson"><first>Eric</first><last>Jackson</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <author><first>Ann</first><last>Podlozny</last></author>
       <url hash="87722b6b">H91-1034</url>
       <bibkey>jackson-etal-1991-template</bibkey>
     </paper>
     <paper id="35">
       <title>Fixed and Flexible Phrase Structure: Coordination in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="187cfc57">H91-1035</url>
       <bibkey>joshi-schabes-1991-fixed</bibkey>
     </paper>
     <paper id="36">
       <title>Efficient Bottom-Up Parsing</title>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="9922b7a9">H91-1036</url>
       <bibkey>moore-dowding-1991-efficient</bibkey>
     </paper>
     <paper id="37">
       <title>Partial Parsing: A Report on Work in Progress</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
       <author id="robert-bobrow"><first>R.</first><last>Bobrow</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <author><first>Jeff</first><last>Palmucci</last></author>
       <url hash="fb60e702">H91-1037</url>
       <bibkey>weischedel-etal-1991-partial</bibkey>
     </paper>
     <paper id="38">
       <title>Session 6: Demonstrations and Videotapes of Speech and Natural Language Technologies</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <url hash="9037289f">H91-1038</url>
       <bibkey>ostendorf-1991-session</bibkey>
     </paper>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="40">
       <title>Using Spoken Language to Facilitate Military Transportation Planning</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <author><first>Dan</first><last>Ellard</last></author>
       <author><first>Pat</first><last>Peterson</last></author>
       <author><first>Varda</first><last>Shaked</last></author>
@@ -322,27 +322,27 @@
     </paper>
     <paper id="41">
       <title>Session 7: Natural Language <fixed-case>II</fixed-case></title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="7e8d3f3b">H91-1041</url>
       <bibkey>roukos-1991-session</bibkey>
     </paper>
     <paper id="42">
       <title>Statistical Agenda Parsing</title>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
       <url hash="b415dbe3">H91-1042</url>
       <bibkey>bobrow-1991-statistical</bibkey>
     </paper>
     <paper id="43">
       <title>Some Results on Stochastic Language Modelling</title>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <author><first>Roland</first><last>Kuhn</last></author>
       <url hash="8fa61b86">H91-1043</url>
       <bibkey>de-mori-kuhn-1991-results</bibkey>
     </paper>
     <paper id="44">
       <title>Parsing the <fixed-case>V</fixed-case>oyager Domain Using <fixed-case>P</fixed-case>earl</title>
-      <author><first>David M.</first><last>Magerman</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <url hash="80d5b55c">H91-1044</url>
       <bibkey>magerman-marcus-1991-parsing</bibkey>
     </paper>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="47">
       <title>Session 8: Speech <fixed-case>II</fixed-case></title>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="1865128b">H91-1047</url>
       <bibkey>lee-1991-session</bibkey>
     </paper>
@@ -384,8 +384,8 @@
     </paper>
     <paper id="50">
       <title>Recent Progress in Robust Vocabulary-Independent Speech Recognition</title>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="f7b2e0b0">H91-1050</url>
       <bibkey>hon-lee-1991-recent</bibkey>
     </paper>
@@ -395,13 +395,13 @@
       <author id="peter-v-desouza"><first>P.V.</first><last>de Souza</last></author>
       <author><first>P.S.</first><last>Gopalakrishnan</last></author>
       <author><first>D.</first><last>Nahamoo</last></author>
-      <author><first>M.A.</first><last>Picheny</last></author>
+      <author id="m-a-picheny"><first>M.A.</first><last>Picheny</last></author>
       <url hash="98ec4b57">H91-1051</url>
       <bibkey>bahl-etal-1991-context</bibkey>
     </paper>
     <paper id="52">
       <title>Session 9: Speech <fixed-case>III</fixed-case></title>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <url hash="36b31e07">H91-1052</url>
       <bibkey>kubala-1991-session</bibkey>
     </paper>
@@ -426,8 +426,8 @@
     </paper>
     <paper id="56">
       <title>Lexical Access With a Statistically-Derived Phonetic Network</title>
-      <author><first>Michael D.</first><last>Riley</last></author>
-      <author><first>Andrej</first><last>Ljolje</last></author>
+      <author id="michael-riley"><first>Michael D.</first><last>Riley</last></author>
+      <author id="andrej-ljolje"><first>Andrej</first><last>Ljolje</last></author>
       <url hash="0f4b3cc5">H91-1056</url>
       <bibkey>riley-ljolje-1991-lexical</bibkey>
     </paper>
@@ -442,13 +442,13 @@
     </paper>
     <paper id="58">
       <title>Session 10: Corpora and Evaluation</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="5824b453">H91-1058</url>
       <bibkey>weinstein-1991-session</bibkey>
     </paper>
     <paper id="59">
       <title><fixed-case>T</fixed-case>hird <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference (<fixed-case>MUC</fixed-case>-3): Phase 1 Status Report</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="73f6481a">H91-1059</url>
       <bibkey>sundheim-1991-third</bibkey>
     </paper>
@@ -474,55 +474,55 @@
     </paper>
     <paper id="61">
       <title>Evaluating Text Categorization <fixed-case>I</fixed-case></title>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <url hash="cfba599d">H91-1061</url>
       <bibkey>lewis-1991-evaluating</bibkey>
     </paper>
     <paper id="62">
       <title>A Proposal for Incremental Dialogue Evaluation</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
       <url hash="c4e52eb6">H91-1062</url>
       <bibkey>bates-ayuso-1991-proposal</bibkey>
     </paper>
     <paper id="63">
       <title>Session 11 - Natural Language <fixed-case>III</fixed-case></title>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="9d62f00b">H91-1063</url>
       <bibkey>marcus-1991-session</bibkey>
     </paper>
     <paper id="64">
       <title>Discourse Structure in the <fixed-case>TRAINS</fixed-case> Project</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <url hash="394f52ed">H91-1064</url>
       <bibkey>allen-1991-discourse</bibkey>
     </paper>
     <paper id="65">
       <title>Studies in Part of Speech Labelling</title>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="9d60c514">H91-1065</url>
       <bibkey>meteer-etal-1991-studies</bibkey>
     </paper>
     <paper id="66">
       <title>Lexico-Semantic Pattern Matching as a Companion to Parsing in Text Understanding</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
-      <author><first>George R.</first><last>Krupka</last></author>
-      <author><first>Lisa F.</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="george-krupka"><first>George R.</first><last>Krupka</last></author>
+      <author id="lisa-rau"><first>Lisa F.</first><last>Rau</last></author>
       <url hash="e1c56f8b">H91-1066</url>
       <bibkey>jacobs-etal-1991-lexico</bibkey>
     </paper>
     <paper id="67">
       <title>Automatic Acquisition of Subcategorization Frames from Tagged Text</title>
       <author><first>Michael R.</first><last>Brent</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <url hash="dc5948ad">H91-1067</url>
       <bibkey>brent-berwick-1991-automatic</bibkey>
     </paper>
     <paper id="68">
       <title>Fast Text Processing for Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Barbara</first><last>Vauthey</last></author>
       <url hash="91b4d4a5">H91-1068</url>
       <bibkey>strzalkowski-vauthey-1991-fast</bibkey>
@@ -535,42 +535,42 @@
     </paper>
     <paper id="70">
       <title>Interactive Problem Solving and Dialogue in the <fixed-case>ATIS</fixed-case> Domain</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="07a336fb">H91-1070</url>
       <bibkey>seneff-etal-1991-interactive</bibkey>
     </paper>
     <paper id="71">
       <title>Collection of Spontaneous Speech for the <fixed-case>ATIS</fixed-case> Domain and Comparative Analyses of Data Collected at <fixed-case>MIT</fixed-case> and <fixed-case>TI</fixed-case></title>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="77c7030c">H91-1071</url>
       <bibkey>polifroni-etal-1991-collection</bibkey>
     </paper>
     <paper id="72">
       <title>Integrating Syntax and Semantics into Spoken Language Understanding</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
       <url hash="84e03661">H91-1072</url>
       <bibkey>hirschman-etal-1991-integrating</bibkey>
     </paper>
     <paper id="73">
       <title>The Use of Prosody in Syntactic Disambiguation</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Stefanie</first><last>Shattuck-Hufnagel</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="stefanie-shattuck-hufnagel"><first>Stefanie</first><last>Shattuck-Hufnagel</last></author>
       <author><first>Cynthia</first><last>Fong</last></author>
       <url hash="a7269a87">H91-1073</url>
       <bibkey>price-etal-1991-use</bibkey>
     </paper>
     <paper id="74">
       <title>Predicting Intonational Boundaries Automatically from Text: The <fixed-case>ATIS</fixed-case> Domain</title>
-      <author><first>Michelle Q.</first><last>Wang</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="michelle-q-wang"><first>Michelle Q.</first><last>Wang</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="ff6a8d9a">H91-1074</url>
       <bibkey>wang-hirschberg-1991-predicting</bibkey>
     </paper>
@@ -602,34 +602,34 @@
     </paper>
     <paper id="79">
       <title>Adaptive Natural Language Processing</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="4a21ded4">H91-1079</url>
       <bibkey>weischedel-1991-adaptive</bibkey>
     </paper>
     <paper id="80">
       <title>Research in Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="ab968ec0">H91-1080</url>
       <bibkey>makhoul-schwartz-1991-research</bibkey>
     </paper>
     <paper id="81">
       <title>Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="1ca0a354">H91-1081</url>
       <bibkey>makhoul-1991-spoken</bibkey>
     </paper>
     <paper id="82">
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="9898a6bc">H91-1082</url>
       <bibkey>ostendorf-price-1991-evaluating</bibkey>
     </paper>
     <paper id="83">
       <title>Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="f068e8c2">H91-1083</url>
       <bibkey>ostendorf-rohlicek-1991-segment</bibkey>
     </paper>
@@ -653,14 +653,14 @@
     </paper>
     <paper id="87">
       <title>Interactive Multimedia Explanation for Equipment Maintenance and Repair</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Steven</first><last>Feiner</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="steven-feiner"><first>Steven</first><last>Feiner</last></author>
       <url hash="d83a14ad">H91-1087</url>
       <bibkey>mckeown-feiner-1991-interactive</bibkey>
     </paper>
     <paper id="88">
       <title>Progress Report for <fixed-case>DARPA</fixed-case> <fixed-case>SLS</fixed-case> Program at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc.</title>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
       <url hash="46c0ce16">H91-1088</url>
       <bibkey>baker-gillick-1991-progress</bibkey>
@@ -673,39 +673,39 @@
     </paper>
     <paper id="90">
       <title>Spoken Language Recognition and Understanding</title>
-      <author><first>Victor W.</first><last>Zue</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="a12c21a5">H91-1090</url>
       <bibkey>zue-hirschman-1991-spoken</bibkey>
     </paper>
     <paper id="91">
       <title>Robust Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="230aa085">H91-1091</url>
       <bibkey>weinstein-paul-1991-robust</bibkey>
     </paper>
     <paper id="92">
       <title><fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: <fixed-case>SLS</fixed-case> Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="2532e7e1">H91-1092</url>
       <bibkey>pallett-1991-nist</bibkey>
     </paper>
     <paper id="93">
       <title>Evaluating Text Understanding Systems</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="ed5387e5">H91-1093</url>
       <bibkey>sundheim-1991-evaluating</bibkey>
     </paper>
     <paper id="94">
       <title>Active Knowledge Structures in Natural Language Understanding</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="180e1706">H91-1094</url>
       <bibkey>wilks-1991-active</bibkey>
     </paper>
     <paper id="95">
       <title>Robust and Portable Text Processing</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="5654e7eb">H91-1095</url>
       <bibkey>grishman-1991-robust</bibkey>
     </paper>
@@ -723,27 +723,27 @@
     </paper>
     <paper id="98">
       <title>Real-Time Speech Recognition System</title>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Mitchel</first><last>Weintraub</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="mitch-weintraub"><first>Mitchel</first><last>Weintraub</last></author>
       <url hash="aebd521e">H91-1098</url>
       <bibkey>murveit-weintraub-1991-real</bibkey>
     </paper>
     <paper id="99">
       <title><fixed-case>SRI</fixed-case>’s Real-Time Spoken Language System</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="8cf7f212">H91-1099</url>
       <bibkey>price-moore-1991-sris</bibkey>
     </paper>
     <paper id="100">
       <title><fixed-case>TACITUS</fixed-case>: The Abductive Commonsense Inference-based Text Understanding System</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="19f5f7d5">H91-1100</url>
       <bibkey>hobbs-1991-tacitus</bibkey>
     </paper>
     <paper id="101">
       <title>Linguistic Knowledge Sources for Spoken Language Understanding</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="4a13f8d5">H91-1101</url>
       <bibkey>dahl-1991-linguistic</bibkey>
     </paper>
@@ -755,29 +755,29 @@
     </paper>
     <paper id="103">
       <title>Natural Language Research</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="8bc5aa55">H91-1103</url>
       <bibkey>joshi-etal-1991-natural</bibkey>
     </paper>
     <paper id="104">
       <title>Very Large Annotated Database of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="511c1eef">H91-1104</url>
       <bibkey>marcus-1991-large</bibkey>
     </paper>
     <paper id="105">
       <title>Natural Language, Knowledge Representation and Discourse</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <url hash="63f22584">H91-1105</url>
       <bibkey>allen-schubert-1991-natural</bibkey>
     </paper>
     <paper id="106">
       <title>The <fixed-case>P</fixed-case>enman Natural Language Project Systemics-Based Machine Translation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="ebd5b3b6">H91-1106</url>
       <bibkey>hovy-1991-penman</bibkey>
     </paper>
diff --git a/data/xml/H92.xml b/data/xml/H92.xml
index 12959390f3..d3fdaf510d 100644
--- a/data/xml/H92.xml
+++ b/data/xml/H92.xml
@@ -12,69 +12,69 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the Fifth <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop</title>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <url hash="5e3fb1d6">H92-1001</url>
       <bibkey>marcus-1992-overview</bibkey>
     </paper>
     <paper id="2">
       <title>Session <fixed-case>I</fixed-case>: Evaluating Spoken Language</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <url hash="447548ea">H92-1002</url>
       <bibkey>allen-1992-session</bibkey>
     </paper>
     <paper id="3">
       <title>Multi-Site Data Collection for a Spoken Language Corpus</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="69de3db4">H92-1003</url>
       <bibkey>hirschman-1992-multi</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>DARPA</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Test Results</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <author><first>Nancy L.</first><last>Dahlgren</last></author>
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
-      <author><first>William M.</first><last>Fisher</last></author>
-      <author><first>John S.</first><last>Garofolo</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="william-m-fisher"><first>William M.</first><last>Fisher</last></author>
+      <author id="john-s-garofolo"><first>John S.</first><last>Garofolo</last></author>
       <author><first>Brett C.</first><last>Tjaden</last></author>
       <url hash="ebf75982">H92-1004</url>
       <bibkey>pallett-etal-1992-darpa</bibkey>
     </paper>
     <paper id="5">
       <title>Experiments in Evaluating Interactive Spoken Language Systems</title>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="92ff6868">H92-1005</url>
       <bibkey>polifroni-etal-1992-experiments</bibkey>
     </paper>
     <paper id="6">
       <title>Subject-Based Evaluation Measures for Interactive Spoken Language Systems</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <author><first>Elizabeth</first><last>Wade</last></author>
       <url hash="6748f9e7">H92-1006</url>
       <bibkey>price-etal-1992-subject</bibkey>
     </paper>
     <paper id="7">
       <title>Session 2: Spoken Language Systems <fixed-case>II</fixed-case></title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <url hash="bf8a1c35">H92-1007</url>
       <bibkey>ward-1992-session</bibkey>
     </paper>
     <paper id="8">
       <title>Spontaneous Speech Collection for the <fixed-case>ATIS</fixed-case> Domain with an Aural User Feedback Paradigm</title>
       <author><first>Christine</first><last>Pao</last></author>
-      <author><first>Jay</first><last>Wilpon</last></author>
+      <author id="jay-wilpon"><first>Jay</first><last>Wilpon</last></author>
       <url hash="0a101867">H92-1008</url>
       <bibkey>pao-wilpon-1992-spontaneous</bibkey>
     </paper>
     <paper id="9">
       <title>Human-Machine Problem Solving Using Spoken Language Systems (<fixed-case>SLS</fixed-case>): Factors Affecting Performance and User Satisfaction</title>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <author><first>Elizabeth</first><last>Wade</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="61f0d661">H92-1009</url>
       <bibkey>shriberg-etal-1992-human</bibkey>
     </paper>
@@ -86,21 +86,21 @@
     </paper>
     <paper id="11">
       <title>Experiences Collecting Genuine Spoken Enquiries using <fixed-case>WOZ</fixed-case> Techniques</title>
-      <author><first>Roger</first><last>Moore</last></author>
+      <author id="roger-k-moore"><first>Roger</first><last>Moore</last></author>
       <author><first>Angela</first><last>Morris</last></author>
       <url hash="c1e32829">H92-1011</url>
       <bibkey>moore-morris-1992-experiences</bibkey>
     </paper>
     <paper id="12">
       <title>Session 3: Spoken Language Systems <fixed-case>III</fixed-case></title>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <url hash="973fc7ee">H92-1012</url>
       <bibkey>makhoul-1992-session</bibkey>
     </paper>
     <paper id="13">
       <title>Progress Report on the <fixed-case>C</fixed-case>hronus System: <fixed-case>ATIS</fixed-case> Benchmark Results</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <author><first>Zakhar</first><last>Gorelov</last></author>
       <author><first>Esther</first><last>Levin</last></author>
       <author><first>Chin-Hui</first><last>Lee</last></author>
@@ -110,64 +110,64 @@
     </paper>
     <paper id="14">
       <title><fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> and <fixed-case>HARC</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Results</title>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>Chris</first><last>Barry</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="chris-barry"><first>Chris</first><last>Barry</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="3c539b9e">H92-1014</url>
       <bibkey>kubala-etal-1992-bbn</bibkey>
     </paper>
     <paper id="15">
       <title>Speech Understanding in Open Tasks</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <author><first>Sunil</first><last>Issar</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Sheryl</first><last>Young</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="sheryl-young"><first>Sheryl</first><last>Young</last></author>
       <author><first>Mike</first><last>Matessa</last></author>
       <author><first>Fu-Hua</first><last>Liu</last></author>
-      <author><first>Richard</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard</first><last>Stern</last></author>
       <url hash="9573383f">H92-1015</url>
       <bibkey>ward-etal-1992-speech</bibkey>
     </paper>
     <paper id="16">
       <title>The <fixed-case>MIT</fixed-case> <fixed-case>ATIS</fixed-case> System: <fixed-case>F</fixed-case>ebruary 1992 Progress Report</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>David</first><last>Goddeau</last></author>
-      <author><first>David</first><last>Goodine</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="david-goodine"><first>David</first><last>Goodine</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="29afab94">H92-1016</url>
       <bibkey>zue-etal-1992-mit</bibkey>
     </paper>
     <paper id="17">
       <title>Recent Improvements and Benchmark Results for <fixed-case>P</fixed-case>aramax <fixed-case>ATIS</fixed-case> System</title>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
       <url hash="23dae2f3">H92-1017</url>
       <bibkey>norton-etal-1992-recent</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational Results <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Test</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>Eric</first><last>Jackson</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="eric-jackson"><first>Eric</first><last>Jackson</last></author>
       <url hash="a7f7352d">H92-1018</url>
       <bibkey>appelt-jackson-1992-sri</bibkey>
     </paper>
     <paper id="19">
       <title>Session 4: Statistical Language Modeling</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="580d7a3d">H92-1019</url>
       <bibkey>joshi-1992-session</bibkey>
     </paper>
@@ -182,8 +182,8 @@
     </paper>
     <paper id="21">
       <title>Improvements in Stochastic Language Modeling</title>
-      <author><first>Ronald</first><last>Rosenfeld</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="ronald-rosenfeld"><first>Ronald</first><last>Rosenfeld</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <url hash="da46c557">H92-1021</url>
       <bibkey>rosenfeld-huang-1992-improvements</bibkey>
     </paper>
@@ -195,11 +195,11 @@
     </paper>
     <paper id="23">
       <title>Decision Tree Models Applied to the Labeling of Text with Parts-of-Speech</title>
-      <author><first>Ezra</first><last>Black</last></author>
-      <author><first>Fred</first><last>Jelinek</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
+      <author id="frederick-jelinek"><first>Fred</first><last>Jelinek</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
       <author id="robert-l-mercer"><first>Robert</first><last>Mercer</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="88c711a0">H92-1023</url>
       <bibkey>black-etal-1992-decision</bibkey>
     </paper>
@@ -212,19 +212,19 @@
     </paper>
     <paper id="25">
       <title>Probabilistic Prediction and Picky Chart Parsing</title>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <author><first>Carl</first><last>Weir</last></author>
       <url hash="dc3dd5d0">H92-1025</url>
       <bibkey>magerman-weir-1992-probabilistic</bibkey>
     </paper>
     <paper id="26">
       <title>Towards History-based Grammars: Using Richer Models for Probabilistic Parsing</title>
-      <author><first>Ezra</first><last>Black</last></author>
-      <author><first>Fred</first><last>Jelinek</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
+      <author id="frederick-jelinek"><first>Fred</first><last>Jelinek</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <author id="robert-l-mercer"><first>Robert</first><last>Mercer</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="6971764c">H92-1026</url>
       <bibkey>black-etal-1992-towards</bibkey>
     </paper>
@@ -239,34 +239,34 @@
       <author><first>Kevin</first><last>Mark</last></author>
       <author><first>Michael</first><last>Miller</last></author>
       <author><first>Ulf</first><last>Grenander</last></author>
-      <author><first>Steve</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steve</first><last>Abney</last></author>
       <url hash="1d967205">H92-1028</url>
       <bibkey>mark-etal-1992-parameter</bibkey>
     </paper>
     <paper id="29">
       <title>An Analogical Parser for Restricted Domains</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <url hash="8c03c760">H92-1029</url>
       <bibkey>hindle-1992-analogical</bibkey>
     </paper>
     <paper id="30">
       <title>Automatically Acquiring Phrase Structure Using Distributional Analysis</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="6327a8a8">H92-1030</url>
       <bibkey>brill-marcus-1992-automatically</bibkey>
     </paper>
     <paper id="31">
       <title>Session 5<fixed-case>A</fixed-case>: Acoustic Modeling</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="5b108dd9">H92-1031</url>
       <bibkey>murveit-1992-session</bibkey>
     </paper>
     <paper id="32">
       <title>Recent Topics in Speech Recognition Research at <fixed-case>NTT</fixed-case> <fixed-case>L</fixed-case>aboratories</title>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <author><first>Kiyohiro</first><last>Shikano</last></author>
-      <author><first>Shoichi</first><last>Matsunaga</last></author>
+      <author id="shoichi-matsunaga"><first>Shoichi</first><last>Matsunaga</last></author>
       <author><first>Tatsuo</first><last>Matsuoka</last></author>
       <author><first>Satoshi</first><last>Takahashi</last></author>
       <author><first>Tomokazu</first><last>Yamada</last></author>
@@ -275,15 +275,15 @@
     </paper>
     <paper id="33">
       <title>Vocabulary and Environment Adaptation in Vocabulary-Independent Speech Recognition</title>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <url hash="88660978">H92-1033</url>
       <bibkey>hon-lee-1992-vocabulary</bibkey>
     </paper>
     <paper id="34">
       <title>Subphonetic Modeling for Speech Recognition</title>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <url hash="0424277f">H92-1034</url>
       <bibkey>hwang-huang-1992-subphonetic</bibkey>
     </paper>
@@ -305,15 +305,15 @@
     </paper>
     <paper id="37">
       <title>Minimizing Speaker Variation Effects for Speaker-Independent Speech Recognition</title>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <url hash="a4400bf0">H92-1037</url>
       <bibkey>huang-1992-minimizing</bibkey>
     </paper>
     <paper id="38">
       <title>Recognition Using Classification and Segmentation Scoring</title>
-      <author><first>Owen</first><last>Kimball</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Robin</first><last>Rohlicek</last></author>
+      <author id="owen-kimball"><first>Owen</first><last>Kimball</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>Robin</first><last>Rohlicek</last></author>
       <url hash="819a983a">H92-1038</url>
       <bibkey>kimball-etal-1992-recognition</bibkey>
     </paper>
@@ -325,13 +325,13 @@
     </paper>
     <paper id="40">
       <title>Information Retrieval Using Robust Natural Language Processing</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="cc1e5c56">H92-1040</url>
       <bibkey>strzalkowski-1992-information</bibkey>
     </paper>
     <paper id="41">
       <title>Feature Selection and Feature Extraction for Text Categorization</title>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <url hash="3e9db11f">H92-1041</url>
       <bibkey>lewis-1992-feature</bibkey>
     </paper>
@@ -343,62 +343,62 @@
     </paper>
     <paper id="43">
       <title>Classifying Texts Using Relevancy Signatures</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Wendy</first><last>Lehnert</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
       <url hash="886dcec0">H92-1043</url>
       <bibkey>riloff-lehnert-1992-classifying</bibkey>
     </paper>
     <paper id="44">
       <title>Session 6: Lexicon and Lexical Semantics</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="848a91c6">H92-1044</url>
       <bibkey>jacobs-1992-session</bibkey>
     </paper>
     <paper id="45">
       <title>One Sense Per Discourse</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="7418c7db">H92-1045</url>
       <bibkey>gale-etal-1992-one</bibkey>
     </paper>
     <paper id="46">
       <title>Lexical Disambiguation using Simulated Annealing</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <author><first>Joe</first><last>Guthrie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <url hash="0fab9b6f">H92-1046</url>
       <bibkey>cowie-etal-1992-lexical</bibkey>
     </paper>
     <paper id="47">
       <title>The Acquisition of Lexical Semantic Knowledge from Large Corpora</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="06d43a2c">H92-1047</url>
       <bibkey>pustejovsky-1992-acquisition</bibkey>
     </paper>
     <paper id="48">
       <title>Session 7: Demonstrations and Videos</title>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="912c7aeb">H92-1048</url>
       <bibkey>zue-1992-session</bibkey>
     </paper>
     <paper id="49">
       <title><fixed-case>BBN</fixed-case> Real-Time Speech Recognition Demonstrations</title>
-      <author><first>Steve</first><last>Austin</last></author>
-      <author><first>Rusty</first><last>Bobrow</last></author>
+      <author id="steve-austin"><first>Steve</first><last>Austin</last></author>
+      <author id="robert-bobrow"><first>Rusty</first><last>Bobrow</last></author>
       <author><first>Dan</first><last>Ellard</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
       <author><first>Pat</first><last>Peterson</last></author>
-      <author><first>Paul</first><last>Placeway</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="paul-placeway"><first>Paul</first><last>Placeway</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="39dd67c6">H92-1049</url>
       <bibkey>austin-etal-1992-bbn</bibkey>
     </paper>
     <paper id="50">
       <title>Session 8<fixed-case>A</fixed-case>: Machine Translation</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="c703647d">H92-1050</url>
       <bibkey>hobbs-1992-session</bibkey>
     </paper>
@@ -406,36 +406,36 @@
       <title>Interaction between Structural Changes in Machine Translation</title>
       <author><first>Satoshi</first><last>Kinoshita</last></author>
       <author id="john-phillips"><first>John</first><last>Phillips</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="6401f190">H92-1051</url>
       <bibkey>kinoshita-etal-1992-interaction</bibkey>
     </paper>
     <paper id="52">
       <title>Approximating an Interlingua in a Principled Way</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="67194530">H92-1052</url>
       <bibkey>hovy-nirenburg-1992-approximating</bibkey>
     </paper>
     <paper id="53">
       <title>Dividing and Conquering Long Sentences in a Translation System</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <author><first>Surya</first><last>Mohanty</last></author>
       <url hash="2fbd2750">H92-1053</url>
       <bibkey>brown-etal-1992-dividing</bibkey>
     </paper>
     <paper id="54">
       <title>Session 8<fixed-case>B</fixed-case>: Robust Speech Processing</title>
-      <author><first>Jordan R.</first><last>Cohen</last></author>
+      <author id="jordan-cohen"><first>Jordan R.</first><last>Cohen</last></author>
       <url hash="1d7c0139">H92-1054</url>
       <bibkey>cohen-1992-session</bibkey>
     </paper>
     <paper id="55">
       <title>Multiple Approaches to Robust Speech Recognition</title>
-      <author><first>Richard M.</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard M.</first><last>Stern</last></author>
       <author><first>Fu-Hua</first><last>Liu</last></author>
       <author><first>Yoshiaki</first><last>Ohshima</last></author>
       <author><first>Thomas M.</first><last>Sullivan</last></author>
@@ -445,9 +445,9 @@
     </paper>
     <paper id="56">
       <title>Reduced Channel Dependence for Speech Recognition</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>John</first><last>Butzberger</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="68ab7536">H92-1056</url>
       <bibkey>murveit-etal-1992-reduced</bibkey>
     </paper>
@@ -468,38 +468,38 @@
     </paper>
     <paper id="59">
       <title>Session 9: Natural Language Processings</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="288d32ae">H92-1059</url>
       <bibkey>mckeown-1992-session</bibkey>
     </paper>
     <paper id="60">
       <title>A Relaxation Method for Understanding Speech Utterances</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="e8dce754">H92-1060</url>
       <bibkey>seneff-1992-relaxation</bibkey>
     </paper>
     <paper id="61">
       <title>Fragment Processing in the <fixed-case>DELPHI</fixed-case> System</title>
-      <author><first>David</first><last>Stallard</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
       <url hash="fc68ff22">H92-1061</url>
       <bibkey>stallard-bobrow-1992-fragment</bibkey>
     </paper>
     <paper id="62">
       <title>Syntactic/Semantic Coupling in the <fixed-case>BBN</fixed-case> <fixed-case>DELPHI</fixed-case> System</title>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <url hash="7cfb2672">H92-1062</url>
       <bibkey>bobrow-etal-1992-syntactic</bibkey>
     </paper>
     <paper id="63">
       <title>A New Approach to Text Understanding</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <url hash="d294e0d4">H92-1063</url>
       <bibkey>weischedel-etal-1992-new</bibkey>
     </paper>
@@ -511,7 +511,7 @@
     </paper>
     <paper id="65">
       <title>Session 10: Large Vocabulary <fixed-case>CSR</fixed-case></title>
-      <author><first>George R.</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George R.</first><last>Doddington</last></author>
       <url hash="786b150e">H92-1065</url>
       <bibkey>doddington-1992-session</bibkey>
     </paper>
@@ -537,56 +537,56 @@
     <paper id="68">
       <title>Spontaneous Speech Effects In Large Vocabulary Speech Recognition Applications</title>
       <author><first>John</first><last>Butzberger</last></author>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="a2e6e41e">H92-1068</url>
       <bibkey>butzberger-etal-1992-spontaneous</bibkey>
     </paper>
     <paper id="69">
       <title>Speaker-Independent Phone Recognition Using <fixed-case>BREF</fixed-case></title>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Lori F.</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori F.</first><last>Lamel</last></author>
       <url hash="e2298ba9">H92-1069</url>
       <bibkey>gauvain-lamel-1992-speaker</bibkey>
     </paper>
     <paper id="70">
       <title>Session 1<fixed-case>O</fixed-case>b: Core <fixed-case>NL</fixed-case> Lexicon and Grammar</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <url hash="5b9a2e4a">H92-1070</url>
       <bibkey>liberman-1992-session</bibkey>
     </paper>
     <paper id="71">
       <title>A National Resource Grammar</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="5a333694">H92-1071</url>
       <bibkey>hobbs-1992-national</bibkey>
     </paper>
     <paper id="72">
       <title>Session 11: Continuous Speech Recognition and Evaluation <fixed-case>I</fixed-case></title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="ae6d49bc">H92-1072</url>
       <bibkey>weinstein-1992-session</bibkey>
     </paper>
     <paper id="73">
       <title>The Design for the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal-based <fixed-case>CSR</fixed-case> Corpus</title>
       <author><first>Douglas B.</first><last>Paul</last></author>
-      <author><first>Janet M.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
       <url hash="80a16f58">H92-1073</url>
       <bibkey>paul-baker-1992-design</bibkey>
     </paper>
     <paper id="74">
       <title><fixed-case>CSR</fixed-case> Corpus Development</title>
-      <author><first>George R.</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George R.</first><last>Doddington</last></author>
       <url hash="ecd08391">H92-1074</url>
       <bibkey>doddington-1992-csr</bibkey>
     </paper>
     <paper id="75">
       <title>Collection and Analyses of <fixed-case>WSJ</fixed-case>-<fixed-case>CSR</fixed-case> Data at <fixed-case>MIT</fixed-case></title>
-      <author><first>Michael</first><last>Phillips</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="f3e44501">H92-1075</url>
       <bibkey>phillips-etal-1992-collection</bibkey>
     </paper>
@@ -599,20 +599,20 @@
     </paper>
     <paper id="77">
       <title>Session 12: Continuous Speech Recognition and Evaluation <fixed-case>II</fixed-case></title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="d24d5d17">H92-1077</url>
       <bibkey>weinstein-1992-session-12</bibkey>
     </paper>
     <paper id="78">
       <title><fixed-case>DARPA</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 Pilot Corpus <fixed-case>CSR</fixed-case> “Dry Run” Benchmark Test Results</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="caf33b4f">H92-1078</url>
       <bibkey>pallett-1992-darpa</bibkey>
     </paper>
     <paper id="79">
       <title>Large Vocabulary Recognition of <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal Sentences at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems</title>
-      <author><first>James</first><last>Baker</last></author>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="james-baker"><first>James</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Paul</first><last>Bamberg</last></author>
       <author><first>Kathleen</first><last>Bishop</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
@@ -651,30 +651,30 @@
     </paper>
     <paper id="83">
       <title>Performance of <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case>™ Speech Recognition System on <fixed-case>DARPA</fixed-case>’s <fixed-case>CSR</fixed-case> Task</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>John</first><last>Butzberger</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="5ff96fb4">H92-1083</url>
       <bibkey>murveit-etal-1992-performance</bibkey>
     </paper>
     <paper id="84">
       <title>Session 13: Prosody</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="d5dd58c7">H92-1084</url>
       <bibkey>price-hirschberg-1992-session</bibkey>
     </paper>
     <paper id="85">
       <title>Automatic Detection and Correction of Repairs in Human-Computer Dialog</title>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="123d8377">H92-1085</url>
       <bibkey>shriberg-etal-1992-automatic</bibkey>
     </paper>
     <paper id="86">
       <title>Prosodic Structure, Performance Structure and Phrase Structure</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <url hash="46c0078d">H92-1086</url>
       <bibkey>abney-1992-prosodic</bibkey>
     </paper>
@@ -687,17 +687,17 @@
     </paper>
     <paper id="88">
       <title>Towards Using Prosody in Speech Recognition/Understanding Systems: Differences Between Read and Spontaneous Speech</title>
-      <author><first>Kim E.A.</first><last>Silverman</last></author>
+      <author id="kim-e-a-silverman"><first>Kim E.A.</first><last>Silverman</last></author>
       <author><first>Eleonora</first><last>Blaauw</last></author>
       <author><first>Judith</first><last>Spitz</last></author>
-      <author><first>John F.</first><last>Pitrelli</last></author>
+      <author id="john-f-pitrelli"><first>John F.</first><last>Pitrelli</last></author>
       <url hash="99dadac2">H92-1088</url>
       <bibkey>silverman-etal-1992-towards</bibkey>
     </paper>
     <paper id="89">
       <title>Intonational Features of Local and Global Discourse Structure</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Barbara</first><last>Grosz</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="barbara-j-grosz"><first>Barbara</first><last>Grosz</last></author>
       <url hash="2f668f42">H92-1089</url>
       <bibkey>hirschberg-grosz-1992-intonational</bibkey>
     </paper>
@@ -722,61 +722,61 @@
     </paper>
     <paper id="93">
       <title>Weight Estimation for <fixed-case>N</fixed-case>-Best Rescoring</title>
-      <author><first>Ashvin</first><last>Kannan</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="ashvin-kannan"><first>Ashvin</first><last>Kannan</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="fa9ce874">H92-1093</url>
       <bibkey>kannan-etal-1992-weight</bibkey>
     </paper>
     <paper id="94">
       <title>Augmenting With Slot Filler Relevancy Signatures Data</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Wendy</first><last>Lehnert</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
       <url hash="1a53e79f">H92-1094</url>
       <bibkey>riloff-lehnert-1992-augmenting</bibkey>
     </paper>
     <paper id="95">
       <title>Language Understanding Research at <fixed-case>P</fixed-case>aramax</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Carl</first><last>Weir</last></author>
-      <author><first>Suzanne Liebowitz</first><last>Taylor</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="suzanne-liebowitz-taylor"><first>Suzanne Liebowitz</first><last>Taylor</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
       <author><first>Mark</first><last>Lipshutz</last></author>
       <url hash="14a5c6b5">H92-1095</url>
       <bibkey>dahl-etal-1992-language</bibkey>
     </paper>
     <paper id="96">
       <title>Development of a Spoken Language System</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="5615e1c8">H92-1096</url>
       <bibkey>makhoul-bates-1992-development</bibkey>
     </paper>
     <paper id="97">
       <title>Robust Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="8c400090">H92-1097</url>
       <bibkey>makhoul-schwartz-1992-robust</bibkey>
     </paper>
     <paper id="98">
       <title>Robustness, Portability, and Scalability Language Systems</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="116bb276">H92-1098</url>
       <bibkey>weischedel-1992-robustness</bibkey>
     </paper>
     <paper id="99">
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="bc8ccddb">H92-1099</url>
       <bibkey>ostendorf-price-1992-evaluating</bibkey>
     </paper>
     <paper id="100">
       <title>Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="c267dbd7">H92-1100</url>
       <bibkey>ostendorf-rohlicek-1992-segment</bibkey>
     </paper>
@@ -794,15 +794,15 @@
     </paper>
     <paper id="103">
       <title>Extracting Constraints on Word Usage from Large Text Corpora</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <url hash="25e08840">H92-1103</url>
       <bibkey>mckeown-etal-1992-extracting</bibkey>
     </paper>
     <paper id="104">
       <title>Research in Continuous Speech Recognition at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems Under the <fixed-case>DARPA</fixed-case> <fixed-case>SLS</fixed-case> Program</title>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <url hash="c628df77">H92-1104</url>
@@ -822,8 +822,8 @@
     </paper>
     <paper id="107">
       <title>Spoken Language Recognition and Understanding</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="c0aa2fd1">H92-1107</url>
       <bibkey>zue-hirschman-1992-spoken</bibkey>
     </paper>
@@ -835,20 +835,20 @@
     </paper>
     <paper id="109">
       <title>Robust Continuous Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="664554f1">H92-1109</url>
       <bibkey>weinstein-paul-1992-robust</bibkey>
     </paper>
     <paper id="110">
       <title><fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: Spoken Language Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="b465838e">H92-1110</url>
       <bibkey>pallett-1992-nist</bibkey>
     </paper>
     <paper id="111">
       <title>Evaluating Text Understanding Systems</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="6400dae6">H92-1111</url>
       <bibkey>sundheim-1992-evaluating</bibkey>
     </paper>
@@ -874,7 +874,7 @@
     </paper>
     <paper id="115">
       <title>Research in Natural Language Processing</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="6eeadb80">H92-1115</url>
       <bibkey>grishman-1992-research</bibkey>
     </paper>
@@ -886,7 +886,7 @@
     </paper>
     <paper id="117">
       <title>Annotation of <fixed-case>ATIS</fixed-case> Data</title>
-      <author><first>Kate</first><last>Hunicke-Smith</last></author>
+      <author id="kate-hunicke-smith"><first>Kate</first><last>Hunicke-Smith</last></author>
       <author><first>Jared</first><last>Bernstein</last></author>
       <url hash="6975301f">H92-1117</url>
       <bibkey>hunicke-smith-bernstein-1992-annotation</bibkey>
@@ -900,41 +900,41 @@
     </paper>
     <paper id="119">
       <title>Real-Time Speech Recognition System</title>
-      <author><first>Mitchel</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitchel</first><last>Weintraub</last></author>
       <url hash="6b096d40">H92-1119</url>
       <bibkey>weintraub-1992-real</bibkey>
     </paper>
     <paper id="120">
       <title>A Real-Time Spoken-Language System for Interactive Problem Solving</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="640f9923">H92-1120</url>
       <bibkey>price-moore-1992-real</bibkey>
     </paper>
     <paper id="121">
       <title><fixed-case>TACITUS</fixed-case>: Research in Text Understanding</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="eef10102">H92-1121</url>
       <bibkey>hobbs-1992-tacitus</bibkey>
     </paper>
     <paper id="122">
       <title><fixed-case>NLP</fixed-case> and Text Analysis at the <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts</title>
-      <author><first>Wendy G.</first><last>Lehnert</last></author>
+      <author id="wendy-lehnert"><first>Wendy G.</first><last>Lehnert</last></author>
       <url hash="5613241e">H92-1122</url>
       <bibkey>lehnert-1992-nlp</bibkey>
     </paper>
     <paper id="123">
       <title>Natural Language Research</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="ec995690">H92-1123</url>
       <bibkey>joshi-etal-1992-natural</bibkey>
     </paper>
     <paper id="124">
       <title>In-Depth Knowledge-Based Machine Translation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="c96ee5bc">H92-1124</url>
       <bibkey>hovy-1992-depth</bibkey>
     </paper>
diff --git a/data/xml/H93.xml b/data/xml/H93.xml
index c59da7b6e6..1428aed30c 100644
--- a/data/xml/H93.xml
+++ b/data/xml/H93.xml
@@ -12,22 +12,22 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the <fixed-case>ARPA</fixed-case> Human Language Technology Workshop</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="251e4447">H93-1001</url>
       <bibkey>bates-1993-overview</bibkey>
     </paper>
     <paper id="2">
       <title>Session 1: Spoken Language Systems</title>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="df30bcfe">H93-1002</url>
       <bibkey>rudnicky-1993-session</bibkey>
     </paper>
     <paper id="3">
       <title>Benchmark Tests for the <fixed-case>DARPA</fixed-case> Spoken Language Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <author id="jonathan-g-fiscus"><first>Johathan G.</first><last>Fiscus</last></author>
-      <author><first>William M.</first><last>Fisher</last></author>
-      <author><first>John S.</first><last>Garofolo</last></author>
+      <author id="william-m-fisher"><first>William M.</first><last>Fisher</last></author>
+      <author id="john-s-garofolo"><first>John S.</first><last>Garofolo</last></author>
       <url hash="1f1620e9">H93-1003</url>
       <bibkey>pallett-etal-1993-benchmark</bibkey>
     </paper>
@@ -48,8 +48,8 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>HCRC</fixed-case> Map Task Corpus: Natural Dialogue for Speech Recognition</title>
-      <author><first>Henry S.</first><last>Thompson</last></author>
-      <author><first>Anne</first><last>Anderson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
+      <author id="anne-h-anderson"><first>Anne</first><last>Anderson</last></author>
       <author><first>Ellen Gurman</first><last>Bard</last></author>
       <author><first>Gwyneth</first><last>Doherty-Sneddon</last></author>
       <author><first>Alison</first><last>Newlands</last></author>
@@ -59,28 +59,28 @@
     </paper>
     <paper id="6">
       <title>A Portable Approach to Last Resort Parsing and Interpretation</title>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="4696d46a">H93-1006</url>
       <bibkey>linebarger-etal-1993-portable</bibkey>
     </paper>
     <paper id="7">
       <title>The Semantic Linker- A New Fragment Combining Method</title>
-      <author><first>David</first><last>Stallard</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
       <url hash="ad6e8547">H93-1007</url>
       <bibkey>stallard-bobrow-1993-semantic</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>G</fixed-case>emini: A Natural Language System for Spoken-Language Understanding</title>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
-      <author><first>Doug</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="douglas-appelt"><first>Doug</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Lynn</first><last>Cherny</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Doug</first><last>Moran</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="douglas-b-moran"><first>Doug</first><last>Moran</last></author>
       <url hash="c9be426b">H93-1008</url>
       <bibkey>dowding-etal-1993-gemini</bibkey>
     </paper>
@@ -96,14 +96,14 @@
     </paper>
     <paper id="10">
       <title>Session 2: Invited Overviews</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="3a2b918d">H93-1010</url>
       <bibkey>bates-1993-session</bibkey>
     </paper>
     <paper id="11">
       <title>Survey of the <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
-      <author><first>Nancy A.</first><last>Chinchor</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="nancy-chinchor"><first>Nancy A.</first><last>Chinchor</last></author>
       <url hash="110bdd7f">H93-1011</url>
       <bibkey>sundheim-chinchor-1993-survey</bibkey>
     </paper>
@@ -122,75 +122,75 @@
     <paper id="14">
       <title>Efficient Cepstral Normalization for Robust Speech Recognition</title>
       <author><first>Fu-Hua</first><last>Liu</last></author>
-      <author><first>Richard M.</first><last>Stern</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="richard-m-stern"><first>Richard M.</first><last>Stern</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <author><first>Alejandro</first><last>Acero</last></author>
       <url hash="7deeaa03">H93-1014</url>
       <bibkey>liu-etal-1993-efficient</bibkey>
     </paper>
     <paper id="15">
       <title>Comparative Experiments on Large Vocabulary Speech Recognition</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Tasos</first><last>Anastasakos</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>George</first><last>Zavaliagkos</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="george-zavaliagkos"><first>George</first><last>Zavaliagkos</last></author>
       <url hash="7096f55b">H93-1015</url>
       <bibkey>schwartz-etal-1993-comparative</bibkey>
     </paper>
     <paper id="16">
       <title>An Overview of the <fixed-case>SPHINX</fixed-case>-<fixed-case>II</fixed-case> Speech Recognition System</title>
-      <author><first>Xuedong</first><last>Huang</last></author>
-      <author><first>Fileno</first><last>Alleva</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Ronald</first><last>Rosenfeld</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
+      <author id="fil-alleva"><first>Fileno</first><last>Alleva</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="ronald-rosenfeld"><first>Ronald</first><last>Rosenfeld</last></author>
       <url hash="a831ec3d">H93-1016</url>
       <bibkey>huang-etal-1993-overview</bibkey>
     </paper>
     <paper id="17">
       <title>Progressive-Search Algorithms for Large-Vocabulary Speech Recognition</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>John</first><last>Butzberger</last></author>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="7f9407b9">H93-1017</url>
       <bibkey>murveit-etal-1993-progressive</bibkey>
     </paper>
     <paper id="18">
       <title>Search Algorithms for Software-Only Real-Time Recognition with Very Large Vocabularies</title>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>Paul</first><last>Placeway</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="paul-placeway"><first>Paul</first><last>Placeway</last></author>
       <url hash="50870b43">H93-1018</url>
       <bibkey>nguyen-etal-1993-search</bibkey>
     </paper>
     <paper id="19">
       <title>Identification of Non-Linguistic Speech Features</title>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Lori F.</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori F.</first><last>Lamel</last></author>
       <url hash="4373bf49">H93-1019</url>
       <bibkey>gauvain-lamel-1993-identification</bibkey>
     </paper>
     <paper id="20">
       <title>On the Use of Tied-Mixture Distributions</title>
-      <author><first>Owen</first><last>Kimball</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="owen-kimball"><first>Owen</first><last>Kimball</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <url hash="212b8bf9">H93-1020</url>
       <bibkey>kimball-ostendorf-1993-use</bibkey>
     </paper>
     <paper id="21">
       <title>Adaptive Language Modeling Using the Maximum Entropy Principle</title>
       <author><first>Raymond</first><last>Lau</last></author>
-      <author><first>Ronald</first><last>Rosenfeld</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="ronald-rosenfeld"><first>Ronald</first><last>Rosenfeld</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="2c4f25cc">H93-1021</url>
       <bibkey>lau-etal-1993-adaptive</bibkey>
     </paper>
     <paper id="22">
       <title>Improved Keyword-Spotting Using <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case>™ Large-Vocabuarly Speech-Recognition System</title>
-      <author><first>Mitchel</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitchel</first><last>Weintraub</last></author>
       <url hash="49e9faaf">H93-1022</url>
       <bibkey>weintraub-1993-improved</bibkey>
     </paper>
@@ -202,8 +202,8 @@
       <author><first>Stephen</first><last>Lowe</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <author><first>Francesco</first><last>Scattone</last></author>
-      <author><first>James</first><last>Baker</last></author>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="james-baker"><first>James</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>John</first><last>Bridle</last></author>
       <author><first>Melvyn</first><last>Hunt</last></author>
       <author><first>Jeremy</first><last>Orloff</last></author>
@@ -212,22 +212,22 @@
     </paper>
     <paper id="24">
       <title>Session 4: Natural Language</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="09d3a251">H93-1024</url>
       <bibkey>moore-1993-session</bibkey>
     </paper>
     <paper id="25">
       <title>Heuristics for Broad-Coverage Natural Language Parsing</title>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <url hash="aba0b322">H93-1025</url>
       <bibkey>mccord-1993-heuristics</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>FASTUS</fixed-case>: A System for Extracting Information from Text</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Megumi</first><last>Kameyalna</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <url hash="e34c47e1">H93-1026</url>
@@ -236,67 +236,67 @@
     <paper id="27">
       <title>Interpreting Temporal Adverbials</title>
       <author><first>Chung Hee</first><last>Hwang</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <url hash="39633ffa">H93-1027</url>
       <bibkey>hwang-schubert-1993-interpreting</bibkey>
     </paper>
     <paper id="28">
       <title>The <fixed-case>M</fixed-case>urasaki Project: Multilingual Natural Language Understanding</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Hatte</first><last>Blejer</last></author>
+      <author id="hatte-blejer"><first>Hatte</first><last>Blejer</last></author>
       <author><first>Sharon</first><last>Flank</last></author>
-      <author><first>Douglas</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Douglas</first><last>McKee</last></author>
       <author><first>Sandy</first><last>Shinn</last></author>
       <url hash="d38b86cf">H93-1028</url>
       <bibkey>aone-etal-1993-murasaki</bibkey>
     </paper>
     <paper id="29">
       <title>Validation of Terminological Inference in an Information Extraction Task</title>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <url hash="9fe839ab">H93-1029</url>
       <bibkey>vilain-1993-validation</bibkey>
     </paper>
     <paper id="30">
       <title>Session 5: Discourse</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="75ff383b">H93-1030</url>
       <bibkey>hobbs-1993-session</bibkey>
     </paper>
     <paper id="31">
       <title>Development, Implementation and Testing of a Discourse Model for Newspaper Texts</title>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <author><first>Kenneth A.</first><last>McVearry</last></author>
       <author><first>Woojin</first><last>Paik</last></author>
-      <author><first>Edmund</first><last>Yu</last></author>
+      <author id="edmund-yu"><first>Edmund</first><last>Yu</last></author>
       <author><first>Mary</first><last>McKenna</last></author>
       <url hash="d037df73">H93-1031</url>
       <bibkey>liddy-etal-1993-development</bibkey>
     </paper>
     <paper id="32">
       <title>Indexing and Exploiting a Discourse History to Generate Context-Sensitive Explanations</title>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <url hash="d455dcfc">H93-1032</url>
       <bibkey>moore-1993-indexing</bibkey>
     </paper>
     <paper id="33">
       <title>Generic Plan Recognition for Dialogue Systems</title>
       <author><first>George</first><last>Ferguson</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <url hash="75b875c3">H93-1033</url>
       <bibkey>ferguson-allen-1993-generic</bibkey>
     </paper>
     <paper id="34">
       <title>Efficient Collaborative Discourse: A Theory and Its Implementation</title>
-      <author><first>Alan W.</first><last>Biermann</last></author>
-      <author><first>Curry I.</first><last>Guinn</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
+      <author id="curry-i-guinn"><first>Curry I.</first><last>Guinn</last></author>
       <author><first>D. Richard</first><last>Hipp</last></author>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <url hash="bb53cfba">H93-1034</url>
       <bibkey>biermann-etal-1993-efficient</bibkey>
     </paper>
     <paper id="35">
       <title>Machine Translation</title>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="dc025d6c">H93-1035</url>
       <bibkey>waibel-1993-machine</bibkey>
     </paper>
@@ -309,7 +309,7 @@
     <paper id="37">
       <title><fixed-case>LINGSTAT</fixed-case>: An Interactive, Machine-Aided Translation System</title>
       <author><first>Jonathan</first><last>Yamron</last></author>
-      <author><first>James</first><last>Baker</last></author>
+      <author id="james-baker"><first>James</first><last>Baker</last></author>
       <author><first>Paul</first><last>Bamberg</last></author>
       <author><first>Haakon</first><last>Chevalier</last></author>
       <author><first>Taiko</first><last>Dietzel</last></author>
@@ -324,28 +324,28 @@
     </paper>
     <paper id="38">
       <title>An <fixed-case>MAT</fixed-case> Tool and Its Effectiveness</title>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Dean</first><last>Grannes</last></author>
       <author><first>Peter</first><last>Cousseau</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="e46cfccf">H93-1038</url>
       <bibkey>frederking-etal-1993-mat</bibkey>
     </paper>
     <paper id="39">
       <title>But Dictionaries Are Data Too</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
       <author><first>Meredith J.</first><last>Goldsmith</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <author><first>Surya</first><last>Mohanty</last></author>
       <url hash="35947963">H93-1039</url>
       <bibkey>brown-etal-1993-dictionaries</bibkey>
     </paper>
     <paper id="40">
       <title>Evaluation of Machine Translation</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Theresa A.</first><last>O’Connell</last></author>
       <url hash="b55442e7">H93-1040</url>
       <bibkey>white-oconnell-1993-evaluation</bibkey>
@@ -371,30 +371,30 @@
     </paper>
     <paper id="42">
       <title>A Speech to Speech Translation System Built From Standard Components</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Ivan</first><last>Bretan</last></author>
-      <author><first>David</first><last>Carter</last></author>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <author><first>Jaan</first><last>Kaja</last></author>
       <author><first>Jussi</first><last>Karlgren</last></author>
       <author><first>Bertil</first><last>Lyberg</last></author>
       <author><first>Steve</first><last>Pulman</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <author><first>Christer</first><last>Samuelsson</last></author>
       <url hash="534bfba8">H93-1042</url>
       <bibkey>rayner-etal-1993-speech</bibkey>
     </paper>
     <paper id="43">
       <title>Session 7: Demonstrations</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="1ec8f023">H93-1043</url>
       <bibkey>murveit-1993-session</bibkey>
     </paper>
     <paper id="44">
       <title>Session 8: Statistical Language Modeling</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="34643b89">H93-1044</url>
       <bibkey>marcus-1993-session</bibkey>
     </paper>
@@ -402,13 +402,13 @@
       <title>Example-Based Correction of Word Segmentation and Part of Speech Labelling</title>
       <author><first>Tomoyoshi</first><last>Matsukawa</last></author>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="3348c3db">H93-1045</url>
       <bibkey>matsukawa-etal-1993-example</bibkey>
     </paper>
     <paper id="46">
       <title>Measures and Models for Phrase Recognition</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <url hash="a06c5e3c">H93-1046</url>
       <bibkey>abney-1993-measures</bibkey>
     </paper>
@@ -420,7 +420,7 @@
     </paper>
     <paper id="48">
       <title>Prediction of Lexicalized Tree Fragments in Text</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <url hash="eb6a918e">H93-1048</url>
       <bibkey>hindle-1993-prediction</bibkey>
     </paper>
@@ -432,7 +432,7 @@
     </paper>
     <paper id="50">
       <title>Smoothing of Automatically Generated Selectional Constraints</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="85de998b">H93-1050</url>
       <bibkey>grishman-sterling-1993-smoothing</bibkey>
@@ -441,7 +441,7 @@
       <title>Corpus-Based Statistical Sense Resolution</title>
       <author><first>Claudia</first><last>Leacock</last></author>
       <author><first>Geoffrey</first><last>Towell</last></author>
-      <author><first>Ellen</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen</first><last>Voorhees</last></author>
       <url hash="f1698b55">H93-1051</url>
       <bibkey>leacock-etal-1993-corpus</bibkey>
     </paper>
@@ -453,7 +453,7 @@
     </paper>
     <paper id="53">
       <title>Augmenting Lexicons Automatically: Clustering Semantically Related Adjectives</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
       <url hash="6829e02a">H93-1053</url>
       <bibkey>mckeown-hatzivassiloglou-1993-augmenting</bibkey>
@@ -466,13 +466,13 @@
     </paper>
     <paper id="55">
       <title>Session 9: Government Panel</title>
-      <author><first>Carol J.</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol J.</first><last>Van Ess-Dykema</last></author>
       <url hash="03a2b9ef">H93-1055</url>
       <bibkey>van-ess-dykema-1993-session</bibkey>
     </paper>
     <paper id="56">
       <title>Projected Government Needs in Human Language Technology and the Role of Researchers in Meeting Them</title>
-      <author><first>Helen M.</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen M.</first><last>Gigley</last></author>
       <url hash="41275b4c">H93-1056</url>
       <bibkey>gigley-1993-projected</bibkey>
     </paper>
@@ -490,14 +490,14 @@
     </paper>
     <paper id="59">
       <title>Session 10: THE LEXICON</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="ecf7005e">H93-1059</url>
       <bibkey>grishman-1993-session</bibkey>
     </paper>
     <paper id="60">
       <title>The <fixed-case>COMLEX</fixed-case> Syntax Project</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <url hash="e145d17f">H93-1060</url>
       <bibkey>grishman-etal-1993-comlex</bibkey>
@@ -514,8 +514,8 @@
     <paper id="62">
       <title>Interpretation of Proper Nouns for Information Retrieval</title>
       <author><first>Woojin</first><last>Paik</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
-      <author><first>Edmund</first><last>Yu</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="edmund-yu"><first>Edmund</first><last>Yu</last></author>
       <author><first>Mary</first><last>McKenna</last></author>
       <url hash="f92879b6">H93-1062</url>
       <bibkey>paik-etal-1993-interpretation</bibkey>
@@ -528,7 +528,7 @@
     </paper>
     <paper id="64">
       <title>On Customizing Prosody in Speech Synthesis: Names and Addresses as a Case in Point</title>
-      <author><first>Kim E. A.</first><last>Silverman</last></author>
+      <author id="kim-e-a-silverman"><first>Kim E. A.</first><last>Silverman</last></author>
       <url hash="8a1fbdb2">H93-1064</url>
       <bibkey>silverman-1993-customizing</bibkey>
     </paper>
@@ -540,8 +540,8 @@
     </paper>
     <paper id="66">
       <title>A Speech-First Model for Repair Detection and Correction</title>
-      <author><first>Christine</first><last>Nakatani</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="christine-h-nakatani"><first>Christine</first><last>Nakatani</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="c7bc1b87">H93-1066</url>
       <bibkey>nakatani-hirschberg-1993-speech</bibkey>
     </paper>
@@ -562,49 +562,49 @@
     </paper>
     <paper id="69">
       <title>Document retrieval and text retrieval</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="9b43bd7f">H93-1069</url>
       <bibkey>sparck-jones-1993-document</bibkey>
     </paper>
     <paper id="70">
       <title>The Importance of Proper Weighting Methods</title>
-      <author><first>Chris</first><last>Buckley</last></author>
+      <author id="chris-buckley"><first>Chris</first><last>Buckley</last></author>
       <url hash="bbce1327">H93-1070</url>
       <bibkey>buckley-1993-importance</bibkey>
     </paper>
     <paper id="71">
       <title>Query Processing for Retrieval From Large Text Bases</title>
       <author><first>John</first><last>Broglio</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <url hash="e051ef99">H93-1071</url>
       <bibkey>broglio-croft-1993-query</bibkey>
     </paper>
     <paper id="72">
       <title>An Overview of <fixed-case>DR-LINK</fixed-case> and Its Approach to Document Filtering</title>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <author><first>Woojin</first><last>Paik</last></author>
-      <author><first>Edmund S.</first><last>Yu</last></author>
+      <author id="edmund-yu"><first>Edmund S.</first><last>Yu</last></author>
       <author><first>Kenneth A.</first><last>McVearry</last></author>
       <url hash="e591505e">H93-1072</url>
       <bibkey>liddy-etal-1993-overview</bibkey>
     </paper>
     <paper id="73">
       <title>Session 13: New Directions</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="0ddbe3d1">H93-1073</url>
       <bibkey>weischedel-1993-session</bibkey>
     </paper>
     <paper id="74">
       <title>Mode preference in a simple data-retrieval task</title>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="ae508050">H93-1074</url>
       <bibkey>rudnicky-1993-mode</bibkey>
     </paper>
     <paper id="75">
       <title>A Simulation-Based Research Strategy for Designing Complex <fixed-case>NL</fixed-case> Systems</title>
-      <author><first>Sharon</first><last>Oviatt</last></author>
-      <author><first>Philip</first><last>Cohen</last></author>
-      <author><first>Michelle</first><last>Wang</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
+      <author id="michelle-q-wang"><first>Michelle</first><last>Wang</last></author>
       <author><first>Jeremy</first><last>Gaston</last></author>
       <url hash="faaa1fcc">H93-1075</url>
       <bibkey>oviatt-etal-1993-simulation</bibkey>
@@ -630,35 +630,35 @@
     </paper>
     <paper id="79">
       <title>Robust Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="5f044e06">H93-1079</url>
       <bibkey>makhoul-schwartz-1993-robust</bibkey>
     </paper>
     <paper id="80">
       <title>Robustness, Portability, and Scalability of Natural Language Systems</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="caf3a7e3">H93-1080</url>
       <bibkey>weischedel-1993-robustness</bibkey>
     </paper>
     <paper id="81">
       <title>Usable, Real-Time, Interactive Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="45d965da">H93-1081</url>
       <bibkey>makhoul-bates-1993-usable</bibkey>
     </paper>
     <paper id="82">
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="d674b9d5">H93-1082</url>
       <bibkey>ostendorf-price-1993-evaluating</bibkey>
     </paper>
     <paper id="83">
       <title>Segment-Based Acoustic Models for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="cf64b1dc">H93-1083</url>
       <bibkey>ostendorf-rohlicek-1993-segment</bibkey>
     </paper>
@@ -670,8 +670,8 @@
     </paper>
     <paper id="85">
       <title>Extracting Constraints on Word Usage from Large Text Corpora</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <url hash="67e43437">H93-1085</url>
       <bibkey>mckeown-passonneau-1993-extracting</bibkey>
     </paper>
@@ -686,13 +686,13 @@
     <paper id="87">
       <title><fixed-case>LINGSTAT</fixed-case>: An Interactive, Machine-Aided Translation System</title>
       <author><first>Jonathan</first><last>Yamron</last></author>
-      <author><first>James</first><last>Baker</last></author>
+      <author id="james-baker"><first>James</first><last>Baker</last></author>
       <url hash="caa8c2d3">H93-1087</url>
       <bibkey>yamron-baker-1993-lingstat</bibkey>
     </paper>
     <paper id="88">
       <title>Research in Large Vocabulary Continuous Speech Recognition</title>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <url hash="9ab65956">H93-1088</url>
@@ -706,20 +706,20 @@
     </paper>
     <paper id="90">
       <title><fixed-case>M</fixed-case>atch<fixed-case>P</fixed-case>lus: A Context Vector System for Document Retrieval</title>
-      <author><first>Stephen L.</first><last>Gallant</last></author>
+      <author id="stephen-l-gallant"><first>Stephen L.</first><last>Gallant</last></author>
       <author><first>William R.</first><last>Caid</last></author>
       <url hash="99511f82">H93-1090</url>
       <bibkey>gallant-caid-1993-matchplus</bibkey>
     </paper>
     <paper id="91">
       <title>Applying Statistical Methods to Machine Translation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
       <url hash="0ee19f4a">H93-1091</url>
       <bibkey>brown-1993-applying</bibkey>
     </paper>
     <paper id="92">
       <title>Automatic Extraction of Grammars From Annotated Text</title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="b710541f">H93-1092</url>
       <bibkey>roukos-1993-automatic</bibkey>
     </paper>
@@ -731,27 +731,27 @@
     </paper>
     <paper id="94">
       <title>Robust Continuous Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="1ce428ea">H93-1094</url>
       <bibkey>weinstein-paul-1993-robust</bibkey>
     </paper>
     <paper id="95">
       <title>Spoken Language Recognition and Understanding</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="80d0ba68">H93-1095</url>
       <bibkey>zue-hirschman-1993-spoken</bibkey>
     </paper>
     <paper id="96">
       <title><fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: Spoken Language Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="99c9e4b8">H93-1096</url>
       <bibkey>pallett-1993-nist</bibkey>
     </paper>
     <paper id="97">
       <title>Information Extraction System Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="ae19f8e0">H93-1097</url>
       <bibkey>sundheim-1993-information</bibkey>
     </paper>
@@ -777,13 +777,13 @@
     </paper>
     <paper id="101">
       <title>Research in Natural Language Processing</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="22151fae">H93-1101</url>
       <bibkey>grishman-1993-research</bibkey>
     </paper>
     <paper id="102">
       <title>Robust Text Processing and Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="9755d3c6">H93-1102</url>
       <bibkey>strzalkowski-1993-robust</bibkey>
     </paper>
@@ -801,13 +801,13 @@
     </paper>
     <paper id="105">
       <title>Exploiting Concept Spaces for Text Retrieval</title>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <url hash="73ce0aa7">H93-1105</url>
       <bibkey>voorhees-1993-exploiting</bibkey>
     </paper>
     <paper id="106">
       <title>Annotation of <fixed-case>ATIS</fixed-case> Data</title>
-      <author><first>Kate</first><last>Hunicke-Smith</last></author>
+      <author id="kate-hunicke-smith"><first>Kate</first><last>Hunicke-Smith</last></author>
       <author><first>Jared</first><last>Bernstein</last></author>
       <url hash="15b03952">H93-1106</url>
       <bibkey>hunicke-smith-bernstein-1993-annotation</bibkey>
@@ -821,58 +821,58 @@
     </paper>
     <paper id="108">
       <title>A Real-Time Spoken-Language System for Interactive Problem Solving</title>
-      <author><first>Patti</first><last>Price</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="4f491d3c">H93-1108</url>
       <bibkey>price-moore-1993-real</bibkey>
     </paper>
     <paper id="109">
       <title>High Performance Speech Recognition Using Consistency Modeling</title>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="cdd6a757">H93-1109</url>
       <bibkey>digalakis-etal-1993-high</bibkey>
     </paper>
     <paper id="110">
       <title><fixed-case>DR</fixed-case>-<fixed-case>LINK</fixed-case>: Document Retrieval Using Linguistic Knowledge</title>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
-      <author><first>Sung H.</first><last>Myaeng</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung H.</first><last>Myaeng</last></author>
       <url hash="11887f06">H93-1110</url>
       <bibkey>liddy-myaeng-1993-dr</bibkey>
     </paper>
     <paper id="111">
       <title><fixed-case>UM</fixed-case>ass/Hughes <fixed-case>TIPSTER</fixed-case> Project on Extraction from Text</title>
-      <author><first>Wendy</first><last>Lehnert</last></author>
-      <author><first>Charles</first><last>Dolan</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
+      <author id="charles-p-dolan"><first>Charles</first><last>Dolan</last></author>
       <url hash="051e6276">H93-1111</url>
       <bibkey>lehnert-dolan-1993-umass</bibkey>
     </paper>
     <paper id="112">
       <title>Text Retrieval and Routing Techniques Based on an Inference Net</title>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <url hash="46f274d0">H93-1112</url>
       <bibkey>croft-1993-text</bibkey>
     </paper>
     <paper id="113">
       <title>Natural Language Research</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="d06cc696">H93-1113</url>
       <bibkey>joshi-etal-1993-natural</bibkey>
     </paper>
     <paper id="114">
       <title>Natural Language Planning Dialogue for Intelligent Applications</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Len</first><last>Schubert</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="lenhart-schubert"><first>Len</first><last>Schubert</last></author>
       <url hash="d94f992d">H93-1114</url>
       <bibkey>allen-schubert-1993-natural</bibkey>
     </paper>
     <paper id="115">
       <title>The <fixed-case>P</fixed-case>enman Project on Knowledge-Based Machine Translation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="02ab8901">H93-1115</url>
       <bibkey>hovy-1993-penman</bibkey>
     </paper>
diff --git a/data/xml/H94.xml b/data/xml/H94.xml
index f4ad92516d..c21a24c947 100644
--- a/data/xml/H94.xml
+++ b/data/xml/H94.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the 1994 <fixed-case>ARPA</fixed-case> <fixed-case>H</fixed-case>uman <fixed-case>L</fixed-case>anguage <fixed-case>T</fixed-case>echnology Workshop</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <url hash="7f53ceba">H94-1001</url>
       <bibkey>weinstein-1994-overview</bibkey>
     </paper>
@@ -24,15 +24,15 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>C</fixed-case>omlex Syntax Project: The First Year</title>
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <url hash="1c2db771">H94-1003</url>
       <bibkey>macleod-etal-1994-comlex</bibkey>
     </paper>
     <paper id="4">
       <title>Lexicons for Human Language Technology</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <url hash="85268414">H94-1004</url>
       <bibkey>liberman-1994-lexicons</bibkey>
     </paper>
@@ -58,82 +58,82 @@
     </paper>
     <paper id="8">
       <title>Corpus Development Activities at the <fixed-case>C</fixed-case>enter for <fixed-case>S</fixed-case>poken <fixed-case>L</fixed-case>anguage <fixed-case>U</fixed-case>nderstanding</title>
-      <author><first>Ron</first><last>Cole</last></author>
+      <author id="ronald-cole"><first>Ron</first><last>Cole</last></author>
       <author><first>Mike</first><last>Noel</last></author>
       <author><first>Daniel C.</first><last>Burnett</last></author>
       <author><first>Mark</first><last>Fanty</last></author>
       <author><first>Terri</first><last>Lander</last></author>
-      <author><first>Beatrice</first><last>Oshika</last></author>
+      <author id="beatrice-oshika"><first>Beatrice</first><last>Oshika</last></author>
       <author><first>Stephen</first><last>Sutton</last></author>
       <url hash="8f3cb7b8">H94-1008</url>
       <bibkey>cole-etal-1994-corpus</bibkey>
     </paper>
     <paper id="9">
       <title>The Hub and Spoke Paradigm for <fixed-case>CSR</fixed-case> Evaluation</title>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <author><first>Jerome</first><last>Bellegarda</last></author>
-      <author><first>Jordan</first><last>Cohen</last></author>
-      <author><first>David</first><last>Pallett</last></author>
+      <author id="jordan-cohen"><first>Jordan</first><last>Cohen</last></author>
+      <author id="david-s-pallett"><first>David</first><last>Pallett</last></author>
       <author><first>Doug</first><last>Paul</last></author>
       <author><first>Mike</first><last>Phillips</last></author>
       <author><first>Raja</first><last>Rajasekaran</last></author>
       <author><first>Fred</first><last>Richardson</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>Roni</first><last>Rosenfeld</last></author>
       <author><first>Bob</first><last>Roth</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="cc78ab71">H94-1009</url>
       <bibkey>kubala-etal-1994-hub</bibkey>
     </paper>
     <paper id="10">
       <title>Expanding the Scope of the <fixed-case>ATIS</fixed-case> Task: The <fixed-case>ATIS</fixed-case>-3 Corpus</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Michael</first><last>Brown</last></author>
-      <author><first>William</first><last>Fisher</last></author>
-      <author><first>Kate</first><last>Hunicke-Smith</last></author>
-      <author><first>David</first><last>Pallett</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="michael-k-brown"><first>Michael</first><last>Brown</last></author>
+      <author id="william-m-fisher"><first>William</first><last>Fisher</last></author>
+      <author id="kate-hunicke-smith"><first>Kate</first><last>Hunicke-Smith</last></author>
+      <author id="david-s-pallett"><first>David</first><last>Pallett</last></author>
       <author><first>Christine</first><last>Pao</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <url hash="e796abf5">H94-1010</url>
       <bibkey>dahl-etal-1994-expanding</bibkey>
     </paper>
     <paper id="11">
       <title>1993 Benchmark Tests for the <fixed-case>ARPA</fixed-case> Spoken Language Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
-      <author><first>William M.</first><last>Fisher</last></author>
-      <author><first>John S.</first><last>Garofolo</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="william-m-fisher"><first>William M.</first><last>Fisher</last></author>
+      <author id="john-s-garofolo"><first>John S.</first><last>Garofolo</last></author>
       <author><first>Bruce A.</first><last>Lund</last></author>
-      <author><first>Mark A.</first><last>Przybocki</last></author>
+      <author id="mark-przybocki"><first>Mark A.</first><last>Przybocki</last></author>
       <url hash="9d6b3e82">H94-1011</url>
       <bibkey>pallett-etal-1994-1993</bibkey>
     </paper>
     <paper id="12">
       <title>Session 2: Language Modeling</title>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <url hash="ba22e166">H94-1012</url>
       <bibkey>huang-1994-session</bibkey>
     </paper>
     <paper id="13">
       <title>A Hybrid Approach to Adaptive Statistical Language Modeling</title>
-      <author><first>Ronald</first><last>Rosenfeld</last></author>
+      <author id="ronald-rosenfeld"><first>Ronald</first><last>Rosenfeld</last></author>
       <url hash="e376954d">H94-1013</url>
       <bibkey>rosenfeld-1994-hybrid</bibkey>
     </paper>
     <paper id="14">
       <title>Language Modeling with Sentence-Level Mixtures</title>
       <author><first>Rukmini</first><last>Iyer</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="bef8af21">H94-1014</url>
       <bibkey>iyer-etal-1994-language</bibkey>
     </paper>
     <paper id="15">
       <title>Speech Recognition Using a Stochastic Language Model Integrating Local and Global Constraints</title>
       <author><first>Ryosuke</first><last>Isotani</last></author>
-      <author><first>Shoichi</first><last>Matsunaga</last></author>
+      <author id="shoichi-matsunaga"><first>Shoichi</first><last>Matsunaga</last></author>
       <url hash="e7163396">H94-1015</url>
       <bibkey>isotani-matsunaga-1994-speech</bibkey>
     </paper>
@@ -150,26 +150,26 @@
     </paper>
     <paper id="17">
       <title>Session 3: Human Language Evaluation</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="39686574">H94-1017</url>
       <bibkey>hirschman-1994-session</bibkey>
     </paper>
     <paper id="18">
       <title>Towards Better <fixed-case>NLP</fixed-case> System Evaluation</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="eacf3572">H94-1018</url>
       <bibkey>sparck-jones-1994-towards</bibkey>
     </paper>
     <paper id="19">
       <title>Automatic Evaluation of Computer Generated Text: A Progress Report on the <fixed-case>T</fixed-case>ext<fixed-case>E</fixed-case>val Project</title>
       <author><first>Chris</first><last>Brew</last></author>
-      <author><first>Henry S.</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
       <url hash="ac7242ef">H94-1019</url>
       <bibkey>brew-thompson-1994-automatic</bibkey>
     </paper>
     <paper id="20">
       <title>The <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank: Annotating Predicate Argument Structure</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <author><first>Grace</first><last>Kim</last></author>
       <author><first>Mary Ann</first><last>Marcinkiewicz</last></author>
       <author><first>Robert</first><last>MacIntyre</last></author>
@@ -182,25 +182,25 @@
     </paper>
     <paper id="21">
       <title>Whither Written Language Evaluation?</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="72e47f75">H94-1021</url>
       <bibkey>grishman-1994-whither</bibkey>
     </paper>
     <paper id="22">
       <title>Semantic Evaluation for Spoken-Language Systems</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="971b9ca6">H94-1022</url>
       <bibkey>moore-1994-semantic</bibkey>
     </paper>
     <paper id="23">
       <title>Session 4: Machine Translation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="a45ab2d8">H94-1023</url>
       <bibkey>hovy-1994-session</bibkey>
     </paper>
     <paper id="24">
       <title>Evaluation in the <fixed-case>ARPA</fixed-case> Machine Translation Program: 1993 Methodology</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <author><first>Theresa A.</first><last>O’Connell</last></author>
       <url hash="46c77d86">H94-1024</url>
       <bibkey>white-oconnell-1994-evaluation</bibkey>
@@ -208,33 +208,33 @@
     <paper id="25">
       <title>Building <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Dictionary based on Ontology for Machine Translation</title>
       <author><first>Akitoshi</first><last>Okumura</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="3c0ad3ac">H94-1025</url>
       <bibkey>okumura-hovy-1994-building</bibkey>
     </paper>
     <paper id="26">
       <title>Toward Multi-Engine Machine Translation</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <url hash="a3a6445e">H94-1026</url>
       <bibkey>nirenburg-frederking-1994-toward</bibkey>
     </paper>
     <paper id="27">
       <title>Translating Collocations for Use in Bilingual Lexicons</title>
-      <author><first>Frank</first><last>Smadja</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="frank-smadja"><first>Frank</first><last>Smadja</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="40d161ea">H94-1027</url>
       <bibkey>smadja-mckeown-1994-translating</bibkey>
     </paper>
     <paper id="28">
       <title>The <fixed-case>C</fixed-case>andide System for Machine Translation</title>
-      <author><first>Adam L.</first><last>Berger</last></author>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="adam-berger"><first>Adam L.</first><last>Berger</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
       <author><first>John R.</first><last>Gillett</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <author><first>Harry</first><last>Printz</last></author>
       <author><first>Lubos</first><last>Ures</last></author>
       <url hash="57c33648">H94-1028</url>
@@ -252,58 +252,58 @@
     </paper>
     <paper id="30">
       <title>Session 5: Natural Language, Discourse</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="45ed92d2">H94-1030</url>
       <bibkey>jacobs-1994-session</bibkey>
     </paper>
     <paper id="31">
       <title>Issues and Methodology for Template Design for Information Extraction</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="9ffaa226">H94-1031</url>
       <bibkey>onyshkevych-1994-issues</bibkey>
     </paper>
     <paper id="32">
       <title>Principles of Template Design</title>
-      <author><first>Jerry</first><last>Hobbs</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <url hash="21a8b388">H94-1032</url>
       <bibkey>hobbs-israel-1994-principles</bibkey>
     </paper>
     <paper id="33">
       <title>Pattern Matching in a Linguistically-Motivated Text Understanding System</title>
-      <author><first>Damaris M.</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris M.</first><last>Ayuso</last></author>
       <url hash="5403c915">H94-1033</url>
       <bibkey>ayuso-1994-pattern</bibkey>
     </paper>
     <paper id="34">
       <title>Tagging Speech Repairs</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <url hash="8be2460a">H94-1034</url>
       <bibkey>heeman-allen-1994-tagging</bibkey>
     </paper>
     <paper id="35">
       <title>Information Based Intonation Synthesis</title>
       <author><first>Scott</first><last>Prevost</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <url hash="7a1cd202">H94-1035</url>
       <bibkey>prevost-steedman-1994-information</bibkey>
     </paper>
     <paper id="36">
       <title>Session 6: Spoken Language Systems</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="8966d571">H94-1036</url>
       <bibkey>bates-1994-session</bibkey>
     </paper>
     <paper id="37">
       <title><fixed-case>PEGASUS</fixed-case>: A Spoken Language Interface for On-Line Air Travel Planning</title>
-      <author><first>Victor</first><last>Zue</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Michael</first><last>Phillips</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="michael-phillips"><first>Michael</first><last>Phillips</last></author>
       <author><first>Christine</first><last>Pao</last></author>
       <author><first>David</first><last>Goddeau</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Eric</first><last>Brill</last></author>
       <url hash="a222eb07">H94-1037</url>
       <bibkey>zue-etal-1994-pegasus</bibkey>
@@ -316,44 +316,44 @@
     </paper>
     <paper id="39">
       <title>Recent Improvements in the <fixed-case>CMU</fixed-case> Spoken Language Understanding System</title>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <author><first>Sunil</first><last>Issar</last></author>
       <url hash="7893e4de">H94-1039</url>
       <bibkey>ward-issar-1994-recent</bibkey>
     </paper>
     <paper id="40">
       <title>Combining Knowledge Sources to Reorder N-Best Speech Hypothesis Lists</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>David</first><last>Carter</last></author>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="e97ffb91">H94-1040</url>
       <bibkey>rayner-etal-1994-combining</bibkey>
     </paper>
     <paper id="41">
       <title>Predicting and Managing Spoken Disfluencies During Human-Computer Interaction</title>
-      <author><first>Sharon</first><last>Oviatt</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
       <url hash="0aa32f61">H94-1041</url>
       <bibkey>oviatt-1994-predicting</bibkey>
     </paper>
     <paper id="42">
       <title>Integrated Techniques for Phrase Extraction From Speech</title>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="f588fe08">H94-1042</url>
       <bibkey>meteer-rohlicek-1994-integrated</bibkey>
     </paper>
     <paper id="43">
       <title>Session 7: Demonstrations</title>
-      <author><first>Victor</first><last>Abrash</last></author>
+      <author id="victor-abrash"><first>Victor</first><last>Abrash</last></author>
       <url hash="6630d6dc">H94-1043</url>
       <bibkey>abrash-1994-session</bibkey>
     </paper>
     <paper id="44">
       <title>A Prototype Reading Coach that Listens: Summary of Project <fixed-case>LISTEN</fixed-case></title>
-      <author><first>Alex</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alex</first><last>Hauptmann</last></author>
       <author><first>Jack</first><last>Mostow</last></author>
-      <author><first>Steven F.</first><last>Roth</last></author>
+      <author id="steven-roth"><first>Steven F.</first><last>Roth</last></author>
       <author><first>Matthew</first><last>Kane</last></author>
       <author><first>Adam</first><last>Swift</last></author>
       <url hash="df1c6e6b">H94-1044</url>
@@ -361,15 +361,15 @@
     </paper>
     <paper id="45">
       <title>Session 8 &amp;: 9: Statistical and Learning Methods</title>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <url hash="0c8880fc">H94-1045</url>
       <bibkey>jelinek-1994-session</bibkey>
     </paper>
     <paper id="46">
       <title>Using a Semantic Concordance for Sense Identification</title>
       <author><first>George A.</first><last>Miller</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Shari</first><last>Landes</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="shari-landes"><first>Shari</first><last>Landes</last></author>
       <author><first>Claudia</first><last>Leacock</last></author>
       <author><first>Robert G.</first><last>Thomas</last></author>
       <url hash="fec989aa">H94-1046</url>
@@ -377,16 +377,16 @@
     </paper>
     <paper id="47">
       <title>A New Approach to Word Sense Disambiguation</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <url hash="2d567d86">H94-1047</url>
       <bibkey>bruce-wiebe-1994-new</bibkey>
     </paper>
     <paper id="48">
       <title>A Maximum Entropy Model for Prepositional Phrase Attachment</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <author><first>Jeff</first><last>Reynar</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="26a6c508">H94-1048</url>
       <bibkey>ratnaparkhi-etal-1994-maximum</bibkey>
     </paper>
@@ -399,15 +399,15 @@
     <paper id="50">
       <title>Weighted Rational Transductions and their Application to Human Language Processing</title>
       <author><first>Fernando</first><last>Pereira</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <url hash="b06c2079">H94-1050</url>
       <bibkey>pereira-etal-1994-weighted</bibkey>
     </paper>
     <paper id="51">
       <title>Automatic Grammar Acquisition</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Heidi J.</first><last>Fox</last></author>
+      <author id="heidi-fox"><first>Heidi J.</first><last>Fox</last></author>
       <url hash="98704a84">H94-1051</url>
       <bibkey>miller-fox-1994-automatic</bibkey>
     </paper>
@@ -425,23 +425,23 @@
     <paper id="53">
       <title>Statistical Language Processing Using Hidden Understanding Models</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <url hash="33e86f8b">H94-1053</url>
       <bibkey>miller-etal-1994-statistical</bibkey>
     </paper>
     <paper id="54">
       <title><fixed-case>J</fixed-case>apanese Word Segmentation by Hidden <fixed-case>M</fixed-case>arkov Model</title>
-      <author><first>Constantine P.</first><last>Papageorgiou</last></author>
+      <author id="constantine-papageorgiou"><first>Constantine P.</first><last>Papageorgiou</last></author>
       <url hash="e99caf76">H94-1054</url>
       <bibkey>papageorgiou-1994-japanese</bibkey>
     </paper>
     <paper id="55">
       <title>Phonological Parsing for Bi-directional Letter-to-Sound/Sound-to-Letter Generation</title>
-      <author><first>Helen M.</first><last>Meng</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Victor W.</first><last>Zue</last></author>
+      <author id="helen-meng"><first>Helen M.</first><last>Meng</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor W.</first><last>Zue</last></author>
       <url hash="83522cf3">H94-1055</url>
       <bibkey>meng-etal-1994-phonological</bibkey>
     </paper>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="58">
       <title>Speech and Human Language Technology at the <fixed-case>N</fixed-case>aval <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratory</title>
-      <author><first>Helen M.</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen M.</first><last>Gigley</last></author>
       <url hash="f70daf35">H94-1058</url>
       <bibkey>gigley-1994-speech</bibkey>
     </paper>
@@ -472,13 +472,13 @@
     <paper id="60">
       <title>Language Processing <fixed-case>R</fixed-case>&amp;<fixed-case>D</fixed-case> Programmes Directorate <fixed-case>XIII E</fixed-case> of the <fixed-case>E</fixed-case>uropean <fixed-case>C</fixed-case>ommission</title>
       <author><first>Roberto</first><last>Cencioni</last></author>
-      <author><first>Giovanni Battista</first><last>Varile</last></author>
+      <author id="giovanni-battista-varile"><first>Giovanni Battista</first><last>Varile</last></author>
       <url hash="b336a5e3">H94-1060</url>
       <bibkey>cencioni-varile-1994-language</bibkey>
     </paper>
     <paper id="61">
       <title>Session 11: Acoustic Modeling and Robust <fixed-case>CSR</fixed-case></title>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <url hash="dce10a41">H94-1061</url>
       <bibkey>young-1994-session</bibkey>
     </paper>
@@ -492,8 +492,8 @@
     </paper>
     <paper id="63">
       <title>High-Accuracy Large-Vocabulary Speech Recognition Using Mixture Tying and Consistency Modeling</title>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <url hash="1b4701ca">H94-1063</url>
       <bibkey>digalakis-murveit-1994-high</bibkey>
     </paper>
@@ -509,9 +509,9 @@
     <paper id="65">
       <title>Adaptation to New Microphones Using Tied-Mixture Normalization</title>
       <author><first>Anastasios</first><last>Anastasakos</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="ae3bcb49">H94-1065</url>
       <bibkey>anastasakos-etal-1994-adaptation</bibkey>
     </paper>
@@ -527,7 +527,7 @@
     <paper id="67">
       <title>Microphone-Independent Robust Signal Processing Using Probabilistic Optimum Filtering</title>
       <author><first>Leonardo</first><last>Neumeyer</last></author>
-      <author><first>Mitchel</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitchel</first><last>Weintraub</last></author>
       <url hash="5364dfd0">H94-1067</url>
       <bibkey>neumeyer-weintraub-1994-microphone</bibkey>
     </paper>
@@ -562,13 +562,13 @@
     </paper>
     <paper id="72">
       <title>Document Representation in Natural Language Text Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="58783f6a">H94-1072</url>
       <bibkey>strzalkowski-1994-document</bibkey>
     </paper>
     <paper id="73">
       <title>Assessing the Retrieval Effectiveness of a Speech Retrieval System by Simulating Recognition Errors</title>
-      <author><first>Peter</first><last>Schauble</last></author>
+      <author id="peter-schauble"><first>Peter</first><last>Schauble</last></author>
       <author><first>Ulrike</first><last>Glavitsch</last></author>
       <url hash="1a6412fb">H94-1073</url>
       <bibkey>schauble-glavitsch-1994-assessing</bibkey>
@@ -589,7 +589,7 @@
     </paper>
     <paper id="76">
       <title>Session 13: <fixed-case>CSR</fixed-case> Search</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="5c858d95">H94-1076</url>
       <bibkey>schwartz-1994-session</bibkey>
     </paper>
@@ -600,15 +600,15 @@
       <author><first>Osamu</first><last>Yoshioka</last></author>
       <author><first>Satoshi</first><last>Takahashi</last></author>
       <author><first>Tomokazu</first><last>Yamada</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <url hash="dfc032ab">H94-1077</url>
       <bibkey>minami-etal-1994-large</bibkey>
     </paper>
     <paper id="78">
       <title>Techniques to Achieve an Accurate Real-Time Large-Vocabulary Speech Recognition System</title>
-      <author><first>Hy</first><last>Murveit</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
       <author><first>Peter</first><last>Monaco</last></author>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
       <author><first>John</first><last>Butzberger</last></author>
       <url hash="5b437d4a">H94-1078</url>
       <bibkey>murveit-etal-1994-techniques</bibkey>
@@ -630,16 +630,16 @@
     </paper>
     <paper id="81">
       <title>Is N-Best Dead?</title>
-      <author><first>Long</first><last>Nguyen</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="long-nguyen"><first>Long</first><last>Nguyen</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Ying</first><last>Zhao</last></author>
-      <author><first>George</first><last>Zavaliagkos</last></author>
+      <author id="george-zavaliagkos"><first>George</first><last>Zavaliagkos</last></author>
       <url hash="db85291f">H94-1081</url>
       <bibkey>nguyen-etal-1994-n</bibkey>
     </paper>
     <paper id="82">
       <title>Session 14: New Directions/Applications</title>
-      <author><first>Richard</first><last>Stern</last></author>
+      <author id="richard-m-stern"><first>Richard</first><last>Stern</last></author>
       <url hash="682c3a30">H94-1082</url>
       <bibkey>stern-1994-session</bibkey>
     </paper>
@@ -651,28 +651,28 @@
     </paper>
     <paper id="84">
       <title>Integrated Text and Image Understanding for Document Understanding</title>
-      <author><first>Suzanne</first><last>Liebowitz</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="suzanne-liebowitz-taylor"><first>Suzanne</first><last>Liebowitz</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Mark</first><last>Lipshutz</last></author>
       <author><first>Carl</first><last>Weir</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
       <author><first>Roslyn</first><last>Nilson</last></author>
-      <author><first>Marcia</first><last>Linebarger</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia</first><last>Linebarger</last></author>
       <url hash="36f6e35d">H94-1084</url>
       <bibkey>liebowitz-etal-1994-integrated</bibkey>
     </paper>
     <paper id="85">
       <title>Use of Lexical and Syntactic Techniques in Recognizing Handwritten Text</title>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <url hash="f9d5d2e9">H94-1085</url>
       <bibkey>srihari-1994-use</bibkey>
     </paper>
     <paper id="86">
       <title>On-Line Cursive Handwriting Recognition Using Hidden <fixed-case>M</fixed-case>arkov Models and Statistical Grammars</title>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <author><first>Thad</first><last>Starner</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>George</first><last>Chou</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="george-chou"><first>George</first><last>Chou</last></author>
       <url hash="5cde324b">H94-1086</url>
       <bibkey>makhoul-etal-1994-line</bibkey>
     </paper>
@@ -688,21 +688,21 @@
     </paper>
     <paper id="88">
       <title>Robust Continuous Speech Recognition</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="79eb768d">H94-1088</url>
       <bibkey>makhoul-schwartz-1994-robust</bibkey>
     </paper>
     <paper id="89">
       <title>Robustness, Portability and Scalability Language Systems</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="2faf0fa8">H94-1089</url>
       <bibkey>weischedel-1994-robustness</bibkey>
     </paper>
     <paper id="90">
       <title>Usable, Real-Time, Interactive Spoken Language Systems</title>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <url hash="8acd9a12">H94-1090</url>
       <bibkey>makhoul-bates-1994-usable</bibkey>
     </paper>
@@ -710,20 +710,20 @@
       <title>Evaluating the Use of Prosodic Information in Speech Recognition and Understanding</title>
       <author id="mari-ostendorf"><first>M.</first><last>Ostendorf</last></author>
       <author id="patti-price"><first>P.</first><last>Price</last></author>
-      <author><first>S. Shattuck</first><last>Hufnagel</last></author>
+      <author id="stefanie-shattuck-hufnagel"><first>S. Shattuck</first><last>Hufnagel</last></author>
       <url hash="40a2cdfa">H94-1091</url>
       <bibkey>ostendorf-etal-1994-evaluating</bibkey>
     </paper>
     <paper id="92">
       <title>Segment-Based Acoustic Models for Continuous Speech Recognition</title>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>J. Robin</first><last>Rohlicek</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="j-robin-rohlicek"><first>J. Robin</first><last>Rohlicek</last></author>
       <url hash="12514d42">H94-1092</url>
       <bibkey>ostendorf-rohlicek-1994-segment</bibkey>
     </paper>
     <paper id="93">
       <title><fixed-case>P</fixed-case>angloss: A Machine Translation Project</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="3664bc01">H94-1093</url>
       <bibkey>nirenburg-1994-pangloss</bibkey>
     </paper>
@@ -735,8 +735,8 @@
     </paper>
     <paper id="95">
       <title>Extracting Constraints on Word Usage from Large Text Corpora</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <url hash="ebdd86e5">H94-1095</url>
       <bibkey>mckeown-passonneau-1994-extracting</bibkey>
     </paper>
@@ -752,7 +752,7 @@
     </paper>
     <paper id="97">
       <title>Research in Large Vocabulary Continuous Speech Recognition</title>
-      <author><first>Janet</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet</first><last>Baker</last></author>
       <author><first>Larry</first><last>Gillick</last></author>
       <author><first>Robert</first><last>Roth</last></author>
       <url hash="e688de30">H94-1097</url>
@@ -760,20 +760,20 @@
     </paper>
     <paper id="98">
       <title>The Tipster/Shogun Project</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="636ef982">H94-1098</url>
       <bibkey>jacobs-1994-tipster</bibkey>
     </paper>
     <paper id="99">
       <title>Automatic Extraction of Grammars From Annotated Text</title>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <url hash="9b64d0bf">H94-1099</url>
       <bibkey>roukos-1994-automatic</bibkey>
     </paper>
     <paper id="100">
       <title><fixed-case>C</fixed-case>andide: A Statistical Machine Translation System</title>
-      <author><first>Stephen</first><last>DellaPietra</last></author>
-      <author><first>Vincent</first><last>DellaPietra</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen</first><last>DellaPietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent</first><last>DellaPietra</last></author>
       <url hash="0408a16c">H94-1100</url>
       <bibkey>dellapietra-dellapietra-1994-candide</bibkey>
     </paper>
@@ -786,32 +786,32 @@
     </paper>
     <paper id="102">
       <title>Robust Continuous Speech Recognition Technology Program Summary</title>
-      <author><first>Clifford J.</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford J.</first><last>Weinstein</last></author>
       <author><first>Douglas B.</first><last>Paul</last></author>
       <url hash="3603cd32">H94-1102</url>
       <bibkey>weinstein-paul-1994-robust</bibkey>
     </paper>
     <paper id="103">
       <title>Spoken Language Recognition and Understanding</title>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <url hash="3967a76c">H94-1103</url>
       <bibkey>zue-1994-spoken</bibkey>
     </paper>
     <paper id="104">
       <title><fixed-case>NIST</fixed-case>-<fixed-case>ARPA</fixed-case> Interagency Agreement: Human Language Technology Program</title>
-      <author><first>David S.</first><last>Pallett</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
       <url hash="a3fcbd37">H94-1104</url>
       <bibkey>pallett-1994-nist</bibkey>
     </paper>
     <paper id="105">
       <title>Written Language System Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="ec1c0405">H94-1105</url>
       <bibkey>sundheim-1994-written</bibkey>
     </paper>
     <paper id="106">
       <title>The <fixed-case>C</fixed-case>onsortium for <fixed-case>L</fixed-case>exical <fixed-case>R</fixed-case>esearch</title>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <url hash="84b7ceae">H94-1106</url>
       <bibkey>guthrie-1994-consortium</bibkey>
     </paper>
@@ -831,13 +831,13 @@
     </paper>
     <paper id="109">
       <title>Research in Natural Language Processing</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="dd26bf5a">H94-1109</url>
       <bibkey>grishman-1994-research</bibkey>
     </paper>
     <paper id="110">
       <title>Robust Text Processing and Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="1b299821">H94-1110</url>
       <bibkey>strzalkowski-1994-robust-text</bibkey>
     </paper>
@@ -864,7 +864,7 @@
     </paper>
     <paper id="114">
       <title>Annotation of <fixed-case>ATIS</fixed-case> Data</title>
-      <author><first>Kate</first><last>Hunicke-Smith</last></author>
+      <author id="kate-hunicke-smith"><first>Kate</first><last>Hunicke-Smith</last></author>
       <author><first>Jared</first><last>Bernstein</last></author>
       <url hash="fc72e6e4">H94-1114</url>
       <bibkey>hunicke-smith-bernstein-1994-annotation</bibkey>
@@ -872,16 +872,16 @@
     <paper id="115">
       <title>Combining Linguistic and Statistical Technology for Improved Spoken Language Understanding</title>
       <author><first>Michael</first><last>Cohen</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <url hash="a9206099">H94-1115</url>
       <bibkey>cohen-moore-1994-combining</bibkey>
     </paper>
     <paper id="116">
       <title>Consistency Modeling</title>
-      <author><first>Hy</first><last>Murveit</last></author>
-      <author><first>Vassilios</first><last>Digalakis</last></author>
+      <author id="hy-murveit"><first>Hy</first><last>Murveit</last></author>
+      <author id="vassilios-digalakis"><first>Vassilios</first><last>Digalakis</last></author>
       <author><first>Peter</first><last>Monaco</last></author>
-      <author><first>Mitch</first><last>Weintraub</last></author>
+      <author id="mitch-weintraub"><first>Mitch</first><last>Weintraub</last></author>
       <url hash="6cb2ebc3">H94-1116</url>
       <bibkey>murveit-etal-1994-consistency</bibkey>
     </paper>
@@ -909,14 +909,14 @@
     </paper>
     <paper id="120">
       <title>Natural Language Planning Dialogue for Interactive</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Len</first><last>Schubert</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="lenhart-schubert"><first>Len</first><last>Schubert</last></author>
       <url hash="549fa3e2">H94-1120</url>
       <bibkey>allen-schubert-1994-natural</bibkey>
     </paper>
     <paper id="121">
       <title><fixed-case>PANGLOSS</fixed-case>: Knowledge-Based Machine Translation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="4f8c4f73">H94-1121</url>
       <bibkey>hovy-1994-pangloss</bibkey>
     </paper>
diff --git a/data/xml/I05.xml b/data/xml/I05.xml
index 5f5f10174d..bdbc235455 100644
--- a/data/xml/I05.xml
+++ b/data/xml/I05.xml
@@ -13,7 +13,7 @@
     <paper id="1">
       <title>A New Method for Sentiment Classification in Text Retrieval</title>
       <author><first>Yi</first><last>Hu</last></author>
-      <author><first>Jianyong</first><last>Duan</last></author>
+      <author id="jianyong-duan"><first>Jianyong</first><last>Duan</last></author>
       <author><first>Xiaoming</first><last>Chen</last></author>
       <author><first>Bingzhen</first><last>Pei</last></author>
       <author><first>Ruzhan</first><last>Lu</last></author>
@@ -33,7 +33,7 @@
       <title>The Use of Monolingual Context Vectors for Missing Translations in Cross-Language Information Retrieval</title>
       <author><first>Yan</first><last>Qu</last></author>
       <author><first>Gregory</first><last>Grefenstette</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <doi>10.1007/11562214_3</doi>
       <url hash="9b979128">I05-1003</url>
       <bibkey>qu-etal-2005-use</bibkey>
@@ -49,7 +49,7 @@
     <paper id="5">
       <title>Corpus-Based Analysis of <fixed-case>J</fixed-case>apanese Relative Clause Constructions</title>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <doi>10.1007/11562214_5</doi>
       <url hash="901b0a89">I05-1005</url>
       <bibkey>abekawa-okumura-2005-corpus</bibkey>
@@ -64,8 +64,8 @@
     </paper>
     <paper id="7">
       <title>Parsing the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank with Semantic Knowledge</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
-      <author><first>Shuanglong</first><last>Li</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
+      <author id="shuanglong-li"><first>Shuanglong</first><last>Li</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
       <author><first>Yueliang</first><last>Qian</last></author>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="9">
       <title>Entropy as an Indicator of Context Boundaries: An Experiment Using a Web Search Engine</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <doi>10.1007/11562214_9</doi>
       <url hash="038364ab">I05-1009</url>
       <bibkey>tanaka-ishii-2005-entropy</bibkey>
@@ -90,7 +90,7 @@
     <paper id="10">
       <title>Automatic Discovery of Attribute Words from Web Documents</title>
       <author><first>Kosuke</first><last>Tokunaga</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <doi>10.1007/11562214_10</doi>
       <url hash="3a821ae1">I05-1010</url>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="11">
       <title>Aligning Needles in a Haystack: Paraphrase Acquisition Across the Web</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <author><first>Péter</first><last>Dienes</last></author>
       <doi>10.1007/11562214_11</doi>
       <url hash="ff1e9505">I05-1011</url>
@@ -117,14 +117,14 @@
       <title>Automatic Partial Parsing Rule Acquisition Using Decision Tree Induction</title>
       <author><first>Myung-Seok</first><last>Choi</last></author>
       <author><first>Chul Su</first><last>Lim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.1007/11562214_13</doi>
       <url hash="c8caa71d">I05-1013</url>
       <bibkey>choi-etal-2005-automatic</bibkey>
     </paper>
     <paper id="14">
       <title>Chunking Using Conditional Random Fields in <fixed-case>K</fixed-case>orean Texts</title>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <author><first>Mi-Young</first><last>Kim</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <doi>10.1007/11562214_14</doi>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="15">
       <title>High Efficiency Realization for a Wide-Coverage Unification Grammar</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <doi>10.1007/11562214_15</doi>
       <url hash="a9fead1a">I05-1015</url>
@@ -143,7 +143,7 @@
       <title>Linguistically-Motivated Grammar Extraction, Generalization and Adaptation</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Duen-Chi</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <doi>10.1007/11562214_16</doi>
       <url hash="59df8a9b">I05-1016</url>
       <bibkey>hsieh-etal-2005-linguistically</bibkey>
@@ -160,15 +160,15 @@
       <title>Adapting a Probabilistic Disambiguation Model of an <fixed-case>HPSG</fixed-case> Parser to a New Domain</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.1007/11562214_18</doi>
       <url hash="ae99bfe1">I05-1018</url>
       <bibkey>hara-etal-2005-adapting</bibkey>
     </paper>
     <paper id="19">
       <title>A Hybrid Approach to Single and Multiple <fixed-case>PP</fixed-case> Attachment Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Akshar</first><last>Bharathi</last></author>
-      <author><first>U.</first><last>Rohini</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharathi</last></author>
+      <author id="u-rohini"><first>U.</first><last>Rohini</last></author>
       <author><first>P.</first><last>Vishnu</last></author>
       <author><first>S.M.</first><last>Bendre</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="20">
       <title>Period Disambiguation with Maxent Model</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Xiaoyue</first><last>Liu</last></author>
       <doi>10.1007/11562214_20</doi>
       <url hash="b9d72faf">I05-1020</url>
@@ -186,8 +186,8 @@
     </paper>
     <paper id="21">
       <title>Acquiring Synonyms from Monolingual Comparable Texts</title>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <doi>10.1007/11562214_21</doi>
       <url hash="5c2eadc3">I05-1021</url>
       <bibkey>shimohata-sumita-2005-acquiring</bibkey>
@@ -230,7 +230,7 @@
       <author><last>Ye</last><first>Na</first></author>
       <author><last>Chang</last><first>Xinzhi</first></author>
       <author><last>Chen</last><first>Wenliang</first></author>
-      <author><first>Benjamin K</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K</first><last>Tsou</last></author>
       <doi>10.1007/11562214_26</doi>
       <url hash="b72a4fd5">I05-1026</url>
       <bibkey>zhu-etal-2005-using</bibkey>
@@ -239,7 +239,7 @@
       <title>Classifying <fixed-case>C</fixed-case>hinese Texts in Two Steps</title>
       <author><first>Xinghua</first><last>Fan</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author><first>Key-sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-sun</first><last>Choi</last></author>
       <author><first>Qin</first><last>Zhang</last></author>
       <doi>10.1007/11562214_27</doi>
       <url hash="eefb8f36">I05-1027</url>
@@ -248,7 +248,7 @@
     <paper id="28">
       <title>Assigning Polarity Scores to Reviews Using Machine Learning Techniques</title>
       <author><first>Daisuke</first><last>Okanohara</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.1007/11562214_28</doi>
       <url hash="45a1a40c">I05-1028</url>
       <bibkey>okanohara-tsujii-2005-assigning</bibkey>
@@ -283,7 +283,7 @@
     <paper id="32">
       <title>Finding Taxonomical Relation from an <fixed-case>MRD</fixed-case> for Thesaurus Extension</title>
       <author><first>SeonHwa</first><last>Choi</last></author>
-      <author><first>HyukRo</first><last>Park</last></author>
+      <author id="hyukro-park"><first>HyukRo</first><last>Park</last></author>
       <doi>10.1007/11562214_32</doi>
       <url hash="0a8ac5ef">I05-1032</url>
       <bibkey>choi-park-2005-finding</bibkey>
@@ -300,8 +300,8 @@
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Danmei</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <doi>10.1007/11562214_34</doi>
       <url hash="b7a0fa38">I05-1034</url>
       <bibkey>zhang-etal-2005-discovering</bibkey>
@@ -309,9 +309,9 @@
     <paper id="35">
       <title>Automatic Relation Extraction with Model Order Selection and Discriminative Label Identification</title>
       <author><last>Chen</last><first>Jinxiu</first></author>
-      <author><last>Ji</last><first>Donghong</first></author>
-      <author><last>Tan</last><first>Chew Lim</first></author>
-      <author><last>Niu</last><first>Zhengyu</first></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <doi>10.1007/11562214_35</doi>
       <url hash="3e86b2a6">I05-1035</url>
       <bibkey>chen-etal-2005-automatic</bibkey>
@@ -327,8 +327,8 @@
       <title>A Preliminary Work on Classifying Time Granularities of Temporal Questions</title>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <doi>10.1007/11562214_37</doi>
       <url hash="b06fcd15">I05-1037</url>
       <bibkey>li-etal-2005-preliminary</bibkey>
@@ -337,7 +337,7 @@
       <title>Classification of Multiple-Sentence Questions</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <doi>10.1007/11562214_38</doi>
       <url hash="ad49b20b">I05-1038</url>
       <bibkey>tamura-etal-2005-classification</bibkey>
@@ -353,8 +353,8 @@
     </paper>
     <paper id="40">
       <title>An Ensemble of Grapheme and Phoneme for Machine Transliteration</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.1007/11562214_40</doi>
       <url hash="565a3fab">I05-1040</url>
       <bibkey>oh-choi-2005-ensemble</bibkey>
@@ -388,8 +388,8 @@
       <title>Answering Definition Questions Using Web Knowledge Bases</title>
       <author><first>Zhushuo</first><last>Zhang</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <doi>10.1007/11562214_44</doi>
       <url hash="f25f0bb9">I05-1044</url>
       <bibkey>zhang-etal-2005-answering</bibkey>
@@ -397,7 +397,7 @@
     <paper id="45">
       <title>Exploring Syntactic Relation Patterns for Question Answering</title>
       <author><first>Dan</first><last>Shen</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <doi>10.1007/11562214_45</doi>
       <url hash="df47d122">I05-1045</url>
@@ -406,16 +406,16 @@
     <paper id="46">
       <title>Web-Based Unsupervised Learning for Query Formulation in Question Answering</title>
       <author><first>Yi-Chia</first><last>Wang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Tyne</first><last>Liang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <doi>10.1007/11562214_46</doi>
       <url hash="930ffd4e">I05-1046</url>
       <bibkey>wang-etal-2005-web</bibkey>
     </paper>
     <paper id="47">
       <title>A Chunking Strategy Towards Unknown Word Detection in <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><last>Zhou</last><first>GuoDong</first></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <doi>10.1007/11562214_47</doi>
       <url hash="05f72595">I05-1047</url>
       <bibkey>zhou-2005-chunking</bibkey>
@@ -432,7 +432,7 @@
     <paper id="49">
       <title>Relative Compositionality of Multi-word Expressions: A Study of Verb-Noun (<fixed-case>V</fixed-case>-N) Collocations</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.1007/11562214_49</doi>
       <url hash="b7257685">I05-1049</url>
       <bibkey>venkatapathy-joshi-2005-relative</bibkey>
@@ -441,7 +441,7 @@
       <title>Automatic Extraction of Fixed Multiword Expressions</title>
       <author><first>Campbell</first><last>Hore</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yūji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yūji</first><last>Matsumoto</last></author>
       <doi>10.1007/11562214_50</doi>
       <url hash="4f79afb5">I05-1050</url>
       <bibkey>hore-etal-2005-automatic</bibkey>
@@ -459,8 +459,8 @@
     <paper id="52">
       <title>Why Is Zero Marking Important in <fixed-case>K</fixed-case>orean?</title>
       <author><first>Sun-Hee</first><last>Lee</last></author>
-      <author><first>Donna K.</first><last>Byron</last></author>
-      <author><first>Seok Bae</first><last>Jang</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
+      <author id="seok-bae-jang"><first>Seok Bae</first><last>Jang</last></author>
       <doi>10.1007/11562214_52</doi>
       <url hash="e0a96417">I05-1052</url>
       <bibkey>lee-etal-2005-zero</bibkey>
@@ -478,9 +478,9 @@
     <paper id="54">
       <title>Machine Translation Based on Constraint-Based Synchronous Grammar</title>
       <author><first>Fai</first><last>Wong</last></author>
-      <author><first>Dong-Cheng</first><last>Hu</last></author>
-      <author><first>Yu-Hang</first><last>Mao</last></author>
-      <author><first>Ming-Chui</first><last>Dong</last></author>
+      <author id="dong-cheng-hu"><first>Dong-Cheng</first><last>Hu</last></author>
+      <author id="yu-hang-mao"><first>Yu-Hang</first><last>Mao</last></author>
+      <author id="ming-chui-dong"><first>Ming-Chui</first><last>Dong</last></author>
       <author><first>Yi-Ping</first><last>Li</last></author>
       <doi>10.1007/11562214_54</doi>
       <url hash="2239d502">I05-1054</url>
@@ -489,7 +489,7 @@
     <paper id="55">
       <title>A Machine Learning Approach to Sentence Ordering for Multidocument Summarization and Its Evaluation</title>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <doi>10.1007/11562214_55</doi>
       <url hash="a57ae438">I05-1055</url>
@@ -498,8 +498,8 @@
     <paper id="56">
       <title>Significant Sentence Extraction by <fixed-case>E</fixed-case>uclidean Distance Based on Singular Value Decomposition</title>
       <author><first>Changbeom</first><last>Lee</last></author>
-      <author><first>Hyukro</first><last>Park</last></author>
-      <author><first>Cheolyoung</first><last>Ock</last></author>
+      <author id="hyukro-park"><first>Hyukro</first><last>Park</last></author>
+      <author id="cheol-young-ock"><first>Cheolyoung</first><last>Ock</last></author>
       <doi>10.1007/11562214_56</doi>
       <url hash="0b6a7739">I05-1056</url>
       <bibkey>lee-etal-2005-significant</bibkey>
@@ -509,7 +509,7 @@
       <author><first>Seonho</first><last>Kim</last></author>
       <author><first>Juntae</first><last>Yoon</last></author>
       <author><first>Kyung-Mi</first><last>Park</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <doi>10.1007/11562214_57</doi>
       <url hash="f9d289fe">I05-1057</url>
       <bibkey>kim-etal-2005-two</bibkey>
@@ -517,16 +517,16 @@
     <paper id="58">
       <title>Heuristic Methods for Reducing Errors of Geographic Named Entities Learned by Bootstrapping</title>
       <author><first>Seungwoo</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <doi>10.1007/11562214_58</doi>
       <url hash="02e389c8">I05-1058</url>
       <bibkey>lee-lee-2005-heuristic</bibkey>
     </paper>
     <paper id="59">
       <title>Building a <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Dictionary Using Kanji/Hanzi Conversion</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.1007/11562214_59</doi>
       <url hash="be33ee74">I05-1059</url>
       <bibkey>goh-etal-2005-building</bibkey>
@@ -544,7 +544,7 @@
       <title><fixed-case>CTEMP</fixed-case>: A <fixed-case>C</fixed-case>hinese Temporal Parser for Extracting and Normalizing Temporal Information</title>
       <author><last>Wu</last><first>Mingli</first></author>
       <author><last>Li</last><first>Wenjie</first></author>
-      <author><last>Lu</last><first>Qin</first></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><last>Li</last><first>Baoli</first></author>
       <doi>10.1007/11562214_61</doi>
       <url hash="5589c42e">I05-1061</url>
@@ -552,7 +552,7 @@
     </paper>
     <paper id="62">
       <title><fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Terminology Extraction from Comparable Corpora</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <doi>10.1007/11562214_62</doi>
       <url hash="654d7bf1">I05-1062</url>
@@ -562,7 +562,7 @@
       <title>A Twin-Candidate Model of Coreference Resolution with Non-Anaphor Identification Capability</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <doi>10.1007/11562214_63</doi>
       <url hash="ef64c808">I05-1063</url>
       <bibkey>yang-etal-2005-twin</bibkey>
@@ -571,7 +571,7 @@
       <title>Improving <fixed-case>K</fixed-case>orean Speech Acts Analysis by Using Shrinkage and Discourse Stack</title>
       <author><first>Kyungsun</first><last>Kim</last></author>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <doi>10.1007/11562214_64</doi>
       <url hash="a192a840">I05-1064</url>
       <bibkey>kim-etal-2005-improving</bibkey>
@@ -602,14 +602,14 @@
     <paper id="68">
       <title>Semantic Role Labelling of Prepositional Phrases</title>
       <author><first>Patrick</first><last>Ye</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.1007/11562214_68</doi>
       <url hash="5cb21be3">I05-1068</url>
       <bibkey>ye-baldwin-2005-semantic</bibkey>
     </paper>
     <paper id="69">
       <title>Global Path-Based Refinement of Noisy Graphs Applied to Verb Semantics</title>
-      <author><first>Timothy</first><last>Chklovski</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
       <doi>10.1007/11562214_69</doi>
       <url hash="89c69d95">I05-1069</url>
@@ -617,8 +617,8 @@
     </paper>
     <paper id="70">
       <title>Semantic Role Tagging for <fixed-case>C</fixed-case>hinese at the Lexical Level</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <doi>10.1007/11562214_70</doi>
       <url hash="b54267c5">I05-1070</url>
       <bibkey>kwong-tsou-2005-semantic</bibkey>
@@ -627,7 +627,7 @@
       <title>Detecting Article Errors Based on the Mass Count Distinction</title>
       <author><first>Ryo</first><last>Nagata</last></author>
       <author><first>Takahiro</first><last>Wakana</last></author>
-      <author><first>Fumito</first><last>Masui</last></author>
+      <author id="fumito-masui"><first>Fumito</first><last>Masui</last></author>
       <author><first>Atsuo</first><last>Kawai</last></author>
       <author><first>Naoki</first><last>Isu</last></author>
       <doi>10.1007/11562214_71</doi>
@@ -636,9 +636,9 @@
     </paper>
     <paper id="72">
       <title>Principles of Non-stationary Hidden <fixed-case>M</fixed-case>arkov Model and Its Applications to Sequence Labeling Task</title>
-      <author><last>Xiao</last><first>JingHui</first></author>
-      <author><last>Liu</last><first>BingQuan</first></author>
-      <author><last>Wang</last><first>XiaoLong</first></author>
+      <author id="jinghui-xiao"><first>JingHui</first><last>Xiao</last></author>
+      <author id="bingquan-liu"><first>BingQuan</first><last>Liu</last></author>
+      <author id="xiao-long-wang"><first>XiaoLong</first><last>Wang</last></author>
       <doi>10.1007/11562214_72</doi>
       <url hash="d87cb398">I05-1072</url>
       <bibkey>xiao-etal-2005-principles</bibkey>
@@ -654,7 +654,7 @@
     <paper id="74">
       <title>A Connectionist Model of Anticipation in Visual Worlds</title>
       <author><first>Marshall R.</first><last>Mayberry</last></author>
-      <author><first>Matthew W.</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew W.</first><last>Crocker</last></author>
       <author><first>Pia</first><last>Knoeferle</last></author>
       <doi>10.1007/11562214_74</doi>
       <url hash="c2005eef">I05-1074</url>
@@ -662,8 +662,8 @@
     </paper>
     <paper id="75">
       <title>Automatically Inducing a Part-of-Speech Tagger by Projecting from Multiple Source Languages Across Aligned Corpora</title>
-      <author><first>Victoria</first><last>Fossum</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <doi>10.1007/11562214_75</doi>
       <url hash="e6f8f830">I05-1075</url>
       <bibkey>fossum-abney-2005-automatically</bibkey>
@@ -687,7 +687,7 @@
     </paper>
     <paper id="78">
       <title>Regularisation Techniques for Conditional Random Fields: Parameterised Versus Parameter-Free</title>
-      <author><first>Andrew</first><last>Smith</last></author>
+      <author id="andrew-smith"><first>Andrew</first><last>Smith</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <doi>10.1007/11562214_78</doi>
       <url hash="1b425bb9">I05-1078</url>
@@ -697,7 +697,7 @@
       <title>Exploiting Lexical Conceptual Structure for Paraphrase Generation</title>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.1007/11562214_79</doi>
       <url hash="efaef3c4">I05-1079</url>
       <bibkey>fujita-etal-2005-exploiting</bibkey>
@@ -705,8 +705,8 @@
     <paper id="80">
       <title>Word Sense Disambiguation by Relative Selection</title>
       <author><first>Hee-Cheol</first><last>Seo</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Myung-Gil</first><last>Jang</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="myung-gil-jang"><first>Myung-Gil</first><last>Jang</last></author>
       <doi>10.1007/11562214_80</doi>
       <url hash="3286187b">I05-1080</url>
       <bibkey>seo-etal-2005-word</bibkey>
@@ -714,7 +714,7 @@
     <paper id="81">
       <title>Towards Robust High Performance Word Sense Disambiguation of <fixed-case>E</fixed-case>nglish Verbs Using Rich Linguistic Features</title>
       <author><first>Jinying</first><last>Chen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.1007/11562214_81</doi>
       <url hash="973d44b6">I05-1081</url>
       <bibkey>chen-palmer-2005-towards</bibkey>
@@ -722,7 +722,7 @@
     <paper id="82">
       <title>Automatic Interpretation of Noun Compounds Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Similarity</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.1007/11562214_82</doi>
       <url hash="c2f87650">I05-1082</url>
       <bibkey>kim-baldwin-2005-automatic</bibkey>
@@ -738,7 +738,7 @@
     </paper>
     <paper id="84">
       <title>A Comparative Study of Language Models for Book and Author Recognition</title>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Boris</first><last>Katz</last></author>
       <doi>10.1007/11562214_84</doi>
       <url hash="4b60f9ec">I05-1084</url>
@@ -772,8 +772,8 @@
     <paper id="88">
       <title>Extracting Terminologically Relevant Collocations in the Translation of <fixed-case>C</fixed-case>hinese Monograph</title>
       <author><first>Byeong-Kwu</first><last>Kang</last></author>
-      <author><first>Bao-Bao</first><last>Chang</last></author>
-      <author><first>Yi-Rong</first><last>Chen</last></author>
+      <author id="baobao-chang"><first>Bao-Bao</first><last>Chang</last></author>
+      <author id="yi-rong-chen"><first>Yi-Rong</first><last>Chen</last></author>
       <author><first>Shi-Wen</first><last>Yu</last></author>
       <doi>10.1007/11562214_88</doi>
       <url hash="c8dc32f1">I05-1088</url>
@@ -798,7 +798,7 @@
     <paper id="1">
       <title>A Classification-based Algorithm for Consistency Check of Part-of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora</title>
       <author><first>Hu</first><last>Zhang</last></author>
-      <author><first>Jia-heng</first><last>Zheng</last></author>
+      <author id="jiaheng-zheng"><first>Jia-heng</first><last>Zheng</last></author>
       <author><first>Ying</first><last>Zhao</last></author>
       <url hash="3e2c7f49">I05-2001</url>
       <bibkey>zhang-etal-2005-classification</bibkey>
@@ -806,7 +806,7 @@
     <paper id="2">
       <title>A Hierarchical Parsing Approach with Punctuation Processing for Long <fixed-case>C</fixed-case>hinese Sentences</title>
       <author><first>Xing</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Rile</first><last>Hu</last></author>
       <url hash="634b81b6">I05-2002</url>
       <bibkey>li-etal-2005-hierarchical</bibkey>
@@ -814,7 +814,7 @@
     <paper id="3">
       <title>A Hybrid <fixed-case>C</fixed-case>hinese Language Model based on a Combination of Ontology with Statistical Method</title>
       <author><first>Dequan</first><last>Zheng</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <url hash="5efa8862">I05-2003</url>
@@ -822,7 +822,7 @@
     </paper>
     <paper id="4">
       <title>A Language Independent Algorithm for Single and Multiple Document Summarization</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Paul</first><last>Tarau</last></author>
       <url hash="08bdde13">I05-2004</url>
       <bibkey>mihalcea-tarau-2005-language</bibkey>
@@ -845,7 +845,7 @@
     <paper id="7">
       <title>A Resource-based <fixed-case>K</fixed-case>orean Morphological Annotation System</title>
       <author><first>Hyun-gue</first><last>Huh</last></author>
-      <author><first>Éric</first><last>Laporte</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
       <url hash="5b48ba8c">I05-2007</url>
       <bibkey>huh-laporte-2005-resource</bibkey>
     </paper>
@@ -876,14 +876,14 @@
     <paper id="11">
       <title>Automatic Detection of Opinion Bearing Words and Sentences</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="ba9c8e92">I05-2011</url>
       <bibkey>kim-hovy-2005-automatic</bibkey>
     </paper>
     <paper id="12">
       <title>Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Translations for Constituents of Technical Terms</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="559af0b9">I05-2012</url>
       <bibkey>oh-choi-2005-automatic</bibkey>
     </paper>
@@ -927,7 +927,7 @@
     <paper id="18">
       <title>Detecting the Countability of <fixed-case>E</fixed-case>nglish Compound Nouns Using Web-based Models</title>
       <author><first>Jing</first><last>Peng</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <url hash="4efa7489">I05-2018</url>
       <bibkey>peng-araki-2005-detecting</bibkey>
     </paper>
@@ -948,7 +948,7 @@
       <author><first>Toshihiro</first><last>Takagi</last></author>
       <author><first>Yasuhiro</first><last>Sasaki</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <url hash="734a3fe7">I05-2020</url>
       <bibkey>tonoike-etal-2005-effect</bibkey>
     </paper>
@@ -988,15 +988,15 @@
     </paper>
     <paper id="25">
       <title>Investigating the Features that Affect Cue Usage of Non-native Speakers of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Xinyu</first><last>Deng</last></author>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="xinyu-deng"><first>Xinyu</first><last>Deng</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
       <url hash="3910309b">I05-2025</url>
       <bibkey>deng-nakamura-2005-investigating</bibkey>
     </paper>
     <paper id="26">
       <title>Lexical Chains and Sliding Locality Windows in Content-based Text Similarity Detection</title>
       <author><first>Thade</first><last>Nahnsen</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Boris</first><last>Katz</last></author>
       <url hash="997172cd">I05-2026</url>
       <bibkey>nahnsen-etal-2005-lexical</bibkey>
@@ -1026,15 +1026,15 @@
       <author><first>Nozomi</first><last>Kobayashi</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="d40e8c66">I05-2030</url>
       <bibkey>kobayashi-etal-2005-opinion</bibkey>
     </paper>
     <paper id="31">
       <title>Problems of Reusing an Existing <fixed-case>MT</fixed-case> System</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <url hash="12dda0d3">I05-2031</url>
       <bibkey>bojar-etal-2005-problems</bibkey>
     </paper>
@@ -1050,22 +1050,22 @@
     <paper id="33">
       <title><fixed-case>POS</fixed-case> Tagger Combinations on <fixed-case>H</fixed-case>ungarian Text</title>
       <author><first>András</first><last>Kuba</last></author>
-      <author><first>László</first><last>Felföldi</last></author>
-      <author><first>András</first><last>Kocsor</last></author>
+      <author id="laszlo-felfoldi"><first>László</first><last>Felföldi</last></author>
+      <author id="andras-kocsor"><first>András</first><last>Kocsor</last></author>
       <url hash="5ef79a77">I05-2033</url>
       <bibkey>kuba-etal-2005-pos</bibkey>
     </paper>
     <paper id="34">
       <title>Probabilistic Models for <fixed-case>K</fixed-case>orean Morphological Analysis</title>
       <author><first>Do-Gil</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <url hash="fba65af0">I05-2034</url>
       <bibkey>lee-rim-2005-probabilistic</bibkey>
     </paper>
     <paper id="35">
       <title>Rapid Prototyping of Scalable Grammars: Towards Modularity in Extensions to a Language-Independent Core</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <url hash="6df2d691">I05-2035</url>
       <bibkey>bender-flickinger-2005-rapid</bibkey>
     </paper>
@@ -1080,16 +1080,16 @@
       <title>Restoring an Elided Entry Word in a Sentence for Encyclopedia <fixed-case>QA</fixed-case> System</title>
       <author><first>Soojong</first><last>Lim</last></author>
       <author><first>Changki</first><last>Lee</last></author>
-      <author><first>Myoung-Gil</first><last>Jang</last></author>
+      <author id="myung-gil-jang"><first>Myoung-Gil</first><last>Jang</last></author>
       <url hash="cb09c70f">I05-2037</url>
       <bibkey>lim-etal-2005-restoring</bibkey>
     </paper>
     <paper id="38">
       <title>Syntax Annotation for the <fixed-case>GENIA</fixed-case> Corpus</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Akane</first><last>Yakushiji</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="e3654140">I05-2038</url>
       <bibkey>tateisi-etal-2005-syntax</bibkey>
     </paper>
@@ -1102,9 +1102,9 @@
     <paper id="40">
       <title>Transformation Based <fixed-case>C</fixed-case>hinese Entity Detection and Tracking</title>
       <author><first>Yaqian</first><last>Zhou</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <url hash="8935031f">I05-2040</url>
       <bibkey>zhou-etal-2005-transformation</bibkey>
     </paper>
@@ -1114,14 +1114,14 @@
       <author><first>Yongjoo</first><last>Cho</last></author>
       <author><first>Sunghoon</first><last>Son</last></author>
       <author><first>Ui-Sung</first><last>Song</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <url hash="de10a880">I05-2041</url>
       <bibkey>park-etal-2005-tree</bibkey>
     </paper>
     <paper id="42">
       <title>Toward a Unified Evaluation Method for Multiple Reading Support Systems: A Reading Speed-based Procedure</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Takeshi</first><last>Kutsumi</last></author>
       <author><first>Ichiko</first><last>Sata</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
@@ -1150,29 +1150,29 @@
     <paper id="45">
       <title>Unsupervised Feature Selection for Relation Extraction</title>
       <author><first>Jinxiu</first><last>Chen</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <url hash="13418c19">I05-2045</url>
       <bibkey>chen-etal-2005-unsupervised</bibkey>
     </paper>
     <paper id="46">
       <title>Using Maximum Entropy to Extract Biomedical Named Entities without Dictionaries</title>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <author><first>Chia-Wei</first><last>Wu</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="3d63489f">I05-2046</url>
       <bibkey>tsai-etal-2005-using</bibkey>
     </paper>
     <paper id="47">
       <title>Automated Text Summarization</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <url hash="29155078">I05-2047</url>
       <bibkey>lin-2005-automated</bibkey>
     </paper>
     <paper id="48">
       <title>Statistical Machine Translation Part <fixed-case>I</fixed-case>: Hands-On Introduction</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="913f3081">I05-2048</url>
       <bibkey>vogel-2005-statistical</bibkey>
     </paper>
@@ -1200,8 +1200,8 @@
     </frontmatter>
     <paper id="1">
       <title>Detecting Segmentation Errors in <fixed-case>C</fixed-case>hinese Annotated Corpus</title>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <url hash="27d87f8f">I05-3001</url>
@@ -1217,7 +1217,7 @@
       <title><fixed-case>C</fixed-case>hinese Deterministic Dependency Analyzer: Examining Effects of Global Features and Root Node Finder</title>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="f6186ce5">I05-3003</url>
       <bibkey>cheng-etal-2005-chinese</bibkey>
     </paper>
@@ -1230,9 +1230,9 @@
     </paper>
     <paper id="5">
       <title>Morphological features help <fixed-case>POS</fixed-case> tagging of unknown words across language varieties</title>
-      <author><first>Huihsin</first><last>Tseng</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <url hash="adff81eb">I05-3005</url>
       <bibkey>tseng-etal-2005-morphological</bibkey>
     </paper>
@@ -1251,17 +1251,17 @@
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Yiching</first><last>Wu</last></author>
-      <author><first>Chih-Ming</first><last>Chiu</last></author>
+      <author id="chih-ming-chiu"><first>Chih-Ming</first><last>Chiu</last></author>
       <author><first>Simon</first><last>Smith</last></author>
-      <author><first>Pavel</first><last>Rychly</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychly</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="72e80baf">I05-3007</url>
       <bibkey>huang-etal-2005-chinese</bibkey>
     </paper>
     <paper id="8">
       <title>Word Meaning Inducing via Character Ontology: A Survey on the Semantic Prediction of <fixed-case>C</fixed-case>hinese Two-Character Words</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <url hash="50d6ac67">I05-3008</url>
       <bibkey>hsieh-2005-word</bibkey>
     </paper>
@@ -1273,21 +1273,21 @@
     </paper>
     <paper id="10">
       <title>Turn-taking in <fixed-case>M</fixed-case>andarin Dialogue: Interactions of Tone and Intonation</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <url hash="5dc63975">I05-3010</url>
       <bibkey>levow-2005-turn</bibkey>
     </paper>
     <paper id="11">
       <title>Learning a Log-Linear Model with Bilingual Phrase-Pair Features for Statistical Machine Translation</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="e706b852">I05-3011</url>
       <bibkey>zhao-waibel-2005-learning</bibkey>
     </paper>
     <paper id="12">
       <title>Integrating Collocation Features in <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
       <author><first>Wanyin</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <url hash="cbe4d16f">I05-3012</url>
       <bibkey>li-etal-2005-integrating</bibkey>
@@ -1295,7 +1295,7 @@
     <paper id="13">
       <title><fixed-case>NIL</fixed-case> Is Not Nothing: Recognition of <fixed-case>C</fixed-case>hinese Network Informal Language Expressions</title>
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Wei</first><last>Gao</last></author>
       <url hash="6c259895">I05-3013</url>
       <bibkey>xia-etal-2005-nil</bibkey>
@@ -1304,7 +1304,7 @@
       <title>The Robustness of Domain Lexico-Taxonomy: Expanding Domain Lexicon with <fixed-case>C</fixed-case>i<fixed-case>L</fixed-case>in</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Xiang-Bing</first><last>Li</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <url hash="1b570c37">I05-3014</url>
       <bibkey>huang-etal-2005-robustness</bibkey>
     </paper>
@@ -1317,7 +1317,7 @@
     </paper>
     <paper id="16">
       <title>Resolving Pronominal References in <fixed-case>C</fixed-case>hinese with the <fixed-case>H</fixed-case>obbs Algorithm</title>
-      <author><first>Susan P.</first><last>Converse</last></author>
+      <author id="susan-p-converse"><first>Susan P.</first><last>Converse</last></author>
       <url hash="3354bdf9">I05-3016</url>
       <bibkey>converse-2005-resolving</bibkey>
     </paper>
@@ -1332,9 +1332,9 @@
       <author><first>Masayuki</first><last>Asahara</last></author>
       <author><first>Kenta</first><last>Fukuoka</last></author>
       <author><first>Ai</first><last>Azuma</last></author>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Yotaro</first><last>Watanabe</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Takashi</first><last>Tsuzuki</last></author>
       <url hash="fdd22684">I05-3018</url>
       <bibkey>asahara-etal-2005-combination</bibkey>
@@ -1356,7 +1356,7 @@
     </paper>
     <paper id="21">
       <title>An Example-Based <fixed-case>C</fixed-case>hinese Word Segmentation System for <fixed-case>CWSB</fixed-case>-2</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Xiaoyue</first><last>Liu</last></author>
       <url hash="63d67ceb">I05-3021</url>
       <bibkey>kit-liu-2005-example</bibkey>
@@ -1365,7 +1365,7 @@
       <title><fixed-case>C</fixed-case>hinese Word Segmentation in <fixed-case>FTRD</fixed-case> <fixed-case>B</fixed-case>eijing</title>
       <author><first>Heng</first><last>Li</last></author>
       <author><first>Yuan</first><last>Dong</last></author>
-      <author><first>Xinnian</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xinnian</first><last>Mao</last></author>
       <author><first>Haila</first><last>Wang</last></author>
       <author><first>Wu</first><last>Liu</last></author>
       <url hash="e8b2df06">I05-3022</url>
@@ -1375,8 +1375,8 @@
       <title>Perceptron Learning for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Yaoyong</first><last>Li</last></author>
       <author><first>Chuanjiang</first><last>Miao</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <url hash="a04ed58a">I05-3023</url>
       <bibkey>li-etal-2005-perceptron</bibkey>
     </paper>
@@ -1399,17 +1399,17 @@
       <title>Description of the <fixed-case>HKU</fixed-case> <fixed-case>C</fixed-case>hinese Word Segmentation System for Sighan Bakeoff 2005</title>
       <author><first>Guohong</first><last>Fu</last></author>
       <author><first>Kang-Kwong</first><last>Luke</last></author>
-      <author><first>Percy Ping-Wai</first><last>Wong</last></author>
+      <author id="ping-wai-wong"><first>Percy Ping-Wai</first><last>Wong</last></author>
       <url hash="a535c71b">I05-3026</url>
       <bibkey>fu-etal-2005-description</bibkey>
     </paper>
     <paper id="27">
       <title>A Conditional Random Field Word Segmenter for Sighan Bakeoff 2005</title>
-      <author><first>Huihsin</first><last>Tseng</last></author>
-      <author><first>Pichuan</first><last>Chang</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
+      <author id="pi-chuan-chang"><first>Pichuan</first><last>Chang</last></author>
       <author><first>Galen</first><last>Andrew</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <url hash="f5c60fbd">I05-3027</url>
       <bibkey>tseng-etal-2005-conditional</bibkey>
     </paper>
@@ -1435,7 +1435,7 @@
       <author><first>Wei</first><last>Jiang</last></author>
       <author><first>Jian</first><last>Zhao</last></author>
       <author><first>Yi</first><last>Guan</last></author>
-      <author><first>Zhiming</first><last>Xu</last></author>
+      <author id="zhiming-xu"><first>Zhiming</first><last>Xu</last></author>
       <url hash="92a42788">I05-3030</url>
       <bibkey>jiang-etal-2005-chinese</bibkey>
     </paper>
@@ -1448,7 +1448,7 @@
     </paper>
     <paper id="32">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation in <fixed-case>ICT</fixed-case>-<fixed-case>NLP</fixed-case></title>
-      <author><first>ShuangLong</first><last>Li</last></author>
+      <author id="shuanglong-li"><first>ShuangLong</first><last>Li</last></author>
       <url hash="51f30783">I05-3032</url>
       <bibkey>li-2005-chinese</bibkey>
     </paper>
@@ -1467,9 +1467,9 @@
     <paper id="35">
       <title>A Hybrid Approach to <fixed-case>C</fixed-case>hinese Word Segmentation around <fixed-case>CRF</fixed-case>s</title>
       <author><last>Zhou</last><first>Jun-sheng</first></author>
-      <author><last>Dai</last><first>Xin-yu</first></author>
+      <author id="xinyu-dai"><first>Xin-yu</first><last>Dai</last></author>
       <author><last>Ni</last><first>Rui-yu</first></author>
-      <author><last>Chen</last><first>Jia-jun</first></author>
+      <author id="jiajun-chen"><first>Jia-jun</first><last>Chen</last></author>
       <url hash="5ab907fb">I05-3035</url>
       <bibkey>zhou-etal-2005-hybrid</bibkey>
     </paper>
@@ -1511,7 +1511,7 @@
     <paper id="3">
       <title>Corpus-oriented Acquisition of <fixed-case>C</fixed-case>hinese Grammar</title>
       <author><first>Yan</first><last>Zhang</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <url hash="fc1c3b36">I05-4003</url>
       <bibkey>zhang-kashioka-2005-corpus</bibkey>
     </paper>
@@ -1542,7 +1542,7 @@
       <title>Cross-lingual Conversion of Lexical Semantic Relations: Building Parallel Wordnets</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>I-Li</first><last>Su</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Xiang-Bing</first><last>Li</last></author>
       <url hash="33d4565b">I05-4007</url>
       <bibkey>huang-etal-2005-cross</bibkey>
@@ -1556,17 +1556,17 @@
     <paper id="9">
       <title>Question Classification using Multiple Classifiers</title>
       <author><last>Li</last><first>Xin</first></author>
-      <author><last>Huang</last><first>Xuan-Jing</first></author>
-      <author><last>Wu</last><first>Li-de</first></author>
+      <author id="xuan-jing-huang"><first>Xuan-Jing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Li-de</first><last>Wu</last></author>
       <url hash="e8a56cce">I05-4009</url>
       <bibkey>li-etal-2005-question</bibkey>
     </paper>
     <paper id="10">
       <title>Harvesting the Bitexts of the Laws of <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong From the Web</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Xiaoyue</first><last>Liu</last></author>
-      <author><first>KingKui</first><last>Sin</last></author>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="king-kui-sin"><first>KingKui</first><last>Sin</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <url hash="5b6df976">I05-4010</url>
       <bibkey>kit-etal-2005-harvesting</bibkey>
     </paper>
@@ -1589,13 +1589,13 @@
     <paper id="1">
       <title>Support Vector Machines for Paraphrase Identification and Corpus Construction</title>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <url hash="43d57091">I05-5001</url>
       <bibkey>brockett-dolan-2005-support</bibkey>
     </paper>
     <paper id="2">
       <title>Automatically Constructing a Corpus of Sentential Paraphrases</title>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <url hash="0438642c">I05-5002</url>
       <bibkey>dolan-brockett-2005-automatically</bibkey>
@@ -1604,7 +1604,7 @@
       <title>Using Machine Translation Evaluation Techniques to Determine Sentence-level Semantic Equivalence</title>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="e950ae21">I05-5003</url>
       <bibkey>finch-etal-2005-using</bibkey>
     </paper>
@@ -1618,7 +1618,7 @@
     <paper id="5">
       <title>Structural variation in generated health reports</title>
       <author><first>Catalina</first><last>Hallett</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="570b5480">I05-5005</url>
       <bibkey>hallett-scott-2005-structural</bibkey>
     </paper>
@@ -1631,7 +1631,7 @@
     </paper>
     <paper id="7">
       <title>Automated Generalization of Phrasal Paraphrases from the Web</title>
-      <author><first>Weigang</first><last>Li</last></author>
+      <author id="weigang-li"><first>Weigang</first><last>Li</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
@@ -1655,7 +1655,7 @@
     <paper id="10">
       <title>Automatic generation of large-scale paraphrases</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="603c73ca">I05-5010</url>
       <bibkey>power-scott-2005-automatic</bibkey>
     </paper>
@@ -1670,7 +1670,7 @@
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <url hash="860bec5f">I05-5012</url>
       <bibkey>wan-etal-2005-towards</bibkey>
     </paper>
@@ -1723,7 +1723,7 @@
     </paper>
     <paper id="5">
       <title>Building Dialogue Corpora for Nursing Activity Analysis</title>
-      <author><first>Hiromi itoh</first><last>Ozaku</last></author>
+      <author id="hiromi-itoh-ozaku"><first>Hiromi itoh</first><last>Ozaku</last></author>
       <author><first>Akinori</first><last>Abe</last></author>
       <author><first>Noriaki</first><last>Kuwahara</last></author>
       <author><first>Futoshi</first><last>Naya</last></author>
@@ -1734,20 +1734,20 @@
     </paper>
     <paper id="6">
       <title>The Syntactically Annotated <fixed-case>ICE</fixed-case> Corpus and the Automatic Induction of a Formal Grammar</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <url hash="7bea1fc7">I05-6006</url>
       <bibkey>fang-2005-syntactically</bibkey>
     </paper>
     <paper id="7">
       <title>Syntactic Identification of Attribution in the <fixed-case>RST</fixed-case> Treebank</title>
-      <author><first>Peter Rossen</first><last>Skadhauge</last></author>
+      <author id="peter-rossen-skadhauge"><first>Peter Rossen</first><last>Skadhauge</last></author>
       <author><first>Daniel</first><last>Hardt</last></author>
       <url hash="dbe01a9c">I05-6007</url>
       <bibkey>skadhauge-hardt-2005-syntactic</bibkey>
     </paper>
     <paper id="8">
       <title>Linguistically enriched corpora for establishing variation in support verb constructions</title>
-      <author><first>Begoña Villada</first><last>Moirón</last></author>
+      <author id="begona-villada-moiron"><first>Begoña Villada</first><last>Moirón</last></author>
       <url hash="70d84af4">I05-6008</url>
       <bibkey>moiron-2005-linguistically</bibkey>
     </paper>
@@ -1791,7 +1791,7 @@
     </frontmatter>
     <paper id="1">
       <title>Extended-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et: A Representational Framework for Concepts</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Yueh-Yin</first><last>Shih</last></author>
       <author><first>Yi-Jun</first><last>Chen</last></author>
@@ -1809,7 +1809,7 @@
       <title>From General Ontology to Specialized Ontology: A study based on a single author historical corpus</title>
       <author><first>Ru-Yng</first><last>Chang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Feng-Ju</first><last>Lo</last></author>
+      <author id="feng-ju-lo"><first>Feng-Ju</first><last>Lo</last></author>
       <author><first>Sueming</first><last>Chang</last></author>
       <url hash="62873cbc">I05-7003</url>
       <bibkey>chang-etal-2005-general</bibkey>
@@ -1837,7 +1837,7 @@
     </paper>
     <paper id="7">
       <title>Increasing Understanding: Interpreting Events of Change</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <url hash="9a3c6c5b">I05-7007</url>
@@ -1857,7 +1857,7 @@
     <paper id="9">
       <title>The Omega Ontology</title>
       <author><first>Andrew</first><last>Philpot</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
       <url hash="2400f991">I05-7009</url>
       <bibkey>philpot-etal-2005-omega</bibkey>
@@ -1865,14 +1865,14 @@
     <paper id="10">
       <title>Experiments of Ontology Construction with Formal Concept Analysis</title>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <url hash="81884eb3">I05-7010</url>
       <bibkey>li-etal-2005-experiments</bibkey>
     </paper>
     <paper id="11">
       <title>Automatic Acquisition of Lexico-semantic Knowledge for <fixed-case>QA</fixed-case></title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <url hash="3e8e5767">I05-7011</url>
       <bibkey>van-der-plas-bouma-2005-automatic</bibkey>
@@ -1880,13 +1880,13 @@
     <paper id="12">
       <title>A Case Study in Automatic Building of Wordnets</title>
       <author><first>Eduard</first><last>Barbu</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <url hash="fcd2164e">I05-7012</url>
       <bibkey>barbu-mititelu-2005-case</bibkey>
     </paper>
     <paper id="13">
       <title>Interfacing Ontologies and Lexical Resources</title>
-      <author><first>Laurent</first><last>Prevot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prevot</last></author>
       <author><first>Stefano</first><last>Borgo</last></author>
       <author><first>Alessandro</first><last>Oltramari</last></author>
       <url hash="48b52b29">I05-7013</url>
diff --git a/data/xml/I08.xml b/data/xml/I08.xml
index 51a493ecc2..34e2fda5f4 100644
--- a/data/xml/I08.xml
+++ b/data/xml/I08.xml
@@ -21,7 +21,7 @@
     <paper id="2">
       <title>An Empirical Comparison of Goodness Measures for Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation with a Unified Framework</title>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="031ef6a6">I08-1002</url>
       <bibkey>zhao-kit-2008-empirical</bibkey>
     </paper>
@@ -34,18 +34,18 @@
     </paper>
     <paper id="4">
       <title>Context-Sensitive Convolution Tree Kernel for Pronoun Resolution</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>QiaoMing</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>QiaoMing</first><last>Zhu</last></author>
       <url hash="aea740b4">I08-1004</url>
       <bibkey>zhou-etal-2008-context</bibkey>
     </paper>
     <paper id="5">
       <title>Semi-Supervised Learning for Relation Extraction</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>JunHui</first><last>Li</last></author>
-      <author><first>LongHua</first><last>Qian</last></author>
-      <author><first>QiaoMing</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="junhui-li"><first>JunHui</first><last>Li</last></author>
+      <author id="longhua-qian"><first>LongHua</first><last>Qian</last></author>
+      <author id="qiaoming-zhu"><first>QiaoMing</first><last>Zhu</last></author>
       <url hash="fa40e5bc">I08-1005</url>
       <bibkey>zhou-etal-2008-semi</bibkey>
     </paper>
@@ -69,10 +69,10 @@
     <paper id="8">
       <title>Name Origin Recognition Using Maximum Entropy Model and Diverse Features</title>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <url hash="006fc32a">I08-1008</url>
       <bibkey>zhang-etal-2008-name</bibkey>
@@ -80,7 +80,7 @@
     <paper id="9">
       <title>A More Discerning and Adaptable Multilingual Transliteration Mechanism for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Harshit</first><last>Surana</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <url hash="c8d35d0f">I08-1009</url>
       <bibkey>surana-singh-2008-discerning</bibkey>
     </paper>
@@ -110,7 +110,7 @@
     </paper>
     <paper id="13">
       <title>An Effective Compositional Model for Lexical Alignment</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <url hash="590f3d08">I08-1013</url>
       <bibkey>daille-morin-2008-effective</bibkey>
@@ -118,7 +118,7 @@
     <paper id="14">
       <title>Determining the Unithood of Word Sequences Using a Probabilistic Approach</title>
       <author><first>Wilson</first><last>Wong</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Mohammed</first><last>Bennamoun</last></author>
       <url hash="3ddccadb">I08-1014</url>
       <bibkey>wong-etal-2008-determining</bibkey>
@@ -127,7 +127,7 @@
       <title>Lexical Chains as Document Features</title>
       <author><first>Dinakar</first><last>Jayarajan</last></author>
       <author><first>Dipti</first><last>Deodhare</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
       <url hash="c1b4aae8">I08-1015</url>
       <bibkey>jayarajan-etal-2008-lexical</bibkey>
     </paper>
@@ -140,7 +140,7 @@
     <paper id="17">
       <title>A New Approach to Automatic Document Summarization</title>
       <author><first>Xiaofeng</first><last>Wu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="cdf20595">I08-1017</url>
       <bibkey>wu-zong-2008-new</bibkey>
     </paper>
@@ -149,7 +149,7 @@
       <author><first>Harendra</first><last>Bhandari</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
       <author><first>Takahiko</first><last>Ito</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="4f1a999c">I08-1018</url>
       <bibkey>bhandari-etal-2008-generic</bibkey>
     </paper>
@@ -157,7 +157,7 @@
       <title>Identifying Cross-Document Relations between Sentences</title>
       <author><first>Yasunari</first><last>Miyabe</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="2aa61982">I08-1019</url>
       <bibkey>miyabe-etal-2008-identifying</bibkey>
     </paper>
@@ -171,7 +171,7 @@
       <title>Modeling Context in Scenario Template Creation</title>
       <author><first>Long</first><last>Qiu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <url hash="571dbe88">I08-1021</url>
       <bibkey>qiu-etal-2008-modeling</bibkey>
     </paper>
@@ -179,7 +179,7 @@
       <title>Cross Language Text Categorization Using a Bilingual Lexicon</title>
       <author><first>Ke</first><last>Wu</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <url hash="fb83f5b4">I08-1022</url>
       <bibkey>wu-etal-2008-cross</bibkey>
     </paper>
@@ -196,7 +196,7 @@
     <paper id="24">
       <title>A Comparative Study for Query Translation using Linear Combination and Confidence Measure</title>
       <author><first>Youssef</first><last>Kadri</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <url hash="569a4ae9">I08-1024</url>
       <bibkey>kadri-nie-2008-comparative</bibkey>
     </paper>
@@ -221,7 +221,7 @@
       <title>Automatic Estimation of Word Significance oriented for Speech-based Information Retrieval</title>
       <author><first>Takashi</first><last>Shichiri</last></author>
       <author><first>Hiroaki</first><last>Nanjo</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <url hash="8c95039d">I08-1027</url>
       <bibkey>shichiri-etal-2008-automatic</bibkey>
     </paper>
@@ -233,26 +233,26 @@
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Hiroshi</first><last>Tsujino</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <url hash="17c675d4">I08-1028</url>
       <bibkey>fukubayashi-etal-2008-rapid</bibkey>
     </paper>
     <paper id="29">
       <title>Automatic Prosodic Labeling with Conditional Random Fields and Rich Acoustic Features</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <url hash="320abbfb">I08-1029</url>
       <bibkey>levow-2008-automatic</bibkey>
     </paper>
     <paper id="30">
       <title><fixed-case>C</fixed-case>hinese Unknown Word Translation by Subword Re-segmentation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="5c1bae40">I08-1030</url>
       <bibkey>zhang-sumita-2008-chinese</bibkey>
     </paper>
     <paper id="31">
       <title>Hypothesis Selection in Machine Transliteration: A Web Mining Approach</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <url hash="0cecaabc">I08-1031</url>
       <bibkey>oh-isahara-2008-hypothesis</bibkey>
@@ -267,16 +267,16 @@
     <paper id="33">
       <title>Improving Word Alignment by Adjusting <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="749859a1">I08-1033</url>
       <bibkey>bai-etal-2008-improving</bibkey>
     </paper>
     <paper id="34">
       <title>The Telling Tail: Signals of Success in Electronic Negotiation Texts</title>
       <author><first>Marina</first><last>Sokolova</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <url hash="666099f8">I08-1034</url>
       <bibkey>sokolova-etal-2008-telling</bibkey>
     </paper>
@@ -284,7 +284,7 @@
       <title>Automatic Extraction of Briefing Templates</title>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Mohit</first><last>Kumar</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="7c889346">I08-1035</url>
       <bibkey>das-etal-2008-automatic</bibkey>
     </paper>
@@ -298,8 +298,8 @@
     <paper id="37">
       <title>Learning Patterns from the Web to Translate Named Entities for Cross Language Information Retrieval</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="5425ee72">I08-1037</url>
       <bibkey>wang-etal-2008-learning</bibkey>
     </paper>
@@ -314,37 +314,37 @@
       <title>Learning to Shift the Polarity of Words for Sentiment Classification</title>
       <author><first>Daisuke</first><last>Ikeda</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Lev-Arie</first><last>Ratinov</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="lev-ratinov"><first>Lev-Arie</first><last>Ratinov</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="29a0a10b">I08-1039</url>
       <bibkey>ikeda-etal-2008-learning</bibkey>
     </paper>
     <paper id="40">
       <title>Unsupervised Classification of Sentiment and Objectivity in <fixed-case>C</fixed-case>hinese Text</title>
       <author><first>Taras</first><last>Zagibalov</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <url hash="bc3c6e88">I08-1040</url>
       <bibkey>zagibalov-carroll-2008-unsupervised</bibkey>
     </paper>
     <paper id="41">
       <title>Using <fixed-case>R</fixed-case>oget’s Thesaurus for Fine-grained Emotion Recognition</title>
       <author><first>Saima</first><last>Aman</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <url hash="56b848f3">I08-1041</url>
       <bibkey>aman-szpakowicz-2008-using</bibkey>
     </paper>
     <paper id="42">
       <title>Heterogeneous Automatic <fixed-case>MT</fixed-case> Evaluation Through Non-Parametric Metric Combinations</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="08f00ca6">I08-1042</url>
       <bibkey>gimenez-marquez-2008-heterogeneous</bibkey>
     </paper>
     <paper id="43">
       <title>Paraphrasing Depending on Bilingual Context Toward Generalization of Translation Knowledge</title>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>YoungKil</first><last>Kim</last></author>
-      <author><first>Sangkyu</first><last>Park</last></author>
+      <author id="young-gil-kim"><first>YoungKil</first><last>Kim</last></author>
+      <author id="sang-kyu-park"><first>Sangkyu</first><last>Park</last></author>
       <url hash="2b9f6c68">I08-1043</url>
       <bibkey>hwang-etal-2008-paraphrasing</bibkey>
     </paper>
@@ -382,7 +382,7 @@
       <title>Learning a Stopping Criterion for Active Learning for Word Sense Disambiguation and Text Classification</title>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <author><first>Huizhen</first><last>Wang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="68b2158d">I08-1048</url>
       <bibkey>zhu-etal-2008-learning</bibkey>
     </paper>
@@ -396,7 +396,7 @@
     <paper id="50">
       <title>Identifying Sections in Scientific Abstracts using Conditional Random Fields</title>
       <author><first>Kenji</first><last>Hirohata</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <url hash="908a20aa">I08-1050</url>
@@ -405,7 +405,7 @@
     <paper id="51">
       <title>Formalising Multi-layer Corpora in <fixed-case>OWL</fixed-case> <fixed-case>DL</fixed-case> - Lexicon Modelling, Querying and Consistency Control</title>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Dennis</first><last>Spohr</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
@@ -417,7 +417,7 @@
       <author><first>Kiyoaki</first><last>Shirai</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Tzu-Yi</first><last>Kuo</last></author>
       <author><first>Virach</first><last>Sornlertlamvanich</last></author>
       <author><first>Thatsanee</first><last>Charoenporn</last></author>
@@ -433,7 +433,7 @@
     </paper>
     <paper id="54">
       <title>Answering Definition Questions via Temporally-Anchored Text Snippets</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <url hash="d444fe73">I08-1054</url>
       <bibkey>pasca-2008-answering</bibkey>
     </paper>
@@ -446,8 +446,8 @@
     </paper>
     <paper id="56">
       <title>Cluster-Based Query Expansion for Statistical Question Answering</title>
-      <author><first>Lucian Vlad</first><last>Lita</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian Vlad</first><last>Lita</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <url hash="2ae0a5b1">I08-1056</url>
       <bibkey>lita-carbonell-2008-cluster</bibkey>
     </paper>
@@ -460,7 +460,7 @@
     <paper id="58">
       <title>Multilingual Text Entry using Automatic Language Detection</title>
       <author><first>Yo</first><last>Ehara</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <url hash="92808d4a">I08-1058</url>
       <bibkey>ehara-tanaka-ishii-2008-multilingual</bibkey>
     </paper>
@@ -470,7 +470,7 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Alexandre</first><last>Klementiev</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Dmitriy</first><last>Belenko</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <url hash="0ac9e4f9">I08-1059</url>
@@ -479,7 +479,7 @@
     <paper id="60">
       <title>Bilingual Synonym Identification with Spelling Variations</title>
       <author><first>Takashi</first><last>Tsunakawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="69c00677">I08-1060</url>
       <bibkey>tsunakawa-tsujii-2008-bilingual</bibkey>
     </paper>
@@ -494,7 +494,7 @@
       <title><fixed-case>J</fixed-case>apanese-<fixed-case>S</fixed-case>panish Thesaurus Construction Using <fixed-case>E</fixed-case>nglish as a Pivot</title>
       <author><first>Jessica</first><last>Ramírez</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="ac59f4c4">I08-1062</url>
       <bibkey>ramirez-etal-2008-japanese</bibkey>
     </paper>
@@ -517,15 +517,15 @@
       <title>Acquiring Event Relation Knowledge by Learning Cooccurrence Patterns and Fertilizing Cooccurrence Samples with Verbal Nouns</title>
       <author><first>Shuya</first><last>Abe</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="85da17e1">I08-1065</url>
       <bibkey>abe-etal-2008-acquiring</bibkey>
     </paper>
     <paper id="66">
       <title>Refinements in <fixed-case>BTG</fixed-case>-based Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
@@ -536,15 +536,15 @@
       <title>Simple Syntactic and Morphological Processing Can Help <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Statistical Machine Translation</title>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <author><first>Jayprasad</first><last>Hegde</last></author>
-      <author><first>Ritesh M.</first><last>Shah</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Sasikumar</first><last>M.</last></author>
+      <author id="ritesh-shah"><first>Ritesh M.</first><last>Shah</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="sasikumar-m"><first>Sasikumar</first><last>M.</last></author>
       <url hash="844c78f0">I08-1067</url>
       <bibkey>ramanathan-etal-2008-simple</bibkey>
     </paper>
     <paper id="68">
       <title>Statistical Machine Translation Models for Personalized Search</title>
-      <author><first>Rohini</first><last>U</last></author>
+      <author id="u-rohini"><first>Rohini</first><last>U</last></author>
       <author><first>Vamshi</first><last>Ambati</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <url hash="d3eb9d04">I08-1068</url>
@@ -553,21 +553,21 @@
     <paper id="69">
       <title>Repurposing Theoretical Linguistic Data for Tool Development and Search</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <url hash="e3d368a4">I08-1069</url>
       <bibkey>xia-lewis-2008-repurposing</bibkey>
     </paper>
     <paper id="70">
       <title>Computing Paraphrasability of Syntactic Variants Using Web Snippets</title>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <url hash="104757c4">I08-1070</url>
       <bibkey>fujita-sato-2008-computing</bibkey>
     </paper>
     <paper id="71">
       <title>Augmenting <fixed-case>W</fixed-case>ikipedia with Named Entity Tags</title>
       <author><first>Wisam</first><last>Dakka</last></author>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <url hash="946bba50">I08-1071</url>
       <bibkey>dakka-cucerzan-2008-augmenting</bibkey>
     </paper>
@@ -582,7 +582,7 @@
     <paper id="73">
       <title>Gloss-Based Semantic Similarity Metrics for Predominant Sense Acquisition</title>
       <author><first>Ryu</first><last>Iida</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <url hash="54b8d63d">I08-1073</url>
       <bibkey>iida-etal-2008-gloss</bibkey>
@@ -590,7 +590,7 @@
     <paper id="74">
       <title>Benchmarking Noun Compound Interpretation</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <url hash="0ba6e04a">I08-1074</url>
       <bibkey>kim-baldwin-2008-benchmarking</bibkey>
     </paper>
@@ -623,7 +623,7 @@
       <title>Named Entity Recognition in <fixed-case>B</fixed-case>engali: A Conditional Random Field Approach</title>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="3ec0a9cd">I08-2077</url>
       <bibkey>ekbal-etal-2008-named</bibkey>
     </paper>
@@ -650,7 +650,7 @@
     </paper>
     <paper id="81">
       <title>Dimensionality Reduction with Multilingual Resource</title>
-      <author><first>YingJu</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>YingJu</first><last>Xia</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Gang</first><last>Zou</last></author>
       <url hash="2a1f19d8">I08-2081</url>
@@ -660,7 +660,7 @@
       <title>A Web-based <fixed-case>E</fixed-case>nglish Proofing System for <fixed-case>E</fixed-case>nglish as a Second Language Users</title>
       <author><first>Xing</first><last>Yi</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <url hash="f579b5f7">I08-2082</url>
       <bibkey>yi-etal-2008-web</bibkey>
     </paper>
@@ -674,8 +674,8 @@
     </paper>
     <paper id="84">
       <title>Term Extraction Through Unithood and Termhood Unification</title>
-      <author><first>Thuy</first><last>Vu</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <url hash="e8145360">I08-2084</url>
       <bibkey>vu-etal-2008-term</bibkey>
@@ -707,8 +707,8 @@
       <title>Method of Selecting Training Data to Build a Compact and Efficient Translation Model</title>
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="03c496f4">I08-2088</url>
       <bibkey>yasuda-etal-2008-method</bibkey>
     </paper>
@@ -732,7 +732,7 @@
       <author><first>Thatsanee</first><last>Charoenporn</last></author>
       <author><first>Chumpol</first><last>Mokarat</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <author><first>Purev</first><last>Jaimai</last></author>
       <url hash="a94f951c">I08-2091</url>
       <bibkey>sornlertlamvanich-etal-2008-synset</bibkey>
@@ -746,7 +746,7 @@
     </paper>
     <paper id="93">
       <title>Automatically Identifying Computationally Relevant Typological Features</title>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <url hash="07f1bee2">I08-2093</url>
       <bibkey>lewis-xia-2008-automatically</bibkey>
@@ -754,14 +754,14 @@
     <paper id="94">
       <title>Automatic Paraphrasing of <fixed-case>J</fixed-case>apanese Functional Expressions Using a Hierarchically Organized Dictionary</title>
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <url hash="24db5e20">I08-2094</url>
       <bibkey>matsuyoshi-sato-2008-automatic</bibkey>
     </paper>
     <paper id="95">
       <title>Generation of Referring Expression Using Prefix Tree Structure</title>
       <author><first>Sibabrata</first><last>Paladhi</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="85ddc5c6">I08-2095</url>
       <bibkey>paladhi-bandyopadhyay-2008-generation</bibkey>
     </paper>
@@ -783,7 +783,7 @@
       <title>Resolving Ambiguities of <fixed-case>C</fixed-case>hinese Conjunctive Structures by Divide-and-conquer Approaches</title>
       <author><first>Duen-Chi</first><last>Yang</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="899f4195">I08-2098</url>
       <bibkey>yang-etal-2008-resolving</bibkey>
     </paper>
@@ -792,7 +792,7 @@
       <author><first>Rafiya</first><last>Begum</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <author><first>Arun</first><last>Dhwaj</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Lakshmi</first><last>Bai</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <url hash="7e431979">I08-2099</url>
@@ -811,7 +811,7 @@
     <paper id="101">
       <title>A Multi-Document Multi-Lingual Automatic Summarization System</title>
       <author><first>Mohamad Ali</first><last>Honarpisheh</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last></author>
       <author><first>Ghassem</first><last>Mirroshandel</last></author>
       <url hash="e4b6034e">I08-2101</url>
       <bibkey>honarpisheh-etal-2008-multi</bibkey>
@@ -825,7 +825,7 @@
     </paper>
     <paper id="103">
       <title>Sentence Ordering based on Cluster Adjacency in Multi-Document Summarization</title>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Yu</first><last>Nie</last></author>
       <url hash="24e23d47">I08-2103</url>
       <bibkey>ji-nie-2008-sentence</bibkey>
@@ -840,13 +840,13 @@
     </paper>
     <paper id="105">
       <title>Unsupervised All-words Word Sense Disambiguation with Grammatical Dependencies</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <url hash="05da72fe">I08-2105</url>
       <bibkey>nastase-2008-unsupervised</bibkey>
     </paper>
     <paper id="106">
       <title>Syntactic and Semantic Frames in <fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="610c8a24">I08-2106</url>
       <bibkey>saint-dizier-2008-syntactic</bibkey>
     </paper>
@@ -860,11 +860,11 @@
     </paper>
     <paper id="108">
       <title><fixed-case>MRD</fixed-case>-based Word Sense Disambiguation: Further Extending <fixed-case>L</fixed-case>esk</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Sanae</first><last>Fujita</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Takaaki</first><last>Tanaka</last></author>
       <url hash="d15c0d04">I08-2108</url>
       <bibkey>baldwin-etal-2008-mrd</bibkey>
@@ -873,8 +873,8 @@
       <title>Fast Computing Grammar-driven Convolution Tree Kernel for Semantic Role Labeling</title>
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <url hash="e7360169">I08-2109</url>
@@ -885,7 +885,7 @@
       <author><first>Tomohide</first><last>Shibata</last></author>
       <author><first>Michitaka</first><last>Odani</last></author>
       <author><first>Jun</first><last>Harashima</last></author>
-      <author><first>Takashi</first><last>Oonishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Oonishi</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <url hash="023612d7">I08-2110</url>
       <bibkey>shibata-etal-2008-syngraph</bibkey>
@@ -946,7 +946,7 @@
     <paper id="118">
       <title>Learning Decision Lists with Known Rules for Text Mining</title>
       <author><first>Venkatesan</first><last>Chakravarthy</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <author><first>Shantanu</first><last>Godbole</last></author>
       <author><first>Sreeram</first><last>Balakrishnan</last></author>
@@ -961,7 +961,7 @@
     </paper>
     <paper id="120">
       <title>Mining <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpora from the Web</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Juan</first><last>Liu</last></author>
       <bibkey>li-liu-2008-mining</bibkey>
     </paper>
@@ -976,13 +976,13 @@
       <title>Towards Data and Goal Oriented Analysis: Tool Inter-operability and Combinatorial Comparison</title>
       <author><first>Yoshinobu</first><last>Kano</last></author>
       <author><first>Ngan</first><last>Nguyen</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
       <author><first>Keiichiro</first><last>Fukamachi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="04716cab">I08-2122</url>
       <bibkey>kano-etal-2008-towards</bibkey>
     </paper>
@@ -999,13 +999,13 @@
       <author><first>Donghui</first><last>Feng</last></author>
       <author><first>Gully</first><last>Burns</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="71c92bca">I08-2124</url>
       <bibkey>feng-etal-2008-towards</bibkey>
     </paper>
     <paper id="125">
       <title>Large Scale Diagnostic Code Classification for Medical Patient Records</title>
-      <author><first>Lucian Vlad</first><last>Lita</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian Vlad</first><last>Lita</last></author>
       <author><first>Shipeng</first><last>Yu</last></author>
       <author><first>Stefan</first><last>Niculescu</last></author>
       <author><first>Jinbo</first><last>Bi</last></author>
@@ -1021,9 +1021,9 @@
     </paper>
     <paper id="127">
       <title>A Discriminative Approach to <fixed-case>J</fixed-case>apanese Abbreviation Extraction</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="0a0dfa56">I08-2127</url>
       <bibkey>okazaki-etal-2008-discriminative-approach</bibkey>
     </paper>
@@ -1037,7 +1037,7 @@
     </paper>
     <paper id="129">
       <title>How to Take Advantage of the Limitations with <fixed-case>M</fixed-case>arkov Clustering?–The Foundations of Branching <fixed-case>M</fixed-case>arkov Clustering (<fixed-case>BMCL</fixed-case>)</title>
-      <author><first>Hiroyuki</first><last>Akama</last></author>
+      <author id="hiroyuki-akama"><first>Hiroyuki</first><last>Akama</last></author>
       <author><first>Maki</first><last>Miyake</last></author>
       <author><first>Jaeyoung</first><last>Jung</last></author>
       <url hash="fb99548b">I08-2129</url>
@@ -1052,9 +1052,9 @@
     </paper>
     <paper id="131">
       <title>Language Independent Text Correction using Finite State Automata</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Sara</first><last>Noeman</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <url hash="69dcf9e0">I08-2131</url>
       <bibkey>hassan-etal-2008-language</bibkey>
     </paper>
@@ -1071,13 +1071,13 @@
       <title>A Comparative Study of Mixture Models for Automatic Topic Segmentation of Multiparty Dialogues</title>
       <author><first>Maria</first><last>Georgescul</last></author>
       <author><first>Alexander</first><last>Clark</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <url hash="cf529373">I08-2133</url>
       <bibkey>georgescul-etal-2008-comparative</bibkey>
     </paper>
     <paper id="134">
       <title>Exploiting Unlabeled Text to Extract New Words of Different Semantic Transparency for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <author><first>Hsi-Chuan</first><last>Hung</last></author>
       <url hash="3069ad56">I08-2134</url>
       <bibkey>tsai-hung-2008-exploiting</bibkey>
@@ -1092,8 +1092,8 @@
     </paper>
     <paper id="136">
       <title>How to Add a New Language on the <fixed-case>NLP</fixed-case> Map: Building Resources and Tools for Languages with Scarce Resources</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <url hash="7edbefb4">I08-2136</url>
       <bibkey>mihalcea-nastase-2008-add</bibkey>
     </paper>
@@ -1106,7 +1106,7 @@
     <paper id="138">
       <title>A <fixed-case>P</fixed-case>unjabi Grammar Checker</title>
       <author><first>Mandeep Singh</first><last>Gill</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Shiv Sharma</first><last>Joshi</last></author>
       <url hash="9b865f8e">I08-2138</url>
       <bibkey>gill-etal-2008-punjabi</bibkey>
@@ -1128,7 +1128,7 @@
     </paper>
     <paper id="141">
       <title>A Mechanism to Provide Language-Encoding Support and an <fixed-case>NLP</fixed-case> Friendly Editor</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <url hash="4fa1f24a">I08-2141</url>
       <bibkey>singh-2008-mechanism</bibkey>
     </paper>
@@ -1144,8 +1144,8 @@
     <paper id="143">
       <title><fixed-case>POLL</fixed-case>y: A Conversational System that uses a Shared Representation to Generate Action and Social Language</title>
       <author><first>Swati</first><last>Gupta</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Daniela M.</first><last>Romano</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="daniela-m-romano"><first>Daniela M.</first><last>Romano</last></author>
       <url hash="6a43fcbd">I08-2143</url>
       <bibkey>gupta-etal-2008-polly</bibkey>
     </paper>
@@ -1192,7 +1192,7 @@
     </paper>
     <paper id="4">
       <title>Natural Language Processing for Less Privileged Languages: Where do we come from? Where are we going?</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <url hash="edf979a5">I08-3004</url>
       <bibkey>singh-2008-natural</bibkey>
     </paper>
@@ -1208,7 +1208,7 @@
     <paper id="6">
       <title>Prototype Machine Translation System From Text-To-<fixed-case>I</fixed-case>ndian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Tirthankar</first><last>Dasgupta</last></author>
-      <author><first>Sandipan</first><last>Dandpat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandpat</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
       <url hash="c6252b01">I08-3006</url>
       <bibkey>dasgupta-etal-2008-prototype</bibkey>
@@ -1222,7 +1222,7 @@
     </paper>
     <paper id="8">
       <title>Cross-Language Parser Adaptation between Related Languages</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <url hash="83ff3c55">I08-3008</url>
       <bibkey>zeman-resnik-2008-cross</bibkey>
@@ -1230,7 +1230,7 @@
     <paper id="9">
       <title><fixed-case>S</fixed-case>ri<fixed-case>S</fixed-case>hell Primo: A Predictive <fixed-case>S</fixed-case>inhala Text Input System</title>
       <author><first>Sandeva</first><last>Goonetilleke</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Yuichi</first><last>Itoh</last></author>
       <author><first>Fumio</first><last>Kishino</last></author>
       <url hash="f2b56633">I08-3009</url>
@@ -1259,7 +1259,7 @@
     <paper id="12">
       <title>Design of a Rule-based Stemmer for Natural Language Text in <fixed-case>B</fixed-case>engali</title>
       <author><first>Sandipan</first><last>Sarkar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="37ff9f0b">I08-3012</url>
       <bibkey>sarkar-bandyopadhyay-2008-design</bibkey>
     </paper>
@@ -1280,8 +1280,8 @@
     </paper>
     <paper id="15">
       <title>Morphology Driven <fixed-case>M</fixed-case>anipuri <fixed-case>POS</fixed-case> Tagger</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="5c1e621e">I08-3015</url>
       <bibkey>singh-bandyopadhyay-2008-morphology</bibkey>
     </paper>
@@ -1301,7 +1301,7 @@
     </paper>
     <paper id="18">
       <title>Indigenous Languages of <fixed-case>I</fixed-case>ndonesia: Creating Language Resources for Language Preservation</title>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <url hash="67f6f3fb">I08-3018</url>
       <bibkey>riza-2008-indigenous</bibkey>
     </paper>
@@ -1314,7 +1314,7 @@
     </paper>
     <paper id="20">
       <title>Speech to speech machine translation: Biblical chatter from <fixed-case>F</fixed-case>innish to <fixed-case>E</fixed-case>nglish</title>
-      <author><first>David</first><last>Ellis</last></author>
+      <author id="david-ellis"><first>David</first><last>Ellis</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
       <author><first>Timo</first><last>Honkela</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
@@ -1336,7 +1336,7 @@
     <paper id="1">
       <title>An Example-based Decoder for Spoken Language Machine Translation</title>
       <author><first>Zhou-Jun</first><last>Li</last></author>
-      <author><first>Wen-Han</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wen-Han</first><last>Chao</last></author>
       <author><first>Yue-Xin</first><last>Chen</last></author>
       <url hash="7cfd094e">I08-4001</url>
       <bibkey>li-etal-2008-example</bibkey>
@@ -1345,7 +1345,7 @@
       <title>Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration Pairs using Dynamic Window and Tokenizer</title>
       <author><first>Chengguo</first><last>Jin</last></author>
       <author><first>Seung-Hoon</first><last>Na</last></author>
-      <author><first>Dong-Il</first><last>Kim</last></author>
+      <author id="dong-il-kim"><first>Dong-Il</first><last>Kim</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="6f78d315">I08-4002</url>
       <bibkey>jin-etal-2008-automatic</bibkey>
@@ -1354,7 +1354,7 @@
       <title>Mining Transliterations from Web Query Results: An Incremental Approach</title>
       <author><first>Jin-Shea</first><last>Kuo</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Chih-Lung</first><last>Lin</last></author>
+      <author id="chih-lung-lin"><first>Chih-Lung</first><last>Lin</last></author>
       <url hash="c6d7ac1c">I08-4003</url>
       <bibkey>kuo-etal-2008-mining</bibkey>
     </paper>
@@ -1369,7 +1369,7 @@
       <title>Use of Event Types for Temporal Relation Identification in <fixed-case>C</fixed-case>hinese Text</title>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="e93c9623">I08-4005</url>
       <bibkey>cheng-etal-2008-use</bibkey>
     </paper>
@@ -1383,22 +1383,22 @@
     </paper>
     <paper id="7">
       <title>Stochastic Dependency Parsing Based on <fixed-case>A</fixed-case>* Admissible Search</title>
-      <author><first>Bor-shen</first><last>Lin</last></author>
+      <author id="bor-shen-lin"><first>Bor-shen</first><last>Lin</last></author>
       <url hash="2c407d6b">I08-4007</url>
       <bibkey>lin-2008-stochastic</bibkey>
     </paper>
     <paper id="8">
       <title>Analyzing <fixed-case>C</fixed-case>hinese Synthetic Words with Tree-based Information and a Survey on <fixed-case>C</fixed-case>hinese Morphologically Derived Words</title>
-      <author><first>Jia</first><last>Lu</last></author>
+      <author id="jia-lu"><first>Jia</first><last>Lu</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="ddec07cb">I08-4008</url>
       <bibkey>lu-etal-2008-analyzing</bibkey>
     </paper>
     <paper id="9">
       <title>Which Performs Better on In-Vocabulary Word Segmentation: Based on Word or Character?</title>
       <author><first>Zhenxing</first><last>Wang</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <url hash="f7fd5248">I08-4009</url>
       <bibkey>wang-etal-2008-performs</bibkey>
@@ -1425,13 +1425,13 @@
       <author><first>Guohua</first><last>Zhang</last></author>
       <author><first>Yuezhong</first><last>Tang</last></author>
       <author><first>Zhanjiang</first><last>Song</last></author>
-      <author><first>Xia</first><last>Wang</last></author>
+      <author id="xia-wang"><first>Xia</first><last>Wang</last></author>
       <url hash="7a5450f8">I08-4012</url>
       <bibkey>li-etal-2008-nokia</bibkey>
     </paper>
     <paper id="13">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation and Named Entity Recognition Based on Conditional Random Fields</title>
-      <author><first>Xinnian</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xinnian</first><last>Mao</last></author>
       <author><first>Yuan</first><last>Dong</last></author>
       <author><first>Saike</first><last>He</last></author>
       <author><first>Sencheng</first><last>Bao</last></author>
@@ -1451,7 +1451,7 @@
     <paper id="15">
       <title>The Character-based <fixed-case>CRF</fixed-case> Segmenter of <fixed-case>MSRA</fixed-case>&amp;<fixed-case>NEU</fixed-case> for the 4th Bakeoff</title>
       <author><first>Zhenxing</first><last>Wang</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <url hash="47408b21">I08-4015</url>
       <bibkey>wang-etal-2008-character</bibkey>
@@ -1469,14 +1469,14 @@
     <paper id="17">
       <title>Unsupervised Segmentation Helps Supervised Learning of Character Tagging for Word Segmentation and Named Entity Recognition</title>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="5d670007">I08-4017</url>
       <bibkey>zhao-kit-2008-unsupervised</bibkey>
     </paper>
     <paper id="18">
       <title>An Agent-Based Approach to <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Samuel W.K.</first><last>Chan</last></author>
-      <author><first>Mickey W.C.</first><last>Chong</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W.K.</first><last>Chan</last></author>
+      <author id="mickey-w-c-chong"><first>Mickey W.C.</first><last>Chong</last></author>
       <url hash="53c8e637">I08-4018</url>
       <bibkey>chan-chong-2008-agent</bibkey>
     </paper>
@@ -1501,7 +1501,7 @@
     <paper id="21">
       <title>A Morpheme-based Part-of-Speech Tagger for <fixed-case>C</fixed-case>hinese</title>
       <author><first>Guohong</first><last>Fu</last></author>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <url hash="5f7e3d1c">I08-4021</url>
       <bibkey>fu-webster-2008-morpheme</bibkey>
     </paper>
@@ -1514,7 +1514,7 @@
     </paper>
     <paper id="23">
       <title><fixed-case>HMM</fixed-case> and <fixed-case>CRF</fixed-case> Based Hybrid Model for <fixed-case>C</fixed-case>hinese Lexical Analysis</title>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Xiao</first><last>Sun</last></author>
       <author><first>Shidou</first><last>Jiao</last></author>
       <author><first>Lishuang</first><last>Li</last></author>
@@ -1528,7 +1528,7 @@
       <author><first>Ka Seng</first><last>Leong</last></author>
       <author><first>Fai</first><last>Wong</last></author>
       <author><first>Yiping</first><last>Li</last></author>
-      <author><first>Ming Chui</first><last>Dong</last></author>
+      <author id="ming-chui-dong"><first>Ming Chui</first><last>Dong</last></author>
       <url hash="7bae5b9c">I08-4024</url>
       <bibkey>leong-etal-2008-chinese</bibkey>
     </paper>
@@ -1542,9 +1542,9 @@
     <paper id="26">
       <title>A Study of <fixed-case>C</fixed-case>hinese Lexical Analysis Based on Discriminative Models</title>
       <author><first>Guang-Lu</first><last>Sun</last></author>
-      <author><first>Cheng-Jie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Cheng-Jie</first><last>Sun</last></author>
       <author><first>Ke</first><last>Sun</last></author>
-      <author><first>Xiao-Long</first><last>Wang</last></author>
+      <author id="xiao-long-wang"><first>Xiao-Long</first><last>Wang</last></author>
       <url hash="48c8505d">I08-4026</url>
       <bibkey>sun-etal-2008-study</bibkey>
     </paper>
@@ -1557,7 +1557,7 @@
     <paper id="28">
       <title>An Improved <fixed-case>CRF</fixed-case> based <fixed-case>C</fixed-case>hinese Language Processing System for <fixed-case>SIGHAN</fixed-case> Bakeoff 2007</title>
       <author><first>Xihong</first><last>Wu</last></author>
-      <author><first>Xiaojun</first><last>Lin</last></author>
+      <author id="xiaojun-lin"><first>Xiaojun</first><last>Lin</last></author>
       <author><first>Xinhao</first><last>Wang</last></author>
       <author><first>Chunyao</first><last>Wu</last></author>
       <author><first>Yaozhong</first><last>Zhang</last></author>
@@ -1593,7 +1593,7 @@
     <paper id="32">
       <title>A <fixed-case>C</fixed-case>hinese Word Segmentation System Based on Cascade Model</title>
       <author><first>Jianfeng</first><last>Zhang</last></author>
-      <author><first>Jiaheng</first><last>Zheng</last></author>
+      <author id="jiaheng-zheng"><first>Jiaheng</first><last>Zheng</last></author>
       <author><first>Hu</first><last>Zhang</last></author>
       <author><first>Hongye</first><last>Tan</last></author>
       <url hash="d96a278b">I08-4032</url>
@@ -1602,7 +1602,7 @@
     <paper id="33">
       <title><fixed-case>A</fixed-case>chilles: <fixed-case>N</fixed-case>i<fixed-case>CT</fixed-case>/<fixed-case>ATR</fixed-case> <fixed-case>C</fixed-case>hinese Morphological Analyzer for the Fourth Sighan Bakeoff</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="092fac55">I08-4033</url>
       <bibkey>zhang-sumita-2008-achilles</bibkey>
     </paper>
@@ -1625,19 +1625,19 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: Named Entity Recognition: Different Approaches</title>
-      <author><first>Sobha</first><last>L</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
       <url hash="e7c6a8ce">I08-5001</url>
       <bibkey>l-2008-invited</bibkey>
     </paper>
     <paper id="2">
       <title>Invited Talk: Multilingual Named Entity Recognition</title>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="944a82b5">I08-5002</url>
       <bibkey>bandyopadhyay-2008-invited</bibkey>
     </paper>
     <paper id="3">
       <title>Named Entity Recognition for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages: Taking Stock</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <url hash="5796e050">I08-5003</url>
       <bibkey>singh-2008-named</bibkey>
     </paper>
@@ -1645,7 +1645,7 @@
       <title>A Hybrid Named Entity Recognition System for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages</title>
       <author><first>Sujan Kumar</first><last>Saha</last></author>
       <author><first>Sanjay</first><last>Chatterji</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Pabitra</first><last>Mitra</last></author>
       <url hash="d65dee70">I08-5004</url>
@@ -1656,8 +1656,8 @@
       <author><first>Karthik</first><last>Gali</last></author>
       <author><first>Harshit</first><last>Surana</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Praneeth</first><last>Shishtla</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="praneeth-m-shishtla"><first>Praneeth</first><last>Shishtla</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <url hash="6cd47ce8">I08-5005</url>
       <bibkey>gali-etal-2008-aggregating</bibkey>
     </paper>
@@ -1667,7 +1667,7 @@
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Amitava</first><last>Das</last></author>
       <author><first>Venkateswarlu</first><last>Poka</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="577179af">I08-5006</url>
       <bibkey>ekbal-etal-2008-language</bibkey>
     </paper>
@@ -1681,20 +1681,20 @@
     <paper id="8">
       <title><fixed-case>B</fixed-case>engali Named Entity Recognition Using Support Vector Machine</title>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="3e28fc32">I08-5008</url>
       <bibkey>ekbal-bandyopadhyay-2008-bengali</bibkey>
     </paper>
     <paper id="9">
       <title>Domain Focused Named Entity Recognizer for <fixed-case>T</fixed-case>amil Using Conditional Random Fields</title>
       <author><first>Vijayakrishna</first><last>R</last></author>
-      <author><first>Sobha</first><last>L</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
       <url hash="4c0537c3">I08-5009</url>
       <bibkey>r-l-2008-domain</bibkey>
     </paper>
     <paper id="10">
       <title>A Character n-gram Based Approach for Improved Recall in <fixed-case>I</fixed-case>ndian Language <fixed-case>NER</fixed-case></title>
-      <author><first>Praneeth M</first><last>Shishtla</last></author>
+      <author id="praneeth-m-shishtla"><first>Praneeth M</first><last>Shishtla</last></author>
       <author><first>Prasad</first><last>Pingali</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <url hash="aa3cf30f">I08-5010</url>
@@ -1702,14 +1702,14 @@
     </paper>
     <paper id="11">
       <title>An Experiment on Automatic Detection of Named Entities in <fixed-case>B</fixed-case>angla</title>
-      <author><first>Bidyut Baran</first><last>Chaudhuri</last></author>
+      <author id="bidyut-baran-chaudhuri"><first>Bidyut Baran</first><last>Chaudhuri</last></author>
       <author><first>Suvankar</first><last>Bhattacharya</last></author>
       <url hash="ec5bdb75">I08-5011</url>
       <bibkey>chaudhuri-bhattacharya-2008-experiment</bibkey>
     </paper>
     <paper id="12">
       <title>Hybrid Named Entity Recognition System for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages</title>
-      <author><first>Praveen</first><last>P</last></author>
+      <author id="praveen-paritosh"><first>Praveen</first><last>P</last></author>
       <author><first>Ravi Kiran</first><last>V</last></author>
       <url hash="c8d0370b">I08-5012</url>
       <bibkey>p-v-2008-hybrid</bibkey>
@@ -1732,7 +1732,7 @@
     </paper>
     <paper id="15">
       <title>Experiments in <fixed-case>T</fixed-case>elugu <fixed-case>NER</fixed-case>: A Conditional Random Field Approach</title>
-      <author><first>Praneeth M</first><last>Shishtla</last></author>
+      <author id="praneeth-m-shishtla"><first>Praneeth M</first><last>Shishtla</last></author>
       <author><first>Karthik</first><last>Gali</last></author>
       <author><first>Prasad</first><last>Pingali</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
@@ -1753,7 +1753,7 @@
     </frontmatter>
     <paper id="1">
       <title>The Effects of Language Relatedness on Multilingual Information Retrieval: A Case Study With <fixed-case>I</fixed-case>ndo-<fixed-case>E</fixed-case>uropean and <fixed-case>S</fixed-case>emitic Languages</title>
-      <author><first>Peter</first><last>Chew</last></author>
+      <author id="peter-a-chew"><first>Peter</first><last>Chew</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <url hash="c3b41928">I08-6001</url>
       <bibkey>chew-abdelali-2008-effects</bibkey>
@@ -1761,7 +1761,7 @@
     <paper id="2">
       <title>Identifying Similar and Co-referring Documents Across Languages</title>
       <author><first>Pattabhi</first><last>R K Rao T</last></author>
-      <author><first>Sobha</first><last>L</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
       <url hash="4be7e600">I08-6002</url>
       <bibkey>r-k-rao-t-l-2008-identifying</bibkey>
     </paper>
@@ -1774,8 +1774,8 @@
     </paper>
     <paper id="4">
       <title>Some Experiments in Mining Named Entity Transliteration Pairs from Comparable Corpora</title>
-      <author><first>K</first><last>Saravanan</last></author>
-      <author><first>A</first><last>Kumaran</last></author>
+      <author id="k-saravanan"><first>K</first><last>Saravanan</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
       <url hash="8fae82d0">I08-6004</url>
       <bibkey>saravanan-kumaran-2008-experiments</bibkey>
     </paper>
@@ -1784,14 +1784,14 @@
       <author><first>Gareth</first><last>Jones</last></author>
       <author><first>Fabio</first><last>Fantino</last></author>
       <author><first>Eamonn</first><last>Newman</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <url hash="0e3c712e">I08-6005</url>
       <bibkey>jones-etal-2008-domain</bibkey>
     </paper>
     <paper id="6">
       <title>Statistical Transliteration for Cross Language Information Retrieval using <fixed-case>HMM</fixed-case> alignment model and <fixed-case>CRF</fixed-case></title>
       <author><first>Prasad</first><last>Pingali</last></author>
-      <author><first>Surya</first><last>Ganesh</last></author>
+      <author id="surya-ganesh"><first>Surya</first><last>Ganesh</last></author>
       <author><first>Sreeharsha</first><last>Yella</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <url hash="da8b6d47">I08-6006</url>
@@ -1808,23 +1808,23 @@
     <paper id="8">
       <title>A Document Graph Based Query Focused Multi-Document Summarizer</title>
       <author><first>Sibabrata</first><last>Paladhi</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="007a55d0">I08-6008</url>
       <bibkey>paladhi-bandyopadhyay-2008-document</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>H</fixed-case>indi and <fixed-case>M</fixed-case>arathi to <fixed-case>E</fixed-case>nglish Cross Language Information Retrieval</title>
-      <author><first>Manoj Kumar</first><last>Chinnakotla</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj Kumar</first><last>Chinnakotla</last></author>
       <author><first>Sagar</first><last>Ranadive</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="bfa2cce8">I08-6009</url>
       <bibkey>chinnakotla-etal-2008-hindi</bibkey>
     </paper>
     <paper id="10">
       <title><fixed-case>B</fixed-case>engali and <fixed-case>H</fixed-case>indi to <fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case> Evaluation</title>
       <author><first>Debasis</first><last>Mandal</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Mayank</first><last>Gupta</last></author>
       <author><first>Pratyush</first><last>Banerjee</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
@@ -1833,9 +1833,9 @@
     </paper>
     <paper id="11">
       <title><fixed-case>B</fixed-case>engali, <fixed-case>H</fixed-case>indi and <fixed-case>T</fixed-case>elugu to <fixed-case>E</fixed-case>nglish Ad-hoc Bilingual Task</title>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
       <author><first>Srinivasa Rao</first><last>Godavarthy</last></author>
@@ -1845,7 +1845,7 @@
     <paper id="12">
       <title>Cross-Lingual Information Retrieval System for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>A</first><last>Kumaran</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
       <url hash="cf0a5878">I08-6012</url>
       <bibkey>jagarlamudi-kumaran-2008-cross</bibkey>
     </paper>
@@ -1883,7 +1883,7 @@
     <paper id="1">
       <title>Development of <fixed-case>B</fixed-case>engali Named Entity Tagged Corpus and its Use in <fixed-case>NER</fixed-case> Systems</title>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="b2fd174d">I08-7001</url>
       <bibkey>ekbal-bandyopadhyay-2008-development</bibkey>
     </paper>
@@ -1898,7 +1898,7 @@
     <paper id="3">
       <title>Preliminary <fixed-case>C</fixed-case>hinese Term Classification for Ontology Construction</title>
       <author><first>Gaoying</first><last>Cui</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <url hash="4a342286">I08-7003</url>
       <bibkey>cui-etal-2008-preliminary</bibkey>
@@ -1947,7 +1947,7 @@
     <paper id="9">
       <title>A Discourse Resource for <fixed-case>T</fixed-case>urkish: Annotating Discourse Connectives in the <fixed-case>METU</fixed-case> Corpus</title>
       <author><first>Deniz</first><last>Zeyrek</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="09365693">I08-7009</url>
       <bibkey>zeyrek-webber-2008-discourse</bibkey>
     </paper>
@@ -1955,8 +1955,8 @@
       <title>Towards an Annotated Corpus of Discourse Relations in <fixed-case>H</fixed-case>indi</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <url hash="28a3bcbb">I08-7010</url>
       <bibkey>prasad-etal-2008-towards</bibkey>
     </paper>
@@ -1978,21 +1978,21 @@
     </paper>
     <paper id="13">
       <title>Designing a Common <fixed-case>POS</fixed-case>-Tagset Framework for <fixed-case>I</fixed-case>ndian Languages</title>
-      <author><first>Sankaran</first><last>Baskaran</last></author>
+      <author id="baskaran-sankaran"><first>Sankaran</first><last>Baskaran</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <author><first>Tanmoy</first><last>Bhattacharya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish Nath</first><last>Jha</last></author>
-      <author><first>Rajendran</first><last>S</last></author>
-      <author><first>Saravanan</first><last>K</last></author>
-      <author><first>Sobha</first><last>L</last></author>
-      <author><first>Subbarao K</first><last>V.</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></author>
+      <author id="s-rajendran"><first>Rajendran</first><last>S</last></author>
+      <author id="k-saravanan"><first>Saravanan</first><last>K</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
+      <author id="subbarao-k-v"><first>Subbarao K</first><last>V.</last></author>
       <url hash="01c79860">I08-7013</url>
       <bibkey>baskaran-etal-2008-designing</bibkey>
     </paper>
     <paper id="14">
       <title>Resources Report on Languages of <fixed-case>I</fixed-case>ndonesia</title>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <url hash="82e840a5">I08-7014</url>
       <bibkey>riza-2008-resources</bibkey>
     </paper>
@@ -2007,7 +2007,7 @@
     <paper id="16">
       <title>Corpus building for <fixed-case>M</fixed-case>ongolian language</title>
       <author><first>Purev</first><last>Jaimai</last></author>
-      <author><first>Odbayar</first><last>Chimeddorj</last></author>
+      <author id="odbayar-chimeddorj"><first>Odbayar</first><last>Chimeddorj</last></author>
       <url hash="c24bc700">I08-7016</url>
       <bibkey>jaimai-chimeddorj-2008-corpus</bibkey>
     </paper>
@@ -2019,7 +2019,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <url hash="3707c849">I08-7018</url>
       <bibkey>maekawa-2008-balanced</bibkey>
     </paper>
@@ -2043,8 +2043,8 @@
     </paper>
     <paper id="21">
       <title><fixed-case>J</fixed-case>apanese Effort Toward Sharing Text and Speech Corpora</title>
-      <author><first>Shuichi</first><last>Itahashi</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="shuichi-itahashi"><first>Shuichi</first><last>Itahashi</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="d71e24ab">I08-7021</url>
       <bibkey>itahashi-hasida-2008-japanese</bibkey>
     </paper>
@@ -2069,7 +2069,7 @@
       <title>Transformation-based Sentence Splitting method for Statistical Machine Translation</title>
       <author><first>Jonghoon</first><last>Lee</last></author>
       <author><first>Donghyeon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <url hash="0eb4bfab">I08-8001</url>
       <bibkey>lee-etal-2008-transformation</bibkey>
     </paper>
@@ -2084,7 +2084,7 @@
     <paper id="3">
       <title>Phrase-based Machine Transliteration</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="ff59ae1b">I08-8003</url>
       <bibkey>finch-sumita-2008-phrase</bibkey>
     </paper>
@@ -2092,7 +2092,7 @@
       <title>Development of <fixed-case>I</fixed-case>ndonesian Large Vocabulary Continuous Speech Recognition System within A-<fixed-case>STAR</fixed-case> Project</title>
       <author><first>Sakriani</first><last>Sakti</last></author>
       <author><first>Eka</first><last>Kelana</last></author>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <author><first>Shinsuke</first><last>Sakai</last></author>
       <author><first>Konstantin</first><last>Markov</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
@@ -2101,7 +2101,7 @@
     </paper>
     <paper id="5">
       <title>Using Confidence Vector in Multi-Stage Speech Recognition</title>
-      <author><first>Hyungbae</first><last>Jeon</last></author>
+      <author id="hyung-bae-jeon"><first>Hyungbae</first><last>Jeon</last></author>
       <author><first>Kyuwoong</first><last>Hwang</last></author>
       <author><first>Hoon</first><last>Chung</last></author>
       <author><first>Seunghi</first><last>Kim</last></author>
@@ -2112,7 +2112,7 @@
     </paper>
     <paper id="6">
       <title>Toward <fixed-case>A</fixed-case>sian Speech Translation System: Developing Speech Recognition and Machine Translation for <fixed-case>I</fixed-case>ndonesian Language</title>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <author><first>Oskar</first><last>Riandi</last></author>
       <url hash="4380ed1f">I08-8006</url>
       <bibkey>riza-riandi-2008-toward</bibkey>
diff --git a/data/xml/I11.xml b/data/xml/I11.xml
index f1eab7e60e..dd0f79ad4b 100644
--- a/data/xml/I11.xml
+++ b/data/xml/I11.xml
@@ -19,23 +19,23 @@
     <paper id="1">
       <title>Analyzing the Dynamics of Research by Extracting Key Aspects of Scientific Papers</title>
       <author><first>Sonal</first><last>Gupta</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1–9</pages>
       <url hash="46218337">I11-1001</url>
       <bibkey>gupta-manning-2011-analyzing</bibkey>
     </paper>
     <paper id="2">
       <title>Dependency-directed Tree Kernel-based Protein-Protein Interaction Extraction from Biomedical Literature</title>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>10–19</pages>
       <url hash="b9ca64ea">I11-1002</url>
       <bibkey>qian-zhou-2011-dependency</bibkey>
     </paper>
     <paper id="3">
       <title>Learning Logical Structures of Paragraphs in Legal Articles</title>
-      <author><first>Ngo Xuan</first><last>Bach</last></author>
-      <author><first>Nguyen Le</first><last>Minh</last></author>
+      <author id="ngo-xuan-bach"><first>Ngo Xuan</first><last>Bach</last></author>
+      <author id="minh-le-nguyen"><first>Nguyen Le</first><last>Minh</last></author>
       <author><first>Tran Thi</first><last>Oanh</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>20–28</pages>
@@ -56,7 +56,7 @@
     <paper id="5">
       <title>Context-Sensitive Syntactic Source-Reordering by Statistical Transduction</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>38–46</pages>
       <url hash="e6d0dd90">I11-1005</url>
       <bibkey>khalilov-simaan-2011-context</bibkey>
@@ -64,10 +64,10 @@
     <paper id="6">
       <title>Discriminative Phrase-based Lexicalized Reordering Models using Weighted Reordering Graphs</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>João</first><last>Graça</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>47–55</pages>
       <url hash="16404041">I11-1006</url>
       <bibkey>ling-etal-2011-discriminative</bibkey>
@@ -75,7 +75,7 @@
     <paper id="7">
       <title>Active Learning Strategies for Support Vector Machines, Application to Temporal Relation Classification</title>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>56–64</pages>
       <url hash="8ceac3e4">I11-1007</url>
@@ -85,7 +85,7 @@
       <title>A Fast Accurate Two-stage Training Algorithm for <fixed-case>L</fixed-case>1-regularized <fixed-case>CRF</fixed-case>s with Heuristic Line Search Strategy</title>
       <author><first>Jinlong</first><last>Zhou</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>65–74</pages>
       <url hash="49dc236b">I11-1008</url>
       <bibkey>zhou-etal-2011-fast</bibkey>
@@ -112,7 +112,7 @@
       <author><first>Sriparna</first><last>Saha</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>93–101</pages>
       <url hash="3001c2a3">I11-1011</url>
       <bibkey>saha-etal-2011-single</bibkey>
@@ -122,7 +122,7 @@
       <author><first>Bin</first><last>Chen</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Sinno Jialin</first><last>Pan</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>102–110</pages>
       <url hash="bdd71757">I11-1012</url>
       <bibkey>chen-etal-2011-unified</bibkey>
@@ -150,7 +150,7 @@
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>129–137</pages>
       <url hash="2200c2f9">I11-1015</url>
       <bibkey>sajjad-etal-2011-comparing</bibkey>
@@ -158,7 +158,7 @@
     <paper id="16">
       <title>A Semantic-Specific Model for <fixed-case>C</fixed-case>hinese Named Entity Translation</title>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>138–146</pages>
       <url hash="cf8dc1c1">I11-1016</url>
       <bibkey>chen-zong-2011-semantic</bibkey>
@@ -168,7 +168,7 @@
       <author><first>Tomoya</first><last>Mizumoto</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>147–155</pages>
       <url hash="91b1ea7b">I11-1017</url>
       <bibkey>mizumoto-etal-2011-mining</bibkey>
@@ -178,7 +178,7 @@
       <author><first>Thamar</first><last>Solorio</last></author>
       <author><first>Sangita</first><last>Pillay</last></author>
       <author><first>Sindhu</first><last>Raghavan</last></author>
-      <author><first>Manuel</first><last>Montes y Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes y Gómez</last></author>
       <pages>156–164</pages>
       <url hash="2d6baea3">I11-1018</url>
       <bibkey>solorio-etal-2011-modality</bibkey>
@@ -187,7 +187,7 @@
       <title>Keyphrase Extraction from Online News Using Binary Integer Programming</title>
       <author><first>Zhuoye</first><last>Ding</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>165–173</pages>
       <url hash="ace2cf0f">I11-1019</url>
       <bibkey>ding-etal-2011-keyphrase</bibkey>
@@ -196,8 +196,8 @@
       <title>Improving Related Entity Finding via Incorporating Homepages and Recognizing Fine-grained Entities</title>
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hisashi</first><last>Kawai</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hisashi-kawai"><first>Hisashi</first><last>Kawai</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>174–182</pages>
       <url hash="e7c96296">I11-1020</url>
       <bibkey>wu-etal-2011-improving</bibkey>
@@ -213,10 +213,10 @@
     </paper>
     <paper id="22">
       <title>Semantic Role Labeling Without Treebanks?</title>
-      <author><first>Stephen</first><last>Boxwell</last></author>
+      <author id="stephen-boxwell"><first>Stephen</first><last>Boxwell</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Sujith</first><last>Ravi</last></author>
       <pages>192–200</pages>
       <url hash="4250f071">I11-1022</url>
@@ -226,7 +226,7 @@
       <title><fixed-case>J</fixed-case>apanese Predicate Argument Structure Analysis Exploiting Argument Position and Type</title>
       <author><first>Yuta</first><last>Hayashibe</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>201–209</pages>
       <url hash="5af7178d">I11-1023</url>
       <bibkey>hayashibe-etal-2011-japanese</bibkey>
@@ -234,7 +234,7 @@
     <paper id="24">
       <title>An Empirical Study on Compositionality in Compound Nouns</title>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>210–218</pages>
       <url hash="da238c30">I11-1024</url>
@@ -243,7 +243,7 @@
     <paper id="25">
       <title>Feature-Rich Log-Linear Lexical Model for Latent Variable <fixed-case>PCFG</fixed-case> Grammars</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>219–227</pages>
       <url hash="5b6631c4">I11-1025</url>
       <bibkey>huang-harper-2011-feature</bibkey>
@@ -267,11 +267,11 @@
     </paper>
     <paper id="28">
       <title><fixed-case>T</fixed-case>reeblazing: Using External Treebanks to Filter Parse Forests for Parse Selection and Treebanking</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>246–254</pages>
       <url hash="bf024f5e">I11-1028</url>
       <bibkey>mackinlay-etal-2011-treeblazing</bibkey>
@@ -281,7 +281,7 @@
       <author><first>Paul</first><last>McNamee</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Dawn</first><last>Lawrie</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>David</first><last>Doermann</last></author>
       <pages>255–263</pages>
       <url hash="a63bac20">I11-1029</url>
@@ -310,7 +310,7 @@
       <title>Identifying Event Descriptions using Co-training with Online News Summaries</title>
       <author><first>William Yang</first><last>Wang</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>282–291</pages>
       <url hash="6edd00a9">I11-1032</url>
       <bibkey>wang-etal-2011-identifying</bibkey>
@@ -320,7 +320,7 @@
       <author><first>Teruaki</first><last>Oka</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Toshinobu</first><last>Ogiso</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>292–300</pages>
       <url hash="a2d95486">I11-1033</url>
       <bibkey>oka-etal-2011-automatic</bibkey>
@@ -336,7 +336,7 @@
     <paper id="35">
       <title>Improving <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging with Semi-supervised Methods Using Large Auto-Analyzed Data</title>
       <author><first>Yiou</first><last>Wang</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
@@ -348,7 +348,7 @@
     <paper id="36">
       <title><fixed-case>CODACT</fixed-case>: Towards Identifying Orthographic Variants in Dialectal <fixed-case>A</fixed-case>rabic</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>318–326</pages>
       <url hash="76e5a66f">I11-1036</url>
       <bibkey>dasigi-diab-2011-codact</bibkey>
@@ -363,7 +363,7 @@
     </paper>
     <paper id="38">
       <title>Fine-Grained Sentiment Analysis with Structural Features</title>
-      <author><first>Cäcilia</first><last>Zirn</last></author>
+      <author id="cacilia-zirn"><first>Cäcilia</first><last>Zirn</last></author>
       <author><first>Mathias</first><last>Niepert</last></author>
       <author><first>Heiner</first><last>Stuckenschmidt</last></author>
       <author><first>Michael</first><last>Strube</last></author>
@@ -374,7 +374,7 @@
     <paper id="39">
       <title>Predicting Opinion Dependency Relations for Opinion Analysis</title>
       <author><first>Lun-Wei</first><last>Ku</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>345–353</pages>
       <url hash="d32455e6">I11-1039</url>
@@ -383,14 +383,14 @@
     <paper id="40">
       <title>Detecting and Blocking False Sentiment Propagation</title>
       <author><first>Hye-Jin</first><last>Min</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>354–362</pages>
       <url hash="6aaa48b9">I11-1040</url>
       <bibkey>min-park-2011-detecting</bibkey>
     </paper>
     <paper id="41">
       <title>Efficient induction of probabilistic word classes with <fixed-case>LDA</fixed-case></title>
-      <author><first>Grzegorz</first><last>Chrupala</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupala</last></author>
       <pages>363–372</pages>
       <url hash="4b20ba05">I11-1041</url>
       <bibkey>chrupala-2011-efficient</bibkey>
@@ -406,7 +406,7 @@
     </paper>
     <paper id="43">
       <title>Labeling Unlabeled Data using Cross-Language Guided Clustering</title>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <author><first>Danish</first><last>Contractor</last></author>
       <author><first>Sumit</first><last>Negi</last></author>
       <pages>383–391</pages>
@@ -418,14 +418,14 @@
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
       <author><first>Hai Leong</first><last>Chieu</last></author>
-      <author><first>Kian Ming A.</first><last>Chai</last></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last></author>
       <pages>392–400</pages>
       <url hash="c5df1839">I11-1044</url>
       <bibkey>li-etal-2011-extracting</bibkey>
     </paper>
     <paper id="45">
       <title>Attribute Extraction from Synthetic Web Search Queries</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>401–409</pages>
       <url hash="1641e351">I11-1045</url>
       <bibkey>pasca-2011-attribute</bibkey>
@@ -456,7 +456,7 @@
     <paper id="48">
       <title>Crawling Back and Forth: Using Back and Out Links to Locate Bilingual Sites</title>
       <author><first>Luciano</first><last>Barbosa</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
       <pages>429–437</pages>
       <url hash="f67a19dc">I11-1048</url>
@@ -465,15 +465,15 @@
     <paper id="49">
       <title>Grammar Induction from Text Using Small Syntactic Prototypes</title>
       <author><first>Prachya</first><last>Boonkwan</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>438–446</pages>
       <url hash="9e802681">I11-1049</url>
       <bibkey>boonkwan-steedman-2011-grammar</bibkey>
     </paper>
     <paper id="50">
       <title>Transferring Syntactic Relations from <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi Using Alignments on Local Word Groups</title>
-      <author><first>Aswarth</first><last>Dara</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth</first><last>Dara</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <author><first>Hemanth Sagar</first><last>Bayyarapu</last></author>
       <author><first>Avinesh</first><last>PVS</last></author>
       <pages>447–455</pages>
@@ -500,8 +500,8 @@
       <title><fixed-case>T</fixed-case>ri<fixed-case>S</fixed-case>: A Statistical Sentence Simplifier with Log-linear Models and Margin-based Discriminative Training</title>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>474–482</pages>
       <url hash="d518fe32">I11-1053</url>
       <bibkey>bach-etal-2011-tris</bibkey>
@@ -511,7 +511,7 @@
       <author><first>Po</first><last>Hu</last></author>
       <author><first>Cheng</first><last>Sun</last></author>
       <author><first>Longfei</first><last>Wu</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <pages>483–490</pages>
       <url hash="35810f4b">I11-1054</url>
@@ -549,9 +549,9 @@
     <paper id="58">
       <title>Structured and Extended Named Entity Evaluation in Automatic Speech Transcriptions</title>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Ludovic</first><last>Quintard</last></author>
       <pages>518–526</pages>
       <url hash="4072bbfb">I11-1058</url>
@@ -567,13 +567,13 @@
     </paper>
     <paper id="60">
       <title>Similarity Based Language Model Construction for Voice Activated Open-Domain Question Answering</title>
-      <author><first>István</first><last>Varga</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Teruhisa</first><last>Misu</last></author>
       <author><first>Shigeki</first><last>Matsuda</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <pages>536–544</pages>
       <url hash="dc2f46f0">I11-1060</url>
       <bibkey>varga-etal-2011-similarity</bibkey>
@@ -589,7 +589,7 @@
     <paper id="62">
       <title>Cross-domain Feature Selection for Language Identification</title>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>553–561</pages>
       <url hash="5e1a3d5f">I11-1062</url>
       <bibkey>lui-baldwin-2011-cross</bibkey>
@@ -598,7 +598,7 @@
       <title>A <fixed-case>W</fixed-case>ikipedia-<fixed-case>LDA</fixed-case> Model for Entity Linking with Batch Size Changing Instance Selection</title>
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>562–570</pages>
       <url hash="f2cfeb61">I11-1063</url>
       <bibkey>zhang-etal-2011-wikipedia</bibkey>
@@ -617,7 +617,7 @@
       <author><first>Yunqing</first><last>Xia</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Fang</first><last>Zheng</last></author>
       <pages>580–588</pages>
       <url hash="00a82458">I11-1065</url>
       <bibkey>tang-etal-2011-clgvsm</bibkey>
@@ -627,7 +627,7 @@
       <author><first>Jianfeng</first><last>Zhang</last></author>
       <author><first>Yunqing</first><last>Xia</last></author>
       <author><first>Bin</first><last>Ma</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Yu</first><last>Hong</last></author>
       <pages>589–597</pages>
       <url hash="2a560cd0">I11-1066</url>
@@ -653,7 +653,7 @@
     <paper id="69">
       <title>A <fixed-case>POS</fixed-case>-based Ensemble Model for Cross-domain Sentiment Classification</title>
       <author><first>Rui</first><last>Xia</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>614–622</pages>
       <url hash="c4bf8669">I11-1069</url>
       <bibkey>xia-zong-2011-pos</bibkey>
@@ -661,7 +661,7 @@
     <paper id="70">
       <title>Ensemble-style Self-training on Citation Classification</title>
       <author><first>Cailing</first><last>Dong</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <pages>623–631</pages>
       <url hash="3736bc7e">I11-1070</url>
       <attachment type="dataset" hash="5f5d3551">I11-1070.Datasets.zip</attachment>
@@ -681,8 +681,8 @@
       <author><first>Yinggong</first><last>Zhao</last></author>
       <author><first>Shujie</first><last>Liu</last></author>
       <author><first>Yangsheng</first><last>Ji</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>641–648</pages>
       <url hash="9f82c1e2">I11-1072</url>
       <bibkey>zhao-etal-2011-transductive</bibkey>
@@ -699,7 +699,7 @@
     <paper id="74">
       <title>Going Beyond Word Cooccurrences in Global Lexical Selection for Statistical Machine Translation using a Multilayer Perceptron</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>658–666</pages>
       <url hash="d3217666">I11-1074</url>
       <bibkey>patry-langlais-2011-going</bibkey>
@@ -707,7 +707,7 @@
     <paper id="75">
       <title>System Combination Using Discriminative Cross-Adaptation</title>
       <author><first>Jacob</first><last>Devlin</last></author>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
       <pages>667–675</pages>
@@ -732,9 +732,9 @@
     </paper>
     <paper id="78">
       <title>It Takes Two to Tango: A Bilingual Unsupervised Approach for Estimating Sense Distributions using Expectation Maximization</title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Salil</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>695–704</pages>
       <url hash="ceaa7543">I11-1078</url>
       <bibkey>khapra-etal-2011-takes</bibkey>
@@ -742,8 +742,8 @@
     <paper id="79">
       <title>Dynamic and Static Prototype Vectors for Semantic Composition</title>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>705–713</pages>
       <url hash="5a164a81">I11-1079</url>
@@ -752,7 +752,7 @@
     <paper id="80">
       <title>Using Prediction from Sentential Scope to Build a Pseudo Co-Testing Learner for Event Extraction</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>714–722</pages>
       <url hash="4da03152">I11-1080</url>
       <bibkey>liao-grishman-2011-using</bibkey>
@@ -779,7 +779,7 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Minwoo</first><last>Jeong</last></author>
       <author><first>Jonghoon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>741–748</pages>
       <url hash="10f1d80b">I11-1083</url>
       <bibkey>kim-etal-2011-cross</bibkey>
@@ -789,7 +789,7 @@
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>749–757</pages>
       <url hash="954b8ee3">I11-1084</url>
       <bibkey>hara-etal-2011-exploring</bibkey>
@@ -805,7 +805,7 @@
     <paper id="86">
       <title>An Empirical Comparison of Unknown Word Prediction Methods</title>
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <pages>767–775</pages>
@@ -851,8 +851,8 @@
     </paper>
     <paper id="91">
       <title>Translation Quality Indicators for Pivot-based Statistical <fixed-case>MT</fixed-case></title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>811–818</pages>
       <url hash="dae296c3">I11-1091</url>
       <bibkey>paul-sumita-2011-translation</bibkey>
@@ -862,7 +862,7 @@
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Shiv</first><last>Vitaladevuni</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>819–827</pages>
       <url hash="440a83b3">I11-1092</url>
       <bibkey>ananthakrishnan-etal-2011-source</bibkey>
@@ -871,7 +871,7 @@
       <title>A Named Entity Recognition Method based on Decomposition and Concatenation of Word Chunks</title>
       <author><first>Tomoya</first><last>Iwakura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>828–836</pages>
       <url hash="a60045f9">I11-1093</url>
       <bibkey>iwakura-etal-2011-named</bibkey>
@@ -889,8 +889,8 @@
     <paper id="95">
       <title>Entity Disambiguation Using a <fixed-case>M</fixed-case>arkov-<fixed-case>L</fixed-case>ogic Network</title>
       <author><first>Hong-Jie</first><last>Dai</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>846–855</pages>
       <url hash="6a4b88e5">I11-1095</url>
       <bibkey>dai-etal-2011-entity</bibkey>
@@ -919,10 +919,10 @@
     <paper id="98">
       <title>Extending <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Hypernyms and Siblings Acquired from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Ichiro</first><last>Yamada</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Takuya</first><last>Kawada</last></author>
       <pages>874–882</pages>
@@ -940,12 +940,12 @@
     <paper id="100">
       <title>From News to Comment: Resources and Benchmarks for Parsing the Language of Web 2.0</title>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>893–901</pages>
       <url hash="399b3511">I11-1100</url>
       <bibkey>foster-etal-2011-news</bibkey>
@@ -955,8 +955,8 @@
       <author><first>Masaaki</first><last>Tsuchida</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Hayato</first><last>Ohwada</last></author>
       <pages>902–910</pages>
@@ -966,7 +966,7 @@
     <paper id="102">
       <title>Fleshing it out: A Supervised Approach to <fixed-case>MWE</fixed-case>-token and <fixed-case>MWE</fixed-case>-type Classification</title>
       <author><first>Richard</first><last>Fothergill</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>911–919</pages>
       <url hash="ea3bfdb3">I11-1102</url>
       <bibkey>fothergill-baldwin-2011-fleshing</bibkey>
@@ -976,7 +976,7 @@
       <author><first>Hikaru</first><last>Yokono</last></author>
       <author><first>Takaaki</first><last>Hasegawa</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>920–928</pages>
       <url hash="072001e8">I11-1103</url>
       <bibkey>yokono-etal-2011-identification</bibkey>
@@ -1005,7 +1005,7 @@
       <title><fixed-case>K</fixed-case>2<fixed-case>Q</fixed-case>: Generating Natural Language Questions from Keywords with User Refinements</title>
       <author><first>Zhicheng</first><last>Zheng</last></author>
       <author><first>Xiance</first><last>Si</last></author>
-      <author><first>Edward</first><last>Chang</last></author>
+      <author id="edward-y-chang"><first>Edward</first><last>Chang</last></author>
       <author><first>Xiaoyan</first><last>Zhu</last></author>
       <pages>947–955</pages>
       <url hash="624fc6aa">I11-1106</url>
@@ -1015,8 +1015,8 @@
       <title>Answering Complex Questions via Exploiting Social <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case> Collection</title>
       <author><first>Youzheng</first><last>Wu</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hisashi</first><last>Kawai</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hisashi-kawai"><first>Hisashi</first><last>Kawai</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>956–964</pages>
       <url hash="01903c10">I11-1107</url>
       <bibkey>wu-etal-2011-answering</bibkey>
@@ -1042,7 +1042,7 @@
     <paper id="110">
       <title>Using Text Reviews for Product Entity Completion</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Tanveer</first><last>Faruquie</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer</first><last>Faruquie</last></author>
       <author id="l-venkata-subramaniam"><first>L. V.</first><last>Subramaniam</last></author>
       <author><first>Mukesh</first><last>Mohania</last></author>
       <pages>983–991</pages>
@@ -1060,7 +1060,7 @@
       <title>Efficient Near-Duplicate Detection for <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case> Forum</title>
       <author><first>Yan</first><last>Wu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1001–1009</pages>
       <url hash="2d457f79">I11-1112</url>
       <bibkey>wu-etal-2011-efficient</bibkey>
@@ -1079,7 +1079,7 @@
       <title>Harvesting Related Entities with a Search Engine</title>
       <author><first>Shuqi</first><last>Sun</last></author>
       <author><first>Shiqi</first><last>Zhao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>1019–1027</pages>
@@ -1105,7 +1105,7 @@
     <paper id="117">
       <title>Passage Retrieval for Information Extraction using Distant Supervision</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Le</first><last>Zhao</last></author>
       <pages>1046–1054</pages>
       <url hash="e1ab7fbf">I11-1117</url>
@@ -1115,7 +1115,7 @@
       <title>Using Context Inference to Improve Sentence Ordering for Multi-document Summarization</title>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Guangxi</first><last>Deng</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1055–1061</pages>
       <url hash="c22dfa39">I11-1118</url>
       <bibkey>li-etal-2011-using</bibkey>
@@ -1123,7 +1123,7 @@
     <paper id="119">
       <title>Enhancing extraction based summarization with outside word space</title>
       <author><first>Christian</first><last>Smith</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>1062–1070</pages>
       <url hash="d0053176">I11-1119</url>
       <bibkey>smith-jonsson-2011-enhancing</bibkey>
@@ -1141,7 +1141,7 @@
     <paper id="121">
       <title>Relational Lasso —<fixed-case>A</fixed-case>n Improved Method Using the Relations Among Features—</title>
       <author><first>Kotaro</first><last>Kitagawa</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>1080–1088</pages>
       <url hash="f9e44ef2">I11-1121</url>
       <bibkey>kitagawa-tanaka-ishii-2011-relational</bibkey>
@@ -1150,7 +1150,7 @@
       <title>Enhance Top-down method with Meta-Classification for Very Large-scale Hierarchical Classification</title>
       <author><first>Xiao-Lin</first><last>Wang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>1089–1097</pages>
       <url hash="a740b5e7">I11-1122</url>
       <bibkey>wang-etal-2011-enhance</bibkey>
@@ -1158,7 +1158,7 @@
     <paper id="123">
       <title>Using Syntactic and Shallow Semantic Kernels to Improve Multi-Modality Manifold-Ranking for Topic-Focused Multi-Document Summarization</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Kaisar</first><last>Imam</last></author>
       <pages>1098–1106</pages>
       <url hash="ae095dcb">I11-1123</url>
@@ -1167,7 +1167,7 @@
     <paper id="124">
       <title>Automatic Determination of a Domain Adaptation Method for Word Sense Disambiguation Using Decision Tree Learning</title>
       <author><first>Kanako</first><last>Komiya</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1107–1115</pages>
       <url hash="4dd17896">I11-1124</url>
       <bibkey>komiya-okumura-2011-automatic</bibkey>
@@ -1185,7 +1185,7 @@
       <title>Jointly Extracting <fixed-case>J</fixed-case>apanese Predicate-Argument Relation with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
       <author><first>Katsumasa</first><last>Yoshikawa</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1125–1133</pages>
       <url hash="931a5b25">I11-1126</url>
       <bibkey>yoshikawa-etal-2011-jointly</bibkey>
@@ -1201,8 +1201,8 @@
     </paper>
     <paper id="128">
       <title>Automatic Analysis of Semantic Coherence in Academic Abstracts Written in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Vinícius Mourão Alves</first><last>de Souza</last></author>
-      <author><first>Valéria Delisandra</first><last>Feltrim</last></author>
+      <author id="vinicius-mourao-alves-de-souza"><first>Vinícius Mourão Alves</first><last>de Souza</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria Delisandra</first><last>Feltrim</last></author>
       <pages>1144–1152</pages>
       <url hash="8559298c">I11-1128</url>
       <bibkey>de-souza-feltrim-2011-automatic</bibkey>
@@ -1233,9 +1233,9 @@
     </paper>
     <paper id="132">
       <title>Towards Context-Based Subjectivity Analysis</title>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Baptiste</first><last>Chardon</last></author>
-      <author><first>Yannick</first><last>Mathieu</last></author>
+      <author id="yannick-mathieu"><first>Yannick</first><last>Mathieu</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <pages>1180–1188</pages>
       <url hash="1cfef514">I11-1132</url>
@@ -1243,9 +1243,9 @@
     </paper>
     <paper id="133">
       <title>Compression Methods by Code Mapping and Code Dividing for <fixed-case>C</fixed-case>hinese Dictionary Stored in a Double-Array Trie</title>
-      <author><first>Huidan</first><last>Liu</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
-      <author><first>Longlong</first><last>Ma</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
+      <author id="longlong-ma"><first>Longlong</first><last>Ma</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <author><first>Yeping</first><last>He</last></author>
       <pages>1189–1197</pages>
@@ -1255,7 +1255,7 @@
     <paper id="134">
       <title>Functional Elements and <fixed-case>POS</fixed-case> Categories</title>
       <author><first>Qiuye</first><last>Zhao</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <pages>1198–1206</pages>
       <url hash="bcf94fdd">I11-1134</url>
       <bibkey>zhao-marcus-2011-functional</bibkey>
@@ -1276,7 +1276,7 @@
       <author><first>Jun</first><last>Hatori</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1216–1224</pages>
       <url hash="c827948d">I11-1136</url>
       <bibkey>hatori-etal-2011-incremental</bibkey>
@@ -1291,8 +1291,8 @@
     </paper>
     <paper id="138">
       <title>Linguistic Phenomena, Analyses, and Representations: Understanding Conversion between Treebanks</title>
-      <author><first>Rajesh</first><last>Bhatt</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>1234–1242</pages>
       <url hash="ae15533d">I11-1138</url>
@@ -1311,7 +1311,7 @@
     <paper id="140">
       <title>Parse Reranking Based on Higher-Order Lexical Dependencies</title>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1251–1259</pages>
       <url hash="ff637ecc">I11-1140</url>
       <bibkey>wang-zong-2011-parse</bibkey>
@@ -1319,7 +1319,7 @@
     <paper id="141">
       <title>Improving Part-of-speech Tagging for Context-free Parsing</title>
       <author><first>Xiao</first><last>Chen</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>1260–1268</pages>
       <url hash="9b9bcd53">I11-1141</url>
       <bibkey>chen-kit-2011-improving</bibkey>
@@ -1327,7 +1327,7 @@
     <paper id="142">
       <title>Models Cascade for Tree-Structured Named Entity Detection</title>
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>1269–1278</pages>
       <url hash="0cde01cb">I11-1142</url>
       <bibkey>dinarelli-rosset-2011-models</bibkey>
@@ -1362,8 +1362,8 @@
     </paper>
     <paper id="146">
       <title>Integration of Reduplicated Multiword Expressions and Named Entities in a Phrase Based Statistical Machine Translation System</title>
-      <author><first>Thoudam</first><last>Doren Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam</first><last>Doren Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>1304–1312</pages>
       <url hash="9e5320aa">I11-1146</url>
       <bibkey>doren-singh-bandyopadhyay-2011-integration</bibkey>
@@ -1413,7 +1413,7 @@
     <paper id="152">
       <title>Clause-Based Reordering Constraints to Improve Statistical Machine Translation</title>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <author><first>Kushal</first><last>Ladha</last></author>
       <author><first>Ankur</first><last>Gandhe</last></author>
@@ -1434,16 +1434,16 @@
     </paper>
     <paper id="154">
       <title>Enhancing scarce-resource language translation through pivot combinations</title>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Carlos</first><last>Henríquez</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="carlos-henriquez"><first>Carlos</first><last>Henríquez</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>1361–1365</pages>
       <url hash="7adac455">I11-1154</url>
       <bibkey>r-costa-jussa-etal-2011-enhancing</bibkey>
     </paper>
     <paper id="155">
       <title>A Baseline System for <fixed-case>C</fixed-case>hinese Near-Synonym Choice</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Wei-Nan</first><last>Chien</last></author>
       <author><first>Shih-Ting</first><last>Chen</last></author>
       <pages>1366–1370</pages>
@@ -1462,7 +1462,7 @@
       <title>Text Patterns and Compression Models for Semantic Class Learning</title>
       <author><first>Chung-Yao</first><last>Chuang</last></author>
       <author><first>Yi-Hsun</first><last>Lee</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>1376–1381</pages>
       <url hash="8076dee6">I11-1157</url>
       <bibkey>chuang-etal-2011-text</bibkey>
@@ -1470,7 +1470,7 @@
     <paper id="158">
       <title>Potts Model on the Case Fillers for Word Sense Disambiguation</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1382–1386</pages>
       <url hash="e4e013c5">I11-1158</url>
       <bibkey>takamura-okumura-2011-potts</bibkey>
@@ -1502,8 +1502,8 @@
     </paper>
     <paper id="162">
       <title>Going Beyond Text: A Hybrid Image-Text Approach for Measuring Word Relatedness</title>
-      <author><first>Chee Wee</first><last>Leong</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1403–1407</pages>
       <url hash="329fd81d">I11-1162</url>
       <bibkey>leong-mihalcea-2011-going</bibkey>
@@ -1529,7 +1529,7 @@
     <paper id="165">
       <title>Compiling Learner Corpus Data of Linguistic Output and Language Processing in Speaking, Listening, Writing, and Reading</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Hiroaki</first><last>Nanjo</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <pages>1418–1422</pages>
@@ -1560,7 +1560,7 @@
       <author><first>Xuan</first><last>Wang</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>ShiXi</first><last>Fan</last></author>
+      <author id="shixi-fan"><first>ShiXi</first><last>Fan</last></author>
       <pages>1432–1436</pages>
       <url hash="f8f66504">I11-1168</url>
       <bibkey>zhang-etal-2011-diversifying</bibkey>
@@ -1568,7 +1568,7 @@
     <paper id="169">
       <title>Beyond Normalization: Pragmatics of Word Form in Text Messages</title>
       <author><first>Tyler</first><last>Baldwin</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1437–1441</pages>
       <url hash="5e9be629">I11-1169</url>
       <bibkey>baldwin-chai-2011-beyond</bibkey>
@@ -1600,16 +1600,16 @@
     </paper>
     <paper id="173">
       <title>Reduction of Search Space to Annotate Monolingual Corpora</title>
-      <author><first>Prajol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajol</first><last>Shrestha</last></author>
       <author><first>Christine</first><last>Jacquin</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <pages>1457–1461</pages>
       <url hash="4c576908">I11-1173</url>
       <bibkey>shrestha-etal-2011-reduction</bibkey>
     </paper>
     <paper id="174">
       <title>Toward a Parallel Corpus of Spoken <fixed-case>C</fixed-case>antonese and Written <fixed-case>C</fixed-case>hinese</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>1462–1466</pages>
       <url hash="ebbc91b5">I11-1174</url>
       <bibkey>lee-2011-toward</bibkey>
@@ -1617,8 +1617,8 @@
     <paper id="175">
       <title>Query Expansion for <fixed-case>IR</fixed-case> using Knowledge-Based Relatedness</title>
       <author><first>Arantxa</first><last>Otegi</last></author>
-      <author><first>Xabier</first><last>Arregi</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="xabier-arregi"><first>Xabier</first><last>Arregi</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1467–1471</pages>
       <url hash="79ed1203">I11-1175</url>
       <bibkey>otegi-etal-2011-query</bibkey>
@@ -1636,7 +1636,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>IJCNLP</fixed-case> 2011 System Demonstrations</booktitle>
       <url hash="61e22373">I11-2</url>
-      <editor><first>Kenneth</first><last>Church</last></editor>
+      <editor id="kenneth-church"><first>Kenneth</first><last>Church</last></editor>
       <editor><first>Yunqing</first><last>Xia</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
@@ -1651,7 +1651,7 @@
     <paper id="1">
       <title><fixed-case>W</fixed-case>iki<fixed-case>N</fixed-case>et<fixed-case>TK</fixed-case> – A Tool Kit for <fixed-case>E</fixed-case>mbedding<fixed-case>W</fixed-case>orld Knowledge in <fixed-case>NLP</fixed-case> Applications</title>
       <author><first>Alex</first><last>Judea</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>1–4</pages>
       <url hash="f5d90c84">I11-2001</url>
@@ -1667,7 +1667,7 @@
     <paper id="3">
       <title><fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite - A <fixed-case>UIMA</fixed-case> Application for Multilingual Terminology Extraction from Comparable Corpora</title>
       <author><first>Jérôme</first><last>Rocheteau</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>9–12</pages>
       <url hash="d53b73f9">I11-2003</url>
       <bibkey>rocheteau-daille-2011-ttc</bibkey>
diff --git a/data/xml/I13.xml b/data/xml/I13.xml
index d8138c48fa..83da9e67ab 100644
--- a/data/xml/I13.xml
+++ b/data/xml/I13.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Sixth International Joint Conference on Natural Language Processing</booktitle>
       <url hash="ee9088cf">I13-1</url>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Nagoya, Japan</address>
       <month>October</month>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Semi-Supervised Answer Extraction from Discussion Forums</title>
-      <author><first>Rose</first><last>Catherine</last></author>
+      <author id="rose-catherine-kanjirathinkal"><first>Rose</first><last>Catherine</last></author>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <author><first>Dinesh</first><last>Raghu</last></author>
@@ -29,7 +29,7 @@
     <paper id="2">
       <title><fixed-case>W</fixed-case>ord<fixed-case>T</fixed-case>opic-<fixed-case>M</fixed-case>ulti<fixed-case>R</fixed-case>ank: A New Method for Automatic Keyphrase Extraction</title>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Lian’en</first><last>Huang</last></author>
+      <author id="lianen-huang"><first>Lian’en</first><last>Huang</last></author>
       <author><first>Bo</first><last>Peng</last></author>
       <pages>10–18</pages>
       <url hash="34a117db">I13-1002</url>
@@ -47,9 +47,9 @@
       <title>Learning a Replacement Model for Query Segmentation with Consistency in Search Logs</title>
       <author><first>Wei</first><last>Zhang</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>28–36</pages>
       <url hash="527564d7">I13-1004</url>
       <bibkey>zhang-etal-2013-learning</bibkey>
@@ -76,15 +76,15 @@
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Kateřina</first><last>Rysová</last></author>
       <author><first>Magdaléna</first><last>Rysová</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>55–63</pages>
       <url hash="a79a8e7b">I13-1007</url>
       <bibkey>mirovsky-etal-2013-pre</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>A</fixed-case>nimacy Acquisition Using Morphological Case</title>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>64–72</pages>
       <url hash="63262514">I13-1008</url>
       <bibkey>bhat-sharma-2013-animacy</bibkey>
@@ -94,7 +94,7 @@
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Hidenao</first><last>Iwane</last></author>
       <author><first>Hirokazu</first><last>Anai</last></author>
-      <author><first>Noriko</first><last>Arai</last></author>
+      <author id="noriko-h-arai"><first>Noriko</first><last>Arai</last></author>
       <pages>73–81</pages>
       <url hash="dbdc4faa">I13-1009</url>
       <bibkey>matsuzaki-etal-2013-complexity</bibkey>
@@ -114,7 +114,7 @@
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Pavlína</first><last>Jínová</last></author>
       <author><first>Šárka</first><last>Zikánová</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>91–99</pages>
       <url hash="0049d0f3">I13-1011</url>
       <bibkey>polakova-etal-2013-introducing</bibkey>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="13">
       <title>A Weakly Supervised <fixed-case>B</fixed-case>ayesian Model for Violence Detection in Social Media</title>
-      <author><first>Amparo Elizabeth</first><last>Cano Basave</last></author>
+      <author id="amparo-elizabeth-cano-basave"><first>Amparo Elizabeth</first><last>Cano Basave</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <author><first>Kang</first><last>Liu</last></author>
       <author><first>Jun</first><last>Zhao</last></author>
@@ -143,7 +143,7 @@
       <author><first>Yeyun</first><last>Gong</last></author>
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>118–126</pages>
       <url hash="70dcf760">I13-1014</url>
       <bibkey>ding-etal-2013-detecting</bibkey>
@@ -153,7 +153,7 @@
       <author><first>Aobo</first><last>Wang</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Daniel</first><last>Andrade</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
       <author><first>Kai</first><last>Ishikawa</last></author>
       <pages>127–135</pages>
       <url hash="abe10be5">I13-1015</url>
@@ -164,7 +164,7 @@
     <paper id="16">
       <title>Feature Selection Using a Semantic Hierarchy for Event Recognition and Type Classification</title>
       <author><first>Yoonjae</first><last>Jeong</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>136–144</pages>
       <url hash="eaf6c09e">I13-1016</url>
       <bibkey>jeong-myaeng-2013-feature</bibkey>
@@ -189,7 +189,7 @@
       <title>A Simple Approach to Unknown Word Processing in <fixed-case>J</fixed-case>apanese Morphological Analysis</title>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>162–170</pages>
       <url hash="63a55ad8">I13-1019</url>
       <bibkey>sasano-etal-2013-simple</bibkey>
@@ -205,7 +205,7 @@
     </paper>
     <paper id="21">
       <title>Capturing Long-distance Dependencies in Sequence Models: A Case Study of <fixed-case>C</fixed-case>hinese Part-of-speech Tagging</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaochang</first><last>Peng</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>180–188</pages>
@@ -217,8 +217,8 @@
       <author><first>Sambhav</first><last>Jain</last></author>
       <author><first>Naman</first><last>Jain</last></author>
       <author><first>Aniruddha</first><last>Tammewar</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>189–197</pages>
       <url hash="ce2646c1">I13-1022</url>
       <bibkey>jain-etal-2013-exploring</bibkey>
@@ -226,7 +226,7 @@
     <paper id="23">
       <title>Towards Robust Cross-Domain Domain Adaptation for Part-of-Speech Tagging</title>
       <author><first>Tobias</first><last>Schnabel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>198–206</pages>
       <url hash="e0538a95">I13-1023</url>
       <bibkey>schnabel-schutze-2013-towards</bibkey>
@@ -235,7 +235,7 @@
       <title>Dependency Parsing for Identifying <fixed-case>H</fixed-case>ungarian Light Verb Constructions</title>
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>János</first><last>Zsibrita</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <pages>207–215</pages>
       <url hash="dfc04303">I13-1024</url>
       <bibkey>vincze-etal-2013-dependency</bibkey>
@@ -243,7 +243,7 @@
     <paper id="25">
       <title>Written Dialog and Social Power: Manifestations of Different Types of Power in Dialog Behavior</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>216–224</pages>
       <url hash="91a59fce">I13-1025</url>
       <bibkey>prabhakaran-rambow-2013-written</bibkey>
@@ -304,9 +304,9 @@
     <paper id="32">
       <title>Tuning <fixed-case>SMT</fixed-case> with a Large Number of Features via Online Feature Grouping</title>
       <author><first>Lemao</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>279–285</pages>
       <url hash="fd9f1306">I13-1032</url>
       <revision id="1" href="I13-1032v1" hash="61d13497"/>
@@ -324,7 +324,7 @@
     </paper>
     <paper id="34">
       <title>Bootstrapping Large-scale Named Entities using <fixed-case>URL</fixed-case>-Text Hybrid Patterns</title>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Shiqi</first><last>Zhao</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <pages>293–301</pages>
@@ -355,16 +355,16 @@
       <author><first>Huan</first><last>Chen</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Jin</first><last>Qian</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>320–328</pages>
       <url hash="e0c33893">I13-1037</url>
       <bibkey>chen-etal-2013-chinese</bibkey>
     </paper>
     <paper id="38">
       <title>Full-coverage Identification of <fixed-case>E</fixed-case>nglish Light Verb Constructions</title>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>329–337</pages>
       <url hash="5f466b38">I13-1038</url>
       <bibkey>nagy-t-etal-2013-full</bibkey>
@@ -387,10 +387,10 @@
     </paper>
     <paper id="41">
       <title>How Noisy Social Media Text, How Diffrnt Social Media Sources?</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Li</first><last>Wang</last></author>
       <pages>356–364</pages>
       <url hash="c492811b">I13-1041</url>
@@ -407,7 +407,7 @@
     </paper>
     <paper id="43">
       <title>Readability Indices for Automatic Evaluation of Text Simplification Systems: A Feasibility Study for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>374–382</pages>
       <url hash="249f0cbb">I13-1043</url>
@@ -423,7 +423,7 @@
     <paper id="45">
       <title>Automatically Developing a Fine-grained <fixed-case>A</fixed-case>rabic Named Entity Corpus and Gazetteer by utilizing <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Fahd</first><last>Alotaibi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>392–400</pages>
       <url hash="4d3b7591">I13-1045</url>
       <bibkey>alotaibi-lee-2013-automatically</bibkey>
@@ -431,7 +431,7 @@
     <paper id="46">
       <title>Ranking Translation Candidates Acquired from Comparable Corpora</title>
       <author><first>Rima</first><last>Harastani</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <pages>401–409</pages>
       <url hash="2c952ce6">I13-1046</url>
@@ -441,7 +441,7 @@
       <title>Using the Semantic-Syntactic Interface for Reliable <fixed-case>A</fixed-case>rabic Modality Annotation</title>
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
       <author><first>Jana</first><last>Diesner</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>410–418</pages>
       <url hash="31089de3">I13-1047</url>
       <bibkey>al-sabbagh-etal-2013-using</bibkey>
@@ -449,8 +449,8 @@
     <paper id="48">
       <title>Mapping Rules for Building a <fixed-case>T</fixed-case>unisian Dialect Lexicon and Generating Corpora</title>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Mariem</first><last>Ellouze Khemekhem</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze Khemekhem</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>419–428</pages>
       <url hash="36daa702">I13-1048</url>
       <bibkey>boujelbane-etal-2013-mapping</bibkey>
@@ -465,8 +465,8 @@
     </paper>
     <paper id="50">
       <title>Scalable Variational Inference for Extracting Hierarchical Phrase-based Translation Rules</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>438–446</pages>
       <url hash="1657a7b2">I13-1050</url>
@@ -476,7 +476,7 @@
       <title>A Topic-Triggered Language Model for Statistical Machine Translation</title>
       <author><first>Heng</first><last>Yu</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>447–454</pages>
       <url hash="d142e4b9">I13-1051</url>
@@ -523,7 +523,7 @@
     <paper id="56">
       <title>Uncovering Distributional Differences between Synonyms and Antonyms in a Word Space Model</title>
       <author><first>Silke</first><last>Scheible</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Sylvia</first><last>Springorum</last></author>
       <pages>489–497</pages>
       <url hash="d9931fdb">I13-1056</url>
@@ -532,8 +532,8 @@
     <paper id="57">
       <title>Multilingual Word Sense Disambiguation Using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Bharath</first><last>Dandala</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>498–506</pages>
       <url hash="19550909">I13-1057</url>
       <bibkey>dandala-etal-2013-multilingual</bibkey>
@@ -543,7 +543,7 @@
       <author><first>Rui</first><last>Yan</last></author>
       <author><first>Han</first><last>Jiang</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <author><first>Xueqiang</first><last>Lv</last></author>
       <author><first>Xiaoming</first><last>Li</last></author>
       <pages>507–515</pages>
@@ -561,7 +561,7 @@
     <paper id="60">
       <title>Learning a Product of Experts with Elitist Lasso</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>525–533</pages>
       <url hash="ba6b9e7f">I13-1060</url>
       <bibkey>wang-manning-2013-learning</bibkey>
@@ -579,7 +579,7 @@
       <title><fixed-case>T</fixed-case>opic<fixed-case>R</fixed-case>ank: Graph-Based Topic Ranking for Keyphrase Extraction</title>
       <author><first>Adrien</first><last>Bougouin</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>543–551</pages>
       <url hash="657684d4">I13-1062</url>
       <bibkey>bougouin-etal-2013-topicrank</bibkey>
@@ -588,14 +588,14 @@
       <title>Understanding the Semantic Intent of Natural Language Query</title>
       <author><first>Juan</first><last>Xu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>552–560</pages>
       <url hash="ed7c5def">I13-1063</url>
       <bibkey>xu-etal-2013-understanding</bibkey>
     </paper>
     <paper id="64">
       <title>Sentiment Classification for Movie Reviews in <fixed-case>C</fixed-case>hinese Using Parsing-based Methods</title>
-      <author><first>Wen-Juan</first><last>Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen-Juan</first><last>Hou</last></author>
       <author><first>Chuang-Ping</first><last>Chang</last></author>
       <pages>561–569</pages>
       <url hash="266bf5f9">I13-1064</url>
@@ -604,7 +604,7 @@
     <paper id="65">
       <title>Sentiment Aggregation using <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et Ontology</title>
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>570–578</pages>
       <url hash="a98457e8">I13-1065</url>
       <bibkey>mukherjee-joshi-2013-sentiment</bibkey>
@@ -612,11 +612,11 @@
     <paper id="66">
       <title>Detecting Cyberbullying Entries on Informal School Websites Based on Category Relevance Maximization</title>
       <author><first>Taisei</first><last>Nitta</last></author>
-      <author><first>Fumito</first><last>Masui</last></author>
+      <author id="fumito-masui"><first>Fumito</first><last>Masui</last></author>
       <author><first>Michal</first><last>Ptaszynski</last></author>
       <author><first>Yasutomo</first><last>Kimura</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>579–586</pages>
       <url hash="4a071107">I13-1066</url>
       <bibkey>nitta-etal-2013-detecting</bibkey>
@@ -636,7 +636,7 @@
       <author><first>Ke</first><last>Sun</last></author>
       <author><first>Shiqi</first><last>Zhao</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>596–604</pages>
       <url hash="0e3a13f7">I13-1068</url>
@@ -646,14 +646,14 @@
       <title>Labeled Alignment for Recognizing Textual Entailment</title>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>605–613</pages>
       <url hash="2d178a11">I13-1069</url>
       <bibkey>wang-etal-2013-labeled</bibkey>
     </paper>
     <paper id="70">
       <title>Context-Based <fixed-case>C</fixed-case>hinese Word Segmentation using <fixed-case>SVM</fixed-case> Machine-Learning Algorithm without Dictionary Support</title>
-      <author><first>Chia-ming</first><last>Lee</last></author>
+      <author id="chia-ming-lee"><first>Chia-ming</first><last>Lee</last></author>
       <author><first>Chien-Kang</first><last>Huang</last></author>
       <pages>614–622</pages>
       <url hash="d44ddb20">I13-1070</url>
@@ -670,7 +670,7 @@
     <paper id="72">
       <title>Detecting Polysemy in Hard and Soft Cluster Analyses of <fixed-case>G</fixed-case>erman Preposition Vector Spaces</title>
       <author><first>Sylvia</first><last>Springorum</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Jason</first><last>Utt</last></author>
       <pages>632–640</pages>
       <url hash="906081a3">I13-1072</url>
@@ -706,7 +706,7 @@
     <paper id="76">
       <title>Detecting Domain Dedicated Polar Words</title>
       <author><first>Raksha</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>661–666</pages>
       <url hash="73e3c27e">I13-1076</url>
       <bibkey>sharma-bhattacharyya-2013-detecting</bibkey>
@@ -716,7 +716,7 @@
       <author><first>Tanveer</first><last>Ali</last></author>
       <author><first>David</first><last>Schramm</last></author>
       <author><first>Marina</first><last>Sokolova</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>667–673</pages>
       <url hash="1611b03e">I13-1077</url>
       <bibkey>ali-etal-2013-hear</bibkey>
@@ -726,8 +726,8 @@
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>674–679</pages>
       <url hash="6a292b35">I13-1078</url>
       <bibkey>patra-etal-2013-construction</bibkey>
@@ -744,7 +744,7 @@
       <title>Unsupervised Word Class Induction for Under-resourced Languages: A Case Study on <fixed-case>I</fixed-case>ndonesian</title>
       <author><first>Meladel</first><last>Mistica</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>685–691</pages>
       <url hash="e469fa62">I13-1080</url>
       <bibkey>mistica-etal-2013-unsupervised</bibkey>
@@ -752,7 +752,7 @@
     <paper id="81">
       <title>An Efficient Active Learning Framework for New Relation Types</title>
       <author><first>Lisheng</first><last>Fu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>692–698</pages>
       <url hash="b47f1fe1">I13-1081</url>
       <bibkey>fu-grishman-2013-efficient</bibkey>
@@ -767,10 +767,10 @@
     </paper>
     <paper id="83">
       <title>Augmentable Paraphrase Extraction Framework</title>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Yi-Chun</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="yichun-chen"><first>Yi-Chun</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>706–711</pages>
       <url hash="b589bb5a">I13-1083</url>
       <bibkey>chen-etal-2013-augmentable</bibkey>
@@ -778,8 +778,8 @@
     <paper id="84">
       <title>Automatic Prediction of Evidence-based Recommendations via Sentence-level Polarity Classification</title>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Diego</first><last>Mollá-Aliod</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá-Aliod</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>712–718</pages>
       <url hash="cc38b438">I13-1084</url>
       <bibkey>sarker-etal-2013-automatic</bibkey>
@@ -815,15 +815,15 @@
       <author><first>Dang Hai</first><last>Tran</last></author>
       <author><first>Cuong Xuan</first><last>Chu</last></author>
       <author><first>Son Bao</first><last>Pham</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>740–746</pages>
       <url hash="07cf2b63">I13-1088</url>
       <bibkey>tran-etal-2013-learning</bibkey>
     </paper>
     <paper id="89">
       <title>Detecting Bot-Answerable Questions in <fixed-case>U</fixed-case>buntu Chat</title>
-      <author><first>David</first><last>Uthus</last></author>
-      <author><first>David</first><last>Aha</last></author>
+      <author id="david-c-uthus"><first>David</first><last>Uthus</last></author>
+      <author id="david-w-aha"><first>David</first><last>Aha</last></author>
       <pages>747–752</pages>
       <url hash="c2c261ec">I13-1089</url>
       <bibkey>uthus-aha-2013-detecting</bibkey>
@@ -846,7 +846,7 @@
     <paper id="92">
       <title>On the Effectiveness of Using Syntactic and Shallow Semantic Tree Kernels for Automatic Assessment of Essays</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>767–773</pages>
       <url hash="98acaf4e">I13-1092</url>
       <bibkey>chali-hasan-2013-effectiveness</bibkey>
@@ -854,7 +854,7 @@
     <paper id="93">
       <title>Little by Little: Semi Supervised Stemming through Stem Set Minimization</title>
       <author><first>Vasudevan</first><last>N</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>774–780</pages>
       <url hash="9c62d7ae">I13-1093</url>
       <bibkey>n-bhattacharyya-2013-little</bibkey>
@@ -863,7 +863,7 @@
       <title>What Information is Helpful for Dependency Based Semantic Role Labeling</title>
       <author><first>Yanyan</first><last>Luo</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>781–787</pages>
       <url hash="51a9c083">I13-1094</url>
       <bibkey>luo-etal-2013-information</bibkey>
@@ -871,7 +871,7 @@
     <paper id="95">
       <title>Classifying Taxonomic Relations between Pairs of <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>788–794</pages>
       <url hash="de45869d">I13-1095</url>
       <bibkey>biran-mckeown-2013-classifying</bibkey>
@@ -887,7 +887,7 @@
     <paper id="97">
       <title>Financial Sentiment Analysis for Risk Prediction</title>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Tse</first><last>Liu</last></author>
       <author><first>Chin-Ting</first><last>Chang</last></author>
       <pages>802–808</pages>
@@ -899,18 +899,18 @@
       <author><first>Minh-Quoc</first><last>Nghiem</last></author>
       <author><first>Giovanni Yoko</first><last>Kristianto</last></author>
       <author><first>Goran</first><last>Topić</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>809–814</pages>
       <url hash="169c0d7b">I13-1098</url>
       <bibkey>nghiem-etal-2013-sense</bibkey>
     </paper>
     <paper id="99">
       <title>Adapting a State-of-the-art Anaphora Resolution System for Resource-poor Language</title>
-      <author><first>Utpal</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal</first><last>Sikdar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>815–821</pages>
       <url hash="0d200a12">I13-1099</url>
       <bibkey>sikdar-etal-2013-adapting</bibkey>
@@ -942,8 +942,8 @@
       <title>Translating <fixed-case>C</fixed-case>hinese Unknown Words by Automatically Acquired Templates</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>839–843</pages>
       <url hash="eab0586e">I13-1103</url>
       <bibkey>bai-etal-2013-translating</bibkey>
@@ -951,8 +951,8 @@
     <paper id="104">
       <title>Multilingual Lexicon Bootstrapping - Improving a Lexicon Induction System Using a Parallel Corpus</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>844–848</pages>
       <url hash="69f35140">I13-1104</url>
       <bibkey>ziering-etal-2013-multilingual</bibkey>
@@ -967,7 +967,7 @@
     <paper id="106">
       <title>A Factoid Question Answering System Using Answer Pattern Matching</title>
       <author><first>Nagehan</first><last>Pala Er</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <pages>854–858</pages>
       <url hash="2ef30c4a">I13-1106</url>
       <bibkey>pala-er-cicekli-2013-factoid</bibkey>
@@ -988,7 +988,7 @@
       <author><first>Youngsam</first><last>Kim</last></author>
       <author><first>Munhyong</first><last>Kim</last></author>
       <author><first>Andrew</first><last>Cattle</last></author>
-      <author><first>Julia</first><last>Otmakhova</last></author>
+      <author id="julia-otmakhova"><first>Julia</first><last>Otmakhova</last></author>
       <author><first>Suzi</first><last>Park</last></author>
       <author><first>Hyopil</first><last>Shin</last></author>
       <pages>864–868</pages>
@@ -1037,7 +1037,7 @@
     <paper id="113">
       <title>An Empirical Study of Combing Multiple Models in <fixed-case>B</fixed-case>engali Question Classification</title>
       <author><first>Somnath</first><last>Banerjee</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>892–896</pages>
       <url hash="7d7a851a">I13-1113</url>
       <bibkey>banerjee-bandyopadhyay-2013-empirical</bibkey>
@@ -1054,7 +1054,7 @@
     <paper id="115">
       <title>Exploiting User Search Sessions for the Semantic Categorization of Question-like Informational Search Queries</title>
       <author><first>Alejandro</first><last>Figueroa</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>902–906</pages>
       <url hash="df5fb95d">I13-1115</url>
       <bibkey>figueroa-neumann-2013-exploiting</bibkey>
@@ -1072,7 +1072,7 @@
     </paper>
     <paper id="117">
       <title>Interest Analysis using <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank and Social Interaction Content</title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>912–916</pages>
       <url hash="a5cb036f">I13-1117</url>
@@ -1102,7 +1102,7 @@
     <paper id="120">
       <title>An Approach of Hybrid Hierarchical Structure for Word Similarity Computing by <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <pages>927–931</pages>
       <url hash="a90903f0">I13-1120</url>
@@ -1119,16 +1119,16 @@
     <paper id="122">
       <title>Automated Grammar Correction Using Hierarchical Phrase-Based Statistical Machine Translation</title>
       <author><first>Bibek</first><last>Behera</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>937–941</pages>
       <url hash="caf1fae5">I13-1122</url>
       <bibkey>behera-bhattacharyya-2013-automated</bibkey>
     </paper>
     <paper id="123">
       <title>Finding Dependency Parsing Limits over a Large <fixed-case>S</fixed-case>panish Corpus</title>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Héctor</first><last>Martínez</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>942–946</pages>
       <url hash="549ce5d1">I13-1123</url>
@@ -1147,7 +1147,7 @@
       <title>Building Specialized Bilingual Lexicons Using Word Sense Disambiguation</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>952–956</pages>
       <url hash="9280c828">I13-1125</url>
       <bibkey>bouamor-etal-2013-building-specialized</bibkey>
@@ -1166,7 +1166,7 @@
       <author><first>Koichiro</first><last>Yoshino</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
       <author><first>Jonathan</first><last>Le Roux</last></author>
-      <author><first>John R.</first><last>Hershey</last></author>
+      <author id="john-r-hershey"><first>John R.</first><last>Hershey</last></author>
       <pages>962–966</pages>
       <url hash="aca61eb3">I13-1127</url>
       <bibkey>yoshino-etal-2013-statistical</bibkey>
@@ -1174,9 +1174,9 @@
     <paper id="128">
       <title>Repairing Incorrect Translation with Examples</title>
       <author><first>Junguo</first><last>Zhu</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>967–971</pages>
       <url hash="f4bf44b1">I13-1128</url>
       <bibkey>zhu-etal-2013-repairing</bibkey>
@@ -1196,16 +1196,16 @@
       <title>A Hybrid Approach for Anaphora Resolution in <fixed-case>H</fixed-case>indi</title>
       <author><first>Praveen</first><last>Dakwale</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti M</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti M</first><last>Sharma</last></author>
       <pages>977–981</pages>
       <url hash="11345b4c">I13-1130</url>
       <bibkey>dakwale-etal-2013-hybrid</bibkey>
     </paper>
     <paper id="131">
       <title>Structure Cognizant Pseudo Relevance Feedback</title>
-      <author><first>Arjun</first><last>Atreya V</last></author>
+      <author id="arjun-atreya-v"><first>Arjun</first><last>Atreya V</last></author>
       <author><first>Yogesh</first><last>Kakde</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <pages>982–986</pages>
       <url hash="d3af819b">I13-1131</url>
@@ -1213,8 +1213,8 @@
     </paper>
     <paper id="132">
       <title>Cross-Domain Answer Ranking using Importance Sampling</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>987–991</pages>
       <url hash="ed26265b">I13-1132</url>
       <bibkey>johannsen-sogaard-2013-cross</bibkey>
@@ -1222,16 +1222,16 @@
     <paper id="133">
       <title>Morphological Analysis of <fixed-case>T</fixed-case>unisian Dialect</title>
       <author><first>Inès</first><last>Zribi</last></author>
-      <author><first>Mariem</first><last>Ellouze Khemakhem</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze Khemakhem</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>992–996</pages>
       <url hash="a8d0bf72">I13-1133</url>
       <bibkey>zribi-etal-2013-morphological</bibkey>
     </paper>
     <paper id="134">
       <title>Disambiguating Explicit Discourse Connectives without Oracles</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>997–1001</pages>
       <url hash="a54e79c6">I13-1134</url>
       <bibkey>johannsen-sogaard-2013-disambiguating</bibkey>
@@ -1246,8 +1246,8 @@
     </paper>
     <paper id="136">
       <title>Statistical Morphological Analyzer for <fixed-case>H</fixed-case>indi</title>
-      <author><first>Deepak Kumar</first><last>Malladi</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="deepak-kumar-malladi"><first>Deepak Kumar</first><last>Malladi</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>1007–1011</pages>
       <url hash="cdce2475">I13-1136</url>
       <bibkey>malladi-mannem-2013-statistical</bibkey>
@@ -1255,15 +1255,15 @@
     <paper id="137">
       <title>Induction of Root and Pattern Lexicon for Unsupervised Morphological Analysis of <fixed-case>A</fixed-case>rabic</title>
       <author><first>Bilal</first><last>Khaliq</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>1012–1016</pages>
       <url hash="9010581b">I13-1137</url>
       <bibkey>khaliq-carroll-2013-induction</bibkey>
     </paper>
     <paper id="138">
       <title>Using Shallow Semantic Parsing and Relation Extraction for Finding Contradiction in Text</title>
-      <author><first>Minh Quang Nhat</first><last>Pham</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-quang-nhat-pham"><first>Minh Quang Nhat</first><last>Pham</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>1017–1021</pages>
       <url hash="af936415">I13-1138</url>
@@ -1272,7 +1272,7 @@
     <paper id="139">
       <title>Using Transliteration of Proper Names from <fixed-case>A</fixed-case>rabic to <fixed-case>L</fixed-case>atin Script to Improve <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Word Alignment</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <pages>1022–1026</pages>
       <url hash="927a8df3">I13-1139</url>
       <bibkey>semmar-saadane-2013-using</bibkey>
@@ -1290,8 +1290,8 @@
       <title>Incremental Segmentation and Decoding Strategies for Simultaneous Translation</title>
       <author><first>Mahsa</first><last>Yarmohammadi</last></author>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <pages>1032–1036</pages>
       <url hash="8fa6d926">I13-1141</url>
       <bibkey>yarmohammadi-etal-2013-incremental</bibkey>
@@ -1299,7 +1299,7 @@
     <paper id="142">
       <title>Two Case Studies on Translating Pronouns in a Deep Syntax Framework</title>
       <author><first>Michal</first><last>Novák</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <pages>1037–1041</pages>
       <url hash="303cbde3">I13-1142</url>
@@ -1308,7 +1308,7 @@
     <paper id="143">
       <title>Bootstrapping Phrase-based Statistical Machine Translation via <fixed-case>WSD</fixed-case> Integration</title>
       <author><first>Hien</first><last>Vu Huy</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
       <author><first>Tung-Lam</first><last>Nguyen</last></author>
       <author id="minh-le-nguyen"><first>M.L</first><last>Nguyen</last></author>
       <pages>1042–1046</pages>
@@ -1326,7 +1326,7 @@
     </paper>
     <paper id="145">
       <title>Interoperability between Service Composition and Processing Pipeline: Case Study on the Language Grid and <fixed-case>UIMA</fixed-case></title>
-      <author><first>Trang</first><last>Mai Xuan</last></author>
+      <author id="trang-mai-xuan"><first>Trang</first><last>Mai Xuan</last></author>
       <author><first>Yohei</first><last>Murakami</last></author>
       <author><first>Donghui</first><last>Lin</last></author>
       <author><first>Toru</first><last>Ishida</last></author>
@@ -1380,7 +1380,7 @@
       <title>Synonym Acquisition Using Bilingual Comparable Corpora</title>
       <author><first>Daniel</first><last>Andrade</last></author>
       <author><first>Masaaki</first><last>Tsuchida</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
       <author><first>Kai</first><last>Ishikawa</last></author>
       <pages>1077–1081</pages>
       <url hash="473e16d9">I13-1150</url>
@@ -1390,7 +1390,7 @@
       <title>Exploring Verb Frames for Sentence Simplification in <fixed-case>H</fixed-case>indi</title>
       <author><first>Ankush</first><last>Soni</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
-      <author><first>Dipti</first><last>Misra Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra Sharma</last></author>
       <pages>1082–1086</pages>
       <url hash="b2e7921f">I13-1151</url>
       <bibkey>soni-etal-2013-exploring</bibkey>
@@ -1405,9 +1405,9 @@
     </paper>
     <paper id="153">
       <title>Parser Accuracy in Quality Estimation of Machine Translation: A Tree Kernel Approach</title>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Fred</first><last>Hollowood</last></author>
       <pages>1092–1096</pages>
@@ -1416,7 +1416,7 @@
     </paper>
     <paper id="154">
       <title>Attribute Relation Extraction from Template-inconsistent Semi-structured Text by Leveraging Site-level Knowledge</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Fang</first><last>Liu</last></author>
       <author><first>Siwei</first><last>Lai</last></author>
       <author><first>Kang</first><last>Liu</last></author>
@@ -1456,7 +1456,7 @@
       <author><first>How</first><last>Jing</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>1117–1123</pages>
       <url hash="480586a6">I13-1158</url>
       <bibkey>jing-etal-2013-semantic</bibkey>
@@ -1464,7 +1464,7 @@
     <paper id="159">
       <title>Cluster-based Web Summarization</title>
       <author><first>Yves</first><last>Petinot</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
       <pages>1124–1128</pages>
       <url hash="8520667d">I13-1159</url>
@@ -1474,7 +1474,7 @@
       <title>Automated Activity Recognition in Clinical Documents</title>
       <author><first>Camilo</first><last>Thorne</last></author>
       <author><first>Marco</first><last>Montali</last></author>
-      <author><first>Diego</first><last>Calvanese</last></author>
+      <author id="diego-calvanese"><first>Diego</first><last>Calvanese</last></author>
       <author><first>Elena</first><last>Cardillo</last></author>
       <author><first>Claudio</first><last>Eccher</last></author>
       <pages>1129–1133</pages>
@@ -1495,7 +1495,7 @@
       <author><first>Bingyang</first><last>Liu</last></author>
       <author><first>Dayong</first><last>Wu</last></author>
       <author><first>Yue</first><last>Liu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>1139–1143</pages>
       <url hash="da7aae9f">I13-1162</url>
       <bibkey>liu-etal-2013-self</bibkey>
@@ -1529,9 +1529,9 @@
     </paper>
     <paper id="166">
       <title>Estimating the Quality of Translated User-Generated Content</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>Fred</first><last>Hollowood</last></author>
       <pages>1167–1173</pages>
@@ -1552,7 +1552,7 @@
     <paper id="168">
       <title>Multiword Expressions in the Context of Statistical Machine Translation</title>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1181–1187</pages>
       <url hash="f88bd382">I13-1168</url>
       <bibkey>ghoneim-diab-2013-multiword</bibkey>
@@ -1577,7 +1577,7 @@
       <title>Automatic Extraction of Social Networks from Literary Text: A Case Study on Alice in Wonderland</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Anup</first><last>Kotalwar</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1202–1208</pages>
       <url hash="4830de3f">I13-1171</url>
       <bibkey>agarwal-etal-2013-automatic</bibkey>
@@ -1603,7 +1603,7 @@
     <paper id="174">
       <title>Iterative Development and Evaluation of a Social Conversational Agent</title>
       <author><first>Annika</first><last>Silvervarg</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>1223–1229</pages>
       <url hash="62141ca0">I13-1174</url>
       <bibkey>silvervarg-jonsson-2013-iterative</bibkey>
@@ -1611,7 +1611,7 @@
     <paper id="175">
       <title>A Hybrid Morphological Disambiguation System for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Mucahid</first><last>Kutlu</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <pages>1230–1236</pages>
       <url hash="650970da">I13-1175</url>
       <bibkey>kutlu-cicekli-2013-hybrid</bibkey>
@@ -1626,7 +1626,7 @@
     </paper>
     <paper id="177">
       <title>Increasing the Quality and Quantity of Source Language Data for Unsupervised Cross-Lingual <fixed-case>POS</fixed-case> Tagging</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
@@ -1655,16 +1655,16 @@
       <title>Named Entity Extraction using Information Distance</title>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>1264–1270</pages>
       <url hash="25d7e2b0">I13-1180</url>
       <bibkey>patil-etal-2013-named</bibkey>
     </paper>
     <paper id="181">
       <title>Feature-based Neural Language Model and <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Mairgup</first><last>Mansur</last></author>
+      <author id="mairgup-mansur"><first>Mairgup</first><last>Mansur</last></author>
       <author><first>Wenzhe</first><last>Pei</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>1271–1277</pages>
       <url hash="9d3a619d">I13-1181</url>
       <bibkey>mansur-etal-2013-feature</bibkey>
@@ -1682,7 +1682,7 @@
     <paper id="183">
       <title>Effect of Non-linear Deep Architecture in Sequence Labeling</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1285–1291</pages>
       <url hash="1441e5e4">I13-1183</url>
       <bibkey>wang-manning-2013-effect</bibkey>
@@ -1696,7 +1696,7 @@
     </paper>
     <paper id="185">
       <title>Source and Translation Classification using Most Frequent Words</title>
-      <author><first>Zahurul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
       <author><first>Armin</first><last>Hoenen</last></author>
       <pages>1299–1305</pages>
       <url hash="72d31646">I13-1185</url>
@@ -1723,8 +1723,8 @@
     <paper id="188">
       <title>Bootstrapping Semantic Lexicons for Technical Domains</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1321–1329</pages>
       <url hash="dc2476da">I13-1188</url>
       <bibkey>ziering-etal-2013-bootstrapping</bibkey>
@@ -1733,7 +1733,7 @@
       <title>Long-Distance Time-Event Relation Extraction</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>1330–1338</pages>
       <url hash="bb7e935a">I13-1189</url>
       <bibkey>moschitti-etal-2013-long</bibkey>
@@ -1784,7 +1784,7 @@
     <paper id="195">
       <title>Diagnosing Causes of Reading Difficulty using <fixed-case>B</fixed-case>ayesian Networks</title>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1383–1391</pages>
       <url hash="607c2f66">I13-1195</url>
       <bibkey>martinez-gomez-aizawa-2013-diagnosing</bibkey>
@@ -1808,7 +1808,7 @@
     <paper id="198">
       <title>Supervised Sentence Fusion with Single-Stage Inference</title>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1410–1418</pages>
       <url hash="296f24d1">I13-1198</url>
       <bibkey>thadani-mckeown-2013-supervised</bibkey>
@@ -1887,12 +1887,12 @@
       <title><fixed-case>DIRA</fixed-case>: Dialectal <fixed-case>A</fixed-case>rabic Information Retrieval Assistant</title>
       <author><first>Arfath</first><last>Pasha</last></author>
       <author><first>Mohammad</first><last>Al-Badrashiny</last></author>
-      <author><first>Mohamed</first><last>Altantawy</last></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>Altantawy</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Manoj</first><last>Pooleery</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Ryan</first><last>M. Roth</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>M. Roth</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>13–16</pages>
       <url hash="4e83471c">I13-2004</url>
       <bibkey>pasha-etal-2013-dira</bibkey>
@@ -1900,7 +1900,7 @@
     <paper id="5">
       <title>Keyphrase-Driven Document Visualization Tool</title>
       <author><first>Gábor</first><last>Berend</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>17–20</pages>
       <url hash="1914c18b">I13-2005</url>
       <bibkey>berend-farkas-2013-keyphrase</bibkey>
@@ -1910,7 +1910,7 @@
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Kashyap</first><last>Popat</last></author>
       <author><first>Shubham</first><last>Gautam</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>21–24</pages>
       <url hash="49790384">I13-2006</url>
       <bibkey>joshi-etal-2013-making</bibkey>
@@ -1940,7 +1940,7 @@
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Anup</first><last>Kotalwar</last></author>
       <author><first>Jiehan</first><last>Zheng</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>33–36</pages>
       <url hash="3eb39a3f">I13-2009</url>
       <bibkey>agarwal-etal-2013-sinnet</bibkey>
@@ -1957,7 +1957,7 @@
       <title><fixed-case>T</fixed-case>muse: Lexical Network Exploration</title>
       <author><first>Yannick</first><last>Chudy</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Pierre</first><last>Magistry</last></author>
       <author><first>Emmanuel</first><last>Navarro</last></author>
diff --git a/data/xml/I17.xml b/data/xml/I17.xml
index efa4b29da9..4bd56158d2 100644
--- a/data/xml/I17.xml
+++ b/data/xml/I17.xml
@@ -19,11 +19,11 @@
     <paper id="1">
       <title>Evaluating Layers of Representation in Neural Machine Translation on Part-of-Speech and Semantic Tagging Tasks</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>1–10</pages>
       <url hash="b75729e3">I17-1001</url>
       <abstract>While neural machine translation (NMT) models provide improved translation quality in an elegant framework, it is less clear what they learn about language. Recent work has started evaluating the quality of vector representations learned by NMT models on morphological and syntactic tasks. In this paper, we investigate the representations learned at different layers of NMT encoders. We train NMT systems on parallel data and use the models to extract features for training a classifier on two tasks: part-of-speech and semantic tagging. We then measure the performance of the classifier as a proxy to the quality of the original NMT model for the given task. Our quantitative analysis yields interesting insights regarding representation learning in NMT models. For instance, we find that higher layers are better at learning semantics while lower layers tend to be better for part-of-speech tagging. We also observe little effect of the target language on source-side representations, especially in higher quality models.</abstract>
@@ -34,8 +34,8 @@
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>11–20</pages>
       <url hash="c927df95">I17-1002</url>
       <abstract>In Neural Machine Translation (NMT), each word is represented as a low-dimension, real-value vector for encoding its syntax and semantic information. This means that even if the word is in a different sentence context, it is represented as the fixed vector to learn source representation. Moreover, a large number of Out-Of-Vocabulary (OOV) words, which have different syntax and semantic information, are represented as the same vector representation of “unk”. To alleviate this problem, we propose a novel context-aware smoothing method to dynamically learn a sentence-specific vector for each word (including OOV words) depending on its local context words in a sentence. The learned context-aware representation is integrated into the NMT to improve the translation performance. Empirical results on NIST Chinese-to-English translation task show that the proposed approach achieves 1.78 BLEU improvements on average over a strong attentional NMT, and outperforms some existing systems.</abstract>
@@ -46,7 +46,7 @@
       <author><first>An</first><last>Nguyen Le</last></author>
       <author><first>Ander</first><last>Martinez</last></author>
       <author><first>Akifumi</first><last>Yoshimoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>21–29</pages>
       <url hash="483dd2b0">I17-1003</url>
       <attachment type="dataset" hash="4cea0309">I17-1003.Datasets.zip</attachment>
@@ -87,7 +87,7 @@
     <paper id="7">
       <title>Neural Probabilistic Model for Non-projective <fixed-case>MST</fixed-case> Parsing</title>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>59–69</pages>
       <url hash="2bee5cf3">I17-1007</url>
       <abstract>In this paper, we propose a probabilistic parsing model that defines a proper conditional probability distribution over non-projective dependency trees for a given sentence, using neural representations as inputs. The neural network architecture is based on bi-directional LSTMCNNs, which automatically benefits from both word- and character-level representations, by using a combination of bidirectional LSTMs and CNNs. On top of the neural network, we introduce a probabilistic structured layer, defining a conditional log-linear model over non-projective trees. By exploiting Kirchhoff’s Matrix-Tree Theorem (Tutte, 1984), the partition functions and marginals can be computed efficiently, leading to a straightforward end-to-end model training procedure via back-propagation. We evaluate our model on 17 different datasets, across 14 different languages. Our parser achieves state-of-the-art parsing performance on nine datasets.</abstract>
@@ -116,7 +116,7 @@
       <title>Improving Implicit Semantic Role Labeling by Predicting Semantic Frame Arguments</title>
       <author><first>Quynh Ngoc Thi</first><last>Do</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>90–99</pages>
       <url hash="d45413b4">I17-1010</url>
       <abstract>Implicit semantic role labeling (iSRL) is the task of predicting the semantic roles of a predicate that do not appear as explicit arguments, but rather regard common sense knowledge or are mentioned earlier in the discourse. We introduce an approach to iSRL based on a predictive recurrent neural semantic frame model (PRNSFM) that uses a large unannotated corpus to learn the probability of a sequence of semantic arguments given a predicate. We leverage the sequence probabilities predicted by the PRNSFM to estimate selectional preferences for predicates and their arguments. On the NomBank iSRL test set, our approach improves state-of-the-art performance on implicit semantic role labeling with less reliance than prior work on manually constructed language resources.</abstract>
@@ -165,7 +165,7 @@
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>142–151</pages>
       <url hash="b39c6bb0">I17-1015</url>
       <abstract>End-to-end training makes the neural machine translation (NMT) architecture simpler, yet elegant compared to traditional statistical machine translation (SMT). However, little is known about linguistic patterns of morphology, syntax and semantics learned during the training of NMT systems, and more importantly, which parts of the architecture are responsible for learning each of these phenomenon. In this paper we i) analyze how much morphology an NMT decoder learns, and ii) investigate whether injecting target morphology in the decoder helps it to produce better translations. To this end we present three methods: i) simultaneous translation, ii) joint-data learning, and iii) multi-task learning. Our results show that explicit morphological information helps the decoder learn target language morphology and improves the translation quality by 0.2–0.6 BLEU points.</abstract>
@@ -175,7 +175,7 @@
       <title>Improving Neural Machine Translation through Phrase-based Forced Decoding</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>152–162</pages>
@@ -196,7 +196,7 @@
       <title>Character-based Joint Segmentation and <fixed-case>POS</fixed-case> Tagging for <fixed-case>C</fixed-case>hinese using Bidirectional <fixed-case>RNN</fixed-case>-<fixed-case>CRF</fixed-case></title>
       <author><first>Yan</first><last>Shao</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>173–183</pages>
       <url hash="b01cd2df">I17-1018</url>
@@ -239,7 +239,7 @@
     <paper id="22">
       <title>A Computational Study on Word Meanings and Their Distributed Representations via Polymodal Embedding</title>
       <author><first>Joohee</first><last>Park</last></author>
-      <author><first>Sung-hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-hyon</first><last>Myaeng</last></author>
       <pages>214–223</pages>
       <url hash="45d8d869">I17-1022</url>
       <abstract>A distributed representation has become a popular approach to capturing a word meaning. Besides its success and practical value, however, questions arise about the relationships between a true word meaning and its distributed representation. In this paper, we examine such a relationship via polymodal embedding approach inspired by the theory that humans tend to use diverse sources in developing a word meaning. The result suggests that the existing embeddings lack in capturing certain aspects of word meanings which can be significantly improved by the polymodal approach. Also, we show distinct characteristics of different types of words (e.g. concreteness) via computational studies. Finally, we show our proposed embedding method outperforms the baselines in the word similarity measure tasks and the hypernym prediction tasks.</abstract>
@@ -280,7 +280,7 @@
     <paper id="26">
       <title>A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural Networks for Sentence Classification</title>
       <author><first>Ye</first><last>Zhang</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>253–263</pages>
       <url hash="75669d5a">I17-1026</url>
       <abstract>Convolutional Neural Networks (CNNs) have recently achieved remarkably strong performance on the practically important task of sentence classification (Kim, 2014; Kalchbrenner et al., 2014; Johnson and Zhang, 2014; Zhang et al., 2016). However, these models require practitioners to specify an exact model architecture and set accompanying hyperparameters, including the filter region size, regularization parameters, and so on. It is currently unknown how sensitive model performance is to changes in these configurations for the task of sentence classification. We thus conduct a sensitivity analysis of one-layer CNNs to explore the effect of architecture components on model performance; our aim is to distinguish between important and comparatively inconsequential design decisions for sentence classification. We focus on one-layer CNNs (to the exclusion of more complex models) due to their comparative simplicity and strong empirical performance, which makes it a modern standard baseline method akin to Support Vector Machine (SVMs) and logistic regression. We derive practical advice from our extensive empirical results for those interested in getting the most out of CNNs for sentence classification in real world settings.</abstract>
@@ -290,7 +290,7 @@
       <title>Coordination Boundary Identification with Similarity and Replaceability</title>
       <author><first>Hiroki</first><last>Teranishi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>264–272</pages>
       <url hash="d83b363e">I17-1027</url>
       <abstract>We propose a neural network model for coordination boundary detection. Our method relies on the two common properties - similarity and replaceability in conjuncts - in order to detect both similar pairs of conjuncts and dissimilar pairs of conjuncts. The model improves identification of clause-level coordination using bidirectional RNNs incorporating two properties as features. We show that our model outperforms the existing state-of-the-art methods on the coordination annotated Penn Treebank and Genia corpus without any syntactic information from parsers.</abstract>
@@ -317,8 +317,8 @@
       <title>Learning How to Simplify From Explicit Labeling of Complex-Simplified Text Pairs</title>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>295–305</pages>
       <url hash="c80f7eba">I17-1030</url>
@@ -329,7 +329,7 @@
     <paper id="31">
       <title>Domain-Adaptable Hybrid Generation of <fixed-case>RDF</fixed-case> Entity Descriptions</title>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>306–315</pages>
       <url hash="9171b2ca">I17-1031</url>
       <abstract>RDF ontologies provide structured data on entities in many domains and continue to grow in size and diversity. While they can be useful as a starting point for generating descriptions of entities, they often miss important information about an entity that cannot be captured as simple relations. In addition, generic approaches to generation from RDF cannot capture the unique style and content of specific domains. We describe a framework for hybrid generation of entity descriptions, which combines generation from RDF data with text extracted from a corpus, and extracts unique aspects of the domain from the corpus to create domain-specific generation systems. We show that each component of our approach significantly increases the satisfaction of readers with the text across multiple applications and domains.</abstract>
@@ -339,7 +339,7 @@
       <title><fixed-case>ES</fixed-case>-<fixed-case>LDA</fixed-case>: Entity Summarization using Knowledge-based Topic Modeling</title>
       <author><first>Seyedamin</first><last>Pouriyeh</last></author>
       <author><first>Mehdi</first><last>Allahyari</last></author>
-      <author><first>Krzysztof</first><last>Kochut</last></author>
+      <author id="krzysztof-kochut"><first>Krzysztof</first><last>Kochut</last></author>
       <author><first>Gong</first><last>Cheng</last></author>
       <author><first>Hamid Reza</first><last>Arabnia</last></author>
       <pages>316–325</pages>
@@ -403,8 +403,8 @@
     </paper>
     <paper id="38">
       <title><fixed-case>NMT</fixed-case> or <fixed-case>SMT</fixed-case>: Case Study of a Narrow-domain <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Post-editing Project</title>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>373–383</pages>
       <url hash="019f3c50">I17-1038</url>
       <abstract>The recent technological shift in machine translation from statistical machine translation (SMT) to neural machine translation (NMT) raises the question of the strengths and weaknesses of NMT. In this paper, we present an analysis of NMT and SMT systems’ outputs from narrow domain English-Latvian MT systems that were trained on a rather small amount of data. We analyze post-edits produced by professional translators and manually annotated errors in these outputs. Analysis of post-edits allowed us to conclude that both approaches are comparably successful, allowing for an increase in translators’ productivity, with the NMT system showing slightly worse results. Through the analysis of annotated errors, we found that NMT translations are more fluent than SMT translations. However, errors related to accuracy, especially, mistranslation and omission errors, occur more often in NMT outputs. The word form errors, that characterize the morphological richness of Latvian, are frequent for both systems, but slightly fewer in NMT outputs.</abstract>
@@ -415,7 +415,7 @@
       <author><first>Yining</first><last>Wang</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Zhengshan</first><last>Xue</last></author>
       <pages>384–393</pages>
       <url hash="4e71e4f9">I17-1039</url>
@@ -426,7 +426,7 @@
       <title>Identifying Usage Expression Sentences in Consumer Product Reviews</title>
       <author><first>Shibamouli</first><last>Lahiri</last></author>
       <author><first>V.G.Vinod</first><last>Vydiswaran</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>394–403</pages>
       <url hash="6fea0b6b">I17-1040</url>
       <abstract>In this paper we introduce the problem of identifying usage expression sentences in a consumer product review. We create a human-annotated gold standard dataset of 565 reviews spanning five distinct product categories. Our dataset consists of more than 3,000 annotated sentences. We further introduce a classification system to label sentences according to whether or not they describe some “usage”. The system combines lexical, syntactic, and semantic features in a product-agnostic fashion to yield good classification performance. We show the effectiveness of our approach using importance ranking of features, error analysis, and cross-product classification experiments.</abstract>
@@ -444,7 +444,7 @@
     <paper id="42">
       <title><fixed-case>W</fixed-case>i<fixed-case>NER</fixed-case>: A <fixed-case>W</fixed-case>ikipedia Annotated Corpus for Named Entity Recognition</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>413–422</pages>
       <url hash="ed511c47">I17-1042</url>
       <abstract>We revisit the idea of mining Wikipedia in order to generate named-entity annotations. We propose a new methodology that we applied to English Wikipedia to build WiNER, a large, high quality, annotated corpus. We evaluate its usefulness on 6 NER tasks, comparing 4 popular state-of-the art approaches. We show that LSTM-CRF is the approach that benefits the most from our corpus. We report impressive gains with this model when using a small portion of WiNER on top of the CONLL training material. Last, we propose a simple but efficient method for exploiting the full range of WiNER, leading to further improvements.</abstract>
@@ -473,9 +473,9 @@
     </paper>
     <paper id="45">
       <title>Attentive Language Models</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>441–450</pages>
       <url hash="bfabee0d">I17-1045</url>
       <abstract>In this paper, we extend Recurrent Neural Network Language Models (RNN-LMs) with an attention mechanism. We show that an “attentive” RNN-LM (with 11M parameters) achieves a better perplexity than larger RNN-LMs (with 66M parameters) and achieves performance comparable to an ensemble of 10 similar sized RNN-LMs. We also show that an “attentive” RNN-LM needs less contextual information to achieve similar results to the state-of-the-art on the wikitext2 dataset.</abstract>
@@ -494,7 +494,7 @@
       <title>Image-Grounded Conversations: Multimodal Context for Natural Question and Response Generation</title>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Georgios</first><last>Spithourakis</last></author>
@@ -508,7 +508,7 @@
     <paper id="48">
       <title>A Neural Language Model for Dynamically Representing the Meanings of Unknown Words and Entities in a Discourse</title>
       <author><first>Sosuke</first><last>Kobayashi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>473–483</pages>
       <url hash="fc6acaa0">I17-1048</url>
@@ -519,7 +519,7 @@
       <title>Using Explicit Discourse Connectives in Translation for Implicit Discourse Relation Classification</title>
       <author><first>Wei</first><last>Shi</last></author>
       <author><first>Frances</first><last>Yung</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>484–495</pages>
       <url hash="0d8a510f">I17-1049</url>
@@ -571,7 +571,7 @@
     <paper id="54">
       <title>Sentence Modeling with Deep Neural Architecture using Lexicon and Character Attention Mechanism for Sentiment Classification</title>
       <author><first>Huy Thanh</first><last>Nguyen</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>536–544</pages>
       <url hash="49f0a951">I17-1054</url>
       <abstract>Tweet-level sentiment classification in Twitter social networking has many challenges: exploiting syntax, semantic, sentiment, and context in tweets. To address these problems, we propose a novel approach to sentiment analysis that uses lexicon features for building lexicon embeddings (LexW2Vs) and generates character attention vectors (CharAVs) by using a Deep Convolutional Neural Network (DeepCNN). Our approach integrates LexW2Vs and CharAVs with continuous word embeddings (ContinuousW2Vs) and dependency-based word embeddings (DependencyW2Vs) simultaneously in order to increase information for each word into a Bidirectional Contextual Gated Recurrent Neural Network (Bi-CGRNN). We evaluate our model on two Twitter sentiment classification datasets. Experimental results show that our model can improve the classification accuracy of sentence-level sentiment analysis in Twitter social networking.</abstract>
@@ -590,8 +590,8 @@
     <paper id="56">
       <title>Capturing Long-range Contextual Dependencies with Memory-enhanced Conditional Random Fields</title>
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>555–565</pages>
       <url hash="116f077d">I17-1056</url>
       <abstract>Despite successful applications across a broad range of NLP tasks, conditional random fields (“CRFs”), in particular the linear-chain variant, are only able to model local features. While this has important benefits in terms of inference tractability, it limits the ability of the model to capture long-range dependencies between items. Attempts to extend CRFs to capture long-range dependencies have largely come at the cost of computational complexity and approximate inference. In this work, we propose an extension to CRFs by integrating external memory, taking inspiration from memory networks, thereby allowing CRFs to incorporate information far beyond neighbouring steps. Experiments across two tasks show substantial improvements over strong CRF and LSTM baselines.</abstract>
@@ -599,9 +599,9 @@
     </paper>
     <paper id="57">
       <title>Named Entity Recognition with Stack Residual <fixed-case>LSTM</fixed-case> and Trainable Bias Decoding</title>
-      <author><first>Quan</first><last>Tran</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <pages>566–575</pages>
       <url hash="f2794935">I17-1057</url>
       <abstract>Recurrent Neural Network models are the state-of-the-art for Named Entity Recognition (NER). We present two innovations to improve the performance of these models. The first innovation is the introduction of residual connections between the Stacked Recurrent Neural Network model to address the degradation problem of deep neural networks. The second innovation is a bias decoding mechanism that allows the trained system to adapt to non-differentiable and externally computed objectives, such as the entity-based F-measure. Our work improves the state-of-the-art results for both Spanish and English languages on the standard train/development/test split of the CoNLL 2003 Shared Task NER dataset.</abstract>
@@ -621,7 +621,7 @@
       <title>Leveraging Discourse Information Effectively for Authorship Attribution</title>
       <author><first>Elisa</first><last>Ferracane</last></author>
       <author><first>Su</first><last>Wang</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>584–593</pages>
       <url hash="a3a8d6cd">I17-1059</url>
       <abstract>We explore techniques to maximize the effectiveness of discourse information in the task of authorship attribution. We present a novel method to embed discourse features in a Convolutional Neural Network text classifier, which achieves a state-of-the-art result by a significant margin. We empirically investigate several featurization methods to understand the conditions under which discourse features contribute non-trivial performance gains, and analyze discourse embeddings.</abstract>
@@ -641,7 +641,7 @@
       <title>Multi-Task Learning for Speaker-Role Adaptation in Neural Conversation Models</title>
       <author><first>Yi</first><last>Luan</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <pages>605–614</pages>
@@ -685,8 +685,8 @@
     </paper>
     <paper id="65">
       <title>An Ensemble Method with Sentiment Features and Clustering Support</title>
-      <author><last>Nguyen</last><first>Huy Tien</first></author>
-      <author><last>Nguyen</last><first>Minh Le</first></author>
+      <author id="huy-tien-nguyen"><first>Huy Tien</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <pages>644–653</pages>
       <url hash="c8507c71">I17-1065</url>
       <abstract>Deep learning models have recently been applied successfully in natural language processing, especially sentiment analysis. Each deep learning model has a particular advantage, but it is difficult to combine these advantages into one model, especially in the area of sentiment analysis. In our approach, Convolutional Neural Network (CNN) and Long Short Term Memory (LSTM) were utilized to learn sentiment-specific features in a freezing scheme. This scenario provides a novel and efficient way for integrating advantages of deep learning models. In addition, we also grouped documents into clusters by their similarity and applied the prediction score of Naive Bayes SVM (NBSVM) method to boost the classification accuracy of each group. The experiments show that our method achieves the state-of-the-art performance on two well-known datasets: IMDB large movie reviews for document level and Pang &amp; Lee movie reviews for sentence level.</abstract>
@@ -704,7 +704,7 @@
     <paper id="67">
       <title>Measuring Semantic Relations between Human Activities</title>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>664–673</pages>
       <url hash="f78817d5">I17-1067</url>
       <abstract>The things people do in their daily lives can provide valuable insights into their personality, values, and interests. Unstructured text data on social media platforms are rich in behavioral content, and automated systems can be deployed to learn about human activity on a broad scale if these systems are able to reason about the content of interest. In order to aid in the evaluation of such systems, we introduce a new phrase-level semantic textual similarity dataset comprised of human activity phrases, providing a testbed for automated systems that analyze relationships between phrasal descriptions of people’s actions. Our set of 1,000 pairs of activities is annotated by human judges across four relational dimensions including similarity, relatedness, motivational alignment, and perceived actor congruence. We evaluate a set of strong baselines for the task of generating scores that correlate highly with human ratings, and we introduce several new approaches to the phrase-level similarity task in the domain of human activities.</abstract>
@@ -715,7 +715,7 @@
       <author><first>Bonan</first><last>Min</last></author>
       <author><first>Zhuolin</first><last>Jiang</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>674–684</pages>
       <url hash="0db3d3f9">I17-1068</url>
       <abstract>Typically, relation extraction models are trained to extract instances of a relation ontology using only training data from a single language. However, the concepts represented by the relation ontology (e.g. ResidesIn, EmployeeOf) are language independent. The numbers of annotated examples available for a given ontology vary between languages. For example, there are far fewer annotated examples in Spanish and Japanese than English and Chinese. Furthermore, using only language-specific training data results in the need to manually annotate equivalently large amounts of training for each new language a system encounters. We propose a deep neural network to learn transferable, discriminative bilingual representation. Experiments on the ACE 2005 multilingual training corpus demonstrate that the joint training process results in significant improvement in relation classification performance over the monolingual counterparts. The learnt representation is discriminative and transferable between languages. When using 10% (25K English words, or 30K Chinese characters) of the training data, our approach results in doubling F1 compared to a monolingual baseline. We achieve comparable performance to the monolingual system trained with 250K English words (or 300K Chinese characters) With 50% of training data.</abstract>
@@ -764,7 +764,7 @@
     <paper id="73">
       <title>Finding Dominant User Utterances And System Responses in Conversations</title>
       <author><first>Dhiraj</first><last>Madan</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>723–732</pages>
       <url hash="914dd69b">I17-1073</url>
       <abstract>There are several dialog frameworks which allow manual specification of intents and rule based dialog flow. The rule based framework provides good control to dialog designers at the expense of being more time consuming and laborious. The job of a dialog designer can be reduced if we could identify pairs of user intents and corresponding responses automatically from prior conversations between users and agents. In this paper we propose an approach to find these frequent user utterances (which serve as examples for intents) and corresponding agent responses. We propose a novel SimCluster algorithm that extends standard K-means algorithm to simultaneously cluster user utterances and agent utterances by taking their adjacency information into account. The method also aligns these clusters to provide pairs of intents and response groups. We compare our results with those produced by using simple Kmeans clustering on a real dataset and observe upto 10% absolute improvement in F1-scores. Through our experiments on synthetic dataset, we show that our algorithm gains more advantage over K-means algorithm when the data has large variance.</abstract>
@@ -787,7 +787,7 @@
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Lianhua</first><last>Chi</last></author>
       <author><first>Khoi-Nguyen</first><last>Tran</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>744–753</pages>
       <url hash="562c250c">I17-1075</url>
       <abstract>We propose an end-to-end neural network to predict the geolocation of a tweet. The network takes as input a number of raw Twitter metadata such as the tweet message and associated user account information. Our model is language independent, and despite minimal feature engineering, it is interpretable and capable of learning location indicative words and timing patterns. Compared to state-of-the-art systems, our model outperforms them by 2%-6%. Additionally, we propose extensions to the model to compress representation learnt by the network into binary codes. Experiments show that it produces compact codes compared to benchmark hashing algorithms. An implementation of the model is released publicly.</abstract>
@@ -809,8 +809,8 @@
       <title>Domain Adaptation from User-level <fixed-case>F</fixed-case>acebook Models to County-level <fixed-case>T</fixed-case>witter Predictions</title>
       <author><first>Daniel</first><last>Rieman</last></author>
       <author><first>Kokil</first><last>Jaidka</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>764–773</pages>
       <url hash="7791d140">I17-1077</url>
       <abstract>Several studies have demonstrated how language models of user attributes, such as personality, can be built by using the Facebook language of social media users in conjunction with their responses to psychology questionnaires. It is challenging to apply these models to make general predictions about attributes of communities, such as personality distributions across US counties, because it requires 1. the potentially inavailability of the original training data because of privacy and ethical regulations, 2. adapting Facebook language models to Twitter language without retraining the model, and 3. adapting from users to county-level collections of tweets. We propose a two-step algorithm, Target Side Domain Adaptation (TSDA) for such domain adaptation when no labeled Twitter/county data is available. TSDA corrects for the different word distributions between Facebook and Twitter and for the varying word distributions across counties by adjusting target side word frequencies; no changes to the trained model are made. In the case of predicting the Big Five county-level personality traits, TSDA outperforms a state-of-the-art domain adaptation method, gives county-level predictions that have fewer extreme outliers, higher year-to-year stability, and higher correlation with county-level outcomes.</abstract>
@@ -829,7 +829,7 @@
     <paper id="79">
       <title>Estimating Reactions and Recommending Products with Generative Models of Reviews</title>
       <author><first>Jianmo</first><last>Ni</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <author><first>Sharad</first><last>Vikram</last></author>
       <author><first>Julian</first><last>McAuley</last></author>
       <pages>783–791</pages>
@@ -841,7 +841,7 @@
       <title>Summarizing Lengthy Questions</title>
       <author><first>Tatsuya</first><last>Ishigaki</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>792–800</pages>
       <url hash="5514d580">I17-1080</url>
       <abstract>In this research, we propose the task of question summarization. We first analyzed question-summary pairs extracted from a Community Question Answering (CQA) site, and found that a proportion of questions cannot be summarized by extractive approaches but requires abstractive approaches. We created a dataset by regarding the question-title pairs posted on the CQA site as question-summary pairs. By using the data, we trained extractive and abstractive summarization models, and compared them based on ROUGE scores and manual evaluations. Our experimental results show an abstractive method using an encoder-decoder model with a copying mechanism achieves better scores for both ROUGE-2 F-measure and the evaluations by human judges.</abstract>
@@ -859,7 +859,7 @@
     </paper>
     <paper id="82">
       <title>Abstractive Multi-document Summarization by Partial Tree Extraction, Recombination and Linearization</title>
-      <author><first>Litton</first><last>J Kurisinkel</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>812–821</pages>
@@ -889,7 +889,7 @@
     <paper id="85">
       <title>Event Ordering with a Generalized Model for Sieve Prediction Ranking</title>
       <author><first>Bill</first><last>McDowell</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Alexander</first><last>Ororbia II</last></author>
       <author><first>David</first><last>Reitter</last></author>
       <pages>843–853</pages>
@@ -912,7 +912,7 @@
       <author><first>Jinseon</first><last>You</last></author>
       <author><first>Jin-Woo</first><last>Chung</last></author>
       <author><first>Wonsuk</first><last>Yang</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>865–874</pages>
       <url hash="6324dd59">I17-1087</url>
       <abstract>Genetic information in the literature has been extensively looked into for the purpose of discovering the etiology of a disease. As the gene-disease relation is sensitive to external factors, their identification is important to study a disease. Environmental influences, which are usually called Gene-Environment interaction (GxE), have been considered as important factors and have extensively been researched in biology. Nevertheless, there is still a lack of systems for automatic GxE extraction from the biomedical literature due to new challenges: (1) there are no preprocessing tools and corpora for GxE, (2) expressions of GxE are often quite implicit, and (3) document-level comprehension is usually required. We propose to overcome these challenges with neural network models and show that a modified sequence-to-sequence model with a static RNN decoder produces a good performance in GxE recognition.</abstract>
@@ -936,7 +936,7 @@
       <author><first>Quincy</first><last>Davenport</last></author>
       <author><first>Anna Mengdan</first><last>Dai</last></author>
       <author><first>Mohamed</first><last>Abouelenien</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>885–894</pages>
       <url hash="a57e5159">I17-1089</url>
       <abstract>This paper addresses the task of detecting identity deception in language. Using a novel identity deception dataset, consisting of real and portrayed identities from 600 individuals, we show that we can build accurate identity detectors targeting both age and gender, with accuracies of up to 88. We also perform an analysis of the linguistic patterns used in identity deception, which lead to interesting insights into identity portrayers.</abstract>
@@ -945,7 +945,7 @@
     <paper id="90">
       <title>Learning to Diagnose: Assimilating Clinical Narratives using Deep Reinforcement Learning</title>
       <author><first>Yuan</first><last>Ling</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Vivek</first><last>Datla</last></author>
       <author><first>Ashequl</first><last>Qadir</last></author>
       <author><first>Kathy</first><last>Lee</last></author>
@@ -984,7 +984,7 @@
     <paper id="93">
       <title>Demographic Word Embeddings for Racism Detection on <fixed-case>T</fixed-case>witter</title>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>926–936</pages>
       <url hash="f3a6fe21">I17-1093</url>
@@ -1086,7 +1086,7 @@
     <paper id="102">
       <title>Multilingual Hierarchical Attention Networks for Document Classification</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>1015–1025</pages>
       <url hash="ba2ab69d">I17-1102</url>
       <abstract>Hierarchical attention networks have recently achieved remarkable performance for document classification in a given language. However, when multilingual document collections are considered, training such models separately for each language entails linear parameter growth and lack of cross-language transfer. Learning a single multilingual model with fewer parameters is therefore a challenging but potentially beneficial objective. To this end, we propose multilingual hierarchical attention networks for learning document structures, with shared encoders and/or shared attention mechanisms across languages, using multi-task learning and an aligned semantic space as input. We evaluate the proposed models on multilingual document classification with disjoint label sets, on a large dataset which we provide, with 600k news documents in 8 languages, and 5k labels. The multilingual models outperform monolingual ones in low-resource as well as full-resource settings, and use fewer parameters, thus confirming their computational efficiency and the utility of cross-language transfer.</abstract>
@@ -1097,7 +1097,7 @@
       <author><first>Keith</first><last>Maki</last></author>
       <author><first>Michael</first><last>Yoder</last></author>
       <author><first>Yohan</first><last>Jo</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1026–1035</pages>
       <url hash="da33b3ad">I17-1103</url>
       <abstract>In this work we investigate how role-based behavior profiles of a Wikipedia editor, considered against the backdrop of roles taken up by other editors in discussions, predict the success of the editor at achieving an impact on the associated article. We first contribute a new public dataset including a task predicting the success of Wikipedia editors involved in discussion, measured by an operationalization of the lasting impact of their edits in the article. We then propose a probabilistic graphical model that advances earlier work inducing latent discussion roles using the light supervision of success in the negotiation task. We evaluate the performance of the model and interpret findings of roles and group configurations that lead to certain outcomes on Wikipedia.</abstract>
@@ -1136,7 +1136,7 @@
       <author><first>Katsuhiko</first><last>Hayashi</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>7–12</pages>
       <url hash="bfe2727c">I17-2002</url>
@@ -1147,7 +1147,7 @@
       <title>Transferring Semantic Roles Using Translation and Syntactic Information</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>13–19</pages>
       <url hash="896787e1">I17-2003</url>
       <abstract>Our paper addresses the problem of annotation projection for semantic role labeling for resource-poor languages using supervised annotations from a resource-rich language through parallel data. We propose a transfer method that employs information from source and target syntactic dependencies as well as word alignment density to improve the quality of an iterative bootstrapping method. Our experiments yield a 3.5 absolute labeled F-score improvement over a standard annotation projection method.</abstract>
@@ -1179,7 +1179,7 @@
     <paper id="6">
       <title>Towards Lower Bounds on Number of Dimensions for Word Embeddings</title>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>31–36</pages>
       <url hash="f3647e6b">I17-2006</url>
       <attachment type="note" hash="a34e758a">I17-2006.Notes.pdf</attachment>
@@ -1241,8 +1241,8 @@
     </paper>
     <paper id="12">
       <title>Learning Kernels over Strings using <fixed-case>G</fixed-case>aussian Processes</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>67–73</pages>
       <url hash="b271b09c">I17-2012</url>
       <attachment type="note" hash="9edf7954">I17-2012.Notes.pdf</attachment>
@@ -1265,7 +1265,7 @@
       <author><first>Yi-Jie</first><last>Huang</last></author>
       <author><first>Shu-Hao</first><last>Yeh</last></author>
       <author><first>Chun-Hung</first><last>Chen</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>80–85</pages>
       <url hash="761ec54e">I17-2014</url>
       <abstract>Part-of-speech (POS) tagging and named entity recognition (NER) are crucial steps in natural language processing. In addition, the difficulty of word segmentation places additional burden on those who intend to deal with languages such as Chinese, and pipelined systems often suffer from error propagation. This work proposes an end-to-end model using character-based recurrent neural network (RNN) to jointly accomplish segmentation, POS tagging and NER of a Chinese sentence. Experiments on previous word segmentation and NER datasets show that a single model with the proposed architecture is comparable to those trained specifically for each task, and outperforms freely-available softwares. Moreover, we provide a web-based interface for the public to easily access this resource.</abstract>
@@ -1295,7 +1295,7 @@
       <author><first>Motoki</first><last>Sato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Ikuya</first><last>Yamada</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>97–102</pages>
       <url hash="e202a1c6">I17-2017</url>
       <abstract>We present Segment-level Neural CRF, which combines neural networks with a linear chain CRF for segment-level sequence modeling tasks such as named entity recognition (NER) and syntactic chunking. Our segment-level CRF can consider higher-order label dependencies compared with conventional word-level CRF. Since it is difficult to consider all possible variable length segments, our method uses segment lattice constructed from the word-level tagging model to reduce the search space. Performing experiments on NER and chunking, we demonstrate that our method outperforms conventional word-level CRF with neural networks.</abstract>
@@ -1304,7 +1304,7 @@
     <paper id="18">
       <title>Integrating Vision and Language Datasets to Measure Word Concreteness</title>
       <author><first>Gitit</first><last>Kehat</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>103–108</pages>
       <url hash="19c9f12d">I17-2018</url>
       <abstract>We present and take advantage of the inherent visualizability properties of words in visual corpora (the textual components of vision-language datasets) to compute concreteness scores for words. Our simple method does not require hand-annotated concreteness score lists for training, and yields state-of-the-art results when evaluated against concreteness scores lists and previously derived scores, as well as when used for metaphor detection.</abstract>
@@ -1323,7 +1323,7 @@
       <title>Injecting Word Embeddings with Another Language’s Resource : An Application of Bilingual Embeddings</title>
       <author><first>Prakhar</first><last>Pandey</last></author>
       <author><first>Vikram</first><last>Pudi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>116–121</pages>
       <url hash="11e9567c">I17-2020</url>
       <abstract>Word embeddings learned from text corpus can be improved by injecting knowledge from external resources, while at the same time also specializing them for similarity or relatedness. These knowledge resources (like WordNet, Paraphrase Database) may not exist for all languages. In this work we introduce a method to inject word embeddings of a language with knowledge resource of another language by leveraging bilingual embeddings. First we improve word embeddings of German, Italian, French and Spanish using resources of English and test them on variety of word similarity tasks. Then we demonstrate the utility of our method by creating improved embeddings for Urdu and Telugu languages using Hindi WordNet, beating the previously established baseline for Urdu.</abstract>
@@ -1333,7 +1333,7 @@
       <title>Improving Black-box Speech Recognition using Semantic Parsing</title>
       <author><first>Rodolfo</first><last>Corona</last></author>
       <author><first>Jesse</first><last>Thomason</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>122–127</pages>
       <url hash="f2309ddb">I17-2021</url>
       <abstract>Speech is a natural channel for human-computer interaction in robotics and consumer applications. Natural language understanding pipelines that start with speech can have trouble recovering from speech recognition errors. Black-box automatic speech recognition (ASR) systems, built for general purpose use, are unable to take advantage of in-domain language models that could otherwise ameliorate these errors. In this work, we present a method for re-ranking black-box ASR hypotheses using an in-domain language model and semantic parser trained for a particular task. Our re-ranking method significantly improves both transcription accuracy and semantic understanding over a state-of-the-art ASR’s vanilla output.</abstract>
@@ -1360,7 +1360,7 @@
     </paper>
     <paper id="24">
       <title>Modelling Representation Noise in Emotion Analysis using <fixed-case>G</fixed-case>aussian Processes</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <pages>140–145</pages>
       <url hash="509c2f34">I17-2024</url>
       <abstract>Emotion Analysis is the task of modelling latent emotions present in natural language. Labelled datasets for this task are scarce so learning good input text representations is not trivial. Using averaged word embeddings is a simple way to leverage unlabelled corpora to build text representations but this approach can be prone to noise either coming from the embedding themselves or the averaging procedure. In this paper we propose a model for Emotion Analysis using Gaussian Processes and kernels that are better suitable for functions that exhibit noisy behaviour. Empirical evaluations in a emotion prediction task show that our model outperforms commonly used baselines for regression.</abstract>
@@ -1369,7 +1369,7 @@
     <paper id="25">
       <title>Are Manually Prepared Affective Lexicons Really Useful for Sentiment Analysis</title>
       <author><first>Minglei</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
       <pages>146–150</pages>
       <url hash="a05d54a8">I17-2025</url>
@@ -1391,7 +1391,7 @@
       <title>Can Discourse Relations be Identified Incrementally?</title>
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>157–162</pages>
       <url hash="3bdf8148">I17-2027</url>
       <attachment type="note" hash="515b8c63">I17-2027.Notes.pdf</attachment>
@@ -1402,7 +1402,7 @@
     <paper id="28">
       <title>Speaker Role Contextual Modeling for Language Understanding and Dialogue Policy Learning</title>
       <author><first>Ta-Chung</first><last>Chi</last></author>
-      <author><first>Po-Chun</first><last>Chen</last></author>
+      <author id="po-chun-chen"><first>Po-Chun</first><last>Chen</last></author>
       <author><first>Shang-Yu</first><last>Su</last></author>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <pages>163–168</pages>
@@ -1425,7 +1425,7 @@
     <paper id="30">
       <title>Dialog for Language to Code</title>
       <author><first>Shobhit</first><last>Chaurasia</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>175–180</pages>
       <url hash="348efa7c">I17-2030</url>
       <abstract>Generating computer code from natural language descriptions has been a long-standing problem. Prior work in this domain has restricted itself to generating code in one shot from a single description. To overcome this limitation, we propose a system that can engage users in a dialog to clarify their intent until it has all the information to produce correct code. To evaluate the efficacy of dialog in code generation, we focus on synthesizing conditional statements in the form of IFTTT recipes.</abstract>
@@ -1446,7 +1446,7 @@
       <author><first>Yutai</first><last>Hou</last></author>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>187–192</pages>
       <url hash="006fadf8">I17-2032</url>
       <abstract>We present in this paper a statistical framework that generates accurate and fluent product description from product attributes. Specifically, after extracting templates and learning writing knowledge from attribute-description parallel data, we use the learned knowledge to decide what to say and how to say for product description generation. To evaluate accuracy and fluency for the generated descriptions, in addition to BLEU and Recall, we propose to measure what to say (in terms of attribute coverage) and to measure how to say (by attribute-specified generation) separately. Experimental results show that our framework is effective.</abstract>
@@ -1464,7 +1464,7 @@
     <paper id="34">
       <title><fixed-case>SSAS</fixed-case>: Semantic Similarity for Abstractive Summarization</title>
       <author><first>Raghuram</first><last>Vadapalli</last></author>
-      <author><first>Litton</first><last>J Kurisinkel</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>198–203</pages>
@@ -1522,7 +1522,7 @@
       <title>High Recall Open <fixed-case>IE</fixed-case> for Relation Discovery</title>
       <author><first>Hady</first><last>Elsahar</last></author>
       <author><first>Christophe</first><last>Gravier</last></author>
-      <author><first>Frederique</first><last>Laforest</last></author>
+      <author id="frederique-laforest"><first>Frederique</first><last>Laforest</last></author>
       <pages>228–233</pages>
       <url hash="f28aac69">I17-2039</url>
       <abstract>Relation Discovery discovers predicates (relation types) from a text corpus relying on the co-occurrence of two named entities in the same sentence. This is a very narrowing constraint: it represents only a small fraction of all relation mentions in practice. In this paper we propose a high recall approach for Open IE, which enables covering up to 16 times more sentences in a large corpus. Comparison against OpenIE systems shows that our proposed approach achieves 28% improvement over the highest recall OpenIE system and 6% improvement in precision than the same system.</abstract>
@@ -1543,7 +1543,7 @@
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
       <author><first>Nai-Wen</first><last>Chang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>240–245</pages>
       <url hash="50fba5ed">I17-2041</url>
       <abstract>In this paper, we propose a recurrent neural network model for identifying protein-protein interactions in biomedical literature. Experiments on two largest public benchmark datasets, AIMed and BioInfer, demonstrate that our approach significantly surpasses state-of-the-art methods with relative improvements of 10% and 18%, respectively. Cross-corpus evaluation also demonstrate that the proposed model remains robust despite using different training data. These results suggest that RNN can effectively capture semantic relationships among proteins as well as generalizes over different corpora, without any feature engineering.</abstract>
@@ -1562,7 +1562,7 @@
     <paper id="43">
       <title>Fake News Detection Through Multi-Perspective Speaker Profiles</title>
       <author><first>Yunfei</first><last>Long</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
       <author><first>Minglei</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
@@ -1579,7 +1579,7 @@
       <author><first>Kugatsu</first><last>Sadamitsu</last></author>
       <author><first>Satoshi</first><last>Kobashikawa</last></author>
       <author><first>Ryo</first><last>Masumura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Junji</first><last>Tomita</last></author>
       <pages>257–262</pages>
       <url hash="ab165f02">I17-2044</url>
@@ -1602,7 +1602,7 @@
       <title>Boosting Neural Machine Translation</title>
       <author><first>Dakun</first><last>Zhang</last></author>
       <author><first>Jungi</first><last>Kim</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>271–276</pages>
       <url hash="633f6191">I17-2046</url>
@@ -1625,7 +1625,7 @@
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Maulik</first><last>Shah</last></author>
       <author><first>Pradyot</first><last>Prakash</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>283–289</pages>
       <url hash="ae9464ea">I17-2048</url>
       <abstract>We investigate pivot-based translation between related languages in a low resource, phrase-based SMT setting. We show that a subword-level pivot-based SMT model using a related pivot language is substantially better than word and morpheme-level pivot models. It is also highly competitive with the best direct translation model, which is encouraging as no direct source-target training corpus is used. We also show that combining multiple related language pivot models can rival a direct translation model. Thus, the use of subwords as translation units coupled with multiple related pivot languages can compensate for the lack of a direct parallel corpus.</abstract>
@@ -1635,7 +1635,7 @@
       <title>Key-value Attention Mechanism for Neural Machine Translation</title>
       <author><first>Hideya</first><last>Mino</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <pages>290–295</pages>
       <url hash="b5c4353d">I17-2049</url>
@@ -1644,7 +1644,7 @@
     </paper>
     <paper id="50">
       <title>Transfer Learning across Low-Resource, Related Languages for Neural Machine Translation</title>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>296–301</pages>
       <url hash="97eb3551">I17-2050</url>
@@ -1657,7 +1657,7 @@
       <author><first>Kangil</first><last>Kim</last></author>
       <author><first>Jong-Hun</first><last>Shin</last></author>
       <author><first>Seung-Hoon</first><last>Na</last></author>
-      <author><first>SangKeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>SangKeun</first><last>Jung</last></author>
       <pages>302–307</pages>
       <url hash="eb87c8f7">I17-2051</url>
       <abstract>Neural machine translation decoders are usually conditional language models to sequentially generate words for target sentences. This approach is limited to find the best word composition and requires help of explicit methods as beam search. To help learning correct compositional mechanisms in NMTs, we propose concept equalization using direct mapping distributed representations of source and target sentences. In a translation experiment from English to French, the concept equalization significantly improved translation quality by 3.00 BLEU points compared to a state-of-the-art NMT model.</abstract>
@@ -1674,7 +1674,7 @@
     </paper>
     <paper id="53">
       <title>A Parallel Corpus of Python Functions and Documentation Strings for Automated Code Documentation and Code Generation</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>314–319</pages>
       <url hash="285b1ca1">I17-2053</url>
@@ -1693,7 +1693,7 @@
     <paper id="55">
       <title>Identifying Speakers and Listeners of Quoted Speech in Literary Works</title>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>325–329</pages>
       <url hash="b2822f57">I17-2055</url>
       <abstract>We present the first study that evaluates both speaker and listener identification for direct speech in literary texts. Our approach consists of two steps: identification of speakers and listeners near the quotes, and dialogue chain segmentation. Evaluation results show that this approach outperforms a rule-based approach that is state-of-the-art on a corpus of literary texts.</abstract>
@@ -1741,7 +1741,7 @@
       <title>Leveraging Diverse Lexical Chains to Construct Essays for <fixed-case>C</fixed-case>hinese College Entrance Examination</title>
       <author><first>Liunian</first><last>Li</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Siming</first><last>Yan</last></author>
       <pages>355–360</pages>
       <url hash="f61656c1">I17-2060</url>
@@ -1773,7 +1773,7 @@
       <title>Coreference Resolution on Math Problem Text in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Takumi</first><last>Ito</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>373–377</pages>
       <url hash="c6067614">I17-2063</url>
       <abstract>This paper describes a coreference resolution system for math problem text. Case frame dictionaries and a math taxonomy are utilized for supplying domain knowledge. The system deals with various anaphoric phenomena beyond well-studied entity coreferences.</abstract>
@@ -1823,9 +1823,9 @@
     <paper id="68">
       <title><fixed-case>CWIG</fixed-case>3<fixed-case>G</fixed-case>2 - Complex Word Identification Task across Three Text Genres and Two User Groups</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>401–407</pages>
       <url hash="5b6da92e">I17-2068</url>
       <abstract>Complex word identification (CWI) is an important task in text accessibility. However, due to the scarcity of CWI datasets, previous studies have only addressed this problem on Wikipedia sentences and have solely taken into account the needs of non-native English speakers. We collect a new CWI dataset (CWIG3G2) covering three text genres News, WikiNews, and Wikipedia) annotated by both native and non-native English speakers. Unlike previous datasets, we cover single words, as well as complex phrases, and present them for judgment in a paragraph context. We present the first study on cross-genre and cross-group CWI, showing measurable influences in native language and genre types.</abstract>
@@ -1868,7 +1868,7 @@
       <author><first>Lisheng</first><last>Fu</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>425–429</pages>
       <url hash="569cc39e">I17-2072</url>
       <abstract>Relations are expressed in many domains such as newswire, weblogs and phone conversations. Trained on a source domain, a relation extractor’s performance degrades when applied to target domains other than the source. A common yet labor-intensive method for domain adaptation is to construct a target-domain-specific labeled dataset for adapting the extractor. In response, we present an unsupervised domain adaptation method which only requires labels from the source domain. Our method is a joint model consisting of a CNN-based relation classifier and a domain-adversarial classifier. The two components are optimized jointly to learn a domain-independent representation for prediction on the target domain. Our model outperforms the state-of-the-art on all three test domains of ACE 2005.</abstract>
@@ -1878,7 +1878,7 @@
       <title>Lexical Simplification with the Deep Structured Similarity Model</title>
       <author><first>Lis</first><last>Pereira</last></author>
       <author><first>Xiaodong</first><last>Liu</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>430–435</pages>
       <url hash="be523ccc">I17-2073</url>
       <abstract>We explore the application of a Deep Structured Similarity Model (DSSM) to ranking in lexical simplification. Our results show that the DSSM can effectively capture fine-grained features to perform semantic matching when ranking substitution candidates, outperforming the state-of-the-art on two standard datasets used for the task.</abstract>
@@ -1888,7 +1888,7 @@
       <title>Proofread Sentence Generation as Multi-Task Learning with Editing Operation Prediction</title>
       <author><first>Yuta</first><last>Hitomi</last></author>
       <author><first>Hideaki</first><last>Tamori</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>436–441</pages>
       <url hash="679b0acd">I17-2074</url>
@@ -1897,8 +1897,8 @@
     </paper>
     <paper id="75">
       <title>An Exploration of Data Augmentation and <fixed-case>RNN</fixed-case> Architectures for Question Ranking in Community Question Answering</title>
-      <author><first>Charles</first><last>Chen</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>442–447</pages>
       <url hash="5bc773d6">I17-2075</url>
       <abstract>The automation of tasks in community question answering (cQA) is dominated by machine learning approaches, whose performance is often limited by the number of training examples. Starting from a neural sequence learning approach with attention, we explore the impact of two data augmentation techniques on question ranking performance: a method that swaps reference questions with their paraphrases, and training on examples automatically selected from external datasets. Both methods are shown to lead to substantial gains in accuracy over a strong baseline. Further improvements are obtained by changing the model architecture to mirror the structure seen in the data.</abstract>
@@ -1933,7 +1933,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>MASSA</fixed-case>lign: Alignment and Annotation of Comparable Documents</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1–4</pages>
@@ -1949,7 +1949,7 @@
       <author><first>Deana</first><last>Burchfield</last></author>
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Cash</first><last>Costello</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>Scott</first><last>Miller</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
@@ -1964,7 +1964,7 @@
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Benjamin</first><last>Glass</last></author>
       <author><first>Shudong</first><last>Hao</last></author>
-      <author><first>Patrick</first><last>Martin</last></author>
+      <author id="m-patrick-martin"><first>Patrick</first><last>Martin</last></author>
       <author><first>Pushpendre</first><last>Rastogi</last></author>
       <author><first>Rashmi</first><last>Sankepally</last></author>
       <author><first>Travis</first><last>Wolfe</last></author>
@@ -1980,7 +1980,7 @@
       <author><first>Yunseok</first><last>Noh</last></author>
       <author><first>Su Jeong</first><last>Choi</last></author>
       <author><first>Seong-Bae</first><last>Park</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>9–12</pages>
       <url hash="d4eaa515">I17-3003</url>
       <abstract>We demonstrate a report generation system called WiseReporter. The WiseReporter generates a text report of a specific topic which is usually given as a keyword by verbalizing knowledge base facts involving the topic. This demonstration does not demonstate only the report itself, but also the processes how the sentences for the report are generated. We are planning to enhance WiseReporter in the future by adding data analysis based on deep learning architecture and text summarization.</abstract>
@@ -1990,9 +1990,9 @@
       <title><fixed-case>E</fixed-case>ncyclolink: A Cross-Encyclopedia,Cross-language Article-Linking System and Web-based Search Interface</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
       <author><first>Ka Ming</first><last>Wong</last></author>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
       <author><first>Chao-Lin</first><last>Pan</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>13–16</pages>
       <url hash="a3e660d3">I17-3004</url>
       <abstract>Cross-language article linking (CLAL) is the task of finding corresponding article pairs across encyclopedias of different languages. In this paper, we present Encyclolink, a web-based CLAL search interface designed to help users find equivalent encyclopedia articles in Baidu Baike for a given English Wikipedia article title query. Encyclolink is powered by our cross-encyclopedia entity embedding CLAL system (0.8 MRR). The browser-based Interface provides users with a clear and easily readable preview of the contents of retrieved articles for comparison.</abstract>
@@ -2006,7 +2006,7 @@
       <author><first>Chao-Chuang</first><last>Shih</last></author>
       <author><first>Chun-Hsun</first><last>Chen</last></author>
       <author><first>Po-Ching</first><last>Lee</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>17–20</pages>
       <url hash="8aa1b35e">I17-3005</url>
       <abstract>In the paper, we propose an information retrieval based (IR-based) Question Answering (QA) system to assist online customer service staffs respond users in the telecom domain. When user asks a question, the system retrieves a set of relevant answers and ranks them. Moreover, our system uses a novel reranker to enhance the ranking result of information retrieval. It employs the word2vec model to represent the sentences as vectors. It also uses a sub-category feature, predicted by the k-nearest neighbor algorithm. Finally, the system returns the top five candidate answers, making online staffs find answers much more efficiently.</abstract>
@@ -2030,7 +2030,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>MUSST</fixed-case>: A Multilingual Syntactic Simplification Tool</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Alessio</first><last>Palmero Aprosio</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Tamara</first><last>Martín Wanton</last></author>
@@ -2070,7 +2070,7 @@
       <author><first>Thai-Hoang</first><last>Pham</last></author>
       <author><first>Xuan-Khoai</first><last>Pham</last></author>
       <author><first>Tuan-Anh</first><last>Nguyen</last></author>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
       <pages>37–40</pages>
       <url hash="791d6c9e">I17-3010</url>
       <abstract>This paper demonstrates neural network-based toolkit namely NNVLP for essential Vietnamese language processing tasks including part-of-speech (POS) tagging, chunking, Named Entity Recognition (NER). Our toolkit is a combination of bidirectional Long Short-Term Memory (Bi-LSTM), Convolutional Neural Network (CNN), Conditional Random Field (CRF), using pre-trained word embeddings as input, which outperforms previously published toolkits on these three tasks. We provide both of API and web demo for this toolkit.</abstract>
@@ -2080,7 +2080,7 @@
       <title><fixed-case>C</fixed-case>lassifier<fixed-case>G</fixed-case>uesser: A Context-based Classifier Prediction System for <fixed-case>C</fixed-case>hinese Language Learners</title>
       <author><first>Nicole</first><last>Peinelt</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>41–44</pages>
       <url hash="10c60ccd">I17-3011</url>
       <abstract>Classifiers are function words that are used to express quantities in Chinese and are especially difficult for language learners. In contrast to previous studies, we argue that the choice of classifiers is highly contextual and train context-aware machine learning models based on a novel publicly available dataset, outperforming previous baselines. We further present use cases for our database and models in an interactive demo system.</abstract>
@@ -2088,7 +2088,7 @@
     </paper>
     <paper id="12">
       <title>Automatic Difficulty Assessment for <fixed-case>C</fixed-case>hinese Texts</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Meichun</first><last>Liu</last></author>
       <author><first>Chun Yin</first><last>Lam</last></author>
       <author><first>Tak On</first><last>Lau</last></author>
@@ -2103,7 +2103,7 @@
       <title>Verb Replacer: An <fixed-case>E</fixed-case>nglish Verb Error Correction System</title>
       <author><first>Yu-Hsuan</first><last>Wu</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>49–52</pages>
       <url hash="eaf297d4">I17-3013</url>
       <abstract>According to the analysis of Cambridge Learner Corpus, using a wrong verb is the most common type of grammatical errors. This paper describes Verb Replacer, a system for detecting and correcting potential verb errors in a given sentence. In our approach, alternative verbs are considered to replace the verb based on an error-annotated corpus and verb-object collocations. The method involves applying regression on channel models, parsing the sentence, identifying the verbs, retrieving a small set of alternative verbs, and evaluating each alternative. Our method combines and improves channel and language models, resulting in high recall of detecting and correcting verb misuse.</abstract>
@@ -2113,8 +2113,8 @@
       <title>Learning Synchronous Grammar Patterns for Assisted Writing for Second Language Learners</title>
       <author><first>Chi-En</first><last>Wu</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
-      <author><first>Jim</first><last>Chang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>53–56</pages>
       <url hash="695f60c0">I17-3014</url>
       <abstract>In this paper, we present a method for extracting Synchronous Grammar Patterns (SGPs) from a given parallel corpus in order to assisted second language learners in writing. A grammar pattern consists of a head word (verb, noun, or adjective) and its syntactic environment. A synchronous grammar pattern describes a grammar pattern in the target language (e.g., English) and its counterpart in an other language (e.g., Mandarin), serving the purpose of native language support. Our method involves identifying the grammar patterns in the target language, aligning these patterns with the target language patterns, and finally filtering valid SGPs. The extracted SGPs with examples are then used to develop a prototype writing assistant system, called WriteAhead/bilingual. Evaluation on a set of randomly selected SGPs shows that our system provides satisfactory writing suggestions for English as a Second Language (ESL) learners.</abstract>
@@ -2125,7 +2125,7 @@
       <author><first>Yu-Sheng</first><last>Li</last></author>
       <author><first>Chien-Hui</first><last>Tseng</last></author>
       <author><first>Chian-Yun</first><last>Huang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>57–60</pages>
       <url hash="c64623eb">I17-3015</url>
       <abstract>In this paper, we propose an idea of ondemand knowledge validation and fulfill the idea through an interactive Question-Answering (QA) game system, which is named Guess What. An object (e.g. dog) is first randomly chosen by the system, and then a user can repeatedly ask the system questions in natural language to guess what the object is. The system would respond with yes/no along with a confidence score. Some useful hints can also be given if needed. The proposed framework provides a pioneering example of on-demand knowledge validation in dialog environment to address such needs in AI agents/chatbots. Moreover, the released log data that the system gathered can be used to identify the most critical concepts/attributes of an existing knowledge base, which reflects human’s cognition about the world.</abstract>
@@ -2136,7 +2136,7 @@
       <author><first>Jiarui</first><last>Xu</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Chen-Tse</first><last>Tsai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>61–64</pages>
       <url hash="e1cacfec">I17-3016</url>
       <abstract>This paper aims to provide an effective tool for conversion between Simplified Chinese and Traditional Chinese. We present STCP, a customizable system comprising statistical conversion model, and proofreading web interface. Experiments show that our system achieves comparable character-level conversion performance with the state-of-art systems. In addition, our proofreading interface can effectively support diagnostics and data annotation. STCP is available at <url>http://lagos.lti.cs.cmu.edu:8002/</url></abstract>
@@ -2147,8 +2147,8 @@
       <author><first>Purvanshi</first><last>Mehta</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
       <author><first>Vinayak</first><last>Athavale</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>65–68</pages>
       <url hash="1bbf48b4">I17-3017</url>
       <abstract>This paper presents DILTON a system which solves simple arithmetic word problems. DILTON uses a Deep Neural based model to solve math word problems. DILTON divides the question into two parts - worldstate and query. The worldstate and the query are processed separately in two different networks and finally, the networks are merged to predict the final operation. We report the first deep learning approach for the prediction of operation between two numbers. DILTON learns to predict operations with 88.81% accuracy in a corpus of primary school questions.</abstract>
@@ -2160,7 +2160,7 @@
       <booktitle>Proceedings of the <fixed-case>IJCNLP</fixed-case> 2017, Shared Tasks</booktitle>
       <url hash="13c9a2f6">I17-4</url>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Taipei, Taiwan</address>
@@ -2185,10 +2185,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>IJCNLP</fixed-case>-2017 Task 2: Dimensional Sentiment Analysis for <fixed-case>C</fixed-case>hinese Phrases</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>9–16</pages>
       <url hash="1277b169">I17-4002</url>
       <abstract>This paper presents the IJCNLP 2017 shared task on Dimensional Sentiment Analysis for Chinese Phrases (DSAP) which seeks to identify a real-value sentiment score of Chinese single words and multi-word phrases in the both valence and arousal dimensions. Valence represents the degree of pleasant and unpleasant (or positive and negative) feelings, and arousal represents the degree of excitement and calm. Of the 19 teams registered for this shared task for two-dimensional sentiment analysis, 13 submitted results. We expected that this evaluation campaign could produce more advanced dimensional sentiment analysis techniques, especially for Chinese affective computing. All data sets with gold standards and scoring script are made publicly available to researchers.</abstract>
@@ -2196,7 +2196,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>IJCNLP</fixed-case>-2017 Task 3: Review Opinion Diversification (<fixed-case>R</fixed-case>ev<fixed-case>O</fixed-case>pi<fixed-case>D</fixed-case>-2017)</title>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <author><first>Avijit</first><last>Thawani</last></author>
       <author><first>Mayank</first><last>Panchal</last></author>
       <author><first>Anubhav</first><last>Gupta</last></author>
@@ -2324,7 +2324,7 @@
     <paper id="14">
       <title><fixed-case>CKIP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: Neural Valence-Arousal Prediction for Phrases</title>
       <author><first>Peng-Hsuan</first><last>Li</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Hsin-Yang</first><last>Wang</last></author>
       <pages>89–94</pages>
       <url hash="1307210c">I17-4014</url>
@@ -2337,7 +2337,7 @@
       <author><first>Yung-Chun</first><last>Chang</last></author>
       <author><first>Chen-Ann</first><last>Wang</last></author>
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>95–99</pages>
       <url hash="157039b7">I17-4015</url>
       <abstract>Sentiment lexicon is very helpful in dimensional sentiment applications. Because of countless Chinese words, developing a method to predict unseen Chinese words is required. The proposed method can handle both words and phrases by using an ADVWeight List for word prediction, which in turn improves our performance at phrase level. The evaluation results demonstrate that our system is effective in dimensional sentiment analysis for Chinese phrases. The Mean Absolute Error (MAE) and Pearson’s Correlation Coefficient (PCC) for Valence are 0.723 and 0.835, respectively, and those for Arousal are 0.914 and 0.756, respectively.</abstract>
@@ -2346,7 +2346,7 @@
     <paper id="16">
       <title><fixed-case>A</fixed-case>libaba at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: A Boosted Deep System for Dimensional Sentiment Analysis of <fixed-case>C</fixed-case>hinese Phrases</title>
       <author><first>Xin</first><last>Zhou</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Xu</first><last>Xie</last></author>
       <author><first>Changlong</first><last>Sun</last></author>
       <author><first>Luo</first><last>Si</last></author>
@@ -2392,7 +2392,7 @@
       <title><fixed-case>NCTU</fixed-case>-<fixed-case>NTUT</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: Deep Phrase Embedding using bi-<fixed-case>LSTM</fixed-case>s for Valence-Arousal Ratings Prediction of <fixed-case>C</fixed-case>hinese Phrases</title>
       <author><first>Yen-Hsuan</first><last>Lee</last></author>
       <author><first>Han-Yun</first><last>Yeh</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>124–129</pages>
       <url hash="7f679e4d">I17-4020</url>
@@ -2430,7 +2430,7 @@
     </paper>
     <paper id="24">
       <title>All-In-1 at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Short Text Classification with One Model for All Languages</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>143–148</pages>
       <url hash="d138826c">I17-4024</url>
       <abstract>We present All-In-1, a simple model for multilingual text classification that does not require any parallel data. It is based on a traditional Support Vector Machine classifier exploiting multilingual word embeddings and character n-grams. Our model is simple, easily extendable yet very effective, overall ranking 1st (out of 12 teams) in the IJCNLP 2017 shared task on customer feedback analysis in four languages: English, French, Japanese and Spanish.</abstract>
@@ -2440,7 +2440,7 @@
       <title><fixed-case>S</fixed-case>enti<fixed-case>NLP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Customer Feedback Analysis Using a <fixed-case>B</fixed-case>i-<fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case> Model</title>
       <author><first>Shuying</first><last>Lin</last></author>
       <author><first>Huosheng</first><last>Xie</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <pages>149–154</pages>
       <url hash="a7db96a4">I17-4025</url>
@@ -2461,7 +2461,7 @@
     <paper id="27">
       <title><fixed-case>ADAPT</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: A Multinomial Naive <fixed-case>B</fixed-case>ayes Classification Approach for Customer Feedback Analysis task</title>
       <author><first>Pintu</first><last>Lohar</last></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Haithem</first><last>Afli</last></author>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -2494,7 +2494,7 @@
       <author><first>Partha</first><last>Pakray</last></author>
       <author><first>Riyanka</first><last>Manna</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>180–183</pages>
       <url hash="fd541b88">I17-4030</url>
       <abstract>In this paper, we describe a deep learning framework for analyzing the customer feedback as part of our participation in the shared task on Customer Feedback Analysis at the 8th International Joint Conference on Natural Language Processing (IJCNLP 2017). A Convolutional Neural Network (CNN) based deep neural network model was employed for the customer feedback task. The proposed system was evaluated on two languages, namely, English and French.</abstract>
@@ -2502,11 +2502,11 @@
     </paper>
     <paper id="31">
       <title><fixed-case>IITP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Auto Analysis of Customer Feedback using <fixed-case>CNN</fixed-case> and <fixed-case>GRU</fixed-case> Network</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Pabitra</first><last>Lenka</last></author>
       <author><first>Harsimran</first><last>Bedi</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>184–193</pages>
       <url hash="c523a5e2">I17-4031</url>
       <abstract>Analyzing customer feedback is the best way to channelize the data into new marketing strategies that benefit entrepreneurs as well as customers. Therefore an automated system which can analyze the customer behavior is in great demand. Users may write feedbacks in any language, and hence mining appropriate information often becomes intractable. Especially in a traditional feature-based supervised model, it is difficult to build a generic system as one has to understand the concerned language for finding the relevant features. In order to overcome this, we propose deep Convolutional Neural Network (CNN) and Recurrent Neural Network (RNN) based approaches that do not require handcrafting of features. We evaluate these techniques for analyzing customer feedback sentences on four languages, namely English, French, Japanese and Spanish. Our empirical analysis shows that our models perform well in all the four languages on the setups of IJCNLP Shared Task on Customer Feedback Analysis. Our model achieved the second rank in French, with an accuracy of 71.75% and third ranks for all the other languages.</abstract>
@@ -2607,7 +2607,7 @@
     </paper>
     <paper id="4">
       <title>Neural Machine Translation: Basics, Practical Aspects and Recent Trends</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <pages>11–13</pages>
@@ -2617,7 +2617,7 @@
     </paper>
     <paper id="5">
       <title>The Ultimate Presentation Makeup Tutorial: How to <fixed-case>P</fixed-case>olish your Posters, Slides and Presentations Skills</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>14–15</pages>
       <url hash="21571da5">I17-5005</url>
diff --git a/data/xml/J00.xml b/data/xml/J00.xml
index c75dac1929..f35f7d53b7 100644
--- a/data/xml/J00.xml
+++ b/data/xml/J00.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 26, Number 1, March 2000</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2000</year>
@@ -42,8 +42,8 @@
     </paper>
     <paper id="4">
       <title>Learning dependency translation models as collections of finite state head transducers</title>
-      <author><first>Hiyan</first><last>Alsawi</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alsawi</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Shona</first><last>Douglas</last></author>
       <pages>45-60</pages>
       <url hash="3a893c07">J00-1004</url>
@@ -51,14 +51,14 @@
     </paper>
     <paper id="5">
       <title>Treatment of epsilon moves in subset construction</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>61-76</pages>
       <url hash="38c4764c">J00-1005</url>
       <bibkey>van-noord-2000-treatment</bibkey>
     </paper>
     <paper id="6">
       <title>Multitiered nonlinear morphology using multitape finite automata: a case study on <fixed-case>S</fixed-case>yriac and <fixed-case>A</fixed-case>rabic</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <pages>77-105</pages>
       <url hash="7f2d50ca">J00-1006</url>
       <bibkey>kiraz-2000-multitiered</bibkey>
@@ -92,16 +92,16 @@
     </paper>
     <paper id="2">
       <title>A model for multimodal reference resolution</title>
-      <author><first>Luis</first><last>Pineda</last></author>
-      <author><first>Gabriela</first><last>Garza</last></author>
+      <author id="luis-a-pineda"><first>Luis</first><last>Pineda</last></author>
+      <author id="e-gabriela-garza"><first>Gabriela</first><last>Garza</last></author>
       <pages>139-194</pages>
       <url hash="90f696fe">J00-2002</url>
       <bibkey>pineda-garza-2000-model</bibkey>
     </paper>
     <paper id="3">
       <title>A multistrategy approach to improving pronunciation by analogy</title>
-      <author><first>Yannick</first><last>Marchand</last></author>
-      <author><first>Robert I.</first><last>Damper</last></author>
+      <author id="yannick-marchand"><first>Yannick</first><last>Marchand</last></author>
+      <author id="robert-i-damper"><first>Robert I.</first><last>Damper</last></author>
       <pages>195-220</pages>
       <url hash="d6dbc99d">J00-2003</url>
       <bibkey>marchand-damper-2000-multistrategy</bibkey>
@@ -170,13 +170,13 @@
     </paper>
     <paper id="14">
       <title>Book Reviews: <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <url hash="4c53ac00">J00-2014</url>
       <bibkey>eisner-2000-book</bibkey>
     </paper>
     <paper id="15">
       <title>Book Reviews: Systemic Functional Grammar in Natural Language Generation: Linguistic Description and Computational Representation</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <url hash="bfe8b84a">J00-2015</url>
       <bibkey>wilcock-2000-book</bibkey>
     </paper>
@@ -214,7 +214,7 @@
       <title>Extracting the lowest-frequency words: pitfalls and possibilities</title>
       <author><first>Marc</first><last>Weeber</last></author>
       <author><first>Rein</first><last>Vos</last></author>
-      <author><first>R. Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>R. Harald</first><last>Baayen</last></author>
       <pages>301-318</pages>
       <url hash="cc0ec315">J00-3001</url>
       <bibkey>weeber-etal-2000-extracting</bibkey>
@@ -228,16 +228,16 @@
     </paper>
     <paper id="3">
       <title>Dialogue act modeling for automatic tagging and recognition of conversational speech</title>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <author><first>Klaus</first><last>Ries</last></author>
-      <author><first>Noah</first><last>Coccaro</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="noah-coccaro"><first>Noah</first><last>Coccaro</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <author><first>Rebecca</first><last>Bates</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <author><first>Paul</first><last>Taylor</last></author>
       <author><first>Rachel</first><last>Martin</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <pages>339-374</pages>
       <url hash="d3e4c20c">J00-3003</url>
       <bibkey>stolcke-etal-2000-dialogue</bibkey>
@@ -261,19 +261,19 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Foundations of Computational Linguistics: Man-Machine Communication in Natural Language</title>
-      <author><first>Alexander F.</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander F.</first><last>Gelbukh</last></author>
       <url hash="c030f456">J00-3006</url>
       <bibkey>gelbukh-2000-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Syntactic Wordclass Tagging</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="857980a3">J00-3007</url>
       <bibkey>ratnaparkhi-2000-book</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Natural Language Information Retrieval</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <url hash="b794ad6b">J00-3008</url>
       <bibkey>corston-oliver-2000-book</bibkey>
     </paper>
@@ -315,16 +315,16 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic Text Categorization In Terms Of Genre and Author</title>
-      <author><first>Efstathios</first><last>Stamatatos</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="efstathios-stamatatos"><first>Efstathios</first><last>Stamatatos</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <pages>471-495</pages>
       <url hash="9d17eb5b">J00-4001</url>
       <bibkey>stamatatos-etal-2000-automatic</bibkey>
     </paper>
     <paper id="2">
       <title>Bidirectional Contextual Resolution</title>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
       <pages>497-537</pages>
       <url hash="f4365565">J00-4002</url>
       <doi>10.1162/089120100750105939</doi>
@@ -332,8 +332,8 @@
     </paper>
     <paper id="3">
       <title>An Empirically-based System for Processing Definite Descriptions</title>
-      <author><first>Renata</first><last>Vieira</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>539-593</pages>
       <url hash="467b34da">J00-4003</url>
       <doi>10.1162/089120100750105948</doi>
@@ -342,15 +342,15 @@
     <paper id="4">
       <title>Learning Methods to Combine Linguistic Indicators:Improving Aspectual Classification and Revealing Linguistic Insights</title>
       <author><first>Eric V.</first><last>Siegel</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>595-627</pages>
       <url hash="94f3568d">J00-4004</url>
       <bibkey>siegel-mckeown-2000-learning</bibkey>
     </paper>
     <paper id="5">
       <title>On Coreferring: Coreference in <fixed-case>MUC</fixed-case> and Related Annotation Schemes</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <pages>629-637</pages>
       <url hash="d59d8bf1">J00-4005</url>
       <bibkey>van-deemter-kibble-2000-coreferring</bibkey>
@@ -369,13 +369,13 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Architectures and Mechanisms for Language Processing</title>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <url hash="2372ed38">J00-4008</url>
       <bibkey>weinberg-2000-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Breadth and Depth of Semantic Lexicons</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="cf26c167">J00-4009</url>
       <bibkey>white-2000-book</bibkey>
     </paper>
diff --git a/data/xml/J01.xml b/data/xml/J01.xml
index b7d16a6f4f..8551623493 100644
--- a/data/xml/J01.xml
+++ b/data/xml/J01.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 27, Number 1, March 2001</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2001</year>
@@ -18,7 +18,7 @@
     <paper id="1">
       <title>Using Suffix Arrays to Compute Term Frequency and Document Frequency for All Substrings in a Corpus</title>
       <author><first>Mikio</first><last>Yamamoto</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <pages>1-30</pages>
       <url hash="bc79a753">J01-1001</url>
       <doi>10.1162/089120101300346787</doi>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Bootstrapping Morphological Analyzers by Combining Human Elicitation and Machine Learning</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Sergei</first><last>Nirenberg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenberg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <pages>59-85</pages>
       <url hash="0864f573">J01-1003</url>
@@ -47,9 +47,9 @@
     </paper>
     <paper id="4">
       <title><fixed-case>D</fixed-case>-Tree Substitution Grammars</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>87-121</pages>
       <url hash="32964500">J01-1004</url>
       <doi>10.1162/089120101300346813</doi>
@@ -58,7 +58,7 @@
     <paper id="5">
       <title>Unsupervised Named Entity Recognition Using Syntactic and Semantic Contextual Evidence</title>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>123-131</pages>
       <url hash="5194bbe1">J01-1005</url>
       <bibkey>cucchiarelli-velardi-2001-unsupervised</bibkey>
@@ -100,7 +100,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 27, Number 2, June 2001</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2001</year>
@@ -122,18 +122,18 @@
     </paper>
     <paper id="2">
       <title>Improving Accuracy in word class tagging through the Combination of Machine Learning Systems</title>
-      <author><first>Hans</first><last>Van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>Van Halteren</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>199-229</pages>
       <url hash="f3401f70">J01-2002</url>
       <bibkey>van-halteren-etal-2001-improving</bibkey>
     </paper>
     <paper id="3">
       <title>The Need for Accurate Alignment in Natural Language System Evaluation</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
       <pages>231-248</pages>
       <url hash="80c16f2e">J01-2003</url>
       <doi>10.1162/089120101750300517</doi>
@@ -150,7 +150,7 @@
     <paper id="5">
       <title>Nonminimal Derivations in Unification-Based Parsing</title>
       <author><first>Noriko</first><last>Tomuro</last></author>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <pages>277-285</pages>
       <url hash="882f28a0">J01-2005</url>
       <doi>10.1162/089120101750300535</doi>
@@ -158,13 +158,13 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Knowledge Representation: Logical, Philosophical, and Computational Foundations</title>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <url hash="ad1562ae">J01-2006</url>
       <bibkey>shapiro-2001-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Natural Language Processing and Knowledge Representation: Language for Knowledge and Knowledge for Language</title>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <url hash="db960248">J01-2007</url>
       <bibkey>mercer-2001-book</bibkey>
     </paper>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Advances in Information Retrieval: Recent Research from the Center for Intelligent Information Retrieval</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <url hash="22700dd4">J01-2009</url>
       <bibkey>harabagiu-2001-book</bibkey>
     </paper>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="12">
       <title>Book Reviews: Learnability in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="dd8d4722">J01-2012</url>
       <bibkey>daelemans-2001-book</bibkey>
     </paper>
@@ -213,7 +213,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 27, Number 3, September 2001</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2001</year>
@@ -228,7 +228,7 @@
     <paper id="1">
       <title>The Interaction of Knowledge Sources in Word Sense Disambiguation</title>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>321-349</pages>
       <url hash="4530874f">J01-3001</url>
       <doi>10.1162/089120101317066104</doi>
@@ -253,9 +253,9 @@
     </paper>
     <paper id="4">
       <title>Towards Constructive Text, Diagram, and Layout Generation for Information Presentation</title>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Thomas</first><last>Kamps</last></author>
-      <author><first>Jörg</first><last>Kleinz</last></author>
+      <author id="jorg-kleinz"><first>Jörg</first><last>Kleinz</last></author>
       <author><first>Klaus</first><last>Reichenberger</last></author>
       <pages>409-449</pages>
       <url hash="b64991f1">J01-3004</url>
@@ -265,7 +265,7 @@
     <paper id="5">
       <title>Book Reviews: Prosody: Theory and Experiment. Studies presented to Gosta Bruce</title>
       <author><first>Chilin</first><last>Shih</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <url hash="04624afa">J01-3005</url>
       <bibkey>shih-sproat-2001-book</bibkey>
     </paper>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Presumptive Meanings: The theory of generalized conversational implicature</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <url hash="7db68826">J01-3008</url>
       <bibkey>green-2001-book</bibkey>
     </paper>
@@ -307,7 +307,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 27, Number 4, <fixed-case>D</fixed-case>ecember 2001</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2001</year>
@@ -321,8 +321,8 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction to the Special Issue on Computational Anaphora Resolution</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <author><first>Shalom</first><last>Lappin</last></author>
       <pages>473-477</pages>
       <url hash="b99a6f44">J01-4001</url>
@@ -339,7 +339,7 @@
     </paper>
     <paper id="3">
       <title>A Corpus-Based Evaluation of Centering and Pronoun Resolution</title>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
       <pages>507-520</pages>
       <url hash="a9250068">J01-4003</url>
       <bibkey>tetreault-2001-corpus</bibkey>
@@ -348,7 +348,7 @@
       <title>A Machine Learning Approach to Coreference Resolution of Noun Phrases</title>
       <author><first>Wee Meng</first><last>Soon</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Daniel Chung Yong</first><last>Lim</last></author>
+      <author id="chung-yong-lim"><first>Daniel Chung Yong</first><last>Lim</last></author>
       <pages>521-544</pages>
       <url hash="e0d69cfd">J01-4004</url>
       <doi>10.1162/089120101753342653</doi>
@@ -356,13 +356,13 @@
     </paper>
     <paper id="5">
       <title>An Algorithm for Anaphora Resolution in <fixed-case>S</fixed-case>panish Texts</title>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Antonio</first><last>Ferrández</last></author>
-      <author><first>Lidia</first><last>Moreno</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
-      <author><first>Jesús</first><last>Peral</last></author>
-      <author><first>Maximiliano</first><last>Saiz-Noeda</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrández</last></author>
+      <author id="lidia-moreno"><first>Lidia</first><last>Moreno</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="jesus-peral"><first>Jesús</first><last>Peral</last></author>
+      <author id="maximiliano-saiz-noeda"><first>Maximiliano</first><last>Saiz-Noeda</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>545-567</pages>
       <url hash="90a5ff1b">J01-4005</url>
       <doi>10.1162/089120101753342662</doi>
@@ -370,14 +370,14 @@
     </paper>
     <paper id="6">
       <title>The Uncommon Denominator: A Proposal for Consistent Reporting of Pronoun Resolution Results</title>
-      <author><first>Donna K.</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
       <pages>569-578</pages>
       <url hash="a631fd3a">J01-4006</url>
       <bibkey>byron-2001-uncommon</bibkey>
     </paper>
     <paper id="7">
       <title>A Reformulation of Rule 2 of <fixed-case>C</fixed-case>entering <fixed-case>T</fixed-case>heory</title>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <pages>579-587</pages>
       <url hash="9e73140a">J01-4007</url>
       <bibkey>kibble-2001-reformulation</bibkey>
@@ -396,13 +396,13 @@
     </paper>
     <paper id="10">
       <title>Book Reviews: Robustness in Language and Speech Technology</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <url hash="c4b769ff">J01-4010</url>
       <bibkey>carroll-2001-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: Formal Aspects of Context</title>
-      <author><first>Richmond H.</first><last>Thomason</last></author>
+      <author id="richmond-h-thomason"><first>Richmond H.</first><last>Thomason</last></author>
       <url hash="86694ff4">J01-4011</url>
       <bibkey>thomason-2001-book</bibkey>
     </paper>
diff --git a/data/xml/J02.xml b/data/xml/J02.xml
index 3ab7251c7a..8efcc06a20 100644
--- a/data/xml/J02.xml
+++ b/data/xml/J02.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Binding Machines</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>1-18</pages>
       <doi>10.1162/089120102317341747</doi>
       <url hash="0f47810a">J02-1001</url>
@@ -24,7 +24,7 @@
     <paper id="2">
       <title>A Critique and Improvement of an Evaluation Metric for Text Segmentation</title>
       <author><first>Lev</first><last>Pevzner</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>19-36</pages>
       <doi>10.1162/089120102317341756</doi>
       <url hash="e0c626d2">J02-1002</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>Generating Referring Expressions: <fixed-case>B</fixed-case>oolean Extensions of the Incremental Algorithm</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>37-52</pages>
       <doi>10.1162/089120102317341765</doi>
       <url hash="56668890">J02-1003</url>
@@ -40,8 +40,8 @@
     </paper>
     <paper id="4">
       <title>Syllable-Pattern-Based Unknown-Morpheme Segmentation and Estimation for Hybrid Part-of-Speech Tagging of <fixed-case>K</fixed-case>orean</title>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
-      <author><first>Jeongwon</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeongwon</first><last>Cha</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <pages>53-70</pages>
       <doi>10.1162/089120102317341774</doi>
@@ -82,7 +82,7 @@
     </frontmatter>
     <paper id="1">
       <title>Near-Synonymy and Lexical Choice</title>
-      <author><first>Philip</first><last>Edmonds</last></author>
+      <author id="philip-edmonds"><first>Philip</first><last>Edmonds</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>105-144</pages>
       <doi>10.1162/089120102760173625</doi>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="2">
       <title>The Combinatory Morphemic Lexicon</title>
-      <author><first>Cem</first><last>Bozsahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
       <pages>145-186</pages>
       <doi>10.1162/089120102760173634</doi>
       <url hash="b0db1979">J02-2002</url>
@@ -100,7 +100,7 @@
     <paper id="3">
       <title>Class-Based Probability Estimation Using a Semantic Hierarchy</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>187-206</pages>
       <doi>10.1162/089120102760173643</doi>
       <url hash="e788dcbd">J02-2003</url>
@@ -109,7 +109,7 @@
     <paper id="4">
       <title>Incremental Construction and Maintenance of Minimal Finite-State Automata</title>
       <author><first>Rafael C.</first><last>Carrasco</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>207-216</pages>
       <doi>10.1162/089120102760173652</doi>
       <url hash="5ecfc911">J02-2004</url>
@@ -142,7 +142,7 @@
     <paper id="1">
       <title>Automatic Labeling of Semantic Roles</title>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>245-288</pages>
       <doi>10.1162/089120102760275983</doi>
       <url hash="eef3213e">J02-3001</url>
@@ -197,9 +197,9 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction to the Special Issue on Summarization</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>399-408</pages>
       <doi>10.1162/089120102762671927</doi>
       <url hash="71a06213">J02-4001</url>
@@ -225,7 +225,7 @@
     <paper id="4">
       <title>Efficiently Computed Lexical Chains as an Intermediate Representation for Automatic Text Summarization</title>
       <author><first>H. Gregory</first><last>Silber</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <pages>487-496</pages>
       <doi>10.1162/089120102762671954</doi>
       <url hash="6a8f8a27">J02-4004</url>
@@ -242,7 +242,7 @@
     </paper>
     <paper id="6">
       <title>Using Hidden <fixed-case>M</fixed-case>arkov Modeling to Decompose Human-Written Summaries</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <pages>527-543</pages>
       <doi>10.1162/089120102762671972</doi>
       <url hash="f66be7c2">J02-4006</url>
@@ -251,7 +251,7 @@
     <paper id="7">
       <title>Squibs and Discussions: Human Variation and Lexical Choice</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>545-553</pages>
       <doi>10.1162/089120102762671981</doi>
       <url hash="1431f6af">J02-4007</url>
diff --git a/data/xml/J03.xml b/data/xml/J03.xml
index a1890b33c6..d13a9ead1f 100644
--- a/data/xml/J03.xml
+++ b/data/xml/J03.xml
@@ -15,7 +15,7 @@
       <title>Optimization Models of Sound Systems Using Genetic Algorithms</title>
       <author><first>Jinyun</first><last>Ke</last></author>
       <author><first>Mieko</first><last>Ogura</last></author>
-      <author><first>William S.-Y.</first><last>Wang</last></author>
+      <author id="william-s-y-wang"><first>William S.-Y.</first><last>Wang</last></author>
       <pages>1-18</pages>
       <doi>10.1162/089120103321337412</doi>
       <url hash="cba1fbdf">J03-1001</url>
@@ -23,8 +23,8 @@
     </paper>
     <paper id="2">
       <title>A Systematic Comparison of Various Statistical Alignment Models</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>19-51</pages>
       <doi>10.1162/089120103321337421</doi>
       <url hash="5ac91b9e">J03-1002</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>Graph-Based Generation of Referring Expressions</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sebastiaan</first><last>van Erk</last></author>
       <author><first>André</first><last>Verleg</last></author>
       <pages>53-72</pages>
@@ -51,8 +51,8 @@
     </paper>
     <paper id="5">
       <title>Word Reordering and a Dynamic Programming Beam Search Algorithm for Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>97-133</pages>
       <doi>10.1162/089120103321337458</doi>
       <url hash="bd99ffb7">J03-1005</url>
@@ -114,7 +114,7 @@
     <paper id="3">
       <title>Document Structure</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <pages>211-260</pages>
       <doi>10.1162/089120103322145315</doi>
@@ -133,7 +133,7 @@
     <paper id="5">
       <title>Book Reviews: Lexicography and Natural Language Processing: A Festschrift in Honour of <fixed-case>B</fixed-case>. <fixed-case>T</fixed-case>. <fixed-case>S</fixed-case>. Atkins edited by Marie-Hélène Corréard</title>
       <author><first>Woody</first><last>Haynes</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="d08439d0">J03-2005</url>
       <bibkey>haynes-evens-2003-book</bibkey>
     </paper>
@@ -163,7 +163,7 @@
     <paper id="2">
       <title>The Web as a Parallel Corpus</title>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>349-380</pages>
       <doi>10.1162/089120103322711578</doi>
       <url hash="c18cf498">J03-3002</url>
@@ -172,7 +172,7 @@
     <paper id="3">
       <title>Embedding Web-Based Statistical Translation Models in Cross-Language Information Retrieval</title>
       <author><first>Wessel</first><last>Kraaij</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>381-419</pages>
       <doi>10.1162/089120103322711587</doi>
@@ -246,9 +246,9 @@
     </paper>
     <paper id="2">
       <title>Anaphora and Discourse Structure</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
       <pages>545-587</pages>
       <doi>10.1162/089120103322753347</doi>
@@ -257,7 +257,7 @@
     </paper>
     <paper id="3">
       <title>Head-Driven Statistical Models for Natural Language Parsing</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>589-637</pages>
       <doi>10.1162/089120103322753356</doi>
       <url hash="6d0220bf">J03-4003</url>
@@ -265,8 +265,8 @@
     </paper>
     <paper id="4">
       <title>Disambiguating Nouns, Verbs, and Adjectives Using Automatically Acquired Selectional Preferences</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>639-654</pages>
       <doi>10.1162/089120103322753365</doi>
       <url hash="306394e3">J03-4004</url>
@@ -274,7 +274,7 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: Learning to Classify Text Using Support Vector Machines: Methods, Theory and Algorithms by Thorsten Joachims; Anaphora Resolution by Ruslan Mitkov</title>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <url hash="2336bdee">J03-4005</url>
       <bibkey>basili-strube-2003-book</bibkey>
diff --git a/data/xml/J04.xml b/data/xml/J04.xml
index db3106d477..52fac9a51c 100644
--- a/data/xml/J04.xml
+++ b/data/xml/J04.xml
@@ -51,7 +51,7 @@
     <paper id="5">
       <title>Squibs and Discussions: The Kappa Statistic: A Second Look</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <pages>95-101</pages>
       <doi>10.1162/089120104773633402</doi>
       <url hash="b2253006">J04-1005</url>
@@ -95,7 +95,7 @@
     <paper id="2">
       <title>Learning Domain Ontologies from Document Warehouses and Dedicated Web Sites</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>151-179</pages>
       <doi>10.1162/089120104323093276</doi>
       <url hash="47fdfbc1">J04-2002</url>
@@ -103,8 +103,8 @@
     </paper>
     <paper id="3">
       <title>Statistical Machine Translation with Scarce Resources Using Morpho-syntactic Information</title>
-      <author><first>Sonja</first><last>Nießen</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sonja-niessen"><first>Sonja</first><last>Nießen</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>181-204</pages>
       <doi>10.1162/089120104323093285</doi>
       <url hash="c2a50134">J04-2003</url>
@@ -112,8 +112,8 @@
     </paper>
     <paper id="4">
       <title>Machine Translation with Inferred Stochastic Finite-State Transducers</title>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <pages>205-225</pages>
       <doi>10.1162/089120104323093294</doi>
       <url hash="a4838b69">J04-2004</url>
@@ -156,11 +156,11 @@
     </paper>
     <paper id="2">
       <title>Learning Subjective Language</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <author><first>Matthew</first><last>Bell</last></author>
-      <author><first>Melanie</first><last>Martin</last></author>
+      <author id="melanie-martin"><first>Melanie</first><last>Martin</last></author>
       <pages>277-308</pages>
       <doi>10.1162/0891201041850885</doi>
       <url hash="8fc1e03b">J04-3002</url>
@@ -168,10 +168,10 @@
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>entering: A Parametric Theory and Its Instantiations</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Rosemary</first><last>Stevenson</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="rosemary-stevenson"><first>Rosemary</first><last>Stevenson</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <pages>309-363</pages>
       <doi>10.1162/0891201041850911</doi>
       <url hash="6d1e2a53">J04-3003</url>
@@ -179,7 +179,7 @@
     </paper>
     <paper id="4">
       <title>Understanding the Yarowsky Algorithm</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>365-395</pages>
       <doi>10.1162/0891201041850876</doi>
       <url hash="4f7ece5c">J04-3004</url>
@@ -204,7 +204,7 @@
     </frontmatter>
     <paper id="1">
       <title>Optimizing Referential Coherence in Text Generation</title>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <pages>401-416</pages>
       <doi>10.1162/0891201042544893</doi>
@@ -213,8 +213,8 @@
     </paper>
     <paper id="2">
       <title>The Alignment Template Approach to Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>417-449</pages>
       <doi>10.1162/0891201042544884</doi>
       <url hash="5face8b5">J04-4002</url>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="4">
       <title>Intricacies of Collins’ Parsing Model</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <pages>479-511</pages>
       <doi>10.1162/0891201042544929</doi>
       <url hash="4b1ad1b4">J04-4004</url>
diff --git a/data/xml/J05.xml b/data/xml/J05.xml
index ae2fb01abb..55ea75c531 100644
--- a/data/xml/J05.xml
+++ b/data/xml/J05.xml
@@ -13,7 +13,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: Some Points in a Time</title>
-      <author><first>Karen</first><last>Spärck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Spärck Jones</last></author>
       <pages>1-14</pages>
       <doi>10.1162/0891201053630237</doi>
       <url hash="3a20dc4d">J05-1001</url>
@@ -21,9 +21,9 @@
     </paper>
     <paper id="2">
       <title>Squibs and Discussions: Real versus Template-Based Natural Language Generation: A False Opposition?</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>15-24</pages>
       <doi>10.1162/0891201053630291</doi>
       <url hash="42d4e93d">J05-1002</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Discriminative Reranking for Natural Language Parsing</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Terry</first><last>Koo</last></author>
       <pages>25-70</pages>
       <doi>10.1162/0891201053630273</doi>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="4">
       <title>The <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank: An Annotated Corpus of Semantic Roles</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <author><first>Paul</first><last>Kingsbury</last></author>
       <pages>71-106</pages>
@@ -52,7 +52,7 @@
       <title>Clustering Syntactic Positions with Similar Semantic Requirements</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
       <author><first>Alexandre</first><last>Agustini</last></author>
-      <author><first>Gabriel P.</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel P.</first><last>Lopes</last></author>
       <pages>107-146</pages>
       <doi>10.1162/0891201053630318</doi>
       <url hash="2135d245">J05-1005</url>
@@ -94,7 +94,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: Remembering Bill <fixed-case>M</fixed-case>ann</title>
-      <author><first>Christian M. I. M.</first><last>Matthiessen</last></author>
+      <author id="christian-m-i-m-matthiessen"><first>Christian M. I. M.</first><last>Matthiessen</last></author>
       <pages>161-171</pages>
       <doi>10.1162/0891201054224002</doi>
       <url hash="78f37efa">J05-2001</url>
@@ -148,7 +148,7 @@
     <paper id="1">
       <title>Squibs and Discussions: Evaluating Discourse and Dialogue Coding Schemes</title>
       <author><first>Richard</first><last>Craggs</last></author>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <pages>289-296</pages>
       <doi>10.1162/089120105774321109</doi>
       <url hash="efcc843c">J05-3001</url>
@@ -157,7 +157,7 @@
     <paper id="2">
       <title>Sentence Fusion for Multidocument News Summarization</title>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>297-328</pages>
       <doi>10.1162/089120105774321091</doi>
       <url hash="018e9558">J05-3002</url>
@@ -168,7 +168,7 @@
       <author><first>Ruth</first><last>O’Donovan</last></author>
       <author><first>Michael</first><last>Burke</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>329-366</pages>
       <doi>10.1162/089120105774321073</doi>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="7">
       <title>Book Review: New Directions in Question Answering, edited by Mark <fixed-case>T</fixed-case>. Maybury</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <url hash="220d2b97">J05-3007</url>
       <bibkey>pasca-2005-book</bibkey>
     </paper>
@@ -235,7 +235,7 @@
     <paper id="2">
       <title>Co-occurrence Retrieval: A Flexible Framework for Lexical Distributional Similarity</title>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>439-475</pages>
       <doi>10.1162/089120105775299122</doi>
       <url hash="220815af">J05-4002</url>
@@ -243,7 +243,7 @@
     </paper>
     <paper id="3">
       <title>Improving Machine Translation Performance by Exploiting Non-Parallel Corpora</title>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>477-504</pages>
       <doi>10.1162/089120105775299168</doi>
@@ -252,7 +252,7 @@
     </paper>
     <paper id="4">
       <title>Induction of Word and Phrase Alignments for Automatic Document Summarization</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>505-530</pages>
       <doi>10.1162/089120105775299140</doi>
@@ -264,7 +264,7 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Andi</first><last>Wu</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <pages>531-574</pages>
       <doi>10.1162/089120105775299177</doi>
       <url hash="872e0484">J05-4005</url>
@@ -272,7 +272,7 @@
     </paper>
     <paper id="6">
       <title>Last Words: Reviewing the Reviewers</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>575-578</pages>
       <doi>10.1162/089120105775299131</doi>
       <url hash="e07da2c3">J05-4006</url>
diff --git a/data/xml/J06.xml b/data/xml/J06.xml
index 8b17a87d3c..1d4f061f2d 100644
--- a/data/xml/J06.xml
+++ b/data/xml/J06.xml
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Finite-State Registered Automata for Non-Concatenative Morphology</title>
-      <author><first>Yael</first><last>Cohen-Sygal</last></author>
+      <author id="yael-cohen-sygal"><first>Yael</first><last>Cohen-Sygal</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>49-82</pages>
       <doi>10.1162/coli.2006.32.1.49</doi>
@@ -48,9 +48,9 @@
     </paper>
     <paper id="5">
       <title>Automatic Discovery of Part-Whole Relations</title>
-      <author><first>Roxana</first><last>Girju</last></author>
-      <author><first>Adriana</first><last>Badulescu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Badulescu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>83-135</pages>
       <doi>10.1162/coli.2006.32.1.83</doi>
       <url hash="40410d79">J06-1005</url>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="6">
       <title>Introducing Speech and Language Processing, by John Coleman</title>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <url hash="4feca110">J06-1006</url>
       <bibkey>harper-2006-introducing</bibkey>
     </paper>
@@ -70,13 +70,13 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Computational and Quantitative Studies by <fixed-case>M</fixed-case>. <fixed-case>A</fixed-case>. <fixed-case>K</fixed-case>. Halliday, edited by Jonathan <fixed-case>J</fixed-case>. Webster</title>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="e3ea6ee1">J06-1008</url>
       <bibkey>mellish-2006-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Corpus Linguistics: Readings in a Widening Discipline, edited by Geoffrey Sampson and Diana <fixed-case>M</fixed-case>c<fixed-case>C</fixed-case>arthy</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <url hash="513f4b5b">J06-1009</url>
       <bibkey>malouf-2006-book</bibkey>
     </paper>
@@ -100,7 +100,7 @@
     </frontmatter>
     <paper id="1">
       <title>Experiments on the Automatic Induction of <fixed-case>G</fixed-case>erman Semantic Verb Classes</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>159-194</pages>
       <doi>10.1162/coli.2006.32.2.159</doi>
       <url hash="2827c7a2">J06-2001</url>
@@ -108,7 +108,7 @@
     </paper>
     <paper id="2">
       <title>Generating Referring Expressions that Involve Gradable Properties</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>195-222</pages>
       <doi>10.1162/coli.2006.32.2.195</doi>
       <url hash="0ea19f44">J06-2002</url>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="3">
       <title>Building and Using a Lexical Knowledge Base of Near-Synonym Differences</title>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>223-262</pages>
       <doi>10.1162/coli.2006.32.2.223</doi>
@@ -126,7 +126,7 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>quibs: The <fixed-case>PARADISE</fixed-case> Evaluation Framework: Issues and Findings</title>
       <author><first>Melita</first><last>Hajdinjak</last></author>
-      <author><first>France</first><last>Mihelic</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelic</last></author>
       <pages>263-272</pages>
       <doi>10.1162/coli.2006.32.2.263</doi>
       <url hash="1cc93dd8">J06-2004</url>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="3">
       <title>Similarity of Semantic Relations</title>
-      <author><first>Peter D.</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter D.</first><last>Turney</last></author>
       <pages>379-416</pages>
       <doi>10.1162/coli.2006.32.3.379</doi>
       <url hash="56c7dcc5">J06-3003</url>
@@ -203,9 +203,9 @@
     </paper>
     <paper id="4">
       <title>Characterizing and Predicting Corrections in Spoken Dialogue Systems</title>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>417-438</pages>
       <doi>10.1162/coli.2006.32.3.417</doi>
       <url hash="27902f0a">J06-3004</url>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Argument Realization, by Beth Levin and Malka Rappaport Hovav</title>
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <url hash="631352c7">J06-3008</url>
       <bibkey>kipper-2006-book</bibkey>
     </paper>
@@ -264,7 +264,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: Old Linguists Never Die, They Only Get Obligatorily Deleted</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <pages>457-469</pages>
       <doi>10.1162/coli.2006.32.4.457</doi>
       <url hash="e1e07f69">J06-4001</url>
@@ -289,13 +289,13 @@
     </paper>
     <paper id="4">
       <title>N-gram-based Machine Translation</title>
-      <author><first>José</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="jose-b-marino"><first>José</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>527-549</pages>
       <doi>10.1162/coli.2006.32.4.527</doi>
       <url hash="0388696f">J06-4004</url>
@@ -309,13 +309,13 @@
     </paper>
     <paper id="6">
       <title>Book Review: One-on-One Tutoring by Humans and Computers, by Martha Evens and Joel <fixed-case>M</fixed-case>ichael</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <url hash="8944d1dd">J06-4006</url>
       <bibkey>jordan-2006-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Review: Memory-Based Language Processing, by Walter Daelemans and Antal van den Bosch</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <url hash="2f6abff4">J06-4007</url>
       <bibkey>kubler-2006-book</bibkey>
     </paper>
diff --git a/data/xml/J07.xml b/data/xml/J07.xml
index fdba373df7..f975ddc641 100644
--- a/data/xml/J07.xml
+++ b/data/xml/J07.xml
@@ -13,8 +13,8 @@
     </frontmatter>
     <paper id="1">
       <title>Letter to the Editor</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>1</pages>
       <doi>10.1162/coli.2007.33.1.1</doi>
       <url hash="e6dfdec7">J07-1001</url>
@@ -32,7 +32,7 @@
     <paper id="3">
       <title>Word-Level Confidence Estimation for Machine Translation</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>9-40</pages>
       <doi>10.1162/coli.2007.33.1.9</doi>
       <url hash="c0bc6c33">J07-1003</url>
@@ -40,8 +40,8 @@
     </paper>
     <paper id="4">
       <title>Question Answering in Restricted Domains: An Overview</title>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>José Luis</first><last>Vicedo</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="jose-luis-vicedo"><first>José Luis</first><last>Vicedo</last></author>
       <pages>41-61</pages>
       <doi>10.1162/coli.2007.33.1.41</doi>
       <url hash="17d718c2">J07-1004</url>
@@ -59,7 +59,7 @@
     <paper id="6">
       <title>Composing Questions through Conceptual Authoring</title>
       <author><first>Catalina</first><last>Hallett</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <pages>105-133</pages>
       <doi>10.1162/coli.2007.33.1.105</doi>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Flexible Semantics for Reinterpretation Phenomena, by Markus Egg</title>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>141-143</pages>
       <url hash="f94e069c">J07-1008</url>
       <bibkey>pulman-2007-book</bibkey>
@@ -109,7 +109,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>S</fixed-case>quibs: Maximal Consistent Subsets</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <pages>153-160</pages>
       <doi>10.1162/coli.2007.33.2.153</doi>
       <url hash="fdb18502">J07-2001</url>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="2">
       <title>Dependency-Based Construction of Semantic Space Models</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>161-199</pages>
       <doi>10.1162/coli.2007.33.2.161</doi>
@@ -134,8 +134,8 @@
     </paper>
     <paper id="4">
       <title>Generating Referring Expressions: Making Referents Easy to Identify</title>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Judith</first><last>Masthoff</last></author>
       <pages>229-254</pages>
       <doi>10.1162/coli.2007.33.2.229</doi>
@@ -144,14 +144,14 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: Word Sense Disambiguation: Algorithms and Applications, edited by Eneko Agirre and Philip Edmonds</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>255-258</pages>
       <url hash="0d7d5cc6">J07-2005</url>
       <bibkey>mccarthy-2007-book</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: From Molecule to Metaphor: A Neural Theory of Language, by Jerome <fixed-case>A</fixed-case>. Feldman</title>
-      <author><first>Stefan</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan</first><last>Frank</last></author>
       <pages>259-261</pages>
       <url hash="7476f899">J07-2006</url>
       <bibkey>frank-2007-book</bibkey>
@@ -218,7 +218,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: <fixed-case>K</fixed-case>aren <fixed-case>S</fixed-case>pärck <fixed-case>J</fixed-case>ones</title>
-      <author><first>John</first><last>Tait</last></author>
+      <author id="john-tait"><first>John</first><last>Tait</last></author>
       <pages>289-291</pages>
       <doi>10.1162/coli.2007.33.3.289</doi>
       <url hash="5395350d">J07-3001</url>
@@ -226,7 +226,7 @@
     </paper>
     <paper id="2">
       <title>Squibs and Discussions: Measuring Word Alignment Quality for Statistical Machine Translation</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>293-303</pages>
       <doi>10.1162/coli.2007.33.3.293</doi>
@@ -236,7 +236,7 @@
     <paper id="3">
       <title>A Sketch Algorithm for Estimating Two-Way and Multi-Way Associations</title>
       <author><first>Ping</first><last>Li</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <pages>305-354</pages>
       <doi>10.1162/coli.2007.33.3.305</doi>
       <url hash="b1b8a52d">J07-3003</url>
@@ -245,7 +245,7 @@
     <paper id="4">
       <title><fixed-case>CCG</fixed-case>bank: A Corpus of <fixed-case>CCG</fixed-case> Derivations and Dependency Structures Extracted from the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>355-396</pages>
       <doi>10.1162/coli.2007.33.3.355</doi>
       <url hash="9c823308">J07-3004</url>
@@ -253,7 +253,7 @@
     </paper>
     <paper id="5">
       <title>Classifying Non-Sentential Utterances in Dialogue: A Machine Learning Approach</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <author><first>Shalom</first><last>Lappin</last></author>
       <pages>397-427</pages>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="8">
       <title>Last Words: Computational Linguistics: What About the Linguistics?</title>
-      <author><first>Karen</first><last>Spärck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Spärck Jones</last></author>
       <pages>437-441</pages>
       <doi>10.1162/coli.2007.33.3.437</doi>
       <url hash="fc19c472">J07-3008</url>
@@ -304,7 +304,7 @@
     <paper id="2">
       <title><fixed-case>S</fixed-case>quibs: Prepositional Phrase Attachment without Oracles</title>
       <author><first>Michaela</first><last>Atterer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>469-476</pages>
       <doi>10.1162/coli.2007.33.4.469</doi>
       <url hash="f1bdfab9">J07-4002</url>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="3">
       <title>Weighted and Probabilistic Context-Free Grammars Are Equally Expressive</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>477-491</pages>
       <doi>10.1162/coli.2007.33.4.477</doi>
@@ -322,7 +322,7 @@
     <paper id="4">
       <title>Wide-Coverage Efficient Statistical Parsing with <fixed-case>CCG</fixed-case> and Log-Linear Models</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>493-552</pages>
       <doi>10.1162/coli.2007.33.4.493</doi>
       <url hash="18f809bb">J07-4004</url>
@@ -330,10 +330,10 @@
     </paper>
     <paper id="5">
       <title>Unsupervised Acquisition of Predominant Word Senses</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>553-590</pages>
       <doi>10.1162/coli.2007.33.4.553</doi>
       <url hash="fbadf72f">J07-4005</url>
@@ -360,7 +360,7 @@
     </paper>
     <paper id="9">
       <title>Last Words: Breaking News: Changing Attitudes and Practices</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>607-611</pages>
       <doi>10.1162/coli.2007.33.4.607</doi>
       <url hash="f0155d5e">J07-4009</url>
diff --git a/data/xml/J08.xml b/data/xml/J08.xml
index 0d87e79c2d..6f24d168f1 100644
--- a/data/xml/J08.xml
+++ b/data/xml/J08.xml
@@ -23,7 +23,7 @@
     <paper id="2">
       <title>Feature Forest Models for Probabilistic <fixed-case>HPSG</fixed-case> Parsing</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>35-80</pages>
       <doi>10.1162/coli.2008.34.1.35</doi>
       <url hash="36845480">J08-1002</url>
@@ -35,7 +35,7 @@
       <author><first>Michael</first><last>Burke</last></author>
       <author><first>Ruth</first><last>O’Donovan</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>81-124</pages>
       <doi>10.1162/coli.2008.34.1.81</doi>
@@ -44,7 +44,7 @@
     </paper>
     <paper id="4">
       <title>Book Reviews: The Text Mining Handbook: Advanced Approaches to Analyzing Unstructured Data by Ronen Feldman and <fixed-case>J</fixed-case>ames Sanger</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>125-127</pages>
       <doi>10.1162/coli.2008.34.1.125</doi>
       <url hash="9ddc5595">J08-1004</url>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="8">
       <title>Last Words: On Becoming a Discipline</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>137-144</pages>
       <doi>10.1162/coli.2008.34.1.137</doi>
       <url hash="a14d54d9">J08-1008</url>
@@ -94,7 +94,7 @@
     </frontmatter>
     <paper id="1">
       <title>Special Issue Introduction: Semantic Role Labeling: An Introduction to the Special Issue</title>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
       <author><first>Kenneth C.</first><last>Litkowski</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
@@ -107,7 +107,7 @@
       <title>A Global Joint Model for Semantic Role Labeling</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Aria</first><last>Haghighi</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>161-191</pages>
       <doi>10.1162/coli.2008.34.2.161</doi>
       <url hash="21313937">J08-2002</url>
@@ -117,7 +117,7 @@
       <title>Tree Kernels for Semantic Role Labeling</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>193-224</pages>
       <doi>10.1162/coli.2008.34.2.193</doi>
       <url hash="d27505ad">J08-2003</url>
@@ -135,7 +135,7 @@
       <title>The Importance of Syntactic Parsing and Inference in Semantic Role Labeling</title>
       <author><first>Vasin</first><last>Punyakanok</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>257-287</pages>
       <doi>10.1162/coli.2008.34.2.257</doi>
       <url hash="6693c8c8">J08-2005</url>
@@ -143,9 +143,9 @@
     </paper>
     <paper id="6">
       <title>Towards Robust Semantic Role Labeling</title>
-      <author><first>Sameer S.</first><last>Pradhan</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="sameer-pradhan"><first>Sameer S.</first><last>Pradhan</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>289-310</pages>
       <doi>10.1162/coli.2008.34.2.289</doi>
       <url hash="b989e8e2">J08-2006</url>
@@ -192,7 +192,7 @@
       <title>A Twin-Candidate Model for Learning-Based Anaphora Resolution</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>327-356</pages>
       <doi>10.1162/coli.2008.07-004-R2-06-57</doi>
       <url hash="e01e9b13">J08-3002</url>
@@ -200,7 +200,7 @@
     </paper>
     <paper id="3">
       <title>Dependency Parsing of <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>357-389</pages>
@@ -238,7 +238,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Computational Approaches to Morphology and Syntax by Brian Roark and Richard Sproat</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>453-457</pages>
       <doi>10.1162/coli.2008.34.3.453</doi>
       <url hash="808fcdeb">J08-3007</url>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: <fixed-case>A</fixed-case>rabic Computational Morphology: Knowledge-Based and Empirical Methods by Abdelhadi Soudi, Antal van den Bosch, and Günter Neumann (editors)</title>
-      <author><first>George</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George</first><last>Kiraz</last></author>
       <pages>459-462</pages>
       <doi>10.1162/coli.2008.34.3.459</doi>
       <url hash="a48b507c">J08-3008</url>
@@ -280,7 +280,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: On Whose Shoulders?</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>471-486</pages>
       <doi>10.1162/coli.2008.34.4.471</doi>
       <url hash="fe35ec4e">J08-4001</url>
@@ -288,7 +288,7 @@
     </paper>
     <paper id="2">
       <title>Hybrid Reinforcement/Supervised Learning of Dialogue Policies from Fixed Data Sets</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>487-511</pages>
@@ -307,7 +307,7 @@
     <paper id="4">
       <title>Survey Article: Inter-Coder Agreement for Computational Linguistics</title>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>555-596</pages>
       <doi>10.1162/coli.07-034-R2</doi>
       <url hash="3789e253">J08-4004</url>
@@ -315,7 +315,7 @@
     </paper>
     <paper id="5">
       <title>Constructing Corpora for the Development and Evaluation of Paraphrase Systems</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>597-614</pages>
@@ -325,8 +325,8 @@
     </paper>
     <paper id="6">
       <title>Book Review: Mathematical Linguistics by András Kornai</title>
-      <author><first>Richard</first><last>Sproat</last></author>
-      <author><first>Roxana</first><last>Gîrju</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Gîrju</last></author>
       <pages>615-617</pages>
       <doi>10.1162/coli.2008.34.4.615</doi>
       <url hash="3ce2f7d0">J08-4006</url>
@@ -354,7 +354,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>E</fixed-case>rratum: Dependency Parsing of <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>627</pages>
diff --git a/data/xml/J09.xml b/data/xml/J09.xml
index c0a584ca8d..95d48ff68c 100644
--- a/data/xml/J09.xml
+++ b/data/xml/J09.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Letter to the Editor</title>
-      <author><first>Sean</first><last>Fulop</last></author>
+      <author id="sean-a-fulop"><first>Sean</first><last>Fulop</last></author>
       <doi>10.1162/coli.2009.35.1.001</doi>
       <pages>1</pages>
       <url hash="45547632">J09-1001</url>
@@ -24,17 +24,17 @@
     </paper>
     <paper id="2">
       <title>Statistical Approaches to Computer-Assisted Translation</title>
-      <author><first>Sergio</first><last>Barrachina</last></author>
+      <author id="sergio-barrachina"><first>Sergio</first><last>Barrachina</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
       <author><first>Elsa</first><last>Cubel</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Antonio</first><last>Lagarda</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio</first><last>Lagarda</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Jesús</first><last>Tomás</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
-      <author><first>Juan-Miguel</first><last>Vilar</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
+      <author id="juan-miguel-vilar"><first>Juan-Miguel</first><last>Vilar</last></author>
       <doi>10.1162/coli.2008.07-055-R2-06-29</doi>
       <pages>3-28</pages>
       <url hash="e25d58b5">J09-1002</url>
@@ -43,9 +43,9 @@
     <paper id="3">
       <title>Evaluating Centering for Information Ordering Using Corpora</title>
       <author><first>Nikiforos</first><last>Karamanis</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <doi>10.1162/coli.07-036-R2-06-22</doi>
       <pages>29-46</pages>
       <url hash="213f17ed">J09-1003</url>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="8">
       <title>Last Words: That’s Nice ... What Can You Do With It?</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <doi>10.1162/coli.2009.35.1.111</doi>
       <url hash="d60da27f">J09-1008</url>
       <bibkey>belz-2009-last</bibkey>
@@ -106,7 +106,7 @@
     </frontmatter>
     <paper id="1">
       <title>Prepositions in Applications: A Survey and Introduction to the Special Issue</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <doi>10.1162/coli.2009.35.2.119</doi>
@@ -117,7 +117,7 @@
     <paper id="2">
       <title>Exploiting Semantic Role Resources for Preposition Disambiguation</title>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <doi>10.1162/coli.06-79-prep15</doi>
       <pages>151-184</pages>
       <url hash="ca9e7ec3">J09-2002</url>
@@ -125,7 +125,7 @@
     </paper>
     <paper id="3">
       <title>The Syntax and Semantics of Prepositions in the Task of Automatic Interpretation of Nominal Phrases and Compounds: A Cross-Linguistic Study</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <doi>10.1162/coli.06-77-prep13</doi>
       <pages>185-228</pages>
       <url hash="e801b004">J09-2003</url>
@@ -142,8 +142,8 @@
     </paper>
     <paper id="5">
       <title>Applying Computational Models of Spatial Prepositions to Visually Situated Dialog</title>
-      <author><first>John D.</first><last>Kelleher</last></author>
-      <author><first>Fintan J.</first><last>Costello</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
+      <author id="fintan-j-costello"><first>Fintan J.</first><last>Costello</last></author>
       <doi>10.1162/coli.06-78-prep14</doi>
       <pages>271-306</pages>
       <url hash="7e77cd77">J09-2005</url>
@@ -189,8 +189,8 @@
     </paper>
     <paper id="2">
       <title><fixed-case>A</fixed-case>rticles: Robust Understanding in Multimodal Interfaces</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <doi>10.1162/coli.08-022-R2-06-26</doi>
       <pages>345-397</pages>
       <url hash="4d74fe61">J09-3002</url>
@@ -199,7 +199,7 @@
     <paper id="3">
       <title><fixed-case>A</fixed-case>rticles: Recognizing Contextual Polarity: An Exploration of Features for Phrase-Level Sentiment Analysis</title>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Paul</first><last>Hoffmann</last></author>
       <doi>10.1162/coli.08-012-R1-06-90</doi>
       <pages>399-433</pages>
@@ -208,7 +208,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>A</fixed-case>rticles: Bootstrapping Distributional Feature Vector Quality</title>
-      <author><first>Maayan</first><last>Zhitomirsky-Geffet</last></author>
+      <author id="maayan-geffet"><first>Maayan</first><last>Zhitomirsky-Geffet</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <doi>10.1162/coli.08-032-R1-06-96</doi>
       <pages>435-461</pages>
@@ -217,7 +217,7 @@
     </paper>
     <paper id="5">
       <title>Book Review: Speech and Language Processing (second edition) by Daniel <fixed-case>J</fixed-case>urafsky and <fixed-case>J</fixed-case>ames <fixed-case>H</fixed-case>. <fixed-case>M</fixed-case>artin</title>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <doi>10.1162/coli.B09-001</doi>
       <url hash="522b8104">J09-3005</url>
       <bibkey>keselj-2009-book</bibkey>
@@ -261,8 +261,8 @@
     </paper>
     <paper id="2">
       <title><fixed-case>O</fixed-case>bituaries: Janet Hitzeman</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>David</first><last>Day</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <doi>10.1162/coli.2009.35.4.35411</doi>
       <pages>475-481</pages>
@@ -271,9 +271,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>O</fixed-case>bituaries: Hozumi <fixed-case>T</fixed-case>anaka</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.1162/coli.2009.35.4.35412</doi>
       <pages>475-481</pages>
       <url hash="36177081">J09-4003</url>
@@ -281,7 +281,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: The Dawn of Statistical <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case></title>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <doi>10.1162/coli.2009.35.4.35401</doi>
       <pages>483-494</pages>
       <url hash="470b703b">J09-4004</url>
@@ -307,8 +307,8 @@
     </paper>
     <paper id="7">
       <title>Kernel Methods for Minimally Supervised <fixed-case>WSD</fixed-case></title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <doi>10.1162/coli.2009.35.4.35407</doi>
       <pages>513-528</pages>
@@ -318,7 +318,7 @@
     <paper id="8">
       <title>An Investigation into the Validity of Some Metrics for Automatically Evaluating Natural Language Generation Systems</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <doi>10.1162/coli.2009.35.4.35405</doi>
       <pages>529-558</pages>
       <url hash="bf605c1b">J09-4008</url>
@@ -346,7 +346,7 @@
     </paper>
     <paper id="11">
       <title>Book Review: Learning Machine Translation by Cyril Goutte, Nicola Cancedda, Marc Dymetman, and <fixed-case>G</fixed-case>eorge Foster (editors)</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <doi>10.1162/coli.2009.35.4.35408</doi>
       <url hash="3f47eb9e">J09-4011</url>
       <bibkey>blunsom-2009-book</bibkey>
diff --git a/data/xml/J10.xml b/data/xml/J10.xml
index f3cf03709e..9241e8e2e5 100644
--- a/data/xml/J10.xml
+++ b/data/xml/J10.xml
@@ -17,8 +17,8 @@
     <paper id="1">
       <title>Broad-Coverage Parsing Using Human-Like Memory Constraints</title>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <doi>10.1162/coli.2010.36.1.36100</doi>
       <pages>1-30</pages>
@@ -37,7 +37,7 @@
     <paper id="3">
       <title>Summarizing Short Stories</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <doi>10.1162/coli.2010.36.1.36102</doi>
       <pages>71-109</pages>
       <url hash="21d1c7ee">J10-1003</url>
@@ -70,14 +70,14 @@
     </paper>
     <paper id="7">
       <title>Book Review: Dependency Parsing by Sandra Kübler, Ryan <fixed-case>M</fixed-case>c<fixed-case>D</fixed-case>onald, and Joakim <fixed-case>N</fixed-case>ivre</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <doi>10.1162/coli.2010.36.1.36107</doi>
       <url hash="c50804bd">J10-1007</url>
       <bibkey>carroll-2010-book</bibkey>
     </paper>
     <paper id="8">
       <title>Last Words: Failure is an Orphan (Let’s Adopt)</title>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <doi>10.1162/coli.2010.36.1.36105</doi>
       <url hash="9373bdaa">J10-1008</url>
       <bibkey>szpakowicz-2010-last</bibkey>
@@ -99,9 +99,9 @@
     </frontmatter>
     <paper id="1">
       <title>Generating Tailored, Comparative Descriptions with Contextually Appropriate Intonation</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Robert A. J.</first><last>Clark</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <doi>10.1162/coli.2010.09-023-R1-08-002</doi>
       <pages>159-201</pages>
       <url hash="acf20c31">J10-2001</url>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="2">
       <title>Sorting Texts by Readability</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Satoshi</first><last>Tezuka</last></author>
       <author><first>Hiroshi</first><last>Terada</last></author>
       <doi>10.1162/coli.2010.09-036-R2-08-050</doi>
@@ -120,9 +120,9 @@
     <paper id="3">
       <title>What Is Not in the Bag of Words for Why-<fixed-case>QA</fixed-case>?</title>
       <author><first>Suzan</first><last>Verberne</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Peter-Arno</first><last>Coppen</last></author>
+      <author id="peter-arno-coppen"><first>Peter-Arno</first><last>Coppen</last></author>
       <doi>10.1162/coli.2010.09-032-R1-08-034</doi>
       <pages>229-245</pages>
       <url hash="d13dc624">J10-2003</url>
@@ -141,7 +141,7 @@
     </paper>
     <paper id="5">
       <title>Book Review: Statistical Language Models for Information Retrieval by <fixed-case>C</fixed-case>heng<fixed-case>X</fixed-case>iang Zhai</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <doi>10.1162/coli.2010.36.2.36200</doi>
       <url hash="dd2bb160">J10-2005</url>
       <bibkey>gaussier-2010-book</bibkey>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="7">
       <title>Last Words: What Computational Linguists Can Learn from Psychologists (and Vice Versa)</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <doi>10.1162/coli.2010.36.2.36201</doi>
       <pages>285-294</pages>
       <url hash="49a2f553">J10-2007</url>
@@ -197,7 +197,7 @@
     <paper id="3">
       <title>Generating Phrasal and Sentential Paraphrases: A Survey of Data-Driven Methods</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <doi>10.1162/coli_a_00002</doi>
       <pages>341-387</pages>
       <url hash="48a47e41">J10-3003</url>
@@ -225,7 +225,7 @@
       <title>Complexity, Parsing, and Factorization of Tree-Local Multi-Component <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
       <author><first>Rebecca</first><last>Nesson</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.1162/coli_a_00005</doi>
       <pages>443-480</pages>
       <url hash="be8b6f78">J10-3006</url>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="7">
       <title>Learning Tractable Word Alignment Models with Complex Constraints</title>
-      <author><first>João V.</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João V.</first><last>Graça</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <doi>10.1162/coli_a_00007</doi>
@@ -243,10 +243,10 @@
     </paper>
     <paper id="8">
       <title>Hierarchical Phrase-Based Translation with Weighted Finite-State Transducers and Shallow-n Grammars</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Eduardo R.</first><last>Banga</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo R.</first><last>Banga</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <doi>10.1162/coli_a_00006</doi>
       <pages>505-533</pages>
@@ -255,9 +255,9 @@
     </paper>
     <paper id="9">
       <title>Linguistically Annotated Reordering: Evaluation and Analysis</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <doi>10.1162/coli_a_00009</doi>
       <pages>535-568</pages>
@@ -281,7 +281,7 @@
     </paper>
     <paper id="12">
       <title>Last Words: Ancient Symbols, Computational Linguistics, and the Reviewing Practices of the General Science Journals</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <doi>10.1162/coli_a_00011</doi>
       <pages>585-594</pages>
       <url hash="03af2914">J10-3012</url>
@@ -304,7 +304,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: Fred Jelinek</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <doi>10.1162/coli_a_00032</doi>
       <pages>595-599</pages>
       <url hash="06d3d06f">J10-4001</url>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: The Right Tools: Reflections on Computation and Language</title>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <doi>10.1162/coli_a_00018</doi>
       <pages>601-630</pages>
       <url hash="72649e7d">J10-4002</url>
@@ -339,7 +339,7 @@
       <title>String-to-Dependency Statistical Machine Translation</title>
       <author><first>Libin</first><last>Shen</last></author>
       <author><first>Jinxi</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.1162/coli_a_00015</doi>
       <pages>649-671</pages>
       <url hash="1bd2240c">J10-4005</url>
@@ -347,7 +347,7 @@
     </paper>
     <paper id="6">
       <title>Distributional Memory: A General Framework for Corpus-Based Semantics</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <doi>10.1162/coli_a_00016</doi>
       <pages>673-721</pages>
@@ -358,8 +358,8 @@
     <paper id="7">
       <title>A Flexible, Corpus-Driven Model of Regular and Inverse Selectional Preferences</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <doi>10.1162/coli_a_00017</doi>
       <pages>723-763</pages>
       <url hash="4bd30d32">J10-4007</url>
@@ -395,7 +395,7 @@
     </paper>
     <paper id="12">
       <title>Book Review: Spoken Dialogue Systems by Kristiina Jokinen and <fixed-case>M</fixed-case>ichael <fixed-case>M</fixed-case>c<fixed-case>T</fixed-case>ear</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <doi>10.1162/coli_r_00025</doi>
       <url hash="0f3f8b47">J10-4012</url>
       <bibkey>foster-2010-book</bibkey>
@@ -429,7 +429,7 @@
       <title>Commentary and Discussion: Entropy, the <fixed-case>I</fixed-case>ndus Script, and Language: A Reply to <fixed-case>R</fixed-case>. <fixed-case>S</fixed-case>proat</title>
       <author><first>Rajesh P. N.</first><last>Rao</last></author>
       <author><first>Nisha</first><last>Yadav</last></author>
-      <author><first>Mayank N.</first><last>Vahia</last></author>
+      <author id="mayank-n-vahia"><first>Mayank N.</first><last>Vahia</last></author>
       <author><first>Hrishikesh</first><last>Joglekar</last></author>
       <author><first>Ronojoy</first><last>Adhikari</last></author>
       <author><first>Iravatham</first><last>Mahadevan</last></author>
@@ -440,7 +440,7 @@
     </paper>
     <paper id="17">
       <title>Commentary and Discussion: Reply to <fixed-case>R</fixed-case>ao et al. and <fixed-case>L</fixed-case>ee et al.</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <doi>10.1162/coli_c_00031</doi>
       <pages>807-816</pages>
       <url hash="c9fc54c0">J10-4017</url>
diff --git a/data/xml/J11.xml b/data/xml/J11.xml
index 48999bf78e..9bf6fb91c5 100644
--- a/data/xml/J11.xml
+++ b/data/xml/J11.xml
@@ -16,9 +16,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>S</fixed-case>quibs: Nouveau-<fixed-case>ROUGE</fixed-case>: A Novelty Metric for Update Summarization</title>
-      <author><first>John M.</first><last>Conroy</last></author>
-      <author><first>Judith D.</first><last>Schlesinger</last></author>
-      <author><first>Dianne P.</first><last>O’Leary</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
+      <author id="judith-d-schlesinger"><first>Judith D.</first><last>Schlesinger</last></author>
+      <author id="dianne-p-oleary"><first>Dianne P.</first><last>O’Leary</last></author>
       <doi>10.1162/coli_a_00033</doi>
       <pages>1-8</pages>
       <url hash="f29f6e98">J11-1001</url>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Towards Modular Development of Typed Unification Grammars</title>
-      <author><first>Yael</first><last>Sygal</last></author>
+      <author id="yael-cohen-sygal"><first>Yael</first><last>Sygal</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <doi>10.1162/coli_a_00035</doi>
       <pages>29-74</pages>
@@ -47,8 +47,8 @@
     <paper id="4">
       <title>An Investigation of Interruptions and Resumptions in Multi-Tasking Dialogues</title>
       <author><first>Fan</first><last>Yang</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>Andrew L.</first><last>Kun</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="andrew-l-kun"><first>Andrew L.</first><last>Kun</last></author>
       <doi>10.1162/coli_a_00036</doi>
       <pages>75-104</pages>
       <url hash="0f26aff3">J11-1004</url>
@@ -147,7 +147,7 @@
     </frontmatter>
     <paper id="1">
       <title>Lexicon-Based Methods for Sentiment Analysis</title>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Milan</first><last>Tofiloski</last></author>
       <author><first>Kimberly</first><last>Voll</last></author>
@@ -180,7 +180,7 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>quibs: Stable Classification of Text Genres</title>
       <author><first>Philipp</first><last>Petrenz</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <doi>10.1162/COLI_a_00052</doi>
       <pages>385-393</pages>
       <url hash="d8848d42">J11-2004</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="5">
       <title>Book Review: Handbook of Natural Language Processing (second edition) edited by Nitin Indurkhya and Fred <fixed-case>J</fixed-case>. Damerau</title>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <doi>10.1162/COLI_r_00048</doi>
       <url hash="9b757ffb">J11-2005</url>
       <bibkey>leidner-2011-book</bibkey>
@@ -224,8 +224,8 @@
     <paper id="10">
       <title>Last Words: <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk: Gold Mine or Coal Mine?</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <doi>10.1162/COLI_a_00057</doi>
       <pages>413-420</pages>
       <url hash="bf7388c8">J11-2010</url>
@@ -259,8 +259,8 @@
     </paper>
     <paper id="2">
       <title>Controlling User Perceptions of Linguistic Style: Trainable Generation of Personality Traits</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <doi>10.1162/COLI_a_00063</doi>
       <pages>455-488</pages>
       <url hash="43036210">J11-3002</url>
@@ -270,7 +270,7 @@
       <title>A Strategy for Information Presentation in Spoken Dialog Systems</title>
       <author><first>Vera</first><last>Demberg</last></author>
       <author><first>Andi</first><last>Winterboer</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <doi>10.1162/COLI_a_00064</doi>
       <pages>489-539</pages>
       <url hash="a4ca1a33">J11-3003</url>
@@ -279,8 +279,8 @@
     <paper id="4">
       <title>Dependency Parsing Schemata and Mildly Non-Projective Dependency Parsing</title>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <doi>10.1162/COLI_a_00060</doi>
       <pages>541-586</pages>
       <url hash="acfaf906">J11-3004</url>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Automated Grammatical Error Detection for Language Learners by Claudia Leacock, <fixed-case>M</fixed-case>artin Chodorow, <fixed-case>M</fixed-case>ichael Gamon, and Joel Tetreault</title>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <doi>10.1162/COLI_r_00062</doi>
       <url hash="3db7053e">J11-3006</url>
       <bibkey>pulman-2011-book</bibkey>
@@ -317,7 +317,7 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Computational Modeling of Human Language Acquisition by Afra Alishahi</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <doi>10.1162/COLI_r_00067</doi>
       <url hash="0215112a">J11-3009</url>
       <bibkey>goldwater-2011-book</bibkey>
@@ -372,8 +372,8 @@
     </paper>
     <paper id="2">
       <title>Towards Automatic Error Analysis of Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.1162/COLI_a_00072</doi>
       <pages>657-688</pages>
       <url hash="42f18090">J11-4002</url>
@@ -399,8 +399,8 @@
     <paper id="5">
       <title>Annotating and Learning Event Durations in Text</title>
       <author><first>Feng</first><last>Pan</last></author>
-      <author><first>Rutu</first><last>Mulkar-Mehta</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar-Mehta</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.1162/COLI_a_00075</doi>
       <pages>727-752</pages>
       <url hash="28e28a55">J11-4005</url>
@@ -409,7 +409,7 @@
     <paper id="6">
       <title>Parsing Noun Phrases in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>David</first><last>Vadas</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <doi>10.1162/COLI_a_00076</doi>
       <pages>753-809</pages>
       <url hash="5e018174">J11-4006</url>
@@ -419,7 +419,7 @@
       <title>Information Status Distinctions and Referring Expressions: An Empirical Study of References to People in News Summaries</title>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.1162/COLI_a_00077</doi>
       <pages>811-842</pages>
       <url hash="8a5654b4">J11-4007</url>
@@ -427,7 +427,7 @@
     </paper>
     <paper id="8">
       <title>Half-Context Language Models</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Michael</first><last>Walsh</last></author>
       <doi>10.1162/COLI_a_00078</doi>
       <pages>843-865</pages>
diff --git a/data/xml/J12.xml b/data/xml/J12.xml
index ebaced76ef..6378dcddb0 100644
--- a/data/xml/J12.xml
+++ b/data/xml/J12.xml
@@ -16,9 +16,9 @@
     </frontmatter>
     <paper id="1">
       <title>Affirmative Cue Words in Task-Oriented Dialogue</title>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Štefan</first><last>Beňuš</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
       <doi>10.1162/COLI_a_00083</doi>
       <url hash="5f668c67">J12-1001</url>
       <pages>1-39</pages>
@@ -62,8 +62,8 @@
     </paper>
     <paper id="6">
       <title>Computational Generation of Referring Expressions: A Survey</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <doi>10.1162/COLI_a_00088</doi>
       <url hash="63c633e3">J12-1006</url>
       <pages>173-218</pages>
@@ -71,7 +71,7 @@
     </paper>
     <paper id="7">
       <title>Book Review: Graph-Based Natural Language Processing and Information Retrieval by Rada Mihalcea and Dragomir Radev</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <doi>10.1162/COLI_r_00089</doi>
       <url hash="4f8dda41">J12-1007</url>
       <bibkey>biemann-2012-book</bibkey>
@@ -108,8 +108,8 @@
     </paper>
     <paper id="2">
       <title>Are You Sure That This Happened? Assessing the Factuality Degree of Events in Text</title>
-      <author><first>Roser</first><last>Saurí</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <doi>10.1162/COLI_a_00096</doi>
       <url hash="0fad17ed">J12-2002</url>
       <pages>261-299</pages>
@@ -117,8 +117,8 @@
     </paper>
     <paper id="3">
       <title>Did It Happen? The Pragmatic Complexity of Veridicality Assessment</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <doi>10.1162/COLI_a_00097</doi>
       <url hash="5c4e917c">J12-2003</url>
@@ -129,7 +129,7 @@
       <title>Cross-Genre and Cross-Domain Detection of Semantic Uncertainty</title>
       <author><first>György</first><last>Szarvas</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>György</first><last>Móra</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <doi>10.1162/COLI_a_00098</doi>
@@ -140,7 +140,7 @@
     <paper id="5">
       <title>Speculation and Negation: Rules, Rankers, and the Role of Syntax</title>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonathon</first><last>Read</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <doi>10.1162/COLI_a_00126</doi>
@@ -150,13 +150,13 @@
     </paper>
     <paper id="6">
       <title>Modality and Negation in <fixed-case>SIMT</fixed-case> Use of Modality and Negation in Semantically-Informed Syntactic <fixed-case>MT</fixed-case></title>
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <author><first>Nathaniel W.</first><last>Filardo</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Scott</first><last>Miller</last></author>
       <doi>10.1162/COLI_a_00099</doi>
       <url hash="f924baca">J12-2006</url>
@@ -172,14 +172,14 @@
     </paper>
     <paper id="8">
       <title>Book Review: Unification Grammars by Nissim Francez and Shuly <fixed-case>W</fixed-case>intner</title>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <doi>10.1162/COLI_r_00101</doi>
       <url hash="aaa5b48c">J12-2008</url>
       <bibkey>king-2012-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Review: The Structure of Scientific Articles: Applications to Citation Indexing and Summarization by Simone Teufel</title>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <doi>10.1162/COLI_r_00102</doi>
       <url hash="6e63477f">J12-2009</url>
       <bibkey>mercer-2012-book</bibkey>
@@ -193,7 +193,7 @@
     </paper>
     <paper id="11">
       <title>Book Review: Interactive Multi-Modal Question-Answering by Antal van den Bosch and Gosse Bouma</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <doi>10.1162/COLI_r_00104</doi>
       <url hash="0aac19ce">J12-2011</url>
       <bibkey>orasan-2012-book</bibkey>
@@ -228,7 +228,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: Victor <fixed-case>H</fixed-case>. Yngve</title>
-      <author><first>W. John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>W. John</first><last>Hutchins</last></author>
       <doi>10.1162/COLI_a_00115</doi>
       <url hash="5ab565bc">J12-3001</url>
       <pages>461-467</pages>
@@ -236,10 +236,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>quibs: Fruit Carts: A Domain and Corpus for Research in Dialogue Systems and Psycholinguistics</title>
-      <author><first>Gregory</first><last>Aist</last></author>
-      <author><first>Ellen</first><last>Campana</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="gregory-aist"><first>Gregory</first><last>Aist</last></author>
+      <author id="ellen-campana"><first>Ellen</first><last>Campana</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <author><first>Michael K.</first><last>Tanenhaus</last></author>
       <doi>10.1162/COLI_a_00114</doi>
       <url hash="f69ff610">J12-3002</url>
@@ -248,8 +248,8 @@
     </paper>
     <paper id="3">
       <title>Empirical Risk Minimization for Probabilistic Grammars: Sample Complexity and Hardness of Learning</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/COLI_a_00092</doi>
       <url hash="2d8c2a04">J12-3003</url>
       <pages>479-526</pages>
@@ -258,8 +258,8 @@
     <paper id="4">
       <title>Summarizing Information Graphics Textually</title>
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.1162/COLI_a_00091</doi>
       <url hash="5d1ae0a8">J12-3004</url>
       <pages>527-574</pages>
@@ -267,8 +267,8 @@
     </paper>
     <paper id="5">
       <title>Modeling Regular Polysemy: A Study on the Semantic Classification of <fixed-case>C</fixed-case>atalan Adjectives</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <doi>10.1162/COLI_a_00093</doi>
       <url hash="63948f90">J12-3005</url>
@@ -334,7 +334,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: Encounters with Language</title>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
       <doi>10.1162/COLI_a_00129</doi>
       <url hash="de4efecf">J12-4001</url>
       <pages>701-718</pages>
@@ -352,8 +352,8 @@
     </paper>
     <paper id="3">
       <title>Semantic Role Labeling of Implicit Arguments for Nominal Predicates</title>
-      <author><first>Matthew</first><last>Gerber</last></author>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Gerber</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <doi>10.1162/COLI_a_00110</doi>
       <url hash="54ce07e8">J12-4003</url>
       <pages>755-798</pages>
@@ -372,8 +372,8 @@
     <paper id="5">
       <title>Empirical Methods for the Study of Denotation in Nominalizations in <fixed-case>S</fixed-case>panish</title>
       <author><first>Aina</first><last>Peris</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <doi>10.1162/COLI_a_00112</doi>
       <url hash="3b0df0b2">J12-4005</url>
       <pages>827-865</pages>
@@ -381,8 +381,8 @@
     </paper>
     <paper id="6">
       <title><fixed-case>LFG</fixed-case> Generation by Grammar Specialization</title>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <doi>10.1162/COLI_a_00113</doi>
       <url hash="d7d408fd">J12-4006</url>
       <pages>867-915</pages>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="7">
       <title>Book Review: Discourse Processing by Manfred Stede</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <doi>10.1162/COLI_r_00118</doi>
       <url hash="abcd3fce">J12-4007</url>
       <bibkey>webber-2012-book</bibkey>
diff --git a/data/xml/J13.xml b/data/xml/J13.xml
index f70eb59026..9cfee72d39 100644
--- a/data/xml/J13.xml
+++ b/data/xml/J13.xml
@@ -34,8 +34,8 @@
     <paper id="3">
       <title>Parsing Morphologically Rich Languages: Introduction to the Special Issue</title>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <doi>10.1162/COLI_a_00133</doi>
       <url hash="e064acc5">J13-1003</url>
@@ -53,11 +53,11 @@
     </paper>
     <paper id="5">
       <title>Knowledge Sources for Constituent Parsing of <fixed-case>G</fixed-case>erman, a Morphologically Rich and Less-Configurational Language</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Renjing</first><last>Wang</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/COLI_a_00135</doi>
       <url hash="e1fa1842">J13-1005</url>
       <pages>57-85</pages>
@@ -85,7 +85,7 @@
       <title>Dependency Parsing of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic with Lexical and Inflectional Features</title>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.1162/COLI_a_00138</doi>
       <url hash="bdc9a389">J13-1008</url>
       <pages>161-194</pages>
@@ -94,8 +94,8 @@
     <paper id="9">
       <title>Parsing Models for Identifying Multiword Expressions</title>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.1162/COLI_a_00139</doi>
       <url hash="704b140a">J13-1009</url>
       <pages>195-227</pages>
@@ -119,7 +119,7 @@
     <paper id="1">
       <title>A Joint Model to Identify and Align Bilingual Named Entities</title>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <doi>10.1162/COLI_a_00122</doi>
       <url hash="e6afc6d5">J13-2001</url>
@@ -156,7 +156,7 @@
     <paper id="5">
       <title>Learning Dependency-Based Compositional Semantics</title>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Michael I.</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael I.</first><last>Jordan</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <doi>10.1162/COLI_a_00127</doi>
       <url hash="0a078064">J13-2005</url>
@@ -208,7 +208,7 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>quibs: What Is a Paraphrase?</title>
       <author><first>Rahul</first><last>Bhagat</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.1162/COLI_a_00166</doi>
       <url hash="30bfcf12">J13-3001</url>
       <pages>463-472</pages>
@@ -226,7 +226,7 @@
     <paper id="3">
       <title>Measuring Word Meaning in Context</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Nicholas</first><last>Gaylord</last></author>
       <doi>10.1162/COLI_a_00142</doi>
       <url hash="b20e331b">J13-3003</url>
@@ -235,10 +235,10 @@
     </paper>
     <paper id="4">
       <title>Computing Lexical Contrast</title>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
-      <author><first>Peter D.</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter D.</first><last>Turney</last></author>
       <doi>10.1162/COLI_a_00143</doi>
       <url hash="eee6b3dc">J13-3004</url>
       <pages>555-590</pages>
@@ -246,10 +246,10 @@
     </paper>
     <paper id="5">
       <title><fixed-case>XMG</fixed-case>: e<fixed-case>X</fixed-case>tensible <fixed-case>M</fixed-case>eta<fixed-case>G</fixed-case>rammar</title>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>Joseph Le</first><last>Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph Le</first><last>Roux</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <doi>10.1162/COLI_a_00144</doi>
       <url hash="04fb95a2">J13-3005</url>
@@ -259,8 +259,8 @@
     <paper id="6">
       <title>Selectional Preferences for Semantic Role Classification</title>
       <author><first>Beñat</first><last>Zapirain</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <doi>10.1162/COLI_a_00145</doi>
       <url hash="c25f5cc1">J13-3006</url>
@@ -269,7 +269,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn Reloaded: A Graph-Based Algorithm for Taxonomy Induction</title>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <doi>10.1162/COLI_a_00146</doi>
@@ -291,7 +291,7 @@
       <author><first>Eva</first><last>D’hondt</last></author>
       <author><first>Suzan</first><last>Verberne</last></author>
       <author><first>Cornelis</first><last>Koster</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <doi>10.1162/COLI_a_00149</doi>
       <url hash="6d49dd5e">J13-3009</url>
       <pages>755-775</pages>
@@ -299,7 +299,7 @@
     </paper>
     <paper id="10">
       <title>Book Review:</title>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <doi>10.1162/COLI_r_00165</doi>
       <url hash="7926f955">J13-3010</url>
       <bibkey>wiren-2013-book</bibkey>
@@ -321,7 +321,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>ACL</fixed-case> Lifetime Achievement Award: Influences and Inferences</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.1162/COLI_a_00171</doi>
       <url hash="c6cecbaf">J13-4001</url>
       <pages>781-798</pages>
@@ -339,8 +339,8 @@
     <paper id="3">
       <title>A Constraint-Based Hypergraph Partitioning Approach to Coreference Resolution</title>
       <author><first>Emili</first><last>Sapena</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <doi>10.1162/COLI_a_00151</doi>
       <url hash="84024249">J13-4003</url>
       <pages>847-884</pages>
@@ -349,11 +349,11 @@
     <paper id="4">
       <title>Deterministic Coreference Resolution Based on Entity-Centric, Precision-Ranked Rules</title>
       <author><first>Heeyoung</first><last>Lee</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Yves</first><last>Peirsman</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <doi>10.1162/COLI_a_00152</doi>
       <url hash="d82b23ba">J13-4004</url>
       <pages>885-916</pages>
@@ -363,7 +363,7 @@
       <title>Plagiarism Meets Paraphrasing: Insights for the Next Generation in Automatic Plagiarism Detection</title>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Marta</first><last>Vila</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <doi>10.1162/COLI_a_00153</doi>
       <url hash="b8be8b89">J13-4005</url>
@@ -372,10 +372,10 @@
     </paper>
     <paper id="6">
       <title>Multilingual Joint Parsing of Syntactic and Semantic Dependencies with a Latent Variable Model</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <doi>10.1162/COLI_a_00158</doi>
       <url hash="ba9161c0">J13-4006</url>
       <pages>949-998</pages>
diff --git a/data/xml/J14.xml b/data/xml/J14.xml
index 3e690fba49..cfa74952bf 100644
--- a/data/xml/J14.xml
+++ b/data/xml/J14.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: Ivan A. Sag</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>1-7</pages>
       <doi>10.1162/COLI_a_00179</doi>
       <url hash="6a820d69">J14-1001</url>
@@ -26,9 +26,9 @@
       <title>Frame-Semantic Parsing</title>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Desai</first><last>Chen</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>9-56</pages>
       <doi>10.1162/COLI_a_00163</doi>
       <url hash="f9c2d914">J14-1002</url>
@@ -36,9 +36,9 @@
     </paper>
     <paper id="3">
       <title>Random Walks for Knowledge-Based Word Sense Disambiguation</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier López</first><last>de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier López</first><last>de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>57-84</pages>
       <doi>10.1162/COLI_a_00164</doi>
       <url hash="02bb04dd">J14-1003</url>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>A</fixed-case>rabic Dialect Identification</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>171-202</pages>
       <doi>10.1162/COLI_a_00169</doi>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="8">
       <title>Book Review: Natural Language Processing for Historical Texts by <fixed-case>M</fixed-case>ichael Piotrowski</title>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>231-233</pages>
       <doi>10.1162/COLI_r_00180</doi>
       <url hash="6aa2aec1">J14-1008</url>
@@ -155,8 +155,8 @@
     </paper>
     <paper id="4">
       <title>Unsupervised Event Coreference Resolution</title>
-      <author><first>Cosmin</first><last>Adrian Bejan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin</first><last>Adrian Bejan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>311-347</pages>
       <doi>10.1162/COLI_a_00174</doi>
       <url hash="f3ca3fef">J14-2004</url>
@@ -165,7 +165,7 @@
     <paper id="5">
       <title>Phrase Dependency Machine Translation with Quasi-Synchronous Tree-to-Tree Features</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>349-401</pages>
       <doi>10.1162/COLI_a_00175</doi>
       <url hash="899a8707">J14-2005</url>
@@ -173,7 +173,7 @@
     </paper>
     <paper id="6">
       <title>Practical Linguistic Steganography using Contextual Synonym Substitution and a Novel Vertex Coding Method</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>403-448</pages>
       <doi>10.1162/COLI_a_00176</doi>
@@ -199,7 +199,7 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Sentiment Analysis and Opinion Mining by Bing <fixed-case>L</fixed-case>iu</title>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>511-513</pages>
       <doi>10.1162/COLI_r_00186</doi>
       <url hash="b5104ef5">J14-2009</url>
@@ -238,9 +238,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>S</fixed-case>quibs: Automatic Selection of <fixed-case>HPSG</fixed-case>-Parsed Sentences for Treebank Construction</title>
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>523–531</pages>
       <doi>10.1162/COLI_a_00190</doi>
       <url hash="c8dbf06e">J14-3001</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>quibs: On the Universal Generation Problem for Unification Grammars</title>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
       <pages>533-538</pages>
       <doi>10.1162/COLI_a_00191</doi>
       <url hash="cdb84736">J14-3002</url>
@@ -256,10 +256,10 @@
     </paper>
     <paper id="3">
       <title>A Random Walk–Based Model for Identifying Semantic Orientation</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Wanchen</first><last>Lu</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>539-562</pages>
       <doi>10.1162/COLI_a_00192</doi>
       <url hash="62e62e38">J14-3003</url>
@@ -270,7 +270,7 @@
       <author><first>Xu</first><last>Sun</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Houfeng</first><last>Wang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>563-586</pages>
       <doi>10.1162/COLI_a_00193</doi>
       <url hash="69b99852">J14-3004</url>
@@ -308,9 +308,9 @@
       <title>Pushdown Automata in Statistical Machine Translation</title>
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>687-723</pages>
       <doi>10.1162/COLI_a_00197</doi>
       <url hash="ba39f6cd">J14-3008</url>
@@ -318,7 +318,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>O</fixed-case>bituary: <fixed-case>C</fixed-case>harles <fixed-case>J</fixed-case>. <fixed-case>F</fixed-case>illmore</title>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>725-731</pages>
       <doi>10.1162/COLI_a_00201</doi>
       <url hash="d0847edb">J14-3009</url>
@@ -348,7 +348,7 @@
     </paper>
     <paper id="2">
       <title>Applications of Lexicographic Semirings to Problems in Speech and Language Processing</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Mahsa</first><last>Yarmohammadi</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
       <author><first>Brian</first><last>Roark</last></author>
@@ -359,8 +359,8 @@
     </paper>
     <paper id="3">
       <title>Stochastic Language Generation in Dialogue using Factored Language Models</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>763-799</pages>
       <doi>10.1162/COLI_a_00199</doi>
       <url hash="e48e5910">J14-4003</url>
@@ -368,9 +368,9 @@
     </paper>
     <paper id="4">
       <title>Latent Trees for Coreference Resolution</title>
-      <author><first>Eraldo Rezende</first><last>Fernandes</last></author>
-      <author><first>Cícero Nogueira</first><last>dos Santos</last></author>
-      <author><first>Ruy Luiz</first><last>Milidiú</last></author>
+      <author id="eraldo-fernandes"><first>Eraldo Rezende</first><last>Fernandes</last></author>
+      <author id="cicero-dos-santos"><first>Cícero Nogueira</first><last>dos Santos</last></author>
+      <author id="ruy-luiz-milidiu"><first>Ruy Luiz</first><last>Milidiú</last></author>
       <pages>801-835</pages>
       <doi>10.1162/COLI_a_00200</doi>
       <url hash="2cf49600">J14-4004</url>
@@ -387,7 +387,7 @@
     </paper>
     <paper id="6">
       <title>Adaptive Generation in Dialogue Systems Using Dynamic User Modeling</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>883-920</pages>
       <doi>10.1162/COLI_a_00203</doi>
@@ -397,8 +397,8 @@
     <paper id="7">
       <title>Reflections on the <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank, Comparable Corpora, and Complementary Annotation</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>921-950</pages>
       <doi>10.1162/COLI_a_00204</doi>
       <url hash="d51b90f4">J14-4007</url>
diff --git a/data/xml/J15.xml b/data/xml/J15.xml
index 89d0168784..89e4b67997 100644
--- a/data/xml/J15.xml
+++ b/data/xml/J15.xml
@@ -17,7 +17,7 @@
     <paper id="1">
       <title>Towards Topic-to-Question Generation</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>1-20</pages>
       <doi>10.1162/COLI_a_00206</doi>
       <url hash="7367d5db">J15-1001</url>
@@ -46,8 +46,8 @@
     </paper>
     <paper id="4">
       <title>Concrete Models and Empirical Evaluations for the Categorical Compositional Distributional Model of Meaning</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>71-118</pages>
       <doi>10.1162/COLI_a_00209</doi>
       <url hash="0ac3e82a">J15-1004</url>
@@ -56,7 +56,7 @@
     <paper id="5">
       <title>Automatic Adaptation of Annotations</title>
       <author><first>Wenbin</first><last>Jiang</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>119-147</pages>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Linguistic Fundamentals for Natural Language Processing: 100 Essentials from Morphology and Syntax by Emily <fixed-case>M</fixed-case>. Bender</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>153-155</pages>
       <doi>10.1162/COLI_a_00212</doi>
       <url hash="4ca72cfb">J15-1007</url>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Recognizing Textual Entailment: Models and Applications by <fixed-case>I</fixed-case>do <fixed-case>D</fixed-case>agan, <fixed-case>D</fixed-case>an <fixed-case>R</fixed-case>oth, Mark Sammons and Fabio Massimo Zanzotto</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>157-159</pages>
       <doi>10.1162/COLI_a_00213</doi>
       <url hash="89611e31">J15-1008</url>
@@ -98,9 +98,9 @@
     </paper>
     <paper id="10">
       <title><fixed-case>S</fixed-case>quibs: When the Whole Is Not Greater Than the Combination of Its Parts: A “Decompositional” Look at Compositional Distributional Semantics</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Lorenzo</first><last>Ferrone</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>165-173</pages>
       <doi>10.1162/COLI_a_00215</doi>
       <url hash="8cf1180a">J15-1010</url>
@@ -109,8 +109,8 @@
     <paper id="11">
       <title><fixed-case>S</fixed-case>quibs: Spelling Error Patterns in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Priscila A.</first><last>Gimenes</last></author>
-      <author><first>Norton T.</first><last>Roman</last></author>
-      <author><first>Ariadne M. B. R.</first><last>Carvalho</last></author>
+      <author id="norton-trevisan-roman"><first>Norton T.</first><last>Roman</last></author>
+      <author id="ariadne-m-b-rizzoni-carvalho"><first>Ariadne M. B. R.</first><last>Carvalho</last></author>
       <pages>175-183</pages>
       <doi>10.1162/COLI_a_00216</doi>
       <url hash="8507ea07">J15-1011</url>
@@ -135,9 +135,9 @@
       <title>The Operation Sequence <fixed-case>M</fixed-case>odel—<fixed-case>C</fixed-case>ombining N-Gram-Based and Phrase-Based Statistical Machine Translation</title>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>157–186</pages>
       <doi>10.1162/COLI_a_00218</doi>
       <url hash="51d6d1fb">J15-2001</url>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Ontology-Based Interpretation of Natural Language by Philipp Cimiano, Christina Unger and John <fixed-case>M</fixed-case>c<fixed-case>C</fixed-case>rae</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>319-322</pages>
       <doi>10.1162/COLI_r_00223</doi>
       <url hash="7d79cc6d">J15-2006</url>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Robots that Talk and Listen edited by Judith <fixed-case>A</fixed-case>. Markowitz</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <pages>323-326</pages>
       <doi>10.1162/COLI_r_00224</doi>
       <url hash="4287f44c">J15-2007</url>
@@ -218,7 +218,7 @@
     <paper id="1">
       <title>Large Linguistic Corpus Reduction with <fixed-case>SCP</fixed-case> Algorithms</title>
       <author><first>Nelly</first><last>Barbot</last></author>
-      <author><first>Olivier</first><last>Boëffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boëffard</last></author>
       <author><first>Jonathan</first><last>Chevelu</last></author>
       <author><first>Arnaud</first><last>Delhay</last></author>
       <pages>355-383</pages>
@@ -228,9 +228,9 @@
     </paper>
     <paper id="2">
       <title><fixed-case>CODRA</fixed-case>: A Novel Discriminative Framework for Rhetorical Analysis</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <pages>385-435</pages>
       <doi>10.1162/COLI_a_00226</doi>
       <url hash="c6489a98">J15-3002</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="4">
       <title>Computational Constancy Measures of <fixed-case>T</fixed-case>exts—<fixed-case>Y</fixed-case>ule’s K and Rényi’s Entropy</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Shunsuke</first><last>Aihara</last></author>
       <pages>481-502</pages>
       <doi>10.1162/COLI_a_00228</doi>
@@ -291,7 +291,7 @@
     </frontmatter>
     <paper id="1">
       <title>Graph-Based Word Alignment for Clinical Language Evaluation</title>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>549-578</pages>
       <doi>10.1162/COLI_a_00232</doi>
@@ -335,7 +335,7 @@
     </paper>
     <paper id="6">
       <title>Last Words: Computational Linguistics and Deep Learning</title>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>701-707</pages>
       <doi>doi:10.1162/COLI_a_00239</doi>
       <url hash="d61f55a4">J15-4006</url>
@@ -351,7 +351,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>O</fixed-case>bituaries: <fixed-case>A</fixed-case>dam Kilgarriff</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>719-721</pages>
       <doi>10.1162/COLI_a_00234</doi>
       <url hash="bcd60092">J15-4008</url>
@@ -359,9 +359,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>O</fixed-case>bituaries: Jane <fixed-case>J</fixed-case>. Robinson</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>723-726</pages>
       <doi>10.1162/COLI_a_00235</doi>
       <url hash="4981cd8b">J15-4009</url>
diff --git a/data/xml/J16.xml b/data/xml/J16.xml
index 8e57bca125..2416fd58bb 100644
--- a/data/xml/J16.xml
+++ b/data/xml/J16.xml
@@ -47,7 +47,7 @@
     </paper>
     <paper id="4">
       <title>Online Learning for Statistical Machine Translation</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
       <pages>121-161</pages>
       <doi>10.1162/COLI_a_00244</doi>
       <url hash="0af77e51">J16-1004</url>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="3">
       <title>Word Sense Clustering and Clusterability</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
       <pages>245-275</pages>
@@ -99,7 +99,7 @@
     <paper id="4">
       <title>Source Language Adaptation Approaches for Resource-Poor Machine Translation</title>
       <author><first>Pidong</first><last>Wang</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>277-306</pages>
       <doi>10.1162/COLI_a_00248</doi>
@@ -109,9 +109,9 @@
     <paper id="5">
       <title>Mining Parallel Corpora from <fixed-case>S</fixed-case>ina <fixed-case>W</fixed-case>eibo and <fixed-case>T</fixed-case>witter</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Luís</first><last>Marujo</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>307-343</pages>
       <doi>10.1162/COLI_a_00249</doi>
@@ -121,7 +121,7 @@
     <paper id="6">
       <title><fixed-case>S</fixed-case>quibs: When the Whole Is Less Than the Sum of Its Parts: How Composition Affects <fixed-case>PMI</fixed-case> Values in Distributional Semantic Vectors</title>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>345-350</pages>
       <doi>10.1162/COLI_a_00250</doi>
       <url hash="432ba338">J16-2006</url>
@@ -129,10 +129,10 @@
     </paper>
     <paper id="7">
       <title><fixed-case>O</fixed-case>bituary: In Memoriam: Susan Armstrong</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <pages>351-352</pages>
       <doi>10.1162/COLI_a_00251</doi>
       <url hash="dd3ef5f7">J16-2007</url>
@@ -157,7 +157,7 @@
       <title>Transition-Based Parsing for Deep Dependency Structures</title>
       <author><first>Xun</first><last>Zhang</last></author>
       <author><first>Yantao</first><last>Du</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>353–389</pages>
       <doi>10.1162/COLI_a_00252</doi>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="2">
       <title>Towards Accurate and Efficient <fixed-case>C</fixed-case>hinese Part-of-Speech Tagging</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>391–419</pages>
       <doi>10.1162/COLI_a_00253</doi>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="3">
       <title>Parsing Linear Context-Free Rewriting Systems with Fast Matrix Multiplication</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <pages>421–455</pages>
       <doi>10.1162/COLI_a_00254</doi>
@@ -184,8 +184,8 @@
     </paper>
     <paper id="4">
       <title>All Mixed Up? Finding the Optimal Feature Set for General Readability Prediction and Its Application to <fixed-case>E</fixed-case>nglish and <fixed-case>D</fixed-case>utch</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>457–490</pages>
       <doi>10.1162/COLI_a_00255</doi>
       <url hash="8197648a">J16-3004</url>
@@ -213,7 +213,7 @@
       <title>Computational Sociolinguistics: A <fixed-case>S</fixed-case>urvey</title>
       <author><first>Dong</first><last>Nguyen</last></author>
       <author><first>A. Seza</first><last>Doğruöz</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <author><first>Franciska</first><last>de Jong</last></author>
       <pages>537–593</pages>
       <doi>10.1162/COLI_a_00258</doi>
@@ -255,8 +255,8 @@
     </paper>
     <paper id="2">
       <title>Formal Distributional Semantics: Introduction to the Special Issue</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>619–635</pages>
       <doi>10.1162/COLI_a_00261</doi>
       <url hash="1f0612c3">J16-4002</url>
@@ -264,10 +264,10 @@
     </paper>
     <paper id="3">
       <title>There Is No Logical Negation Here, But There Are Alternatives: Modeling Conversational Negation with Distributional Semantics</title>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>637–660</pages>
       <doi>10.1162/COLI_a_00262</doi>
       <url hash="f7fb7bd7">J16-4003</url>
@@ -286,8 +286,8 @@
     </paper>
     <paper id="5">
       <title>Integrating Type Theory and Distributional Semantics: A Case Study on Adjective–Noun Compositions</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Antoine</first><last>Bride</last></author>
       <author><first>Márta</first><last>Abrusán</last></author>
       <pages>703–725</pages>
@@ -297,7 +297,7 @@
     </paper>
     <paper id="6">
       <title>Aligning Packed Dependency Trees: A Theory of Composition for Distributional Semantics</title>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
       <author><first>Thomas</first><last>Kober</last></author>
@@ -312,7 +312,7 @@
       <author><first>Stephen</first><last>Roller</last></author>
       <author><first>Pengxiang</first><last>Cheng</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>763–808</pages>
       <doi>10.1162/COLI_a_00266</doi>
       <url hash="9db5d69c">J16-4007</url>
@@ -337,7 +337,7 @@
     </paper>
     <paper id="10">
       <title>Book Reviews: Semantic Similarity from Natural Language and Ontology Analysis by Sébastien Harispe, Sylvie Ranwez, Stefan Janaqi, and Jacky Montmain</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>829–831</pages>
       <doi>10.1162/COLI_r_00269</doi>
       <url hash="024a9cb1">J16-4010</url>
diff --git a/data/xml/J17.xml b/data/xml/J17.xml
index b9e595c4bf..82cb9e36d4 100644
--- a/data/xml/J17.xml
+++ b/data/xml/J17.xml
@@ -38,9 +38,9 @@
       <title>Multilingual Metaphor Processing: Experiments with Semi-Supervised and Unsupervised Learning</title>
       <author><first>Ekaterina</first><last>Shutova</last></author>
       <author><first>Lin</first><last>Sun</last></author>
-      <author><first>Elkin</first><last>Darío Gutiérrez</last></author>
+      <author id="e-dario-gutierrez"><first>Elkin</first><last>Darío Gutiérrez</last></author>
       <author><first>Patricia</first><last>Lichtenstein</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <abstract>Highly frequent in language and communication, metaphor represents a significant challenge for Natural Language Processing (NLP) applications. Computational work on metaphor has traditionally evolved around the use of hand-coded knowledge, making the systems hard to scale. Recent years have witnessed a rise in statistical approaches to metaphor processing. However, these approaches often require extensive human annotation effort and are predominantly evaluated within a limited domain. In contrast, we experiment with weakly supervised and unsupervised techniques—with little or no annotation—to generalize higher-level mechanisms of metaphor from distributional properties of concepts. We investigate different levels and types of supervision (learning from linguistic examples vs. learning from a given set of metaphorical mappings vs. learning without annotation) in flat and hierarchical, unconstrained and constrained clustering settings. Our aim is to identify the optimal type of supervision for a learning algorithm that discovers patterns of metaphorical association from text. In order to investigate the scalability and adaptability of our models, we applied them to data in three languages from different language groups—English, Spanish, and Russian—achieving state-of-the-art results with little supervision. Finally, we demonstrate that statistical methods can facilitate and scale up cross-linguistic research on metaphor.</abstract>
       <pages>71-123</pages>
       <doi>10.1162/COLI_a_00275</doi>
@@ -60,7 +60,7 @@
     <paper id="5">
       <title>Hashtag Sense Clustering Based on Temporal Similarity</title>
       <author><first>Giovanni</first><last>Stilo</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <abstract>Hashtags are creative labels used in micro-blogs to characterize the topic of a message/discussion. Regardless of the use for which they were originally intended, hashtags cannot be used as a means to cluster messages with similar content. First, because hashtags are created in a spontaneous and highly dynamic way by users in multiple languages, the same topic can be associated with different hashtags, and conversely, the same hashtag may refer to different topics in different time periods. Second, contrary to common words, hashtag disambiguation is complicated by the fact that no sense catalogs (e.g., Wikipedia or WordNet) are available; and, furthermore, hashtag labels are difficult to analyze, as they often consist of acronyms, concatenated words, and so forth. A common way to determine the meaning of hashtags has been to analyze their context, but, as we have just pointed out, hashtags can have multiple and variable meanings. In this article, we propose a temporal sense clustering algorithm based on the idea that semantically related hashtags have similar and synchronous usage patterns.</abstract>
       <pages>181-200</pages>
       <doi>10.1162/COLI_a_00277</doi>
@@ -69,9 +69,9 @@
     </paper>
     <paper id="6">
       <title>Evaluative Language Beyond Bags of Words: Linguistic Insights and Computational Applications</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
-      <author><first>Yannick</first><last>Mathieu</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
+      <author id="yannick-mathieu"><first>Yannick</first><last>Mathieu</last></author>
       <abstract>The study of evaluation, affect, and subjectivity is a multidisciplinary enterprise, including sociology, psychology, economics, linguistics, and computer science. A number of excellent computational linguistics and linguistic surveys of the field exist. Most surveys, however, do not bring the two disciplines together to show how methods from linguistics can benefit computational sentiment analysis systems. In this survey, we show how incorporating linguistic insights, discourse information, and other contextual phenomena, in combination with the statistical exploitation of data, can result in an improvement over approaches that take advantage of only one of these perspectives. We first provide a comprehensive introduction to evaluative language from both a linguistic and computational perspective. We then argue that the standard computational definition of the concept of evaluative language neglects the dynamic nature of evaluation, in which the interpretation of a given evaluation depends on linguistic and extra-linguistic contextual factors. We thus propose a dynamic definition that incorporates update functions. The update functions allow for different contextual aspects to be incorporated into the calculation of sentiment for evaluative words or expressions, and can be applied at all levels of discourse. We explore each level and highlight which linguistic aspects contribute to accurate extraction of sentiment. We end the review by outlining what we believe the future directions of sentiment analysis are, and the role that discourse and contextual information need to play.</abstract>
       <pages>201-264</pages>
       <doi>10.1162/COLI_a_00278</doi>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="8">
       <title>Book Review: Automatic Detection of Verbal Deception by Eileen Fitzpatrick, Joan Bachenko and Tommaso Fornaciari</title>
-      <author><first>Yoong</first><last>Keok Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong</first><last>Keok Lee</last></author>
       <pages>269-271</pages>
       <doi>10.1162/COLI_r_00282</doi>
       <url hash="9e272402">J17-1008</url>
@@ -122,9 +122,9 @@
     <paper id="2">
       <title>Greedy Transition-Based Dependency Parsing with Stack <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <abstract>We introduce a greedy transition-based parser that learns to represent parser states using recurrent neural networks. Our primary innovation that enables us to do this efficiently is a new control structure for sequential neural networks—the stack long short-term memory unit (LSTM). Like the conventional stack data structures used in transition-based parsers, elements can be pushed to or popped from the top of the stack in constant time, but, in addition, an LSTM maintains a continuous space embedding of the stack contents. Our model captures three facets of the parser’s state: (i) unbounded look-ahead into the buffer of incoming words, (ii) the complete history of transition actions taken by the parser, and (iii) the complete contents of the stack of partially built tree fragments, including their internal structures. In addition, we compare two different word representations: (i) standard word vectors based on look-up tables and (ii) character-based models of words. Although standard word embedding models work well in all languages, the character-based models improve the handling of out-of-vocabulary words, particularly in morphologically rich languages. Finally, we discuss the use of dynamic oracles in training the parser. During training, dynamic oracles alternate between sampling parser states from the training data and from the model as it is being learned, making the model more robust to the kinds of errors that will be made at test time. Training our model with dynamic oracles yields a linear-time greedy parser with very competitive performance.</abstract>
       <pages>311–347</pages>
       <doi>10.1162/COLI_a_00285</doi>
@@ -135,8 +135,8 @@
       <title>Statistical Models for Unsupervised, Semi-Supervised Supervised Transliteration Mining</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <abstract>We present a generative model that efficiently mines transliteration pairs in a consistent fashion in three different settings: unsupervised, semi-supervised, and supervised transliteration mining. The model interpolates two sub-models, one for the generation of transliteration pairs and one for the generation of non-transliteration pairs (i.e., noise). The model is trained on noisy unlabeled data using the EM algorithm. During training the transliteration sub-model learns to generate transliteration pairs and the fixed non-transliteration model generates the noise pairs. After training, the unlabeled data is disambiguated based on the posterior probabilities of the two sub-models. We evaluate our transliteration mining system on data from a transliteration mining shared task and on parallel corpora. For three out of four language pairs, our system outperforms all semi-supervised and supervised systems that participated in the NEWS 2010 shared task. On word pairs extracted from parallel corpora with fewer than 2% transliteration pairs, our system achieves up to 86.7% F-measure with 77.9% precision and 97.8% recall.</abstract>
       <pages>349–375</pages>
       <doi>10.1162/COLI_a_00286</doi>
@@ -159,7 +159,7 @@
     </paper>
     <paper id="5">
       <title>Framing <fixed-case>QA</fixed-case> as Building and Ranking Intersentence Answer Justifications</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Peter</first><last>Clark</last></author>
@@ -171,7 +171,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>S</fixed-case>quib: Effects of Cognitive Effort on the Resolution of Overspecified Descriptions</title>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Alex Gwo Jen</first><last>Lan</last></author>
       <author><first>Matheus Mendes</first><last>de Sant’Ana</last></author>
       <author><first>Flávio Luiz</first><last>Coutinho</last></author>
@@ -238,7 +238,7 @@
     <paper id="4">
       <title><fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>xtend: Combining Word Embeddings with Semantic Resources</title>
       <author><first>Sascha</first><last>Rothe</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <abstract>We present AutoExtend, a system that combines word embeddings with semantic resources by learning embeddings for non-word objects like synsets and entities and learning word embeddings that incorporate the semantic information from the resource. The method is based on encoding and decoding the word embeddings and is flexible in that it can take any word embeddings as input and does not need an additional training corpus. The obtained embeddings live in the same vector space as the input word embeddings. A sparse tensor formalization guarantees efficiency and parallelizability. We use WordNet, GermaNet, and Freebase as semantic resources. AutoExtend achieves state-of-the-art performance on Word-in-Context Similarity and Word Sense Disambiguation tasks.</abstract>
       <pages>593–617</pages>
       <doi>10.1162/COLI_a_00294</doi>
@@ -281,10 +281,10 @@
     </frontmatter>
     <paper id="1">
       <title>Discourse Structure in Machine Translation Evaluation</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <abstract>In this article, we explore the potential of using sentence-level discourse structure for machine translation evaluation. We first design discourse-aware similarity measures, which use all-subtree kernels to compare discourse parse trees in accordance with the Rhetorical Structure Theory (RST). Then, we show that a simple linear combination with these measures can help improve various existing machine translation evaluation metrics regarding correlation with human judgments both at the segment level and at the system level. This suggests that discourse information is complementary to the information used by many of the existing evaluation metrics, and thus it could be taken into account when developing richer evaluation metrics, such as the WMT-14 winning combined metric DiscoTKparty. We also provide a detailed analysis of the relevance of various discourse elements and relations from the RST parse trees for machine translation evaluation. In particular, we show that (i) all aspects of the RST tree are relevant, (ii) nuclearity is more useful than relation type, and (iii) the similarity of the translation RST tree to the reference RST tree is positively correlated with translation quality.</abstract>
       <pages>683–722</pages>
       <doi>10.1162/COLI_a_00298</doi>
@@ -305,7 +305,7 @@
     <paper id="3">
       <title>Representation of Linguistic Form and Function in Recurrent Neural Networks</title>
       <author><first>Ákos</first><last>Kádár</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <abstract>We present novel methods for analyzing the activation patterns of recurrent neural networks from a linguistic point of view and explore the types of linguistic structure they learn. As a case study, we use a standard standalone language model, and a multi-task gated recurrent network architecture consisting of two parallel pathways with shared word embeddings: The Visual pathway is trained on predicting the representations of the visual scene corresponding to an input sentence, and the Textual pathway is trained to predict the next word in the same sentence. We propose a method for estimating the amount of contribution of individual tokens in the input to the final prediction of the networks. Using this method, we show that the Visual pathway pays selective attention to lexical categories and grammatical functions that carry semantic information, and learns to treat word types differently depending on their grammatical function and their position in the sequential structure of the sentence. In contrast, the language models are comparatively more sensitive to words with a syntactic function. Further analysis of the most informative n-gram contexts for each model shows that in comparison with the Visual pathway, the language models react more strongly to abstract contexts that represent syntactic constructions.</abstract>
       <pages>761–780</pages>
@@ -328,13 +328,13 @@
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>urvey: Multiword Expression Processing: A <fixed-case>S</fixed-case>urvey</title>
-      <author><first>Mathieu</first><last>Constant</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <author><first>Johanna</first><last>Monti</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <abstract>Multiword expressions (MWEs) are a class of linguistic forms spanning conventional word boundaries that are both idiosyncratic and pervasive across different languages. The structure of linguistic processing that depends on the clear distinction between words and phrases has to be re-thought to accommodate MWEs. The issue of MWE handling is crucial for NLP applications, where it raises a number of challenges. The emergence of solutions in the absence of guiding principles motivates this survey, whose aim is not only to provide a focused review of MWE processing, but also to clarify the nature of interactions between MWE processing and downstream applications. We propose a conceptual framework within which challenges and research contributions can be positioned. It offers a shared understanding of what is meant by “MWE processing,” distinguishing the subtasks of MWE discovery and identification. It also elucidates the interactions between MWE processing and two use cases: Parsing and machine translation. Many of the approaches in the literature can be differentiated according to how MWE processing is timed with respect to underlying use cases. We discuss how such orchestration choices affect the scope of MWE-aware systems. For each of the two MWE processing subtasks and for each of the two use cases, we conclude on open issues and research perspectives.</abstract>
       <pages>837–892</pages>
       <doi>10.1162/COLI_a_00302</doi>
@@ -356,7 +356,7 @@
       <author><first>Kilian</first><last>Evang</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Hessel</first><last>Haagsma</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Martijn</first><last>Wieling</last></author>
       <pages>897–904</pages>
       <doi>10.1162/COLI_a_00304</doi>
diff --git a/data/xml/J18.xml b/data/xml/J18.xml
index 4c7fc1df5c..f458bc5d5d 100644
--- a/data/xml/J18.xml
+++ b/data/xml/J18.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Smart Enough to Talk With Us? Foundations and Challenges for Dialogue Capable <fixed-case>AI</fixed-case> Systems</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <pages>1–15</pages>
       <doi>10.1162/COLI_a_00313</doi>
       <url hash="40506484">J18-1001</url>
@@ -24,8 +24,8 @@
     </paper>
     <paper id="2">
       <title>On the Derivational Entropy of Left-to-Right Probabilistic Finite-State Automata and Hidden <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
-      <author><first>Martha Alicia</first><last>Rocha</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="martha-alicia-rocha"><first>Martha Alicia</first><last>Rocha</last></author>
       <author><first>Verónica</first><last>Romero</last></author>
       <author><first>Mauricio</first><last>Villegas</last></author>
       <abstract>Probabilistic finite-state automata are a formalism that is widely used in many problems of automatic speech recognition and natural language processing. Probabilistic finite-state automata are closely related to other finite-state models as weighted finite-state automata, word lattices, and hidden Markov models. Therefore, they share many similar properties and problems. Entropy measures of finite-state models have been investigated in the past in order to study the information capacity of these models. The derivational entropy quantifies the uncertainty that the model has about the probability distribution it represents. The derivational entropy in a finite-state automaton is computed from the probability that is accumulated in all of its individual state sequences. The computation of the entropy from a weighted finite-state automaton requires a normalized model. This article studies an efficient computation of the derivational entropy of left-to-right probabilistic finite-state automata, and it introduces an efficient algorithm for normalizing weighted finite-state automata. The efficient computation of the derivational entropy is also extended to continuous hidden Markov models.</abstract>
@@ -38,7 +38,7 @@
       <title>A Notion of Semantic Coherence for Underspecified Semantic Representation</title>
       <author><first>Mehdi</first><last>Manshadi</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <abstract>The general problem of finding satisfying solutions to constraint-based underspecified representations of quantifier scope is NP-complete. Existing frameworks, including Dominance Graphs, Minimal Recursion Semantics, and Hole Semantics, have struggled to balance expressivity and tractability in order to cover real natural language sentences with efficient algorithms. We address this trade-off with a general principle of coherence, which requires that every variable introduced in the domain of discourse must contribute to the overall semantics of the sentence. We show that every underspecified representation meeting this criterion can be efficiently processed, and that our set of representations subsumes all previously identified tractable sets.</abstract>
       <pages>39–83</pages>
       <doi>10.1162/COLI_a_00307</doi>
@@ -113,7 +113,7 @@
       <title>A Dependency Perspective on <fixed-case>RST</fixed-case> Discourse Parsing and Evaluation</title>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <abstract>Computational text-level discourse analysis mostly happens within Rhetorical Structure Theory (RST), whose structures have classically been presented as constituency trees, and relies on data from the RST Discourse Treebank (RST-DT); as a result, the RST discourse parsing community has largely borrowed from the syntactic constituency parsing community. The standard evaluation procedure for RST discourse parsers is thus a simplified variant of PARSEVAL, and most RST discourse parsers use techniques that originated in syntactic constituency parsing. In this article, we isolate a number of conceptual and computational problems with the constituency hypothesis. We then examine the consequences, for the implementation and evaluation of RST discourse parsers, of adopting a dependency perspective on RST structures, a view advocated so far only by a few approaches to discourse parsing. While doing that, we show the importance of the notion of headedness of RST structures. We analyze RST discourse parsing as dependency parsing by adapting to RST a recent proposal in syntactic parsing that relies on head-ordered dependency trees, a representation isomorphic to headed constituency trees. We show how to convert the original trees from the RST corpus, RST-DT, and their binarized versions used by all existing RST parsers to head-ordered dependency trees. We also propose a way to convert existing simple dependency parser output to constituent trees. This allows us to evaluate and to compare approaches from both constituent-based and dependency-based perspectives in a unified framework, using constituency and dependency metrics. We thus propose an evaluation framework to compare extant approaches easily and uniformly, something the RST parsing community has lacked up to now. We can also compare parsers’ predictions to each other across frameworks. This allows us to characterize families of parsing strategies across the different frameworks, in particular with respect to the notion of headedness. Our experiments provide evidence for the conceptual similarities between dependency parsers and shift-reduce constituency parsers, and confirm that dependency parsing constitutes a viable approach to RST discourse parsing.</abstract>
       <pages>197–235</pages>
       <doi>10.1162/COLI_a_00314</doi>
@@ -144,7 +144,7 @@
     <paper id="4">
       <title>The Influence of Context on the Learning of Metrical Stress Systems Using Finite-State Machines</title>
       <author><first>Cesko</first><last>Voeten</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <abstract>Languages vary in the way stress is assigned to syllables within words. This article investigates the learnability of stress systems in a wide range of languages. The stress systems can be described using finite-state automata with symbols indicating levels of stress (primary, secondary, or no stress). Finite-state automata have been the focus of research in the area of grammatical inference for some time now. It has been shown that finite-state machines are learnable from examples using state-merging. One such approach, which aims to learn k-testable languages, has been applied to stress systems with some success. The family of k-testable languages has been shown to be efficiently learnable (in polynomial time). Here, we extend this approach to k, l-local languages by taking not only left context, but also right context, into account. We consider empirical results testing the performance of our learner using various amounts of context (corresponding to varying definitions of phonological locality). Our results show that our approach of learning stress patterns using state-merging is more reliant on left context than on right context. Additionally, some stress systems fail to be learned by our learner using either the left-context k-testable or the left-and-right-context k, l-local learning system. A more complex merging strategy, and hence grammar representation, is required for these stress systems.</abstract>
       <pages>329–348</pages>
       <doi>10.1162/COLI_a_00317</doi>
@@ -194,7 +194,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>O</fixed-case>bituary: Aravind <fixed-case>K</fixed-case>. Joshi</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>387–392</pages>
       <doi>10.1162/coli_a_00321</doi>
       <url hash="d1f17060">J18-3001</url>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="3">
       <title>Native Language Identification With Classifier Stacking and Ensembles</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <abstract>Ensemble methods using multiple classifiers have proven to be among the most successful approaches for the task of Native Language Identification (NLI), achieving the current state of the art. However, a systematic examination of ensemble methods for NLI has yet to be conducted. Additionally, deeper ensemble architectures such as classifier stacking have not been closely evaluated. We present a set of experiments using three ensemble-based models, testing each with multiple configurations and algorithms. This includes a rigorous application of meta-classification models for NLI, achieving state-of-the-art results on several large data sets, evaluated in both intra-corpus and cross-corpus modes.</abstract>
       <pages>403–446</pages>
@@ -233,7 +233,7 @@
     <paper id="5">
       <title>Using Semantics for Granularities of Tokenization</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <abstract>Depending on downstream applications, it is advisable to extend the notion of tokenization from low-level character-based token boundary detection to identification of meaningful and useful language units. This entails both identifying units composed of several single words that form a several single words that form a, as well as splitting single-word compounds into their meaningful parts. In this article, we introduce unsupervised and knowledge-free methods for these two tasks. The main novelty of our research is based on the fact that methods are primarily based on distributional similarity, of which we use two flavors: a sparse count-based and a dense neural-based distributional semantic model. First, we introduce DRUID, which is a method for detecting MWEs. The evaluation on MWE-annotated data sets in two languages and newly extracted evaluation data sets for 32 languages shows that DRUID compares favorably over previous methods not utilizing distributional information. Second, we present SECOS, an algorithm for decompounding close compounds. In an evaluation of four dedicated decompounding data sets across four languages and on data sets extracted from Wiktionary for 14 languages, we demonstrate the superiority of our approach over unsupervised baselines, sometimes even matching the performance of previous language-specific and supervised methods. In a final experiment, we show how both decompounding and MWE information can be used in information retrieval. Here, we obtain the best results when combining word information with MWEs and the compound parts in a bag-of-words retrieval set-up. Overall, our methodology paves the way to automatic detection of lexical units beyond standard tokenization techniques without language-specific preprocessing steps such as POS tagging.</abstract>
       <pages>483–524</pages>
       <doi>10.1162/coli_a_00325</doi>
@@ -280,8 +280,8 @@
     </frontmatter>
     <paper id="1">
       <title>The Lost Combinator</title>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <abstract/>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <abstract></abstract>
       <pages>613-629</pages>
       <doi>10.1162/coli_a_00328</doi>
       <url hash="1530d085">J18-4001</url>
@@ -300,7 +300,7 @@
       <title><fixed-case>S</fixed-case>quib: Reproducibility in Computational Linguistics: Are We Willing to Share?</title>
       <author><first>Martijn</first><last>Wieling</last></author>
       <author><first>Josine</first><last>Rawee</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <abstract>This study focuses on an essential precondition for reproducibility in computational linguistics: the willingness of authors to share relevant source code and data. Ten years after Ted Pedersen’s influential “Last Words” contribution in Computational Linguistics, we investigate to what extent researchers in computational linguistics are willing and able to share their data and code. We surveyed all 395 full papers presented at the 2011 and 2016 ACL Annual Meetings, and identified whether links to data and code were provided. If working links were not provided, authors were requested to provide this information. Although data were often available, code was shared less often. When working links to code or data were not provided in the paper, authors provided the code in about one third of cases. For a selection of ten papers, we attempted to reproduce the results using the provided data and code. We were able to reproduce the results approximately for six papers. For only a single paper did we obtain the exact same results. Our findings show that even though the situation appears to have improved comparing 2016 to 2011, empiricism in computational linguistics still largely remains a matter of faith. Nevertheless, we are somewhat optimistic about the future. Ensuring reproducibility is not only important for the field as a whole, but also seems worthwhile for individual researchers: The median citation count for studies with working links to the source code is higher.</abstract>
       <pages>641–649</pages>
       <doi>10.1162/coli_a_00330</doi>
@@ -309,7 +309,7 @@
     </paper>
     <paper id="4">
       <title>Last Words: What Can Be Accomplished with the State of the Art in Information Extraction? A Personal View</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
       <abstract>Though information extraction (IE) research has more than a 25-year history, F1 scores remain low. Thus, one could question continued investment in IE research. In this article, we present three applications where information extraction of entities, relations, and/or events has been used, and note the common features that seem to have led to success. We also identify key research challenges whose solution seems essential for broader successes. Because a few practical deployments already exist and because breakthroughs on particular challenges would greatly broaden the technology’s deployment, further R and D investments are justified.</abstract>
       <pages>651–658</pages>
@@ -327,9 +327,9 @@
     </paper>
     <paper id="6">
       <title>Introduction to the Special Issue on Language in Social Media: Exploiting Discourse and Other Contextual Information</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <abstract>Social media content is changing the way people interact with each other and share information, personal messages, and opinions about situations, objects, and past experiences. Most social media texts are short online conversational posts or comments that do not contain enough information for natural language processing (NLP) tools, as they are often accompanied by non-linguistic contextual information, including meta-data (e.g., the user’s profile, the social network of the user, and their interactions with other users). Exploiting such different types of context and their interactions makes the automatic processing of social media texts a challenging research task. Indeed, simply applying traditional text mining tools is clearly sub-optimal, as, typically, these tools take into account neither the interactive dimension nor the particular nature of this data, which shares properties with both spoken and written language. This special issue contributes to a deeper understanding of the role of these interactions to process social media data from a new perspective in discourse interpretation. This introduction first provides the necessary background to understand what context is from both the linguistic and computational linguistic perspectives, then presents the most recent context-based approaches to NLP for social media. We conclude with an overview of the papers accepted in this special issue, highlighting what we believe are the future directions in processing social media texts.</abstract>
       <pages>663–681</pages>
       <doi>10.1162/coli_a_00333</doi>
@@ -338,7 +338,7 @@
     </paper>
     <paper id="7">
       <title>Interactional Stancetaking in Online Forums</title>
-      <author><first>Scott F.</first><last>Kiesling</last></author>
+      <author id="scott-f-kiesling"><first>Scott F.</first><last>Kiesling</last></author>
       <author><first>Umashanthi</first><last>Pavalanathan</last></author>
       <author><first>Jim</first><last>Fitzpatrick</last></author>
       <author><first>Xiaochuang</first><last>Han</last></author>
@@ -354,7 +354,7 @@
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Yan</first><last>Song</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <abstract>Conventional topic models are ineffective for topic extraction from microblog messages, because the data sparseness exhibited in short messages lacking structure and contexts results in poor message-level word co-occurrence patterns. To address this issue, we organize microblog messages as conversation trees based on their reposting and replying relations, and propose an unsupervised model that jointly learns word distributions to represent: (1) different roles of conversational discourse, and (2) various latent topics in reflecting content information. By explicitly distinguishing the probabilities of messages with varying discourse roles in containing topical words, our model is able to discover clusters of discourse words that are indicative of topical content. In an automatic evaluation on large-scale microblog corpora, our joint model yields topics with better coherence scores than competitive topic models from previous studies. Qualitative analysis on model outputs indicates that our model induces meaningful representations for both discourse and topics. We further present an empirical study on microblog summarization based on the outputs of our joint model. The results show that the jointly modeled discourse and topic representations can effectively indicate summary-worthy content in microblog conversations.</abstract>
       <pages>719–754</pages>
       <doi>10.1162/coli_a_00335</doi>
@@ -364,7 +364,7 @@
     <paper id="9">
       <title>Sarcasm Analysis Using Conversation Context</title>
       <author><first>Debanjan</first><last>Ghosh</last></author>
-      <author><first>Alexander R.</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander R.</first><last>Fabbri</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <abstract>Computational models for sarcasm detection have often relied on the content of utterances in isolation. However, the speaker’s sarcastic intent is not always apparent without additional context. Focusing on social media discussions, we investigate three issues: (1) does modeling conversation context help in sarcasm detection? (2) can we identify what part of conversation context triggered the sarcastic reply? and (3) given a sarcastic post that contains multiple sentences, can we identify the specific sentence that is sarcastic? To address the first issue, we investigate several types of Long Short-Term Memory (LSTM) networks that can model both the conversation context and the current turn. We show that LSTM networks with sentence-level attention on context and current turn, as well as the conditional LSTM network, outperform the LSTM model that reads only the current turn. As conversation context, we consider the prior turn, the succeeding turn, or both. Our computational models are tested on two types of social media platforms: Twitter and discussion forums. We discuss several differences between these data sets, ranging from their size to the nature of the gold-label annotations. To address the latter two issues, we present a qualitative analysis of the attention weights produced by the LSTM models (with attention) and discuss the results compared with human performance on the two tasks.</abstract>
       <pages>755–792</pages>
@@ -376,7 +376,7 @@
       <title>We Usually Don’t Like Going to the Dentist: Using Common Sense to Detect Irony on <fixed-case>T</fixed-case>witter</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <abstract>Although common sense and connotative knowledge come naturally to most people, computers still struggle to perform well on tasks for which such extratextual information is required. Automatic approaches to sentiment analysis and irony detection have revealed that the lack of such world knowledge undermines classification performance. In this article, we therefore address the challenge of modeling implicit or prototypical sentiment in the framework of automatic irony detection. Starting from manually annotated connoted situation phrases (e.g., “flight delays,” “sitting the whole day at the doctor’s office”), we defined the implicit sentiment held towards such situations automatically by using both a lexico-semantic knowledge base and a data-driven method. We further investigate how such implicit sentiment information affects irony detection by assessing a state-of-the-art irony classifier before and after it is informed with implicit sentiment information.</abstract>
       <pages>793–832</pages>
       <doi>10.1162/coli_a_00337</doi>
@@ -395,8 +395,8 @@
     </paper>
     <paper id="12">
       <title>Modeling Speech Acts in Asynchronous Conversations: A Neural-<fixed-case>CRF</fixed-case> Approach</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <abstract>Participants in an asynchronous conversation (e.g., forum, e-mail) interact with each other at different times, performing certain communicative acts, called speech acts (e.g., question, request). In this article, we propose a hybrid approach to speech act recognition in asynchronous conversations. Our approach works in two main steps: a long short-term memory recurrent neural network (LSTM-RNN) first encodes each sentence separately into a task-specific distributed representation, and this is then used in a conditional random field (CRF) model to capture the conversational dependencies between sentences. The LSTM-RNN model uses pretrained word embeddings learned from a large conversational corpus and is trained to classify sentences into speech act types. The CRF model can consider arbitrary graph structures to model conversational dependencies in an asynchronous conversation. In addition, to mitigate the problem of limited annotated data in the asynchronous domains, we adapt the LSTM-RNN model to learn from synchronous conversations (e.g., meetings), using domain adversarial training of neural networks. Empirical evaluation shows the effectiveness of our approach over existing ones: (i) LSTM-RNNs provide better task-specific representations, (ii) conversational word embeddings benefit the LSTM-RNNs more than the off-the-shelf ones, (iii) adversarial training gives better domain-invariant representations, and (iv) the global CRF model improves over local models.</abstract>
       <pages>859–894</pages>
       <doi>10.1162/coli_a_00339</doi>
diff --git a/data/xml/J19.xml b/data/xml/J19.xml
index 6587711506..0bf6e1c4ed 100644
--- a/data/xml/J19.xml
+++ b/data/xml/J19.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Unsupervised Compositionality Prediction of Nominal Compounds</title>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="3">
       <title>Parsing <fixed-case>C</fixed-case>hinese Sentences with Grammatical Relations</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Yufei</first><last>Chen</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <author><first>Meichun</first><last>Liu</last></author>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="2">
       <title>Novel Event Detection and Classification for Historical Texts</title>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <doi>10.1162/coli_a_00347</doi>
       <abstract>Event processing is an active area of research in the Natural Language Processing community, but resources and automatic systems developed so far have mainly addressed contemporary texts. However, the recognition and elaboration of events is a crucial step when dealing with historical texts Particularly in the current era of massive digitization of historical sources: Research in this domain can lead to the development of methodologies and tools that can assist historians in enhancing their work, while having an impact also on the field of Natural Language Processing. Our work aims at shedding light on the complex concept of events when dealing with historical texts. More specifically, we introduce new annotation guidelines for event mentions and types, categorized into 22 classes. Then, we annotate a historical corpus accordingly, and compare two approaches for automatic event detection and classification following this novel scheme. We believe that this work can foster research in a field of inquiry as yet underestimated in the area of Temporal Information Processing. To this end, we release new annotation guidelines, a corpus, and new models for automatic annotation.</abstract>
@@ -118,7 +118,7 @@
     <paper id="4">
       <title>Neural Models of Text Normalization for Speech Applications</title>
       <author><first>Hao</first><last>Zhang</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Axel H.</first><last>Ng</last></author>
       <author><first>Felix</first><last>Stahlberg</last></author>
       <author><first>Xiaochang</first><last>Peng</last></author>
@@ -146,7 +146,7 @@
       <author><first>Johannes</first><last>Bjerva</last></author>
       <author><first>Robert</first><last>Östling</last></author>
       <author><first>Maria Han</first><last>Veiga</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <doi>10.1162/coli_a_00351</doi>
       <abstract>A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships—a convenient benchmark used for evaluation in previous work—appears to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another.</abstract>
@@ -169,8 +169,8 @@
     <paper id="1">
       <title>Contextualized Translations of Phrasal Verbs with Distributional Compositional Semantics and Monolingual Corpora</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Susana</first><last>Sotelo</last></author>
-      <author><first>José Ramom</first><last>Pichel</last></author>
+      <author id="susana-sotelo"><first>Susana</first><last>Sotelo</last></author>
+      <author id="jose-ramom-pichel-campos"><first>José Ramom</first><last>Pichel</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
       <doi>10.1162/coli_a_00353</doi>
       <abstract>This article describes a compositional distributional method to generate contextualized senses of words and identify their appropriate translations in the target language using monolingual corpora. Word translation is modeled in the same way as contextualization of word meaning, but in a bilingual vector space. The contextualization of meaning is carried out by means of distributional composition within a structured vector space with syntactic dependencies, and the bilingual space is created by means of transfer rules and a bilingual dictionary. A phrase in the source language, consisting of a head and a dependent, is translated into the target language by selecting both the nearest neighbor of the head given the dependent, and the nearest neighbor of the dependent given the head. This process is expanded to larger phrases by means of incremental composition. Experiments were performed on English and Spanish monolingual corpora in order to translate phrasal verbs in context. A new bilingual data set to evaluate strategies aimed at translating phrasal verbs in restricted syntactic domains has been created and released.</abstract>
@@ -182,8 +182,8 @@
       <title>Watset: Local-Global Graph Clustering with Applications in Sense and Frame Induction</title>
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <doi>10.1162/coli_a_00354</doi>
       <abstract>We present a detailed theoretical and computational analysis of the Watset meta-algorithm for fuzzy graph clustering, which has been found to be widely applicable in a variety of domains. This algorithm creates an intermediate representation of the input graph, which reflects the “ambiguity” of its nodes. Then, it uses hard clustering to discover clusters in this “disambiguated” intermediate graph. After outlining the approach and analyzing its computational complexity, we demonstrate that Watset shows competitive results in three applications: unsupervised synset induction from a synonymy graph, unsupervised semantic frame induction from dependency triples, and unsupervised semantic class induction from a distributional thesaurus. Our algorithm is generic and can also be applied to other networks of linguistic data.</abstract>
       <pages>423–479</pages>
@@ -193,7 +193,7 @@
     <paper id="3">
       <title>Evaluating Computational Language Models with Scaling Properties of Natural Language</title>
       <author><first>Shuntaro</first><last>Takahashi</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <doi>10.1162/coli_a_00355</doi>
       <abstract>In this article, we evaluate computational models of natural language with respect to the universal statistical behaviors of natural language. Statistical mechanical analyses have revealed that natural language text is characterized by scaling properties, which quantify the global structure in the vocabulary population and the long memory of a text. We study whether five scaling properties (given by Zipf’s law, Heaps’ law, Ebeling’s method, Taylor’s law, and long-range correlation analysis) can serve for evaluation of computational models. Specifically, we test n-gram language models, a probabilistic context-free grammar, language models based on Simon/Pitman-Yor processes, neural language models, and generative adversarial networks for text generation. Our analysis reveals that language models based on recurrent neural networks with a gating mechanism (i.e., long short-term memory; a gated recurrent unit; and quasi-recurrent neural networks) are the only computational models that can reproduce the long memory behavior of natural language. Furthermore, through comparison with recently proposed model-based evaluation methods, we find that the exponent of Taylor’s law is a good indicator of model quality.</abstract>
       <pages>481–513</pages>
@@ -240,7 +240,7 @@
     </meta>
     <paper id="1">
       <title>Computational Psycholinguistics</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <doi>10.1162/coli_a_00359</doi>
       <pages>607–626</pages>
       <url hash="7b042947">J19-4001</url>
@@ -250,10 +250,10 @@
       <title>Discourse in Multimedia: A Case Study in Extracting Geometry Knowledge from Textbooks</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Avinava</first><last>Dubey</last></author>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <doi>10.1162/coli_a_00360</doi>
       <abstract>To ensure readability, text is often written and presented with due formatting. These text formatting devices help the writer to effectively convey the narrative. At the same time, these help the readers pick up the structure of the discourse and comprehend the conveyed information. There have been a number of linguistic theories on discourse structure of text. However, these theories only consider unformatted text. Multimedia text contains rich formatting features that can be leveraged for various NLP tasks. In this article, we study some of these discourse features in multimedia text and what communicative function they fulfill in the context. As a case study, we use these features to harvest structured subject knowledge of geometry from textbooks. We conclude that the discourse and text layout features provide information that is complementary to lexical semantic information. Finally, we show that the harvested structured knowledge can be used to improve an existing solver for geometry problems, making it more accurate as well as more explainable.</abstract>
       <pages>627–665</pages>
@@ -262,8 +262,8 @@
     </paper>
     <paper id="3">
       <title>Automatic Identification and Production of Related Words for Historical Linguistics</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <doi>10.1162/coli_a_00361</doi>
       <abstract>Language change across space and time is one of the main concerns in historical linguistics. In this article, we develop tools to assist researchers and domain experts in the study of language evolution. First, we introduce a method to automatically determine whether two words are cognates. We propose an algorithm for extracting cognates from electronic dictionaries that contain etymological information. Having built a data set of related words, we further develop machine learning methods based on orthographic alignment for identifying cognates. We use aligned subsequences as features for classification algorithms in order to infer rules for linguistic changes undergone by words when entering new languages and to discriminate between cognates and non-cognates. Second, we extend the method to a finer-grained level, to identify the type of relationship between words. Discriminating between cognates and borrowings provides a deeper insight into the history of a language and allows a better characterization of language relatedness. We show that orthographic features have discriminative power and we analyze the underlying linguistic factors that prove relevant in the classification task. To our knowledge, this is the first attempt of this kind. Third, we develop a machine learning method for automatically producing related words. We focus on reconstructing proto-words, but we also address two related sub-problems, producing modern word forms and producing cognates. The task of reconstructing proto-words consists of recreating the words in an ancient language from its modern daughter languages. Having modern word forms in multiple Romance languages, we infer the form of their common Latin ancestors. Our approach relies on the regularities that occurred when words entered the modern languages. We leverage information from several modern languages, building an ensemble system for reconstructing proto-words. We apply our method to multiple data sets, showing that our approach improves on previous results, also having the advantage of requiring less input data, which is essential in historical linguistics, where resources are generally scarce.</abstract>
       <pages>667–704</pages>
@@ -295,7 +295,7 @@
     <paper id="6">
       <title>Argument Mining: A Survey</title>
       <author><first>John</first><last>Lawrence</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <doi>10.1162/coli_a_00364</doi>
       <abstract>Argument mining is the automatic identification and extraction of the structure of inference and reasoning expressed as arguments presented in natural language. Understanding argumentative structure makes it possible to determine not only what positions people are adopting, but also why they hold the opinions they do, providing valuable insights in domains as diverse as financial market prediction and public relations. This survey explores the techniques that establish the foundations for argument mining, provides a review of recent advances in argument mining techniques, and discusses the challenges faced in automatically extracting a deeper understanding of reasoning expressed in language in general.</abstract>
       <pages>765–818</pages>
diff --git a/data/xml/J74.xml b/data/xml/J74.xml
index ff6fae6513..7764ddd604 100644
--- a/data/xml/J74.xml
+++ b/data/xml/J74.xml
@@ -28,15 +28,15 @@
     </paper>
     <paper id="2">
       <title>The Lexical Subclasses of the Linguistic String Parser</title>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
-      <author><first>Naomi</first><last>Sager</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="naomi-sager"><first>Naomi</first><last>Sager</last></author>
       <note>Microfiche 2</note>
       <url hash="7dc6fd6f">J74-1002</url>
       <bibkey>fitzpatrick-sager-1974-lexical</bibkey>
     </paper>
     <paper id="3">
       <title>Natural Semantics in Artificial Intelligence</title>
-      <author><first>Jaime R.</first><last>Carbonell</last></author>
+      <author id="jaime-r-carbonell"><first>Jaime R.</first><last>Carbonell</last></author>
       <author><first>Allan M.</first><last>Collins</last></author>
       <note>Microfiche 3</note>
       <url hash="9c884bc4">J74-1003</url>
diff --git a/data/xml/J75.xml b/data/xml/J75.xml
index a918c342cd..58592e11b7 100644
--- a/data/xml/J75.xml
+++ b/data/xml/J75.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>Simple Digital Speech Synthesis</title>
-      <author><first>William M.</first><last>Fisher</last></author>
+      <author id="william-m-fisher"><first>William M.</first><last>Fisher</last></author>
       <author><first>A. Maynard</first><last>Engebretson</last></author>
       <note>Microfiche 16</note>
       <url hash="5f0a2ca8">J75-1002</url>
@@ -89,7 +89,7 @@
     <paper id="5">
       <title>“Formulae” in Coherent Text: Linguistic Relevance of Symbolic Insertions</title>
       <pages>70–85</pages>
-      <author><first>Felix</first><last>Dreizin</last></author>
+      <author id="felix-dreizin"><first>Felix</first><last>Dreizin</last></author>
       <note>Microfiche 17</note>
       <url hash="ad4e28ff">J75-2005</url>
       <bibkey>dreizin-1975-formulae</bibkey>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="8">
       <title>A Case History in Computer Exploration of Fast Speech Rules</title>
-      <author><first>Douglas B.</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas B.</first><last>Moran</last></author>
       <note>Microfiche 19</note>
       <url hash="2b787198">J75-2008</url>
       <bibkey>moran-1975-case</bibkey>
@@ -189,7 +189,7 @@
       <title>Review: <i>
           <fixed-case>I</fixed-case>nformal <fixed-case>S</fixed-case>peech: <fixed-case>A</fixed-case>lphabetic and <fixed-case>P</fixed-case>honemic <fixed-case>T</fixed-case>exts with <fixed-case>S</fixed-case>tatistical <fixed-case>A</fixed-case>nalyses and <fixed-case>T</fixed-case>ables</i>, by <fixed-case>E</fixed-case>dward <fixed-case>C</fixed-case>. <fixed-case>C</fixed-case>arterette and <fixed-case>M</fixed-case>argaret <fixed-case>H</fixed-case>ubbard <fixed-case>J</fixed-case>ones</title>
       <pages>78–91</pages>
-      <author><first>John B.</first><last>Carroll</last></author>
+      <author id="john-b-carroll"><first>John B.</first><last>Carroll</last></author>
       <note>Microfiche 22</note>
       <url hash="5dd85f0f">J75-3003</url>
       <bibkey>carroll-1975-review</bibkey>
@@ -226,10 +226,10 @@
     </paper>
     <paper id="8">
       <title><fixed-case>J</fixed-case>unction <fixed-case>G</fixed-case>rammar as a Base for Natural Language Processing</title>
-      <author><first>Eldon G.</first><last>Lytel</last></author>
+      <author id="eldon-g-lytle"><first>Eldon G.</first><last>Lytel</last></author>
       <author><first>Dennis</first><last>Packard</last></author>
       <author><first>Daryl</first><last>Gibb</last></author>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <author><first>Floyd H.</first><last>Billings, Jr.</last></author>
       <note>Microfiche 26</note>
       <url hash="efc70f13">J75-3008</url>
@@ -298,7 +298,7 @@
     <paper id="3">
       <title>Interpretation and Integration of Sentences into a <fixed-case>C</fixed-case>-Net</title>
       <pages>46–66</pages>
-      <author><first>Th. R.</first><last>Hofmann</last></author>
+      <author id="th-r-hofmann"><first>Th. R.</first><last>Hofmann</last></author>
       <note>Microfiche 29</note>
       <url hash="d6647067">J75-4003</url>
       <bibkey>hofmann-1975-interpretation</bibkey>
@@ -363,7 +363,7 @@
     <paper id="10">
       <title>A General System for Semantic Analysis of <fixed-case>E</fixed-case>nglish and its Use in Drawing Maps from Directions</title>
       <pages>21–41</pages>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <note>Microfiche 32</note>
       <url hash="830c9bd8">J75-4010</url>
       <bibkey>hobbs-1975-general</bibkey>
@@ -379,7 +379,7 @@
     <paper id="12">
       <title>Conceptual Grammar [abstract]</title>
       <pages>57</pages>
-      <author><first>William A.</first><last>Martin</last></author>
+      <author id="william-a-martin"><first>William A.</first><last>Martin</last></author>
       <note>Microfiche 32</note>
       <url hash="1bdb977b">J75-4012</url>
       <bibkey>martin-1975-conceptual</bibkey>
@@ -387,7 +387,7 @@
     <paper id="13">
       <title>Semantic-Based Parsing and a Natural-Language Interface for Interactive Data Management</title>
       <pages>58–71</pages>
-      <author><first>John F.</first><last>Burger</last></author>
+      <author id="john-f-burger"><first>John F.</first><last>Burger</last></author>
       <author><first>Antonio</first><last>Leal</last></author>
       <author><first>Arie</first><last>Shoshani</last></author>
       <note>Microfiche 32</note>
@@ -400,7 +400,7 @@
       <author><first>P.</first><last>Medema</last></author>
       <author><first>W. J.</first><last>Bronnenberg</last></author>
       <author id="harry-bunt"><first>H. C.</first><last>Bunt</last></author>
-      <author><first>S. P. J.</first><last>Landsbergen</last></author>
+      <author id="jan-landsbergen"><first>S. P. J.</first><last>Landsbergen</last></author>
       <author id="remko-scha"><first>R. J. H.</first><last>Scha</last></author>
       <author><first>W. J.</first><last>Schoenmakers</last></author>
       <author><first>E. P. C.</first><last>van Utteren</last></author>
@@ -443,7 +443,7 @@
     <paper id="19">
       <title>Generation as Parsing from a Network into a Linear String</title>
       <pages>45–62</pages>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <note>Microfiche 33</note>
       <url hash="cff36921">J75-4019</url>
       <bibkey>shapiro-1975-generation</bibkey>
@@ -475,7 +475,7 @@
     <paper id="23">
       <title>Syntactic Processing in the <fixed-case>BBN</fixed-case> Speech Understanding System [abstract]</title>
       <pages>4</pages>
-      <author><first>Madeline</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeline</first><last>Bates</last></author>
       <note>Microfiche 34</note>
       <url hash="313f4f4b">J75-4023</url>
       <bibkey>bates-1975-syntactic</bibkey>
@@ -492,7 +492,7 @@
     <paper id="25">
       <title>A Tuneable Performance Grammar</title>
       <pages>19–33</pages>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <note>Microfiche 34</note>
       <url hash="37215ea7">J75-4025</url>
       <bibkey>robinson-1975-tuneable</bibkey>
@@ -500,7 +500,7 @@
     <paper id="26">
       <title>Semantic Processing for Speech Understanding</title>
       <pages>34–48</pages>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <note>Microfiche 34</note>
       <url hash="7448f4ff">J75-4026</url>
       <bibkey>hendrix-1975-semantic</bibkey>
@@ -508,7 +508,7 @@
     <paper id="27">
       <title><fixed-case>SPS</fixed-case>: A Formalism for Semantic Interpretation and its Use in Processing Prepositions that Reference Space</title>
       <pages>49–63</pages>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <author><first>Doyt</first><last>Perry</last></author>
       <note>Microfiche 34</note>
       <url hash="11d3b938">J75-4027</url>
@@ -573,8 +573,8 @@
     <paper id="35">
       <title>A Frame Analysis of <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <pages>84–96</pages>
-      <author><first>Judy Anne</first><last>Kegl</last></author>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="judy-anne-kegl"><first>Judy Anne</first><last>Kegl</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <note>Microfiche 35</note>
       <url hash="001feb89">J75-4035</url>
       <bibkey>kegl-chinchor-1975-frame</bibkey>
@@ -599,7 +599,7 @@
     <paper id="38">
       <title>How Does a System Know When to Stop Inferencing?</title>
       <pages>26–39</pages>
-      <author><first>Stan</first><last>Rosenschein</last></author>
+      <author id="stanley-j-rosenschein"><first>Stan</first><last>Rosenschein</last></author>
       <note>Microfiche 36</note>
       <url hash="933bee64">J75-4038</url>
       <bibkey>rosenschein-1975-system</bibkey>
@@ -622,7 +622,7 @@
       <title>A Natural Language Processing Package</title>
       <pages>52–66</pages>
       <author><first>David</first><last>Brill</last></author>
-      <author><first>Beatrice T.</first><last>Oshika</last></author>
+      <author id="beatrice-oshika"><first>Beatrice T.</first><last>Oshika</last></author>
       <note>Microfiche 36</note>
       <url hash="b3ff39ca">J75-4040</url>
       <bibkey>brill-oshika-1975-natural</bibkey>
@@ -639,7 +639,7 @@
       <title>Grammatical Compression in Notes and Records: Analysis and Computation</title>
       <pages>68–81</pages>
       <author><first>Barbara B.</first><last>Anderson</last></author>
-      <author><first>Naomi</first><last>Sager</last></author>
+      <author id="naomi-sager"><first>Naomi</first><last>Sager</last></author>
       <note>Microfiche 36</note>
       <url hash="ddb32898">J75-4042</url>
       <bibkey>anderson-sager-1975-grammatical</bibkey>
diff --git a/data/xml/J76.xml b/data/xml/J76.xml
index 345299dc16..7c4be67cef 100644
--- a/data/xml/J76.xml
+++ b/data/xml/J76.xml
@@ -46,8 +46,8 @@
     <paper id="3">
       <title><fixed-case>PLATON</fixed-case>--A New Programming Language for Natural Language Analysis</title>
       <pages>28–53</pages>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Jun-Ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-Ichi</first><last>Tsujii</last></author>
       <note>Microfiche 37</note>
       <url hash="cf9606b3">J76-1003</url>
       <bibkey>nagao-tsujii-1976-platon</bibkey>
@@ -79,15 +79,15 @@
     </paper>
     <paper id="7">
       <title>Natural Language Understanding Systems within the <fixed-case>A</fixed-case>. <fixed-case>I</fixed-case>. Paradigm: A Survey and Some Comparisons</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <note>Microfiche 40</note>
       <url hash="b1609c4b">J76-1007</url>
       <bibkey>wilks-1976-natural</bibkey>
     </paper>
     <paper id="8">
       <title>Analysis of <fixed-case>J</fixed-case>apanese Sentences</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Jun-Ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-Ichi</first><last>Tsujii</last></author>
       <note>Microfiche 41</note>
       <url hash="7292c62e">J76-1008</url>
       <bibkey>nagao-tsujii-1976-analysis</bibkey>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="4">
       <title>Syntax in Automatic Speech Understanding</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <note>Microfiche 45</note>
       <url hash="2dcd6b99">J76-2004</url>
       <bibkey>bates-1976-syntax</bibkey>
@@ -165,7 +165,7 @@
     </paper>
     <paper id="6">
       <title>A Survey of Syntactic Analysis Procedures for Natural Language</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <note>Microfiche 47</note>
       <url hash="a5ce6319">J76-2006</url>
       <bibkey>grishman-1976-survey</bibkey>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="6">
       <title>Multiple Environments Approach to Natural Language</title>
-      <author><first>Janusz Stanisław</first><last>Bien</last></author>
+      <author id="janusz-stanislaw-bien"><first>Janusz Stanisław</first><last>Bien</last></author>
       <note>Microfiche 54</note>
       <url hash="d0347728">J76-3006</url>
       <bibkey>bien-1976-multiple</bibkey>
@@ -365,7 +365,7 @@
     </paper>
     <paper id="7">
       <title>Processing Case</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <note>Microfiche 56</note>
       <url hash="570055dd">J76-4007</url>
       <bibkey>wilks-1976-processing</bibkey>
diff --git a/data/xml/J77.xml b/data/xml/J77.xml
index 43e6b65444..68b4d6d23b 100644
--- a/data/xml/J77.xml
+++ b/data/xml/J77.xml
@@ -59,9 +59,9 @@
     </paper>
     <paper id="4">
       <title>Pitch Contour Generation in Speech Synthesis: A <fixed-case>J</fixed-case>unction <fixed-case>G</fixed-case>rammar Approach</title>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <author><first>William J.</first><last>Strong</last></author>
-      <author><first>Eldon G.</first><last>Lytle</last></author>
+      <author id="eldon-g-lytle"><first>Eldon G.</first><last>Lytle</last></author>
       <author><first>Ronald</first><last>Millett</last></author>
       <note>Microfiche 60</note>
       <url hash="338e9caf">J77-1004</url>
@@ -97,8 +97,8 @@
     </paper>
     <paper id="8">
       <title>Computation of a Subclass of Inferences: Presupposition and Entailment</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>1–54</pages>
       <note>Microfiche 63</note>
       <url hash="a426a217">J77-1008</url>
@@ -215,7 +215,7 @@
       <title>A Goal Oriented Model of Human Dialogue</title>
       <author><first>James A.</first><last>Moore</last></author>
       <author><first>James A.</first><last>Levin</last></author>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <note>Microfiche 67</note>
       <url hash="a2b6f73c">J77-3002</url>
       <bibkey>moore-etal-1977-goal</bibkey>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="3">
       <title>Spatial Reference and Semantic Nets</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <pages>1–67</pages>
       <note>Microfiche 71</note>
       <url hash="7914c903">J77-4003</url>
diff --git a/data/xml/J78.xml b/data/xml/J78.xml
index e0f49eff23..8547a67df4 100644
--- a/data/xml/J78.xml
+++ b/data/xml/J78.xml
@@ -39,7 +39,7 @@
     <paper id="2">
       <title>Two Papers on Semantic Interpretation in <fixed-case>M</fixed-case>ontague Grammar</title>
       <author><first>Joyce</first><last>Friedman</last></author>
-      <author><first>Douglas B.</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas B.</first><last>Moran</last></author>
       <author><first>David S.</first><last>Warren</last></author>
       <note>Microfiche 74</note>
       <url hash="2d96b6ab">J78-1002</url>
@@ -68,7 +68,7 @@
     <paper id="2">
       <title>The Derivation of Answers from Logical Forms in a Question Answering System</title>
       <pages>3–42</pages>
-      <author><first>Fred J.</first><last>Damerau</last></author>
+      <author id="fred-damerau"><first>Fred J.</first><last>Damerau</last></author>
       <note>Microfiche 75</note>
       <url hash="1be7acf0">J78-2002</url>
       <bibkey>damerau-1978-derivation</bibkey>
@@ -85,7 +85,7 @@
     <paper id="4">
       <title>Computation in Departments of Linguistics</title>
       <pages>62–68</pages>
-      <author><first>Richard</first><last>Fritzson</last></author>
+      <author id="richard-fritzson"><first>Richard</first><last>Fritzson</last></author>
       <note>Microfiche 75</note>
       <url hash="7730dc58">J78-2004</url>
       <bibkey>fritzson-1978-computation</bibkey>
@@ -203,7 +203,7 @@
     </paper>
     <paper id="6">
       <title>Language Representation: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2</title>
-      <editor><first>David L.</first><last>Waltz</last></editor>
+      <editor id="david-l-waltz"><first>David L.</first><last>Waltz</last></editor>
       <pages>1–2</pages>
       <note>Microfiche 78</note>
       <url hash="442f8437">J78-3006</url>
@@ -220,7 +220,7 @@
     <paper id="8">
       <title>What Makes Something “Ad Hoc”</title>
       <pages>10–15</pages>
-      <author><first>Roger C.</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger C.</first><last>Schank</last></author>
       <note>Microfiche 78</note>
       <url hash="691c7e90">J78-3008</url>
       <bibkey>schank-1978-makes</bibkey>
@@ -244,7 +244,7 @@
     <paper id="11">
       <title>Taxonomic Lattice Structures for Situation Recognition</title>
       <pages>35–43</pages>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <note>Microfiche 78</note>
       <url hash="961732b2">J78-3011</url>
       <bibkey>woods-1978-taxonomic</bibkey>
@@ -252,7 +252,7 @@
     <paper id="12">
       <title>Description Formation and Discourse Model Synthesis</title>
       <pages>44–52</pages>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <note>Microfiche 78</note>
       <url hash="e786cfd5">J78-3012</url>
       <bibkey>webber-1978-description</bibkey>
@@ -277,7 +277,7 @@
     <paper id="15">
       <title>Subsequent Reference: Syntactic and Rhetorical Considerations</title>
       <pages>66–74</pages>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <note>Microfiche 78</note>
       <url hash="517ec9a5">J78-3015</url>
       <bibkey>mcdonald-1978-subsequent</bibkey>
@@ -293,7 +293,7 @@
     <paper id="17">
       <title>Bound Variables and Other Anaphors</title>
       <pages>81–87</pages>
-      <author><first>Barbara H.</first><last>Partee</last></author>
+      <author id="barbara-h-partee"><first>Barbara H.</first><last>Partee</last></author>
       <note>Microfiche 78</note>
       <url hash="88080dab">J78-3017</url>
       <bibkey>partee-1978-bound</bibkey>
@@ -301,14 +301,14 @@
     <paper id="18">
       <title>The Use of Focus as a Tool for Disambiguation of Definite Noun Phrases</title>
       <pages>88–97</pages>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <note>Microfiche 78</note>
       <url hash="d68d1567">J78-3018</url>
       <bibkey>sidner-1978-use</bibkey>
     </paper>
     <paper id="19">
       <title>Language Representation: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2</title>
-      <editor><first>David L.</first><last>Waltz</last></editor>
+      <editor id="david-l-waltz"><first>David L.</first><last>Waltz</last></editor>
       <pages>1–2</pages>
       <note>Microfiche 79</note>
       <url hash="b4301f93">J78-3019</url>
@@ -317,7 +317,7 @@
     <paper id="20">
       <title>Focusing in Dialog</title>
       <pages>3–10</pages>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <note>Microfiche 79</note>
       <url hash="871f026e">J78-3020</url>
       <bibkey>grosz-1978-focusing</bibkey>
@@ -349,9 +349,9 @@
     <paper id="24">
       <title>Speech Acts as a Basis for Understanding Dialogue Coherence</title>
       <pages>32–39</pages>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <note>Microfiche 79</note>
       <url hash="88871e6c">J78-3024</url>
       <bibkey>perrault-etal-1978-speech</bibkey>
@@ -367,7 +367,7 @@
     <paper id="26">
       <title>Intentlonallty and Human Conversations</title>
       <pages>48–55</pages>
-      <author><first>Jaime G.</first><last>Carbonell Jr</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell Jr</last></author>
       <note>Microfiche 79</note>
       <url hash="92b920b4">J78-3026</url>
       <bibkey>carbonell-jr-1978-intentlonallty</bibkey>
@@ -375,7 +375,7 @@
     <paper id="27">
       <title>On the Interdependence of Language and Perception</title>
       <pages>56–63</pages>
-      <author><first>David L.</first><last>Waltz</last></author>
+      <author id="david-l-waltz"><first>David L.</first><last>Waltz</last></author>
       <note>Microfiche 79</note>
       <url hash="fb513b18">J78-3027</url>
       <bibkey>waltz-1978-interdependence</bibkey>
@@ -416,14 +416,14 @@
     <paper id="32">
       <title>Semantic Primitives in Language and Vision</title>
       <pages>87–90</pages>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <note>Microfiche 79</note>
       <url hash="0d8819b2">J78-3032</url>
       <bibkey>wilks-1978-semantic</bibkey>
     </paper>
     <paper id="33">
       <title>Inference and Theory: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2</title>
-      <editor><first>David L.</first><last>Waltz</last></editor>
+      <editor id="david-l-waltz"><first>David L.</first><last>Waltz</last></editor>
       <pages>1–2</pages>
       <note>Microfiche 80</note>
       <url hash="ee4738b3">J78-3033</url>
@@ -432,7 +432,7 @@
     <paper id="34">
       <title>A Note on Partial Match of Descriptions. Can One Simultaneously Question (Retrieve) and Inform (Update)?</title>
       <pages>3–5</pages>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <note>Microfiche 80</note>
       <url hash="3c29c8bc">J78-3034</url>
       <bibkey>joshi-1978-note</bibkey>
@@ -472,7 +472,7 @@
     <paper id="39">
       <title>Path-Based and Node-Based Inference in Semantic Networks</title>
       <pages>38–44</pages>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <note>Microfiche 80</note>
       <url hash="441c6579">J78-3039</url>
       <bibkey>shapiro-1978-path</bibkey>
@@ -498,7 +498,7 @@
     <paper id="42">
       <title>A Computational Account of Some Constraints on Language</title>
       <pages>55–65</pages>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <note>Microfiche 80</note>
       <url hash="035b402e">J78-3042</url>
       <bibkey>marcus-1978-computational</bibkey>
@@ -506,7 +506,7 @@
     <paper id="43">
       <title>Remarks on Processing, Constraints, and the Lexicon</title>
       <pages>66–70</pages>
-      <author><first>Thomas</first><last>Wasow</last></author>
+      <author id="thomas-wasow"><first>Thomas</first><last>Wasow</last></author>
       <note>Microfiche 80</note>
       <url hash="0eddd7a1">J78-3043</url>
       <bibkey>wasow-1978-remarks</bibkey>
@@ -544,8 +544,8 @@
     <paper id="2">
       <title>Properties of Lexical Relations [Appendix <fixed-case>II</fixed-case> of “A Lexicon for a Computer Question-Answering System,” <fixed-case>AJCL</fixed-case> Microfiche 83]</title>
       <pages>16–24</pages>
-      <author><first>Martha W.</first><last>Evens</last></author>
-      <author><first>Raoul N.</first><last>Smith</last></author>
+      <author id="martha-evens"><first>Martha W.</first><last>Evens</last></author>
+      <author id="raoul-n-smith"><first>Raoul N.</first><last>Smith</last></author>
       <note>Microfiche 81</note>
       <url hash="f9e68138">J78-4002</url>
       <bibkey>evens-smith-1978-properties</bibkey>
@@ -580,8 +580,8 @@
     </paper>
     <paper id="6">
       <title>A Lexicon for a Computer Question-Answering System</title>
-      <author><first>Martha W</first><last>Evens</last></author>
-      <author><first>Raoul N</first><last>Smith</last></author>
+      <author id="martha-evens"><first>Martha W</first><last>Evens</last></author>
+      <author id="raoul-n-smith"><first>Raoul N</first><last>Smith</last></author>
       <note>Microfiche 83</note>
       <url hash="78c520d4">J78-4006</url>
       <bibkey>evens-smith-1978-lexicon</bibkey>
diff --git a/data/xml/J80.xml b/data/xml/J80.xml
index 25e55dd5c9..0372d35075 100644
--- a/data/xml/J80.xml
+++ b/data/xml/J80.xml
@@ -15,14 +15,14 @@
     </frontmatter>
     <paper id="1">
       <title>Cascaded <fixed-case>ATN</fixed-case> Grammars</title>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <pages>1-12</pages>
       <url hash="35e81afa">J80-1001</url>
       <bibkey>woods-1980-cascaded</bibkey>
     </paper>
     <paper id="2">
       <title>An Integrated Understander</title>
-      <author><first>Roger C.</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger C.</first><last>Schank</last></author>
       <author><first>Michael</first><last>Lebowitz</last></author>
       <author><first>Lawrence</first><last>Birnbaum</last></author>
       <pages>13-30</pages>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Slot Grammars</title>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <pages>31-42</pages>
       <url hash="fe6d91a4">J80-1003</url>
       <bibkey>mccord-1980-slot</bibkey>
@@ -94,8 +94,8 @@
     </frontmatter>
     <paper id="1">
       <title>Toward Natural Language Computation <fixed-case>I</fixed-case></title>
-      <author><first>Alan W.</first><last>Biermann</last></author>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <pages>71-86</pages>
       <url hash="f8032ae1">J80-2001</url>
       <bibkey>biermann-ballard-1980-toward</bibkey>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="3">
       <title>Responding Intelligently to Unparsable Inputs</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <author><first>John E.</first><last>Black</last></author>
       <pages>97-109</pages>
       <url hash="f1098c49">J80-2003</url>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="6">
       <title>Meaning and Discourse - A Computer Model of Psychoanalytic Speech and Cognition</title>
-      <author><first>John Henry</first><last>Clippinger, Jr.</last></author>
+      <author id="john-h-clippinger-jr"><first>John Henry</first><last>Clippinger, Jr.</last></author>
       <url hash="ab2ca72e">J80-2006</url>
       <bibkey>clippinger-jr-1980-meaning</bibkey>
     </paper>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="3">
       <title>A Plan-Based Analysis of Indirect Speech Act</title>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
       <pages>167-182</pages>
       <url hash="9be3c71e">J80-3003</url>
       <bibkey>perrault-1980-plan</bibkey>
diff --git a/data/xml/J81.xml b/data/xml/J81.xml
index 2b86bd89c7..ee25b477fe 100644
--- a/data/xml/J81.xml
+++ b/data/xml/J81.xml
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>Computer Generation of Multiparagraph <fixed-case>E</fixed-case>nglish Text</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <author><first>James A.</first><last>Moore</last></author>
       <pages>17-29</pages>
       <url hash="68823732">J81-1002</url>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="3">
       <title>Operating Statistics for the Transformational Question Answering System</title>
-      <author><first>Fred J.</first><last>Damerau</last></author>
+      <author id="fred-damerau"><first>Fred J.</first><last>Damerau</last></author>
       <pages>30-42</pages>
       <url hash="50382ba2">J81-1003</url>
       <bibkey>damerau-1981-operating</bibkey>
@@ -88,8 +88,8 @@
     </paper>
     <paper id="2">
       <title>Relaxation Techniques for Parsing Grammatically Ill-Formed Input in Natural Language Understanding Systems</title>
-      <author><first>Stan C.</first><last>Kwasny</last></author>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="stan-c-kwasny"><first>Stan C.</first><last>Kwasny</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <pages>99-108</pages>
       <url hash="b6e416b4">J81-2002</url>
       <bibkey>kwasny-sondheimer-1981-relaxation</bibkey>
@@ -137,7 +137,7 @@
     </frontmatter>
     <paper id="1">
       <title>Formal Roles, Co-Descriptors, and the Representation of Quantified <fixed-case>E</fixed-case>nglish Expressions</title>
-      <author><first>William A.</first><last>Martin</last></author>
+      <author id="william-a-martin"><first>William A.</first><last>Martin</last></author>
       <pages>137-148</pages>
       <url hash="3bff318f">J81-3001</url>
       <bibkey>martin-1981-formal</bibkey>
@@ -152,7 +152,7 @@
     <paper id="3">
       <title>Prospects for Computer-Assisted Dialect Adaptation</title>
       <author><first>David J.</first><last>Weber</last></author>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <pages>165-177</pages>
       <url hash="1adb511d">J81-3003</url>
       <bibkey>weber-mann-1981-prospects</bibkey>
@@ -165,7 +165,7 @@
     </paper>
     <paper id="5">
       <title>New <fixed-case>G</fixed-case>uinea and Neighboring Areas: A Sociolinguistic Laboratory</title>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <url hash="96c31191">J81-3005</url>
       <bibkey>jensen-1981-new</bibkey>
     </paper>
@@ -215,7 +215,7 @@
     </frontmatter>
     <paper id="1">
       <title>Focusing for Interpretation of Pronouns</title>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <pages>217-231</pages>
       <url hash="d1afe21d">J81-4001</url>
       <bibkey>sidner-1981-focusing</bibkey>
diff --git a/data/xml/J82.xml b/data/xml/J82.xml
index aec3798aa4..696fb6e738 100644
--- a/data/xml/J82.xml
+++ b/data/xml/J82.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Phrase Structure Trees Bear More Fruit than You Would Have Thought</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Leon S.</first><last>Levy</last></author>
       <pages>1-11</pages>
       <url hash="f421715b">J82-1001</url>
@@ -23,14 +23,14 @@
     </paper>
     <paper id="2">
       <title>Generalized <fixed-case>A</fixed-case>ugmented <fixed-case>T</fixed-case>ransition <fixed-case>N</fixed-case>etwork <fixed-case>G</fixed-case>rammars for Generation from Semantic Networks</title>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <pages>12-26</pages>
       <url hash="2b05aa66">J82-1002</url>
       <bibkey>shapiro-1982-generalized</bibkey>
     </paper>
     <paper id="3">
       <title>From <fixed-case>E</fixed-case>nglish to Logic: Context-Free Computation of ‘Conventional’ Logical Translation</title>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <author><first>Francis Jeffry</first><last>Pelletier</last></author>
       <pages>27-44</pages>
       <url hash="69964b35">J82-1003</url>
@@ -79,28 +79,28 @@
     <paper id="1">
       <title>Applied Computational Linguistics in Perspective: Proceedings of the Workshop</title>
       <author><first>Carroll</first><last>Johnson</last></author>
-      <author><first>Joan</first><last>Bachenko</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
       <pages>55-84</pages>
       <url hash="a8e4dcde">J82-2001</url>
       <bibkey>johnson-bachenko-1982-applied</bibkey>
     </paper>
     <paper id="2">
       <title>Natural-Language Interface</title>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <pages>56-61</pages>
       <url hash="cc182454">J82-2002</url>
       <bibkey>hendrix-1982-natural</bibkey>
     </paper>
     <paper id="3">
       <title>Text Generation</title>
-      <author><first>William</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William</first><last>Mann</last></author>
       <pages>62-69</pages>
       <url hash="16d9ade4">J82-2003</url>
       <bibkey>mann-1982-text</bibkey>
     </paper>
     <paper id="4">
       <title>Concept Extraction</title>
-      <author><first>Christine</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine</first><last>Montgomery</last></author>
       <pages>70-73</pages>
       <url hash="5d9139a1">J82-2004</url>
       <bibkey>montgomery-1982-concept</bibkey>
@@ -114,7 +114,7 @@
     </paper>
     <paper id="6">
       <title>Sublanguages</title>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <pages>79-84</pages>
       <url hash="8e060d2a">J82-2006</url>
       <bibkey>kittredge-1982-sublanguages</bibkey>
@@ -170,15 +170,15 @@
     </frontmatter>
     <paper id="1">
       <title>Computational Complexity and <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <pages>97-109</pages>
       <url hash="ee81a91f">J82-3001</url>
       <bibkey>berwick-1982-computational</bibkey>
     </paper>
     <paper id="2">
       <title>An Efficient Easily Adaptable System for Interpreting Natural Language Queries</title>
-      <author><first>David H.D.</first><last>Warren</last></author>
-      <author><first>Fernando C.N.</first><last>Pereira</last></author>
+      <author id="david-h-d-warren"><first>David H.D.</first><last>Warren</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C.N.</first><last>Pereira</last></author>
       <pages>110-122</pages>
       <url hash="36ea64fa">J82-3002</url>
       <bibkey>warren-pereira-1982-efficient</bibkey>
@@ -193,7 +193,7 @@
     </paper>
     <paper id="4">
       <title>Coping with Syntactic Ambiguity or How to Put the Block in the Box on the Table</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Ramesh</first><last>Patil</last></author>
       <pages>139-149</pages>
       <url hash="5801978e">J82-3004</url>
diff --git a/data/xml/J83.xml b/data/xml/J83.xml
index 8ab53605b5..d9c3c99b58 100644
--- a/data/xml/J83.xml
+++ b/data/xml/J83.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Paraphrasing Questions Using Given and new information</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>1-10</pages>
       <url hash="83c7e397">J83-1001</url>
       <bibkey>mckeown-1983-paraphrasing</bibkey>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="3">
       <title>Questioning the Need for Parsing Ill-formed Inputs</title>
-      <author><first>Linda</first><last>Fineman</last></author>
+      <author id="linda-fineman"><first>Linda</first><last>Fineman</last></author>
       <pages>22-23</pages>
       <url hash="745585aa">J83-1003</url>
       <bibkey>fineman-1983-questioning</bibkey>
@@ -99,14 +99,14 @@
     <paper id="2">
       <title>Treating Coordination in Logic Grammars</title>
       <author><first>Veronica</first><last>Dahl</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <pages>69-91</pages>
       <url hash="ddb65952">J83-2002</url>
       <bibkey>dahl-mccord-1983-treating</bibkey>
     </paper>
     <paper id="3">
       <title>Letters to the Editor: Re <fixed-case>B</fixed-case>allard on the Need for Careful Description</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="39c88416">J83-2003</url>
       <bibkey>sparck-jones-1983-letters</bibkey>
     </paper>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="5">
       <title>Directory of Graduate Programs in Computational Linguistics</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <author><first>Lauri</first><last>Karttunen</last></author>
       <url hash="9a203bd1">J83-2005</url>
       <bibkey>evens-karttunen-1983-directory</bibkey>
@@ -158,7 +158,7 @@
     </frontmatter>
     <paper id="1">
       <title>Recovery Strategies for Parsing Extragrammatical Language</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <author><first>Philip J.</first><last>Hayes</last></author>
       <pages>123-146</pages>
       <url hash="ec9dadae">J83-3001</url>
@@ -176,8 +176,8 @@
     </paper>
     <paper id="3">
       <title>Meta-rules as a Basis for Processing Ill-Formed input</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <pages>161-177</pages>
       <url hash="7758f5c4">J83-3003</url>
       <bibkey>weischedel-sondheimer-1983-meta</bibkey>
@@ -185,7 +185,7 @@
     <paper id="4">
       <title>Preference Semantics, Ill-Formedness, and Metaphor</title>
       <author><first>Dan</first><last>Fass</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>178-187</pages>
       <url hash="2cf207df">J83-3004</url>
       <bibkey>fass-wilks-1983-preference</bibkey>
@@ -199,7 +199,7 @@
     </paper>
     <paper id="6">
       <title>Re <fixed-case>S</fixed-case>parck <fixed-case>J</fixed-case>ones Re <fixed-case>B</fixed-case>allard on the Need for Careful Description</title>
-      <author><first>Bruce</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce</first><last>Ballard</last></author>
       <pages>197-198</pages>
       <url hash="1c8c3213">J83-3006</url>
       <bibkey>ballard-1983-sparck</bibkey>
diff --git a/data/xml/J84.xml b/data/xml/J84.xml
index 87cef42a0f..f2a00daa54 100644
--- a/data/xml/J84.xml
+++ b/data/xml/J84.xml
@@ -29,7 +29,7 @@
     </paper>
     <paper id="3">
       <title>Book Review: Principles of Computer Speech</title>
-      <author><first>John</first><last>Thomas</last></author>
+      <author id="john-c-thomas"><first>John</first><last>Thomas</last></author>
       <url hash="866520d3">J84-1003</url>
       <bibkey>thomas-1984-book</bibkey>
     </paper>
@@ -93,7 +93,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Phrase-Structured Grammatical Framework for Transportable Natural Language Processing</title>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <author><first>Nancy L.</first><last>Tinkham</last></author>
       <pages>81-96</pages>
       <url hash="3283a139">J84-2001</url>
@@ -101,14 +101,14 @@
     </paper>
     <paper id="2">
       <title>The Pragmatics of Referring and the Modality of Communication</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <pages>97-146</pages>
       <url hash="20a69178">J84-2002</url>
       <bibkey>cohen-1984-pragmatics</bibkey>
     </paper>
     <paper id="3">
       <title>On the <fixed-case>F</fixed-case>ass and <fixed-case>W</fixed-case>ilks Proposal to Use “Polysemy Rules”</title>
-      <author><first>David M.</first><last>Carter</last></author>
+      <author id="david-carter"><first>David M.</first><last>Carter</last></author>
       <pages>147-148</pages>
       <url hash="56627c7e">J84-2003</url>
       <bibkey>carter-1984-fass</bibkey>
@@ -169,7 +169,7 @@
     </frontmatter>
     <paper id="1">
       <title>On the Mathematical Properties of Linguistic Theories</title>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
       <pages>165-176</pages>
       <url hash="a39a053e">J84-3001</url>
       <bibkey>perrault-1984-mathematical</bibkey>
@@ -177,21 +177,21 @@
     <paper id="2">
       <title><fixed-case>E</fixed-case>nglish and the Class of Context-Free Languages</title>
       <author><first>Paul M.</first><last>Postal</last></author>
-      <author><first>D. Terence</first><last>Langendoen</last></author>
+      <author id="d-terence-langendoen"><first>D. Terence</first><last>Langendoen</last></author>
       <pages>177-181</pages>
       <url hash="ee0f120d">J84-3002</url>
       <bibkey>postal-langendoen-1984-english</bibkey>
     </paper>
     <paper id="3">
       <title>On Two Recent Attempts to Show that <fixed-case>E</fixed-case>nglish Is Not a <fixed-case>CFL</fixed-case></title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <pages>182-186</pages>
       <url hash="5f398bb4">J84-3003</url>
       <bibkey>pullum-1984-two</bibkey>
     </paper>
     <paper id="4">
       <title>Comments on <fixed-case>P</fixed-case>ullum’s Criticisms</title>
-      <author><first>D. Terence</first><last>Langendoen</last></author>
+      <author id="d-terence-langendoen"><first>D. Terence</first><last>Langendoen</last></author>
       <author><first>Paul M.</first><last>Postal</last></author>
       <pages>186-188</pages>
       <url hash="09956278">J84-3004</url>
@@ -199,14 +199,14 @@
     </paper>
     <paper id="5">
       <title>Strong Generative Capacity, Weak Generative Capacity, and Modern Linguistic Theories</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <pages>189-202</pages>
       <url hash="e6dd4dab">J84-3005</url>
       <bibkey>berwick-1984-strong</bibkey>
     </paper>
     <paper id="6">
       <title>Book Review: A Grammar of <fixed-case>E</fixed-case>nglish on Mathematical Principles</title>
-      <author><first>Bruce E.</first><last>Nevin</last></author>
+      <author id="bruce-e-nevin"><first>Bruce E.</first><last>Nevin</last></author>
       <url hash="2a2bb290">J84-3006</url>
       <bibkey>nevin-1984-book</bibkey>
     </paper>
diff --git a/data/xml/J85.xml b/data/xml/J85.xml
index be47323488..9d0fd024b3 100644
--- a/data/xml/J85.xml
+++ b/data/xml/J85.xml
@@ -22,22 +22,22 @@
     <paper id="2">
       <title>Taum-Aviation: Its Technical Features and Some Experimental Results</title>
       <author><first>Pierre</first><last>Isabelle</last></author>
-      <author><first>Laurent</first><last>Bourbeau</last></author>
+      <author id="laurent-bourbeau"><first>Laurent</first><last>Bourbeau</last></author>
       <pages>18-27</pages>
       <url hash="7b7b8971">J85-1002</url>
       <bibkey>isabelle-bourbeau-1985-taum</bibkey>
     </paper>
     <paper id="3">
       <title>Automated Translation at Grenoble University</title>
-      <author><first>Bernard</first><last>Vauquois</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="bernard-vauquois"><first>Bernard</first><last>Vauquois</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>28-36</pages>
       <url hash="7399c1fc">J85-1003</url>
       <bibkey>vauquois-boitet-1985-automated</bibkey>
     </paper>
     <paper id="4">
       <title>Book Review: THE LOGIC OF MIND</title>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <url hash="c82ff819">J85-1004</url>
       <bibkey>israel-1985-book</bibkey>
     </paper>
@@ -96,9 +96,9 @@
     </frontmatter>
     <paper id="1">
       <title>The <fixed-case>J</fixed-case>apanese Government Project for Machine Translation</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
       <pages>91-110</pages>
       <url hash="21b2cd36">J85-2001</url>
       <bibkey>nagao-etal-1985-japanese</bibkey>
@@ -133,7 +133,7 @@
       <title><fixed-case>EUROTRA</fixed-case>: A Multilingual System under Development</title>
       <author><first>Rod</first><last>Johnson</last></author>
       <author><first>Maghi</first><last>King</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <pages>155-169</pages>
       <url hash="3780c148">J85-2005</url>
       <bibkey>johnson-etal-1985-eurotra</bibkey>
@@ -206,14 +206,14 @@
     </frontmatter>
     <paper id="1">
       <title>On the Complexity of <fixed-case>ID</fixed-case>/<fixed-case>LP</fixed-case> Parsing</title>
-      <author><first>G. Edward</first><last>Barton, Jr.</last></author>
+      <author id="g-edward-barton"><first>G. Edward</first><last>Barton, Jr.</last></author>
       <pages>205-218</pages>
       <url hash="5ea94328">J85-4001</url>
       <bibkey>barton-jr-1985-complexity</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>PHRED</fixed-case>: A Generator for Natural Language Interfaces</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <pages>219-242</pages>
       <url hash="9d246899">J85-4002</url>
       <bibkey>jacobs-1985-phred</bibkey>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="5">
       <title>Information Retrieval Experiment</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="df335b24">J85-4005</url>
       <bibkey>evens-1985-information</bibkey>
     </paper>
diff --git a/data/xml/J86.xml b/data/xml/J86.xml
index e4ead36f6a..c45d42657d 100644
--- a/data/xml/J86.xml
+++ b/data/xml/J86.xml
@@ -14,15 +14,15 @@
     </frontmatter>
     <paper id="1">
       <title>Resolving Lexical Ambiguity in a Deterministic Parser</title>
-      <author><first>Robert</first><last>Milne</last></author>
+      <author id="robert-milne"><first>Robert</first><last>Milne</last></author>
       <pages>1-12</pages>
       <url hash="74fed9e1">J86-1001</url>
       <bibkey>milne-1986-resolving</bibkey>
     </paper>
     <paper id="2">
       <title>The Correction of Ill-Formed Input Using History-Based Expectation with Applications to Speech Understanding</title>
-      <author><first>Pamela E.</first><last>Fink</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="pamela-e-fink"><first>Pamela E.</first><last>Fink</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <pages>13-36</pages>
       <url hash="fb5c6951">J86-1002</url>
       <bibkey>fink-biermann-1986-correction</bibkey>
@@ -36,13 +36,13 @@
     </paper>
     <paper id="4">
       <title>Book Reviews: Surface Compositional Grammar</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <url hash="727c3472">J86-1004</url>
       <bibkey>polguere-1986-book</bibkey>
     </paper>
     <paper id="5">
       <title>Book Reviews: Talking Minds</title>
-      <author><first>Helen</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen</first><last>Gigley</last></author>
       <url hash="2462d587">J86-1005</url>
       <bibkey>gigley-1986-book</bibkey>
     </paper>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>F</fixed-case>inite <fixed-case>S</fixed-case>tring Newsletter</title>
-      <author><first>Bernard</first><last>Vauquois</last></author>
+      <author id="bernard-vauquois"><first>Bernard</first><last>Vauquois</last></author>
       <url hash="8c88db66">J86-1007</url>
       <bibkey>vauquois-1986-finite</bibkey>
     </paper>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="15">
       <title>Three Titles from the <fixed-case>C</fixed-case>ambridge Series: <fixed-case>S</fixed-case>TUDIES IN <fixed-case>N</fixed-case>ATURAL <fixed-case>L</fixed-case>ANGUAGE <fixed-case>P</fixed-case>ROCESSING</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <url hash="35018049">J86-1015</url>
       <bibkey>joshi-1986-three</bibkey>
     </paper>
@@ -121,9 +121,9 @@
     </paper>
     <paper id="2">
       <title>Summarizing Natural Language Database Responses</title>
-      <author><first>Jugal K.</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal K.</first><last>Kalita</last></author>
       <author><first>Marlene L.</first><last>Jones</last></author>
-      <author><first>Gordon I.</first><last>McCalla</last></author>
+      <author id="gordon-i-mccalla"><first>Gordon I.</first><last>McCalla</last></author>
       <pages>107-124</pages>
       <url hash="114a63b0">J86-2002</url>
       <bibkey>kalita-etal-1986-summarizing</bibkey>
@@ -143,7 +143,7 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: Communicating with Databases in Natural Language</title>
-      <author><first>Stan</first><last>Kwasny</last></author>
+      <author id="stan-c-kwasny"><first>Stan</first><last>Kwasny</last></author>
       <url hash="ce93dbd6">J86-2005</url>
       <bibkey>kwasny-1986-book</bibkey>
     </paper>
@@ -208,17 +208,17 @@
     </frontmatter>
     <paper id="1">
       <title>Attention, Intentions, and the Structure of Discourse</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <pages>175-204</pages>
       <url hash="b7d85d2d">J86-3001</url>
       <bibkey>grosz-sidner-1986-attention</bibkey>
     </paper>
     <paper id="2">
       <title>Discovery Procedures for Sublanguage Selectional Patterns: Initial Experiments</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Ngo Thanh</first><last>Nhan</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="ngo-thanh-nhan"><first>Ngo Thanh</first><last>Nhan</last></author>
       <pages>205-215</pages>
       <url hash="25cdaa4a">J86-3002</url>
       <bibkey>grishman-etal-1986-discovery</bibkey>
@@ -243,7 +243,7 @@
     </paper>
     <paper id="6">
       <title>The <fixed-case>F</fixed-case>inite <fixed-case>S</fixed-case>tring Newsletter: Site Report: Another From the <fixed-case>DARPA</fixed-case> Series, Overview of the <fixed-case>TACITUS</fixed-case> Project</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="9567c93d">J86-3006</url>
       <bibkey>hobbs-1986-finite</bibkey>
     </paper>
@@ -287,8 +287,8 @@
     </frontmatter>
     <paper id="1">
       <title>Associative Model of Morphological Analysis: An Empirical Inquiry</title>
-      <author><first>Harri</first><last>Jäppinen</last></author>
-      <author><first>Matti</first><last>Ylilammi</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
+      <author id="matti-ylilammi"><first>Matti</first><last>Ylilammi</last></author>
       <pages>257-272</pages>
       <url hash="88ba0dc7">J86-4001</url>
       <bibkey>jappinen-ylilammi-1986-associative</bibkey>
@@ -309,13 +309,13 @@
     </paper>
     <paper id="4">
       <title>Book Reviews: Natural Language Computing: The Commercial Applications</title>
-      <author><first>Mark</first><last>Jones</last></author>
+      <author id="mark-jones"><first>Mark</first><last>Jones</last></author>
       <url hash="0d0e91e2">J86-4004</url>
       <bibkey>jones-1986-book</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>B</fixed-case>oolean Semantics for Natural Language</title>
-      <author><first>Lawrence</first><last>Moss</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence</first><last>Moss</last></author>
       <url hash="c911459f">J86-4005</url>
       <bibkey>moss-1986-boolean</bibkey>
     </paper>
diff --git a/data/xml/J87.xml b/data/xml/J87.xml
index 79ffded0cf..ec027cc1b9 100644
--- a/data/xml/J87.xml
+++ b/data/xml/J87.xml
@@ -13,7 +13,7 @@
     </frontmatter>
     <paper id="1">
       <title>Restricting Logic Grammars with Government-Binding Theory</title>
-      <author><first>Edward P.</first><last>Stabler, Jr.</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler, Jr.</last></author>
       <pages>1-10</pages>
       <url hash="2f308073">J87-1001</url>
       <bibkey>stabler-jr-1987-restricting</bibkey>
@@ -27,8 +27,8 @@
     </paper>
     <paper id="3">
       <title>Simultaneous-Distributive Coordination and Context-Freeness</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <author><first>William C.</first><last>Rounds</last></author>
       <pages>25-30</pages>
       <url hash="e92e36b5">J87-1003</url>
@@ -36,30 +36,30 @@
     </paper>
     <paper id="4">
       <title>An Efficient Augmented-Context-Free Parsing Algorithm</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <pages>31-46</pages>
       <url hash="5ae03f10">J87-1004</url>
       <bibkey>tomita-1987-efficient</bibkey>
     </paper>
     <paper id="5">
       <title>An Algorithm for Generating Quantifier Scopings</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>47-63</pages>
       <url hash="10a0ab2d">J87-1005</url>
       <bibkey>hobbs-shieber-1987-algorithm</bibkey>
     </paper>
     <paper id="6">
       <title>Subject-Verb Agreement in Respective Coordinations and Context Freeness</title>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <pages>64-65</pages>
       <url hash="48cb1d04">J87-1006</url>
       <bibkey>manaster-ramer-1987-subject</bibkey>
     </paper>
     <paper id="7">
       <title>A Note on a Study of Cases</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <pages>65-68</pages>
       <url hash="3ae61560">J87-1007</url>
       <bibkey>sparck-jones-boguraev-1987-note</bibkey>
@@ -84,13 +84,13 @@
     </paper>
     <paper id="11">
       <title>Book Reviews: Electronic Synthesis of Speech</title>
-      <author><first>William M.</first><last>Fisher</last></author>
+      <author id="william-m-fisher"><first>William M.</first><last>Fisher</last></author>
       <url hash="039c1edd">J87-1011</url>
       <bibkey>fisher-1987-book</bibkey>
     </paper>
     <paper id="12">
       <title>Book Reviews: Readings in Knowledge Representation</title>
-      <author><first>Helen</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen</first><last>Gigley</last></author>
       <url hash="fc9cef78">J87-1012</url>
       <bibkey>gigley-1987-book</bibkey>
     </paper>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="14">
       <title>Book Reviews: Planning and Understanding: A Computational Approach to Human Reasoning</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="d9f8c109">J87-1014</url>
       <bibkey>somers-1987-book</bibkey>
     </paper>
@@ -171,15 +171,15 @@
     </frontmatter>
     <paper id="1">
       <title>Processing Dictionary Definitions with Phrasal Pattern Hierarchies</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <pages>195-202</pages>
       <url hash="ce589373">J87-3001</url>
       <bibkey>alshawi-1987-processing</bibkey>
     </paper>
     <paper id="2">
       <title>Large Lexicons for Natural Language Processing: Utilising the Grammar Coding System of <fixed-case>LDOCE</fixed-case></title>
-      <author><first>Bran</first><last>Boguraev</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="branimir-boguraev"><first>Bran</first><last>Boguraev</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>203-218</pages>
       <url hash="96c2bf5c">J87-3002</url>
       <bibkey>boguraev-briscoe-1987-large</bibkey>
@@ -187,10 +187,10 @@
     <paper id="3">
       <title>Tools and Methods for Computational Linguistics</title>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Martin S.</first><last>Chodorow</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Mary S.</first><last>Neff</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="martin-chodorow"><first>Martin S.</first><last>Chodorow</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
       <author><first>Omneya A.</first><last>Rizk</last></author>
       <pages>219-240</pages>
       <url hash="08ac6819">J87-3003</url>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="4">
       <title>Commonsense Metaphysics and Lexical Semantics</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>William</first><last>Croft</last></author>
       <author><first>Todd</first><last>Davies</last></author>
       <author><first>Douglas</first><last>Edwards</last></author>
@@ -209,7 +209,7 @@
     </paper>
     <paper id="5">
       <title>Disambiguating Prepositional Phrase Attachments by Using On-Line Dictionary Definitions</title>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <author><first>Jean-Louis</first><last>Binot</last></author>
       <pages>251-260</pages>
       <url hash="33367850">J87-3005</url>
@@ -217,15 +217,15 @@
     </paper>
     <paper id="6">
       <title>A Formal Lexicon in Meaning-Text Theory (Or How to Do Lexica with Words)</title>
-      <author><first>Igor</first><last>Mel’čuk</last></author>
-      <author><first>Alain</first><last>Polguere</last></author>
+      <author id="igor-melcuk"><first>Igor</first><last>Mel’čuk</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguere</last></author>
       <pages>261-275</pages>
       <url hash="a1af625a">J87-3006</url>
       <bibkey>melcuk-polguere-1987-formal</bibkey>
     </paper>
     <paper id="7">
       <title>The Subworld Concept Lexicon and the Lexicon Management System</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
       <pages>276-289</pages>
       <url hash="e55926ed">J87-3007</url>
@@ -233,10 +233,10 @@
     </paper>
     <paper id="8">
       <title>A Computational Framework for Lexical Description</title>
-      <author><first>Graeme D.</first><last>Ritchie</last></author>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
-      <author><first>Graham J.</first><last>Russell</last></author>
+      <author id="graeme-ritchie"><first>Graeme D.</first><last>Ritchie</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
+      <author id="graham-russell"><first>Graham J.</first><last>Russell</last></author>
       <pages>290-307</pages>
       <url hash="3f2fd076">J87-3008</url>
       <bibkey>ritchie-etal-1987-computational</bibkey>
@@ -244,7 +244,7 @@
     <paper id="9">
       <title>The Self-Extending Phrasal Lexicon</title>
       <author><first>Uri</first><last>Zernik</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <pages>308-327</pages>
       <url hash="4e9df60d">J87-3009</url>
       <bibkey>zernik-dyer-1987-self</bibkey>
diff --git a/data/xml/J88.xml b/data/xml/J88.xml
index 53bad4fbd1..736a1d5952 100644
--- a/data/xml/J88.xml
+++ b/data/xml/J88.xml
@@ -15,7 +15,7 @@
     <paper id="1">
       <title>Category Structures</title>
       <author><first>Gerald</first><last>Gazdar</last></author>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <author><first>Robert</first><last>Carpenter</last></author>
       <author><first>Ewan</first><last>Klein</last></author>
       <author><first>Thomas E.</first><last>Hukari</last></author>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Computer Speech Processing</title>
-      <author><first>John C.</first><last>Thomas</last></author>
+      <author id="john-c-thomas"><first>John C.</first><last>Thomas</last></author>
       <url hash="d819721a">J88-1007</url>
       <bibkey>thomas-1988-book</bibkey>
     </paper>
@@ -71,7 +71,7 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Machine Translation: Theoretical and Methodological Issues</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="56e4e727">J88-1009</url>
       <bibkey>somers-1988-book</bibkey>
     </paper>
@@ -115,14 +115,14 @@
     </frontmatter>
     <paper id="1">
       <title>Foreword to Special Issue on Tense and Aspect</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <pages>1-2</pages>
       <url hash="d15e3f5a">J88-2001</url>
       <bibkey>webber-1988-foreword</bibkey>
     </paper>
     <paper id="2">
       <title>Tense, Quantifiers, and Contexts</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <pages>3-14</pages>
       <url hash="bbfb045a">J88-2002</url>
       <bibkey>hinrichs-1988-tense</bibkey>
@@ -130,7 +130,7 @@
     <paper id="3">
       <title>Temporal Ontology and Temporal Reference</title>
       <author><first>Marc</first><last>Moens</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>15-28</pages>
       <url hash="6947c95e">J88-2003</url>
       <bibkey>moens-steedman-1988-temporal</bibkey>
@@ -144,14 +144,14 @@
     </paper>
     <paper id="5">
       <title>A Computational Model of the Semantics of Tense and Aspect</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>44-60</pages>
       <url hash="d7481439">J88-2005</url>
       <bibkey>passonneau-1988-computational</bibkey>
     </paper>
     <paper id="6">
       <title>Tense as Discourse Anaphor</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <pages>61-73</pages>
       <url hash="0609460f">J88-2006</url>
       <bibkey>webber-1988-tense</bibkey>
@@ -176,13 +176,13 @@
     </paper>
     <paper id="10">
       <title>Book Reviews: <fixed-case>J</fixed-case>apanese Phrase Structure Grammar: A Unification-Based Approach</title>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <url hash="68b8fa06">J88-2010</url>
       <bibkey>whitelock-1988-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: <fixed-case>P</fixed-case>rolog and Natural-Language Analysis</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="afcd40bb">J88-2011</url>
       <bibkey>saint-dizier-1988-book</bibkey>
     </paper>
@@ -234,23 +234,23 @@
     <paper id="2">
       <title>Modeling the User in Natural Language Systems</title>
       <author><first>Robert</first><last>Kass</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>5-22</pages>
       <url hash="363cce28">J88-3002</url>
       <bibkey>kass-finin-1988-modeling</bibkey>
     </paper>
     <paper id="3">
       <title>Modeling the User’s Plans and Goals</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author><first>Margot</first><last>Flowers</last></author>
       <pages>23-37</pages>
       <url hash="495c95ae">J88-3003</url>
-      <author><first>Margot</first><last>Flowers</last></author>
       <bibkey>carberry-flowers-1988-modeling</bibkey>
     </paper>
     <paper id="4">
       <title>Recognizing and Responding to Plan-Oriented Misconceptions</title>
       <author><first>Alex</first><last>Quilici</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <author><first>Margot</first><last>Flowers</last></author>
       <pages>38-51</pages>
       <url hash="e7554cbf">J88-3004</url>
@@ -258,14 +258,14 @@
     </paper>
     <paper id="5">
       <title>Reasoning on a Highlighted User Model to Respond to Misconceptions</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <pages>52-63</pages>
       <url hash="919267b0">J88-3005</url>
       <bibkey>mccoy-1988-reasoning</bibkey>
     </paper>
     <paper id="6">
       <title>Tailoring Object Descriptions to a User’s Level of Expertise</title>
-      <author><first>Cecile L.</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
       <pages>64-78</pages>
       <url hash="9fa3b7e4">J88-3006</url>
       <bibkey>paris-1988-tailoring</bibkey>
@@ -307,13 +307,13 @@
     </paper>
     <paper id="13">
       <title>User Models, Discourse Models, and Some Others</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="dd7f111d">J88-3013</url>
       <bibkey>sparck-jones-1988-user</bibkey>
     </paper>
     <paper id="14">
       <title>Distinguishing User Models From Discourse Models</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <url hash="6a0f436c">J88-3014</url>
       <bibkey>wahlster-1988-distinguishing</bibkey>
     </paper>
@@ -331,25 +331,25 @@
     </paper>
     <paper id="17">
       <title>Book Reviews: Natural Language Generation, New Results in Artificial Intelligence, Psychology, and Linguistics</title>
-      <author><first>Marie</first><last>Bienkowski</last></author>
+      <author id="marie-a-bienkowski"><first>Marie</first><last>Bienkowski</last></author>
       <url hash="ba4351ef">J88-3017</url>
       <bibkey>bienkowski-1988-book</bibkey>
     </paper>
     <paper id="18">
       <title>Book Reviews: The Linguistic Basis of Text Generation</title>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <url hash="fde81b2b">J88-3018</url>
       <bibkey>mccoy-1988-book</bibkey>
     </paper>
     <paper id="19">
       <title>Book Reviews: Cognitive Science: An Introduction</title>
-      <author><first>Helen M.</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen M.</first><last>Gigley</last></author>
       <url hash="cf602e96">J88-3019</url>
       <bibkey>gigley-1988-book</bibkey>
     </paper>
     <paper id="20">
       <title>Book Reviews: Machine Translation: Past, Present, Future</title>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <url hash="b51d8f00">J88-3020</url>
       <bibkey>kittredge-1988-book</bibkey>
     </paper>
@@ -388,7 +388,7 @@
       <author><first>Robert</first><last>Wilensky</last></author>
       <author><first>David N.</first><last>Chin</last></author>
       <author><first>Marc</first><last>Luria</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Dekai</first><last>Wu</last></author>
       <url hash="cf6d949d">J88-4003</url>
@@ -408,7 +408,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Language and Information</title>
-      <author><first>Bruce</first><last>Nevin</last></author>
+      <author id="bruce-e-nevin"><first>Bruce</first><last>Nevin</last></author>
       <url hash="5e257d5a">J88-4006</url>
       <bibkey>nevin-1988-book</bibkey>
     </paper>
@@ -420,31 +420,31 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Semantic Interpretation and the Resolution of Ambiguity</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="c2555bc8">J88-4008</url>
       <bibkey>sparck-jones-1988-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: The Fifth Generation Fallacy: Why <fixed-case>J</fixed-case>apan is Betting Its Future on Artificial Intelligence</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="ec8e55aa">J88-4009</url>
       <bibkey>somers-1988-book-reviews</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: Natural Language Understanding</title>
-      <author><first>Michael</first><last>Kac</last></author>
+      <author id="michael-b-kac"><first>Michael</first><last>Kac</last></author>
       <url hash="751ddf3c">J88-4010</url>
       <bibkey>kac-1988-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: A Natural Language Interface for Computer-Aided Design</title>
-      <author><first>Bruce</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce</first><last>Ballard</last></author>
       <url hash="fcad1d41">J88-4011</url>
       <bibkey>ballard-1988-book</bibkey>
     </paper>
     <paper id="12">
       <title>Book Reviews: The Formal Complexity of Natural Language</title>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <url hash="e05b3398">J88-4012</url>
       <bibkey>manaster-ramer-1988-book</bibkey>
     </paper>
diff --git a/data/xml/J89.xml b/data/xml/J89.xml
index a7170d53cd..45af0de917 100644
--- a/data/xml/J89.xml
+++ b/data/xml/J89.xml
@@ -21,7 +21,7 @@
     </paper>
     <paper id="2">
       <title>Syntactic Graphs: A Representation for the Union of All Ambiguous Parse Trees</title>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <author><first>Robert F.</first><last>Simmons</last></author>
       <pages>19-32</pages>
       <url hash="69333566">J89-1002</url>
@@ -29,14 +29,14 @@
     </paper>
     <paper id="3">
       <title>Design of <fixed-case>LMT</fixed-case>: A <fixed-case>P</fixed-case>rolog-Based Machine Translation System</title>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <pages>33-52</pages>
       <url hash="e4f9aa95">J89-1003</url>
       <bibkey>mccord-1989-design</bibkey>
     </paper>
     <paper id="4">
       <title>Book Reviews: An Artificial Intelligence Approach to Legal Reasoning</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="70374a4d">J89-1004</url>
       <bibkey>evens-1989-book</bibkey>
     </paper>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Computer Interpretation of Natural Language Descriptions</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="8399ea7f">J89-1008</url>
       <bibkey>dahl-1989-book</bibkey>
     </paper>
@@ -109,14 +109,14 @@
     </frontmatter>
     <paper id="1">
       <title>A Pragmatic-Based Approach to Ellipsis Resolution</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>75-96</pages>
       <url hash="7d3c6f06">J89-2001</url>
       <bibkey>carberry-1989-pragmatic</bibkey>
     </paper>
     <paper id="2">
       <title>Parsing with a Small Dictionary for Applications such as Text to Speech</title>
-      <author><first>Douglas D.</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas D.</first><last>O’Shaughnessy</last></author>
       <pages>97-108</pages>
       <url hash="539832a3">J89-2002</url>
       <bibkey>oshaughnessy-1989-parsing</bibkey>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: The Case for Lexicase: An Outline of Lexicase Grammatical Theory</title>
-      <author><first>Norman</first><last>Fraser</last></author>
+      <author id="norman-m-fraser"><first>Norman</first><last>Fraser</last></author>
       <url hash="30f2d6e9">J89-2005</url>
       <bibkey>fraser-1989-book</bibkey>
     </paper>
@@ -155,31 +155,31 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Machine Translation Today: The State of the Ar</title>
-      <author><first>John</first><last>Hutchins</last></author>
+      <author id="w-john-hutchins"><first>John</first><last>Hutchins</last></author>
       <url hash="f54511af">J89-2008</url>
       <bibkey>hutchins-1989-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Advances in Natural Language Generation: An Interdisciplinary Perspective</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <url hash="68f67add">J89-2009</url>
       <bibkey>meteer-1989-book</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: Natural Language Parsing Systems</title>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <url hash="26e158a8">J89-2010</url>
       <bibkey>sgall-1989-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: Philosophy, Language, and Artificial Intelligence: Resources for Processing Natural Language</title>
-      <author><first>Peter</first><last>Ludlow</last></author>
+      <author id="peter-j-ludlow"><first>Peter</first><last>Ludlow</last></author>
       <url hash="47fcbf23">J89-2011</url>
       <bibkey>ludlow-1989-book</bibkey>
     </paper>
     <paper id="12">
       <title>Book Reviews: Systemic Text Generation as Problem Solving</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="9c369344">J89-2012</url>
       <bibkey>hovy-1989-book</bibkey>
     </paper>
@@ -225,15 +225,15 @@
     </paper>
     <paper id="2">
       <title>Knowledge Representation for Commonsense Reasoning with Text</title>
-      <author><first>Kathleen</first><last>Dahlgren</last></author>
-      <author><first>Joyce</first><last>McDowell</last></author>
+      <author id="kathleen-dahlgren"><first>Kathleen</first><last>Dahlgren</last></author>
+      <author id="joyce-mcdowell"><first>Joyce</first><last>McDowell</last></author>
       <pages>149-170</pages>
       <url hash="656965e4">J89-3002</url>
       <bibkey>dahlgren-mcdowell-1989-knowledge</bibkey>
     </paper>
     <paper id="3">
       <title>Non-singular Concepts in Natural Language Discourse</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Nick</first><last>Cercone</last></author>
       <pages>171-186</pages>
       <url hash="5b4e2f00">J89-3003</url>
@@ -241,13 +241,13 @@
     </paper>
     <paper id="4">
       <title>Book Reviews: Natural Language Understanding and Logic Programming, <fixed-case>II</fixed-case>: Proceedings of the Second International Workshop</title>
-      <author><first>Janusz S.</first><last>Bien</last></author>
+      <author id="janusz-stanislaw-bien"><first>Janusz S.</first><last>Bien</last></author>
       <url hash="f9ed34ae">J89-3004</url>
       <bibkey>bien-1989-book</bibkey>
     </paper>
     <paper id="5">
       <title>Language and Spatial Cognition</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="ca4271e8">J89-3005</url>
       <bibkey>pustejovsky-1989-language</bibkey>
     </paper>
@@ -271,32 +271,32 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Medical Language Processing: Computer Management of Narrative Data</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <url hash="5b9c0f68">J89-3009</url>
       <bibkey>calzolari-1989-book</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: Information-based Syntax and Semantics. Vol 1: Fundamentals</title>
-      <author><first>Edward P.</first><last>Stabler, Jr.</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler, Jr.</last></author>
       <url hash="d630fddf">J89-3010</url>
       <bibkey>stabler-jr-1989-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: Machine Translation Systems</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="a02a7e5c">J89-3011</url>
       <bibkey>white-1989-book</bibkey>
     </paper>
     <paper id="12">
       <title>Book Reviews: Natural Language Processing</title>
-      <author><first>Elena</first><last>Pascaleva</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Pascaleva</last></author>
       <author><first>Dan</first><last>Fass</last></author>
       <url hash="aefd45c5">J89-3012</url>
       <bibkey>pascaleva-fass-1989-book</bibkey>
     </paper>
     <paper id="13">
       <title>Book Reviews: Text Coherence in Translation</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <url hash="380fd530">J89-3013</url>
       <bibkey>dimarco-1989-book</bibkey>
     </paper>
@@ -308,7 +308,7 @@
     </paper>
     <paper id="15">
       <title>Book Reviews: Knowledge Systems and <fixed-case>P</fixed-case>rolog: A Logical Approach to Expert Systems and Natural Language Processing</title>
-      <author><first>Stan</first><last>Kwasny</last></author>
+      <author id="stan-c-kwasny"><first>Stan</first><last>Kwasny</last></author>
       <url hash="3671b369">J89-3015</url>
       <bibkey>kwasny-1989-book</bibkey>
     </paper>
@@ -347,15 +347,15 @@
     </frontmatter>
     <paper id="1">
       <title>A Parsing Algorithm for Unification Grammar</title>
-      <author><first>Andrew</first><last>Haas</last></author>
+      <author id="andrew-haas"><first>Andrew</first><last>Haas</last></author>
       <pages>219-232</pages>
       <url hash="d0d282ae">J89-4001</url>
       <bibkey>haas-1989-parsing</bibkey>
     </paper>
     <paper id="2">
       <title>Natural Language Generation from Plans</title>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>233-249</pages>
       <url hash="98a03780">J89-4002</url>
       <bibkey>mellish-evans-1989-natural</bibkey>
@@ -369,19 +369,19 @@
     </paper>
     <paper id="4">
       <title>Book Reviews: An Introduction to Formal Language Theory</title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <url hash="29eebdb1">J89-4004</url>
       <bibkey>pullum-1989-book</bibkey>
     </paper>
     <paper id="5">
       <title>Book Reviews: Attribute-Value Logic and the Theory of Grammar</title>
-      <author><first>Robert</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert</first><last>Kuhns</last></author>
       <url hash="6ce40098">J89-4005</url>
       <bibkey>kuhns-1989-book</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: New Directions in Machine Translation (Proceedings of the Conference, <fixed-case>B</fixed-case>udapest, <fixed-case>A</fixed-case>ugust 1988)</title>
-      <author><first>Esmeralda</first><last>Manandise</last></author>
+      <author id="esmeralda-manandise"><first>Esmeralda</first><last>Manandise</last></author>
       <url hash="aebdc381">J89-4006</url>
       <bibkey>manandise-1989-book</bibkey>
     </paper>
diff --git a/data/xml/J90.xml b/data/xml/J90.xml
index 775d971ace..dc6330897b 100644
--- a/data/xml/J90.xml
+++ b/data/xml/J90.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Categorial Semantics and Scoping</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <pages>1-10</pages>
       <url hash="cc4a9242">J90-1001</url>
       <bibkey>pereira-1990-categorial</bibkey>
@@ -22,14 +22,14 @@
     <paper id="2">
       <title>An Interpretation of Negation in Feature Structure Descriptions</title>
       <author><first>Anuj</first><last>Dawar</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>11-21</pages>
       <url hash="ce44b33c">J90-1002</url>
       <bibkey>dawar-vijay-shanker-1990-interpretation</bibkey>
     </paper>
     <paper id="3">
       <title>Word Association Norms, Mutual Information, and Lexicography</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
       <pages>22-29</pages>
       <url hash="33b74ea7">J90-1003</url>
@@ -37,23 +37,23 @@
     </paper>
     <paper id="4">
       <title>Semantic-Head-Driven Generation</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>30-42</pages>
       <url hash="048cc9cc">J90-1004</url>
       <bibkey>shieber-etal-1990-semantic</bibkey>
     </paper>
     <paper id="5">
       <title>Letter to the Editor</title>
-      <author><first>Michael B.</first><last>Kac</last></author>
+      <author id="michael-b-kac"><first>Michael B.</first><last>Kac</last></author>
       <url hash="647a104a">J90-1005</url>
       <bibkey>kac-1990-letter</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Interpreting Anaphors in Natural Language Texts</title>
-      <author><first>Deborah</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah</first><last>Dahl</last></author>
       <url hash="801abaaa">J90-1006</url>
       <bibkey>dahl-1990-book</bibkey>
     </paper>
@@ -65,19 +65,19 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Prosody and Speech Recognition</title>
-      <author><first>Joan</first><last>Bachenko</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
       <url hash="cc9e770d">J90-1008</url>
       <bibkey>bachenko-1990-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: From Syntax to Semantics: Insights from Machine Translation</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="c0f690c4">J90-1009</url>
       <bibkey>somers-1990-book</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: Studies in Computer-Aided Lexicology</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="af5b96a1">J90-1010</url>
       <bibkey>evens-1990-book</bibkey>
     </paper>
@@ -136,13 +136,13 @@
     </paper>
     <paper id="2">
       <title>A Statistical Approach to Machine Translation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>John</first><last>Cocke</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Fredrick</first><last>Jelinek</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="john-cocke"><first>John</first><last>Cocke</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="frederick-jelinek"><first>Fredrick</first><last>Jelinek</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <author><first>Paul S.</first><last>Roossin</last></author>
       <pages>79-85</pages>
       <url hash="cdf8904b">J90-2002</url>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="3">
       <title>An Implementable Semantics for Comparative Constructions</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Amelie</first><last>Banks</last></author>
       <pages>86-112</pages>
       <url hash="55a34de0">J90-2003</url>
@@ -215,7 +215,7 @@
     </frontmatter>
     <paper id="1">
       <title>Resolving Quasi Logical Forms</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <pages>133-144</pages>
       <url hash="b00542fa">J90-3001</url>
       <bibkey>alshawi-1990-resolving</bibkey>
@@ -245,27 +245,27 @@
     </paper>
     <paper id="5">
       <title>Workshop on the Evaluation of Natural Language Processing Systems</title>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>175-181</pages>
       <url hash="9e363d89">J90-3005</url>
       <bibkey>palmer-finin-1990-workshop</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Machine Translation: How Far Can It Go?</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <url hash="626f9c3e">J90-3006</url>
       <bibkey>estival-1990-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Looking Up: An Account of the <fixed-case>COBUILD</fixed-case> <fixed-case>PROJECT</fixed-case> <fixed-case>IN</fixed-case> <fixed-case>LEXICAL</fixed-case> <fixed-case>COMPUTING</fixed-case></title>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <url hash="ee09de77">J90-3007</url>
       <bibkey>boguraev-1990-book</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Generating Natural Language under Pragmatic Constraints</title>
-      <author><first>Wolfgang</first><last>Hoeppner</last></author>
+      <author id="wolfgang-hoeppner"><first>Wolfgang</first><last>Hoeppner</last></author>
       <url hash="3e142586">J90-3008</url>
       <bibkey>hoeppner-1990-book</bibkey>
     </paper>
@@ -305,14 +305,14 @@
     <paper id="1">
       <title>Anaphora Resolution in Slot Grammar</title>
       <author><first>Shalom</first><last>Lappin</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <pages>197-212</pages>
       <url hash="0d13fd61">J90-4001</url>
       <bibkey>lappin-mccord-1990-anaphora</bibkey>
     </paper>
     <paper id="2">
       <title>Sentential Semantics for Propositional Attitudes</title>
-      <author><first>Andrew R.</first><last>Haas</last></author>
+      <author id="andrew-haas"><first>Andrew R.</first><last>Haas</last></author>
       <pages>213-233</pages>
       <url hash="00d0929a">J90-4002</url>
       <bibkey>haas-1990-sentential</bibkey>
diff --git a/data/xml/J91.xml b/data/xml/J91.xml
index 72713cf102..43bb5d487d 100644
--- a/data/xml/J91.xml
+++ b/data/xml/J91.xml
@@ -44,13 +44,13 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: Theory and Practice in Corpus Linguistics</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <url hash="8772e549">J91-1005</url>
       <bibkey>church-1991-book</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Functional Grammar and the Computer</title>
-      <author><first>Norman</first><last>Fraser</last></author>
+      <author id="norman-m-fraser"><first>Norman</first><last>Fraser</last></author>
       <url hash="894e0846">J91-1006</url>
       <bibkey>fraser-1991-book</bibkey>
     </paper>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="8">
       <title>Practical <fixed-case>SGML</fixed-case></title>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <url hash="8c7f980c">J91-1008</url>
       <bibkey>van-ess-dykema-1991-practical</bibkey>
     </paper>
@@ -108,8 +108,8 @@
     </paper>
     <paper id="2">
       <title>How to Encode Semantic Knowledge: A Method for Meaning Representation and Computer-Aided Acquisition</title>
-      <author><first>Paola</first><last>Velardi</last></author>
-      <author><first>Maria Teresa</first><last>Pazienze</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienze</last></author>
       <author><first>Michela</first><last>Fasolo</last></author>
       <pages>153-170</pages>
       <url hash="0b7b562b">J91-2002</url>
@@ -118,7 +118,7 @@
     <paper id="3">
       <title>Semantics of Paragraphs</title>
       <author><first>Wlodek</first><last>Zadrozny</last></author>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <pages>171-210</pages>
       <url hash="5c3fc6d4">J91-2003</url>
       <bibkey>zadrozny-jensen-1991-semantics</bibkey>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: <fixed-case>PC</fixed-case>-<fixed-case>KIMMO</fixed-case>: A Two-Level Processor for Morphological Analysis</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <url hash="47f2580a">J91-2005</url>
       <bibkey>sproat-1991-book</bibkey>
     </paper>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>A</fixed-case>ntilinguistics: A Critical Assessment of Modern Linguistic Theory and Practice</title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <url hash="fb1ffa97">J91-2008</url>
       <bibkey>pullum-1991-antilinguistics</bibkey>
     </paper>
@@ -217,21 +217,21 @@
     </paper>
     <paper id="4">
       <title>Computation of the Probability of Initial Substring Generation by Stochastic Context-Free Grammars</title>
-      <author><first>Frederick</first><last>Jelinek</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
       <pages>315-353</pages>
       <url hash="33f4b0e2">J91-3004</url>
       <bibkey>jelinek-lafferty-1991-computation</bibkey>
     </paper>
     <paper id="5">
       <title>Erratum to: A Statistical Approach to Machine Translation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Fredrick</first><last>Jelinek</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
-      <author><first>John</first><last>Cocke</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="frederick-jelinek"><first>Fredrick</first><last>Jelinek</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
+      <author id="john-cocke"><first>John</first><last>Cocke</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
       <author><first>Paul S.</first><last>Roossin</last></author>
       <pages>79-85</pages>
       <url hash="f04d4753">J91-3005</url>
@@ -251,7 +251,7 @@
     </paper>
     <paper id="8">
       <title>A Computational Model of First Language Acquisition</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <url hash="5b9ef6a0">J91-3008</url>
       <bibkey>berwick-1991-computational</bibkey>
     </paper>
@@ -292,11 +292,11 @@
     </frontmatter>
     <paper id="1">
       <title>An Efficient Natural Language Processing System Specially Designed for the <fixed-case>C</fixed-case>hinese Language</title>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
       <author><first>Long-Ji</first><last>Lin</last></author>
       <author><first>James</first><last>Huang</last></author>
-      <author><first>K. J.</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>K. J.</first><last>Chen</last></author>
       <pages>347-374</pages>
       <url hash="97020dcd">J91-4001</url>
       <bibkey>lee-etal-1991-efficient</bibkey>
@@ -310,7 +310,7 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>409-441</pages>
       <url hash="eff4323d">J91-4003</url>
       <bibkey>pustejovsky-1991-generative</bibkey>
@@ -329,7 +329,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Current Issues in Parsing Technology</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="360edcf9">J91-4006</url>
       <bibkey>kuhns-1991-book</bibkey>
     </paper>
diff --git a/data/xml/J92.xml b/data/xml/J92.xml
index ea534e1afa..cd373bd855 100644
--- a/data/xml/J92.xml
+++ b/data/xml/J92.xml
@@ -14,32 +14,32 @@
     </frontmatter>
     <paper id="1">
       <title>Using Multiple Knowledge Sources for Word Sense Discrimination</title>
-      <author><first>Susan W.</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
       <pages>1-30</pages>
       <url hash="31d89412">J92-1001</url>
       <bibkey>mcroy-1992-using</bibkey>
     </paper>
     <paper id="2">
       <title>An Estimate of an Upper Bound for the Entropy of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Jennifer C.</first><last>Lai</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="jennifer-c-lai"><first>Jennifer C.</first><last>Lai</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <pages>31-40</pages>
       <url hash="ed06cddd">J92-1002</url>
       <bibkey>brown-etal-1992-estimate</bibkey>
     </paper>
     <paper id="3">
       <title>Language Generated by Two-Level Morphological Rules</title>
-      <author><first>Graeme D.</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme D.</first><last>Ritchie</last></author>
       <pages>41-60</pages>
       <url hash="49f71e62">J92-1003</url>
       <bibkey>ritchie-1992-language</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>TINA</fixed-case>: A Natural Language System for Spoken Language Applications</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>61-86</pages>
       <url hash="7fca8689">J92-1004</url>
       <bibkey>seneff-1992-tina</bibkey>
@@ -52,25 +52,25 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: <fixed-case>E</fixed-case>nglish Word Grammar</title>
-      <author><first>Lynne J.</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne J.</first><last>Cahill</last></author>
       <url hash="3d6e4e92">J92-1006</url>
       <bibkey>cahill-1992-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Semantic Structures</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="4baef86a">J92-1007</url>
       <bibkey>wilks-1992-book</bibkey>
     </paper>
     <paper id="8">
       <title>Reference and Computation</title>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <url hash="36a3db7e">J92-1008</url>
       <bibkey>barnden-1992-reference</bibkey>
     </paper>
     <paper id="9">
       <title>Mathematical Methods in Linguistics</title>
-      <author><first>Alexis</first><last>Manaster Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster Ramer</last></author>
       <url hash="68fad546">J92-1009</url>
       <bibkey>manaster-ramer-1992-mathematical</bibkey>
     </paper>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="11">
       <title>Knowledge Representation and Metaphor</title>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <url hash="c37c066e">J92-1011</url>
       <bibkey>martin-1992-knowledge</bibkey>
     </paper>
@@ -126,15 +126,15 @@
     </frontmatter>
     <paper id="1">
       <title>Inheritance in <fixed-case>W</fixed-case>ord <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Norman M.</first><last>Fraser</last></author>
-      <author><first>Richard A.</first><last>Hudson</last></author>
+      <author id="norman-m-fraser"><first>Norman M.</first><last>Fraser</last></author>
+      <author id="richard-a-hudson"><first>Richard A.</first><last>Hudson</last></author>
       <pages>133-158</pages>
       <url hash="1e0c96b7">J92-2001</url>
       <bibkey>fraser-hudson-1992-inheritance</bibkey>
     </paper>
     <paper id="2">
       <title>Inheritance and Constraint-Based Grammar Formalisms</title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <pages>159-182</pages>
       <url hash="23f48ccb">J92-2002</url>
       <bibkey>zajac-1992-inheritance</bibkey>
@@ -148,8 +148,8 @@
     </paper>
     <paper id="4">
       <title>Inheritance in Natural Language Processing</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Gerald</first><last>Gazdar</last></author>
       <pages>205-218</pages>
       <url hash="48bea6f6">J92-2004</url>
@@ -189,17 +189,17 @@
     <paper id="1">
       <title>Making <fixed-case>DATR</fixed-case> Work for Speech: Lexicon Compilation in <fixed-case>SUNDIAL</fixed-case></title>
       <author><first>Francois</first><last>Andry</last></author>
-      <author><first>Norman M.</first><last>Fraser</last></author>
+      <author id="norman-m-fraser"><first>Norman M.</first><last>Fraser</last></author>
       <author><first>Scott</first><last>McGlashan</last></author>
       <author><first>Simon</first><last>Thornton</last></author>
-      <author><first>Nick J.</first><last>Youd</last></author>
+      <author id="nick-j-youd"><first>Nick J.</first><last>Youd</last></author>
       <pages>245-267</pages>
       <url hash="44089aec">J92-3001</url>
       <bibkey>andry-etal-1992-making</bibkey>
     </paper>
     <paper id="2">
       <title>Inheritance and Complementation: a Case Study of Easy Adjectives and Related Nouns</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>John</first><last>Nerbonne</last></author>
       <pages>269-309</pages>
       <url hash="65038212">J92-3002</url>
@@ -207,10 +207,10 @@
     </paper>
     <paper id="3">
       <title>A Practical Approach to Multiple Default Inheritance for Unification-Based Lexicons</title>
-      <author><first>Graham</first><last>Russell</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
       <author><first>Afzal</first><last>Ballim</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Susan</first><last>Warwick-Armstrong</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick-Armstrong</last></author>
       <pages>311-337</pages>
       <url hash="dd4cfb36">J92-3003</url>
       <bibkey>russell-etal-1992-practical</bibkey>
@@ -267,7 +267,7 @@
     </paper>
     <paper id="12">
       <title>Book Reviews: Literature and Cognition</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <url hash="6d0934a6">J92-3012</url>
       <bibkey>wiebe-1992-book</bibkey>
     </paper>
@@ -304,25 +304,25 @@
     </paper>
     <paper id="2">
       <title>Ambiguous Noun Phrases in Logical Form</title>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <pages>419-466</pages>
       <url hash="628de387">J92-4002</url>
       <bibkey>harper-1992-ambiguous</bibkey>
     </paper>
     <paper id="3">
       <title>Class-Based <i>n</i>-gram Models of Natural Language</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Peter V.</first><last>deSouza</last></author>
-      <author><first>Jenifer C.</first><last>Lai</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="peter-v-desouza"><first>Peter V.</first><last>deSouza</last></author>
+      <author id="jennifer-c-lai"><first>Jenifer C.</first><last>Lai</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <pages>467-480</pages>
       <url hash="6a1522f6">J92-4003</url>
       <bibkey>brown-etal-1992-class</bibkey>
     </paper>
     <paper id="4">
       <title>Using Descriptions of Trees in a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
-      <author><first>K</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K</first><last>Vijay-Shanker</last></author>
       <pages>481-518</pages>
       <url hash="c75f3990">J92-4004</url>
       <bibkey>vijay-shanker-1992-using</bibkey>
@@ -344,7 +344,7 @@
     </paper>
     <paper id="7">
       <title>A Problem for <fixed-case>RST</fixed-case>: The Need for Multi-Level Discourse Analysis</title>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Martha E.</first><last>Pollack</last></author>
       <pages>537-544</pages>
       <url hash="4c0060b7">J92-4007</url>
@@ -376,7 +376,7 @@
     </paper>
     <paper id="12">
       <title>Book Reviews: <fixed-case>P</fixed-case>rolog for Natural Language Processing</title>
-      <author><first>Norman M.</first><last>Fraser</last></author>
+      <author id="norman-m-fraser"><first>Norman M.</first><last>Fraser</last></author>
       <url hash="b686dedb">J92-4012</url>
       <bibkey>fraser-1992-book</bibkey>
     </paper>
diff --git a/data/xml/J93.xml b/data/xml/J93.xml
index 59cc36a00d..f42370a94e 100644
--- a/data/xml/J93.xml
+++ b/data/xml/J93.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 19, Number 1, March 1993, Special Issue on Using Large Corpora: <fixed-case>I</fixed-case></booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1993</year>
@@ -17,38 +17,38 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction to the Special Issue on Computational Linguistics Using Large Corpora</title>
-      <author><first>Kenneth W.</first><last>Church</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <pages>1-24</pages>
       <url hash="5200835a">J93-1001</url>
       <bibkey>church-mercer-1993-introduction</bibkey>
     </paper>
     <paper id="2">
       <title>Generalized Probabilistic <fixed-case>LR</fixed-case> Parsing of Natural Language (Corpora) with Unification-Based Grammars</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>25-59</pages>
       <url hash="f7800ecd">J93-1002</url>
       <bibkey>briscoe-carroll-1993-generalized</bibkey>
     </paper>
     <paper id="3">
       <title>Accurate Methods for the Statistics of Surprise and Coincidence</title>
-      <author><first>Ted</first><last>Dunning</last></author>
+      <author id="ted-e-dunning"><first>Ted</first><last>Dunning</last></author>
       <pages>61-74</pages>
       <url hash="b687bf2c">J93-1003</url>
       <bibkey>dunning-1993-accurate</bibkey>
     </paper>
     <paper id="4">
       <title>A Program for Aligning Sentences in Bilingual Corpora</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <pages>75-102</pages>
       <url hash="932f87c3">J93-1004</url>
       <bibkey>gale-church-1993-program</bibkey>
     </paper>
     <paper id="5">
       <title>Structural Ambiguity and Lexical Relations</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <author><first>Mats</first><last>Rooth</last></author>
       <pages>103-120</pages>
       <url hash="d4075483">J93-1005</url>
@@ -64,34 +64,34 @@
     </paper>
     <paper id="7">
       <title>Retrieving Collocations from Text: <fixed-case>X</fixed-case>tract</title>
-      <author><first>Frank</first><last>Smadja</last></author>
+      <author id="frank-smadja"><first>Frank</first><last>Smadja</last></author>
       <pages>143-178</pages>
       <url hash="a4e02f0a">J93-1007</url>
       <bibkey>smadja-1993-retrieving</bibkey>
     </paper>
     <paper id="8">
       <title>The problem of logical form equivalence</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>179-190</pages>
       <url hash="219ea5af">J93-1008</url>
       <bibkey>shieber-1993-problem</bibkey>
     </paper>
     <paper id="9">
       <title>Issues in the choice of a source for Natural Language Generation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <pages>191-197</pages>
       <url hash="562457ea">J93-1009</url>
       <bibkey>mcdonald-1993-issues</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: The Core Language Engine</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="bf5a8d86">J93-1010</url>
       <bibkey>dahl-1993-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: Text Generation and Systemic-Functional Linguistics: Experiences from <fixed-case>E</fixed-case>nglish and <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Terry</first><last>Patten</last></author>
+      <author id="terry-patten"><first>Terry</first><last>Patten</last></author>
       <url hash="c34ab760">J93-1011</url>
       <bibkey>patten-1993-book</bibkey>
     </paper>
@@ -103,19 +103,19 @@
     </paper>
     <paper id="13">
       <title>Book Reviews: Machine Translation: A Knowledge-Based Approach</title>
-      <author><first>Steven</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven</first><last>Lytinen</last></author>
       <url hash="b6600a89">J93-1013</url>
       <bibkey>lytinen-1993-book</bibkey>
     </paper>
     <paper id="14">
       <title>Book Reviews: Corpus Linguistics and the Automatic Analysis of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="627f112b">J93-1014</url>
       <bibkey>briscoe-1993-book</bibkey>
     </paper>
     <paper id="15">
       <title>Book Reviews: Lexical Acquisition: Exploiting On-Line Resources to Build a Lexicon</title>
-      <author><first>Victor</first><last>Sadler</last></author>
+      <author id="victor-sadler"><first>Victor</first><last>Sadler</last></author>
       <url hash="c1d7ca2a">J93-1015</url>
       <bibkey>sadler-1993-book</bibkey>
     </paper>
@@ -133,7 +133,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 19, Number 2, June 1993, Special Issue on Using Large Corpora: <fixed-case>II</fixed-case></booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1993</year>
@@ -161,18 +161,18 @@
     </paper>
     <paper id="3">
       <title>The Mathematics of Statistical Machine Translation: Parameter Estimation</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <pages>263-311</pages>
       <url hash="152b24bd">J93-2003</url>
       <bibkey>brown-etal-1993-mathematics</bibkey>
     </paper>
     <paper id="4">
       <title>Building a Large Annotated Corpus of <fixed-case>E</fixed-case>nglish: The <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <author><first>Mary Ann</first><last>Marcinkiewicz</last></author>
       <pages>313-330</pages>
       <url hash="876fee13">J93-2004</url>
@@ -180,19 +180,19 @@
     </paper>
     <paper id="5">
       <title>Lexical Semantic Techniques for Corpus Analysis</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Sabine</first><last>Bergler</last></author>
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <pages>331-358</pages>
       <url hash="63544e2a">J93-2005</url>
       <bibkey>pustejovsky-etal-1993-lexical</bibkey>
     </paper>
     <paper id="6">
       <title>Coping with Ambiguity and Unknown Words through Probabilistic Models</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
       <author><first>Jeff</first><last>Palmucci</last></author>
       <pages>359-382</pages>
       <url hash="5d862357">J93-2006</url>
@@ -200,7 +200,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: An Introduction to Machine Translation</title>
-      <author><first>Guðrun</first><last>Magnúsdóttir</last></author>
+      <author id="gudrun-magnusdottir"><first>Guðrun</first><last>Magnúsdóttir</last></author>
       <url hash="f7551255">J93-2007</url>
       <bibkey>magnusdottir-1993-book</bibkey>
     </paper>
@@ -218,13 +218,13 @@
     </paper>
     <paper id="10">
       <title>Book Reviews: Principle-Based Parsing: Computation and Psycholinguistics</title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <url hash="17c31dc8">J93-2010</url>
       <bibkey>pullum-1993-book</bibkey>
     </paper>
     <paper id="11">
       <title>Book Reviews: Questions and Information Systems</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="ca58fe8a">J93-2011</url>
       <bibkey>white-1993-book</bibkey>
     </paper>
@@ -248,7 +248,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 19, Number 3, September 1993</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1993</year>
@@ -262,16 +262,16 @@
     </frontmatter>
     <paper id="1">
       <title>Evaluating Message Understanding Systems: An Analysis of the Third <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onference (<fixed-case>MUC</fixed-case>-3)</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <pages>409-450</pages>
       <url hash="4dac6b82">J93-3001</url>
       <bibkey>chinchor-etal-1993-evaluating</bibkey>
     </paper>
     <paper id="2">
       <title>A Computational Theory of Goal-Directed Style in Syntax</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>451-500</pages>
       <url hash="044b50bb">J93-3002</url>
@@ -280,7 +280,7 @@
     <paper id="3">
       <title>Empirical Studies on the Disambiguation of Cue Phrases</title>
       <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>501-530</pages>
       <url hash="36cdc99f">J93-3003</url>
       <bibkey>hirschberg-litman-1993-empirical</bibkey>
@@ -294,7 +294,7 @@
     </paper>
     <paper id="5">
       <title>Book Reviews: Ontologie und Axiomatik der Wissensbasis von <fixed-case>LILOG</fixed-case></title>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <url hash="1d27f352">J93-3005</url>
       <bibkey>bateman-1993-book</bibkey>
     </paper>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="8">
       <title>Connectionist Approaches to Natural Language Processing</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <url hash="c96a6abb">J93-3008</url>
       <bibkey>henderson-1993-connectionist</bibkey>
     </paper>
@@ -341,7 +341,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 19, Number 4, <fixed-case>D</fixed-case>ecember 1993</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1993</year>
@@ -355,44 +355,44 @@
     </frontmatter>
     <paper id="1">
       <title>The Interface between Phrasal and Functional Constraints</title>
-      <author><first>John T.</first><last>Maxwell</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <pages>571-590</pages>
       <url hash="5dff8d49">J93-4001</url>
       <bibkey>maxwell-kaplan-1993-interface</bibkey>
     </paper>
     <paper id="2">
       <title>Parsing Some Constrained Grammar Formalisms</title>
-      <author><first>K</first><last>Vijay-Shanker</last></author>
-      <author><first>David J.</first><last>Weir</last></author>
+      <author id="k-vijay-shanker"><first>K</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
       <pages>591-636</pages>
       <url hash="1515a593">J93-4002</url>
       <bibkey>vijay-shanker-weir-1993-parsing</bibkey>
     </paper>
     <paper id="3">
       <title>Indexical Expressions in the Scope of Attitude Verbs</title>
-      <author><first>Andrew R.</first><last>Haas</last></author>
+      <author id="andrew-haas"><first>Andrew R.</first><last>Haas</last></author>
       <pages>637-649</pages>
       <url hash="dbbd86ae">J93-4003</url>
       <bibkey>haas-1993-indexical</bibkey>
     </paper>
     <paper id="4">
       <title>Planning Text for Advisory Dialogues: Capturing Intentional and Rhetorical Information</title>
-      <author><first>Johanna D.</first><last>Moore</last></author>
-      <author><first>Cecile L.</first><last>Paris</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
       <pages>651-694</pages>
       <url hash="e6172846">J93-4004</url>
       <bibkey>moore-paris-1993-planning</bibkey>
     </paper>
     <paper id="5">
       <title>Book Reviews: Functional Grammar in <fixed-case>P</fixed-case>rolog: An Integrated Implementation for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench, and <fixed-case>D</fixed-case>utch</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="7f816ff0">J93-4005</url>
       <bibkey>saint-dizier-1993-book</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Natural Language Processing: The <fixed-case>PLNLP</fixed-case> Approach</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="24c9a63c">J93-4006</url>
       <bibkey>jacobs-1993-book</bibkey>
     </paper>
diff --git a/data/xml/J94.xml b/data/xml/J94.xml
index 257eb70fa8..ce0ee5e872 100644
--- a/data/xml/J94.xml
+++ b/data/xml/J94.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 20, Number 1, March 1994</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1994</year>
@@ -33,7 +33,7 @@
     <paper id="3">
       <title>One-Level Phonology: Autosegmental Representations and Rules as Finite Automata</title>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <pages>55-90</pages>
       <url hash="c643b8f5">J94-1003</url>
       <bibkey>bird-ellison-1994-one</bibkey>
@@ -41,7 +41,7 @@
     <paper id="4">
       <title>An Alternative Conception of Tree-Adjoining Derivation</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>91-124</pages>
       <url hash="c4f799c8">J94-1004</url>
       <bibkey>schabes-shieber-1994-alternative</bibkey>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Generating Referring Expressions</title>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="16e476b1">J94-1006</url>
       <bibkey>scott-1994-book</bibkey>
     </paper>
@@ -66,13 +66,13 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Expressibility and the Problem of Efficient Text Planning</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="0c818f47">J94-1008</url>
       <bibkey>mitkov-1994-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Explanation and Interaction: The Computer Generation of Explanatory Dialogues</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="55e1e0e5">J94-1009</url>
       <bibkey>carberry-1994-book</bibkey>
     </paper>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="13">
       <title>Book Reviews: The Logical Approach to Syntax: Foundations, Specifications, and Implementations of Theories of Government and Binding</title>
-      <author><first>Robert J.</first><last>Kuhns</last></author>
+      <author id="robert-j-kuhns"><first>Robert J.</first><last>Kuhns</last></author>
       <url hash="8859870c">J94-1013</url>
       <bibkey>kuhns-1994-book</bibkey>
     </paper>
@@ -114,7 +114,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 20, Number 2, June 1994</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1994</year>
@@ -128,7 +128,7 @@
     </frontmatter>
     <paper id="1">
       <title>Tagging <fixed-case>E</fixed-case>nglish Text with a Probabilistic Model</title>
-      <author><first>Bernard</first><last>Merialdo</last></author>
+      <author id="bernard-merialdo"><first>Bernard</first><last>Merialdo</last></author>
       <pages>155-171</pages>
       <url hash="68e9957d">J94-2001</url>
       <bibkey>merialdo-1994-tagging</bibkey>
@@ -142,7 +142,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>J</fixed-case>apanese Discourse and the Process of Centering</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Masayo</first><last>Iida</last></author>
       <author><first>Sharon</first><last>Cote</last></author>
       <pages>193-231</pages>
@@ -151,7 +151,7 @@
     </paper>
     <paper id="4">
       <title>Tracking Point of View in Narrative</title>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
       <pages>233-287</pages>
       <url hash="3bc43ba9">J94-2004</url>
       <bibkey>wiebe-1994-tracking</bibkey>
@@ -167,7 +167,7 @@
     <paper id="6">
       <title><fixed-case>RAFT</fixed-case>/<fixed-case>RAPR</fixed-case> and Centering: a comparison and discussion of problems related to processing complex sentences</title>
       <author><first>Linda Z.</first><last>Suri</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>301-317</pages>
       <url hash="a4b35896">J94-2006</url>
       <bibkey>suri-mccoy-1994-raft</bibkey>
@@ -202,7 +202,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 20, Number 3, September 1994</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1994</year>
@@ -216,7 +216,7 @@
     </frontmatter>
     <paper id="1">
       <title>Regular Models of Phonological Rule Systems</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <author><first>Martin</first><last>Kay</last></author>
       <pages>331-378</pages>
       <url hash="6e8b3f3a">J94-3001</url>
@@ -224,21 +224,21 @@
     </paper>
     <paper id="2">
       <title>Commentary on <fixed-case>K</fixed-case>aplan and <fixed-case>K</fixed-case>ay</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>379</pages>
       <url hash="8e1a5a7d">J94-3002</url>
       <bibkey>liberman-1994-commentary</bibkey>
     </paper>
     <paper id="3">
       <title>Commentary on <fixed-case>K</fixed-case>aplan and <fixed-case>K</fixed-case>ay</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <pages>380</pages>
       <url hash="cd1f7d1c">J94-3003</url>
       <bibkey>ritchie-1994-commentary</bibkey>
     </paper>
     <paper id="4">
       <title>The Reconstruction Engine: A Computer Implementation of the Comparative Method</title>
-      <author><first>John B.</first><last>Lowe</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
       <author><first>Martine</first><last>Mazaudon</last></author>
       <pages>381-417</pages>
       <url hash="efbc8c00">J94-3004</url>
@@ -260,7 +260,7 @@
     </paper>
     <paper id="7">
       <title>The Acquisition of Stress: A Data-Oriented Approach</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
       <author><first>Gert</first><last>Durieux</last></author>
       <pages>421-453</pages>
@@ -298,14 +298,14 @@
     </paper>
     <paper id="12">
       <title>Commentary on <fixed-case>B</fixed-case>ird and <fixed-case>K</fixed-case>lein</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>493</pages>
       <url hash="d97733bc">J94-3012</url>
       <bibkey>sproat-1994-commentary</bibkey>
     </paper>
     <paper id="13">
       <title>Book Reviews: <fixed-case>E</fixed-case>nglish Verb Classes and Alternations: A Preliminary Investigation</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="6d37e386">J94-3013</url>
       <bibkey>somers-1994-book</bibkey>
     </paper>
@@ -330,7 +330,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 20, Number 4, <fixed-case>D</fixed-case>ecember 1994</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1994</year>
@@ -345,7 +345,7 @@
     <paper id="1">
       <title>A Syntactic Analysis Method of Long <fixed-case>J</fixed-case>apanese Sentences Based on the Detection of Conjunctive Structures</title>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <pages>507-534</pages>
       <url hash="9ad28ab7">J94-4001</url>
       <bibkey>kurohashi-nagao-1994-syntactic</bibkey>
@@ -368,35 +368,35 @@
     </paper>
     <paper id="4">
       <title>Machine Translation Divergences: A Formal Description and Proposed Solution</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>597-633</pages>
       <url hash="8552c305">J94-4004</url>
       <bibkey>dorr-1994-machine</bibkey>
     </paper>
     <paper id="5">
       <title>Training and Scaling Preference Functions for Disambiguation</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <pages>635-648</pages>
       <url hash="7f2a4d7e">J94-4005</url>
       <bibkey>alshawi-carter-1994-training</bibkey>
     </paper>
     <paper id="6">
       <title>Squibs and Discussions: Storing Logical Form in a Shared-Packed Forest</title>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <pages>649-660</pages>
       <url hash="81604f6b">J94-4006</url>
       <bibkey>harper-1994-squibs</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Inheritance, Defaults, and the Lexicon</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="a5187bdc">J94-4007</url>
       <bibkey>daelemans-1994-book</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Grammaires d’unification a traits et conto1e des infinitives en francais</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <url hash="30fab0eb">J94-4008</url>
       <bibkey>estival-1994-book</bibkey>
     </paper>
diff --git a/data/xml/J95.xml b/data/xml/J95.xml
index 4b1124adda..25505b8df0 100644
--- a/data/xml/J95.xml
+++ b/data/xml/J95.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 21, Number 1, March 1995</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1995</year>
@@ -26,7 +26,7 @@
     <paper id="2">
       <title>Expressing Rhetorical Relations in Instructional Text: a case study of the purpose relation</title>
       <author><first>Keith</first><last>Vander Linden</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>29-57</pages>
       <url hash="048284a5">J95-1002</url>
       <bibkey>vander-linden-martin-1995-expressing</bibkey>
@@ -42,9 +42,9 @@
     </paper>
     <paper id="4">
       <title>Identifying Topic and Focus by an Automatic Procedure</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
-      <author><first>Hana</first><last>Skoumalova</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalova</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <pages>81-94</pages>
       <url hash="e4f0e789">J95-1004</url>
       <bibkey>hajicova-etal-1995-identifying</bibkey>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="6">
       <title>Book Reviews: Statistical Language Learning</title>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <url hash="c0e358db">J95-1006</url>
       <bibkey>magerman-1995-book</bibkey>
     </paper>
@@ -77,13 +77,13 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Challenges in Natural Language Processing</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="70162d65">J95-1009</url>
       <bibkey>hovy-1995-book</bibkey>
     </paper>
     <paper id="10">
       <title>Book Reviews: The Language Complexity Game</title>
-      <author><first>Alexis</first><last>Manaster Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster Ramer</last></author>
       <url hash="fb42cc03">J95-1010</url>
       <bibkey>manaster-ramer-1995-book</bibkey>
     </paper>
@@ -106,7 +106,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 21, Number 2, June 1995</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1995</year>
@@ -121,22 +121,22 @@
     <paper id="1">
       <title>Automatic Stochastic Tagging of Natural Language Texts</title>
       <author><first>Evangelos</first><last>Dermatas</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <pages>137-163</pages>
       <url hash="97938264">J95-2001</url>
       <bibkey>dermatas-kokkinakis-1995-automatic</bibkey>
     </paper>
     <paper id="2">
       <title>An Efficient Probabilistic Context-Free Parsing Algorithm that Computes Prefix Probabilities</title>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>165-201</pages>
       <url hash="cd17f369">J95-2002</url>
       <bibkey>stolcke-1995-efficient</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>entering: A Framework for Modeling the Local Coherence of Discourse</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Scott</first><last>Weinstein</last></author>
       <pages>203-225</pages>
       <award>ACL 2020 Test-of-Time Award (25 years)</award>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="5">
       <title>Squibs and Discussions: Efficient Parsing for <fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish: A Parameterized Message-Passing Approach</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Jye-hoon</first><last>Lee</last></author>
       <author><first>Dekang</first><last>Lin</last></author>
       <author><first>Sungki</first><last>Suh</last></author>
@@ -197,7 +197,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 21, Number 3, September 1995</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1995</year>
@@ -211,16 +211,16 @@
     </frontmatter>
     <paper id="1">
       <title>An Architecture for Voice Dialog Systems Based on <fixed-case>P</fixed-case>rolog-Style Theorem Proving</title>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <author><first>D. Richard</first><last>Hipp</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <pages>281-320</pages>
       <url hash="95071624">J95-3001</url>
       <bibkey>smith-etal-1995-architecture</bibkey>
     </paper>
     <paper id="2">
       <title>Robust Learning, Smoothing, and Parameter Tying on Syntactic Ambiguity Resolution</title>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Yi-Chung</first><last>Lin</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>321-349</pages>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="3">
       <title>Collaborating on Referring Expressions</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>351-382</pages>
       <url hash="d9ab9567">J95-3003</url>
@@ -288,7 +288,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 21, Number 4, <fixed-case>D</fixed-case>ecember 1995</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1995</year>
@@ -302,7 +302,7 @@
     </frontmatter>
     <paper id="1">
       <title>The Repair of Speech Act Misunderstandings by Abductive Inference</title>
-      <author><first>Susan W.</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>435-478</pages>
       <url hash="6a925a53">J95-4001</url>
@@ -351,13 +351,13 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Compositional translation</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <url hash="27689106">J95-4008</url>
       <bibkey>dorr-1995-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Speech-to-speech translation: A massively parallel memory-based approach</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="a7b839cc">J95-4009</url>
       <bibkey>ward-1995-book</bibkey>
     </paper>
diff --git a/data/xml/J96.xml b/data/xml/J96.xml
index e7fc02deae..45fe851393 100644
--- a/data/xml/J96.xml
+++ b/data/xml/J96.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 22, Number 1, March 1996</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1996</year>
@@ -17,8 +17,8 @@
     </frontmatter>
     <paper id="1">
       <title>Translating Collocations for Bilingual Lexicons: A Statistical Approach</title>
-      <author><first>Frank</first><last>Smadja</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="frank-smadja"><first>Frank</first><last>Smadja</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
       <pages>1-38</pages>
       <url hash="05d14e47">J96-1001</url>
@@ -26,9 +26,9 @@
     </paper>
     <paper id="2">
       <title>A Maximum Entropy Approach to Natural Language Processing</title>
-      <author><first>Adam L.</first><last>Berger</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="adam-berger"><first>Adam L.</first><last>Berger</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
       <pages>39-71</pages>
       <award>ACL 2021 Test-of-Time Award (25 year)</award>
       <url hash="f7ad65f3">J96-1002</url>
@@ -58,13 +58,13 @@
     <paper id="6">
       <title>Book Reviews: Natural Language Processing for <fixed-case>P</fixed-case>rolog Programmers</title>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <url hash="b8ed66d7">J96-1006</url>
       <bibkey>barker-szpakowicz-1996-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Logic and Lexicon</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <url hash="53d33baa">J96-1007</url>
       <bibkey>poesio-1996-book</bibkey>
     </paper>
@@ -94,7 +94,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 22, Number 2, June 1996</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1996</year>
@@ -108,15 +108,15 @@
     </frontmatter>
     <paper id="1">
       <title>Estimating Lexical Priors for Low-Frequency Morphologically Ambiguous Forms</title>
-      <author><first>Harald</first><last>Baayen</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="harald-baayen"><first>Harald</first><last>Baayen</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>155-166</pages>
       <url hash="2be94ec7">J96-2001</url>
       <bibkey>baayen-sproat-1996-estimating</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>DATR</fixed-case>: A Language for Lexical Knowledge Representation</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Gerald</first><last>Gazdar</last></author>
       <pages>167-216</pages>
       <url hash="f4021d03">J96-2002</url>
@@ -125,7 +125,7 @@
     <paper id="3">
       <title>Improving Statistical Language Model Performance with Automatically Generated Word Hierarchies</title>
       <author><first>John G.</first><last>McMahon</last></author>
-      <author><first>Francis J.</first><last>Smith</last></author>
+      <author id="francis-j-smith"><first>Francis J.</first><last>Smith</last></author>
       <pages>217-247</pages>
       <url hash="9c658f4b">J96-2003</url>
       <bibkey>mcmahon-smith-1996-improving</bibkey>
@@ -140,14 +140,14 @@
     </paper>
     <paper id="5">
       <title>Limited Attention and Discourse Structure</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <pages>255-264</pages>
       <url hash="d14a9664">J96-2005</url>
       <bibkey>walker-1996-limited</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Time-constrained Memory: A Reader-based Approach to Text Comprehension</title>
-      <author><first>Arthur C.</first><last>Graesser</last></author>
+      <author id="arthur-c-graesser"><first>Arthur C.</first><last>Graesser</last></author>
       <url hash="2e6b4964">J96-2006</url>
       <bibkey>graesser-1996-book</bibkey>
     </paper>
@@ -159,7 +159,7 @@
     </paper>
     <paper id="8">
       <title>Book Reviews: Representing Time in Natural Language: The Dynamic Interpretation of Tense and Aspect</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <url hash="445b11b5">J96-2008</url>
       <bibkey>passonneau-1996-book</bibkey>
     </paper>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="11">
       <title>Letters to the Editor</title>
-      <author><first>Eric</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric</first><last>Ristad</last></author>
       <url hash="5c6bc8ac">J96-2011</url>
       <bibkey>ristad-1996-letters</bibkey>
     </paper>
@@ -188,7 +188,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 22, Number 3, September 1996</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1996</year>
@@ -202,7 +202,7 @@
     </frontmatter>
     <paper id="1">
       <title>Unification Encodings of Grammatical Notations</title>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
       <pages>295-327</pages>
       <url hash="2f18f746">J96-3001</url>
       <bibkey>pulman-1996-unification</bibkey>
@@ -219,16 +219,16 @@
     <paper id="3">
       <title>Efficient Multilingual Phoneme-to-Grapheme Conversion Based on <fixed-case>HMM</fixed-case></title>
       <author><first>Panagiotis A.</first><last>Rentzepopoulos</last></author>
-      <author><first>George K.</first><last>Kokkinakis</last></author>
+      <author id="george-kokkinakis"><first>George K.</first><last>Kokkinakis</last></author>
       <pages>351-376</pages>
       <url hash="d20a48cb">J96-3003</url>
       <bibkey>rentzepopoulos-kokkinakis-1996-efficient</bibkey>
     </paper>
     <paper id="4">
       <title>A Stochastic Finite-State Word-Segmentation Algorithm for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Richard W.</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard W.</first><last>Sproat</last></author>
       <author><first>Chilin</first><last>Shih</last></author>
-      <author><first>William</first><last>Gale</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
       <author><first>Nancy</first><last>Chang</last></author>
       <pages>377-404</pages>
       <url hash="e3f30e5f">J96-3004</url>
@@ -245,7 +245,7 @@
     <paper id="6">
       <title>Toward a Synthesis of Two Accounts of Discourse Structure</title>
       <author><first>Megan</first><last>Moser</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>409-419</pages>
       <url hash="7c31c55c">J96-3006</url>
       <bibkey>moser-moore-1996-toward</bibkey>
@@ -253,20 +253,20 @@
     <paper id="7">
       <title>A Chart Re-estimation Algorithm for a Probabilistic Recursive Transition Network</title>
       <author><first>Young S.</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>421-429</pages>
       <url hash="f394567d">J96-3007</url>
       <bibkey>han-choi-1996-chart</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Spoken Natural Language Dialogue Systems: A Practical Approach</title>
-      <author><first>David R.</first><last>Traum</last></author>
+      <author id="david-traum"><first>David R.</first><last>Traum</last></author>
       <url hash="1d9681dd">J96-3008</url>
       <bibkey>traum-1996-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: Electric Words: Dictionaries, Computers, and Meanings</title>
-      <author><first>Archibald</first><last>Michiels</last></author>
+      <author id="archibald-michiels"><first>Archibald</first><last>Michiels</last></author>
       <url hash="a65b25d8">J96-3009</url>
       <bibkey>michiels-1996-book</bibkey>
     </paper>
@@ -278,7 +278,7 @@
     </paper>
     <paper id="11">
       <title>Book Reviews: Speakers, Listeners, and Communication: Explorations in Discourse Analysis</title>
-      <author><first>Susan</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan</first><last>McRoy</last></author>
       <url hash="abcaf642">J96-3011</url>
       <bibkey>mcroy-1996-book</bibkey>
     </paper>
@@ -307,7 +307,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 22, Number 4, <fixed-case>D</fixed-case>ecember 1996</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1996</year>
@@ -321,7 +321,7 @@
     </frontmatter>
     <paper id="1">
       <title>The Effects of Lexical Specialization on the Growth Curve of the Vocabulary</title>
-      <author><first>R. Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>R. Harald</first><last>Baayen</last></author>
       <pages>455-480</pages>
       <url hash="ff835cec">J96-4001</url>
       <bibkey>baayen-1996-effects</bibkey>
@@ -336,33 +336,33 @@
     <paper id="3">
       <title>Learning Bias and Phonological-Rule Induction</title>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>497-530</pages>
       <url hash="ddf30379">J96-4003</url>
       <bibkey>gildea-jurafsky-1996-learning</bibkey>
     </paper>
     <paper id="4">
       <title>A Statistically Emergent Approach for Language Processing: Application to Modeling Context Effects in Ambiguous <fixed-case>C</fixed-case>hinese Word Boundary Perception</title>
-      <author><first>Kok-Wee</first><last>Gan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Kim-Teng</first><last>Lua</last></author>
+      <author id="kok-wee-gan"><first>Kok-Wee</first><last>Gan</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="kim-teng-lua"><first>Kim-Teng</first><last>Lua</last></author>
       <pages>531-553</pages>
       <url hash="ff204235">J96-4004</url>
       <bibkey>gan-etal-1996-statistically</bibkey>
     </paper>
     <paper id="5">
       <title>Ambiguity-preserving Generation with <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-style Grammars</title>
-      <author><first>Jurgen</first><last>Wedekind</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="jurgen-wedekind"><first>Jurgen</first><last>Wedekind</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <pages>555-558</pages>
       <url hash="a505b400">J96-4005</url>
       <bibkey>wedekind-kaplan-1996-ambiguity</bibkey>
     </paper>
     <paper id="6">
       <title>Integrating General-purpose and Corpus-based Verb Classification</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>559-568</pages>
       <url hash="0d3dc93f">J96-4006</url>
       <bibkey>basili-etal-1996-integrating</bibkey>
diff --git a/data/xml/J97.xml b/data/xml/J97.xml
index a1830c4702..70f2095300 100644
--- a/data/xml/J97.xml
+++ b/data/xml/J97.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 23, Number 1, March 1997</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1997</year>
@@ -17,8 +17,8 @@
     </frontmatter>
     <paper id="1">
       <title>Empirical Studies in Discourse</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>1-12</pages>
       <url hash="fe27886a">J97-1001</url>
       <bibkey>walker-moore-1997-empirical</bibkey>
@@ -30,37 +30,37 @@
       <author><first>Stephen</first><last>Isard</last></author>
       <author><first>Jacqueline C.</first><last>Kowtko</last></author>
       <author><first>Gwyneth</first><last>Doherty-Sneddon</last></author>
-      <author><first>Anne H.</first><last>Anderson</last></author>
+      <author id="anne-h-anderson"><first>Anne H.</first><last>Anderson</last></author>
       <pages>13-31</pages>
       <url hash="610cb29a">J97-1002</url>
       <bibkey>carletta-etal-1997-reliability</bibkey>
     </paper>
     <paper id="3">
       <title>Text Tiling: Segmenting Text into Multi-paragraph Subtopic Passages</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>33-64</pages>
       <url hash="de10e0c7">J97-1003</url>
       <bibkey>hearst-1997-text</bibkey>
     </paper>
     <paper id="4">
       <title>Developing and Empirically Evaluating Robust Explanation Generators: The <fixed-case>KNIGHT</fixed-case> Experiments</title>
-      <author><first>James C.</first><last>Lester</last></author>
-      <author><first>Bruce W.</first><last>Porter</last></author>
+      <author id="james-lester"><first>James C.</first><last>Lester</last></author>
+      <author id="bruce-porter"><first>Bruce W.</first><last>Porter</last></author>
       <pages>65-101</pages>
       <url hash="37463bc1">J97-1004</url>
       <bibkey>lester-porter-1997-developing</bibkey>
     </paper>
     <paper id="5">
       <title>Discourse Segmentation by Human and Automated Means</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>103-139</pages>
       <url hash="ffae9816">J97-1005</url>
       <bibkey>passonneau-litman-1997-discourse</bibkey>
     </paper>
     <paper id="6">
       <title>Effects of Variable Initiative on Linguistic Behavior in Human-Computer Spoken Natural Language Dialogue</title>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <author><first>Steven A.</first><last>Gordon</last></author>
       <pages>141-168</pages>
       <url hash="e1b0bb61">J97-1006</url>
@@ -69,7 +69,7 @@
     <paper id="7">
       <title>An Empirical Study on the Generation of Anaphora in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Ching-Long</first><last>Yeh</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>169-190</pages>
       <url hash="e8aeac9a">J97-1007</url>
       <bibkey>yeh-mellish-1997-empirical</bibkey>
@@ -89,7 +89,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 23, Number 2, June 1997</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1997</year>
@@ -104,7 +104,7 @@
     <paper id="1">
       <title>Floating Constraints in Lexical Choice</title>
       <author><first>Michael</first><last>Elhadad</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Jacques</first><last>Robin</last></author>
       <pages>195-239</pages>
       <url hash="5bae4757">J97-2001</url>
@@ -112,8 +112,8 @@
     </paper>
     <paper id="2">
       <title>Adaptive Multilingual Sentence Boundary Disambiguation</title>
-      <author><first>David D.</first><last>Palmer</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>241-267</pages>
       <url hash="d1b2f202">J97-2002</url>
       <bibkey>palmer-hearst-1997-adaptive</bibkey>
@@ -127,8 +127,8 @@
     </paper>
     <paper id="4">
       <title>A Class-based Approach to Word Alignment</title>
-      <author><first>Sue J.</first><last>Ker</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>313-343</pages>
       <url hash="eb8c7602">J97-2004</url>
       <bibkey>ker-chang-1997-class</bibkey>
@@ -166,7 +166,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 23, Number 3, September 1997</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1997</year>
@@ -201,29 +201,29 @@
     </paper>
     <paper id="4">
       <title>An Efficient Implementation of the Head-Corner Parser</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>425-456</pages>
       <url hash="761ddcc7">J97-3004</url>
       <bibkey>van-noord-1997-efficient</bibkey>
     </paper>
     <paper id="5">
       <title>Anaphoric Dependencies in Ellipsis</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>457-466</pages>
       <url hash="64020c0b">J97-3005</url>
       <bibkey>kehler-shieber-1997-anaphoric</bibkey>
     </paper>
     <paper id="6">
       <title>Current theories of centering for pronoun interpretation: a critical evaluation</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <pages>467-475</pages>
       <url hash="2d999f7a">J97-3006</url>
       <bibkey>kehler-1997-current</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Semantic Ambiguity and Underspecification</title>
-      <author><first>Peter J.</first><last>Ludlow</last></author>
+      <author id="peter-j-ludlow"><first>Peter J.</first><last>Ludlow</last></author>
       <url hash="e304252a">J97-3007</url>
       <bibkey>ludlow-1997-book</bibkey>
     </paper>
@@ -263,7 +263,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 23, Number 4, <fixed-case>D</fixed-case>ecember 1997</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1997</year>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="3">
       <title>A Computational Treatment of Lexical Rules in <fixed-case>HPSG</fixed-case> as Covariation in Lexical Entries</title>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <author><first>Guido</first><last>Minnen</last></author>
       <pages>543-568</pages>
       <url hash="71efe705">J97-4003</url>
@@ -307,7 +307,7 @@
     </paper>
     <paper id="5">
       <title>Stochastic Attribute-Value Grammars</title>
-      <author><first>Steven P.</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven P.</first><last>Abney</last></author>
       <pages>597-618</pages>
       <url hash="d090a8f4">J97-4005</url>
       <bibkey>abney-1997-stochastic</bibkey>
@@ -320,13 +320,13 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Industrial Parsing of Software Manuals</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <url hash="1683a3ee">J97-4007</url>
       <bibkey>carroll-1997-book</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Using Language</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <url hash="ebee6bd3">J97-4008</url>
       <bibkey>walker-1997-book</bibkey>
     </paper>
diff --git a/data/xml/J98.xml b/data/xml/J98.xml
index 255cfd1347..3fd55e7a3a 100644
--- a/data/xml/J98.xml
+++ b/data/xml/J98.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 24, Number 1, March 1998 - Special Issue on Word Sense Disambiguation</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1998</year>
@@ -17,8 +17,8 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction to the Special Issue on Word Sense Disambiguation: The State of the Art</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <pages>1-40</pages>
       <url hash="60d7ff0b">J98-1001</url>
       <bibkey>ide-veronis-1998-introduction</bibkey>
@@ -33,15 +33,15 @@
     </paper>
     <paper id="3">
       <title>Topical Clustering of <fixed-case>MRD</fixed-case> Senses Based on Information Retrieval Techniques</title>
-      <author><first>Jen Nan</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jen-nan-chen"><first>Jen Nan</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>61-95</pages>
       <url hash="fc1c66fe">J98-1003</url>
       <bibkey>chen-chang-1998-topical</bibkey>
     </paper>
     <paper id="4">
       <title>Automatic Word Sense Discrimination</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>97-123</pages>
       <url hash="c38b0492">J98-1004</url>
       <bibkey>schutze-1998-automatic</bibkey>
@@ -50,7 +50,7 @@
     <paper id="5">
       <title>Disambiguating Highly Ambiguous Words</title>
       <author><first>Geoffrey</first><last>Towell</last></author>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <pages>125-145</pages>
       <url hash="0aa2ec5a">J98-1005</url>
       <bibkey>towell-voorhees-1998-disambiguating</bibkey>
@@ -58,7 +58,7 @@
     <paper id="6">
       <title>Using Corpus Statistics and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Relations for Sense Identification</title>
       <author><first>Claudia</first><last>Leacock</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>George A.</first><last>Miller</last></author>
       <pages>147-165</pages>
       <url hash="0e7bc902">J98-1006</url>
@@ -96,7 +96,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 24, Number 2, June 1998</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1998</year>
@@ -110,8 +110,8 @@
     </frontmatter>
     <paper id="1">
       <title>A Corpus-based Investigation of Definite Description Use</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>183-216</pages>
       <url hash="fd793a87">J98-2001</url>
       <bibkey>poesio-vieira-1998-corpus</bibkey>
@@ -127,7 +127,7 @@
     <paper id="3">
       <title>Contextual Grammars as Generative Models of Natural Language</title>
       <author><first>Solomon</first><last>Marcus</last></author>
-      <author><first>Carlos</first><last>Martín-Vide</last></author>
+      <author id="carlos-martin-vide"><first>Carlos</first><last>Martín-Vide</last></author>
       <author><first>Gheorghe</first><last>Păun</last></author>
       <pages>245-274</pages>
       <url hash="7d81ce17">J98-2003</url>
@@ -159,19 +159,19 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Corpus-Based Methods in Language and Speech Processing</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <url hash="dea3cd70">J98-2007</url>
       <bibkey>bruce-1998-book</bibkey>
     </paper>
     <paper id="8">
       <title>Book Reviews: Text Databases: One Database Model and Several Retrieval Languages</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="c0798670">J98-2008</url>
       <bibkey>ide-1998-book</bibkey>
     </paper>
     <paper id="9">
       <title>Book Reviews: An Introduction to Text-to-Speech Synthesis</title>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
       <url hash="47d6ec2e">J98-2009</url>
       <bibkey>fitzpatrick-1998-book</bibkey>
     </paper>
@@ -218,7 +218,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational-Linguistics, Volume 24, Number 3, September 1998</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1998</year>
@@ -234,15 +234,15 @@
       <title>Introduction to the Special Issue on Natural Language Generation</title>
       <author><first>Robert</first><last>Dale</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <pages>345-353</pages>
       <url hash="88fcef8f">J98-3001</url>
       <bibkey>dale-etal-1998-introduction</bibkey>
     </paper>
     <paper id="2">
       <title>Collaborative Response Generation in Planning Dialogues</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>355-400</pages>
       <url hash="5c465a92">J98-3002</url>
       <bibkey>chu-carroll-carberry-1998-collaborative</bibkey>
@@ -256,25 +256,25 @@
     </paper>
     <paper id="4">
       <title>Describing Complex Charts in Natural Language: A Caption Generation System</title>
-      <author><first>Vibhu O.</first><last>Mittal</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu O.</first><last>Mittal</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Steven</first><last>Roth</last></author>
+      <author id="steven-roth"><first>Steven</first><last>Roth</last></author>
       <pages>431-467</pages>
       <url hash="3c5de80d">J98-3004</url>
       <bibkey>mittal-etal-1998-describing</bibkey>
     </paper>
     <paper id="5">
       <title>Generating Natural Language Summaries from Multiple On-Line Sources</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>469-500</pages>
       <url hash="22a30402">J98-3005</url>
       <bibkey>radev-mckeown-1998-generating</bibkey>
     </paper>
     <paper id="6">
       <title>Do the Right Thing … but Expect the Unexpected</title>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>501-507</pages>
       <url hash="9ca63ed2">J98-3006</url>
       <bibkey>oberlander-1998-right</bibkey>
@@ -293,7 +293,7 @@
     </paper>
     <paper id="9">
       <title>Book Review: Machine Translation and Translation Theory</title>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <url hash="d38af734">J98-3009</url>
       <bibkey>van-eynde-1998-book</bibkey>
     </paper>
@@ -312,7 +312,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 24, Number 4, <fixed-case>D</fixed-case>ecember 1998</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1998</year>
@@ -326,7 +326,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Collaborative Planning Model of Intentional Structure</title>
-      <author><first>Karen E.</first><last>Lochbaum</last></author>
+      <author id="karen-e-lochbaum"><first>Karen E.</first><last>Lochbaum</last></author>
       <pages>525-572</pages>
       <url hash="9efbaced">J98-4001</url>
       <bibkey>lochbaum-1998-collaborative</bibkey>
@@ -389,7 +389,7 @@
     </paper>
     <paper id="10">
       <title>Multilingual Text-to-Speech Synthesis: The Bell Labs Approach</title>
-      <author><first>Douglas</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas</first><last>O’Shaughnessy</last></author>
       <url hash="365cd537">J98-4010</url>
       <bibkey>oshaughnessy-1998-multilingual</bibkey>
     </paper>
diff --git a/data/xml/J99.xml b/data/xml/J99.xml
index c35c757aed..caefcbd2a8 100644
--- a/data/xml/J99.xml
+++ b/data/xml/J99.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 25, Number 1, March 1999</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1999</year>
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Process Model for Recognizing Communicative Acts and Modeling Negotiation Subdialogues</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <author><first>Lynn</first><last>Lambert</last></author>
       <pages>1-53</pages>
       <url hash="c3702966">J99-1001</url>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Linguistic Databases</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="d394348d">J99-1007</url>
       <bibkey>tiedemann-1999-book</bibkey>
     </paper>
@@ -82,7 +82,7 @@
   <volume id="2" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 25, Number 2, June 1999</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1999</year>
@@ -97,16 +97,16 @@
     <paper id="1">
       <title>A Methodology for Extending Focusing Frameworks</title>
       <author><first>Linda Z.</first><last>Suri</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
-      <author><first>Jonathan D.</first><last>DeCristofaro</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="jonathan-decristofaro"><first>Jonathan D.</first><last>DeCristofaro</last></author>
       <pages>173-194</pages>
       <url hash="2e68df27">J99-2001</url>
       <bibkey>suri-etal-1999-methodology</bibkey>
     </paper>
     <paper id="2">
       <title>Decomposable Modeling in Natural Language Processing</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>195-207</pages>
       <url hash="29e209ca">J99-2002</url>
       <bibkey>bruce-wiebe-1999-decomposable</bibkey>
@@ -114,7 +114,7 @@
     <paper id="3">
       <title><fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars in a Fragment of the <fixed-case>L</fixed-case>ambek Calculus</title>
       <author><first>V. Michele</first><last>Abrusci</last></author>
-      <author><first>Christophe</first><last>Fouqueré</last></author>
+      <author id="christophe-fouquere"><first>Christophe</first><last>Fouqueré</last></author>
       <author><first>Jacqueline</first><last>Vauzeilles</last></author>
       <pages>209-236</pages>
       <url hash="74f07ca5">J99-2003</url>
@@ -122,22 +122,22 @@
     </paper>
     <paper id="4">
       <title><fixed-case>S</fixed-case>upertagging: An Approach to Almost Parsing</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>237-265</pages>
       <url hash="d705f5ea">J99-2004</url>
       <bibkey>bangalore-joshi-1999-supertagging</bibkey>
     </paper>
     <paper id="5">
       <title>Aligning Phonetic Segments for Children’s Articulation Assessment</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>267-275</pages>
       <url hash="e9e02888">J99-2005</url>
       <bibkey>somers-1999-aligning</bibkey>
     </paper>
     <paper id="6">
       <title>Semantic-driven Generation with <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-style Grammars</title>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
       <pages>277-281</pages>
       <url hash="92639250">J99-2006</url>
       <bibkey>wedekind-1999-semantic</bibkey>
@@ -193,7 +193,7 @@
   <volume id="3" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 25, Number 3, September 1999</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1999</year>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="3">
       <title>Vector-based Natural Language Call Routing</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <pages>361-388</pages>
       <url hash="8516d250">J99-3003</url>
@@ -230,27 +230,27 @@
     </paper>
     <paper id="4">
       <title>Interpreting and Generating Indirect Answers</title>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>389-435</pages>
       <url hash="ba1539a1">J99-3004</url>
       <bibkey>green-carberry-1999-interpreting</bibkey>
     </paper>
     <paper id="5">
       <title>Book Reviews: Ambiguity Resolution in Language Learning: Computational and Cognitive Models</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <url hash="189057b0">J99-3005</url>
       <bibkey>schutze-1999-book</bibkey>
     </paper>
     <paper id="6">
       <title>Book Reviews: Beyond Grammar: An Experience-based Theory of Language</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <url hash="247ff3ca">J99-3006</url>
       <bibkey>collins-1999-book</bibkey>
     </paper>
     <paper id="7">
       <title>Book Reviews: Type-Logical Semantics</title>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <url hash="f40c6c7c">J99-3007</url>
       <bibkey>pulman-1999-book</bibkey>
     </paper>
@@ -262,7 +262,7 @@
     </paper>
     <paper id="9">
       <title>Book Reviews: Processing Metonymy and Metaphor</title>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <url hash="c2acaf5f">J99-3009</url>
       <bibkey>ferrari-1999-book</bibkey>
     </paper>
@@ -286,7 +286,7 @@
   <volume id="4" type="journal">
     <meta>
       <booktitle>Computational Linguistics, Volume 25, Number 4, <fixed-case>D</fixed-case>ecember 1999</booktitle>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>1999</year>
@@ -300,14 +300,14 @@
     </frontmatter>
     <paper id="1">
       <title>Completeness conditions for mixed strategy bidirectional parsing</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <pages>457-486</pages>
       <url hash="a82b3e8e">J99-4001</url>
       <bibkey>ritchie-1999-completeness</bibkey>
     </paper>
     <paper id="2">
       <title>Lexical rules in constraint based grammars</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <pages>487–526</pages>
       <url hash="0a0456f7">J99-4002</url>
@@ -315,15 +315,15 @@
     </paper>
     <paper id="3">
       <title>Speech repains, intonational phrases, and discourse markers: modeling speakers’ utterances in spoken dialogue</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <pages>527-572</pages>
       <url hash="c95883a3">J99-4003</url>
       <bibkey>heeman-allen-1999-speech</bibkey>
     </paper>
     <paper id="4">
       <title>Semiring Parsing</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <pages>573-606</pages>
       <url hash="8111c97f">J99-4004</url>
       <bibkey>goodman-1999-semiring</bibkey>
@@ -337,7 +337,7 @@
     </paper>
     <paper id="6">
       <title>Conceptions of limited attention and discourse focus</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <author><first>Peter C.</first><last>Gordon</last></author>
       <pages>617-624</pages>
       <url hash="2ee45def">J99-4006</url>
@@ -345,7 +345,7 @@
     </paper>
     <paper id="7">
       <title>Book Reviews: Centering Theory in Discourse</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="20890459">J99-4007</url>
       <bibkey>mitkov-1999-book</bibkey>
     </paper>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="12">
       <title>Letter to the Editor: Language Technology for Beginners</title>
-      <author><first>Ronald A.</first><last>Cole</last></author>
+      <author id="ronald-cole"><first>Ronald A.</first><last>Cole</last></author>
       <url hash="73d6531e">J99-4012</url>
       <bibkey>cole-1999-letter</bibkey>
     </paper>
diff --git a/data/xml/K15.xml b/data/xml/K15.xml
index c3a48419d0..d6c6828444 100644
--- a/data/xml/K15.xml
+++ b/data/xml/K15.xml
@@ -17,9 +17,9 @@
     </frontmatter>
     <paper id="1">
       <title>A Coactive Learning View of Online Structured Prediction in Statistical Machine Translation</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1–11</pages>
       <url hash="e9589e43">K15-1001</url>
       <doi>10.18653/v1/K15-1001</doi>
@@ -38,9 +38,9 @@
     <paper id="3">
       <title>A Supertag-Context Model for Weakly-Supervised <fixed-case>CCG</fixed-case> Parser Learning</title>
       <author><first>Dan</first><last>Garrette</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>22–31</pages>
       <url hash="afe06de4">K15-1003</url>
       <doi>10.18653/v1/K15-1003</doi>
@@ -60,7 +60,7 @@
       <title><fixed-case>AIDA</fixed-case>2: A Hybrid Approach for Token and Sentence Level Dialect Identification in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>42–51</pages>
       <url hash="cc8022cb">K15-1005</url>
       <doi>10.18653/v1/K15-1005</doi>
@@ -78,9 +78,9 @@
     </paper>
     <paper id="7">
       <title>Analyzing Optimization for Statistical Machine Translation: <fixed-case>MERT</fixed-case> Learns Verbosity, <fixed-case>PRO</fixed-case> Learns Length</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>62–72</pages>
       <url hash="09f5e591">K15-1007</url>
       <doi>10.18653/v1/K15-1007</doi>
@@ -102,7 +102,7 @@
       <author><first>Liyuan</first><last>Zhou</last></author>
       <author><first>Weiwei</first><last>Hou</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>83–93</pages>
       <url hash="85b03b61">K15-1009</url>
       <doi>10.18653/v1/K15-1009</doi>
@@ -120,9 +120,9 @@
     </paper>
     <paper id="11">
       <title>Cross-lingual syntactic variation over age and gender</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>103–112</pages>
       <url hash="84d8b5d2">K15-1011</url>
       <doi>10.18653/v1/K15-1011</doi>
@@ -130,8 +130,8 @@
     </paper>
     <paper id="12">
       <title>Cross-lingual Transfer for Unsupervised Dependency Parsing Without Parallel Data</title>
-      <author><first>Long</first><last>Duong</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>113–122</pages>
@@ -142,7 +142,7 @@
     <paper id="13">
       <title>Detecting Semantically Equivalent Questions in Online User Forums</title>
       <author><first>Dasha</first><last>Bogdanova</last></author>
-      <author><first>Cícero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
       <author><first>Luciano</first><last>Barbosa</last></author>
       <author><first>Bianca</first><last>Zadrozny</last></author>
       <pages>123–131</pages>
@@ -153,7 +153,7 @@
     <paper id="14">
       <title>Entity Linking <fixed-case>K</fixed-case>orean Text: An Unsupervised Learning Approach using Semantic Relations</title>
       <author><first>Youngsik</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>132–141</pages>
       <url hash="4e29d50d">K15-1014</url>
       <doi>10.18653/v1/K15-1014</doi>
@@ -162,7 +162,7 @@
     <paper id="15">
       <title>Incremental Recurrent Neural Network Dependency Parser with Search-based Discriminative Training</title>
       <author><first>Majid</first><last>Yazdani</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>142–152</pages>
       <url hash="f45b1362">K15-1015</url>
       <doi>10.18653/v1/K15-1015</doi>
@@ -171,7 +171,7 @@
     <paper id="16">
       <title>Instance Selection Improves Cross-Lingual Model Training for Fine-Grained Sentiment Analysis</title>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>153–163</pages>
       <url hash="43b78fb1">K15-1016</url>
       <doi>10.18653/v1/K15-1016</doi>
@@ -180,9 +180,9 @@
     <paper id="17">
       <title>Labeled Morphological Segmentation with Semi-<fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Thomas</first><last>Müller</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>164–174</pages>
       <url hash="bbaf25f0">K15-1017</url>
       <doi>10.18653/v1/K15-1017</doi>
@@ -212,7 +212,7 @@
     <paper id="20">
       <title>Making the Most of Crowdsourced Document Annotations: Confused Supervised <fixed-case>LDA</fixed-case></title>
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <pages>194–203</pages>
@@ -223,7 +223,7 @@
     <paper id="21">
       <title>Multichannel Variable-Size Convolution for Sentence Classification</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>204–214</pages>
       <url hash="b8796a28">K15-1021</url>
       <doi>10.18653/v1/K15-1021</doi>
@@ -241,7 +241,7 @@
     <paper id="23">
       <title>Quantity, Contrast, and Convention in Cross-Situated Language Comprehension</title>
       <author><first>Ian</first><last>Perera</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>226–236</pages>
       <url hash="da1a8dd7">K15-1023</url>
       <doi>10.18653/v1/K15-1023</doi>
@@ -323,9 +323,9 @@
     </paper>
     <paper id="31">
       <title>Deep Neural Language Models for Machine Translation</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Michael</first><last>Kayser</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>305–309</pages>
       <url hash="ec861eec">K15-1031</url>
       <doi>10.18653/v1/K15-1031</doi>
@@ -335,7 +335,7 @@
       <title>Finding Opinion Manipulation Trolls in News Community Forums</title>
       <author><first>Todor</first><last>Mihaylov</last></author>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>310–314</pages>
       <url hash="9fde414b">K15-1032</url>
       <doi>10.18653/v1/K15-1032</doi>
@@ -343,11 +343,11 @@
     </paper>
     <paper id="33">
       <title>Do dependency parsing metrics correlate with human judgments?</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>315–320</pages>
       <url hash="a0ea60f4">K15-1033</url>
       <doi>10.18653/v1/K15-1033</doi>
@@ -395,7 +395,7 @@
     <paper id="38">
       <title>Reading behavior predicts syntactic categories</title>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>345–349</pages>
       <url hash="371de5a6">K15-1038</url>
       <doi>10.18653/v1/K15-1038</doi>
@@ -421,7 +421,7 @@
       <title>The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2015 Shared Task on Shallow Discourse Parsing</title>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Christopher</first><last>Bryant</last></author>
       <author><first>Attapol</first><last>Rutherford</last></author>
@@ -432,8 +432,8 @@
     </paper>
     <paper id="2">
       <title>A Refined End-to-End Discourse Parser</title>
-      <author><first>Jianxiang</first><last>Wang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="jianxiang-wang"><first>Jianxiang</first><last>Wang</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>17–24</pages>
       <url hash="fe020c17">K15-2002</url>
       <doi>10.18653/v1/K15-2002</doi>
@@ -441,7 +441,7 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> Discourse Parser in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2015 Shared Task: Token-level Sequence Labeling with Argument-specific Models</title>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <author><first>Ali Orkan</first><last>Bayer</last></author>
       <pages>25–31</pages>
@@ -453,7 +453,7 @@
       <title>The <fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2015 shared Task</title>
       <author><first>Fang</first><last>Kong</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>32–36</pages>
       <url hash="317871ba">K15-2004</url>
       <doi>10.18653/v1/K15-2004</doi>
@@ -480,12 +480,12 @@
     </paper>
     <paper id="7">
       <title>A Hybrid Discourse Relation Parser in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2015</title>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <author><first>Sindhuja</first><last>Gopalan</last></author>
-      <author><first>Lakshmi</first><last>S.</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Malarkodi</first><last>C.S.</last></author>
+      <author id="lakshmi-s"><first>Lakshmi</first><last>S.</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="malarkodi-c-s"><first>Malarkodi</first><last>C.S.</last></author>
       <pages>50–55</pages>
       <url hash="55306412">K15-2007</url>
       <doi>10.18653/v1/K15-2007</doi>
@@ -506,7 +506,7 @@
       <author><first>Shubham</first><last>Mukherjee</last></author>
       <author><first>Abhishek</first><last>Tiwari</last></author>
       <author><first>Mohit</first><last>Gupta</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>61–65</pages>
       <url hash="60278c25">K15-2009</url>
       <doi>10.18653/v1/K15-2009</doi>
@@ -515,8 +515,8 @@
     <paper id="10">
       <title><fixed-case>JAIST</fixed-case>: A two-phase machine learning approach for identifying discourse relations in newswire texts</title>
       <author><first>Truong Son</first><last>Nguyen</last></author>
-      <author><first>Bao Quoc</first><last>Ho</last></author>
-      <author><first>Le Minh</first><last>Nguyen</last></author>
+      <author id="bao-quoc-ho"><first>Bao Quoc</first><last>Ho</last></author>
+      <author id="minh-le-nguyen"><first>Le Minh</first><last>Nguyen</last></author>
       <pages>66–70</pages>
       <url hash="0b0f2c50">K15-2010</url>
       <doi>10.18653/v1/K15-2010</doi>
@@ -558,7 +558,7 @@
     <paper id="14">
       <title>The <fixed-case>DCU</fixed-case> Discourse Parser for Connective, Argument Identification and Explicit Sense Classification</title>
       <author><first>Longyue</first><last>Wang</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Tsuyoshi</first><last>Okita</last></author>
       <author><first>Xiaojun</first><last>Zhang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
diff --git a/data/xml/K16.xml b/data/xml/K16.xml
index 05196c4cbc..ab9aa2f0dc 100644
--- a/data/xml/K16.xml
+++ b/data/xml/K16.xml
@@ -27,7 +27,7 @@
     </paper>
     <paper id="2">
       <title>Generating Sentences from a Continuous Space</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Luke</first><last>Vilnis</last></author>
       <author><first>Oriol</first><last>Vinyals</last></author>
       <author><first>Andrew</first><last>Dai</last></author>
@@ -41,8 +41,8 @@
     <paper id="3">
       <title>Identifying Temporal Orientation of Word Senses</title>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
-      <author><first>Stéphane</first><last>Ferrari</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
+      <author id="stephane-ferrari"><first>Stéphane</first><last>Ferrari</last></author>
       <author><first>Yann</first><last>Mathet</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>22–30</pages>
@@ -85,7 +85,7 @@
       <title>Learning to Jointly Predict Ellipsis and Comparison Structures</title>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
       <author><first>Alexis</first><last>Cornelia Wellwood</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>62–74</pages>
       <url hash="803ba5a5">K16-1007</url>
       <doi>10.18653/v1/K16-1007</doi>
@@ -102,7 +102,7 @@
     <paper id="9">
       <title>Beyond Centrality and Structural Features: Learning Information Importance for Text Summarization</title>
       <author><first>Markus</first><last>Zopf</last></author>
-      <author><first>Eneldo</first><last>Loza Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo</first><last>Loza Mencía</last></author>
       <author><first>Johannes</first><last>Fürnkranz</last></author>
       <pages>84–94</pages>
       <url hash="36be4bed">K16-1009</url>
@@ -122,7 +122,7 @@
     <paper id="11">
       <title>A Data-driven Investigation of Corrective Feedback on Subject Omission Errors in First Language Acquisition</title>
       <author><first>Sarah</first><last>Hiller</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>105–114</pages>
       <url hash="f9891b48">K16-1011</url>
       <doi>10.18653/v1/K16-1011</doi>
@@ -132,7 +132,7 @@
       <title>Redefining part-of-speech classes with distributional semantic models</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>115–125</pages>
       <url hash="ff2bfdcd">K16-1012</url>
       <doi>10.18653/v1/K16-1012</doi>
@@ -143,7 +143,7 @@
       <author><first>Rebecca</first><last>Knowles</last></author>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>126–135</pages>
       <url hash="d68f55d3">K16-1013</url>
       <doi>10.18653/v1/K16-1013</doi>
@@ -162,8 +162,8 @@
       <title>Harnessing Sequence Labeling for Sarcasm Detection in Dialogue from <fixed-case>TV</fixed-case> Series ‘<fixed-case>F</fixed-case>riends’</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Vaibhav</first><last>Tripathi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark J.</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark J.</first><last>Carman</last></author>
       <pages>146–155</pages>
       <url hash="7f63b1e9">K16-1015</url>
       <doi>10.18653/v1/K16-1015</doi>
@@ -175,7 +175,7 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Seema</first><last>Nagar</last></author>
       <author><first>Kuntal</first><last>Dey</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>156–166</pages>
       <url hash="8e437189">K16-1016</url>
       <doi>10.18653/v1/K16-1016</doi>
@@ -184,10 +184,10 @@
     <paper id="17">
       <title>Modelling Context with User Embeddings for Sarcasm Detection in Social Media</title>
       <author><first>Silvio</first><last>Amir</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Hao</first><last>Lyu</last></author>
       <author><first>Paula</first><last>Carvalho</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <pages>167–177</pages>
       <url hash="f071f7e8">K16-1017</url>
       <doi>10.18653/v1/K16-1017</doi>
@@ -196,7 +196,7 @@
     <paper id="18">
       <title>Learning when to trust distant supervision: An application to low-resource <fixed-case>POS</fixed-case> tagging using cross-lingual projection</title>
       <author><first>Meng</first><last>Fang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>178–186</pages>
       <url hash="89251299">K16-1018</url>
       <doi>10.18653/v1/K16-1018</doi>
@@ -206,8 +206,8 @@
       <title>Greedy, Joint Syntactic-Semantic Parsing with Stack <fixed-case>LSTM</fixed-case>s</title>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>187–197</pages>
       <url hash="98187e1c">K16-1019</url>
       <doi>10.18653/v1/K16-1019</doi>
@@ -215,10 +215,10 @@
     </paper>
     <paper id="20">
       <title>Beyond Prefix-Based Interactive Translation Prediction</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>José Miguel</first><last>Benedi Ruiz</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedi Ruiz</last></author>
       <pages>198–207</pages>
       <url hash="2f89c506">K16-1020</url>
       <doi>10.18653/v1/K16-1020</doi>
@@ -226,9 +226,9 @@
     </paper>
     <paper id="21">
       <title>Exploring Prediction Uncertainty in Machine Translation Quality Estimation</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>208–218</pages>
       <url hash="83a6994d">K16-1021</url>
       <doi>10.18653/v1/K16-1021</doi>
@@ -247,7 +247,7 @@
     <paper id="23">
       <title>Coreference in <fixed-case>W</fixed-case>ikipedia: Main Concept Resolution</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>229–238</pages>
       <url hash="2018b8a6">K16-1023</url>
       <doi>10.18653/v1/K16-1023</doi>
@@ -290,8 +290,8 @@
     <paper id="27">
       <title>Substring-based unsupervised transliteration with phonetic and contextual knowledge</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>270–279</pages>
       <url hash="96f99e56">K16-1027</url>
       <doi>10.18653/v1/K16-1027</doi>
@@ -301,7 +301,7 @@
       <title>Abstractive Text Summarization using Sequence-to-sequence <fixed-case>RNN</fixed-case>s and Beyond</title>
       <author><first>Ramesh</first><last>Nallapati</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
-      <author><first>Cicero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>dos Santos</last></author>
       <author><first>Çağlar</first><last>Gu̇lçehre</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <pages>280–290</pages>
@@ -312,8 +312,8 @@
     <paper id="29">
       <title>Compression of Neural Machine Translation Models via Pruning</title>
       <author><first>Abigail</first><last>See</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>291–301</pages>
       <url hash="bcbe2172">K16-1029</url>
       <doi>10.18653/v1/K16-1029</doi>
@@ -324,7 +324,7 @@
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Taku</first><last>Komura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>302–313</pages>
       <url hash="17a51b8e">K16-1030</url>
       <doi>10.18653/v1/K16-1030</doi>
@@ -360,9 +360,9 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2016 Shared Task on Multilingual Shallow Discourse Parsing</title>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Attapol</first><last>Rutherford</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Chuan</first><last>Wang</last></author>
       <author><first>Hongmin</first><last>Wang</last></author>
       <pages>1–19</pages>
@@ -378,7 +378,7 @@
       <author><first>Uladzimir</first><last>Sidarenka</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>20–26</pages>
       <url hash="a32ca325">K16-2002</url>
       <attachment type="presentation" hash="ad1a375a">K16-2002.Presentation.pdf</attachment>
@@ -391,7 +391,7 @@
       <author><first>Haoran</first><last>Li</last></author>
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>27–32</pages>
       <url hash="11e3b429">K16-2003</url>
       <doi>10.18653/v1/K16-2003</doi>
@@ -399,8 +399,8 @@
     </paper>
     <paper id="4">
       <title>Two End-to-end Shallow Discourse Parsers for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2016 Shared Task</title>
-      <author><first>Jianxiang</first><last>Wang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="jianxiang-wang"><first>Jianxiang</first><last>Wang</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>33–40</pages>
       <url hash="ddd4df87">K16-2004</url>
       <doi>10.18653/v1/K16-2004</doi>
@@ -412,7 +412,7 @@
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Kathrin</first><last>Donandt</last></author>
       <author><first>Samuel</first><last>Rönnqvist</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>41–49</pages>
       <url hash="59e07b64">K16-2005</url>
@@ -453,9 +453,9 @@
       <title><fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System for <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case>-2016 <fixed-case>E</fixed-case>nglish Shallow Discourse Parsing</title>
       <author><first>Fang</first><last>Kong</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>65–69</pages>
       <url hash="c94e4fcc">K16-2009</url>
       <doi>10.18653/v1/K16-2009</doi>
@@ -473,11 +473,11 @@
     </paper>
     <paper id="11">
       <title><fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System for <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case>-2016 <fixed-case>C</fixed-case>hinese Shallow Discourse Parsing</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>78–84</pages>
       <url hash="844a95e8">K16-2011</url>
       <doi>10.18653/v1/K16-2011</doi>
@@ -485,7 +485,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> End-to-End Discourse Parser for <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2016 Shared Task</title>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>85–91</pages>
       <url hash="82c1e447">K16-2012</url>
@@ -515,7 +515,7 @@
       <title><fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>) Submission on the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2016 Shared Task: Shallow Discourse Parsing using Semantic Lexicons</title>
       <author><first>Manpreet</first><last>Kaur</last></author>
       <author><first>Nishu</first><last>Kumari</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>108–114</pages>
       <url hash="2b35c868">K16-2015</url>
@@ -547,7 +547,7 @@
     <paper id="18">
       <title>Discourse Relation Sense Classification with Two-Step Classifiers</title>
       <author><first>Yusuke</first><last>Kido</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>129–135</pages>
       <url hash="28cbf6cb">K16-2018</url>
       <doi>10.18653/v1/K16-2018</doi>
@@ -555,7 +555,7 @@
     </paper>
     <paper id="19">
       <title>Adapting Event Embedding for Implicit Discourse Relation Recognition</title>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>I-Ta</first><last>Lee</last></author>
       <author><first>Xiao</first><last>Zhang</last></author>
       <author><first>Abdullah Khan</first><last>Zehady</last></author>
diff --git a/data/xml/K17.xml b/data/xml/K17.xml
index 9a2129b259..c4510f2630 100644
--- a/data/xml/K17.xml
+++ b/data/xml/K17.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 21st Conference on Computational Natural Language Learning (<fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017)</booktitle>
       <url hash="4d5de18f">K17-1</url>
-      <editor><first>Roger</first><last>Levy</last></editor>
+      <editor id="roger-levy"><first>Roger</first><last>Levy</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
       <doi>10.18653/v1/K17-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Should Neural Network Architecture Reflect Linguistic Structure?</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1</pages>
       <url hash="b3fed06d">K17-1001</url>
       <doi>10.18653/v1/K17-1001</doi>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title>Rational Distortions of Learners’ Linguistic Input</title>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <pages>2</pages>
       <url hash="83dbcf94">K17-1002</url>
       <doi>10.18653/v1/K17-1002</doi>
@@ -53,7 +53,7 @@
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Leila</first><last>Zilles</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>15–25</pages>
       <url hash="947cccb9">K17-1004</url>
       <doi>10.18653/v1/K17-1004</doi>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="5">
       <title>Parsing for Grammatical Relations via Graph Merging</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Yantao</first><last>Du</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>26–35</pages>
@@ -75,7 +75,7 @@
       <title>Leveraging Eventive Information for Better Metaphor Detection and Classification</title>
       <author><first>I-Hsuan</first><last>Chen</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>36–46</pages>
       <url hash="70f1aa4c">K17-1006</url>
@@ -111,8 +111,8 @@
       <title>Tell Me Why: Using Question Answering as Distant Supervision for Answer Justification</title>
       <author><first>Rebecca</first><last>Sharp</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Michael</first><last>Hammond</last></author>
       <pages>69–79</pages>
@@ -138,8 +138,8 @@
       <author><first>Huadong</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>90–99</pages>
       <url hash="824faaee">K17-1011</url>
       <doi>10.18653/v1/K17-1011</doi>
@@ -149,7 +149,7 @@
     <paper id="12">
       <title>Embedding Words and Senses Together via Joint Knowledge-Enhanced Training</title>
       <author><first>Massimiliano</first><last>Mancini</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Ignacio</first><last>Iacobacci</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>100–111</pages>
@@ -184,8 +184,8 @@
     </paper>
     <paper id="15">
       <title>An Artificial Language Evaluation of Distributional Semantic Models</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
-      <author><first>Michael</first><last>Jones</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="michael-jones"><first>Michael</first><last>Jones</last></author>
       <pages>134–142</pages>
       <url hash="bb0adef1">K17-1015</url>
       <doi>10.18653/v1/K17-1015</doi>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="18">
       <title>Feature Selection as Causal Inference: Experiments with Text Classification</title>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>163–172</pages>
       <url hash="60ac5aea">K17-1018</url>
       <doi>10.18653/v1/K17-1018</doi>
@@ -236,8 +236,8 @@
     </paper>
     <paper id="20">
       <title>Neural Sequence-to-sequence Learning of Internal Word Structure</title>
-      <author><first>Tatyana</first><last>Ruzsics</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tatyana-ruzsics"><first>Tatyana</first><last>Ruzsics</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <pages>184–194</pages>
       <url hash="20ce76f0">K17-1020</url>
       <doi>10.18653/v1/K17-1020</doi>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="21">
       <title>A Supervised Approach to Extractive Summarisation of Scientific Papers</title>
-      <author><first>Ed</first><last>Collins</last></author>
+      <author id="edward-collins"><first>Ed</first><last>Collins</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>195–205</pages>
@@ -259,7 +259,7 @@
       <title>An Automatic Approach for Document-level Topic Model Evaluation</title>
       <author><first>Shraey</first><last>Bhatia</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>206–215</pages>
       <url hash="2eaad930">K17-1022</url>
       <doi>10.18653/v1/K17-1022</doi>
@@ -270,7 +270,7 @@
       <title>Robust Coreference Resolution and Entity Linking on Dialogues: Character Identification on <fixed-case>TV</fixed-case> Show Transcripts</title>
       <author><first>Henry Y.</first><last>Chen</last></author>
       <author><first>Ethan</first><last>Zhou</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>216–225</pages>
       <url hash="4b8a50cd">K17-1023</url>
       <doi>10.18653/v1/K17-1023</doi>
@@ -279,9 +279,9 @@
     </paper>
     <paper id="24">
       <title>Cross-language Learning with Adversarial Neural Networks</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Israa</first><last>Jaradat</last></author>
       <pages>226–237</pages>
       <url hash="9310b2ab">K17-1024</url>
@@ -293,7 +293,7 @@
       <title>Knowledge Tracing in Sequential Learning of Inflected Vocabulary</title>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>238–247</pages>
       <url hash="9c51c7e4">K17-1025</url>
       <doi>10.18653/v1/K17-1025</doi>
@@ -304,7 +304,7 @@
       <title>A Probabilistic Generative Grammar for Semantic Parsing</title>
       <author><first>Abulhair</first><last>Saparov</last></author>
       <author><first>Vijay</first><last>Saraswat</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>248–259</pages>
       <url hash="5d99e2ff">K17-1026</url>
       <doi>10.18653/v1/K17-1026</doi>
@@ -390,7 +390,7 @@
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Minjoon</first><last>Seo</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>333–342</pages>
       <url hash="fed4f480">K17-1034</url>
       <doi>10.18653/v1/K17-1034</doi>
@@ -400,7 +400,7 @@
     <paper id="35">
       <title>The Covert Helps Parse the Overt</title>
       <author><first>Xun</first><last>Zhang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>343–353</pages>
       <url hash="e92a3e90">K17-1035</url>
@@ -413,7 +413,7 @@
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Stefanie</first><last>Eckmann</last></author>
       <author><first>Enrico</first><last>Santus</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Daniel</first><last>Hole</last></author>
       <pages>354–367</pages>
       <url hash="1e5a2350">K17-1036</url>
@@ -426,7 +426,7 @@
       <title>Encoding of phonology in a recurrent neural model of grounded speech</title>
       <author><first>Afra</first><last>Alishahi</last></author>
       <author><first>Marie</first><last>Barking</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>368–378</pages>
       <url hash="241f74bd">K17-1037</url>
       <doi>10.18653/v1/K17-1037</doi>
@@ -436,11 +436,11 @@
     </paper>
     <paper id="38">
       <title>Multilingual Semantic Parsing And Code-Switching</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Hadi</first><last>Afshar</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Glen</first><last>Pink</last></author>
-      <author><first>Philip</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>379–389</pages>
       <url hash="fcaea308">K17-1038</url>
@@ -483,7 +483,7 @@
       <title>Joint Prediction of Morphosyntactic Categories for Fine-Grained <fixed-case>A</fixed-case>rabic Part-of-Speech Tagging Exploiting Tag Dictionary Information</title>
       <author><first>Go</first><last>Inoue</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>421–431</pages>
       <url hash="2d527a06">K17-1042</url>
       <doi>10.18653/v1/K17-1042</doi>
@@ -508,7 +508,7 @@
     <paper id="44">
       <title>Natural Language Generation for Spoken Dialogue System using <fixed-case>RNN</fixed-case> Encoder-Decoder Networks</title>
       <author><first>Van-Khanh</first><last>Tran</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>442–451</pages>
       <url hash="4b5541c6">K17-1044</url>
       <doi>10.18653/v1/K17-1044</doi>
@@ -522,7 +522,7 @@
       <author><first>Kshitijh</first><last>Meelu</last></author>
       <author><first>Ayush</first><last>Pareek</last></author>
       <author><first>Krishnan</first><last>Srinivasan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>452–462</pages>
       <url hash="a42896cd">K17-1045</url>
       <doi>10.18653/v1/K17-1045</doi>
@@ -555,9 +555,9 @@
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Patrick</first><last>Xia</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>1–30</pages>
       <url hash="db473344">K17-2001</url>
@@ -567,9 +567,9 @@
     <paper id="2">
       <title>Training Data Augmentation for Low-Resource Morphological Inflection</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>31–39</pages>
       <url hash="741e6c01">K17-2002</url>
       <doi>10.18653/v1/K17-2002</doi>
@@ -577,8 +577,8 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>LMU</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>SIGMORPHON</fixed-case> 2017 Shared Task on Universal Morphological Reinflection</title>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>40–48</pages>
       <url hash="1973c642">K17-2003</url>
       <doi>10.18653/v1/K17-2003</doi>
@@ -587,7 +587,7 @@
     <paper id="4">
       <title>Align and Copy: <fixed-case>UZH</fixed-case> at <fixed-case>SIGMORPHON</fixed-case> 2017 Shared Task for Morphological Reinflection</title>
       <author><first>Peter</first><last>Makarov</last></author>
-      <author><first>Tatiana</first><last>Ruzsics</last></author>
+      <author id="tatyana-ruzsics"><first>Tatiana</first><last>Ruzsics</last></author>
       <author><first>Simon</first><last>Clematide</last></author>
       <pages>49–57</pages>
       <url hash="783d8e09">K17-2004</url>
@@ -615,7 +615,7 @@
     <paper id="7">
       <title>Experiments on Morphological Reinflection: <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2017 Shared Task</title>
       <author><first>Akhilesh</first><last>Sudhakar</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>71–78</pages>
       <url hash="6c1448bd">K17-2007</url>
       <doi>10.18653/v1/K17-2007</doi>
@@ -645,10 +645,10 @@
     </paper>
     <paper id="10">
       <title>Data Augmentation for Morphological Reinflection</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Adam</first><last>Wiemerslage</last></author>
       <author><first>Ling</first><last>Liu</last></author>
-      <author><first>Lingshuang Jack</first><last>Mao</last></author>
+      <author id="lingshuang-jack-mao"><first>Lingshuang Jack</first><last>Mao</last></author>
       <pages>90–99</pages>
       <url hash="72d71331">K17-2010</url>
       <doi>10.18653/v1/K17-2010</doi>
@@ -657,7 +657,7 @@
     <paper id="11">
       <title>Seq2seq for Morphological Reinflection: When Deep Learning Fails</title>
       <author><first>Hajime</first><last>Senuma</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>100–109</pages>
       <url hash="94852279">K17-2011</url>
       <doi>10.18653/v1/K17-2011</doi>
@@ -677,8 +677,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies</booktitle>
       <url hash="1f3a8c57">K17-3</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>Dan</first><last>Zeman</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="daniel-zeman"><first>Dan</first><last>Zeman</last></editor>
       <doi>10.18653/v1/K17-3</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada</address>
@@ -702,31 +702,31 @@
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Elena</first><last>Badmaeva</last></author>
-      <author><first>Memduh</first><last>Gokirmak</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gokirmak</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Jan</first><last>Hajič jr.</last></author>
+      <author id="jan-hajic-jr"><first>Jan</first><last>Hajič jr.</last></author>
       <author><first>Jaroslava</first><last>Hlaváčová</last></author>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
       <author><first>Stina</first><last>Ojala</last></author>
       <author><first>Anna</first><last>Missilä</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Dima</first><last>Taji</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Herman</first><last>Leung</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <author><first>Hiroshi</first><last>Kanayama</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Kira</first><last>Droganova</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
@@ -741,16 +741,16 @@
       <author><first>Zhuoran</first><last>Yu</last></author>
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Saran</first><last>Lertpradit</last></author>
-      <author><first>Michael</first><last>Mandl</last></author>
+      <author id="michael-mandel"><first>Michael</first><last>Mandl</last></author>
       <author><first>Jesse</first><last>Kirchner</last></author>
       <author><first>Hector Fernandez</first><last>Alcalde</last></author>
       <author><first>Jana</first><last>Strnadová</last></author>
       <author><first>Esha</first><last>Banerjee</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <author><first>Antonio</first><last>Stella</last></author>
       <author><first>Atsuko</first><last>Shimada</last></author>
       <author><first>Sookyoung</first><last>Kwak</last></author>
-      <author><first>Gustavo</first><last>Mendonça</last></author>
+      <author id="gustavo-mendonca"><first>Gustavo</first><last>Mendonça</last></author>
       <author><first>Tatiana</first><last>Lando</last></author>
       <author><first>Rattima</first><last>Nitisaroj</last></author>
       <author><first>Josie</first><last>Li</last></author>
@@ -766,7 +766,7 @@
       <title><fixed-case>S</fixed-case>tanford’s Graph-based Neural Dependency Parser at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017 Shared Task</title>
       <author><first>Timothy</first><last>Dozat</last></author>
       <author><first>Peng</first><last>Qi</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>20–30</pages>
       <url hash="e174a8ef">K17-3002</url>
       <doi>10.18653/v1/K17-3002</doi>
@@ -788,7 +788,7 @@
     <paper id="4">
       <title><fixed-case>IMS</fixed-case> at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017 <fixed-case>UD</fixed-case> Shared Task: <fixed-case>CRF</fixed-case>s and Perceptrons Meet Neural Networks</title>
       <author><first>Anders</first><last>Björkelund</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Xiang</first><last>Yu</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>40–51</pages>
@@ -804,7 +804,7 @@
       <author><first>Yuxuan</first><last>Wang</last></author>
       <author><first>Bo</first><last>Zheng</last></author>
       <author><first>Huaipeng</first><last>Zhao</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Dechuan</first><last>Teng</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>52–62</pages>
@@ -826,7 +826,7 @@
     </paper>
     <paper id="6">
       <title>A System for Multilingual Dependency Parsing based on Bidirectional <fixed-case>LSTM</fixed-case> Feature Representations</title>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>63–70</pages>
       <url hash="2cacd098">K17-3006</url>
@@ -839,7 +839,7 @@
       <author><first>Motoki</first><last>Sato</last></author>
       <author><first>Hitoshi</first><last>Manabe</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>71–79</pages>
       <url hash="f4a7a497">K17-3007</url>
       <doi>10.18653/v1/K17-3007</doi>
@@ -849,7 +849,7 @@
     <paper id="8">
       <title>Parsing with Context Embeddings</title>
       <author><first>Ömer</first><last>Kırnap</last></author>
-      <author><first>Berkay Furkan</first><last>Önder</last></author>
+      <author id="berkay-furkan-onder"><first>Berkay Furkan</first><last>Önder</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>80–87</pages>
       <url hash="c68db857">K17-3008</url>
@@ -906,7 +906,7 @@
       <author><first>Kuan</first><last>Yu</last></author>
       <author><first>Pavel</first><last>Sofroniev</last></author>
       <author><first>Erik</first><last>Schill</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>126–133</pages>
       <url hash="18cc1213">K17-3013</url>
       <doi>10.18653/v1/K17-3013</doi>
@@ -957,9 +957,9 @@
     </paper>
     <paper id="18">
       <title><fixed-case>RACAI</fixed-case>’s Natural Language Processing pipeline for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
-      <author><first>Tiberiu</first><last>Boros</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <pages>174–181</pages>
       <url hash="a33928cc">K17-3018</url>
       <doi>10.18653/v1/K17-3018</doi>
@@ -1021,7 +1021,7 @@
       <title>Initial Explorations of <fixed-case>CCG</fixed-case> Supertagging for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing</title>
       <author><first>Burak Kerim</first><last>Akkus</last></author>
       <author><first>Heval</first><last>Azizoglu</last></author>
-      <author><first>Ruket</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
       <pages>218–227</pages>
       <url hash="c7e29f0b">K17-3023</url>
       <doi>10.18653/v1/K17-3023</doi>
@@ -1032,7 +1032,7 @@
       <title><fixed-case>CLCL</fixed-case> (Geneva) <fixed-case>DINN</fixed-case> Parser: a Neural Network Dependency Parser Ten Years Later</title>
       <author><first>Christophe</first><last>Moor</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Haozhou</first><last>Wang</last></author>
       <pages>228–236</pages>
       <url hash="e7d4bbed">K17-3024</url>
@@ -1044,7 +1044,7 @@
       <title>A Fast and Lightweight System for Multilingual Dependency Parsing</title>
       <author><first>Tao</first><last>Ji</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>237–242</pages>
       <url hash="d9e67330">K17-3025</url>
       <doi>10.18653/v1/K17-3025</doi>
@@ -1053,9 +1053,9 @@
     </paper>
     <paper id="26">
       <title>The <fixed-case>P</fixed-case>aris<fixed-case>NLP</fixed-case> entry at the <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case> <fixed-case>UD</fixed-case> Shared Task 2017: A Tale of a #<fixed-case>P</fixed-case>arsing<fixed-case>T</fixed-case>ragedy</title>
-      <author><first>Éric</first><last>de La Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de La Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>243–252</pages>
       <url hash="0cadf968">K17-3026</url>
       <doi>10.18653/v1/K17-3026</doi>
diff --git a/data/xml/K18.xml b/data/xml/K18.xml
index 0ebc70288e..1ffd35af61 100644
--- a/data/xml/K18.xml
+++ b/data/xml/K18.xml
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Dual Latent Variable Model for Low-Resource Natural Language Generation in Dialogue Systems</title>
       <author><first>Van-Khanh</first><last>Tran</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>21–30</pages>
       <url hash="ee8332eb">K18-1003</url>
       <abstract>Recent deep learning models have shown improving results to natural language generation (NLG) irrespective of providing sufficient annotated data. However, a modest training data may harm such models’ performance. Thus, how to build a generator that can utilize as much of knowledge from a low-resource setting data is a crucial issue in NLG. This paper presents a variational neural-based generation model to tackle the NLG problem of having limited labeled dataset, in which we integrate a variational inference into an encoder-decoder generator and introduce a novel auxiliary auto-encoding with an effective training procedure. Experiments showed that the proposed methods not only outperform the previous models when having sufficient training dataset but also demonstrate strong ability to work acceptably well when the training data is scarce.</abstract>
@@ -53,7 +53,7 @@
     <paper id="4">
       <title>A Trio Neural Model for Dynamic Entity Relatedness Ranking</title>
       <author><first>Tu</first><last>Nguyen</last></author>
-      <author><first>Tuan</first><last>Tran</last></author>
+      <author id="tuan-tran"><first>Tuan</first><last>Tran</last></author>
       <author><first>Wolfgang</first><last>Nejdl</last></author>
       <pages>31–41</pages>
       <url hash="9015a14e">K18-1004</url>
@@ -65,7 +65,7 @@
       <title>A Unified Neural Network Model for Geolocating <fixed-case>T</fixed-case>witter Users</title>
       <author><first>Mohammad</first><last>Ebrahimi</last></author>
       <author><first>Elaheh</first><last>ShafieiBavani</last></author>
-      <author><first>Raymond</first><last>Wong</last></author>
+      <author id="raymond-wong"><first>Raymond</first><last>Wong</last></author>
       <author><first>Fang</first><last>Chen</last></author>
       <pages>42–53</pages>
       <url hash="453ebfae">K18-1005</url>
@@ -97,7 +97,7 @@
       <title>From Strings to Other Things: Linking the Neighborhood and Transposition Effects in Word Reading</title>
       <author><first>Stéphan</first><last>Tulkens</last></author>
       <author><first>Dominiek</first><last>Sandra</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>75–85</pages>
       <url hash="433baa6e">K18-1008</url>
       <abstract>We investigate the relation between the transposition and deletion effects in word reading, i.e., the finding that readers can successfully read “SLAT” as “SALT”, or “WRK” as “WORK”, and the neighborhood effect. In particular, we investigate whether lexical orthographic neighborhoods take into account transposition and deletion in determining neighbors. If this is the case, it is more likely that the neighborhood effect takes place early during processing, and does not solely rely on similarity of internal representations. We introduce a new neighborhood measure, rd20, which can be used to quantify neighborhood effects over arbitrary feature spaces. We calculate the rd20 over large sets of words in three languages using various feature sets and show that feature sets that do not allow for transposition or deletion explain more variance in Reaction Time (RT) measurements. We also show that the rd20 can be calculated using the hidden state representations of an Multi-Layer Perceptron, and show that these explain less variance than the raw features. We conclude that the neighborhood effect is unlikely to have a perceptual basis, but is more likely to be the result of items co-activating after recognition. All code is available at: <url>www.github.com/clips/conll2018</url></abstract>
@@ -119,7 +119,7 @@
     <paper id="10">
       <title>Pervasive Attention: 2<fixed-case>D</fixed-case> Convolutional Neural Networks for Sequence-to-Sequence Prediction</title>
       <author><first>Maha</first><last>Elbayad</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Jakob</first><last>Verbeek</last></author>
       <pages>97–107</pages>
       <url hash="956499b1">K18-1010</url>
@@ -142,10 +142,10 @@
     </paper>
     <paper id="12">
       <title>Uncovering Code-Mixed Challenges: A Framework for Linguistically Driven Question Generation and Neural Based Question Answering</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Pabitra</first><last>Lenka</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>119–130</pages>
       <url hash="67b55f94">K18-1012</url>
       <abstract>Existing research on question answering (QA) and comprehension reading (RC) are mainly focused on the resource-rich language like English. In recent times, the rapid growth of multi-lingual web content has posed several challenges to the existing QA systems. Code-mixing is one such challenge that makes the task more complex. In this paper, we propose a linguistically motivated technique for code-mixed question generation (CMQG) and a neural network based architecture for code-mixed question answering (CMQA). For evaluation, we manually create the code-mixed questions for Hindi-English language pair. In order to show the effectiveness of our neural network based CMQA technique, we utilize two benchmark datasets, SQuAD and MMQA. Experiments show that our proposed model achieves encouraging performance on CMQG and CMQA.</abstract>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="13">
       <title>Learning to Embed Semantic Correspondence for Natural Language Understanding</title>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Jinsik</first><last>Lee</last></author>
       <author><first>Jiwon</first><last>Kim</last></author>
       <pages>131–140</pages>
@@ -177,8 +177,8 @@
     </paper>
     <paper id="15">
       <title>Active Learning for Interactive Neural Machine Translation of Data Streams</title>
-      <author><first>Álvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>151–160</pages>
       <url hash="5230d0b7">K18-1015</url>
       <abstract>We study the application of active learning techniques to the translation of unbounded data streams via interactive neural machine translation. The main idea is to select, from an unbounded stream of source sentences, those worth to be supervised by a human agent. The user will interactively translate those samples. Once validated, these data is useful for adapting the neural machine translation model. We propose two novel methods for selecting the samples to be validated. We exploit the information from the attention mechanism of a neural machine translation system. Our experiments show that the inclusion of active learning techniques into this pipeline allows to reduce the effort required during the process, while increasing the quality of the translation system. Moreover, it enables to balance the human effort required for achieving a certain translation quality. Moreover, our neural system outperforms classical approaches by a large margin.</abstract>
@@ -193,7 +193,7 @@
       <author><first>Robert</first><last>West</last></author>
       <author><first>Andreea</first><last>Hossmann</last></author>
       <author><first>Michael</first><last>Baeriswyl</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <pages>161–170</pages>
       <url hash="7c4dd91f">K18-1016</url>
       <abstract>We propose a new method to detect when users express the intent to leave a service, also known as churn. While previous work focuses solely on social media, we show that this intent can be detected in chatbot conversations. As companies increasingly rely on chatbots they need an overview of potentially churny users. To this end, we crowdsource and publish a dataset of churn intent expressions in chatbot interactions in German and English. We show that classifiers trained on social media data can detect the same intent in the context of chatbots. We introduce a classification architecture that outperforms existing work on churn intent detection in social media. Moreover, we show that, using bilingual word embeddings, a system trained on combined English and German data outperforms monolingual approaches. As the only existing dataset is in English, we crowdsource and publish a novel dataset of German tweets. We thus underline the universal aspect of the problem, as examples of churn intent in English help us identify churn in German tweets and chatbot conversations.</abstract>
@@ -203,8 +203,8 @@
     <paper id="17">
       <title>Learning Text Representations for 500<fixed-case>K</fixed-case> Classification Tasks on Named Entity Disambiguation</title>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>171–180</pages>
       <url hash="e075e22d">K18-1017</url>
       <abstract>Named Entity Disambiguation algorithms typically learn a single model for all target entities. In this paper we present a word expert model and train separate deep learning models for each target entity string, yielding 500K classification tasks. This gives us the opportunity to benchmark popular text representation alternatives on this massive dataset. In order to face scarce training data we propose a simple data-augmentation technique and transfer-learning. We show that bag-of-word-embeddings are better than LSTMs for tasks with scarce training data, while the situation is reversed when having larger amounts. Transferring a LSTM which is learned on all datasets is the most effective context representation option for the word experts in all frequency bands. The experiments show that our system trained on out-of-domain Wikipedia data surpass comparable NED systems which have been trained on in-domain training data.</abstract>
@@ -214,7 +214,7 @@
     <paper id="18">
       <title>Hierarchical Attention Based Position-Aware Network for Aspect-Level Sentiment Analysis</title>
       <author><first>Lishuang</first><last>Li</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>AnQiao</first><last>Zhou</last></author>
       <pages>181–189</pages>
       <url hash="193f8068">K18-1018</url>
@@ -250,7 +250,7 @@
       <author><first>Yova</first><last>Kementchedjhieva</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>211–220</pages>
       <url hash="03dd4757">K18-1021</url>
       <abstract>Most recent approaches to bilingual dictionary induction find a linear alignment between the word vector spaces of two languages. We show that projecting the two languages onto a third, latent space, rather than directly onto each other, while equivalent in terms of expressivity, makes it easier to learn approximate alignments. Our modified approach also allows for supporting languages to be included in the alignment process, to obtain an even better performance in low resource settings.</abstract>
@@ -260,7 +260,7 @@
     <paper id="22">
       <title>Simple Unsupervised Keyphrase Extraction using Sentence Embeddings</title>
       <author><first>Kamil</first><last>Bennani-Smires</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <author><first>Andreea</first><last>Hossmann</last></author>
       <author><first>Michael</first><last>Baeriswyl</last></author>
       <author><first>Martin</first><last>Jaggi</last></author>
@@ -330,9 +330,9 @@
     <paper id="28">
       <title>Uncovering Divergent Linguistic Information in Word Embeddings with Lessons for Intrinsic and Extrinsic Evaluation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>282–291</pages>
       <url hash="56280bed">K18-1028</url>
       <abstract>Following the recent success of word embeddings, it has been argued that there is no such thing as an ideal representation for words, as different models tend to capture divergent and often mutually incompatible aspects like semantics/syntax and similarity/relatedness. In this paper, we show that each embedding model captures more information than directly apparent. A linear transformation that adjusts the similarity order of the model without any external resource can tailor it to achieve better results in those aspects, providing a new perspective on how embeddings encode divergent linguistic information. In addition, we explore the relation between intrinsic and extrinsic evaluation, as the effect of our transformations in downstream tasks is higher for unsupervised systems than for supervised ones.</abstract>
@@ -344,7 +344,7 @@
       <author><first>Judy Hanwen</first><last>Shen</last></author>
       <author><first>Matthias</first><last>Hofer</last></author>
       <author><first>Bjarke</first><last>Felbo</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>292–301</pages>
       <url hash="29b3d1cc">K18-1029</url>
       <abstract>Simple reference games are of central theoretical and empirical importance in the study of situated language use. Although language provides rich, compositional truth-conditional semantics to facilitate reference, speakers and listeners may sometimes lack the overall lexical and cognitive resources to guarantee successful reference through these means alone. However, language also has rich associational structures that can serve as a further resource for achieving successful reference. Here we investigate this use of associational information in a setting where only associational information is available: a simplified version of the popular game Codenames. Using optimal experiment design techniques, we compare a range of models varying in the type of associative information deployed and in level of pragmatic sophistication against human behavior. In this setting we find that listeners’ behavior reflects direct bigram collocational associations more strongly than word-embedding or semantic knowledge graph-based associations and that there is little evidence for pragmatically sophisticated behavior on the part of either speakers or listeners. More generally, we demonstrate the effective use of simple tasks to derive insights into the nature of complex linguistic phenomena.</abstract>
@@ -357,7 +357,7 @@
       <author><first>Joachim</first><last>Bingel</last></author>
       <author><first>Nora</first><last>Hollenstein</last></author>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>302–312</pages>
       <url hash="94a3105d">K18-1030</url>
       <abstract>Learning attention functions requires large volumes of data, but many NLP tasks simulate human behavior, and in this paper, we show that human attention really does provide a good inductive bias on many attention functions in NLP. Specifically, we use estimated human attention derived from eye-tracking corpora to regularize attention functions in recurrent neural networks. We show substantial improvements across a range of tasks, including sentiment analysis, grammatical error detection, and detection of abusive language.</abstract>
@@ -366,7 +366,7 @@
     </paper>
     <paper id="31">
       <title>Sentence-Level Fluency Evaluation: References Help, But Can Be Spared!</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Sascha</first><last>Rothe</last></author>
       <author><first>Katja</first><last>Filippova</last></author>
       <pages>313–323</pages>
@@ -391,7 +391,7 @@
       <title>Learning to Actively Learn Neural Machine Translation</title>
       <author><first>Ming</first><last>Liu</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>334–344</pages>
       <url hash="bf76014c">K18-1033</url>
       <abstract>Traditional active learning (AL) methods for machine translation (MT) rely on heuristics. However, these heuristics are limited when the characteristics of the MT problem change due to e.g. the language pair or the amount of the initial bitext. In this paper, we present a framework to learn sentence selection strategies for neural MT. We train the AL query strategy using a high-resource language-pair based on AL simulations, and then transfer it to the low-resource language-pair of interest. The learned query strategy capitalizes on the shared characteristics between the language pairs to make an effective use of the AL budget. Our experiments on three language-pairs confirms that our method is more effective than strong heuristic-based methods in various conditions, including cold-start and warm-start as well as small and extremely small data conditions.</abstract>
@@ -402,7 +402,7 @@
       <title>Upcycle Your <fixed-case>OCR</fixed-case>: Reusing <fixed-case>OCR</fixed-case>s for Post-<fixed-case>OCR</fixed-case> Text Correction in <fixed-case>R</fixed-case>omanised <fixed-case>S</fixed-case>anskrit</title>
       <author><first>Amrith</first><last>Krishna</last></author>
       <author><first>Bodhisattwa P.</first><last>Majumder</last></author>
-      <author><first>Rajesh</first><last>Bhat</last></author>
+      <author id="rajesh-bhat"><first>Rajesh</first><last>Bhat</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>345–355</pages>
       <url hash="62970ccd">K18-1034</url>
@@ -432,7 +432,7 @@
     </paper>
     <paper id="37">
       <title>Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks</title>
-      <author><first>Edward</first><last>Collins</last></author>
+      <author id="edward-collins"><first>Edward</first><last>Collins</last></author>
       <author><first>Nikolai</first><last>Rozanov</last></author>
       <author><first>Bingbing</first><last>Zhang</last></author>
       <pages>380–391</pages>
@@ -456,7 +456,7 @@
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
       <author><first>Marc-Alexandre</first><last>Côté</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>402–412</pages>
       <url hash="0a3ec32f">K18-1039</url>
@@ -478,7 +478,7 @@
       <title>Resources to Examine the Quality of Word Embedding Models Trained on n-Gram Data</title>
       <author><first>Ábel</first><last>Elekes</last></author>
       <author><first>Adrian</first><last>Englhardt</last></author>
-      <author><first>Martin</first><last>Schäler</last></author>
+      <author id="martin-schaler"><first>Martin</first><last>Schäler</last></author>
       <author><first>Klemens</first><last>Böhm</last></author>
       <pages>423–432</pages>
       <url hash="5a8d79bc">K18-1041</url>
@@ -511,7 +511,7 @@
       <title>Challenge or Empower: Revisiting Argumentation Quality in a News Editorial Corpus</title>
       <author><first>Roxanne</first><last>El Baff</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>454–464</pages>
       <url hash="367a3f61">K18-1044</url>
@@ -521,7 +521,7 @@
     </paper>
     <paper id="45">
       <title>Bringing Order to Neural Word Embeddings with Embeddings Augmented by Random Permutations (<fixed-case>EARP</fixed-case>)</title>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <author><first>Dominic</first><last>Widdows</last></author>
       <pages>465–475</pages>
       <url hash="aa404145">K18-1045</url>
@@ -535,7 +535,7 @@
       <author><first>Shuyan</first><last>Zhou</last></author>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Rong</first><last>Pan</last></author>
       <pages>476–485</pages>
       <url hash="41264888">K18-1046</url>
@@ -569,7 +569,7 @@
       <title>The Lifted Matrix-Space Model for Semantic Composition</title>
       <author><first>WooJin</first><last>Chung</last></author>
       <author><first>Sheng-Fu</first><last>Wang</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>508–518</pages>
       <url hash="a7ad069c">K18-1049</url>
       <abstract>Tree-structured neural network architectures for sentence encoding draw inspiration from the approach to semantic composition generally seen in formal linguistics, and have shown empirical improvements over comparable sequence models by doing so. Moreover, adding multiplicative interaction terms to the composition functions in these models can yield significant further improvements. However, existing compositional approaches that adopt such a powerful composition function scale poorly, with parameter counts exploding as model dimension or vocabulary size grows. We introduce the Lifted Matrix-Space model, which uses a global transformation to map vector word embeddings to matrices, which can then be composed via an operation based on matrix-matrix multiplication. Its composition function effectively transmits a larger number of activations across layers with relatively few model parameters. We evaluate our model on the Stanford NLI corpus, the Multi-Genre NLI corpus, and the Stanford Sentiment Treebank and find that it consistently outperforms TreeLSTM (Tai et al., 2015), the previous best known composition function for tree-structured models.</abstract>
@@ -624,7 +624,7 @@
       <author><first>Sheng</first><last>Huang</last></author>
       <author><first>Fang</first><last>Wang</last></author>
       <author><first>Junjie</first><last>Cao</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>562–572</pages>
       <url hash="51182282">K18-1054</url>
@@ -648,7 +648,7 @@
     <paper id="56">
       <title>Sequence to Sequence Mixture Model for Diverse Machine Translation</title>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Mohammad</first><last>Norouzi</last></author>
       <pages>583–592</pages>
       <url hash="43d8e10a">K18-1056</url>
@@ -661,8 +661,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies</booktitle>
       <url hash="f401e9c4">K18-2</url>
-      <editor><first>Daniel</first><last>Zeman</last></editor>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="daniel-zeman"><first>Daniel</first><last>Zeman</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
       <month>October</month>
@@ -695,7 +695,7 @@
       <title>The 2018 Shared Task on Extrinsic Parser Evaluation: On the Downstream Utility of <fixed-case>E</fixed-case>nglish <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsers</title>
       <author><first>Murhaf</first><last>Fares</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jari</first><last>Björne</last></author>
       <author><first>Richard</first><last>Johansson</last></author>
       <pages>22–33</pages>
@@ -763,7 +763,7 @@
     <paper id="8">
       <title>An Improved Neural Network Model for Joint <fixed-case>POS</fixed-case> Tagging and Dependency Parsing</title>
       <author><first>Dat Quoc</first><last>Nguyen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>81–91</pages>
       <url hash="f803f174">K18-2008</url>
       <abstract>We propose a novel neural network model for joint part-of-speech (POS) tagging and dependency parsing. Our model extends the well-known BIST graph-based dependency parser (Kiperwasser and Goldberg, 2016) by incorporating a BiLSTM-based tagging component to produce automatically predicted POS tags for the parser. On the benchmark English Penn treebank, our model obtains strong UAS and LAS scores at 94.51% and 92.87%, respectively, producing 1.5+% absolute improvements to the BIST graph-based parser, and also obtaining a state-of-the-art POS tagging accuracy at 97.97%. Furthermore, experimental results on parsing 61 “big” Universal Dependencies treebanks from raw texts show that our model outperforms the baseline UDPipe (Straka and Strakova, 2017) with 0.8% higher average POS tagging score and 3.6% higher average LAS score. In addition, with our model, we also obtain state-of-the-art downstream task scores for biomedical event extraction and opinion analysis applications. Our code is available together with all pre-trained models at: <url>https://github.com/datquocnguyen/jPTDP</url></abstract>
@@ -837,7 +837,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>SE</fixed-case>x <fixed-case>B</fixed-case>i<fixed-case>ST</fixed-case>: A Multi-Source Trainable Parser with Deep Contextualized Lexical Representations</title>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Cheoneum</first><last>Park</last></author>
       <author><first>Changki</first><last>Lee</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
@@ -849,9 +849,9 @@
     </paper>
     <paper id="15">
       <title>The <fixed-case>SLT</fixed-case>-Interactions Parsing System at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2018 Shared Task</title>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
-      <author><first>Irshad</first><last>Bhat</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>153–159</pages>
       <url hash="1401d018">K18-2015</url>
       <abstract>This paper describes our system (SLT-Interactions) for the CoNLL 2018 shared task: Multilingual Parsing from Raw Text to Universal Dependencies. Our system performs three main tasks: word segmentation (only for few treebanks), POS tagging and parsing. While segmentation is learned separately, we use neural stacking for joint learning of POS tagging and parsing tasks. For all the tasks, we employ simple neural network architectures that rely on long short-term memory (LSTM) networks for learning task-dependent features. At the basis of our parser, we use an arc-standard algorithm with Swap action for general non-projective parsing. Additionally, we use neural stacking as a knowledge transfer mechanism for cross-domain parsing of low resource domains. Our system shows substantial gains against the UDPipe baseline, with an average improvement of 4.18% in LAS across all languages. Overall, we are placed at the 12th position on the official test sets.</abstract>
@@ -863,7 +863,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Timothy</first><last>Dozat</last></author>
       <author><first>Yuhao</first><last>Zhang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>160–170</pages>
       <url hash="e6afe36d">K18-2016</url>
       <abstract>This paper describes Stanford’s system at the CoNLL 2018 UD Shared Task. We introduce a complete neural pipeline system that takes raw text as input, and performs all tasks required by the shared task, ranging from tokenization and sentence segmentation, to POS tagging and dependency parsing. Our single system submission achieved very competitive performance on big treebanks. Moreover, after fixing an unfortunate bug, our corrected system would have placed the 2nd, 1st, and 3rd on the official evaluation metrics LAS, MLAS, and BLEX, and would have outperformed all submission systems on low-resource treebank categories on all metrics by a large margin. We further show the effectiveness of different model components through extensive ablation studies.</abstract>
@@ -872,8 +872,8 @@
     </paper>
     <paper id="17">
       <title><fixed-case>NLP</fixed-case>-Cube: End-to-End Raw Text Processing With Neural Networks</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Ruxandra</first><last>Burtica</last></author>
       <pages>171–179</pages>
       <url hash="5d0ef661">K18-2017</url>
@@ -924,7 +924,7 @@
     </paper>
     <paper id="22">
       <title><fixed-case>SP</fixed-case>arse: <fixed-case>K</fixed-case>oç <fixed-case>U</fixed-case>niversity Graph-Based Parsing System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2018 Shared Task</title>
-      <author><first>Berkay</first><last>Önder</last></author>
+      <author id="berkay-furkan-onder"><first>Berkay</first><last>Önder</last></author>
       <author><first>Can</first><last>Gümeli</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>216–222</pages>
@@ -939,9 +939,9 @@
       <author><first>Benjamin</first><last>Muller</last></author>
       <author><first>Amal</first><last>Fethi</last></author>
       <author><first>Louis</first><last>Martin</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>223–237</pages>
       <url hash="01a41bae">K18-2023</url>
       <abstract>In this paper, we present the details of the neural dependency parser and the neural tagger submitted by our team ‘ParisNLP’ to the CoNLL 2018 Shared Task on parsing from raw text to Universal Dependencies. We augment the deep Biaffine (BiAF) parser (Dozat and Manning, 2016) with novel features to perform competitively: we utilize an indomain version of ELMo features (Peters et al., 2018) which provide context-dependent word representations; we utilize disambiguated, embedded, morphosyntactic features from lexicons (Sagot, 2018), which complements the existing feature set. Henceforth, we call our system ‘ELMoLex’. In addition to incorporating character embeddings, ELMoLex benefits from pre-trained word vectors, ELMo and morphosyntactic features (whenever available) to correctly handle rare or unknown words which are prevalent in languages with complex morphology. ELMoLex ranked 11th by Labeled Attachment Score metric (70.64%), Morphology-aware LAS metric (55.74%) and ranked 9th by Bilexical dependency metric (60.70%).</abstract>
@@ -951,7 +951,7 @@
     <paper id="24">
       <title>A Morphology-Based Representation Model for <fixed-case>LSTM</fixed-case>-Based Dependency Parsing of Agglutinative Languages</title>
       <author><first>Şaziye Betül</first><last>Özateş</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
       <author><first>Balkız</first><last>Öztürk</last></author>
       <pages>238–247</pages>
@@ -966,7 +966,7 @@
       <author><first>Yufang</first><last>Liu</last></author>
       <author><first>Yijun</first><last>Wang</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>248–255</pages>
       <url hash="d120dc33">K18-2025</url>
       <abstract>We describe the graph-based dependency parser in our system (AntNLP) submitted to the CoNLL 2018 UD Shared Task. We use bidirectional lstm to get the word representation, then a bi-affine pointer networks to compute scores of candidate dependency edges and the MST algorithm to get the final dependency tree. From the official testing results, our system gets 70.90 LAS F1 score (rank 9/26), 55.92 MLAS (10/26) and 60.91 BLEX (8/26).</abstract>
@@ -1009,13 +1009,13 @@
       <author><first>John</first><last>Sylak-Glassman</last></author>
       <author><first>Géraldine</first><last>Walther</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>1–27</pages>
       <url hash="967c01e5">K18-3001</url>
@@ -1055,7 +1055,7 @@
     <paper id="5">
       <title>Experiments on Morphological Reinflection: <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2018 Shared Task</title>
       <author><first>Rishabh</first><last>Jain</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>48–57</pages>
       <url hash="7ca6bc23">K18-3005</url>
       <doi>10.18653/v1/K18-3005</doi>
@@ -1063,7 +1063,7 @@
     </paper>
     <paper id="6">
       <title>The <fixed-case>NYU</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>–<fixed-case>SIGMORPHON</fixed-case> 2018 Shared Task on Universal Morphological Reinflection</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Stanislas</first><last>Lauly</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>58–63</pages>
@@ -1073,8 +1073,8 @@
     </paper>
     <paper id="7">
       <title>Attention-free encoder decoder for morphological processing</title>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <pages>64–68</pages>
       <url hash="7faa1930">K18-3007</url>
       <doi>10.18653/v1/K18-3007</doi>
@@ -1106,7 +1106,7 @@
       <author><first>Ilamvazhuthy</first><last>Subbiah</last></author>
       <author><first>Adam</first><last>Wiemerslage</last></author>
       <author><first>Jonathan</first><last>Lilley</last></author>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <pages>86–92</pages>
       <url hash="03edb49b">K18-3010</url>
       <doi>10.18653/v1/K18-3010</doi>
@@ -1134,7 +1134,7 @@
       <title><fixed-case>IIT</fixed-case>(<fixed-case>BHU</fixed-case>)–<fixed-case>IIITH</fixed-case> at <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>–<fixed-case>SIGMORPHON</fixed-case> 2018 Shared Task on Universal Morphological Reinflection</title>
       <author><first>Abhishek</first><last>Sharma</last></author>
       <author><first>Ganesh</first><last>Katrapati</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>105–111</pages>
       <url hash="cac6be99">K18-3013</url>
       <doi>10.18653/v1/K18-3013</doi>
diff --git a/data/xml/K19.xml b/data/xml/K19.xml
index 6417992892..9ff90a7015 100644
--- a/data/xml/K19.xml
+++ b/data/xml/K19.xml
@@ -51,7 +51,7 @@
     <paper id="4">
       <title>Investigating Cross-Lingual Alignment Methods for Contextualized Embeddings with Token-Level Evaluation</title>
       <author><first>Qianchu</first><last>Liu</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>33–43</pages>
@@ -87,7 +87,7 @@
     <paper id="7">
       <title>Using Priming to Uncover the Organization of Syntactic Representations in Neural Language Models</title>
       <author><first>Grusha</first><last>Prasad</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>66–76</pages>
       <abstract>Neural language models (LMs) perform well on tasks that require sensitivity to syntactic structure. Drawing on the syntactic priming paradigm from psycholinguistics, we propose a novel technique to analyze the representations that enable such success. By establishing a gradient similarity metric between structures, this technique allows us to reconstruct the organization of the LMs’ syntactic representational space. We use this technique to demonstrate that LSTM LMs’ representations of different types of sentences with relative clauses are organized hierarchically in a linguistically interpretable manner, suggesting that the LMs track abstract properties of the sentence.</abstract>
@@ -127,7 +127,7 @@
       <author><first>Mahmoud</first><last>Azab</last></author>
       <author><first>Noriyuki</first><last>Kojima</last></author>
       <author><first>Jia</first><last>Deng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>99–109</pages>
       <abstract>We introduce a new embedding model to represent movie characters and their interactions in a dialogue by encoding in the same representation the language used by these characters as well as information about the other participants in the dialogue. We evaluate the performance of these new character embeddings on two tasks: (1) character relatedness, using a dataset we introduce consisting of a dense character interaction matrix for 4,378 unique character pairs over 22 hours of dialogue from eighteen movies; and (2) character relation classification, for fine- and coarse-grained relations, as well as sentiment relations. Our experiments show that our model significantly outperforms the traditional Word2Vec continuous bag-of-words and skip-gram models, demonstrating the effectiveness of the character embeddings we introduce. We further show how these embeddings can be used in conjunction with a visual question answering system to improve over previous results.</abstract>
       <url hash="5c915b4b">K19-1010</url>
@@ -153,7 +153,7 @@
       <author><first>Adeline</first><last>Wong</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author><first>Françoise</first><last>Beaufays</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>121–130</pages>
       <abstract>We propose algorithms to train production-quality n-gram language models using federated learning. Federated learning is a distributed computation platform that can be used to train global models for portable devices such as smart phones. Federated learning is especially relevant for applications handling privacy-sensitive data, such as virtual keyboards, because training is performed without the users’ data ever leaving their devices. While the principles of federated learning are fairly generic, its methodology assumes that the underlying models are neural networks. However, virtual keyboards are typically powered by n-gram language models for latency reasons. We propose to train a recurrent neural network language model using the decentralized FederatedAveraging algorithm and to approximate this federated model server-side with an n-gram model that can be deployed to devices for fast inference. Our technical contributions include ways of handling large vocabularies, algorithms to correct capitalization errors in user data, and efficient finite state transducer algorithms to convert word language models to word-piece language models and vice versa. The n-gram language models trained with federated learning are compared to n-grams trained with traditional server-based algorithms using A/B tests on tens of millions of users of a virtual keyboard. Results are presented for two languages, American English and Brazilian Portuguese. This work demonstrates that high-quality n-gram language models can be trained directly on client mobile devices without sensitive training data ever leaving the devices.</abstract>
       <url hash="e139da7a">K19-1012</url>
@@ -174,10 +174,10 @@
     <paper id="14">
       <title>Weird Inflects but <fixed-case>OK</fixed-case>: Making Sense of Morphological Generation Errors</title>
       <author><first>Kyle</first><last>Gorman</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Magdalena</first><last>Markowska</last></author>
       <pages>140–151</pages>
       <abstract>We conduct a manual error analysis of the CoNLL-SIGMORPHON Shared Task on Morphological Reinflection. This task involves natural language generation: systems are given a word in citation form (e.g., hug) and asked to produce the corresponding inflected form (e.g., the simple past hugged). We propose an error taxonomy and use it to annotate errors made by the top two systems across twelve languages. Many of the observed errors are related to inflectional patterns sensitive to inherent linguistic properties such as animacy or affect; many others are failures to predict truly unpredictable inflectional behaviors. We also find nearly one quarter of the residual “errors” reflect errors in the gold data.</abstract>
@@ -191,7 +191,7 @@
       <author><first>Yingtao</first><last>Tian</last></author>
       <author><first>Haochen</first><last>Chen</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <author><first>Carlo</first><last>Zaniolo</last></author>
       <pages>152–162</pages>
       <abstract>Bilingual word embeddings have been widely used to capture the correspondence of lexical semantics in different human languages. However, the cross-lingual correspondence between sentences and words is less studied, despite that this correspondence can significantly benefit many applications such as crosslingual semantic search and textual inference. To bridge this gap, we propose a neural embedding model that leverages bilingual dictionaries. The proposed model is trained to map the lexical definitions to the cross-lingual target words, for which we explore with different sentence encoding techniques. To enhance the learning process on limited resources, our model adopts several critical learning strategies, including multi-task learning on different bridges of languages, and joint learning of the dictionary model with a bilingual word embedding model. We conduct experiments on two new tasks. In the cross-lingual reverse dictionary retrieval task, we demonstrate that our model is capable of comprehending bilingual concepts based on descriptions, and the proposed learning strategies are effective. In the bilingual paraphrase identification task, we show that our model effectively associates sentences in different languages via a shared embedding space, and outperforms existing approaches in identifying bilingual paraphrases.</abstract>
@@ -276,7 +276,7 @@
       <title>Comparing Top-Down and Bottom-Up Neural Generative Dependency Models</title>
       <author><first>Austin</first><last>Matthews</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>227–237</pages>
       <abstract>Recurrent neural network grammars generate sentences using phrase-structure syntax and perform very well on both parsing and language modeling. To explore whether generative dependency models are similarly effective, we propose two new generative models of dependency syntax. Both models use recurrent neural nets to avoid making explicit independence assumptions, but they differ in the order used to construct the trees: one builds the tree bottom-up and the other top-down, which profoundly changes the estimation problem faced by the learner. We evaluate the two models on three typologically different languages: English, Arabic, and Japanese. While both generative models improve parsing performance over a discriminative baseline, they are significantly less effective than non-syntactic LSTM language models. Surprisingly, little difference between the construction orders is observed for either parsing or language modeling.</abstract>
       <url hash="88970331">K19-1022</url>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="23">
       <title>Representation Learning and Dynamic Programming for Arc-Hybrid Parsing</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <author><first>Mathieu</first><last>Lacroix</last></author>
       <pages>238–248</pages>
@@ -298,8 +298,8 @@
       <title>Policy Preference Detection in Parliamentary Debate Motions</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>249–259</pages>
       <abstract>Debate motions (proposals) tabled in the UK Parliament contain information about the stated policy preferences of the Members of Parliament who propose them, and are key to the analysis of all subsequent speeches given in response to them. We attempt to automatically label debate motions with codes from a pre-existing coding scheme developed by political scientists for the annotation and analysis of political parties’ manifestos. We develop annotation guidelines for the task of applying these codes to debate motions at two levels of granularity and produce a dataset of manually labelled examples. We evaluate the annotation process and the reliability and utility of the labelling scheme, finding that inter-annotator agreement is comparable with that of other studies conducted on manifesto data. Moreover, we test a variety of ways of automatically labelling motions with the codes, ranging from similarity matching to neural classification methods, and evaluate them against the gold standard labels. From these experiments, we note that established supervised baselines are not always able to improve over simple lexical heuristics. At the same time, we detect a clear and evident benefit when employing BERT, a state-of-the-art deep language representation model, even in classification scenarios with over 30 different labels and limited amounts of training data.</abstract>
       <url hash="79562fde">K19-1024</url>
@@ -309,7 +309,7 @@
     <paper id="25">
       <title>Improving Neural Machine Translation by Achieving Knowledge Transfer with Sentence Alignment Learning</title>
       <author><first>Xuewen</first><last>Shi</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Wenguan</first><last>Wang</last></author>
       <author><first>Ping</first><last>Jian</last></author>
       <author><first>Yi-Kun</first><last>Tang</last></author>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="26">
       <title>Code-Switched Language Models Using Neural Based Synthetic Data from Parallel Sentences</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -359,7 +359,7 @@
       <title>Low-Resource Parsing with Crosslingual Contextualized Representations</title>
       <author><first>Phoebe</first><last>Mulcaire</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>304–315</pages>
       <abstract>Despite advances in dependency parsing, languages with small treebanks still present challenges. We assess recent approaches to multilingual contextual word representations (CWRs), and compare them for crosslingual transfer from a language with a large treebank to a language with a small or nonexistent treebank, by sharing parameters between languages in the parser itself. We experiment with a diverse selection of languages in both simulated and truly low-resource scenarios, and show that multilingual CWRs greatly facilitate low-resource dependency parsing even without crosslingual supervision such as dictionaries or parallel text. Furthermore, we examine the non-contextual part of the learned language models (which we call a “decontextual probe”) to demonstrate that polyglot language models better encode crosslingual lexical correspondence compared to aligned monolingual language models. This analysis provides further evidence that polyglot training is an effective approach to crosslingual transfer.</abstract>
       <url hash="b4fd32f1">K19-1029</url>
@@ -394,7 +394,7 @@
       <title>Word Recognition, Competition, and Activation in a Model of Visually Grounded Speech</title>
       <author><first>William N.</first><last>Havard</last></author>
       <author><first>Jean-Pierre</first><last>Chevrot</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>339–348</pages>
       <abstract>In this paper, we study how word-like units are represented and activated in a recurrent neural model of visually grounded speech. The model used in our experiments is trained to project an image and its spoken description in a common representation space. We show that a recurrent model trained on spoken sentences implicitly segments its input into word-like units and reliably maps them to their correct visual referents. We introduce a methodology originating from linguistics to analyse the representation learned by neural networks – the gating paradigm – and show that the correct representation of a word is only activated if the network has access to first phoneme of the target word, suggesting that the network does not rely on a global acoustic pattern. Furthermore, we find out that not all speech frames (MFCC vectors in our case) play an equal role in the final encoded representation of a given word, but that some frames have a crucial effect on it. Finally we suggest that word representation could be activated through a process of lexical competition.</abstract>
       <url hash="125a4792">K19-1032</url>
@@ -406,8 +406,8 @@
       <title><fixed-case>EQUATE</fixed-case>: A Benchmark Evaluation Framework for Quantitative Reasoning in Natural Language Inference</title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
       <author><first>Aakanksha</first><last>Naik</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>349–361</pages>
       <abstract>Quantitative reasoning is a higher-order reasoning skill that any intelligent natural language understanding system can reasonably be expected to handle. We present EQUATE (Evaluating Quantitative Understanding Aptitude in Textual Entailment), a new framework for quantitative reasoning in textual entailment. We benchmark the performance of 9 published NLI models on EQUATE, and find that on average, state-of-the-art methods do not achieve an absolute improvement over a majority-class baseline, suggesting that they do not implicitly learn to reason with quantities. We establish a new baseline Q-REAS that manipulates quantities symbolically. In comparison to the best performing NLI model, it achieves success on numerical reasoning tests (+24.2 %), but has limited verbal reasoning capabilities (-8.1 %). We hope our evaluation framework will support the development of models of quantitative reasoning in language understanding.</abstract>
       <url hash="204b01ea">K19-1033</url>
@@ -417,9 +417,9 @@
     <paper id="34">
       <title>Linguistic Analysis Improves Neural Metaphor Detection</title>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Laura</first><last>Michaelis</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>362–371</pages>
       <abstract>In the field of metaphor detection, deep learning systems are the ubiquitous and achieve strong performance on many tasks. However, due to the complicated procedures for manually identifying metaphors, the datasets available are relatively small and fraught with complications. We show that using syntactic features and lexical resources can automatically provide additional high-quality training data for metaphoric language, and this data can cover gaps and inconsistencies in metaphor annotation, improving state-of-the-art word-level metaphor identification. This novel application of automatically improving training data improves classification across numerous tasks, and reconfirms the necessity of high-quality data for deep learning frameworks.</abstract>
       <url hash="cfca20ce">K19-1034</url>
@@ -428,7 +428,7 @@
     </paper>
     <paper id="35">
       <title>Cross-Lingual Dependency Parsing with Unlabeled Auxiliary Languages</title>
-      <author><first>Wasi Uddin</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi Uddin</first><last>Ahmad</last></author>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -458,7 +458,7 @@
       <author><first>Tian</first><last>Wang</last></author>
       <author><first>Arun Tejasvi</first><last>Chaganty</last></author>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Angel X.</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel X.</first><last>Chang</last></author>
       <pages>393–403</pages>
       <abstract>Reflective listening–demonstrating that you have heard your conversational partner–is key to effective communication. Expert human communicators often mimic and rephrase their conversational partner, e.g., when responding to sentimental stories or to questions they don’t know the answer to. We introduce a new task and an associated dataset wherein dialogue agents similarly mimic and rephrase a user’s request to communicate sympathy (I’m sorry to hear that) or lack of knowledge (I do not know that). We study what makes a rephrasal response good against a set of qualitative metrics. We then evaluate three models for generating responses: a syntax-aware rule-based system, a seq2seq LSTM neural models with attention (S2SA), and the same neural model augmented with a copy mechanism (S2SA+C). In a human evaluation, we find that S2SA+C and the rule-based system are comparable and approach human-generated response quality. In addition, experiences with a live deployment of S2SA+C in a customer support setting suggest that this generation task is a practical contribution to real world conversational agents.</abstract>
       <url hash="82065a5a">K19-1037</url>
@@ -470,7 +470,7 @@
       <title>Automated Pyramid Summarization Evaluation</title>
       <author><first>Yanjun</first><last>Gao</last></author>
       <author><first>Chen</first><last>Sun</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>404–418</pages>
       <abstract>Pyramid evaluation was developed to assess the content of paragraph length summaries of source texts. A pyramid lists the distinct units of content found in several reference summaries, weights content units by how many reference summaries they occur in, and produces three scores based on the weighted content of new summaries. We present an automated method that is more efficient, more transparent, and more complete than previous automated pyramid methods. It is tested on a new dataset of student summaries, and historical NIST data from extractive summarizers.</abstract>
       <url hash="f85672a0">K19-1038</url>
@@ -583,7 +583,7 @@
       <author><first>Lei</first><last>Guo</last></author>
       <author><first>Kate</first><last>Mays</last></author>
       <author><first>Margrit</first><last>Betke</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <pages>504–514</pages>
       <abstract>Different news articles about the same topic often offer a variety of perspectives: an article written about gun violence might emphasize gun control, while another might promote 2nd Amendment rights, and yet a third might focus on mental health issues. In communication research, these different perspectives are known as “frames”, which, when used in news media will influence the opinion of their readers in multiple ways. In this paper, we present a method for effectively detecting frames in news headlines. Our training and performance evaluation is based on a new dataset of news headlines related to the issue of gun violence in the United States. This Gun Violence Frame Corpus (GVFC) was curated and annotated by journalism and communication experts. Our proposed approach sets a new state-of-the-art performance for multiclass news frame detection, significantly outperforming a recent baseline by 35.9% absolute difference in accuracy. We apply our frame detection approach in a large scale study of 88k news headlines about the coverage of gun violence in the U.S. between 2016 and 2018.</abstract>
       <url hash="a666a1db">K19-1047</url>
@@ -606,7 +606,7 @@
     </paper>
     <paper id="49">
       <title>Learning Dense Representations for Entity Retrieval</title>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Sayali</first><last>Kulkarni</last></author>
       <author><first>Larry</first><last>Lansing</last></author>
       <author><first>Alessandro</first><last>Presta</last></author>
@@ -663,7 +663,7 @@
       <author><first>Sheshera</first><last>Mysore</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Adrian</first><last>Benton</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>574–581</pages>
       <abstract>The official voting records of United States congresspeople are preserved as roll call votes. Prediction of voting behavior of politicians for whom no voting record exists, such as individuals running for office, is important for forecasting key political decisions. Prior work has relied on past votes cast to predict future votes, and thus fails to predict voting patterns for politicians without voting records. We address this by augmenting a prior state of the art model with multiple sources of external knowledge so as to enable prediction on unseen politicians. The sources of knowledge we use are news text and Freebase, a manually curated knowledge base. We propose augmentations based on unigram features for news text, and a knowledge base embedding method followed by a neural network composition for relations from Freebase. Empirical evaluation of these approaches indicate that the proposed models outperform the prior system for politicians with complete historical voting records by 1.0% point of accuracy (8.7% error reduction) and for politicians without voting records by 33.4% points of accuracy (66.7% error reduction). We also show that the knowledge base augmented approach outperforms the news text augmented approach by 4.2% points of accuracy.</abstract>
       <url hash="4cad6e78">K19-1053</url>
@@ -674,7 +674,7 @@
       <title><fixed-case>B</fixed-case>eam<fixed-case>S</fixed-case>eg: A Joint Model for Multi-Document Segmentation and Topic Identification</title>
       <author><first>Pedro</first><last>Mota</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>582–592</pages>
       <abstract>We propose BeamSeg, a joint model for segmentation and topic identification of documents from the same domain. The model assumes that lexical cohesion can be observed across documents, meaning that segments describing the same topic use a similar lexical distribution over the vocabulary. The model implements lexical cohesion in an unsupervised Bayesian setting by drawing from the same language model segments with the same topic. Contrary to previous approaches, we assume that language models are not independent, since the vocabulary changes in consecutive segments are expected to be smooth and not abrupt. We achieve this by using a dynamic Dirichlet prior that takes into account data contributions from other topics. BeamSeg also models segment length properties of documents based on modality (textbooks, slides, <i>etc.</i>). The evaluation is carried out in three datasets. In two of them, improvements of up to 4.8% and 7.3% are obtained in the segmentation and topic identifications tasks, indicating that both tasks should be jointly modeled.</abstract>
       <url hash="d218f465">K19-1054</url>
@@ -695,7 +695,7 @@
     </paper>
     <paper id="56">
       <title>Effective Attention Modeling for Neural Relation Extraction</title>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>603–612</pages>
       <abstract>Relation extraction is the task of determining the relation between two entities in a sentence. Distantly-supervised models are popular for this task. However, sentences can be long and two entities can be located far from each other in a sentence. The pieces of evidence supporting the presence of a relation between two entities may not be very direct, since the entities may be connected via some indirect links such as a third entity or via co-reference. Relation extraction in such scenarios becomes more challenging as we need to capture the long-distance interactions among the entities and other words in the sentence. Also, the words in a sentence do not contribute equally in identifying the relation between the two entities. To address this issue, we propose a novel and effective attention model which incorporates syntactic information of the sentence and a multi-factor attention mechanism. Experiments on the New York Times corpus show that our proposed model outperforms prior state-of-the-art models.</abstract>
@@ -772,7 +772,7 @@
       <author><first>I-Hung</first><last>Hsu</last></author>
       <author><first>Mu</first><last>Yang</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>666–106</pages>
       <abstract>We propose a novel deep structured learning framework for event temporal relation extraction. The model consists of 1) a recurrent neural network (RNN) to learn scoring functions for pair-wise relations, and 2) a structured support vector machine (SSVM) to make joint predictions. The neural network automatically learns representations that account for long-term contexts to provide robust features for the structured model, while the SSVM incorporates domain knowledge such as transitive closure of temporal relations as constraints to make better globally consistent decisions. By jointly training the two components, our model combines the benefits of both data-driven learning and knowledge exploitation. Experimental results on three high-quality event temporal relation datasets (TCR, MATRES, and TB-Dense) demonstrate that incorporated with pre-trained contextualized embeddings, the proposed model achieves significantly better performances than the state-of-the-art methods on all three datasets. We also provide thorough ablation studies to investigate our model.</abstract>
@@ -859,7 +859,7 @@
       <author><first>Yiming</first><last>Cui</last></author>
       <author><first>Nan</first><last>Shao</last></author>
       <author><first>Su</first><last>He</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Shijin</first><last>Wang</last></author>
       <author><first>Guoping</first><last>Hu</last></author>
@@ -873,7 +873,7 @@
       <title>Relation Module for Non-Answerable Predictions on Reading Comprehension</title>
       <author><first>Kevin</first><last>Huang</last></author>
       <author><first>Yun</first><last>Tang</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>747–756</pages>
@@ -910,7 +910,7 @@
       <title><fixed-case>TILM</fixed-case>: Neural Language Models with Evolving Topical Influence</title>
       <author><first>Shubhra Kanti</first><last>Karmaker Santu</last></author>
       <author><first>Kalyan</first><last>Veeramachaneni</last></author>
-      <author><first>Chengxiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last></author>
       <pages>778–788</pages>
       <abstract>Content of text data are often influenced by contextual factors which often evolve over time (e.g., content of social media are often influenced by topics covered in the major news streams). Existing language models do not consider the influence of such related evolving topics, and thus are not optimal. In this paper, we propose to incorporate such topical-influence into a language model to both improve its accuracy and enable cross-stream analysis of topical influences. Specifically, we propose a novel language model called Topical Influence Language Model (TILM), which is a novel extension of a neural language model to capture the influences on the contents in one text stream by the evolving topics in another related (or possibly same) text stream. Experimental results on six different text stream data comprised of conference paper titles show that the incorporation of evolving topical influence into a language model is beneficial and TILM outperforms multiple baselines in a challenging task of text forecasting. In addition to serving as a language model, TILM further enables interesting analysis of topical influence among multiple text streams.</abstract>
       <url hash="9202a96c">K19-1073</url>
@@ -987,7 +987,7 @@
       <author><first>Aneesh</first><last>Pappu</last></author>
       <author><first>Rohun</first><last>Saxena</last></author>
       <author><first>Akhila</first><last>Yerukola</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>843–861</pages>
       <abstract>Large neural language models trained on massive amounts of text have emerged as a formidable strategy for Natural Language Understanding tasks. However, the strength of these models as Natural Language Generators is less clear. Though anecdotal evidence suggests that these models generate better quality text, there has been no detailed study characterizing their generation abilities. In this work, we compare the performance of an extensively pretrained model, OpenAI GPT2-117 (Radford et al., 2019), to a state-of-the-art neural story generation model (Fan et al., 2018). By evaluating the generated text across a wide variety of automatic metrics, we characterize the ways in which pretrained models do, and do not, make better storytellers. We find that although GPT2-117 conditions more strongly on context, is more sensitive to ordering of events, and uses more unusual words, it is just as likely to produce repetitive and under-diverse text when using likelihood-maximizing decoding algorithms.</abstract>
       <url hash="762d2c11">K19-1079</url>
@@ -1038,7 +1038,7 @@
       <author><first>Lorenzo</first><last>Tarantino</last></author>
       <author><first>Alexandros</first><last>Lazaridis</last></author>
       <author><first>Andreas</first><last>Fischer</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <pages>890–899</pages>
       <abstract>In sequence modeling tasks the token order matters, but this information can be partially lost due to the discretization of the sequence into data points. In this paper, we study the imbalance between the way certain token pairs are included in data points and others are not. We denote this a token order imbalance (TOI) and we link the partial sequence information loss to a diminished performance of the system as a whole, both in text and speech processing tasks. We then provide a mechanism to leverage the full token order information—Alleviated TOI—by iteratively overlapping the token composition of data points. For recurrent networks, we use prime numbers for the batch size to avoid redundancies when building batches from overlapped data points. The proposed method achieved state of the art performance in both text and speech related tasks.</abstract>
       <url hash="b8f78ed8">K19-1083</url>
@@ -1074,7 +1074,7 @@
       <author><first>Yukun</first><last>Feng</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>920–928</pages>
       <abstract>We propose a simple and effective method to inject word-level information into character-aware neural language models. Unlike previous approaches which usually inject word-level information at the input of a long short-term memory (LSTM) network, we inject it into the softmax function. The resultant model can be seen as a combination of character-aware language model and simple word-level language model. Our injection method can also be used together with previous methods. Through the experiments on 14 typologically diverse languages, we empirically show that our injection method, when used together with the previous methods, works better than the previous methods, including a gating mechanism, averaging, and concatenation of word vectors. We also provide a comprehensive comparison of these injection methods.</abstract>
       <url hash="9c32eb7c">K19-1086</url>
@@ -1083,7 +1083,7 @@
     </paper>
     <paper id="87">
       <title>On Model Stability as a Function of Random Seed</title>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Rishabh</first><last>Jain</last></author>
       <pages>929–939</pages>
       <abstract>In this paper, we focus on quantifying model stability as a function of random seed by investigating the effects of the induced randomness on model performance and the robustness of the model in general. We specifically perform a controlled study on the effect of random seeds on the behaviour of attention, gradient-based and surrogate model based (LIME) interpretations. Our analysis suggests that random seeds can adversely affect the consistency of models resulting in counterfactual interpretations. We propose a technique called Aggressive Stochastic Weight Averaging (ASWA) and an extension called Norm-filtered Aggressive Stochastic Weight Averaging (NASWA) which improves the stability of models over random seeds. With our ASWA and NASWA based optimization, we are able to improve the robustness of the original model, on average reducing the standard deviation of the model’s performance by 72%.</abstract>
@@ -1097,7 +1097,7 @@
       <title>Studying Generalisability across Abusive Language Detection Datasets</title>
       <author><first>Steve Durairaj</first><last>Swamy</last></author>
       <author><first>Anupam</first><last>Jamatia</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>940–950</pages>
       <abstract>Work on Abusive Language Detection has tackled a wide range of subtasks and domains. As a result of this, there exists a great deal of redundancy and non-generalisability between datasets. Through experiments on cross-dataset training and testing, the paper reveals that the preconceived notion of including more non-abusive samples in a dataset (to emulate reality) may have a detrimental effect on the generalisability of a model trained on that data. Hence a hierarchical annotation model is utilised here to reveal redundancies in existing datasets and to help reduce redundancy in future efforts.</abstract>
       <url hash="bafcaa79">K19-1088</url>
@@ -1196,7 +1196,7 @@
       <title>Predicting the Role of Political Trolls in Social Media</title>
       <author><first>Atanas</first><last>Atanasov</last></author>
       <author><first>Gianmarco</first><last>De Francisci Morales</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1023–1034</pages>
       <abstract>We investigate the political roles of “Internet trolls” in social media. Political trolls, such as the ones linked to the Russian Internet Research Agency (IRA), have recently gained enormous attention for their ability to sway public opinion and even influence elections. Analysis of the online traces of trolls has shown different behavioral patterns, which target different slices of the population. However, this analysis is manual and labor-intensive, thus making it impractical as a first-response tool for newly-discovered troll farms. In this paper, we show how to automate this analysis by using machine learning in a realistic setting. In particular, we show how to classify trolls according to their political role —left, news feed, right— by using features extracted from social media, i.e., Twitter, in two scenarios: (i) in a traditional supervised learning scenario, where labels for trolls are available, and (ii) in a distant supervision scenario, where labels for trolls are not available, and we rely on more-commonly-available labels for news outlets mentioned by the trolls. Technically, we leverage the community structure and the text of the messages in the online social network of trolls represented as a graph, from which we extract several types of learned representations, i.e., embeddings, for the trolls. Experiments on the “IRA Russian Troll” dataset show that our methodology improves over the state-of-the-art in the first scenario, while providing a compelling case for the second scenario, which has not been explored in the literature thus far.</abstract>
       <url hash="5faa5564">K19-1096</url>
@@ -1221,7 +1221,7 @@
       <url hash="1639259f">K19-2</url>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <editor><first>Omri</first><last>Abend</last></editor>
-      <editor><first>Jan</first><last>Hajic</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajic</last></editor>
       <editor><first>Daniel</first><last>Hershcovich</last></editor>
       <editor><first>Marco</first><last>Kuhlmann</last></editor>
       <editor><first>Tim</first><last>O’Gorman</last></editor>
@@ -1247,7 +1247,7 @@
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Jayeol</first><last>Chun</last></author>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Zdenka</first><last>Uresova</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
       <pages>1–27</pages>
       <abstract>The 2019 Shared Task at the Conference for Computational Language Learning (CoNLL) was devoted to Meaning Representation Parsing (MRP) across frameworks. Five distinct approaches to the representation of sentence meaning in the form of directed graph were represented in the training and evaluation data for the task, packaged in a uniform abstract graph representation and serialization. The task received submissions from eighteen teams, of which five do not participate in the official ranking because they arrived after the closing deadline, made use of additional training data, or involved one of the task co-organizers. All technical information regarding the task, including system submissions, official results, and links to supporting resources and software are available from the task web site at: <url>http://mrp.nlpl.eu</url></abstract>
       <url hash="334f3476">K19-2001</url>
@@ -1269,7 +1269,7 @@
     <paper id="3">
       <title>The <fixed-case>ERG</fixed-case> at <fixed-case>MRP</fixed-case> 2019: Radically Compositional Semantic Dependencies</title>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <pages>40–44</pages>
       <abstract>The English Resource Grammar (ERG) is a broad-coverage computational grammar of English that outputs underspecified logical-form representations of meaning in a framework dubbed English Resource Semantics (ERS). Two of the target representations in the the 2019 Shared Task on Cross-Framework Meaning Representation Parsing (MRP 2019) derive graph-based simplifications of ERS, viz. Elementary Dependency Structures (EDS) and DELPH-IN MRS Bi-Lexical Dependencies (DM). As a point of reference outside the official MRP competition, we parsed the evaluation strings using the ERG and converted the resulting meaning representations to EDS and DM. These graphs yield higher evaluation scores than the purely data-driven parsers in the actual shared task, suggesting that the general-purpose linguistic knowledge about English grammar encoded in the ERG can add value when parsing into these meaning representations.</abstract>
       <url hash="b3459db6">K19-2003</url>
@@ -1283,7 +1283,7 @@
       <author><first>Zhuosheng</first><last>Zhang</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>45–54</pages>
       <abstract>This paper describes our SJTU-NICT’s system for participating in the shared task on Cross-Framework Meaning Representation Parsing (MRP) at the 2019 Conference for Computational Language Learning (CoNLL). Our system uses a graph-based approach to model a variety of semantic graph parsing tasks. Our main contributions in the submitted system are summarized as follows: 1. Our model is fully end-to-end and is capable of being trained only on the given training set which does not rely on any other extra training source including the companion data provided by the organizer; 2. We extend our graph pruning algorithm to a variety of semantic graphs, solving the problem of excessive semantic graph search space; 3. We introduce multi-task learning for multiple objectives within the same framework. The evaluation results show that our system achieved second place in the overall <tex-math>F_1</tex-math> score and achieved the best <tex-math>F_1</tex-math> score on the DM framework.</abstract>
       <url hash="c50a7818">K19-2004</url>
@@ -1350,7 +1350,7 @@
       <author><first>Jinwoon</first><last>Min</last></author>
       <author><first>Kwanghyeon</first><last>Park</last></author>
       <author><first>Jong-Hun</first><last>Shin</last></author>
-      <author><first>Young-Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Kil</first><last>Kim</last></author>
       <pages>95–103</pages>
       <abstract>This paper describes Jeonbuk National University (JBNU)’s system for the 2019 shared task on Cross-Framework Meaning Representation Parsing (MRP 2019) at the Conference on Computational Natural Language Learning. Of the five frameworks, we address only the DELPH-IN MRS Bi-Lexical Dependencies (DP), Prague Semantic Dependencies (PSD), and Universal Conceptual Cognitive Annotation (UCCA) frameworks. We propose a unified parsing model using biaffine attention (Dozat and Manning, 2017), consisting of 1) a BERT-BiLSTM encoder and 2) a biaffine attention decoder. First, the BERT-BiLSTM for sentence encoder uses BERT to compose a sentence’s wordpieces into word-level embeddings and subsequently applies BiLSTM to word-level representations. Second, the biaffine attention decoder determines the scores for an edge’s existence and its labels based on biaffine attention functions between roledependent representations. We also present multi-level biaffine attention models by combining all the role-dependent representations that appear at multiple intermediate layers.</abstract>
       <url hash="19d93a0f">K19-2009</url>
@@ -1425,7 +1425,7 @@
       <author><first>Kira</first><last>Droganova</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Nikita</first><last>Mediankin</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>158–165</pages>
       <abstract>This paper describes the ÚFAL--Oslo system submission to the shared task on Cross-Framework Meaning Representation Parsing (MRP, Oepen et al. 2019). The submission is based on several third-party parsers. Within the official shared task results, the submission ranked 11th out of 13 participating systems.</abstract>
       <url hash="b353eb20">K19-2015</url>
@@ -1437,7 +1437,7 @@
       <title>Peking at <fixed-case>MRP</fixed-case> 2019: Factorization- and Composition-Based Parsing for Elementary Dependency Structures</title>
       <author><first>Yufei</first><last>Chen</last></author>
       <author><first>Yajie</first><last>Ye</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>166–176</pages>
       <abstract>We design, implement and evaluate two semantic parsers, which represent factorization- and composition-based approaches respectively, for Elementary Dependency Structures (EDS) at the CoNLL 2019 Shared Task on Cross-Framework Meaning Representation Parsing. The detailed evaluation of the two parsers gives us a new perception about parsing into linguistically enriched meaning representations: current neural EDS parsers are able to reach an accuracy at the inter-annotator agreement level in the same-epoch-and-domain setup.</abstract>
       <url hash="a8a2198e">K19-2016</url>
diff --git a/data/xml/L00.xml b/data/xml/L00.xml
index 192512d123..df4562c0a6 100644
--- a/data/xml/L00.xml
+++ b/data/xml/L00.xml
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Eduardo R.</first><last>Banga</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo R.</first><last>Banga</last></author>
       <author><first>Alex</first><last>Monaghan</last></author>
       <author><first>Erhard</first><last>Rank</last></author>
       <title>The Cost258 Signal Generation Test Array</title>
@@ -56,11 +56,11 @@
       <bibkey>kilgarriff-rosenzweig-2000-english</bibkey>
     </paper>
     <paper id="6">
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <author><first>Robrecht</first><last>Comeyne</last></author>
       <author><first>Keith</first><last>Haslam</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <author><first>Sabine</first><last>Horbach</last></author>
       <author><first>Giorgio</first><last>Micca</last></author>
       <title><fixed-case>SALA</fixed-case>: <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at across <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merica. Results of the First Phase</title>
@@ -68,7 +68,7 @@
       <bibkey>moreno-etal-2000-sala</bibkey>
     </paper>
     <paper id="7">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title>Using a Large Set of <fixed-case>EAGLES</fixed-case>-compliant Morpho-syntactic Descriptors as a Tagset for Probabilistic Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/11.pdf</url>
       <bibkey>tufis-2000-using</bibkey>
@@ -76,13 +76,13 @@
     <paper id="8">
       <author><first>Elliott</first><last>Macklovitch</last></author>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <title><fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch: A Free Translation Memory on the World Wide Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/12.pdf</url>
       <bibkey>macklovitch-etal-2000-transsearch</bibkey>
     </paper>
     <paper id="9">
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <title>Semantic Encoding of <fixed-case>D</fixed-case>anish Verbs in <fixed-case>SIMPLE</fixed-case> - Adapting a Verb Framed Model to a Satellite-framed Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/13.pdf</url>
@@ -90,14 +90,14 @@
     </paper>
     <paper id="10">
       <author><last>Mochizuki</last><first>Hajime</first></author>
-      <author><last>Okumura</last><first>Manabu</first></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <title>A Comparison of Summarization Methods Based on Task-based Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/14.pdf</url>
       <bibkey>mochizuki-okumura-2000-comparison</bibkey>
     </paper>
     <paper id="11">
       <author><last>Zheng</last><first>Jie</first></author>
-      <author><last>Mao</last><first>Yuhang</first></author>
+      <author id="yu-hang-mao"><first>Yuhang</first><last>Mao</last></author>
       <title>A Word Sense Disambiguation Method Using Bilingual Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/15.pdf</url>
       <bibkey>zheng-mao-2000-word</bibkey>
@@ -114,23 +114,23 @@
     </paper>
     <paper id="13">
       <author><first>Sandro</first><last>Pedrazzini</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Dierk</first><last>König</last></author>
       <title>Terms Specification and Extraction within a Linguistic-based Intranet Service</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/17.pdf</url>
       <bibkey>pedrazzini-etal-2000-terms</bibkey>
     </paper>
     <paper id="14">
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <title>Semantico-syntactic Tagging of Very Large Corpora: the Case of Restoration of Nodes on the Underlying Level</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/18.pdf</url>
       <bibkey>hajicova-sgall-2000-semantico</bibkey>
     </paper>
     <paper id="15">
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <title>Coreference in Annotating a Large Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/19.pdf</url>
       <bibkey>hajicova-etal-2000-coreference</bibkey>
@@ -150,7 +150,7 @@
       <bibkey>maynard-ananiadou-2000-creating</bibkey>
     </paper>
     <paper id="18">
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <author><first>Dawn M.</first><last>Tice</last></author>
       <title>The <fixed-case>TREC</fixed-case>-8 Question Answering Track</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/26.pdf</url>
@@ -165,7 +165,7 @@
     </paper>
     <paper id="20">
       <author><first>Svetlana</first><last>Sheremetyeva</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <title>Towards A Universal Tool For <fixed-case>NLP</fixed-case> Resource Acquisition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/28.pdf</url>
       <bibkey>sheremetyeva-nirenburg-2000-towards</bibkey>
@@ -193,17 +193,17 @@
       <bibkey>chenfour-etal-2000-etude</bibkey>
     </paper>
     <paper id="24">
-      <author><first>Marcela</first><last>Charfuelán</last></author>
-      <author><first>José</first><last>Relaño Gil</last></author>
-      <author><first>M. Carmen Rodríguez</first><last>Gancedo</last></author>
-      <author><first>Daniel Tapias</first><last>Merino</last></author>
-      <author><first>Luis Hernández</first><last>Gómez</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelán</last></author>
+      <author id="jose-relano-gil"><first>José</first><last>Relaño Gil</last></author>
+      <author id="mari-carmen-rodriguez-gancedo"><first>M. Carmen Rodríguez</first><last>Gancedo</last></author>
+      <author id="daniel-tapias"><first>Daniel Tapias</first><last>Merino</last></author>
+      <author id="luis-hernandez"><first>Luis Hernández</first><last>Gómez</last></author>
       <title>Dialogue Annotation for Language Systems Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/33.pdf</url>
       <bibkey>charfuelan-etal-2000-dialogue</bibkey>
     </paper>
     <paper id="25">
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Sébastien</first><last>Sauvé</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Elliott</first><last>Macklovitch</last></author>
@@ -213,8 +213,8 @@
       <bibkey>langlais-etal-2000-evaluation</bibkey>
     </paper>
     <paper id="26">
-      <author><first>Gerardo</first><last>Sierra</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <title>Extraction of Semantic Clusters for Terminological Information Retrieval from <fixed-case>MRD</fixed-case>s</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/35.pdf</url>
       <bibkey>sierra-mcnaught-2000-extraction</bibkey>
@@ -223,32 +223,32 @@
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Jacques</first><last>Siroux</last></author>
       <author><first>Jean</first><last>Caelen</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Mohamed</first><last>Ahafhaf</last></author>
       <title>Obtaining Predictive Results with an Objective Evaluation of Spoken Dialogue Systems: Experiments with the <fixed-case>DCR</fixed-case> Assessment Paradigm</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/36.pdf</url>
       <bibkey>antoine-etal-2000-obtaining</bibkey>
     </paper>
     <paper id="28">
-      <author><first>Guy</first><last>Pérennou</last></author>
-      <author><first>Martine</first><last>de Calmès</last></author>
+      <author id="guy-perennou"><first>Guy</first><last>Pérennou</last></author>
+      <author id="martine-de-calmes"><first>Martine</first><last>de Calmès</last></author>
       <title><fixed-case>MHATL</fixed-case>ex: Lexical Resources for Modelling the <fixed-case>F</fixed-case>rench Pronunciation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/37.pdf</url>
       <bibkey>perennou-de-calmes-2000-mhatlex</bibkey>
     </paper>
     <paper id="29">
       <author><first>Carine-Alexia</first><last>Lavelle</last></author>
-      <author><first>Martine</first><last>de Calmès</last></author>
-      <author><first>Guy</first><last>Pérennou</last></author>
+      <author id="martine-de-calmes"><first>Martine</first><last>de Calmès</last></author>
+      <author id="guy-perennou"><first>Guy</first><last>Pérennou</last></author>
       <title>Dialogue and Prompting Strategies Evaluation in the <fixed-case>DEMON</fixed-case> System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/38.pdf</url>
       <bibkey>lavelle-etal-2000-dialogue</bibkey>
     </paper>
     <paper id="30">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Simo</first><last>Goddijn</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
       <title><fixed-case>SLR</fixed-case> Validation: Present State of Affairs and Prospects</title>
@@ -269,7 +269,7 @@
     </paper>
     <paper id="32">
       <author><first>Marc</first><last>Swerts</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <title>On the Use of Prosody for On-line Evaluation of Spoken Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/43.pdf</url>
       <bibkey>swerts-krahmer-2000-use</bibkey>
@@ -290,10 +290,10 @@
       <bibkey>aduriz-etal-2000-word-level</bibkey>
     </paper>
     <paper id="34">
-      <author><first>Albert</first><last>Russel</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>The <fixed-case>EUDICO</fixed-case> Project, Multi Media Annotation over the <fixed-case>I</fixed-case>nternet</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/45.pdf</url>
       <bibkey>russel-etal-2000-eudico</bibkey>
@@ -306,7 +306,7 @@
       <bibkey>braasch-olsen-2000-towards</bibkey>
     </paper>
     <paper id="36">
-      <author><first>Stavroula-Evita</first><last>Fotinea</last></author>
+      <author id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></author>
       <author><first>Athanassios</first><last>Protopapas</last></author>
       <author><first>Dimitris</first><last>Dimitriadis</last></author>
       <author><first>George</first><last>Carayannis</last></author>
@@ -328,7 +328,7 @@
       <author><first>Damjan</first><last>Vlaj</last></author>
       <author><first>Janez</first><last>Kaiser</last></author>
       <author><first>Ralph</first><last>Wilhelm</last></author>
-      <author><first>Ute</first><last>Ziegenhain</last></author>
+      <author id="ute-ziegenhain"><first>Ute</first><last>Ziegenhain</last></author>
       <title><fixed-case>PLEDIT</fixed-case> - A New Efficient Tool for Management of Multilingual Pronunciation Lexica and Batchlists</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/53.pdf</url>
       <bibkey>vlaj-etal-2000-pledit</bibkey>
@@ -336,7 +336,7 @@
     <paper id="39">
       <author><first>Rosa</first><last>Estopà</last></author>
       <author><first>Jordi</first><last>Vivaldi</last></author>
-      <author><first>M. Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>M. Teresa</first><last>Cabré</last></author>
       <title>Use of <fixed-case>G</fixed-case>reek and <fixed-case>L</fixed-case>atin Forms for Term Detection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/55.pdf</url>
       <bibkey>estopa-etal-2000-use</bibkey>
@@ -350,8 +350,8 @@
       <bibkey>canelli-etal-2000-methods</bibkey>
     </paper>
     <paper id="41">
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Michael E.</first><last>Jahr</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="michael-e-jahr"><first>Michael E.</first><last>Jahr</last></author>
       <title><fixed-case>C</fixed-case>airo: An Alignment Visualization Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/58.pdf</url>
       <bibkey>smith-jahr-2000-cairo</bibkey>
@@ -364,26 +364,26 @@
       <bibkey>mengel-lezius-2000-xml</bibkey>
     </paper>
     <paper id="43">
-      <author><first>Ornella</first><last>Corazzari</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="ornella-corazzari"><first>Ornella</first><last>Corazzari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title>An Experiment of Lexical-Semantic Tagging of an <fixed-case>I</fixed-case>talian Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/60.pdf</url>
       <bibkey>corazzari-etal-2000-experiment</bibkey>
     </paper>
     <paper id="44">
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Federica</first><last>Busa</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Elisabetta</first><last>Gola</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Antoine</first><last>Ogonowski</last></author>
       <author><first>Ivonne</first><last>Peters</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Nilda</first><last>Ruimy</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title><fixed-case>SIMPLE</fixed-case>: A General Framework for the Development of Multilingual Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/61.pdf</url>
       <bibkey>bel-etal-2000-simple</bibkey>
@@ -396,7 +396,7 @@
     </paper>
     <paper id="46">
       <author><first>Rainer</first><last>Siemund</last></author>
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <author><first>Siegfried</first><last>Kunzmann</last></author>
       <author><first>Krzysztof</first><last>Marasek</last></author>
       <title><fixed-case>SPEECON</fixed-case> - Speech Data for Consumer Devices</title>
@@ -405,9 +405,9 @@
     </paper>
     <paper id="47">
       <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Susana</first><last>López</last></author>
-      <author><first>Fernando</first><last>Sánchez</last></author>
+      <author id="fernando-sanchez-leon"><first>Fernando</first><last>Sánchez</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <title>A Treebank of <fixed-case>S</fixed-case>panish and its Application to Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/66.pdf</url>
@@ -443,7 +443,7 @@
       <author><first>Michael</first><last>Hess</last></author>
       <author><first>Michael</first><last>Kluck</last></author>
       <author><first>Carol</first><last>Peters</last></author>
-      <author><first>Peter</first><last>Schäuble</last></author>
+      <author id="peter-schauble"><first>Peter</first><last>Schäuble</last></author>
       <title>The Evaluation of Systems for Cross-language Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/70.pdf</url>
       <bibkey>braschler-etal-2000-evaluation</bibkey>
@@ -456,8 +456,8 @@
       <bibkey>goncalves-veloso-2000-spoken</bibkey>
     </paper>
     <paper id="53">
-      <author><first>Maria Fernanda Bacelar</first><last>do Nascimento</last></author>
-      <author><first>Luisa</first><last>Pereira</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Maria Fernanda Bacelar</first><last>do Nascimento</last></author>
+      <author id="luisa-pereira"><first>Luisa</first><last>Pereira</last></author>
       <author><first>João</first><last>Saramago</last></author>
       <title><fixed-case>P</fixed-case>ortuguese Corpora at <fixed-case>CLUL</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/72.pdf</url>
@@ -465,14 +465,14 @@
     </paper>
     <paper id="54">
       <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno</last></author>
-      <author><first>Chantal</first><last>Pérez</last></author>
+      <author id="chantal-perez-hernandez"><first>Chantal</first><last>Pérez</last></author>
       <title>Reusing the Mikrokosmos Ontology for Concept-based Multilingual Terminology Databases</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/74.pdf</url>
       <bibkey>moreno-perez-2000-reusing</bibkey>
     </paper>
     <paper id="55">
       <author><last>Kimura</last><first>Kazuhiro</first></author>
-      <author><last>Hirakawa</last><first>Hideki</first></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <title>Abstraction of the <fixed-case>EDR</fixed-case> Concept Classification and its Effectiveness in Word Sense Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/75.pdf</url>
       <bibkey>kimura-hirakawa-2000-abstraction</bibkey>
@@ -480,13 +480,13 @@
     <paper id="56">
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Enrico</first><last>Faggioli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <title>Will Very Large Corpora Play For Semantic Disambiguation The Role That Massive Computing Power Is Playing For Other <fixed-case>AI</fixed-case>-Hard Problems?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/76.pdf</url>
       <bibkey>cucchiarelli-etal-2000-will</bibkey>
     </paper>
     <paper id="57">
-      <author><first>Shuichi</first><last>Itahashi</last></author>
+      <author id="shuichi-itahashi"><first>Shuichi</first><last>Itahashi</last></author>
       <title>Guidelines for <fixed-case>J</fixed-case>apanese Speech Synthesizer Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/77.pdf</url>
       <bibkey>itahashi-2000-guidelines</bibkey>
@@ -518,14 +518,14 @@
       <bibkey>gavieiro-villatte-spaggiari-2000-open</bibkey>
     </paper>
     <paper id="62">
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <title>Shallow Parsing and Functional Structure in <fixed-case>I</fixed-case>talian Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/82.pdf</url>
       <bibkey>delmonte-2000-shallow</bibkey>
     </paper>
     <paper id="63">
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Maria Toporowska</first><last>Gronostaj</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria Toporowska</first><last>Gronostaj</last></author>
       <author><first>Karin</first><last>Warmenius</last></author>
       <title>Annotating, Disambiguating &amp; Automatically Extending the Coverage of the <fixed-case>S</fixed-case>wedish <fixed-case>SIMPLE</fixed-case> Lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/84.pdf</url>
@@ -533,7 +533,7 @@
     </paper>
     <paper id="64">
       <author><first>Diana</first><last>Santos</last></author>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <title>Providing <fixed-case>I</fixed-case>nternet Access to <fixed-case>P</fixed-case>ortuguese Corpora: the <fixed-case>AC</fixed-case>/<fixed-case>DC</fixed-case> Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/85.pdf</url>
       <bibkey>santos-bick-2000-providing</bibkey>
@@ -558,7 +558,7 @@
     <paper id="67">
       <author><first>Giulia</first><last>Bernardis</last></author>
       <author><first>Hervé</first><last>Bourlard</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
       <title>Development of Acoustic and Linguistic Resources for Research and Evaluation in Interactive Vocal Information Servers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/90.pdf</url>
@@ -570,14 +570,14 @@
       <author><first>Pilar</first><last>Alvariño</last></author>
       <author><first>Adelaida</first><last>Gil</last></author>
       <author><first>María Paula</first><last>Santalla</last></author>
-      <author><first>Susana</first><last>Sotelo</last></author>
+      <author id="susana-sotelo"><first>Susana</first><last>Sotelo</last></author>
       <title>An Architecture for Document Routing in <fixed-case>S</fixed-case>panish: Two Language Components, Pre-processor and Parser</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/91.pdf</url>
       <bibkey>rojo-etal-2000-architecture</bibkey>
     </paper>
     <paper id="69">
-      <author><first>John A.</first><last>Bateman</last></author>
-      <author><first>Anthony F.</first><last>Hartley</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
+      <author id="anthony-hartley"><first>Anthony F.</first><last>Hartley</last></author>
       <title>Target Suites for Evaluating the Coverage of Text Generators</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/92.pdf</url>
       <bibkey>bateman-hartley-2000-target</bibkey>
@@ -608,20 +608,20 @@
     </paper>
     <paper id="73">
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Nilda</first><last>Ruimy</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <author><first>Teresa</first><last>Sadurní</last></author>
-      <author><first>Joan</first><last>Soler</last></author>
+      <author id="joan-soler-i-bou"><first>Joan</first><last>Soler</last></author>
       <title>Multilingual Linguistic Resources: From Monolingual Lexicons to Bilingual Interrelated Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/96.pdf</url>
       <bibkey>villegas-etal-2000-multilingual</bibkey>
     </paper>
     <paper id="74">
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <title>Where Opposites Meet. A Syntactic Meta-scheme for Corpus Annotation and Parsing Evaluation</title>
@@ -630,15 +630,15 @@
     </paper>
     <paper id="75">
       <author><first>Paolo</first><last>Allegrini</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <title>Controlled Bootstrapping of Lexico-semantic Classes as a Bridge between Paradigmatic and Syntagmatic Knowledge: Methodology and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/99.pdf</url>
       <bibkey>allegrini-etal-2000-controlled</bibkey>
     </paper>
     <paper id="76">
-      <author><first>Rodger</first><last>Kibble</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <title>Coreference Annotation: Whither?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/100.pdf</url>
       <bibkey>kibble-van-deemter-2000-coreference</bibkey>
@@ -660,7 +660,7 @@
       <bibkey>munteanu-boldea-2000-mdwoz</bibkey>
     </paper>
     <paper id="79">
-      <author><first>Dan</first><last>Bohuş</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohuş</last></author>
       <author><first>Marian</first><last>Boldea</last></author>
       <title>A Web-based Text Corpora Development System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/105.pdf</url>
@@ -674,7 +674,7 @@
       <bibkey>georgantopoulos-piperidis-2000-term</bibkey>
     </paper>
     <paper id="81">
-      <author><first>Kristīne</first><last>Levāne</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne</last></author>
       <author><first>Andrejs</first><last>Spektors</last></author>
       <title>Morphemic Analysis and Morphological Tagging of <fixed-case>L</fixed-case>atvian Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/107.pdf</url>
@@ -682,7 +682,7 @@
     </paper>
     <paper id="82">
       <author><first>Patrick</first><last>Kremer</last></author>
-      <author><first>Laurent</first><last>Schmitt</last></author>
+      <author id="laurent-schmitt"><first>Laurent</first><last>Schmitt</last></author>
       <title>Textual Information Retrieval Systems Test: The Point of View of an Organizer and Corpuses Provider</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/109.pdf</url>
       <bibkey>kremer-schmitt-2000-textual</bibkey>
@@ -695,13 +695,13 @@
     </paper>
     <paper id="84">
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Àngels</first><last>Egea</last></author>
+      <author id="angels-egea"><first>Àngels</first><last>Egea</last></author>
       <title>A Strategy for the Syntactic Parsing of Corpora: from Constraint Grammar Output to Unification-based Processing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/111.pdf</url>
       <bibkey>badia-egea-2000-strategy</bibkey>
     </paper>
     <paper id="85">
-      <author><first>Joan</first><last>Soler i Bou</last></author>
+      <author id="joan-soler-i-bou"><first>Joan</first><last>Soler i Bou</last></author>
       <title>Producing <fixed-case>LR</fixed-case>s in Parallel with Lexicographic Description: the <fixed-case>DCC</fixed-case> project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/112.pdf</url>
       <bibkey>soler-i-bou-2000-producing</bibkey>
@@ -714,14 +714,14 @@
       <bibkey>fujii-ishikawa-2000-novelty</bibkey>
     </paper>
     <paper id="87">
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>Towards More Comprehensive Evaluation in Anaphora Resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/115.pdf</url>
       <bibkey>mitkov-2000-towards</bibkey>
     </paper>
     <paper id="88">
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <title>Galaxy-<fixed-case>II</fixed-case> as an Architecture for Spoken Dialogue Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/116.pdf</url>
       <bibkey>polifroni-seneff-2000-galaxy</bibkey>
@@ -733,7 +733,7 @@
       <bibkey>tadic-2000-building</bibkey>
     </paper>
     <paper id="90">
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <title>Lexical and Translation Equivalence in Parallel Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/122.pdf</url>
       <bibkey>varadi-2000-lexical</bibkey>
@@ -760,23 +760,23 @@
     <paper id="93">
       <author><first>Adriana</first><last>Roventini</last></author>
       <author><first>Antonietta</first><last>Alonge</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <title><fixed-case>I</fixed-case>tal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: a Large Semantic Database for <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/129.pdf</url>
       <bibkey>roventini-etal-2000-italwordnet</bibkey>
     </paper>
     <paper id="94">
-      <author><first>Cătălina</first><last>Barbu</last></author>
+      <author id="catalina-barbu"><first>Cătălina</first><last>Barbu</last></author>
       <title><fixed-case>FAST</fixed-case> - Towards a Semi-automatic Annotation of Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/130.pdf</url>
       <bibkey>barbu-2000-fast</bibkey>
     </paper>
     <paper id="95">
       <author><first>François</first><last>Trouilleux</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Gabriel G.</first><last>Bès</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="gabriel-g-bes"><first>Gabriel G.</first><last>Bès</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <title>Coreference Resolution Evaluation Based on Descriptive Specificity</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/131.pdf</url>
@@ -789,20 +789,20 @@
       <bibkey>dutoit-2000-text</bibkey>
     </paper>
     <paper id="97">
-      <author><first>Philippe Boula</first><last>de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe Boula</first><last>de Mareüil</last></author>
       <author><first>Christophe</first><last>d’Alessandro</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <author><first>Véronique</first><last>Aubergé</last></author>
-      <author><first>Jacqueline</first><last>Vaissière</last></author>
+      <author id="jacqueline-vaissiere"><first>Jacqueline</first><last>Vaissière</last></author>
       <author><first>Angélique</first><last>Amelot</last></author>
       <title>A <fixed-case>F</fixed-case>rench Phonetic Lexicon with Variants for Speech and Language Processing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/133.pdf</url>
       <bibkey>de-mareuil-etal-2000-french</bibkey>
     </paper>
     <paper id="98">
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <author><first>Morten Baun</first><last>Møller</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <author><first>Michael</first><last>Grosse</last></author>
       <author><first>Martin</first><last>Olsen</last></author>
       <author><first>Amanda</first><last>Schiffrin</last></author>
@@ -811,8 +811,8 @@
       <bibkey>dybkjaer-etal-2000-annotating</bibkey>
     </paper>
     <paper id="99">
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <title>A Methodology for Evaluating Spoken Language Dialogue Systems and Their Components</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/135.pdf</url>
       <bibkey>bernsen-dybkjaer-2000-methodology</bibkey>
@@ -828,22 +828,22 @@
     <paper id="101">
       <author><first>Lars</first><last>Ahrenberg</last></author>
       <author><first>Magnus</first><last>Merkel</last></author>
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title>Evaluation of Word Alignment Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/137.pdf</url>
       <bibkey>ahrenberg-etal-2000-evaluation</bibkey>
     </paper>
     <paper id="102">
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <title>How To Evaluate and Compare Tagsets? A Proposal</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/138.pdf</url>
       <bibkey>dejean-2000-evaluate</bibkey>
     </paper>
     <paper id="103">
-      <author><first>John</first><last>White</last></author>
-      <author><first>Jennifer</first><last>Doyon</last></author>
-      <author><first>Susan</first><last>Talbott</last></author>
+      <author id="john-s-white"><first>John</first><last>White</last></author>
+      <author id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></author>
+      <author id="susan-w-talbott"><first>Susan</first><last>Talbott</last></author>
       <title>Determining the Tolerance of Text-handling Tasks for <fixed-case>MT</fixed-case> Output</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/139.pdf</url>
       <bibkey>white-etal-2000-determining</bibkey>
@@ -856,20 +856,20 @@
     </paper>
     <paper id="105">
       <author><first>Sabine</first><last>Buchholz</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <title>Integrating Seed Names and ngrams for a Named Entity List and Classifier</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/141.pdf</url>
       <bibkey>buchholz-van-den-bosch-2000-integrating</bibkey>
     </paper>
     <paper id="106">
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>Satosi</first><last>Shirai</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="satoshi-shirai"><first>Satosi</first><last>Shirai</last></author>
       <title>Automatically Expansion of Thesaurus Entries with a Different Thesaurus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/142.pdf</url>
       <bibkey>kashioka-shirai-2000-automatically</bibkey>
     </paper>
     <paper id="107">
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <title>Learning Verb Subcategorization from Corpora: Counting Frame Subsets</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/145.pdf</url>
@@ -877,7 +877,7 @@
     </paper>
     <paper id="108">
       <author><first>Sašo</first><last>Džeroski</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
       <title>Morphosyntactic Tagging of <fixed-case>S</fixed-case>lovene: Evaluating Taggers and Tagsets</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/146.pdf</url>
@@ -892,14 +892,14 @@
       <bibkey>micca-etal-2000-cross</bibkey>
     </paper>
     <paper id="110">
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Ivonne</first><last>Peters</last></author>
       <title>Lexicalised Systematic Polysemy in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/148.pdf</url>
       <bibkey>peters-peters-2000-lexicalised</bibkey>
     </paper>
     <paper id="111">
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Fredrik</first><last>Olsson</last></author>
       <title>Experiences of Language Engineering Algorithm Reuse</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/151.pdf</url>
@@ -914,13 +914,13 @@
     </paper>
     <paper id="113">
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>Bootstrapping a Tagged Corpus through Combination of Existing Heterogeneous Taggers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/155.pdf</url>
       <bibkey>zavrel-daelemans-2000-bootstrapping</bibkey>
     </paper>
     <paper id="114">
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <title>The Context (not only) for Humans</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/156.pdf</url>
       <bibkey>hladka-2000-context</bibkey>
@@ -941,7 +941,7 @@
       <author><first>Philippe</first><last>Alcouffe</last></author>
       <author><first>Nicolas</first><last>Gacon</last></author>
       <author><first>Claude</first><last>Roux</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <title>A Step toward Semantic Indexing of an Encyclopedic Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/161.pdf</url>
       <bibkey>alcouffe-etal-2000-step</bibkey>
@@ -958,8 +958,8 @@
     </paper>
     <paper id="119">
       <author><first>Gees C.</first><last>Stein</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>G. Bowden</first><last>Wise</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="g-bowden-wise"><first>G. Bowden</first><last>Wise</last></author>
       <author><first>Amit</first><last>Bagga</last></author>
       <title>Evaluating Summaries for Multiple Documents in an Interactive Environment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/163.pdf</url>
@@ -1010,25 +1010,25 @@
       <bibkey>wayne-2000-multilingual</bibkey>
     </paper>
     <paper id="125">
-      <author><first>Montserrat Marimon</first><last>Felipe</last></author>
-      <author><first>Jordi Porta</first><last>Zamorano</last></author>
+      <author id="montserrat-marimon"><first>Montserrat Marimon</first><last>Felipe</last></author>
+      <author id="jordi-porta-zamorano"><first>Jordi Porta</first><last>Zamorano</last></author>
       <title><fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Disambiguation and Partial Parsing Bidirectional Interaction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/169.pdf</url>
       <bibkey>felipe-zamorano-2000-pos</bibkey>
     </paper>
     <paper id="126">
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Software Infrastructure for Language Resources: a Taxonomy of Previous Work and a Requirements Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/170.pdf</url>
       <bibkey>cunningham-etal-2000-software</bibkey>
     </paper>
     <paper id="127">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Patrice</first><last>Bonhomme</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title><fixed-case>XCES</fixed-case>: An <fixed-case>XML</fixed-case>-based Encoding Standard for Linguistic Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/172.pdf</url>
       <bibkey>ide-etal-2000-xces</bibkey>
@@ -1036,9 +1036,9 @@
     <paper id="128">
       <author><first>Iason</first><last>Demiros</last></author>
       <author><first>Sotiris</first><last>Boutsis</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <title>Named Entity Recognition in <fixed-case>G</fixed-case>reek Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/173.pdf</url>
@@ -1047,7 +1047,7 @@
     <paper id="129">
       <author><first>Sotiris</first><last>Boutsis</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <title>A Robust Parser for Unrestricted <fixed-case>G</fixed-case>reek Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/174.pdf</url>
@@ -1055,13 +1055,13 @@
     </paper>
     <paper id="130">
       <author><first>Matej</first><last>Rojc</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title>A Computational Platform for Development of Morphologic and Phonetic Lexica</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/175.pdf</url>
       <bibkey>rojc-kacic-2000-computational</bibkey>
     </paper>
     <paper id="131">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Ramesh</first><last>Krishnamurthy</last></author>
       <title>An Open Architecture for the Construction and Administration of Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/176.pdf</url>
@@ -1069,13 +1069,13 @@
     </paper>
     <paper id="132">
       <author><first>Matej</first><last>Rojc</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title>Design of Optimal <fixed-case>S</fixed-case>lovenian Speech Corpus for Use in the Concatenative Speech Synthesis System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/177.pdf</url>
       <bibkey>rojc-kacic-2000-design</bibkey>
     </paper>
     <paper id="133">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <title><fixed-case>CL</fixed-case>ink<fixed-case>A</fixed-case> A Coreferential Links Annotator</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/179.pdf</url>
       <bibkey>orasan-2000-clinka</bibkey>
@@ -1088,9 +1088,9 @@
       <bibkey>kilgarriff-yallop-2000-whats</bibkey>
     </paper>
     <paper id="135">
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <title>A Unified <fixed-case>POS</fixed-case> Tagging Architecture and its Application to <fixed-case>G</fixed-case>reek</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/181.pdf</url>
@@ -1105,7 +1105,7 @@
     </paper>
     <paper id="137">
       <author><first>Andreas</first><last>Witt</last></author>
-      <author><first>Harald</first><last>Lüngen</last></author>
+      <author id="harald-lungen"><first>Harald</first><last>Lüngen</last></author>
       <author><first>Dafydd</first><last>Gibbon</last></author>
       <title>Enhancing Speech Corpus Resources with Multiple Lexical Tag Layers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/183.pdf</url>
@@ -1113,11 +1113,11 @@
     </paper>
     <paper id="138">
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>David</first><last>Day</last></author>
-      <author><first>John</first><last>Garofolo</last></author>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Christophe</first><last>Laprun</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
+      <author id="john-s-garofolo"><first>John</first><last>Garofolo</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="christophe-laprun"><first>Christophe</first><last>Laprun</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title><fixed-case>ATLAS</fixed-case>: A Flexible and Extensible Architecture for Linguistic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/184.pdf</url>
       <bibkey>bird-etal-2000-atlas</bibkey>
@@ -1133,7 +1133,7 @@
       <author><first>Lluís</first><last>de Yzaguirre</last></author>
       <author><first>Marta</first><last>Ribas</last></author>
       <author><first>Jordi</first><last>Vivaldi</last></author>
-      <author><first>M. Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>M. Teresa</first><last>Cabré</last></author>
       <title>Some Technical Aspects about Aligning Near Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/186.pdf</url>
       <bibkey>de-yzaguirre-etal-2000-technical</bibkey>
@@ -1153,16 +1153,16 @@
       <bibkey>krenn-2000-cdb</bibkey>
     </paper>
     <paper id="143">
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
       <title>Evaluation for Darpa Communicator Spoken Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/191.pdf</url>
       <bibkey>walker-etal-2000-evaluation</bibkey>
     </paper>
     <paper id="144">
-      <author><first>Edouard</first><last>Geoffrois</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="edouard-geoffrois"><first>Edouard</first><last>Geoffrois</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Zhibiao</first><last>Wu</last></author>
       <title>Transcribing with Annotation Graphs</title>
@@ -1170,7 +1170,7 @@
       <bibkey>geoffrois-etal-2000-transcribing</bibkey>
     </paper>
     <paper id="145">
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>Annotating a Corpus to Develop and Evaluate Discourse Entity Realization Algorithms: Issues and Preliminary Results</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/193.pdf</url>
       <bibkey>poesio-2000-annotating</bibkey>
@@ -1184,16 +1184,16 @@
       <bibkey>bird-etal-2000-towards</bibkey>
     </paper>
     <paper id="147">
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>The <fixed-case>A</fixed-case>merican National Corpus: A Standardized Resource for <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/196.pdf</url>
       <bibkey>macleod-etal-2000-american</bibkey>
     </paper>
     <paper id="148">
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <title>Semantic Tagging for the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/197.pdf</url>
@@ -1212,9 +1212,9 @@
       <bibkey>ribarov-2000-un</bibkey>
     </paper>
     <paper id="151">
-      <author><first>David</first><last>Day</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <author><first>Alan</first><last>Goldschen</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <title>A Framework for Cross-Document Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/201.pdf</url>
       <bibkey>day-etal-2000-framework</bibkey>
@@ -1227,10 +1227,10 @@
       <bibkey>cadel-ledouble-2000-extraction</bibkey>
     </paper>
     <paper id="153">
-      <author><first>Eric J.</first><last>Breck</last></author>
-      <author><first>John D.</first><last>Burger</last></author>
+      <author id="eric-breck"><first>Eric J.</first><last>Breck</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>David</first><last>House</last></author>
       <author><first>Marc</first><last>Light</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
@@ -1256,40 +1256,40 @@
       <author><last>Sun</last><first>Le</first></author>
       <author><last>Jin</last><first>Youbing</first></author>
       <author><last>Du</last><first>Lin</first></author>
-      <author><last>Sun</last><first>Yufang</first></author>
+      <author id="yufang-sun"><first>Yufang</first><last>Sun</last></author>
       <title>Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Term Lexicons from Noisy Bilingual Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/208.pdf</url>
       <bibkey>sun-etal-2000-automatic</bibkey>
     </paper>
     <paper id="157">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>Issues in Corpus Creation and Distribution: The Evolution of the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/209.pdf</url>
       <bibkey>cieri-liberman-2000-issues</bibkey>
     </paper>
     <paper id="158">
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>David</first><last>Graff</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Nii</first><last>Martey</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Large, Multilingual, Broadcast News Corpora for Cooperative Research in Topic Detection and Tracking: The <fixed-case>TDT</fixed-case>-2 and <fixed-case>TDT</fixed-case>-3 Corpus Efforts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/210.pdf</url>
       <bibkey>cieri-etal-2000-large</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Tatsuo</first><last>Yamashita</last></author>
       <title>Using Machine Learning Methods to Improve Quality of Tagged Corpora and Learning Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/211.pdf</url>
       <bibkey>matsumoto-yamashita-2000-using</bibkey>
     </paper>
     <paper id="160">
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>David</first><last>Graff</last></author>
       <author><first>Nii</first><last>Martey</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <title>Quality Control in Large Annotation Projects Involving Multiple Judges: The Case of the <fixed-case>TDT</fixed-case> Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/212.pdf</url>
       <bibkey>strassel-etal-2000-quality</bibkey>
@@ -1301,29 +1301,29 @@
       <bibkey>utsuro-2000-learning</bibkey>
     </paper>
     <paper id="162">
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
       <title>Live Lexicons and Dynamic Corpora Adapted to the Network Resources for <fixed-case>C</fixed-case>hinese Spoken Language Processing Applications in an <fixed-case>I</fixed-case>nternet Era</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/214.pdf</url>
       <bibkey>lee-chien-2000-live</bibkey>
     </paper>
     <paper id="163">
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Boris</first><last>Bartlog</last></author>
-      <author><first>Ariadna</first><last>Font Llitjos</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjos</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Monika</first><last>Woszczyna</last></author>
+      <author id="monika-woszczyna"><first>Monika</first><last>Woszczyna</last></author>
       <title>Lessons Learned from a Task-based Evaluation of Speech-to-Speech Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/215.pdf</url>
       <bibkey>levin-etal-2000-lessons</bibkey>
     </paper>
     <paper id="164">
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>Part of Speech Tagging and Lemmatisation for the Spoken <fixed-case>D</fixed-case>utch Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/216.pdf</url>
       <bibkey>van-eynde-etal-2000-part</bibkey>
@@ -1337,15 +1337,15 @@
       <bibkey>weilhammer-etal-2000-influence</bibkey>
     </paper>
     <paper id="166">
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <title>Automatic Assignment of Grammatical Relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/218.pdf</url>
       <bibkey>lesmo-lombardo-2000-automatic</bibkey>
     </paper>
     <paper id="167">
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Gabriela</first><last>Cavaglià</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="gabriela-cavaglia"><first>Gabriela</first><last>Cavaglià</last></author>
       <title>Integrating Subject Field Codes into <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/219.pdf</url>
       <bibkey>magnini-cavaglia-2000-integrating</bibkey>
@@ -1354,7 +1354,7 @@
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <author><first>Daniela</first><last>Vassallo</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <title>Building a Treebank for <fixed-case>I</fixed-case>talian: a Data-driven Annotation Schema</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/220.pdf</url>
       <bibkey>bosco-etal-2000-building</bibkey>
@@ -1376,9 +1376,9 @@
     </paper>
     <paper id="171">
       <author><first>Byeongchang</first><last>Kim</last></author>
-      <author><first>Jin-seok</first><last>Lee</last></author>
-      <author><first>Jeongwon</first><last>Cha</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="jin-seok-lee"><first>Jin-seok</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeongwon</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <title><fixed-case>POSCAT</fixed-case>: A Morpheme-based Speech Corpus Annotation Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/224.pdf</url>
       <bibkey>kim-etal-2000-poscat</bibkey>
@@ -1392,23 +1392,23 @@
     </paper>
     <paper id="173">
       <author><first>Byung-Ju</first><last>Kang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Automatic Transliteration and Back-transliteration by Decision Tree Learning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/227.pdf</url>
       <bibkey>kang-choi-2000-automatic</bibkey>
     </paper>
     <paper id="174">
       <author><first>Klaus</first><last>Ries</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Liza</first><last>Valle</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>Shallow Discourse Genre Annotation in <fixed-case>C</fixed-case>all<fixed-case>H</fixed-case>ome <fixed-case>S</fixed-case>panish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/228.pdf</url>
       <bibkey>ries-etal-2000-shallow</bibkey>
     </paper>
     <paper id="175">
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
       <author><first>Alexandra</first><last>Kinyon</last></author>
       <title>Building a Treebank for <fixed-case>F</fixed-case>rench</title>
@@ -1440,33 +1440,33 @@
     </paper>
     <paper id="179">
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <title>Coping with Lexical Gaps when Building Aligned Multilingual Wordnets</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/236.pdf</url>
       <bibkey>bentivogli-etal-2000-coping</bibkey>
     </paper>
     <paper id="180">
       <author><first>Young-Soog</first><last>Chae</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Design and Construction of Knowledge base for Verb using <fixed-case>MRD</fixed-case> and Tagged Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/237.pdf</url>
       <bibkey>chae-choi-2000-design</bibkey>
     </paper>
     <paper id="181">
       <author><first>Young-Soog</first><last>Chae</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Introduction of <fixed-case>KIBS</fixed-case> (<fixed-case>K</fixed-case>orean Information Base System) Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/239.pdf</url>
       <bibkey>chae-choi-2000-introduction</bibkey>
     </paper>
     <paper id="182">
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
-      <author><first>Hana</first><last>Skoumalová</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalová</last></author>
       <title>Resources for Multilingual Text Generation in Three <fixed-case>S</fixed-case>lavic Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/241.pdf</url>
       <bibkey>bateman-etal-2000-resources</bibkey>
@@ -1479,23 +1479,23 @@
       <bibkey>gibbon-trippel-2000-multi</bibkey>
     </paper>
     <paper id="184">
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <author><first>Christy</first><last>Doran</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>Rodger</first><last>Kibble</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author id="daniel-paiva"><first>D.</first><last>Paiva</last></author>
-      <author><first>Mike</first><last>Reape</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>Neil</first><last>Tipper</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="neil-tipper"><first>Neil</first><last>Tipper</last></author>
       <title>Enabling Resource Sharing in Language Generation: an Abstract Reference Architecture</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/244.pdf</url>
       <bibkey>cahill-etal-2000-enabling</bibkey>
     </paper>
     <paper id="185">
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Bogomir</first><last>Horvat</last></author>
-      <author><first>Aleksandra</first><last>Zögling</last></author>
+      <author id="aleksandra-zogling-markus"><first>Aleksandra</first><last>Zögling</last></author>
       <title>Issues in Design and Collection of Large Telephone Speech Corpus for <fixed-case>S</fixed-case>lovenian Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/246.pdf</url>
       <bibkey>kacic-etal-2000-issues</bibkey>
@@ -1508,17 +1508,17 @@
       <bibkey>jouis-arc-a3-2000-arc</bibkey>
     </paper>
     <paper id="187">
-      <author><first>Richard F. E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F. E.</first><last>Sutcliffe</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <title>A Parallel <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Query Collection for the Evaluation of On-Line Help Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/248.pdf</url>
       <bibkey>sutcliffe-kurohashi-2000-parallel</bibkey>
     </paper>
     <paper id="188">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Péter</first><last>Dienes</last></author>
       <author><first>Csaba</first><last>Oravecz</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <title>Principled Hidden Tagset Design for Tiered Tagging of <fixed-case>H</fixed-case>ungarian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/249.pdf</url>
       <bibkey>tufis-etal-2000-principled</bibkey>
@@ -1526,9 +1526,9 @@
     <paper id="189">
       <author><first>Felisa</first><last>Verdejo</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Fernando</first><last>López</last></author>
-      <author><first>David</first><last>Fernández</last></author>
+      <author id="david-fernandez-amoros"><first>David</first><last>Fernández</last></author>
       <title>Evaluating Wordnets in Cross-language Information Retrieval: the <fixed-case>ITEM</fixed-case> Search Engine</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/250.pdf</url>
       <bibkey>verdejo-etal-2000-evaluating</bibkey>
@@ -1555,9 +1555,9 @@
       <bibkey>milde-reinsch-2000-universal</bibkey>
     </paper>
     <paper id="193">
-      <author><first>Helka</first><last>Folch</last></author>
+      <author id="helka-folch"><first>Helka</first><last>Folch</last></author>
       <author><first>Serge</first><last>Heiden</last></author>
-      <author><first>Benoît</first><last>Habert</last></author>
+      <author id="benoit-habert"><first>Benoît</first><last>Habert</last></author>
       <author><first>Serge</first><last>Fleury</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
       <author><first>Pierre</first><last>Lafon</last></author>
@@ -1579,7 +1579,7 @@
     </paper>
     <paper id="195">
       <author><first>Luzia</first><last>Wittmann</last></author>
-      <author><first>Ricardo Daniel</first><last>Ribeiro</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo Daniel</first><last>Ribeiro</last></author>
       <author><first>Tânia</first><last>Pêgo</last></author>
       <author><first>Fernando</first><last>Batista</last></author>
       <title>Some Language Resources and Tools for Computational Processing of <fixed-case>P</fixed-case>ortuguese at <fixed-case>INESC</fixed-case></title>
@@ -1594,7 +1594,7 @@
       <bibkey>utsuro-sassano-2000-minimally</bibkey>
     </paper>
     <paper id="197">
-      <author><first>Joyce Yue</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Yue</first><last>Chai</last></author>
       <title>Evaluation of a Generic Lexical Semantic Resource in Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/259.pdf</url>
       <bibkey>chai-2000-evaluation</bibkey>
@@ -1609,12 +1609,12 @@
       <author><first>Katsunobu</first><last>Itou</last></author>
       <author><first>Kiyohiro</first><last>Shikano</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
-      <author><first>Kasuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kasuya</first><last>Takeda</last></author>
       <author><first>Atsushi</first><last>Yamada</last></author>
       <author><first>Akinori</first><last>Itou</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Nobuaki</first><last>Minematsu</last></author>
+      <author id="nobuaki-minematsu"><first>Nobuaki</first><last>Minematsu</last></author>
       <author><first>Mikio</first><last>Yamamoto</last></author>
       <author><first>Shigeki</first><last>Sagayama</last></author>
       <author><first>Akinobu</first><last>Lee</last></author>
@@ -1623,20 +1623,20 @@
       <bibkey>itou-etal-2000-ipa</bibkey>
     </paper>
     <paper id="200">
-      <author><first>Kikuo</first><last>Maekawa</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <title>Spontaneous Speech Corpus of <fixed-case>J</fixed-case>apanese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/262.pdf</url>
       <bibkey>maekawa-etal-2000-spontaneous</bibkey>
     </paper>
     <paper id="201">
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Michael R.</first><last>Crystal</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="michael-crystal"><first>Michael R.</first><last>Crystal</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Rebecca</first><last>Stone</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <title>Annotating Resources for Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/263.pdf</url>
       <bibkey>boisen-etal-2000-annotating</bibkey>
@@ -1657,16 +1657,16 @@
     </paper>
     <paper id="204">
       <author><first>Constandina</first><last>Economou</last></author>
-      <author><first>Spyros</first><last>Raptis</last></author>
+      <author id="spyros-raptis"><first>Spyros</first><last>Raptis</last></author>
       <author id="gregory-stainhauer"><first>Gregory</first><last>Stainhaouer</last></author>
       <title><fixed-case>LEXIPLOIGISSI</fixed-case>: An Educational Platform for the Teaching of Terminology in <fixed-case>G</fixed-case>reece</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/271.pdf</url>
       <bibkey>economou-etal-2000-lexiploigissi</bibkey>
     </paper>
     <paper id="205">
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Anna</first><last>Kupść</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
       <author><first>Adam</first><last>Przepiórkowski</last></author>
       <title>An <fixed-case>HPSG</fixed-case>-Annotated Test Suite for <fixed-case>P</fixed-case>olish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/272.pdf</url>
@@ -1677,7 +1677,7 @@
       <author><first>Narada</first><last>Warakagoda</last></author>
       <author><first>Børge</first><last>Lindberg</last></author>
       <author><first>Gunnar</first><last>Lehtinen</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Andrej</first><last>Žgank</last></author>
       <author><first>Kjell</first><last>Elenius</last></author>
       <author><first>Giampiero</first><last>Salvi</last></author>
@@ -1693,7 +1693,7 @@
       <bibkey>katsoyannou-efthimiou-2000-terminology</bibkey>
     </paper>
     <paper id="208">
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Young-Soog</first><last>Chae</last></author>
       <title>Terminology in <fixed-case>K</fixed-case>orea: <fixed-case>KORTERM</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/276.pdf</url>
@@ -1706,16 +1706,16 @@
       <bibkey>birocheau-2000-morphological</bibkey>
     </paper>
     <paper id="210">
-      <author><first>Sonja</first><last>Nießen</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="sonja-niessen"><first>Sonja</first><last>Nießen</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>An Evaluation Tool for Machine Translation: Fast Evaluation for <fixed-case>MT</fixed-case> Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/278.pdf</url>
       <bibkey>niessen-etal-2000-evaluation</bibkey>
     </paper>
     <paper id="211">
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Georgette</first><last>Dal</last></author>
       <title><fixed-case>G</fixed-case>é<fixed-case>D</fixed-case>éri<fixed-case>F</fixed-case>: Automatic Generation and Analysis of Morphologically Constructed Lexical Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/279.pdf</url>
@@ -1737,7 +1737,7 @@
     </paper>
     <paper id="214">
       <author><first>Gerhard</first><last>Budin</last></author>
-      <author><first>Alan K.</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan K.</first><last>Melby</last></author>
       <title>Accessibility of Multilingual Terminological Resources - Current Problems and Prospects for the Future</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/283.pdf</url>
       <bibkey>budin-melby-2000-accessibility</bibkey>
@@ -1745,37 +1745,37 @@
     <paper id="215">
       <author><first>Bilel</first><last>Gargouri</last></author>
       <author><first>Mohamed</first><last>Jmaiel</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <title>Using a Formal Approach to Evaluate Grammars</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/285.pdf</url>
       <bibkey>gargouri-etal-2000-using</bibkey>
     </paper>
     <paper id="216">
-      <author><first>Alvin</first><last>Martin</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="alvin-martin"><first>Alvin</first><last>Martin</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <title>Design Issues in Text-Independent Speaker Recognition Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/286.pdf</url>
       <bibkey>martin-przybocki-2000-design</bibkey>
     </paper>
     <paper id="217">
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
-      <author><first>John</first><last>Kovarik</last></author>
+      <author id="john-j-kovarik"><first>John</first><last>Kovarik</last></author>
       <author><first>Fu-Dong</first><last>Chiou</last></author>
       <author><first>Shizhe</first><last>Huang</last></author>
       <author><first>Tony</first><last>Kroch</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <title>Developing Guidelines and Ensuring Consistency for <fixed-case>C</fixed-case>hinese Text Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/287.pdf</url>
       <bibkey>xia-etal-2000-developing</bibkey>
     </paper>
     <paper id="218">
-      <author><first>Jerneja</first><last>Gros</last></author>
-      <author><first>France</first><last>Mihelič</last></author>
-      <author><first>Simon</first><last>Dobrišek</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="jerneja-gros"><first>Jerneja</first><last>Gros</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelič</last></author>
+      <author id="simon-dobrisek"><first>Simon</first><last>Dobrišek</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Mario</first><last>Žganec</last></author>
       <title>Corpora of <fixed-case>S</fixed-case>lovene Spoken Language for Multi-lingual Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/288.pdf</url>
@@ -1792,18 +1792,18 @@
       <bibkey>kavallieratou-etal-2000-gruhd</bibkey>
     </paper>
     <paper id="220">
-      <author><first>France</first><last>Mihelič</last></author>
-      <author><first>Jerneja</first><last>Gros</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelič</last></author>
+      <author id="jerneja-gros"><first>Jerneja</first><last>Gros</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
       <author><first>Volker</first><last>Warnke</last></author>
       <title>Labeling of Prosodic Events in <fixed-case>S</fixed-case>lovenian Speech Database <fixed-case>GOPOLIS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/292.pdf</url>
       <bibkey>mihelic-etal-2000-labeling</bibkey>
     </paper>
     <paper id="221">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Johan</first><last>Van Hoorde</last></author>
-      <author><first>Elizabeth</first><last>D’Halleweyn</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elizabeth</first><last>D’Halleweyn</last></author>
       <title><fixed-case>NL</fixed-case>-Translex: Machine Translation for <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/294.pdf</url>
       <bibkey>cucchiarini-etal-2000-nl</bibkey>
@@ -1817,16 +1817,16 @@
     <paper id="223">
       <author><first>Ángel Martín</first><last>Municio</last></author>
       <author><first>Guillermo</first><last>Rojo</last></author>
-      <author><first>Fernando Sánchez</first><last>León</last></author>
+      <author id="fernando-sanchez-leon"><first>Fernando Sánchez</first><last>León</last></author>
       <author><first>Octavio</first><last>Pinillos</last></author>
       <title>Language Resources Development at the <fixed-case>S</fixed-case>panish Royal Academy</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/297.pdf</url>
       <bibkey>municio-etal-2000-language</bibkey>
     </paper>
     <paper id="224">
-      <author><first>Irina</first><last>Prodanof</last></author>
-      <author><first>Amedeo</first><last>Cappelli</last></author>
-      <author><first>Lorenzo</first><last>Moretti</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
+      <author id="amedeo-cappelli"><first>Amedeo</first><last>Cappelli</last></author>
+      <author id="lorenzo-moretti"><first>Lorenzo</first><last>Moretti</last></author>
       <title>Reusability as Easy Adaptability: A Substantial Advance in <fixed-case>NL</fixed-case> Technology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/298.pdf</url>
       <bibkey>prodanof-etal-2000-reusability</bibkey>
@@ -1858,8 +1858,8 @@
     </paper>
     <paper id="228">
       <author><first>Aristomenis</first><last>Thanopoulos</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>Automatic Extraction of Semantic Similarity of Words from Raw Technical Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/302.pdf</url>
       <bibkey>thanopoulos-etal-2000-automatic-extraction</bibkey>
@@ -1873,9 +1873,9 @@
       <bibkey>bonneau-maynard-etal-2000-predictive</bibkey>
     </paper>
     <paper id="230">
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Elena</first><last>Mantzari</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Maria</first><last>Gavrilidou</last></author>
       <title>Automatic Generation of Dictionary Definitions from a Computational Lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/306.pdf</url>
@@ -1905,7 +1905,7 @@
     </paper>
     <paper id="234">
       <author><first>Wolfgang</first><last>Menzel</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Patrizia</first><last>Bonaventura</last></author>
       <author><first>Daniel</first><last>Herron</last></author>
       <author><first>Peter</first><last>Howarth</last></author>
@@ -1917,8 +1917,8 @@
     </paper>
     <paper id="235">
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>A Graphical Parametric Language-Independent Tool for the Annotation of Speech Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/314.pdf</url>
       <bibkey>georgila-etal-2000-graphical</bibkey>
@@ -1931,43 +1931,43 @@
     </paper>
     <paper id="237">
       <author><first>Stéphane</first><last>Chaudiron</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Audrey</first><last>Mance</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <title>For a Repository of <fixed-case>NLP</fixed-case> Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/316.pdf</url>
       <bibkey>chaudiron-etal-2000-repository</bibkey>
     </paper>
     <paper id="238">
       <author><first>Jeffrey</first><last>Allen</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <title>Survey of Language Engineering Needs: a Language Resources Perspective</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/317.pdf</url>
       <bibkey>allen-choukri-2000-survey</bibkey>
     </paper>
     <paper id="239">
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <title>Interarbora and Thistle - Delivering Linguistic Structure by the <fixed-case>I</fixed-case>nternet</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/319.pdf</url>
       <bibkey>calder-2000-interarbora</bibkey>
     </paper>
     <paper id="240">
-      <author><first>George</first><last>Demetriou</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="george-demetriou"><first>George</first><last>Demetriou</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Automatically Augmenting Terminological Lexicons from Untagged Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/320.pdf</url>
       <bibkey>demetriou-gaizauskas-2000-automatically</bibkey>
     </paper>
     <paper id="241">
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Annotating Events and Temporal Information in Newswire Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/321.pdf</url>
       <bibkey>setzer-gaizauskas-2000-annotating</bibkey>
     </paper>
     <paper id="242">
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <author><first>Dekang</first><last>Lin</last></author>
       <author><first>Scott</first><last>Thomas</last></author>
       <title><fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Semantic Resource Construction</title>
@@ -1976,7 +1976,7 @@
     </paper>
     <paper id="243">
       <author><first>Vera</first><last>Fluhr-Semenova</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <author><first>Stéphanie</first><last>Brisson</last></author>
       <title>Production of <fixed-case>NLP</fixed-case>-oriented Bilingual Language Resources from Human-oriented dictionaries</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/328.pdf</url>
@@ -1991,17 +1991,17 @@
       <bibkey>roux-etal-2000-developing</bibkey>
     </paper>
     <paper id="245">
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Michele</first><last>Vindigni</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>Tuning Lexicons to New Operational Scenarios</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/330.pdf</url>
       <bibkey>basili-etal-2000-tuning</bibkey>
     </paper>
     <paper id="246">
-      <author><first>José A.R.</first><last>Fonollosa</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A.R.</first><last>Fonollosa</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <title><fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at-Car Fixed Platform</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/331.pdf</url>
       <bibkey>fonollosa-moreno-2000-speechdat</bibkey>
@@ -2020,9 +2020,9 @@
       <bibkey>brants-plaehn-2000-interactive</bibkey>
     </paper>
     <paper id="249">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <title>The Concede Model for Lexical Databases</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/335.pdf</url>
@@ -2036,9 +2036,9 @@
       <author><first>Anastasia</first><last>Papakostopoulou</last></author>
       <author><first>Athanassia</first><last>Spiliotopoulou</last></author>
       <author><first>Anna</first><last>Vacalopoulou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Elena</first><last>Mantzari</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Iason</first><last>Demiros</last></author>
       <title>Design and Implementation of the Online <fixed-case>ILSP</fixed-case> <fixed-case>G</fixed-case>reek Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/336.pdf</url>
@@ -2079,20 +2079,20 @@
     <paper id="256">
       <author><first>David</first><last>Portabella</last></author>
       <author><first>Albert</first><last>Febrer</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <title><fixed-case>N</fixed-case>ani<fixed-case>T</fixed-case>rans: a Speech Labelling Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/345.pdf</url>
       <bibkey>portabella-etal-2000-nanitrans</bibkey>
     </paper>
     <paper id="257">
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
       <title>Acquisition of Linguistic Patterns for Knowledge-based Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/347.pdf</url>
       <bibkey>harabagiu-maiorano-2000-acquisition</bibkey>
     </paper>
     <paper id="258">
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
       <author><first>Erwin</first><last>Dewallef</last></author>
       <author><first>Jeannine</first><last>Beeken</last></author>
       <title>A Platform for <fixed-case>D</fixed-case>utch in Human Language Technologies</title>
@@ -2100,16 +2100,16 @@
       <bibkey>dhalleweyn-etal-2000-platform</bibkey>
     </paper>
     <paper id="259">
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Candace</first><last>Kamm</last></author>
-      <author><first>Julie</first><last>Boland</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="candace-a-kamm"><first>Candace</first><last>Kamm</last></author>
+      <author id="julie-e-boland"><first>Julie</first><last>Boland</last></author>
       <title>Developing and Testing General Models of Spoken Dialogue System Peformance</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/349.pdf</url>
       <bibkey>walker-etal-2000-developing</bibkey>
     </paper>
     <paper id="260">
-      <author><first>Claude</first><last>de Loupy</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <title>Using Few Clues Can Compensate the Small Amount of Resources Available for Word Sense Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/350.pdf</url>
       <bibkey>de-loupy-el-beze-2000-using</bibkey>
@@ -2122,15 +2122,15 @@
       <bibkey>mikros-carayannis-2000-modern</bibkey>
     </paper>
     <paper id="262">
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <title>Language Resources as by-Product of Evaluation: The <fixed-case>MULTITAG</fixed-case> Example</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/353.pdf</url>
       <bibkey>paroubek-2000-language</bibkey>
     </paper>
     <paper id="263">
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <author><first>Nina</first><last>Wacholder</last></author>
-      <author><first>David K.</first><last>Evans</last></author>
+      <author id="david-k-evans"><first>David K.</first><last>Evans</last></author>
       <title>Evaluation of Computational Linguistic Techniques for Identifying Significant Topics for Browsing Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/355.pdf</url>
       <bibkey>klavans-etal-2000-evaluation</bibkey>
@@ -2146,8 +2146,8 @@
       <bibkey>nakamura-etal-2000-acoustical</bibkey>
     </paper>
     <paper id="265">
-      <author><first>George</first><last>Demetriou</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="george-demetriou"><first>George</first><last>Demetriou</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Clive</first><last>Souter</last></author>
       <title>Using Lexical Semantic Knowledge from Machine Readable Dictionaries for Domain Independent Language Modelling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/357.pdf</url>
@@ -2178,7 +2178,7 @@
       <bibkey>hofland-2000-self</bibkey>
     </paper>
     <paper id="269">
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <title>A Web-based Advanced and User Friendly System: The <fixed-case>O</fixed-case>slo Corpus of Tagged <fixed-case>N</fixed-case>orwegian Texts</title>
@@ -2193,7 +2193,7 @@
     </paper>
     <paper id="271">
       <author><first>Ivonne</first><last>Peters</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <title>The Treatment of Adjectives in <fixed-case>SIMPLE</fixed-case>: Theoretical Observations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/366.pdf</url>
       <bibkey>peters-peters-2000-treatment</bibkey>
@@ -2205,12 +2205,12 @@
       <bibkey>michel-2000-cardinal</bibkey>
     </paper>
     <paper id="273">
-      <author><first>Laurie E.</first><last>Damianos</last></author>
+      <author id="laurie-damianos"><first>Laurie E.</first><last>Damianos</last></author>
       <author><first>Jill</first><last>Drury</last></author>
       <author><first>Tari</first><last>Fanderclai</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>Jeff</first><last>Kurtz</last></author>
-      <author><first>Beatrice</first><last>Oshika</last></author>
+      <author id="beatrice-oshika"><first>Beatrice</first><last>Oshika</last></author>
       <title>Evaluating Multi-party Multi-modal Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/368.pdf</url>
       <bibkey>damianos-etal-2000-evaluating</bibkey>
@@ -2222,14 +2222,14 @@
       <bibkey>kunze-2000-extension</bibkey>
     </paper>
     <paper id="275">
-      <author><first>Serge A.</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge A.</first><last>Yablonsky</last></author>
       <title><fixed-case>R</fixed-case>ussian Monitor Corpora: Composition, Linguistic Encoding and <fixed-case>I</fixed-case>nternet Publication</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/370.pdf</url>
       <bibkey>yablonsky-2000-russian</bibkey>
     </paper>
     <paper id="276">
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <title>An Open Source Grammar Development Environment and Broad-coverage <fixed-case>E</fixed-case>nglish Grammar Using <fixed-case>HPSG</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/371.pdf</url>
       <bibkey>copestake-flickinger-2000-open</bibkey>
@@ -2237,7 +2237,7 @@
     <paper id="277">
       <author><last>Sun</last><first>Maosong</first></author>
       <author><last>Sun</last><first>Honglin</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><last>Zhang</last><first>Pu</first></author>
       <author><last>Xing</last><first>Hongbing</first></author>
       <author><last>Zhou</last><first>Qiang</first></author>
@@ -2246,11 +2246,11 @@
       <bibkey>sun-etal-2000-hua</bibkey>
     </paper>
     <paper id="278">
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <author><first>Børge</first><last>Lindberg</last></author>
       <author><first>Christoph</first><last>Draxler</last></author>
       <author><first>Gaël</first><last>Richard</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Stephan</first><last>Euler</last></author>
       <author><first>Jeffrey</first><last>Allen</last></author>
       <title><fixed-case>SPEECHDAT</fixed-case>-<fixed-case>CAR</fixed-case>. A Large Speech Database for Automotive Environments</title>
@@ -2266,9 +2266,9 @@
       <bibkey>turrini-etal-2000-addizionario</bibkey>
     </paper>
     <paper id="280">
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Audrey</first><last>Mance</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <title>Recent Developments within the <fixed-case>E</fixed-case>uropean Language Resources Association (<fixed-case>ELRA</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2000/pdf/377.pdf</url>
       <bibkey>choukri-etal-2000-recent</bibkey>
diff --git a/data/xml/L02.xml b/data/xml/L02.xml
index 239b451b3b..f183b0df93 100644
--- a/data/xml/L02.xml
+++ b/data/xml/L02.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <author><first>Susana</first><last>Afonso</last></author>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <author><first>Renato</first><last>Haber</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <title>Floresta Sintá(c)tica: A treebank for <fixed-case>P</fixed-case>ortuguese</title>
@@ -47,9 +47,9 @@
       <bibkey>vandeghinste-2002-lexicon</bibkey>
     </paper>
     <paper id="5">
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <title>Computer-Aided Specification of Quality Models for Machine Translation Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/5.pdf</url>
       <bibkey>hovy-etal-2002-computer</bibkey>
@@ -62,15 +62,15 @@
     </paper>
     <paper id="7">
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <title>Using the Annotated Bibliography as a Resource for Indicative Summarization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf</url>
       <bibkey>kan-etal-2002-using</bibkey>
     </paper>
     <paper id="8">
       <author><first>Choy-Kim</first><last>Chuah</last></author>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <title>Computational Linguistics at Universiti Sains <fixed-case>M</fixed-case>alaysia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/8.pdf</url>
       <bibkey>chuah-yusoff-2002-computational</bibkey>
@@ -78,7 +78,7 @@
     <paper id="9">
       <author><first>Judit</first><last>Feliu</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>M. Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>M. Teresa</first><last>Cabré</last></author>
       <title>Towards an Ontology for a Human Genome Knowledge Base</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/9.pdf</url>
       <bibkey>feliu-etal-2002-towards</bibkey>
@@ -100,7 +100,7 @@
       <bibkey>markert-nissim-2002-towards</bibkey>
     </paper>
     <paper id="12">
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Marie</first><last>Loranger</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <title>Translators at work with <fixed-case>TRANSTYPE</fixed-case>: Resource and Evaluation.</title>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="13">
       <author><first>Qiang</first><last>Zhou</last></author>
-      <author><first>Elliott Franco</first><last>Drabek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott Franco</first><last>Drabek</last></author>
       <author><first>Fuji</first><last>Ren</last></author>
       <title>Annotating the functional chunks in <fixed-case>C</fixed-case>hinese sentences</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/13.pdf</url>
@@ -117,11 +117,11 @@
     </paper>
     <paper id="14">
       <author><first>Hisao</first><last>Kuwabara</last></author>
-      <author><first>Shuich</first><last>Itahashi</last></author>
+      <author id="shuichi-itahashi"><first>Shuich</first><last>Itahashi</last></author>
       <author><first>Mikio</first><last>Yamamoto</last></author>
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <title>The Present Status of Speech Database in <fixed-case>J</fixed-case>apan: Development, Management, and Application to Speech Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/14.pdf</url>
       <bibkey>kuwabara-etal-2002-present</bibkey>
@@ -134,8 +134,8 @@
       <bibkey>santos-gasperin-2002-evaluation</bibkey>
     </paper>
     <paper id="16">
-      <author><first>Laura</first><last>Docío-Fernández</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docío-Fernández</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
       <title>Acoustic Modeling and Training of a Bilingual <fixed-case>ASR</fixed-case> System when a Minority Language is Involved</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/16.pdf</url>
       <bibkey>docio-fernandez-garcia-mateo-2002-acoustic</bibkey>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="18">
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Witold</first><last>Drożdżyński</last></author>
+      <author id="witold-drozdzynski"><first>Witold</first><last>Drożdżyński</last></author>
       <author><first>Oliver</first><last>Scherf</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <title>A Flexible <fixed-case>XML</fixed-case>-based Regular Compiler for Creation and Conversion of Linguistic Resources</title>
@@ -172,7 +172,7 @@
       <bibkey>draxler-schiel-2002-three</bibkey>
     </paper>
     <paper id="21">
-      <author><first>René</first><last>Schneider</last></author>
+      <author id="rene-schneider"><first>René</first><last>Schneider</last></author>
       <title>n-grams of Seeds: A Hybrid System for Corpus-Based Text Summarization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/21.pdf</url>
       <bibkey>schneider-2002-n</bibkey>
@@ -184,7 +184,7 @@
       <bibkey>schiffman-2002-building</bibkey>
     </paper>
     <paper id="23">
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>A Subcategorisation Lexicon for <fixed-case>G</fixed-case>erman Verbs induced from a Lexicalised <fixed-case>PCFG</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/23.pdf</url>
       <bibkey>schulte-im-walde-2002-subcategorisation</bibkey>
@@ -219,13 +219,13 @@
       <bibkey>murata-isahara-2002-automatic</bibkey>
     </paper>
     <paper id="28">
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <title>Automatic detection of prosodic prominence in continuous speech</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/28.pdf</url>
       <bibkey>tamburini-2002-automatic</bibkey>
     </paper>
     <paper id="29">
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <title>A dynamic model for reference corpora structure definition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/29.pdf</url>
       <bibkey>tamburini-2002-dynamic</bibkey>
@@ -239,9 +239,9 @@
     </paper>
     <paper id="31">
       <author><first>Javier</first><last>Caminero</last></author>
-      <author><first>Joaquín</first><last>González-Rodríguez</last></author>
-      <author><first>Javier</first><last>Ortega-García</last></author>
-      <author><first>Daniel</first><last>Tapias</last></author>
+      <author id="joaquin-gonzalez-rodriguez"><first>Joaquín</first><last>González-Rodríguez</last></author>
+      <author id="javier-ortega-garcia"><first>Javier</first><last>Ortega-García</last></author>
+      <author id="daniel-tapias"><first>Daniel</first><last>Tapias</last></author>
       <author><first>Pedro M.</first><last>Ruz</last></author>
       <author><first>Mercedes</first><last>Solá</last></author>
       <title>A Multilingual Speaker Verification System: Architecture and Performance Evaluation</title>
@@ -249,7 +249,7 @@
       <bibkey>caminero-etal-2002-multilingual</bibkey>
     </paper>
     <paper id="32">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Ana-Maria</first><last>Barbu</last></author>
       <title>Lexical token alignment: experiments, results and applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/32.pdf</url>
@@ -266,8 +266,8 @@
     <paper id="34">
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <title><fixed-case>PILLS</fixed-case>: Multilingual generation of medical information documents with overlapping content</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/34.pdf</url>
       <bibkey>bouayad-agha-etal-2002-pills</bibkey>
@@ -276,7 +276,7 @@
       <author><first>Felix</first><last>Sasaki</last></author>
       <author><first>Claudia</first><last>Wegener</last></author>
       <author><first>Andreas</first><last>Witt</last></author>
-      <author><first>Dieter</first><last>Metzing</last></author>
+      <author id="dieter-metzing"><first>Dieter</first><last>Metzing</last></author>
       <author><first>Jens</first><last>Pönninghaus</last></author>
       <title>Co-reference annotation and resources: A multilingual corpus of typologically diverse languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/35.pdf</url>
@@ -284,7 +284,7 @@
     </paper>
     <paper id="36">
       <author><first>Udo</first><last>Hahn</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <title>Towards Very Large Ontologies for Medical Language Processing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/36.pdf</url>
       <bibkey>hahn-schulz-2002-towards</bibkey>
@@ -304,13 +304,13 @@
       <bibkey>alfonseca-manandhar-2002-proposal</bibkey>
     </paper>
     <paper id="39">
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <title>Methods for Constructing Lexicon-Grammar Resources: The Example of Measure Expressions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/39.pdf</url>
       <bibkey>constant-2002-methods</bibkey>
     </paper>
     <paper id="40">
-      <author><first>Kristina</first><last>Nilsson</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson</last></author>
       <author><first>Lars</first><last>Borin</last></author>
       <title>Living off the land: The Web as a source of practice texts for learners of less prevalent languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/40.pdf</url>
@@ -318,15 +318,15 @@
     </paper>
     <paper id="41">
       <author><first>Sebastian</first><last>Möller</last></author>
-      <author><first>Ergina</first><last>Kavallieratou</last></author>
+      <author id="ergina-kavallieratou"><first>Ergina</first><last>Kavallieratou</last></author>
       <title>Diagnostic Assessment of Telephone Transmission Impact on <fixed-case>ASR</fixed-case> Performance and Human-to-Human Speech Quality</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/41.pdf</url>
       <bibkey>moller-kavallieratou-2002-diagnostic</bibkey>
     </paper>
     <paper id="42">
-      <author><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
-      <author><first>Emilio</first><last>Sanchís</last></author>
-      <author><first>Fernando</first><last>García-Granada</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchís</last></author>
+      <author id="fernando-garcia"><first>Fernando</first><last>García-Granada</last></author>
       <author><first>Pablo</first><last>Aibar</last></author>
       <title>A Labelling Proposal to Annotate Dialogues</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/42.pdf</url>
@@ -334,22 +334,22 @@
     </paper>
     <paper id="43">
       <author><first>Simone</first><last>Teufel</last></author>
-      <author><first>Noemie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
       <title>Collection and linguistic processing of a large-scale corpus of medical articles</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/43.pdf</url>
       <bibkey>teufel-elhadad-2002-collection</bibkey>
     </paper>
     <paper id="44">
       <author><last>Tokunaga</last><first>Takenobu</first></author>
-      <author><last>Okumura</last><first>Manabu</first></author>
-      <author><last>Saitô</last><first>Suguru</first></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
+      <author id="suguru-saito"><first>Suguru</first><last>Saitô</last></author>
       <author><last>Tanaka</last><first>Hozumi</first></author>
       <title>Constructing a lexicon of action</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/44.pdf</url>
       <bibkey>tokunaga-etal-2002-constructing</bibkey>
     </paper>
     <paper id="45">
-      <author><first>Birte</first><last>Lönneker</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker</last></author>
       <title>Building Concept Frames based on Text Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/45.pdf</url>
       <bibkey>lonneker-2002-building</bibkey>
@@ -367,14 +367,14 @@
     </paper>
     <paper id="47">
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <title>Automatic Adaptation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Domains</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/47.pdf</url>
       <bibkey>navigli-velardi-2002-automatic</bibkey>
     </paper>
     <paper id="48">
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <title>From <fixed-case>DTD</fixed-case> to relational d<fixed-case>B</fixed-case>. An automatic generation of a lexicographical station out off <fixed-case>ISLE</fixed-case> guidelines</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/48.pdf</url>
       <bibkey>villegas-bel-2002-dtd</bibkey>
@@ -400,7 +400,7 @@
       <author><first>Antonio</first><last>Molina</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
-      <author><first>Lidia</first><last>Moreno</last></author>
+      <author id="lidia-moreno"><first>Lidia</first><last>Moreno</last></author>
       <title>Word Sense Disambiguation using Statistical Models and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/51.pdf</url>
       <bibkey>molina-etal-2002-word</bibkey>
@@ -426,23 +426,23 @@
     </paper>
     <paper id="55">
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <title>Corpus-based Evaluation of a <fixed-case>F</fixed-case>rench Spelling and Grammar Checker</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/55.pdf</url>
       <bibkey>starlander-popescu-belis-2002-corpus</bibkey>
     </paper>
     <paper id="56">
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
       <title>Formal Mechanisms for Capturing Regularizations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/56.pdf</url>
       <bibkey>meyers-etal-2002-formal</bibkey>
     </paper>
     <paper id="57">
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Frank H.</first><last>Müller</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="frank-henrik-muller"><first>Frank H.</first><last>Müller</last></author>
       <author><first>Tylman</first><last>Ule</last></author>
       <title>A Hybrid Architecture for Robust Parsing of <fixed-case>G</fixed-case>erman</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/57.pdf</url>
@@ -451,14 +451,14 @@
     <paper id="58">
       <author><first>Rainer</first><last>Siemund</last></author>
       <author><first>Barbara</first><last>Heuft</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
       <author><first>Emmanuel</first><last>Maragoudakis</last></author>
       <author><first>Herbert</first><last>Tropf</last></author>
       <author><first>Oren</first><last>Gedge</last></author>
       <author><first>Sherrie</first><last>Shammass</last></author>
-      <author><first>Asuncion</first><last>Moreno</last></author>
-      <author><first>Albino Nogueiras</first><last>Rodriguez</last></author>
+      <author id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></author>
+      <author id="albino-nogueiras"><first>Albino Nogueiras</first><last>Rodriguez</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <author><first>Dorota</first><last>Iskra</last></author>
       <title><fixed-case>O</fixed-case>rien<fixed-case>T</fixed-case>el - Multilingual access to interactive communication services for the Mediterranean and the <fixed-case>M</fixed-case>iddle <fixed-case>E</fixed-case>ast</title>
@@ -468,7 +468,7 @@
     <paper id="59">
       <author><first>Kazutaka</first><last>Takao</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <title>Comparing and Extracting Paraphrasing Words with 2-Way Bilingual Dictionaries</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/59.pdf</url>
       <bibkey>takao-etal-2002-comparing</bibkey>
@@ -481,30 +481,30 @@
     </paper>
     <paper id="61">
       <author><first>Sabine</first><last>Brants</last></author>
-      <author><first>Silvia</first><last>Hansen</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen</last></author>
       <title>Developments in the <fixed-case>TIGER</fixed-case> Annotation Scheme and their Realization in the Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/61.pdf</url>
       <bibkey>brants-hansen-2002-developments</bibkey>
     </paper>
     <paper id="62">
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>José</first><last>Leitão</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <title>Nexing Corpus: a corpus of verbal protocols on syllogistic reasoning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/62.pdf</url>
       <bibkey>branco-etal-2002-nexing</bibkey>
     </paper>
     <paper id="63">
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Ivona</first><last>Kučerová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="ivona-kucerova"><first>Ivona</first><last>Kučerová</last></author>
       <title>Argument/Valency Structure in <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>LCS</fixed-case> Database and <fixed-case>P</fixed-case>rague Dependency Treebank: A Comparative Pilot Study</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/63.pdf</url>
       <bibkey>hajicova-kucerova-2002-argument</bibkey>
     </paper>
     <paper id="64">
       <author><first>Karl</first><last>Weilhammer</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Florian</first><last>Schiel</last></author>
       <title>Multi-Tier Annotations in the Verbmobil Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/64.pdf</url>
@@ -543,24 +543,24 @@
       <bibkey>cappelli-etal-2002-knowledge</bibkey>
     </paper>
     <paper id="69">
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <author><first>Ermanno</first><last>Maci</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
-      <author><first>Giampaolo</first><last>Mazzini</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
+      <author id="giampaolo-mazzini"><first>Giampaolo</first><last>Mazzini</last></author>
       <title><fixed-case>S</fixed-case>i<fixed-case>SSA</fixed-case>: An Infrastructure for Developing <fixed-case>NLP</fixed-case> Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/69.pdf</url>
       <bibkey>lavelli-etal-2002-sissa</bibkey>
     </paper>
     <paper id="70">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Milena</first><last>Slavcheva</last></author>
-      <author><first>Sia</first><last>Kolkovska</last></author>
+      <author id="sia-kolkovska"><first>Sia</first><last>Kolkovska</last></author>
       <author><first>Elisaveta</first><last>Balabanova</last></author>
       <author><first>Dimitar</first><last>Doikoff</last></author>
-      <author><first>Krassimira</first><last>Ivanova</last></author>
+      <author id="krasimira-ivanova"><first>Krassimira</first><last>Ivanova</last></author>
       <author><first>Alexander</first><last>Simov</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <title>Building a Linguistically Interpreted Corpus of <fixed-case>B</fixed-case>ulgarian: the <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
@@ -570,7 +570,7 @@
     <paper id="71">
       <author><first>Ton</first><last>van der Wouden</last></author>
       <author><first>Heleen</first><last>Hoekstra</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <author><first>Bram</first><last>Renmans</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <title>Syntactic Analysis in the Spoken <fixed-case>D</fixed-case>utch Corpus (<fixed-case>CGN</fixed-case>)</title>
@@ -578,8 +578,8 @@
       <bibkey>van-der-wouden-etal-2002-syntactic</bibkey>
     </paper>
     <paper id="72">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <author><first>Gilbert</first><last>Robert</last></author>
       <title>Electronic Dictionaries - from Publisher Data to a Distribution Server: the <fixed-case>D</fixed-case>ico<fixed-case>P</fixed-case>ro, <fixed-case>D</fixed-case>ico<fixed-case>E</fixed-case>ast and <fixed-case>RERO</fixed-case> Projects</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/72.pdf</url>
@@ -601,44 +601,44 @@
       <bibkey>geutner-etal-2002-design</bibkey>
     </paper>
     <paper id="75">
-      <author><first>Nadia</first><last>Mana</last></author>
-      <author><first>Ornella</first><last>Corazzari</last></author>
+      <author id="nadia-mana"><first>Nadia</first><last>Mana</last></author>
+      <author id="ornella-corazzari"><first>Ornella</first><last>Corazzari</last></author>
       <title>The Lexico-semantic Annotation of an <fixed-case>I</fixed-case>talian Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/75.pdf</url>
       <bibkey>mana-corazzari-2002-lexico</bibkey>
     </paper>
     <paper id="76">
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Roberto</first><last>Prevete</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <title>Towards Automatic Evaluation of Question/Answering Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/76.pdf</url>
       <bibkey>magnini-etal-2002-towards</bibkey>
     </paper>
     <paper id="77">
-      <author><first>Martin</first><last>Rajman</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Automatic Ranking of <fixed-case>MT</fixed-case> Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/77.pdf</url>
       <bibkey>rajman-hartley-2002-automatic</bibkey>
     </paper>
     <paper id="78">
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <title>Opportunistic Semantic Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/78.pdf</url>
       <bibkey>bentivogli-pianta-2002-opportunistic</bibkey>
     </paper>
     <paper id="79">
-      <author><first>Petr</first><last>Pollák</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollák</last></author>
       <author><first>Václav</first><last>Hanžl</last></author>
       <title>Tool for <fixed-case>C</fixed-case>zech Pronunciation Generation Combining Fixed Rules with Pronunciation Lexicon and Lexicon Management Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/79.pdf</url>
       <bibkey>pollak-hanzl-2002-tool</bibkey>
     </paper>
     <paper id="80">
-      <author><first>Tony</first><last>Rose</last></author>
+      <author id="tony-rose"><first>Tony</first><last>Rose</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <author><first>Miles</first><last>Whitehead</last></author>
       <title>The <fixed-case>R</fixed-case>euters Corpus Volume 1 -from Yesterday’s News to Tomorrow’s Language Resources</title>
@@ -653,8 +653,8 @@
       <bibkey>dutilh-kruyt-2002-implementation</bibkey>
     </paper>
     <paper id="82">
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <author><first>Sašo</first><last>Džeroski</last></author>
       <title>A Machine Learning Approach to Automatic Functor Assignment in the <fixed-case>P</fixed-case>rague Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/82.pdf</url>
@@ -669,7 +669,7 @@
     <paper id="84">
       <author><first>Carole</first><last>Tiberius</last></author>
       <author><first>Dunstan</first><last>Brown</last></author>
-      <author><first>Greville</first><last>Corbett</last></author>
+      <author id="greville-c-corbett"><first>Greville</first><last>Corbett</last></author>
       <title>A typological database of agreement</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/84.pdf</url>
       <bibkey>tiberius-etal-2002-typological</bibkey>
@@ -681,8 +681,8 @@
       <bibkey>lin-2002-web</bibkey>
     </paper>
     <paper id="86">
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <title>Automatic paraphrasing based on parallel corpus for normalization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/86.pdf</url>
       <bibkey>shimohata-sumita-2002-automatic</bibkey>
@@ -702,7 +702,7 @@
     </paper>
     <paper id="89">
       <author><first>Andrej</first><last>Žgank</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Bogomir</first><last>Horvat</last></author>
       <title>Preliminary Evaluation of <fixed-case>S</fixed-case>lovenian Mobile Database <fixed-case>P</fixed-case>oli<fixed-case>D</fixed-case>at</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/89.pdf</url>
@@ -732,7 +732,7 @@
     </paper>
     <paper id="93">
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <author><first>Françoise</first><last>Rousseau-Hans</last></author>
       <author><first>Jean-Luc</first><last>Simoni</last></author>
       <title>Building domain specific lexical hierarchies from corpora</title>
@@ -740,14 +740,14 @@
       <bibkey>ferret-etal-2002-building</bibkey>
     </paper>
     <paper id="94">
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Evaluation of Machine Learning Methods for Natural Language Processing Tasks</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/94.pdf</url>
       <bibkey>daelemans-hoste-2002-evaluation</bibkey>
     </paper>
     <paper id="95">
-      <author><first>Tristan</first><last>Van Rullen</last></author>
+      <author id="tristan-vanrullen"><first>Tristan</first><last>Van Rullen</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <title>An evaluation of different symbolic shallow parsing techniques</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/95.pdf</url>
@@ -766,7 +766,7 @@
     </paper>
     <paper id="97">
       <author><first>Jean-Pierre</first><last>Martens</last></author>
-      <author><first>Diana</first><last>Binnenpoorte</last></author>
+      <author id="diana-binnenpoorte"><first>Diana</first><last>Binnenpoorte</last></author>
       <author><first>Kris</first><last>Demuynck</last></author>
       <author><first>Ruben</first><last>Van Parys</last></author>
       <author><first>Tom</first><last>Laureys</last></author>
@@ -779,11 +779,11 @@
     <paper id="98">
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Wim</first><last>Goedertier</last></author>
-      <author><first>Frank</first><last>van Eynde</last></author>
-      <author><first>Louis</first><last>Boves</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>van Eynde</last></author>
+      <author id="lou-boves"><first>Louis</first><last>Boves</last></author>
       <author><first>Jean-Pierre</first><last>Martens</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
-      <author><first>Harald</first><last>Baayen</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
+      <author id="harald-baayen"><first>Harald</first><last>Baayen</last></author>
       <title>Experiences from the Spoken <fixed-case>D</fixed-case>utch Corpus Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/98.pdf</url>
       <bibkey>oostdijk-etal-2002-experiences</bibkey>
@@ -795,9 +795,9 @@
       <bibkey>mikros-2002-quantitative</bibkey>
     </paper>
     <paper id="100">
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <title>Acquisition of Qualia Elements from Corpora - Evaluation of a Symbolic Learning Method</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/100.pdf</url>
@@ -827,8 +827,8 @@
       <bibkey>widdows-etal-2002-using</bibkey>
     </paper>
     <paper id="104">
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <title>Evaluation and collection of proper name pronunciations online</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/104.pdf</url>
       <bibkey>font-llitjos-black-2002-evaluation</bibkey>
@@ -845,7 +845,7 @@
     <paper id="106">
       <author><first>Alex</first><last>Alsina</last></author>
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Àngel</first><last>Gil</last></author>
       <author><first>Martí</first><last>Quixal</last></author>
@@ -868,7 +868,7 @@
     </paper>
     <paper id="109">
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title><fixed-case>TTS</fixed-case> - A Treebank Tool Suite</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/109.pdf</url>
       <bibkey>cahill-van-genabith-2002-tts</bibkey>
@@ -898,14 +898,14 @@
       <bibkey>cassidy-2002-xquery</bibkey>
     </paper>
     <paper id="113">
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Ramesh</first><last>Krishnamurthy</last></author>
       <title>A corpus-based investigation of junk emails</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/113.pdf</url>
       <bibkey>orasan-krishnamurthy-2002-corpus</bibkey>
     </paper>
     <paper id="114">
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <title>Building annotated resources for automatic text summarisation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/114.pdf</url>
       <bibkey>orasan-2002-building</bibkey>
@@ -925,10 +925,10 @@
       <bibkey>steiner-kallmeyer-2002-viqtorya</bibkey>
     </paper>
     <paper id="117">
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Tomonori</first><last>Ishikawa</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>Acquiring Lexical Knowledge for Anaphora Resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/117.pdf</url>
       <bibkey>poesio-etal-2002-acquiring</bibkey>
@@ -943,7 +943,7 @@
       <author><first>Chikashi</first><last>Nobata</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>Summarization System Integrated with Named Entity Tagging and <fixed-case>IE</fixed-case> pattern Discovery</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/119.pdf</url>
       <bibkey>nobata-etal-2002-summarization</bibkey>
@@ -970,7 +970,7 @@
       <bibkey>matsumoto-tanaka-2002-automatic</bibkey>
     </paper>
     <paper id="123">
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
@@ -980,7 +980,7 @@
     </paper>
     <paper id="124">
       <author><first>Yong-Ju</first><last>Lee</last></author>
-      <author><first>Bong-Wan</first><last>Kim</last></author>
+      <author id="bong-wan-kim"><first>Bong-Wan</first><last>Kim</last></author>
       <author><first>Yongnam</first><last>Um</last></author>
       <title>Speech Information Technology &amp; Industry Promotion Center in <fixed-case>K</fixed-case>orea: Activities and Directions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/124.pdf</url>
@@ -998,9 +998,9 @@
     </paper>
     <paper id="126">
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>Combining <fixed-case>B</fixed-case>ayesian and Support Vector Machines Learning to automatically complete Syntactical Information for <fixed-case>HPSG</fixed-case>-like Formalisms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/126.pdf</url>
       <bibkey>maragoudakis-etal-2002-combining</bibkey>
@@ -1017,34 +1017,34 @@
     </paper>
     <paper id="128">
       <author><first>Aristomenis</first><last>Thanopoulos</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>Comparative Evaluation of Collocation Extraction Metrics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/128.pdf</url>
       <bibkey>thanopoulos-etal-2002-comparative</bibkey>
     </paper>
     <paper id="129">
-      <author><first>Christophe</first><last>Laprun</last></author>
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
-      <author><first>John</first><last>Garofolo</last></author>
+      <author id="christophe-laprun"><first>Christophe</first><last>Laprun</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="john-s-garofolo"><first>John</first><last>Garofolo</last></author>
       <author><first>Sylvain</first><last>Pajot</last></author>
       <title>A Pratical Introduction to <fixed-case>ATLAS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/129.pdf</url>
       <bibkey>laprun-etal-2002-pratical</bibkey>
     </paper>
     <paper id="130">
-      <author><first>John</first><last>Garofolo</last></author>
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
-      <author><first>Alvin</first><last>Martin</last></author>
-      <author><first>David</first><last>Pallett</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="john-s-garofolo"><first>John</first><last>Garofolo</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="alvin-martin"><first>Alvin</first><last>Martin</last></author>
+      <author id="david-s-pallett"><first>David</first><last>Pallett</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <title><fixed-case>NIST</fixed-case> Rich Transcription 2002 Evaluation: A Preview</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/130.pdf</url>
       <bibkey>garofolo-etal-2002-nist</bibkey>
     </paper>
     <paper id="131">
       <author><first>Paloma</first><last>Martínez</last></author>
-      <author><first>Ana</first><last>García-Serrano</last></author>
+      <author id="ana-garcia-serrano"><first>Ana</first><last>García-Serrano</last></author>
       <author><first>Alberto</first><last>Ruiz-Cristina</last></author>
       <title>Integrating <fixed-case>S</fixed-case>panish Linguistic Resources in a Web Site Assistant</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/131.pdf</url>
@@ -1059,15 +1059,15 @@
     <paper id="133">
       <author><first>Gregory</first><last>Grefenstette</last></author>
       <author><first>Yan</first><last>Qu</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <title>Expanding lexicons by inducing paradigms and validating attested forms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/133.pdf</url>
       <bibkey>grefenstette-etal-2002-expanding</bibkey>
     </paper>
     <paper id="134">
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <title>Statistical Machine Translation on Paraphrased Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/134.pdf</url>
       <bibkey>watanabe-etal-2002-statistical</bibkey>
@@ -1080,7 +1080,7 @@
       <bibkey>bia-quero-2002-building</bibkey>
     </paper>
     <paper id="136">
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <title>Translation Unit Concerning Timing of Simultaneous Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/136.pdf</url>
       <bibkey>kashioka-2002-translation</bibkey>
@@ -1094,24 +1094,24 @@
       <bibkey>narita-etal-2002-web</bibkey>
     </paper>
     <paper id="138">
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>Luís</first><last>Oliveira</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="jose-luis-oliveira"><first>Luís</first><last>Oliveira</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <title>Morphosyntactic Disambiguation for <fixed-case>TTS</fixed-case> Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/138.pdf</url>
       <bibkey>ribeiro-etal-2002-morphosyntactic</bibkey>
     </paper>
     <paper id="139">
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Hiroaki</first><last>Sato</last></author>
       <title>Seeing Arguments through Transparent Structures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/139.pdf</url>
       <bibkey>fillmore-etal-2002-seeing</bibkey>
     </paper>
     <paper id="140">
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Hiroaki</first><last>Sato</last></author>
       <title>The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Database and Software Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/140.pdf</url>
@@ -1127,31 +1127,31 @@
       <bibkey>ma-etal-2002-models</bibkey>
     </paper>
     <paper id="142">
-      <author><first>Doroteo Torre</first><last>Toledano</last></author>
-      <author><first>Luis A. Hernández</first><last>Gómez</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo Torre</first><last>Toledano</last></author>
+      <author id="luis-hernandez"><first>Luis A. Hernández</first><last>Gómez</last></author>
       <title><fixed-case>HMM</fixed-case>s for Automatic Phonetic Segmentation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/142.pdf</url>
       <bibkey>toledano-gomez-2002-hmms</bibkey>
     </paper>
     <paper id="143">
-      <author><first>Helen Wright</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen Wright</first><last>Hastie</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <title>Automatic Evaluation: Using a <fixed-case>DATE</fixed-case> Dialogue Act Tagger for User Satisfaction and Task Completion Prediction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/143.pdf</url>
       <bibkey>hastie-etal-2002-automatic</bibkey>
     </paper>
     <paper id="144">
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Javier</first><last>Caminero</last></author>
-      <author><first>Luis</first><last>Hernández</last></author>
-      <author><first>Montserrat</first><last>Marimón</last></author>
+      <author id="luis-hernandez"><first>Luis</first><last>Hernández</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimón</last></author>
       <author><first>José F.</first><last>Morlesín</last></author>
       <author><first>Josep M.</first><last>Otero</last></author>
-      <author><first>José</first><last>Relaño</last></author>
-      <author><first>M. Carmen</first><last>Rodríguez</last></author>
+      <author id="jose-relano-gil"><first>José</first><last>Relaño</last></author>
+      <author id="mari-carmen-rodriguez-gancedo"><first>M. Carmen</first><last>Rodríguez</last></author>
       <author><first>Pedro M.</first><last>Ruz</last></author>
-      <author><first>Daniel</first><last>Tapias</last></author>
+      <author id="daniel-tapias"><first>Daniel</first><last>Tapias</last></author>
       <title>Design and Evaluation of a <fixed-case>SLDS</fixed-case> for <fixed-case>E</fixed-case>-Mail Access through the Telephone</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/144.pdf</url>
       <bibkey>bel-etal-2002-design</bibkey>
@@ -1161,30 +1161,30 @@
       <author><first>Fabre</first><last>Lambeau</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Ivan A.</first><last>Sag</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <title>Multiword expressions: linguistic precision and reusability</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/145.pdf</url>
       <bibkey>copestake-etal-2002-multiword</bibkey>
     </paper>
     <paper id="146">
       <author><first>Keita</first><last>Tsuji</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <title>Extracting <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese Word Pairs from Bilingual Corpora based on Transliteration Rules</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/146.pdf</url>
       <bibkey>tsuji-etal-2002-extracting</bibkey>
     </paper>
     <paper id="147">
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <title>A Two-level Morphological Analyser and Generator for <fixed-case>I</fixed-case>rish using Finite-State Transducers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/147.pdf</url>
       <bibkey>ui-dhonnchadha-2002-two</bibkey>
     </paper>
     <paper id="148">
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <title>A Method for Automatically Building and Evaluating Dictionary Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/148.pdf</url>
       <bibkey>muresan-klavans-2002-method</bibkey>
@@ -1203,17 +1203,17 @@
       <bibkey>shirai-2002-construction</bibkey>
     </paper>
     <paper id="151">
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <author><first>Eon-Suk</first><last>Ko</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Development and Evaluation of a <fixed-case>K</fixed-case>orean Treebank and its Application to <fixed-case>NLP</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/151.pdf</url>
       <bibkey>han-etal-2002-development</bibkey>
     </paper>
     <paper id="152">
       <author><first>Alexandra</first><last>Kinyon</last></author>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <title>Identifying Verb Arguments and their Syntactic Function in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/152.pdf</url>
       <bibkey>kinyon-prolo-2002-identifying</bibkey>
@@ -1226,10 +1226,10 @@
       <bibkey>mokhtari-campbell-2002-automatic</bibkey>
     </paper>
     <paper id="154">
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>Saim</first><last>Shin</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
       <author><first>Yong-Seok</first><last>Choi</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Word Sense Disambiguation with Information Retrieval Technique</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/154.pdf</url>
       <bibkey>oh-etal-2002-word</bibkey>
@@ -1249,56 +1249,56 @@
     <paper id="156">
       <author><first>Erica</first><last>Costantini</last></author>
       <author><first>Susanne</first><last>Burger</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <title><fixed-case>NESPOLE</fixed-case>!’s Multilingual and Multimodal Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/156.pdf</url>
       <bibkey>costantini-etal-2002-nespole</bibkey>
     </paper>
     <paper id="157">
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Oana</first><last>Hamza</last></author>
-      <author><first>Christian</first><last>Ursu</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="cristian-ursu"><first>Christian</first><last>Ursu</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Extracting Information for Automatic Indexing of Multimedia Material</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/157.pdf</url>
       <bibkey>saggion-etal-2002-extracting</bibkey>
     </paper>
     <paper id="158">
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Wai</first><last>Lam</last></author>
-      <author><first>Stephanie M.</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie M.</first><last>Strassel</last></author>
       <title>Developing Infrastructure for the Evaluation of Single and Multi-document Summarization Systems in a Cross-lingual Environment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/158.pdf</url>
       <bibkey>saggion-etal-2002-developing</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Iason</first><last>Demiros</last></author>
-      <author><first>Alexis</first><last>Konstantinidis</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="alexis-konstantinidis"><first>Alexis</first><last>Konstantinidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <title>Multi-level <fixed-case>XML</fixed-case>-based Corpus Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/159.pdf</url>
       <bibkey>papageorgiou-etal-2002-multi</bibkey>
     </paper>
     <paper id="160">
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <title>Project Proposal <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case> - Make Speech to Speech Translation Real</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/160.pdf</url>
       <bibkey>hoge-2002-project</bibkey>
     </paper>
     <paper id="161">
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <author><first>Ivan</first><last>Chardin</last></author>
       <author><first>Svetlana</first><last>Grigorieva</last></author>
       <author><first>Nikolai</first><last>Grigoriev</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <author><first>Leonid</first><last>Kreidlin</last></author>
       <author><first>Nadezhda</first><last>Frid</last></author>
       <title>Development of a Dependency Treebank for <fixed-case>R</fixed-case>ussian and its Possible Applications in <fixed-case>NLP</fixed-case></title>
@@ -1337,7 +1337,7 @@
       <bibkey>munoz-etal-2002-bilingual</bibkey>
     </paper>
     <paper id="165">
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <author><first>Kadri</first><last>Muischnek</last></author>
       <title>Using the Text Corpus to Create a Comprehensive List of Phrasal Verbs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/165.pdf</url>
@@ -1346,14 +1346,14 @@
     <paper id="166">
       <author><first>Diana</first><last>Raileanu</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Spela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Spela</first><last>Vintar</last></author>
       <author><first>Jörg</first><last>Bay</last></author>
       <title>Evaluation Corpora for Sense Disambiguation in the Medical Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/166.pdf</url>
       <bibkey>raileanu-etal-2002-evaluation</bibkey>
     </paper>
     <paper id="167">
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <author><first>Bärbel</first><last>Ripplinger</last></author>
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
@@ -1364,8 +1364,8 @@
       <bibkey>vintar-etal-2002-efficient</bibkey>
     </paper>
     <paper id="168">
-      <author><first>Markéta</first><last>Straňáková-Lopatková</last></author>
-      <author><first>Zdenĕk</first><last>Žabokrtský</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Straňáková-Lopatková</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdenĕk</first><last>Žabokrtský</last></author>
       <title>Valency Dictionary of <fixed-case>C</fixed-case>zech Verbs: Complex Tectogrammatical Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/168.pdf</url>
       <bibkey>stranakova-lopatkova-zabokrtsky-2002-valency</bibkey>
@@ -1390,41 +1390,41 @@
       <bibkey>trippel-gibbon-2002-annotation</bibkey>
     </paper>
     <paper id="172">
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title><fixed-case>DELOS</fixed-case>: An Automatically Tagged Economic Corpus for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/172.pdf</url>
       <bibkey>kermanidis-etal-2002-delos</bibkey>
     </paper>
     <paper id="173">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <title>Give me a bug. a framework for a bug report service</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/173.pdf</url>
       <bibkey>van-den-heuvel-etal-2002-give</bibkey>
     </paper>
     <paper id="174">
       <author><first>Vladimir</first><last>Hozjan</last></author>
-      <author><first>Zdravko</first><last>Kacic</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
-      <author><first>Albino</first><last>Nogueiras</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kacic</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="albino-nogueiras"><first>Albino</first><last>Nogueiras</last></author>
       <title>Interface Databases: Design and Collection of a Multilingual Emotional Speech Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/174.pdf</url>
       <bibkey>hozjan-etal-2002-interface</bibkey>
     </paper>
     <paper id="175">
       <author><first>Vladimir</first><last>Hozjan</last></author>
-      <author><first>Zdravko</first><last>Kacic</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kacic</last></author>
       <title>Objective analysis of emotional speech for <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>lovenian Interface emotional speech databases</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/175.pdf</url>
       <bibkey>hozjan-kacic-2002-objective</bibkey>
     </paper>
     <paper id="176">
       <author><first>Konstantin</first><last>Biatov</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <title>Methods and Tools for Speech Data Acquisition exploiting a Database of <fixed-case>G</fixed-case>erman Parliamentary Speeches and Transcripts from the <fixed-case>I</fixed-case>nternet</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/176.pdf</url>
       <bibkey>biatov-kohler-2002-methods</bibkey>
@@ -1433,7 +1433,7 @@
       <author><first>Dorota</first><last>Iskra</last></author>
       <author><first>Beate</first><last>Grosskopf</last></author>
       <author><first>Krzysztof</first><last>Marasek</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Frank</first><last>Diehl</last></author>
       <author><first>Andreas</first><last>Kiessling</last></author>
       <title><fixed-case>SPEECON</fixed-case> – Speech Databases for Consumer Devices: Database Specification and Validation</title>
@@ -1445,7 +1445,7 @@
       <author><first>Michael</first><last>Hess</last></author>
       <author><first>Neeme</first><last>Kahusk</last></author>
       <author><first>Kaarel</first><last>Kaljurand</last></author>
-      <author><first>Mare</first><last>Koit</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
       <author><first>Fabio</first><last>Rinaldi</last></author>
       <author><first>Kadri</first><last>Vider</last></author>
       <title>Technical Terminology as a Critical Resource</title>
@@ -1462,23 +1462,23 @@
     <paper id="180">
       <author><first>Rickard</first><last>Domeij</last></author>
       <author><first>Ola</first><last>Knutsson</last></author>
-      <author><first>Kerstin Severinson</first><last>Eklundh</last></author>
+      <author id="kerstin-severinson-eklundh"><first>Kerstin Severinson</first><last>Eklundh</last></author>
       <title>Different Ways of Evaluating a <fixed-case>S</fixed-case>wedish Grammar Checker</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/180.pdf</url>
       <bibkey>domeij-etal-2002-different</bibkey>
     </paper>
     <paper id="181">
-      <author><first>Antonio</first><last>Moreno Ortiz</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno Ortiz</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <title>New Developments in Ontological Semantics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/181.pdf</url>
       <bibkey>moreno-ortiz-etal-2002-new</bibkey>
     </paper>
     <paper id="182">
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>Eric</first><last>Kow</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Towards Reusable <fixed-case>NLP</fixed-case> Components</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/182.pdf</url>
       <bibkey>todirascu-etal-2002-towards</bibkey>
@@ -1486,7 +1486,7 @@
     <paper id="183">
       <author><first>Judita</first><last>Preiss</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <title>Subcategorization Acquisition as an Evaluation Method for <fixed-case>WSD</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/183.pdf</url>
       <bibkey>preiss-etal-2002-subcategorization</bibkey>
@@ -1508,7 +1508,7 @@
     </paper>
     <paper id="186">
       <author><first>Matej</first><last>Rojc</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Darinka</first><last>Verdonik</last></author>
       <title>Design and Implementation of the <fixed-case>S</fixed-case>lovenian Phonetic and Morphology Lexicons for the Use in Spoken Language Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/186.pdf</url>
@@ -1522,8 +1522,8 @@
       <bibkey>hathout-tanguy-2002-webaffix</bibkey>
     </paper>
     <paper id="188">
-      <author><first>Natalia V.</first><last>Loukachevitch</last></author>
-      <author><first>Boris V.</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia V.</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris V.</first><last>Dobrov</last></author>
       <title>Evaluation of Thesaurus on Sociopolitical Life as Information-Retrieval Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/188.pdf</url>
       <bibkey>loukachevitch-dobrov-2002-evaluation</bibkey>
@@ -1547,7 +1547,7 @@
       <author><first>Ryuichi</first><last>Yoneda</last></author>
       <author><first>Akiko</first><last>Yamashita</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Use of <fixed-case>XML</fixed-case> and Relational Databases for Consistent Development and Maintenance of Lexicons and Annotated Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/191.pdf</url>
       <bibkey>asahara-etal-2002-use</bibkey>
@@ -1581,7 +1581,7 @@
       <bibkey>rio-2002-compiling</bibkey>
     </paper>
     <paper id="196">
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Olivier</first><last>Hû</last></author>
       <title>How to evaluate necessary cooperative systems of terminology building?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/196.pdf</url>
@@ -1594,15 +1594,15 @@
       <author><first>Elisabetta</first><last>Guazzini</last></author>
       <author><first>Stefano</first><last>Molino</last></author>
       <author><first>Marisa</first><last>Ulivieri</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title><fixed-case>CLIPS</fixed-case>, a Multi-level <fixed-case>I</fixed-case>talian Computational Lexicon: a Glimpse to Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/197.pdf</url>
       <bibkey>ruimy-etal-2002-clips</bibkey>
     </paper>
     <paper id="198">
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>Nominal Expressions in Multilingual Corpora: Definites and Demonstratives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/198.pdf</url>
       <bibkey>salmon-alt-vieira-2002-nominal</bibkey>
@@ -1610,7 +1610,7 @@
     <paper id="199">
       <author><first>Jerker</first><last>Järborg</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Maria Toporowska</first><last>Gronostaj</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria Toporowska</first><last>Gronostaj</last></author>
       <title>Lexical and Textual Resources for Sense Recognition and Description</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/199.pdf</url>
       <bibkey>jarborg-etal-2002-lexical</bibkey>
@@ -1649,56 +1649,56 @@
       <bibkey>eguchi-etal-2002-sensitivity</bibkey>
     </paper>
     <paper id="204">
-      <author><first>Brian</first><last>Mitchell</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="brian-mitchell"><first>Brian</first><last>Mitchell</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>A Comparison of Machine Learning Algorithms for Prepositional Phrase Attachment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/204.pdf</url>
       <bibkey>mitchell-gaizauskas-2002-comparison</bibkey>
     </paper>
     <paper id="205">
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Oana-Diana</first><last>Postolache</last></author>
+      <author id="oana-postolache"><first>Oana-Diana</first><last>Postolache</last></author>
       <author><first>Gabriela-Eugenia</first><last>Dima</last></author>
-      <author><first>Cătălina</first><last>Barbu</last></author>
+      <author id="catalina-barbu"><first>Cătălina</first><last>Barbu</last></author>
       <title><fixed-case>AR</fixed-case>-Engine - a framework for unrestricted co-reference resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/205.pdf</url>
       <bibkey>cristea-etal-2002-ar</bibkey>
     </paper>
     <paper id="206">
-      <author><first>Cătălina</first><last>Barbu</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="catalina-barbu"><first>Cătălina</first><last>Barbu</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>A corpus based investigation of morphological disagreement in anaphoric relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/206.pdf</url>
       <bibkey>barbu-etal-2002-corpus</bibkey>
     </paper>
     <paper id="207">
-      <author><first>Cătălina</first><last>Barbu</last></author>
+      <author id="catalina-barbu"><first>Cătălina</first><last>Barbu</last></author>
       <title>Error analysis in anaphora resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/207.pdf</url>
       <bibkey>barbu-2002-error</bibkey>
     </paper>
     <paper id="208">
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Caroline</first><last>Bousquet-Vernhettes</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
-      <author><first>Mohamed Zakaria</first><last>Kurdi</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="caroline-bousquet-vernhettes"><first>Caroline</first><last>Bousquet-Vernhettes</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
+      <author id="mohamed-zakaria-kurdi"><first>Mohamed Zakaria</first><last>Kurdi</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Nadine</first><last>Vigouroux</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <title>Predictive and objective evaluation of speech understanding: the “challenge” evaluation campaign of the I3 speech workgroup of the <fixed-case>F</fixed-case>rench <fixed-case>CNRS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/208.pdf</url>
       <bibkey>antoine-etal-2002-predictive</bibkey>
     </paper>
     <paper id="209">
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <author><first>Richard</first><last>Moot</last></author>
       <title>Using the Spoken <fixed-case>D</fixed-case>utch Corpus for type-logical grammar induction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/209.pdf</url>
       <bibkey>moortgat-moot-2002-using</bibkey>
     </paper>
     <paper id="210">
-      <author><first>Bolette S.</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette S.</first><last>Pedersen</last></author>
       <author><first>Patrizia</first><last>Paggio</last></author>
       <title>Semantic Lexical Resources Applied to Content-based Querying - the <fixed-case>O</fixed-case>nto<fixed-case>Q</fixed-case>uery Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/210.pdf</url>
@@ -1709,13 +1709,13 @@
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
       <author><first>Georgios</first><last>Paliouras</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <title><fixed-case>E</fixed-case>llogon: A New Text Engineering Platform</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/211.pdf</url>
       <bibkey>petasis-etal-2002-ellogon</bibkey>
     </paper>
     <paper id="212">
-      <author><first>Antonio S.</first><last>Valderrábanos</last></author>
+      <author id="antonio-s-valderrabanos"><first>Antonio S.</first><last>Valderrábanos</last></author>
       <author><first>Alexander</first><last>Belskis</last></author>
       <author><first>Luis Iraola</first><last>Moreno</last></author>
       <title>Multilingual Terminology Extraction and Validation</title>
@@ -1723,15 +1723,15 @@
       <bibkey>valderrabanos-etal-2002-multilingual</bibkey>
     </paper>
     <paper id="213">
-      <author><first>Laila</first><last>Dybkjær</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <title>Natural Interactivity Resources – Data, Annotation Schemes and Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/213.pdf</url>
       <bibkey>dybkjaer-bernsen-2002-natural</bibkey>
     </paper>
     <paper id="214">
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <author><first>Mykola</first><last>Kolodnytsky</last></author>
       <title><fixed-case>THE</fixed-case> <fixed-case>NITE</fixed-case> <fixed-case>WORKBENCH</fixed-case>. A Tool for Annotation of Natural Interactivity and Multimodal Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/214.pdf</url>
@@ -1739,9 +1739,9 @@
     </paper>
     <paper id="215">
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Cristian</first><last>Ursu</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="cristian-ursu"><first>Cristian</first><last>Ursu</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Oana</first><last>Hamza</last></author>
       <author><first>Tony</first><last>McEnery</last></author>
@@ -1756,30 +1756,30 @@
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
       <author><first>Ioannis</first><last>Koutsias</last></author>
       <author><first>George</first><last>Petasis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <title><fixed-case>P</fixed-case>at<fixed-case>E</fixed-case>dit: An Information Extraction Pattern Editor for Fast System Customization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/216.pdf</url>
       <bibkey>farmakiotou-etal-2002-patedit</bibkey>
     </paper>
     <paper id="217">
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <title>The <fixed-case>H</fixed-case>ungarian National Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/217.pdf</url>
       <bibkey>varadi-2002-hungarian</bibkey>
     </paper>
     <paper id="218">
-      <author><first>Paul</first><last>Clough</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author id="scott-s-l-piao"><first>S. L.</first><last>Piao</last></author>
       <title>Building and annotating a corpus for the study of journalistic text reuse</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/218.pdf</url>
       <bibkey>clough-etal-2002-building</bibkey>
     </paper>
     <paper id="219">
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <author><first>Harriet</first><last>Spenke</last></author>
       <author><first>Markus</first><last>Kramer</last></author>
-      <author><first>Alexander</first><last>Klassmann</last></author>
+      <author id="alex-klassmann"><first>Alexander</first><last>Klassmann</last></author>
       <title>Multimedia Annotation with Multilingual Input Methods and Search Support</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/219.pdf</url>
       <bibkey>brugman-etal-2002-multimedia</bibkey>
@@ -1818,7 +1818,7 @@
       <bibkey>wittenburg-etal-2002-multimodal</bibkey>
     </paper>
     <paper id="224">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Freddy</first><last>Offenga</last></author>
       <author><first>Don</first><last>Willems</last></author>
       <title>Metadata Tools Supporting Controlled Vocabulary Services</title>
@@ -1826,10 +1826,10 @@
       <bibkey>broeder-etal-2002-metadata</bibkey>
     </paper>
     <paper id="225">
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title><fixed-case>LREP</fixed-case>: A Language Repository Exchange Protocol</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/225.pdf</url>
       <bibkey>broeder-etal-2002-lrep</bibkey>
@@ -1848,9 +1848,9 @@
       <bibkey>schmitz-2002-subject</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Steve</first><last>Whittaker</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <title>Fish or Fowl:A <fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Evaluation of Dialogue Strategies in the Restaurant Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/228.pdf</url>
       <bibkey>whittaker-etal-2002-fish</bibkey>
@@ -1858,7 +1858,7 @@
     <paper id="229">
       <author><first>Adriana</first><last>Roventini</last></author>
       <author><first>Marisa</first><last>Ulivieri</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <title>Integrating Two Semantic Lexicons, <fixed-case>SIMPLE</fixed-case> and <fixed-case>I</fixed-case>tal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: What Can We Gain?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/229.pdf</url>
       <bibkey>roventini-etal-2002-integrating</bibkey>
@@ -1871,14 +1871,14 @@
       <bibkey>marinelli-roventini-2002-proper</bibkey>
     </paper>
     <paper id="231">
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <title>Transformed Subcategorization Frames in Chunk Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/231.pdf</url>
       <bibkey>lesmo-lombardo-2002-transformed</bibkey>
     </paper>
     <paper id="232">
-      <author><first>Gabriela</first><last>Cavaglià</last></author>
+      <author id="gabriela-cavaglia"><first>Gabriela</first><last>Cavaglià</last></author>
       <title>Measuring corpus homogeneity using a range of measures for inter-document distance</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/232.pdf</url>
       <bibkey>cavaglia-2002-measuring</bibkey>
@@ -1891,7 +1891,7 @@
       <author><first>Dimitra</first><last>Farmakiotou</last></author>
       <author><first>Georgios</first><last>Samaritakis</last></author>
       <author><first>Georgios</first><last>Petasis</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Michele</first><last>Vindigni</last></author>
       <author><first>Frantz</first><last>Vichot</last></author>
       <author><first>Francis</first><last>Wolinski</last></author>
@@ -1900,7 +1900,7 @@
       <bibkey>grover-etal-2002-multilingual</bibkey>
     </paper>
     <paper id="234">
-      <author><first>Janienke</first><last>Sturm</last></author>
+      <author id="janienke-sturm"><first>Janienke</first><last>Sturm</last></author>
       <author><first>Ilse</first><last>Bakx</last></author>
       <author><first>Bert</first><last>Cranen</last></author>
       <author><first>Jacques</first><last>Terken</last></author>
@@ -1913,18 +1913,18 @@
       <author><first>Katerina</first><last>Pastra</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Oana</first><last>Hamza</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>How feasible is the reuse of grammars for Named Entity Recognition?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/235.pdf</url>
       <bibkey>pastra-etal-2002-feasible</bibkey>
     </paper>
     <paper id="236">
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <author><first>Niels</first><last>Cadée</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <author><first>Stefan</first><last>Evert</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Amy</first><last>Isard</last></author>
@@ -1950,23 +1950,23 @@
     </paper>
     <paper id="238">
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Susana</first><last>Early</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="susana-early"><first>Susana</first><last>Early</last></author>
       <title><fixed-case>L</fixed-case>eo: an Architecture for Sharing Resources for Unification-Based Grammars</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/238.pdf</url>
       <bibkey>baldridge-etal-2002-leo</bibkey>
     </paper>
     <paper id="239">
-      <author><first>Irena</first><last>Spasić</last></author>
-      <author><first>Goran</first><last>Nenadić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadić</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Tuning Context Features with Genetic Algorithms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/239.pdf</url>
       <bibkey>spasic-etal-2002-tuning</bibkey>
     </paper>
     <paper id="240">
-      <author><first>Goran</first><last>Nenadić</last></author>
-      <author><first>Irena</first><last>Spasić</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Automatic Acronym Acquisition and Term Variation Management within Domain-Specific Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/240.pdf</url>
@@ -1979,9 +1979,9 @@
       <bibkey>nimb-2002-adverbs</bibkey>
     </paper>
     <paper id="242">
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
       <author><first>Eva</first><last>Forsbom</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Per</first><last>Weijnitz</last></author>
       <author><first>Ingrid</first><last>Almqvist</last></author>
       <author><first>Leif-Jöran</first><last>Olsson</last></author>
@@ -1992,7 +1992,7 @@
     </paper>
     <paper id="243">
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <title>A Flexible Distributed Architecture for Natural Language Analyzers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/243.pdf</url>
       <bibkey>carreras-padro-2002-flexible</bibkey>
@@ -2008,8 +2008,8 @@
       <bibkey>picchi-etal-2002-italian</bibkey>
     </paper>
     <paper id="245">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>Language Resource Creation and Distribution at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: A Progress Report</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/245.pdf</url>
       <bibkey>cieri-liberman-2002-language</bibkey>
@@ -2022,7 +2022,7 @@
       <bibkey>sassen-gibbon-2002-enhanced</bibkey>
     </paper>
     <paper id="247">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title><fixed-case>M</fixed-case>ats<fixed-case>L</fixed-case>ex - a Multilingual Lexical Database for Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/247.pdf</url>
       <bibkey>tiedemann-2002-matslex</bibkey>
@@ -2040,16 +2040,16 @@
       <bibkey>pfitzinger-2002-reducing</bibkey>
     </paper>
     <paper id="250">
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <title>Robust Accurate Statistical Annotation of General Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/250.pdf</url>
       <bibkey>briscoe-carroll-2002-robust</bibkey>
     </paper>
     <paper id="251">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
-      <author><first>Lisanne</first><last>Teunissen</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="lisanne-teunissen"><first>Lisanne</first><last>Teunissen</last></author>
       <title>A Human Language Technologies Platform for the <fixed-case>D</fixed-case>utch language: awareness, management maintenance and distribution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/251.pdf</url>
       <bibkey>cucchiarini-etal-2002-human</bibkey>
@@ -2066,8 +2066,8 @@
       <bibkey>binnenpoorte-etal-2002-field</bibkey>
     </paper>
     <paper id="253">
-      <author><first>Ana M.</first><last>García-Serrano</last></author>
-      <author><first>Luis</first><last>Rodrigo-Aguado</last></author>
+      <author id="ana-garcia-serrano"><first>Ana M.</first><last>García-Serrano</last></author>
+      <author id="luis-rodrigo-aguado"><first>Luis</first><last>Rodrigo-Aguado</last></author>
       <author><first>Javier</first><last>Calle</last></author>
       <title>Natural Language Dialogue in a Virtual Assistant Interface</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/253.pdf</url>
@@ -2083,7 +2083,7 @@
     </paper>
     <paper id="255">
       <author><first>Dieter</first><last>Maas</last></author>
-      <author><first>Rita</first><last>Nuebel</last></author>
+      <author id="rita-nuebel"><first>Rita</first><last>Nuebel</last></author>
       <author><first>Catherine</first><last>Pease</last></author>
       <author><first>Paul</first><last>Schmidt</last></author>
       <title>Bilingual Indexing for Information Retrieval with <fixed-case>AUTINDEX</fixed-case></title>
@@ -2091,45 +2091,45 @@
       <bibkey>maas-etal-2002-bilingual</bibkey>
     </paper>
     <paper id="256">
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <title>The Future of Maltilex</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/256.pdf</url>
       <bibkey>rosner-2002-future</bibkey>
     </paper>
     <paper id="257">
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Standards &amp; best practice for multilingual computational lexicons: <fixed-case>ISLE</fixed-case> <fixed-case>MILE</fixed-case> and more”</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/257.pdf</url>
       <bibkey>calzolari-etal-2002-standards</bibkey>
     </paper>
     <paper id="258">
       <author><first>Sue</first><last>Atkins</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Catherine</first><last>MacLeod</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>MacLeod</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Gregor</first><last>Thurmair</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title>From Resources to Applications. Designing the Multilingual <fixed-case>ISLE</fixed-case> Lexical Entry</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/258.pdf</url>
       <bibkey>atkins-etal-2002-resources</bibkey>
     </paper>
     <paper id="259">
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Catherine</first><last>MacLeod</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>MacLeod</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title>Towards Best Practice for Multiword Expressions in Computational Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/259.pdf</url>
       <bibkey>calzolari-etal-2002-towards</bibkey>
@@ -2137,12 +2137,12 @@
     <paper id="260">
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Ana</first><last>Agua</last></author>
       <author><first>Stephan</first><last>Busemann</last></author>
       <author><first>Emmanuel</first><last>Cartier</last></author>
       <author><first>Karine</first><last>Chevreau</last></author>
-      <author><first>José</first><last>Coch</last></author>
+      <author id="jose-coch"><first>José</first><last>Coch</last></author>
       <title>Multilingual Summarization by Integrating Linguistic Resources in the <fixed-case>MLIS</fixed-case>-<fixed-case>MUSI</fixed-case> Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/260.pdf</url>
       <bibkey>lenci-etal-2002-multilingual</bibkey>
@@ -2154,9 +2154,9 @@
       <bibkey>braasch-2002-current</bibkey>
     </paper>
     <paper id="262">
-      <author><first>Robert E.</first><last>Frederking</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <author><first>John</first><last>Moody</last></author>
       <author><first>Eric</first><last>Steinbrecher</last></author>
       <title>Field Testing the Tongues Speech-to-Speech Machine Translation System</title>
@@ -2165,7 +2165,7 @@
     </paper>
     <paper id="263">
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <title>Acquiring Compact Lexicalized Grammars from a Cleaner Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/263.pdf</url>
       <bibkey>hockenmaier-steedman-2002-acquiring</bibkey>
@@ -2180,7 +2180,7 @@
     </paper>
     <paper id="265">
       <author><first>Hélène</first><last>François</last></author>
-      <author><first>Olivier</first><last>Boëffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boëffard</last></author>
       <title>The Greedy Algorithm and its Application to the Construction of a Continuous Speech Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/265.pdf</url>
       <bibkey>francois-boeffard-2002-greedy</bibkey>
@@ -2217,14 +2217,14 @@
       <bibkey>cassan-etal-2002-step</bibkey>
     </paper>
     <paper id="269">
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <author><first>Oren</first><last>Gedge</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <author><first>Sabine</first><last>Horbach</last></author>
       <author><first>Patricia</first><last>Martin</last></author>
       <author><first>Elisabeth</first><last>Pinto</last></author>
-      <author><first>Antonio</first><last>Rincón</last></author>
+      <author id="antonio-rincon"><first>Antonio</first><last>Rincón</last></author>
       <author><first>Franco</first><last>Senia</last></author>
       <author><first>Rafid</first><last>Sukkar</last></author>
       <title><fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at across all <fixed-case>A</fixed-case>merica: <fixed-case>SALA</fixed-case> <fixed-case>II</fixed-case></title>
@@ -2248,7 +2248,7 @@
     <paper id="272">
       <author><first>Nordine</first><last>Fourour</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <title>Incremental Recognition and Referential Categorization of <fixed-case>F</fixed-case>rench Proper Names</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/272.pdf</url>
       <bibkey>fourour-etal-2002-incremental</bibkey>
@@ -2263,8 +2263,8 @@
       <bibkey>matsubara-etal-2002-bilingual</bibkey>
     </paper>
     <paper id="274">
-      <author><first>Marcela</first><last>Charfuelán</last></author>
-      <author><first>Luis Hernández</first><last>Gómez</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelán</last></author>
+      <author id="luis-hernandez"><first>Luis Hernández</first><last>Gómez</last></author>
       <author><first>Cristina Esteban</first><last>López</last></author>
       <author><first>Holmer</first><last>Hemsen</last></author>
       <title>A <fixed-case>XML</fixed-case>-based tool for evaluation of <fixed-case>SLDS</fixed-case></title>
@@ -2280,7 +2280,7 @@
       <bibkey>lopez-de-ipina-etal-2002-automatic</bibkey>
     </paper>
     <paper id="276">
-      <author><first>Richard F. E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F. E.</first><last>Sutcliffe</last></author>
       <author><first>Kieran</first><last>White</last></author>
       <title>Searching via Keywords or Concept Hierarchies - Which is Better?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/276.pdf</url>
@@ -2288,16 +2288,16 @@
     </paper>
     <paper id="277">
       <author><first>Juliana Galvani</first><last>Greghi</last></author>
-      <author><first>Ronaldo Teixeira</first><last>Martins</last></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last></author>
+      <author id="ronaldo-teixeira-martins"><first>Ronaldo Teixeira</first><last>Martins</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last></author>
       <title><fixed-case>DIADORIM</fixed-case> - A Lexical Database for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/277.pdf</url>
       <bibkey>greghi-etal-2002-diadorim</bibkey>
     </paper>
     <paper id="278">
       <author><first>Mónica</first><last>Caballero</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <title>Multidialectal <fixed-case>S</fixed-case>panish Modeling for <fixed-case>ASR</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/278.pdf</url>
       <bibkey>caballero-etal-2002-multidialectal</bibkey>
@@ -2327,16 +2327,16 @@
       <bibkey>uibo-2002-experimental</bibkey>
     </paper>
     <paper id="282">
-      <author><first>Marianne</first><last>Dabbadie</last></author>
-      <author><first>Widad Mustafa El</first><last>Hadi</last></author>
-      <author><first>Ismaïl</first><last>Timimi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad Mustafa El</first><last>Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismaïl</first><last>Timimi</last></author>
       <title>Terminological Enrichment for non-Interactive <fixed-case>MT</fixed-case> Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/282.pdf</url>
       <bibkey>dabbadie-etal-2002-terminological</bibkey>
     </paper>
     <paper id="283">
       <author><first>Paul</first><last>Kingsbury</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>From <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank to <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/283.pdf</url>
       <bibkey>kingsbury-palmer-2002-treebank</bibkey>
@@ -2345,9 +2345,9 @@
       <author><first>Almudena</first><last>Ballester</last></author>
       <author><first>Ángel Martín</first><last>Municio</last></author>
       <author><first>Fernando</first><last>Pardos</last></author>
-      <author><first>Jordi Porta</first><last>Zamorano</last></author>
+      <author id="jordi-porta-zamorano"><first>Jordi Porta</first><last>Zamorano</last></author>
       <author><first>Rafael J. Ruiz</first><last>Ureña</last></author>
-      <author><first>Fernando Sánchez</first><last>León</last></author>
+      <author id="fernando-sanchez-leon"><first>Fernando Sánchez</first><last>León</last></author>
       <title>Combining statistics on n-grams for automatic term recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/284.pdf</url>
       <bibkey>ballester-etal-2002-combining</bibkey>
@@ -2375,23 +2375,23 @@
     <paper id="287">
       <author><first>Nobuo</first><last>Kawaguchi</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <author><first>Fumitada</first><last>Itakura</last></author>
       <title>Multi-Dimensional Data Acquisition for Integrated Acoustic Information Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/287.pdf</url>
       <bibkey>kawaguchi-etal-2002-multi</bibkey>
     </paper>
     <paper id="288">
-      <author><first>Laurence</first><last>Devillers</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <title>Annotations for Dynamic Diagnosis of the Dialog State</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/288.pdf</url>
       <bibkey>devillers-etal-2002-annotations</bibkey>
     </paper>
     <paper id="289">
-      <author><first>Jean-Claude</first><last>Martin</last></author>
+      <author id="jean-claude-martin"><first>Jean-Claude</first><last>Martin</last></author>
       <author><first>Michael</first><last>Kipp</last></author>
       <title>Annotating and Measuring Multimodal Behaviour – Tycoon Metrics in the Anvil Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/289.pdf</url>
@@ -2400,12 +2400,12 @@
     <paper id="290">
       <author><first>Emanuela</first><last>Cresti</last></author>
       <author><first>Massimo</first><last>Moneglia</last></author>
-      <author><first>Fernanda Bacelar</first><last>do Nascimento</last></author>
-      <author><first>Antonio Moreno</first><last>Sandoval</last></author>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Fernanda Bacelar</first><last>do Nascimento</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio Moreno</first><last>Sandoval</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <author><first>Philippe</first><last>Martin</last></author>
-      <author><first>Kalid</first><last>Choukri</last></author>
-      <author><first>Valerie</first><last>Mapelli</last></author>
+      <author id="khalid-choukri"><first>Kalid</first><last>Choukri</last></author>
+      <author id="valerie-mapelli"><first>Valerie</first><last>Mapelli</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <author><first>Antonio</first><last>Cid</last></author>
       <author><first>Claude</first><last>Blum</last></author>
@@ -2414,8 +2414,8 @@
       <bibkey>cresti-etal-2002-c</bibkey>
     </paper>
     <paper id="291">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title><fixed-case>TIDES</fixed-case> Language Resources: A Resource Map for Translingual Information Access</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/291.pdf</url>
       <bibkey>cieri-liberman-2002-tides</bibkey>
@@ -2424,7 +2424,7 @@
       <author><first>Stefan</first><last>Eickeler</last></author>
       <author><first>Martha</first><last>Larson</last></author>
       <author><first>Wolff</first><last>Rüter</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <title>Creation of an Annotated <fixed-case>G</fixed-case>erman Broadcast Speech Database for Spoken Document Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/292.pdf</url>
       <bibkey>eickeler-etal-2002-creation</bibkey>
@@ -2452,7 +2452,7 @@
       <bibkey>duclaye-etal-2002-using</bibkey>
     </paper>
     <paper id="296">
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <title>An <fixed-case>API</fixed-case> for Discourse-level Access to <fixed-case>XML</fixed-case>-encoded Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/296.pdf</url>
@@ -2460,13 +2460,13 @@
     </paper>
     <paper id="297">
       <author><first>Hidetsugu</first><last>Nanba</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <title>Some Examinations of Intrinsic Methods for Summary Evaluation Based on the Text Summarization Challenge (<fixed-case>TSC</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/297.pdf</url>
       <bibkey>nanba-okumura-2002-examinations</bibkey>
     </paper>
     <paper id="298">
-      <author><first>Mohamed-Zakaria</first><last>Kurdi</last></author>
+      <author id="mohamed-zakaria-kurdi"><first>Mohamed-Zakaria</first><last>Kurdi</last></author>
       <author><first>Mohamed</first><last>Ahafhaf</last></author>
       <title>Toward an objective and generic Method for Spoken Language Understanding Systems Evaluation: an extension of the <fixed-case>DCR</fixed-case> method</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/298.pdf</url>
@@ -2481,14 +2481,14 @@
       <bibkey>heyer-etal-2002-information</bibkey>
     </paper>
     <paper id="300">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>The <fixed-case>DASL</fixed-case> Project: a Case Study in Data Re-Annotation and Re-Use</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/300.pdf</url>
       <bibkey>cieri-strassel-2002-dasl</bibkey>
     </paper>
     <paper id="301">
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Hong</first><last>Qi</last></author>
       <author><first>Harris</first><last>Wu</last></author>
       <author><first>Weiguo</first><last>Fan</last></author>
@@ -2499,13 +2499,13 @@
     <paper id="302">
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Kôiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Kôiti</first><last>Hasida</last></author>
       <title>Construction of a <fixed-case>J</fixed-case>apanese Relevance-tagged Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/302.pdf</url>
       <bibkey>kawahara-etal-2002-construction</bibkey>
     </paper>
     <paper id="303">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Randi</first><last>Reppen</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <title>The <fixed-case>A</fixed-case>merican National Corpus: More Than the Web Can Provide</title>
@@ -2520,9 +2520,9 @@
     </paper>
     <paper id="305">
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Fumiaki</first><last>Sugaya</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <title>Toward a Broad-coverage Bilingual Corpus for Speech Translation of Travel Conversations in the Real World</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/305.pdf</url>
@@ -2530,7 +2530,7 @@
     </paper>
     <paper id="306">
       <author><first>Michelle</first><last>Vanni</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <title>Scaling the <fixed-case>ISLE</fixed-case> Framework: Use of Existing Corpus Resources for Validation of <fixed-case>MT</fixed-case> Evaluation Metrics across Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/306.pdf</url>
       <bibkey>vanni-miller-2002-scaling</bibkey>
@@ -2543,7 +2543,7 @@
     </paper>
     <paper id="308">
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Michael J.</first><last>Scott</last></author>
       <title>The binomial cumulative distribution function, or, is my system better than yours?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/308.pdf</url>
@@ -2559,7 +2559,7 @@
       <bibkey>suyaga-etal-2002-proposal</bibkey>
     </paper>
     <paper id="310">
-      <author><first>Rada F.</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada F.</first><last>Mihalcea</last></author>
       <title>Bootstrapping Large Sense Tagged Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/310.pdf</url>
       <bibkey>mihalcea-2002-bootstrapping</bibkey>
@@ -2573,7 +2573,7 @@
       <bibkey>ogino-etal-2002-valence</bibkey>
     </paper>
     <paper id="312">
-      <author><first>Jean-Claude</first><last>Martin</last></author>
+      <author id="jean-claude-martin"><first>Jean-Claude</first><last>Martin</last></author>
       <author><first>Jean-Hugues</first><last>Réty</last></author>
       <author><first>Nelly</first><last>Bensimon</last></author>
       <title>Multimodal and Adaptative Pedagogical Resources</title>
@@ -2581,7 +2581,7 @@
       <bibkey>martin-etal-2002-multimodal</bibkey>
     </paper>
     <paper id="313">
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Slaven</first><last>Bilac</last></author>
       <author><first>Ryo</first><last>Okumura</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
@@ -2592,13 +2592,13 @@
     </paper>
     <paper id="314">
       <author><first>Romaric</first><last>Besançon</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <title>Evaluation of a Vector Space Similarity Measure in a Multilingual Framework</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/314.pdf</url>
       <bibkey>besancon-rajman-2002-evaluation</bibkey>
     </paper>
     <paper id="315">
-      <author><first>Serge A.</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge A.</first><last>Yablonsky</last></author>
       <title>Corpora as Object-Oriented System. From <fixed-case>UML</fixed-case>-notation to Implementation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/315.pdf</url>
       <bibkey>yablonsky-2002-corpora</bibkey>
@@ -2606,7 +2606,7 @@
     <paper id="316">
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <title>The Lexicon-Grammar Balance in Robust Parsing of <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/316.pdf</url>
@@ -2619,7 +2619,7 @@
       <bibkey>jung-2002-humans</bibkey>
     </paper>
     <paper id="318">
-      <author><first>Atsuko</first><last>Koizumi</last></author>
+      <author id="atsuko-koizumi"><first>Atsuko</first><last>Koizumi</last></author>
       <author><first>Hirohiko</first><last>Sagawa</last></author>
       <author><first>Masaru</first><last>Takeuchi</last></author>
       <title>An Annotated <fixed-case>J</fixed-case>apanese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus</title>
@@ -2630,8 +2630,8 @@
       <author><first>Paul</first><last>Baker</last></author>
       <author><first>Andrew</first><last>Hardie</last></author>
       <author><first>Tony</first><last>McEnery</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
       <title><fixed-case>EMILLE</fixed-case>, A 67-Million Word Corpus of Indic Languages: Data Collection, Mark-up and Harmonisation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/319.pdf</url>
       <bibkey>baker-etal-2002-emille</bibkey>
@@ -2643,8 +2643,8 @@
       <bibkey>salmen-2002-multi</bibkey>
     </paper>
     <paper id="321">
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <title>Assessing the difficulty of finding people in texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/321.pdf</url>
       <bibkey>orasan-evans-2002-assessing</bibkey>
@@ -2656,7 +2656,7 @@
       <bibkey>olsen-2002-lemma</bibkey>
     </paper>
     <paper id="323">
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <author><first>Márton</first><last>Miháltz</last></author>
       <title>Automatism and User Interaction: Building a <fixed-case>H</fixed-case>ungarian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/323.pdf</url>
@@ -2669,11 +2669,11 @@
       <bibkey>gendner-2002-comparative</bibkey>
     </paper>
     <paper id="325">
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Cassandre</first><last>Creswell</last></author>
       <author><first>Rachel</first><last>Szekely</last></author>
       <author><first>Harriet</first><last>Taber</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <title>A Dependency Treebank for <fixed-case>E</fixed-case>nglish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/325.pdf</url>
       <bibkey>rambow-etal-2002-dependency</bibkey>
@@ -2686,8 +2686,8 @@
       <bibkey>ramesh-bagga-2002-text</bibkey>
     </paper>
     <paper id="327">
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Standards for Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/327.pdf</url>
       <bibkey>ide-romary-2002-standards</bibkey>
@@ -2703,7 +2703,7 @@
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Koichi</first><last>Takeuchi</last></author>
       <author><first>Chikashi</first><last>Nobata</last></author>
-      <author><first>Junichi</first><last>Fukumoto</last></author>
+      <author id="junichi-fukumoto"><first>Junichi</first><last>Fukumoto</last></author>
       <author><first>Norihiro</first><last>Ogata</last></author>
       <title>Progress on Multi-lingual Named Entity Annotation Guidelines using <fixed-case>RDF</fixed-case> (<fixed-case>S</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/329.pdf</url>
@@ -2718,11 +2718,11 @@
     <paper id="331">
       <author><first>Joanne</first><last>Capstick</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Gregor</first><last>Erbach</last></author>
-      <author><first>Anthony</first><last>Jameson</last></author>
-      <author><first>Brigitte</first><last>Jorg</last></author>
+      <author id="anthony-jameson"><first>Anthony</first><last>Jameson</last></author>
+      <author id="brigitte-jorg"><first>Brigitte</first><last>Jorg</last></author>
       <author><first>Reinhard</first><last>Karger</last></author>
       <author><first>Tillmann</first><last>Wegst</last></author>
       <title><fixed-case>COLLATE</fixed-case>: Competence Center in Speech and Language Technology</title>
@@ -2737,13 +2737,13 @@
       <bibkey>suzuki-kakihana-2002-japanese</bibkey>
     </paper>
     <paper id="333">
-      <author><first>Primož</first><last>Jakopin</last></author>
+      <author id="primoz-jakopin"><first>Primož</first><last>Jakopin</last></author>
       <title>The feasibility of a complete text corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/333.pdf</url>
       <bibkey>jakopin-2002-feasibility</bibkey>
     </paper>
     <paper id="334">
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <title>Lexical Annotation for Multi-word Entries Containing Nominalizations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/334.pdf</url>
       <bibkey>macleod-2002-lexical</bibkey>
@@ -2751,13 +2751,13 @@
     <paper id="335">
       <author><first>Silja</first><last>Huttunen</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>Diversity of Scenarios in Information extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/335.pdf</url>
       <bibkey>huttunen-etal-2002-diversity</bibkey>
     </paper>
     <paper id="336">
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <title>Multimodal Systems, Resources and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/336.pdf</url>
       <bibkey>maybury-2002-multimodal</bibkey>
@@ -2765,7 +2765,7 @@
     <paper id="337">
       <author><first>Hiromichi</first><last>Kawanami</last></author>
       <author><first>Tsuyoshi</first><last>Masuda</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Kiyohiro</first><last>Shikano</last></author>
       <title>Designing speech database with prosodic variety for expressive <fixed-case>TTS</fixed-case> system</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/337.pdf</url>
@@ -2782,7 +2782,7 @@
     <paper id="339">
       <author><first>Akinobu</first><last>Lee</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <author><first>Masato</first><last>Mimura</last></author>
       <author><first>Atsushi</first><last>Yamada</last></author>
       <author><first>Akinori</first><last>Ito</last></author>
@@ -2830,9 +2830,9 @@
     <paper id="345">
       <author><first>Véronique</first><last>Gendner</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Michèle</first><last>Jardino</last></author>
+      <author id="michele-jardino"><first>Michèle</first><last>Jardino</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title>A Protocol for Evaluating Analyzers of Syntax (<fixed-case>PEAS</fixed-case>)</title>
@@ -2840,8 +2840,8 @@
       <bibkey>gendner-etal-2002-protocol</bibkey>
     </paper>
     <paper id="346">
-      <author><first>Mark T.</first><last>Maybury</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <title>Language Resources and Evaluation: International Strategy Panel</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/346.pdf</url>
       <bibkey>maybury-zampolli-2002-language</bibkey>
@@ -2861,16 +2861,16 @@
     </paper>
     <paper id="349">
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Ringo</first><last>Wathelet</last></author>
       <title>Beyond Tag Trigrams: New Local Features for Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/349.pdf</url>
       <bibkey>finch-etal-2002-beyond</bibkey>
     </paper>
     <paper id="350">
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Finley</first><last>Lacatusu</last></author>
-      <author><first>Paul</first><last>Morarescu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="finley-lacatusu"><first>Finley</first><last>Lacatusu</last></author>
+      <author id="paul-morarescu"><first>Paul</first><last>Morarescu</last></author>
       <title>Multidocument Summarization with <fixed-case>GIST</fixed-case>exter</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/350.pdf</url>
       <bibkey>harabagiu-etal-2002-multidocument</bibkey>
@@ -2893,14 +2893,14 @@
       <bibkey>steininger-etal-2002-user</bibkey>
     </paper>
     <paper id="353">
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>Creating Domain-specific Information Servers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/353.pdf</url>
       <bibkey>pustejovsky-2002-creating</bibkey>
     </paper>
     <paper id="354">
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <title><fixed-case>UNL</fixed-case> Lexical Selection with Conceptual Vectors</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2002/pdf/354.pdf</url>
       <bibkey>lafourcade-boitet-2002-unl</bibkey>
diff --git a/data/xml/L04.xml b/data/xml/L04.xml
index ef631a9136..5c447227ff 100644
--- a/data/xml/L04.xml
+++ b/data/xml/L04.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fourth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>’04)</booktitle>
-      <editor><first>Maria Teresa</first><last>Lino</last></editor>
+      <editor id="maria-teresa-lino"><first>Maria Teresa</first><last>Lino</last></editor>
       <editor><first>Maria Francisca</first><last>Xavier</last></editor>
       <editor><first>Fátima</first><last>Ferreira</last></editor>
       <editor><first>Rute</first><last>Costa</last></editor>
@@ -18,7 +18,7 @@
       <bibkey>lrec-2004-international</bibkey>
     </frontmatter>
     <paper id="1">
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <title>Can We Talk? Prospects for Automatically Training Spoken Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/kII.pdf</url>
       <bibkey>walker-2004-talk</bibkey>
@@ -48,13 +48,13 @@
       <bibkey>campbell-2004-getting</bibkey>
     </paper>
     <paper id="6">
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <title>Industrial Needs for Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/I.pdf</url>
       <bibkey>maegaard-2004-industrial</bibkey>
     </paper>
     <paper id="7">
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <title>Thesaurus or Logical Ontology, Which do we Need for Mining Text?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/kI.pdf</url>
       <bibkey>tsujii-2004-thesaurus</bibkey>
@@ -82,7 +82,7 @@
       <author><first>Lourdes</first><last>Díaz</last></author>
       <author><first>Martí</first><last>Quixal</last></author>
       <author><first>Ana</first><last>Ruggia</last></author>
-      <author><first>Antonio S.</first><last>Valderrabanos</last></author>
+      <author id="antonio-s-valderrabanos"><first>Antonio S.</first><last>Valderrabanos</last></author>
       <author><first>Alberto J.</first><last>Cruz</last></author>
       <author><first>Enrique</first><last>Torrejon</last></author>
       <author><first>Celia</first><last>Rico</last></author>
@@ -92,12 +92,12 @@
       <bibkey>schmidt-etal-2004-alles</bibkey>
     </paper>
     <paper id="11">
-      <author><first>George</first><last>Doddington</last></author>
+      <author id="george-r-doddington"><first>George</first><last>Doddington</last></author>
       <author><first>Alexis</first><last>Mitchell</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <title>The Automatic Content Extraction (<fixed-case>ACE</fixed-case>) Program – Tasks, Data, and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/5.pdf</url>
       <bibkey>doddington-etal-2004-automatic</bibkey>
@@ -121,7 +121,7 @@
       <author><first>Jan</first><last>Krebber</last></author>
       <author><first>Alexander</first><last>Raake</last></author>
       <author><first>Paula</first><last>Smeele</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Mirek</first><last>Melichar</last></author>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
       <author><first>Gianna</first><last>Tsakou</last></author>
@@ -129,7 +129,7 @@
       <author><first>Anestis</first><last>Vovos</last></author>
       <author><first>Jettie</first><last>Hoonhout</last></author>
       <author><first>Dietmar</first><last>Schuchardt</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
       <author><first>Ilyas</first><last>Potamitis</last></author>
       <title><fixed-case>INSPIRE</fixed-case>: Evaluation of a Smart-Home System for Infotainment Management and Device Control</title>
@@ -138,7 +138,7 @@
     </paper>
     <paper id="15">
       <author><first>Ielka</first><last>van der Sluis</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <title>Evaluating Multimodal <fixed-case>NLG</fixed-case> Using Production Experiments</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/14.pdf</url>
       <bibkey>van-der-sluis-krahmer-2004-evaluating</bibkey>
@@ -146,7 +146,7 @@
     <paper id="16">
       <author><first>Nuno</first><last>Seco</last></author>
       <author><first>Tony</first><last>Veale</last></author>
-      <author><first>Jer</first><last>Hayes</last></author>
+      <author id="jer-hayes"><first>Jer</first><last>Hayes</last></author>
       <title>Concept Creation in Lexical Ontologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/15.pdf</url>
       <bibkey>seco-etal-2004-concept</bibkey>
@@ -159,7 +159,7 @@
     </paper>
     <paper id="18">
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Towards a Reference Annotation Framework</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/17.pdf</url>
       <bibkey>salmon-alt-romary-2004-towards</bibkey>
@@ -195,7 +195,7 @@
     <paper id="23">
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <author><first>Yi-Cheng</first><last>Yu</last></author>
-      <author><first>Chih-Long</first><last>Lin</last></author>
+      <author id="chih-lung-lin"><first>Chih-Long</first><last>Lin</last></author>
       <title>Collocation Extraction Using Web Statistics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/24.pdf</url>
       <bibkey>chen-etal-2004-collocation</bibkey>
@@ -209,23 +209,23 @@
     </paper>
     <paper id="25">
       <author><first>Christina</first><last>Alexandris</last></author>
-      <author><first>Stavroula-Evita</first><last>Fotinea</last></author>
+      <author id="stavroula-evita-fotinea"><first>Stavroula-Evita</first><last>Fotinea</last></author>
       <title>Reusing Language Resources for Speech Applications involving Emotion</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/27.pdf</url>
       <bibkey>alexandris-fotinea-2004-reusing</bibkey>
     </paper>
     <paper id="26">
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Amaia</first><last>Castelruiz</last></author>
       <author><first>Iker</first><last>Luengo</last></author>
-      <author><first>Jon</first><last>Sánchez</last></author>
-      <author><first>Inmaculada</first><last>Hernáez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sánchez</last></author>
+      <author id="inmaculada-hernaez"><first>Inmaculada</first><last>Hernáez</last></author>
       <title>Designing and Recording an Audiovisual Database of Emotional Speech in <fixed-case>B</fixed-case>asque</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/28.pdf</url>
       <bibkey>navas-etal-2004-designing</bibkey>
     </paper>
     <paper id="27">
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Sérgio</first><last>Nunes</last></author>
       <title>Evaluation of Different Similarity Measures for the Extraction of Multiword Units in a Reinforcement Learning Environment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/29.pdf</url>
@@ -260,14 +260,14 @@
       <bibkey>bordoni-2004-investigation</bibkey>
     </paper>
     <paper id="32">
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <title>Incremental Knowledge Acquisition from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/36.pdf</url>
       <bibkey>peters-2004-incremental</bibkey>
     </paper>
     <paper id="33">
-      <author><first>Vivi</first><last>Năstase</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Năstase</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <title>Finding Semantic Associations on Express Lane</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/38.pdf</url>
       <bibkey>nastase-mihalcea-2004-finding</bibkey>
@@ -281,24 +281,24 @@
     </paper>
     <paper id="35">
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <title>Automatic Language-Independent Induction of Gazetteer Lists</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/40.pdf</url>
       <bibkey>maynard-etal-2004-automatic</bibkey>
     </paper>
     <paper id="36">
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>Corpus Design, Recording and Phonetic Analysis of <fixed-case>G</fixed-case>reek Emotional Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/41.pdf</url>
       <bibkey>fakotakis-2004-corpus</bibkey>
     </paper>
     <paper id="37">
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
-      <author><first>Andrea</first><last>Setzer</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <title>Human Dialogue Modelling Using Annotated Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/42.pdf</url>
       <bibkey>wilks-etal-2004-human</bibkey>
@@ -339,7 +339,7 @@
     <paper id="43">
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Ru-Yng</first><last>Chang</last></author>
-      <author><first>Hsiang-Pin</first><last>Lee</last></author>
+      <author id="hsiang-pin-lee"><first>Hsiang-Pin</first><last>Lee</last></author>
       <title>Sinica <fixed-case>BOW</fixed-case> (Bilingual Ontological <fixed-case>W</fixed-case>ordnet): Integration of Bilingual <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/53.pdf</url>
       <bibkey>huang-etal-2004-sinica</bibkey>
@@ -347,7 +347,7 @@
     <paper id="44">
       <author><first>Stephan</first><last>Bopp</last></author>
       <author><first>Sandro</first><last>Pedrazzini</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <title>How to Disassemble Alphabetical Processions - Morphological Treatment of Unknown Words</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/54.pdf</url>
       <bibkey>bopp-etal-2004-disassemble</bibkey>
@@ -355,7 +355,7 @@
     <paper id="45">
       <author><first>Darinka</first><last>Verdonik</last></author>
       <author><first>Matej</first><last>Rojc</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title>Creating <fixed-case>S</fixed-case>lovenian Language Resources for Development of Speech-to-speech Translation Components</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/57.pdf</url>
       <bibkey>verdonik-etal-2004-creating</bibkey>
@@ -368,7 +368,7 @@
     </paper>
     <paper id="47">
       <author><first>Bojan</first><last>Kotnik</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Bogomir</first><last>Horvat</last></author>
       <title>The Development and Integration of the <fixed-case>LDA</fixed-case>-Toolkit Into <fixed-case>COST</fixed-case>249 <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at(<fixed-case>II</fixed-case>) <fixed-case>SIG</fixed-case> Reference Recognizer</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/59.pdf</url>
@@ -384,7 +384,7 @@
       <bibkey>ozturk-etal-2004-duration</bibkey>
     </paper>
     <paper id="49">
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Andreas</first><last>Hotho</last></author>
       <author><first>Steffen</first><last>Staab</last></author>
       <title>Clustering Concept Hierarchies from Text</title>
@@ -392,13 +392,13 @@
       <bibkey>cimiano-etal-2004-clustering</bibkey>
     </paper>
     <paper id="50">
-      <author><first>Alvin F.</first><last>Martin</last></author>
-      <author><first>John S.</first><last>Garofolo</last></author>
-      <author><first>Jonathan C.</first><last>Fiscus</last></author>
-      <author><first>Audrey N.</first><last>Le</last></author>
-      <author><first>David S.</first><last>Pallett</last></author>
-      <author><first>Mark A.</first><last>Przybocki</last></author>
-      <author><first>Gregory A.</first><last>Sanders</last></author>
+      <author id="alvin-martin"><first>Alvin F.</first><last>Martin</last></author>
+      <author id="john-s-garofolo"><first>John S.</first><last>Garofolo</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan C.</first><last>Fiscus</last></author>
+      <author id="audrey-le"><first>Audrey N.</first><last>Le</last></author>
+      <author id="david-s-pallett"><first>David S.</first><last>Pallett</last></author>
+      <author id="mark-przybocki"><first>Mark A.</first><last>Przybocki</last></author>
+      <author id="gregory-sanders"><first>Gregory A.</first><last>Sanders</last></author>
       <title><fixed-case>NIST</fixed-case> Language Technology Evaluation Cookbook</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/64.pdf</url>
       <bibkey>martin-etal-2004-nist</bibkey>
@@ -419,7 +419,7 @@
     </paper>
     <paper id="53">
       <author><first>Yong-Ju</first><last>Lee</last></author>
-      <author><first>Bong-Wan</first><last>Kim</last></author>
+      <author id="bong-wan-kim"><first>Bong-Wan</first><last>Kim</last></author>
       <author><first>Young-Il</first><last>Kim</last></author>
       <author><first>Dae-Lim</first><last>Choi</last></author>
       <author><first>Kwang-Hyun</first><last>Lee</last></author>
@@ -432,7 +432,7 @@
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <author><first>Francesca</first><last>Neri</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <title>Automatic Generation of Glosses in the <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/69.pdf</url>
       <bibkey>cucchiarelli-etal-2004-automatic</bibkey>
@@ -440,17 +440,17 @@
     <paper id="55">
       <author><first>An</first><last>Vandecatseye</last></author>
       <author><first>Jean-Pierre</first><last>Martens</last></author>
-      <author><first>Joao</first><last>Neto</last></author>
+      <author id="joao-p-neto"><first>Joao</first><last>Neto</last></author>
       <author><first>Hugo</first><last>Meinedo</last></author>
-      <author><first>Carmen</first><last>Garcia-Mateo</last></author>
-      <author><first>Javier</first><last>Dieguez</last></author>
-      <author><first>France</first><last>Mihelic</last></author>
-      <author><first>Janez</first><last>Zibert</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>Garcia-Mateo</last></author>
+      <author id="javier-dieguez-tirado"><first>Javier</first><last>Dieguez</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelic</last></author>
+      <author id="janez-zibert"><first>Janez</first><last>Zibert</last></author>
       <author><first>Jan</first><last>Nouza</last></author>
       <author><first>Petr</first><last>David</last></author>
-      <author><first>Matus</first><last>Pleva</last></author>
+      <author id="matus-pleva"><first>Matus</first><last>Pleva</last></author>
       <author><first>Anton</first><last>Cizmar</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Christina</first><last>Alexandris</last></author>
       <title>The <fixed-case>COST</fixed-case>278 Pan-<fixed-case>E</fixed-case>uropean Broadcast News Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/70.pdf</url>
@@ -481,7 +481,7 @@
     </paper>
     <paper id="59">
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <title><fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et as a Resource for Cross-language Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/76.pdf</url>
       <bibkey>stevenson-clough-2004-eurowordnet</bibkey>
@@ -502,22 +502,22 @@
       <bibkey>ando-etal-2004-automatic</bibkey>
     </paper>
     <paper id="62">
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <author><first>Benjamin</first><last>Snyder</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Extending a Verb-lexicon Using a Semantically Annotated Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/84.pdf</url>
       <bibkey>kipper-etal-2004-extending</bibkey>
     </paper>
     <paper id="63">
       <author><first>J.C.T.</first><last>Beeken</last></author>
-      <author><first>P.H.J.</first><last>van der Kamp</last></author>
+      <author id="p-h-j-van-der-kamp"><first>P.H.J.</first><last>van der Kamp</last></author>
       <title>The Centre for <fixed-case>D</fixed-case>utch Language and Speech Technology (<fixed-case>TST</fixed-case> Centre)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/85.pdf</url>
       <bibkey>beeken-van-der-kamp-2004-centre</bibkey>
     </paper>
     <paper id="64">
-      <author><first>Sue Ellen</first><last>Wright</last></author>
+      <author id="sue-ellen-wright"><first>Sue Ellen</first><last>Wright</last></author>
       <title>A Global Data Category Registry for Interoperable Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/87.pdf</url>
       <bibkey>wright-2004-global</bibkey>
@@ -529,14 +529,14 @@
       <bibkey>kruyt-2004-integrated</bibkey>
     </paper>
     <paper id="66">
-      <author><first>Hans</first><last>Dybkjær</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjær</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <title>From Acts and Topics to Transactions and Dialogue Smoothness</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/92.pdf</url>
       <bibkey>dybkjaer-dybkjaer-2004-acts</bibkey>
     </paper>
     <paper id="67">
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <title>Grouping Synonymous Sentences from a Parallel Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/93.pdf</url>
       <bibkey>kashioka-2004-grouping</bibkey>
@@ -549,7 +549,7 @@
       <bibkey>ahmad-musacchio-2004-discovery</bibkey>
     </paper>
     <paper id="69">
-      <author><first>Harald</first><last>Höge</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
       <author><first>Josef G.</first><last>Bauer</last></author>
       <author><first>Christian</first><last>Geißler</last></author>
       <author><first>Panji</first><last>Setiawan</last></author>
@@ -559,20 +559,20 @@
       <bibkey>hoge-etal-2004-evaluation</bibkey>
     </paper>
     <paper id="70">
-      <author><first>Janez</first><last>Žibert</last></author>
-      <author><first>France</first><last>Mihelič</last></author>
+      <author id="janez-zibert"><first>Janez</first><last>Žibert</last></author>
+      <author id="france-mihelic"><first>France</first><last>Mihelič</last></author>
       <title>Development of <fixed-case>S</fixed-case>lovenian Broadcast News Speech Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/98.pdf</url>
       <bibkey>zibert-mihelic-2004-development</bibkey>
     </paper>
     <paper id="71">
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <title>A Named Entity Recognizer for <fixed-case>D</fixed-case>anish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/99.pdf</url>
       <bibkey>bick-2004-named</bibkey>
     </paper>
     <paper id="72">
-      <author><first>M. Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>M. Teresa</first><last>Cabré</last></author>
       <author><first>Carme</first><last>Bach</last></author>
       <author><first>Rosa</first><last>Estopà</last></author>
       <author><first>Judit</first><last>Feliu</last></author>
@@ -612,7 +612,7 @@
       <bibkey>sorensen-2004-bilingual</bibkey>
     </paper>
     <paper id="77">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Kristina Hmeljak</first><last>Sangawa</last></author>
       <author><first>Irena</first><last>Srdanović</last></author>
       <author><first>Anton</first><last>ml. Vahčič</last></author>
@@ -621,7 +621,7 @@
       <bibkey>erjavec-etal-2004-making</bibkey>
     </paper>
     <paper id="78">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title><fixed-case>MULTEXT</fixed-case>-East Version 3: Multilingual Morphosyntactic Specifications, Lexicons and Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/109.pdf</url>
       <bibkey>erjavec-2004-multext</bibkey>
@@ -630,15 +630,15 @@
       <author><first>Lorena Seijo</first><last>Pereiro</last></author>
       <author><first>Ana Martínez</first><last>Ínsua</last></author>
       <author><first>Francisco Méndez</first><last>Pazó</last></author>
-      <author><first>Francisco Campillo</first><last>Díaz</last></author>
-      <author><first>Eduardo Rodríguez</first><last>Banga</last></author>
+      <author id="francisco-campillo"><first>Francisco Campillo</first><last>Díaz</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo Rodríguez</first><last>Banga</last></author>
       <title>A <fixed-case>G</fixed-case>alician Textual Corpus for Morphosyntactic Tagging with Application to Text-to-Speech Synthesis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/111.pdf</url>
       <bibkey>pereiro-etal-2004-galician</bibkey>
     </paper>
     <paper id="80">
-      <author><first>Salvador</first><last>España</last></author>
-      <author><first>María José</first><last>Castro</last></author>
+      <author id="salvador-espana"><first>Salvador</first><last>España</last></author>
+      <author id="maria-jose-castro-bleda"><first>María José</first><last>Castro</last></author>
       <author><first>José Luis</first><last>Hidalgo</last></author>
       <title>The <fixed-case>SPARTACUS</fixed-case>-Database: a <fixed-case>S</fixed-case>panish Sentence Database for Offline Handwriting Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/112.pdf</url>
@@ -646,32 +646,32 @@
     </paper>
     <paper id="81">
       <author><first>Sofia</first><last>Stamou</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
-      <author><first>Dimitris</first><last>Christodoulakis</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
+      <author id="dimitris-christodoulakis"><first>Dimitris</first><last>Christodoulakis</last></author>
       <title>Exploring <fixed-case>B</fixed-case>alkanet Shared Ontology for Multilingual Conceptual Indexing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/113.pdf</url>
       <bibkey>stamou-etal-2004-exploring</bibkey>
     </paper>
     <paper id="82">
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Building a Paraphrase Corpus for Speech Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/114.pdf</url>
       <bibkey>shimohata-etal-2004-building</bibkey>
     </paper>
     <paper id="83">
       <author><first>Yasuhiro</first><last>Akiba</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <title>Incremental Methods to Select Test Sentences for Evaluating Translation Ability</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/115.pdf</url>
       <bibkey>akiba-etal-2004-incremental</bibkey>
     </paper>
     <paper id="84">
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <title>Reusable Lexical Representations for Idioms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/116.pdf</url>
       <bibkey>odijk-2004-reusable</bibkey>
@@ -687,7 +687,7 @@
       <author><first>Janez</first><last>Stergar</last></author>
       <author><first>Caglayan</first><last>Erdem</last></author>
       <author><first>Bogomir</first><last>Horvat</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title>A Data-driven Adaptation of Prosody in a Multilingual <fixed-case>TTS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/130.pdf</url>
       <bibkey>stergar-etal-2004-data</bibkey>
@@ -713,12 +713,12 @@
     <paper id="89">
       <author><first>Andrej</first><last>Žgank</last></author>
       <author><first>Tomaž</first><last>Rotovnik</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maučec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maučec</last></author>
       <author><first>Darinka</first><last>Verdonik</last></author>
       <author><first>Janez</first><last>Kitak</last></author>
       <author><first>Damjan</first><last>Vlaj</last></author>
       <author><first>Vladimir</first><last>Hozjan</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Bogomir</first><last>Horvat</last></author>
       <title>Acquisition and Annotation of <fixed-case>S</fixed-case>lovenian Broadcast News Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/123.pdf</url>
@@ -726,11 +726,11 @@
     </paper>
     <paper id="90">
       <author><first>Andrej</first><last>Žgank</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <author><first>Frank</first><last>Diehl</last></author>
       <author><first>Klara</first><last>Vicsi</last></author>
       <author><first>Gyorgy</first><last>Szaszak</last></author>
-      <author><first>Jozef</first><last>Juhar</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhar</last></author>
       <author><first>Slavomir</first><last>Lihan</last></author>
       <title>The <fixed-case>COST</fixed-case> 278 <fixed-case>MASPER</fixed-case> Initiative - Crosslingual Speech Recognition with Large Telephone Databases</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/124.pdf</url>
@@ -750,14 +750,14 @@
     </paper>
     <paper id="93">
       <author><first>Vincent</first><last>Vandeghinste</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <title>Using a Parallel Transcript/Subtitle Corpus for Sentence Compression</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/128.pdf</url>
       <bibkey>vandeghinste-tjong-kim-sang-2004-using</bibkey>
     </paper>
     <paper id="94">
       <author><first>Sofia</first><last>Stamou</last></author>
-      <author><first>Dimitris</first><last>Christodoulakis</last></author>
+      <author id="dimitris-christodoulakis"><first>Dimitris</first><last>Christodoulakis</last></author>
       <title>Handling Subtle Sense Distinctions Through <fixed-case>W</fixed-case>ordnet Semantic Types</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/133.pdf</url>
       <bibkey>stamou-christodoulakis-2004-handling</bibkey>
@@ -771,15 +771,15 @@
     </paper>
     <paper id="96">
       <author><first>Heike</first><last>Telljohann</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <title>The Tüba-<fixed-case>D</fixed-case>/<fixed-case>Z</fixed-case> Treebank: Annotating <fixed-case>G</fixed-case>erman with a Context-Free Backbone</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/135.pdf</url>
       <bibkey>telljohann-etal-2004-tuba</bibkey>
     </paper>
     <paper id="97">
-      <author><first>John S.</first><last>Garofolo</last></author>
-      <author><first>Christophe D.</first><last>Laprun</last></author>
+      <author id="john-s-garofolo"><first>John S.</first><last>Garofolo</last></author>
+      <author id="christophe-laprun"><first>Christophe D.</first><last>Laprun</last></author>
       <author><first>Martial</first><last>Michel</last></author>
       <author><first>Vincent M.</first><last>Stanford</last></author>
       <author><first>Elham</first><last>Tabassi</last></author>
@@ -804,7 +804,7 @@
       <bibkey>takezawa-kikui-2004-comparative</bibkey>
     </paper>
     <paper id="100">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Cornelis H.A.</first><last>Koster</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <title>Cost-effective Cross-lingual Document Classification</title>
@@ -813,7 +813,7 @@
     </paper>
     <paper id="101">
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <title>A Powerful and Versatile <fixed-case>XML</fixed-case> Format for Representing Role-semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/202.pdf</url>
       <bibkey>erk-pado-2004-powerful</bibkey>
@@ -821,11 +821,11 @@
     <paper id="102">
       <author><first>Stefan</first><last>Baumann</last></author>
       <author><first>Caren</first><last>Brinckmann</last></author>
-      <author><first>Silvia</first><last>Hansen-Schirra</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Stella</first><last>Neumann</last></author>
-      <author><first>Erich</first><last>Steiner</last></author>
+      <author id="erich-h-steiner"><first>Erich</first><last>Steiner</last></author>
       <author><first>Elke</first><last>Teich</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <title>The <fixed-case>MULI</fixed-case> Project: Annotation and Analysis of Information Structure in <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish</title>
@@ -833,14 +833,14 @@
       <bibkey>baumann-etal-2004-muli</bibkey>
     </paper>
     <paper id="103">
-      <author><first>P. H. J.</first><last>van der Kamp</last></author>
+      <author id="p-h-j-van-der-kamp"><first>P. H. J.</first><last>van der Kamp</last></author>
       <author><first>J. G.</first><last>Kruyt</last></author>
       <title>Putting the <fixed-case>D</fixed-case>utch <fixed-case>PAROLE</fixed-case> Corpus to Work</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/207.pdf</url>
       <bibkey>van-der-kamp-kruyt-2004-putting</bibkey>
     </paper>
     <paper id="104">
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Robert</first><last>Kelly</last></author>
       <title>Acquiring Reusable Multilingual Phonotactic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/208.pdf</url>
@@ -854,9 +854,9 @@
       <bibkey>neugebauer-wilson-2004-phonological</bibkey>
     </paper>
     <paper id="106">
-      <author><first>Pedro Concejero</first><last>Cerezo</last></author>
-      <author><first>Juan José Rodríguez</first><last>Soler</last></author>
-      <author><first>Daniel Tapias</first><last>Merino</last></author>
+      <author id="pedro-concejero-cerezo"><first>Pedro Concejero</first><last>Cerezo</last></author>
+      <author id="juan-jose-rodriguez-soler"><first>Juan José Rodríguez</first><last>Soler</last></author>
+      <author id="daniel-tapias"><first>Daniel Tapias</first><last>Merino</last></author>
       <author><first>Alberto J. Sánchez</first><last>García</last></author>
       <title>Methodology for Rapid Prototyping and Testing of <fixed-case>ASR</fixed-case> Based User Interfaces</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/210.pdf</url>
@@ -864,21 +864,21 @@
     </paper>
     <paper id="107">
       <author><first>Lars</first><last>Degerstedt</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <title>Open Resources for Language Technology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/211.pdf</url>
       <bibkey>degerstedt-jonsson-2004-open</bibkey>
     </paper>
     <paper id="108">
       <author><first>Marie-Laure</first><last>Reinberger</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>Unsupervised Text Mining for Ontology Extraction: An Evaluation of Statistical Measures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/212.pdf</url>
       <bibkey>reinberger-daelemans-2004-unsupervised</bibkey>
     </paper>
     <paper id="109">
       <author><first>Daniel</first><last>Aioanei</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Anja</first><last>Geumann</last></author>
       <author><first>Robert</first><last>Kelly</last></author>
       <author><first>Moritz</first><last>Neugebauer</last></author>
@@ -890,28 +890,28 @@
     <paper id="110">
       <author><first>Oscar</first><last>Corcho</last></author>
       <author><first>Raúl</first><last>García-Castro</last></author>
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
       <title>Benchmarking Ontology Tools. A Case Study for the <fixed-case>W</fixed-case>eb<fixed-case>ODE</fixed-case> Platform.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/217.pdf</url>
       <bibkey>corcho-etal-2004-benchmarking</bibkey>
     </paper>
     <paper id="111">
-      <author><first>Bayan Abu</first><last>Shawar</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="bayan-abu-shawar"><first>Bayan Abu</first><last>Shawar</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>A Chatbot as a Novel Corpus Visualization Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/218.pdf</url>
       <bibkey>shawar-atwell-2004-chatbot</bibkey>
     </paper>
     <paper id="112">
       <author><first>Florentina</first><last>Vasilescu</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <title>Evaluating Variants of the <fixed-case>L</fixed-case>esk Approach for Disambiguating Words</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/219.pdf</url>
       <bibkey>vasilescu-etal-2004-evaluating</bibkey>
     </paper>
     <paper id="113">
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <title>The Rationale for Building an Ontology Expressly for <fixed-case>NLP</fixed-case></title>
@@ -921,14 +921,14 @@
     <paper id="114">
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <title>Some Meaning Procedures of Ontological Semantics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/224.pdf</url>
       <bibkey>mcshane-etal-2004-meaning</bibkey>
     </paper>
     <paper id="115">
-      <author><first>Eric K.</first><last>Ringger</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="eric-ringger"><first>Eric K.</first><last>Ringger</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <author><first>Hisami</first><last>Suzuki</last></author>
@@ -938,7 +938,7 @@
     </paper>
     <paper id="116">
       <author><first>Hidetsugu</first><last>Nanba</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <title>Comparison of Some Automatic and Manual Methods for Summary Evaluation Based on the Text Summarization Challenge 2</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/229.pdf</url>
       <bibkey>nanba-okumura-2004-comparison</bibkey>
@@ -962,15 +962,15 @@
       <bibkey>folch-etal-2004-highlighting</bibkey>
     </paper>
     <paper id="119">
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <title>Word Sense Disambiguation as a <fixed-case>W</fixed-case>ordnets’ Validation Method in <fixed-case>B</fixed-case>alkanet</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/234.pdf</url>
       <bibkey>tufis-etal-2004-word</bibkey>
     </paper>
     <paper id="120">
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <title>Term Translations in Parallel Corpora: Discovery and Consistency Check</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/235.pdf</url>
       <bibkey>tufis-2004-term</bibkey>
@@ -984,27 +984,27 @@
       <bibkey>sarmento-etal-2004-corpografo</bibkey>
     </paper>
     <paper id="122">
-      <author><first>Daniel</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
       <author><first>Marc</first><last>Massot</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <title>Automatic Classification of Geographic Named Entities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/237.pdf</url>
       <bibkey>ferres-etal-2004-automatic</bibkey>
     </paper>
     <paper id="123">
-      <author><first>Olivia</first><last>Sanchez-Graillet</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="olivia-sanchez-graillet"><first>Olivia</first><last>Sanchez-Graillet</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>Acquiring <fixed-case>B</fixed-case>ayesian Networks from Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/240.pdf</url>
       <bibkey>sanchez-graillet-poesio-2004-acquiring</bibkey>
     </paper>
     <paper id="124">
       <author><first>Thanh Bon</first><last>Nguyen</last></author>
-      <author><first>Thi Minh Huyen</first><last>Nguyen</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Xuan Luong</first><last>Vu</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="xuan-luong-vu"><first>Xuan Luong</first><last>Vu</last></author>
       <title>Developping Tools and Building Linguistic Resources for <fixed-case>V</fixed-case>ietnamese Morpho-syntactic Processing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/241.pdf</url>
       <bibkey>nguyen-etal-2004-developping</bibkey>
@@ -1033,7 +1033,7 @@
       <bibkey>panunzi-etal-2004-using</bibkey>
     </paper>
     <paper id="128">
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Silvia</first><last>Bernardini</last></author>
       <author><first>Federica</first><last>Comastri</last></author>
       <author><first>Lorenzo</first><last>Piccioni</last></author>
@@ -1045,14 +1045,14 @@
       <bibkey>baroni-etal-2004-introducing</bibkey>
     </paper>
     <paper id="129">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>David</first><last>Woolner</last></author>
       <title>Exploiting Semantic Web Technologies for Intelligent Access to Historical Documents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/248.pdf</url>
       <bibkey>ide-woolner-2004-exploiting</bibkey>
     </paper>
     <paper id="130">
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Sabrina</first><last>Bisi</last></author>
       <title>Using Cooccurrence Statistics and the Web to Discover Synonyms in a Technical Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/249.pdf</url>
@@ -1073,7 +1073,7 @@
     </paper>
     <paper id="133">
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>Dominique</first><last>Fohr</last></author>
       <author><first>Jean-Paul</first><last>Haton</last></author>
       <title>A Complete Understanding Speech System Based on Semantic Concepts</title>
@@ -1081,10 +1081,10 @@
       <bibkey>jamoussi-etal-2004-complete</bibkey>
     </paper>
     <paper id="134">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Simov</last></author>
       <author><first>Hristo</first><last>Ganev</last></author>
-      <author><first>Krasimira</first><last>Ivanova</last></author>
+      <author id="krasimira-ivanova"><first>Krasimira</first><last>Ivanova</last></author>
       <author><first>Ilko</first><last>Grigorov</last></author>
       <title>The <fixed-case>CL</fixed-case>a<fixed-case>RK</fixed-case> System: <fixed-case>XML</fixed-case>-based Corpora Development System for Rapid Prototyping</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/258.pdf</url>
@@ -1100,15 +1100,15 @@
       <bibkey>badia-etal-2004-nlp</bibkey>
     </paper>
     <paper id="136">
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <title>Open-source Tools for Creation, Maintenance, and Storage of Lexical Resources for Language Generation from Ontologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/264.pdf</url>
       <bibkey>bontcheva-2004-open</bibkey>
     </paper>
     <paper id="137">
       <author><first>Agnes</first><last>Lisowska</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <title>User Query Analysis for the Specification and Evaluation of a Dialogue Processing and Retrieval System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/266.pdf</url>
       <bibkey>lisowska-etal-2004-user</bibkey>
@@ -1123,16 +1123,16 @@
       <bibkey>popov-etal-2004-creation</bibkey>
     </paper>
     <paper id="139">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <title>Abstracting a Dialog Act Tagset for Meeting Processing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/268.pdf</url>
       <bibkey>popescu-belis-2004-abstracting</bibkey>
     </paper>
     <paper id="140">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Loïs</first><last>Rigouste</last></author>
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Online Evaluation of Coreference Resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/270.pdf</url>
       <bibkey>popescu-belis-etal-2004-online</bibkey>
@@ -1140,8 +1140,8 @@
     <paper id="141">
       <author><first>Xavier</first><last>Carreras</last></author>
       <author><first>Isaac</first><last>Chao</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <title><fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing: An Open-Source Suite of Language Analyzers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/271.pdf</url>
       <bibkey>carreras-etal-2004-freeling</bibkey>
@@ -1184,7 +1184,7 @@
       <bibkey>przepiorkowski-etal-2004-search</bibkey>
     </paper>
     <paper id="146">
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <title>The <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish <fixed-case>SALA</fixed-case>-<fixed-case>II</fixed-case> Data Collection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/276.pdf</url>
       <bibkey>heeman-2004-american</bibkey>
@@ -1192,14 +1192,14 @@
     <paper id="147">
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <title>How Does Automatic Machine Translation Evaluation Correlate with Human Scoring as the Number of Reference Translations Increases?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/277.pdf</url>
       <bibkey>finch-etal-2004-automatic</bibkey>
     </paper>
     <paper id="148">
       <author><first>Slaven</first><last>Bilac</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <title>Evaluating the <fixed-case>FOKS</fixed-case> Error Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/278.pdf</url>
@@ -1208,7 +1208,7 @@
     <paper id="149">
       <author><first>Guillaume</first><last>Gibert</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Frédéric</first><last>Eliséi</last></author>
+      <author id="frederic-elisei"><first>Frédéric</first><last>Eliséi</last></author>
       <author><first>Denis</first><last>Beautemps</last></author>
       <author><first>Rémi</first><last>Brun</last></author>
       <title>Evaluation of a Speech Cuer: From Motion Capture to a Concatenative Text-to-cued Speech System</title>
@@ -1218,7 +1218,7 @@
     <paper id="150">
       <author><first>Nikolaos</first><last>Nanas</last></author>
       <author><first>Victoria</first><last>Uren</last></author>
-      <author><first>Anne</first><last>de Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>de Roeck</last></author>
       <author><first>John</first><last>Domingue</last></author>
       <title>Beyond <fixed-case>TREC</fixed-case>’s Filtering Track</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/283.pdf</url>
@@ -1234,7 +1234,7 @@
     <paper id="152">
       <author><first>Carol</first><last>Peters</last></author>
       <author><first>Martin</first><last>Braschler</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Michael</first><last>Kluck</last></author>
       <title>The Future of Evaluation for Cross-Language Information Retrieval Systems</title>
@@ -1243,11 +1243,11 @@
       <bibkey>peters-etal-2004-future</bibkey>
     </paper>
     <paper id="153">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Phil</first><last>Hall</last></author>
-      <author><first>Harald</first><last>Höge</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
-      <author><first>Antonio</first><last>Rincon</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
+      <author id="antonio-rincon"><first>Antonio</first><last>Rincon</last></author>
       <author><first>Francesco</first><last>Senia</last></author>
       <title><fixed-case>SALA</fixed-case> <fixed-case>II</fixed-case> Across the Finish Line: A Large Collection of Mobile Telephone Speech Databases from <fixed-case>N</fixed-case>orth and <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merica completed</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/288.pdf</url>
@@ -1255,7 +1255,7 @@
       <bibkey>van-den-heuvel-etal-2004-sala</bibkey>
     </paper>
     <paper id="154">
-      <author><first>Xavier</first><last>Gómez-Guinovart</last></author>
+      <author id="xavier-gomez-guinovart"><first>Xavier</first><last>Gómez-Guinovart</last></author>
       <author><first>Elena Sacau</first><last>Fontenla</last></author>
       <title>Parallel Corpora for the <fixed-case>G</fixed-case>alician Language: Building and Processing of the <fixed-case>CLUVI</fixed-case> (Linguistic Corpus of the <fixed-case>U</fixed-case>niversity of <fixed-case>V</fixed-case>igo)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/290.pdf</url>
@@ -1280,15 +1280,15 @@
       <bibkey>wagner-zeisler-2004-syntactically</bibkey>
     </paper>
     <paper id="157">
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>Lexical Entry Templates for Robust Deep Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/295.pdf</url>
       <abstract>We report on the development and employment of lexical entry templates in a large--coverage unification--based grammar of Spanish. The aim of the work reported in this paper is to provide robust deep linguistic processing in order to make the grammar more adequate for industrial NLP applications.</abstract>
       <bibkey>marimon-bel-2004-lexical</bibkey>
     </paper>
     <paper id="158">
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Liviu</first><last>Dragomirescu</last></author>
       <title>Tiered Tagging Revisited</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/296.pdf</url>
@@ -1296,7 +1296,7 @@
       <bibkey>tufis-dragomirescu-2004-tiered</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Eduard</first><last>Barbu</last></author>
       <title>A Methodology and Associated Tools for Building Interlingual Wordnets</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/298.pdf</url>
@@ -1305,8 +1305,8 @@
     </paper>
     <paper id="160">
       <author><first>Doaa</first><last>Samy</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
-      <author><first>José M.</first><last>Guirao</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="jose-m-guirao"><first>José M.</first><last>Guirao</last></author>
       <title>Construction of a Bilingual <fixed-case>A</fixed-case>rabic-<fixed-case>S</fixed-case>panish Lexicon of Verbs Based on a Parallel Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/299.pdf</url>
       <abstract>Parallel corpora are considered an important resource for the development of linguistic tools. In this paper our main goal is the development of a bilingual lexicon of verbs. The construction of this lexicon is possible using two main resources: I) a parallel corpus (through the alignment); II) the linguistic tools developed for Spanish (which serve as a starting point for developing tools for Arabic language). At the end, aligned equivalent verbs are detected automatically from a parallel corpus Spanish-Arabic. To achieve this goal, we had to pass through different preparatory stages concerning the assesment of the parallel corpus, the monolingual tokenization of each corpus, a preliminary sentence alignment and finally applying the model of automatic extraction of equivalent verbs. Our method is hybrid, since it combines both statistical and linguistic approaches.</abstract>
@@ -1326,8 +1326,8 @@
     </paper>
     <paper id="162">
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>A <fixed-case>B</fixed-case>ayesian Model for Shallow Syntactic Parsing of Natural Language Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/302.pdf</url>
       <abstract>For the present work, we introduce and evaluate a novel Bayesian syntactic shallow parser that is able to perform robust detection of pairs of subject-object and subject-direct object-indirect object for a given verb, in a natural language sentence. The shallow parser infers on the correct subject-object pairs based on knowledge provided by Bayesian network learning from annotated text corpora. The DELOS corpus, a collection of economic domain texts that has been automatically annotated using various morphological and syntactic tools was used as training material. Our shallow parser makes use of limited linguistic input. More specifically, we consider only part of speech tagging, the voice and the mood of the verb as well as the head word of a noun phrase. For the task of detecting the head word of a phrase we used a sentence boundary detector. Identifying the head word of a noun phrase, i.e. the word that holds the morphological information (case, number) of the whole phrase, also proves to be very helpful for our task as its morphological tag is all the information that is needed regarding the phrase. The evaluation of the proposed method was performed against three other machine learning techniques, namely naive Bayes, k-Nearest Neighbor and Support Vector Machines, methods that have been previously applied to natural language processing tasks with satisfactory results. The experimental outcomes portray a satisfactory performance of our proposed shallow parser, which reaches almost 92 per cent in terms of precision.</abstract>
@@ -1344,7 +1344,7 @@
     <paper id="164">
       <author><first>Jacques</first><last>Duchateau</last></author>
       <author><first>Tim</first><last>Ceyssens</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <title>Use and Evaluation of Prosodic Annotations in <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/304.pdf</url>
       <abstract>In the development of annotations for a spoken database, an important issue is whether the annotations can be generated automatically with sufficient precision, or whether expensive manual annotations are needed. In this paper, the case of prosodic annotations is discussed, which was investigated on the CGN database (Spoken Dutch Corpus). The main conclusions of this work are as follows. First, it was found that the available amount of manual prosodic annotations is sufficient for the development of our (baseline, decision tree based) prosodic models. In other words, more manual annotations do not improve the models. Second, the developed prosodic models for prominence are insufficiently accurate to produce automatic prominence annotations that are as good as the manual ones. But on the other hand the consistency between manual and automatic break annotations is as high as the inter-transcriber consistency for breaks. So given the current amount of manual break annotations, annotations for the remainder of the CGN database can be generated automatically with the same quality as the manual annotations.</abstract>
@@ -1352,7 +1352,7 @@
     </paper>
     <paper id="165">
       <author><first>Stephan</first><last>Busemann</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <title>Resources and Techniques for Multilingual Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/306.pdf</url>
       <abstract>Official travel warnings published regularly in the internet by the ministries for foreign affairs of France, Germany, and the UK provide a useful resource for assessing the risks associated with travelling to some countries. The shallow IE system SProUT has been extended to meet the specific needs of delivering a language-neutral output for English, French, or German input texts. A shared type hierarchy, a feature-enhanced gazetteer resource, and generic techniques of merging chunk analyses into larger results are major reusable results of this work.</abstract>
@@ -1361,9 +1361,9 @@
     <paper id="166">
       <author><first>Lei</first><last>Chen</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Eduardo</first><last>Maia</last></author>
-      <author><first>Susan</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan</first><last>McRoy</last></author>
       <title>Evaluating Factors Impacting the Accuracy of Forced Alignments in a Multimodal Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/307.pdf</url>
       <abstract>People, when processing human-to-human communication, utilize everything they can in order to understand that communication, including speech and information such as the time and location of an interlocutor's gesture and gaze. Speech and gesture are known to exhibit a synchronous relationship in human communication; however, the precise nature of that relationship requires further investigation. The construction of computer models of multimodal human communication would be enabled by the availability of multimodal communication corpora annotated with synchronized gesture and speech features. To investigate the temporal relationships of these knowledge sources, we have collected and are annotating several multimodal corpora with time-aligned features. Forced alignment between a speech file and its transcription is a crucial part of multimodal corpus production. This paper investigates a number of factors that may contribute to highly accurate forced alignments to support the rapid production of these multimodal corpora including the acoustic model, the match between the speech used for training the system and that to be force aligned, the amount of data used to train the ASR system, the availability of speaker adaptation, and the duration of alignment segments.</abstract>
@@ -1406,9 +1406,9 @@
       <bibkey>afify-emam-2004-collection</bibkey>
     </paper>
     <paper id="171">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Sia</first><last>Kolkovska</last></author>
+      <author id="sia-kolkovska"><first>Sia</first><last>Kolkovska</last></author>
       <author><first>Elisaveta</first><last>Balabanova</last></author>
       <author><first>Dimitar</first><last>Doikoff</last></author>
       <title>A Language Resources Infrastructure for <fixed-case>B</fixed-case>ulgarian</title>
@@ -1441,7 +1441,7 @@
       <bibkey>dowdall-etal-2004-role</bibkey>
     </paper>
     <paper id="174">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Lars</first><last>Nygaard</last></author>
       <title>The <fixed-case>OPUS</fixed-case> Corpus - Parallel and Free: <url>http://logos.uio.no/opus</url></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/320.pdf</url>
@@ -1449,18 +1449,18 @@
       <bibkey>tiedemann-nygaard-2004-opus</bibkey>
     </paper>
     <paper id="175">
-      <author><first>Javier</first><last>Farreres</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="javier-farreres"><first>Javier</first><last>Farreres</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <title>Selecting the Correct <fixed-case>E</fixed-case>nglish Synset for a <fixed-case>S</fixed-case>panish Sense</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/324.pdf</url>
       <abstract>This work tries to enrich the Spanish Wordnet using a Spanish taxonomy as a knowledge source. The Spanish taxonomy is composed by Spanish senses, while Spanish Wordnet is composed by synsets, mostly linked to English WordNet. A set of weighted associations between Spanish words and Wordnet synsets is used for inferring associations between both taxonomies.</abstract>
       <bibkey>farreres-rodriguez-2004-selecting</bibkey>
     </paper>
     <paper id="176">
-      <author><first>Asunción</first><last>Moreno</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Phil</first><last>Hall</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Francesco</first><last>Senia</last></author>
       <author><first>Herbert</first><last>Tropf</last></author>
@@ -1487,10 +1487,10 @@
       <bibkey>dalby-etal-2004-standards</bibkey>
     </paper>
     <paper id="179">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Dorota</first><last>Iskra</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Folkert</first><last>de Vriend</last></author>
+      <author id="folkert-de-vriend"><first>Folkert</first><last>de Vriend</last></author>
       <title><fixed-case>SLR</fixed-case> Validation: Current Trends and Developments</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/328.pdf</url>
       <abstract>This paper deals with the quality evaluation (validation) of Spoken Language Resources (SLR). The current situation in terms of relevant validation criteria and procedures is briefly presented. Next, a number of validation issues related to new data formats (XML-based annotations, UTF-16 encoding) are discussed. Further, new validation cycles that were introduced in a series of new projects like SpeeCon and OrienTel are addressed: prompt sheet validation, lexicon validation and pre-release validation. Finally, SPEX's current and future</abstract>
@@ -1503,11 +1503,11 @@
       <bibkey>saggion-2004-identifying</bibkey>
     </paper>
     <paper id="181">
-      <author><first>Laura</first><last>Alonso</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <author><first>Jordi</first><last>Escribano</last></author>
       <author><first>Xavier</first><last>Messeguer</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <title>Multiple Sequence Alignment for Characterizing the Lineal Structure of Revision</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/332.pdf</url>
       <abstract>We present a first approach to the application of a data mining technique, Multiple Sequence Alignment, to the systematization of a polemic aspect of discourse, namely, the expression of contrast, concession, counterargument and semantically similar discursive relations. The representation of the phenomena under study is carried out by very simple techniques, mostly pattern-matching, but the results allow to drive insightful conclusions on the organization of this aspect of discourse: equivalence classes of discourse markers are established, and systematic patterns are discovered, which will be applied in enhancing a discursive parser.</abstract>
@@ -1546,8 +1546,8 @@
     </paper>
     <paper id="186">
       <author><first>Costanza</first><last>Navarretta</last></author>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <title>“Human Language Technology Elements in a Knowledge Organisation System - The <fixed-case>VID</fixed-case> Project”</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/339.pdf</url>
       <abstract>This paper describes how Human Language Technologies and linguistic resources are used to support the construction of components of a knowledge organisation system. In particular we focus on methodologies and resources for building a corpus-based domain ontology and extracting relevant metadata information for text chunks from domain-specific corpora.</abstract>
@@ -1555,36 +1555,36 @@
     </paper>
     <paper id="187">
       <author><first>Kedar</first><last>Bellare</last></author>
-      <author><first>Anish Das</first><last>Sarma</last></author>
+      <author id="anish-das-sarma"><first>Anish Das</first><last>Sarma</last></author>
       <author><first>Atish Das</first><last>Sarma</last></author>
       <author><first>Navneet</first><last>Loiwal</last></author>
-      <author><first>Vaibhav</first><last>Mehta</last></author>
+      <author id="sanket-vaibhav-mehta"><first>Vaibhav</first><last>Mehta</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>Generic Text Summarization Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/342.pdf</url>
       <bibkey>bellare-etal-2004-generic</bibkey>
     </paper>
     <paper id="188">
-      <author><first>Natalia V.</first><last>Loukachevitch</last></author>
-      <author><first>Boris V.</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia V.</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris V.</first><last>Dobrov</last></author>
       <title>Development of Bilingual Domain-Specific Ontology for Automatic Conceptual Indexing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/343.pdf</url>
       <abstract>In the paper we describe development, means of evaluation and applications of Russian-English Sociopolitical Thesaurus specially developed as a linguistic resource for automatic text processing applications. The Sociopolitical domain is not a domain of social research but a broad domain of social relations including economic, political, military, cultural, sports and other subdomains. The knowledge of this domain is necessary for automatic text processing of such important documents as official documents, legislative acts, newspaper articles.</abstract>
       <bibkey>loukachevitch-dobrov-2004-development</bibkey>
     </paper>
     <paper id="189">
-      <author><first>Natalia V.</first><last>Loukachevitch</last></author>
-      <author><first>Boris V.</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia V.</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris V.</first><last>Dobrov</last></author>
       <title>Development of Ontologies with Minimal Set of Conceptual Relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/345.pdf</url>
       <abstract>In the paper we describe our approach to development of ontologies with small number of relation types. Non-taxonomic relations in our ontologies are based on ontological dependence conception described in the formal ontology. This minimal relations set does not depend on a domain or a task and makes possible to begin the ontology construction at once, as soon as a task is set and a domain is determined, to receive the first version of an ontology in short time. Such an initial ontology can be used for information-retrieval applications and can serve as a structural basis for further development of the ontology</abstract>
       <bibkey>loukachevitch-dobrov-2004-development-ontologies</bibkey>
     </paper>
     <paper id="190">
-      <author><first>Maria Fernanda Bacelar</first><last>do Nascimento</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Maria Fernanda Bacelar</first><last>do Nascimento</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Pereira</last></author>
+      <author id="luisa-pereira"><first>Luísa</first><last>Pereira</last></author>
       <title>Providing On-line Access to <fixed-case>P</fixed-case>ortuguese Language Resources: Corpora and Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/346.pdf</url>
       <abstract>Several Language Resources (LRs) for Portuguese, developed at the Center of Linguistics of the Lisbon University (CLUL), are available on-line at CLUL's webpage: www.clul.ul.pt/english/sectores/projecto_rld.html. These LRs have been extracted from or developed based on the Reference Corpus of Contemporary Portuguese (CRPC), a monitor corpus containing, at the present, more than 300 million words, taken by sampling from several types of written text (literary, newspaper, technical, didactic, juridical, parlamentary, etc.) and spoken text (informal and formal), pertaining to national and regional varieties of Portuguese (including European, Brazilian, African and Asian Portuguese). The LRs available for on-line queries include: a) several subcorpora (written and spoken, tagged and untagged) compiled and extracted from CRPC for specific CLUL's projects and now available for on-line queries; b) a published sample of "Português Fundamental", a spoken CRPC subcorpus, available for texts download; c) a frequency lexicon extracted from a CRPC subcorpus available for both on-line queries and download. Other RLs available for Portuguese are also referred: C-ORAL-ROM - Integrated Reference Corpora for Spoken Romance Languages, a CD-ROM edition of a spoken corpus with text-to-sound alignment; the LE-PAROLE corpus; the LE-PAROLE Lexicon and the SIMPLE Lexicon.</abstract>
@@ -1592,7 +1592,7 @@
     </paper>
     <paper id="191">
       <author><first>Bruno</first><last>Cartoni</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Yalina</first><last>Alphonse</last></author>
       <author><first>Sabine</first><last>Lehmann</last></author>
       <title>Automatisation of the Activity of Term Collection in Different Languages</title>
@@ -1602,7 +1602,7 @@
     </paper>
     <paper id="192">
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <title>Automatically Selecting Domain Markers for Terminology Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/348.pdf</url>
       <abstract>Some approaches to automatic terminology extraction from corpora imply the use of existing semantic resources for guiding the detection of terms. Most of these systems exploit specialised resources, like UMLS in the medical domain, while a few try to take profit from general-purpose semantic resources, like EuroWordNet (EWN). As the term extraction task is clearly domain depending, in the case a general-purpose resource without specific domain information is used, we need a way of attaching domain information to the units of the resource. For big resources it is desirable that this semantic enrichment could be carried out automatically. Given a specific domain, our proposal aims to detect in EWN those units that can be considered as domain markers (DM). We can define a DM as an EWN entry whose attached strings belong to the domain, as well as the variants of all its descendents through the hyponymy relation. The procedure we propose in this paper is fully automatic and, a priori, domain-independent. The only external knowledge it uses is a set of terms, which is an external vocabulary, which is considered to have at least one sense belonging to the domain.</abstract>
@@ -1610,28 +1610,28 @@
     </paper>
     <paper id="193">
       <author><first>Anne</first><last>Vilnat</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Véronique</first><last>Gendner</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
-      <author><first>Michèle</first><last>Jardino</last></author>
+      <author id="michele-jardino"><first>Michèle</first><last>Jardino</last></author>
       <title>The Ongoing Evaluation Campaign of Syntactic Parsing of <fixed-case>F</fixed-case>rench: <fixed-case>EASY</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/349.pdf</url>
       <abstract>This paper presents EASY (Evaluation of Analyzers of SYntax), an ongoing evaluation campaign of syntactic parsing of French, a subproject of EVALDA in the French TECHNOLANGUE program. After presenting the elaboration of the annotation formalism, we describe the corpus building steps, the annotation tools, the evaluation measures and finally, plans to produce a validated large linguistic resource, syntactically annotated</abstract>
       <bibkey>vilnat-etal-2004-ongoing</bibkey>
     </paper>
     <paper id="194">
-      <author><first>Kateřina</first><last>Veselá</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="katerina-vesela"><first>Kateřina</first><last>Veselá</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <title>Annotators’ Agreement: The Case of Topic-Focus Articulation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/350.pdf</url>
       <abstract>The annotation of the Prague Dependency Treebank (PDT) is conceived of as a multilayered scenario that comprises also dependency representations (tectogrammatical tree structures, TGTS's) of the underlying structure of the sentences. TGTS's capture three basic aspects of the underlying structure of sentences: (a) the dependency tree structure, (b) the kinds of dependency syntactic relations, and (c) the basic characteristics of the topic-focus articulation (TFA). Since the PDT is a large collection and the annotations on the deepest layer are to a large extent performed by several human annotators (based on an automatic preprocessing module), it is more than necessary to observe the consistence of annotators and the agreement among them. In the present paper, we summarize the results of the evaluation of parallel annotations of several samples taken from PDT and the measures accepted to improve the consistency of annotations.</abstract>
       <bibkey>vesela-etal-2004-annotators</bibkey>
     </paper>
     <paper id="195">
-      <author><first>Scott S. L.</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott S. L.</first><last>Piao</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Dawn</first><last>Archer</last></author>
       <author><first>Tony</first><last>McEnery</last></author>
@@ -1642,9 +1642,9 @@
     </paper>
     <paper id="196">
       <author><first>Frédéric</first><last>Landragin</last></author>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <author><first>Annalisa</first><last>Ricci</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Multimodal Meaning Representation for Generic Dialogue Systems Architectures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/352.pdf</url>
       <abstract>An unified language for the communicative acts between agents is essential for the design of multi-agents architectures. Whatever the type of interaction (linguistic, multimodal, including particular aspects such as force feedback), whatever the type of application (command dialogue, request dialogue, database querying), the concepts are common and we need a generic meta-model. In order to tend towards task-independent systems, we need to clarify the modules parameterization procedures. In this paper, we focus on the characteristics of a meta-model designed to represent meaning in linguistic and multimodal applications. This meta-model is called MMIL for MultiModal Interface Language, and has first been specified in the framework of the IST MIAMM European project. What we want to test here is how relevant is MMIL for a completely different context (a different task, a different interaction type, a different linguistic domain). We detail the exploitation of MMIL in the framework of the IST OZONE European project, and we draw the conclusions on the role of MMIL in the parameterization of task-independent dialogue managers.</abstract>
@@ -1659,26 +1659,26 @@
       <bibkey>braasch-olsen-2004-sto</bibkey>
     </paper>
     <paper id="198">
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <title>A Domain-Independent Approach to <fixed-case>IE</fixed-case> Rule Development</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/355.pdf</url>
       <abstract>A key element for the extraction of information in a natural language document is a set of shallow text analysis rules, which are typically based on pre-defined linguistic patterns. Current Information Extraction research aims at the automatic or semi-automatic acquisition of these rules. Within this research framework, we consider in this paper the potential for acquiring generic extraction patterns. Our research is based on the hypothesis that, terms (the linguistic representation of concepts in a specialised domain) and Named Entities (the names of persons, organisations and dates of importance in the text) can together be considered as the basic semantic entities of textual information and can therefore be used as a basis for the conceptual representation of domain specific texts and the definition of what constitutes an information extraction template in linguistic terms. The extraction patterns discovered by this approach involve significant associations of these semantic entities with verbs and they can subsequently be translated into the grammar formalism of choice.</abstract>
       <bibkey>zervanou-mcnaught-2004-domain</bibkey>
     </paper>
     <paper id="199">
-      <author><first>Laurence</first><last>Devillers</last></author>
-      <author><first>Hélène</first><last>Maynard</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Kevin</first><last>McTait</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Maynard</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="kevin-mctait"><first>Kevin</first><last>McTait</last></author>
       <author id="djamel-mostefa"><first>D.</first><last>Mostefa</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Laurent</first><last>Charnay</last></author>
-      <author><first>Caroline</first><last>Bousquet</last></author>
+      <author id="caroline-bousquet-vernhettes"><first>Caroline</first><last>Bousquet</last></author>
       <author><first>Nadine</first><last>Vigouroux</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author id="jeanne-villaneau"><first>J.</first><last>Villaneau</last></author>
       <author><first>Myriam</first><last>Vergnes</last></author>
@@ -1690,19 +1690,19 @@
     </paper>
     <paper id="200">
       <author><first>Emanuela</first><last>Cresti</last></author>
-      <author><first>Fernanda Bacelar</first><last>do Nascimento</last></author>
-      <author><first>Antonio Moreno</first><last>Sandoval</last></author>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Fernanda Bacelar</first><last>do Nascimento</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio Moreno</first><last>Sandoval</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <author><first>Philippe</first><last>Martin</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <title>The <fixed-case>C</fixed-case>-<fixed-case>ORAL</fixed-case>-<fixed-case>ROM</fixed-case> <fixed-case>CORPUS</fixed-case>. A Multilingual Resource of Spontaneous Speech for <fixed-case>R</fixed-case>omance Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/357.pdf</url>
       <abstract>The C-ORAL-ROM project has delivered a multilingual corpus of spontaneous speech for the main romance languages (Italian, French, Portuguese and Spanish). The collection aims to represent the variety of speech acts performed in everyday language and to enable the description of prosodic and syntactic structures in the four romance languages. Sampling criteria are defined in a corpus design scheme. C-ORAL-ROM adopts two different sampling strategies, one for the formal and one for the informal part: While a set of typical domains of application is selected to document the formal use of language, the informal part documents speech variation using parameters referring to the event’s structure (dialogue vs. monologue) and the sociological domain of use (family-private vs public). The four romance corpora are tagged with respect to terminal and non terminal prosodic breaks. Terminal breaks are assumed to be the more relevant cues for the identification of relevant linguistic domains in spontaneous speech (utterances). Relations with other concurrent criteria are discussed. The multimedia storage of the C-ORAL-ROM corpus is based on this principle; each textual string ending with a terminal break is aligned, through the Win Pitch speech software, to its acoustic counterpart, generating the data base of all utterances.</abstract>
       <bibkey>cresti-etal-2004-c</bibkey>
     </paper>
     <paper id="201">
-      <author><first>Bodil Nistrup</first><last>Madsen</last></author>
-      <author><first>Hanne Erdman</first><last>Thomsen</last></author>
+      <author id="bodil-nistrup-madsen"><first>Bodil Nistrup</first><last>Madsen</last></author>
+      <author id="hanne-erdman-thomsen"><first>Hanne Erdman</first><last>Thomsen</last></author>
       <author><first>Carl</first><last>Vikner</last></author>
       <title>Principles of a System for Terminological Concept Modelling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/358.pdf</url>
@@ -1711,8 +1711,8 @@
     </paper>
     <paper id="202">
       <author><first>Christophe</first><last>Van Bael</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>On the Usefulness of Large Spoken Language Corpora for Linguistic Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/359.pdf</url>
       <abstract>In the past, fundamental linguistic research was typically conducted on small data sets that were handcrafted for the specific research at hand. However, from the eighties onwards, many large spoken language corpora have become available. This study investigates the usefulness of large multi-purpose spoken language corpora for fundamental linguistic research. A research task was designed in which we tried to capture the major pronunciation differences between three speech styles in context-sensitive re-write rules at the phone level. These re-write rules were extracted from the alignments of both a manual phonetic transcription and an automatic phonetic transcription with a canonical reference transcription of the same material.</abstract>
@@ -1736,7 +1736,7 @@
       <bibkey>bartsch-2004-annotating</bibkey>
     </paper>
     <paper id="205">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Viktor</first><last>Pekar</last></author>
       <author><first>Laura</first><last>Hasler</last></author>
       <title>A Comparison of Summarisation Methods Based on Term Specificity Estimation</title>
@@ -1772,7 +1772,7 @@
     </paper>
     <paper id="210">
       <author><first>Morena</first><last>Danieli</last></author>
-      <author><first>Juan María</first><last>Garrido</last></author>
+      <author id="juan-maria-garrido"><first>Juan María</first><last>Garrido</last></author>
       <author><first>Massimo</first><last>Moneglia</last></author>
       <author><first>Andrea</first><last>Panizza</last></author>
       <author><first>Silvia</first><last>Quazza</last></author>
@@ -1782,16 +1782,16 @@
       <bibkey>danieli-etal-2004-evaluation</bibkey>
     </paper>
     <paper id="211">
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Towards the Use of Word Stems and Suffixes for Statistical Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/372.pdf</url>
       <bibkey>popovic-ney-2004-towards</bibkey>
     </paper>
     <paper id="212">
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/374.pdf</url>
       <bibkey>eck-etal-2004-language</bibkey>
@@ -1805,8 +1805,8 @@
     <paper id="214">
       <author><first>Carlos</first><last>Amaral</last></author>
       <author><first>Dominique</first><last>Laurent</last></author>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Afonso</first><last>Mendes</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
       <author><first>Cláudia</first><last>Pinto</last></author>
       <title>Design and Implementation of a Semantic Search Engine for <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/378.pdf</url>
@@ -1814,31 +1814,31 @@
     </paper>
     <paper id="215">
       <author><first>Richard</first><last>Campbell</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <title>Converting Treebank Annotations to Language Neutral Syntax</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/380.pdf</url>
       <bibkey>campbell-ringger-2004-converting</bibkey>
     </paper>
     <paper id="216">
       <author><first>Yalina</first><last>Alphonse</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <title>Methodology For Building Thematic Indexes In Medicine For <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/381.pdf</url>
       <bibkey>alphonse-bouillon-2004-methodology</bibkey>
     </paper>
     <paper id="217">
-      <author><first>Carmen</first><last>Garcia-Mateo</last></author>
-      <author><first>Javier</first><last>Dieguez-Tirado</last></author>
-      <author><first>Laura</first><last>Docio-Fernandez</last></author>
-      <author><first>Antonio</first><last>Cardenal-Lopez</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>Garcia-Mateo</last></author>
+      <author id="javier-dieguez-tirado"><first>Javier</first><last>Dieguez-Tirado</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docio-Fernandez</last></author>
+      <author id="antonio-cardenal"><first>Antonio</first><last>Cardenal-Lopez</last></author>
       <title><fixed-case>T</fixed-case>ranscrigal: A Bilingual System for Automatic Indexing of Broadcast News</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/382.pdf</url>
       <bibkey>garcia-mateo-etal-2004-transcrigal</bibkey>
     </paper>
     <paper id="218">
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <author><first>Aitzpea</first><last>Garmendia</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <title><fixed-case>A</fixed-case>bar-<fixed-case>H</fixed-case>itz: An Annotation Tool for the <fixed-case>B</fixed-case>asque Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/383.pdf</url>
       <bibkey>diaz-de-ilarraza-etal-2004-abar</bibkey>
@@ -1852,15 +1852,15 @@
     </paper>
     <paper id="220">
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Margarita Alonso</first><last>Ramos</last></author>
-      <author><first>Antonia</first><last>Martí</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita Alonso</first><last>Ramos</last></author>
+      <author id="m-antonia-marti"><first>Antonia</first><last>Martí</last></author>
       <title>Enriching the <fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et by Collocations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/386.pdf</url>
       <bibkey>wanner-etal-2004-enriching</bibkey>
     </paper>
     <paper id="221">
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Hiroaki</first><last>Sato</last></author>
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et as a “Net”</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/388.pdf</url>
@@ -1869,7 +1869,7 @@
     <paper id="222">
       <author><first>Alfonso</first><last>Ortega</last></author>
       <author><first>Federico</first><last>Sukno</last></author>
-      <author><first>Eduardo</first><last>LLeida</last></author>
+      <author id="eduardo-lleida"><first>Eduardo</first><last>LLeida</last></author>
       <author><first>Alejandro</first><last>Frangi</last></author>
       <author><first>Antonio</first><last>Miguel</last></author>
       <author><first>Luis</first><last>Buera</last></author>
@@ -1879,10 +1879,10 @@
       <bibkey>ortega-etal-2004-av</bibkey>
     </paper>
     <paper id="223">
-      <author><first>Robert S.</first><last>Melvin</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Melvin</last></author>
       <author><first>Win</first><last>May</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
-      <author><first>Panayiotis</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis</first><last>Georgiou</last></author>
       <author><first>Shadi</first><last>Ganjavi</last></author>
       <title>Creation of a Doctor-Patient Dialogue Corpus Using Standardized Patients</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/391.pdf</url>
@@ -1891,14 +1891,14 @@
     <paper id="224">
       <author><first>Brian</first><last>MacWhinney</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Craig</first><last>Martell</last></author>
       <title><fixed-case>T</fixed-case>alkbank: Building an Open Unified Multimodal Database of Communicative Interaction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/392.pdf</url>
       <bibkey>macwhinney-etal-2004-talkbank</bibkey>
     </paper>
     <paper id="225">
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Susanne</first><last>Riehemann</last></author>
       <author><first>Kristin</first><last>Precoda</last></author>
       <title>A Fine-Grained Evaluation Method for Speech-to-Speech Machine Translation Using Concept Annotations</title>
@@ -1906,17 +1906,17 @@
       <bibkey>belvin-etal-2004-fine</bibkey>
     </paper>
     <paper id="226">
-      <author><first>David M.</first><last>de Matos</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>Nuno J.</first><last>Mamede</last></author>
+      <author id="david-martins-de-matos"><first>David M.</first><last>de Matos</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="nuno-mamede"><first>Nuno J.</first><last>Mamede</last></author>
       <title>Rethinking Reusable Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/395.pdf</url>
       <bibkey>de-matos-etal-2004-rethinking</bibkey>
     </paper>
     <paper id="227">
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Ruth</first><last>Reeves</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>Rachel</first><last>Szekely</last></author>
       <author><first>Veronika</first><last>Zielinska</last></author>
       <author><first>Brian</first><last>Young</last></author>
@@ -1925,13 +1925,13 @@
       <bibkey>meyers-etal-2004-cross</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Ruth</first><last>Reeves</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>Rachel</first><last>Szekely</last></author>
       <author><first>Veronika</first><last>Zielinska</last></author>
       <author><first>Brian</first><last>Young</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>Annotating Noun Argument Structure for <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/398.pdf</url>
       <bibkey>meyers-etal-2004-annotating</bibkey>
@@ -1970,7 +1970,7 @@
       <bibkey>palmer-etal-2004-utilization</bibkey>
     </paper>
     <paper id="233">
-      <author><last>Yoshida</last><first>Kyôsuke</first></author>
+      <author id="kyosuke-yoshida"><first>Kyôsuke</first><last>Yoshida</last></author>
       <author><last>Hashimoto</last><first>Taiichi</first></author>
       <author><last>Tokunaga</last><first>Takenobu</first></author>
       <author><last>Tanaka</last><first>Hozumi</first></author>
@@ -1982,7 +1982,7 @@
     <paper id="234">
       <author><last>Tokunaga</last><first>Takenobu</first></author>
       <author><last>Koyama</last><first>Tomofumi</first></author>
-      <author><last>Saito</last><first>Suguru</first></author>
+      <author id="suguru-saito"><first>Suguru</first><last>Saito</last></author>
       <author><last>Nakajima</last><first>Masayuki</first></author>
       <title>Classification of <fixed-case>J</fixed-case>apanese Spatial Nouns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/405.pdf</url>
@@ -2001,25 +2001,25 @@
       <bibkey>sanfilippo-etal-2004-meaningful</bibkey>
     </paper>
     <paper id="236">
-      <author><first>V. Finley</first><last>Lacatusu</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="finley-lacatusu"><first>V. Finley</first><last>Lacatusu</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <title>Multi-Document Summarization Using Multiple-Sequence Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/408.pdf</url>
       <abstract>This paper describes a novel clustering-based text summarization system that uses Multiple Sequence Alignment to improve the alignment of sentences within topic clusters. While most current clustering-based summarization systems base their summaries only on the common information contained in a collection of highly-related sentences, our system constructs more informative summaries that incorporate both the redundant and unique contributions of the sentences in the cluster. When evaluated using ROUGE, the summaries produced by our system represent a substantial improvement over the baseline, which is at 63% of the human performance.</abstract>
       <bibkey>lacatusu-etal-2004-multi</bibkey>
     </paper>
     <paper id="237">
-      <author><first>Jahna</first><last>Otterbacher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <title><fixed-case>R</fixed-case>evision<fixed-case>B</fixed-case>ank: A Resource for Revision-based Multi-document Summarization and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/409.pdf</url>
       <abstract>Multi-document summaries produced via sentence extraction often suffer from a number of cohesion problems, including dangling anaphora, sudden shifts in topic and incorrect or awkward chronological ordering. Therefore, the development of an automated revision process to correct such problems is a research area of current interest. We present the RevisionBank, a corpus of 240 extractive, multi-document summaries that have been manually revised to promote cohesion. The summaries were revised by six linguistic students using a constrained set of revision operations that we previously developed. In the current paper, we describe the process of developing a taxonomy of cohesion problems and corrective revision operators that address such problems, as well as an annotation schema for our corpus. Finally, we discuss how our taxonomy and corpus can be used for the study of revision-based multi-document summarization as well as for summary evaluation.</abstract>
       <bibkey>otterbacher-radev-2004-revisionbank</bibkey>
     </paper>
     <paper id="238">
-      <author><first>Sandra</first><last>Aluisio</last></author>
-      <author><first>Gisele Montilha</first><last>Pinheiro</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluisio</last></author>
+      <author id="gisele-montilha-pinheiro"><first>Gisele Montilha</first><last>Pinheiro</last></author>
       <author><first>Aline M. P.</first><last>Manfrin</last></author>
       <author><first>Leandro H. M.</first><last>de Oliveira</last></author>
       <author><first>Luiz C.</first><last>Genoves, Jr.</last></author>
@@ -2030,8 +2030,8 @@
       <bibkey>aluisio-etal-2004-lacio</bibkey>
     </paper>
     <paper id="239">
-      <author><first>Dragomir</first><last>Radev</last></author>
-      <author><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
       <author><first>Zhu</first><last>Zhang</last></author>
       <title><fixed-case>CST</fixed-case> Bank: A Corpus for the Study of Cross-document Structural Relationships</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/411.pdf</url>
@@ -2065,14 +2065,14 @@
     </paper>
     <paper id="243">
       <author><first>Sun-Mee</first><last>Bae</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Lexical Analysis of Agglutinative Languages Using a Dictionary of Lemmas and Lexical Transducers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/417.pdf</url>
       <abstract>This paper presents a simple method for performing a lexical analysis of agglutinative languages like Korean, which have a heavy morphology. Especially, for nouns and adverbs with regular morphological modifications and/or high productivity, we do not need to artificially construct huge dictionaries of all inflected forms of lemmas. To construct a dictionary of lemmas and lexical transducers, first, we construct automatically a dictionary of all inflected forms from KAIST POS-Tagged Corpus. Secondly, we separate the party of lemmas and one of sequences of inflectional suffixes. Thirdly, we describe their lexical transducers (i.e., morphological rules) to recognize all inflected forms of lemmas for nouns and adverbs according to the combinatorial restrictions between lemmas and their inflectional suffixes. Finally, we evaluate the advantages of this method.</abstract>
       <bibkey>bae-choi-2004-lexical</bibkey>
     </paper>
     <paper id="244">
-      <author><first>Rita</first><last>Nüebel</last></author>
+      <author id="rita-nuebel"><first>Rita</first><last>Nüebel</last></author>
       <title>Evaluation and Adaptation of a Specialised Language Checking Tool for Non-specialised Machine Translation and Non-expert <fixed-case>MT</fixed-case> Users for Multi-lingual Telecooperation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/418.pdf</url>
       <abstract>Style guides or writing recommendations play an important role in the field of technical documentation production, e.g. in industrial contexts. Also, writing recommendations are used in technical contexts together with machine translation (MT) in order to circumvent the MT system's weaknesses. This paper describes the evaluation and adaptation of a language checker deployed in the project int.unity In this project, both MT and a specialised language checker were adapted to the requirements of non-expert users and a non-technical domain. The language technology was integrated with the groupware platform BSCW to support the multi-lingual communication of geographically distributed teams concerned with trade union work. The users' languages were either German or English, i.e. the users were monolingual. We chose linguatec's server version of Personal Translator 2004 MT system for the German&lt;-&gt;English translations. The language checker CLAT for German and English has been developed at IAI. It is used by technical authors to support the production of high-quality technical documentation. The CLAT core system was adapted and extended in order to match the new requirements imposed by both the user profile and the subsequent MT application. In this paper, the focus will be on the assessment and adaptation of style rules for German.</abstract>
@@ -2092,7 +2092,7 @@
       <bibkey>lavelli-etal-2004-critical</bibkey>
     </paper>
     <paper id="246">
-      <author><first>Jer</first><last>Hayes</last></author>
+      <author id="jer-hayes"><first>Jer</first><last>Hayes</last></author>
       <author><first>Tony</first><last>Veale</last></author>
       <author><first>Nuno</first><last>Seco</last></author>
       <title>Enriching <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Via Generative Metonymy and Creative Polysemy</title>
@@ -2103,8 +2103,8 @@
     <paper id="247">
       <author><first>Tom</first><last>Laureys</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Dirk</first><last>Van Compernolle</last></author>
       <title>Evaluation and Adaptation of the Celex <fixed-case>D</fixed-case>utch Morphological Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/421.pdf</url>
@@ -2113,8 +2113,8 @@
     </paper>
     <paper id="248">
       <author><first>Li</first><last>Tang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Lingpeng</first><last>Yang</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="lingpeng-yang"><first>Lingpeng</first><last>Yang</last></author>
       <author><first>Yu</first><last>Nie</last></author>
       <title>A Model of Semantic Representations Analysis for <fixed-case>C</fixed-case>hinese Sentences</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/422.pdf</url>
@@ -2137,7 +2137,7 @@
       <bibkey>quirk-2004-training</bibkey>
     </paper>
     <paper id="251">
-      <author><first>Sonja E.</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja E.</first><last>Bosch</last></author>
       <author><first>Laurette</first><last>Pretorius</last></author>
       <title>Software Tools for Morphological Tagging of <fixed-case>Z</fixed-case>ulu Corpora and Lexicon Development</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/427.pdf</url>
@@ -2187,7 +2187,7 @@
       <bibkey>charoenporn-etal-2004-open</bibkey>
     </paper>
     <paper id="257">
-      <author><first>Lambros</first><last>Kranias</last></author>
+      <author id="lambros-cranias"><first>Lambros</first><last>Kranias</last></author>
       <author><first>Anna</first><last>Samiotou</last></author>
       <title>Automatic Translation Memory Fuzzy Match Post-Editing: A Step Beyond Traditional <fixed-case>TM</fixed-case>/<fixed-case>MT</fixed-case> Integration</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/435.pdf</url>
@@ -2216,11 +2216,11 @@
       <bibkey>novak-etal-2004-combining</bibkey>
     </paper>
     <paper id="260">
-      <author><first>Balázs</first><last>Kis</last></author>
-      <author><first>Begoña</first><last>Villada</last></author>
+      <author id="balazs-kis"><first>Balázs</first><last>Kis</last></author>
+      <author id="begona-villada-moiron"><first>Begoña</first><last>Villada</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <author><first>Gábor</first><last>Ugray</last></author>
-      <author><first>Tamás</first><last>Bíró</last></author>
+      <author id="tamas-biro"><first>Tamás</first><last>Bíró</last></author>
       <author><first>Gábor</first><last>Pohl</last></author>
       <author><first>John</first><last>Nerbonne</last></author>
       <title>A New Approach to the Corpus-based Statistical Investigation of <fixed-case>H</fixed-case>ungarian Multi-word Lexemes</title>
@@ -2228,7 +2228,7 @@
       <bibkey>kis-etal-2004-new</bibkey>
     </paper>
     <paper id="261">
-      <author><first>M. Begoña Villada</first><last>Moirón</last></author>
+      <author id="begona-villada-moiron"><first>M. Begoña Villada</first><last>Moirón</last></author>
       <title>Discarding Noise in an Automatically Acquired Lexicon of Support verb Constructions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/442.pdf</url>
       <abstract>We applied data-driven methods to carry out automatic acquisition of Dutch prepositional support verb constructions (SVCs) in corpora (e.g., iets in de gaten houden (``keep an eye on something'')). This paper addresses the question whether linguistic diagnostics help to discard noise from the nbest lists and how to (semi-)automatically apply such linguistic diagnostics to parsed corpora. We show that some of the linguistic diagnostics proposed in Hollebrandse (1993) effectively identify SVCs and contribute a modest error rate decrease.</abstract>
@@ -2236,7 +2236,7 @@
     </paper>
     <paper id="262">
       <author><first>Francisco</first><last>Nevado</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Josu</first><last>Landa</last></author>
       <title>Translation Memories Enrichment by Statistical Bilingual Segmentation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf</url>
@@ -2264,7 +2264,7 @@
     </paper>
     <paper id="265">
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <title>Using Large Multi-purpose Corpora for Specific Research Questions: Discourse Phenomena Related to Wh-questions in the Spoken <fixed-case>D</fixed-case>utch Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/449.pdf</url>
       <abstract>In this paper, we investigate whether a dataset derived from a multi-purpose corpus such as the Spoken Dutch Corpus may be considered appropriate for developing a taxonomy of wh-questions, and a model of the way in which these questions are integrated in spoken discourse. We compare the results obtained from the Spoken Dutch Corpus with a similar analysis of a large random collection of FAQs from the internet. We find substantial differences between the questions in spoken discourse and FAQs. Therefore, it may not be trivial to use a general purpose corpus as a starting point for developing models for human-computer interaction.</abstract>
@@ -2279,24 +2279,24 @@
       <bibkey>mariani-badii-2004-methods</bibkey>
     </paper>
     <paper id="267">
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Heidi</first><last>Johnson</last></author>
       <author><first>Markus</first><last>Buchhorn</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <title>Architecture for Distributed Language Resource Management and Archiving</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/451.pdf</url>
       <abstract>An architecture is presented that provides an integrated framework for managing, archiving and accessing language resources. This architecture was discussed in the DELAMAN network – a world-wide network of archives holding material about endangered languages. Such a framework will be built upon a metadata infrastructure, a mechanism to resolve unique resource identifiers, user and access rights management components. These components are closely related and have to be based on redundant and distributed services. For all these components existing middleware seems to be available, however, it has to be checked how they can interact with each other.</abstract>
       <bibkey>wittenburg-etal-2004-architecture</bibkey>
     </paper>
     <paper id="268">
-      <author><first>Hanne</first><last>Fersøe</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
       <author><first>Elviira</first><last>Hartikainen</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Giulio</first><last>Maltese</last></author>
-      <author><first>Asuncíon</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asuncíon</first><last>Moreno</last></author>
       <author><first>Shaunie</first><last>Shammass</last></author>
-      <author><first>Ute</first><last>Ziegenhain</last></author>
+      <author id="ute-ziegenhain"><first>Ute</first><last>Ziegenhain</last></author>
       <title>Creation and Validation of Large Lexica for Speech-to-Speech Translation Purposes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/452.pdf</url>
       <abstract>This paper presents specifications and requirements for creation and validation of large lexica that are needed in automatic Speech Recognition (ASR), Text-to-Speech (TTS) and statistical Speech-to-Speech Translation (SST) systems. The prepared language resources are created and validated within the scope of the EU-project LC-STAR (Lexica and Corpora for Speech-to-Speech Translation Components) during years 2002-2005. Large lexica consisting of phonetic, suprasegmental and morpho-syntactic content will be provided with well-documented specifications for 13 languages. A short summary of the LC-STAR project itself is presented. Overview about the specification for the corpora collection and word extraction as well as the specification and format of the lexica are presented. Particular attention is paid to the validation of the produced lexica and the lessons learnt during pre-validation. The created and validated language resources will be available via ELRA/ELDA.</abstract>
@@ -2313,8 +2313,8 @@
     <paper id="270">
       <author><first>Panagiotis</first><last>Zervas</last></author>
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>Learning to Predict Pitch Accents Using <fixed-case>B</fixed-case>ayesian Belief Networks for <fixed-case>G</fixed-case>reek Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/458.pdf</url>
       <bibkey>zervas-etal-2004-learning</bibkey>
@@ -2328,10 +2328,10 @@
       <bibkey>more-etal-2004-grammar</bibkey>
     </paper>
     <paper id="272">
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Greg</first><last>Gulrajani</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Marcus</first><last>Uneson</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="greg-gul-rajani"><first>Greg</first><last>Gulrajani</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="marcus-uneson"><first>Marcus</first><last>Uneson</last></author>
       <title>Cross-Disciplinary Integration of Metadata Descriptions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/462.pdf</url>
       <bibkey>wittenburg-etal-2004-cross</bibkey>
@@ -2374,10 +2374,10 @@
       <bibkey>gevaudan-wiebel-2004-dynamic</bibkey>
     </paper>
     <paper id="278">
-      <author><first>Laura</first><last>Alonso</last></author>
-      <author><first>Maria</first><last>Fuentes</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes</last></author>
       <author><first>Marc</first><last>Massot</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <title>Re-using High-quality Resources for Continued Evaluation of Automated Summarization Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/472.pdf</url>
       <bibkey>alonso-etal-2004-using</bibkey>
@@ -2389,18 +2389,18 @@
       <bibkey>rossler-2004-corpus</bibkey>
     </paper>
     <paper id="280">
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <author><first>Onno</first><last>Crasborn</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
       <title>Collaborative Annotation of Sign Language Data with Peer-to-Peer Technology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/473.pdf</url>
       <bibkey>brugman-etal-2004-collaborative</bibkey>
     </paper>
     <paper id="281">
-      <author><first>Glòria</first><last>Vázquez</last></author>
-      <author><first>Ana Fernández</first><last>Montraveta</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Laura</first><last>Alonso</last></author>
+      <author id="gloria-vazquez"><first>Glòria</first><last>Vázquez</last></author>
+      <author id="ana-fernandez"><first>Ana Fernández</first><last>Montraveta</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso</last></author>
       <title>Semantic Categorization of <fixed-case>S</fixed-case>panish Se-constructions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/474.pdf</url>
       <bibkey>vazquez-etal-2004-semantic</bibkey>
@@ -2408,22 +2408,22 @@
     <paper id="282">
       <author><first>Angelo</first><last>Dalli</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Web Services Architecture for Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/475.pdf</url>
       <bibkey>dalli-etal-2004-web</bibkey>
     </paper>
     <paper id="283">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Markus</first><last>Uneson</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="marcus-uneson"><first>Markus</first><last>Uneson</last></author>
       <author><first>Sven</first><last>Strömqvist</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>A Large Metadata Domain of Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/478.pdf</url>
       <bibkey>broeder-etal-2004-large</bibkey>
@@ -2431,44 +2431,44 @@
     <paper id="284">
       <author><first>Tamás</first><last>Gröbler</last></author>
       <author><first>Gábor</first><last>Hodász</last></author>
-      <author><first>Balázs</first><last>Kis</last></author>
+      <author id="balazs-kis"><first>Balázs</first><last>Kis</last></author>
       <title><fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho <fixed-case>TM</fixed-case>: A Rule-Based Translation Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/479.pdf</url>
       <bibkey>grobler-etal-2004-metamorpho</bibkey>
     </paper>
     <paper id="285">
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
       <title>Annotating Multi-media/Multi-modal Resources with <fixed-case>ELAN</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/480.pdf</url>
       <bibkey>brugman-russel-2004-annotating</bibkey>
     </paper>
     <paper id="286">
-      <author><first>Agnès</first><last>Tutin</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
       <author><first>Meriam</first><last>Haddara</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <title>Annotation of Anaphoric Expressions in an Aligned Bilingual Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/482.pdf</url>
       <bibkey>tutin-etal-2004-annotation</bibkey>
     </paper>
     <paper id="287">
       <author><first>Tylman</first><last>Ule</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <title>Unexpected Productions May Well be Errors</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/483.pdf</url>
       <bibkey>ule-simov-2004-unexpected</bibkey>
     </paper>
     <paper id="288">
       <author><first>Avik</first><last>Sarkar</last></author>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <title>A Framework for Evaluating the Suitability of Non-<fixed-case>E</fixed-case>nglish Corpora for Language Engineering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/485.pdf</url>
       <bibkey>sarkar-de-roeck-2004-framework</bibkey>
     </paper>
     <paper id="289">
       <author><first>Anna</first><last>Samiotou</last></author>
-      <author><first>Lambros</first><last>Kranias</last></author>
+      <author id="lambros-cranias"><first>Lambros</first><last>Kranias</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <title>Intelligent Building of Language Resources for <fixed-case>HLT</fixed-case> Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/486.pdf</url>
@@ -2483,18 +2483,18 @@
       <bibkey>akiba-etal-2004-collecting</bibkey>
     </paper>
     <paper id="291">
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <title>Multilingual Pattern Libraries for Question Answering: a Case Study for Definition Questions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/488.pdf</url>
       <bibkey>tanev-etal-2004-multilingual</bibkey>
     </paper>
     <paper id="292">
       <author><first>Michael</first><last>Daum</last></author>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <title>Automatic Transformation of Phrase Treebanks to Dependency Trees</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/489.pdf</url>
@@ -2528,7 +2528,7 @@
       <bibkey>slavcheva-2004-verb</bibkey>
     </paper>
     <paper id="297">
-      <author><first>Walter</first><last>Kasper</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
       <author><first>Jörg</first><last>Steffen</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -2540,14 +2540,14 @@
       <author><first>S.R.</first><last>Deepa</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <author><first>A.G.</first><last>Ramakrishnan</last></author>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <title>Automatic Generation of Compound Word Lexicon for <fixed-case>H</fixed-case>indi Speech Synthesis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/501.pdf</url>
       <bibkey>deepa-etal-2004-automatic</bibkey>
     </paper>
     <paper id="299">
       <author><first>Saif</first><last>Ahmad</last></author>
-      <author><first>Paulo C. F.</first><last>de Oliveira</last></author>
+      <author id="paulo-c-f-de-oliveira"><first>Paulo C. F.</first><last>de Oliveira</last></author>
       <author><first>Khurshid</first><last>Ahmad</last></author>
       <title>Summarization of Multimodal Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/502.pdf</url>
@@ -2564,7 +2564,7 @@
       <author><first>Syd</first><last>Bauman</last></author>
       <author><first>Alejandro</first><last>Bia</last></author>
       <author><first>Lou</first><last>Burnard</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Christine</first><last>Ruotolo</last></author>
       <author><first>Susan</first><last>Schreibman</last></author>
       <title>Migrating Language Resources from <fixed-case>SGML</fixed-case> to <fixed-case>XML</fixed-case>: The Text Encoding Initiative Recommendations</title>
@@ -2572,16 +2572,16 @@
       <bibkey>bauman-etal-2004-migrating</bibkey>
     </paper>
     <paper id="302">
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <author><first>Svend</first><last>Kiilerich</last></author>
       <title>Evaluating Conversation with Hans Christian Andersen</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/505.pdf</url>
       <bibkey>bernsen-etal-2004-evaluating</bibkey>
     </paper>
     <paper id="303">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
       <title>The New <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish <fixed-case>HLT</fixed-case> Programme: a Concerted Effort to Stimulate the <fixed-case>HLT</fixed-case> Sector</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/506.pdf</url>
       <bibkey>cucchiarini-dhalleweyn-2004-new</bibkey>
@@ -2596,14 +2596,14 @@
     <paper id="305">
       <author><first>Rachel</first><last>Aires</last></author>
       <author><first>Aline</first><last>Manfrin</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <title>What is my Style? Using Stylistic Features of <fixed-case>P</fixed-case>ortuguese Web Texts to Classify Web Pages According to Users’ Needs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/508.pdf</url>
       <bibkey>aires-etal-2004-style</bibkey>
     </paper>
     <paper id="306">
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Silvia</first><last>Bernardini</last></author>
       <title><fixed-case>B</fixed-case>oot<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>: Bootstrapping Corpora and Terms from the Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/509.pdf</url>
@@ -2622,8 +2622,8 @@
       <bibkey>barbu-2004-word</bibkey>
     </paper>
     <paper id="309">
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Onno</first><last>Crasborn</last></author>
       <title>Using Profiles for <fixed-case>IMDI</fixed-case> Metadata Creation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/513.pdf</url>
@@ -2636,25 +2636,25 @@
       <bibkey>morth-2004-rethinking</bibkey>
     </paper>
     <paper id="311">
-      <author><first>Daniel</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
       <author><first>Marc</first><last>Massot</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <title>Automatic Building Gazetteers of Co-referring Named Entities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/516.pdf</url>
       <bibkey>ferres-etal-2004-automatic-building</bibkey>
     </paper>
     <paper id="312">
       <author><first>Nilda</first><last>Ruimy</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
       <title>Semi-Automatic Derivation of a <fixed-case>F</fixed-case>rench Lexicon from <fixed-case>CLIPS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/517.pdf</url>
       <bibkey>ruimy-etal-2004-semi</bibkey>
     </paper>
     <paper id="313">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <title>The <fixed-case>A</fixed-case>merican National Corpus First Release</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/518.pdf</url>
@@ -2669,22 +2669,22 @@
       <bibkey>evert-etal-2004-identifying</bibkey>
     </paper>
     <paper id="315">
-      <author><first>Laila</first><last>Dybkjær</last></author>
-      <author><first>Niels Ole</first><last>Bernse</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernse</last></author>
       <title>Towards General-Purpose Annotation Tools – How Far Are We Today?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/520.pdf</url>
       <bibkey>dybkjaer-bernse-2004-towards</bibkey>
     </paper>
     <paper id="316">
-      <author><first>Uwe D.</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe D.</first><last>Reichel</last></author>
       <author><first>Karl</first><last>Weilhammer</last></author>
       <title>Automated Morphological Segmentation and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/521.pdf</url>
       <bibkey>reichel-weilhammer-2004-automated</bibkey>
     </paper>
     <paper id="317">
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>A Registry of Standard Data Categories for Linguistic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/522.pdf</url>
       <bibkey>ide-romary-2004-registry</bibkey>
@@ -2705,7 +2705,7 @@
       <bibkey>marinelli-etal-2004-building</bibkey>
     </paper>
     <paper id="320">
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <author><first>László</first><last>Németh</last></author>
       <author><first>András</first><last>Rung</last></author>
@@ -2724,8 +2724,8 @@
       <bibkey>fujii-etal-2004-test</bibkey>
     </paper>
     <paper id="322">
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <title>Part-of-Speech Annotation of Biology Research Abstracts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/528.pdf</url>
       <bibkey>tateisi-tsujii-2004-part</bibkey>
@@ -2733,7 +2733,7 @@
     <paper id="323">
       <author><first>Božo</first><last>Bekavac</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title>Making Monolingual Corpora Comparable: a Case Study of <fixed-case>B</fixed-case>ulgarian and <fixed-case>C</fixed-case>roatian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/529.pdf</url>
@@ -2742,21 +2742,21 @@
     <paper id="324">
       <author><first>Lina</first><last>Henriksen</last></author>
       <author><first>Bart</first><last>Jongejan</last></author>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <title>Corporate Voice, Tone of Voice and Controlled Language Techniques</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/530.pdf</url>
       <bibkey>henriksen-etal-2004-corporate</bibkey>
     </paper>
     <paper id="325">
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title><fixed-case>C</fixed-case>ypriot Speech Database: Data Collection and <fixed-case>G</fixed-case>reek to <fixed-case>C</fixed-case>ypriot Dialect Adaptation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/531.pdf</url>
       <bibkey>fakotakis-2004-cypriot</bibkey>
     </paper>
     <paper id="326">
-      <author><first>Borja</first><last>Navarro</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <title>Automatic Extraction of Syntactic Semantic Patterns for Multilingual Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/532.pdf</url>
       <bibkey>navarro-etal-2004-automatic</bibkey>
@@ -2771,8 +2771,8 @@
     </paper>
     <paper id="328">
       <author><first>Viktor</first><last>Pekar</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>Categorizing Web Pages as a Preprocessing Step for Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/534.pdf</url>
       <bibkey>pekar-etal-2004-categorizing</bibkey>
@@ -2785,7 +2785,7 @@
     </paper>
     <paper id="330">
       <author><first>Manuela</first><last>Kunze</last></author>
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <title>Corpus Based Enrichment of <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Verb Frames</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/536.pdf</url>
       <bibkey>kunze-rosner-2004-corpus</bibkey>
@@ -2800,31 +2800,31 @@
     <paper id="332">
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <title>Towards a Language Infrastructure for the Semantic Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/541.pdf</url>
       <bibkey>declerck-etal-2004-towards</bibkey>
     </paper>
     <paper id="333">
-      <author><first>Alvin</first><last>Martin</last></author>
+      <author id="alvin-martin"><first>Alvin</first><last>Martin</last></author>
       <author><first>David</first><last>Miller</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
-      <author><first>Joseph</first><last>Campbell</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
+      <author id="joseph-p-campbell"><first>Joseph</first><last>Campbell</last></author>
       <author><first>Hirotaka</first><last>Nakasone</last></author>
       <title>Conversational Telephone Speech Corpus Collection for the <fixed-case>NIST</fixed-case> Speaker Recognition Evaluation 2004</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/542.pdf</url>
       <bibkey>martin-etal-2004-conversational</bibkey>
     </paper>
     <paper id="334">
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Christian</first><last>Monson</last></author>
       <title>Augmenting Manual Dictionaries for Statistical Machine Translation Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/543.pdf</url>
       <bibkey>vogel-monson-2004-augmenting</bibkey>
     </paper>
     <paper id="335">
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Christian</first><last>Wolff</last></author>
       <title>Linguistic Corpus Search</title>
@@ -2832,16 +2832,16 @@
       <bibkey>biemann-etal-2004-linguistic</bibkey>
     </paper>
     <paper id="336">
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Maria</first><last>Gavrilidou</last></author>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <author><first>Paola</first><last>Baroni</last></author>
-      <author><first>Hanne</first><last>Fersøe</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <title><fixed-case>ENABLER</fixed-case> Thematic Network of National Projects: Technical, Strategic and Political Issues of <fixed-case>LR</fixed-case>s</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/545.pdf</url>
       <bibkey>calzolari-etal-2004-enabler</bibkey>
@@ -2861,7 +2861,7 @@
       <author><first>Martin</first><last>Volk</last></author>
       <author><first>Dominic</first><last>Widdows</last></author>
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <title>Evaluation Resources for Concept-based Cross-Lingual Information Retrieval in the Medical Domain</title>
@@ -2869,7 +2869,7 @@
       <bibkey>buitelaar-etal-2004-evaluation</bibkey>
     </paper>
     <paper id="339">
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Stefan</first><last>Bordag</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <title>Automatic Acquisition of Paradigmatic Relations Using Iterated Co-occurrences</title>
@@ -2891,21 +2891,21 @@
       <author><first>Dorota</first><last>Iskra</last></author>
       <author><first>Rainer</first><last>Siemund</last></author>
       <author><first>Jamal</first><last>Borno</last></author>
-      <author><first>Asuncion</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Oren</first><last>Gedge</last></author>
       <author><first>Herbert</first><last>Tropf</last></author>
-      <author><first>Albino</first><last>Nogueiras</last></author>
+      <author id="albino-nogueiras"><first>Albino</first><last>Nogueiras</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <author><first>Anastasios</first><last>Tsopanoglou</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title><fixed-case>O</fixed-case>rien<fixed-case>T</fixed-case>el - Telephony Databases Across <fixed-case>N</fixed-case>orthern <fixed-case>A</fixed-case>frica and the <fixed-case>M</fixed-case>iddle <fixed-case>E</fixed-case>ast</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/552.pdf</url>
       <bibkey>iskra-etal-2004-orientel</bibkey>
     </paper>
     <paper id="342">
-      <author><first>Hanne</first><last>Fersøe</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <title><fixed-case>ELRA</fixed-case> Validation Methodology and Standard Promotion for Linguistic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/553.pdf</url>
@@ -2919,38 +2919,38 @@
       <bibkey>biber-breiteneder-2004-aac</bibkey>
     </paper>
     <paper id="344">
-      <author><first>Diana</first><last>Binnenpoorte</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="diana-binnenpoorte"><first>Diana</first><last>Binnenpoorte</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <title>Improving Automatic Phonetic Transcription of Spontaneous Speech Through Variant-Based Pronunciation Variation Modelling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/558.pdf</url>
       <bibkey>binnenpoorte-etal-2004-improving</bibkey>
     </paper>
     <paper id="345">
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Mijail A.</first><last>Kabadjov</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="mijail-kabadjov"><first>Mijail A.</first><last>Kabadjov</last></author>
       <title>A General-Purpose, Off-the-shelf Anaphora Resolution Module: Implementation and Preliminary Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/559.pdf</url>
       <bibkey>poesio-kabadjov-2004-general</bibkey>
     </paper>
     <paper id="346">
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Li</first><last>Tang</last></author>
-      <author><first>Lingpeng</first><last>Yang</last></author>
+      <author id="lingpeng-yang"><first>Lingpeng</first><last>Yang</last></author>
       <title>Building a Conceptual Graph Bank for <fixed-case>C</fixed-case>hinese Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/561.pdf</url>
       <bibkey>ji-etal-2004-building</bibkey>
     </paper>
     <paper id="347">
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <author><first>Nicolas</first><last>Barrier</last></author>
       <title>Enriching a <fixed-case>F</fixed-case>rench Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/562.pdf</url>
       <bibkey>abeille-barrier-2004-enriching</bibkey>
     </paper>
     <paper id="348">
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Samuel</first><last>Dufour-Kowalski</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <title><fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Multi-word Term Alignment Based on Lexical Context Analysis</title>
@@ -2975,13 +2975,13 @@
     </paper>
     <paper id="351">
       <author><first>Abdelhadi</first><last>Soudi</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <title>Generating an <fixed-case>A</fixed-case>rabic Full-form Lexicon for Bidirectional Morphology Lookup</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/567.pdf</url>
       <bibkey>soudi-eisele-2004-generating</bibkey>
     </paper>
     <paper id="352">
-      <author><first>Petr</first><last>Pollák</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollák</last></author>
       <author><first>Jan</first><last>Černocký</last></author>
       <title>Orthographic and Phonetic Annotation of Very Large <fixed-case>C</fixed-case>zech Corpora with Quality Assessment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/568.pdf</url>
@@ -2998,8 +2998,8 @@
       <bibkey>ribeiro-etal-2004-inquer</bibkey>
     </paper>
     <paper id="354">
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <title>Evaluating Solutions for the Rapid Development of State-of-the-Art <fixed-case>POS</fixed-case> Taggers for <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/572.pdf</url>
       <bibkey>branco-silva-2004-evaluating</bibkey>
@@ -3012,7 +3012,7 @@
     </paper>
     <paper id="356">
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title><fixed-case>B</fixed-case>ayesian Semantics Incorporation to Web Content for Natural Language Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/576.pdf</url>
       <bibkey>maragoudakis-fakotakis-2004-bayesian</bibkey>
@@ -3025,9 +3025,9 @@
     </paper>
     <paper id="358">
       <author><first>Iulia</first><last>Nica</last></author>
-      <author><first>Mª Antònia</first><last>Martí</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
+      <author id="m-antonia-marti"><first>Mª Antònia</first><last>Martí</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
       <title>Enriching <fixed-case>EWN</fixed-case> with Syntagmatic Information by Means of <fixed-case>WSD</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/579.pdf</url>
       <bibkey>nica-etal-2004-enriching</bibkey>
@@ -3056,9 +3056,9 @@
     <paper id="362">
       <author><first>Juan</first><last>Fernández</last></author>
       <author><first>Mauro</first><last>Castillo</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <title>Automatic Acquisition of Sense Examples Using <fixed-case>E</fixed-case>x<fixed-case>R</fixed-case>etriever</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/583.pdf</url>
       <bibkey>fernandez-etal-2004-automatic</bibkey>
@@ -3066,8 +3066,8 @@
     <paper id="363">
       <author><first>Cvetana</first><last>Krstev</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
-      <author><first>Ranka</first><last>Stankoviæ</last></author>
-      <author><first>Ivan</first><last>Obradoviæ</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stankoviæ</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradoviæ</last></author>
       <author><first>Gordana</first><last>Pavloviæ-Lažetiæ</last></author>
       <title>Combining Heterogeneous Lexical Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/584.pdf</url>
@@ -3075,19 +3075,19 @@
     </paper>
     <paper id="364">
       <author><first>Viet-Bac</first><last>Le</last></author>
-      <author><first>Do-Dat</first><last>Tran</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Jean-François</first><last>Serignat</last></author>
+      <author id="do-dat-tran"><first>Do-Dat</first><last>Tran</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="jean-francois-serignat"><first>Jean-François</first><last>Serignat</last></author>
       <title>Spoken and Written Language Resources for <fixed-case>V</fixed-case>ietnamese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/586.pdf</url>
       <bibkey>le-etal-2004-spoken</bibkey>
     </paper>
     <paper id="365">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Maria</first><last>Georgescul</last></author>
       <author><first>Alexander</first><last>Clark</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <title>Building and Using a Corpus of Shallow Dialogue Annotated Meetings</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/587.pdf</url>
       <bibkey>popescu-belis-etal-2004-building</bibkey>
@@ -3111,7 +3111,7 @@
       <author><first>Jan-Torsten</first><last>Milde</last></author>
       <author><first>Ulrike</first><last>Gut</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <title>Querying Both Time-aligned and Hierarchical Corpora with <fixed-case>NXT</fixed-case> Search</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/590.pdf</url>
       <bibkey>heid-etal-2004-querying</bibkey>
@@ -3148,8 +3148,8 @@
       <bibkey>ha-2004-practical</bibkey>
     </paper>
     <paper id="373">
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <title><fixed-case>SVMT</fixed-case>ool: A general <fixed-case>POS</fixed-case> Tagger Generator Based on Support Vector Machines</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/597.pdf</url>
       <bibkey>gimenez-marquez-2004-svmtool</bibkey>
@@ -3164,17 +3164,17 @@
     </paper>
     <paper id="375">
       <author><first>Ulrich</first><last>Callmeier</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <title>The <fixed-case>D</fixed-case>eep<fixed-case>T</fixed-case>hought Core Architecture Framework</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/603.pdf</url>
       <bibkey>callmeier-etal-2004-deepthought</bibkey>
     </paper>
     <paper id="376">
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Salvador</first><last>Climent</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>Towards the Meaning Top Ontology: Sources of Ontological Meaning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/604.pdf</url>
       <bibkey>atserias-etal-2004-towards</bibkey>
@@ -3209,15 +3209,15 @@
       <bibkey>teixeira-etal-2004-acoustic</bibkey>
     </paper>
     <paper id="380">
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <author><first>David</first><last>Langlois</last></author>
       <title>Experiments on Building Language Resources for Multi-Modal Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/611.pdf</url>
       <bibkey>romary-etal-2004-experiments</bibkey>
     </paper>
     <paper id="381">
-      <author><first>David</first><last>Day</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <author><first>Chad</first><last>McHenry</last></author>
       <author><first>Robyn</first><last>Kozierok</last></author>
       <author><first>Laurel</first><last>Riek</last></author>
@@ -3228,9 +3228,9 @@
     <paper id="382">
       <author><first>Ray</first><last>Clifford</last></author>
       <author><first>Neil</first><last>Granoien</last></author>
-      <author><first>Douglas</first><last>Jones</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
       <author><first>Wade</first><last>Shen</last></author>
-      <author><first>Clifford</first><last>Weinstein</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
       <title>The Effect of Text Difficulty on Machine Translation Performance – A Pilot Study with <fixed-case>ILR</fixed-case>-Rated Texts in <fixed-case>S</fixed-case>panish, <fixed-case>F</fixed-case>arsi, <fixed-case>A</fixed-case>rabic, <fixed-case>R</fixed-case>ussian and <fixed-case>K</fixed-case>orean</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/613.pdf</url>
       <bibkey>clifford-etal-2004-effect</bibkey>
@@ -3263,8 +3263,8 @@
     <paper id="386">
       <author><first>Eleni</first><last>Miltsakaki</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <title>The <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>reebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/618.pdf</url>
       <bibkey>miltsakaki-etal-2004-penn</bibkey>
@@ -3272,7 +3272,7 @@
     <paper id="387">
       <author><first>Violeta</first><last>Seretan</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <title>Using the Web as a Corpus for the Syntactic-Based Collocation Identification</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/619.pdf</url>
       <bibkey>seretan-etal-2004-using</bibkey>
@@ -3286,7 +3286,7 @@
     </paper>
     <paper id="389">
       <author><first>Henk</first><last>Harkema</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
       <author><first>Neil</first><last>Davis</last></author>
       <author><first>Yikun</first><last>Guo</last></author>
@@ -3303,7 +3303,7 @@
       <bibkey>hemsen-2004-evaluation</bibkey>
     </paper>
     <paper id="391">
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <author><first>Stefan</first><last>Bordag</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Christian</first><last>Wolff</last></author>
@@ -3315,12 +3315,12 @@
       <author><first>Elisabeth</first><last>Pinto</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <author><first>Hélène</first><last>François</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Olivier</first><last>Boëffard</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boëffard</last></author>
       <author><first>Dominique</first><last>Fohr</last></author>
       <author><first>Odile</first><last>Mella</last></author>
       <author><first>Frédéric</first><last>Bimbot</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Yann</first><last>Philip</last></author>
       <author><first>Francis</first><last>Charpentier</last></author>
       <title>Development of New Telephone Speech Databases for <fixed-case>F</fixed-case>rench: the <fixed-case>NEOLOGOS</fixed-case> Project</title>
@@ -3329,7 +3329,7 @@
     </paper>
     <paper id="393">
       <author><first>Karel</first><last>Pala</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <title>Top Ontology as a Tool for Semantic Role Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/626.pdf</url>
       <bibkey>pala-smrz-2004-top</bibkey>
@@ -3337,23 +3337,23 @@
     <paper id="394">
       <author><first>Argyrios</first><last>Vasilakopoulos</last></author>
       <author><first>Michele</first><last>Bersani</last></author>
-      <author><first>William J.</first><last>Black</last></author>
+      <author id="william-j-black"><first>William J.</first><last>Black</last></author>
       <title>A Suite of Tools for Marking Up Textual Data for Temporal Text Mining Scenarios</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/627.pdf</url>
       <bibkey>vasilakopoulos-etal-2004-suite</bibkey>
     </paper>
     <paper id="395">
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <author><first>Avik</first><last>Sarkar</last></author>
-      <author><first>Paul</first><last>Garthwaite</last></author>
+      <author id="paul-h-garthwaite"><first>Paul</first><last>Garthwaite</last></author>
       <title>Frequent Term Distribution Measures for Dataset Profiling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/629.pdf</url>
       <bibkey>de-roeck-etal-2004-frequent</bibkey>
     </paper>
     <paper id="396">
-      <author><first>Josef</first><last>Psutka</last></author>
+      <author id="josef-psutka"><first>Josef</first><last>Psutka</last></author>
       <author><first>Pavel</first><last>Ircing</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Vlasta</first><last>Radová</last></author>
       <author><first>Josef V.</first><last>Psutka</last></author>
       <author id="bill-byrne"><first>William J.</first><last>Byrne</last></author>
@@ -3363,15 +3363,15 @@
       <bibkey>psutka-etal-2004-issues</bibkey>
     </paper>
     <paper id="397">
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
-      <author><first>M. Carmen</first><last>Suárez-Figueroa</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="mari-carmen-suarez-figueroa"><first>M. Carmen</first><last>Suárez-Figueroa</last></author>
       <title>Ontology Evaluation Functionalities of <fixed-case>RDF</fixed-case>(<fixed-case>S</fixed-case>),<fixed-case>DAML</fixed-case>+<fixed-case>OIL</fixed-case>, and <fixed-case>OWL</fixed-case> Parsers and Ontology Platforms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/631.pdf</url>
       <bibkey>gomez-perez-suarez-figueroa-2004-ontology</bibkey>
     </paper>
     <paper id="398">
       <author><first>Anna</first><last>Sinopalnikova</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <title>Word Association Norms as a Unique Supplement of Traditional Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/632.pdf</url>
       <bibkey>sinopalnikova-smrz-2004-word</bibkey>
@@ -3401,7 +3401,7 @@
       <author><first>Malvina</first><last>Nissim</last></author>
       <author><first>Shipra</first><last>Dingare</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <title>An Annotation Scheme for Information Status in Dialogue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/638.pdf</url>
       <bibkey>nissim-etal-2004-annotation</bibkey>
@@ -3409,7 +3409,7 @@
     <paper id="403">
       <author><first>Alex</first><last>Trutnev</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <title>Speech Recognition Simulation and its Application for <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Experiments</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/639.pdf</url>
       <bibkey>trutnev-etal-2004-speech</bibkey>
@@ -3417,7 +3417,7 @@
     <paper id="404">
       <author><first>Murat</first><last>Deviren</last></author>
       <author><first>Khalid</first><last>Daoudi</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <title>Language Modeling Using Dynamic <fixed-case>B</fixed-case>ayesian Networks</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/640.pdf</url>
       <bibkey>deviren-etal-2004-language</bibkey>
@@ -3430,20 +3430,20 @@
       <bibkey>hahn-wermter-2004-pumping</bibkey>
     </paper>
     <paper id="406">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <title>A Hybrid Strategy For Regular Grammar Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/642.pdf</url>
       <bibkey>simov-osenova-2004-hybrid</bibkey>
     </paper>
     <paper id="407">
-      <author><first>Jordi</first><last>Atserias</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Octavian</first><last>Popescu</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Aitziber</first><last>Atutxa</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <title>Cross-Language Acquisition of Semantic Models for Verbal Predicates</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/643.pdf</url>
@@ -3457,8 +3457,8 @@
     </paper>
     <paper id="409">
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
-      <author><first>George</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George</first><last>Kokkinakis</last></author>
       <title>A graphical Tool for Handling Rule Grammars in <fixed-case>J</fixed-case>ava Speech Grammar Format</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/645.pdf</url>
       <bibkey>georgila-etal-2004-graphical</bibkey>
@@ -3470,18 +3470,18 @@
       <bibkey>sheremetyeva-2004-flexible</bibkey>
     </paper>
     <paper id="411">
-      <author><first>David</first><last>Martínez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <title>The Effect of Bias on an Automatically-built Word Sense Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/648.pdf</url>
       <bibkey>martinez-agirre-2004-effect</bibkey>
     </paper>
     <paper id="412">
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Núria</first><last>Castell</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="nuria-castell"><first>Núria</first><last>Castell</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <title>Bilingual Connections for Trilingual Corpora: An <fixed-case>XML</fixed-case> Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/649.pdf</url>
@@ -3507,7 +3507,7 @@
     </paper>
     <paper id="415">
       <author><first>Alex</first><last>Trutnev</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <title>Comparative Evaluations in the Domain of Automatic Speech Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/654.pdf</url>
       <bibkey>trutnev-rajman-2004-comparative</bibkey>
@@ -3550,7 +3550,7 @@
       <bibkey>braffort-etal-2004-toward</bibkey>
     </paper>
     <paper id="420">
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <title>Building Distributed Language Resources By Grid Computing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/661.pdf</url>
       <bibkey>tamburini-2004-building</bibkey>
@@ -3575,11 +3575,11 @@
       <bibkey>busemann-2004-egram</bibkey>
     </paper>
     <paper id="424">
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Fabio</first><last>Zanzotto</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio</first><last>Zanzotto</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>David</first><last>Guthrie</last></author>
       <author><first>Jia</first><last>Cui</last></author>
       <author><first>Marco</first><last>Cammisa</last></author>
@@ -3588,24 +3588,24 @@
       <author><first>Kristiyan</first><last>Haralambiev</last></author>
       <author><first>Martin</first><last>Holub</last></author>
       <author><first>Klaus</first><last>Macherey</last></author>
-      <author><first>Fredrick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Fredrick</first><last>Jelinek</last></author>
       <title>Large Scale Experiments for Semantic Labeling of Noun Phrases in Raw Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/666.pdf</url>
       <bibkey>guthrie-etal-2004-large</bibkey>
     </paper>
     <paper id="425">
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Aitziber</first><last>Atutxa</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <title>Exploring Portability of Syntactic Information from <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>asque</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/667.pdf</url>
       <bibkey>agirre-etal-2004-exploring</bibkey>
     </paper>
     <paper id="426">
-      <author><first>Jordi</first><last>Atserias</last></author>
-      <author><first>Luís</first><last>Villarejo</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
+      <author id="luis-villarejo"><first>Luís</first><last>Villarejo</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title><fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 1.6: Porting the <fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ordnet Across <fixed-case>P</fixed-case>rinceton Versions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/668.pdf</url>
       <bibkey>atserias-etal-2004-spanish</bibkey>
@@ -3614,7 +3614,7 @@
       <author><first>Magdalena</first><last>Wolska</last></author>
       <author><first>Bao Quoc</first><last>Vo</last></author>
       <author><first>Dimitra</first><last>Tsovaltzi</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Elena</first><last>Karagjosova</last></author>
       <author><first>Helmut</first><last>Horacek</last></author>
       <author><first>Armin</first><last>Fiedler</last></author>
@@ -3624,19 +3624,19 @@
       <bibkey>wolska-etal-2004-annotated</bibkey>
     </paper>
     <paper id="428">
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Hatem</first><last>Ghorbel</last></author>
       <title>Automatic Keyword Extraction from Spoken Text. A Comparison of Two Lexical Resources: <fixed-case>EDR</fixed-case> and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/670.pdf</url>
       <bibkey>van-der-plas-etal-2004-automatic</bibkey>
     </paper>
     <paper id="429">
-      <author><first>Anna</first><last>Kupść</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <title>Pronominal Anaphora Resolution for Unrestricted Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/671.pdf</url>
       <bibkey>kupsc-etal-2004-pronominal</bibkey>
@@ -3662,7 +3662,7 @@
     </paper>
     <paper id="432">
       <author><first>Nina</first><last>Wacholder</last></author>
-      <author><first>Sharon</first><last>Small</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
       <author><first>Bing</first><last>Bai</last></author>
       <author><first>Diane</first><last>Kelly</last></author>
       <author><first>Robert</first><last>Rittman</last></author>
@@ -3670,9 +3670,9 @@
       <author><first>Robert</first><last>Salkin</last></author>
       <author><first>Peng</first><last>Song</last></author>
       <author><first>Ying</first><last>Sun</last></author>
-      <author><last>Liu</last><first>Ting</first></author>
-      <author><first>Paul</first><last>Kantor</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author><first>Ting</first><last>Liu</last></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <title>Designing a Realistic Evaluation of an End-to-end Interactive Question Answering System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/675.pdf</url>
       <bibkey>wacholder-etal-2004-designing</bibkey>
@@ -3686,18 +3686,18 @@
     <paper id="434">
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Debbie</first><last>Elliott</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Calibrating Resource-light Automatic <fixed-case>MT</fixed-case> Evaluation: a Cheap Approach to Ranking <fixed-case>MT</fixed-case> Systems by the Usability of Their Output</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/678.pdf</url>
       <bibkey>babych-etal-2004-calibrating</bibkey>
     </paper>
     <paper id="435">
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Iason</first><last>Demiros</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <author><first>Peter</first><last>Vanroose</last></author>
-      <author><first>Anja</first><last>Hoethker</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="anja-hoethker"><first>Anja</first><last>Hoethker</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Elsa</first><last>Sklavounou</last></author>
       <author><first>Manos</first><last>Konstantinou</last></author>
       <author><first>Yannis</first><last>Karavidas</last></author>
@@ -3707,7 +3707,7 @@
     </paper>
     <paper id="436">
       <author><first>Kazuki</first><last>Adachi</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Hiromichi</first><last>Kawanami</last></author>
       <author><first>Hiroshi</first><last>Saruwatari</last></author>
       <author><first>Kiyohiro</first><last>Shikano</last></author>
@@ -3716,24 +3716,24 @@
       <bibkey>adachi-etal-2004-perceptual</bibkey>
     </paper>
     <paper id="437">
-      <author><first>Serge A.</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge A.</first><last>Yablonsky</last></author>
       <title>Integration of <fixed-case>R</fixed-case>ussian Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/682.pdf</url>
       <bibkey>yablonsky-2004-integration</bibkey>
     </paper>
     <paper id="438">
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Nicola</first><last>Lorusso</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title><fixed-case>A</fixed-case>2<fixed-case>Q</fixed-case>: An Agent-based Architecure for Multilingual <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/683.pdf</url>
       <bibkey>basili-etal-2004-a2q</bibkey>
     </paper>
     <paper id="439">
-      <author><first>Guadalupe Aguado</first><last>de Cea</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe Aguado</first><last>de Cea</last></author>
       <author><first>Inmaculada</first><last>Álvarez-de-Mon</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <title><fixed-case>O</fixed-case>nto<fixed-case>T</fixed-case>ag’s Linguistic Ontologies: Enhancing Higher Level and Semantic Web Annotations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/684.pdf</url>
       <bibkey>de-cea-etal-2004-ontotags</bibkey>
@@ -3750,13 +3750,13 @@
     <paper id="441">
       <author><first>Kiyong</first><last>Lee</last></author>
       <author><first>Lou</first><last>Burnard</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Syd</first><last>Bauman</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <author><first>Claude</first><last>Roux</last></author>
       <title>Towards an International Standard on Feature Structure Representation</title>
@@ -3764,43 +3764,43 @@
       <bibkey>lee-etal-2004-towards</bibkey>
     </paper>
     <paper id="442">
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <title>The Translation Correction Tool: <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish User Studies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/688.pdf</url>
       <bibkey>font-llitjos-carbonell-2004-translation</bibkey>
     </paper>
     <paper id="443">
-      <author><first>Brian</first><last>Mitchell</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="brian-mitchell"><first>Brian</first><last>Mitchell</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>A Labelled Corpus for Prepositional Phrase Attachment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/690.pdf</url>
       <bibkey>mitchell-gaizauskas-2004-labelled</bibkey>
     </paper>
     <paper id="444">
       <author><first>Gabriel</first><last>Infante-Lopez</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <title>Comparing the Ambiguity Reduction Abilities of Probabilistic Context-Free Grammars</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/692.pdf</url>
       <bibkey>infante-lopez-de-rijke-2004-comparing</bibkey>
     </paper>
     <paper id="445">
-      <author><first>Paul</first><last>Morarescu</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="paul-morarescu"><first>Paul</first><last>Morarescu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title><fixed-case>N</fixed-case>ame<fixed-case>N</fixed-case>et: a Self-Improving Resource for Name Classification</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/693.pdf</url>
       <bibkey>morarescu-harabagiu-2004-namenet</bibkey>
     </paper>
     <paper id="446">
       <author><first>Katerina</first><last>Pastra</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Image-Language Multimodal Corpora: Needs, Lacunae and an <fixed-case>AI</fixed-case> Synergy for Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/694.pdf</url>
       <bibkey>pastra-wilks-2004-image</bibkey>
     </paper>
     <paper id="447">
       <author><first>Na-Rae</first><last>Han</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Claudia</first><last>Leacock</last></author>
       <title>Detecting Errors in <fixed-case>E</fixed-case>nglish Article Usage with a Maximum Entropy Classifier Trained on a Large, Diverse Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/695.pdf</url>
@@ -3813,9 +3813,9 @@
       <bibkey>sedlacek-2004-core</bibkey>
     </paper>
     <paper id="449">
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Anja</first><last>Höthker</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="anja-hoethker"><first>Anja</first><last>Höthker</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <title>Automatic Sentence Simplification for Subtitling in <fixed-case>D</fixed-case>utch and <fixed-case>E</fixed-case>nglish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/697.pdf</url>
       <bibkey>daelemans-etal-2004-automatic</bibkey>
@@ -3830,7 +3830,7 @@
       <bibkey>kruengkrai-etal-2004-enriching</bibkey>
     </paper>
     <paper id="451">
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
       <title>Results of the 2003 Topic Detection and Tracking Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/702.pdf</url>
       <bibkey>fiscus-2004-results</bibkey>
@@ -3853,7 +3853,7 @@
       <author><first>Fabre</first><last>Lambeau</last></author>
       <author><first>Benjamin</first><last>Waldron</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <title>A Lexicon Module for a Grammar Development Environment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/706.pdf</url>
@@ -3861,7 +3861,7 @@
     </paper>
     <paper id="455">
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Modelling Legitimate Translation Variation for Automatic Evaluation of <fixed-case>MT</fixed-case> Quality</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/707.pdf</url>
       <bibkey>babych-hartley-2004-modelling</bibkey>
@@ -3869,7 +3869,7 @@
     <paper id="456">
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <title>Semantic Mark-up of <fixed-case>I</fixed-case>talian Legal Texts Through <fixed-case>NLP</fixed-case>-based Techniques</title>
@@ -3878,7 +3878,7 @@
     </paper>
     <paper id="457">
       <author><first>Lionel</first><last>Clément</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Bernard</first><last>Lang</last></author>
       <title>Morphology Based Automatic Acquisition of Large-coverage Lexica</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/711.pdf</url>
@@ -3892,26 +3892,26 @@
     </paper>
     <paper id="459">
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
-      <author><first>Peter J.</first><last>Jansen</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="peter-jansen"><first>Peter J.</first><last>Jansen</last></author>
       <title>Developing Language Resources for a Transnational Digital Government System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/713.pdf</url>
       <bibkey>cavalli-sforza-etal-2004-developing</bibkey>
     </paper>
     <paper id="460">
-      <author><first>Mary D.</first><last>Swift</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary D.</first><last>Swift</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <title>Semi-automatic Syntactic and Semantic Corpus Annotation with a Deep Parser</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/714.pdf</url>
       <bibkey>swift-etal-2004-semi</bibkey>
     </paper>
     <paper id="461">
       <author><first>Georges</first><last>Fafiotte</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Mark</first><last>Seligman</last></author>
-      <author><last>Zong</last><first>Chengqing</first></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <title>Collecting and Sharing Bilingual Spontaneous Speech Corpora: the <fixed-case>C</fixed-case>hin<fixed-case>F</fixed-case>a<fixed-case>D</fixed-case>ial Experiment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/715.pdf</url>
       <bibkey>fafiotte-etal-2004-collecting</bibkey>
@@ -3919,7 +3919,7 @@
     <paper id="462">
       <author><first>Judita</first><last>Preiss</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <title>Can Anaphoric Definite Descriptions be Replaced by Pronouns?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/718.pdf</url>
       <bibkey>preiss-etal-2004-anaphoric</bibkey>
@@ -3927,7 +3927,7 @@
     <paper id="463">
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <title>Hybrid Constraints for Robust Parsing: First Experiments and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/719.pdf</url>
@@ -3943,7 +3943,7 @@
     </paper>
     <paper id="465">
       <author><first>Simone</first><last>Teufel</last></author>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <title>Agreement in Human Factoid Annotation for Summarization Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/723.pdf</url>
       <bibkey>teufel-van-halteren-2004-agreement</bibkey>
@@ -3957,11 +3957,11 @@
       <bibkey>rilliard-etal-2004-evaluating</bibkey>
     </paper>
     <paper id="467">
-      <author><first>Nadia</first><last>Mana</last></author>
+      <author id="nadia-mana"><first>Nadia</first><last>Mana</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Franca</first><last>Rossi</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <author><first>Susanne</first><last>Burger</last></author>
       <title>The <fixed-case>I</fixed-case>talian <fixed-case>NESPOLE</fixed-case>! Corpus: a Multilingual Database with Interlingua Annotation in Tourism and Medical Domains</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/725.pdf</url>
@@ -3979,29 +3979,29 @@
     </paper>
     <paper id="469">
       <author><first>Antonietta</first><last>Alonge</last></author>
-      <author><first>Birte</first><last>Lönneker</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker</last></author>
       <title>Metaphors in Wordnets: From Theory to Practice</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/728.pdf</url>
       <bibkey>alonge-lonneker-2004-metaphors</bibkey>
     </paper>
     <paper id="470">
-      <author><first>Harry</first><last>Bunt</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Standardization in Multimodal Content Representation: Some Methodological Issues</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/729.pdf</url>
       <bibkey>bunt-romary-2004-standardization</bibkey>
     </paper>
     <paper id="471">
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Marco</first><last>Cammisa</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>A Similarity Measure for Unsupervised Semantic Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/732.pdf</url>
       <bibkey>basili-etal-2004-similarity</bibkey>
     </paper>
     <paper id="472">
-      <author><first>Laila</first><last>Dybkjær</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <title>Usability Evaluation of Multimodal and Domain-Oriented Spoken Language Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/733.pdf</url>
@@ -4011,7 +4011,7 @@
       <author><first>Jaap</first><last>Kamps</last></author>
       <author><first>Maarten</first><last>Marx</last></author>
       <author><first>Robert J.</first><last>Mokken</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <title>Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Measure Semantic Orientations of Adjectives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/734.pdf</url>
       <bibkey>kamps-etal-2004-using</bibkey>
@@ -4021,7 +4021,7 @@
       <author><first>Eva</first><last>Forsbom</last></author>
       <author><first>Ebba</first><last>Gustavii</last></author>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title><fixed-case>MT</fixed-case> Goes Farming: Comparing Two Machine Translation Approaches on a New Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/735.pdf</url>
       <bibkey>weijnitz-etal-2004-mt</bibkey>
@@ -4037,7 +4037,7 @@
       <author><first>Christopher</first><last>Brewster</last></author>
       <author><first>Harith</first><last>Alani</last></author>
       <author><first>Srinandan</first><last>Dasmahapatra</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Data Driven Ontology Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/737.pdf</url>
       <bibkey>brewster-etal-2004-data</bibkey>
@@ -4058,7 +4058,7 @@
     <paper id="479">
       <author><first>Vasco Calais</first><last>Pedro</last></author>
       <author><first>Jeongwoo</first><last>Ko</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <title>An Information Repository Model for Advanced Question Answering Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/742.pdf</url>
@@ -4068,29 +4068,29 @@
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <title>Content Interoperability of Lexical Resources: Open Issues and “<fixed-case>MILE</fixed-case>” Perspectives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/743.pdf</url>
       <bibkey>bertagna-etal-2004-content</bibkey>
     </paper>
     <paper id="481">
-      <author><first>Martin</first><last>Čmejrek</last></author>
-      <author><first>Jan</first><last>Cuřín</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
+      <author id="jan-curin"><first>Jan</first><last>Cuřín</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <title><fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank. Syntactically Annotated Resources for Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/745.pdf</url>
       <bibkey>cmejrek-etal-2004-prague</bibkey>
     </paper>
     <paper id="482">
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Rodolfo</first><last>Vega</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Ariadna</first><last>Font Llitjos</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjos</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Eliseo</first><last>Cañulef</last></author>
       <author><first>Rosendo</first><last>Huisca</last></author>
       <title>Data Collection and Analysis of <fixed-case>M</fixed-case>apudungun Morphology for Spelling Correction</title>
@@ -4098,8 +4098,8 @@
       <bibkey>monson-etal-2004-data</bibkey>
     </paper>
     <paper id="483">
-      <author><first>Arlindo O.</first><last>Veiga</last></author>
-      <author><first>Fernando S.</first><last>Perdigão</last></author>
+      <author id="arlindo-veiga"><first>Arlindo O.</first><last>Veiga</last></author>
+      <author id="fernando-perdigao"><first>Fernando S.</first><last>Perdigão</last></author>
       <title>An Efficient Word Confidence Measure Using Likelihood Ratio Scores</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/748.pdf</url>
       <bibkey>veiga-perdigao-2004-efficient</bibkey>
@@ -4107,13 +4107,13 @@
     <paper id="484">
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <title>Adding Syntactic Annotations to Transcripts of Parent-Child Dialogs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/749.pdf</url>
       <bibkey>sagae-etal-2004-adding</bibkey>
     </paper>
     <paper id="485">
-      <author><first>Huarui</first><last>Zhang</last></author>
+      <author id="huarui-zhang"><first>Huarui</first><last>Zhang</last></author>
       <author><first>Churen</first><last>Huang</last></author>
       <author><first>Shiwen</first><last>Yu</last></author>
       <title>Distributional Consistency: As a General Method for Defining a Core Lexicon</title>
@@ -4121,22 +4121,22 @@
       <bibkey>zhang-etal-2004-distributional</bibkey>
     </paper>
     <paper id="486">
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <title>Computing Reliability for Coreference Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/752.pdf</url>
       <bibkey>passonneau-2004-computing</bibkey>
     </paper>
     <paper id="487">
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier Lopez</first><last>de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier Lopez</first><last>de Lacalle</last></author>
       <title>Publicly Available Topic Signatures for all <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Nominal Senses</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/753.pdf</url>
       <bibkey>agirre-de-lacalle-2004-publicly</bibkey>
     </paper>
     <paper id="488">
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Ara</first><last>Kim</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <title>Road-testing the <fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>G</fixed-case>rammar Over the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus</title>
@@ -4144,31 +4144,31 @@
       <bibkey>baldwin-etal-2004-road</bibkey>
     </paper>
     <paper id="489">
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>Interpreting <fixed-case>BLEU</fixed-case>/<fixed-case>NIST</fixed-case> Scores: How Much Improvement do We Need to Have a Better System?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/755.pdf</url>
       <bibkey>zhang-etal-2004-interpreting</bibkey>
     </paper>
     <paper id="490">
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <title>Exploiting Anchor Text as a Lexical Resource</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/756.pdf</url>
       <bibkey>anick-2004-exploiting</bibkey>
     </paper>
     <paper id="491">
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Timothy</first><last>Allison</last></author>
       <author><first>Sasha</first><last>Blair-Goldensohn</last></author>
       <author><first>John</first><last>Blitzer</last></author>
       <author><first>Arda</first><last>Çelebi</last></author>
       <author><first>Stanko</first><last>Dimitrov</last></author>
-      <author><first>Elliott</first><last>Drabek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott</first><last>Drabek</last></author>
       <author><first>Ali</first><last>Hakim</last></author>
       <author><first>Wai</first><last>Lam</last></author>
       <author><first>Danyu</first><last>Liu</last></author>
-      <author><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
       <author><first>Hong</first><last>Qi</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
@@ -4184,8 +4184,8 @@
       <author><first>Bilyana</first><last>Martinovski</last></author>
       <author><first>Susan</first><last>Robinson</last></author>
       <author><first>Jens</first><last>Stephan</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>David R.</first><last>Traum</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="david-traum"><first>David R.</first><last>Traum</last></author>
       <title>Evaluation of Transcription and Annotation Tools for a Multi-modal, Multi-party Dialogue Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/758.pdf</url>
       <bibkey>garg-etal-2004-evaluation</bibkey>
@@ -4198,7 +4198,7 @@
     </paper>
     <paper id="494">
       <author><first>Aline</first><last>Villavicencio</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Benjamin</first><last>Waldron</last></author>
       <title>A Multilingual Database of Idioms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/760.pdf</url>
@@ -4206,19 +4206,19 @@
     </paper>
     <paper id="495">
       <author><first>Kazuaki</first><last>Maeda</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Annotation Tools for Large-Scale Corpus Development: Using <fixed-case>AGTK</fixed-case> at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/761.pdf</url>
       <bibkey>maeda-strassel-2004-annotation</bibkey>
     </paper>
     <paper id="496">
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Linguistic Resources for Effective, Affordable, Reusable Speech-to-Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/762.pdf</url>
       <bibkey>strassel-2004-linguistic</bibkey>
     </paper>
     <paper id="497">
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <title>Building part-of-speech Corpora Through Histogram Hopping</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/763.pdf</url>
       <bibkey>vilain-2004-building</bibkey>
@@ -4235,13 +4235,13 @@
       <author><first>Bilyana</first><last>Martinovski</last></author>
       <author><first>Saurabh</first><last>Garg</last></author>
       <author><first>Jens</first><last>Stephan</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>Issues in Corpus Development for Multi-party Multi-modal Task-oriented Dialogue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/766.pdf</url>
       <bibkey>robinson-etal-2004-issues</bibkey>
     </paper>
     <paper id="500">
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>David</first><last>Miller</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <title>The Fisher Corpus: a Resource for the Next Generations of Speech-to-Text</title>
@@ -4249,7 +4249,7 @@
       <bibkey>cieri-etal-2004-fisher</bibkey>
     </paper>
     <paper id="501">
-      <author><first>David R.</first><last>Traum</last></author>
+      <author id="david-traum"><first>David R.</first><last>Traum</last></author>
       <author><first>Susan</first><last>Robinson</last></author>
       <author><first>Jens</first><last>Stephan</last></author>
       <title>Evaluation of Multi-party Virtual Reality Dialogue Interaction</title>
@@ -4257,8 +4257,8 @@
       <bibkey>traum-etal-2004-evaluation</bibkey>
     </paper>
     <paper id="502">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Joseph P.</first><last>Campbell</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="joseph-p-campbell"><first>Joseph P.</first><last>Campbell</last></author>
       <author><first>Hirotaka</first><last>Nakasone</last></author>
       <author><first>David</first><last>Miller</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
@@ -4267,7 +4267,7 @@
       <bibkey>cieri-etal-2004-mixer</bibkey>
     </paper>
     <paper id="503">
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <title>Building a Large Grammar for <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/772.pdf</url>
@@ -4292,7 +4292,7 @@
     <paper id="506">
       <author><first>Long</first><last>Qiu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <title>A Public Reference Implementation of the <fixed-case>RAP</fixed-case> Anaphora Resolution Algorithm</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf</url>
       <bibkey>qiu-etal-2004-public</bibkey>
@@ -4302,8 +4302,8 @@
       <author><first>Neil</first><last>Ireson</last></author>
       <author><first>Paolo</first><last>Allegrini</last></author>
       <author><first>Simone</first><last>Marchi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
-      <author><first>Jose Maria Gomez</first><last>Hidalgo</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="jose-maria-gomez-hidalgo"><first>Jose Maria Gomez</first><last>Hidalgo</last></author>
       <title><fixed-case>NLP</fixed-case>-enhanced Content Filtering Within the <fixed-case>POESIA</fixed-case> Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/779.pdf</url>
       <bibkey>hepple-etal-2004-nlp</bibkey>
@@ -4330,20 +4330,20 @@
     </paper>
     <paper id="511">
       <author><first>Robert</first><last>Irie</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <title>Resources for Place Name Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/785.pdf</url>
       <bibkey>irie-sundheim-2004-resources</bibkey>
     </paper>
     <paper id="512">
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <title><fixed-case>NEMLAR</fixed-case> - An <fixed-case>A</fixed-case>rabic Language Resources Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/786.pdf</url>
       <bibkey>maegaard-2004-nemlar-arabic</bibkey>
     </paper>
     <paper id="513">
-      <author><first>Key-Sun</first><last>Choi</last></author>
-      <author><first>Hee-Sook</first><last>Bae</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
+      <author id="hee-sook-bae"><first>Hee-Sook</first><last>Bae</last></author>
       <author><first>Wonseok</first><last>Kang</last></author>
       <author><first>Juho</first><last>Lee</last></author>
       <author><first>Eunhe</first><last>Kim</last></author>
@@ -4363,22 +4363,22 @@
       <bibkey>jouis-ferru-2004-intranet</bibkey>
     </paper>
     <paper id="515">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>A Progress Report from the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: Recent Activities in Resource Creation and Distribution and the Development of Tools and Standards</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/789.pdf</url>
       <bibkey>cieri-liberman-2004-progress</bibkey>
     </paper>
     <paper id="516">
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <title>Recent Activities within the <fixed-case>E</fixed-case>uropean Language Resources Association: Issues on Sharing Language Resources and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/791.pdf</url>
       <bibkey>choukri-2004-recent</bibkey>
     </paper>
     <paper id="517">
-      <author><first>Widad Mustafa El</first><last>Hadi</last></author>
-      <author><first>Ismail</first><last>Timimi</last></author>
-      <author><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad Mustafa El</first><last>Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismail</first><last>Timimi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
       <title><fixed-case>EVALDA</fixed-case>-<fixed-case>CESART</fixed-case> Project: Terminological Resources Acquisition Tools Evaluation Campaign</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/792.pdf</url>
       <bibkey>hadi-etal-2004-evalda</bibkey>
@@ -4401,25 +4401,25 @@
     <paper id="520">
       <author><first>Joaquim F. Ferreira</first><last>da Silva</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>José Gabriel Pereira</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>José Gabriel Pereira</first><last>Lopes</last></author>
       <title>Cluster Analysis and Classification of Named Entities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/796.pdf</url>
       <bibkey>da-silva-etal-2004-cluster</bibkey>
     </paper>
     <paper id="521">
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Mahtab</first><last>Nikkhou</last></author>
-      <author><first>Niklas</first><last>Paulsson</last></author>
+      <author id="niklas-paulsson"><first>Niklas</first><last>Paulsson</last></author>
       <title>Network of Data Centres (<fixed-case>N</fixed-case>et<fixed-case>DC</fixed-case>): <fixed-case>BNSC</fixed-case> - An <fixed-case>A</fixed-case>rabic Broadcast News Speech Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/797.pdf</url>
       <bibkey>choukri-etal-2004-network</bibkey>
     </paper>
     <paper id="522">
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Maria</first><last>Nava</last></author>
       <author><first>Sylvain</first><last>Surcin</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <title><fixed-case>T</fixed-case>echnolangue: A Permanent Evaluation and Information Infrastructure</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/798.pdf</url>
       <bibkey>mapelli-etal-2004-technolangue</bibkey>
@@ -4431,9 +4431,9 @@
       <bibkey>marrafa-2004-extending</bibkey>
     </paper>
     <paper id="524">
-      <author><first>Boris</first><last>Dobrov</last></author>
+      <author id="boris-v-dobrov"><first>Boris</first><last>Dobrov</last></author>
       <author><first>Igor</first><last>Kuralenok</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Igor</first><last>Nekrestyanov</last></author>
       <author><first>Ilya</first><last>Segalovich</last></author>
       <title><fixed-case>R</fixed-case>ussian Information Retrieval Evaluation Seminar</title>
diff --git a/data/xml/L06.xml b/data/xml/L06.xml
index a3eb502292..8540b50fc5 100644
--- a/data/xml/L06.xml
+++ b/data/xml/L06.xml
@@ -3,13 +3,13 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>’06)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
       <editor><first>Aldo</first><last>Gangemi</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Daniel</first><last>Tapias</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="daniel-tapias"><first>Daniel</first><last>Tapias</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Genoa, Italy</address>
       <month>May</month>
@@ -43,9 +43,9 @@
       <bibkey>chaudiron-mariani-2006-techno</bibkey>
     </paper>
     <paper id="4">
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
       <title><fixed-case>REGULUS</fixed-case>: A Generic Multilingual Open Source Platform for Grammar-Based Speech Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/9_pdf.pdf</url>
@@ -86,8 +86,8 @@
     </paper>
     <paper id="9">
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Inmaculada</first><last>Hernáez</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inmaculada</first><last>Hernáez</last></author>
       <author><first>Iker</first><last>Aholab</last></author>
       <title>Designing and Recording an Emotional Speech Database for Corpus Based Synthesis in <fixed-case>B</fixed-case>asque</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/19_pdf.pdf</url>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="12">
       <author><first>Tien-Ping</first><last>Tan</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <title>A <fixed-case>F</fixed-case>rench Non-Native Corpus for Automatic Speech Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/33_pdf.pdf</url>
       <abstract>Automatic speech recognition (ASR) technology has achieved a level of maturity, where it is already practical to be used by novice users. However, most non-native speakers are still not comfortable with services including ASR systems, because of the accuracy on non-native speakers. This paper describes our approach in constructing a non-native corpus particularly in French for testing and adapting non-native speaker for automatic speech recognition. Finally, we also propose in this paper a method for detecting pronunciation variants and possible pronunciation mistakes by non-native speakers.</abstract>
@@ -147,7 +147,7 @@
       <bibkey>tongchim-etal-2006-blind</bibkey>
     </paper>
     <paper id="16">
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
       <author><first>Kiyota</first><last>Hashimoto</last></author>
       <author><first>Yukio</first><last>Tono</last></author>
@@ -168,23 +168,23 @@
     <paper id="18">
       <author><first>Ivan</first><last>Berlocher</last></author>
       <author><first>Hyun-gue</first><last>Huh</last></author>
-      <author><first>Eric</first><last>Laporte</last></author>
-      <author><first>Jee-sun</first><last>Nam</last></author>
+      <author id="eric-laporte"><first>Eric</first><last>Laporte</last></author>
+      <author id="jee-sun-nam"><first>Jee-sun</first><last>Nam</last></author>
       <title>Morphological annotation of <fixed-case>K</fixed-case>orean with Directly Maintainable Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/44_pdf.pdf</url>
       <abstract>This article describes an exclusively resource-based method of morphological annotation of written Korean text. Korean is an agglutinative language. Our annotator is designed to process text before the operation of a syntactic parser. In its present state, it annotates one-stem words only. The output is a graph of morphemes annotated with accurate linguistic information. The granularity of the tagset is 3 to 5 times higher than usual tagsets. A comparison with a reference annotated corpus showed that it achieves 89% recall without any corpus training. The language resources used by the system are lexicons of stems, transducers of suffixes and transducers of generation of allomorphs. All can be easily updated, which allows users to control the evolution of the performances of the system. It has been claimed that morphological annotation of Korean text could only be performed by a morphological analysis module accessing a lexicon of morphemes. We show that it can also be performed directly with a lexicon of words and without applying morphological rules at annotation time, which speeds up annotation to 1,210 words. The lexicon of words is obtained from the maintainable language resources through a fully automated compilation process.</abstract>
       <bibkey>berlocher-etal-2006-morphological</bibkey>
     </paper>
     <paper id="19">
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <title><fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et: a Multilingual Lexical Description of Prepositions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/45_pdf.pdf</url>
       <abstract>In this paper, we present the results of a preliminary investigation that aims at constructing a repository of preposition syntactic and semantic behaviors. A preliminary frame-based format for representing their prototypical behavior is then proposed together with related inferential patterns that describe functional or paradigmatic relations between preposition senses.</abstract>
       <bibkey>saint-dizier-2006-prepnet</bibkey>
     </paper>
     <paper id="20">
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
-      <author><first>Hee Sook</first><last>Bae</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="hee-sook-bae"><first>Hee Sook</first><last>Bae</last></author>
       <title>A Methodology for Developing Multilingual Resources for Terminology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/48_pdf.pdf</url>
       <abstract>This paper presents a project that aims at building lexical resources for terminology. By lexical resources, we mean dictionaries that provide detailed lexico-semantic information on terms, i.e. lexical units the sense of which can be related to a special subject field. In terminology, there is a lack of such resources. The specific dictionaries we are currently developing describe basic French and Korean terms that belong to the fields of computer science and the Internet (e.g. computer, configure, user-friendly, Web, browse, spam). This paper presents the structure of the French and Korean articles: each component is examined and illustrated with examples. We then describe the corpus-based methodology and the different computer applications used for developing the articles. Our methodology comprises five steps: design of the corpora, selection of terms; sense distinction; definition of actantial structures and listing of semantic relations. Details on the current state of each database are also given.</abstract>
@@ -193,7 +193,7 @@
     <paper id="21">
       <author><first>Stephan</first><last>Raidt</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Frederic</first><last>Elisei</last></author>
+      <author id="frederic-elisei"><first>Frederic</first><last>Elisei</last></author>
       <title>Does a Virtual Talking Face Generate Proper Multimodal Cues to Draw User’s Attention to Points of Interest?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/49_pdf.pdf</url>
       <abstract>We present a series of experiments investigating face-to-face interaction between an Embodied Conversational Agent (ECA) and a human interlocutor. The ECA is embodied by a video realistic talking head with independent head and eye movements. For a beneficial application in face-to-face interaction, the ECA should be able to derive meaning from communicational gestures of a human interlocutor, and likewise to reproduce such gestures. Conveying its capability to interpret human behaviour, the system encourages the interlocutor to show appropriate natural activity. Therefore it is important that the ECA knows how to display what would correspond to mental states in humans. This allows to interpret the machine processes of the system in terms of human expressiveness and to assign them a corresponding meaning. Thus the system may maintain an interaction based on human patterns. During a first experiment we investigated the ability of our talking head to direct user attention with facial deictic cues (Raidt, Bailly et al. 2005). Users interact with the ECA during a simple card game offering different levels of help and guidance through facial deictic cues. We analyzed the users performance and their perception of the quality of assistance given by the ECA. The experiment showed that users profit from its presence and its facial deictic cues. In the continuative series of experiments presented here, we investigated the effect of an enhancement of the multimodality of the deictic gestures by adding a spoken instruction.</abstract>
@@ -207,7 +207,7 @@
       <bibkey>wong-2006-skeleton</bibkey>
     </paper>
     <paper id="23">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Laura</first><last>Hasler</last></author>
       <title>Computer-aided summarisation – what the user really wants</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/52_pdf.pdf</url>
@@ -230,7 +230,7 @@
       <bibkey>kageura-kikui-2006-self</bibkey>
     </paper>
     <paper id="26">
-      <author><first>Hiromi itoh</first><last>Ozaku</last></author>
+      <author id="hiromi-itoh-ozaku"><first>Hiromi itoh</first><last>Ozaku</last></author>
       <author><first>Akinori</first><last>Abe</last></author>
       <author><first>Kaoru</first><last>Sagara</last></author>
       <author><first>Noriaki</first><last>Kuwahara</last></author>
@@ -251,8 +251,8 @@
       <bibkey>santos-etal-2006-harem</bibkey>
     </paper>
     <paper id="28">
-      <author><first>Rafael</first><last>Banchs</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <author><first>Javier</first><last>Pérez</last></author>
       <title>Acceptance Testing of a Spoken Language Translation System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/60_pdf.pdf</url>
@@ -261,9 +261,9 @@
     </paper>
     <paper id="29">
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <title>Creating Tools for Morphological Analysis of <fixed-case>S</fixed-case>umerian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/64_pdf.pdf</url>
       <abstract>Sumerian is a long-extinct language documented throughout the ancient MiddleEast, arguably the first language for which we have written evidence, and is a language isolate (i.e. no related languages have so far been identified). The Electronic Text Corpus of Sumerian Literature (ETCSL), based at theUniversity of Oxford, aims to make accessible on the web over 350 literary workscomposed during the late third and early second millennia BCE. The transliterations and translations can be searched, browsed and read online using the tools of the website. In this paper we describe the creation of linguistic analysis and corpus search tools for Sumerian, as part of the development of the ETCSL. This is designed to enable Sumerian scholars, students and interested laymen to analyse the texts online and electronically, and to further knowledge about the language.</abstract>
@@ -295,18 +295,18 @@
     </paper>
     <paper id="33">
       <author><first>Reinhard</first><last>Rapp</last></author>
-      <author><first>Carlos Martin</first><last>Vide</last></author>
+      <author id="carlos-martin-vide"><first>Carlos Martin</first><last>Vide</last></author>
       <title>Example-Based Machine Translation Using a Dictionary of Word Pairs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/74_pdf.pdf</url>
       <abstract>Machine translation systems, whether rule-based, example-based, or statistical, all rely on dictionaries that are in essence mappings between individual words of the source and the target language. Criteria for the disambiguation of ambiguous words and for differences in word order between the two languages are not accounted for in the lexicon. Instead, these important issues are dealt with in the translation engines. Because the engines tend to be compact and (even with data-oriented approaches) do not fully reflect the complexity of the problem, this approach generally does not account for the more fine grained facets of word behavior. This leads to wrong generalizations and, as a consequence, translation quality tends to be poor. In this paper we suggest to approach this problem by using a new type of lexicon that is not based on individual words but on pairs of words. For each pair of consecutive words in the source language the lexicon lists the possible translations in the target language together with information on order and distance of the target words. The process of machine translation is then seen as a combinatorial problem: For all word pairs in a source sentence all possible translations are retrieved from the lexicon and then those translations are discarded that lead to contradictions when constructing the target sentence. This process implicitly leads to word sense disambiguation and to language specific reordering of words.</abstract>
       <bibkey>rapp-vide-2006-example</bibkey>
     </paper>
     <paper id="34">
-      <author><first>Widad Mustafa</first><last>El Hadi</last></author>
-      <author><first>Ismail</first><last>Timimi</last></author>
-      <author><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad Mustafa</first><last>El Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismail</first><last>Timimi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Yun-Chuang</first><last>Chiao</last></author>
       <title>Terminological Resources Acquisition Tools: Toward a User-oriented Evaluation Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/75_pdf.pdf</url>
@@ -314,9 +314,9 @@
       <bibkey>el-hadi-etal-2006-terminological</bibkey>
     </paper>
     <paper id="35">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <title>New tools for the encoding of lexical data extracted from corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/76_pdf.pdf</url>
       <abstract>This paper describes the methodology and tools that are the basis of our platform AAILE.4 AAILE has been built for supplying those working in the construction of lexicons for syntactic parsing with more efficient ways of visualizing and analyzing data extracted from corpus. The platform offers support using techniques such as similarity measures, clustering and pattern classification.</abstract>
@@ -325,7 +325,7 @@
     <paper id="36">
       <author><first>Daniela</first><last>Braga</last></author>
       <author><first>Luís</first><last>Coelho</last></author>
-      <author><first>João P.</first><last>Teixeira</last></author>
+      <author id="joao-paulo-teixeira"><first>João P.</first><last>Teixeira</last></author>
       <author><first>Diamantino</first><last>Freitas</last></author>
       <title><fixed-case>P</fixed-case>rogmatica: A Prosodic Database for <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/77_pdf.pdf</url>
@@ -333,8 +333,8 @@
       <bibkey>braga-etal-2006-progmatica</bibkey>
     </paper>
     <paper id="37">
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <title><fixed-case>I</fixed-case>qmt: A Framework for Automatic Machine Translation Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/79_pdf.pdf</url>
       <abstract>We present the IQMT Framework for Machine Translation Evaluation Inside QARLA. IQMT offers a common workbench in which existing evaluation metrics can be utilized and combined. It provides i) a measure to evaluate the quality of any set of similarity metrics (KING), ii) a measure to evaluate the quality of a translation using a set of similarity metrics (QUEEN), and iii) a measure to evaluate the reliability of a test set (JACK). The first release of the IQMT package is freely available for public use. Current version includes a set of 26 metrics from 7 different well-known metric families, and allows the user to supply its own metrics. For future releases, we are working on the design of new metrics that are able to capture linguistic aspects of translation beyond lexical ones.</abstract>
@@ -342,18 +342,18 @@
     </paper>
     <paper id="38">
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <title>Annotating Bridging Anaphors in <fixed-case>I</fixed-case>talian: in Search of Reliability</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/80_pdf.pdf</url>
       <abstract>The aim of this work is the presentation and preliminary evaluation of an XML annotation scheme for marking bridging anaphors of the form definite article + N in Italian. The scheme is based on a corpus-study. The data we collected from the evaluation experiment seem to support the reliability of the scheme, although some problems still remain open.</abstract>
       <bibkey>caselli-prodanof-2006-annotating</bibkey>
     </paper>
     <paper id="39">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Christian</first><last>Gollan</last></author>
-      <author><first>Asuncion</first><last>Moreno</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <title><fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case>: New language resources for <fixed-case>ASR</fixed-case> and <fixed-case>SLT</fixed-case> purposes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/81_pdf.pdf</url>
       <abstract>In TC-STAR a variety of Language Resources (LR) is being produced. In this contribution we address the resources that have been created for Automatic Speech Recrognition and Spoken Language Translation. As yet, these are 14 LR in total: two training SLR for ASR (English and Spanish), three development LR and three evaluation LR for ASR (English, Spanish, Mandarin), and three development LR and three evaluation LR for SLT (English-Spanish, Spanish-English, Mandarin-English). In this paper we describe the properties, validation, and availability of these resources.</abstract>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="40">
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <title>Constructing A <fixed-case>C</fixed-case>hinese Chat Language Corpus with A Two-Stage Incremental Annotation Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/86_pdf.pdf</url>
@@ -378,7 +378,7 @@
     <paper id="42">
       <author><first>Yasunori</first><last>Ohishi</last></author>
       <author><first>Katunobu</first><last>Itou</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <author><first>Atsushi</first><last>Fujii</last></author>
       <title>Statistical Analysis for Thesaurus Construction using an Encyclopedic Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/88_pdf.pdf</url>
@@ -387,7 +387,7 @@
     </paper>
     <paper id="43">
       <author><first>Catherine</first><last>Havasi</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <title><fixed-case>BULB</fixed-case>: A Unified Lexical Browser</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/89_pdf.pdf</url>
@@ -395,8 +395,8 @@
       <bibkey>havasi-etal-2006-bulb</bibkey>
     </paper>
     <paper id="44">
-      <author><first>Ulrich</first><last>Schäfer</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <title>Automatic Testing and Evaluation of Multilingual Language Technology Resources and Components</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/91_pdf.pdf</url>
       <abstract>We describe SProUTomat, a tool for daily building, testing and evaluating a complex general-purpose multilingual natural language text processor including its linguistic resources (lingware). Software and lingware are developed, maintained and extended in a distributed manner by multiple authors and projects, i.e., the source code stored in a version control system is modified frequently. The modular design of different, dedicated lingware modules like tokenizers, morphology, gazetteers, type hierarchy, rule formalism on the one hand increases flexibility and re-usability, but on the other hand may lead to fragility with respect to changes. Therefore, frequent testing as known from software engineering is necessary also for lingware to warrant a high level of quality and overall stability of the system. We describe the build, testing and evaluation methods for LT software and lingware we have developed on the basis of the open source, platform-independent Apache Ant tool and the configurable evaluation tool JTaCo.</abstract>
@@ -411,7 +411,7 @@
     </paper>
     <paper id="46">
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Stefania</first><last>Racioppa</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <title>Ontology-based Information Extraction with <fixed-case>SOBA</fixed-case></title>
@@ -486,7 +486,7 @@
       <bibkey>ahmad-etal-2006-visual</bibkey>
     </paper>
     <paper id="55">
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <author><first>Antoine</first><last>Chalvin</last></author>
       <title>Dictionary Building with the Jibiki Platform: the <fixed-case>GDEF</fixed-case> case</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/105_pdf.pdf</url>
@@ -496,8 +496,8 @@
     <paper id="56">
       <author><first>Tomohiro</first><last>Ohno</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Yasuyoshi</first><last>Inagaki</last></author>
       <title>A Syntactically Annotated Corpus of <fixed-case>J</fixed-case>apanese Spoken Monologue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/106_pdf.pdf</url>
@@ -505,9 +505,9 @@
       <bibkey>ohno-etal-2006-syntactically</bibkey>
     </paper>
     <paper id="57">
-      <author><first>Jerneja Žganec</first><last>Gros</last></author>
+      <author id="jerneja-gros"><first>Jerneja Žganec</first><last>Gros</last></author>
       <author><first>Varja</first><last>Cvetko-Orešnik</last></author>
-      <author><first>Primož</first><last>Jakopin</last></author>
+      <author id="primoz-jakopin"><first>Primož</first><last>Jakopin</last></author>
       <author><first>Aleš</first><last>Mihelič</last></author>
       <title><fixed-case>SI</fixed-case>-<fixed-case>PRON</fixed-case>: A Pronunciation Lexicon for <fixed-case>S</fixed-case>lovenian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/111_pdf.pdf</url>
@@ -531,18 +531,18 @@
     </paper>
     <paper id="60">
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Jeremy</first><last>Kahn</last></author>
+      <author id="jeremy-g-kahn"><first>Jeremy</first><last>Kahn</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Anna</first><last>Krasnyanskaya</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
-      <author><first>Matthew</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
       <author><first>Robin</first><last>Stewart</last></author>
       <author><first>Lisa</first><last>Yung</last></author>
       <title><fixed-case>SP</fixed-case>arseval: Evaluation Metrics for Parsing Speech</title>
@@ -584,8 +584,8 @@
       <bibkey>tohyama-matsubara-2006-collection</bibkey>
     </paper>
     <paper id="65">
-      <author><first>Shuichi</first><last>Itahashi</last></author>
-      <author><first>Chiu-yu</first><last>Tseng</last></author>
+      <author id="shuichi-itahashi"><first>Shuichi</first><last>Itahashi</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-yu</first><last>Tseng</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <title>Oriental <fixed-case>COCOSDA</fixed-case>: Past, Present and Future</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/127_pdf.pdf</url>
@@ -625,10 +625,10 @@
     </paper>
     <paper id="68">
       <author><first>Sašo</first><last>Džeroski</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Nina</first><last>Ledinek</last></author>
       <author><first>Petr</first><last>Pajas</last></author>
-      <author><first>Zdenek</first><last>Žabokrtsky</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdenek</first><last>Žabokrtsky</last></author>
       <author><first>Andreja</first><last>Žele</last></author>
       <title>Towards a <fixed-case>S</fixed-case>lovene Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/133_pdf.pdf</url>
@@ -645,7 +645,7 @@
       <bibkey>kruengkrai-etal-2006-conditional</bibkey>
     </paper>
     <paper id="70">
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <author><first>Yasuyoshi</first><last>Inagaki</last></author>
       <title>A Corpus Search System Utilizing Lexical Dependency Structure</title>
@@ -655,35 +655,35 @@
     </paper>
     <paper id="71">
       <author><first>Peter</first><last>Berck</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
       <title><fixed-case>ANNEX</fixed-case> - a web-based Framework for Exploiting Annotated Media Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/139_pdf.pdf</url>
       <abstract>Manual annotation of various media streams, time series data and also text sequences is still a very time consuming work that has to be carried out in many areas of linguistics and beyond. Based on many theoretical discussions and practical experiences professional tools have been deployed such as ELAN that support the researcher in his/her work. Most of these annotation tools operate on local computers. However, since more and more language resources are stored in web-accessible archives, researchers want to take profit from the new possibilities. ANNEX was developed to fill this gap, since it allows web-based analysis of complex annotated media streams, i.e., the users dont have to download resources and dont have to download and install programs. By simply using a normal web-browser they can start their linguistic work. Yet, due to the architecture of the Internet, ANNEX does not offer the options to create annotations, but this feature will come. However, users have to be aware of the fact that media streaming does not offer that high accuracy as on local computers.</abstract>
       <bibkey>berck-russel-2006-annex</bibkey>
     </paper>
     <paper id="72">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title>The <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>lovene <fixed-case>ACQUIS</fixed-case> corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf</url>
       <abstract>The paper presents the SVEZ-IJS corpus, a large parallel annotated English-Slovene corpus containing translated legal texts of the European Union, the ACQUIS Communautaire. The corpus contains approx. 2 x 5 million words and was compiled from the translation memory obtained from the Translation Unit of the Slovene Government Office for European Affairs. The corpus is encoded in XML, accordingto the Text Encoding Initiative Guidelines TEI P4, where each translation memory unit contains useful metadata and the two aligned segments (sentences). Both the Slovene and English text islinguistically annotated at the word-level, by context disambiguatedlemmas and morphosyntactic descriptions, which follow the MULTEXTguidelines. The complete corpus is freely available for research, either via an on-line concordancer, or for downloading from the corpushome page at <url>http://nl.ijs.si/svez/</url>.</abstract>
       <bibkey>erjavec-2006-english</bibkey>
     </paper>
     <paper id="73">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Andreas</first><last>Claus</last></author>
       <author><first>Freddy</first><last>Offenga</last></author>
-      <author><first>Romuald</first><last>Skiba</last></author>
+      <author id="romuald-skiba"><first>Romuald</first><last>Skiba</last></author>
       <author><first>Paul</first><last>Trilsbeek</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title><fixed-case>LAMUS</fixed-case>: the Language Archive Management and Upload System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/141_pdf.pdf</url>
       <abstract>Language Archiving, Resource Management LAMUS is a web-based service that allows researchers to deposit their language resources into a language resources archive. It was developed at the MPI for Psycholinguistics for stricter control of the archive coherence and consistency and allowing wider use of the archiving facilities without increasing the workload for archive and corpus managers. LAMUS is based on the use of IMDI metadata standard for language resources and offers metadata search and browsing over the archive.</abstract>
       <bibkey>broeder-etal-2006-lamus</bibkey>
     </paper>
     <paper id="74">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Freddy</first><last>Offenga</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Peter</first><last>van der Kamp</last></author>
       <author><first>David</first><last>Nathan</last></author>
       <author><first>Sven</first><last>Strömqvist</last></author>
@@ -695,19 +695,19 @@
     <paper id="75">
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
       <author><first>Julien</first><last>Ducret</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>An <fixed-case>API</fixed-case> for accessing the Data Category Registry</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/146_pdf.pdf</url>
       <abstract>Central Ontologies are increasingly important to manage interoperability between different types of language resources. This was the reason for ISO to set up a new committee ISO TC37/SC4 taking care of language resource management issues. Central to the work of this committee is the definition of a framework for a central registry of data categories that are important in the domain of language resources. This paper describes an application programming interface that was designed to request services from this data category registry. The DCR is operational and the described API has already been tested from a lexicon application.</abstract>
       <bibkey>kemps-snijders-etal-2006-api</bibkey>
     </paper>
     <paper id="76">
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Wolfgang</first><last>Klein</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="wolfgang-klein"><first>Wolfgang</first><last>Klein</last></author>
       <author><first>Stephen</first><last>Levinson</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Foundations of Modern Language Resource Archives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/147_pdf.pdf</url>
       <abstract>A number of serious reasons will convince an increasing amount of researchers to store their relevant material in centers which we will call "language resource archives". They combine the duty of taking care of long-term preservation as well as the task to give access to their material to different user groups. Access here is meant in the sense that an active interaction with the data will be made possible to support the integration of new data, new versions or commentaries of all sorts. Modern Language Resource Archives will have to adhere to a number of basic principles to fulfill all requirements and they will have to be involved in federations to create joint language resource domains making it even simpler for the researchers to access the data. This paper makes an attempt to formulate the essential pillars language resource archives have to adhere to.</abstract>
@@ -715,10 +715,10 @@
     </paper>
     <paper id="77">
       <author><first>Freddy</first><last>Offenga</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Julien</first><last>Ducret</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Metadata Profile in the <fixed-case>ISO</fixed-case> Data Category Registry</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/148_pdf.pdf</url>
       <abstract>Metadata descriptions of language resources become an increasing necessity since the shear amount of language resources is increasing rapidly and especially since we are now creating infrastuctures to access these resources via the web through integrated domains of language resource archives. Yet, the metadata frameworks offered for the domain of language resources (IMDI and OLAC), although mature, are not as widely accepted as necessary. The lack of confidence in the stability and persistence of the concepts and formats introduced by these metadata sets seems to be one argument for people to not invest the time needed for metadata creation. The introduction of these concepts into an ISO standardization process may convince contributors to make use of the terminology. The availability of the ISO Data Category Registry that includes a metadata profile will also offer the opportunity for researchers to construct their own metadata set tailored to the needs of the project at hand, but nevertheless supporting interoperability.</abstract>
@@ -727,14 +727,14 @@
     <paper id="78">
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
       <author><first>Mark-Jan</first><last>Nederhof</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title><fixed-case>LEXUS</fixed-case>, a web-based tool for manipulating lexical resources lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/149_pdf.pdf</url>
       <abstract>LEXUS provides a flexible framework for the maintaining lexical structure and content. It is the first implementation of the Lexical Markup Framework model currently being developed at ISO TC37/SC4. Amongst its capabilities are the possibility to create lexicon structures, manipulate content and use of typed relations. Integration of well established Data Category Registries is supported to further promote interoperability by allowing access to well established linguistic concepts. Advanced linguistic functionality is offered to assist users in cross lexica operations such as search and comparison and merging of lexica. To enable use within various user groups the look and feel of each lexicon may be customized. In the near future more functionality will be added including integration with other tools accessing lexical content.</abstract>
       <bibkey>kemps-snijders-etal-2006-lexus</bibkey>
     </paper>
     <paper id="79">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <title>Building <fixed-case>S</fixed-case>lovene <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/150_pdf.pdf</url>
@@ -758,10 +758,10 @@
       <bibkey>johansson-nugues-2006-construction</bibkey>
     </paper>
     <paper id="82">
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
-      <author><first>Alex</first><last>Klassmann</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
+      <author id="alex-klassmann"><first>Alex</first><last>Klassmann</last></author>
       <author><first>Han</first><last>Sloetjes</last></author>
       <title><fixed-case>ELAN</fixed-case>: a Professional Framework for Multimodality Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/153_pdf.pdf</url>
@@ -772,8 +772,8 @@
       <author><first>Peter</first><last>Berck</last></author>
       <author><first>Hans-Jörg</first><last>Bibiko</last></author>
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Ontology-based Language Archive Utilization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/154_pdf.pdf</url>
       <abstract>At the MPI for Psycholinguistics a large archive with language resources has been created with contributions from many different individual researchers and research projects. All of these resources, in particular annotated media streams and multimedia lexica, are accessible via the web and can be utilized with the help of web-based utilization frameworks. Therefore, the archive lends itself to motivate users to operate across the boundaries of single corpora and to support cross-language work. This, however, can only be done when the problems of interoperability, in particular at the level of linguistic encoding, can be solved in an efficient way. Two Max-Planck-Institutes are cooperating to build a framework that allows users to easily create their own practical ontologies and if wanted to relate their concepts to central ontologies.</abstract>
@@ -791,8 +791,8 @@
     <paper id="85">
       <author><first>Andrej</first><last>Žgank</last></author>
       <author><first>Darinka</first><last>Verdonik</last></author>
-      <author><first>Aleksandra Zögling</first><last>Markuš</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="aleksandra-zogling-markus"><first>Aleksandra Zögling</first><last>Markuš</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title><fixed-case>SINOD</fixed-case> - <fixed-case>S</fixed-case>lovenian non-native speech database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/163_pdf.pdf</url>
       <abstract>This paper presents the SINOD database, which is the first Slovenian non-native speech database. It will be used to improve the performance of large vocabulary continuous speech recogniser for non-native speakers. The main quality impact is expected for acoustic models and recognisers vocabulary. The SINOD database is designed as supplement to the Slovenian BNSI Broadcast News database. The same BN recommendations were used for both databases. Two interviews with non-native Slovenian speakers were incorporated in the set. Both non-native speakers were female, whereas the journalist was Slovenian native male speaker. The transcription approach applied in the production phase is presented. Different statistics and analyses of database are given in the paper.</abstract>
@@ -808,7 +808,7 @@
       <bibkey>van-assem-etal-2006-conversion</bibkey>
     </paper>
     <paper id="87">
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <title>Transferring <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-tagging and lemmatization tools from spoken to written <fixed-case>D</fixed-case>utch corpus development</title>
@@ -817,17 +817,17 @@
       <bibkey>van-den-bosch-etal-2006-transferring</bibkey>
     </paper>
     <paper id="88">
-      <author><first>Mark</first><last>Przybocki</last></author>
-      <author><first>Gregory</first><last>Sanders</last></author>
-      <author><first>Audrey</first><last>Le</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
+      <author id="gregory-sanders"><first>Gregory</first><last>Sanders</last></author>
+      <author id="audrey-le"><first>Audrey</first><last>Le</last></author>
       <title>Edit Distance: A Metric for Machine Translation Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/168_pdf.pdf</url>
       <abstract>NIST has coordinated machine translation (MT) evaluations for several years using an automatic and repeatable evaluation measure. Under the Global Autonomous Language Exploitation (GALE) program, NIST is tasked with implementing an edit-distance-based evaluation of MT. Here edit distance is defined to be the number of modifications a human editor is required to make to a system translation such that the resulting edited translation contains the complete meaning in easily understandable English, as a single high-quality human reference translation. In preparation for this change in evaluation paradigm, NIST conducted two proof-of-concept exercises specifically designed to probe the data space, to answer questions related to editor agreement, and to establish protocols for the formal GALE evaluations. We report here our experimental design, the data used, and our findings for these exercises.</abstract>
       <bibkey>przybocki-etal-2006-edit</bibkey>
     </paper>
     <paper id="89">
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <author><first>Svend</first><last>Kiilerich</last></author>
       <title><fixed-case>H</fixed-case>. <fixed-case>C</fixed-case>. Andersen Conversation Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/169_pdf.pdf</url>
@@ -842,10 +842,10 @@
       <bibkey>sahlgren-2006-towards</bibkey>
     </paper>
     <paper id="91">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <title>A Model for Context-Based Evaluation of Language Processing Systems and its Application to Machine Translation Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/171_pdf.pdf</url>
       <abstract>In this paper, we propose a formal framework that takes into account the influence of the intended context of use of an NLP system on the procedure and the metrics used to evaluate the system. We introduce in particular the notion of a context-dependent quality model and explain how it can be adapted to a given context of use. More specifically, we define vector-space representations of contexts of use and of quality models, which are connected by a generic contextual quality model (GCQM). For each domain, experts in evaluation are needed to build a GCQM based on analytic knowledge and on previous evaluations, using the mechanism proposed here. The main inspiration source for this work is the FEMTI framework for the evaluation of machine translation, which implements partly the present model, and which is described briefly along with insights from other domains.</abstract>
@@ -853,7 +853,7 @@
     </paper>
     <paper id="92">
       <author><first>Martin</first><last>Forst</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <title>The importance of precise tokenizing for deep grammars</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/172_pdf.pdf</url>
       <abstract>We present a non-deterministic finite-state transducer that acts as a tokenizer and normalizer for free text that is input to a broad-coverage LFG of German. We compare the basic tokenizer used in an earlier version of the grammar and the more sophisticated tokenizer that we now use. The revised tokenizer increases the coverage of the grammar in terms of full parses from 68.3% to 73.4% on sentences 8,001 through 10,000 of the TiGer Corpus.</abstract>
@@ -877,7 +877,7 @@
     </paper>
     <paper id="95">
       <author><first>Bernt</first><last>Andrassy</last></author>
-      <author><first>Harald</first><last>Hoege</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Hoege</last></author>
       <title>Human and machine recognition as a function of <fixed-case>SNR</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/179_pdf.pdf</url>
       <abstract>In-car automatic speech recognition (ASR) is usually evaluated behaviour for different levels of noise. Yet this is interesting for car manufacturers in order to predict system performances for different speeds and different car models and thus allow to design speech based applications in a better way. It therefore makes sense to split the single WER into SNR dependent WERs, where SNR stands for the signal to noise ratio, which is an appropriate measure for the noise level. In this paper a SNR measure based on the concept of the Articulation Index is developed, which allows the direct comparison with human recognition performance.</abstract>
@@ -919,7 +919,7 @@
     </paper>
     <paper id="100">
       <author><first>Agnes</first><last>Lisowska</last></author>
-      <author><first>Nancy L.</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy L.</first><last>Underwood</last></author>
       <title><fixed-case>ROTE</fixed-case>: A Tool to Support Users in Defining the Relative Importance of Quality Characteristics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/187_pdf.pdf</url>
       <abstract>This paper describes the Relative Ordering Tool for Evaluation (ROTE) which is designed to support the process of building a parameterised quality model for evaluation. It is a very simple tool which enables users to specify the relative importance of quality characteristics (and associated metrics) to reflect the users' particular requirements. The tool allows users to order any number of quality characteristics by comparing them in a pair-wise fashion. The tool was developed in the context of a collaborative project developing a text mining system. A full scale evaluation of the text mining system was designed and executed for three different users and the ROTE tool was successfully applied by those users during that process. The tool will be made available for general use by the evaluation community.</abstract>
@@ -928,16 +928,16 @@
     <paper id="101">
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Using collocations from comparable corpora to find translation equivalents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/190_pdf.pdf</url>
       <abstract>In this paper we present a tool for finding appropriate translation equivalents for words from the general lexicon using comparable corpora. For a phrase in the source language the tool suggests arange of possible expressions used in similar contexts in target language corpora. In the paper we discuss the method and present results of human evaluation of the performance of the tool.</abstract>
       <bibkey>sharoff-etal-2006-using</bibkey>
     </paper>
     <paper id="102">
-      <author><first>Folkert</first><last>de Vriend</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="folkert-de-vriend"><first>Folkert</first><last>de Vriend</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
       <author><first>Joep</first><last>Kruijsen</last></author>
       <author><first>Jos</first><last>Swanenberg</last></author>
@@ -955,7 +955,7 @@
       <bibkey>ui-dhonnchadha-van-genabith-2006-part</bibkey>
     </paper>
     <paper id="104">
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <title>Language Challenges for Data Fusion in Question-Answering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/194_pdf.pdf</url>
       <abstract>Search engines on the web and most existing question-answering systems provide the user with a set of hyperlinks and/or web page extracts containing answer(s) to a question. These answers are often incoherent to a certain degree (equivalent, contradictory, etc.). It is then quite difficult for the user to know which answer is the correct one. In this paper, we present an approach which aims at providing synthetic numerical answers in a question-answering system. These answers are generated in natural language and, in a cooperative perspective, the aim is to explain to the user the variation of numerical values when several values, apparently incoherent, are extracted from the web as possible answers to a question. We present in particular how lexical resources are essential to answer extraction from the web, to the characterization of the variation mode associated with the type of information and to answer generation in natural language.</abstract>
@@ -969,17 +969,17 @@
       <bibkey>sarmento-2006-baco</bibkey>
     </paper>
     <paper id="106">
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <title><fixed-case>O</fixed-case>nto<fixed-case>NER</fixed-case>d<fixed-case>IE</fixed-case> – Mapping and Linking Ontologies to Named Entity Recognition and Information Extraction Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/196_pdf.pdf</url>
       <abstract>Semantic Web and NLP We describe an implemented offline procedure that maps OWL/RDF-encoded ontologies with large, dynamically maintained instance data to named entity recognition (NER) and information extraction (IE) engine resources, preserving hierarchical concept information and links back to the ontology concepts and instances. The main motivations are (i) improving NER/IE precision and recall in closed domains, (ii) exploiting linguistic knowledge (context, inflection, anaphora) for identifying ontology instances in texts more robustly, (iii) giving full access to ontology instances and concepts in natural language processing results, e.g. for subsequent ontology queries, navigation or inference, (iv) avoiding duplication of work in development and maintenance of similar resources in independent places, namely lingware and ontologies. We show an application in hybrid deep-shallow natural language processing that is e.g. used for question analysis in closed domains. Further applications could be automatic hyperlinking or other innovative semantic-web related applications.</abstract>
       <bibkey>schafer-2006-ontonerdie</bibkey>
     </paper>
     <paper id="107">
-      <author><first>Jonathan G.</first><last>Fiscus</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan G.</first><last>Fiscus</last></author>
       <author><first>Jerome</first><last>Ajot</last></author>
       <author><first>Nicolas</first><last>Radde</last></author>
-      <author><first>Christophe</first><last>Laprun</last></author>
+      <author id="christophe-laprun"><first>Christophe</first><last>Laprun</last></author>
       <title>Multiple Dimension <fixed-case>L</fixed-case>evenshtein Edit Distance Calculations for Evaluating Automatic Speech Recognition Systems During Simultaneous Speech</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/197_pdf.pdf</url>
       <abstract>Since 1987, the National Institute of Standards and Technology has been providing evaluation infrastructure for the Automatic Speech Recognition (ASR), and more recently referred to as the Speech-To-Text (STT), research community. From the first efforts in the Resource Management domain to the present research, the NIST SCoring ToolKit (SCTK) has formed the tool set for system developers to make continued progress in many domains; Wall Street Journal, Conversational Telephone Speech (CTS), Broadcast News (BN), and Meetings (MTG) to name a few. For these domains, the community agreed to declared sections of simultaneous speech as not scoreable. While this had minor impact on most of these domains, the highly interactive nature of Meeting speech rendered a very large fraction of the test material not scoreable. This paper documents a multi-dimensional extension of the Dynamic Programming solution to Levenshtein Edit Distance calculations capable of evaluating STT systems during periods of overlapping, simultaneous speech.</abstract>
@@ -998,7 +998,7 @@
       <bibkey>atserias-etal-2006-freeling</bibkey>
     </paper>
     <paper id="109">
-      <author><first>Jiří</first><last>Semecký</last></author>
+      <author id="jiri-semecky"><first>Jiří</first><last>Semecký</last></author>
       <title>On Automatic Assignment of Verb Valency Frames in <fixed-case>C</fixed-case>zech</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/199_pdf.pdf</url>
       <abstract>Many recent NLP applications, including machine translation and information retrieval, could benefit from semantic analysis of language data on the sentence level. This paper presents a method for automatic disambiguation of verb valency frames on Czech data. For each verb occurrence, we extracted features describing its local context. We experimented with diverse types of features, including morphological, syntax-based, idiomatic, animacy and WordNet-based features. The main contribution of the paper lies in determining which ones are most useful for the disambiguation task. The considered features were classified using decision trees, rule-based learning and a Naïve Bayes classifier. We evaluated the methods using 10-fold cross-validation on VALEVAL, a manually annotated corpus of frame annotations containing 7,778 sentences. Syntax-based features have shown to be the most effective. When we used the full set of features, we achieved an accuracy of 80.55% against the baseline 67.87% obtained by assigning the most frequent frame.</abstract>
@@ -1012,7 +1012,7 @@
       <bibkey>medlock-2006-introduction</bibkey>
     </paper>
     <paper id="111">
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <author><first>Véronique</first><last>Malaisé</last></author>
       <author><first>Luit</first><last>Gazendam</last></author>
       <title>A Web Based General Thesaurus Browser to Support Indexing of Television and Radio Programs</title>
@@ -1023,7 +1023,7 @@
     <paper id="112">
       <author><first>Judit</first><last>Feliu</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>M. Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>M. Teresa</first><last>Cabré</last></author>
       <title><fixed-case>SKELETON</fixed-case>: Specialised knowledge retrieval on the basis of terms and conceptual relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/203_pdf.pdf</url>
       <abstract>The main goal of this paper is to present a first approach to an automatic detection of conceptual relations between two terms in specialised written text. Previous experiments on the basis of the manual analysis lead the authors to implement an automatic query strategy combining the term candidates proposed by an extractor together with a list of verbal syntactic patterns used for the relations refinement. Next step on the research will be the integration of the results into the term extractor in order to attain more restrictive pieces of information directly reused for the ontology building task.</abstract>
@@ -1031,7 +1031,7 @@
     </paper>
     <paper id="113">
       <author><first>Irene</first><last>Cramer</last></author>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <title>Building an Evaluation Corpus for <fixed-case>G</fixed-case>erman Question Answering by Harvesting <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf</url>
@@ -1048,8 +1048,8 @@
     <paper id="115">
       <author><first>Benjamin</first><last>Waldron</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
       <title>Preprocessing and Tokenisation Standards in <fixed-case>DELPH</fixed-case>-<fixed-case>IN</fixed-case> Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/214_pdf.pdf</url>
       <abstract>We discuss preprocessing and tokenisation standards within DELPH-IN, a large scale open-source collaboration providing multiple independent multilingual shallow and deep processors. We discuss (i) a component-specific XML interface format which has been used for some time to interface preprocessor results to the PET parser, and (ii) our implementation of a more generic XML interface format influenced heavily by the (ISO working draft) Morphosyntactic Annotation Framework (MAF). Our generic format encapsulates the information which may be passed from the preprocessing stage to a parser: it uses standoff-annotation, a lattice for the representation of structural ambiguity, intra-annotation dependencies and allows for highly structured annotation content. This work builds on the existing Heart of Gold middleware system, and previous work on Robust Minimal Recursion Semantics (RMRS) as part of an inter-component interface. We give examples of usage with a number of the DELPH-IN processing components and deep grammars.</abstract>
@@ -1057,9 +1057,9 @@
     </paper>
     <paper id="116">
       <author><first>Juri</first><last>Apresjan</last></author>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <author><first>Boris</first><last>Iomdin</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <author><first>Andrei</first><last>Sannikov</last></author>
       <author><first>Victor</first><last>Sizov</last></author>
       <title>A Syntactically and Semantically Tagged Corpus of <fixed-case>R</fixed-case>ussian: State of the Art and Prospects</title>
@@ -1110,9 +1110,9 @@
       <bibkey>nivre-etal-2006-talbanken05</bibkey>
     </paper>
     <paper id="122">
-      <author><first>Oana</first><last>Postolache</last></author>
+      <author id="oana-postolache"><first>Oana</first><last>Postolache</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <title>Transferring Coreference Chains through Word Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/224_pdf.pdf</url>
       <abstract>This paper investigates the problem of automatically annotating resources with NP coreference information using a parallel corpus, English-Romanian, in order to transfer, through word alignment, coreference chains from the English part to the Romanian part of the corpus. The results show that we can detect Romanian referential expressions and coreference chains with over 80% F-measure, thus using our method as a preprocessing step followed by manual correction as part of an annotation effort for creating a large Romanian corpus with coreference information is worthwhile.</abstract>
@@ -1142,10 +1142,10 @@
     </paper>
     <paper id="125">
       <author><first>Olena</first><last>Medelyan</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <author><first>Jan</first><last>Paetzold</last></author>
       <author><first>Michael</first><last>Poprat</last></author>
-      <author><first>Kornél</first><last>Markó</last></author>
+      <author id="kornel-marko"><first>Kornél</first><last>Markó</last></author>
       <title>Language Specific and Topic Focused Web Crawling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/228_pdf.pdf</url>
       <abstract>We describe an experiment on collecting large language and topic specific corpora automatically by using a focused Web crawler. Our crawler combines efficient crawling techniques with a common text classification tool. Given a sample corpus of medical documents, we automatically extract query phrases and then acquire seed URLs with a standard search engine. Starting from these seed URLs, the crawler builds a new large collection consisting only of documents that satisfy both the language and the topic model. The manual analysis of acquired English and German medicine corpora reveals the high accuracy of the crawler. However, there are significant differences between both languages.</abstract>
@@ -1159,7 +1159,7 @@
       <bibkey>reynaert-2006-corpus</bibkey>
     </paper>
     <paper id="127">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Maria</first><last>Georgescul</last></author>
       <title><fixed-case>TQB</fixed-case>: Accessing Multimodal Data Using a Transcript-based Query and Browsing Interface</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/233_pdf.pdf</url>
@@ -1168,8 +1168,8 @@
     </paper>
     <paper id="128">
       <author><first>Feng</first><last>Pan</last></author>
-      <author><first>Rutu</first><last>Mulkar</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <title>An Annotated Corpus of Typical Durations of Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/234_pdf.pdf</url>
       <abstract>In this paper, we present our work on generating an annotated corpus for extracting information about the typical durations of events from texts. We include the annotation guidelines, the event classes we categorized, the way we use normal distributions to model vague and implicit temporal information, and how we evaluate inter-annotator agreement. The experimental results show that our guidelines are effective in improving the inter-annotator agreement.</abstract>
@@ -1177,7 +1177,7 @@
     </paper>
     <paper id="129">
       <author><first>Agam</first><last>Patel</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <title>Lexical similarity can distinguish between automatic and manual translations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/235_pdf.pdf</url>
       <abstract>We consider the problem of identifying automatic translations from manual translations of the same sentence. Using two different similarity metrics (BLEU and Levenshtein edit distance), we found out that automatic translations are closer to each other than they are to manual translations. We also use phylogenetic trees to provide a visual representation of the distances between pairs of individual sentences in a set of translations. The differences in lexical distance are statistically significant, both for Chinese to English and for Arabic to English translations.</abstract>
@@ -1206,8 +1206,8 @@
     </paper>
     <paper id="132">
       <author><first>Doaa</first><last>Samy</last></author>
-      <author><first>Antonio Moreno</first><last>Sandoval</last></author>
-      <author><first>José M.</first><last>Guirao</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio Moreno</first><last>Sandoval</last></author>
+      <author id="jose-m-guirao"><first>José M.</first><last>Guirao</last></author>
       <author><first>Enrique</first><last>Alfonseca</last></author>
       <title>Building a Parallel Multilingual Corpus (<fixed-case>A</fixed-case>rabic-<fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/238_pdf.pdf</url>
@@ -1215,16 +1215,16 @@
       <bibkey>samy-etal-2006-building</bibkey>
     </paper>
     <paper id="133">
-      <author><first>Donna K.</first><last>Byron</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <title>The <fixed-case>OSU</fixed-case> Quake 2004 corpus of two-party situated problem-solving dialogs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/241_pdf.pdf</url>
       <abstract>This report describes the Ohio State University Quake 2004 corpus of English spontaneous task-oriented two-person situated dialog. The corpus was collected using a first-person display of an interior space (rooms, corridors, stairs) in which the partners collaborate on a treasure hunt task. The corpus contains exciting new features such as deictic and exophoric reference, language that is calibrated against the spatial arrangement of objects in the world, and partial-observability of the task world imposed by the perceptual limitations inherent in the physical arrangement of the world. The corpus differs from prior dialog collections which intentionally restricted the interacting subjects from sharing any perceptual context, and which allowed one subject (the direction-giver or system) to have total knowledge of the state of the task world. The corpus consists of audio/video recordings of each person's experience in the virtual world and orthographic transcriptions. The virtual world can also be used by other researchers who want to conduct additional studies using this stimulus.</abstract>
       <bibkey>byron-fosler-lussier-2006-osu</bibkey>
     </paper>
     <paper id="134">
-      <author><first>Md. Aminul</first><last>Islam</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="aminul-islam"><first>Md. Aminul</first><last>Islam</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <title>Second Order Co-occurrence <fixed-case>PMI</fixed-case> for Determining the Semantic Similarity of Words</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/242_pdf.pdf</url>
       <abstract>This paper presents a new corpus-based method for calculating the semantic similarity of two target words. Our method, called Second Order Co-occurrencePMI (SOC-PMI), uses Pointwise Mutual Information to sort lists of important neighbor words of the two target words. Then we consider the words which are common in both lists and aggregate their PMI values (from the opposite list) to calculate the relative semantic similarity. Our method was empirically evaluated using Miller and Charlers (1991) 30 noun pair subset, Ruben-stein and Goodenoughs (1965) 65 noun pairs, 80 synonym test questions from the Test of English as a Foreign Language (TOEFL), and 50 synonym test questions from a collection of English as a Second Language (ESL) tests. Evaluation results show that our method outperforms several competing corpus-based methods.</abstract>
@@ -1248,7 +1248,7 @@
     </paper>
     <paper id="137">
       <author><first>Václav</first><last>Novák</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <title>Perspectives of Turning <fixed-case>P</fixed-case>rague Dependency Treebank into a Knowledge Base</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/245_pdf.pdf</url>
       <abstract>Recently, the Prague Dependency Treebank 2.0 (PDT 2.0) has emerged as the largest text corpora annotated on the level of tectogrammatical representation (linguistic meaning) described in Sgall et al. (2004) and containing about 0.8 milion words (see Hajic (2004)). We hope that this level of annotation is so close to the meaning of the utterances contained in the corpora that it should enable us to automatically transform texts contained in the corpora to the form of knowledge base, usable for information extraction, question answering, summarization, etc. We can use Multilayered Extended Semantic Networks (MultiNet) described in Helbig (2006) as the target formalism. In this paper we discuss the suitability of such approach and some of the main issues that will arise in the process. In section 1, we introduce formalisms underlying PDT 2.0 and MultiNet, in section 2. We describe the role MultiNet can play in the system of Functional Generative Description (FGD), section 3 discusses issues of automatic conversion to MultiNet and section 4 gives some conclusions.</abstract>
@@ -1270,7 +1270,7 @@
       <author><first>Qian</first><last>Yang</last></author>
       <author><first>Jean-Pierre</first><last>Martens</last></author>
       <author><first>Nanneke</first><last>Konings</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>Development of a phoneme-to-phoneme (p2p) converter to improve the grapheme-to-phoneme (g2p) conversion of names</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/248_pdf.pdf</url>
       <abstract>It is acknowledged that a good phonemic transcription of proper names is imperative for the success of many modern speech-based services such as directory assistance, car navigation, etc. It is also known that state-of-the-art general-purpose grapheme-to-phoneme (g2p) converters perform rather poorly on many name categories. This paper proposes to use a g2p-p2p tandem comprising a state-of-the-art general-purpose g2p converter that produces an initial transcription and a name category specific phoneme-to-phoneme (p2p) converter that aims at correcting the mistakes made by the g2p converter. The main body of the paper describes a novel methodology for the automatic construction of the p2p converter. The methodology is implemented in a software toolbox that will be made publicly available in a form that will permit the user to design a p2p converter for an arbitrary name category. To give a proof of concept, the toolbox was used for the development of three p2p converters for first names, surnames and geographical names respectively. The obtained systems are small (few rules) and effective: significant improvements (up to 50% relative) of the grapheme-to-phoneme conversion are obtained. These encouraging results call for a further development and improvement of the approach.</abstract>
@@ -1279,15 +1279,15 @@
     <paper id="140">
       <author><first>Jaeyoung</first><last>Jung</last></author>
       <author><first>Maki</first><last>Miyake</last></author>
-      <author><first>Hiroyuki</first><last>Akam</last></author>
+      <author id="hiroyuki-akama"><first>Hiroyuki</first><last>Akam</last></author>
       <title>Recurrent <fixed-case>M</fixed-case>arkov Cluster (<fixed-case>RMCL</fixed-case>) Algorithm for the Refinement of the Semantic Network</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/249_pdf.pdf</url>
       <abstract>The purpose of this work is to propose a new methodology to ameliorate the Markov Cluster (MCL) Algorithm that is well known as an efficient way of graph clustering (Van Dongen, 2000). The MCL when applied to a graph of word associations has the effect of producing concept areas in which words are grouped into the similar topics or similar meanings as paradigms. However, since a word is determined to belong to only one cluster that represents a concept, Markov clusters cannot show the polysemy or semantic indetermination among the properties of natural language. Our Recurrent MCL (RMCL) allows us to create a virtual adjacency relationship among the Markov hard clusters and produce a downsized and intrinsically informative semantic network of word association data. We applied one of the RMCL algorithms (Stepping-stone type) to a Japanese associative concept dictionary and obtained a satisfactory level of performance in refining the semantic network generated from MCL.</abstract>
       <bibkey>jung-etal-2006-recurrent</bibkey>
     </paper>
     <paper id="141">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Olga</first><last>van Herwijnen</last></author>
       <author><first>Felix</first><last>Smits</last></author>
       <title><fixed-case>JASMIN</fixed-case>-<fixed-case>CGN</fixed-case>: Extension of the Spoken <fixed-case>D</fixed-case>utch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality</title>
@@ -1296,7 +1296,7 @@
       <bibkey>cucchiarini-etal-2006-jasmin</bibkey>
     </paper>
     <paper id="142">
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <author><first>Sébastien</first><last>Paumier</last></author>
       <title>A framework for real-time dictionary updating</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/255_pdf.pdf</url>
@@ -1304,8 +1304,8 @@
       <bibkey>fairon-paumier-2006-framework</bibkey>
     </paper>
     <paper id="143">
-      <author><first>Vicente</first><last>Alabau</last></author>
-      <author><first>Carlos D.</first><last>Martínez</last></author>
+      <author id="vicent-alabau"><first>Vicente</first><last>Alabau</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez</last></author>
       <title>Bilingual speech corpus in two phonetically similar languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/256_pdf.pdf</url>
       <abstract>As Speech Recognition Systems improve, they become suitable for facingnew problems. Multilingual speech recognition is one such problems. In the present work, the case of the Comunitat Valenciana multilingual environment is studied. The official languages in the Comunitat Valenciana (Spanish and Valencian) share most of their acoustic units, and their vocabularies and syntax are quite similar. They have influenced each other for many years.A small corpus on an Information System task was developed for experimentationpurposes.This choice will make it possible to develop a working prototype in the future,and it is simple enough to build semi-automatic language models. The design of the acoustic corpus is discussed, showing that all combinations of accents have been studied (native, non-native speakers, male, female, etc.).</abstract>
@@ -1315,7 +1315,7 @@
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <title><fixed-case>METIS</fixed-case>-<fixed-case>II</fixed-case>: Machine Translation for Low Resource Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/258_pdf.pdf</url>
@@ -1323,10 +1323,10 @@
       <bibkey>vandeghinste-etal-2006-metis</bibkey>
     </paper>
     <paper id="145">
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
       <author><first>Jan</first><last>Odijk</last></author>
-      <author><first>Lisanne</first><last>Teunissen</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="lisanne-teunissen"><first>Lisanne</first><last>Teunissen</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <title>The <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish <fixed-case>HLT</fixed-case> Programme <fixed-case>STEVIN</fixed-case>: Essential Speech and Language Technology Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/259_pdf.pdf</url>
       <abstract>In 2004 a consortium of ministries and organizations in the Netherlands and Flanders launched the comprehensive Dutch-Flemish HLT programme STEVIN (a Dutch acronym for Essential Speech and Language Technology Resources). To guarantee its Dutch-Flemish character, this large-scale programme is carried out under the auspices of the intergovernmental Dutch Language Union (NTU). The aim of STEVIN is to contribute to the further progress of HLT for the Dutch language, by raising awareness of HLT results, stimulating the demand of HLT products, promoting strategic research in HLT, and developing HLT resources that are essential and are known to be missing. Furthermore, a structure was set up for the management, maintenance and distribution of HLT resources. The STEVIN programme, which will run from 2004 to 2009, resulted from HLT activities in the Dutch language area, which were reported on at previous LREC conferences (2000, 2002, 2004). In this paper we will explain how different activities are combined in one comprehensive programme. We will show how cooperation can successfully be realized between different parties (language and speech technology, Flanders and the Netherlands, academia, industry and policy institutions) so as to achieve one common goal: progress in HLT.</abstract>
@@ -1350,7 +1350,7 @@
       <bibkey>goecke-witt-2006-exploiting</bibkey>
     </paper>
     <paper id="148">
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <author><first>Sébastien</first><last>Paumier</last></author>
       <title>A translated corpus of 30,000 <fixed-case>F</fixed-case>rench <fixed-case>SMS</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/270_pdf.pdf</url>
@@ -1369,7 +1369,7 @@
     </paper>
     <paper id="150">
       <author><first>Anna</first><last>Sinopalnikova</last></author>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <title>Intelligent Dictionary Interfaces: Usability Evaluation of Access-Supporting Enhancements</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/275_pdf.pdf</url>
       <abstract>The present paper describes psycholinguistic experiments aimed at exploring the way people behave while accessing electronic dictionaries. In our work we focused on the access by meaning that, in comparison with the access by form, is currently less studied and very seldom implemented in modern dictionary interfaces. Thus, the goal of our experiments was to explore dictionary users requirements and to study what services an intelligent dictionary interface should be able to supply to help solving access by meaning problems. We tested several access-supporting enhancements of electronic dictionaries based on various language resources (corpora, wordnets, word association norms and explanatory dictionaries). Experiments were carried out with native speakers of three European languages  English, Czech and Russian. Results for monolingual and bilingual cases are presented.</abstract>
@@ -1385,8 +1385,8 @@
       <bibkey>mogele-etal-2006-smartweb</bibkey>
     </paper>
     <paper id="152">
-      <author><first>Markéta</first><last>Lopatková</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Karolina</first><last>Skwarska</last></author>
       <title>Valency Lexicon of <fixed-case>C</fixed-case>zech Verbs: Alternation-Based Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/278_pdf.pdf</url>
@@ -1424,7 +1424,7 @@
       <bibkey>kawtrakul-etal-2006-ontology</bibkey>
     </paper>
     <paper id="156">
-      <author><first>Corina</first><last>Forăscu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forăscu</last></author>
       <author><first>Ionuț Cristian</first><last>Pistol</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
       <title>Temporality in relation with discourse structure</title>
@@ -1433,15 +1433,15 @@
       <bibkey>forascu-etal-2006-temporality</bibkey>
     </paper>
     <paper id="157">
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <title>Corpus Annotation as a Test of a Linguistic Theory</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/283_pdf.pdf</url>
       <abstract>In the present contribution we claim that corpus annotation serves, among other things, as an invaluable test for linguistic theories standing behind the annotation schemes, and as such represents an irreplaceable resource of linguistic information for the build-up of grammars. To support this claim we present four linguistic phenomena for the study and relevant description of which in grammar a deep layer of corpus annotation as introduced in the Prague Dependency Treebank has brought important observations, namely the information structure of the sentence, condition of projectivity and word order, types of dependency relations and textual coreference.</abstract>
       <bibkey>hajicova-sgall-2006-corpus</bibkey>
     </paper>
     <paper id="158">
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Magdelena</first><last>Prokopová</last></author>
       <title><fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Word Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/285_pdf.pdf</url>
@@ -1449,7 +1449,7 @@
       <bibkey>bojar-prokopova-2006-czech</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Emiliano</first><last>Guevara</last></author>
+      <author id="emiliano-raul-guevara"><first>Emiliano</first><last>Guevara</last></author>
       <author><first>Sergio</first><last>Scalise</last></author>
       <author><first>Antonietta</first><last>Bisetto</last></author>
       <author><first>Chiara</first><last>Melloni</last></author>
@@ -1467,7 +1467,7 @@
       <bibkey>sonntag-romanelli-2006-multimodal</bibkey>
     </paper>
     <paper id="161">
-      <author><first>Nicole</first><last>Grégoire</last></author>
+      <author id="nicole-gregoire"><first>Nicole</first><last>Grégoire</last></author>
       <title>Elaborating the parameterized Equivalence Class Method for <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/292_pdf.pdf</url>
       <abstract>This paper discusses the parameterized Equivalence Class Method for Dutch, an approach developed to incorporate standard lexical representations for Dutch idioms into representations required by any specific NLP system with as minimal manual work as possible. The purpose of the paper is to give an overview of parameters applicable to Dutch, which are determined by examining a large set of data and two Dutch NLP systems. The effects of the introduced parameters are evaluated and the results presented.</abstract>
@@ -1477,14 +1477,14 @@
       <author><first>Anders</first><last>Green</last></author>
       <author><first>Helge</first><last>Hüttenrauch</last></author>
       <author><first>Elin Anna</first><last>Topp</last></author>
-      <author><first>Kerstin</first><last>Severinson</last></author>
+      <author id="kerstin-severinson-eklundh"><first>Kerstin</first><last>Severinson</last></author>
       <title>Developing a <fixed-case>C</fixed-case>ontextualized<fixed-case>M</fixed-case>ultimodal Corpus for Human-Robot Interaction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/293_pdf.pdf</url>
       <abstract>This paper describes the development process of a contextualized corpus for research on Human-Robot Communication. The data have been collected in two Wizard-of-Oz user studies performedwith 22 and 5 users respectively in a scenario that is called the HomeTour. In this scenario the users show the environment (a single room, or a whole floor) to the robot using a combination of speech and gestures. The corpus has been transcribed and annotated with respect to gestures and conversational acts, thus forming a core annotation. We have also annotated or linked other types of data, e.g., laser range finder readings, positioning analysis, questionnaire data and task descriptions that form the annotated context of the scenario. By providing a rich set of different annotated data, thecorpus is thus an important resource both for research on natural language speech interfaces for robots and for research on human-robot communication in general.</abstract>
       <bibkey>green-etal-2006-developing</bibkey>
     </paper>
     <paper id="163">
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <title>Uniform and Effective Tagging of a Heterogeneous Giga-word Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/294_pdf.pdf</url>
@@ -1494,14 +1494,14 @@
     <paper id="164">
       <author><first>Ana-Maria</first><last>Barbu</last></author>
       <author><first>Emil</first><last>Ionescu</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <title><fixed-case>R</fixed-case>omanian Valence Dictionary in <fixed-case>XML</fixed-case> Format</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/295_pdf.pdf</url>
       <abstract>Valence dictionaries are dictionaries in which logical predicates (most of the times verbs) are inventoried alongside with the semantic and syntactic information regarding the role of the arguments with which they combine, as well as the syntactic restrictions these arguments have to obey. In this article we present the incipient stage of the project Syntactic and semantic database in XML format: an HPSG representation of verb valences in Romanian. Its aim is the development of a valence dictionary in XML format for a set of 3000 Romanian verbs. Valences are specified for each sense of each verb, alongside with an illustrative example, possible argument alternations and a set of multiword expressions in which the respective verb occurs with the respective sense. The grammatical formalism we make use of is Head-driven Phrase Structure Grammar, which offers one of the most comprehensive frames of encoding various types of linguistic information for lexical items. XML is the most appropriate mark-up language for describing information structured in HPSG framework. The project can be further on extended so that to cover all Romanian verbs (around 7000) and also other predicates (nouns, adjectives, prepositions).</abstract>
       <bibkey>barbu-etal-2006-romanian</bibkey>
     </paper>
     <paper id="165">
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <author><first>Thomas K.</first><last>Hansen</last></author>
       <author><first>Svend</first><last>Kiilerich</last></author>
       <author><first>Torben Kruchov</first><last>Madsen</last></author>
@@ -1513,7 +1513,7 @@
     <paper id="166">
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <title>Mining Implicit Entities in Queries</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/297_pdf.pdf</url>
       <abstract>Entities are pivotal in describing events and objects, and also very important in Document Summarization. In general only explicit entities which can be extracted by a Named Entity Recognizer are used in real applications. However, implicit entities hidden behind the phrases or words, e.g. entity referred by the phrase cross border, are proved to be helpful in Document Summarization. In our experiment, we extract the implicit entities from the web resources.</abstract>
@@ -1550,8 +1550,8 @@
       <bibkey>areta-etal-2006-structure</bibkey>
     </paper>
     <paper id="169">
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <title>Learning Database Content for Spoken Dialogue System Design</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/301_pdf.pdf</url>
       <abstract>Spoken dialogue systems are common interfaces to backend data in information retrieval domains. As more data is made available on the Web and IE technology matures, dialogue systems, whether they be speech- or text-based, will be more in demand to provide user-friendly access to this data. However, dialogue systems must become both easier to configure, as well as more informative than the traditional form-based systems that are currently available. We present techniques in this paper to address the issue of automating both content selection for use in summary responses and in system initiative queries.</abstract>
@@ -1559,7 +1559,7 @@
     </paper>
     <paper id="170">
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <title>Bilingual Machine-Aided Indexing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/304_pdf.pdf</url>
       <abstract>The proliferation of multilingual documentation in our Information Society has become a common phenomenon. This documentation is usually categorised by hand, entailing a time-consuming and arduous burden. This is particularly true in the case of keyword assignment, in which a list of keywords (descriptors) from a controlled vocabulary (thesaurus) is assigned to a document. A possible solution to alleviate this problem comes from the hand of the so-called Machine-Aided Indexing (MAI) systems. These systems work in cooperation with professional indexer by providing a initial list of descriptors from which those most appropiated will be selected. This way of proceeding increases the productivity and eases the task of indexers. In this paper, we propose a statistical text classification framework for bilingual documentation, from which we derive two novel bilingual classifiers based on the naive combination of monolingual classifiers. We report preliminary results on the multilingual corpus Acquis Communautaire (AC) that demonstrates the suitability of the proposed classifiers as the backend of a fully-working MAI system.</abstract>
@@ -1575,7 +1575,7 @@
       <bibkey>panunzi-etal-2006-integrating</bibkey>
     </paper>
     <paper id="172">
-      <author><first>Luís Fernando</first><last>Costa</last></author>
+      <author id="luis-fernando-costa"><first>Luís Fernando</first><last>Costa</last></author>
       <author><first>Luís</first><last>Sarmento</last></author>
       <title>Component Evaluation in a Question Answering System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/306_pdf.pdf</url>
@@ -1594,8 +1594,8 @@
     </paper>
     <paper id="174">
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <title>A Deep Linguistic Analysis for Cross-language Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/308_pdf.pdf</url>
       <abstract>Cross-language information retrieval consists in providing a query in one language and searching documents in one or different languages. These documents are ordered by the probability of being relevant to the user's request. The highest ranked document is considered to be the most likely relevant document. The LIC2M cross-language information retrieval system is a weighted Boolean search engine based on a deep linguistic analysis of the query and the documents. This system is composed of a linguistic analyzer, a statistic analyzer, a reformulator, a comparator and a search engine. The linguistic analysis processes both documents to be indexed and queries to extract concepts representing their content. This analysis includes a morphological analysis, a part-of-speech tagging and a syntactic analysis. In this paper, we present the deep linguistic analysis used in the LIC2M cross-lingual search engine and we will particularly focus on the impact of the syntactic analysis on the retrieval effectiveness.</abstract>
@@ -1612,7 +1612,7 @@
     <paper id="176">
       <author><first>Lina</first><last>Henriksen</last></author>
       <author><first>Claus</first><last>Povlsen</last></author>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <title><fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank - a Terminology Resource based on Best Practice</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/310_pdf.pdf</url>
       <abstract>The new EU member countries face the problems of terminology resource fragmentation and lack of coordination in terminology development in general. The EuroTermBank project aims at contributing to improve the terminology infrastructure of the new EU countries and the project will result in a centralized online terminology bank - interlinked to other terminology banks and resources - for languages of the new EU member countries. The main focus of this paper is on a description of how to identify best practice within terminology work seen from a broad perspective. Surveys of real life terminology work have been conducted and these surveys have resulted in identification of scenario specific best practice descriptions of terminology work. Furthermore, this paper will present an outline of the specific criteria that have been used for selection of existing term resources to be included in the EuroTermBank database.</abstract>
@@ -1620,12 +1620,12 @@
     </paper>
     <paper id="177">
       <author><first>Florbela</first><last>Barreto</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Eduardo</first><last>Ferreira</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
-      <author><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
       <author><first>Filipe</first><last>Nunes</last></author>
-      <author><first>João Ricardo</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João Ricardo</first><last>Silva</last></author>
       <title>Open Resources and Tools for the Shallow Processing of <fixed-case>P</fixed-case>ortuguese: The <fixed-case>T</fixed-case>ag<fixed-case>S</fixed-case>hare Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/311_pdf.pdf</url>
       <abstract>This paper presents the TagShare project and the linguistic resources and tools for the shallow processing of Portuguese developed in its scope. These resources include a 1 million token corpus that has been accurately hand annotated with a variety of linguistic information, as well as several state of the art shallow processing tools capable of automatically producing that type of annotation. At present, the linguistic annotations in the corpus are sentence and paragraph boundaries, token boundaries, morphosyntactic POS categories, values of inflection features, lemmas and namedentities. Hence, the set of tools comprise a sentence chunker, a tokenizer, a POS tagger, nominal and verbal analyzers and lemmatizers, a verbal conjugator, a nominal inflector, and a namedentity recognizer, some of which underline several online services.</abstract>
@@ -1636,14 +1636,14 @@
       <author><first>Belinda</first><last>Maia</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <author><first>Ana</first><last>Pinto</last></author>
-      <author><first>Luís</first><last>Cabral</last></author>
+      <author id="luis-miguel-cabral"><first>Luís</first><last>Cabral</last></author>
       <title>Corpógrafo V3 - From Terminological Aid to Semi-automatic Knowledge Engineering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/312_pdf.pdf</url>
       <abstract>In this paper we will present Corpógrafo, a mature web-based environment for working with corpora, for terminology extraction, and for ontology development. We will explain Corpógrafos workflow and describe the most important information extraction methods used, namely its term extraction, and definition / semantic relations identification procedures. We will describe current Corpógrafo users and present a brief overview of the XML format currently used to export terminology databases. Finally, we present future improvements for this tool.</abstract>
       <bibkey>sarmento-etal-2006-corpografo</bibkey>
     </paper>
     <paper id="179">
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
       <title>On the data base of <fixed-case>R</fixed-case>omanian syllables and some of its quantitative and cryptographic aspects</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/313_pdf.pdf</url>
@@ -1652,7 +1652,7 @@
     </paper>
     <paper id="180">
       <author><first>Alessandro Bahgat</first><last>Shehata</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>A Dependency-based Algorithm for Grammar Conversion</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/314_pdf.pdf</url>
       <abstract>In this paper we present a model to transfer a grammatical formalism in another. The model is applicable only on restrictive conditions. However, it is fairly useful for many purposes: parsing evaluation, researching methods for truly combining different parsing outputs to reach better parsing performances, and building larger syntactically annotated corpora for data-driven approaches. The model has been tested over a case study: the translation of the Turin Tree Bank Grammar to the Shallow Grammar of the CHAOS Italian parser.</abstract>
@@ -1676,8 +1676,8 @@
     </paper>
     <paper id="183">
       <author><first>Chloé</first><last>Clavel</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <author><first>Thibaut</first><last>Ehrette</last></author>
       <author><first>Gaël</first><last>Richard</last></author>
       <title>Fear-type emotions of the <fixed-case>SAFE</fixed-case> Corpus: annotation issues</title>
@@ -1688,7 +1688,7 @@
     <paper id="184">
       <author><first>Arne</first><last>Mauser</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Training a Statistical Machine Translation System without <fixed-case>GIZA</fixed-case>++</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/320_pdf.pdf</url>
       <abstract>The IBM Models (Brown et al., 1993) enjoy great popularity in the machine translation community because they offer high quality word alignments and a free implementation is available with the GIZA++ Toolkit (Och and Ney, 2003). Several methods have been developed to overcome the asymmetry of the alignment generated by the IBM Models. A remaining disadvantage, however, is the high model complexity. This paper describes a word alignment training procedure for statistical machine translation that uses a simple and clear statistical model, different from the IBM models. The main idea of the algorithm is to generate a symmetric and monotonic alignment between the target sentence and a permutation graph representing different reorderings of the words in the source sentence. The quality of the generated alignment is shown to be comparable to the standard GIZA++ training in an SMT setup.</abstract>
@@ -1712,7 +1712,7 @@
       <bibkey>fujii-etal-2006-test</bibkey>
     </paper>
     <paper id="188">
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title>Evaluating Morphosyntactic Tagging of <fixed-case>C</fixed-case>roatian Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/326_pdf.pdf</url>
@@ -1720,18 +1720,18 @@
       <bibkey>agic-tadic-2006-evaluating</bibkey>
     </paper>
     <paper id="189">
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Marita</first><last>Ailomaa</last></author>
       <author><first>Agnes</first><last>Lisowska</last></author>
       <author><first>Miroslav</first><last>Melichar</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <title>Extending the <fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Methodologie for Multimodal Language-enabled Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/328_pdf.pdf</url>
       <abstract>In this paper we present a proposal for extending the standard Wizard of Oz experimental methodology to language-enabled multimodal systems. We first discuss how Wizard of Oz experiments involving multimodal systems differ from those involving voice-only systems. We then go on to discuss the Extended Wizard of Oz methodology and the Wizard of Oz testing environment and protocol that we have developed. We then describe an example of applying this methodology to Archivus, a multimodal system for multimedia meeting retrieval and browsing. We focus in particular on the tools that the wizards would need to successfully and efficiently perform their tasks in a multimodal context. We conclude with some general comments about which questions need to be addressed when developing and using the Wizard of Oz methodology for testing multimodal systems.</abstract>
       <bibkey>rajman-etal-2006-extending</bibkey>
     </paper>
     <paper id="190">
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <title>Syntactic Annotation of Large Corpora in <fixed-case>STEVIN</fixed-case></title>
@@ -1748,14 +1748,14 @@
       <bibkey>buscaldi-rosso-2006-mining</bibkey>
     </paper>
     <paper id="192">
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Human Verb Associations as the Basis for Gold Standard Verb Classes: Validation against <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/333_pdf.pdf</url>
       <abstract>We describe a gold standard for semantic verb classes which is based on human associations to verbs. The associations were collected in a web experiment and then applied as verb features in a hierarchical cluster analysis. We claim that the resulting classes represent a theory-independent gold standard classification which covers a variety of semantic verb relations, and whose features can be used to guide the feature selection in automatic processes. To evaluate our claims, the association-based classification is validated against two standard approaches to semantic verb classes, GermaNet and FrameNet.</abstract>
       <bibkey>schulte-im-walde-2006-human</bibkey>
     </paper>
     <paper id="193">
-      <author><first>Peter W.</first><last>Wagacha</last></author>
+      <author id="peter-waiganjo-wagacha"><first>Peter W.</first><last>Wagacha</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
       <author><first>Pauline W.</first><last>Githinji</last></author>
       <title>A Grapheme-Based Approach for Accent Restoration in <fixed-case>G</fixed-case>ikuyu</title>
@@ -1764,8 +1764,8 @@
       <bibkey>wagacha-etal-2006-grapheme</bibkey>
     </paper>
     <paper id="194">
-      <author><first>Juan José Rodríguez</first><last>Soler</last></author>
-      <author><first>Pedro Concejero</first><last>Cerezo</last></author>
+      <author id="juan-jose-rodriguez-soler"><first>Juan José Rodríguez</first><last>Soler</last></author>
+      <author id="pedro-concejero-cerezo"><first>Pedro Concejero</first><last>Cerezo</last></author>
       <author><first>Daniel Tapias</first><last>Merino</last></author>
       <author><first>José</first><last>Sánchez</last></author>
       <title><fixed-case>MEDUSA</fixed-case>: User-Centred Design and usability evaluation of Automatic Speech Recognition telephone services in Telefónica Móviles España</title>
@@ -1778,7 +1778,7 @@
       <author><first>Katrin</first><last>Erk</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Andrea</first><last>Kowalski</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Manfred</first><last>Pinkal</last></author>
       <title>The <fixed-case>SALSA</fixed-case> Corpus: a <fixed-case>G</fixed-case>erman Corpus Resource for Lexical Semantics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/339_pdf.pdf</url>
@@ -1790,9 +1790,9 @@
       <author><first>Bruno</first><last>Pouliquen</last></author>
       <author><first>Anna</first><last>Widiger</last></author>
       <author><first>Camelia</first><last>Ignat</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <title>The <fixed-case>JRC</fixed-case>-<fixed-case>A</fixed-case>cquis: A Multilingual Aligned Parallel Corpus with 20+ Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/340_pdf.pdf</url>
       <abstract>We present a new, unique and freely available parallel corpus containing European Union (EU) documents of mostly legal nature. It is available in all 20 official EU languages, with additional documents being available in the languages of the EU candidate countries. The corpus consists of almost 8,000 documents per language, with an average size of nearly 9 million words per language. Pair-wise paragraph alignment information produced by two different aligners (Vanilla and HunAlign) is available for all 190+ language pair combinations. Most texts have been manually classified according to the EUROVOC subject domains so that the collection can also be used to train and test multi-label classification algorithms and keyword-assignment software. The corpus is encoded in XML, according to the Text Encoding Initiative Guidelines. Due to the large number of parallel texts in many languages, the JRC-Acquis is particularly suitable to carry out all types of cross-language research, as well as to test and benchmark text analysis software across different languages (for instance for alignment, sentence splitting and term extraction).</abstract>
@@ -1803,14 +1803,14 @@
       <author><first>Katrin</first><last>Erk</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Andrea</first><last>Kowalski</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <title><fixed-case>SALTO</fixed-case> - A Versatile Multi-Level Annotation Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/341_pdf.pdf</url>
       <abstract>In this paper, we describe the SALTO tool. It was originally developed for the annotation of semantic roles in the frame semantics paradigm, but can be used for graphical annotation of treebanks with general relational information in a simple drag-and-drop fashion. The tool additionally supports corpus management and quality control.</abstract>
       <bibkey>burchardt-etal-2006-salto</bibkey>
     </paper>
     <paper id="198">
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
       <title><fixed-case>KNACK</fixed-case>-2002: a Richly Annotated Corpus of <fixed-case>D</fixed-case>utch Written Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/342_pdf.pdf</url>
@@ -1828,7 +1828,7 @@
     </paper>
     <paper id="200">
       <author><first>Tomoyuki</first><last>Kato</last></author>
-      <author><first>Tomiki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomiki</first><last>Toda</last></author>
       <author><first>Hiroshi</first><last>Saruwatari</last></author>
       <author><first>Kiyohiro</first><last>Shikano</last></author>
       <title>Transcription Cost Reduction for Constructing Acoustic Models Using Acoustic Likelihood Selection Criteria</title>
@@ -1845,8 +1845,8 @@
       <bibkey>mieskes-strube-2006-part</bibkey>
     </paper>
     <paper id="202">
-      <author><first>Branimir</first><last>Boguraev</last></author>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
       <title>Analysis of <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank as a Resource for <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/346_pdf.pdf</url>
       <abstract>In our work, we present an analysis of the TimeBank corpus---the only available reference sample of TimeML-compliant annotation---from the point of view of its utility as a training resource for developing automated TimeML annotators. We are encouraged by experimental results indicative of the potential of TimeBank; at the same time, closer inspection of causes for some systematic errors shows off certain deficiencies in the corpus, primarily to do with small size and inconsistent annotation. Our analysis suggests that even a reference resource, developed outside of a rigorous process of training corpus design and creation, can be extremely valuable for training and development purposes. The analysis also highlights areas of correction and improvement for evolving the current reference corpus into a community infrastructure resource.</abstract>
@@ -1861,10 +1861,10 @@
       <bibkey>kawahara-kurohashi-2006-case</bibkey>
     </paper>
     <paper id="204">
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <title>Data, Annotations and Measures in <fixed-case>EASY</fixed-case> the Evaluation Campaign for Parsers of <fixed-case>F</fixed-case>rench.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/348_pdf.pdf</url>
       <abstract>This paper presents the protocol of EASY the evaluation campaign for syntactic parsers of French in the EVALDA project of the TECHNOLANGUE program. We describe the participants, the corpus and its genre partitioning, the annotation scheme, which allows for the annotation of both constituents and relations, the evaluation methodology and, as an illustration, the results obtained by one participant on half of the corpus.</abstract>
@@ -1882,7 +1882,7 @@
     <paper id="206">
       <author><first>Gregory</first><last>Grefenstette</last></author>
       <author><first>Fathi</first><last>Debili</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <author><first>Svitlana</first><last>Zinger</last></author>
       <title>Exploiting text for extracting image processing resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/350_pdf.pdf</url>
@@ -1890,7 +1890,7 @@
       <bibkey>grefenstette-etal-2006-exploiting</bibkey>
     </paper>
     <paper id="207">
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Clustering acronyms in biomedical text for disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf</url>
@@ -1898,8 +1898,8 @@
       <bibkey>okazaki-ananiadou-2006-clustering</bibkey>
     </paper>
     <paper id="208">
-      <author><first>Goran</first><last>Nenadic</last></author>
-      <author><first>Naoki</first><last>Okazaki</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
+      <author id="naoaki-okazaki"><first>Naoki</first><last>Okazaki</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Towards a terminological resource for biomedical text mining</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/352_pdf.pdf</url>
@@ -1916,9 +1916,9 @@
     <paper id="210">
       <author><first>David</first><last>Guthrie</last></author>
       <author><first>Ben</first><last>Allison</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>A Closer Look at Skip-gram Modelling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/357_pdf.pdf</url>
       <abstract>Data sparsity is a large problem in natural language processing that refers to the fact that language is a system of rare events, so varied and complex, that even using an extremely large corpus, we can never accurately model all possible strings of words. This paper examines the use of skip-grams (a technique where by n-grams are still stored to model language, but they allow for tokens to be skipped) to overcome the data sparsity problem. We analyze this by computing all possible skip-grams in a training corpus and measure how many adjacent (standard) n-grams these cover in test documents. We examine skip-gram modelling using one to four skips with various amount of training data and test against similar documents as well as documents generated from a machine translation system. In this paper we also determine the amount of extra training data required to achieve skip-gram coverage using standard adjacent tri-grams.</abstract>
@@ -1933,8 +1933,8 @@
       <bibkey>dobrov-loukachevitch-2006-development</bibkey>
     </paper>
     <paper id="212">
-      <author><first>Matthew W.</first><last>Bilotti</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="matthew-w-bilotti"><first>Matthew W.</first><last>Bilotti</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <title>Evaluation for Scenario Question Answering Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/360_pdf.pdf</url>
       <abstract>Scenario Question Answering is a relatively new direction in Question Answering (QA) research that presents a number of challenges for evaluation. In this paper, we propose a comprehensive evaluation strategy for Scenario QA, including amethodology for building reusable test collections for Scenario QA and metrics for evaluating system performance over such test collections. Using this methodology, we have built a test collection, which we have made available for public download as a service to the research community. It is our hope that widespread availability of quality evaluation materials fuels research in new approaches to the Scenario QA task.</abstract>
@@ -1958,9 +1958,9 @@
     </paper>
     <paper id="215">
       <author><first>György</first><last>Szarvas</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
-      <author><first>László</first><last>Felföldi</last></author>
-      <author><first>András</first><last>Kocsor</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
+      <author id="laszlo-felfoldi"><first>László</first><last>Felföldi</last></author>
+      <author id="andras-kocsor"><first>András</first><last>Kocsor</last></author>
       <author><first>János</first><last>Csirik</last></author>
       <title>A highly accurate Named Entity corpus for <fixed-case>H</fixed-case>ungarian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/365_pdf.pdf</url>
@@ -2007,7 +2007,7 @@
     </paper>
     <paper id="220">
       <author><first>Manfred</first><last>Sailer</last></author>
-      <author><first>Beata</first><last>Trawiński</last></author>
+      <author id="beata-trawinski"><first>Beata</first><last>Trawiński</last></author>
       <title>The Collection of Distributionally Idiosyncratic Items: A Multilingual Resource for Linguistic Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/375_pdf.pdf</url>
       <abstract>We present two collections of lexical items with idiosyncratic distribution. The collections document the behavior of German and English bound words (BW, such as English headway), i.e., words which can only occur in one expression (make headway). BWs are a problem for both general and idiomatic dictionaries since it is unclear whether they have an independent lexical status and to what extent the expressions in which they occur are typical idiomatic expressions. We propose a system which allows us to document the information about BWs from dictionaries and linguistic literature, together with corpus data and example queries for major text corpora. We present our data structure and point to other phraseologically oriented collections. We will also show differences between the German and the English collection.</abstract>
@@ -2016,7 +2016,7 @@
     <paper id="221">
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Elsabé</first><last>Taljard</last></author>
-      <author><first>Danie J.</first><last>Prinsloo</last></author>
+      <author id="danie-j-prinsloo"><first>Danie J.</first><last>Prinsloo</last></author>
       <title>Grammar-based tools for the creation of tagging resources for an unresourced language: the case of <fixed-case>N</fixed-case>orthern <fixed-case>S</fixed-case>otho</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/376_pdf.pdf</url>
       <abstract>We describe an architecture for the parallel construction of a tagger lexicon and an annotated reference corpus for the part-of-speech tagging of Nothern Sotho, a Bantu language of South Africa, for which no tagged resources have been available so far. Our tools make use of grammatical properties (morphological and syntactic) of the language. We use symbolic pretagging, followed by stochastic tagging, an architecture which proves useful not only for the bootstrapping of tagging resources, but also for the tagging of any new text. We discuss the tagset design, the tool architecture and the current state of our ongoing effort.</abstract>
@@ -2031,9 +2031,9 @@
       <bibkey>de-sousa-trippel-2006-building</bibkey>
     </paper>
     <paper id="223">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>Mixing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank for studying verb relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/379_pdf.pdf</url>
       <abstract>In this paper we present a novel resource for studying the semantics of verb relations. The resource is created by mixing sense relational knowledge enclosed in WordNet, frame knowledge enclosed in VerbNet and corpus knowledge enclosed in PropBank. As a result, a set of about 1000 frame pairs is made available. A frame pair represents a pair of verbs in a peculiar semantic relation accompanied with specific information, such as: the syntactic-semantic frames of the two verbs, the mapping among their thematic roles and a set of textual examples extracted from the PennTreeBank. We specifically focus on four relations: Troponymy, Causation, Entailment and Antonymy. The different steps required for the mapping are described in detail and statistics on resource mutual coverage are reported. We also propose a practical use of the resource for the task of Textual Entailment acquisition and for Question Answering. A first attempt for automate the mapping among verb arguments is also presented: early experiments show that simple techniques can achieve good results, up to 85% F-Measure.</abstract>
@@ -2041,7 +2041,7 @@
     </paper>
     <paper id="224">
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Margarita Alonso</first><last>Ramos</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita Alonso</first><last>Ramos</last></author>
       <title>Local Document Relevance Clustering in <fixed-case>IR</fixed-case> Using Collocation Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/381_pdf.pdf</url>
       <abstract>A series of different automatic query expansion techniques has been suggested in Information Retrieval. To estimate how suitable a document term is as an expansion term, the most popular of them use a measure of the frequency of the co-occurrence of this term with one or several query terms. The benefit of the use of the linguistic relations that hold between query terms is often questioned. If a linguistic phenomenon is taken into account, it is the phrase structure or lexical compound. We propose a technique that is based on the restricted lexical cooccurrence (collocation) of query terms. We use the knowledge on collocations formed by query terms for two tasks: (i) document relevance clustering done in the first stage of local query expansion and (ii) choice of suitable expansion terms from the relevant document cluster. In this paper, we describe the first task, providing evidence from first preliminary experiments on Spanish material that local relevance clustering benefits largely from knowledge on collocations.</abstract>
@@ -2056,8 +2056,8 @@
       <bibkey>esuli-sebastiani-2006-sentiwordnet</bibkey>
     </paper>
     <paper id="226">
-      <author><first>Alexandre</first><last>Denis</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <author><first>Guillaume</first><last>Pitel</last></author>
       <title>A Deep-Parsing Approach to Natural Language Understanding in Dialogue System: Results of a Corpus-Based Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/386_pdf.pdf</url>
@@ -2073,17 +2073,17 @@
     </paper>
     <paper id="228">
       <author><first>Margaret</first><last>King</last></author>
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <title>Evaluating Symbiotic Systems: the challenge</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/389_pdf.pdf</url>
       <abstract>This paper looks at a class of systems which pose severe problems in evaluation design for current conventional approaches to evaluation. After describing the two conventional evaluation paradigms: the functionality paradigm as typified by evaluation campaigns and the ISO inspired user-centred paradigm typified by the work of the EAGLES and ISLE projects, it goes on to outline the problems posed by the evaluation of systems which are designed to work in critical interaction with a human expert user and to work over vast amounts of data. These systems pose problems for both paradigms although for different reasons. The primary aim of this paper is to provoke discussion and the search for solutions. We have no proven solutions at present. However, we describe a programme of exploratory research on which we have already embarked, which involves ground clearing work which we expect to result in a deep understanding of the systems and users, a pre-requisite for developing a general framework for evaluation in this field.</abstract>
       <bibkey>king-underwood-2006-evaluating</bibkey>
     </paper>
     <paper id="229">
-      <author><first>Aimilios</first><last>Chalamandaris</last></author>
+      <author id="aimilios-chalamandaris"><first>Aimilios</first><last>Chalamandaris</last></author>
       <author><first>Athanassios</first><last>Protopapas</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
-      <author><first>Spyros</first><last>Raptis</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="spyros-raptis"><first>Spyros</first><last>Raptis</last></author>
       <title>All <fixed-case>G</fixed-case>reek to me! An automatic <fixed-case>G</fixed-case>reeklish to <fixed-case>G</fixed-case>reek transliteration system</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/390_pdf.pdf</url>
       <abstract>This paper presents research on Greeklish, that is, a transliteration of Greek using the Latin alphabet, which is used frequently in Greek e-mail communication. Greeklish is not standardized and there are a number of competing conventions co-existing in communication, based on personal preferences regarding similarities between Greek and Latin letters in shape, sound, or keyboard position. Our research has led to the development of All Greek to me! the first automatic transliteration system that can cope with any type of Greeklish. In this paper we first present previous research on Greeklish, describing other approaches that have attempted to deal with the same problems. We then provide a brief description of our approach, illustrating the functional flowchart of our system and the main ideas that underlie it. We present measures of system performance, based on about a years worth of usage as a public web service, and preliminary research, based on the same corpus, on the use of Greeklish and the trends in preferred Latin-Greek letter mapping. We evaluate the consistency of different transliteration patterns among users as well as the within-user consistency based on coherent principles. Finally we outline planned future research to further understand the use of Greeklish and improve All Greek to me! to function reliably embedded in integrated communication platforms bridging e-mail to mobile telephony and ubiquitous connectivity.</abstract>
@@ -2091,7 +2091,7 @@
     </paper>
     <paper id="230">
       <author><first>Thurid</first><last>Vogt</last></author>
-      <author><first>Elisabeth</first><last>André</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>André</last></author>
       <title>Improving Automatic Emotion Recognition from Speech via Gender Differentiaion</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/392_pdf.pdf</url>
       <abstract>Feature extraction is still a disputed issue for the recognition of emotions from speech. Differences in features for male and female speakers are a well-known problem and it is established that gender-dependent emotion recognizers perform better than gender-independent ones. We propose a way to improve the discriminative quality of gender-dependent features: The emotion recognition system is preceded by an automatic gender detection that decides upon which of two gender-dependent emotion classifiers is used to classify an utterance. This framework was tested on two different databases, one with emotional speech produced by actors and one with spontaneous emotional speech from a Wizard-of-Oz setting. Gender detection achieved an accuracy of about 90 % and the combined gender and emotion recognition system improved the overall recognition rate of a gender-independent emotion recognition system by 2-4 %.</abstract>
@@ -2117,7 +2117,7 @@
     </paper>
     <paper id="233">
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Asunción Gómez</first><last>Pérez</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción Gómez</first><last>Pérez</last></author>
       <author><first>Ovidiu</first><last>Vela</last></author>
       <author><first>Zeno</first><last>Gantner</last></author>
       <author><first>David</first><last>Manzano-Macho</last></author>
@@ -2127,7 +2127,7 @@
       <bibkey>declerck-etal-2006-multilingual</bibkey>
     </paper>
     <paper id="235">
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <title>The Impact of Annotation on the Performance of Protein Tagging in Biomedical Text</title>
@@ -2136,17 +2136,17 @@
       <bibkey>alex-etal-2006-impact</bibkey>
     </paper>
     <paper id="236">
-      <author><first>Ben</first><last>Wellner</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <title>Leveraging Machine Readable Dictionaries in Discriminative Sequence Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/404_pdf.pdf</url>
       <abstract>Many natural language processing tasks make use of a lexicon  typically the words collected from some annotated training data along with their associated properties. We demonstrate here the utility of corpora-independent lexicons derived from machine readable dictionaries. Lexical information is encoded in the form of features in a Conditional Random Field tagger providing improved performance in cases where: i) limited training data is made available ii) the data is case-less and iii) the test data genre or domain is different than that of the training data. We show substantial error reductions, especially on unknown words, for the tasks of part-of-speech tagging and shallow parsing, achieving up to 20% error reduction on Penn TreeBank part-of-speech tagging and up to a 15.7% error reduction for shallow parsing using the CoNLL 2000 data. Our results here point towards a simple, but effective methodology for increasing the adaptability of text processing systems by training models with annotated data in one genre augmented with general lexical information or lexical information pertinent to the target genre (or domain).</abstract>
       <bibkey>wellner-vilain-2006-leveraging</bibkey>
     </paper>
     <paper id="237">
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Anas El</first><last>Isbihani</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="anas-el-isbihani"><first>Anas El</first><last>Isbihani</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Creating a Large-Scale <fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench Statistical <fixed-case>M</fixed-case>achine<fixed-case>T</fixed-case>ranslation System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/405_pdf.pdf</url>
       <abstract>In this work, the creation of a large-scale Arabic to French statistical machine translation system is presented. We introduce all necessary steps from corpus aquisition, preprocessing the data to training and optimizing the system and eventual evaluation. Since no corpora existed previously, we collected large amounts of data from the web. Arabic word segmentation was crucial to reduce the overall number of unknown words. We describe the phrase-based SMT system used for training and generation of the translation hypotheses. Results on the second CESTA evaluation campaign are reported. The setting was inthe medical domain. The prototype reaches a favorable BLEU score of40.8%.</abstract>
@@ -2154,7 +2154,7 @@
     </paper>
     <paper id="238">
       <author><last>Chen</last><first>Yirong</first></author>
-      <author><last>Lu</last><first>Qin</first></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><last>Li</last><first>Wenjie</first></author>
       <author><last>Sui</last><first>Zhifang</first></author>
       <author><last>Ji</last><first>Luning</first></author>
@@ -2182,7 +2182,7 @@
     </paper>
     <paper id="241">
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <title>Tools and methods for objective or contextual evaluation of topic segmentation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/410_pdf.pdf</url>
       <abstract>In this paper we discuss the way of evaluating topic segmentation, from mathematical measures on variously constructed reference corpus to contextual evaluation depending on different topic segmentation usages. We present an overview of the different ways of building reference corpora and of mathematically evaluating segmentation methods, and then we focus on three tasks which may involve a topic segmentation: text extraction, information retrieval and document presentation. We have developed two graphical interfaces, one for an intrinsic comparison, and the other one dedicated to an evaluation in an information retrieval context. These tools will be very soon distributed under GPL licences on the Technolangue project web page.</abstract>
@@ -2201,8 +2201,8 @@
       <bibkey>devillers-etal-2006-real</bibkey>
     </paper>
     <paper id="243">
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title><fixed-case>POS</fixed-case>-based Word Reorderings for Statistical Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/412_pdf.pdf</url>
       <abstract>Translation In this work we investigate new possibilities for improving the quality of statistical machine translation (SMT) by applying word reorderings of the source language sentences based on Part-of-Speech tags. Results are presented on the European Parliament corpus containing about 700k sentences and 15M running words. In order to investigate sparse training data scenarios, we also report results obtained on about 1\% of the original corpus. The source languages are Spanish and English and target languages are Spanish, English and German. We propose two types of reorderings depending on the language pair and the translation direction: local reorderings of nouns and adjectives for translation from and into Spanish and long-range reorderings of verbs for translation into German. For our best translation system, we achieve up to 2\% relative reduction of WER and up to 7\% relative increase of BLEU score. Improvements can be seen both on the reordered sentences as well as on the rest of the test corpus. Local reorderings are especially important for the translation systems trained on the small corpus whereas long-range reorderings are more effective for the larger corpus.</abstract>
@@ -2211,18 +2211,18 @@
     <paper id="244">
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Jia</first><last>Xu</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Error Analysis of Statistical Machine Translation Output</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/413_pdf.pdf</url>
       <abstract>Evaluation of automatic translation output is a difficult task. Several performance measures like Word Error Rate, Position Independent Word Error Rate and the BLEU and NIST scores are widely use and provide a useful tool for comparing different systems and to evaluate improvements within a system. However the interpretation of all of these measures is not at all clear, and the identification of the most prominent source of errors in a given system using these measures alone is not possible. Therefore some analysis of the generated translations is needed in order to identify the main problems and to focus the research efforts. This area is however mostly unexplored and few works have dealt with it until now. In this paper we will present a framework for classification of the errors of a machine translation system and we will carry out an error analysis of the system used by the RWTH in the first TC-STAR evaluation.</abstract>
       <bibkey>vilar-etal-2006-error</bibkey>
     </paper>
     <paper id="245">
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Ana</first><last>Fernández-Montraveta</last></author>
-      <author><first>Gloria</first><last>Vázquez</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="ana-fernandez"><first>Ana</first><last>Fernández-Montraveta</last></author>
+      <author id="gloria-vazquez"><first>Gloria</first><last>Vázquez</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Joan Antoni</first><last>Capilla</last></author>
       <title>The Sensem Corpus: a Corpus Annotated at the Syntactic and Semantic Level</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/414_pdf.pdf</url>
@@ -2231,7 +2231,7 @@
     </paper>
     <paper id="246">
       <author><first>Javier</first><last>Pérez</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <title><fixed-case>GAIA</fixed-case>: Common Framework for the Development of Speech Translation Technologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/415_pdf.pdf</url>
       <abstract>We present here an open-source software platform for the integration of speech translation components. This tool is useful to integrate into a common framework different automatic speech recognition, spoken language translation and text-to-speech synthesis solutions, as demonstrated in the evaluation of the European LC-STAR project, and during the development of the national ALIADO project. Gaia operates with great flexibility, and it has been used to obtain the text and speech corpora needed when performing speech translation. The platform follows a modular distributed approach, with a specifically designed extensible network protocol handling the communication with the different modules. A well defined and publicly available API facilitates the integration of existing solutions into the architecture. Completely functional audio and text interfaces together with remote monitoring tools are provided.</abstract>
@@ -2246,8 +2246,8 @@
     </paper>
     <paper id="248">
       <author><first>Javier</first><last>Pérez</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
-      <author><first>Horst-Udo</first><last>Hain</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="horst-udo-hain"><first>Horst-Udo</first><last>Hain</last></author>
       <author><first>Eric</first><last>Keller</last></author>
       <author><first>Stefan</first><last>Breuer</last></author>
       <author><first>Jilei</first><last>Tian</last></author>
@@ -2264,7 +2264,7 @@
       <bibkey>nemec-2006-tree</bibkey>
     </paper>
     <paper id="250">
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Caroline</first><last>Anthony</last></author>
       <author><first>Kimberly</first><last>Voll</last></author>
       <title>Methods for Creating Semantic Orientation Dictionaries</title>
@@ -2284,14 +2284,14 @@
     <paper id="252">
       <author><first>Hynek</first><last>Bořil</last></author>
       <author><first>Tomáš</first><last>Bořil</last></author>
-      <author><first>Petr</first><last>Pollák</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollák</last></author>
       <title>Methodology of <fixed-case>L</fixed-case>ombard Speech Database Acquisition: Experiences with <fixed-case>CLSD</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/427_pdf.pdf</url>
       <abstract>In this paper, process of the Czech Lombard Speech Database (CLSD'05) acquisition is presented. Feature analyses have proven a strong appearance of Lombard effect in the database. In the small vocabulary recognition task, significant performance degradation was observed for the Lombard speech recorded in the database. Aim of this paper is to describe the hardware platform, scenarios and recording tool used for the acquisition of CLSD'05. During the database recording and processing, several difficulties were encountered. The most important question was how to adjust the level of speech feedback for the speaker. A method for minimization of the speech attenuation introduced to the speaker by headphones is proposed in this paper. Finally, contents and corpus of the database are presented to outline it's suitability for analysis and modeling of Lombard effect. The whole CLSD'05 database with a detailed documentation is now released for public use.</abstract>
       <bibkey>boril-etal-2006-methodology</bibkey>
     </paper>
     <paper id="253">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title>Dimensions in Dialogue Act Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/428_pdf.pdf</url>
       <abstract>This paper is concerned with the fundamentals of multidimensional dialogue act annotation, i.e. with what it means to annotate dialogues with information about the communicative acts that are performed with the utterances, taking various 'dimensions' into account. Two ideas seem to be prevalent in the literature concerning the notion of dimension: (1) dimensions correspond to different types of information; and (2) a dimension is formed by a set of mutually exclusive tags. In DAMSL, for instance, the terms dimension and layer are used sometimes in the sense of (1) and sometimes in that of (2). We argue that being mutually exclusive is not a good criterion for a set of dialogue act types to constitute a dimension, even though the description of an object in a multidimensional space should never assign more than one value per dimension. We define a dimension of dialogue act annotation as an aspect of participating in a dialogue that can be addressed independently by means of dialogue acts. We show that DAMSL dimensions such as Info-request, Statement, and Answer do not qualify as proper dimensions, and that the communicative functions in these categories do not fall in any specific dimension, but should be considered as general-purpose in the sense that they can be used in any dimension. We argue that using the notion of dimension that we propose, a multidimensional taxonomy of dialogue acts emerges that optimally supports multidimensional dialogue act annotation.</abstract>
@@ -2321,7 +2321,7 @@
       <bibkey>bernardi-etal-2006-multilingual</bibkey>
     </paper>
     <paper id="256">
-      <author><first>Irene</first><last>Langkilde-Geary</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde-Geary</last></author>
       <author><first>Justin</first><last>Betteridge</last></author>
       <title>A Factored Functional Dependency Transformation of the <fixed-case>E</fixed-case>nglish <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank for Probabilistic Surface Generation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/435_pdf.pdf</url>
@@ -2336,10 +2336,10 @@
       <bibkey>talley-2006-bootstrapping</bibkey>
     </paper>
     <paper id="258">
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Junichi</first><last>Fukumoto</last></author>
+      <author id="junichi-fukumoto"><first>Junichi</first><last>Fukumoto</last></author>
       <title>Automated Summarization Evaluation with Basic Elements.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/438_pdf.pdf</url>
       <abstract>As part of evaluating a summary automati-cally, it is usual to determine how much of the contents of one or more human-produced ideal summaries it contains. Past automated methods such as ROUGE compare using fixed word ngrams, which are not ideal for a variety of reasons. In this paper we describe a framework in which summary evaluation measures can be instantiated and compared, and we implement a specific evaluation method using very small units of content, called Basic Elements that address some of the shortcomings of ngrams. This method is tested on DUC 2003, 2004, and 2005 systems and produces very good correlations with human judgments.</abstract>
@@ -2354,9 +2354,9 @@
       <bibkey>kaji-watanabe-2006-automatic</bibkey>
     </paper>
     <paper id="260">
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <title>Generating Typed Dependency Parses from Phrase Structure Parses</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/440_pdf.pdf</url>
       <abstract>This paper describes a system for extracting typed dependency parses of English sentences from phrase structure parses. In order to capture inherent relations occurring in corpus texts that can be critical in real-world applications, many NP relations are included in the set of grammatical relations used. We provide a comparison of our system with Minipar and the Link parser. The typed dependency extraction facility described here is integrated in the Stanford Parser, available for download.</abstract>
@@ -2380,8 +2380,8 @@
     </paper>
     <paper id="263">
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <title>Summarizing Answers for Complicated Questions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/443_pdf.pdf</url>
       <abstract>Recent work in several computational linguistics (CL) applications (especially question answering) has shown the value of semantics (in fact, many people argue that the current performance ceiling experienced by so many CL applications derives from their inability to perform any kind of semantic processing). But the absence of a large semantic information repository that provides representations for sentences prevents the training of statistical CL engines and thus hampers the development of such semantics-enabled applications. This talk refers to recent work in several projects that seek to annotate large volumes of text with shallower or deeper representations of some semantic phenomena. It describes one of the essential problemscreating, managing, and annotating (at large scale) the meanings of words, and outlines the Omega ontology, being built at ISI, that acts as term repository. The talk illustrates how one can proceed from words via senses to concepts, and how the annotation process can help verify good concept decisions and expose bad ones. Much of this work is performed in the context of the OntoNotes project, joint with BBN, the Universities of Colorado and Pennsylvania, and ISI, that is working to build a corpus of about 1M words (English, Chinese, and Arabic), annotated for shallow semantics, over the next few years.</abstract>
@@ -2404,7 +2404,7 @@
     <paper id="266">
       <author><first>Ronny</first><last>Melz</last></author>
       <author><first>Pum-Mo</first><last>Ryu</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Compiling large language resources using lexical similarity metrics for domain taxonomy learning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/446_pdf.pdf</url>
       <abstract>In this contribution we present a new methodology to compile large language resources for domain-specific taxonomy learning. We describe the necessary stages to deal with the rich morphology of an agglutinative language, i.e. Korean, and point out a second order machine learning algorithm to unveil term similarity from a given raw text corpus. The language resource compilation described is part of a fully automatic top-down approach to construct taxonomies, without involving the human efforts which are usually required.</abstract>
@@ -2412,14 +2412,14 @@
     </paper>
     <paper id="267">
       <author><first>Felix</first><last>Pîrvan</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title>Tagset Mapping and Statistical Training Data Cleaning-up</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/448_pdf.pdf</url>
       <abstract>The paper describes a general method (as well as its implementation and evaluation) for deriving mapping systems for different tagsets available in existing training corpora (gold standards) for a specific language. For each pair of corpora (tagged with different tagsets), one such mapping system is derived. This mapping system is then used to improve the tagging of each of the two corpora with the tagset of the other (this process will be called cross-tagging). By reapplying the algorithm to the newly obtained corpora, the accuracy of the underlying training corpora can also be improved. Furthermore, comparing the results with the gold standards makes it possible to assess the distributional adequacy of various tagsets used in processing the language in case. Unlike other methods, such as those reported in (Brants, 1995) or (Tufis &amp; Dragomirescu, 2004), which assume a subsumption relation between the considered tagsets, and as such they aim at minimizing the tagsets by eliminating the feature-value redundancy, this method is applicable for completely unrelated tagsets. Although the experiments were focused on morpho-syntactic (POS) tagging, the method is applicable to other types of tagging as well.</abstract>
       <bibkey>pirvan-tufis-2006-tagset</bibkey>
     </paper>
     <paper id="268">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <title><fixed-case>R</fixed-case>o<fixed-case>C</fixed-case>o-News: A Hand Validated Journalistic Corpus of <fixed-case>R</fixed-case>omanian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/451_pdf.pdf</url>
@@ -2427,15 +2427,15 @@
       <bibkey>tufis-irimia-2006-roco</bibkey>
     </paper>
     <paper id="269">
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <title>Turning a Dependency Treebank into a <fixed-case>PSG</fixed-case>-style Constituent Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/452_pdf.pdf</url>
       <abstract>In this paper, we present and evaluate a new method to convert Constraint Grammar (CG) parses of running text into Constituent Treebanks. The conversion is two-step - first a grammar-based method is used to bridge the gap between raw CG annotation and full dependency structure, then phrase structure bracketing and non-terminal nodes are introduced by clustering sister dependents, effectively building one syntactic treebank on top of another. The method is compared with another approach (Bick 2003-2), where constituent structures are arrived at by employing a function-tag based Phrase Structure Grammar (PSG). Results are evaluated on a small reference corpus for both raw and revised CG input, with bracketing F-Scores of 87.5% for raw text and 97.1% for revised CG input, and a raw text edge label accuracy of 95.9% for forms and 86% for functions, or 99.7% and 99.4%, respectively, for revised CG. By applying the tools to the CG-only part of the Danish Arboretum treebank we were able to increase the size of the treebank by 86%, from 197.400 to 367.500 words.</abstract>
       <bibkey>bick-2006-turning</bibkey>
     </paper>
     <paper id="270">
-      <author><first>Dan</first><last>Ştefănescu</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title>Aligning Multilingual Thesauri</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/453_pdf.pdf</url>
       <abstract>The aligning and merging of ontologies with overlapping information are actual one of the most active domain of investigation in the Semantic Web community. Multilingual lexical ontologies thesauri are fundamental knowledge sources for most NLP projects addressing multilinguality. The alignment of multilingual lexical knowledge sources has various applications ranging from knowledge acquisition to semantic validation of interlingual equivalence of presumably the same meaning express in different languages. In this paper, we present a general method for aligning ontologies, which was used to align a conceptual thesaurus, lexicalized in 20 languages with a partial version of it lexicalized in Romanian. The objective of our work was to align the existing terms in the Romanian Eurovoc to the terms in the English Eurovoc and to automatically update the Romanian Eurovoc. The general formulation of the ontology alignment problem was set up along the lines established by Heterogeneity group of the KnowledgeWeb consortium, but the actual case study was motivated by the needs of a specific NLP project.</abstract>
@@ -2443,17 +2443,17 @@
     </paper>
     <paper id="271">
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title>Dependency-Based Phrase Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/454_pdf.pdf</url>
       <abstract>Phrase alignment is the task that requires the constituent phrases of two halves of a bitext to be aligned. In order to align phrases, one must discover them first and this article presents a method of aligning phrases that are discovered automatically. Here, the notion of a 'phrase' will be understood as being given by a subtree of a dependency-like structure of a sentence called linkage. To discover phrases, we will make use of two distinct, language independent methods: the IBM-1 model (Brown et al., 1993) adapted to detect linkages and Constrained Lexical Attraction Models (Ion &amp; Barbu Mititelu, 2006). The methods will be combined and the resulted model will be used to annotate the bitext. The accuracy of phrase alignment will be evaluated by obtaining word alignments from link alignments and then by checking the F-measure of the latter word aligner.</abstract>
       <bibkey>ion-etal-2006-dependency</bibkey>
     </paper>
     <paper id="272">
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title><fixed-case>A</fixed-case>cquis <fixed-case>C</fixed-case>ommunautaire Sentence Alignment using Support Vector Machines</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/456_pdf.pdf</url>
       <abstract>Sentence alignment is a task that requires not only accuracy, as possible errors can affect further processing, but also requires small computation resources and to be language pair independent. Although many implementations do not use translation equivalents because they are dependent on the language pair, this feature is a requirement for the accuracy increase. The paper presents a hybrid sentence aligner that has two alignment iterations. The first iteration is based mostly on sentences length, and the second is based on a translation equivalents table estimated from the results of the first iteration. The aligner uses a Support Vector Machine classifier to discriminate between positive and negative examples of sentence pairs.</abstract>
@@ -2469,10 +2469,10 @@
     </paper>
     <paper id="274">
       <author><first>Baden</first><last>Hughes</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <title>Reconsidering Language Identification for Written Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/459_pdf.pdf</url>
       <abstract>The task of identifying the language in which a given document (ranging from a sentence to thousands of pages) is written has been relatively well studied over several decades. Automated approachesto written language identification are used widely throughout research and industrial contexts, over both oral and written source materials. Despite this widespread acceptance, a review of previous research in written language identification reveals a number of questions which remain openand ripe for further investigation.</abstract>
@@ -2481,7 +2481,7 @@
     <paper id="275">
       <author><first>Yasuko</first><last>Senda</last></author>
       <author><first>Yasusi</first><last>Sinohara</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <title>Automatic Terminology Intelligibility Estimation for Readership-oriented Technical Writing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/461_pdf.pdf</url>
       <abstract>This paper describes automatic terminology intelligibility estimation for readership-oriented technical writing. We assume that the term frequency weighted by the types of documents can be an indicator of the term intelligibility for a certain readership. From this standpoint, we analyzed the relationship between the following: average intelligibility levels of 46 technical terms that were rated by about 120 laymen; numbers of documents that an Internet search</abstract>
@@ -2489,7 +2489,7 @@
     </paper>
     <paper id="276">
       <author><first>Mats</first><last>Lundälv</last></author>
-      <author><first>Katarina</first><last>Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Mühlenbock</last></author>
       <author><first>Bengt</first><last>Farre</last></author>
       <author><first>Annika</first><last>Brännström</last></author>
       <title><fixed-case>SYMBERED</fixed-case> - a Symbol-Concept Editing Tool</title>
@@ -2518,26 +2518,26 @@
     </paper>
     <paper id="279">
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <title><fixed-case>WS</fixed-case>4<fixed-case>LR</fixed-case>: A Workstation for Lexical Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/467_pdf.pdf</url>
       <abstract>In this paper we describe WS4LR, the workstation for lexical resources, a software tool developed within the Human Language Technology Group at the Faculty of Mathematics, University of Belgrade. The tool is aimed at manipulating heterogeneous lexical resources, and the need for such a tool came from the large volume of resources the Group has developed in the course of many years and within different projects. The tool handles morphological dictionaries, wordnets, aligned texts and transducers equally and has already proved very useful for various tasks. Although it has so far been used mainly for Serbian, WS4LR is not language dependent and can be successfully used for resources in other languages provided that they follow the described formats and methodologies. The tool operates on the .NET platform and runs on a personal computer under Windows 2000/XP/2003 operating system with at least 256MB of internal memory.</abstract>
       <bibkey>krstev-etal-2006-ws4lr</bibkey>
     </paper>
     <paper id="280">
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Extending <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et with Novel Verb Classes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/468_pdf.pdf</url>
       <abstract>Lexical classifications have proved useful in supporting various natural language processing (NLP) tasks. The largest verb classification for English is Levin's (1993) work which defined groupings of verbs based on syntactic properties. VerbNet - the largest computational verb lexicon currently available for English - provides detailed syntactic-semantic descriptions of Levin classes. While the classes included are extensive enough for some NLP use, they are not comprehensive. Korhonen and Briscoe (2004) have proposed a significant extension of Levin's classification which incorporates 57 novel classes for verbs not covered (comprehensively) by Levin. This paper describes the integration of these classes into VerbNet. The result is the most extensive Levin-style classification for English verbs which can be highly useful for practical applications.</abstract>
       <bibkey>kipper-etal-2006-extending</bibkey>
     </paper>
     <paper id="281">
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Catherine</first><last>Havasi</last></author>
       <author><first>Jessica</first><last>Littman</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
@@ -2548,15 +2548,15 @@
       <bibkey>pustejovsky-etal-2006-towards</bibkey>
     </paper>
     <paper id="282">
-      <author><first>Hans</first><last>Dybkjær</last></author>
-      <author><first>Laila</first><last>Dybkjær</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjær</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
       <title>Act-Topic Patterns for Automatically Checking Dialogue Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/471_pdf.pdf</url>
       <abstract>When dialogue models are evaluated today, this is normally done by using some evaluation method to collect data, often involving users interacting with the system model, and then subsequently analysing the collected data. We present a tool called DialogDesigner that enables automatic evaluation performed directly on the dialogue model and that does not require any data collection first. DialogDesigner is a tool in support of rapid design and evaluation of dialogue models. The first version was developed in 2005 and enabled developers to create an electronic dialogue model, get various graphical views of the model, run a Wizard-of-Oz (WOZ) simulation session, and extract different presentations in HTML. The second version includes extensions in terms of support for automatic dialogue model evaluation. Various aspects of dialogue model well-formedness can be automatically checked. Some of the automatic analyses simply perform checks based on the state and transition structure of the dialogue model while the core part are based on act-topic annotation of prompts and transitions in the dialogue model and specification of act-topic patterns. This paper focuses on the version 2 extensions.</abstract>
       <bibkey>dybkjaer-dybkjaer-2006-act</bibkey>
     </paper>
     <paper id="283">
-      <author><first>David M.</first><last>Rojas</last></author>
+      <author id="david-m-rojas"><first>David M.</first><last>Rojas</last></author>
       <author><first>Takako</first><last>Aikawa</last></author>
       <title>Predicting <fixed-case>MT</fixed-case> Quality as a Function of the Source Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf</url>
@@ -2564,7 +2564,7 @@
       <bibkey>rojas-aikawa-2006-predicting</bibkey>
     </paper>
     <paper id="284">
-      <author><first>Paweł</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Paweł</first><last>Mazur</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <title>Named Entity Extraction with Conjunction Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/473_pdf.pdf</url>
@@ -2595,7 +2595,7 @@
     </paper>
     <paper id="287">
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Maurizio</first><last>Tesconi</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tesconi</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
@@ -2609,7 +2609,7 @@
       <author><first>Caroline</first><last>Sporleder</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Tijn</first><last>Porcelijn</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Pim</first><last>Arntzen</last></author>
       <title>Identifying Named Entities in Text Databases from the Natural History Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/482_pdf.pdf</url>
@@ -2617,7 +2617,7 @@
       <bibkey>sporleder-etal-2006-identifying</bibkey>
     </paper>
     <paper id="289">
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Gareth</first><last>Evans</last></author>
       <author><first>Zeinab</first><last>Mohamed</last></author>
       <title>Developing Speech Synthesis for Under-Resourced Languages by “Faking it”: An Experiment with <fixed-case>S</fixed-case>omali</title>
@@ -2648,10 +2648,10 @@
     </paper>
     <paper id="292">
       <author><first>Bente</first><last>Maegaard</last></author>
-      <author><first>Jens-Erik</first><last>Fenstad</last></author>
+      <author id="jens-erik-fenstad"><first>Jens-Erik</first><last>Fenstad</last></author>
       <author><first>Lars</first><last>Ahrenberg</last></author>
       <author><first>Knut</first><last>Kvale</last></author>
-      <author><first>Katarina</first><last>Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Mühlenbock</last></author>
       <author><first>Bernt-Erik</first><last>Heid</last></author>
       <title><fixed-case>KUNSTI</fixed-case> - Knowledge Generation for <fixed-case>N</fixed-case>orwegian Language Technology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/487_pdf.pdf</url>
@@ -2659,11 +2659,11 @@
       <bibkey>maegaard-etal-2006-kunsti</bibkey>
     </paper>
     <paper id="293">
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <author><first>Csaba</first><last>Oravecz</last></author>
       <author><first>Viktor</first><last>Trón</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <title>Using a morphological analyzer in high precision <fixed-case>POS</fixed-case> tagging of <fixed-case>H</fixed-case>ungarian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/488_pdf.pdf</url>
       <abstract>The paper presents an evaluation of maxent POS disambiguation systems that incorporate an open source morphological analyzer to constrain the probabilistic models. The experiments show that the best proposed architecture, which is the first application of the maximum entropy framework in a Hungarian NLP task, outperforms comparable state of the art tagging methods and is able to handle out of vocabulary items robustly, allowing for efficient analysis of large (web-based) corpora.</abstract>
@@ -2749,19 +2749,19 @@
     </paper>
     <paper id="303">
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Aristogiannis</first><last>Garbis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>Dealing with Imbalanced Data using <fixed-case>B</fixed-case>ayesian Techniques</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/503_pdf.pdf</url>
       <abstract>For the present work, we deal with the significant problem of high imbalance in data in binary or multi-class classification problems. We study two different linguistic applications. The former determines whether a syntactic construction (environment) co-occurs with a verb in a natural text corpus consists a subcategorization frame of the verb or not. The latter is called Name Entity Recognition (NER) and it concerns determining whether a noun belongs to a specific Name Entity class. Regarding the subcategorization domain, each environment is encoded as a vector of heterogeneous attributes, where a very high imbalance between positive and negative examples is observed (an imbalance ratio of approximately 1:80). In the NER application, the imbalance between a name entity class and the negative class is even greater (1:120). In order to confront the plethora of negative instances, we suggest a search tactic during training phase that employs Tomek links for reducing unnecessary negative examples from the training set. Regarding the classification mechanism, we argue that Bayesian networks are well suited and we propose a novel network structure which efficiently handles heterogeneous attributes without discretization and is more classification-oriented. Comparing the experimental results with those of other known machine learning algorithms, our methodology performs significantly better in detecting examples of the rare class.</abstract>
       <bibkey>maragoudakis-etal-2006-dealing</bibkey>
     </paper>
     <paper id="304">
-      <author><first>José-Miguel</first><last>Benedí</last></author>
-      <author><first>Eduardo</first><last>Lleida</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="eduardo-lleida"><first>Eduardo</first><last>Lleida</last></author>
       <author><first>Amparo</first><last>Varona</last></author>
-      <author><first>María-José</first><last>Castro</last></author>
+      <author id="maria-jose-castro-bleda"><first>María-José</first><last>Castro</last></author>
       <author><first>Isabel</first><last>Galiano</last></author>
       <author><first>Raquel</first><last>Justo</last></author>
       <author><first>Iñigo</first><last>López de Letona</last></author>
@@ -2782,12 +2782,12 @@
     </paper>
     <paper id="306">
       <author><first>Yun-Chuang</first><last>Chiao</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <author><first>Dominique</first><last>Laurent</last></author>
-      <author><first>Thi Minh Huyen</first><last>Nguyen</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <author><first>François</first><last>Stuck</last></author>
-      <author><first>Jean</first><last>Véronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Véronis</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
       <title>Evaluation of multilingual text alignment systems: the <fixed-case>ARCADE</fixed-case> <fixed-case>II</fixed-case> project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/506_pdf.pdf</url>
@@ -2807,7 +2807,7 @@
       <author><first>Kareem</first><last>Darwish</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
       <author><first>Walid</first><last>Magdy</last></author>
-      <author><first>Magdi</first><last>Nagi</last></author>
+      <author id="magdi-nagi"><first>Magdi</first><last>Nagi</last></author>
       <title>Building a Heterogeneous Information Retrieval Collection of Printed <fixed-case>A</fixed-case>rabic Documents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/509_pdf.pdf</url>
       <abstract>This paper describes the development of an Arabic document image collection containing 34,651 documents from 1,378 different books and 25 topics with their relevance judgments. The books from which the collection is obtained are a part of a larger collection 75,000 books being scanned for archival and retrieval at the bibliotheca Alexandrina (BA). The documents in the collection vary widely in topics, fonts, and degradation levels. Initial baseline experiments were performed to examine the effectiveness of different index terms, with and without blind relevance feedback, on Arabic OCR degraded text.</abstract>
@@ -2824,14 +2824,14 @@
     </paper>
     <paper id="310">
       <author><first>Dimou</first><last>Athanassia Lida</last></author>
-      <author><first>Chalamandaris</first><last>Aimilios</last></author>
+      <author id="aimilios-chalamandaris"><first>Chalamandaris</first><last>Aimilios</last></author>
       <title>Language identification from suprasegmental cues: Speech synthesis of <fixed-case>G</fixed-case>reek utterances from different dialectal variations.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/511_pdf.pdf</url>
       <abstract>In this paper we present the continuation of our research on the ability of native Greek adults to identify their mother tongue from synthesized stimuli which contain only prosodic - melodic and rhythmic - information. In the first section we present the ideas that underlie our theory, together with a brief review of our preliminary results. In the second section the detailed description of our experimental approach is given, as well as the results and their statistical analysis. In the final two sections we provide the conclusions derived from our experiments and the future work we are planning to carry out.</abstract>
       <bibkey>athanassia-lida-aimilios-2006-language</bibkey>
     </paper>
     <paper id="311">
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Galen</first><last>Andrew</last></author>
       <title>Tregex and Tsurgeon: tools for querying and manipulating tree data structures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/513_pdf.pdf</url>
@@ -2863,7 +2863,7 @@
     </paper>
     <paper id="314">
       <author><first>Bente</first><last>Maegaard</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Lise Damsgaard</first><last>Jørgensen</last></author>
       <title>The <fixed-case>BLARK</fixed-case> concept and <fixed-case>BLARK</fixed-case> for <fixed-case>A</fixed-case>rabic</title>
@@ -2883,16 +2883,16 @@
     </paper>
     <paper id="316">
       <author><first>Suzan</first><last>Verberne</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Peter-Arno</first><last>Coppen</last></author>
+      <author id="peter-arno-coppen"><first>Peter-Arno</first><last>Coppen</last></author>
       <title>Data for question answering: The case of why</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/525_pdf.pdf</url>
       <abstract>For research and development of an approach for automatically answering why-questions (why-QA) a data collection was created. The data set was obtained by way of elicitation and comprises a total of 395 why-questions. For each question, the data set includes the source document and one or two user-formulated answers. In addition, for a subset of the questions, user-formulated paraphrases are available. All question-answer pairs have been annotated with information on topic and semantic answer type. The resulting data set is of importance not only for our research, but we expect it to contribute to and stimulate other research in the field of why-QA.</abstract>
       <bibkey>verberne-etal-2006-data</bibkey>
     </paper>
     <paper id="317">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <title>Shallow Semantic Annotation of <fixed-case>B</fixed-case>ulgarian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/527_pdf.pdf</url>
@@ -2900,16 +2900,16 @@
       <bibkey>simov-osenova-2006-shallow</bibkey>
     </paper>
     <paper id="318">
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Walt</first><last>Andrews</last></author>
-      <author><first>Joseph P.</first><last>Campbell</last></author>
-      <author><first>George</first><last>Doddington</last></author>
+      <author id="joseph-p-campbell"><first>Joseph P.</first><last>Campbell</last></author>
+      <author id="george-r-doddington"><first>George</first><last>Doddington</last></author>
       <author><first>Jack</first><last>Godfrey</last></author>
       <author><first>Shudong</first><last>Huang</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
-      <author><first>Alvin</first><last>Martin</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
+      <author id="alvin-martin"><first>Alvin</first><last>Martin</last></author>
       <author><first>Hirotaka</first><last>Nakasone</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <title>The Mixer and Transcript Reading Corpora: Resources for Multilingual, Crosschannel Speaker Recognition Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/530_pdf.pdf</url>
@@ -2934,10 +2934,10 @@
       <bibkey>chou-huang-2006-hantology</bibkey>
     </paper>
     <paper id="321">
-      <author><first>Maria</first><last>Gavrilidou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
@@ -2959,7 +2959,7 @@
     </paper>
     <paper id="323">
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <title>Shalmaneser - A Toolchain For Shallow Semantic Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/537_pdf.pdf</url>
       <abstract>This paper presents Shalmaneser, a software package for shallow semantic parsing, the automatic assignment of semantic classes and roles to free text. Shalmaneser is a toolchain of independent modules communicating through a common XML format. System output can be inspected graphically. Shalmaneser can be used either as a black box to obtain semantic parses for new datasets (classifiers for English and German frame-semantic analysis are included), or as a research platform that can be extended to new parsers, languages, or classification paradigms.</abstract>
@@ -2968,8 +2968,8 @@
     <paper id="324">
       <author><first>Valentin</first><last>Tablan</last></author>
       <author><first>Tamara</first><last>Polajnar</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <title>User-friendly ontology authoring using a controlled language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/538_pdf.pdf</url>
       <abstract>In recent years, following the rapid development in the Semantic Web and Knowledge Management research, ontologies have become more in demand in Natural Language Processing. An increasing number of systems use ontologies either internally, for modelling the domain of the application, or as data structures that hold the output resulting from the work of the system, in the form of knowledge bases. While there are many ontology editing tools aimed at expert users, there are very few which are accessible to users wishing to create simple structures without delving into the intricacies of knowledge representation languages. The approach described in this paper allows users to create and edit ontologies simply by using a restricted version of the English language. The controlled language described within is based on an open vocabulary and a restricted set of grammatical constructs. Sentences written in this language unambiguously map into a number of knowledge representation formats including OWL and RDF-S to allow round-trip ontology management.</abstract>
@@ -2977,7 +2977,7 @@
     </paper>
     <paper id="325">
       <author><first>Laura</first><last>Hasler</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Karin</first><last>Naumann</last></author>
       <title><fixed-case>NP</fixed-case>s for Events: Experiments in Coreference Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/539_pdf.pdf</url>
@@ -2987,9 +2987,9 @@
     <paper id="326">
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Sandra</first><last>Antunes</last></author>
-      <author><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
-      <author><first>João Miguel</first><last>Casteleiro</last></author>
-      <author><first>Luísa</first><last>Pereira</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
+      <author id="joao-miguel-casteleiro"><first>João Miguel</first><last>Casteleiro</last></author>
+      <author id="luisa-pereira"><first>Luísa</first><last>Pereira</last></author>
       <author><first>Tiago</first><last>Sá</last></author>
       <title><fixed-case>COMBINA</fixed-case>-<fixed-case>PT</fixed-case>: A Large Corpus-extracted and Hand-checked Lexical Database of <fixed-case>P</fixed-case>ortuguese Multiword Expressions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/540_pdf.pdf</url>
@@ -2999,7 +2999,7 @@
     <paper id="327">
       <author><first>David</first><last>Graff</last></author>
       <author><first>Tim</first><last>Buckwalter</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Hubert</first><last>Jin</last></author>
       <title>Lexicon Development for Varieties of Spoken Colloquial <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/541_pdf.pdf</url>
@@ -3009,19 +3009,19 @@
     <paper id="328">
       <author><first>Alexandre</first><last>Patry</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <title><fixed-case>MOOD</fixed-case>: A Modular Object-Oriented Decoder for Statistical Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/542_pdf.pdf</url>
       <abstract>We present an Open Source framework called MOOD developed in order tofacilitate the development of a Statistical Machine Translation Decoder.MOOD has been modularized using an object-oriented approach which makes itespecially suitable for the fast development of state-of-the-art decoders. Asa proof of concept, a clone of the pharaoh decoder has been implemented andevaluated. This clone named ramses is part of the current distribution of MOOD.</abstract>
       <bibkey>patry-etal-2006-mood</bibkey>
     </paper>
     <paper id="329">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Tim</first><last>Buckwalter</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Dalila</first><last>Tabessi</last></author>
       <title>Developing and Using a Pilot Dialectal <fixed-case>A</fixed-case>rabic Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/543_pdf.pdf</url>
@@ -3029,8 +3029,8 @@
       <bibkey>maamouri-etal-2006-developing</bibkey>
     </paper>
     <paper id="330">
-      <author><first>Beáta Bandmann</first><last>Megyesi</last></author>
-      <author><first>Anna Sågvall</first><last>Hein</last></author>
+      <author id="beata-megyesi"><first>Beáta Bandmann</first><last>Megyesi</last></author>
+      <author id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></author>
       <author><first>Éva Csató</first><last>Johanson</last></author>
       <title>Building a <fixed-case>S</fixed-case>wedish-<fixed-case>T</fixed-case>urkish Parallel Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/544_pdf.pdf</url>
@@ -3039,7 +3039,7 @@
     </paper>
     <paper id="331">
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Language Resources for Background Gathering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/545_pdf.pdf</url>
       <abstract>We describe the Cubreporter information access system which allows access to news archives through the use of natural language technology. The system includes advanced text search, question answering, summarization, and entity profiling capabilities. It has been designed taking into account the characteristics of the background gathering task.</abstract>
@@ -3048,8 +3048,8 @@
     <paper id="332">
       <author><first>Julie</first><last>Medero</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Christopher</first><last>Walker</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="christopher-r-walker"><first>Christopher</first><last>Walker</last></author>
       <title>An Efficient Approach to Gold-Standard Annotation: Decision Points for Complex Tasks</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/550_pdf.pdf</url>
       <abstract>Inter-annotator consistency is a concern for any corpus building effort relying on human annotation. Adjudication is as effective way to locate and correct discrepancies of various kinds. It can also be both difficult and time-consuming. This paper introduces Linguistic Data Consortium (LDC)s model for decision point-based annotation and adjudication, and describes the annotation tools developed to enable this approach for the Automatic Content Extraction (ACE) Program. Using a customized user interface incorporating decision points, we improved adjudication efficiency over 2004 annotation rates, despite increased annotation task complexity. We examine the factors that lead to more efficient, less demanding adjudication. We further discuss how a decision point model might be applied to annotation tools designed for a wide range of annotation tasks. Finally, we consider issues of annotation tool customization versus development time in the context of a decision point model.</abstract>
@@ -3063,7 +3063,7 @@
       <bibkey>klatt-2006-corpus</bibkey>
     </paper>
     <paper id="334">
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Sandra</first><last>Petel</last></author>
       <title>The Ritel Corpus - An annotated Human-Machine open-domain question answering spoken dialog corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/553_pdf.pdf</url>
@@ -3092,14 +3092,14 @@
     <paper id="337">
       <author><first>Anna</first><last>Korhonen</last></author>
       <author><first>Yuval</first><last>Krymolowski</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <title>A Large Subcategorization Lexicon for Natural Language Processing Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/558_pdf.pdf</url>
       <abstract>We introduce a large computational subcategorizationlexicon which includes subcategorization frame (SCF) and frequencyinformation for 6,397 English verbs. This extensive lexicon was acquiredautomatically from five corpora and the Web using the current version of the comprehensive subcategorization acquisition system of Briscoe and Carroll (1997). The lexicon is provided freely for research use, along with a script which can be used to filter and build sub-lexicons suited for different natural languageprocessing (NLP) purposes. Documentation is also provided whichexplains each sub-lexicon option and evaluates its accuracy.</abstract>
       <bibkey>korhonen-etal-2006-large</bibkey>
     </paper>
     <paper id="338">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <title>Integrating Linguistic Resources: The <fixed-case>A</fixed-case>merican National Corpus Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/560_pdf.pdf</url>
@@ -3107,8 +3107,8 @@
       <bibkey>ide-suderman-2006-integrating</bibkey>
     </paper>
     <paper id="339">
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title>Representing Linguistic Corpora and Their Annotations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/562_pdf.pdf</url>
       <abstract>A Linguistic Annotation Framework (LAF) is being developed within the International Standards Organization Technical Committee 37 Sub-committee on Language Resource Management (ISO TC37 SC4). LAF is intended to provide a standardized means to represent linguistic data and its annotations that is defined broadly enough to accommodate all types of linguistic annotations, and at the same time provide means to represent precise and potentially complex linguistic information. The general principles informing the design of LAF have been previously reported (Ide and Romary, 2003; Ide and Romary, 2004a). This paper describes some of the more technical aspects of the LAF design that have been addressed in the process of finalizing the specifications for the standard.</abstract>
@@ -3117,7 +3117,7 @@
     <paper id="340">
       <author><first>Zhongqiang</first><last>Huang</last></author>
       <author><first>Lei</first><last>Chen</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <title>An Open Source Prosodic Feature Extraction Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/565_pdf.pdf</url>
       <abstract>There has been an increasing interest in utilizing a wide variety of knowledge sources in order to perform automatic tagging of speech events, such as sentence boundaries and dialogue acts. In addition to the word spoken, the prosodic content of the speech has been proved quite valuable in a variety of spoken language processing tasks such as sentence segmentation and tagging, disfluency detection, dialog act segmentation and tagging, and speaker recognition. In this paper, we report on an open source prosodic feature extraction tool based on Praat, with a description of the prosodic features and the implementation details, as well as a discussion of its extension capability. We also evaluate our tool on a sentence boundary detection task and report the system performance on the NIST RT04 CTS data.</abstract>
@@ -3142,9 +3142,9 @@
     </paper>
     <paper id="343">
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
-      <author><first>José María</first><last>Guirao</last></author>
-      <author><first>María</first><last>Ruiz-Casado</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="jose-m-guirao"><first>José María</first><last>Guirao</last></author>
+      <author id="maria-ruiz-casado"><first>María</first><last>Ruiz-Casado</last></author>
       <title>The wraetlic <fixed-case>NLP</fixed-case> suite</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/569_pdf.pdf</url>
       <abstract>In this paper, we describe the second release of a suite of language analysers, developed over the last five years, called wraetlic, which includes tools for several partial parsing tasks, both for English and Spanish. It has been successfully used in fields such as Information Extraction, thesaurus acquisition, Text Summarisation and Computer Assisted Assessment.</abstract>
@@ -3152,10 +3152,10 @@
     </paper>
     <paper id="344">
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Akane</first><last>Yakushiji</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <title>Linguistic and Biological Annotations of Biological Interaction Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/570_pdf.pdf</url>
       <abstract>This paper discusses an augmentation of a corpus ofresearch abstracts in biomedical domain (the GENIA corpus) with two kinds of annotations: tree annotation and event annotation. The tree annotation identifies the linguistic structure that encodes the relations among entities. The event annotation reveals the semantic structure of the biological interaction events encoded in the text. With these annotations we aim to provide a link between the clue and the target of biological event information extraction.</abstract>
@@ -3171,17 +3171,17 @@
       <bibkey>tenfjord-etal-2006-ask</bibkey>
     </paper>
     <paper id="346">
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Klára</first><last>Chvátalová</last></author>
-      <author><first>Oana</first><last>Postolache</last></author>
+      <author id="oana-postolache"><first>Oana</first><last>Postolache</last></author>
       <title>Annotation Guidelines for <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Word Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/575_pdf.pdf</url>
       <abstract>We report on our experience with manual alignment of Czech and English parallel corpus text. We applied existing guidelines for English and French and augmented them to cover systematically occurring cases in our corpus. We describe the main extensions covered in our guidelines and provide examples. We evaluated both intra- and inter-annotator agreement and obtained very good results of Kappa well above 0.9 and agreement of 95% and 93%, respectively.</abstract>
       <bibkey>kruijff-korbayova-etal-2006-annotation</bibkey>
     </paper>
     <paper id="347">
-      <author><first>Jérémie</first><last>Segouat</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Emilie</first><last>Martin</last></author>
       <title>Sign Language corpus analysis: Synchronisation of linguistic annotation and numerical data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/576_pdf.pdf</url>
@@ -3193,7 +3193,7 @@
       <author><first>Monte</first><last>George</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Mandy</first><last>Pet</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <title>Lexical Markup Framework (<fixed-case>LMF</fixed-case>)</title>
@@ -3219,7 +3219,7 @@
       <bibkey>pouliquen-etal-2006-geocoding</bibkey>
     </paper>
     <paper id="350">
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <title>Query Expansion on Compounds</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/580_pdf.pdf</url>
       <abstract>Compounds constitute a specific issue in search, in particular in languages where they are written in one word, as is the case for Danish and the other Scandinavian languages. For such languages, expansion of the query compound into separate lemmas is a way of finding the often frequent alternative synonymous phrases in which the content of a compound can also be expressed. However, it is crucial to note that the number of irrelevant hits is generally very high when using this expansion strategy. The aim of this paper is to examine how we can obtain better search results on split compounds, partly by looking at the internal structure of the original compound, partly by analyzing the context in which the split compound occurs. We perform an NP analysis and introduce a new, linguistically based threshold for retrieved hits. The results obtained by using this strategy demonstrate that compound splitting combined with a shallow linguistic analysis focusing on the recognition of NPs can improve search by bringing down the number of irrelevant hits.</abstract>
@@ -3237,7 +3237,7 @@
       <bibkey>charoenporn-etal-2006-word</bibkey>
     </paper>
     <paper id="352">
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Maria Teresa</first><last>Sagri</last></author>
       <author><first>Daniela</first><last>Tiscornia</last></author>
       <author><first>Sara</first><last>Castagnoli</last></author>
@@ -3250,7 +3250,7 @@
       <author><first>Joris</first><last>Vaneyghen</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
       <author><first>Dirk</first><last>Van Compernolle</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>A mixed word / morphological approach for extending <fixed-case>CELEX</fixed-case> for high coverage on contemporary large corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/583_pdf.pdf</url>
       <abstract>This paper describes an alternative approach to morphological language modeling, which incorporates constraints on the morphological production of new words. This is done by applying the constraints as a preprocessing step in which only one morphological production rule can be applied to an extended lexicon of knownmorphemes, lemmas and word forms. This approach is used to extend the CELEX Dutch morphological database, so that a higher coverage can be reached on a largecorpus of Dutch newspaper articles. We present experimental results on the coverage of this extended database and use the extension to further evaluate our morphologicalsystem, as well as the impact of the constraints on the coverage of out-of-vocabulary words.</abstract>
@@ -3287,9 +3287,9 @@
     </paper>
     <paper id="357">
       <author><first>Martí</first><last>Umbert</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <author><first>Pablo</first><last>Agüero</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <title><fixed-case>S</fixed-case>panish Synthesis Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/590_pdf.pdf</url>
       <abstract>This paper deals with the design of a synthesis database for a high quality corpus-based Speech Synthesis system in Spanish. The database has been designed for speech synthesis, speech conversion and expressive speech. The design follows the specifications of TC-STAR project and has been applied to collect equivalent English and Mandarin synthesis databases. The sentences of the corpus have been selected mainly from transcribed speech and novels. The selection criterion is a phonetic and prosodic coverage. The corpus was completed with sentences specifically designed to cover frequent phrases and words. Two baseline speakers and four bilingual speakers were recorded. Recordings consist of 10 hours of speech for each baseline speaker and one hour of speech for each voice conversion bilingual speaker. The database is labelled and segmented. Pitch marks and phonetic segmentation was done automatically and up to 50% manually supervised. The database will be available at ELRA.</abstract>
@@ -3298,7 +3298,7 @@
     <paper id="358">
       <author><first>Pavel</first><last>Ircing</last></author>
       <author><first>Jan</first><last>Hoidekr</last></author>
-      <author><first>Josef</first><last>Psutka</last></author>
+      <author id="josef-psutka"><first>Josef</first><last>Psutka</last></author>
       <title>Exploiting Linguistic Knowledge in Language Modeling of <fixed-case>C</fixed-case>zech Spontaneous Speech</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/591_pdf.pdf</url>
       <abstract>In our paper, we present a method for incorporating available linguistic information into a statistical language model that is used in ASR system for transcribing spontaneous speech. We employ the class-based language model paradigm and use the morphological tags as the basis for world-to-class mapping. Since the number of different tags is at least by one order of magnitude lower than the number of words even in the tasks with moderately-sized vocabularies, the tag-based model can be rather robustly estimated using even the relatively small text corpora. Unfortunately, this robustness goes hand in hand with restricted predictive ability of the class-based model. Hence we apply the two-pass recognition strategy, where the first pass is performed with the standard word-based n-gram and the resulting lattices are rescored in the second pass using the aforementioned class-based model. Using this decoding scenario, we have managed to moderately improve the word error rate in the performed ASR experiments.</abstract>
@@ -3312,7 +3312,7 @@
       <bibkey>slavcheva-2006-semantic</bibkey>
     </paper>
     <paper id="360">
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <author><first>Berthold</first><last>Crysmann</last></author>
       <title>Exploring <fixed-case>HPSG</fixed-case>-based Treebanks for Probabilistic Parsing <fixed-case>HPSG</fixed-case> grammar extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/595_pdf.pdf</url>
@@ -3328,7 +3328,7 @@
       <bibkey>marinelli-bindi-2006-proper</bibkey>
     </paper>
     <paper id="362">
-      <author><first>Sonja E.</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja E.</first><last>Bosch</last></author>
       <author><first>Laurette</first><last>Pretorius</last></author>
       <author><first>Jackie</first><last>Jones</last></author>
       <title>Towards machine-readable lexicons for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>frican <fixed-case>B</fixed-case>antu languages</title>
@@ -3347,7 +3347,7 @@
       <bibkey>lechenadec-etal-2006-creation</bibkey>
     </paper>
     <paper id="364">
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Toru</first><last>Ishida</last></author>
       <title>A Dictionary Model for Unifying Machine Readable Dictionaries and Computational Concept Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/600_pdf.pdf</url>
@@ -3385,7 +3385,7 @@
     </paper>
     <paper id="368">
       <author><first>Vít</first><last>Nováček</last></author>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <author><first>Jan</first><last>Pomikálek</last></author>
       <title>Text Mining for Semantic Relations as a Support Base of a Scientific Portal Generator</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/606_pdf.pdf</url>
@@ -3393,10 +3393,10 @@
       <bibkey>novacek-etal-2006-text</bibkey>
     </paper>
     <paper id="369">
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Andrea</first><last>Bolognesi</last></author>
-      <author><first>Corrado</first><last>Seidenari</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="andrea-bolognesi"><first>Andrea</first><last>Bolognesi</last></author>
+      <author id="corrado-seidenari"><first>Corrado</first><last>Seidenari</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <title><fixed-case>POS</fixed-case> tagset design for <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/608_pdf.pdf</url>
       <abstract>We aim to automatically induce a PoS tagset for Italian by analysing the distributional behaviour of Italian words. To this end, we propose an algorithm that (a) extracts information from loosely labelled dependency structures that encode only basic and broadly accepted syntactic relations, namely Head/Dependent and the distinction of dependents into Argument vs. Adjunct, and (b) derives a possible set of word classes. The paper reports on some preliminary experiments carried out using the induced tagset in conjunction with state-of-the-art PoS taggers. The method proposed to design a proper tagset exploits little, if any, language-specific knowledge: hence it is in principle applicable to any language.</abstract>
@@ -3407,7 +3407,7 @@
       <author><first>Toru</first><last>Hirano</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Augmenting a Semantic Verb Lexicon with a Large Scale Collection of Example Sentences</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/610_pdf.pdf</url>
       <abstract>One of the crucial issues in semantic parsing is how to reduce costs of collecting a sufficiently large amount of labeled data. This paper presents a new approach to cost-saving annotation of example sentences with predicate-argument structure information, taking Japanese as a target language. In this scheme, a large collection of unlabeled examples are first clustered and selectively sampled, and for each sampled cluster, only one representative example is given a label by a human annotator. The advantages of this approach are empirically supported by the results of our preliminary experiments, where we use an existing similarity function and naive sampling strategy.</abstract>
@@ -3424,8 +3424,8 @@
       <bibkey>onelli-etal-2006-diacoris</bibkey>
     </paper>
     <paper id="372">
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
       <author><first>Jone</first><last>Etxeberria</last></author>
       <author><first>Eli</first><last>Pociello</last></author>
       <title>A Preliminary Study for Building the <fixed-case>B</fixed-case>asque <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
@@ -3443,8 +3443,8 @@
       <bibkey>yamamoto-etal-2006-detection</bibkey>
     </paper>
     <paper id="374">
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
       <author><first>Jone</first><last>Etxeberria</last></author>
       <author><first>Eli</first><last>Izagirre</last></author>
       <author><first>Karmele</first><last>Mendizabal</last></author>
@@ -3490,7 +3490,7 @@
       <bibkey>de-luca-nurnberger-2006-rebuilding</bibkey>
     </paper>
     <paper id="379">
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <title>Automatic Acquisition of Semantics-Extraction Patterns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/621_pdf.pdf</url>
       <abstract>This paper examines the use of parallel and comparable corpora for automatic acquisition of semantics-extraction patterns. It presents a new method of the pattern extraction which takes advantage of parallel texts to "port" text mining solutions from a source language to a target language. It is shown thatthe technique can help in situations when the extraction procedure is to beapplied in a language (languages) with a limited set of available resources,e.g. domain-specific thesauri. The primary motivation of our work lies in a particular multilingual e-learning system. For testing purposes, other applications of the given approach were implemented. They include pattern extraction from general texts (tested on wordnet relations), acquisition of domain-specific patterns from large parallel corpus of legal EU documents, and mining of subjectivity expressions for multilingual opinion extraction system.</abstract>
@@ -3512,10 +3512,10 @@
       <bibkey>draxler-jansch-2006-speech</bibkey>
     </paper>
     <paper id="382">
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
       <author id="king-kui-sin"><first>K.K.</first><last>Sin</last></author>
-      <author><first>Lawrence Y.L.</first><last>Cheung</last></author>
+      <author id="lawrence-y-l-cheung"><first>Lawrence Y.L.</first><last>Cheung</last></author>
       <title>Court Stenography-To-Text (“<fixed-case>STT</fixed-case>”) in <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong: A Jurilinguistic Engineering Effort</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/624_pdf.pdf</url>
       <abstract>Implementation of legal bilingualism in Hong Kong after 1997 has necessitated the production of voluminous and extensive court proceedings and judgments in both Chinese and English. For the former, Cantonese, a dialect of Chinese, is the home language of more than 90% of the population in Hong Kong and so used in the courts. To record speech in Cantonese verbatim, a Chinese Computer-Aided Transcription system has been developed. The transcription system converts stenographic codes into Chinese text, i.e. from phonetic to orthographic representation of the language. The main challenge lies in the resolution of the sever ambiguity resulting from homocode problems in the conversion process. Cantonese Chinese is typified by problematic homonymy, which presents serious challenges. The N-gram statistical model is employed to estimate the most probable character string of the input transcription codes. Domain-specific corpora have been compiled to support the statistical computation. To improve accuracy, scalable techniques such as domain-specific transcription and special encoding are used. Put together, these techniques deliver 96% transcription accuracy.</abstract>
@@ -3567,7 +3567,7 @@
       <author><first>Julie</first><last>Mauclair</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Simon</first><last>Petit-Renaud</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <title>Automatic Detection of Well Recognized Words in Automatic Speech Transcriptions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/630_pdf.pdf</url>
       <abstract>This work adresses the use of confidence measures for extracting well recognized words with very low error rate from automatically transcribed segments in a unsupervised way. We present and compare several confidence measures and propose a method to merge them into a new one. We study its capabilities on extracting correct recognized word-segments compared to the amount of rejected words. We apply this fusion measure to select audio segments composed of words with a high confidence score. These segments come from an automatic transcription of french broadcast news given by our speech recognition system based on the CMU Sphinx3.3 decoder. Injecting new data resulting from unsupervised treatments of raw audio recordings in the training corpus of acoustic models gives statistically significant improvement (95% confident interval) in terms of word error rate. Experiments have been carried out on the corpus used during ESTER, the french evaluation campaign.</abstract>
@@ -3597,26 +3597,26 @@
       <bibkey>nimb-2006-lexadv</bibkey>
     </paper>
     <paper id="390">
-      <author><first>Voula</first><last>Giouli</last></author>
-      <author><first>Alexis</first><last>Konstandinidis</last></author>
-      <author><first>Elina</first><last>Desypri</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
+      <author id="alexis-konstantinidis"><first>Alexis</first><last>Konstandinidis</last></author>
+      <author id="elina-desipri"><first>Elina</first><last>Desypri</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <title>Multi-domain Multi-lingual Named Entity Recognition: Revisiting &amp; Grounding the resources issue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/633_pdf.pdf</url>
       <abstract>The paper reports on the development methodology of a system aimed at multi-domain multi-lingual recognition and classification of names in texts, the focus being on the linguistic resources used for training and testing purposes. The corpus presented here has been collected and annotated in the framework of different projects the critical issue being the development of a final resource that is homogenous, re-usable and adaptable to different domains and languages with a view to robust multi-domain and multi-lingual NERC.</abstract>
       <bibkey>giouli-etal-2006-multi</bibkey>
     </paper>
     <paper id="391">
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <title>Inter-annotator Agreement on a Multilingual Semantic Annotation Task</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/634_pdf.pdf</url>
       <abstract>Six sites participated in the Interlingual Annotation of Multilingual Text Corpora (IAMTC) project (Dorr et al., 2004; Farwell et al., 2004; Mitamura et al., 2004). Parsed versions of English translations of news articles in Arabic, French, Hindi, Japanese, Korean and Spanish were annotated by up to ten annotators. Their task was to match open-class lexical items (nouns, verbs, adjectives, adverbs) to one or more concepts taken from the Omega ontology (Philpot et al., 2003), and to identify theta roles for verb arguments. The annotated corpus is intended to be a resource for meaning-based approaches to machine translation. Here we discuss inter-annotator agreement for the corpus. The annotation task is characterized by annotators freedom to select multiple concepts or roles per lexical item. As a result, the annotation categories are sets, the number of which is bounded only by the number of distinct annotator-lexical item pairs. We use a reliability metric designed to handle partial agreement between sets. The best results pertain to the part of the ontology derived from WordNet. We examine change over the course of the project, differences among annotators, and differences across parts of speech. Our results suggest a strong learning effect early in the project.</abstract>
       <bibkey>passonneau-etal-2006-inter</bibkey>
     </paper>
     <paper id="392">
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <title>Measuring Agreement on Set-valued Items (<fixed-case>MASI</fixed-case>) for Semantic and Pragmatic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/636_pdf.pdf</url>
       <abstract>Annotation projects dealing with complex semantic or pragmatic phenomena face the dilemma of creating annotation schemes that oversimplify the phenomena, or that capture distinctions conventional reliability metrics cannot measure adequately. The solution to the dilemma is to develop metrics that quantify the decisions that annotators are asked to make. This paper discusses MASI, distance metric for comparing sets, and illustrates its use in quantifying the reliability of a specific dataset. Annotations of Summary Content Units (SCUs) generate models referred to as pyramids which can be used to evaluate unseen human summaries or machine summaries. The paper presents reliability results for five pairs of pyramids created for document sets from the 2003 Document Understanding Conference (DUC). The annotators worked independently of each other. Differences between application of MASI to pyramid annotation and its previous application to co-reference annotation are discussed. In addition, it is argued that a paradigmatic reliability study should relate measures of inter-annotator agreement to independent assessments, such as significance tests of the annotated variables with respect to other phenomena. In effect, what counts as sufficiently reliable intera-annotator agreement depends on the use the annotated data will be put to.</abstract>
@@ -3633,7 +3633,7 @@
       <author><first>Marc</first><last>Verhagen</last></author>
       <author><first>Robert</first><last>Knippen</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>Annotation of Temporal Relations with Tango</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/638_pdf.pdf</url>
       <abstract>Temporal annotation is a complex task characterized by low markup speed and low inter-annotator agreements scores. Tango is a graphical annotation tool for temporal relations. It is developed for the TimeML annotation language and allows annotators to build a graph that resembles a timeline. Temporal relations are added by selecting events and drawing labeled arrows between them. Tango is integrated with a temporal closure component and includes features like SmartLink, user prompting and automatic linking of time expressions. Tango has been used to create two corpora with temporal annotation, TimeBank and the AQUAINT Opinion corpus.</abstract>
@@ -3649,14 +3649,14 @@
     <paper id="396">
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Matthias</first><last>Richter</last></author>
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <title>Corpus Portal for Search in Monolingual Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/641_pdf.pdf</url>
       <abstract>A simple and flexible schema for storing and presenting monolingual language resources is proposed. In this format, data for 18 different languages is already available in various sizes. The data is provided free of charge for online use and download. The main target is to ease the application of algorithms for monolingual and interlingual studies.</abstract>
       <bibkey>quasthoff-etal-2006-corpus</bibkey>
     </paper>
     <paper id="397">
-      <author><first>Tristan</first><last>Vanrullen</last></author>
+      <author id="tristan-vanrullen"><first>Tristan</first><last>Vanrullen</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <author><first>Jean-Marie</first><last>Balfourier</last></author>
       <title>Constraint-Based Parsing as an Efficient Solution: Results from the Parsing Evaluation Campaign <fixed-case>EAS</fixed-case>y</title>
@@ -3665,24 +3665,24 @@
       <bibkey>vanrullen-etal-2006-constraint</bibkey>
     </paper>
     <paper id="398">
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <title>Parallel Corpora and Phrase-Based Statistical Machine Translation for New Language Pairs via Multiple Intermediaries</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/643_pdf.pdf</url>
       <abstract>We present a large parallel corpus of texts published by the United Nations Organization, which we exploit for the creation ofphrase-based statistical machine translation (SMT) systems for new language pairs. We present a setup where phrase tables for these language pairs are used for translation between languages for which parallel corpora of sufficient size are so far not available. We give some preliminary results for this novel application of SMT and discuss further refinements.</abstract>
       <bibkey>eisele-2006-parallel</bibkey>
     </paper>
     <paper id="399">
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Rebecca</first><last>Green</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <title>Parallel Syntactic Annotation of Multiple Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/645_pdf.pdf</url>
@@ -3702,8 +3702,8 @@
       <bibkey>galliano-etal-2006-corpus</bibkey>
     </paper>
     <paper id="401">
-      <author><first>Juan José Rodríguez</first><last>Soler</last></author>
-      <author><first>Pedro Concejero</first><last>Cerezo</last></author>
+      <author id="juan-jose-rodriguez-soler"><first>Juan José Rodríguez</first><last>Soler</last></author>
+      <author id="pedro-concejero-cerezo"><first>Pedro Concejero</first><last>Cerezo</last></author>
       <author><first>Carlos Lázaro</first><last>Ávila</last></author>
       <author><first>Daniel Tapias</first><last>Merino</last></author>
       <title>Usability evaluation of 3<fixed-case>G</fixed-case> multimodal services in Telefónica Móviles España</title>
@@ -3721,7 +3721,7 @@
     </paper>
     <paper id="403">
       <author><first>Porfírio</first><last>Filipe</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <title>A Framework to Integrate Ubiquitous Knowledge Modeling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/650_pdf.pdf</url>
       <abstract>This paper describes our contribution to let end users configure mixed-initiative spoken dialogue systems to suit their personalized goals. The main problem that we want to address is the reconfiguration of spoken language dialogue systems to deal with generic plug and play artifacts. Such reconfiguration can be seen as a portability problem and is a critical research issue. In order to solve this problem we describe a hybrid approach to design ubiquitous domain models that allows the dialogue system to perform recognition of available tasks on the fly. Our approach considers two kinds of domain knowledge: the global knowledge and the local knowledge. The global knowledge, that is modeled using a top-down approach, is associated at design time with the dialogue system itself. The local knowledge, that is modeled using a bottom-up approach, is defined with each one of the artifacts. When an artifact is activated or deactivated, a bilateral process, supported by a broker, updates the domain knowledge considering the artifact local knowledge. We assume that everyday artifacts are augmented with computational capabilities and semantic descriptions supported by their own knowledge model. A case study focusing a microwave oven is depicted.</abstract>
@@ -3730,7 +3730,7 @@
     <paper id="404">
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <title>Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/651_pdf.pdf</url>
@@ -3745,7 +3745,7 @@
       <bibkey>declerck-2006-synaf</bibkey>
     </paper>
     <paper id="406">
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title><fixed-case>EQ</fixed-case>ue<fixed-case>R</fixed-case>: the <fixed-case>F</fixed-case>rench Evaluation campaign of Question-Answering Systems</title>
@@ -3754,9 +3754,9 @@
       <bibkey>ayache-etal-2006-equer</bibkey>
     </paper>
     <paper id="407">
-      <author><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
+      <author id="maria-fernanda-bacelar-do-nascimento"><first>Maria Fernanda Bacelar do</first><last>Nascimento</last></author>
       <author><first>José Bettencourt</first><last>Gonçalves</last></author>
-      <author><first>Luísa</first><last>Pereira</last></author>
+      <author id="luisa-pereira"><first>Luísa</first><last>Pereira</last></author>
       <author><first>Antónia</first><last>Estrela</last></author>
       <author><first>Afonso</first><last>Pereira</last></author>
       <author><first>Rui</first><last>Santos</last></author>
@@ -3767,8 +3767,8 @@
       <bibkey>nascimento-etal-2006-african</bibkey>
     </paper>
     <paper id="408">
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <title>Toward a Pan-<fixed-case>C</fixed-case>hinese Thesaurus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/656_pdf.pdf</url>
       <abstract>In this paper, we propose a corpus-based approach to the construction of a Pan-Chinese lexical resource, starting out with the aim to enrich existing Chinese thesauri in the Pan-Chinese context. The resulting thesaurus is thus expected to contain not only the core senses and usages of Chinese lexical items but also usages specific to individual Chinese speech communities. We introduce the ideas behind the construction of the resource, outline the steps to be taken, and discuss some preliminary analyses. The work is backed up by a unique and large Chinese synchronous corpus containing textual data from various Chinese speech communities including Hong Kong, Beijing, Taipei and Singapore.</abstract>
@@ -3776,7 +3776,7 @@
     </paper>
     <paper id="409">
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Lou</first><last>Boves</last></author>
+      <author id="lou-boves"><first>Lou</first><last>Boves</last></author>
       <title>User requirements analysis for the design of a reference corpus of written <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/657_pdf.pdf</url>
       <abstract>The Dutch Language Corpus Initiative (D-Coi) project aims to specify the design of a 500-million-word reference corpus of written Dutch, and to put the tools and procedures in place that are needed to actually construct such a corpus. One of the tasks in the project is to conduct a user requirements study that should provide the basis for the eventual design of the 500-million-word reference corpus. The present paper outlines the user requirements analysis and reports the results so far.</abstract>
@@ -3802,7 +3802,7 @@
     </paper>
     <paper id="412">
       <author><first>Alberto</first><last>Simões</last></author>
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <title><fixed-case>T</fixed-case>2<fixed-case>O</fixed-case> - Recycling Thesauri into a Multilingual Ontology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/664_pdf.pdf</url>
       <abstract>In this article we present T2O - a workbench to assist the process of translating heterogeneous resources into ontologies, to enrich and add multilingual information, to help programming with them, and to support ontology publishing. T2O is an ontology algebra.</abstract>
@@ -3816,7 +3816,7 @@
       <bibkey>amsalu-2006-data</bibkey>
     </paper>
     <paper id="414">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title><fixed-case>ISA</fixed-case> &amp; <fixed-case>ICA</fixed-case> - Two Web Interfaces for Interactive Alignment of Bitexts alignment of parallel texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/667_pdf.pdf</url>
       <abstract>ISA and ICA are two web interfaces for interactive alignment of parallel texts. ISA provides an interface for automatic and manual sentence alignment. It includes cognate filters and uses structural markup to improve automatic alignment and provides intuitive tools for editing them. Alignment results can be saved to disk or sent via e-mail. ICA provides an interface to the clue aligner from the Uplug toolbox. It allows one to set various parameters and visualizes alignment results in a two-dimensional matrix. Word alignments can be edited and saved to disk.</abstract>
@@ -3824,7 +3824,7 @@
     </paper>
     <paper id="415">
       <author><first>Petra-Maria</first><last>Strauß</last></author>
-      <author><first>Holger</first><last>Hoffman</last></author>
+      <author id="holger-hoffmann"><first>Holger</first><last>Hoffman</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <author><first>Heiko</first><last>Neumann</last></author>
       <author><first>Günther</first><last>Palm</last></author>
@@ -3839,7 +3839,7 @@
       <bibkey>strauss-etal-2006-wizard</bibkey>
     </paper>
     <paper id="416">
-      <author><first>Nancy L.</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy L.</first><last>Underwood</last></author>
       <author><first>Agnes</first><last>Lisowska</last></author>
       <title>The Evolution of an Evaluation Framework for a Text Mining System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/670_pdf.pdf</url>
@@ -3859,7 +3859,7 @@
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Philippe</first><last>Dreuw</last></author>
       <author><first>Morteza</first><last>Zahedi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>A <fixed-case>G</fixed-case>erman <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus of the Domain Weather Report</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/673_pdf.pdf</url>
       <abstract>All systems for automatic sign language translation and recognition, in particular statistical systems, rely on adequately sized corpora. For this purpose, we created the Phoenix corpus that is based on German television weather reports translated into German Sign Language. It comes with a rich annotation of the video data, a bilingual text-based sentence corpus and a monolingual German corpus. All systems for automatic sign language translation and recognition, in particular statistical systems, rely on adequately sized corpora. For this purpose, we created the Phoenix corpus that is based on German television weather reports translated into German Sign Language. It comes with a rich annotation of the video data, a bilingual text-based sentence corpus and a monolingual German corpus.</abstract>
@@ -3868,7 +3868,7 @@
     <paper id="419">
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Caterina</first><last>Caracciolo</last></author>
-      <author><first>Emiliano</first><last>Giovanetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovanetti</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Simone</first><last>Marchi</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
@@ -3888,7 +3888,7 @@
       <bibkey>mulloni-pekar-2006-automatic</bibkey>
     </paper>
     <paper id="421">
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Su’ad</first><last>Awab</last></author>
       <title>Open Source Corpus Analysis Tools for <fixed-case>M</fixed-case>alay</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/677_pdf.pdf</url>
@@ -3896,7 +3896,7 @@
       <bibkey>baldwin-awab-2006-open</bibkey>
     </paper>
     <paper id="422">
-      <author><first>Fidelia</first><last>Ibekwe-Sanjuan</last></author>
+      <author id="fidelia-ibekwe-sanjuan"><first>Fidelia</first><last>Ibekwe-Sanjuan</last></author>
       <title>A task-oriented framework for evaluating theme detection systems: A discussion paper</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/678_pdf.pdf</url>
       <abstract>This paper discusses the inherent difficulties in evaluating systems for theme detection. Such systems are based essentially on unsupervised clustering aiming to discover the underlying structure in a corpus of texts. As the structures are precisely unknown beforehand, it is difficult to devise a satisfactory evaluation protocol. Several problems are posed by cluster evaluation: determining the optimal number of clusters, cluster content evaluation, topology of the discovered structure. Each of these problems has been studied separately but some of the proposed metrics portray significant flaws. Moreover, no benchmark has been commonly agreed upon. Finally, it is necessary to distinguish between task-oriented and activity-oriented evaluation as the two frameworks imply different evaluation protocols. Possible solutions to the activity-oriented evaluation can be sought from the data and text mining communities.</abstract>
@@ -3905,7 +3905,7 @@
     <paper id="423">
       <author id="asuncion-moreno"><first>A.</first><last>Moreno</last></author>
       <author><first>Albert</first><last>Febrer</last></author>
-      <author><first>Lluis</first><last>Márquez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Márquez</last></author>
       <title>Generation of Language Resources for the Development of Speech Technologies in <fixed-case>C</fixed-case>atalan</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/679_pdf.pdf</url>
       <abstract>This paper describes a joint initiative of the Catalan and Spanish Government to produce Language Resources for the Catalan language. A similar methodology to the Basic Language Resource Kit (BLARK) concept was applied to determine the priorities on the production of the Language Resources. The paper shows the LR and tools currently available for the Catalan Language both for Language and Speech technologies. The production of large databases for Automatic Speech Recognition purposes already started. All the resources generated in the project follow EU standards, will be validated by an external centre and will be free and public available through ELRA.</abstract>
@@ -3913,7 +3913,7 @@
     </paper>
     <paper id="424">
       <author><first>Georgiana</first><last>Puşcaşu</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>If “it” were “then”, then when was “it”? Establishing the anaphoric role of “then”</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/680_pdf.pdf</url>
       <abstract>The adverb "then" is among the most frequent Englishtemporal adverbs, being also capable of filling a variety of semantic roles. The identification of anaphoric usages of "then"is important for temporal expression resolution, while thetemporal relationship usage is important for event ordering. Given that previous work has not tackled the identification and temporal resolution of anaphoric "then", this paper presents a machine learning approach for setting apart anaphoric usages and a rule-based normaliser that resolves it with respect to an antecedent. The performance of the two modules is evaluated. The present paper also describes the construction of an annotated corpus and the subsequent derivation of training data required by the machine learning module.</abstract>
@@ -3921,7 +3921,7 @@
     </paper>
     <paper id="425">
       <author><first>Viktor</first><last>Trón</last></author>
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>Péter</first><last>Rebrus</last></author>
       <author><first>András</first><last>Rung</last></author>
       <author><first>Péter</first><last>Vajda</last></author>
@@ -3934,7 +3934,7 @@
     <paper id="426">
       <author id="phuong-le-hong"><last>Le</last><first>H. Phuong</first></author>
       <author id="thi-minh-huyen-nguyen"><last>Nguyen</last><first>T. M. Huyen</first></author>
-      <author><last>Romary</last><first>Laurent</first></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><last>Roussanaly</last><first>Azim</first></author>
       <title>A <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar for <fixed-case>V</fixed-case>ietnamese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/685_pdf.pdf</url>
@@ -3990,10 +3990,10 @@
       <bibkey>ueyama-2006-evaluation</bibkey>
     </paper>
     <paper id="433">
-      <author><first>José</first><last>Iria</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <author><first>Christopher</first><last>Brewster</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>An Incremental Tri-Partite Approach To Ontology Learning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/700_pdf.pdf</url>
       <abstract>In this paper we present a new approach to ontology learning. Its basis lies in a dynamic and iterative view of knowledge acquisition for ontologies. The Abraxas approach is founded on three resources, a set of texts, a set of learning patterns and a set of ontological triples, each of which must remain in equilibrium. As events occur which disturb this equilibrium various actions are triggered to re- establish a balance between the resources. Such events include acquisition of a further text from external resources such as the Web or the addition of ontological triples to the ontology. We develop the concept of a knowledge gap between the coverage of an ontology and the corpus of texts as a measure triggering actions. We present an overview of the algorithm and its functionalities.</abstract>
@@ -4001,27 +4001,27 @@
     </paper>
     <paper id="434">
       <author><first>Thomas</first><last>Pellegrini</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <title>Experimental detection of vowel pronunciation variants in <fixed-case>A</fixed-case>mharic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/701_pdf.pdf</url>
       <abstract>The pronunciation lexicon is a fundamental element in an automatic speech transcription system. It associates each lexical entry (usually a grapheme), with one or more phonemic or phone-like forms, the pronunciation variants. Thorough knowledge of the target language is a priori necessary to establish the pronunciation baseforms and variants. The reliance on human expertise can pose difficulties in developing a system for a language where such knowledge may not be readily available. In this article a speech recognizer is used to help select pronunciation variants in Amharic, the official language of Ethiopia, focusing on alternate choices for vowels. This study is carried out using an audio corpus composed of 37 hours of speech from radio broadcasts which were orthographically transcribed by native speakers. Since the corpus is relatively small for estimating pronunciation variants, a first set of studies were carried out at a syllabic level. Word lexica were then constructed based on the observed syllable occurences. Automatic alignments were compared for lexica containing different vowel variants, with both context-independent and context-dependent acoustic models sets. The variant2+ measure proposed in (Adda-Decker and Lamel, 1999) is used to assess the potential need for pronunciation variants.</abstract>
       <bibkey>pellegrini-lamel-2006-experimental</bibkey>
     </paper>
     <paper id="435">
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <author><first>Angelo</first><last>Dalli</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Evaluating Automatically Generated Timelines from the Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/702_pdf.pdf</url>
       <abstract>As web searches increase, there is a need to represent the search results in the most comprehensible way possible. In particular, we focus on search results from queries about people and places. The standard method for presentation of search results is an ordered list determined by the Web search engine. Although this is satisfactory in some cases, when searching for people and places, presenting the information indexed by time may be more desirable. We are developing a system called Cronopath, which generates a timeline of web search engine results by determining the time frame of each document in the collection and linking elements in the timeline to the relevant articles. In this paper, we propose evaluation guidelines for judging the quality of automatically generated timelines based on a set of common features.</abstract>
       <bibkey>catizone-etal-2006-evaluating</bibkey>
     </paper>
     <paper id="436">
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Tilman</first><last>Becker</last></author>
-      <author><first>Nate</first><last>Blaylock</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
       <author><first>Ciprian</first><last>Gerstenberger</last></author>
-      <author><first>Michael</first><last>Kaißer</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaißer</last></author>
       <author><first>Peter</first><last>Poller</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Jan</first><last>Schehl</last></author>
@@ -4031,11 +4031,11 @@
       <bibkey>kruijff-korbayova-etal-2006-sammie</bibkey>
     </paper>
     <paper id="437">
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Roberta</first><last>Blitz</last></author>
-      <author><first>David</first><last>Elson</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
       <author><first>Angela</first><last>Giral</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <title><fixed-case>CL</fixed-case>i<fixed-case>MB</fixed-case> <fixed-case>T</fixed-case>ool<fixed-case>K</fixed-case>it: A Case Study of Iterative Evaluation in a Multidisciplinary Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/705_pdf.pdf</url>
       <abstract>Digital image collections in libraries and other curatorial institutions grow too rapidly to create new descriptive metadata for subject matter search or browsing. CLiMB (Computational Linguistics for Metadata Building) was a project designed to address this dilemma that involved computer scientists, linguists, librarians, and art librarians. The CLiMB project followed an iterative evaluation model: each next phase of the project emerged from the results of an evaluation. After assembling a suite of text processing tools to be used in extracting metada, we conducted a formative evaluation with thirteen participants, using a survey in which we varied the order and type of four conditions under which respondents would propose or select image search terms. Results of the formative evaluation led us to conclude that a CLiMB ToolKit would work best if its main function was to propose terms for users to review. After implementing a prototype ToolKit using a browser interface, we conducted an evaluation with ten experts. Users found the ToolKit very habitable, remained consistently satisfied throughout a lengthy evaluation, and selected a large number of terms per image.</abstract>
@@ -4043,7 +4043,7 @@
     </paper>
     <paper id="438">
       <author><first>Anna</first><last>Rumshisky</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>Inducing Sense-Discriminating Context Patterns from Sense-Tagged Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/706_pdf.pdf</url>
       <abstract>Traditionally, context features used in word sense disambiguation are based on collocation statistics and use only minimal syntactic and semantic information. Corpus Pattern Analysis is a technique for producing knowledge-rich context features that capture sense distinctions. It involves (1) identifying sense-carrying context patterns and using the derived context features to discriminate between the unseen instances. Both stages require manual seeding. In this paper, we show how to automate inducing sense-discriminating context features from a sense-tagged corpus.</abstract>
@@ -4051,7 +4051,7 @@
     </paper>
     <paper id="439">
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <title>Building a Large-Scale Repository of Textual Entailment Rules</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/707_pdf.pdf</url>
       <abstract>Entailment rules are rules where the left hand side (LHS) specifies some knowledge which entails the knowledge expressed n the RHS of the rule, with some degree of confidence. Simple entailment rules can be combined in complex entailment chains, which n turn are at the basis of entailment-based reasoning, which has been recently proposed as a pervasive and application independent approach to Natural Language Understanding. We present the first elease of a large-scale repository of entailment rules at the lexical level, which have been derived from a number of available resources, including WordNet and a word similarity database. Experiments on the PASCAL-RTE dataset show that this resource plays a crucial role in recognizing textual entailment.</abstract>
@@ -4059,18 +4059,18 @@
     </paper>
     <paper id="440">
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <title>A Tree Kernel approach to Question and Answer Classification in Question Answering Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/708_pdf.pdf</url>
       <abstract>A critical step in Question Answering design is the definition of the models for question focus identification and answer extraction. In case of factoid questions, we can use a question classifier (trained according to a target taxonomy) and a named entity recognizer. Unfortunately, this latter cannot be applied to generate answers related to non-factoid questions. In this paper, we tackle such problem by designing classifiers of non-factoid answers. As the feature design for this learning task is very complex, we take advantage of tree kernels to generate large feature set from the syntactic parse trees of passages relevant to the target question. Such kernels encode syntactic and lexical information in Support Vector Machines which can decide if a sentence focuses on a target taxonomy subject. The experiments with SVMs on the TREC 10 dataset show that our approach is an interesting future research.</abstract>
       <bibkey>moschitti-basili-2006-tree</bibkey>
     </paper>
     <paper id="441">
-      <author><first>Philippe Boula</first><last>de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe Boula</first><last>de Mareüil</last></author>
       <author><first>Christophe</first><last>d’Alessandro</last></author>
       <author><first>Alexander</first><last>Raake</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Marie-Neige</first><last>Garcia</last></author>
+      <author id="marie-neige-garcia"><first>Marie-Neige</first><last>Garcia</last></author>
       <author><first>Michel</first><last>Morel</last></author>
       <title>A joint intelligibility evaluation of <fixed-case>F</fixed-case>rench text-to-speech synthesis systems: the <fixed-case>E</fixed-case>va<fixed-case>S</fixed-case>y <fixed-case>SUS</fixed-case>/<fixed-case>ACR</fixed-case> campaign</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/709_pdf.pdf</url>
@@ -4078,7 +4078,7 @@
       <bibkey>de-mareuil-etal-2006-joint</bibkey>
     </paper>
     <paper id="442">
-      <author><first>Winston N</first><last>Anderson</last></author>
+      <author id="winston-n-anderson"><first>Winston N</first><last>Anderson</last></author>
       <author><first>Petronella M</first><last>Kotzé</last></author>
       <title>Finite state tokenisation of an orthographical disjunctive agglutinative language: The verbal segment of <fixed-case>N</fixed-case>orthern <fixed-case>S</fixed-case>otho</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/710_pdf.pdf</url>
@@ -4111,17 +4111,17 @@
       <bibkey>nitta-etal-2006-building</bibkey>
     </paper>
     <paper id="446">
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title><fixed-case>S</fixed-case>link<fixed-case>ET</fixed-case>: A Partial Modal Parser for Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/716_pdf.pdf</url>
       <abstract>We present SlinkET, a parser for identifying contexts of event modality in text developed within the TARSQI (Temporal Awareness and Reasoning Systems for Question Interpretation) research framework. SlinkET is grounded on TimeML, a specification language for capturing temporal and event related information in discourse, which provides an adequate foundation to handle event modality. SlinkET builds on top of a robust event recognizer, and provides each relevant event with a value that specifies the degree of certainty about its factuality; e.g., whether it has happened or holds (factive or counter-factive), whether it is being reported or witnessed by somebody else (evidential), or if it is introduced as a possibility (modal). It is based on well-established technology in the field (namely, finite-state techniques), and informed with corpus-induced knowledge that relies on basic information, such as morphological features, POS, and chunking. SlinkET is under continuing development and it currently achieves a performance ratio of 70% F1-measure.</abstract>
       <bibkey>sauri-etal-2006-slinket</bibkey>
     </paper>
     <paper id="447">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>More Data and Tools for More Languages and Research Areas: A Progress Report on <fixed-case>LDC</fixed-case> Activities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/717_pdf.pdf</url>
       <abstract>This presentation reports on recent progress the Linguistic Data Consortium has made in addressing the needs of multiple research communities by collecting, annotating and distributing, simplifying access and developing standards and tools. Specifically, it describes new trends in publication, a sample of recent projects and significant improvements to LDC Online that improve access to LDC data especially for those with limited computing support.</abstract>
@@ -4151,7 +4151,7 @@
       <bibkey>voghera-cutugno-2006-observatory</bibkey>
     </paper>
     <paper id="450">
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <author><first>Caroline</first><last>Lavecchia</last></author>
       <author><first>Jean-Paul</first><last>Haton</last></author>
       <title>Linguistic features modeling based on Partial New Cache</title>
@@ -4160,8 +4160,8 @@
       <bibkey>smaili-etal-2006-linguistic</bibkey>
     </paper>
     <paper id="451">
-      <author><first>Stefan</first><last>Schulz</last></author>
-      <author><first>Kornél</first><last>Markó</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
+      <author id="kornel-marko"><first>Kornél</first><last>Markó</last></author>
       <author><first>Philipp</first><last>Daumke</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <author><first>Susanne</first><last>Hanser</last></author>
@@ -4189,7 +4189,7 @@
       <bibkey>uryupina-2006-coreference</bibkey>
     </paper>
     <paper id="454">
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Michelle</first><last>Vanni</last></author>
       <title>Formal v. Informal: Register-Differentiated <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case> Evaluation in the <fixed-case>PLATO</fixed-case> Paradigm</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/727_pdf.pdf</url>
@@ -4229,7 +4229,7 @@
       <bibkey>schiehlen-spranger-2006-mass</bibkey>
     </paper>
     <paper id="459">
-      <author><first>Andrew W.</first><last>Cole</last></author>
+      <author id="andrew-w-cole"><first>Andrew W.</first><last>Cole</last></author>
       <title>Corpus Development and Publication</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/738_pdf.pdf</url>
       <abstract>This paper will discuss issues relevant to corpus development and publication at the LDC and will illustrate those issues by examining the history of three LDC corpora. This paper will also briefly examine alternative corpus creation and distribution methods and their challenges. The intent of this paper is to increase the available linguistic resources by describing the regulatory and technical environment and thus improving the understanding and interaction between corpus providers and distributors.</abstract>
@@ -4244,7 +4244,7 @@
       <bibkey>gibbon-tseng-2006-discourse</bibkey>
     </paper>
     <paper id="461">
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Livio</first><last>Robaldo</last></author>
       <title>From Natural Language to Databases via Ontologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/740_pdf.pdf</url>
@@ -4272,13 +4272,13 @@
       <bibkey>raake-katz-2006-us</bibkey>
     </paper>
     <paper id="464">
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Andrew</first><last>Cole</last></author>
-      <author><first>Denise</first><last>Dipersio</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="andrew-w-cole"><first>Andrew</first><last>Cole</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>Dipersio</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Xiaoyi</first><last>Ma</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <title>Integrated Linguistic Resources for Language Exploitation Technologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/745_pdf.pdf</url>
@@ -4301,8 +4301,8 @@
       <bibkey>behrens-milde-2006-eclipse</bibkey>
     </paper>
     <paper id="467">
-      <author><first>Harris</first><last>Papageorgiou</last></author>
-      <author><first>Elina</first><last>Desipri</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="elina-desipri"><first>Elina</first><last>Desipri</last></author>
       <author><first>Maria</first><last>Koutsombogera</last></author>
       <author><first>Kanella</first><last>Pouli</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
@@ -4321,7 +4321,7 @@
     </paper>
     <paper id="469">
       <author><first>Nella</first><last>Cucurullo</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Matilde</first><last>Paoli</last></author>
       <author><first>Eugenio</first><last>Picchi</last></author>
       <author><first>Eva</first><last>Sassolini</last></author>
@@ -4332,7 +4332,7 @@
     </paper>
     <paper id="470">
       <author><first>Xiaoyi</first><last>Ma</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <title>Corpus Support for Machine Translation at <fixed-case>LDC</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf</url>
       <abstract>This paper describes LDC's efforts in collecting, creating and processing different types of linguistic data, including lexicons, parallel text, multiple translation corpora, and human assessment of translation quality, to support the research and development in Machine Translation. Through a combination of different procedures and core technologies, the LDC was able to create very large, high quality, and cost-efficient corpora, which have contributed significantly to recent advances in Machine Translation. Multiple translation corpora and human assessment together facilitate, validate and improve automatic evaluation metrics, which are vital to the development of MT systems. The Bilingual Internet Text Search (BITS) and Champollion sentence aligner enable the finding and processing of large quantities of parallel text. All specifications and tools used by LDC and described in the paper are or will be available to the general public.</abstract>
@@ -4340,12 +4340,12 @@
     </paper>
     <paper id="471">
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
       <title>Linguistic Resources for Speech Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/755_pdf.pdf</url>
@@ -4353,7 +4353,7 @@
       <bibkey>bies-etal-2006-linguistic</bibkey>
     </paper>
     <paper id="472">
-      <author><first>Tomasz</first><last>Obrębski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrębski</last></author>
       <author><first>Michał</first><last>Stolarski</last></author>
       <title><fixed-case>UAM</fixed-case> Text Tools - a flexible <fixed-case>NLP</fixed-case> architecture</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/756_pdf.pdf</url>
@@ -4390,9 +4390,9 @@
       <bibkey>habash-etal-2006-design</bibkey>
     </paper>
     <paper id="476">
-      <author><first>Grażyna</first><last>Vetulani</last></author>
+      <author id="grazyna-vetulani"><first>Grażyna</first><last>Vetulani</last></author>
       <author><first>Zygmunt</first><last>Vetulani</last></author>
-      <author><first>Tomasz</first><last>Obrębski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrębski</last></author>
       <title>Syntactic Lexicon of <fixed-case>P</fixed-case>olish Predicative Nouns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/760_pdf.pdf</url>
       <abstract>In the paper we report realization of SyntLex project aiming at construction of a full lexicon grammar for Polish. The lexicon-grammar based paradigm in computer linguistics is derived from the predicate logic and attributes a central role to the predicative constructions. An important class of syntactic constructions in many languages (French, English, Polish and other Slavonic languages in particular) are those based on verbo-nominal collocations, with the verb playing a support role with respect to the noun considered as carrying the predicative information. In this paper we refer to the former research by one of the authors aiming at full description of verbo-nominal predicative constructions for Polish in the form of an electronic resource for LI applications. We describe procedures to complete and corpus-validate the resource obtained so far.</abstract>
@@ -4406,8 +4406,8 @@
       <bibkey>alonge-2006-italian</bibkey>
     </paper>
     <paper id="478">
-      <author><first>José</first><last>Iria</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>A Methodology and Tool for Representing Language Resources for Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/765_pdf.pdf</url>
       <abstract>In recent years there has been a growing interest in clarifying the process of Information Extraction (IE) from documents, particularly when coupled with Machine Learning. We believe that a fundamental step forward in clarifying the IE process would be to be able to perform comparative evaluations on the use of different representations. However, this is difficult because most of the time the way information is represented is too tightly coupled with the algorithm at an implementation level, making it impossible to vary representation while keeping the algorithm constant. A further motivation behind our work is to reduce the complexity of designing, developing and testing IE systems. The major contribution of this work is in defining a methodology and providing a software infrastructure for representing language resources independently of the algorithm, mainly for Information Extraction but with application in other fields - we are currently evaluating its use for ontology learning and document classification.</abstract>
@@ -4421,7 +4421,7 @@
       <bibkey>halpin-2006-automatic</bibkey>
     </paper>
     <paper id="480">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Amanda</first><last>Schiffrin</last></author>
       <title>Methodological Aspects of Semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/769_pdf.pdf</url>
@@ -4430,7 +4430,7 @@
     </paper>
     <paper id="481">
       <author><first>Whitney</first><last>Gegg-Harrison</last></author>
-      <author><first>Donna K.</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
       <title><fixed-case>PYCOT</fixed-case>: An <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory-based Pronoun Resolution Toolkit</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/770_pdf.pdf</url>
       <abstract>In this paper, we present PYCOT, a pronoun resolution toolkit. This toolkit is written in the Python programming language and is intended to be an addition to the open-source NLTK collection of natural language processing tools. We discuss the design of the module as well as studies of its performance on pronoun resolution in English and in Korean.</abstract>
@@ -4439,9 +4439,9 @@
     <paper id="482">
       <author><first>Emma</first><last>Barker</last></author>
       <author><first>Ryuichiro</first><last>Higashinaka</last></author>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Jonathan</first><last>Foster</last></author>
       <title>Simulating Cub Reporter Dialogues: The collection of naturalistic human-human dialogues for information access to text archives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/772_pdf.pdf</url>
@@ -4451,7 +4451,7 @@
     <paper id="483">
       <author><first>Jeongwoo</first><last>Ko</last></author>
       <author><first>Laurie</first><last>Hiyakumoto</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <title>Exploiting Multiple Semantic Resources for Answer Selection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/774_pdf.pdf</url>
       <abstract>This paper describes the utility of semantic resources such as the Web, WordNet and gazetteers in the answer selection process for a question-answering system. In contrast with previous work using individual semantic resources to support answer selection, our work combines multiple resources to boost the confidence scores assigned to correct answers and evaluates different combination strategies based on unweighted sums, weighted linear combinations, and logistic regression. We apply our approach to select answers from candidates produced by three different extraction techniques of varying quality, focusing on TREC questions whose answers represent locations or proper-names. Our experimental results demonstrate that the combination of semantic resources is more effective than individual resources for all three extraction techniques, improving answer selection accuracy by as much as 32.35% for location questions and 72% for proper-name questions. Of the combination strategies tested, logistic regression models produced the best results for both location and proper-name questions.</abstract>
@@ -4459,7 +4459,7 @@
     </paper>
     <paper id="484">
       <author><first>Kazuaki</first><last>Maeda</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <title>Low-cost Customized Speech Corpus Creation for Speech Technology Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/776_pdf.pdf</url>
@@ -4478,7 +4478,7 @@
       <author><first>Christoph</first><last>Benzmüller</last></author>
       <author><first>Helmut</first><last>Horacek</last></author>
       <author><first>Henri</first><last>Lesourd</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <author><first>Marvin</first><last>Schiller</last></author>
       <author><first>Magdalena</first><last>Wolska</last></author>
       <title>A corpus of tutorial dialogs on theorem proving; the influence of the presentation of the study-material</title>
@@ -4489,7 +4489,7 @@
     <paper id="487">
       <author><first>Jamal</first><last>Laoudi</last></author>
       <author><first>Calandra R.</first><last>Tate</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <title>Task-based <fixed-case>MT</fixed-case> Evaluation: From Who/When/Where Extraction to Event Understanding</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/779_pdf.pdf</url>
       <abstract>Task-based machine translation (MT) evaluation asks, how well do people perform text-handling tasks given MT output? This method of evaluation yields an extrinsic assessment of an MT engine, in terms of users task performance on MT output. While this method is time-consuming, its key advantage is that MT users and stakeholders understand how to interpret the assessment results. Prior experiments showed that subjects can extract individual who-, when-, and where-type elements of information from MT output passages that were not especially fluent. This paper presents the results of a pilot study to assess a slightly more complex task: when given such wh-items already identified in an MT output passage, how well can subjects properly select from and place these items into wh-typed slots to complete a sentence-template about the passages event? The results of the pilot with nearly sixty subjects, while only preliminary, indicate that this task was extremely challenging: given six test templates to complete, half of the subjects had no completely correct templates and 42% had exactly one completely correct template. The provisional interpretation of this pilot study is that event-based template completion defines a task ceiling, against which to evaluate future improvements on MT engines.</abstract>
@@ -4499,7 +4499,7 @@
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Julie</first><last>Medero</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>A New Phase in Annotation Tool Development at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: The Evolution of the Annotation Graph Toolkit</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/780_pdf.pdf</url>
       <abstract>The Linguistic Data Consortium (LDC) has created various annotated linguistic data for a variety of common task evaluation programs and projects to create shared linguistic resources. The majority of these annotated linguistic data were created with highly customized annotation tools developed at LDC. The Annotation Graph Toolkit (AGTK) has been used as a primary infrastructure for annotation tool development at LDC in recent years. Thanks to the direct feedback from annotation task designers and annotators in-house, annotation tool development at LDC has entered a new, more mature and productive phase. This paper describes recent additions to LDC's annotation tools that are newly developed or significantly improved since our last report at the Fourth International Conference on Language Resource and Evaluation Conference in 2004. These tools are either directly based on AGTK or share a common philosophy with other AGTK tools.</abstract>
@@ -4519,7 +4519,7 @@
       <author><first>Jeongwoo</first><last>Ko</last></author>
       <author><first>Fumihiko</first><last>Murase</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Masahiko</first><last>Tateishi</last></author>
       <author><first>Ichiro</first><last>Akahori</last></author>
       <title>Analyzing the Effects of Spoken Dialog Systems on Driving Behavior</title>
@@ -4540,7 +4540,7 @@
     </paper>
     <paper id="492">
       <author><first>Vasile</first><last>Rus</last></author>
-      <author><first>Art</first><last>Graesser</last></author>
+      <author id="arthur-c-graesser"><first>Art</first><last>Graesser</last></author>
       <title>The Look and Feel of a Confident Entailer</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/788_pdf.pdf</url>
       <abstract>The paper presents a software system that embodies a lexico-syntactic approach to the task of Textual Entailment. Although the approach is based on a minimal set of resources it is highly confident. The architecture of the system is open and can be easily expanded with more and deeper processing modules. Results on a standard data set are presented.</abstract>
@@ -4555,24 +4555,24 @@
       <bibkey>marton-katz-2006-using</bibkey>
     </paper>
     <paper id="494">
-      <author><first>Finley</first><last>Lacatusu</last></author>
+      <author id="finley-lacatusu"><first>Finley</first><last>Lacatusu</last></author>
       <author><first>Andrew</first><last>Hickl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title>Impact of Question Decomposition on the Quality of Answer Summaries</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/792_pdf.pdf</url>
       <abstract>Generating answers to complex questions in the form of multi-document summaries requires access to question decomposition methods. In this paper we present three methods for decomposing complex questions and we evaluate their impact on the responsiveness of the answers they enable.</abstract>
       <bibkey>lacatusu-etal-2006-impact</bibkey>
     </paper>
     <paper id="495">
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Cosmin Adrian</first><last>Bejan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin Adrian</first><last>Bejan</last></author>
       <title>An Answer Bank for Temporal Inference</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/794_pdf.pdf</url>
       <abstract>Answering questions that ask about temporal information involves several forms of inference. In order to develop question answering capabilities that benefit from temporal inference, we believe that a large corpus of questions and answers that are discovered based on temporal information should be available. This paper describes our methodology for creating AnswerTime-Bank, a large corpus of questions and answers on which Question Answering systems can operate using complex temporal inference.</abstract>
       <bibkey>harabagiu-bejan-2006-answer</bibkey>
     </paper>
     <paper id="496">
-      <author><first>Paul C.</first><last>Morărescu</last></author>
+      <author id="paul-morarescu"><first>Paul C.</first><last>Morărescu</last></author>
       <title>Principles for annotating and reasoning with spatial information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/795_pdf.pdf</url>
       <abstract>In this paper we present the first phase of the ongoing SpaceBank project that attempts to create a linguistic resource for annotating and reasoning with spatial information from text. SpaceBank is the spatial counterpart of TimeBank, an electronic resource for temporal semantics and reasoning. The paper focuses on building an ontology of lexicalized spatial concepts. The textual occurrences of the concepts in this ontology will be annotated using the SpaceML language, briefly described here. SpaceBank is designed to be integrated with TimeBank, for a spatio-temporal model of the textual information.</abstract>
@@ -4580,7 +4580,7 @@
     </paper>
     <paper id="497">
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <title>Interaction between Lexical Base and Ontology with Formal Concept Analysis</title>
@@ -4598,7 +4598,7 @@
     </paper>
     <paper id="499">
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <title>The Design and Construction of A <fixed-case>C</fixed-case>hinese Collocation Bank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/799_pdf.pdf</url>
@@ -4614,8 +4614,8 @@
     </paper>
     <paper id="501">
       <author><last>Nimaan</last><first>Abdillahi</first></author>
-      <author><last>Nocera</last><first>Pascal</first></author>
-      <author><last>Bonastre</last><first>Jean-François</first></author>
+      <author id="pascal-nocera"><first>Pascal</first><last>Nocera</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <title>Towards automatic transcription of <fixed-case>S</fixed-case>omali language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/801_pdf.pdf</url>
       <abstract>Most African countries follow an oral tradition system to transmit their cultural, scientific and historic heritage through generations. This ancestral knowledge accumulated during centuries is today threatened of disappearing. This paper presents the first steps in the building of an automatic speech to text transcription for African oral patrimony, particularly the Djibouti cultural heritage. This work is dedicated to process Somali language, which represents half of the targeted Djiboutian audio archives. The main problem is the lack of annotated audio and textual resources for this language. We describe the principal characteristics of audio (10 hours) and textual (3M words) training corpora collected. Using the large vocabulary speech recognizer engine, Speeral, developed at the Laboratoire Informatique dAvignon (LIA) (computer science laboratory of Avignon), we obtain about 20.9% word error rate (WER). This is an encouraging result, considering the small size of our corpora. This first recognizer of Somali language will serve as a reference and will be used to transcribe some Djibouti cultural archives. We will also discuss future ways of research like sub-words indexing of audio archives, related to the specificities of the Somali language.</abstract>
@@ -4640,8 +4640,8 @@
     </paper>
     <paper id="504">
       <author><first>Sabri</first><last>Elkateb</last></author>
-      <author><first>William</first><last>Black</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="william-j-black"><first>William</first><last>Black</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <author><first>Musa</first><last>Alkhalifa</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Adam</first><last>Pease</last></author>
@@ -4652,7 +4652,7 @@
       <bibkey>elkateb-etal-2006-building</bibkey>
     </paper>
     <paper id="505">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <title>Deep non-probabilistic parsing of large corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/806_pdf.pdf</url>
@@ -4670,11 +4670,11 @@
       <bibkey>brekke-etal-2006-automatic</bibkey>
     </paper>
     <paper id="507">
-      <author><first>Alex</first><last>Klassmann</last></author>
+      <author id="alex-klassmann"><first>Alex</first><last>Klassmann</last></author>
       <author><first>Freddy</first><last>Offenga</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Romuald</first><last>Skiba</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="romuald-skiba"><first>Romuald</first><last>Skiba</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Comparison of Resource Discovery Methods</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/808_pdf.pdf</url>
       <abstract>It is an ongoing debate whether categorical systems created by some experts are an appropriate way to help users finding useful resources in the internet. However for the much more restricted domain of language documentation such a category system might still prove reasonable if not indispensable. This article gives an overview over the particular IMDI category set and presents a rough evaluation of its practical use at the Max-Planck-Institute Nijmegen.</abstract>
@@ -4691,9 +4691,9 @@
       <bibkey>lucas-etal-2006-information</bibkey>
     </paper>
     <paper id="509">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Lionel</first><last>Clément</last></author>
-      <author><first>Éric</first><last>Villemonte de La Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de La Clergerie</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <title>The Lefff 2 syntactic lexicon for <fixed-case>F</fixed-case>rench: architecture, acquisition, use</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/810_pdf.pdf</url>
@@ -4716,8 +4716,8 @@
       <bibkey>geyken-schrader-2006-lexikonet</bibkey>
     </paper>
     <paper id="512">
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <title>Evaluation of Automatic Speech Recognition and Speech Language Translation within <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case>:Results from the first evaluation campaign</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/813_pdf.pdf</url>
@@ -4725,8 +4725,8 @@
       <bibkey>mostefa-etal-2006-evaluation</bibkey>
     </paper>
     <paper id="513">
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Marie-Neige</first><last>Garcia</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="marie-neige-garcia"><first>Marie-Neige</first><last>Garcia</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <title>Evaluation of multimodal components within <fixed-case>CHIL</fixed-case>: The evaluation packages and results</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/814_pdf.pdf</url>
@@ -4741,13 +4741,13 @@
       <bibkey>peters-2006-impact</bibkey>
     </paper>
     <paper id="515">
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
       <author><first>Lili</first><last>Aunimo</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <author><first>Richard</first><last>Sutcliffe</last></author>
@@ -4757,10 +4757,10 @@
       <bibkey>magnini-etal-2006-multilingual</bibkey>
     </paper>
     <paper id="516">
-      <author><first>Marie-Neige</first><last>Garcia</last></author>
+      <author id="marie-neige-garcia"><first>Marie-Neige</first><last>Garcia</last></author>
       <author><first>Christophe</first><last>d’Alessandro</last></author>
       <author><first>Gérard</first><last>Bailly</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Michel</first><last>Morel</last></author>
       <title>A joint prosody evaluation of <fixed-case>F</fixed-case>rench text-to-speech synthesis systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/817_pdf.pdf</url>
diff --git a/data/xml/L08.xml b/data/xml/L08.xml
index b14f218aea..53b8764701 100644
--- a/data/xml/L08.xml
+++ b/data/xml/L08.xml
@@ -3,13 +3,13 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Sixth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>'08)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
-      <editor><first>Daniel</first><last>Tapias</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="daniel-tapias"><first>Daniel</first><last>Tapias</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Marrakech, Morocco</address>
       <month>May</month>
@@ -22,7 +22,7 @@
     <paper id="1">
       <author><first>Kathrin</first><last>Eichler</last></author>
       <author><first>Holmer</first><last>Hemsen</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <title>Unsupervised Relation Extraction From Web Documents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/425_paper.pdf</url>
       <abstract>The IDEX system is a prototype of an interactive dynamic Information Extraction (IE) system. A user of the system expresses an information request in the form of a topic description, which is used for an initial search in order to retrieve a relevant set of documents. On basis of this set of documents, unsupervised relation extraction and clustering is done by the system. The results of these operations can then be interactively inspected by the user. In this paper we describe the relation extraction and clustering components of the IDEX system. Preliminary evaluation results of these components are presented and an overview is given of possible enhancements to improve the relation extraction and clustering components.</abstract>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <author><first>Muath</first><last>Alzghool</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <title>Combining Multiple Models for Speech Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/45_paper.pdf</url>
       <abstract>In this article we present a method for combining different information retrieval models in order to increase the retrieval performance in a Speech Information Retrieval task. The formulas for combining the models are tuned on training data. Then the system is evaluated on test data. The task is particularly difficult because the text collection is automatically transcribed spontaneous speech, with many recognition errors. Also, the topics are real information needs, difficult to satisfy. Information Retrieval systems are not able to obtain good results on this data set, except for the case when manual summaries are included.</abstract>
@@ -46,9 +46,9 @@
     </paper>
     <paper id="4">
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <title>The Usage of Various Lexical Resources and Tools to Improve the Performance of Web Search Engines</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/67_paper.pdf</url>
       <abstract>In this paper we present how resources and tools developed within the Human Language Technology Group at the University of Belgrade can be used for tuning queries before submitting them to a web search engine. We argue that the selection of words chosen for a query, which are of paramount importance for the quality of results obtained by the query, can be substantially improved by using various lexical resources, such as morphological dictionaries and wordnets. These dictionaries enable semantic and morphological expansion of the query, the latter being very important in highly inflective languages, such as Serbian. Wordnets can also be used for adding another language to a query, if appropriate, thus making the query bilingual. Problems encountered in retrieving documents of interest are discussed and illustrated by examples. A brief description of resources is given, followed by an outline of the web tool which enables their integration. Finally, a set of examples is chosen in order to illustrate the use of the lexical resources and tool in question. Results obtained for these examples show that the number of documents obtained through a query by using our approach can double and even quadruple in some cases.</abstract>
@@ -57,13 +57,13 @@
     <paper id="5">
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Bryan</first><last>Gibson</last></author>
       <author><first>Mark</first><last>Joseph</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Dongwon</first><last>Lee</last></author>
       <author><first>Brett</first><last>Powley</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Yee Fan</first><last>Tan</last></author>
       <title>The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus: A Reference Dataset for Bibliographic Research in Computational Linguistics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf</url>
@@ -72,15 +72,15 @@
     </paper>
     <paper id="6">
       <author><first>Marian</first><last>Reed</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <title>The <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Member Survey: Purpose, Execution and Results</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/755_paper.pdf</url>
       <abstract>The Linguistic Data Consortium (LDC) seeks to provide its members with quality linguistic resources and services. In order to pursue these ideals and to remain current, LDC monitors the needs and sentiments of its communities. One mechanism LDC uses to generate feedback on consortium and resource issues is the LDC Member Survey. The survey allows LDC Members and nonmembers to provide LDC with valuable insight into their own unique circumstances, their current and future data needs and their views on LDCs role in meeting them. When the 2006 Survey was found to be a useful tool for communicating with the Consortium membership, a 2007 Survey was organized and administered. As a result of the surveys, LDC has confirmed that it has made a positive impact on the community and has identified ways to improve the quality of service and the diversity of monthly offerings. Many respondents recommended ways to improve LDCs functions, ordering mechanism and webpage. Some of these comments have inspired changes to LDCs operation and strategy.</abstract>
       <bibkey>reed-etal-2008-linguistic</bibkey>
     </paper>
     <paper id="7">
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <author><first>Alex</first><last>Dukers</last></author>
       <author><first>Jacquelijn</first><last>Ringersma</last></author>
       <author><first>Paul</first><last>Trilsbeek</last></author>
@@ -90,9 +90,9 @@
       <bibkey>van-uytvanck-etal-2008-language</bibkey>
     </paper>
     <paper id="8">
-      <author><first>Tamás</first><last>Váradi</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Martin</first><last>Wynne</last></author>
       <author><first>Kimmo</first><last>Koskenniemi</last></author>
       <title><fixed-case>CLARIN</fixed-case>: Common Language Resources and Technology Infrastructure</title>
@@ -103,16 +103,16 @@
     <paper id="9">
       <author><first>Jeroen</first><last>Geertzen</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title>Evaluating Dialogue Act Tagging with Naive and Expert Annotators</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/279_paper.pdf</url>
       <abstract>In this paper the dialogue act annotation of naive and expert annotators, both annotating the same data, are compared in order to characterise the insights annotations made by different kind of annotators may provide for evaluating dialogue act tagsets. It is argued that the agreement among naive annotators provides insight in the clarity of the tagset, whereas agreement among expert annotators provides an indication of how reliably the tagset can be applied when errors are ruled out that are due to deficiencies in understanding the concepts of the tagset, to a lack of experience in using the annotation tool, or to little experience in annotation more generally. An indication of the differences between the two groups in terms of inter-annotator agreement and tagging accuracy on task-oriented dialogue in different domains, annotated with the DIT++ dialogue act tagset is presented, and the annotations of both groups are assessed against a gold standard. Additionally, the effect of the reduction of the tagsets granularity on the performances of both groups is looked into. In general, it is concluded that the annotations of both groups provide complementary insights in reliability, clarity, and more fundamental conceptual issues.</abstract>
       <bibkey>geertzen-etal-2008-evaluating</bibkey>
     </paper>
     <paper id="10">
-      <author><first>Drahomíra „johanka“</first><last>Spoustová</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Drahomíra „johanka“</first><last>Spoustová</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
       <title>Validating the Quality of Full Morphological Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/290_paper.pdf</url>
@@ -122,7 +122,7 @@
     <paper id="11">
       <author><first>Kremena</first><last>Ivanova</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Jan</first><last>Pomikálek</last></author>
       <title>Evaluating a <fixed-case>G</fixed-case>erman Sketch Grammar: A Case Study on Noun Phrase Case</title>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="12">
       <author><first>Mark</first><last>McConville</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <title>Evaluating Complement-Modifier Distinctions in a Semantically Annotated Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/691_paper.pdf</url>
       <abstract>We evaluate the extent to which the distinction between semantically core and non-core dependents as used in the FrameNet corpus corresponds to the traditional distinction between syntactic complements and modifiers of a verb, for the purposes of harvesting a wide-coverage verb lexicon from FrameNet for use in deep linguistic processing applications. We use the VerbNet verb database as our gold standard for making judgements about complement-hood, in conjunction with our own intuitions in cases where VerbNet is incomplete. We conclude that there is enough agreement between the two notions (0.85) to make practical the simple expedient of equating core PP dependents in FrameNet with PP complements in our lexicon. Doing so means that we lose around 13% of PP complements, whilst around 9% of the PP dependents left in the lexicon are not complements.</abstract>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="13">
       <author><first>Petra-Maria</first><last>Strauß</last></author>
-      <author><first>Holger</first><last>Hoffmann</last></author>
+      <author id="holger-hoffmann"><first>Holger</first><last>Hoffmann</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <author><first>Heiko</first><last>Neumann</last></author>
       <author><first>Günther</first><last>Palm</last></author>
@@ -153,12 +153,12 @@
       <bibkey>strauss-etal-2008-pit</bibkey>
     </paper>
     <paper id="14">
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Philippe Boula</first><last>de Mareüil</last></author>
-      <author><first>Benoit</first><last>Habert</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe Boula</first><last>de Mareüil</last></author>
+      <author id="benoit-habert"><first>Benoit</first><last>Habert</last></author>
       <title>Annotation and analysis of overlapping speech in political interviews</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/788_paper.pdf</url>
       <abstract>Looking for a better understanding of spontaneous speech-related phenomena and to improve automatic speech recognition (ASR), we present here a study on the relationship between the occurrence of overlapping speech segments and disfluencies (filled pauses, repetitions, revisions) in political interviews. First we present our data, and our overlap annotation scheme. We detail our choice of overlapping tags and our definition of disfluencies; the observed ratios of the different overlapping tags are examined, as well as their correlation with of the speaker role and propose two measures to characterise speakers interacting attitude: the attack/resist ratio and the attack density. We then study the relationship between the overlapping speech segments and the disfluencies in our corpus, before concluding on the perspectives that our experiments offer.</abstract>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="15">
       <author><first>Nicolas</first><last>Moreau</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Rainer</first><last>Stiefelhagen</last></author>
       <author><first>Susanne</first><last>Burger</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
@@ -186,11 +186,11 @@
     </paper>
     <paper id="17">
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <author><first>Justin</first><last>Richer</last></author>
       <author><first>Dave</first><last>Harris</last></author>
       <author><first>Rob</first><last>Quimby</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <title><fixed-case>S</fixed-case>patial<fixed-case>ML</fixed-case>: Annotation Scheme, Corpora, and Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/106_paper.pdf</url>
       <abstract>SpatialML is an annotation scheme for marking up references to places in natural language. It covers both named and nominal references to places, grounding them where possible with geo-coordinates, including both relative and absolute locations, and characterizes relationships among places in terms of a region calculus. A freely available annotation editor has been developed for SpatialML, along with a corpus of annotated documents released by the Linguistic Data Consortium. Inter-annotator agreement on SpatialML is 77.0 F-measure for extents on that corpus. An automatic tagger for SpatialML extents scores 78.5 F-measure. A disambiguator scores 93.0 F-measure and 93.4 Predictive Accuracy. In adapting the extent tagger to new domains, merging the training data from the above corpus with annotated data in the new domain provides the best performance.</abstract>
@@ -198,9 +198,9 @@
     </paper>
     <paper id="18">
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>William</first><last>Corvey</last></author>
+      <author id="william-j-corvey"><first>William</first><last>Corvey</last></author>
       <author><first>Sara</first><last>Klingenstein</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <title>Building a Corpus of Temporal-Causal Structure</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/229_paper.pdf</url>
       <abstract>While recent corpus annotation efforts cover a wide variety of semantic structures, work on temporal and causal relations is still in its early stages. Annotation efforts have typically considered either temporal relations or causal relations, but not both, and no corpora currently exist that allow the relation between temporals and causals to be examined empirically. We have annotated a corpus of 1000 event pairs for both temporal and causal relations, focusing on a relatively frequent construction in which the events are conjoined by the word and. Temporal relations were annotated using an extension of the BEFORE and AFTER scheme used in the TempEval competition, and causal relations were annotated using a scheme based on connective phrases like and as a result. The annotators achieved 81.2% agreement on temporal relations and 77.8% agreement on causal relations. Analysis of the resulting corpus revealed some interesting findings, for example, that over 30% of CAUSAL relations do not have an underlying BEFORE relation. The corpus was also explored using machine learning methods, and while model performance exceeded all baselines, the results suggested that simple grammatical cues may be insufficient for identifying the more difficult temporal and causal relations.</abstract>
@@ -215,7 +215,7 @@
       <bibkey>zarcone-lenci-2008-computational</bibkey>
     </paper>
     <paper id="20">
-      <author><first>Corina</first><last>Forăscu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forăscu</last></author>
       <title><fixed-case>GMT</fixed-case> to +2 or how can <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> be used in <fixed-case>R</fixed-case>omanian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/817_paper.pdf</url>
       <abstract>The paper describes the construction and usage of the Romanian version of the TimeBank corpus. The success rate of 96.53% for the automatic import of the temporal annotation from English to Romanian shows that the automatic transfer is a worth doing enterprise if temporality is to be studied in another language than the one for which TimeML, the annotation standard used, was developed. A preliminary study identifies the main situations that occurred during the automatic transfer, as well as temporal elements not (yet) marked in the English corpus.</abstract>
@@ -224,15 +224,15 @@
     <paper id="21">
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Hua</first><last>Zhong</last></author>
-      <author><first>Kai-Yun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Kai-Yun</first><last>Chen</last></author>
       <title>Annotating “tense” in a Tense-less Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/877_paper.pdf</url>
       <abstract>In the context of Natural Language Processing, annotation is about recovering implicit information that is useful for natural language applications. In this paper we describe a tense annotation task for Chinese - a language that does not have grammatical tense - that is designed to infer the temporal location of a situation in relation to the temporal deixis, the moment of speech. If successful, this would be a highly rewarding endeavor as it has application in many natural language systems. Our preliminary experiments show that while this is a very challenging annotation task for which high annotation consistency is very difficult but not impossible to achieve. We show that guidelines that provide a conceptually intuitive framework will be crucial to the success of this annotation effort.</abstract>
       <bibkey>xue-etal-2008-annotating</bibkey>
     </paper>
     <paper id="22">
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <title>Subdomain Sensitive Statistical Parsing using Raw Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf</url>
       <abstract>Modern statistical parsers are trained on large annotated corpora (treebanks). These treebanks usually consist of sentences addressing different subdomains (e.g. sports, politics, music), which implies that the statistics gathered by current statistical parsers are mixtures of subdomains of language use. In this paper we present a method that exploits raw subdomain corpora gathered from the web to introduce subdomain sensitivity into a given parser. We employ statistical techniques for creating an ensemble of domain sensitive parsers, and explore methods for amalgamating their predictions. Our experiments show that introducing domain sensitivity by exploiting raw corpora can improve over a tough, state-of-the-art baseline.</abstract>
@@ -254,8 +254,8 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Ulrich</first><last>Callmeier</last></author>
       <author><first>Berthold</first><last>Crysmann</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
       <title>Some Fine Points of Hybrid Natural Language Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/349_paper.pdf</url>
       <abstract>Large-scale grammar-based parsing systems nowadays increasingly rely on independently developed, more specialized components for pre-processing their input. However, different tools make conflicting assumptions about very basic properties such as tokenization. To make linguistic annotation gathered in pre-processing available to deep parsing, a hybrid NLP system needs to establish a coherent mapping between the two universes. Our basic assumption is that tokens are best described by attribute value matrices (AVMs) that may be arbitrarily complex. We propose a powerful resource-sensitive rewrite formalism, chart mapping, that allows us to mediate between the token descriptions delivered by shallow pre-processing components and the input expected by the grammar. We furthermore propose a novel way of unknown word treatment where all generic lexical entries are instantiated that are licensed by a particular token AVM. Again, chart mapping is used to give the grammar writer full control as to which items (e.g. native vs. generic lexical items) enter syntactic parsing. We discuss several further uses of the original idea and report on early experiences with the new machinery.</abstract>
@@ -265,7 +265,7 @@
       <author><first>Jeremy</first><last>Nicholson</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
       <title>Evaluating and Extending the Coverage of <fixed-case>HPSG</fixed-case> Grammars: A Case Study for <fixed-case>G</fixed-case>erman</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/794_paper.pdf</url>
@@ -281,8 +281,8 @@
       <bibkey>zhang-kordoni-2008-robust</bibkey>
     </paper>
     <paper id="27">
-      <author><first>Jahna</first><last>Otterbacher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <title>Modeling Document Dynamics: an Evolutionary Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/115_paper.pdf</url>
       <abstract>News articles about the same event published over time have properties that challenge NLP and IR applications. A cluster of such texts typically exhibits instances of paraphrase and contradiction, as sources update the facts surrounding the story, often due to an ongoing investigation. The current hypothesis is that the stories evolve over time, beginning with the first text published on a given topic. This is tested using a phylogenetic approach as well as one based on language modeling. The fit of the evolutionary models is evaluated with respect to how well they facilitate the recovery of chronological relationships between the documents. Over all data clusters, the language modeling approach consistently outperforms the phylogenetics model. However, on manually collected clusters in which the documents are published within short time spans of one another, both have a similar performance, and produce statistically significant results on the document chronology recovery evaluation.</abstract>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="28">
       <author><first>Dominic</first><last>Widdows</last></author>
-      <author><first>Kathleen</first><last>Ferraro</last></author>
+      <author id="kathleen-ferraro"><first>Kathleen</first><last>Ferraro</last></author>
       <title>Semantic Vectors: a Scalable Open Source Package and Online Technology Management Application</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/300_paper.pdf</url>
       <abstract>This paper describes the open source SemanticVectors package that efficiently creates semantic vectors for words and documents from a corpus of free text articles. We believe that this package can play an important role in furthering research in distributional semantics, and (perhaps more importantly) can help to significantly reduce the current gap that exists between good research results and valuable applications in production software. Two clear principles that have guided the creation of the package so far include ease-of-use and scalability. The basic package installs and runs easily on any Java-enabled platform, and depends only on Apache Lucene. Dimension reduction is performed using Random Projection, which enables the system to scale much more effectively than other algorithms used for the same purpose. This paper also describes a trial application in the Technology Management domain, which highlights some user-centred design challenges which we believe are also key to successful deployment of this technology.</abstract>
@@ -306,7 +306,7 @@
     </paper>
     <paper id="30">
       <author><first>Kim</first><last>Luyckx</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title><fixed-case>P</fixed-case>ersonae: a Corpus for Author and Personality Prediction from Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf</url>
       <abstract>We present a new corpus for computational stylometry, more specifically authorship attribution and the prediction of author personality from text. Because of the large number of authors (145), the corpus will allow previously impossible studies of variation in features considered predictive for writing style. The innovative meta-information (personality profiles of the authors) associated with these texts allows the study of personality prediction, a not yet very well researched aspect of style. In this paper, we describe the contents of the corpus and show its use in both authorship attribution and personality prediction. We focus on features that have been proven useful in the field of author recognition. Syntactic features like part-of-speech n-grams are generally accepted as not being under the authors conscious control and therefore providing good clues for predicting gender or authorship. We want to test whether these features are helpful for personality prediction and authorship attribution on a large set of authors. Both tasks are approached as text categorization tasks. First a document representation is constructed based on feature selection from the linguistically analyzed corpus (using the Memory-Based Shallow Parser (MBSP)). These are associated with each of the 145 authors or each of the four components of the Myers-Briggs Type Indicator (Introverted-Extraverted, Sensing-iNtuitive, Thinking-Feeling, Judging-Perceiving). Authorship attribution on 145 authors achieves results around 50%-accuracy. Preliminary results indicate that the first two personality dimensions can be predicted fairly accurately.</abstract>
@@ -314,7 +314,7 @@
     </paper>
     <paper id="31">
       <author><first>Leanne</first><last>Spracklin</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Amiya</first><last>Nayak</last></author>
       <title>Using the Complexity of the Distribution of Lexical Elements as a Feature in Authorship Attribution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/892_paper.pdf</url>
@@ -338,9 +338,9 @@
     </paper>
     <paper id="33">
       <author><first>Laura</first><last>Stoia</last></author>
-      <author><first>Darla Magdalene</first><last>Shockley</last></author>
-      <author><first>Donna K.</first><last>Byron</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="darla-magdalene-shockley"><first>Darla Magdalene</first><last>Shockley</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <title><fixed-case>SCARE</fixed-case>: a Situated Corpus with Annotated Referring Expressions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/164_paper.pdf</url>
       <abstract>Even though a wealth of speech data is available for the dialog systems research community, the particular field of situated language has yet to find an appropriate free resource. The corpus required to answer research questions related to situated language should connect world information to the human language. In this paper we report on the release of a corpus of English spontaneous instruction giving situated dialogs. The corpus was collected using the Quake environment, a first-person virtual reality game, and consists of pairs of participants completing a direction giver- direction follower scenario. The corpus contains the collected audio and video, as well as word-aligned transcriptions and the positional/gaze information of the player. Referring expressions in the corpus are annotated with the IDs of their virtual world referents.</abstract>
@@ -348,14 +348,14 @@
     </paper>
     <paper id="34">
       <author><first>Han</first><last>Sloetjes</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Annotation by Category: <fixed-case>ELAN</fixed-case> and <fixed-case>ISO</fixed-case> <fixed-case>DCR</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/208_paper.pdf</url>
       <abstract>The Data Category Registry is one of the ISO initiatives towards the establishment of standards for Language Resource management, creation and coding. Successful application of the DCR depends on the availability of tools that can interact with it. This paper describes the first steps that have been taken to provide users of the multimedia annotation tool ELAN, with the means to create references from tiers and annotations to data categories defined in the ISO Data Category Registry. It first gives a brief description of the capabilities of ELAN and the structure of the documents it creates. After a concise overview of the goals and current state of the ISO DCR infrastructure, a description is given of how the preliminary connectivity with the DCR is implemented in ELAN.</abstract>
       <bibkey>sloetjes-wittenburg-2008-annotation</bibkey>
     </paper>
     <paper id="35">
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <author><first>Véronique</first><last>Malaisé</last></author>
       <author><first>Laura</first><last>Hollink</last></author>
       <title>A Common Multimedia Annotation Framework for Cross Linking Cultural Heritage Digital Collections</title>
@@ -366,7 +366,7 @@
     <paper id="36">
       <author><first>Philippe</first><last>Blache</last></author>
       <author><first>Roxane</first><last>Bertrand</last></author>
-      <author><first>Gaëlle</first><last>Ferré</last></author>
+      <author id="gaelle-ferre"><first>Gaëlle</first><last>Ferré</last></author>
       <title>Creating and Exploiting Multimodal Annotated Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/449_paper.pdf</url>
       <abstract>The paper presents a project of the Laboratoire Parole &amp; Langage which aims at collecting, annotating and exploiting a corpus of spoken French in a multimodal perspective. The project directly meets the present needs in linguistics where a growing number of researchers become aware of the fact that a theory of communication which aims at describing real interactions should take into account the complexity of these interactions. However, in order to take into account such a complexity, linguists should have access to spoken corpora annotated in different fields. The paper presents the annotation schemes used in phonetics, morphology and syntax, prosody, gestuality at the LPL together with the type of linguistic description made from the annotations seen in two examples.</abstract>
@@ -374,7 +374,7 @@
     </paper>
     <paper id="37">
       <author><first>Annie</first><last>Zaenen</last></author>
-      <author><first>Daniel</first><last>Bobrow</last></author>
+      <author id="daniel-bobrow"><first>Daniel</first><last>Bobrow</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
       <title>The Encoding of lexical implications in <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Predicates of change of locations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/101_paper.pdf</url>
@@ -390,8 +390,8 @@
       <bibkey>burchardt-pennacchiotti-2008-fate</bibkey>
     </paper>
     <paper id="39">
-      <author><first>Stephen</first><last>Boxwell</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="stephen-boxwell"><first>Stephen</first><last>Boxwell</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <title>Projecting <fixed-case>P</fixed-case>ropbank Roles onto the <fixed-case>CCG</fixed-case>bank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/789_paper.pdf</url>
       <abstract>This paper describes a method of accurately projecting Propbank roles onto constituents in the CCGbank and automatically annotating verbal categories with the semantic roles of their arguments. This method will be used to improve the structure of the derivations in the CCGbank and to facilitate research on semantic role tagging and broad coverage generation with CCG.</abstract>
@@ -400,8 +400,8 @@
     <paper id="40">
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Isa</first><last>Maks</last></author>
-      <author><first>Roxane</first><last>Segers</last></author>
-      <author><first>Hennie</first><last>VanderVliet</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
+      <author id="hennie-van-der-vliet"><first>Hennie</first><last>VanderVliet</last></author>
       <title>Integrating Lexical Units, Synsets and Ontology in the Cornetto Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/255_paper.pdf</url>
       <abstract>Cornetto is a two-year Stevin project (project number STE05039) in which a lexical semantic database is built that combines Wordnet with Framenet-like information for Dutch. The combination of the two lexical resources (the Dutch Wordnet and the Referentie Bestand Nederlands) will result in a much richer relational database that may improve natural language processing (NLP) technologies, such as word sense-disambiguation, and language-generation systems. In addition to merging the Dutch lexicons, the database is also mapped to a formal ontology to provide a more solid semantic backbone. Since the database represents different traditions and perspectives of semantic organization, a key issue in the project is the alignment of concepts across the resources. This paper discusses our methodology to first automatically align the word meanings and secondly to manually revise the most critical cases.</abstract>
@@ -409,12 +409,12 @@
     </paper>
     <paper id="41">
       <author><first>Javier</first><last>Álvez</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Jordi</first><last>Carrera</last></author>
       <author><first>Salvador</first><last>Climent</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Antoni</first><last>Oliver</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>Complete and Consistent Annotation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et using the Top Concept Ontology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/390_paper.pdf</url>
       <abstract>This paper presents the complete and consistent ontological annotation of the nominal part of WordNet. The annotation has been carried out using the semantic features defined in the EuroWordNet Top Concept Ontology and made available to the NLP community. Up to now only an initial core set of 1,024 synsets, the so-called Base Concepts, was ontologized in such a way. The work has been achieved by following a methodology based on an iterative and incremental expansion of the initial labeling through the hierarchy while setting inheritance blockage points. Since this labeling has been set on the EuroWordNets Interlingual Index (ILI), it can be also used to populate any other wordnet linked to it through a simple porting process. This feature-annotated WordNet is intended to be useful for a large number of semantic NLP tasks and for testing for the first time componential analysis on real environments. Moreover, the quantitative analysis of the work shows that more than 40% of the nominal part of WordNet is involved in structure errors or inadequacies.</abstract>
@@ -430,7 +430,7 @@
     </paper>
     <paper id="43">
       <author><first>Gwénolé</first><last>Lecorvé</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <title>On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf</url>
@@ -439,8 +439,8 @@
     </paper>
     <paper id="44">
       <author><first>Stanislas</first><last>Oger</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <title>Local Methods for On-Demand Out-of-Vocabulary Word Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/193_paper.pdf</url>
       <abstract>Most of the Web-based methods for lexicon augmenting consist in capturing global semantic features of the targeted domain in order to collect relevant documents from the Web. We suggest that the local context of the out-of-vocabulary (OOV) words contains relevant information on the OOV words. With this information, we propose to use the Web to build locally-augmented lexicons which are used in a final local decoding pass. First, an automatic web based OOV word detection method is proposed. Then, we demonstrate the relevance of the Web for the OOV word retrieval. Different methods are proposed to retrieve the hypothesis words. We finally retrieve about 26% of the OOV words with a lexicon increase of less than 1000 words using the reference context.</abstract>
@@ -448,11 +448,11 @@
     </paper>
     <paper id="45">
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
-      <author><first>Alex</first><last>Klassmann</last></author>
+      <author id="alex-klassmann"><first>Alex</first><last>Klassmann</last></author>
       <author><first>Claus</first><last>Zinn</last></author>
       <author><first>Peter</first><last>Berck</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Exploring and Enriching a Language Resource Archive via the Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/205_paper.pdf</url>
       <abstract>The download first, then process paradigm is still the predominant working method amongst the research community. The web-based paradigm, however, offers many advantages from a tool development and data management perspective as they allow a quick adaptation to changing research environments. Moreover, new ways of combining tools and data are increasingly becoming available and will eventually enable a true web-based workflow approach, thus challenging the download first, then process paradigm. The necessary infrastructure for managing, exploring and enriching language resources via the Web will need to be delivered by projects like CLARIN and DARIAH.</abstract>
@@ -467,7 +467,7 @@
       <bibkey>schiel-mogele-2008-talking</bibkey>
     </paper>
     <paper id="47">
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Monica</first><last>Lău</last></author>
       <title>In Contrast - A Complex Discourse Connective</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/75_paper.pdf</url>
@@ -499,18 +499,18 @@
     <paper id="50">
       <author><first>Lucie</first><last>Mladová</last></author>
       <author><first>Šárka</first><last>Zikánová</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <title>From Sentence to Discourse: Building an Annotation Scheme for Discourse Based on <fixed-case>P</fixed-case>rague Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/638_paper.pdf</url>
       <abstract>The present paper reports on a preparatory research for building a language corpus annotation scenario capturing the discourse relations in Czech. We primarily focus on the description of the syntactically motivated relations in discourse, basing our findings on the theoretical background of the Prague Dependency Treebank 2.0 and the Penn Discourse Treebank 2. Our aim is to revisit the present-day syntactico-semantic (tectogrammatical) annotation in the Prague Dependency Treebank, extend it for the purposes of a sentence-boundary-crossing representation and eventually to design a new, discourse level of annotation. In this paper, we propose a feasible process of such a transfer, comparing the possibilities the Praguian dependency-based approach offers with the Penn discourse annotation based primarily on the analysis and classification of discourse connectives.</abstract>
       <bibkey>mladova-etal-2008-sentence</bibkey>
     </paper>
     <paper id="51">
-      <author><first>David</first><last>Day</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <author><first>Michael</first><last>Wick</last></author>
       <author><first>Keith</first><last>Crouch</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>A Corpus for Cross-Document Co-reference</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/762_paper.pdf</url>
       <abstract>This paper describes a newly created text corpus of news articles that has been annotated for cross-document co-reference. Being able to robustly resolve references to entities across document boundaries will provide a useful capability for a variety of tasks, ranging from practical information retrieval applications to challenging research in information extraction and natural language understanding. This annotated corpus is intended to encourage the development of systems that can more accurately address this problem. A manual annotation tool was developed that allowed the complete corpus to be searched for likely co-referring entity mentions. This corpus of 257K words links mentions of co-referent people, locations and organizations (subject to some additional constraints). Each of the documents had already been annotated for within-document co-reference by the LDC as part of the ACE series of evaluations. The annotation process was bootstrapped with a string-matching-based linking procedure, and we report on some of initial experimentation with the data. The cross-document linking information will be made publicly available.</abstract>
@@ -518,7 +518,7 @@
     </paper>
     <paper id="52">
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <title>Named Entity <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/188_paper.pdf</url>
@@ -527,7 +527,7 @@
     </paper>
     <paper id="53">
       <author><first>Cristina</first><last>Mota</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>Is this <fixed-case>NE</fixed-case> tagger getting old?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/303_paper.pdf</url>
       <abstract>This paper focuses on the influence of changing the text time frame on the performance of a named entity tagger. We followed a twofold approach to investigate this subject: on the one hand, we analyzed a corpus that spans 8 years, and, on the other hand, we assessed the performance of a name tagger trained and tested on that corpus. We created 8 samples from the corpus, each drawn from the articles for a particular year. In terms of corpus analysis, we calculated the corpus similarity and names shared between samples. To see the effect on tagger performance, we implemented a semi-supervised name tagger based on co-training; then, we trained and tested our tagger on those samples. We observed that corpus similarity, names shared between samples, and tagger performance all decay as the time gap between the samples increases. Furthermore, we observed that the corpus similarity and names shared correlate with the tagger F-measure. These results show that named entity recognition systems may become obsolete in a short period of time.</abstract>
@@ -535,9 +535,9 @@
     </paper>
     <paper id="54">
       <author><first>Benjamin</first><last>Farber</last></author>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <title>Improving <fixed-case>NER</fixed-case> in <fixed-case>A</fixed-case>rabic Using a Morphological Tagger</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/625_paper.pdf</url>
       <abstract>We discuss a named entity recognition system for Arabic, and show how we incorporated the information provided by MADA, a full morphological tagger which uses a morphological analyzer. Surprisingly, the relevant features used are the capitalization of the English gloss chosen by the tagger, and the fact that an analysis is returned (that a word is not OOV to the morphological analyzer). The use of the tagger also improves over a third system which just uses a morphological analyzer, yielding a 14\% reduction in error over the baseline. We conduct a thorough error analysis to identify sources of success and failure among the variations, and show that by combining the systems in simple ways we can significantly influence the precision-recall trade-off.</abstract>
@@ -552,7 +552,7 @@
       <bibkey>busemann-zhang-2008-identifying</bibkey>
     </paper>
     <paper id="56">
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <title>Low-Complexity Heuristics for Deriving Fine-Grained Classes of Named Entities from Web Textual Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/886_paper.pdf</url>
       <abstract>We introduce a low-complexity method for acquiring fine-grained classes of named entities from the Web. The method exploits the large amounts of textual data available on the Web, while avoiding the use of any expensive text processing techniques or tools. The quality of the extracted classes is encouraging with respect to both the precision of the sets of named entities acquired within various classes, and the labels assigned to the sets of named entities.</abstract>
@@ -560,7 +560,7 @@
     </paper>
     <paper id="57">
       <author><first>Jin-Ji</first><last>Li</last></author>
-      <author><first>Dong-Il</first><last>Kim</last></author>
+      <author id="dong-il-kim"><first>Dong-Il</first><last>Kim</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <title>Annotation Guidelines for <fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/137_paper.pdf</url>
@@ -568,9 +568,9 @@
       <bibkey>li-etal-2008-annotation</bibkey>
     </paper>
     <paper id="58">
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Miroslav</first><last>Janíček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Pavel</first><last>Češka</last></author>
       <author><first>Peter</first><last>Beňa</last></author>
       <title><fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng 0.7: Parallel Corpus with Community-Supplied Translations</title>
@@ -579,9 +579,9 @@
       <bibkey>bojar-etal-2008-czeng</bibkey>
     </paper>
     <paper id="59">
-      <author><first>Jonathan</first><last>Clark</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <title>Toward Active Learning in Data Selection: Automatic Discovery of Language Features During Elicitation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/308_paper.pdf</url>
       <abstract>Data Selection has emerged as a common issue in language technologies. We define Data Selection as the choosing of a subset of training data that is most effective for a given task. This paper describes deductive feature detection, one component of a data selection system for machine translation. Feature detection determines whether features such as tense, number, and person are expressed in a language. The database of the World Atlas of Language Structures provides a gold standard against which to evaluate feature detection. The discovered features can be used as input to a Navigator, which uses active learning to determine which piece of language data is the most important to acquire next.</abstract>
@@ -589,7 +589,7 @@
     </paper>
     <paper id="60">
       <author><first>Michael</first><last>Mohler</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <title>Babylon Parallel Text Builder: Gathering Parallel Texts for Low-Density Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/313_paper.pdf</url>
       <abstract>This paper describes Babylon, a system that attempts to overcome the shortage of parallel texts in low-density languages by supplementing existing parallel texts with texts gathered automatically from the Web. In addition to the identification of entire Web pages, we also propose a new feature specifically designed to find parallel text chunks within a single document. Experiments carried out on the Quechua-Spanish language pair show that the system is successful in automatically identifying a significant amount of parallel texts on the Web. Evaluations of a machine translation system trained on this corpus indicate that the Web-gathered parallel texts can supplement manually compiled parallel texts and perform significantly better than the manually compiled texts when tested on other Web-gathered data.</abstract>
@@ -597,8 +597,8 @@
     </paper>
     <paper id="61">
       <author><first>Cong-Phap</first><last>Huynh</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <title><fixed-case>SECT</fixed-case>ra_w.1: an Online Collaborative System for Evaluating, Post-editing and Presenting <fixed-case>MT</fixed-case> Translation Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/639_paper.pdf</url>
       <abstract>SECTra_w is a web-oriented system mainly dedicated to the evaluation of MT systems. After importing a source corpus, and possibly reference translations, one can call various MT systems, store their results, and have a collection of human judges perform subjective evaluation online (fluidity, adequacy). It is also possible to perform objective, task-oriented evaluation by letting humans post-edit the MT results, using a web translation editor, and measuring an edit distance and/or the post-editing time. The post-edited results can be added to the set of reference translations, or constitute it if there were no references. SECTra_w makes it possible to show not only tables of figures as results of an evaluation campaign, but also the real data (source, MT outputs, references, post-edited outputs), and to make the post-edition effort sensible by transforming the trace of the edit distance computation in an intuitive presentation, much like a revision presentation in Word. The system is written in java under Xwiki and uses the Ajax technique. It can handle large, multilingual and multimedia corpora: EuroParl, BTEC, ERIM (bilingual interpreted dialogues with audio and text), Unesco-B@bel, and a test corpus by France Telecom have been loaded together and used in tests.</abstract>
@@ -607,14 +607,14 @@
     <paper id="62">
       <author><first>Mark</first><last>Arehart</last></author>
       <author><first>Chris</first><last>Wolf</last></author>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <title>Adjudicator Agreement and System Rankings for Person Name Search</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf</url>
       <abstract>We have analyzed system rankings for person name search algorithms using a data set for which several versions of ground truth were developed by employing different means of resolving adjudicator conflicts. Thirteen algorithms were ranked by F-score, using bootstrap resampling for significance testing, on a dataset containing 70,000 romanized names from various cultures. We found some disagreement among the four adjudicators, with kappa ranging from 0.57 to 0.78. Truth sets based on a single adjudicator, and on the intersection or union of positive adjudications produced sizeable variability in scoring sensitivity - and to a lesser degree rank order - compared to the consensus truth set. However, results on truth sets constructed by randomly choosing an adjudicator for each item were highly consistent with the consensus. The implication is that an evaluation where one adjudicator has judged each item is nearly as good as a more expensive and labor-intensive one where multiple adjudicators have judged each item and conflicts are resolved through voting.</abstract>
       <bibkey>arehart-etal-2008-adjudicator</bibkey>
     </paper>
     <paper id="63">
-      <author><first>Paulo C F</first><last>de Oliveira</last></author>
+      <author id="paulo-c-f-de-oliveira"><first>Paulo C F</first><last>de Oliveira</last></author>
       <author><first>Edson Wilson</first><last>Torrens</last></author>
       <author><first>Alexandre</first><last>Cidral</last></author>
       <author><first>Sidney</first><last>Schossland</last></author>
@@ -633,24 +633,24 @@
       <bibkey>poibeau-messiant-2008-still</bibkey>
     </paper>
     <paper id="65">
-      <author><first>Peter</first><last>Spyns</last></author>
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <title>The <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish Comprehensive Approach to <fixed-case>HLT</fixed-case> Stimulation and Innovation: <fixed-case>STEVIN</fixed-case>, <fixed-case>HLT</fixed-case> Agency and beyond</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/385_paper.pdf</url>
       <abstract>This paper shows how a research and industry stimulation programme on human language technologies (HLT) for Dutch can be enhanced with more specific innovation policy aspects to support the take-up by the HLT industry in the Netherlands and Flanders. Important to note is the distinction between the HLT programme itself (called STEVIN) with its specific related committees and actions and the overall policy instruments (HLT Agency, HLT steering board?) that try to span the entire domain of HLT for Dutch and have a more permanent character. The establishment of a pricing committee and a PR &amp; communication working group is explained as a consequence of adopting the notion of innovation system as a theoretical framework. It means that a stronger emphasis is put on improving knowledge transfer and exchange amongst actors in the field. Therefore, the focus at the programme management level is shifting from the projects research activities producing results to gathering the results, making them available at a certain cost and advertising them through the appropriate channels to the appropriate potential customers. Our conclusion is that this policy stimulates the transfer from academia to industry though it is too soon for an in-depth assessment of the STEVIN programme and other HLT innovation policy instruments.</abstract>
       <bibkey>spyns-etal-2008-dutch</bibkey>
     </paper>
     <paper id="66">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>15 Years of Language Resource Creation and Sharing: a Progress Report on <fixed-case>LDC</fixed-case> Activities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/861_paper.pdf</url>
       <abstract>This paper, the fifth in a series of biennial progress reports, reviews the activities of the Linguistic Data Consortium with particular emphasis on general trends in the language resource landscape and on changes that distinguish the two years since LDCs last report at LREC from the preceding 8 years. After providing a perspective on the current landscape of language resources, the paper goes on to describe our vision of the role of LDC within the research communities it serves before sketching briefly specific publications and resources creations projects that have been the focus our attention since the last report.</abstract>
       <bibkey>cieri-liberman-2008-15</bibkey>
     </paper>
     <paper id="67">
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Kiran</first><last>Pala</last></author>
       <author><first>Harshit</first><last>Surana</last></author>
       <title>Estimating the Resource Adaption Cost from a Resource Rich Language to a Similar Resource Poor Language</title>
@@ -659,7 +659,7 @@
       <bibkey>singh-etal-2008-estimating</bibkey>
     </paper>
     <paper id="68">
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
@@ -671,7 +671,7 @@
     <paper id="69">
       <author><first>Carol</first><last>Peters</last></author>
       <author><first>Martin</first><last>Braschler</last></author>
-      <author><first>Giorgio</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio</first><last>Di Nunzio</last></author>
       <author><first>Nicola</first><last>Ferro</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Mark</first><last>Sanderson</last></author>
@@ -681,8 +681,8 @@
       <bibkey>peters-etal-2008-research</bibkey>
     </paper>
     <paper id="70">
-      <author><first>Scott</first><last>Piao</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Clustering Related Terms with Definitions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/515_paper.pdf</url>
@@ -692,7 +692,7 @@
     <paper id="71">
       <author><first>Ngan</first><last>Nguyen</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title>Challenges in Pronoun Resolution System for Biomedical Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/607_paper.pdf</url>
       <abstract>This paper presents our findings on the feasibility of doing pronoun resolution for biomedical texts, in comparison with conducting pronoun resolution for the newswire domain. In our experiments, we built a simple machine learning-based pronoun resolution system, and evaluated the system on three different corpora: MUC, ACE, and GENIA. Comparative statistics not only reveal the noticeable issues in constructing an effective pronoun resolution system for a new domain, but also provides a comprehensive view of those corpora often used for this task.</abstract>
@@ -700,17 +700,17 @@
     </paper>
     <paper id="72">
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <title>Exploiting Multiply Annotated Corpora in Biomedical Information Extraction Tasks</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/516_paper.pdf</url>
       <abstract>This paper discusses the problem of utilising multiply annotated data in training biomedical information extraction systems. Two corpora, annotated with entities and relations, and containing a number of multiply annotated documents, are used to train named entity recognition and relation extraction systems. Several methods of automatically combining the multiple annotations to produce a single annotation are compared, but none produces better results than simply picking one of the annotated versions at random. It is also shown that adding extra singly annotated documents produces faster performance gains than adding extra multiply annotated documents.</abstract>
       <bibkey>haddow-alex-2008-exploiting</bibkey>
     </paper>
     <paper id="73">
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title><fixed-case>GENIA</fixed-case>-<fixed-case>GR</fixed-case>: a Grammatical Relation Corpus for Parser Evaluation in the Biomedical Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/496_paper.pdf</url>
       <abstract>We report the construction of a corpus for parser evaluation in the biomedical domain. A 50-abstract subset (492 sentences) of the GENIA corpus (Kim et al., 2003) is annotated with labeled head-dependent relations using the grammatical relations (GR) evaluation scheme (Carroll et al., 1998) ,which has been used for parser evaluation in the newswire domain.</abstract>
@@ -754,7 +754,7 @@
       <author><first>Bartosz</first><last>Broda</last></author>
       <author><first>Magdalena</first><last>Derwojedowa</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Stanislaw</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stanislaw</first><last>Szpakowicz</last></author>
       <title>Corpus-based Semantic Relatedness for the Construction of <fixed-case>P</fixed-case>olish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/459_paper.pdf</url>
       <abstract>The construction of a wordnet, a labour-intensive enterprise, can be significantly assisted by automatic grouping of lexical material and discovery of lexical semantic relations. The objective is to ensure high quality of automatically acquired results before they are presented for lexicographers approval. We discuss a software tool that suggests synset members using a measure of semantic relatedness with a given verb or adjective; this extends previous work on nominal synsets in Polish WordNet. Syntactically-motivated constraints are deployed on a large morphologically annotated corpus of Polish. Evaluation has been performed via the WordNet-Based Similarity Test and additionally supported by human raters. A lexicographer also manually assessed a suitable sample of suggestions. The results compare favourably with other known methods of acquiring semantic relations.</abstract>
@@ -764,15 +764,15 @@
       <author><first>Rafiya</first><last>Begum</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <author><first>Lakshmi</first><last>Bai</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <title>Developing Verb Frames for <fixed-case>H</fixed-case>indi</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/491_paper.pdf</url>
       <abstract>This paper introduces an ongoing work on developing verb frames for Hindi. Verb frames capture syntactic commonalities of semantically related verbs. The main objective of this work is to create a linguistic resource which will prove to be indispensable for various NLP applications. We also hope this resource to help us better understand Hindi verbs. We motivate the basic verb argument structure using relations as introduced by Panini. We show the methodology used in preparing these frames and the criteria followed for classifying Hindi verbs.</abstract>
       <bibkey>begum-etal-2008-developing</bibkey>
     </paper>
     <paper id="80">
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Scott</first><last>Silliman</last></author>
       <author><first>Amruta</first><last>Purandare</last></author>
       <title>Uncertainty Corpus: Resource to Study User Affect in Complex Spoken Dialogue Systems</title>
@@ -782,7 +782,7 @@
     </paper>
     <paper id="81">
       <author><first>Milan</first><last>Gnjatović</last></author>
-      <author><first>Dietmar</first><last>Roesner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Roesner</last></author>
       <title>On the Role of the <fixed-case>NIMITEK</fixed-case> Corpus in Developing an Emotion Adaptive Spoken Dialogue System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/149_paper.pdf</url>
       <abstract>This paper reports on the creation of the multimodal NIMITEK corpus of affected behavior in human-machine interaction and its role in the development of the NIMITEK prototype system. The NIMITEK prototype system is a spoken dialogue system for supporting users while they solve problems in a graphics system. The central feature of the system is adaptive dialogue management. The system dynamically defines a dialogue strategy according to the current state of the interaction (including also the emotional state of the user). Particular emphasis is devoted to the level of naturalness of interaction. We discuss that a higher level of naturalness can be achieved by combining a habitable natural language interface and an appropriate dialogue strategy. The role of the NIMITEK multimodal corpus in achieving these requirements is twofold: (1) in developing the model of attentional state on the level of users commands that facilitates processing of flexibly formulated commands, and (2) in defining the dialogue strategy that takes the emotional state of the user into account. Finally, we sketch the implemented prototype system and describe the incorporated dialogue management module. Whereas the prototype system itself is task-specific, the described underlying concepts are intended to be task-independent.</abstract>
@@ -804,7 +804,7 @@
     <paper id="83">
       <author><first>Laure</first><last>Charonnat</last></author>
       <author><first>Gaëlle</first><last>Vidal</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <title>Automatic Phone Segmentation of Expressive Speech</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/596_paper.pdf</url>
       <abstract>In order to improve the flexibility and the precision of an automatic phone segmentation system for a type of expressive speech, the dubbing into French of fiction movies, we developed both the phonetic labeling process and the alignment process. The automatic labelling system relies on an automatic grapheme-to-phoneme conversion including all the variants of the phonetic chain and on HMM modeling. In this article, we will distinguish three sets of phone models: a set of context independent models, a set of left and right context dependant models and finally a mixing of the two that combines phone and triphone models according to the precision of alignment obtained for each phonetic broad-class. The three models are evaluated on a test corpus. On the one hand we notice a little decrease in the score of phonetic labelling mainly due to pauses insertions, but on the other hand the mixed set of models gives the best results for the score of precision of the alignment.</abstract>
@@ -824,7 +824,7 @@
     </paper>
     <paper id="85">
       <author><first>Wei-Hao</first><last>Lin</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <title>Vox Populi Annotation: Measuring Intensity of Ideological Perspectives by Aggregating Group Judgments</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/600_paper.pdf</url>
       <abstract>Polarizing discussions about political and social issues are common in mass media. Annotations on the degree to which a sentence expresses an ideological perspective can be valuable for evaluating computer programs that can automatically identify strongly biased sentences, but such annotations remain scarce. We annotated the intensity of ideological perspectives expressed in 250 sentences by aggregating judgments from 18 annotators. We proposed methods of determining the number of annotators and assessing reliability, and showed the annotations were highly consistent across different annotator groups.</abstract>
@@ -832,8 +832,8 @@
     </paper>
     <paper id="86">
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <title>A Bootstrapping Method for Building Subjectivity Lexicons for Languages with Scarce Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/700_paper.pdf</url>
       <abstract>This paper introduces a method for creating a subjectivity lexicon for languages with scarce resources. The method is able to build a subjectivity lexicon by using a small seed set of subjective words, an online dictionary, and a small raw corpus, coupled with a bootstrapping process that ranks new candidate words based on a similarity measure. Experiments performed with a rule-based sentence level subjectivity classifier show an 18% absolute improvement in F-measure as compared to previously proposed semi-supervised methods.</abstract>
@@ -842,7 +842,7 @@
     <paper id="87">
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <title>Finding the Sources and Targets of Subjective Expressions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/709_paper.pdf</url>
       <abstract>As many popular text genres such as blogs or news contain opinions by multiple sources and about multiple targets, finding the sources and targets of subjective expressions becomes an important sub-task for automatic opinion analysis systems. We argue that while automatic semantic role labeling systems (ASRL) have an important contribution to make, they cannot solve the problem for all cases. Based on the experience of manually annotating opinions, sources, and targets in various genres, we present linguistic phenomena that require knowledge beyond that of ASRL systems. In particular, we address issues relating to the attribution of opinions to sources; sources and targets that are realized as zero-forms; and inferred opinions. We also discuss in some depth that for arguing attitudes we need to be able to recover propositions and not only argued-about entities. A recurrent theme of the discussion is that close attention to specific discourse contexts is needed to identify sources and targets correctly.</abstract>
@@ -850,7 +850,7 @@
     </paper>
     <paper id="88">
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <title>Annotating Topics of Opinions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/813_paper.pdf</url>
       <abstract>Fine-grained subjectivity analysis has been the subject of much recent research attention. As a result, the field has gained a number of working definitions, technical approaches and manually annotated corpora that cover many facets of subjectivity. Little work has been done, however, on one aspect of fine-grained opinions - the specification and identification of opinion topics. In particular, due to the difficulty of manual opinion topic annotation, no general-purpose opinion corpus with information about topics of fine-grained opinions currently exists. In this paper, we propose a methodology for the manual annotation of opinion topics and use it to annotate a portion of an existing general-purpose opinion corpus with opinion topic information. Inter-annotator agreement results according to a number of metrics suggest that the annotations are reliable.</abstract>
@@ -868,8 +868,8 @@
     <paper id="90">
       <author><first>Jette</first><last>Viethen</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Pascal</first><last>Touset</last></author>
       <title>Controlling Redundancy in Referring Expressions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/239_paper.pdf</url>
@@ -877,7 +877,7 @@
       <bibkey>viethen-etal-2008-controlling</bibkey>
     </paper>
     <paper id="91">
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <title>Anaphoric Annotation in the <fixed-case>ARRAU</fixed-case> Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/297_paper.pdf</url>
@@ -885,7 +885,7 @@
       <bibkey>poesio-artstein-2008-anaphoric</bibkey>
     </paper>
     <paper id="92">
-      <author><first>Mark-Christoph</first><last>Mueller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Mueller</last></author>
       <author><first>Margot</first><last>Mieskes</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <title>Knowledge Sources for Bridging Resolution in Multi-Party Dialog</title>
@@ -899,8 +899,8 @@
       <author><first>Alan</first><last>Lee</last></author>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
       <author><first>Livio</first><last>Robaldo</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <title>The <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank 2.0.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/754_paper.pdf</url>
       <abstract>We present the second version of the Penn Discourse Treebank, PDTB-2.0, describing its lexically-grounded annotations of discourse relations and their two abstract object arguments over the 1 million word Wall Street Journal corpus. We describe all aspects of the annotation, including (a) the argument structure of discourse relations, (b) the sense annotation of the relations, and (c) the attribution of discourse relations and each of their arguments. We list the differences between PDTB-1.0 and PDTB-2.0. We present representative statistics for several aspects of the annotation in the corpus.</abstract>
@@ -910,8 +910,8 @@
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <author><first>Frederik</first><last>Coppens</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Geert</first><last>Kloosterman</last></author>
       <author><first>Anne-Marie</first><last>Mineur</last></author>
       <author><first>Joeri</first><last>Van Der Vloet</last></author>
@@ -930,7 +930,7 @@
       <bibkey>baker-brew-2008-statistical</bibkey>
     </paper>
     <paper id="96">
-      <author><first>Diana</first><last>Trandabăţ</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăţ</last></author>
       <author><first>Maria</first><last>Husarciuc</last></author>
       <title><fixed-case>R</fixed-case>omanian Semantic Role Resource</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/715_paper.pdf</url>
@@ -940,7 +940,7 @@
     <paper id="97">
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Barbara</first><last>McGillivray</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <title>Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/763_paper.pdf</url>
@@ -956,22 +956,22 @@
       <bibkey>kawahara-uchimoto-2008-method</bibkey>
     </paper>
     <paper id="99">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <title>Automatic Acquisition for low frequency lexical items</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/334_paper.pdf</url>
       <abstract>This paper addresses a specific case of the task of lexical acquisition understood as the induction of information about the linguistic characteristics of lexical items on the basis of information gathered from their occurrences in texts. Most of the recent works in the area of lexical acquisition have used methods that take as much textual data as possible as source of evidence, but their performance decreases notably when only few occurrences of a word are available. The importance of covering such low frequency items lies in the fact that a large quantity of the words in any particular collection of texts will be occurring few times, if not just once. Our work proposes to compensate the lack of information resorting to linguistic knowledge on the characteristics of lexical classes. This knowledge, obtained from a lexical typology, is formulated probabilistically to be used in a Bayesian method to maximize the information gathered from single occurrences as to predict the full set of characteristics of the word. Our results show that our method achieves better results than others for the treatment of low frequency items.</abstract>
       <bibkey>bel-etal-2008-automatic</bibkey>
     </paper>
     <paper id="100">
-      <author><first>Doroteo</first><last>Toledano</last></author>
-      <author><first>Daniel</first><last>Hernandez-Lopez</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo</first><last>Toledano</last></author>
+      <author id="daniel-hernandez-lopez"><first>Daniel</first><last>Hernandez-Lopez</last></author>
       <author><first>Cristina</first><last>Esteve-Elizalde</last></author>
       <author><first>Julian</first><last>Fierrez</last></author>
-      <author><first>Javier</first><last>Ortega-Garcia</last></author>
+      <author id="javier-ortega-garcia"><first>Javier</first><last>Ortega-Garcia</last></author>
       <author><first>Daniel</first><last>Ramos</last></author>
-      <author><first>Joaquin</first><last>Gonzalez-Rodriguez</last></author>
+      <author id="joaquin-gonzalez-rodriguez"><first>Joaquin</first><last>Gonzalez-Rodriguez</last></author>
       <title><fixed-case>B</fixed-case>io<fixed-case>S</fixed-case>ec Multimodal Biometric Database in Text-Dependent Speaker Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/226_paper.pdf</url>
       <abstract>In this paper we briefly describe the BioSec multimodal biometric database and analyze its use in automatic text-dependent speaker recognition research. The paper is structured into four parts: a short introduction to the problem of text-dependent speaker recognition; a brief review of other existing databases, including monomodal text-dependent speaker recognition databases and multimodal biometric recognition databases; a description of the BioSec database; and, finally, an experimental section in which speaker recognition results on some of these databases are presented and compared, using the same underlying speaker recognition technique in all cases.</abstract>
@@ -979,12 +979,12 @@
     </paper>
     <paper id="101">
       <author><first>Iker</first><last>Luengo</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Jon</first><last>Sanchez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sanchez</last></author>
       <author><first>Igor</first><last>Odriozola</last></author>
-      <author><first>Inma</first><last>Hernaez</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernaez</last></author>
       <title>Text Independent Speaker Identification in Multilingual Environments</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/461_paper.pdf</url>
       <abstract>Speaker identification and verification systems have a poor performance when model training is done in one language while the testing is done in another. This situation is not unusual in multilingual environments, where people should be able to access the system in any language he or she prefers in each moment, without noticing a performance drop. In this work we study the possibility of using features derived from prosodic parameters in order to reinforce the language robustness of these systems. First the features properties in terms of language and session variability are studied, predicting an increase in the language robustness when frame-wise intonation and energy values are combined with traditional MFCC features. The experimental results confirm that these features provide an improvement in the speaker recognition rates under language-mismatch conditions. The whole study is carried out in the Basque Country, a bilingual region in which Basque and Spanish languages co-exist.</abstract>
@@ -992,21 +992,21 @@
     </paper>
     <paper id="102">
       <author><first>Udhyakumar</first><last>Nallasamy</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <title><fixed-case>N</fixed-case>ine<fixed-case>O</fixed-case>ne<fixed-case>O</fixed-case>ne: Recognizing and Classifying Speech for Handling Minority Language Emergency Calls</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/735_paper.pdf</url>
       <abstract>In this paper, we describe NineOneOne (9-1-1), a system designed to recognize and translate Spanish emergency calls for better dispatching. We analyze the research challenges in adapting speech translation technology to 9-1-1 domain. We report our initial research towards building the system and the results of our initial experiments.</abstract>
       <bibkey>nallasamy-etal-2008-nineoneone</bibkey>
     </paper>
     <paper id="103">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Meghan</first><last>Glenn</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="meghan-glenn"><first>Meghan</first><last>Glenn</last></author>
       <author><first>Reva</first><last>Schwartz</last></author>
       <author><first>Wade</first><last>Shen</last></author>
-      <author><first>Joseph</first><last>Campbell</last></author>
+      <author id="joseph-p-campbell"><first>Joseph</first><last>Campbell</last></author>
       <title>Bridging the Gap between Linguists and Technology Developers: Large-Scale, Sociolinguistic Annotation for Dialect and Speaker Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/793_paper.pdf</url>
       <abstract>Recent years have seen increased interest within the speaker recognition community in high-level features including, for example, lexical choice, idiomatic expressions or syntactic structures. The promise of speaker recognition in forensic applications drives development toward systems robust to channel differences by selecting features inherently robust to channel difference. Within the language recognition community, there is growing interest in differentiating not only languages but also mutually intelligible dialects of a single language. Decades of research in dialectology suggest that high-level features can enable systems to cluster speakers according to the dialects they speak. The Phanotics (Phonetic Annotation of Typicality in Conversational Speech) project seeks to identify high-level features characteristic of American dialects, annotate a corpus for these features, use the data to dialect recognition systems and also use the categorization to create better models for speaker recognition. The data, once published, should be useful to other developers of speaker and dialect recognition systems and to dialectologists and sociolinguists. We expect the methods will generalize well beyond the speakers, dialects, and languages discussed here and should, if successful, provide a model for how linguists and technology developers can collaborate in the future for the benefit of both groups and toward a deeper understanding of how languages vary and change.</abstract>
@@ -1014,7 +1014,7 @@
     </paper>
     <paper id="104">
       <author><first>Linda</first><last>Brandschain</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>David</first><last>Graff</last></author>
       <author><first>Abby</first><last>Neely</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
@@ -1024,11 +1024,11 @@
       <bibkey>brandschain-etal-2008-speaker</bibkey>
     </paper>
     <paper id="105">
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Charles</first><last>Fillmore</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="charles-j-fillmore"><first>Charles</first><last>Fillmore</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <title><fixed-case>MASC</fixed-case>: the Manually Annotated Sub-Corpus of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/617_paper.pdf</url>
       <abstract>To answer the critical need for sharable, reusable annotated resources with rich linguistic annotations, we are developing a Manually Annotated Sub-Corpus (MASC) including texts from diverse genres and manual annotations or manually-validated annotations for multiple levels, including WordNet senses and FrameNet frames and frame elements, both of which have become significant resources in the international computational linguistics community. To derive maximal benefit from the semantic information provided by these resources, the MASC will also include manually-validated shallow parses and named entities, which will enable linking WordNet senses and FrameNet frames within the same sentences into more complex semantic structures and, because named entities will often be the role fillers of FrameNet frames, enrich the semantic and pragmatic information derivable from the sub-corpus. All MASC annotations will be published with detailed inter-annotator agreement measures. The MASC and its annotations will be freely downloadable from the ANC website, thus providing maximum accessibility for researchers from around the globe.</abstract>
@@ -1038,7 +1038,7 @@
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Wei-guang</first><last>Qu</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Shiwen</first><last>Yu</last></author>
       <title>Quality Assurance of Automatic Annotation of Very Large Corpora: a Study based on heterogeneous Tagging System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/686_paper.pdf</url>
@@ -1046,7 +1046,7 @@
       <bibkey>huang-etal-2008-quality</bibkey>
     </paper>
     <paper id="107">
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Cynthia</first><last>Farina</last></author>
       <author><first>Matt</first><last>Rawding</last></author>
       <author><first>Adil</first><last>Aijaz</last></author>
@@ -1056,8 +1056,8 @@
       <bibkey>cardie-etal-2008-erulemaking</bibkey>
     </paper>
     <paper id="108">
-      <author><first>Branimir</first><last>Boguraev</last></author>
-      <author><first>Mary</first><last>Neff</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
+      <author id="mary-s-neff"><first>Mary</first><last>Neff</last></author>
       <title>Navigating through Dense Annotation Spaces</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/769_paper.pdf</url>
       <abstract>Pattern matching, or querying, over annotations is a general purpose paradigm for inspecting, navigating, mining, and transforming annotation repositories - the common representation basis for modern pipelined text-processing frameworks. Configurability of such frameworks and expressiveness of feature structure-based annotation schemes account for the high density of some such annotation repositories. This particular characteristic makes challenging the design of a pattern matching engine, capable of interpreting (or imposing) flat patterns over an arbitrarily dense annotation lattice. We present an approach where a finite state device carries out the application of (compiled) grammars over what is, in effect, a linearized projection of a unique route through the lattice; a route derived by a mix of static pattern (grammar) analysis and interpretation of navigational directives within the extended grammar formalism. Our approach achieves a mix of finite state scanning and lattice traversal for expressive and efficient pattern matching in dense annotations stores.</abstract>
@@ -1065,8 +1065,8 @@
     </paper>
     <paper id="109">
       <author><first>David</first><last>Guthrie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>An Unsupervised Probabilistic Approach for the Detection of Outliers in Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/866_paper.pdf</url>
       <abstract>Many applications of computational linguistics are greatly influenced by the quality of corpora available and as automatically generated corpora continue to play an increasingly common role, it is essential that we not overlook the importance of well-constructed and homogeneous corpora. This paper describes an automatic approach to improving the homogeneity of corpora using an unsupervised method of statistical outlier detection to find documents and segments that do not belong in a corpus. We consider collections of corpora that are homogeneous with respect to topic (i.e. about the same subject), or genre (written for the same audience or from the same source) and use a combination of stylistic and lexical features of the texts to automatically identify pieces of text in these collections that break the homogeneity. These pieces of text that are significantly different from the rest of the corpus are likely to be errors that are out of place and should be removed from the corpus before it is used for other tasks. We evaluate our techniques by running extensive experiments over large artificially constructed corpora that each contain single pieces of text from a different topic, author, or genre than the rest of the collection and measure the accuracy of identifying these pieces of text without the use of training data. We show that when these pieces of text are reasonably large (1,000 words) we can reliably identify them in a corpus.</abstract>
@@ -1082,7 +1082,7 @@
     <paper id="111">
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Generalising Lexical Translation Strategies for <fixed-case>MT</fixed-case> Using Comparable Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/340_paper.pdf</url>
       <abstract>We report on an on-going research project aimed at increasing the range of translation equivalents which can be automatically discovered by MT systems. The methodology is based on semi-supervised learning of indirect translation strategies from large comparable corpora and applying them in run-time to generate novel, previously unseen translation equivalents. This approach is different from methods based on parallel resources, which currently can reuse only individual translation equivalents. Instead it models translation strategies which generalise individual equivalents and can successfully generate an open class of new translation solutions. The task of the project is integration of the developed technology into open-source MT systems.</abstract>
@@ -1097,8 +1097,8 @@
       <bibkey>itagaki-aikawa-2008-post</bibkey>
     </paper>
     <paper id="113">
-      <author><first>Germán</first><last>Sanchis</last></author>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
       <title>Using Parsed Corpora for Estimating Stochastic Inversion Transduction Grammars</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/465_paper.pdf</url>
       <abstract>An important problem when using Stochastic Inversion Transduction Grammars is their computational cost. More specifically, when dealing with corpora such as Europarl. only one iteration of the estimation algorithm becomes prohibitive. In this work, we apply a reduction of the cost by taking profit of the bracketing information in parsed corpora and show machine translation results obtained with a bracketed Europarl corpus, yielding interresting improvements when increasing the number of non-terminal symbols.</abstract>
@@ -1106,7 +1106,7 @@
     </paper>
     <paper id="114">
       <author><first>Mark</first><last>Fishel</last></author>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <title>Experiments on Processing Overlapping Parallel Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/776_paper.pdf</url>
       <abstract>The number and sizes of parallel corpora keep growing, which makes it necessary to have automatic methods of processing them: combining, checking and improving corpora quality, etc. We here introduce a method which enables performing many of these by exploiting overlapping parallel corpora. The method finds the correspondence between sentence pairs in two corpora: first the corresponding language parts of the corpora are aligned and then the two resulting alignments are compared. The method takes into consideration slight differences in the source documents, different levels of segmentation of the input corpora, encoding differences and other aspects of the task. The paper describes two experiments conducted to test the method. In the first experiment, the Estonian-English part of the JRC-Acquis corpus was combined with another corpus of legislation texts. In the second experiment alternatively aligned versions of the JRC-Acquis are compared to each other with the example of all language pairs between English, Estonian and Latvian. Several additional conclusions about the corpora can be drawn from the results. The method proves to be effective for several parallel corpora processing tasks.</abstract>
@@ -1114,17 +1114,17 @@
     </paper>
     <paper id="115">
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title>Parser Evaluation and the <fixed-case>BNC</fixed-case>: Evaluating 4 constituency parsers with 3 metrics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/774_paper.pdf</url>
       <abstract>We evaluate discriminative parse reranking and parser self-training on a new English test set using four versions of the Charniak parser and a variety of parser evaluation metrics. The new test set consists of 1,000 hand-corrected British National Corpus parse trees. We directly evaluate parser output using both the Parseval and the Leaf Ancestor metrics. We also convert the hand-corrected and parser output phrase structure trees to dependency trees using a state-of-the-art functional tag labeller and constituent-to-dependency conversion tool, and then calculate label accuracy, unlabelled attachment and labelled attachment scores over the dependency structures. We find that reranking leads to a performance improvement on the new test set (albeit a modest one). We find that self-training using BNC data leads to significantly better results. However, it is not clear how effective self-training is when the training material comes from the North American News Corpus.</abstract>
       <bibkey>foster-van-genabith-2008-parser</bibkey>
     </paper>
     <paper id="116">
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <title><fixed-case>EASY</fixed-case>, Evaluation of Parsers of <fixed-case>F</fixed-case>rench: what are the Results?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/621_paper.pdf</url>
       <abstract>This paper presents EASY, which has been the first campaign evaluating syntactic parsers on all the common syntactic phenomena and a large set of dependency relations. The language analyzed was French. During this campaign, an annotation scheme has been elaborated with the different actors: participants and corpus providers; then a corpus made of several syntactic materials has been built and annotated: it reflects a great variety of linguistic styles (from literature to oral transcriptions, and from newspapers to medical texts). Both corpus and annotation scheme are here briefly presented. Moreover, evaluation measures are explained and detailed results are given. The results of the 15 parsers coming from 12 teams are analyzed. To conclude, a first experiment aiming to combine the outputs of the different systems is shown.</abstract>
@@ -1148,11 +1148,11 @@
       <bibkey>grothe-etal-2008-comparative</bibkey>
     </paper>
     <paper id="119">
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title><fixed-case>PASSAGE</fixed-case>: from <fixed-case>F</fixed-case>rench Parser Evaluation to Large Sized Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/908_paper.pdf</url>
@@ -1171,7 +1171,7 @@
       <author><first>Markpong</first><last>Jongtaveesataporn</last></author>
       <author><first>Chai</first><last>Wutiwiwatchai</last></author>
       <author><first>Koji</first><last>Iwano</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <title><fixed-case>T</fixed-case>hai Broadcast News Corpus Construction and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/319_paper.pdf</url>
       <abstract>Large speech and text corpora are crucial to the development of a state-of-the-art speech recognition system. This paper reports on the construction and evaluation of the first Thai broadcast news speech and text corpora. Specifications and conventions used in the transcription process are described in the paper. The speech corpus contains about 17 hours of speech data while the text corpus was transcribed from around 35 hours of television broadcast news. The characteristics of the corpus were analyzed and shown in the paper. The speech corpus was split according to the evaluation focus condition used in the DARPA Hub-4 evaluation. An 18K-word Thai speech recognition system was setup to test with this speech corpus as a preliminary experiment. Acoustic model adaptations were performed to improve the system performance. The best system yielded a word error rate of about 20% for clean and planned speech, and below 30% for the overall condition.</abstract>
@@ -1189,21 +1189,21 @@
     </paper>
     <paper id="123">
       <author><first>Sopheap</first><last>Seng</last></author>
-      <author><first>Sethserey</first><last>Sam</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="sethserey-sam"><first>Sethserey</first><last>Sam</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <title>First Broadcast News Transcription System for <fixed-case>K</fixed-case>hmer Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/661_paper.pdf</url>
       <abstract>In this paper we present an overview on the development of a large vocabulary continuous speech recognition (LVCSR) system for Khmer, the official language of Cambodia, spoken by more than 15 million people. As an under-resourced language, develop a LVCSR system for Khmer is a challenging task. We describe our methodologies for quick language data collection and processing for language modeling and acoustic modeling. For language modeling, we investigate the use of word and sub-word as basic modeling unit in order to see the potential of sub-word units in the case of unsegmented language like Khmer. Grapheme-based acoustic modeling is used to quickly build our Khmer language acoustic model. Furthermore, the approaches and tools used for the development of our system are documented and made publicly available on the web. We hope this will contribute to accelerate the development of LVCSR system for a new language, especially for under-resource languages of developing countries where resources and expertise are limited.</abstract>
       <bibkey>seng-etal-2008-first</bibkey>
     </paper>
     <paper id="124">
-      <author><first>Chomicha</first><last>Bendahman</last></author>
-      <author><first>Meghan</first><last>Glenn</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Niklas</first><last>Paulsson</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="chomicha-bendahman"><first>Chomicha</first><last>Bendahman</last></author>
+      <author id="meghan-glenn"><first>Meghan</first><last>Glenn</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="niklas-paulsson"><first>Niklas</first><last>Paulsson</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Quick Rich Transcriptions of <fixed-case>A</fixed-case>rabic Broadcast News Speech Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/915_paper.pdf</url>
       <abstract>This paper describes the collect and transcription of a large set of Arabic broadcast news speech data. A total of more than 2000 hours of data was transcribed. The transcription factor for transcribing the broadcast news data has been reduced using a method such as Quick Rich Transcription (QRTR) as well as reducing the number of quality controls performed on the data. The data was collected from several Arabic TV and radio sources and from both Modern Standard Arabic and dialectal Arabic. The orthographic transcriptions included segmentation, speaker turns, topics, sentence unit types and a minimal noise mark-up. The transcripts were produced as a part of the GALE project.</abstract>
@@ -1224,8 +1224,8 @@
       <bibkey>sekine-2008-extended</bibkey>
     </paper>
     <paper id="127">
-      <author><first>Mari Carmen</first><last>Suárez-Figueroa</last></author>
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="mari-carmen-suarez-figueroa"><first>Mari Carmen</first><last>Suárez-Figueroa</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
       <title>Towards a Glossary of Activities in the Ontology Engineering Field</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/219_paper.pdf</url>
       <abstract>The Semantic Web of the future will be characterized by using a very large number of ontologies embedded in ontology networks. It is important to provide strong methodological support for collaborative and context-sensitive development of networks of ontologies. This methodological support includes the identification and definition of which activities should be carried out when ontology networks are collaboratively built. In this paper we present the consensus reaching process followed within the NeOn consortium for the identification and definition of the activities involved in the ontology network development process. The consensus reaching process here presented produces as a result the NeOn Glossary of Activities. This work was conceived due to the lack of standardization in the Ontology Engineering terminology, which clearly contrasts with the Software Engineering field. Our future aim is to standardize the NeOn Glossary of Activities.</abstract>
@@ -1233,7 +1233,7 @@
     </paper>
     <paper id="128">
       <author><first>Yirong</first><last>Chen</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Gaoying</first><last>Cui</last></author>
       <title><fixed-case>C</fixed-case>hinese Core Ontology Construction from a Bilingual Term Bank</title>
@@ -1251,8 +1251,8 @@
     </paper>
     <paper id="130">
       <author><first>Takashi</first><last>Tsunakawa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title>Building Bilingual Lexicons using Lexical Translation Probabilities via Pivot Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/423_paper.pdf</url>
       <abstract>This paper proposes a method of increasing the size of a bilingual lexicon obtained from two other bilingual lexicons via a pivot language. When we apply this approach, there are two main challenges, ambiguity and mismatch of terms; we target the latter problem by improving the utilization ratio of the bilingual lexicons. Given two bilingual lexicons between language pairs Lf-Lp and Lp-Le, we compute lexical translation probabilities of word pairs by using a statistical word-alignment model, and term decomposition/composition techniques. We compare three approaches to generate the bilingual lexicon: exact merging, word-based merging, and our proposed alignment-based merging. In our method, we combine lexical translation probabilities and a simple language model for estimating the probabilities of translation pairs. The experimental results show that our method could drastically improve the number of translation terms compared to the two methods mentioned above. Additionally, we evaluated and discussed the quality of the translation outputs.</abstract>
@@ -1260,7 +1260,7 @@
     </paper>
     <paper id="131">
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Martin</first><last>Kay</last></author>
       <title>Improving Statistical Machine Translation Efficiency by Triangulation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/733_paper.pdf</url>
@@ -1270,7 +1270,7 @@
     <paper id="132">
       <author><first>Caroline</first><last>Lavecchia</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <title>Phrase-Based Machine Translation based on Simulated Annealing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/791_paper.pdf</url>
       <abstract>In this paper, we propose a new phrase-based translation model based on inter-lingual triggers. The originality of our method is double. First we identify common source phrases. Then we use inter-lingual triggers in order to retrieve their translations. Furthermore, we consider the way of extracting phrase translations as an optimization issue. For that we use simulated annealing algorithm to find out the best phrase translations among all those determined by inter-lingual triggers. The best phrases are those which improve the translation quality in terms of Bleu score. Tests are achieved on movie subtitle corpora. They show that our phrase-based machine translation (PBMT) system outperforms a state-of-the-art PBMT system by almost 7 points.</abstract>
@@ -1285,8 +1285,8 @@
       <bibkey>carpuat-wu-2008-evaluation</bibkey>
     </paper>
     <paper id="134">
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>A Multi-Genre <fixed-case>SMT</fixed-case> System for <fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/549_paper.pdf</url>
       <abstract>This work presents improvements of a large-scale Arabic to French statistical machine translation system over a period of three years. The development includes better preprocessing, more training data, additional genre-specific tuning for different domains, namely newswire text and broadcast news transcripts, and improved domain-dependent language models. Starting with an early prototype in 2005 that participated in the second CESTA evaluation, the system was further upgraded to achieve favorable BLEU scores of 44.8% for the text and 41.1% for the audio setting. These results are compared to a system based on the freely available Moses toolkit. We show significant gains both in terms of translation quality (up to +1.2% BLEU absolute) and translation speed (up to 16 times faster) for comparable configuration settings.</abstract>
@@ -1294,16 +1294,16 @@
     </paper>
     <paper id="135">
       <author><first>Estelle</first><last>Delpech</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <title>Investigating the Structure of Procedural Texts for Answering How-to Questions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/20_paper.pdf</url>
       <abstract>This paper presents ongoing work dedicated to parsing the textual structure of procedural texts. We propose here a model for the intructional structure and criteria to identify its main components: titles, instructions, warnings and prerequisites. The main aim of this project, besides a contribution to text processing, is to be able to answer procedural questions (How-to? questions), where the answer is a well-formed portion of a text, not a small set of words as for factoid questions.</abstract>
       <bibkey>delpech-saint-dizier-2008-investigating</bibkey>
     </paper>
     <paper id="136">
-      <author><first>Igor</first><last>Leturia</last></author>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Nerea</first><last>Areta</last></author>
+      <author id="igor-leturia"><first>Igor</first><last>Leturia</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="nerea-areta"><first>Nerea</first><last>Areta</last></author>
       <author><first>Eli</first><last>Pociello</last></author>
       <title>Analysis and Performance of Morphological Query Expansion and Language-Filtering Words on <fixed-case>B</fixed-case>asque Web Searching</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/185_paper.pdf</url>
@@ -1328,7 +1328,7 @@
     </paper>
     <paper id="139">
       <author><first>Torsten</first><last>Zesch</last></author>
-      <author><first>Christof</first><last>Müller</last></author>
+      <author id="christof-muller"><first>Christof</first><last>Müller</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <title>Extracting Lexical Semantic Knowledge from <fixed-case>W</fixed-case>ikipedia and <fixed-case>W</fixed-case>iktionary</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/420_paper.pdf</url>
@@ -1336,9 +1336,9 @@
       <bibkey>zesch-etal-2008-extracting</bibkey>
     </paper>
     <paper id="140">
-      <author><first>Gregory</first><last>Sanders</last></author>
+      <author id="gregory-sanders"><first>Gregory</first><last>Sanders</last></author>
       <author><first>Sébastien</first><last>Bronsart</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <author><first>Craig</first><last>Schlenoff</last></author>
       <title>Odds of Successful Transfer of Low-Level Concepts: a Key Metric for Bidirectional Speech-to-Speech Machine Translation in <fixed-case>DARPA</fixed-case>’s <fixed-case>TRANSTAC</fixed-case> Program</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/399_paper.pdf</url>
@@ -1346,12 +1346,12 @@
       <bibkey>sanders-etal-2008-odds</bibkey>
     </paper>
     <paper id="141">
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
-      <author><first>Pere</first><last>Comas</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
+      <author id="pere-comas"><first>Pere</first><last>Comas</last></author>
       <title>Question Answering on Speech Transcriptions: the <fixed-case>QAST</fixed-case> evaluation in <fixed-case>CLEF</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/511_paper.pdf</url>
       <abstract>This paper reports on the QAST track of CLEF aiming to evaluate Question Answering on Speech Transcriptions. Accessing information in spoken documents provides additional challenges to those of text-based QA, needing to address the characteristics of spoken language, as well as errors in the case of automatic transcriptions of spontaneous speech. The framework and results of the pilot QAst evaluation held as part of CLEF 2007 is described, illustrating some of the additional challenges posed by QA in spoken documents relative to written ones. The current plans for future multiple-language and multiple-task QAst evaluations are described.</abstract>
@@ -1369,13 +1369,13 @@
       <bibkey>heeren-etal-2008-evaluation</bibkey>
     </paper>
     <paper id="143">
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Jon</first><last>Phillips</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="jon-phillips"><first>Jon</first><last>Phillips</last></author>
       <author><first>Christy</first><last>Doran</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
       <author><first>Dan</first><last>Parvaz</last></author>
-      <author><first>Beatrice</first><last>Oshika</last></author>
-      <author><first>Greg</first><last>Sanders</last></author>
+      <author id="beatrice-oshika"><first>Beatrice</first><last>Oshika</last></author>
+      <author id="gregory-sanders"><first>Greg</first><last>Sanders</last></author>
       <author><first>Craig</first><last>Schlenoff</last></author>
       <title>Applying Automated Metrics to Speech Translation Dialogs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/535_paper.pdf</url>
@@ -1391,8 +1391,8 @@
       <bibkey>mieskes-strube-2008-three</bibkey>
     </paper>
     <paper id="145">
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <title>Towards Heterogeneous Automatic <fixed-case>MT</fixed-case> Error Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/483_paper.pdf</url>
       <abstract>This work studies the viability of performing heterogeneous automatic MT error analyses. Error analysis is, undoubtly, one of the most crucial stages in the development cycle of an MT system. However, often not enough attention is paid to this process. The reason is that performing an accurate error analysis requires intensive human labor. In order to speed up the error analysis process, we suggest partially automatizing it by having automatic evaluation metrics play a more active role. For that purpose, we have compiled a large and heterogeneous set of features at different linguistic levels and at different levels of granularity. Through a practical case study, we show how these features provide an effective means of ellaborating interpretable and detailed automatic reports of translation quality.</abstract>
@@ -1400,14 +1400,14 @@
     </paper>
     <paper id="146">
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Sensitivity of Automated <fixed-case>MT</fixed-case> Evaluation Metrics on Higher Quality <fixed-case>MT</fixed-case> Output: <fixed-case>BLEU</fixed-case> vs Task-Based Evaluation Methods</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/542_paper.pdf</url>
       <abstract>We report the results of our experiment on assessing the ability of automated MT evaluation metrics to remain sensitive to variations in MT quality as the average quality of the compared systems goes up. We compare two groups of metrics: those, which measure the proximity of MT output to some reference translation, and those which evaluate the performance of some automated process on degraded MT output. The experiment shows that proximity-based metrics (such as BLEU) loose sensitivity as the scores go up, but performance-based metrics (e.g., Named Entity recognition from MT output) remain sensitive across the scale. We suggest a model for explaining this result, which attributes stable sensitivity of performance-based metrics to measuring cumulative functional effect of different language levels, while proximity-based metrics measure structural matches on a lexical level and therefore miss higher-level errors that are more typical for better MT systems. Development of new automated metrics should take into account possible decline in sensitivity on higher-quality MT, which should be tested as part of meta-evaluation of the metrics.</abstract>
       <bibkey>babych-hartley-2008-sensitivity</bibkey>
     </paper>
     <paper id="147">
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <author><first>Sébastien</first><last>Bronsart</last></author>
       <title>Translation Adequacy and Preference Evaluation Tool (<fixed-case>TAP</fixed-case>-<fixed-case>ET</fixed-case>)</title>
@@ -1416,7 +1416,7 @@
       <bibkey>przybocki-etal-2008-translation</bibkey>
     </paper>
     <paper id="148">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Oana Andreea</first><last>Chiorean</last></author>
       <title>Evaluation of a Cross-lingual <fixed-case>R</fixed-case>omanian-<fixed-case>E</fixed-case>nglish Multi-document Summariser</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/539_paper.pdf</url>
@@ -1424,10 +1424,10 @@
       <bibkey>orasan-chiorean-2008-evaluation</bibkey>
     </paper>
     <paper id="149">
-      <author><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
       <author><first>Julien</first><last>Nioche</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <title>The <fixed-case>BNC</fixed-case> Parsed with <fixed-case>RASP</fixed-case>4<fixed-case>UIMA</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/218_paper.pdf</url>
       <abstract>We have integrated the RASP system with the UIMA framework (RASP4UIMA) and used this to parse the XML-encoded version of the British National Corpus (BNC). All original annotation is preserved, and parsing information, mainly in the form of grammatical relations, is added in an XML format. A few specific adaptations of the system to give better results with the BNC are discussed briefly. The RASP4UIMA system is publicly available and can be used to parse other corpora or document collections, and the final parsed version of the BNC will be deposited with the Oxford Text Archive.</abstract>
@@ -1459,7 +1459,7 @@
       <bibkey>pustylnikov-etal-2008-unified</bibkey>
     </paper>
     <paper id="153">
-      <author><first>Aicha</first><last>Bouhjar</last></author>
+      <author id="aicha-bouhjar"><first>Aicha</first><last>Bouhjar</last></author>
       <title><fixed-case>A</fixed-case>mazigh Language Terminology in <fixed-case>M</fixed-case>orocco or Management of a “Multidimensional” Variation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/912_paper.pdf</url>
       <abstract>The present communication brings to the fore the work undertaken at the Royal Institute of the Amazigh Culture (IRCAM, henceforth) within the Language Planning Center known as Centre de lAménagement Linguistique (CAL) within the framework of the language planning of Amazigh, particularly on the side of terminology. The focus will be on the concept of variation that affects different levels in the course of standardizing a language: orthography, spelling, grammar and lexis. Thus, after a brief survey of the main features of the Amazigh (Berber) language in general, the missions and the projects far achieved by CAL will be presented, particularly the objectives that relate to the work on the multiply varied corpus-based terminology. It appears that eliciting the pertinent information, for the most part, requires a whole amount of work on the re-writing of corpora so that the latter become exploitable in the standardization process. It should be pointed out that this stage of data homogenization, seemingly unwieldy for optimal exploitation, cannot be undertaken Amazighist linguists being involved in theoretical and methodological presuppositions that are at the root of this variation.</abstract>
@@ -1467,8 +1467,8 @@
     </paper>
     <paper id="154">
       <author><first>Yuhang</first><last>Yang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <title><fixed-case>C</fixed-case>hinese Term Extraction Based on Delimiters</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/72_paper.pdf</url>
       <abstract>Existing techniques extract term candidates by looking for internal and contextual information associated with domain specific terms. The algorithms always face the dilemma that fewer features are not enough to distinguish terms from non-terms whereas more features lead to more conflicts among selected features. This paper presents a novel approach for term extraction based on delimiters which are much more stable and domain independent. The proposed approach is not as sensitive to term frequency as that of previous works. This approach has no strict limit or hard rules and thus they can deal with all kinds of terms. It also requires no prior domain knowledge and no additional training to adapt to new domains. Consequently, the proposed approach can be applied to different domains easily and it is especially useful for resource-limited domains. Evaluations conducted on two different domains for Chinese term extraction show significant improvements over existing techniques which verifies its efficiency and domain independent nature. Experiments on new term extraction indicate that the proposed approach can also serve as an effective tool for domain lexicon expansion.</abstract>
@@ -1476,7 +1476,7 @@
     </paper>
     <paper id="155">
       <author><first>Siham</first><last>Boulaknadel</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <author><first>Driss</first><last>Aboutajdine</last></author>
       <title>A Multi-Word Term Extraction Program for <fixed-case>A</fixed-case>rabic Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/378_paper.pdf</url>
@@ -1485,7 +1485,7 @@
     </paper>
     <paper id="156">
       <author><first>Jonathan</first><last>Butters</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>Using Similarity Metrics For Terminology Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/717_paper.pdf</url>
       <abstract>In this paper we present an approach to terminology recognition whereby a sublanguage term (e.g. an aircraft engine component term extracted from a maintenance log) is matched to its corresponding term from a pre-defined list (such as a taxonomy representing the official break-down of the engine). Terminology recognition is addressed as a classification task whereby the extracted term is associated to one or more potential terms in the official description list via the application of string similarity metrics. The solution described in the paper uses dynamically computed similarity cut-off thresholds calculated on the basis of modeling a noise curve. Dissimilar string matches form a Gaussian distributed noise curve that can be identified and extracted leaving only mostly similar string matches. Dynamically calculated thresholds are preferable over fixed similarity thresholds as fixed thresholds are inherently imprecise, that is, there is no similarity boundary beyond which any two strings always describe the same concept.</abstract>
@@ -1509,8 +1509,8 @@
       <bibkey>pitel-grefenstette-2008-semi</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Laurence</first><last>Devillers</last></author>
-      <author><first>Jean-Claude</first><last>Martin</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
+      <author id="jean-claude-martin"><first>Jean-Claude</first><last>Martin</last></author>
       <title>Coding Emotional Events in Audiovisual Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/322_paper.pdf</url>
       <abstract>The modelling of realistic emotional behaviour is needed for various applications in multimodal human-machine interaction such as the design of emotional conversational agents (Martin et al., 2005) or of emotional detection systems (Devillers and Vidrascu, 2007). Yet, building such models requires appropriate definition of various levels for representing the emotions themselves but also some contextual information such as the events that elicit these emotions. This paper presents a coding scheme that has been defined following annotations of a corpus of TV interviews (EmoTV). Deciding which events triggered or may trigger which emotion is a challenge for building efficient emotion eliciting protocols. In this paper, we present the protocol that we defined for collecting another corpus of spontaneous human-human interactions recorded in laboratory conditions (EmoTaboo). We discuss the events that we designed for eliciting emotions. Part of this scheme for coding emotional event is being included in the specifications that are currently defined by a working group of the W3C (the W3C Emotion Incubator Working group). This group is investigating the feasibility of working towards a standard representation of emotions and related states in technological contexts.</abstract>
@@ -1528,8 +1528,8 @@
     <paper id="161">
       <author><first>Isa</first><last>Maks</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Roxane</first><last>Segers</last></author>
-      <author><first>Hennie</first><last>van der Vliet</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
+      <author id="hennie-van-der-vliet"><first>Hennie</first><last>van der Vliet</last></author>
       <title>Adjectives in the <fixed-case>D</fixed-case>utch Semantic Lexical Database <fixed-case>CORNETTO</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/184_paper.pdf</url>
       <abstract>The goal of this paper is to describe how adjectives are encoded in Cornetto, a semantic lexical database for Dutch. Cornetto combines two existing lexical resources with different semantic organisation, i.e. Dutch Wordnet (DWN) with a synset organisation and Referentie Bestand Nederlands (RBN) with an organisation in Lexical Units. Both resources will be aligned and mapped on the formal ontology SUMO. In this paper, we will first present details of the description of adjectives in each of the the two resources. We will then address the problems that are encountered during alignment to the SUMO ontology which are greatly due to the fact that SUMO has never been tested for its adequacy with respect to adjectives. We contrasted SUMO with an existing semantic classification which resulted in a further refined and extended SUMO geared for the description of adjectives.</abstract>
@@ -1537,7 +1537,7 @@
     </paper>
     <paper id="162">
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <title>Detecting Errors in Semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/157_paper.pdf</url>
       <abstract>We develop a method for detecting errors in semantic predicate-argument annotation, based on the variation n-gram error detection method. After establishing an appropriate data representation, we detect inconsistencies by searching for identical text with varying annotation. By remaining data-driven, we are able to detect inconsistencies arising from errors at lower layers of annotation.</abstract>
@@ -1545,16 +1545,16 @@
     </paper>
     <paper id="163">
       <author><first>Michael</first><last>Roth</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Corpus Co-Occurrence, Dictionary and <fixed-case>W</fixed-case>ikipedia Entries as Resources for Semantic Relatedness Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/473_paper.pdf</url>
       <abstract>Distributional, corpus-based descriptions have frequently been applied to model aspects of word meaning. However, distributional models that use corpus data as their basis have one well-known disadvantage: even though the distributional features based on corpus co-occurrence were often successful in capturing meaning aspects of the words to be described, they generally fail to capture those meaning aspects that refer to world knowledge, because coherent texts tend not to provide redundant information that is presumably available knowledge. The question we ask in this paper is whether dictionary and encyclopaedic resources might complement the distributional information in corpus data, and provide world knowledge that is missing in corpora. As test case for meaning aspects, we rely on a collection of semantic associates to German verbs and nouns. Our results indicate that a combination of the knowledge resources should be helpful in work on distributional descriptions.</abstract>
       <bibkey>roth-schulte-im-walde-2008-corpus</bibkey>
     </paper>
     <paper id="164">
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <author><first>Simone</first><last>Marchi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <title>Ontology Learning and Semantic Annotation: a Necessary Symbiosis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/534_paper.pdf</url>
@@ -1562,7 +1562,7 @@
       <bibkey>giovannetti-etal-2008-ontology</bibkey>
     </paper>
     <paper id="165">
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Hugo</first><last>Zaragoza</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
@@ -1572,20 +1572,20 @@
       <bibkey>atserias-etal-2008-semantically</bibkey>
     </paper>
     <paper id="166">
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James</first><last>Martin</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Annotating Students’ Understanding of Science Concepts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/873_paper.pdf</url>
       <abstract>This paper summarizes the annotation of fine-grained entailment relationships in the context of student answers to science assessment questions. We annotated a corpus of 15,357 answer pairs with 145,911 fine-grained entailment relationships. We provide the rationale for such fine-grained analysis and discuss its perceived benefits to an Intelligent Tutoring System. The corpus also has potential applications in other areas, such as question answering and multi-document summarization. Annotators achieved 86.2% inter-annotator agreement (Kappa=0.728, corresponding to substantial agreement) annotating the fine-grained facets of reference answers with regard to understanding expressed in student answers and labeling from one of five possible detailed relationship categories. The corpus described in this paper, which is the only one providing such detailed entailment annotations, is available as a public resource for the research community. The corpus is expected to enable application development, not only for intelligent tutoring systems, but also for general textual entailment applications, that is currently not practical.</abstract>
       <bibkey>nielsen-etal-2008-annotating</bibkey>
     </paper>
     <paper id="167">
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Tom</first><last>Lippincott</last></author>
       <author><first>Tae</first><last>Yano</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <title>Relation between Agreement Measures on Human Labeling and Machine Learning Performance: Results from an Art History Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/722_paper.pdf</url>
       <abstract>We discuss factors that affect human agreement on a semantic labeling task in the art history domain, based on the results of four experiments where we varied the number of labels annotators could assign, the number of annotators, the type and amount of training they received, and the size of the text span being labeled. Using the labelings from one experiment involving seven annotators, we investigate the relation between interannotator agreement and machine learning performance. We construct binary classifiers and vary the training and test data by swapping the labelings from the seven annotators. First, we find performance is often quite good despite lower than recommended interannotator agreement. Second, we find that on average, learning performance for a given functional semantic category correlates with the overall agreement among the seven annotators for that category. Third, we find that learning performance on the data from a given annotator does not correlate with the quality of that annotators labeling. We offer recommendations for the use of labeled data in machine learning, and argue that learners should attempt to accommodate human variation. We also note implications for large scale corpus annotation projects that deal with similarly subjective phenomena.</abstract>
@@ -1627,7 +1627,7 @@
     </paper>
     <paper id="172">
       <author><first>Viktor</first><last>Bielický</last></author>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <title>Building the Valency Lexicon of <fixed-case>A</fixed-case>rabic Verbs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/578_paper.pdf</url>
       <abstract>This paper describes the building of a valency lexicon of Arabic verbs using a morphologically and syntactically annotated corpus, the Prague Arabic Dependency Treebank (PADT), as its primary source. We present the theoretical account on valency developed within the Functional Generative Description (FGD) theory. We apply the framework to Modern Standard Arabic and discuss various valency-related phenomena with respect to examples from the corpus. We then outline the methodology and the linguistic and technical resources used in the building of the lexicon. The key concept in our scenario is that of PDT-VALLEX of Czech. Our lexicon will be developed by linking the conceivable entries with their instances in the treebank. Conversely, the treebanks annotations will be linked to the lexicon. While a comparable scheme has been developed for Czech, our own contribution is to design and implement this model thoroughly for Arabic and the PADT data. The Arabic valency lexicon is intended for applications in computational parsing or language generation, and for use by human researchers. The proposed valency lexicon will be exploited in particular during further tectogrammatical annotations of PADT and might serve for enriching the expected second edition of the corpus-based Arabic-Czech Dictionary.</abstract>
@@ -1646,7 +1646,7 @@
     <paper id="174">
       <author><first>Rogelio</first><last>Nazar</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Teresa</first><last>Cabré</last></author>
+      <author id="maria-teresa-cabre"><first>Teresa</first><last>Cabré</last></author>
       <title>A Suite to Compile and Analyze an <fixed-case>LSP</fixed-case> Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/296_paper.pdf</url>
       <abstract>This paper presents a series of tools for the extraction of specialized corpora from the web and its subsequent analysis mainly with statistical techniques. It is an integrated system of original as well as standard tools and has a modular conception that facilitates its re-integration on different systems. The first part of the paper describes the original techniques, which are devoted to the categorization of documents as relevant or irrelevant to the corpus under construction, considering relevant a specialized document of the selected technical domain. Evaluation figures are provided for the original part, but not for the second part involving the analysis of the corpus, which is composed of algorithms that are well known in the field of Natural Language Processing, such as Kwic search, measures of vocabulary richness, the sorting of n-grams by frequency of occurrence or by measures of statistical association, distribution or similarity.</abstract>
@@ -1654,17 +1654,17 @@
     </paper>
     <paper id="175">
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Nuria</first><last>Castell</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="nuria-castell"><first>Nuria</first><last>Castell</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <title>Causal Relation Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/87_paper.pdf</url>
       <abstract>This paper presents a supervised method for the detection and extraction of Causal Relations from open domain text. First we give a brief outline of the definition of causation and how it relates to other Semantic Relations, as well as a characterization of their encoding. In this work, we only consider marked and explicit causations. Our approach first identifies the syntactic patterns that may encode a causation, then we use Machine Learning techniques to decide whether or not a pattern instance encodes a causation. We focus on the most productive pattern, a verb phrase followed by a relator and a clause, and its reverse version, a relator followed by a clause and a verb phrase. As relators we consider the words as, after, because and since. We present a set of lexical, syntactic and semantic features for the classification task, their rationale and some examples. The results obtained are discussed and the errors analyzed.</abstract>
       <bibkey>blanco-etal-2008-causal</bibkey>
     </paper>
     <paper id="176">
-      <author><first>Grzegorz</first><last>Chrupala</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupala</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title>Learning Morphology with <fixed-case>M</fixed-case>orfette</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/594_paper.pdf</url>
       <abstract>Morfette is a modular, data-driven, probabilistic system which learns to perform joint morphological tagging and lemmatization from morphologically annotated corpora. The system is composed of two learning modules which are trained to predict morphological tags and lemmas using the Maximum Entropy classifier. The third module dynamically combines the predictions of the Maximum-Entropy models and outputs a probability distribution over tag-lemma pair sequences. The lemmatization module exploits the idea of recasting lemmatization as a classification task by using class labels which encode mappings from word forms to lemmas. Experimental evaluation results and error analysis on three morphologically rich languages show that the system achieves high accuracy with no language-specific feature engineering or additional resources.</abstract>
@@ -1672,7 +1672,7 @@
     </paper>
     <paper id="177">
       <author><first>Gaoying</first><last>Cui</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Yirong</first><last>Chen</last></author>
       <title>Corpus Exploitation from <fixed-case>W</fixed-case>ikipedia for Ontology Construction</title>
@@ -1683,9 +1683,9 @@
     <paper id="178">
       <author><first>Shiyan</first><last>Ou</last></author>
       <author><first>Viktor</first><last>Pekar</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <title>Development and Alignment of a Domain-Specific Ontology for Question Answering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/561_paper.pdf</url>
       <abstract>With the appearance of Semantic Web technologies, it becomes possible to develop novel, sophisticated question answering systems, where ontologies are usually used as the core knowledge component. In the EU-funded project, QALL-ME, a domain-specific ontology was developed and applied for question answering in the domain of tourism, along with the assistance of two upper ontologies for concept expansion and reasoning. This paper focuses on the development of the QALL-ME ontology in the tourism domain and its alignment with the upper ontologies - WordNet and SUMO. The design of the ontology is presented in the paper, and a semi-automatic alignment procedure is described with some alignment results given as well. Furthermore, the aligned ontology was used to semantically annotate original data obtained from the tourism web sites and natural language questions. The storage schema of the annotated data and the data access method for retrieving answers from the annotated data are also reported in the paper.</abstract>
@@ -1693,7 +1693,7 @@
     </paper>
     <paper id="179">
       <author><first>David</first><last>Manzano-Macho</last></author>
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
       <author><first>Daniel</first><last>Borrajo</last></author>
       <title>Unsupervised and Domain Independent Ontology Learning: Combining Heterogeneous Sources of Evidence</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/418_paper.pdf</url>
@@ -1702,7 +1702,7 @@
     </paper>
     <paper id="180">
       <author><first>Alessandra</first><last>Potrich</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <title><fixed-case>L</fixed-case>-<fixed-case>ISA</fixed-case>: Learning Domain Specific Isa-Relations from the Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/595_paper.pdf</url>
       <abstract>Automated extraction of ontological knowledge from text corpora is a relevant task in Natural Language Processing. In this paper, we focus on the problem of finding hypernyms for relevant concepts in a specific domain (e.g. Optical Recording) in the context of a concrete and challenging application scenario (patent processing). To this end information available on the Web is exploited. The extraction method includes four mains steps. Firstly, the Google search engine is exploited to retrieve possible instances of isa-patterns reported in the literature. Then, the returned snippets are filtered on the basis of lexico-syntactic criteria (e.g. the candidate hypernym must be expressed as a noun phrase without complex modifiers). In a further filtering step, only candidate hypernyms compatible with the target domain are kept. Finally a candidate ranking mechanism is applied to select one hypernym as output of the algorithm. The extraction method was evaluated on 100 concepts of the Optical Recording domain. Moreover, the reliability of isa-patterns reported in the literature as predictors of isa-relations was assessed by manually evaluating the template instances remaining after lexico-syntactic filtering, for 3 concepts of the same domain. While more extensive testing is needed the method appears promising especially for its portability across different domains.</abstract>
@@ -1711,8 +1711,8 @@
     <paper id="181">
       <author><first>Arno</first><last>Hartholt</last></author>
       <author><first>Thomas</first><last>Russ</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Susan</first><last>Robinson</last></author>
       <title>A Common Ground for Virtual Humans: Using an Ontology in a Natural Language Oriented Virtual Human Architecture</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/811_paper.pdf</url>
@@ -1720,8 +1720,8 @@
       <bibkey>hartholt-etal-2008-common</bibkey>
     </paper>
     <paper id="182">
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <title>Using the Multilingual Central Repository for Graph-Based Word Sense Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/351_paper.pdf</url>
       <abstract>This paper presents the results of a graph-based method for performing knowledge-based Word Sense Disambiguation (WSD). The technique exploits the structural properties of the graph underlying the chosen knowledge base. The method is general, in the sense that it is not tied to any particular knowledge base, but in this work we have applied it to the Multilingual Central Repository (MCR). The evaluation has been performed on the Senseval-3 all-words task. The main contributions of the paper are twofold: (1) We have evaluated the separate and combined performance of each type of relation in the MCR, and thus indirectly validated the contents of the MCR and their potential for WSD. (2) We obtain state-of-the-art results, and in fact yield the best results that can be obtained using publicly available data.</abstract>
@@ -1729,7 +1729,7 @@
     </paper>
     <paper id="183">
       <author><first>Fredric</first><last>Gey</last></author>
-      <author><first>David Kirk</first><last>Evans</last></author>
+      <author id="david-k-evans"><first>David Kirk</first><last>Evans</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
       <title>A <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Technical Lexicon for Translation and Language Research</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/363_paper.pdf</url>
@@ -1739,17 +1739,17 @@
     <paper id="184">
       <author><first>Le An</first><last>Ha</last></author>
       <author><first>Gabriela</first><last>Fernandez</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Gloria</first><last>Corpas</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas</last></author>
       <title>Mutual Bilingual Terminology Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/463_paper.pdf</url>
       <abstract>This paper describes a novel methodology to perform bilingual terminology extraction, in which automatic alignment is used to improve the performance of terminology extraction for each language. The strengths of monolingual terminology extraction for each language are exploited to improve the performance of terminology extraction in the other language, thanks to the availability of a sentence-level aligned bilingual corpus, and an automatic noun phrase alignment mechanism. The experiment indicates that weaknesses in monolingual terminology extraction due to the limitation of resources in certain languages can be overcome by using another language which has no such limitation.</abstract>
       <bibkey>ha-etal-2008-mutual</bibkey>
     </paper>
     <paper id="185">
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Joana Paulo</first><last>Pardal</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Diamantino</first><last>Caseiro</last></author>
       <title>Building a Golden Collection of Parallel Multi-Language Word Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/250_paper.pdf</url>
@@ -1759,13 +1759,13 @@
     <paper id="186">
       <author><first>Elena</first><last>Cabrio</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Laura</first><last>Hasler</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>David</first><last>Tomás</last></author>
-      <author><first>Jose Luis</first><last>Vicedo</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="david-tomas"><first>David</first><last>Tomás</last></author>
+      <author id="jose-luis-vicedo"><first>Jose Luis</first><last>Vicedo</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <author><first>Corinna</first><last>Weber</last></author>
       <title>The <fixed-case>QALL</fixed-case>-<fixed-case>ME</fixed-case> Benchmark: a Multilingual Resource of Annotated Spoken Requests for Question Answering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/628_paper.pdf</url>
@@ -1780,7 +1780,7 @@
       <bibkey>campbell-2008-tools</bibkey>
     </paper>
     <paper id="188">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <title>A Web Browser Extension for Growing-up Ontological Knowledge from Traditional Web Content</title>
@@ -1790,7 +1790,7 @@
     </paper>
     <paper id="189">
       <author><first>Youssef</first><last>Drissi</last></author>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <author><first>David</first><last>Ferrucci</last></author>
       <author><first>Paul</first><last>Keyser</last></author>
       <author><first>Anthony</first><last>Levas</last></author>
@@ -1818,7 +1818,7 @@
     </paper>
     <paper id="192">
       <author><first>Lynette</first><last>Melnar</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <title>Borrowing Language Resources for Development of Automatic Speech Recognition for Low- and Middle-Density Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/68_paper.pdf</url>
       <abstract>In this paper we describe an approach that both creates crosslingual acoustic monophone model sets for speech recognition tasks and objectively predicts their performance without target-language speech data or acoustic measurement techniques. This strategy is based on a series of linguistic metrics characterizing the articulatory phonetic and phonological distances of target-language phonemes from source-language phonemes. We term these algorithms the Combined Phonetic and Phonological Crosslingual Distance (CPP-CD) metric and the Combined Phonetic and Phonological Crosslingual Prediction (CPP-CP) metric. The particular motivations for this project are the current unavailability and often prohibitively high production cost of speech databases for many strategically important low- and middle-density languages. First, we describe the CPP-CD approach and compare the performance of CPP-CD-specified models to both native language models and crosslingual models selected by the Bhattacharyya acoustic-model distance metric in automatic speech recognition (ASR) experiments. Results confirm that the CPP-CD approach nearly matches those achieved by the acoustic distance metric. We then test the CPP-CP algorithm on the CPP-CD models by comparing the CPP-CP scores to the recognition phoneme error rates. Based on this comparison, we conclude that the CPP-CP algorithm is a reliable indicator of crosslingual model performance in speech recognition tasks.</abstract>
@@ -1840,7 +1840,7 @@
       <author><first>Melissa</first><last>Kronenthal</last></author>
       <author><first>Robert</first><last>Logie</last></author>
       <author><first>Neil</first><last>Mayo</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <author><first>Matt</first><last>Watson</last></author>
       <title>A Fully Annotated Corpus for Studying the Effect of Cognitive Ageing on Users’ Interactions with Spoken Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/237_paper.pdf</url>
@@ -1848,9 +1848,9 @@
       <bibkey>georgila-etal-2008-fully</bibkey>
     </paper>
     <paper id="195">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Joris</first><last>Driesen</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
       <title>Recording Speech of Children, Non-Natives and Elderly People for <fixed-case>HLT</fixed-case> Applications: the <fixed-case>JASMIN</fixed-case>-<fixed-case>CGN</fixed-case> Corpus.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/366_paper.pdf</url>
@@ -1867,7 +1867,7 @@
       <bibkey>draxler-etal-2008-f0</bibkey>
     </paper>
     <paper id="197">
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>David</first><last>Benyon</last></author>
       <author><first>Christopher</first><last>Brewster</last></author>
       <author><first>Pavel</first><last>Ircing</last></author>
@@ -1878,7 +1878,7 @@
       <bibkey>wilks-etal-2008-dialogue</bibkey>
     </paper>
     <paper id="198">
-      <author><first>Jade</first><last>Goldstein-Stewart</last></author>
+      <author id="jade-goldstein"><first>Jade</first><last>Goldstein-Stewart</last></author>
       <author><first>Kerri</first><last>Goodwin</last></author>
       <author><first>Roberta</first><last>Sabin</last></author>
       <author><first>Ransom</first><last>Winder</last></author>
@@ -1888,10 +1888,10 @@
       <bibkey>goldstein-stewart-etal-2008-creating</bibkey>
     </paper>
     <paper id="199">
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <author><first>Alexiei</first><last>Dingli</last></author>
       <author><first>Hugo</first><last>Pinto</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Information Extraction Tools and Methods for Understanding Dialogue in a Companion</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/819_paper.pdf</url>
       <abstract>This paper discusses how Information Extraction is used to understand and manage Dialogue in the EU-funded Companions project. This will be discussed with respect to the Senior Companion, one of two applications under development in the EU-funded Companions project. Over the last few years, research in human-computer dialogue systems has increased and much attention has focused on applying learning methods to improving a key part of any dialogue system, namely the dialogue manager. Since the dialogue manager in all dialogue systems relies heavily on the quality of the semantic interpretation of the users utterance, our research in the Companions project, focuses on how to improve the semantic interpretation and combine it with knowledge from the Knowledge Base to increase the performance of the Dialogue Manager. Traditionally the semantic interpretation of a user utterance is handled by a natural language understanding module which embodies a variety of natural language processing techniques, from sentence splitting, to full parsing. In this paper we discuss the use of a variety of NLU processes and in particular Information Extraction as a key part of the NLU module in order to improve performance of the dialogue manager and hence the overall dialogue system.</abstract>
@@ -1899,16 +1899,16 @@
     </paper>
     <paper id="200">
       <author><first>Carlos Gómez</first><last>Gallo</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <title>Production in a Multimodal Corpus: how Speakers Communicate Complex Actions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/740_paper.pdf</url>
       <abstract>We describe a new multimodal corpus currently under development. The corpus consists of videos of task-oriented dialogues that are annotated for speakers verbal requests and domain action executions. This resource provides data for new research on language production and comprehension. The corpus can be used to study speakers decisions as to how to structure their utterances given the complexity of the message they are trying to convey.</abstract>
       <bibkey>gallo-etal-2008-production</bibkey>
     </paper>
     <paper id="201">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Chwhynny</first><last>Overbeeke</last></author>
       <title>Towards Formal Interpretation of Semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/93_paper.pdf</url>
@@ -1919,14 +1919,14 @@
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Diego</first><last>De Cao</last></author>
       <author><first>Paolo</first><last>Marocco</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <title>Towards a Vector Space Model for <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-like Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/202_paper.pdf</url>
       <abstract>In this paper, we present an original framework to model frame semantic resources (namely, FrameNet) using minimal supervision. This framework can be leveraged both to expand an existing FrameNet with new knowledge, and to induce a FrameNet in a new language. Our hypothesis is that a frame semantic resource can be modeled and represented by a suitable semantic space model. The intuition is that semantic spaces are an effective model of the notion of being characteristic of a frame for both lexical elements and full sentences. The paper gives two main contributions. First, it shows that our hypothesis is valid and can be successfully implemented. Second, it explores different types of semantic VSMs, outlining which one is more suitable for representing a frame semantic resource. In the paper, VSMs are used for modeling the linguistic core of a frame, the lexical units. Indeed, if the hypothesis is verified for these units, the proposed framework has a much wider application.</abstract>
       <bibkey>pennacchiotti-etal-2008-towards</bibkey>
     </paper>
     <paper id="203">
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <title><fixed-case>K</fixed-case>no<fixed-case>F</fixed-case>usius: a New Knowledge Fusion System for Interpretation of Gene Expression Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/904_paper.pdf</url>
       <abstract>This paper introduces a new architecture that aims at combining molecular biology data with information automatically extracted from relevant scientific literature (using text mining techniques on PubMed abstracts and fulltext papers) to help biomedical experts to interpret experimental results in hand. The infrastructural level bears on semantic-web technologies and standards that facilitate the actual fusion of the multi-source knowledge.</abstract>
@@ -1946,7 +1946,7 @@
       <author><first>Leen</first><last>Cleuren</last></author>
       <author><first>Jacques</first><last>Duchateau</last></author>
       <author><first>Pol</first><last>Ghesquière</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <title>Children’s Oral Reading Corpus (<fixed-case>CHOREC</fixed-case>): Description and Assessment of Annotator Agreement</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/254_paper.pdf</url>
       <abstract>Within the scope of the SPACE project, the CHildrens Oral REading Corpus (CHOREC) is developed. This database contains recorded, transcribed and annotated read speech (42 GB or 130 hours) of 400 Dutch speaking elementary school children with or without reading difficulties. Analyses of inter- and intra-annotator agreement are carried out in order to investigate the consistency with which reading errors are detected, orthographic and phonetic transcriptions are made, and reading errors and reading strategies are labeled. Percentage agreement scores and kappa values both show that agreement between annotations, and therefore the quality of the annotations, is high. Taken all double or triple annotations (for 10% resp. 30% of the corpus) together, % agreement varies between 86.4% and 98.6%, whereas kappa varies between 0.72 and 0.97 depending on the annotation tier that is being assessed. School type and reading type seem to account for systematic differences in % agreement, but these differences disappear when kappa values are calculated that correct for chance agreement. To conclude, an analysis of the annotation differences with respect to the *s label (i.e. a label that is used to annotate undistinguishable spelling behaviour), phoneme labels, reading strategy and error labels is given.</abstract>
@@ -1954,7 +1954,7 @@
     </paper>
     <paper id="206">
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <title>A Bilingual Corpus of Inter-linked Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/610_paper.pdf</url>
@@ -1962,7 +1962,7 @@
       <bibkey>caselli-etal-2008-bilingual</bibkey>
     </paper>
     <paper id="207">
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Lauren</first><last>Friedman</last></author>
       <author><first>Safa</first><last>Ismael</last></author>
       <author><first>Linda</first><last>Brandschain</last></author>
@@ -1982,11 +1982,11 @@
     <paper id="209">
       <author><first>Thorsten</first><last>Trippel</last></author>
       <author><first>Michael</first><last>Maxwell</last></author>
-      <author><first>Greville</first><last>Corbett</last></author>
+      <author id="greville-c-corbett"><first>Greville</first><last>Corbett</last></author>
       <author><first>Cambell</first><last>Prince</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
-      <author><first>Steve</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steve</first><last>Moran</last></author>
       <title>Lexicon Schemas and Related Data Models: when Standards Meet Users</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/812_paper.pdf</url>
       <abstract>Lexicon schemas and their use are discussed in this paper from the perspective of lexicographers and field linguists. A variety of lexicon schemas have been developed, with goals ranging from computational lexicography (DATR) through archiving (LIFT, TEI) to standardization (LMF, FSR). A number of requirements for lexicon schemas are given. The lexicon schemas are introduced and compared to each other in terms of conversion and usability for this particular user group, using a common lexicon entry and providing examples for each schema under consideration. The formats are assessed and the final recommendation is given for the potential users, namely to request standard compliance from the developers of the tools used. This paper should foster a discussion between authors of standards, lexicographers and field linguists.</abstract>
@@ -2002,12 +2002,12 @@
       <bibkey>messiant-etal-2008-lexschem</bibkey>
     </paper>
     <paper id="211">
-      <author><first>Horacio</first><last>Rodríguez</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <author><first>Javi</first><last>Ferreres</last></author>
       <author><first>Manuel</first><last>Bertran</last></author>
       <author><first>Musa</first><last>Alkhalifa</last></author>
-      <author><first>M. Antonia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>M. Antonia</first><last>Martí</last></author>
       <title><fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Semi-automatic Extensions using <fixed-case>B</fixed-case>ayesian Inference</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/434_paper.pdf</url>
       <abstract>This presentation focuses on the semi-automatic extension of Arabic WordNet (AWN) using lexical and morphological rules and applying Bayesian inference. We briefly report on the current status of AWN and propose a way of extending its coverage by taking advantage of a limited set of highly productive Arabic morphological rules for deriving a range of semantically related word forms from verb entries. The application of this set of rules, combined with the use of bilingual Arabic-English resources and Princetons WordNet, allows the generation of a graph representing the semantic neighbourhood of the original word. In previous work, a set of associations between the hypothesized Arabic words and English synsets was proposed on the basis of this graph. Here, a novel approach to extending AWN is presented whereby a Bayesian Network is automatically built from the graph and then the net is used as an inferencing mechanism for scoring the set of candidate associations. Both on its own and in combination with the previous technique, this new approach has led to improved results.</abstract>
@@ -2016,9 +2016,9 @@
     <paper id="212">
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Inmaculada</first><last>Hernáez</last></author>
-      <author><first>Jon</first><last>Sanchez</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inmaculada</first><last>Hernáez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sanchez</last></author>
       <author><first>Iker</first><last>Luengo</last></author>
       <author><first>Igor</first><last>Odriozola</last></author>
       <title>Subjective Evaluation of an Emotional Speech Database for <fixed-case>B</fixed-case>asque</title>
@@ -2027,7 +2027,7 @@
       <bibkey>sainz-etal-2008-subjective</bibkey>
     </paper>
     <paper id="213">
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Wolfgang</first><last>Maier</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
@@ -2039,8 +2039,8 @@
     <paper id="214">
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Stéphane</first><last>Chaudiron</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Ismaïl</first><last>Timimi</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="ismail-timimi"><first>Ismaïl</first><last>Timimi</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <title>The <fixed-case>INFILE</fixed-case> Project: a Crosslingual Filtering Systems Evaluation Campaign</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/907_paper.pdf</url>
@@ -2048,8 +2048,8 @@
       <bibkey>besancon-etal-2008-infile</bibkey>
     </paper>
     <paper id="215">
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
       <title><fixed-case>DIAC</fixed-case>+: a Professional Diacritics Recovering System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/54_paper.pdf</url>
       <abstract>In languages that use diacritical characters, if these special signs are stripped-off from a word, the resulted string of characters may not exist in the language, and therefore its normative form is, in general, easy to recover. However, this is not always the case, as presence or absence of a diacritical sign attached to a base letter of a word which exists in both variants, may change its grammatical properties or even the meaning, making the recovery of the missing diacritics a difficult task, not only for a program but sometimes even for a human reader. We describe and evaluate an accurate knowledge-based system for automatic recovery of the missing diacritics in MS-Office documents written in Romanian. For the rare cases when the system is not able to make a reliable decision, it either provides the user a list of words with their recovery suggestions, or probabilistically chooses one of the possible changes, but leaves a trace (a highlighted comment) on each word the modification of which was uncertain.</abstract>
@@ -2059,7 +2059,7 @@
       <author><first>Ghazi</first><last>Abuhakema</last></author>
       <author><first>Reem</first><last>Faraj</last></author>
       <author><first>Anna</first><last>Feldman</last></author>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
       <title>Annotating an <fixed-case>A</fixed-case>rabic Learner Corpus for Error</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/343_paper.pdf</url>
       <abstract>This paper describes an ongoing project in which we are collecting a learner corpus of Arabic, developing a tagset for error annotation and performing Computer-aided Error Analysis (CEA) on the data. We adapted the French Interlanguage Database FRIDA tagset (Granger, 2003a) to the data. We chose FRIDA in order to follow a known standard and to see whether the changes needed to move from a French to an Arabic tagset would give us a measure of the distance between the two languages with respect to learner difficulty. The current collection of texts, which is constantly growing, contains intermediate and advanced-level student writings. We describe the need for such corpora, the learner data we have collected and the tagset we have developed. We also describe the error frequency distribution of both proficiency levels and the ongoing work.</abstract>
@@ -2084,7 +2084,7 @@
       <author><first>Rob</first><last>van Son</last></author>
       <author><first>Wieneke</first><last>Wesseling</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>The <fixed-case>IFADV</fixed-case> Corpus: a Free Dialog Video Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/132_paper.pdf</url>
       <abstract>Research into spoken language has become more visual over the years. Both fundamental and applied research have progressively included gestures, gaze, and facial expression. Corpora of multi-modal conversational speech are rare and frequently difficult to use due to privacy and copyright restrictions. A freely available annotated corpus is presented, gratis and libre, of high quality video recordings of face-to-face conversational speech. Annotations include orthography, POS tags, and automatically generated phonemes transcriptions and word boundaries. In addition, labeling of both simple conversational function and gaze direction has been a performed. Within the bounds of the law, everything has been done to remove copyright and use restrictions. Annotations have been processed to RDBMS tables that allow SQL queries and direct connections to statistical software. From our experiences we would like to advocate the formulation of best practises for both legal handling and database storage of recordings and annotations.</abstract>
@@ -2092,10 +2092,10 @@
     </paper>
     <paper id="220">
       <author><first>Alessio</first><last>Brutti</last></author>
-      <author><first>Luca</first><last>Cristoforetti</last></author>
+      <author id="luca-cristoforetti"><first>Luca</first><last>Cristoforetti</last></author>
       <author><first>Walter</first><last>Kellermann</last></author>
       <author><first>Lutz</first><last>Marquardt</last></author>
-      <author><first>Maurizio</first><last>Omologo</last></author>
+      <author id="maurizio-omologo"><first>Maurizio</first><last>Omologo</last></author>
       <title><fixed-case>WOZ</fixed-case> Acoustic Data Collection for Interactive <fixed-case>TV</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/584_paper.pdf</url>
       <abstract>This paper describes a multichannel acoustic data collection recorded under the European DICIT project, during the Wizard of Oz (WOZ) experiments carried out at FAU and FBK-irst laboratories. The scenario is a distant-talking interface for interactive control of a TV. The experiments involve the acquisition of multichannel data for signal processing front-end and were carried out due to the need to collect a database for testing acoustic pre-processing algorithms. In this way, realistic scenarios can be simulated at a preliminary stage, instead of real-time implementations, allowing for repeatable experiments. To match the project requirements, the WOZ experiments were recorded in three languages: English, German and Italian. Besides the user inputs, the database also contains non-speech related acoustic events, room impulse response measurements and video data, the latter used to compute 3D labels. Sessions were manually transcribed and segmented at word level, introducing also specific labels for acoustic events.</abstract>
@@ -2109,8 +2109,8 @@
       <bibkey>lounela-2008-process</bibkey>
     </paper>
     <paper id="222">
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <author><first>Marta</first><last>Recasens</last></author>
       <title><fixed-case>A</fixed-case>n<fixed-case>C</fixed-case>ora: Multilevel Annotated Corpora for <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/35_paper.pdf</url>
@@ -2119,8 +2119,8 @@
     </paper>
     <paper id="223">
       <author><first>Stephen</first><last>Purpura</last></author>
-      <author><first>John</first><last>Wilkerson</last></author>
-      <author><first>Dustin</first><last>Hillard</last></author>
+      <author id="john-wilkerson"><first>John</first><last>Wilkerson</last></author>
+      <author id="dustin-hillard"><first>Dustin</first><last>Hillard</last></author>
       <title>The <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Policy Agenda Legislation Corpus Volume 1 - a Language Resource from 1947 - 1998</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/105_paper.pdf</url>
       <abstract>We introduce the corpus of United States Congressional bills from 1947 to 1998 for use by language research communities. The U.S. Policy Agenda Legislation Corpus Volume 1 (USPALCV1) includes more than 375,000 legislative bills annotated with a hierarchical policy area category. The human annotations in USPALCV1 have been reliably applied over time to enable social science analysis of legislative trends. The corpus is a member of an emerging family of corpora that are annotated by policy area to enable comparative parallel trend recognition across countries and domains (legislation, political speeches, newswire articles, budgetary expenditures, web sites, etc.). This paper describes the origins of the corpus, its creation, ways to access it, design criteria, and an analysis with common supervised machine learning methods. The use of machine learning methods establishes a baseline proposed modeling for the topic classification of legal documents.</abstract>
@@ -2146,7 +2146,7 @@
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Martin</first><last>Reynaert</last></author>
       <author><first>Paola</first><last>Monachesi</last></author>
-      <author><first>Gertjan</first><last>Van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>Van Noord</last></author>
       <author><first>Roeland</first><last>Ordelman</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
@@ -2156,7 +2156,7 @@
       <bibkey>oostdijk-etal-2008-coi</bibkey>
     </paper>
     <paper id="227">
-      <author><first>Hiromi Itoh</first><last>Ozaku</last></author>
+      <author id="hiromi-itoh-ozaku"><first>Hiromi Itoh</first><last>Ozaku</last></author>
       <author><first>Akinori</first><last>Abe</last></author>
       <author><first>Kaoru</first><last>Sagara</last></author>
       <author><first>Kiyoshi</first><last>Kogure</last></author>
@@ -2166,8 +2166,8 @@
       <bibkey>ozaku-etal-2008-relationships</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Meghan Lammie</first><last>Glenn</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="meghan-glenn"><first>Meghan Lammie</first><last>Glenn</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Lauren</first><last>Friedman</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Shawn</first><last>Medero</last></author>
@@ -2187,7 +2187,7 @@
       <bibkey>hammarstrom-etal-2008-bootstrapping</bibkey>
     </paper>
     <paper id="230">
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
       <author><first>Yohsuke</first><last>Kondoh</last></author>
       <title>Automatic Assessment of <fixed-case>J</fixed-case>apanese Text Readability Based on a Textbook Corpus</title>
@@ -2198,9 +2198,9 @@
     <paper id="231">
       <author><first>Paul</first><last>Thompson</last></author>
       <author><first>Philip</first><last>Cotter</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Andrea</first><last>Trabucco</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <title>Building a Bio-Event Annotated Corpus for the Acquisition of Semantic Frames from Biomedical Corpora</title>
@@ -2209,9 +2209,9 @@
       <bibkey>thompson-etal-2008-building</bibkey>
     </paper>
     <paper id="232">
-      <author><first>C.J.</first><last>Rupp</last></author>
+      <author id="c-j-rupp"><first>C.J.</first><last>Rupp</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Peter</first><last>Corbett</last></author>
+      <author id="peter-corbett"><first>Peter</first><last>Corbett</last></author>
       <author><first>Peter</first><last>Murray-Rust</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
@@ -2236,7 +2236,7 @@
     <paper id="234">
       <author><first>Valeria</first><last>Quochi</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <title>A lexicon for biology and bioinformatics: the <fixed-case>BOOTS</fixed-case>trep experience.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf</url>
@@ -2294,7 +2294,7 @@
     </paper>
     <paper id="241">
       <author><first>Meni</first><last>Adler</last></author>
-      <author><first>Yael</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Netzer</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>David</first><last>Gabay</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
@@ -2307,7 +2307,7 @@
       <author><first>Joydeep</first><last>Nath</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Animesh</first><last>Mukherjee</last></author>
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <author><first>Niloy</first><last>Ganguly</last></author>
       <title>Unsupervised Parts-of-Speech Induction for <fixed-case>B</fixed-case>engali</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf</url>
@@ -2315,7 +2315,7 @@
       <bibkey>nath-etal-2008-unsupervised</bibkey>
     </paper>
     <paper id="243">
-      <author><first>Guadalupe Aguado</first><last>de Cea</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe Aguado</first><last>de Cea</last></author>
       <author><first>Javier</first><last>Puche</last></author>
       <author><first>José Ángel</first><last>Ramos</last></author>
       <title>Tagging <fixed-case>S</fixed-case>panish Texts: the Problem of Problem of “<fixed-case>SE</fixed-case>”</title>
@@ -2339,7 +2339,7 @@
     </paper>
     <paper id="246">
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title>Treebank-Based Acquisition of <fixed-case>LFG</fixed-case> Parsing Resources for <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/739_paper.pdf</url>
       <abstract>Motivated by the expense in time and other resources to produce hand-crafted grammars, there has been increased interest in automatically obtained wide-coverage grammars from treebanks for natural language processing. In particular, recent years have seen the growth in interest in automatically obtained deep resources that can represent information absent from simple CFG-type structured treebanks and which are considered to produce more language-neutral linguistic representations, such as dependency syntactic trees. As is often the case in early pioneering work on natural language processing, English has provided the focus of first efforts towards acquiring deep-grammar resources, followed by successful treatments of, for example, German, Japanese, Chinese and Spanish. However, no comparable large-scale automatically acquired deep-grammar resources have been obtained for French to date. The goal of this paper is to present the application of treebank-based language acquisition to the case of French. We show that with modest changes to the established parsing architectures, encouraging results can be obtained for French, with an overall best dependency structure f-score of 86.73%.</abstract>
@@ -2359,7 +2359,7 @@
       <author><first>Georgios</first><last>Petasis</last></author>
       <author><first>Aris</first><last>Theodorakos</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine</first><last>Spyropoulos</last></author>
       <title><fixed-case>BOEMIE</fixed-case> Ontology-Based Text Annotation Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/324_paper.pdf</url>
       <abstract>The huge amount of the available information in the Web creates the need of effective information extraction systems that are able to produce metadata that satisfy users information needs. The development of such systems, in the majority of cases, depends on the availability of an appropriately annotated corpus in order to learn extraction models. The production of such corpora can be significantly facilitated by annotation tools that are able to annotate, according to a defined ontology, not only named entities but most importantly relations between them. This paper describes the BOEMIE ontology-based annotation tool which is able to locate blocks of text that correspond to specific types of named entities, fill tables corresponding to ontology concepts with those named entities and link the filled tables based on relations defined in the domain ontology. Additionally, it can perform annotation of blocks of text that refer to the same topic. The tool has a user-friendly interface, supports automatic pre-annotation, annotation comparison as well as customization to other annotation schemata. The annotation tool has been used in a large scale annotation task involving 3,000 web pages regarding athletics. It has also been used in another annotation task involving 503 web pages with medical information, in different languages.</abstract>
@@ -2376,19 +2376,19 @@
     </paper>
     <paper id="250">
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Shu-kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-kai</first><last>Hsieh</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Federico</first><last>Neri</last></author>
-      <author><first>Remo</first><last>Raffaelli</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Maurizio</first><last>Tescon</last></author>
+      <author id="remo-raffaelli"><first>Remo</first><last>Raffaelli</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tescon</last></author>
       <author><first>Joop</first><last>VanGent</last></author>
       <title><fixed-case>KYOTO</fixed-case>: a System for Mining, Structuring and Distributing Knowledge across Languages and Cultures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/373_paper.pdf</url>
@@ -2396,7 +2396,7 @@
       <bibkey>vossen-etal-2008-kyoto</bibkey>
     </paper>
     <paper id="251">
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Torsten</first><last>Marek</last></author>
@@ -2408,7 +2408,7 @@
     </paper>
     <paper id="252">
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Alexandra</first><last>Balahur-Dobrescu</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur-Dobrescu</last></author>
       <title>Named Entity Relation Mining using <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/192_paper.pdf</url>
       <abstract>Discovering relations among Named Entities (NEs) from large corpora is both a challenging, as well as useful task in the domain of Natural Language Processing, with applications in Information Retrieval (IR), Summarization (SUM), Question Answering (QA) and Textual Entailment (TE). The work we present resulted from the attempt to solve practical issues we were confronted with while building systems for the tasks of Textual Entailment Recognition and Question Answering, respectively. The approach consists in applying grammar induced extraction patterns on a large corpus - Wikipedia - for the extraction of relations between a given Named Entity and other Named Entities. The results obtained are high in precision, determining a reliable and useful application of the built resource.</abstract>
@@ -2426,7 +2426,7 @@
     </paper>
     <paper id="254">
       <author><first>Zhiyi</first><last>Song</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Entity Translation and Alignment in the <fixed-case>ACE</fixed-case>-07 <fixed-case>ET</fixed-case> Task</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/551_paper.pdf</url>
       <abstract>Entities - people, organizations, locations and the like - have long been a central focus of natural language processing technology development, since entities convey essential content in human languages. For multilingual systems, accurate translation of named entities and their descriptors is critical. LDC produced Entity Translation pilot data to support the ACE ET 2007 Evaluation and the current paper delves more deeply into the entity alignment issue across languages, combining the automatic alignment techniques developed for ACE-07 with manual alignment. Altogether 84% of the Chinese-English entity mentions and 74% of the Arabic-English entity mentions are perfect aligned. The results of this investigation offer several important insights. Automatic alignment algorithms predicted that perfect alignment for the ET corpus was likely to be no greater than 55%; perfect alignment on the 15 pilot documents was predicted at 62.5%. Our results suggest the actual perfect alignment rate is substantially higher (82% average, 92% for NAM entities). The careful analysis of alignment errors also suggests strategies for human translation to support the ET task; for instance, translators might be given additional guidance about preferred treatments of name versus nominal translation. These results can also contribute to refined methods of evaluating ET systems.</abstract>
@@ -2434,8 +2434,8 @@
     </paper>
     <paper id="255">
       <author><first>Yoji</first><last>Kiyota</last></author>
-      <author><first>Noriyuki</first><last>Tamura</last></author>
-      <author><first>Satoshi</first><last>Sakai</last></author>
+      <author id="noriyuki-tamura"><first>Noriyuki</first><last>Tamura</last></author>
+      <author id="satoshi-sakai"><first>Satoshi</first><last>Sakai</last></author>
       <author><first>Hiroshi</first><last>Nakagawa</last></author>
       <author><first>Hidetaka</first><last>Masuda</last></author>
       <title>Automated Subject Induction from Query Keywords through <fixed-case>W</fixed-case>ikipedia Categories and Subject Headings</title>
@@ -2445,14 +2445,14 @@
     </paper>
     <paper id="256">
       <author><first>Linus</first><last>Sellberg</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <title>Using Random Indexing to improve Singular Value Decomposition for Latent Semantic Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/586_paper.pdf</url>
       <abstract>In this paper we present results from using Random indexing for Latent Semantic Analysis to handle Singular Value Decomposition tractability issues. In the paper we compare Latent Semantic Analysis, Random Indexing and Latent Semantic Analysis on Random Indexing reduced matrices. Our results show that Latent Semantic Analysis on Random Indexing reduced matrices provide better results on Precision and Recall than Random Indexing only. Furthermore, computation time for Singular Value Decomposition on a Random indexing reduced matrix is almost halved compared to Latent Semantic Analysis.</abstract>
       <bibkey>sellberg-jonsson-2008-using</bibkey>
     </paper>
     <paper id="257">
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <title>Harvesting Multi-Word Expressions from Parallel Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/281_paper.pdf</url>
@@ -2472,16 +2472,16 @@
     <paper id="259">
       <author><first>Daiga</first><last>Deksne</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <title>Dictionary of Multiword Expressions for Translation into highly Inflected Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/353_paper.pdf</url>
       <abstract>Treatment of Multiword Expressions (MWEs) is one of the most complicated issues in natural language processing, especially in Machine Translation (MT). The paper presents dictionary of MWEs for a English-Latvian MT system, demonstrating a way how MWEs could be handled for inflected languages with rich morphology and rather free word order. The proposed dictionary of MWEs consists of two constituents: a lexicon of phrases and a set of MWE rules. The lexicon of phrases is rather similar to translation lexicon of the MT system, while MWE rules describe syntactic structure of the source and target sentence allowing correct transformation of different MWE types into the target language and ensuring correct syntactic structure. The paper demonstrates this approach on different MWE types, starting from simple syntactic structures, followed by more complicated cases and including fully idiomatic expressions. Automatic evaluation shows that the described approach increases the quality of translation by 0.6 BLEU points.</abstract>
       <bibkey>deksne-etal-2008-dictionary</bibkey>
     </paper>
     <paper id="260">
-      <author><first>Grazyna</first><last>Vetulani</last></author>
+      <author id="grazyna-vetulani"><first>Grazyna</first><last>Vetulani</last></author>
       <author><first>Zygmunt</first><last>Vetulani</last></author>
-      <author><first>Tomasz</first><last>Obrębski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrębski</last></author>
       <title>Verb-Noun Collocation <fixed-case>S</fixed-case>ynt<fixed-case>L</fixed-case>ex Dictionary: Corpus-Based Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/398_paper.pdf</url>
       <abstract>The project presented here is a part of a long term research program aiming at a full lexicon grammar for Polish (SyntLex). The main concern of this project is computer-assisted acquisition and morpho-syntactic description of verb-noun collocations in Polish. We present methodology and resources obtained in three main project phases which are: dictionary-based acquisition of collocation lexicon, feasibility study for corpus-based lexicon enlargement phase, corpus-based lexicon enlargement and collocation description. In this paper we focus on the results of the third phase. The presented here corpus-based approach permitted us to triple the size the verb-noun collocation dictionary for Polish. In the paper we describe the SyntLex Dictionary of Collocations and announce some future research intended to be a separate project continuation.</abstract>
@@ -2497,8 +2497,8 @@
       <bibkey>qu-etal-2008-targeting</bibkey>
     </paper>
     <paper id="262">
-      <author><first>Margarita Alonso</first><last>Ramos</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita Alonso</first><last>Ramos</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <title>Using Semantically Annotated Corpora to Build Collocation Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/294_paper.pdf</url>
@@ -2506,10 +2506,10 @@
       <bibkey>ramos-etal-2008-using</bibkey>
     </paper>
     <paper id="263">
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <author><first>Aristomenis</first><last>Thanopoulos</last></author>
       <author><first>Manolis</first><last>Maragoudakis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title><fixed-case>E</fixed-case>ksairesis: A Domain-Adaptable System for Ontology Building from Unstructured Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/147_paper.pdf</url>
       <abstract>This paper describes Eksairesis, a system for learning economic domain knowledge automatically from Modern Greek text. The knowledge is in the form of economic terms and the semantic relations that govern them. The entire process in based on the use of minimal language-dependent tools, no external linguistic resources, and merely free, unstructured text. The methodology is thereby easily portable to other domains and other languages. The text is pre-processed with basic morphological annotation, and semantic (named and other) entities are identified using supervised learning techniques. Statistical filtering, i.e. corpora comparison is used to extract domain terms and supervised learning is again employed to detect the semantic relations between pairs of terms. Advanced classification schemata, ensemble learning, and one-sided sampling, are experimented with in order to deal with the noise in the data, which is unavoidable due to the low pre-processing level and the lack of sophisticated resources. An average 68.5% f-score over all the classes is achieved when learning semantic relations. Bearing in mind the use of minimal resources and the highly automated nature of the process, classification performance is very promising, compared to results reported in previous work.</abstract>
@@ -2533,9 +2533,9 @@
       <bibkey>buitelaar-eigner-2008-ontology</bibkey>
     </paper>
     <paper id="266">
-      <author><first>Cássia</first><last>Trojahn</last></author>
+      <author id="cassia-trojahn"><first>Cássia</first><last>Trojahn</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>A Framework for Multilingual Ontology Mapping</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/270_paper.pdf</url>
       <abstract>In the field of ontology mapping, multilingual ontology mapping is an issue that is not well explored. This paper proposes a framework for mapping of multilingual Description Logics (DL) ontologies. First, the DL source ontology is translated to the target ontology language, using a lexical database or a dictionary, generating a DL translated ontology. The target and the translated ontologies are then used as input for the mapping process. The mappings are computed by specialized agents using different mapping approaches. Next, these agents use argumentation to exchange their local results, in order to agree on the obtained mappings. Based on their preferences and confidence of the arguments, the agents compute their preferred mapping sets. The arguments in such preferred sets are viewed as the set of globally acceptable arguments. A DL mapping ontology is generated as result of the mapping process. In this paper we focus on the process of generating the DL translated ontology.</abstract>
@@ -2543,7 +2543,7 @@
     </paper>
     <paper id="267">
       <author><first>Laura</first><last>Kassner</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <title>Acquiring a Taxonomy from the <fixed-case>G</fixed-case>erman <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/544_paper.pdf</url>
@@ -2552,7 +2552,7 @@
     </paper>
     <paper id="268">
       <author><first>Davide</first><last>Picca</last></author>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <author><first>Aldo</first><last>Gangemi</last></author>
       <title><fixed-case>LMM</fixed-case>: an <fixed-case>OWL</fixed-case>-<fixed-case>DL</fixed-case> <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>odel to Represent Heterogeneous Lexical Knowledge</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/608_paper.pdf</url>
@@ -2617,11 +2617,11 @@
       <bibkey>jouis-bourdaillet-2008-representation</bibkey>
     </paper>
     <paper id="275">
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Mingwei</first><last>Xu</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <title>Extracting Concrete Senses of Lexicon through Measurement of Conceptual Similarity in Ontologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/501_paper.pdf</url>
@@ -2655,7 +2655,7 @@
       <bibkey>vivaldi-etal-2008-turning</bibkey>
     </paper>
     <paper id="279">
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <author><first>Vijay</first><last>Murthi</last></author>
       <author><first>Shaji</first><last>Sebastian</last></author>
       <title>Similar Term Discovery using Web Search</title>
@@ -2674,16 +2674,16 @@
     </paper>
     <paper id="281">
       <author><first>Ziqi</first><last>Zhang</last></author>
-      <author><first>Jose</first><last>Iria</last></author>
+      <author id="jose-iria"><first>Jose</first><last>Iria</last></author>
       <author><first>Christopher</first><last>Brewster</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>A Comparative Evaluation of Term Recognition Algorithms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/538_paper.pdf</url>
       <abstract>Automatic Term recognition (ATR) is a fundamental processing step preceding more complex tasks such as semantic search and ontology learning. From a large number of methodologies available in the literature only a few are able to handle both single and multi-word terms. In this paper we present a comparison of five such algorithms and propose a combined approach us¬ing a voting mechanism. We evaluated the six approaches using two different corpora and show how the voting algo¬rithm performs best on one corpus (a collection of texts from Wikipedia) and less well using the Genia corpus (a standard life science corpus). This indicates that choice and design of corpus has a major impact on the evaluation of term recog¬nition algorithms. Our experiments also showed that single-word terms can be equally important and occupy a fairly large proportion in certain domains. As a result, algorithms that ignore single-word terms may cause problems to tasks built on top of ATR. Effective ATR systems also need to take into account both the unstructured text and the structured aspects and this means information extraction techniques need to be integrated into the term recognition process.</abstract>
       <bibkey>zhang-etal-2008-comparative</bibkey>
     </paper>
     <paper id="282">
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Klaar</first><last>Vanopstal</last></author>
       <author><first>Isabelle</first><last>Delaere</last></author>
@@ -2694,10 +2694,10 @@
     </paper>
     <paper id="283">
       <author><first>Eli</first><last>Pociello</last></author>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title><fixed-case>WNTERM</fixed-case>: Enriching the <fixed-case>MCR</fixed-case> with a Terminological Dictionary</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/451_paper.pdf</url>
       <abstract>In this paper we describe the methodology and the first steps for the creation of WNTERM (from WordNet and Terminology), a specialized lexicon produced from the merger of the EuroWordNet-based Multilingual Central Repository (MCR) and the Basic Encyclopaedic Dictionary of Science and Technology (BDST). As an example, the ecology domain has been used. The final result is a multilingual (Basque and English) light-weight domain ontology, including taxonomic and other semantic relations among its concepts, which is tightly connected to other wordnets.</abstract>
@@ -2715,7 +2715,7 @@
     <paper id="285">
       <author><first>Thomas</first><last>Mandl</last></author>
       <author><first>Fredric</first><last>Gey</last></author>
-      <author><first>Giorgio</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio</first><last>Di Nunzio</last></author>
       <author><first>Nicola</first><last>Ferro</last></author>
       <author><first>Mark</first><last>Sanderson</last></author>
       <author><first>Diana</first><last>Santos</last></author>
@@ -2727,7 +2727,7 @@
     </paper>
     <paper id="286">
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan-Císcar</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan-Císcar</last></author>
       <title>Bilingual Text Classification using the <fixed-case>IBM</fixed-case> 1 Translation Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/22_paper.pdf</url>
       <abstract>Manual categorisation of documents is a time-consuming task that has been significantly alleviated with the deployment of automatic and machine-aided text categorisation systems. However, the proliferation of multilingual documentation has become a common phenomenon in many international organisations, while most of the current systems have focused on the categorisation of monolingual text. It has been recently shown that the inherent redundancy in bilingual documents can be effectively exploited by relatively simple, bilingual naive Bayes (multinomial) models. In this work, we present a refined version of these models in which this redundancy is explicitly captured by a combination of a unigram (multinomial) model and the well-known IBM 1 translation model. The proposed model is evaluated on two bilingual classification tasks and compared to previous work.</abstract>
@@ -2750,9 +2750,9 @@
       <bibkey>shinnou-sasaki-2008-spectral</bibkey>
     </paper>
     <paper id="289">
-      <author><first>Danica</first><last>Damljanovic</last></author>
+      <author id="danica-damljanovic"><first>Danica</first><last>Damljanovic</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <title>A Text-based Query Interface to <fixed-case>OWL</fixed-case> Ontologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/64_paper.pdf</url>
       <abstract>Accessing structured data in the form of ontologies requires training and learning formal query languages (e.g., SeRQL or SPARQL) which poses significant difficulties for non-expert users. One of the ways to lower the learning overhead and make ontology queries more straightforward is through a Natural Language Interface (NLI). While there are existing NLIs to structured data with reasonable performance, they tend to require expensive customisation to each new domain or ontology. Additionally, they often require specific adherence to a pre-defined syntax which, in turn, means that users still have to undergo training. In this paper we present Question-based Interface to Ontologies (QuestIO) - a tool for querying ontologies using unconstrained language-based queries. QuestIO has a very simple interface, requires no user training and can be easily embedded in any system or used with any ontology or knowledge base without prior customisation.</abstract>
@@ -2760,7 +2760,7 @@
     </paper>
     <paper id="290">
       <author><first>Han</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Lei</first><last>Han</last></author>
       <title>A Research on Automatic <fixed-case>C</fixed-case>hinese Catchword Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/96_paper.pdf</url>
@@ -2788,7 +2788,7 @@
     </paper>
     <paper id="293">
       <author><first>Michael</first><last>Wiegand</last></author>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <title>Cost-Sensitive Learning in Answer Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/177_paper.pdf</url>
@@ -2806,7 +2806,7 @@
     </paper>
     <paper id="295">
       <author><first>Francesca</first><last>Fallucchi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>Yet another Platform for Extracting Knowledge from Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/284_paper.pdf</url>
       <abstract>The research field of extracting knowledge bases from text collections seems to be mature: its target and its working hypotheses are clear. In this paper we propose a platform, YAPEK, i.e., Yet Another Platform for Extracting Knowledge from corpora, that wants to be the base to collect the majority of algorithms for extracting knowledge bases from corpora. The idea is that, when many knowledge extraction algorithms are collected under the same platform, relative comparisons are clearer and many algorithms can be leveraged to extract more valuable knowledge for final tasks such as Textual Entailment Recognition. As we want to collect many knowledge extraction algorithms, YAPEK is based on the three working hypotheses of the area: the basic hypothesis, the distributional hypothesis, and the point-wise assertion patterns. In YAPEK, these three hypotheses define two spaces: the space of the target textual forms and the space of the contexts. This platform guarantees the possibility of rapidly implementing many models for extracting knowledge from corpora as the platform gives clear entry points to model what is really different in the different algorithms: the feature spaces, the distances in these spaces, and the actual algorithm.</abstract>
@@ -2815,7 +2815,7 @@
     <paper id="296">
       <author><first>Milena</first><last>Yankova</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <title>A Framework for Identity Resolution and Merging for Multi-source Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/347_paper.pdf</url>
       <abstract>In the context of ontology-based information extraction, identity resolution is the process of deciding whether an instance extracted from text refers to a known entity in the target domain (e.g. the ontology). We present an ontology-based framework for identity resolution which can be customized to different application domains and extraction tasks. Rules for identify resolution, which compute similarities between target and source entities based on class information and instance properties and values, can be defined for each class in the ontology. We present a case study of the application of the framework to the problem of multi-source job vacancy extraction</abstract>
@@ -2831,7 +2831,7 @@
       <bibkey>karlgren-etal-2008-experiments</bibkey>
     </paper>
     <paper id="298">
-      <author><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
+      <author id="fidelia-ibekwe-sanjuan"><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
       <author><first>Chaomei</first><last>Chen</last></author>
       <author><first>Roberto</first><last>Pinho</last></author>
       <title>Identifying Strategic Information from Scientific Articles through Sentence Classification</title>
@@ -2841,7 +2841,7 @@
     </paper>
     <paper id="299">
       <author><first>Susana</first><last>Azeredo</last></author>
-      <author><first>Silvia</first><last>Moraes</last></author>
+      <author id="silvia-moraes"><first>Silvia</first><last>Moraes</last></author>
       <author><first>Vera</first><last>Lima</last></author>
       <title>Keywords, k-<fixed-case>NN</fixed-case> and Neural Networks: a Support for Hierarchical Categorization of Texts in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/402_paper.pdf</url>
@@ -2868,16 +2868,16 @@
       <bibkey>yamamoto-etal-2008-extraction</bibkey>
     </paper>
     <paper id="302">
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Brian</first><last>Kemper</last></author>
       <author><first>Kanae</first><last>Oda</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yukiko</first><last>Matsuoka</last></author>
       <author><first>Norihiro</first><last>Kikuchi</last></author>
       <author><first>Hiroaki</first><last>Kitano</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title>Connecting Text Mining and Pathways using the <fixed-case>P</fixed-case>ath<fixed-case>T</fixed-case>ext Resource</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/442_paper.pdf</url>
       <abstract>Many systems have been developed in the past few years to assist researchers in the discovery of knowledge published as English text, for example in the PubMed database. At the same time, higher level collective knowledge is often published using a graphical notation representing all the entities in a pathway and their interactions. We believe that these pathway visualizations could serve as an effective user interface for knowledge discovery if they can be linked to the text in publications. Since the graphical elements in a Pathway are of a very different nature than their corresponding descriptions in English text, we developed a prototype system called PathText. The goal of PathText is to serve as a bridge between these two different representations. In this paper, we first describe the overall architecture and the interfaces of the PathText system, and then provide some details about the core Text Mining components.</abstract>
@@ -2885,7 +2885,7 @@
     </paper>
     <paper id="303">
       <author><first>Jan</first><last>Pomikálek</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <title>Detecting Co-Derivative Documents in Large Text Collections</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/481_paper.pdf</url>
       <abstract>We have analyzed the SPEX algorithm by Bernstein and Zobel (2004) for detecting co-derivative documents using duplicate n-grams. Although we totally agree with the claim that not using unique n-grams can greatly increase the efficiency and scalability of the process of detecting co-derivative documents, we have found serious bottlenecks in the way SPEX finds the duplicate n-grams. While the memory requirements for computing co-derivative documents can be reduced to up to 1% by only using duplicate n-grams, SPEX needs about 40 times more memory for computing the list of duplicate n-grams itself. Therefore the memory requirements of the whole process are not reduced enough to make the algorithm practical for very large collections. We propose a solution for this problem using an external sort with the suffix array in-memory sorting and temporary file compression. The proposed algorithm for computing duplicate n-grams uses a fixed amount of memory for any input size.</abstract>
@@ -2903,7 +2903,7 @@
       <author><first>Peng</first><last>Zhang</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Furu</first><last>Wei</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yuexian</first><last>Hou</last></author>
       <title>Exploiting the Role of Position Feature in <fixed-case>C</fixed-case>hinese Relation Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/540_paper.pdf</url>
@@ -2912,15 +2912,15 @@
     </paper>
     <paper id="306">
       <author><first>Ben</first><last>Allison</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <title>Authorship Attribution of <fixed-case>E</fixed-case>-Mail: Comparing Classifiers over a New Corpus for Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/552_paper.pdf</url>
       <abstract>The release of the Enron corpus provided a unique resource for studying aspects of email use, because it is largely unfiltered, and therefore presents a relatively complete collection of emails for a reasonably large number of correspondents. This paper describes a newly created subcorpus of the Enron emails which we suggest can be used to test techniqes for authorship attribution, and further shows the application of three different classification methods to this task to present baseline results. Two of the classifiers used are are standard, and have been shown to perform well in the literature, and one of the classifiers is novel and based on concurrent work that proposes a Bayesian hierarchical distribution for word counts in documents. For each of the classifiers, we present results using six text representations, including use of linguistic structures derived from a parser as well as lexical information.</abstract>
       <bibkey>allison-guthrie-2008-authorship</bibkey>
     </paper>
     <paper id="307">
-      <author><first>Michael</first><last>Kaisser</last></author>
-      <author><first>John</first><last>Lowe</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
+      <author id="john-b-lowe"><first>John</first><last>Lowe</last></author>
       <title>Creating a Research Collection of Question Answer Sentence Pairs with <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/565_paper.pdf</url>
       <abstract>Each year NIST releases a set of question, document id, answer-triples for the factoid questions used in the TREC Question Answering track. While this resource is widely used and proved itself useful for many purposes, it also is too coarse a grain-size for a lot of other purposes. In this paper we describe how we have used Amazons Mechanical Turk to have multiple subjects read the documents and identify the sentences themselves which contain the answer. For most of the 1911 questions in the test sets from 2002 to 2006 and each of the documents said to contain an answer, the Question-Answer Sentence Pairs (QASP) corpus introduced in this paper contains the identified answer sentences. We believe that this corpus, which we will make available to the public, can further stimulate research in QA, especially linguistically motivated research, where matching the question to the answer sentence by either syntactic or semantic means is a central concern.</abstract>
@@ -2966,7 +2966,7 @@
     </paper>
     <paper id="312">
       <author><first>Lei</first><last>Xia</last></author>
-      <author><first>José</first><last>Iria</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <title>An Approach to Modeling Heterogeneous Resources for Information Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/702_paper.pdf</url>
       <abstract>In this paper, we describe an approach that aims to model heterogeneous resources for information extraction. Document is modeled in graph representation that enables better understanding of multi-media document and its structure which ultimately could result better cross-media information extraction. We also describe our proposed algorithm that segment document-based on the document modeling approach we described in this paper.</abstract>
@@ -2982,7 +2982,7 @@
     <paper id="314">
       <author><first>Lorraine</first><last>Goeuriot</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <title>Characterization of Scientific and Popular Science Discourse in <fixed-case>F</fixed-case>rench, <fixed-case>J</fixed-case>apanese and <fixed-case>R</fixed-case>ussian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/743_paper.pdf</url>
       <abstract>We aim to characterize the comparability of corpora, we address this issue in the trilingual context through the distinction of expert and non expert documents. We work separately with corpora composed of documents from the medical domain in three languages (French, Japanese and Russian) which present an important linguistic distance between them. In our approach, documents are characterized in each language by their topic and by a discursive typology positioned at three levels of document analysis: structural, modal and lexical. The document typology is implemented with two learning algorithms (SVMlight and C4.5). Evaluation of results shows that the proposed discursive typology can be transposed from one language to another, as it indeed allows to distinguish the two aimed discourses (science and popular science). However, we observe that performances vary a lot according to languages, algorithms and types of discursive characteristics.</abstract>
@@ -2999,7 +2999,7 @@
     <paper id="316">
       <author><first>Nasser</first><last>Abouzakhar</last></author>
       <author><first>Ben</first><last>Allison</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <title>Unsupervised Learning-based Anomalous <fixed-case>A</fixed-case>rabic Text Detection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf</url>
       <abstract>The growing dependence of modern society on the Web as a vital source of information and communication has become inevitable. However, the Web has become an ideal channel for various terrorist organisations to publish their misleading information and send unintelligible messages to communicate with their clients as well. The increase in the number of published anomalous misleading information on the Web has led to an increase in security threats. The existing Web security mechanisms and protocols are not appropriately designed to deal with such recently developed problems. Developing technology to detect anomalous textual information has become one of the major challenges within the NLP community. This paper introduces the problem of anomalous text detection by automatically extracting linguistic features from documents and evaluating those features for patterns of suspicious and/or inconsistent information in Arabic documents. In order to achieve that, we defined specific linguistic features that characterise various Arabic writing styles. Also, the paper introduces the main challenges in Arabic processing and describes the proposed unsupervised learning model for detecting anomalous Arabic textual information.</abstract>
@@ -3052,7 +3052,7 @@
     <paper id="322">
       <author><first>Matthieu</first><last>Hermet</last></author>
       <author><first>Alain</first><last>Désilets</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <title>Using the Web as a Linguistic Resource to Automatically Correct Lexico-Syntactic Errors</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/220_paper.pdf</url>
       <abstract>This paper presents an algorithm for correcting language errors typical of second-language learners. We focus on preposition errors, which are very common among second-language learners but are not addressed well by current commercial grammar correctors and editing aids. The algorithm takes as input a sentence containing a preposition error (and possibly other errors as well), and outputs the correct preposition for that particular sentence context. We use a two-phase hybrid rule-based and statistical approach. In the first phase, rule-based processing is used to generate a short expression that captures the context of use of the preposition in the input sentence. In the second phase, Web searches are used to evaluate the frequency of this expression, when alternative prepositions are used instead of the original one. We tested this algorithm on a corpus of 133 French sentences written by intermediate second-language learners, and found that it could address 69.9% of those cases. In contrast, we found that the best French grammar and spell checker currently on the market, Antidote, addressed only 3% of those cases. We also showed that performance degrades gracefully when using a corpus of frequent n-grams to evaluate frequencies.</abstract>
@@ -3081,20 +3081,20 @@
       <bibkey>quixal-etal-2008-user</bibkey>
     </paper>
     <paper id="325">
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Ben</first><last>Allison</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <title>Professor or Screaming Beast? Detecting Anomalous Words in <fixed-case>C</fixed-case>hinese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/37_paper.pdf</url>
       <abstract>The Internet has become the most popular platform for communication. However because most of the modern computer keyboard is Latin-based, Asian languages such as Chinese cannot input its characters (Hanzi) directly with these keyboards. As a result, methods for representing Chinese characters using Latin alphabets were introduced. The most popular method among these is the Pinyin input system. Pinyin is also called Romanised Chinese in that it phonetically resembles a Chinese character. Due to the highly ambiguous mapping from Pinyin to Chinese characters, word misuses can occur using standard computer keyboard, and more commonly so in internet chat-rooms or instant messengers where the language used is less formal. In this paper we aim to develop a system that can automatically identify such anomalies, whether they are simple typos or whether they are intentional. After identifying them, the system should suggest the correct word to be used.</abstract>
       <bibkey>liu-etal-2008-professor</bibkey>
     </paper>
     <paper id="326">
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Klara</first><last>Ceberio</last></author>
-      <author><first>Nerea</first><last>Ezeiza</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Gregorio</first><last>Hernandez</last></author>
+      <author id="nerea-ezeiza"><first>Nerea</first><last>Ezeiza</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="gregorio-hernandez"><first>Gregorio</first><last>Hernandez</last></author>
       <title>Spelling Correction: from Two-Level Morphology to Open Source</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/274_paper.pdf</url>
       <abstract>Basque is a highly inflected and agglutinative language (Alegria et al., 1996). Two-level morphology has been applied successfully to this kind of languages and there are two-level based descriptions for very different languages. After doing the morphological description for a language, it is easy to develop a spelling checker/corrector for this language. However, what happens if we want to use the speller in the free world (OpenOffice, Mozilla, emacs, LaTeX, etc.)? Ispell and similar tools (aspell, hunspell, myspell) are the usual mechanisms for these purposes, but they do not fit the two-level model. In the absence of two-level morphology based mechanisms, an automatic conversion from two-level description to hunspell is described in this paper.</abstract>
@@ -3110,11 +3110,11 @@
     </paper>
     <paper id="328">
       <author><first>Yannick</first><last>Versley</last></author>
-      <author><first>Simone</first><last>Ponzetto</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone</first><last>Ponzetto</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Alan</first><last>Jern</last></author>
-      <author><first>Jason</first><last>Smith</last></author>
+      <author id="jason-smith"><first>Jason</first><last>Smith</last></author>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <title><fixed-case>BART</fixed-case>: A modular toolkit for coreference resolution</title>
@@ -3123,8 +3123,8 @@
       <bibkey>versley-etal-2008-bart-modular</bibkey>
     </paper>
     <paper id="329">
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
       <title><fixed-case>ANAWIKI</fixed-case>: Creating Anaphorically Annotated Resources through Web Cooperation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/590_paper.pdf</url>
@@ -3181,7 +3181,7 @@
     </paper>
     <paper id="336">
       <author><first>Michaela</first><last>Atterer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <title>An Inverted Index for Storing and Retrieving Grammatical Dependencies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/23_paper.pdf</url>
       <abstract>Web count statistics gathered from search engines have been widely used as a resource in a variety of NLP tasks. For some tasks, however, the information they exploit is not fine-grained enough. We propose an inverted index over grammatical relations as a fast and reliable resource to access more general and also more detailed frequency information. To build the index, we use a dependency parser to parse a large corpus. We extract binary dependency relations, such as he-subj-say (he is the subject of say) as index terms and construct the index using publicly available open-source indexing software. The unit we index over is the sentence. The index can be used to extract grammatical relations and frequency counts for these relations. The framework also provides the possibility to search for partial dependencies (say, the frequency of he occurring in subject position), words, strings and a combination of these. One possible application is the disambiguation of syntactic structures.</abstract>
@@ -3222,7 +3222,7 @@
       <bibkey>saito-etal-2008-japanese</bibkey>
     </paper>
     <paper id="341">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <author><first>Alexandra</first><last>Tudorache</last></author>
       <title><fixed-case>JMWNL</fixed-case>: an Extensible Multilingual Library for Accessing Wordnets in Different Languages</title>
@@ -3238,7 +3238,7 @@
       <bibkey>maynard-2008-benchmarking</bibkey>
     </paper>
     <paper id="343">
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <author><first>Marius</first><last>Popescu</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
       <title>Authorship Identification of <fixed-case>R</fixed-case>omanian Texts with Controversial Paternity</title>
@@ -3273,7 +3273,7 @@
       <bibkey>santaholma-chatzichrisafis-2008-knowledge</bibkey>
     </paper>
     <paper id="347">
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <title><fixed-case>ODL</fixed-case>: an Object Description Language for Lexical Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/871_paper.pdf</url>
       <abstract>This paper describes ODL, a description language for lexical information that is being developed within the context of a national project called MLRS (Maltese Language Resource Server) whose goal is to create a national corpus and computational lexicon for the Maltese language. The main aim of ODL is to make the task of the lexicographer easier by allowing lexical specifications to be set out formally so that actual entries will conform to them. The paper describes some of the background motivation, the ODL language itself, and concludes with a short example of how lexical values expressed in ODL can be mapped to an existing tagset together with some speculations about future work.</abstract>
@@ -3281,7 +3281,7 @@
     </paper>
     <paper id="348">
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Corina</first><last>Forăscu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forăscu</last></author>
       <author><first>Marius</first><last>Răschip</last></author>
       <author><first>Michael</first><last>Zock</last></author>
       <title>How to Evaluate and Raise the Quality in a Collaborative Lexicographic Approach</title>
@@ -3290,7 +3290,7 @@
       <bibkey>cristea-etal-2008-evaluate</bibkey>
     </paper>
     <paper id="349">
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
       <author><first>Lina</first><last>Henriksen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
@@ -3309,8 +3309,8 @@
     </paper>
     <paper id="351">
       <author><first>Míriam</first><last>Luján</last></author>
-      <author><first>Carlos D.</first><last>Martínez</last></author>
-      <author><first>Vicent</first><last>Alabau</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
       <title>Evaluation of several Maximum Likelihood Linear Regression Variants for Language Adaptation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/217_paper.pdf</url>
       <abstract>Multilingual Automatic Speech Recognition (ASR) systems are of great interest in multilingual environments. We studied the case of the Comunitat Valenciana where the two official languages are Spanish and Valencian. These two languages share most of their phonemes, and their syntax and vocabulary are also quite similar since they have influenced each other for many years. We constructed a system, and trained its acoustic models with a small corpus of Spanish and Valencian, which has produced poor results due to the lack of data. Adaptation techniques can be used to adapt acoustic models that are trained with a large corpus of a language inr order to obtain acoustic models for a phonetically similar language. This process is known as language adaptation. The Maximum Likelihood Linear Regression (MLLR) technique has commonly been used in speaker adaptation; however we have used MLLR in language adaptation. We compared several MLLR variants (mean square, diagonal matrix and full matrix) for language adaptation in order to choose the best alternative for our system.</abstract>
@@ -3318,7 +3318,7 @@
     </paper>
     <paper id="352">
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <title>Evaluation of Lexical Resources and Semantic Networks on a Corpus of Mental Associations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/246_paper.pdf</url>
@@ -3344,11 +3344,11 @@
     </paper>
     <paper id="355">
       <author><first>Quang Thắng</first><last>Đinh</last></author>
-      <author><first>Hồng Phương</first><last>Lê</last></author>
-      <author><first>Thị Minh Huyền</first><last>Nguyễn</last></author>
-      <author><first>Cẩm Tú</first><last>Nguyễn</last></author>
+      <author id="phuong-le-hong"><first>Hồng Phương</first><last>Lê</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thị Minh Huyền</first><last>Nguyễn</last></author>
+      <author id="cam-tu-nguyen"><first>Cẩm Tú</first><last>Nguyễn</last></author>
       <author><first>Mathias</first><last>Rossignol</last></author>
-      <author><first>Xuân Lương</first><last>Vũ</last></author>
+      <author id="xuan-luong-vu"><first>Xuân Lương</first><last>Vũ</last></author>
       <title>Word Segmentation of <fixed-case>V</fixed-case>ietnamese Texts: a Comparison of Approaches</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/493_paper.pdf</url>
       <abstract>We present in this paper a comparison between three segmentation systems for the Vietnamese language. Indeed, the majority of Vietnamese words is built by semantic composition from about 7,000 syllables, which also have a meaning as isolated words. So the identification of word boundaries in a text is not a simple task, and ambiguities often appear. Beyond the presentation of the tested systems, we also propose a standard definition for word segmentation in Vietnamese, and introduce a reference corpus developed for the purpose of evaluating such a task. The results observed confirm that it can be relatively well treated by automatic means, although a solution needs to be found to take into account out-of-vocabulary words.</abstract>
@@ -3356,12 +3356,12 @@
     </paper>
     <paper id="356">
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <author><first>Anna</first><last>Corazza</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <title>Comparing <fixed-case>I</fixed-case>talian parsers on a common Treebank: the <fixed-case>EVALITA</fixed-case> experience</title>
@@ -3370,25 +3370,25 @@
       <bibkey>bosco-etal-2008-comparing</bibkey>
     </paper>
     <paper id="357">
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Amedeo</first><last>Cappelli</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="amedeo-cappelli"><first>Amedeo</first><last>Cappelli</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Valentina</first><last>Bartalesi Lenzi</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="valentina-bartalesi-lenzi"><first>Valentina</first><last>Bartalesi Lenzi</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <title>Evaluation of Natural Language Tools for <fixed-case>I</fixed-case>talian: <fixed-case>EVALITA</fixed-case> 2007</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/630_paper.pdf</url>
       <abstract>EVALITA 2007, the first edition of the initiative devoted to the evaluation of Natural Language Processing tools for Italian, provided a shared framework where participants systems had the possibility to be evaluated on five different tasks, namely Part of Speech Tagging (organised by the University of Bologna), Parsing (organised by the University of Torino), Word Sense Disambiguation (organised by CNR-ILC, Pisa), Temporal Expression Recognition and Normalization (organised by CELCT, Trento), and Named Entity Recognition (organised by FBK, Trento). We believe that the diffusion of shared tasks and shared evaluation practices is a crucial step towards the development of resources and tools for Natural Language Processing. Experiences of this kind, in fact, are a valuable contribution to the validation of existing models and data, allowing for consistent comparisons among approaches and among representation schemes. The good response obtained by EVALITA, both in the number of participants and in the quality of results, showed that pursuing such goals is feasible not only for English, but also for other languages.</abstract>
       <bibkey>magnini-etal-2008-evaluation</bibkey>
     </paper>
     <paper id="358">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <author><first>Alexandra</first><last>Tudorache</last></author>
       <title>A Bottom-up Comparative Study of <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0 Lexical and Semantic Relations</title>
@@ -3408,14 +3408,14 @@
     </paper>
     <paper id="360">
       <author><first>Václav</first><last>Novák</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <title>Inter-sentential Coreferences in Semantic Networks: An Evaluation of Manual Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/695_paper.pdf</url>
       <abstract>We present an evaluation of inter-sentential coreference annotation in the context of manually created semantic networks. The semantic networks are constructed independently be each annotator and require an entity mapping priori to evaluating the coreference. We introduce a model used for mapping the semantic entities as well as an algorithm used for our evaluation task. Finally, we report the raw statistics for inter-annotator agreement and describe the inherent difficulty in evaluating coreference in semantic networks.</abstract>
       <bibkey>novak-hall-2008-inter</bibkey>
     </paper>
     <paper id="361">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <title>Diacritic Annotation in the <fixed-case>A</fixed-case>rabic Treebank and its Impact on Parser Evaluation</title>
@@ -3424,7 +3424,7 @@
       <bibkey>maamouri-etal-2008-diacritic</bibkey>
     </paper>
     <paper id="362">
-      <author><first>Chantal</first><last>Enguehard</last></author>
+      <author id="chantal-enguehard"><first>Chantal</first><last>Enguehard</last></author>
       <author><first>Harouna</first><last>Naroua</last></author>
       <title>Evaluation of Virtual Keyboards for <fixed-case>W</fixed-case>est-<fixed-case>A</fixed-case>frican Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/710_paper.pdf</url>
@@ -3432,10 +3432,10 @@
       <bibkey>enguehard-naroua-2008-evaluation</bibkey>
     </paper>
     <paper id="363">
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <title>Anaphora Resolution Exercise: an Overview</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/713_paper.pdf</url>
       <abstract>Evaluation campaigns have become an established way to evaluate automatic systems which tackle the same task. This paper presents the first edition of the Anaphora Resolution Exercise (ARE) and the lessons learnt from it. This first edition focused only on English pronominal anaphora and NP coreference, and was organised as an exploratory exercise where various issues were investigated. ARE proposed four different tasks: pronominal anaphora resolution and NP coreference resolution on a predefined set of entities, pronominal anaphora resolution and NP coreference resolution on raw texts. For each of these tasks different inputs and evaluation metrics were prepared. This paper presents the four tasks, their input data and evaluation metrics used. Even though a large number of researchers in the field expressed their interest to participate, only three institutions took part in the formal evaluation. The paper briefly presents their results, but does not try to interpret them because in this edition of ARE our aim was not about finding why certain methods are better, but to prepare the ground for a fully-fledged edition.</abstract>
@@ -3450,10 +3450,10 @@
       <bibkey>santos-simoes-2008-portuguese</bibkey>
     </paper>
     <paper id="365">
-      <author><first>Karin</first><last>Schuler</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Schuler</last></author>
       <author><first>Vinod</first><last>Kaggal</last></author>
       <author><first>James</first><last>Masanz</last></author>
-      <author><first>Philip</first><last>Ogren</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <title>System Evaluation on a Named Entity Corpus from Clinical Notes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/764_paper.pdf</url>
@@ -3461,16 +3461,16 @@
       <bibkey>schuler-etal-2008-system</bibkey>
     </paper>
     <paper id="366">
-      <author><first>Philip</first><last>Ogren</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
-      <author><first>Christopher</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher</first><last>Chute</last></author>
       <title>Constructing Evaluation Corpora for Automated Clinical Named Entity Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/796_paper.pdf</url>
       <abstract>We report on the construction of a gold-standard dataset consisting of annotated clinical notes suitable for evaluating our biomedical named entity recognition system. The dataset is the result of consensus between four human annotators and contains 1,556 annotations on 160 clinical notes using 658 unique concept codes from SNOMED-CT corresponding to human disorders. Inter-annotator agreement was calculated on annotations from 100 of the documents for span (90.9%), concept code (81.7%), context (84.8%), and status (86.0%) agreement. Complete agreement for span, concept code, context, and status was 74.6%. We found that creating a consensus set based on annotations from two independently-created annotation sets can reduce inter-annotator disagreement by 32.3%. We found little benefit to pre-annotating the corpus with a third-party named entity recognizer.</abstract>
       <bibkey>ogren-etal-2008-constructing</bibkey>
     </paper>
     <paper id="367">
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Marc</first><last>Carmen</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
@@ -3485,14 +3485,14 @@
     </paper>
     <paper id="368">
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <title>Training and Evaluation of <fixed-case>POS</fixed-case> Taggers on the <fixed-case>F</fixed-case>rench <fixed-case>MULTITAG</fixed-case> Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/856_paper.pdf</url>
       <abstract>The explicit introduction of morphosyntactic information into statistical machine translation approaches is receiving an important focus of attention. The current freely available Part of Speech (POS) taggers for the French language are based on a limited tagset which does not account for some flectional particularities. Moreover, there is a lack of a unified framework of training and evaluation for these kinds of linguistic resources. Therefore in this paper, three standard POS taggers (Treetagger, Brills tagger and the standard HMM POS tagger) are trained and evaluated in the same conditions on the French MULTITAG corpus. This POS-tagged corpus provides a tagset richer than the usual ones, including gender and number distinctions, for example. Experimental results show significant differences of performance between the taggers. According to the tagging accuracy estimated with a tagset of 300 items, taggers may be ranked as follows: Treetagger (95.7%), Brills tagger (94.6%), HMM tagger (93.4%). Examples of translation outputs illustrate how considering gender and number distinctions in the POS tagset can be relevant.</abstract>
       <bibkey>allauzen-bonneau-maynard-2008-training</bibkey>
     </paper>
     <paper id="369">
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Francis</first><last>Chantree</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
@@ -3503,7 +3503,7 @@
     </paper>
     <paper id="370">
       <author><first>Mark</first><last>Arehart</last></author>
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <title>A Ground Truth Dataset for Matching Culturally Diverse <fixed-case>R</fixed-case>omanized Person Names</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/291_paper.pdf</url>
       <abstract>This paper describes the development of a ground truth dataset of culturally diverse Romanized names in which approximately 70,000 names are matched against a subset of 700. We ran the subset as queries against the complete list using several matchers, created adjudication pools, adjudicated the results, and compiled two versions of ground truth based on different sets of adjudication guidelines and methods for resolving adjudicator conflicts. The name list, drawn from publicly available sources, was manually seeded with over 1500 name variants. These names include transliteration variation, database fielding errors, segmentation differences, incomplete names, titles, initials, abbreviations, nicknames, typos, OCR errors, and truncated data. These diverse types of matches, along with the coincidental name similarities already in the list, make possible a comprehensive evaluation of name matching systems. We have used the dataset to evaluate several open source and commercial algorithms and provide some of those results.</abstract>
@@ -3540,7 +3540,7 @@
       <author><first>Tetsuya</first><last>Takiguchi</last></author>
       <author><first>Satoshi</first><last>Tamura</last></author>
       <author><first>Shingo</first><last>Kuroiwa</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <title>Evaluation Framework for Distant-talking Speech Recognition under Reverberant Environments: newest Part of the <fixed-case>CENSREC</fixed-case> Series -</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/468_paper.pdf</url>
@@ -3548,15 +3548,15 @@
       <bibkey>nishiura-etal-2008-evaluation</bibkey>
     </paper>
     <paper id="374">
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <title>An Experimental Methodology for an End-to-End Evaluation in Speech-to-Speech Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/900_paper.pdf</url>
       <abstract>This paper describes the evaluation methodology used to evaluate the TC-STAR speech-to-speech translation (SST) system and the results from the third year of the project. It follows the results presented in Hamon (2007), dealing with the first end-to-end evaluation of the project. In this paper, we try to experiment with the methodology and the protocol during a second end-to-end evaluation, by comparing outputs from the TC-STAR system with interpreters from the European parliament. For this purpose, we test different criteria of evaluation and type of questions within a comprehension test. The results show that interpreters do not translate all the information (as opposed to the automatic system), but the quality of SST is still far from that of human translation. The experimental comprehension test used provides new information to study the quality of automatic systems, but without settling the issue of which protocol is the best. This depends on what the evaluator wants to know about the SST: either to have a subjective end-user evaluation or a more objective one.</abstract>
       <bibkey>hamon-mostefa-2008-experimental</bibkey>
     </paper>
     <paper id="375">
-      <author><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez-Hinarejos</last></author>
       <author><first>Vicent</first><last>Tamarit</last></author>
       <title>Evaluation of Different Segmentation Techniques for Dialogue Turns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/119_paper.pdf</url>
@@ -3565,9 +3565,9 @@
     </paper>
     <paper id="376">
       <author><first>David</first><last>Griol</last></author>
-      <author><first>Lluís F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís F.</first><last>Hurtado</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <title>Acquisition and Evaluation of a Dialog Corpus through <fixed-case>WO</fixed-case>z and Dialog Simulation Techniques</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/197_paper.pdf</url>
       <abstract>In this paper, we present a comparison between two corpora acquired by means of two different techniques. The first corpus was acquired by means of the Wizard of Oz technique. A dialog simulation technique has been developed for the acquisition of the second corpus. A random selection of the user and system turns has been used, defining stop conditions for automatically deciding if the simulated dialog is successful or not. We use several evaluation measures proposed in previous research to compare between our two acquired corpora, and then discuss the similarities and differences between the two corpora with regard to these measures.</abstract>
@@ -3575,7 +3575,7 @@
     </paper>
     <paper id="377">
       <author><first>Susan</first><last>Robinson</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Midhun</first><last>Ittycheriah</last></author>
       <author><first>Joe</first><last>Henderer</last></author>
       <title>What would you Ask a conversational Agent? Observations of Human-Agent Dialogues in a Museum Setting</title>
@@ -3585,10 +3585,10 @@
     </paper>
     <paper id="378">
       <author><first>Dave</first><last>Toney</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Eric</first><last>Bilinski</last></author>
+      <author id="eric-bilinski"><first>Eric</first><last>Bilinski</last></author>
       <title>An Evaluation of Spoken and Textual Interaction in the <fixed-case>RITEL</fixed-case> Interactive Question Answering System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/825_paper.pdf</url>
       <abstract>The RITEL project aims to integrate a spoken language dialogue system and an open-domain information retrieval system in order to enable human users to ask a general question and to refine their search for information interactively. This type of system is often referred to as an Interactive Question Answering (IQA) system. In this paper, we present an evaluation of how the performance of the RITEL system differs when users interact with it using spoken versus textual input and output. Our results indicate that while users do not perceive the two versions to perform significantly differently, many more questions are asked in a typical text-based dialogue.</abstract>
@@ -3613,7 +3613,7 @@
     </paper>
     <paper id="381">
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Yaoyong</first><last>Li</last></author>
       <title>Evaluating Evaluation Metrics for Ontology-Based Applications: Infinite Reflection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/273_paper.pdf</url>
@@ -3621,7 +3621,7 @@
       <bibkey>maynard-etal-2008-evaluating</bibkey>
     </paper>
     <paper id="382">
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <title>Lexical Substitution as a Framework for Multiword Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/275_paper.pdf</url>
       <abstract>In this paper we analyse data from the SemEval lexical substitution task in those cases where the annotators indicated that the target word was part of a phrase before substituting the target with a synonym. We classify the types of phrases that were provided in this way by the annotators in order to evaluate the utility of the method as a means of producing a gold-standard for multiword evaluation. Multiword evaluation is a difficult area because lexical resources are not complete and peoples judgments on multiwords vary. Whilst we do not believe lexical substitution is necessarily a panacea for multiword evaluation, we do believe it is a useful methodology because the annotator is focused on the task of substitution. Following the analysis, we make some recommendations which would make the data easier to classify.</abstract>
@@ -3635,9 +3635,9 @@
       <bibkey>emms-2008-tree</bibkey>
     </paper>
     <paper id="384">
-      <author><first>A. Cüneyd</first><last>Tantuǧ</last></author>
+      <author id="ahmet-cuneyd-tantug"><first>A. Cüneyd</first><last>Tantuǧ</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Ilknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>Ilknur Durgar</first><last>El-Kahlout</last></author>
       <title><fixed-case>BLEU</fixed-case>+: a Tool for Fine-Grained <fixed-case>BLEU</fixed-case> Computation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/382_paper.pdf</url>
       <abstract>We present a tool, BLEU+, which implements various extension to BLEU computation to allow for a better understanding of the translation performance, especially for morphologically complex languages. BLEU+ takes into account both closeness in morphological structure, closeness of the root words in the WordNet hierarchy while comparing tokens in the candidate and reference sentence. In addition to gauging performance at a finer level of granularity, BLEU+ also allows the computation of various upper bound oracle scores: comparing all tokens considering only the roots allows us to get an upper bound when all errors due to morphological structure are fixed, while comparing tokens in an error-tolerant way considering minor morpheme edit operations, allows us to get a (more realistic) upper bound when tokens that differ in morpheme insertions/deletions and substitutions are fixed. We use BLEU+ in the fine-grained evaluation of the output of our English-to-Turkish statistical MT system.</abstract>
@@ -3646,7 +3646,7 @@
     <paper id="385">
       <author><first>C. Ray</first><last>Graham</last></author>
       <author><first>Deryle</first><last>Lonsdale</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Aaron</first><last>Johnson</last></author>
       <author><first>Jeremiah</first><last>McGhee</last></author>
       <title>Elicited Imitation as an Oral Proficiency Measure with <fixed-case>ASR</fixed-case> Scoring</title>
@@ -3655,9 +3655,9 @@
       <bibkey>graham-etal-2008-elicited</bibkey>
     </paper>
     <paper id="386">
-      <author><first>Pedro</first><last>Concejero</last></author>
+      <author id="pedro-concejero-cerezo"><first>Pedro</first><last>Concejero</last></author>
       <author><first>Daniel</first><last>Tapias</last></author>
-      <author><first>Juan José</first><last>Rodríguez</last></author>
+      <author id="juan-jose-rodriguez-soler"><first>Juan José</first><last>Rodríguez</last></author>
       <author><first>Juan Carlos</first><last>Luengo</last></author>
       <author><first>Sebastián</first><last>Sánchez</last></author>
       <title>Methodology for Evaluating the Usability of User Interfaces in Mobile Services</title>
@@ -3666,14 +3666,14 @@
       <bibkey>concejero-etal-2008-methodology</bibkey>
     </paper>
     <paper id="387">
-      <author><first>Edouard</first><last>Geoffrois</last></author>
+      <author id="edouard-geoffrois"><first>Edouard</first><last>Geoffrois</last></author>
       <title>An Economic View on Human Language Technology Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/616_paper.pdf</url>
       <abstract>This paper analyses some general issues about human language technology evaluation, focusing on economic aspects. It first provides a scientific rationale for the need to organize evaluation in the form of campaigns, by relating this need to some basic characteristics of human language technologies, namely that they involve learning to process information in a way which reproduces human capabilities. It then reviews the benefits and constraints of these evaluation campaigns. Borrowing concepts from the field of economics, it also provides an analysis of the economic incentives to organize evaluation campaigns. It entails from this analysis that fitting evaluation campaigns to the needs of scientific research requires a strong implication in term of research policy and public funding.</abstract>
       <bibkey>geoffrois-2008-economic</bibkey>
     </paper>
     <paper id="388">
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <title>Comparing Corpus-based to Web-based Lookup Techniques for Automatic <fixed-case>E</fixed-case>nglish Inclusion Detection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/674_paper.pdf</url>
       <abstract>The influence of English as a global language continues to grow to an extent that its words and expressions permeate the original forms of other languages. This paper evaluates a modular Web-based sub-component of an existing English inclusion classifier and compares it to a corpus-based lookup technique. Both approaches are evaluated on a German gold standard data set. It is demonstrated to what extent the Web-based approach benefits from the amount of data available online and the fact that this data is constantly updated.</abstract>
@@ -3687,8 +3687,8 @@
       <bibkey>hasler-2008-centering</bibkey>
     </paper>
     <paper id="390">
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
@@ -3706,21 +3706,21 @@
     </paper>
     <paper id="392">
       <author><first>David</first><last>Hardcastle</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <title>Can we Evaluate the Quality of Generated Text?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/797_paper.pdf</url>
       <abstract>Evaluating the output of NLG systems is notoriously difficult, and performing assessments of text quality even more so. A range of automated and subject-based approaches to the evaluation of text quality have been taken, including comparison with a putative gold standard text, analysis of specific linguistic features of the output, expert review and task-based evaluation. In this paper we present the results of a variety of such approaches in the context of a case study application. We discuss the problems encountered in the implementation of each approach in the context of the literature, and propose that a test based on the Turing test for machine intelligence offers a way forward in the evaluation of the subjective notion of text quality.</abstract>
       <bibkey>hardcastle-scott-2008-evaluate</bibkey>
     </paper>
     <paper id="393">
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Mark</first><last>Arehart</last></author>
-      <author><first>Catherine</first><last>Ball</last></author>
+      <author id="catherine-n-ball"><first>Catherine</first><last>Ball</last></author>
       <author><first>John</first><last>Polk</last></author>
       <author><first>Alan</first><last>Rubenstein</last></author>
-      <author><first>Kenneth</first><last>Samuel</last></author>
-      <author><first>Elizabeth</first><last>Schroeder</last></author>
-      <author><first>Eva</first><last>Vecchi</last></author>
+      <author id="ken-samuel"><first>Kenneth</first><last>Samuel</last></author>
+      <author id="elizabeth-schroeder"><first>Elizabeth</first><last>Schroeder</last></author>
+      <author id="eva-maria-vecchi"><first>Eva</first><last>Vecchi</last></author>
       <author><first>Chris</first><last>Wolf</last></author>
       <title>An Infrastructure, Tools and Methodology for Evaluation of Multicultural Name Matching Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/805_paper.pdf</url>
@@ -3729,7 +3729,7 @@
     </paper>
     <paper id="394">
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
       <title>Evaluating Robustness Of A <fixed-case>QA</fixed-case> System Through A Corpus Of Real-Life Questions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/244_paper.pdf</url>
@@ -3753,7 +3753,7 @@
     </paper>
     <paper id="397">
       <author><first>Laurent</first><last>Blin</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <author><first>Vincent</first><last>Barreaud</last></author>
       <title><fixed-case>WEB</fixed-case>-Based Listening Test System for Speech Synthesis and Speech Conversion Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/573_paper.pdf</url>
@@ -3770,8 +3770,8 @@
       <bibkey>dividino-etal-2008-semiotic</bibkey>
     </paper>
     <paper id="399">
-      <author><first>George</first><last>Demetriou</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="george-demetriou"><first>George</first><last>Demetriou</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Haotian</first><last>Sun</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <title><fixed-case>ANNALIST</fixed-case> - <fixed-case>ANN</fixed-case>otation <fixed-case>ALI</fixed-case>gnment and Scoring Tool</title>
@@ -3780,7 +3780,7 @@
       <bibkey>demetriou-etal-2008-annalist</bibkey>
     </paper>
     <paper id="400">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Mike</first><last>Flynn</last></author>
       <author><first>Pierre</first><last>Wellner</last></author>
       <author><first>Philippe</first><last>Baudrion</last></author>
@@ -3791,7 +3791,7 @@
     </paper>
     <paper id="401">
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Maghi</first><last>King</last></author>
       <title>Improving Contextual Quality Models for <fixed-case>MT</fixed-case> Evaluation Based on Evaluators’ Feedback</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/236_paper.pdf</url>
@@ -3801,10 +3801,10 @@
     <paper id="402">
       <author><first>Brian</first><last>Weiss</last></author>
       <author><first>Craig</first><last>Schlenoff</last></author>
-      <author><first>Greg</first><last>Sanders</last></author>
+      <author id="gregory-sanders"><first>Greg</first><last>Sanders</last></author>
       <author><first>Michelle</first><last>Steves</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Jon</first><last>Phillips</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="jon-phillips"><first>Jon</first><last>Phillips</last></author>
       <author><first>Dan</first><last>Parvaz</last></author>
       <title>Performance Evaluation of Speech Translation Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/99_paper.pdf</url>
@@ -3813,18 +3813,18 @@
     </paper>
     <paper id="403">
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Automatic Evaluation Measures for Statistical Machine Translation System Optimization</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf</url>
       <abstract>Evaluation of machine translation (MT) output is a challenging task. In most cases, there is no single correct translation. In the extreme case, two translations of the same input can have completely different words and sentence structure while still both being perfectly valid. Large projects and competitions for MT research raised the need for reliable and efficient evaluation of MT systems. For the funding side, the obvious motivation is to measure performance and progress of research. This often results in a specific measure or metric taken as primarily evaluation criterion. Do improvements in one measure really lead to improved MT performance? How does a gain in one evaluation metric affect other measures? This paper is going to answer these questions by a number of experiments.</abstract>
       <bibkey>mauser-etal-2008-automatic</bibkey>
     </paper>
     <paper id="404">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <title><fixed-case>RACAI</fixed-case>’s Linguistic Web Services</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/90_paper.pdf</url>
       <abstract>Nowadays, there are hundreds of Natural Language Processing applications and resources for different languages that are developed and/or used, almost exclusively with a few but notable exceptions, by their creators. Assuming that the right to use a particular application or resource is licensed by the rightful owner, the user is faced with the often not so easy task of interfacing it with his/her own systems. Even if standards are defined that provide a unified way of encoding resources, few are the cases when the resources are actually coded in conformance to the standard (and, at present time, there is no such thing as general NLP application interoperability). Semantic Web came with the promise that the web will be a universal medium for information exchange whatever its content. In this context, the present article outlines a collection of linguistic web services for Romanian and English, developed at the Research Institute for AI for the Romanian Academy (RACAI) which are ready to provide a standardized way of calling particular NLP operations and extract the results without caring about what exactly is going on in the background.</abstract>
@@ -3840,7 +3840,7 @@
       <bibkey>biber-etal-2008-words</bibkey>
     </paper>
     <paper id="406">
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Gerhard</first><last>Heyer</last></author>
       <author><first>Florian</first><last>Holz</last></author>
@@ -3850,11 +3850,11 @@
       <bibkey>biemann-etal-2008-asv</bibkey>
     </paper>
     <paper id="407">
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
       <author><first>Pedro</first><last>Martins</last></author>
       <author><first>Filipe</first><last>Nunes</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Sara</first><last>Silveira</last></author>
       <title><fixed-case>LX</fixed-case>-Service: Web Services of Language Technology for <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/640_paper.pdf</url>
@@ -3862,8 +3862,8 @@
       <bibkey>branco-etal-2008-lx</bibkey>
     </paper>
     <paper id="408">
-      <author><first>Emanuele</first><last>Pianta</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <author><first>Roberto</first><last>Zanoli</last></author>
       <title>The <fixed-case>T</fixed-case>ext<fixed-case>P</fixed-case>ro Tool Suite</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/645_paper.pdf</url>
@@ -3871,8 +3871,8 @@
       <bibkey>pianta-etal-2008-textpro</bibkey>
     </paper>
     <paper id="409">
-      <author><first>Bayan Abu</first><last>Shawar</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="bayan-abu-shawar"><first>Bayan Abu</first><last>Shawar</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>An <fixed-case>AI</fixed-case>-inspired intelligent agent/student architecture to combine Language Resources research and teaching</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/777_paper.pdf</url>
       <abstract>This paper describes experimental use of the multi-agent architecture to integrate Natural Language and Information Systems research and teaching, by casting a group of students as intelligent agents to collect and analyse English language resources from around the world. Section 2 and section 3 describe the hybrid intelligent information systems experiments at the University of Leeds and the results generated, including several research papers accepted at international conferences, and a finalist entry in the British Computer Society Machine Intelligence contest. Our proposals for applying the multi-agent idea in other universities such as the Arab Open University are presented in section 4. The conclusion is presented in section 5: the success of hybrid intelligent information systems experiments in generating research papers within a limited time.</abstract>
@@ -3881,7 +3881,7 @@
     <paper id="410">
       <author><first>Kjell</first><last>Elenius</last></author>
       <author><first>Eva</first><last>Forsbom</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <title>Language Resources and Tools for <fixed-case>S</fixed-case>wedish: A Survey</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/156_paper.pdf</url>
       <abstract>Language resources and tools to create and process these resources are necessary components in human language technology and natural language applications. In this paper, we describe a survey of existing language resources for Swedish, and the need for Swedish language resources to be used in research and real-world applications in language technology as well as in linguistic research. The survey is based on a questionnaire sent to industry and academia, institutions and organizations, and to experts involved in the development of Swedish language resources in Sweden, the Nordic countries and world-wide.</abstract>
@@ -3889,9 +3889,9 @@
     </paper>
     <paper id="411">
       <author><first>Lars</first><last>Nygaard</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <title><fixed-case>G</fixed-case>lossa: a Multilingual, Multimodal, Configurable User Interface</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/159_paper.pdf</url>
       <abstract>We describe a web-based corpus query system, Glossa, which combines the expressiveness of regular query languages with the user-friendliness of a graphical interface. Since corpus users are usually linguists with little interest in technical matters, we have developed a system where the user need not have any prior knowledge of the search system. Furthermore, no previous knowledge of abbreviations for metavariables such as part of speech and source text is needed. All searches are done using checkboxes, pull-down menus, or writing simple letters to make words or other strings. Querying for more than one word is simply done by adding an additional query box, and for parts of words by choosing a feature such as start of word. The Glossa system also allows a wide range of viewing and post-processing options. Collocations can be viewed and counted in a number of ways, and be viewed as different kinds of graphical charts. Further annotation and deletion of single results for further processing is also easy. The Glossa system is already in use for a number of corpora. Corpus administrators can easily adapt the system to a wide range of corpora, including multilingual corpora and corpora with audio and video content.</abstract>
@@ -3900,27 +3900,27 @@
     <paper id="412">
       <author><first>Ekaterina</first><last>Buyko</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Antonio</first><last>Pareja Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja Lora</last></author>
       <title>Ontology-Based Interface Specifications for a <fixed-case>NLP</fixed-case> Pipeline Architecture</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/215_paper.pdf</url>
       <abstract>The high level of heterogeneity between linguistic annotations usually complicates the interoperability of processing modules within an NLP pipeline. In this paper, a framework for the interoperation of NLP components, based on a data-driven architecture, is presented. Here, ontologies of linguistic annotation are employed to provide a conceptual basis for the tagset-neutral processing of linguistic annotations. The framework proposed here is based on a set of structured OWL ontologies: a reference ontology, a set of annotation models which formalize different annotation schemes, and a declarative linking between these, specified separately. This modular architecture is particularly scalable and flexible as it allows for the integration of different reference ontologies of linguistic annotations in order to overcome the absence of a consensus for an ontology of linguistic terminology. Our proposal originates from three lines of research from different fields: research on annotation type systems in UIMA; the ontological architecture OLiA, originally developed for sustainable documentation and annotation-independent corpus browsing, and the ontologies of the OntoTag model, targeted towards the processing of linguistic annotations in Semantic Web applications. We describe how UIMA annotations can be backed up by ontological specifications of annotation schemes as in the OLiA model, and how these are linked to the OntoTag ontologies, which allow for further ontological processing.</abstract>
       <bibkey>buyko-etal-2008-ontology</bibkey>
     </paper>
     <paper id="413">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>Foundation of a Component-based Flexible Registry for Language Resources and Technology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/364_paper.pdf</url>
       <abstract>Within the CLARIN e-science infrastructure project it is foreseen to develop a component-based registry for metadata for Language Resources and Language Technology. With this registry it is hoped to overcome the problems of the current available systems with respect to inflexible fixed schema, unsuitable terminology and interoperability problems. The registry will address interoperability needs by refering to a shared vocabulary registered in data category registries as they are suggested by ISO.</abstract>
       <bibkey>broeder-etal-2008-foundation</bibkey>
     </paper>
     <paper id="414">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>David</first><last>Nathan</last></author>
       <author><first>Sven</first><last>Strömqvist</last></author>
       <author><first>Remco</first><last>van Veenendaal</last></author>
@@ -3931,26 +3931,26 @@
     </paper>
     <paper id="415">
       <author><first>Paul</first><last>Trilsbeek</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Tobias</first><last>Valkenhoef</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>A Grid of Regional Language Archives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/376_paper.pdf</url>
       <abstract>About two years ago, the Max Planck Institute for Psycholinguistics in Nijmegen, The Netherlands, started an initiative to install regional language archives in various places around the world, particularly in places where a large number of endangered languages exist and are being documented. These digital archives make use of the LAT archiving framework that the MPI has developed over the past nine years. This framework consists of a number of web-based tools for depositing, organizing and utilizing linguistic resources in a digital archive. The regional archives are in principle autonomous archives, but they can decide to share metadata descriptions and language resources with the MPI archive in Nijmegen and become part of a grid of linked LAT archives. By doing so, they will also take advantage of the long-term preservation strategy of the MPI archive. This paper describes the reasoning behind this initiative and how in practice such an archive is set up.</abstract>
       <bibkey>trilsbeek-etal-2008-grid</bibkey>
     </paper>
     <paper id="416">
-      <author><last>Tokunaga</last><first>Takenobu</first></author>
+      <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Dain</first><last>Kaplan</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author><last>Calzolari</last><first>Nicoletta</first></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <author><first>Kiyoaki</first><last>Shirai</last></author>
       <author><first>Virach</first><last>Sornlertlamvanich</last></author>
       <author><first>Thatsanee</first><last>Charoenporn</last></author>
-      <author><last>Xia</last><first>YingJu</first></author>
+      <author id="yingju-xia"><first>YingJu</first><last>Xia</last></author>
       <title>Adapting International Standard for <fixed-case>A</fixed-case>sian Language Technologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/422_paper.pdf</url>
       <abstract>Corpus-based approaches and statistical approaches have been the main stream of natural language processing research for the past two decades. Language resources play a key role in such approaches, but there is an insufficient amount of language resources in many Asian languages. In this situation, standardisation of language resources would be of great help in developing resources in new languages. This paper presents the latest development efforts of our project which aims at creating a common standard for Asian language resources that is compatible with an international standard. In particular, the paper focuses on i) lexical specification and data categories relevant for building multilingual lexical resources for Asian languages; ii) a core upper-layer ontology needed for ensuring multilingual interoperability and iii) the evaluation platform used to test the entire architectural framework.</abstract>
@@ -3967,7 +3967,7 @@
       <bibkey>shinzato-etal-2008-large</bibkey>
     </paper>
     <paper id="418">
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
@@ -4015,16 +4015,16 @@
       <bibkey>tohyama-etal-2008-construction-metadata</bibkey>
     </paper>
     <paper id="422">
-      <author><first>Bodil Nistrup</first><last>Madsen</last></author>
-      <author><first>Hanne Erdman</first><last>Thomsen</last></author>
+      <author id="bodil-nistrup-madsen"><first>Bodil Nistrup</first><last>Madsen</last></author>
+      <author id="hanne-erdman-thomsen"><first>Hanne Erdman</first><last>Thomsen</last></author>
       <title>A Taxonomy of Lexical Metadata Categories</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/864_paper.pdf</url>
       <abstract>Metadata registries comprising sets of categories to be used in data collections exist in many fields. The purpose of a metadata registry is to facilitate data exchange and interoperability within a domain, and registries often contain definitions and examples. In this paper we will argue that in order to ensure completeness, consistency, user-friendliness and extensibility, metadata registries should be structured as taxonomies. Furthermore we will illustrate the usefulness of using terminological ontologies as the basis for developing metadata taxonomies. In this connection we will discuss the principles of developing ontologies and the differences between taxonomies and ontologies. The paper includes examples of initiatives for developing metadata standards within the field of language resources, more specifically lexical data categories, elaborated at international and national level. However, the principles that we introduce for the development of data category registries are relevant not only for metadata registries for lexical resources, but for all kinds of metadata registries.</abstract>
       <bibkey>madsen-thomsen-2008-taxonomy</bibkey>
     </paper>
     <paper id="423">
-      <author><first>Shuichi</first><last>Itahashi</last></author>
-      <author><first>Chiu-yu</first><last>Tseng</last></author>
+      <author id="shuichi-itahashi"><first>Shuichi</first><last>Itahashi</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-yu</first><last>Tseng</last></author>
       <title>The 2008 Oriental <fixed-case>COCOSDA</fixed-case> Book Project: in Commemoration of the First Decade of Sustained Activities in <fixed-case>A</fixed-case>sia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/28_paper.pdf</url>
       <abstract>The purpose of Oriental COCOSDA is to provide the Asian community a platform to exchange ideas, to share information and to discuss regional matters on creation, utilization, dissemination of spoken language corpora of oriental languages and also on the assessment methods of speech recognition/synthesis systems as well as to promote speech research on oriental languages. Since its preparatory meeting in Hong Kong in 1997, annual workshops have been organized and held in Japan, Taiwan, China, Korea, Thailand, Singapore, India, Indonesia, Malaysia, and Vietnam from 1998 onwards. The organization is managed by a convener, three advisory members, and 26 committee members from 13 regions in Oriental area. In order to commemorate 10 years of continued activities, the members have decided to publish a book which covers a wide range of speech research. Special focus will be on speech resources or speech corpora in Oriental countries and standardization of speech input/output systems performance evaluation methods on which key technologies for speech systems development are based. The book will also include linguistic outlines of oriental languages, annotation, labeling, and software tools for speech processing.</abstract>
@@ -4033,7 +4033,7 @@
     <paper id="424">
       <author><first>Adam</first><last>Przepiórkowski</last></author>
       <author><first>Rafał L.</first><last>Górski</last></author>
-      <author><first>Barbara</first><last>Lewandowska-Tomaszyk</last></author>
+      <author id="barbara-lewandowska-tomaszyk"><first>Barbara</first><last>Lewandowska-Tomaszyk</last></author>
       <author><first>Marek</first><last>Łaziński</last></author>
       <title>Towards the <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus of <fixed-case>P</fixed-case>olish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/211_paper.pdf</url>
@@ -4052,9 +4052,9 @@
       <author><first>Bente</first><last>Maegaard</last></author>
       <author><first>Mohammed</first><last>Atiyya</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <author><first>Chafic</first><last>Mokbel</last></author>
-      <author><first>Mustafa</first><last>Yaseen</last></author>
+      <author id="mustafa-yaseen"><first>Mustafa</first><last>Yaseen</last></author>
       <title><fixed-case>MEDAR</fixed-case>: Collaboration between <fixed-case>E</fixed-case>uropean and Mediterranean <fixed-case>A</fixed-case>rabic Partners to Support the Development of Language Technology for <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/917_paper.pdf</url>
       <abstract>After the successful completion of the NEMLAR project 2003-2005, a new opportunity for a project was opened by the European Commission, and a group of largely the same partners is now executing the MEDAR project. MEDAR will be updating the surveys and BLARK for Arabic already made, and will then focus on machine translation (and other tools for translation) and information retrieval with a focus on language resources, tools and evaluation for these applications. A very important part of the MEDAR project is to reinforce and extend the NEMLAR network and to create a cooperation roadmap for Human Language Technologies for Arabic. It is expected that the cooperation roadmap will attract wide attention from other parties and that it can help create a larger platform for collaborative projects. Finally, the project will focus on dissemination of knowledge about existing resources and tools, as well as actors and activities; this will happen through newsletter, website and an international conference which will follow up on the Cairo conference of 2004. Dissemination to user communities will also be important, e.g. through participation in translators? conferences. The goal of these activities is to create a stronger and lasting collaboration between EU countries and Arabic speaking countries.</abstract>
@@ -4071,14 +4071,14 @@
     </paper>
     <paper id="428">
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title><fixed-case>LIRICS</fixed-case> Semantic Role Annotation: Design and Evaluation of a Set of Data Categories</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/17_paper.pdf</url>
       <abstract>Semantic roles have often proved to be useful labels for stating linguistic generalisations of various sorts. There is, however, a lack of agreement on their defining criteria, which causes serious problems for semantic roles to be a useful classificatory device for predicate-argument relations. These criteria should (a) support the design of a semantic role set which is complete but does not contain redundant relations; (b) be based on semantic rather than morphological, lexical or syntactic properties; and (c) enable formal interpretation. In this paper we report on the analyses of alternative approaches to annotation and representation of semantic role information (such as FrameNet, PropBank and VerbNet) with respect to their models of description, granularity of semantic role sets, definitions of semantic roles concepts, consistency and reliability of annotations. We present methodological principles for characterising well-defined concepts which were developed within the LIRICS (Linguistic InfRastructure for Interoperable ResourCes and Systems; see <url>http://lirics.loria.fr</url>) project, as well as the designed set of semantic roles and their definitions in ISO 12620 format. We discuss evaluation results of the defined concepts for semantic role annotation concerning the redundancy and completeness of the tagset and the reliability of annotations in terms of inter-annotator agreement.</abstract>
       <bibkey>petukhova-bunt-2008-lirics</bibkey>
     </paper>
     <paper id="429">
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <title>Reusable Tagset Conversion Using Tagset Drivers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/66_paper.pdf</url>
       <abstract>Part-of-speech or morphological tags are important means of annotation in a vast number of corpora. However, different sets of tags are used in different corpora, even for the same language. Tagset conversion is difficult, and solutions tend to be tailored to a particular pair of tagsets. We propose a universal approach that makes the conversion tools reusable. We also provide an indirect evaluation in the context of a parsing task.</abstract>
@@ -4095,8 +4095,8 @@
     <paper id="431">
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Sue Ellen</first><last>Wright</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="sue-ellen-wright"><first>Sue Ellen</first><last>Wright</last></author>
       <title><fixed-case>ISO</fixed-case>cat: Corralling Data Categories in the Wild</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/222_paper.pdf</url>
       <abstract>To achieve true interoperability for valuable linguistic resources different levels of variation need to be addressed. ISO Technical Committee 37, Terminology and other language and content resources, is developing a Data Category Registry. This registry will provide a reusable set of data categories. A new implementation, dubbed ISOcat, of the registry is currently under construction. This paper shortly describes the new data model for data categories that will be introduced in this implementation. It goes on with a sketch of the standardization process. Completed data categories can be reused by the community. This is done by either making a selection of data categories using the ISOcat web interface, or by other tools which interact with the ISOcat system using one of its various Application Programming Interfaces. Linguistic resources that use data categories from the registry should include persistent references, e.g. in the metadata or schemata of the resource, which point back to their origin. These data category references can then be used to determine if two or more resources share common semantics, thus providing a level of interoperability close to the source data and a promising layer for semantic alignment on higher levels.</abstract>
@@ -4121,7 +4121,7 @@
     <paper id="434">
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Franck</first><last>Gandcher</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <title>A Guide for the Production of Reusable Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/898_paper.pdf</url>
@@ -4136,7 +4136,7 @@
       <bibkey>maurel-2008-prolexbase</bibkey>
     </paper>
     <paper id="436">
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Chiharu</first><last>Narawa</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
@@ -4161,7 +4161,7 @@
       <bibkey>fujii-2008-producing</bibkey>
     </paper>
     <paper id="439">
-      <author><first>Folkert</first><last>de Vriend</last></author>
+      <author id="folkert-de-vriend"><first>Folkert</first><last>de Vriend</last></author>
       <author><first>Jan Pieter</first><last>Kunst</last></author>
       <author><first>Louis</first><last>ten Bosch</last></author>
       <author><first>Charlotte</first><last>Giesbers</last></author>
@@ -4181,7 +4181,7 @@
     </paper>
     <paper id="441">
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title><fixed-case>P</fixed-case>ro<fixed-case>POSEL</fixed-case>: A Prosody and <fixed-case>POS</fixed-case> <fixed-case>E</fixed-case>nglish Lexicon for Language Engineering</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/724_paper.pdf</url>
       <abstract>ProPOSEL is a prototype prosody and PoS (part-of-speech) English lexicon for Language Engineering, derived from the following language resources: the computer-usable dictionary CUVPlus, the CELEX-2 database, the Carnegie-Mellon Pronouncing Dictionary, and the BNC, LOB and Penn Treebank PoS-tagged corpora. The lexicon is designed for the target application of prosodic phrase break prediction but is also relevant to other machine learning and language engineering tasks. It supplements the existing record structure for wordform entries in CUVPlus with syntactic annotations from rival PoS-tagging schemes, mapped to fields for default closed and open-class word categories and for lexical stress patterns representing the rhythmic structure of wordforms and interpreted as potential new text-based features for automatic phrase break classifiers. The current version of the lexicon comes as a textfile of 104052 separate entries and is intended for distribution with the Natural Language ToolKit; it is therefore accompanied by supporting Python software for manipulating the data so that it can be used for Natural Language Processing (NLP) and corpus-based research in speech synthesis and speech recognition.</abstract>
@@ -4196,7 +4196,7 @@
       <bibkey>westerhout-monachesi-2008-creating</bibkey>
     </paper>
     <paper id="443">
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <title>Using Similarity Measures to Extend the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/823_paper.pdf</url>
       <abstract>Deep processing of natural language requires large scale lexical resources that have sufficient coverage at a sufficient level of detail and accuracy (i.e. both recall and precision). Hand-crafted lexicons are extremely labour-intensive to create and maintain, and require continuous updating and extension to retain their level of usability. In this paper we present a technique for extending lexicons using similarity measures that can be extracted from corpora. The technique involves creating lexical entries for unknown words based on entries for words that are known and that are deemed to be distributionally similar. We demonstrate the applicability of the approach by providing an extended lexicon for the LinGO system using similarity measures extracted from the BNC. We also discuss the advantages and disadvantages of using such lexical extensions in different ways: principally either as part of the main lexicon or as a separate resource used only for last resort use.</abstract>
@@ -4210,9 +4210,9 @@
       <bibkey>adolphs-2008-acquiring</bibkey>
     </paper>
     <paper id="445">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <title><fixed-case>COLDIC</fixed-case>, a Lexicographic Platform for <fixed-case>LMF</fixed-case> compliant lexica</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/42_paper.pdf</url>
@@ -4223,30 +4223,30 @@
       <author><first>David</first><last>Bamman</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <author><first>Roberto</first><last>Busa</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <title>The Annotation Guidelines of the <fixed-case>L</fixed-case>atin Dependency Treebank and Index <fixed-case>T</fixed-case>homisticus Treebank: the Treatment of some specific Syntactic Constructions in <fixed-case>L</fixed-case>atin</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/25_paper.pdf</url>
       <abstract>The paper describes the treatment of some specific syntactic constructions in two treebanks of Latin according to a common set of annotation guidelines. Both projects work within the theoretical framework of Dependency Grammar, which has been demonstrated to be an especially appropriate framework for the representation of languages with a moderately free word order, where the linear order of constituents is broken up with elements of other constituents. The two projects are the first of their kind for Latin, so no prior established guidelines for syntactic annotation are available to rely on. The general model for the adopted style of representation is that used by the Prague Dependency Treebank, with departures arising from the Latin grammar of Pinkster, specifically in the traditional grammatical categories of the ablative absolute, the accusative + infinitive, and gerunds/gerundives. Sharing common annotation guidelines allows us to compare the datasets of the two treebanks for tasks such as mutually checking annotation consistency, diachronically studying specific syntactic constructions, and training statistical dependency parsers.</abstract>
       <bibkey>bamman-etal-2008-annotation</bibkey>
     </paper>
     <paper id="447">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
       <title>Unsupervised Lexical Acquisition for Part of Speech Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/56_paper.pdf</url>
       <abstract>It is known that POS tagging is not very accurate for unknown words (words which the POS tagger has not seen in the training corpora). Thus, a first step to improve the tagging accuracy would be to extend the coverage of the taggers learned lexicon. It turns out that, through the use of a simple procedure, one can extend this lexicon without using additional, hard to obtain, hand-validated training corpora. The basic idea consists of merely adding new words along with their (correct) POS tags to the lexicon and trying to estimate the lexical distribution of these words according to similar ambiguity classes already present in the lexicon. We present a method of automatically acquire high quality POS tagging lexicons based on morphologic analysis and generation. Currently, this procedure works on Romanian for which we have a required paradigmatic generation procedure but the architecture remains general in the sense that given the appropriate substitutes for the morphological generator and POS tagger, one should obtain similar results.</abstract>
       <bibkey>tufis-etal-2008-unsupervised</bibkey>
     </paper>
     <paper id="448">
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Christopher</first><last>Gledhill</last></author>
-      <author><first>Dan</first><last>Ştefanescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefanescu</last></author>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>François</first><last>Rousselot</last></author>
+      <author id="francois-rousselot"><first>François</first><last>Rousselot</last></author>
       <title>A Hybrid Approach to Extracting and Classifying <fixed-case>V</fixed-case>erb+<fixed-case>N</fixed-case>oun Constructions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/500_paper.pdf</url>
       <abstract>We present the main findings and preliminary results of an ongoing project aimed at developing a system for collocation extraction based on contextual morpho-syntactic properties. We explored two hybrid extraction methods: the first method applies language-indepedent statistical techniques followed by a linguistic filtering, while the second approach, available only for German, is based on a set of lexico-syntactic patterns to extract collocation candidates. To define extraction and filtering patterns, we studied a specific collocation category, the Verb-Noun constructions, using a model inspired by the systemic functional grammar, proposing three level analysis: lexical, functional and semantic criteria. From tagged and lemmatized corpus, we identify some contextual morpho-syntactic properties helping to filter the output of the statistical methods and to extract some potential interesting VN constructions (complex predicates vs complex predicators). The extracted candidates are validated and classified manually.</abstract>
@@ -4270,7 +4270,7 @@
       <bibkey>kountz-etal-2008-laf</bibkey>
     </paper>
     <paper id="451">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <title>The <fixed-case>JOS</fixed-case> Morphosyntactically Tagged Corpus of <fixed-case>S</fixed-case>lovene</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/89_paper.pdf</url>
@@ -4294,14 +4294,14 @@
     </paper>
     <paper id="454">
       <author><first>Steliana</first><last>Ivanova</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <title><fixed-case>POS</fixed-case> Tagging for <fixed-case>G</fixed-case>erman: how important is the Right Context?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/253_paper.pdf</url>
       <abstract>Part-of-Speech tagging is generally performed by Markov models, based on bigram or trigram models. While Markov models have a strong concentration on the left context of a word, many languages require the inclusion of right context for correct disambiguation. We show for German that the best results are reached by a combination of left and right context. If only left context is available, then changing the direction of analysis and going from right to left improves the results. In a version of MBT with default parameter settings, the inclusion of the right context improved POS tagging accuracy from 94.00% to 96.08%, thus corroborating our hypothesis. The version with optimized parameters reaches 96.73%.</abstract>
       <bibkey>ivanova-kuebler-2008-pos</bibkey>
     </paper>
     <paper id="455">
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <author><first>Stefan</first><last>Bordag</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <title><fixed-case>U</fixed-case>nsu<fixed-case>P</fixed-case>arse: unsupervised Parsing with unsupervised Part of Speech Tagging</title>
@@ -4311,7 +4311,7 @@
     </paper>
     <paper id="456">
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Antonella</first><last>Bristot</last></author>
       <title>Enriching the Venice <fixed-case>I</fixed-case>talian Treebank with Dependency and Grammatical Relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/490_paper.pdf</url>
@@ -4319,7 +4319,7 @@
       <bibkey>tonelli-etal-2008-enriching</bibkey>
     </paper>
     <paper id="457">
-      <author><first>Kristina</first><last>Vučković</last></author>
+      <author id="kristina-vuckovic"><first>Kristina</first><last>Vučković</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <author><first>Zdravko</first><last>Dovedan</last></author>
       <title>Rule-Based Chunker for <fixed-case>C</fixed-case>roatian</title>
@@ -4344,7 +4344,7 @@
       <bibkey>banik-lee-2008-study</bibkey>
     </paper>
     <paper id="460">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <title>Enhancing the <fixed-case>A</fixed-case>rabic Treebank: a Collaborative Effort toward New Annotation Guidelines</title>
@@ -4353,11 +4353,11 @@
       <bibkey>maamouri-etal-2008-enhancing</bibkey>
     </paper>
     <paper id="461">
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Olga</first><last>Babko-Malaya</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Aous</first><last>Mansouri</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
       <title>A Pilot <fixed-case>A</fixed-case>rabic <fixed-case>P</fixed-case>ropbank</title>
@@ -4366,9 +4366,9 @@
       <bibkey>palmer-etal-2008-pilot</bibkey>
     </paper>
     <paper id="462">
-      <author><first>Mark</first><last>Greenwood</last></author>
-      <author><first>José</first><last>Iria</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="mark-a-greenwood"><first>Mark</first><last>Greenwood</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title><fixed-case>S</fixed-case>axon: an Extensible Multimedia Annotator</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/158_paper.pdf</url>
       <abstract>This paper introduces Saxon, a rule-based document annotator that is capable of processing and annotating several document formats and media, both within and across documents. Furthermore, Saxon is readily extensible to support other input formats due to both its flexible rule formalism and the modular plugin architecture of the Runes framework upon which it is built. In this paper we introduce the Saxon rule formalism through examples aimed at highlighting its power and flexibility.</abstract>
@@ -4396,7 +4396,7 @@
       <author><first>Shawn</first><last>Medero</last></author>
       <author><first>Julie</first><last>Medero</last></author>
       <author><first>Robert</first><last>Parker</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Annotation Tool Development for Large-Scale Corpus Creation Projects at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/775_paper.pdf</url>
       <abstract>The Linguistic Data Consortium (LDC) creates a variety of linguistic resources - data, annotations, tools, standards and best practices - for many sponsored projects. The programming staff at LDC has created the tools and technical infrastructures to support the data creation efforts for these projects, creating tools and technical infrastructures for all aspects of data creation projects: data scouting, data collection, data selection, annotation, search, data tracking and worklow management. This paper introduces a number of samples of LDC programming staffs work, with particular focus on the recent additions and updates to the suite of software tools developed by LDC. Tools introduced include the GScout Web Data Scouting Tool, LDC Data Selection Toolkit, ACK - Annotation Collection Kit, XTrans Transcription and Speech Annotation Tool, GALE Distillation Toolkit, and the GALE MT Post Editing Workflow Management System.</abstract>
@@ -4436,7 +4436,7 @@
     </paper>
     <paper id="468">
       <author><first>Emilie</first><last>Chételat-Pelé</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <title>Sign Language Corpus Annotation: toward a new Methodology</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/168_paper.pdf</url>
       <abstract>This paper deals with non manual gestures annotation involved in Sign Language within the context of automatic generation of Sign Language. We will tackle linguistic researches in sign language, present descriptions of non manual gestures and problems lead to movement description. Then, we will propose a new annotation methodology, which allows non manual gestures description. This methodology can describe all Non Manual Gestures with precision, economy and simplicity. It is based on four points: Movement description (instead of position description); Movement decomposition (the diagonal movement is described with horizontal movement and vertical movement separately); Element decomposition (we separate higher eyelid and lower eyelid); Use of a set of symbols rather than words. One symbol can describe many phenomena (with use of colours, height...). First analysis results allow us to define precisely the structure of eye blinking and give the very first ideas for the rules to be designed. All the results must be refined and confirmed by extending the study on the whole corpus. In a second step, our annotation will be used to produce analyses in order to define rules and structure definition of Non Manual Gestures that will be evaluate in LIMSIs automatic French Sign Language generation system.</abstract>
@@ -4447,7 +4447,7 @@
       <author><first>Carol</first><last>Neidle</last></author>
       <author><first>Vassilis</first><last>Athitsos</last></author>
       <author><first>Stan</first><last>Sclaroff</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Benchmark Databases for Video-Based Automatic Sign Language Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/287_paper.pdf</url>
       <abstract>A new, linguistically annotated, video database for automatic sign language recognition is presented. The new RWTH-BOSTON-400 corpus, which consists of 843 sentences, several speakers and separate subsets for training, development, and testing is described in detail. For evaluation and benchmarking of automatic sign language recognition, large corpora are needed. Recent research has focused mainly on isolated sign language recognition methods using video sequences that have been recorded under lab conditions using special hardware like data gloves. Such databases have often consisted generally of only one speaker and thus have been speaker-dependent, and have had only small vocabularies. A new database access interface, which was designed and created to provide fast access to the database statistics and content, makes it possible to easily browse and retrieve particular subsets of the video database. Preliminary baseline results on the new corpora are presented. In contradistinction to other research in this area, all databases presented in this paper will be publicly available.</abstract>
@@ -4457,7 +4457,7 @@
       <author><first>Jan</first><last>Bungeroth</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Philippe</first><last>Dreuw</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Sara</first><last>Morrissey</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Lynette</first><last>van Zijl</last></author>
@@ -4501,7 +4501,7 @@
       <author><first>Theodoros</first><last>Kostoulas</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
       <author><first>Iosif</first><last>Mporas</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>A Real-World Emotional Speech Corpus for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/664_paper.pdf</url>
       <abstract>The present paper deals with the design and the annotation of a Greek real-world emotional speech corpus. The speech data consist of recordings collected during the interaction of naïve users with a smart-home dialogue system. Annotation of the speech data with respect to the uttered command and emotional state was performed. Initial experimentations towards recognizing negative emotional states were performed and the experimental results indicate the range of difficulties when dealing with real-world data.</abstract>
@@ -4515,7 +4515,7 @@
       <bibkey>wilson-2008-annotating</bibkey>
     </paper>
     <paper id="476">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Jean-Pierre</first><last>Martens</last></author>
       <author><first>Bart</first><last>D’hoore</last></author>
       <author><first>Kristof</first><last>D’hanens</last></author>
@@ -4546,19 +4546,19 @@
       <author><first>Téva</first><last>Merlin</last></author>
       <author><first>Sylvain</first><last>Meignier</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <title>Combined Systems for Automatic Phonetic Transcription of Proper Nouns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/455_paper.pdf</url>
       <abstract>Large vocabulary automatic speech recognition (ASR) technologies perform well in known, controlled contexts. However recognition of proper nouns is commonly considered as a difficult task. Accurate phonetic transcription of a proper noun is difficult to obtain, although it can be one of the most important resources for a recognition system. In this article, we propose methods of automatic phonetic transcription applied to proper nouns. The methods are based on combinations of the rule-based phonetic transcription generator LIA_PHON and an acoustic-phonetic decoding system. On the ESTER corpus, we observed that the combined systems obtain better results than our reference system (LIA_PHON). The WER (Word Error Rate) decreased on segments of speech containing proper nouns, without affecting negatively the results on the rest of the corpus. On the same corpus, the Proper Noun Error Rate (PNER, which is a WER computed on proper nouns only), decreased with our new system.</abstract>
       <bibkey>laurent-etal-2008-combined</bibkey>
     </paper>
     <paper id="480">
-      <author><first>Harald</first><last>Höge</last></author>
-      <author><first>Zdravko</first><last>Kacic</last></author>
+      <author id="harald-hoge"><first>Harald</first><last>Höge</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kacic</last></author>
       <author><first>Bojan</first><last>Kotnik</last></author>
       <author><first>Matej</first><last>Rojc</last></author>
       <author><first>Nicolas</first><last>Moreau</last></author>
-      <author><first>Horst-Udo</first><last>Hain</last></author>
+      <author id="horst-udo-hain"><first>Horst-Udo</first><last>Hain</last></author>
       <title>Evaluation of Modules and Tools for Speech Synthesis: the <fixed-case>ECESS</fixed-case> Framework</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/32_paper.pdf</url>
       <abstract>The consortium ECESS (European Center of Excellence for Speech Synthesis) has set up a framework for evaluation of software modules and tools relevant for speech synthesis. Till now two lines of evaluation campaigns have been established: (1) Evaluation of the ECESS TTS modules (text processing, prosody, acoustic synthesis). (2) Evaluation of ECESS tools (pitch extraction, voice activity detection, phonetic segmentation). The functionality and interfaces of the ECESS TTS have been developed by a joint effort between ECESS and the EC-funded project TC-STAR . First evaluation campaigns were conducted within TC-STAR using the ECESS framework. As TC-STAR finished in March 2007, ECESS continued and extended the evaluation of ECESS TTS modules and tools by its own. Within the paper we describe a novel framework which allows performing remote evaluation for modules via the web. First experimental results are reported. Further the result of several evaluation campaigns for tools handling pitch extraction and voice activity detection are presented.</abstract>
@@ -4590,7 +4590,7 @@
       <bibkey>matousek-etal-2008-building</bibkey>
     </paper>
     <paper id="484">
-      <author><first>Luís</first><last>Oliveira</last></author>
+      <author id="jose-luis-oliveira"><first>Luís</first><last>Oliveira</last></author>
       <author><first>Sérgio</first><last>Paulo</last></author>
       <author><first>Luís</first><last>Figueira</last></author>
       <author><first>Carlos</first><last>Mendes</last></author>
@@ -4603,17 +4603,17 @@
     </paper>
     <paper id="485">
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <title><fixed-case>MISTRAL</fixed-case>: a Statistical Machine Translation Decoder for Speech Recognition Lattices</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/293_paper.pdf</url>
       <abstract>This paper presents MISTRAL, an open source statistical machine translation decoder dedicated to spoken language translation. While typical machine translation systems take a written text as input, MISTRAL translates word lattices produced by automatic speech recognition systems. The lattices are translated in two passes using a phrase-based model. Our experiments reveal an improvement in BLEU when translating lattices instead of sentences returned by a speech recognition system.</abstract>
       <bibkey>patry-langlais-2008-mistral</bibkey>
     </paper>
     <paper id="486">
-      <author><first>Ute</first><last>Ziegenhain</last></author>
-      <author><first>Hanne</first><last>Fersoe</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
-      <author><first>Asuncion</first><last>Moreno</last></author>
+      <author id="ute-ziegenhain"><first>Ute</first><last>Ziegenhain</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersoe</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></author>
       <title><fixed-case>LC</fixed-case>-<fixed-case>STAR</fixed-case> <fixed-case>II</fixed-case>: Starring more Lexica</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/358_paper.pdf</url>
       <abstract>LC-STAR II is a follow-up project of the EU funded project LC-STAR (Lexica and Corpora for Speech-to-Speech Translation Components, IST-2001-32216). LC-STAR II develops large lexica containing information for speech processing in ten languages targeting especially automatic speech recognition and text to speech synthesis but also other applications like speech-to-speech translation and tagging. The project follows by large the specifications developed within the scope of LC-STAR covering thirteen languages: Catalan, Finnish, German, Greek, Hebrew, Italian, Mandarin Chinese, Russian, Turkish, Slovenian, Spanish, Standard Arabic and US-English. The ten new LC-STAR II languages are: Brazilian-Portuguese, Cantonese, Czech, English-UK, French, Hindi, Polish, Portuguese, Slovak, and Urdu. The project started in 2006 with a lifetime of two years. The project is funded by a consortium, which includes Microsoft (USA), Nokia (Finland), NSC (Israel), Siemens (Germany) and Harmann/Becker (Germany). The project is coordinated by UPC (Spain) and validation is performed by SPEX (The Netherlands), and CST (Denmark). The developed language resources will be shared among partners. This paper presents a summary of the creation of word lists and lexica and an overview of adaptations of the specifications and conceptual representation model from LC-STAR to the new languages. The validation procedure will be presented too.</abstract>
@@ -4621,23 +4621,23 @@
     </paper>
     <paper id="487">
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>Communicating Unknown Words in Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/392_paper.pdf</url>
       <abstract>A new approach to handle unknown words in machine translation is presented. The basic idea is to find definitions for the unknown words on the source language side and translate those definitions instead. Only monolingual resources are required, which generally offer a broader coverage than bilingual resources and are available for a large number of languages. In order to use this in a machine translation system definitions are extracted automatically from online dictionaries and encyclopedias. The translated definition is then inserted and clearly marked in the original hypothesis. This is shown to lead to significant improvements in (subjective) translation quality.</abstract>
       <bibkey>eck-etal-2008-communicating</bibkey>
     </paper>
     <paper id="488">
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sonia</first><last>Halimi</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <title>Developing Non-<fixed-case>E</fixed-case>uropean Translation Pairs in a Medium-Vocabulary Medical Speech Translation System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/443_paper.pdf</url>
       <abstract>We describe recent work on MedSLT, a medium-vocabulary interlingua-based medical speech translation system, focussing on issues that arise when handling languages of which the grammar engineer has little or no knowledge. We show how we can systematically create and maintain multiple forms of grammars, lexica and interlingual representations, with some versions being used by language informants, and some by grammar engineers. In particular, we describe the advantages of structuring the interlingua definition as a simple semantic grammar, which includes a human-readable surface form. We show how this allows us to rationalise the process of evaluating translations between languages lacking common speakers, and also makes it possible to create a simple generic tool for debugging to-interlingua translation rules. Examples presented focus on the concrete case of translation between Japanese and Arabic in both directions.</abstract>
@@ -4653,7 +4653,7 @@
       <bibkey>perera-etal-2008-clios</bibkey>
     </paper>
     <paper id="490">
-      <author><first>Takahiro</first><last>Ono</last></author>
+      <author id="takahiro-ohno"><first>Takahiro</first><last>Ono</last></author>
       <author><first>Hitomi</first><last>Tohyama</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <title>Construction and Analysis of Word-level Time-aligned Simultaneous Interpretation Corpus</title>
@@ -4664,19 +4664,19 @@
     <paper id="491">
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <author><first>Frédéric</first><last>Duvert</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Renato</first><last>de Mori</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>de Mori</last></author>
       <title>Semantic Frame Annotation on the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/256_paper.pdf</url>
       <abstract>This paper introduces a knowledge representation formalism used for annotation of the French MEDIA dialogue corpus in terms of high level semantic structures. The semantic annotation, worked out according to the Berkeley FrameNet paradigm, is incremental and partially automated. We describe an automatic interpretation process for composing semantic structures from basic semantic constituents using patterns involving words and constituents. This process contains procedures which provide semantic compositions and generating frame hypotheses by inference. The MEDIA corpus is a French dialogue corpus recorded using a Wizard of Oz system simulating a telephone server for tourist information and hotel booking. It had been manually transcribed and annotated at the word and semantic constituent levels. These levels support the automatic interpretation process which provides a high level semantic frame annotation. The Frame based Knowledge Source we composed contains Frame definitions and composition rules. We finally provide some results obtained on the automatically-derived annotation.</abstract>
       <bibkey>meurs-etal-2008-semantic</bibkey>
     </paper>
     <paper id="492">
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title>Cross-Domain Dialogue Act Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/502_paper.pdf</url>
       <abstract>We present recent work in the area of Cross-Domain Dialogue Act (DA) tagging. We have previously reported on the use of a simple dialogue act classifier based on purely intra-utterance features - principally involving word n-gram cue phrases automatically generated from a training corpus. Such a classifier performs surprisingly well, rivalling scores obtained using far more sophisticated language modelling techniques. In this paper, we apply these automatically extracted cues to a new annotated corpus, to determine the portability and generality of the cues we learn.</abstract>
@@ -4685,8 +4685,8 @@
     <paper id="493">
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Maria</first><last>Georgescul</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <title>Building Mobile Spoken Dialogue Applications Using Regulus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/620_paper.pdf</url>
       <abstract>Regulus is an Open Source platform that supports construction of rule-based medium-vocabulary spoken dialogue applications. It has already been used to build several substantial speech-enabled applications, including NASAs Clarissa procedure navigator and Geneva Universitys MedSLT medical speech translator. System like these would be far more useful if they were available on a hand-held device, rather than, as with the present version, on a laptop. In this paper we describe the Open Source framework we have developed, which makes it possible to run Regulus applications on generally available mobile devices, using a distributed client-server architecture that offers transparent and reliable integration with different types of ASR systems. We describe the architecture, an implemented calendar application prototype hosted on a mobile device, and an evaluation. The evaluation shows that performance on the mobile device is as good as performance on a normal desktop PC.</abstract>
@@ -4694,7 +4694,7 @@
     </paper>
     <paper id="494">
       <author><first>Christian</first><last>Raymond</last></author>
-      <author><first>Kepa Joseba</first><last>Rodriguez</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa Joseba</first><last>Rodriguez</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <title>Active Annotation in the <fixed-case>LUNA</fixed-case> <fixed-case>I</fixed-case>talian Corpus of Spontaneous Dialogues</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/499_paper.pdf</url>
@@ -4705,7 +4705,7 @@
       <author><first>Stefan</first><last>Hahn</last></author>
       <author><first>Patrick</first><last>Lehnen</last></author>
       <author><first>Christian</first><last>Raymond</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>A Comparison of Various Methods for Concept Tagging for Spoken Language Understanding</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/749_paper.pdf</url>
       <abstract>The extraction of flat concepts out of a given word sequence is usually one of the first steps in building a spoken language understanding (SLU) or dialogue system. This paper explores five different modelling approaches for this task and presents results on a French state-of-the-art corpus, MEDIA. Additionally, two log-linear modelling approaches could be further improved by adding morphologic knowledge. This paper goes beyond what has been reported in the literature. We applied the models on the same training and testing data and used the NIST scoring toolkit to evaluate the experimental results to ensure identical conditions for each of the experiments and the comparability of the results. Using a model based on conditional random fields, we achieve a concept error rate of 11.8% on the MEDIA evaluation corpus.</abstract>
@@ -4713,7 +4713,7 @@
     </paper>
     <paper id="496">
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
       <title>Morphosyntactic Resources for Automatic Speech Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/174_paper.pdf</url>
@@ -4721,11 +4721,11 @@
       <bibkey>huet-etal-2008-morphosyntactic</bibkey>
     </paper>
     <paper id="497">
-      <author><first>Nicolás</first><last>Morales</last></author>
+      <author id="nicolas-morales"><first>Nicolás</first><last>Morales</last></author>
       <author><first>Javier</first><last>Tejedor</last></author>
       <author><first>Javier</first><last>Garrido</last></author>
       <author><first>José</first><last>Colás</last></author>
-      <author><first>Doroteo T.</first><last>Toledano</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo T.</first><last>Toledano</last></author>
       <title><fixed-case>STC</fixed-case>-<fixed-case>TIMIT</fixed-case>: Generation of a Single-channel Telephone Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/102_paper.pdf</url>
       <abstract>This paper describes a new speech corpus, STC-TIMIT, and discusses the process of design, development and its distribution through LDC. The STC-TIMIT corpus is derived from the widely used TIMIT corpus by sending it through a real and single telephone channel. TIMIT is phonetically balanced, covers the dialectal diversity in continental USA and has been extensively used as a benchmark for speech recognition algorithms, especially in early stages of development. The experimental usability of TIMIT has been increased eventually with the creation of derived corpora, passing the original data through different channels. One such example is the well-known NTIMIT corpus, where the original files in TIMIT are re-recorded after being sent through different telephone calls, resulting in a corpus that characterizes telephone channels in a wide sense. In STC-TIMIT, we followed a similar procedure, but the whole corpus was transmitted in a single telephone call with the goal of obtaining data from a real and yet highly stable telephone channel across the whole corpus. Files in STC-TIMIT are aligned to those of TIMIT with a theoretical precision of 0.125 ms, making TIMIT labels valid for the new corpus. The experimental section presents several results on speech recognition accuracy.</abstract>
@@ -4733,12 +4733,12 @@
     </paper>
     <paper id="498">
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Asuncion</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></author>
       <author><first>Herbert</first><last>Tropf</last></author>
       <author><first>Lynette</first><last>Melnar</last></author>
       <author><first>Nurit</first><last>Dekel</last></author>
       <author><first>Breanna</first><last>Gillies</last></author>
-      <author><first>Niklas</first><last>Paulsson</last></author>
+      <author id="niklas-paulsson"><first>Niklas</first><last>Paulsson</last></author>
       <title><fixed-case>LILA</fixed-case>: Cellular Telephone Speech Databases from <fixed-case>A</fixed-case>sia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/278_paper.pdf</url>
       <abstract>The goal of the LILA project was the collection of speech databases over cellular telephone networks of five languages in three Asian countries. Three languages were recorded in India: Hindi by first language speakers, Hindi by second language speakers and Indian English. Furthermore, Mandarin was recorded in China and Korean in South-Korea. The databases are part of the SpeechDat-family and follow the SpeechDat rules in many respects. All databases have been finished and have passed the validation tests. Both Hindi databases and the Korean database will be available to the public for sale.</abstract>
@@ -4792,11 +4792,11 @@
     </paper>
     <paper id="503">
       <author><first>Rubén</first><last>Fernández</last></author>
-      <author><first>Luis A.</first><last>Hernández</last></author>
+      <author id="luis-hernandez"><first>Luis A.</first><last>Hernández</last></author>
       <author><first>Eduardo</first><last>López</last></author>
       <author><first>José</first><last>Alcázar</last></author>
       <author><first>Guillermo</first><last>Portillo</last></author>
-      <author><first>Doroteo T.</first><last>Toledano</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo T.</first><last>Toledano</last></author>
       <title>Design of a Multimodal Database for Research on Automatic Detection of Severe Apnoea Cases</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/454_paper.pdf</url>
       <abstract>The aim of this paper is to present the design of a multimodal database suitable for research on new possibilities for automatic diagnosis of patients with severe obstructive sleep apnoea (OSA). Early detection of severe apnoea cases can be very useful to give priority to their early treatment optimizing the expensive and time-consuming tests of current diagnosis methods based on full overnight sleep in a hospital. This work is part of an on-going collaborative project between medical and signal processing groups towards the design of a multimodal database as an innovative resource to promote new research efforts on automatic OSA diagnosis through speech and image processing technologies. In this contribution we present the multimodal design criteria derived from the analysis of specific voice properties related to OSA physiological effects as well as from the morphological facial characteristics in apnoea patients. Details on the database structure and data collection methodology are also given as it is intended to be an open resource to promote further research in this field. Finally, preliminary experimental results on automatic OSA voice assessment are presented for the collected speech data in our OSA multimodal database. Standard GMM speaker recognition techniques obtain an overall correct classification rate of 82%. This represents an initial promising result underlining the interest of this research framework and opening further perspectives for improvement using more specific speech and image recognition technologies.</abstract>
@@ -4809,8 +4809,8 @@
       <author><first>Tatsuya</first><last>Kawahara</last></author>
       <author><first>Hiroaki</first><last>Nanjo</last></author>
       <author><first>Hiromitsu</first><last>Nishizaki</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
-      <author><first>Yoichi</first><last>Yamashita</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
+      <author id="yoichi-yamashita"><first>Yoichi</first><last>Yamashita</last></author>
       <author><first>Katunobu</first><last>Itou</last></author>
       <title>Test Collections for Spoken Document Retrieval from Lecture Audio Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/400_paper.pdf</url>
@@ -4825,7 +4825,7 @@
       <author><first>Takanori</first><last>Nishino</last></author>
       <author><first>Norihide</first><last>Kitaoka</last></author>
       <author><first>Katunobu</first><last>Itou</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <title>In-car Speech Data Collection along with Various Multimodal Signals</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/472_paper.pdf</url>
       <abstract>In this paper, a large-scale real-world speech database is introduced along with other multimedia driving data. We designed a data collection vehicle equipped with various sensors to synchronously record twelve-channel speech, three-channel video, driving behavior including gas and brake pedal pressures, steering angles, and vehicle velocities, physiological signals including driver heart rate, skin conductance, and emotion-based sweating on the palms and soles, etc. These multimodal data are collected while driving on city streets and expressways under four different driving task conditions including two kinds of monologues, human-human dialog, and human-machine dialog. We investigated the response timing of drivers against navigator utterances and found that most overlapped with the preceding utterance due to the task characteristics and the features of Japanese. When comparing utterance length, speaking rate, and the filler rate of driver utterances in human-human and human-machine dialogs, we found that drivers tended to use longer and faster utterances with more fillers to talk with humans than machines.</abstract>
@@ -4836,7 +4836,7 @@
       <author><first>Satoru</first><last>Kogure</last></author>
       <author><first>Hiromitsu</first><last>Nishizaki</last></author>
       <author><first>Kengo</first><last>Ohta</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <title>Developing Corpus of <fixed-case>J</fixed-case>apanese Classroom Lecture Speech Contents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/524_paper.pdf</url>
       <abstract>This paper explains our developing Corpus of Japanese classroom Lecture speech Contents (henceforth, denoted as CJLC). Increasing e-Learning contents demand a sophisticated interactive browsing system for themselves, however, existing tools do not satisfy such a requirement. Many researches including large vocabulary continuous speech recognition and extraction of important sentences against lecture contents are necessary in order to realize the above system. CJLC is designed as their fundamental basis, and consists of speech, transcriptions, and slides that were collected in real university classroom lectures. This paper also explains the difference about disfluency acts between classroom lectures and academic presentations.</abstract>
@@ -4857,8 +4857,8 @@
       <author><first>Richard</first><last>Adderley</last></author>
       <author><first>Christian</first><last>Bonkowski</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>The <fixed-case>M</fixed-case>ove<fixed-case>O</fixed-case>n Motorcycle Speech Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/557_paper.pdf</url>
       <abstract>A speech and noise corpus dealing with the extreme conditions of the motorcycle environment is developed within the MoveOn project. Speech utterances in British English are recorded and processed approaching the issue of command and control and template driven dialog systems on the motorcycle. The major part of the corpus comprises noisy speech and environmental noise recorded on a motorcycle, but several clean speech recordings in a silent environment are also available. The corpus development focuses on distortion free recordings and accurate descriptions of both recorded speech and noise. Not only speech segments are annotated but also annotation of environmental noise is performed. The corpus is a small-sized speech corpus with about 12 hours of clean and noisy speech utterances and about 30 hours of segments with environmental noise without speech. This paper addresses the motivation and development of the speech corpus and finally presents some statistics and results of the database creation.</abstract>
@@ -4868,7 +4868,7 @@
       <author><first>Stavros</first><last>Ntalampiras</last></author>
       <author><first>Ilyas</first><last>Potamitis</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>Audio Database in Support of Potentiel Threat and Crisis Situation Management</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/327_paper.pdf</url>
       <abstract>This paper describes a corpus consisting of audio data for automatic space monitoring based solely on the perceived acoustic information. The particular database is created as part of a project aiming at the detection of abnormal events, which lead to life-threatening situations or property damage. The audio corpus is composed of vocal reactions and environmental sounds that are usually encountered in atypical situations. The audio data is composed of three parts: Phase I - professional sound effects collections, Phase II recordings obtained from action and drama movies and Phase III - vocal reactions related to real-world emergency events as retrieved from television, radio broadcast news, documentaries etc. The annotation methodology is given in details along with preliminary classification results and statistical analysis of the dataset regarding Phase I. The main objective of such a dataset is to provide training data for automatic recognition machines that detect hazardous situations and to provide security enhancement in public environments, which otherwise require human supervision.</abstract>
@@ -4876,11 +4876,11 @@
     </paper>
     <paper id="510">
       <author><first>Martine</first><last>Garnier-Rizet</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Frederik</first><last>Cailliau</last></author>
       <author><first>Sylvie</first><last>Guillemin-Lanne</last></author>
       <author><first>Claire</first><last>Waast-Richard</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>Stephan</first><last>Vanni</last></author>
       <author><first>Claire</first><last>Waast-Richard</last></author>
       <title><fixed-case>C</fixed-case>all<fixed-case>S</fixed-case>urf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content</title>
@@ -4889,7 +4889,7 @@
       <bibkey>garnier-rizet-etal-2008-callsurf</bibkey>
     </paper>
     <paper id="511">
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Arnaud</first><last>Vallee</last></author>
       <title>New Telephone Speech Databases for <fixed-case>F</fixed-case>rench: a Children Database and an optimized Adult Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/901_paper.pdf</url>
@@ -4916,7 +4916,7 @@
       <author><first>Tiit</first><last>Hennoste</last></author>
       <author><first>Olga</first><last>Gerassimenko</last></author>
       <author><first>Riina</first><last>Kasterpalu</last></author>
-      <author><first>Mare</first><last>Koit</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
       <author><first>Andriela</first><last>Rääbis</last></author>
       <author><first>Krista</first><last>Strandson</last></author>
       <title>From Human Communication to Intelligent User Interfaces: Corpora of Spoken <fixed-case>E</fixed-case>stonian</title>
@@ -4942,11 +4942,11 @@
       <bibkey>brinckmann-etal-2008-german</bibkey>
     </paper>
     <paper id="517">
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <author><first>Jordi</first><last>Adell</last></author>
       <author><first>Ignasi</first><last>Esquerra</last></author>
       <author><first>Silvia</first><last>Gallego</last></author>
-      <author><first>Asunción</first><last>Moreno</last></author>
+      <author id="asuncion-moreno"><first>Asunción</first><last>Moreno</last></author>
       <author><first>Javier</first><last>Pérez</last></author>
       <title>Corpus and Voices for <fixed-case>C</fixed-case>atalan Speech Synthesis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/835_paper.pdf</url>
@@ -4954,10 +4954,10 @@
       <bibkey>bonafonte-etal-2008-corpus</bibkey>
     </paper>
     <paper id="518">
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Thomas</first><last>Pellegrini</last></author>
-      <author><first>Eric</first><last>Bilinski</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="eric-bilinski"><first>Eric</first><last>Bilinski</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <title>Developments of “Lëtzebuergesch” Resources for Automatic Speech Processing and Linguistic Studies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/855_paper.pdf</url>
       <abstract>In the present contribution we start with an overview of the linguistic situation of Luxembourg. We then describe specificities of spoken and written Lëtzebuergesch, with respect to automatic speech processing. Multilingual code-switching and code-mixing, poor writing standardization as compared to languages such as English or French, a large diversity of spoken varieties, together with a limited written production of Lëtzebuergesch language contribute to pose many interesting challenges to automatic speech processing both for speech technologies and linguistic studies. Multilingual filtering has been investigated to sort out Luxembourgish from German and French. Word list coverage and language model perplexity results, using sibling resources collected from the Web, are presented. A phonemic inventory has been adopted for pronunciation dictionary development, a grapheme-phoneme tool has been developed and pronunciation research issues related to the multilingual context are highlighted. Results achieved in resource development allow to envision the realisation of an ASR system.</abstract>
@@ -4965,8 +4965,8 @@
     </paper>
     <paper id="519">
       <author><first>Rena</first><last>Nemoto</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <title>Speech Errors on Frequently Observed Homophones in <fixed-case>F</fixed-case>rench: Perceptual Evaluation vs Automatic Classification</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/554_paper.pdf</url>
       <abstract>The present contribution aims at increasing our understanding of automatic speech recognition (ASR) errors involving frequent homophone or almost homophone words by confronting them to perceptual results. The long-term aim is to improve acoustic modelling of these items to reduce automatic transcription errors. A first question of interest addressed in this paper is whether homophone words such as et (and); and est (to be), for which ASR systems rely on language model weights, can be discriminated in a perceptual transcription test with similar n-gram constraints. A second question concerns the acoustic separability of the two homophone words using appropriate acoustic and prosodic attributes. The perceptual test reveals that even though automatic and perceptual errors correlate positively, human listeners deal with local ambiguity more efficiently than the ASR system in conditions which attempt to approximate the information available for decision for a 4-gram language model. The corresponding acoustic analysis shows that the two homophone words may be distinguished thanks to some relevant acoustic and prosodic attributes. A first experiment in automatic classification of the two words using data mining techniques highlights the role of the prosodic (duration and voicing) and contextual information (pauses co-occurrence) in distinguishing the two words. Current results, even though preliminary, suggests that new levels of information, so far unexplored in pronunciations modelling for ASR, may be considered in order to efficiently factorize the word variants observed in speech and to improve the automatic speech transcription.</abstract>
@@ -5001,18 +5001,18 @@
       <bibkey>bazillon-etal-2008-manual</bibkey>
     </paper>
     <paper id="523">
-      <author><first>Antonio Moreno</first><last>Sandoval</last></author>
-      <author><first>Doroteo Torre</first><last>Toledano</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio Moreno</first><last>Sandoval</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo Torre</first><last>Toledano</last></author>
       <author><first>Raúl</first><last>de la Torre</last></author>
-      <author><first>Marta</first><last>Garrote</last></author>
-      <author><first>José M.</first><last>Guirao</last></author>
+      <author id="marta-garrote-salazar"><first>Marta</first><last>Garrote</last></author>
+      <author id="jose-m-guirao"><first>José M.</first><last>Guirao</last></author>
       <title>Developing a Phonemic and Syllabic Frequency Inventory for Spontaneous Spoken Castilian <fixed-case>S</fixed-case>panish and their Comparison to Text-Based Inventories</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/283_paper.pdf</url>
       <abstract>In this paper we present our recent work to develop phonemic and syllabic inventories for Castilian Spanish based on the C-ORAL-ROM corpus, a spontaneous spoken resource with varying degrees of naturalness and in different communicative contexts. These inventories have been developed by means of a phonemic and syllabic automatic transcriptor whose output has been assessed by manually reviewing most of the transcriptions. The inventories include absolute frequencies of occurrence of the different phones and syllables. These frequencies have been contrasted against an inventory extracted from a comparable textual corpus, finding evidence that the available inventories, based mainly on text, do not provide an accurate description of spontaneously spoken Castilian Spanish.</abstract>
       <bibkey>sandoval-etal-2008-developing</bibkey>
     </paper>
     <paper id="524">
-      <author><first>Petr</first><last>Pollák</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollák</last></author>
       <author><first>Jan</first><last>Volín</last></author>
       <author><first>Radek</first><last>Skarnitzl</last></author>
       <title>Phone Segmentation Tool with Integrated Pronunciation Lexicon and <fixed-case>C</fixed-case>zech Phonetically Labelled Reference Database.</title>
@@ -5040,7 +5040,7 @@
     <paper id="527">
       <author><first>Jonathan</first><last>Chevelu</last></author>
       <author><first>Nelly</first><last>Barbot</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <author><first>Arnaud</first><last>Delhay</last></author>
       <title>Comparing Set-Covering Strategies for Optimal Corpus Design</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/750_paper.pdf</url>
@@ -5068,7 +5068,7 @@
     </paper>
     <paper id="530">
       <author><first>Erin</first><last>Fitzgerald</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <title>Linguistic Resources for Reconstructing Spontaneous Speech Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf</url>
       <abstract>The output of a speech recognition system is not always ideal for subsequent downstream processing, in part because speakers themselves often make mistakes. A system would accomplish speech reconstruction of its spontaneous speech input if its output were to represent, in flawless, fluent, and content-preserving English, the message that the speaker intended to convey. These cleaner speech transcripts would allow for more accurate language processing as needed for NLP tasks such as machine translation and conversation summarization, which often rely on grammatical input. Recognizing that supervised statistical methods to identify and transform ill-formed areas of the transcript will require richly labeled resources, we have built the Spontaneous Speech Reconstruction corpus. This small corpus of reconstructed and aligned conversational telephone speech transcriptions for the Fisher conversational telephone speech corpus (Strassel and Walker, 2004) was annotated on several levels including string transformations and predicate-argument structure, and will be shared with the linguistic research community.</abstract>
@@ -5091,7 +5091,7 @@
     </paper>
     <paper id="533">
       <author><first>Florian</first><last>Koehler</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Michaela</first><last>Atterer</last></author>
       <title>A Question Answering System for <fixed-case>G</fixed-case>erman. Experiments with Morphological Linguistic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/24_paper.pdf</url>
@@ -5124,7 +5124,7 @@
       <bibkey>tsarfaty-goldberg-2008-word</bibkey>
     </paper>
     <paper id="537">
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <author><first>Laurette</first><last>Pretorius</last></author>
       <author><first>Kholisa</first><last>Podile</last></author>
       <author><first>Axel</first><last>Fleisch</last></author>
@@ -5145,7 +5145,7 @@
     <paper id="539">
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Mikhail</first><last>Kopotev</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Anna</first><last>Feldman</last></author>
       <author><first>Dagmar</first><last>Divjak</last></author>
       <title>Designing and Evaluating a <fixed-case>R</fixed-case>ussian Tagset</title>
@@ -5164,7 +5164,7 @@
     </paper>
     <paper id="541">
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <title>Identification of Naturally Occurring Numerical Expressions in <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/843_paper.pdf</url>
       <abstract>In this paper, we define the task of Number Identification in natural context. We present and validate a language-independent semi-automatic approach to quickly building a gold standard for evaluating number identification systems by exploiting hand-aligned parallel data. We also present and extensively evaluate a robust rule-based system for number identification in natural context for Arabic for a variety of number formats and types. The system is shown to have strong performance, achieving, on a blind test, a 94.8% F-score for the task of correctly identifying number expression spans in natural text, and a 92.1% F-score for the task of correctly determining the core numerical value.</abstract>
@@ -5182,23 +5182,23 @@
     </paper>
     <paper id="543">
       <author><first>Mehrnoush</first><last>Shamsfard</last></author>
-      <author><first>Hakimeh</first><last>Fadaee</last></author>
+      <author id="hakimeh-fadaee"><first>Hakimeh</first><last>Fadaee</last></author>
       <title>A Hybrid Morphology-Based <fixed-case>POS</fixed-case> Tagger for <fixed-case>P</fixed-case>ersian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/875_paper.pdf</url>
       <abstract>In many applications of natural language processing (NLP) grammatically tagged corpora are needed. Thus Part of Speech (POS) Tagging is of high importance in the domain of NLP. Many taggers are designed with different approaches to reach high performance and accuracy. These taggers usually deal with inter-word relations and they make use of lexicons. In this paper we present a new tagging algorithm with a hybrid approach. This algorithm combines the features of probabilistic and rule-based taggers to tag Persian unknown words. In contrast with many other tagging algorithms this algorithm deals with the internal structure of the words and it does not need any built in knowledge. The introduced tagging algorithm is domain independent because it uses morphological rules. In this algorithm POS tags are assigned to unknown word with a probability which shows the accuracy of the assigned POS tag. Although this tagger is proposed for Persian, it can be adapted to other languages by applying their morphological rules.</abstract>
       <bibkey>shamsfard-fadaee-2008-hybrid</bibkey>
     </paper>
     <paper id="544">
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Tanmoy</first><last>Bhattacharya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish Nath</first><last>Jha</last></author>
-      <author><first>S.</first><last>Rajendran</last></author>
-      <author><first>K.</first><last>Saravanan</last></author>
-      <author><first>L.</first><last>Sobha</last></author>
-      <author><first>K.V.</first><last>Subbarao</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></author>
+      <author id="s-rajendran"><first>S.</first><last>Rajendran</last></author>
+      <author id="k-saravanan"><first>K.</first><last>Saravanan</last></author>
+      <author id="sobha-l"><first>L.</first><last>Sobha</last></author>
+      <author id="subbarao-k-v"><first>K.V.</first><last>Subbarao</last></author>
       <title>A Common Parts-of-Speech Tagset Framework for <fixed-case>I</fixed-case>ndian Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/337_paper.pdf</url>
       <abstract>We present a universal Parts-of-Speech (POS) tagset framework covering most of the Indian languages (ILs) following the hierarchical and decomposable tagset schema. In spite of significant number of speakers, there is no workable POS tagset and tagger for most ILs, which serve as fundamental building blocks for NLP research. Existing IL POS tagsets are often designed for a specific language; the few that have been designed for multiple languages cover only shallow linguistic features ignoring linguistic richness and the idiosyncrasies. The new framework that is proposed here addresses these deficiencies in an efficient and principled manner. We follow a hierarchical schema similar to that of EAGLES and this enables the framework to be flexible enough to capture rich features of a language/ language family, even while capturing the shared linguistic structures in a methodical way. The proposed common framework further facilitates the sharing and reusability of scarce resources in these languages and ensures cross-linguistic compatibility.</abstract>
@@ -5206,7 +5206,7 @@
     </paper>
     <paper id="545">
       <author><first>Rajat</first><last>Mohanty</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>Lexical Resources for Semantics Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/619_paper.pdf</url>
       <abstract>In this paper, we report our work on the creation of a number of lexical resources that are crucial for an interlingua based MT from English to other languages. These lexical resources are in the form of sub-categorization frames, verb knowledge bases and rule templates for establishing semantic relations and speech act like attributes. We have created these resources over a long period of time from Oxford Advanced Learners Dictionary (OALD) [1], VerbNet [2], Princeton WordNet 2.1 [3], LCS database [4], Penn Tree Bank [5], and XTAG lexicon [6]. On the challenging problem of generating interlingua from domain and structure unrestricted English sentences, we are able to demonstrate that the use of these lexical resources makes a difference in terms of accuracy figures.</abstract>
@@ -5223,7 +5223,7 @@
     <paper id="547">
       <author><first>Ya-Min</first><last>Chou</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <title>The Extended Architecture of Hantology for <fixed-case>J</fixed-case>apan Kanji</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/429_paper.pdf</url>
       <abstract>Chinese writing system is not only used by Chinese but also used by Japanese. The motivation of this paper is to extend the architecture of Hantology which describes the features of Chinese writing system to integrate Japan Kanji and Chinese characters into the same ontology. The problem is Chinese characters adopted by Japan have been changed, thus, the modification of the original architecture of Hantology is needed. A extended architecture consists orthographic, pronunciation, sense and derived lexicon dimensions. is proposed in this paper. The contribution of this study is that the extension architecture of Hantology provides a platform to analyze the variation of Chinese characters used in Japan. The analytic results of variation for a specific Kanji can be integrated into Hantology, so it is easier to study the variation of Chinese characters systematically</abstract>
@@ -5231,7 +5231,7 @@
     </paper>
     <paper id="548">
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Eelco</first><last>Mossel</last></author>
       <title>Language Resources for Semantic Document Annotation and Crosslingual Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/478_paper.pdf</url>
@@ -5241,7 +5241,7 @@
     <paper id="549">
       <author><first>Sanaz</first><last>Jabbari</last></author>
       <author><first>Ben</first><last>Allison</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <title>Using a Probabilistic Model of Context to Detect Word Obfuscation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/560_paper.pdf</url>
       <abstract>This paper proposes a distributional model of word use and word meaning which is derived purely from a body of text, and then applies this model to determine whether certain words are used in or out of context. We suggest that we can view the contexts of words as multinomially distributed random variables. We illustrate how using this basic idea, we can formulate the problem of detecting whether or not a word is used in context as a likelihood ratio test. We also define a measure of semantic relatedness between a word and its context using the same model. We assume that words that typically appear together are related, and thus have similar probability distributions and that words used in an unusual way will have probability distributions which are dissimilar from those of their surrounding context. The relatedness of a word to its context is based on Kullback-Leibler divergence between probability distributions assigned to the constituent words in the given sentence. We employed our methods on a defense-oriented application where certain words are substituted with other words in an intercepted communication.</abstract>
@@ -5249,7 +5249,7 @@
     </paper>
     <paper id="550">
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <title>Frame Information Transfer from <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/567_paper.pdf</url>
       <abstract>We describe an automatic projection algorithm for transferring frame-semantic information from English to Italian texts as a first sep towards the creation of Italian FrameNet. Given an English text with frame information and its Italian translation, we project the annotation in four steps: first the Italian text is parsed, then English-Italian alignment is automatically carried out at word level, then we extract the semantic head for every annotated constituent on the English corpus side and finally we project annotation from English to Italian using aligned semantic heads as bridge. With our work, we point out typical features of the Italian language as regards frame-semantic annotation, in particular we describe peculiarities of Italian that at the moment make the projection task more difficult than in the above-mentioned examples. Besides, we created a gold standard with 987 manually annotated sentences to evaluate the algorithm.</abstract>
@@ -5257,7 +5257,7 @@
     </paper>
     <paper id="551">
       <author><first>Jordi</first><last>Carrera</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <author><first>Salvador</first><last>Climent</last></author>
       <author><first>Marta</first><last>Coll-Florit</last></author>
       <title>Towards <fixed-case>S</fixed-case>panish Verbs’ Selectional Preferences Automatic Acquisition: Semantic Annotation of the <fixed-case>S</fixed-case>en<fixed-case>S</fixed-case>em Corpus</title>
@@ -5267,26 +5267,26 @@
     </paper>
     <paper id="552">
       <author><first>Paula Cristina</first><last>Vaz</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
-      <author><first>Nuno J.</first><last>Mamede</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
+      <author id="nuno-mamede"><first>Nuno J.</first><last>Mamede</last></author>
       <title>Using Lexical Acquisition to Enrich a Predicate Argument Reusable Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/627_paper.pdf</url>
       <abstract>The work described in this paper aims to enrich the noun classifications of an existing database of lexical resources (de Matos and Ribeiro, 2004) adding missing information such as semantic relations. Relations are extracted from an annotated and manually corrected corpus. Semantic relations added to the database are retrieved from noun-appositive relations found in the corpus. The method uses clustering to generate labeled sets of words with hypernym relations between set label and set elements.</abstract>
       <bibkey>vaz-etal-2008-using</bibkey>
     </paper>
     <paper id="553">
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <author><first>Raquel Mochales</first><last>Palau</last></author>
       <author><first>Glenn</first><last>Rowe</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <title>Language Resources for Studying Argument</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/648_paper.pdf</url>
       <abstract>This paper describes the development of a written corpus of argumentative reasoning. Arguments in the corpus have been analysed using state of the art techniques from argumentation theory and have been marked up using an open, reusable markup language. A number of the key challenges enountered during the process are explored, and preliminary observations about features such as inter-coder reliability and corpus statistics are discussed. In addition, several examples are offered of how this kind of language resource can be used in linguistic, computational and philosophical research, and in particular, how the corpus has been used to initiate a programme investigating the automatic detection of argumentative structure.</abstract>
       <bibkey>reed-etal-2008-language</bibkey>
     </paper>
     <paper id="554">
-      <author><first>Cosmin</first><last>Bejan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin</first><last>Bejan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title>A Linguistic Resource for Discovering Event Structures and Resolving Event Coreference</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/734_paper.pdf</url>
       <abstract>In this paper, we present a linguistic resource that annotates event structures in texts. We consider an event structure as a collection of events that interact with each other in a given situation. We interpret the interactions between events as event relations. In this regard, we propose and annotate a set of six relations that best capture the concept of event structure. These relations are: subevent, reason, purpose, enablement, precedence and related. A document from this resource can encode multiple event structures and an event structure can be described across multiple documents. In order to unify event structures, we also annotate inter- and intra-document event coreference. Moreover, we provide methodologies for automatic discovery of event structures from texts. First, we group the events that constitute an event structure into event clusters and then, we use supervised learning frameworks to classify the relations that exist between events from the same cluster</abstract>
@@ -5308,7 +5308,7 @@
       <bibkey>ruimy-toral-2008-semantic</bibkey>
     </paper>
     <paper id="557">
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Nilda</first><last>Ruimy</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <title>Simple-Clips ongoing research: more information with less data by implementing inheritance</title>
@@ -5364,7 +5364,7 @@
     </paper>
     <paper id="563">
       <author><first>Georgiana</first><last>Puşcaşu</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <title>Annotation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Verbs with <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Event Classes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/712_paper.pdf</url>
       <abstract>This paper reports on the annotation of all English verbs included in WordNet 2.0 with TimeML event classes. Two annotators assign each verb present in WordNet the most relevant event class capturing most of that verbs meanings. At the end of the annotation process, inter-annotator agreement is measured using kappa statistics, yielding a kappa value of 0.87. The cases of disagreement between the two independent annotations are clarified by obtaining a third, and in some cases, a fourth opinion, and finally each of the 11,306 WordNet verbs is mapped to a unique event class. The resulted annotation is then employed to automatically assign the corresponding class to each occurrence of a finite or non-finite verb in a given text. The evaluation performed on TimeBank reveals an F-measure of 86.43% achieved for the identification of verbal events, and an accuracy of 85.25% in the task of classifying them into TimeML event classes.</abstract>
@@ -5390,13 +5390,13 @@
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Peter</first><last>Dirix</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Sokratis</first><last>Sofianopoulos</last></author>
       <author><first>Marina</first><last>Vassiliou</last></author>
       <author><first>Olga</first><last>Yannoutsou</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <author><first>Paul</first><last>Schmidt</last></author>
       <title>Evaluation of a Machine Translation System for Low Resource Languages: <fixed-case>METIS</fixed-case>-<fixed-case>II</fixed-case></title>
@@ -5405,8 +5405,8 @@
       <bibkey>vandeghinste-etal-2008-evaluation</bibkey>
     </paper>
     <paper id="567">
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Enric</first><last>Monte</last></author>
       <title>Using Reordering in Statistical Machine Translation based on Alignment Block Classification</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/444_paper.pdf</url>
@@ -5414,7 +5414,7 @@
       <bibkey>costa-jussa-etal-2008-using</bibkey>
     </paper>
     <paper id="568">
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Torbjørn</first><last>Nordgård</last></author>
       <author><first>Lars</first><last>Nygaard</last></author>
       <title>Evaluation of Linguistics-Based Translation</title>
@@ -5444,7 +5444,7 @@
       <bibkey>ma-etal-2008-selection</bibkey>
     </paper>
     <paper id="571">
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Bengt</first><last>Dahlqvist</last></author>
       <author><first>Eva</first><last>Pettersson</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
@@ -5483,14 +5483,14 @@
     <paper id="575">
       <author><first>Svitlana</first><last>Kurella</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <title>Corpus-Based Tools for Computer-Assisted Acquisition of Reading Abilities in Cognate Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/479_paper.pdf</url>
       <abstract>This paper presents an approach to computer-assisted teaching of reading abilities using corpus data. The approach is supported by a set of tools for automatically selecting and classifying texts retrieved from the Internet. The approach is based on a linguistic model of textual cohesion which describes relations between larger textual units that go beyond the sentence level. We show that textual connectors that link such textual units reliably predict different types of texts, such as information and opinion: using only textual connectors as features, an SVM classifier achieves an F-score of between 0.85 and 0.93 for predicting these classes. The tools are used in our project on teaching reading skills in a cognate foreign language (L3) which is cognate to a known foreign language (L2).</abstract>
       <bibkey>kurella-etal-2008-corpus</bibkey>
     </paper>
     <paper id="576">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title>Synchronizing Translated Movie Subtitles</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/484_paper.pdf</url>
       <abstract>This paper addresses the problem of synchronizing movie subtitles, which is necessary to improve alignment quality when building a parallel corpus out of translated subtitles. In particular, synchronization is done on the basis of aligned anchor points. Previous studies have shown that cognate filters are useful for the identification of such points. However, this restricts the approach to related languages with similar alphabets. Here, we propose a dictionary-based approach using automatic word alignment. We can show an improvement in alignment quality even for related languages compared to the cognate-based approach.</abstract>
@@ -5506,7 +5506,7 @@
     </paper>
     <paper id="578">
       <author><first>Violaine</first><last>Prince</last></author>
-      <author><first>Jacques</first><last>Chauché</last></author>
+      <author id="jacques-chauche"><first>Jacques</first><last>Chauché</last></author>
       <title>Building a Bilingual Representation of the <fixed-case>R</fixed-case>oget Thesaurus for <fixed-case>F</fixed-case>rench to <fixed-case>E</fixed-case>nglish Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/626_paper.pdf</url>
       <abstract>This paper describes a solution to lexical transfer as a trade-off between a dictionary and an ontology. It shows its association to a translation tool based on morpho-syntactical parsing of the source language. It is based on the English Roget Thesaurus and its equivalent, the French Larousse Thesaurus, in a computational framework. Both thesaurii are transformed into vector spaces, and all monolingual entries are represented as vectors, with 1,000 components for English and 873 for French. The indexing concepts of the respective thesaurii are the generation families of the vector spaces. A bilingual data structure transforms French entries into vectors in the English space, by using their equivalencies representations. Word sense disambiguation consists in choosing the appropriate vector among these bilingual vectors, by computing the contextualized vector of a given word in its source sentence, wading it in the English vector space, and computing the closest distance to the different entries in the bilingual data structure beginning with the same source string (i.e. French word). The process has been experimented on a 20,000 words extract of a French novel, Le Petit Prince, and lexical transfer results were found quite encouraging with a recall of 71% and a precision of 86%.</abstract>
@@ -5514,7 +5514,7 @@
     </paper>
     <paper id="579">
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <title>Generating Bilingual Dictionaries by Transitivity</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/641_paper.pdf</url>
       <abstract>Recently the LATL has undertaken the development of a multilingual translation system based on a symbolic parsing technology and on a transfer-based translation model. A crucial component of the system is the lexical database, notably the bilingual dictionaries containing the information for the lexical transfer from one language to another. As the number of necessary bilingual dictionaries is a quadratic function of the number of languages considered, we will face the problem of getting a large number of dictionaries. In this paper we discuss a solution to derive a bilingual dictionary by transitivity using existing ones and to check the generated translations in a parallel corpus. Our first experiments concerns the generation of two bilingual dictionaries and the quality of the entries are very promising. The number of generated entries could however be improved and we conclude the paper with the possible ways we plan to explore.</abstract>
@@ -5531,14 +5531,14 @@
     </paper>
     <paper id="581">
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Alison</first><last>Alvarez</last></author>
       <author><first>Roberto</first><last>Aranovich</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
       <author><first>Katharina</first><last>Probst</last></author>
       <title>Linguistic Structure and Bilingual Informants Help Induce Machine Translation of Lesser-Resourced Languages</title>
@@ -5549,7 +5549,7 @@
     <paper id="582">
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <author><first>Xiaoyi</first><last>Ma</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Creating Sentence-Aligned Parallel Text Corpora from a Large Archive of Potential Parallel Text using <fixed-case>BITS</fixed-case> and Champollion</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/779_paper.pdf</url>
       <abstract>Parallel text is one of the most valuable resources for development of statistical machine translation systems and other NLP applications. The Linguistic Data Consortium (LDC) has supported research on statistical machine translations and other NLP applications by creating and distributing a large amount of parallel text resources for the research communities. However, manual translations are very costly, and the number of known providers that offer complete parallel text is limited. This paper presents a cost effective approach to identify parallel document pairs from sources that provide potential parallel text - namely, sources that may contain whole or partial translations of documents in the source language - using the BITS and Champollion parallel text alignment systems developed by LDC.</abstract>
@@ -5570,7 +5570,7 @@
       <author><first>Wolodja</first><last>Wentland</last></author>
       <author><first>Johannes</first><last>Knopp</last></author>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <title>Building a Multilingual Lexical Resource for Named Entity Disambiguation, Translation and Transliteration</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/816_paper.pdf</url>
       <abstract>In this paper, we present HeiNER, the multilingual Heidelberg Named Entity Resource. HeiNER contains 1,547,586 disambiguated English Named Entities together with translations and transliterations to 15 languages. Our work builds on the approach described in (Bunescu and Pasca, 2006), yet extends it to a multilingual dimension. Translating Named Entities into the various target languages is carried out by exploiting crosslingual information contained in the online encyclopedia Wikipedia. In addition, HeiNER provides linguistic contexts for every NE in all target languages which makes it a valuable resource for multilingual Named Entity Recognition, Disambiguation and Classification. The results of our evaluation against the assessments of human annotators yield a high precision of 0.95 for the NEs we extract from the English Wikipedia. These source language NEs are thus very reliable seeds for our multilingual NE translation method.</abstract>
@@ -5585,17 +5585,17 @@
     </paper>
     <paper id="586">
       <author><first>Todor</first><last>Arnaudov</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>Smarty - Extendable Framework for Bilingual and Multilingual Comprehension Assistants</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/826_paper.pdf</url>
       <abstract>This paper discusses a framework for development of bilingual and multilingual comprehension assistants and presents a prototype implementation of an English-Bulgarian comprehension assistant. The framework is based on the application of advanced graphical user interface techniques, WordNet and compatible lexical databases as well as a series of NLP preprocessing tasks, including POS-tagging, lemmatisation, multiword expressions recognition and word sense disambiguation. The aim of this framework is to speed up the process of dictionary look-up, to offer enhanced look-up functionalities and to perform a context-sensitive narrowing-down of the set of translation alternatives proposed to the user.</abstract>
       <bibkey>arnaudov-mitkov-2008-smarty</bibkey>
     </paper>
     <paper id="587">
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <author><first>Péter</first><last>Németh</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <title>Parallel Creation of <fixed-case>G</fixed-case>igaword Corpora for Medium Density Languages - an Interim Report</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/858_paper.pdf</url>
       <abstract>For increased speed in developing gigaword language resources for medium resource density languages we integrated several FOSS tools in the HUN* toolkit. While the speed and efficiency of the resulting pipeline has surpassed our expectations, our experience in developing LDC-style resource packages for Uzbek and Kurdish makes clear that neither the data collection nor the subsequent processing stages can be fully automated.</abstract>
@@ -5604,14 +5604,14 @@
     <paper id="588">
       <author><first>Reginald</first><last>Hobbs</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <title><fixed-case>MT</fixed-case>riage: Web-enabled Software for the Creation, Machine Translation, and Annotation of Smart Documents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/844_paper.pdf</url>
       <abstract>Progress in the Machine Translation (MT) research community, particularly for statistical approaches, is intensely data-driven. Acquiring source language documents for testing, creating training datasets for customized MT lexicons, and building parallel corpora for MT evaluation require translators and non-native speaking analysts to handle large document collections. These collections are further complicated by differences in format, encoding, source media, and access to metadata describing the documents. Automated tools that allow language professionals to quickly annotate, translate, and evaluate foreign language documents are essential to improving MT quality and efficacy. The purpose of this paper is present our research approach to improving MT through pre-processing source language documents. In particular, we will discuss the development and use of MTriage, an application environment that enables the translator to markup documents with metadata for MT parameterization and routing. The use of MTriage as a web-enabled front end to multiple MT engines has leveraged the capabilities of our human translators for creating lexicons from NFW (Not-Found-Word) lists, writing reference translations, and creating parallel corpora for MT development and evaluation.</abstract>
       <bibkey>hobbs-etal-2008-mtriage</bibkey>
     </paper>
     <paper id="589">
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
       <title>Exploitation of an <fixed-case>A</fixed-case>rabic Language Resource for Machine Translation Evaluation: using <fixed-case>B</fixed-case>uckwalter-based Lookup Tool to Augment <fixed-case>CMU</fixed-case> Alignment Algorithm</title>
@@ -5653,7 +5653,7 @@
       <bibkey>santos-etal-2008-whats</bibkey>
     </paper>
     <paper id="594">
-      <author><first>Beata</first><last>Trawiński</last></author>
+      <author id="beata-trawinski"><first>Beata</first><last>Trawiński</last></author>
       <author><first>Jan-Philipp</first><last>Soehn</last></author>
       <title>A Multilingual Database of Polarity Items</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/77_paper.pdf</url>
@@ -5662,7 +5662,7 @@
     </paper>
     <paper id="595">
       <author><first>Ernesto William</first><last>De Luca</last></author>
-      <author><first>Birte</first><last>Lönneker-Rodman</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker-Rodman</last></author>
       <title>Integrating Metaphor Information into <fixed-case>RDF</fixed-case>/<fixed-case>OWL</fixed-case> <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/108_paper.pdf</url>
       <abstract>In this paper, we discuss the integration of metaphor information into the RDF/OWL representation of EuroWordNet. First, the lexical database WordNet and its variants are presented. After a brief description of the Hamburg Metaphor Database, examples of its conversion into the RDF/OWL representation of EuroWordNet are discussed. The metaphor information is added to the general EuroWordNet data and the new resulting RDF/OWL structure is shown in LexiRes, a visualization tool developed and adapted for handling structures of ontological and lexical databases. We show how LexiRes can be used to further edit the newly added metaphor information, and explain some problems with this new type of information on the basis of examples.</abstract>
@@ -5678,8 +5678,8 @@
     </paper>
     <paper id="597">
       <author><first>Juan</first><last>Aparicio</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <title><fixed-case>A</fixed-case>n<fixed-case>C</fixed-case>ora-Verb: A Lexical Resource for the Semantic Annotation of Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/203_paper.pdf</url>
       <abstract>In this paper we present two large-scale verbal lexicons, AnCora-Verb-Ca for Catalan and AnCora-Verb-Es for Spanish, which are the basis for the semantic annotation with arguments and thematic roles of AnCora corpora. In AnCora-Verb lexicons, the mapping between syntactic functions, arguments and thematic roles of each verbal predicate it is established taking into account the verbal semantic class and the diatheses alternations in which the predicate can participate. Each verbal predicate is related to one or more semantic classes basically differentiated according to the four event classes -accomplishments, achievements, states and activities-, and on the diatheses alternations in which a verb can occur. AnCora-Verb-Es contains a total of 1,965 different verbs corresponding to 3,671 senses and AnCora-Verb-Ca contains 2,151 verbs and 4,513 senses. These figures correspond to the total of 500,000 words contained in each corpus, AnCora-Ca and AnCora-Es. The lexicons and the annotated corpora constitute the richest linguistic resources of this kind freely available for Spanish and Catalan. The big amount of linguistic information contained in both resources should be of great interest for computational applications and linguistic studies. Currently, a consulting interface for these lexicons is available at (<url>http://clic.ub.edu/ancora/</url>).</abstract>
@@ -5709,9 +5709,9 @@
       <bibkey>furstenau-2008-enriching</bibkey>
     </paper>
     <paper id="601">
-      <author><first>Bento Carlos</first><last>Dias-da-Silva</last></author>
-      <author><first>Ariani</first><last>Di Felippo</last></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last></author>
+      <author id="bento-carlos-dias-da-silva"><first>Bento Carlos</first><last>Dias-da-Silva</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di Felippo</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last></author>
       <title>The Automatic Mapping of <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Lexical-Conceptual Relations onto the <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/391_paper.pdf</url>
       <abstract>Princeton WordNet (WN.Pr) lexical database has motivated efficient compilations of bulky relational lexicons since its inception in the 1980´s. The EuroWordNet project, the first multilingual initiative built upon WN.Pr, opened up ways of building individual wordnets, and inter-relating them by means of the so-called Inter-Lingual-Index, an unstructured list of the WN.Pr synsets. Other important initiative, relying on a slightly different method of building multilingual wordnets, is the MultiWordNet project, where the key strategy is building language specific wordnets keeping as much as possible of the semantic relations available in the WN.Pr. This paper, in particular, stresses that the additional advantage of using WN.Pr lexical database as a resource for building wordnets for other languages is to explore possibilities of implementing an automatic procedure to map the WN.Pr conceptual relations as hyponymy, co-hyponymy, troponymy, meronymy, cause, and entailment onto the lexical database of the wordnet under construction, a viable possibility, for those are language-independent relations that hold between lexicalized concepts, not between lexical units. Accordingly, combining methods from both initiatives, this paper presents the ongoing implementation of the WN.Br lexical database and the aforementioned automation procedure illustrated with a sample of the automatic encoding of the hyponymy and co-hyponymy relations.</abstract>
@@ -5727,7 +5727,7 @@
     <paper id="603">
       <author><first>Evi</first><last>Marzelou</last></author>
       <author><first>Maria</first><last>Zourari</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <title>Building a <fixed-case>G</fixed-case>reek corpus for Textual Entailment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/427_paper.pdf</url>
@@ -5754,7 +5754,7 @@
     </paper>
     <paper id="606">
       <author><first>Davide</first><last>Picca</last></author>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <title>Supersense Tagger for <fixed-case>I</fixed-case>talian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/599_paper.pdf</url>
@@ -5762,7 +5762,7 @@
       <bibkey>picca-etal-2008-supersense</bibkey>
     </paper>
     <paper id="607">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <title>Clustering of Terms from Translation Dictionaries and Synonyms Lists to Automatically Build more Structured Linguistic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/629_paper.pdf</url>
@@ -5782,7 +5782,7 @@
       <author><first>Attila</first><last>Almási</last></author>
       <author><first>Dóra</first><last>Szauter</last></author>
       <author><first>Róbert</first><last>Ormándi</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Csaba</first><last>Hatvani</last></author>
       <author><first>János</first><last>Csirik</last></author>
       <title><fixed-case>H</fixed-case>ungarian Word-Sense Disambiguated Corpus</title>
@@ -5791,7 +5791,7 @@
       <bibkey>vincze-etal-2008-hungarian</bibkey>
     </paper>
     <paper id="610">
-      <author><first>Olga N.</first><last>Lashevskaja</last></author>
+      <author id="olga-n-lashevskaja"><first>Olga N.</first><last>Lashevskaja</last></author>
       <author><first>Olga Yu.</first><last>Shemanaeva</last></author>
       <title>Semantic Annotation Layer in <fixed-case>R</fixed-case>ussian National Corpus: Lexical Classes of Nouns and Adjectives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/849_paper.pdf</url>
@@ -5799,9 +5799,9 @@
       <bibkey>lashevskaja-shemanaeva-2008-semantic</bibkey>
     </paper>
     <paper id="611">
-      <author><first>Mohamed</first><last>Attia</last></author>
-      <author><first>Mohsen</first><last>Rashwan</last></author>
-      <author><first>Ahmed</first><last>Ragheb</last></author>
+      <author id="mohamed-attia"><first>Mohamed</first><last>Attia</last></author>
+      <author id="mohsen-rashwan"><first>Mohsen</first><last>Rashwan</last></author>
+      <author id="ahmed-ragheb"><first>Ahmed</first><last>Ragheb</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Husein</first><last>Al-Basoumy</last></author>
       <title>A Compact <fixed-case>A</fixed-case>rabic Lexical Semantics Language Resource Based on the Theory of Semantic Fields</title>
@@ -5811,7 +5811,7 @@
     </paper>
     <paper id="612">
       <author><first>Doaa</first><last>Samy</last></author>
-      <author><first>Ana</first><last>González-Ledesma</last></author>
+      <author id="ana-gonzalez-ledesma"><first>Ana</first><last>González-Ledesma</last></author>
       <title>Pragmatic Annotation of Discourse Markers in a Multilingual Parallel Corpus (<fixed-case>A</fixed-case>rabic- <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/828_paper.pdf</url>
       <abstract>Discourse structure and coherence relations are one of the main inferential challenges addressed by computational pragmatics. The present study focuses on discourse markers as key elements in guiding the inferences of the statements in natural language. Through a rule-based approach for the automatic identification, classification and annotation of the discourse markers in a multilingual parallel corpus (Arabic-Spanish-English), this research provides a valuable resource for the community. Two main aspects define the novelty of the present study. First, it offers a multilingual computational processing of discourse markers, grounded on a theoritical framework and implemented in a XML tagging scheme. The XML scheme represents a set of pragmatic and grammatical attributes, considered as basic features for the different kinds of discourse markers. Besides, the scheme provides a typology of discourse markers based on their discursive functions including hypothesis, co-argumentation, cause, consequence, concession, generalization, topicalization, reformulation, enumeration, synthesis, etc. Second, Arabic language is addressed from a computational pragmatic perspective where the identification, classification and annotation processes are carried out using the information provided from the tagging of Spanish discourse markers and the alignments.</abstract>
@@ -5830,7 +5830,7 @@
     </paper>
     <paper id="614">
       <author><first>Jonas</first><last>Sjöbergh</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <title>A Multi-Lingual Dictionary of Dirty Words</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/133_paper.pdf</url>
       <abstract>We present a multi-lingual dictionary of dirty words. We have collected about 3,200 dirty words in several languages and built a database of these. The language with the most words in the database is English, though there are several hundred dirty words in for instance Japanese too. Words are classified into their general meaning, such as what part of the human anatomy they refer to. Words can also be assigned a nuance label to indicate if it is a cute word used when speaking to children, a very rude word, a clinical word etc. The database is available online and will hopefully be enlarged over time. It has already been used in research on for instance automatic joke generation and emotion detection.</abstract>
@@ -5838,7 +5838,7 @@
     </paper>
     <paper id="615">
       <author><first>Jonas</first><last>Sjöbergh</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <title>What is poorly Said is a Little Funny</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/79_paper.pdf</url>
       <abstract>We implement several different methods for generating jokes in English. The common theme is to intentionally produce poor utterances by breaking Grices maxims of conversation. The generated jokes are evaluated and compared to human made jokes. They are in general quite weak jokes, though there are a few high scoring jokes and many jokes that score higher than the most boring human joke.</abstract>
@@ -5854,7 +5854,7 @@
     <paper id="617">
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <title>Opinion Annotation in On-line <fixed-case>C</fixed-case>hinese Product Reviews</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/415_paper.pdf</url>
diff --git a/data/xml/L10.xml b/data/xml/L10.xml
index 625018ca29..98918c3e10 100644
--- a/data/xml/L10.xml
+++ b/data/xml/L10.xml
@@ -3,14 +3,14 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Seventh International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>'10)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
-      <editor><first>Mike</first><last>Rosner</last></editor>
-      <editor><first>Daniel</first><last>Tapias</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="michael-rosner"><first>Mike</first><last>Rosner</last></editor>
+      <editor id="daniel-tapias"><first>Daniel</first><last>Tapias</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Valletta, Malta</address>
       <month>May</month>
@@ -30,11 +30,11 @@
       <bibkey>dalianis-etal-2010-creating</bibkey>
     </paper>
     <paper id="2">
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Miquel</first><last>Collado</last></author>
       <author><first>Samuel</first><last>Reese</last></author>
       <author><first>Marina</first><last>Lloberes</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <title><fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 2.1: Five Years of Open-source Language Processing Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/14_Paper.pdf</url>
       <abstract>FreeLing is an open-source multilingual language processing library providing a wide range of language analyzers for several languages. It offers text processing and language annotation facilities to natural language processing application developers, simplifying the task of building those applications. FreeLing is customizable and extensible. Developers can use the default linguistic resources (dictionaries, lexicons, grammars, etc.) directly, or extend them, adapt them to specific domains, or even develop new ones for specific languages. This paper overviews the recent history of this tool, summarizes the improvements and extensions incorporated in the latest version, and depicts the architecture of the library. Special focus is brought to the fact and consequences of the library being open-source: After five years and over 35,000 downloads, a growing user community has extended the initial threelanguages (English, Spanish and Catalan) to eight (adding Galician, Italian, Welsh, Portuguese, and Asturian), proving that the collaborative open model is a productive approach for the development of NLP tools and resources.</abstract>
@@ -66,8 +66,8 @@
     </paper>
     <paper id="6">
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
-      <author><first>Juana Maria</first><last>Ruiz-Martínez</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
+      <author id="juana-maria-ruiz-martinez"><first>Juana Maria</first><last>Ruiz-Martínez</last></author>
       <title>An Annotated Dataset for Extracting Definitions and Hypernyms from the Web</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf</url>
       <abstract>This paper presents and analyzes an annotated corpus of definitions, created to train an algorithm for the automatic extraction of definitions and hypernyms from web documents. As an additional resource, we also include a corpus of non-definitions with syntactic patterns similar to those of definition sentences, e.g.: ""An android is a robot"" vs. ""Snowcap is unmistakable"". Domain and style independence is obtained thanks to the annotation of a large and domain-balanced corpus and to a novel pattern generalization algorithm based on word-class lattices (WCL). A lattice is a directed acyclic graph (DAG), a subclass of nondeterministic finite state automata (NFA). The lattice structure has the purpose of preserving the salient differences among distinct sequences, while eliminating redundant information. The WCL algorithm will be integrated into an improved version of the GlossExtractor Web application (Velardi et al., 2008). This paper is mostly concerned with a description of the corpus, the annotation strategy, and a linguistic analysis of the data. A summary of the WCL algorithm is also provided for the sake of completeness.</abstract>
@@ -82,8 +82,8 @@
       <bibkey>khokhlova-zakharov-2010-studying</bibkey>
     </paper>
     <paper id="8">
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <title>Using Linear Interpolation and Weighted Reordering Hypotheses in the <fixed-case>M</fixed-case>oses System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/23_Paper.pdf</url>
       <abstract>This paper proposes to introduce a novel reordering model in the open-source Moses toolkit. The main idea is to provide weighted reordering hypotheses to the SMT decoder. These hypotheses are built using a first-step Ngram-based SMT translation from a source language into a third representation that is called reordered source language. Each hypothesis has its own weight provided by the Ngram-based decoder. This proposed reordering technique offers a better and more efficient translation when compared to both the distance-based and the lexicalized reordering. In addition to this reordering approach, this paper describes a domain adaptation technique which is based on a linear combination of an specific in-domain and an extra out-domain translation models. Results for both approaches are reported in the Arabic-to-English 2008 IWSLT task. When implementing the weighted reordering hypotheses and the domain adaptation technique in the final translation system, translation results reach improvements up to 2.5 BLEU compared to a standard state-of-the-art Moses baseline system.</abstract>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="10">
       <author><first>Antoinette</first><last>Hawayek</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Giuseppe</first><last>Cappelli</last></author>
       <title>A Bilingual Dictionary <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage-<fixed-case>S</fixed-case>panish/<fixed-case>S</fixed-case>panish-<fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/27_Paper.pdf</url>
@@ -115,7 +115,7 @@
       <bibkey>sharoff-etal-2010-web</bibkey>
     </paper>
     <paper id="12">
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <title>A General Methodology for Equipping Ontologies with Time</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/29_Paper.pdf</url>
       <abstract>In the first part of this paper, we present a framework for enriching arbitrary upper or domain-specific ontologies with a concept of time. To do so, we need the notion of a time slice. Contrary to other approaches, we directly interpret the original entities as time slices in order to (i) avoid a duplication of the original ontology and (ii) to prevent a knowledge engineer from ontology rewriting. The diachronic representation of time is complemented by a sophisticated time ontology that supports underspecification and an arbitrarily fine granularity of time. As a showcase, we describe how the time ontology has been interfaced with the PROTON upper ontology. The second part investigates a temporal extension of RDF that replaces the usual triple notation by a more general tuple representation. In this setting, Hayes/ter Horst-like entailment rules are replaced by their temporal counterparts. Our motivation to move towards this direction is twofold: firstly, extending binary relation instances with time leads to a massive proliferation of useless objects (independently of the encoding); secondly, reasoning and querying with such extended relations is extremely complex, expensive, and error-prone.</abstract>
@@ -124,20 +124,20 @@
     <paper id="13">
       <author><first>Ting</first><last>Qian</last></author>
       <author><first>Kristy</first><last>Hollingshead</last></author>
-      <author><first>Su-youn</first><last>Yoon</last></author>
-      <author><first>Kyoung-young</first><last>Kim</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="su-youn-yoon"><first>Su-youn</first><last>Yoon</last></author>
+      <author id="kyoung-young-kim"><first>Kyoung-young</first><last>Kim</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <title>A Python Toolkit for Universal Transliteration</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/30_Paper.pdf</url>
       <abstract>We describe ScriptTranscriber, an open source toolkit for extracting transliterations in comparable corpora from languages written in different scripts. The system includes various methods for extracting potential terms of interest from raw text, for providing guesses on the pronunciations of terms, and for comparing two strings as possible transliterations using both phonetic and temporal measures. The system works with any script in the Unicode Basic Multilingual Plane and is easily extended to include new modules. Given comparable corpora, such as newswire text, in a pair of languages that use different scripts, ScriptTranscriber provides an easy way to mine transliterations from the comparable texts. This is particularly useful for underresourced languages, where training data for transliteration may be lacking, and where it is thus hard to train good transliterators. ScriptTranscriber provides an open source package that allows for ready incorporation of more sophisticated modules ― e.g. a trained transliteration model for a particular language pair. ScriptTranscriber is available as part of the nltk contrib source tree at <url>http://code.google.com/p/nltk/</url>.</abstract>
       <bibkey>qian-etal-2010-python</bibkey>
     </paper>
     <paper id="14">
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>Christophe</first><last>Roeder</last></author>
-      <author><first>William A.</first><last>Baumgartner Jr.</last></author>
-      <author><first>Lawrence E.</first><last>Hunter</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="christophe-roeder"><first>Christophe</first><last>Roeder</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner Jr.</last></author>
+      <author id="lawrence-hunter"><first>Lawrence E.</first><last>Hunter</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <title>Test Suite Design for Biomedical Ontology Concept Recognition Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/31_Paper.pdf</url>
       <abstract>Systems that locate mentions of concepts from ontologies in free text are known as ontology concept recognition systems. This paper describes an approach to the evaluation of the workings of ontology concept recognition systems through use of a structured test suite and presents a publicly available test suite for this purpose. It is built using the principles of descriptive linguistic fieldwork and of software testing. More broadly, we also seek to investigate what general principles might inform the construction of such test suites. The test suite was found to be effective in identifying performance errors in an ontology concept recognition system. The system could not recognize 2.1% of all canonical forms and no non-canonical forms at all. Regarding the question of general principles of test suite construction, we compared this test suite to a named entity recognition test suite constructor. We found that they had twenty features in total and that seven were shared between the two models, suggesting that there is a core of feature types that may be applicable to test suite construction for any similar type of application.</abstract>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="15">
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf</url>
       <abstract>Given the recent trend to evaluate the performance of word sense disambiguation systems in a more application-oriented set-up, we report on the construction of a multilingual benchmark data set for cross-lingual word sense disambiguation. The data set was created for a lexical sample of 25 English nouns, for which translations were retrieved in 5 languages, namely Dutch, German, French, Italian and Spanish. The corpus underlying the sense inventory was the parallel data set Europarl. The gold standard sense inventory was based on the automatic word alignments of the parallel corpus, which were manually verified. The resulting word alignments were used to perform a manual clustering of the translations over all languages in the parallel corpus. The inventory then served as input for the annotators of the sentences, who were asked to provide a maximum of three contextually relevant translations per language for a given focus word. The data set was released in the framework of the SemEval-2010 competition.</abstract>
@@ -163,7 +163,7 @@
     </paper>
     <paper id="17">
       <author><first>Claus</first><last>Zinn</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Jacquelijn</first><last>Ringersma</last></author>
       <title>An Evolving e<fixed-case>S</fixed-case>cience Environment for Research Data in Linguistics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/36_Paper.pdf</url>
@@ -200,7 +200,7 @@
       <bibkey>rentoumi-etal-2010-united</bibkey>
     </paper>
     <paper id="21">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>Handling of Missing Values in Lexical Acquisition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/45_Paper.pdf</url>
       <abstract>In this work we propose a strategy to reduce the impact of the sparse data problem in the tasks of lexical information acquisition based on the observation of linguistic cues. We propose a way to handle the uncertainty created by missing values, that is, when a zero value could mean either that the cue has not been observed because the word in question does not belong to the class, i.e. negative evidence, or that the word in question has just not been observed in the context sought by chance, i.e. lack of evidence. This uncertainty creates problems to the learner, because zero values for incompatible labelled examples make the cue lose its predictive capacity and even though some samples display the sought context, it is not taken into account. In this paper we present the results of our experiments to try to reduce this uncertainty by, as other authors do (Joanis et al. 2007, for instance), substituting zero values for pre-processed estimates. Here we present a first round of experiments that have been the basis for the estimates of linguistic information motivated by lexical classes. We obtained experimental results that show a clear benefit of the proposed approach.</abstract>
@@ -215,17 +215,17 @@
       <bibkey>carlsson-dalianis-2010-influence</bibkey>
     </paper>
     <paper id="23">
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Mireia</first><last>Farrús</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <title>Automatic and Human Evaluation Study of a Rule-based and a Statistical <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Machine Translation Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/47_Paper.pdf</url>
       <abstract>Machine translation systems can be classified into rule-based and corpus-based approaches, in terms of their core technology. Since both paradigms have largely been used during the last years, one of the aims in the research community is to know how these systems differ in terms of translation quality. To this end, this paper reports a study and comparison of a rule-based and a corpus-based (particularly, statistical) Catalan-Spanish machine translation systems, both of them freely available in the web. The translation quality analysis is performed under two different domains: journalistic and medical. The systems are evaluated by using standard automatic measures, as well as by native human evaluators. Automatic results show that the statistical system performs better than the rule-based system. Human judgements show that in the Spanish-to-Catalan direction the statistical system also performs better than the rule-based system, while in the Catalan-to-Spanish direction is the other way round. Although the statistical system obtains the best automatic scores, its errors tend to be more penalized by human judgements than the errors of the rule-based system. This can be explained because statistical errors are usually unexpected and they do not follow any pattern.</abstract>
       <bibkey>costa-jussa-etal-2010-automatic</bibkey>
     </paper>
     <paper id="24">
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <title>An Integrated Digital Tool for Accessing Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/48_Paper.pdf</url>
@@ -249,18 +249,18 @@
       <bibkey>benajiba-zitouni-2010-arabic</bibkey>
     </paper>
     <paper id="27">
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Kiyong</first><last>Lee</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title><fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case>: An International Standard for Semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/55_Paper.pdf</url>
       <abstract>In this paper, we present ISO-TimeML, a revised and interoperable version of the temporal markup language, TimeML. We describe the changes and enrichments made, while framing the effort in a more general methodology of semantic annotation. In particular, we assume a principled distinction between the annotation of an expression and the representation which that annotation denotes. This involves not only the specification of an annotation language for a particular phenomenon, but also the development of a meta-model that allows one to interpret the syntactic expressions of the specification semantically.</abstract>
       <bibkey>pustejovsky-etal-2010-iso</bibkey>
     </paper>
     <paper id="28">
-      <author><first>Ranka</first><last>Stanković</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <author><first>Olivera</first><last>Kitanović</last></author>
       <title><fixed-case>GIS</fixed-case> Application Improvement with Multilingual Lexical and Terminological Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/57_Paper.pdf</url>
@@ -268,8 +268,8 @@
       <bibkey>stankovic-etal-2010-gis</bibkey>
     </paper>
     <paper id="29">
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <title>A Database of Narrative Schemas</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/58_Paper.pdf</url>
       <abstract>This paper describes a new language resource of events and semantic roles that characterize real-world situations. Narrative schemas contain sets of related events (edit and publish), a temporal ordering of the events (edit before publish), and the semantic roles of the participants (authors publish books). This type of world knowledge was central to early research in natural language understanding, scripts being one of the main formalisms, they represented common sequences of events that occur in the world. Unfortunately, most of this knowledge was hand-coded and time consuming to create. Current machine learning techniques, as well as a new approach to learning through coreference chains, has allowed us to automatically extract rich event structure from open domain text in the form of narrative schemas. The narrative schema resource described in this paper contains approximately 5000 unique events combined into schemas of varying sizes. We describe the resource, how it is learned, and a new evaluation of the coverage of these schemas over unseen documents.</abstract>
@@ -292,15 +292,15 @@
       <bibkey>proisl-kabashi-2010-using</bibkey>
     </paper>
     <paper id="32">
-      <author><first>Xabier</first><last>Saralegi</last></author>
-      <author><first>Maddalen</first><last>Lopez de Lacalle</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
+      <author id="maddalen-lopez-de-lacalle"><first>Maddalen</first><last>Lopez de Lacalle</last></author>
       <title>Dictionary and Monolingual Corpus-based Query Translation for <fixed-case>B</fixed-case>asque-<fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/63_Paper.pdf</url>
       <abstract>This paper deals with the main problems that arise in the query translation process in dictionary-based Cross-lingual Information Retrieval (CLIR): translation selection, presence of Out-Of-Vocabulary (OOV) terms and translation of Multi-Word Expressions (MWE). We analyse to what extent each problem affects the retrieval performance for the Basque-English pair of languages, and the improvement obtained when using parallel corpora free methods to address them. To tackle the translation selection problem we provide novel extensions of an already existing monolingual target co-occurrence-based method, the Out-Of Vocabulary terms are dealt with by means of a cognate detection-based method and finally, for the Multi-Word Expression translation problem, a naïve matching technique is applied. The error analysis shows significant differences in the deterioration of the performance depending on the problem, in terms of Mean Average Precision (MAP), the translation selection problem being the cause of most of the errors. Otherwise, the proposed combined strategy shows a good performance to tackle the three above-mentioned main problems.</abstract>
       <bibkey>saralegi-lopez-de-lacalle-2010-dictionary</bibkey>
     </paper>
     <paper id="33">
-      <author><first>Véronika</first><last>Lux-Pogodalla</last></author>
+      <author id="veronika-lux"><first>Véronika</first><last>Lux-Pogodalla</last></author>
       <author><first>Dominique</first><last>Besagni</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <title><fixed-case>F</fixed-case>ast<fixed-case>K</fixed-case>wic, an “Intelligent“ Concordancer Using <fixed-case>FASTR</fixed-case></title>
@@ -310,7 +310,7 @@
     </paper>
     <paper id="34">
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
       <title>A Description of Morphological Features of <fixed-case>S</fixed-case>erbian: a Revision using Feature System Declaration</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/66_Paper.pdf</url>
@@ -318,7 +318,7 @@
       <bibkey>krstev-etal-2010-description</bibkey>
     </paper>
     <paper id="35">
-      <author><first>Plaban Kr.</first><last>Bhowmick</last></author>
+      <author id="plaban-kr-bhowmick"><first>Plaban Kr.</first><last>Bhowmick</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
       <author><first>Pabitra</first><last>Mitra</last></author>
       <title>Determining Reliability of Subjective and Multi-label Emotion Annotation through Novel Fuzzy Agreement Measure</title>
@@ -328,7 +328,7 @@
     </paper>
     <paper id="36">
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <title><fixed-case>FIDJI</fixed-case>: Web Question-Answering at Quaero 2009</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/68_Paper.pdf</url>
       <abstract>This paper presents the participation of FIDJI system to the Web Question-Answering evaluation campaign organized by Quaero in 2009. FIDJI is an open-domain question-answering system which combines syntactic information with traditional QA techniques such as named entity recognition and term weighting in order to validate answers through multiple documents. It was originally designed to process ``clean'' document collections. Overall results are significantly lower than in traditional campaigns but results (for French evaluation) are quite good compared to other state-of-the-art systems. They show that a syntax-based strategy, applied on uncleaned Web data, can still obtain good results. Moreover, we obtain much higher scores on ``complex'' questions, i.e. `how' and `why' questions, which are more representative of real user needs. These results show that questioning the Web with advanced linguistic techniques can be done without heavy pre-processing and with results that come near to best systems that use strong resources and large structured indexes.</abstract>
@@ -358,9 +358,9 @@
       <bibkey>sornlertlamvanich-etal-2010-language</bibkey>
     </paper>
     <paper id="40">
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title><fixed-case>P</fixed-case>ropbank Frameset Annotation Guidelines Using a Dedicated Editor, Cornerstone</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/73_Paper.pdf</url>
       <abstract>This paper gives guidelines of how to create and update Propbank frameset files using a dedicated editor, Cornerstone. Propbank is a corpus in which the arguments of each verb predicate are annotated with their semantic roles in relation to the predicate. Propbank annotation also requires the choice of a sense ID for each predicate. Thus, for each predicate in Propbank, there exists a corresponding frameset file showing the expected predicate argument structure of each sense related to the predicate. Since most Propbank annotations are based on the predicate argument structure defined in the frameset files, it is important to keep the files consistent, simple to read as well as easy to update. The frameset files are written in XML, which can be difficult to edit when using a simple text editor. Therefore, it is helpful to develop a user-friendly editor such as Cornerstone, specifically customized to create and edit frameset files. Cornerstone runs platform independently, is light enough to run as an X11 application and supports multiple languages such as Arabic, Chinese, English, Hindi and Korean.</abstract>
@@ -410,7 +410,7 @@
     </paper>
     <paper id="46">
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Fanny</first><last>Grandry</last></author>
       <title>Hybrid Citation Extraction from Patents</title>
@@ -419,8 +419,8 @@
       <bibkey>galibert-etal-2010-hybrid</bibkey>
     </paper>
     <paper id="47">
-      <author><first>Luca</first><last>Dini</last></author>
-      <author><first>Giampaolo</first><last>Mazzini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
+      <author id="giampaolo-mazzini"><first>Giampaolo</first><last>Mazzini</last></author>
       <title>The Impact of Grammar Enhancement on Semantic Resources Induction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/82_Paper.pdf</url>
       <abstract>In this paper describes the effects of the evolution of an Italian dependency grammar on a task of multilingual FrameNet acquisition. The task is based on the creation of virtual English/Italian parallel annotation corpora, which are then aligned at dependency level by using two manually encoded grammar based dependency parsers. We show how the evolution of the LAS (Labeled Attachment Score) metric for the considered grammar has a direct impact on the quality of the induced FrameNet, thus proving that the evolution of the quality of syntactic resources is mirrored by an analogous evolution in semantic ones. In particular we show that an improvement of 30% in LAS causes an improvement of precision for the induced resource ranging from 5% to 10%, depending on the type of evaluation.</abstract>
@@ -429,7 +429,7 @@
     <paper id="48">
       <author><first>Yiou</first><last>Wang</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Canasai</first><last>Kruengkrai</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <title>Adapting <fixed-case>C</fixed-case>hinese Word Segmentation for Machine Translation Based on Short Units</title>
@@ -450,11 +450,11 @@
     </paper>
     <paper id="50">
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Aaron</first><last>Broadwell</last></author>
-      <author><first>Jennifer</first><last>Stromer-Galley</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="george-aaron-broadwell"><first>Aaron</first><last>Broadwell</last></author>
+      <author id="jennifer-stromer-galley"><first>Jennifer</first><last>Stromer-Galley</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <title><fixed-case>MPC</fixed-case>: A Multi-Party Chat Corpus for Modeling Social Phenomena in Discourse</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/85_Paper.pdf</url>
       <abstract>In this paper, we describe our experience with collecting and creating an annotated corpus of multi-party online conversations in a chat-room environment. This effort is part of a larger project to develop computational models of social phenomena such as agenda control, influence, and leadership in on-line interactions. Such models will help capturing the dialogue dynamics that are essential for developing, among others, realistic human-machine dialogue systems, including autonomous virtual chat agents. In this paper we describe data collection method used and the characteristics of the initial dataset of English chat. We have devised a multi-tiered collection process in which the subjects start from simple, free-flowing conversations and progress towards more complex and structured interactions. In this paper, we report on the first two stages of this process, which were recently completed. The third, large-scale collection effort is currently being conducted. All English dialogue has been annotated at four levels: communication links, dialogue acts, local topics and meso-topics. Some details of these annotations will be discussed later in this paper, although a full description is impossible within the scope of this article.</abstract>
@@ -469,7 +469,7 @@
     </paper>
     <paper id="52">
       <author><first>Alberto</first><last>Simões</last></author>
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <author><first>Rita</first><last>Farinha</last></author>
       <title>Processing and Extracting Data from Dicionário Aberto</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/90_Paper.pdf</url>
@@ -484,8 +484,8 @@
       <bibkey>waltinger-2010-germanpolarityclues</bibkey>
     </paper>
     <paper id="54">
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
-      <author><first>Guadalupe Aguado</first><last>de Cea</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe Aguado</first><last>de Cea</last></author>
       <title>Ontology-based Interoperation of Linguistic Tools for an Improved Lemma Annotation in <fixed-case>S</fixed-case>panish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/92_Paper.pdf</url>
       <abstract>In this paper, we present an ontology-based methodology and architecture for the comparison, assessment, combination (and, to some extent, also contrastive evaluation) of the results of different linguistic tools. More specifically, we describe an experiment aiming at the improvement of the correctness of lemma tagging for Spanish. This improvement was achieved by means of the standardisation and combination of the results of three different linguistic annotation tools (Bitexts DataLexica, Connexors FDG Parser and LACELLs POS tagger), using (1) ontologies, (2) a set of lemma tagging correction rules, determined empirically during the experiment, and (3) W3C standard languages, such as XML, RDF(S) and OWL. As we show in the results of the experiment, the interoperation of these tools by means of ontologies and the correction rules applied in the experiment improved significantly the quality of the resulting lemma tagging (when compared to the separate lemma tagging performed by each of the tools that we made interoperate).</abstract>
@@ -508,8 +508,8 @@
       <bibkey>campbell-tabata-2010-software</bibkey>
     </paper>
     <paper id="57">
-      <author><first>Ana Cristina</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="ana-cristina-mendes"><first>Ana Cristina</first><last>Mendes</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Paula Vaz</first><last>Lobo</last></author>
       <title>Named Entity Recognition in Questions: Towards a Golden Collection</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/97_Paper.pdf</url>
@@ -519,8 +519,8 @@
     <paper id="58">
       <author><first>Patrizia</first><last>Paggio</last></author>
       <author><first>Jens</first><last>Allwood</last></author>
-      <author><first>Elisabeth</first><last>Ahlsén</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="elisabeth-ahlsen"><first>Elisabeth</first><last>Ahlsén</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Costanza</first><last>Navarretta</last></author>
       <title>The <fixed-case>NOMCO</fixed-case> Multimodal <fixed-case>N</fixed-case>ordic Resource - Goals and Characteristics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/98_Paper.pdf</url>
@@ -528,14 +528,14 @@
       <bibkey>paggio-etal-2010-nomco</bibkey>
     </paper>
     <paper id="59">
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <author><first>Makoto</first><last>Yamazaki</last></author>
       <author><first>Takehiko</first><last>Maruyama</last></author>
       <author><first>Masaya</first><last>Yamaguchi</last></author>
       <author><first>Hideki</first><last>Ogura</last></author>
       <author><first>Wakako</first><last>Kashino</last></author>
       <author><first>Toshinobu</first><last>Ogiso</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <title>Design, Compilation, and Preliminary Analyses of <fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/99_Paper.pdf</url>
@@ -561,7 +561,7 @@
     </paper>
     <paper id="62">
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Model Summaries for Location-related Images</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/102_Paper.pdf</url>
       <abstract>At present there is no publicly available data set to evaluate the performance of different summarization systems on the task of generating location-related extended image captions. In this paper we describe a corpus of human generated model captions in English and German. We have collected 932 model summaries in English from existing image descriptions and machine translated these summaries into German. We also performed post-editing on the translated German summaries to ensure high quality. Both English and German summaries are evaluated using a readability assessment as in DUC and TAC to assess their quality. Our model summaries performed similar to the ones reported in Dang (2005) and thus are suitable for evaluating automatic summarization systems on the task of generating image descriptions for location related images. In addition, we also investigated whether post-editing of machine-translated model summaries is necessary for automated ROUGE evaluations. We found a high correlation in ROUGE scores between post-edited and non-post-edited model summaries which indicates that the expensive process of post-editing is not necessary.</abstract>
@@ -587,7 +587,7 @@
       <bibkey>jacquemin-2010-derivational</bibkey>
     </paper>
     <paper id="65">
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <author><first>Dan</first><last>Parvaz</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
       <author><first>Christy</first><last>Doran</last></author>
@@ -600,7 +600,7 @@
     </paper>
     <paper id="66">
       <author><first>Mahdi</first><last>Mohseni</last></author>
-      <author><first>Behrouz</first><last>Minaei-bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-bidgoli</last></author>
       <title>A <fixed-case>P</fixed-case>ersian Part-Of-Speech Tagger Based on Morphological Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf</url>
       <abstract>This paper describes a method based on morphological analysis of words for a Persian Part-Of-Speech (POS) tagging system. This is a main part of a process for expanding a large Persian corpus called Peyekare (or Textual Corpus of Persian Language). Peykare is arranged into two parts: annotated and unannotated parts. We use the annotated part in order to create an automatic morphological analyzer, a main segment of the system. Morphosyntactic features of Persian words cause two problems: the number of tags is increased in the corpus (586 tags) and the form of the words is changed. This high number of tags debilitates any taggers to work efficiently. From other side the change of word forms reduces the frequency of words with the same lemma; and the number of words belonging to a specific tag reduces as well. This problem also has a bad effect on statistical taggers. The morphological analyzer by removing the problems helps the tagger to cover a large number of tags in the corpus. Using a Markov tagger the method is evaluated on the corpus. The experiments show the efficiency of the method in Persian POS tagging.</abstract>
@@ -608,7 +608,7 @@
     </paper>
     <paper id="67">
       <author><first>Olga</first><last>Babko-Malaya</last></author>
-      <author><first>Dan</first><last>Hunter</last></author>
+      <author id="dan-hunter"><first>Dan</first><last>Hunter</last></author>
       <author><first>Connie</first><last>Fournelle</last></author>
       <author><first>Jim</first><last>White</last></author>
       <title>Evaluation of Document Citations in Phase 2 Gale Distillation</title>
@@ -637,14 +637,14 @@
     </paper>
     <paper id="70">
       <author><first>Silvia</first><last>Pareti</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <title>Annotating Attribution Relations: Towards an <fixed-case>I</fixed-case>talian Discourse Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/111_Paper.pdf</url>
       <abstract>In this paper we describe the development of a schema for the annotation of attribution relations and present the first findings and some relevant issues concerning this phenomenon. Following the D-LTAG approach to discourse, we have developed a lexically anchored description of attribution, considering this relation, contrary to the approach in the PDTB, independently from other discourse relations. This approach has allowed us to deal with the phenomenon in a broader perspective than previous studies, reaching therefore a more accurate description of it and making it possible to raise some still unaddressed issues. Following this analysis, we propose an annotation schema and discuss the first results concerning its applicability. The schema has been applied to a pilot portion of the ISST corpus of Italian and represents the initial phase of a project aiming at the creation of an Italian Discourse Treebank. We believe this work will raise some awareness concerning the fundamental importance of attribution relations. The identification of the source has in fact strong implications for the attributed material. Moreover, it will make overt the complexity of a phenomenon for long underestimated.</abstract>
       <bibkey>pareti-prodanof-2010-annotating</bibkey>
     </paper>
     <paper id="71">
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>David</first><last>Benyon</last></author>
       <author><first>Preben</first><last>Hansen</last></author>
       <author><first>Oil</first><last>Mival</last></author>
@@ -654,7 +654,7 @@
       <bibkey>webb-etal-2010-evaluating</bibkey>
     </paper>
     <paper id="72">
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Bengt</first><last>Dahlqvist</last></author>
       <author><first>Éva Á.</first><last>Csató</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
@@ -683,7 +683,7 @@
     </paper>
     <paper id="75">
       <author><first>Oscar</first><last>Saz</last></author>
-      <author><first>Eduardo</first><last>Lleida</last></author>
+      <author id="eduardo-lleida"><first>Eduardo</first><last>Lleida</last></author>
       <author><first>Carlos</first><last>Vaquero</last></author>
       <author><first>W.-Ricardo</first><last>Rodríguez</last></author>
       <title>The Alborada-<fixed-case>I</fixed-case>3<fixed-case>A</fixed-case> Corpus of Disordered Speech</title>
@@ -693,7 +693,7 @@
     </paper>
     <paper id="76">
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>James</first><last>Thomas</last></author>
       <author><first>Mark</first><last>Rickinson</last></author>
       <author><first>Sandy</first><last>Oliver</last></author>
@@ -769,9 +769,9 @@
       <bibkey>nakano-etal-2010-construction</bibkey>
     </paper>
     <paper id="84">
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>Patricia</first><last>Gonçalves</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="patricia-goncalves"><first>Patricia</first><last>Gonçalves</last></author>
       <title>Top-Performing Robust Constituency Parsing of <fixed-case>P</fixed-case>ortuguese: Freely Available in as Many Ways as you Can Get it</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/136_Paper.pdf</url>
       <abstract>In this paper we present LX-Parser, a probabilistic, robust constituency parser for Portuguese. This parser achieves ca. 88% f-score in the labeled bracketing task, thus reaching a state-of-the-art performance score that is in line with those that are currently obtained by top-ranking parsers for English, the most studied natural language. To the best of our knowledge, LX-Parser is the first state-of-the-art, robust constituency parser for Portuguese that is made freely available. This parser is being distributed in a variety of ways, each suited for a different type of usage. More specifically, LX-Parser is being made available (i) as a downloadable, stand-alone parsing tool that can be run locally by its users; (ii) as a Web service that exposes an interface that can be invoked remotely and transparently by client applications; and finally (iii) as an on-line parsing service, aimed at human users, that can be accessed through any common Web browser.</abstract>
@@ -781,21 +781,21 @@
       <author><first>Sylviane</first><last>Cardey</last></author>
       <author><first>Krzysztof</first><last>Bogacki</last></author>
       <author><first>Xavier</first><last>Blanco</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>Resources for Controlled Languages for Alert Messages and Protocols in the <fixed-case>E</fixed-case>uropean Perspective</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/137_Paper.pdf</url>
       <abstract>This paper is concerned with resources for controlled languages for alert messages and protocols in the European perspective. These resources have been produced as the outcome of a project (Alert Messages and Protocols: MESSAGE) which has been funded with the support of the European Commission - Directorate-General Justice, Freedom and Security, and with the specific objective of 'promoting and supporting the development of security standards, and an exchange of know-how and experience on protection of people'. The MESSAGE project involved the development and transfer of a methodology for writing safe and safely translatable alert messages and protocols created by Centre Tesnière in collaboration with the aircraft industry, the health profession, and emergency services by means of a consortium of four partners to their four European member states in their languages (ES, FR (Coordinator), GB, PL). The paper describes alert messages and protocols, controlled languages for safety and security, the target groups involved, controlled language evaluation, dissemination, the resources that are available, both Freely available and From Owner, together with illustrations of the resources, and the potential transferability to other sectors and users.</abstract>
       <bibkey>cardey-etal-2010-resources</bibkey>
     </paper>
     <paper id="86">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title><fixed-case>MULTEXT</fixed-case>-East Version 4: Multilingual Morphosyntactic Specifications, Lexicons and Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/138_Paper.pdf</url>
       <abstract>The paper presents the fourth, ``Mondilex'' edition of the MULTEXT-East language resources, a multilingual dataset for language engineering research and development, focused on the morphosyntactic level of linguistic description. This standardised and linked set of resources covers a large number of mainly Central and Eastern European languages and includes the EAGLES-based morphosyntactic specifications; morphosyntactic lexica; and annotated parallel, comparable, and speech corpora. The fourth release of these resources introduces XML-encoded morphosyntactic specifications and adds six new languages, bringing the total to 16: to Bulgarian, Croatian, Czech, Estonian, English, Hungarian, Romanian, Serbian, Slovene, and the Resian dialect of Slovene it adds Macedonian, Persian, Polish, Russian, Slovak, and Ukrainian. This dataset, unique in terms of languages covered and the wealth of encoding, is extensively documented, and freely available for research purposes at <url>http://nl.ijs.si/ME/V4/</url>.</abstract>
       <bibkey>erjavec-2010-multext</bibkey>
     </paper>
     <paper id="87">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Nina</first><last>Ledinek</last></author>
@@ -816,15 +816,15 @@
     <paper id="89">
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <title>Learning to Mine Definitions from <fixed-case>S</fixed-case>lovene Structured and Unstructured Knowledge-Rich Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/141_Paper.pdf</url>
       <abstract>The paper presents an innovative approach to extract Slovene definition candidates from domain-specific corpora using morphosyntactic patterns, automatic terminology recognition and semantic tagging with wordnet senses. First, a classification model was trained on examples from Slovene Wikipedia which was then used to find well-formed definitions among the extracted candidates. The results of the experiment are encouraging, with accuracy ranging from 67% to 71%. The paper also addresses some drawbacks of the approach and suggests ways to overcome them in future work.</abstract>
       <bibkey>fiser-etal-2010-learning</bibkey>
     </paper>
     <paper id="90">
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <title>A Differential Semantics Approach to the Annotation of Synsets in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/142_Paper.pdf</url>
       <abstract>We describe a new method for sentiment load annotation of the synsets of a wordnet, along the principles of Osgoods Semantic Differential theory and extending the Kamp and Marx calculus, by taking into account not only the WordNet structure but also the SUMO/MILO (Niles &amp; Pease, 2001) and DOMAINS (Bentivogli et al., 2004) knowledge sources. We discuss the method to annotate all the synsets in PWN2.0, irrespective of their part of speech. As the number of possible factors (semantic oppositions, along which the synsets are ranked) is very large, we developed also an application allowing the text analyst to select the most discriminating factors for the type of text to be analyzed. Once the factors have been selected, the underlying wordnet is marked-up on the fly and it can be used for the intended textual analysis. We anticipate that these annotations can be imported in other language wordnets, provided they are aligned to PWN2.0. The method for the synsets annotation generalizes the usual subjectivity mark-up (positive, negative and objective) according to a user-based multi-criteria differential semantics model.</abstract>
@@ -838,7 +838,7 @@
       <bibkey>grishina-2010-multimodal</bibkey>
     </paper>
     <paper id="92">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title>Lingua-Align: An Experimental Toolbox for Automatic Tree-to-Tree Alignment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/144_Paper.pdf</url>
       <abstract>In this paper we present an experimental toolbox for automatic tree-to-tree alignment based on a binary classification model. The aligner implements a recurrent architecture for structural prediction using history features and a sequential classification procedure. The discriminative base classifier uses a log-linear model in the current setup which enables simple integration of various features extracted from the data. The Lingua-Align toolbox provides a flexible framework for feature extraction including contextual properties and implements several alignment inference procedures. Various settings and constraints can be controlled via a simple frontend or called from external scripts. Lingua-Align supports different treebank formats and includes additional tools for conversion and evaluation. In our experiments we can show that our tree aligner produces results with high quality and outperforms unsupervised techniques proposed otherwise. It also integrates well with another existing tool for manual tree alignment which makes it possible to quickly integrate additional training material and to run semi-automatic alignment strategies.</abstract>
@@ -869,12 +869,12 @@
       <bibkey>wawer-2010-sentiment</bibkey>
     </paper>
     <paper id="96">
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Garbiñe</first><last>Aranbarri</last></author>
       <author><first>Klara</first><last>Ceberio</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Bittor</first><last>Laskurain</last></author>
-      <author><first>Ruben</first><last>Urizar</last></author>
+      <author id="ruben-urizar"><first>Ruben</first><last>Urizar</last></author>
       <title>A Morphological Processor Based on <fixed-case>F</fixed-case>oma for <fixed-case>B</fixed-case>iscayan (a <fixed-case>B</fixed-case>asque dialect)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/150_Paper.pdf</url>
       <abstract>We present a new morphological processor for Biscayan, a dialect of Basque, developed on the description of the morphology of standard Basque. The database for the standard morphology has been extended for dialects and an open-source tool for morphological description named foma is used for building the processor. Biscayan is a dialect of the Basque language spoken mainly in Biscay, a province on the western of the Basque Country. The description of the lexicon and the morphotactics (or word grammar) for the standard Basque was carried out using a relational database and the database has been extended in order to include dialectal variants linked to the standard entries. XuxenB, a spelling checker/corrector for this dialect, is the first application of this work. Additionally to the basic analyzer used for spelling, a new transducer is included. It is an enhanced analyzer for linking standard form with the corresponding standard ones. It is used in correction for generation of proposals when in the input text appear standard forms which we want to replace with dialectal forms.</abstract>
@@ -891,14 +891,14 @@
       <bibkey>przepiorkowski-etal-2010-recent</bibkey>
     </paper>
     <paper id="98">
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Sara</first><last>Silveira</last></author>
       <author><first>Sérgio</first><last>Castro</last></author>
       <author><first>Mariana</first><last>Avelãs</last></author>
       <author><first>Clara</first><last>Pinto</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <title>Developing a Deep Linguistic Databank Supporting a Collection of Treebanks: the <fixed-case>CINTIL</fixed-case> <fixed-case>D</fixed-case>eep<fixed-case>G</fixed-case>ram<fixed-case>B</fixed-case>ank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/154_Paper.pdf</url>
       <abstract>Corpora of sentences annotated with grammatical information have been deployed by extending the basic lexical and morphological data with increasingly complex information, such as phrase constituency, syntactic functions, semantic roles, etc. As these corpora grow in size and the linguistic information to be encoded reaches higher levels of sophistication, the utilization of annotation tools and, above all, supporting computational grammars appear no longer as a matter of convenience but of necessity. In this paper, we report on the design features, the development conditions and the methodological options of a deep linguistic databank, the CINTIL DeepGramBank. In this corpus, sentences are annotated with fully fledged linguistically informed grammatical representations that are produced by a deep linguistic processing grammar, thus consistently integrating morphological, syntactic and semantic information. We also report on how such corpus permits to straightforwardly obtain a whole range of past generation annotated corpora (POS, NER and morphology), current generation treebanks (constituency treebanks, dependency banks, propbanks) and next generation databanks (logical form banks) simply by means of a very residual selection/extraction effort to get the appropriate ""views"" exposing the relevant layers of information.</abstract>
@@ -914,8 +914,8 @@
       <bibkey>borin-etal-2010-diabase</bibkey>
     </paper>
     <paper id="100">
-      <author><first>Anne</first><last>Abeillé</last></author>
-      <author><first>Danièle</first><last>Godard</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
+      <author id="daniele-godard"><first>Danièle</first><last>Godard</last></author>
       <title>The Grande Grammaire du Français Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/157_Paper.pdf</url>
       <abstract>We present a new reference Grammar of French (La Grande Grammaire du français), which is a collective project (gathering around fifty contributors), producing a book (about 2200 pages, to be published en 2011) and associated databases. Like the recent reference grammars of the other Romance Languages, it takes into account the important results of the linguistic research of the past thrity years, while aiming at a non specialist audience and avoiding formalization. We differ from existing French grammar by being focused on contemporary French from a purely descriptive point of view, and by taking spoken data into account. We include a description of all the syntactic phenomena, as well as lexical, semantic, pragmatic and prosodic insights, specially as they interact with syntax. The analysis concerns the data from contemporary written French, but also includes data from spoken corpora and regional or non standard French (when accessible). Throughout the grammar, a simple phrase structure grammar is used, in order to maintain a common representation. The analyses are modular with a strict division of labor between morphology, syntax and semantics. From the syntactic point of view, POS are also distinguished from grammatical relations (or functions). The databases include a terminological glossary, different lexical databases for certain POS, certain valence frames and certain semantic classes, and a bibliographical database.</abstract>
@@ -941,8 +941,8 @@
     </paper>
     <paper id="103">
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <title>A Typology of Near-Identity Relations for Coreference (<fixed-case>NIDENT</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/160_Paper.pdf</url>
       <abstract>The task of coreference resolution requires people or systems to decide when two referring expressions refer to the 'same' entity or event. In real text, this is often a difficult decision because identity is never adequately defined, leading to contradictory treatment of cases in previous work. This paper introduces the concept of 'near-identity', a middle ground category between identity and non-identity, to handle such cases systematically. We present a typology of Near-Identity Relations (NIDENT) that includes fifteen types―grouped under four main families―that capture a wide range of ways in which (near-)coreference relations hold between discourse entities. We validate the theoretical model by annotating a small sample of real data and showing that inter-annotator agreement is high enough for stability (K=0.58, and up to K=0.65 and K=0.84 when leaving out one and two outliers, respectively). This work enables subsequent creation of the first internally consistent language resource of this type through larger annotation efforts.</abstract>
@@ -950,7 +950,7 @@
     </paper>
     <paper id="104">
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Paola</first><last>Monachesi</last></author>
       <title>Interacting Semantic Layers of Annotation in <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>, a Reference Corpus of Contemporary Written <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/162_Paper.pdf</url>
@@ -958,12 +958,12 @@
       <bibkey>schuurman-etal-2010-interacting</bibkey>
     </paper>
     <paper id="105">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Marc</first><last>Kemps-Snijders</last></author>
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <author><first>Peter</first><last>Withers</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Claus</first><last>Zinn</last></author>
       <title>A Data Category Registry- and Component-based Metadata Framework</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/163_Paper.pdf</url>
@@ -995,7 +995,7 @@
     </paper>
     <paper id="109">
       <author><first>Naushad</first><last>UzZaman</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <title><fixed-case>TRIOS</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank Corpus: Extended <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank Corpus with Help of Deep Understanding of Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/169_Paper.pdf</url>
       <abstract>TimeBank (Pustejovsky et al, 2003a), a reference for TimeML (Pustejovsky et al, 2003b) compliant annotation, is widely used temporally annotated corpus in the community. It captures time expressions, events, and relations between events and event and temporal expression; but there is room for improvements in this hand-annotated widely used TimeBank corpus. This work is one such effort to extend the TimeBank corpus. Our first goal is to suggest missing TimeBank events and temporal expressions, i.e. events and temporal expressions that were missed by TimeBank annotators. Along with that this paper also suggests some additions to TimeML language by adding new event features (ontology type), some more SLINKs and also relations between events with their arguments, which we call RLINK (relation link). With our new suggestions we present the TRIOS-TimeBank corpus, an extended TimeBank corpus. We conclude by suggesting our future work to clean the TimeBank corpus even more and automatically generating larger temporally annotated corpus for the community.</abstract>
@@ -1003,7 +1003,7 @@
     </paper>
     <paper id="110">
       <author><first>Adam</first><last>Funk</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <title>Ontology-Based Categorization of Web Services with Machine Learning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/170_Paper.pdf</url>
       <abstract>We present the problem of categorizing web services according to a shallow ontology for presentation on a specialist portal, using their WSDL and associated textual documents found by a crawler. We treat this as a text classification problem and apply first information extraction (IE) techniques (voting using keywords weight according to their context), then machine learning (ML), and finally a combined approach in which ML has priority over weighted keywords, but the latter can still make up categorizations for services for which ML does not produce enough. We evaluate the techniques (using data manually annotated through the portal, which we also use as the training data for ML) according to standard IE measures for flat categorization as well as the Balanced Distance Metric (more suitable for ontological classification) and compare them with related work in web service categorization. The ML and combined categorization results are good and the system is designed to take users' contributions through the portal's Web 2.0 features as additional training data.</abstract>
@@ -1027,7 +1027,7 @@
       <bibkey>nir-etal-2010-morphologically</bibkey>
     </paper>
     <paper id="113">
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <title>Non-verbal Signals for Turn-taking and Feedback</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/173_Paper.pdf</url>
       <abstract>This paper concerns non-verbal communication, and describes especially the use of eye-gaze to signal turn-taking and feedback in conversational settings. Eye-gaze supports smooth interaction by providing signals that the interlocutors interpret with respect to such conversational functions as taking turns and giving feedback. New possibilities to study the effect of eye-gaze on the interlocutors communicative behaviour have appeared with the eye-tracking technology which in the past years has matured to the level where its use to study naturally occurring dialogues have become easier and more reliable to conduct. It enables the tracking of eye-fixations and gaze-paths, and thus allows analysis of the persons turn-taking and feedback behaviour through the analysis of their focus of attention. In this paper, experiments on the interlocutors non-verbal communication in conversational settings using the eye-tracker are reported, and results of classifying turn-taking using eye-gaze and gesture information are presented. Also the hybrid method that combines signal level analysis with human interpretation is discussed.</abstract>
@@ -1035,10 +1035,10 @@
     </paper>
     <paper id="114">
       <author><first>Alejandro</first><last>Abejón</last></author>
-      <author><first>Doroteo T.</first><last>Toledano</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo T.</first><last>Toledano</last></author>
       <author><first>Danilo</first><last>Spada</last></author>
       <author><first>González</first><last>Victor</last></author>
-      <author><first>Daniel Hernández</first><last>López</last></author>
+      <author id="daniel-hernandez-lopez"><first>Daniel Hernández</first><last>López</last></author>
       <title>A Study of the Influence of Speech Type on Automatic Language Recognition Performance</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/174_Paper.pdf</url>
       <abstract>Automatic language recognition on spontaneous speech has experienced a rapid development in the last few years. This development has been in part due to the competitive technological Language Recognition Evaluations (LRE) organized by the National Institute of Standards and Technology (NIST). Until now, the need to have clearly defined and consistent evaluations has kept some real-life application issues out of these evaluations. In particular, all past NIST LREs have used exclusively conversational telephone speech (CTS) for development and test. Fortunately this has changed in the current NIST LRE since it includes also broadcast speech. However, for testing only the telephone speech found in broadcast data will be used. In real-life applications, there could be several more types of speech and systems could be forced to use a mix of different types of data for training and development and recognition. In this article, we have defined a test-bed including several types of speech data and have analyzed how a typical language recognition system works using different types of speech, and also a combination of different types of speech, for training and testing.</abstract>
@@ -1053,7 +1053,7 @@
     </paper>
     <paper id="116">
       <author><first>François</first><last>Lefebvre-Albaret</last></author>
-      <author><first>Patrice</first><last>Dalle</last></author>
+      <author id="patrice-dalle"><first>Patrice</first><last>Dalle</last></author>
       <title>Video Retrieval in Sign Language Videos : How to Model and Compare Signs?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/176_Paper.pdf</url>
       <abstract>This paper deals with the problem of finding sign occurrences in a sign language (SL) video. It begins with an analysis of sign models and the way they can take into account the sign variability. Then, we review the most popular technics dedicated to automatic sign language processing and we focus on their adaptation to model sign variability. We present a new method to provide a parametric description of the sign as a set of continuous and discrete parameters. Signs are classified according to there categories (ballistic movements, circles ...), the symmetry between the hand movements, hand absolute and relative locations. Membership grades to sign categories and continuous parameter comparisons can be combined to estimate the similarity between two signs. We set out our system and we evaluate how much time can be saved when looking for a sign in a french sign language video. By now, our formalism only uses hand 2D locations, we finally discuss about the way of integrating other parameters as hand shape or facial expression in our framework.</abstract>
@@ -1116,7 +1116,7 @@
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <title>Annotation of Discourse Relations for Conversational Spoken Dialogs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/184_Paper.pdf</url>
       <abstract>In this paper, we make a qualitative and quantitative analysis of discourse relations within the LUNA conversational spoken dialog corpus. In particular, we first describe the Penn Discourse Treebank (PDTB) and then we detail the adaptation of its annotation scheme to the LUNA corpus of Italian task-oriented dialogs in the domain of software/hardware assistance. We discuss similarities and differences between our approach and the PDTB paradigm and point out the peculiarities of spontaneous dialogs w.r.t. written text, which motivated some changes in the annotation strategy. In particular, we introduced the annotation of relations between non-contiguous arguments and we modified the sense hierarchy in order to take into account the important role of pragmatics in dialogs. In the final part of the paper, we present a comparison between the sense and connective frequency in a representative subset of the LUNA corpus and in the PDTB. Such analysis confirmed the differences between the two corpora and corroborates our choice to introduce dialog-specific adaptations.</abstract>
@@ -1133,11 +1133,11 @@
     <paper id="125">
       <author><first>Ludovic</first><last>Quintard</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
       <author><first>Dominique</first><last>Laurent</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title>Question Answering on Web Data: The <fixed-case>QA</fixed-case> Evaluation in Quæro</title>
@@ -1156,11 +1156,11 @@
     <paper id="127">
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Ludovic</first><last>Quintard</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <author><first>Sophie</first><last>Aubin</last></author>
-      <author><first>Laurent</first><last>Gillard</last></author>
+      <author id="laurent-gillard"><first>Laurent</first><last>Gillard</last></author>
       <author><first>Jean-Pierre</first><last>Raysz</last></author>
       <author><first>Delphine</first><last>Pois</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
@@ -1192,7 +1192,7 @@
     </paper>
     <paper id="130">
       <author><first>Muhammad Kamran</first><last>Malik</last></author>
-      <author><first>Tafseer</first><last>Ahmed</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed</last></author>
       <author><first>Sebastian</first><last>Sulger</last></author>
       <author><first>Tina</first><last>Bögel</last></author>
       <author><first>Atif</first><last>Gulzar</last></author>
@@ -1206,7 +1206,7 @@
     </paper>
     <paper id="131">
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title>Towards an Integrated Scheme for Semantic Annotation of Multimodal Dialogue Data</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/195_Paper.pdf</url>
       <abstract>Recent years witness a growing interest in the use of multimodal data for modelling of communicative behaviour in dialogue. Dybkjaer and Bernsen (2002), point out that coding schemes for multimodal data are used solely by their creators. Standardisation has been achieved to some extent for coding behavioural features for certain nonverbal expressions, e.g. for facial expression, however, for the semantic annotation of such expressions combined with other modalities such as speech there is still a long way to go. The majority of existing dialogue act annotation schemes that are designed to code semantic and pragmatic dialogue information are limited to analysis of spoken modality. This paper investigates the applicability of existing dialogue act annotation schemes to the semantic annotation of multimodal data, and the way a dialogue act annotation scheme can be extended to cover dialogue phenomena from multiple modalities. The general conclusion of our explorative study is that a multidimensional dialogue act taxonomy is usable for this purpose when some adjustments are made. We proposed a solution for adding these aspects to a dialogue act annotation scheme without changing its set of communicative functions, in the form of qualifiers that can be attached to communicative function tags.</abstract>
@@ -1214,15 +1214,15 @@
     </paper>
     <paper id="132">
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <author><first>Maria</first><last>Simi</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Johan</first><last>Hall</last></author>
       <author><first>Jens</first><last>Nilsson</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
@@ -1241,14 +1241,14 @@
     <paper id="134">
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <title>How Large a Corpus Do We Need: Statistical Method Versus Rule-based Method</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/199_Paper.pdf</url>
       <abstract>We investigate the impact of input data scale in corpus-based learning using a study style of Zipfs law. In our research, Chinese word segmentation is chosen as the study case and a series of experiments are specially conducted for it, in which two types of segmentation techniques, statistical learning and rule-based methods, are examined. The empirical results show that a linear performance improvement in statistical learning requires an exponential increasing of training corpus size at least. As for the rule-based method, an approximate negative inverse relationship between the performance and the size of the input lexicon can be observed.</abstract>
       <bibkey>zhao-etal-2010-large</bibkey>
     </paper>
     <paper id="135">
-      <author><first>Bolette S.</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette S.</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
       <title>Merging Specialist Taxonomies and Folk Taxonomies in Wordnets - A case Study of Plants, Animals and Foods in the <fixed-case>D</fixed-case>anish <fixed-case>W</fixed-case>ordnet</title>
@@ -1258,14 +1258,14 @@
     </paper>
     <paper id="136">
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <title>Inducing Ontologies from Folksonomies using Natural Language Understanding</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/203_Paper.pdf</url>
       <abstract>Folksonomies are unsystematic, unsophisticated collections of keywords associated by social bookmarking users to web content and, despite their inconsistency problems (typographical errors, spelling variations, use of space or punctuation as delimiters, same tag applied in different context, synonymy of concepts, etc.), their popularity is increasing among Web 2.0 application developers. In this paper, in addition to eliminating folksonomic irregularities existing at the lexical, syntactic or semantic understanding levels, we propose an algorithm that automatically builds a semantic representation of the folksonomy by exploiting the tags, their social bookmarking associations (co-occuring tags) and, more importantly, the content of labeled documents. We derive the semantics of each tag, discover semantic links between the folksonomic tags and expose the underlying semantic structure of the folksonomy, thus, enabling a number of information discovery and ontology-based reasoning applications.</abstract>
       <bibkey>tatu-moldovan-2010-inducing</bibkey>
     </paper>
     <paper id="137">
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Maribel Montero</first><last>Perez</last></author>
       <title>Data Collection and <fixed-case>IPR</fixed-case> in Multilingual Parallel Corpora. <fixed-case>D</fixed-case>utch Parallel Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/204_Paper.pdf</url>
@@ -1273,7 +1273,7 @@
       <bibkey>de-clercq-perez-2010-data</bibkey>
     </paper>
     <paper id="138">
-      <author><first>Agata</first><last>Cybulska</last></author>
+      <author id="agata-cybulska"><first>Agata</first><last>Cybulska</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <title>Event Models for Historical Perspectives: Determining Relations between High and Low Level Events in Text, Based on the Classification of Time, Location and Participants.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/205_Paper.pdf</url>
@@ -1297,7 +1297,7 @@
     <paper id="141">
       <author><first>Matthieu</first><last>Vernier</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <title>Learning Subjectivity Phrases missing from Resources through a Large Set of Semantic Tests</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/208_Paper.pdf</url>
       <abstract>In recent years, blogs and social networks have particularly boosted interests for opinion mining research. In order to satisfy real-scale applicative needs, a main task is to create or to enhance lexical and semantic resources on evaluative language. Classical resources of the area are mostly built for english, they contain simple opinion word markers and are far to cover the lexical richness of this linguistic phenomenon. In particular, infrequent subjective words, idiomatic expressions, and cultural stereotypes are missing from resources. We propose a new method, applied on french, to enhance automatically an opinion word lexicon. This learning method relies on linguistic uses of internet users and on semantic tests to infer the degree of subjectivity of many new adjectives, nouns, verbs, noun phrases, verbal phrases which are usually forgotten by other resources. The final appraisal lexicon contains 3,456 entries. We evaluate the lexicon enhancement with and without textual context.</abstract>
@@ -1314,7 +1314,7 @@
     </paper>
     <paper id="143">
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Towards a Balanced Named Entity Corpus for <fixed-case>D</fixed-case>utch</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/210_Paper.pdf</url>
       <abstract>This paper introduces a new named entity corpus for Dutch. State-of-the-art named entity recognition systems require a substantial annotated corpus to be trained on. Such corpora exist for English, but not for Dutch. The STEVIN-funded SoNaR project aims to produce a diverse 500-million-word reference corpus of written Dutch, with four semantic annotation layers: named entities, coreference relations, semantic roles and spatiotemporal expressions. A 1-million-word subset will be manually corrected. Named entity annotation guidelines for Dutch were developed, adapted from the MUC and ACE guidelines. Adaptations include the annotation of products and events, the classification into subtypes, and the markup of metonymic usage. Inter-annotator agreement experiments were conducted to corroborate the reliability of the guidelines, which yielded satisfactory results (Kappa scores above 0.90). We are building a NER system, trained on the 1-million-word subcorpus, to automatically classify the remainder of the SoNaR corpus. To this end, experiments with various classification algorithms (MBL, SVM, CRF) and features have been carried out and evaluated.</abstract>
@@ -1322,7 +1322,7 @@
     </paper>
     <paper id="144">
       <author><first>Grégory</first><last>Senay</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Stanislas</first><last>Oger</last></author>
       <author><first>Thierry</first><last>Michel</last></author>
@@ -1342,8 +1342,8 @@
     </paper>
     <paper id="146">
       <author><first>Ziqi</first><last>Zhang</last></author>
-      <author><first>José</first><last>Iria</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>Improving Domain-specific Entity Recognition with Automatic Term Recognition and Feature Extraction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/214_Paper.pdf</url>
       <abstract>Domain specific entity recognition often relies on domain-specific knowledge to improve system performance. However, such knowledge often suffers from limited domain portability and is expensive to build and maintain. Therefore, obtaining it in a generic and unsupervised manner would be a desirable feature for domain-specific entity recognition systems. In this paper, we introduce an approach that exploits domain-specificity of words as a form of domain-knowledge for entity-recognition tasks. Compared to prior work in the field, our approach is generic and completely unsupervised. We empirically show an improvement in entity extraction accuracy when features derived by our unsupervised method are used, with respect to baseline methods that do not employ domain knowledge. We also compared the results against those of existing systems that use manually crafted domain knowledge, and found them to be competitive.</abstract>
@@ -1359,10 +1359,10 @@
     </paper>
     <paper id="148">
       <author><first>Giuseppe</first><last>Attardi</last></author>
-      <author><first>Stefano Dei</first><last>Rossi</last></author>
+      <author id="stefano-dei-rossi"><first>Stefano Dei</first><last>Rossi</last></author>
       <author><first>Giulia</first><last>Di Pietro</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <title>A Resource and Tool for Super-sense Tagging of <fixed-case>I</fixed-case>talian Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/216_Paper.pdf</url>
@@ -1370,9 +1370,9 @@
       <bibkey>attardi-etal-2010-resource</bibkey>
     </paper>
     <paper id="149">
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
       <author><first>María Jesús</first><last>Aranzabe</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <author><first>Ainara</first><last>Estarrona</last></author>
       <title>Building the <fixed-case>B</fixed-case>asque <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/217_Paper.pdf</url>
@@ -1392,8 +1392,8 @@
     <paper id="151">
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Agnes</first><last>Lisowska</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <title>Examining the Effects of Rephrasing User Input on Two Mobile Spoken Language Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/220_Paper.pdf</url>
       <abstract>During the construction of a spoken dialogue system much effort is spent on improving the quality of speech recognition as possible. However, even if an application perfectly recognizes the input, its understanding may be far from what the user originally meant. The user should be informed about what the system actually understood so that an error will not have a negative impact in the later stages of the dialogue. One important aspect that this work tries to address is the effect of presenting the systems understanding during interaction with users. We argue that for specific kinds of applications its important to confirm the understanding of the system before obtaining the output. In this way the user can avoid misconceptions and problems occurring in the dialogue flow and he can enhance his confidence in the system. Nevertheless this has an impact on the interaction, as the mental workload increases, and the users behavior may adapt to the systems coverage. We focus on two applications that implement the notion of rephrasing users input in a different way. Our study took place among 14 subjects that used both systems on a Nokia N810 Internet Tablet.</abstract>
@@ -1401,10 +1401,10 @@
     </paper>
     <paper id="152">
       <author><first>Samuel</first><last>Reese</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title><fixed-case>W</fixed-case>ikicorpus: A Word-Sense Disambiguated Multilingual <fixed-case>W</fixed-case>ikipedia Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/222_Paper.pdf</url>
       <abstract>This article presents a new freely available trilingual corpus (Catalan, Spanish, English) that contains large portions of the Wikipedia and has been automatically enriched with linguistic information. To our knowledge, this is the largest such corpus that is freely available to the community: In its present version, it contains over 750 million words. The corpora have been annotated with lemma and part of speech information using the open source library FreeLing. Also, they have been sense annotated with the state of the art Word Sense Disambiguation algorithm UKB. As UKB assigns WordNet senses, and WordNet has been aligned across languages via the InterLingual Index, this sort of annotation opens the way to massive explorations in lexical semantics that were not possible before. We present a first attempt at creating a trilingual lexical resource from the sense-tagged Wikipedia corpora, namely, WikiNet. Moreover, we present two by-products of the project that are of use for the NLP community: An open source Java-based parser for Wikipedia pages developed for the construction of the corpus, and the integration of the WSD algorithm UKB in FreeLing.</abstract>
@@ -1420,10 +1420,10 @@
     </paper>
     <paper id="154">
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Inguna</first><last>Skadin̨a</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadin̨a</last></author>
       <author><first>Edgars</first><last>Brālītis</last></author>
-      <author><first>Lauma</first><last>Pretkalnin̨a</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalnin̨a</last></author>
       <title>Towards Improving <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Translation: A System Comparison and a New Rescoring Feature</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/228_Paper.pdf</url>
       <abstract>Translation into the languages with relatively free word order has received a lot less attention than translation into fixed word order languages (English), or into analytical languages (Chinese). At the same time this translation task is found among the most difficult challenges for machine translation (MT), and intuitively it seems that there is some space in improvement intending to reflect the free word order structure of the target language. This paper presents a comparative study of two alternative approaches to statistical machine translation (SMT) and their application to a task of English-to-Latvian translation. Furthermore, a novel feature intending to reflect the relatively free word order scheme of the Latvian language is proposed and successfully applied on the n-best list rescoring step. Moving beyond classical automatic scores of translation quality that are classically presented in MT research papers, we contribute presenting a manual error analysis of MT systems output that helps to shed light on advantages and disadvantages of the SMT systems under consideration.</abstract>
@@ -1439,9 +1439,9 @@
       <bibkey>sidorov-etal-2010-english</bibkey>
     </paper>
     <paper id="156">
-      <author><first>Fernando</first><last>Fernández-Martínez</last></author>
-      <author><first>Juan Manuel</first><last>Lucas-Cuesta</last></author>
-      <author><first>Roberto Barra</first><last>Chicote</last></author>
+      <author id="fernando-fernandez-martinez"><first>Fernando</first><last>Fernández-Martínez</last></author>
+      <author id="juan-manuel-lucas-cuesta"><first>Juan Manuel</first><last>Lucas-Cuesta</last></author>
+      <author id="roberto-barra-chicote"><first>Roberto Barra</first><last>Chicote</last></author>
       <author><first>Javier</first><last>Ferreiros</last></author>
       <author><first>Javier</first><last>Macías-Guarasa</last></author>
       <title><fixed-case>HIFI</fixed-case>-<fixed-case>AV</fixed-case>: An Audio-visual Corpus for Spoken Language Human-Machine Dialogue Research in <fixed-case>S</fixed-case>panish</title>
@@ -1458,7 +1458,7 @@
     </paper>
     <paper id="158">
       <author><first>Dekang</first><last>Lin</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -1476,9 +1476,9 @@
     </paper>
     <paper id="159">
       <author><first>Eric</first><last>Auer</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
       <author><first>Han</first><last>Sloetjes</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Oliver</first><last>Schreer</last></author>
       <author><first>S.</first><last>Masnieri</last></author>
       <author><first>Daniel</first><last>Schneider</last></author>
@@ -1490,9 +1490,9 @@
     </paper>
     <paper id="160">
       <author><first>Damjan</first><last>Vlaj</last></author>
-      <author><first>Aleksandra Zögling</first><last>Markuš</last></author>
+      <author id="aleksandra-zogling-markus"><first>Aleksandra Zögling</first><last>Markuš</last></author>
       <author><first>Marko</first><last>Kos</last></author>
-      <author><first>Zdravko</first><last>Kačič</last></author>
+      <author id="zdravko-kacic"><first>Zdravko</first><last>Kačič</last></author>
       <title>Acquisition and Annotation of <fixed-case>S</fixed-case>lovenian <fixed-case>L</fixed-case>ombard Speech Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/235_Paper.pdf</url>
       <abstract>This paper presents the acquisition and annotation of Slovenian Lombard Speech Database, the recording of which started in the year 2008. The database was recorded at the University of Maribor, Slovenia. The goal of this paper is to describe the hardware platform used for the acquisition of speech material, recording scenarios and tools used for the annotation of Slovenian Lombard Speech Database. The database consists of recordings of 10 Slovenian native speakers. Five males and five females were recorded. Each speaker pronounced a set of eight corpuses in two recording sessions with at least one week pause between recordings. The structure of the corpus is similar to SpeechDat II database. Approximately 30 minutes of speech material per speaker and per session was recorded. The manual annotation of speech material is performed with the LombardSpeechLabel tool developed at the University of Maribor. The speech and annotation material was saved on 10 DVDs (one speaker on one DVD).</abstract>
@@ -1516,7 +1516,7 @@
     </paper>
     <paper id="163">
       <author><first>Lun-Wei</first><last>Ku</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <title>Construction of a <fixed-case>C</fixed-case>hinese Opinion Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/242_Paper.pdf</url>
@@ -1526,7 +1526,7 @@
     <paper id="164">
       <author><first>Takeshi</first><last>Abekawa</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <title>Community-based Construction of Draft and Final Translation Corpus Through a Translation Hosting Site Minna no Hon’yaku (<fixed-case>MNH</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/243_Paper.pdf</url>
@@ -1535,8 +1535,8 @@
     </paper>
     <paper id="165">
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <title>Active Learning and Crowd-Sourcing for Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/244_Paper.pdf</url>
       <abstract>Large scale parallel data generation for new language pairs requires intensive human effort and availability of experts. It becomes immensely difficult and costly to provide Statistical Machine Translation (SMT) systems for most languages due to the paucity of expert translators to provide parallel data. Even if experts are present, it appears infeasible due to the impending costs. In this paper we propose Active Crowd Translation (ACT), a new paradigm where active learning and crowd-sourcing come together to enable automatic translation for low-resource language pairs. Active learning aims at reducing cost of label acquisition by prioritizing the most informative data for annotation, while crowd-sourcing reduces cost by using the power of the crowds to make do for the lack of expensive language experts. We experiment and compare our active learning strategies with strong baselines and see significant improvements in translation quality. Similarly, our experiments with crowd-sourcing on Mechanical Turk have shown that it is possible to create parallel corpora using non-experts and with sufficient quality assurance, a translation system that is trained using this corpus approaches expert quality.</abstract>
@@ -1551,7 +1551,7 @@
       <bibkey>dalianis-velupillai-2010-certain</bibkey>
     </paper>
     <paper id="167">
-      <author><first>Winston</first><last>Anderson</last></author>
+      <author id="winston-n-anderson"><first>Winston</first><last>Anderson</last></author>
       <author><first>Laurette</first><last>Pretorius</last></author>
       <author><first>Albert</first><last>Kotzé</last></author>
       <title>Base Concepts in the <fixed-case>A</fixed-case>frican Languages Compared to Upper Ontologies and the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Top Ontology</title>
@@ -1563,7 +1563,7 @@
       <author><first>Keyan</first><last>Zhou</last></author>
       <author><first>Aijun</first><last>Li</last></author>
       <author><first>Zhigang</first><last>Yin</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <title><fixed-case>CASIA</fixed-case>-<fixed-case>CASSIL</fixed-case>: a <fixed-case>C</fixed-case>hinese Telephone Conversation Corpus in Real Scenarios with Multi-leveled Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/248_Paper.pdf</url>
       <abstract>CASIA-CASSIL is a large-scale corpus base of Chinese human-human naturally-occurring telephone conversations in restricted domains. The first edition consists of 792 90-second conversations belonging to tourism domain, which are selected from 7,639 spontaneous telephone recordings in real scenarios. The corpus is now being annotated with wide range of linguistic and paralinguistic information in multi-levels. The annotations include Turns, Speaker Gender, Orthographic Transcription, Chinese Syllable, Chinese Phonetic Transcription, Prosodic Boundary, Stress of Sentence, Non-Speech Sounds, Voice Quality, Topic, Dialog-act and Adjacency Pairs, Ill-formedness, and Expressive Emotion as well, 13 levels in total. The abundant annotation will be effective especially for studying Chinese spoken language phenomena. This paper describes the whole process to build the conversation corpus, including collecting and selecting the original data, and the follow-up process such as transcribing, annotating, and so on. CASIA-CASSIL is being extended to a large scale corpus base of annotated Chinese dialogs for spoken Chinese study.</abstract>
@@ -1580,7 +1580,7 @@
     </paper>
     <paper id="170">
       <author><first>Ruud</first><last>Koolen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <title>The <fixed-case>D</fixed-case>-<fixed-case>TUNA</fixed-case> Corpus: A <fixed-case>D</fixed-case>utch Dataset for the Evaluation of Referring Expression Generation Algorithms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/251_Paper.pdf</url>
       <abstract>We present the D-TUNA corpus, which is the first semantically annotated corpus of referring expressions in Dutch. Its primary function is to evaluate and improve the performance of REG algorithms. Such algorithms are computational models that automatically generate referring expressions by computing how a specific target can be identified to an addressee by distinguishing it from a set of distractor objects. We performed a large-scale production experiment, in which participants were asked to describe furniture items and people, and provided all descriptions with semantic information regarding the target and the distractor objects. Besides being useful for evaluating REG algorithms, the corpus addresses several other research goals. Firstly, the corpus contains both written and spoken referring expressions uttered in the direction of an addressee, which enables systematic analyses of how modality (text or speech) influences the human production of referring expressions. Secondly, due to its comparability with the English TUNA corpus, our Dutch corpus can be used to explore the differences between Dutch and English speakers regarding the production of referring expressions.</abstract>
@@ -1588,9 +1588,9 @@
     </paper>
     <paper id="171">
       <author><first>Aina</first><last>Peris</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <title><fixed-case>ADN</fixed-case>-Classifier:Automatically Assigning Denotation Types to Nominalizations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/252_Paper.pdf</url>
       <abstract>This paper presents the ADN-Classifier, an Automatic classification system of Spanish Deverbal Nominalizations aimed at identifying its semantic denotation (i.e. event, result, underspecified, or lexicalized). The classifier can be used for NLP tasks such as coreference resolution or paraphrase detection. To our knowledge, the ADN-Classifier is the first effort in acquisition of denotations for nominalizations using Machine Learning. We compare the results of the classifier when using a decreasing number of Knowledge Sources, namely (1) the complete nominal lexicon (AnCora-Nom) that includes sense distictions, (2) the nominal lexicon (AnCora-Nom) removing the sense-specific information, (3) nominalizations context information obtained from a treebank corpus (AnCora-Es) and (4) the combination of the previous linguistic resources. In a realistic scenario, that is, without sense distinction, the best results achieved are those taking into account the information declared in the lexicon (89.40% accuracy). This shows that the lexicon contains crucial information (such as argument structure) that corpus-derived features cannot substitute for.</abstract>
@@ -1615,8 +1615,8 @@
     </paper>
     <paper id="174">
       <author><first>Natalie D.</first><last>Snoeren</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <title>The Study of Writing Variants in an Under-resourced Language: Some Evidence from Mobile N-Deletion in <fixed-case>L</fixed-case>uxembourgish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/258_Paper.pdf</url>
       <abstract>The national language of the Grand-Duchy of Luxembourg, Luxembourgish, has often been characterized as one of Europe's under-described and under-resourced languages. Because of a limited written production of Luxembourgish, poorly observed writing standardization (as compared to other languages such as English and French) and a large diversity of spoken varieties, the study of Luxembourgish poses many interesting challenges to automatic speech processing studies as well as to linguistic enquiries. In the present paper, we make use of large corpora to focus on typical writing and derived pronunciation variants in Luxembourgish, elicited by mobile -n deletion (hereafter shortened to MND). Using transcriptions from the House of Parliament debates and 10k words from news reports, we examine the reality of MND variants in written transcripts of speech. The goal of this study is manyfold: quantify the potential of variation due to MND in written Luxembourgish, check the mandatory status of the MND rule and discuss the arising problems for automatic spoken Luxembourgish processing.</abstract>
@@ -1634,7 +1634,7 @@
       <author><first>Yuki</first><last>Kamiya</last></author>
       <author><first>Tomohiro</first><last>Ohno</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <title>Construction of Back-Channel Utterance Corpus for Responsive Spoken Dialogue System Development</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/260_Paper.pdf</url>
       <abstract>In spoken dialogues, if a spoken dialogue system does not respond at all during users utterances, the user might feel uneasy because the user does not know whether or not the system has recognized the utterances. In particular, back-channel utterances, which the system outputs as voices such as yeah and uh huh in English have important roles for a driver in in-car speech dialogues because the driver does not look owards a listener while driving. This paper describes construction of a back-channel utterance corpus and its analysis to develop the system which can output back-channel utterances at the proper timing in the responsive in-car speech dialogue. First, we constructed the back-channel utterance corpus by integrating the back-channel utterances that four subjects provided for the drivers utterances in 60 dialogues in the CIAIR in-car speech dialogue corpus. Next, we analyzed the corpus and revealed the relation between back-channel utterance timings and information on bunsetsu, clause, pause and rate of speech. Based on the analysis, we examined the possibility of detecting back-channel utterance timings by machine learning technique. As the result of the experiment, we confirmed that our technique achieved as same detection capability as a human.</abstract>
@@ -1643,9 +1643,9 @@
     <paper id="177">
       <author><first>Marina B.</first><last>Ruiter</last></author>
       <author><first>Toni C. M.</first><last>Rietveld</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Emiel J.</first><last>Krahmer</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="emiel-krahmer"><first>Emiel J.</first><last>Krahmer</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <title>Human Language Technology and Communicative Disabilities: Requirements and Possibilities for the Future</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/261_Paper.pdf</url>
       <abstract>For some years now, the Nederlandse Taalunie (Dutch Language Union) has been active in promoting the development of human language technology (HLT) applications for users of Dutch with communication disabilities. The reason is that HLT products and services may enable these users to improve their verbal autonomy and communication skills. We sought to identify a minimum common set of HLT resources that is required to develop tools for a wide range of communication disabilities. In order to reach this goal, we investigated the specific HLT needs of communicatively disabled people and related these needs to the underlying HLT software components. By analysing the availability and quality of these essential HLT resources, we were able to identify which of the crucial elements need further research and development to become usable for developing applications for communicatively disabled users of Dutch. The results obtained in the current survey can be used to inform policy institutions on how they can stimulate the development of HLT resources for this target group. In the current study results were obtained for Dutch, but a similar approach can also be used for other languages.</abstract>
@@ -1671,14 +1671,14 @@
     </paper>
     <paper id="180">
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <title><fixed-case>G</fixed-case>ern<fixed-case>E</fixed-case>di<fixed-case>T</fixed-case> - The <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Editing Tool</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/264_Paper.pdf</url>
       <abstract>This paper introduces GernEdiT (short for: GermaNet Editing Tool), a new graphical user interface for the lexicographers and developers of GermaNet, the German version of the Princeton WordNet. GermaNet is a lexical-semantic net that relates German nouns, verbs, and adjectives. Traditionally, lexicographic work for extending the coverage of GermaNet utilized the Princeton WordNet development environment of lexicographer files. Due to a complex data format and no opportunity of automatic consistency checks, this process was very error prone and time consuming. The GermaNet Editing Tool GernEdiT was developed to overcome these shortcomings. The main purposes of the GernEdiT tool are, besides supporting lexicographers to access, modify, and extend GermaNet data in an easy and adaptive way, as follows: Replace the standard editing tools by a more user-friendly tool, use a relational database as data storage, support export formats in the form of XML, and facilitate internal consistency and correctness of the linguistic resource. All these core functionalities of GernEdiT along with the main aspects of the underlying lexical resource GermaNet and its current database format are presented in this paper.</abstract>
       <bibkey>henrich-hinrichs-2010-gernedit</bibkey>
     </paper>
     <paper id="181">
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Verena</first><last>Henrich</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
       <title>Sustainability of Linguistic Data and Analysis in the Context of a Collaborative e<fixed-case>S</fixed-case>cience Environment</title>
@@ -1704,9 +1704,9 @@
       <bibkey>gorog-vossen-2010-computer</bibkey>
     </paper>
     <paper id="184">
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <title><fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht: Web-based <fixed-case>LRT</fixed-case> Services in a Distributed e<fixed-case>S</fixed-case>cience Infrastructure</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/270_Paper.pdf</url>
       <abstract>eScience - enhanced science - is a new paradigm of scientific work and research. In the humanities, eScience environments can be helpful in establishing new workflows and lifecycles of scientific data. WebLicht is such an eScience environment for linguistic analysis, making linguistic tools and resources available network-wide. Today, most digital language resources and tools (LRT) are available by download only. This is inconvenient for someone who wants to use and combine several tools because these tools are normally not compatible with each other. To overcome this restriction, WebLicht makes the functionality of linguistic tools and the resources themselves available via the internet as web services. In WebLicht, several kinds of linguistic tools are available which cover the basic functionality of automatic and incremental creation of annotated text corpora. To make use of the more than 70 tools and resources currently available, the end user needs nothing more than just a common web browser.</abstract>
@@ -1722,28 +1722,28 @@
     </paper>
     <paper id="186">
       <author><first>Diana</first><last>Santos</last></author>
-      <author><first>Luís Miguel</first><last>Cabral</last></author>
-      <author><first>Corina</first><last>Forascu</last></author>
+      <author id="luis-miguel-cabral"><first>Luís Miguel</first><last>Cabral</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forascu</last></author>
       <author><first>Pamela</first><last>Forner</last></author>
       <author><first>Fredric</first><last>Gey</last></author>
       <author><first>Katrin</first><last>Lamm</last></author>
       <author><first>Thomas</first><last>Mandl</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Álvaro</first><last>Rodrigo</last></author>
-      <author><first>Julia</first><last>Schulz</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="alvaro-rodrigo"><first>Álvaro</first><last>Rodrigo</last></author>
+      <author id="julia-maria-schulz"><first>Julia</first><last>Schulz</last></author>
       <author><first>Yvonne</first><last>Skalban</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <title><fixed-case>G</fixed-case>iki<fixed-case>CLEF</fixed-case>: Crosscultural Issues in Multilingual Information Access</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/272_Paper.pdf</url>
       <abstract>In this paper we describe GikiCLEF, the first evaluation contest that, to our knowledge, was specifically designed to expose and investigate cultural and linguistic issues involved in structured multimedia collections and searching, and which was organized under the scope of CLEF 2009. GikiCLEF evaluated systems that answered hard questions for both human and machine, in ten different Wikipedia collections, namely Bulgarian, Dutch, English, German, Italian, Norwegian (Bokmäl and Nynorsk), Portuguese, Romanian, and Spanish. After a short historical introduction, we present the task, together with its motivation, and discuss how the topics were chosen. Then we provide another description from the point of view of the participants. Before disclosing their results, we introduce the SIGA management system explaining the several tasks which were carried out behind the scenes. We quantify in turn the GIRA resource, offered to the community for training and further evaluating systems with the help of the 50 topics gathered and the solutions identified. We end the paper with a critical discussion of what was learned, advancing possible ways to reuse the data.</abstract>
       <bibkey>santos-etal-2010-gikiclef</bibkey>
     </paper>
     <paper id="187">
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <author><first>Claus</first><last>Zinn</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Mariano</first><last>Gardellini</last></author>
       <title>Virtual Language Observatory: The Portal to the Language Resources and Technology Universe</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/273_Paper.pdf</url>
@@ -1765,8 +1765,8 @@
     <paper id="189">
       <author><first>Werner</first><last>Spiegl</last></author>
       <author><first>Korbinian</first><last>Riedhammer</last></author>
-      <author><first>Stefan</first><last>Steidl</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
+      <author id="stefan-steidl"><first>Stefan</first><last>Steidl</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
       <title><fixed-case>FAU</fixed-case> <fixed-case>IISAH</fixed-case> Corpus – A <fixed-case>G</fixed-case>erman Speech Database Consisting of Human-Machine and Human-Human Interaction Acquired by Close-Talking and Far-Distance Microphones</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/275_Paper.pdf</url>
       <abstract>In this paper the FAU IISAH corpus and its recording conditions are described: a new speech database consisting of human-machine and human-human interaction recordings. Beside close-talking microphones for the best possible audio quality of the recorded speech, far-distance microphones were used to acquire the interaction and communication. The recordings took place during a Wizard-of-Oz experiment in the intelligent, senior-adapted house (ISA-House). That is a living room with a speech controlled home assistance system for elderly people, based on a dialogue system, which is able to process spontaneous speech. During the studies in the ISA-House more than eight hours of interaction data were recorded including 3 hours and 27 minutes of spontaneous speech. The data were annotated in terms of human-human (off-talk) and human-machine (on-talk) interaction. The test persons used 2891 turns of off-talk and 2752 turns of on-talk including 1751 different words. Still in progress is the analysis under statistical and linguistical aspects.</abstract>
@@ -1789,8 +1789,8 @@
     </paper>
     <paper id="192">
       <author><first>Kais</first><last>Dukes</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
-      <author><first>Abdul-Baquee M.</first><last>Sharaf</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
+      <author id="abdul-baquee-sharaf"><first>Abdul-Baquee M.</first><last>Sharaf</last></author>
       <title>Syntactic Annotation Guidelines for the <fixed-case>Q</fixed-case>uranic <fixed-case>A</fixed-case>rabic Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/278_Paper.pdf</url>
       <abstract>The Quranic Arabic Dependency Treebank (QADT) is part of the Quranic Arabic Corpus (<url>http://corpus.quran.com</url>), an online linguistic resource organized by the University of Leeds, and developed through online collaborative annotation. The website has become a popular study resource for Arabic and the Quran, and is now used by over 1,500 researchers and students daily. This paper presents the treebank, explains the choice of syntactic representation, and highlights key parts of the annotation guidelines. The text being analyzed is the Quran, the central religious book of Islam, written in classical Quranic Arabic (c. 600 CE). To date, all 77,430 words of the Quran have a manually verified morphological analysis, and syntactic analysis is in progress. 11,000 words of Quranic Arabic have been syntactically annotated as part of a gold standard treebank. Annotation guidelines are especially important to promote consistency for a corpus which is being developed through online collaboration, since often many people will participate from different backgrounds and with different levels of linguistic expertise. The treebank is available online for collaborative correction to improve accuracy, with suggestions reviewed by expert Arabic linguists, and compared against existing published books of Quranic Syntax.</abstract>
@@ -1798,7 +1798,7 @@
     </paper>
     <paper id="193">
       <author><first>Tommi</first><last>Vatanen</last></author>
-      <author><first>Jaakko J.</first><last>Väyrynen</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko J.</first><last>Väyrynen</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
       <title>Language Identification of Short Text Segments with N-gram Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/279_Paper.pdf</url>
@@ -1806,8 +1806,8 @@
       <bibkey>vatanen-etal-2010-language</bibkey>
     </paper>
     <paper id="194">
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Imre</first><last>Kiss</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="imre-kiss"><first>Imre</first><last>Kiss</last></author>
       <author><first>Mark</first><last>Adler</last></author>
       <title>Bootstrapping Named Entity Extraction for the Creation of Mobile Services</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/280_Paper.pdf</url>
@@ -1817,7 +1817,7 @@
     <paper id="195">
       <author><first>Bert</first><last>Réveil</last></author>
       <author><first>Jean-Pierre</first><last>Martens</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>Improving Proper Name Recognition by Adding Automatically Learned Pronunciation Variants to the Lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/281_Paper.pdf</url>
       <abstract>This paper deals with the task of large vocabulary proper name recognition. In order to accomodate a wide diversity of possible name pronunciations (due to non-native name origins or speaker tongues) a multilingual acoustic model is combined with a lexicon comprising 3 grapheme-to-phoneme (G2P) transcriptions from G2P transcribers for 3 different languages) and up to 4 so-called phoneme-to-phoneme (P2P) transcriptions. The latter are generated with (speaker tongue, name source) specific P2P converters that try to transform a set of baseline name transcriptions into a pool of transcription variants that lie closer to the `true name pronunciations. The experimental results show that the generated P2P variants can be employed to improve name recognition, and that the obtained accuracy is comparable to what is achieved with typical (TY) transcriptions (made by a human expert). Furthermore, it is demonstrated that the P2P conversion can best be instantiated from a baseline transcription in the name source language, and that knowledge of the speaker tongue is an important input as well for the P2P transcription process.</abstract>
@@ -1825,7 +1825,7 @@
     </paper>
     <paper id="196">
       <author><first>Majdi</first><last>Sawalha</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>Fine-Grain Morphological Analyzer and Part-of-Speech Tagger for <fixed-case>A</fixed-case>rabic Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/282_Paper.pdf</url>
       <abstract>Morphological analyzers and part-of-speech taggers are key technologies for most text analysis applications. Our aim is to develop a part-of-speech tagger for annotating a wide range of Arabic text formats, domains and genres including both vowelized and non-vowelized text. Enriching the text with linguistic analysis will maximize the potential for corpus re-use in a wide range of applications. We foresee the advantage of enriching the text with part-of-speech tags of very fine-grained grammatical distinctions, which reflect expert interest in syntax and morphology, but not specific needs of end-users, because end-user applications are not known in advance. In this paper we review existing Arabic Part-of-Speech Taggers and tag-sets, and illustrate four different Arabic PoS tag-sets for a sample of Arabic text from the Quran. We describe the detailed fine-grained morphological feature tag set of Arabic, and the fine-grained Arabic morphological analyzer algorithm. We faced practical challenges in applying the morphological analyzer to the 100-million-word Web Arabic Corpus: we had to port the software to the National Grid Service, adapt the analyser to cope with spelling variations and errors, and utilise a Broad-Coverage Lexical Resource combining 23 traditional Arabic lexicons. Finally we outline the construction of a Gold Standard for comparative evaluation.</abstract>
@@ -1851,7 +1851,7 @@
     <paper id="199">
       <author><first>Philip</first><last>van Oosten</last></author>
       <author><first>Dries</first><last>Tanghe</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Towards an Improved Methodology for Automated Readability Prediction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/286_Paper.pdf</url>
       <abstract>Since the first half of the 20th century, readability formulas have been widely employed to automatically predict the readability of an unseen text. In this article, the formulas and the text characteristics they are composed of are evaluated in the context of large Dutch and English corpora. We describe the behaviour of the formulas and the text characteristics by means of correlation matrices and a principal component analysis, and test the methodological validity of the formulas by means of collinearity tests. Both the correlation matrices and the principal component analysis show that the formulas described in this paper strongly correspond, regardless of the language for which they were designed. Furthermore, the collinearity test reveals shortcomings in the methodology that was used to create some of the existing readability formulas. All of this leads us to conclude that a new readability prediction method is needed. We finally make suggestions to come to a cleaner methodology and present web applications that will help us collect data to compile a new gold standard for readability prediction.</abstract>
@@ -1859,7 +1859,7 @@
     </paper>
     <paper id="200">
       <author><first>Majdi</first><last>Sawalha</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>Constructing and Using Broad-coverage Lexical Resource for Enhancing Morphological Analysis of <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/287_Paper.pdf</url>
       <abstract>Broad-coverage language resources which provide prior linguistic knowledge must improve the accuracy and the performance of NLP applications. We are constructing a broad-coverage lexical resource to improve the accuracy of morphological analyzers and part-of-speech taggers of Arabic text. Over the past 1200 years, many different kinds of Arabic language lexicons were constructed; these lexicons are different in ordering, size and aim or goal of construction. We collected 23 machine-readable lexicons, which are freely available on the web. We combined lexical resources into one large broad-coverage lexical resource by extracting information from disparate formats and merging traditional Arabic lexicons. To evaluate the broad-coverage lexical resource we computed coverage over the Quran, the Corpus of Contemporary Arabic, and a sample from the Arabic Web Corpus, using two methods. Counting exact word matches between test corpora and lexicon scored about 65-68%; Arabic has a rich morphology with many combinations of roots, affixes and clitics, so about a third of words in the corpora did not have an exact match in the lexicon. The second approach is to compute coverage in terms of use in a lemmatizer program, which strips clitics to look for a match for the underlying lexeme; this scored about 82-85%.</abstract>
@@ -1882,7 +1882,7 @@
     </paper>
     <paper id="202">
       <author><first>Gerlof</first><last>Bouma</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <title>Towards a Large Parallel Corpus of Cleft Constructions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf</url>
@@ -1893,7 +1893,7 @@
       <author><first>Ziqi</first><last>Zhang</last></author>
       <author><first>Anna Lisa</first><last>Gentile</last></author>
       <author><first>Lei</first><last>Xia</last></author>
-      <author><first>José</first><last>Iria</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <author><first>Sam</first><last>Chapman</last></author>
       <title>A Random Graph Walk based Approach to Computing Semantic Relatedness Using Knowledge from <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/292_Paper.pdf</url>
@@ -1935,7 +1935,7 @@
     </paper>
     <paper id="208">
       <author><first>Anne</first><last>Garcia-Fernandez</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title><fixed-case>MACAQ</fixed-case> : A Multi Annotated Corpus to Study how we Adapt Answers to Various Questions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/301_Paper.pdf</url>
@@ -1943,9 +1943,9 @@
       <bibkey>garcia-fernandez-etal-2010-macaq</bibkey>
     </paper>
     <paper id="209">
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <author><first>Vicent</first><last>Tamarit</last></author>
-      <author><first>José-M.</first><last>Benedí</last></author>
+      <author id="jose-miguel-benedi"><first>José-M.</first><last>Benedí</last></author>
       <title>Evaluation of <fixed-case>HMM</fixed-case>-based Models for the Annotation of Unsegmented Dialogue Turns</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/303_Paper.pdf</url>
       <abstract>Corpus-based dialogue systems rely on statistical models, whose parameters are inferred from annotated dialogues. The dialogues are usually annotated in terms of Dialogue Acts (DA), and the manual annotation is difficult (as annotation rule are hard to define), error-prone and time-consuming. Therefore, several semi-automatic annotation processes have been proposed to speed-up the process and consequently obtain a dialogue system in less total time. These processes are usually based on statistical models. The standard statistical annotation model is based on Hidden Markov Models (HMM). In this work, we explore the impact of different types of HMM, with different number of states, on annotation accuracy. We performed experiments using these models on two dialogue corpora (Dihana and SwitchBoard) of dissimilar features. The results show that some types of models improve standard HMM in a human-computer task-oriented dialogue corpus (Dihana corpus), but their impact is lower in a human-human non-task-oriented dialogue corpus (SwitchBoard corpus).</abstract>
@@ -1954,7 +1954,7 @@
     <paper id="210">
       <author><first>Raheel</first><last>Nawaz</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Meta-Knowledge Annotation of Bio-Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/306_Paper.pdf</url>
@@ -1966,24 +1966,24 @@
       <author><first>Ruben</first><last>Dorado</last></author>
       <author><first>Luke</first><last>McCrohon</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title><fixed-case>U</fixed-case>-Compare: An Integrated Language Resource Evaluation Platform Including a Comprehensive <fixed-case>UIMA</fixed-case> Resource Library</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/307_Paper.pdf</url>
       <abstract>Language resources, including corpus and tools, are normally required to be combined in order to achieve a users specific task. However, resources tend to be developed independently in different, incompatible formats. In this paper we describe about U-Compare, which consists of the U-Compare component repository and the U-Compare platform. We have been building a highly interoperable resource library, providing the world largest ready-to-use UIMA component repository including wide variety of corpus readers and state-of-the-art language tools. These resources can be deployed as local services or web services, even possible to be hosted in clustered machines to increase the performance, while users do not need to be aware of such differences. In addition to the resource library, an integrated language processing platform is provided, allowing workflow creation, comparison, evaluation and visualization, using the resources in the library or any UIMA component, without any programming via graphical user interfaces, while a command line launcher is also available without GUIs. The evaluation itself is processed in a UIMA component, users can create and plug their own evaluation metrics in addition to the predefined metrics. U-Compare has been successfully used in many projects including BioCreative, Conll and the BioNLP shared task.</abstract>
       <bibkey>kano-etal-2010-u</bibkey>
     </paper>
     <paper id="212">
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <title>Enhancing Language Resources with Maps</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/308_Paper.pdf</url>
       <abstract>We will look at how maps can be integrated in research resources, such as language databases and language corpora. By using maps, search results can be illustrated in a way that immediately gives the user information that words or numbers on their own would not give. We will illustrate with two different resources, into which we have now added a Google Maps application: The Nordic Dialect Corpus (Johannessen et al. 2009) and The Nordic Syntactic Judgments Database (Lindstad et al. 2009). We have integrated Google Maps into these applications. The database contains some hundred syntactic test sentences that have been evaluated by four speakers in more than hundred locations in Norway and Sweden. Searching for the evaluations of a particular sentence gives a list of several hundred judgments, which are difficult for a human researcher to assess. With the map option, isoglosses are immediately visible. We show in the paper that both with the maps depicting corpus hits and with the maps depicting database results, the map visualizations actually show clear geographical differences that would be very difficult to spot just by reading concordance lines or database tables.</abstract>
       <bibkey>johannessen-etal-2010-enhancing</bibkey>
     </paper>
     <paper id="213">
-      <author><first>Jana Z.</first><last>Sukkarieh</last></author>
+      <author id="jana-sukkarieh"><first>Jana Z.</first><last>Sukkarieh</last></author>
       <author><first>Eleanor</first><last>Bolge</last></author>
       <title>Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/310_Paper.pdf</url>
@@ -1992,7 +1992,7 @@
     </paper>
     <paper id="214">
       <author><first>Haïfa</first><last>Zargayouna</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <title>Evaluation of Textual Knowledge Acquisition Tools: a Challenging Task</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/311_Paper.pdf</url>
       <abstract>A large effort has been devoted to the development of textual knowledge acquisition (KA) tools, but it is still difficult to assess the progress that has been made. The results produced by these tools are difficult to compare, due to the heterogeneity of the proposed methods and of their goals. Various experiments have been made to evaluate terminological and ontological tools. They show that in terminology as well as in ontology acquisition, it remains difficult to compare existing tools and to analyse their advantages and drawbacks. From our own experiments in evaluating terminology and ontology acquisition tools, it appeared that the difficulties and solutions are similar for both tasks. We propose a unified approach for the evaluation of textual KA tools that can be instantiated in different ways for various tasks. The main originality of this approach lies in the way it takes into account the subjectivity of evaluation and the relativity of gold standards. In this paper, we highlight the major difficulties of KA evaluation, we then present a unified proposal for the evaluation of terminologies and ontologies acquisition tools and the associated experiments. The proposed protocols take into consideration the specificity of this type of evaluation.</abstract>
@@ -2030,7 +2030,7 @@
     <paper id="218">
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Diana</first><last>Blagoeva</last></author>
-      <author><first>Siya</first><last>Kolkovska</last></author>
+      <author id="sia-kolkovska"><first>Siya</first><last>Kolkovska</last></author>
       <title><fixed-case>B</fixed-case>ulgarian National Corpus Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/316_Paper.pdf</url>
       <abstract>The paper presents Bulgarian National Corpus project (BulNC) - a large-scale, representative, online available corpus of Bulgarian. The BulNC is also a monolingual general corpus, fully morpho-syntactically (and partially semantically) annotated, and manually provided with detailed meta-data descriptions. Presently the Bulgarian National corpus consists of about 320 000 000 graphical words and includes more than 10 000 samples. Briefly the corpus structure and the accepted criteria for representativeness and well-balancing are presented. The query language for advance search of collocations and concordances is demonstrated with some examples - it allows to retrieve word combinations, ordered queries, inflexionally and semantically related words, part-of-speech tags, utilising Boolean operations and grouping as well. The BulNC already plays a significant role in natural language processing of Bulgarian contributing to scientific advances in spelling and grammar checking, word sense disambiguation, speech recognition, text categorisation, topic extraction and machine translation. The BulNC can also be used in different investigations going beyond the linguistics: library studies, social sciences research, teaching methods studies, etc.</abstract>
@@ -2056,7 +2056,7 @@
       <bibkey>haselbach-heid-2010-development</bibkey>
     </paper>
     <paper id="221">
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Ying</first><last>Chen</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
@@ -2097,7 +2097,7 @@
       <bibkey>williams-etal-2010-cambridge</bibkey>
     </paper>
     <paper id="225">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>René</first><last>van Horik</last></author>
       <author><first>Stef</first><last>Scagliola</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
@@ -2108,7 +2108,7 @@
       <bibkey>van-den-heuvel-etal-2010-veterantapes</bibkey>
     </paper>
     <paper id="226">
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Manuel</first><last>Kirschner</last></author>
       <author><first>Zorana</first><last>Ratkovic</last></author>
       <title>Context Fusion: The Role of Discourse Structure and Centering Theory</title>
@@ -2125,9 +2125,9 @@
       <bibkey>okamoto-ishizaki-2010-homographic</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <title>Design and Development of Part-of-Speech-Tagging Resources for <fixed-case>W</fixed-case>olof (<fixed-case>N</fixed-case>iger-<fixed-case>C</fixed-case>ongo, spoken in <fixed-case>S</fixed-case>enegal)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/333_Paper.pdf</url>
       <abstract>In this paper, we report on the design of a part-of-speech-tagset for Wolof and on the creation of a semi-automatically annotated gold standard. In order to achieve high-quality annotation relatively fast, we first generated an accurate lexicon that draws on existing word and name lists and takes into account inflectional and derivational morphology. The main motivation for the tagged corpus is to obtain data for training automatic taggers with machine learning approaches. Hence, we took machine learning considerations into account during tagset design and we present training experiments as part of this paper. The best automatic tagger achieves an accuracy of 95.2% in cross-validation experiments. We also wanted to create a basis for experimenting with annotation projection techniques, which exploit parallel corpora. For this reason, it was useful to use a part of the Bible as the gold standard corpus, for which sentence-aligned parallel versions in many languages are easy to obtain. We also report on preliminary experiments exploiting a statistical word alignment of the parallel text.</abstract>
@@ -2160,7 +2160,7 @@
     </paper>
     <paper id="232">
       <author><first>Lubomir</first><last>Otrusina</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <title>A New Approach to Pseudoword Generation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf</url>
       <abstract>Sense-tagged corpora are used to evaluate word sense disambiguation (WSD) systems. Manual creation of such resources is often prohibitively expensive. That is why the concept of pseudowords - conflations of two or more unambiguous words - has been integrated into WSD evaluation experiments. This paper presents a new method of pseudoword generation which takes into account semantic-relatedness of the candidate words forming parts of the pseudowords to the particular senses of the word to be disambiguated. We compare the new approach to its alternatives and show that the results on pseudowords, that are more similar to real ambiguous words, better correspond to the actual results. Two techniques assessing the similarity are studied - the first one takes advantage of manually created dictionaries (wordnets), the second one builds on the automatically computed statistical data obtained from large corpora. Pros and cons of the two techniques are discussed and the results on a standard task are demonstrated.</abstract>
@@ -2186,7 +2186,7 @@
       <bibkey>gibbon-etal-2010-medefaidrin</bibkey>
     </paper>
     <paper id="235">
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Sayoko</first><last>Kaide</last></author>
       <title>A Person-Name Filter for Automatic Compilation of Bilingual Person-Name Lexicons</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/343_Paper.pdf</url>
@@ -2195,7 +2195,7 @@
     </paper>
     <paper id="236">
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <title>Mining the Web for the Induction of a Dialectical <fixed-case>A</fixed-case>rabic Lexicon</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/344_Paper.pdf</url>
       <abstract>This paper describes the first phase of building a lexicon of Egyptian Cairene Arabic (ECA) ― one of the most widely understood dialects in the Arab World ― and Modern Standard Arabic (MSA). Each ECA entry is mapped to its MSA synonym, Part-of-Speech (POS) tag and top-ranked contexts based on Web queries; and thus each entry is provided with basic syntactic and semantic information for a generic lexicon compatible with multiple NLP applications. Moreover, through their MSA synonyms, ECA entries acquire access to MSA available NLP tools and resources which are considerably available. Using an associationist approach based on the correlations between word co-occurrence patterns in both dialects, we change the direction of the acquisition process from parallel to circular to overcome a bottleneck of current research on Arabic dialects, namely the lack of parallel corpora, and to alleviate accuracy rates for using unrelated Web documents which are more frequently available. Manually evaluated for 1,000 word entries by two native speakers of the ECA-MSA varieties, the proposed approach achieves a promising F-measured performance rate of 70.9%. In discussion to the proposed algorithm, different semantic issues are highlighted for upcoming phases of the induction of a more comprehensive ECA-MSA lexicon.</abstract>
@@ -2213,7 +2213,7 @@
     </paper>
     <paper id="238">
       <author><first>Philippe</first><last>Dreuw</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Gregorio</first><last>Martinez</last></author>
       <author><first>Onno</first><last>Crasborn</last></author>
       <author><first>Justus</first><last>Piater</last></author>
@@ -2235,7 +2235,7 @@
     </paper>
     <paper id="240">
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Gabriela</first><last>Soare</last></author>
       <title><fixed-case>F</fixed-case>ips<fixed-case>R</fixed-case>omanian: Towards a <fixed-case>R</fixed-case>omanian Version of the Fips Syntactic Parser</title>
@@ -2248,7 +2248,7 @@
       <author><first>Jonas</first><last>Beskow</last></author>
       <author><first>Kjell</first><last>Elenius</last></author>
       <author><first>Kahl</first><last>Hellmer</last></author>
-      <author><first>Sofia</first><last>Strönbergsson</last></author>
+      <author id="sofia-stronbergsson"><first>Sofia</first><last>Strönbergsson</last></author>
       <author><first>David</first><last>House</last></author>
       <title><fixed-case>S</fixed-case>pontal: A <fixed-case>S</fixed-case>wedish Spontaneous Dialogue Corpus of Audio, Video and Motion Capture</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/352_Paper.pdf</url>
@@ -2259,7 +2259,7 @@
       <author><first>Walid</first><last>Magdy</last></author>
       <author><first>Jinming</first><last>Min</last></author>
       <author><first>Johannes</first><last>Leveling</last></author>
-      <author><first>Gareth J. F.</first><last>Jones</last></author>
+      <author id="gareth-j-f-jones"><first>Gareth J. F.</first><last>Jones</last></author>
       <title>Building a Domain-specific Document Collection for Evaluating Metadata Effects on Information Retrieval</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/353_Paper.pdf</url>
       <abstract>This paper describes the development of a structured document collection containing user-generated text and numerical metadata for exploring the exploitation of metadata in information retrieval (IR). The collection consists of more than 61,000 documents extracted from YouTube video pages on basketball in general and NBA (National Basketball Association) in particular, together with a set of 40 topics and their relevance judgements. In addition, a collection of nearly 250,000 user profiles related to the NBA collection is available. Several baseline IR experiments report the effect of using video-associated metadata on retrieval effectiveness. The results surprisingly show that searching the videos titles only performs significantly better than searching additional metadata text fields of the videos such as the tags or the description.</abstract>
@@ -2280,7 +2280,7 @@
       <author><first>Jochen</first><last>Schwenninger</last></author>
       <author><first>Barbara</first><last>Samlowski</last></author>
       <author><first>Thomas</first><last>Winkler</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <title><fixed-case>D</fixed-case>i<fixed-case>SC</fixed-case>o - A <fixed-case>G</fixed-case>erman Evaluation Corpus for Challenging Problems in the Broadcast Domain</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/355_Paper.pdf</url>
       <abstract>Typical broadcast material contains not only studio-recorded texts read by trained speakers, but also spontaneous and dialect speech, debates with cross-talk, voice-overs, and on-site reports with difficult acoustic environments. Standard approaches to speech and speaker recognition usually deteriorate under such conditions. This paper reports on the design, construction, and experimental analysis of DiSCo, a German corpus for the evaluation of speech and speaker recognition on challenging material from the broadcast domain. One of the key requirements for the design of this corpus was a good coverage of different types of serious programmes beyond clean speech and planned speech broadcast news. Corpus annotation encompasses manual segmentation, an orthographic transcription, and labelling with speech mode, dialect, and noise type. We indicate typical use cases for the corpus by reporting results from ASR, speech search, and speaker recognition on the new corpus, thereby obtaining insights into the difficulty of audio recognition on the various classes.</abstract>
@@ -2324,7 +2324,7 @@
       <author><first>Paul</first><last>Felt</last></author>
       <author><first>Owen</first><last>Merkling</last></author>
       <author><first>Marc</first><last>Carmen</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Warren</first><last>Lemmon</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
@@ -2358,8 +2358,8 @@
     <paper id="251">
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Fabienne</first><last>Fritzinger</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
       <title>Term and Collocation Extraction by Means of Complex Linguistic Web Services</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/363_Paper.pdf</url>
@@ -2377,7 +2377,7 @@
     <paper id="253">
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Sara</first><last>Goggi</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
       <author><first>Irene</first><last>Russo</last></author>
@@ -2391,13 +2391,13 @@
     </paper>
     <paper id="254">
       <author><first>Nicolas</first><last>Moreau</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
-      <author><first>Pere R.</first><last>Comas</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
+      <author id="pere-comas"><first>Pere R.</first><last>Comas</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Davide</first><last>Buscaldi</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
@@ -2408,7 +2408,7 @@
     </paper>
     <paper id="255">
       <author><first>Roser</first><last>Sanromà</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <title>The Database of <fixed-case>C</fixed-case>atalan Adjectives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/373_Paper.pdf</url>
       <abstract>We present the Database of Catalan Adjectives (DCA), a database with 2,296 adjective lemmata enriched with morphological, syntactic and semantic information. This set of adjectives has been collected from a fragment of the Corpus Textual Informatitzat de la Llengua Catalana of the Institut dEstudis Catalans and constitutes a representative sample of the adjective class in Catalan as a whole. The database includes both manually coded and automatically extracted information regarding the most prominent properties used in the literature regarding the semantics of adjectives, such as morphological origin, suffix (if any), predicativity, gradability, adjective position with respect to the head noun, adjective modifiers, or semantic class. The DCA can be useful for NLP applications using adjectives (from POS-taggers to Opinion Mining applications) and for linguistic analysis regarding the morphological, syntactic, and semantic properties of adjectives. We now make it available to the research community under a Creative Commons Attribution Share Alike 3.0 Spain license.</abstract>
@@ -2417,7 +2417,7 @@
     <paper id="256">
       <author><first>Amal</first><last>Zouaq</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
-      <author><first>Benoit</first><last>Ozell</last></author>
+      <author id="benoit-ozell"><first>Benoit</first><last>Ozell</last></author>
       <title>Can Syntactic and Logical Graphs help Word Sense Disambiguation?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/374_Paper.pdf</url>
       <abstract>This paper presents a word sense disambiguation (WSD) approach based on syntactic and logical representations. The objective here is to run a number of experiments to compare standard contexts (word windows, sentence windows) with contexts provided by a dependency parser (syntactic context) and a logical analyzer (logico-semantic context). The approach presented here relies on a dependency grammar for the syntactic representations. We also use a pattern knowledge base over the syntactic dependencies to extract flat predicative logical representations. These representations (syntactic and logical) are then used to build context vectors that are exploited in the WSD process. Various state-of-the-art algorithms including Simplified Lesk, Banerjee and Pedersen and frequency of co-occurrences are tested with these syntactic and logical contexts. Preliminary results show that defining context vectors based on these features may improve WSD by comparison with classical word and sentence context windows. However, future experiments are needed to provide more evidence over these issues.</abstract>
@@ -2427,7 +2427,7 @@
       <author><first>Meng</first><last>Wang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Shiwen</first><last>Yu</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <title>Automatic Acquisition of <fixed-case>C</fixed-case>hinese Novel Noun Compounds</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/377_Paper.pdf</url>
       <abstract>Automatic acquisition of novel compounds is notoriously difficult because most novel compounds have relatively low frequency in a corpus. The current study proposes a new method to deal with the novel compound acquisition challenge. We model this task as a two-class classification problem in which a candidate compound is either classified as a compound or a non-compound. A machine learning method using SVM, incorporating two types of linguistically motivated features: semantic features and character features, is applied to identify rare but valid noun compounds. We explore two kinds of training data: one is virtual training data which is obtained by three statistical scores, i.e. co-occurrence frequency, mutual information and dependent ratio, from the frequent compounds; the other is real training data which is randomly selected from the infrequent compounds. We conduct comparative experiments, and the experimental results show that even with limited direct evidence in the corpus for the novel compounds, we can make full use of the typical frequent compounds to help in the discovery of the novel compounds.</abstract>
@@ -2443,7 +2443,7 @@
       <bibkey>oostdijk-etal-2010-constructing</bibkey>
     </paper>
     <paper id="259">
-      <author><first>Paul</first><last>Bedaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bedaride</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <title>Syntactic Testsuites and Textual Entailment Recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/379_Paper.pdf</url>
@@ -2459,7 +2459,7 @@
       <bibkey>stepanek-pajas-2010-querying</bibkey>
     </paper>
     <paper id="261">
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Antonella</first><last>Bristot</last></author>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
       <title>Deep Linguistic Processing with <fixed-case>GETARUNS</fixed-case> for Spoken Dialogue Understanding</title>
@@ -2469,7 +2469,7 @@
     </paper>
     <paper id="262">
       <author><first>Emad</first><last>Mohamed</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <title><fixed-case>A</fixed-case>rabic Part of Speech Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/384_Paper.pdf</url>
       <abstract>Arabic is a morphologically rich language, which presents a challenge for part of speech tagging. In this paper, we compare two novel methods for POS tagging of Arabic without the use of gold standard word segmentation but with the full POS tagset of the Penn Arabic Treebank. The first approach uses complex tags that describe full words and does not require any word segmentation. The second approach is segmentation-based, using a machine learning segmenter. In this approach, the words are first segmented, then the segments are annotated with POS tags. Because of the word-based approach, we evaluate full word accuracy rather than segment accuracy. Word-based POS tagging yields better results than segment-based tagging (93.93% vs. 93.41%). Word based tagging also gives the best results on known words, the segmentation-based approach gives better results on unknown words. Combining both methods results in a word accuracy of 94.37%, which is very close to the result obtained by using gold standard segmentation (94.91%).</abstract>
@@ -2477,7 +2477,7 @@
     </paper>
     <paper id="263">
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <title><fixed-case>T</fixed-case>witter as a Corpus for Sentiment Analysis and Opinion Mining</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/385_Paper.pdf</url>
       <abstract>Microblogging today has become a very popular communication tool among Internet users. Millions of users share opinions on different aspects of life everyday. Therefore microblogging web-sites are rich sources of data for opinion mining and sentiment analysis. Because microblogging has appeared relatively recently, there are a few research works that were devoted to this topic. In our paper, we focus on using Twitter, the most popular microblogging platform, for the task of sentiment analysis. We show how to automatically collect a corpus for sentiment analysis and opinion mining purposes. We perform linguistic analysis of the collected corpus and explain discovered phenomena. Using the corpus, we build a sentiment classifier, that is able to determine positive, negative and neutral sentiments for a document. Experimental evaluations show that our proposed techniques are efficient and performs better than previously proposed methods. In our research, we worked with English, however, the proposed technique can be used with any other language.</abstract>
@@ -2485,7 +2485,7 @@
     </paper>
     <paper id="264">
       <author><first>Rena</first><last>Nemoto</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Jacques</first><last>Durand</last></author>
       <title>Word Boundaries in <fixed-case>F</fixed-case>rench: Evidence from Large Speech Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/386_Paper.pdf</url>
@@ -2493,7 +2493,7 @@
       <bibkey>nemoto-etal-2010-word</bibkey>
     </paper>
     <paper id="265">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <author><first>Rosa</first><last>Stern</last></author>
       <title>A Lexicon of <fixed-case>F</fixed-case>rench Quotation Verbs for Automatic Quotation Extraction</title>
@@ -2521,9 +2521,9 @@
     </paper>
     <paper id="268">
       <author><first>Yasuharu</first><last>Den</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Takehiko</first><last>Maruyama</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <author><first>Katsuya</first><last>Takanashi</last></author>
       <author><first>Mika</first><last>Enomoto</last></author>
       <author><first>Nao</first><last>Yoshida</last></author>
@@ -2533,8 +2533,8 @@
       <bibkey>den-etal-2010-two</bibkey>
     </paper>
     <paper id="269">
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
       <title>Statistical <fixed-case>F</fixed-case>rench Dependency Parsing: Treebank Conversion and First Results</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/392_Paper.pdf</url>
@@ -2543,7 +2543,7 @@
     </paper>
     <paper id="270">
       <author><first>Yue</first><last>Ma</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Laurent</first><last>Audibert</last></author>
       <title>Formal Description of Resources for Ontology-based Semantic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/393_Paper.pdf</url>
@@ -2551,18 +2551,18 @@
       <bibkey>ma-etal-2010-formal</bibkey>
     </paper>
     <paper id="271">
-      <author><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
-      <author><first>Mikel</first><last>Penagarikano</last></author>
-      <author><first>Germán</first><last>Bordel</last></author>
+      <author id="luis-javier-rodriguez-fuentes"><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
+      <author id="mikel-penagarikano"><first>Mikel</first><last>Penagarikano</last></author>
+      <author id="german-bordel"><first>Germán</first><last>Bordel</last></author>
       <author><first>Amparo</first><last>Varona</last></author>
-      <author><first>Mireia</first><last>Díez</last></author>
+      <author id="mireia-diez"><first>Mireia</first><last>Díez</last></author>
       <title><fixed-case>KALAKA</fixed-case>: A <fixed-case>TV</fixed-case> Broadcast Speech Database for the Evaluation of Language Recognition Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/394_Paper.pdf</url>
       <abstract>A speech database, named KALAKA, was created to support the Albayzin 2008 Evaluation of Language Recognition Systems, organized by the Spanish Network on Speech Technologies from May to November 2008. This evaluation, designed according to the criteria and methodology applied in the NIST Language Recognition Evaluations, involved four target languages: Basque, Catalan, Galician and Spanish (official languages in Spain), and included speech signals in other (unknown) languages to allow open-set verification trials. In this paper, the process of designing, collecting data and building the train, development and evaluation datasets of KALAKA is described. Results attained in the Albayzin 2008 LRE are presented as a means of evaluating the database. The performance of a state-of-the-art language recognition system on a closed-set evaluation task is also presented for reference. Future work includes extending KALAKA by adding Portuguese and English as target languages and renewing the set of unknown languages needed to carry out open-set evaluations.</abstract>
       <bibkey>rodriguez-fuentes-etal-2010-kalaka</bibkey>
     </paper>
     <paper id="272">
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
       <title>Annotation of Morphological Meanings of Verbs Revisited</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/395_Paper.pdf</url>
@@ -2571,13 +2571,13 @@
     </paper>
     <paper id="273">
       <author><first>Andrew</first><last>Hickl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title>Unsupervised Discovery of Collective Action Frames for Socio-Cultural Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/396_Paper.pdf</url>
       <bibkey>hickl-harabagiu-2010-unsupervised</bibkey>
     </paper>
     <paper id="274">
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <title>Predicting Morphological Types of <fixed-case>C</fixed-case>hinese Bi-Character Words by Machine Learning Approaches</title>
@@ -2595,16 +2595,16 @@
     </paper>
     <paper id="276">
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Patricia</first><last>Velázquez-Morales</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="patricia-velazquez-morales"><first>Patricia</first><last>Velázquez-Morales</last></author>
       <title>Automatic Summarization Using Terminological and Semantic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/400_Paper.pdf</url>
       <abstract>This paper presents a new algorithm for automatic summarization of specialized texts combining terminological and semantic resources: a term extractor and an ontology. The term extractor provides the list of the terms that are present in the text together their corresponding termhood. The ontology is used to calculate the semantic similarity among the terms found in the main body and those present in the document title. The general idea is to obtain a relevance score for each sentence taking into account both the termhood of the terms found in such sentence and the similarity among such terms and those terms present in the title of the document. The phrases with the highest score are chosen to take part of the final summary. We evaluate the algorithm with Rouge, comparing the resulting summaries with the summaries of other summarizers. The sentence selection algorithm was also tested as part of a standalone summarizer. In both cases it obtains quite good results although the perception is that there is a space for improvement.</abstract>
       <bibkey>vivaldi-etal-2010-automatic</bibkey>
     </paper>
     <paper id="277">
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <title>Is my Judge a good One?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/402_Paper.pdf</url>
       <abstract>This paper aims at measuring the reliability of judges in MT evaluation. The scope is two evaluation campaigns from the CESTA project, during which human evaluations were carried out on fluency and adequacy criteria for English-to-French documents. Our objectives were threefold: observe both inter- and intra-judge agreements, and then study the influence of the evaluation design especially implemented for the need of the campaigns. Indeed, a web interface was especially developed to help with the human judgments and store the results, but some design changes were made between the first and the second campaign. Considering the low agreements observed, the judges' behaviour has been analysed in that specific context. We also asked several judges to repeat their own evaluations a few times after the first judgments done during the official evaluation campaigns. Even if judges did not seem to agree fully at first sight, a less strict comparison led to a strong agreement. Furthermore, the evolution of the design during the project seemed to have been a source for the difficulties that judges encountered to keep the same interpretation of quality.</abstract>
@@ -2613,14 +2613,14 @@
     <paper id="278">
       <author><first>Mátyás</first><last>Brendel</last></author>
       <author><first>Riccardo</first><last>Zaccarelli</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <title>Building a System for Emotions Detection from Speech to Control an Affective Avatar</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/403_Paper.pdf</url>
       <abstract>In this paper we describe a corpus set together from two sub-corpora. The CINEMO corpus contains acted emotional expression obtained by playing dubbing exercises. This new protocol is a way to collect mood-induced data in large amount which show several complex and shaded emotions. JEMO is a corpus collected with an emotion-detection game and contains more prototypical emotions than CINEMO. We show how the two sub-corpora balance and enrich each other and result in a better performance. We built male and female emotion models and use Sequential Fast Forward Feature Selection to improve detection performances. After feature-selection we obtain good results even with our strict speaker independent testing method. The global corpus contains 88 speakers (38 females, 50 males). This study has been done within the scope of the ANR (National Research Agency) Affective Avatar project which deals with building a system of emotions detection for monitoring an Artificial Agent by voice.</abstract>
       <bibkey>brendel-etal-2010-building</bibkey>
     </paper>
     <paper id="279">
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <title>Facilitating Non-expert Users of the <fixed-case>KYOTO</fixed-case> Platform: the <fixed-case>TMEKO</fixed-case> Editing Protocol for Synset to Ontology Mappings</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/406_Paper.pdf</url>
@@ -2655,7 +2655,7 @@
     </paper>
     <paper id="283">
       <author><first>Richard</first><last>Schwarz</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Fabienne</first><last>Martin</last></author>
       <author><first>Achim</first><last>Stein</last></author>
       <title>Identification of Rare &amp; Novel Senses Using Translations in a Parallel Corpus</title>
@@ -2664,10 +2664,10 @@
       <bibkey>schwarz-etal-2010-identification</bibkey>
     </paper>
     <paper id="284">
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <author><first>Cristina</first><last>Mota</last></author>
       <author><first>Diana</first><last>Santos</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Paula</first><last>Carvalho</last></author>
       <title>Second <fixed-case>HAREM</fixed-case>: Advancing the State of the Art of Named Entity Recognition in <fixed-case>P</fixed-case>ortuguese</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/412_Paper.pdf</url>
@@ -2694,9 +2694,9 @@
     </paper>
     <paper id="287">
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Santiago</first><last>Bel</last></author>
-      <author><first>Víctor</first><last>Rodríguez</last></author>
+      <author id="victor-rodriguez-doncel"><first>Víctor</first><last>Rodríguez</last></author>
       <title>A Case Study on Interoperability for Language Resources and Applications</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/418_Paper.pdf</url>
       <abstract>This paper reports our experience when integrating differ resources and services into a grid environment. The use case we address implies the deployment of several NLP applications as web services. The ultimate objective of this task was to create a scenario where researchers have access to a variety of services they can operate. These services should be easy to invoke and able to interoperate between one another. We essentially describe the interoperability problems we faced, which involve metadata interoperability, data interoperability and service interoperability. We devote special attention to service interoperability and explore the possibility to define common interfaces and semantic description of services. While the web services paradigm suits the integration of different services very well, this requires mutual understanding and the accommodation to common interfaces that not only provide technical solution but also ease the userâs work. Defining common interfaces benefits interoperability but requires the agreement about operations and the set of inputs/outputs. Semantic annotation allows defining some sort of taxonomy that organizes and collects the set of admissible operations and types input/output parameters.</abstract>
@@ -2704,7 +2704,7 @@
     </paper>
     <paper id="288">
       <author><first>Bruno</first><last>Cartoni</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <title>Semi-Automated Extension of a Specialized Medical Lexicon for <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/420_Paper.pdf</url>
       <abstract>This paper describes the development of a specialized lexical resource for a specialized domain, namely medicine. First, in order to assess the linguistic phenomena that need to be adressed, we based our observation on a large collection of more than 300'000 terms, organised around conceptual identifiers. Based on these observations, we highlight the specificities that such a lexicon should take into account, namely in terms of inflectional and derivational knowledge. In a first experiment, we show that general resources lack a large part of the words needed to process specialized language. Secondly, we describe an experiment to feed semi-automatically a medical lexicon and populate it with inflectional information. This experiment is based on a semi-automatic methods that tries to acquire inflectional knowledge from frequent endings of words recorded in existing lexicon. Thanks to this, we increased the coverage of the target vocabulary from 14.1% to 25.7%.</abstract>
@@ -2719,9 +2719,9 @@
       <bibkey>duarte-gibet-2010-heterogeneous</bibkey>
     </paper>
     <paper id="290">
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <title>Annotations for Opinion Mining Evaluation in the Industrial Context of the <fixed-case>DOXA</fixed-case> project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/422_Paper.pdf</url>
       <abstract>After presenting opinion and sentiment analysis state of the art and the DOXA project, we review the few evaluation campaigns that have dealt in the past with opinion mining. Then we present the two level opinion and sentiment model that we will use for evaluation in the DOXA project and the annotation interface we use for hand annotating a reference corpus. We then present the corpus which will be used on DOXA and report on the hand-annotation task on a corpus of comments on video games and the solution adopted to obtain a sufficient level of inter-annotator agreement.</abstract>
@@ -2730,7 +2730,7 @@
     <paper id="291">
       <author><first>Milen</first><last>Kouylekov</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <title>Mining <fixed-case>W</fixed-case>ikipedia for Large-scale Repositories of Context-Sensitive Entailment Rules</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/425_Paper.pdf</url>
       <abstract>This paper focuses on the central role played by lexical information in the task of Recognizing Textual Entailment. In particular, the usefulness of lexical knowledge extracted from several widely used static resources, represented in the form of entailment rules, is compared with a method to extract lexical information from Wikipedia as a dynamic knowledge resource. The proposed acquisition method aims at maximizing two key features of the resulting entailment rules: coverage (i.e. the proportion of rules successfully applied over a dataset of TE pairs), and context sensitivity (i.e. the proportion of rules applied in appropriate contexts). Evaluation results show that Wikipedia can be effectively used as a source of lexical entailment rules, featuring both higher coverage and context sensitivity with respect to other resources.</abstract>
@@ -2753,9 +2753,9 @@
       <bibkey>weller-heid-2010-extraction</bibkey>
     </paper>
     <paper id="294">
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Eric</first><last>de La Clergerie</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de La Clergerie</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title>The Second Evaluation Campaign of <fixed-case>PASSAGE</fixed-case> on Parsing of <fixed-case>F</fixed-case>rench</title>
@@ -2763,18 +2763,18 @@
       <bibkey>paroubek-etal-2010-second</bibkey>
     </paper>
     <paper id="295">
-      <author><first>Kepa Joseba</first><last>Rodríguez</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa Joseba</first><last>Rodríguez</last></author>
       <author><first>Francesca</first><last>Delogu</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
-      <author><first>Egon W.</first><last>Stemle</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="egon-stemle"><first>Egon W.</first><last>Stemle</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>Anaphoric Annotation of <fixed-case>W</fixed-case>ikipedia and Blogs in the Live Memories Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/431_Paper.pdf</url>
       <abstract>The Live Memories corpus is an Italian corpus annotated for anaphoric relations. This annotation effort aims to contribute to two significant issues for the CL research: the lack of annotated anaphoric resources for Italian and the increasing interest for the social Web. The Live Memories Corpus contains texts from the Italian Wikipedia about the region Trentino/Süd Tirol and from blog sites with users' comments. It is planned to add a set of articles of local news papers. The corpus includes manual annotated information about morphosyntactic agreement, anaphoricity, and semantic class of the NPs. The anaphoric annotation includes discourse deixis, bridging relations and markes cases of ambiguity with the annotation of alternative interpretations. For the annotation of the anaphoric links the corpus takes into account specific phenomena of the Italian language like incorporated clitics and phonetically non realized pronouns. Reliability studies for the annotation of the mentioned phenomena and for annotation of anaphoric links in general offer satisfactory results. The Wikipedia and blogs dataset will be distributed under Creative Commons Attributions licence.</abstract>
       <bibkey>rodriguez-etal-2010-anaphoric</bibkey>
     </paper>
     <paper id="296">
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Gisle</first><last>Ytrestøl</last></author>
       <title><fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>oods: Syntacto-Semantic Annotation for <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia</title>
@@ -2792,7 +2792,7 @@
       <bibkey>ruppenhofer-etal-2010-speaker</bibkey>
     </paper>
     <paper id="298">
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>David</first><last>Benyon</last></author>
       <author><first>Jay</first><last>Bradley</last></author>
       <author><first>Preben</first><last>Hansen</last></author>
@@ -2804,7 +2804,7 @@
     </paper>
     <paper id="299">
       <author><first>Carlos Gómez</first><last>Gallo</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <author><first>Katrina</first><last>Furth</last></author>
       <title>A Database for the Exploration of <fixed-case>S</fixed-case>panish Planning</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/436_Paper.pdf</url>
@@ -2863,7 +2863,7 @@
       <bibkey>rytting-etal-2010-error</bibkey>
     </paper>
     <paper id="304">
-      <author><first>Christopher R</first><last>Walker</last></author>
+      <author id="christopher-r-walker"><first>Christopher R</first><last>Walker</last></author>
       <author><first>Hannah</first><last>Copperman</last></author>
       <title>Evaluating Complex Semantic Artifacts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/441_Paper.pdf</url>
@@ -2871,9 +2871,9 @@
       <bibkey>walker-copperman-2010-evaluating</bibkey>
     </paper>
     <paper id="305">
-      <author><first>Mohamed</first><last>Altantawy</last></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>Altantawy</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Ibrahim</first><last>Saleh</last></author>
       <title>Morphological Analysis and Generation of <fixed-case>A</fixed-case>rabic Nouns: A Morphemic Functional Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/442_Paper.pdf</url>
@@ -2891,7 +2891,7 @@
     </paper>
     <paper id="307">
       <author><first>Hannah</first><last>Copperman</last></author>
-      <author><first>Christopher R.</first><last>Walker</last></author>
+      <author id="christopher-r-walker"><first>Christopher R.</first><last>Walker</last></author>
       <title>Fred’s Reusable Evaluation Device: Providing Support for Quick and Reliable Linguistic Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/444_Paper.pdf</url>
       <abstract>This paper describes an interface that was developed for processing large amounts of human judgments of linguistically annotated data. Freds Reusable Evaluation Device (Fred) provides administrators with a tool to submit linguistic evaluation tasks to judges. Each evaluation task is then presented to exactly two judges, who can submit their judgments at their own leisure. Fred then provides several metrics to administrators. The most important metric is precision, which is provided for each evaluation task and each annotator. Administrators can look at precision for a given data set over time, as well as by evaluation type, data set, or annotator. Inter-annotator agreement is also reported, and that can be tracked over time as well. The interface was developed to provide a tool for evaluating semantically marked up text. The types of evaluations Fred has been used for so far include things like correctness of subject-relation identification, and correctness of temporal relations. However, Freds full versatility has not yet been fully exploited.</abstract>
@@ -2899,19 +2899,19 @@
     </paper>
     <paper id="308">
       <author><first>Alexis</first><last>Baird</last></author>
-      <author><first>Christopher R.</first><last>Walker</last></author>
+      <author id="christopher-r-walker"><first>Christopher R.</first><last>Walker</last></author>
       <title>The Creation of a Large-Scale <fixed-case>LFG</fixed-case>-Based Gold Parsebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/445_Paper.pdf</url>
       <abstract>Systems for syntactically parsing sentences have long been recognized as a priority in Natural Language Processing. Statistics-based systems require large amounts of high quality syntactically parsed data. Using the XLE toolkit developed at PARC and the LFG Parsebanker interface developed at Bergen, the Parsebank Project at Powerset has generated a rapidly increasing volume of syntactically parsed data. By using these tools, we are able to leverage the LFG framework to provide richer analyses via both constituent (c-) and functional (f-) structures. Additionally, the Parsebanking Project uses source data from Wikipedia rather than source data limited to a specific genre, such as the Wall Street Journal. This paper outlines the process we used in creating a large-scale LFG-Based Parsebank to address many of the shortcomings of previously-created parse banks such as the Penn Treebank. While the Parsebank corpus is still in progress, preliminary results using the data in a variety of contexts already show promise.</abstract>
       <bibkey>baird-walker-2010-creation</bibkey>
     </paper>
     <paper id="309">
-      <author><first>Kathryn</first><last>Baker</last></author>
+      <author id="kathryn-baker"><first>Kathryn</first><last>Baker</last></author>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Nathaniel W.</first><last>Filardo</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <title>A Modality Lexicon and its use in Automatic Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/446_Paper.pdf</url>
       <abstract>This paper describes our resource-building results for an eight-week JHU Human Language Technology Center of Excellence Summer Camp for Applied Language Exploration (SCALE-2009) on Semantically-Informed Machine Translation. Specifically, we describe the construction of a modality annotation scheme, a modality lexicon, and two automated modality taggers that were built using the lexicon and annotation scheme. Our annotation scheme is based on identifying three components of modality: a trigger, a target and a holder. We describe how our modality lexicon was produced semi-automatically, expanding from an initial hand-selected list of modality trigger words and phrases. The resulting expanded modality lexicon is being made publicly available. We demonstrate that one tagger―a structure-based tagger―results in precision around 86% (depending on genre) for tagging of a standard LDC data set. In a machine translation application, using the structure-based tagger to annotate English modalities on an English-Urdu training corpus improved the translation quality score for Urdu by 0.3 Bleu points in the face of sparse training data.</abstract>
@@ -2927,7 +2927,7 @@
       <bibkey>tanenblatt-etal-2010-conceptmapper</bibkey>
     </paper>
     <paper id="311">
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Chiharu</first><last>Narawa</last></author>
       <title><fixed-case>LAF</fixed-case>/<fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case>-grounded Representation of Dependency Structures</title>
@@ -2942,7 +2942,7 @@
       <author><first>Deryle</first><last>Lonsdale</last></author>
       <author><first>Peter</first><last>McClanahan</last></author>
       <author><first>Owen</first><last>Merkling</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <title>Tag Dictionaries Accelerate Manual Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/451_Paper.pdf</url>
@@ -2957,8 +2957,8 @@
       <bibkey>konstantopoulos-2010-learning</bibkey>
     </paper>
     <paper id="314">
-      <author><first>Chris Irwin</first><last>Davis</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="chris-irwin-davis"><first>Chris Irwin</first><last>Davis</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <title>Feasibility of Automatically Bootstrapping a <fixed-case>P</fixed-case>ersian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/453_Paper.pdf</url>
       <abstract>In this paper we describe a proof-of-concept for the bootstrapping of a Persian WordNet. This effort was motivated by previous work done at Stanford University on bootstrapping an Arabic WordNet using a parallel corpus and an English WordNet. The principle of that work is based on the premise that paradigmatic relations are by nature deeply semantic, and as such, are likely to remain intact between languages. We performed our task on a Persian-English bilingual corpus of George Orwells Nineteen Eighty-Four. The corpus was neither aligned nor sense tagged, so it was necessary that these were undertaken first. A combination of manual and semiautomated methods were used to tag and sentence align the corpus. Actual mapping of English word senses onto Persian was done using automated techniques. Although Persian is written in Arabic script, it is an Indo-European language, while Arabic is a Central Semitic language. Despite their linguistic differences, we endeavor to test the applicability of the Stanford strategy to our task.</abstract>
@@ -2966,7 +2966,7 @@
     </paper>
     <paper id="315">
       <author><first>Aditi Sharma</first><last>Grover</last></author>
-      <author><first>Gerhard B.</first><last>van Huyssteen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard B.</first><last>van Huyssteen</last></author>
       <author><first>Marthinus W.</first><last>Pretorius</last></author>
       <title>The <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>frican Human Language Technologies Audit</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/454_Paper.pdf</url>
@@ -2974,13 +2974,13 @@
       <bibkey>grover-etal-2010-south</bibkey>
     </paper>
     <paper id="316">
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Oswald</first><last>Lanz</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Luca</first><last>Surian</last></author>
       <title><fixed-case>B</fixed-case>aby<fixed-case>E</fixed-case>xp: Constructing a Huge Multimodal Resource to Acquire Commonsense Knowledge Like Children Do</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/455_Paper.pdf</url>
@@ -2989,10 +2989,10 @@
     </paper>
     <paper id="317">
       <author><first>Iñaki</first><last>Sainz</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Inma</first><last>Hernáez</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
-      <author><first>Francisco</first><last>Campillo</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernáez</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="francisco-campillo"><first>Francisco</first><last>Campillo</last></author>
       <title><fixed-case>TTS</fixed-case> Evaluation Campaign with a Common <fixed-case>S</fixed-case>panish Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf</url>
       <abstract>This paper describes the first TTS evaluation campaign designed for Spanish. Seven research institutions took part in the evaluation campaign and developed a voice from a common speech database provided by the organisation. Each participating team had a period of seven weeks to generate a voice. Next, a set of sentences were released and each team had to synthesise them within a week period. Finally, some of the synthesised test audio files were subjectively evaluated via an online test according to the following criteria: similarity to the original voice, naturalness and intelligibility. Box-plots, Wilcoxon tests and WER have been generated in order to analyse the results. Two main conclusions can be drawn: On the one hand, there is considerable margin for improvement to reach the quality level of the natural voice. On the other hand, two systems get significantly better results than the rest: one is based on statistical parametric synthesis and the other one is a concatenative system that makes use of a sinusoidal model to modify both prosody and smooth spectral joints. Therefore, it seems that some kind of spectral control is needed when building voices with a medium size database for unrestricted domains.</abstract>
@@ -3017,9 +3017,9 @@
     <paper id="320">
       <author><first>Pamela</first><last>Forner</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Álvaro</first><last>Rodrigo</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="alvaro-rodrigo"><first>Álvaro</first><last>Rodrigo</last></author>
       <author><first>Richard</first><last>Sutcliffe</last></author>
       <title>Evaluating Multilingual Question Answering Systems at <fixed-case>CLEF</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/464_Paper.pdf</url>
@@ -3031,7 +3031,7 @@
       <author><first>Dóra</first><last>Szauter</last></author>
       <author><first>Attila</first><last>Almási</last></author>
       <author><first>György</first><last>Móra</last></author>
-      <author><first>Zoltán</first><last>Alexin</last></author>
+      <author id="zoltan-alexin"><first>Zoltán</first><last>Alexin</last></author>
       <author><first>János</first><last>Csirik</last></author>
       <title><fixed-case>H</fixed-case>ungarian Dependency Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/465_Paper.pdf</url>
@@ -3040,8 +3040,8 @@
     </paper>
     <paper id="322">
       <author><first>Francesca</first><last>Fallucchi</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <title>Generic Ontology Learners on Application Domains</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/466_Paper.pdf</url>
       <abstract>In ontology learning from texts, we have ontology-rich domains where we have large structured domain knowledge repositories or we have large general corpora with large general structured knowledge repositories such as WordNet (Miller, 1995). Ontology learning methods are more useful in ontology-poor domains. Yet, in these conditions, these methods have not a particularly high performance as training material is not sufficient. In this paper we present an LSP ontology learning method that can exploit models learned from a generic domain to extract new information in a specific domain. In our model, we firstly learn a model from training data and then we use the learned model to discover knowledge in a specific domain. We tested our model adaptation strategy using a background domain that is applied to learn the isa networks in the Earth Observation Domain as a specific domain. We will demonstrate that our method captures domain knowledge better than other generic models: our model better captures what is expected by domain experts than a baseline method based only on WordNet. This latter is better correlated with non-domain annotators asked to produce the ontology for the specific domain.</abstract>
@@ -3068,7 +3068,7 @@
     </paper>
     <paper id="325">
       <author><first>Yan</first><last>Zhao</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <title><fixed-case>POS</fixed-case> Multi-tagging Based on Combined Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/470_Paper.pdf</url>
       <abstract>In the POS tagging task, there are two kinds of statistical models: one is generative model, such as the HMM, the others are discriminative models, such as the Maximum Entropy Model (MEM). POS multi-tagging decoding method includes the N-best paths method and forward-backward method. In this paper, we use the forward-backward decoding method based on a combined model of HMM and MEM. If P(t) is the forward-backward probability of each possible tag t, we first calculate P(t) according HMM and MEM separately. For all tags options in a certain position in a sentence, we normalize P(t) in HMM and MEM separately. Probability of the combined model is the sum of normalized forward-backward probabilities P norm(t) in HMM and MEM. For each word w, we select the best tag in which the probability of combined model is the highest. In the experiments, we use combined model and get higher accuracy than any single model on POS tagging tasks of three languages, which are Chinese, English and Dutch. The result indicates that our combined model is effective.</abstract>
@@ -3076,11 +3076,11 @@
     </paper>
     <paper id="326">
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Inmaculada</first><last>Hernáez</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inmaculada</first><last>Hernáez</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Iker</first><last>Luengo</last></author>
-      <author><first>Jon</first><last>Sánchez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sánchez</last></author>
       <author><first>Igor</first><last>Odriozola</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <title><fixed-case>A</fixed-case>ho<fixed-case>T</fixed-case>ransf: A Tool for Multiband Excitation Based Speech Analysis and Modification</title>
@@ -3090,7 +3090,7 @@
     </paper>
     <paper id="327">
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <title>Identifying Paraphrases between Technical and Lay Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/472_Paper.pdf</url>
       <abstract>In previous work, we presented a preliminary study to identify paraphrases between technical and lay discourse types from medical corpora dedicated to the French language. In this paper, we test the hypothesis that the same kinds of paraphrases as for French can be detected between English technical and lay discourse types and report the adaptation of our method from French to English. Starting from the constitution of monolingual comparable corpora, we extract two kinds of paraphrases: paraphrases between nominalizations and verbal constructions and paraphrases between neo-classical compounds and modern-language phrases. We do this relying on morphological resources and a set of extraction rules we adapt from the original approach for French. Results show that paraphrases could be identified with a rather good precision, and that these types of paraphrase are relevant in the context of the opposition between technical and lay discourse types. These observations are consistent with the results obtained for French, which demonstrates the portability of the approach as well as the similarity of the two languages as regards the use of those kinds of expressions in technical and lay discourse types.</abstract>
@@ -3100,7 +3100,7 @@
       <author><first>Stavros</first><last>Ntalampiras</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
       <author><first>Ilyas</first><last>Potamitis</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>Heterogeneous Sensor Database in Support of Human Behaviour Analysis in Unrestricted Environments: The Audio Part</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/474_Paper.pdf</url>
       <abstract>In the present paper we report on a recent effort that resulted in the establishment of a unique multimodal database, referred to as the PROMETHEUS database. This database was created in support of research and development activities, performed within the European Commission FP7 PROMETHEUS project, aiming at the creation of a framework for monitoring and interpretation of human behaviours in unrestricted indoors and outdoors environments. In the present paper we discuss the design and the implementation of the audio part of the database and offer statistical information about the audio content. Specifically, it contains single-person and multi-person scenarios, but also covers scenarios with interactions between groups of people. The database design was conceived with extended support of research and development activities devoted to detection of typical and atypical events, emergency and crisis situations, which assist for achieving situational awareness and more reliable interpretation of the context in which humans behave. The PROMETHEUS database allows for embracing a wide range of real-world applications, including smart-home and human-robot interaction interfaces, indoors/outdoors public areas surveillance, airport terminals or city park supervision, etc. A major portion of the PROMETHEUS database will be made publically available by the end of year 2010.</abstract>
@@ -3108,16 +3108,16 @@
     </paper>
     <paper id="329">
       <author><first>Khalil</first><last>Dahab</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <title>A Game-based Approach to Transcribing Images of Text</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/476_Paper.pdf</url>
       <abstract>Creating language resources is expensive and time-consuming, and this forms a bottleneck in the development of language technology, for less-studied non-European languages in particular. The recent internet phenomenon of crowd-sourcing offers a cost-effective and potentially fast way of overcoming such language resource acquisition bottlenecks. We present a methodology that takes as its input scanned documents of typed or hand-written text, and produces transcriptions of the text as its output. Instead of using Optical Character Recognition (OCR) technology, the methodology is game-based and produces such transcriptions as a by-product. The approach is intended particularly for languages for which language technology and resources are scarce and reliable OCR technology may not exist. It can be used in place of OCR for transcribing individual documents, or to create corpora of paired images and transcriptions required to train OCR tools. We present Minefield, a prototype implementation of the approach which is currently collecting Arabic transcriptions.</abstract>
       <bibkey>dahab-belz-2010-game</bibkey>
     </paper>
     <paper id="330">
-      <author><first>Nicolas</first><last>Serrano</last></author>
+      <author id="nicolas-serrano"><first>Nicolas</first><last>Serrano</last></author>
       <author><first>Francisco</first><last>Castro</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <title>The <fixed-case>RODRIGO</fixed-case> Database</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/477_Paper.pdf</url>
       <abstract>Annotation of digitized pages from historical document collections is very important to research on automatic extraction of text blocks, lines, and handwriting recognition. We have recently introduced a new handwritten text database, GERMANA, which is based on a Spanish manuscript from 1891. To our knowledge, GERMANA is the first publicly available database mostly written in Spanish and comparable in size to standard databases. In this paper, we present another handwritten text database, RODRIGO, completely written in Spanish and comparable in size to GERMANA. However, RODRIGO comes from a much older manuscript, from 1545, where the typical difficult characteristics of historical documents are more evident. In particular, the writing style, which has clear Gothic influences, is significantly more complex than that of GERMANA. We also provide baseline results of handwriting recognition for reference in future studies, using standard techniques and tools for preprocessing, feature extraction, HMM-based image modelling, and language modelling.</abstract>
@@ -3129,14 +3129,14 @@
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
       <author><first>Medea Lo</first><last>Leggio</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <title>Building Textual Entailment Specialized Data Sets: a Methodology for Isolating Linguistic Phenomena Relevant to Inference</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/478_Paper.pdf</url>
       <abstract>This paper proposes a methodology for the creation of specialized data sets for Textual Entailment, made of monothematic Text-Hypothesis pairs (i.e. pairs in which only one linguistic phenomenon relevant to the entailment relation is highlighted and isolated). The expected benefits derive from the intuition that investigating the linguistic phenomena separately, i.e. decomposing the complexity of the TE problem, would yield an improvement in the development of specific strategies to cope with them. The annotation procedure assumes that humans have knowledge about the linguistic phenomena relevant to inference, and a classification of such phenomena both into fine grained and macro categories is suggested. We experimented with the proposed methodology over a sample of pairs taken from the RTE-5 data set, and investigated critical issues arising when entailment, contradiction or unknown pairs are considered. The result is a new resource, which can be profitably used both to advance the comprehension of the linguistic phenomena relevant to entailment judgments and to make a first step towards the creation of large-scale specialized data sets.</abstract>
       <bibkey>bentivogli-etal-2010-building</bibkey>
     </paper>
     <paper id="332">
-      <author><first>Amal</first><last>Al-Saif</last></author>
+      <author id="amal-al-saif"><first>Amal</first><last>Al-Saif</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <title>The <fixed-case>L</fixed-case>eeds <fixed-case>A</fixed-case>rabic Discourse Treebank: Annotating Discourse Connectives for <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/479_Paper.pdf</url>
@@ -3144,19 +3144,19 @@
       <bibkey>al-saif-markert-2010-leeds</bibkey>
     </paper>
     <paper id="333">
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <title>On the Role of Discourse Markers in Interactive Spoken Question Answering Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/481_Paper.pdf</url>
       <abstract>This paper presents a preliminary analysis of the role of some discourse markers and the vocalic hesitation ""euh"" in a corpus of spoken human utterances collected with the Ritel system, an open domain and spoken dialog system. The frequency and contextual combinatory of classical discourse markers and of the vocalic hesitation have been studied. This analysis pointed out some specificity in terms of combinatory of the analyzed items. The classical discourse markers seem to help initiating larger discursive blocks both at initial and medial positions of the on-going turns. The vocalic hesitation stand also for marking the user's embarrassments and wish to close the dialog.</abstract>
       <bibkey>vasilescu-etal-2010-role</bibkey>
     </paper>
     <paper id="334">
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <author><first>Riccardo</first><last>Zaccarelli</last></author>
       <author><first>Nicolas</first><last>Rollet</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <title><fixed-case>CINEMO</fixed-case> — A <fixed-case>F</fixed-case>rench Spoken Language Resource for Complex Emotions: Facts and Baselines</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/483_Paper.pdf</url>
       <abstract>The CINEMO corpus of French emotional speech provides a richly annotated resource to help overcome the apparent lack of learning and testing speech material for complex, i.e. blended or mixed emotions. The protocol for its collection was dubbing selected emotional scenes from French movies. 51 speakers are contained and the total speech time amounts to 2 hours and 13 minutes and 4k speech chunks after segmentation. Extensive labelling was carried out in 16 categories for major and minor emotions and in 6 continuous dimensions. In this contribution we give insight into the corpus statistics focusing in particular on the topic of complex emotions, and provide benchmark recognition results obtained in exemplary large feature space evaluations. In the result the labelling oft he collected speech clearly demonstrates that a complex handling of emotion seems needed. Further, the automatic recognition experiments provide evidence that the automatic recognition of blended emotions appears to be feasible.</abstract>
@@ -3194,7 +3194,7 @@
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
       <author><first>Shachar</first><last>Mirkin</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Asher</first><last>Stern</last></author>
       <title>A Resource for Investigating the Impact of Anaphora and Coreference on Inference.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/488_Paper.pdf</url>
@@ -3229,7 +3229,7 @@
     </paper>
     <paper id="342">
       <author><first>Max</first><last>Jakob</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <title>Mapping between Dependency Structures and Compositional Semantic Representations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/493_Paper.pdf</url>
@@ -3246,7 +3246,7 @@
       <bibkey>ben-gera-etal-2010-semantic</bibkey>
     </paper>
     <paper id="344">
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <author><first>Véronique</first><last>Rey</last></author>
       <author><first>Michael</first><last>Zock</last></author>
       <title>A Tool for Linking Stems and Conceptual Fragments to Enhance word Access</title>
@@ -3267,7 +3267,7 @@
       <author><first>Florian</first><last>Laws</last></author>
       <author><first>Beate</first><last>Dorow</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <title>Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf</url>
       <abstract>The Internet is an ever growing source of information stored in documents of different languages. Hence, cross-lingual resources are needed for more and more NLP applications. This paper presents (i) a graph-based method for creating one such resource and (ii) a resource created using the method, a cross-lingual relatedness thesaurus. Given a word in one language, the thesaurus suggests words in a second language that are semantically related. The method requires two monolingual corpora and a basic dictionary. Our general approach is to build two monolingual word graphs, with nodes representing words and edges representing linguistic relations between words. A bilingual dictionary containing basic vocabulary provides seed translations relating nodes from both graphs. We then use an inter-graph node-similarity algorithm to discover related words. Evaluation with three human judges revealed that 49% of the English and 57% of the German words discovered by our method are semantically related to the target words. We publish two resources in conjunction with this paper. First, noun coordinations extracted from the German and English Wikipedias. Second, the cross-lingual relatedness thesaurus which can be used in experiments involving interactive cross-lingual query expansion.</abstract>
@@ -3275,9 +3275,9 @@
     </paper>
     <paper id="347">
       <author><first>Samuel</first><last>Broscheit</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>Extending <fixed-case>BART</fixed-case> to Provide a Coreference Resolution System for <fixed-case>G</fixed-case>erman</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/500_Paper.pdf</url>
       <abstract>We present a flexible toolkit-based approach to automatic coreference resolution on German text. We start with our previous work aimed at reimplementing the system from Soon et al. (2001) for English, and extend it to duplicate a version of the state-of-the-art proposal from Klenner and Ailloud (2009). Evaluation performed on a benchmarking dataset, namely the TueBa-D/Z corpus (Hinrichs et al., 2005b), shows that machine learning based coreference resolution can be robustly performed in a language other than English.</abstract>
@@ -3287,7 +3287,7 @@
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <author><first>Kerstin</first><last>Eckart</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <title>A Corpus Representation Format for Linguistic Web Services: The <fixed-case>D</fixed-case>-<fixed-case>SPIN</fixed-case> Text Corpus Format and its Relationship with <fixed-case>ISO</fixed-case> Standards</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/503_Paper.pdf</url>
       <abstract>In the framework of the preparation of linguistic web services for corpus processing, the need for a representation format was felt, which supports interoperability between different web services in a corpus processing pipeline, but also provides a well-defined interface to both, legacy tools and their data formats and upcoming international standards. We present the D-SPIN text corpus format, TCF, which was designed for this purpose. It is a stand-off XML format, inspired by the philosophy of the emerging standards LAF (Linguistic Annotation Framework) and its ``instances'' MAF for morpho-syntactic annotation and SynAF for syntactic annotation. Tools for the exchange with existing (best practice) formats are available, and a converter from MAF to TCF is being tested in spring 2010. We describe the usage scenario where TCF is embedded and the properties and architecture of TCF. We also give examples of TCF encoded data and describe the aspects of syntactic and semantic interoperability already addressed.</abstract>
@@ -3304,7 +3304,7 @@
     </paper>
     <paper id="350">
       <author><first>Jakob</first><last>Halskov</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <title>Quality Indicators of <fixed-case>LSP</fixed-case> Texts — Selection and Measurements Measuring the Terminological Usefulness of Documents for an <fixed-case>LSP</fixed-case> Corpus</title>
@@ -3353,15 +3353,15 @@
       <author><first>Stefano</first><last>Bortoli</last></author>
       <author><first>Noemi</first><last>Scarpato</last></author>
       <author><first>Andrea</first><last>Turbati</last></author>
-      <author><first>Paolo</first><last>Bouquet</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paolo-bouquet"><first>Paolo</first><last>Bouquet</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <title><fixed-case>M</fixed-case>askkot — An Entity-centric Annotation Platform</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/515_Paper.pdf</url>
       <abstract>The Semantic Web is facing the important challenge to maintain its promise of a real world-wide graph of interconnected resources. Unfortunately, while URIs almost guarantee a direct reference to entities, the relation between the two is not bijective. Many different URI references to same concepts and entities can arise when -- in such a heterogeneous setting as the WWW -- people independently build new ontologies, or populate shared ones with new arbitrarily identified individuals. The proliferation of URIs is an unwanted, though natural effect strictly bound to the same principles which characterize the Semantic Web; reducing this phenomenon will improve the recall of Semantic Search engines, which could rely on explicit links between heterogeneous information sources. To address this problem, in this paper we present an integrated environment combining the semantic annotation and ontology building features available in the Semantic Turkey web browser extension, with globally unique identifiers for entities provided by the okkam Entity Name System, thus realizing a valuable resource for preventing diffusion of multiple URIs on the (Semantic) Web.</abstract>
       <bibkey>stellato-etal-2010-maskkot</bibkey>
     </paper>
     <paper id="356">
-      <author><first>Petr</first><last>Pollák</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollák</last></author>
       <author><first>Josef</first><last>Rajnoha</last></author>
       <title>Multi-Channel Database of Spontaneous <fixed-case>C</fixed-case>zech with Synchronization of Channels Recorded by Independent Devices</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/516_Paper.pdf</url>
@@ -3370,8 +3370,8 @@
     </paper>
     <paper id="357">
       <author><first>Guillaume</first><last>Bernard</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <title>A Question-answer Distance Measure to Investigate <fixed-case>QA</fixed-case> System Progress</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/518_Paper.pdf</url>
@@ -3379,8 +3379,8 @@
       <bibkey>bernard-etal-2010-question</bibkey>
     </paper>
     <paper id="358">
-      <author><first>Andre</first><last>Blessing</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <title>Fine-Grained Geographical Relation Extraction from <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/519_Paper.pdf</url>
       <abstract>In this paper, we present work on enhancing the basic data resource of a context-aware system. Electronic text offers a wealth of information about geospatial data and can be used to improve the completeness and accuracy of geospatial resources (e.g., gazetteers). First, we introduce a supervised approach to extracting geographical relations on a fine-grained level. Second, we present a novel way of using Wikipedia as a corpus based on self-annotation. A self-annotation is an automatically created high-quality annotation that can be used for training and evaluation. Wikipedia contains two types of different context: (i) unstructured text and (ii) structured data: templates (e.g., infoboxes about cities), lists and tables. We use the structured data to annotate the unstructured text. Finally, the extracted fine-grained relations are used to complete gazetteer data. The precision and recall scores of more than 97 percent confirm that a statistical IE pipeline can be used to improve the data quality of community-based resources.</abstract>
@@ -3405,9 +3405,9 @@
       <bibkey>tatsumi-etal-2010-evaluating</bibkey>
     </paper>
     <paper id="361">
-      <author><first>Danica</first><last>Damljanovic</last></author>
+      <author id="danica-damljanovic"><first>Danica</first><last>Damljanovic</last></author>
       <author><first>Milan</first><last>Agatonovic</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <title>Identification of the Question Focus: Combining Syntactic Analysis and Ontology-based Lookup through the User Interaction</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/524_Paper.pdf</url>
       <abstract>Most question-answering systems contain a classifier module which determines a question category, based on which each question is assigned an answer type. However, setting up syntactic patterns for this classification is a big challenge. In addition, in the case of ontology-based systems, the answer type should be aligned to the queried knowledge structure. In this paper, we present an approach for determining the answer type semi-automatically. We first identify the question focus using syntactic parsing, and then try to identify the answer type by combining the head of the focus with the ontology-based lookup. When this combination is not enough to make conclusions automatically, the user is engaged into a dialog in order to resolve the answer type. User selections are saved and used for training the system in order to improve its performance over time. Further on, the answer type is used to show the feedback and the concise answer to the user. Our approach is evaluated using 250 questions from the Mooney Geoquery dataset.</abstract>
@@ -3418,7 +3418,7 @@
       <author><first>Brigitte</first><last>Grau</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
@@ -3430,7 +3430,7 @@
     </paper>
     <paper id="363">
       <author><first>Silvana Marianela Bernaola</first><last>Biggio</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <author><first>Roberto</first><last>Zanoli</last></author>
       <title>Entity Mention Detection using a Combination of Redundancy-Driven Classifiers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/530_Paper.pdf</url>
@@ -3469,19 +3469,19 @@
       <bibkey>vorwerk-etal-2010-wapusk20</bibkey>
     </paper>
     <paper id="367">
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <title>Exploring Knowledge Bases for Similarity</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/534_Paper.pdf</url>
       <abstract>Graph-based similarity over WordNet has been previously shown to perform very well on word similarity. This paper presents a study of the performance of such a graph-based algorithm when using different relations and versions of Wordnet. The graph algorithm is based on Personalized PageRank, a random-walk based algorithm which computes the probability of a random-walk initiated in the target word to reach any synset following the relations in WordNet (Haveliwala, 2002). Similarity is computed as the cosine of the probability distributions for each word over WordNet. The best combination of relations includes all relations in WordNet 3.0, included disambiguated glosses, and automatically disambiguated topic signatures called KnowNets. All relations are part of the official release of WordNet, except KnowNets, which have been derived automatically. The results over the WordSim 353 dataset show that using the adequate relations the performance improves over previously published WordNet-based results on the WordSim353 dataset (Finkelstein et al., 2002). The similarity software and some graphs used in this paper are publicly available at <url>http://ixa2.si.ehu.es/ukb</url>.</abstract>
       <bibkey>agirre-etal-2010-exploring</bibkey>
     </paper>
     <paper id="368">
-      <author><first>Cristina</first><last>Sánchez-Marco</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Josep Maria</first><last>Fontana</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Sánchez-Marco</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="josep-maria-fontana"><first>Josep Maria</first><last>Fontana</last></author>
       <author><first>Judith</first><last>Domingo</last></author>
       <title>Annotation and Representation of a Diachronic Corpus of <fixed-case>S</fixed-case>panish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/535_Paper.pdf</url>
@@ -3499,9 +3499,9 @@
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Faiza</first><last>Gara</last></author>
+      <author id="faiza-elkateb-gara"><first>Faiza</first><last>Gara</last></author>
       <author><first>Olivier</first><last>Mesnard</last></author>
-      <author><first>Meriama</first><last>Laïb</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laïb</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <title><fixed-case>LIMA</fixed-case> : A Multilingual Framework for Linguistic Analysis and Linguistic Resources Development and Evaluation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/537_Paper.pdf</url>
@@ -3509,7 +3509,7 @@
       <bibkey>besancon-etal-2010-lima</bibkey>
     </paper>
     <paper id="371">
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <title>A Named Entity Labeler for <fixed-case>G</fixed-case>erman: Exploiting <fixed-case>W</fixed-case>ikipedia and Distributional Clusters</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/538_Paper.pdf</url>
@@ -3524,10 +3524,10 @@
       <bibkey>rosell-2010-text</bibkey>
     </paper>
     <paper id="373">
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <title><fixed-case>S</fixed-case>aturnalia: A <fixed-case>L</fixed-case>atin-<fixed-case>C</fixed-case>atalan Parallel Corpus for Statistical <fixed-case>MT</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/541_Paper.pdf</url>
       <abstract>Currently, a great effort is being carried out in the digitalisation of large historical document collections for preservation purposes. The documents in these collections are usually written in ancient languages, such as Latin or Greek, which limits the access of the general public to their content due to the language barrier. Therefore, digital libraries aim not only at storing raw images of digitalised documents, but also to annotate them with their corresponding text transcriptions and translations into modern languages. Unfortunately, ancient languages have at their disposal scarce electronic resources to be exploited by natural language processing techniques. This paper describes the compilation process of a novel Latin-Catalan parallel corpus as a new task for statistical machine translation (SMT). Preliminary experimental results are also reported using a state-of-the-art phrase-based SMT system. The results presented in this work reveal the complexity of the task and its challenging, but interesting nature for future development.</abstract>
@@ -3538,15 +3538,15 @@
       <author><first>Sean</first><last>Neilan</last></author>
       <author><first>Gary</first><last>An</last></author>
       <author><first>Noriko</first><last>Tomuro</last></author>
-      <author><first>Steven</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven</first><last>Lytinen</last></author>
       <title><fixed-case>D</fixed-case>jangology: A Light-weight Web-based Tool for Distributed Collaborative Text Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/543_Paper.pdf</url>
       <abstract>Manual text annotation is a resource-consuming endeavor necessary for NLP systems when they target new tasks or domains for which there are no existing annotated corpora. Distributing the annotation work across multiple contributors is a natural solution to reduce and manage the effort required. Although there are a few publicly available tools which support distributed collaborative text annotation, most of them have complex user interfaces and require a significant amount of involvement from the annotators/contributors as well as the project developers and administrators. We present a light-weight web application for highly distributed annotation projects - Djangology. The application takes advantage of the recent advances in web framework architecture that allow rapid development and deployment of web applications thus minimizing development time for customization. The application's web-based interface gives project administrators the ability to easily upload data, define project schemas, assign annotators, monitor progress, and review inter-annotator agreement statistics. The intuitive web-based user interface encourages annotator participation as contributors are not burdened by tool manuals, local installation, or configuration. The system has achieved a user response rate of 70% in two annotation projects involving more than 250 medical experts from various geographic locations.</abstract>
       <bibkey>apostolova-etal-2010-djangology</bibkey>
     </paper>
     <paper id="375">
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Analysing Temporally Annotated Corpora with <fixed-case>CAV</fixed-case>a<fixed-case>T</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/546_Paper.pdf</url>
       <abstract>We present CAVaT, a tool that performs Corpus Analysis and Validation for TimeML. CAVaT is an open source, modular checking utility for statistical analysis of features specific to temporally-annotated natural language corpora. It provides reporting, highlights salient links between a variety of general and time-specific linguistic features, and also validates a temporal annotation to ensure that it is logically consistent and sufficiently annotated. Uniquely, CAVaT provides analysis specific to TimeML-annotated temporal information. TimeML is a standard for annotating temporal information in natural language text. In this paper, we present the reporting part of CAVaT, and then its error-checking ability, including the workings of several novel TimeML document verification methods. This is followed by the execution of some example tasks using the tool to show relations between times, events, signals and links. We also demonstrate inconsistencies in a TimeML corpus (TimeBank) that have been detected with CAVaT.</abstract>
@@ -3555,8 +3555,8 @@
     <paper id="376">
       <author><first>Martin</first><last>Reynaert</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Franciska</first><last>de Jong</last></author>
       <title>Balancing <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: <fixed-case>IPR</fixed-case> versus Processing Issues in a 500-Million-Word Written <fixed-case>D</fixed-case>utch Reference Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/549_Paper.pdf</url>
@@ -3566,7 +3566,7 @@
     <paper id="377">
       <author><first>Samuel</first><last>Cruz-Lara</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <title><fixed-case>MLIF</fixed-case> : A Metamodel to Represent and Exchange Multilingual Textual Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/550_Paper.pdf</url>
@@ -3585,7 +3585,7 @@
     <paper id="379">
       <author><first>Francesca</first><last>Bonin</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <title>A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/553_Paper.pdf</url>
@@ -3594,7 +3594,7 @@
     </paper>
     <paper id="380">
       <author><first>Olivier</first><last>Blanc</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Anne</first><last>Dister</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <title>Partial Parsing of Spontaneous Spoken <fixed-case>F</fixed-case>rench</title>
@@ -3603,13 +3603,13 @@
       <bibkey>blanc-etal-2010-partial</bibkey>
     </paper>
     <paper id="381">
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Laurence</first><last>Bolot</last></author>
       <author><first>Emilie</first><last>Chételat-Pelé</last></author>
-      <author><first>Annick</first><last>Choisier</last></author>
+      <author id="annick-choisier"><first>Annick</first><last>Choisier</last></author>
       <author><first>Maxime</first><last>Delorme</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Jérémie</first><last>Segouat</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
       <author><first>Cyril</first><last>Verrecchia</last></author>
       <author><first>Flora</first><last>Badin</last></author>
       <author><first>Nadège</first><last>Devos</last></author>
@@ -3620,8 +3620,8 @@
     </paper>
     <paper id="382">
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Michele</first><last>Brunelli</last></author>
       <title><fixed-case>V</fixed-case>en<fixed-case>P</fixed-case>ro: A Morphological Analyzer for Venetan</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/556_Paper.pdf</url>
@@ -3629,7 +3629,7 @@
       <bibkey>tonelli-etal-2010-venpro</bibkey>
     </paper>
     <paper id="383">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
@@ -3648,18 +3648,18 @@
       <bibkey>heja-2010-role</bibkey>
     </paper>
     <paper id="385">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Jan</first><last>Alexandersson</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
       <author><first>Jae-Woong</first><last>Choe</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Kiyong</first><last>Lee</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>Towards an <fixed-case>ISO</fixed-case> Standard for Dialogue Act Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/560_Paper.pdf</url>
       <abstract>This paper describes an ISO project which aims at developing a standard for annotating spoken and multimodal dialogue with semantic information concerning the communicative functions of utterances, the kind of semantic content they address, and their relations with what was said and done earlier in the dialogue. The project, ISO 24617-2 ""Semantic annotation framework, Part 2: Dialogue acts"", is currently at DIS stage. The proposed annotation schema distinguishes 9 orthogonal dimensions, allowing each functional segment in dialogue to have a function in each of these dimensions, thus accounting for the multifunctionality that utterances in dialogue often have. A number of core communicative functions is defined in the form of ISO data categories, available at <url>http://semantic-annotation.uvt.nl/dialogue-acts/iso-datcats.pdf</url>; they are divided into ""dimension-specific"" functions, which can be used only in a particular dimension, such as Turn Accept in the Turn Management dimension, and ""general-purpose"" functions, which can be used in any dimension, such as Inform and Request. An XML-based annotation language, ""DiAML"" is defined, with an abstract syntax, a semantics, and a concrete syntax.</abstract>
@@ -3667,11 +3667,11 @@
     </paper>
     <paper id="386">
       <author><first>Archna</first><last>Bhatia</last></author>
-      <author><first>Rajesh</first><last>Bhatt</last></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last></author>
       <author><first>Bhuvana</first><last>Narasimhan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Michael</first><last>Tepper</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
       <author><first>Fei</first><last>Xia</last></author>
@@ -3682,24 +3682,24 @@
     </paper>
     <paper id="387">
       <author><first>Marina</first><last>Lloberes</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <title><fixed-case>S</fixed-case>panish <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing Dependency Grammar</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/562_Paper.pdf</url>
       <abstract>This paper presents the development of an open-source Spanish Dependency Grammar implemented in FreeLing environment. This grammar was designed as a resource for NLP applications that require a step further in natural language automatic analysis, as is the case of Spanish-to-Basque translation. The development of wide-coverage rule-based grammars using linguistic knowledge contributes to extend the existing Spanish deep parsers collection, which sometimes is limited. Spanish FreeLing Dependency Grammar, named EsTxala, provides deep and robust parse trees, solving attachments for any structure and assigning syntactic functions to dependencies. These steps are dealt with hand-written rules based on linguistic knowledge. As a result, FreeLing Dependency Parser gives a unique analysis as a dependency tree for each sentence analyzed. Since it is a resource open to the scientific community, exhaustive grammar evaluation is being done to determine its accuracy as well as strategies for its manteinance and improvement. In this paper, we show the results of an experimental evaluation carried out over EsTxala in order to test our evaluation methodology.</abstract>
       <bibkey>lloberes-etal-2010-spanish</bibkey>
     </paper>
     <paper id="388">
-      <author><first>Magali Sanches</first><last>Duran</last></author>
-      <author><first>Marcelo Adriano</first><last>Amâncio</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="marcelo-adriano-amancio"><first>Marcelo Adriano</first><last>Amâncio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <title>Assigning Wh-Questions to Verbal Arguments: Annotation Tools Evaluation and Corpus Building</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/564_Paper.pdf</url>
       <abstract>This work reports the evaluation and selection of annotation tools to assign wh-question labels to verbal arguments in a sentence. Wh-question assignment discussed herein is a kind of semantic annotation which involves two tasks: making delimitation of verbs and arguments, and linking verbs to its arguments by question labels. As it is a new type of semantic annotation, there is no report about requirements an annotation tool should have to face it. For this reason, we decided to select the most appropriated tool in two phases. In the first phase, we executed the task with an annotation tool we have used before in another task. Such phase helped us to test the task and enabled us to know which features were or not desirable in an annotation tool for our purpose. In the second phase, guided by such requirements, we evaluated several tools and selected a tool for the real task. After corpus annotation conclusion, we report some of the annotation results and some comments on the improvements there should be made in an annotation tool to better support such kind of annotation task.</abstract>
       <bibkey>duran-etal-2010-assigning</bibkey>
     </paper>
     <paper id="389">
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>The Impact of Task and Corpus on Event Extraction Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/565_Paper.pdf</url>
       <abstract>The term event extraction covers a wide range of information extraction tasks, and methods developed and evaluated for one task may prove quite unsuitable for another. Understanding these task differences is essential to making broad progress in event extraction. We look back at the MUC and ACE tasks in terms of one characteristic, the breadth of the scenario ― how wide a range of information is subsumed in a single extraction task. We examine how this affects strategies for collecting information and methods for semi-supervised training of new extractors. We also consider the heterogeneity of corpora ― how varied the topics of documents in a corpus are. Extraction systems may be intended in principle for general news but are typically evaluated on topic-focused corpora, and this evaluation context may affect system design. As one case study, we examine the task of identifying physical attack events in news corpora, observing the effect on system performance of shifting from an attack-event-rich corpus to a more varied corpus and considering how the impact of this shift may be mitigated.</abstract>
@@ -3708,7 +3708,7 @@
     <paper id="390">
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <title>Consistent and Flexible Integration of Morphological Annotation in the <fixed-case>A</fixed-case>rabic Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/566_Paper.pdf</url>
       <abstract>Complications arise for standoff annotation when the annotation is not on the source text itself, but on a more abstract representation. This is particularly the case in a language such as Arabic with morphological and orthographic challenges, and we discuss various aspects of these issues in the context of the Arabic Treebank. The Standard Arabic Morphological Analyzer (SAMA) is closely integrated into the annotation workflow, as the basis for the abstraction between the explicit source text and the more abstract token representation. However, this integration with SAMA gives rise to various problems for the annotation workflow and for maintaining the link between the Treebank and SAMA. In this paper we discuss how we have overcome these problems with consistent and more precise categorization of all of the tokens for their relationship with SAMA. We also discuss how we have improved the creation of several distinct alternative forms of the tokens used in the syntactic trees. As a result, the Treebank provides a resource relating the different forms of the same underlying token with varying degrees of vocalization, in terms of how they relate (1) to each other, (2) to the syntactic structure, and (3) to the morphological analyzer.</abstract>
@@ -3723,8 +3723,8 @@
       <bibkey>zaninello-nissim-2010-creation</bibkey>
     </paper>
     <paper id="392">
-      <author><first>Jana</first><last>Šindlerová</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <title>Building a Bilingual <fixed-case>V</fixed-case>al<fixed-case>L</fixed-case>ex Using Treebank Token Alignment: First Observations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/568_Paper.pdf</url>
       <abstract>We explore the potential and limitations of a concept of building a bilingual valency lexicon based on the alignment of nodes in a parallel treebank. Our aim is to build an electronic Czech-&gt;English Valency Lexicon by collecting equivalences from bilingual treebank data and storing them in two already existing electronic valency lexicons, PDT-VALLEX and Engvallex. For this task a special annotation interface has been built upon the TrEd editor, allowing quick and easy collecting of frame equivalences in either of the source lexicons. The issues encountered so far include limitations of technical character, theory-dependent limitations and limitations concerning the achievable degree of quality of human annotation. The issues of special interest for both linguists and MT specialists involved in the project include linguistically motivated non-balance between the frame equivalents, either in number or in type of valency participants. The first phases of annotation so far attest the assumption that there is a unique correspondence between the functors of the translation-equivalent frames. Also, hardly any linguistically significant non-balance between the frames has been found, which is partly promising considering the linguistic theory used and partly caused by little stylistic variety of the annotated corpus texts.</abstract>
@@ -3732,7 +3732,7 @@
     </paper>
     <paper id="393">
       <author><first>Alberto</first><last>Díaz</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Antonio</first><last>García</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
       <title>Development and Use of an Evaluation Collection for Personalisation of Digital Newspapers</title>
@@ -3741,19 +3741,19 @@
       <bibkey>diaz-etal-2010-development</bibkey>
     </paper>
     <paper id="394">
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <title><fixed-case>L</fixed-case>oony<fixed-case>B</fixed-case>in: Keeping Language Technologists Sane through Automated Management of Experimental (Hyper)Workflows</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/570_Paper.pdf</url>
       <abstract>Many contemporary language technology systems are characterized by long pipelines of tools with complex dependencies. Too often, these workflows are implemented by ad hoc scripts; or, worse, tools are run manually, making experiments difficult to reproduce. These practices are difficult to maintain in the face of rapidly evolving workflows while they also fail to expose and record important details about intermediate data. Further complicating these systems are hyperparameters, which often cannot be directly optimized by conventional methods, requiring users to determine which combination of values is best via trial and error. We describe LoonyBin, an open-source tool that addresses these issues by providing: 1) a visual interface for the user to create and modify workflows; 2) a well-defined mechanism for tracking metadata and provenance; 3) a script generator that compiles visual workflows into shell scripts; and 4) a new workflow representation we call a HyperWorkflow, which intuitively and succinctly encodes small experimental variations within a larger workflow.</abstract>
       <bibkey>clark-lavie-2010-loonybin</bibkey>
     </paper>
     <paper id="395">
-      <author><first>Keith J.</first><last>Miller</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
       <author><first>Sarah</first><last>McLeod</last></author>
-      <author><first>Elizabeth</first><last>Schroeder</last></author>
+      <author id="elizabeth-schroeder"><first>Elizabeth</first><last>Schroeder</last></author>
       <author><first>Mark</first><last>Arehart</last></author>
-      <author><first>Kenneth</first><last>Samuel</last></author>
+      <author id="ken-samuel"><first>Kenneth</first><last>Samuel</last></author>
       <author><first>James</first><last>Finley</last></author>
       <author><first>Vanesa</first><last>Jurica</last></author>
       <author><first>John</first><last>Polk</last></author>
@@ -3783,7 +3783,7 @@
     <paper id="398">
       <author><first>Sunao</first><last>Hara</last></author>
       <author><first>Norihide</first><last>Kitaoka</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <title>Estimation Method of User Satisfaction Using N-gram-based Dialog History Model for Spoken Dialog System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/579_Paper.pdf</url>
       <abstract>In this paper, we propose an estimation method of user satisfaction for a spoken dialog system using an N-gram-based dialog history model. We have collected a large amount of spoken dialog data accompanied by usability evaluation scores by users in real environments. The database is made by a field-test in which naive users used a client-server music retrieval system with a spoken dialog interface on their own PCs. An N-gram model is trained from the sequences that consist of users' dialog acts and/or the system's dialog acts for each one of six user satisfaction levels: from 1 to 5 and φ (task not completed). Then, the satisfaction level is estimated based on the N-gram likelihood. Experiments were conducted on the large real data and the results show that our proposed method achieved good classification performance; the classification accuracy was 94.7% in the experiment on a classification into dialogs with task completion and those without task completion. Even if the classifier detected all of the task incomplete dialog correctly, our proposed method achieved the false detection rate of only 6%.</abstract>
@@ -3792,7 +3792,7 @@
     <paper id="399">
       <author><first>Peng-Wen</first><last>Chen</last></author>
       <author><first>Snehal Kumar</first><last>Chennuru</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <title>A Language Approach to Modeling Human Behaviors</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/580_Paper.pdf</url>
       <abstract>The modeling of human behavior becomes more and more important due to the increasing popularity of context-aware computing and people-centric mobile applications. Inspired by the principle of action-as-language, we propose that human ambulatory behavior shares similar properties as natural languages. In addition, by exploiting this similarity, we will be able to index, recognize, cluster, retrieve, and infer high-level semantic meanings of human behaviors via the use of natural language processing techniques. In this paper, we developed a Life Logger system to help build the behavior language corpus which supports our ""Behavior as Language"" research. The constructed behavior corpus shows Zipf's distribution over the frequency of vocabularies which is aligned with our ""Behavior as Language"" assumption. Our preliminary results of using smoothed n-gram language model for activity recognition achieved an average accuracy rate of 94% in distinguishing among human ambulatory behaviors including walking, running, and cycling. This behavior-as-language corpus will enable researchers to study higher level human behavior based on the syntactic and semantic analysis of the corpus data.</abstract>
@@ -3821,7 +3821,7 @@
     <paper id="402">
       <author><first>Shu</first><last>Zhang</last></author>
       <author><first>Wenjie</first><last>Jia</last></author>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <title>Extracting Product Features and Sentiments from <fixed-case>C</fixed-case>hinese Customer Reviews</title>
@@ -3859,9 +3859,9 @@
       <author><first>Roberto P. A.</first><last>Araujo</last></author>
       <author><first>Rafael L.</first><last>de Oliveira</last></author>
       <author><first>Eder M.</first><last>de Novais</last></author>
-      <author><first>Thiago D.</first><last>Tadeu</last></author>
-      <author><first>Daniel B.</first><last>Pereira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="thiago-d-tadeu"><first>Thiago D.</first><last>Tadeu</last></author>
+      <author id="daniel-bastos-pereira"><first>Daniel B.</first><last>Pereira</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <title><fixed-case>SIN</fixed-case>otas: the Evaluation of a <fixed-case>NLG</fixed-case> Application</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/593_Paper.pdf</url>
       <abstract>SINotas is a data-to-text NLG application intended to produce short textual reports on students academic performance from a database conveying their grades, weekly attendance rates and related academic information. Although developed primarily as a testbed for Portuguese Natural Language Generation, SINotas generates reports of interest to both students keen to learn how their professors would describe their efforts, and to the professors themselves, who may benefit from an at-a-glance view of the students performance. In a traditional machine learning approach, SINotas uses a data-text aligned corpus as training data for decision-tree induction. The current system comprises a series of classifiers that implement major Document Planning subtasks (namely, data interpretation, content selection, within- and between-sentence structuring), and a small surface realisation grammar of Brazilian Portuguese. In this paper we focus on the evaluation work of the system, applying a number of intrinsic and user-based evaluation metrics to a collection of text reports generated from real application data.</abstract>
@@ -3882,7 +3882,7 @@
       <author><first>Guillaume</first><last>Aimetti</last></author>
       <author><first>Christos</first><last>Koniaris</last></author>
       <author><first>Kris</first><last>Demuynck</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>A Speech Corpus for Modeling Language Acquisition: <fixed-case>CAREGIVER</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/597_Paper.pdf</url>
       <abstract>A multi-lingual speech corpus used for modeling language acquisition called CAREGIVER has been designed and recorded within the framework of the EU funded Acquisition of Communication and Recognition Skills (ACORNS) project. The paper describes the motivation behind the corpus and its design by relying on current knowledge regarding infant language acquisition. Instead of recording infants and children, the voices of their primary and secondary caregivers were captured in both infant-directed and adult-directed speech modes over four languages in a read speech manner. The challenges and methods applied to obtain similar prompts in terms of complexity and semantics across different languages, as well as the normalized recording procedures employed at different locations, is covered. The corpus contains nearly 66000 utterance based audio files spoken over a two-year period by 17 male and 17 female native speakers of Dutch, English, Finnish, and Swedish. An orthographical transcription is available for every utterance. Also, time-aligned word and phone annotations for many of the sub-corpora also exist. The CAREGIVER corpus will be published via ELRA.</abstract>
@@ -3891,9 +3891,9 @@
     <paper id="409">
       <author><first>Sanja</first><last>Seljan</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana Dalbelo</first><last>Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana Dalbelo</first><last>Bašić</last></author>
       <author><first>Vjekoslav</first><last>Osmann</last></author>
       <title>Corpus Aligner (<fixed-case>C</fixed-case>or<fixed-case>A</fixed-case>l) Evaluation on <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>roatian Parallel Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/599_Paper.pdf</url>
@@ -3911,7 +3911,7 @@
       <bibkey>orasmaa-etal-2010-information</bibkey>
     </paper>
     <paper id="411">
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <title>The <fixed-case>S</fixed-case>panish Resource Grammar</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/602_Paper.pdf</url>
       <abstract>This paper describes the Spanish Resource Grammar, an open-source multi-purpose broad-coverage precise grammar for Spanish. The grammar is implemented on the Linguistic Knowledge Builder (LKB) system, it is grounded in the theoretical framework of Head-driven Phrase Structure Grammar (HPSG), and it uses Minimal Recursion Semantics (MRS) for the semantic representation. We have developed a hybrid architecture which integrates shallow processing functionalities -- morphological analysis, and Named Entity recognition and classification -- into the parsing process. The SRG has a full coverage lexicon of closed word classes and it contains 50,852 lexical entries for open word classes. The grammar also has 64 lexical rules to perform valence changing operations on lexical items, and 191 phrase structure rules that combine words and phrases into larger constituents and compositionally build up their semantic representation. The annotation of each parsed sentence in an LKB grammar simultaneously represents a traditional phrase structure tree, and a MRS semantic representation. We provide evaluation results on sentences from newspaper texts and discuss future work.</abstract>
@@ -3919,8 +3919,8 @@
     </paper>
     <paper id="412">
       <author><first>Anne</first><last>Vilnat</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Marie-Laure</first><last>Guénot</last></author>
       <title><fixed-case>PASSAGE</fixed-case> Syntactic Representation: a Minimal Common Ground for Evaluation</title>
@@ -3957,15 +3957,15 @@
     <paper id="416">
       <author><first>Claudia</first><last>Borg</last></author>
       <author><first>Mike</first><last>Rosner</last></author>
-      <author><first>Gordon J.</first><last>Pace</last></author>
+      <author id="gordon-pace"><first>Gordon J.</first><last>Pace</last></author>
       <title>Automatic Grammar Rule Extraction and Ranking for Definitions</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/609_Paper.pdf</url>
       <abstract>Plain text corpora contain much information which can only be accessed through human annotation and semantic analysis, which is typically very time consuming to perform. Analysis of such texts at a syntactic or grammatical structure level can however extract some of this information in an automated manner, even if identifying effective rules can be extremely difficult. One such type of implicit information present in texts is that of definitional phrases and sentences. In this paper, we investigate the use of evolutionary algorithms to learn classifiers to discriminate between definitional and non-definitional sentences in non-technical texts, and show how effective grammar-based definition discriminators can be automatically learnt with minor human intervention.</abstract>
       <bibkey>borg-etal-2010-automatic</bibkey>
     </paper>
     <paper id="417">
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Maria</first><last>Georgescul</last></author>
@@ -3979,7 +3979,7 @@
     <paper id="418">
       <author><first>Peter</first><last>Adolphs</last></author>
       <author><first>Xiwen</first><last>Cheng</last></author>
-      <author><first>Tina</first><last>Klüwer</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Klüwer</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <title>Question Answering Biographic Information and Social Network Powered by the Semantic Web</title>
@@ -4009,17 +4009,17 @@
       <author><first>Theodoros</first><last>Kostoulas</last></author>
       <author><first>Todor</first><last>Ganchev</last></author>
       <author><first>Iosif</first><last>Mporas</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title><fixed-case>V</fixed-case>ergina: A <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek Speech Database for Speech Synthesis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/614_Paper.pdf</url>
       <abstract>The present paper outlines the Vergina speech database, which was developed in support of research and development of corpus-based unit selection and statistical parametric speech synthesis systems for Modern Greek language. In the following, we describe the design, development and implementation of the recording campaign, as well as the annotation of the database. Specifically, a text corpus of approximately 5 million words, collected from newspaper articles, periodicals, and paragraphs of literature, was processed in order to select the utterances-sentences needed for producing the speech database and to achieve a reasonable phonetic coverage. The broad coverage and contents of the selected utterances-sentences of the database ― text corpus collected from different domains and writing styles ― makes this database appropriate for various application domains. The database, recorded in audio studio, consists of approximately 3,000 phonetically balanced Modern Greek utterances corresponding to approximately four hours of speech. Annotation of the Vergina speech database was performed using task-specific tools, which are based on a hidden Markov model (HMM) segmentation method, and then manual inspection and corrections were performed.</abstract>
       <bibkey>lazaridis-etal-2010-vergina</bibkey>
     </paper>
     <paper id="422">
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Michael</first><last>Strube</last></author>
-      <author><first>Benjamin</first><last>Boerschinger</last></author>
-      <author><first>Caecilia</first><last>Zirn</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Boerschinger</last></author>
+      <author id="cacilia-zirn"><first>Caecilia</first><last>Zirn</last></author>
       <author><first>Anas</first><last>Elghafari</last></author>
       <title><fixed-case>W</fixed-case>iki<fixed-case>N</fixed-case>et: A Very Large Scale Multi-Lingual Concept Network</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/615_Paper.pdf</url>
@@ -4028,7 +4028,7 @@
     </paper>
     <paper id="423">
       <author><first>Niraj</first><last>Aswani</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Developing Morphological Analysers for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages: Experimenting with the <fixed-case>H</fixed-case>indi and <fixed-case>G</fixed-case>ujarati Languages</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/616_Paper.pdf</url>
       <abstract>A considerable amount of work has been put into development of stemmers and morphological analysers. The majority of these approaches use hand-crafted suffix-replacement rules but a few try to discover such rules from corpora. While most of the approaches remove or replace suffixes, there are examples of derivational stemmers which are based on prefixes as well. In this paper we present a rule-based morphological analyser. We propose an approach that takes both prefixes as well as suffixes into account. Given a corpus and a dictionary, our method can be used to obtain a set of suffix-replacement rules for deriving an inflected words root form. We developed an approach for the Hindi language but show that the approach is portable, at least to related languages, by adapting it to the Gujarati language. Given that the entire process of developing such a ruleset is simple and fast, our approach can be used for rapid development of morphological analysers and yet it can obtain competitive results with analysers built relying on human authored rules.</abstract>
@@ -4037,7 +4037,7 @@
     <paper id="424">
       <author><first>Hiroki</first><last>Hanaoka</last></author>
       <author><first>Hideki</first><last>Mima</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <title>A <fixed-case>J</fixed-case>apanese Particle Corpus Built by Example-Based Annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/617_Paper.pdf</url>
       <abstract>This paper is a report on an on-going project of creating a new corpus focusing on Japanese particles. The corpus will provide deeper syntactic/semantic information than the existing resources. The initial target particle is ``to'' which occurs 22,006 times in 38,400 sentences of the existing corpus: the Kyoto Text Corpus. In this annotation task, an ``example-based'' methodology is adopted for the corpus annotation, which is different from the traditional annotation style. This approach provides the annotators with an example sentence rather than a linguistic category label. By avoiding linguistic technical terms, it is expected that any native speakers, with no special knowledge on linguistic analysis, can be an annotator without long training, and hence it can reduce the annotation cost. So far, 10,475 occurrences have been already annotated, with an inter-annotator agreement of 0.66 calculated by Cohen's kappa. The initial disagreement analyses and future directions are discussed in the paper.</abstract>
@@ -4046,7 +4046,7 @@
     <paper id="425">
       <author><first>Caroline</first><last>Sporleder</last></author>
       <author><first>Linlin</first><last>Li</last></author>
-      <author><first>Philip</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip</first><last>Gorinski</last></author>
       <author><first>Xaver</first><last>Koch</last></author>
       <title>Idioms in Context: The <fixed-case>IDIX</fixed-case> Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/618_Paper.pdf</url>
@@ -4062,7 +4062,7 @@
       <author><first>Susana</first><last>Jiménez-Murcia</last></author>
       <author><first>Maher Ben</first><last>Moussa</last></author>
       <author><first>Nadia</first><last>Magnenat-Thalmann</last></author>
-      <author><first>Nikos</first><last>Fakotakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos</first><last>Fakotakis</last></author>
       <title>The <fixed-case>P</fixed-case>lay<fixed-case>M</fixed-case>ancer Database: A Multimodal Affect Database in Support of Research and Development Activities in Serious Game Environment</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/619_Paper.pdf</url>
       <abstract>The present paper reports on a recent effort that resulted in the establishment of a unique multimodal affect database, referred to as the PlayMancer database. This database was created in support of the research and development activities, taking place within the PlayMancer project, which aim at the development of a serious game environment in support of treatment of patients with behavioural and addictive disorders, such as eating disorders and gambling addictions. Specifically, for the purpose of data collection, we designed and implemented a pilot trial with healthy test subjects. Speech, video and bio-signals (pulse-rate, SpO2) were captured synchronously, during the interaction of healthy people with a number of video games. The collected data were annotated by the test subjects (self-annotation), targeting proper interpretation of the underlying affective states. The broad-shouldered design of the PlayMancer database allows its use for the needs of research on multimodal affect-emotion recognition and multimodal human-computer interaction in serious games environment.</abstract>
@@ -4086,7 +4086,7 @@
       <bibkey>nicolae-etal-2010-c</bibkey>
     </paper>
     <paper id="429">
-      <author><first>Stephen A.</first><last>Boxwell</last></author>
+      <author id="stephen-boxwell"><first>Stephen A.</first><last>Boxwell</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <title>A Pilot <fixed-case>A</fixed-case>rabic <fixed-case>CCG</fixed-case>bank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/623_Paper.pdf</url>
@@ -4100,18 +4100,18 @@
       <author><first>Alain</first><last>Ghio</last></author>
       <author><first>Christine</first><last>Meunier</last></author>
       <author><first>Claude</first><last>Chevrie-Muller</last></author>
-      <author><first>Jean-Francois</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-Francois</first><last>Bonastre</last></author>
       <author><first>Antonia</first><last>Colazo Simon</last></author>
-      <author><first>Céline</first><last>Delooze</last></author>
+      <author id="celine-de-looze"><first>Céline</first><last>Delooze</last></author>
       <author><first>Danielle</first><last>Duez</last></author>
-      <author><first>Cédric</first><last>Gendrot</last></author>
+      <author id="cedric-gendrot"><first>Cédric</first><last>Gendrot</last></author>
       <author><first>Thierry</first><last>Legou</last></author>
       <author><first>Nathalie</first><last>Levèque</last></author>
       <author><first>Claire</first><last>Pillot-Loiseau</last></author>
       <author><first>Serge</first><last>Pinto</last></author>
       <author><first>Gilles</first><last>Pouchoulin</last></author>
       <author><first>Danièle</first><last>Robert</last></author>
-      <author><first>Jacqueline</first><last>Vaissiere</last></author>
+      <author id="jacqueline-vaissiere"><first>Jacqueline</first><last>Vaissiere</last></author>
       <author><first>François</first><last>Viallet</last></author>
       <author><first>Coralie</first><last>Vincent</last></author>
       <title>The <fixed-case>D</fixed-case>es<fixed-case>P</fixed-case>ho-<fixed-case>AP</fixed-case>a<fixed-case>D</fixed-case>y Project: Developing an Acoustic-phonetic Characterization of Dysarthric Speech in <fixed-case>F</fixed-case>rench</title>
@@ -4121,7 +4121,7 @@
     </paper>
     <paper id="431">
       <author><first>Mithun</first><last>Balakrishna</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Marta</first><last>Tatu</last></author>
       <author><first>Marian</first><last>Olteanu</last></author>
       <title>Semi-Automatic Domain Ontology Creation from Text Resources</title>
@@ -4131,20 +4131,20 @@
     </paper>
     <paper id="432">
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Martí</first><last>Quixal</last></author>
-      <author><first>Carlos</first><last>Rodríguez</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <title>Language Technology Challenges of a ‘Small’ Language (<fixed-case>C</fixed-case>atalan)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/628_Paper.pdf</url>
       <abstract>In this paper, we present a brief snapshot of the state of affairs in computational processing of Catalan and the initiatives that are starting to take place in an effort to bring the field a step forward, by making a better and more efficient use of the already existing resources and tools, by bridging the gap between research and market, and by establishing periodical meeting points for the community. In particular, we present the results of the First Workshop on the Computational Processing of Catalan, which succeeded in putting together a fair representation of the research in the area, and received attention from both the industry and the administration. Aside from facilitating communication among researchers and between developers and users, the Workshop provided the organizers with valuable information about existing resources, tools, developers and providers. This information has allowed us to go a step further by setting up a harvesting procedure which will hopefully build the seed of a portal-catalogue-observatory of language resources and technologies in Catalan.</abstract>
       <bibkey>melero-etal-2010-language</bibkey>
     </paper>
     <paper id="433">
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Dag</first><last>Haug</last></author>
       <title>Porting an <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek and <fixed-case>L</fixed-case>atin Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/631_Paper.pdf</url>
@@ -4152,7 +4152,7 @@
       <bibkey>lee-haug-2010-porting</bibkey>
     </paper>
     <paper id="434">
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Comparing Computational Models of Selectional Preferences - Second-order Co-Occurrence vs. Latent Semantic Clusters</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/632_Paper.pdf</url>
       <abstract>This paper presents a comparison of three computational approaches to selectional preferences: (i) an intuitive distributional approach that uses second-order co-occurrence of predicates and complement properties; (ii) an EM-based clustering approach that models the strengths of predicate--noun relationships by latent semantic clusters (Rooth et al., 1999); and (iii) an extension of the latent semantic clusters by incorporating the MDL principle into the EM training, thus explicitly modelling the predicate--noun selectional preferences by WordNet classes (Schulte im Walde et al., 2008). Concerning the distributional approach, we were interested not only in how well the model describes selectional preferences, but moreover which second-order properties are most salient. For example, a typical direct object of the verb 'drink' is usually fluid, might be hot or cold, can be bought, might be bottled, etc. The general question we ask is: what characterises the predicate's restrictions to the semantic realisation of its complements? Our second interest lies in the actual comparison of the models: How does a very simple distributional model compare to much more complex approaches, and which representation of selectional preferences is more appropriate, using (i) second-order properties, (ii) an implicit generalisation of nouns (by clusters), or (iii) an explicit generalisation of nouns by WordNet classes within clusters? We describe various experiments on German data and two evaluations, and demonstrate that the simple distributional model outperforms the more complex cluster-based models in most cases, but does itself not always beat the powerful frequency baseline.</abstract>
@@ -4160,20 +4160,20 @@
     </paper>
     <paper id="435">
       <author><first>Paul</first><last>McNamee</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Heather</first><last>Simpson</last></author>
       <author><first>Patrick</first><last>Schone</last></author>
-      <author><first>Stephanie M.</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie M.</first><last>Strassel</last></author>
       <title>An Evaluation of Technologies for Knowledge Base Population</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/634_Paper.pdf</url>
       <abstract>Previous content extraction evaluations have neglected to address problems which complicate the incorporation of extracted information into an existing knowledge base. Previous question answering evaluations have likewise avoided tasks such as explicit disambiguation of target entities and handling a fixed set of questions about entities without previous determination of possible answers. In 2009 NIST conducted a Knowledge Base Population track at its Text Analysis Conference to unite the content extraction and question answering communities and jointly explore some of these issues. This exciting new evaluation attracted 13 teams from 6 countries that submitted results in two tasks, Entity Linking and Slot Filling. This paper explains the motivation and design of the tasks, describes the language resources that were developed for this evaluation, offers comparisons to previous community evaluations, and briefly summarizes the performance obtained by systems. We also identify relevant issues pertaining to target selection, challenging queries, and performance measures.</abstract>
       <bibkey>mcnamee-etal-2010-evaluation</bibkey>
     </paper>
     <paper id="436">
-      <author><first>Óscar</first><last>Ferrández</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="oscar-ferrandez"><first>Óscar</first><last>Ferrández</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <title>Aligning <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et based on Semantic Neighborhoods</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/636_Paper.pdf</url>
       <abstract>This paper presents an algorithm for aligning FrameNet lexical units to WordNet synsets. Both, FrameNet and WordNet, are well-known as well as widely-used resources by the entire research community. They help systems in the comprehension of the semantics of texts, and therefore, finding strategies to link FrameNet and WordNet involves challenges related to a better understanding of the human language. Such deep analysis is exploited by researchers to improve the performance of their applications. The alignment is achieved by exploiting the particular characteristics of each lexical-semantic resource, with special emphasis on the explicit, formal semantic relations in each. Semantic neighborhoods are computed for each alignment of lemmas, and the algorithm calculates correlation scores by comparing such neighborhoods. The results suggest that the proposed algorithm is appropriate for aligning the FrameNet and WordNet hierarchies. Furthermore, the algorithm can aid research on increasing the coverage of FrameNet, building FrameNets in other languages, and creating a system for querying a joint FrameNet-WordNet hierarchy.</abstract>
@@ -4199,9 +4199,9 @@
       <bibkey>kemps-snijders-etal-2010-lat</bibkey>
     </paper>
     <paper id="439">
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Adam</first><last>Liška</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="adam-liska"><first>Adam</first><last>Liška</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title>Evaluating Utility of Data Sources in a Large Parallel <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Corpus <fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng 0.9</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/642_Paper.pdf</url>
       <abstract>CzEng 0.9 is the third release of a large parallel corpus of Czech and English. For the current release, CzEng was extended by significant amount of texts from various types of sources, including parallel web pages, electronically available books and subtitles. This paper describes and evaluates filtering techniques employed in the process in order to avoid misaligned or otherwise damaged parallel sentences in the collection. We estimate the precision and recall of two sets of filters. The first set was used to process the data before their inclusion into CzEng. The filters from the second set were newly created to improve the filtering process for future releases of CzEng. Given the overall amount and variance of sources of the data, our experiments illustrate the utility of parallel data sources with respect to extractable parallel segments. As a similar behaviour can be expected for other language pairs, our results can be interpreted as guidelines indicating which sources should other researchers exploit first.</abstract>
@@ -4211,7 +4211,7 @@
       <author><first>Maria</first><last>Liakata</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Colin</first><last>Batchelor</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
       <title>Corpora for the Conceptualisation and Zoning of Scientific Papers</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/644_Paper.pdf</url>
       <abstract>We present two complementary annotation schemes for sentence based annotation of full scientific papers, CoreSC and AZ-II, applied to primary research articles in chemistry. AZ-II is the extension of AZ for chemistry papers. AZ has been shown to have been reliably annotated by independent human coders and useful for various information access tasks. Like AZ, AZ-II follows the rhetorical structure of a scientific paper and the knowledge claims made by the authors. The CoreSC scheme takes a different view of scientific papers, treating them as the humanly readable representations of scientific investigations. It seeks to retrieve the structure of the investigation from the paper as generic high-level Core Scientific Concepts (CoreSC). CoreSCs have been annotated by 16 chemistry experts over a total of 265 full papers in physical chemistry and biochemistry. We describe the differences and similarities between the two schemes in detail and present the two corpora produced using each scheme. There are 36 shared papers in the corpora, which allows us to quantitatively compare aspects of the annotation schemes. We show the correlation between the two schemes, their strengths and weeknesses and discuss the benefits of combining a rhetorical based analysis of the papers with a content-based one.</abstract>
@@ -4229,7 +4229,7 @@
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Thierry</first><last>Bazillon</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Jérôme</first><last>Farinas</last></author>
       <title>The <fixed-case>EPAC</fixed-case> Corpus: Manual and Automatic Annotations of Conversational Speech in <fixed-case>F</fixed-case>rench Broadcast News</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/650_Paper.pdf</url>
@@ -4257,7 +4257,7 @@
       <author><first>Piroska</first><last>Lendvai</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Sándor</first><last>Darányi</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
       <author><first>Scott</first><last>Malec</last></author>
       <author><first>Federico</first><last>Peinado</last></author>
@@ -4268,7 +4268,7 @@
     </paper>
     <paper id="446">
       <author><first>Bora</first><last>Savas</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
@@ -4278,7 +4278,7 @@
       <bibkey>savas-etal-2010-lmf</bibkey>
     </paper>
     <paper id="447">
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <author><first>Hugo</first><last>Zaragoza</last></author>
@@ -4297,7 +4297,7 @@
     </paper>
     <paper id="449">
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title><fixed-case>NPCE</fixed-case>ditor: A Tool for Building Question-Answering Characters</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/660_Paper.pdf</url>
       <abstract>NPCEditor is a system for building and deploying virtual characters capable of engaging a user in spoken dialog on a limited domain. The dialogue may take any form as long as the character responses can be specified a priori. For example, NPCEditor has been used for constructing question answering characters where a user asks questions and the character responds, but other scenarios are possible. At the core of the system is a state of the art statistical language classification technology for mapping from user's text input to system responses. NPCEditor combines the classifier with a database that stores the character information and relevant language data, a server that allows the character designer to deploy the completed characters, and a user-friendly editor that helps the designer to accomplish both character design and deployment tasks. In the paper we define the overall system architecture, describe individual NPCEditor components, and guide the reader through the steps of building a virtual character.</abstract>
@@ -4324,10 +4324,10 @@
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><first>Yongsheng</first><last>Yang</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Meghan</first><last>Glenn</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="meghan-glenn"><first>Meghan</first><last>Glenn</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <title>A Very Large Scale <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Broadcast Corpus for <fixed-case>GALE</fixed-case> Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/664_Paper.pdf</url>
       <abstract>In this paper, we present the design, collection, transcription and analysis of a Mandarin Chinese Broadcast Collection of over 3000 hours. The data was collected by Hong Kong University of Science and Technology (HKUST) in China on a cable TV and satellite transmission platform established in support of the DARPA Global Autonomous Language Exploitation (GALE) program. The collection includes broadcast news (BN) and broadcast conversation (BC) including talk shows, roundtable discussions, call-in shows, editorials and other conversational programs that focus on news and current events. HKUST also collects detailed information about all recorded programs. A subset of BC and BN recordings are manually transcribed with standard Chinese characters in UTF-8 encoding, using specific mark-ups for a small set of spontaneous and conversational speech phenomena. The collection is among the largest and first of its kind for Mandarin Chinese Broadcast speech, providing abundant and diverse samples for Mandarin speech recognition and other application-dependent tasks, such as spontaneous speech processing and recognition, topic detection, information retrieval, and speaker recognition. HKUSTâs acoustic analysis of 500 hours of the speech and transcripts demonstrates the positive impact this data could have on system performance.</abstract>
@@ -4351,8 +4351,8 @@
     </paper>
     <paper id="455">
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Christopher</first><last>Caruso</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-caruso"><first>Christopher</first><last>Caruso</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <title>Large Scale Multilingual Broadcast Data Collection to Support Machine Translation and Distillation Technology Development</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/667_Paper.pdf</url>
       <abstract>The development of technologies to address machine translation and distillation of multilingual broadcast data depends heavily on the collection of large volumes of material from modern data providers. To address the needs of GALE researchers, the Linguistic Data Consortium (LDC) developed a system for collecting broadcast news and conversation from a variety of Arabic, Chinese and English broadcasters. The system is highly automated, easily extensible and robust and is capable of collecting, processing and evaluating hundreds of hours of content from several dozen sources per day. In addition to this extensive system, LDC manages three remote collection sites to maximize the variety of available broadcast data and has designed a portable broadcast collection platform to facilitate remote collection. This paper will present a detailed a description of the design and implementation of LDCs collection system, the technical challenges and solutions to large scale broadcast data collection efforts and an overview of the systems operation. This paper will also discuss the challenges of managing remote collections, in particular, the strategies used to normalize data formats, naming conventions and delivery methods to achieve optimal integration of remotely-collected data into LDCs collection database and downstream tasking workflow.</abstract>
@@ -4392,7 +4392,7 @@
       <author><first>Xuansong</first><last>Li</last></author>
       <author><first>Niyu</first><last>Ge</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
-      <author><first>Stephanie M.</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie M.</first><last>Strassel</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <title>Enriching Word Alignment with Linguistic Tags</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/670_Paper.pdf</url>
@@ -4402,7 +4402,7 @@
     <paper id="459">
       <author><first>Kathleen</first><last>Eberhard</last></author>
       <author><first>Hannele</first><last>Nicholson</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Susan</first><last>Gundersen</last></author>
       <author><first>Matthias</first><last>Scheutz</last></author>
       <title>The <fixed-case>I</fixed-case>ndiana “Cooperative Remote Search Task” (<fixed-case>CR</fixed-case>e<fixed-case>ST</fixed-case>) Corpus</title>
@@ -4423,7 +4423,7 @@
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Mridul</first><last>Gupta</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <title>A High Recall Error Identification Tool for <fixed-case>H</fixed-case>indi Treebank Validation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf</url>
       <abstract>This paper describes the development of a hybrid tool for a semi-automated process for validation of treebank annotation at various levels. The tool is developed for error detection at the part-of-speech, chunk and dependency levels of a Hindi treebank, currently under development. The tool aims to identify as many errors as possible at these levels to achieve consistency in the task of annotation. Consistency in treebank annotation is a must for making data as error-free as possible and for providing quality assurance. The tool is aimed at ensuring consistency and to make manual validation cost effective. We discuss a rule based and a hybrid approach (statistical methods combined with rule-based methods) by which a high-recall system can be developed and used to identify errors in the treebank. We report some results of using the tool on a sample of data extracted from the Hindi treebank. We also argue how the tool can prove useful in improving the annotation guidelines which would in turn, better the quality of annotation in subsequent iterations.</abstract>
@@ -4432,7 +4432,7 @@
     <paper id="462">
       <author><first>Susan</first><last>Robinson</last></author>
       <author><first>Antonio</first><last>Roque</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>Dialogues in Context: An Objective User-Oriented Evaluation Approach for Virtual Human Dialogue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/674_Paper.pdf</url>
       <abstract>As conversational agents are now being developed to encounter more complex dialogue situations it is increasingly difficult to find satisfactory methods for evaluating these agents. Task-based measures are insufficient where there is no clearly defined task. While user-based evaluation methods may give a general sense of the quality of an agent's performance, they shed little light on the relative quality or success of specific features of dialogue that are necessary for system improvement. This paper examines current dialogue agent evaluation practices and motivates the need for a more detailed approach for defining and measuring the quality of dialogues between agent and user. We present a framework for evaluating the dialogue competence of artificial agents involved in complex and underspecified tasks when conversing with people. A multi-part coding scheme is proposed that provides a qualitative analysis of human utterances, and rates the appropriateness of the agent's responses to these utterances. The scheme is outlined, and then used to evaluate Staff Duty Officer Moleno, a virtual guide in Second Life.</abstract>
@@ -4444,7 +4444,7 @@
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>Practical Evaluation of Speech Recognizers for Virtual Human Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/675_Paper.pdf</url>
       <abstract>We perform a large-scale evaluation of multiple off-the-shelf speech recognizers across diverse domains for virtual human dialogue systems. Our evaluation is aimed at speech recognition consumers and potential consumers with limited experience with readily available recognizers. We focus on practical factors to determine what levels of performance can be expected from different available recognizers in various projects featuring different types of conversational utterances. Our results show that there is no single recognizer that outperforms all other recognizers in all domains. The performance of each recognizer may vary significantly depending on the domain, the size and perplexity of the corpus, the out-of-vocabulary rate, and whether acoustic and language model adaptation has been used or not. We expect that our evaluation will prove useful to other speech recognition consumers, especially in the dialogue community, and will shed some light on the key problem in spoken dialogue systems of selecting the most suitable available speech recognition system for a particular application, and what impact training will have.</abstract>
@@ -4454,7 +4454,7 @@
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <author><first>Teruhisa</first><last>Misu</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <title>Dialogue Acts Annotation for <fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Tour Dialogue Corpus to Construct Statistical Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/676_Paper.pdf</url>
@@ -4463,14 +4463,14 @@
     </paper>
     <paper id="465">
       <author><first>Bal Krishna</first><last>Bal</last></author>
-      <author><first>Patrick</first><last>Saint Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint Dizier</last></author>
       <title>Towards Building Annotated Resources for Analyzing Opinions and Argumentation in News Editorials</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/677_Paper.pdf</url>
       <abstract>This paper describes an annotation scheme for argumentation in opinionated texts such as newspaper editorials, developed from a corpus of approximately 500 English texts from Nepali and international newspaper sources. We present the results of analysis and evaluation of the corpus annotation ― currently, the inter-annotator agreement kappa value being 0.80 which indicates substantial agreement between the annotators. We also discuss some of linguistic resources (key factors for distinguishing facts from opinions, opinion lexicon, intensifier lexicon, pre-modifier lexicon, modal verb lexicon, reporting verb lexicon, general opinion patterns from the corpus etc.) developed as a result of our corpus analysis, which can be used to identify an opinion or a controversial issue, arguments supporting an opinion, orientation of the supporting arguments and their strength (intrinsic, relative and in terms of persuasion). These resources form the backbone of our work especially for performing the opinion analysis in the lower levels, i.e., in the lexical and sentence levels. Finally, we shed light on the perspectives of the given work clearly outlining the challenges.</abstract>
       <bibkey>bal-saint-dizier-2010-towards</bibkey>
     </paper>
     <paper id="466">
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
       <author><first>Nathalie</first><last>Friburger</last></author>
       <title><fixed-case>E</fixed-case>slo: From Transcription to Speakers’ Personal Information Annotation</title>
@@ -4479,22 +4479,22 @@
       <bibkey>eshkol-etal-2010-eslo</bibkey>
     </paper>
     <paper id="467">
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Lars</first><last>Borin</last></author>
       <author><first>Gerhard</first><last>Budin</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <author><first>Kimmo</first><last>Koskenniemi</last></author>
       <author><first>Lothar</first><last>Lemnitzer</last></author>
       <author><first>Bente</first><last>Maegaard</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Jean-Marie</first><last>Pierrel</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Inguna</first><last>Skadina</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadina</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Remco</first><last>van Veenendaal</last></author>
-      <author><first>Tamas</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamas</first><last>Váradi</last></author>
       <author><first>Martin</first><last>Wynne</last></author>
       <title>Resource and Service Centres as the Backbone for a Sustainable Service Infrastructure</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/679_Paper.pdf</url>
@@ -4514,7 +4514,7 @@
       <author><first>Chitose</first><last>Sao</last></author>
       <author><first>Koji</first><last>Murakami</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Annotating Event Mentions in Text with Modality, Focus, and Source Information</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/682_Paper.pdf</url>
       <abstract>Many natural language processing tasks, including information extraction, question answering and recognizing textual entailment, require analysis of the polarity, focus of polarity, tense, aspect, mood and source of the event mentions in a text in addition to its predicate-argument structure analysis. We refer to modality, polarity and other associated information as extended modality. In this paper, we propose a new annotation scheme for representing the extended modality of event mentions in a sentence. Our extended modality consists of the following seven components: Source, Time, Conditional, Primary modality type, Actuality, Evaluation and Focus. We reviewed the literature about extended modality in Linguistics and Natural Language Processing (NLP) and defined appropriate labels of each component. In the proposed annotation scheme, information of extended modality of an event mention is summarized at the core predicate of the event mention for immediate use in NLP applications. We also report on the current progress of our manual annotation of a Japanese corpus of about 50,000 event mentions, showing a reasonably high ratio of inter-annotator agreement.</abstract>
@@ -4522,7 +4522,7 @@
     </paper>
     <paper id="470">
       <author><first>Sisay</first><last>Adugna</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <title><fixed-case>E</fixed-case>nglish — <fixed-case>O</fixed-case>romo Machine Translation: An Experiment Using a Statistical Approach</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/683_Paper.pdf</url>
       <abstract>This paper deals with translation of English documents to Oromo using statistical methods. Whereas English is the lingua franca of online information, Oromo, despite its relative wide distribution within Ethiopia and neighbouring countries like Kenya and Somalia, is one of the most resource scarce languages. The paper has two main goals: one is to test how far we can go with the available limited parallel corpus for the English ― Oromo language pair and the applicability of existing Statistical Machine Translation (SMT) systems on this language pair. The second goal is to analyze the output of the system with the objective of identifying the challenges that need to be tackled. Since the language is resource scarce as mentioned above, we cannot get as many parallel documents as we want for the experiment. However, using a limited corpus of 20,000 bilingual sentences and 163,000 monolingual sentences, translation accuracy in terms of BLEU Score of 17.74% was achieved.</abstract>
@@ -4536,7 +4536,7 @@
       <bibkey>fujii-2010-modeling</bibkey>
     </paper>
     <paper id="472">
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <title>A Semi-supervised Type-based Classification of Adjectives: Distinguishing Properties and Relations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/685_Paper.pdf</url>
@@ -4544,7 +4544,7 @@
       <bibkey>hartung-frank-2010-semi</bibkey>
     </paper>
     <paper id="473">
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Yu</first><last>Chen</last></author>
       <title><fixed-case>M</fixed-case>ulti<fixed-case>UN</fixed-case>: A Multilingual Corpus from United Nation Documents</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/686_Paper.pdf</url>
@@ -4553,21 +4553,21 @@
     </paper>
     <paper id="474">
       <author><first>Myriam</first><last>Rakho</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <title>Evaluating the Impact of Some Linguistic Information on the Performances of a Similarity-based and Translation-oriented Word-Sense Disambiguation Method</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/687_Paper.pdf</url>
       <abstract>In this article, we present an experiment of linguistic parameter tuning in the representation of the semantic space of polysemous words. We evaluate quantitatively the influence of some basic linguistic knowledge (lemmas, multi-word expressions, grammatical tags and syntactic relations) on the performances of a similarity-based Word-Sense disambiguation method. The question we try to answer, by this experiment, is which kinds of linguistic knowledge are most useful for the semantic disambiguation of polysemous words, in a multilingual framework. The experiment is about 20 French polysemous words (16 nouns and 4 verbs) and we make use of the French-English part of the sentence-aligned EuroParl Corpus for training and testing. Our results show a strong correlation between the system accuracy and the degree of precision of the linguistic features used, particularly the syntactic dependency relations. Furthermore, the lemma-based approach absolutely outperforms the word form-based approach. The best accuracy achieved by our system amounts to 90%.</abstract>
       <bibkey>rakho-constant-2010-evaluating</bibkey>
     </paper>
     <paper id="475">
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <title><fixed-case>F</fixed-case>r<fixed-case>AG</fixed-case>, a Hybrid Constraint Grammar Parser for <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/688_Paper.pdf</url>
       <abstract>This paper describes a hybrid system (FrAG) for tagging / parsing French text, and presents results from ongoing development work, corpus annotation and evaluation. The core of the system is a sentence scope Constraint Grammar (CG), with linguist-written rules. However, unlike traditional CG, the system uses hybrid techniques on both its morphological input side and its syntactic output side. Thus, FrAG draws on a pre-existing probabilistic Decision Tree Tagger (DTT) before and in parallel with its own lexical stage, and feeds its output into a Phrase Structure Grammar (PSG) that uses CG syntactic function tags rather than ordinary terminals in its rewriting rules. As an alternative architecture, dependency tree structures are also supported. In the newest version, dependencies are assigned within the CG-framework itself, and can interact with other rules. To provide semantic context, a semantic prototype ontology for nouns is used, covering a large part of the lexicon. In a recent test run on Parliamentary debate transcripts, FrAG achieved F-scores of 98.7 % for part of speech (PoS) and between 93.1 % and 96.2 % for syntactic function tags. Dependency links were correct in 95.9 %.</abstract>
       <bibkey>bick-2010-frag</bibkey>
     </paper>
     <paper id="476">
-      <author><first>Julia Maria</first><last>Schulz</last></author>
+      <author id="julia-maria-schulz"><first>Julia Maria</first><last>Schulz</last></author>
       <author><first>Christa</first><last>Womser-Hacker</last></author>
       <author><first>Thomas</first><last>Mandl</last></author>
       <title>Multilingual Corpus Development for Opinion Mining</title>
@@ -4585,9 +4585,9 @@
       <bibkey>broda-etal-2010-building</bibkey>
     </paper>
     <paper id="478">
-      <author><first>Cássia</first><last>Trojahn</last></author>
+      <author id="cassia-trojahn"><first>Cássia</first><last>Trojahn</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>An <fixed-case>API</fixed-case> for Multi-lingual Ontology Matching</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/691_Paper.pdf</url>
       <abstract>Ontology matching consists of generating a set of correspondences between the entities of two ontologies. This process is seen as a solution to data heterogeneity in ontology-based applications, enabling the interoperability between them. However, existing matching systems are designed by assuming that the entities of both source and target ontologies are written in the same languages ( English, for instance). Multi-lingual ontology matching is an open research issue. This paper describes an API for multi-lingual matching that implements two strategies, direct translation-based and indirect. The first strategy considers direct matching between two ontologies (i.e., without intermediary ontologies), with the help of external resources, i.e., translations. The indirect alignment strategy, proposed by (Jung et al., 2009), is based on composition of alignments. We evaluate these strategies using simple string similarity based matchers and three ontologies written in English, French, and Portuguese, an extension of the OAEI benchmark test 206.</abstract>
@@ -4604,15 +4604,15 @@
     </paper>
     <paper id="480">
       <author><first>Niraj</first><last>Aswani</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Transliteration using Multiple Similarity Metrics</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/694_Paper.pdf</url>
       <abstract>In this paper, we present an approach to measure the transliteration similarity of English-Hindi word pairs. Our approach has two components. First we propose a bi-directional mapping between one or more characters in the Devanagari script and one or more characters in the Roman script (pronounced as in English). This allows a given Hindi word written in Devanagari to be transliterated into the Roman script and vice-versa. Second, we present an algorithm for computing a similarity measure that is a variant of Dices coefficient measure and the LCSR measure and which also takes into account the constraints needed to match English-Hindi transliterated words. Finally, by evaluating various similarity metrics individually and together under a multiple measure agreement scenario, we show that it is possible to achieve a 0.92 f-measure in identifying English-Hindi word pairs that are transliterations. In order to assess the portability of our approach to other similar languages we adapt our system to the Gujarati language.</abstract>
       <bibkey>aswani-gaizauskas-2010-english</bibkey>
     </paper>
     <paper id="481">
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>Ana</first><last>García-Serrano</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="ana-garcia-serrano"><first>Ana</first><last>García-Serrano</last></author>
       <title><fixed-case>Q</fixed-case>-<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Extracting Polarity from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/695_Paper.pdf</url>
       <abstract>This paper presents Q-WordNet, a lexical resource consisting of WordNet senses automatically annotated by positive and negative polarity. Polarity classification amounts to decide whether a text (sense, sentence, etc.) may be associated to positive or negative connotations. Polarity classification is becoming important within the fields of Opinion Mining and Sentiment Analysis for determining opinions about commercial products, on companies reputation management, brand monitoring, or to track attitudes by mining online forums, blogs, etc. Inspired by work on classification of word senses by polarity (e.g., SentiWordNet), and taking WordNet as a starting point, we build Q-WordNet. Instead of applying external tools such as supervised classifiers to annotated WordNet synsets by polarity, we try to effectively maximize the linguistic information contained in WordNet, thereby taking advantage of the human effort put by lexicographers and annotators. The resulting resource is a subset of WordNet senses classified as positive or negative. In this approach, neutral polarity is seen as the absence of positive or negative polarity. The evaluation of Q-WordNet shows an improvement with respect to previous approaches. We believe that Q-WordNet can be used as a starting point for data-driven approaches in sentiment analysis.</abstract>
@@ -4657,7 +4657,7 @@
       <bibkey>nishikawa-etal-2010-context</bibkey>
     </paper>
     <paper id="486">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Géraldine</first><last>Walther</last></author>
       <title>A Morphological Lexicon for the <fixed-case>P</fixed-case>ersian Language</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/700_Paper.pdf</url>
@@ -4665,7 +4665,7 @@
       <bibkey>sagot-walther-2010-morphological</bibkey>
     </paper>
     <paper id="487">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title>The Lefff, a Freely Available and Large-coverage Morphological and Syntactic Lexicon for <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/701_Paper.pdf</url>
       <abstract>In this paper, we introduce the Lefff, a freely available, accurate and large-coverage morphological and syntactic lexicon for French, used in many NLP tools such as large-coverage parsers. We first describe Alexina, the lexical framework in which the Lefff is developed as well as the linguistic notions and formalisms it is based on. Next, we describe the various sources of lexical data we used for building the Lefff, in particular semi-automatic lexical development techniques and conversion and merging of existing resources. Finally, we illustrate the coverage and precision of the resource by comparing it with other resources and by assessing its impact in various NLP tools.</abstract>
@@ -4674,7 +4674,7 @@
     <paper id="488">
       <author><first>Montse</first><last>Cuadros</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Wauter</first><last>Bosma</last></author>
       <title>Integrating a Large Domain Ontology of Species into <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
@@ -4685,7 +4685,7 @@
     <paper id="489">
       <author><first>Jean-Luc</first><last>Rouas</last></author>
       <author><first>Mayumi</first><last>Beppu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <title>Comparison of Spectral Properties of Read, Prepared and Casual Speech in <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/704_Paper.pdf</url>
       <abstract>In this paper, we investigate the acoustic properties of phonemes in three speaking styles: read speech, prepared speech and spontaneous speech. Our aim is to better understand why speech recognition systems still fails to achieve good performances on spontaneous speech. This work follows the work of Nakamura et al. on Japanese speaking styles, with the difference that we here focus on French. Using Nakamura's method, we use classical speech recognition features, MFCC, and try to represent the effects of the speaking styles on the spectral space. Two measurements are defined in order to represent the spectral space reduction and the spectral variance extension. Experiments are then carried on to investigate if indeed we find some differences between the three speaking styles using these measurements. We finally compare our results to those obtained by Nakamura on Japanese to see if the same phenomenon appears. We happen to find some cues, and it also seems that phone duration also plays an important role regarding spectral reduction, especially for spontaneous speech.</abstract>
@@ -4699,8 +4699,8 @@
       <bibkey>koeva-2010-lexicon</bibkey>
     </paper>
     <paper id="491">
-      <author><first>Peter</first><last>Spyns</last></author>
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
       <title><fixed-case>F</fixed-case>lemish-<fixed-case>D</fixed-case>utch <fixed-case>HLT</fixed-case> Policy: Evolving to New Forms of Collaboration</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/707_Paper.pdf</url>
       <abstract>In the last decade, the Dutch Language Union has taken a serious interest in digital language resources and human language technologies (HLT), because they are crucial for a language to be able to survive in the information society. In this paper we report on the current state of the joint Flemish-Dutch efforts in the field of HLT for Dutch (HLTD) and how follow-up activities are being prepared. We explain the overall mechanism of evaluating an R&amp;D programme and the role of evaluation in the policy cycle to establish new R&amp;D funding activities. This is applied to the joint Flemish-Dutch STEVIN programme. Outcomes of the STEVIN scientific midterm review are shortly discussed as the overall final evaluation is currently still on-going. As part of preparing for future policy plans, an HLTD forecast is presented. Also new opportunities are outlined, in particular in the context of the European CLARIN infrastructure project that can lead to new avenues for joint Flemish-Dutch cooperation on HLTD.</abstract>
@@ -4715,7 +4715,7 @@
       <bibkey>jezek-quochi-2010-capturing</bibkey>
     </paper>
     <paper id="493">
-      <author><first>Brigitte</first><last>Jörg</last></author>
+      <author id="brigitte-jorg"><first>Brigitte</first><last>Jörg</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Alastair</first><last>Burt</last></author>
       <title><fixed-case>LT</fixed-case> World: Ontology and Reference Information Portal</title>
@@ -4724,9 +4724,9 @@
       <bibkey>jorg-etal-2010-lt</bibkey>
     </paper>
     <paper id="494">
-      <author><first>Thiago D.</first><last>Tadeu</last></author>
+      <author id="thiago-d-tadeu"><first>Thiago D.</first><last>Tadeu</last></author>
       <author><first>Eder M.</first><last>de Novais</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <title>Extracting Surface Realisation Templates from Corpora</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/715_Paper.pdf</url>
       <abstract>In Natural Language Generation (NLG), template-based surface realisation is an effective solution to the problem of producing surface strings from a given semantic representation, but many applications may not be able to provide the input knowledge in the required level of detail, which in turn may limit the use of the available NLG resources. However, if we know in advance what the most likely output sentences are (e.g., because a corpus on the relevant application domain happens to be available), then corpus knowledge may be used to quickly deploy a surface realisation engine for small-scale applications, for which it may be sufficient to select a sentence (in natural language) that resembles the desired output, and then modify some or all of its constituents accordingly. In other words, the application may simply 'point to' an existing sentence in the corpus and specify only the changes that need to take place to obtain the desired surface string. In this paper we describe one such approach to surface realisation, in which we extract syntactically-structured templates from a target corpus, and use these templates to produce existing and modified versions of the target sentences by a combination of canned text and basic dependency-tree operations.</abstract>
@@ -4734,7 +4734,7 @@
     </paper>
     <paper id="495">
       <author><first>Arif</first><last>Bramantoro</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Toru</first><last>Ishida</last></author>
       <title>Towards an Integrated Architecture for Composite Language Services and Multiple Linguistic Processing Components</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/717_Paper.pdf</url>
@@ -4751,8 +4751,8 @@
     </paper>
     <paper id="497">
       <author><first>Mohamed</first><last>Belgacem</last></author>
-      <author><first>Georges</first><last>Antoniadis</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="georges-antoniadis"><first>Georges</first><last>Antoniadis</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <title>Automatic Identification of <fixed-case>A</fixed-case>rabic Dialects</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/719_Paper.pdf</url>
       <abstract>In this work, automatic recognition of Arabic dialects is proposed. An acoustic survey of the proportion of vocalic intervals and the standard deviation of consonantal intervals in nine dialects (Tunisia, Morocco, Algeria, Egypt, Syria, Lebanon, Yemen, Golfs Countries and Iraq) is performed using the platform Alize and Gaussian Mixture Models (GMM). The results show the complexity of the automatic identification of Arabic dialects since. No clear border can be found between the dialects, but a gradual transition between them. They can even vary slightly from one city to another. The existence of this gradual change is easy to understand: it corresponds to a human and social reality, to the contact, friendships forged and affinity in the environment more or less immediate of the individual. This document also raises questions about the classes or macro classes of Arabic dialects noticed from the confusion matrix and the design of the hierarchical tree obtained.</abstract>
@@ -4760,7 +4760,7 @@
     </paper>
     <paper id="498">
       <author><first>Sathish</first><last>Pammi</last></author>
-      <author><first>Marcela</first><last>Charfuelan</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelan</last></author>
       <author><first>Marc</first><last>Schröder</last></author>
       <title>Multilingual Voice Creation Toolkit for the <fixed-case>MARY</fixed-case> <fixed-case>TTS</fixed-case> Platform</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/720_Paper.pdf</url>
@@ -4770,7 +4770,7 @@
     <paper id="499">
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Laska</first><last>Laskova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <title>Exploring Co-Reference Chains for Concept Annotation of Domain Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/721_Paper.pdf</url>
       <abstract>The paper explores the co-reference chains as a way for improving the density of concept annotation over domain texts. The idea extends authors previous work on relating the ontology to the text terms in two domains ― IT and textile. Here IT domain is used. The challenge is to enhance relations among concepts instead of text entities, the latter pursued in most works. Our ultimate goal is to exploit these additional chains for concept disambiguation as well as sparseness resolution at concept level. First, a gold standard was prepared with manually connected links among concepts, anaphoric pronouns and contextual equivalents. This step was necessary not only for test purposes, but also for better orientation in the co-referent types and distribution. Then, two automatic systems were tested on the gold standard. Note that these systems were not designed specially for concept chaining. The conclusion is that the state-of-the-art co-reference resolution systems might address the concept sparseness problem, but not so much the concept disambiguation task. For the latter, word-sense disambiguation systems have to be integrated.</abstract>
@@ -4778,7 +4778,7 @@
     </paper>
     <paper id="500">
       <author><first>Kathrin</first><last>Spreyer</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <title>Training Parsers on Partial Trees: A Cross-language Comparison</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/722_Paper.pdf</url>
@@ -4823,9 +4823,9 @@
     </paper>
     <paper id="505">
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Chris</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Chris</first><last>Manning</last></author>
       <title>Parsing to <fixed-case>S</fixed-case>tanford Dependencies: Trade-offs between Speed and Accuracy</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/730_Paper.pdf</url>
       <abstract>We investigate a number of approaches to generating Stanford Dependencies, a widely used semantically-oriented dependency representation. We examine algorithms specifically designed for dependency parsing (Nivre, Nivre Eager, Covington, Eisner, and RelEx) as well as dependencies extracted from constituent parse trees created by phrase structure parsers (Charniak, Charniak-Johnson, Bikel, Berkeley and Stanford). We found that constituent parsers systematically outperform algorithms designed specifically for dependency parsing. The most accurate method for generating dependencies is the Charniak-Johnson reranking parser, with 89% (labeled) attachment F1 score. The fastest methods are Nivre, Nivre Eager, and Covington, used with a linear classifier to make local parsing decisions, which can parse the entire Penn Treebank development set (section 22) in less than 10 seconds on an Intel Xeon E5520. However, this speed comes with a substantial drop in F1 score (about 76% for labeled attachment) compared to competing methods. By tuning how much of the search space is explored by the Charniak-Johnson parser, we are able to arrive at a balanced configuration that is both fast and nearly as good as the most accurate approaches.</abstract>
@@ -4850,14 +4850,14 @@
       <bibkey>kawahara-kurohashi-2010-acquiring</bibkey>
     </paper>
     <paper id="508">
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <title>An Unsupervised Approach for Semantic Relation Interpretation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/734_Paper.pdf</url>
       <abstract>In this work we propose a hybrid unsupervised approach for semantic relation extraction from Italian and English texts. The system takes as input pairs of ""distributionally similar"" terms, possibly involved in a semantic relation. To validate and label the anonymous relations holding between the terms in input, the candidate pairs of terms are looked for on the Web in the context of reliable lexico-syntactic patterns. This paper focuses on the definition of the patterns, on the measures used to assess the reliability of the suggested specific semantic relation and on the evaluation of the implemented system. So far, the system is able to extract the following types of semantic relations: hyponymy, meronymy, and co-hyponymy. The approach can however be easily extended to manage other relations by defining the appropriate battery of reliable lexico-syntactic patterns. Accuracy of the system was measured with scores of 83.3% for hyponymy, 75% for meronymy and 72.2% for co-hyponymy extraction.</abstract>
       <bibkey>giovannetti-2010-unsupervised</bibkey>
     </paper>
     <paper id="509">
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <title>Constructing an Annotated Story Corpus: Some Observations and Issues</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/736_Paper.pdf</url>
       <abstract>This paper discusses our ongoing work on constructing an annotated corpus of childrens stories for further studies on the linguistic, computational, and cognitive aspects of story structure and understanding. Given its semantic nature and the need for extensive common sense and world knowledge, story understanding has been a notoriously difficult topic in natural language processing. In particular, the notion of story structure for maintaining coherence has received much attention, while its strong version in the form of story grammar has triggered much debate. The relation between discourse coherence and the interestingness, or the point, of a story has not been satisfactorily settled. Introspective analysis on story comprehension has led to some important observations, based on which we propose a preliminary annotation scheme covering the structural, functional, and emotional aspects connecting discourse segments in stories. The annotation process will shed light on how story structure interacts with story point via various linguistic devices, and the annotated corpus is expected to be a useful resource for computational discourse processing, especially for studying various issues regarding the interface between coherence and interestingness of stories.</abstract>
@@ -4866,7 +4866,7 @@
     <paper id="510">
       <author><first>Klaar</first><last>Vanopstal</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Towards a Learning Approach for Abbreviation Detection and Resolution.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/737_Paper.pdf</url>
       <abstract>The explosion of biomedical literature and with it the -uncontrolled- creation of abbreviations presents some special challenges for both human readers and computer applications. We developed an annotated corpus of Dutch medical text, and experimented with two approaches to abbreviation detection and resolution. Our corpus is composed of abstracts from two medical journals from the Low Countries in which approximately 65 percent (NTvG) and 48 percent (TvG) of the abbreviations have a corresponding full form in the abstract. Our first approach, a pattern-based system, consists of two steps: abbreviation detection and definition matching. This system has an average F-score of 0.82 for the detection of both defined and undefined abbreviations and an average F-score of 0.77 was obtained for the definitions. For our second approach, an SVM-based classifier was used on the preprocessed data sets, leading to an average F-score of 0.93 for the abbreviations; for the definitions an average F-score of 0.82 was obtained.</abstract>
@@ -4883,7 +4883,7 @@
       <author><first>Mridul</first><last>Gupta</last></author>
       <author><first>Vineet</first><last>Yadav</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <title>Partial Parsing as a Method to Expedite Dependency Annotation of a <fixed-case>H</fixed-case>indi Treebank</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/739_Paper.pdf</url>
       <abstract>The paper describes an approach to expedite the process of manual annotation of a Hindi dependency treebank which is currently under development. We propose a way by which consistency among a set of manual annotators could be improved. Furthermore, we show that our setup can also prove useful for evaluating when an inexperienced annotator is ready to start participating in the production of the treebank. We test our approach on sample sets of data obtained from an ongoing work on creation of this treebank. The results asserting our proposal are reported in this paper. We report results from a semi-automated approach of dependency annotation experiment. We find out the rate of agreement between annotators using Cohens Kappa. We also compare results with respect to the total time taken to annotate sample data-sets using a completely manual approach as opposed to a semi-automated approach. It is observed from the results that this semi-automated approach when carried out with experienced and trained human annotators improves the overall quality of treebank annotation and also speeds up the process.</abstract>
@@ -4898,10 +4898,10 @@
     </paper>
     <paper id="514">
       <author><first>Iker</first><last>Luengo</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Igor</first><last>Odriozola</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Inmaculada</first><last>Hernaez</last></author>
+      <author id="inmaculada-hernaez"><first>Inmaculada</first><last>Hernaez</last></author>
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <title>Modified <fixed-case>LTSE</fixed-case>-<fixed-case>VAD</fixed-case> Algorithm for Applications Requiring Reduced Silence Frame Misclassification</title>
@@ -4910,7 +4910,7 @@
       <bibkey>luengo-etal-2010-modified</bibkey>
     </paper>
     <paper id="515">
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Brian</first><last>Simms</last></author>
       <title><fixed-case>ANC</fixed-case>2<fixed-case>G</fixed-case>o: A Web Application for Customized Corpus Creation</title>
@@ -4929,7 +4929,7 @@
       <bibkey>kozawa-etal-2010-collection</bibkey>
     </paper>
     <paper id="517">
-      <author><first>Nick</first><last>Rizzolo</last></author>
+      <author id="nick-rizzolo"><first>Nick</first><last>Rizzolo</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <title>Learning Based <fixed-case>J</fixed-case>ava for Rapid Development of <fixed-case>NLP</fixed-case> Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/747_Paper.pdf</url>
@@ -4938,7 +4938,7 @@
     </paper>
     <paper id="518">
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <title>Finding Domain Terms using <fixed-case>W</fixed-case>ikipedia</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/748_Paper.pdf</url>
       <abstract>In this paper we present a new approach for obtaining the terminology of a given domain using the category and page structures of the Wikipedia in a language independent way. Our approach consists basically, for each domain, on navigating the Category graph of the Wikipedia starting from the root nodes associated to the domain. A heavy filtering mechanism is carried out for preventing as much as possible the inclusion of spurious categories. For each selected category all the pages belonging to it are then recovered and filtered. This procedure is iterate several times until achieving convergence. Both category names and page names are considered candidates to belong to the terminology of the domain. This approach has been applied to three broad coverage domains: astronomy, chemistry and medicine, and two languages, English and Spanish, showing a promising performance.</abstract>
@@ -4946,14 +4946,14 @@
     </paper>
     <paper id="519">
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title><fixed-case>P</fixed-case>ro<fixed-case>POSEC</fixed-case>: A Prosody and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Annotated Spoken <fixed-case>E</fixed-case>nglish Corpus</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/749_Paper.pdf</url>
       <abstract>We have previously reported on ProPOSEL, a purpose-built Prosody and PoS English Lexicon compatible with the Python Natural Language ToolKit. ProPOSEC is a new corpus research resource built using this lexicon, intended for distribution with the Aix-MARSEC dataset. ProPOSEC comprises multi-level parallel annotations, juxtaposing prosodic and syntactic information from different versions of the Spoken English Corpus, with canonical dictionary forms, in a query format optimized for Perl, Python, and text processing programs. The order and content of fields in the text file is as follows: (1) Aix-MARSEC file number; (2) word; (3) LOB PoS-tag; (4) C5 PoS-tag; (5) Aix SAM-PA phonetic transcription; (6) SAM-PA phonetic transcription from ProPOSEL; (7) syllable count; (8) lexical stress pattern; (9) default content or function word tag; (10) DISC stressed and syllabified phonetic transcription; (11) alternative DISC representation, incorporating lexical stress pattern; (12) nested arrays of phonemes and tonic stress marks from Aix. As an experimental dataset, ProPOSEC can be used to study correlations between these annotation tiers, where significant findings are then expressed as additional features for phrasing models integral to Text-to-Speech and Speech Recognition. As a training set, ProPOSEC can be used for machine learning tasks in Information Retrieval and Speech Understanding systems.</abstract>
       <bibkey>brierley-atwell-2010-proposec</bibkey>
     </paper>
     <paper id="520">
-      <author><first>Margarita Alonso</first><last>Ramos</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita Alonso</first><last>Ramos</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <author><first>Orsolya</first><last>Vincze</last></author>
       <author><first>Gerard Casamayor</first><last>del Bosque</last></author>
@@ -4975,14 +4975,14 @@
     </paper>
     <paper id="522">
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <title>Integrating a Rule-based with a Hierarchical Translation System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/754_Paper.pdf</url>
       <abstract>Recent developments on hybrid systems that combine rule-based machine translation (RBMT) systems with statistical machine translation (SMT) generally neglect the fact that RBMT systems tend to produce more syntactically well-formed translations than data-driven systems. This paper proposes a method that alleviates this issue by preserving more useful structures produced by RBMT systems and utilizing them in a SMT system that operates on hierarchical structures instead of flat phrases alone. For our experiments, we use Joshua as the decoder. It is the first attempt towards a tighter integration of MT systems from different paradigms that both support hierarchical analysis. Preliminary results show consistent improvements over the previous approach.</abstract>
       <bibkey>chen-eisele-2010-integrating</bibkey>
     </paper>
     <paper id="523">
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <title>Creating a Coreference Resolution System for <fixed-case>I</fixed-case>talian</title>
@@ -4991,9 +4991,9 @@
       <bibkey>poesio-etal-2010-creating</bibkey>
     </paper>
     <paper id="524">
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <title>Data Issues in <fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/756_Paper.pdf</url>
       <abstract>Statistical machine translation to morphologically richer languages is a challenging task and more so if the source and target languages differ in word order. Current state-of-the-art MT systems thus deliver mediocre results. Adding more parallel data often helps improve the results; if it doesn't, it may be caused by various problems such as different domains, bad alignment or noise in the new data. In this paper we evaluate the English-to-Hindi MT task from this data perspective. We discuss several available parallel data sources and provide cross-evaluation results on their combinations using two freely available statistical MT systems. We demonstrate various problems encountered in the data and describe automatic methods of data cleaning and normalization. We also show that the contents of two independently distributed data sets can unexpectedly overlap, which negatively affects translation quality. Together with the error analysis, we also present a new tool for viewing aligned corpora, which makes it easier to detect difficult parts in the data even for a developer not speaking the target language.</abstract>
@@ -5074,14 +5074,14 @@
     <paper id="533">
       <author><first>Diego</first><last>De Cao</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <title>Extensive Evaluation of a <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et mapping resource</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/773_Paper.pdf</url>
       <abstract>Lexical resources are basic components of many text processing system devoted to information extraction, question answering or dialogue. In paste years many resources have been developed such as FrameNet and WordNet. FrameNet describes prototypical situations (i.e. Frames) while WordNet defines lexical meaning (senses) for the majority of English nouns, verbs, adjectives and adverbs. A major difference between FrameNet and WordNet refers to their coverage. Due of this lack of coverage, in recent years some approaches have been studied to make a bridge between this two resources, so a resource is used to extend the coverage of the other one. The nature of these approaches leave from supervised to supervised methods. The major problem is that there is not a standard in evaluation of the mapping. Each different work have tested own approach with a custom gold standard. This work give an extensive evaluation of the model proposed in (De Cao et al., 2008) using gold standard proposed in other works. Moreover this work give an empirical comparison between other available resources. As outcome of this work we also release the full mapping resource made according to the model proposed in (De Cao et al., 2008).</abstract>
       <bibkey>de-cao-etal-2010-extensive</bibkey>
     </paper>
     <paper id="534">
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <title>Exploring the Spinal-<fixed-case>STIG</fixed-case> Model for Parsing <fixed-case>F</fixed-case>rench</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/775_Paper.pdf</url>
       <abstract>We evaluate statistical parsing of French using two probabilistic models derived from the Tree Adjoining Grammar framework: a Stochastic Tree Insertion Grammars model (STIG) and a specific instance of this formalism, called Spinal Tree Insertion Grammar model which exhibits interesting properties with regard to data sparseness issues common to small treebanks such as the Paris 7 French Treebank. Using David Chiangs STIG parser (Chiang, 2003), we present results of various experiments we conducted to explore those models for French parsing. The grammar induction makes use of a head percolation table tailored for the French Treebank and which is provided in this paper. Using two evaluation metrics, we found that the parsing performance of a STIG model is tied to the size of the underlying Tree Insertion Grammar, with a more compact grammar, a spinal STIG, outperforming a genuine STIG. We finally note that a ""spinal"" framework seems to emerge in the literature. Indeed, the use of vertical grammars such as Spinal STIG instead of horizontal grammars such as PCFGs, afflicted with well known data sparseness issues, seems to be a promising path toward better parsing performance.</abstract>
@@ -5089,7 +5089,7 @@
     </paper>
     <paper id="535">
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <title>Annotating Event Anaphora: A Case Study</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/776_Paper.pdf</url>
       <abstract>In recent years we have resgitered a renewed interest in event detection and temporal processing of text/discourse. TimeML (Pustejovsky et al., 2003a) has shed new lights on the notion of event and developed a new methodology for its annotation. On a parallel, works on anaphora resolution have developed a reliable methodology for the annotation and pointed out the core role of this phenomenon for the improvement of NLP systems. This paper tries to put together these two lines of research by describing a case study for the creation of an annotation scheme on event anaphora. We claim that this work could have consequences for the annotation of eventualities as proposed in TimeML and on the use of the tag and on the study of anaphora and its annotation. The annotation scheme and its guidelines have been developed on the basis of a coarse grained bottom up approach. In order to do this, we have performed a small sampling annotation which has highlighted shortcomings and open issues which need to be resolved.</abstract>
@@ -5097,11 +5097,11 @@
     </paper>
     <paper id="536">
       <author><first>Bente</first><last>Maegaard</last></author>
-      <author><first>Mohamed</first><last>Attia</last></author>
+      <author id="mohamed-attia"><first>Mohamed</first><last>Attia</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Mustafa</first><last>Yaseen</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="mustafa-yaseen"><first>Mustafa</first><last>Yaseen</last></author>
       <title>Cooperation for <fixed-case>A</fixed-case>rabic Language Resources and Tools — The <fixed-case>MEDAR</fixed-case> Project</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/777_Paper.pdf</url>
       <abstract>The paper describes some of the work carried out within the European funded project MEDAR. The project has three streams of activity: the technical stream, the cooperation stream and the dissemination stream. MEDAR has first updated the existing surveys and BLARK for Arabic, and then the technical stream focused on machine translation. The consortium identified a number of freely available MT systems and then customized two versions of the famous MOSES package. The Consortium addressed the needs to package MOSES for English to Arabic (while the main MT stream is on Arabic to English). For performance assessment purposes, the partners produced test data that allowed carrying out an evaluation campaign with 5 different systems (including from outside the consortium) and two online ones. Both the MT baselines and the collected data will be made available via ELRA catalogue. The cooperation stream focuses mostly on the cooperation roadmap for Human Language Technologies for Arabic. Cooperation Roadmap for the region directed towards the Arabic HLT in general. It is the purpose of the roadmap to outline areas and priorities for collaboration, in terms of collaboration between EU countries and Arabic speaking countries, as well as cooperation in general: between countries, between universities, and last but not least between universities and industry.</abstract>
@@ -5126,7 +5126,7 @@
       <author><first>Christine</first><last>Meunier</last></author>
       <author><first>Irina</first><last>Nesterenko</last></author>
       <author><first>Berthille</first><last>Pallaud</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Béatrice</first><last>Priego-Valverde</last></author>
       <author><first>Stéphane</first><last>Rauzy</last></author>
       <title>The <fixed-case>OTIM</fixed-case> Formal Annotation Model: A Preliminary Step before Annotation Scheme</title>
@@ -5137,7 +5137,7 @@
     <paper id="539">
       <author><first>Sanaz</first><last>Jabbari</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <title>Evaluating Lexical Substitution: Analysis and New Measures</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/782_Paper.pdf</url>
       <abstract>Lexical substitution is the task of finding a replacement for a target word in a sentence so as to preserve, as closely as possible, the meaning of the original sentence. It has been proposed that lexical substitution be used as a basis for assessing the performance of word sense disambiguation systems, an idea realised in the English Lexical Substitution Task of SemEval-2007. In this paper, we examine the evaluation metrics used for the English Lexical Substitution Task and identify some problems that arise for them. We go on to propose some alternative measures for this purpose, that avoid these problems, and which in turn can be seen as redefining the key tasks that lexical substitution systems should be expected to perform. We hope that these new metrics will better serve to guide the development of lexical substitution systems in future work. One of the new metrics addresses how effective systems are in ranking substitution candidates, a key ability for lexical substitution systems, and we report some results concerning the assessment of systems produced by this measure as compared to the relevant measure from SemEval-2007.</abstract>
@@ -5145,7 +5145,7 @@
     </paper>
     <paper id="540">
       <author><first>Mehrnoush</first><last>Shamsfard</last></author>
-      <author><first>Hakimeh</first><last>Fadaei</last></author>
+      <author id="hakimeh-fadaee"><first>Hakimeh</first><last>Fadaei</last></author>
       <author><first>Elham</first><last>Fekri</last></author>
       <title>Extracting Lexico-conceptual Knowledge for Developing <fixed-case>P</fixed-case>ersian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/784_Paper.pdf</url>
@@ -5154,7 +5154,7 @@
     </paper>
     <paper id="541">
       <author><first>Paula Vaz</first><last>Lobo</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
       <title>Fairy Tale Corpus Organization Using Latent Semantic Mapping and an Item-to-item Top-n Recommendation Algorithm</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/786_Paper.pdf</url>
       <abstract>In this paper we present a fairy tale corpus that was semantically organized and tagged. The proposed method uses latent semantic mapping to represent the stories and a top-n item-to-item recommendation algorithm to define clusters of similar stories. Each story can be placed in more than one cluster and stories in the same cluster are related to the same concepts. The results were manually evaluated regarding the groupings as perceived by human judges. The evaluation resulted in a precision of 0.81, a recall of 0.69, and an f-measure of 0.75 when using tf*idf for word frequency. Our method is topic- and language-independent, and, contrary to traditional clustering methods, automatically defines the number of clusters based on the set of documents. This method can be used as a setup for traditional clustering or classification. The resulting corpus will be used for recommendation purposes, although it can also be used for emotion extraction, semantic role extraction, meaning extraction, text classification, among others.</abstract>
@@ -5162,7 +5162,7 @@
     </paper>
     <paper id="542">
       <author><first>Alistair</first><last>Willis</last></author>
-      <author><first>David</first><last>King</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
       <author><first>David</first><last>Morse</last></author>
       <author><first>Anton</first><last>Dil</last></author>
       <author><first>Chris</first><last>Lyal</last></author>
@@ -5175,9 +5175,9 @@
     <paper id="543">
       <author><first>Linda</first><last>Brandschain</last></author>
       <author><first>David</first><last>Graff</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Chris</first><last>Caruso</last></author>
+      <author id="christopher-caruso"><first>Chris</first><last>Caruso</last></author>
       <author><first>Abby</first><last>Neely</last></author>
       <title>Greybeard Longitudinal Speech Study</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/789_Paper.pdf</url>
@@ -5185,12 +5185,12 @@
       <bibkey>brandschain-etal-2010-greybeard</bibkey>
     </paper>
     <paper id="544">
-      <author><first>Francisco</first><last>Campillo</last></author>
+      <author id="francisco-campillo"><first>Francisco</first><last>Campillo</last></author>
       <author><first>Daniela</first><last>Braga</last></author>
       <author><first>Ana Belén</first><last>Mourín</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
       <author><first>Pedro</first><last>Silva</last></author>
-      <author><first>Miguel Sales</first><last>Dias</last></author>
+      <author id="miguel-sales-dias"><first>Miguel Sales</first><last>Dias</last></author>
       <author><first>Francisco</first><last>Méndez</last></author>
       <title>Building High Quality Databases for Minority Languages such as <fixed-case>G</fixed-case>alician</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/790_Paper.pdf</url>
@@ -5198,7 +5198,7 @@
       <bibkey>campillo-etal-2010-building</bibkey>
     </paper>
     <paper id="545">
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <author><first>Chris</first><last>Wendt</last></author>
       <author><first>David</first><last>Bullock</last></author>
       <title>Achieving Domain Specificity in <fixed-case>SMT</fixed-case> without Overt Siloing</title>
@@ -5209,9 +5209,9 @@
     <paper id="546">
       <author><first>Linda</first><last>Brandschain</last></author>
       <author><first>David</first><last>Graff</last></author>
-      <author><first>Chris</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Chris</first><last>Cieri</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Chris</first><last>Caruso</last></author>
+      <author id="christopher-caruso"><first>Chris</first><last>Caruso</last></author>
       <author><first>Abby</first><last>Neely</last></author>
       <title>Mixer 6</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/792_Paper.pdf</url>
@@ -5231,7 +5231,7 @@
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title>An Automatically Built Named Entity Lexicon for <fixed-case>A</fixed-case>rabic</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/797_Paper.pdf</url>
       <abstract>We have adapted and extended the automatic Multilingual, Interoperable Named Entity Lexicon approach to Arabic, using Arabic WordNet (AWN) and Arabic Wikipedia (AWK). First, we extract AWNs instantiable nouns and identify the corresponding categories and hyponym subcategories in AWK. Then, we exploit Wikipedia inter-lingual links to locate correspondences between articles in ten different languages in order to identify Named Entities (NEs). We apply keyword search on AWK abstracts to provide for Arabic articles that do not have a correspondence in any of the other languages. In addition, we perform a post-processing step to fetch further NEs from AWK not reachable through AWN. Finally, we investigate diacritization using matching with geonames databases, MADA-TOKAN tools and different heuristics for restoring vowel marks of Arabic NEs. Using this methodology, we have extracted approximately 45,000 Arabic NEs and built, to the best of our knowledge, the largest, most mature and well-structured Arabic NE lexical resource to date. We have stored and organised this lexicon following the LMF ISO standard. We conduct a quantitative and qualitative evaluation against a manually annotated gold standard and achieve precision scores from 95.83% (with 66.13% recall) to 99.31% (with 61.45% recall) according to different values of a threshold.</abstract>
@@ -5239,7 +5239,7 @@
     </paper>
     <paper id="549">
       <author><first>Zhiyi</first><last>Song</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Gary</first><last>Krug</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <title>Enhanced Infrastructure for Creation and Collection of Translation Resources</title>
@@ -5249,14 +5249,14 @@
     </paper>
     <paper id="550">
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>e<fixed-case>X</fixed-case>tended <fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/799_Paper.pdf</url>
       <abstract>This paper presents a novel automatic approach to partially integrate FrameNet and WordNet. In that way we expect to extend FrameNet coverage, to enrich WordNet with frame semantic information and possibly to extend FrameNet to languages other than English. The method uses a knowledge-based Word Sense Disambiguation algorithm for matching the FrameNet lexical units to WordNet synsets. Specifically, we exploit a graph-based Word Sense Disambiguation algorithm that uses a large-scale knowledge-base derived from existing semantic resources. We have developed and tested additional versions of this algorithm showing substantial improvements over state-of-the-art results. Finally, we show some examples and figures of the resulting semantic resource.</abstract>
       <bibkey>laparra-rigau-2010-extended</bibkey>
     </paper>
     <paper id="551">
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <title>Improved Statistical Measures to Assess Natural Language Parser Performance across Domains</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/801_Paper.pdf</url>
       <abstract>We examine the performance of three dependency parsing systems, in particular, their performance variation across Wikipedia domains. We assess the performance variation of (i) Alpino, a deep grammar-based system coupled with a statistical disambiguation versus (ii) MST and Malt, two purely data-driven statistical dependency parsing systems. The question is how the performance of each parser correlates with simple statistical measures of the text (e.g. sentence length, unknown word rate, etc.). This would give us an idea of how sensitive the different systems are to domain shifts, i.e. which system is more in need for domain adaptation techniques. To this end, we extend the statistical measures used by Zhang and Wang (2009) for English and evaluate the systems on several Wikipedia domains by focusing on a freer word-order language, Dutch. The results confirm the general findings of Zhang and Wang (2009), i.e. different parsing systems have different sensitivity against various statistical measure of the text, where the highest correlation to parsing accuracy was found for the measure we added, sentence perplexity.</abstract>
@@ -5275,7 +5275,7 @@
     <paper id="553">
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <title>mwetoolkit: a Framework for Multiword Expression Identification</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/803_Paper.pdf</url>
       <abstract>This paper presents the Multiword Expression Toolkit (mwetoolkit), an environment for type and language-independent MWE identification from corpora. The mwetoolkit provides a targeted list of MWE candidates, extracted and filtered according to a number of user-defined criteria and a set of standard statistical association measures. For generating corpus counts, the toolkit provides both a corpus indexation facility and a tool for integration with web search engines, while for evaluation, it provides validation and annotation facilities. The mwetoolkit also allows easy integration with a machine learning tool for the creation and application of supervised MWE extraction models if annotated data is available. In our experiment, the mwetoolkit was tested and evaluated in the context of MWE extraction in the biomedical domain. Our preliminary results show that the toolkit performs better than other approaches, especially concerning recall. Moreover, this first version can also be extended in several ways in order to improve the quality of the results.</abstract>
@@ -5315,7 +5315,7 @@
       <bibkey>shamsfard-etal-2010-step</bibkey>
     </paper>
     <paper id="558">
-      <author><first>Drahomíra „johanka“</first><last>Spoustová</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Drahomíra „johanka“</first><last>Spoustová</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <title>Building a Web Corpus of <fixed-case>C</fixed-case>zech</title>
@@ -5352,9 +5352,9 @@
       <bibkey>de-luca-2010-corpus</bibkey>
     </paper>
     <paper id="563">
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <author><first>Alexiei</first><last>Dingli</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Using Dialogue Corpora to Extend Information Extraction Patterns for Natural Language Understanding of Dialogue</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/818_Paper.pdf</url>
       <abstract>This paper examines how Natural Language Process (NLP) resources and online dialogue corpora can be used to extend coverage of Information Extraction (IE) templates in a Spoken Dialogue system. IE templates are used as part of a Natural Language Understanding module for identifying meaning in a user utterance. The use of NLP tools in Dialogue systems is a difficult task given 1) spoken dialogue is often not well-formed and 2) there is a serious lack of dialogue data. In spite of that, we have devised a method for extending IE patterns using standard NLP tools and available dialogue corpora found on the web. In this paper, we explain our method which includes using a set of NLP modules developed using GATE (a General Architecture for Text Engineering), as well as a general purpose editing tool that we built to facilitate the IE rule creation process. Lastly, we present directions for future work in this area.</abstract>
@@ -5362,7 +5362,7 @@
     </paper>
     <paper id="564">
       <author><first>Lamia</first><last>Tounsi</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title><fixed-case>A</fixed-case>rabic Parsing Using Grammar Transforms</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/819_Paper.pdf</url>
       <abstract>We investigate Arabic Context Free Grammar parsing with dependency annotation comparing lexicalised and unlexicalised parsers. We study how morphosyntactic as well as function tag information percolation in the form of grammar transforms (Johnson, 1998, Kulick et al., 2006) affects the performance of a parser and helps dependency assignment. We focus on the three most frequent functional tags in the Arabic Penn Treebank: subjects, direct objects and predicates . We merge these functional tags with their phrasal categories and (where appropriate) percolate case information to the non-terminal (POS) category to train the parsers. We then automatically enrich the output of these parsers with full dependency information in order to annotate trees with Lexical Functional Grammar (LFG) f-structure equations with produce f-structures, i.e. attribute-value matrices approximating to basic predicate-argument-adjunct structure representations. We present a series of experiments evaluating how well lexicalized, history-based, generative (Bikel) as well as latent variable PCFG (Berkeley) parsers cope with the enriched Arabic data. We measure quality and coverage of both the output trees and the generated LFG f-structures. We show that joint functional and morphological information percolation improves both the recovery of trees as well as dependency results in the form of LFG f-structures.</abstract>
@@ -5378,7 +5378,7 @@
     </paper>
     <paper id="566">
       <author><first>Na-Rae</first><last>Han</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Soo-Hwa</first><last>Lee</last></author>
       <author><first>Jin-Young</first><last>Ha</last></author>
       <title>Using an Error-Annotated Learner Corpus to Develop an <fixed-case>ESL</fixed-case>/<fixed-case>EFL</fixed-case> Error Correction System</title>
@@ -5388,9 +5388,9 @@
     </paper>
     <paper id="567">
       <author><first>Ian</first><last>McGraw</last></author>
-      <author><first>Chia-ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-ying</first><last>Lee</last></author>
       <author><first>Lee</first><last>Hetherington</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Jim</first><last>Glass</last></author>
       <title>Collecting Voices from the Cloud</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/822_Paper.pdf</url>
@@ -5399,7 +5399,7 @@
     </paper>
     <paper id="568">
       <author><first>Aurélien</first><last>Max</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <title>Contrastive Lexical Evaluation of Machine Translation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/823_Paper.pdf</url>
@@ -5407,8 +5407,8 @@
       <bibkey>max-etal-2010-contrastive</bibkey>
     </paper>
     <paper id="569">
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <title>Partial Dependency Parsing for <fixed-case>I</fixed-case>rish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/824_Paper.pdf</url>
       <abstract>We present a partial dependency parser for Irish. Constraint Grammar (CG) based rules are used to annotate dependency relations and grammatical functions. Chunking is performed using a regular-expression grammar which operates on the dependency tagged sentences. As this is the first implementation of a parser for unrestricted Irish text (to our knowledge), there were no guidelines or precedents available. Therefore deciding what constitutes a syntactic unit, and how it should be annotated, accounts for a major part of the early development effort. Currently, all tokens in a sentence are tagged for grammatical function and local dependency. Long-distance dependencies, prepositional attachments or coordination are not handled, resulting in a partial dependency analysis. Evaluations show that the partial dependency analysis achieves an f-score of 93.60% on development data and 94.28% on unseen test data, while the chunker achieves an f-score of 97.20% on development data and 93.50% on unseen test data.</abstract>
@@ -5433,7 +5433,7 @@
     <paper id="572">
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>William</first><last>Lipovsky</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
       <title>Towards Semi-Automated Annotation for Prepositional Phrase Attachment</title>
@@ -5443,7 +5443,7 @@
     </paper>
     <paper id="573">
       <author><first>Patrice</first><last>Lopez</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <title><fixed-case>GRISP</fixed-case>: A Massive Multilingual Terminological Database for Scientific and Technical Domains</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/829_Paper.pdf</url>
       <abstract>The development of a multilingual terminology is a very long and costly process. We present the creation of a multilingual terminological database called GRISP covering multiple technical and scientific fields from various open resources. A crucial aspect is the merging of the different resources which is based in our proposal on the definition of a sound conceptual model, different domain mapping and the use of structural constraints and machine learning techniques for controlling the fusion process. The result is a massive terminological database of several millions terms, concepts, semantic relations and definitions. The accuracy of the concept merging between several resources have been evaluated following several methods. This resource has allowed us to improve significantly the mean average precision of an information retrieval system applied to a large collection of multilingual and multidomain patent documents. New specialized terminologies, not specifically created for text processing applications, can be aggregated and merged to GRISP with minimal manual efforts.</abstract>
@@ -5476,8 +5476,8 @@
       <bibkey>murakami-etal-2010-language</bibkey>
     </paper>
     <paper id="577">
-      <author><first>Kristina</first><last>Vučković</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="kristina-vuckovic"><first>Kristina</first><last>Vučković</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title>Improving Chunking Accuracy on <fixed-case>C</fixed-case>roatian Texts by Morphosyntactic Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/834_Paper.pdf</url>
@@ -5485,8 +5485,8 @@
       <bibkey>vuckovic-etal-2010-improving</bibkey>
     </paper>
     <paper id="578">
-      <author><first>David K.</first><last>Elson</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="david-elson"><first>David K.</first><last>Elson</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <title>Building a Bank of Semantically Encoded Narratives</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/835_Paper.pdf</url>
       <abstract>We propose a methodology for a novel type of discourse annotation whose model is tuned to the analysis of a text as narrative. This is intended to be the basis of a story bank resource that would facilitate the automatic analysis of narrative structure and content. The methodology calls for annotators to construct propositions that approximate a reference text, by selecting predicates and arguments from among controlled vocabularies drawn from resources such as WordNet and VerbNet. Annotators then integrate the propositions into a conceptual graph that maps out the entire discourse; the edges represent temporal, causal and other relationships at the level of story content. Because annotators must identify the recurring objects and themes that appear in the text, they also perform coreference resolution and word sense disambiguation as they encode propositions. We describe a collection experiment and a method for determining inter-annotator agreement when multiple annotators encode the same short story. Finally, we describe ongoing work toward extending the method to integrate the annotators interpretations of character agency (the goals, plans and beliefs that are relevant, yet not explictly stated in the text).</abstract>
@@ -5500,8 +5500,8 @@
       <bibkey>wong-2010-semantic</bibkey>
     </paper>
     <paper id="580">
-      <author><first>Bento Carlos</first><last>Dias-da-Silva</last></author>
-      <author><first>Ariani</first><last>Di Felippo</last></author>
+      <author id="bento-carlos-dias-da-silva"><first>Bento Carlos</first><last>Dias-da-Silva</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di Felippo</last></author>
       <title><fixed-case>REBECA</fixed-case>: Turning <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Databases into “Ontolexicons”</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/838_Paper.pdf</url>
       <abstract>In this paper we outline the design and present a sample of the REBECA bilingual lexical-conceptual database constructed by linking two monolingual lexical resources in which a set of lexicalized concepts of the North-American English database, the Princeton WordNet (WN.Pr) synsets, is aligned with its corresponding set of lexicalized concepts of the Brazilian Portuguese database, the Brazilian Portuguese WordNet synsets under construction, by means of the MultiNet-based interlingual schema, the concepts of which are the ones represented by the Princeton WordNet synsets. Implemented in the Protégé-OWL editor, the alignment of the two databases illustrates how wordnets can be turned into ontolexicons. At the current stage of development, the wheeled-vehicle conceptual domain was modeled to develop and to test REBECAs design and contents, respectively. The collection of 205 ontological concepts worked out, i.e. REBECA´s alignment indexes, is exemplified in the wheeled- vehicle conceptual domain, e.g. [CAR], [RAILCAR], etc., and it was selected in the WN.Pr database, version 2.0. Future work includes the population of the database with more lexical data and other conceptual domains so that the intricacies of adding more concepts and devising the spreading or pruning the relationships between them can be properly evaluated.</abstract>
@@ -5534,7 +5534,7 @@
     <paper id="584">
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
       <author><first>Martijn</first><last>Van Otterlo</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <title>Spatial Role Labeling: Task Definition and Annotation Scheme</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/846_Paper.pdf</url>
       <abstract>One of the essential functions of natural language is to talk about spatial relationships between objects. Linguistic constructs can express highly complex, relational structures of objects, spatial relations between them, and patterns of motion through spaces relative to some reference point. Learning how to map this information onto a formal representation from a text is a challenging problem. At present no well-defined framework for automatic spatial information extraction exists that can handle all of these issues. In this paper we introduce the task of spatial role labeling and propose an annotation scheme that is language-independent and facilitates the application of machine learning techniques. Our framework consists of a set of spatial roles based on the theory of holistic spatial semantics with the intent of covering all aspects of spatial concepts, including both static and dynamic spatial relations. We illustrate our annotation scheme with many examples throughout the paper, and in addition we highlight how to connect to spatial calculi such as region connection calculus and also how our approach fits into related work.</abstract>
@@ -5548,7 +5548,7 @@
       <bibkey>russo-2010-discovering</bibkey>
     </paper>
     <paper id="586">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <title>Constructing of an Ontology-based Lexicon for <fixed-case>B</fixed-case>ulgarian</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/848_Paper.pdf</url>
@@ -5556,8 +5556,8 @@
       <bibkey>simov-osenova-2010-constructing</bibkey>
     </paper>
     <paper id="587">
-      <author><first>Meghan Lammie</first><last>Glenn</last></author>
-      <author><first>Stephanie M.</first><last>Strassel</last></author>
+      <author id="meghan-glenn"><first>Meghan Lammie</first><last>Glenn</last></author>
+      <author id="stephanie-strassel"><first>Stephanie M.</first><last>Strassel</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
       <author><first>Ramez</first><last>Zakhary</last></author>
@@ -5570,7 +5570,7 @@
     <paper id="588">
       <author><first>Claudiu</first><last>Mihăilă</last></author>
       <author><first>Iustina</first><last>Ilisei</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <title><fixed-case>R</fixed-case>omanian Zero Pronoun Distribution: A Comparative Study</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/851_Paper.pdf</url>
       <abstract>Anaphora resolution is still a challenging research field in natural language processing, lacking a algorithm that correctly resolves anaphoric pronouns. Anaphoric zero pronouns pose an even greater challenge, since this category is not lexically realised. Thus, their resolution is conditioned by their prior identification stage. This paper reports on the distribution of zero pronouns in Romanian in various genres: encyclopaedic, legal, literary, and news-wire texts. For this purpose, the RoZP corpus has been created, containing almost 50000 tokens and 800 zero pronouns which are manually annotated. The distribution patterns are compared across genres, and exceptional cases are presented in order to facilitate the methodological process of developing a future zero pronoun identification and resolution algorithm. The evaluation results emphasise that zero pronouns appear frequently in Romanian, and their distribution depends largely on the genre. Additionally, possible features are revealed for their identification, and a search scope for the antecedent has been determined, increasing the chances of correct resolution.</abstract>
@@ -5586,18 +5586,18 @@
     <paper id="590">
       <author><first>Prasanth</first><last>Kolachina</last></author>
       <author><first>Sudheer</first><last>Kolachina</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <author><first>Viswanath</first><last>Naidu</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <title>Grammar Extraction from Treebanks for <fixed-case>H</fixed-case>indi and <fixed-case>T</fixed-case>elugu</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/854_Paper.pdf</url>
       <abstract>Grammars play an important role in many Natural Language Processing (NLP) applications. The traditional approach to creating grammars manually, besides being labor-intensive, has several limitations. With the availability of large scale syntactically annotated treebanks, it is now possible to automatically extract an approximate grammar of a language in any of the existing formalisms from a corresponding treebank. In this paper, we present a basic approach to extract grammars from dependency treebanks of two Indian languages, Hindi and Telugu. The process of grammar extraction requires a generalization mechanism. Towards this end, we explore an approach which relies on generalization of argument structure over the verbs based on their syntactic similarity. Such a generalization counters the effect of data sparseness in the treebanks. A grammar extracted using this system can not only expand already existing knowledge bases for NLP tasks such as parsing, but also aid in the creation of grammars for languages where none exist. Further, we show that the grammar extraction process can help in identifying annotation errors and thus aid in the task of the treebank validation.</abstract>
       <bibkey>kolachina-etal-2010-grammar</bibkey>
     </paper>
     <paper id="591">
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <author><first>Kaspars</first><last>Balodis</last></author>
       <title>Corpus Based Analysis for Multilingual Terminology Entry Compounding</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/855_Paper.pdf</url>
@@ -5618,7 +5618,7 @@
       <bibkey>maeda-etal-2010-technical</bibkey>
     </paper>
     <paper id="593">
-      <author><first>José M.</first><last>García-Miguel</last></author>
+      <author id="jose-m-garcia-miguel"><first>José M.</first><last>García-Miguel</last></author>
       <author><first>Gael</first><last>Vaamonde</last></author>
       <author><first>Fita González</first><last>Domínguez</last></author>
       <title><fixed-case>ADESSE</fixed-case>, a Database with Syntactic and Semantic Annotation of a Corpus of <fixed-case>S</fixed-case>panish</title>
@@ -5629,14 +5629,14 @@
     <paper id="594">
       <author><first>David</first><last>Guthrie</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <title>Efficient Minimal Perfect Hash Language Models</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/860_Paper.pdf</url>
       <abstract>The availability of large collections of text have made it possible to build language models that incorporate counts of billions of n-grams. This paper proposes two new methods of efficiently storing large language models that allow O(1) random access and use significantly less space than all known approaches. We introduce two novel data structures that take advantage of the distribution of n-grams in corpora and make use of various numbers of minimal perfect hashes to compactly store language models containing full frequency counts of billions of n-grams using 2.5 Bytes per n-gram and language models of quantized probabilities using 2.26 Bytes per n-gram. These methods allow language processing applications to take advantage of much larger language models than previously was possible using the same hardware and we additionally describe how they can be used in a distributed environment to store even larger models. We show that our approaches are simple to implement and can easily be combined with pruning and quantization to achieve additional reductions in the size of the language model.</abstract>
       <bibkey>guthrie-etal-2010-efficient</bibkey>
     </paper>
     <paper id="595">
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Dan</first><last>Adams</last></author>
       <author><first>Henry</first><last>Goldberg</last></author>
       <author><first>Jonathan</first><last>Herr</last></author>
@@ -5652,7 +5652,7 @@
     </paper>
     <paper id="596">
       <author><first>Heather</first><last>Simpson</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Robert</first><last>Parker</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
       <title><fixed-case>W</fixed-case>ikipedia and the Web of Confusable Entities: Experience from Entity Linking Query Creation for <fixed-case>TAC</fixed-case> 2009 Knowledge Base Population</title>
@@ -5681,7 +5681,7 @@
     <paper id="599">
       <author><first>Thepchai</first><last>Supnithi</last></author>
       <author><first>Taneth</first><last>Ruangrajitpakorn</last></author>
-      <author><first>Kanokorn</first><last>Trakultaweekool</last></author>
+      <author id="kanokorn-trakultaweekoon"><first>Kanokorn</first><last>Trakultaweekool</last></author>
       <author><first>Peerachet</first><last>Porkaew</last></author>
       <title><fixed-case>A</fixed-case>uto<fixed-case>T</fixed-case>ag<fixed-case>TCG</fixed-case> : A Framework for Automatic <fixed-case>T</fixed-case>hai <fixed-case>CG</fixed-case> Tagging</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/868_Paper.pdf</url>
@@ -5698,21 +5698,21 @@
     <paper id="601">
       <author><first>Noureddine</first><last>Loukil</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Abdelmajid</first><last>Benhamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Benhamadou</last></author>
       <title>A Syntactic Lexicon for <fixed-case>A</fixed-case>rabic Verbs</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/873_Paper.pdf</url>
       <abstract>In this paper, we present a modeling of a syntactic lexicon for Arabic verbs. The structure of the lexicon is based on the recently introduced ISO standard called the Lexical Markup Framework. This standard enables us to describe the lexical information in a versatile way using general guidelines and make possible to share the resources developed in compliance with it. We discuss the syntactic information associated to verbs and the model we propose to structure and represent the entries within the lexicon. To study the usability of the lexicon in a real application, we designed a rule-based system that translates a LMF syntactic resource into Type Description Language compliant resource. The rules are mapping information from LMF entries and types to TDL types. The generated lexicon is used as input for a previously written HPSG grammar for Arabic built within the Language Knowledge Builder platform. Finally, we discuss improvements in parsing results and possible perspectives of this work.</abstract>
       <bibkey>loukil-etal-2010-syntactic</bibkey>
     </paper>
     <paper id="602">
-      <author><first>Girish Nath</first><last>Jha</last></author>
+      <author id="girish-nath-jha"><first>Girish Nath</first><last>Jha</last></author>
       <title>The <fixed-case>TDIL</fixed-case> Program and the <fixed-case>I</fixed-case>ndian Langauge Corpora Intitiative (<fixed-case>ILCI</fixed-case>)</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/874_Paper.pdf</url>
       <abstract>India is considered a linguistic ocean with 4 language families and 22 scheduled national languages, and 100 un-scheduled languages reported by the 2001 census. This puts tremendous pressures on the Indian government to not only have comprehensive language policies, but also to create resources for their maintenance and development. In the age of information technology, there is a greater need to have a fine balance between allocation of resources to each language keeping in view the political compulsions, electoral potential of a linguistic community and other issues. In this connection, the government of India through various ministries and a think tank consisting of eminent linguistics and policy makers has done a commendable job despite the obvious roadblocks. This paper describes the Indian governments policies towards language development and maintenance in the age of technology through the Ministry of HRD through its various agencies and the Ministry of Communications &amp; Information Technology (MCIT) through its dedicated program called TDIL (Technology Development for Indian Languages). The paper also describes some of the recent activities of the TDIL in general and in particular, an innovative corpora project called ILCI - Indian Languages Corpora Initiative.</abstract>
       <bibkey>jha-2010-tdil</bibkey>
     </paper>
     <paper id="603">
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title>Towards Sentiment Analysis of Financial Texts in <fixed-case>C</fixed-case>roatian</title>
@@ -5730,7 +5730,7 @@
     </paper>
     <paper id="605">
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Adam</first><last>Przepiórkowski</last></author>
       <title>Towards the Annotation of Named Entities in the <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus of <fixed-case>P</fixed-case>olish</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/879_Paper.pdf</url>
@@ -5752,7 +5752,7 @@
       <author><first>Somara</first><last>Seng</last></author>
       <author><first>Nicolas</first><last>Kuchmann-Beauger</last></author>
       <author><first>Anass</first><last>Talby</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <title><fixed-case>OAL</fixed-case>: A <fixed-case>NLP</fixed-case> Architecture to Improve the Development of Linguistic Resources for <fixed-case>NLP</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/882_Paper.pdf</url>
       <abstract>The performance of most NLP applications relies upon the quality of linguistic resources. The creation, maintenance and enrichment of those resources are a labour-intensive task, especially when no tools are available. In this paper we present the NLP architecture OAL, designed to assist computational linguists in the whole process of the development of resources in an industrial context: from corpora compilation to quality assurance. To add new words more easily to the morphosyntactic lexica, a guesser that lemmatizes and assigns morphosyntactic tags as well as inflection paradigms to a new word has been developed. Moreover, different control mechanisms are set up to check the coherence and consistency of the resources. Today OAL manages resources in five European languages: French, English, Spanish, Italian and Polish. Chinese and Portuguese are in process. The development of OAL has followed an incremental strategy. At present, semantic lexica, a named entities guesser and a named entities phonetizer are being developed.</abstract>
@@ -5761,20 +5761,20 @@
     <paper id="608">
       <author><first>Karel</first><last>Pala</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <title>Lexical Resources for Noun Compounds in <fixed-case>C</fixed-case>zech, <fixed-case>E</fixed-case>nglish and <fixed-case>Z</fixed-case>ulu</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/883_Paper.pdf</url>
       <abstract>In this paper we discuss noun compounding, a highly generative, productive process, in three distinct languages: Czech, English and Zulu. Derivational morphology presents a large grey area between regular, compositional and idiosyncratic, non-compositional word forms. The structural properties of compounds in each of the languages are reviewed and contrasted. Whereas English compounds are head-final and thus left-branching, Czech and Zulu compounds usually consist of a leftmost governing head and a rightmost dependent element. Semantic properties of compounds are discussed with special reference to semantic relations between compound members which cross-linguistically show universal patterns, but idiosyncratic, language specific compounds are also identified. The integration of compounds into lexical resources, and WordNets in particular, remains a challenge that needs to be considered in terms of the compounds syntactic idiosyncrasy and semantic compositionality. Experiments with processing compounds in Czech, English and Zulu are reported and partly evaluated. The obtained partial lists of the Czech, English and Zulu compounds are also described.</abstract>
       <bibkey>pala-etal-2010-lexical</bibkey>
     </paper>
     <paper id="609">
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
-      <author><first>Antonio José</first><last>Jimeno Yepes</last></author>
-      <author><first>Erik M.</first><last>van Mulligen</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio José</first><last>Jimeno Yepes</last></author>
+      <author id="erik-van-mulligen"><first>Erik M.</first><last>van Mulligen</last></author>
       <author><first>Ning</first><last>Kang</last></author>
-      <author><first>Jan</first><last>Kors</last></author>
+      <author id="jan-kors"><first>Jan</first><last>Kors</last></author>
       <author><first>David</first><last>Milward</last></author>
-      <author><first>Peter</first><last>Corbett</last></author>
+      <author id="peter-corbett"><first>Peter</first><last>Corbett</last></author>
       <author><first>Ekaterina</first><last>Buyko</last></author>
       <author><first>Katrin</first><last>Tomanek</last></author>
       <author><first>Elena</first><last>Beisswanger</last></author>
@@ -5795,7 +5795,7 @@
       <author><first>Petra-Maria</first><last>Strauß</last></author>
       <author><first>Stefan</first><last>Scherer</last></author>
       <author><first>Georg</first><last>Layher</last></author>
-      <author><first>Holger</first><last>Hoffmann</last></author>
+      <author id="holger-hoffmann"><first>Holger</first><last>Hoffmann</last></author>
       <title>Evaluation of the <fixed-case>PIT</fixed-case> Corpus Or What a Difference a Face Makes?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/890_Paper.pdf</url>
       <abstract>This paper presents the evaluation of the PIT Corpus of multi-party dialogues recorded in a Wizard-of-Oz environment. An evaluation has been performed with two different foci: First, a usability evaluation was used to take a look at the overall ratings of the system. A shortened version of the SASSI questionnaire, namely the SASSISV, and the well established AttrakDiff questionnaire assessing the hedonistic and pragmatic dimension of computer systems have been analysed. In a second evaluation, the user's gaze direction was analysed in order to assess the difference in the user's (gazing) behaviour if interacting with the computer versus the other dialogue partner. Recordings have been performed in different setups of the system, e.g. with and without avatar. Thus, the presented evaluation further focuses on the difference in the interaction caused by deploying an avatar. The quantitative analysis of the gazing behaviour has resulted in several encouraging significant differences. As a possible interpretation it could be argued that users are more attentive towards systems with an avatar - the difference a face makes.</abstract>
@@ -5803,7 +5803,7 @@
     </paper>
     <paper id="612">
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
       <title>A Recursive Treatment of Collocations</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/891_Paper.pdf</url>
@@ -5821,7 +5821,7 @@
     <paper id="614">
       <author><first>Timo</first><last>Sowa</last></author>
       <author><first>Fiorenza</first><last>Arisio</last></author>
-      <author><first>Luca</first><last>Cristoforetti</last></author>
+      <author id="luca-cristoforetti"><first>Luca</first><last>Cristoforetti</last></author>
       <title><fixed-case>DICIT</fixed-case>: Evaluation of a Distant-talking Speech Interface for Television</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/894_Paper.pdf</url>
       <abstract>The EC-funded project DICIT developed distant-talking interfaces for interactive TV. The final DICIT prototype system processes multimodal user input by speech and remote control. It was designed to understand both natural language and command-and-control-style speech input. We conducted an evaluation campaign to examine the usability and performance of the prototype. The task-oriented evaluation involved naive test persons and consisted of a subjective part with a usability questionnaire and an objective part. We used three groups of objective metrics to assess the system: one group related to speech component performance, one related to interface design and user awareness, and a final group related to task-based effectiveness and usability. These metrics were acquired with a dedicated transcription and annotation tool. The evaluation revealed a quite positive subjective assessments of the system and reasonable objective results. We report how the objective metrics helped us to determine problems in specific areas and to distinguish design-related issues from technical problems. The metrics computed over modality-specific groups also show that speech input gives a usability advantage over remote control for certain types of tasks.</abstract>
@@ -5829,7 +5829,7 @@
     </paper>
     <paper id="615">
       <author><first>Arianne</first><last>Reimerink</last></author>
-      <author><first>Pilar León</first><last>Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar León</first><last>Araúz</last></author>
       <author><first>Pedro J. Magaña</first><last>Redondo</last></author>
       <title><fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exicon: An Environmental <fixed-case>TKB</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/895_Paper.pdf</url>
@@ -5837,7 +5837,7 @@
       <bibkey>reimerink-etal-2010-ecolexicon</bibkey>
     </paper>
     <paper id="616">
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <author><first>André</first><last>Santos</last></author>
       <author><first>Alberto</first><last>Simões</last></author>
       <title>Bigorna – A Toolkit for Orthography Migration Challenges</title>
@@ -5847,15 +5847,15 @@
     </paper>
     <paper id="617">
       <author><first>Jan Jona</first><last>Javoršek</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title>Experimental Deployment of a Grid Virtual Organization for Human Language Technologies</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/899_Paper.pdf</url>
       <abstract>We propose to create a grid virtual organization for human language technologies, at first chiefly with the task of enabling linguistic researches to use existing distributed computing facilities of the European grid infrastructure for more efficient processing of large data sets. After a brief overview of modern grid computing, a number of common use-cases of natural language processing tasks running on the grid are presented, notably corpus annotation with morpho-syntactic tagging (600+ million-word corpus annotated in less than a day), $n$-gram statistics processing of a corpus and creation of grid-backed web-accessible services with annotation and term-extraction as examples. Implementation considerations and common problems of using grid for this type of tasks are laid out. We conclude with an outline of a simple action plan for evolving the infrastructure created for these experiments into a fully functional Human Language Technology grid Virtual Organization with the goal of making the power of European grid infrastructure available to the linguistic community.</abstract>
       <bibkey>javorsek-erjavec-2010-experimental</bibkey>
     </paper>
     <paper id="618">
-      <author><first>Eric</first><last>Charton</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <title><fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se: A Free Linguistic Resource for Natural Language Processing Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/900_Paper.pdf</url>
       <abstract>Availability of labeled language resources, such as annotated corpora and domain dependent labeled language resources is crucial for experiments in the field of Natural Language Processing. Most often, due to lack of resources, manual verification and annotation of electronic text material is a prerequisite for the development of NLP tools. In the context of under-resourced language, the lack of copora becomes a crucial problem because most of the research efforts are supported by organizations with limited funds. Using free, multilingual and highly structured corpora like Wikipedia to produce automatically labeled language resources can be an answer to those needs. This paper introduces NLGbAse, a multilingual linguistic resource built from the Wikipedia encyclopedic content. This system produces structured metadata which make possible the automatic annotation of corpora with syntactical and semantical labels. A metadata contains semantical and statistical informations related to an encyclopedic document. To validate our approach, we built and evaluated a Named Entity Recognition tool, trained with Wikipedia corpora annotated by our system.</abstract>
@@ -5870,9 +5870,9 @@
       <bibkey>bosma-vossen-2010-bootstrapping</bibkey>
     </paper>
     <paper id="620">
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title><fixed-case>P</fixed-case>ropbank Instance Annotation Guidelines Using a Dedicated Editor, Jubilee</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/903_Paper.pdf</url>
       <abstract>This paper gives guidelines of how to annotate Propbank instances using a dedicated editor, Jubilee. Propbank is a corpus in which the arguments of each verb predicate are annotated with their semantic roles in relation to the predicate. Propbank annotation also requires the choice of a sense ID for each predicate. Jubilee facilitates this annotation process by displaying several resources of syntactic and semantic information simultaneously: the syntactic structure of a sentence is displayed in the main frame, the available senses with their corresponding argument structures are displayed in another frame, all available Propbank arguments are displayed for the annotators choice, and example annotations of each sense of the predicate are available to the annotator for viewing. Easy access to each of these resources allows the annotator to quickly absorb and apply the necessary syntactic and semantic information pertinent to each predicate for consistent and efficient annotation. Jubilee has been successfully adapted to many Propbank projects in several universities. The tool runs platform independently, is light enough to run as an X11 application and supports multiple languages such as Arabic, Chinese, English, Hindi and Korean.</abstract>
@@ -5894,11 +5894,11 @@
       <bibkey>nabende-2010-applying</bibkey>
     </paper>
     <paper id="623">
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Erik</first><last>van der Goot</last></author>
+      <author id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></author>
       <author><first>Matina</first><last>Halkia</last></author>
       <author><first>Bruno</first><last>Pouliquen</last></author>
       <author><first>Jenya</first><last>Belyaeva</last></author>
@@ -5923,11 +5923,11 @@
       <bibkey>strunk-2010-enriching</bibkey>
     </paper>
     <paper id="626">
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <author><first>Marie</first><last>Guégan</last></author>
-      <author><first>Christelle</first><last>Ayache</last></author>
+      <author id="christelle-ayache"><first>Christelle</first><last>Ayache</last></author>
       <author><first>Somara</first><last>Seng</last></author>
-      <author><first>Juan-Manuel Torres</first><last>Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel Torres</first><last>Moreno</last></author>
       <title>A <fixed-case>F</fixed-case>rench Human Reference Corpus for Multi-Document Summarization and Sentence Compression</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf</url>
       <abstract>This paper presents two corpora produced within the RPM2 project: a multi-document summarization corpus and a sentence compression corpus. Both corpora are in French. The first one is the only one we know in this language. It contains 20 topics with 20 documents each. A first set of 10 documents per topic is summarized and then the second set is used to produce an update summarization (new information). 4 annotators were involved and produced a total of 160 abstracts. The second corpus contains all the sentences of the first one. 4 annotators were asked to compress the 8432 sentences. This is the biggest corpus of compressed sentences we know, whatever the language. The paper provides some figures in order to compare the different annotators: compression rates, number of tokens per sentence, percentage of tokens kept according to their POS, position of dropped tokens in the sentence compression phase, etc. These figures show important differences from an annotator to the other. Another point is the different strategies of compression used according to the length of the sentence.</abstract>
@@ -5936,17 +5936,17 @@
     <paper id="627">
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Carrie</first><last>Lewis</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <title>The Problems of Language Identification within Hugely Multilingual Data Sets</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/921_Paper.pdf</url>
       <abstract>As the data for more and more languages is finding its way into digital form, with an increasing amount of this data being posted to the Web, it has become possible to collect language data from the Web and create large multilingual resources, covering hundreds or even thousands of languages. ODIN, the Online Database of INterlinear text (Lewis, 2006), is such a resource. It currently consists of nearly 200,000 data points for over 1,000 languages, the data for which was harvested from linguistic documents on the Web. We identify a number of issues with language identification for such broad-coverage resources including the lack of training data, ambiguous language names, incomplete language code sets, and incorrect uses of language names and codes. After providing a short overview of existing language code sets maintained by the linguistic community, we discuss what linguists and the linguistic community can do to make the process of language identification easier.</abstract>
       <bibkey>xia-etal-2010-problems</bibkey>
     </paper>
     <paper id="628">
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
-      <author><first>Ansaf</first><last>Salleb-Aoussi</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="ansaf-salleb-aouissi"><first>Ansaf</first><last>Salleb-Aoussi</last></author>
       <author><first>Vikas</first><last>Bhardwaj</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <title>Word Sense Annotation of Polysemous Words by Multiple Annotators</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/922_Paper.pdf</url>
       <abstract>We describe results of a word sense annotation task using WordNet, involving half a dozen well-trained annotators on ten polysemous words for three parts of speech. One hundred sentences for each word were annotated. Annotators had the same level of training and experience, but interannotator agreement (IA) varied across words. There was some effect of part of speech, with higher agreement on nouns and adjectives, but within the words for each part of speech there was wide variation. This variation in IA does not correlate with number of senses in the inventory, or the number of senses actually selected by annotators. In fact, IA was sometimes quite high for words with many senses. We claim that the IA variation is due to the word meanings, contexts of use, and individual differences among annotators. We find some correlation of IA with sense confusability as measured by a sense confusion threshhold (CT). Data mining for association rules on a flattened data representation indicating each annotator's sense choices identifies outliers for some words, and systematic differences among pairs of annotators on others.</abstract>
@@ -5960,17 +5960,17 @@
       <bibkey>gasser-2010-expanding</bibkey>
     </paper>
     <paper id="630">
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Travis</first><last>Rood</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Number or Nuance: Which Factors Restrict Reliable Word Sense Annotation?</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/927_Paper.pdf</url>
       <abstract>This study attempts to pinpoint the factors that restrict reliable word sense annotation, focusing on the influence of the number of senses annotators use and the semantic granularity of those senses. Both of these factors may be possible causes of low interannotator agreement (ITA) when tagging with fine-grained word senses, and, consequently, low WSD system performance (Ng et al., 1999; Snyder &amp; Palmer, 2004; Chklovski &amp; Mihalcea, 2002). If number of senses is the culprit, modifying the task to show fewer senses at a time could improve annotator reliability. However, if overly nuanced distinctions are the problem, then more general, coarse-grained distinctions may be necessary for annotator success and may be all that is needed to supply systems with the types of distinctions that people make. We describe three experiments that explore the role of these factors in annotation performance. Our results indicate that of these two factors, only the granularity of the senses restricts interannotator agreement, with broader senses resulting in higher annotation reliability.</abstract>
       <bibkey>brown-etal-2010-number</bibkey>
     </paper>
     <paper id="631">
-      <author><first>Joshua B.</first><last>Gordon</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="joshua-b-gordon"><first>Joshua B.</first><last>Gordon</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <title>An Evaluation Framework for Natural Language Understanding in Spoken Dialogue Systems</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/928_Paper.pdf</url>
       <abstract>We present an evaluation framework to enable developers of information seeking, transaction based spoken dialogue systems to compare the robustness of natural language understanding (NLU) approaches across varying levels of word error rate and contrasting domains. We develop statistical and semantic parsing based approaches to dialogue act identification and concept retrieval. Voice search is used in each approach to ultimately query the database. Included in the framework is a method for developers to bootstrap a representative pseudo-corpus, which is used to estimate NLU performance in a new domain. We illustrate the relative merits of these NLU techniques by contrasting our statistical NLU approach with a semantic parsing method over two contrasting applications, our CheckItOut library system and the deployed Lets Go Public! system, across four levels of word error rate. We find that with respect to both dialogue act identification and concept retrieval, our statistical NLU approach is more likely to robustly accommodate the freer form, less constrained utterances of CheckItOut at higher word error rates than is possible with semantic parsing.</abstract>
@@ -5995,15 +5995,15 @@
     </paper>
     <paper id="634">
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <title>Exploiting Scope for Shallow Discourse Parsing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/935_Paper.pdf</url>
       <abstract>We present an approach to automatically identifying the arguments of discourse connectives based on data from the Penn Discourse Treebank. Of the two arguments of connectives, called Arg1 and Arg2, we focus on Arg1, which has proven more challenging to identify. Our approach employs a sentence-based representation of arguments, and distinguishes ""intra-sentential connectives"", which take both their arguments in the same sentence, from ""inter-sentential connectives"", whose arguments are found in different sentences. The latter are further distinguished by paragraph position into ""ParaInit"" connectives, which appear in a paragraph-initial sentence, and ""ParaNonInit"" connectives, which appear elsewhere. The paper focusses on predicting Arg1 of Inter-sentential ParaNonInit connectives, presenting a set of scope-based filters that reduce the search space for Arg1 from all the previous sentences in the paragraph to a subset of them. For cases where these filters do not uniquely identify Arg1, coreference-based heuristics are employed. Our analysis shows an absolute 3% performance improvement over the high baseline of 83.3% for identifying Arg1 of Inter-sentential ParaNonInit connectives.</abstract>
       <bibkey>prasad-etal-2010-exploiting</bibkey>
     </paper>
     <paper id="635">
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title><fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/939_Paper.pdf</url>
       <abstract>India is a multilingual country where machine translation and cross lingual search are highly relevant problems. These problems require large resources- like wordnets and lexicons- of high quality and coverage. Wordnets are lexical structures composed of synsets and semantic relations. Synsets are sets of synonyms. They are linked by semantic relations like hypernymy (is-a), meronymy (part-of), troponymy (manner-of) etc. IndoWordnet is a linked structure of wordnets of major Indian languages from Indo-Aryan, Dravidian and Sino-Tibetan families. These wordnets have been created by following the expansion approach from Hindi wordnet which was made available free for research in 2006. Since then a number of Indian languages have been creating their wordnets. In this paper we discuss the methodology, coverage, important considerations and multifarious benefits of IndoWordnet. Case studies are provided for Marathi, Sanskrit, Bodo and Telugu, to bring out the basic methodology of and challenges involved in the expansion approach. The guidelines the lexicographers follow for wordnet construction are enumerated. The difference between IndoWordnet and EuroWordnet also is discussed.</abstract>
@@ -6012,8 +6012,8 @@
     <paper id="636">
       <author><first>Kirk</first><last>Roberts</last></author>
       <author><first>Srikanth</first><last>Gullapalli</last></author>
-      <author><first>Cosmin Adrian</first><last>Bejan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin Adrian</first><last>Bejan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title>A Linguistic Resource for Semantic Parsing of Motion Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/941_Paper.pdf</url>
       <abstract>This paper presents a corpus of annotated motion events and their event structure. We consider motion events triggered by a set of motion evoking words and contemplate both literal and figurative interpretations of them. Figurative motion events are extracted into the same event structure but are marked as figurative in the corpus. To represent the event structure of motion, we use the FrameNet annotation standard, which encodes motion in over 70 frames. In order to acquire a diverse set of texts that are different from FrameNet's, we crawled blog and news feeds for five different domains: sports, newswire, finance, military, and gossip. We then annotated these documents with an automatic FrameNet parser. Its output was manually corrected to account for missing and incorrect frames as well as missing and incorrect frame elements. The corpus, UTD-MotionEvent, may act as a resource for semantic parsing, detection of figurative language, spatial reasoning, and other tasks.</abstract>
@@ -6040,7 +6040,7 @@
     <paper id="639">
       <author><first>Zygmunt</first><last>Vetulani</last></author>
       <author><first>Marek</first><last>Kubis</last></author>
-      <author><first>Tomasz</first><last>Obrębski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrębski</last></author>
       <title><fixed-case>P</fixed-case>ol<fixed-case>N</fixed-case>et — <fixed-case>P</fixed-case>olish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Data and Tools</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/947_Paper.pdf</url>
       <abstract>This paper presents the PolNet-Polish WordNet project which aims at building a linguistically oriented ontology for Polish compatible with other WordNet projects such as Princeton WordNet, EuroWordNet and other similarly organized ontologies. The main idea behind this kind of ontologies is to use words related by synonymy to construct formal representations of concepts. In the paper we sketch the PolNet project methodology and implementation. We present data obtained so far, as well as the WQuery tool for querying and maintaining PolNet. WQuery is a query language that make use of data types based on synsets, word senses and various semantic relations which occur in wordnet-like lexical databases. The tool is particularly useful to deal with complex querying tasks like searching for cycles in semantic relations, finding isolated synsets or computing overall statistics. Both data and tools presented in this paper have been applied within an advanced AI system POLINT-112-SMS with emulated natural language competence, where they are used in the understanding subsystem.</abstract>
@@ -6056,21 +6056,21 @@
     </paper>
     <paper id="641">
       <author><first>Youssef Aït</first><last>Ouguengay</last></author>
-      <author><first>Aïcha</first><last>Bouhjar</last></author>
+      <author id="aicha-bouhjar"><first>Aïcha</first><last>Bouhjar</last></author>
       <title>For Standardised <fixed-case>A</fixed-case>mazigh Linguistic Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/949_Paper.pdf</url>
       <abstract>Amazigh language and culture may well be viewed to have known an unprecedented booming in Morocco : more than a hundred- which are published by the Royal Institute of Amazigh Culture (IRCAM), an institution created in 2001 to preserve, promote and endorse Amazigh culture in all its dimensions. Crucially, publications in the Amazigh language would not have seen light without the valiant attempts to upgrade the language on the linguistic and technological levels. The central thrust of this contribution is to provide a vista about the whole range of actions carried out by IRCAM. Of prime utility to this presentation is what was accomplished to supply Amazigh with the necessary tools and corpora without which the Amazigh language would emphatically fail to have a place in the world of NITCs. After a brief description of the prime specificities that characterise the standardisation of Amazigh in Morocco, a retrospective on the basic computer tools now available for the processing of Amazigh will be set out. It is concluded that the homogenisation of a considerable number of corpora should, by right, be viewed as a strategic move and an incontrovertible prerequisite to the computerisation of Amazigh,</abstract>
       <bibkey>ouguengay-bouhjar-2010-standardised</bibkey>
     </paper>
     <paper id="642">
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>D. Terence</first><last>Langendoen</last></author>
+      <author id="d-terence-langendoen"><first>D. Terence</first><last>Langendoen</last></author>
       <author><first>Johannes</first><last>Leveling</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>A Road Map for Interoperable Language Resource Metadata</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/951_Paper.pdf</url>
       <abstract>LRs remain expensive to create and thus rare relative to demand across languages and technology types. The accidental re-creation of an LR that already exists is a nearly unforgivable waste of scarce resources that is unfortunately not so easy to avoid. The number of catalogs the HLT researcher must search, with their different formats, make it possible to overlook an existing resource. This paper sketches the sources of this problem and outlines a proposal to rectify along with a new vision of LR cataloging that will to facilitates the documentation and exploitation of a much wider range of LRs than previously considered.</abstract>
@@ -6094,8 +6094,8 @@
       <bibkey>gishri-etal-2010-lexicon</bibkey>
     </paper>
     <paper id="645">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>Adapting to Trends in Language Resource Development: A Progress Report on <fixed-case>LDC</fixed-case> Activities</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/954_Paper.pdf</url>
       <abstract>This paper describes changing needs among the communities that exploit language resources and recent LDC activities and publications that support those needs by providing greater volumes of data and associated resources in a growing inventory of languages with ever more sophisticated annotation. Specifically, it covers the evolving role of data centers with specific emphasis on the LDC, the publications released by the LDC in the two years since our last report and the sponsored research programs that provide LRs initially to participants in those programs but eventually to the larger HLT research communities and beyond.</abstract>
diff --git a/data/xml/L12.xml b/data/xml/L12.xml
index f6d04bbd82..22b7e928d9 100644
--- a/data/xml/L12.xml
+++ b/data/xml/L12.xml
@@ -3,15 +3,15 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Eighth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>'12)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Mehmet Uğur</first><last>Doğan</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
-      <editor><first>Asuncion</first><last>Moreno</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Istanbul, Turkey</address>
       <month>May</month>
@@ -22,7 +22,7 @@
       <bibkey>lrec-2012-international</bibkey>
     </frontmatter>
     <paper id="1">
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Silvi</first><last>Tenjes</last></author>
       <title>Investigating Engagement - intercultural and technological aspects of the collection, analysis, and use of the <fixed-case>E</fixed-case>stonian Multiparty Conversational video data</title>
       <pages>2764–2769</pages>
@@ -47,8 +47,8 @@
       <bibkey>burkhardt-2012-fast</bibkey>
     </paper>
     <paper id="4">
-      <author><first>Peter</first><last>Spyns</last></author>
-      <author><first>Elisabeth</first><last>D’Halleweyn</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
+      <author id="elisabeth-dhalleweyn"><first>Elisabeth</first><last>D’Halleweyn</last></author>
       <title>Smooth Sailing for <fixed-case>STEVIN</fixed-case></title>
       <pages>1021–1028</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/112_Paper.pdf</url>
@@ -65,7 +65,7 @@
       <bibkey>stein-usabaev-2012-automatic</bibkey>
     </paper>
     <paper id="6">
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <author><first>Iker</first><last>Manterola</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
       <title>Building a <fixed-case>B</fixed-case>asque-<fixed-case>C</fixed-case>hinese Dictionary by Using <fixed-case>E</fixed-case>nglish as Pivot</title>
@@ -84,7 +84,7 @@
       <bibkey>tang-chen-2012-mining</bibkey>
     </paper>
     <paper id="8">
-      <author><first>Johanka</first><last>Spoustová</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Johanka</first><last>Spoustová</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
       <title>A High-Quality Web Corpus of <fixed-case>C</fixed-case>zech</title>
       <pages>311–315</pages>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="10">
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Mark A.</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></author>
       <title>Large Scale Semantic Annotation, Indexing and Search at The National Archives</title>
       <pages>3487–3494</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/122_Paper.pdf</url>
@@ -110,8 +110,8 @@
       <bibkey>maynard-greenwood-2012-large</bibkey>
     </paper>
     <paper id="11">
-      <author><first>Abdul-Baquee</first><last>Sharaf</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="abdul-baquee-sharaf"><first>Abdul-Baquee</first><last>Sharaf</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title><fixed-case>Q</fixed-case>ur<fixed-case>A</fixed-case>na: Corpus of the <fixed-case>Q</fixed-case>uran annotated with Pronominal Anaphora</title>
       <pages>130–137</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/123_Paper.pdf</url>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="12">
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <title>Bootstrapping Sentiment Labels For Unannotated Documents With Polarity <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank</title>
       <pages>1230–1234</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/124_Paper.pdf</url>
@@ -145,9 +145,9 @@
     <paper id="14">
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Inma</first><last>Hernáez</last></author>
-      <author><first>Jon</first><last>Sanchez</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernáez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sanchez</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
       <author><first>Igor</first><last>Odriozola</last></author>
       <title>Versatile Speech Databases for High Quality Synthesis for <fixed-case>B</fixed-case>asque</title>
@@ -157,10 +157,10 @@
       <bibkey>sainz-etal-2012-versatile</bibkey>
     </paper>
     <paper id="15">
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <title><fixed-case>TIMEN</fixed-case>: An Open Temporal Expression Normalisation Resource</title>
       <pages>3044–3051</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/128_Paper.pdf</url>
@@ -177,7 +177,7 @@
       <bibkey>brooke-hirst-2012-measuring</bibkey>
     </paper>
     <paper id="17">
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <title><fixed-case>DISLOG</fixed-case>: A logic-based language for processing discourse structures</title>
       <pages>2770–2777</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/130_Paper.pdf</url>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="19">
       <author><first>Sarah</first><last>Bourse</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <title>A Repository of Rules and Lexical Resources for Discourse Structure Analysis: the Case of Explanation Structures</title>
       <pages>2778–2785</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/137_Paper.pdf</url>
@@ -209,7 +209,7 @@
       <author><first>Flore</first><last>Barcellini</last></author>
       <author><first>Camille</first><last>Albert</last></author>
       <author><first>Corinne</first><last>Grosse</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <title>Risk Analysis and Prevention: <fixed-case>LELIE</fixed-case>, a Tool dedicated to Procedure and Requirement Authoring</title>
       <pages>698–705</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/139_Paper.pdf</url>
@@ -244,8 +244,8 @@
       <bibkey>patejuk-przepiorkowski-2012-towards</bibkey>
     </paper>
     <paper id="24">
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <title>Constructive Interaction for Talking about Interesting Topics</title>
       <pages>404–410</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/151_Paper.pdf</url>
@@ -255,8 +255,8 @@
     <paper id="25">
       <author><first>Hilder</first><last>Pereira</last></author>
       <author><first>Eder</first><last>Novais</last></author>
-      <author><first>André</first><last>Mariotti</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="andre-mariotti"><first>André</first><last>Mariotti</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <title>Corpus-based Referring Expressions Generation</title>
       <pages>4004–4009</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/152_Paper.pdf</url>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="26">
       <author><first>Eder</first><last>Novais</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Douglas</first><last>Silva</last></author>
       <title><fixed-case>P</fixed-case>ortuguese Text Generation from Large Corpora</title>
       <pages>4010–4014</pages>
@@ -275,11 +275,11 @@
     </paper>
     <paper id="27">
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
       <author><first>Sergio</first><last>Penkale</last></author>
       <author><first>Arantza</first><last>del Pozo</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maučec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maučec</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Martin</first><last>Volk</last></author>
@@ -312,9 +312,9 @@
     </paper>
     <paper id="30">
       <author><first>Lieve</first><last>Macken</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
-      <author><first>Mariëlle</first><last>Leijten</last></author>
-      <author><first>Luuk</first><last>Van Waes</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
+      <author id="marielle-leijten"><first>Mariëlle</first><last>Leijten</last></author>
+      <author id="luuk-van-waes"><first>Luuk</first><last>Van Waes</last></author>
       <title>From keystrokes to annotated process data: Enriching the output of Inputlog with linguistic information</title>
       <pages>2224–2229</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/161_Paper.pdf</url>
@@ -323,7 +323,7 @@
     </paper>
     <paper id="31">
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <title>A Comparative Evaluation of Word Sense Disambiguation Algorithms for <fixed-case>G</fixed-case>erman</title>
       <pages>576–583</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/164_Paper.pdf</url>
@@ -344,7 +344,7 @@
       <bibkey>varges-etal-2012-semscribe</bibkey>
     </paper>
     <paper id="33">
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
       <title>Automatic Annotation and Manual Evaluation of the Diachronic <fixed-case>G</fixed-case>erman Corpus <fixed-case>T</fixed-case>ü<fixed-case>B</fixed-case>a-<fixed-case>D</fixed-case>/<fixed-case>DC</fixed-case></title>
       <pages>1622–1627</pages>
@@ -362,9 +362,9 @@
       <bibkey>joubert-lafourcade-2012-new</bibkey>
     </paper>
     <paper id="35">
-      <author><first>Hongsuck</first><last>Seo</last></author>
+      <author id="hongsuck-seo"><first>Hongsuck</first><last>Seo</last></author>
       <author><first>Kyusong</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <author><first>Soo-Ok</first><last>Kweon</last></author>
       <author><first>Hae-Ri</first><last>Kim</last></author>
       <title>Grammatical Error Annotation for <fixed-case>K</fixed-case>orean Learners of Spoken <fixed-case>E</fixed-case>nglish</title>
@@ -405,7 +405,7 @@
     </paper>
     <paper id="39">
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <title>A Tool for Extracting Conversational Implicatures</title>
       <pages>2708–2715</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/175_Paper.pdf</url>
@@ -413,7 +413,7 @@
       <bibkey>tatu-moldovan-2012-tool</bibkey>
     </paper>
     <paper id="40">
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Eduardo</first><last>Blanco</last></author>
       <title><fixed-case>P</fixed-case>olaris: Lymba’s Semantic Parser</title>
       <pages>66–72</pages>
@@ -450,7 +450,7 @@
     </paper>
     <paper id="44">
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title>The coding and annotation of multimodal dialogue acts</title>
       <pages>1293–1300</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/180_Paper.pdf</url>
@@ -496,8 +496,8 @@
     <paper id="49">
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Attila</first><last>Görög</last></author>
-      <author><first>Rubén</first><last>Izquierdo</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="ruben-izquierdo"><first>Rubén</first><last>Izquierdo</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <title><fixed-case>D</fixed-case>utch<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or: Targeting the ideal sense-tagged corpus</title>
       <pages>584–589</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf</url>
@@ -514,8 +514,8 @@
       <bibkey>cartoni-meyer-2012-extracting</bibkey>
     </paper>
     <paper id="51">
-      <author><first>Abdul-Baquee</first><last>Sharaf</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="abdul-baquee-sharaf"><first>Abdul-Baquee</first><last>Sharaf</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title><fixed-case>Q</fixed-case>ur<fixed-case>S</fixed-case>im: A corpus for evaluation of relatedness in short texts</title>
       <pages>2295–2302</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/190_Paper.pdf</url>
@@ -546,7 +546,7 @@
     </paper>
     <paper id="54">
       <author><first>Jyrki</first><last>Niemi</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <title>Representing the Translation Relation in a Bilingual <fixed-case>W</fixed-case>ordnet</title>
       <pages>2439–2446</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/194_Paper.pdf</url>
@@ -554,7 +554,7 @@
       <bibkey>niemi-linden-2012-representing</bibkey>
     </paper>
     <paper id="55">
-      <author><first>Marianna J.</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna J.</first><last>Martindale</last></author>
       <title>Can Statistical Post-Editing with a Small Parallel Corpus Save a Weak <fixed-case>MT</fixed-case> Engine?</title>
       <pages>2138–2142</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/196_Paper.pdf</url>
@@ -562,7 +562,7 @@
       <bibkey>martindale-2012-statistical</bibkey>
     </paper>
     <paper id="56">
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <title>The Impact of Automatic Morphological Analysis &amp; Disambiguation on Dependency Parsing of <fixed-case>T</fixed-case>urkish</title>
       <pages>1960–1965</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/198_Paper.pdf</url>
@@ -594,7 +594,7 @@
       <author><first>Michael A.</first><last>Roach</last></author>
       <author><first>Joseph</first><last>Johnson</last></author>
       <author><first>Josh</first><last>Guthrie</last></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <title><fixed-case>E</fixed-case>mpa<fixed-case>T</fixed-case>weet: Annotating and Detecting Emotions on <fixed-case>T</fixed-case>witter</title>
       <pages>3806–3813</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/201_Paper.pdf</url>
@@ -643,7 +643,7 @@
     </paper>
     <paper id="64">
       <author><first>Geneviève</first><last>Caelen-Haumont</last></author>
-      <author><first>Sethserey</first><last>Sam</last></author>
+      <author id="sethserey-sam"><first>Sethserey</first><last>Sam</last></author>
       <title>Comparison between two models of language for the automatic phonetic labeling of an undocumented language of the <fixed-case>S</fixed-case>outh-<fixed-case>A</fixed-case>sia: the case of <fixed-case>M</fixed-case>o <fixed-case>P</fixed-case>iu</title>
       <pages>956–962</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/208_Paper.pdf</url>
@@ -653,7 +653,7 @@
     <paper id="65">
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <title>The Parallel-<fixed-case>TUT</fixed-case>: a multilingual and multiformat treebank</title>
       <pages>1932–1938</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/209_Paper.pdf</url>
@@ -683,7 +683,7 @@
     </paper>
     <paper id="68">
       <author><first>Markus</first><last>Forsberg</last></author>
-      <author><first>Torbjörn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjörn</first><last>Lager</last></author>
       <title>Cloud Logic Programming for Integrating Language Technology Resources</title>
       <pages>2935–2940</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/212_Paper.pdf</url>
@@ -691,7 +691,7 @@
       <bibkey>forsberg-lager-2012-cloud</bibkey>
     </paper>
     <paper id="69">
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <author><first>Matias</first><last>Melandri</last></author>
       <title><fixed-case>A</fixed-case>n<fixed-case>I</fixed-case>ta: a powerful morphological analyser for <fixed-case>I</fixed-case>talian</title>
       <pages>941–947</pages>
@@ -701,8 +701,8 @@
     </paper>
     <paper id="70">
       <author><first>Sylvia</first><last>Springorum</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Antje</first><last>Roßdeutscher</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="antje-rossdeutscher"><first>Antje</first><last>Roßdeutscher</last></author>
       <title>Automatic classification of <fixed-case>G</fixed-case>erman <i>an</i> particle verbs</title>
       <pages>73–80</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/214_Paper.pdf</url>
@@ -710,10 +710,10 @@
       <bibkey>springorum-etal-2012-automatic</bibkey>
     </paper>
     <paper id="71">
-      <author><first>Roberta</first><last>Catizone</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Arthur</first><last>Thomas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <title><fixed-case>LIE</fixed-case>: Leadership, Influence and Expertise</title>
       <pages>3692–3696</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/215_Paper.pdf</url>
@@ -721,9 +721,9 @@
       <bibkey>catizone-etal-2012-lie</bibkey>
     </paper>
     <paper id="72">
-      <author><first>Valentina</first><last>Bartalesi Lenzi</last></author>
+      <author id="valentina-bartalesi-lenzi"><first>Valentina</first><last>Bartalesi Lenzi</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <title><fixed-case>CAT</fixed-case>: the <fixed-case>CELCT</fixed-case> Annotation Tool</title>
       <pages>333–338</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/216_Paper.pdf</url>
@@ -734,7 +734,7 @@
       <author><first>Yulan</first><last>He</last></author>
       <author><first>Hassan</first><last>Saif</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <title>Quantising Opinions for Political Tweets Analysis</title>
       <pages>3901–3906</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/217_Paper.pdf</url>
@@ -744,8 +744,8 @@
     <paper id="74">
       <author><first>Radu</first><last>Ion</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <title><fixed-case>ROMBAC</fixed-case>: The <fixed-case>R</fixed-case>omanian Balanced Annotated Corpus</title>
       <pages>339–344</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/218_Paper.pdf</url>
@@ -766,8 +766,8 @@
       <bibkey>konstantopoulos-etal-2012-task</bibkey>
     </paper>
     <paper id="76">
-      <author><first>Ismaïl</first><last>El Maarouf</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="ismail-el-maarouf"><first>Ismaïl</first><last>El Maarouf</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <title>A <fixed-case>F</fixed-case>rench Fairy Tale Corpus syntactically and semantically annotated</title>
       <pages>345–350</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/220_Paper.pdf</url>
@@ -776,7 +776,7 @@
     </paper>
     <paper id="77">
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title><fixed-case>C</fixed-case>onan<fixed-case>D</fixed-case>oyle-neg: Annotation of negation cues and their scope in Conan Doyle stories</title>
       <pages>1563–1568</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/221_Paper.pdf</url>
@@ -796,8 +796,8 @@
       <bibkey>ploch-etal-2012-gerned</bibkey>
     </paper>
     <paper id="79">
-      <author><first>Silvia</first><last>Vázquez</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="silvia-vazquez"><first>Silvia</first><last>Vázquez</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>A Classification of Adjectives for Polarity Lexicons Enhancement</title>
       <pages>3557–3561</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/223_Paper.pdf</url>
@@ -805,9 +805,9 @@
       <bibkey>vazquez-bel-2012-classification</bibkey>
     </paper>
     <paper id="80">
-      <author><first>Héctor Martínez</first><last>Alonso</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor Martínez</first><last>Alonso</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <title>A voting scheme to detect semantic underspecification</title>
       <pages>569–575</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/225_Paper.pdf</url>
@@ -817,7 +817,7 @@
     <paper id="81">
       <author><first>Aditi Sharma</first><last>Grover</last></author>
       <author><first>Annamart</first><last>Nieman</last></author>
-      <author><first>Gerhard</first><last>Van Huyssteen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard</first><last>Van Huyssteen</last></author>
       <author><first>Justus</first><last>Roux</last></author>
       <title>Aspects of a Legal Framework for Language Resource Management</title>
       <pages>1035–1039</pages>
@@ -836,7 +836,7 @@
       <bibkey>polakova-etal-2012-interplay</bibkey>
     </paper>
     <paper id="83">
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <author><first>Kadri</first><last>Muischnek</last></author>
       <title>Robust clause boundary identification for corpus annotation</title>
       <pages>1632–1636</pages>
@@ -894,9 +894,9 @@
       <bibkey>seeker-kuhn-2012-making</bibkey>
     </paper>
     <paper id="89">
-      <author><first>Jorge Carrillo</first><last>de Albornoz</last></author>
+      <author id="jorge-carrillo-de-albornoz"><first>Jorge Carrillo</first><last>de Albornoz</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <title><fixed-case>S</fixed-case>enti<fixed-case>S</fixed-case>ense: An easily scalable concept-based affective lexicon for sentiment analysis</title>
       <pages>3562–3567</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/236_Paper.pdf</url>
@@ -915,7 +915,7 @@
     <paper id="91">
       <author><first>Majdi</first><last>Sawalha</last></author>
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>Predicting Phrase Breaks in Classical and <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Text</title>
       <pages>3868–3872</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/239_Paper.pdf</url>
@@ -925,7 +925,7 @@
     <paper id="92">
       <author><first>Claire</first><last>Brierley</last></author>
       <author><first>Majdi</first><last>Sawalha</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>Open-Source Boundary-Annotated Corpus for <fixed-case>A</fixed-case>rabic Speech and Language Processing</title>
       <pages>1011–1016</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/240_Paper.pdf</url>
@@ -946,7 +946,7 @@
       <author><first>Philip</first><last>Webster</last></author>
       <author><first>Victoria</first><last>Uren</last></author>
       <author><first>Andrea</first><last>Varga</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>Automatically Extracting Procedural Knowledge from Instructional Texts using Natural Language Processing</title>
       <pages>520–527</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/244_Paper.pdf</url>
@@ -963,7 +963,7 @@
     </paper>
     <paper id="96">
       <author><first>Francisco</first><last>Costa</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <title><fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank<fixed-case>PT</fixed-case>: A <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Annotated Corpus of <fixed-case>P</fixed-case>ortuguese</title>
       <pages>3727–3734</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/246_Paper.pdf</url>
@@ -972,7 +972,7 @@
     </paper>
     <paper id="97">
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <title>Towards a richer wordnet representation of properties</title>
       <pages>3452–3456</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/247_Paper.pdf</url>
@@ -1011,7 +1011,7 @@
       <bibkey>kulick-etal-2012-developments</bibkey>
     </paper>
     <paper id="101">
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <title>Turk Bootstrap Word Sense Inventory 2.0: A Large-Scale Resource for Lexical Substitution</title>
       <pages>4038–4042</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/252_Paper.pdf</url>
@@ -1029,7 +1029,7 @@
       <bibkey>alazard-etal-2012-multiphonia</bibkey>
     </paper>
     <paper id="103">
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Thomas</first><last>Meyer</last></author>
       <author><first>Jeevanthi</first><last>Liyanapathirana</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
@@ -1042,7 +1042,7 @@
     </paper>
     <paper id="104">
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <title>Challenges in the Knowledge Base Population Slot Filling Task</title>
       <pages>1137–1142</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/256_Paper.pdf</url>
@@ -1051,7 +1051,7 @@
     </paper>
     <paper id="105">
       <author><first>Alessandra</first><last>Zarcone</last></author>
-      <author><first>Stefan</first><last>Rued</last></author>
+      <author id="stefan-rued"><first>Stefan</first><last>Rued</last></author>
       <title>Logical metonymies and qualia structures: an annotated database of logical metonymies for <fixed-case>G</fixed-case>erman</title>
       <pages>1799–1804</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/259_Paper.pdf</url>
@@ -1061,7 +1061,7 @@
     <paper id="106">
       <author><first>Martina Katalin</first><last>Szabó</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <title><fixed-case>H</fixed-case>un<fixed-case>O</fixed-case>r: A <fixed-case>H</fixed-case>ungarian—<fixed-case>R</fixed-case>ussian Parallel Corpus</title>
       <pages>2453–2458</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/262_Paper.pdf</url>
@@ -1081,7 +1081,7 @@
     </paper>
     <paper id="108">
       <author><first>Elena</first><last>Volodina</last></author>
-      <author><first>Sofie Johansson</first><last>Kokkinakis</last></author>
+      <author id="sofie-johansson-kokkinakis"><first>Sofie Johansson</first><last>Kokkinakis</last></author>
       <title>Introducing the <fixed-case>S</fixed-case>wedish Kelly-list, a new lexical e-resource for <fixed-case>S</fixed-case>wedish</title>
       <pages>1040–1046</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/264_Paper.pdf</url>
@@ -1089,8 +1089,8 @@
       <bibkey>volodina-kokkinakis-2012-introducing</bibkey>
     </paper>
     <paper id="109">
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Angel X.</first><last>Chang</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="angel-chang"><first>Angel X.</first><last>Chang</last></author>
       <title>A Cross-Lingual Dictionary for <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia Concepts</title>
       <pages>3168–3175</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/266_Paper.pdf</url>
@@ -1099,7 +1099,7 @@
     </paper>
     <paper id="110">
       <author><first>Martin</first><last>Majliš</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title>Language Richness of the Web</title>
       <pages>2927–2934</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/267_Paper.pdf</url>
@@ -1127,7 +1127,7 @@
     </paper>
     <paper id="113">
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <title><fixed-case>DS</fixed-case>im, a <fixed-case>D</fixed-case>anish Parallel Corpus for Text Simplification</title>
       <pages>4015–4018</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/270_Paper.pdf</url>
@@ -1135,8 +1135,8 @@
       <bibkey>klerke-sogaard-2012-dsim</bibkey>
     </paper>
     <paper id="114">
-      <author><first>Magali Sanches</first><last>Duran</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <title><fixed-case>P</fixed-case>ropbank-Br: a <fixed-case>B</fixed-case>razilian Treebank annotated with semantic role labels</title>
       <pages>1862–1867</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/272_Paper.pdf</url>
@@ -1157,7 +1157,7 @@
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Beracah</first><last>Yankama</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
-      <author><first>Robert</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert</first><last>Berwick</last></author>
       <title>A large scale annotated child language construction database</title>
       <pages>2370–2374</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/276_Paper.pdf</url>
@@ -1166,10 +1166,10 @@
     </paper>
     <paper id="117">
       <author><first>Xuansong</first><last>Li</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
       <author><first>Safa</first><last>Ismael</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <title>Parallel Aligned Treebanks at <fixed-case>LDC</fixed-case>: New Challenges Interfacing Existing Infrastructures</title>
@@ -1180,7 +1180,7 @@
     </paper>
     <paper id="118">
       <author><first>Xuansong</first><last>Li</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
@@ -1205,7 +1205,7 @@
       <author><first>Shota</first><last>Yamasaki</last></author>
       <author><first>Hirohisa</first><last>Furukawa</last></author>
       <author><first>Masafumi</first><last>Nishida</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <title>Multimodal Corpus of Multi-party Conversations in Second Language</title>
       <pages>416–421</pages>
@@ -1214,7 +1214,7 @@
       <bibkey>yamasaki-etal-2012-multimodal</bibkey>
     </paper>
     <paper id="121">
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <title>Dictionary Look-up with Katakana Variant Recognition</title>
       <pages>249–255</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf</url>
@@ -1222,8 +1222,8 @@
       <bibkey>sato-2012-dictionary</bibkey>
     </paper>
     <paper id="122">
-      <author><first>Angel X.</first><last>Chang</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="angel-chang"><first>Angel X.</first><last>Chang</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <title><fixed-case>SUT</fixed-case>ime: A library for recognizing and normalizing time expressions</title>
       <pages>3735–3740</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/284_Paper.pdf</url>
@@ -1245,7 +1245,7 @@
     </paper>
     <paper id="124">
       <author><first>Eleanor</first><last>Clark</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <title>Two Database Resources for Processing Social Media <fixed-case>E</fixed-case>nglish Text</title>
       <pages>3790–3793</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf</url>
@@ -1255,7 +1255,7 @@
     <paper id="125">
       <author><first>Maristella</first><last>Agosti</last></author>
       <author><first>Birgit</first><last>Alber</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Marco</first><last>Dussin</last></author>
       <author><first>Stefan</first><last>Rabanus</last></author>
       <author><first>Alessandra</first><last>Tomaselli</last></author>
@@ -1266,7 +1266,7 @@
       <bibkey>agosti-etal-2012-curated</bibkey>
     </paper>
     <paper id="126">
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
       <author><first>Fillia</first><last>Makedon</last></author>
       <title>Evaluation of Online Dialogue Policy Learning Techniques</title>
@@ -1278,11 +1278,11 @@
     <paper id="127">
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Shourya</first><last>Roy</last></author>
-      <author><first>Pratik</first><last>Patel</last></author>
+      <author id="pratikkumar-patel"><first>Pratik</first><last>Patel</last></author>
       <author><first>Kushal</first><last>Ladha</last></author>
       <author><first>Somya</first><last>Gupta</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>Experiences in Resource Generation for Machine Translation through Crowdsourcing</title>
       <pages>384–391</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/292_Paper.pdf</url>
@@ -1290,9 +1290,9 @@
       <bibkey>kunchukuttan-etal-2012-experiences</bibkey>
     </paper>
     <paper id="128">
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>Multilingual Central Repository version 3.0</title>
       <pages>2525–2529</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/293_Paper.pdf</url>
@@ -1303,7 +1303,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Cindy</first><last>Tscherwinka</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <title>Involving Language Professionals in the Evaluation of Machine Translation</title>
@@ -1313,10 +1313,10 @@
       <bibkey>avramidis-etal-2012-involving</bibkey>
     </paper>
     <paper id="130">
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Juana Maria</first><last>Ruiz Martinez</last></author>
+      <author id="juana-maria-ruiz-martinez"><first>Juana Maria</first><last>Ruiz Martinez</last></author>
       <title>A New Method for Evaluating Automatically Learned Terminological Taxonomies</title>
       <pages>1498–1504</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/295_Paper.pdf</url>
@@ -1326,7 +1326,7 @@
     <paper id="131">
       <author><first>Gloria</first><last>Gagliardi</last></author>
       <author><first>Edoardo Lombardi</first><last>Vallauri</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <title>A topologic view of Topic and Focus marking in <fixed-case>I</fixed-case>talian</title>
       <pages>948–955</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/296_Paper.pdf</url>
@@ -1345,7 +1345,7 @@
       <bibkey>ghosh-etal-2012-improving</bibkey>
     </paper>
     <paper id="133">
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <title>Adding Morpho-semantic Relations to the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet</title>
       <pages>2596–2601</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/299_Paper.pdf</url>
@@ -1353,9 +1353,9 @@
       <bibkey>mititelu-2012-adding</bibkey>
     </paper>
     <paper id="134">
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <title>Cross-lingual studies of <fixed-case>ASR</fixed-case> errors: paradigms for perceptual evaluations</title>
       <pages>3511–3518</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/300_Paper.pdf</url>
@@ -1363,8 +1363,8 @@
       <bibkey>vasilescu-etal-2012-cross</bibkey>
     </paper>
     <paper id="135">
-      <author><first>Karin Friberg</first><last>Heppin</last></author>
-      <author><first>Maria Toporowska</first><last>Gronostaj</last></author>
+      <author id="karin-friberg-heppin"><first>Karin Friberg</first><last>Heppin</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria Toporowska</first><last>Gronostaj</last></author>
       <title>The Rocky Road towards a <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Creating <fixed-case>S</fixed-case>we<fixed-case>FN</fixed-case></title>
       <pages>256–261</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/301_Paper.pdf</url>
@@ -1383,7 +1383,7 @@
     </paper>
     <paper id="137">
       <author><first>Przemyslaw</first><last>Lenkiewicz</last></author>
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
       <author><first>Oliver</first><last>Schreer</last></author>
       <author><first>Stefano</first><last>Masneri</last></author>
       <author><first>Daniel</first><last>Schneider</last></author>
@@ -1403,10 +1403,10 @@
       <bibkey>seretan-2012-acquisition</bibkey>
     </paper>
     <paper id="139">
-      <author><first>K</first><last>Saravanan</last></author>
+      <author id="k-saravanan"><first>K</first><last>Saravanan</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
-      <author><first>A</first><last>Kumaran</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
       <title>An Empirical Study of the Occurrence and Co-Occurrence of Named Entities in Natural Language Corpora</title>
       <pages>3118–3125</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/305_Paper.pdf</url>
@@ -1426,7 +1426,7 @@
     <paper id="141">
       <author><first>Carlos</first><last>Morell</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title><fixed-case>I</fixed-case>ula2<fixed-case>S</fixed-case>tandoff: a tool for creating standoff documents for the <fixed-case>IULACT</fixed-case></title>
       <pages>351–356</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/307_Paper.pdf</url>
@@ -1436,7 +1436,7 @@
     <paper id="142">
       <author><first>André</first><last>Bittar</last></author>
       <author><first>Caroline</first><last>Hagège</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Charles</first><last>Teissèdre</last></author>
       <title>Temporal Annotation: A Proposal for Guidelines and an Experiment with Inter-annotator Agreement</title>
@@ -1457,7 +1457,7 @@
     </paper>
     <paper id="144">
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <title>A Corpus-based Study of the <fixed-case>G</fixed-case>erman Recipient Passive</title>
       <pages>1637–1644</pages>
@@ -1467,7 +1467,7 @@
     </paper>
     <paper id="145">
       <author><first>Tom</first><last>De Smedt</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>“Vreselijk mooi!” (terribly beautiful): A Subjectivity Lexicon for <fixed-case>D</fixed-case>utch Adjectives.</title>
       <pages>3568–3572</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/312_Paper.pdf</url>
@@ -1485,7 +1485,7 @@
     </paper>
     <paper id="147">
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Béatrice</first><last>Arnulphy</last></author>
       <author><first>Ruixin</first><last>He</last></author>
       <title>Evolution of Event Designation in Media: Preliminary Study</title>
@@ -1495,12 +1495,12 @@
       <bibkey>tannier-etal-2012-evolution</bibkey>
     </paper>
     <paper id="148">
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Pamela</first><last>Forner</last></author>
-      <author><first>Álvaro</first><last>Rodrigo</last></author>
+      <author id="alvaro-rodrigo"><first>Álvaro</first><last>Rodrigo</last></author>
       <author><first>Richard</first><last>Sutcliffe</last></author>
-      <author><first>Corina</first><last>Forascu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forascu</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <title>Evaluating Machine Reading Systems through Comprehension Tests</title>
       <pages>1143–1147</pages>
@@ -1511,7 +1511,7 @@
     <paper id="149">
       <author><first>Xinkai</first><last>Wang</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Biomedical <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case> Using an Extended <fixed-case>CM</fixed-case>e<fixed-case>SH</fixed-case> Resource to Expand Queries</title>
       <pages>1148–1155</pages>
@@ -1520,9 +1520,9 @@
       <bibkey>wang-etal-2012-biomedical</bibkey>
     </paper>
     <paper id="150">
-      <author><first>Aitor</first><last>González-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>González-Agirre</last></author>
       <author><first>Mauro</first><last>Castillo</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>A proposal for improving <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains</title>
       <pages>3457–3462</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/319_Paper.pdf</url>
@@ -1530,7 +1530,7 @@
       <bibkey>gonzalez-agirre-etal-2012-proposal</bibkey>
     </paper>
     <paper id="151">
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Robin</first><last>Rutten</last></author>
       <author><first>Stef</first><last>Scagliola</last></author>
@@ -1557,7 +1557,7 @@
     <paper id="153">
       <author><first>Juan Pablo Martínez</first><last>Cortés</last></author>
       <author><first>Jim</first><last>O’Regan</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <title>Free/Open Source Shallow-Transfer Based Machine Translation for <fixed-case>S</fixed-case>panish and <fixed-case>A</fixed-case>ragonese</title>
       <pages>2153–2157</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/326_Paper.pdf</url>
@@ -1575,8 +1575,8 @@
       <bibkey>goldhahn-etal-2012-building</bibkey>
     </paper>
     <paper id="155">
-      <author><first>Thomas Ulrich</first><last>Christiansen</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="thomas-ulrich-christiansen"><first>Thomas Ulrich</first><last>Christiansen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <title>Sense Meets Nonsense - Sense Meets Nonsense - a dual-layer <fixed-case>D</fixed-case>anish speech corpus for perception studies</title>
       <pages>3356–3361</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/330_Paper.pdf</url>
@@ -1584,9 +1584,9 @@
       <bibkey>christiansen-henrichsen-2012-sense</bibkey>
     </paper>
     <paper id="156">
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
-      <author><first>Fernando</first><last>García</last></author>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="fernando-garcia"><first>Fernando</first><last>García</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
       <title>The acquisition and dialog act labeling of the <fixed-case>EDECAN</fixed-case>-<fixed-case>SPORTS</fixed-case> corpus</title>
       <pages>1416–1420</pages>
@@ -1616,7 +1616,7 @@
     <paper id="159">
       <author><first>Christian</first><last>Smith</last></author>
       <author><first>Henrik</first><last>Danielsson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <title>A good space: Lexical predictors in word space evaluation</title>
       <pages>2530–2535</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf</url>
@@ -1625,10 +1625,10 @@
     </paper>
     <paper id="160">
       <author><first>Jan</first><last>Berka</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <title>Automatic <fixed-case>MT</fixed-case> Error Analysis: Hjerson Helping Addicter</title>
       <pages>2158–2163</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/336_Paper.pdf</url>
@@ -1637,7 +1637,7 @@
     </paper>
     <paper id="161">
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <title>An Analysis (and an Annotated Corpus) of User Responses to Machine Translation Output</title>
       <pages>1131–1136</pages>
@@ -1647,7 +1647,7 @@
     </paper>
     <paper id="162">
       <author><first>Mojgan</first><last>Seraji</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <title>A Basic Language Resource Kit for <fixed-case>P</fixed-case>ersian</title>
       <pages>2245–2252</pages>
@@ -1657,7 +1657,7 @@
     </paper>
     <paper id="163">
       <author><first>Amit</first><last>Sangodkar</last></author>
-      <author><first>Om</first><last>Damani</last></author>
+      <author id="om-p-damani"><first>Om</first><last>Damani</last></author>
       <title>Re-ordering Source Sentences for <fixed-case>SMT</fixed-case></title>
       <pages>2164–2171</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/340_Paper.pdf</url>
@@ -1666,7 +1666,7 @@
     </paper>
     <paper id="164">
       <author><first>Alex</first><last>Judea</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <title>Concept-based Selectional Preferences and Distributional Representations from <fixed-case>W</fixed-case>ikipedia Articles</title>
       <pages>2985–2990</pages>
@@ -1675,7 +1675,7 @@
       <bibkey>judea-etal-2012-concept</bibkey>
     </paper>
     <paper id="165">
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <author><first>Tianqi</first><last>Chen</last></author>
       <author><first>Georgeta</first><last>Bordea</last></author>
@@ -1687,9 +1687,9 @@
     </paper>
     <paper id="166">
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Ludovic</first><last>Quintard</last></author>
       <title>Extended Named Entities Annotation on <fixed-case>OCR</fixed-case>ed Documents: From Corpus Constitution to Evaluation Campaign</title>
       <pages>3126–3131</pages>
@@ -1716,8 +1716,8 @@
       <bibkey>vetulani-2012-wordnet</bibkey>
     </paper>
     <paper id="169">
-      <author><first>Annette</first><last>Rios</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <title>A tree is a <fixed-case>B</fixed-case>aum is an árbol is a sach’a: Creating a trilingual treebank</title>
       <pages>1874–1879</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/350_Paper.pdf</url>
@@ -1726,7 +1726,7 @@
     </paper>
     <paper id="170">
       <author><first>Kseniya</first><last>Zablotskaya</last></author>
-      <author><first>Fernando Fernández</first><last>Martínez</last></author>
+      <author id="fernando-fernandez-martinez"><first>Fernando Fernández</first><last>Martínez</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <title>Investigating Verbal Intelligence Using the <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case> Approach</title>
       <pages>1573–1576</pages>
@@ -1737,7 +1737,7 @@
     <paper id="171">
       <author><first>Kseniya</first><last>Zablotskaya</last></author>
       <author><first>Umair</first><last>Rahim</last></author>
-      <author><first>Fernando Fernández</first><last>Martínez</last></author>
+      <author id="fernando-fernandez-martinez"><first>Fernando Fernández</first><last>Martínez</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <title>Relating Dominance of Dialogue Participants with their Verbal Intelligence Scores</title>
       <pages>1289–1292</pages>
@@ -1746,8 +1746,8 @@
       <bibkey>zablotskaya-etal-2012-relating</bibkey>
     </paper>
     <paper id="172">
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>Diachronic Changes in Text Complexity in 20th Century <fixed-case>E</fixed-case>nglish Language: An <fixed-case>NLP</fixed-case> Approach</title>
       <pages>1577–1584</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/355_Paper.pdf</url>
@@ -1755,11 +1755,11 @@
       <bibkey>stajner-mitkov-2012-diachronic</bibkey>
     </paper>
     <paper id="173">
-      <author><first>Ângela</first><last>Costa</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
       <author><first>Joana</first><last>Ribeiro</last></author>
-      <author><first>Ana Cristina</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="ana-cristina-mendes"><first>Ana Cristina</first><last>Mendes</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <title>An <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>ortuguese parallel corpus of questions: translation guidelines and application in <fixed-case>SMT</fixed-case></title>
       <pages>2172–2176</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/356_Paper.pdf</url>
@@ -1767,8 +1767,8 @@
       <bibkey>costa-etal-2012-english</bibkey>
     </paper>
     <paper id="174">
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
-      <author><first>Marcus</first><last>Uneson</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="marcus-uneson"><first>Marcus</first><last>Uneson</last></author>
       <title><fixed-case>SMALLW</fixed-case>orlds – Multilingual Content-Controlled Monologues</title>
       <pages>3362–3368</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/357_Paper.pdf</url>
@@ -1808,7 +1808,7 @@
     <paper id="178">
       <author><first>Maria</first><last>Aloni</last></author>
       <author><first>Andreas</first><last>van Cranenburgh</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Marta</first><last>Sznajder</last></author>
       <title>Building a Corpus of Indefinite Uses Annotated with Fine-grained Semantic Functions</title>
       <pages>1511–1515</pages>
@@ -1827,7 +1827,7 @@
       <bibkey>gupta-etal-2012-mining</bibkey>
     </paper>
     <paper id="180">
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <author><first>Janine</first><last>Pimentel</last></author>
       <title>Capturing syntactico-semantic regularities among terms: An application of the <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et methodology to terminology</title>
       <pages>262–268</pages>
@@ -1837,8 +1837,8 @@
     </paper>
     <paper id="181">
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Formiga</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
       <title>The <fixed-case>FAUST</fixed-case> Corpus of Adequacy Assessments for Real-World Machine Translation Output</title>
       <pages>29–35</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/370_Paper.pdf</url>
@@ -1848,7 +1848,7 @@
     <paper id="182">
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <title>Annotating Story Timelines as Temporal Dependency Structures</title>
       <pages>2721–2726</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/371_Paper.pdf</url>
@@ -1867,13 +1867,13 @@
       <bibkey>seinturier-etal-2012-ontological</bibkey>
     </paper>
     <paper id="184">
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Catarina</first><last>Carvalheiro</last></author>
       <author><first>Sílvia</first><last>Pereira</last></author>
       <author><first>Sara</first><last>Silveira</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Sérgio</first><last>Castro</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <title>A <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank for <fixed-case>P</fixed-case>ortuguese: the <fixed-case>CINTIL</fixed-case>-<fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
       <pages>1516–1521</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/373_Paper.pdf</url>
@@ -1882,8 +1882,8 @@
     </paper>
     <paper id="185">
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>Highlighting relevant concepts from Topic Signatures</title>
       <pages>3841–3848</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/374_Paper.pdf</url>
@@ -1891,9 +1891,9 @@
       <bibkey>cuadros-etal-2012-highlighting</bibkey>
     </paper>
     <paper id="186">
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <author><first>Aleksandra</first><last>Trtovac</last></author>
       <author><first>Miloš</first><last>Utvić</last></author>
       <title>A tool for enhanced search of multilingual digital libraries of e-journals</title>
@@ -1905,8 +1905,8 @@
     <paper id="187">
       <author><first>Pedro</first><last>Fialho</last></author>
       <author><first>Sérgio</first><last>Curto</last></author>
-      <author><first>Ana Cristina</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="ana-cristina-mendes"><first>Ana Cristina</first><last>Mendes</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <title>Extending a wordnet framework for simplicity and scalability</title>
       <pages>3701–3705</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/376_Paper.pdf</url>
@@ -1915,7 +1915,7 @@
     </paper>
     <paper id="188">
       <author><first>Tommaso</first><last>Fornaciari</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title><fixed-case>D</fixed-case>e<fixed-case>C</fixed-case>our: a corpus of <fixed-case>DE</fixed-case>ceptive statements in <fixed-case>I</fixed-case>talian <fixed-case>COUR</fixed-case>ts</title>
       <pages>1585–1590</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/377_Paper.pdf</url>
@@ -1924,11 +1924,11 @@
     </paper>
     <paper id="189">
       <author><first>Teresa</first><last>Lynn</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title><fixed-case>I</fixed-case>rish Treebanking and Parsing: A Preliminary Evaluation</title>
       <pages>1939–1946</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/378_Paper.pdf</url>
@@ -1949,7 +1949,7 @@
     <paper id="191">
       <author><first>Mehmet Talha</first><last>Çakmak</last></author>
       <author><first>Süleyman</first><last>Acar</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <title>Word Alignment for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>urkish Language Pair</title>
       <pages>2177–2180</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/380_Paper.pdf</url>
@@ -1958,7 +1958,7 @@
     </paper>
     <paper id="192">
       <author><first>Nelly</first><last>Barbot</last></author>
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <author><first>Arnaud</first><last>Delhay</last></author>
       <title>Comparing performance of different set-covering strategies for linguistic content optimization in speech corpora</title>
       <pages>969–974</pages>
@@ -1976,7 +1976,7 @@
     </paper>
     <paper id="194">
       <author><first>Mohammad Hoseyn</first><last>Sheykholeslam</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <author><first>Hossein</first><last>Juzi</last></author>
       <title>A Framework for Spelling Correction in <fixed-case>P</fixed-case>ersian Language Using Noisy Channel Model</title>
       <pages>706–710</pages>
@@ -1985,7 +1985,7 @@
       <bibkey>sheykholeslam-etal-2012-framework</bibkey>
     </paper>
     <paper id="195">
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <title><fixed-case>D</fixed-case>bnary: <fixed-case>W</fixed-case>iktionary as a <fixed-case>LMF</fixed-case> based Multilingual <fixed-case>RDF</fixed-case> network</title>
       <pages>2466–2472</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/387_Paper.pdf</url>
@@ -1994,7 +1994,7 @@
     </paper>
     <paper id="196">
       <author><first>Dae-Lim</first><last>Choi</last></author>
-      <author><first>Bong-Wan</first><last>Kim</last></author>
+      <author id="bong-wan-kim"><first>Bong-Wan</first><last>Kim</last></author>
       <author><first>Yeon-Whoa</first><last>Kim</last></author>
       <author><first>Yong-Ju</first><last>Lee</last></author>
       <author><first>Yongnam</first><last>Um</last></author>
@@ -2047,7 +2047,7 @@
     </paper>
     <paper id="201">
       <author><first>Nava</first><last>Maroto</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <author><first>Amparo</first><last>Alcina</last></author>
       <title>Semantic Relations Established by Specialized Processes Expressed by Nouns and Verbs: Identification in a Corpus by means of Syntactico-semantic Annotation</title>
       <pages>3814–3819</pages>
@@ -2056,7 +2056,7 @@
       <bibkey>maroto-etal-2012-semantic</bibkey>
     </paper>
     <paper id="202">
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Francesco</first><last>Rubino</last></author>
       <author><first>Irene</first><last>Russo</last></author>
@@ -2071,7 +2071,7 @@
       <author><first>Benoît</first><last>Weber</last></author>
       <author><first>Geneviève</first><last>Caelen-Haumont</last></author>
       <author><first>Binh Hai</first><last>Pham</last></author>
-      <author><first>Do-Dat</first><last>Tran</last></author>
+      <author id="do-dat-tran"><first>Do-Dat</first><last>Tran</last></author>
       <title><fixed-case>MISTRAL</fixed-case>+: A Melody Intonation Speaker Tonal Range semi-automatic Analysis using variable Levels</title>
       <pages>963–968</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/397_Paper.pdf</url>
@@ -2088,7 +2088,7 @@
       <bibkey>kaeshammer-demberg-2012-german</bibkey>
     </paper>
     <paper id="205">
-      <author><first>Helen Kaiyun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Helen Kaiyun</first><last>Chen</last></author>
       <title>Annotating a corpus of human interaction with prosodic profiles — focusing on <fixed-case>M</fixed-case>andarin repair/disfluency</title>
       <pages>986–990</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/399_Paper.pdf</url>
@@ -2108,9 +2108,9 @@
     </paper>
     <paper id="207">
       <author><first>Hongzhi</first><last>Xu</last></author>
-      <author><first>Helen Kaiyun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Helen Kaiyun</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Dingxu</first><last>Shi</last></author>
       <author><first>Tin-Shing</first><last>Chiu</last></author>
       <title>A Grammar-informed Corpus-based Sentence Database for Linguistic and Computational Studies</title>
@@ -2129,13 +2129,13 @@
       <bibkey>sloetjes-somasundaram-2012-elan</bibkey>
     </paper>
     <paper id="209">
-      <author><first>Ching-Sheng</first><last>Lin</last></author>
+      <author id="ching-sheng-lin"><first>Ching-Sheng</first><last>Lin</last></author>
       <author><first>Zumrut</first><last>Akcam</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Sharon</first><last>Small</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
       <author><first>Ken</first><last>Stahl</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <title>Revealing Contentious Concepts Across Social Groups</title>
       <pages>2838–2841</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/403_Paper.pdf</url>
@@ -2145,7 +2145,7 @@
     <paper id="210">
       <author><first>Fabrizio</first><last>Borgia</last></author>
       <author><first>Claudia S.</first><last>Bianchini</last></author>
-      <author><first>Patrice</first><last>Dalle</last></author>
+      <author id="patrice-dalle"><first>Patrice</first><last>Dalle</last></author>
       <author><first>Maria</first><last>De Marsico</last></author>
       <title>Resource production of written forms of Sign Languages by a user-centered editor, <fixed-case>SW</fixed-case>ift (<fixed-case>S</fixed-case>ign<fixed-case>W</fixed-case>riting improved fast transcriber)</title>
       <pages>3779–3784</pages>
@@ -2154,9 +2154,9 @@
       <bibkey>borgia-etal-2012-resource</bibkey>
     </paper>
     <paper id="211">
-      <author><first>Balamurali</first><last>AR</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>AR</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>Cost and Benefit of Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Sentiment Analysis</title>
       <pages>3090–3097</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/405_Paper.pdf</url>
@@ -2173,7 +2173,7 @@
     </paper>
     <paper id="213">
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <title>An Adaptive Framework for Named Entity Combination</title>
       <pages>1244–1249</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/411_Paper.pdf</url>
@@ -2181,7 +2181,7 @@
       <bibkey>sacaleanu-neumann-2012-adaptive</bibkey>
     </paper>
     <paper id="214">
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Patrick</first><last>Drouin</last></author>
       <author><first>Amélie</first><last>Paulus</last></author>
       <author><first>Eugénie Rompré</first><last>Brodeur</last></author>
@@ -2209,7 +2209,7 @@
       <bibkey>rysova-2012-alternative</bibkey>
     </paper>
     <paper id="217">
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <title>Prediction of Non-Linguistic Information of Spontaneous Speech from the Prosodic Annotation: Evaluation of the <fixed-case>X</fixed-case>-<fixed-case>JT</fixed-case>o<fixed-case>BI</fixed-case> system</title>
       <pages>991–996</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/422_Paper.pdf</url>
@@ -2217,7 +2217,7 @@
       <bibkey>maekawa-2012-prediction</bibkey>
     </paper>
     <paper id="218">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <author><first>Andrea</first><last>Turbati</last></author>
       <title><fixed-case>PEARL</fixed-case>: <fixed-case>P</fixed-case>roj<fixed-case>E</fixed-case>ction of Annotations Rule Language, a Language for Projecting (<fixed-case>UIMA</fixed-case>) Annotations over <fixed-case>RDF</fixed-case> Knowledge Bases</title>
@@ -2236,10 +2236,10 @@
       <bibkey>strotgen-gertz-2012-temporal</bibkey>
     </paper>
     <paper id="220">
-      <author><first>Monica Lestari</first><last>Paramita</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="monica-lestari-paramita"><first>Monica Lestari</first><last>Paramita</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Correlation between Similarity Measures for Inter-Language Linked <fixed-case>W</fixed-case>ikipedia Articles</title>
       <pages>790–797</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/426_Paper.pdf</url>
@@ -2270,13 +2270,13 @@
       <bibkey>moneglia-etal-2012-imagact</bibkey>
     </paper>
     <paper id="223">
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Loganathan</first><last>Ramasamy</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <title><fixed-case>H</fixed-case>amle<fixed-case>DT</fixed-case>: To Parse or Not to Parse?</title>
       <pages>2735–2741</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/429_Paper.pdf</url>
@@ -2284,7 +2284,7 @@
       <bibkey>zeman-etal-2012-hamledt</bibkey>
     </paper>
     <paper id="224">
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Evgeny</first><last>Stanilovsky</last></author>
       <title><fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 3.0: Towards Wider Multilinguality</title>
       <pages>2473–2479</pages>
@@ -2293,8 +2293,8 @@
       <bibkey>padro-stanilovsky-2012-freeling</bibkey>
     </paper>
     <paper id="225">
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <title>Evaluating Appropriateness Of System Responses In A Spoken <fixed-case>CALL</fixed-case> Game</title>
       <pages>2690–2694</pages>
@@ -2305,7 +2305,7 @@
     <paper id="226">
       <author><first>Matthew</first><last>Fuchs</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <title>A Scalable Architecture For Web Deployment of Spoken Dialogue Systems</title>
       <pages>1309–1314</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/436_Paper.pdf</url>
@@ -2313,7 +2313,7 @@
       <bibkey>fuchs-etal-2012-scalable</bibkey>
     </paper>
     <paper id="227">
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <author><first>Herman</first><last>Stehouwer</last></author>
       <author><first>Lari</first><last>Lampen</last></author>
       <title>Semantic metadata mapping in practice: the Virtual Language Observatory</title>
@@ -2323,10 +2323,10 @@
       <bibkey>van-uytvanck-etal-2012-semantic</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Einar Freyr</first><last>Sigurðsson</last></author>
-      <author><first>Joel</first><last>Wallenberg</last></author>
+      <author id="joel-wallenberg"><first>Joel</first><last>Wallenberg</last></author>
       <title>The <fixed-case>I</fixed-case>celandic Parsed Historical Corpus (<fixed-case>I</fixed-case>ce<fixed-case>P</fixed-case>a<fixed-case>HC</fixed-case>)</title>
       <pages>1977–1984</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/440_Paper.pdf</url>
@@ -2335,8 +2335,8 @@
     </paper>
     <paper id="229">
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Bhuvana</first><last>Narasimhan</last></author>
       <title>Empty Argument Insertion in the <fixed-case>H</fixed-case>indi <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
       <pages>1522–1526</pages>
@@ -2345,7 +2345,7 @@
       <bibkey>vaidya-etal-2012-empty</bibkey>
     </paper>
     <paper id="230">
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <title>A Rough Set Formalization of Quantitative Evaluation with Ambiguity</title>
       <pages>2311–2317</pages>
@@ -2355,9 +2355,9 @@
     </paper>
     <paper id="231">
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <title>A Richly Annotated, Multilingual Parallel Corpus for Hybrid Machine Translation</title>
@@ -2367,7 +2367,7 @@
       <bibkey>avramidis-etal-2012-richly</bibkey>
     </paper>
     <paper id="232">
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title>The goo300k corpus of historical <fixed-case>S</fixed-case>lovene</title>
       <pages>2257–2260</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/445_Paper.pdf</url>
@@ -2386,8 +2386,8 @@
     </paper>
     <paper id="234">
       <author><first>Myriam</first><last>Rakho</last></author>
-      <author><first>Éric</first><last>Laporte</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <title>A new semantically annotated corpus with syntactic-semantic and cross-lingual senses</title>
       <pages>597–600</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/447_Paper.pdf</url>
@@ -2413,9 +2413,9 @@
       <bibkey>nicolas-etal-2012-unsupervised</bibkey>
     </paper>
     <paper id="237">
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Héctor</first><last>Llorens</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="hector-llorens"><first>Héctor</first><last>Llorens</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <title>Massively Increasing <fixed-case>TIMEX</fixed-case>3 Resources: A Transduction Approach</title>
       <pages>3754–3761</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/451_Paper.pdf</url>
@@ -2443,8 +2443,8 @@
       <bibkey>sundberg-etal-2012-visualizing</bibkey>
     </paper>
     <paper id="240">
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Przemyslaw</first><last>Lenkiewicz</last></author>
       <title>Towards Automatic Gesture Stroke Detection</title>
       <pages>231–235</pages>
@@ -2454,7 +2454,7 @@
     </paper>
     <paper id="241">
       <author><first>Richard</first><last>Johansson</last></author>
-      <author><first>Karin Friberg</first><last>Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin Friberg</first><last>Heppin</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <title>Semantic Role Labeling with the <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <pages>3697–3700</pages>
@@ -2464,7 +2464,7 @@
     </paper>
     <paper id="242">
       <author><first>Loganathan</first><last>Ramasamy</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title><fixed-case>P</fixed-case>rague Dependency Style Treebank for <fixed-case>T</fixed-case>amil</title>
       <pages>1888–1894</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/456_Paper.pdf</url>
@@ -2472,8 +2472,8 @@
       <bibkey>ramasamy-zabokrtsky-2012-prague</bibkey>
     </paper>
     <paper id="243">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Lene</first><last>Offersgaard</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Matthias</first><last>Zumpe</last></author>
@@ -2484,9 +2484,9 @@
       <bibkey>tiedemann-etal-2012-distributed</bibkey>
     </paper>
     <paper id="244">
-      <author><first>Patrícia</first><last>Gonçalves</last></author>
+      <author id="patricia-goncalves"><first>Patrícia</first><last>Gonçalves</last></author>
       <author><first>Rita</first><last>Santos</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <title>Treebanking by Sentence and Tree Transformation: Building a Treebank to support Question Answering in <fixed-case>P</fixed-case>ortuguese</title>
       <pages>1895–1901</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/460_Paper.pdf</url>
@@ -2495,7 +2495,7 @@
     </paper>
     <paper id="245">
       <author><first>David</first><last>Graff</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <title>Developing <fixed-case>LMF</fixed-case>-<fixed-case>XML</fixed-case> Bilingual Dictionaries for Colloquial <fixed-case>A</fixed-case>rabic Dialects</title>
       <pages>269–274</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/461_Paper.pdf</url>
@@ -2503,7 +2503,7 @@
       <bibkey>graff-maamouri-2012-developing</bibkey>
     </paper>
     <paper id="246">
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title>Parallel Data, Tools and Interfaces in <fixed-case>OPUS</fixed-case></title>
       <pages>2214–2218</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf</url>
@@ -2530,7 +2530,7 @@
       <bibkey>mohamed-etal-2012-annotating</bibkey>
     </paper>
     <paper id="249">
-      <author><first>Julia Maria</first><last>Schulz</last></author>
+      <author id="julia-maria-schulz"><first>Julia Maria</first><last>Schulz</last></author>
       <author><first>Daniela</first><last>Becks</last></author>
       <author><first>Christa</first><last>Womser-Hacker</last></author>
       <author><first>Thomas</first><last>Mandl</last></author>
@@ -2542,7 +2542,7 @@
     </paper>
     <paper id="250">
       <author><first>Mathieu-Henri</first><last>Falco</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title><fixed-case>K</fixed-case>itten: a tool for normalizing <fixed-case>HTML</fixed-case> and extracting its textual content</title>
       <pages>2261–2267</pages>
@@ -2553,7 +2553,7 @@
     <paper id="251">
       <author><first>Emanuel</first><last>Dima</last></author>
       <author><first>Christina</first><last>Hoppermann</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Thorsten</first><last>Trippel</last></author>
       <author><first>Claus</first><last>Zinn</last></author>
       <title>A Metadata Editor to Support the Description of Linguistic Resources</title>
@@ -2574,8 +2574,8 @@
     <paper id="253">
       <author><first>Emanuel</first><last>Dima</last></author>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Christina</first><last>Hoppermann</last></author>
       <author><first>Thorsten</first><last>Trippel</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
@@ -2588,8 +2588,8 @@
     </paper>
     <paper id="254">
       <author><first>Montserrat</first><last>Arza</last></author>
-      <author><first>José M.</first><last>García Miguel</last></author>
-      <author><first>Francisco</first><last>Campillo</last></author>
+      <author id="jose-m-garcia-miguel"><first>José M.</first><last>García Miguel</last></author>
+      <author id="francisco-campillo"><first>Francisco</first><last>Campillo</last></author>
       <author><first>Miguel Cuevas -</first><last>Alonso</last></author>
       <title>A <fixed-case>G</fixed-case>alician Syntactic Corpus with Application to Intonation Modeling</title>
       <pages>1650–1654</pages>
@@ -2598,9 +2598,9 @@
       <bibkey>arza-etal-2012-galician</bibkey>
     </paper>
     <paper id="255">
-      <author><first>Tafseer</first><last>Ahmed</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Sebastian</first><last>Sulger</last></author>
       <title>A Reference Dependency Bank for Analyzing Complex Predicates</title>
       <pages>3145–3152</pages>
@@ -2632,7 +2632,7 @@
     </paper>
     <paper id="258">
       <author><first>Marianna</first><last>Apidianaki</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title>Applying cross-lingual <fixed-case>WSD</fixed-case> to wordnet development</title>
       <pages>833–840</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/478_Paper.pdf</url>
@@ -2640,7 +2640,7 @@
       <bibkey>apidianaki-sagot-2012-applying</bibkey>
     </paper>
     <paper id="259">
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
       <author><first>Chiara</first><last>Melloni</last></author>
       <author><first>Aurélie</first><last>Picton</last></author>
@@ -2652,8 +2652,8 @@
     </paper>
     <paper id="260">
       <author><first>Mark</first><last>Fishel</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <title><fixed-case>T</fixed-case>erra: a Collection of Translation Error-Annotated Corpora</title>
       <pages>7–14</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/481_Paper.pdf</url>
@@ -2693,11 +2693,11 @@
       <bibkey>zablotskiy-etal-2012-speech</bibkey>
     </paper>
     <paper id="264">
-      <author><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
-      <author><first>Mikel</first><last>Penagarikano</last></author>
+      <author id="luis-javier-rodriguez-fuentes"><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
+      <author id="mikel-penagarikano"><first>Mikel</first><last>Penagarikano</last></author>
       <author><first>Amparo</first><last>Varona</last></author>
-      <author><first>Mireia</first><last>Diez</last></author>
-      <author><first>Germán</first><last>Bordel</last></author>
+      <author id="mireia-diez"><first>Mireia</first><last>Diez</last></author>
+      <author id="german-bordel"><first>Germán</first><last>Bordel</last></author>
       <title><fixed-case>KALAKA</fixed-case>-2: a <fixed-case>TV</fixed-case> Broadcast Speech Database for the Recognition of <fixed-case>I</fixed-case>berian Languages in Clean and Noisy Environments</title>
       <pages>99–105</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/486_Paper.pdf</url>
@@ -2756,9 +2756,9 @@
       <bibkey>wang-etal-2012-ntusocialrec</bibkey>
     </paper>
     <paper id="270">
-      <author><first>Guillaume</first><last>Gravier</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Niklas</first><last>Paulsson</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="niklas-paulsson"><first>Niklas</first><last>Paulsson</last></author>
       <author><first>Matthieu</first><last>Carré</last></author>
       <author><first>Aude</first><last>Giraudel</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
@@ -2769,9 +2769,9 @@
       <bibkey>gravier-etal-2012-etape</bibkey>
     </paper>
     <paper id="271">
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Lauren</first><last>Romeo</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <title>Automatic lexical semantic classification of nouns</title>
       <pages>1448–1455</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/496_Paper.pdf</url>
@@ -2800,12 +2800,12 @@
       <author><first>Anna</first><last>Braasch</last></author>
       <author><first>Lina</first><last>Henriksen</last></author>
       <author><first>Csaba</first><last>Huszka</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Lars</first><last>Kayser</last></author>
       <author><first>Bente</first><last>Maegaard</last></author>
       <author><first>Ole</first><last>Norgaard</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
-      <author><first>Jürgen</first><last>Wedekind</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
+      <author id="jurgen-wedekind"><first>Jürgen</first><last>Wedekind</last></author>
       <title>Creation and use of Language Resources in a Question-Answering e<fixed-case>H</fixed-case>ealth System</title>
       <pages>2536–2542</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/504_Paper.pdf</url>
@@ -2813,7 +2813,7 @@
       <bibkey>andersen-etal-2012-creation</bibkey>
     </paper>
     <paper id="275">
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Alejandra</first><last>Lorenzo</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <title>Building and Exploiting a Corpus of Dialog Interactions between <fixed-case>F</fixed-case>rench Speaking Virtual and Human Agents</title>
@@ -2825,8 +2825,8 @@
     <paper id="276">
       <author><first>Marion</first><last>Potet</last></author>
       <author><first>Emmanuelle</first><last>Esperança-Rodier</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <title>Collection of a Large Database of <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case> Output Corrections</title>
       <pages>4043–4048</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/506_Paper.pdf</url>
@@ -2845,7 +2845,7 @@
     </paper>
     <paper id="278">
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Martine</first><last>De Cock</last></author>
       <title>Discovering Missing <fixed-case>W</fixed-case>ikipedia Inter-language Links by means of Cross-lingual Word Sense Disambiguation</title>
       <pages>841–846</pages>
@@ -2855,7 +2855,7 @@
     </paper>
     <paper id="279">
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title><fixed-case>A</fixed-case>rabic-Segmentation Combination Strategies for Statistical Machine Translation</title>
       <pages>3915–3920</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf</url>
@@ -2863,22 +2863,22 @@
       <bibkey>mansour-ney-2012-arabic</bibkey>
     </paper>
     <paper id="280">
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
       <author><first>Marie</first><last>Mikulová</last></author>
       <author><first>Petr</first><last>Pajas</last></author>
       <author><first>Jan</first><last>Popelka</last></author>
-      <author><first>Jiří</first><last>Semecký</last></author>
-      <author><first>Jana</first><last>Šindlerová</last></author>
+      <author id="jiri-semecky"><first>Jiří</first><last>Semecký</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
       <author><first>Josef</first><last>Toman</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title>Announcing <fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank 2.0</title>
       <pages>3153–3160</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/510_Paper.pdf</url>
@@ -2887,7 +2887,7 @@
     </paper>
     <paper id="281">
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Kristian</first><last>Heal</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
@@ -2899,7 +2899,7 @@
       <bibkey>felt-etal-2012-first</bibkey>
     </paper>
     <paper id="282">
-      <author><first>Emina</first><last>Kurtić</last></author>
+      <author id="emina-kurtic"><first>Emina</first><last>Kurtić</last></author>
       <author><first>Bill</first><last>Wells</last></author>
       <author><first>Guy J.</first><last>Brown</last></author>
       <author><first>Timothy</first><last>Kempton</last></author>
@@ -2911,8 +2911,8 @@
       <bibkey>kurtic-etal-2012-corpus</bibkey>
     </paper>
     <paper id="283">
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <title>Measuring the compositionality of <fixed-case>NV</fixed-case> expressions in <fixed-case>B</fixed-case>asque by means of distributional similarity techniques</title>
       <pages>2389–2394</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf</url>
@@ -2921,9 +2921,9 @@
     </paper>
     <paper id="284">
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
-      <author><first>María</first><last>Pozzi</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
+      <author id="maria-pozzi"><first>María</first><last>Pozzi</last></author>
       <title>Using <fixed-case>W</fixed-case>ikipedia to Validate the Terminology found in a Corpus of Basic Textbooks</title>
       <pages>3820–3827</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/515_Paper.pdf</url>
@@ -2932,7 +2932,7 @@
     </paper>
     <paper id="285">
       <author><first>Javier</first><last>Caminero</last></author>
-      <author><first>Mari Carmen</first><last>Rodríguez</last></author>
+      <author id="mari-carmen-rodriguez-gancedo"><first>Mari Carmen</first><last>Rodríguez</last></author>
       <author><first>Jean</first><last>Vanderdonckt</last></author>
       <author><first>Fabio</first><last>Paternò</last></author>
       <author><first>Joerg</first><last>Rett</last></author>
@@ -2946,8 +2946,8 @@
       <bibkey>caminero-etal-2012-serenoa</bibkey>
     </paper>
     <paper id="286">
-      <author><first>Amalia</first><last>Todirascu</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jennifer</first><last>Krisch</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
@@ -2958,14 +2958,14 @@
       <bibkey>todirascu-etal-2012-french</bibkey>
     </paper>
     <paper id="287">
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <author><first>Beatriz</first><last>Fisas</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
       <author><first>Sergi</first><last>Torner</last></author>
       <author><first>Mercè</first><last>Lorente</last></author>
-      <author><first>Silvia</first><last>Vázquez</last></author>
+      <author id="silvia-vazquez"><first>Silvia</first><last>Vázquez</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
       <title>The <fixed-case>IULA</fixed-case> Treebank</title>
       <pages>1920–1926</pages>
@@ -2995,7 +2995,7 @@
     </paper>
     <paper id="290">
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <title>An Evaluation of the Effect of Automatic Preprocessing on Syntactic Parsing for Biomedical Relation Extraction</title>
       <pages>544–551</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/522_Paper.pdf</url>
@@ -3004,9 +3004,9 @@
     </paper>
     <paper id="291">
       <author><first>Herman</first><last>Stehouwer</last></author>
-      <author><first>Matej</first><last>Durco</last></author>
+      <author id="matej-durco"><first>Matej</first><last>Durco</last></author>
       <author><first>Eric</first><last>Auer</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <title>Federated Search: Towards a Common Search Infrastructure</title>
       <pages>3255–3259</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/524_Paper.pdf</url>
@@ -3015,8 +3015,8 @@
     </paper>
     <paper id="292">
       <author><first>Elsa</first><last>Tolone</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Éric</first><last>Villemonte de La Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de La Clergerie</last></author>
       <title>Evaluating and improving syntactic lexica by plugging them within a parser</title>
       <pages>2742–2749</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/525_Paper.pdf</url>
@@ -3024,9 +3024,9 @@
       <bibkey>tolone-etal-2012-evaluating</bibkey>
     </paper>
     <paper id="293">
-      <author><first>Jing Guang</first><last>Han</last></author>
+      <author id="jingguang-han"><first>Jing Guang</first><last>Han</last></author>
       <author><first>Emer</first><last>Gilmartin</last></author>
-      <author><first>Celine</first><last>De Looze</last></author>
+      <author id="celine-de-looze"><first>Celine</first><last>De Looze</last></author>
       <author><first>Brian</first><last>Vaughan</last></author>
       <author><first>Nick</first><last>Campbell</last></author>
       <title>The Herme Database of Spontaneous Multimodal Human-Robot Dialogues</title>
@@ -3036,8 +3036,8 @@
       <bibkey>han-etal-2012-herme</bibkey>
     </paper>
     <paper id="294">
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <title>Source-Language Dictionaries Help Non-Expert Users to Enlarge Target-Language Dictionaries for Machine Translation</title>
       <pages>3422–3429</pages>
@@ -3054,14 +3054,14 @@
       <bibkey>schmidt-2012-exmaralda</bibkey>
     </paper>
     <paper id="296">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Jan</first><last>Alexandersson</last></author>
       <author><first>Jae-Woong</first><last>Choe</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title><fixed-case>ISO</fixed-case> 24617-2: A semantically-based standard for dialogue annotation</title>
       <pages>430–437</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/530_Paper.pdf</url>
@@ -3079,11 +3079,11 @@
     </paper>
     <paper id="298">
       <author><first>Natalia</first><last>Konstantinova</last></author>
-      <author><first>Sheila C.M.</first><last>de Sousa</last></author>
-      <author><first>Noa P.</first><last>Cruz</last></author>
-      <author><first>Manuel J.</first><last>Maña</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="sheila-c-m-de-sousa"><first>Sheila C.M.</first><last>de Sousa</last></author>
+      <author id="noa-p-cruz-diaz"><first>Noa P.</first><last>Cruz</last></author>
+      <author id="manuel-j-mana-lopez"><first>Manuel J.</first><last>Maña</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title>A review corpus annotated for negation, speculation and their scope</title>
       <pages>3190–3195</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/533_Paper.pdf</url>
@@ -3094,7 +3094,7 @@
       <author><first>Valerio</first><last>Basile</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Kilian</first><last>Evang</last></author>
-      <author><first>Noortje</first><last>Venhuizen</last></author>
+      <author id="noortje-venhuizen"><first>Noortje</first><last>Venhuizen</last></author>
       <title>Developing a large semantically annotated corpus</title>
       <pages>3196–3200</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/534_Paper.pdf</url>
@@ -3115,7 +3115,7 @@
     <paper id="301">
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Maria</first><last>Giannoudaki</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <title>Associative and Semantic Features Extracted From Web-Harvested Corpora</title>
       <pages>2991–2998</pages>
@@ -3125,8 +3125,8 @@
     </paper>
     <paper id="302">
       <author><first>Maaske</first><last>Treurniet</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <title>Collection of a corpus of <fixed-case>D</fixed-case>utch <fixed-case>SMS</fixed-case></title>
       <pages>2268–2273</pages>
@@ -3147,7 +3147,7 @@
     </paper>
     <paper id="304">
       <author><first>Nikos</first><last>Tsourakis</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <title>A Corpus for a Gesture-Controlled Mobile Spoken Dialogue System</title>
       <pages>1315–1322</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/539_Paper.pdf</url>
@@ -3157,9 +3157,9 @@
     <paper id="305">
       <author><first>Marc</first><last>Poch</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>Towards a User-Friendly Platform for Building Language Resources based on Web Services</title>
       <pages>1156–1163</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/543_Paper.pdf</url>
@@ -3167,9 +3167,9 @@
       <bibkey>poch-etal-2012-towards</bibkey>
     </paper>
     <paper id="306">
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <title>Collaborative semantic editing of linked data lexica</title>
       <pages>2619–2625</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/544_Paper.pdf</url>
@@ -3190,8 +3190,8 @@
     </paper>
     <paper id="308">
       <author><first>Willem</first><last>Elbers</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Dieter</first><last>van Uytvanck</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>van Uytvanck</last></author>
       <title>Proper Language Resource Centers</title>
       <pages>3260–3263</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/547_Paper.pdf</url>
@@ -3212,7 +3212,7 @@
     </paper>
     <paper id="310">
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Claire</first><last>François</last></author>
+      <author id="claire-francois"><first>Claire</first><last>François</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Maha</first><last>Ghribi</last></author>
       <title>Analyzing the Impact of Prevalence on the Evaluation of a Manual Annotation Campaign</title>
@@ -3222,7 +3222,7 @@
       <bibkey>fort-etal-2012-analyzing</bibkey>
     </paper>
     <paper id="311">
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <author><first>Jörg</first><last>Frommer</last></author>
       <author><first>Rafael</first><last>Friesen</last></author>
       <author><first>Matthias</first><last>Haase</last></author>
@@ -3266,7 +3266,7 @@
       <bibkey>bouamor-etal-2012-contrastive</bibkey>
     </paper>
     <paper id="315">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <title>Expanding <fixed-case>A</fixed-case>rabic Treebank to Speech: Results from Broadcast News</title>
@@ -3288,7 +3288,7 @@
       <author><first>Anita</first><last>Alicante</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Anna</first><last>Corazza</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <title>A treebank-based study on the influence of <fixed-case>I</fixed-case>talian word order on parsing performance</title>
       <pages>1985–1992</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/561_Paper.pdf</url>
@@ -3297,9 +3297,9 @@
     </paper>
     <paper id="318">
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>Practical Evaluation of Human and Synthesized Speech for Virtual Human Dialogue Systems</title>
       <pages>3519–3526</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/562_Paper.pdf</url>
@@ -3332,7 +3332,7 @@
       <bibkey>roche-2012-ontoterminology</bibkey>
     </paper>
     <paper id="322">
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Rossano</first><last>Barone</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <title>Corpus Annotation as a Scientific Task</title>
@@ -3352,7 +3352,7 @@
       <bibkey>mendes-etal-2012-dbpedia</bibkey>
     </paper>
     <paper id="324">
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <title>A Morphological Analyzer For <fixed-case>W</fixed-case>olof Using Finite-State Techniques</title>
       <pages>894–901</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/572_Paper.pdf</url>
@@ -3360,7 +3360,7 @@
       <bibkey>dione-2012-morphological</bibkey>
     </paper>
     <paper id="325">
-      <author><first>Leonardo Campillos</first><last>Llanos</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo Campillos</first><last>Llanos</last></author>
       <title>Designing a search interface for a <fixed-case>S</fixed-case>panish learner spoken corpus: the end-user’s evaluation</title>
       <pages>241–248</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/574_Paper.pdf</url>
@@ -3368,7 +3368,7 @@
       <bibkey>llanos-2012-designing</bibkey>
     </paper>
     <paper id="326">
-      <author><first>Carla Parra</first><last>Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></author>
       <title>Design and compilation of a specialized <fixed-case>S</fixed-case>panish-<fixed-case>G</fixed-case>erman parallel corpus</title>
       <pages>2199–2206</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/577_Paper.pdf</url>
@@ -3386,8 +3386,8 @@
     </paper>
     <paper id="328">
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <title>Conventional Orthography for Dialectal <fixed-case>A</fixed-case>rabic</title>
       <pages>711–718</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/579_Paper.pdf</url>
@@ -3395,9 +3395,9 @@
       <bibkey>habash-etal-2012-conventional</bibkey>
     </paper>
     <paper id="329">
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Dieter</first><last>van Uytvanck</last></author>
-      <author><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>van Uytvanck</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
       <author><first>Thorsten</first><last>Trippel</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <title>Standardizing a Component Metadata Infrastructure</title>
@@ -3410,7 +3410,7 @@
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>M-Dyaa</first><last>Albakour</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <title>Assessing Crowdsourcing Quality through Objective Tasks</title>
       <pages>1456–1461</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/583_Paper.pdf</url>
@@ -3418,7 +3418,7 @@
       <bibkey>aker-etal-2012-assessing</bibkey>
     </paper>
     <paper id="331">
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Susanne</first><last>Borgwaldt</last></author>
       <author><first>Ronny</first><last>Jauch</last></author>
       <title>Association Norms of <fixed-case>G</fixed-case>erman Noun Compounds</title>
@@ -3438,7 +3438,7 @@
       <bibkey>ambati-etal-2012-word</bibkey>
     </paper>
     <paper id="333">
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <author><first>Heliana</first><last>Mello</last></author>
       <author><first>Alessandro</first><last>Panunzi</last></author>
       <author><first>Tommaso</first><last>Raso</last></author>
@@ -3461,10 +3461,10 @@
       <bibkey>koeva-etal-2012-bulgarian</bibkey>
     </paper>
     <paper id="335">
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <title>The <fixed-case>MASC</fixed-case> Word Sense Corpus</title>
       <pages>3025–3030</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/589_Paper.pdf</url>
@@ -3474,8 +3474,8 @@
     <paper id="336">
       <author><first>Cristina</first><last>Mota</last></author>
       <author><first>Alberto</first><last>Simões</last></author>
-      <author><first>Cláudia</first><last>Freitas</last></author>
-      <author><first>Luís</first><last>Costa</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
+      <author id="luis-fernando-costa"><first>Luís</first><last>Costa</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <title><fixed-case>P</fixed-case>ágico: Evaluating <fixed-case>W</fixed-case>ikipedia-based information retrieval in <fixed-case>P</fixed-case>ortuguese</title>
       <pages>2015–2022</pages>
@@ -3484,7 +3484,7 @@
       <bibkey>mota-etal-2012-pagico</bibkey>
     </paper>
     <paper id="337">
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <author><first>Ingrid</first><last>Falk</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <author><first>Laura</first><last>Perez-Beltrachini</last></author>
@@ -3509,9 +3509,9 @@
       <author><first>Ron</first><last>Artstein</last></author>
       <author><first>Jillian</first><last>Gerten</last></author>
       <author><first>Athanasios</first><last>Katsamanis</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Angela</first><last>Nazarian</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <title>The Twins Corpus of Museum Visitor Questions</title>
       <pages>2355–2361</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/595_Paper.pdf</url>
@@ -3530,10 +3530,10 @@
     </paper>
     <paper id="341">
       <author><first>Doaa</first><last>Samy</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
       <author><first>Conchi</first><last>Bueno-Díaz</last></author>
-      <author><first>Marta</first><last>Garrote-Salazar</last></author>
-      <author><first>José M.</first><last>Guirao</last></author>
+      <author id="marta-garrote-salazar"><first>Marta</first><last>Garrote-Salazar</last></author>
+      <author id="jose-m-guirao"><first>José M.</first><last>Guirao</last></author>
       <title>Medical Term Extraction in an <fixed-case>A</fixed-case>rabic Medical Corpus</title>
       <pages>640–645</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/597_Paper.pdf</url>
@@ -3563,7 +3563,7 @@
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Younes</first><last>Samih</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title><fixed-case>A</fixed-case>rabic Word Generation and Modelling for Spell Checking</title>
       <pages>719–725</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/603_Paper.pdf</url>
@@ -3572,7 +3572,7 @@
     </paper>
     <paper id="345">
       <author><first>Yasuharu</first><last>Den</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Katsuya</first><last>Takanashi</last></author>
       <author><first>Nao</first><last>Yoshida</last></author>
       <title>Annotation of response tokens and their triggering expressions in <fixed-case>J</fixed-case>apanese multi-party conversations</title>
@@ -3592,8 +3592,8 @@
     </paper>
     <paper id="347">
       <author><first>Takahiro</first><last>Miyajima</last></author>
-      <author><first>Hideaki</first><last>Kikuchi</last></author>
-      <author><first>Katsuhiko</first><last>Shirai</last></author>
+      <author id="hideaki-kikuchi"><first>Hideaki</first><last>Kikuchi</last></author>
+      <author id="katsuhiko-shirai"><first>Katsuhiko</first><last>Shirai</last></author>
       <author><first>Shigeki</first><last>Okawa</last></author>
       <title>Method for Collection of Acted Speech Using Various Situation Scripts</title>
       <pages>1179–1182</pages>
@@ -3602,8 +3602,8 @@
       <bibkey>miyajima-etal-2012-method</bibkey>
     </paper>
     <paper id="348">
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Dieter</first><last>van Uytvanck</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>van Uytvanck</last></author>
       <author><first>Gunter</first><last>Senft</last></author>
       <title>Citing on-line Language Resources</title>
       <pages>1391–1394</pages>
@@ -3615,7 +3615,7 @@
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Khaled</first><last>Shaalan</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <title>Automatic Extraction and Evaluation of <fixed-case>A</fixed-case>rabic <fixed-case>LFG</fixed-case> Resources</title>
       <pages>1947–1954</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf</url>
@@ -3623,7 +3623,7 @@
       <bibkey>attia-etal-2012-automatic</bibkey>
     </paper>
     <paper id="350">
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
       <title>Evaluating the Impact of External Lexical Resources into a <fixed-case>CRF</fixed-case>-based Multiword Segmenter and Part-of-Speech Tagger</title>
       <pages>646–650</pages>
@@ -3633,7 +3633,7 @@
     </paper>
     <paper id="351">
       <author><first>Brett</first><last>Drury</last></author>
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <title>The Minho Quotation Resource</title>
       <pages>2280–2285</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/611_Paper.pdf</url>
@@ -3705,7 +3705,7 @@
     <paper id="359">
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Evangelos</first><last>Kanoulas</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>A light way to collect comparable corpora from the Web</title>
       <pages>15–20</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/626_Paper.pdf</url>
@@ -3714,7 +3714,7 @@
     </paper>
     <paper id="360">
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Marta R.</first><last>Costa-Jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-Jussà</last></author>
       <author><first>Judith</first><last>Domingo</last></author>
       <author><first>Montse</first><last>Marquina</last></author>
       <author><first>Martí</first><last>Quixal</last></author>
@@ -3725,8 +3725,8 @@
       <bibkey>melero-etal-2012-holaaa</bibkey>
     </paper>
     <paper id="361">
-      <author><first>Danica</first><last>Damljanović</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="danica-damljanovic"><first>Danica</first><last>Damljanović</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>M-Dyaa</first><last>Albakour</last></author>
       <author><first>Johann</first><last>Petrak</last></author>
       <author><first>Mihai</first><last>Lupu</last></author>
@@ -3738,7 +3738,7 @@
     </paper>
     <paper id="362">
       <author><first>Marilisa</first><last>Amoia</last></author>
-      <author><first>Kerstin</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin</first><last>Kunz</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <title>Coreference in Spoken vs. Written Texts: a Corpus-based Analysis</title>
       <pages>158–164</pages>
@@ -3747,9 +3747,9 @@
       <bibkey>amoia-etal-2012-coreference</bibkey>
     </paper>
     <paper id="363">
-      <author><first>Olivier</first><last>Boeffard</last></author>
+      <author id="olivier-boeffard"><first>Olivier</first><last>Boeffard</last></author>
       <author><first>Laure</first><last>Charonnat</last></author>
-      <author><first>Sébastien Le</first><last>Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien Le</first><last>Maguer</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
       <title>Towards Fully Automatic Annotation of Audio Books for <fixed-case>TTS</fixed-case></title>
       <pages>975–980</pages>
@@ -3760,7 +3760,7 @@
     <paper id="364">
       <author><first>Ian</first><last>Lewin</last></author>
       <author><first>Şenay</first><last>Kafkas</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <title><fixed-case>C</fixed-case>entroids: Gold standards with distributional variation</title>
       <pages>3894–3900</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/633_Paper.pdf</url>
@@ -3777,7 +3777,7 @@
       <bibkey>navarretta-paggio-2012-multimodal</bibkey>
     </paper>
     <paper id="366">
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <author><first>Alexander</first><last>O’Connor</last></author>
       <author><first>Andrzej</first><last>Zydroń</last></author>
       <author><first>Gerd</first><last>Sjögren</last></author>
@@ -3810,9 +3810,9 @@
     </paper>
     <paper id="369">
       <author><first>Costanza</first><last>Navarretta</last></author>
-      <author><first>Elisabeth</first><last>Ahlsén</last></author>
+      <author id="elisabeth-ahlsen"><first>Elisabeth</first><last>Ahlsén</last></author>
       <author><first>Jens</first><last>Allwood</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Patrizia</first><last>Paggio</last></author>
       <title>Feedback in <fixed-case>N</fixed-case>ordic First-Encounters: a Comparative Study</title>
       <pages>2494–2499</pages>
@@ -3834,7 +3834,7 @@
     </paper>
     <paper id="371">
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <title><fixed-case>M</fixed-case>ulti<fixed-case>UN</fixed-case> v2: <fixed-case>UN</fixed-case> Documents with Multilingual Alignments</title>
       <pages>2500–2504</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/641_Paper.pdf</url>
@@ -3852,7 +3852,7 @@
     </paper>
     <paper id="373">
       <author><first>Gracinda</first><last>Carvalho</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
       <author><first>Vitor</first><last>Rocio</last></author>
       <title>Building and Exploring Semantic Equivalences Resources</title>
       <pages>2038–2042</pages>
@@ -3861,7 +3861,7 @@
       <bibkey>carvalho-etal-2012-building</bibkey>
     </paper>
     <paper id="374">
-      <author><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
       <title><fixed-case>IDENTIC</fixed-case> Corpus: Morphologically Enriched <fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Parallel Corpus</title>
       <pages>902–906</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/644_Paper.pdf</url>
@@ -3869,8 +3869,8 @@
       <bibkey>larasati-2012-identic</bibkey>
     </paper>
     <paper id="375">
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Petra</first><last>Galuščáková</last></author>
       <author><first>Martin</first><last>Majliš</last></author>
@@ -3887,7 +3887,7 @@
     </paper>
     <paper id="376">
       <author><first>Kais</first><last>Dukes</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title><fixed-case>LAMP</fixed-case>: A Multimodal Web Platform for Collaborative Linguistic Analysis</title>
       <pages>3268–3275</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/646_Paper.pdf</url>
@@ -3904,7 +3904,7 @@
       <bibkey>ogrodniczuk-lenart-2012-web</bibkey>
     </paper>
     <paper id="378">
-      <author><first>Casey Redd</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey Redd</first><last>Kennington</last></author>
       <author><first>Martin</first><last>Kay</last></author>
       <author><first>Annemarie</first><last>Friedrich</last></author>
       <title>Suffix Trees as Language Models</title>
@@ -3914,9 +3914,9 @@
       <bibkey>kennington-etal-2012-suffix</bibkey>
     </paper>
     <paper id="379">
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <title>The <fixed-case>R</fixed-case>omanian Neuter Examined Through A Two-Gender N-Gram Classification System</title>
       <pages>907–910</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf</url>
@@ -3926,7 +3926,7 @@
     <paper id="380">
       <author><first>Soojeong</first><last>Eom</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <title>Using semi-experts to derive judgments on word sense alignment: a pilot study</title>
       <pages>605–611</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/652_Paper.pdf</url>
@@ -3945,7 +3945,7 @@
       <author><first>Dawn</first><last>Lawrie</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <title>Creating and Curating a Cross-Language Person-Entity Linking Collection</title>
       <pages>3106–3110</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/655_Paper.pdf</url>
@@ -3954,7 +3954,7 @@
     </paper>
     <paper id="383">
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>The <fixed-case>TARSQI</fixed-case> Toolkit</title>
       <pages>2043–2048</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/656_Paper.pdf</url>
@@ -3974,7 +3974,7 @@
       <author><first>Jonathan</first><last>Wright</last></author>
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Brendan</first><last>Callahan</last></author>
       <title>Annotation Trees: <fixed-case>LDC</fixed-case>’s customizable, extensible, scalable, annotation infrastructure</title>
       <pages>479–485</pages>
@@ -3992,7 +3992,7 @@
     </paper>
     <paper id="387">
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <title><fixed-case>YADAC</fixed-case>: Yet another Dialectal <fixed-case>A</fixed-case>rabic Corpus</title>
       <pages>2882–2889</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/663_Paper.pdf</url>
@@ -4011,11 +4011,11 @@
       <bibkey>clarke-etal-2012-nlp</bibkey>
     </paper>
     <paper id="389">
-      <author><first>Luís</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>João P.</first><last>Neto</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="joao-p-neto"><first>João P.</first><last>Neto</last></author>
       <title>Supervised Topical Key Phrase Extraction of News Stories using Crowdsourcing, Light Filtering and Co-reference Normalization</title>
       <pages>399–403</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/672_Paper.pdf</url>
@@ -4034,8 +4034,8 @@
     </paper>
     <paper id="391">
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <title>Annotating Near-Identity from Coreference Disagreements</title>
       <pages>165–172</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/674_Paper.pdf</url>
@@ -4071,7 +4071,7 @@
       <bibkey>schumann-2012-knowledge</bibkey>
     </paper>
     <paper id="395">
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
       <title>Brand Pitt: A Corpus to Explore the Art of Naming</title>
@@ -4081,8 +4081,8 @@
       <bibkey>ozbal-etal-2012-brand</bibkey>
     </paper>
     <paper id="396">
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Paola</first><last>Monachesi</last></author>
       <title>Evaluating automatic cross-domain <fixed-case>D</fixed-case>utch semantic role annotation</title>
       <pages>88–93</pages>
@@ -4093,9 +4093,9 @@
     <paper id="397">
       <author><first>Thierry</first><last>Bazillon</last></author>
       <author><first>Melanie</first><last>Deplano</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <title>Syntactic annotation of spontaneous speech: application to call-center conversation data</title>
       <pages>1338–1342</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/682_Paper.pdf</url>
@@ -4113,12 +4113,12 @@
       <bibkey>hong-etal-2012-korean</bibkey>
     </paper>
     <paper id="399">
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Benjamin</first><last>Maza</last></author>
       <author><first>Nicolas</first><last>Bigouroux</last></author>
       <author><first>Thierry</first><last>Bazillon</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <author><first>Eric</first><last>Arbillot</last></author>
       <title><fixed-case>DECODA</fixed-case>: a call-centre human-human spoken conversation corpus</title>
       <pages>1343–1347</pages>
@@ -4169,15 +4169,15 @@
       <bibkey>akiba-etal-2012-designing</bibkey>
     </paper>
     <paper id="404">
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
-      <author><first>Leonardo Campillos</first><last>Llanos</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo Campillos</first><last>Llanos</last></author>
       <author><first>Yang</first><last>Dong</last></author>
       <author><first>Emi</first><last>Takamori</last></author>
-      <author><first>José M.</first><last>Guirao</last></author>
+      <author id="jose-m-guirao"><first>José M.</first><last>Guirao</last></author>
       <author><first>Paula</first><last>Gozalo</last></author>
       <author><first>Chieko</first><last>Kimura</last></author>
       <author><first>Kengo</first><last>Matsui</last></author>
-      <author><first>Marta</first><last>Garrote-Salazar</last></author>
+      <author id="marta-garrote-salazar"><first>Marta</first><last>Garrote-Salazar</last></author>
       <title>Spontaneous Speech Corpora for language learners of <fixed-case>S</fixed-case>panish, <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese</title>
       <pages>2695–2701</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/697_Paper.pdf</url>
@@ -4186,7 +4186,7 @@
     </paper>
     <paper id="405">
       <author><first>Anthony</first><last>Rousseau</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <title><fixed-case>TED</fixed-case>-<fixed-case>LIUM</fixed-case>: an Automatic Speech Recognition dedicated corpus</title>
       <pages>125–129</pages>
@@ -4203,11 +4203,11 @@
       <bibkey>petasis-2012-sync3</bibkey>
     </paper>
     <paper id="407">
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Matthieu</first><last>Carré</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <title><fixed-case>ELRA</fixed-case> in the heart of a cooperative <fixed-case>HLT</fixed-case> world</title>
       <pages>55–59</pages>
@@ -4218,7 +4218,7 @@
     <paper id="408">
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <title>Automatic Translation of Scientific Documents in the <fixed-case>HAL</fixed-case> Archive</title>
       <pages>3933–3936</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/703_Paper.pdf</url>
@@ -4239,7 +4239,7 @@
     <paper id="410">
       <author><first>Aude</first><last>Giraudel</last></author>
       <author><first>Matthieu</first><last>Carré</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Ludovic</first><last>Quintard</last></author>
@@ -4251,7 +4251,7 @@
     </paper>
     <paper id="411">
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <title>Lemmatising <fixed-case>S</fixed-case>erbian as Category Tagging with Bidirectional Sequence Classification</title>
       <pages>2103–2106</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/708_Paper.pdf</url>
@@ -4269,7 +4269,7 @@
     </paper>
     <paper id="413">
       <author><first>David</first><last>Tavarez</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
       <title>Strategies to Improve a Speaker Diarisation Tool</title>
@@ -4301,7 +4301,7 @@
       <author><first>Alistair</first><last>Conkie</last></author>
       <author><first>Thomas</first><last>Okken</last></author>
       <author><first>Yeon-Jun</first><last>Kim</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <title>Building Text-To-Speech Voices in the Cloud</title>
       <pages>3317–3321</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/716_Paper.pdf</url>
@@ -4319,7 +4319,7 @@
     </paper>
     <paper id="418">
       <author><first>Daša</first><last>Berović</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title><fixed-case>C</fixed-case>roatian Dependency Treebank: Recent Development and Initial Experiments</title>
       <pages>1902–1906</pages>
@@ -4328,7 +4328,7 @@
       <bibkey>berovic-etal-2012-croatian</bibkey>
     </paper>
     <paper id="419">
-      <author><first>Tina</first><last>Kluewer</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Kluewer</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Peter</first><last>Adolphs</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
@@ -4340,7 +4340,7 @@
     </paper>
     <paper id="420">
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Carlos</first><last>Gonzalo</last></author>
       <author><first>Amparo</first><last>Moreno</last></author>
       <author><first>Nuria</first><last>Simelio</last></author>
@@ -4352,7 +4352,7 @@
     </paper>
     <paper id="421">
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Laska</first><last>Laskova</last></author>
       <author><first>Stanislava</first><last>Kancheva</last></author>
       <title>A Treebank-driven Creation of an <fixed-case>O</fixed-case>nto<fixed-case>V</fixed-case>alence Verb lexicon for <fixed-case>B</fixed-case>ulgarian</title>
@@ -4375,7 +4375,7 @@
       <author><first>Shaohua</first><last>Yang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
-      <author><first>Bao-liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-liang</first><last>Lu</last></author>
       <title>Spell Checking for <fixed-case>C</fixed-case>hinese</title>
       <pages>730–736</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/727_Paper.pdf</url>
@@ -4383,7 +4383,7 @@
       <bibkey>yang-etal-2012-spell</bibkey>
     </paper>
     <paper id="424">
-      <author><first>Zahurul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
       <author><first>Alexander</first><last>Mehler</last></author>
       <title>Customization of the <fixed-case>E</fixed-case>uroparl Corpus for Translation Studies</title>
       <pages>2505–2510</pages>
@@ -4393,7 +4393,7 @@
     </paper>
     <paper id="425">
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Alberto</first><last>Battocchi</last></author>
       <title>A Parallel Corpus of Music and Lyrics Annotated with Emotions</title>
       <pages>2343–2346</pages>
@@ -4404,7 +4404,7 @@
     <paper id="426">
       <author><first>Elisa</first><last>Bianchi</last></author>
       <author><first>Mirko</first><last>Tavosanis</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <title>Creation of a bottom-up corpus-based ontology for <fixed-case>I</fixed-case>talian Linguistics</title>
       <pages>2641–2647</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/732_Paper.pdf</url>
@@ -4422,13 +4422,13 @@
     </paper>
     <paper id="428">
       <author><first>Carmen</first><last>Dayrell</last></author>
-      <author><first>Arnaldo</first><last>Candido Jr.</last></author>
+      <author id="arnaldo-candido-jr"><first>Arnaldo</first><last>Candido Jr.</last></author>
       <author><first>Gabriel</first><last>Lima</last></author>
       <author><first>Danilo</first><last>Machado Jr.</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Valéria</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria</first><last>Feltrim</last></author>
       <author><first>Stella</first><last>Tagnin</last></author>
-      <author><first>Sandra</first><last>Aluisio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluisio</last></author>
       <title>Rhetorical Move Detection in <fixed-case>E</fixed-case>nglish Abstracts: Multi-label Sentence Classifiers and their Annotated Corpora</title>
       <pages>1604–1609</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/734_Paper.pdf</url>
@@ -4462,7 +4462,7 @@
       <bibkey>lis-2012-polish</bibkey>
     </paper>
     <paper id="432">
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <author><first>Leïla</first><last>Boutora</last></author>
       <title><fixed-case>DEGELS</fixed-case>1: A comparable corpus of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage and co-speech gestures</title>
       <pages>2426–2429</pages>
@@ -4473,7 +4473,7 @@
     <paper id="433">
       <author><first>Matilde</first><last>Gonzalez</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Christophe</first><last>Collet</last></author>
+      <author id="christophe-collet"><first>Christophe</first><last>Collet</last></author>
       <title>Semi-Automatic Sign Language Corpora Annotation using Lexical Representations of Signs</title>
       <pages>2430–2434</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/741_Paper.pdf</url>
@@ -4490,19 +4490,19 @@
       <bibkey>iliev-genov-2012-expanding</bibkey>
     </paper>
     <paper id="435">
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Markus</first><last>Forsberg</last></author>
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Kristín</first><last>Jóhannsdóttir</last></author>
-      <author><first>Gunn</first><last>Lyse</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="gunn-inger-lyse"><first>Gunn</first><last>Lyse</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Lene</first><last>Offersgaard</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Ville</first><last>Oksanen</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <title>Creation of an Open Shared Language Resource Repository in the <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries</title>
@@ -4526,7 +4526,7 @@
     <paper id="437">
       <author><first>Martin</first><last>Reynaert</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Maarten</first><last>van Gompel</last></author>
       <title>Beyond <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: towards the facilitation of large corpus building efforts</title>
@@ -4536,15 +4536,15 @@
       <bibkey>reynaert-etal-2012-beyond</bibkey>
     </paper>
     <paper id="438">
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Djamel</first><last>Mostefa</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <title>Leveraging study of robustness and portability of spoken language understanding systems across languages and domains: the <fixed-case>PORTMEDIA</fixed-case> corpora</title>
       <pages>1436–1442</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/751_Paper.pdf</url>
@@ -4554,7 +4554,7 @@
     <paper id="439">
       <author><first>Rahul</first><last>Agarwal</last></author>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <title>A <fixed-case>GUI</fixed-case> to Detect and Correct Errors in <fixed-case>H</fixed-case>indi Dependency Treebank</title>
       <pages>1907–1911</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/753_Paper.pdf</url>
@@ -4562,8 +4562,8 @@
       <bibkey>agarwal-etal-2012-gui</bibkey>
     </paper>
     <paper id="440">
-      <author><first>Jordi</first><last>Atserias</last></author>
-      <author><first>Maria</first><last>Fuentes</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes</last></author>
       <author><first>Rogelio</first><last>Nazar</last></author>
       <author><first>Irene</first><last>Renau</last></author>
       <title>Spell Checking in <fixed-case>S</fixed-case>panish: The Case of Diacritic Accents</title>
@@ -4589,7 +4589,7 @@
     <paper id="442">
       <author><first>Liesbeth</first><last>Augustinus</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <title>Example-Based Treebank Querying</title>
       <pages>3161–3167</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/756_Paper.pdf</url>
@@ -4635,10 +4635,10 @@
       <bibkey>bott-etal-2012-text</bibkey>
     </paper>
     <paper id="447">
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <title><fixed-case>VERT</fixed-case>a: Linguistic features in <fixed-case>MT</fixed-case> evaluation</title>
       <pages>3944–3950</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/763_Paper.pdf</url>
@@ -4649,7 +4649,7 @@
       <author><first>Atro</first><last>Voutilainen</last></author>
       <author><first>Kristiina</first><last>Muhonen</last></author>
       <author><first>Tanja</first><last>Purtonen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <title>Specifying Treebanks, Outsourcing Parsebanks: <fixed-case>F</fixed-case>inn<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank 3</title>
       <pages>1927–1931</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/766_Paper.pdf</url>
@@ -4668,7 +4668,7 @@
     </paper>
     <paper id="450">
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
       <author><first>Francesco</first><last>Rubino</last></author>
@@ -4681,8 +4681,8 @@
       <bibkey>calzolari-etal-2012-lre</bibkey>
     </paper>
     <paper id="451">
-      <author><first>Corina</first><last>Forăscu</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forăscu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title><fixed-case>R</fixed-case>omanian <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank: An Annotated Parallel Corpus for Temporal Information</title>
       <pages>3762–3766</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/770_Paper.pdf</url>
@@ -4690,7 +4690,7 @@
       <bibkey>forascu-tufis-2012-romanian</bibkey>
     </paper>
     <paper id="452">
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Alessandro</first><last>Marchetti</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
@@ -4702,8 +4702,8 @@
       <bibkey>negri-etal-2012-chinese</bibkey>
     </paper>
     <paper id="453">
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>André</first><last>Lynum</last></author>
@@ -4715,10 +4715,10 @@
     </paper>
     <paper id="454">
       <author><first>Jonathon</first><last>Read</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <title>The <fixed-case>W</fixed-case>e<fixed-case>S</fixed-case>earch Corpus, Treebank, and Treecache – A Comprehensive Sample of User-Generated Content</title>
       <pages>1829–1835</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/774_Paper.pdf</url>
@@ -4728,7 +4728,7 @@
     <paper id="455">
       <author><first>František</first><last>Cvrček</last></author>
       <author><first>Karel</first><last>Pala</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <title>Legal electronic dictionary for <fixed-case>C</fixed-case>zech</title>
       <pages>283–287</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/775_Paper.pdf</url>
@@ -4739,7 +4739,7 @@
       <author><first>Thomas</first><last>Kaspersson</last></author>
       <author><first>Christian</first><last>Smith</last></author>
       <author><first>Henrik</first><last>Danielsson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <title>This also affects the context - Errors in extraction based summaries</title>
       <pages>173–178</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/776_Paper.pdf</url>
@@ -4748,7 +4748,7 @@
     </paper>
     <paper id="457">
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
@@ -4784,7 +4784,7 @@
     <paper id="460">
       <author><first>Jörg</first><last>Frommer</last></author>
       <author><first>Bernd</first><last>Michaelis</last></author>
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <author><first>Andreas</first><last>Wendemuth</last></author>
       <author><first>Rafael</first><last>Friesen</last></author>
       <author><first>Matthias</first><last>Haase</last></author>
@@ -4802,7 +4802,7 @@
     <paper id="461">
       <author><first>Attila</first><last>Zséder</last></author>
       <author><first>Gábor</first><last>Recski</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <title>Rapid creation of large-scale corpora and frequency dictionaries</title>
       <pages>1462–1465</pages>
@@ -4824,7 +4824,7 @@
       <author><first>Safa</first><last>Ismael</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <title>Linguistic Resources for Handwriting Recognition and Translation Evaluation</title>
       <pages>3951–3955</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/785_Paper.pdf</url>
@@ -4841,10 +4841,10 @@
       <bibkey>origlia-alfano-2012-prosomarker</bibkey>
     </paper>
     <paper id="465">
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <author><first>Jozef</first><last>Colpaert</last></author>
       <author><first>Joost</first><last>van Doremalen</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <title>The <fixed-case>DISCO</fixed-case> <fixed-case>ASR</fixed-case>-based <fixed-case>CALL</fixed-case> system: practicing <fixed-case>L</fixed-case>2 oral skills and beyond</title>
       <pages>2702–2707</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/787_Paper.pdf</url>
@@ -4853,7 +4853,7 @@
     </paper>
     <paper id="466">
       <author><first>Utku</first><last>Şirin</last></author>
-      <author><first>Ruket</first><last>Çakıcı</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Çakıcı</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <title><fixed-case>METU</fixed-case> <fixed-case>T</fixed-case>urkish Discourse Bank Browser</title>
       <pages>2808–2812</pages>
@@ -4887,7 +4887,7 @@
       <bibkey>quarteroni-etal-2012-evaluating</bibkey>
     </paper>
     <paper id="469">
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Noemi</first><last>Scarpato</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <title>Application of a Semantic Search Algorithm to Semi-Automatic <fixed-case>GUI</fixed-case> Generation</title>
@@ -4897,9 +4897,9 @@
       <bibkey>pazienza-etal-2012-application</bibkey>
     </paper>
     <paper id="470">
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana Dalbelo</first><last>Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana Dalbelo</first><last>Bašić</last></author>
       <title>Evaluation of Classification Algorithms and Features for Collocation Extraction in <fixed-case>C</fixed-case>roatian</title>
       <pages>657–662</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/796_Paper.pdf</url>
@@ -4938,7 +4938,7 @@
     </paper>
     <paper id="474">
       <author><first>Benjamin</first><last>Weitz</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <title>A Graphical Citation Browser for the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology</title>
       <pages>1718–1722</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/805_Paper.pdf</url>
@@ -4956,7 +4956,7 @@
       <bibkey>wattam-etal-2012-document</bibkey>
     </paper>
     <paper id="476">
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <title>A Repository of Data and Evaluation Resources for Natural Language Generation</title>
       <pages>4027–4032</pages>
@@ -4984,7 +4984,7 @@
     </paper>
     <paper id="479">
       <author><first>Thibault</first><last>Mondary</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Haïfa</first><last>Zargayouna</last></author>
       <author><first>Sabine</first><last>Barreaux</last></author>
       <title>The Quaero Evaluation Initiative on Term Extraction</title>
@@ -5005,7 +5005,7 @@
     </paper>
     <paper id="481">
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Szymon</first><last>Klocek</last></author>
       <author><first>Spyridon</first><last>Pilos</last></author>
       <author><first>Patrick</first><last>Schlüter</last></author>
@@ -5017,7 +5017,7 @@
     </paper>
     <paper id="482">
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <title>Simplified guidelines for the creation of Large Scale Dialectal <fixed-case>A</fixed-case>rabic Annotations</title>
       <pages>371–378</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/815_Paper.pdf</url>
@@ -5027,8 +5027,8 @@
     <paper id="483">
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Ioanna</first><last>Giannopoulou</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Dimitris</first><last>Mavroeidis</last></author>
       <author><first>Salvatore</first><last>Minutoli</last></author>
       <author><first>Marc</first><last>Schröder</last></author>
@@ -5080,11 +5080,11 @@
     </paper>
     <paper id="488">
       <author><first>Igor</first><last>Odriozola</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Inma</first><last>Hernaez</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernaez</last></author>
       <author><first>Iñaki</first><last>Sainz</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Jon</first><last>Sánchez</last></author>
+      <author id="jon-sanchez"><first>Jon</first><last>Sánchez</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <title>Using an <fixed-case>ASR</fixed-case> database to design a pronunciation evaluation system in <fixed-case>B</fixed-case>asque</title>
       <pages>4122–4126</pages>
@@ -5099,8 +5099,8 @@
       <author><first>Andrzej</first><last>Zuczkowski</last></author>
       <author><first>Cinzia</first><last>Buldorini</last></author>
       <author><first>Ricardo</first><last>Pietrobon</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <title>A Corpus of Scientific Biomedical Texts Spanning over 168 Years Annotated for Uncertainty</title>
       <pages>2009–2014</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/823_Paper.pdf</url>
@@ -5108,7 +5108,7 @@
       <bibkey>bongelli-etal-2012-corpus</bibkey>
     </paper>
     <paper id="490">
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Sylvie</first><last>Brunessaux</last></author>
       <author><first>Karim</first><last>Boudahmane</last></author>
@@ -5121,10 +5121,10 @@
     <paper id="491">
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Aaron</first><last>Broadwell</last></author>
-      <author><first>Jennifer</first><last>Stromer-Galley</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="george-aaron-broadwell"><first>Aaron</first><last>Broadwell</last></author>
+      <author id="jennifer-stromer-galley"><first>Jennifer</first><last>Stromer-Galley</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>Umit</first><last>Boz</last></author>
       <author><first>Xiaoai</first><last>Ren</last></author>
       <author><first>Jingsi</first><last>Wu</last></author>
@@ -5138,10 +5138,10 @@
       <author><first>Şenay</first><last>Kafkas</last></author>
       <author><first>Ian</first><last>Lewin</last></author>
       <author><first>David</first><last>Milward</last></author>
-      <author><first>Erik</first><last>van Mulligen</last></author>
-      <author><first>Jan</first><last>Kors</last></author>
+      <author id="erik-van-mulligen"><first>Erik</first><last>van Mulligen</last></author>
+      <author id="jan-kors"><first>Jan</first><last>Kors</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <title><fixed-case>CALBC</fixed-case>: Releasing the Final Corpora</title>
       <pages>2923–2926</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/827_Paper.pdf</url>
@@ -5150,13 +5150,13 @@
     </paper>
     <paper id="493">
       <author><first>Jordi</first><last>Adell</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
-      <author><first>Antonio</first><last>Cardenal</last></author>
-      <author><first>Marta R.</first><last>Costa-Jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-cardenal"><first>Antonio</first><last>Cardenal</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-Jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Asunción</first><last>Moreno</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
-      <author><first>Eduardo R.</first><last>Banga</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo R.</first><last>Banga</last></author>
       <title><fixed-case>BUCEADOR</fixed-case>, a multi-language search engine for digital libraries</title>
       <pages>1705–1709</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/828_Paper.pdf</url>
@@ -5168,7 +5168,7 @@
       <author><first>Laska</first><last>Laskova</last></author>
       <author><first>Stanislava</first><last>Kancheva</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <title>Linguistic Analysis Processing Line for <fixed-case>B</fixed-case>ulgarian</title>
       <pages>2959–2964</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/829_Paper.pdf</url>
@@ -5177,7 +5177,7 @@
     </paper>
     <paper id="495">
       <author><first>Jirka</first><last>Hana</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <title>Getting more data – Schoolkids as annotators</title>
       <pages>4049–4054</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/830_Paper.pdf</url>
@@ -5204,15 +5204,15 @@
     </paper>
     <paper id="498">
       <author><first>Stergos</first><last>Afantenos</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Myriam</first><last>Bras</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
-      <author><first>Mai</first><last>Ho-dac</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
+      <author id="lydia-mai-ho-dac"><first>Mai</first><last>Ho-dac</last></author>
       <author><first>Anne Le</first><last>Draoulec</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Marie-Paule</first><last>Péry-Woodley</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Péry-Woodley</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Josette</first><last>Rebeyrolles</last></author>
       <author><first>Ludovic</first><last>Tanguy</last></author>
       <author><first>Marianne</first><last>Vergez-Couret</last></author>
@@ -5225,7 +5225,7 @@
     </paper>
     <paper id="499">
       <author><first>Amalia</first><last>Zahra</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndonesian Transliteration to Support <fixed-case>E</fixed-case>nglish Pronunciation Practice</title>
       <pages>4132–4135</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/838_Paper.pdf</url>
@@ -5235,8 +5235,8 @@
     <paper id="500">
       <author><first>Kata</first><last>Gábor</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Éric</first><last>Villemonte de La Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de La Clergerie</last></author>
       <title>Boosting the Coverage of a Semantic Lexicon by Automatically Extracted Event Nominalizations</title>
       <pages>1466–1473</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/839_Paper.pdf</url>
@@ -5246,7 +5246,7 @@
     <paper id="501">
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <title>Using the International Standard Language Resource Number: Practical and Technical Aspects</title>
       <pages>50–54</pages>
@@ -5258,7 +5258,7 @@
       <author><first>Claire</first><last>Jaja</last></author>
       <author><first>Douglas</first><last>Briesch</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <title>Assessing Divergence Measures for Automated Document Routing in an Adaptive <fixed-case>MT</fixed-case> System</title>
       <pages>3963–3970</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/843_Paper.pdf</url>
@@ -5272,7 +5272,7 @@
       <author><first>Oscar</first><last>Koller</last></author>
       <author><first>Uwe</first><last>Zelle</last></author>
       <author><first>Justus</first><last>Piater</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title><fixed-case>RWTH</fixed-case>-<fixed-case>PHOENIX</fixed-case>-Weather: A Large Vocabulary Sign Language Recognition and Translation Corpus</title>
       <pages>3785–3789</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/844_Paper.pdf</url>
@@ -5282,9 +5282,9 @@
     <paper id="504">
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Francesco</first><last>Corcoglioniti</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Luciano</first><last>Serafini</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="luciano-serafini"><first>Luciano</first><last>Serafini</last></author>
       <author><first>Roberto</first><last>Zanoli</last></author>
       <title>The <fixed-case>K</fixed-case>nowledge<fixed-case>S</fixed-case>tore: an Entity-Based Storage System</title>
       <pages>3639–3646</pages>
@@ -5313,7 +5313,7 @@
     </paper>
     <paper id="507">
       <author><first>Young-Min</first><last>Kim</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Elodie</first><last>Faath</last></author>
       <author><first>Marin</first><last>Dacos</last></author>
       <title>Annotated Bibliographical Reference Corpora in Digital Humanities</title>
@@ -5325,7 +5325,7 @@
     <paper id="508">
       <author><first>Marie</first><last>Tahon</last></author>
       <author><first>Agnes</first><last>Delaborde</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <title>Corpus of Children Voices for Mid-level Markers and Affect Bursts Analysis</title>
       <pages>2366–2369</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/853_Paper.pdf</url>
@@ -5334,7 +5334,7 @@
     </paper>
     <paper id="509">
       <author><first>Souhir</first><last>Gahbiche-Braham</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <title>Joint Segmentation and <fixed-case>POS</fixed-case> Tagging for <fixed-case>A</fixed-case>rabic Using a <fixed-case>CRF</fixed-case>-based Classifier</title>
@@ -5362,7 +5362,7 @@
       <bibkey>nakagawa-den-2012-annotation</bibkey>
     </paper>
     <paper id="512">
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Chiharu</first><last>Narawa</last></author>
       <title>Classifying Standard Linguistic Processing Functionalities based on Fundamental Data Operation Types</title>
       <pages>1169–1173</pages>
@@ -5372,10 +5372,10 @@
     </paper>
     <paper id="513">
       <author><first>Éva</first><last>Székely</last></author>
-      <author><first>Joao Paulo</first><last>Cabral</last></author>
+      <author id="joao-paulo-cabral"><first>Joao Paulo</first><last>Cabral</last></author>
       <author><first>Mohamed</first><last>Abou-Zleikha</last></author>
       <author><first>Peter</first><last>Cahill</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <title>Evaluating expressive speech synthesis from audiobook corpora for conversational phrases</title>
       <pages>3335–3339</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/864_Paper.pdf</url>
@@ -5383,12 +5383,12 @@
       <bibkey>szekely-etal-2012-evaluating</bibkey>
     </paper>
     <paper id="514">
-      <author><first>Juan María</first><last>Garrido</last></author>
+      <author id="juan-maria-garrido"><first>Juan María</first><last>Garrido</last></author>
       <author><first>Yesika</first><last>Laplaza</last></author>
       <author><first>Montse</first><last>Marquina</last></author>
       <author><first>Andrea</first><last>Pearman</last></author>
       <author><first>José Gregorio</first><last>Escalada</last></author>
-      <author><first>Miguel Ángel</first><last>Rodríguez</last></author>
+      <author id="miguel-rodriguez-hernandez"><first>Miguel Ángel</first><last>Rodríguez</last></author>
       <author><first>Ana</first><last>Armenta</last></author>
       <title>The <fixed-case>I</fixed-case>3<fixed-case>MEDIA</fixed-case> speech database: a trilingual annotated corpus for the analysis and synthesis of emotional speech</title>
       <pages>1197–1202</pages>
@@ -5397,7 +5397,7 @@
       <bibkey>garrido-etal-2012-i3media</bibkey>
     </paper>
     <paper id="515">
-      <author><first>David</first><last>Elson</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
       <title><fixed-case>D</fixed-case>rama<fixed-case>B</fixed-case>ank: Annotating Agency in Narrative Discourse</title>
       <pages>2813–2819</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/866_Paper.pdf</url>
@@ -5406,7 +5406,7 @@
     </paper>
     <paper id="516">
       <author><first>Alessio</first><last>Bosca</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <author><first>Marco</first><last>Trevisan</last></author>
       <title><fixed-case>L</fixed-case>inguagrid: a network of Linguistic and Semantic Services for the <fixed-case>I</fixed-case>talian Language.</title>
@@ -5427,7 +5427,7 @@
     </paper>
     <paper id="518">
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <title>On the Way to a Legal Sharing of Web Applications in <fixed-case>NLP</fixed-case></title>
       <pages>2965–2970</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/872_Paper.pdf</url>
@@ -5446,10 +5446,10 @@
     </paper>
     <paper id="520">
       <author><first>David</first><last>Doukhan</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Albert</first><last>Rilliard</last></author>
       <author><first>Christophe</first><last>d’Alessandro</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <title>Designing <fixed-case>F</fixed-case>rench Tale Corpora for Entertaining Text To Speech Synthesis</title>
       <pages>1003–1010</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/876_Paper.pdf</url>
@@ -5458,7 +5458,7 @@
     </paper>
     <paper id="521">
       <author><first>Helen</first><last>Aristar-Dry</last></author>
-      <author><first>Sebastian</first><last>Drude</last></author>
+      <author id="sebastian-drude"><first>Sebastian</first><last>Drude</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <author><first>Jost</first><last>Gippert</last></author>
       <author><first>Irina</first><last>Nevskaya</last></author>
@@ -5469,7 +5469,7 @@
       <bibkey>aristar-dry-etal-2012-rendering</bibkey>
     </paper>
     <paper id="522">
-      <author><first>Ronaldo</first><last>Martins</last></author>
+      <author id="ronaldo-teixeira-martins"><first>Ronaldo</first><last>Martins</last></author>
       <title>Le Petit Prince in <fixed-case>UNL</fixed-case></title>
       <pages>3201–3204</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/879_Paper.pdf</url>
@@ -5478,8 +5478,8 @@
     </paper>
     <paper id="523">
       <author><first>Andrea</first><last>Varga</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <title>Unsupervised document zone identification using probabilistic graphical models</title>
       <pages>1610–1617</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/881_Paper.pdf</url>
@@ -5487,9 +5487,9 @@
       <bibkey>varga-etal-2012-unsupervised</bibkey>
     </paper>
     <paper id="524">
-      <author><first>Maria</first><last>Fuentes</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <title>Summarizing a multimodal set of documents in a Smart Room</title>
       <pages>2553–2558</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/882_Paper.pdf</url>
@@ -5498,9 +5498,9 @@
     </paper>
     <paper id="525">
       <author><first>Gerard</first><last>de Melo</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <title>Empirical Comparisons of <fixed-case>MASC</fixed-case> Word Sense Annotations</title>
       <pages>3036–3043</pages>
@@ -5509,10 +5509,10 @@
       <bibkey>de-melo-etal-2012-empirical</bibkey>
     </paper>
     <paper id="526">
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Amanda</first><last>Morris</last></author>
-      <author><first>Jonathan</first><last>Fiscus</last></author>
-      <author><first>Christopher</first><last>Caruso</last></author>
+      <author id="jonathan-g-fiscus"><first>Jonathan</first><last>Fiscus</last></author>
+      <author id="christopher-caruso"><first>Christopher</first><last>Caruso</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Paul</first><last>Over</last></author>
       <author><first>James</first><last>Fiumara</last></author>
@@ -5528,7 +5528,7 @@
     <paper id="527">
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <title>Identifying bilingual Multi-Word Expressions for Statistical Machine Translation</title>
       <pages>674–679</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/886_Paper.pdf</url>
@@ -5558,10 +5558,10 @@
       <bibkey>rapp-etal-2012-identifying</bibkey>
     </paper>
     <paper id="530">
-      <author><first>Sebastian</first><last>Drude</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="sebastian-drude"><first>Sebastian</first><last>Drude</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Paul</first><last>Trilsbeek</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <title>The Language Archive — a new hub for language resources</title>
       <pages>3264–3267</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/891_Paper.pdf</url>
@@ -5580,7 +5580,7 @@
       <bibkey>khademian-etal-2012-holistic</bibkey>
     </paper>
     <paper id="532">
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <title>Automatic Term Recognition Needs Multiple Evidence</title>
       <pages>2401–2407</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/893_Paper.pdf</url>
@@ -5590,8 +5590,8 @@
     <paper id="533">
       <author><first>Sudheer</first><last>Kolachina</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <title>Evaluation of Discourse Relation Annotation in the <fixed-case>H</fixed-case>indi Discourse Relation Bank</title>
       <pages>823–828</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/894_Paper.pdf</url>
@@ -5608,8 +5608,8 @@
     </paper>
     <paper id="535">
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <title><fixed-case>CLCM</fixed-case> - A Linguistic Resource for Effective Simplification of Instructions in the Crisis Management Domain and its Evaluations</title>
       <pages>3007–3014</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/898_Paper.pdf</url>
@@ -5638,7 +5638,7 @@
     </paper>
     <paper id="538">
       <author><first>Mohammad Hossein</first><last>Elahimanesh</last></author>
-      <author><first>Behrouz</first><last>Minaei</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei</last></author>
       <author><first>Hossein</first><last>Malekinezhad</last></author>
       <title>Improving K-Nearest Neighbor Efficacy for <fixed-case>F</fixed-case>arsi Text Classification</title>
       <pages>1618–1621</pages>
@@ -5658,7 +5658,7 @@
       <author><first>Toshinobu</first><last>Ogiso</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title><fixed-case>U</fixed-case>ni<fixed-case>D</fixed-case>ic for Early Middle <fixed-case>J</fixed-case>apanese: a Dictionary for Morphological Analysis of Classical <fixed-case>J</fixed-case>apanese</title>
       <pages>911–915</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/906_Paper.pdf</url>
@@ -5678,7 +5678,7 @@
     </paper>
     <paper id="542">
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <title>Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to the <fixed-case>K</fixed-case>yoto ontology</title>
       <pages>2584–2589</pages>
@@ -5698,7 +5698,7 @@
     </paper>
     <paper id="544">
       <author><first>Maria</first><last>Eskevich</last></author>
-      <author><first>Gareth J.F.</first><last>Jones</last></author>
+      <author id="gareth-j-f-jones"><first>Gareth J.F.</first><last>Jones</last></author>
       <author><first>Martha</first><last>Larson</last></author>
       <author><first>Roeland</first><last>Ordelman</last></author>
       <title>Creating a Data Collection for Evaluating Rich Speech Retrieval</title>
@@ -5719,7 +5719,7 @@
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Sebastian</first><last>Hellmann</last></author>
       <author><first>Sebastian</first><last>Nordhoff</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Richard</first><last>Littauer</last></author>
       <author><first>Judith</first><last>Eckle-Kohler</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -5734,8 +5734,8 @@
     </paper>
     <paper id="547">
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <title>An Annotation Scheme for Quantifier Scope Disambiguation</title>
       <pages>1546–1553</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/914_Paper.pdf</url>
@@ -5752,8 +5752,8 @@
     </paper>
     <paper id="549">
       <author><first>Eleftheria</first><last>Ahtaridis</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <title><fixed-case>LDC</fixed-case> Language Resource Database: Building a Bibliographic Database</title>
       <pages>1723–1728</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/916_Paper.pdf</url>
@@ -5770,7 +5770,7 @@
       <bibkey>giannoulis-potamianos-2012-hierarchical</bibkey>
     </paper>
     <paper id="551">
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <title>A Concise Query Language with Search and Transform Operations for Corpora with Multiple Levels of Annotation</title>
       <pages>1490–1497</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/919_Paper.pdf</url>
@@ -5790,7 +5790,7 @@
       <author><first>Gideon</first><last>Kotzé</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Scott</first><last>Martens</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <title>Large aligned treebanks for syntax-based machine translation</title>
       <pages>467–473</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/924_Paper.pdf</url>
@@ -5798,19 +5798,19 @@
       <bibkey>kotze-etal-2012-large</bibkey>
     </paper>
     <paper id="554">
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Nikos</first><last>Mastropavlos</last></author>
       <author><first>Fangzhong</first><last>Su</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
-      <author><first>Mateja</first><last>Verlic</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
+      <author id="mateja-verlic"><first>Mateja</first><last>Verlic</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Nikos</first><last>Glaros</last></author>
-      <author><first>Monica Lestari</first><last>Paramita</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="monica-lestari-paramita"><first>Monica Lestari</first><last>Paramita</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <title>Collecting and Using Comparable Corpora for Statistical Machine Translation</title>
       <pages>438–445</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/925_Paper.pdf</url>
@@ -5819,7 +5819,7 @@
     </paper>
     <paper id="555">
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Radoslaw</first><last>Ramocki</last></author>
+      <author id="radoslaw-ramocki"><first>Radoslaw</first><last>Ramocki</last></author>
       <author><first>Marek</first><last>Maziarz</last></author>
       <title>Recognition of <fixed-case>P</fixed-case>olish Derivational Relations Based on Supervised Learning Scheme</title>
       <pages>916–922</pages>
@@ -5828,7 +5828,7 @@
       <bibkey>piasecki-etal-2012-recognition</bibkey>
     </paper>
     <paper id="556">
-      <author><first>Sílvia</first><last>Moraes</last></author>
+      <author id="silvia-moraes"><first>Sílvia</first><last>Moraes</last></author>
       <author><first>Vera</first><last>Lima</last></author>
       <title>Combining Formal Concept Analysis and semantic information for building ontological structures from texts : an exploratory study</title>
       <pages>3653–3660</pages>
@@ -5854,7 +5854,7 @@
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Frederik</first><last>Vaassen</last></author>
       <author><first>Janneke</first><last>van de Loo</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title>The Netlog Corpus. A Resource for the Study of <fixed-case>F</fixed-case>lemish <fixed-case>D</fixed-case>utch <fixed-case>I</fixed-case>nternet Language</title>
       <pages>1569–1572</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/938_Paper.pdf</url>
@@ -5863,8 +5863,8 @@
     </paper>
     <paper id="559">
       <author><first>Iskandar</first><last>Keskes</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <title>Clause-based Discourse Segmentation of <fixed-case>A</fixed-case>rabic Texts</title>
       <pages>2826–2832</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/939_Paper.pdf</url>
@@ -5874,7 +5874,7 @@
     <paper id="560">
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <title>Building <fixed-case>J</fixed-case>apanese Predicate-argument Structure Corpus using Lexical Conceptual Structure</title>
       <pages>1554–1558</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/941_Paper.pdf</url>
@@ -5892,7 +5892,7 @@
     </paper>
     <paper id="562">
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title>Domain-specific vs. Uniform Modeling for Coreference Resolution</title>
       <pages>187–191</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/944_Paper.pdf</url>
@@ -5900,7 +5900,7 @@
       <bibkey>uryupina-poesio-2012-domain</bibkey>
     </paper>
     <paper id="563">
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Jesús M.</first><last>Hermida</last></author>
       <title>Extending the <fixed-case>E</fixed-case>moti<fixed-case>N</fixed-case>et Knowledge Base to Improve the Automatic Detection of Implicitly Expressed Emotions from Text</title>
       <pages>1207–1214</pages>
@@ -5912,7 +5912,7 @@
       <author><first>Elsa</first><last>Tolone</last></author>
       <author><first>Stavroula</first><last>Voyatzi</last></author>
       <author><first>Claude</first><last>Martineau</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <title>Extending the adverbial coverage of a <fixed-case>F</fixed-case>rench morphological lexicon</title>
       <pages>2856–2862</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/946_Paper.pdf</url>
@@ -5930,7 +5930,7 @@
       <bibkey>cristea-etal-2012-reconstructing</bibkey>
     </paper>
     <paper id="566">
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <title><fixed-case>L</fixed-case>atvian and <fixed-case>L</fixed-case>ithuanian Named Entity Recognition with <fixed-case>T</fixed-case>ilde<fixed-case>NER</fixed-case></title>
       <pages>1258–1265</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/948_Paper.pdf</url>
@@ -5956,7 +5956,7 @@
     </paper>
     <paper id="569">
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <title>The Political Speech Corpus of <fixed-case>B</fixed-case>ulgarian</title>
       <pages>1744–1747</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/956_Paper.pdf</url>
@@ -5965,7 +5965,7 @@
     </paper>
     <paper id="570">
       <author><first>Eric</first><last>Kow</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <title><fixed-case>LG</fixed-case>-Eval: A Toolkit for Creating Online Language Evaluation Experiments</title>
       <pages>4033–4037</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/957_Paper.pdf</url>
@@ -6015,9 +6015,9 @@
     <paper id="575">
       <author><first>Paulo</first><last>Fernandes</last></author>
       <author><first>Lucelene</first><last>Lopes</last></author>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <author><first>Afonso</first><last>Sales</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>A Fast, Memory Efficient, Scalable and Multilingual Dictionary Retriever</title>
       <pages>2520–2524</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/966_Paper.pdf</url>
@@ -6026,7 +6026,7 @@
     </paper>
     <paper id="576">
       <author><first>André</first><last>Santos</last></author>
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <author><first>Nuno</first><last>Carvalho</last></author>
       <title>Structural alignment of plain text books</title>
       <pages>2069–2074</pages>
@@ -6036,7 +6036,7 @@
     </paper>
     <paper id="577">
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>İlknur Durgar</first><last>El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur Durgar</first><last>El-Kahlout</last></author>
       <author><first>Erdem</first><last>Unal</last></author>
       <author><first>Hamza</first><last>Kaya</last></author>
       <title><fixed-case>T</fixed-case>urkish Paraphrase Corpus</title>
@@ -6046,8 +6046,8 @@
       <bibkey>demir-etal-2012-turkish</bibkey>
     </paper>
     <paper id="578">
-      <author><first>Keith J.</first><last>Miller</last></author>
-      <author><first>Elizabeth Schroeder</first><last>Richerson</last></author>
+      <author id="keith-j-miller"><first>Keith J.</first><last>Miller</last></author>
+      <author id="elizabeth-schroeder"><first>Elizabeth Schroeder</first><last>Richerson</last></author>
       <author><first>Sarah</first><last>McLeod</last></author>
       <author><first>James</first><last>Finley</last></author>
       <author><first>Aaron</first><last>Schein</last></author>
@@ -6060,7 +6060,7 @@
     <paper id="579">
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <title>Measuring the Divergence of Dependency Structures Cross-Linguistically to Improve Syntactic Projection Algorithms</title>
       <pages>771–778</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/971_Paper.pdf</url>
@@ -6077,7 +6077,7 @@
       <bibkey>song-xia-2012-using</bibkey>
     </paper>
     <paper id="581">
-      <author><first>Joao Paulo</first><last>Cabral</last></author>
+      <author id="joao-paulo-cabral"><first>Joao Paulo</first><last>Cabral</last></author>
       <author><first>Mark</first><last>Kane</last></author>
       <author><first>Zeeshan</first><last>Ahmed</last></author>
       <author><first>Mohamed</first><last>Abou-Zleikha</last></author>
@@ -6085,7 +6085,7 @@
       <author><first>Amalia</first><last>Zahra</last></author>
       <author><first>Kalu</first><last>Ogbureke</last></author>
       <author><first>Peter</first><last>Cahill</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Stephan</first><last>Schlögl</last></author>
       <title>Rapidly Testing the Interaction Model of a Pronunciation Training System via <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z</title>
       <pages>4136–4142</pages>
@@ -6094,8 +6094,8 @@
       <bibkey>cabral-etal-2012-rapidly</bibkey>
     </paper>
     <paper id="582">
-      <author><first>Pēteris</first><last>Paikens</last></author>
-      <author><first>Normunds</first><last>Grūzītis</last></author>
+      <author id="peteris-paikens"><first>Pēteris</first><last>Paikens</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Grūzītis</last></author>
       <title>An implementation of a <fixed-case>L</fixed-case>atvian resource grammar in Grammatical Framework</title>
       <pages>1680–1685</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/976_Paper.pdf</url>
@@ -6116,7 +6116,7 @@
       <author><first>Silke</first><last>Scheible</last></author>
       <author><first>Richard J.</first><last>Whitt</last></author>
       <author><first>Martin</first><last>Durrell</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <title><fixed-case>GATE</fixed-case>to<fixed-case>G</fixed-case>er<fixed-case>M</fixed-case>an<fixed-case>C</fixed-case>: A <fixed-case>GATE</fixed-case>-based Annotation Pipeline for Historical <fixed-case>G</fixed-case>erman</title>
       <pages>3611–3617</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/978_Paper.pdf</url>
@@ -6124,9 +6124,9 @@
       <bibkey>scheible-etal-2012-gatetogermanc</bibkey>
     </paper>
     <paper id="585">
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
-      <author><first>Ângela</first><last>Costa</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <title>Dealing with unknown words in statistical machine translation</title>
       <pages>3911–3981</pages>
@@ -6135,7 +6135,7 @@
       <bibkey>silva-etal-2012-dealing</bibkey>
     </paper>
     <paper id="586">
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
       <title>A disambiguation resource extracted from <fixed-case>W</fixed-case>ikipedia for semantic annotation</title>
       <pages>3665–3671</pages>
@@ -6155,7 +6155,7 @@
     </paper>
     <paper id="588">
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <author><first>Maria Grazia</first><last>Cutrullà</last></author>
       <title>Enriching the <fixed-case>ISST</fixed-case>-<fixed-case>TANL</fixed-case> Corpus with Semantic Frames</title>
@@ -6167,7 +6167,7 @@
     <paper id="589">
       <author><first>Kengo</first><last>Ohta</last></author>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <title>Developing Partially-Transcribed Speech Corpus from Edited Transcriptions</title>
       <pages>3399–3404</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/987_Paper.pdf</url>
@@ -6206,8 +6206,8 @@
     <paper id="592">
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <title>The <fixed-case>ML</fixed-case>4<fixed-case>HMT</fixed-case> Workshop on Optimising the Division of Labour in Hybrid Machine Translation</title>
@@ -6217,17 +6217,17 @@
       <bibkey>federmann-etal-2012-ml4hmt</bibkey>
     </paper>
     <paper id="593">
-      <author><first>Maria</first><last>Gavrilidou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
-      <author><first>Elina</first><last>Desipri</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
+      <author id="elina-desipri"><first>Elina</first><last>Desipri</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Valerie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valerie</first><last>Mapelli</last></author>
       <title>The <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case> Metadata Schema for the Description of Language Resources</title>
       <pages>1090–1097</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/998_Paper.pdf</url>
@@ -6255,7 +6255,7 @@
     </paper>
     <paper id="596">
       <author><first>Monica</first><last>Gavrila</last></author>
-      <author><first>Walther</first><last>v. Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>v. Hahn</last></author>
       <author><first>Cristina</first><last>Vertan</last></author>
       <title>Same domain different discourse style - A case study on Language Resources for data-driven Machine Translation</title>
       <pages>3441–3446</pages>
@@ -6266,8 +6266,8 @@
     <paper id="597">
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Huzaifa</first><last>Neralwala</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <title>Annotations for Power Relations on Email Threads</title>
       <pages>806–811</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1006_Paper.pdf</url>
@@ -6275,11 +6275,11 @@
       <bibkey>prabhakaran-etal-2012-annotations</bibkey>
     </paper>
     <paper id="598">
-      <author><first>William J.</first><last>Corvey</last></author>
+      <author id="william-j-corvey"><first>William J.</first><last>Corvey</last></author>
       <author><first>Sudha</first><last>Verma</last></author>
-      <author><first>Sarah</first><last>Vieweg</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="sarah-vieweg"><first>Sarah</first><last>Vieweg</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <title>Foundations of a Multilayer Annotation Framework for <fixed-case>T</fixed-case>witter Communications During Crisis Events</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1008_Paper.pdf</url>
       <abstract>In times of mass emergency, vast amounts of data are generated via computer-mediated communication (CMC) that are difficult to manually collect and organize into a coherent picture. Yet valuable information is broadcast, and can provide useful insight into time- and safety-critical situations if captured and analyzed efficiently and effectively. We describe a natural language processing component of the EPIC (Empowering the Public with Information in Crisis) Project infrastructure, designed to extract linguistic and behavioral information from tweet text to aid in the task of information integration. The system incorporates linguistic annotation, in the form of Named Entity Tagging, as well as behavioral annotations to capture tweets contributing to situational awareness and analyze the information type of the tweet content. We show classification results and describe future integration of these classifiers in the larger EPIC infrastructure.</abstract>
@@ -6298,7 +6298,7 @@
     <paper id="600">
       <author><first>Stefan</first><last>Scherer</last></author>
       <author><first>Georg</first><last>Layher</last></author>
-      <author><first>John</first><last>Kane</last></author>
+      <author id="john-keane"><first>John</first><last>Kane</last></author>
       <author><first>Heiko</first><last>Neumann</last></author>
       <author><first>Nick</first><last>Campbell</last></author>
       <title>An audiovisual political speech analysis incorporating eye-tracking and perception data</title>
@@ -6308,7 +6308,7 @@
       <bibkey>scherer-etal-2012-audiovisual</bibkey>
     </paper>
     <paper id="601">
-      <author><first>Chris Irwin</first><last>Davis</last></author>
+      <author id="chris-irwin-davis"><first>Chris Irwin</first><last>Davis</last></author>
       <title><fixed-case>T</fixed-case>ajik-<fixed-case>F</fixed-case>arsi <fixed-case>P</fixed-case>ersian Transliteration Using Statistical Machine Translation</title>
       <pages>3988–3995</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1012_Paper.pdf</url>
@@ -6328,7 +6328,7 @@
       <author><first>Anna</first><last>Rumshisky</last></author>
       <author><first>Nick</first><last>Botchan</last></author>
       <author><first>Sophie</first><last>Kushkuley</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>Word Sense Inventories by Non-Experts.</title>
       <pages>4055–4059</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1014_Paper.pdf</url>
@@ -6351,7 +6351,7 @@
       <author><first>Daniel</first><last>Capurro</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
-      <author><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
       <title>Statistical Section Segmentation in Free-Text Clinical Records</title>
       <pages>2001–2008</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1016_Paper.pdf</url>
@@ -6386,10 +6386,10 @@
       <bibkey>grezka-poudat-2012-building</bibkey>
     </paper>
     <paper id="609">
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Oier Lopez</first><last>de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier Lopez</first><last>de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <author><first>Samuel</first><last>Fernando</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <title>Matching Cultural Heritage items to <fixed-case>W</fixed-case>ikipedia</title>
@@ -6401,7 +6401,7 @@
     <paper id="610">
       <author><first>John</first><last>Vogel</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title><fixed-case>ATLIS</fixed-case>: Identifying Locational Information in Text Automatically</title>
       <pages>612–616</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1022_Paper.pdf</url>
@@ -6474,8 +6474,8 @@
       <bibkey>ferreira-etal-2012-common</bibkey>
     </paper>
     <paper id="617">
-      <author><first>Guido</first><last>Boella</last></author>
-      <author><first>Luigi</first><last>di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>di Caro</last></author>
       <author><first>Llio</first><last>Humphreys</last></author>
       <author><first>Livio</first><last>Robaldo</last></author>
       <author><first>Leon</first><last>van der Torre</last></author>
@@ -6501,7 +6501,7 @@
     <paper id="619">
       <author><first>Daniel</first><last>Bauer</last></author>
       <author><first>Hagen</first><last>Fürstenau</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <title>The Dependency-Parsed <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Corpus</title>
       <pages>3861–3867</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1037_Paper.pdf</url>
@@ -6509,7 +6509,7 @@
       <bibkey>bauer-etal-2012-dependency</bibkey>
     </paper>
     <paper id="620">
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Andrew</first><last>Attard</last></author>
       <author><first>Jan</first><last>Joachimsen</last></author>
@@ -6521,7 +6521,7 @@
     </paper>
     <paper id="621">
       <author><first>Emília Garcia</first><last>Casademont</last></author>
-      <author><first>Antonio</first><last>Bonafonte</last></author>
+      <author id="antonio-bonafonte"><first>Antonio</first><last>Bonafonte</last></author>
       <author><first>Asunción</first><last>Moreno</last></author>
       <title>Building Synthetic Voices in the <fixed-case>META</fixed-case>-<fixed-case>NET</fixed-case> Framework</title>
       <pages>3322–3326</pages>
@@ -6533,7 +6533,7 @@
       <author><first>Hidetsugu</first><last>Nanba</last></author>
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
       <author><first>Kiyoko</first><last>Uchiyama</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <title>Automatic Translation of Scholarly Terms into Patent Terms Using Synonym Extraction Techniques</title>
       <pages>3447–3451</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1043_Paper.pdf</url>
@@ -6542,7 +6542,7 @@
     </paper>
     <paper id="623">
       <author><first>Marco</first><last>Dinarelli</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <title>Tree-Structured Named Entity Recognition on <fixed-case>OCR</fixed-case> Data: Analysis, Processing and Results</title>
       <pages>1266–1272</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1046_Paper.pdf</url>
@@ -6552,7 +6552,7 @@
     <paper id="624">
       <author><first>Jan</first><last>Pomikálek</last></author>
       <author><first>Miloš</first><last>Jakubíček</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <title>Building a 70 billion word corpus of <fixed-case>E</fixed-case>nglish from <fixed-case>C</fixed-case>lue<fixed-case>W</fixed-case>eb</title>
       <pages>502–506</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1047_Paper.pdf</url>
@@ -6595,7 +6595,7 @@
       <bibkey>popescu-2012-buildind</bibkey>
     </paper>
     <paper id="629">
-      <author><first>William</first><last>Black</last></author>
+      <author id="william-j-black"><first>William</first><last>Black</last></author>
       <author><first>Rob</first><last>Procter</last></author>
       <author><first>Steven</first><last>Gray</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
@@ -6607,7 +6607,7 @@
     </paper>
     <paper id="630">
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <title><fixed-case>AWATIF</fixed-case>: A Multi-Genre Corpus for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Subjectivity and Sentiment Analysis</title>
       <pages>3907–3914</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1057_Paper.pdf</url>
@@ -6617,7 +6617,7 @@
     <paper id="631">
       <author><first>Sunao</first><last>Hara</last></author>
       <author><first>Norihide</first><last>Kitaoka</last></author>
-      <author><first>Kazuya</first><last>Takeda</last></author>
+      <author id="kazuya-takeda"><first>Kazuya</first><last>Takeda</last></author>
       <title>Causal analysis of task completion errors in spoken music retrieval interactions</title>
       <pages>1365–1372</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1059_Paper.pdf</url>
@@ -6626,7 +6626,7 @@
     </paper>
     <paper id="632">
       <author><first>Krešimir</first><last>Šojat</last></author>
-      <author><first>Nives Mikelić</first><last>Preradović</last></author>
+      <author id="nives-mikelic-preradovic"><first>Nives Mikelić</first><last>Preradović</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <title>Generation of Verbal Stems in Derivationally Rich Language</title>
       <pages>928–933</pages>
@@ -6639,7 +6639,7 @@
       <author><first>Roger</first><last>Granada</last></author>
       <author><first>Breno</first><last>Meneghetti</last></author>
       <author><first>Leonardo</first><last>Carvalho</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title><fixed-case>C</fixed-case>orpus+<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et thesaurus generation for ontology enriching</title>
       <pages>3463–3467</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1062_Paper.pdf</url>
@@ -6674,7 +6674,7 @@
     </paper>
     <paper id="637">
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Assessing the Comparability of News Texts</title>
       <pages>3996–4003</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1069_Paper.pdf</url>
@@ -6683,7 +6683,7 @@
     </paper>
     <paper id="638">
       <author><first>Nur-Hana</first><last>Samsudin</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <title>Building Text-to-Speech Systems for Resource Poor Languages</title>
       <pages>3327–3334</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1070_Paper.pdf</url>
@@ -6710,7 +6710,7 @@
     </paper>
     <paper id="641">
       <author><first>Jennifer</first><last>Williams</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <title>A New <fixed-case>T</fixed-case>witter Verb Lexicon for Natural Language Processing</title>
       <pages>293–298</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1076_Paper.pdf</url>
@@ -6718,9 +6718,9 @@
       <bibkey>williams-katz-2012-new</bibkey>
     </paper>
     <paper id="642">
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Mirlan</first><last>Ipasov</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <title>A finite-state morphological transducer for <fixed-case>K</fixed-case>yrgyz</title>
       <pages>934–940</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1077_Paper.pdf</url>
@@ -6728,9 +6728,9 @@
       <bibkey>washington-etal-2012-finite</bibkey>
     </paper>
     <paper id="643">
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Jean Fox</first><last>Tree</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean Fox</first><last>Tree</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <author><first>Rob</first><last>Abbott</last></author>
       <author><first>Joseph</first><last>King</last></author>
       <title>A Corpus for Research on Deliberation and Debate</title>
@@ -6752,7 +6752,7 @@
     <paper id="645">
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <title>Learning Sentiment Lexicons in <fixed-case>S</fixed-case>panish</title>
       <pages>3077–3081</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1081_Paper.pdf</url>
@@ -6761,7 +6761,7 @@
     </paper>
     <paper id="646">
       <author><first>Erwin</first><last>Fernandez-Ordoñez</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Samer</first><last>Hassan</last></author>
       <title>Unsupervised Word Sense Disambiguation with Multilingual Representations</title>
       <pages>847–851</pages>
@@ -6789,7 +6789,7 @@
     <paper id="649">
       <author><first>Kirk</first><last>Roberts</last></author>
       <author><first>Travis</first><last>Goodwin</last></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <title>Annotating Spatial Containment Relations Between Events</title>
       <pages>3052–3059</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1091_Paper.pdf</url>
@@ -6799,7 +6799,7 @@
     <paper id="650">
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <title>Annotating Agreement and Disagreement in Threaded Discussion</title>
       <pages>818–822</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1095_Paper.pdf</url>
@@ -6832,7 +6832,7 @@
       <bibkey>janssen-2012-neotag</bibkey>
     </paper>
     <paper id="654">
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Michael</first><last>Kipp</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
       <title>Using <fixed-case>D</fixed-case>i<fixed-case>AML</fixed-case> and <fixed-case>ANVIL</fixed-case> for multimodal dialogue annotations</title>
@@ -6860,7 +6860,7 @@
       <bibkey>caselli-etal-2012-assigning</bibkey>
     </paper>
     <paper id="657">
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Grace</first><last>Lin</last></author>
       <author><first>Jennifer</first><last>Sawyer</last></author>
       <title>An Annotated Corpus of Film Dialogue for Learning and Characterizing Character Style</title>
@@ -6878,10 +6878,10 @@
       <bibkey>bigi-2012-sppas-tool</bibkey>
     </paper>
     <paper id="659">
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Marian</first><last>Reed</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <title>Twenty Years of Language Resource Development and Distribution: A Progress Report on <fixed-case>LDC</fixed-case> Activities</title>
       <pages>60–65</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1117_Paper.pdf</url>
@@ -6898,12 +6898,12 @@
       <bibkey>boruta-jastrzebska-2012-phonemic</bibkey>
     </paper>
     <paper id="661">
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Florian</first><last>Kraft</last></author>
       <author><first>Christian</first><last>Mohr</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>The <fixed-case>KIT</fixed-case> Lecture Corpus for Speech Translation</title>
       <pages>3409–3414</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1121_Paper.pdf</url>
@@ -6921,8 +6921,8 @@
       <bibkey>bigi-etal-2012-orthographic</bibkey>
     </paper>
     <paper id="663">
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Jessica</first><last>Moszkowicz</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jessica-moszkowicz"><first>Jessica</first><last>Moszkowicz</last></author>
       <title>The Role of Model Testing in Standards Development: The Case of <fixed-case>ISO</fixed-case>-Space</title>
       <pages>3060–3063</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1123_Paper.pdf</url>
@@ -6930,7 +6930,7 @@
       <bibkey>pustejovsky-moszkowicz-2012-role</bibkey>
     </paper>
     <paper id="664">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Rosa</first><last>Stern</last></author>
       <title>Aleda, a free large-scale entity database for <fixed-case>F</fixed-case>rench</title>
       <pages>1273–1276</pages>
@@ -6940,9 +6940,9 @@
     </paper>
     <paper id="665">
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
@@ -6954,7 +6954,7 @@
       <bibkey>federico-etal-2012-iwslt</bibkey>
     </paper>
     <paper id="666">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <title>Cleaning noisy wordnets</title>
       <pages>3468–3472</pages>
@@ -6971,10 +6971,10 @@
       <bibkey>hernandez-2012-tackling</bibkey>
     </paper>
     <paper id="668">
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
-      <author><first>Enrique Henestroza</first><last>Anguiano</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique Henestroza</first><last>Anguiano</last></author>
       <title>Ubiquitous Usage of a Broad Coverage <fixed-case>F</fixed-case>rench Corpus: Processing the <fixed-case>E</fixed-case>st <fixed-case>R</fixed-case>epublicain corpus</title>
       <pages>3249–3254</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1130_Paper.pdf</url>
@@ -6983,7 +6983,7 @@
     </paper>
     <paper id="669">
       <author><first>Valérie</first><last>Hanoka</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title><fixed-case>W</fixed-case>ordnet extension made simple: A multilingual lexicon-based approach using wiki resources</title>
       <pages>3473–3478</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1131_Paper.pdf</url>
@@ -6991,7 +6991,7 @@
       <bibkey>hanoka-sagot-2012-wordnet</bibkey>
     </paper>
     <paper id="670">
-      <author><first>Shyam</first><last>Agrawal</last></author>
+      <author id="shyam-sundar-agrawal"><first>Shyam</first><last>Agrawal</last></author>
       <author><first>Shweta</first><last>Sinha</last></author>
       <author><first>Pooja</first><last>Singh</last></author>
       <author><first>Jesper</first><last>Olson</last></author>
diff --git a/data/xml/L14.xml b/data/xml/L14.xml
index 2628a59806..d3194272b6 100644
--- a/data/xml/L14.xml
+++ b/data/xml/L14.xml
@@ -3,15 +3,15 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Ninth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>'14)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Hrafn</first><last>Loftsson</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
-      <editor><first>Asuncion</first><last>Moreno</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Reykjavik, Iceland</address>
       <month>May</month>
@@ -23,7 +23,7 @@
     </frontmatter>
     <paper id="1">
       <author><first>Ben</first><last>Verhoeven</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <title><fixed-case>CL</fixed-case>i<fixed-case>PS</fixed-case> Stylometry Investigation (<fixed-case>CSI</fixed-case>) corpus: A <fixed-case>D</fixed-case>utch corpus for the detection of age, gender, personality, sentiment and deception in text</title>
       <pages>3081–3085</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1_Paper.pdf</url>
@@ -59,7 +59,7 @@
       <bibkey>claveau-kijak-2014-generating</bibkey>
     </paper>
     <paper id="5">
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Iliana</first><last>Simova</last></author>
       <author><first>Ginka</first><last>Ivanova</last></author>
       <author><first>Maria</first><last>Mateva</last></author>
@@ -81,7 +81,7 @@
       <bibkey>solorio-etal-2014-sockpuppet</bibkey>
     </paper>
     <paper id="7">
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <title>Textual Emigration Analysis (<fixed-case>TEA</fixed-case>)</title>
       <pages>2089–2093</pages>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="9">
       <author><first>Amel</first><last>Fraisse</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <title>Toward a unifying model for Opinion, Sentiment and Emotion information extraction</title>
       <pages>3881–3886</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1010_Paper.pdf</url>
@@ -111,8 +111,8 @@
     </paper>
     <paper id="10">
       <author><first>Thorsten</first><last>Trippel</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Matej</first><last>Durco</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="matej-durco"><first>Matej</first><last>Durco</last></author>
       <author><first>Oddrun</first><last>Ohren</last></author>
       <title>Towards automatic quality assessment of component metadata</title>
       <pages>3851–3856</pages>
@@ -121,11 +121,11 @@
       <bibkey>trippel-etal-2014-towards</bibkey>
     </paper>
     <paper id="11">
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title><fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank: Semantics of New Predicate Types</title>
       <pages>3013–3019</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1012_Paper.pdf</url>
@@ -163,10 +163,10 @@
       <bibkey>kobylinski-2014-polita</bibkey>
     </paper>
     <paper id="15">
-      <author><first>Siddharth</first><last>Jain</last></author>
+      <author id="siddharth-jain"><first>Siddharth</first><last>Jain</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Angelique</first><last>Rein</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <title>A Corpus of Participant Roles in Contentious Discussions</title>
       <pages>1751–1756</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1019_Paper.pdf</url>
@@ -187,12 +187,12 @@
       <author><first>Vera</first><last>Cabarrão</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Fernando</first><last>Batista</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Hugo</first><last>Meinedo</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <author><first>Ana Isabel</first><last>Mata</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
       <title>Revising the annotation of a Broadcast News corpus: a linguistic approach</title>
       <pages>3908–3913</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1020_Paper.pdf</url>
@@ -211,7 +211,7 @@
       <bibkey>takala-etal-2014-gold</bibkey>
     </paper>
     <paper id="19">
-      <author><first>Damir</first><last>Cavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last></author>
       <author><first>Malgorzata</first><last>Cavar</last></author>
       <title>Visualization of Language Relations and Families: <fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>ree</title>
       <pages>698–701</pages>
@@ -222,7 +222,7 @@
     <paper id="20">
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
       <title><fixed-case>H</fixed-case>i<fixed-case>E</fixed-case>ve: A Corpus for Extracting Event Hierarchies from News Stories</title>
       <pages>3678–3683</pages>
@@ -231,7 +231,7 @@
       <bibkey>glavas-etal-2014-hieve</bibkey>
     </paper>
     <paper id="21">
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Zachary</first><last>Yocum</last></author>
       <title>Image Annotation with <fixed-case>ISO</fixed-case>-Space: Distinguishing Content from Structure</title>
       <pages>426–431</pages>
@@ -241,10 +241,10 @@
     </paper>
     <paper id="22">
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Jeremy</first><last>Leixa</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="jeremy-leixa"><first>Jeremy</first><last>Leixa</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <title>The <fixed-case>ETAPE</fixed-case> speech processing evaluation</title>
       <pages>3995–3999</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1027_Paper.pdf</url>
@@ -262,7 +262,7 @@
       <bibkey>kamholz-etal-2014-panlex</bibkey>
     </paper>
     <paper id="24">
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <title>Large <fixed-case>SMT</fixed-case> data-sets extracted from <fixed-case>W</fixed-case>ikipedia</title>
       <pages>656–663</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/103_Paper.pdf</url>
@@ -270,7 +270,7 @@
       <bibkey>tufis-2014-large</bibkey>
     </paper>
     <paper id="25">
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Livy</first><last>Real</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
@@ -281,10 +281,10 @@
       <bibkey>de-paiva-etal-2014-nomlex</bibkey>
     </paper>
     <paper id="26">
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <author><first>Jordi</first><last>Sesé</last></author>
       <title><fixed-case>VERT</fixed-case>a: Facing a Multilingual Experience of a Linguistically-based <fixed-case>MT</fixed-case> Evaluation</title>
       <pages>2701–2707</pages>
@@ -294,7 +294,7 @@
     </paper>
     <paper id="27">
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <title>Corpus and Method for Identifying Citations in Non-Academic Text</title>
       <pages>4316–4319</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1036_Paper.pdf</url>
@@ -304,7 +304,7 @@
     <paper id="28">
       <author><first>Vanessa</first><last>Loza</last></author>
       <author><first>Shibamouli</first><last>Lahiri</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Po-Hsiang</first><last>Lai</last></author>
       <title>Building a Dataset for Summarization and Keyword Extraction from Emails</title>
       <pages>2441–2446</pages>
@@ -322,7 +322,7 @@
       <bibkey>schmidek-barbosa-2014-improving</bibkey>
     </paper>
     <paper id="30">
-      <author><first>Juan</first><last>Soler Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler Company</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <title>How to Use less Features and Reach Better Performance in Author Gender Identification</title>
       <pages>1315–1319</pages>
@@ -350,7 +350,7 @@
       <author><first>Sándor</first><last>Szeverényi</last></author>
       <author><first>Zsuzsa</first><last>Várnai</last></author>
       <author><first>Paul</first><last>Trilsbeek</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <title>Languagesindanger.eu - Including Multimedia Language Resources to disseminate Knowledge and Create Educational Material on less-Resourced Languages</title>
       <pages>530–535</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1046_Paper.pdf</url>
@@ -368,7 +368,7 @@
       <bibkey>andreeva-etal-2014-cross</bibkey>
     </paper>
     <paper id="34">
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title><fixed-case>D</fixed-case>e<fixed-case>L</fixed-case>ex, a freely-avaible, large-scale and linguistically grounded morphological lexicon for <fixed-case>G</fixed-case>erman</title>
       <pages>2778–2784</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/105_Paper.pdf</url>
@@ -377,7 +377,7 @@
     </paper>
     <paper id="35">
       <author><first>Peter</first><last>Baumann</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <title>Using Resource-Rich Languages to Improve Morphological Analysis of Under-Resourced Languages</title>
       <pages>3355–3359</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1051_Paper.pdf</url>
@@ -388,7 +388,7 @@
       <author><first>Clara</first><last>Bacciu</last></author>
       <author><first>Angelica Lo</first><last>Duca</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
-      <author><first>Maurizio</first><last>Tesconi</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tesconi</last></author>
       <title>Accommodations in Tuscany as Linked Data</title>
       <pages>3542–3545</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1052_Paper.pdf</url>
@@ -399,7 +399,7 @@
       <author><first>Przemyslaw</first><last>Lenkiewicz</last></author>
       <author><first>Olha</first><last>Shkaravska</last></author>
       <author><first>Twan</first><last>Goosen</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <author><first>Stephanie</first><last>Roth</last></author>
       <author><first>Olof</first><last>Olsson</last></author>
@@ -430,7 +430,7 @@
       <bibkey>lonsdale-christensen-2014-combining</bibkey>
     </paper>
     <paper id="40">
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <author><first>Jan</first><last>Kouril</last></author>
       <title>Semantic Search in Documents Enriched by <fixed-case>LOD</fixed-case>-based Annotations</title>
       <pages>3724–3727</pages>
@@ -440,7 +440,7 @@
     </paper>
     <paper id="41">
       <author><first>Manuel</first><last>Fiorelli</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <title>A Meta-data Driven Platform for Semi-automatic Configuration of Ontology Mediators</title>
       <pages>4178–4183</pages>
@@ -450,7 +450,7 @@
     </paper>
     <paper id="42">
       <author><first>Huijing</first><last>Deng</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <title>Semantic approaches to software component retrieval with <fixed-case>E</fixed-case>nglish queries</title>
       <pages>3248–3252</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/106_Paper.pdf</url>
@@ -476,13 +476,13 @@
       <bibkey>wolff-etal-2014-missed</bibkey>
     </paper>
     <paper id="45">
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Timothy</first><last>Dozat</last></author>
       <author><first>Natalia</first><last>Silveira</last></author>
       <author><first>Katri</first><last>Haverinen</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <title>Universal <fixed-case>S</fixed-case>tanford dependencies: A cross-linguistic typology</title>
       <pages>4585–4592</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1062_Paper.pdf</url>
@@ -491,10 +491,10 @@
     </paper>
     <paper id="46">
       <author><first>Reid</first><last>Swanson</last></author>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Luke</first><last>Eisenberg</last></author>
       <author><first>Thomas</first><last>Corcoran</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <title>Getting Reliable Annotations for Sarcasm in Online Dialogues</title>
       <pages>4250–4257</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1063_Paper.pdf</url>
@@ -505,7 +505,7 @@
       <author><first>Johannes</first><last>Hellrich</last></author>
       <author><first>Simon</first><last>Clematide</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <title>Collaboratively Annotating Multilingual Parallel Corpora in the Biomedical Domain—some <fixed-case>MANTRA</fixed-case>s</title>
       <pages>4033–4040</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1064_Paper.pdf</url>
@@ -516,7 +516,7 @@
       <author><first>Heeyoung</first><last>Lee</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <title>On the Importance of Text Analysis for Stock Price Prediction</title>
       <pages>1170–1175</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1065_Paper.pdf</url>
@@ -537,7 +537,7 @@
     </paper>
     <paper id="50">
       <author><first>Antonio</first><last>San Martín</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <title>Definition patterns for predicative terms in specialized lexical resources</title>
       <pages>3748–3755</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1067_Paper.pdf</url>
@@ -580,9 +580,9 @@
       <author><first>Yuri</first><last>Bizzoni</last></author>
       <author><first>Federico</first><last>Boschetti</last></author>
       <author><first>Harry</first><last>Diakoff</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <title>The Making of <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <pages>1140–1147</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1071_Paper.pdf</url>
@@ -591,10 +591,10 @@
     </paper>
     <paper id="55">
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Joshua</first><last>Crowgey</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <title>Enriching <fixed-case>ODIN</fixed-case></title>
       <pages>3151–3157</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1072_Paper.pdf</url>
@@ -602,7 +602,7 @@
       <bibkey>xia-etal-2014-enriching</bibkey>
     </paper>
     <paper id="56">
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <title><fixed-case>T</fixed-case>urkish Treebank as a Gold Standard for Morphological Disambiguation and Its Influence on Parsing</title>
       <pages>3360–3365</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1073_Paper.pdf</url>
@@ -638,7 +638,7 @@
     </paper>
     <paper id="60">
       <author><first>Dana</first><last>Dannélls</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <title>Extracting a bilingual semantic grammar from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-annotated corpora</title>
       <pages>2466–2473</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1079_Paper.pdf</url>
@@ -677,7 +677,7 @@
       <author><first>Victoria</first><last>Rosén</last></author>
       <author><first>Petter</first><last>Haugereid</last></author>
       <author><first>Martha</first><last>Thunes</last></author>
-      <author><first>Gyri S.</first><last>Losnegaard</last></author>
+      <author id="gyri-smordal-losnegaard"><first>Gyri S.</first><last>Losnegaard</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
       <title>The Interplay Between Lexical and Syntactic Resources in Incremental Parsebanking</title>
       <pages>1617–1624</pages>
@@ -689,7 +689,7 @@
       <author><first>Rafal</first><last>Rak</last></author>
       <author><first>Jacob</first><last>Carter</last></author>
       <author><first>Andrew</first><last>Rowley</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <title>Interoperability and Customisation of Annotation Schemata in Argo</title>
       <pages>3837–3842</pages>
@@ -710,11 +710,11 @@
     <paper id="67">
       <author><first>Natalia</first><last>Silveira</last></author>
       <author><first>Timothy</first><last>Dozat</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <author><first>Miriam</first><last>Connor</last></author>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Chris</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Chris</first><last>Manning</last></author>
       <title>A Gold Standard Dependency Corpus for <fixed-case>E</fixed-case>nglish</title>
       <pages>2897–2904</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1089_Paper.pdf</url>
@@ -731,7 +731,7 @@
     </paper>
     <paper id="69">
       <author><first>Simon</first><last>Scerri</last></author>
-      <author><first>Behrang Q.</first><last>Zadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang Q.</first><last>Zadeh</last></author>
       <author><first>Maciej</first><last>Dabrowski</last></author>
       <author><first>Ismael</first><last>Rivera</last></author>
       <title>Extracting Information for Context-aware Meeting Preparation</title>
@@ -742,8 +742,8 @@
     </paper>
     <paper id="70">
       <author><first>Kai</first><last>Hong</last></author>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Alex</first><last>Kulesza</last></author>
       <author><first>Hui</first><last>Lin</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
@@ -755,7 +755,7 @@
     </paper>
     <paper id="71">
       <author><first>Zhiyi</first><last>Song</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
@@ -777,7 +777,7 @@
       <author><first>Viviane</first><last>Moreira</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Maria José</first><last>Finatto</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José</first><last>Finatto</last></author>
       <title>Comparing the Quality of Focused Crawlers and of the Translation Resources Obtained from them</title>
       <pages>3572–3578</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1095_Paper.pdf</url>
@@ -790,7 +790,7 @@
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <title>Augmenting <fixed-case>E</fixed-case>nglish Adjective Senses with Supersenses</title>
       <pages>4359–4365</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1096_Paper.pdf</url>
@@ -810,7 +810,7 @@
     <paper id="75">
       <author><first>Tim</first><last>vor der Brück</last></author>
       <author><first>Alexander</first><last>Mehler</last></author>
-      <author><first>Zahurul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
       <title><fixed-case>C</fixed-case>ol<fixed-case>L</fixed-case>ex.en: Automatically Generating and Evaluating a Full-form Lexicon for <fixed-case>E</fixed-case>nglish</title>
       <pages>3756–3760</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1099_Paper.pdf</url>
@@ -850,7 +850,7 @@
     </paper>
     <paper id="79">
       <author><first>Anthony</first><last>Rousseau</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <title>Enhancing the <fixed-case>TED</fixed-case>-<fixed-case>LIUM</fixed-case> Corpus with Selected Data for Language Modeling and More <fixed-case>TED</fixed-case> Talks</title>
       <pages>3935–3939</pages>
@@ -872,7 +872,7 @@
     <paper id="81">
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Aleš</first><last>Tavčar</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title>slo<fixed-case>WC</fixed-case>rowd: A crowdsourcing tool for lexicographic tasks</title>
       <pages>3471–3475</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1106_Paper.pdf</url>
@@ -891,9 +891,9 @@
     </paper>
     <paper id="83">
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Nùria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nùria</first><last>Gala</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <title><fixed-case>FLEL</fixed-case>ex: a graded lexical resource for <fixed-case>F</fixed-case>rench foreign learners</title>
       <pages>3766–3773</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1108_Paper.pdf</url>
@@ -901,12 +901,12 @@
       <bibkey>francois-etal-2014-flelex</bibkey>
     </paper>
     <paper id="84">
-      <author><first>Lucas</first><last>Hilgert</last></author>
+      <author id="lucas-welter-hilgert"><first>Lucas</first><last>Hilgert</last></author>
       <author><first>Lucelene</first><last>Lopes</last></author>
       <author><first>Artur</first><last>Freitas</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Denise</first><last>Hogetop</last></author>
-      <author><first>Aline</first><last>Vanin</last></author>
+      <author id="aline-a-vanin"><first>Aline</first><last>Vanin</last></author>
       <title>Building Domain Specific Bilingual Dictionaries</title>
       <pages>2772–2777</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1112_Paper.pdf</url>
@@ -915,7 +915,7 @@
     </paper>
     <paper id="85">
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Natalie</first><last>Kübler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kübler</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <title>A Corpus of Machine Translation Errors Extracted from Translation Students Exercises</title>
       <pages>3585–3588</pages>
@@ -924,7 +924,7 @@
       <bibkey>wisniewski-etal-2014-corpus</bibkey>
     </paper>
     <paper id="86">
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
       <author><first>Douglas</first><last>Briesch</last></author>
@@ -957,9 +957,9 @@
     <paper id="89">
       <author><first>David</first><last>Graff</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Xiaoyi</first><last>Ma</last></author>
-      <author><first>Karen</first><last>Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Jones</last></author>
       <author><first>Ann</first><last>Sawyer</last></author>
       <title>The <fixed-case>RATS</fixed-case> Collection: Supporting <fixed-case>HLT</fixed-case> Research with Degraded Audio Data</title>
       <pages>1970–1977</pages>
@@ -968,8 +968,8 @@
       <bibkey>graff-etal-2014-rats</bibkey>
     </paper>
     <paper id="90">
-      <author><first>Chris</first><last>Hokamp</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Peter</first><last>Schuelke</last></author>
       <title>Modeling Language Proficiency Using Implicit Feedback</title>
       <pages>3983–3986</pages>
@@ -981,8 +981,8 @@
       <author><first>Kevin</first><last>Reschke</last></author>
       <author><first>Martin</first><last>Jankowiak</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <title>Event Extraction Using Distant Supervision</title>
       <pages>4527–4531</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1127_Paper.pdf</url>
@@ -1009,8 +1009,8 @@
     </paper>
     <paper id="94">
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <title>Creating and using large monolingual parallel corpora for sentential paraphrase generation</title>
       <pages>4292–4299</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1135_Paper.pdf</url>
@@ -1020,7 +1020,7 @@
     <paper id="95">
       <author><first>Jayendra Rakesh</first><last>Yeka</last></author>
       <author><first>Prasanth</first><last>Kolachina</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <title>Benchmarking of <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi parallel corpora</title>
       <pages>1812–1818</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1137_Paper.pdf</url>
@@ -1050,7 +1050,7 @@
     </paper>
     <paper id="98">
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
       <title>Votter Corpus: A Corpus of Social Polling Language</title>
       <pages>3693–3697</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1143_Paper.pdf</url>
@@ -1067,7 +1067,7 @@
       <bibkey>chen-ng-2014-sinocoreferencer</bibkey>
     </paper>
     <paper id="100">
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Michael</first><last>Ciul</last></author>
@@ -1088,8 +1088,8 @@
       <bibkey>scheffler-2014-german</bibkey>
     </paper>
     <paper id="102">
-      <author><first>Helen</first><last>Hastie</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <title>A Comparative Evaluation Methodology for <fixed-case>NLG</fixed-case> in Interactive Systems</title>
       <pages>4004–4011</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1147_Paper.pdf</url>
@@ -1105,7 +1105,7 @@
       <bibkey>ohara-2014-relating</bibkey>
     </paper>
     <paper id="104">
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <title><fixed-case>TMO</fixed-case> — The Federated Ontology of the <fixed-case>T</fixed-case>rend<fixed-case>M</fixed-case>iner Project</title>
       <pages>4164–4171</pages>
@@ -1114,7 +1114,7 @@
       <bibkey>krieger-declerck-2014-tmo</bibkey>
     </paper>
     <paper id="105">
-      <author><first>Gemma Bel</first><last>Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma Bel</first><last>Enguix</last></author>
       <author><first>Reinhard</first><last>Rapp</last></author>
       <author><first>Michael</first><last>Zock</last></author>
       <title>A Graph-Based Approach for Computing Free Word Associations</title>
@@ -1135,7 +1135,7 @@
     <paper id="107">
       <author><first>Paul</first><last>Felt</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <title><fixed-case>M</fixed-case>omresp: A <fixed-case>B</fixed-case>ayesian Model for Multi-Annotator Document Labeling</title>
       <pages>3704–3711</pages>
@@ -1145,7 +1145,7 @@
     </paper>
     <paper id="108">
       <author><first>Jessica</first><last>Ouyang</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <title>Towards Automatic Detection of Narrative Structure</title>
       <pages>4624–4631</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1154_Paper.pdf</url>
@@ -1155,7 +1155,7 @@
     <paper id="109">
       <author><first>Anabela</first><last>Barreiro</last></author>
       <author><first>Fernando</first><last>Batista</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <title><fixed-case>O</fixed-case>pen<fixed-case>L</fixed-case>ogos Semantico-Syntactic Knowledge-Rich Bilingual Dictionaries</title>
@@ -1175,7 +1175,7 @@
       <bibkey>ellendorff-etal-2014-using</bibkey>
     </paper>
     <paper id="111">
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <title>Crowdsourcing for the identification of event nominals: an experiment</title>
       <pages>1949–1955</pages>
@@ -1213,7 +1213,7 @@
       <bibkey>sennrich-kunz-2014-zmorge</bibkey>
     </paper>
     <paper id="115">
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mohammed</first><last>Attia</last></author>
@@ -1250,7 +1250,7 @@
     </paper>
     <paper id="118">
       <author><first>Heather</first><last>Pon-Barry</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Nicholas</first><last>Longenbaugh</last></author>
       <title>Eliciting and Annotating Uncertainty in Spoken Language</title>
       <pages>1978–1983</pages>
@@ -1268,7 +1268,7 @@
       <bibkey>gleize-grau-2014-hierarchical</bibkey>
     </paper>
     <paper id="120">
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Kurt</first><last>Sultana</last></author>
       <title>Automatic Methods for the Extension of a Bilingual Dictionary using Comparable Corpora</title>
       <pages>3790–3797</pages>
@@ -1295,7 +1295,7 @@
       <author><first>Daniela</first><last>Braga</last></author>
       <author><first>Hyongsil</first><last>Cho</last></author>
       <author><first>Amadeu</first><last>Ferreira</last></author>
-      <author><first>Miguel</first><last>Dias</last></author>
+      <author id="miguel-sales-dias"><first>Miguel</first><last>Dias</last></author>
       <title>Casa de la Lhéngua: a set of language resources and natural language processing tools for <fixed-case>M</fixed-case>irandese</title>
       <pages>536–540</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1174_Paper.pdf</url>
@@ -1317,7 +1317,7 @@
       <author><first>Dorothee</first><last>Beermann</last></author>
       <author><first>Tore</first><last>Bruland</last></author>
       <author><first>Mary Esther Kropp</first><last>Dakubu</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <title><fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>al - towards a multilingual valence lexicon</title>
       <pages>2478–2485</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1179_Paper.pdf</url>
@@ -1338,10 +1338,10 @@
       <bibkey>vacher-etal-2014-sweet</bibkey>
     </paper>
     <paper id="126">
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <author><first>Rob</first><last>Brennan</last></author>
       <author><first>Leroy</first><last>Finn</last></author>
-      <author><first>Dominic</first><last>Jones</last></author>
+      <author id="dominic-r-jones"><first>Dominic</first><last>Jones</last></author>
       <author><first>Alan</first><last>Meehan</last></author>
       <author><first>Declan</first><last>O’Sullivan</last></author>
       <author><first>Sebastian</first><last>Hellmann</last></author>
@@ -1353,8 +1353,8 @@
       <bibkey>lewis-etal-2014-global</bibkey>
     </paper>
     <paper id="127">
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <title>On the <fixed-case>R</fixed-case>omance Languages Mutual Intelligibility</title>
       <pages>3313–3318</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1183_Paper.pdf</url>
@@ -1363,8 +1363,8 @@
     </paper>
     <paper id="128">
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Ionut</first><last>Sorodoc</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="ionut-sorodoc"><first>Ionut</first><last>Sorodoc</last></author>
       <title>Aggregation methods for efficient collocation detection</title>
       <pages>4041–4045</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1184_Paper.pdf</url>
@@ -1384,8 +1384,8 @@
       <author><first>Juris</first><last>Borzovs</last></author>
       <author><first>Ilze</first><last>Ilziņa</last></author>
       <author><first>Iveta</first><last>Keiša</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <title>Terminology localization guidelines for the national scenario</title>
       <pages>4012–4017</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1189_Paper.pdf</url>
@@ -1395,7 +1395,7 @@
     <paper id="131">
       <author><first>Claire</first><last>Brierley</last></author>
       <author><first>Majdi</first><last>Sawalha</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <title>Tools for <fixed-case>A</fixed-case>rabic Natural Language Processing: a case study in qalqalah prosody</title>
       <pages>283–287</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/119_Paper.pdf</url>
@@ -1406,7 +1406,7 @@
       <author><first>Ana Isabel</first><last>Mata</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
       <author><first>Fernando</first><last>Batista</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <title>Teenage and adult speech in school context: building and processing a corpus of <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese</title>
       <pages>3914–3919</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1193_Paper.pdf</url>
@@ -1416,9 +1416,9 @@
     <paper id="133">
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Mandy</first><last>Simons</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Jordan</first><last>Bender</last></author>
       <title>A Unified Annotation Scheme for the Semantic/Pragmatic Components of Definiteness</title>
       <pages>910–916</pages>
@@ -1457,7 +1457,7 @@
     <paper id="137">
       <author><first>Shikun</first><last>Zhang</last></author>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <title>Dual Subtitles as Parallel Corpora</title>
       <pages>1869–1874</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1199_Paper.pdf</url>
@@ -1475,8 +1475,8 @@
     </paper>
     <paper id="139">
       <author><first>Guiyao</first><last>Ke</last></author>
-      <author><first>Pierre-Francois</first><last>Marteau</last></author>
-      <author><first>Gildas</first><last>Menier</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-Francois</first><last>Marteau</last></author>
+      <author id="gildas-menier"><first>Gildas</first><last>Menier</last></author>
       <title>Variations on quantitative comparability measures and their evaluations on synthetic <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish comparable corpora</title>
       <pages>133–139</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/120_Paper.pdf</url>
@@ -1484,8 +1484,8 @@
       <bibkey>ke-etal-2014-variations</bibkey>
     </paper>
     <paper id="140">
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Ioana</first><last>Chitoran</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
       <title>Using a machine learning model to assess the complexity of stress systems</title>
@@ -1512,7 +1512,7 @@
     </paper>
     <paper id="142">
       <author><first>Kevin</first><last>Black</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Paul</first><last>Felt</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Kristian</first><last>Heal</last></author>
@@ -1524,9 +1524,9 @@
       <bibkey>black-etal-2014-evaluating</bibkey>
     </paper>
     <paper id="143">
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Ilnar</first><last>Salimzyanov</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <title>Finite-state morphological transducers for three Kypchak languages</title>
       <pages>3378–3385</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1207_Paper.pdf</url>
@@ -1561,7 +1561,7 @@
       <bibkey>schultz-schlippe-2014-globalphone</bibkey>
     </paper>
     <paper id="147">
-      <author><first>Alexandru</first><last>Ceausu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceausu</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <title>Pre-ordering of phrase-based machine translation input in translation workflow</title>
       <pages>3589–3592</pages>
@@ -1571,7 +1571,7 @@
     </paper>
     <paper id="148">
       <author><first>Emanuele</first><last>Di Buccio</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Gianmaria</first><last>Silvello</last></author>
       <title>A Vector Space Model for Syntactic Distances Between Dialects</title>
       <pages>2486–2489</pages>
@@ -1602,7 +1602,7 @@
     <paper id="151">
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <title>Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Topics and <fixed-case>W</fixed-case>ikipedia Categories to Generate Multilingual Domain Specific Resources</title>
       <pages>1117–1121</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/122_Paper.pdf</url>
@@ -1620,7 +1620,7 @@
       <bibkey>lee-etal-2014-annotating</bibkey>
     </paper>
     <paper id="153">
-      <author><first>Shyam Sundar</first><last>Agrawal</last></author>
+      <author id="shyam-sundar-agrawal"><first>Shyam Sundar</first><last>Agrawal</last></author>
       <author><first/><last>Abhimanue</last></author>
       <author><first>Shweta</first><last>Bansal</last></author>
       <author><first>Minakshi</first><last>Mahajan</last></author>
@@ -1631,11 +1631,11 @@
       <bibkey>agrawal-etal-2014-statistical</bibkey>
     </paper>
     <paper id="154">
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Andrea</first><last>Mazzucchi</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
       <title>New Directions for Language Resource Development and Distribution</title>
       <pages>1539–1546</pages>
@@ -1645,9 +1645,9 @@
     </paper>
     <paper id="155">
       <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <title>Rediscovering 15 Years of Discoveries in Language Resources and Evaluation: The <fixed-case>LREC</fixed-case> Anthology Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1228_Paper.pdf</url>
       <abstract>This paper aims at analyzing the content of the LREC conferences contained in the ELRA Anthology over the past 15 years (1998-2013). It follows similar exercises that have been conducted, such as the survey on the IEEE ICASSP conference series from 1976 to 1990, which served in the launching of the ESCA Eurospeech conference, a survey of the Association of Computational Linguistics (ACL) over 50 years of existence, which was presented at the ACL conference in 2012, or a survey over the 25 years (1987-2012) of the conferences contained in the ISCA Archive, presented at Interspeech 2013. It contains first an analysis of the evolution of the number of papers and authors over time, including the study of their gender, nationality and affiliation, and of the collaboration among authors. It then studies the funding sources of the research investigations that are reported in the papers. It conducts an analysis of the evolution of the research topics within the community over time. It finally looks at reuse and plagiarism in the papers. The survey shows the present trends in the conference series and in the Language Resources and Evaluation scientific community. Conducting this survey also demonstrated the importance of a clear and unique identification of authors, papers and other sources to facilitate the analysis. This survey is preliminary, as many other aspects also deserve attention. But we hope it will help better understanding and forging our community in the global village.</abstract>
@@ -1683,7 +1683,7 @@
       <bibkey>angelov-2014-bootstrapping</bibkey>
     </paper>
     <paper id="159">
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <title><fixed-case>M</fixed-case>otà<fixed-case>M</fixed-case>ot project: conversion of a <fixed-case>F</fixed-case>rench-<fixed-case>K</fixed-case>hmer published dictionary for building a multilingual lexical system</title>
       <pages>1024–1031</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/128_Paper.pdf</url>
@@ -1692,10 +1692,10 @@
     </paper>
     <paper id="160">
       <author><first>Sérgio</first><last>Curto</last></author>
-      <author><first>Ana C.</first><last>Mendes</last></author>
+      <author id="ana-cristina-mendes"><first>Ana C.</first><last>Mendes</last></author>
       <author><first>Pedro</first><last>Curto</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
-      <author><first>Ângela</first><last>Costa</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
       <title><fixed-case>JUST</fixed-case>.<fixed-case>ASK</fixed-case>, a <fixed-case>QA</fixed-case> system that learns to answer new questions from previous interactions</title>
       <pages>2603–2607</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/130_Paper.pdf</url>
@@ -1715,7 +1715,7 @@
     <paper id="162">
       <author><first>Mirjam</first><last>Ernestus</last></author>
       <author><first>Lucie</first><last>Kočková-Amortová</last></author>
-      <author><first>Petr</first><last>Pollak</last></author>
+      <author id="petr-pollak"><first>Petr</first><last>Pollak</last></author>
       <title>The Nijmegen Corpus of Casual <fixed-case>C</fixed-case>zech</title>
       <pages>365–370</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/134_Paper.pdf</url>
@@ -1741,7 +1741,7 @@
       <bibkey>gruszczynski-ogrodniczuk-2014-digital</bibkey>
     </paper>
     <paper id="165">
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <title>Open-domain Interaction and Online Content in the <fixed-case>S</fixed-case>ami Language</title>
       <pages>517–522</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/143_Paper.pdf</url>
@@ -1767,7 +1767,7 @@
     </paper>
     <paper id="168">
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Kristian</first><last>Heal</last></author>
       <title>Using Transfer Learning to Assist Exploratory Corpus Annotation</title>
@@ -1778,13 +1778,13 @@
     </paper>
     <paper id="169">
       <author><first>Judith</first><last>Muzerelle</last></author>
-      <author><first>Anaïs</first><last>Lefeuvre</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre</last></author>
       <author><first>Emmanuel</first><last>Schang</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <author><first>Aurore</first><last>Pelletier</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <title><fixed-case>ANCOR</fixed-case>_<fixed-case>C</fixed-case>entre, a large free spoken <fixed-case>F</fixed-case>rench coreference corpus: description of the resource and reliability measures</title>
       <pages>843–847</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/150_Paper.pdf</url>
@@ -1792,7 +1792,7 @@
       <bibkey>muzerelle-etal-2014-ancor</bibkey>
     </paper>
     <paper id="170">
-      <author><first>Alex</first><last>Rudnick</last></author>
+      <author id="alex-rudnick"><first>Alex</first><last>Rudnick</last></author>
       <author><first>Taylor</first><last>Skidmore</last></author>
       <author><first>Alberto</first><last>Samaniego</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
@@ -1803,7 +1803,7 @@
       <bibkey>rudnick-etal-2014-guampa</bibkey>
     </paper>
     <paper id="171">
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <title>Experiences with the <fixed-case>ISO</fixed-case>cat Data Category Registry</title>
@@ -1832,7 +1832,7 @@
       <bibkey>iida-tokunaga-2014-building</bibkey>
     </paper>
     <paper id="174">
-      <author><first>Matej</first><last>Ďurčo</last></author>
+      <author id="matej-durco"><first>Matej</first><last>Ďurčo</last></author>
       <author><first>Menzo</first><last>Windhouwer</last></author>
       <title>The <fixed-case>CMD</fixed-case> Cloud</title>
       <pages>687–690</pages>
@@ -1881,7 +1881,7 @@
       <author><first>Hiroaki</first><last>Shimizu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <title>Collection of a Simultaneous Translation Corpus for Comparative Analysis</title>
       <pages>670–673</pages>
@@ -1903,7 +1903,7 @@
     <paper id="180">
       <author><first>Volha</first><last>Petukhova</last></author>
       <author><first>Andrei</first><last>Malchanau</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <title>Interoperability of Dialogue Corpora through <fixed-case>ISO</fixed-case> 24617-2-based Querying</title>
       <pages>4407–4414</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/165_Paper.pdf</url>
@@ -1911,11 +1911,11 @@
       <bibkey>petukhova-etal-2014-interoperability</bibkey>
     </paper>
     <paper id="181">
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Steve</first><last>Bodnar</last></author>
       <author><first>Bart Penning</first><last>de Vries</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <title><fixed-case>ASR</fixed-case>-based <fixed-case>CALL</fixed-case> systems and learner speech data: new resources and opportunities for research and development in second language learning</title>
       <pages>2708–2714</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/168_Paper.pdf</url>
@@ -1952,8 +1952,8 @@
       <bibkey>schmidt-2014-database</bibkey>
     </paper>
     <paper id="184">
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <title>Building a Dataset of Multilingual Cognates for the <fixed-case>R</fixed-case>omanian Lexicon</title>
       <pages>1038–1043</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/175_Paper.pdf</url>
@@ -1963,7 +1963,7 @@
     <paper id="185">
       <author><first>Giuseppe</first><last>Rizzo</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <title>Benchmarking the Extraction and Disambiguation of Named Entities on the Semantic Web</title>
       <pages>4593–4600</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/176_Paper.pdf</url>
@@ -1973,17 +1973,17 @@
     <paper id="186">
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Kit</first><last>Cho</last></author>
-      <author><first>G. Aaron</first><last>Broadwell</last></author>
+      <author id="george-aaron-broadwell"><first>G. Aaron</first><last>Broadwell</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>John</first><last>Lien</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <author><first>Umit</first><last>Boz</last></author>
       <author><first>Ignacio</first><last>Cases</last></author>
-      <author><first>Ching-sheng</first><last>Lin</last></author>
+      <author id="ching-sheng-lin"><first>Ching-sheng</first><last>Lin</last></author>
       <title>Automatic Expansion of the <fixed-case>MRC</fixed-case> Psycholinguistic Database Imageability Ratings</title>
       <pages>2800–2805</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/178_Paper.pdf</url>
@@ -1991,9 +1991,9 @@
       <bibkey>liu-etal-2014-automatic-expansion</bibkey>
     </paper>
     <paper id="187">
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Shahid Mushtaq</first><last>Bhat</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <title>Towards building a <fixed-case>K</fixed-case>ashmiri Treebank: Setting up the Annotation Pipeline</title>
       <pages>748–752</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf</url>
@@ -2002,7 +2002,7 @@
     </paper>
     <paper id="188">
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Aliaksei</first><last>Severyn</last></author>
       <author><first>Agata</first><last>Rotondi</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
@@ -2024,9 +2024,9 @@
     <paper id="190">
       <author><first>Arantza</first><last>del Pozo</last></author>
       <author><first>Carlo</first><last>Aliprandi</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <author><first>Carlos</first><last>Mendes</last></author>
-      <author><first>Joao P.</first><last>Neto</last></author>
+      <author id="joao-p-neto"><first>Joao P.</first><last>Neto</last></author>
       <author><first>Sérgio</first><last>Paulo</last></author>
       <author><first>Nicola</first><last>Piccinini</last></author>
       <author><first>Matteo</first><last>Raffaelli</last></author>
@@ -2056,9 +2056,9 @@
       <bibkey>darwish-gao-2014-simple</bibkey>
     </paper>
     <paper id="193">
-      <author><first>Miguel B.</first><last>Almeida</last></author>
+      <author id="miguel-b-almeida"><first>Miguel B.</first><last>Almeida</last></author>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Helena</first><last>Figueira</last></author>
       <author><first>Pedro</first><last>Mendes</last></author>
       <author><first>Cláudia</first><last>Pinto</last></author>
@@ -2090,7 +2090,7 @@
     </paper>
     <paper id="196">
       <author><first>Mohamed</first><last>Morchid</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
       <title>Characterizing and Predicting Bursty Events: The Buzz Case Study on <fixed-case>T</fixed-case>witter</title>
       <pages>2766–2771</pages>
@@ -2099,12 +2099,12 @@
       <bibkey>morchid-etal-2014-characterizing</bibkey>
     </paper>
     <paper id="197">
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Frank</first><last>Müller</last></author>
+      <author id="frank-henrik-muller"><first>Frank</first><last>Müller</last></author>
       <author><first>Thomas</first><last>Tolxdorff</last></author>
       <title>Information Extraction from <fixed-case>G</fixed-case>erman Patient Records via Hybrid Parsing and Relation Extraction Strategies</title>
       <pages>2043–2048</pages>
@@ -2115,7 +2115,7 @@
     <paper id="198">
       <author><first>Dietmar</first><last>Schabus</last></author>
       <author><first>Michael</first><last>Pucher</last></author>
-      <author><first>Phil</first><last>Hoole</last></author>
+      <author id="philip-hoole"><first>Phil</first><last>Hoole</last></author>
       <title>The <fixed-case>MMASCS</fixed-case> multi-modal annotated synchronous corpus of audio, video, facial motion and tongue motion data of normal, fast and slow speech</title>
       <pages>3411–3416</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/192_Paper.pdf</url>
@@ -2135,7 +2135,7 @@
     <paper id="200">
       <author><first>Joris</first><last>Pelemans</last></author>
       <author><first>Kris</first><last>Demuynck</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <title>Speech Recognition Web Services for <fixed-case>D</fixed-case>utch</title>
       <pages>3041–3044</pages>
@@ -2144,9 +2144,9 @@
       <bibkey>pelemans-etal-2014-speech</bibkey>
     </paper>
     <paper id="201">
-      <author><first>Angela</first><last>Costa</last></author>
+      <author id="angela-costa"><first>Angela</first><last>Costa</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <title>Translation errors from <fixed-case>E</fixed-case>nglish to <fixed-case>P</fixed-case>ortuguese: an annotated corpus</title>
       <pages>1231–1234</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/199_Paper.pdf</url>
@@ -2183,7 +2183,7 @@
     <paper id="205">
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Thomas</first><last>Meyer</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench Verb Phrase Alignment in <fixed-case>E</fixed-case>uroparl for Tense Translation Modeling</title>
       <pages>674–681</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/205_Paper.pdf</url>
@@ -2192,7 +2192,7 @@
     </paper>
     <paper id="206">
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <title>The evolving infrastructure for language resources and the role for data scientists</title>
       <pages>608–612</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/206_Paper.pdf</url>
@@ -2254,7 +2254,7 @@
       <author><first>Ingrid</first><last>Hove</last></author>
       <author><first>Ibrahim</first><last>Almajai</last></author>
       <author><first>Volker</first><last>Dellwo</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <title>A Crowdsourcing Smartphone Application for <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman: Putting Language Documentation in the Hands of the Users</title>
       <pages>3444–3447</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/214_Paper.pdf</url>
@@ -2263,8 +2263,8 @@
     </paper>
     <paper id="213">
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Philip</first><last>Ogren</last></author>
-      <author><first>Lee</first><last>Becker</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
       <title><fixed-case>C</fixed-case>lear<fixed-case>TK</fixed-case> 2.0: Design Patterns for Machine Learning in <fixed-case>UIMA</fixed-case></title>
       <pages>3289–3293</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/218_Paper.pdf</url>
@@ -2275,8 +2275,8 @@
       <author><first>Inès</first><last>Zribi</last></author>
       <author><first>Rahma</first><last>Boujelbane</last></author>
       <author><first>Abir</first><last>Masmoudi</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
-      <author><first>Lamia</first><last>Belguith</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Belguith</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <title>A Conventional Orthography for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic</title>
       <pages>2355–2361</pages>
@@ -2313,12 +2313,12 @@
     </paper>
     <paper id="218">
       <author><first>Blanca</first><last>Arias</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Mercè</first><last>Lorente</last></author>
-      <author><first>Montserrat</first><last>Marimón</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimón</last></author>
       <author><first>Alba</first><last>Milà</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
       <author><first>Imanol</first><last>Larrea</last></author>
       <title>Boosting the creation of a treebank</title>
@@ -2358,7 +2358,7 @@
       <author><first>Wolfgang</first><last>Maier</last></author>
       <author><first>Miriam</first><last>Kaeshammer</last></author>
       <author><first>Peter</first><last>Baumann</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <title>Discosuite - A parser test suite for <fixed-case>G</fixed-case>erman discontinuous structures</title>
       <pages>2905–2912</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/230_Paper.pdf</url>
@@ -2377,8 +2377,8 @@
     <paper id="224">
       <author><first>Gregor</first><last>Titze</last></author>
       <author><first>Volha</first><last>Bryl</last></author>
-      <author><first>Cäcilia</first><last>Zirn</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="cacilia-zirn"><first>Cäcilia</first><last>Zirn</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <title><fixed-case>DB</fixed-case>pedia Domains: augmenting <fixed-case>DB</fixed-case>pedia with domain information</title>
       <pages>1438–1442</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/233_Paper.pdf</url>
@@ -2412,7 +2412,7 @@
       <bibkey>stein-2014-parsing</bibkey>
     </paper>
     <paper id="228">
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <title><fixed-case>ML</fixed-case>-Optimization of Ported Constraint Grammars</title>
       <pages>4483–4487</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/24_Paper.pdf</url>
@@ -2421,17 +2421,17 @@
     </paper>
     <paper id="229">
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
       <author><first>Kit</first><last>Cho</last></author>
       <author><first>Umit</first><last>Boz</last></author>
       <author><first>Ignacio</first><last>Cases</last></author>
       <author><first>Yuliya</first><last>Peshkova</last></author>
-      <author><first>Ching-Sheng</first><last>Lin</last></author>
+      <author id="ching-sheng-lin"><first>Ching-Sheng</first><last>Lin</last></author>
       <title>A Multi-Cultural Repository of Automatically Discovered Linguistic and Conceptual Metaphors</title>
       <pages>2495–2500</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/241_Paper.pdf</url>
@@ -2450,8 +2450,8 @@
     </paper>
     <paper id="231">
       <author><first>Lianet</first><last>Sepúlveda Torres</last></author>
-      <author><first>Magali Sanches</first><last>Duran</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <title>Generating a Lexicon of Errors in <fixed-case>P</fixed-case>ortuguese to Support an Error Identification System for <fixed-case>S</fixed-case>panish Native Learners</title>
       <pages>3952–3957</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/247_Paper.pdf</url>
@@ -2506,7 +2506,7 @@
     <paper id="236">
       <author><first>Marie</first><last>Kopřivová</last></author>
       <author><first>Hana</first><last>Goláňová</last></author>
-      <author><first>Petra</first><last>Klimešová</last></author>
+      <author id="petra-poukarova"><first>Petra</first><last>Klimešová</last></author>
       <author><first>David</first><last>Lukeš</last></author>
       <title>Mapping Diatopic and Diachronic Variation in Spoken <fixed-case>C</fixed-case>zech: The <fixed-case>ORTOFON</fixed-case> and <fixed-case>DIALEKT</fixed-case> Corpora</title>
       <pages>376–382</pages>
@@ -2540,7 +2540,7 @@
       <author><first>Katrin</first><last>Hein</last></author>
       <author><first>Rémi</first><last>Lavalley</last></author>
       <author><first>Ludwig</first><last>Linhuber</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <title>A Database of Freely Written Texts of <fixed-case>G</fixed-case>erman School Students for the Purpose of Automatic Spelling Error Classification</title>
       <pages>1212–1217</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/255_Paper.pdf</url>
@@ -2548,7 +2548,7 @@
       <bibkey>berkling-etal-2014-database</bibkey>
     </paper>
     <paper id="240">
-      <author><first>Christian</first><last>Haenig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Haenig</last></author>
       <author><first>Andreas</first><last>Niekler</last></author>
       <author><first>Carsten</first><last>Wuensch</last></author>
       <title><fixed-case>PACE</fixed-case> Corpus: a multilingual corpus of Polarity-annotated textual data from the domains Automotive and <fixed-case>CE</fixed-case>llphone</title>
@@ -2560,10 +2560,10 @@
     <paper id="241">
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Viktor</first><last>Varga</last></author>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>János</first><last>Zsibrita</last></author>
       <author><first>Ágoston</first><last>Nagy</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>János</first><last>Csirik</last></author>
       <title><fixed-case>S</fixed-case>zeged Corpus 2.5: Morphological Modifications in a Manually <fixed-case>POS</fixed-case>-tagged <fixed-case>H</fixed-case>ungarian Corpus</title>
       <pages>1074–1078</pages>
@@ -2573,7 +2573,7 @@
     </paper>
     <paper id="242">
       <author><first>Pierre André</first><last>Ménard</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <title>Linked Open Data and Web Corpus Data for noun compound bracketing</title>
       <pages>702–709</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/263_Paper.pdf</url>
@@ -2583,7 +2583,7 @@
     <paper id="243">
       <author><first>João</first><last>Freitas</last></author>
       <author><first>António</first><last>Teixeira</last></author>
-      <author><first>Miguel</first><last>Dias</last></author>
+      <author id="miguel-sales-dias"><first>Miguel</first><last>Dias</last></author>
       <title>Multimodal Corpora for Silent Speech Interaction</title>
       <pages>4507–4511</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/264_Paper.pdf</url>
@@ -2604,7 +2604,7 @@
     </paper>
     <paper id="245">
       <author><first>Evelina</first><last>Rennes</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <title>The Impact of Cohesion Errors in Extraction Based Summaries</title>
       <pages>1575–1582</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/27_Paper.pdf</url>
@@ -2615,7 +2615,7 @@
       <author><first>Lanjun</first><last>Zhou</last></author>
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <title>The <fixed-case>CUHK</fixed-case> Discourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank for <fixed-case>C</fixed-case>hinese: Annotating Explicit Discourse Connectives for the <fixed-case>C</fixed-case>hinese <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
       <pages>942–949</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/270_Paper.pdf</url>
@@ -2633,7 +2633,7 @@
       <bibkey>sadamitsu-etal-2014-extraction</bibkey>
     </paper>
     <paper id="248">
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Declan</first><last>Groves</last></author>
       <title><fixed-case>MTW</fixed-case>atch: A Tool for the Analysis of Noisy Parallel Data</title>
       <pages>41–45</pages>
@@ -2644,7 +2644,7 @@
     <paper id="249">
       <author><first>Martin</first><last>Riedl</last></author>
       <author><first>Richard</first><last>Steuer</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <title>Distributed Distributional Similarities of <fixed-case>G</fixed-case>oogle <fixed-case>B</fixed-case>ooks Over the Centuries</title>
       <pages>1401–1405</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/274_Paper.pdf</url>
@@ -2657,9 +2657,9 @@
       <author><first>Asad</first><last>Mustafa</last></author>
       <author><first>Rahila</first><last>Parveen</last></author>
       <author><first>Farah</first><last>Adeeba</last></author>
-      <author><first>Tafseer</first><last>Ahmed Khan</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed Khan</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <title>The <fixed-case>CLE</fixed-case> <fixed-case>U</fixed-case>rdu <fixed-case>POS</fixed-case> Tagset</title>
       <pages>2920–2925</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/275_Paper.pdf</url>
@@ -2668,7 +2668,7 @@
     </paper>
     <paper id="251">
       <author><first>Darina</first><last>Benikova</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Marc</first><last>Reznicek</last></author>
       <title><fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>ta-<fixed-case>D</fixed-case> Named Entity Annotation for <fixed-case>G</fixed-case>erman: Guidelines and Dataset</title>
       <pages>2524–2531</pages>
@@ -2711,7 +2711,7 @@
       <bibkey>przepiorkowski-etal-2014-walenty</bibkey>
     </paper>
     <paper id="255">
-      <author><first>Balamurali</first><last>A.R</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>A.R</last></author>
       <title>Can the Crowd be Controlled?: A Case Study on Crowd Sourcing and Automatic Validation of Completed Tasks based on User Modeling</title>
       <pages>189–195</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/28_Paper.pdf</url>
@@ -2729,7 +2729,7 @@
       <bibkey>bogel-etal-2014-computational</bibkey>
     </paper>
     <paper id="257">
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Ilze</first><last>Auziņa</last></author>
       <author><first>Kārlis</first><last>Goba</last></author>
       <title>Designing the <fixed-case>L</fixed-case>atvian Speech Recognition Corpus</title>
@@ -2748,8 +2748,8 @@
     </paper>
     <paper id="259">
       <author><first>Panot</first><last>Chaimongkol</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <title>Corpus for Coreference Resolution on Scientific Papers</title>
       <pages>3187–3190</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/286_Paper.pdf</url>
@@ -2767,16 +2767,16 @@
       <bibkey>falk-etal-2014-non</bibkey>
     </paper>
     <paper id="261">
-      <author><first>Nancy</first><last>Underwood</last></author>
+      <author id="nancy-underwood"><first>Nancy</first><last>Underwood</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
-      <author><first>Mercedes García</first><last>Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes García</first><last>Martínez</last></author>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Daniel</first><last>Ortíz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortíz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <title>Evaluating the effects of interactivity in a post-editing workbench</title>
       <pages>553–559</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/289_Paper.pdf</url>
@@ -2823,7 +2823,7 @@
     </paper>
     <paper id="266">
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Carlos</first><last>Rodríguez-Penagos</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez-Penagos</last></author>
       <title>Adapting Freely Available Resources to Build an Opinion Mining Pipeline in <fixed-case>P</fixed-case>ortuguese</title>
       <pages>2225–2228</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/293_Paper.pdf</url>
@@ -2834,7 +2834,7 @@
       <author><first>Milena</first><last>Hnátková</last></author>
       <author><first>Michal</first><last>Křen</last></author>
       <author><first>Pavel</first><last>Procházka</last></author>
-      <author><first>Hana</first><last>Skoumalová</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalová</last></author>
       <title>The <fixed-case>SYN</fixed-case>-series corpora of written <fixed-case>C</fixed-case>zech</title>
       <pages>160–164</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/294_Paper.pdf</url>
@@ -2845,8 +2845,8 @@
       <author><first>Liane</first><last>Guillou</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Aaron</first><last>Smith</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <title><fixed-case>P</fixed-case>ar<fixed-case>C</fixed-case>or 1.0: A Parallel Pronoun-Coreference Corpus to Support Statistical <fixed-case>MT</fixed-case></title>
       <pages>3191–3198</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/298_Paper.pdf</url>
@@ -2901,9 +2901,9 @@
     <paper id="273">
       <author><first>Per Erik</first><last>Solberg</last></author>
       <author><first>Arne</first><last>Skjærholt</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <title>The <fixed-case>N</fixed-case>orwegian Dependency Treebank</title>
       <pages>789–795</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/303_Paper.pdf</url>
@@ -2928,7 +2928,7 @@
       <bibkey>laki-orosz-2014-efficient</bibkey>
     </paper>
     <paper id="276">
-      <author><first>Peter</first><last>Spyns</last></author>
+      <author id="peter-spyns"><first>Peter</first><last>Spyns</last></author>
       <author><first>Remco</first><last>van Veenendaal</last></author>
       <title>A decade of <fixed-case>HLT</fixed-case> Agency activities in the Low Countries: from resource maintenance (<fixed-case>BLARK</fixed-case>) to service offerings (<fixed-case>BLAISE</fixed-case>)</title>
       <pages>2158–2165</pages>
@@ -2939,8 +2939,8 @@
     <paper id="277">
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Sarah</first><last>Fünfer</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>A Corpus of Spontaneous Speech in Lectures: The <fixed-case>KIT</fixed-case> Lecture Corpus for Spoken Language Processing and Translation</title>
       <pages>1554–1559</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/311_Paper.pdf</url>
@@ -2958,7 +2958,7 @@
     </paper>
     <paper id="279">
       <author><first>Begüm</first><last>Erten</last></author>
-      <author><first>Cem</first><last>Bozsahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <title><fixed-case>T</fixed-case>urkish Resources for Visual Word Recognition</title>
       <pages>2106–2110</pages>
@@ -2977,10 +2977,10 @@
     </paper>
     <paper id="281">
       <author><first>Massimo</first><last>Moneglia</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Gloria</first><last>Gagliardi</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Alessandro</first><last>Panunzi</last></author>
       <title>The <fixed-case>IMAGACT</fixed-case> Visual Ontology. An Extendable Multilingual Infrastructure for the representation of lexical encoding of Action</title>
@@ -3010,8 +3010,8 @@
       <author><first>Bogdan</first><last>Ludusan</last></author>
       <author><first>Maarten</first><last>Versteegh</last></author>
       <author><first>Aren</first><last>Jansen</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
-      <author><first>Xuan-Nga</first><last>Cao</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
+      <author id="xuan-nga-cao"><first>Xuan-Nga</first><last>Cao</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <title>Bridging the gap between speech technology and natural language processing: an evaluation toolbox for term discovery systems</title>
@@ -3021,7 +3021,7 @@
       <bibkey>ludusan-etal-2014-bridging</bibkey>
     </paper>
     <paper id="285">
-      <author><first>Dietmar</first><last>Rösner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rösner</last></author>
       <author><first>Rafael</first><last>Friesen</last></author>
       <author><first>Stephan</first><last>Günther</last></author>
       <author><first>Rico</first><last>Andrich</last></author>
@@ -3051,7 +3051,7 @@
       <bibkey>alsop-nesi-2014-pragmatic</bibkey>
     </paper>
     <paper id="288">
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Lene</first><last>Offersgaard</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <title>Using <fixed-case>TEI</fixed-case>, <fixed-case>CMDI</fixed-case> and <fixed-case>ISO</fixed-case>cat in <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case></title>
@@ -3072,7 +3072,7 @@
     </paper>
     <paper id="290">
       <author><first>Mircea</first><last>Petic</last></author>
-      <author><first>Daniela</first><last>Gîfu</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last></author>
       <title>Transliteration and alignment of parallel texts from <fixed-case>C</fixed-case>yrillic to <fixed-case>L</fixed-case>atin</title>
       <pages>1819–1823</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/328_Paper.pdf</url>
@@ -3082,7 +3082,7 @@
     <paper id="291">
       <author><first>Corina</first><last>Dima</last></author>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Christina</first><last>Hoppermann</last></author>
       <title>How to Tell a Schneemann from a Milchmann: An Annotation Scheme for Compound-Internal Relations</title>
       <pages>1194–1201</pages>
@@ -3101,7 +3101,7 @@
     </paper>
     <paper id="293">
       <author><first>Anita</first><last>Rácz</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <title>4<fixed-case>FX</fixed-case>: Light Verb Constructions in a Multilingual Parallel Corpus</title>
       <pages>710–715</pages>
@@ -3111,7 +3111,7 @@
     </paper>
     <paper id="294">
       <author><first>Fritz</first><last>Kliche</last></author>
-      <author><first>André</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>André</first><last>Blessing</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Jonathan</first><last>Sonntag</last></author>
       <title>The e<fixed-case>I</fixed-case>dentity Text Exploration Workbench</title>
@@ -3169,7 +3169,7 @@
       <bibkey>jansche-2014-computer</bibkey>
     </paper>
     <paper id="300">
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <author><first>Jane</first><last>Bradbury</last></author>
       <author><first>Vít</first><last>Baisa</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
@@ -3219,9 +3219,9 @@
       <bibkey>schneider-2014-genitivdb</bibkey>
     </paper>
     <paper id="305">
-      <author><first>Jana</first><last>Šindlerová</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Eva</first><last>Fucikova</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fucikova</last></author>
       <title>Resources in Conflict: A Bilingual Valency Lexicon vs. a Bilingual Treebank vs. a Linguistic Theory</title>
       <pages>2490–2494</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/349_Paper.pdf</url>
@@ -3229,7 +3229,7 @@
       <bibkey>sindlerova-etal-2014-resources</bibkey>
     </paper>
     <paper id="306">
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Judith</first><last>Domingo</last></author>
       <author><first>Toni</first><last>Badia</last></author>
       <title>The <fixed-case>N</fixed-case>ew<fixed-case>S</fixed-case>o<fixed-case>M</fixed-case>e Corpus: A Unifying Opinion Annotation Framework across Genres and in Multiple Languages</title>
@@ -3258,7 +3258,7 @@
     </paper>
     <paper id="309">
       <author><first>André</first><last>Bittar</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Sigrid</first><last>Maurel</last></author>
       <author><first>Mathieu</first><last>Ruhlmann</last></author>
       <title>The Dangerous Myth of the Star System</title>
@@ -3270,7 +3270,7 @@
     <paper id="310">
       <author><first>Haibo</first><last>Li</last></author>
       <author><first>Masato</first><last>Hagiwara</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <title>Comparison of the Impact of Word Segmentation on Name Tagging for <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese</title>
       <pages>2532–2536</pages>
@@ -3279,9 +3279,9 @@
       <bibkey>li-etal-2014-comparison</bibkey>
     </paper>
     <paper id="311">
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <title><fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>o<fixed-case>L</fixed-case>a — The Reference Corpus of Contemporary <fixed-case>R</fixed-case>omanian Language</title>
       <pages>1235–1239</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/360_Paper.pdf</url>
@@ -3309,9 +3309,9 @@
     <paper id="314">
       <author><first>Marco</first><last>Marelli</last></author>
       <author><first>Stefano</first><last>Menini</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
       <title>A <fixed-case>SICK</fixed-case> cure for the evaluation of compositional distributional semantic models</title>
       <pages>216–223</pages>
@@ -3333,7 +3333,7 @@
       <author><first>Annika</first><last>Hämäläinen</last></author>
       <author><first>Jairo</first><last>Avelar</last></author>
       <author><first>Silvia</first><last>Rodrigues</last></author>
-      <author><first>Miguel Sales</first><last>Dias</last></author>
+      <author id="miguel-sales-dias"><first>Miguel Sales</first><last>Dias</last></author>
       <author><first>Artur</first><last>Kolesiński</last></author>
       <author><first>Tibor</first><last>Fegyó</last></author>
       <author><first>Géza</first><last>Németh</last></author>
@@ -3349,7 +3349,7 @@
     <paper id="317">
       <author><first>Matteo</first><last>Abrate</last></author>
       <author><first>Angelo Mario</first><last>Del Grosso</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <author><first>Angelica Lo</first><last>Duca</last></author>
       <author><first>Damiana</first><last>Luzzi</last></author>
       <author><first>Lorenzo</first><last>Mancini</last></author>
@@ -3384,10 +3384,10 @@
       <bibkey>geer-keane-2014-exploring</bibkey>
     </paper>
     <paper id="320">
-      <author><first>Nobal</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal</first><last>Niraula</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <author><first>Rajendra</first><last>Banjade</last></author>
-      <author><first>Dan</first><last>Stefanescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Stefanescu</last></author>
       <author><first>William</first><last>Baggett</last></author>
       <author><first>Brent</first><last>Morgan</last></author>
       <title>The <fixed-case>DARE</fixed-case> Corpus: A Resource for Anaphora Resolution in Dialogue Based Intelligent Tutoring Systems</title>
@@ -3397,7 +3397,7 @@
       <bibkey>niraula-etal-2014-dare</bibkey>
     </paper>
     <paper id="321">
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <title>On the annotation of <fixed-case>TMX</fixed-case> translation memories for advanced leveraging in computer-aided translation</title>
       <pages>4374–4378</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/373_Paper.pdf</url>
@@ -3418,7 +3418,7 @@
       <author><first>Andrea</first><last>Moro</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <author><first>Francesco Maria</first><last>Tucci</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <title>Annotating the <fixed-case>MASC</fixed-case> Corpus with <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et</title>
       <pages>4214–4219</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/375_Paper.pdf</url>
@@ -3427,7 +3427,7 @@
     </paper>
     <paper id="324">
       <author><first>Jasmijn</first><last>Bastings</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <title>All Fragments Count in Parser Evaluation</title>
       <pages>78–82</pages>
       <url hash="ba2b8c48">L14-1324</url>
@@ -3435,7 +3435,7 @@
       <bibkey>bastings-simaan-2014-fragments</bibkey>
     </paper>
     <paper id="325">
-      <author><first>Juan María</first><last>Garrido</last></author>
+      <author id="juan-maria-garrido"><first>Juan María</first><last>Garrido</last></author>
       <author><first>Yesika</first><last>Laplaza</last></author>
       <author><first>Benjamin</first><last>Kolz</last></author>
       <author><first>Miquel</first><last>Cornudella</last></author>
@@ -3448,7 +3448,7 @@
     <paper id="326">
       <author><first>Mojgan</first><last>Seraji</last></author>
       <author><first>Carina</first><last>Jahani</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <title>A <fixed-case>P</fixed-case>ersian Treebank with <fixed-case>S</fixed-case>tanford Typed Dependencies</title>
       <pages>796–801</pages>
@@ -3458,7 +3458,7 @@
     </paper>
     <paper id="327">
       <author><first>Marion</first><last>Baranes</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title>A Language-independent Approach to Extracting Derivational Relations from an Inflectional Lexicon</title>
       <pages>2793–2799</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/379_Paper.pdf</url>
@@ -3476,14 +3476,14 @@
       <bibkey>kucuk-etal-2014-named</bibkey>
     </paper>
     <paper id="329">
-      <author><first>Anne</first><last>Lacheret</last></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
-      <author><first>Julie</first><last>Beliao</last></author>
+      <author id="julie-beliao"><first>Julie</first><last>Beliao</last></author>
       <author><first>Anne</first><last>Dister</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
       <author><first>Jean-Philippe</first><last>Goldman</last></author>
       <author><first>Nicolas</first><last>Obin</last></author>
-      <author><first>Paola</first><last>Pietrandrea</last></author>
+      <author id="paola-pietrandrea"><first>Paola</first><last>Pietrandrea</last></author>
       <author><first>Atanas</first><last>Tchobanov</last></author>
       <title><fixed-case>R</fixed-case>hapsodie: a Prosodic-Syntactic Treebank for Spoken <fixed-case>F</fixed-case>rench</title>
       <pages>295–301</pages>
@@ -3492,11 +3492,11 @@
       <bibkey>lacheret-etal-2014-rhapsodie</bibkey>
     </paper>
     <paper id="330">
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Beatriz</first><last>Fisas</last></author>
       <author><first>Blanca</first><last>Arias</last></author>
-      <author><first>Silvia</first><last>Vázquez</last></author>
+      <author id="silvia-vazquez"><first>Silvia</first><last>Vázquez</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
       <author><first>Carlos</first><last>Morell</last></author>
       <author><first>Mercè</first><last>Lorente</last></author>
@@ -3509,8 +3509,8 @@
     <paper id="331">
       <author><first>Maria</first><last>Goryainova</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
       <title>Morpho-Syntactic Study of Errors from Speech Recognition System</title>
       <pages>3045–3049</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/383_Paper.pdf</url>
@@ -3519,10 +3519,10 @@
     </paper>
     <paper id="332">
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <author><first>Xiuhong</first><last>Zhang</last></author>
       <title>Not an Interlingua, But Close: Comparison of <fixed-case>E</fixed-case>nglish <fixed-case>AMR</fixed-case>s to <fixed-case>C</fixed-case>hinese and <fixed-case>C</fixed-case>zech</title>
       <pages>1765–1772</pages>
@@ -3531,7 +3531,7 @@
       <bibkey>xue-etal-2014-interlingua</bibkey>
     </paper>
     <paper id="333">
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Giancarlo</first><last>Lee</last></author>
       <author><first>Angus</first><last>Grieve-Smith</last></author>
       <author><first>Yifan</first><last>He</last></author>
@@ -3546,7 +3546,7 @@
       <author><first>Prescott</first><last>Klassen</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <title>Annotating Clinical Events in Text Snippets for Phenotype Detection</title>
       <pages>2753–2757</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/386_Paper.pdf</url>
@@ -3554,8 +3554,8 @@
       <bibkey>klassen-etal-2014-annotating</bibkey>
     </paper>
     <paper id="335">
-      <author><first>Pablo</first><last>Ruiz</last></author>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <author><first>Haritz</first><last>Arzelus</last></author>
       <title>Phoneme Similarity Matrices to Improve Long Audio Alignment for Automatic Subtitling</title>
       <pages>437–442</pages>
@@ -3566,7 +3566,7 @@
     <paper id="336">
       <author><first>Maria Evangelia</first><last>Chatzimina</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <title>Use of unsupervised word classes for entity recognition: Application to the detection of disorders in clinical reports</title>
       <pages>3264–3271</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/389_Paper.pdf</url>
@@ -3574,7 +3574,7 @@
       <bibkey>chatzimina-etal-2014-use</bibkey>
     </paper>
     <paper id="337">
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <title>Three dimensions of the so-called “interoperability” of annotation schemes”</title>
       <pages>4559–4564</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/39_Paper.pdf</url>
@@ -3594,7 +3594,7 @@
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <author><first>Jyrki</first><last>Niemi</last></author>
       <author><first>Sam</first><last>Hardwick</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Lars</first><last>Borin</last></author>
       <title><fixed-case>HFST</fixed-case>-<fixed-case>S</fixed-case>we<fixed-case>NER</fixed-case> — A New <fixed-case>NER</fixed-case> Resource for <fixed-case>S</fixed-case>wedish</title>
       <pages>2537–2543</pages>
@@ -3605,7 +3605,7 @@
     <paper id="340">
       <author><first>Motaz</first><last>Saad</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <title>Building and Modelling Multilingual Subjective Corpora</title>
       <pages>3086–3091</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/392_Paper.pdf</url>
@@ -3644,8 +3644,8 @@
     </paper>
     <paper id="344">
       <author><first>Paula</first><last>Lopez-Otero</last></author>
-      <author><first>Laura</first><last>Docio-Fernandez</last></author>
-      <author><first>Carmen</first><last>Garcia-Mateo</last></author>
+      <author id="laura-docio-fernandez"><first>Laura</first><last>Docio-Fernandez</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>Garcia-Mateo</last></author>
       <title>Introducing a Framework for the Evaluation of Music Detection Tools</title>
       <pages>568–572</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/398_Paper.pdf</url>
@@ -3674,7 +3674,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Cindy</first><last>Tscherwinka</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
@@ -3687,7 +3687,7 @@
     <paper id="348">
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Martin</first><last>Walker</last></author>
       <title>Detecting Document Structure in a Very Large Corpus of <fixed-case>UK</fixed-case> Financial Reports</title>
       <pages>1335–1338</pages>
@@ -3696,7 +3696,7 @@
       <bibkey>el-haj-etal-2014-detecting</bibkey>
     </paper>
     <paper id="349">
-      <author><first>Dan</first><last>Ștefănescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ștefănescu</last></author>
       <author><first>Rajendra</first><last>Banjade</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <title>Latent Semantic Analysis Models on <fixed-case>W</fixed-case>ikipedia and <fixed-case>TASA</fixed-case></title>
@@ -3709,27 +3709,27 @@
       <author><first>Georg</first><last>Rehm</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Audronė</first><last>Bielevičienė</last></author>
       <author><first>Lars</first><last>Borin</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Gerhard</first><last>Budin</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Radovan</first><last>Garabík</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="radovan-garabik"><first>Radovan</first><last>Garabík</last></author>
       <author><first>Marko</first><last>Grobelnik</last></author>
-      <author><first>Carmen</first><last>García-Mateo</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Inma</first><last>Hernáez</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernáez</last></author>
       <author><first>John</first><last>Judge</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Maite</first><last>Melero</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Asunción</first><last>Moreno</last></author>
@@ -3738,16 +3738,16 @@
       <author><first>Piotr</first><last>Pęzik</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Adam</first><last>Przepiórkowski</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Kadri</first><last>Vider</last></author>
       <author><first>Jolanta</first><last>Zabarskaite</last></author>
       <title>The Strategic Impact of <fixed-case>META</fixed-case>-<fixed-case>NET</fixed-case> on the Regional, National and International Level</title>
@@ -3775,16 +3775,16 @@
       <bibkey>schiel-kisler-2014-german</bibkey>
     </paper>
     <paper id="353">
-      <author><first>Koenraad</first><last>De Smedt</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Costanza</first><last>Navarretta</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Gisle</first><last>Andersen</last></author>
       <author><first>Przemyslaw</first><last>Lenkiewicz</last></author>
       <title><fixed-case>CLARA</fixed-case>: A New Generation of Researchers in Common Language Resources and Their Applications</title>
@@ -3796,11 +3796,11 @@
     <paper id="354">
       <author><first>Nathan</first><last>Hartmann</last></author>
       <author><first>Lucas</first><last>Avanço</last></author>
-      <author><first>Pedro</first><last>Balage</last></author>
-      <author><first>Magali</first><last>Duran</last></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Balage</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Duran</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <title>A Large Corpus of Product Reviews in <fixed-case>P</fixed-case>ortuguese: Tackling Out-Of-Vocabulary Words</title>
       <pages>3865–3871</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/413_Paper.pdf</url>
@@ -3810,9 +3810,9 @@
     <paper id="355">
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>Ritesh</first><last>Shah</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="ritesh-shah"><first>Ritesh</first><last>Shah</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>Shata-Anuvadak: Tackling Multiway Translation of <fixed-case>I</fixed-case>ndian Languages</title>
       <pages>1781–1787</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/414_Paper.pdf</url>
@@ -3820,8 +3820,8 @@
       <bibkey>kunchukuttan-etal-2014-shata</bibkey>
     </paper>
     <paper id="356">
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Steven</first><last>Krauwer</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
       <title>The <fixed-case>CLARIN</fixed-case> Research Infrastructure: Resources and Tools for e<fixed-case>H</fixed-case>umanities Scholars</title>
       <pages>1525–1531</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/415_Paper.pdf</url>
@@ -3830,9 +3830,9 @@
     </paper>
     <paper id="357">
       <author><first>Renlong</first><last>Ai</last></author>
-      <author><first>Marcela</first><last>Charfuelan</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
-      <author><first>Tina</first><last>Klüwer</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelan</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Klüwer</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Sandra</first><last>Gasber</last></author>
@@ -3876,7 +3876,7 @@
     <paper id="361">
       <author><first>Hege</first><last>Fromreide</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <title>Crowdsourcing and annotating <fixed-case>NER</fixed-case> for <fixed-case>T</fixed-case>witter #drift</title>
       <pages>2544–2547</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/421_Paper.pdf</url>
@@ -3899,7 +3899,7 @@
     <paper id="363">
       <author><first>Alain</first><last>Couillault</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Hugues</first><last>de Mazancourt</last></author>
       <title>Evaluating corpora documentation with regards to the Ethics and Big Data Charter</title>
       <pages>4225–4229</pages>
@@ -3909,9 +3909,9 @@
     </paper>
     <paper id="364">
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Monica</first><last>Paramita</last></author>
+      <author id="monica-lestari-paramita"><first>Monica</first><last>Paramita</last></author>
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Bootstrapping Term Extractors for Multiple Languages</title>
       <pages>483–489</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/425_Paper.pdf</url>
@@ -3921,7 +3921,7 @@
     <paper id="365">
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Marjan</first><last>Van de Kauter</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Evaluation of Automatic Hypernym Extraction from Technical Corpora in <fixed-case>E</fixed-case>nglish and <fixed-case>D</fixed-case>utch</title>
       <pages>490–497</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/426_Paper.pdf</url>
@@ -3950,7 +3950,7 @@
     </paper>
     <paper id="368">
       <author><first>Lise</first><last>Rebout</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <title>An Iterative Approach for Mining Parallel Sentences in a Comparable Corpus</title>
       <pages>648–655</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/43_Paper.pdf</url>
@@ -3977,10 +3977,10 @@
       <bibkey>zaghouani-dukes-2014-crowdsourcing</bibkey>
     </paper>
     <paper id="371">
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <author><first>Ken’ya</first><last>Nishikawa</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <title>Design and development of an <fixed-case>RDB</fixed-case> version of the Corpus of Spontaneous <fixed-case>J</fixed-case>apanese</title>
       <pages>1471–1476</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/432_Paper.pdf</url>
@@ -4008,11 +4008,11 @@
     </paper>
     <paper id="374">
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Luciano</first><last>Serafini</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="luciano-serafini"><first>Luciano</first><last>Serafini</last></author>
       <author><first>Pim</first><last>Stouten</last></author>
       <author><first>Francis</first><last>Irving</last></author>
-      <author><first>Willem</first><last>Van Hage</last></author>
+      <author id="willem-robert-van-hage"><first>Willem</first><last>Van Hage</last></author>
       <title><fixed-case>N</fixed-case>ews<fixed-case>R</fixed-case>eader: recording history from daily news streams</title>
       <pages>2000–2007</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/436_Paper.pdf</url>
@@ -4062,14 +4062,14 @@
       <bibkey>ghayoomi-kuhn-2014-converting</bibkey>
     </paper>
     <paper id="379">
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Pere</first><last>Comas</last></author>
+      <author id="pere-comas"><first>Pere</first><last>Comas</last></author>
       <author><first>Víctor</first><last>Fresno</last></author>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Lluis</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padró</last></author>
       <author><first>Iñaki</first><last>San Vicente</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
       <title><fixed-case>T</fixed-case>weet<fixed-case>N</fixed-case>orm_es: an annotated corpus for <fixed-case>S</fixed-case>panish microtext normalization</title>
       <pages>2274–2278</pages>
@@ -4090,10 +4090,10 @@
       <author><first>Ralf</first><last>Steinberger</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
       <author><first>Mohamed</first><last>Ebrahim</last></author>
-      <author><first>Leonida</first><last>Della Rocca</last></author>
+      <author id="leonida-della-rocca"><first>Leonida</first><last>Della Rocca</last></author>
       <author><first>Stefano</first><last>Bucci</last></author>
       <author><first>Eszter</first><last>Simon</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <title>Media monitoring and information extraction for the highly inflected agglutinative language <fixed-case>H</fixed-case>ungarian</title>
       <pages>2049–2056</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/449_Paper.pdf</url>
@@ -4101,7 +4101,7 @@
       <bibkey>pajzs-etal-2014-media</bibkey>
     </paper>
     <paper id="382">
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <title><fixed-case>F</fixed-case>rench Resources for Extraction and Normalization of Temporal Expressions with <fixed-case>H</fixed-case>eidel<fixed-case>T</fixed-case>ime</title>
       <pages>3239–3243</pages>
@@ -4120,7 +4120,7 @@
     </paper>
     <paper id="384">
       <author><first>Angelina</first><last>Ivanova</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <title>Treelet Probabilities for <fixed-case>HPSG</fixed-case> Parsing and Error Correction</title>
       <pages>2887–2892</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/453_Paper.pdf</url>
@@ -4129,9 +4129,9 @@
     </paper>
     <paper id="385">
       <author><first>Abir</first><last>Masmoudi</last></author>
-      <author><first>Mariem Ellouze</first><last>Khmekhem</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem Ellouze</first><last>Khmekhem</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <title>A Corpus and Phonetic Dictionary for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic Speech Recognition</title>
       <pages>306–310</pages>
@@ -4140,9 +4140,9 @@
       <bibkey>masmoudi-etal-2014-corpus</bibkey>
     </paper>
     <paper id="386">
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
-      <author><first>Carlos Subirats</first><last>Rüggeberg</last></author>
+      <author id="carlos-subirats-ruggeberg"><first>Carlos Subirats</first><last>Rüggeberg</last></author>
       <title>Discovering frames in specialized domains</title>
       <pages>1364–1371</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/455_Paper.pdf</url>
@@ -4150,13 +4150,13 @@
       <bibkey>lhomme-etal-2014-discovering</bibkey>
     </paper>
     <paper id="387">
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
       <author><first>Davida</first><last>Fromm</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Weston</first><last>Feely</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
       <author><first>Carlos</first><last>Ramirez</last></author>
       <title>Resources for the Detection of Conventionalized Metaphors in Four Languages</title>
@@ -4174,7 +4174,7 @@
       <bibkey>odijk-2014-clarin</bibkey>
     </paper>
     <paper id="389">
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Inês</first><last>Coelho</last></author>
       <author><first>Paulo</first><last>Gomes</last></author>
       <title>Exploiting <fixed-case>P</fixed-case>ortuguese Lexical Knowledge Bases for Answering Open Domain Cloze Questions Automatically</title>
@@ -4184,10 +4184,10 @@
       <bibkey>oliveira-etal-2014-exploiting</bibkey>
     </paper>
     <paper id="390">
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yo</first><last>Shidahara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <title>Annotation of Computer Science Papers for Semantic Relation Extrac-tion</title>
       <pages>1423–1429</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/461_Paper.pdf</url>
@@ -4213,7 +4213,7 @@
       <author><first>Jie</first><last>Jiang</last></author>
       <author><first>Gerard</first><last>van Loenhout</last></author>
       <author><first>Arantza</first><last>del Pozo</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maučec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maučec</last></author>
       <author><first>Anja</first><last>Turner</last></author>
       <author><first>Martin</first><last>Volk</last></author>
       <title>Machine Translation for Subtitling: A Large-Scale Evaluation</title>
@@ -4224,7 +4224,7 @@
     </paper>
     <paper id="393">
       <author><first>Elisabetta</first><last>Jezek</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Anna</first><last>Feltracco</last></author>
       <author><first>Alessia</first><last>Bianchini</last></author>
       <author><first>Octavian</first><last>Popescu</last></author>
@@ -4244,7 +4244,7 @@
     </paper>
     <paper id="395">
       <author><first>Subhabrata</first><last>Mukherjee</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <title>Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews</title>
       <pages>3092–3099</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/467_Paper.pdf</url>
@@ -4262,7 +4262,7 @@
       <bibkey>jacquet-etal-2014-clustering</bibkey>
     </paper>
     <paper id="397">
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
       <author><first>HyunJeong</first><last>Choe</last></author>
       <author><first>David</first><last>Huynh</last></author>
@@ -4276,7 +4276,7 @@
       <bibkey>sproat-etal-2014-database</bibkey>
     </paper>
     <paper id="398">
-      <author><first>Noushin Rezapour</first><last>Asheghi</last></author>
+      <author id="noushin-rezapour-asheghi"><first>Noushin Rezapour</first><last>Asheghi</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <title>Designing and Evaluating a Reliable Corpus of Web Genres via Crowd-Sourcing</title>
@@ -4286,7 +4286,7 @@
       <bibkey>asheghi-etal-2014-designing</bibkey>
     </paper>
     <paper id="399">
-      <author><first>Héctor Martínez</first><last>Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor Martínez</first><last>Alonso</last></author>
       <author><first>Lauren</first><last>Romeo</last></author>
       <title>Crowdsourcing as a preprocessing for complex semantic annotation tasks</title>
       <pages>229–234</pages>
@@ -4296,7 +4296,7 @@
     </paper>
     <paper id="400">
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <title>Automatic Annotation of Machine Translation Datasets with Binary Quality Judgements</title>
       <pages>1788–1792</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/473_Paper.pdf</url>
@@ -4306,7 +4306,7 @@
     <paper id="401">
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Emilia</first><last>Verzeni</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <title>Semantic Clustering of Pivot Paraphrases</title>
       <pages>4270–4275</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/475_Paper.pdf</url>
@@ -4315,8 +4315,8 @@
     </paper>
     <paper id="402">
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <title>When <fixed-case>POS</fixed-case> data sets don’t add up: Combatting sample bias</title>
       <pages>4472–4475</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/476_Paper.pdf</url>
@@ -4332,7 +4332,7 @@
       <bibkey>shardlow-2014-open</bibkey>
     </paper>
     <paper id="404">
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <author><first>Jeffry</first><last>Halverson</last></author>
       <author><first>Steven</first><last>Corman</last></author>
       <title>The N2 corpus: A semantically annotated collection of Islamist extremist stories</title>
@@ -4372,7 +4372,7 @@
       <author><first>Jeanin</first><last>Jügler</last></author>
       <author><first>Yves</first><last>Laprie</last></author>
       <author><first>Odile</first><last>Mella</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <title>Designing a Bilingual Speech Corpus for <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman Language Learners: a Two-Step Process</title>
       <pages>1477–1482</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/484_Paper.pdf</url>
@@ -4399,13 +4399,13 @@
       <bibkey>rapp-2014-using-word</bibkey>
     </paper>
     <paper id="410">
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Corentin</first><last>Ribeyre</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <title>Deep Syntax Annotation of the Sequoia <fixed-case>F</fixed-case>rench Treebank</title>
       <pages>2298–2305</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/494_Paper.pdf</url>
@@ -4413,17 +4413,17 @@
       <bibkey>candito-etal-2014-deep</bibkey>
     </paper>
     <paper id="411">
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Pascal</first><last>Amsili</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
       <author><first>Marianne</first><last>Djemaa</last></author>
       <author><first>Pauline</first><last>Haas</last></author>
       <author><first>Richard</first><last>Huyghe</last></author>
-      <author><first>Yvette Yannick</first><last>Mathieu</last></author>
+      <author id="yannick-mathieu"><first>Yvette Yannick</first><last>Mathieu</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Laure</first><last>Vieu</last></author>
       <title>Developing a <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Methodology and First results</title>
       <pages>1372–1379</pages>
@@ -4433,8 +4433,8 @@
     </paper>
     <paper id="412">
       <author><first>Marta</first><last>Sabou</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Arno</first><last>Scharl</last></author>
       <title>Corpus Annotation through Crowdsourcing: Towards Best Practice Guidelines</title>
       <pages>859–866</pages>
@@ -4463,7 +4463,7 @@
     </paper>
     <paper id="415">
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <title>Harmonization of <fixed-case>G</fixed-case>erman Lexical Resources for Opinion Mining</title>
       <pages>3872–3876</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/500_Paper.pdf</url>
@@ -4472,7 +4472,7 @@
     </paper>
     <paper id="416">
       <author><first>Magda</first><last>Ševčíková</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title>Word-Formation Network for <fixed-case>C</fixed-case>zech</title>
       <pages>1087–1093</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/501_Paper.pdf</url>
@@ -4481,7 +4481,7 @@
     </paper>
     <paper id="417">
       <author><first>Jamie</first><last>Bost</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <title>An Analysis of Older Users’ Interactions with Spoken Dialogue Systems</title>
       <pages>1176–1181</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/502_Paper.pdf</url>
@@ -4509,7 +4509,7 @@
     </paper>
     <paper id="420">
       <author><first>Mikaël</first><last>Morardo</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <title>Towards an environment for the production and the validation of lexical semantic resources</title>
       <pages>867–874</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/507_Paper.pdf</url>
@@ -4527,8 +4527,8 @@
       <author><first>Jill</first><last>Boberg</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <author><first>Stacy</first><last>Marsella</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Skip</first><last>Rizzo</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="albert-a-rizzo"><first>Skip</first><last>Rizzo</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <title>The Distress Analysis Interview Corpus of human and computer interviews</title>
       <pages>3123–3128</pages>
@@ -4539,7 +4539,7 @@
     <paper id="422">
       <author><first>Brigitte</first><last>Bigi</last></author>
       <author><first>Tatsuya</first><last>Watanabe</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <title>Representing Multimodal Linguistic Annotated data</title>
       <pages>3386–3392</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/51_Paper.pdf</url>
@@ -4549,7 +4549,7 @@
     <paper id="423">
       <author><first>Timur</first><last>Gilmanov</last></author>
       <author><first>Olga</first><last>Scrivner</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <title><fixed-case>SWIFT</fixed-case> Aligner, A Multifunctional Tool for Parallel Corpora: Visualization, Word Alignment, and (Morpho)-Syntactic Cross-Language Transfer</title>
       <pages>2913–2919</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/510_Paper.pdf</url>
@@ -4560,7 +4560,7 @@
       <author><first>Rosemary</first><last>Orr</last></author>
       <author><first>Marijn</first><last>Huijbregts</last></author>
       <author><first>Roeland</first><last>van Beek</last></author>
-      <author><first>Lisa</first><last>Teunissen</last></author>
+      <author id="lisanne-teunissen"><first>Lisa</first><last>Teunissen</last></author>
       <author><first>Kate</first><last>Backhouse</last></author>
       <author><first>David</first><last>van Leeuwen</last></author>
       <title>Semi-automatic annotation of the <fixed-case>UCU</fixed-case> accents speech corpus</title>
@@ -4571,9 +4571,9 @@
     </paper>
     <paper id="425">
       <author><first>Daniela</first><last>Amaral</last></author>
-      <author><first>Evandro</first><last>Fonseca</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro</first><last>Fonseca</last></author>
       <author><first>Lucelene</first><last>Lopes</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title>Comparative Analysis of <fixed-case>P</fixed-case>ortuguese Named Entities Recognition Tools</title>
       <pages>2554–2558</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/513_Paper.pdf</url>
@@ -4593,10 +4593,10 @@
       <bibkey>santos-etal-2014-corpus</bibkey>
     </paper>
     <paper id="427">
-      <author><first>Guntis</first><last>Barzdins</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
       <title>Using C5.0 and Exhaustive Search for Boosting Frame-Semantic Parsing Accuracy</title>
       <pages>4476–4482</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/515_Paper.pdf</url>
@@ -4606,7 +4606,7 @@
     <paper id="428">
       <author><first>Verena</first><last>Lyding</last></author>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <title>‘inter<fixed-case>H</fixed-case>ist’ - an interactive visual interface for corpus exploration</title>
       <pages>635–641</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/517_Paper.pdf</url>
@@ -4626,7 +4626,7 @@
     <paper id="430">
       <author><first>Lis</first><last>Pereira</last></author>
       <author><first>Elga</first><last>Strafella</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Collocation or Free Combination? — Applying Machine Translation Techniques to identify collocations in <fixed-case>J</fixed-case>apanese</title>
       <pages>736–739</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/519_Paper.pdf</url>
@@ -4635,9 +4635,9 @@
     </paper>
     <paper id="431">
       <author><first>Adam</first><last>Kilgarriff</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <author><first>Miloš</first><last>Jakubíček</last></author>
-      <author><first>Vojtěch</first><last>Kovář</last></author>
+      <author id="vojtech-kovar"><first>Vojtěch</first><last>Kovář</last></author>
       <author><first>Vít</first><last>Baisa</last></author>
       <author><first>Lucia</first><last>Kocincová</last></author>
       <title>Extrinsic Corpus Evaluation with a Collocation Dictionary Task</title>
@@ -4647,7 +4647,7 @@
       <bibkey>kilgarriff-etal-2014-extrinsic</bibkey>
     </paper>
     <paper id="432">
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
       <author><first>Felicity</first><last>Cox</last></author>
       <author><first>Denis</first><last>Burnham</last></author>
@@ -4664,7 +4664,7 @@
       <author><first>Emily</first><last>Danchik</last></author>
       <author><first>Michael T.</first><last>Mordowanec</last></author>
       <author><first>Henrietta</first><last>Conrad</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <title>Comprehensive Annotation of Multiword Expressions in a Social Web Corpus</title>
       <pages>455–461</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/521_Paper.pdf</url>
@@ -4700,10 +4700,10 @@
       <bibkey>navarretta-lis-2014-transfer</bibkey>
     </paper>
     <paper id="437">
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <title>Comparing two acquisition systems for automatically building an <fixed-case>E</fixed-case>nglish—<fixed-case>C</fixed-case>roatian parallel corpus from multilingual websites</title>
@@ -4714,7 +4714,7 @@
     </paper>
     <paper id="438">
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <author><first>Atefeh</first><last>Farzindar</last></author>
       <title>Hashtag Occurrences, Layout and Translation: A Corpus-driven Analysis of Tweets Published by the <fixed-case>C</fixed-case>anadian Government</title>
       <pages>2254–2261</pages>
@@ -4735,7 +4735,7 @@
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Luca</first><last>Iocchi</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Daniele</first><last>Nardi</last></author>
       <title><fixed-case>H</fixed-case>u<fixed-case>RIC</fixed-case>: a Human Robot Interaction Corpus</title>
       <pages>4519–4526</pages>
@@ -4766,8 +4766,8 @@
       <author><first>Matěj</first><last>Korvas</last></author>
       <author><first>Ondřej</first><last>Plátek</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Lukáš</first><last>Žilka</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="lukas-zilka"><first>Lukáš</first><last>Žilka</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <title>Free <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>zech telephone speech corpus shared under the <fixed-case>CC</fixed-case>-<fixed-case>BY</fixed-case>-<fixed-case>SA</fixed-case> 3.0 license</title>
       <pages>4423–4428</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/535_Paper.pdf</url>
@@ -4804,7 +4804,7 @@
       <bibkey>wroblewska-przepiorkowski-2014-projection</bibkey>
     </paper>
     <paper id="447">
-      <author><first>Gianluca</first><last>Lebani</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last></author>
       <author><first>Veronica</first><last>Viola</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <title>Bootstrapping an <fixed-case>I</fixed-case>talian <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et: data-driven analysis of verb alternations</title>
@@ -4815,7 +4815,7 @@
     </paper>
     <paper id="448">
       <author><first>Arda</first><last>Çelebi</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <title>Self-training a Constituency Parser using n-gram Trees</title>
       <pages>2893–2896</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/543_Paper.pdf</url>
@@ -4825,7 +4825,7 @@
     <paper id="449">
       <author><first>Bushra</first><last>Jawaid</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <title>A Tagged Corpus and a Tagger for <fixed-case>U</fixed-case>rdu</title>
       <pages>2938–2943</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/544_Paper.pdf</url>
@@ -4834,7 +4834,7 @@
     </paper>
     <paper id="450">
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Judith</first><last>Eckle-Kohler</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <title>Lexical Substitution Dataset for <fixed-case>G</fixed-case>erman</title>
@@ -4846,7 +4846,7 @@
     <paper id="451">
       <author><first>Lauren</first><last>Romeo</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>A cascade approach for complex-type classification</title>
       <pages>4451–4458</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/546_Paper.pdf</url>
@@ -4855,10 +4855,10 @@
     </paper>
     <paper id="452">
       <author><first>Cédric</first><last>Lopez</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Olivier</first><last>Hondermarck</last></author>
       <author><first>Paolo</first><last>Curtoni</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <title>Generating a Resource for Products and Brandnames Recognition. Application to the Cosmetic Domain.</title>
       <pages>2559–2564</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/549_Paper.pdf</url>
@@ -4869,8 +4869,8 @@
       <author><first>Louise</first><last>Deléger</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <title>Annotation of specialized corpora using a comprehensive entity and relation scheme</title>
       <pages>1267–1274</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/552_Paper.pdf</url>
@@ -4889,7 +4889,7 @@
     <paper id="455">
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Jason</first><last>Utt</last></author>
       <title>Polysemy Index for Nouns: an Experiment on <fixed-case>I</fixed-case>talian using the <fixed-case>PAROLE</fixed-case> <fixed-case>SIMPLE</fixed-case> <fixed-case>CLIPS</fixed-case> Lexical Database</title>
@@ -4910,8 +4910,8 @@
       <bibkey>salama-etal-2014-youdacc</bibkey>
     </paper>
     <paper id="457">
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <title>Towards an Encyclopedia of Compositional Semantics: Documenting the Interface of the <fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>G</fixed-case>rammar</title>
       <pages>875–881</pages>
@@ -4960,7 +4960,7 @@
     <paper id="462">
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <title>Manual Analysis of Structurally Informed Reordering in <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <pages>4379–4386</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/569_Paper.pdf</url>
@@ -4982,7 +4982,7 @@
     <paper id="464">
       <author><first>Moritz</first><last>Wittmann</last></author>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Automatic Extraction of Synonyms for <fixed-case>G</fixed-case>erman Particle Verbs from Parallel Data with Distributional Similarity as a Re-Ranking Feature</title>
       <pages>1430–1437</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/574_Paper.pdf</url>
@@ -5053,8 +5053,8 @@
     </paper>
     <paper id="471">
       <author><first>Lauren</first><last>Romeo</last></author>
-      <author><first>Gianluca</first><last>Lebani</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <title>Choosing which to use? A study of distributional models for nominal lexical semantic classification</title>
       <pages>4366–4373</pages>
@@ -5067,7 +5067,7 @@
       <author><first>Christoph</first><last>Schmidt</last></author>
       <author><first>Oscar</first><last>Koller</last></author>
       <author><first>Martin</first><last>Bellgardt</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <title>Extensions of the Sign Language Recognition and Translation Corpus <fixed-case>RWTH</fixed-case>-<fixed-case>PHOENIX</fixed-case>-Weather</title>
       <pages>1911–1916</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/585_Paper.pdf</url>
@@ -5078,9 +5078,9 @@
       <author><first>Sara</first><last>Candeias</last></author>
       <author><first>Dirce</first><last>Celorico</last></author>
       <author><first>Jorge</first><last>Proença</last></author>
-      <author><first>Arlindo</first><last>Veiga</last></author>
+      <author id="arlindo-veiga"><first>Arlindo</first><last>Veiga</last></author>
       <author><first>Carla</first><last>Lopes</last></author>
-      <author><first>Fernando</first><last>Perdigão</last></author>
+      <author id="fernando-perdigao"><first>Fernando</first><last>Perdigão</last></author>
       <title><fixed-case>HESITA</fixed-case>(te) in <fixed-case>P</fixed-case>ortuguese</title>
       <pages>1564–1567</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/587_Paper.pdf</url>
@@ -5096,9 +5096,9 @@
       <bibkey>alansary-2014-muhit</bibkey>
     </paper>
     <paper id="475">
-      <author><first>Maddalen</first><last>Lopez de Lacalle</last></author>
+      <author id="maddalen-lopez-de-lacalle"><first>Maddalen</first><last>Lopez de Lacalle</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title>Predicate Matrix: extending <fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink through <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et mappings</title>
       <pages>903–909</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/589_Paper.pdf</url>
@@ -5106,7 +5106,7 @@
       <bibkey>lopez-de-lacalle-etal-2014-predicate</bibkey>
     </paper>
     <paper id="476">
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Sharifah Mahani</first><last>Aljunied</last></author>
       <author><first>Nattadaporn</first><last>Lertcheva</last></author>
       <author><first>Sasiwimon</first><last>Kalunsima</last></author>
@@ -5120,7 +5120,7 @@
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <author><first>Andrea</first><last>Cimino</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <title><fixed-case>T</fixed-case>2<fixed-case>K</fixed-case>^2: a System for Automatically Extracting and Organizing Knowledge from Texts</title>
       <pages>2062–2070</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/590_Paper.pdf</url>
@@ -5141,13 +5141,13 @@
     <paper id="479">
       <author><first>Arfath</first><last>Pasha</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Ahmed</first><last>El Kholy</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Manoj</first><last>Pooleery</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <title><fixed-case>MADAMIRA</fixed-case>: A Fast, Comprehensive Tool for Morphological Analysis and Disambiguation of <fixed-case>A</fixed-case>rabic</title>
       <pages>1094–1101</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/593_Paper.pdf</url>
@@ -5165,8 +5165,8 @@
     <paper id="481">
       <author><first>Weston</first><last>Feely</last></author>
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <title>The <fixed-case>CMU</fixed-case> <fixed-case>METAL</fixed-case> <fixed-case>F</fixed-case>arsi <fixed-case>NLP</fixed-case> Approach</title>
       <pages>4052–4055</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/596_Paper.pdf</url>
@@ -5175,7 +5175,7 @@
     </paper>
     <paper id="482">
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Recognising suicidal messages in <fixed-case>D</fixed-case>utch social media</title>
       <pages>830–835</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/597_Paper.pdf</url>
@@ -5184,7 +5184,7 @@
     </paper>
     <paper id="483">
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>A Rank-based Distance Measure to Detect Polysemy and to Determine Salient Vector-Space Features for <fixed-case>G</fixed-case>erman Prepositions</title>
       <pages>4459–4466</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/599_Paper.pdf</url>
@@ -5219,7 +5219,7 @@
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
-      <author><first>Paul</first><last>Bédaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bédaride</last></author>
       <title>Mapping the Lexique des Verbes du Français (Lexicon of <fixed-case>F</fixed-case>rench Verbs) to a <fixed-case>NLP</fixed-case> lexicon using examples</title>
       <pages>2806–2810</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/602_Paper.pdf</url>
@@ -5227,10 +5227,10 @@
       <bibkey>guillaume-etal-2014-mapping</bibkey>
     </paper>
     <paper id="487">
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Julien</first><last>Grosjean</last></author>
-      <author><first>Stéfan</first><last>Darmoni</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="stefan-darmoni"><first>Stéfan</first><last>Darmoni</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <title>Language Resources for <fixed-case>F</fixed-case>rench in the Biomedical Domain</title>
       <pages>2146–2151</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/604_Paper.pdf</url>
@@ -5241,7 +5241,7 @@
       <author><first>Adriane</first><last>Boyd</last></author>
       <author><first>Jirka</first><last>Hana</last></author>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Katrin</first><last>Wisniewski</last></author>
       <author><first>Andrea</first><last>Abel</last></author>
       <author><first>Karin</first><last>Schöne</last></author>
@@ -5268,7 +5268,7 @@
     </paper>
     <paper id="490">
       <author><first>Bushra</first><last>Jawaid</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <title>Two-Step Machine Translation with Lattices</title>
       <pages>682–686</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/610_Paper.pdf</url>
@@ -5276,7 +5276,7 @@
       <bibkey>jawaid-bojar-2014-two</bibkey>
     </paper>
     <paper id="491">
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <author><first>Felix</first><last>Friedmann</last></author>
       <author><first>Florian</first><last>Eyben</last></author>
       <title>The <fixed-case>M</fixed-case>unich Biovoice Corpus: Effects of Physical Exercising, Heart Rate, and Skin Conductance on Human Speech Production</title>
@@ -5297,7 +5297,7 @@
     </paper>
     <paper id="493">
       <author><first>Raymond</first><last>Shen</last></author>
-      <author><first>Hideaki</first><last>Kikuchi</last></author>
+      <author id="hideaki-kikuchi"><first>Hideaki</first><last>Kikuchi</last></author>
       <title>Estimation of Speaking Style in Speech Corpora Focusing on speech transcriptions</title>
       <pages>2747–2752</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/616_Paper.pdf</url>
@@ -5316,7 +5316,7 @@
     </paper>
     <paper id="495">
       <author><first>Travis</first><last>Goodwin</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <title>Clinical Data-Driven Probabilistic Graph Processing</title>
       <pages>101–108</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/618_Paper.pdf</url>
@@ -5324,7 +5324,7 @@
       <bibkey>goodwin-harabagiu-2014-clinical</bibkey>
     </paper>
     <paper id="496">
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
@@ -5335,7 +5335,7 @@
       <bibkey>padro-etal-2014-comparing</bibkey>
     </paper>
     <paper id="497">
-      <author><first>Cheikh M. Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh M. Bamba</first><last>Dione</last></author>
       <title>Pruning the Search Space of the <fixed-case>W</fixed-case>olof <fixed-case>LFG</fixed-case> Grammar Using a Probabilistic and a Constraint Grammar Parser</title>
       <pages>2863–2870</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/62_Paper.pdf</url>
@@ -5344,8 +5344,8 @@
     </paper>
     <paper id="498">
       <author><first>Maha</first><last>Althobaiti</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <title><fixed-case>A</fixed-case>ra<fixed-case>NLP</fixed-case>: a <fixed-case>J</fixed-case>ava-based Library for the Processing of <fixed-case>A</fixed-case>rabic Text.</title>
       <pages>4134–4138</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/621_Paper.pdf</url>
@@ -5355,7 +5355,7 @@
     <paper id="499">
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Criteria for Identifying and Annotating Caused Motion Constructions in Corpus Data</title>
       <pages>1297–1304</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/624_Paper.pdf</url>
@@ -5363,7 +5363,7 @@
       <bibkey>hwang-etal-2014-criteria</bibkey>
     </paper>
     <paper id="500">
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <title>Web-imageability of the Behavioral Features of Basic-level Concepts</title>
       <pages>3609–3614</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/627_Paper.pdf</url>
@@ -5372,7 +5372,7 @@
     </paper>
     <paper id="501">
       <author><first>Steve</first><last>Cassidy</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Timothy</first><last>Jones</last></author>
       <author><first>Denis</first><last>Burnham</last></author>
       <author><first>Jared</first><last>Burghold</last></author>
@@ -5383,7 +5383,7 @@
       <bibkey>cassidy-etal-2014-alveo</bibkey>
     </paper>
     <paper id="502">
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <author><first>Ulla</first><last>König-Cardanobile</last></author>
       <title>A Compact Interactive Visualization of Dependency Treebank Query Results</title>
@@ -5416,7 +5416,7 @@
       <bibkey>wu-etal-2014-illinoiscloudnlp</bibkey>
     </paper>
     <paper id="505">
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <title>Text Readability and Word Distribution in <fixed-case>J</fixed-case>apanese</title>
       <pages>2811–2815</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/633_Paper.pdf</url>
@@ -5433,7 +5433,7 @@
     </paper>
     <paper id="507">
       <author><first>Octavian</first><last>Popescu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
       <title>Mapping <fixed-case>CPA</fixed-case> Patterns onto <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes Senses</title>
       <pages>882–889</pages>
@@ -5442,7 +5442,7 @@
       <bibkey>popescu-etal-2014-mapping</bibkey>
     </paper>
     <paper id="508">
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <title>Language <fixed-case>C</fixed-case>o<fixed-case>LLAGE</fixed-case>: Grammatical Description with the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix</title>
       <pages>2447–2451</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/639_Paper.pdf</url>
@@ -5450,8 +5450,8 @@
       <bibkey>bender-2014-language</bibkey>
     </paper>
     <paper id="509">
-      <author><first>Silvia Rodríguez</first><last>Vázquez</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="silvia-vazquez"><first>Silvia Rodríguez</first><last>Vázquez</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Anton</first><last>Bolfing</last></author>
       <title>Applying Accessibility-Oriented Controlled Language (<fixed-case>CL</fixed-case>) Rules to Improve Appropriateness of Text Alternatives for Images: an Exploratory Study</title>
       <pages>4139–4146</pages>
@@ -5492,7 +5492,7 @@
     <paper id="513">
       <author><first>Zhengzhong</first><last>Liu</last></author>
       <author><first>Jun</first><last>Araki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <title>Supervised Within-Document Event Coreference using Information Propagation</title>
       <pages>4539–4544</pages>
@@ -5523,9 +5523,9 @@
       <bibkey>mori-neubig-2014-language</bibkey>
     </paper>
     <paper id="516">
-      <author><first>Luca</first><last>Cristoforetti</last></author>
+      <author id="luca-cristoforetti"><first>Luca</first><last>Cristoforetti</last></author>
       <author><first>Mirco</first><last>Ravanelli</last></author>
-      <author><first>Maurizio</first><last>Omologo</last></author>
+      <author id="maurizio-omologo"><first>Maurizio</first><last>Omologo</last></author>
       <author><first>Alessandro</first><last>Sosi</last></author>
       <author><first>Alberto</first><last>Abad</last></author>
       <author><first>Martin</first><last>Hagmueller</last></author>
@@ -5539,7 +5539,7 @@
     <paper id="517">
       <author><first>Daniel</first><last>Hladek</last></author>
       <author><first>Jan</first><last>Stas</last></author>
-      <author><first>Jozef</first><last>Juhar</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhar</last></author>
       <title>The <fixed-case>S</fixed-case>lovak Categorized News Corpus</title>
       <pages>1705–1708</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/656_Paper.pdf</url>
@@ -5577,8 +5577,8 @@
       <bibkey>ganitkevitch-callison-burch-2014-multilingual</bibkey>
     </paper>
     <paper id="521">
-      <author><first>Menno</first><last>van Zaanen</last></author>
-      <author><first>Gerhard</first><last>van Huyssteen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard</first><last>van Huyssteen</last></author>
       <author><first>Suzanne</first><last>Aussems</last></author>
       <author><first>Chris</first><last>Emmery</last></author>
       <author><first>Roald</first><last>Eiselen</last></author>
@@ -5589,8 +5589,8 @@
       <bibkey>van-zaanen-etal-2014-development</bibkey>
     </paper>
     <paper id="522">
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
       <author><first>Blaz</first><last>Fortuna</last></author>
       <author><first>Esteban</first><last>García-Cuesta</last></author>
@@ -5614,7 +5614,7 @@
     <paper id="524">
       <author><first>Marta</first><last>Villegas</last></author>
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>Metadata as Linked Open Data: mapping disparate <fixed-case>XML</fixed-case> metadata registries into one <fixed-case>RDF</fixed-case>/<fixed-case>OWL</fixed-case> registry.</title>
       <pages>393–400</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/664_Paper.pdf</url>
@@ -5623,7 +5623,7 @@
     </paper>
     <paper id="525">
       <author><first>Grégoire</first><last>Détrez</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
       <title>Sharing resources between free/open-source rule-based machine translation systems: Grammatical Framework and Apertium</title>
       <pages>4394–4400</pages>
@@ -5641,7 +5641,7 @@
     </paper>
     <paper id="527">
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Mark</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark</first><last>Greenwood</last></author>
       <title>Who cares about Sarcastic Tweets? Investigating the Impact of Sarcasm on Sentiment Analysis.</title>
       <pages>4238–4243</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/67_Paper.pdf</url>
@@ -5649,9 +5649,9 @@
       <bibkey>maynard-greenwood-2014-cares</bibkey>
     </paper>
     <paper id="528">
-      <author><first>Xabier</first><last>Artola</last></author>
+      <author id="xabier-artola"><first>Xabier</first><last>Artola</last></author>
       <author><first>Zuhaitz</first><last>Beloki</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <title>A stream computing approach towards scalable <fixed-case>NLP</fixed-case></title>
       <pages>8–13</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/670_Paper.pdf</url>
@@ -5678,13 +5678,13 @@
     </paper>
     <paper id="531">
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>William A.</first><last>Baumgartner Jr.</last></author>
-      <author><first>Negacy D.</first><last>Hailu</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner Jr.</last></author>
+      <author id="negacy-hailu"><first>Negacy D.</first><last>Hailu</last></author>
       <author><first>Ivelina</first><last>Nikolova</last></author>
       <author><first>Tony</first><last>McEnery</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <title>Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora</title>
       <pages>1714–1718</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf</url>
@@ -5693,7 +5693,7 @@
     </paper>
     <paper id="532">
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <title>A Large-Scale Evaluation of Pre-editing Strategies for Improving User-Generated Content Translation</title>
       <pages>1793–1799</pages>
@@ -5704,7 +5704,7 @@
     <paper id="533">
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
       <author><first>Hrafn</first><last>Loftsson</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <title>Correcting Errors in a New Gold Standard for Tagging <fixed-case>I</fixed-case>celandic Text</title>
       <pages>2944–2948</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/677_Paper.pdf</url>
@@ -5712,7 +5712,7 @@
       <bibkey>helgadottir-etal-2014-correcting</bibkey>
     </paper>
     <paper id="534">
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <title>Semi-compositional Method for Synonym Extraction of Multi-Word Terms</title>
       <pages>1202–1207</pages>
@@ -5721,8 +5721,8 @@
       <bibkey>daille-hazem-2014-semi</bibkey>
     </paper>
     <paper id="535">
-      <author><first>Matúš</first><last>Pleva</last></author>
-      <author><first>Jozef</first><last>Juhár</last></author>
+      <author id="matus-pleva"><first>Matúš</first><last>Pleva</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhár</last></author>
       <title><fixed-case>TUKE</fixed-case>-<fixed-case>BN</fixed-case>ews-<fixed-case>SK</fixed-case>: <fixed-case>S</fixed-case>lovak Broadcast News Corpus Construction and Evaluation</title>
       <pages>1709–1713</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/680_Paper.pdf</url>
@@ -5731,7 +5731,7 @@
     </paper>
     <paper id="536">
       <author><first>Csaba</first><last>Oravecz</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Bálint</first><last>Sass</last></author>
       <title>The <fixed-case>H</fixed-case>ungarian <fixed-case>G</fixed-case>igaword Corpus</title>
       <pages>1719–1723</pages>
@@ -5743,7 +5743,7 @@
       <author><first>Kunal</first><last>Sachdeva</last></author>
       <author><first>Rishabh</first><last>Srivastava</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <title><fixed-case>H</fixed-case>indi to <fixed-case>E</fixed-case>nglish Machine Translation: Using Effective Selection in Multi-Model <fixed-case>SMT</fixed-case></title>
       <pages>1807–1811</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/682_Paper.pdf</url>
@@ -5771,12 +5771,12 @@
       <bibkey>wattam-etal-2014-experiences</bibkey>
     </paper>
     <paper id="540">
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
-      <author><first>Kyungtae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>Kyungtae</first><last>Lim</last></author>
       <author><first>Youngsik</first><last>Kim</last></author>
       <author><first>Dosam</first><last>Hwang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <title>Named Entity Corpus Construction using <fixed-case>W</fixed-case>ikipedia and <fixed-case>DB</fixed-case>pedia Ontology</title>
       <pages>2565–2569</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/688_Paper.pdf</url>
@@ -5795,7 +5795,7 @@
       <bibkey>callejas-etal-2014-model</bibkey>
     </paper>
     <paper id="542">
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <title>The <fixed-case>SET</fixed-case>imes.<fixed-case>HR</fixed-case> Linguistically Annotated Corpus of <fixed-case>C</fixed-case>roatian</title>
       <pages>1724–1727</pages>
@@ -5827,7 +5827,7 @@
       <bibkey>pho-etal-2014-multiple</bibkey>
     </paper>
     <paper id="545">
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Daša</first><last>Berović</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
@@ -5847,7 +5847,7 @@
     </paper>
     <paper id="547">
       <author><first>Masood</first><last>Ghayoomi</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <title>Constituency Parsing of <fixed-case>B</fixed-case>ulgarian: Word- vs Class-based Parsing</title>
       <pages>4056–4060</pages>
@@ -5857,7 +5857,7 @@
     </paper>
     <paper id="548">
       <author><first>Maike</first><last>Paetzel</last></author>
-      <author><first>David Nicolas</first><last>Racca</last></author>
+      <author id="david-nicolas-racca"><first>David Nicolas</first><last>Racca</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <title>A Multimodal Corpus of Rapid Dialogue Games</title>
       <pages>4189–4195</pages>
@@ -5870,7 +5870,7 @@
       <author><first>Julián David</first><last>Arias-Londoño</last></author>
       <author><first>Jesús Francisco</first><last>Vargas-Bonilla</last></author>
       <author><first>María Claudia</first><last>González-Rátiva</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
       <title>New <fixed-case>S</fixed-case>panish speech corpus database for the analysis of people suffering from <fixed-case>P</fixed-case>arkinson’s disease</title>
       <pages>342–347</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/7_Paper.pdf</url>
@@ -5887,9 +5887,9 @@
       <bibkey>martens-passarotti-2014-thomas</bibkey>
     </paper>
     <paper id="551">
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <title>Identification of Technology Terms in Patents</title>
       <pages>2008–2014</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/701_Paper.pdf</url>
@@ -5908,7 +5908,7 @@
     <paper id="553">
       <author><first>Eduard</first><last>Bejček</last></author>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <title>Automatic Mapping Lexical Resources: A Lexical Unit as the Keystone</title>
       <pages>2826–2832</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/704_Paper.pdf</url>
@@ -5943,7 +5943,7 @@
       <author><first>Keigo</first><last>Kubo</last></author>
       <author><first>Sho</first><last>Matsumiya</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <author><first>Fumihiro</first><last>Adachi</last></author>
       <author><first>Ryosuke</first><last>Isotani</last></author>
@@ -5986,7 +5986,7 @@
       <bibkey>bartolini-etal-2014-synsets</bibkey>
     </paper>
     <paper id="560">
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <title>Language Editing Dataset of Academic Texts</title>
       <pages>1738–1742</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/714_Paper.pdf</url>
@@ -6005,7 +6005,7 @@
     <paper id="562">
       <author><first>Yuichi</first><last>Ishimoto</last></author>
       <author><first>Tomoyuki</first><last>Tsuchiya</last></author>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <title>Towards Automatic Transformation between Different Transcription Conventions: Prediction of Intonation Markers from Linguistic and Acoustic Features</title>
       <pages>311–315</pages>
@@ -6028,7 +6028,7 @@
       <author><first>Corine</first><last>Astésano</last></author>
       <author><first>Ellen Gurman</first><last>Bard</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <title>Aix Map Task corpus: The <fixed-case>F</fixed-case>rench multimodal corpus of task-oriented dialogue</title>
       <pages>2648–2652</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/719_Paper.pdf</url>
@@ -6048,7 +6048,7 @@
     <paper id="566">
       <author><first>Milan</first><last>Rusko</last></author>
       <author><first>Sakhia</first><last>Darjaa</last></author>
-      <author><first>Marián</first><last>Trnka</last></author>
+      <author id="marian-trnka"><first>Marián</first><last>Trnka</last></author>
       <author><first>Marián</first><last>Ritomský</last></author>
       <author><first>Róbert</first><last>Sabo</last></author>
       <title>Alert!... Calm Down, There is Nothing to Worry About. Warning and Soothing Speech Synthesis.</title>
@@ -6067,9 +6067,9 @@
       <bibkey>kordoni-simova-2014-multiword</bibkey>
     </paper>
     <paper id="568">
-      <author><first>Christian</first><last>Girardi</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <title><fixed-case>CROMER</fixed-case>: a Tool for Cross-Document Event and Entity Coreference</title>
       <pages>3204–3208</pages>
@@ -6078,10 +6078,10 @@
       <bibkey>girardi-etal-2014-cromer</bibkey>
     </paper>
     <paper id="569">
-      <author><first>Tiberiu</first><last>Boroș</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boroș</last></author>
       <author><first>Adriana</first><last>Stan</last></author>
       <author><first>Oliver</first><last>Watts</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <title><fixed-case>RSS</fixed-case>-<fixed-case>TOBI</fixed-case> - A Prosodically Enhanced <fixed-case>R</fixed-case>omanian Speech Corpus</title>
       <pages>316–320</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/727_Paper.pdf</url>
@@ -6089,8 +6089,8 @@
       <bibkey>boros-etal-2014-rss</bibkey>
     </paper>
     <paper id="570">
-      <author><first>Artūrs</first><last>Znotiņš</last></author>
-      <author><first>Pēteris</first><last>Paikens</last></author>
+      <author id="arturs-znotins"><first>Artūrs</first><last>Znotiņš</last></author>
+      <author id="peteris-paikens"><first>Pēteris</first><last>Paikens</last></author>
       <title>Coreference Resolution for <fixed-case>L</fixed-case>atvian</title>
       <pages>3209–3213</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/729_Paper.pdf</url>
@@ -6118,9 +6118,9 @@
     </paper>
     <paper id="573">
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <title>Automatic language identity tagging on word and sentence-level in multilingual text sources: a case-study on <fixed-case>L</fixed-case>uxembourgish</title>
       <pages>3300–3304</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/732_Paper.pdf</url>
@@ -6128,10 +6128,10 @@
       <bibkey>lavergne-etal-2014-automatic</bibkey>
     </paper>
     <paper id="574">
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Sarah</first><last>Schulz</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <title>Towards Shared Datasets for Normalization Research</title>
       <pages>1218–1223</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/733_Paper.pdf</url>
@@ -6149,11 +6149,11 @@
       <bibkey>pecheux-etal-2014-rule</bibkey>
     </paper>
     <paper id="576">
-      <author><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
-      <author><first>Mikel</first><last>Penagarikano</last></author>
+      <author id="luis-javier-rodriguez-fuentes"><first>Luis Javier</first><last>Rodríguez-Fuentes</last></author>
+      <author id="mikel-penagarikano"><first>Mikel</first><last>Penagarikano</last></author>
       <author><first>Amparo</first><last>Varona</last></author>
-      <author><first>Mireia</first><last>Diez</last></author>
-      <author><first>Germán</first><last>Bordel</last></author>
+      <author id="mireia-diez"><first>Mireia</first><last>Diez</last></author>
+      <author id="german-bordel"><first>Germán</first><last>Bordel</last></author>
       <title><fixed-case>KALAKA</fixed-case>-3: a database for the recognition of spoken <fixed-case>E</fixed-case>uropean languages on <fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>ube audios</title>
       <pages>443–449</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/736_Paper.pdf</url>
@@ -6162,7 +6162,7 @@
     </paper>
     <paper id="577">
       <author><first>Andrew</first><last>Gargett</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <title>Mining Online Discussion Forums for Metaphors</title>
       <pages>2507–2512</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/737_Paper.pdf</url>
@@ -6179,8 +6179,8 @@
       <bibkey>togia-copestake-2014-tagntext</bibkey>
     </paper>
     <paper id="579">
-      <author><first>Carmen</first><last>García-Mateo</last></author>
-      <author><first>Antonio</first><last>Cardenal</last></author>
+      <author id="carmen-garcia-mateo"><first>Carmen</first><last>García-Mateo</last></author>
+      <author id="antonio-cardenal"><first>Antonio</first><last>Cardenal</last></author>
       <author><first>Xosé Luis</first><last>Regueira</last></author>
       <author><first>Elisa Fernández</first><last>Rei</last></author>
       <author><first>Marta</first><last>Martinez</last></author>
@@ -6213,7 +6213,7 @@
       <author><first>Brice</first><last>Isableu</last></author>
       <author><first>Sylvie</first><last>Gibet</last></author>
       <author><first>Pierre</first><last>De Loor</last></author>
-      <author><first>Jean-Claude</first><last>Martin</last></author>
+      <author id="jean-claude-martin"><first>Jean-Claude</first><last>Martin</last></author>
       <title>A Database of Full Body Virtual Interactions Annotated with Expressivity Scores</title>
       <pages>3505–3510</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/741_Paper.pdf</url>
@@ -6234,11 +6234,11 @@
     </paper>
     <paper id="583">
       <author><first>Igor</first><last>Odriozola</last></author>
-      <author><first>Inma</first><last>Hernaez</last></author>
-      <author><first>María Inés</first><last>Torres</last></author>
-      <author><first>Luis Javier</first><last>Rodriguez-Fuentes</last></author>
-      <author><first>Mikel</first><last>Penagarikano</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernaez</last></author>
+      <author id="m-ines-torres"><first>María Inés</first><last>Torres</last></author>
+      <author id="luis-javier-rodriguez-fuentes"><first>Luis Javier</first><last>Rodriguez-Fuentes</last></author>
+      <author id="mikel-penagarikano"><first>Mikel</first><last>Penagarikano</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <title><fixed-case>B</fixed-case>asque Speecon-like and <fixed-case>B</fixed-case>asque <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at <fixed-case>MDB</fixed-case>-600: speech databases for the development of <fixed-case>ASR</fixed-case> technology for <fixed-case>B</fixed-case>asque</title>
       <pages>2658–2665</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/744_Paper.pdf</url>
@@ -6257,8 +6257,8 @@
     </paper>
     <paper id="585">
       <author><first>Coline</first><last>Claude-Lachenaud</last></author>
-      <author><first>Éric</first><last>Charton</last></author>
-      <author><first>Benoît</first><last>Ozell</last></author>
+      <author id="eric-charton"><first>Éric</first><last>Charton</last></author>
+      <author id="benoit-ozell"><first>Benoît</first><last>Ozell</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
       <title>A multimodal interpreter for 3<fixed-case>D</fixed-case> visualization and animation of verbal concepts</title>
       <pages>3620–3627</pages>
@@ -6271,7 +6271,7 @@
       <author><first>Andreas</first><last>Maier</last></author>
       <author><first>Korbinian</first><last>Riedhammer</last></author>
       <author><first>Ulrich</first><last>Eysholdt</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
       <title>Erlangen-<fixed-case>CLP</fixed-case>: A Large Annotated Corpus of Speech from Children with Cleft Lip and Palate</title>
       <pages>2671–2674</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/748_Paper.pdf</url>
@@ -6292,7 +6292,7 @@
       <author><first>Anindya</first><last>Roy</last></author>
       <author><first>Camille</first><last>Guinaudeau</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <title><fixed-case>TVD</fixed-case>: A Reproducible and Multiply Aligned <fixed-case>TV</fixed-case> Series Dataset</title>
       <pages>418–425</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/751_Paper.pdf</url>
@@ -6339,9 +6339,9 @@
       <bibkey>goto-etal-2014-crowdsourcing</bibkey>
     </paper>
     <paper id="593">
-      <author><first>Billy T.M.</first><last>Wong</last></author>
+      <author id="billy-t-m-wong"><first>Billy T.M.</first><last>Wong</last></author>
       <author><first>Ian C.</first><last>Chow</last></author>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <author><first>Hengbin</first><last>Yan</last></author>
       <title>The Halliday Centre Tagger: An Online Platform for Semi-automatic Text Annotation and Analysis</title>
       <pages>1664–1667</pages>
@@ -6403,8 +6403,8 @@
     <paper id="599">
       <author><first>Kasia</first><last>Budzynska</last></author>
       <author><first>Mathilde</first><last>Janier</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Olena</first><last>Yakorska</last></author>
       <title>A Model for Processing Illocutionary Structures and Argumentation in Debates</title>
@@ -6425,14 +6425,14 @@
     <paper id="601">
       <author><first>Daniel</first><last>Luzzati</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Ioana</first><last>Vasilescu</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Eric</first><last>Bilinski</last></author>
+      <author id="ioana-vasilescu"><first>Ioana</first><last>Vasilescu</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="eric-bilinski"><first>Eric</first><last>Bilinski</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
       <author><first>Carole</first><last>Lailler</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <title>Human annotation of <fixed-case>ASR</fixed-case> error regions: Is “gravity” a sharable concept for human annotators?</title>
       <pages>3050–3056</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/771_Paper.pdf</url>
@@ -6444,7 +6444,7 @@
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Lorenza</first><last>Russo</last></author>
       <author><first>Maria</first><last>Ivanova</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <title><fixed-case>S</fixed-case>wiss<fixed-case>A</fixed-case>dmin: A multilingual tagged parallel corpus of press releases</title>
       <pages>1832–1836</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/772_Paper.pdf</url>
@@ -6454,7 +6454,7 @@
     <paper id="603">
       <author><first>Anna</first><last>Vernerová</last></author>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <title>To Pay or to Get Paid: Enriching a Valency Lexicon with Diatheses</title>
       <pages>2452–2459</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/773_Paper.pdf</url>
@@ -6478,9 +6478,9 @@
       <bibkey>tian-etal-2014-um</bibkey>
     </paper>
     <paper id="605">
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Josu</first><last>Bermudez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <title><fixed-case>IXA</fixed-case> pipeline: Efficient and Ready to Use Multilingual <fixed-case>NLP</fixed-case> tools</title>
       <pages>3823–3828</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/775_Paper.pdf</url>
@@ -6498,7 +6498,7 @@
       <bibkey>matsuyoshi-etal-2014-annotating</bibkey>
     </paper>
     <paper id="607">
-      <author><first>Mohamed</first><last>Sherif</last></author>
+      <author id="mohamed-ahmed-sherif"><first>Mohamed</first><last>Sherif</last></author>
       <author><first>Sandro</first><last>Coelho</last></author>
       <author><first>Ricardo</first><last>Usbeck</last></author>
       <author><first>Sebastian</first><last>Hellmann</last></author>
@@ -6524,9 +6524,9 @@
     </paper>
     <paper id="609">
       <author><first>Senka</first><last>Drobac</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <title>Heuristic Hyper-minimization of Finite State Lexicons</title>
       <pages>3319–3324</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/784_Paper.pdf</url>
@@ -6535,15 +6535,15 @@
     </paper>
     <paper id="610">
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Riccardo</first><last>del Gratta</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>del Gratta</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <title><fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case>: One year after</title>
       <pages>1532–1538</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/786_Paper.pdf</url>
@@ -6552,7 +6552,7 @@
     </paper>
     <paper id="611">
       <author><first>Claudia</first><last>Baur</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <title>Using a Serious Game to Collect a Child Learner Speech Corpus</title>
       <pages>2726–2732</pages>
@@ -6561,7 +6561,7 @@
       <bibkey>baur-etal-2014-using</bibkey>
     </paper>
     <paper id="612">
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Gabriella</first><last>Pardelli</last></author>
       <author><first>Sara</first><last>Goggi</last></author>
       <title>The <fixed-case>LRE</fixed-case> Map disclosed</title>
@@ -6570,7 +6570,7 @@
       <bibkey>del-gratta-etal-2014-lre</bibkey>
     </paper>
     <paper id="613">
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <author><first>Ali Orkan</first><last>Bayer</last></author>
       <title>The Development of the Multilingual <fixed-case>LUNA</fixed-case> Corpus for Spoken Language System Porting</title>
@@ -6589,7 +6589,7 @@
     </paper>
     <paper id="615">
       <author><first>Marc</first><last>Poch</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
       <author><first>Felipe</first><last>Navío</last></author>
       <title>Ranking Job Offers for Candidates: learning hidden knowledge from Big Data</title>
@@ -6600,7 +6600,7 @@
     </paper>
     <paper id="616">
       <author><first>Valérie</first><last>Hanoka</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title>An Open-Source Heavily Multilingual Translation Graph Extracted from Wiktionaries and Parallel Corpora</title>
       <pages>3179–3186</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/792_Paper.pdf</url>
@@ -6626,7 +6626,7 @@
     </paper>
     <paper id="619">
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <title>A language-independent and fully unsupervised approach to lexicon induction and part-of-speech tagging for closely related languages</title>
       <pages>502–508</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/797_Paper.pdf</url>
@@ -6635,10 +6635,10 @@
     </paper>
     <paper id="620">
       <author><first>David</first><last>Tavarez</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Inma</first><last>Hernaez</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernaez</last></author>
       <title>New bilingual speech databases for audio diarization</title>
       <pages>2666–2670</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/799_Paper.pdf</url>
@@ -6648,7 +6648,7 @@
     <paper id="621">
       <author><first>Mohamed</first><last>Morchid</last></author>
       <author><first>Richard</first><last>Dufour</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <title>A <fixed-case>LDA</fixed-case>-Based Topic Classification Approach From Highly Imperfect Automatic Transcriptions</title>
       <pages>1309–1314</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/8_Paper.pdf</url>
@@ -6656,7 +6656,7 @@
       <bibkey>morchid-etal-2014-lda</bibkey>
     </paper>
     <paper id="622">
-      <author><first>Cristina Sánchez</first><last>Marco</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina Sánchez</first><last>Marco</last></author>
       <title>An open source part-of-speech tagger for <fixed-case>N</fixed-case>orwegian: Building on existing language resources</title>
       <pages>4111–4117</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/801_Paper.pdf</url>
@@ -6665,9 +6665,9 @@
     </paper>
     <paper id="623">
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Monica</first><last>Paramita</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="monica-lestari-paramita"><first>Monica</first><last>Paramita</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <title>Bilingual dictionaries for all <fixed-case>EU</fixed-case> languages</title>
       <pages>2839–2845</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/803_Paper.pdf</url>
@@ -6683,10 +6683,10 @@
       <bibkey>reynaert-2014-synergy</bibkey>
     </paper>
     <paper id="625">
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <title>Quality Estimation for Synthetic Parallel Data Generation</title>
       <pages>1843–1849</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/807_Paper.pdf</url>
@@ -6712,8 +6712,8 @@
       <author><first>Maud</first><last>Ehrmann</last></author>
       <author><first>Francesco</first><last>Cecconi</last></author>
       <author><first>Daniele</first><last>Vannella</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <title>Representing Multilingual Data as Linked Data: the Case of <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et 2.0</title>
       <pages>401–408</pages>
@@ -6735,9 +6735,9 @@
     </paper>
     <paper id="630">
       <author><first>Lina</first><last>Henriksen</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <author><first>Bente</first><last>Maegaard</last></author>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Claus</first><last>Povlsen</last></author>
       <title>Encompassing a spectrum of <fixed-case>LT</fixed-case> users in the <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case> Infrastructure</title>
       <pages>2175–2181</pages>
@@ -6747,11 +6747,11 @@
     </paper>
     <paper id="631">
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Thierry</first><last>Bazillon</last></author>
-      <author><first>Jose</first><last>Deulofeu</last></author>
-      <author><first>Andre</first><last>Valli</last></author>
+      <author id="jose-deulofeu"><first>Jose</first><last>Deulofeu</last></author>
+      <author id="andre-valli"><first>Andre</first><last>Valli</last></author>
       <title>Automatically enriching spoken corpora with syntactic information for linguistic studies</title>
       <pages>854–858</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/816_Paper.pdf</url>
@@ -6770,7 +6770,7 @@
     <paper id="633">
       <author><first>Maria</first><last>Simi</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <title>Less is More? Towards a Reduced Inventory of Categories for Training a Parser for the <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>tanford Dependencies</title>
       <pages>83–90</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/818_Paper.pdf</url>
@@ -6806,7 +6806,7 @@
       <bibkey>sonntag-stede-2014-grapat</bibkey>
     </paper>
     <paper id="637">
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <author><first>Guillermo</first><last>Cárcamo-Escorza</last></author>
       <author><first>Alicia</first><last>Ballesteros-Calvo</last></author>
       <title>Standardisation and Interoperation of Morphosyntactic and Syntactic Annotation Tools for <fixed-case>S</fixed-case>panish and their Annotations</title>
@@ -6829,7 +6829,7 @@
       <author><first>Jason</first><last>Utt</last></author>
       <author><first>Sylvia</first><last>Springorum</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Fuzzy <fixed-case>V</fixed-case>-Measure - An Evaluation Method for Cluster Analyses of Ambiguous Data</title>
       <pages>581–587</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/829_Paper.pdf</url>
@@ -6841,7 +6841,7 @@
       <author><first>Yunqing</first><last>Xia</last></author>
       <author><first>Weizhi</first><last>Wang</last></author>
       <author><first>Raymond</first><last>Lau</last></author>
-      <author><first>Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Fang</first><last>Zheng</last></author>
       <title>Clustering tweets using<fixed-case>W</fixed-case>ikipedia concepts</title>
       <pages>2262–2267</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/83_Paper.pdf</url>
@@ -6868,7 +6868,7 @@
     <paper id="642">
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <title><fixed-case>T</fixed-case>weet<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>: a tool for building <fixed-case>T</fixed-case>witter corpora of smaller languages</title>
       <pages>2279–2283</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/834_Paper.pdf</url>
@@ -6876,13 +6876,13 @@
       <bibkey>ljubesic-etal-2014-tweetcat</bibkey>
     </paper>
     <paper id="643">
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Vojtěch</first><last>Diatka</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
-      <author><first>Vít</first><last>Suchomel</last></author>
+      <author id="vit-suchomel"><first>Vít</first><last>Suchomel</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <title><fixed-case>H</fixed-case>ind<fixed-case>E</fixed-case>n<fixed-case>C</fixed-case>orp - <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi-only Corpus for Machine Translation</title>
       <pages>3550–3555</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/835_Paper.pdf</url>
@@ -6890,9 +6890,9 @@
       <bibkey>bojar-etal-2014-hindencorp</bibkey>
     </paper>
     <paper id="644">
-      <author><first>Silvia</first><last>Necşulescu</last></author>
+      <author id="silvia-necsulescu"><first>Silvia</first><last>Necşulescu</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <title>Combining dependency information and generalization in a pattern-based approach to the classification of lexical-semantic relation instances</title>
       <pages>4308–4315</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/837_Paper.pdf</url>
@@ -6900,10 +6900,10 @@
       <bibkey>necsulescu-etal-2014-combining</bibkey>
     </paper>
     <paper id="645">
-      <author><first>Aimilios</first><last>Chalamandaris</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="aimilios-chalamandaris"><first>Aimilios</first><last>Chalamandaris</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
       <author><first>Sotiris</first><last>Karabetsos</last></author>
-      <author><first>Spyros</first><last>Raptis</last></author>
+      <author id="spyros-raptis"><first>Spyros</first><last>Raptis</last></author>
       <title>Using Audio Books for Training a Text-to-Speech System</title>
       <pages>3076–3080</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf</url>
@@ -6911,7 +6911,7 @@
       <bibkey>chalamandaris-etal-2014-using</bibkey>
     </paper>
     <paper id="646">
-      <author><first>Agata</first><last>Cybulska</last></author>
+      <author id="agata-cybulska"><first>Agata</first><last>Cybulska</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <title>Using a sledgehammer to crack a nut? Lexical diversity and event coreference resolution</title>
       <pages>4545–4552</pages>
@@ -6930,7 +6930,7 @@
     </paper>
     <paper id="648">
       <author><first>Marc</first><last>Kupietz</last></author>
-      <author><first>Harald</first><last>Lüngen</last></author>
+      <author id="harald-lungen"><first>Harald</first><last>Lüngen</last></author>
       <title>Recent Developments in <fixed-case>D</fixed-case>e<fixed-case>R</fixed-case>e<fixed-case>K</fixed-case>o</title>
       <pages>2378–2385</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/842_Paper.pdf</url>
@@ -6938,7 +6938,7 @@
       <bibkey>kupietz-lungen-2014-recent</bibkey>
     </paper>
     <paper id="649">
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <title>Why <fixed-case>C</fixed-case>hinese Web-as-Corpus is Wacky? Or: How Big Data is Killing <fixed-case>C</fixed-case>hinese Corpus Linguistics</title>
       <pages>2386–2389</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/843_Paper.pdf</url>
@@ -6946,7 +6946,7 @@
       <bibkey>hsieh-2014-chinese</bibkey>
     </paper>
     <paper id="650">
-      <author><first>Tafseer</first><last>Ahmed Khan</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed Khan</last></author>
       <title>Automatic acquisition of <fixed-case>U</fixed-case>rdu nouns (along with gender and irregular plurals)</title>
       <pages>2846–2850</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/844_Paper.pdf</url>
@@ -6956,7 +6956,7 @@
     <paper id="651">
       <author><first>Clare</first><last>Llewellyn</last></author>
       <author><first>Claire</first><last>Grover</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Ewan</first><last>Klein</last></author>
       <title>Re-using an Argument Corpus to Aid in the Curation of Social Media Collections</title>
       <pages>462–468</pages>
@@ -6966,7 +6966,7 @@
     </paper>
     <paper id="652">
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <author><first>Daiga</first><last>Deksne</last></author>
       <title>Billions of Parallel Words for Free: Building and Using the <fixed-case>EU</fixed-case> Bookshop Corpus</title>
@@ -6977,9 +6977,9 @@
     </paper>
     <paper id="653">
       <author><first>Isa</first><last>Maks</last></author>
-      <author><first>Ruben</first><last>Izquierdo</last></author>
+      <author id="ruben-izquierdo"><first>Ruben</first><last>Izquierdo</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
       <title>Generating Polarity Lexicons with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et propagation in 5 languages</title>
@@ -6990,9 +6990,9 @@
     </paper>
     <paper id="654">
       <author><first>Mara</first><last>Chinea Rios</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <title>Online optimisation of log-linear weights in interactive machine translation</title>
       <pages>3556–3559</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/848_Paper.pdf</url>
@@ -7014,7 +7014,7 @@
     </paper>
     <paper id="656">
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <title>The <fixed-case>USAGE</fixed-case> review corpus for fine grained multi lingual opinion analysis</title>
       <pages>2211–2218</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/85_Paper.pdf</url>
@@ -7025,7 +7025,7 @@
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <author><first>Alicia</first><last>Burga</last></author>
       <author><first>Gerard</first><last>Casamayor</last></author>
-      <author><first>Joan</first><last>Codina</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina</last></author>
       <author><first>Rogelio</first><last>Nazar</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <title>An Exercise in Reuse of Resources: Adapting General Discourse Coreference Resolution for Detecting Lexical Chains in Patent Documentation</title>
@@ -7036,8 +7036,8 @@
     </paper>
     <paper id="658">
       <author><first>Bernardo</first><last>Severo</last></author>
-      <author><first>Cassia</first><last>Trojahn</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="cassia-trojahn"><first>Cassia</first><last>Trojahn</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <title><fixed-case>VOAR</fixed-case>: A Visual and Integrated Ontology Alignment Environment</title>
       <pages>3671–3677</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/851_Paper.pdf</url>
@@ -7059,8 +7059,8 @@
     <paper id="660">
       <author><first>Chahinez</first><last>Benkoussas</last></author>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Elodie</first><last>Faath</last></author>
       <title>A Collection of Scholarly Book Reviews from the Platforms of electronic sources in Humanities and Social Sciences <fixed-case>O</fixed-case>pen<fixed-case>E</fixed-case>dition.org</title>
       <pages>4172–4177</pages>
@@ -7069,11 +7069,11 @@
       <bibkey>benkoussas-etal-2014-collection</bibkey>
     </paper>
     <paper id="661">
-      <author><first>Anton Karl</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton Karl</first><last>Ingason</last></author>
       <author><first>Hrafn</first><last>Loftsson</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <author><first>Einar Freyr</first><last>Sigurðsson</last></author>
-      <author><first>Joel C.</first><last>Wallenberg</last></author>
+      <author id="joel-wallenberg"><first>Joel C.</first><last>Wallenberg</last></author>
       <title>Rapid Deployment of Phrase Structure Parsing for Related Languages: A Case Study of <fixed-case>I</fixed-case>nsular <fixed-case>S</fixed-case>candinavian</title>
       <pages>91–95</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/855_Paper.pdf</url>
@@ -7106,7 +7106,7 @@
       <author><first>Michael</first><last>Stadtschnitzer</last></author>
       <author><first>Jochen</first><last>Schwenninger</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
-      <author><first>Joachim</first><last>Koehler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Koehler</last></author>
       <title>Exploiting the large-scale <fixed-case>G</fixed-case>erman Broadcast Corpus to boost the Fraunhofer <fixed-case>IAIS</fixed-case> Speech Recognition System</title>
       <pages>3887–3890</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/858_Paper.pdf</url>
@@ -7114,7 +7114,7 @@
       <bibkey>stadtschnitzer-etal-2014-exploiting</bibkey>
     </paper>
     <paper id="665">
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Aleksey</first><last>Alekseev</last></author>
       <title>Summarizing News Clusters on the Basis of Thematic Chains</title>
       <pages>1600–1607</pages>
@@ -7123,7 +7123,7 @@
       <bibkey>loukachevitch-alekseev-2014-summarizing</bibkey>
     </paper>
     <paper id="666">
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Arne</first><last>Köhn</last></author>
       <author><first>Niels</first><last>Beuck</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
@@ -7151,7 +7151,7 @@
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
       <author><first>Daniel</first><last>Vila-Suero</last></author>
-      <author><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
       <title>Enabling Language Resources to Expose Translations as Linked Data on the Web</title>
       <pages>409–413</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/863_Paper.pdf</url>
@@ -7178,7 +7178,7 @@
     <paper id="671">
       <author><first>Tatiana</first><last>Erekhinskaya</last></author>
       <author><first>Meghana</first><last>Satpute</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <title>Multilingual e<fixed-case>X</fixed-case>tended <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Knowledge Base: Semantic Parsing and Translation of Glosses</title>
       <pages>2990–2994</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/866_Paper.pdf</url>
@@ -7196,7 +7196,7 @@
     </paper>
     <paper id="673">
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Alexis</first><last>Narvaez</last></author>
       <author><first>Mihai</first><last>Burzo</last></author>
       <title>A Multimodal Dataset for Deception Detection</title>
@@ -7217,9 +7217,9 @@
     </paper>
     <paper id="675">
       <author><first>Ahmed</first><last>Abdelali</last></author>
-      <author><first>Francisco</first><last>Guzman</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzman</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <title>The <fixed-case>AMARA</fixed-case> Corpus: Building Parallel Language Resources for the Educational Domain</title>
       <pages>1856–1862</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/877_Paper.pdf</url>
@@ -7227,10 +7227,10 @@
       <bibkey>abdelali-etal-2014-amara</bibkey>
     </paper>
     <paper id="676">
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
-      <author><first>Artūrs</first><last>Znotiņš</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="arturs-znotins"><first>Artūrs</first><last>Znotiņš</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Didzis</first><last>Goško</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Goško</last></author>
       <title>Dependency parsing representation effects on the accuracy of semantic applications — an example of an inflective language</title>
       <pages>4074–4081</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/879_Paper.pdf</url>
@@ -7239,7 +7239,7 @@
     </paper>
     <paper id="677">
       <author><first>Guiyao</first><last>Ke</last></author>
-      <author><first>Pierre-Francois</first><last>Marteau</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-Francois</first><last>Marteau</last></author>
       <title>Co-clustering of bilingual datasets as a mean for assisting the construction of thematic bilingual comparable corpora</title>
       <pages>1992–1999</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/88_Paper.pdf</url>
@@ -7288,7 +7288,7 @@
     <paper id="682">
       <author><first>Jetske</first><last>Klatter</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Paula</first><last>Fikkert</last></author>
       <author><first>Anne</first><last>Baker</last></author>
       <author><first>Jan</first><last>de Jong</last></author>
@@ -7335,7 +7335,7 @@
     <paper id="686">
       <author><first>Patrick</first><last>Littell</last></author>
       <author><first>Kaitlyn</first><last>Price</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <title>Morphological parsing of <fixed-case>S</fixed-case>wahili using crowdsourced lexical resources</title>
       <pages>3333–3339</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/896_Paper.pdf</url>
@@ -7343,7 +7343,7 @@
       <bibkey>littell-etal-2014-morphological</bibkey>
     </paper>
     <paper id="687">
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <author><first>Ludovic</first><last>Jean-Louis</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
@@ -7356,7 +7356,7 @@
     <paper id="688">
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
       <title><fixed-case>ELRA</fixed-case>’s Consolidated Services for the <fixed-case>HLT</fixed-case> Community</title>
       <pages>1511–1516</pages>
@@ -7366,7 +7366,7 @@
     </paper>
     <paper id="689">
       <author><first>Daisuke</first><last>Kawahara</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <title>Single Classifier Approach for Verb Sense Disambiguation based on Generalized Features</title>
       <pages>4210–4213</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/90_Paper.pdf</url>
@@ -7413,7 +7413,7 @@
       <bibkey>alfano-etal-2014-volip</bibkey>
     </paper>
     <paper id="694">
-      <author><first>Carla Parra</first><last>Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></author>
       <title>Chasing the Perfect Splitter: A Comparison of Different Compound Splitting Tools</title>
       <pages>3340–3347</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/909_Paper.pdf</url>
@@ -7462,8 +7462,8 @@
       <author><first>Jan</first><last>Mašek</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <title><fixed-case>H</fixed-case>amle<fixed-case>DT</fixed-case> 2.0: Thirty Dependency Treebanks Stanfordized</title>
       <pages>2334–2341</pages>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/915_Paper.pdf</url>
@@ -7488,14 +7488,14 @@
     </paper>
     <paper id="702">
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <title><fixed-case>SANA</fixed-case>: A Large Scale Multi-Genre, Multi-Dialect Lexicon for <fixed-case>A</fixed-case>rabic Subjectivity and Sentiment Analysis</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/919_Paper.pdf</url>
       <abstract>The computational treatment of subjectivity and sentiment in natural language is usually significantly improved by applying features exploiting lexical resources where entries are tagged with semantic orientation (e.g., positive, negative values). In spite of the fair amount of work on Arabic sentiment analysis over the past few years (e.g., (Abbasi et al., 2008; Abdul-Mageed et al., 2014; Abdul-Mageed et al., 2012; Abdul-Mageed and Diab, 2012a; Abdul-Mageed and Diab, 2012b; Abdul-Mageed et al., 2011a; Abdul-Mageed and Diab, 2011)), the language remains under-resourced as to these polarity repositories compared to the English language. In this paper, we report efforts to build and present SANA, a large-scale, multi-genre, multi-dialect multi-lingual lexicon for the subjectivity and sentiment analysis of the Arabic language and dialects.</abstract>
       <bibkey>abdul-mageed-diab-2014-sana</bibkey>
     </paper>
     <paper id="703">
-      <author><first>Behrang</first><last>Zadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>Zadeh</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <title>Evaluation of Technology Term Recognition with Random Indexing</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/920_Paper.pdf</url>
@@ -7504,7 +7504,7 @@
     </paper>
     <paper id="704">
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <title>Optimizing a Distributional Semantic Model for the Prediction of <fixed-case>G</fixed-case>erman Particle Verb Compositionality</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/921_Paper.pdf</url>
       <abstract>In the work presented here we assess the degree of compositionality of German Particle Verbs with a Distributional Semantics Model which only relies on word window information and has no access to syntactic information as such. Our method only takes the lexical distributional distance between the Particle Verb to its Base Verb as a predictor for compositionality. We show that the ranking of distributional similarity correlates significantly with the ranking of human judgements on semantic compositionality for a series of Particle Verbs and the Base Verbs they are derived from. We also investigate the influence of further linguistic factors, such as the ambiguity and the overall frequency of the verbs and a syntactically separate occurrences of verbs and particles that causes difficulties for the correct lemmatization of Particle Verbs. We analyse in how far these factors may influence the success with which the compositionality of the Particle Verbs may be predicted.</abstract>
@@ -7519,10 +7519,10 @@
       <bibkey>dey-fung-2014-hindi</bibkey>
     </paper>
     <paper id="706">
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Di</first><last>Wang</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
@@ -7542,7 +7542,7 @@
       <bibkey>christodoulides-etal-2014-dismo</bibkey>
     </paper>
     <paper id="708">
-      <author><first>Trang Mai</first><last>Xuan</last></author>
+      <author id="trang-mai-xuan"><first>Trang Mai</first><last>Xuan</last></author>
       <author><first>Yohei</first><last>Murakami</last></author>
       <author><first>Donghui</first><last>Lin</last></author>
       <author><first>Toru</first><last>Ishida</last></author>
@@ -7553,7 +7553,7 @@
     </paper>
     <paper id="709">
       <author><first>Klim</first><last>Peshkov</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <title>Segmentation evaluation metrics, a comparison grounded on prosodic and discourse units</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/931_Paper.pdf</url>
       <abstract>Knowledge on evaluation metrics and best practices of using them have improved fast in the recent years Fort et al. (2012). However, the advances concern mostly evaluation of classification related tasks. Segmentation tasks have received less attention. Nevertheless, there are crucial in a large number of linguistic studies. A range of metrics is available (F-score on boundaries, F-score on units, WindowDiff ((WD), Boundary Similarity (BS) but it is still relatively difficult to interpret these metrics on various linguistic segmentation tasks, such as prosodic and discourse segmentation. In this paper, we consider real segmented datasets (introduced in Peshkov et al. (2012)) as references which we deteriorate in different ways (random addition of boundaries, random removal boundaries, near-miss errors introduction). This provide us with various measures on controlled datasets and with an interesting benchmark for various linguistic segmentation tasks.</abstract>
@@ -7563,14 +7563,14 @@
       <author><first>Andrea</first><last>Abel</last></author>
       <author><first>Aivars</first><last>Glaznieks</last></author>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <title><fixed-case>K</fixed-case>o<fixed-case>K</fixed-case>o: an <fixed-case>L</fixed-case>1 Learner Corpus for <fixed-case>G</fixed-case>erman</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/934_Paper.pdf</url>
       <abstract>We introduce the KoKo corpus, a collection of German L1 learner texts annotated with learner errors, along with the methods and tools used in its construction and evaluation. The corpus contains both texts and corresponding survey information from 1,319 pupils and amounts to around 716,000 tokens. The evaluation of the performed transcriptions and annotations shows an accuracy of orthographic error annotations of approximately 80% as well as high accuracies of transcriptions (&gt;99%), automatic tokenisation (&gt;99%), sentence splitting (&gt;96%) and POS-tagging (&gt;94%). The KoKo corpus will be published at the end of 2014. It will be the first accessible linguistically annotated German L1 learner corpus and a valuable source for research on L1 learner language as well as for teachers of German as L1, in particular with regards to writing skills.</abstract>
       <bibkey>abel-etal-2014-koko</bibkey>
     </paper>
     <paper id="711">
-      <author><first>Petra</first><last>Barančíková</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <title>Improving Evaluation of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech <fixed-case>MT</fixed-case> through Paraphrasing</title>
@@ -7588,11 +7588,11 @@
       <bibkey>faessler-etal-2014-disclose</bibkey>
     </paper>
     <paper id="713">
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <title>When Transliteration Met Crowdsourcing : An Empirical Study of Transliteration via Crowdsourcing using Efficient, Non-redundant and Fair Quality Control</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/94_Paper.pdf</url>
       <abstract>Sufficient parallel transliteration pairs are needed for training state of the art transliteration engines. Given the cost involved, it is often infeasible to collect such data using experts. Crowdsourcing could be a cheaper alternative, provided that a good quality control (QC) mechanism can be devised for this task. Most QC mechanisms employed in crowdsourcing are aggressive (unfair to workers) and expensive (unfair to requesters). In contrast, we propose a low-cost QC mechanism which is fair to both workers and requesters. At the heart of our approach, lies a rule based Transliteration Equivalence approach which takes as input a list of vowels in the two languages and a mapping of the consonants in the two languages. We empirically show that our approach outperforms other popular QC mechanisms (<i>viz.</i>, consensus and sampling) on two vital parameters : (i) fairness to requesters (lower cost per correct transliteration) and (ii) fairness to workers (lower rate of rejecting correct answers). Further, as an extrinsic evaluation we use the standard NEWS 2010 test set and show that such quality controlled crowdsourced data compares well to expert data when used for training a transliteration engine.</abstract>
@@ -7601,13 +7601,13 @@
     <paper id="714">
       <author><first>Frederik</first><last>Baumgardt</last></author>
       <author><first>Giuseppe</first><last>Celano</last></author>
-      <author><first>Gregory R.</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory R.</first><last>Crane</last></author>
       <author><first>Stella</first><last>Dee</last></author>
       <author><first>Maryam</first><last>Foradi</last></author>
       <author><first>Emily</first><last>Franzini</last></author>
       <author><first>Greta</first><last>Franzini</last></author>
       <author><first>Monica</first><last>Lent</last></author>
-      <author><first>Maria</first><last>Moritz</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Moritz</last></author>
       <author><first>Simona</first><last>Stoyanova</last></author>
       <title>Open Philology at the <fixed-case>U</fixed-case>niversity of <fixed-case>L</fixed-case>eipzig</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/940_Paper.pdf</url>
@@ -7624,10 +7624,10 @@
     </paper>
     <paper id="716">
       <author><first>Najeh</first><last>Hajlaoui</last></author>
-      <author><first>David</first><last>Kolovratnik</last></author>
-      <author><first>Jaakko</first><last>Väyrynen</last></author>
+      <author id="david-kolovratnik"><first>David</first><last>Kolovratnik</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko</first><last>Väyrynen</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Daniel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Daniel</first><last>Varga</last></author>
       <title><fixed-case>DCEP</fixed-case> -Digital Corpus of the <fixed-case>E</fixed-case>uropean Parliament</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/943_Paper.pdf</url>
       <abstract>We are presenting a new highly multilingual document-aligned parallel corpus called DCEP - Digital Corpus of the European Parliament. It consists of various document types covering a wide range of subject domains. With a total of 1.37 billion words in 23 languages (253 language pairs), gathered in the course of ten years, this is the largest single release of documents by a European Union institution. DCEP contains most of the content of the European Parliament’s official Website. It includes different document types produced between 2001 and 2012, excluding only the documents already exist in the Europarl corpus to avoid overlapping. We are presenting the typical acquisition steps of the DCEP corpus: data access, document alignment, sentence splitting, normalisation and tokenisation, and sentence alignment efforts. The sentence-level alignment is still in progress but based on some first experiments; we showed that DCEP is very useful for NLP applications, in particular for Statistical Machine Translation.</abstract>
@@ -7635,9 +7635,9 @@
     </paper>
     <paper id="717">
       <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Marine</first><last>Delaborde</last></author>
       <title>Facing the Identification Problem in Language-Related Scientific Data Analysis.</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/945_Paper.pdf</url>
@@ -7646,7 +7646,7 @@
     </paper>
     <paper id="718">
       <author><first>Mariette</first><last>Soury</last></author>
-      <author><first>Laurence</first><last>Devillers</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
       <title>Smile and Laughter in Human-Machine Interaction: a study of engagement</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/947_Paper.pdf</url>
       <abstract>This article presents a corpus featuring adults playing games in interaction with machine trying to induce laugh. This corpus was collected during Interspeech 2013 in Lyon to study behavioral differences correlated to different personalities and cultures. We first present the collection protocol, then the corpus obtained and finally different quantitative and qualitative measures. Smiles and laughs are types of affect bursts which are defined as short emotional non-speech expressions. Here we correlate smile and laugh with personality traits and cultural background. Our final objective is to propose a measure of engagement deduced from those affect bursts.</abstract>
@@ -7654,8 +7654,8 @@
     </paper>
     <paper id="719">
       <author><first>Livio</first><last>Robaldo</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <author><first>Andrea</first><last>Violato</last></author>
       <title>Exploiting networks in Law</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/95_Paper.pdf</url>
@@ -7688,7 +7688,7 @@
     <paper id="722">
       <author><first>Thomas</first><last>Pellegrini</last></author>
       <author><first>Vahid</first><last>Hedayati</last></author>
-      <author><first>Angela</first><last>Costa</last></author>
+      <author id="angela-costa"><first>Angela</first><last>Costa</last></author>
       <title>El-<fixed-case>WOZ</fixed-case>: a client-server wizard-of-oz interface</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/959_Paper.pdf</url>
       <abstract>In this paper, we present a speech recording interface developed in the context of a project on automatic speech recognition for elderly native speakers of European Portuguese. In order to collect spontaneous speech in a situation of interaction with a machine, this interface was designed as a Wizard-of-Oz (WOZ) plateform. In this setup, users interact with a fake automated dialog system controled by a human wizard. It was implemented as a client-server application and the subjects interact with a talking head. The human wizard chooses pre-defined questions or sentences in a graphical user interface, which are then synthesized and spoken aloud by the avatar on the client side. A small spontaneous speech corpus was collected in a daily center. Eight speakers between 75 and 90 years old were recorded. They appreciated the interface and felt at ease with the avatar. Manual orthographic transcriptions were created for the total of about 45 minutes of speech.</abstract>
@@ -7697,7 +7697,7 @@
     <paper id="723">
       <author><first>Fei</first><last>Cheng</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <title>Parsing <fixed-case>C</fixed-case>hinese Synthetic Words with a Character-based Dependency Model</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/96_Paper.pdf</url>
       <abstract>Synthetic word analysis is a potentially important but relatively unexplored problem in Chinese natural language processing. Two issues with the conventional pipeline methods involving word segmentation are (1) the lack of a common segmentation standard and (2) the poor segmentation performance on OOV words. These issues may be circumvented if we adopt the view of character-based parsing, providing both internal structures to synthetic words and global structure to sentences in a seamless fashion. However, the accuracy of synthetic word parsing is not yet satisfactory, due to the lack of research. In view of this, we propose and present experiments on several synthetic word parsers. Additionally, we demonstrate the usefulness of incorporating large unlabelled corpora and a dictionary for this task. Our parsers significantly outperform the baseline (a pipeline method).</abstract>
@@ -7705,10 +7705,10 @@
     </paper>
     <paper id="724">
       <author><first>Mohamed</first><last>Ben Jannet</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <title><fixed-case>ETER</fixed-case> : a new metric for the evaluation of hierarchical named entity recognition</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/960_Paper.pdf</url>
       <abstract>This paper addresses the question of hierarchical named entity evaluation. In particular, we focus on metrics to deal with complex named entity structures as those introduced within the QUAERO project. The intended goal is to propose a smart way of evaluating partially correctly detected complex entities, beyond the scope of traditional metrics. None of the existing metrics are fully adequate to evaluate the proposed QUAERO task involving entity detection, classification and decomposition. We are discussing the strong and weak points of the existing metrics. We then introduce a new metric, the Entity Tree Error Rate (ETER), to evaluate hierarchical and structured named entity detection, classification and decomposition. The ETER metric builds upon the commonly accepted SER metric, but it takes the complex entity structure into account by measuring errors not only at the slot (or complex entity) level but also at a basic (atomic) entity level. We are comparing our new metric to the standard one using first some examples and then a set of real data selected from the ETAPE evaluation results.</abstract>
@@ -7717,7 +7717,7 @@
     <paper id="725">
       <author><first>Jun</first><last>Araki</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <title>Detecting Subevent Structure for Event Coreference Resolution</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/963_Paper.pdf</url>
@@ -7734,14 +7734,14 @@
       <bibkey>shah-etal-2014-efficient</bibkey>
     </paper>
     <paper id="727">
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Jose-Manuel</first><last>Perea-Ortega</last></author>
+      <author id="jose-manuel-perea-ortega"><first>Jose-Manuel</first><last>Perea-Ortega</last></author>
       <author><first>Guillaume</first><last>Jacquet</last></author>
       <author><first>Dilek</first><last>Küçük</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <title>Resource Creation and Evaluation for Multilingual Sentiment Analysis in Social Media Texts</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/965_Paper.pdf</url>
       <abstract>This paper presents an evaluation of the use of machine translation to obtain and employ data for training multilingual sentiment classifiers. We show that the use of machine translated data obtained similar results as the use of native-speaker translations of the same data. Additionally, our evaluations pinpoint to the fact that the use of multilingual data, including that obtained through machine translation, leads to improved results in sentiment classification. Finally, we show that the performance of the sentiment classifiers built on machine translated data can be improved using original data from the target language and that even a small amount of such texts can lead to significant growth in the classification performance.</abstract>
@@ -7765,14 +7765,14 @@
     </paper>
     <paper id="730">
       <author><first>Renlong</first><last>Ai</last></author>
-      <author><first>Marcela</first><last>Charfuelan</last></author>
+      <author id="marcela-charfuelan"><first>Marcela</first><last>Charfuelan</last></author>
       <title><fixed-case>MAT</fixed-case>: a tool for <fixed-case>L</fixed-case>2 pronunciation errors annotation</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/971_Paper.pdf</url>
       <abstract>In the area of Computer Assisted Language Learning(CALL), second language (L2) learners spoken data is an important resource for analysing and annotating typical L2 pronunciation errors. The annotation of L2 pronunciation errors in spoken data is not an easy task though, normally it requires manual annotation from trained linguists or phoneticians. In order to facilitate this task, in this paper, we present the MAT tool, a web-based tool intended to facilitate the annotation of L2 learners’ pronunciation errors at various levels. The tool has been designed taking into account recent studies on error detection in pronunciation training. It also aims at providing an easy and fast annotation process via a comprehensive and friendly user interface. The tool is based on the MARY TTS open source platform, from which it uses the components: text analyser (tokeniser, syllabifier, phonemiser), phonetic aligner and speech signal processor. Annotation results at sentence, word, syllable and phoneme levels are stored in XML format. The tool is currently under evaluation with a L2 learners spoken corpus recorded in the SPRINTER (Language Technology for Interactive, Multi-Media Online Language Learning) project.</abstract>
       <bibkey>ai-charfuelan-2014-mat</bibkey>
     </paper>
     <paper id="731">
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <title>Word Semantic Similarity for Morphologically Rich Languages</title>
@@ -7784,15 +7784,15 @@
       <author><first>Joshua</first><last>Elliot</last></author>
       <author><first>Logan</first><last>Kearsley</last></author>
       <author><first>Jason</first><last>Housley</last></author>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <title><fixed-case>L</fixed-case>ex<fixed-case>T</fixed-case>erm Manager: Design for an Integrated Lexicography and Terminology System</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/975_Paper.pdf</url>
       <abstract>We present a design for a multi-modal database system for lexical information that can be accessed in either lexicographical or terminological views. The use of a single merged data model makes it easy to transfer common information between termbases and dictionaries, thus facilitating information sharing and re-use. Our combined model is based on the LMF and TMF metamodels for lexicographical and terminological databases and is compatible with both, thus allowing for the import of information from existing dictionaries and termbases, which may be transferred to the complementary view and re-exported. We also present a new Linguistic Configuration Model, analogous to a TBX XCS file, which can be used to specify multiple language-specific schemata for validating and understanding lexical information in a single database. Linguistic configurations are mutable and can be refined and evolved over time as understanding of documentary needs improves. The system is designed with a client-server architecture using the HTTP protocol, allowing for the independent implementation of multiple clients for specific use cases and easy deployment over the web.</abstract>
       <bibkey>elliot-etal-2014-lexterm</bibkey>
     </paper>
     <paper id="733">
-      <author><first>Daniel</first><last>Peterson</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="daniel-peterson"><first>Daniel</first><last>Peterson</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Shumin</first><last>Wu</last></author>
       <title>Focusing Annotation for Semantic Role Labeling</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf</url>
@@ -7810,16 +7810,16 @@
       <bibkey>lapponi-etal-2014-road</bibkey>
     </paper>
     <paper id="735">
-      <author><first>Penny</first><last>Labropoulou</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
       <title>Developing a Framework for Describing Relations among Language Resources</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/979_Paper.pdf</url>
       <abstract>In this paper, we study relations holding between language resources as implemented in activities concerned with their documentation. We envision the term language resources with an inclusive definition covering datasets (corpora, lexica, ontologies, grammars, etc.), tools (including web services, workflows, platforms etc.), related publications and documentation, specifications and guidelines. However, the scope of the paper is limited to relations holding for datasets and tools. The study fosuses on the META-SHARE infrastructure and the Linguistic Data Consortium and takes into account the ISOcat DCR relations. Based on this study, we propose a taxonomy of relations, discuss their semantics and provide specifications for their use in order to cater for semantic interoperability. Issues of granularity, redundancy in codification, naming conventions and semantics of the relations are presented.</abstract>
       <bibkey>labropoulou-etal-2014-developing</bibkey>
     </paper>
     <paper id="736">
-      <author><first>Clément</first><last>de Groc</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>de Groc</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <title>Evaluating Web-as-corpus Topical Document Retrieval with an Index of the <fixed-case>O</fixed-case>pen<fixed-case>D</fixed-case>irectory</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/980_Paper.pdf</url>
@@ -7828,8 +7828,8 @@
     </paper>
     <paper id="737">
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <title>Word Alignment-Based Reordering of Source Chunks in <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case></title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/982_Paper.pdf</url>
       <abstract>Reordering poses a big challenge in statistical machine translation between distant language pairs. The paper presents how reordering between distant language pairs can be handled efficiently in phrase-based statistical machine translation. The problem of reordering between distant languages has been approached with prior reordering of the source text at chunk level to simulate the target language ordering. Prior reordering of the source chunks is performed in the present work by following the target word order suggested by word alignment. The testset is reordered using monolingual MT trained on source and reordered source. This approach of prior reordering of the source chunks was compared with pre-ordering of source words based on word alignments and the traditional approach of prior source reordering based on language-pair specific reordering rules. The effects of these reordering approaches were studied on an English–Bengali translation task, a language pair with different word order. From the experimental results it was found that word alignment based reordering of the source chunks is more effective than the other reordering approaches, and it produces statistically significant improvements over the baseline system on BLEU. On manual inspection we found significant improvements in terms of word alignments.</abstract>
@@ -7855,8 +7855,8 @@
       <bibkey>yates-etal-2014-framework</bibkey>
     </paper>
     <paper id="740">
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <title>Multilingual Test Sets for Machine Translation of Search Queries for Cross-Lingual Information Retrieval in the Medical Domain</title>
@@ -7875,9 +7875,9 @@
       <bibkey>ngonga-ngomo-etal-2014-tool</bibkey>
     </paper>
     <paper id="742">
-      <author><first>Clément</first><last>de Groc</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>de Groc</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <title>Thematic Cohesion: measuring terms discriminatory power toward themes</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/991_Paper.pdf</url>
       <abstract>We present a new measure of thematic cohesion. This measure associates each term with a weight representing its discriminatory power toward a theme, this theme being itself expressed by a list of terms (a thematic lexicon). This thematic cohesion criterion can be used in many applications, such as query expansion, computer-assisted translation, or iterative construction of domain-specific lexicons and corpora. The measure is computed in two steps. First, a set of documents related to the terms is gathered from the Web by querying a Web search engine. Then, we produce an oriented co-occurrence graph, where vertices are the terms and edges represent the fact that two terms co-occur in a document. This graph can be interpreted as a recommendation graph, where two terms occurring in a same document means that they recommend each other. This leads to using a random walk algorithm that assigns a global importance value to each vertex of the graph. After observing the impact of various parameters on those importance values, we evaluate their correlation with retrieval effectiveness.</abstract>
@@ -7885,7 +7885,7 @@
     </paper>
     <paper id="743">
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <title>Terminology Resources and Terminology Work Benefit from Cloud Services</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/992_Paper.pdf</url>
       <abstract>This paper presents the concept of the innovative platform TaaS Terminology as a Service. TaaS brings the benefits of cloud services to the user, in order to foster the creation of terminology resources and to maintain their up-to-datedness by integrating automated data extraction and user-supported clean-up of raw terminological data and sharing user-validated terminology. The platform is based on cutting-edge technologies, provides single-access-point terminology services, and facilitates the establishment of emerging trends beyond conventional praxis and static models in terminology work. A cloud-based, user-oriented, collaborative, portable, interoperable, and multilingual platform offers such terminology services as terminology project creation and sharing, data collection for translation lookup, user document upload and management, terminology extraction customisation and execution, raw terminological data management, validated terminological data export and reuse, and other terminology services.</abstract>
@@ -7893,7 +7893,7 @@
     </paper>
     <paper id="744">
       <author><first>Munshi</first><last>Asadullah</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Anne</first><last>Vilnat</last></author>
       <title>Bidirectionnal converter between syntactic annotations : from <fixed-case>F</fixed-case>rench Treebank Dependencies to <fixed-case>PASSAGE</fixed-case> annotations, and back</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/995_Paper.pdf</url>
@@ -7902,7 +7902,7 @@
     </paper>
     <paper id="745">
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Binyam</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam</first><last>Gebre</last></author>
       <title><fixed-case>V</fixed-case>ar<fixed-case>C</fixed-case>lass: An Open-source Language Identification Tool for Language Varieties</title>
       <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/996_Paper.pdf</url>
       <abstract>This paper presents VarClass, an open-source tool for language identification available both to be downloaded as well as through a graphical user-friendly interface. The main difference of VarClass in comparison to other state-of-the-art language identification tools is its focus on language varieties. General purpose language identification tools do not take language varieties into account and our work aims to fill this gap. VarClass currently contains language models for over 27 languages in which 10 of them are language varieties. We report an average performance of over 90.5% accuracy in a challenging dataset. More language models will be included in the upcoming months.</abstract>
diff --git a/data/xml/L16.xml b/data/xml/L16.xml
index e1d534b6c7..4ae22ab719 100644
--- a/data/xml/L16.xml
+++ b/data/xml/L16.xml
@@ -3,17 +3,17 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Tenth International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case>'16)</booktitle>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Sara</first><last>Goggi</last></editor>
       <editor><first>Marko</first><last>Grobelnik</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
       <editor><first>Helene</first><last>Mazo</last></editor>
-      <editor><first>Asuncion</first><last>Moreno</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Portorož, Slovenia</address>
       <month>May</month>
@@ -25,9 +25,9 @@
     </frontmatter>
     <paper id="1">
       <title>Evaluating Machine Translation in a Usage Scenario</title>
-      <author><first>Rosa</first><last>Gaudio</last></author>
+      <author id="rosa-del-gaudio"><first>Rosa</first><last>Gaudio</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>1–8</pages>
       <abstract>In this document we report on a user-scenario-based evaluation aiming at assessing the performance of machine translation (MT) systems in a real context of use. We describe a sequel of experiments that has been performed to estimate the usefulness of MT and to test if improvements of MT technology lead to better performance in the usage scenario. One goal is to find the best methodology for evaluating the eventual benefit of a machine translation system in an application. The evaluation is based on the QTLeap corpus, a novel multilingual language resource that was collected through a real-life support service via chat. It is composed of naturally occurring utterances produced by users while interacting with a human technician providing answers. The corpus is available in eight different languages: Basque, Bulgarian, Czech, Dutch, English, German, Portuguese and Spanish.</abstract>
       <url hash="8298d51f">L16-1001</url>
@@ -46,8 +46,8 @@
     <paper id="3">
       <title>Enhancing Access to Online Education: Quality Machine Translation of <fixed-case>MOOC</fixed-case> Content</title>
       <author><first>Valia</first><last>Kordoni</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Kostadin</first><last>Cholakov</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
@@ -68,8 +68,8 @@
     </paper>
     <paper id="5">
       <title><fixed-case>PE</fixed-case>2rr Corpus: Manual Error Annotation of Automatically Pre-annotated <fixed-case>MT</fixed-case> Post-edits</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <pages>27–32</pages>
       <abstract>We present a freely available corpus containing source language texts from different domains along with their automatically generated translations into several distinct morphologically rich languages, their post-edited versions, and error annotations of the performed post-edit operations. We believe that the corpus will be useful for many different applications. The main advantage of the approach used for creation of the corpus is the fusion of post-editing and error classification tasks, which have usually been seen as two independent tasks, although naturally they are not. We also show benefits of coupling automatic and manual error classification which facilitates the complex manual error annotation task as well as the development of automatic error classification tools. In addition, the approach facilitates annotation of language pair related issues.</abstract>
       <url hash="b8e8e989">L16-1005</url>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="6">
       <title>Sentiment Lexicons for <fixed-case>A</fixed-case>rabic Social Media</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Mohammad</first><last>Salameh</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>33–37</pages>
@@ -89,7 +89,7 @@
       <title>A Language Independent Method for Generating Large Scale Polarity Lexicons</title>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>38–45</pages>
       <abstract>Sentiment Analysis systems aims at detecting opinions and sentiments that are expressed in texts. Many approaches in literature are based on resources that model the prior polarity of words or multi-word expressions, i.e. a polarity lexicon. Such resources are defined by teams of annotators, i.e. a manual annotation is provided to associate emotional or sentiment facets to the lexicon entries. The development of such lexicons is an expensive and language dependent process, making them often not covering all the linguistic sentiment phenomena. Moreover, once a lexicon is defined it can hardly be adopted in a different language or even a different domain. In this paper, we present several Distributional Polarity Lexicons (DPLs), i.e. large-scale polarity lexicons acquired with an unsupervised methodology based on Distributional Models of Lexical Semantics. Given a set of heuristically annotated sentences from Twitter, we transfer the sentiment information from sentences to words. The approach is mostly unsupervised, and experimental evaluations on Sentiment Analysis tasks in two languages show the benefits of the generated resources. The generated DPLs are publicly available in English and Italian.</abstract>
       <url hash="e21daf55">L16-1007</url>
@@ -110,7 +110,7 @@
       <title>A Comparison of Domain-based Word Polarity Estimation using different Word Embeddings</title>
       <author><first>Aitor</first><last>García Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>54–60</pages>
       <abstract>A key point in Sentiment Analysis is to determine the polarity of the sentiment implied by a certain word or expression. In basic Sentiment Analysis systems this sentiment polarity of the words is accounted and weighted in different ways to provide a degree of positivity/negativity. Currently words are also modelled as continuous dense vectors, known as word embeddings, which seem to encode interesting semantic knowledge. With regard to Sentiment Analysis, word embeddings are used as features to more complex supervised classification systems to obtain sentiment classifiers. In this paper we compare a set of existing sentiment lexicons and sentiment lexicon generation techniques. We also show a simple but effective technique to calculate a word polarity value for each word in a domain using existing continuous word embeddings generation methods. Further, we also show that word embeddings calculated on in-domain corpus capture the polarity better than the ones calculated on general-domain corpus.</abstract>
       <url hash="c3a2f087">L16-1009</url>
@@ -162,7 +162,7 @@
       <author><first>Alessia</first><last>Barbagli</last></author>
       <author><first>Pietro</first><last>Lucisano</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <pages>88–95</pages>
       <abstract>In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students’ background information.</abstract>
@@ -173,8 +173,8 @@
       <title>If You <fixed-case>E</fixed-case>ven Don’t Have a Bit of <fixed-case>B</fixed-case>ible: Learning Delexicalized <fixed-case>POS</fixed-case> Taggers</title>
       <author><first>Zhiwei</first><last>Yu</last></author>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>96–103</pages>
       <abstract>Part-of-speech (POS) induction is one of the most popular tasks in research on unsupervised NLP. Various unsupervised and semi-supervised methods have been proposed to tag an unseen language. However, many of them require some partial understanding of the target language because they rely on dictionaries or parallel corpora such as the Bible. In this paper, we propose a different method named delexicalized tagging, for which we only need a raw corpus of the target language. We transfer tagging models trained on annotated corpora of one or more resource-rich languages. We employ language-independent features such as word length, frequency, neighborhood entropy, character classes (alphabetic vs. numeric vs. punctuation) etc. We demonstrate that such features can, to certain extent, serve as predictors of the part of speech, represented by the universal POS tag.</abstract>
       <url hash="fc161c1a">L16-1015</url>
@@ -208,7 +208,7 @@
     <paper id="18">
       <title>Towards a Multi-dimensional Taxonomy of Stories in Dialogue</title>
       <author><first>Kathryn J.</first><last>Collins</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>118–124</pages>
       <abstract>In this paper, we present a taxonomy of stories told in dialogue. We based our scheme on prior work analyzing narrative structure and method of telling, relation to storyteller identity, as well as some categories particular to dialogue, such as how the story gets introduced. Our taxonomy currently has 5 major dimensions, with most having sub-dimensions - each dimension has an associated set of dimension-specific labels. We adapted an annotation tool for this taxonomy and have annotated portions of two different dialogue corpora, Switchboard and the Distress Analysis Interview Corpus. We present examples of some of the tags and concepts with stories from Switchboard, and some initial statistics of frequencies of the tags.</abstract>
       <url hash="f3e52a17">L16-1018</url>
@@ -216,12 +216,12 @@
     </paper>
     <paper id="19">
       <title><fixed-case>P</fixed-case>ento<fixed-case>R</fixed-case>ef: A Corpus of Spoken References in Task-oriented Dialogues</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Julian</first><last>Hough</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>125–131</pages>
       <abstract>PentoRef is a corpus of task-oriented dialogues collected in systematically manipulated settings. The corpus is multilingual, with English and German sections, and overall comprises more than 20000 utterances. The dialogues are fully transcribed and annotated with referring expressions mapped to objects in corresponding visual scenes, which makes the corpus a rich resource for research on spoken referring expressions in generation and resolution. The corpus includes several sub-corpora that correspond to different dialogue situations where parameters related to interactivity, visual access, and verbal channel have been manipulated in systematic ways. The corpus thus lends itself to very targeted studies of reference in spontaneous dialogue.</abstract>
@@ -231,7 +231,7 @@
     <paper id="20">
       <title>Transfer of Corpus-Specific Dialogue Act Annotation to <fixed-case>ISO</fixed-case> Standard: Is it worth it?</title>
       <author><first>Shammur Absar</first><last>Chowdhury</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>132–135</pages>
       <abstract>Spoken conversation corpora often adapt existing Dialogue Act (DA) annotation specifications, such as DAMSL, DIT++, etc., to task specific needs, yielding incompatible annotations; thus, limiting corpora re-usability. Recently accepted ISO standard for DA annotation – Dialogue Act Markup Language (DiAML) – is designed as domain and application independent. Moreover, the clear separation of dialogue dimensions and communicative functions, coupled with the hierarchical organization of the latter, allows for classification at different levels of granularity. However, re-annotating existing corpora with the new scheme might require significant effort. In this paper we test the utility of the ISO standard through comparative evaluation of the corpus-specific legacy and the semi-automatically transferred DiAML DA annotations on supervised dialogue act classification task. To test the domain independence of the resulting annotations, we perform cross-domain and data aggregation evaluation. Compared to the legacy annotation scheme, on the Italian LUNA Human-Human corpus, the DiAML annotation scheme exhibits better cross-domain and data aggregation classification performance, while maintaining comparable in-domain performance.</abstract>
@@ -241,7 +241,7 @@
     <paper id="21">
       <title><fixed-case>W</fixed-case>iki<fixed-case>C</fixed-case>oref: An <fixed-case>E</fixed-case>nglish Coreference-annotated Corpus of <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>136–142</pages>
       <abstract>This paper presents WikiCoref, an English corpus annotated for anaphoric relations, where all documents are from the English version of Wikipedia. Our annotation scheme follows the one of OntoNotes with a few disparities. We annotated each markable with coreference type, mention type and the equivalent Freebase topic. Since most similar annotation efforts concentrate on very specific types of written text, mainly newswire, there is a lack of resources for otherwise over-used Wikipedia texts. The corpus described in this paper addresses this issue. We present a freely available resource we initially devised for improving coreference resolution algorithms dedicated to Wikipedia texts. Our corpus has no restriction on the topics of the documents being annotated, and documents of various sizes have been considered for annotation.</abstract>
       <url hash="8a897e0e">L16-1021</url>
@@ -257,9 +257,9 @@
     </paper>
     <paper id="23">
       <title>Adapting an Entity Centric Model for <fixed-case>P</fixed-case>ortuguese Coreference Resolution</title>
-      <author><first>Evandro</first><last>Fonseca</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
-      <author><first>Aline</first><last>Vanin</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro</first><last>Fonseca</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
+      <author id="aline-a-vanin"><first>Aline</first><last>Vanin</last></author>
       <pages>150–154</pages>
       <abstract>This paper presents the adaptation of an Entity Centric Model for Portuguese coreference resolution, considering 10 named entity categories. The model was evaluated on named e using the HAREM Portuguese corpus and the results are 81.0% of precision and 58.3% of recall overall, the resulting system is freely available</abstract>
       <url hash="363cb8f5">L16-1023</url>
@@ -267,7 +267,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>IMS</fixed-case> <fixed-case>H</fixed-case>ot<fixed-case>C</fixed-case>oref <fixed-case>DE</fixed-case>: A Data-driven Co-reference Resolver for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>155–160</pages>
       <abstract>This paper presents a data-driven co-reference resolution system for German that has been adapted from IMS HotCoref, a co-reference resolver for English. It describes the difficulties when resolving co-reference in German text, the adaptation process and the features designed to address linguistic challenges brought forth by German. We report performance on the reference dataset TüBa-D/Z and include a post-task SemEval 2010 evaluation, showing that the resolver achieves state-of-the-art performance. We also include ablation experiments that indicate that integrating linguistic features increases results. The paper also describes the steps and the format necessary to use the resolver on new texts. The tool is freely available for download.</abstract>
@@ -278,7 +278,7 @@
       <title>Coreference Annotation Scheme and Relation Types for <fixed-case>H</fixed-case>indi</title>
       <author><first>Vandan</first><last>Mujadia</last></author>
       <author><first>Palash</first><last>Gupta</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>161–168</pages>
       <abstract>This paper describes a coreference annotation scheme, coreference annotation specific issues and their solutions through our proposed annotation scheme for Hindi. We introduce different co-reference relation types between continuous mentions of the same coreference chain such as “Part-of”, “Function-value pair” etc. We used Jaccard similarity based Krippendorff‘s’ alpha to demonstrate consistency in annotation scheme, annotation and corpora. To ease the coreference annotation process, we built a semi-automatic Coreference Annotation Tool (CAT). We also provide statistics of coreference annotation on Hindi Dependency Treebank (HDTB).</abstract>
       <url hash="a3e38e0c">L16-1025</url>
@@ -299,8 +299,8 @@
     <paper id="27">
       <title>Sieve-based Coreference Resolution in the Biomedical Domain</title>
       <author><first>Dane</first><last>Bell</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>177–183</pages>
       <abstract>We describe challenges and advantages unique to coreference resolution in the biomedical domain, and a sieve-based architecture that leverages domain knowledge for both entity and event coreference resolution. Domain-general coreference resolution algorithms perform poorly on biomedical documents, because the cues they rely on such as gender are largely absent in this domain, and because they do not encode domain-specific knowledge such as the number and type of participants required in chemical reactions. Moreover, it is difficult to directly encode this knowledge into most coreference resolution algorithms because they are not rule-based. Our rule-based architecture uses sequentially applied hand-designed “sieves”, with the output of each sieve informing and constraining subsequent sieves. This architecture provides a 3.2% increase in throughput to our Reach event extraction system with precision parallel to that of the stricter system that relies solely on syntactic patterns for extraction.</abstract>
@@ -322,7 +322,7 @@
     <paper id="29">
       <title>Error Typology and Remediation Strategies for Requirements Written in <fixed-case>E</fixed-case>nglish by Non-Native Speakers</title>
       <author><first>Marie</first><last>Garnier</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>190–197</pages>
       <abstract>In most international industries, English is the main language of communication for technical documents. These documents are designed to be as unambiguous as possible for their users. For international industries based in non-English speaking countries, the professionals in charge of writing requirements are often non-native speakers of English, who rarely receive adequate training in the use of English for this task. As a result, requirements can contain a relatively large diversity of lexical and grammatical errors, which are not eliminated by the use of guidelines from controlled languages. This article investigates the distribution of errors in a corpus of requirements written in English by native speakers of French. Errors are defined on the basis of grammaticality and acceptability principles, and classified using comparable categories. Results show a high proportion of errors in the Noun Phrase, notably through modifier stacking, and errors consistent with simplification strategies. Comparisons with similar corpora in other genres reveal the specificity of the distribution of errors in requirements. This research also introduces possible applied uses, in the form of strategies for the automatic detection of errors, and in-person training provided by certification boards in requirements authoring.</abstract>
       <url hash="b510dec1">L16-1029</url>
@@ -388,7 +388,7 @@
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>230–236</pages>
       <abstract>This study examines two possibilities of using the FLELex graded lexicon for the automated assessment of text complexity in French as a foreign language learning. From the lexical frequency distributions described in FLELex, we derive a single level of difficulty for each word in a parallel corpus of original and simplified texts. We then use this data to automatically address the lexical complexity of texts in two ways. On the one hand, we evaluate the degree of lexical simplification in manually simplified texts with respect to their original version. Our results show a significant simplification effect, both in the case of French narratives simplified for non-native readers and in the case of simplified Wikipedia texts. On the other hand, we define a predictive model which identifies the number of words in a text that are expected to be known at a particular learning level. We assess the accuracy with which these predictions are able to capture actual word knowledge as reported by Dutch-speaking learners of French. Our study shows that although the predictions seem relatively accurate in general (87.4% to 92.3%), they do not yet seem to cover the learners’ lack of knowledge very well.</abstract>
       <url hash="d668464d">L16-1035</url>
@@ -398,9 +398,9 @@
       <title>A Shared Task for Spoken <fixed-case>CALL</fixed-case>?</title>
       <author><first>Claudia</first><last>Baur</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Martin</first><last>Russell</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="martin-russell"><first>Martin</first><last>Russell</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>237–244</pages>
       <abstract>We argue that the field of spoken CALL needs a shared task in order to facilitate comparisons between different groups and methodologies, and describe a concrete example of such a task, based on data collected from a speech-enabled online tool which has been used to help young Swiss German teens practise skills in English conversation. Items are prompt-response pairs, where the prompt is a piece of German text and the response is a recorded English audio file. The task is to label pairs as “accept” or “reject”, accepting responses which are grammatically and linguistically correct to match a set of hidden gold standard answers as closely as possible. Initial resources are provided so that a scratch system can be constructed with a minimal investment of effort, and in particular without necessarily using a speech recogniser. Training data for the task will be released in June 2016, and test data in January 2017.</abstract>
       <url hash="0521ba19">L16-1036</url>
@@ -427,7 +427,7 @@
     </paper>
     <paper id="39">
       <title>Evaluating Interactive System Adaptation</title>
-      <author><first>Edouard</first><last>Geoffrois</last></author>
+      <author id="edouard-geoffrois"><first>Edouard</first><last>Geoffrois</last></author>
       <pages>256–260</pages>
       <abstract>Enabling users of intelligent systems to enhance the system performance by providing feedback on their errors is an important need. However, the ability of systems to learn from user feedback is difficult to evaluate in an objective and comparative way. Indeed, the involvement of real users in the adaptation process is an impediment to objective evaluation. This issue can be solved by using an oracle approach, where users are simulated by oracles having access to the reference test data. Another difficulty is to find a meaningful metric despite the fact that system improvements depend on the feedback provided and on the system itself. A solution is to measure the minimal amount of information needed to correct all system errors. It can be shown that for any well defined non interactive task, the interactively supervised version of the task can be evaluated by combining such an oracle-based approach and a minimum supervision rate metric. This new evaluation protocol for adaptive systems is not only expected to drive progress for such systems, but also to pave the way for a specialisation of actors along the value chain of their technological development.</abstract>
       <url hash="54c9b5fe">L16-1039</url>
@@ -435,7 +435,7 @@
     </paper>
     <paper id="40">
       <title>Complementarity, <fixed-case>F</fixed-case>-score, and <fixed-case>NLP</fixed-case> Evaluation</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>261–266</pages>
       <abstract>This paper addresses the problem of quantifying the differences between entity extraction systems, where in general only a small proportion a document should be selected. Comparing overall accuracy is not very useful in these cases, as small differences in accuracy may correspond to huge differences in selections over the target minority class. Conventionally, one may use per-token complementarity to describe these differences, but it is not very useful when the set is heavily skewed. In such situations, which are common in information retrieval and entity recognition, metrics like precision and recall are typically used to describe performance. However, precision and recall fail to describe the differences between sets of objects selected by different decision strategies, instead just describing the proportional amount of correct and incorrect objects selected. This paper presents a method for measuring complementarity for precision, recall and F-score, quantifying the difference between entity extraction approaches.</abstract>
       <url hash="957f86a6">L16-1040</url>
@@ -455,7 +455,7 @@
       <title>Evaluating a Topic Modelling Approach to Measuring Corpus Similarity</title>
       <author><first>Richard</first><last>Fothergill</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>273–279</pages>
       <abstract>Web corpora are often constructed automatically, and their contents are therefore often not well understood. One technique for assessing the composition of such a web corpus is to empirically measure its similarity to a reference corpus whose composition is known. In this paper we evaluate a number of measures of corpus similarity, including a method based on topic modelling which has not been previously evaluated for this task. To evaluate these methods we use known-similarity corpora that have been previously used for this purpose, as well as a number of newly-constructed known-similarity corpora targeting differences in genre, topic, time, and region. Our findings indicate that, overall, the topic modelling approach did not improve on a chi-square method that had previously been found to work well for measuring corpus similarity.</abstract>
       <url hash="49bf3180">L16-1042</url>
@@ -480,9 +480,9 @@
     </paper>
     <paper id="44">
       <title>Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact</title>
-      <author><first>Ângela</first><last>Costa</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
       <author><first>Rui</first><last>Correia</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>288–292</pages>
       <abstract>In this paper we describe a corpus of automatic translations annotated with both error type and quality. The 300 sentences that we have selected were generated by Google Translate, Systran and two in-house Machine Translation systems that use Moses technology. The errors present on the translations were annotated with an error taxonomy that divides errors in five main linguistic categories (Orthography, Lexis, Grammar, Semantics and Discourse), reflecting the language level where the error is located. After the error annotation process, we accessed the translation quality of each sentence using a four point comprehension scale from 1 to 5. Both tasks of error and quality annotation were performed by two different annotators, achieving good levels of inter-annotator agreement. The creation of this corpus allowed us to use it as training data for a translation quality classifier. We concluded on error severity by observing the outputs of two machine learning classifiers: a decision tree and a regression model.</abstract>
       <url hash="ffb48574">L16-1044</url>
@@ -492,7 +492,7 @@
       <title>Evaluating the Readability of Text Simplification Output for Readers with Cognitive Disabilities</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>293–299</pages>
       <abstract>This paper presents an approach for automatic evaluation of the readability of text simplification output for readers with cognitive disabilities. First, we present our work towards the development of the EasyRead corpus, which contains easy-to-read documents created especially for people with cognitive disabilities. We then compare the EasyRead corpus to the simplified output contained in the LocalNews corpus (Feng, 2009), the accessibility of which has been evaluated through reading comprehension experiments including 20 adults with mild intellectual disability. This comparison is made on the basis of 13 disability-specific linguistic features. The comparison reveals that there are no major differences between the two corpora, which shows that the EasyRead corpus is to a similar reading level as the user-evaluated texts. We also discuss the role of Simple Wikipedia (Zhu et al., 2010) as a widely-used accessibility benchmark, in light of our finding that it is significantly more complex than both the EasyRead and the LocalNews corpora.</abstract>
       <url hash="8e439b86">L16-1045</url>
@@ -501,7 +501,7 @@
     <paper id="46">
       <title>Word Embedding Evaluation and Combination</title>
       <author><first>Sahar</first><last>Ghannay</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
       <pages>300–305</pages>
@@ -513,7 +513,7 @@
       <title>Benchmarking multimedia technologies with the <fixed-case>CAMOMILE</fixed-case> platform: the case of Multimodal Person Discovery at <fixed-case>M</fixed-case>edia<fixed-case>E</fixed-case>val 2015</title>
       <author><first>Johann</first><last>Poignant</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <author><first>Mickael</first><last>Stefas</last></author>
       <author><first>Pierrick</first><last>Bruneau</last></author>
       <author><first>Thomas</first><last>Tamisier</last></author>
@@ -543,8 +543,8 @@
     </paper>
     <paper id="50">
       <title>Odin’s Runes: A Rule Language for Information Extraction</title>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>322–329</pages>
       <abstract>Odin is an information extraction framework that applies cascades of finite state automata over both surface text and syntactic dependency graphs. Support for syntactic patterns allow us to concisely define relations that are otherwise difficult to express in languages such as Common Pattern Specification Language (CPSL), which are currently limited to shallow linguistic features. The interaction of lexical and syntactic automata provides robustness and flexibility when writing extraction rules. This paper describes Odin’s declarative language for writing these cascaded automata.</abstract>
@@ -554,7 +554,7 @@
     <paper id="51">
       <title>A Classification-based Approach to Economic Event Detection in <fixed-case>D</fixed-case>utch News Text</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>330–335</pages>
       <abstract>Breaking news on economic events such as stock splits or mergers and acquisitions has been shown to have a substantial impact on the financial markets. As it is important to be able to automatically identify events in news items accurately and in a timely manner, we present in this paper proof-of-concept experiments for a supervised machine learning approach to economic event detection in newswire text. For this purpose, we created a corpus of Dutch financial news articles in which 10 types of company-specific economic events were annotated. We trained classifiers using various lexical, syntactic and semantic features. We obtain good results based on a basic set of shallow features, thus showing that this method is a viable approach for economic event detection in news text.</abstract>
       <url hash="9093cb71">L16-1051</url>
@@ -564,7 +564,7 @@
       <title>Predictive Modeling: Guessing the <fixed-case>NLP</fixed-case> Terms of Tomorrow</title>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>336–343</pages>
       <abstract>Predictive modeling, often called “predictive analytics” in a commercial context, encompasses a variety of statistical techniques that analyze historical and present facts to make predictions about unknown events. Often the unknown events are in the future, but prediction can be applied to any type of unknown whether it be in the past or future. In our case, we present some experiments applying predictive modeling to the usage of technical terms within the NLP domain.</abstract>
       <url hash="bded4eb0">L16-1052</url>
@@ -601,7 +601,7 @@
       <author><first>Won-Tae</first><last>Joo</last></author>
       <author><first>Hyun-Woo</first><last>Do</last></author>
       <author><first>Chae-Gyun</first><last>Lim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Ho-Jin</first><last>Choi</last></author>
       <pages>356–359</pages>
       <abstract>Many emerging documents usually contain temporal information. Because the temporal information is useful for various applications, it became important to develop a system of extracting the temporal information from the documents. Before developing the system, it first necessary to define or design the structure of temporal information. In other words, it is necessary to design a language which defines how to annotate the temporal information. There have been some studies about the annotation languages, but most of them was applicable to only a specific target language (e.g., English). Thus, it is necessary to design an individual annotation language for each language. In this paper, we propose a revised version of Koreain Time Mark-up Language (K-TimeML), and also introduce a dataset, named Korean TimeBank, that is constructed basd on the K-TimeML. We believe that the new K-TimeML and Korean TimeBank will be used in many further researches about extraction of temporal information.</abstract>
@@ -616,7 +616,7 @@
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Robert</first><last>Meusel</last></author>
       <author><first>Heiko</first><last>Paulheim</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>360–367</pages>
       <abstract>Hypernymy relations (those where an hyponym term shares a “isa” relationship with his hypernym) play a key role for many Natural Language Processing (NLP) tasks, e.g. ontology learning, automatically building or extending knowledge bases, or word sense disambiguation and induction. In fact, such relations may provide the basis for the construction of more complex structures such as taxonomies, or be used as effective background knowledge for many word understanding applications. We present a publicly available database containing more than 400 million hypernymy relations we extracted from the CommonCrawl web corpus. We describe the infrastructure we developed to iterate over the web corpus for extracting the hypernymy relations and store them effectively into a large database. This collection of relations represents a rich source of knowledge and may be useful for many researchers. We offer the tuple dataset for public download and an Application Programming Interface (API) to help other researchers programmatically query the database.</abstract>
       <url hash="a912855d">L16-1056</url>
@@ -645,7 +645,7 @@
     </paper>
     <paper id="59">
       <title>Legal Text Interpretation: Identifying Hohfeldian Relations from Text</title>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Adam</first><last>Wyner</last></author>
       <pages>379–384</pages>
       <abstract>The paper investigates the extent of the support semi-automatic analysis can provide for the specific task of assigning Hohfeldian relations of Duty, using the General Architecture for Text Engineering tool for the automated extraction of Duty instances and the bearers of associated roles. The outcome of the analysis supports scholars in identifying Hohfeldian structures in legal text when performing close reading of the texts. A cyclic workflow involving automated annotation and expert feedback will incrementally increase the quality and coverage of the automatic extraction process, and increasingly reduce the amount of manual work required of the scholar.</abstract>
@@ -663,9 +663,9 @@
     </paper>
     <paper id="61">
       <title>Finding Definitions in Large Corpora with <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine</title>
-      <author><first>Vojtěch</first><last>Kovář</last></author>
+      <author id="vojtech-kovar"><first>Vojtěch</first><last>Kovář</last></author>
       <author><first>Monika</first><last>Močiariková</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <pages>391–394</pages>
       <abstract>The paper describes automatic definition finding implemented within the leading corpus query and management tool, Sketch Engine. The implementation exploits complex pattern-matching queries in the corpus query language (CQL) and the indexing mechanism of word sketches for finding and storing definition candidates throughout the corpus. The approach is evaluated for Czech and English corpora, showing that the results are usable in practice: precision of the tool ranges between 30 and 75 percent (depending on the major corpus text types) and we were able to extract nearly 2 million definition candidates from an English corpus with 1.4 billion words. The feature is embedded into the interface as a concordance filter, so that users can search for definitions of any query to the corpus, including very specific multi-word queries. The results also indicate that ordinary texts (unlike explanatory texts) contain rather low number of definitions, which is perhaps the most important problem with automatic definition finding in general.</abstract>
       <url hash="fafc5f96">L16-1061</url>
@@ -686,7 +686,7 @@
       <title><fixed-case>NLP</fixed-case> and Public Engagement: The Case of the <fixed-case>I</fixed-case>talian School Reform</title>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Giovanni</first><last>Moretti</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Damien</first><last>Lanfrey</last></author>
       <author><first>Donatella Solda</first><last>Kutzmann</last></author>
@@ -697,9 +697,9 @@
     </paper>
     <paper id="64">
       <title>Evaluating Translation Quality and <fixed-case>CLIR</fixed-case> Performance of Query Sessions</title>
-      <author><first>Xabier</first><last>Saralegi</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>407–411</pages>
       <abstract>This paper presents the evaluation of the translation quality and Cross-Lingual Information Retrieval (CLIR) performance when using session information as the context of queries. The hypothesis is that previous queries provide context that helps to solve ambiguous translations in the current query. We tested several strategies on the TREC 2010 Session track dataset, which includes query reformulations grouped by generalization, specification, and drifting types. We study the Basque to English direction, evaluating both the translation quality and CLIR performance, with positive results in both cases. The results show that the quality of translation improved, reducing error rate by 12% (HTER) when using session information, which improved CLIR results 5% (nDCG). We also provide an analysis of the improvements across the three kinds of sessions: generalization, specification, and drifting. Translation quality improved in all three types (generalization, specification, and drifting), and CLIR improved for generalization and specification sessions, preserving the performance in drifting sessions.</abstract>
       <url hash="ea3c9868">L16-1064</url>
@@ -752,7 +752,7 @@
     <paper id="69">
       <title>“Who was Pietro Badoglio?” Towards a <fixed-case>QA</fixed-case> system for <fixed-case>I</fixed-case>talian History</title>
       <author><first>Stefano</first><last>Menini</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Antonio</first><last>Uva</last></author>
       <pages>430–435</pages>
       <abstract>This paper presents QUANDHO (QUestion ANswering Data for italian HistOry), an Italian question answering dataset created to cover a specific domain, i.e. the history of Italy in the first half of the XX century. The dataset includes questions manually classified and annotated with Lexical Answer Types, and a set of question-answer pairs. This resource, freely available for research purposes, has been used to retrain a domain independent question answering system so to improve its performances in the domain of interest. Ongoing experiments on the development of a question classifier and an automatic tagger of Lexical Answer Types are also presented.</abstract>
@@ -762,8 +762,8 @@
     <paper id="70">
       <title>A Document Repository for Social Media and Speech Conversations</title>
       <author><first>Adam</first><last>Funk</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>436–440</pages>
       <abstract>We present a successfully implemented document repository REST service for flexible SCRUD (search, crate, read, update, delete) storage of social media conversations, using a GATE/TIPSTER-like document object model and providing a query language for document features. This software is currently being used in the SENSEI research project and will be published as open-source software before the project ends. It is, to the best of our knowledge, the first freely available, general purpose data repository to support large-scale multimodal (i.e., speech or text) conversation analytics.</abstract>
       <url hash="4e692972">L16-1070</url>
@@ -771,10 +771,10 @@
     </paper>
     <paper id="71">
       <title>Towards a Linguistic Ontology with an Emphasis on Reasoning and Knowledge Reuse</title>
-      <author><first>Artemis</first><last>Parvizi</last></author>
+      <author id="artemis-parvizi"><first>Artemis</first><last>Parvizi</last></author>
       <author><first>Matt</first><last>Kohl</last></author>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <pages>441–448</pages>
       <abstract>The Dictionaries division at Oxford University Press (OUP) is aiming to model, integrate, and publish lexical content for 100 languages focussing on digitally under-represented languages. While there are multiple ontologies designed for linguistic resources, none had adequate features for meeting our requirements, chief of which was the capability to losslessly capture diverse features of many different languages in a dictionary format, while supplying a framework for inferring relations like translation, derivation, etc., between the data. Building on valuable features of existing models, and working with OUP monolingual and bilingual dictionary datasets, we have designed and implemented a new linguistic ontology. The ontology has been reviewed by a number of computational linguists, and we are working to move more dictionary data into it. We have also developed APIs to surface the linked data to dictionary websites.</abstract>
       <url hash="1b82461b">L16-1071</url>
@@ -797,11 +797,11 @@
     </paper>
     <paper id="73">
       <title>The Language Application Grid and Galaxy</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <pages>457–462</pages>
       <abstract>The NSF-SI2-funded LAPPS Grid project is a collaborative effort among Brandeis University, Vassar College, Carnegie-Mellon University (CMU), and the Linguistic Data Consortium (LDC), which has developed an open, web-based infrastructure through which resources can be easily accessed and within which tailored language services can be efficiently composed, evaluated, disseminated and consumed by researchers, developers, and students across a wide variety of disciplines. The LAPPS Grid project recently adopted Galaxy (Giardine et al., 2005), a robust, well-developed, and well-supported front end for workflow configuration, management, and persistence. Galaxy allows data inputs and processing steps to be selected from graphical menus, and results are displayed in intuitive plots and summaries that encourage interactive workflows and the exploration of hypotheses. The Galaxy workflow engine provides significant advantages for deploying pipelines of LAPPS Grid web services, including not only means to create and deploy locally-run and even customized versions of the LAPPS Grid as well as running the LAPPS Grid in the cloud, but also access to a huge array of statistical and visualization tools that have been developed for use in genomics research.</abstract>
       <url hash="0642063f">L16-1073</url>
@@ -810,7 +810,7 @@
     <paper id="74">
       <title><fixed-case>ELRA</fixed-case> Activities and Services</title>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <pages>463–468</pages>
@@ -828,9 +828,9 @@
     </paper>
     <paper id="76">
       <title>Humor in Collective Discourse: Unsupervised Funniness Detection in the New Yorker Cartoon Caption Contest</title>
-      <author><first>Dragomir</first><last>Radev</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Aasish</first><last>Pappu</last></author>
       <author><first>Aikaterini</first><last>Iliakopoulou</last></author>
       <author><first>Agustin</first><last>Chanfreau</last></author>
@@ -848,7 +848,7 @@
       <title>A Corpus of Text Data and Gaze Fixations from Autistic and Non-Autistic Adults</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>480–487</pages>
       <abstract>The paper presents a corpus of text data and its corresponding gaze fixations obtained from autistic and non-autistic readers. The data was elicited through reading comprehension testing combined with eye-tracking recording. The corpus consists of 1034 content words tagged with their POS, syntactic role and three gaze-based measures corresponding to the autistic and control participants. The reading skills of the participants were measured through multiple-choice questions and, based on the answers given, they were divided into groups of skillful and less-skillful readers. This division of the groups informs researchers on whether particular fixations were elicited from skillful or less-skillful readers and allows a fair between-group comparison for two levels of reading ability. In addition to describing the process of data collection and corpus development, we present a study on the effect that word length has on reading in autism. The corpus is intended as a resource for investigating the particular linguistic constructions which pose reading difficulties for people with autism and hopefully, as a way to inform future text simplification research intended for this population.</abstract>
       <url hash="d44747ae">L16-1077</url>
@@ -877,7 +877,7 @@
     <paper id="80">
       <title>An Empirical Study of <fixed-case>A</fixed-case>rabic Formulaic Sequence Extraction Methods</title>
       <author><first>Ayman</first><last>Alghamdi</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Claire</first><last>Brierley</last></author>
       <pages>502–506</pages>
       <abstract>This paper aims to implement what is referred to as the collocation of the Arabic keywords approach for extracting formulaic sequences (FSs) in the form of high frequency but semantically regular formulas that are not restricted to any syntactic construction or semantic domain. The study applies several distributional semantic models in order to automatically extract relevant FSs related to Arabic keywords. The data sets used in this experiment are rendered from a new developed corpus-based Arabic wordlist consisting of 5,189 lexical items which represent a variety of modern standard Arabic (MSA) genres and regions, the new wordlist being based on an overlapping frequency based on a comprehensive comparison of four large Arabic corpora with a total size of over 8 billion running words. Empirical n-best precision evaluation methods are used to determine the best association measures (AMs) for extracting high frequency and meaningful FSs. The gold standard reference FSs list was developed in previous studies and manually evaluated against well-established quantitative and qualitative criteria. The results demonstrate that the MI.log_f AM achieved the highest results in extracting significant FSs from the large MSA corpus, while the T-score association measure achieved the worst results.</abstract>
@@ -886,9 +886,9 @@
     </paper>
     <paper id="81">
       <title>Rule-based Automatic Multi-word Term Extraction and Lemmatization</title>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <author><first>Biljana</first><last>Lazić</last></author>
       <author><first>Aleksandra</first><last>Trtovac</last></author>
       <pages>507–514</pages>
@@ -908,7 +908,7 @@
     <paper id="83">
       <title>A Lexical Resource of <fixed-case>H</fixed-case>ebrew Verb-Noun Multi-Word Expressions</title>
       <author><first>Chaya</first><last>Liebeskind</last></author>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <pages>522–527</pages>
       <abstract>A verb-noun Multi-Word Expression (MWE) is a combination of a verb and a noun with or without other words, in which the combination has a meaning different from the meaning of the words considered separately. In this paper, we present a new lexical resource of Hebrew Verb-Noun MWEs (VN-MWEs). The VN-MWEs of this resource were manually collected and annotated from five different web resources. In addition, we analyze the lexical properties of Hebrew VN-MWEs by classifying them to three types: morphological, syntactic, and semantic. These two contributions are essential for designing algorithms for automatic VN-MWEs extraction. The analysis suggests some interesting features of VN-MWEs for exploration. The lexical resource enables to sample a set of positive examples for Hebrew VN-MWEs. This set of examples can either be used for training supervised algorithms or as seeds in unsupervised bootstrapping algorithms. Thus, this resource is a first step towards automatic identification of Hebrew VN-MWEs, which is important for natural language understanding, generation and translation systems.</abstract>
       <url hash="11eeb60e">L16-1083</url>
@@ -919,7 +919,7 @@
       <author><first>Guillaume</first><last>Jacquet</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Jaakko</first><last>Väyrynen</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko</first><last>Väyrynen</last></author>
       <pages>528–535</pages>
       <abstract>This paper reports on an approach and experiments to automatically build a cross-lingual multi-word entity resource. Starting from a collection of millions of acronym/expansion pairs for 22 languages where expansion variants were grouped into monolingual clusters, we experiment with several aggregation strategies to link these clusters across languages. Aggregation strategies make use of string similarity distances and translation probabilities and they are based on vector space and graph representations. The accuracy of the approach is evaluated against Wikipedia’s redirection and cross-lingual linking tables. The resulting multi-word entity resource contains 64,000 multi-word entities with unique identifiers and their 600,000 multilingual lexical variants. We intend to make this new resource publicly available.</abstract>
       <url hash="4f5b1594">L16-1084</url>
@@ -930,7 +930,7 @@
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <author><first>Hayda</first><last>Almeida</last></author>
       <author><first>Ludovic</first><last>Jean-Louis</last></author>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <pages>536–540</pages>
       <abstract>This paper presents SemLinker, an open source system that discovers named entities, connects them to a reference knowledge base, and clusters them semantically. SemLinker relies on several modules that perform surface form generation, mutual disambiguation, entity clustering, and make use of two annotation engines. SemLinker was evaluated in the English Entity Discovery and Linking track of the Text Analysis Conference on Knowledge Base Population, organized by the US National Institute of Standards and Technology. Along with the SemLinker source code, we release our annotation files containing the discovered named entities, their types, and position across processed documents.</abstract>
       <url hash="0360b73a">L16-1085</url>
@@ -942,7 +942,7 @@
       <author><first>Giuseppe</first><last>Rizzo</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Julien</first><last>Plu</last></author>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <pages>541–548</pages>
       <abstract>More and more knowledge bases are publicly available as linked data. Since these knowledge bases contain structured descriptions of real-world entities, they can be exploited by entity linking systems that anchor entity mentions from text to the most relevant resources describing those entities. In this paper, we investigate adaptation of the entity linking task using contextual knowledge. The key intuition is that entity linking can be customized depending on the textual content, as well as on the application that would make use of the extracted information. We present an adaptive approach that relies on contextual knowledge from text to enhance the performance of ADEL, a hybrid linguistic and graph-based entity linking system. We evaluate our approach on a domain-specific corpus consisting of annotated WikiNews articles.</abstract>
       <url hash="381c05dd">L16-1086</url>
@@ -952,7 +952,7 @@
       <title>Named Entity Recognition on <fixed-case>T</fixed-case>witter for <fixed-case>T</fixed-case>urkish using Semi-supervised Learning with Word Embeddings</title>
       <author><first>Eda</first><last>Okur</last></author>
       <author><first>Hakan</first><last>Demir</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>549–555</pages>
       <abstract>Recently, due to the increasing popularity of social media, the necessity for extracting information from informal text types, such as microblog texts, has gained significant attention. In this study, we focused on the Named Entity Recognition (NER) problem on informal text types for Turkish. We utilized a semi-supervised learning approach based on neural networks. We applied a fast unsupervised method for learning continuous representations of words in vector space. We made use of these obtained word embeddings, together with language independent features that are engineered to work better on informal text types, for generating a Turkish NER system on microblog texts. We evaluated our Turkish NER system on Twitter messages and achieved better F-score performances than the published results of previously proposed NER systems on Turkish tweets. Since we did not employ any language dependent features, we believe that our method can be easily adapted to microblog texts in other morphologically rich languages.</abstract>
       <url hash="2812d2d4">L16-1087</url>
@@ -962,7 +962,7 @@
       <title>Entity Linking with a Paraphrase Flavor</title>
       <author><first>Maria</first><last>Pershina</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>556–560</pages>
       <abstract>The task of Named Entity Linking is to link entity mentions in the document to their correct entries in a knowledge base and to cluster NIL mentions. Ambiguous, misspelled, and incomplete entity mention names are the main challenges in the linking process. We propose a novel approach that combines two state-of-the-art models ― for entity disambiguation and for paraphrase detection ― to overcome these challenges. We consider name variations as paraphrases of the same entity mention and adopt a paraphrase model for this task. Our approach utilizes a graph-based disambiguation model based on Personalized Page Rank, and then refines and clusters its output using the paraphrase similarity between entity mention strings. It achieves a competitive performance of 80.5% in B3+F clustering score on diagnostic TAC EDL 2014 data.</abstract>
       <url hash="d3018207">L16-1088</url>
@@ -981,7 +981,7 @@
     </paper>
     <paper id="90">
       <title><fixed-case>IRIS</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>rish Machine Translation System</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Caoilfhionn</first><last>Lane</last></author>
       <author><first>Eoin Ó</first><last>Droighneáin</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -1010,7 +1010,7 @@
     <paper id="93">
       <title>Syntax-based Multi-system Machine Translation</title>
       <author><first>Matīss</first><last>Rikters</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <pages>585–591</pages>
       <abstract>This paper describes a hybrid machine translation system that explores a parser to acquire syntactic chunks of a source sentence, translates the chunks with multiple online machine translation (MT) system application program interfaces (APIs) and creates output by combining translated chunks to obtain the best possible translation. The selection of the best translation hypothesis is performed by calculating the perplexity for each translated chunk. The goal of this approach is to enhance the baseline multi-system hybrid translation (MHyT) system that uses only a language model to select best translation from translations obtained with different APIs and to improve overall English ― Latvian machine translation quality over each of the individual MT APIs. The presented syntax-based multi-system translation (SyMHyT) system demonstrates an improvement in terms of BLEU and NIST scores compared to the baseline system. Improvements reach from 1.74 up to 2.54 BLEU points.</abstract>
       <url hash="76c00b11">L16-1093</url>
@@ -1018,11 +1018,11 @@
     </paper>
     <paper id="94">
       <title>Use of Domain-Specific Language Resources in Machine Translation</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Andreia</first><last>Querido</last></author>
       <author><first>Nuno</first><last>Rendeiro</last></author>
       <author><first>João António</first><last>Rodrigues</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>592–598</pages>
       <abstract>In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.</abstract>
       <url hash="81e298d0">L16-1094</url>
@@ -1032,10 +1032,10 @@
       <title><fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og Online: Porting a Post-editing Tool to the Web</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Tapas</first><last>Nayak</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayak</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>599–604</pages>
       <abstract>This paper presents CATaLog online, a new web-based MT and TM post-editing tool. CATaLog online is a freeware software that can be used through a web browser and it requires only a simple registration. The tool features a number of editing and log functions similar to the desktop version of CATaLog enhanced with several new features that we describe in detail in this paper. CATaLog online is designed to allow users to post-edit both translation memory segments as well as machine translation output. The tool provides a complete set of log information currently not available in most commercial CAT tools. Log information can be used both for project management purposes as well as for the study of the translation process and translator’s productivity.</abstract>
       <url hash="c28d51d3">L16-1095</url>
@@ -1067,7 +1067,7 @@
     <paper id="98">
       <title>Lexical Resources to Enrich <fixed-case>E</fixed-case>nglish <fixed-case>M</fixed-case>alayalam Machine Translation</title>
       <author><first>Sreelekha</first><last>S</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>620–627</pages>
       <abstract>In this paper we present our work on the usage of lexical resources for the Machine Translation English and Malayalam. We describe a comparative performance between different Statistical Machine Translation (SMT) systems on top of phrase based SMT system as baseline. We explore different ways of utilizing lexical resources to improve the quality of English Malayalam statistical machine translation. In order to enrich the training corpus we have augmented the lexical resources in two ways (a) additional vocabulary and (b) inflected verbal forms. Lexical resources include IndoWordnet semantic relation set, lexical words and verb phrases etc. We have described case studies, evaluations and have given detailed error analysis for both Malayalam to English and English to Malayalam machine translation systems. We observed significant improvement in evaluations of translation quality. Lexical resources do help uplift performance when parallel corpora are scanty.</abstract>
       <url hash="aaaea6a0">L16-1098</url>
@@ -1125,8 +1125,8 @@
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>659–663</pages>
       <abstract>Greedy transition-based parsers are appealing for their very fast speed, with reasonably high accuracies. In this paper, we build a fast shift-reduce neural constituent parser by using a neural network to make local decisions. One challenge to the parsing speed is the large hidden and output layer sizes caused by the number of constituent labels and branching options. We speed up the parser by using a hierarchical output layer, inspired by the hierarchical log-bilinear neural language model. In standard WSJ experiments, the neural parser achieves an almost 2.4 time speed up (320 sen/sec) compared to a non-hierarchical baseline without significant accuracy loss (89.06 vs 89.13 F-score).</abstract>
       <url hash="d95e624b">L16-1104</url>
@@ -1158,8 +1158,8 @@
       <author><first>Daniel</first><last>van Niekerk</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
-      <author><first>Gerhard</first><last>van Huyssteen</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard</first><last>van Huyssteen</last></author>
       <pages>677–682</pages>
       <abstract>Compared to well-resourced languages such as English and Dutch, natural language processing (NLP) tools for Afrikaans are still not abundant. In the context of the AfriBooms project, KU Leuven and the North-West University collaborated to develop a first, small treebank, a dependency parser, and an easy to use online linguistic search engine for Afrikaans for use by researchers and students in the humanities and social sciences. The search tool is based on a similar development for Dutch, i.e. GrETEL, a user-friendly search engine which allows users to query a treebank by means of a natural language example instead of a formal search instruction.</abstract>
       <url hash="f083d1a4">L16-1107</url>
@@ -1231,7 +1231,7 @@
     <paper id="115">
       <title><fixed-case>FABIOLE</fixed-case>, a Speech Database for Forensic Speaker Comparison</title>
       <author><first>Moez</first><last>Ajili</last></author>
-      <author><first>Jean-François</first><last>Bonastre</last></author>
+      <author id="jean-francois-bonastre"><first>Jean-François</first><last>Bonastre</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
       <author><first>Solange</first><last>Rossato</last></author>
       <author><first>Guillaume</first><last>Bernard</last></author>
@@ -1252,7 +1252,7 @@
     <paper id="117">
       <title><fixed-case>AIMU</fixed-case>: Actionable Items for Meeting Understanding</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <pages>739–743</pages>
       <abstract>With emerging conversational data, automated content analysis is needed for better data interpretation, so that it is accurately understood and can be effectively integrated and utilized in various applications. ICSI meeting corpus is a publicly released data set of multi-party meetings in an organization that has been released over a decade ago, and has been fostering meeting understanding research since then. The original data collection includes transcription of participant turns as well as meta-data annotations, such as disfluencies and dialog act tags. This paper presents an extended set of annotations for the ICSI meeting corpus with a goal of deeply understanding meeting conversations, where participant turns are annotated by actionable items that could be performed by an automated meeting assistant. In addition to the user utterances that contain an actionable item, annotations also include the arguments associated with the actionable item. The set of actionable items are determined by aligning human-human interactions to human-machine interactions, where a data annotation schema designed for a virtual personal assistant (human-machine genre) is adapted to the meetings domain (human-human genre). The data set is formed by annotating participants’ utterances in meetings with potential intents/actions considering their contexts. The set of actions target what could be accomplished by an automated meeting assistant, such as taking a note of action items that a participant commits to, or finding emails or topic related documents that were mentioned during the meeting. A total of 10 defined intents/actions are considered as actionable items in meetings. Turns that include actionable intents were annotated for 22 public ICSI meetings, that include a total of 21K utterances, segmented by speaker turns. Participants’ spoken turns, possible actions along with associated arguments and their vector representations as computed by convolutional deep structured semantic models are included in the data set for future research. We present a detailed statistical analysis of the data set and analyze the performance of applying convolutional deep structured semantic models for an actionable item detection task. The data is available at <url>http://research.microsoft.com/projects/meetingunderstanding/</url>.</abstract>
       <url hash="c320a681">L16-1117</url>
@@ -1261,7 +1261,7 @@
     <paper id="118">
       <title>A Taxonomy of Specific Problem Classes in Text-to-Speech Synthesis: Comparing Commercial and Open Source Performance</title>
       <author><first>Felix</first><last>Burkhardt</last></author>
-      <author><first>Uwe D.</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe D.</first><last>Reichel</last></author>
       <pages>744–749</pages>
       <abstract>Current state-of-the-art speech synthesizers for domain-independent systems still struggle with the challenge of generating understandable and natural-sounding speech. This is mainly because the pronunciation of words of foreign origin, inflections and compound words often cannot be handled by rules. Furthermore there are too many of these for inclusion in exception dictionaries. We describe an approach to evaluating text-to-speech synthesizers with a subjective listening experiment. The focus is to differentiate between known problem classes for speech synthesizers. The target language is German but we believe that many of the described phenomena are not language specific. We distinguish the following problem categories: Normalization, Foreign linguistics, Natural writing, Language specific and General. Each of them is divided into five to three problem classes. Word lists for each of the above mentioned categories were compiled and synthesized by both a commercial and an open source synthesizer, both being based on the non-uniform unit-selection approach. The synthesized speech was evaluated by human judges using the Speechalyzer toolkit and the results are discussed. It shows that, as expected, the commercial synthesizer performs much better than the open-source one, and especially words of foreign origin were pronounced badly by both systems.</abstract>
       <url hash="5548505a">L16-1118</url>
@@ -1281,11 +1281,11 @@
     <paper id="120">
       <title>A Singing Voice Database in <fixed-case>B</fixed-case>asque for Statistical Singing Synthesis of Bertsolaritza</title>
       <author><first>Xabier</first><last>Sarasola</last></author>
-      <author><first>Eva</first><last>Navas</last></author>
+      <author id="eva-navas"><first>Eva</first><last>Navas</last></author>
       <author><first>David</first><last>Tavarez</last></author>
       <author><first>Daniel</first><last>Erro</last></author>
       <author><first>Ibon</first><last>Saratxaga</last></author>
-      <author><first>Inma</first><last>Hernaez</last></author>
+      <author id="inmaculada-hernaez"><first>Inma</first><last>Hernaez</last></author>
       <pages>756–759</pages>
       <abstract>This paper describes the characteristics and structure of a Basque singing voice database of bertsolaritza. Bertsolaritza is a popular singing style from Basque Country sung exclusively in Basque that is improvised and a capella. The database is designed to be used in statistical singing voice synthesis for bertsolaritza style. Starting from the recordings and transcriptions of numerous singers, diarization and phoneme alignment experiments have been made to extract the singing voice from the recordings and create phoneme alignments. This labelling processes have been performed applying standard speech processing techniques and the results prove that these techniques can be used in this specific singing style.</abstract>
       <url hash="b00acce2">L16-1120</url>
@@ -1326,7 +1326,7 @@
     </paper>
     <paper id="124">
       <title>Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in <fixed-case>L</fixed-case>atvian</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Askars</first><last>Salimbajevs</last></author>
       <author><first>Ilze</first><last>Auziņa</last></author>
       <pages>775–780</pages>
@@ -1340,7 +1340,7 @@
       <author><first>Dirce</first><last>Celorico</last></author>
       <author><first>Sara</first><last>Candeias</last></author>
       <author><first>Carla</first><last>Lopes</last></author>
-      <author><first>Fernando</first><last>Perdigão</last></author>
+      <author id="fernando-perdigao"><first>Fernando</first><last>Perdigão</last></author>
       <pages>781–785</pages>
       <abstract>This paper introduces the LetsRead Corpus of European Portuguese read speech from 6 to 10 years old children. The motivation for the creation of this corpus stems from the inexistence of databases with recordings of reading tasks of Portuguese children with different performance levels and including all the common reading aloud disfluencies. It is also essential to develop techniques to fulfill the main objective of the LetsRead project: to automatically evaluate the reading performance of children through the analysis of reading tasks. The collected data amounts to 20 hours of speech from 284 children from private and public Portuguese schools, with each child carrying out two tasks: reading sentences and reading a list of pseudowords, both with varying levels of difficulty throughout the school grades. In this paper, the design of the reading tasks presented to children is described, as well as the collection procedure. Manually annotated data is analyzed according to disfluencies and reading performance. The considered word difficulty parameter is also confirmed to be suitable for the pseudoword reading tasks.</abstract>
       <url hash="07f042a3">L16-1125</url>
@@ -1348,7 +1348,7 @@
     </paper>
     <paper id="126">
       <title>The <fixed-case>BAS</fixed-case> Speech Data Repository</title>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Florian</first><last>Schiel</last></author>
       <author><first>Thomas</first><last>Kisler</last></author>
       <author><first>Christoph</first><last>Draxler</last></author>
@@ -1363,8 +1363,8 @@
       <author><first>Emre</first><last>Yilmaz</last></author>
       <author><first>Mario</first><last>Ganzeboom</last></author>
       <author><first>Lilian</first><last>Beijer</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
-      <author><first>Helmer</first><last>Strik</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="helmer-strik"><first>Helmer</first><last>Strik</last></author>
       <pages>792–795</pages>
       <abstract>We present a new Dutch dysarthric speech database containing utterances of neurological patients with Parkinson’s disease, traumatic brain injury and cerebrovascular accident. The speech content is phonetically and linguistically diversified by using numerous structured sentence and word lists. Containing more than 6 hours of mildly to moderately dysarthric speech, this database can be used for research on dysarthria and for developing and testing speech-to-text systems designed for medical applications. Current activities aimed at extending this database are also discussed.</abstract>
       <url hash="b432483a">L16-1127</url>
@@ -1401,9 +1401,9 @@
     </paper>
     <paper id="131">
       <title>The <fixed-case>O</fixed-case>n<fixed-case>F</fixed-case>orum<fixed-case>S</fixed-case> corpus from the Shared Task on Online Forum Summarisation at <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing 2015</title>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Josef</first><last>Steinberger</last></author>
       <author><first>Jorge</first><last>Valderrama</last></author>
       <author><first>Hugo</first><last>Zaragoza</last></author>
@@ -1414,8 +1414,8 @@
     </paper>
     <paper id="132">
       <title>Automatic Enrichment of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Common-Sense Knowledge</title>
-      <author><first>Luigi</first><last>Di Caro</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
       <pages>819–822</pages>
       <abstract>WordNet represents a cornerstone in the Computational Linguistics field, linking words to meanings (or senses) through a taxonomical representation of synsets, i.e., clusters of words with an equivalent meaning in a specific context often described by few definitions (or glosses) and examples. Most of the approaches to the Word Sense Disambiguation task fully rely on these short texts as a source of contextual information to match with the input text to disambiguate. This paper presents the first attempt to enrich synsets data with common-sense definitions, automatically retrieved from ConceptNet 5, and disambiguated accordingly to WordNet. The aim was to exploit the shared- and immediate-thinking nature of common-sense knowledge to extend the short but incredibly useful contextual information of the synsets. A manual evaluation on a subset of the entire result (which counts a total of almost 600K synset enrichments) shows a very high precision with an estimated good recall.</abstract>
       <url hash="c90268d4">L16-1132</url>
@@ -1461,13 +1461,13 @@
     </paper>
     <paper id="136">
       <title>The <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>a<fixed-case>X</fixed-case> Corpus ― Sense Annotations with Scalable Sense Inventories</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Héctor Martínez</first><last>Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor Martínez</first><last>Alonso</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Nicolai Hartvig</first><last>Sørensen</last></author>
       <pages>842–847</pages>
       <abstract>We launch the SemDaX corpus which is a recently completed Danish human-annotated corpus available through a CLARIN academic license. The corpus includes approx. 90,000 words, comprises six textual domains, and is annotated with sense inventories of different granularity. The aim of the developed corpus is twofold: i) to assess the reliability of the different sense annotation schemes for Danish measured by qualitative analyses and annotation agreement scores, and ii) to serve as training and test data for machine learning algorithms with the practical purpose of developing sense taggers for Danish. To these aims, we take a new approach to human-annotated corpus resources by double annotating a much larger part of the corpus than what is normally seen: for the all-words task we double annotated 60% of the material and for the lexical sample task 100%. We include in the corpus not only the adjucated files, but also the diverging annotations. In other words, we consider not all disagreement to be noise, but rather to contain valuable linguistic information that can help us improve our annotation schemes and our learning algorithms.</abstract>
@@ -1489,7 +1489,7 @@
       <title>Multi-prototype <fixed-case>C</fixed-case>hinese Character Embedding</title>
       <author><first>Yanan</first><last>Lu</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>855–859</pages>
       <abstract>Chinese sentences are written as sequences of characters, which are elementary units of syntax and semantics. Characters are highly polysemous in forming words. We present a position-sensitive skip-gram model to learn multi-prototype Chinese character embeddings, and explore the usefulness of such character embeddings to Chinese NLP tasks. Evaluation on character similarity shows that multi-prototype embeddings are significantly better than a single-prototype baseline. In addition, used as features in the Chinese NER task, the embeddings result in a 1.74% F-score improvement over a state-of-the-art baseline.</abstract>
       <url hash="c3612940">L16-1138</url>
@@ -1497,10 +1497,10 @@
     </paper>
     <paper id="139">
       <title>A comparison of Named-Entity Disambiguation and Word Sense Disambiguation</title>
-      <author><first>Angel</first><last>Chang</last></author>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>860–867</pages>
       <abstract>Named Entity Disambiguation (NED) is the task of linking a named-entity mention to an instance in a knowledge-base, typically Wikipedia-derived resources like DBpedia. This task is closely related to word-sense disambiguation (WSD), where the mention of an open-class word is linked to a concept in a knowledge-base, typically WordNet. This paper analyzes the relation between two annotated datasets on NED and WSD, highlighting the commonalities and differences. We detail the methods to construct a NED system following the WSD word-expert approach, where we need a dictionary and one classifier is built for each target entity mention string. Constructing a dictionary for NED proved challenging, and although similarity and ambiguity are higher for NED, the results are also higher due to the larger number of training data, and the more crisp and skewed meaning differences.</abstract>
       <url hash="3fb6e2ea">L16-1139</url>
@@ -1510,7 +1510,7 @@
       <title>Leveraging <fixed-case>RDF</fixed-case> Graphs for Crossing Multiple Bilingual Dictionaries</title>
       <author><first>Marta</first><last>Villegas</last></author>
       <author><first>Maite</first><last>Melero</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <pages>868–876</pages>
       <abstract>The experiments presented here exploit the properties of the Apertium RDF Graph, principally cycle density and nodes’ degree, to automatically generate new translation relations between words, and therefore to enrich existing bilingual dictionaries with new entries. Currently, the Apertium RDF Graph includes data from 22 Apertium bilingual dictionaries and constitutes a large unified array of linked lexical entries and translations that are available and accessible on the Web (<url>http://linguistic.linkeddata.es/apertium/</url>). In particular, its graph structure allows for interesting exploitation opportunities, some of which are addressed in this paper. Two ‘massive’ experiments are reported: in the first one, the original EN-ES translation set was removed from the Apertium RDF Graph and a new EN-ES version was generated. The results were compared against the previously removed EN-ES data and against the Concise Oxford Spanish Dictionary. In the second experiment, a new non-existent EN-FR translation set was generated. In this case the results were compared against a converted wiktionary English-French file. The results we got are really good and perform well for the extreme case of correlated polysemy. This lead us to address the possibility to use cycles and nodes degree to identify potential oddities in the source data. If cycle density proves efficient when considering potential targets, we can assume that in dense graphs nodes with low degree may indicate potential errors.</abstract>
@@ -1533,7 +1533,7 @@
       <author><first>Fabricio</first><last>Chalub</last></author>
       <author><first>Livy</first><last>Real</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <pages>885–891</pages>
       <abstract>This paper describes work on incorporating Princenton’s WordNet morphosemantics links to the fabric of the Portuguese OpenWordNet-PT. Morphosemantic links are relations between verbs and derivationally related nouns that are semantically typed (such as for tune-tuner ― in Portuguese “afinar-afinador” – linked through an “agent” link). Morphosemantic links have been discussed for Princeton’s WordNet for a while, but have not been added to the official database. These links are very useful, they help us to improve our Portuguese WordNet. Thus we discuss the integration of these links in our base and the issues we encountered with the integration.</abstract>
       <url hash="b1bd7a75">L16-1142</url>
@@ -1563,14 +1563,14 @@
     <paper id="145">
       <title>Large Multi-lingual, Multi-level and Multi-genre Annotation Corpus</title>
       <author><first>Xuansong</first><last>Li</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>906–913</pages>
       <abstract>High accuracy for automated translation and information retrieval calls for linguistic annotations at various language levels. The plethora of informal internet content sparked the demand for porting state-of-art natural language processing (NLP) applications to new social media as well as diverse language adaptation. Effort launched by the BOLT (Broad Operational Language Translation) program at DARPA (Defense Advanced Research Projects Agency) successfully addressed the internet information with enhanced NLP systems. BOLT aims for automated translation and linguistic analysis for informal genres of text and speech in online and in-person communication. As a part of this program, the Linguistic Data Consortium (LDC) developed valuable linguistic resources in support of the training and evaluation of such new technologies. This paper focuses on methodologies, infrastructure, and procedure for developing linguistic annotation at various language levels, including Treebank (TB), word alignment (WA), PropBank (PB), and co-reference (CoRef). Inspired by the OntoNotes approach with adaptations to the tasks to reflect the goals and scope of the BOLT project, this effort has introduced more annotation types of informal and free-style genres in English, Chinese and Egyptian Arabic. The corpus produced is by far the largest multi-lingual, multi-level and multi-genre annotation corpus of informal text and speech.</abstract>
       <url hash="aaa92e5a">L16-1145</url>
@@ -1589,7 +1589,7 @@
     <paper id="147">
       <title><fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>ubtitles2016: Extracting Large Parallel Corpora from Movie and <fixed-case>TV</fixed-case> Subtitles</title>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>923–929</pages>
       <abstract>We present a new major release of the OpenSubtitles collection of parallel corpora. The release is compiled from a large database of movie and TV subtitles and includes a total of 1689 bitexts spanning 2.6 billion sentences across 60 languages. The release also incorporates a number of enhancements in the preprocessing and alignment of the subtitles, such as the automatic correction of OCR errors and the use of meta-data to estimate the quality of each subtitle and score subtitle pairs.</abstract>
       <url hash="48b8cf33">L16-1147</url>
@@ -1598,8 +1598,8 @@
     <paper id="148">
       <title><fixed-case>L</fixed-case>ex<fixed-case>F</fixed-case>r: Adapting the <fixed-case>L</fixed-case>ex<fixed-case>I</fixed-case>t Framework to Build a Corpus-based <fixed-case>F</fixed-case>rench Subcategorization Lexicon</title>
       <author><first>Giulia</first><last>Rambelli</last></author>
-      <author><first>Gianluca</first><last>Lebani</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>930–937</pages>
       <abstract>This paper introduces LexFr, a corpus-based French lexical resource built by adapting the framework LexIt, originally developed to describe the combinatorial potential of Italian predicates. As in the original framework, the behavior of a group of target predicates is characterized by a series of syntactic (i.e., subcategorization frames) and semantic (i.e., selectional preferences) statistical information (a.k.a. distributional profiles) whose extraction process is mostly unsupervised. The first release of LexFr includes information for 2,493 verbs, 7,939 nouns and 2,628 adjectives. In these pages we describe the adaptation process and evaluated the final resource by comparing the information collected for 20 test verbs against the information available in a gold standard dictionary. In the best performing setting, we obtained 0.74 precision, 0.66 recall and 0.70 F-measure.</abstract>
@@ -1609,7 +1609,7 @@
     <paper id="149">
       <title>Polarity Lexicon Building: to what Extent Is the Manual Effort Worth?</title>
       <author><first>Iñaki San</first><last>Vicente</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
       <pages>938–942</pages>
       <abstract>Polarity lexicons are a basic resource for analyzing the sentiments and opinions expressed in texts in an automated way. This paper explores three methods to construct polarity lexicons: translating existing lexicons from other languages, extracting polarity lexicons from corpora, and annotating sentiments Lexical Knowledge Bases. Each of these methods require a different degree of human effort. We evaluate how much manual effort is needed and to what extent that effort pays in terms of performance improvement. Experiment setup includes generating lexicons for Basque, and evaluating them against gold standard datasets in different domains. Results show that extracting polarity lexicons from corpora is the best solution for achieving a good performance with reasonable human effort.</abstract>
       <url hash="00366054">L16-1149</url>
@@ -1620,7 +1620,7 @@
       <author><first>Ouafae</first><last>Nahli</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Arsalan</first><last>Zarghili</last></author>
       <author><first>Mustapha</first><last>Khalfi</last></author>
       <pages>943–950</pages>
@@ -1678,7 +1678,7 @@
     </paper>
     <paper id="156">
       <title>Argument Mining: the Bottleneck of Knowledge and Language Resources</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>983–990</pages>
       <abstract>Given a controversial issue, argument mining from natural language texts (news papers, and any form of text on the Internet) is extremely challenging: domain knowledge is often required together with appropriate forms of inferences to identify arguments. This contribution explores the types of knowledge that are required and how they can be paired with reasoning schemes, language processing and language resources to accurately mine arguments. We show via corpus analysis that the Generative Lexicon, enhanced in different manners and viewed as both a lexicon and a domain knowledge representation, is a relevant approach. In this paper, corpus annotation for argument mining is first developed, then we show how the generative lexicon approach must be adapted and how it can be paired with language processing patterns to extract and specify the nature of arguments. Our approach to argument mining is thus knowledge driven.</abstract>
       <url hash="8c737db8">L16-1156</url>
@@ -1687,7 +1687,7 @@
     <paper id="157">
       <title>From Interoperable Annotations towards Interoperable Resources: A Multilingual Approach to the Analysis of Discourse</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Kerstin Anna</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin Anna</first><last>Kunz</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
       <pages>991–997</pages>
       <abstract>In the present paper, we analyse variation of discourse phenomena in two typologically different languages, i.e. in German and Czech. The novelty of our approach lies in the nature of the resources we are using. Advantage is taken of existing resources, which are, however, annotated on the basis of two different frameworks. We use an interoperable scheme unifying discourse phenomena in both frameworks into more abstract categories and considering only those phenomena that have a direct match in German and Czech. The discourse properties we focus on are relations of identity, semantic similarity, ellipsis and discourse relations. Our study shows that the application of interoperable schemes allows an exploitation of discourse-related phenomena analysed in different projects and on the basis of different frameworks. As corpus compilation and annotation is a time-consuming task, positive results of this experiment open up new paths for contrastive linguistics, translation studies and NLP, including machine translation.</abstract>
@@ -1696,7 +1696,7 @@
     </paper>
     <paper id="158">
       <title>Falling silent, lost for words ... Tracing personal involvement in interviews with <fixed-case>D</fixed-case>utch war veterans</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <pages>998–1001</pages>
       <abstract>In sources used in oral history research (such as interviews with eye witnesses), passages where the degree of personal emotional involvement is found to be high can be of particular interest, as these may give insight into how historical events were experienced, and what moral dilemmas and psychological or religious struggles were encountered. In a pilot study involving a large corpus of interview recordings with Dutch war veterans, we have investigated if it is possible to develop a method for automatically identifying those passages where the degree of personal emotional involvement is high. The method is based on the automatic detection of exceptionally large silences and filled pause segments (using Automatic Speech Recognition), and cues taken from specific n-grams. The first results appear to be encouraging enough for further elaboration of the method.</abstract>
@@ -1705,9 +1705,9 @@
     </paper>
     <paper id="159">
       <title>A Bilingual Discourse Corpus and Its Applications</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Yating</first><last>Yang</last></author>
       <author><first>Xi</first><last>Zhou</last></author>
       <pages>1002–1007</pages>
@@ -1727,7 +1727,7 @@
     <paper id="161">
       <title>Corpus Resources for Dispute Mediation Discourse</title>
       <author><first>Mathilde</first><last>Janier</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>1014–1021</pages>
       <abstract>Dispute mediation is a growing activity in the resolution of conflicts, and more and more research emerge to enhance and better understand this (until recently) understudied practice. Corpus analyses are necessary to study discourse in this context; yet, little data is available, mainly because of its confidentiality principle. After proposing hints and avenues to acquire transcripts of mediation sessions, this paper presents the Dispute Mediation Corpus, which gathers annotated excerpts of mediation dialogues. Although developed as part of a project on argumentation, it is freely available and the text data can be used by anyone. This first-ever open corpus of mediation interactions can be of interest to scholars studying discourse, but also conflict resolution, argumentation, linguistics, communication, etc. We advocate for using and extending this resource that may be valuable to a large variety of domains of research, particularly those striving to enhance the study of the rapidly growing activity of dispute mediation.</abstract>
       <url hash="8aed9daf">L16-1161</url>
@@ -1745,10 +1745,10 @@
     </paper>
     <paper id="163">
       <title><fixed-case>P</fixed-case>ersona<fixed-case>B</fixed-case>ank: A Corpus of Personal Narratives and Their Story Intention Graphs</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Kevin</first><last>Bowden</last></author>
       <author><first>Casey</first><last>Barackman</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>1026–1033</pages>
       <abstract>We present a new corpus, PersonaBank, consisting of 108 personal stories from weblogs that have been annotated with their Story Intention Graphs, a deep representation of the content of a story. We describe the topics of the stories and the basis of the Story Intention Graph representation, as well as the process of annotating the stories to produce the Story Intention Graphs and the challenges of adapting the tool to this new personal narrative domain. We also discuss how the corpus can be used in applications that retell the story using different styles of tellings, co-tellings, or as a content planner.</abstract>
       <url hash="ca008bf5">L16-1163</url>
@@ -1779,9 +1779,9 @@
       <title>Enhancing The <fixed-case>RATP</fixed-case>-<fixed-case>DECODA</fixed-case> Corpus With Linguistic Annotations For Performing A Large Range Of <fixed-case>NLP</fixed-case> Tasks</title>
       <author><first>Carole</first><last>Lailler</last></author>
       <author><first>Anaïs</first><last>Landeau</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Paul</first><last>Deléglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deléglise</last></author>
       <pages>1047–1050</pages>
       <abstract>In this article, we present the RATP-DECODA Corpus which is composed by a set of 67 hours of speech from telephone conversations of a Customer Care Service (CCS). This corpus is already available on line at <url>http://sldr.org/sldr000847/fr</url> in its first version. However, many enhancements have been made in order to allow the development of automatic techniques to transcript conversations and to capture their meaning. These enhancements fall into two categories: firstly, we have increased the size of the corpus with manual transcriptions from a new operational day; secondly we have added new linguistic annotations to the whole corpus (either manually or through an automatic processing) in order to perform various linguistic tasks from syntactic and semantic parsing to dialog act tagging and dialog summarization.</abstract>
       <url hash="2a7ca5e0">L16-1166</url>
@@ -1792,7 +1792,7 @@
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
       <author><first>Andreas</first><last>Peldszus</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Jérémy</first><last>Perret</last></author>
       <pages>1051–1058</pages>
       <abstract>We present the first corpus of texts annotated with two alternative approaches to discourse structure, Rhetorical Structure Theory (Mann and Thompson, 1988) and Segmented Discourse Representation Theory (Asher and Lascarides, 2003). 112 short argumentative texts have been analyzed according to these two theories. Furthermore, in previous work, the same texts have already been annotated for their argumentation structure, according to the scheme of Peldszus and Stede (2013). This corpus therefore enables studies of correlations between the two accounts of discourse structure, and between discourse and argumentation. We converted the three annotation formats to a common dependency tree format that enables to compare the structures, and we describe some initial findings.</abstract>
@@ -1801,7 +1801,7 @@
     </paper>
     <paper id="168">
       <title>An Annotated Corpus of Direct Speech</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
       <pages>1059–1063</pages>
       <abstract>We propose a scheme for annotating direct speech in literary texts, based on the Text Encoding Initiative (TEI) and the coreference annotation guidelines from the Message Understanding Conference (MUC). The scheme encodes the speakers and listeners of utterances in a text, as well as the quotative verbs that reports the utterances. We measure inter-annotator agreement on this annotation task. We then present statistics on a manually annotated corpus that consists of books from the New Testament. Finally, we visualize the corpus as a conversational network.</abstract>
@@ -1811,7 +1811,7 @@
     <paper id="169">
       <title>Evaluating the Noisy Channel Model for the Normalization of Historical Texts: <fixed-case>B</fixed-case>asque, <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>lovene</title>
       <author><first>Izaskun</first><last>Etxeberria</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>1064–1069</pages>
@@ -1830,7 +1830,7 @@
     </paper>
     <paper id="171">
       <title>A Morphological Lexicon of <fixed-case>E</fixed-case>speranto with Morpheme Frequencies</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>1075–1078</pages>
       <abstract>This paper discusses the internal structure of complex Esperanto words (CWs). Using a morphological analyzer, possible affixation and compounding is checked for over 50,000 Esperanto lexemes against a list of 17,000 root words. Morpheme boundaries in the resulting analyses were then checked manually, creating a CW dictionary of 28,000 words, representing 56.4% of the lexicon, or 19.4% of corpus tokens. The error percentage of the EspGram morphological analyzer for new corpus CWs was 4.3% for types and 6.4% for tokens, with a recall of almost 100%, and wrong/spurious boundaries being more common than missing ones. For pedagogical purposes a morpheme frequency dictionary was constructed for a 16 million word corpus, confirming the importance of agglutinative derivational morphemes in the Esperanto lexicon. Finally, as a means to reduce the morphological ambiguity of CWs, we provide POS likelihoods for Esperanto suffixes.</abstract>
       <url hash="97033d57">L16-1171</url>
@@ -1848,7 +1848,7 @@
     <paper id="173">
       <title>Giving Lexical Resources a Second Life: Démonette, a Multi-sourced Morpho-semantic Network for <fixed-case>F</fixed-case>rench</title>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>1084–1091</pages>
       <abstract>Démonette is a derivational morphological network designed for the description of French. Its original architecture enables its use as a formal framework for the description of morphological analyses and as a repository for existing lexicons. It is fed with a variety of resources, which all are already validated. The harmonization of their content into a unified format provides them a second life, in which they are enriched with new properties, provided these are deductible from their contents. Démonette is released under a Creative Commons license. It is usable for theoretical and descriptive research in morphology, as a source of experimental material for psycholinguistics, natural language processing (NLP) and information retrieval (IR), where it fills a gap, since French lacks a large-coverage derivational resources database. The article presents the integration of two existing lexicons into Démonette. The first is Verbaction, a lexicon of deverbal action nouns. The second is Lexeur, a database of agent nouns in -eur derived from verbs or from nouns.</abstract>
       <url hash="6d48dc52">L16-1173</url>
@@ -1883,7 +1883,7 @@
     <paper id="177">
       <title>Encoding Adjective Scales for Fine-grained Resources</title>
       <author><first>Cédric</first><last>Lopez</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <pages>1109–1113</pages>
       <abstract>We propose an automatic approach towards determining the relative location of adjectives on a common scale based on their strength. We focus on adjectives expressing different degrees of goodness occurring in French product (perfumes) reviews. Using morphosyntactic patterns, we extract from the reviews short phrases consisting of a noun that encodes a particular aspect of the perfume and an adjective modifying that noun. We then associate each such n-gram with the corresponding product aspect and its related star rating. Next, based on the star scores, we generate adjective scales reflecting the relative strength of specific adjectives associated with a shared attribute of the product. An automatic ordering of the adjectives “correct” (correct), “sympa” (nice), “bon” (good) and “excellent” (excellent) according to their score in our resource is consistent with an intuitive scale based on human judgments. Our long-term objective is to generate different adjective scales in an empirical manner, which could allow the enrichment of lexical resources.</abstract>
@@ -1916,11 +1916,11 @@
       <title><fixed-case>ANEW</fixed-case>+: Automatic Expansion and Validation of Affective Norms of Words Lexicons in Multiple Languages</title>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Kit</first><last>Cho</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
       <author><first>John</first><last>Lien</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
       <pages>1127–1132</pages>
       <abstract>In this article we describe our method of automatically expanding an existing lexicon of words with affective valence scores. The automatic expansion process was done in English. In addition, we describe our procedure for automatically creating lexicons in languages where such resources may not previously exist. The foreign languages we discuss in this paper are Spanish, Russian and Farsi. We also describe the procedures to systematically validate our newly created resources. The main contributions of this work are: 1) A general method for expansion and creation of lexicons with scores of words on psychological constructs such as valence, arousal or dominance; and 2) a procedure for ensuring validity of the newly constructed resources.</abstract>
       <url hash="b1b7fadb">L16-1180</url>
@@ -1937,7 +1937,7 @@
     <paper id="182">
       <title>Challenges of Evaluating Sentiment Analysis Tools on Social Media</title>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>1142–1148</pages>
       <abstract>This paper discusses the challenges in carrying out fair comparative evaluations of sentiment analysis systems. Firstly, these are due to differences in corpus annotation guidelines and sentiment class distribution. Secondly, different systems often make different assumptions about how to interpret certain statements, e.g. tweets with URLs. In order to study the impact of these on evaluation results, this paper focuses on tweet sentiment analysis in particular. One existing and two newly created corpora are used, and the performance of four different sentiment analysis systems is reported; we make our annotated datasets and sentiment analysis applications publicly available. We see considerable variations in results across the different corpora, which calls into question the validity of many existing annotated datasets and evaluations, and we make some observations about both the systems and the datasets as a result.</abstract>
       <url hash="f16ad780">L16-1182</url>
@@ -1946,8 +1946,8 @@
     <paper id="183">
       <title><fixed-case>E</fixed-case>mo<fixed-case>T</fixed-case>weet-28: A Fine-Grained Emotion Corpus for Sentiment Analysis</title>
       <author><first>Jasy Suet Yan</first><last>Liew</last></author>
-      <author><first>Howard R.</first><last>Turtle</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="howard-r-turtle"><first>Howard R.</first><last>Turtle</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <pages>1149–1156</pages>
       <abstract>This paper describes EmoTweet-28, a carefully curated corpus of 15,553 tweets annotated with 28 emotion categories for the purpose of training and evaluating machine learning models for emotion classification. EmoTweet-28 is, to date, the largest tweet corpus annotated with fine-grained emotion categories. The corpus contains annotations for four facets of emotion: valence, arousal, emotion category and emotion cues. We first used small-scale content analysis to inductively identify a set of emotion categories that characterize the emotions expressed in microblog text. We then expanded the size of the corpus using crowdsourcing. The corpus encompasses a variety of examples including explicit and implicit expressions of emotions as well as tweets containing multiple emotions. EmoTweet-28 represents an important resource to advance the development and evaluation of more emotion-sensitive systems.</abstract>
       <url hash="31187401">L16-1183</url>
@@ -1956,7 +1956,7 @@
     <paper id="184">
       <title>Happy Accident: A Sentiment Composition Lexicon for Opposing Polarity Phrases</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>1157–1164</pages>
       <abstract>Sentiment composition is the determining of sentiment of a multi-word linguistic unit, such as a phrase or a sentence, based on its constituents. We focus on sentiment composition in phrases formed by at least one positive and at least one negative word ― phrases like ‘happy accident’ and ‘best winter break’. We refer to such phrases as opposing polarity phrases. We manually annotate a collection of opposing polarity phrases and their constituent single words with real-valued sentiment intensity scores using a method known as Best―Worst Scaling. We show that the obtained annotations are consistent. We explore the entries in the lexicon for linguistic regularities that govern sentiment composition in opposing polarity phrases. Finally, we list the current and possible future applications of the lexicon.</abstract>
       <url hash="73e543a2">L16-1184</url>
@@ -1964,8 +1964,8 @@
     </paper>
     <paper id="185">
       <title>Detecting Implicit Expressions of Affect from Text using Semantic Knowledge on Common Concept Properties</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>1165–1170</pages>
       <abstract>Emotions are an important part of the human experience. They are responsible for the adaptation and integration in the environment, offering, most of the time together with the cognitive system, the appropriate responses to stimuli in the environment. As such, they are an important component in decision-making processes. In today’s society, the avalanche of stimuli present in the environment (physical or virtual) makes people more prone to respond to stronger affective stimuli (i.e., those that are related to their basic needs and motivations ― survival, food, shelter, etc.). In media reporting, this is translated in the use of arguments (factual data) that are known to trigger specific (strong, affective) behavioural reactions from the readers. This paper describes initial efforts to detect such arguments from text, based on the properties of concepts. The final system able to retrieve and label this type of data from the news in traditional and social platforms is intended to be integrated Europe Media Monitor family of applications to detect texts that trigger certain (especially negative) reactions from the public, with consequences on citizen safety and security.</abstract>
       <url hash="68631c94">L16-1185</url>
@@ -1973,7 +1973,7 @@
     </paper>
     <paper id="186">
       <title>Creating a General <fixed-case>R</fixed-case>ussian Sentiment Lexicon</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Anatolii</first><last>Levchik</last></author>
       <pages>1171–1176</pages>
       <abstract>The paper describes the new Russian sentiment lexicon - RuSentiLex. The lexicon was gathered from several sources: opinionated words from domain-oriented Russian sentiment vocabularies, slang and curse words extracted from Twitter, objective words with positive or negative connotations from a news collection. The words in the lexicon having different sentiment orientations in specific senses are linked to appropriate concepts of the thesaurus of Russian language RuThes. All lexicon entries are classified according to four sentiment categories and three sources of sentiment (opinion, emotion, or fact). The lexicon can serve as the first version for the construction of domain-specific sentiment lexicons or can be used for feature generation in machine-learning approaches. In this role, the RuSentiLex lexicon was utilized by the participants of the SentiRuEval-2016 Twitter reputation monitoring shared task and allowed them to achieve high results.</abstract>
@@ -2006,7 +2006,7 @@
     </paper>
     <paper id="189">
       <title>Specialising Paragraph Vectors for Text Polarity Detection</title>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>1190–1195</pages>
       <abstract>This paper presents some experiments for specialising Paragraph Vectors, a new technique for creating text fragment (phrase, sentence, paragraph, text, ...) embedding vectors, for text polarity detection. The first extension regards the injection of polarity information extracted from a polarity lexicon into embeddings and the second extension aimed at inserting word order information into Paragraph Vectors. These two extensions, when training a logistic-regression classifier on the combined embeddings, were able to produce a relevant gain in performance when compared to the standard Paragraph Vector methods proposed by Le and Mikolov (2014).</abstract>
       <url hash="74b15d56">L16-1189</url>
@@ -2016,7 +2016,7 @@
       <title>Evaluating Lexical Similarity to build Sentiment Similarity</title>
       <author><first>Grégoire</first><last>Jadi</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Laura</first><last>Monceaux</last></author>
       <pages>1196–1201</pages>
       <abstract>In this article, we propose to evaluate the lexical similarity information provided by word representations against several opinion resources using traditional Information Retrieval tools. Word representation have been used to build and to extend opinion resources such as lexicon, and ontology and their performance have been evaluated on sentiment analysis tasks. We question this method by measuring the correlation between the sentiment proximity provided by opinion resources and the semantic similarity provided by word representations using different correlation coefficients. We also compare the neighbors found in word representations and list of similar opinion words. Our results show that the proximity of words in state-of-the-art word representations is not very effective to build sentiment similarity.</abstract>
@@ -2029,7 +2029,7 @@
       <author><first>Melanie</first><last>Zaiß</last></author>
       <author><first>Qi</first><last>Han</last></author>
       <author><first>Steffen</first><last>Koch</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>1202–1206</pages>
       <abstract>Vector space models and distributional information are widely used in NLP. The models typically rely on complex, high-dimensional objects. We present an interactive visualisation tool to explore salient lexical-semantic features of high-dimensional word objects and word similarities. Most visualisation tools provide only one low-dimensional map of the underlying data, so they are not capable of retaining the local and the global structure. We overcome this limitation by providing an additional trust-view to obtain a more realistic picture of the actual object distances. Additional tool options include the reference to a gold standard classification, the reference to a cluster analysis as well as listing the most salient (common) features for a selected subset of the words.</abstract>
       <url hash="2d1a4534">L16-1191</url>
@@ -2039,7 +2039,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>A</fixed-case>ligner: A Method and Tool for Aligning Chunks with Semantic Relation Types and Semantic Similarity Scores</title>
       <author><first>Nabin</first><last>Maharjan</last></author>
       <author><first>Rajendra</first><last>Banjade</last></author>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <pages>1207–1211</pages>
       <abstract>This paper introduces a ruled-based method and software tool, called SemAligner, for aligning chunks across texts in a given pair of short English texts. The tool, based on the top performing method at the Interpretable Short Text Similarity shared task at SemEval 2015, where it was used with human annotated (gold) chunks, can now additionally process plain text-pairs using two powerful chunkers we developed, e.g. using Conditional Random Fields. Besides aligning chunks, the tool automatically assigns semantic relations to the aligned chunks (such as EQUI for equivalent and OPPO for opposite) and semantic similarity scores that measure the strength of the semantic relation between the aligned chunks. Experiments show that SemAligner performs competitively for system generated chunks and that these results are also comparable to results obtained on gold chunks. SemAligner has other capabilities such as handling various input formats and chunkers as well as extending lookup resources.</abstract>
@@ -2057,7 +2057,7 @@
     </paper>
     <paper id="194">
       <title>mwetoolkit+sem: Integrating Word Embeddings in the mwetoolkit for Semantic <fixed-case>MWE</fixed-case> Processing</title>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>1221–1225</pages>
@@ -2077,7 +2077,7 @@
     </paper>
     <paper id="196">
       <title>Extending Monolingual Semantic Textual Similarity Task to Multiple Cross-lingual Settings</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <author><first>Wentao</first><last>Luo</last></author>
       <pages>1233–1239</pages>
       <abstract>This paper describes our independent effort for extending the monolingual semantic textual similarity (STS) task setting to multiple cross-lingual settings involving English, Japanese, and Chinese. So far, we have adopted a “monolingual similarity after translation” strategy to predict the semantic similarity between a pair of sentences in different languages. With this strategy, a monolingual similarity method is applied after having (one of) the target sentences translated into a pivot language. Therefore, this paper specifically details the required and developed resources to implement this framework, while presenting our current results for English-Japanese-Chinese cross-lingual STS tasks that may exemplify the validity of the framework.</abstract>
@@ -2088,7 +2088,7 @@
       <title>Resources for building applications with Dependency <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics</title>
       <author><first>Ann</first><last>Copestake</last></author>
       <author><first>Guy</first><last>Emerson</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Matic</first><last>Horvat</last></author>
       <author><first>Alexander</first><last>Kuhnle</last></author>
       <author><first>Ewa</first><last>Muszyńska</last></author>
@@ -2139,7 +2139,7 @@
     <paper id="202">
       <title>Collecting Language Resources for the <fixed-case>L</fixed-case>atvian e-Government Machine Translation Platform</title>
       <author><first>Roberts</first><last>Rozis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
       <pages>1270–1276</pages>
       <abstract>This paper describes corpora collection activity for building large machine translation systems for Latvian e-Government platform. We describe requirements for corpora, selection and assessment of data sources, collection of the public corpora and creation of new corpora from miscellaneous sources. Methodology, tools and assessment methods are also presented along with the results achieved, challenges faced and conclusions made. Several approaches to address the data scarceness are discussed. We summarize the volume of obtained corpora and provide quality metrics of MT systems trained on this data. Resulting MT systems for English-Latvian, Latvian English and Latvian Russian are integrated in the Latvian e-service portal and are freely available on website HUGO.LV. This paper can serve as a guidance for similar activities initiated in other countries, particularly in the context of European Language Resource Coordination action.</abstract>
@@ -2148,12 +2148,12 @@
     </paper>
     <paper id="203">
       <title><fixed-case>N</fixed-case>ederlab: Towards a Single Portal and Research Environment for Diachronic <fixed-case>D</fixed-case>utch Text Corpora</title>
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <author><first>Martin</first><last>Reynaert</last></author>
       <author><first>Nicoline</first><last>van der Sijs</last></author>
       <author><first>René</first><last>van Stipriaan</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>1277–1281</pages>
       <abstract>The Nederlab project aims to bring together all digitized texts relevant to the Dutch national heritage, the history of the Dutch language and culture (circa 800 – present) in one user friendly and tool enriched open access web interface. This paper describes Nederlab halfway through the project period and discusses the collections incorporated, back-office processes, system back-end as well as the Nederlab Research Portal end-user web application.</abstract>
       <url hash="4ad5ca0a">L16-1203</url>
@@ -2184,7 +2184,7 @@
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Aaron</first><last>Halfaker</last></author>
       <author><first>Robert</first><last>Kraut</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1295–1299</pages>
       <abstract>In this work, we introduced a corpus for categorizing edit types in Wikipedia. This fine-grained taxonomy of edit types enables us to differentiate editing actions and find editor roles in Wikipedia based on their low-level edit types. To do this, we first created an annotated corpus based on 1,996 edits obtained from 953 article revisions and built machine-learning models to automatically identify the edit categories associated with edits. Building on this automated measurement of edit types, we then applied a graphical model analogous to Latent Dirichlet Allocation to uncover the latent roles in editors’ edit histories. Applying this technique revealed eight different roles editors play, such as Social Networker, Substantive Expert, etc.</abstract>
       <url hash="7813aec2">L16-1206</url>
@@ -2196,7 +2196,7 @@
       <author><first>Aidan</first><last>Kaplan</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1300–1306</pages>
       <abstract>We present new language resources for Moroccan and Sanaani Yemeni Arabic. The resources include corpora for each dialect which have been morphologically annotated, and morphological analyzers for each dialect which are derived from these corpora. These are the first sets of resources for Moroccan and Yemeni Arabic. The resources will be made available to the public.</abstract>
       <url hash="e485eecd">L16-1207</url>
@@ -2204,7 +2204,7 @@
     </paper>
     <paper id="208">
       <title>Merging Data Resources for Inflectional and Derivational Morphology in <fixed-case>C</fixed-case>zech</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
       <author><first>Milan</first><last>Straka</last></author>
       <author><first>Jonáš</first><last>Vidra</last></author>
@@ -2229,7 +2229,7 @@
       <author><first>Eleanor</first><last>Chodroff</last></author>
       <author><first>Matthew</first><last>Maciejewski</last></author>
       <author><first>Jan</first><last>Trmal</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>John</first><last>Godfrey</last></author>
       <pages>1323–1327</pages>
       <abstract>The Mixer series of speech corpora were collected over several years, principally to support annual NIST evaluations of speaker recognition (SR) technologies. These evaluations focused on conversational speech over a variety of channels and recording conditions. One of the series, Mixer-6, added a new condition, read speech, to support basic scientific research on speaker characteristics, as well as technology evaluation. With read speech it is possible to make relatively precise measurements of phonetic events and features, which can be correlated with the performance of speaker recognition algorithms, or directly used in phonetic analysis of speaker variability. The read speech, as originally recorded, was adequate for large-scale evaluations (e.g., fixed-text speaker ID algorithms) but only marginally suitable for acoustic-phonetic studies. Numerous errors due largely to speaker behavior remained in the corpus, with no record of their locations or rate of occurrence. We undertook the effort to correct this situation with automatic methods supplemented by human listening and annotation. The present paper describes the tools and methods, resulting corrections, and some examples of the kinds of research studies enabled by these enhancements.</abstract>
@@ -2242,9 +2242,9 @@
       <author><first>Florian</first><last>Hönig</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Simone</first><last>Hantke</last></author>
-      <author><first>Anton</first><last>Batliner</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="anton-batliner"><first>Anton</first><last>Batliner</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>1328–1332</pages>
       <abstract>In this paper, we describe a new database with audio recordings of non-native (L2) speakers of English, and the perceptual evaluation experiment conducted with native English speakers for assessing the prosody of each recording. These annotations are then used to compute the gold standard using different methods, and a series of regression experiments is conducted to evaluate their impact on the performance of a regression model predicting the degree of naturalness of L2 speech. Further, we compare the relevance of different feature groups modelling prosody in general (without speech tempo), speech rate and pauses modelling speech tempo (fluency), voice quality, and a variety of spectral features. We also discuss the impact of various fusion strategies on performance. Overall, our results demonstrate that the prosody of non-native speakers of English as L2 can be reliably assessed using supra-segmental audio features; prosodic features seem to be the most important ones.</abstract>
       <url hash="2a73d5c9">L16-1211</url>
@@ -2261,7 +2261,7 @@
       <author><first>Jeanin</first><last>Jügler</last></author>
       <author><first>Yves</first><last>Laprie</last></author>
       <author><first>Odile</first><last>Mella</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Frank</first><last>Zimmerer</last></author>
       <pages>1333–1338</pages>
       <abstract>The IFCASL corpus is a French-German bilingual phonetic learner corpus designed, recorded and annotated in a project on individualized feedback in computer-assisted spoken language learning. The motivation for setting up this corpus was that there is no phonetically annotated and segmented corpus for this language pair of comparable of size and coverage. In contrast to most learner corpora, the IFCASL corpus incorporate data for a language pair in both directions, i.e. in our case French learners of German, and German learners of French. In addition, the corpus is complemented by two sub-corpora of native speech by the same speakers. The corpus provides spoken data by about 100 speakers with comparable productions, annotated and segmented on the word and the phone level, with more than 50% manually corrected data. The paper reports on inter-annotator agreement and the optimization of the acoustic models for forced speech-text alignment in exercises for computer-assisted pronunciation training. Example studies based on the corpus data with a phonetic focus include topics such as the realization of /h/ and glottal stop, final devoicing of obstruents, vowel quantity and quality, pitch range, and tempo.</abstract>
@@ -2270,7 +2270,7 @@
     </paper>
     <paper id="213">
       <title><fixed-case>LELIO</fixed-case>: An Auto-Adaptative System to Acquire Domain Lexical Knowledge in Technical Texts</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>1339–1345</pages>
       <abstract>In this paper, we investigate some language acquisition facets of an auto-adaptative system that can automatically acquire most of the relevant lexical knowledge and authoring practices for an application in a given domain. This is the LELIO project: producing customized LELIE solutions. Our goal, within the framework of LELIE (a system that tags language uses that do not follow the Constrained Natural Language principles), is to automate the long, costly and error prone lexical customization of LELIE to a given application domain. Technical texts being relatively restricted in terms of syntax and lexicon, results obtained show that this approach is feasible and relatively reliable. By auto-adaptative, we mean that the system learns from a sample of the application corpus the various lexical terms and uses crucial for LELIE to work properly (e.g. verb uses, fuzzy terms, business terms, stylistic patterns). A technical writer validation method is developed at each step of the acquisition.</abstract>
       <url hash="953a9035">L16-1213</url>
@@ -2366,7 +2366,7 @@
     <paper id="222">
       <title>Semi-automatically Alignment of Predicates between Speech and <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes data</title>
       <author><first>Niraj</first><last>Shrestha</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1397–1401</pages>
       <abstract>Speech data currently receives a growing attention and is an important source of information. We still lack suitable corpora of transcribed speech annotated with semantic roles that can be used for semantic role labeling (SRL), which is not the case for written data. Semantic role labeling in speech data is a challenging and complex task due to the lack of sentence boundaries and the many transcription errors such as insertion, deletion and misspellings of words. In written data, SRL evaluation is performed at the sentence level, but in speech data sentence boundaries identification is still a bottleneck which makes evaluation more complex. In this work, we semi-automatically align the predicates found in transcribed speech obtained with an automatic speech recognizer (ASR) with the predicates found in the corresponding written documents of the OntoNotes corpus and manually align the semantic roles of these predicates thus obtaining annotated semantic frames in the speech data. This data can serve as gold standard alignments for future research in semantic role labeling of speech data.</abstract>
       <url hash="8f4f8acc">L16-1222</url>
@@ -2410,18 +2410,18 @@
       <author><first>Johann</first><last>Poignant</last></author>
       <author><first>Mateusz</first><last>Budnik</last></author>
       <author><first>Hervé</first><last>Bredin</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <author><first>Mickael</first><last>Stefas</last></author>
       <author><first>Pierrick</first><last>Bruneau</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Hazim</first><last>Ekenel</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Javier</first><last>Hernando</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
       <author><first>Ramon</first><last>Morros</last></author>
       <author><first>Georges</first><last>Quénot</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Thomas</first><last>Tamisier</last></author>
       <pages>1421–1425</pages>
       <abstract>In this paper, we describe the organization and the implementation of the CAMOMILE collaborative annotation framework for multimodal, multimedia, multilingual (3M) data. Given the versatile nature of the analysis which can be performed on 3M data, the structure of the server was kept intentionally simple in order to preserve its genericity, relying on standard Web technologies. Layers of annotations, defined as data associated to a media fragment from the corpus, are stored in a database and can be managed through standard interfaces with authentication. Interfaces tailored specifically to the needed task can then be developed in an agile way, relying on simple but reliable services for the management of the centralized annotations. We then present our implementation of an active learning scenario for person annotation in video, relying on the CAMOMILE server; during a dry run experiment, the manual annotation of 716 speech segments was thus propagated to 3504 labeled tracks. The code of the CAMOMILE framework is distributed in open source.</abstract>
@@ -2454,7 +2454,7 @@
     <paper id="229">
       <title>A Web Tool for Building Parallel Corpora of Spoken and Sign Languages</title>
       <author><first>Alex</first><last>Becker</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>Sara</first><last>Candeias</last></author>
       <pages>1438–1445</pages>
       <abstract>In this paper we describe our work in building an online tool for manually annotating texts in any spoken language with SignWriting in any sign language. The existence of such tool will allow the creation of parallel corpora between spoken and sign languages that can be used to bootstrap the creation of efficient tools for the Deaf community. As an example, a parallel corpus between English and American Sign Language could be used for training Machine Learning models for automatic translation between the two languages. Clearly, this kind of tool must be designed in a way that it eases the task of human annotators, not only by being easy to use, but also by giving smart suggestions as the annotation progresses, in order to save time and effort. By building a collaborative, online, easy to use annotation tool for building parallel corpora between spoken and sign languages we aim at helping the development of proper resources for sign languages that can then be used in state-of-the-art models currently used in tools for spoken languages. There are several issues and difficulties in creating this kind of resource, and our presented tool already deals with some of them, like adequate text representation of a sign and many to many alignments between words and signs.</abstract>
@@ -2465,7 +2465,7 @@
       <title>Issues and Challenges in Annotating <fixed-case>U</fixed-case>rdu Action Verbs on the <fixed-case>IMAGACT</fixed-case>4<fixed-case>ALL</fixed-case> Platform</title>
       <author><first>Sharmin</first><last>Muzaffar</last></author>
       <author><first>Pitambar</first><last>Behera</last></author>
-      <author><first>Girish</first><last>Jha</last></author>
+      <author id="girish-nath-jha"><first>Girish</first><last>Jha</last></author>
       <pages>1446–1451</pages>
       <abstract>In South-Asian languages such as Hindi and Urdu, action verbs having compound constructions and serial verbs constructions pose serious problems for natural language processing and other linguistic tasks. Urdu is an Indo-Aryan language spoken by 51, 500, 0001 speakers in India. Action verbs that occur spontaneously in day-to-day communication are highly ambiguous in nature semantically and as a consequence cause disambiguation issues that are relevant and applicable to Language Technologies (LT) like Machine Translation (MT) and Natural Language Processing (NLP). IMAGACT4ALL is an ontology-driven web-based platform developed by the University of Florence for storing action verbs and their inter-relations. This group is currently collaborating with Jawaharlal Nehru University (JNU) in India to connect Indian languages on this platform. Action verbs are frequently used in both written and spoken discourses and refer to various meanings because of their polysemic nature. The IMAGACT4ALL platform stores each 3d animation image, each one of them referring to a variety of possible ontological types, which in turn makes the annotation task for the annotator quite challenging with regard to selecting verb argument structure having a range of probability distribution. The authors, in this paper, discuss the issues and challenges such as complex predicates (compound and conjunct verbs), ambiguously animated video illustrations, semantic discrepancies, and the factors of verb-selection preferences that have produced significant problems in annotating Urdu verbs on the IMAGACT ontology.</abstract>
       <url hash="90fe6390">L16-1230</url>
@@ -2495,11 +2495,11 @@
     </paper>
     <paper id="233">
       <title>The Event and Implied Situation Ontology (<fixed-case>ESO</fixed-case>): Application and Evaluation</title>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Marco</first><last>Rospocher</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
       <pages>1463–1470</pages>
       <abstract>This paper presents the Event and Implied Situation Ontology (ESO), a manually constructed resource which formalizes the pre and post situations of events and the roles of the entities affected by an event. The ontology is built on top of existing resources such as WordNet, SUMO and FrameNet. The ontology is injected to the Predicate Matrix, a resource that integrates predicate and role information from amongst others FrameNet, VerbNet, PropBank, NomBank and WordNet. We illustrate how these resources are used on large document collections to detect information that otherwise would have remained implicit. The ontology is evaluated on two aspects: recall and precision based on a manually annotated corpus and secondly, on the quality of the knowledge inferred by the situation assertions in the ontology. Evaluation results on the quality of the system show that 50% of the events typed and enriched with ESO assertions are correct.</abstract>
@@ -2588,7 +2588,7 @@
     <paper id="242">
       <title>Corpus vs. Lexicon Supervision in Morphosyntactic Tagging: the Case of <fixed-case>S</fixed-case>lovene</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>1527–1531</pages>
       <abstract>In this paper we present a tagger developed for inflectionally rich languages for which both a training corpus and a lexicon are available. We do not constrain the tagger by the lexicon entries, allowing both for lexicon incompleteness and noisiness. By using the lexicon indirectly through features we allow for known and unknown words to be tagged in the same manner. We test our tagger on Slovene data, obtaining a 25% error reduction of the best previous results both on known and unknown words. Given that Slovene is, in comparison to some other Slavic languages, a well-resourced language, we perform experiments on the impact of token (corpus) vs. type (lexicon) supervision, obtaining useful insights in how to balance the effort of extending resources to yield better tagging results.</abstract>
       <url hash="5a04a3b7">L16-1242</url>
@@ -2596,7 +2596,7 @@
     </paper>
     <paper id="243">
       <title>Challenges and Solutions for Consistent Annotation of <fixed-case>V</fixed-case>ietnamese Treebank</title>
-      <author><first>Quy</first><last>Nguyen</last></author>
+      <author id="quy-nguyen"><first>Quy</first><last>Nguyen</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Ha</first><last>Le</last></author>
       <author><first>Ngan</first><last>Nguyen</last></author>
@@ -2608,7 +2608,7 @@
     <paper id="244">
       <title>Correcting Errors in a Treebank Based on Tree Mining</title>
       <author><first>Kanta</first><last>Suzuki</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>1540–1545</pages>
       <abstract>This paper provides a new method to correct annotation errors in a treebank. The previous error correction method constructs a pseudo parallel corpus where incorrect partial parse trees are paired with correct ones, and extracts error correction rules from the parallel corpus. By applying these rules to a treebank, the method corrects errors. However, this method does not achieve wide coverage of error correction. To achieve wide coverage, our method adopts a different approach. In our method, we consider that an infrequent pattern which can be transformed to a frequent one is an annotation error pattern. Based on a tree mining technique, our method seeks such infrequent tree patterns, and constructs error correction rules each of which consists of an infrequent pattern and a corresponding frequent pattern. We conducted an experiment using the Penn Treebank. We obtained 1,987 rules which are not constructed by the previous method, and the rules achieved good precision.</abstract>
@@ -2618,8 +2618,8 @@
     <paper id="245">
       <title>4<fixed-case>C</fixed-case>ouv: A New Treebank for <fixed-case>F</fixed-case>rench</title>
       <author><first>Philippe</first><last>Blache</last></author>
-      <author><first>Grégoire</first><last>de Montcheuil</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="gregoire-moreau-de-montcheuil"><first>Grégoire</first><last>de Montcheuil</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Stéphane</first><last>Rauzy</last></author>
       <pages>1546–1551</pages>
       <abstract>The question of the type of text used as primary data in treebanks is of certain importance. First, it has an influence at the discourse level: an article is not organized in the same way as a novel or a technical document. Moreover, it also has consequences in terms of semantic interpretation: some types of texts can be easier to interpret than others. We present in this paper a new type of treebank which presents the particularity to answer to specific needs of experimental linguistic. It is made of short texts (book backcovers) that presents a strong coherence in their organization and can be rapidly interpreted. This type of text is adapted to short reading sessions, making it easy to acquire physiological data (e.g. eye movement, electroencepholagraphy). Such a resource offers reliable data when looking for correlations between computational models and human language processing.</abstract>
@@ -2632,8 +2632,8 @@
       <author><first>Andreia</first><last>Querido</last></author>
       <author><first>Marisa</first><last>Campos</last></author>
       <author><first>Rita Valadas</first><last>Pereira</last></author>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>1552–1557</pages>
       <abstract>This paper presents a new linguistic resource for the study and computational processing of Portuguese. CINTIL DependencyBank PREMIUM is a corpus of Portuguese news text, accurately manually annotated with a wide range of linguistic information (morpho-syntax, named-entities, syntactic function and semantic roles), making it an invaluable resource specially for the development and evaluation of data-driven natural language processing tools. The corpus is under active development, reaching 4,000 sentences in its current version. The paper also reports on the training and evaluation of a dependency parser over this corpus. CINTIL DependencyBank PREMIUM is freely-available for research purposes through META-SHARE.</abstract>
       <url hash="b2181ce7">L16-1246</url>
@@ -2664,7 +2664,7 @@
       <author><first>Win Pa</first><last>Pa</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1574–1578</pages>
       <abstract>This paper introduces the ALT project initiated by the Advanced Speech Translation Research and Development Promotion Center (ASTREC), NICT, Kyoto, Japan. The aim of this project is to accelerate NLP research for Asian languages such as Indonesian, Japanese, Khmer, Laos, Malay, Myanmar, Philippine, Thai and Vietnamese. The original resource for this project was English articles that were randomly selected from Wikinews. The project has so far created a corpus for Myanmar and will extend in scope to include other languages in the near future. A 20000-sentence corpus of Myanmar that has been manually translated from an English corpus has been word segmented, word aligned, part-of-speech tagged and constituency parsed by human annotators. In this paper, we present the implementation steps for creating the treebank in detail, including a description of the ALT web-based treebanking tool. Moreover, we report statistics on the annotation quality of the Myanmar treebank created so far.</abstract>
       <url hash="76e7a8f3">L16-1249</url>
@@ -2672,7 +2672,7 @@
     </paper>
     <paper id="250">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>N</fixed-case>orwegian</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Petter</first><last>Hohle</last></author>
       <pages>1579–1585</pages>
       <abstract>This article describes the conversion of the Norwegian Dependency Treebank to the Universal Dependencies scheme. This paper details the mapping of PoS tags, morphological features and dependency relations and provides a description of the structural changes made to NDT analyses in order to make it compliant with the UD guidelines. We further present PoS tagging and dependency parsing experiments which report first results for the processing of the converted treebank. The full converted treebank was made available with the 1.2 release of the UD treebanks.</abstract>
@@ -2682,9 +2682,9 @@
     <paper id="251">
       <title>Fostering the Next Generation of <fixed-case>E</fixed-case>uropean Language Technology: Recent Developments ― Emerging Initiatives ― Challenges and Opportunities</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <pages>1586–1592</pages>
       <abstract>META-NET is a European network of excellence, founded in 2010, that consists of 60 research centres in 34 European countries. One of the key visions and goals of META-NET is a truly multilingual Europe, which is substantially supported and realised through language technologies. In this article we provide an overview of recent developments around the multilingual Europe topic, we also describe recent and upcoming events as well as recent and upcoming strategy papers. Furthermore, we provide overviews of two new emerging initiatives, the CEF.AT and ELRC activity on the one hand and the Cracking the Language Barrier federation on the other. The paper closes with several suggested next steps in order to address the current challenges and to open up new opportunities.</abstract>
       <url hash="8cdd3c4c">L16-1251</url>
@@ -2701,11 +2701,11 @@
     </paper>
     <paper id="253">
       <title>Open Data Vocabularies for Assigning Usage Rights to Data Resources from Translation Projects</title>
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <author><first>Kaniz</first><last>Fatema</last></author>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Brian</first><last>Walshe</last></author>
-      <author><first>Arturo</first><last>Calvo</last></author>
+      <author id="arturo-calvo-devesa"><first>Arturo</first><last>Calvo</last></author>
       <pages>1601–1609</pages>
       <abstract>An assessment of the intellectual property requirements for data used in machine-aided translation is provided based on a recent EC-funded legal review. This is compared against the capabilities offered by current linked open data standards from the W3C for publishing and sharing translation memories from translation projects, and proposals for adequately addressing the intellectual property needs of stakeholders in translation projects using open data vocabularies are suggested.</abstract>
       <url hash="f79295ac">L16-1253</url>
@@ -2713,7 +2713,7 @@
     </paper>
     <paper id="254">
       <title>Language Resource Citation: the <fixed-case>ISLRN</fixed-case> Dissemination and Further Developments</title>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <author><first>Lin</first><last>Liu</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
@@ -2724,8 +2724,8 @@
     </paper>
     <paper id="255">
       <title>Trends in <fixed-case>HLT</fixed-case> Research: A Survey of <fixed-case>LDC</fixed-case>’s Data Scholarship Program</title>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <pages>1614–1618</pages>
       <abstract>Since its inception in 2010, the Linguistic Data Consortium’s data scholarship program has awarded no cost grants in data to 64 recipients from 26 countries. A survey of the twelve cycles to date ― two awards each in the Fall and Spring semesters from Fall 2010 through Spring 2016 ― yields an interesting view into graduate program research trends in human language technology and related fields and the particular data sets deemed important to support that research. The survey also reveals regions in which such activity appears to be on a rise, including in Arabic-speaking regions and portions of the Americas and Asia.</abstract>
       <url hash="b3131f92">L16-1255</url>
@@ -2746,7 +2746,7 @@
       <title>Towards a Corpus of Violence Acts in <fixed-case>A</fixed-case>rabic Social Media</title>
       <author><first>Ayman</first><last>Alhelbawy</last></author>
       <author><first>Poesio</first><last>Massimo</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>1627–1631</pages>
       <abstract>In this paper we present a new corpus of Arabic tweets that mention some form of violent event, developed to support the automatic identification of Human Rights Abuse. The dataset was manually labelled for seven classes of violence using crowdsourcing.</abstract>
       <url hash="bf427158">L16-1257</url>
@@ -2755,8 +2755,8 @@
     <paper id="258">
       <title><fixed-case>T</fixed-case>wi<fixed-case>S</fixed-case>ty: A Multilingual <fixed-case>T</fixed-case>witter Stylometry Corpus for Gender and Personality Profiling</title>
       <author><first>Ben</first><last>Verhoeven</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1632–1637</pages>
       <abstract>Personality profiling is the task of detecting personality traits of authors based on writing style. Several personality typologies exist, however, the Briggs-Myer Type Indicator (MBTI) is particularly popular in the non-scientific community, and many people use it to analyse their own personality and talk about the results online. Therefore, large amounts of self-assessed data on MBTI are readily available on social-media platforms such as Twitter. We present a novel corpus of tweets annotated with the MBTI personality type and gender of their author for six Western European languages (Dutch, German, French, Italian, Portuguese and Spanish). We outline the corpus creation and annotation, show statistics of the obtained data distributions and present first baselines on Myers-Briggs personality profiling and gender prediction for all six languages.</abstract>
       <url hash="19681396">L16-1258</url>
@@ -2792,7 +2792,7 @@
       <author><first>Sumire</first><last>Uematsu</last></author>
       <author><first>Hiroshi</first><last>Kanayama</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1651–1658</pages>
       <abstract>We present an attempt to port the international syntactic annotation scheme, Universal Dependencies, to the Japanese language in this paper. Since the Japanese syntactic structure is usually annotated on the basis of unique chunk-based dependencies, we first introduce word-based dependencies by using a word unit called the Short Unit Word, which usually corresponds to an entry in the lexicon UniDic. Porting is done by mapping the part-of-speech tagset in UniDic to the universal part-of-speech tagset, and converting a constituent-based treebank to a typed dependency tree. The conversion is not straightforward, and we discuss the problems that arose in the conversion and the current solutions. A treebank consisting of 10,000 sentences was built by converting the existent resources and currently released to the public.</abstract>
       <url hash="39eb3664">L16-1261</url>
@@ -2801,17 +2801,17 @@
     <paper id="262">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v1: A Multilingual Treebank Collection</title>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Natalia</first><last>Silveira</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>1659–1666</pages>
       <abstract>Cross-linguistically consistent annotation is necessary for sound comparative evaluation and cross-lingual learning experiments. It is also useful for multilingual system development and comparative linguistic studies. Universal Dependencies is an open community effort to create cross-linguistically consistent treebank annotation for many languages within a dependency-based lexicalist framework. In this paper, we describe v1 of the universal guidelines, the underlying design principles, and the currently available treebanks for 33 languages.</abstract>
       <url hash="e532aa2a">L16-1262</url>
@@ -2821,7 +2821,7 @@
       <title>Construction of an <fixed-case>E</fixed-case>nglish Dependency Corpus incorporating Compound Function Words</title>
       <author><first>Akihiko</first><last>Kato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1667–1671</pages>
       <abstract>The recognition of multiword expressions (MWEs) in a sentence is important for such linguistic analyses as syntactic and semantic parsing, because it is known that combining an MWE into a single token improves accuracy for various NLP tasks, such as dependency parsing and constituency parsing. However, MWEs are not annotated in Penn Treebank. Furthermore, when converting word-based dependency to MWE-aware dependency directly, one could combine nodes in an MWE into a single node. Nevertheless, this method often leads to the following problem: A node derived from an MWE could have multiple heads and the whole dependency structure including MWE might be cyclic. Therefore we converted a phrase structure to a dependency structure after establishing an MWE as a single subtree. This approach can avoid an occurrence of multiple heads and/or cycles. In this way, we constructed an English dependency corpus taking into account compound function words, which are one type of MWEs that serve as functional expressions. In addition, we report experimental results of dependency parsing using a constructed corpus.</abstract>
       <url hash="8d7a05db">L16-1263</url>
@@ -2839,7 +2839,7 @@
     <paper id="265">
       <title>A Dependency Treebank of the <fixed-case>C</fixed-case>hinese Buddhist Canon</title>
       <author><first>Tak-sum</first><last>Wong</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>1679–1683</pages>
       <abstract>We present a dependency treebank of the Chinese Buddhist Canon, which contains 1,514 texts with about 50 million Chinese characters. The treebank was created by an automatic parser trained on a smaller treebank, containing four manually annotated sutras (Lee and Kong, 2014). We report results on word segmentation, part-of-speech tagging and dependency parsing, and discuss challenges posed by the processing of medieval Chinese. In a case study, we exploit the treebank to examine verbs frequently associated with Buddha, and to analyze usage patterns of quotative verbs in direct speech. Our results suggest that certain quotative verbs imply status differences between the speaker and the listener.</abstract>
       <url hash="c17cd432">L16-1265</url>
@@ -2848,7 +2848,7 @@
     <paper id="266">
       <title>Automatic Biomedical Term Polysemy Detection</title>
       <author><first>Juan Antonio</first><last>Lossio-Ventura</last></author>
-      <author><first>Clement</first><last>Jonquet</last></author>
+      <author id="clement-jonquet"><first>Clement</first><last>Jonquet</last></author>
       <author><first>Mathieu</first><last>Roche</last></author>
       <author><first>Maguelonne</first><last>Teisseire</last></author>
       <pages>1684–1688</pages>
@@ -2868,9 +2868,9 @@
     <paper id="268">
       <title>Addressing the <fixed-case>MFS</fixed-case> Bias in <fixed-case>WSD</fixed-case> systems</title>
       <author><first>Marten</first><last>Postma</last></author>
-      <author><first>Ruben</first><last>Izquierdo</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="ruben-izquierdo"><first>Ruben</first><last>Izquierdo</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>1695–1700</pages>
       <abstract>Word Sense Disambiguation (WSD) systems tend to have a strong bias towards assigning the Most Frequent Sense (MFS), which results in high performance on the MFS but in a very low performance on the less frequent senses. We addressed the MFS bias in WSD systems by combining the output from a WSD system with a set of mostly static features to create a MFS classifier to decide when to and not to choose the MFS. The output from this MFS classifier, which is based on the Random Forest algorithm, is then used to modify the output from the original WSD system. We applied our classifier to one of the state-of-the-art supervised WSD systems, i.e. IMS, and to of the best state-of-the-art unsupervised WSD systems, i.e. UKB. Our main finding is that we are able to improve the system output in terms of choosing between the MFS and the less frequent senses. When we apply the MFS classifier to fine-grained WSD, we observe an improvement on the less frequent sense cases, whereas we maintain the overall recall.</abstract>
@@ -2879,7 +2879,7 @@
     </paper>
     <paper id="269">
       <title>A Large-Scale Multilingual Disambiguation of Glosses</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Claudio</first><last>Delli Bovi</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
@@ -2940,7 +2940,7 @@
     </paper>
     <paper id="275">
       <title><fixed-case>S</fixed-case>ci<fixed-case>C</fixed-case>orp: A Corpus of <fixed-case>E</fixed-case>nglish Scientific Articles Annotated for Information Status Analysis</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <pages>1743–1749</pages>
       <abstract>This paper presents SciCorp, a corpus of full-text English scientific papers of two disciplines, genetics and computational linguistics. The corpus comprises co-reference and bridging information as well as information status labels. Since SciCorp is annotated with both labels and the respective co-referent and bridging links, we believe it is a valuable resource for NLP researchers working on scientific articles or on applications such as co-reference resolution, bridging resolution or information status classification. The corpus has been reliably annotated by independent human coders with moderate inter-annotator agreement (average kappa = 0.71). In total, we have annotated 14 full papers containing 61,045 tokens and marked 8,708 definite noun phrases. The paper describes in detail the annotation scheme as well as the resulting corpus. The corpus is available for download in two different formats: in an offset-based format and for the co-reference annotations in the widely-used, tabular CoNLL-2012 format.</abstract>
       <url hash="8b394831">L16-1275</url>
@@ -2950,7 +2950,7 @@
       <title>Using lexical and Dependency Features to Disambiguate Discourse Connectives in <fixed-case>H</fixed-case>indi</title>
       <author><first>Rohit</first><last>Jain</last></author>
       <author><first>Himanshu</first><last>Sharma</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>1750–1754</pages>
       <abstract>Discourse parsing is a challenging task in NLP and plays a crucial role in discourse analysis. To enable discourse analysis for Hindi, Hindi Discourse Relations Bank was created on a subset of Hindi TreeBank. The benefits of a discourse analyzer in automated discourse analysis, question summarization and question answering domains has motivated us to begin work on a discourse analyzer for Hindi. In this paper, we focus on discourse connective identification for Hindi. We explore various available syntactic features for this task. We also explore the use of dependency tree parses present in the Hindi TreeBank and study the impact of the same on the performance of the system. We report that the novel dependency features introduced have a higher impact on precision, in comparison to the syntactic features previously used for this task. In addition, we report a high accuracy of 96% for this task.</abstract>
       <url hash="0542f21c">L16-1276</url>
@@ -3011,9 +3011,9 @@
     </paper>
     <paper id="282">
       <title>Character-Level Neural Translation for Multilingual Media Monitoring in the <fixed-case>SUMMA</fixed-case> Project</title>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <author><first>Steve</first><last>Renals</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
       <pages>1789–1793</pages>
       <abstract>The paper steps outside the comfort-zone of the traditional NLP tasks like automatic speech recognition (ASR) and machine translation (MT) to addresses two novel problems arising in the automated multilingual news monitoring: segmentation of the TV and radio program ASR transcripts into individual stories, and clustering of the individual stories coming from various sources and languages into storylines. Storyline clustering of stories covering the same events is an essential task for inquisitorial media monitoring. We address these two problems jointly by engaging the low-dimensional semantic representation capabilities of the sequence to sequence neural translation models. To enable joint multi-task learning for multilingual neural translation of morphologically rich languages we replace the attention mechanism with the sliding-window mechanism and operate the sequence to sequence neural translation model on the character-level rather than on the word-level. The story segmentation and storyline clustering problem is tackled by examining the low-dimensional vectors produced as a side-product of the neural translation process. The results of this paper describe a novel approach to the automatic story segmentation and storyline clustering problem.</abstract>
       <url hash="53f63a53">L16-1282</url>
@@ -3023,7 +3023,7 @@
       <title>Exploring the Realization of Irony in <fixed-case>T</fixed-case>witter Data</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>1794–1799</pages>
       <abstract>Handling figurative language like irony is currently a challenging task in natural language processing. Since irony is commonly used in user-generated content, its presence can significantly undermine accurate analysis of opinions and sentiment in such texts. Understanding irony is therefore important if we want to push the state-of-the-art in tasks such as sentiment analysis. In this research, we present the construction of a Twitter dataset for two languages, being English and Dutch, and the development of new guidelines for the annotation of verbal irony in social media texts. Furthermore, we present some statistics on the annotated corpora, from which we can conclude that the detection of contrasting evaluations might be a good indicator for recognizing irony.</abstract>
       <url hash="c807bece">L16-1283</url>
@@ -3031,9 +3031,9 @@
     </paper>
     <paper id="284">
       <title>Discriminating Similar Languages: Evaluations and Explorations</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>1800–1807</pages>
       <abstract>We present an analysis of the performance of machine learning classifiers on discriminating between similar languages and language varieties. We carried out a number of experiments using the results of the two editions of the Discriminating between Similar Languages (DSL) shared task. We investigate the progress made between the two tasks, estimate an upper bound on possible performance using ensemble and oracle combination, and provide learning curves to help us understand which languages are more challenging. A number of difficult sentences are identified and investigated further with human annotation</abstract>
@@ -3045,7 +3045,7 @@
       <author><first>Latifa</first><last>Al-Sulaiti</last></author>
       <author><first>Noorhan</first><last>Abbas</last></author>
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Ayman</first><last>Alghamdi</last></author>
       <pages>1808–1812</pages>
       <abstract>Inspired by the Oxford Children’s Corpus, we have developed a prototype corpus of Arabic texts written and/or selected for children. Our Arabic Children’s Corpus of 2950 documents and nearly 2 million words has been collected manually from the web during a 3-month project. It is of high quality, and contains a range of different children’s genres based on sources located, including classic tales from The Arabian Nights, and popular fictional characters such as Goha. We anticipate that the current and subsequent versions of our corpus will lead to interesting studies in text classification, language use, and ideology in children’s texts.</abstract>
@@ -3064,7 +3064,7 @@
       <title>Learning Tone and Attribution for Financial Text Mining</title>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Andrew</first><last>Moore</last></author>
       <author><first>Martin</first><last>Walker</last></author>
       <author><first>Thomas</first><last>Schleicher</last></author>
@@ -3117,7 +3117,7 @@
     </paper>
     <paper id="292">
       <title>Comparing the Level of Code-Switching in Corpora</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Amitava</first><last>Das</last></author>
       <pages>1850–1855</pages>
       <abstract>Social media texts are often fairly informal and conversational, and when produced by bilinguals tend to be written in several different languages simultaneously, in the same way as conversational speech. The recent availability of large social media corpora has thus also made large-scale code-switched resources available for research. The paper addresses the issues of evaluation and comparison these new corpora entail, by defining an objective measure of corpus level complexity of code-switched texts. It is also shown how this formal measure can be used in practice, by applying it to several code-switched corpora.</abstract>
@@ -3128,8 +3128,8 @@
       <title>Evaluation of the <fixed-case>KIT</fixed-case> Lecture Translation System</title>
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Sarah</first><last>Fünfer</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>1856–1861</pages>
       <abstract>To attract foreign students is among the goals of the Karlsruhe Institute of Technology (KIT). One obstacle to achieving this goal is that lectures at KIT are usually held in German which many foreign students are not sufficiently proficient in, as, e.g., opposed to English. While the students from abroad are learning German during their stay at KIT, it is challenging to become proficient enough in it in order to follow a lecture. As a solution to this problem we offer our automatic simultaneous lecture translation. It translates German lectures into English in real time. While not as good as human interpreters, the system is available at a price that KIT can afford in order to offer it in potentially all lectures. In order to assess whether the quality of the system we have conducted a user study. In this paper we present this study, the way it was conducted and its results. The results indicate that the quality of the system has passed a threshold as to be able to support students in their studies. The study has helped to identify the most crucial weaknesses of the systems and has guided us which steps to take next.</abstract>
       <url hash="eac59415">L16-1293</url>
@@ -3137,7 +3137,7 @@
     </paper>
     <paper id="294">
       <title>The <fixed-case>ACL</fixed-case> <fixed-case>RD</fixed-case>-<fixed-case>TEC</fixed-case> 2.0: A Language Resource for Evaluating Term Extraction and Entity Recognition Methods</title>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Anne-Kathrin</first><last>Schumann</last></author>
       <pages>1862–1868</pages>
       <abstract>This paper introduces the ACL Reference Dataset for Terminology Extraction and Classification, version 2.0 (ACL RD-TEC 2.0). The ACL RD-TEC 2.0 has been developed with the aim of providing a benchmark for the evaluation of term and entity recognition tasks based on specialised text from the computational linguistics domain. This release of the corpus consists of 300 abstracts from articles in the ACL Anthology Reference Corpus, published between 1978–2006. In these abstracts, terms (i.e., single or multi-word lexical units with a specialised meaning) are manually annotated. In addition to their boundaries in running text, annotated terms are classified into one of the seven categories method, tool, language resource (LR), LR product, model, measures and measurements, and other. To assess the quality of the annotations and to determine the difficulty of this annotation task, more than 171 of the abstracts are annotated twice, independently, by each of the two annotators. In total, 6,818 terms are identified and annotated in more than 1300 sentences, resulting in a specialised vocabulary made of 3,318 lexical forms, mapped to 3,471 concepts. We explain the development of the annotation guidelines and discuss some of the challenges we encountered in this annotation task.</abstract>
@@ -3164,7 +3164,7 @@
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Ondřej</first><last>Klejch</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>1877–1882</pages>
       <abstract>This work addresses the need to aid Machine Translation (MT) development cycles with a complete workflow of MT evaluation methods. Our aim is to assess, compare and improve MT system variants. We hereby report on novel tools and practices that support various measures, developed in order to support a principled and informed approach of MT development. Our toolkit for automatic evaluation showcases quick and detailed comparison of MT system variants through automatic metrics and n-gram feedback, along with manual evaluation via edit-distance, error annotation and task-based feedback.</abstract>
       <url hash="55f7b129">L16-1296</url>
@@ -3175,7 +3175,7 @@
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Mohamed Ameur Ben</first><last>Jannet</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>1883–1889</pages>
       <abstract>Automatic Speech recognition (ASR) is one of the most widely used components in spoken language processing applications. ASR errors are of varying importance with respect to the application, making error analysis keys to improving speech processing applications. Knowing the most serious errors for the applicative case is critical to build better systems. In the context of Automatic Speech Recognition (ASR) used as a first step towards Named Entity Recognition (NER) in speech, error seriousness is usually determined by their frequency, due to the use of the WER as metric to evaluate the ASR output, despite the emergence of more relevant measures in the literature. We propose to use a different evaluation metric form the literature in order to classify ASR errors according to their seriousness for NER. Our results show that the ASR errors importance is ranked differently depending on the used evaluation metric. A more detailed analysis shows that the estimation of the error impact given by the ATENE metric is more adapted to the NER task than the estimation based only on the most used frequency metric WER.</abstract>
       <url hash="9240cbb5">L16-1297</url>
@@ -3185,7 +3185,7 @@
       <title>A Study of Reuse and Plagiarism in <fixed-case>LREC</fixed-case> papers</title>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>1890–1897</pages>
       <abstract>The aim of this experiment is to present an easy way to compare fragments of texts in order to detect (supposed) results of copy &amp; paste operations between articles in the domain of Natural Language Processing (NLP). The search space of the comparisons is a corpus labeled as NLP4NLP gathering a large part of the NLP field. The study is centered on LREC papers in both directions, first with an LREC paper borrowing a fragment of text from the collection, and secondly in the reverse direction with fragments of LREC documents borrowed and inserted in the collection.</abstract>
       <url hash="6cc2bbc9">L16-1298</url>
@@ -3203,7 +3203,7 @@
     </paper>
     <paper id="300">
       <title>More than Word Cooccurrence: Exploring Support and Opposition in International Climate Negotiations with Semantic Parsing</title>
-      <author><first>Pablo</first><last>Ruiz Fabo</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz Fabo</last></author>
       <author><first>Clément</first><last>Plancq</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>1902–1907</pages>
@@ -3213,9 +3213,9 @@
     </paper>
     <paper id="301">
       <title>A Sequence Model Approach to Relation Extraction in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Sandra</first><last>Collovini</last></author>
+      <author id="sandra-collovini"><first>Sandra</first><last>Collovini</last></author>
       <author><first>Gabriel</first><last>Machado</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>1908–1912</pages>
       <abstract>The task of Relation Extraction from texts is one of the main challenges in the area of Information Extraction, considering the required linguistic knowledge and the sophistication of the language processing techniques employed. This task aims at identifying and classifying semantic relations that occur between entities recognized in a given text. In this paper, we evaluated a Conditional Random Fields classifier for the extraction of any relation descriptor occurring between named entities (Organisation, Person and Place categories), as well as pre-defined relation types between these entities in Portuguese texts.</abstract>
       <url hash="3a21f3cd">L16-1301</url>
@@ -3225,7 +3225,7 @@
       <title>Evaluation Set for <fixed-case>S</fixed-case>lovak News Information Retrieval</title>
       <author><first>Daniel</first><last>Hládek</last></author>
       <author><first>Jan</first><last>Staš</last></author>
-      <author><first>Jozef</first><last>Juhár</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhár</last></author>
       <pages>1913–1916</pages>
       <abstract>This work proposes an information retrieval evaluation set for the Slovak language. A set of 80 queries written in the natural language is given together with the set of relevant documents. The document set contains 3980 newspaper articles sorted into 6 categories. Each document in the result set is manually annotated for relevancy with its corresponding query. The evaluation set is mostly compatible with the Cranfield test collection using the same methodology for queries and annotation of relevancy. In addition to that it provides annotation for document title, author, publication date and category that can be used for evaluation of automatic document clustering and categorization.</abstract>
       <url hash="75d8af07">L16-1302</url>
@@ -3250,9 +3250,9 @@
       <title><fixed-case>T</fixed-case>erm<fixed-case>ITH</fixed-case>-Eval: a <fixed-case>F</fixed-case>rench Standard-Based Resource for Keyphrase Extraction Evaluation</title>
       <author><first>Adrien</first><last>Bougouin</last></author>
       <author><first>Sabine</first><last>Barreaux</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>1924–1927</pages>
       <abstract>Keyphrase extraction is the task of finding phrases that represent the important content of a document. The main aim of keyphrase extraction is to propose textual units that represent the most important topics developed in a document. The output keyphrases of automatic keyphrase extraction methods for test documents are typically evaluated by comparing them to manually assigned reference keyphrases. Each output keyphrase is considered correct if it matches one of the reference keyphrases. However, the choice of the appropriate textual unit (keyphrase) for a topic is sometimes subjective and evaluating by exact matching underestimates the performance. This paper presents a dataset of evaluation scores assigned to automatically extracted keyphrases by human evaluators. Along with the reference keyphrases, the manual evaluations can be used to validate new evaluation measures. Indeed, an evaluation measure that is highly correlated to the manual evaluation is appropriate for the evaluation of automatic keyphrase extraction methods.</abstract>
       <url hash="8f652dc3">L16-1304</url>
@@ -3360,7 +3360,7 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>1986–1992</pages>
       <abstract>Computer-assisted transcription promises high-quality speech transcription at reduced costs. This is achieved by limiting human effort to transcribing parts for which automatic transcription quality is insufficient. Our goal is to improve the human transcription quality via appropriate user interface design. We focus on iterative interfaces that allow humans to solve tasks based on an initially given suggestion, in this case an automatic transcription. We conduct a user study that reveals considerable quality gains for three variations of iterative interfaces over a non-iterative from-scratch transcription interface. Our iterative interfaces included post-editing, confidence-enhanced post-editing, and a novel retyping interface. All three yielded similar quality on average, but we found that the proposed retyping interface was less sensitive to the difficulty of the segment, and superior when the automatic transcription of the segment contained relatively many errors. An analysis using mixed-effects models allows us to quantify these and other factors and draw conclusions over which interface design should be chosen in which circumstance.</abstract>
       <url hash="103d0ae9">L16-1314</url>
@@ -3394,7 +3394,7 @@
       <author><first>Linne</first><last>Ha</last></author>
       <author><first>Martin</first><last>Jansche</last></author>
       <author><first>Knot</first><last>Pipatsrisawat</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>2005–2010</pages>
       <abstract>We present a text-to-speech (TTS) system designed for the dialect of Bengali spoken in Bangladesh. This work is part of an ongoing effort to address the needs of under-resourced languages. We propose a process for streamlining the bootstrapping of TTS systems for under-resourced languages. First, we use crowdsourcing to collect the data from multiple ordinary speakers, each speaker recording small amount of sentences. Second, we leverage an existing text normalization system for a related language (Hindi) to bootstrap a linguistic front-end for Bangla. Third, we employ statistical techniques to construct multi-speaker acoustic models using Long Short-Term Memory Recurrent Neural Network (LSTM-RNN) and Hidden Markov Model (HMM) approaches. We then describe our experiments that show that the resulting TTS voices score well in terms of their perceived quality as measured by Mean Opinion Score (MOS) evaluations.</abstract>
       <url hash="70292021">L16-1317</url>
@@ -3415,7 +3415,7 @@
     </paper>
     <paper id="319">
       <title>Web Chat Conversations from Contact Centers: a Descriptive Study</title>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Aleksandra</first><last>Guerraz</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <pages>2017–2021</pages>
@@ -3427,7 +3427,7 @@
       <title>Identification of Drug-Related Medical Conditions in Social Media</title>
       <author><first>François</first><last>Morlane-Hondère</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>2022–2028</pages>
       <abstract>Monitoring social media has been shown to be an interesting approach for the early detection of drug adverse effects. In this paper, we describe a system which extracts medical entities in French drug reviews written by users. We focus on the identification of medical conditions, which is based on the concept of post-coordination: we first extract minimal medical-related entities (pain, stomach) then we combine them to identify complex ones (It was the worst [pain I ever felt in my stomach]). These two steps are respectively performed by two classifiers, the first being based on Conditional Random Fields and the second one on Support Vector Machines. The overall results of the minimal entity classifier are the following: P=0.926; R=0.849; F1=0.886. A thourough analysis of the feature set shows that, when combined with word lemmas, clusters generated by word2vec are the most valuable features. When trained on the output of the first classifier, the second classifier’s performances are the following: p=0.683;r=0.956;f1=0.797. The addition of post-processing rules did not add any significant global improvement but was found to modify the precision/recall ratio.</abstract>
       <url hash="b75573ee">L16-1320</url>
@@ -3446,7 +3446,7 @@
     <paper id="322">
       <title>A Corpus of <fixed-case>W</fixed-case>ikipedia Discussions: Over the Years, with Topic, Power and Gender Labels</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>2034–2038</pages>
       <abstract>In order to gain a deep understanding of how social context manifests in interactions, we need data that represents interactions from a large community of people over a long period of time, capturing different aspects of social context. In this paper, we present a large corpus of Wikipedia Talk page discussions that are collected from a broad range of topics, containing discussions that happened over a period of 15 years. The dataset contains 166,322 discussion threads, across 1236 articles/topics that span 15 different topic categories or domains. The dataset also captures whether the post is made by an registered user or not, and whether he/she was an administrator at the time of making the post. It also captures the Wikipedia age of editors in terms of number of months spent as an editor, as well as their gender. This corpus will be a valuable resource to investigate a variety of computational sociolinguistics research questions regarding online social interactions.</abstract>
       <url hash="4434dcb7">L16-1322</url>
@@ -3455,8 +3455,8 @@
     <paper id="323">
       <title>Phrase Detectives Corpus 1.0 Crowdsourced Anaphoric Coreference.</title>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>2039–2046</pages>
       <abstract>Natural Language Engineering tasks require large and complex annotated datasets to build more advanced models of language. Corpora are typically annotated by several experts to create a gold standard; however, there are now compelling reasons to use a non-expert crowd to annotate text, driven by cost, speed and scalability. Phrase Detectives Corpus 1.0 is an anaphorically-annotated corpus of encyclopedic and narrative text that contains a gold standard created by multiple experts, as well as a set of annotations created by a large non-expert crowd. Analysis shows very good inter-expert agreement (kappa=.88-.93) but a more variable baseline crowd agreement (kappa=.52-.96). Encyclopedic texts show less agreement (and by implication are harder to annotate) than narrative texts. The release of this corpus is intended to encourage research into the use of crowds for text annotation and the development of more advanced, probabilistic language models, in particular for anaphoric coreference.</abstract>
       <url hash="1ab1e81f">L16-1323</url>
@@ -3464,11 +3464,11 @@
     </paper>
     <paper id="324">
       <title>Summ-it++: an Enriched Version of the Summ-it Corpus</title>
-      <author><first>Evandro</first><last>Fonseca</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro</first><last>Fonseca</last></author>
       <author><first>André</first><last>Antonitsch</last></author>
-      <author><first>Sandra</first><last>Collovini</last></author>
+      <author id="sandra-collovini"><first>Sandra</first><last>Collovini</last></author>
       <author><first>Daniela</first><last>Amaral</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Anny</first><last>Figueira</last></author>
       <pages>2047–2051</pages>
       <abstract>This paper presents Summ-it++, an enriched version the Summ-it corpus. In this new version, the corpus has received new semantic layers, named entity categories and relations between named entities, adding to the previous coreference annotation. In addition, we change the original Summ-it format to SemEval</abstract>
@@ -3479,7 +3479,7 @@
       <title>Towards Multiple Antecedent Coreference Resolution in Specialized Discourse</title>
       <author><first>Alicia</first><last>Burga</last></author>
       <author><first>Sergio</first><last>Cajal</last></author>
-      <author><first>Joan</first><last>Codina-Filbà</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filbà</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>2052–2057</pages>
       <abstract>Despite the popularity of coreference resolution as a research topic, the overwhelming majority of the work in this area focused so far on single antecedence coreference only. Multiple antecedent coreference (MAC) has been largely neglected. This can be explained by the scarcity of the phenomenon of MAC in generic discourse. However, in specialized discourse such as patents, MAC is very dominant. It seems thus unavoidable to address the problem of MAC resolution in the context of tasks related to automatic patent material processing, among them abstractive summarization, deep parsing of patents, construction of concept maps of the inventions, etc. We present the first version of an operational rule-based MAC resolution strategy for patent material that covers the three major types of MAC: (i) nominal MAC, (ii) MAC with personal / relative pronouns, and MAC with reflexive / reciprocal pronouns. The evaluation shows that our strategy performs well in terms of precision and recall.</abstract>
@@ -3493,7 +3493,7 @@
       <author><first>Antonella</first><last>Bristot</last></author>
       <author><first>Federica</first><last>Cavicchio</last></author>
       <author><first>Kepa</first><last>Rodriguez</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>2058–2062</pages>
       <abstract>This paper presents a second release of the ARRAU dataset: a multi-domain corpus with thorough linguistically motivated annotation of anaphora and related phenomena. Building upon the first release almost a decade ago, a considerable effort had been invested in improving the data both quantitatively and qualitatively. Thus, we have doubled the corpus size, expanded the selection of covered phenomena to include referentiality and genericity and designed and implemented a methodology for enforcing the consistency of the manual annotation. We believe that the new release of ARRAU provides a valuable material for ongoing research in complex cases of coreference as well as for a variety of related tasks. The corpus is publicly available through LDC.</abstract>
       <url hash="48907f9d">L16-1326</url>
@@ -3503,7 +3503,7 @@
       <title>An Annotated Corpus and Method for Analysis of Ad-Hoc Structures Embedded in Text</title>
       <author><first>Eric</first><last>Yeh</last></author>
       <author><first>John</first><last>Niekrasz</last></author>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Richard</first><last>Rohwer</last></author>
       <pages>2063–2070</pages>
       <abstract>We describe a method for identifying and performing functional analysis of structured regions that are embedded in natural language documents, such as tables or key-value lists. Such regions often encode information according to ad hoc schemas and avail themselves of visual cues in place of natural language grammar, presenting problems for standard information extraction algorithms. Unlike previous work in table extraction, which assumes a relatively noiseless two-dimensional layout, our aim is to accommodate a wide variety of naturally occurring structure types. Our approach has three main parts. First, we collect and annotate a a diverse sample of “naturally” occurring structures from several sources. Second, we use probabilistic text segmentation techniques, featurized by skip bigrams over spatial and token category cues, to automatically identify contiguous regions of structured text that share a common schema. Finally, we identify the records and fields within each structured region using a combination of distributional similarity and sequence alignment methods, guided by minimal supervision in the form of a single annotated record. We evaluate the last two components individually, and conclude with a discussion of further work.</abstract>
@@ -3534,7 +3534,7 @@
     <paper id="330">
       <title><fixed-case>NNB</fixed-case>locks: A Deep Learning Framework for Computational Linguistics Neural Network Models</title>
       <author><first>Frederico Tommasi</first><last>Caroli</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>João Carlos Pereira</first><last>da Silva</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>2081–2085</pages>
@@ -3550,7 +3550,7 @@
       <author><first>Enrico</first><last>Ghidoni</last></author>
       <author><first>Norina</first><last>Marcello</last></author>
       <author><first>Rema Rossini</first><last>Favretti</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>2086–2093</pages>
       <abstract>This paper presents some preliminary results of the OPLON project. It aimed at identifying early linguistic symptoms of cognitive decline in the elderly. This pilot study was conducted on a corpus composed of spontaneous speech sample collected from 39 subjects, who underwent a neuropsychological screening for visuo-spatial abilities, memory, language, executive functions and attention. A rich set of linguistic features was extracted from the digitalised utterances (at phonetic, suprasegmental, lexical, morphological and syntactic levels) and the statistical significance in pinpointing the pathological process was measured. Our results show remarkable trends for what concerns both the linguistic traits selection and the automatic classifiers building.</abstract>
       <url hash="01b6791e">L16-1331</url>
@@ -3572,11 +3572,11 @@
     <paper id="333">
       <title>Building Language Resources for Exploring Autism Spectrum Disorders</title>
       <author><first>Julia</first><last>Parish-Morris</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Leila</first><last>Bateman</last></author>
       <author><first>Emily</first><last>Ferguson</last></author>
-      <author><first>Robert T.</first><last>Schultz</last></author>
+      <author id="robert-t-schultz"><first>Robert T.</first><last>Schultz</last></author>
       <pages>2100–2107</pages>
       <abstract>Autism spectrum disorder (ASD) is a complex neurodevelopmental condition that would benefit from low-cost and reliable improvements to screening and diagnosis. Human language technologies (HLTs) provide one possible route to automating a series of subjective decisions that currently inform “Gold Standard” diagnosis based on clinical judgment. In this paper, we describe a new resource to support this goal, comprised of 100 20-minute semi-structured English language samples labeled with child age, sex, IQ, autism symptom severity, and diagnostic classification. We assess the feasibility of digitizing and processing sensitive clinical samples for data sharing, and identify areas of difficulty. Using the methods described here, we propose to join forces with researchers and clinicians throughout the world to establish an international repository of annotated language samples from individuals with ASD and related disorders. This project has the potential to improve the lives of individuals with ASD and their families by identifying linguistic features that could improve remote screening, inform personalized intervention, and promote advancements in clinically-oriented HLTs.</abstract>
       <url hash="fed4c462">L16-1333</url>
@@ -3633,7 +3633,7 @@
       <author><first>Anna</first><last>Feltracco</last></author>
       <author><first>Simone</first><last>Magnolini</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>2138–2144</pages>
       <abstract>We describe an experiment for the acquisition of opposition relations among Italian verb senses, based on a crowdsourcing methodology. The goal of the experiment is to discuss whether the types of opposition we distinguish (i.e. complementarity, antonymy, converseness and reversiveness) are actually perceived by the crowd. In particular, we collect data for Italian by using the crowdsourcing platform CrowdFlower. We ask annotators to judge the type of opposition existing among pairs of sentences -previously judged as opposite- that differ only for a verb: the verb in the first sentence is opposite of the verb in second sentence. Data corroborate the hypothesis that some opposition relations exclude each other, while others interact, being recognized as compatible by the contributors.</abstract>
       <url hash="876b0f52">L16-1339</url>
@@ -3666,7 +3666,7 @@
       <title>Introducing the Weighted Trustability Evaluator for Crowdsourcing Exemplified by Speaker Likability Classification</title>
       <author><first>Simone</first><last>Hantke</last></author>
       <author><first>Erik</first><last>Marchi</last></author>
-      <author><first>Björn</first><last>Schuller</last></author>
+      <author id="bjorn-schuller"><first>Björn</first><last>Schuller</last></author>
       <pages>2156–2161</pages>
       <abstract>Crowdsourcing is an arising collaborative approach applicable among many other applications to the area of language and speech processing. In fact, the use of crowdsourcing was already applied in the field of speech processing with promising results. However, only few studies investigated the use of crowdsourcing in computational paralinguistics. In this contribution, we propose a novel evaluator for crowdsourced-based ratings termed Weighted Trustability Evaluator (WTE) which is computed from the rater-dependent consistency over the test questions. We further investigate the reliability of crowdsourced annotations as compared to the ones obtained with traditional labelling procedures, such as constrained listening experiments in laboratories or in controlled environments. This comparison includes an in-depth analysis of obtainable classification performances. The experiments were conducted on the Speaker Likability Database (SLD) already used in the INTERSPEECH Challenge 2012, and the results lend further weight to the assumption that crowdsourcing can be applied as a reliable annotation source for computational paralinguistics given a sufficient number of raters and suited measurements of their reliability.</abstract>
       <url hash="06f52ff0">L16-1342</url>
@@ -3739,8 +3739,8 @@
       <title>That’ll Do Fine!: A Coarse Lexical Resource for <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case>, Using Polylingual Topic Models</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark James</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark James</first><last>Carman</last></author>
       <pages>2199–2203</pages>
       <abstract>Parallel corpora are often injected with bilingual lexical resources for improved Indian language machine translation (MT). In absence of such lexical resources, multilingual topic models have been used to create coarse lexical resources in the past, using a Cartesian product approach. Our results show that for morphologically rich languages like Hindi, the Cartesian product approach is detrimental for MT. We then present a novel ‘sentential’ approach to use this coarse lexical resource from a multilingual topic model. Our coarse lexical resource when injected with a parallel corpus outperforms a system trained using parallel corpus and a good quality lexical resource. As demonstrated by the quality of our coarse lexical resource and its benefit to MT, we believe that our sentential approach to create such a resource will help MT for resource-constrained languages.</abstract>
       <url hash="bb595054">L16-1349</url>
@@ -3752,7 +3752,7 @@
       <author><first>Manabu</first><last>Yaguchi</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <pages>2204–2208</pages>
@@ -3762,9 +3762,9 @@
     </paper>
     <paper id="351">
       <title>Domain Adaptation in <fixed-case>MT</fixed-case> Using Titles in <fixed-case>W</fixed-case>ikipedia as a Parallel Corpus: Resources and Evaluation</title>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>2209–2213</pages>
       <abstract>This paper presents how an state-of-the-art SMT system is enriched by using an extra in-domain parallel corpora extracted from Wikipedia. We collect corpora from parallel titles and from parallel fragments in comparable articles from Wikipedia. We carried out an evaluation with a double objective: evaluating the quality of the extracted data and evaluating the improvement due to the domain-adaptation. We think this can be very useful for languages with limited amount of parallel corpora, where in-domain data is crucial to improve the performance of MT sytems. The experiments on the Spanish-English language pair improve a baseline trained with the Europarl corpus in more than 2 points of BLEU when translating in the Computer Science domain.</abstract>
       <url hash="136f5c74">L16-1351</url>
@@ -3784,7 +3784,7 @@
     <paper id="353">
       <title>Towards producing bilingual lexica from monolingual corpora</title>
       <author><first>Jingyi</first><last>Han</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>2222–2227</pages>
       <abstract>Bilingual lexica are the basis for many cross-lingual natural language processing tasks. Recent works have shown success in learning bilingual dictionary by taking advantages of comparable corpora and a diverse set of signals derived from monolingual corpora. In the present work, we describe an approach to automatically learn bilingual lexica by training a supervised classifier using word embedding-based vectors of only a few hundred translation equivalent word pairs. The word embedding representations of translation pairs were obtained from source and target monolingual corpora, which are not necessarily related. Our classifier is able to predict whether a new word pair is under a translation relation or not. We tested it on two quite distinct language pairs Chinese-Spanish and English-Spanish. The classifiers achieved more than 0.90 precision and recall for both language pairs in different evaluation scenarios. These results show a high potential for this method to be used in bilingual lexica production for language pairs with reduced amount of parallel or comparable corpora, in particular for phrase table expansion in Statistical Machine Translation systems.</abstract>
       <url hash="638fdbf4">L16-1353</url>
@@ -3793,7 +3793,7 @@
     <paper id="354">
       <title>First Steps Towards Coverage-Based Sentence Alignment</title>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>Gabriel Pereira</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel Pereira</first><last>Lopes</last></author>
       <pages>2228–2231</pages>
       <abstract>In this paper, we introduce a coverage-based scoring function that discriminates between parallel and non-parallel sentences. When plugged into Bleualign, a state-of-the-art sentence aligner, our function improves both precision and recall of alignments over the originally proposed BLEU score. Furthermore, since our scoring function uses Moses phrase tables directly we avoid the need to translate the texts to be aligned, which is time-consuming and a potential source of alignment errors.</abstract>
       <url hash="2016cc4a">L16-1354</url>
@@ -3802,7 +3802,7 @@
     <paper id="355">
       <title>Using the <fixed-case>TED</fixed-case> Talks to Evaluate Spoken Post-editing of Machine Translation</title>
       <author><first>Jeevanthi</first><last>Liyanapathirana</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>2232–2239</pages>
       <abstract>This paper presents a solution to evaluate spoken post-editing of imperfect machine translation output by a human translator. We compare two approaches to the combination of machine translation (MT) and automatic speech recognition (ASR): a heuristic algorithm and a machine learning method. To obtain a data set with spoken post-editing information, we use the French version of TED talks as the source texts submitted to MT, and the spoken English counterparts as their corrections, which are submitted to an ASR system. We experiment with various levels of artificial ASR noise and also with a state-of-the-art ASR system. The results show that the combination of MT with ASR improves over both individual outputs of MT and ASR in terms of BLEU scores, especially when ASR performance is low.</abstract>
       <url hash="df1be3a1">L16-1355</url>
@@ -3810,7 +3810,7 @@
     </paper>
     <paper id="356">
       <title>Phrase Level Segmentation and Labelling of Machine Translation Errors</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>2240–2245</pages>
@@ -3820,7 +3820,7 @@
     </paper>
     <paper id="357">
       <title><fixed-case>S</fixed-case>ub<fixed-case>C</fixed-case>o: A Learner Translation Corpus of Human and Machine Subtitles</title>
-      <author><first>José Manuel</first><last>Martínez Martínez</last><affiliation>Universität des Saarlandes</affiliation></author>
+      <author id="jose-manuel-martinez"><first>José Manuel</first><last>Martínez Martínez</last><affiliation>Universität des Saarlandes</affiliation></author>
       <author><first>Mihaela</first><last>Vela</last><affiliation>Universität des Saarlandes</affiliation></author>
       <pages>2246–2254</pages>
       <abstract>In this paper, we present a freely available corpus of human and automatic translations of subtitles. The corpus comprises the original English subtitles (SRC), both human (HT) and machine translations (MT) into German, as well as post-editions (PE) of the MT output. HT and MT are annotated with errors. Moreover, human evaluation is included in HT, MT, and PE. Such a corpus is a valuable resource for both human and machine translation communities, enabling the direct comparison – in terms of errors and evaluation – between human and machine translations and post-edited machine translations.</abstract>
@@ -3843,7 +3843,7 @@
       <title><fixed-case>JATE</fixed-case> 2.0: <fixed-case>J</fixed-case>ava Automatic Term Extraction with <fixed-case>A</fixed-case>pache <fixed-case>S</fixed-case>olr</title>
       <author><first>Ziqi</first><last>Zhang</last></author>
       <author><first>Jie</first><last>Gao</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <pages>2262–2269</pages>
       <abstract>Automatic Term Extraction (ATE) or Recognition (ATR) is a fundamental processing step preceding many complex knowledge engineering tasks. However, few methods have been implemented as public tools and in particular, available as open-source freeware. Further, little effort is made to develop an adaptable and scalable framework that enables customization, development, and comparison of algorithms under a uniform environment. This paper introduces JATE 2.0, a complete remake of the free Java Automatic Term Extraction Toolkit (Zhang et al., 2008) delivering new features including: (1) highly modular, adaptable and scalable ATE thanks to integration with Apache Solr, the open source free-text indexing and search platform; (2) an extended collection of state-of-the-art algorithms. We carry out experiments on two well-known benchmarking datasets and compare the algorithms along the dimensions of effectiveness (precision) and efficiency (speed and memory consumption). To the best of our knowledge, this is by far the only free ATE library offering a flexible architecture and the most comprehensive collection of algorithms.</abstract>
       <url hash="958336cc">L16-1359</url>
@@ -3860,7 +3860,7 @@
     </paper>
     <paper id="361">
       <title><fixed-case>T</fixed-case>ermo<fixed-case>PL</fixed-case> - a Flexible Tool for Terminology Extraction</title>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <author><first>Piotr</first><last>Rychlik</last></author>
       <pages>2278–2284</pages>
@@ -3870,7 +3870,7 @@
     </paper>
     <paper id="362">
       <title><fixed-case>G</fixed-case>ho<fixed-case>S</fixed-case>t-<fixed-case>NN</fixed-case>: A Representative Gold Standard of <fixed-case>G</fixed-case>erman Noun-Noun Compounds</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Nana</first><last>Khvtisavrishvili</last></author>
@@ -3883,8 +3883,8 @@
       <title><fixed-case>D</fixed-case>e<fixed-case>Q</fixed-case>ue: A Lexicon of Complex Prepositions and Conjunctions in <fixed-case>F</fixed-case>rench</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>André</first><last>Valli</last></author>
-      <author><first>José</first><last>Deulofeu</last></author>
+      <author id="andre-valli"><first>André</first><last>Valli</last></author>
+      <author id="jose-deulofeu"><first>José</first><last>Deulofeu</last></author>
       <pages>2293–2298</pages>
       <abstract>We introduce DeQue, a lexicon covering French complex prepositions (CPRE) like “à partir de” (from) and complex conjunctions (CCONJ) like “bien que” (although). The lexicon includes fine-grained linguistic description based on empirical evidence. We describe the general characteristics of CPRE and CCONJ in French, with special focus on syntactic ambiguity. Then, we list the selection criteria used to build the lexicon and the corpus-based methodology employed to collect entries. Finally, we quantify the ambiguity of each construction by annotating around 100 sentences randomly taken from the FRWaC. In addition to its theoretical value, the resource has many potential practical applications. We intend to employ DeQue for treebank annotation and to train a dependency parser that can takes complex constructions into account.</abstract>
       <url hash="76131ffa">L16-1363</url>
@@ -3894,7 +3894,7 @@
       <title><fixed-case>PARSEME</fixed-case> Survey on <fixed-case>MWE</fixed-case> Resources</title>
       <author><first>Gyri Smørdal</first><last>Losnegaard</last></author>
       <author><first>Federico</first><last>Sangati</last></author>
-      <author><first>Carla Parra</first><last>Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Sascha</first><last>Bargmann</last></author>
       <author><first>Johanna</first><last>Monti</last></author>
@@ -3916,10 +3916,10 @@
     <paper id="366">
       <title>Transfer-Based Learning-to-Rank Assessment of Medical Term Technicality</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
-      <author><first>Leonardo Campillos</first><last>Llanos</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo Campillos</first><last>Llanos</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>2312–2316</pages>
       <abstract>While measuring the readability of texts has been a long-standing research topic, assessing the technicality of terms has only been addressed more recently and mostly for the English language. In this paper, we train a learning-to-rank model to determine a specialization degree for each term found in a given list. Since no training data for this task exist for French, we train our system with non-lexical features on English data, namely, the Consumer Health Vocabulary, then apply it to French. The features include the likelihood ratio of the term based on specialized and lay language models, and tests for containing morphologically complex words. The evaluation of this approach is conducted on 134 terms from the UMLS Metathesaurus and 868 terms from the Eugloss thesaurus. The Normalized Discounted Cumulative Gain obtained by our system is over 0.8 on both test sets. Besides, thanks to the learning-to-rank approach, adding morphological features to the language model features improves the results on the Eugloss thesaurus.</abstract>
       <url hash="4f91d0d8">L16-1366</url>
@@ -3929,7 +3929,7 @@
       <title>Example-based Acquisition of Fine-grained Collocation Resources</title>
       <author><first>Sara</first><last>Rodríguez-Fernández</last></author>
       <author><first>Roberto</first><last>Carlini</last></author>
-      <author><first>Luis Espinosa</first><last>Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis Espinosa</first><last>Anke</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>2317–2322</pages>
       <abstract>Collocations such as “heavy rain” or “make [a] decision”, are combinations of two elements where one (the base) is freely chosen, while the choice of the other (collocate) is restricted, depending on the base. Collocations present difficulties even to advanced language learners, who usually struggle to find the right collocate to express a particular meaning, e.g., both “heavy” and “strong” express the meaning ‘intense’, but while “rain” selects “heavy”, “wind” selects “strong”. Lexical Functions (LFs) describe the meanings that hold between the elements of collocations, such as ‘intense’, ‘perform’, ‘create’, ‘increase’, etc. Language resources with semantically classified collocations would be of great help for students, however they are expensive to build, since they are manually constructed, and scarce. We present an unsupervised approach to the acquisition and semantic classification of collocations according to LFs, based on word embeddings in which, given an example of a collocation for each of the target LFs and a set of bases, the system retrieves a list of collocates for each base and LF.</abstract>
@@ -3939,7 +3939,7 @@
     <paper id="368">
       <title><fixed-case>MWE</fixed-case>s in Treebanks: From Survey to Guidelines</title>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Gyri Smørdal</first><last>Losnegaard</last></author>
       <author><first>Eduard</first><last>Bejček</last></author>
       <author><first>Agata</first><last>Savary</last></author>
@@ -3953,7 +3953,7 @@
       <title>Multiword Expressions Dataset for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Dhirendra</first><last>Singh</last></author>
       <author><first>Sudha</first><last>Bhingardive</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2331–2335</pages>
       <abstract>Multiword Expressions (MWEs) are used frequently in natural languages, but understanding the diversity in MWEs is one of the open problem in the area of Natural Language Processing. In the context of Indian languages, MWEs play an important role. In this paper, we present MWEs annotation dataset created for Indian languages viz., Hindi and Marathi. We extract possible MWE candidates using two repositories: 1) the POS-tagged corpus and 2) the IndoWordNet synsets. Annotation is done for two types of MWEs: compound nouns and light verb constructions. In the process of annotation, human annotators tag valid MWEs from these candidates based on the standard guidelines provided to them. We obtained 3178 compound nouns and 2556 light verb constructions in Hindi and 1003 compound nouns and 2416 light verb constructions in Marathi using two repositories mentioned before. This created resource is made available publicly and can be used as a gold standard for Hindi and Marathi MWE systems.</abstract>
       <url hash="e34fb5d2">L16-1369</url>
@@ -3971,7 +3971,7 @@
     </paper>
     <paper id="371">
       <title><fixed-case>E</fixed-case>asy<fixed-case>T</fixed-case>ree: A Graphical Tool for Dependency Tree Annotation</title>
-      <author><first>Alexa</first><last>Little</last></author>
+      <author id="alexa-n-little"><first>Alexa</first><last>Little</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
       <pages>2343–2347</pages>
       <abstract>This paper introduces EasyTree, a dynamic graphical tool for dependency tree annotation. Built in JavaScript using the popular D3 data visualization library, EasyTree allows annotators to construct and label trees entirely by manipulating graphics, and then export the corresponding data in JSON format. Human users are thus able to annotate in an intuitive way without compromising the machine-compatibility of the output. EasyTree has a number of features to assist annotators, including color-coded part-of-speech indicators and optional translation displays. It can also be customized to suit a wide range of projects; part-of-speech categories, edge labels, and many other settings can be edited from within the GUI. The system also utilizes UTF-8 encoding and properly handles both left-to-right and right-to-left scripts. By providing a user-friendly annotation tool, we aim to reduce time spent transforming data or learning to use the software, to improve the user experience for annotators, and to make annotation approachable even for inexperienced users. Unlike existing solutions, EasyTree is built entirely with standard web technologies–JavaScript, HTML, and CSS–making it ideal for web-based annotation efforts, including crowdsourcing efforts.</abstract>
@@ -4009,8 +4009,8 @@
     </paper>
     <paper id="375">
       <title>Hard Time Parsing Questions: Building a <fixed-case>Q</fixed-case>uestion<fixed-case>B</fixed-case>ank for <fixed-case>F</fixed-case>rench</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>2366–2370</pages>
       <abstract>We present the French Question Bank, a treebank of 2600 questions. We show that classical parsing model performance drop while the inclusion of this data set is highly beneficial without harming the parsing of non-question data. when facing out-of- domain data with strong structural diver- gences. Two thirds being aligned with the QB (Judge et al., 2006) and being freely available, this treebank will prove useful to build robust NLP systems.</abstract>
       <url hash="a1079bfc">L16-1375</url>
@@ -4019,7 +4019,7 @@
     <paper id="376">
       <title>Enhanced <fixed-case>E</fixed-case>nglish <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: An Improved Representation for Natural Language Understanding Tasks</title>
       <author><first>Sebastian</first><last>Schuster</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2371–2378</pages>
       <abstract>Many shallow natural language understanding tasks use dependency trees to extract relations between content words. However, strict surface-structure dependency trees tend to follow the linguistic structure of sentences too closely and frequently fail to provide direct relations between content words. To mitigate this problem, the original Stanford Dependencies representation also defines two dependency graph representations which contain additional and augmented relations that explicitly capture otherwise implicit relations between content words. In this paper, we revisit and extend these dependency graph representations in light of the recent Universal Dependencies (UD) initiative and provide a detailed account of an enhanced and an enhanced++ English UD representation. We further present a converter from constituency to basic, i.e., strict surface structure, UD trees, and a converter from basic UD trees to enhanced and enhanced++ English UD graphs. We release both converters as part of Stanford CoreNLP and the Stanford Parser.</abstract>
       <url hash="c09cac4e">L16-1376</url>
@@ -4028,10 +4028,10 @@
     <paper id="377">
       <title>A <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank of <fixed-case>U</fixed-case>rdu</title>
       <author><first>Maaz</first><last>Anwar</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Tafseer Ahmed</first><last>Khan</last></author>
       <pages>2379–2386</pages>
       <abstract>This paper describes our efforts for the development of a Proposition Bank for Urdu, an Indo-Aryan language. Our primary goal is the labeling of syntactic nodes in the existing Urdu dependency Treebank with specific argument labels. In essence, it involves annotation of predicate argument structures of both simple and complex predicates in the Treebank corpus. We describe the overall process of building the PropBank of Urdu. We discuss various statistics pertaining to the Urdu PropBank and the issues which the annotators encountered while developing the PropBank. We also discuss how these challenges were addressed to successfully expand the PropBank corpus. While reporting the Inter-annotator agreement between the two annotators, we show that the annotators share similar understanding of the annotation guidelines and of the linguistic phenomena present in the language. The present size of this Propbank is around 180,000 tokens which is double-propbanked by the two annotators for simple predicates. Another 100,000 tokens have been annotated for complex predicates of Urdu.</abstract>
@@ -4041,8 +4041,8 @@
     <paper id="378">
       <title><fixed-case>C</fixed-case>zech Legal Text Treebank 1.0</title>
       <author><first>Vincent</first><last>Kríž</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>2387–2392</pages>
       <abstract>We introduce a new member of the family of Prague dependency treebanks. The Czech Legal Text Treebank 1.0 is a morphologically and syntactically annotated corpus of 1,128 sentences. The treebank contains texts from the legal domain, namely the documents from the Collection of Laws of the Czech Republic. Legal texts differ from other domains in several language phenomena influenced by rather high frequency of very long sentences. A manual annotation of such sentences presents a new challenge. We describe a strategy and tools for this task. The resulting treebank can be explored in various ways. It can be downloaded from the LINDAT/CLARIN repository and viewed locally using the TrEd editor or it can be accessed on-line using the KonText and TreeQuery tools.</abstract>
       <url hash="6e3008ed">L16-1378</url>
@@ -4121,18 +4121,18 @@
     </paper>
     <paper id="386">
       <title>The Open Linguistics Working Group: Developing the Linguistic Linked Open Data Cloud</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Sebastian</first><last>Hellmann</last></author>
       <author><first>Bettina</first><last>Klimek</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <author><first>Jonathan</first><last>Pool</last></author>
       <pages>2435–2441</pages>
       <abstract>The Open Linguistics Working Group (OWLG) brings together researchers from various fields of linguistics, natural language processing, and information technology to present and discuss principles, case studies, and best practices for representing, publishing and linking linguistic data collections. A major outcome of our work is the Linguistic Linked Open Data (LLOD) cloud, an LOD (sub-)cloud of linguistic resources, which covers various linguistic databases, lexicons, corpora, terminologies, and metadata repositories. We present and summarize five years of progress on the development of the cloud and of advancements in open data in linguistics, and we describe recent community activities. The paper aims to serve as a guideline to orient and involve researchers with the community and/or Linguistic Linked Open Data.</abstract>
@@ -4174,7 +4174,7 @@
       <author><first>Timo</first><last>Petmanson</last></author>
       <author><first>Alexander</first><last>Tkachenko</last></author>
       <author><first>Sven</first><last>Laur</last></author>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <pages>2460–2466</pages>
       <abstract>Although there are many tools for natural language processing tasks in Estonian, these tools are very loosely interoperable, and it is not easy to build practical applications on top of them. In this paper, we introduce a new Python library for natural language processing in Estonian, which provides unified programming interface for various NLP components. The EstNLTK toolkit provides utilities for basic NLP tasks including tokenization, morphological analysis, lemmatisation and named entity recognition as well as offers more advanced features such as a clause segmentation, temporal expression extraction and normalization, verb chain detection, Estonian Wordnet integration and rule-based information extraction. Accompanied by a detailed API documentation and comprehensive tutorials, EstNLTK is suitable for a wide range of audience. We believe EstNLTK is mature enough to be used for developing NLP-backed systems both in industry and research. EstNLTK is freely available under the GNU GPL version 2+ license, which is standard for academic software.</abstract>
       <url hash="6b5095a1">L16-1390</url>
@@ -4232,8 +4232,8 @@
     </paper>
     <paper id="396">
       <title>Data Management Plans and Data Centers</title>
-      <author><first>Denise</first><last>DiPersio</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Daniel</first><last>Jaquette</last></author>
       <pages>2496–2501</pages>
       <abstract>Data management plans, data sharing plans and the like are now required by funders worldwide as part of research proposals. Concerned with promoting the notion of open scientific data, funders view such plans as the framework for satisfying the generally accepted requirements for data generated in funded research projects, among them that it be accessible, usable, standardized to the degree possible, secure and stable. This paper examines the origins of data management plans, their requirements and issues they raise for data centers and HLT resource development in general.</abstract>
@@ -4254,7 +4254,7 @@
     <paper id="398">
       <title>Facilitating Metadata Interoperability in <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case></title>
       <author><first>Lene</first><last>Offersgaard</last></author>
-      <author><first>Dorte Haltrup</first><last>Hansen</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte Haltrup</first><last>Hansen</last></author>
       <pages>2510–2515</pages>
       <abstract>The issue for CLARIN archives at the metadata level is to facilitate the user’s possibility to describe their data, even with their own standard, and at the same time make these metadata meaningful for a variety of users with a variety of resource types, and ensure that the metadata are useful for search across all resources both at the national and at the European level. We see that different people from different research communities fill in the metadata in different ways even though the metadata was defined and documented. This has impacted when the metadata are harvested and displayed in different environments. A loss of information is at stake. In this paper we view the challenges of ensuring metadata interoperability through examples of propagation of metadata values from the CLARIN-DK archive to the VLO. We see that the CLARIN Community in many ways support interoperability, but argue that agreeing upon standards, making clear definitions of the semantics of the metadata and their content is inevitable for the interoperability to work successfully. The key points are clear and freely available definitions, accessible documentation and easily usable facilities and guidelines for the metadata creators.</abstract>
       <url hash="2d3f880f">L16-1398</url>
@@ -4262,11 +4262,11 @@
     </paper>
     <paper id="399">
       <title>The <fixed-case>IPR</fixed-case>-cleared Corpus of Contemporary Written and Spoken <fixed-case>R</fixed-case>omanian Language</title>
-      <author><first>Dan</first><last>Tufiș</last></author>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
-      <author><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
-      <author><first>Tiberiu</first><last>Boroș</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boroș</last></author>
       <pages>2516–2521</pages>
       <abstract>The article describes the current status of a large national project, CoRoLa, aiming at building a reference corpus for the contemporary Romanian language. Unlike many other national corpora, CoRoLa contains only - IPR cleared texts and speech data, obtained from some of the country’s most representative publishing houses, broadcasting agencies, editorial offices, newspapers and popular bloggers. For the written component 500 million tokens are targeted and for the oral one 300 hours of recordings. The choice of texts is done according to their functional style, domain and subdomain, also with an eye to the international practice. A metadata file (following the CMDI model) is associated to each text file. Collected texts are cleaned and transformed in a format compatible with the tools for automatic processing (segmentation, tokenization, lemmatization, part-of-speech tagging). The paper also presents up-to-date statistics about the structure of the corpus almost two years before its official launching. The corpus will be freely available for searching. Users will be able to download the results of their searches and those original files when not against stipulations in the protocols we have with text providers.</abstract>
       <url hash="e34371e1">L16-1399</url>
@@ -4282,9 +4282,9 @@
       <author><first>Lucie</first><last>Chlumská</last></author>
       <author><first>Tomáš</first><last>Jelínek</last></author>
       <author><first>Dominika</first><last>Kováříková</last></author>
-      <author><first>Vladimír</first><last>Petkevič</last></author>
+      <author id="vladimir-petkevic"><first>Vladimír</first><last>Petkevič</last></author>
       <author><first>Pavel</first><last>Procházka</last></author>
-      <author><first>Hana</first><last>Skoumalová</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalová</last></author>
       <author><first>Michal</first><last>Škrabal</last></author>
       <author><first>Petr</first><last>Truneček</last></author>
       <author><first>Pavel</first><last>Vondřička</last></author>
@@ -4296,13 +4296,13 @@
     </paper>
     <paper id="401">
       <title><fixed-case>LREC</fixed-case> as a Graph: People and Resources in a Network</title>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Gabriella</first><last>Pardelli</last></author>
       <author><first>Irene</first><last>Russo</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>2529–2532</pages>
@@ -4364,10 +4364,10 @@
     </paper>
     <paper id="407">
       <title>A Finite-state Morphological Analyser for Tuvan</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Aziyana</first><last>Bayyr-ool</last></author>
       <author><first>Aelita</first><last>Salchak</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <pages>2562–2567</pages>
       <abstract>~This paper describes the development of free/open-source finite-state morphological transducers for Tuvan, a Turkic language spoken in and around the Tuvan Republic in Russia. The finite-state toolkit used for the work is the Helsinki Finite-State Toolkit (HFST), we use the lexc formalism for modelling the morphotactics and twol formalism for modelling morphophonological alternations. We present a novel description of the morphological combinatorics of pseudo-derivational morphemes in Tuvan. An evaluation is presented which shows that the transducer has a reasonable coverage―around 93%―on freely-available corpora of the languages, and high precision―over 99%―on a manually verified test set.</abstract>
       <url hash="89337e86">L16-1407</url>
@@ -4378,11 +4378,11 @@
       <author><first>Andrejs</first><last>Spektors</last></author>
       <author><first>Ilze</first><last>Auzina</last></author>
       <author><first>Roberts</first><last>Dargis</last></author>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
-      <author><first>Lauma</first><last>Pretkalnina</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalnina</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Baiba</first><last>Saulite</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulite</last></author>
       <pages>2568–2571</pages>
       <abstract>We describe an extensive and versatile lexical resource for Latvian, an under-resourced Indo-European language, which we call Tezaurs (Latvian for ‘thesaurus’). It comprises a large explanatory dictionary of more than 250,000 entries that are derived from more than 280 external sources. The dictionary is enriched with phonetic, morphological, semantic and other annotations, as well as augmented by various language processing tools allowing for the generation of inflectional forms and pronunciation, for on-the-fly selection of corpus examples, for suggesting synonyms, etc. Tezaurs is available as a public and widely used web application for end-users, as an open data set for the use in language technology (LT), and as an API ― a set of web services for the integration into third-party applications. The ultimate goal of Tezaurs is to be the central computational lexicon for Latvian, bringing together all Latvian words and frequently used multi-word units and allowing for the integration of other LT resources and tools.</abstract>
       <url hash="49d0bf55">L16-1408</url>
@@ -4391,8 +4391,8 @@
     <paper id="409">
       <title>A Finite-State Morphological Analyser for <fixed-case>S</fixed-case>indhi</title>
       <author><first>Raveesh</first><last>Motlani</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>2572–2577</pages>
       <abstract>Morphological analysis is a fundamental task in natural-language processing, which is used in other NLP applications such as part-of-speech tagging, syntactic parsing, information retrieval, machine translation, etc. In this paper, we present our work on the development of free/open-source finite-state morphological analyser for Sindhi. We have used Apertium’s lttoolbox as our finite-state toolkit to implement the transducer. The system is developed using a paradigm-based approach, wherein a paradigm defines all the word forms and their morphological features for a given stem (lemma). We have evaluated our system on the Sindhi Wikipedia corpus and achieved a reasonable coverage of 81% and a precision of over 97%.</abstract>
       <url hash="244d8594">L16-1409</url>
@@ -4428,7 +4428,7 @@
     <paper id="413">
       <title>Automatically Generated Affective Norms of Abstractness, Arousal, Imageability and Valence for 350 000 <fixed-case>G</fixed-case>erman Lemmas</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>2595–2598</pages>
       <abstract>This paper presents a collection of 350,000 German lemmatised words, rated on four psycholinguistic affective attributes. All ratings were obtained via a supervised learning algorithm that can automatically calculate a numerical rating of a word. We applied this algorithm to abstractness, arousal, imageability and valence. Comparison with human ratings reveals high correlation across all rating types. The full resource is publically available at: <url>http://www.ims.uni-stuttgart.de/data/affective_norms/</url></abstract>
       <url hash="d9feacb7">L16-1413</url>
@@ -4446,7 +4446,7 @@
     </paper>
     <paper id="415">
       <title>A Framework for Cross-lingual/Node-wise Alignment of Lexical-Semantic Resources</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>2607–2613</pages>
       <abstract>Given lexical-semantic resources in different languages, it is useful to establish cross-lingual correspondences, preferably with semantic relation labels, between the concept nodes in these resources. This paper presents a framework for enabling a cross-lingual/node-wise alignment of lexical-semantic resources, where cross-lingual correspondence candidates are first discovered and ranked, and then classified by a succeeding module. Indeed, we propose that a two-tier classifier configuration is feasible for the second module: the first classifier filters out possibly irrelevant correspondence candidates and the second classifier assigns a relatively fine-grained semantic relation label to each of the surviving candidates. The results of Japanese-to-English alignment experiments using EDR Electronic Dictionary and Princeton WordNet are described to exemplify the validity of the proposal.</abstract>
       <url hash="c5156c01">L16-1415</url>
@@ -4454,7 +4454,7 @@
     </paper>
     <paper id="416">
       <title>Lexical Coverage Evaluation of Large-scale Multilingual Semantic Lexicons for Twelve Languages</title>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Dawn</first><last>Archer</last></author>
       <author><first>Francesca</first><last>Bianchi</last></author>
@@ -4503,7 +4503,7 @@
     <paper id="420">
       <title>A Large Rated Lexicon with <fixed-case>F</fixed-case>rench Medical Words</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>2643–2648</pages>
       <abstract>Patients are often exposed to medical terms, such as anosognosia, myelodysplastic, or hepatojejunostomy, that can be semantically complex and hardly understandable by non-experts in medicine. Hence, it is important to assess which words are potentially non-understandable and require further explanations. The purpose of our work is to build specific lexicon in which the words are rated according to whether they are understandable or non-understandable. We propose to work with medical words in French such as provided by an international medical terminology. The terms are segmented in single words and then each word is manually processed by three annotators. The objective is to assign each word into one of the three categories: I can understand, I am not sure, I cannot understand. The annotators do not have medical training nor they present specific medical problems. They are supposed to represent an average patient. The inter-annotator agreement is then computed. The content of the categories is analyzed. Possible applications in which this lexicon can be helpful are proposed and discussed. The rated lexicon is freely available for the research purposes. It is accessible online at <url>http://natalia.grabar.perso.sfr.fr/rated-lexicon.html</url></abstract>
       <url hash="9857b4ff">L16-1420</url>
@@ -4520,7 +4520,7 @@
     <paper id="422">
       <title><fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: a lexical resource with semantic roles for <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
-      <author><first>Maria José Bocorny</first><last>Finatto</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José Bocorny</first><last>Finatto</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>2656–2661</pages>
       <abstract>This paper presents a lexical resource developed for Portuguese. The resource contains sentences annotated with semantic roles. The sentences were extracted from two domains: Cardiology research papers and newspaper articles. Both corpora were analyzed with the PALAVRAS parser and subsequently processed with a subcategorization frames extractor, so that each sentence that contained at least one main verb was stored in a database together with its syntactic organization. The annotation was manually carried out by a linguist using an annotation interface. Both the annotated and non-annotated data were exported to an XML format, which is readily available for download. The reason behind exporting non-annotated data is that there is syntactic information collected from the parser annotation in the non-annotated data, and this could be useful for other researchers. The sentences from both corpora were annotated separately, so that it is possible to access sentences either from the Cardiology or from the newspaper corpus. The full resource presents more than seven thousand semantically annotated sentences, containing 192 different verbs and more than 15 thousand individual arguments and adjuncts.</abstract>
@@ -4529,10 +4529,10 @@
     </paper>
     <paper id="423">
       <title>A Multilingual Predicate Matrix</title>
-      <author><first>Maddalen</first><last>Lopez de Lacalle</last></author>
+      <author id="maddalen-lopez-de-lacalle"><first>Maddalen</first><last>Lopez de Lacalle</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>2662–2668</pages>
       <abstract>This paper presents the Predicate Matrix 1.3, a lexical resource resulting from the integration of multiple sources of predicate information including FrameNet, VerbNet, PropBank and WordNet. This new version of the Predicate Matrix has been extended to cover nominal predicates by adding mappings to NomBank. Similarly, we have integrated resources in Spanish, Catalan and Basque. As a result, the Predicate Matrix 1.3 provides a multilingual lexicon to allow interoperable semantic analysis in multiple languages.</abstract>
       <url hash="cbde48ac">L16-1423</url>
@@ -4559,8 +4559,8 @@
     <paper id="426">
       <title>Enriching a <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et using Synonyms from a Monolingual Dictionary</title>
       <author><first>Alberto</first><last>Simões</last></author>
-      <author><first>Xavier</first><last>Gómez Guinovart</last></author>
-      <author><first>José João</first><last>Almeida</last></author>
+      <author id="xavier-gomez-guinovart"><first>Xavier</first><last>Gómez Guinovart</last></author>
+      <author id="jose-joao-almeida"><first>José João</first><last>Almeida</last></author>
       <pages>2682–2687</pages>
       <abstract>In this article we present an exploratory approach to enrich a WordNet-like lexical ontology with the synonyms present in a standard monolingual Portuguese dictionary. The dictionary was converted from PDF into XML and senses were automatically identified and annotated. This allowed us to extract them, independently of definitions, and to create sets of synonyms (synsets). These synsets were then aligned with WordNet synsets, both in the same language (Portuguese) and projecting the Portuguese terms into English, Spanish and Galician. This process allowed both the addition of new term variants to existing synsets, as to create new synsets for Portuguese.</abstract>
       <url hash="8bafed22">L16-1426</url>
@@ -4589,7 +4589,7 @@
       <title>Aspect based Sentiment Analysis in <fixed-case>H</fixed-case>indi: Resource Creation and Evaluation</title>
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2703–2709</pages>
       <abstract>Due to the phenomenal growth of online product reviews, sentiment analysis (SA) has gained huge attention, for example, by online service providers. A number of benchmark datasets for a wide range of domains have been made available for sentiment analysis, especially in resource-rich languages. In this paper we assess the challenges of SA in Hindi by providing a benchmark setup, where we create an annotated dataset of high quality, build machine learning models for sentiment analysis in order to show the effective usage of the dataset, and finally make the resource available to the community for further advancement of research. The dataset comprises of Hindi product reviews crawled from various online sources. Each sentence of the review is annotated with aspect term and its associated sentiment. As classification algorithms we use Conditional Random Filed (CRF) and Support Vector Machine (SVM) for aspect term extraction and sentiment analysis, respectively. Evaluation results show the average F-measure of 41.07% for aspect term extraction and accuracy of 54.05% for sentiment classification.</abstract>
       <url hash="57f8de15">L16-1429</url>
@@ -4615,7 +4615,7 @@
     </paper>
     <paper id="432">
       <title>Discourse Structure and Dialogue Acts in Multiparty Dialogue: the <fixed-case>STAC</fixed-case> Corpus</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Julie</first><last>Hunter</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Benamara</first><last>Farah</last></author>
@@ -4630,7 +4630,7 @@
       <author><first>Guillaume</first><last>Dubuisson Duplessis</last></author>
       <author><first>Vincent</first><last>Letard</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>2728–2735</pages>
       <abstract>This paper presents an automatic corpus-based process to author an open-domain conversational strategy usable both in chatterbot systems and as a fallback strategy for out-of-domain human utterances. Our approach is implemented on a corpus of television drama subtitles. This system is used as a chatterbot system to collect a corpus of 41 open-domain textual dialogues with 27 human participants. The general capabilities of the system are studied through objective measures and subjective self-reports in terms of understandability, repetition and coherence of the system responses selected in reaction to human utterances. Subjective evaluations of the collected dialogues are presented with respect to amusement, engagement and enjoyability. The main factors influencing those dimensions in our chatterbot experiment are discussed.</abstract>
       <url hash="993878d7">L16-1433</url>
@@ -4648,7 +4648,7 @@
     <paper id="435">
       <title>Towards Automatic Identification of Effective Clues for Team Word-Guessing Games</title>
       <author><first>Eli</first><last>Pincus</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>2741–2747</pages>
       <abstract>Team word-guessing games where one player, the clue-giver, gives clues attempting to elicit a target-word from another player, the receiver, are a popular form of entertainment and also used for educational purposes. Creating an engaging computational agent capable of emulating a talented human clue-giver in a timed word-guessing game depends on the ability to provide effective clues (clues able to elicit a correct guess from a human receiver). There are many available web resources and databases that can be mined for the raw material for clues for target-words; however, a large number of those clues are unlikely to be able to elicit a correct guess from a human guesser. In this paper, we propose a method for automatically filtering a clue corpus for effective clues for an arbitrary target-word from a larger set of potential clues, using machine learning on a set of features of the clues, including point-wise mutual information between a clue’s constituent words and a clue’s target-word. The results of the experiments significantly improve the average clue quality over previous approaches, and bring quality rates in-line with measures of human clue quality derived from a corpus of human-human interactions. The paper also introduces the data used to develop this method; audio recordings of people making guesses after having heard the clues being spoken by a synthesized voice.</abstract>
       <url hash="9b8d2f5f">L16-1435</url>
@@ -4669,7 +4669,7 @@
     <paper id="437">
       <title>Using Contextual Information for Machine Translation Evaluation</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>2755–2761</pages>
       <abstract>Automatic evaluation of Machine Translation (MT) is typically approached by measuring similarity between the candidate MT and a human reference translation. An important limitation of existing evaluation systems is that they are unable to distinguish candidate-reference differences that arise due to acceptable linguistic variation from the differences induced by MT errors. In this paper we present a new metric, UPF-Cobalt, that addresses this issue by taking into consideration the syntactic contexts of candidate and reference words. The metric applies a penalty when the words are similar but the contexts in which they occur are not equivalent. In this way, Machine Translations (MTs) that are different from the human translation but still essentially correct are distinguished from those that share high number of words with the reference but alter the meaning of the sentence due to translation errors. The results show that the method proposed is indeed beneficial for automatic MT evaluation. We report experiments based on two different evaluation tasks with various types of manual quality assessment. The metric significantly outperforms state-of-the-art evaluation systems in varying evaluation settings.</abstract>
       <url hash="c8ccd9bd">L16-1437</url>
@@ -4680,8 +4680,8 @@
       <author><first>João António</first><last>Rodrigues</last></author>
       <author><first>Nuno</first><last>Rendeiro</last></author>
       <author><first>Andreia</first><last>Querido</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>2762–2765</pages>
       <abstract>The usual concern when opting for a rule-based or a hybrid machine translation (MT) system is how much effort is required to adapt the system to a different language pair or a new domain. In this paper, we describe a way of adapting an existing hybrid MT system to a new language pair, and show that such a system can outperform a standard phrase-based statistical machine translation system with an average of 10 persons/month of work. This is specifically important in the case of domain-specific MT for which there is not enough parallel data for training a statistical machine translation system.</abstract>
       <url hash="3b4d90cd">L16-1438</url>
@@ -4700,7 +4700,7 @@
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Akira</first><last>Fujita</last></author>
       <author><first>Naoya</first><last>Todo</last></author>
-      <author><first>Noriko H.</first><last>Arai</last></author>
+      <author id="noriko-h-arai"><first>Noriko H.</first><last>Arai</last></author>
       <pages>2771–2776</pages>
       <abstract>This paper reports on an experiment where 795 human participants answered to the questions taken from second language proficiency tests that were translated to their native language. The output of three machine translation systems and two different human translations were used as the test material. We classified the translation errors in the questions according to an error taxonomy and analyzed the participants’ response on the basis of the type and frequency of the translation errors. Through the analysis, we identified several types of errors that deteriorated most the accuracy of the participants’ answers, their confidence on the answers, and their overall evaluation of the translation quality.</abstract>
       <url hash="a43e32a9">L16-1440</url>
@@ -4710,9 +4710,9 @@
       <title>Word Sense-Aware Machine Translation: Including Senses as Contextual Features for Improved Translation Models</title>
       <author><first>Steven</first><last>Neale</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier Lopez</first><last>de Lacalle</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier Lopez</first><last>de Lacalle</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>2777–2783</pages>
       <abstract>Although it is commonly assumed that word sense disambiguation (WSD) should help to improve lexical choice and improve the quality of machine translation systems, how to successfully integrate word senses into such systems remains an unanswered question. Some successful approaches have involved reformulating either WSD or the word senses it produces, but work on using traditional word senses to improve machine translation have met with limited success. In this paper, we build upon previous work that experimented on including word senses as contextual features in maxent-based translation models. Training on a large, open-domain corpus (Europarl), we demonstrate that this aproach yields significant improvements in machine translation from English to Portuguese.</abstract>
       <url hash="5c90dbd7">L16-1441</url>
@@ -4720,8 +4720,8 @@
     </paper>
     <paper id="442">
       <title><fixed-case>S</fixed-case>uper<fixed-case>CAT</fixed-case>: The (New and Improved) Corpus Analysis Toolkit</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>William A.</first><last>Baumgartner Jr.</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner Jr.</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
       <pages>2784–2788</pages>
       <abstract>This paper reports SuperCAT, a corpus analysis toolkit. It is a radical extension of SubCAT, the Sublanguage Corpus Analysis Toolkit, from sublanguage analysis to corpus analysis in general. The idea behind SuperCAT is that representative corpora have no tendency towards closure―that is, they tend towards infinity. In contrast, non-representative corpora have a tendency towards closure―roughly, finiteness. SuperCAT focuses on general techniques for the quantitative description of the characteristics of any corpus (or other language sample), particularly concerning the characteristics of lexical distributions. Additionally, SuperCAT features a complete re-engineering of the previous SubCAT architecture.</abstract>
@@ -4783,7 +4783,7 @@
     <paper id="448">
       <title>Detecting Optional Arguments of Verbs</title>
       <author><first>András</first><last>Kornai</last></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
       <author><first>Gábor</first><last>Recski</last></author>
       <pages>2815–2818</pages>
       <abstract>We propose a novel method for detecting optional arguments of Hungarian verbs using only positive data. We introduce a custom variant of collexeme analysis that explicitly models the noise in verb frames. Our method is, for the most part, unsupervised: we use the spectral clustering algorithm described in Brew and Schulte in Walde (2002) to build a noise model from a short, manually verified seed list of verbs. We experimented with both raw count- and context-based clusterings and found their performance almost identical. The code for our algorithm and the frame list are freely available at <url>http://hlt.bme.hu/en/resources/tade</url>.</abstract>
@@ -4821,8 +4821,8 @@
     <paper id="452">
       <title>Sentence Similarity based on Dependency Tree Kernels for Multi-document Summarization</title>
       <author><first>Şaziye Betül</first><last>Özateş</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>2833–2838</pages>
       <abstract>We introduce an approach based on using the dependency grammar representations of sentences to compute sentence similarity for extractive multi-document summarization. We adapt and investigate the effects of two untyped dependency tree kernels, which have originally been proposed for relation extraction, to the multi-document summarization problem. In addition, we propose a series of novel dependency grammar based kernels to better represent the syntactic and semantic similarities among the sentences. The proposed methods incorporate the type information of the dependency relations for sentence similarity calculation. To our knowledge, this is the first study that investigates using dependency tree based sentence similarity for multi-document summarization.</abstract>
       <url hash="ba0b101e">L16-1452</url>
@@ -4851,7 +4851,7 @@
     <paper id="455">
       <title>Recent Advances in Development of a Lexicon-Grammar of <fixed-case>P</fixed-case>olish: <fixed-case>P</fixed-case>ol<fixed-case>N</fixed-case>et 3.0</title>
       <author><first>Zygmunt</first><last>Vetulani</last></author>
-      <author><first>Grażyna</first><last>Vetulani</last></author>
+      <author id="grazyna-vetulani"><first>Grażyna</first><last>Vetulani</last></author>
       <author><first>Bartłomiej</first><last>Kochanowski</last></author>
       <pages>2851–2854</pages>
       <abstract>The granularity of PolNet (Polish Wordnet) is the main theoretical issue discussed in the paper. We describe the latest extension of PolNet including valency information of simple verbs and noun-verb collocations using manual and machine-assisted methods. Valency is defined to include both semantic and syntactic selectional restrictions. We assume the valency structure of a verb to be an index of meaning. Consistently we consider it an attribute of a synset. Strict application of this principle results in fine granularity of the verb section of the wordnet. Considering valency as a distinctive feature of synsets was an essential step to transform the initial PolNet (first intended as a lexical ontology) into a lexicon-grammar. For the present refinement of PolNet we assume that the category of language register is a part of meaning. The totality of PolNet 2.0 synsets is being revised in order to split the PolNet 2.0 synsets that contain different register words into register-uniform sub-synsets. We completed this operation for synsets that were used as values of semantic roles. The operation augmented the number of considered synsets by 29%. In the paper we report an extension of the class of collocation-based verb synsets.</abstract>
@@ -4868,7 +4868,7 @@
     </paper>
     <paper id="457">
       <title>Improving corpus search via parsing</title>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
       <pages>2862–2866</pages>
       <abstract>In this paper, we describe an addition to the corpus query system Kontext that enables to enhance the search using syntactic attributes in addition to the existing features, mainly lemmas and morphological categories. We present the enhancements of the corpus query system itself, the attributes we use to represent syntactic structures in data, and some examples of querying the syntactically annotated corpora, such as treebanks in various languages as well as an automatically parsed large corpus.</abstract>
@@ -4890,7 +4890,7 @@
       <title>A <fixed-case>H</fixed-case>ungarian Sentiment Corpus Manually Annotated at Aspect Level</title>
       <author><first>Martina Katalin</first><last>Szabó</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>Viktor</first><last>Varga</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <pages>2873–2878</pages>
@@ -4918,7 +4918,7 @@
     </paper>
     <paper id="462">
       <title>Annotating Sentiment and Irony in the Online <fixed-case>I</fixed-case>talian Political Debate on #labuonascuola</title>
-      <author><first>Marco</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco</first><last>Stranisci</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Delia Irazú</first><last>Hernández Farías</last></author>
       <author><first>Viviana</first><last>Patti</last></author>
@@ -4929,7 +4929,7 @@
     </paper>
     <paper id="463">
       <title><fixed-case>N</fixed-case>ile<fixed-case>UL</fixed-case>ex: A Phrase and Word Level Sentiment Lexicon for <fixed-case>E</fixed-case>gyptian and <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <pages>2900–2905</pages>
       <abstract>This paper presents NileULex, which is an Arabic sentiment lexicon containing close to six thousands Arabic words and compound phrases. Forty five percent of the terms and expressions in the lexicon are Egyptian or colloquial while fifty five percent are Modern Standard Arabic. While the collection of many of the terms included in the lexicon was done automatically, the actual addition of any term was done manually. One of the important criterions for adding terms to the lexicon, was that they be as unambiguous as possible. The result is a lexicon with a much higher quality than any translated variant or automatically constructed one. To demonstrate that a lexicon such as this can directly impact the task of sentiment analysis, a very basic machine learning based sentiment analyser that uses unigrams, bigrams, and lexicon based features was applied on two different Twitter datasets. The obtained results were compared to a baseline system that only uses unigrams and bigrams. The same lexicon based features were also generated using a publicly available translation of a popular sentiment lexicon. The experiments show that usage of the developed lexicon improves the results over both the baseline and the publicly available lexicon.</abstract>
       <url hash="43a667c4">L16-1463</url>
@@ -4945,8 +4945,8 @@
     </paper>
     <paper id="465">
       <title>Rude waiter but mouthwatering pastries! An exploratory study into <fixed-case>D</fixed-case>utch Aspect-Based Sentiment Analysis</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>2910–2917</pages>
       <abstract>The fine-grained task of automatically detecting all sentiment expressions within a given document and the aspects to which they refer is known as aspect-based sentiment analysis. In this paper we present the first full aspect-based sentiment analysis pipeline for Dutch and apply it to customer reviews. To this purpose, we collected reviews from two different domains, i.e. restaurant and smartphone reviews. Both corpora have been manually annotated using newly developed guidelines that comply to standard practices in the field. For our experimental pipeline we perceive aspect-based sentiment analysis as a task consisting of three main subtasks which have to be tackled incrementally: aspect term extraction, aspect category classification and polarity classification. First experiments on our Dutch restaurant corpus reveal that this is indeed a feasible approach that yields promising results.</abstract>
       <url hash="61a8e89e">L16-1465</url>
@@ -4955,7 +4955,7 @@
     <paper id="466">
       <title>Building A Case-based Semantic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Parallel Treebank</title>
       <author><first>Huaxing</first><last>Shi</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>2918–2924</pages>
       <abstract>We construct a case-based English-to-Chinese semantic constituent parallel Treebank for a Statistical Machine Translation (SMT) task by labelling each node of the Deep Syntactic Tree (DST) with our refined semantic cases. Since subtree span-crossing is harmful in tree-based SMT, DST is adopted to alleviate this problem. At the same time, we tailor an existing case set to represent bilingual shallow semantic relations more precisely. This Treebank is a part of a semantic corpus building project, which aims to build a semantic bilingual corpus annotated with syntactic, semantic cases and word senses. Data in our Treebank is from the news domain of Datum corpus. 4,000 sentence pairs are selected to cover various lexicons and part-of-speech (POS) n-gram patterns as much as possible. This paper presents the construction of this case Treebank. Also, we have tested the effect of adopting DST structure in alleviating subtree span-crossing. Our preliminary analysis shows that the compatibility between Chinese and English trees can be significantly increased by transforming the parse-tree into the DST. Furthermore, the human agreement rate in annotation is found to be acceptable (90% in English nodes, 75% in Chinese nodes).</abstract>
@@ -4967,7 +4967,7 @@
       <author><first>Xuansong</first><last>Li</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>2925–2930</pages>
       <abstract>Morphologically-rich languages pose problems for machine translation (MT) systems, including word-alignment errors, data sparsity and multiple affixes. Current alignment models at word-level do not distinguish words and morphemes, thus yielding low-quality alignment and subsequently affecting end translation quality. Models using morpheme-level alignment can reduce the vocabulary size of morphologically-rich languages and overcomes data sparsity. The alignment data based on smallest units reveals subtle language features and enhances translation quality. Recent research proves such morpheme-level alignment (MA) data to be valuable linguistic resources for SMT, particularly for languages with rich morphology. In support of this research trend, the Linguistic Data Consortium (LDC) created Uzbek-English and Turkish-English alignment data which are manually aligned at the morpheme level. This paper describes the creation of MA corpora, including alignment and tagging process and approaches, highlighting annotation challenges and specific features of languages with rich morphology. The light tagging annotation on the alignment layer adds extra value to the MA data, facilitating users in flexibly tailoring the data for various MT model training.</abstract>
       <url hash="976adffd">L16-1467</url>
@@ -4986,11 +4986,11 @@
     <paper id="469">
       <title><fixed-case>T</fixed-case>weet<fixed-case>MT</fixed-case>: A Parallel Microblog Corpus</title>
       <author><first>Iñaki San</first><last>Vicente</last></author>
-      <author><first>Iñaki</first><last>Alegría</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegría</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
-      <author><first>Eva Martínez</first><last>Garcia</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="eva-martinez-garcia"><first>Eva Martínez</first><last>Garcia</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
@@ -5002,8 +5002,8 @@
     <paper id="470">
       <title>The Scielo Corpus: a Parallel Corpus of Scientific Publications for Biomedicine</title>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>2942–2948</pages>
       <abstract>The biomedical scientific literature is a rich source of information not only in the English language, for which it is more abundant, but also in other languages, such as Portuguese, Spanish and French. We present the first freely available parallel corpus of scientific publications for the biomedical domain. Documents from the ”Biological Sciences” and ”Health Sciences” categories were retrieved from the Scielo database and parallel titles and abstracts are available for the following language pairs: Portuguese/English (about 86,000 documents in total), Spanish/English (about 95,000 documents) and French/English (about 2,000 documents). Additionally, monolingual data was also collected for all four languages. Sentences in the parallel corpus were automatically aligned and a manual analysis of 200 documents by native experts found that a minimum of 79% of sentences were correctly aligned in all language pairs. We demonstrate the utility of the corpus by running baseline machine translation experiments. We show that for all language pairs, a statistical machine translation system trained on the parallel corpora achieves performance that rivals or exceeds the state of the art in the biomedical domain. Furthermore, the corpora are currently being used in the biomedical task in the First Conference on Machine Translation (WMT’16).</abstract>
       <url hash="034d1116">L16-1470</url>
@@ -5012,9 +5012,9 @@
     <paper id="471">
       <title>Producing Monolingual and Parallel Web Corpora at the Same Time - <fixed-case>S</fixed-case>pider<fixed-case>L</fixed-case>ing and Bitextor’s Love Affair</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Sergio Ortiz</first><last>Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio Ortiz</first><last>Rojas</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
       <pages>2949–2956</pages>
       <abstract>This paper presents an approach for building large monolingual corpora and, at the same time, extracting parallel data by crawling the top-level domain of a given language of interest. For gathering linguistically relevant data from top-level domains we use the SpiderLing crawler, modified to crawl data written in multiple languages. The output of this process is then fed to Bitextor, a tool for harvesting parallel data from a collection of documents. We call the system combining these two tools Spidextor, a blend of the names of its two crucial parts. We evaluate the described approach intrinsically by measuring the accuracy of the extracted bitexts from the Croatian top-level domain “.hr” and the Slovene top-level domain “.si”, and extrinsically on the English-Croatian language pair by comparing an SMT system built from the crawled data with third-party systems. We finally present parallel datasets collected with our approach for the English-Croatian, English-Finnish, English-Serbian and English-Slovene language pairs.</abstract>
@@ -5037,7 +5037,7 @@
       <title>Can Tweets Predict <fixed-case>TV</fixed-case> Ratings?</title>
       <author><first>Bridget</first><last>Sommerdijk</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>2965–2970</pages>
       <abstract>We set out to investigate whether TV ratings and mentions of TV programmes on the Twitter social media platform are correlated. If such a correlation exists, Twitter may be used as an alternative source for estimating viewer popularity. Moreover, the Twitter-based rating estimates may be generated during the programme, or even before. We count the occurrences of programme-specific hashtags in an archive of Dutch tweets of eleven popular TV shows broadcast in the Netherlands in one season, and perform correlation tests. Overall we find a strong correlation of 0.82; the correlation remains strong, 0.79, if tweets are counted a half hour before broadcast time. However, the two most popular TV shows account for most of the positive effect; if we leave out the single and second most popular TV shows, the correlation drops to being moderate to weak. Also, within a TV show, correlations between ratings and tweet counts are mostly weak, while correlations between TV ratings of the previous and next shows are strong. In absence of information on previous shows, Twitter-based counts may be a viable alternative to classic estimation methods for TV ratings. Estimates are more reliable with more popular TV shows.</abstract>
       <url hash="df270fc0">L16-1473</url>
@@ -5059,7 +5059,7 @@
     <paper id="475">
       <title>Corpus for Customer Purchase Behavior Prediction in Social Media</title>
       <author><first>Shigeyuki</first><last>Sakaki</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Mandy</first><last>Korpusik</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
       <pages>2976–2980</pages>
@@ -5070,7 +5070,7 @@
     <paper id="476">
       <title>Segmenting Hashtags using Automatically Created Training Data</title>
       <author><first>Arda</first><last>Çelebi</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>2981–2985</pages>
       <abstract>Hashtags, which are commonly composed of multiple words, are increasingly used to convey the actual messages in tweets. Understanding what tweets are saying is getting more dependent on understanding hashtags. Therefore, identifying the individual words that constitute a hashtag is an important, yet a challenging task due to the abrupt nature of the language used in tweets. In this study, we introduce a feature-rich approach based on using supervised machine learning methods to segment hashtags. Our approach is unsupervised in the sense that instead of using manually segmented hashtags for training the machine learning classifiers, we automatically create our training data by using tweets as well as by automatically extracting hashtag segmentations from a large corpus. We achieve promising results with such automatically created noisy training data.</abstract>
       <url hash="ce1182c0">L16-1476</url>
@@ -5079,7 +5079,7 @@
     <paper id="477">
       <title>Exploring Language Variation Across <fixed-case>E</fixed-case>urope - A Web-based Tool for Computational Sociolinguistics</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>2986–2989</pages>
       <abstract>Language varies not only between countries, but also along regional and socio-demographic lines. This variation is one of the driving factors behind language change. However, investigating language variation is a complex undertaking: the more factors we want to consider, the more data we need. Traditional qualitative methods are not well-suited to do this, an therefore restricted to isolated factors. This reduction limits the potential insights, and risks attributing undue importance to easily observed factors. While there is a large interest in linguistics to increase the quantitative aspect of such studies, it requires training in both variational linguistics and computational methods, a combination that is still not common. We take a first step here to alleviating the problem by providing an interface, www.languagevariation.com, to explore large-scale language variation along multiple socio-demographic factors – without programming knowledge. It makes use of large amounts of data and provides statistical analyses, maps, and interactive features that will enable scholars to explore language variation in a data-driven way.</abstract>
       <url hash="a165514d">L16-1477</url>
@@ -5140,14 +5140,14 @@
       <title><fixed-case>QTL</fixed-case>eap <fixed-case>WSD</fixed-case>/<fixed-case>NED</fixed-case> Corpora: Semantic Annotation of Parallel Corpora in Six Languages</title>
       <author><first>Arantxa</first><last>Otegi</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Antonio</first><last>Branco</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Rita</first><last>Pereira</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Steven</first><last>Neale</last></author>
       <pages>3023–3030</pages>
       <abstract>This work presents parallel corpora automatically annotated with several NLP tools, including lemma and part-of-speech tagging, named-entity recognition and classification, named-entity disambiguation, word-sense disambiguation, and coreference. The corpora comprise both the well-known Europarl corpus and a domain-specific question-answer troubleshooting corpus on the IT domain. English is common in all parallel corpora, with translations in five languages, namely, Basque, Bulgarian, Czech, Portuguese and Spanish. We describe the annotated corpora and the tools used for annotation, as well as annotation statistics for each language. These new resources are freely available and will help research on semantic processing for machine translation and cross-lingual transfer.</abstract>
@@ -5172,7 +5172,7 @@
       <author><first>Jaya</first><last>Saraswati</last></author>
       <author><first>Laxmi</first><last>Kashyap</last></author>
       <author><first>Dhirendra</first><last>Singh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3039–3043</pages>
       <abstract>Word Sense Disambiguation (WSD) is one of the open problems in the area of natural language processing. Various supervised, unsupervised and knowledge based approaches have been proposed for automatically determining the sense of a word in a particular context. It has been observed that such approaches often find it difficult to beat the WordNet First Sense (WFS) baseline which assigns the sense irrespective of context. In this paper, we present our work on creating the WFS baseline for Hindi language by manually ranking the synsets of Hindi WordNet. A ranking tool is developed where human experts can see the frequency of the word senses in the sense-tagged corpora and have been asked to rank the senses of a word by using this information and also his/her intuition. The accuracy of WFS baseline is tested on several standard datasets. F-score is found to be 60%, 65% and 55% on Health, Tourism and News datasets respectively. The created rankings can also be used in other NLP applications viz., Machine Translation, Information Retrieval, Text Summarization, etc.</abstract>
       <url hash="545e33a4">L16-1485</url>
@@ -5189,12 +5189,12 @@
     </paper>
     <paper id="487">
       <title>Impact of Automatic Segmentation on the Quality, Productivity and Self-reported Post-editing Effort of Intralingual Subtitles</title>
-      <author><first>Aitor</first><last>Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Álvarez</last></author>
       <author><first>Marina</first><last>Balenciaga</last></author>
       <author><first>Arantza</first><last>del Pozo</last></author>
       <author><first>Haritz</first><last>Arzelus</last></author>
       <author><first>Anna</first><last>Matamala</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>3049–3053</pages>
       <abstract>This paper describes the evaluation methodology followed to measure the impact of using a machine learning algorithm to automatically segment intralingual subtitles. The segmentation quality, productivity and self-reported post-editing effort achieved with such approach are shown to improve those obtained by the technique based in counting characters, mainly employed for automatic subtitle segmentation currently. The corpus used to train and test the proposed automated segmentation method is also described and shared with the community, in order to foster further research in this area.</abstract>
       <url hash="5aa52703">L16-1487</url>
@@ -5212,7 +5212,7 @@
     <paper id="489">
       <title>Cross-validating Image Description Datasets and Evaluation Metrics</title>
       <author><first>Josiah</first><last>Wang</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>3059–3066</pages>
       <abstract>The task of automatically generating sentential descriptions of image content has become increasingly popular in recent years, resulting in the development of large-scale image description datasets and the proposal of various metrics for evaluating image description generation systems. However, not much work has been done to analyse and understand both datasets and the metrics. In this paper, we propose using a leave-one-out cross validation (LOOCV) process as a means to analyse multiply annotated, human-authored image description datasets and the various evaluation metrics, i.e. evaluating one image description against other human-authored descriptions of the same image. Such an evaluation process affords various insights into the image description datasets and evaluation metrics, such as the variations of image descriptions within and across datasets and also what the metrics capture. We compute and analyse (i) human upper-bound performance; (ii) ranked correlation between metric pairs across datasets; (iii) lower-bound performance by comparing a set of descriptions describing one image to another sentence not describing that image. Interesting observations are made about the evaluation metrics and image description datasets, and we conclude that such cross-validation methods are extremely useful for assessing and gaining insights into image description datasets and evaluation metrics for image descriptions.</abstract>
       <url hash="7805440a">L16-1489</url>
@@ -5230,7 +5230,7 @@
     </paper>
     <paper id="491">
       <title>Benchmarking Lexical Simplification Systems</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3074–3080</pages>
       <abstract>Lexical Simplification is the task of replacing complex words in a text with simpler alternatives. A variety of strategies have been devised for this challenge, yet there has been little effort in comparing their performance. In this contribution, we present a benchmarking of several Lexical Simplification systems. By combining resources created in previous work with automatic spelling and inflection correction techniques, we introduce BenchLS: a new evaluation dataset for the task. Using BenchLS, we evaluate the performance of solutions for various steps in the typical Lexical Simplification pipeline, both individually and jointly. This is the first time Lexical Simplification systems are compared in such fashion on the same data, and the findings introduce many contributions to the field, revealing several interesting properties of the systems evaluated.</abstract>
@@ -5250,9 +5250,9 @@
     <paper id="493">
       <title>Extractive Summarization under Strict Length Constraints</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Youssef</first><last>Billawala</last></author>
       <author><first>Karolina</first><last>Buchner</last></author>
       <pages>3089–3093</pages>
@@ -5263,13 +5263,13 @@
     <paper id="494">
       <title>What’s the Issue Here?: Task-based Evaluation of Reader Comment Summarization Systems</title>
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Monica</first><last>Paramita</last></author>
+      <author id="monica-lestari-paramita"><first>Monica</first><last>Paramita</last></author>
       <author><first>Adam</first><last>Funk</last></author>
-      <author><first>Emina</first><last>Kurtic</last></author>
+      <author id="emina-kurtic"><first>Emina</first><last>Kurtic</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Jonathan</first><last>Foster</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>3094–3101</pages>
       <abstract>Automatic summarization of reader comments in on-line news is an extremely challenging task and a capability for which there is a clear need. Work to date has focussed on producing extractive summaries using well-known techniques imported from other areas of language processing. But are extractive summaries of comments what users really want? Do they support users in performing the sorts of tasks they are likely to want to perform with reader comments? In this paper we address these questions by doing three things. First, we offer a specification of one possible summary type for reader comment, based on an analysis of reader comment in terms of issues and viewpoints. Second, we define a task-based evaluation framework for reader comment summarization that allows summarization systems to be assessed in terms of how well they support users in a time-limited task of identifying issues and characterising opinion on issues in comments. Third, we describe a pilot evaluation in which we used the task-based evaluation framework to evaluate a prototype reader comment clustering and summarization system, demonstrating the viability of the evaluation framework and illustrating the sorts of insight such an evaluation affords.</abstract>
       <url hash="e4ae6e48">L16-1494</url>
@@ -5287,7 +5287,7 @@
     <paper id="496">
       <title>Bilingual Lexicon Extraction at the Morpheme Level Using Distributional Analysis</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>3110–3115</pages>
       <abstract>Bilingual lexicon extraction from comparable corpora is usually based on distributional methods when dealing with single word terms (SWT). These methods often treat SWT as single tokens without considering their compositional property. However, many SWT are compositional (composed of roots and affixes) and this information, if taken into account can be very useful to match translational pairs, especially for infrequent terms where distributional methods often fail. For instance, the English compound <i>xenograft</i> which is composed of the root <i>xeno</i> and the lexeme <i>graft</i> can be translated into French compositionally by aligning each of its elements (<i>xeno</i> with <i>xéno</i> and <i>graft</i> with <i>greffe</i>) resulting in the translation: <i>xénogreffe</i>. In this paper, we experiment several distributional modellings at the morpheme level that we apply to perform compositional translation to a subset of French and English compounds. We show promising results using distributional analysis at the root and affix levels. We also show that the adapted approach significantly improve bilingual lexicon extraction from comparable corpora compared to the approach at the word level.</abstract>
       <url hash="91e5eb4b">L16-1496</url>
@@ -5321,7 +5321,7 @@
       <author><first>Zhenhao</first><last>Hua</last></author>
       <author><first>Yulian</first><last>Tamres-Rudnicky</last></author>
       <author><first>Arnab</first><last>Dash</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>3127–3132</pages>
       <abstract>Users will interact with an individual app on smart devices (e.g., phone, TV, car) to fulfill a specific goal (e.g. find a photographer), but users may also pursue more complex tasks that will span multiple domains and apps (e.g. plan a wedding ceremony). Planning and executing such multi-app tasks are typically managed by users, considering the required global context awareness. To investigate how users arrange domains/apps to fulfill complex tasks in their daily life, we conducted a user study on 14 participants to collect such data from their Android smart phones. This document 1) summarizes the techniques used in the data collection and 2) provides a brief statistical description of the data. This data guilds the future direction for researchers in the fields of conversational agent and personal assistant, etc. This data is available at <url>http://AppDialogue.com</url>.</abstract>
       <url hash="ab6d9292">L16-1499</url>
@@ -5364,7 +5364,7 @@
     </paper>
     <paper id="503">
       <title>The <fixed-case>D</fixed-case>ialog<fixed-case>B</fixed-case>ank</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
       <author><first>Andrei</first><last>Malchanau</last></author>
       <author><first>Kars</first><last>Wijnhoven</last></author>
@@ -5377,8 +5377,8 @@
     <paper id="504">
       <title>Coordinating Communication in the Wild: The Artwalk Dialogue Corpus of Pedestrian Navigation and Mobile Referential Communication</title>
       <author><first>Kris</first><last>Liu</last></author>
-      <author><first>Jean</first><last>Fox Tree</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>3159–3166</pages>
       <abstract>The Artwalk Corpus is a collection of 48 mobile phone conversations between 24 pairs of friends and 24 pairs of strangers performing a novel, naturalistically-situated referential communication task. This task produced dialogues which, on average, are just under 40 minutes. The task requires the identification of public art while walking around and navigating pedestrian routes in the downtown area of Santa Cruz, California. The task involves a Director on the UCSC campus with access to maps providing verbal instructions to a Follower executing the task. The task provides a setting for real-world situated dialogic language and is designed to: (1) elicit entrainment and coordination of referring expressions between the dialogue participants, (2) examine the effect of friendship on dialogue strategies, and (3) examine how the need to complete the task while negotiating myriad, unanticipated events in the real world ― such as avoiding cars and other pedestrians ― affects linguistic coordination and other dialogue behaviors. Previous work on entrainment and coordinating communication has primarily focused on similar tasks in laboratory settings where there are no interruptions and no need to navigate from one point to another in a complex space. The corpus provides a general resource for studies on how coordinated task-oriented dialogue changes when we move outside the laboratory and into the world. It can also be used for studies of entrainment in dialogue, and the form and style of pedestrian instruction dialogues, as well as the effect of friendship on dialogic behaviors.</abstract>
       <url hash="37a1fea2">L16-1504</url>
@@ -5386,10 +5386,10 @@
     </paper>
     <paper id="505">
       <title>Managing Linguistic and Terminological Variation in a Medical Dialogue System</title>
-      <author><first>Leonardo Campillos</first><last>Llanos</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo Campillos</first><last>Llanos</last></author>
       <author><first>Dhouha</first><last>Bouamor</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>3167–3173</pages>
       <abstract>We introduce a dialogue task between a virtual patient and a doctor where the dialogue system, playing the patient part in a simulated consultation, must reconcile a specialized level, to understand what the doctor says, and a lay level, to output realistic patient-language utterances. This increases the challenges in the analysis and generation phases of the dialogue. This paper proposes methods to manage linguistic and terminological variation in that situation and illustrates how they help produce realistic dialogues. Our system makes use of lexical resources for processing synonyms, inflectional and derivational variants, or pronoun/verb agreement. In addition, specialized knowledge is used for processing medical roots and affixes, ontological relations and concept mapping, and for generating lay variants of terms according to the patient’s non-expert discourse. We also report the results of a first evaluation carried out by 11 users interacting with the system. We evaluated the non-contextual analysis module, which supports the Spoken Language Understanding step. The annotation of task domain entities obtained 91.8% of Precision, 82.5% of Recall, 86.9% of F-measure, 19.0% of Slot Error Rate, and 32.9% of Sentence Error Rate.</abstract>
       <url hash="06366d68">L16-1505</url>
@@ -5400,7 +5400,7 @@
       <author><first>Ajda</first><last>Gokcen</last></author>
       <author><first>Evan</first><last>Jaffe</last></author>
       <author><first>Johnsey</first><last>Erdmann</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Douglas</first><last>Danforth</last></author>
       <pages>3174–3179</pages>
       <abstract>We present a corpus of virtual patient dialogues to which we have added manually annotated gold standard word alignments. Since each question asked by a medical student in the dialogues is mapped to a canonical, anticipated version of the question, the corpus implicitly defines a large set of paraphrase (and non-paraphrase) pairs. We also present a novel process for selecting the most useful data to annotate with word alignments and for ensuring consistent paraphrase status decisions. In support of this process, we have enhanced the earlier Edinburgh alignment tool (Cohn et al., 2008) and revised and extended the Edinburgh guidelines, in particular adding guidance intended to ensure that the word alignments are consistent with the overall paraphrase status decision. The finished corpus and the enhanced alignment tool are made freely available.</abstract>
@@ -5409,7 +5409,7 @@
     </paper>
     <paper id="507">
       <title>A <fixed-case>CUP</fixed-case> of <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>ee: A large Collection of feedback Utterances Provided with communicative function annotations</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Jan</first><last>Gorisch</last></author>
       <author><first>Roxane</first><last>Bertrand</last></author>
       <pages>3180–3185</pages>
@@ -5421,7 +5421,7 @@
       <title><fixed-case>P</fixed-case>alabras: Crowdsourcing Transcriptions of <fixed-case>L</fixed-case>2 Speech</title>
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Pepi</first><last>Burgos</last></author>
-      <author><first>Catia</first><last>Cucchiarini</last></author>
+      <author id="catia-cucchiarini"><first>Catia</first><last>Cucchiarini</last></author>
       <author><first>Roeland</first><last>van Hout</last></author>
       <pages>3186–3191</pages>
       <abstract>We developed a web application for crowdsourcing transcriptions of Dutch words spoken by Spanish L2 learners. In this paper we discuss the design of the application and the influence of metadata and various forms of feedback. Useful data were obtained from 159 participants, with an average of over 20 transcriptions per item, which seems a satisfactory result for this type of research. Informing participants about how many items they still had to complete, and not how many they had already completed, turned to be an incentive to do more items. Assigning participants a score for their performance made it more attractive for them to carry out the transcription task, but this seemed to influence their performance. We discuss possible advantages and disadvantages in connection with the aim of the research and consider possible lessons for designing future experiments.</abstract>
@@ -5430,7 +5430,7 @@
     </paper>
     <paper id="509">
       <title>The <fixed-case>U</fixed-case>ppsala Corpus of Student Writings: Corpus Creation, Annotation, and Analysis</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Jesper</first><last>Näsman</last></author>
       <author><first>Anne</first><last>Palmér</last></author>
       <pages>3192–3199</pages>
@@ -5460,7 +5460,7 @@
     <paper id="512">
       <title><fixed-case>F</fixed-case>rench Learners Audio Corpus of <fixed-case>G</fixed-case>erman Speech (<fixed-case>FLACGS</fixed-case>)</title>
       <author><first>Jane</first><last>Wottawa</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <pages>3215–3219</pages>
       <abstract>The French Learners Audio Corpus of German Speech (FLACGS) was created to compare German speech production of German native speakers (GG) and French learners of German (FG) across three speech production tasks of increasing production complexity: repetition, reading and picture description. 40 speakers, 20 GG and 20 FG performed each of the three tasks, which in total leads to approximately 7h of speech. The corpus was manually transcribed and automatically aligned. Analysis that can be performed on this type of corpus are for instance segmental differences in the speech production of L2 learners compared to native speakers. We chose the realization of the velar nasal consonant engma. In spoken French, engma does not appear in a VCV context which leads to production difficulties in FG. With increasing speech production complexity (reading and picture description), engma is realized as engma + plosive by FG in over 50% of the cases. The results of a two way ANOVA with unequal sample sizes on the durations of the different realizations of engma indicate that duration is a reliable factor to distinguish between engma and engma + plosive in FG productions compared to the engma productions in GG in a VCV context. The FLACGS corpus allows to study L2 production and perception.</abstract>
       <url hash="06f50d0a">L16-1512</url>
@@ -5481,7 +5481,7 @@
       <author><first>Isabell</first><last>Hubert</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
       <author><first>Jordan</first><last>Lachler</last></author>
-      <author><first>Eddie A.</first><last>Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie A.</first><last>Santos</last></author>
       <pages>3227–3234</pages>
       <abstract>We are presenting our work on the creation of the first optical character recognition (OCR) model for Northern Haida, also known as Masset or Xaad Kil, a nearly extinct First Nations language spoken in the Haida Gwaii archipelago in British Columbia, Canada. We are addressing the challenges of training an OCR model for a language with an extensive, non-standard Latin character set as follows: (1) We have compared various training approaches and present the results of practical analyses to maximize recognition accuracy and minimize manual labor. An approach using just one or two pages of Source Images directly performed better than the Image Generation approach, and better than models based on three or more pages. Analyses also suggest that a character’s frequency is directly correlated with its recognition accuracy. (2) We present an overview of current OCR accuracy analysis tools available. (3) We have ported the once de-facto standardized OCR accuracy tools to be able to cope with Unicode input. Our work adds to a growing body of research on OCR for particularly challenging character sets, and contributes to creating the largest electronic corpus for this severely endangered language.</abstract>
       <url hash="260ad5d6">L16-1514</url>
@@ -5505,7 +5505,7 @@
     </paper>
     <paper id="517">
       <title>Curation of <fixed-case>D</fixed-case>utch Regional Dictionaries</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Eric</first><last>Sanders</last></author>
       <author><first>Nicoline</first><last>van der Sijs</last></author>
       <pages>3249–3255</pages>
@@ -5519,7 +5519,7 @@
       <author><first>Irene</first><last>Russo</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
       <author><first>Davyth</first><last>Hicks</last></author>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
       <author><first>Anneli</first><last>Sarhimaa</last></author>
       <author><first>Matti</first><last>Tuomisto</last></author>
       <pages>3256–3260</pages>
@@ -5542,7 +5542,7 @@
       <author><first>Martijn</first><last>Wieling</last></author>
       <author><first>Eva</first><last>Sassolini</last></author>
       <author><first>Sebastiana</first><last>Cucurullo</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>3265–3272</pages>
       <abstract>In this paper, we illustrate the integration of an online dialectometric tool, Gabmap, together with an online dialect atlas, the Atlante Lessicale Toscano (ALT-Web). By using a newly created url-based interface to Gabmap, ALT-Web is able to take advantage of the sophisticated dialect visualization and exploration options incorporated in Gabmap. For example, distribution maps showing the distribution in the Tuscan dialect area of a specific dialectal form (selected via the ALT-Web website) are easily obtainable. Furthermore, the complete ALT-Web dataset as well as subsets of the data (selected via the ALT-Web website) can be automatically uploaded and explored in Gabmap. By combining these two online applications, macro- and micro-analyses of dialectal data (respectively offered by Gabmap and ALT-Web) are effectively and dynamically combined.</abstract>
       <url hash="be44c7a0">L16-1520</url>
@@ -5550,7 +5550,7 @@
     </paper>
     <paper id="521">
       <title><fixed-case>LORELEI</fixed-case> Language Packs: Data, Tools, and Resources for Technology Development in Low Resource Languages</title>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <pages>3273–3280</pages>
       <abstract>In this paper, we describe the textual linguistic resources in nearly 3 dozen languages being produced by Linguistic Data Consortium for DARPA’s LORELEI (Low Resource Languages for Emergent Incidents) Program. The goal of LORELEI is to improve the performance of human language technologies for low-resource languages and enable rapid re-training of such technologies for new languages, with a focus on the use case of deployment of resources in sudden emergencies such as natural disasters. Representative languages have been selected to provide broad typological coverage for training, and surprise incident languages for testing will be selected over the course of the program. Our approach treats the full set of language packs as a coherent whole, maintaining LORELEI-wide specifications, tagsets, and guidelines, while allowing for adaptation to the specific needs created by each language. Each representative language corpus, therefore, both stands on its own as a resource for the specific language and forms part of a large multilingual resource for broader cross-language technology development.</abstract>
@@ -5559,8 +5559,8 @@
     </paper>
     <paper id="522">
       <title>A Computational Perspective on the <fixed-case>R</fixed-case>omanian Dialects</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>3281–3285</pages>
       <abstract>In this paper we conduct an initial study on the dialects of Romanian. We analyze the differences between Romanian and its dialects using the Swadesh list. We analyze the predictive power of the orthographic and phonetic features of the words, building a classification problem for dialect identification.</abstract>
       <url hash="914281ca">L16-1522</url>
@@ -5589,7 +5589,7 @@
     <paper id="525">
       <title><fixed-case>WTF</fixed-case>-<fixed-case>LOD</fixed-case> - A New Resource for Large-Scale <fixed-case>NER</fixed-case> Evaluation</title>
       <author><first>Lubomir</first><last>Otrusina</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>3299–3302</pages>
       <abstract>This paper introduces the Web TextFull linkage to Linked Open Data (WTF-LOD) dataset intended for large-scale evaluation of named entity recognition (NER) systems. First, we present the process of collecting data from the largest publically-available textual corpora, including Wikipedia dumps, monthly runs of the CommonCrawl, and ClueWeb09/12. We discuss similarities and differences of related initiatives such as WikiLinks and WikiReverse. Our work primarily focuses on links from “textfull” documents (links surrounded by a text that provides a useful context for entity linking), de-duplication of the data and advanced cleaning procedures. Presented statistics demonstrate that the collected data forms one of the largest available resource of its kind. They also prove suitability of the result for complex NER evaluation campaigns, including an analysis of the most ambiguous name mentions appearing in the data.</abstract>
       <url hash="ae3db83e">L16-1525</url>
@@ -5621,7 +5621,7 @@
     <paper id="528">
       <title><fixed-case>ELMD</fixed-case>: An Automatically Generated Entity Linking Gold Standard Dataset in the Music Domain</title>
       <author><first>Sergio</first><last>Oramas</last></author>
-      <author><first>Luis Espinosa</first><last>Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis Espinosa</first><last>Anke</last></author>
       <author><first>Mohamed</first><last>Sordo</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Xavier</first><last>Serra</last></author>
@@ -5633,10 +5633,10 @@
     <paper id="529">
       <title>Bridge-Language Capitalization Inference in <fixed-case>W</fixed-case>estern <fixed-case>I</fixed-case>ranian: <fixed-case>S</fixed-case>orani, <fixed-case>K</fixed-case>urmanji, Zazaki, and <fixed-case>T</fixed-case>ajik</title>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>3318–3324</pages>
       <abstract>In Sorani Kurdish, one of the most useful orthographic features in named-entity recognition – capitalization – is absent, as the language’s Perso-Arabic script does not make a distinction between uppercase and lowercase letters. We describe a system for deriving an inferred capitalization value from closely related languages by phonological similarity, and illustrate the system using several related Western Iranian languages.</abstract>
       <url hash="d107e653">L16-1529</url>
@@ -5658,7 +5658,7 @@
     </paper>
     <paper id="531">
       <title>A Regional News Corpora for Contextualized Entity Discovery and Linking</title>
-      <author><first>Adrian</first><last>Braşoveanu</last></author>
+      <author id="adrian-brasoveanu"><first>Adrian</first><last>Braşoveanu</last></author>
       <author><first>Lyndon J.B.</first><last>Nixon</last></author>
       <author><first>Albert</first><last>Weichselbraun</last></author>
       <author><first>Arno</first><last>Scharl</last></author>
@@ -5689,7 +5689,7 @@
       <title>Named Entity Resources - Overview and Outlook</title>
       <author><first>Maud</first><last>Ehrmann</last></author>
       <author><first>Damien</first><last>Nouvel</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>3349–3356</pages>
       <abstract>Recognition of real-world entities is crucial for most NLP applications. Since its introduction some twenty years ago, named entity processing has undergone a significant evolution with, among others, the definition of new tasks (e.g. entity linking) and the emergence of new types of data (e.g. speech transcriptions, micro-blogging). These pose certainly new challenges which affect not only methods and algorithms but especially linguistic resources. Where do we stand with respect to named entity resources? This paper aims at providing a systematic overview of named entity resources, accounting for qualities such as multilingualism, dynamicity and interoperability, and to identify shortfalls in order to guide future developments.</abstract>
       <url hash="9c9b3630">L16-1534</url>
@@ -5705,9 +5705,9 @@
     </paper>
     <paper id="536">
       <title>Using Word Embeddings to Translate Named Entities</title>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <author><first>Sergiu</first><last>Nisioi</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>3362–3366</pages>
       <abstract>In this paper we investigate the usefulness of neural word embeddings in the process of translating Named Entities (NEs) from a resource-rich language to a language low on resources relevant to the task at hand, introducing a novel, yet simple way of obtaining bilingual word vectors. Inspired by observations in (Mikolov et al., 2013b), which show that training their word vector model on comparable corpora yields comparable vector space representations of those corpora, reducing the problem of translating words to finding a rotation matrix, and results in (Zou et al., 2013), which showed that bilingual word embeddings can improve Chinese Named Entity Recognition (NER) and English to Chinese phrase translation, we use the sentence-aligned English-French EuroParl corpora and show that word embeddings extracted from a merged corpus (corpus resulted from the merger of the two aligned corpora) can be used to NE translation. We extrapolate that word embeddings trained on merged parallel corpora are useful in Named Entity Recognition and Translation tasks for resource-poor languages.</abstract>
       <url hash="abfd81cb">L16-1536</url>
@@ -5747,7 +5747,7 @@
     <paper id="540">
       <title>Can Topic Modelling benefit from Word Sense Information?</title>
       <author><first>Adriana</first><last>Ferrugento</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Ana</first><last>Alves</last></author>
       <author><first>Filipe</first><last>Rodrigues</last></author>
       <pages>3387–3393</pages>
@@ -5808,7 +5808,7 @@
       <title>Annotating and Detecting Medical Events in Clinical Notes</title>
       <author><first>Prescott</first><last>Klassen</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>3417–3421</pages>
       <abstract>Early detection and treatment of diseases that onset after a patient is admitted to a hospital, such as pneumonia, is critical to improving and reducing costs in healthcare. Previous studies (Tepper et al., 2013) showed that change-of-state events in clinical notes could be important cues for phenotype detection. In this paper, we extend the annotation schema proposed in (Klassen et al., 2014) to mark change-of-state events, diagnosis events, coordination, and negation. After we have completed the annotation, we build NLP systems to automatically identify named entities and medical events, which yield an f-score of 94.7% and 91.8%, respectively.</abstract>
       <url hash="347d184f">L16-1545</url>
@@ -5817,7 +5817,7 @@
     <paper id="546">
       <title>Speech Synthesis of Code-Mixed Text</title>
       <author><first>Sunayana</first><last>Sitaram</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>3422–3428</pages>
       <abstract>Most Text to Speech (TTS) systems today assume that the input text is in a single language and is written in the same language that the text needs to be synthesized in. However, in bilingual and multilingual communities, code mixing or code switching occurs in speech, in which speakers switch between languages in the same utterance. Due to the popularity of social media, we now see code-mixing even in text in these multilingual communities. TTS systems capable of synthesizing such text need to be able to handle text that is written in multiple languages and scripts. Code-mixed text poses many challenges to TTS systems, such as language identification, spelling normalization and pronunciation modeling. In this work, we describe a preliminary framework for synthesizing code-mixed text. We carry out experiments on synthesizing code-mixed Hindi and English text. We find that there is a significant user preference for TTS systems that can correctly identify and pronounce words in different languages.</abstract>
       <url hash="a3ce684f">L16-1546</url>
@@ -5855,7 +5855,7 @@
       <author><first>Kai Frederic</first><last>Engelmann</last></author>
       <author><first>Florian</first><last>Lier</last></author>
       <author><first>Simon</first><last>Schulz</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Friederike</first><last>Eyssel</last></author>
       <author><first>Thomas</first><last>Hermann</last></author>
       <author><first>Franz</first><last>Kummert</last></author>
@@ -5876,8 +5876,8 @@
       <author><first>Chung-Ning</first><last>Chang</last></author>
       <author><first>Kevin</first><last>Bowden</last></author>
       <author><first>Michael</first><last>Neff</last></author>
-      <author><first>Jean</first><last>Fox Tree</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>3447–3454</pages>
       <abstract>Story-telling is a fundamental and prevalent aspect of human social behavior. In the wild, stories are told conversationally in social settings, often as a dialogue and with accompanying gestures and other nonverbal behavior. This paper presents a new corpus, the Story Dialogue with Gestures (SDG) corpus, consisting of 50 personal narratives regenerated as dialogues, complete with annotations of gesture placement and accompanying gesture forms. The corpus includes dialogues generated by human annotators, gesture annotations on the human generated dialogues, videos of story dialogues generated from this representation, video clips of each gesture used in the gesture annotations, and annotations of the original personal narratives with a deep representation of story called a Story Intention Graph. Our long term goal is the automatic generation of story co-tellings as animated dialogues from the Story Intention Graph. We expect this corpus to be a useful resource for researchers interested in natural language generation, intelligent virtual agents, generation of nonverbal behavior, and story and narrative representations.</abstract>
       <url hash="f3a489cb">L16-1550</url>
@@ -5901,8 +5901,8 @@
       <author><first>Jackson</first><last>Tolins</last></author>
       <author><first>Kris</first><last>Liu</last></author>
       <author><first>Michael</first><last>Neff</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Jean</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean</first><last>Fox Tree</last></author>
       <pages>3461–3468</pages>
       <abstract>We present a corpus of 44 human-agent verbal and gestural story retellings designed to explore whether humans would gesturally entrain to an embodied intelligent virtual agent. We used a novel data collection method where an agent presented story components in installments, which the human would then retell to the agent. At the end of the installments, the human would then retell the embodied animated agent the story as a whole. This method was designed to allow us to observe whether changes in the agent’s gestural behavior would result in human gestural changes. The agent modified its gestures over the course of the story, by starting out the first installment with gestural behaviors designed to manifest extraversion, and slowly modifying gestures to express introversion over time, or the reverse. The corpus contains the verbal and gestural transcripts of the human story retellings. The gestures were coded for type, handedness, temporal structure, spatial extent, and the degree to which the participants’ gestures match those produced by the agent. The corpus illustrates the variation in expressive behaviors produced by users interacting with embodied virtual characters, and the degree to which their gestures were influenced by the agent’s dynamic changes in personality-based expressive style.</abstract>
       <url hash="9c96ff08">L16-1552</url>
@@ -5913,8 +5913,8 @@
       <author><first>Jackson</first><last>Tolins</last></author>
       <author><first>Kris</first><last>Liu</last></author>
       <author><first>Yingying</first><last>Wang</last></author>
-      <author><first>Jean E.</first><last>Fox Tree</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean E.</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Michael</first><last>Neff</last></author>
       <pages>3469–3476</pages>
       <abstract>This paper presents a new corpus, the Personality Dyads Corpus, consisting of multimodal data for three conversations between three personality-matched, two-person dyads (a total of 9 separate dialogues). Participants were selected from a larger sample to be 0.8 of a standard deviation above or below the mean on the Big-Five Personality extraversion scale, to produce an Extravert-Extravert dyad, an Introvert-Introvert dyad, and an Extravert-Introvert dyad. Each pair carried out conversations for three different tasks. The conversations were recorded using optical motion capture for the body and data gloves for the hands. Dyads’ speech was transcribed and the gestural and postural behavior was annotated with ANVIL. The released corpus includes personality profiles, ANVIL files containing speech transcriptions and the gestural annotations, and BVH files containing body and hand motion in 3D.</abstract>
@@ -5925,7 +5925,7 @@
       <title>Crowdsourcing Ontology Lexicons</title>
       <author><first>Bettina</first><last>Lanser</last></author>
       <author><first>Christina</first><last>Unger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>3477–3484</pages>
       <abstract>In order to make the growing amount of conceptual knowledge available through ontologies and datasets accessible to humans, NLP applications need access to information on how this knowledge can be verbalized in natural language. One way to provide this kind of information are ontology lexicons, which apart from the actual verbalizations in a given target language can provide further, rich linguistic information about them. Compiling such lexicons manually is a very time-consuming task and requires expertise both in Semantic Web technologies and lexicon engineering, as well as a very good knowledge of the target language at hand. In this paper we present an alternative approach to generating ontology lexicons by means of crowdsourcing: We use CrowdFlower to generate a small Japanese ontology lexicon for ten exemplary ontology elements from the DBpedia ontology according to a two-stage workflow, the main underlying idea of which is to turn the task of generating lexicon entries into a translation task; the starting point of this translation task is a manually created English lexicon for DBpedia. Comparison of the results to a manually created Japanese lexicon shows that the presented workflow is a viable option if an English seed lexicon is already available.</abstract>
       <url hash="4db958e9">L16-1554</url>
@@ -5956,7 +5956,7 @@
     <paper id="557">
       <title>Temporal Information Annotation: Crowd vs. Experts</title>
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Oana</first><last>Inel</last></author>
       <pages>3502–3509</pages>
       <abstract>This paper describes two sets of crowdsourcing experiments on temporal information annotation conducted on two languages, i.e., English and Italian. The first experiment, launched on the CrowdFlower platform, was aimed at classifying temporal relations given target entities. The second one, relying on the CrowdTruth metric, consisted in two subtasks: one devoted to the recognition of events and temporal expressions and one to the detection and classification of temporal relations. The outcomes of the experiments suggest a valuable use of crowdsourcing annotations also for a complex task like Temporal Processing.</abstract>
@@ -5966,8 +5966,8 @@
     <paper id="558">
       <title>A Tangled Web: The Faint Signals of Deception in Text - Boulder Lies and Truth Corpus (<fixed-case>BLT</fixed-case>-<fixed-case>C</fixed-case>)</title>
       <author><first>Franco</first><last>Salvetti</last></author>
-      <author><first>John B.</first><last>Lowe</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>3510–3517</pages>
       <abstract>We present an approach to creating corpora for use in detecting deception in text, including a discussion of the challenges peculiar to this task. Our approach is based on soliciting several types of reviews from writers and was implemented using Amazon Mechanical Turk. We describe the multi-dimensional corpus of reviews built using this approach, available free of charge from LDC as the Boulder Lies and Truth Corpus (BLT-C). Challenges for both corpus creation and the deception detection include the fact that human performance on the task is typically at chance, that the signal is faint, that paid writers such as turkers are sometimes deceptive, and that deception is a complex human behavior; manifestations of deception depend on details of domain, intrinsic properties of the deceiver (such as education, linguistic competence, and the nature of the intention), and specifics of the deceptive act (e.g., lying vs. fabricating.) To overcome the inherent lack of ground truth, we have developed a set of semi-automatic techniques to ensure corpus validity. We present some preliminary results on the task of deception detection which suggest that the BLT-C is an improvement in the quality of resources available for this task.</abstract>
       <url hash="b3404a75">L16-1558</url>
@@ -5975,7 +5975,7 @@
     </paper>
     <paper id="559">
       <title>Finding Alternative Translations in a Large Corpus of Movie Subtitle</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>3518–3522</pages>
       <abstract>OpenSubtitles.org provides a large collection of user contributed subtitles in various languages for movies and TV programs. Subtitle translations are valuable resources for cross-lingual studies and machine translation research. A less explored feature of the collection is the inclusion of alternative translations, which can be very useful for training paraphrase systems or collecting multi-reference test suites for machine translation. However, differences in translation may also be due to misspellings, incomplete or corrupt data files, or wrongly aligned subtitles. This paper reports our efforts in recognising and classifying alternative subtitle translations with language independent techniques. We use time-based alignment with lexical re-synchronisation techniques and BLEU score filters and sort alternative translations into categories using edit distance metrics and heuristic rules. Our approach produces large numbers of sentence-aligned translation alternatives for over 50 languages provided via the OPUS corpus collection.</abstract>
       <url hash="a7eff068">L16-1559</url>
@@ -5985,7 +5985,7 @@
       <title>Exploiting a Large Strongly Comparable Corpus</title>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
-      <author><first>Naiara</first><last>Pérez</last></author>
+      <author id="naiara-perez"><first>Naiara</first><last>Pérez</last></author>
       <pages>3523–3529</pages>
       <abstract>This article describes a large comparable corpus for Basque and Spanish and the methods employed to build a parallel resource from the original data. The EITB corpus, a strongly comparable corpus in the news domain, is to be shared with the research community, as an aid for the development and testing of methods in comparable corpora exploitation, and as basis for the improvement of data-driven machine translation systems for this language pair. Competing approaches were explored for the alignment of comparable segments in the corpus, resulting in the design of a simple method which outperformed a state-of-the-art method on the corpus test sets. The method we present is highly portable, computationally efficient, and significantly reduces deployment work, a welcome result for the exploitation of comparable corpora.</abstract>
       <url hash="bf40fbfa">L16-1560</url>
@@ -6005,7 +6005,7 @@
       <title><fixed-case>WAGS</fixed-case>: A Beautiful <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>talian Benchmark Supporting Word Alignment Evaluation on Rare Words</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>3535–3542</pages>
       <abstract>This paper presents WAGS (Word Alignment Gold Standard), a novel benchmark which allows extensive evaluation of WA tools on out-of-vocabulary (OOV) and rare words. WAGS is a subset of the Common Test section of the Europarl English-Italian parallel corpus, and is specifically tailored to OOV and rare words. WAGS is composed of 6,715 sentence pairs containing 11,958 occurrences of OOV and rare words up to frequency 15 in the Europarl Training set (5,080 English words and 6,878 Italian words), representing almost 3% of the whole text. Since WAGS is focused on OOV/rare words, manual alignments are provided for these words only, and not for the whole sentences. Two off-the-shelf word aligners have been evaluated on WAGS, and results have been compared to those obtained on an existing benchmark tailored to full text alignment. The results obtained confirm that WAGS is a valuable resource, which allows a statistically sound evaluation of WA systems’ performance on OOV and rare words, as well as extensive data analyses. WAGS is publicly released under a Creative Commons Attribution license.</abstract>
@@ -6015,7 +6015,7 @@
     <paper id="563">
       <title>Manual and Automatic Paraphrases for <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Petra</first><last>Barančíková</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
       <pages>3543–3548</pages>
       <abstract>Paraphrasing of reference translations has been shown to improve the correlation with human judgements in automatic evaluation of machine translation (MT) outputs. In this work, we present a new dataset for evaluating English-Czech translation based on automatic paraphrases. We compare this dataset with an existing set of manually created paraphrases and find that even automatic paraphrases can improve MT evaluation. We have also propose and evaluate several criteria for selecting suitable reference translations from a larger set.</abstract>
       <url hash="64a83c8a">L16-1563</url>
@@ -6036,10 +6036,10 @@
       <author><first>Helge</first><last>Dyvik</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Petter</first><last>Haugereid</last></author>
       <author><first>Gyri Smørdal</first><last>Losnegaard</last></author>
-      <author><first>Gunn Inger</first><last>Lyse</last></author>
+      <author id="gunn-inger-lyse"><first>Gunn Inger</first><last>Lyse</last></author>
       <author><first>Martha</first><last>Thunes</last></author>
       <pages>3555–3562</pages>
       <abstract>We present NorGramBank, a treebank for Norwegian with highly detailed LFG analyses. It is one of many treebanks made available through the INESS treebanking infrastructure. NorGramBank was constructed as a parsebank, i.e. by automatically parsing a corpus, using the wide coverage grammar NorGram. One part consisting of 350,000 words has been manually disambiguated using computer-generated discriminants. A larger part of 50 M words has been stochastically disambiguated. The treebank is dynamic: by global reparsing at certain intervals it is kept compatible with the latest versions of the grammar and the lexicon, which are continually further developed in interaction with the annotators. A powerful query language, INESS Search, has been developed for search across formalisms in the INESS treebanks, including LFG c- and f-structures. Evaluation shows that the grammar provides about 85% of randomly selected sentences with good analyses. Agreement among the annotators responsible for manual disambiguation is satisfactory, but also suggests desirable simplifications of the grammar.</abstract>
@@ -6049,8 +6049,8 @@
     <paper id="566">
       <title>Accurate Deep Syntactic Parsing of Graphs: The Case of <fixed-case>F</fixed-case>rench</title>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>3563–3568</pages>
       <abstract>Parsing predicate-argument structures in a deep syntax framework requires graphs to be predicted. Argument structures represent a higher level of abstraction than the syntactic ones and are thus more difficult to predict even for highly accurate parsing models on surfacic syntax. In this paper we investigate deep syntax parsing, using a French data set (Ribeyre et al., 2014a). We demonstrate that the use of topologically different types of syntactic features, such as dependencies, tree fragments, spines or syntactic paths, brings a much needed context to the parser. Our higher-order parsing model, gaining thus up to 4 points, establishes the state of the art for parsing French deep syntactic structures.</abstract>
       <url hash="9bd674e9">L16-1566</url>
@@ -6061,7 +6061,7 @@
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>3569–3577</pages>
       <abstract>Idafa in traditional Arabic grammar is an umbrella construction that covers several phenomena including what is expressed in English as noun-noun compounds and Saxon and Norman genitives. Additionally, Idafa participates in some other constructions, such as quantifiers, quasi-prepositions, and adjectives. Identifying the various types of the Idafa construction (IC) is of importance to Natural Language processing (NLP) applications. Noun-Noun compounds exhibit special behavior in most languages impacting their semantic interpretation. Hence distinguishing them could have an impact on downstream NLP applications. The most comprehensive syntactic representation of the Arabic language is the LDC Arabic Treebank (ATB). In the ATB, ICs are not explicitly labeled and furthermore, there is no distinction between ICs of noun-noun relations and other traditional ICs. Hence, we devise a detailed syntactic and semantic typification process of the IC phenomenon in Arabic. We target the ATB as a platform for this classification. We render the ATB annotated with explicit IC labels but with the further semantic characterization which is useful for syntactic, semantic and cross language processing. Our typification of IC comprises 3 main syntactic IC types: FIC, GIC, and TIC, and they are further divided into 10 syntactic subclasses. The TIC group is further classified into semantic relations. We devise a method for automatic IC labeling and compare its yield against the CATiB treebank. Our evaluation shows that we achieve the same level of accuracy, but with the additional fine-grained classification into the various syntactic and semantic types.</abstract>
       <url hash="e15e26a2">L16-1567</url>
@@ -6103,7 +6103,7 @@
       <author><first>Elif Ahsen</first><last>Acar</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <author><first>Murathan</first><last>Kurfalı</last></author>
-      <author><first>Cem</first><last>Bozşahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozşahin</last></author>
       <pages>3600–3606</pages>
       <abstract>This study primarily aims to build a Turkish psycholinguistic database including three variables: word frequency, age of acquisition (AoA), and imageability, where AoA and imageability information are limited to nouns. We used a corpus-based approach to obtain information about the AoA variable. We built two corpora: a child literature corpus (CLC) including 535 books written for 3-12 years old children, and a corpus of transcribed children’s speech (CSC) at ages 1;4-4;8. A comparison between the word frequencies of CLC and CSC gave positive correlation results, suggesting the usability of the CLC to extract AoA information. We assumed that frequent words of the CLC would correspond to early acquired words whereas frequent words of a corpus of adult language would correspond to late acquired words. To validate AoA results from our corpus-based approach, a rated AoA questionnaire was conducted on adults. Imageability values were collected via a different questionnaire conducted on adults. We conclude that it is possible to deduce AoA information for high frequency words with the corpus-based approach. The results about low frequency words were inconclusive, which is attributed to the fact that corpus-based AoA information is affected by the strong negative correlation between corpus frequency and rated AoA.</abstract>
       <url hash="eed76e6b">L16-1571</url>
@@ -6112,7 +6112,7 @@
     <paper id="572">
       <title>Domain-Specific Corpus Expansion with Focused Webcrawling</title>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>3607–3611</pages>
       <abstract>This work presents a straightforward method for extending or creating in-domain web corpora by focused webcrawling. The focused webcrawler uses statistical N-gram language models to estimate the relatedness of documents and weblinks and needs as input only N-grams or plain texts of a predefined domain and seed URLs as starting points. Two experiments demonstrate that our focused crawler is able to stay focused in domain and language. The first experiment shows that the crawler stays in a focused domain, the second experiment demonstrates that language models trained on focused crawls obtain better perplexity scores on in-domain corpora. We distribute the focused crawler as open source software.</abstract>
       <url hash="d1738839">L16-1572</url>
@@ -6121,7 +6121,7 @@
     <paper id="573">
       <title>Corpus-Based Diacritic Restoration for <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic Languages</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <pages>3612–3616</pages>
       <abstract>In computer-mediated communication, Latin-based scripts users often omit diacritics when writing. Such text is typically easily understandable to humans but very difficult for computational processing because many words become ambiguous or unknown. Letter-level approaches to diacritic restoration generalise better and do not require a lot of training data but word-level approaches tend to yield better results. However, they typically rely on a lexicon which is an expensive resource, not covering non-standard forms, and often not available for less-resourced languages. In this paper we present diacritic restoration models that are trained on easy-to-acquire corpora. We test three different types of corpora (Wikipedia, general web, Twitter) for three South Slavic languages (Croatian, Serbian and Slovene) and evaluate them on two types of text: standard (Wikipedia) and non-standard (Twitter). The proposed approach considerably outperforms charlifter, so far the only open source tool available for this task. We make the best performing systems freely available.</abstract>
@@ -6130,7 +6130,7 @@
     </paper>
     <paper id="574">
       <title>Automatic Recognition of Linguistic Replacements in Text Series Generated from Keystroke Logs</title>
-      <author><first>Daniel</first><last>Couto-Vale</last></author>
+      <author id="daniel-couto-vale"><first>Daniel</first><last>Couto-Vale</last></author>
       <author><first>Stella</first><last>Neumann</last></author>
       <author><first>Paula</first><last>Niemietz</last></author>
       <pages>3617–3623</pages>
@@ -6143,7 +6143,7 @@
       <author><first>Elena</first><last>Manishina</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>3624–3631</pages>
       <abstract>As data-driven approaches started to make their way into the Natural Language Generation (NLG) domain, the need for automation of corpus building and extension became apparent. Corpus creation and extension in data-driven NLG domain traditionally involved manual paraphrasing performed by either a group of experts or with resort to crowd-sourcing. Building the training corpora manually is a costly enterprise which requires a lot of time and human resources. We propose to automate the process of corpus extension by integrating automatically obtained synonyms and paraphrases. Our methodology allowed us to significantly increase the size of the training corpus and its level of variability (the number of distinct tokens and specific syntactic structures). Our extension solutions are fully automatic and require only some initial validation. The human evaluation results confirm that in many cases native speakers favor the outputs of the model built on the extended corpus.</abstract>
       <url hash="d525059d">L16-1575</url>
@@ -6152,8 +6152,8 @@
     <paper id="576">
       <title>Bilbo-Val: Automatic Identification of Bibliographical Zone in Papers</title>
       <author><first>Amal</first><last>Htait</last></author>
-      <author><first>Sebastien</first><last>Fournier</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="sebastien-fournier"><first>Sebastien</first><last>Fournier</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>3632–3636</pages>
       <abstract>In this paper, we present the automatic annotation of bibliographical references’ zone in papers and articles of XML/TEI format. Our work is applied through two phases: first, we use machine learning technology to classify bibliographical and non-bibliographical paragraphs in papers, by means of a model that was initially created to differentiate between the footnotes containing or not containing bibliographical references. The previous description is one of BILBO’s features, which is an open source software for automatic annotation of bibliographic reference. Also, we suggest some methods to minimize the margin of error. Second, we propose an algorithm to find the largest list of bibliographical references in the article. The improvement applied on our model results an increase in the model’s efficiency with an Accuracy equal to 85.89. And by testing our work, we are able to achieve 72.23% as an average for the percentage of success in detecting bibliographical references’ zone.</abstract>
       <url hash="a1db23cc">L16-1576</url>
@@ -6164,7 +6164,7 @@
       <author><first>Wajdi</first><last>Zaghouani</last></author>
       <author><first>Houda</first><last>Bouamor</last></author>
       <author><first>Abdelati</first><last>Hawwari</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Ossama</first><last>Obeid</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
       <author><first>Sawsan</first><last>Alqahtani</last></author>
@@ -6178,7 +6178,7 @@
       <title>Applying the Cognitive Machine Translation Evaluation Approach to <fixed-case>A</fixed-case>rabic</title>
       <author><first>Irina</first><last>Temnikova</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>3644–3651</pages>
       <abstract>The goal of the cognitive machine translation (MT) evaluation approach is to build classifiers which assign post-editing effort scores to new texts. The approach helps estimate fair compensation for post-editors in the translation industry by evaluating the cognitive difficulty of post-editing MT output. The approach counts the number of errors classified in different categories on the basis of how much cognitive effort they require in order to be corrected. In this paper, we present the results of applying an existing cognitive evaluation approach to Modern Standard Arabic (MSA). We provide a comparison of the number of errors and categories of errors in three MSA texts of different MT quality (without any language-specific adaptation), as well as a comparison between MSA texts and texts from three Indo-European languages (Russian, Spanish, and Bulgarian), taken from a previous experiment. The results show how the error distributions change passing from the MSA texts of worse MT quality to MSA texts of better MT quality, as well as a similarity in distinguishing the texts of better MT quality for all four languages.</abstract>
@@ -6187,7 +6187,7 @@
     </paper>
     <paper id="579">
       <title>A Reading Comprehension Corpus for Machine Translation Evaluation</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3652–3658</pages>
       <abstract>Effectively assessing Natural Language Processing output tasks is a challenge for research in the area. In the case of Machine Translation (MT), automatic metrics are usually preferred over human evaluation, given time and budget constraints. However, traditional automatic metrics (such as BLEU) are not reliable for absolute quality assessment of documents, often producing similar scores for documents translated by the same MT system. For scenarios where absolute labels are necessary for building models, such as document-level Quality Estimation, these metrics can not be fully trusted. In this paper, we introduce a corpus of reading comprehension tests based on machine translated documents, where we evaluate documents based on answers to questions by fluent speakers of the target language. We describe the process of creating such a resource, the experiment design and agreement between the test takers. Finally, we discuss ways to convert the reading comprehension test into document-level quality scores.</abstract>
@@ -6218,7 +6218,7 @@
     <paper id="582">
       <title><fixed-case>MARMOT</fixed-case>: A Toolkit for Translation Quality Estimation at the Word Level</title>
       <author><first>Varvara</first><last>Logacheva</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>3671–3674</pages>
       <abstract>We present Marmot~― a new toolkit for quality estimation (QE) of machine translation output. Marmot contains utilities targeted at quality estimation at the word and phrase level. However, due to its flexibility and modularity, it can also be extended to work at the sentence level. In addition, it can be used as a framework for extracting features and learning models for many common natural language processing tasks. The tool has a set of state-of-the-art features for QE, and new features can easily be added. The tool is open-source and can be downloaded from <url>https://github.com/qe-team/marmot/</url></abstract>
@@ -6266,10 +6266,10 @@
     </paper>
     <paper id="587">
       <title><fixed-case>GATE</fixed-case>-Time: Extraction of Temporal Expressions and Events</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Jannik</first><last>Strötgen</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Mark A.</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></author>
       <author><first>Manuel</first><last>Jung</last></author>
       <pages>3702–3708</pages>
       <abstract>GATE is a widely used open-source solution for text processing with a large user community. It contains components for several natural language processing tasks. However, temporal information extraction functionality within GATE has been rather limited so far, despite being a prerequisite for many application scenarios in the areas of natural language processing and information retrieval. This paper presents an integrated approach to temporal information processing. We take state-of-the-art tools in temporal expression and event recognition and bring them together to form an openly-available resource within the GATE infrastructure. GATE-Time provides annotation in the form of TimeML events and temporal expressions complying with this mature ISO standard for temporal semantic annotation of documents. Major advantages of GATE-Time are (i) that it relies on HeidelTime for temporal tagging, so that temporal expressions can be extracted and normalized in multiple languages and across different domains, (ii) it includes a modern, fast event recognition and classification tool, and (iii) that it can be combined with different linguistic pre-processing annotations, and is thus not bound to license restricted preprocessing components.</abstract>
@@ -6290,7 +6290,7 @@
       <author><first>Justin</first><last>Mott</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>3717–3722</pages>
       <abstract>This paper introduces the parallel Chinese-English Entities, Relations and Events (ERE) corpora developed by Linguistic Data Consortium under the DARPA Deep Exploration and Filtering of Text (DEFT) Program. Original Chinese newswire and discussion forum documents are annotated for two versions of the ERE task. The texts are manually translated into English and then annotated for the same ERE tasks on the English translation, resulting in a rich parallel resource that has utility for performers within the DEFT program, for participants in NIST’s Knowledge Base Population evaluations, and for cross-language projection research more generally.</abstract>
       <url hash="6bce29ae">L16-1589</url>
@@ -6310,8 +6310,8 @@
       <title>An Empirical Exploration of Moral Foundations Theory in Partisan News Sources</title>
       <author><first>Dean</first><last>Fulgoni</last></author>
       <author><first>Jordan</first><last>Carpenter</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <pages>3730–3736</pages>
       <abstract>News sources frame issues in different ways in order to appeal or control the perception of their readers. We present a large scale study of news articles from partisan sources in the US across a variety of different issues. We first highlight that differences between sides exist by predicting the political leaning of articles of unseen political bias. Framing can be driven by different types of morality that each group values. We emphasize differences in framing of different news building on the moral foundations theory quantified using hand crafted lexicons. Our results show that partisan sources frame political issues differently both in terms of words usage and through the moral foundations they relate to.</abstract>
       <url hash="836e521b">L16-1591</url>
@@ -6321,7 +6321,7 @@
       <title>Building a Dataset for Possessions Identification in Text</title>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Xi</first><last>Chen</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>3737–3740</pages>
       <abstract>Just as industrialization matured from mass production to customization and personalization, so has the Web migrated from generic content to public disclosures of one’s most intimately held thoughts, opinions and beliefs. This relatively new type of data is able to represent finer and more narrowly defined demographic slices. If until now researchers have primarily focused on leveraging personalized content to identify latent information such as gender, nationality, location, or age of the author, this study seeks to establish a structured way of extracting possessions, or items that people own or are entitled to, as a way to ultimately provide insights into people’s behaviors and characteristics. In order to promote more research in this area, we are releasing a set of 798 possessions extracted from blog genre, where possessions are marked at different confidence levels, as well as a detailed set of guidelines to help in future annotation studies.</abstract>
       <url hash="3aa13605">L16-1592</url>
@@ -6330,7 +6330,7 @@
     <paper id="593">
       <title>The Query of Everything: Developing Open-Domain, Natural-Language Queries for <fixed-case>BOLT</fixed-case> Information Retrieval</title>
       <author><first>Kira</first><last>Griffitt</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>3741–3747</pages>
       <abstract>The DARPA BOLT Information Retrieval evaluations target open-domain natural-language queries over a large corpus of informal text in English, Chinese and Egyptian Arabic. We outline the goals of BOLT IR, comparing it with the prior GALE Distillation task. After discussing the properties of the BOLT IR corpus, we provide a detailed description of the query creation process, contrasting the summary query format presented to systems at run time with the full query format created by annotators. We describe the relevance criteria used to assess BOLT system responses, highlighting the evolution of the procedures used over the three evaluation phases. We provide a detailed review of the decision points model for relevance assessment introduced during Phase 2, and conclude with information about inter-assessor consistency achieved with the decision points assessment model.</abstract>
       <url hash="b609bae9">L16-1593</url>
@@ -6340,7 +6340,7 @@
       <title>The Validation of <fixed-case>MRCPD</fixed-case> Cross-language Expansions on Imageability Ratings</title>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Kit</first><last>Cho</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Mehrdad</first><last>Mirzaei</last></author>
       <pages>3748–3751</pages>
@@ -6396,7 +6396,7 @@
     </paper>
     <paper id="600">
       <title><fixed-case>PROMETHEUS</fixed-case>: A Corpus of Proverbs Annotated with Metaphors</title>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
       <pages>3787–3793</pages>
@@ -6407,7 +6407,7 @@
     <paper id="601">
       <title>Corpus Annotation within the <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: a Domain-by-domain Methodology</title>
       <author><first>Marianne</first><last>Djemaa</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Laure</first><last>Vieu</last></author>
       <pages>3794–3801</pages>
@@ -6424,7 +6424,7 @@
       <author><first>Lotfi</first><last>Abouda</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Denis</first><last>Maurel</last></author>
-      <author><first>Iris</first><last>Eshkol</last></author>
+      <author id="iris-eshkol"><first>Iris</first><last>Eshkol</last></author>
       <author><first>Delphine</first><last>Battistelli</last></author>
       <pages>3802–3806</pages>
       <abstract>This paper reports a critical analysis of the ISO TimeML standard, in the light of several experiences of temporal annotation that were conducted on spoken French. It shows that the norm suffers from weaknesses that should be corrected to fit a larger variety of needs inNLP and in corpus linguistics. We present our proposition of some improvements of the norm before it will be revised by the ISO Committee in 2017. These modifications concern mainly (1) Enrichments of well identified features of the norm: temporal function of TIMEX time expressions, additional types for TLINK temporal relations; (2) Deeper modifications concerning the units or features annotated: clarification between time and tense for EVENT units, coherence of representation between temporal signals (the SIGNAL unit) and TIMEX modifiers (the MOD feature); (3) A recommendation to perform temporal annotation on top of a syntactic (rather than lexical) layer (temporal annotation on a treebank).</abstract>
@@ -6435,7 +6435,7 @@
       <title>A General Framework for the Annotation of Causality Based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Laure</first><last>Vieu</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Marianne</first><last>Djemaa</last></author>
       <pages>3807–3813</pages>
       <abstract>We present here a general set of semantic frames to annotate causal expressions, with a rich lexicon in French and an annotated corpus of about 5000 instances of causal lexical items with their corresponding semantic frames. The aim of our project is to have both the largest possible coverage of causal phenomena in French, across all parts of speech, and have it linked to a general semantic framework such as FN, to benefit in particular from the relations between other semantic frames, e.g., temporal ones or intentional ones, and the underlying upper lexical ontology that enable some forms of reasoning. This is part of the larger ASFALDA French FrameNet project, which focuses on a few different notional domains which are interesting in their own right (Djemma et al., 2016), including cognitive positions and communication frames. In the process of building the French lexicon and preparing the annotation of the corpus, we had to remodel some of the frames proposed in FN based on English data, with hopefully more precise frame definitions to facilitate human annotation. This includes semantic clarifications of frames and frame elements, redundancy elimination, and added coverage. The result is arguably a significant improvement of the treatment of causality in FN itself.</abstract>
@@ -6453,7 +6453,7 @@
     </paper>
     <paper id="605">
       <title><fixed-case>S</fixed-case>pace<fixed-case>R</fixed-case>ef: A corpus of street-level geographic descriptions</title>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <pages>3822–3827</pages>
       <abstract>This article describes SPACEREF, a corpus of street-level geographic descriptions. Pedestrians are walking a route in a (real) urban environment, describing their actions. Their position is automatically logged, their speech is manually transcribed, and their references to objects are manually annotated with respect to a crowdsourced geographic database. We describe how the data was collected and annotated, and how it has been used in the context of creating resources for an automatic pedestrian navigation system.</abstract>
@@ -6471,11 +6471,11 @@
     </paper>
     <paper id="607">
       <title>Typed Entity and Relation Annotation on Computer Science Papers</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>3836–3843</pages>
       <abstract>We describe our ongoing effort to establish an annotation scheme for describing the semantic structures of research articles in the computer science domain, with the intended use of developing search systems that can refine their results by the roles of the entities denoted by the query keys. In our scheme, mentions of entities are annotated with ontology-based types, and the roles of the entities are annotated as relations with other entities described in the text. So far, we have annotated 400 abstracts from the ACL anthology and the ACM digital library. In this paper, the scheme and the annotated dataset are described, along with the problems found in the course of annotation. We also show the results of automatic annotation and evaluate the corpus in a practical setting in application to topic extraction.</abstract>
       <url hash="4c7d572e">L16-1607</url>
@@ -6508,7 +6508,7 @@
       <author><first>Claire</first><last>Li</last></author>
       <author><first>Sam</first><last>Lam</last></author>
       <author><first>Billy</first><last>Chiu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Minglei</first><last>Li</last></author>
       <author><first>Dan</first><last>Xiong</last></author>
       <author><first>Roy Shing</first><last>Yu</last></author>
@@ -6521,7 +6521,7 @@
     <paper id="611">
       <title>Collecting Resources in Sub-<fixed-case>S</fixed-case>aharan <fixed-case>A</fixed-case>frican Languages for Automatic Speech Recognition: a Case Study of <fixed-case>W</fixed-case>olof</title>
       <author><first>Elodie</first><last>Gauthier</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Sylvie</first><last>Voisin</last></author>
       <author><first>Michael</first><last>Melese</last></author>
       <author><first>Uriel Pascal</first><last>Elingui</last></author>
@@ -6535,7 +6535,7 @@
       <author><first>Joris</first><last>Pelemans</last></author>
       <author><first>Lyan</first><last>Verwimp</last></author>
       <author><first>Kris</first><last>Demuynck</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <pages>3868–3871</pages>
       <abstract>In this paper we present SCALE, a new Python toolkit that contains two extensions to n-gram language models. The first extension is a novel technique to model compound words called Semantic Head Mapping (SHM). The second extension, Bag-of-Words Language Modeling (BagLM), bundles popular models such as Latent Semantic Analysis and Continuous Skip-grams. Both extensions scale to large data and allow the integration into first-pass ASR decoding. The toolkit is open source, includes working examples and can be found on <url>http://github.com/jorispelemans/scale</url>.</abstract>
@@ -6555,7 +6555,7 @@
     <paper id="614">
       <title><fixed-case>BAS</fixed-case> Speech Science Web Services - an Update of Current Developments</title>
       <author><first>Thomas</first><last>Kisler</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Florian</first><last>Schiel</last></author>
       <author><first>Christoph</first><last>Draxler</last></author>
       <author><first>Bernhard</first><last>Jackl</last></author>
@@ -6574,8 +6574,8 @@
       <author><first>Jaime</first><last>Ferreira</last></author>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
       <author><first>Helena</first><last>Moniz</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
       <pages>3886–3892</pages>
       <abstract>This paper presents SPA, a web-based Speech Analytics platform that integrates several speech processing modules and that makes it possible to use them through the web. It was developed with the aim of facilitating the usage of the modules, without the need to know about software dependencies and specific configurations. Apart from being accessed by a web-browser, the platform also provides a REST API for easy integration with other applications. The platform is flexible, scalable, provides authentication for access restrictions, and was developed taking into consideration the time and effort of providing new services. The platform is still being improved, but it already integrates a considerable number of audio and text processing modules, including: Automatic transcription, speech disfluency classification, emotion detection, dialog act recognition, age and gender classification, non-nativeness detection, hyper-articulation detection, dialog act recognition, and two external modules for feature extraction and DTMF detection. This paper describes the SPA architecture, presents the already integrated modules, and provides a detailed description for the ones most recently integrated.</abstract>
       <url hash="3a98bb9a">L16-1615</url>
@@ -6600,7 +6600,7 @@
       <author><first>John</first><last>Lawrence</last></author>
       <author><first>Joonsuk</first><last>Park</last></author>
       <author><first>Katarzyna</first><last>Budzynska</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>3899–3906</pages>
       <abstract>Governments are increasingly utilising online platforms in order to engage with, and ascertain the opinions of, their citizens. Whilst policy makers could potentially benefit from such enormous feedback from society, they first face the challenge of making sense out of the large volumes of data produced. This creates a demand for tools and technologies which will enable governments to quickly and thoroughly digest the points being made and to respond accordingly. By determining the argumentative and dialogical structures contained within a debate, we are able to determine the issues which are divisive and those which attract agreement. This paper proposes a method of graph-based analytics which uses properties of graphs representing networks of arguments pro- &amp; con- in order to automatically analyse issues which divide citizens about new regulations. By future application of the most recent advances in argument mining, the results reported here will have a chance to scale up to enable sense-making of the vast amount of feedback received from citizens on directions that policy should take.</abstract>
       <url hash="b3d098db">L16-1617</url>
@@ -6609,7 +6609,7 @@
     <paper id="618">
       <title>meta<fixed-case>TED</fixed-case>: a Corpus of Metadiscourse for Spoken Language</title>
       <author><first>Rui</first><last>Correia</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>3907–3913</pages>
@@ -6641,7 +6641,7 @@
       <title>Focus Annotation of Task-based Data: A Comparison of Expert and Crowd-Sourced Annotation in a Reading Comprehension Corpus</title>
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>3928–3935</pages>
       <abstract>While the formal pragmatic concepts in information structure, such as the focus of an utterance, are precisely defined in theoretical linguistics and potentially very useful in conceptual and practical terms, it has turned out to be difficult to reliably annotate such notions in corpus data. We present a large-scale focus annotation effort designed to overcome this problem. Our annotation study is based on the tasked-based corpus CREG, which consists of answers to explicitly given reading comprehension questions. We compare focus annotation by trained annotators with a crowd-sourcing setup making use of untrained native speakers. Given the task context and an annotation process incrementally making the question form and answer type explicit, the trained annotators reach substantial agreement for focus annotation. Interestingly, the crowd-sourcing setup also supports high-quality annotation ― for specific subtypes of data. Finally, we turn to the question whether the relevance of focus annotation can be extrinsically evaluated. We show that automatic short-answer assessment significantly improves for focus annotated data. The focus annotated CREG corpus is freely available and constitutes the largest such resource for German.</abstract>
       <url hash="31c05b5b">L16-1621</url>
@@ -6649,10 +6649,10 @@
     </paper>
     <paper id="622">
       <title>Homing in on <fixed-case>T</fixed-case>witter Users: Evaluating an Enhanced Geoparser for User Profile Locations</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Clare</first><last>Llewellyn</last></author>
       <author><first>Claire</first><last>Grover</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Richard</first><last>Tobin</last></author>
       <pages>3936–3944</pages>
       <abstract>Twitter-related studies often need to geo-locate Tweets or Twitter users, identifying their real-world geographic locations. As tweet-level geotagging remains rare, most prior work exploited tweet content, timezone and network information to inform geolocation, or else relied on off-the-shelf tools to geolocate users from location information in their user profiles. However, such user location metadata is not consistently structured, causing such tools to fail regularly, especially if a string contains multiple locations, or if locations are very fine-grained. We argue that user profile location (UPL) and tweet location need to be treated as distinct types of information from which differing inferences can be drawn. Here, we apply geoparsing to UPLs, and demonstrate how task performance can be improved by adapting our Edinburgh Geoparser, which was originally developed for processing English text. We present a detailed evaluation method and results, including inter-coder agreement. We demonstrate that the optimised geoparser can effectively extract and geo-reference multiple locations at different levels of granularity with an F1-score of around 0.90. We also illustrate how geoparsed UPLs can be exploited for international information trade studies and country-level sentiment analysis.</abstract>
@@ -6661,7 +6661,7 @@
     </paper>
     <paper id="623">
       <title>A Dataset for Detecting Stance in Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Parinaz</first><last>Sobhani</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
@@ -6673,7 +6673,7 @@
     </paper>
     <paper id="624">
       <title>Emotion Analysis on <fixed-case>T</fixed-case>witter: The Hidden Challenge</title>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>André</first><last>Bittar</last></author>
       <pages>3953–3958</pages>
       <abstract>In this paper, we present an experiment to detect emotions in tweets. Unlike much previous research, we draw the important distinction between the tasks of emotion detection in a closed world assumption (i.e. every tweet is emotional) and the complicated task of identifying emotional versus non-emotional tweets. Given an apparent lack of appropriately annotated data, we created two corpora for these tasks. We describe two systems, one symbolic and one based on machine learning, which we evaluated on our datasets. Our evaluation shows that a machine learning classifier performs best on emotion detection, while a symbolic approach is better for identifying relevant (i.e. emotional) tweets.</abstract>
@@ -6711,8 +6711,8 @@
     </paper>
     <paper id="628">
       <title>Comprehensive and Consistent <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Light Verb Annotation</title>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>3980–3985</pages>
       <abstract>Recent efforts have focused on expanding the annotation coverage of PropBank from verb relations to adjective and noun relations, as well as light verb constructions (e.g., make an offer, take a bath). While each new relation type has presented unique annotation challenges, ensuring consistent and comprehensive annotation of light verb constructions has proved particularly challenging, given that light verb constructions are semi-productive, difficult to define, and there are often borderline cases. This research describes the iterative process of developing PropBank annotation guidelines for light verb constructions, the current guidelines, and a comparison to related resources.</abstract>
       <url hash="cf2b5ab9">L16-1628</url>
@@ -6722,7 +6722,7 @@
       <title>Inconsistency Detection in Semantic Annotation</title>
       <author><first>Nora</first><last>Hollenstein</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>3986–3990</pages>
       <abstract>Inconsistencies are part of any manually annotated corpus. Automatically finding these inconsistencies and correcting them (even manually) can increase the quality of the data. Past research has focused mainly on detecting inconsistency in syntactic annotation. This work explores new approaches to detecting inconsistency in semantic annotation. Two ranking methods are presented in this paper: a discrepancy ranking and an entropy ranking. Those methods are then tested and evaluated on multiple corpora annotated with multiword expressions and supersense labels. The results show considerable improvements in detecting inconsistency candidates over a random baseline. Possible applications of methods for inconsistency detection are improving the annotation procedure as well as the guidelines and correcting errors in completed annotations.</abstract>
       <url hash="56f7cee3">L16-1629</url>
@@ -6733,12 +6733,12 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Marco</first><last>Kuhlmann</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Angelina</first><last>Ivanova</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>3991–3995</pages>
       <abstract>We announce a new language resource for research on semantic parsing, a large, carefully curated collection of semantic dependency graphs representing multiple linguistic traditions. This resource is called SDP~2016 and provides an update and extension to previous versions used as Semantic Dependency Parsing target representations in the 2014 and 2015 Semantic Evaluation Exercises. For a common core of English text, this third edition comprises semantic dependency graphs from four distinct frameworks, packaged in a unified abstract format and aligned at the sentence and token levels. SDP 2016 is the first general release of this resource and available for licensing from the Linguistic Data Consortium in May 2016. The data is accompanied by an open-source SDP utility toolkit and system results from previous contrastive parsing evaluations against these target representations.</abstract>
       <url hash="756199cd">L16-1630</url>
@@ -6756,7 +6756,7 @@
     <paper id="632">
       <title>Endangered Language Documentation: Bootstrapping a Chatino Speech Corpus, Forced Aligner, <fixed-case>ASR</fixed-case></title>
       <author><first>Malgorzata</first><last>Ćavar</last></author>
-      <author><first>Damir</first><last>Ćavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Ćavar</last></author>
       <author><first>Hilaria</first><last>Cruz</last></author>
       <pages>4004–4011</pages>
       <abstract>This project approaches the problem of language documentation and revitalization from a rather untraditional angle. To improve and facilitate language documentation of endangered languages, we attempt to use corpus linguistic methods and speech and language technologies to reduce the time needed for transcription and annotation of audio and video language recordings. The paper demonstrates this approach on the example of the endangered and seriously under-resourced variety of Eastern Chatino (CTP). We show how initial speech corpora can be created that can facilitate the development of speech and language technologies for under-resourced languages by utilizing Forced Alignment tools to time align transcriptions. Time-aligned transcriptions can be used to train speech corpora and utilize automatic speech recognition tools for the transcription and annotation of untranscribed data. Speech technologies can be used to reduce the time and effort necessary for transcription and annotation of large collections of audio and video recordings in digital language archives, addressing the transcription bottleneck problem that most language archives and many under-documented languages are confronted with. This approach can increase the availability of language resources from low-resourced and endangered languages to speech and language technology research and development.</abstract>
@@ -6786,7 +6786,7 @@
     <paper id="635">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>J</fixed-case>apanese Translation vs. Dictation vs. Post-editing: Comparing Translation Modes in a Multilingual Setting</title>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <author><first>Masaru</first><last>Yamada</last></author>
       <pages>4024–4031</pages>
       <abstract>Speech-enabled interfaces have the potential to become one of the most efficient and ergonomic environments for human-computer interaction and for text production. However, not much research has been carried out to investigate in detail the processes and strategies involved in the different modes of text production. This paper introduces and evaluates a corpus of more than 55 hours of English-to-Japanese user activity data that were collected within the ENJA15 project, in which translators were observed while writing and speaking translations (translation dictation) and during machine translation post-editing. The transcription of the spoken data, keyboard logging and eye-tracking data were recorded with Translog-II, post-processed and integrated into the CRITT Translation Process Research-DB (TPR-DB), which is publicly available under a creative commons license. The paper presents the ENJA15 data as part of a large multilingual Chinese, Danish, German, Hindi and Spanish translation process data collection of more than 760 translation sessions. It compares the ENJA15 data with the other language pairs and reviews some of its particularities.</abstract>
@@ -6824,8 +6824,8 @@
       <title>Interoperability of Annotation Schemes: Using the Pepper Framework to Display <fixed-case>AWA</fixed-case> Documents in the <fixed-case>ANNIS</fixed-case> Interface</title>
       <author><first>Talvany</first><last>Carlotto</last></author>
       <author><first>Zuhaitz</first><last>Beloki</last></author>
-      <author><first>Xabier</first><last>Artola</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="xabier-artola"><first>Xabier</first><last>Artola</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>4049–4054</pages>
       <abstract>Natural language processing applications are frequently integrated to solve complex linguistic problems, but the lack of interoperability between these tools tends to be one of the main issues found in that process. That is often caused by the different linguistic formats used across the applications, which leads to attempts to both establish standard formats to represent linguistic information and to create conversion tools to facilitate this integration. Pepper is an example of the latter, as a framework that helps the conversion between different linguistic annotation formats. In this paper, we describe the use of Pepper to convert a corpus linguistically annotated by the annotation scheme AWA into the relANNIS format, with the ultimate goal of interacting with AWA documents through the ANNIS interface. The experiment converted 40 megabytes of AWA documents, allowed their use on the ANNIS interface, and involved making architectural decisions during the mapping from AWA into relANNIS using Pepper. The main issues faced during this process were due to technical issues mainly caused by the integration of the different systems and projects, namely AWA, Pepper and ANNIS.</abstract>
       <url hash="4f16d7d3">L16-1639</url>
@@ -6835,9 +6835,9 @@
       <title><fixed-case>SPLIT</fixed-case>: Smart Preprocessing (Quasi) Language Independent Tool</title>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Arfath</first><last>Pasha</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Wael</first><last>Salloum</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <pages>4055–4060</pages>
@@ -6847,7 +6847,7 @@
     </paper>
     <paper id="641">
       <title><fixed-case>A</fixed-case>rchi<fixed-case>M</fixed-case>ob - A Corpus of Spoken <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
       <author><first>Elvira</first><last>Glaser</last></author>
       <pages>4061–4066</pages>
@@ -6874,7 +6874,7 @@
     </paper>
     <paper id="644">
       <title>Graphical Annotation for Syntax-Semantics Mapping</title>
-      <author><first>Kôiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Kôiti</first><last>Hasida</last></author>
       <pages>4080–4084</pages>
       <abstract>A potential work item (PWI) for ISO standard (MAP) about linguistic annotation concerning syntax-semantics mapping is discussed. MAP is a framework for graphical linguistic annotation to specify a mapping (set of combinations) between possible syntactic and semantic structures of the annotated linguistic data. Just like a UML diagram, a MAP diagram is formal, in the sense that it accurately specifies such a mapping. MAP provides a diagrammatic sort of concrete syntax for linguistic annotation far easier to understand than textual concrete syntax such as in XML, so that it could better facilitate collaborations among people involved in research, standardization, and practical use of linguistic data. MAP deals with syntactic structures including dependencies, coordinations, ellipses, transsentential constructions, and so on. Semantic structures treated by MAP are argument structures, scopes, coreferences, anaphora, discourse relations, dialogue acts, and so forth. In order to simplify explicit annotations, MAP allows partial descriptions, and assumes a few general rules on correspondence between syntactic and semantic compositions.</abstract>
       <url hash="82342913">L16-1644</url>
@@ -6913,7 +6913,7 @@
     <paper id="647">
       <title>Modeling Language Change in Historical Corpora: The Case of <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>4098–4104</pages>
       <abstract>This paper presents a number of experiments to model changes in a historical Portuguese corpus composed of literary texts for the purpose of temporal text classification. Algorithms were trained to classify texts with respect to their publication date taking into account lexical variation represented as word n-grams, and morphosyntactic variation represented by part-of-speech (POS) distribution. We report results of 99.8% accuracy using word unigram features with a Support Vector Machines classifier to predict the publication date of documents in time intervals of both one century and half a century. A feature analysis is performed to investigate the most informative features for this task and how they are linked to language change.</abstract>
@@ -6922,7 +6922,7 @@
     </paper>
     <paper id="648">
       <title>“He Said She Said” ― a Male/Female Corpus of <fixed-case>P</fixed-case>olish</title>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Łukasz</first><last>Borchmann</last></author>
       <author><first>Piotr</first><last>Wierzchoń</last></author>
       <pages>4105–4110</pages>
@@ -6932,7 +6932,7 @@
     </paper>
     <paper id="649">
       <title><fixed-case>C</fixed-case>ohere: A Toolkit for Local Coherence</title>
-      <author><first>Karin Sim</first><last>Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin Sim</first><last>Smith</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>4111–4114</pages>
@@ -6964,7 +6964,7 @@
       <title>Evaluating Unsupervised <fixed-case>D</fixed-case>utch Word Embeddings as a Linguistic Resource</title>
       <author><first>Stéphan</first><last>Tulkens</last></author>
       <author><first>Chris</first><last>Emmery</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>4130–4136</pages>
       <abstract>Word embeddings have recently seen a strong increase in interest as a result of strong performance gains on a variety of tasks. However, most of this research also underlined the importance of benchmark datasets, and the difficulty of constructing these for a variety of language-specific tasks. Still, many of the datasets used in these tasks could prove to be fruitful linguistic resources, allowing for unique observations into language use and variability. In this paper we demonstrate the performance of multiple types of embeddings, created with both count and prediction-based architectures on a variety of corpora, in two language-specific tasks: relation evaluation, and dialect identification. For the latter, we compare unsupervised methods with a traditional, hand-crafted dictionary. With this research, we provide the embeddings themselves, the relation evaluation task benchmark for use in further research, and demonstrate how the benchmarked embeddings prove a useful unsupervised linguistic resource, effectively used in a downstream task.</abstract>
       <url hash="53860c85">L16-1652</url>
@@ -6991,7 +6991,7 @@
     <paper id="655">
       <title><fixed-case>EN</fixed-case>-<fixed-case>ES</fixed-case>-<fixed-case>CS</fixed-case>: An <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Code-Switching <fixed-case>T</fixed-case>witter Corpus for Multilingual Sentiment Analysis</title>
       <author><first>David</first><last>Vilares</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>4149–4153</pages>
       <abstract>Code-switching texts are those that contain terms in two or more different languages, and they appear increasingly often in social media. The aim of this paper is to provide a resource to the research community to evaluate the performance of sentiment classification techniques on this complex multilingual environment, proposing an English-Spanish corpus of tweets with code-switching (EN-ES-CS CORPUS). The tweets are labeled according to two well-known criteria used for this purpose: SentiStrength and a trinary scale (positive, neutral and negative categories). Preliminary work on the resource is already done, providing a set of baselines for the research community.</abstract>
@@ -7001,7 +7001,7 @@
     <paper id="656">
       <title><fixed-case>S</fixed-case>em<fixed-case>R</fixed-case>el<fixed-case>D</fixed-case>ata ― Multilingual Contextual Annotation of Semantic Relations between Nominals: Dataset and Guidelines</title>
       <author><first>Darina</first><last>Benikova</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>4154–4161</pages>
       <abstract>Semantic relations play an important role in linguistic knowledge representation. Although their role is relevant in the context of written text, there is no approach or dataset that makes use of contextuality of classic semantic relations beyond the boundary of one sentence. We present the SemRelData dataset that contains annotations of semantic relations between nominals in the context of one paragraph. To be able to analyse the universality of this context notion, the annotation was performed on a multi-lingual and multi-genre corpus. To evaluate the dataset, it is compared to large, manually created knowledge resources in the respective languages. The comparison shows that knowledge bases not only have coverage gaps; they also do not account for semantic relations that are manifested in particular contexts only, yet still play an important role for text cohesion.</abstract>
       <url hash="0bd27f42">L16-1656</url>
@@ -7011,7 +7011,7 @@
       <title>A Multilingual, Multi-style and Multi-granularity Dataset for Cross-language Textual Similarity Detection</title>
       <author><first>Jérémy</first><last>Ferrero</last></author>
       <author><first>Frédéric</first><last>Agnès</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <pages>4162–4169</pages>
       <abstract>In this paper we describe our effort to create a dataset for the evaluation of cross-language textual similarity detection. We present preexisting corpora and their limits and we explain the various gathered resources to overcome these limits and build our enriched dataset. The proposed dataset is multilingual, includes cross-language alignment for different granularities (from chunk to document), is based on both parallel and comparable corpora and contains human and machine translated texts. Moreover, it includes texts written by multiple types of authors (from average to professionals). With the obtained dataset, we conduct a systematic and rigorous evaluation of several state-of-the-art cross-language textual similarity detection methods. The evaluation results are reviewed and discussed. Finally, dataset and scripts are made publicly available on GitHub: <url>http://github.com/FerreroJeremy/Cross-Language-Dataset</url>.</abstract>
@@ -7070,9 +7070,9 @@
     <paper id="662">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>ec: a Multilingual and Multilevel Representation Learning Toolkit for <fixed-case>NLP</fixed-case></title>
       <author><first>Alexandre</first><last>Bérard</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Olivier</first><last>Pietquin</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>4188–4192</pages>
       <abstract>We present MultiVec, a new toolkit for computing continuous representations for text at different granularity levels (word-level or sequences of words). MultiVec includes word2vec’s features, paragraph vector (batch and online) and bivec for bilingual distributed representations. MultiVec also includes different distance measures between words and sequences of words. The toolkit is written in C++ and is aimed at being fast (in the same order of magnitude as word2vec), easy to use, and easy to extend. It has been evaluated on several NLP tasks: the analogical reasoning task, sentiment analysis, and crosslingual document classification.</abstract>
       <url hash="5700afa5">L16-1662</url>
@@ -7092,7 +7092,7 @@
       <title>A Corpus of Native, Non-native and Translated Texts</title>
       <author><first>Sergiu</first><last>Nisioi</last></author>
       <author><first>Ella</first><last>Rabinovich</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>4197–4201</pages>
       <abstract>We describe a monolingual English corpus of original and (human) translated texts, with an accurate annotation of speaker properties, including the original language of the utterances and the speaker’s country of origin. We thus obtain three sub-corpora of texts reflecting native English, non-native English, and English translated from a variety of European languages. This dataset will facilitate the investigation of similarities and differences between these kinds of sub-languages. Moreover, it will facilitate a unified comparative study of translations and language produced by (highly fluent) non-native speakers, two closely-related phenomena that have only been studied in isolation so far.</abstract>
@@ -7115,7 +7115,7 @@
     <paper id="666">
       <title><fixed-case>A</fixed-case>xolotl: a Web Accessible Parallel Corpus for <fixed-case>S</fixed-case>panish-<fixed-case>N</fixed-case>ahuatl</title>
       <author><first>Ximena</first><last>Gutierrez-Vasques</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <author><first>Isaac Hernandez</first><last>Pompa</last></author>
       <pages>4210–4214</pages>
       <abstract>This paper describes the project called Axolotl which comprises a Spanish-Nahuatl parallel corpus and its search interface. Spanish and Nahuatl are distant languages spoken in the same country. Due to the scarcity of digital resources, we describe the several problems that arose when compiling this corpus: most of our sources were non-digital books, we faced errors when digitizing the sources and there were difficulties in the sentence alignment process, just to mention some. The documents of the parallel corpus are not homogeneous, they were extracted from different sources, there is dialectal, diachronical, and orthographical variation. Additionally, we present a web search interface that allows to make queries through the whole parallel corpus, the system is capable to retrieve the parallel fragments that contain a word or phrase searched by a user in any of the languages. To our knowledge, this is the first Spanish-Nahuatl public available digital parallel corpus. We think that this resource can be useful to develop language technologies and linguistic studies for this language pair.</abstract>
@@ -7124,7 +7124,7 @@
     </paper>
     <paper id="667">
       <title>A <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Code-Switching Corpus</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>4215–4220</pages>
       <abstract>Bilingual communities often alternate between languages both in spoken and written communication. One such community, Germany residents of Turkish origin produce Turkish-German code-switching, by heavily mixing two languages at discourse, sentence, or word level. Code-switching in general, and Turkish-German code-switching in particular, has been studied for a long time from a linguistic perspective. Yet resources to study them from a more computational perspective are limited due to either small size or licence issues. In this work we contribute the solution of this problem with a corpus. We present a Turkish-German code-switching corpus which consists of 1029 tweets, with a majority of intra-sentential switches. We share different type of code-switching we have observed in our collection and describe our processing steps. The first step is data collection and filtering. This is followed by manual tokenisation and normalisation. And finally, we annotate data with word-level language identification information. The resulting corpus is available for research purposes.</abstract>
       <url hash="d1342bca">L16-1667</url>
@@ -7143,7 +7143,7 @@
     </paper>
     <paper id="669">
       <title>Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched <fixed-case>A</fixed-case>rabic Data</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Fahad</first><last>AlGhamdi</last></author>
@@ -7195,8 +7195,8 @@
     </paper>
     <paper id="674">
       <title>Multi-language Speech Collection for <fixed-case>NIST</fixed-case> <fixed-case>LRE</fixed-case></title>
-      <author><first>Karen</first><last>Jones</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Jones</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Kevin</first><last>Walker</last></author>
       <author><first>David</first><last>Graff</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
@@ -7218,7 +7218,7 @@
       <title>New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Ivo-Pavao</first><last>Jazbec</last></author>
       <pages>4264–4270</pages>
       <abstract>In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin.</abstract>
@@ -7258,7 +7258,7 @@
     <paper id="680">
       <title><fixed-case>UDP</fixed-case>ipe: Trainable Pipeline for Processing <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>U</fixed-case> Files Performing Tokenization, Morphological Analysis, <fixed-case>POS</fixed-case> Tagging and Parsing</title>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jana</first><last>Straková</last></author>
       <pages>4290–4297</pages>
       <abstract>Automatic natural language processing of large texts often presents recurring challenges in multiple languages: even for most advanced tasks, the texts are first processed by basic processing steps – from tokenization to parsing. We present an extremely simple-to-use tool consisting of one binary and one model (per language), which performs these tasks for multiple languages without the need for any other external data. UDPipe, a pipeline processing CoNLL-U-formatted files, performs tokenization, morphological analysis, part-of-speech tagging, lemmatization and dependency parsing for nearly all treebanks of Universal Dependencies 1.2 (namely, the whole pipeline is currently available for 32 out of 37 treebanks). In addition, the pipeline is easily trainable with training data in CoNLL-U format (and in some cases also with additional raw corpora) and requires minimal linguistic knowledge on the users’ part. The training code is also released.</abstract>
@@ -7315,7 +7315,7 @@
       <title><fixed-case>S</fixed-case>lang<fixed-case>N</fixed-case>et: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et like resource for <fixed-case>E</fixed-case>nglish Slang</title>
       <author><first>Shehzaad</first><last>Dhuliawala</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>4329–4332</pages>
       <abstract>We present a WordNet like structured resource for slang words and neologisms on the internet. The dynamism of language is often an indication that current language technology tools trained on today’s data, may not be able to process the language in the future. Our resource could be (1) used to augment the WordNet, (2) used in several Natural Language Processing (NLP) applications which make use of noisy data on the internet like Information Retrieval and Web Mining. Such a resource can also be used to distinguish slang word senses from conventional word senses. To stimulate similar innovations widely in the NLP community, we test the efficacy of our resource for detecting slang using standard bag of words Word Sense Disambiguation (WSD) algorithms (Lesk and Extended Lesk) for English data on the internet.</abstract>
       <url hash="9029d6d5">L16-1686</url>
@@ -7323,7 +7323,7 @@
     </paper>
     <paper id="687">
       <title>Discovering Fuzzy Synsets from the Redundancy in Different Lexical-Semantic Resources</title>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Fábio</first><last>Santos</last></author>
       <pages>4333–4340</pages>
       <abstract>Although represented as such in wordnets, word senses are not discrete. To handle word senses as fuzzy objects, we exploit the graph structure of synonymy pairs acquired from different sources to discover synsets where words have different membership degrees that reflect confidence. Following this approach, a wide-coverage fuzzy thesaurus was discovered from a synonymy network compiled from seven Portuguese lexical-semantic resources. Based on a crowdsourcing evaluation, we can say that the quality of the obtained synsets is far from perfect but, as expected in a confidence measure, it increases significantly for higher cut-points on the membership and, at a certain point, reaches 100% correction rate.</abstract>
@@ -7349,7 +7349,7 @@
     </paper>
     <paper id="690">
       <title>Ambiguity Diagnosis for Terms in Digital Humanities</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Evelyne</first><last>Jacquey</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
       <author><first>Luis Felipe</first><last>Melo</last></author>
@@ -7361,7 +7361,7 @@
     </paper>
     <paper id="691">
       <title>Metrical Annotation of a Large Corpus of <fixed-case>S</fixed-case>panish Sonnets: Representation, Scansion and Evaluation</title>
-      <author><first>Borja</first><last>Navarro</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
       <author><first>María</first><last>Ribes Lafoz</last></author>
       <author><first>Noelia</first><last>Sánchez</last></author>
       <pages>4360–4364</pages>
@@ -7393,10 +7393,10 @@
     </paper>
     <paper id="694">
       <title>Studying the Temporal Dynamics of Word Co-occurrences: An Application to Event Detection</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>P. K.</first><last>Srijith</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>4380–4387</pages>
       <abstract>Streaming media provides a number of unique challenges for computational linguistics. This paper studies the temporal variation in word co-occurrence statistics, with application to event detection. We develop a spectral clustering approach to find groups of mutually informative terms occurring in discrete time frames. Experiments on large datasets of tweets show that these groups identify key real world events as they occur in time, despite no explicit supervision. The performance of our method rivals state-of-the-art methods for event detection on F-score, obtaining higher recall at the expense of precision.</abstract>
       <url hash="2e5d137a">L16-1694</url>
@@ -7404,7 +7404,7 @@
     </paper>
     <paper id="695">
       <title><fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks for Text Mining: A Qualitative and Empirical Comparison with Integer Linear Programming</title>
-      <author><first>Luis Gerardo</first><last>Mojica de la Vega</last></author>
+      <author id="luis-gerardo-mojica-de-la-vega"><first>Luis Gerardo</first><last>Mojica de la Vega</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
       <pages>4388–4395</pages>
       <abstract>Joint inference approaches such as Integer Linear Programming (ILP) and Markov Logic Networks (MLNs) have recently been successfully applied to many natural language processing (NLP) tasks, often outperforming their pipeline counterparts. However, MLNs are arguably much less popular among NLP researchers than ILP. While NLP researchers who desire to employ these joint inference frameworks do not necessarily have to understand their theoretical underpinnings, it is imperative that they understand which of them should be applied under what circumstances. With the goal of helping NLP researchers better understand the relative strengths and weaknesses of MLNs and ILP; we will compare them along different dimensions of interest, such as expressiveness, ease of use, scalability, and performance. To our knowledge, this is the first systematic comparison of ILP and MLNs on an NLP task.</abstract>
@@ -7416,9 +7416,9 @@
       <author><first>Ayman Al</first><last>Zaatari</last></author>
       <author><first>Rim El</first><last>Ballouli</last></author>
       <author><first>Shady</first><last>ELbassouni</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Emad</first><last>Yahya</last></author>
       <pages>4396–4401</pages>
@@ -7439,7 +7439,7 @@
     </paper>
     <paper id="698">
       <title><fixed-case>QUEMDISSE</fixed-case>? Reported speech in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <author><first>Bianca</first><last>Freitas</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <pages>4410–4416</pages>
@@ -7450,8 +7450,8 @@
     <paper id="699">
       <title><fixed-case>MEANTIME</fixed-case>, the <fixed-case>N</fixed-case>ews<fixed-case>R</fixed-case>eader Multilingual Event and Time Corpus</title>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
-      <author><first>Ruben</first><last>Urizar</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
+      <author id="ruben-urizar"><first>Ruben</first><last>Urizar</last></author>
       <author><first>Begoña</first><last>Altuna</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Anneleen</first><last>Schoen</last></author>
@@ -7463,7 +7463,7 @@
     </paper>
     <paper id="700">
       <title>The <fixed-case>ACQDIV</fixed-case> Database: <fixed-case>M</fixed-case>in(d)ing the Ambient Language</title>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>4423–4429</pages>
       <abstract>One of the most pressing questions in cognitive science remains unanswered: what cognitive mechanisms enable children to learn any of the world’s 7000 or so languages? Much discovery has been made with regard to specific learning mechanisms in specific languages, however, given the remarkable diversity of language structures (Evans and Levinson 2009, Bickel 2014) the burning question remains: what are the underlying processes that make language acquisition possible, despite substantial cross-linguistic variation in phonology, morphology, syntax, etc.? To investigate these questions, a comprehensive cross-linguistic database of longitudinal child language acquisition corpora from maximally diverse languages has been built.</abstract>
       <url hash="63abe2b5">L16-1700</url>
@@ -7473,9 +7473,9 @@
       <title>Summarizing Behaviours: An Experiment on the Annotation of Call-Centre Conversations</title>
       <author><first>Morena</first><last>Danieli</last></author>
       <author><first>Balamurali A</first><last>R</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>4430–4433</pages>
       <abstract>Annotating and predicting behavioural aspects in conversations is becoming critical in the conversational analytics industry. In this paper we look into inter-annotator agreement of agent behaviour dimensions on two call center corpora. We find that the task can be annotated consistently over time, but that subjectivity issues impacts the quality of the annotation. The reformulation of some of the annotated dimensions is suggested in order to improve agreement.</abstract>
@@ -7484,7 +7484,7 @@
     </paper>
     <paper id="702">
       <title>Survey of Conversational Behavior: Towards the Design of a Balanced Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation</title>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Tomoyuki</first><last>Tsuchiya</last></author>
       <author><first>Ryoko</first><last>Watanabe</last></author>
       <author><first>Daisuke</first><last>Yokomori</last></author>
@@ -7508,8 +7508,8 @@
       <title><fixed-case>I</fixed-case>nternet Argument Corpus 2.0: An <fixed-case>SQL</fixed-case> schema for Dialogic Social Media and the Corpora to go with it</title>
       <author><first>Rob</first><last>Abbott</last></author>
       <author><first>Brian</first><last>Ecker</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>4445–4452</pages>
       <abstract>Large scale corpora have benefited many areas of research in natural language processing, but until recently, resources for dialogue have lagged behind. Now, with the emergence of large scale social media websites incorporating a threaded dialogue structure, content feedback, and self-annotation (such as stance labeling), there are valuable new corpora available to researchers. In previous work, we released the INTERNET ARGUMENT CORPUS, one of the first larger scale resources available for opinion sharing dialogue. We now release the INTERNET ARGUMENT CORPUS 2.0 (IAC 2.0) in the hope that others will find it as useful as we have. The IAC 2.0 provides more data than IAC 1.0 and organizes it using an extensible, repurposable SQL schema. The database structure in conjunction with the associated code facilitates querying from and combining multiple dialogically structured data sources. The IAC 2.0 schema provides support for forum posts, quotations, markup (bold, italic, etc), and various annotations, including Stanford CoreNLP annotations. We demonstrate the generalizablity of the schema by providing code to import the ConVote corpus.</abstract>
       <url hash="1016aec4">L16-1704</url>
@@ -7539,7 +7539,7 @@
       <author><first>Christian</first><last>Fäth</last></author>
       <author><first>Heike</first><last>Renner-Westermann</last></author>
       <author><first>Frank</first><last>Abromeit</last></author>
-      <author><first>Vanya</first><last>Dimitrova</last></author>
+      <author id="vania-dimitrova"><first>Vanya</first><last>Dimitrova</last></author>
       <pages>4463–4471</pages>
       <abstract>This paper introduces a novel research tool for the field of linguistics: The Lin|gu|is|tik web portal provides a virtual library which offers scientific information on every linguistic subject. It comprises selected internet sources and databases as well as catalogues for linguistic literature, and addresses an interdisciplinary audience. The virtual library is the most recent outcome of the Special Subject Collection Linguistics of the German Research Foundation (DFG), and also integrates the knowledge accumulated in the Bibliography of Linguistic Literature. In addition to the portal, we describe long-term goals and prospects with a special focus on ongoing efforts regarding an extension towards integrating language resources and Linguistic Linked Open Data.</abstract>
       <url hash="a67d28ee">L16-1707</url>
@@ -7560,7 +7560,7 @@
       <title>Designing A Long Lasting Linguistic Project: The Case Study of <fixed-case>ASI</fixed-case>t</title>
       <author><first>Maristella</first><last>Agosti</last></author>
       <author><first>Emanuele</first><last>Di Buccio</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Cecilia</first><last>Poletto</last></author>
       <author><first>Esther</first><last>Rinke</last></author>
       <pages>4479–4483</pages>
@@ -7570,7 +7570,7 @@
     </paper>
     <paper id="710">
       <title>Global Open Resources and Information for Language and Linguistic Analysis (<fixed-case>GORILLA</fixed-case>)</title>
-      <author><first>Damir</first><last>Cavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Cavar</last></author>
       <author><first>Malgorzata</first><last>Cavar</last></author>
       <author><first>Lwin</first><last>Moe</last></author>
       <pages>4484–4491</pages>
@@ -7611,8 +7611,8 @@
       <title>Two Architectures for Parallel Processing of Huge Amounts of Text</title>
       <author><first>Mathijs</first><last>Kattenberg</last></author>
       <author><first>Zuhaitz</first><last>Beloki</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Xabier</first><last>Artola</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="xabier-artola"><first>Xabier</first><last>Artola</last></author>
       <author><first>Antske</first><last>Fokkens</last></author>
       <author><first>Paul</first><last>Huygen</last></author>
       <author><first>Kees</first><last>Verstoep</last></author>
@@ -7633,7 +7633,7 @@
       <title>New Developments in the <fixed-case>LRE</fixed-case> Map</title>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <author><first>Lin</first><last>Liu</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>4526–4530</pages>
@@ -7652,7 +7652,7 @@
     </paper>
     <paper id="718">
       <title>The <fixed-case>ELRA</fixed-case> License Wizard</title>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
       <author><first>Lin</first><last>Liu</last></author>
       <author><first>Meritxell Fernández</first><last>Barrera</last></author>
@@ -7665,7 +7665,7 @@
     <paper id="719">
       <title>Review on the Existing Language Resources for Languages of <fixed-case>F</fixed-case>rance</title>
       <author><first>Thibault</first><last>Grouas</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Quentin</first><last>Samier</last></author>
       <pages>4539–4542</pages>
       <abstract>With the support of the DGLFLF, ELDA conducted an inventory of existing language resources for the regional languages of France. The main aim of this inventory was to assess the exploitability of the identified resources within technologies. A total of 2,299 Language Resources were identified. As a second step, a deeper analysis of a set of three language groups (Breton, Occitan, overseas languages) was carried out along with a focus of their exploitability within three technologies: automatic translation, voice recognition/synthesis and spell checkers. The survey was followed by the organisation of the TLRF2015 Conference which aimed to present the state of the art in the field of the Technologies for Regional Languages of France. The next step will be to activate the network of specialists built up during the TLRF conference and to begin the organisation of a second TLRF conference. Meanwhile, the French Ministry of Culture continues its actions related to linguistic diversity and technology, in particular through a project with Wikimedia France related to contributions to Wikipedia in regional languages, the upcoming new version of the “Corpus de la Parole” and the reinforcement of the DGLFLF’s Observatory of Linguistic Practices.</abstract>
@@ -7674,9 +7674,9 @@
     </paper>
     <paper id="720">
       <title>Selection Criteria for Low Resource Language Programs</title>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Mike</first><last>Maxwell</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <pages>4543–4549</pages>
       <abstract>This paper documents and describes the criteria used to select languages for study within programs that include low resource languages whether given that label or another similar one. It focuses on five US common task, Human Language Technology research and development programs in which the authors have provided information or consulting related to the choice of language. The paper does not describe the actual selection process which is the responsibility of program management and highly specific to a program’s individual goals and context. Instead it concentrates on the data and criteria that have been considered relevant previously with the thought that future program managers and their consultants may adapt these and apply them with different prioritization to future programs.</abstract>
@@ -7700,7 +7700,7 @@
       <author><first>Enrico</first><last>Santus</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Tin-Shing</first><last>Chiu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>4557–4564</pages>
       <abstract>ROOT9 is a supervised system for the classification of hypernyms, co-hyponyms and random words that is derived from the already introduced ROOT13 (Santus et al., 2016). It relies on a Random Forest algorithm and nine unsupervised corpus-based features. We evaluate it with a 10-fold cross validation on 9,600 pairs, equally distributed among the three classes and involving several Parts-Of-Speech (i.e. adjectives, nouns and verbs). When all the classes are present, ROOT9 achieves an F1 score of 90.7%, against a baseline of 57.2% (vector cosine). When the classification is binary, ROOT9 achieves the following results against the baseline. hypernyms-co-hyponyms 95.7% vs. 69.8%, hypernyms-random 91.8% vs. 64.1% and co-hyponyms-random 97.8% vs. 79.4%. In order to compare the performance with the state-of-the-art, we have also evaluated ROOT9 in subsets of the Weeds et al. (2014) datasets, proving that it is in fact competitive. Finally, we investigated whether the system learns the semantic relation or it simply learns the prototypical hypernyms, as claimed by Levy et al. (2015). The second possibility seems to be the most likely, even though ROOT9 can be trained on negative examples (i.e., switched hypernyms) to drastically reduce this bias.</abstract>
@@ -7712,7 +7712,7 @@
       <author><first>Enrico</first><last>Santus</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Tin-Shing</first><last>Chiu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>4565–4572</pages>
       <abstract>In this paper, we claim that Vector Cosine ― which is generally considered one of the most efficient unsupervised measures for identifying word similarity in Vector Space Models ― can be outperformed by a completely unsupervised measure that evaluates the extent of the intersection among the most associated contexts of two target words, weighting such intersection according to the rank of the shared contexts in the dependency ranked lists. This claim comes from the hypothesis that similar words do not simply occur in similar contexts, but they share a larger portion of their most relevant contexts compared to other related words. To prove it, we describe and evaluate APSyn, a variant of Average Precision that ― independently of the adopted parameters ― outperforms the Vector Cosine and the co-occurrence on the ESL and TOEFL test sets. In the best setting, APSyn reaches 0.73 accuracy on the ESL dataset and 0.70 accuracy in the TOEFL dataset, beating therefore the non-English US college applicants (whose average, as reported in the literature, is 64.50%) and several state-of-the-art approaches.</abstract>
@@ -7722,7 +7722,7 @@
     <paper id="724">
       <title>Assessing the Potential of Metaphoricity of verbs using corpus data</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>4573–4577</pages>
       <abstract>The paper investigates the relation between metaphoricity and distributional characteristics of verbs, introducing POM, a corpus-derived index that can be used to define the upper bound of metaphoricity of any expression in which a given verb occurs. The work moves from the observation that while some verbs can be used to create highly metaphoric expressions, others can not. We conjecture that this fact is related to the number of contexts in which a verb occurs and to the frequency of each context. This intuition is modelled by introducing a method in which each context of a verb in a corpus is assigned a vector representation, and a clustering algorithm is employed to identify similar contexts. Eventually, the Standard Deviation of the relative frequency values of the clusters is computed and taken as the POM of the target verb. We tested POM in two experimental settings obtaining values of accuracy of 84% and 92%. Since we are convinced, along with (Shutoff, 2015), that metaphor detection systems should be concerned only with the identification of highly metaphoric expressions, we believe that POM could be profitably employed by these systems to a priori exclude expressions that, due to the verb they include, can only have low degrees of metaphoricity</abstract>
       <url hash="150349a1">L16-1724</url>
@@ -7751,7 +7751,7 @@
     <paper id="727">
       <title>Towards Building Semantic Role Labeler for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Maaz</first><last>Anwar</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>4588–4595</pages>
       <abstract>We present a statistical system for identifying the semantic relationships or semantic roles for two major Indian Languages, Hindi and Urdu. Given an input sentence and a predicate/verb, the system first identifies the arguments pertaining to that verb and then classifies it into one of the semantic labels which can either be a DOER, THEME, LOCATIVE, CAUSE, PURPOSE etc. The system is based on 2 statistical classifiers trained on roughly 130,000 words for Urdu and 100,000 words for Hindi that were hand-annotated with semantic roles under the PropBank project for these two languages. Our system achieves an accuracy of 86% in identifying the arguments of a verb for Hindi and 75% for Urdu. At the subsequent task of classifying the constituents into their semantic roles, the Hindi system achieved 58% precision and 42% recall whereas Urdu system performed better and achieved 83% precision and 80% recall. Our study also allowed us to compare the usefulness of different linguistic features and feature combinations in the semantic role labeling task. We also examine the use of statistical syntactic parsing as feature in the role labeling task.</abstract>
       <url hash="582d8cee">L16-1727</url>
@@ -7759,7 +7759,7 @@
     </paper>
     <paper id="728">
       <title>A Framework for Automatic Acquisition of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian Verb Aspect from Corpora</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Maja</first><last>Miličević</last></author>
       <pages>4596–4601</pages>
       <abstract>Verb aspect is a grammatical and lexical category that encodes temporal unfolding and duration of events described by verbs. It is a potentially interesting source of information for various computational tasks, but has so far not been studied in much depth from the perspective of automatic processing. Slavic languages are particularly interesting in this respect, as they encode aspect through complex and not entirely consistent lexical derivations involving prefixation and suffixation. Focusing on Croatian and Serbian, in this paper we propose a novel framework for automatic classification of their verb types into a number of fine-grained aspectual classes based on the observable morphology of verb forms. In addition, we provide a set of around 2000 verbs classified based on our framework. This set can be used for linguistic research as well as for testing automatic classification on a larger scale. With minor adjustments the approach is also applicable to other Slavic languages</abstract>
@@ -7770,7 +7770,7 @@
       <title>Monolingual Social Media Datasets for Detecting Contradiction and Entailment</title>
       <author><first>Piroska</first><last>Lendvai</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <pages>4602–4605</pages>
       <abstract>Entailment recognition approaches are useful for application domains such as information extraction, question answering or summarisation, for which evidence from multiple sentences needs to be combined. We report on a new 3-way judgement Recognizing Textual Entailment (RTE) resource that originates in the Social Media domain, and explain our semi-automatic creation method for the special purpose of information verification, which draws on manually established rumourous claims reported during crisis events. From about 500 English tweets related to 70 unique claims we compile and evaluate 5.4k RTE pairs, while continue automatizing the workflow to generate similar-sized datasets in other languages.</abstract>
@@ -7779,7 +7779,7 @@
     </paper>
     <paper id="730">
       <title><fixed-case>V</fixed-case>ox<fixed-case>ML</fixed-case>: A Visualization Modeling Language</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <pages>4606–4613</pages>
       <abstract>We present the specification for a modeling language, VoxML, which encodes semantic knowledge of real-world objects represented as three-dimensional models, and of events and attributes related to and enacted over these objects. VoxML is intended to overcome the limitations of existing 3D visual markup languages by allowing for the encoding of a broad range of semantic knowledge that can be exploited by a variety of systems and platforms, leading to multimodal simulations of real-world scenarios using conceptual objects that represent their semantic values</abstract>
@@ -7797,7 +7797,7 @@
     <paper id="732">
       <title>Embedding Open-domain Common-sense Knowledge from Text</title>
       <author><first>Travis</first><last>Goodwin</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>4621–4628</pages>
       <abstract>Our ability to understand language often relies on common-sense knowledge ― background information the speaker can assume is known by the reader. Similarly, our comprehension of the language used in complex domains relies on access to domain-specific knowledge. Capturing common-sense and domain-specific knowledge can be achieved by taking advantage of recent advances in open information extraction (IE) techniques and, more importantly, of knowledge embeddings, which are multi-dimensional representations of concepts and relations. Building a knowledge graph for representing common-sense knowledge in which concepts discerned from noun phrases are cast as vertices and lexicalized relations are cast as edges leads to learning the embeddings of common-sense knowledge accounting for semantic compositionality as well as implied knowledge. Common-sense knowledge is acquired from a vast collection of blogs and books as well as from WordNet. Similarly, medical knowledge is learned from two large sets of electronic health records. The evaluation results of these two forms of knowledge are promising: the same knowledge acquisition methodology based on learning knowledge embeddings works well both for common-sense knowledge and for medical knowledge Interestingly, the common-sense knowledge that we have acquired was evaluated as being less neutral than than the medical knowledge, as it often reflected the opinion of the knowledge utterer. In addition, the acquired medical knowledge was evaluated as more plausible than the common-sense knowledge, reflecting the complexity of acquiring common-sense knowledge due to the pragmatics and economicity of language.</abstract>
       <url hash="7bd36a0d">L16-1732</url>
@@ -7805,7 +7805,7 @@
     </paper>
     <paper id="733">
       <title>Medical Concept Embeddings via Labeled Background Corpora</title>
-      <author><first>Eneldo Loza</first><last>Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo Loza</first><last>Mencía</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <author><first>Jinseok</first><last>Nam</last></author>
       <pages>4629–4636</pages>
@@ -7851,7 +7851,7 @@
       <author><first>Shinsuke</first><last>Mori</last></author>
       <author><first>Fumihiko</first><last>Takahashi</last></author>
       <author><first>Katsutoshi</first><last>Itoyama</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>4652–4657</pages>
       <abstract>Binary file summaries/549.html matches</abstract>
       <url hash="9e12e87a">L16-1737</url>
@@ -7864,7 +7864,7 @@
       <author><first>Corinne</first><last>Fredouille</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
       <author><first>Lise</first><last>Crevier-Buchman</last></author>
-      <author><first>Elisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Elisabeth</first><last>Delais-Roussarie</last></author>
       <author><first>Laurianne</first><last>Georgeton</last></author>
       <author><first>Alain</first><last>Ghio</last></author>
       <author><first>Imed</first><last>Laaridh</last></author>
@@ -7886,7 +7886,7 @@
       <author><first>Hans</first><last>Van de Velde</last></author>
       <author><first>Frederik</first><last>Kampstra</last></author>
       <author><first>Jouke</first><last>Algra</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>David</first><last>van Leeuwen</last></author>
       <pages>4666–4669</pages>
       <abstract>We present a new speech database containing 18.5 hours of annotated radio broadcasts in the Frisian language. Frisian is mostly spoken in the province Fryslan and it is the second official language of the Netherlands. The recordings are collected from the archives of Omrop Fryslan, the regional public broadcaster of the province Fryslan. The database covers almost a 50-year time span. The native speakers of Frisian are mostly bilingual and often code-switch in daily conversations due to the extensive influence of the Dutch language. Considering the longitudinal and code-switching nature of the data, an appropriate annotation protocol has been designed and the data is manually annotated with the orthographic transcription, speaker identities, dialect information, code-switching details and background noise/music information.</abstract>
@@ -7896,7 +7896,7 @@
     <paper id="740">
       <title>The <fixed-case>SI</fixed-case> <fixed-case>TED</fixed-case>x-<fixed-case>UM</fixed-case> speech database: a new <fixed-case>S</fixed-case>lovenian Spoken Language Resource</title>
       <author><first>Andrej</first><last>Žgank</last></author>
-      <author><first>Mirjam Sepesy</first><last>Maučec</last></author>
+      <author id="mirjam-sepesy-maucec"><first>Mirjam Sepesy</first><last>Maučec</last></author>
       <author><first>Darinka</first><last>Verdonik</last></author>
       <pages>4670–4673</pages>
       <abstract>This paper presents a new Slovenian spoken language resource built from TEDx Talks. The speech database contains 242 talks in total duration of 54 hours. The annotation and transcription of acquired spoken material was generated automatically, applying acoustic segmentation and automatic speech recognition. The development and evaluation subset was also manually transcribed using the guidelines specified for the Slovenian GOS corpus. The manual transcriptions were used to evaluate the quality of unsupervised transcriptions. The average word error rate for the SI TEDx-UM evaluation subset was 50.7%, with out of vocabulary rate of 24% and language model perplexity of 390. The unsupervised transcriptions contain 372k tokens, where 32k of them were different.</abstract>
@@ -7929,7 +7929,7 @@
       <author><first>Ján</first><last>Staš</last></author>
       <author><first>Tomáš</first><last>Koctúr</last></author>
       <author><first>Martin</first><last>Lojka</last></author>
-      <author><first>Jozef</first><last>Juhár</last></author>
+      <author id="jozef-juhar"><first>Jozef</first><last>Juhár</last></author>
       <pages>4684–4687</pages>
       <abstract>In this paper, we introduce an extension of our previously released TUKE-BNews-SK corpus based on a semi-automatic annotation scheme. It firstly relies on the automatic transcription of the BN data performed by our Slovak large vocabulary continuous speech recognition system. The generated hypotheses are then manually corrected and completed by trained human annotators. The corpus is composed of 25 hours of fully-annotated spontaneous and prepared speech. In addition, we have acquired 900 hours of another BN data, part of which we plan to annotate semi-automatically. We present a preliminary corpus evaluation that gives very promising results.</abstract>
       <url hash="77dc02ce">L16-1743</url>
@@ -7938,7 +7938,7 @@
     <paper id="744">
       <title>Generating a <fixed-case>Y</fixed-case>iddish Speech Corpus, Forced Aligner and Basic <fixed-case>ASR</fixed-case> System for the <fixed-case>AHEYM</fixed-case> Project</title>
       <author><first>Malgorzata</first><last>Ćavar</last></author>
-      <author><first>Damir</first><last>Ćavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Ćavar</last></author>
       <author><first>Dov-Ber</first><last>Kerler</last></author>
       <author><first>Anya</first><last>Quilitzsch</last></author>
       <pages>4688–4693</pages>
diff --git a/data/xml/L18.xml b/data/xml/L18.xml
index 2406e96bc4..4752300436 100644
--- a/data/xml/L18.xml
+++ b/data/xml/L18.xml
@@ -4,19 +4,19 @@
     <meta>
       <booktitle>Proceedings of the Eleventh International Conference on Language Resources and Evaluation (<fixed-case>LREC</fixed-case> 2018)</booktitle>
       <url hash="13ce3f15">L18-1</url>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Khalid</first><last>Choukri</last></editor>
-      <editor><first>Christopher</first><last>Cieri</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="khalid-choukri"><first>Khalid</first><last>Choukri</last></editor>
+      <editor id="christopher-cieri"><first>Christopher</first><last>Cieri</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Sara</first><last>Goggi</last></editor>
-      <editor><first>Koiti</first><last>Hasida</last></editor>
+      <editor id="koiti-hasida"><first>Koiti</first><last>Hasida</last></editor>
       <editor><first>Hitoshi</first><last>Isahara</last></editor>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
-      <editor><first>Joseph</first><last>Mariani</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="joseph-mariani"><first>Joseph</first><last>Mariani</last></editor>
       <editor><first>Hélène</first><last>Mazo</last></editor>
-      <editor><first>Asuncion</first><last>Moreno</last></editor>
-      <editor><first>Jan</first><last>Odijk</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="asuncion-moreno"><first>Asuncion</first><last>Moreno</last></editor>
+      <editor id="jan-odijk"><first>Jan</first><last>Odijk</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <editor><first>Takenobu</first><last>Tokunaga</last></editor>
       <publisher>European Language Resources Association (ELRA)</publisher>
       <address>Miyazaki, Japan</address>
@@ -30,8 +30,8 @@
     <paper id="1">
       <title>Augmenting Librispeech with <fixed-case>F</fixed-case>rench Translations: A Multimodal Corpus for Direct Speech Translation Evaluation</title>
       <author><first>Ali Can</first><last>Kocabiyikoglu</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <url hash="92fa5af5">L18-1001</url>
       <bibkey>kocabiyikoglu-etal-2018-augmenting</bibkey>
     </paper>
@@ -40,7 +40,7 @@
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
       <author><first>Anna</first><last>Fernández Torné</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Anna</first><last>Matamala</last></author>
       <url hash="45b1d957">L18-1002</url>
       <bibkey>etchegoyhen-etal-2018-evaluating</bibkey>
@@ -54,9 +54,9 @@
     </paper>
     <paper id="4">
       <title><fixed-case>ESCAPE</fixed-case>: a Large-scale Synthetic Corpus for Automatic Post-Editing</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <url hash="52e2ec5c">L18-1004</url>
       <bibkey>negri-etal-2018-escape</bibkey>
@@ -65,7 +65,7 @@
       <title>Evaluating Machine Translation Performance on <fixed-case>C</fixed-case>hinese Idioms with a Blacklist Method</title>
       <author><first>Yutong</first><last>Shao</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Federico</first><last>Fancellu</last></author>
       <url hash="7f32662a">L18-1005</url>
       <bibkey>shao-etal-2018-evaluating</bibkey>
@@ -86,8 +86,8 @@
     </paper>
     <paper id="8">
       <title>Advances in Pre-Training Distributed Word Representations</title>
-      <author><first>Tomas</first><last>Mikolov</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
       <author><first>Christian</first><last>Puhrsch</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
@@ -96,10 +96,10 @@
     </paper>
     <paper id="9">
       <title>Integrating <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Event Structures into <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et</title>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="c0f3d2e0">L18-1009</url>
       <bibkey>brown-etal-2018-integrating</bibkey>
     </paper>
@@ -123,7 +123,7 @@
       <title>The Natural Stories Corpus</title>
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Edward</first><last>Gibson</last></author>
-      <author><first>Harry J.</first><last>Tily</last></author>
+      <author id="harry-j-tily"><first>Harry J.</first><last>Tily</last></author>
       <author><first>Idan</first><last>Blank</last></author>
       <author><first>Anastasia</first><last>Vishnevetsky</last></author>
       <author><first>Steven</first><last>Piantadosi</last></author>
@@ -133,21 +133,21 @@
     </paper>
     <paper id="13">
       <title>Semi-automatic <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Annotation over <fixed-case>KAIST</fixed-case> Treebank</title>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Jiseong</first><last>Kim</last></author>
       <author><first>Sunggoo</first><last>Kwon</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="0cc2ee30">L18-1013</url>
       <bibkey>hahm-etal-2018-semi</bibkey>
     </paper>
     <paper id="14">
       <title>Handling Normalization Issues for Part-of-Speech Tagging of Online Conversational Text</title>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <author><first>Jeremy</first><last>Auguste</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <url hash="5ef716e7">L18-1014</url>
       <bibkey>damnati-etal-2018-handling</bibkey>
     </paper>
@@ -174,17 +174,17 @@
     </paper>
     <paper id="17">
       <title>Dialogue Structure Annotation for Multi-Floor Interaction</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Cassidy</first><last>Henry</last></author>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <author><first>Felix</first><last>Gervits</last></author>
       <author><first>Kimberly</first><last>Pollard</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Su</first><last>Lei</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Matthew</first><last>Marge</last></author>
-      <author><first>Cory</first><last>Hayes</last></author>
+      <author id="cory-hayes"><first>Cory</first><last>Hayes</last></author>
       <author><first>Susan</first><last>Hill</last></author>
       <url hash="d8445832">L18-1017</url>
       <bibkey>traum-etal-2018-dialogue</bibkey>
@@ -222,14 +222,14 @@
     <paper id="21">
       <title>Data Management Plan (<fixed-case>DMP</fixed-case>) for Language Data under the New General Da-ta Protection Regulation (<fixed-case>GDPR</fixed-case>)</title>
       <author><first>Pawel</first><last>Kamocki</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <url hash="ca1076e8">L18-1021</url>
       <bibkey>kamocki-etal-2018-data</bibkey>
     </paper>
     <paper id="22">
       <title>We Are Depleting Our Research Subject as We Are Investigating It: In Language Technology, more Replication and Diversity Are Needed</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <url hash="33e37a8e">L18-1022</url>
       <bibkey>branco-2018-depleting</bibkey>
     </paper>
@@ -244,7 +244,7 @@
       <title>Introducing <fixed-case>NIEUW</fixed-case>: Novel Incentives and Workflows for Eliciting Linguistic Data</title>
       <author><first>Christopher</first><last>Cieri</last></author>
       <author><first>James</first><last>Fiumara</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
       <url hash="28857655">L18-1024</url>
@@ -252,16 +252,16 @@
     </paper>
     <paper id="25">
       <title>Three Dimensions of Reproducibility in Natural Language Processing</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <author><first>Jingbo</first><last>Xia</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Tiffany</first><last>Callahan</last></author>
       <author><first>Orin</first><last>Hargraves</last></author>
       <author><first>Foster</first><last>Goss</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Lawrence E.</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence E.</first><last>Hunter</last></author>
       <url hash="00c94bad">L18-1025</url>
       <bibkey>cohen-etal-2018-three</bibkey>
     </paper>
@@ -274,13 +274,13 @@
     </paper>
     <paper id="27">
       <title>Word Affect Intensities</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <url hash="18fc0b51">L18-1027</url>
       <bibkey>mohammad-2018-word</bibkey>
     </paper>
     <paper id="28">
       <title>Representation Mapping: A Novel Approach to Generate High-Quality Multi-Lingual Emotion Lexicons</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <url hash="86fe677d">L18-1028</url>
       <bibkey>buechel-hahn-2018-representation</bibkey>
@@ -298,7 +298,7 @@
     </paper>
     <paper id="30">
       <title>Understanding Emotions: A Dataset of Tweets to Study Interactions between Affect Categories</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <url hash="8f506260">L18-1030</url>
       <bibkey>mohammad-kiritchenko-2018-understanding</bibkey>
@@ -308,7 +308,7 @@
       <author><first>Bonan</first><last>Min</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
       <author><first>Roger</first><last>Bock</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="e33ba9bd">L18-1031</url>
       <bibkey>min-etal-2018-ace</bibkey>
     </paper>
@@ -344,7 +344,7 @@
     <paper id="36">
       <title>Building Parallel Monolingual <fixed-case>G</fixed-case>an <fixed-case>C</fixed-case>hinese Dialects Corpus</title>
       <author><first>Fan</first><last>Xu</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <author><first>Maoxi</first><last>Li</last></author>
       <url hash="eccfd922">L18-1036</url>
       <bibkey>xu-etal-2018-building</bibkey>
@@ -373,17 +373,17 @@
     </paper>
     <paper id="39">
       <title>A Lexical Tool for Academic Writing in <fixed-case>S</fixed-case>panish based on Expert and Novice Corpora</title>
-      <author><first>Marcos</first><last>García Salido</last></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García Salido</last></author>
       <author><first>Marcos</first><last>García</last></author>
       <author><first>Milka</first><last>Villayandre-Llamazares</last></author>
-      <author><first>Margarita</first><last>Alonso-Ramos</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita</first><last>Alonso-Ramos</last></author>
       <url hash="59389359">L18-1039</url>
       <attachment type="supplementary" hash="59389359">L18-1039.Supplementary.pdf</attachment>
       <bibkey>garcia-salido-etal-2018-lexical</bibkey>
     </paper>
     <paper id="40">
       <title>Framing Named Entity Linking Error Types</title>
-      <author><first>Adrian</first><last>Braşoveanu</last></author>
+      <author id="adrian-brasoveanu"><first>Adrian</first><last>Braşoveanu</last></author>
       <author><first>Giuseppe</first><last>Rizzo</last></author>
       <author><first>Philipp</first><last>Kuntschik</last></author>
       <author><first>Albert</first><last>Weichselbraun</last></author>
@@ -407,7 +407,7 @@
       <author><first>Gareth</first><last>Owen</last></author>
       <author><first>Claire</first><last>O’Donovan</last></author>
       <author><first>Andrew</first><last>Leach</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Steve</first><last>Turner</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <url hash="633cbf8d">L18-1042</url>
@@ -415,10 +415,10 @@
     </paper>
     <paper id="43">
       <title>Parallel Corpora for the Biomedical Domain</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <url hash="62be2c2c">L18-1043</url>
       <bibkey>neveol-etal-2018-parallel</bibkey>
     </paper>
@@ -436,7 +436,7 @@
     <paper id="45">
       <title>Word Embedding Approach for Synonym Extraction of Multi-Word Terms</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <url hash="25149efe">L18-1045</url>
       <bibkey>hazem-daille-2018-word</bibkey>
     </paper>
@@ -458,7 +458,7 @@
       <author><first>Ruchit</first><last>Agrawal</last></author>
       <author><first>Vighnesh</first><last>Chenthil Kumar</last></author>
       <author><first>Vigneshwaran</first><last>Muralidharan</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <url hash="9ddf92ae">L18-1048</url>
       <bibkey>agrawal-etal-2018-beating</bibkey>
     </paper>
@@ -466,7 +466,7 @@
       <title>Sentence Level Temporality Detection using an Implicit Time-sensed Resource</title>
       <author><first>Sabyasachi</first><last>Kamila</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="8b0134fe">L18-1049</url>
       <bibkey>kamila-etal-2018-sentence</bibkey>
     </paper>
@@ -510,28 +510,28 @@
       <title><fixed-case>SW</fixed-case>4<fixed-case>ALL</fixed-case>: a <fixed-case>CEFR</fixed-case> Classified and Aligned Corpus for Language Learning</title>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
       <author><first>Leonardo</first><last>Zilio</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <url hash="a113851e">L18-1055</url>
       <bibkey>wilkens-etal-2018-sw4all</bibkey>
     </paper>
     <paper id="56">
       <title>Towards a Diagnosis of Textual Difficulties for Children with Dyslexia</title>
       <author><first>Solen</first><last>Quiniou</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <url hash="21b8d627">L18-1056</url>
       <bibkey>quiniou-daille-2018-towards</bibkey>
     </paper>
     <paper id="57">
       <title>Coreference Resolution in <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 4.0</title>
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <url hash="2da98779">L18-1057</url>
       <bibkey>marimon-etal-2018-coreference</bibkey>
     </paper>
     <paper id="58">
       <title><fixed-case>BASHI</fixed-case>: A Corpus of <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal Articles Annotated with Bridging Links</title>
-      <author><first>Ina</first><last>Rösiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Rösiger</last></author>
       <url hash="f04611f1">L18-1058</url>
       <bibkey>rosiger-2018-bashi</bibkey>
     </paper>
@@ -554,13 +554,13 @@
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Klára</first><last>Hegedűs</last></author>
       <author><first>Alex</first><last>Sliz-Nagy</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <url hash="c744eed3">L18-1061</url>
       <bibkey>vincze-etal-2018-szegedkoref</bibkey>
     </paper>
     <paper id="62">
       <title>A Corpus to Learn Refer-to-as Relations for Nominals</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <url hash="8254c8df">L18-1062</url>
       <bibkey>ahmad-chang-2018-corpus</bibkey>
@@ -572,7 +572,7 @@
       <author><first>Alberto</first><last>Tonon</last></author>
       <author><first>Philippe</first><last>Cudré-Mauroux</last></author>
       <author><first>Djellel Eddine</first><last>Difallah</last></author>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <author><first>Giuseppe</first><last>Rizzo</last></author>
       <url hash="519306b7">L18-1063</url>
       <bibkey>plu-etal-2018-sanaphor</bibkey>
@@ -581,7 +581,7 @@
       <title><fixed-case>ANCOR</fixed-case>-<fixed-case>AS</fixed-case>: Enriching the <fixed-case>ANCOR</fixed-case> Corpus with Syntactic Annotations</title>
       <author><first>Loïc</first><last>Grobol</last></author>
       <author><first>Isabelle</first><last>Tellier</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <author><first>Marco</first><last>Dinarelli</last></author>
       <author><first>Frédéric</first><last>Landragin</last></author>
       <url hash="4fc9d825">L18-1064</url>
@@ -661,16 +661,16 @@
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Eirini</first><last>Takoulidou</last></author>
       <author><first>Thanasis</first><last>Naskos</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Hugo</first><last>de Vos</last></author>
       <author><first>Maria</first><last>Stasimioti</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>Markus</first><last>Egg</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <url hash="13df9d6e">L18-1073</url>
       <bibkey>hendrickx-etal-2018-multilingual</bibkey>
     </paper>
@@ -685,11 +685,11 @@
     <paper id="75">
       <title>Translation Crowdsourcing: Creating a Multilingual Corpus of Online Educational Content</title>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <author><first>Maria</first><last>Stasimioti</last></author>
       <author><first>Thanasis</first><last>Naskos</last></author>
       <author><first>Eirini</first><last>Takoulidou</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
@@ -706,7 +706,7 @@
     <paper id="77">
       <title><fixed-case>C</fixed-case>hinese Relation Classification using Long Short Term Memory Networks</title>
       <author><first>Linrui</first><last>Zhang</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <url hash="a6788a5a">L18-1077</url>
       <bibkey>zhang-moldovan-2018-chinese</bibkey>
     </paper>
@@ -719,7 +719,7 @@
       <author><first>Xiaoyan</first><last>Yu</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Tengjiao</first><last>Wang</last></author>
-      <author><first>Kam-fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-fai</first><last>Wong</last></author>
       <url hash="9af9da7f">L18-1078</url>
       <bibkey>li-etal-2018-uir</bibkey>
     </paper>
@@ -727,7 +727,7 @@
       <title><fixed-case>E</fixed-case>vent<fixed-case>W</fixed-case>iki: A Knowledge Base of Major Events</title>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Lei</first><last>Cui</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
@@ -737,7 +737,7 @@
     <paper id="80">
       <title>Annotating Spin in Biomedical Scientific Publications : the case of Random Controlled Trials (<fixed-case>RCT</fixed-case>s)</title>
       <author><first>Anna</first><last>Koroleva</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <url hash="0b3ad717">L18-1080</url>
       <bibkey>koroleva-paroubek-2018-annotating</bibkey>
     </paper>
@@ -777,8 +777,8 @@
       <author><first>Jannik</first><last>Strötgen</last></author>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
       <author><first>Lukas</first><last>Lange</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <url hash="94491fbb">L18-1085</url>
       <bibkey>strotgen-etal-2018-krauts</bibkey>
     </paper>
@@ -790,8 +790,8 @@
       <author><first>Tom</first><last>Redman</last></author>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
-      <author><first>Nicholas</first><last>Rizzolo</last></author>
-      <author><first>Lev</first><last>Ratinov</last></author>
+      <author id="nick-rizzolo"><first>Nicholas</first><last>Rizzolo</last></author>
+      <author id="lev-ratinov"><first>Lev</first><last>Ratinov</last></author>
       <author><first>Guanheng</first><last>Luo</last></author>
       <author><first>Quang</first><last>Do</last></author>
       <author><first>Chen-Tse</first><last>Tsai</last></author>
@@ -862,8 +862,8 @@
       <title>Enriching Frame Representations with Distributionally Induced Senses</title>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <url hash="9c9e4648">L18-1093</url>
       <bibkey>faralli-etal-2018-enriching</bibkey>
     </paper>
@@ -945,7 +945,7 @@
       <author><first>Vivian</first><last>Li</last></author>
       <author><first>Andrei</first><last>Lopatenko</last></author>
       <author><first>Daniela</first><last>Stepanov</last></author>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <author><first>Wang-Chiew</first><last>Tan</last></author>
       <author><first>Yinzhan</first><last>Xu</last></author>
       <url hash="3aa25e9a">L18-1103</url>
@@ -963,7 +963,7 @@
       <title><fixed-case>B</fixed-case>log<fixed-case>S</fixed-case>et-<fixed-case>BR</fixed-case>: A <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Blog Corpus</title>
       <author><first>Henrique</first><last>Santos</last></author>
       <author><first>Vinicius</first><last>Woloszyn</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="d9b97744">L18-1105</url>
       <bibkey>santos-etal-2018-blogset</bibkey>
     </paper>
@@ -978,7 +978,7 @@
       <author><first>Gideon</first><last>Mendels</last></author>
       <author><first>Victor</first><last>Soto</last></author>
       <author><first>Aaron</first><last>Jaech</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="3d4d1111">L18-1107</url>
       <bibkey>mendels-etal-2018-collecting</bibkey>
     </paper>
@@ -993,14 +993,14 @@
       <title>A Taxonomy for In-depth Evaluation of Normalization for User Generated Content</title>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Rik</first><last>van Noord</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="49abf72f">L18-1109</url>
       <bibkey>van-der-goot-etal-2018-taxonomy</bibkey>
     </paper>
     <paper id="110">
       <title>Gaining and Losing Influence in Online Conversation</title>
       <author><first>Arun</first><last>Sharma</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="e76f76c3">L18-1110</url>
       <bibkey>sharma-strzalkowski-2018-gaining</bibkey>
     </paper>
@@ -1021,7 +1021,7 @@
     </paper>
     <paper id="113">
       <title>Correction of <fixed-case>OCR</fixed-case> Word Segmentation Errors in Articles from the <fixed-case>ACL</fixed-case> Collection through Neural Machine Translation Methods</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Julian</first><last>Hitschler</last></author>
       <url hash="26602d67">L18-1113</url>
       <bibkey>nastase-hitschler-2018-correction</bibkey>
@@ -1042,7 +1042,7 @@
     </paper>
     <paper id="116">
       <title><fixed-case>PDF</fixed-case>-to-Text Reanalysis for Linguistic Data Mining</title>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <url hash="9854c23b">L18-1116</url>
@@ -1097,7 +1097,7 @@
       <title>Towards Continuous Dialogue Corpus Creation: writing to corpus and generating from it</title>
       <author><first>Andrei</first><last>Malchanau</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="2b6acc8c">L18-1121</url>
       <bibkey>malchanau-etal-2018-towards</bibkey>
     </paper>
@@ -1156,39 +1156,39 @@
       <title>Towards faithfully visualizing global linguistic diversity</title>
       <author><first>Garland</first><last>McNew</last></author>
       <author><first>Curdin</first><last>Derungs</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <url hash="ed6f1992">L18-1129</url>
       <bibkey>mcnew-etal-2018-towards</bibkey>
     </paper>
     <paper id="130">
       <title>The <fixed-case>G</fixed-case>erma<fixed-case>P</fixed-case>arl Corpus of Parliamentary Protocols</title>
       <author><first>Andreas</first><last>Blätte</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <url hash="1a097e01">L18-1130</url>
       <bibkey>blatte-blessing-2018-germaparl</bibkey>
     </paper>
     <paper id="131">
       <title>Identifying Speakers and Addressees in Dialogues Extracted from Literary Fiction</title>
       <author><first>Adam</first><last>Ek</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <author><first>Robert</first><last>Östling</last></author>
-      <author><first>Kristina</first><last>N. Björkenstam</last></author>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
-      <author><first>Sofia</first><last>Gustafson Capková</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>N. Björkenstam</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="sofia-gustafson-capkova"><first>Sofia</first><last>Gustafson Capková</last></author>
       <url hash="cb9b267d">L18-1131</url>
       <bibkey>ek-etal-2018-identifying</bibkey>
     </paper>
     <paper id="132">
       <title>Word Embedding Evaluation Datasets and <fixed-case>W</fixed-case>ikipedia Title Embedding for <fixed-case>C</fixed-case>hinese</title>
       <author><first>Chi-Yen</first><last>Chen</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <url hash="87361689">L18-1132</url>
       <bibkey>chen-ma-2018-word</bibkey>
     </paper>
     <paper id="133">
       <title>An Automatic Learning of an <fixed-case>A</fixed-case>lgerian Dialect Lexicon by using Multilingual Word Embeddings</title>
       <author><first>Abidi</first><last>Karima</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <url hash="fdf7a1be">L18-1133</url>
       <bibkey>karima-smaili-2018-automatic</bibkey>
     </paper>
@@ -1208,10 +1208,10 @@
     </paper>
     <paper id="136">
       <title>Tools for Building an Interlinked Synonym Lexicon Network</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="931d9c1f">L18-1136</url>
       <bibkey>uresova-etal-2018-tools</bibkey>
     </paper>
@@ -1267,15 +1267,15 @@
       <title>Exploiting Pre-Ordering for Neural Machine Translation</title>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="6b10940f">L18-1143</url>
       <bibkey>zhao-etal-2018-exploiting</bibkey>
     </paper>
     <paper id="144">
       <title>Improving a Multi-Source Neural Machine Translation Model with Corpus Extension for Low-Resource Languages</title>
-      <author><first>Gyu-Hyeon</first><last>Choi</last></author>
+      <author id="gyuhyeon-choi"><first>Gyu-Hyeon</first><last>Choi</last></author>
       <author><first>Jong-Hun</first><last>Shin</last></author>
-      <author><first>Young-Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Kil</first><last>Kim</last></author>
       <url hash="75e63f11">L18-1144</url>
       <bibkey>choi-etal-2018-improving</bibkey>
     </paper>
@@ -1284,8 +1284,8 @@
       <author><first>Zi-Yi</first><last>Dou</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Shu-Jian</first><last>Huang</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jia-Jun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jia-Jun</first><last>Chen</last></author>
       <url hash="24a75fd3">L18-1145</url>
       <bibkey>dou-etal-2018-dynamic</bibkey>
     </paper>
@@ -1293,7 +1293,7 @@
       <title>One Sentence One Model for Neural Machine Translation</title>
       <author><first>Xiaoqing</first><last>Li</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="2d192bd1">L18-1146</url>
       <bibkey>li-etal-2018-one</bibkey>
     </paper>
@@ -1301,7 +1301,7 @@
       <title>A Parallel Corpus of <fixed-case>A</fixed-case>rabic-<fixed-case>J</fixed-case>apanese News Articles</title>
       <author><first>Go</first><last>Inoue</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Hiroyuki</first><last>Aoyama</last></author>
       <url hash="c9d27d50">L18-1147</url>
       <bibkey>inoue-etal-2018-parallel</bibkey>
@@ -1316,9 +1316,9 @@
     </paper>
     <paper id="149">
       <title>Automatic Enrichment of Terminological Resources: the <fixed-case>IATE</fixed-case> <fixed-case>RDF</fixed-case> Example</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="c88e077a">L18-1149</url>
       <bibkey>arcan-etal-2018-automatic</bibkey>
@@ -1333,7 +1333,7 @@
     <paper id="151">
       <title>Translating Web Search Queries into Natural Language Questions</title>
       <author><first>Adarsh</first><last>Kumar</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sushil</first><last>Chordia</last></author>
       <url hash="19e560a6">L18-1151</url>
       <bibkey>kumar-etal-2018-translating</bibkey>
@@ -1348,7 +1348,7 @@
     <paper id="153">
       <title>Acquiring Verb Classes Through Bottom-Up Semantic Verb Clustering</title>
       <author><first>Olga</first><last>Majewska</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <url hash="36be85fe">L18-1153</url>
@@ -1356,7 +1356,7 @@
     </paper>
     <paper id="154">
       <title>Constructing High Quality Sense-specific Corpus and Word Embedding via Unsupervised Elimination of Pseudo Multi-sense</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Xihao</first><last>Wang</last></author>
       <author><first>Yuqi</first><last>Sun</last></author>
       <author><first>Junfeng</first><last>Hu</last></author>
@@ -1373,7 +1373,7 @@
       <title>Social Image Tags as a Source of Word Embeddings: A Task-oriented Evaluation</title>
       <author><first>Mika</first><last>Hasegawa</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <url hash="f98abcae">L18-1156</url>
       <bibkey>hasegawa-etal-2018-social</bibkey>
     </paper>
@@ -1386,7 +1386,7 @@
     </paper>
     <paper id="158">
       <title>Towards a <fixed-case>W</fixed-case>elsh Semantic Annotation System</title>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Dawn</first><last>Knight</last></author>
       <author><first>Gareth</first><last>Watkins</last></author>
@@ -1397,8 +1397,8 @@
       <title>Semantic Frame Parsing for Information Extraction : the <fixed-case>CALOR</fixed-case> corpus</title>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
       <author><first>Jeremy</first><last>Auguste</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <url hash="40154037">L18-1159</url>
       <bibkey>marzinotto-etal-2018-semantic</bibkey>
@@ -1414,7 +1414,7 @@
     <paper id="161">
       <title>A Multi- versus a Single-classifier Approach for the Identification of Modality in the <fixed-case>P</fixed-case>ortuguese Language</title>
       <author><first>João</first><last>Sequeira</last></author>
-      <author><first>Teresa</first><last>Gonçalves</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Gonçalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
@@ -1445,8 +1445,8 @@
       <author><first>Denis</first><last>Teslenko</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Mikhail</first><last>Chernoskutov</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <url hash="69338b9a">L18-1164</url>
       <bibkey>ustalov-etal-2018-unsupervised</bibkey>
     </paper>
@@ -1455,8 +1455,8 @@
       <author><first>Kijong</first><last>Han</last></author>
       <author><first>Sangha</first><last>Nam</last></author>
       <author><first>Jiseong</first><last>Kim</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="f7bca834">L18-1165</url>
       <bibkey>han-etal-2018-unsupervised</bibkey>
     </paper>
@@ -1471,7 +1471,7 @@
     <paper id="167">
       <title>Retrofitting Word Representations for Unsupervised Sense Aware Word Similarities</title>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <url hash="a8839015">L18-1167</url>
       <bibkey>remus-biemann-2018-retrofitting</bibkey>
     </paper>
@@ -1488,8 +1488,8 @@
       <title>Text Annotation Graphs: Annotating Complex Natural Language Phenomena</title>
       <author><first>Angus</first><last>Forbes</last></author>
       <author><first>Kristine</first><last>Lee</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <url hash="17cad705">L18-1169</url>
       <bibkey>forbes-etal-2018-text</bibkey>
@@ -1497,7 +1497,7 @@
     <paper id="170">
       <title><fixed-case>M</fixed-case>anzanilla: An Image Annotation Tool for <fixed-case>TKB</fixed-case> Building</title>
       <author><first>Arianne</first><last>Reimerink</last></author>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <url hash="a0af44c3">L18-1170</url>
       <bibkey>reimerink-leon-arauz-2018-manzanilla</bibkey>
     </paper>
@@ -1517,7 +1517,7 @@
     <paper id="173">
       <title><fixed-case>WASA</fixed-case>: A Web Application for Sequence Annotation</title>
       <author><first>Fahad</first><last>AlGhamdi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <url hash="25a810f7">L18-1173</url>
       <bibkey>alghamdi-diab-2018-wasa</bibkey>
     </paper>
@@ -1533,7 +1533,7 @@
       <title><fixed-case>PDFA</fixed-case>nno: a Web-based Linguistic Annotation Tool for <fixed-case>PDF</fixed-case> Documents</title>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Yohei</first><last>Munesada</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="79bea093">L18-1175</url>
       <bibkey>shindo-etal-2018-pdfanno</bibkey>
     </paper>
@@ -1546,8 +1546,8 @@
     </paper>
     <paper id="177">
       <title>An Annotation Language for Semantic Search of Legal Sources</title>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
-      <author><first>François</first><last>Levy</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="francois-levy"><first>François</first><last>Levy</last></author>
       <author><first>Adam</first><last>Wyner</last></author>
       <url hash="34fe245e">L18-1177</url>
       <bibkey>nazarenko-etal-2018-annotation</bibkey>
@@ -1588,7 +1588,7 @@
       <title><fixed-case>JESC</fixed-case>: <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Subtitle Corpus</title>
       <author><first>Reid</first><last>Pryzant</last></author>
       <author><first>Youngjoo</first><last>Chung</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Denny</first><last>Britz</last></author>
       <url hash="c43f7cdb">L18-1182</url>
       <bibkey>pryzant-etal-2018-jesc</bibkey>
@@ -1599,7 +1599,7 @@
       <author><first>Georges</first><last>Neto</last></author>
       <author><first>Barbara</first><last>Silva</last></author>
       <author><first>Danielle</first><last>Monteiro</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Rafael</first><last>Dias</last></author>
       <url hash="aac915f7">L18-1183</url>
       <bibkey>ramos-etal-2018-building</bibkey>
@@ -1608,7 +1608,7 @@
       <title>Linguistic and Sociolinguistic Annotation of 17th Century <fixed-case>D</fixed-case>utch Letters</title>
       <author><first>Marijn</first><last>Schraagen</last></author>
       <author><first>Feike</first><last>Dietz</last></author>
-      <author><first>Marjo</first><last>van Koppen</last></author>
+      <author id="marjo-van-koppen"><first>Marjo</first><last>van Koppen</last></author>
       <url hash="88360bbd">L18-1184</url>
       <bibkey>schraagen-etal-2018-linguistic</bibkey>
     </paper>
@@ -1629,7 +1629,7 @@
     <paper id="187">
       <title><fixed-case>ASAP</fixed-case>++: Enriching the <fixed-case>ASAP</fixed-case> Automated Essay Grading Dataset with Essay Attribute Scores</title>
       <author><first>Sandeep</first><last>Mathias</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="db1f3be9">L18-1187</url>
       <bibkey>mathias-bhattacharyya-2018-asap</bibkey>
     </paper>
@@ -1645,16 +1645,16 @@
     </paper>
     <paper id="189">
       <title>The Reference Corpus of the Contemporary <fixed-case>R</fixed-case>omanian Language (<fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>o<fixed-case>L</fixed-case>a)</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <url hash="6638cbf1">L18-1189</url>
       <bibkey>barbu-mititelu-etal-2018-reference</bibkey>
     </paper>
     <paper id="190">
       <title>A Corpus of Drug Usage Guidelines Annotated with Type of Advice</title>
-      <author><first>Sarah Masud</first><last>Preum</last></author>
-      <author><first>Md. Rizwan</first><last>Parvez</last></author>
+      <author id="sarah-masud-preum"><first>Sarah Masud</first><last>Preum</last></author>
+      <author id="md-rizwan-parvez"><first>Md. Rizwan</first><last>Parvez</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>John</first><last>Stankovic</last></author>
       <url hash="e8eef91a">L18-1190</url>
@@ -1663,14 +1663,14 @@
     <paper id="191">
       <title><fixed-case>B</fixed-case>io<fixed-case>R</fixed-case>o: The Biomedical Corpus for the <fixed-case>R</fixed-case>omanian Language</title>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <url hash="f465c278">L18-1191</url>
       <bibkey>mitrofan-tufis-2018-bioro</bibkey>
     </paper>
     <paper id="192">
       <title>A Comparison Of Emotion Annotation Schemes And A New Annotated Data Set</title>
       <author><first>Ian</first><last>Wood</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Vladimir</first><last>Andryushechkin</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="843e0deb">L18-1192</url>
@@ -1681,7 +1681,7 @@
       <author><first>Ankush</first><last>Khandelwal</last></author>
       <author><first>Sahil</first><last>Swami</last></author>
       <author><first>Syed S.</first><last>Akhtar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <url hash="dd2104eb">L18-1193</url>
       <bibkey>khandelwal-etal-2018-humor</bibkey>
     </paper>
@@ -1713,7 +1713,7 @@
     </paper>
     <paper id="197">
       <title><fixed-case>W</fixed-case>iki<fixed-case>A</fixed-case>rt Emotions: An Annotated Dataset of Emotions Evoked by Art</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <url hash="bfade424">L18-1197</url>
       <bibkey>mohammad-kiritchenko-2018-wikiart</bibkey>
@@ -1731,7 +1731,7 @@
     <paper id="199">
       <title>Sentence and Clause Level Emotion Annotation, Detection, and Classification in a Multi-Genre Corpus</title>
       <author><first>Shabnam</first><last>Tafreshi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <url hash="7ee363a7">L18-1199</url>
       <bibkey>tafreshi-diab-2018-sentence</bibkey>
     </paper>
@@ -1739,7 +1739,7 @@
       <title>A <fixed-case>S</fixed-case>wedish Cookie-Theft Corpus</title>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <author><first>Kristina</first><last>Lundholm Fors</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Arto</first><last>Nordlund</last></author>
       <url hash="07f9b330">L18-1200</url>
       <bibkey>kokkinakis-etal-2018-swedish</bibkey>
@@ -1747,7 +1747,7 @@
     <paper id="201">
       <title>Sharing Copies of Synthetic Clinical Corpora without Physical Distribution — A Case Study to Get Around <fixed-case>IPR</fixed-case>s and Privacy Constraints Featuring the <fixed-case>G</fixed-case>erman <fixed-case>JSYNCC</fixed-case> Corpus</title>
       <author><first>Christina</first><last>Lohr</last></author>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <url hash="8edd90b2">L18-1201</url>
       <bibkey>lohr-etal-2018-sharing</bibkey>
@@ -1757,14 +1757,14 @@
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
       <author><first>Giulia</first><last>Dore</last></author>
       <author><first>Thomas</first><last>Margoni</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <url hash="f79f5ad0">L18-1202</url>
       <bibkey>eckart-de-castilho-etal-2018-legal</bibkey>
     </paper>
     <paper id="203">
       <title><fixed-case>LREM</fixed-case>ap, a Song of Resources and Evaluation</title>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Sara</first><last>Goggi</last></author>
       <author><first>Gabriella</first><last>Pardelli</last></author>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
@@ -1773,7 +1773,7 @@
     </paper>
     <paper id="204">
       <title>Metadata Collection Records for Language Resources</title>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <author><first>Erwin</first><last>Komen</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <url hash="fb7adedc">L18-1204</url>
@@ -1782,7 +1782,7 @@
     <paper id="205">
       <title>Managing Public Sector Data for Multilingual Applications Development</title>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Miltos</first><last>Deligiannis</last></author>
       <author><first>Maria</first><last>Giagkou</last></author>
       <url hash="4b0d3a3e">L18-1205</url>
@@ -1790,11 +1790,11 @@
     </paper>
     <paper id="206">
       <title>Bridging the <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid and <fixed-case>CLARIN</fixed-case></title>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Mohammad Fazleh</first><last>Elahi</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
@@ -1806,22 +1806,22 @@
     </paper>
     <paper id="207">
       <title>Fluid Annotation: A Granularity-aware Annotation Tool for <fixed-case>C</fixed-case>hinese Word Fluidity</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Yu-Hsiang</first><last>Tseng</last></author>
-      <author><first>Chih-Yao</first><last>Lee</last></author>
+      <author id="chi-yao-lee"><first>Chih-Yao</first><last>Lee</last></author>
       <author><first>Chiung-Yu</first><last>Chiang</last></author>
       <url hash="e229e84e">L18-1207</url>
       <bibkey>hsieh-etal-2018-fluid</bibkey>
     </paper>
     <paper id="208">
       <title><fixed-case>E</fixed-case>-magyar – A Digital Language Processing System</title>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>Eszter</first><last>Simon</last></author>
       <author><first>Bálint</first><last>Sass</last></author>
       <author><first>Iván</first><last>Mittelholcz</last></author>
       <author><first>Attila</first><last>Novák</last></author>
       <author><first>Balázs</first><last>Indig</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <url hash="adbd628d">L18-1208</url>
       <bibkey>varadi-etal-2018-e</bibkey>
@@ -1843,17 +1843,17 @@
       <title><fixed-case>CLARIN</fixed-case>’s Key Resource Families</title>
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Jakob</first><last>Lenardič</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <url hash="9daa58a2">L18-1210</url>
       <bibkey>fiser-etal-2018-clarins</bibkey>
     </paper>
     <paper id="211">
       <title><fixed-case>I</fixed-case>ndra: A Word Embedding and Semantic Relatedness Server</title>
-      <author><first>Juliano Efson</first><last>Sales</last></author>
+      <author id="juliano-efson-sales"><first>Juliano Efson</first><last>Sales</last></author>
       <author><first>Leonardo</first><last>Souza</last></author>
       <author><first>Siamak</first><last>Barzegar</last></author>
       <author><first>Brian</first><last>Davis</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <url hash="d69539ea">L18-1211</url>
       <bibkey>sales-etal-2018-indra</bibkey>
@@ -1868,22 +1868,22 @@
     <paper id="213">
       <title><fixed-case>E</fixed-case>uropean Language Resource Coordination: Collecting Language Resources for Public Sector Multilingual Information Management</title>
       <author><first>Andrea</first><last>Lösch</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Lilli</first><last>Smal</last></author>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Eileen</first><last>Schnur</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="61bd4fe7">L18-1213</url>
       <bibkey>losch-etal-2018-european</bibkey>
     </paper>
     <paper id="214">
       <title>Tilde <fixed-case>MT</fixed-case> Platform for Developing Client Specific <fixed-case>MT</fixed-case> Solutions</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Rihards</first><last>Kalniņš</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="rihards-kalnins"><first>Rihards</first><last>Kalniņš</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
       <author><first>Valters</first><last>Šics</last></author>
@@ -1902,7 +1902,7 @@
       <title>Text Normalization Infrastructure that Scales to Hundreds of Language Varieties</title>
       <author><first>Mason</first><last>Chua</last></author>
       <author><first>Daan</first><last>van Esch</last></author>
-      <author><first>Noah</first><last>Coccaro</last></author>
+      <author id="noah-coccaro"><first>Noah</first><last>Coccaro</last></author>
       <author><first>Eunjoon</first><last>Cho</last></author>
       <author><first>Sujeet</first><last>Bhandari</last></author>
       <author><first>Libin</first><last>Jia</last></author>
@@ -1911,7 +1911,7 @@
     </paper>
     <paper id="217">
       <title><fixed-case>D</fixed-case>e<fixed-case>M</fixed-case>odify: A Dataset for Analyzing Contextual Constraints on Modifier Deletion</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Devon</first><last>Fritz</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <url hash="5c87e796">L18-1217</url>
@@ -1934,14 +1934,14 @@
     <paper id="220">
       <title><fixed-case>SPADE</fixed-case>: Evaluation Dataset for Monolingual Phrase Alignment</title>
       <author><first>Yuki</first><last>Arase</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <url hash="a5501306">L18-1220</url>
       <bibkey>arase-tsujii-2018-spade</bibkey>
     </paper>
     <paper id="221">
       <title><fixed-case>ETPC</fixed-case> - A Paraphrase Identification Corpus Annotated with Extended Paraphrase Typology and Negation</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
       <author><first>Maria</first><last>Salamó</last></author>
       <url hash="5c0fd7cd">L18-1221</url>
       <bibkey>kovatchev-etal-2018-etpc</bibkey>
@@ -1966,7 +1966,7 @@
     <paper id="224">
       <title>Quantifying Qualitative Data for Understanding Controversial Issues</title>
       <author><first>Michael</first><last>Wojatzki</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <url hash="a6a668df">L18-1224</url>
@@ -1991,17 +1991,17 @@
     </paper>
     <paper id="227">
       <title>Creating a Verb Synonym Lexicon Based on a Parallel Corpus</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="04ccd8fb">L18-1227</url>
       <bibkey>uresova-etal-2018-creating</bibkey>
     </paper>
     <paper id="228">
       <title>Evaluation of Domain-specific Word Embeddings using Knowledge Resources</title>
       <author><first>Farhad</first><last>Nooralahzadeh</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <url hash="a7efb641">L18-1228</url>
       <bibkey>nooralahzadeh-etal-2018-evaluation</bibkey>
@@ -2017,17 +2017,17 @@
     <paper id="230">
       <title>Automatic <fixed-case>W</fixed-case>ordnet Mapping: from <fixed-case>C</fixed-case>ore<fixed-case>N</fixed-case>et to <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Jiseong</first><last>Kim</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Sunggoo</first><last>Kwon</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="3983e7dd">L18-1230</url>
       <bibkey>kim-etal-2018-automatic</bibkey>
     </paper>
     <paper id="231">
       <title>The New <fixed-case>P</fixed-case>ropbank: Aligning <fixed-case>P</fixed-case>ropbank with <fixed-case>AMR</fixed-case> through <fixed-case>POS</fixed-case> Unification</title>
       <author><first>Tim</first><last>O’Gorman</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>Katie</first><last>Conger</last></author>
       <author><first>James</first><last>Gung</last></author>
@@ -2045,8 +2045,8 @@
     <paper id="233">
       <title>The <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>lgerian Code-Switching Triggered audio corpus (<fixed-case>FACST</fixed-case>)</title>
       <author><first>Amazouz</first><last>Djegdjiga</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <url hash="85181341">L18-1233</url>
       <bibkey>djegdjiga-etal-2018-french</bibkey>
     </paper>
@@ -2082,7 +2082,7 @@
       <title>Evaluating the <fixed-case>W</fixed-case>ords<fixed-case>E</fixed-case>ye Text-to-Scene System: Imaginative and Realistic Sentences</title>
       <author><first>Morgan</first><last>Ulinski</last></author>
       <author><first>Bob</first><last>Coyne</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="9e9c4b3b">L18-1237</url>
       <bibkey>ulinski-etal-2018-evaluating</bibkey>
     </paper>
@@ -2112,7 +2112,7 @@
     <paper id="241">
       <title><fixed-case>C</fixed-case>-<fixed-case>HTS</fixed-case>: A Concept-based Hierarchical Text Segmentation approach</title>
       <author><first>Mostafa</first><last>Bayomi</last></author>
-      <author><first>Séamus</first><last>Lawless</last></author>
+      <author id="seamus-lawless"><first>Séamus</first><last>Lawless</last></author>
       <url hash="a369360a">L18-1241</url>
       <bibkey>bayomi-lawless-2018-c</bibkey>
     </paper>
@@ -2126,7 +2126,7 @@
     <paper id="243">
       <title>A Corpus of Metaphor Novelty Scores for Syntactically-Related Word Pairs</title>
       <author><first>Natalie</first><last>Parde</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <url hash="8cc694b9">L18-1243</url>
       <bibkey>parde-nielsen-2018-corpus</bibkey>
     </paper>
@@ -2135,8 +2135,8 @@
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Simone P.</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone P.</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <url hash="cf475a1d">L18-1244</url>
       <bibkey>panchenko-etal-2018-improving</bibkey>
     </paper>
@@ -2144,7 +2144,7 @@
       <title>Laying the Groundwork for Knowledge Base Population: Nine Years of Linguistic Resources for <fixed-case>TAC</fixed-case> <fixed-case>KBP</fixed-case></title>
       <author><first>Jeremy</first><last>Getman</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <url hash="990931a9">L18-1245</url>
@@ -2152,7 +2152,7 @@
     </paper>
     <paper id="246">
       <title>A Dataset for Inter-Sentence Relation Extraction using Distant Supervision</title>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <author><first>Danushka</first><last>Bollegala</last></author>
       <author><first>Frans</first><last>Coenen</last></author>
       <author><first>Katie</first><last>Atkinson</last></author>
@@ -2164,7 +2164,7 @@
       <author><first>Jakub</first><last>Náplava</last></author>
       <author><first>Milan</first><last>Straka</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="fef72d58">L18-1247</url>
       <bibkey>naplava-etal-2018-diacritics</bibkey>
     </paper>
@@ -2172,7 +2172,7 @@
       <title>Ensemble <fixed-case>R</fixed-case>omanian Dependency Parsing with Neural Networks</title>
       <author><first>Radu</first><last>Ion</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <url hash="f4233d04">L18-1248</url>
       <bibkey>ion-etal-2018-ensemble</bibkey>
     </paper>
@@ -2186,7 +2186,7 @@
     </paper>
     <paper id="250">
       <title>Collection of Multimodal Dialog Data and Analysis of the Result of Annotation of Users’ Interest Level</title>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Sayaka</first><last>Tomimasu</last></author>
       <author><first>Mikio</first><last>Nakano</last></author>
       <author><first>Kazunori</first><last>Komatani</last></author>
@@ -2212,7 +2212,7 @@
       <author><first>Chao-Chun</first><last>Hsu</last></author>
       <author><first>Sheng-Yeh</first><last>Chen</last></author>
       <author><first>Chuan-Chun</first><last>Kuo</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <url hash="bd1f8033">L18-1252</url>
       <bibkey>hsu-etal-2018-emotionlines</bibkey>
@@ -2258,7 +2258,7 @@
     <paper id="257">
       <title>A Corpus of e<fixed-case>R</fixed-case>ulemaking User Comments for Measuring Evaluability of Arguments</title>
       <author><first>Joonsuk</first><last>Park</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <url hash="79af694f">L18-1257</url>
       <bibkey>park-cardie-2018-corpus</bibkey>
     </paper>
@@ -2275,7 +2275,7 @@
     </paper>
     <paper id="259">
       <title>Discourse Coherence Through the Lens of an Annotated Text Corpus: A Case Study</title>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <url hash="44d4a908">L18-1259</url>
       <bibkey>hajicova-mirovsky-2018-discourse</bibkey>
@@ -2302,9 +2302,9 @@
     <paper id="262">
       <title><fixed-case>BDPROTO</fixed-case>: A Database of Phonological Inventories from Ancient and Reconstructed Languages</title>
       <author><first>Egidio</first><last>Marsico</last></author>
-      <author><first>Sebastien</first><last>Flavier</last></author>
+      <author id="sebastien-flavier"><first>Sebastien</first><last>Flavier</last></author>
       <author><first>Annemarie</first><last>Verkerk</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <url hash="a63990d8">L18-1262</url>
       <bibkey>marsico-etal-2018-bdproto</bibkey>
     </paper>
@@ -2320,7 +2320,7 @@
       <title>Building a Word Segmenter for <fixed-case>S</fixed-case>anskrit Overnight</title>
       <author><first>Vikas</first><last>Reddy</last></author>
       <author><first>Amrith</first><last>Krishna</last></author>
-      <author><first>Vishnu</first><last>Sharma</last></author>
+      <author id="vishnu-dutt-sharma"><first>Vishnu</first><last>Sharma</last></author>
       <author><first>Prateek</first><last>Gupta</last></author>
       <author><first>Vineeth</first><last>M R</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -2332,19 +2332,19 @@
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <url hash="9aa4a442">L18-1265</url>
       <bibkey>griffitt-etal-2018-simple</bibkey>
     </paper>
     <paper id="266">
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation of Constructions: The More We Include, the Better the Representation</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Bianca</first><last>Badarau</last></author>
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Tim</first><last>O’Gorman</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <url hash="9c7f62dc">L18-1266</url>
       <bibkey>bonial-etal-2018-abstract</bibkey>
@@ -2415,7 +2415,7 @@
     <paper id="275">
       <title><fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>ubtitles2018: Statistical Rescoring of Sentence Alignments in Large, Noisy Parallel Corpora</title>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <url hash="d22104b5">L18-1275</url>
       <bibkey>lison-etal-2018-opensubtitles2018</bibkey>
@@ -2433,7 +2433,7 @@
     </paper>
     <paper id="277">
       <title><fixed-case>E</fixed-case>uro<fixed-case>G</fixed-case>ames16: Evaluating Change Detection in Online Conversation</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Yunli</first><last>Wang</last></author>
       <author><first>Fangming</first><last>Liao</last></author>
       <author><first>Zachary</first><last>Zanussi</last></author>
@@ -2444,9 +2444,9 @@
     </paper>
     <paper id="278">
       <title>A Deep Neural Network based Approach for Entity Extraction in Code-Mixed <fixed-case>I</fixed-case>ndian Social Media Text</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="34fdf607">L18-1278</url>
       <bibkey>gupta-etal-2018-deep</bibkey>
     </paper>
@@ -2454,10 +2454,10 @@
       <title><fixed-case>P</fixed-case>o<fixed-case>STWITA</fixed-case>-<fixed-case>UD</fixed-case>: an <fixed-case>I</fixed-case>talian <fixed-case>T</fixed-case>witter Treebank in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Oronzo</first><last>Antonelli</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <url hash="c5a64c08">L18-1279</url>
       <bibkey>sanguinetti-etal-2018-postwita</bibkey>
     </paper>
@@ -2478,8 +2478,8 @@
     </paper>
     <paper id="282">
       <title>Towards an <fixed-case>ISO</fixed-case> Standard for the Annotation of Quantification</title>
-      <author><first>Harry</first><last>Bunt</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Kiyong</first><last>Lee</last></author>
       <url hash="c55b2135">L18-1282</url>
       <bibkey>bunt-etal-2018-towards</bibkey>
@@ -2495,7 +2495,7 @@
     <paper id="284">
       <title>A Gold Standard for Multilingual Automatic Term Extraction from Comparable Corpora: Term Structure and Translation Equivalents</title>
       <author><first>Ayla</first><last>Rigouts Terryn</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <url hash="56c6d351">L18-1284</url>
       <bibkey>rigouts-terryn-etal-2018-gold</bibkey>
@@ -2504,7 +2504,7 @@
       <title>Handling Big Data and Sensitive Data Using <fixed-case>EUDAT</fixed-case>’s Generic Execution Framework and the <fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht Workflow Engine.</title>
       <author><first>Claus</first><last>Zinn</last></author>
       <author><first>Wei</first><last>Qui</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Emanuel</first><last>Dima</last></author>
       <author><first>Alexandr</first><last>Chernov</last></author>
       <url hash="404437de">L18-1285</url>
@@ -2515,8 +2515,8 @@
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Eugen</first><last>Ruppert</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Simone P.</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone P.</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <url hash="47a41a80">L18-1286</url>
       <bibkey>panchenko-etal-2018-building</bibkey>
     </paper>
@@ -2528,7 +2528,7 @@
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Sumire</first><last>Uematsu</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Mai</first><last>Omura</last></author>
       <author><first>Yugo</first><last>Murawaki</last></author>
       <url hash="f319e810">L18-1287</url>
@@ -2552,7 +2552,7 @@
     <paper id="290">
       <title>Parse Me if You Can: Artificial Treebanks for Parsing Experiments on Elliptical Constructions</title>
       <author><first>Kira</first><last>Droganova</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <url hash="68c86849">L18-1290</url>
@@ -2562,13 +2562,13 @@
       <title>Semi-Automatic Construction of Word-Formation Networks (for <fixed-case>P</fixed-case>olish and <fixed-case>S</fixed-case>panish)</title>
       <author><first>Mateusz</first><last>Lango</last></author>
       <author><first>Magda</first><last>Ševčíková</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <url hash="79f42dea">L18-1291</url>
       <bibkey>lango-etal-2018-semi</bibkey>
     </paper>
     <paper id="292">
       <title>A multilingual collection of <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>U</fixed-case>-compatible morphological lexicons</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <url hash="2fea7640">L18-1292</url>
       <bibkey>sagot-2018-multilingual</bibkey>
     </paper>
@@ -2581,11 +2581,11 @@
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Patrick</first><last>Xia</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
-      <author><first>Arya</first><last>McCarthy</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="arya-d-mccarthy"><first>Arya</first><last>McCarthy</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <url hash="eac1f7d7">L18-1293</url>
       <revision id="1" href="L18-1293v1" hash="8fb77de0"/>
@@ -2596,7 +2596,7 @@
       <title>A Computational Architecture for the Morphology of <fixed-case>U</fixed-case>pper <fixed-case>T</fixed-case>anana</title>
       <author><first>Olga</first><last>Lovick</last></author>
       <author><first>Christopher</first><last>Cox</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <url hash="6516926a">L18-1294</url>
@@ -2620,13 +2620,13 @@
       <title>Measuring Innovation in Speech and Language Processing Publications.</title>
       <author><first>Joseph</first><last>Mariani</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <url hash="6b827057">L18-1297</url>
       <bibkey>mariani-etal-2018-measuring</bibkey>
     </paper>
     <paper id="298">
       <title><fixed-case>PDF</fixed-case>digest: an Adaptable Layout-Aware <fixed-case>PDF</fixed-case>-to-<fixed-case>XML</fixed-case> Textual Content Extractor for Scientific Articles</title>
-      <author><first>Daniel</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
       <author><first>Àlex</first><last>Bravo</last></author>
@@ -2669,7 +2669,7 @@
       <author><first>Xiaomin</first><last>Chu</last></author>
       <author><first>Feng</first><last>Jiang</last></author>
       <author><first>Sheng</first><last>Xu</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <url hash="833d3091">L18-1302</url>
       <bibkey>chu-etal-2018-building</bibkey>
     </paper>
@@ -2733,14 +2733,14 @@
     <paper id="310">
       <title>Extending the gold standard for a lexical substitution task: is it worth it?</title>
       <author><first>Ludovic</first><last>Tanguy</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Laura</first><last>Rivière</last></author>
       <url hash="d93edebc">L18-1310</url>
       <bibkey>tanguy-etal-2018-extending</bibkey>
     </paper>
     <paper id="311">
       <title>Lexical and Semantic Features for Cross-lingual Text Reuse Classification: an Experiment in <fixed-case>E</fixed-case>nglish and <fixed-case>L</fixed-case>atin Paraphrases</title>
-      <author><first>Maria</first><last>Moritz</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Moritz</last></author>
       <author><first>David</first><last>Steding</last></author>
       <url hash="1605ef4e">L18-1311</url>
       <bibkey>moritz-steding-2018-lexical</bibkey>
@@ -2757,7 +2757,7 @@
       <author><first>Zsanett</first><last>Ferenczi</last></author>
       <author><first>Iván</first><last>Mittelholcz</last></author>
       <author><first>Eszter</first><last>Simon</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <url hash="a1a95b45">L18-1313</url>
       <bibkey>ferenczi-etal-2018-evaluation</bibkey>
     </paper>
@@ -2785,7 +2785,7 @@
       <author><first>Gengyu</first><last>Wang</last></author>
       <author><first>Seungtaek</first><last>Choi</last></author>
       <author><first>Hyunsouk</first><last>Cho</last></author>
-      <author><first>Reinald</first><last>Kim Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald</first><last>Kim Amplayo</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <url hash="c40278eb">L18-1316</url>
       <bibkey>yeo-etal-2018-visual</bibkey>
@@ -2793,8 +2793,8 @@
     <paper id="317">
       <title>Is it worth it? Budget-related evaluation metrics for model selection</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Giancarlo D.</first><last>Salton</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo D.</first><last>Salton</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <url hash="2312e484">L18-1317</url>
       <bibkey>klubicka-etal-2018-worth</bibkey>
     </paper>
@@ -2802,7 +2802,7 @@
       <title>Automated Evaluation of Out-of-Context Errors</title>
       <author><first>Patrick</first><last>Huber</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="4ee2a8f6">L18-1318</url>
       <bibkey>huber-etal-2018-automated</bibkey>
     </paper>
@@ -2828,29 +2828,29 @@
       <title><fixed-case>MI</fixed-case>s<fixed-case>A</fixed-case>: Multilingual “<fixed-case>I</fixed-case>s<fixed-case>A</fixed-case>” Extraction from Corpora</title>
       <author><first>Stefano</first><last>Faralli</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <url hash="e084d0aa">L18-1321</url>
       <bibkey>faralli-etal-2018-misa</bibkey>
     </paper>
     <paper id="322">
       <title>Biomedical term normalization of <fixed-case>EHR</fixed-case>s with <fixed-case>UMLS</fixed-case></title>
-      <author><first>Naiara</first><last>Perez-Miguel</last></author>
+      <author id="naiara-perez"><first>Naiara</first><last>Perez-Miguel</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="6288f127">L18-1322</url>
       <bibkey>perez-miguel-etal-2018-biomedical</bibkey>
     </paper>
     <paper id="323">
       <title>Revisiting the Task of Scoring Open <fixed-case>IE</fixed-case> Relations</title>
       <author><first>William</first><last>Léchelle</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="5ef7f226">L18-1323</url>
       <bibkey>lechelle-langlais-2018-revisiting</bibkey>
     </paper>
     <paper id="324">
       <title>A supervised approach to taxonomy extraction using word embeddings</title>
       <author><first>Rajdeep</first><last>Sarkar</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="567ac4f8">L18-1324</url>
       <bibkey>sarkar-etal-2018-supervised</bibkey>
@@ -2872,7 +2872,7 @@
     </paper>
     <paper id="327">
       <title>Mining Biomedical Publications With The <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <url hash="6440b3e3">L18-1327</url>
@@ -2881,15 +2881,15 @@
     <paper id="328">
       <title>An Initial Test Collection for Ranked Retrieval of <fixed-case>SMS</fixed-case> Conversations</title>
       <author><first>Rashmi</first><last>Sankepally</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <url hash="83a37a93">L18-1328</url>
       <bibkey>sankepally-oard-2018-initial</bibkey>
     </paper>
     <paper id="329">
       <title><fixed-case>F</fixed-case>r<fixed-case>N</fixed-case>ews<fixed-case>L</fixed-case>ink : a corpus linking <fixed-case>TV</fixed-case> Broadcast News Segments and Press Articles</title>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
       <author><first>Anais</first><last>Landeau</last></author>
       <author><first>Delphine</first><last>Charlet</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
@@ -2909,7 +2909,7 @@
       <author><first>Lucie</first><last>Skorkovská</last></author>
       <author><first>Petr</first><last>Neduchal</last></author>
       <author><first>Pavel</first><last>Ircing</last></author>
-      <author><first>Josef V.</first><last>Psutka</last></author>
+      <author id="josef-psutka"><first>Josef V.</first><last>Psutka</last></author>
       <author><first>Marek</first><last>Hrúz</last></author>
       <author><first>Aleš</first><last>Pražák</last></author>
       <author><first>Daniel</first><last>Soutner</last></author>
@@ -2935,7 +2935,7 @@
     <paper id="334">
       <title>The Effects of Unimodal Representation Choices on Multimodal Learning</title>
       <author><first>Fernando Tadao</first><last>Ito</last></author>
-      <author><first>Helena</first><last>de Medeiros Caseli</last></author>
+      <author id="helena-de-medeiros-caseli"><first>Helena</first><last>de Medeiros Caseli</last></author>
       <author><first>Jander</first><last>Moreira</last></author>
       <url hash="6e39a7e5">L18-1334</url>
       <bibkey>ito-etal-2018-effects</bibkey>
@@ -2943,7 +2943,7 @@
     <paper id="335">
       <title>An Evaluation Framework for Multimodal Interaction</title>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="3a53ccae">L18-1335</url>
       <bibkey>krishnaswamy-pustejovsky-2018-evaluation</bibkey>
     </paper>
@@ -2952,7 +2952,7 @@
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
       <author><first>Samy</first><last>Hedaya</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="32fde2a9">L18-1336</url>
       <bibkey>abdelali-etal-2018-waw</bibkey>
     </paper>
@@ -2977,7 +2977,7 @@
       <author><first>Cédric</first><last>Fayet</last></author>
       <author><first>Arnaud</first><last>Delhay</last></author>
       <author><first>Damien</first><last>Lolive</last></author>
-      <author><first>Pierre-François</first><last>Marteau</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-François</first><last>Marteau</last></author>
       <url hash="ffd72dc4">L18-1339</url>
       <bibkey>fayet-etal-2018-emo</bibkey>
     </paper>
@@ -3038,13 +3038,13 @@
       <author><first>Jayeol</first><last>Chun</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <url hash="ab9e0b85">L18-1347</url>
       <bibkey>chun-etal-2018-building</bibkey>
     </paper>
     <paper id="348">
       <title>Moving <fixed-case>TIGER</fixed-case> beyond Sentence-Level</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Kerstin</first><last>Eckart</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <url hash="d161590a">L18-1348</url>
@@ -3059,9 +3059,9 @@
     </paper>
     <paper id="350">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>mharic</title>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Baye Yimam</first><last>Mekonnen</last></author>
+      <author id="baye-yimam-mekonnen"><first>Baye Yimam</first><last>Mekonnen</last></author>
       <url hash="572849df">L18-1350</url>
       <bibkey>seyoum-etal-2018-universal</bibkey>
     </paper>
@@ -3074,7 +3074,7 @@
     </paper>
     <paper id="352">
       <title>Multilingual Dependency Parsing for Low-Resource Languages: Case Studies on North Saami and <fixed-case>K</fixed-case>omi-<fixed-case>Z</fixed-case>yrian</title>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <url hash="e263267e">L18-1352</url>
@@ -3104,7 +3104,7 @@
       <author><first>Noriko</first><last>Kawahara</last></author>
       <author><first>Miho</first><last>Sakamoto</last></author>
       <author><first>Yoshitaka</first><last>Uchida</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="ebc3badd">L18-1355</url>
       <bibkey>takaoka-etal-2018-sudachi</bibkey>
     </paper>
@@ -3115,7 +3115,7 @@
       <author><first>Yusuke</first><last>Koyanagi</last></author>
       <author><first>Noriko</first><last>Ikeda</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="ec85b4fe">L18-1356</url>
       <bibkey>tanaka-etal-2018-chemical</bibkey>
     </paper>
@@ -3145,7 +3145,7 @@
       <author><first>Jan</first><last>Odijk</last></author>
       <author><first>Alexis</first><last>Dimitriadis</last></author>
       <author><first>Martijn</first><last>van der Klis</last></author>
-      <author><first>Marjo</first><last>van Koppen</last></author>
+      <author id="marjo-van-koppen"><first>Marjo</first><last>van Koppen</last></author>
       <author><first>Meie</first><last>Otten</last></author>
       <author><first>Remco</first><last>van der Veen</last></author>
       <url hash="6f6aee6b">L18-1360</url>
@@ -3153,7 +3153,7 @@
     </paper>
     <paper id="361">
       <title><fixed-case>B</fixed-case>aby<fixed-case>C</fixed-case>loud, a Technological Platform for Parents and Researchers</title>
-      <author><first>Xuân-Nga</first><last>Cao</last></author>
+      <author id="xuan-nga-cao"><first>Xuân-Nga</first><last>Cao</last></author>
       <author><first>Cyrille</first><last>Dakhlia</last></author>
       <author><first>Patricia</first><last>Del Carmen</last></author>
       <author><first>Mohamed-Amine</first><last>Jaouani</last></author>
@@ -3173,8 +3173,8 @@
     <paper id="363">
       <title>Building a <fixed-case>TOCFL</fixed-case> Learner Corpus for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <url hash="af06a1ea">L18-1363</url>
       <bibkey>lee-etal-2018-building</bibkey>
     </paper>
@@ -3203,9 +3203,9 @@
     </paper>
     <paper id="367">
       <title>Developing New Linguistic Resources and Tools for the <fixed-case>G</fixed-case>alician Language</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>Xavier</first><last>Gómez Guinovart</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="xavier-gomez-guinovart"><first>Xavier</first><last>Gómez Guinovart</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Miguel Anxo</first><last>Solla Portela</last></author>
       <url hash="bad579a7">L18-1367</url>
       <bibkey>agerri-etal-2018-developing</bibkey>
@@ -3215,7 +3215,7 @@
       <author><first>Jordan</first><last>Lachler</last></author>
       <author><first>Lene</first><last>Antonsen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
       <url hash="7e439954">L18-1368</url>
       <bibkey>lachler-etal-2018-modeling</bibkey>
@@ -3225,7 +3225,7 @@
       <author><first>Caitlin</first><last>Richter</last></author>
       <author><first>Matthew</first><last>Wickes</last></author>
       <author><first>Deniz</first><last>Beser</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="47032bba">L18-1369</url>
       <bibkey>richter-etal-2018-low</bibkey>
     </paper>
@@ -3252,7 +3252,7 @@
       <author><first>Ralf</first><last>Grubenmann</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <author><first>Pius</first><last>von Däniken</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <url hash="e8d564c4">L18-1372</url>
       <bibkey>grubenmann-etal-2018-sb</bibkey>
@@ -3260,7 +3260,7 @@
     <paper id="373">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>inu</title>
       <author><first>Hajime</first><last>Senuma</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <url hash="3222a806">L18-1373</url>
       <bibkey>senuma-aizawa-2018-universal</bibkey>
     </paper>
@@ -3297,16 +3297,16 @@
     </paper>
     <paper id="377">
       <title>Evaluating <fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exi<fixed-case>CAT</fixed-case>: a Terminology-Enhanced <fixed-case>CAT</fixed-case> Tool</title>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <author><first>Arianne</first><last>Reimerink</last></author>
       <url hash="7b7a07f9">L18-1377</url>
       <bibkey>leon-arauz-reimerink-2018-evaluating</bibkey>
     </paper>
     <paper id="378">
       <title>A <fixed-case>D</fixed-case>anish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Lexicon and an Annotated Corpus Used for Training and Evaluating a Semantic Frame Classifier</title>
-      <author><first>Bolette</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Pedersen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Mareike</first><last>Hartmann</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <url hash="16ffc7af">L18-1378</url>
@@ -3324,7 +3324,7 @@
     <paper id="380">
       <title><fixed-case>P</fixed-case>ronounc<fixed-case>UR</fixed-case>: An <fixed-case>U</fixed-case>rdu Pronunciation Lexicon Generator</title>
       <author><first>Haris</first><last>Bin Zia</last></author>
-      <author><first>Agha Ali</first><last>Raza</last></author>
+      <author id="agha-ali-raza"><first>Agha Ali</first><last>Raza</last></author>
       <author><first>Awais</first><last>Athar</last></author>
       <url hash="49cd90b2">L18-1380</url>
       <bibkey>bin-zia-etal-2018-pronouncur</bibkey>
@@ -3332,22 +3332,22 @@
     <paper id="381">
       <title><fixed-case>S</fixed-case>im<fixed-case>L</fixed-case>ex-999 for <fixed-case>P</fixed-case>olish</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Małgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Małgorzata</first><last>Marciniak</last></author>
       <author><first>Piotr</first><last>Rychlik</last></author>
       <url hash="8800d785">L18-1381</url>
       <bibkey>mykowiecka-etal-2018-simlex</bibkey>
     </paper>
     <paper id="382">
       <title>Finely Tuned, 2 Billion Token Based Word Embeddings for <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>João</first><last>Rodrigues</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <url hash="e9a53850">L18-1382</url>
       <bibkey>rodrigues-branco-2018-finely</bibkey>
     </paper>
     <paper id="383">
       <title><fixed-case>T</fixed-case>eanga: A Linked Data based platform for Natural Language Processing</title>
       <author><first>Housam</first><last>Ziad</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="3391eaf3">L18-1383</url>
       <bibkey>ziad-etal-2018-teanga</bibkey>
@@ -3355,7 +3355,7 @@
     <paper id="384">
       <title>Automatic and Manual Web Annotations in an Infrastructure to handle Fake News and other Online Media Phenomena</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
       <url hash="0565925d">L18-1384</url>
       <bibkey>rehm-etal-2018-automatic</bibkey>
@@ -3389,7 +3389,7 @@
     </paper>
     <paper id="388">
       <title>A Bird’s-eye View of Language Processing Projects at the <fixed-case>R</fixed-case>omanian Academy</title>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
       <url hash="bb58e8d5">L18-1388</url>
       <bibkey>tufis-cristea-2018-birds</bibkey>
@@ -3411,8 +3411,8 @@
     </paper>
     <paper id="391">
       <title>Collecting Language Resources from Public Administrations in the <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries</title>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Rihards</first><last>Kalniņš</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="rihards-kalnins"><first>Rihards</first><last>Kalniņš</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
       <author><first>Aivars</first><last>Bērziņš</last></author>
       <url hash="b65c1a78">L18-1391</url>
@@ -3421,7 +3421,7 @@
     <paper id="392">
       <title><fixed-case>LI</fixed-case>dioms: A Multilingual Linked Idioms Data Set</title>
       <author><first>Diego</first><last>Moussallem</last></author>
-      <author><first>Mohamed Ahmed</first><last>Sherif</last></author>
+      <author id="mohamed-ahmed-sherif"><first>Mohamed Ahmed</first><last>Sherif</last></author>
       <author><first>Diego</first><last>Esteves</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Axel-Cyrille</first><last>Ngonga Ngomo</last></author>
@@ -3440,7 +3440,7 @@
     <paper id="394">
       <title>Annotating <fixed-case>C</fixed-case>hinese Light Verb Constructions according to <fixed-case>PARSEME</fixed-case> guidelines</title>
       <author><first>Menghan</first><last>Jiang</last></author>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Hongzhi</first><last>Xu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <url hash="12ce9fcc">L18-1394</url>
@@ -3450,7 +3450,7 @@
       <title>Using <fixed-case>E</fixed-case>nglish Baits to Catch <fixed-case>S</fixed-case>erbian Multi-Word Terminology</title>
       <author><first>Cvetana</first><last>Krstev</last></author>
       <author><first>Branislava</first><last>Šandrih</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Miljana</first><last>Mladenović</last></author>
       <url hash="d5ccf267">L18-1395</url>
       <bibkey>krstev-etal-2018-using</bibkey>
@@ -3459,29 +3459,29 @@
       <title>Construction of Large-scale <fixed-case>E</fixed-case>nglish Verbal Multiword Expression Annotated Corpus</title>
       <author><first>Akihiko</first><last>Kato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="81d9a550">L18-1396</url>
       <bibkey>kato-etal-2018-construction</bibkey>
     </paper>
     <paper id="397">
       <title><fixed-case>K</fixed-case>onbitzul: an <fixed-case>MWE</fixed-case>-specific database for <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque</title>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
-      <author><first>Itziar</first><last>Aduriz</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="itziar-aduriz"><first>Itziar</first><last>Aduriz</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="56ede97d">L18-1397</url>
       <bibkey>inurrieta-etal-2018-konbitzul</bibkey>
     </paper>
     <paper id="398">
       <title>A Multilingual Test Collection for the Semantic Search of Entity Categories</title>
-      <author><first>Juliano Efson</first><last>Sales</last></author>
+      <author id="juliano-efson-sales"><first>Juliano Efson</first><last>Sales</last></author>
       <author><first>Siamak</first><last>Barzegar</last></author>
       <author><first>Wellington</first><last>Franco</last></author>
       <author><first>Bernhard</first><last>Bermeitinger</last></author>
       <author><first>Tiago</first><last>Cunha</last></author>
       <author><first>Brian</first><last>Davis</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <url hash="4e6aa474">L18-1398</url>
       <bibkey>sales-etal-2018-multilingual</bibkey>
@@ -3489,7 +3489,7 @@
     <paper id="399">
       <title>Towards the Inference of Semantic Relations in Complex Nominals: a Pilot Study</title>
       <author><first>Melania</first><last>Cabezas-García</last></author>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <url hash="17c83359">L18-1399</url>
       <bibkey>cabezas-garcia-leon-arauz-2018-towards</bibkey>
     </paper>
@@ -3503,8 +3503,8 @@
     </paper>
     <paper id="401">
       <title>Improving a Neural-based Tagger for Multiword Expressions Identification</title>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <url hash="b112634a">L18-1401</url>
       <bibkey>varis-klyueva-2018-improving</bibkey>
     </paper>
@@ -3526,7 +3526,7 @@
     <paper id="404">
       <title>Improving Hate Speech Detection with Deep Learning Ensembles</title>
       <author><first>Steven</first><last>Zimmerman</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Chris</first><last>Fox</last></author>
       <url hash="d975e62e">L18-1404</url>
       <bibkey>zimmerman-etal-2018-improving</bibkey>
@@ -3540,7 +3540,7 @@
     </paper>
     <paper id="406">
       <title>Can Domain Adaptation be Handled as Analogies?</title>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Joel</first><last>Pocostales</last></author>
       <url hash="10cf6cc0">L18-1406</url>
       <bibkey>bel-pocostales-2018-domain</bibkey>
@@ -3549,13 +3549,13 @@
       <title>Author Profiling from <fixed-case>F</fixed-case>acebook Corpora</title>
       <author><first>Fernando</first><last>Hsieh</last></author>
       <author><first>Rafael</first><last>Dias</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="c1d06dce">L18-1407</url>
       <bibkey>hsieh-etal-2018-author</bibkey>
     </paper>
     <paper id="408">
       <title>Semantic Relatedness of <fixed-case>W</fixed-case>ikipedia Concepts – Benchmark Data and a Working Solution</title>
-      <author><first>Liat</first><last>Ein Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein Dor</last></author>
       <author><first>Alon</first><last>Halfon</last></author>
       <author><first>Yoav</first><last>Kantor</last></author>
       <author><first>Ran</first><last>Levy</last></author>
@@ -3586,10 +3586,10 @@
     </paper>
     <paper id="411">
       <title>Finite-state morphological analysis for <fixed-case>G</fixed-case>agauz</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Sevilay</first><last>Bayatli</last></author>
       <author><first>Güllü</first><last>Karanfil</last></author>
-      <author><first>Memduh</first><last>Gökırmak</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gökırmak</last></author>
       <author><first>Francis M.</first><last>Tyers</last></author>
       <url hash="20c74413">L18-1411</url>
       <bibkey>tyers-etal-2018-finite</bibkey>
@@ -3604,7 +3604,7 @@
     <paper id="413">
       <title>Morphology Injection for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>alayalam Statistical Machine Translation</title>
       <author><first>Sreelekha</first><last>S</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="fba462a9">L18-1413</url>
       <bibkey>s-bhattacharyya-2018-morphology</bibkey>
     </paper>
@@ -3650,21 +3650,21 @@
     <paper id="418">
       <title><fixed-case>EMTC</fixed-case>: Multilabel Corpus in Movie Domain for Emotion Analysis in Conversational Text</title>
       <author><last>Phan</last> <first>Duc-Anh</first></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="b90f69a4">L18-1418</url>
       <bibkey>phan-matsumoto-2018-emtc</bibkey>
     </paper>
     <paper id="419">
       <title>Complex and Precise Movie and Book Annotations in <fixed-case>F</fixed-case>rench Language for Aspect Based Sentiment Analysis</title>
       <author><first>Stefania</first><last>Pecore</last></author>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <url hash="06f5f2d1">L18-1419</url>
       <bibkey>pecore-villaneau-2018-complex</bibkey>
     </paper>
     <paper id="420">
       <title><fixed-case>L</fixed-case>ingmotif-lex: a Wide-coverage, State-of-the-art Lexicon for Sentiment Analysis</title>
-      <author><first>Antonio</first><last>Moreno-Ortiz</last></author>
-      <author><first>Chantal</first><last>Pérez-Hernández</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno-Ortiz</last></author>
+      <author id="chantal-perez-hernandez"><first>Chantal</first><last>Pérez-Hernández</last></author>
       <url hash="56fdf1b2">L18-1420</url>
       <bibkey>moreno-ortiz-perez-hernandez-2018-lingmotif</bibkey>
     </paper>
@@ -3688,7 +3688,7 @@
       <title>The <fixed-case>SSIX</fixed-case> Corpora: Three Gold Standard Corpora for Sentiment Analysis in <fixed-case>E</fixed-case>nglish, <fixed-case>S</fixed-case>panish and <fixed-case>G</fixed-case>erman Financial Microblogs</title>
       <author><first>Thomas</first><last>Gaillat</last></author>
       <author><first>Manel</first><last>Zarrouk</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <url hash="1da74825">L18-1423</url>
       <bibkey>gaillat-etal-2018-ssix</bibkey>
@@ -3697,8 +3697,8 @@
       <title>Sarcasm Target Identification: Dataset and An Introductory Approach</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author id="pranav-goel-umd"><first>Pranav</first><last>Goel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <url hash="2c6f3ab4">L18-1424</url>
       <bibkey>joshi-etal-2018-sarcasm</bibkey>
     </paper>
@@ -3725,7 +3725,7 @@
       <title><fixed-case>W</fixed-case>ord<fixed-case>K</fixed-case>it: a Python Package for Orthographic and Phonological Featurization</title>
       <author><first>Stéphan</first><last>Tulkens</last></author>
       <author><first>Dominiek</first><last>Sandra</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="631972c5">L18-1427</url>
       <bibkey>tulkens-etal-2018-wordkit</bibkey>
     </paper>
@@ -3734,13 +3734,13 @@
       <author><first>David</first><last>Lukeš</last></author>
       <author><first>Marie</first><last>Kopřivová</last></author>
       <author><first>Zuzana</first><last>Komrsková</last></author>
-      <author><first>Petra</first><last>Poukarová</last></author>
+      <author id="petra-poukarova"><first>Petra</first><last>Poukarová</last></author>
       <url hash="c3817f55">L18-1428</url>
       <bibkey>lukes-etal-2018-pronunciation</bibkey>
     </paper>
     <paper id="429">
       <title><fixed-case>E</fixed-case>pitran: Precision <fixed-case>G</fixed-case>2<fixed-case>P</fixed-case> for Many Languages</title>
-      <author><first>David R.</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
       <author><first>Siddharth</first><last>Dalmia</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
       <url hash="9fa28414">L18-1429</url>
@@ -3750,7 +3750,7 @@
       <title>A Multilingual Approach to Question Classification</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Katharina</first><last>Kaiser</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Georg A.</first><last>Kaiser</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <url hash="4cb139b0">L18-1430</url>
@@ -3777,7 +3777,7 @@
     </paper>
     <paper id="433">
       <title><fixed-case>W</fixed-case>orld<fixed-case>T</fixed-case>ree: A Corpus of Explanation Graphs for Elementary Science Questions supporting Multi-hop Inference</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Elizabeth</first><last>Wainwright</last></author>
       <author><first>Steven</first><last>Marmorstein</last></author>
       <author><first>Clayton</first><last>Morrison</last></author>
@@ -3786,7 +3786,7 @@
     </paper>
     <paper id="434">
       <title>Analysis of Implicit Conditions in Database Search Dialogues</title>
-      <author><first>Shun-ya</first><last>Fukunaga</last></author>
+      <author id="shun-ya-fukunaga"><first>Shun-ya</first><last>Fukunaga</last></author>
       <author><first>Hitoshi</first><last>Nishikawa</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Hikaru</first><last>Yokono</last></author>
@@ -3825,7 +3825,7 @@
       <author><first>Andrei</first><last>Dulceanu</last></author>
       <author><first>Thang</first><last>Le Dinh</last></author>
       <author><first>Walter</first><last>Chang</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Manh Chien</first><last>Vu</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
@@ -3836,23 +3836,23 @@
       <title><fixed-case>B</fixed-case>io<fixed-case>R</fixed-case>ead: A New Dataset for Biomedical Reading Comprehension</title>
       <author><first>Dimitris</first><last>Pappas</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <url hash="dbb90f5c">L18-1439</url>
       <bibkey>pappas-etal-2018-bioread</bibkey>
     </paper>
     <paper id="440">
       <title><fixed-case>MMQA</fixed-case>: A Multi-domain Multi-lingual Question-Answering Framework for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Surabhi</first><last>Kumari</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="83d79d11">L18-1440</url>
       <bibkey>gupta-etal-2018-mmqa</bibkey>
     </paper>
     <paper id="441">
       <title>The First 100 Days: A Corpus Of Political Agendas on <fixed-case>T</fixed-case>witter</title>
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Septina</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina</first><last>Larasati</last></author>
       <url hash="98ba53f2">L18-1441</url>
       <bibkey>green-larasati-2018-first</bibkey>
     </paper>
@@ -3861,7 +3861,7 @@
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="c81a3fd6">L18-1442</url>
       <bibkey>yadav-etal-2018-medical</bibkey>
     </paper>
@@ -3871,7 +3871,7 @@
       <author><first>Fabio</first><last>Poletto</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <author><first>Viviana</first><last>Patti</last></author>
-      <author><first>Marco</first><last>Stranisci</last></author>
+      <author id="marco-antonio-stranisci"><first>Marco</first><last>Stranisci</last></author>
       <url hash="7f9d358d">L18-1443</url>
       <bibkey>sanguinetti-etal-2018-italian</bibkey>
     </paper>
@@ -3879,7 +3879,7 @@
       <title>A Large Multilingual and Multi-domain Dataset for Recommender Systems</title>
       <author><first>Giorgia</first><last>Di Tommaso</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="f72d85cb">L18-1444</url>
       <bibkey>di-tommaso-etal-2018-large</bibkey>
     </paper>
@@ -3888,7 +3888,7 @@
       <author><first>Rob</first><last>Voigt</last></author>
       <author><first>David</first><last>Jurgens</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <url hash="c8f2f79f">L18-1445</url>
       <bibkey>voigt-etal-2018-rtgender</bibkey>
@@ -3903,8 +3903,8 @@
     <paper id="447">
       <title>Utilizing Large <fixed-case>T</fixed-case>witter Corpora to Create Sentiment Lexica</title>
       <author><first>Valerij</first><last>Fredriksen</last></author>
-      <author><first>Brage</first><last>Jahren</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="brage-ekroll-jahren"><first>Brage</first><last>Jahren</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <url hash="698f7398">L18-1447</url>
       <bibkey>fredriksen-etal-2018-utilizing</bibkey>
     </paper>
@@ -3968,7 +3968,7 @@
     <paper id="455">
       <title>Discovering Canonical <fixed-case>I</fixed-case>ndian <fixed-case>E</fixed-case>nglish Accents: A Crowdsourcing-based Approach</title>
       <author><first>Sunayana</first><last>Sitaram</last></author>
-      <author><first>Varun</first><last>Manjunath</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunath</last></author>
       <author><first>Varun</first><last>Bharadwaj</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
@@ -3983,7 +3983,7 @@
       <author><first>Tomoko</first><last>Kajiyama</last></author>
       <author><first>Shunsuke</first><last>Kozawa</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
-      <author><first>Shuichi</first><last>Itahashi</last></author>
+      <author id="shuichi-itahashi"><first>Shuichi</first><last>Itahashi</last></author>
       <url hash="442de298">L18-1456</url>
       <bibkey>ohsuga-etal-2018-extending</bibkey>
     </paper>
@@ -3992,9 +3992,9 @@
       <author><first>Katrin</first><last>Schweitzer</last></author>
       <author><first>Kerstin</first><last>Eckart</last></author>
       <author><first>Markus</first><last>Gärtner</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Arndt</first><last>Riester</last></author>
-      <author><first>Ina</first><last>Rösiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Rösiger</last></author>
       <author><first>Antje</first><last>Schweitzer</last></author>
       <author><first>Sabrina</first><last>Stehwien</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
@@ -4051,7 +4051,7 @@
       <author><first>Emmanuel</first><last>Johnson</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Gale</first><last>Lucas</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <url hash="75a4d3da">L18-1463</url>
       <bibkey>artstein-etal-2018-niki</bibkey>
     </paper>
@@ -4082,7 +4082,7 @@
       <title>A Semi-autonomous System for Creating a Human-Machine Interaction Corpus in Virtual Reality: Application to the <fixed-case>ACORFORM</fixed-case>ed System for Training Doctors to Break Bad News</title>
       <author><first>Magalie</first><last>Ochs</last></author>
       <author><first>Philippe</first><last>Blache</last></author>
-      <author><first>Grégoire</first><last>de Montcheuil</last></author>
+      <author id="gregoire-moreau-de-montcheuil"><first>Grégoire</first><last>de Montcheuil</last></author>
       <author><first>Jean-Marie</first><last>Pergandi</last></author>
       <author><first>Jorane</first><last>Saubesty</last></author>
       <author><first>Daniel</first><last>Francon</last></author>
@@ -4095,7 +4095,7 @@
       <author><first>Sashi</first><last>Novitasari</last></author>
       <author><first>Quoc Truong</first><last>Do</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Dessi</first><last>Lestari</last></author>
+      <author id="dessi-puji-lestari"><first>Dessi</first><last>Lestari</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <url hash="bc732929">L18-1468</url>
       <bibkey>novitasari-etal-2018-construction</bibkey>
@@ -4114,7 +4114,7 @@
     <paper id="470">
       <title><fixed-case>TF</fixed-case>-<fixed-case>LM</fixed-case>: <fixed-case>T</fixed-case>ensor<fixed-case>F</fixed-case>low-based Language Modeling Toolkit</title>
       <author><first>Lyan</first><last>Verwimp</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <url hash="654fa48d">L18-1470</url>
       <bibkey>verwimp-etal-2018-tf</bibkey>
@@ -4145,28 +4145,28 @@
     <paper id="474">
       <title>Reference production in human-computer interaction: Issues for Corpus-based Referring Expression Generation</title>
       <author><first>Danillo</first><last>Rocha</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="b695c47e">L18-1474</url>
       <bibkey>rocha-paraboni-2018-reference</bibkey>
     </paper>
     <paper id="475">
       <title>Definite Description Lexical Choice: taking Speaker’s Personality into account</title>
       <author><first>Alex</first><last>Lan</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="39ed168a">L18-1475</url>
       <bibkey>lan-paraboni-2018-definite</bibkey>
     </paper>
     <paper id="476">
       <title>Referring Expression Generation in time-constrained communication</title>
-      <author><first>André</first><last>Mariotti</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="andre-mariotti"><first>André</first><last>Mariotti</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="cf2cd6d0">L18-1476</url>
       <bibkey>mariotti-paraboni-2018-referring</bibkey>
     </paper>
     <paper id="477">
       <title>Incorporating Semantic Attention in Video Description Generation</title>
       <author><first>Natsuda</first><last>Laokulrat</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Hideki</first><last>Nakayama</last></author>
       <url hash="4d5e1896">L18-1477</url>
       <bibkey>laokulrat-etal-2018-incorporating</bibkey>
@@ -4183,7 +4183,7 @@
     </paper>
     <paper id="479">
       <title>A Detailed Evaluation of Neural Sequence-to-Sequence Models for In-domain and Cross-domain Text Simplification</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Sergiu</first><last>Nisioi</last></author>
       <url hash="3ff4c392">L18-1479</url>
       <bibkey>stajner-nisioi-2018-detailed</bibkey>
@@ -4193,17 +4193,17 @@
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Filip</first><last>Ilievski</last></author>
       <author><first>Marten</first><last>Postma</last></author>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <url hash="24a7057e">L18-1480</url>
       <bibkey>vossen-etal-2018-dont</bibkey>
     </paper>
     <paper id="481">
       <title><fixed-case>RDF</fixed-case>2<fixed-case>PT</fixed-case>: Generating <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Texts from <fixed-case>RDF</fixed-case> Data</title>
       <author><first>Diego</first><last>Moussallem</last></author>
-      <author><first>Thiago</first><last>Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Ferreira</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Maria Claudia</first><last>Cavalcanti</last></author>
-      <author><first>Geraldo</first><last>Xexéo</last></author>
+      <author id="geraldo-bonorino-xexeo"><first>Geraldo</first><last>Xexéo</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
       <author><first>Axel-Cyrille</first><last>Ngonga Ngomo</last></author>
       <url hash="c4f65fd9">L18-1481</url>
@@ -4219,7 +4219,7 @@
     <paper id="483">
       <title>Up-cycling Data for Natural Language Generation</title>
       <author><first>Amy</first><last>Isard</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <url hash="368a91e4">L18-1483</url>
       <bibkey>isard-etal-2018-cycling</bibkey>
@@ -4242,14 +4242,14 @@
     <paper id="486">
       <title>Annotating <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations for <fixed-case>S</fixed-case>panish</title>
       <author><first>Noelia</first><last>Migueles-Abraira</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
       <url hash="bcd763fd">L18-1486</url>
       <bibkey>migueles-abraira-etal-2018-annotating</bibkey>
     </paper>
     <paper id="487">
       <title>Browsing the Terminological Structure of a Specialized Domain: A Method Based on Lexical Functions and their Classification</title>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
       <author><first>Nathalie</first><last>Prévil</last></author>
       <url hash="7fcb3e4a">L18-1487</url>
@@ -4257,7 +4257,7 @@
     </paper>
     <paper id="488">
       <title>Rollenwechsel-<fixed-case>E</fixed-case>nglish: a large-scale semantic role corpus</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Pavel</first><last>Shkadzko</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <url hash="86821e4e">L18-1488</url>
@@ -4267,8 +4267,8 @@
       <title>Towards a Standardized Dataset for Noun Compound Interpretation</title>
       <author><first>Girishkumar</first><last>Ponkiya</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish K</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish K</first><last>Palshikar</last></author>
       <url hash="46dab7b5">L18-1489</url>
       <bibkey>ponkiya-etal-2018-towards</bibkey>
     </paper>
@@ -4281,9 +4281,9 @@
     </paper>
     <paper id="491">
       <title><fixed-case>NL</fixed-case>2<fixed-case>B</fixed-case>ash: A Corpus and Semantic Parser for Natural Language Interface to the Linux Operating System</title>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Chenglong</first><last>Wang</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Michael D.</first><last>Ernst</last></author>
       <url hash="d0fa73e1">L18-1491</url>
       <bibkey>lin-etal-2018-nl2bash</bibkey>
@@ -4293,14 +4293,14 @@
       <author><first>Charles</first><last>Welch</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Song</first><last>Feng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <url hash="1c336240">L18-1492</url>
       <bibkey>welch-etal-2018-world</bibkey>
     </paper>
     <paper id="493">
       <title>Improved Transcription and Indexing of Oral History Interviews for Digital Humanities Research</title>
       <author><first>Michael</first><last>Gref</last></author>
-      <author><first>Joachim</first><last>Köhler</last></author>
+      <author id="joachim-kohler"><first>Joachim</first><last>Köhler</last></author>
       <author><first>Almut</first><last>Leh</last></author>
       <url hash="6b2988a6">L18-1493</url>
       <bibkey>gref-etal-2018-improved</bibkey>
@@ -4368,7 +4368,7 @@
     <paper id="501">
       <title>Creating New Language and Voice Components for the Updated <fixed-case>M</fixed-case>ary<fixed-case>TTS</fixed-case> Text-to-Speech Synthesis Platform</title>
       <author><first>Ingmar</first><last>Steiner</last></author>
-      <author><first>Sébastien</first><last>Le Maguer</last></author>
+      <author id="sebastien-le-maguer"><first>Sébastien</first><last>Le Maguer</last></author>
       <url hash="fb673996">L18-1501</url>
       <bibkey>steiner-le-maguer-2018-creating</bibkey>
     </paper>
@@ -4394,7 +4394,7 @@
     <paper id="504">
       <title>A New Annotated <fixed-case>P</fixed-case>ortuguese/<fixed-case>S</fixed-case>panish Corpus for the Multi-Sentence Compression Task</title>
       <author><first>Elvys</first><last>Linhares Pontes</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Andréa Carneiro</first><last>Linhares</last></author>
       <url hash="049ee78c">L18-1504</url>
@@ -4412,8 +4412,8 @@
       <title><fixed-case>TS</fixed-case>ix: A Human-involved-creation Dataset for Tweet Summarization</title>
       <author><first>Minh-Tien</first><last>Nguyen</last></author>
       <author><first>Dac Viet</first><last>Lai</last></author>
-      <author><first>Huy-Tien</first><last>Nguyen</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="huy-tien-nguyen"><first>Huy-Tien</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <url hash="fd57ab26">L18-1506</url>
       <bibkey>nguyen-etal-2018-tsix</bibkey>
     </paper>
@@ -4450,7 +4450,7 @@
       <title><fixed-case>P</fixed-case>yr<fixed-case>E</fixed-case>val: An Automated Method for Summary Content Analysis</title>
       <author><first>Yanjun</first><last>Gao</last></author>
       <author><first>Andrew</first><last>Warner</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <url hash="b36eb08c">L18-1511</url>
       <bibkey>gao-etal-2018-pyreval</bibkey>
     </paper>
@@ -4465,10 +4465,10 @@
     </paper>
     <paper id="513">
       <title>Semantic Equivalence Detection: Are Interrogatives Harder than Declaratives?</title>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <url hash="808eaed4">L18-1513</url>
       <bibkey>rodrigues-etal-2018-semantic</bibkey>
     </paper>
@@ -4484,18 +4484,18 @@
       <title><fixed-case>CLARIN</fixed-case>: Towards <fixed-case>FAIR</fixed-case> and Responsible Data Science Using Language Resources</title>
       <author><first>Franciska</first><last>de Jong</last></author>
       <author><first>Bente</first><last>Maegaard</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Dieter</first><last>Van Uytvanck</last></author>
+      <author id="dieter-van-uytvanck"><first>Dieter</first><last>Van Uytvanck</last></author>
       <url hash="76e5f992">L18-1515</url>
       <bibkey>de-jong-etal-2018-clarin</bibkey>
     </paper>
     <paper id="516">
       <title>From ‘Solved Problems’ to New Challenges: A Report on <fixed-case>LDC</fixed-case> Activities</title>
       <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
       <author><first>Andrea</first><last>Mazzucchi</last></author>
       <url hash="63e1de48">L18-1516</url>
@@ -4503,7 +4503,7 @@
     </paper>
     <paper id="517">
       <title>New directions in <fixed-case>ELRA</fixed-case> activities</title>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <author><first>Hélène</first><last>Mazo</last></author>
       <author><first>Pawel</first><last>Kamocki</last></author>
@@ -4540,7 +4540,7 @@
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Ilja</first><last>Croijmans</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Asifa</first><last>Majid</last></author>
       <url hash="5e9221c0">L18-1521</url>
       <bibkey>lefever-etal-2018-discovering</bibkey>
@@ -4580,8 +4580,8 @@
       <author><first>Reuben A</first><last>Farrugia</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
       <author><first>Kenneth P</first><last>Camilleri</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <url hash="262cc997">L18-1525</url>
       <bibkey>gatt-etal-2018-face2text</bibkey>
     </paper>
@@ -4600,26 +4600,26 @@
       <author><first>Julie</first><last>Glikman</last></author>
       <author><first>Mathieu</first><last>Avanzi</last></author>
       <author><first>Christophe</first><last>Benzitoun</last></author>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <url hash="33cb4764">L18-1527</url>
       <bibkey>goldman-etal-2018-crowdsourcing</bibkey>
     </paper>
     <paper id="528">
       <title>Improving Machine Translation of Educational Content via Crowdsourcing</title>
       <author><first>Maximiliana</first><last>Behnke</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Thanasis</first><last>Naskos</last></author>
       <author><first>Eirini</first><last>Takoulidou</last></author>
       <author><first>Maria</first><last>Stasimioti</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Markus</first><last>Egg</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <url hash="174c19bd">L18-1528</url>
       <bibkey>behnke-etal-2018-improving</bibkey>
     </paper>
@@ -4636,7 +4636,7 @@
     <paper id="530">
       <title>Evaluation Phonemic Transcription of Low-Resource Tonal Languages for Language Documentation</title>
       <author><first>Oliver</first><last>Adams</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Hilaria</first><last>Cruz</last></author>
       <author><first>Steven</first><last>Bird</last></author>
@@ -4647,17 +4647,17 @@
     <paper id="531">
       <title>A Very Low Resource Language Speech Corpus for Computational Language Documentation Experiments</title>
       <author><first>Pierre</first><last>Godard</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
       <author><first>Juan</first><last>Benjumea</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Jamison</first><last>Cooper-Leavitt</last></author>
-      <author><first>Guy-Noel</first><last>Kouarata</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Hélène</first><last>Maynard</last></author>
+      <author id="guy-noel-kouarata"><first>Guy-Noel</first><last>Kouarata</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Maynard</last></author>
       <author><first>Markus</first><last>Mueller</last></author>
       <author><first>Annie</first><last>Rialland</last></author>
-      <author><first>Sebastian</first><last>Stueker</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stueker</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <author><first>Marcely</first><last>Zanon-Boito</last></author>
       <url hash="0e3a6417">L18-1531</url>
@@ -4677,15 +4677,15 @@
       <author><first>Emmanuel-Moselly</first><last>Makasso</last></author>
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Jonas</first><last>Engelmann</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
       <url hash="5fc495a5">L18-1533</url>
       <bibkey>hamlaoui-etal-2018-bulbasaa</bibkey>
     </paper>
     <paper id="534">
       <title>Researching Less-Resourced Languages – the <fixed-case>D</fixed-case>igi<fixed-case>S</fixed-case>ami Corpus</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <url hash="5b3197fe">L18-1534</url>
       <bibkey>jokinen-2018-researching</bibkey>
     </paper>
@@ -4695,7 +4695,7 @@
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Mohammad</first><last>Salameh</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Dana</first><last>Abdulrahim</last></author>
       <author><first>Ossama</first><last>Obeid</last></author>
       <author><first>Salam</first><last>Khalifa</last></author>
@@ -4716,7 +4716,7 @@
     <paper id="537">
       <title>Constructing a Lexicon of Relational Nouns</title>
       <author><first>Edward</first><last>Newell</last></author>
-      <author><first>Jackie C.K.</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie C.K.</first><last>Cheung</last></author>
       <url hash="58d74362">L18-1537</url>
       <bibkey>newell-cheung-2018-constructing</bibkey>
     </paper>
@@ -4730,7 +4730,7 @@
     <paper id="539">
       <title>Lexical Profiling of Environmental Corpora</title>
       <author><first>Patrick</first><last>Drouin</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
       <url hash="e891a811">L18-1539</url>
       <bibkey>drouin-etal-2018-lexical</bibkey>
@@ -4753,7 +4753,7 @@
     <paper id="542">
       <title>Building a Knowledge Graph from Natural Language Definitions for Interpretable Text Entailment Recognition</title>
       <author><first>Vivian</first><last>Silva</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <url hash="c809c31a">L18-1542</url>
       <bibkey>silva-etal-2018-building</bibkey>
@@ -4762,8 +4762,8 @@
       <title>Combining rule-based and embedding-based approaches to normalize textual entities with an ontology</title>
       <author><first>Arnaud</first><last>Ferré</last></author>
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <url hash="c7069a03">L18-1543</url>
       <bibkey>ferre-etal-2018-combining</bibkey>
     </paper>
@@ -4774,7 +4774,7 @@
       <author><first>Arslen</first><last>Remaci</last></author>
       <author><first>Christophe</first><last>Gravier</last></author>
       <author><first>Jonathon</first><last>Hare</last></author>
-      <author><first>Frederique</first><last>Laforest</last></author>
+      <author id="frederique-laforest"><first>Frederique</first><last>Laforest</last></author>
       <author><first>Elena</first><last>Simperl</last></author>
       <url hash="ab4f88a9">L18-1544</url>
       <bibkey>elsahar-etal-2018-rex</bibkey>
@@ -4782,7 +4782,7 @@
     <paper id="545">
       <title>Multilingual Parallel Corpus for Global Communication Plan</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="7ca1393a">L18-1545</url>
       <bibkey>imamura-sumita-2018-multilingual</bibkey>
     </paper>
@@ -4798,7 +4798,7 @@
       <title><fixed-case>N</fixed-case>eg<fixed-case>P</fixed-case>ar: A parallel corpus annotated for negation</title>
       <author><first>Qianchu</first><last>Liu</last></author>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="98b2bacb">L18-1547</url>
       <bibkey>liu-etal-2018-negpar</bibkey>
     </paper>
@@ -4806,7 +4806,7 @@
       <title>The <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpus</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Pratik</first><last>Mehta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="94b2a832">L18-1548</url>
       <bibkey>kunchukuttan-etal-2018-iit</bibkey>
     </paper>
@@ -4820,11 +4820,11 @@
     </paper>
     <paper id="550">
       <title>Learning Word Vectors for 157 Languages</title>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <url hash="1add63bf">L18-1550</url>
       <bibkey>grave-etal-2018-learning</bibkey>
     </paper>
@@ -4833,9 +4833,9 @@
       <author><first>Milan</first><last>Straka</last></author>
       <author><first>Nikita</first><last>Mediankin</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Vojtěch</first><last>Hudeček</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <url hash="ecb6d357">L18-1551</url>
       <bibkey>straka-etal-2018-sumeczech</bibkey>
     </paper>
@@ -4848,8 +4848,8 @@
     </paper>
     <paper id="553">
       <title>Text Simplification from Professionally Produced Corpora</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="03daf75f">L18-1553</url>
       <bibkey>scarton-etal-2018-text</bibkey>
@@ -4859,7 +4859,7 @@
       <author><first>Jacky</first><last>Visser</last></author>
       <author><first>Rory</first><last>Duthie</last></author>
       <author><first>John</first><last>Lawrence</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <url hash="6133cdff">L18-1554</url>
       <bibkey>visser-etal-2018-intertextual</bibkey>
     </paper>
@@ -4880,12 +4880,12 @@
     </paper>
     <paper id="557">
       <title>Building Named Entity Recognition Taggers via Parallel Corpora</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <author><first>Yiling</first><last>Chung</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="d36bfafd">L18-1557</url>
       <bibkey>agerri-etal-2018-building</bibkey>
     </paper>
@@ -4895,8 +4895,8 @@
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Justin</first><last>Mott</last></author>
       <author><first>Xuansong</first><last>Li</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
-      <author><first>Christopher</first><last>Caruso</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
+      <author id="christopher-caruso"><first>Christopher</first><last>Caruso</last></author>
       <url hash="06e47b1b">L18-1558</url>
       <bibkey>song-etal-2018-cross</bibkey>
     </paper>
@@ -4906,7 +4906,7 @@
       <author><first>Amitra</first><last>Salam</last></author>
       <author><first>Swati</first><last>Tiwari</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="d9e403c4">L18-1559</url>
       <bibkey>ghosal-etal-2018-tap</bibkey>
     </paper>
@@ -4928,14 +4928,14 @@
     <paper id="562">
       <title>Annotating Educational Questions for Student Response Analysis</title>
       <author><first>Andreea</first><last>Godea</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <url hash="2503e964">L18-1562</url>
       <bibkey>godea-nielsen-2018-annotating</bibkey>
     </paper>
     <paper id="563">
       <title>Incorporating Global Contexts into Sentence Embedding for Relational Extraction at the Paragraph Level with Distant Supervision</title>
-      <author><first>Eun-kyung</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="eun-kyung-kim"><first>Eun-kyung</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="deeed7a3">L18-1563</url>
       <bibkey>kim-choi-2018-incorporating</bibkey>
     </paper>
@@ -4963,7 +4963,7 @@
       <title>Revisiting Distant Supervision for Relation Extraction</title>
       <author><first>Tingsong</first><last>Jiang</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <url hash="796558a0">L18-1566</url>
       <bibkey>jiang-etal-2018-revisiting</bibkey>
@@ -4986,7 +4986,7 @@
     <paper id="569">
       <title>Comparison of Pun Detection Methods Using <fixed-case>J</fixed-case>apanese Pun Corpus</title>
       <author><first>Motoki</first><last>Yatsu</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <url hash="ff63b1fc">L18-1569</url>
       <bibkey>yatsu-araki-2018-comparison</bibkey>
     </paper>
@@ -5011,7 +5011,7 @@
       <author><first>Roeland</first><last>van Hout</last></author>
       <author><first>Nicoline</first><last>van der Sijs</last></author>
       <author><first>Erwin</first><last>Komen</last></author>
-      <author><first>Henk</first><last>van den Heuvel</last></author>
+      <author id="henk-van-den-heuvel"><first>Henk</first><last>van den Heuvel</last></author>
       <url hash="138d656e">L18-1572</url>
       <bibkey>van-hout-etal-2018-fast</bibkey>
     </paper>
@@ -5028,7 +5028,7 @@
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Fadhl</first><last>Eryani</last></author>
       <author><first>Salam</first><last>Khalifa</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Dana</first><last>Abdulrahim</last></author>
       <author><first>Alexander</first><last>Erdmann</last></author>
       <author><first>Reem</first><last>Faraj</last></author>
@@ -5047,9 +5047,9 @@
     </paper>
     <paper id="575">
       <title>Automatic Identification of Maghreb Dialects Using a Dictionary-Based Approach</title>
-      <author><first>Houda</first><last>Saâdane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saâdane</last></author>
       <author><first>Hosni</first><last>Seffih</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <author><first>Khalid</first><last>Choukri</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <url hash="96ebc389">L18-1575</url>
@@ -5104,8 +5104,8 @@
       <title>Automating Document Discovery in the Systematic Review Process: How to Use Chaff to Extract Wheat</title>
       <author><first>Christopher</first><last>Norman</last></author>
       <author><first>Mariska</first><last>Leeflang</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <url hash="e83e0d27">L18-1582</url>
       <bibkey>norman-etal-2018-automating</bibkey>
     </paper>
@@ -5113,7 +5113,7 @@
       <title>Two Multilingual Corpora Extracted from the Tenders Electronic Daily for Machine Learning and Machine Translation Applications.</title>
       <author><first>Oussama</first><last>Ahmia</last></author>
       <author><first>Nicolas</first><last>Béchet</last></author>
-      <author><first>Pierre-François</first><last>Marteau</last></author>
+      <author id="pierre-francois-marteau"><first>Pierre-François</first><last>Marteau</last></author>
       <url hash="5dc3dea8">L18-1583</url>
       <bibkey>ahmia-etal-2018-two</bibkey>
     </paper>
@@ -5121,14 +5121,14 @@
       <title>Using Adversarial Examples in Natural Language Processing</title>
       <author><first>Petr</first><last>Bělohlávek</last></author>
       <author><first>Ondřej</first><last>Plátek</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Milan</first><last>Straka</last></author>
       <url hash="0e7de5df">L18-1584</url>
       <bibkey>belohlavek-etal-2018-using</bibkey>
     </paper>
     <paper id="585">
       <title>Modeling Trolling in Social Media Conversations</title>
-      <author><first>Luis Gerardo</first><last>Mojica de la Vega</last></author>
+      <author id="luis-gerardo-mojica-de-la-vega"><first>Luis Gerardo</first><last>Mojica de la Vega</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
       <url hash="dc54746a">L18-1585</url>
       <bibkey>mojica-de-la-vega-ng-2018-modeling</bibkey>
@@ -5136,7 +5136,7 @@
     <paper id="586">
       <title>Automatic Annotation of Semantic Term Types in the Complete <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus</title>
       <author><first>Anne-Kathrin</first><last>Schumann</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <url hash="4d2f2516">L18-1586</url>
       <bibkey>schumann-martinez-alonso-2018-automatic</bibkey>
     </paper>
@@ -5179,7 +5179,7 @@
       <author><first>Christy</first><last>Li</last></author>
       <author><first>Yuchen</first><last>Wang</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <url hash="255b8329">L18-1591</url>
       <bibkey>perez-rosas-etal-2018-analyzing</bibkey>
     </paper>
@@ -5194,7 +5194,7 @@
     <paper id="593">
       <title>Text Mining for History: first steps on building a large dataset</title>
       <author><first>Suemi</first><last>Higuchi</last></author>
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <author><first>Bruno</first><last>Cuconato</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <url hash="6834b309">L18-1593</url>
@@ -5212,8 +5212,8 @@
     <paper id="595">
       <title>Training and Adapting Multilingual <fixed-case>NMT</fixed-case> for Less-resourced and Morphologically Rich Languages</title>
       <author><first>Matīss</first><last>Rikters</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
       <url hash="e76f4d7f">L18-1595</url>
       <bibkey>rikters-etal-2018-training</bibkey>
     </paper>
@@ -5229,8 +5229,8 @@
     <paper id="597">
       <title>Machine Translation of Low-Resource Spoken Dialects: Strategies for Normalizing <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman</title>
       <author><first>Pierre-Edouard</first><last>Honnet</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <author><first>Michael</first><last>Baeriswyl</last></author>
       <url hash="a06d0a7f">L18-1597</url>
       <bibkey>honnet-etal-2018-machine</bibkey>
@@ -5294,7 +5294,7 @@
     </paper>
     <paper id="605">
       <title>A Multilingual Dataset for Evaluating Parallel Sentence Extraction from Comparable Corpora</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Reinhard</first><last>Rapp</last></author>
       <url hash="c169b8d2">L18-1605</url>
@@ -5322,11 +5322,11 @@
     <paper id="608">
       <title><fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>UL</fixed-case>: Universal Morphological Lattices for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing</title>
       <author><first>Amir</first><last>More</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Dima</first><last>Taji</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <url hash="85f20ca5">L18-1608</url>
@@ -5353,13 +5353,13 @@
     <paper id="611">
       <title>Parser combinators for <fixed-case>T</fixed-case>igrinya and <fixed-case>O</fixed-case>romo morphology</title>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>Tom</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>Tom</first><last>McCoy</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <author><first>Shruti</first><last>Rijhwani</last></author>
       <author><first>Zaid</first><last>Sheikh</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="16bea2ec">L18-1611</url>
       <bibkey>littell-etal-2018-parser</bibkey>
     </paper>
@@ -5379,17 +5379,17 @@
     </paper>
     <paper id="614">
       <title>Baselines and Test Data for Cross-Lingual Inference</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Natalie</first><last>Schluter</last></author>
       <url hash="0e5ed5c6">L18-1614</url>
       <bibkey>agic-schluter-2018-baselines</bibkey>
     </paper>
     <paper id="615">
       <title><fixed-case>CATS</fixed-case>: A Tool for Customized Alignment of Text Simplification Corpora</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Marc</first><last>Franco-Salvador</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <url hash="11bd96ad">L18-1615</url>
       <bibkey>stajner-etal-2018-cats</bibkey>
     </paper>
@@ -5398,8 +5398,8 @@
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Ngoc Quan</first><last>Pham</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc Quan</first><last>Pham</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <url hash="1b76fd8e">L18-1616</url>
       <bibkey>ha-etal-2018-kit</bibkey>
     </paper>
@@ -5422,7 +5422,7 @@
       <author><first>Brian</first><last>Davis</last></author>
       <author><first>Manel</first><last>Zarrouk</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
-      <author><first>Andre</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>Andre</first><last>Freitas</last></author>
       <url hash="a6209b43">L18-1618</url>
       <bibkey>barzegar-etal-2018-semr</bibkey>
     </paper>
@@ -5440,7 +5440,7 @@
       <author><first>Dominique</first><last>Huck</last></author>
       <author><first>Christophe</first><last>Rey</last></author>
       <author><first>Philippe</first><last>Reynés</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Jean</first><last>Sibille</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <url hash="3b1d3935">L18-1619</url>
@@ -5459,7 +5459,7 @@
     <paper id="621">
       <title>Web-based Annotation Tool for Inflectional Language Resources</title>
       <author><first>Abdulrahman</first><last>Alosaimy</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <url hash="25e14f03">L18-1621</url>
       <bibkey>alosaimy-atwell-2018-web</bibkey>
     </paper>
@@ -5525,7 +5525,7 @@
     </paper>
     <paper id="628">
       <title>Exploring Conversational Language Generation for Rich Content about Hotels</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Albry</first><last>Smither</last></author>
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Vrindavan</first><last>Harrison</last></author>
@@ -5536,7 +5536,7 @@
     <paper id="629">
       <title>Identification of Personal Information Shared in Chat-Oriented Dialogue</title>
       <author><first>Sarah</first><last>Fillwock</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <url hash="fc9c9b1c">L18-1629</url>
       <bibkey>fillwock-traum-2018-identification</bibkey>
     </paper>
@@ -5551,13 +5551,13 @@
     <paper id="631">
       <title>Annotating Reflections for Health Behavior Change Therapy</title>
       <author><first>Nishitha</first><last>Guntakandla</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <url hash="53392f4e">L18-1631</url>
       <bibkey>guntakandla-nielsen-2018-annotating</bibkey>
     </paper>
     <paper id="632">
       <title>Annotating Attribution Relations in <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Amal</first><last>Alsaif</last></author>
+      <author id="amal-al-saif"><first>Amal</first><last>Alsaif</last></author>
       <author><first>Tasniem</first><last>Alyahya</last></author>
       <author><first>Madawi</first><last>Alotaibi</last></author>
       <author id="huda-almuzaini"><first>Huda</first><last>Almuzaini</last></author>
@@ -5572,7 +5572,7 @@
       <author><first>Brendan</first><last>Spillane</last></author>
       <author><first>Maria</first><last>O’Reilly</last></author>
       <author><first>Ketong</first><last>Su</last></author>
-      <author><first>Arturo</first><last>Calvo</last></author>
+      <author id="arturo-calvo-devesa"><first>Arturo</first><last>Calvo</last></author>
       <author><first>Loredana</first><last>Cerrato</last></author>
       <author><first>Killian</first><last>Levacher</last></author>
       <author><first>Nick</first><last>Campbell</last></author>
@@ -5634,7 +5634,7 @@
       <author><first>Milagro</first><last>Teruel</last></author>
       <author><first>Cristian</first><last>Cardellino</last></author>
       <author><first>Fernando</first><last>Cardellino</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Serena</first><last>Villata</last></author>
       <url hash="be40ce4d">L18-1640</url>
       <bibkey>teruel-etal-2018-increasing</bibkey>
@@ -5679,7 +5679,7 @@
     </paper>
     <paper id="646">
       <title>Cross-linguistically Small World Networks are Ubiquitous in Child-directed Speech</title>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Danica</first><last>Pajović</last></author>
       <author><first>Sabine</first><last>Stoll</last></author>
       <url hash="8b38915d">L18-1646</url>
@@ -5688,7 +5688,7 @@
     <paper id="647">
       <title><fixed-case>L</fixed-case>1-<fixed-case>L</fixed-case>2 Parallel Treebank of Learner <fixed-case>C</fixed-case>hinese: Overused and Underused Syntactic Structures</title>
       <author><first>Keying</first><last>Li</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <url hash="75d873e6">L18-1647</url>
       <bibkey>li-lee-2018-l1</bibkey>
     </paper>
@@ -5696,13 +5696,13 @@
       <title>The Use of Text Alignment in Semi-Automatic Error Analysis: Use Case in the Development of the Corpus of the <fixed-case>L</fixed-case>atvian Language Learners</title>
       <author><first>Roberts</first><last>Darģis</last></author>
       <author><first>Ilze</first><last>Auziņa</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
       <url hash="62519824">L18-1648</url>
       <bibkey>dargis-etal-2018-use</bibkey>
     </paper>
     <paper id="649">
       <title>Error annotation in a Learner Corpus of <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Iria</first><last>del Río</last></author>
+      <author id="iria-del-rio-gayo"><first>Iria</first><last>del Río</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <url hash="af897671">L18-1649</url>
       <bibkey>del-rio-mendes-2018-error</bibkey>
@@ -5711,7 +5711,7 @@
       <title>An <fixed-case>SLA</fixed-case> Corpus Annotated with Pedagogically Relevant Grammatical Structures</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <url hash="cf1bc3c6">L18-1650</url>
       <bibkey>zilio-etal-2018-sla</bibkey>
     </paper>
@@ -5719,13 +5719,13 @@
       <title>Portable Spelling Corrector for a Less-Resourced Language: <fixed-case>A</fixed-case>mharic</title>
       <author><first>Andargachew Mekonnen</first><last>Gezmu</last></author>
       <author><first>Andreas</first><last>Nürnberger</last></author>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <url hash="0156cc62">L18-1651</url>
       <bibkey>gezmu-etal-2018-portable</bibkey>
     </paper>
     <paper id="652">
       <title>A Speaking Atlas of the Regional Languages of <fixed-case>F</fixed-case>rance</title>
-      <author><first>Philippe</first><last>Boula de Mareüil</last></author>
+      <author id="philippe-boula-de-mareuil"><first>Philippe</first><last>Boula de Mareüil</last></author>
       <author><first>Albert</first><last>Rilliard</last></author>
       <author><first>Frédéric</first><last>Vernier</last></author>
       <url hash="5fe9a711">L18-1652</url>
@@ -5750,8 +5750,8 @@
       <title><fixed-case>C</fixed-case>h<fixed-case>A</fixed-case>not: An Intelligent Annotation Tool for Indigenous and Highly Agglutinative Languages in <fixed-case>P</fixed-case>eru</title>
       <author><first>Rodolfo</first><last>Mercado-Gonzales</last></author>
       <author><first>José</first><last>Pereira-Noriega</last></author>
-      <author><first>Marco</first><last>Sobrevilla</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco</first><last>Sobrevilla</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <url hash="f63f304e">L18-1655</url>
       <bibkey>mercado-gonzales-etal-2018-chanot</bibkey>
     </paper>
@@ -5766,28 +5766,28 @@
     <paper id="657">
       <title><fixed-case>ASR</fixed-case> for Documenting Acutely Under-Resourced Indigenous Languages</title>
       <author><first>Robbie</first><last>Jimerson</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <url hash="69a1e334">L18-1657</url>
       <bibkey>jimerson-prudhommeaux-2018-asr</bibkey>
     </paper>
     <paper id="658">
       <title>Building a Sentiment Corpus of Tweets in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Henrico</first><last>Brum</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <url hash="18820ecc">L18-1658</url>
       <bibkey>brum-volpe-nunes-2018-building</bibkey>
     </paper>
     <paper id="659">
       <title>‘Aye’ or ‘No’? Speech-level Sentiment Analysis of <fixed-case>H</fixed-case>ansard <fixed-case>UK</fixed-case> Parliamentary Debate Transcripts</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <url hash="99a72156">L18-1659</url>
       <bibkey>abercrombie-batista-navarro-2018-aye</bibkey>
     </paper>
     <paper id="660">
       <title>Scalable Visualisation of Sentiment and Stance</title>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Orland</first><last>Hoeber</last></author>
       <url hash="07a35905">L18-1660</url>
       <bibkey>chamberlain-etal-2018-scalable</bibkey>
@@ -5795,7 +5795,7 @@
     <paper id="661">
       <title><fixed-case>N</fixed-case>o<fixed-case>R</fixed-case>e<fixed-case>C</fixed-case>: The <fixed-case>N</fixed-case>orwegian Review Corpus</title>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Eivind Alexander</first><last>Bergem</last></author>
       <author><first>Cathrine</first><last>Stadsnes</last></author>
       <author><first>Samia</first><last>Touileb</last></author>
@@ -5818,8 +5818,8 @@
       <author><first>Rodrigo</first><last>López</last></author>
       <author><first>Juanjosé</first><last>Tenorio</last></author>
       <author><first>Héctor</first><last>Gómez</last></author>
-      <author><first>Arturo</first><last>Oncevay-Marcos</last></author>
-      <author><first>Marco A.</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay-Marcos</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco A.</first><last>Sobrevilla Cabezudo</last></author>
       <url hash="2876ff88">L18-1663</url>
       <bibkey>penaloza-etal-2018-corpus</bibkey>
     </paper>
@@ -5876,7 +5876,7 @@
     <paper id="669">
       <title>Elicitation protocol and material for a corpus of long prepared monologues in Sign Language</title>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Mohamed Nassime</first><last>Hadjadj</last></author>
+      <author id="mohamed-nassime-hadjadj"><first>Mohamed Nassime</first><last>Hadjadj</last></author>
       <url hash="2f922bfd">L18-1669</url>
       <bibkey>filhol-hadjadj-2018-elicitation</bibkey>
     </paper>
@@ -5889,15 +5889,15 @@
     </paper>
     <paper id="671">
       <title>Modeling <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage: a proposal for a semantically compositional system</title>
-      <author><first>Mohamed Nassime</first><last>Hadjadj</last></author>
+      <author id="mohamed-nassime-hadjadj"><first>Mohamed Nassime</first><last>Hadjadj</last></author>
       <author><first>Michael</first><last>Filhol</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <url hash="054860f9">L18-1671</url>
       <bibkey>hadjadj-etal-2018-modeling</bibkey>
     </paper>
     <paper id="672">
       <title>Construction of the Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation: An Interim Report</title>
-      <author><first>Hanae</first><last>Koiso</last></author>
+      <author id="hanae-koiso"><first>Hanae</first><last>Koiso</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <author><first>Yuriko</first><last>Iseki</last></author>
       <author><first>Wakako</first><last>Kashino</last></author>
@@ -5934,11 +5934,11 @@
     <paper id="674">
       <title>Parallel Corpora in <fixed-case>M</fixed-case>boshi (<fixed-case>B</fixed-case>antu <fixed-case>C</fixed-case>25, <fixed-case>C</fixed-case>ongo-<fixed-case>B</fixed-case>razzaville)</title>
       <author><first>Annie</first><last>Rialland</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Guy-Noël</first><last>Kouarata</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="guy-noel-kouarata"><first>Guy-Noël</first><last>Kouarata</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <author><first>Elodie</first><last>Gauthier</last></author>
       <author><first>Pierre</first><last>Godard</last></author>
       <author><first>Jamison</first><last>Cooper-Leavitt</last></author>
@@ -5947,7 +5947,7 @@
     </paper>
     <paper id="675">
       <title>A Multimodal Corpus of Expert Gaze and Behavior during Phonetic Segmentation Tasks</title>
-      <author><first>Arif</first><last>Khan</last></author>
+      <author id="mohammed-arif-khan"><first>Arif</first><last>Khan</last></author>
       <author><first>Ingmar</first><last>Steiner</last></author>
       <author><first>Yusuke</first><last>Sugano</last></author>
       <author><first>Andreas</first><last>Bulling</last></author>
@@ -5969,7 +5969,7 @@
       <author><first>Damien</first><last>Lolive</last></author>
       <author><first>Gaëlle</first><last>Vidal</last></author>
       <author><first>Marie</first><last>Tahon</last></author>
-      <author><first>Élisabeth</first><last>Delais-Roussarie</last></author>
+      <author id="elisabeth-delais-roussarie"><first>Élisabeth</first><last>Delais-Roussarie</last></author>
       <url hash="0551e863">L18-1677</url>
       <bibkey>sini-etal-2018-synpaflex</bibkey>
     </paper>
@@ -6008,15 +6008,15 @@
     <paper id="682">
       <title><fixed-case>VAST</fixed-case>: A Corpus of Video Annotation for Speech Technologies</title>
       <author><first>Jennifer</first><last>Tracey</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <url hash="71c79da1">L18-1682</url>
       <bibkey>tracey-strassel-2018-vast</bibkey>
     </paper>
     <paper id="683">
       <title>Edit me: A Corpus and a Framework for Understanding Natural Language Image Editing</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Jacqueline</first><last>Brixey</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
@@ -6028,14 +6028,14 @@
       <title>Enriching a Lexicon of Discourse Connectives with Corpus-based Data</title>
       <author><first>Anna</first><last>Feltracco</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <url hash="b0dafca6">L18-1684</url>
       <bibkey>feltracco-etal-2018-enriching</bibkey>
     </paper>
     <paper id="685">
       <title><fixed-case>S</fixed-case>im<fixed-case>PA</fixed-case>: A Sentence-Level Simplification Corpus for the Public Administration Domain</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="0d69dcc5">L18-1685</url>
       <bibkey>scarton-etal-2018-simpa</bibkey>
@@ -6067,7 +6067,7 @@
     <paper id="689">
       <title>The <fixed-case>G</fixed-case>erman Reference Corpus <fixed-case>D</fixed-case>e<fixed-case>R</fixed-case>e<fixed-case>K</fixed-case>o: New Developments – New Opportunities</title>
       <author><first>Marc</first><last>Kupietz</last></author>
-      <author><first>Harald</first><last>Lüngen</last></author>
+      <author id="harald-lungen"><first>Harald</first><last>Lüngen</last></author>
       <author><first>Paweł</first><last>Kamocki</last></author>
       <author><first>Andreas</first><last>Witt</last></author>
       <url hash="3c17d188">L18-1689</url>
@@ -6077,7 +6077,7 @@
       <title><fixed-case>R</fixed-case>isamálheild: A Very Large <fixed-case>I</fixed-case>celandic Text Corpus</title>
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <author><first>Starkaður</first><last>Barkarson</last></author>
       <author><first>Jón</first><last>Guðnason</last></author>
       <url hash="9461d851">L18-1690</url>
@@ -6086,14 +6086,14 @@
     <paper id="691">
       <title><fixed-case>T</fixed-case>ri<fixed-case>MED</fixed-case>: A Multilingual Terminological Database</title>
       <author><first>Federica</first><last>Vezzani</last></author>
-      <author><first>Giorgio Maria</first><last>Di Nunzio</last></author>
+      <author id="giorgio-maria-di-nunzio"><first>Giorgio Maria</first><last>Di Nunzio</last></author>
       <author><first>Geneviève</first><last>Henrot</last></author>
       <url hash="6f162a57">L18-1691</url>
       <bibkey>vezzani-etal-2018-trimed</bibkey>
     </paper>
     <paper id="692">
       <title>Preparation and Usage of <fixed-case>X</fixed-case>hosa Lexicographical Data for a Multilingual, Federated Environment</title>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <author><first>Thomas</first><last>Eckart</last></author>
       <author><first>Bettina</first><last>Klimek</last></author>
       <author><first>Dirk</first><last>Goldhahn</last></author>
@@ -6104,7 +6104,7 @@
     <paper id="693">
       <title>A Lexicon of Discourse Markers for <fixed-case>P</fixed-case>ortuguese – <fixed-case>LDM</fixed-case>-<fixed-case>PT</fixed-case></title>
       <author><first>Amália</first><last>Mendes</last></author>
-      <author><first>Iria</first><last>del Rio</last></author>
+      <author id="iria-del-rio-gayo"><first>Iria</first><last>del Rio</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Felix</first><last>Dombek</last></author>
       <url hash="52b15085">L18-1693</url>
@@ -6112,7 +6112,7 @@
     </paper>
     <paper id="694">
       <title>One Language to rule them all: modelling Morphological Patterns in a Large Scale <fixed-case>I</fixed-case>talian Lexicon with <fixed-case>SWRL</fixed-case></title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Andrea</first><last>Bellandi</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
@@ -6135,8 +6135,8 @@
     <paper id="697">
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Shp: Towards the Building of a Lexical Database for a <fixed-case>P</fixed-case>eruvian Minority Language</title>
       <author><first>Diego</first><last>Maguiño-Valencia</last></author>
-      <author><first>Arturo</first><last>Oncevay-Marcos</last></author>
-      <author><first>Marco A.</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay-Marcos</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco A.</first><last>Sobrevilla Cabezudo</last></author>
       <url hash="fc0abe6c">L18-1697</url>
       <bibkey>maguino-valencia-etal-2018-wordnet</bibkey>
     </paper>
@@ -6151,7 +6151,7 @@
     <paper id="699">
       <title>Transforming <fixed-case>W</fixed-case>ikipedia into a Large-Scale Fine-Grained Entity Type Corpus</title>
       <author><first>Abbas</first><last>Ghaddar</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="25d17825">L18-1699</url>
       <bibkey>ghaddar-langlais-2018-transforming</bibkey>
     </paper>
@@ -6212,7 +6212,7 @@
     <paper id="706">
       <title><fixed-case>M</fixed-case>-<fixed-case>CNER</fixed-case>: A Corpus for <fixed-case>C</fixed-case>hinese Named Entity Recognition in Multi-Domains</title>
       <author><first>Qi</first><last>Lu</last></author>
-      <author><first>YaoSheng</first><last>Yang</last></author>
+      <author id="yaosheng-yang"><first>YaoSheng</first><last>Yang</last></author>
       <author><first>Zhenghua</first><last>Li</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
       <author><first>Min</first><last>Zhang</last></author>
@@ -6225,7 +6225,7 @@
       <author><first>Jiaqi</first><last>Wu</last></author>
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Amita</first><last>Misra</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <url hash="c1a1d755">L18-1707</url>
       <bibkey>bowden-etal-2018-slugnerds</bibkey>
     </paper>
@@ -6246,12 +6246,12 @@
     </paper>
     <paper id="710">
       <title>The <fixed-case>LIA</fixed-case> Treebank of Spoken <fixed-case>N</fixed-case>orwegian Dialects</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Andre</first><last>Kåsen</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>Per Erik</first><last>Solberg</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <url hash="ee12a60f">L18-1710</url>
       <bibkey>ovrelid-etal-2018-lia</bibkey>
     </paper>
@@ -6274,19 +6274,19 @@
     <paper id="713">
       <title><fixed-case>C</fixed-case>zech Legal Text Treebank 2.0</title>
       <author><first>Vincent</first><last>Kríž</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <url hash="a7077d1b">L18-1713</url>
       <bibkey>kriz-hladka-2018-czech</bibkey>
     </paper>
     <paper id="714">
       <title>Creation of a Balanced State-of-the-Art Multilayer Corpus for <fixed-case>NLU</fixed-case></title>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
-      <author><first>Lauma</first><last>Pretkalnina</last></author>
-      <author><first>Baiba</first><last>Saulite</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalnina</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulite</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Gunta</first><last>Nespore-Berzkalne</last></author>
-      <author><first>Arturs</first><last>Znotins</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
+      <author id="gunta-nespore"><first>Gunta</first><last>Nespore-Berzkalne</last></author>
+      <author id="arturs-znotins"><first>Arturs</first><last>Znotins</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
       <url hash="99cd1088">L18-1714</url>
       <bibkey>gruzitis-etal-2018-creation</bibkey>
     </paper>
@@ -6301,8 +6301,8 @@
       <title>Adding Syntactic Annotations to Flickr30k Entities Corpus for Multimodal Ambiguous Prepositional-Phrase Attachment Resolution</title>
       <author><first>Sebastien</first><last>Delecraz</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <url hash="f1a968b9">L18-1716</url>
       <bibkey>delecraz-etal-2018-adding</bibkey>
     </paper>
@@ -6317,11 +6317,11 @@
     </paper>
     <paper id="718">
       <title>Cheating a Parser to Death: Data-driven Cross-Treebank Annotation Transfer</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <url hash="2ad00295">L18-1718</url>
       <bibkey>seddah-etal-2018-cheating</bibkey>
     </paper>
@@ -6329,7 +6329,7 @@
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Quantitative Typological Trends. A Case Study on Word Order</title>
       <author><first>Chiara</first><last>Alzetta</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <url hash="5c18ff9e">L18-1719</url>
       <bibkey>alzetta-etal-2018-universal</bibkey>
@@ -6343,7 +6343,7 @@
     </paper>
     <paper id="721">
       <title>Interoperability of Language-related Information: Mapping the <fixed-case>BLL</fixed-case> Thesaurus to Lexvo and Glottolog</title>
-      <author><first>Vanya</first><last>Dimitrova</last></author>
+      <author id="vania-dimitrova"><first>Vanya</first><last>Dimitrova</last></author>
       <author><first>Christian</first><last>Fäth</last></author>
       <author><first>Christian</first><last>Chiarcos</last></author>
       <author><first>Heike</first><last>Renner-Westermann</last></author>
@@ -6353,10 +6353,10 @@
     </paper>
     <paper id="722">
       <title>Browsing and Supporting Pluricentric Global <fixed-case>W</fixed-case>ordnet, or just your <fixed-case>W</fixed-case>ordnet of Interest</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <url hash="c597ad68">L18-1722</url>
       <bibkey>branco-etal-2018-browsing</bibkey>
     </paper>
@@ -6364,20 +6364,20 @@
       <title>Cross-checking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case> Using Meronymy</title>
       <author><first>Javier</first><last>Álvez</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="d5fc92a2">L18-1723</url>
       <bibkey>alvez-etal-2018-cross</bibkey>
     </paper>
     <paper id="724">
       <title>Extended <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et 2.0 – An Entity-Relation Common-Sense Representation Model</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Yueh-Yin</first><last>Shih</last></author>
       <url hash="fde4fc26">L18-1724</url>
       <bibkey>ma-shih-2018-extended</bibkey>
     </paper>
     <paper id="725">
       <title>The Circumstantial Event Ontology (<fixed-case>CEO</fixed-case>) and <fixed-case>ECB</fixed-case>+/<fixed-case>CEO</fixed-case>: an Ontology and Corpus for Implicit Causal Relations between Events</title>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <url hash="724721af">L18-1725</url>
@@ -6387,7 +6387,7 @@
       <title>Profiling Medical Journal Articles Using a Gene Ontology Semantic Tagger</title>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Jo</first><last>Knight</last></author>
       <url hash="e100f25b">L18-1726</url>
       <bibkey>el-haj-etal-2018-profiling</bibkey>
@@ -6403,7 +6403,7 @@
       <title><fixed-case>I</fixed-case>ndian Language Wordnets and their Linkages with <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="049fbb12">L18-1728</url>
       <bibkey>kanojia-etal-2018-indian</bibkey>
     </paper>
diff --git a/data/xml/M91.xml b/data/xml/M91.xml
index 8dd46a2d60..e29aac8344 100644
--- a/data/xml/M91.xml
+++ b/data/xml/M91.xml
@@ -12,25 +12,25 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the Third <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="d7242d94">M91-1001</url>
       <bibkey>sundheim-1991-overview</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>MUC</fixed-case>-3 Evaluation Metrics</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="9f3de5e4">M91-1002</url>
       <bibkey>chinchor-1991-muc</bibkey>
     </paper>
     <paper id="3">
       <title>Comparing <fixed-case>MUCK</fixed-case>-<fixed-case>II</fixed-case> and <fixed-case>MUC</fixed-case>-3: Assessing the Difficulty of Different Tasks</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="a8b220d0">M91-1003</url>
       <bibkey>hirschman-1991-comparing</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>MUC</fixed-case>-3 Linguistic Phenomena Test Experiment</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="295bb34f">M91-1004</url>
       <bibkey>chinchor-1991-muc-3</bibkey>
     </paper>
@@ -43,20 +43,20 @@
     </paper>
     <paper id="6">
       <title><fixed-case>BBN</fixed-case> <fixed-case>PLUM</fixed-case>: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <author><first>Jeff</first><last>Palmucci</last></author>
       <url hash="daeaf592">M91-1006</url>
       <bibkey>weischedel-etal-1991-bbn</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>George</first><last>Krupka</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
       <author><first>Lucja</first><last>Iwariska</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
       <url hash="714a9fee">M91-1007</url>
       <bibkey>krupka-etal-1991-ge</bibkey>
     </paper>
@@ -68,7 +68,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>H</fixed-case>ughes <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Charles P.</first><last>Dolan</last></author>
+      <author id="charles-p-dolan"><first>Charles P.</first><last>Dolan</last></author>
       <author><first>Seth R.</first><last>Goldman</last></author>
       <author><first>Thomas V.</first><last>Cuda</last></author>
       <author><first>Alan M.</first><last>Nakamura</last></author>
@@ -77,19 +77,19 @@
     </paper>
     <paper id="10">
       <title><fixed-case>ITP</fixed-case> <fixed-case>I</fixed-case>nterpretext System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Kathleen</first><last>Dahlgren</last></author>
+      <author id="kathleen-dahlgren"><first>Kathleen</first><last>Dahlgren</last></author>
       <author><first>Carol</first><last>Lord</last></author>
       <author><first>Hajime</first><last>Wada</last></author>
-      <author><first>Joyce</first><last>McDowell</last></author>
-      <author><first>Edward P.</first><last>Stabler, Jr.</last></author>
+      <author id="joyce-mcdowell"><first>Joyce</first><last>McDowell</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler, Jr.</last></author>
       <url hash="53d035cf">M91-1010</url>
       <bibkey>dahlgren-etal-1991-itp</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc.<fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Robert E.</first><last>Stumberger</last></author>
       <url hash="5e67b517">M91-1011</url>
       <bibkey>montgomery-etal-1991-language</bibkey>
@@ -103,9 +103,9 @@
     </paper>
     <paper id="13">
       <title><fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity <fixed-case>PROTEUS</fixed-case> System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <url hash="b38e8059">M91-1013</url>
       <bibkey>grishman-etal-1991-new</bibkey>
     </paper>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational’s <fixed-case>TACITUS</fixed-case> System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="955ddf7a">M91-1015</url>
       <bibkey>hobbs-1991-sri</bibkey>
     </paper>
@@ -133,16 +133,16 @@
       <author><first>Carl</first><last>Weir</last></author>
       <author><first>Robin</first><last>McEntire</last></author>
       <author><first>Barry</first><last>Silk</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <url hash="0c90644b">M91-1017</url>
       <bibkey>weir-etal-1991-unisys</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis</title>
-      <author><first>Wendy</first><last>Lehnert</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>David</first><last>Fisher</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="david-fisher"><first>David</first><last>Fisher</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Robert</first><last>Williams</last></author>
       <url hash="a3de6d82">M91-1018</url>
       <bibkey>lehnert-etal-1991-university</bibkey>
@@ -163,20 +163,20 @@
     </paper>
     <paper id="21">
       <title><fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for<fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <author><first>Jeff</first><last>Palmucci</last></author>
       <url hash="89ee160d">M91-1021</url>
       <bibkey>weischedel-etal-1991-bbn-description</bibkey>
     </paper>
     <paper id="22">
       <title><fixed-case>GE</fixed-case>: Description of the <fixed-case>NLT</fixed-case>oolset System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
-      <author><first>Lucja</first><last>Iwanska</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
+      <author id="lucja-iwanska"><first>Lucja</first><last>Iwanska</last></author>
       <url hash="408d9d24">M91-1022</url>
       <bibkey>krupka-etal-1991-ge-description</bibkey>
     </paper>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>H</fixed-case>ughes <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: Description of the <fixed-case>TTS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Charles P.</first><last>Dolan</last></author>
+      <author id="charles-p-dolan"><first>Charles P.</first><last>Dolan</last></author>
       <author><first>Seth R.</first><last>Goldman</last></author>
       <author><first>Thomas V.</first><last>Cuda</last></author>
       <author><first>Alan M.</first><last>Nakamura</last></author>
@@ -197,19 +197,19 @@
     </paper>
     <paper id="25">
       <title><fixed-case>ITP</fixed-case>: Description of the <fixed-case>I</fixed-case>nterpretext System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Kathleen</first><last>Dahlgren</last></author>
+      <author id="kathleen-dahlgren"><first>Kathleen</first><last>Dahlgren</last></author>
       <author><first>Carol</first><last>Lord</last></author>
       <author><first>Hajime</first><last>Wada</last></author>
-      <author><first>Joyce</first><last>McDowell</last></author>
+      <author id="joyce-mcdowell"><first>Joyce</first><last>McDowell</last></author>
       <author><first>Jr.</first><last>Edward P. Stabler</last></author>
       <url hash="a7058209">M91-1025</url>
       <bibkey>dahlgren-etal-1991-itp-description</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc. Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Robert E.</first><last>Stumberger</last></author>
       <url hash="32469aaf">M91-1026</url>
       <bibkey>montgomery-etal-1991-language-systems</bibkey>
@@ -223,9 +223,9 @@
     </paper>
     <paper id="28">
       <title><fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <url hash="583c0969">M91-1028</url>
       <bibkey>grishman-etal-1991-new-york</bibkey>
     </paper>
@@ -237,7 +237,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational: Description of the <fixed-case>TACITUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="56a98580">M91-1030</url>
       <bibkey>hobbs-1991-sri-international</bibkey>
     </paper>
@@ -250,7 +250,7 @@
     <paper id="32">
       <title><fixed-case>U</fixed-case>nisys: Description of the <fixed-case>U</fixed-case>nisys System Used for <fixed-case>MUC</fixed-case>-3</title>
       <author><first>Carl</first><last>Weir</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>Robin</first><last>McEntire</last></author>
       <author><first>Barry</first><last>Silk</last></author>
       <url hash="938f0b0a">M91-1032</url>
@@ -258,10 +258,10 @@
     </paper>
     <paper id="33">
       <title><fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts: Description of the <fixed-case>CIRCUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Wendy</first><last>Lehnert</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>David</first><last>Fisher</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="david-fisher"><first>David</first><last>Fisher</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Robert</first><last>Williams</last></author>
       <url hash="a66f9654">M91-1033</url>
       <bibkey>lehnert-etal-1991-university-massachusetts</bibkey>
@@ -275,21 +275,21 @@
     </paper>
     <paper id="35">
       <title>Data Extraction as Text Categorization: An Experiment With the <fixed-case>MUC</fixed-case>-3 Corpus</title>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <url hash="77ba4094">M91-1035</url>
       <bibkey>lewis-1991-data</bibkey>
     </paper>
     <paper id="36">
       <title>Computational Aspects of Discourse in the Context of <fixed-case>MUC</fixed-case>-3</title>
-      <author><first>Lucja</first><last>Iwanska</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
+      <author id="lucja-iwanska"><first>Lucja</first><last>Iwanska</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
       <author><first>Kathy</first><last>Dahlgren</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Christine</first><last>Montgomery</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="christine-a-montgomery"><first>Christine</first><last>Montgomery</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <url hash="6ee7ffe2">M91-1036</url>
       <bibkey>iwanska-etal-1991-computational</bibkey>
     </paper>
diff --git a/data/xml/M92.xml b/data/xml/M92.xml
index dd4bdac94d..a403edbbc0 100644
--- a/data/xml/M92.xml
+++ b/data/xml/M92.xml
@@ -12,65 +12,65 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of the Fourth <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="f53d3ab0">M92-1001</url>
       <bibkey>sundheim-1992-overview</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>MUC</fixed-case>-4 Evaluation Metrics</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="0c985ed8">M92-1002</url>
       <bibkey>chinchor-1992-muc</bibkey>
     </paper>
     <paper id="3">
       <title>The Statistical Significance of the <fixed-case>MUC</fixed-case>-4 Results</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <bibkey>chinchor-1992-statistical</bibkey>
     </paper>
     <paper id="4">
       <title>Text Filtering in <fixed-case>B/IUC</fixed-case>-3 and <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>David D.</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David D.</first><last>Lewis</last></author>
       <author><first>Richard M.</first><last>Tong</last></author>
       <url hash="9ccf6d49">M92-1004</url>
       <bibkey>lewis-tong-1992-text</bibkey>
     </paper>
     <paper id="5">
       <title>An Adjunct Test for Discourse Processing in <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="0ffb689b">M92-1005</url>
       <bibkey>hirschman-1992-adjunct</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>GE</fixed-case> Adjunct Test Report: Object-Oriented Design and Scoring for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
       <url hash="d0e9776e">M92-1006</url>
       <bibkey>krupka-rau-1992-ge</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>BBN</fixed-case> <fixed-case>PLUM</fixed-case>: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Herbert</first><last>Gish</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="herbert-gish"><first>Herbert</first><last>Gish</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <url hash="93d4bce5">M92-1007</url>
       <bibkey>weischedel-etal-1992-bbn</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Lisa</first><last>Rau</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
       <url hash="8e6a25c0">M92-1008</url>
       <bibkey>rau-etal-1992-ge</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>TIPSTER</fixed-case> <fixed-case>SHOGUN</fixed-case> System (Joint <fixed-case>GE-CMU</fixed-case>): <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
-      <author><first>Michael</first><last>Mauldin</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
+      <author id="michael-l-mauldin"><first>Michael</first><last>Mauldin</last></author>
       <author><first>Todd</first><last>Kaufmann</last></author>
       <author><first>Ira</first><last>Sider</last></author>
       <url hash="6147d3bf">M92-1009</url>
@@ -79,17 +79,17 @@
     <paper id="10">
       <title><fixed-case>H</fixed-case>ughes <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratories <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
       <author><first>Stephanie E.</first><last>August</last></author>
-      <author><first>Charles P.</first><last>Dolan</last></author>
+      <author id="charles-p-dolan"><first>Charles P.</first><last>Dolan</last></author>
       <url hash="c6b27264">M92-1010</url>
       <bibkey>august-dolan-1992-hughes</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc.<fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
       <author><first>Robert R.</first><last>Stumberger</last></author>
       <author><first>Naicong</first><last>Li</last></author>
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Alfredo</first><last>Arnaiz</last></author>
       <author><first>Susan B.</first><last>Hirsh</last></author>
       <url hash="65a7885b">M92-1011</url>
@@ -108,24 +108,24 @@
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
       <author><first>Dennis</first><last>Connolly</last></author>
       <author><first>Susan</first><last>Roberts</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <url hash="3acff8eb">M92-1013</url>
       <bibkey>aberdeen-etal-1992-mitre</bibkey>
     </paper>
     <paper id="14">
       <title><fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> and <fixed-case>B</fixed-case>randeis <fixed-case>M</fixed-case>uc<fixed-case>B</fixed-case>ruce: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="1937cc1f">M92-1014</url>
       <bibkey>cowie-etal-1992-crl</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity <fixed-case>PROTEUS</fixed-case> System: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <url hash="2c662246">M92-1015</url>
       <bibkey>grishman-etal-1992-new</bibkey>
     </paper>
@@ -145,18 +145,18 @@
     <paper id="18">
       <title><fixed-case>SRA</fixed-case> <fixed-case>S</fixed-case>olomon: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Doug</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Doug</first><last>McKee</last></author>
       <author><first>Sandy</first><last>Shinn</last></author>
-      <author><first>Hatte</first><last>Blejer</last></author>
+      <author id="hatte-blejer"><first>Hatte</first><last>Blejer</last></author>
       <url hash="7f53098a">M92-1018</url>
       <bibkey>aone-etal-1992-sra</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational <fixed-case>FASTUS</fixed-case> System <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <url hash="d3280021">M92-1019</url>
       <bibkey>appelt-etal-1992-sri</bibkey>
@@ -180,11 +180,11 @@
     </paper>
     <paper id="22">
       <title>The <fixed-case>LINK</fixed-case> System: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis</title>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <author><first>Sayan</first><last>Bhattacharyya</last></author>
       <author><first>Robert R.</first><last>Burridge</last></author>
       <author><first>Peter M.</first><last>Hastings</last></author>
-      <author><first>Christian</first><last>Huyck</last></author>
+      <author id="christian-huyck"><first>Christian</first><last>Huyck</last></author>
       <author><first>Karen A.</first><last>Lipinsky</last></author>
       <author><first>Eric S.</first><last>McDaniel</last></author>
       <author><first>Karenann K.</first><last>Terrell</last></author>
@@ -204,49 +204,49 @@
     </paper>
     <paper id="24">
       <title><fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Herb</first><last>Gish</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="herbert-gish"><first>Herb</first><last>Gish</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <url hash="1e543f28">M92-1024</url>
       <bibkey>ayuso-etal-1992-bbn</bibkey>
     </paper>
     <paper id="25">
       <title><fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: Description of the System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Paul</first><last>Jacobs</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
-      <author><first>Lois</first><last>Childs</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
+      <author id="lois-c-childs"><first>Lois</first><last>Childs</last></author>
       <author><first>Ira</first><last>Sider</last></author>
       <url hash="67f04d44">M92-1025</url>
       <bibkey>krupka-etal-1992-ge</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>GE</fixed-case>-<fixed-case>CMU</fixed-case>: Description of the <fixed-case>TIPSTER/SHOGUN</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Paul</first><last>Jacobs</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul</first><last>Jacobs</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
       <author><first>Todd</first><last>Kaufmann</last></author>
-      <author><first>Michael</first><last>Mauldin</last></author>
+      <author id="michael-l-mauldin"><first>Michael</first><last>Mauldin</last></author>
       <url hash="a0eaa176">M92-1026</url>
       <bibkey>jacobs-etal-1992-ge</bibkey>
     </paper>
     <paper id="27">
       <title><fixed-case>H</fixed-case>ughes <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratories: Description of the <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer Used for <fixed-case>MUC</fixed-case>-4</title>
       <author><first>Stephanie E.</first><last>August</last></author>
-      <author><first>Charles P.</first><last>Dolan</last></author>
+      <author id="charles-p-dolan"><first>Charles P.</first><last>Dolan</last></author>
       <url hash="f0fab5e6">M92-1027</url>
       <bibkey>august-dolan-1992-hughes-research</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc. Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
       <author><first>Robert E.</first><last>Stumberger</last></author>
       <author><first>Naicong</first><last>Li</last></author>
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Alfredo</first><last>Arnaiz</last></author>
       <author><first>Susan B.</first><last>Hirsh</last></author>
       <url hash="8e6fe06d">M92-1028</url>
@@ -265,22 +265,22 @@
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
       <author><first>Dennis</first><last>Connolly</last></author>
       <author><first>Susan</first><last>Roberts</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <url hash="66ee8e7f">M92-1030</url>
       <bibkey>aberdeen-etal-1992-mitre-bedford</bibkey>
     </paper>
     <paper id="31">
       <title><fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> and <fixed-case>B</fixed-case>randeis: Description of the <fixed-case>M</fixed-case>uc<fixed-case>B</fixed-case>ruce System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="7061e96b">M92-1031</url>
       <bibkey>cowie-etal-1992-crl-nmsu</bibkey>
     </paper>
     <paper id="32">
       <title><fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="b4734232">M92-1032</url>
       <bibkey>grishman-etal-1992-new-york</bibkey>
@@ -301,19 +301,19 @@
     <paper id="35">
       <title><fixed-case>SRA</fixed-case>: Description of the <fixed-case>S</fixed-case>olomon System as Used <fixed-case>F</fixed-case>or<fixed-case>MUC</fixed-case>-4</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Doug</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Doug</first><last>McKee</last></author>
       <author><first>Sandy</first><last>Shinn</last></author>
-      <author><first>Hatte</first><last>Blejer</last></author>
+      <author id="hatte-blejer"><first>Hatte</first><last>Blejer</last></author>
       <url hash="bf4f2c20">M92-1035</url>
       <bibkey>aone-etal-1992-sra-description</bibkey>
     </paper>
     <paper id="36">
       <title><fixed-case>SRI</fixed-case> International: Description of the <fixed-case>FASTUS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-4</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <url hash="c4018e5d">M92-1036</url>
       <bibkey>hobbs-etal-1992-sri</bibkey>
     </paper>
@@ -336,11 +336,11 @@
     </paper>
     <paper id="39">
       <title>Description of the <fixed-case>LINK</fixed-case> System Used for <fixed-case>MUC</fixed-case>- 4</title>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <author><first>Sayan</first><last>Bhattacharyya</last></author>
       <author><first>Robert R.</first><last>Burridge</last></author>
       <author><first>Peter M.</first><last>Hastings</last></author>
-      <author><first>Christian</first><last>Huyck</last></author>
+      <author id="christian-huyck"><first>Christian</first><last>Huyck</last></author>
       <author><first>Karen A.</first><last>Lipinsky</last></author>
       <author><first>Eric S.</first><last>McDaniel</last></author>
       <author><first>Karenann K.</first><last>Terrell</last></author>
diff --git a/data/xml/M93.xml b/data/xml/M93.xml
index d3f2798ac5..0170bff6ac 100644
--- a/data/xml/M93.xml
+++ b/data/xml/M93.xml
@@ -13,14 +13,14 @@
     <paper id="1">
       <title>Corpora and Data Preparation</title>
       <author><first>Lynn</first><last>Carlson</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
       <url hash="48693573">M93-1001</url>
       <bibkey>carlson-etal-1993-corpora</bibkey>
     </paper>
     <paper id="2">
       <title>Tasks, Domains, and Languages</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
       <author><first>Lynn</first><last>Carlson</last></author>
       <url hash="f3d643a1">M93-1002</url>
@@ -28,13 +28,13 @@
     </paper>
     <paper id="3">
       <title>Template Design for Information Extraction</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="61fa9bfc">M93-1003</url>
       <bibkey>onyshkevych-1993-template</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>TIPSTER</fixed-case>/<fixed-case>MUC</fixed-case>-5 Information Extraction System Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="3c9685a8">M93-1004</url>
       <bibkey>sundheim-1993-tipster</bibkey>
     </paper>
@@ -52,32 +52,32 @@
     </paper>
     <paper id="7">
       <title><fixed-case>MUC</fixed-case>-5 Evaluation Metrics</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="3c902acd">M93-1007</url>
       <bibkey>chinchor-sundheim-1993-muc</bibkey>
     </paper>
     <paper id="8">
       <title>The Statistical Significance of the <fixed-case>MUC</fixed-case>-5 Results</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="8f1fc07d">M93-1008</url>
       <bibkey>chinchor-1993-statistical</bibkey>
     </paper>
     <paper id="9">
       <title>The Generic Information Extraction System</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="3453a589">M93-1009</url>
       <bibkey>hobbs-1993-generic</bibkey>
     </paper>
     <paper id="10">
       <title><fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <author><first>Tomoyoshi</first><last>Matsukawa</last></author>
-      <author><first>Constantine</first><last>Papageorgiou</last></author>
+      <author id="constantine-papageorgiou"><first>Constantine</first><last>Papageorgiou</last></author>
       <author><first>Dawn</first><last>MacLaughlin</last></author>
       <author><first>Masaichiro</first><last>Kitagawa</last></author>
       <author><first>Tsutomu</first><last>Sakai</last></author>
@@ -90,19 +90,19 @@
     </paper>
     <paper id="11">
       <title><fixed-case>GE</fixed-case>-<fixed-case>CMU</fixed-case>: Description of the <fixed-case>SHOGUN</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
       <url hash="5d531959">M93-1011</url>
       <bibkey>jacobs-etal-1993-ge</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems <fixed-case>I</fixed-case>nc: Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-51</title>
-      <author><first>Christine A.</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine A.</first><last>Montgomery</last></author>
       <author><first>Robert E.</first><last>Stumberger</last></author>
-      <author><first>Bonnie Glover</first><last>Stalls</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie Glover</first><last>Stalls</last></author>
       <author><first>Naicong</first><last>Li</last></author>
-      <author><first>Robert S.</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert S.</first><last>Belvin</last></author>
       <author><first>Susan Hirsh</first><last>Litenatsky</last></author>
       <url hash="1850a6df">M93-1012</url>
       <bibkey>montgomery-etal-1993-language</bibkey>
@@ -113,34 +113,34 @@
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
       <author><first>Dennis</first><last>Connolly</last></author>
       <author><first>Susan</first><last>Roberts</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <url hash="8dd49f25">M93-1013</url>
       <bibkey>aberdeen-etal-1993-mitre</bibkey>
     </paper>
     <paper id="14">
       <title><fixed-case>NEC</fixed-case>: Description of the <fixed-case>VENIEX</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5</title>
       <author><first>Kazunori</first><last>Muraki</last></author>
-      <author><first>Shinichi</first><last>Doi</last></author>
-      <author><first>Shinichi</first><last>Ando</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
+      <author id="shinichi-ando"><first>Shinichi</first><last>Ando</last></author>
       <url hash="2ca599b6">M93-1014</url>
       <bibkey>muraki-etal-1993-nec</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>CRL/B</fixed-case>randeis: Description of the <i>
           <fixed-case>D</fixed-case>iderot</i> System as Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><last>Wang</last><first>Jin</first></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author><first>Jin</first><last>Wang</last></author>
       <author><first>Rong</first><last>Wang</last></author>
-      <author><first>Takahiro</first><last>Wakao</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Scott</first><last>Waterman</last></author>
       <url hash="6bbb400d">M93-1015</url>
       <bibkey>cowie-etal-1993-crl</bibkey>
     </paper>
     <paper id="16">
       <title><fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <url hash="c8f18d19">M93-1016</url>
       <bibkey>grishman-sterling-1993-new</bibkey>
@@ -156,17 +156,17 @@
       <title><fixed-case>SRA</fixed-case>: Description of the <fixed-case>S</fixed-case>olomon System as Used for <fixed-case>MUC</fixed-case>-5</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
       <author><first>Sharon</first><last>Flank</last></author>
-      <author><first>Doug</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Doug</first><last>McKee</last></author>
       <author><first>Paul</first><last>Krause</last></author>
       <url hash="18290082">M93-1018</url>
       <bibkey>aone-etal-1993-sra</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>SRI</fixed-case>: Description of the <fixed-case>JV</fixed-case>-<fixed-case>FASTUS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <url hash="4bca0cc1">M93-1019</url>
@@ -182,7 +182,7 @@
     <paper id="21">
       <title><fixed-case>UNISYS</fixed-case>: Description of the <fixed-case>CBAS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5</title>
       <author><first>Carl</first><last>Weir</last></author>
-      <author><first>Rich</first><last>Fritzson</last></author>
+      <author id="richard-fritzson"><first>Rich</first><last>Fritzson</last></author>
       <url hash="a0925c24">M93-1021</url>
       <bibkey>weir-fritzson-1993-unisys</bibkey>
     </paper>
@@ -199,24 +199,24 @@
       <author id="stephen-soderland"><first>S.</first><last>Soderland</last></author>
       <author id="ellen-riloff"><first>E.</first><last>Riloff</last></author>
       <author id="claire-cardie"><first>C.</first><last>Cardie</last></author>
-      <author><first>J.</first><last>Peterson</last></author>
+      <author id="j-peterson"><first>J.</first><last>Peterson</last></author>
       <author id="fangfang-feng"><first>F.</first><last>Feng</last></author>
       <url hash="b23903c9">M93-1023</url>
       <bibkey>lehnert-etal-1993-umass</bibkey>
     </paper>
     <paper id="24">
       <title>Description of the <fixed-case>LINK</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Steven L.</first><last>Lytinen</last></author>
+      <author id="steven-l-lytinen"><first>Steven L.</first><last>Lytinen</last></author>
       <author><first>Robert R.</first><last>Burridge</last></author>
       <author><first>Peter M.</first><last>Hastings</last></author>
-      <author><first>Christian</first><last>Huyck</last></author>
+      <author id="christian-huyck"><first>Christian</first><last>Huyck</last></author>
       <url hash="01683870">M93-1024</url>
       <bibkey>lytinen-etal-1993-description</bibkey>
     </paper>
     <paper id="25">
       <title><fixed-case>USC</fixed-case>: Description of the <fixed-case>SNAP</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Seungho</first><last>Cha</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="seungho-cha"><first>Seungho</first><last>Cha</last></author>
       <author><first>Minhwa</first><last>Chung</last></author>
       <author><first>Tony</first><last>Gallippi</last></author>
       <author><first>Kenneth J.</first><last>Hendrickson</last></author>
@@ -228,15 +228,15 @@
     </paper>
     <paper id="26">
       <title><fixed-case>S</fixed-case>ussex <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>S</fixed-case>ussex System Used for <fixed-case>MUC</fixed-case>-5</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Lynne</first><last>Cahill</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <url hash="ae27f824">M93-1026</url>
       <bibkey>gaizauskas-etal-1993-sussex</bibkey>
     </paper>
     <paper id="27">
       <title>Summary of Workshop on Lexicons for Text Extraction</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="f29b72c5">M93-1027</url>
       <bibkey>pustejovsky-1993-summary</bibkey>
     </paper>
@@ -248,25 +248,25 @@
     </paper>
     <paper id="29">
       <title>Information Extraction for the Future</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <url hash="e945a443">M93-1029</url>
       <bibkey>jacobs-1993-information</bibkey>
     </paper>
     <paper id="30">
       <title>Topic Session on <fixed-case>DISCOURSE</fixed-case></title>
-      <author><first>Damaris M.</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris M.</first><last>Ayuso</last></author>
       <url hash="48d9f16c">M93-1030</url>
       <bibkey>ayuso-1993-topic</bibkey>
     </paper>
     <paper id="31">
       <title>Tools and Techniques for Rapid Porting</title>
-      <author><first>Joe</first><last>McCarthy</last></author>
+      <author id="joe-mccarthy"><first>Joe</first><last>McCarthy</last></author>
       <url hash="5d023d94">M93-1031</url>
       <bibkey>mccarthy-1993-tools</bibkey>
     </paper>
     <paper id="32">
       <title>Information Extraction and Evaluation</title>
-      <author><first>Lisa F.</first><last>Rau</last></author>
+      <author id="lisa-rau"><first>Lisa F.</first><last>Rau</last></author>
       <url hash="4c371b5f">M93-1032</url>
       <bibkey>rau-1993-information</bibkey>
     </paper>
diff --git a/data/xml/M95.xml b/data/xml/M95.xml
index 27e5b9fcb7..71b9e4a39e 100644
--- a/data/xml/M95.xml
+++ b/data/xml/M95.xml
@@ -12,37 +12,37 @@
     </frontmatter>
     <paper id="1">
       <title>Design of the <fixed-case>MUC</fixed-case>-6 Evaluation</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <url hash="bb47eb65">M95-1001</url>
       <bibkey>grishman-sundheim-1995-design</bibkey>
     </paper>
     <paper id="2">
       <title>Overview of Results of the <fixed-case>MUC</fixed-case>-6 Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <url hash="5a15e0d3">M95-1002</url>
       <bibkey>sundheim-1995-overview</bibkey>
     </paper>
     <paper id="3">
       <title>FOUR SCORERS AND SEVEN YEARS AGO: The Scoring Method for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <author><first>Gary</first><last>Dungca</last></author>
       <url hash="619fb1a0">M95-1003</url>
       <bibkey>chinchor-dungca-1995-four</bibkey>
     </paper>
     <paper id="4">
       <title>Statistical Significance of <fixed-case>MUC</fixed-case>-6 Results</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="b4d66d6e">M95-1004</url>
       <bibkey>chinchor-1995-statistical</bibkey>
     </paper>
     <paper id="5">
       <title>A Model-Theoretic Coreference Scoring Scheme</title>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
       <author><first>Dennis</first><last>Connolly</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="3e4bcba9">M95-1005</url>
       <bibkey>vilain-etal-1995-model</bibkey>
     </paper>
@@ -53,11 +53,11 @@
     </paper>
     <paper id="7">
       <title><fixed-case>U</fixed-case>niversity of <fixed-case>D</fixed-case>urham: Description of the <fixed-case>LOLITA</fixed-case> System as Used in <fixed-case>MUC</fixed-case>-6.</title>
-      <author><first>Richard</first><last>Morgan</last></author>
-      <author><first>Roberto</first><last>Garigliano</last></author>
+      <author id="richard-g-morgan"><first>Richard</first><last>Morgan</last></author>
+      <author id="roberto-garigliano"><first>Roberto</first><last>Garigliano</last></author>
       <author><first>Paul</first><last>Callaghan</last></author>
       <author><first>Sanjay</first><last>Poria</last></author>
-      <author><first>Mark</first><last>Smith</last></author>
+      <author id="mark-h-smith"><first>Mark</first><last>Smith</last></author>
       <author><first>Agnieszka</first><last>Urbanowicz</last></author>
       <author><first>Russell</first><last>Collingham</last></author>
       <author><first>Marco</first><last>Costantino</last></author>
@@ -74,9 +74,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin: <fixed-case>LOUELLA PARSING</fixed-case>, An <fixed-case>NLT</fixed-case>oolset System for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>Lois</first><last>Childs</last></author>
-      <author><first>Deb</first><last>Brady</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="lois-c-childs"><first>Lois</first><last>Childs</last></author>
+      <author id="deborah-brady"><first>Deb</first><last>Brady</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Jose</first><last>Franco</last></author>
       <author><first>Dan</first><last>Valdes-Dapena</last></author>
       <author><first>Bill</first><last>Reid</last></author>
@@ -94,11 +94,11 @@
     </paper>
     <paper id="11">
       <title>Description of the <fixed-case>UM</fixed-case>ass System as Used for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>David</first><last>Fisher</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="david-fisher"><first>David</first><last>Fisher</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Joseph</first><last>McCarthy</last></author>
-      <author><first>Fangfang</first><last>Feng</last></author>
-      <author><first>Wendy</first><last>Lehnert</last></author>
+      <author id="fangfang-feng"><first>Fangfang</first><last>Feng</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
       <url hash="36d9bd66">M95-1011</url>
       <bibkey>fisher-etal-1995-description</bibkey>
     </paper>
@@ -106,22 +106,22 @@
       <title><fixed-case>MITRE</fixed-case>: Description of the <fixed-case>A</fixed-case>lembic System Used for <fixed-case>MUC</fixed-case>-6</title>
       <author><first>John</first><last>Aberdeen</last></author>
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
-      <author><first>David</first><last>Day</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Patricia</first><last>Robinson</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="patricia-robinson"><first>Patricia</first><last>Robinson</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <url hash="bcd47a06">M95-1012</url>
       <bibkey>aberdeen-etal-1995-mitre</bibkey>
     </paper>
     <paper id="13">
       <title><fixed-case>CRL</fixed-case>/<fixed-case>NMSUD</fixed-case>escription of the <fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> Systems Used for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <url hash="5f16f625">M95-1013</url>
       <bibkey>cowie-1995-crl</bibkey>
     </paper>
     <paper id="14">
       <title>The <fixed-case>NYU</fixed-case> System for <fixed-case>MUC</fixed-case>-6 or Where’s the Syntax?</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="3c289609">M95-1014</url>
       <bibkey>grishman-1995-nyu</bibkey>
     </paper>
@@ -129,18 +129,18 @@
       <title><fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania: Description of the <fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania System Used for <fixed-case>MUC</fixed-case>-6</title>
       <author><first>Breck</first><last>Baldwin</last></author>
       <author><first>Jeff</first><last>Reynar</last></author>
-      <author><first>Mike</first><last>Collins</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="michael-collins"><first>Mike</first><last>Collins</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first/><last>Srinivas</last></author>
+      <author id="srinivas-bangalore"><first/><last>Srinivas</last></author>
       <url hash="e8741603">M95-1015</url>
       <bibkey>baldwin-etal-1995-university</bibkey>
     </paper>
     <paper id="16">
       <title>Description of the <fixed-case>SAIC DX</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>Lance A.</first><last>Miller</last></author>
+      <author id="lance-a-miller"><first>Lance A.</first><last>Miller</last></author>
       <url hash="c76909ed">M95-1016</url>
       <bibkey>miller-1995-description</bibkey>
     </paper>
@@ -156,18 +156,18 @@
     </paper>
     <paper id="18">
       <title><fixed-case>SRA</fixed-case>: Description of the <fixed-case>SRA</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>George R.</first><last>Krupka</last></author>
+      <author id="george-krupka"><first>George R.</first><last>Krupka</last></author>
       <url hash="4faccf0c">M95-1018</url>
       <bibkey>krupka-1995-sra</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational <fixed-case>FASTUS</fixed-case> <fixed-case>S</fixed-case>ystem<fixed-case>MUC</fixed-case>-6 Test Results and Analysis</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
-      <author><first>Andy</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andy</first><last>Kehler</last></author>
       <author><first>David</first><last>Martin</last></author>
       <author><first>Karen</first><last>Myers</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
@@ -182,7 +182,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>W</fixed-case>ayne <fixed-case>S</fixed-case>tate <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>UNO</fixed-case> Natural Language Processing System as Used for <fixed-case>MUC</fixed-case>-6</title>
-      <author><first>Lucja</first><last>Iwanska</last></author>
+      <author id="lucja-iwanska"><first>Lucja</first><last>Iwanska</last></author>
       <author><first>Mary</first><last>Croll</last></author>
       <author><first>Taewan</first><last>Yoon</last></author>
       <author><first>Maria</first><last>Adams</last></author>
diff --git a/data/xml/M98.xml b/data/xml/M98.xml
index 7d9a10985f..b2e095f59b 100644
--- a/data/xml/M98.xml
+++ b/data/xml/M98.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Overview of <fixed-case>MUC</fixed-case>-7</title>
-      <author><first>Nancy A.</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy A.</first><last>Chinchor</last></author>
       <url hash="edb86216">M98-1001</url>
       <bibkey>chinchor-1998-overview</bibkey>
     </paper>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>U</fixed-case>niversity of <fixed-case>D</fixed-case>urham: Description of the <fixed-case>LOLITA</fixed-case> system as Used in <fixed-case>MUC</fixed-case>-7</title>
-      <author><first>Roberto</first><last>Garigliano</last></author>
+      <author id="roberto-garigliano"><first>Roberto</first><last>Garigliano</last></author>
       <author><first>Agnieszka</first><last>Urbanowicz</last></author>
       <author><first>David J.</first><last>Nettleton</last></author>
       <url hash="04e04eba">M98-1005</url>
@@ -73,20 +73,20 @@
     <paper id="9">
       <title><fixed-case>BBN</fixed-case>: Description of the <fixed-case>SIFT</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-7</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Michael</first><last>Crystal</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Rebecca</first><last>Stone</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first/><last>The Annotation Group</last></author>
       <url hash="f2232a2b">M98-1009</url>
       <bibkey>miller-etal-1998-bbn</bibkey>
     </paper>
     <paper id="10">
       <title>Description of <fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin’s <fixed-case>NLT</fixed-case>oolset as Applied to <fixed-case>MUC</fixed-case>-7 (<fixed-case>AATM</fixed-case>7)</title>
-      <author><first>Deborah</first><last>Brady</last></author>
-      <author><first>Lois</first><last>Childs</last></author>
+      <author id="deborah-brady"><first>Deborah</first><last>Brady</last></author>
+      <author id="lois-c-childs"><first>Lois</first><last>Childs</last></author>
       <author><first>David</first><last>Cassel</last></author>
       <author><first>Bob</first><last>Magee</last></author>
       <author><first>Norris</first><last>Heintzelman</last></author>
@@ -97,7 +97,7 @@
     <paper id="11">
       <title><fixed-case>NYU</fixed-case>: Description of the Proteus/<fixed-case>PET</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-7 <fixed-case>ST</fixed-case></title>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="004cce6d">M98-1011</url>
       <bibkey>yangarber-grishman-1998-nyu</bibkey>
     </paper>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>TASC</fixed-case>: Description of the <fixed-case>TASC</fixed-case> System Used for <fixed-case>MUC</fixed-case>-7</title>
-      <author><first>Terry</first><last>Patten</last></author>
+      <author id="terry-patten"><first>Terry</first><last>Patten</last></author>
       <author><first>Beryl</first><last>Hoffman</last></author>
       <author><first>Martin</first><last>Thurn</last></author>
       <url hash="023d284d">M98-1013</url>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>FACILE</fixed-case>: Description of the <fixed-case>NE</fixed-case> System Used for <fixed-case>MUC</fixed-case>-7</title>
-      <author><first>William J</first><last>Black</last></author>
+      <author id="william-j-black"><first>William J</first><last>Black</last></author>
       <author><first>Fabio</first><last>Rinaldi</last></author>
       <author><first>David</first><last>Mowatt</last></author>
       <url hash="d8577e90">M98-1014</url>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>I</fixed-case>so<fixed-case>Q</fixed-case>uest Inc.: Description of the <fixed-case>N</fixed-case>et<fixed-case>O</fixed-case>wl™ Extractor System as Used for <fixed-case>MUC</fixed-case>-7</title>
-      <author><first>George R.</first><last>Krupka</last></author>
+      <author id="george-krupka"><first>George R.</first><last>Krupka</last></author>
       <author><first>Kevin</first><last>Hausman</last></author>
       <url hash="69b07ab0">M98-1015</url>
       <bibkey>krupka-hausman-1998-isoquest</bibkey>
@@ -155,7 +155,7 @@
       <author><first>Andrew</first><last>Borthwick</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <author><first>Eugene</first><last>Agichtein</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="021f98f3">M98-1018</url>
       <bibkey>borthwick-etal-1998-nyu</bibkey>
     </paper>
@@ -202,7 +202,7 @@
     </paper>
     <paper id="24">
       <title>Appendix <fixed-case>B</fixed-case>: <fixed-case>MUC</fixed-case>-7 Test Scores Introduction</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="29d943cb">M98-1024</url>
       <bibkey>chinchor-1998-appendix</bibkey>
     </paper>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="27">
       <title>Appendix <fixed-case>D</fixed-case>: <fixed-case>MUC</fixed-case>-7 Information Extraction Task Definition (version 5.1)</title>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <author><first>Elaine</first><last>Marsh</last></author>
       <url hash="c179d802">M98-1027</url>
       <bibkey>chinchor-marsh-1998-appendix</bibkey>
@@ -232,8 +232,8 @@
     </paper>
     <paper id="29">
       <title>Appendix <fixed-case>F</fixed-case>: <fixed-case>MUC</fixed-case>-7 Coreference Task Definition (version 3.0)</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <url hash="876178ab">M98-1029</url>
       <bibkey>hirschman-chinchor-1998-appendix</bibkey>
     </paper>
diff --git a/data/xml/N01.xml b/data/xml/N01.xml
index 6ae9cb67b6..6d20d05334 100644
--- a/data/xml/N01.xml
+++ b/data/xml/N01.xml
@@ -12,23 +12,23 @@
     <paper id="1">
       <title>Instance-Based Natural Language Generation</title>
       <author><first>Sebastian</first><last>Varges</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="b3f70b1f">N01-1001</url>
       <bibkey>varges-mellish-2001-instance</bibkey>
     </paper>
     <paper id="2">
       <title>Corpus-based <fixed-case>NP</fixed-case> Modifier Generation</title>
       <author><first>Hua</first><last>Cheng</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Renate</first><last>Henschel</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="dc74ee67">N01-1002</url>
       <bibkey>cheng-etal-2001-corpus</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>SP</fixed-case>o<fixed-case>T</fixed-case>: A Trainable Sentence Planner</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Monica</first><last>Rogati</last></author>
       <url hash="5e4a2a29">N01-1003</url>
       <bibkey>walker-etal-2001-spot</bibkey>
@@ -44,14 +44,14 @@
       <author><first>Abraham</first><last>Ittycheriah</last></author>
       <author><first>Martin</first><last>Franz</last></author>
       <author><first>Wei-Jing</first><last>Zhu</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="4c1e6b08">N01-1005</url>
       <bibkey>ittycheriah-etal-2001-question</bibkey>
     </paper>
     <paper id="6">
       <title>Transformation Based Learning in the Fast Lane</title>
       <author><first>Grace</first><last>Ngai</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <url hash="fc75417f">N01-1006</url>
       <bibkey>ngai-florian-2001-transformation</bibkey>
     </paper>
@@ -63,9 +63,9 @@
     </paper>
     <paper id="8">
       <title>Text and Knowledge Mining for Coreference Resolution</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
-      <author><first>Razvan C.</first><last>Bunescu</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="razvan-bunescu"><first>Razvan C.</first><last>Bunescu</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
       <url hash="1e318a5d">N01-1008</url>
       <bibkey>harabagiu-etal-2001-text</bibkey>
     </paper>
@@ -96,7 +96,7 @@
     <paper id="13">
       <title>Class-Based Probability Estimation Using a Semantic Hierarchy</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <url hash="367342d4">N01-1013</url>
       <bibkey>clark-weir-2001-class</bibkey>
     </paper>
@@ -121,15 +121,15 @@
     </paper>
     <paper id="17">
       <title>Generating Training Data for Medical Dictations</title>
-      <author><first>Sergey</first><last>Pakhomov</last></author>
+      <author id="sergey-v-pakhomov"><first>Sergey</first><last>Pakhomov</last></author>
       <author><first>Michael</first><last>Schonwetter</last></author>
-      <author><first>Joan</first><last>Bachenko</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
       <url hash="7f89f213">N01-1017</url>
       <bibkey>pakhomov-etal-2001-generating</bibkey>
     </paper>
     <paper id="18">
       <title>A Finite-State Approach to Machine Translation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <url hash="b666f457">N01-1018</url>
       <bibkey>bangalore-riccardi-2001-finite</bibkey>
@@ -142,20 +142,20 @@
     </paper>
     <paper id="20">
       <title>Multipath Translation Lexicon Induction via Bridge Languages</title>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="5372b34b">N01-1020</url>
       <bibkey>mann-yarowsky-2001-multipath</bibkey>
     </paper>
     <paper id="21">
       <title>A Probabilistic <fixed-case>E</fixed-case>arley Parser as a Psycholinguistic Model</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <url hash="3ff03738">N01-1021</url>
       <bibkey>hale-2001-probabilistic</bibkey>
     </paper>
     <paper id="22">
       <title>Refining Tabular Parsers for <fixed-case>TAG</fixed-case>s</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <bibkey>villemonte-de-la-clergerie-2001-refining</bibkey>
     </paper>
     <paper id="23">
@@ -167,14 +167,14 @@
     <paper id="24">
       <title>Knowledge-Free Induction of Inflectional Morphologies</title>
       <author><first>Patrick</first><last>Schone</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="83d5b550">N01-1024</url>
       <bibkey>schone-jurafsky-2001-knowledge</bibkey>
     </paper>
     <paper id="25">
       <title>Chunking with Support Vector Machines</title>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="d622b418">N01-1025</url>
       <bibkey>kudo-matsumoto-2001-chunking</bibkey>
     </paper>
@@ -187,8 +187,8 @@
     </paper>
     <paper id="27">
       <title>Identifying User Corrections Automatically in Spoken Dialogue Systems</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
       <url hash="29a2aa87">N01-1027</url>
       <bibkey>hirschberg-etal-2001-identifying</bibkey>
@@ -209,17 +209,17 @@
     </paper>
     <paper id="30">
       <title>Do <fixed-case>CFG</fixed-case>-Based Language Models Need Agreement Constraints?</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <url hash="11f06a9f">N01-1030</url>
       <bibkey>rayner-etal-2001-cfg</bibkey>
     </paper>
     <paper id="31">
       <title>You’re Not From ’Round Here, Are You? Naive <fixed-case>B</fixed-case>ayes Detection of Non-Native Utterances</title>
-      <author><first>Laura Mayfield</first><last>Tomokiyo</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura Mayfield</first><last>Tomokiyo</last></author>
       <author><first>Rosie</first><last>Jones</last></author>
       <url hash="decc1b08">N01-1031</url>
       <bibkey>tomokiyo-jones-2001-youre</bibkey>
diff --git a/data/xml/N03.xml b/data/xml/N03.xml
index 5358ef989c..c69c91ab03 100644
--- a/data/xml/N03.xml
+++ b/data/xml/N03.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Effective Utterance Classification with Unsupervised Phonotactic Models</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <pages>1–7</pages>
       <url hash="41f904a7">N03-1001</url>
       <bibkey>alshawi-2003-effective</bibkey>
@@ -20,7 +20,7 @@
     <paper id="2">
       <title><fixed-case>J</fixed-case>apanese Named Entity Extraction with Redundant Morphological Analysis</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>8–15</pages>
       <url hash="8080cf75">N03-1002</url>
       <bibkey>asahara-matsumoto-2003-japanese</bibkey>
@@ -35,7 +35,7 @@
     </paper>
     <paper id="4">
       <title>In Question Answering, Two Heads Are Better Than One</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Krzysztof</first><last>Czuba</last></author>
       <author><first>John</first><last>Prager</last></author>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
@@ -45,8 +45,8 @@
     </paper>
     <paper id="5">
       <title>Automatic Acquisition of Names Using Speak and Spell Mode in Spoken Dialogue Systems</title>
-      <author><first>Grace</first><last>Chung</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="grace-chung"><first>Grace</first><last>Chung</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <pages>32–39</pages>
       <url hash="00bf0a37">N03-1005</url>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="6">
       <title>Minimally Supervised Induction of Grammatical Gender</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>40–47</pages>
       <url hash="e3d2f0dd">N03-1006</url>
@@ -71,14 +71,14 @@
     <paper id="8">
       <title>Latent Semantic Information in Maximum Entropy Language Models for Conversational Speech Recognition</title>
       <author><first>Yonggang</first><last>Deng</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>56–63</pages>
       <url hash="00b428eb">N03-1008</url>
       <bibkey>deng-khudanpur-2003-latent</bibkey>
     </paper>
     <paper id="9">
       <title>Simpler and More General Minimization for Weighted Finite-State Automata</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>64–71</pages>
       <url hash="d49da194">N03-1009</url>
       <bibkey>eisner-2003-simpler</bibkey>
@@ -92,9 +92,9 @@
     </paper>
     <paper id="11">
       <title>Learning Semantic Constraints for the Automatic Discovery of Part-Whole Relations</title>
-      <author><first>Roxana</first><last>Girju</last></author>
-      <author><first>Adriana</first><last>Badulescu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Badulescu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>80–87</pages>
       <url hash="17b8b9c0">N03-1011</url>
       <bibkey>girju-etal-2003-learning</bibkey>
@@ -112,14 +112,14 @@
     <paper id="13">
       <title>A Categorial Variation Database for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>96–102</pages>
       <url hash="41f3dde8">N03-1013</url>
       <bibkey>habash-dorr-2003-categorial</bibkey>
     </paper>
     <paper id="14">
       <title>Inducing History Representations for Broad Coverage Statistical Parsing</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>103–110</pages>
       <url hash="425d0d9e">N03-1014</url>
       <bibkey>henderson-2003-inducing</bibkey>
@@ -134,7 +134,7 @@
     <paper id="16">
       <title><fixed-case>A</fixed-case>* Parsing: Fast Exact <fixed-case>V</fixed-case>iterbi Parse Selection</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>119–126</pages>
       <url hash="97cb3601">N03-1016</url>
       <bibkey>klein-manning-2003-parsing</bibkey>
@@ -142,7 +142,7 @@
     <paper id="17">
       <title>Statistical Phrase-Based Translation</title>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>127–133</pages>
       <url hash="5abd8377">N03-1017</url>
@@ -167,8 +167,8 @@
     </paper>
     <paper id="20">
       <title>Automatic Evaluation of Summaries Using N-gram Co-occurrence Statistics</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>150–157</pages>
       <url hash="de66dd22">N03-1020</url>
       <bibkey>lin-hovy-2003-automatic</bibkey>
@@ -182,10 +182,10 @@
     </paper>
     <paper id="22">
       <title><fixed-case>COGEX</fixed-case>: A Logic Prover for Question Answering</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Christine</first><last>Clark</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Steve</first><last>Maiorano</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="chris-clark"><first>Christine</first><last>Clark</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="steven-j-maiorano"><first>Steve</first><last>Maiorano</last></author>
       <pages>166–172</pages>
       <url hash="41f5500a">N03-1022</url>
       <bibkey>moldovan-etal-2003-cogex</bibkey>
@@ -193,7 +193,7 @@
     <paper id="23">
       <title>Weakly Supervised Natural Language Learning Without Redundant Views</title>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>173–180</pages>
       <url hash="7fd0c9c3">N03-1023</url>
       <bibkey>ng-cardie-2003-weakly</bibkey>
@@ -219,7 +219,7 @@
     <paper id="26">
       <title>Statistical Sentence Condensation using Ambiguity Packing and Stochastic Disambiguation Methods for <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Tracy H.</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy H.</first><last>King</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <pages>197–204</pages>
@@ -244,7 +244,7 @@
     </paper>
     <paper id="29">
       <title>Comma Restoration Using Constituency Information</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <author><first>Xiaopeng</first><last>Tao</last></author>
       <pages>221–227</pages>
       <url hash="584890ba">N03-1029</url>
@@ -260,7 +260,7 @@
     </paper>
     <paper id="31">
       <title>Example Selection for Bootstrapping Statistical Parsers</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
@@ -275,8 +275,8 @@
     </paper>
     <paper id="32">
       <title>Frequency Estimates for Statistical Word Similarity Measures</title>
-      <author><first>Egidio L.</first><last>Terra</last></author>
-      <author><first>Charles L. A.</first><last>Clarke</last></author>
+      <author id="egidio-l-terra"><first>Egidio L.</first><last>Terra</last></author>
+      <author id="charles-l-a-clarke"><first>Charles L. A.</first><last>Clarke</last></author>
       <pages>244–251</pages>
       <url hash="6056306f">N03-1032</url>
       <bibkey>terra-clarke-2003-frequency</bibkey>
@@ -285,7 +285,7 @@
       <title>Feature-Rich Part-of-Speech Tagging with a Cyclic Dependency Network</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Yoram</first><last>Singer</last></author>
       <pages>252–259</pages>
       <url hash="929485de">N03-1033</url>
@@ -293,7 +293,7 @@
     </paper>
     <paper id="34">
       <title>Evaluating the Evaluation: A Case Study Using the <fixed-case>TREC</fixed-case> 2002 Question Answering Track</title>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <pages>260–267</pages>
       <url hash="e8f713c6">N03-1034</url>
       <bibkey>voorhees-2003-evaluating</bibkey>
@@ -316,7 +316,7 @@
     <paper id="37">
       <title>A Web-Trained Extraction Summarization System</title>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>284–290</pages>
       <url hash="c7c848df">N03-1037</url>
       <bibkey>zhou-hovy-2003-web</bibkey>
@@ -335,7 +335,7 @@
     <paper id="1">
       <title>Automating <fixed-case>XML</fixed-case> markup of text documents</title>
       <author><first>Shazia</first><last>Akhtar</last></author>
-      <author><first>Ronan G.</first><last>Reilly</last></author>
+      <author id="ronan-g-reilly"><first>Ronan G.</first><last>Reilly</last></author>
       <author><first>John</first><last>Dunnion</last></author>
       <pages>1–3</pages>
       <url hash="14f60246">N03-2001</url>
@@ -343,7 +343,7 @@
     </paper>
     <paper id="2">
       <title>Factored Language Models and Generalized Parallel Backoff</title>
-      <author><first>Jeff A.</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff A.</first><last>Bilmes</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
       <pages>4–6</pages>
       <url hash="384f264c">N03-2002</url>
@@ -352,8 +352,8 @@
     <paper id="3">
       <title>Getting More Mileage from Web Text Sources for Conversational Speech Language Modeling using Class-Dependent Mixtures</title>
       <author><first>Ivan</first><last>Bulyko</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>7–9</pages>
       <url hash="92f84250">N03-2003</url>
       <bibkey>bulyko-etal-2003-getting</bibkey>
@@ -361,14 +361,14 @@
     <paper id="4">
       <title>Exploiting Diversity for Answering Questions</title>
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <pages>10–12</pages>
       <url hash="8aac10d5">N03-2004</url>
       <bibkey>burger-henderson-2003-exploiting</bibkey>
     </paper>
     <paper id="5">
       <title>Story Link Detection and New Event Detection are Asymmetric</title>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Ayman</first><last>Farahat</last></author>
       <author><first>Thorsten</first><last>Brants</last></author>
       <pages>13–15</pages>
@@ -378,8 +378,8 @@
     <paper id="6">
       <title>Adaptation Using Out-of-Domain Corpus within <fixed-case>EBMT</fixed-case></title>
       <author><first>Takao</first><last>Doi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <pages>16–18</pages>
       <url hash="68bfb36a">N03-2006</url>
       <bibkey>doi-etal-2003-adaptation</bibkey>
@@ -394,23 +394,23 @@
     <paper id="8">
       <title>A Maximum Entropy Approach to <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Tagging</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>22–24</pages>
       <url hash="15f4d080">N03-2008</url>
       <bibkey>fleischman-hovy-2003-maximum</bibkey>
     </paper>
     <paper id="9">
       <title>Target Word Detection and Semantic Role Chunking using Support Vector Machines</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <pages>25–27</pages>
       <url hash="c4c0052f">N03-2009</url>
       <bibkey>hacioglu-ward-2003-target</bibkey>
     </paper>
     <paper id="10">
       <title>Question Classification with Support Vector Machines and Error Correcting Codes</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <pages>28–30</pages>
       <url hash="3202e5d2">N03-2010</url>
       <bibkey>hacioglu-ward-2003-question</bibkey>
@@ -426,9 +426,9 @@
     </paper>
     <paper id="12">
       <title>Detection Of Agreement vs. Disagreement In Meetings: Training With Unlabeled Data</title>
-      <author><first>Dustin</first><last>Hillard</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="dustin-hillard"><first>Dustin</first><last>Hillard</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <pages>34–36</pages>
       <url hash="98112e83">N03-2012</url>
       <bibkey>hillard-etal-2003-detection</bibkey>
@@ -437,7 +437,7 @@
       <title>Automatic Expansion of Equivalent Sentence Set Based on Syntactic Substitution</title>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Yasuhiro</first><last>Akiba</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>37–39</pages>
       <url hash="fcda7fb6">N03-2013</url>
       <bibkey>imamura-etal-2003-automatic-expansion</bibkey>
@@ -445,11 +445,11 @@
     <paper id="14">
       <title>Identifying and Tracking Entity Mentions in a Maximum Entropy Framework</title>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Lucian</first><last>Lita</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Margo</first><last>Stys</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian</first><last>Lita</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="margo-stys-budzikowska"><first>Margo</first><last>Stys</last></author>
       <pages>40–42</pages>
       <url hash="f1dd5edd">N03-2014</url>
       <bibkey>ittycheriah-etal-2003-identifying</bibkey>
@@ -481,8 +481,8 @@
     </paper>
     <paper id="18">
       <title>Towards Emotion Prediction in Spoken Tutoring Dialogues</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Kate</first><last>Forbes</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes</last></author>
       <author><first>Scott</first><last>Silliman</last></author>
       <pages>52–54</pages>
       <url hash="ecbe7e48">N03-2018</url>
@@ -503,7 +503,7 @@
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>58–60</pages>
       <url hash="60a9d11c">N03-2020</url>
       <bibkey>masuda-etal-2003-robust</bibkey>
@@ -512,7 +512,7 @@
       <title>Precision and Recall of Machine Translation</title>
       <author><first>I. Dan</first><last>Melamed</last></author>
       <author><first>Ryan</first><last>Green</last></author>
-      <author><first>Joseph P.</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph P.</first><last>Turian</last></author>
       <pages>61–63</pages>
       <url hash="87c08b3d">N03-2021</url>
       <bibkey>melamed-etal-2003-precision</bibkey>
@@ -520,15 +520,15 @@
     <paper id="22">
       <title>Semantic Extraction with Wide-Coverage Lexical Resources</title>
       <author><first>Behrang</first><last>Mohit</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <pages>64–66</pages>
       <url hash="d69bad91">N03-2022</url>
       <bibkey>mohit-narayanan-2003-semantic</bibkey>
     </paper>
     <paper id="23">
       <title>Category-based Pseudowords</title>
-      <author><first>Preslav I.</first><last>Nakov</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav I.</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>67–69</pages>
       <url hash="28971c06">N03-2023</url>
       <bibkey>nakov-hearst-2003-category</bibkey>
@@ -536,7 +536,7 @@
     <paper id="24">
       <title>References to Named Entities: a Corpus Study</title>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>70–72</pages>
       <url hash="06566a23">N03-2024</url>
       <bibkey>nenkova-mckeown-2003-references</bibkey>
@@ -546,21 +546,21 @@
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Jihong</first><last>Ding</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>73–75</pages>
       <url hash="b8b04f60">N03-2025</url>
       <bibkey>niu-etal-2003-bootstrapping</bibkey>
     </paper>
     <paper id="26">
       <title>Desparately Seeking <fixed-case>C</fixed-case>ebuano</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Daqing</first><last>He</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
@@ -589,14 +589,14 @@
       <title>Automatic Derivation of Surface Text Patterns for a Maximum Entropy Based Question Answering System</title>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>85–87</pages>
       <url hash="59720238">N03-2029</url>
       <bibkey>ravichandran-etal-2003-automatic</bibkey>
     </paper>
     <paper id="30">
       <title>A Hybrid Approach to Content Analysis for Automatic Essay Grading</title>
-      <author><first>Carolyn P.</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rose</last></author>
       <author><first>Antonio</first><last>Roque</last></author>
       <author><first>Dumisizwe</first><last>Bhembe</last></author>
       <author><first>Kurt</first><last>VanLehn</last></author>
@@ -608,7 +608,7 @@
       <title>Auditory-based Acoustic Distinctive Features and Spectral Cues for Robust Automatic Speech Recognition in Low-<fixed-case>SNR</fixed-case> Car Environments</title>
       <author><first>Sid-Ahmed</first><last>Selouani</last></author>
       <author><first>Hesham</first><last>Tolba</last></author>
-      <author><first>Douglas</first><last>O’Shaughnessy</last></author>
+      <author id="douglas-oshaughnessy"><first>Douglas</first><last>O’Shaughnessy</last></author>
       <pages>91–93</pages>
       <url hash="9b01ecdc">N03-2031</url>
       <bibkey>selouani-etal-2003-auditory</bibkey>
@@ -617,7 +617,7 @@
       <title>Latent Semantic Analysis for Dialogue Act Classification</title>
       <author><first>Riccardo</first><last>Serafin</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <pages>94–96</pages>
       <url hash="889d103a">N03-2032</url>
       <bibkey>serafin-etal-2003-latent</bibkey>
@@ -626,8 +626,8 @@
       <title>Automatically Predicting Information Quality in News Documents</title>
       <author><first>Rong</first><last>Tang</last></author>
       <author><first>Kwong Bor</first><last>Ng</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Paul B.</first><last>Kantor</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="paul-kantor"><first>Paul B.</first><last>Kantor</last></author>
       <pages>97–99</pages>
       <url hash="22732d65">N03-2033</url>
       <bibkey>tang-etal-2003-automatically</bibkey>
@@ -651,7 +651,7 @@
     </paper>
     <paper id="36">
       <title>A Phrase-based Unigram Model for Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>106–108</pages>
       <url hash="1d92d5fc">N03-2036</url>
@@ -659,7 +659,7 @@
     </paper>
     <paper id="37">
       <title>Evaluating Answers to Definition Questions</title>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <pages>109–111</pages>
       <url hash="325639c4">N03-2037</url>
       <bibkey>voorhees-2003-evaluating-answers</bibkey>
@@ -766,16 +766,16 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>TIPS</fixed-case>: A Translingual Information Processing System</title>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Martin</first><last>Franz</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>J. Scott</first><last>McCarley</last></author>
+      <author id="j-scott-mccarley"><first>J. Scott</first><last>McCarley</last></author>
       <author><first>Kishore</first><last>Papineni</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>1–2</pages>
@@ -794,7 +794,7 @@
     <paper id="3">
       <title><fixed-case>DOGHED</fixed-case>: A Template-Based Generator for Multimodal Dialog Systems Targeting Heterogeneous Devices</title>
       <author><first>Songsak</first><last>Channarukul</last></author>
-      <author><first>Susan W.</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
       <author><first>Syed S.</first><last>Ali</last></author>
       <pages>5–6</pages>
       <url hash="28c556d9">N03-4003</url>
@@ -803,15 +803,15 @@
     <paper id="4">
       <title><fixed-case>TAP</fixed-case>-<fixed-case>XL</fixed-case>: An Automated Analyst’s Assistant</title>
       <author><first>Sean</first><last>Colbath</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <pages>7–8</pages>
       <url hash="f160ee05">N03-4004</url>
       <bibkey>colbath-kubala-2003-tap</bibkey>
     </paper>
     <paper id="5">
       <title>A Spoken Dialogue Interface to a Geologist’s Field Assistant</title>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>James</first><last>Hieronymus</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="james-hieronymus"><first>James</first><last>Hieronymus</last></author>
       <pages>9–10</pages>
       <url hash="e970dc14">N03-4005</url>
       <bibkey>dowding-hieronymus-2003-spoken</bibkey>
@@ -819,8 +819,8 @@
     <paper id="6">
       <title><fixed-case>QCS</fixed-case>: A Tool for Querying, Clustering, and Summarizing Documents</title>
       <author><first>Daniel M.</first><last>Dunlavy</last></author>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Dianne P.</first><last>O’Leary</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="dianne-p-oleary"><first>Dianne P.</first><last>O’Leary</last></author>
       <pages>11–12</pages>
       <url hash="7c5c3db8">N03-4006</url>
       <bibkey>dunlavy-etal-2003-qcs</bibkey>
@@ -828,26 +828,26 @@
     <paper id="7">
       <title>Demonstration of the <fixed-case>CROSSMARC</fixed-case> System</title>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <author><first>Dimitris</first><last>Souflis</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Michele</first><last>Vindigni</last></author>
       <author><first>Emmanuel</first><last>Cartier</last></author>
-      <author><first>Jose</first><last>Coch</last></author>
+      <author id="jose-coch"><first>Jose</first><last>Coch</last></author>
       <pages>13–14</pages>
       <url hash="aa73bed1">N03-4007</url>
       <bibkey>karkaletsis-etal-2003-demonstration</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>C</fixed-case>olumbia’s Newsblaster: New Features and Future Directions</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>David</first><last>Elson</last></author>
-      <author><first>David</first><last>Evans</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
+      <author id="david-k-evans"><first>David</first><last>Evans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <author><first>Barry</first><last>Schiffman</last></author>
       <author><first>Sergey</first><last>Sigelman</last></author>
@@ -857,7 +857,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>reak: An Open Tool for Linguistic Annotation</title>
-      <author><first>Thomas</first><last>Morton</last></author>
+      <author id="thomas-s-morton"><first>Thomas</first><last>Morton</last></author>
       <author><first>Jeremy</first><last>LaCivita</last></author>
       <pages>17–18</pages>
       <url hash="1ee32330">N03-4009</url>
@@ -865,8 +865,8 @@
     </paper>
     <paper id="10">
       <title><fixed-case>JAVELIN</fixed-case>: A Flexible, Planner-Based Architecture for Question Answering</title>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <pages>19–20</pages>
       <url hash="e402e68a">N03-4010</url>
       <bibkey>nyberg-frederking-2003-javelin</bibkey>
@@ -881,7 +881,7 @@
     </paper>
     <paper id="12">
       <title>Automatic Extraction of Semantic Networks from Text using Leximancer</title>
-      <author><first>Andrew E.</first><last>Smith</last></author>
+      <author id="andrew-smith"><first>Andrew E.</first><last>Smith</last></author>
       <pages>23–24</pages>
       <url hash="b8521c5f">N03-4012</url>
       <bibkey>smith-2003-automatic</bibkey>
@@ -890,7 +890,7 @@
       <title>pre-<fixed-case>CODIE</fixed-case>–Crosslingual On-Demand Information Extraction</title>
       <author><first>Kiyoshi</first><last>Sudo</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>25–26</pages>
       <url hash="1dec8581">N03-4013</url>
       <bibkey>sudo-etal-2003-pre</bibkey>
@@ -904,19 +904,19 @@
     </paper>
     <paper id="15">
       <title><fixed-case>S</fixed-case>peechalator: Two-Way Speech-to-Speech Translation in Your Hand</title>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Ahmed</first><last>Badran</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Kevin</first><last>Lenzo</last></author>
-      <author><first>Laura Mayfield</first><last>Tomokiyo</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura Mayfield</first><last>Tomokiyo</last></author>
       <author><first>Juergen</first><last>Reichert</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
-      <author><first>Monika</first><last>Woszczyna</last></author>
+      <author id="monika-woszczyna"><first>Monika</first><last>Woszczyna</last></author>
       <author><first>Jing</first><last>Zhang</last></author>
       <pages>29–30</pages>
       <url hash="181b664d">N03-4015</url>
@@ -933,8 +933,8 @@
     <paper id="17">
       <title>Identifying Opinionated Sentences</title>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>David R.</first><last>Pierce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="david-pierce"><first>David R.</first><last>Pierce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>33–34</pages>
       <url hash="4209bdd1">N03-4017</url>
       <bibkey>wilson-etal-2003-identifying</bibkey>
@@ -959,7 +959,7 @@
     </paper>
     <paper id="2">
       <title>Information Retrieval Systems as Integration Platforms for Language Technologies</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <pages>2–2</pages>
       <url hash="6d3e38d2">N03-5002</url>
       <bibkey>oard-2003-information</bibkey>
@@ -973,7 +973,7 @@
     </paper>
     <paper id="4">
       <title>The State of the Art in Language Modeling</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <pages>4–4</pages>
       <url hash="97c22dcd">N03-5004</url>
       <bibkey>goodman-2003-state</bibkey>
@@ -988,7 +988,7 @@
     </paper>
     <paper id="6">
       <title>Annotation of Temporal and Event Expressions</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <pages>6–6</pages>
       <url hash="62cda8dd">N03-5006</url>
@@ -1003,7 +1003,7 @@
     </paper>
     <paper id="8">
       <title>Optimization, Maxent Models, and Conditional Estimation without Magic</title>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>8–8</pages>
       <url hash="dc11adf9">N03-5008</url>
diff --git a/data/xml/N04.xml b/data/xml/N04.xml
index aa3c26b7ee..b39f822995 100644
--- a/data/xml/N04.xml
+++ b/data/xml/N04.xml
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>Cross-Document Coreference on a Large Scale Corpus</title>
       <author><first>Chung Heong</first><last>Gooi</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>9–16</pages>
       <url hash="1413a1cb">N04-1002</url>
       <bibkey>gooi-allan-2004-cross</bibkey>
@@ -47,15 +47,15 @@
     <paper id="4">
       <title>A Salience-Based Approach to Gesture-Speech Alignment</title>
       <author><first>Jacob</first><last>Eisenstein</last></author>
-      <author><first>C. Mario</first><last>Christoudias</last></author>
+      <author id="c-mario-christoudias"><first>C. Mario</first><last>Christoudias</last></author>
       <pages>25–32</pages>
       <url hash="7a9af566">N04-1004</url>
       <bibkey>eisenstein-christoudias-2004-salience</bibkey>
     </paper>
     <paper id="5">
       <title>Balancing data-driven and rule-based approaches in the context of a Multimodal Conversational System</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <pages>33–40</pages>
       <url hash="c3403937">N04-1005</url>
       <bibkey>bangalore-johnston-2004-balancing</bibkey>
@@ -89,7 +89,7 @@
       <title>A Probabilistic Rasch Analysis of Question Answering Evaluations</title>
       <author><first>Rense</first><last>Lange</last></author>
       <author><first>Juan</first><last>Moran</last></author>
-      <author><first>Warren R.</first><last>Greiff</last></author>
+      <author id="warren-greiff"><first>Warren R.</first><last>Greiff</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
       <pages>65–72</pages>
       <url hash="629e44b1">N04-1009</url>
@@ -105,7 +105,7 @@
     </paper>
     <paper id="11">
       <title>Sentence-Internal Prosody Does not Help Parsing the Way Punctuation Does</title>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>81–88</pages>
@@ -122,11 +122,11 @@
     </paper>
     <paper id="13">
       <title>Speed and Accuracy in Shallow and Deep Stochastic Parsing</title>
-      <author><first>Ron</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ron</first><last>Kaplan</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Tracy H.</first><last>King</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
-      <author><first>Alex</first><last>Vasserman</last></author>
+      <author id="tracy-holloway-king"><first>Tracy H.</first><last>King</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
+      <author id="alexander-vasserman"><first>Alex</first><last>Vasserman</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <pages>97–104</pages>
       <url hash="9d2eb148">N04-1013</url>
@@ -158,8 +158,8 @@
     </paper>
     <paper id="17">
       <title>Lattice-Based Search for Spoken Utterance Retrieval</title>
-      <author><first>Murat</first><last>Saraclar</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>129–136</pages>
       <url hash="9a8afa0f">N04-1017</url>
       <bibkey>saraclar-sproat-2004-lattice</bibkey>
@@ -167,8 +167,8 @@
     <paper id="18">
       <title>Detecting Structural Metadata with Decision Trees and Transformation-Based Learning</title>
       <author><first>Joungbum</first><last>Kim</last></author>
-      <author><first>Sarah E.</first><last>Schwarm</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="sarah-e-schwarm"><first>Sarah E.</first><last>Schwarm</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>137–144</pages>
       <url hash="149c60c5">N04-1018</url>
       <bibkey>kim-etal-2004-detecting</bibkey>
@@ -176,7 +176,7 @@
     <paper id="19">
       <title>Evaluating Content Selection in Summarization: The Pyramid Method</title>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>145–152</pages>
       <url hash="d4eb060b">N04-1019</url>
       <bibkey>nenkova-passonneau-2004-evaluating</bibkey>
@@ -191,19 +191,19 @@
     </paper>
     <paper id="21">
       <title>A Smorgasbord of Features for Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <author><first>Shankar</first><last>Kumar</last></author>
       <author><first>Libin</first><last>Shen</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Katherine</first><last>Eng</last></author>
       <author><first>Viren</first><last>Jain</last></author>
       <author><first>Zhen</first><last>Jin</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>161–168</pages>
       <url hash="51f2596c">N04-1021</url>
       <bibkey>och-etal-2004-smorgasbord</bibkey>
@@ -220,7 +220,7 @@
       <title>Discriminative Reranking for Machine Translation</title>
       <author><first>Libin</first><last>Shen</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <pages>177–184</pages>
       <url hash="7dcbfdd5">N04-1023</url>
       <bibkey>shen-etal-2004-discriminative</bibkey>
@@ -245,8 +245,8 @@
     </paper>
     <paper id="26">
       <title>Predicting Emotion in Spoken Dialogue from Multiple Knowledge Sources</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>201–208</pages>
       <url hash="bbcce013">N04-1026</url>
       <bibkey>forbes-riley-litman-2004-predicting</bibkey>
@@ -277,11 +277,11 @@
     </paper>
     <paper id="30">
       <title>Shallow Semantic Parsing using Support Vector Machines</title>
-      <author><first>Sameer S.</first><last>Pradhan</last></author>
-      <author><first>Wayne H.</first><last>Ward</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="sameer-pradhan"><first>Sameer S.</first><last>Pradhan</last></author>
+      <author id="wayne-ward"><first>Wayne H.</first><last>Ward</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>233–240</pages>
       <url hash="8fe51312">N04-1030</url>
       <bibkey>pradhan-etal-2004-shallow</bibkey>
@@ -298,7 +298,7 @@
     <paper id="32">
       <title>Shallow Semantic Parsing of <fixed-case>C</fixed-case>hinese</title>
       <author><first>Honglin</first><last>Sun</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>249–256</pages>
       <url hash="03330600">N04-1032</url>
       <bibkey>sun-jurafsky-2004-shallow</bibkey>
@@ -306,15 +306,15 @@
     <paper id="33">
       <title>Improvements in Phrase-Based Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>257–264</pages>
       <url hash="fba57f05">N04-1033</url>
       <bibkey>zens-ney-2004-improvements</bibkey>
     </paper>
     <paper id="34">
       <title>Improved Machine Translation Performance via Parallel Sentence Extraction from Comparable Corpora</title>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>265–272</pages>
       <url hash="3149074f">N04-1034</url>
@@ -333,16 +333,16 @@
     <paper id="36">
       <title>Improving Named Entity Translation Combining Phonetic and Semantic Similarities</title>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>281–288</pages>
       <url hash="2e9da600">N04-1036</url>
       <bibkey>huang-etal-2004-improving</bibkey>
     </paper>
     <paper id="37">
       <title>The (Non)Utility of Predicate-Argument Frequencies for Pronoun Interpretation</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
       <author><first>Lara</first><last>Taylor</last></author>
       <author><first>Aleksandr</first><last>Simma</last></author>
       <pages>289–296</pages>
@@ -351,22 +351,22 @@
     </paper>
     <paper id="38">
       <title>Unsupervised Learning of Contextual Role Knowledge for Coreference Resolution</title>
-      <author><first>David</first><last>Bean</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="david-l-bean"><first>David</first><last>Bean</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>297–304</pages>
       <url hash="480415c7">N04-1038</url>
       <bibkey>bean-riloff-2004-unsupervised</bibkey>
     </paper>
     <paper id="39">
       <title>Exponential Priors for Maximum Entropy Models</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <pages>305–312</pages>
       <url hash="b63f78f0">N04-1039</url>
       <bibkey>goodman-2004-exponential</bibkey>
     </paper>
     <paper id="40">
       <title>Multiple Similarity Measures and Source-Pair Information in Story Link Detection</title>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Ayman</first><last>Farahat</last></author>
       <author><first>Thorsten</first><last>Brants</last></author>
       <pages>313–320</pages>
@@ -423,7 +423,7 @@
     </paper>
     <paper id="2">
       <title>Identifying Chemical Names in Biomedical Text: an Investigation of Substring Co-occurrence Based Approaches</title>
-      <author><first>Alexander</first><last>Vasserman</last></author>
+      <author id="alexander-vasserman"><first>Alexander</first><last>Vasserman</last></author>
       <pages>7–12</pages>
       <url hash="f8a2c9d0">N04-2002</url>
       <bibkey>vasserman-2004-identifying</bibkey>
@@ -452,7 +452,7 @@
     </paper>
     <paper id="6">
       <title>Automatic Article Restoration</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>31–36</pages>
       <url hash="07f259ba">N04-2006</url>
       <bibkey>lee-2004-automatic</bibkey>
@@ -501,16 +501,16 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>C</fixed-case>olumbia Newsblaster: Multilingual News Summarization on the Web</title>
-      <author><first>David Kirk</first><last>Evans</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="david-k-evans"><first>David Kirk</first><last>Evans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>1–4</pages>
       <url hash="f770fa62">N04-3001</url>
       <bibkey>evans-etal-2004-columbia</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>ITSPOKE</fixed-case>: An Intelligent Tutoring Spoken Dialogue System</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <author><first>Scott</first><last>Silliman</last></author>
       <pages>5–8</pages>
       <url hash="2aa49ba5">N04-3002</url>
@@ -528,19 +528,19 @@
       <author><first>Susanne</first><last>Riehemann</last></author>
       <author><first>Dimitra</first><last>Vergyri</last></author>
       <author><first>Jing</first><last>Zheng</last></author>
-      <author><first>Christopher</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Christopher</first><last>Culy</last></author>
       <pages>9–12</pages>
       <url hash="15f7ce4b">N04-3003</url>
       <bibkey>precoda-etal-2004-limited</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>M</fixed-case>i<fixed-case>TAP</fixed-case> for <fixed-case>SARS</fixed-case> Detection</title>
-      <author><first>Laurie E.</first><last>Damianos</last></author>
-      <author><first>Samuel</first><last>Bayer</last></author>
+      <author id="laurie-damianos"><first>Laurie E.</first><last>Damianos</last></author>
+      <author id="samuel-bayer"><first>Samuel</first><last>Bayer</last></author>
       <author><first>Michael A.</first><last>Chisholm</last></author>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>William</first><last>Morgan</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="william-morgan"><first>William</first><last>Morgan</last></author>
       <author><first>Marc</first><last>Ubaldino</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
       <author><first>James M.</first><last>Wilson V</last></author>
@@ -551,13 +551,13 @@
     </paper>
     <paper id="5">
       <title>Multilingual Video and Audio News Alerting</title>
-      <author><first>David D.</first><last>Palmer</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
       <author><first>Patrick</first><last>Bray</last></author>
       <author><first>Marc</first><last>Reichman</last></author>
       <author><first>Katherine</first><last>Rhodes</last></author>
       <author><first>Noah</first><last>White</last></author>
       <author><first>Andrew</first><last>Merlino</last></author>
-      <author><first>Francis</first><last>Kubala</last></author>
+      <author id="francis-kubala"><first>Francis</first><last>Kubala</last></author>
       <pages>17–18</pages>
       <url hash="275b689c">N04-3005</url>
       <bibkey>palmer-etal-2004-multilingual</bibkey>
@@ -565,14 +565,14 @@
     <paper id="6">
       <title>Open Text Semantic Parsing Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Lei</first><last>Shi</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>19–22</pages>
       <url hash="47d39373">N04-3006</url>
       <bibkey>shi-mihalcea-2004-open</bibkey>
     </paper>
     <paper id="7">
       <title>A Scaleable Multi-document Centroid-based Summarizer</title>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Timothy</first><last>Allison</last></author>
       <author><first>Matthew</first><last>Craig</last></author>
       <author><first>Stanko</first><last>Dimitrov</last></author>
@@ -605,10 +605,10 @@
       <title>A <fixed-case>T</fixed-case>hai Speech Translation System for Medical Dialogs</title>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Dorcas</first><last>Alexander</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <author><first>Sinaporn</first><last>Suebvisai</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>34–35</pages>
       <url hash="7f8e7d96">N04-3010</url>
       <bibkey>schultz-etal-2004-thai</bibkey>
@@ -656,15 +656,15 @@
     <paper id="2">
       <title><fixed-case>MMR</fixed-case>-based Feature Selection for Text Categorization</title>
       <author><first>Changki</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>5–8</pages>
       <url hash="bea319f4">N04-4002</url>
       <bibkey>lee-lee-2004-mmr</bibkey>
     </paper>
     <paper id="3">
       <title>Example-based Rescoring of Statistical Machine Translation Output</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <pages>9–12</pages>
       <url hash="4d2aee15">N04-4003</url>
@@ -676,7 +676,7 @@
       <author><first>Yasuhiro</first><last>Kodama</last></author>
       <author><first>Tomohiro</first><last>Watanabe</last></author>
       <author><first>Hiromitsu</first><last>Nishizaki</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>13–16</pages>
       <url hash="515f1915">N04-4004</url>
       <bibkey>utsuro-etal-2004-empirical</bibkey>
@@ -692,7 +692,7 @@
       <title>Language Model Adaptation with <fixed-case>MAP</fixed-case> Estimation and the Perceptron Algorithm</title>
       <author><first>Michiel</first><last>Bacchiani</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <pages>21–24</pages>
       <url hash="765c7688">N04-4006</url>
       <bibkey>bacchiani-etal-2004-language</bibkey>
@@ -700,9 +700,9 @@
     <paper id="7">
       <title>Advances in Children’s Speech Recognition within an Interactive Literacy Tutor</title>
       <author><first>Andreas</first><last>Hagen</last></author>
-      <author><first>Bryan</first><last>Pellom</last></author>
+      <author id="bryan-pellom"><first>Bryan</first><last>Pellom</last></author>
       <author><first>Sarel</first><last>van Vuuren</last></author>
-      <author><first>Ronald</first><last>Cole</last></author>
+      <author id="ronald-cole"><first>Ronald</first><last>Cole</last></author>
       <pages>25–28</pages>
       <url hash="17ef0aa5">N04-4007</url>
       <bibkey>hagen-etal-2004-advances</bibkey>
@@ -717,8 +717,8 @@
     </paper>
     <paper id="9">
       <title>Competitive Self-Trained Pronoun Interpretation</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
       <author><first>Lara</first><last>Taylor</last></author>
       <author><first>Aleksandr</first><last>Simma</last></author>
       <pages>33–36</pages>
@@ -729,7 +729,7 @@
       <title>Using N-best lists for Named Entity Recognition from <fixed-case>C</fixed-case>hinese Speech</title>
       <author><first>Lufeng</first><last>Zhai</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <author><first>Dekai</first><last>Wu</last></author>
       <pages>37–40</pages>
@@ -738,7 +738,7 @@
     </paper>
     <paper id="11">
       <title>Performance Evaluation and Error Analysis for Multimodal Reference Resolution in a Conversation System</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <author><first>Zahar</first><last>Prasov</last></author>
       <author><first>Pengyu</first><last>Hong</last></author>
       <pages>41–44</pages>
@@ -756,7 +756,7 @@
     </paper>
     <paper id="13">
       <title>Web Search Intent Induction via Automatic Query Reformulation</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Eric</first><last>Brill</last></author>
       <pages>49–52</pages>
       <url hash="538d3d83">N04-4013</url>
@@ -764,8 +764,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>HITIQA</fixed-case>: A Data Driven Approach to Interactive Analytical Question Answering</title>
-      <author><first>Sharon</first><last>Small</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>53–56</pages>
       <url hash="944dac70">N04-4014</url>
       <bibkey>small-strzalkowski-2004-hitiqa</bibkey>
@@ -781,15 +781,15 @@
       <title>Correction Grammars for Error Handling in a Speech Dialog System</title>
       <author><first>Hirohiko</first><last>Sagawa</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>61–64</pages>
       <url hash="e6095976">N04-4016</url>
       <bibkey>sagawa-etal-2004-correction</bibkey>
     </paper>
     <paper id="17">
       <title>A Comparison of Rule–Based and Statistical Methods for Semantic Language Modeling and Confidence Measurement</title>
-      <author><first>Ruhi</first><last>Srikaya</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Srikaya</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <author><first>Michael</first><last>Picheny</last></author>
       <pages>65–68</pages>
       <url hash="4d03524d">N04-4017</url>
@@ -825,7 +825,7 @@
     <paper id="21">
       <title>Feature-based Pronunciation Modeling for Speech Recognition</title>
       <author><first>Karen</first><last>Livescu</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>81–84</pages>
       <url hash="ac64e389">N04-4021</url>
       <bibkey>livescu-glass-2004-feature</bibkey>
@@ -833,14 +833,14 @@
     <paper id="22">
       <title>Context-based Speech Recognition Error Detection and Correction</title>
       <author><first>Arup</first><last>Sarma</last></author>
-      <author><first>David D.</first><last>Palmer</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
       <pages>85–88</pages>
       <url hash="b52bd52e">N04-4022</url>
       <bibkey>sarma-palmer-2004-context</bibkey>
     </paper>
     <paper id="23">
       <title>Feature Selection for Trainable Multilingual Broadcast News Segmentation</title>
-      <author><first>David D.</first><last>Palmer</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
       <author><first>Marc</first><last>Reichman</last></author>
       <author><first>Elyes</first><last>Yaich</last></author>
       <pages>89–92</pages>
@@ -849,31 +849,31 @@
     </paper>
     <paper id="24">
       <title>Direct Maximization of Average Precision by Hill-Climbing, with a Comparison to a Maximum Entropy Approach</title>
-      <author><first>William</first><last>Morgan</last></author>
-      <author><first>Warren</first><last>Greiff</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="william-morgan"><first>William</first><last>Morgan</last></author>
+      <author id="warren-greiff"><first>Warren</first><last>Greiff</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <pages>93–96</pages>
       <url hash="558b0141">N04-4024</url>
       <bibkey>morgan-etal-2004-direct</bibkey>
     </paper>
     <paper id="25">
       <title>Automated Team Discourse Annotation and Performance Prediction Using <fixed-case>LSA</fixed-case></title>
-      <author><first>Melanie J.</first><last>Martin</last></author>
-      <author><first>Peter W.</first><last>Foltz</last></author>
+      <author id="melanie-martin"><first>Melanie J.</first><last>Martin</last></author>
+      <author id="peter-foltz"><first>Peter W.</first><last>Foltz</last></author>
       <pages>97–100</pages>
       <url hash="19066eb0">N04-4025</url>
       <bibkey>martin-foltz-2004-automated</bibkey>
     </paper>
     <paper id="26">
       <title>A Unigram Orientation Model for Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>101–104</pages>
       <url hash="e11517c0">N04-4026</url>
       <bibkey>tillmann-2004-unigram</bibkey>
     </paper>
     <paper id="27">
       <title>Summarizing Email Threads</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Lokesh</first><last>Shrestha</last></author>
       <author><first>John</first><last>Chen</last></author>
       <author><first>Christy</first><last>Laurdisen</last></author>
@@ -898,7 +898,7 @@
       <author><first>B.</first><last>Schmidt-Nielsen</last></author>
       <author><first>Kent</first><last>Wittenburg</last></author>
       <author><first>Joseph</first><last>Woelfel</last></author>
-      <author><first>Fang-Fang</first><last>Zhang</last></author>
+      <author id="fang-fang-zhang"><first>Fang-Fang</first><last>Zhang</last></author>
       <pages>113–116</pages>
       <url hash="1a89696f">N04-4029</url>
       <bibkey>divi-etal-2004-speech</bibkey>
@@ -906,14 +906,14 @@
     <paper id="30">
       <title>Nearly-Automated Metadata Hierarchy Creation</title>
       <author><first>Emilia</first><last>Stoica</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>117–120</pages>
       <url hash="7136ea81">N04-4030</url>
       <bibkey>stoica-hearst-2004-nearly</bibkey>
     </paper>
     <paper id="31">
       <title>Computational Linkuistics: Word Triggers across Hyperlinks</title>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Hong</first><last>Qi</last></author>
       <author><first>Adam</first><last>Winkel</last></author>
       <author><first>Daniel</first><last>Tam</last></author>
@@ -923,8 +923,8 @@
     </paper>
     <paper id="32">
       <title>Parsing Conversational Speech Using Enhanced Segmentation</title>
-      <author><first>Jeremy G.</first><last>Kahn</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="jeremy-g-kahn"><first>Jeremy G.</first><last>Kahn</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Ciprian</first><last>Chelba</last></author>
       <pages>125–128</pages>
       <url hash="744d8319">N04-4032</url>
@@ -947,54 +947,54 @@
     </paper>
     <paper id="35">
       <title>Prosody-based Topic Segmentation for <fixed-case>M</fixed-case>andarin Broadcast News</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>137–140</pages>
       <url hash="395d8ac9">N04-4035</url>
       <bibkey>levow-2004-prosody</bibkey>
     </paper>
     <paper id="36">
       <title>Parsing Arguments of Nominalizations in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Honglin</first><last>Sun</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>141–144</pages>
       <url hash="14e3ea53">N04-4036</url>
       <bibkey>pradhan-etal-2004-parsing</bibkey>
     </paper>
     <paper id="37">
       <title>A Lightweight Semantic Chunker Based on Tagging</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
       <pages>145–148</pages>
       <url hash="bf60b188">N04-4037</url>
       <bibkey>hacioglu-2004-lightweight</bibkey>
     </paper>
     <paper id="38">
       <title>Automatic Tagging of <fixed-case>A</fixed-case>rabic Text: From Raw Text to Base Phrase Chunks</title>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>149–152</pages>
       <url hash="cf2a0ac0">N04-4038</url>
       <bibkey>diab-etal-2004-automatic</bibkey>
     </paper>
     <paper id="39">
       <title>Converting Text into Agent Animations: Assigning Gestures to Text</title>
-      <author><first>Yukiko I.</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko I.</first><last>Nakano</last></author>
       <author><first>Masashi</first><last>Okamoto</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Qing</first><last>Li</last></author>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <pages>153–156</pages>
       <url hash="3a505ed0">N04-4039</url>
       <bibkey>nakano-etal-2004-converting</bibkey>
     </paper>
     <paper id="40">
       <title>A Lexically-Driven Algorithm for Disfluency Detection</title>
-      <author><first>Matthew</first><last>Snover</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>157–160</pages>
       <url hash="c6b16520">N04-4040</url>
       <bibkey>snover-etal-2004-lexically</bibkey>
diff --git a/data/xml/N06.xml b/data/xml/N06.xml
index 8a50889a33..420eff9a5d 100644
--- a/data/xml/N06.xml
+++ b/data/xml/N06.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Human Language Technology Conference of the <fixed-case>NAACL</fixed-case>, Main Conference</booktitle>
       <url hash="cca0d79e">N06-1</url>
-      <editor><first>Robert C.</first><last>Moore</last></editor>
-      <editor><first>Jeff</first><last>Bilmes</last></editor>
-      <editor><first>Jennifer</first><last>Chu-Carroll</last></editor>
+      <editor id="robert-c-moore"><first>Robert C.</first><last>Moore</last></editor>
+      <editor id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></editor>
+      <editor id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></editor>
       <editor><first>Mark</first><last>Sanderson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, USA</address>
@@ -70,9 +70,9 @@
       <title>Learning to recognize features of valid textual entailments</title>
       <author><first>Bill</first><last>MacCartney</last></author>
       <author><first>Trond</first><last>Grenager</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>41–48</pages>
       <url hash="94ec5585">N06-1006</url>
       <bibkey>maccartney-etal-2006-learning</bibkey>
@@ -94,7 +94,7 @@
     <paper id="9">
       <title>Role of Local Context in Automatic Deidentification of Ungrammatical, Fragmented Text</title>
       <author><first>Tawanda</first><last>Sibanda</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
       <author><first>Ozlem</first><last>Uzuner</last></author>
       <pages>65–73</pages>
       <url hash="2bb60356">N06-1009</url>
@@ -103,7 +103,7 @@
     <paper id="10">
       <title>Exploiting Domain Structure for Named Entity Recognition</title>
       <author><first>Jing</first><last>Jiang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>74–81</pages>
       <url hash="8fc01649">N06-1010</url>
       <bibkey>jiang-zhai-2006-exploiting</bibkey>
@@ -127,8 +127,8 @@
     </paper>
     <paper id="13">
       <title>A Maximum Entropy Approach to Combining Word Alignments</title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>96–103</pages>
       <url hash="852cf32a">N06-1013</url>
       <bibkey>ayan-dorr-2006-maximum</bibkey>
@@ -147,7 +147,7 @@
       <author><first>Simon</first><last>Lacoste-Julien</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Michael I.</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael I.</first><last>Jordan</last></author>
       <pages>112–119</pages>
       <url hash="2c24f85e">N06-1015</url>
       <bibkey>lacoste-julien-etal-2006-word</bibkey>
@@ -156,8 +156,8 @@
       <title>An Empirical Study of the Behavior of Active Learning for Word Sense Disambiguation</title>
       <author><first>Jinying</first><last>Chen</last></author>
       <author><first>Andrew</first><last>Schein</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>120–127</pages>
       <url hash="1e6b5999">N06-1016</url>
       <bibkey>chen-etal-2006-empirical</bibkey>
@@ -172,8 +172,8 @@
     <paper id="18">
       <title>Understanding Temporal Expressions in Emails</title>
       <author><first>Benjamin</first><last>Han</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>136–143</pages>
       <url hash="d8395ed5">N06-1018</url>
       <bibkey>han-etal-2006-understanding</bibkey>
@@ -181,7 +181,7 @@
     <paper id="19">
       <title>Partial Training for a Lexicalized-Grammar Parser</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>144–151</pages>
       <url hash="60527a93">N06-1019</url>
       <bibkey>clark-curran-2006-partial</bibkey>
@@ -197,10 +197,10 @@
     </paper>
     <paper id="21">
       <title>Multilingual Dependency Parsing using <fixed-case>B</fixed-case>ayes Point Machines</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Anthony</first><last>Aue</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <pages>160–167</pages>
       <url hash="0f0853a7">N06-1021</url>
       <bibkey>corston-oliver-etal-2006-multilingual</bibkey>
@@ -211,7 +211,7 @@
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
       <author><first>Joseph</first><last>Austerweil</last></author>
-      <author><first>David</first><last>Ellis</last></author>
+      <author id="david-ellis"><first>David</first><last>Ellis</last></author>
       <author><first>Isaac</first><last>Haxton</last></author>
       <author><first>Catherine</first><last>Hill</last></author>
       <author><first>R.</first><last>Shrivaths</last></author>
@@ -234,14 +234,14 @@
       <title>Fully Parsing the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>Ryan</first><last>Gabbard</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <pages>184–191</pages>
       <url hash="8869c887">N06-1024</url>
       <bibkey>gabbard-etal-2006-fully</bibkey>
     </paper>
     <paper id="25">
       <title>Exploiting Semantic Role Labeling, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikipedia for Coreference Resolution</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>192–199</pages>
       <url hash="9cff0b24">N06-1025</url>
@@ -250,7 +250,7 @@
     <paper id="26">
       <title>Identifying and Analyzing Judgment Opinions</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>200–207</pages>
       <url hash="765a045a">N06-1026</url>
       <bibkey>kim-hovy-2006-identifying</bibkey>
@@ -260,7 +260,7 @@
       <author><first>Donghui</first><last>Feng</last></author>
       <author><first>Erin</first><last>Shaw</last></author>
       <author><first>Jihie</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>208–215</pages>
       <url hash="e78fb053">N06-1027</url>
       <bibkey>feng-etal-2006-learning</bibkey>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="29">
       <title>Unsupervised and Semi-supervised Learning of Tone and Pitch Accent</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>224–231</pages>
       <url hash="9149b631">N06-1029</url>
       <bibkey>levow-2006-unsupervised</bibkey>
@@ -283,7 +283,7 @@
     <paper id="30">
       <title>Learning Pronunciation Dictionaries: Language Complexity and Word Selection Strategies</title>
       <author><first>John</first><last>Kominek</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>232–239</pages>
       <url hash="7c164e1c">N06-1030</url>
       <bibkey>kominek-black-2006-learning</bibkey>
@@ -299,7 +299,7 @@
     <paper id="32">
       <title>Grammatical Machine Translation</title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <pages>248–255</pages>
       <url hash="dbf2b22f">N06-1032</url>
       <bibkey>riezler-maxwell-iii-2006-grammatical</bibkey>
@@ -316,16 +316,16 @@
     </paper>
     <paper id="34">
       <title>Modelling User Satisfaction and Student Learning in a Spoken Dialogue Tutoring System with Generic, Tutoring, and User Affect Parameters</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>264–271</pages>
       <url hash="549ca8b1">N06-1034</url>
       <bibkey>forbes-riley-litman-2006-modelling</bibkey>
     </paper>
     <paper id="35">
       <title>Comparing the Utility of State Features in Spoken Dialogue Using Reinforcement Learning</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>272–279</pages>
       <url hash="ea531161">N06-1035</url>
       <bibkey>tetreault-litman-2006-comparing</bibkey>
@@ -383,7 +383,7 @@
     <paper id="42">
       <title>Learning Morphological Disambiguation Rules for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Deniz</first><last>Yuret</last></author>
-      <author><first>Ferhan</first><last>Türe</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Türe</last></author>
       <pages>328–334</pages>
       <url hash="b7515acc">N06-1042</url>
       <bibkey>yuret-ture-2006-learning</bibkey>
@@ -425,7 +425,7 @@
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>367–374</pages>
       <url hash="7604de96">N06-1047</url>
       <bibkey>murray-etal-2006-incorporating</bibkey>
@@ -468,7 +468,7 @@
       <author><first>Tao</first><last>Tao</last></author>
       <author><first>Xuanhui</first><last>Wang</last></author>
       <author><first>Qiaozhu</first><last>Mei</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>407–414</pages>
       <url hash="1d1e8f01">N06-1052</url>
       <bibkey>tao-etal-2006-language</bibkey>
@@ -485,8 +485,8 @@
     </paper>
     <paper id="54">
       <title>A fast finite-state relaxation method for enforcing global constraints on sequence decoding</title>
-      <author><first>Roy</first><last>Tromble</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="roy-tromble"><first>Roy</first><last>Tromble</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>423–430</pages>
       <url hash="2277de11">N06-1054</url>
       <bibkey>tromble-eisner-2006-fast</bibkey>
@@ -501,7 +501,7 @@
     <paper id="56">
       <title>Learning for Semantic Parsing with Statistical Machine Translation</title>
       <author><first>Yuk Wah</first><last>Wong</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>439–446</pages>
       <url hash="e3d71bd2">N06-1056</url>
       <bibkey>wong-mooney-2006-learning</bibkey>
@@ -509,9 +509,9 @@
     <paper id="57">
       <title><fixed-case>P</fixed-case>ara<fixed-case>E</fixed-case>val: Using Paraphrases to Evaluate Summaries Automatically</title>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>447–454</pages>
       <url hash="490cffb7">N06-1057</url>
       <bibkey>zhou-etal-2006-paraeval</bibkey>
@@ -526,10 +526,10 @@
     </paper>
     <paper id="59">
       <title>An Information-Theoretic Approach to Automatic Evaluation of Summaries</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>463–470</pages>
       <url hash="e5e4ef9d">N06-1059</url>
       <bibkey>lin-etal-2006-information</bibkey>
@@ -537,7 +537,7 @@
     <paper id="60">
       <title>Cross Linguistic Name Matching in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic</title>
       <author><first>Andrew</first><last>Freeman</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <author><first>Christopher</first><last>Ackerman</last></author>
       <pages>471–478</pages>
       <url hash="8165fb0b">N06-1060</url>
@@ -545,7 +545,7 @@
     </paper>
     <paper id="61">
       <title>Language Model-Based Document Clustering Using Random Walks</title>
-      <author><first>Güneş</first><last>Erkan</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
       <pages>479–486</pages>
       <url hash="1865cd8e">N06-1061</url>
       <bibkey>erkan-2006-language</bibkey>
@@ -559,7 +559,7 @@
       <author><first>Teemu</first><last>Hirsimäki</last></author>
       <author><first>Janne</first><last>Pylkkönen</last></author>
       <author><first>Tanel</first><last>Alumäe</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <pages>487–494</pages>
       <url hash="a119526c">N06-1062</url>
       <bibkey>kurimo-etal-2006-unlimited</bibkey>
@@ -569,9 +569,9 @@
     <meta>
       <booktitle>Proceedings of the Human Language Technology Conference of the <fixed-case>NAACL</fixed-case>, Companion Volume: Short Papers</booktitle>
       <url hash="9db8f01a">N06-2</url>
-      <editor><first>Robert C.</first><last>Moore</last></editor>
-      <editor><first>Jeff</first><last>Bilmes</last></editor>
-      <editor><first>Jennifer</first><last>Chu-Carroll</last></editor>
+      <editor id="robert-c-moore"><first>Robert C.</first><last>Moore</last></editor>
+      <editor id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></editor>
+      <editor id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></editor>
       <editor><first>Mark</first><last>Sanderson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, USA</address>
@@ -594,10 +594,10 @@
     <paper id="2">
       <title>The <fixed-case>MILE</fixed-case> Corpus for Less Commonly Taught Languages</title>
       <author><first>Alison</first><last>Alvarez</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Simon</first><last>Fung</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Jeff</first><last>Good</last></author>
       <pages>5–8</pages>
       <url hash="692a0412">N06-2002</url>
@@ -606,7 +606,7 @@
     <paper id="3">
       <title><fixed-case>M</fixed-case>useli: A Multi-Source Evidence Integration Approach to Topic Segmentation of Spontaneous Dialogue</title>
       <author><first>Jaime</first><last>Arguello</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>9–12</pages>
       <url hash="1b7b6fe2">N06-2003</url>
       <bibkey>arguello-rose-2006-museli</bibkey>
@@ -631,8 +631,8 @@
       <title>Class Model Adaptation for Speech Summarisation</title>
       <author><first>Pierre</first><last>Chatain</last></author>
       <author><first>Edward</first><last>Whittaker</last></author>
-      <author><first>Joanna</first><last>Mrozinski</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="joanna-mrozinski"><first>Joanna</first><last>Mrozinski</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>21–24</pages>
       <url hash="df459aee">N06-2006</url>
       <bibkey>chatain-etal-2006-class</bibkey>
@@ -640,9 +640,9 @@
     <paper id="7">
       <title>Semi-supervised Relation Extraction with Label Propagation</title>
       <author><first>Jinxiu</first><last>Chen</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>25–28</pages>
       <url hash="ad0602bb">N06-2007</url>
       <bibkey>chen-etal-2006-semi</bibkey>
@@ -656,7 +656,7 @@
     </paper>
     <paper id="9">
       <title>Answering the question you wish they had asked: The impact of paraphrasing for Question Answering</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <author><first>Jennifer</first><last>Chu-Carroll</last></author>
       <pages>33–36</pages>
       <url hash="038f74dc">N06-2009</url>
@@ -673,8 +673,8 @@
     <paper id="11">
       <title>Spectral Clustering for Example Based Machine Translation</title>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>41–44</pages>
       <url hash="5c046225">N06-2011</url>
       <bibkey>gangadharaiah-etal-2006-spectral</bibkey>
@@ -698,28 +698,28 @@
       <title>Agreement/Disagreement Classification: Exploiting Unlabeled Data using Contrast Classifiers</title>
       <author><first>Sangyun</first><last>Hahn</last></author>
       <author><first>Richard</first><last>Ladner</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>53–56</pages>
       <url hash="5ef5e8b8">N06-2014</url>
       <bibkey>hahn-etal-2006-agreement</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: The 90% Solution</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>57–60</pages>
       <url hash="b15dfd9e">N06-2015</url>
       <bibkey>hovy-etal-2006-ontonotes</bibkey>
     </paper>
     <paper id="16">
       <title>Investigating Cross-Language Speech Retrieval for a Spontaneous Conversational Speech Collection</title>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Muath</first><last>Alzghool</last></author>
       <author><first>Gareth</first><last>Jones</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>61–64</pages>
       <url hash="5aeede3a">N06-2016</url>
       <bibkey>inkpen-etal-2006-investigating</bibkey>
@@ -736,8 +736,8 @@
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Yu</first><last>Song</last></author>
       <author><first>Kyungduk</first><last>Kim</last></author>
-      <author><first>Jeong-Won</first><last>Cha</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeong-Won</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>69–72</pages>
       <url hash="faeb9468">N06-2018</url>
       <bibkey>kim-etal-2006-mmr</bibkey>
@@ -768,8 +768,8 @@
     </paper>
     <paper id="22">
       <title>Automatic Recognition of Personality in Conversation</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>85–88</pages>
       <url hash="7b724d17">N06-2022</url>
       <bibkey>mairesse-walker-2006-automatic</bibkey>
@@ -777,7 +777,7 @@
     <paper id="23">
       <title>Summarizing Speech Without Text Using Hidden <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Sameer</first><last>Maskey</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>89–92</pages>
       <url hash="31b17423">N06-2023</url>
       <bibkey>maskey-hirschberg-2006-summarizing</bibkey>
@@ -785,9 +785,9 @@
     <paper id="24">
       <title><fixed-case>NER</fixed-case> Systems that Suit User’s Preferences: Adjusting the Recall-Precision Trade-off for Entity Extraction</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>Richard</first><last>Wang</last></author>
+      <author id="richard-c-wang"><first>Richard</first><last>Wang</last></author>
       <author><first>Anthony</first><last>Tomasic</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>93–96</pages>
       <url hash="9ea79a24">N06-2024</url>
       <bibkey>minkov-etal-2006-ner</bibkey>
@@ -801,7 +801,7 @@
     </paper>
     <paper id="26">
       <title>Accurate Parsing of the <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank</title>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>101–104</pages>
       <url hash="0939f7f8">N06-2026</url>
@@ -809,7 +809,7 @@
     </paper>
     <paper id="27">
       <title>Using Semantic Authoring for <fixed-case>B</fixed-case>lissymbols Communication Boards</title>
-      <author><first>Yael</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Netzer</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
       <pages>105–108</pages>
       <url hash="309efd0b">N06-2027</url>
@@ -825,8 +825,8 @@
     </paper>
     <paper id="29">
       <title>Exploiting Variant Corpora for Machine Translation</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>113–116</pages>
       <url hash="60a1d646">N06-2029</url>
       <bibkey>paul-sumita-2006-exploiting</bibkey>
@@ -843,7 +843,7 @@
       <title>Computational Modelling of Structural Priming in Dialogue</title>
       <author><first>David</first><last>Reitter</last></author>
       <author><first>Frank</first><last>Keller</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>121–124</pages>
       <url hash="4e593624">N06-2031</url>
       <bibkey>reitter-etal-2006-computational</bibkey>
@@ -851,7 +851,7 @@
     <paper id="32">
       <title>Story Segmentation of Broadcast News in <fixed-case>E</fixed-case>nglish, <fixed-case>M</fixed-case>andarin and <fixed-case>A</fixed-case>rabic</title>
       <author><first>Andrew</first><last>Rosenberg</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>125–128</pages>
       <url hash="cd2be72c">N06-2032</url>
       <bibkey>rosenberg-hirschberg-2006-story</bibkey>
@@ -859,7 +859,7 @@
     <paper id="33">
       <title>Parser Combination by Reparsing</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>129–132</pages>
       <url hash="dc2e6509">N06-2033</url>
       <bibkey>sagae-lavie-2006-parser</bibkey>
@@ -876,7 +876,7 @@
     <paper id="35">
       <title>Weblog Classification for Fast Splog Filtering: A <fixed-case>URL</fixed-case> Language Model Segmentation Approach</title>
       <author><first>Franco</first><last>Salvetti</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <pages>137–140</pages>
       <url hash="56acd362">N06-2035</url>
       <bibkey>salvetti-nicolov-2006-weblog</bibkey>
@@ -885,7 +885,7 @@
       <title>Word Domain Disambiguation via Word Sense Disambiguation</title>
       <author><first>Antonio</first><last>Sanfilippo</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <pages>141–144</pages>
       <url hash="b6d3f6bc">N06-2036</url>
       <bibkey>sanfilippo-etal-2006-word</bibkey>
@@ -893,8 +893,8 @@
     <paper id="37">
       <title>Selecting relevant text subsets from web-data for building topic specific language models</title>
       <author><first>Abhinav</first><last>Sethy</last></author>
-      <author><first>Panayiotis</first><last>Georgiou</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>145–148</pages>
       <url hash="380c2d20">N06-2037</url>
       <bibkey>sethy-etal-2006-selecting</bibkey>
@@ -909,7 +909,7 @@
     <paper id="39">
       <title>Unsupervised Induction of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Verb Classes</title>
       <author><first>Neal</first><last>Snider</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>153–156</pages>
       <url hash="2104513a">N06-2039</url>
       <bibkey>snider-diab-2006-unsupervised</bibkey>
@@ -917,16 +917,16 @@
     <paper id="40">
       <title>Sentence Planning for Realtime Navigational Instruction</title>
       <author><first>Laura</first><last>Stoia</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
-      <author><first>Darla</first><last>Shockley</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
+      <author id="darla-magdalene-shockley"><first>Darla</first><last>Shockley</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>157–160</pages>
       <url hash="a31fc22e">N06-2040</url>
       <bibkey>stoia-etal-2006-sentence</bibkey>
     </paper>
     <paper id="41">
       <title>Using the Web to Disambiguate Acronyms</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Fumiaki</first><last>Sugaya</last></author>
       <pages>161–164</pages>
       <url hash="1e572046">N06-2041</url>
@@ -934,7 +934,7 @@
     </paper>
     <paper id="42">
       <title>Word Pronunciation Disambiguation using the Web</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Fumiaki</first><last>Sugaya</last></author>
       <pages>165–168</pages>
       <url hash="4b2122d8">N06-2042</url>
@@ -944,7 +944,7 @@
       <title>Illuminating Trouble Tickets with Sublanguage Theory</title>
       <author><first>Svetlana</first><last>Symonenko</last></author>
       <author><first>Steven</first><last>Rowe</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <pages>169–172</pages>
       <url hash="b1890a90">N06-2043</url>
       <bibkey>symonenko-etal-2006-illuminating</bibkey>
@@ -952,7 +952,7 @@
     <paper id="44">
       <title>Evolving optimal inspectable strategies for spoken dialogue systems</title>
       <author><first>Dave</first><last>Toney</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>173–176</pages>
       <url hash="06283ff0">N06-2044</url>
@@ -976,7 +976,7 @@
     <paper id="47">
       <title>A Maximum Entropy Framework that Integrates Word Dependencies and Grammatical Relations for Reading Comprehension</title>
       <author><first>Kui</first><last>Xu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <author><first>Fuliang</first><last>Weng</last></author>
       <pages>185–188</pages>
       <url hash="1bb97676">N06-2047</url>
@@ -994,7 +994,7 @@
       <title>Subword-based Tagging by Conditional Random Fields for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>193–196</pages>
       <url hash="d88f11ed">N06-2049</url>
       <bibkey>zhang-etal-2006-subword</bibkey>
@@ -1011,7 +1011,7 @@
       <title>Bridging the Inflection Morphology Gap for <fixed-case>A</fixed-case>rabic Statistical Machine Translation</title>
       <author><first>Andreas</first><last>Zollmann</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>201–204</pages>
       <url hash="34ef1a4f">N06-2051</url>
       <bibkey>zollmann-etal-2006-bridging</bibkey>
@@ -1023,7 +1023,7 @@
       <url hash="74eac76c">N06-3</url>
       <editor><first>Matt</first><last>Huenerfauth</last></editor>
       <editor><first>Bo</first><last>Pang</last></editor>
-      <editor><first>Mitch</first><last>Marcus</last></editor>
+      <editor id="mitch-marcus"><first>Mitch</first><last>Marcus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, USA</address>
       <month>June</month>
@@ -1050,7 +1050,7 @@
     </paper>
     <paper id="3">
       <title>Can the <fixed-case>I</fixed-case>nternet help improve Machine Translation?</title>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
       <pages>219–222</pages>
       <url hash="354d010d">N06-3003</url>
       <bibkey>font-llitjos-2006-internet</bibkey>
@@ -1078,21 +1078,21 @@
     </paper>
     <paper id="7">
       <title>Document Representation and Multilevel Measures of Document Similarity</title>
-      <author><first>Irina</first><last>Matveeva</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
       <pages>235–238</pages>
       <url hash="9285ce41">N06-3007</url>
       <bibkey>matveeva-2006-document</bibkey>
     </paper>
     <paper id="8">
       <title>Logical investigations on the adequacy of certain feature-based theories of natural language</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>239–242</pages>
       <url hash="d05ce7f5">N06-3008</url>
       <bibkey>sogaard-2006-logical</bibkey>
     </paper>
     <paper id="9">
       <title>A Hybrid Approach to Biomedical Named Entity Recognition and Semantic Role Labeling</title>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>243–246</pages>
       <url hash="71b4af08">N06-3009</url>
       <bibkey>tsai-2006-hybrid</bibkey>
@@ -1109,8 +1109,8 @@
     <meta>
       <booktitle>Proceedings of the Human Language Technology Conference of the <fixed-case>NAACL</fixed-case>, Companion Volume: Demonstrations</booktitle>
       <url hash="62a37b88">N06-4</url>
-      <editor><first>Alex</first><last>Rudnicky</last></editor>
-      <editor><first>John</first><last>Dowding</last></editor>
+      <editor id="alexander-rudnicky"><first>Alex</first><last>Rudnicky</last></editor>
+      <editor id="john-dowding"><first>John</first><last>Dowding</last></editor>
       <editor><first>Natasa</first><last>Milic-Frayling</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, USA</address>
@@ -1125,14 +1125,14 @@
     <paper id="1">
       <title><fixed-case>I</fixed-case>nfo<fixed-case>M</fixed-case>agnets: Making Sense of Corpus Data</title>
       <author><first>Jaime</first><last>Arguello</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>253–256</pages>
       <url hash="ae589f2e">N06-4001</url>
       <bibkey>arguello-rose-2006-infomagnets</bibkey>
     </paper>
     <paper id="2">
       <title>From Pipedreams to Products, and Promise!</title>
-      <author><first>Janet M.</first><last>Baker</last></author>
+      <author id="janet-baker"><first>Janet M.</first><last>Baker</last></author>
       <author><first>Patri J.</first><last>Pugliese</last></author>
       <pages>257–260</pages>
       <url hash="1bb71adf">N06-4002</url>
@@ -1165,7 +1165,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>K</fixed-case>nowtator: A Protégé plug-in for annotated corpus construction</title>
-      <author><first>Philip V.</first><last>Ogren</last></author>
+      <author id="philip-ogren"><first>Philip V.</first><last>Ogren</last></author>
       <pages>273–275</pages>
       <url hash="a9fdaac7">N06-4006</url>
       <bibkey>ogren-2006-knowtator</bibkey>
@@ -1221,8 +1221,8 @@
     <meta>
       <booktitle>Proceedings of the Human Language Technology Conference of the <fixed-case>NAACL</fixed-case>, Companion Volume: Tutorial Abstracts</booktitle>
       <url hash="e22f2608">N06-5</url>
-      <editor><first>Chris</first><last>Manning</last></editor>
-      <editor><first>Doug</first><last>Oard</last></editor>
+      <editor id="christopher-d-manning"><first>Chris</first><last>Manning</last></editor>
+      <editor id="douglas-w-oard"><first>Doug</first><last>Oard</last></editor>
       <editor><first>Jim</first><last>Glass</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, USA</address>
@@ -1236,23 +1236,23 @@
     </frontmatter>
     <paper id="1">
       <title>What‘s in a Name: Current Methods, Applications, and Evaluation in Multilingual Name Search and Matching</title>
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <pages>299–300</pages>
       <url hash="3bb8321e">N06-5001</url>
       <bibkey>condon-miller-2006-whats</bibkey>
     </paper>
     <paper id="2">
       <title>Beyond <fixed-case>EM</fixed-case>: <fixed-case>B</fixed-case>ayesian Techniques for Human Language Technology Researchers</title>
-      <author><first>Hal</first><last>Daume III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume III</last></author>
       <pages>301–302</pages>
       <url hash="1f17b15f">N06-5002</url>
       <bibkey>daume-iii-2006-beyond</bibkey>
     </paper>
     <paper id="3">
       <title>Graph-based Algorithms for Natural Language Processing and Information Retrieval</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>303–304</pages>
       <url hash="9461a0c4">N06-5003</url>
       <bibkey>mihalcea-radev-2006-graph</bibkey>
@@ -1260,7 +1260,7 @@
     <paper id="4">
       <title>Automatic Spoken Document Processing for Retrieval and Browsing</title>
       <author><first>Ciprian</first><last>Chelba</last></author>
-      <author><first>T. J.</first><last>Hazen</last></author>
+      <author id="timothy-j-hazen"><first>T. J.</first><last>Hazen</last></author>
       <pages>305–306</pages>
       <url hash="861e2ce4">N06-5004</url>
       <bibkey>chelba-hazen-2006-automatic</bibkey>
@@ -1268,14 +1268,14 @@
     <paper id="5">
       <title>Tutorial on Inductive Semi-supervised Learning Methods: with Applicability to Natural Language Processing</title>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>307–308</pages>
       <url hash="f2619115">N06-5005</url>
       <bibkey>sarkar-haffari-2006-tutorial</bibkey>
     </paper>
     <paper id="6">
       <title>Automatic Semantic Role Labeling</title>
-      <author><first>Scott Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>309–310</pages>
       <url hash="f567ddbc">N06-5006</url>
diff --git a/data/xml/N07.xml b/data/xml/N07.xml
index 11dc0646ea..9c7e205369 100644
--- a/data/xml/N07.xml
+++ b/data/xml/N07.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Human Language Technologies 2007: The Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference</booktitle>
       <url hash="e5036a66">N07-1</url>
-      <editor><first>Candace</first><last>Sidner</last></editor>
+      <editor id="candace-l-sidner"><first>Candace</first><last>Sidner</last></editor>
       <editor><first>Tanja</first><last>Schultz</last></editor>
       <editor><first>Matthew</first><last>Stone</last></editor>
-      <editor><first>ChengXiang</first><last>Zhai</last></editor>
+      <editor id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, New York</address>
       <month>April</month>
@@ -21,8 +21,8 @@
     <paper id="1">
       <title>Exploiting Acoustic and Syntactic Features for Prosody Labeling in a Maximum Entropy Framework</title>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>1–8</pages>
       <url hash="4c4f7b17">N07-1001</url>
       <bibkey>rangarajan-sridhar-etal-2007-exploiting</bibkey>
@@ -30,12 +30,12 @@
     <paper id="2">
       <title>To Memorize or to Predict: Prominence labeling in Conversational Speech</title>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Jason</first><last>Brenier</last></author>
+      <author id="jason-brenier"><first>Jason</first><last>Brenier</last></author>
       <author><first>Anubha</first><last>Kothari</last></author>
       <author><first>Sasha</first><last>Calhoun</last></author>
       <author><first>Laura</first><last>Whitton</last></author>
       <author><first>David</first><last>Beaver</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>9–16</pages>
       <url hash="a22242d4">N07-1002</url>
       <bibkey>nenkova-etal-2007-memorize</bibkey>
@@ -43,7 +43,7 @@
     <paper id="3">
       <title>Avoiding and Resolving Initiative Conflicts in Dialogue</title>
       <author><first>Fan</first><last>Yang</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <pages>17–24</pages>
       <url hash="cb25ab1f">N07-1003</url>
       <bibkey>yang-heeman-2007-avoiding</bibkey>
@@ -51,7 +51,7 @@
     <paper id="4">
       <title>What Decisions Have You Made?: Automatic Decision Detection in Meeting Conversations</title>
       <author><first>Pei-Yun</first><last>Hsueh</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>25–32</pages>
       <url hash="66992c88">N07-1004</url>
       <bibkey>hsueh-moore-2007-decisions</bibkey>
@@ -85,7 +85,7 @@
     <paper id="8">
       <title>Direct Translation Model 2</title>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>57–64</pages>
       <url hash="d0d2efdd">N07-1008</url>
       <bibkey>ittycheriah-roukos-2007-direct</bibkey>
@@ -93,14 +93,14 @@
     <paper id="9">
       <title>Structured Local Training and Biased Potential Functions for Conditional Random Fields with Application to Coreference Resolution</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>65–72</pages>
       <url hash="6d5ec9e4">N07-1009</url>
       <bibkey>choi-cardie-2007-structured</bibkey>
     </paper>
     <paper id="10">
       <title>Coreference or Not: A Twin Model for Coreference Resolution</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <pages>73–80</pages>
       <url hash="10bdb3f9">N07-1010</url>
       <bibkey>luo-2007-coreference</bibkey>
@@ -118,15 +118,15 @@
       <title>Information Retrieval On Empty Fields</title>
       <author><first>Victor</first><last>Lavrenko</last></author>
       <author><first>Xing</first><last>Yi</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>89–96</pages>
       <url hash="9150b3eb">N07-1012</url>
       <bibkey>lavrenko-etal-2007-information</bibkey>
     </paper>
     <paper id="13">
       <title>Improving Diversity in Ranking using Absorbing Random Walks</title>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
-      <author><first>Andrew</first><last>Goldberg</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew</first><last>Goldberg</last></author>
       <author><first>Jurgen</first><last>Van Gael</last></author>
       <author><first>David</first><last>Andrzejewski</last></author>
       <pages>97–104</pages>
@@ -135,7 +135,7 @@
     </paper>
     <paper id="14">
       <title>A Random Text Model for the Generation of Statistical Language Invariants</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>105–112</pages>
       <url hash="a7d24d36">N07-1014</url>
       <bibkey>biemann-2007-random</bibkey>
@@ -158,7 +158,7 @@
     </paper>
     <paper id="17">
       <title>The Domain Restriction Hypothesis: Relating Term Similarity and Semantic Consistency</title>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
       <pages>131–138</pages>
@@ -168,8 +168,8 @@
     <paper id="18">
       <title><fixed-case>B</fixed-case>ayesian Inference for <fixed-case>PCFG</fixed-case>s via <fixed-case>M</fixed-case>arkov Chain <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo</title>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Thomas</first><last>Griffiths</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas</first><last>Griffiths</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>139–146</pages>
       <url hash="576c9285">N07-1018</url>
       <bibkey>johnson-etal-2007-bayesian</bibkey>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="21">
       <title>Probabilistic Generation of Weather Forecast Texts</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>164–171</pages>
       <url hash="f4db415a">N07-1021</url>
       <bibkey>belz-2007-probabilistic</bibkey>
@@ -200,7 +200,7 @@
     <paper id="22">
       <title>Generation by Inverting a Semantic Parser that Uses Statistical Machine Translation</title>
       <author><first>Yuk Wah</first><last>Wong</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>172–179</pages>
       <url hash="c3a6d9ca">N07-1022</url>
       <bibkey>wong-mooney-2007-generation</bibkey>
@@ -208,7 +208,7 @@
     <paper id="23">
       <title>Lexicalized <fixed-case>M</fixed-case>arkov Grammars for Sentence Compression</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>180–187</pages>
       <url hash="8d8b9009">N07-1023</url>
       <bibkey>galley-mckeown-2007-lexicalized</bibkey>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="25">
       <title>Using <fixed-case>W</fixed-case>ikipedia for Automatic Word Sense Disambiguation</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>196–203</pages>
       <url hash="0c899a8b">N07-1025</url>
       <bibkey>mihalcea-2007-using</bibkey>
@@ -245,19 +245,19 @@
     <paper id="28">
       <title>A Case For Shorter Queries, and Helping Users Create Them</title>
       <author><first>Giridhar</first><last>Kumaran</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>220–227</pages>
       <url hash="8e31e474">N07-1028</url>
       <bibkey>kumaran-allan-2007-case</bibkey>
     </paper>
     <paper id="29">
       <title>Combining Outputs from Multiple Machine Translation Systems</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>228–235</pages>
       <url hash="4e07f3e1">N07-1029</url>
       <bibkey>rosti-etal-2007-combining</bibkey>
@@ -273,7 +273,7 @@
     <paper id="31">
       <title>Automating Creation of Hierarchical Faceted Metadata Structures</title>
       <author><first>Emilia</first><last>Stoica</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <author><first>Megan</first><last>Richardson</last></author>
       <pages>244–251</pages>
       <url hash="7887458e">N07-1031</url>
@@ -282,8 +282,8 @@
     <paper id="32">
       <title>Cross-Instance Tuning of Unsupervised Document Clustering Algorithms</title>
       <author><first>Damianos</first><last>Karakos</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Carey</first><last>Priebe</last></author>
       <pages>252–259</pages>
       <url hash="a407f5d1">N07-1032</url>
@@ -291,25 +291,25 @@
     </paper>
     <paper id="33">
       <title>Using “Annotator Rationales” to Improve Machine Learning for Text Categorization</title>
-      <author><first>Omar</first><last>Zaidan</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="omar-zaidan"><first>Omar</first><last>Zaidan</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <pages>260–267</pages>
       <url hash="d0795523">N07-1033</url>
       <bibkey>zaidan-etal-2007-using</bibkey>
     </paper>
     <paper id="34">
       <title>Combining Reinformation Learning with Information-State Update Rules</title>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <pages>268–275</pages>
       <url hash="b244bfd1">N07-1034</url>
       <bibkey>heeman-2007-combining</bibkey>
     </paper>
     <paper id="35">
       <title>Estimating the Reliability of <fixed-case>MDP</fixed-case> Policies: a Confidence Interval Approach</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Dan</first><last>Bohus</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>276–283</pages>
       <url hash="097c8136">N07-1035</url>
       <bibkey>tetreault-etal-2007-estimating</bibkey>
@@ -317,7 +317,7 @@
     <paper id="36">
       <title>An Exploration of Eye Gaze in Spoken Language Processing for Multimodal Conversational Interfaces</title>
       <author><first>Shaolin</first><last>Qu</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>284–291</pages>
       <url hash="16be66c2">N07-1036</url>
       <bibkey>qu-chai-2007-exploration</bibkey>
@@ -326,7 +326,7 @@
       <title>Extracting Semantic Orientations of Phrases from Dictionary</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <author><first>Takashi</first><last>Inui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>292–299</pages>
       <url hash="e06f9c0b">N07-1037</url>
       <bibkey>takamura-etal-2007-extracting</bibkey>
@@ -358,15 +358,15 @@
     </paper>
     <paper id="41">
       <title>Combining Probability-Based Rankers for Action-Item Detection</title>
-      <author><first>Paul N.</first><last>Bennett</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="paul-bennett"><first>Paul N.</first><last>Bennett</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <pages>324–331</pages>
       <url hash="c648f174">N07-1041</url>
       <bibkey>bennett-carbonell-2007-combining</bibkey>
     </paper>
     <paper id="42">
       <title>Multi-Document Relationship Fusion via Constraints on Probabilistic Databases</title>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <pages>332–339</pages>
       <url hash="b9beaf4e">N07-1042</url>
       <bibkey>mann-2007-multi</bibkey>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="45">
       <title>Near-Synonym Choice in an Intelligent Thesaurus</title>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>356–363</pages>
       <url hash="9ef753ac">N07-1045</url>
       <bibkey>inkpen-2007-near</bibkey>
@@ -400,7 +400,7 @@
       <author><first>Bing</first><last>Zhao</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>364–371</pages>
       <url hash="f33f32cc">N07-1046</url>
       <bibkey>zhao-etal-2007-log</bibkey>
@@ -424,8 +424,8 @@
       <author><first>Vesa</first><last>Siivola</last></author>
       <author><first>Matti</first><last>Varjokallio</last></author>
       <author><first>Ebru</first><last>Arisoy</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <pages>380–387</pages>
       <url hash="4e5ba69a">N07-1048</url>
       <bibkey>creutz-etal-2007-analysis</bibkey>
@@ -466,7 +466,7 @@
       <title>A Cascaded Machine Learning Approach to Interpreting Temporal Expressions</title>
       <author><first>David</first><last>Ahn</last></author>
       <author><first>Joris</first><last>van Rantwijk</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>420–427</pages>
       <url hash="b44e8317">N07-1053</url>
       <bibkey>ahn-etal-2007-cascaded</bibkey>
@@ -474,8 +474,8 @@
     <paper id="54">
       <title>Building and Refining Rhetorical-Semantic Relation Models</title>
       <author><first>Sasha</first><last>Blair-Goldensohn</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>428–435</pages>
       <url hash="f41d97c8">N07-1054</url>
       <bibkey>blair-goldensohn-etal-2007-building</bibkey>
@@ -493,7 +493,7 @@
       <title>Randomized Decoding for Selection-and-Ordering Problems</title>
       <author><first>Pawan</first><last>Deshpande</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>David</first><last>Karger</last></author>
+      <author id="david-r-karger"><first>David</first><last>Karger</last></author>
       <pages>444–451</pages>
       <url hash="ee70526b">N07-1056</url>
       <bibkey>deshpande-etal-2007-randomized</bibkey>
@@ -501,7 +501,7 @@
     <paper id="57">
       <title>Multilingual Structural Projection across Interlinear Text</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>452–459</pages>
       <url hash="ea896d48">N07-1057</url>
       <bibkey>xia-lewis-2007-multilingual</bibkey>
@@ -519,7 +519,7 @@
     <paper id="59">
       <title>Automatic Assessment of Student Translations for Foreign Language Tutoring</title>
       <author><first>Chao</first><last>Wang</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>468–475</pages>
       <url hash="2106f4ac">N07-1059</url>
       <bibkey>wang-seneff-2007-automatic</bibkey>
@@ -543,7 +543,7 @@
     <paper id="62">
       <title>Efficient Phrase-Table Representation for Machine Translation with Applications to Online <fixed-case>MT</fixed-case> and Speech Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>492–499</pages>
       <url hash="945f3dbb">N07-1062</url>
       <bibkey>zens-ney-2007-efficient</bibkey>
@@ -552,7 +552,7 @@
       <title>An Efficient Two-Pass Approach to Synchronous-<fixed-case>CFG</fixed-case> Driven Statistical <fixed-case>MT</fixed-case></title>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>500–507</pages>
       <url hash="332f2e4d">N07-1063</url>
       <bibkey>venugopal-etal-2007-efficient</bibkey>
@@ -560,7 +560,7 @@
     <paper id="64">
       <title>Statistical Phrase-Based Post-Editing</title>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <pages>508–515</pages>
       <url hash="1b6df8f8">N07-1064</url>
@@ -578,7 +578,7 @@
       <title>A Probabilistic Framework for Answer Selection in Question Answering</title>
       <author><first>Jeongwoo</first><last>Ko</last></author>
       <author><first>Luo</first><last>Si</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>524–531</pages>
       <url hash="61afbca6">N07-1066</url>
       <bibkey>ko-etal-2007-probabilistic</bibkey>
@@ -586,9 +586,9 @@
     <paper id="67">
       <title>Question Answering Using Integrated Information Retrieval and Information Extraction</title>
       <author><first>Barry</first><last>Schiffman</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>532–539</pages>
       <url hash="b8af4ae1">N07-1067</url>
       <bibkey>schiffman-etal-2007-question</bibkey>
@@ -603,18 +603,18 @@
     </paper>
     <paper id="69">
       <title>Can Semantic Roles Generalize Across Genres?</title>
-      <author><first>Szu-ting</first><last>Yi</last></author>
+      <author id="szu-ting-yi"><first>Szu-ting</first><last>Yi</last></author>
       <author><first>Edward</first><last>Loper</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>548–555</pages>
       <url hash="1ea2cbbb">N07-1069</url>
       <bibkey>yi-etal-2007-semantic</bibkey>
     </paper>
     <paper id="70">
       <title>Towards Robust Semantic Role Labeling</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>556–563</pages>
       <url hash="ceee16bb">N07-1070</url>
       <bibkey>pradhan-etal-2007-towards</bibkey>
@@ -624,8 +624,8 @@
       <author><first>Patrick</first><last>Pantel</last></author>
       <author><first>Rahul</first><last>Bhagat</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
-      <author><first>Timothy</first><last>Chklovski</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>564–571</pages>
       <url hash="9a1bf71b">N07-1071</url>
       <bibkey>pantel-etal-2007-isp</bibkey>
@@ -643,10 +643,10 @@
     <meta>
       <booktitle>Human Language Technologies 2007: The Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics; Companion Volume, Short Papers</booktitle>
       <url hash="ec4697b3">N07-2</url>
-      <editor><first>Candace</first><last>Sidner</last></editor>
+      <editor id="candace-l-sidner"><first>Candace</first><last>Sidner</last></editor>
       <editor><first>Tanja</first><last>Schultz</last></editor>
       <editor><first>Matthew</first><last>Stone</last></editor>
-      <editor><first>ChengXiang</first><last>Zhai</last></editor>
+      <editor id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, New York</address>
       <month>April</month>
@@ -660,24 +660,24 @@
     <paper id="1">
       <title>Comparing User Simulation Models For Dialog Strategy Learning</title>
       <author><first>Hua</first><last>Ai</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1–4</pages>
       <url hash="dffe8785">N07-2001</url>
       <bibkey>ai-etal-2007-comparing-user</bibkey>
     </paper>
     <paper id="2">
       <title>Automatic Acquisition of Grammatical Types for Nouns</title>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <pages>5–8</pages>
       <url hash="5cd2f929">N07-2002</url>
       <bibkey>bel-etal-2007-automatic</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>onquest—<fixed-case>A</fixed-case>n Open-Source Dialog System for Conferences</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Sergio</first><last>Grau Puerto</last></author>
       <author><first>David</first><last>Huggins-Daines</last></author>
       <author><first>Venkatesh</first><last>Keri</last></author>
@@ -693,14 +693,14 @@
       <title>Joint Versus Independent Phonological Feature Models within <fixed-case>CRF</fixed-case> Phone Recognition</title>
       <author><first>Ilana</first><last>Bromberg</last></author>
       <author><first>Jeremy</first><last>Morris</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>13–16</pages>
       <url hash="9f5c93e8">N07-2004</url>
       <bibkey>bromberg-etal-2007-joint</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>K</fixed-case>-Best Suffix Arrays</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Bo</first><last>Thiesson</last></author>
       <author><first>Robert</first><last>Ragno</last></author>
       <pages>17–20</pages>
@@ -710,8 +710,8 @@
     <paper id="6">
       <title>Translation Model Pruning via Usage Statistics for Statistical Machine Translation</title>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>21–24</pages>
       <url hash="88148e14">N07-2006</url>
       <bibkey>eck-etal-2007-translation</bibkey>
@@ -735,7 +735,7 @@
     <paper id="9">
       <title>Generalized Graphical Abstractions for Statistical Machine Translation</title>
       <author><first>Karim</first><last>Filali</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>33–36</pages>
       <url hash="9ba0169e">N07-2009</url>
       <bibkey>filali-bilmes-2007-generalized</bibkey>
@@ -743,17 +743,17 @@
     <paper id="10">
       <title>Situated Models of Meaning for Sports Video Retrieval</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>37–40</pages>
       <url hash="1fe1a5d5">N07-2010</url>
       <bibkey>fleischman-roy-2007-situated</bibkey>
     </paper>
     <paper id="11">
       <title>Exploring Affect-Context Dependencies for Adaptive System Development</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>41–44</pages>
       <url hash="6d9b56ae">N07-2011</url>
       <bibkey>forbes-riley-etal-2007-exploring</bibkey>
@@ -778,34 +778,34 @@
     <paper id="14">
       <title><fixed-case>A</fixed-case>rabic Diacritization through Full Morphological Tagging</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>53–56</pages>
       <url hash="1d547f29">N07-2014</url>
       <bibkey>habash-rambow-2007-arabic</bibkey>
     </paper>
     <paper id="15">
       <title>Are Very Large <fixed-case>N</fixed-case>-Best Lists Useful for <fixed-case>SMT</fixed-case>?</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>57–60</pages>
       <url hash="a91b32f0">N07-2015</url>
       <bibkey>hasan-etal-2007-large</bibkey>
     </paper>
     <paper id="16">
       <title>Relationship between Non-Projective Edges, Their Level Types, and Well-Nestedness</title>
-      <author><first>Jiří</first><last>Havelka</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
       <pages>61–64</pages>
       <url hash="e697513a">N07-2016</url>
       <bibkey>havelka-2007-relationship</bibkey>
     </paper>
     <paper id="17">
       <title>i<fixed-case>ROVER</fixed-case>: Improving System Combination with Classification</title>
-      <author><first>Dustin</first><last>Hillard</last></author>
+      <author id="dustin-hillard"><first>Dustin</first><last>Hillard</last></author>
       <author><first>Bjoern</first><last>Hoffmeister</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Ralf</first><last>Schlueter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlueter</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>65–68</pages>
       <url hash="0db9505e">N07-2017</url>
       <bibkey>hillard-etal-2007-irover</bibkey>
@@ -813,7 +813,7 @@
     <paper id="18">
       <title>Clustered Sub-Matrix Singular Value Decomposition</title>
       <author><first>Fang</first><last>Huang</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>69–72</pages>
       <url hash="8bf60c7d">N07-2018</url>
       <bibkey>huang-wilks-2007-clustered</bibkey>
@@ -821,14 +821,14 @@
     <paper id="19">
       <title>Implicitly Supervised Language Model Adaptation for Meeting Transcription</title>
       <author><first>David</first><last>Huggins-Daines</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <pages>73–76</pages>
       <url hash="25e93389">N07-2019</url>
       <bibkey>huggins-daines-rudnicky-2007-implicitly</bibkey>
     </paper>
     <paper id="20">
       <title><fixed-case>ILR</fixed-case>-Based <fixed-case>MT</fixed-case> Comprehension Test with Multi-Level Questions</title>
-      <author><first>Douglas</first><last>Jones</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
       <author><first>Martha</first><last>Herzog</last></author>
       <author><first>Hussny</first><last>Ibrahim</last></author>
       <author><first>Arvind</first><last>Jairam</last></author>
@@ -841,8 +841,8 @@
     </paper>
     <paper id="21">
       <title>Semi-Supervised Learning for Semantic Parsing using Support Vector Machines</title>
-      <author><first>Rohit</first><last>Kate</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>81–84</pages>
       <url hash="710e2433">N07-2021</url>
       <bibkey>kate-mooney-2007-semi</bibkey>
@@ -850,8 +850,8 @@
     <paper id="22">
       <title>Discriminative Alignment Training without Annotated Data for Machine Translation</title>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <pages>85–88</pages>
       <url hash="265c6f59">N07-2022</url>
       <bibkey>lambert-etal-2007-discriminative</bibkey>
@@ -866,7 +866,7 @@
     </paper>
     <paper id="24">
       <title>Detection of Non-Native Sentences Using Machine-Translated Training Data</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Xiaohua</first><last>Liu</last></author>
       <pages>93–96</pages>
@@ -899,7 +899,7 @@
     </paper>
     <paper id="28">
       <title>Efficient Computation of Entropy Gradient for Semi-Supervised Conditional Random Fields</title>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>109–112</pages>
       <url hash="8870599e">N07-2028</url>
@@ -907,8 +907,8 @@
     </paper>
     <paper id="29">
       <title>Hybrid Document Indexing with Spectral Embedding</title>
-      <author><first>Irina</first><last>Matveeva</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>113–116</pages>
       <url hash="614a45c2">N07-2029</url>
       <bibkey>matveeva-levow-2007-hybrid</bibkey>
@@ -922,7 +922,7 @@
     </paper>
     <paper id="31">
       <title><fixed-case>RH</fixed-case>: A Retro-Hybrid Parser</title>
-      <author><first>Paula</first><last>Newman</last></author>
+      <author id="paula-newman"><first>Paula</first><last>Newman</last></author>
       <pages>121–124</pages>
       <url hash="c092bf06">N07-2031</url>
       <bibkey>newman-2007-rh</bibkey>
@@ -949,34 +949,34 @@
       <title>An Integrated Architecture for Speech-Input Multi-Target Machine Translation</title>
       <author><first>Alicia</first><last>Pérez</last></author>
       <author><first>M. Teresa</first><last>González</last></author>
-      <author><first>M. Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>133–136</pages>
       <url hash="5223669f">N07-2034</url>
       <bibkey>perez-etal-2007-integrated</bibkey>
     </paper>
     <paper id="35">
       <title>Analysis and System Combination of Phrase- and <fixed-case>N</fixed-case>-Gram-Based Statistical Machine Translation Systems</title>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>José A.</first><last>R. Fonollosa</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A.</first><last>R. Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>137–140</pages>
       <url hash="03a17c75">N07-2035</url>
       <bibkey>r-costa-jussa-etal-2007-analysis</bibkey>
     </paper>
     <paper id="36">
       <title>Stating with Certainty or Stating with Doubt: Intercoder Reliability Results for Manual Annotation of Epistemically Modalized Statements</title>
-      <author><first>Victoria L.</first><last>Rubin</last></author>
+      <author id="victoria-l-rubin"><first>Victoria L.</first><last>Rubin</last></author>
       <pages>141–144</pages>
       <url hash="5e34c7d3">N07-2036</url>
       <bibkey>rubin-2007-stating</bibkey>
     </paper>
     <paper id="37">
       <title>Joint Morphological-Lexical Language Modeling for Machine Translation</title>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Yonggang</first><last>Deng</last></author>
       <pages>145–148</pages>
       <url hash="82066913">N07-2037</url>
@@ -988,14 +988,14 @@
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Karl</first><last>Weilhammer</last></author>
       <author><first>Hui</first><last>Ye</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>149–152</pages>
       <url hash="94dcbcff">N07-2038</url>
       <bibkey>schatzmann-etal-2007-agenda</bibkey>
     </paper>
     <paper id="39">
       <title>Reversible Sound-to-Letter/Letter-to-Sound Modeling Based on Syllable Structure</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>153–156</pages>
       <url hash="b692d4d7">N07-2039</url>
       <bibkey>seneff-2007-reversible</bibkey>
@@ -1019,8 +1019,8 @@
     </paper>
     <paper id="42">
       <title>Virtual Evidence for Training Speech Recognizers Using Partially Labeled Data</title>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>165–168</pages>
       <url hash="26ad003e">N07-2042</url>
       <bibkey>subramanya-bilmes-2007-virtual</bibkey>
@@ -1038,8 +1038,8 @@
       <author><first>Keith</first><last>Trnka</last></author>
       <author><first>Debra</first><last>Yarrington</last></author>
       <author><first>John</first><last>McCaw</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
-      <author><first>Christopher</first><last>Pennington</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="christopher-pennington"><first>Christopher</first><last>Pennington</last></author>
       <pages>173–176</pages>
       <url hash="6c48e1e6">N07-2044</url>
       <bibkey>trnka-etal-2007-effects</bibkey>
@@ -1054,7 +1054,7 @@
     </paper>
     <paper id="46">
       <title>Entity Extraction is a Boring Solved <fixed-case>P</fixed-case>roblem—<fixed-case>O</fixed-case>r is it?</title>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <author><first>Jennifer</first><last>Su</last></author>
       <author><first>Suzi</first><last>Lubar</last></author>
       <pages>181–184</pages>
@@ -1073,7 +1073,7 @@
     <paper id="48">
       <title>Modifying <fixed-case>SO</fixed-case>-<fixed-case>PMI</fixed-case> for <fixed-case>J</fixed-case>apanese Weblog Opinion Mining by Using a Balancing Factor and Detecting Neutral Expressions</title>
       <author><first>Guangwei</first><last>Wang</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>189–192</pages>
       <url hash="f511ae90">N07-2048</url>
       <bibkey>wang-araki-2007-modifying</bibkey>
@@ -1082,7 +1082,7 @@
       <title>Combined Use of Speaker- and Tone-Normalized Pitch Reset with Pause Duration for Automatic Story Segmentation in <fixed-case>M</fixed-case>andarin Broadcast News</title>
       <author><first>Lei</first><last>Xie</last></author>
       <author><first>Chuan</first><last>Liu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>193–196</pages>
       <url hash="be34a42e">N07-2049</url>
       <bibkey>xie-etal-2007-combined</bibkey>
@@ -1114,8 +1114,8 @@
     </paper>
     <paper id="53">
       <title>Selective Phrase Pair Extraction for Improved Statistical Machine Translation</title>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>209–212</pages>
       <url hash="3045c183">N07-2053</url>
       <bibkey>zettlemoyer-moore-2007-selective</bibkey>
@@ -1132,7 +1132,7 @@
       <title>A Semi-Automatic Evaluation Scheme: Automated Nuggetization for Manual Annotation</title>
       <author><first>Liang</first><last>Zhou</last></author>
       <author><first>Namhee</first><last>Kwon</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>217–220</pages>
       <url hash="13d03f79">N07-2055</url>
       <bibkey>zhou-etal-2007-semi</bibkey>
@@ -1156,7 +1156,7 @@
     </frontmatter>
     <paper id="1">
       <title>Query Expansion Using Domain Information in Compounds</title>
-      <author><first>Karin</first><last>Friberg</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg</last></author>
       <pages>1–4</pages>
       <url hash="74d081b0">N07-3001</url>
       <bibkey>friberg-2007-query</bibkey>
@@ -1170,7 +1170,7 @@
     </paper>
     <paper id="3">
       <title>Creating a Knowledge Base from a Collaboratively Generated Encyclopedia</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>9–12</pages>
       <url hash="1532ead7">N07-3003</url>
       <bibkey>ponzetto-2007-creating</bibkey>
@@ -1205,7 +1205,7 @@
     </paper>
     <paper id="8">
       <title>Semantic Frames in <fixed-case>R</fixed-case>omanian Natural Language Processing Systems</title>
-      <author><first>Diana Marie</first><last>Trandabăţ</last></author>
+      <author id="diana-trandabat"><first>Diana Marie</first><last>Trandabăţ</last></author>
       <pages>29–32</pages>
       <url hash="a40de834">N07-3008</url>
       <bibkey>trandabat-2007-semantic</bibkey>
@@ -1219,7 +1219,7 @@
     </paper>
     <paper id="10">
       <title>Unsupervised Natural Language Processing Using Graph Models</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>37–40</pages>
       <url hash="7a52a99f">N07-3010</url>
       <bibkey>biemann-2007-unsupervised</bibkey>
@@ -1230,8 +1230,8 @@
       <booktitle>Proceedings of Human Language Technologies: The Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics (<fixed-case>NAACL</fixed-case>-<fixed-case>HLT</fixed-case>)</booktitle>
       <url hash="5df82881">N07-4</url>
       <editor><first>Bob</first><last>Carpenter</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
-      <editor><first>Jason D.</first><last>Williams</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
+      <editor id="jason-d-williams"><first>Jason D.</first><last>Williams</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, New York, USA</address>
       <month>April</month>
@@ -1244,12 +1244,12 @@
     </frontmatter>
     <paper id="1">
       <title>Demonstration of <fixed-case>PLOW</fixed-case>: A Dialogue System for One-Shot Task Learning</title>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>George</first><last>Ferguson</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <author><first>Hyuckchul</first><last>Jung</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <author><first>William</first><last>Taysom</last></author>
       <pages>1–2</pages>
       <url hash="8ec7a7b4">N07-4001</url>
@@ -1268,12 +1268,12 @@
     </paper>
     <paper id="3">
       <title>Adaptive Tutorial Dialogue Systems Using Deep <fixed-case>NLP</fixed-case> Techniques</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
       <author><first>Manuel</first><last>Marques-Pita</last></author>
       <author><first>Colin</first><last>Matheson</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>5–6</pages>
       <url hash="12f73ef3">N07-4003</url>
       <bibkey>dzikovska-etal-2007-adaptive</bibkey>
@@ -1282,7 +1282,7 @@
       <title><fixed-case>POSSLT</fixed-case>: A <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Spoken Language Translation System</title>
       <author><first>Donghyeon</first><last>Lee</last></author>
       <author><first>Jonghoon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>7–8</pages>
       <url hash="acae68c9">N07-4004</url>
       <bibkey>lee-etal-2007-posslt</bibkey>
@@ -1294,7 +1294,7 @@
       <author><first>Simon</first><last>Tucker</last></author>
       <author><first>Jonathan</first><last>Kilgour</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <pages>9–10</pages>
       <url hash="c8c6968c">N07-4005</url>
@@ -1309,7 +1309,7 @@
     </paper>
     <paper id="7">
       <title>Spoken Dialogue Systems for Language Learning</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <author><first>Chih-yu</first><last>Chao</last></author>
       <pages>13–14</pages>
@@ -1337,7 +1337,7 @@
     <paper id="10">
       <title><fixed-case>OMS</fixed-case>-<fixed-case>J</fixed-case>: An Opinion Mining System for <fixed-case>J</fixed-case>apanese Weblog Reviews Using a Combination of Supervised and Unsupervised Approaches</title>
       <author><first>Guangwei</first><last>Wang</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>19–20</pages>
       <url hash="956dcfc3">N07-4010</url>
       <bibkey>wang-araki-2007-oms</bibkey>
@@ -1345,7 +1345,7 @@
     <paper id="11">
       <title>Learning to Find Transliteration on the Web</title>
       <author><first>Chien-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>21–22</pages>
       <url hash="62b13d7a">N07-4011</url>
       <bibkey>wu-chang-2007-learning-find</bibkey>
@@ -1367,7 +1367,7 @@
       <author><first>Rohit</first><last>Mishra</last></author>
       <author><first>Brian</first><last>Lathrop</last></author>
       <author><first>Zhaoxia</first><last>Zhang</last></author>
-      <author><first>Harry</first><last>Bratt</last></author>
+      <author id="harry-bratt"><first>Harry</first><last>Bratt</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>23–24</pages>
       <url hash="ccb4f8d0">N07-4012</url>
@@ -1378,16 +1378,16 @@
       <author><first>Alexander</first><last>Yates</last></author>
       <author><first>Michele</first><last>Banko</last></author>
       <author><first>Matthew</first><last>Broadhead</last></author>
-      <author><first>Michael</first><last>Cafarella</last></author>
+      <author id="michael-j-cafarella"><first>Michael</first><last>Cafarella</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <pages>25–26</pages>
       <url hash="d3869dbb">N07-4013</url>
       <bibkey>yates-etal-2007-textrunner</bibkey>
     </paper>
     <paper id="14">
       <title>The Hidden Information State Dialogue Manager: A Real-World <fixed-case>POMDP</fixed-case>-Based System</title>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Jost</first><last>Schatzmann</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Karl</first><last>Weilhammer</last></author>
@@ -1405,7 +1405,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>V</fixed-case>oice-<fixed-case>R</fixed-case>ate: A Dialog System for Consumer Ratings</title>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <author><first>Y.C.</first><last>Ju</last></author>
       <author><first>Patrick</first><last>Nguyen</last></author>
       <author><first>Dong</first><last>Yu</last></author>
@@ -1420,9 +1420,9 @@
     <meta>
       <booktitle>Proceedings of the Human Language Technology Conference of the <fixed-case>NAACL</fixed-case>, Companion Volume: Tutorial Abstracts</booktitle>
       <url hash="f03a4c12">N07-5</url>
-      <editor><first>Marti</first><last>Hearst</last></editor>
-      <editor><first>Gina-Anne</first><last>Levow</last></editor>
-      <editor><first>James</first><last>Allan</last></editor>
+      <editor id="marti-a-hearst"><first>Marti</first><last>Hearst</last></editor>
+      <editor id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></editor>
+      <editor id="james-allan"><first>James</first><last>Allan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, New York</address>
       <month>April</month>
@@ -1435,21 +1435,21 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case></title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <pages>1–2</pages>
       <url hash="daceeb2f">N07-5001</url>
       <bibkey>cohen-2007-bionlp</bibkey>
     </paper>
     <paper id="2">
       <title>Statistical Language Models for Information Retrieval</title>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>3–4</pages>
       <url hash="b73f854d">N07-5002</url>
       <bibkey>zhai-2007-statistical</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>A</fixed-case>rabic Dialect Processing Tutorial</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>5–6</pages>
       <url hash="d028b15c">N07-5003</url>
diff --git a/data/xml/N09.xml b/data/xml/N09.xml
index 3a5aa1e76a..88f5915d8c 100644
--- a/data/xml/N09.xml
+++ b/data/xml/N09.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of Human Language Technologies: The 2009 Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="95751b0d">N09-1</url>
-      <editor><first>Mari</first><last>Ostendorf</last></editor>
-      <editor><first>Michael</first><last>Collins</last></editor>
-      <editor><first>Shri</first><last>Narayanan</last></editor>
-      <editor><first>Douglas W.</first><last>Oard</last></editor>
+      <editor id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></editor>
+      <editor id="michael-collins"><first>Michael</first><last>Collins</last></editor>
+      <editor id="shrikanth-narayanan"><first>Shri</first><last>Narayanan</last></editor>
+      <editor id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></editor>
       <editor><first>Lucy</first><last>Vanderwende</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -30,8 +30,8 @@
     <paper id="2">
       <title>Integrating Knowledge for Subjectivity Sense Labeling</title>
       <author><first>Yaw</first><last>Gyamfi</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Cem</first><last>Akkaya</last></author>
       <pages>10–18</pages>
       <url hash="06d38af9">N09-1002</url>
@@ -39,12 +39,12 @@
     </paper>
     <paper id="3">
       <title>A Study on Similarity and Relatedness Using Distributional and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Approaches</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Jana</first><last>Kravalova</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="jana-kravalova"><first>Jana</first><last>Kravalova</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>19–27</pages>
       <url hash="a60c5d82">N09-1003</url>
       <bibkey>agirre-etal-2009-study</bibkey>
@@ -85,15 +85,15 @@
       <author><first>Yaozhong</first><last>Zhang</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>56–64</pages>
       <url hash="c203f977">N09-1007</url>
       <bibkey>sun-etal-2009-discriminative</bibkey>
     </paper>
     <paper id="8">
       <title>Improved Reconstruction of Protolanguage Word Forms</title>
-      <author><first>Alexandre</first><last>Bouchard-Côté</last></author>
-      <author><first>Thomas L.</first><last>Griffiths</last></author>
+      <author id="alexandre-bouchard-cote"><first>Alexandre</first><last>Bouchard-Côté</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas L.</first><last>Griffiths</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>65–73</pages>
       <url hash="03dc272a">N09-1008</url>
@@ -101,8 +101,8 @@
     </paper>
     <paper id="9">
       <title>Shared Logistic Normal Distributions for Soft Parameter Tying in Unsupervised Grammar Induction</title>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>74–82</pages>
       <url hash="a512b4ab">N09-1009</url>
       <bibkey>cohen-smith-2009-shared</bibkey>
@@ -120,7 +120,7 @@
     <paper id="11">
       <title>Efficiently Parsable Extensions to Tree-Local Multicomponent <fixed-case>TAG</fixed-case></title>
       <author><first>Rebecca</first><last>Nesson</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>92–100</pages>
       <url hash="be116082">N09-1011</url>
       <bibkey>nesson-shieber-2009-efficiently</bibkey>
@@ -137,7 +137,7 @@
     <paper id="13">
       <title>Context-Dependent Alignment Models for Statistical Machine Translation</title>
       <author><first>Jamie</first><last>Brunning</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>110–118</pages>
       <url hash="000f834c">N09-1013</url>
@@ -155,7 +155,7 @@
       <title>Intersecting Multilingual Data for Faster and Better Statistical Translations</title>
       <author><first>Yu</first><last>Chen</last></author>
       <author><first>Martin</first><last>Kay</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <pages>128–136</pages>
       <url hash="1ed6b83a">N09-1015</url>
       <bibkey>chen-etal-2009-intersecting</bibkey>
@@ -171,16 +171,16 @@
     </paper>
     <paper id="17">
       <title>The Role of Implicit Argumentation in Nominal <fixed-case>SRL</fixed-case></title>
-      <author><first>Matthew</first><last>Gerber</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Gerber</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <pages>146–154</pages>
       <url hash="9c8654c0">N09-1017</url>
       <bibkey>gerber-etal-2009-role</bibkey>
     </paper>
     <paper id="18">
       <title>Jointly Identifying Predicates, Arguments and Senses using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
-      <author><first>Ivan</first><last>Meza-Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza-Ruiz</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>155–163</pages>
       <url hash="6d5f4c8a">N09-1018</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="20">
       <title>Hierarchical <fixed-case>D</fixed-case>irichlet Trees for Information Retrieval</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Yee Whye</first><last>Teh</last></author>
       <pages>173–181</pages>
       <url hash="9a32fa15">N09-1020</url>
@@ -259,8 +259,8 @@
       <title>Preference Grammars: Softening Syntactic Constraints to Improve Statistical Machine Translation</title>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>236–244</pages>
       <url hash="2c652758">N09-1027</url>
       <bibkey>venugopal-etal-2009-preference</bibkey>
@@ -270,7 +270,7 @@
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Jaeho</first><last>Kang</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>245–253</pages>
       <url hash="a242e7b7">N09-1028</url>
       <bibkey>xu-etal-2009-using</bibkey>
@@ -278,20 +278,20 @@
     <paper id="29">
       <title>Learning Bilingual Linguistic Reordering Model for Statistical Machine Translation</title>
       <author><first>Han-Bin</first><last>Chen</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>254–262</pages>
       <url hash="7cce513c">N09-1029</url>
       <bibkey>chen-etal-2009-learning</bibkey>
     </paper>
     <paper id="30">
       <title>May All Your Wishes Come True: A Study of Wishes and How to Recognize Them</title>
-      <author><first>Andrew B.</first><last>Goldberg</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew B.</first><last>Goldberg</last></author>
       <author><first>Nathanael</first><last>Fillmore</last></author>
       <author><first>David</first><last>Andrzejewski</last></author>
       <author><first>Zhiting</first><last>Xu</last></author>
       <author><first>Bryan</first><last>Gibson</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>263–271</pages>
       <url hash="27eb2008">N09-1030</url>
       <bibkey>goldberg-etal-2009-may</bibkey>
@@ -300,9 +300,9 @@
       <title>Predicting Risk from Financial Reports with Regression</title>
       <author><first>Shimon</first><last>Kogan</last></author>
       <author><first>Dimitry</first><last>Levin</last></author>
-      <author><first>Bryan R.</first><last>Routledge</last></author>
+      <author id="bryan-r-routledge"><first>Bryan R.</first><last>Routledge</last></author>
       <author><first>Jacob S.</first><last>Sagi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>272–280</pages>
       <url hash="da8cb92f">N09-1031</url>
       <bibkey>kogan-etal-2009-predicting</bibkey>
@@ -349,23 +349,23 @@
     <paper id="36">
       <title>Improving nonparameteric <fixed-case>B</fixed-case>ayesian inference: experiments on unsupervised word segmentation with adaptor grammars</title>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>317–325</pages>
       <url hash="d20836cf">N09-1036</url>
       <bibkey>johnson-goldwater-2009-improving</bibkey>
     </paper>
     <paper id="37">
       <title>Joint Parsing and Named Entity Recognition</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>326–334</pages>
       <url hash="a1dc3559">N09-1037</url>
       <bibkey>finkel-manning-2009-joint</bibkey>
     </paper>
     <paper id="38">
       <title>Minimal-length linearizations for mildly context-sensitive dependency trees</title>
-      <author><first>Y. Albert</first><last>Park</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="y-albert-park"><first>Y. Albert</first><last>Park</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>335–343</pages>
       <url hash="f28abf56">N09-1038</url>
       <bibkey>park-levy-2009-minimal</bibkey>
@@ -395,9 +395,9 @@
     <paper id="42">
       <title>Global Models of Document Structure using Latent Permutations</title>
       <author><first>Harr</first><last>Chen</last></author>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>David R.</first><last>Karger</last></author>
+      <author id="david-r-karger"><first>David R.</first><last>Karger</last></author>
       <pages>371–379</pages>
       <url hash="921fc309">N09-1042</url>
       <bibkey>chen-etal-2009-global</bibkey>
@@ -413,10 +413,10 @@
     </paper>
     <paper id="44">
       <title>Geo-Centric Language Models for Local Business Voice Search</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <author><first>Ilija</first><last>Zeljković</last></author>
       <author><first>Diamantino</first><last>Caseiro</last></author>
-      <author><first>Jay</first><last>Wilpon</last></author>
+      <author id="jay-wilpon"><first>Jay</first><last>Wilpon</last></author>
       <pages>389–396</pages>
       <url hash="6ae884c7">N09-1044</url>
       <bibkey>stent-etal-2009-geo</bibkey>
@@ -425,21 +425,21 @@
       <title>Improving the <fixed-case>A</fixed-case>rabic Pronunciation Dictionary for Phone and Word Recognition with Linguistically-Based Pronunciation Rules</title>
       <author><first>Fadi</first><last>Biadsy</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>397–405</pages>
       <url hash="cf06ae2c">N09-1045</url>
       <bibkey>biadsy-etal-2009-improving</bibkey>
     </paper>
     <paper id="46">
       <title>Using a maximum entropy model to build segmentation lattices for <fixed-case>MT</fixed-case></title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>406–414</pages>
       <url hash="36fc0175">N09-1046</url>
       <bibkey>dyer-2009-using</bibkey>
     </paper>
     <paper id="47">
       <title>Active Learning for Statistical Phrase-based Machine Translation</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Maxim</first><last>Roy</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>415–423</pages>
@@ -449,8 +449,8 @@
     <paper id="48">
       <title>Semi-Supervised Lexicon Mining from Parenthetical Expressions in Monolingual Web Pages</title>
       <author><first>Xianchao</first><last>Wu</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>424–432</pages>
       <url hash="d738cba5">N09-1048</url>
       <bibkey>wu-etal-2009-semi</bibkey>
@@ -458,8 +458,8 @@
     <paper id="49">
       <title>Hierarchical Phrase-Based Translation with Weighted Finite State Transducers</title>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
-      <author><first>Eduardo</first><last>R. Banga</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
+      <author id="eduardo-r-banga"><first>Eduardo</first><last>R. Banga</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>433–441</pages>
       <url hash="c68569ef">N09-1049</url>
@@ -476,23 +476,23 @@
     </paper>
     <paper id="51">
       <title>Performance Prediction for Exponential Language Models</title>
-      <author><first>Stanley</first><last>Chen</last></author>
+      <author id="stanley-f-chen"><first>Stanley</first><last>Chen</last></author>
       <pages>450–458</pages>
       <url hash="7443d476">N09-1051</url>
       <bibkey>chen-2009-performance</bibkey>
     </paper>
     <paper id="52">
       <title>Tied-Mixture Language Modeling in Continuous Space</title>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Mohamed</first><last>Afify</last></author>
-      <author><first>Brian</first><last>Kingsbury</last></author>
+      <author id="brian-kingsbury"><first>Brian</first><last>Kingsbury</last></author>
       <pages>459–467</pages>
       <url hash="cf5fca6d">N09-1052</url>
       <bibkey>sarikaya-etal-2009-tied</bibkey>
     </paper>
     <paper id="53">
       <title>Shrinking Exponential Language Models</title>
-      <author><first>Stanley</first><last>Chen</last></author>
+      <author id="stanley-f-chen"><first>Stanley</first><last>Chen</last></author>
       <pages>468–476</pages>
       <url hash="c1008589">N09-1053</url>
       <bibkey>chen-2009-shrinking</bibkey>
@@ -500,8 +500,8 @@
     <paper id="54">
       <title>Predicting Response to Political Blog Posts with Topic Models</title>
       <author><first>Tae</first><last>Yano</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>477–485</pages>
       <url hash="5cac0f01">N09-1054</url>
       <bibkey>yano-etal-2009-predicting</bibkey>
@@ -535,7 +535,7 @@
     <paper id="58">
       <title>Streaming for large scale <fixed-case>NLP</fixed-case>: Language Modeling</title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Suresh</first><last>Venkatasubramanian</last></author>
       <pages>512–520</pages>
       <url hash="0e8b501c">N09-1058</url>
@@ -562,16 +562,16 @@
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <author><first>Marco</first><last>Kuhlmann</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>539–547</pages>
       <url hash="5816955f">N09-1061</url>
       <bibkey>gomez-rodriguez-etal-2009-optimal</bibkey>
     </paper>
     <paper id="62">
       <title>Inducing Compact but Accurate Tree-Substitution Grammars</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>548–556</pages>
       <url hash="96733b61">N09-1062</url>
       <bibkey>cohn-etal-2009-inducing</bibkey>
@@ -601,13 +601,13 @@
     </paper>
     <paper id="66">
       <title>Using Citations to Generate surveys of Scientific Paradigms</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Melissa</first><last>Egan</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
-      <author><first>Pradeep</first><last>Muthukrishan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
+      <author id="pradeep-muthukrishnan"><first>Pradeep</first><last>Muthukrishan</last></author>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>David</first><last>Zajic</last></author>
       <pages>584–592</pages>
       <url hash="d385d231">N09-1066</url>
@@ -615,15 +615,15 @@
     </paper>
     <paper id="67">
       <title>Non-Parametric <fixed-case>B</fixed-case>ayesian Areal Linguistics</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>593–601</pages>
       <url hash="98acc7d2">N09-1067</url>
       <bibkey>daume-iii-2009-non</bibkey>
     </paper>
     <paper id="68">
       <title>Hierarchical <fixed-case>B</fixed-case>ayesian Domain Adaptation</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>602–610</pages>
       <url hash="c25ae83a">N09-1068</url>
       <bibkey>finkel-manning-2009-hierarchical</bibkey>
@@ -656,9 +656,9 @@
     </paper>
     <paper id="72">
       <title>Extracting Social Meaning: Identifying Interactional Style in Spoken Conversation</title>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Rajesh</first><last>Ranganath</last></author>
-      <author><first>Dan</first><last>McFarland</last></author>
+      <author id="dan-mcfarland"><first>Dan</first><last>McFarland</last></author>
       <pages>638–646</pages>
       <url hash="c2773c16">N09-1072</url>
       <bibkey>jurafsky-etal-2009-extracting</bibkey>
@@ -673,7 +673,7 @@
     </paper>
     <paper id="74">
       <title>Improved Syntactic Models for Parsing Speech with Repairs</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <pages>656–664</pages>
       <url hash="5ecfef78">N09-1074</url>
       <bibkey>miller-2009-improved</bibkey>
@@ -681,7 +681,7 @@
     <paper id="75">
       <title>A model of local coherence effects in human sentence processing as consequences of updates from bottom-up prior to posterior beliefs</title>
       <author><first>Klinton</first><last>Bicknell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>665–673</pages>
       <url hash="4d65e7f1">N09-1075</url>
       <bibkey>bicknell-levy-2009-model</bibkey>
@@ -691,10 +691,10 @@
     <meta>
       <booktitle>Proceedings of Human Language Technologies: The 2009 Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics, Companion Volume: Short Papers</booktitle>
       <url hash="fa32d9a3">N09-2</url>
-      <editor><first>Mari</first><last>Ostendorf</last></editor>
-      <editor><first>Michael</first><last>Collins</last></editor>
-      <editor><first>Shri</first><last>Narayanan</last></editor>
-      <editor><first>Douglas W.</first><last>Oard</last></editor>
+      <editor id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></editor>
+      <editor id="michael-collins"><first>Michael</first><last>Collins</last></editor>
+      <editor id="shrikanth-narayanan"><first>Shri</first><last>Narayanan</last></editor>
+      <editor id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></editor>
       <editor><first>Lucy</first><last>Vanderwende</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -709,7 +709,7 @@
     <paper id="1">
       <title>Cohesive Constraints in A Beam Search Phrase-based Decoder</title>
       <author><first>Nguyen</first><last>Bach</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
       <pages>1–4</pages>
       <url hash="e906c61f">N09-2001</url>
@@ -726,7 +726,7 @@
     <paper id="3">
       <title>Efficient Extraction of Oracle-best Translations from Hypergraphs</title>
       <author><first>Zhifei</first><last>Li</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>9–12</pages>
       <url hash="ff2c566a">N09-2003</url>
       <bibkey>li-khudanpur-2009-efficient</bibkey>
@@ -741,8 +741,8 @@
     </paper>
     <paper id="5">
       <title>Comparison of Extended Lexicon Models in Search and Rescoring for <fixed-case>SMT</fixed-case></title>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>17–20</pages>
       <url hash="5a5b6c10">N09-2005</url>
       <bibkey>hasan-ney-2009-comparison</bibkey>
@@ -767,7 +767,7 @@
     <paper id="8">
       <title>Large-scale Computation of Distributional Similarities for Queries</title>
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Silvana</first><last>Hartmann</last></author>
       <pages>29–32</pages>
       <url hash="e45384ef">N09-2008</url>
@@ -786,7 +786,7 @@
       <title>Identifying Types of Claims in Online Customer Reviews</title>
       <author><first>Shilpa</first><last>Arora</last></author>
       <author><first>Mahesh</first><last>Joshi</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>37–40</pages>
       <url hash="7cd4da08">N09-2010</url>
       <bibkey>arora-etal-2009-identifying</bibkey>
@@ -801,21 +801,21 @@
     </paper>
     <paper id="12">
       <title><fixed-case>TESLA</fixed-case>: A Tool for Annotating Geospatial Language Corpora</title>
-      <author><first>Nate</first><last>Blaylock</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
       <author><first>Bradley</first><last>Swain</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>45–48</pages>
       <url hash="76e7bc5f">N09-2012</url>
       <bibkey>blaylock-etal-2009-tesla</bibkey>
     </paper>
     <paper id="13">
       <title>Modeling Dialogue Structure with Adjacency Pair Analysis and Hidden <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Kristy Elizabeth</first><last>Boyer</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
+      <author id="kristy-boyer"><first>Kristy Elizabeth</first><last>Boyer</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
       <author><first>Michael</first><last>Wallis</last></author>
       <author><first>Mladen</first><last>Vouk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>49–52</pages>
       <url hash="74294069">N09-2013</url>
       <bibkey>boyer-etal-2009-modeling</bibkey>
@@ -825,7 +825,7 @@
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Gwen</first><last>Christian</last></author>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>53–56</pages>
       <url hash="c6bce32e">N09-2014</url>
       <bibkey>sagae-etal-2009-towards</bibkey>
@@ -842,8 +842,8 @@
     <paper id="16">
       <title>Learning <fixed-case>B</fixed-case>ayesian Networks for Semantic Frame Composition in a Spoken Dialog System</title>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Renato</first><last>de Mori</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>de Mori</last></author>
       <pages>61–64</pages>
       <url hash="30409cae">N09-2016</url>
       <bibkey>meurs-etal-2009-learning</bibkey>
@@ -851,7 +851,7 @@
     <paper id="17">
       <title>Evaluation of a System for Noun Concepts Acquisition from Utterances about Images (<fixed-case>SINCA</fixed-case>) Using Daily Conversation Data</title>
       <author><first>Yuzu</first><last>Uchida</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>65–68</pages>
       <url hash="befa4974">N09-2017</url>
       <bibkey>uchida-araki-2009-evaluation</bibkey>
@@ -859,14 +859,14 @@
     <paper id="18">
       <title>Web and Corpus Methods for <fixed-case>M</fixed-case>alay Count Classifier Prediction</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>69–72</pages>
       <url hash="25056650">N09-2018</url>
       <bibkey>nicholson-baldwin-2009-web</bibkey>
     </paper>
     <paper id="19">
       <title>Minimum <fixed-case>B</fixed-case>ayes Risk Combination of Translation Hypotheses from Alternative Morphological Decompositions</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Sami</first><last>Virpioja</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
@@ -877,8 +877,8 @@
     <paper id="20">
       <title>Generating Synthetic Children’s Acoustic Models from Adult Models</title>
       <author><first>Andreas</first><last>Hagen</last></author>
-      <author><first>Bryan</first><last>Pellom</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="bryan-pellom"><first>Bryan</first><last>Pellom</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
       <pages>77–80</pages>
       <url hash="a9b659f3">N09-2020</url>
       <bibkey>hagen-etal-2009-generating</bibkey>
@@ -886,7 +886,7 @@
     <paper id="21">
       <title>Detecting Pitch Accents at the Word, Syllable and Vowel Level</title>
       <author><first>Andrew</first><last>Rosenberg</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>81–84</pages>
       <url hash="3dee65f2">N09-2021</url>
       <bibkey>rosenberg-hirschberg-2009-detecting</bibkey>
@@ -903,17 +903,17 @@
     <paper id="23">
       <title>Automatic Agenda Graph Construction from Human-Human Dialogs using Clustering Method</title>
       <author><first>Cheongjae</first><last>Lee</last></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Kyungduk</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>89–92</pages>
       <url hash="620d93b8">N09-2023</url>
       <bibkey>lee-etal-2009-automatic</bibkey>
     </paper>
     <paper id="24">
       <title>A Simple Sentence-Level Extraction Algorithm for Comparable Data</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Jian-ming</first><last>Xu</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="jian-ming-xu"><first>Jian-ming</first><last>Xu</last></author>
       <pages>93–96</pages>
       <url hash="635d3774">N09-2024</url>
       <bibkey>tillmann-xu-2009-simple</bibkey>
@@ -921,15 +921,15 @@
     <paper id="25">
       <title>Learning Combination Features with <fixed-case>L</fixed-case>1 Regularization</title>
       <author><first>Daisuke</first><last>Okanohara</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>97–100</pages>
       <url hash="803ec394">N09-2025</url>
       <bibkey>okanohara-tsujii-2009-learning</bibkey>
     </paper>
     <paper id="26">
       <title>Multi-scale Personalization for Voice Search Applications</title>
-      <author><first>Daniel</first><last>Bolaños</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="daniel-bolanos"><first>Daniel</first><last>Bolaños</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <author><first>Patrick</first><last>Nguyen</last></author>
       <pages>101–104</pages>
       <url hash="c26d8187">N09-2026</url>
@@ -938,7 +938,7 @@
     <paper id="27">
       <title>The Importance of Sub-Utterance Prosody in Predicting Level of Certainty</title>
       <author><first>Heather</first><last>Pon-Barry</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>105–108</pages>
       <url hash="49396d18">N09-2027</url>
       <bibkey>pon-barry-shieber-2009-importance</bibkey>
@@ -961,7 +961,7 @@
     <paper id="30">
       <title>Topic Identification Using <fixed-case>W</fixed-case>ikipedia Graph Centrality</title>
       <author><first>Kino</first><last>Coursey</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>117–120</pages>
       <url hash="993c52a8">N09-2030</url>
       <bibkey>coursey-mihalcea-2009-topic</bibkey>
@@ -969,15 +969,15 @@
     <paper id="31">
       <title>Extracting Bilingual Dictionary from Comparable Corpora with Dependency Heterogeneity</title>
       <author><first>Kun</first><last>Yu</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>121–124</pages>
       <url hash="c485b784">N09-2031</url>
       <bibkey>yu-tsujii-2009-extracting</bibkey>
     </paper>
     <paper id="32">
       <title>Domain Adaptation with Artificial Data for Semantic Parsing of Speech</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>125–128</pages>
       <url hash="d971ad16">N09-2032</url>
@@ -997,7 +997,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>133–136</pages>
       <url hash="e5cfa9d6">N09-2034</url>
       <bibkey>katsumaru-etal-2009-speech</bibkey>
@@ -1005,7 +1005,7 @@
     <paper id="35">
       <title>Taking into Account the Differences between Actively and Passively Acquired Data: The Case of Active Learning with Support Vector Machines for Imbalanced Datasets</title>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>137–140</pages>
       <url hash="568b7aa6">N09-2035</url>
       <bibkey>bloodgood-vijay-shanker-2009-taking</bibkey>
@@ -1020,9 +1020,9 @@
     </paper>
     <paper id="37">
       <title>Evaluating the Syntactic Transformations in Gold Standard Corpora for Statistical Sentence Compression</title>
-      <author><first>Naman K.</first><last>Gupta</last></author>
+      <author id="naman-k-gupta"><first>Naman K.</first><last>Gupta</last></author>
       <author><first>Sourish</first><last>Chaudhuri</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>145–148</pages>
       <url hash="2c4126e0">N09-2037</url>
       <bibkey>gupta-etal-2009-evaluating</bibkey>
@@ -1033,11 +1033,11 @@
       <author><first>Roger</first><last>Hsiao</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
       <author><first>Paisarn</first><last>Charoenpornsawat</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Ian</first><last>Lane</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>149–152</pages>
       <url hash="4845b75b">N09-2038</url>
       <bibkey>bach-etal-2009-incremental</bibkey>
@@ -1059,8 +1059,8 @@
     </paper>
     <paper id="41">
       <title>Exploiting Named Entity Classes in <fixed-case>CCG</fixed-case> Surface Realization</title>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Dominic</first><last>Espinosa</last></author>
       <pages>161–164</pages>
       <url hash="65833676">N09-2041</url>
@@ -1072,7 +1072,7 @@
       <author><first>Yi</first><last>Chang</last></author>
       <author><first>Zhaohui</first><last>Zheng</last></author>
       <author><first>Donald</first><last>Metzler</last></author>
-      <author><first>Jian-yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-yun</first><last>Nie</last></author>
       <pages>165–168</pages>
       <url hash="aa50d7c3">N09-2042</url>
       <bibkey>zhang-etal-2009-search</bibkey>
@@ -1081,7 +1081,7 @@
       <title>A Local Tree Alignment-based Soft Pattern Matching Approach for Information Extraction</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Minwoo</first><last>Jeong</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>169–172</pages>
       <url hash="d5b2863f">N09-2043</url>
       <bibkey>kim-etal-2009-local</bibkey>
@@ -1102,7 +1102,7 @@
       <author><first>Luis</first><last>Tari</last></author>
       <author><first>Jörg</first><last>Hakenberg</last></author>
       <author><first>Chitta</first><last>Baral</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>177–180</pages>
       <url hash="3aee3be5">N09-2045</url>
       <bibkey>jonnalagadda-etal-2009-towards</bibkey>
@@ -1110,18 +1110,18 @@
     <paper id="46">
       <title>Improving <fixed-case>SCL</fixed-case> Model for Sentiment-Transfer Learning</title>
       <author><first>Songbo</first><last>Tan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>181–184</pages>
       <url hash="24f95207">N09-2046</url>
       <bibkey>tan-cheng-2009-improving</bibkey>
     </paper>
     <paper id="47">
       <title><fixed-case>MICA</fixed-case>: A Probabilistic Dependency Parser Based on Tree Insertion Grammars (Application Note)</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>185–188</pages>
       <url hash="44c42244">N09-2047</url>
       <bibkey>bangalore-etal-2009-mica</bibkey>
@@ -1129,7 +1129,7 @@
     <paper id="48">
       <title>Lexical and Syntactic Adaptation and Their Impact in Deployed Spoken Dialog Systems</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>189–192</pages>
       <url hash="a32008bc">N09-2048</url>
       <bibkey>stoyanchev-stent-2009-lexical</bibkey>
@@ -1145,15 +1145,15 @@
     <paper id="50">
       <title>The independence of dimensions in multidimensional dialogue act annotation</title>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>197–200</pages>
       <url hash="60c80b73">N09-2050</url>
       <bibkey>petukhova-bunt-2009-independence</bibkey>
     </paper>
     <paper id="51">
       <title>Improving Coreference Resolution by Using Conversational Metadata</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <pages>201–204</pages>
       <url hash="06973da7">N09-2051</url>
@@ -1179,16 +1179,16 @@
       <title>Improving A Simple Bigram <fixed-case>HMM</fixed-case> Part-of-Speech Tagger by Latent Annotation and Self-Training</title>
       <author><first>Zhongqiang</first><last>Huang</last></author>
       <author><first>Vladimir</first><last>Eidelman</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>213–216</pages>
       <url hash="42e44eff">N09-2054</url>
       <bibkey>huang-etal-2009-improving</bibkey>
     </paper>
     <paper id="55">
       <title>Statistical Post-Editing of a Rule-Based Machine Translation System</title>
-      <author><first>Antonio-L.</first><last>Lagarda</last></author>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio-L.</first><last>Lagarda</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Roberto</first><last>Silva</last></author>
       <author><first>Enrique</first><last>Díaz-de-Liaño</last></author>
       <pages>217–220</pages>
@@ -1197,9 +1197,9 @@
     </paper>
     <paper id="56">
       <title>On the Importance of Pivot Language Selection for Statistical Machine Translation</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>221–224</pages>
       <url hash="56d5a9a4">N09-2056</url>
@@ -1216,7 +1216,7 @@
     <paper id="58">
       <title>Determining the position of adverbial phrases in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Huayan</first><last>Zhong</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>229–232</pages>
       <url hash="9fac5678">N09-2058</url>
       <bibkey>zhong-stent-2009-determining</bibkey>
@@ -1224,9 +1224,9 @@
     <paper id="59">
       <title>Estimating and Exploiting the Entropy of Sense Distributions</title>
       <author><first>Peng</first><last>Jin</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>233–236</pages>
       <url hash="8268b46d">N09-2059</url>
       <bibkey>jin-etal-2009-estimating</bibkey>
@@ -1240,14 +1240,14 @@
     </paper>
     <paper id="61">
       <title>Sentence Boundary Detection and the Problem with the <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>.</title>
-      <author><first>Dan</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
       <pages>241–244</pages>
       <url hash="ca3b1788">N09-2061</url>
       <bibkey>gillick-2009-sentence</bibkey>
     </paper>
     <paper id="62">
       <title>Quadratic Features and Deep Architectures for Chunking</title>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <author><first>James</first><last>Bergstra</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <pages>245–248</pages>
@@ -1263,7 +1263,7 @@
     </paper>
     <paper id="64">
       <title>Combining Constituent Parsers</title>
-      <author><first>Victoria</first><last>Fossum</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <pages>253–256</pages>
       <url hash="a4569b89">N09-2064</url>
@@ -1272,7 +1272,7 @@
     <paper id="65">
       <title>Recognising the Predicate-argument Structure of <fixed-case>T</fixed-case>agalog</title>
       <author><first>Meladel</first><last>Mistica</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>257–260</pages>
       <url hash="b8eaf0ca">N09-2065</url>
       <bibkey>mistica-baldwin-2009-recognising</bibkey>
@@ -1288,38 +1288,38 @@
     <paper id="67">
       <title>Anchored Speech Recognition for Question Answering</title>
       <author><first>Sibel</first><last>Yaman</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Dimitra</first><last>Vergyri</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>265–268</pages>
       <url hash="3d16b7d1">N09-2067</url>
       <bibkey>yaman-etal-2009-anchored</bibkey>
     </paper>
     <paper id="68">
       <title>Score Distribution Based Term Specific Thresholding for Spoken Term Detection</title>
-      <author><first>Doğan</first><last>Can</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="dogan-can"><first>Doğan</first><last>Can</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>269–272</pages>
       <url hash="16c05a4e">N09-2068</url>
       <bibkey>can-saraclar-2009-score</bibkey>
     </paper>
     <paper id="69">
       <title>Automatic <fixed-case>C</fixed-case>hinese Abbreviation Generation Using Conditional Random Field</title>
-      <author><first>Dong</first><last>Yang</last></author>
-      <author><first>Yi-cheng</first><last>Pan</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last></author>
+      <author id="yi-cheng-pan"><first>Yi-cheng</first><last>Pan</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>273–276</pages>
       <url hash="dbf055ea">N09-2069</url>
       <bibkey>yang-etal-2009-automatic</bibkey>
     </paper>
     <paper id="70">
       <title>Fast decoding for open vocabulary spoken term detection</title>
-      <author><first>Bhuvana</first><last>Ramabhadran</last></author>
+      <author id="bhuvana-ramabhadran"><first>Bhuvana</first><last>Ramabhadran</last></author>
       <author><first>Abhinav</first><last>Sethy</last></author>
       <author><first>Jonathan</first><last>Mamou</last></author>
-      <author><first>Brian</first><last>Kingsbury</last></author>
+      <author id="brian-kingsbury"><first>Brian</first><last>Kingsbury</last></author>
       <author><first>Upendra</first><last>Chaudhari</last></author>
       <pages>277–280</pages>
       <url hash="aa787936">N09-2070</url>
@@ -1328,7 +1328,7 @@
     <paper id="71">
       <title>Tightly coupling Speech Recognition and Search</title>
       <author><first>Taniya</first><last>Mishra</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>281–284</pages>
       <url hash="b32ed945">N09-2071</url>
       <bibkey>mishra-bangalore-2009-tightly</bibkey>
@@ -1341,7 +1341,7 @@
       <editor><first>Ulrich</first><last>Germann</last></editor>
       <editor><first>Chirag</first><last>Shah</last></editor>
       <editor><first>Svetlana</first><last>Stoyanchev</last></editor>
-      <editor><first>Carolyn Penstein</first><last>Rosé</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></editor>
       <editor><first>Anoop</first><last>Sarkar</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -1356,8 +1356,8 @@
     <paper id="1">
       <title>Classifier Combination Techniques Applied to Coreference Resolution</title>
       <author><first>Smita</first><last>Vemulapalli</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>John F.</first><last>Pitrelli</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="john-f-pitrelli"><first>John F.</first><last>Pitrelli</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <pages>1–6</pages>
       <url hash="c41eba5f">N09-3001</url>
@@ -1366,7 +1366,7 @@
     <paper id="2">
       <title>Solving the “<fixed-case>W</fixed-case>ho’s <fixed-case>M</fixed-case>ark <fixed-case>J</fixed-case>ohnson <fixed-case>P</fixed-case>uzzle”: <fixed-case>I</fixed-case>nformation Extraction Based Cross Document Coreference</title>
       <author><first>Jian</first><last>Huang</last></author>
-      <author><first>Sarah M.</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah M.</first><last>Taylor</last></author>
       <author><first>Jonathan L.</first><last>Smith</last></author>
       <author><first>Konstantinos A.</first><last>Fotiadis</last></author>
       <author><first>C. Lee</first><last>Giles</last></author>
@@ -1377,7 +1377,7 @@
     <paper id="3">
       <title>Exploring Topic Continuation Follow-up Questions using Machine Learning</title>
       <author><first>Manuel</first><last>Kirschner</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>13–18</pages>
       <url hash="f27e4a5e">N09-3003</url>
       <bibkey>kirschner-bernardi-2009-exploring</bibkey>
@@ -1393,7 +1393,7 @@
     <paper id="5">
       <title>Using Language Modeling to Select Useful Annotation Data</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>25–30</pages>
       <url hash="97332182">N09-3005</url>
       <bibkey>dligach-palmer-2009-using</bibkey>
@@ -1409,7 +1409,7 @@
       <title>Building a Semantic Lexicon of <fixed-case>E</fixed-case>nglish Nouns via Bootstrapping</title>
       <author><first>Ting</first><last>Qian</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>37–42</pages>
       <url hash="205bf588">N09-3007</url>
       <bibkey>qian-etal-2009-building</bibkey>
@@ -1434,7 +1434,7 @@
     <paper id="10">
       <title>Interactive Annotation Learning with Indirect Feature Voting</title>
       <author><first>Shilpa</first><last>Arora</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>55–60</pages>
       <url hash="a374fe18">N09-3010</url>
       <bibkey>arora-nyberg-2009-interactive</bibkey>
@@ -1443,7 +1443,7 @@
       <title>Loss-Sensitive Discriminative Training of Machine Transliteration Models</title>
       <author><first>Kedar</first><last>Bellare</last></author>
       <author><first>Koby</first><last>Crammer</last></author>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <pages>61–65</pages>
       <url hash="a13c79bd">N09-3011</url>
       <bibkey>bellare-etal-2009-loss</bibkey>
@@ -1460,9 +1460,9 @@
     <paper id="13">
       <title>Towards Building a Competitive Opinion Summarization System: <fixed-case>C</fixed-case>hallenges and Keys</title>
       <author><first>Elena</first><last>Lloret</last></author>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>72–77</pages>
       <url hash="58bd315e">N09-3013</url>
       <bibkey>lloret-etal-2009-towards</bibkey>
@@ -1485,7 +1485,7 @@
     <paper id="16">
       <title>Modeling Letter-to-Phoneme Conversion as a Phrase Based Statistical Machine Translation Problem with <fixed-case>M</fixed-case>inimum <fixed-case>E</fixed-case>rror <fixed-case>R</fixed-case>ate Training</title>
       <author><first>Taraka</first><last>Rama</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Sudheer</first><last>Kolachina</last></author>
       <pages>90–95</pages>
       <url hash="b5aeaa75">N09-3016</url>
@@ -1505,7 +1505,7 @@
       <booktitle>Proceedings of Human Language Technologies: The 2009 Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics, Companion Volume: Tutorial Abstracts</booktitle>
       <url hash="ee31140b">N09-4</url>
       <editor><first>Ciprian</first><last>Chelba</last></editor>
-      <editor><first>Paul</first><last>Kantor</last></editor>
+      <editor id="paul-kantor"><first>Paul</first><last>Kantor</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -1520,7 +1520,7 @@
     <paper id="1">
       <title>Data Intensive Text Processing with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe</title>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1–2</pages>
       <url hash="f48ba9a2">N09-4001</url>
       <bibkey>lin-dyer-2009-data</bibkey>
@@ -1542,7 +1542,7 @@
     </paper>
     <paper id="4">
       <title>Extracting World and Linguistic Knowledge from <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>7–8</pages>
       <url hash="6e4c895c">N09-4004</url>
@@ -1550,7 +1550,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>O</fixed-case>pen<fixed-case>F</fixed-case>st: An Open-Source, Weighted Finite-State Transducer Library and its Applications to Speech and Language</title>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author><first>Martin</first><last>Jansche</last></author>
       <pages>9–10</pages>
@@ -1559,7 +1559,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: The 90% Solution</title>
-      <author><first>Sameer S.</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer S.</first><last>Pradhan</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>11–12</pages>
       <url hash="bfa58762">N09-4006</url>
@@ -1567,9 +1567,9 @@
     </paper>
     <paper id="7">
       <title><fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et overview, extensions, mappings and applications</title>
-      <author><first>Karin</first><last>Kipper Schuler</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper Schuler</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <pages>13–14</pages>
       <url hash="bf26ee27">N09-4007</url>
       <bibkey>kipper-schuler-etal-2009-verbnet</bibkey>
@@ -1577,7 +1577,7 @@
     <paper id="8">
       <title>Writing Systems, Transliteration and Decipherment</title>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>15–16</pages>
       <url hash="35fd4eaa">N09-4008</url>
       <bibkey>knight-sproat-2009-writing</bibkey>
@@ -1587,7 +1587,7 @@
     <meta>
       <booktitle>Proceedings of Human Language Technologies: The 2009 Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics, Companion Volume: Demonstration Session</booktitle>
       <url hash="d6fa87d7">N09-5</url>
-      <editor><first>Michael</first><last>Johnston</last></editor>
+      <editor id="michael-johnston"><first>Michael</first><last>Johnston</last></editor>
       <editor><first>Fred</first><last>Popowich</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -1610,16 +1610,16 @@
     <paper id="2">
       <title>Building Conversational Agents with Basilica</title>
       <author><first>Rohit</first><last>Kumar</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
-      <author><first>Michael J.</first><last>Witbrock</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="michael-j-witbrock"><first>Michael J.</first><last>Witbrock</last></author>
       <pages>5–8</pages>
       <url hash="a6652f7c">N09-5002</url>
       <bibkey>kumar-etal-2009-building</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>STAT</fixed-case>: Speech Transcription Analysis Tool</title>
-      <author><first>Stephen A.</first><last>Kunath</last></author>
-      <author><first>Steven H.</first><last>Weinberger</last></author>
+      <author id="stephen-kunath"><first>Stephen A.</first><last>Kunath</last></author>
+      <author id="steven-h-weinberger"><first>Steven H.</first><last>Weinberger</last></author>
       <pages>9–12</pages>
       <url hash="d7f36c38">N09-5003</url>
       <bibkey>kunath-weinberger-2009-stat</bibkey>
diff --git a/data/xml/N10.xml b/data/xml/N10.xml
index 82a78cdeb0..627e4e5842 100644
--- a/data/xml/N10.xml
+++ b/data/xml/N10.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Human Language Technologies: The 2010 Annual Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics</booktitle>
       <url hash="84575e54">N10-1</url>
-      <editor><first>Ron</first><last>Kaplan</last></editor>
+      <editor id="ronald-m-kaplan"><first>Ron</first><last>Kaplan</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
-      <editor><first>Mary</first><last>Harper</last></editor>
+      <editor id="mary-harper"><first>Mary</first><last>Harper</last></editor>
       <editor><first>Gerald</first><last>Penn</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
@@ -28,9 +28,9 @@
     <paper id="2">
       <title>Chart Mining-based Lexical Acquisition with Precision Grammars</title>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Jeremy</first><last>Nicholson</last></author>
       <pages>10–18</pages>
       <url hash="93d07058">N10-1002</url>
@@ -71,7 +71,7 @@
     <paper id="7">
       <title>Qme! : A Speech-based Question-Answering system on Mobile Devices</title>
       <author><first>Taniya</first><last>Mishra</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>55–63</pages>
       <url hash="acc06a2d">N10-1007</url>
       <bibkey>mishra-bangalore-2010-qme</bibkey>
@@ -79,8 +79,8 @@
     <paper id="8">
       <title>Dialogue-Oriented Review Summary Generation for Spoken Dialogue Recommendation Systems</title>
       <author><first>Jingjing</first><last>Liu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <pages>64–72</pages>
       <url hash="1370d72e">N10-1008</url>
       <bibkey>liu-etal-2010-dialogue</bibkey>
@@ -88,7 +88,7 @@
     <paper id="9">
       <title>Minimally-Supervised Extraction of Entities from Text Advertisements</title>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Dustin</first><last>Hillard</last></author>
+      <author id="dustin-hillard"><first>Dustin</first><last>Hillard</last></author>
       <author><first>Chris</first><last>Leggetter</last></author>
       <pages>73–81</pages>
       <url hash="26e15360">N10-1009</url>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="10">
       <title>Taxonomy Learning Using Word Sense Induction</title>
-      <author><first>Ioannis P.</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis P.</first><last>Klapaftis</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>82–90</pages>
       <url hash="f6b7ba38">N10-1010</url>
@@ -115,7 +115,7 @@
       <author><first>David</first><last>Newman</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Karl</first><last>Grieser</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>100–108</pages>
       <url hash="81942c08">N10-1012</url>
       <bibkey>newman-etal-2010-automatic</bibkey>
@@ -123,7 +123,7 @@
     <paper id="13">
       <title>Multi-Prototype Vector-Space Models of Word Meaning</title>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>109–117</pages>
       <url hash="fb8631df">N10-1013</url>
       <bibkey>reisinger-mooney-2010-multi</bibkey>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="16">
       <title>Learning Translation Boundaries for Phrase-Based Decoding</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>136–144</pages>
@@ -183,14 +183,14 @@
       <title>Unsupervised Modeling of <fixed-case>T</fixed-case>witter Conversations</title>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>172–180</pages>
       <url hash="452a7614">N10-1020</url>
       <bibkey>ritter-etal-2010-unsupervised</bibkey>
     </paper>
     <paper id="21">
       <title>Streaming First Story Detection with application to <fixed-case>T</fixed-case>witter</title>
-      <author><first>Saša</first><last>Petrović</last></author>
+      <author id="sasa-petrovic"><first>Saša</first><last>Petrović</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <author><first>Victor</first><last>Lavrenko</last></author>
       <pages>181–189</pages>
@@ -200,9 +200,9 @@
     <paper id="22">
       <title>Unsupervised Model Adaptation using Information-Theoretic Criterion</title>
       <author><first>Ariya</first><last>Rastrow</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <author><first>Abhinav</first><last>Sethy</last></author>
-      <author><first>Bhuvana</first><last>Ramabhadran</last></author>
+      <author id="bhuvana-ramabhadran"><first>Bhuvana</first><last>Ramabhadran</last></author>
       <pages>190–197</pages>
       <url hash="1f936ff9">N10-1022</url>
       <bibkey>rastrow-etal-2010-unsupervised</bibkey>
@@ -227,7 +227,7 @@
       <author><first>Carolina</first><last>Parada</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Denis</first><last>Filimonov</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <pages>216–224</pages>
       <url hash="cfc4a6ce">N10-1025</url>
       <bibkey>parada-etal-2010-contextual</bibkey>
@@ -242,7 +242,7 @@
     </paper>
     <paper id="27">
       <title>Language Identification: The Long and the Short of the Matter</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Marco</first><last>Lui</last></author>
       <pages>229–237</pages>
       <url hash="a74c2bc9">N10-1027</url>
@@ -250,8 +250,8 @@
     </paper>
     <paper id="28">
       <title>Inducing Synchronous Grammars with Slice Sampling</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>238–241</pages>
       <url hash="4d2291dd">N10-1028</url>
       <bibkey>blunsom-cohn-2010-inducing</bibkey>
@@ -259,7 +259,7 @@
     <paper id="29">
       <title>Task-based Evaluation of Multiword Expressions: a Pilot Study in Statistical Machine Translation</title>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>242–245</pages>
       <url hash="debfa57c">N10-1029</url>
       <bibkey>carpuat-diab-2010-task</bibkey>
@@ -276,7 +276,7 @@
     <paper id="31">
       <title>Extending the <fixed-case>METEOR</fixed-case> Machine Translation Evaluation Metric to the Phrase Level</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>250–253</pages>
       <url hash="b9f8fd2b">N10-1031</url>
       <bibkey>denkowski-lavie-2010-extending</bibkey>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="33">
       <title>Two monolingual parses are better than one (synchronous parse)</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>263–266</pages>
       <url hash="cbdcdcca">N10-1033</url>
       <bibkey>dyer-2010-two</bibkey>
@@ -318,7 +318,7 @@
       <author><first>Zheng</first><last>Chen</last></author>
       <author><first>Jonathan</first><last>Feldman</last></author>
       <author><first>Antonio</first><last>Gonzalez</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Vivek</first><last>Upadhyay</last></author>
       <pages>285–288</pages>
       <url hash="343d5b58">N10-1036</url>
@@ -328,7 +328,7 @@
       <title>Evaluation Metrics for the Lexical Substitution Task</title>
       <author><first>Sanaz</first><last>Jabbari</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <pages>289–292</pages>
       <url hash="b73cad99">N10-1037</url>
       <bibkey>jabbari-etal-2010-evaluation</bibkey>
@@ -338,7 +338,7 @@
       <author><first>Mahesh</first><last>Joshi</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>293–296</pages>
       <url hash="574d207b">N10-1038</url>
       <bibkey>joshi-etal-2010-movie</bibkey>
@@ -364,7 +364,7 @@
       <title>Putting the User in the Loop: Interactive Maximal Marginal Relevance for Query-Focused Summarization</title>
       <author><first>Jimmy</first><last>Lin</last></author>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>305–308</pages>
       <url hash="f5a89f35">N10-1041</url>
       <bibkey>lin-etal-2010-putting</bibkey>
@@ -389,7 +389,7 @@
     </paper>
     <paper id="44">
       <title>Time-Efficient Creation of an Accurate Sentence Fusion Corpus</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
       <author><first>Coleman</first><last>Moore</last></author>
@@ -400,7 +400,7 @@
     <paper id="45">
       <title>Towards Cross-Lingual Textual Entailment</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>321–324</pages>
       <url hash="95152d8e">N10-1045</url>
@@ -409,7 +409,7 @@
     <paper id="46">
       <title>A Comparative Study of Word Co-occurrence for Term Clustering in Language Model-based Sentence Retrieval</title>
       <author><first>Saeedeh</first><last>Momtazi</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>325–328</pages>
       <url hash="adc5369b">N10-1046</url>
@@ -432,7 +432,7 @@
     </paper>
     <paper id="49">
       <title>The Simple Truth about Dependency and Phrase Structure Representations: An Opinion Piece</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>337–340</pages>
       <url hash="4da762a2">N10-1049</url>
       <bibkey>rambow-2010-simple</bibkey>
@@ -448,11 +448,11 @@
     </paper>
     <paper id="51">
       <title>Crowdsourcing the evaluation of a domain-adapted named entity recognition system</title>
-      <author><first>Asad B.</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad B.</first><last>Sayeed</last></author>
       <author><first>Timothy J.</first><last>Meyer</last></author>
       <author><first>Hieu C.</first><last>Nguyen</last></author>
       <author><first>Olivia</first><last>Buzek</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>345–348</pages>
       <url hash="0f63a975">N10-1051</url>
       <bibkey>sayeed-etal-2010-crowdsourcing</bibkey>
@@ -501,7 +501,7 @@
     </paper>
     <paper id="57">
       <title>Predicting Human-Targeted Translation Edit Rate via Untrained Human Annotators</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>369–372</pages>
       <url hash="7302ffa7">N10-1057</url>
@@ -510,8 +510,8 @@
     <paper id="58">
       <title>Improving Semantic Role Classification with Selectional Preferences</title>
       <author><first>Beñat</first><last>Zapirain</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>373–376</pages>
       <url hash="0ff938b5">N10-1058</url>
@@ -553,7 +553,7 @@
     </paper>
     <paper id="63">
       <title>Extracting Parallel Sentences from Comparable Corpora using Document Level Alignment</title>
-      <author><first>Jason R.</first><last>Smith</last></author>
+      <author id="jason-smith"><first>Jason R.</first><last>Smith</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>403–411</pages>
@@ -571,9 +571,9 @@
     </paper>
     <paper id="65">
       <title>Everybody loves a rich cousin: An empirical study of transliteration through bridge languages</title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
-      <author><first>A</first><last>Kumaran</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>420–428</pages>
       <url hash="aa9bce98">N10-1065</url>
       <bibkey>khapra-etal-2010-everybody</bibkey>
@@ -610,16 +610,16 @@
     <paper id="69">
       <title>Distributed Training Strategies for the Structured Perceptron</title>
       <author><first>Ryan</first><last>McDonald</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <pages>456–464</pages>
       <url hash="0c754f6d">N10-1069</url>
       <bibkey>mcdonald-etal-2010-distributed</bibkey>
     </paper>
     <paper id="70">
       <title>Term Weighting Schemes for <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation</title>
-      <author><first>Andrew T.</first><last>Wilson</last></author>
-      <author><first>Peter A.</first><last>Chew</last></author>
+      <author id="andrew-wilson"><first>Andrew T.</first><last>Wilson</last></author>
+      <author id="peter-a-chew"><first>Peter A.</first><last>Chew</last></author>
       <pages>465–473</pages>
       <url hash="afbd4fa6">N10-1070</url>
       <bibkey>wilson-chew-2010-term</bibkey>
@@ -628,9 +628,9 @@
       <title>Learning Dense Models of Query Similarity from User Click Logs</title>
       <author><first>Fabio</first><last>De Bona</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
-      <author><first>Amaç</first><last>Herdaǧdelen</last></author>
+      <author id="amac-herdagdelen"><first>Amaç</first><last>Herdaǧdelen</last></author>
       <author><first>Maria</first><last>Holmqvist</last></author>
       <pages>474–482</pages>
       <url hash="b49f67a9">N10-1071</url>
@@ -648,17 +648,17 @@
     </paper>
     <paper id="73">
       <title>Improving the Multilingual User Experience of <fixed-case>W</fixed-case>ikipedia Using Cross-Language Name Search</title>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>492–500</pages>
       <url hash="f6b4848e">N10-1073</url>
       <bibkey>udupa-khapra-2010-improving</bibkey>
     </paper>
     <paper id="74">
       <title>Learning Words and Their Meanings from Unsegmented Child-directed Speech</title>
-      <author><first>Bevan K.</first><last>Jones</last></author>
+      <author id="bevan-jones"><first>Bevan K.</first><last>Jones</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Michael C.</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
       <pages>501–509</pages>
       <url hash="6d201241">N10-1074</url>
       <bibkey>jones-etal-2010-learning</bibkey>
@@ -666,7 +666,7 @@
     <paper id="75">
       <title>Subword Variation in Text Message Classification</title>
       <author><first>Robert</first><last>Munro</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>510–518</pages>
       <url hash="323a9293">N10-1075</url>
       <bibkey>munro-manning-2010-subword</bibkey>
@@ -675,7 +675,7 @@
       <title>Automatic Diacritization for Low-Resource Languages Using a Hybrid Word and Consonant <fixed-case>CMM</fixed-case></title>
       <author><first>Robbie</first><last>Haertel</last></author>
       <author><first>Peter</first><last>McClanahan</last></author>
-      <author><first>Eric K.</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric K.</first><last>Ringger</last></author>
       <pages>519–527</pages>
       <url hash="824ea0e3">N10-1076</url>
       <bibkey>haertel-etal-2010-automatic</bibkey>
@@ -697,9 +697,9 @@
     </paper>
     <paper id="79">
       <title>Online Learning for Interactive Statistical Machine Translation</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>546–554</pages>
       <url hash="7cb60d39">N10-1079</url>
       <bibkey>ortiz-martinez-etal-2010-online</bibkey>
@@ -707,17 +707,17 @@
     <paper id="80">
       <title>The Best Lexical Metric for Phrase-Based Statistical <fixed-case>MT</fixed-case> System Optimization</title>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>555–563</pages>
       <url hash="7438bb66">N10-1080</url>
       <bibkey>cer-etal-2010-best</bibkey>
     </paper>
     <paper id="81">
       <title>Variational Inference for <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>David M.</first><last>Blei</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="david-blei"><first>David M.</first><last>Blei</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>564–572</pages>
       <url hash="edaf8450">N10-1081</url>
       <bibkey>cohen-etal-2010-variational</bibkey>
@@ -725,7 +725,7 @@
     <paper id="82">
       <title>Type-Based <fixed-case>MCMC</fixed-case></title>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Michael I.</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael I.</first><last>Jordan</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>573–581</pages>
       <url hash="57182040">N10-1082</url>
@@ -734,7 +734,7 @@
     <paper id="83">
       <title>Painless Unsupervised Learning with Features</title>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
-      <author><first>Alexandre</first><last>Bouchard-Côté</last></author>
+      <author id="alexandre-bouchard-cote"><first>Alexandre</first><last>Bouchard-Côté</last></author>
       <author><first>John</first><last>DeNero</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>582–590</pages>
@@ -743,7 +743,7 @@
     </paper>
     <paper id="84">
       <title>Linguistic Steganography Using Automatically Generated Paraphrases</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>591–599</pages>
       <url hash="e204896b">N10-1084</url>
@@ -761,7 +761,7 @@
     <paper id="86">
       <title>Good Question! Statistical Ranking for Question Generation</title>
       <author><first>Michael</first><last>Heilman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>609–617</pages>
       <url hash="75258b5c">N10-1086</url>
       <bibkey>heilman-smith-2010-good</bibkey>
@@ -769,7 +769,7 @@
     <paper id="87">
       <title>Not All Seeds Are Equal: Measuring the Quality of Text Mining Seeds</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>618–626</pages>
       <url hash="d43fbf7b">N10-1087</url>
       <bibkey>kozareva-hovy-2010-seeds</bibkey>
@@ -792,9 +792,9 @@
     </paper>
     <paper id="90">
       <title>A Simple Approach for <fixed-case>HPSG</fixed-case> Supertagging Using Dependency Information</title>
-      <author><first>Yao-zhong</first><last>Zhang</last></author>
+      <author id="yao-zhong-zhang"><first>Yao-zhong</first><last>Zhang</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>645–648</pages>
       <url hash="1cdf0579">N10-1090</url>
       <bibkey>zhang-etal-2010-simple</bibkey>
@@ -802,7 +802,7 @@
     <paper id="91">
       <title>Ensemble Models for Dependency Parsing: Cheap and Good?</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>649–652</pages>
       <url hash="239aece5">N10-1091</url>
       <bibkey>surdeanu-manning-2010-ensemble</bibkey>
@@ -810,8 +810,8 @@
     <paper id="92">
       <title>Enlarged Search Space for <fixed-case>SITG</fixed-case> Parsing</title>
       <author><first>Guillem</first><last>Gascó</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
-      <author><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
       <pages>653–656</pages>
       <url hash="5bb24924">N10-1092</url>
       <bibkey>gasco-etal-2010-enlarged</bibkey>
@@ -821,7 +821,7 @@
       <author><first>Phani</first><last>Gadde</last></author>
       <author><first>Karan</first><last>Jindal</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>657–660</pages>
       <url hash="f77832c9">N10-1093</url>
@@ -845,9 +845,9 @@
     </paper>
     <paper id="96">
       <title>An Exploration of Off Topic Conversation</title>
-      <author><first>Whitney L.</first><last>Cade</last></author>
+      <author id="whitney-l-cade"><first>Whitney L.</first><last>Cade</last></author>
       <author><first>Blair A.</first><last>Lehman</last></author>
-      <author><first>Andrew</first><last>Olney</last></author>
+      <author id="andrew-olney"><first>Andrew</first><last>Olney</last></author>
       <pages>669–672</pages>
       <url hash="0e1d3b04">N10-1096</url>
       <bibkey>cade-etal-2010-exploration</bibkey>
@@ -855,7 +855,7 @@
     <paper id="97">
       <title>Making Conversational Structure Explicit: Identification of Initiation-response Pairs within Online Discussions</title>
       <author><first>Yi-Chia</first><last>Wang</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>673–676</pages>
       <url hash="9be67495">N10-1097</url>
       <bibkey>wang-rose-2010-making</bibkey>
@@ -863,7 +863,7 @@
     <paper id="98">
       <title>Engaging learning groups using Social Interaction Strategies</title>
       <author><first>Rohit</first><last>Kumar</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>677–680</pages>
       <url hash="4623dc14">N10-1098</url>
       <bibkey>kumar-rose-2010-engaging</bibkey>
@@ -871,7 +871,7 @@
     <paper id="99">
       <title>Using Entity-Based Features to Model Coherence in Student Essays</title>
       <author><first>Jill</first><last>Burstein</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Slava</first><last>Andreyev</last></author>
       <pages>681–684</pages>
       <url hash="a6fff05c">N10-1099</url>
@@ -881,7 +881,7 @@
       <title>Summarizing Microblogs Automatically</title>
       <author><first>Beaux</first><last>Sharifi</last></author>
       <author><first>Mark-Anthony</first><last>Hutton</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>685–688</pages>
       <url hash="37cd48cb">N10-1100</url>
       <bibkey>sharifi-etal-2010-summarizing</bibkey>
@@ -890,7 +890,7 @@
       <title>Automatic Generation of Personalized Annotation Tags for <fixed-case>T</fixed-case>witter Users</title>
       <author><first>Wei</first><last>Wu</last></author>
       <author><first>Bin</first><last>Zhang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>689–692</pages>
       <url hash="cd2e10ca">N10-1101</url>
       <bibkey>wu-etal-2010-automatic</bibkey>
@@ -916,8 +916,8 @@
     <paper id="104">
       <title>A Hybrid Morphologically Decomposed Factored Language Models for <fixed-case>A</fixed-case>rabic <fixed-case>LVCSR</fixed-case></title>
       <author><first>Amr</first><last>El-Desoky</last></author>
-      <author><first>Ralf</first><last>Schlüter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="ralf-schlueter"><first>Ralf</first><last>Schlüter</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>701–704</pages>
       <url hash="8033095c">N10-1104</url>
       <bibkey>el-desoky-etal-2010-hybrid</bibkey>
@@ -925,7 +925,7 @@
     <paper id="105">
       <title>Is <fixed-case>A</fixed-case>rabic Part of Speech Tagging Feasible Without Word Segmentation?</title>
       <author><first>Emad</first><last>Mohamed</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>705–708</pages>
       <url hash="d66086d3">N10-1105</url>
       <bibkey>mohamed-kubler-2010-arabic-part</bibkey>
@@ -951,7 +951,7 @@
       <author><first>Bin</first><last>Zhang</last></author>
       <author><first>Brian</first><last>Hutchinson</last></author>
       <author><first>Wei</first><last>Wu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>717–720</pages>
       <url hash="840ca226">N10-1108</url>
       <bibkey>zhang-etal-2010-extracting-phrase</bibkey>
@@ -969,7 +969,7 @@
       <author><first>Preethi</first><last>Jyothi</last></author>
       <author><first>William</first><last>Hartmann</last></author>
       <author><first>Jeremy</first><last>Morris</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>725–728</pages>
       <url hash="8639e386">N10-1110</url>
       <bibkey>prabhavalkar-etal-2010-investigations</bibkey>
@@ -987,7 +987,7 @@
     <paper id="112">
       <title>Softmax-Margin <fixed-case>CRF</fixed-case>s: Training Log-Linear Models with Cost Functions</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>733–736</pages>
       <url hash="673c2693">N10-1112</url>
       <bibkey>gimpel-smith-2010-softmax</bibkey>
@@ -995,8 +995,8 @@
     <paper id="113">
       <title>Bitext-Based Resolution of <fixed-case>G</fixed-case>erman Subject-Object Ambiguities</title>
       <author><first>Florian</first><last>Schwarck</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>737–740</pages>
       <url hash="3509d0f5">N10-1113</url>
       <bibkey>schwarck-etal-2010-bitext</bibkey>
@@ -1018,9 +1018,9 @@
     </paper>
     <paper id="116">
       <title>From Baby Steps to Leapfrog: How “Less is More” in Unsupervised Dependency Parsing</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>751–759</pages>
       <url hash="30fbe925">N10-1116</url>
       <bibkey>spitkovsky-etal-2010-baby</bibkey>
@@ -1028,7 +1028,7 @@
     <paper id="117">
       <title>Relaxed Marginal Inference and its Application to Dependency Parsing</title>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>760–768</pages>
       <url hash="5900ede6">N10-1117</url>
       <bibkey>riedel-smith-2010-relaxed</bibkey>
@@ -1070,7 +1070,7 @@
     <paper id="122">
       <title>An Unsupervised Aspect-Sentiment Model for Online Reviews</title>
       <author><first>Samuel</first><last>Brody</last></author>
-      <author><first>Noemie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
       <pages>804–812</pages>
       <url hash="c98abfaa">N10-1122</url>
       <bibkey>brody-elhadad-2010-unsupervised</bibkey>
@@ -1086,7 +1086,7 @@
     <paper id="124">
       <title>Clinical Information Retrieval using Document and <fixed-case>PICO</fixed-case> Structure</title>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Martin</first><last>Dawes</last></author>
       <pages>822–830</pages>
       <url hash="b0ea61c2">N10-1124</url>
@@ -1102,10 +1102,10 @@
     </paper>
     <paper id="126">
       <title>Learning about Voice Search for Spoken Dialogue Systems</title>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <author><first>Susan L.</first><last>Epstein</last></author>
       <author><first>Tiziana</first><last>Ligorio</last></author>
-      <author><first>Joshua B.</first><last>Gordon</last></author>
+      <author id="joshua-b-gordon"><first>Joshua B.</first><last>Gordon</last></author>
       <author><first>Pravin</first><last>Bhutada</last></author>
       <pages>840–848</pages>
       <url hash="1765a864">N10-1126</url>
@@ -1120,7 +1120,7 @@
     </paper>
     <paper id="128">
       <title>Context-free reordering, finite-state translation</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>858–866</pages>
       <url hash="be03e150">N10-1128</url>
@@ -1130,7 +1130,7 @@
       <title>Improved Models of Distortion Cost for Statistical Machine Translation</title>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>867–875</pages>
       <url hash="cb40c147">N10-1129</url>
       <bibkey>green-etal-2010-improved</bibkey>
@@ -1144,7 +1144,7 @@
     </paper>
     <paper id="131">
       <title>An extractive supervised two-stage method for sentence compression</title>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>885–893</pages>
       <url hash="ea5691aa">N10-1131</url>
@@ -1154,7 +1154,7 @@
       <title>Interpretation and Transformation for Abstracting Conversations</title>
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>894–902</pages>
       <url hash="b7d10964">N10-1132</url>
       <bibkey>murray-etal-2010-interpretation</bibkey>
@@ -1162,10 +1162,10 @@
     <paper id="133">
       <title>Quantifying the Limits and Success of Extractive Summarization Systems Across Domains</title>
       <author><first>Hakan</first><last>Ceylan</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Umut</first><last>Özertem</last></author>
       <author><first>Elena</first><last>Lloret</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>903–911</pages>
       <url hash="859d7e47">N10-1133</url>
       <bibkey>ceylan-etal-2010-quantifying</bibkey>
@@ -1173,7 +1173,7 @@
     <paper id="134">
       <title>Multi-document Summarization via Budgeted Maximization of Submodular Functions</title>
       <author><first>Hui</first><last>Lin</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>912–920</pages>
       <url hash="345e8b81">N10-1134</url>
       <bibkey>lin-bilmes-2010-multi</bibkey>
@@ -1181,7 +1181,7 @@
     <paper id="135">
       <title>Cross-lingual Induction of Selectional Preferences with Bilingual Vector Spaces</title>
       <author><first>Yves</first><last>Peirsman</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>921–929</pages>
       <url hash="1aa92c2f">N10-1135</url>
       <bibkey>peirsman-pado-2010-cross</bibkey>
@@ -1199,7 +1199,7 @@
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Desai</first><last>Chen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>948–956</pages>
       <url hash="c890fd03">N10-1138</url>
       <bibkey>das-etal-2010-probabilistic</bibkey>
@@ -1210,7 +1210,7 @@
       <author><first>Shankar</first><last>Kumar</last></author>
       <author><first>Wolfgang</first><last>Macherey</last></author>
       <author><first>Mehryar</first><last>Mohri</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>957–965</pages>
       <url hash="d716bbde">N10-1139</url>
       <bibkey>allauzen-etal-2010-expected</bibkey>
@@ -1218,7 +1218,7 @@
     <paper id="140">
       <title>Accurate Non-Hierarchical Phrase-Based Translation</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>966–974</pages>
       <url hash="03eb078b">N10-1140</url>
       <bibkey>galley-manning-2010-accurate</bibkey>
@@ -1228,7 +1228,7 @@
       <author><first>John</first><last>DeNero</last></author>
       <author><first>Shankar</first><last>Kumar</last></author>
       <author><first>Ciprian</first><last>Chelba</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>975–983</pages>
       <url hash="92e37867">N10-1141</url>
       <bibkey>denero-etal-2010-model</bibkey>
@@ -1237,7 +1237,7 @@
       <title>Detecting Emails Containing Requests for Action</title>
       <author><first>Andrew</first><last>Lampert</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>984–992</pages>
       <url hash="2d312148">N10-1142</url>
       <bibkey>lampert-etal-2010-detecting</bibkey>
@@ -1260,7 +1260,7 @@
     <paper id="145">
       <title>Tree Edit Models for Recognizing Textual Entailments, Paraphrases, and Answers to Questions</title>
       <author><first>Michael</first><last>Heilman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1011–1019</pages>
       <url hash="24f31191">N10-1145</url>
       <bibkey>heilman-smith-2010-tree</bibkey>
@@ -1269,7 +1269,7 @@
       <title>Syntactic/Semantic Structures for Textual Entailment Recognition</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>1020–1028</pages>
       <url hash="ea289030">N10-1146</url>
       <bibkey>mehdad-etal-2010-syntactic</bibkey>
@@ -1286,7 +1286,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 Demonstration Session</booktitle>
       <url hash="741b7176">N10-2</url>
-      <editor><first>Carolyn Penstein</first><last>Rosé</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
       <month>June</month>
@@ -1299,7 +1299,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>C</fixed-case>amtology: Intelligent Information Access for Science</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Karl</first><last>Harrison</last></author>
       <author><first>Andrew</first><last>Naish-Guzman</last></author>
       <author><first>Andy</first><last>Parker</last></author>
@@ -1314,7 +1314,7 @@
     <paper id="2">
       <title>Summarizing Textual Information about Locations In a Geo-Spatial Information Display System</title>
       <author><first>Congxing</first><last>Cai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5–8</pages>
       <url hash="265b82da">N10-2002</url>
       <bibkey>cai-hovy-2010-summarizing</bibkey>
@@ -1323,17 +1323,17 @@
       <title><fixed-case>P</fixed-case>hrasal: A Statistical Machine Translation Toolkit for Exploring New Model Features</title>
       <author><first>Daniel</first><last>Cer</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>9–12</pages>
       <url hash="8a18c74c">N10-2003</url>
       <bibkey>cer-etal-2010-phrasal</bibkey>
     </paper>
     <paper id="4">
       <title>Multilingual <fixed-case>P</fixed-case>ropbank Annotation Tools: Cornerstone and Jubilee</title>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>13–16</pages>
       <url hash="24896e67">N10-2004</url>
       <bibkey>choi-etal-2010-multilingual</bibkey>
@@ -1342,7 +1342,7 @@
       <title><fixed-case>KSC</fixed-case>-<fixed-case>P</fixed-case>a<fixed-case>L</fixed-case>: A Peer Learning Agent that Encourages Students to take the Initiative</title>
       <author><first>Cynthia</first><last>Kersey</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <author><first>Sandra</first><last>Katz</last></author>
       <pages>17–20</pages>
       <url hash="d68b2c36">N10-2005</url>
@@ -1376,7 +1376,7 @@
       <title>Interpretation of Partial Utterances in Virtual Human Dialogue Systems</title>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>33–36</pages>
       <url hash="d61137db">N10-2009</url>
       <bibkey>sagae-etal-2010-interpretation</bibkey>
@@ -1384,20 +1384,20 @@
     <paper id="10">
       <title>Interactive Predictive Parsing using a Web-based Architecture</title>
       <author><first>Ricardo</first><last>Sánchez-Sáez</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
-      <author><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
       <pages>37–40</pages>
       <url hash="740ef2b0">N10-2010</url>
       <bibkey>sanchez-saez-etal-2010-interactive</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>SIMPLIFICA</fixed-case>: a tool for authoring simplified texts in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese guided by readability assessments</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Matheus</first><last>Oliveira</last></author>
-      <author><first>Arnaldo</first><last>Candido Jr.</last></author>
+      <author id="arnaldo-candido-jr"><first>Arnaldo</first><last>Candido Jr.</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>41–44</pages>
       <url hash="7e68499f">N10-2011</url>
       <bibkey>scarton-etal-2010-simplifica</bibkey>
@@ -1408,7 +1408,7 @@
       <author><first>Chris</first><last>Thrasher</last></author>
       <author><first>Evelyne</first><last>Viegas</last></author>
       <author><first>Xiaolong</first><last>Li</last></author>
-      <author><first>Bo-june Paul</first><last>Hsu</last></author>
+      <author id="bo-june-paul-hsu"><first>Bo-june Paul</first><last>Hsu</last></author>
       <pages>45–48</pages>
       <url hash="130a6b25">N10-2012</url>
       <bibkey>wang-etal-2010-overview</bibkey>
@@ -1419,7 +1419,7 @@
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 Student Research Workshop</booktitle>
       <url hash="c6564a2d">N10-3</url>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Diane</first><last>Litman</last></editor>
+      <editor id="diane-litman"><first>Diane</first><last>Litman</last></editor>
       <editor><first>Adriane</first><last>Boyd</last></editor>
       <editor><first>Mahesh</first><last>Joshi</last></editor>
       <editor><first>Frank</first><last>Rudzicz</last></editor>
@@ -1435,7 +1435,7 @@
     </frontmatter>
     <paper id="1">
       <title>Improving Syntactic Coordination Resolution using Language Modeling</title>
-      <author><first>Philip</first><last>Ogren</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
       <pages>1–6</pages>
       <url hash="bde2d3b4">N10-3001</url>
       <bibkey>ogren-2010-improving</bibkey>
@@ -1487,7 +1487,7 @@
     </paper>
     <paper id="8">
       <title>Temporal Relation Identification with Endpoints</title>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <pages>40–45</pages>
       <url hash="9b403051">N10-3008</url>
       <bibkey>lee-2010-temporal</bibkey>
@@ -1520,7 +1520,7 @@
       <url hash="7221129d">N10-4</url>
       <editor><first>Jason</first><last>Baldwin</last></editor>
       <editor><first>Peter</first><last>Clark</last></editor>
-      <editor><first>Gokhan</first><last>Tur</last></editor>
+      <editor id="gokhan-tur"><first>Gokhan</first><last>Tur</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
       <month>June</month>
@@ -1534,7 +1534,7 @@
     <paper id="1">
       <title>Data-Intensive Text Processing with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe</title>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1–2</pages>
       <url hash="5c3d6901">N10-4001</url>
       <bibkey>lin-dyer-2010-data</bibkey>
@@ -1548,7 +1548,7 @@
     </paper>
     <paper id="3">
       <title>Noisy Text Analytics</title>
-      <author><first>L. Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L. Venkata</first><last>Subramaniam</last></author>
       <pages>5–6</pages>
       <url hash="25fd0550">N10-4003</url>
       <bibkey>subramaniam-2010-noisy</bibkey>
@@ -1563,8 +1563,8 @@
     </paper>
     <paper id="5">
       <title>Integer Linear Programming in <fixed-case>NLP</fixed-case> - Constrained Conditional Models</title>
-      <author><first>Ming-Wei</first><last>Wang</last></author>
-      <author><first>Nicholas</first><last>Rizzolo</last></author>
+      <author id="mingwen-wang"><first>Ming-Wei</first><last>Wang</last></author>
+      <author id="nick-rizzolo"><first>Nicholas</first><last>Rizzolo</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>9–14</pages>
       <url hash="65a5b691">N10-4005</url>
@@ -1579,7 +1579,7 @@
     </paper>
     <paper id="7">
       <title>Computational psycholinguistics</title>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Klinton</first><last>Bicknell</last></author>
       <author><first>Nathaniel</first><last>Smith</last></author>
       <pages>19–20</pages>
diff --git a/data/xml/N12.xml b/data/xml/N12.xml
index b5bdbbed8b..9426614e6a 100644
--- a/data/xml/N12.xml
+++ b/data/xml/N12.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 2012 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="3f75f43d">N12-1</url>
-      <editor><first>Eric</first><last>Fosler-Lussier</last></editor>
-      <editor><first>Ellen</first><last>Riloff</last></editor>
-      <editor><first>Srinivas</first><last>Bangalore</last></editor>
+      <editor id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></editor>
+      <editor id="ellen-riloff"><first>Ellen</first><last>Riloff</last></editor>
+      <editor id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Multiple Narrative Disentanglement: Unraveling Infinite Jest</title>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>1–10</pages>
       <url hash="59a50842">N12-1001</url>
       <bibkey>wallace-2012-multiple</bibkey>
@@ -27,10 +27,10 @@
     <paper id="2">
       <title>Acoustic-Prosodic Entrainment and Social Behavior</title>
       <author><first>Rivka</first><last>Levitan</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
       <author><first>Laura</first><last>Willson</last></author>
-      <author><first>S̆tefan</first><last>Ben̆us̆</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="stefan-benus"><first>S̆tefan</first><last>Ben̆us̆</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>11–19</pages>
       <url hash="df99e3d8">N12-1002</url>
@@ -40,8 +40,8 @@
       <title>Identifying High-Level Organizational Elements in Argumentative Discourse</title>
       <author><first>Nitin</first><last>Madnani</last></author>
       <author><first>Michael</first><last>Heilman</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>20–28</pages>
       <url hash="daa71425">N12-1003</url>
       <bibkey>madnani-etal-2012-identifying</bibkey>
@@ -56,7 +56,7 @@
     </paper>
     <paper id="5">
       <title>Continuous Space Translation Models with Neural Networks</title>
-      <author><first>Hai Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai Son</first><last>Le</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>39–48</pages>
@@ -68,11 +68,11 @@
       <author><first>Rabih</first><last>Zbib</last></author>
       <author><first>Erika</first><last>Malchiodi</last></author>
       <author><first>Jacob</first><last>Devlin</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>49–59</pages>
       <url hash="68386d24">N12-1006</url>
@@ -82,9 +82,9 @@
       <title>Entity Clustering Across Languages</title>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Nicholas</first><last>Andrews</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>60–69</pages>
       <url hash="48cd009f">N12-1007</url>
       <bibkey>green-etal-2012-entity</bibkey>
@@ -100,15 +100,15 @@
     <paper id="9">
       <title>Reference Scope Identification in Citing Sentences</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>80–90</pages>
       <url hash="93d9b943">N12-1009</url>
       <bibkey>abu-jbara-radev-2012-reference</bibkey>
     </paper>
     <paper id="10">
       <title>Intrinsic and Extrinsic Evaluation of an Automatic User Disengagement Detector for an Uncertainty-Adaptive Spoken Dialogue System</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Heather</first><last>Friedberg</last></author>
       <author><first>Joanna</first><last>Drummond</last></author>
       <pages>91–102</pages>
@@ -136,15 +136,15 @@
     <paper id="13">
       <title>Minimum-Risk Training of Approximate <fixed-case>CRF</fixed-case>-Based <fixed-case>NLP</fixed-case> Systems</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>120–130</pages>
       <url hash="4682a496">N12-1013</url>
       <bibkey>stoyanov-eisner-2012-minimum</bibkey>
     </paper>
     <paper id="14">
       <title>Unsupervised Learning on an Approximate Corpus</title>
-      <author><first>Jason</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-smith"><first>Jason</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>131–141</pages>
       <url hash="e87bba53">N12-1014</url>
       <bibkey>smith-eisner-2012-unsupervised</bibkey>
@@ -161,7 +161,7 @@
     <paper id="16">
       <title>Segmentation Similarity and Agreement</title>
       <author><first>Chris</first><last>Fournier</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>152–161</pages>
       <url hash="dc259327">N12-1016</url>
       <bibkey>fournier-inkpen-2012-segmentation</bibkey>
@@ -185,15 +185,15 @@
     <paper id="19">
       <title>Re-examining Machine Translation Metrics for Paraphrase Identification</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>182–190</pages>
       <url hash="8ef28122">N12-1019</url>
       <bibkey>madnani-etal-2012-examining</bibkey>
     </paper>
     <paper id="20">
       <title>A Dependency Treebank of Classical <fixed-case>C</fixed-case>hinese Poems</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Yin Hei</first><last>Kong</last></author>
       <pages>191–199</pages>
       <url hash="262a0ed1">N12-1020</url>
@@ -201,8 +201,8 @@
     </paper>
     <paper id="21">
       <title>Towards Effective Tutorial Feedback for Explanation Questions: A Dataset and Baselines</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <pages>200–210</pages>
       <url hash="63b2fe05">N12-1021</url>
@@ -211,7 +211,7 @@
     <paper id="22">
       <title>Topical Segmentation: a Study of Human Performance and a New Measure of Quality.</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>211–220</pages>
       <url hash="94051040">N12-1022</url>
       <bibkey>kazantseva-szpakowicz-2012-topical</bibkey>
@@ -219,15 +219,15 @@
     <paper id="23">
       <title>Structured Ramp Loss Minimization for Machine Translation</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>221–231</pages>
       <url hash="deee5637">N12-1023</url>
       <bibkey>gimpel-smith-2012-structured</bibkey>
     </paper>
     <paper id="24">
       <title>Implicitly Intersecting Weighted Automata using Dual Decomposition</title>
-      <author><first>Michael J.</first><last>Paul</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>232–242</pages>
       <url hash="ae9d209e">N12-1024</url>
       <bibkey>paul-eisner-2012-implicitly</bibkey>
@@ -268,8 +268,8 @@
     <paper id="29">
       <title>Correcting Comma Errors in Learner Essays, and Restoring Commas in Newswire Text</title>
       <author><first>Ross</first><last>Israel</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>284–294</pages>
       <url hash="a0dbd1fd">N12-1029</url>
       <bibkey>israel-etal-2012-correcting</bibkey>
@@ -277,7 +277,7 @@
     <paper id="30">
       <title>The Challenges of Parsing <fixed-case>C</fixed-case>hinese with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Daniel</first><last>Tse</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>295–304</pages>
       <url hash="295b5d82">N12-1030</url>
       <bibkey>tse-curran-2012-challenges</bibkey>
@@ -294,7 +294,7 @@
     <paper id="32">
       <title>Getting More from Morphology in Multilingual Dependency Parsing</title>
       <author><first>Matt</first><last>Hohensee</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>315–326</pages>
       <url hash="b4070e11">N12-1032</url>
       <bibkey>hohensee-bender-2012-getting</bibkey>
@@ -310,7 +310,7 @@
     </paper>
     <paper id="34">
       <title>Using paraphrases for improving first story detection in news and <fixed-case>T</fixed-case>witter</title>
-      <author><first>Saša</first><last>Petrović</last></author>
+      <author id="sasa-petrovic"><first>Saša</first><last>Petrović</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <author><first>Victor</first><last>Lavrenko</last></author>
       <pages>338–346</pages>
@@ -320,24 +320,24 @@
     <paper id="35">
       <title>Insertion and Deletion Models for Statistical Machine Translation</title>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>347–351</pages>
       <url hash="5e638440">N12-1035</url>
       <bibkey>huck-ney-2012-insertion</bibkey>
     </paper>
     <paper id="36">
       <title><fixed-case>T</fixed-case>rans<fixed-case>A</fixed-case>head: A Computer-Assisted Translation and Writing Tool</title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>352–356</pages>
       <url hash="85252ee8">N12-1036</url>
       <bibkey>huang-etal-2012-transahead-computer</bibkey>
     </paper>
     <paper id="37">
       <title>Correction Detection and Error Type Selection as an <fixed-case>ESL</fixed-case> Educational Aid</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Elif</first><last>Yamangil</last></author>
       <pages>357–361</pages>
       <url hash="917f59b5">N12-1037</url>
@@ -346,7 +346,7 @@
     <paper id="38">
       <title>Getting More from Segmentation Evaluation</title>
       <author><first>Martin</first><last>Scaiano</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>362–366</pages>
       <url hash="6c1a1634">N12-1038</url>
       <bibkey>scaiano-inkpen-2012-getting</bibkey>
@@ -362,8 +362,8 @@
     <paper id="40">
       <title>Evaluating a Morphological Analyser of <fixed-case>I</fixed-case>nuktitut</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>372–376</pages>
       <url hash="0dea7a03">N12-1040</url>
       <bibkey>nicholson-etal-2012-evaluating</bibkey>
@@ -379,7 +379,7 @@
     <paper id="42">
       <title>Towards Using <fixed-case>EEG</fixed-case> to Improve <fixed-case>ASR</fixed-case> Accuracy</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
-      <author><first>Kai-Min</first><last>Chang</last></author>
+      <author id="kai-min-kevin-chang"><first>Kai-Min</first><last>Chang</last></author>
       <author><first>Jack</first><last>Mostow</last></author>
       <pages>382–385</pages>
       <url hash="111af64d">N12-1042</url>
@@ -387,8 +387,8 @@
     </paper>
     <paper id="43">
       <title>A Comparative Investigation of Morphological Language Modeling for the Languages of the <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion</title>
-      <author><first>Thomas</first><last>Mueller</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>386–395</pages>
       <url hash="f17025df">N12-1043</url>
@@ -412,8 +412,8 @@
     </paper>
     <paper id="46">
       <title>Encouraging Consistent Translation Choices</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>417–426</pages>
       <url hash="657f3e5a">N12-1046</url>
@@ -443,8 +443,8 @@
     <paper id="49">
       <title>Parsing Time: Learning to Interpret Time Expressions</title>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>446–455</pages>
       <url hash="25d4b6b3">N12-1049</url>
       <bibkey>angeli-etal-2012-parsing</bibkey>
@@ -452,7 +452,7 @@
     <paper id="50">
       <title>Fine-Grained Focus for Pinpointing Positive Implicit Meaning from Negated Statements</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>456–465</pages>
       <url hash="3fa31e2c">N12-1050</url>
       <bibkey>blanco-moldovan-2012-fine</bibkey>
@@ -484,7 +484,7 @@
     </paper>
     <paper id="54">
       <title>Vine Pruning for Efficient Multi-Pass Dependency Parsing</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <pages>498–507</pages>
       <url hash="e9fd02bc">N12-1054</url>
@@ -494,7 +494,7 @@
       <title>Active Learning for Coreference Resolution</title>
       <author><first>Florian</first><last>Laws</last></author>
       <author><first>Florian</first><last>Heimerl</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>508–512</pages>
       <url hash="965a15db">N12-1055</url>
       <bibkey>laws-etal-2012-active</bibkey>
@@ -510,8 +510,8 @@
     <paper id="57">
       <title>Predicting Overt Display of Power in Written Dialogs</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>518–522</pages>
       <url hash="d2840de7">N12-1057</url>
       <bibkey>prabhakaran-etal-2012-predicting</bibkey>
@@ -534,7 +534,7 @@
     </paper>
     <paper id="60">
       <title>Improved Reordering for Shallow-n Grammar based Hierarchical Phrase-based Translation</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>533–537</pages>
       <url hash="7d3891c6">N12-1060</url>
@@ -569,7 +569,7 @@
     <paper id="64">
       <title>How Text Segmentation Algorithms Gain from Topic Models</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>553–557</pages>
       <url hash="b6693f85">N12-1064</url>
       <bibkey>riedl-biemann-2012-text</bibkey>
@@ -584,7 +584,7 @@
     <paper id="66">
       <title>Behavioral Factors in Interactive Training of Text Classifiers</title>
       <author><first>Burr</first><last>Settles</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>563–567</pages>
       <url hash="d0775f1d">N12-1066</url>
       <revision id="1" href="N12-1066v1" hash="9837540f"/>
@@ -610,7 +610,7 @@
     <paper id="69">
       <title>Concavity and Initialization for Unsupervised Dependency Parsing</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>577–581</pages>
       <url hash="d14efefc">N12-1069</url>
       <bibkey>gimpel-smith-2012-concavity</bibkey>
@@ -618,7 +618,7 @@
     <paper id="70">
       <title>Multimodal Grammar Implementation</title>
       <author><first>Katya</first><last>Alahverdzhieva</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <pages>582–586</pages>
       <url hash="d33bbf59">N12-1070</url>
@@ -626,15 +626,15 @@
     </paper>
     <paper id="71">
       <title>Portable Features for Classifying Emotional Text</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>587–591</pages>
       <url hash="25439bfc">N12-1071</url>
       <bibkey>mohammad-2012-portable</bibkey>
     </paper>
     <paper id="72">
       <title>Stance Classification using Dialogic Properties of Persuasion</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <author><first>Rob</first><last>Abbott</last></author>
       <author><first>Ricky</first><last>Grant</last></author>
       <pages>592–596</pages>
@@ -662,7 +662,7 @@
     </paper>
     <paper id="75">
       <title>The Intelius Nickname Collection: Quantitative Analyses from Billions of Public Records</title>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <author><first>Yigit</first><last>Kiran</last></author>
       <author><first>Andrew</first><last>Borthwick</last></author>
       <pages>607–610</pages>
@@ -671,7 +671,7 @@
     </paper>
     <paper id="76">
       <title>A comparison of models of word meaning in context</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Stefan</first><last>Thater</last></author>
       <author><first>Soeren</first><last>Laue</last></author>
       <pages>611–615</pages>
@@ -680,7 +680,7 @@
     </paper>
     <paper id="77">
       <title>Measuring Word Relatedness Using Heterogeneous Vector Space Models</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Vahed</first><last>Qazvinian</last></author>
       <pages>616–620</pages>
       <url hash="6ef4ab30">N12-1077</url>
@@ -698,7 +698,7 @@
     </paper>
     <paper id="79">
       <title>Why Not Grab a Free Lunch? Mining Large Corpora for Parallel Sentences to Improve Translation Modeling</title>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>626–630</pages>
       <url hash="0475edc7">N12-1079</url>
@@ -707,7 +707,7 @@
     <paper id="80">
       <title>Summarization of Historical Articles Using Temporal Event Clustering</title>
       <author><first>James</first><last>Gung</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>631–635</pages>
       <url hash="4a3fe513">N12-1080</url>
       <bibkey>gung-kalita-2012-summarization</bibkey>
@@ -723,8 +723,8 @@
     <paper id="82">
       <title>On The Feasibility of Open Domain Referring Expression Generation Using Large Scale Folksonomies</title>
       <author><first>Fabián</first><last>Pacheco</last></author>
-      <author><first>Pablo</first><last>Duboue</last></author>
-      <author><first>Martín</first><last>Domínguez</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
+      <author id="martin-ariel-dominguez"><first>Martín</first><last>Domínguez</last></author>
       <pages>641–645</pages>
       <url hash="8c3a0ed5">N12-1082</url>
       <bibkey>pacheco-etal-2012-feasibility</bibkey>
@@ -733,7 +733,7 @@
       <title>Structured Event Retrieval over Microblog Archives</title>
       <author><first>Donald</first><last>Metzler</last></author>
       <author><first>Congxing</first><last>Cai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>646–655</pages>
       <url hash="15968b54">N12-1083</url>
       <bibkey>metzler-etal-2012-structured</bibkey>
@@ -742,7 +742,7 @@
       <title>Learning from Bullying Traces in Social Media</title>
       <author><first>Jun-Ming</first><last>Xu</last></author>
       <author><first>Kwang-Sung</first><last>Jun</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <author><first>Amy</first><last>Bellmore</last></author>
       <pages>656–666</pages>
       <url hash="e527d403">N12-1084</url>
@@ -750,10 +750,10 @@
     </paper>
     <paper id="85">
       <title>Grammatical structures for word-level sentiment detection</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Bryan</first><last>Rusk</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>667–676</pages>
       <url hash="9cdb23a4">N12-1085</url>
       <bibkey>sayeed-etal-2012-grammatical</bibkey>
@@ -761,7 +761,7 @@
     <paper id="86">
       <title>Graph-Based Lexicon Expansion with Sparsity-Inducing Penalties</title>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>677–687</pages>
       <url hash="86434191">N12-1086</url>
       <bibkey>das-smith-2012-graph</bibkey>
@@ -778,7 +778,7 @@
     <paper id="88">
       <title>Low-Dimensional Discriminative Reranking</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>699–709</pages>
       <url hash="732b33c7">N12-1088</url>
       <bibkey>jagarlamudi-daume-iii-2012-low</bibkey>
@@ -786,7 +786,7 @@
     <paper id="89">
       <title>Autonomous Self-Assessment of Autocorrections: Exploring Text Message Dialogues</title>
       <author><first>Tyler</first><last>Baldwin</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>710–719</pages>
       <url hash="3c97e747">N12-1089</url>
       <bibkey>baldwin-chai-2012-autonomous</bibkey>
@@ -803,14 +803,14 @@
       <title>Exploring Semi-Supervised Coreference Resolution of Medical Concepts using Semantic and Temporal Features</title>
       <author><first>Preethi</first><last>Raghavan</last></author>
       <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Albert</first><last>Lai</last></author>
+      <author id="albert-m-lai"><first>Albert</first><last>Lai</last></author>
       <pages>731–741</pages>
       <url hash="df83e195">N12-1091</url>
       <bibkey>raghavan-etal-2012-exploring</bibkey>
     </paper>
     <paper id="92">
       <title>Mind the Gap: Learning to Choose Gaps for Question Generation</title>
-      <author><first>Lee</first><last>Becker</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
       <author><first>Sumit</first><last>Basu</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <pages>742–751</pages>
@@ -835,9 +835,9 @@
       <author><first>Karl</first><last>Stratos</last></author>
       <author><first>Kota</first><last>Yamaguchi</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Alex</first><last>Berg</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="alexander-berg"><first>Alex</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <pages>762–772</pages>
       <url hash="c97aad64">N12-1094</url>
       <bibkey>dodge-etal-2012-detecting</bibkey>
@@ -853,10 +853,10 @@
     </paper>
     <paper id="96">
       <title>Shared Components Topic Models</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>783–792</pages>
       <url hash="48ab942b">N12-1096</url>
       <bibkey>gormley-etal-2012-shared</bibkey>
@@ -864,8 +864,8 @@
     <paper id="97">
       <title>Textual Predictors of Bill Survival in Congressional Committees</title>
       <author><first>Tae</first><last>Yano</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>John D.</first><last>Wilkerson</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="john-wilkerson"><first>John D.</first><last>Wilkerson</last></author>
       <pages>793–802</pages>
       <url hash="ae554b15">N12-1097</url>
       <bibkey>yano-etal-2012-textual</bibkey>
@@ -877,7 +877,7 @@
       <url hash="c68a938c">N12-2</url>
       <editor><first>Rivka</first><last>Levitan</last></editor>
       <editor><first>Myle</first><last>Ott</last></editor>
-      <editor><first>Roger</first><last>Levy</last></editor>
+      <editor id="roger-levy"><first>Roger</first><last>Levy</last></editor>
       <editor><first>Ani</first><last>Nenkova</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
@@ -892,7 +892,7 @@
     <paper id="1">
       <title>Finding the Right Supervisor: Expert-Finding in a University Domain</title>
       <author><first>Fawaz</first><last>Alarfaj</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>David</first><last>Hunter</last></author>
       <author><first>Chris</first><last>Fox</last></author>
       <pages>1–6</pages>
@@ -901,7 +901,7 @@
     </paper>
     <paper id="2">
       <title>Automatic <fixed-case>A</fixed-case>nimacy Classification</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Harshit</first><last>Chopra</last></author>
       <pages>7–10</pages>
       <url hash="3c1e0125">N12-2002</url>
@@ -984,7 +984,7 @@
       <booktitle>Proceedings of the Demonstration Session at the Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="64eebb02">N12-3</url>
       <editor><first>Aria</first><last>Haghighi</last></editor>
-      <editor><first>Yaser</first><last>Al-Onaizan</last></editor>
+      <editor id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -1000,14 +1000,14 @@
       <author><first>Enrique</first><last>Flores</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Lidia</first><last>Moreno</last></author>
+      <author id="lidia-moreno"><first>Lidia</first><last>Moreno</last></author>
       <pages>1–4</pages>
       <url hash="cf1652be">N12-3001</url>
       <bibkey>flores-etal-2012-desocore</bibkey>
     </paper>
     <paper id="2">
       <title>A Graphical User Interface for Feature-Based Opinion Mining</title>
-      <author><first>Pedro Paulo</first><last>Balage Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro Paulo</first><last>Balage Filho</last></author>
       <author><first>Caroline</first><last>Brun</last></author>
       <author><first>Gilbert</first><last>Rondeau</last></author>
       <pages>5–8</pages>
@@ -1016,9 +1016,9 @@
     </paper>
     <paper id="3">
       <title>Navigating Large Comment Threads with <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>i</title>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <pages>9–12</pages>
       <url hash="7b4077a2">N12-3003</url>
       <bibkey>doran-etal-2012-navigating</bibkey>
@@ -1034,7 +1034,7 @@
     <paper id="5">
       <title>An Interactive Humanoid Robot Exhibiting Flexible Sub-Dialogues</title>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <pages>17–20</pages>
       <url hash="5c9148e2">N12-3005</url>
       <bibkey>cuayahuitl-kruijff-korbayova-2012-interactive</bibkey>
@@ -1047,7 +1047,7 @@
       <author><first>Hisami</first><last>Suzuki</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <pages>21–24</pages>
@@ -1057,7 +1057,7 @@
     <paper id="7">
       <title>Incremental Speech Understanding in a Multi-Party Virtual Human Dialogue System</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>25–28</pages>
       <url hash="2680ccd0">N12-3007</url>
       <bibkey>devault-traum-2012-incremental</bibkey>
@@ -1074,8 +1074,8 @@
     <paper id="9">
       <title><fixed-case>A</fixed-case>ttitude<fixed-case>M</fixed-case>iner: Mining Attitude from Online Discussions</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>33–36</pages>
       <url hash="3f924cce">N12-3009</url>
       <bibkey>abu-jbara-etal-2012-attitudeminer</bibkey>
@@ -1084,7 +1084,7 @@
   <volume id="4" type="proceedings">
     <meta>
       <booktitle>Tutorial Abstracts at the Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
-      <editor><first>Radu</first><last>Florian</last></editor>
+      <editor id="radu-florian"><first>Radu</first><last>Florian</last></editor>
       <editor><first>Jacob</first><last>Eisenstein</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
@@ -1097,28 +1097,28 @@
     </frontmatter>
     <paper id="1">
       <title>100 Things You Always Wanted to Know about Linguistics But Were Afraid to Ask*</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <url hash="d3248ab2">N12-4001</url>
       <bibkey>bender-2012-100</bibkey>
     </paper>
     <paper id="2">
       <title>Structured Sparsity in Natural Language Processing: Models, Algorithms and Applications</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Mário A. T.</first><last>Figueiredo</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <url hash="52613eb2">N12-4002</url>
       <bibkey>martins-etal-2012-structured</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>A</fixed-case>rabic Dialect Processing Tutorial</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <url hash="3c033d25">N12-4003</url>
       <bibkey>diab-habash-2012-arabic</bibkey>
     </paper>
     <paper id="4">
       <title>Natural Language Processing in <fixed-case>W</fixed-case>atson</title>
-      <author><first>Alfio M.</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio M.</first><last>Gliozzo</last></author>
       <author><first>Aditya</first><last>Kalyanpur</last></author>
       <author><first>James</first><last>Fan</last></author>
       <url hash="6045cf8e">N12-4004</url>
diff --git a/data/xml/N13.xml b/data/xml/N13.xml
index b9b1a51e96..4eb88764d3 100644
--- a/data/xml/N13.xml
+++ b/data/xml/N13.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 2013 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="4899d148">N13-1</url>
       <editor><first>Lucy</first><last>Vanderwende</last></editor>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <editor><first>Katrin</first><last>Kirchhoff</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>Model With Minimal Translation Units, But Decode With Phrases</title>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>1–11</pages>
       <url hash="21ddfb22">N13-1001</url>
@@ -59,7 +59,7 @@
     <paper id="5">
       <title>Multi-faceted Event Recognition with Bootstrapped Dictionaries</title>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>41–51</pages>
       <url hash="a80dcd43">N13-1005</url>
       <bibkey>huang-riloff-2013-multi</bibkey>
@@ -69,7 +69,7 @@
       <title>Named Entity Recognition with Bilingual Constraints</title>
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>52–62</pages>
       <url hash="d4d499b8">N13-1006</url>
@@ -82,7 +82,7 @@
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Takao</first><last>Kawai</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <pages>63–73</pages>
       <url hash="4ddd38d9">N13-1007</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="9">
       <title>Extracting the Native Language Signal for Second Language Acquisition</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>85–94</pages>
       <url hash="8ca62ffb">N13-1009</url>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="10">
       <title>An Analysis of Frequency- and Memory-Based Processing Costs</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>95–105</pages>
       <url hash="dc18eb19">N13-1010</url>
@@ -123,7 +123,7 @@
     <paper id="11">
       <title>Cross-Lingual Semantic Similarity of Words as the Similarity of Their Semantic Word Responses</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>106–116</pages>
       <url hash="9a47f61e">N13-1011</url>
       <bibkey>vulic-moens-2013-cross</bibkey>
@@ -132,7 +132,7 @@
     <paper id="12">
       <title>Combining multiple information types in <fixed-case>B</fixed-case>ayesian word segmentation</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>117–126</pages>
       <url hash="882b3600">N13-1012</url>
       <bibkey>doyle-levy-2013-combining</bibkey>
@@ -157,11 +157,11 @@
     </paper>
     <paper id="15">
       <title>Experiments with Spectral Learning of Latent-Variable <fixed-case>PCFG</fixed-case>s</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Dean P.</first><last>Foster</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="dean-foster"><first>Dean P.</first><last>Foster</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>148–157</pages>
       <url hash="d5a29c0c">N13-1015</url>
       <bibkey>cohen-etal-2013-experiments</bibkey>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="17">
       <title>Drug Extraction from the Web: Summarizing Drug Experiences with Multi-Dimensional Topic Models</title>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>168–178</pages>
       <url hash="5a5c2dd8">N13-1017</url>
@@ -189,8 +189,8 @@
       <title>Towards Topic Labeling with Phrase Entailment and Aggregation</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>179–189</pages>
       <url hash="dfd3a008">N13-1018</url>
       <bibkey>mehdad-etal-2013-towards</bibkey>
@@ -210,8 +210,8 @@
       <title>Text Alignment for Real-Time Crowd Captioning</title>
       <author><first>Iftekhar</first><last>Naim</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
-      <author><first>Jeffrey P.</first><last>Bigham</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey P.</first><last>Bigham</last></author>
       <pages>201–210</pages>
       <url hash="a0df10a9">N13-1020</url>
       <bibkey>naim-etal-2013-text</bibkey>
@@ -221,7 +221,7 @@
       <title>Discriminative Joint Modeling of Lexical Variation and Acoustic Confusion for Automated Narrative Retelling Assessment</title>
       <author><first>Maider</first><last>Lehr</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>211–220</pages>
       <url hash="486453c5">N13-1021</url>
@@ -231,8 +231,8 @@
     <paper id="22">
       <title>Using Out-of-Domain Data for Lexical Addressee Detection in Human-Human-Computer Dialog</title>
       <author><first>Heeyoung</first><last>Lee</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <pages>221–229</pages>
       <url hash="b6877fd7">N13-1022</url>
       <bibkey>lee-etal-2013-using</bibkey>
@@ -242,8 +242,8 @@
       <title>Segmentation Strategies for Streaming Speech Translation</title>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Andrej</first><last>Ljolje</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="andrej-ljolje"><first>Andrej</first><last>Ljolje</last></author>
       <author><first>Rathinavelu</first><last>Chengalvarayan</last></author>
       <pages>230–238</pages>
       <url hash="32bfaa33">N13-1023</url>
@@ -254,7 +254,7 @@
       <title>Enforcing Subcategorization Constraints in a Parser Using Sub-parses Recombining</title>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>239–247</pages>
       <url hash="86596105">N13-1024</url>
       <bibkey>mirroshandel-etal-2013-enforcing</bibkey>
@@ -262,8 +262,8 @@
     <paper id="25">
       <title>Large-Scale Discriminative Training for Statistical Machine Translation Using Held-Out Line Search</title>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>248–258</pages>
       <url hash="d168d171">N13-1025</url>
       <bibkey>flanigan-etal-2013-large</bibkey>
@@ -293,7 +293,7 @@
     <paper id="29">
       <title>Improving Syntax-Augmented Machine Translation by Coarsening the Label Set</title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>288–297</pages>
       <url hash="df643cb4">N13-1029</url>
       <bibkey>hanneman-lavie-2013-improving</bibkey>
@@ -319,7 +319,7 @@
     </paper>
     <paper id="32">
       <title>Improving reordering performance using higher order and structural features</title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <pages>315–324</pages>
@@ -377,10 +377,10 @@
       <title>Improved Part-of-Speech Tagging for Online Conversational Text with Word Clusters</title>
       <author><first>Olutobi</first><last>Owoputi</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>380–390</pages>
       <url hash="b7e34b52">N13-1039</url>
       <bibkey>owoputi-etal-2013-improved</bibkey>
@@ -388,7 +388,7 @@
     <paper id="40">
       <title>Parser lexicalisation through self-learning</title>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>391–400</pages>
       <url hash="c9fba349">N13-1040</url>
       <bibkey>rei-briscoe-2013-parser</bibkey>
@@ -414,8 +414,8 @@
       <author><first>Mark</first><last>Yatskar</last></author>
       <author><first>Svitlana</first><last>Volkova</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>416–425</pages>
       <url hash="ee01a692">N13-1043</url>
       <bibkey>yatskar-etal-2013-learning</bibkey>
@@ -423,8 +423,8 @@
     <paper id="44">
       <title>Morphological Analysis and Disambiguation for Dialectal <fixed-case>A</fixed-case>rabic</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <pages>426–432</pages>
@@ -433,8 +433,8 @@
     </paper>
     <paper id="45">
       <title>Using a Supertagged Dependency Language Model to Select a Good Translation in System Combination</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>433–438</pages>
       <url hash="ec8490a0">N13-1045</url>
       <bibkey>ma-mckeown-2013-using</bibkey>
@@ -452,7 +452,7 @@
     <paper id="47">
       <title>Better <fixed-case>T</fixed-case>witter Summaries?</title>
       <author><first>Joel</first><last>Judd</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>445–449</pages>
       <url hash="60fbda1e">N13-1047</url>
       <bibkey>judd-kalita-2013-better</bibkey>
@@ -469,7 +469,7 @@
       <title>Automatic Morphological Enrichment of a Morphologically Underspecified Treebank</title>
       <author><first>Sarah</first><last>Alkuhlani</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <pages>460–470</pages>
       <url hash="c57fb499">N13-1049</url>
       <bibkey>alkuhlani-etal-2013-automatic</bibkey>
@@ -493,9 +493,9 @@
     </paper>
     <paper id="52">
       <title>Approximate <fixed-case>PCFG</fixed-case> Parsing Using Tensor Decomposition</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>487–496</pages>
       <url hash="40ab83db">N13-1052</url>
       <bibkey>cohen-etal-2013-approximate</bibkey>
@@ -503,7 +503,7 @@
     <paper id="53">
       <title>Negative Deceptive Opinion Spam</title>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Jeffrey T.</first><last>Hancock</last></author>
       <pages>497–501</pages>
       <url hash="552ebf1a">N13-1053</url>
@@ -513,7 +513,7 @@
       <title>Improving speech synthesis quality by reducing pitch peaks in the source recordings</title>
       <author><first>Luisina</first><last>Violante</last></author>
       <author><first>Pablo</first><last>Rodríguez Zivic</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
       <pages>502–506</pages>
       <url hash="3d518572">N13-1054</url>
       <bibkey>violante-etal-2013-improving</bibkey>
@@ -522,7 +522,7 @@
       <title>Robust Systems for Preposition Error Correction Using <fixed-case>W</fixed-case>ikipedia Revisions</title>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Diane</first><last>Napolitano</last></author>
       <pages>507–517</pages>
       <url hash="de8c6b34">N13-1055</url>
@@ -539,7 +539,7 @@
     <paper id="57">
       <title>Creating Reverse Bilingual Dictionaries</title>
       <author><first>Khang Nhut</first><last>Lam</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>524–528</pages>
       <url hash="00b6d98a">N13-1057</url>
       <bibkey>lam-kalita-2013-creating</bibkey>
@@ -563,7 +563,7 @@
     </paper>
     <paper id="60">
       <title>Modeling Syntactic and Semantic Structures in Hierarchical Phrase-based Translation</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Hal</first><last>Daumé III</last></author>
       <pages>540–549</pages>
@@ -575,9 +575,9 @@
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Justin</first><last>Mott</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
-      <author><first>Anthony</first><last>Kroch</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
+      <author id="anthony-kroch"><first>Anthony</first><last>Kroch</last></author>
       <pages>550–555</pages>
       <url hash="3db9e3e1">N13-1061</url>
       <bibkey>kulick-etal-2013-using</bibkey>
@@ -592,7 +592,7 @@
     <paper id="63">
       <title>Compound Embedding Features for Semi-supervised Learning</title>
       <author><first>Mo</first><last>Yu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Daxiang</first><last>Dong</last></author>
       <author><first>Hao</first><last>Tian</last></author>
       <author><first>Dianhai</first><last>Yu</last></author>
@@ -622,7 +622,7 @@
       <title>Processing Spontaneous Orthography</title>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <pages>585–595</pages>
       <url hash="96df42bd">N13-1066</url>
@@ -632,14 +632,14 @@
       <title>Purpose and Polarity of Citation: Towards <fixed-case>NLP</fixed-case>-based Bibliometrics</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Jefferson</first><last>Ezra</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>596–606</pages>
       <url hash="f3bc273e">N13-1067</url>
       <bibkey>abu-jbara-etal-2013-purpose</bibkey>
     </paper>
     <paper id="68">
       <title>Estimating effect size across datasets</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>607–611</pages>
       <url hash="d9d4b033">N13-1068</url>
       <bibkey>sogaard-2013-estimating</bibkey>
@@ -649,8 +649,8 @@
       <author><first>Rabih</first><last>Zbib</last></author>
       <author><first>Gretchen</first><last>Markiewicz</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>612–616</pages>
       <url hash="845cb97b">N13-1069</url>
       <bibkey>zbib-etal-2013-systematic</bibkey>
@@ -658,11 +658,11 @@
     <paper id="70">
       <title>Down-stream effects of tree-to-dependency conversions</title>
       <author><first>Jakob</first><last>Elming</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Sigrid</first><last>Klerke</last></author>
       <author><first>Emanuele</first><last>Lapponi</last></author>
-      <author><first>Hector</first><last>Martinez Alonso</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Hector</first><last>Martinez Alonso</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>617–626</pages>
       <url hash="4541823a">N13-1070</url>
       <bibkey>elming-etal-2013-stream</bibkey>
@@ -670,7 +670,7 @@
     <paper id="71">
       <title>The Life and Death of Discourse Entities: Identifying Singleton Mentions</title>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>627–633</pages>
       <url hash="ff4d2d01">N13-1071</url>
@@ -688,9 +688,9 @@
     </paper>
     <paper id="73">
       <title>A Simple, Fast, and Effective Reparameterization of <fixed-case>IBM</fixed-case> Model 2</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Victor</first><last>Chahuneau</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>644–648</pages>
       <url hash="8237f226">N13-1073</url>
       <bibkey>dyer-etal-2013-simple</bibkey>
@@ -699,7 +699,7 @@
     <paper id="74">
       <title>Phrase Training Based Adaptation for Statistical Machine Translation</title>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>649–654</pages>
       <url hash="9f6afade">N13-1074</url>
       <bibkey>mansour-ney-2013-phrase</bibkey>
@@ -709,7 +709,7 @@
       <title>Translation Acquisition Using Synonym Sets</title>
       <author><first>Daniel</first><last>Andrade</last></author>
       <author><first>Masaaki</first><last>Tsuchida</last></author>
-      <author><first>Takashi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
       <author><first>Kai</first><last>Ishikawa</last></author>
       <pages>655–660</pages>
       <url hash="32ee4877">N13-1075</url>
@@ -720,9 +720,9 @@
       <title>Supersense Tagging for <fixed-case>A</fixed-case>rabic: the <fixed-case>MT</fixed-case>-in-the-Middle Attack</title>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Behrang</first><last>Mohit</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>661–667</pages>
       <url hash="f46aba7b">N13-1076</url>
       <bibkey>schneider-etal-2013-supersense</bibkey>
@@ -730,7 +730,7 @@
     </paper>
     <paper id="77">
       <title><fixed-case>Z</fixed-case>ipfian corruptions for robust <fixed-case>POS</fixed-case> tagging</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>668–672</pages>
       <url hash="e3379688">N13-1077</url>
       <bibkey>sogaard-2013-zipfian</bibkey>
@@ -758,8 +758,8 @@
       <title>What’s in a Domain? Multi-Domain Learning for Multi-Attribute Data</title>
       <author><first>Mahesh</first><last>Joshi</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>685–690</pages>
       <url hash="1aa6ee28">N13-1080</url>
       <bibkey>joshi-etal-2013-whats</bibkey>
@@ -767,7 +767,7 @@
     </paper>
     <paper id="81">
       <title>An opinion about opinions about opinions: subjectivity and the aggregate reader</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>691–696</pages>
       <url hash="6d7ff9d5">N13-1081</url>
       <bibkey>sayeed-2013-opinion</bibkey>
@@ -777,7 +777,7 @@
       <title>An Examination of Regret in Bullying Tweets</title>
       <author><first>Jun-Ming</first><last>Xu</last></author>
       <author><first>Benjamin</first><last>Burchfiel</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <author><first>Amy</first><last>Bellmore</last></author>
       <pages>697–702</pages>
       <url hash="0fc97111">N13-1082</url>
@@ -786,10 +786,10 @@
     </paper>
     <paper id="83">
       <title>A Cross-language Study on Automatic Speech Disfluency Detection</title>
-      <author><first>Wen</first><last>Wang</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <author><first>Jiahong</first><last>Yuan</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>703–708</pages>
       <url hash="b8fdd867">N13-1083</url>
       <bibkey>wang-etal-2013-cross</bibkey>
@@ -798,7 +798,7 @@
     <paper id="84">
       <title>Distributional semantic models for the evaluation of disordered language</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
       <pages>709–714</pages>
@@ -809,7 +809,7 @@
     <paper id="85">
       <title>Atypical Prosodic Structure as an Indicator of Reading Level and Text Difficulty</title>
       <author><first>Julie</first><last>Medero</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>715–720</pages>
       <url hash="b50c8a57">N13-1085</url>
       <bibkey>medero-ostendorf-2013-atypical</bibkey>
@@ -820,7 +820,7 @@
       <author><first>Kai</first><last>Wei</last></author>
       <author><first>Yuzong</first><last>Liu</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>721–726</pages>
       <url hash="27596b22">N13-1086</url>
       <bibkey>wei-etal-2013-using</bibkey>
@@ -829,7 +829,7 @@
     <paper id="87">
       <title>Semi-Supervised Discriminative Language Modeling with Out-of-Domain Text Data</title>
       <author><first>Arda</first><last>Çelebi</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>727–732</pages>
       <url hash="814a891e">N13-1087</url>
       <bibkey>celebi-saraclar-2013-semi</bibkey>
@@ -839,7 +839,7 @@
       <title>More than meets the eye: Study of Human Cognition in Sense Annotation</title>
       <author><first>Salil</first><last>Joshi</last></author>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>733–738</pages>
       <url hash="f9f31d4b">N13-1088</url>
       <bibkey>joshi-etal-2013-meets</bibkey>
@@ -848,7 +848,7 @@
     <paper id="89">
       <title>Improving Lexical Semantics for Sentential Semantics: Modeling Selectional Preference and Similar Words in a Latent Variable Model</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>739–745</pages>
       <url hash="34c81c66">N13-1089</url>
       <bibkey>guo-diab-2013-improving</bibkey>
@@ -856,9 +856,9 @@
     </paper>
     <paper id="90">
       <title>Linguistic Regularities in Continuous Space Word Representations</title>
-      <author><first>Tomas</first><last>Mikolov</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>746–751</pages>
       <url hash="6e10533f">N13-1090</url>
       <bibkey>mikolov-etal-2013-linguistic</bibkey>
@@ -887,7 +887,7 @@
     <paper id="93">
       <title>Exploiting the Scope of Negations and Heterogeneous Features for Relation Extraction: A Case Study for Drug-Drug Interaction Extraction</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>765–771</pages>
       <url hash="7ebcf400">N13-1093</url>
       <bibkey>chowdhury-lavelli-2013-exploiting</bibkey>
@@ -896,7 +896,7 @@
     <paper id="94">
       <title>Graph-Based Seed Set Expansion for Relation Extraction Using Random Walk Hitting Times</title>
       <author><first>Joel</first><last>Lang</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>772–776</pages>
       <url hash="97199c4c">N13-1094</url>
       <bibkey>lang-henderson-2013-graph</bibkey>
@@ -905,7 +905,7 @@
     <paper id="95">
       <title>Distant Supervision for Relation Extraction with an Incomplete Knowledge Base</title>
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Li</first><last>Wan</last></author>
       <author><first>Chang</first><last>Wang</last></author>
       <author><first>David</first><last>Gondek</last></author>
@@ -917,9 +917,9 @@
     <paper id="96">
       <title>Measuring the Structural Importance through Rhetorical Structure Index</title>
       <author><first>Narine</first><last>Kokhlikyan</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Joy Ying</first><last>Zhang</last></author>
+      <author id="ying-zhang"><first>Joy Ying</first><last>Zhang</last></author>
       <pages>783–788</pages>
       <url hash="2d5a22eb">N13-1096</url>
       <bibkey>kokhlikyan-etal-2013-measuring</bibkey>
@@ -928,7 +928,7 @@
     <paper id="97">
       <title>Separating Fact from Fear: Tracking Flu Infections on <fixed-case>T</fixed-case>witter</title>
       <author><first>Alex</first><last>Lamb</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>789–795</pages>
       <url hash="55731683">N13-1097</url>
@@ -938,7 +938,7 @@
     <paper id="98">
       <title>Differences in User Responses to a <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z versus Automated System</title>
       <author><first>Jesse</first><last>Thomason</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>796–801</pages>
       <url hash="9346ea14">N13-1098</url>
       <bibkey>thomason-litman-2013-differences</bibkey>
@@ -948,7 +948,7 @@
       <title>Improving the Quality of Minority Class Identification in Dialog Act Tagging</title>
       <author><first>Adinoyi</first><last>Omuya</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>802–807</pages>
       <url hash="f7bc8f71">N13-1099</url>
       <bibkey>omuya-etal-2013-improving</bibkey>
@@ -993,7 +993,7 @@
     </paper>
     <paper id="104">
       <title>Probabilistic Frame Induction</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Hoifung</first><last>Poon</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <pages>837–846</pages>
@@ -1026,7 +1026,7 @@
       <title>Open Information Extraction with Tree Kernels</title>
       <author><first>Ying</first><last>Xu</last></author>
       <author><first>Mi-Young</first><last>Kim</last></author>
-      <author><first>Kevin</first><last>Quinn</last></author>
+      <author id="kevin-m-quinn"><first>Kevin</first><last>Quinn</last></author>
       <author><first>Randy</first><last>Goebel</last></author>
       <author><first>Denilson</first><last>Barbosa</last></author>
       <pages>868–877</pages>
@@ -1038,11 +1038,11 @@
     </paper>
     <paper id="108">
       <title>Finding What Matters in Questions</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Hema</first><last>Raghavan</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <author><first>Sameer</first><last>Maskey</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>878–887</pages>
       <url hash="391273a3">N13-1108</url>
       <bibkey>luo-etal-2013-finding</bibkey>
@@ -1053,7 +1053,7 @@
       <author><first>Hyun-Je</first><last>Song</last></author>
       <author><first>Junho</first><last>Go</last></author>
       <author><first>Seong-Bae</first><last>Park</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>888–896</pages>
       <url hash="3c2fbcd0">N13-1109</url>
       <bibkey>song-etal-2013-just</bibkey>
@@ -1063,7 +1063,7 @@
       <title>Same Referent, Different Words: Unsupervised Mining of Opaque Coreferent Mentions</title>
       <author><first>Marta</first><last>Recasens</last></author>
       <author><first>Matthew</first><last>Can</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>897–906</pages>
       <url hash="2a1cbac1">N13-1110</url>
       <bibkey>recasens-etal-2013-referent</bibkey>
@@ -1110,7 +1110,7 @@
     </paper>
     <paper id="115">
       <title>Multi-Metric Optimization Using Ensemble Tuning</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <pages>947–957</pages>
@@ -1122,7 +1122,7 @@
       <title>Grouping Language Model Boundary Words to Speed K–Best Extraction from Hypergraphs</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>958–968</pages>
       <url hash="9e305675">N13-1116</url>
       <bibkey>heafield-etal-2013-grouping</bibkey>
@@ -1131,7 +1131,7 @@
     <paper id="117">
       <title>A Systematic <fixed-case>B</fixed-case>ayesian Treatment of the <fixed-case>IBM</fixed-case> Alignment Models</title>
       <author><first>Yarin</first><last>Gal</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>969–977</pages>
       <url hash="09b1d913">N13-1117</url>
       <bibkey>gal-blunsom-2013-systematic</bibkey>
@@ -1149,7 +1149,7 @@
     <paper id="119">
       <title>Three Knowledge-Free Methods for Automatic Lexical Chain Extraction</title>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>989–999</pages>
       <url hash="14bd5b42">N13-1119</url>
       <bibkey>remus-biemann-2013-three</bibkey>
@@ -1158,10 +1158,10 @@
     <paper id="120">
       <title>Combining Heterogeneous Models for Measuring Relational Similarity</title>
       <author><first>Alisa</first><last>Zhila</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <pages>1000–1009</pages>
       <url hash="5d959deb">N13-1120</url>
       <attachment type="presentation" hash="530133e6">N13-1120.Presentation.pptx</attachment>
@@ -1235,7 +1235,7 @@
       <author><first>Adam</first><last>Vogel</last></author>
       <author><first>Max</first><last>Bodoia</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>1072–1081</pages>
       <url hash="df1b03e6">N13-1127</url>
       <bibkey>vogel-etal-2013-emergence</bibkey>
@@ -1244,7 +1244,7 @@
     <paper id="128">
       <title>Open Dialogue Management for Relational Databases</title>
       <author><first>Ben</first><last>Hixon</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>1082–1091</pages>
       <url hash="f815dc57">N13-1128</url>
       <bibkey>hixon-passonneau-2013-open</bibkey>
@@ -1253,7 +1253,7 @@
     <paper id="129">
       <title>A method for the approximation of incremental understanding of explicit utterance meaning using predictive models in finite domains</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>1092–1099</pages>
       <url hash="278208fd">N13-1129</url>
       <bibkey>devault-traum-2013-method</bibkey>
@@ -1271,7 +1271,7 @@
     <paper id="131">
       <title>Labeling the Languages of Words in Mixed-Language Documents using Weakly Supervised Methods</title>
       <author><first>Ben</first><last>King</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>1110–1119</pages>
       <url hash="cd52e21e">N13-1131</url>
       <bibkey>king-abney-2013-labeling</bibkey>
@@ -1282,7 +1282,7 @@
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1120–1130</pages>
       <url hash="f182e8b0">N13-1132</url>
       <bibkey>hovy-etal-2013-learning</bibkey>
@@ -1291,7 +1291,7 @@
     <paper id="133">
       <title>Supervised All-Words Lexical Substitution using Delexicalized Features</title>
       <author><first>György</first><last>Szarvas</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>1131–1141</pages>
       <url hash="99e98409">N13-1133</url>
@@ -1300,7 +1300,7 @@
     </paper>
     <paper id="134">
       <title>A Tensor-based Factorization Model of Semantic Compositionality</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>1142–1151</pages>
@@ -1323,7 +1323,7 @@
       <title>Towards Coherent Multi-Document Summarization</title>
       <author><first>Janara</first><last>Christensen</last></author>
       <author><first/><last>Mausam</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>1163–1173</pages>
       <url hash="29fea217">N13-1136</url>
@@ -1333,7 +1333,7 @@
     <paper id="137">
       <title>Generating Expressions that Refer to Visible Objects</title>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>1174–1184</pages>
       <url hash="bf5d3eec">N13-1137</url>
@@ -1361,8 +1361,8 @@
     <paper id="140">
       <title>Knowledge-Rich Morphological Priors for <fixed-case>B</fixed-case>ayesian Language Models</title>
       <author><first>Victor</first><last>Chahuneau</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1206–1215</pages>
       <url hash="e1d80031">N13-1140</url>
       <bibkey>chahuneau-etal-2013-knowledge</bibkey>
@@ -1376,7 +1376,7 @@
       <editor><first>Annie</first><last>Louis</last></editor>
       <editor><first>Richard</first><last>Socher</last></editor>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Eric K.</first><last>Ringger</last></editor>
+      <editor id="eric-ringger"><first>Eric K.</first><last>Ringger</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1389,7 +1389,7 @@
     </frontmatter>
     <paper id="1">
       <title>Critical Reflections on Evaluation Practices in Coreference Resolution</title>
-      <author><first>Gordana Ilić</first><last>Holen</last></author>
+      <author id="gordana-ilic-holen"><first>Gordana Ilić</first><last>Holen</last></author>
       <pages>1–7</pages>
       <url hash="1c5326bf">N13-2001</url>
       <bibkey>holen-2013-critical</bibkey>
@@ -1397,7 +1397,7 @@
     <paper id="2">
       <title>Reducing Annotation Effort on Unbalanced Corpus based on Cost Matrix</title>
       <author><first>Wencan</first><last>Luo</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Joel</first><last>Chan</last></author>
       <pages>8–15</pages>
       <url hash="4a27d44a">N13-2002</url>
@@ -1407,7 +1407,7 @@
       <title>A Machine Learning Approach to Automatic Term Extraction using a Rich Feature Set</title>
       <author><first>Merley</first><last>Conrado</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
-      <author><first>Solange</first><last>Rezende</last></author>
+      <author id="solange-oliveira-rezende"><first>Solange</first><last>Rezende</last></author>
       <pages>16–23</pages>
       <url hash="fdfe4a77">N13-2003</url>
       <bibkey>conrado-etal-2013-machine</bibkey>
@@ -1430,7 +1430,7 @@
     </paper>
     <paper id="6">
       <title>Ontology Label Translation</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>40–46</pages>
       <url hash="a4c33cf4">N13-2006</url>
@@ -1492,7 +1492,7 @@
     <meta>
       <booktitle>Proceedings of the 2013 <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> Demonstration Session</booktitle>
       <url hash="5dc7fde5">N13-3</url>
-      <editor><first>Chris</first><last>Dyer</last></editor>
+      <editor id="chris-dyer"><first>Chris</first><last>Dyer</last></editor>
       <editor><first>Derrick</first><last>Higgins</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
@@ -1517,7 +1517,7 @@
       <author><first>Justin</first><last>Snyder</last></author>
       <author><first>Rebecca</first><last>Knowles</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Matthew</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last></author>
       <author><first>Travis</first><last>Wolfe</last></author>
       <pages>5–9</pages>
       <url hash="06658a24">N13-3002</url>
@@ -1525,10 +1525,10 @@
     </paper>
     <paper id="3">
       <title><fixed-case>TMT</fixed-case>prime: A Recommender System for <fixed-case>MT</fixed-case> and <fixed-case>TM</fixed-case> Integration</title>
-      <author><first>Aswarth Abhilash</first><last>Dara</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth Abhilash</first><last>Dara</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Declan</first><last>Groves</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>10–13</pages>
       <url hash="890c65ed">N13-3003</url>
       <bibkey>dara-etal-2013-tmtprime</bibkey>
@@ -1544,7 +1544,7 @@
     <paper id="5">
       <title>A Web Application for the Diagnostic Evaluation of Machine Translation over Specific Linguistic Phenomena</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Sudip</first><last>Kumar Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Kumar Naskar</last></author>
       <author><first>Joris</first><last>Vreeke</last></author>
       <author><first>Federico</first><last>Gaspari</last></author>
       <author><first>Declan</first><last>Groves</last></author>
@@ -1562,9 +1562,9 @@
     </paper>
     <paper id="7">
       <title><fixed-case>UMLS</fixed-case>::<fixed-case>S</fixed-case>imilarity: Measuring the Relatedness and Similarity of Biomedical Concepts</title>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <author><first>Ying</first><last>Liu</last></author>
       <author><first>Genevieve</first><last>Melton-Meaux</last></author>
       <pages>28–31</pages>
@@ -1575,11 +1575,11 @@
       <title><fixed-case>KELVIN</fixed-case>: a tool for automated knowledge base construction</title>
       <author><first>Paul</first><last>McNamee</last></author>
       <author><first>James</first><last>Mayfield</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>Tim</first><last>Oates</last></author>
       <author><first>Dawn</first><last>Lawrie</last></author>
       <author><first>Tan</first><last>Xu</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>32–35</pages>
       <url hash="109652a1">N13-3008</url>
       <bibkey>mcnamee-etal-2013-kelvin</bibkey>
@@ -1614,7 +1614,7 @@
     <paper id="1">
       <title>Deep Learning for <fixed-case>NLP</fixed-case> (without Magic)</title>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1–3</pages>
       <url hash="5eeea463">N13-4001</url>
       <bibkey>socher-manning-2013-deep</bibkey>
@@ -1642,7 +1642,7 @@
     </paper>
     <paper id="4">
       <title>Semantic Role Labeling</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <author><first>Shumin</first><last>Wu</last></author>
       <pages>10–12</pages>
@@ -1654,11 +1654,11 @@
     </paper>
     <paper id="5">
       <title>Spectral Learning Algorithms for Natural Language Processing</title>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Dean</first><last>Foster</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>13–15</pages>
       <url hash="f70ffe09">N13-4005</url>
       <bibkey>cohen-etal-2013-spectral</bibkey>
@@ -1667,7 +1667,7 @@
     </paper>
     <paper id="6">
       <title>Morphological, Syntactical and Semantic Knowledge in Statistical Machine Translation</title>
-      <author><first>Marta</first><last>Ruiz Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>Ruiz Costa-jussà</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>16–18</pages>
       <url hash="8feb8767">N13-4006</url>
diff --git a/data/xml/N15.xml b/data/xml/N15.xml
index cfcc4a1fac..33f74ee670 100644
--- a/data/xml/N15.xml
+++ b/data/xml/N15.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 2015 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="b789b4ea">N15-1</url>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
-      <editor><first>Joyce</first><last>Chai</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="joyce-chai"><first>Joyce</first><last>Chai</last></editor>
       <editor><first>Anoop</first><last>Sarkar</last></editor>
       <doi>10.3115/v1/N15-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -42,7 +42,7 @@
     <paper id="3">
       <title>Improving unsupervised vector-space thematic fit evaluation via role-filler prototype clustering</title>
       <author><first>Clayton</first><last>Greenberg</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>21–31</pages>
       <url hash="8901bbc9">N15-1003</url>
@@ -53,9 +53,9 @@
       <title>A Compositional and Interpretable Semantic Space</title>
       <author><first>Alona</first><last>Fyshe</last></author>
       <author><first>Leila</first><last>Wehbe</last></author>
-      <author><first>Partha P.</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha P.</first><last>Talukdar</last></author>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>32–41</pages>
       <url hash="76c39b5c">N15-1004</url>
       <doi>10.3115/v1/N15-1004</doi>
@@ -79,7 +79,7 @@
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>53–63</pages>
       <url hash="735243a5">N15-1006</url>
       <doi>10.3115/v1/N15-1006</doi>
@@ -88,8 +88,8 @@
     <paper id="7">
       <title>Because Syntax Does Matter: Improving Predicate-Argument Structures Parsing with Syntactic Features</title>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>64–74</pages>
       <url hash="d03030fa">N15-1007</url>
       <doi>10.3115/v1/N15-1007</doi>
@@ -109,7 +109,7 @@
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Minwoo</first><last>Jeong</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>84–92</pages>
       <url hash="e397e977">N15-1009</url>
       <doi>10.3115/v1/N15-1009</doi>
@@ -119,7 +119,7 @@
       <title>Not All Character N-grams Are Created Equal: A Study in Authorship Attribution</title>
       <author><first>Upendra</first><last>Sapkota</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>93–102</pages>
       <url hash="dec692a0">N15-1010</url>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="11">
       <title>Effective Use of Word Order for Text Categorization with Convolutional Neural Networks</title>
-      <author><first>Rie</first><last>Johnson</last></author>
+      <author id="rie-johnson"><first>Rie</first><last>Johnson</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>103–112</pages>
       <url hash="74db37fd">N15-1011</url>
@@ -184,8 +184,8 @@
     <paper id="16">
       <title>Combining Language and Vision with a Multimodal Skip-gram Model</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>153–163</pages>
       <url hash="ad2df4d7">N15-1016</url>
       <doi>10.3115/v1/N15-1016</doi>
@@ -194,7 +194,7 @@
     <paper id="17">
       <title>Discriminative Unsupervised Alignment of Natural Language Instructions with Corresponding Video Segments</title>
       <author><first>Iftekhar</first><last>Naim</last></author>
-      <author><first>Young C.</first><last>Song</last></author>
+      <author id="young-chol-song"><first>Young C.</first><last>Song</last></author>
       <author><first>Qiguang</first><last>Liu</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Henry</first><last>Kautz</last></author>
@@ -209,7 +209,7 @@
       <title><fixed-case>T</fixed-case>opic<fixed-case>C</fixed-case>heck: Interactive Alignment for Assessing Topic Model Stability</title>
       <author><first>Jason</first><last>Chuang</last></author>
       <author><first>Margaret E.</first><last>Roberts</last></author>
-      <author><first>Brandon M.</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon M.</first><last>Stewart</last></author>
       <author><first>Rebecca</first><last>Weiss</last></author>
       <author><first>Dustin</first><last>Tingley</last></author>
       <author><first>Justin</first><last>Grimmer</last></author>
@@ -221,7 +221,7 @@
     </paper>
     <paper id="19">
       <title>Inferring latent attributes of <fixed-case>T</fixed-case>witter users with label regularization</title>
-      <author><first>Ehsan</first><last>Mohammady Ardehaly</last></author>
+      <author id="ehsan-mohammady-ardehaly"><first>Ehsan</first><last>Mohammady Ardehaly</last></author>
       <author><first>Aron</first><last>Culotta</last></author>
       <pages>185–195</pages>
       <url hash="c511ec68">N15-1019</url>
@@ -236,9 +236,9 @@
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>196–205</pages>
       <url hash="c68b04f6">N15-1020</url>
       <doi>10.3115/v1/N15-1020</doi>
@@ -257,7 +257,7 @@
       <title>Aligning Sentences from Standard <fixed-case>W</fixed-case>ikipedia to <fixed-case>S</fixed-case>imple <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>William</first><last>Hwang</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Wei</first><last>Wu</last></author>
       <pages>211–217</pages>
       <url hash="bfacc96a">N15-1022</url>
@@ -285,7 +285,7 @@
     <paper id="25">
       <title>Spinning Straw into Gold: Using Free Text to Train Monolingual Alignment Models for Non-factoid Question Answering</title>
       <author><first>Rebecca</first><last>Sharp</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>231–237</pages>
@@ -297,7 +297,7 @@
       <title>Personalized Page Rank for Named Entity Disambiguation</title>
       <author><first>Maria</first><last>Pershina</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>238–243</pages>
       <url hash="e377f79e">N15-1026</url>
       <doi>10.3115/v1/N15-1026</doi>
@@ -346,7 +346,7 @@
     </paper>
     <paper id="31">
       <title>Incrementally Tracking Reference in Human/Human Dialogue Using Linguistic and Extra-Linguistic Information</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>David</first><last>Schlangen</last></author>
@@ -405,7 +405,7 @@
     </paper>
     <paper id="37">
       <title>Interpreting Compound Noun Phrases Using Web Search Queries</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>335–344</pages>
       <url hash="71eb323e">N15-1037</url>
       <doi>10.3115/v1/N15-1037</doi>
@@ -413,10 +413,10 @@
     </paper>
     <paper id="38">
       <title>Lexicon-Free Conversational Speech Recognition with Neural Networks</title>
-      <author><first>Andrew</first><last>Maas</last></author>
+      <author id="andrew-maas"><first>Andrew</first><last>Maas</last></author>
       <author><first>Ziang</first><last>Xie</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <pages>345–354</pages>
       <url hash="21eacb1a">N15-1038</url>
       <doi>10.3115/v1/N15-1038</doi>
@@ -435,7 +435,7 @@
       <title>A Transition-based Algorithm for <fixed-case>AMR</fixed-case> Parsing</title>
       <author><first>Chuan</first><last>Wang</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <pages>366–375</pages>
       <url hash="1b481ed3">N15-1040</url>
       <doi>10.3115/v1/N15-1040</doi>
@@ -464,7 +464,7 @@
     <paper id="43">
       <title>Latent Domain Word Alignment for Heterogeneous Corpora</title>
       <author><first>Hoang</first><last>Cuong</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>398–408</pages>
       <url hash="f030b9fa">N15-1043</url>
       <doi>10.3115/v1/N15-1043</doi>
@@ -472,17 +472,17 @@
     </paper>
     <paper id="44">
       <title>Extracting Human Temporal Orientation from <fixed-case>F</fixed-case>acebook Language</title>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Gregory</first><last>Park</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Evan</first><last>Weingarten</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Margaret</first><last>Kern</last></author>
+      <author id="margaret-kern"><first>Margaret</first><last>Kern</last></author>
       <author><first>David</first><last>Stillwell</last></author>
       <author><first>Michal</first><last>Kosinski</last></author>
       <author><first>Jonah</first><last>Berger</last></author>
       <author><first>Martin</first><last>Seligman</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>409–419</pages>
       <url hash="9c5856bf">N15-1044</url>
       <doi>10.3115/v1/N15-1044</doi>
@@ -500,9 +500,9 @@
     <paper id="46">
       <title>Using Summarization to Discover Argument Facets in Online Idealogical Dialog</title>
       <author><first>Amita</first><last>Misra</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Jean E.</first><last>Fox Tree</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="jean-e-fox-tree"><first>Jean E.</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>430–440</pages>
       <url hash="fd5c6fbc">N15-1046</url>
       <doi>10.3115/v1/N15-1046</doi>
@@ -530,7 +530,7 @@
     <paper id="49">
       <title>A Dynamic Programming Algorithm for Tree Trimming-based Text Summarization</title>
       <author><first>Masaaki</first><last>Nishino</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Shin-ichi</first><last>Minato</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
@@ -552,9 +552,9 @@
     <paper id="51">
       <title>Corpus-based discovery of semantic intensity scales</title>
       <author><first>Chaitanya</first><last>Shivade</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Albert M.</first><last>Lai</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert M.</first><last>Lai</last></author>
       <pages>483–493</pages>
       <url hash="07f0706c">N15-1051</url>
       <doi>10.3115/v1/N15-1051</doi>
@@ -564,7 +564,7 @@
       <title>Dialogue focus tracking for zero pronoun resolution</title>
       <author><first>Sudha</first><last>Rao</last></author>
       <author><first>Allyson</first><last>Ettinger</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>494–503</pages>
       <url hash="e3bc7ddb">N15-1052</url>
@@ -593,8 +593,8 @@
     </paper>
     <paper id="55">
       <title>Robust Morphological Tagging with Word Representations</title>
-      <author><first>Thomas</first><last>Müller</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Müller</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>526–536</pages>
       <url hash="b6f5ece0">N15-1055</url>
       <doi>10.3115/v1/N15-1055</doi>
@@ -634,7 +634,7 @@
     </paper>
     <paper id="59">
       <title><fixed-case>NASARI</fixed-case>: a Novel Approach to a Semantically-Aware Representation of Items</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>567–577</pages>
@@ -645,7 +645,7 @@
     <paper id="60">
       <title>Towards a standard evaluation method for grammatical error detection and correction</title>
       <author><first>Mariano</first><last>Felice</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>578–587</pages>
       <url hash="35bcc295">N15-1060</url>
       <doi>10.3115/v1/N15-1060</doi>
@@ -654,7 +654,7 @@
     <paper id="61">
       <title>Using Zero-Resource Spoken Term Discovery for Ranked Retrieval</title>
       <author><first>Jerome</first><last>White</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Aren</first><last>Jansen</last></author>
       <author><first>Jiaul</first><last>Paik</last></author>
       <author><first>Rashmi</first><last>Sankepally</last></author>
@@ -667,7 +667,7 @@
       <title>Constraint-Based Models of Lexical Borrowing</title>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>598–608</pages>
       <url hash="18d37274">N15-1062</url>
       <doi>10.3115/v1/N15-1062</doi>
@@ -687,7 +687,7 @@
       <title>Jointly Modeling Inter-Slot Relations by Random Walk on Knowledge Graphs for Unsupervised Spoken Language Understanding</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>619–629</pages>
       <url hash="7d58a803">N15-1064</url>
       <doi>10.3115/v1/N15-1064</doi>
@@ -705,8 +705,8 @@
     <paper id="66">
       <title>Diamonds in the Rough: Event Extraction from Imperfect Microblog Data</title>
       <author><first>Ander</first><last>Intxaurrondo</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>641–650</pages>
       <url hash="6ec25d1b">N15-1066</url>
@@ -743,8 +743,8 @@
     <paper id="70">
       <title>Ontologically Grounded Multi-sense Representation Learning for Semantic Vector Space Models</title>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>683–693</pages>
       <url hash="f1cf799c">N15-1070</url>
       <doi>10.3115/v1/N15-1070</doi>
@@ -771,9 +771,9 @@
     </paper>
     <paper id="73">
       <title>Multitask Learning for Adaptive Quality Estimation of Automatically Transcribed Utterances</title>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Hamed</first><last>Zamani</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <pages>714–724</pages>
@@ -785,7 +785,7 @@
       <title>Incorporating Word Correlation Knowledge into Topic Modeling</title>
       <author><first>Pengtao</first><last>Xie</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>725–734</pages>
       <url hash="bf6b4b54">N15-1074</url>
       <doi>10.3115/v1/N15-1074</doi>
@@ -806,7 +806,7 @@
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Jeffrey</first><last>Lund</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <pages>746–755</pages>
       <url hash="a96cecac">N15-1076</url>
       <doi>10.3115/v1/N15-1076</doi>
@@ -814,7 +814,7 @@
     </paper>
     <paper id="77">
       <title>Grounded Semantic Parsing for Complex Knowledge Extraction</title>
-      <author><first>Ankur P.</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur P.</first><last>Parikh</last></author>
       <author><first>Hoifung</first><last>Poon</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>756–766</pages>
@@ -825,7 +825,7 @@
     <paper id="78">
       <title>Sentiment after Translation: A Case-Study on <fixed-case>A</fixed-case>rabic Social Media Posts</title>
       <author><first>Mohammad</first><last>Salameh</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>767–777</pages>
       <url hash="dd26d349">N15-1078</url>
@@ -845,8 +845,8 @@
     <paper id="80">
       <title>Transforming Dependencies into Phrase Structures</title>
       <author><first>Lingpeng</first><last>Kong</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>788–798</pages>
       <url hash="271f0474">N15-1080</url>
       <doi>10.3115/v1/N15-1080</doi>
@@ -874,7 +874,7 @@
     <paper id="83">
       <title>Pragmatic Neural Language Modelling in Machine Translation</title>
       <author><first>Paul</first><last>Baltescu</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>820–829</pages>
       <url hash="28f6516f">N15-1083</url>
       <doi>10.3115/v1/N15-1083</doi>
@@ -895,7 +895,7 @@
     <paper id="85">
       <title>Semantic Grounding in Dialogue for Complex Problem Solving</title>
       <author><first>Xiaolong</first><last>Li</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>841–850</pages>
       <url hash="b6c0fae2">N15-1085</url>
       <doi>10.3115/v1/N15-1085</doi>
@@ -913,10 +913,10 @@
     </paper>
     <paper id="87">
       <title>Sentence segmentation of aphasic speech</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Naama</first><last>Ben-David</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
-      <author><first>Naida</first><last>Graham</last></author>
+      <author id="naida-graham"><first>Naida</first><last>Graham</last></author>
       <author><first>Elizabeth</first><last>Rochon</last></author>
       <pages>862–871</pages>
       <url hash="3e584853">N15-1087</url>
@@ -926,7 +926,7 @@
     <paper id="88">
       <title>Semantic parsing of speech using grammars learned with weak supervision</title>
       <author><first>Judith</first><last>Gaspers</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Britta</first><last>Wrede</last></author>
       <pages>872–881</pages>
       <url hash="b4a4cd7f">N15-1088</url>
@@ -937,7 +937,7 @@
       <title>Early Gains Matter: A Case for Preferring Generative over Discriminative Crowdsourcing Models</title>
       <author><first>Paul</first><last>Felt</last></author>
       <author><first>Kevin</first><last>Black</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
       <pages>882–891</pages>
@@ -948,7 +948,7 @@
     </paper>
     <paper id="90">
       <title>Optimizing Multivariate Performance Measures for Learning Relation Extraction Models</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <pages>892–900</pages>
@@ -959,7 +959,7 @@
     <paper id="91">
       <title>Convolutional Neural Network for Paraphrase Identification</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>901–911</pages>
       <url hash="65dfdd42">N15-1091</url>
       <doi>10.3115/v1/N15-1091</doi>
@@ -992,7 +992,7 @@
     <paper id="94">
       <title>Penalized Expectation Propagation for Graphical Models over Strings</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>932–942</pages>
       <url hash="b63af20b">N15-1094</url>
       <doi>10.3115/v1/N15-1094</doi>
@@ -1020,8 +1020,8 @@
     </paper>
     <paper id="97">
       <title>So similar and yet incompatible: Toward the automated identification of semantically compatible words</title>
-      <author><first>Germán</first><last>Kruszewski</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>964–969</pages>
       <url hash="cc818f70">N15-1097</url>
       <doi>10.3115/v1/N15-1097</doi>
@@ -1031,7 +1031,7 @@
       <title>Do Supervised Distributional Methods Really Learn Lexical Inference Relations?</title>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>970–976</pages>
       <url hash="21a0b4a4">N15-1098</url>
@@ -1042,7 +1042,7 @@
       <title>A Word Embedding Approach to Predicting the Compositionality of Multiword Expressions</title>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>977–983</pages>
       <url hash="643293cd">N15-1099</url>
       <doi>10.3115/v1/N15-1099</doi>
@@ -1072,7 +1072,7 @@
       <author><first>Katrin</first><last>Kirchhoff</last></author>
       <author><first>Yik-Cheung</first><last>Tam</last></author>
       <author><first>Colleen</first><last>Richey</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>995–1000</pages>
       <url hash="f14ce781">N15-1102</url>
       <doi>10.3115/v1/N15-1102</doi>
@@ -1080,7 +1080,7 @@
     </paper>
     <paper id="103">
       <title>Continuous Adaptation to User Feedback for Statistical Machine Translation</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
@@ -1103,7 +1103,7 @@
     </paper>
     <paper id="105">
       <title>Fast and Accurate Preordering for <fixed-case>SMT</fixed-case> using Neural Networks</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>1012–1017</pages>
@@ -1173,7 +1173,7 @@
     <paper id="112">
       <title>Socially-Informed Timeline Generation for Complex Events</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Galen</first><last>Marchetti</last></author>
       <pages>1055–1065</pages>
       <url hash="7879ffbc">N15-1112</url>
@@ -1183,7 +1183,7 @@
     </paper>
     <paper id="113">
       <title>Movie Script Summarization as Graph-based Scene Extraction</title>
-      <author><first>Philip John</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip John</first><last>Gorinski</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1066–1076</pages>
       <url hash="a4aa7a26">N15-1113</url>
@@ -1196,7 +1196,7 @@
       <author><first>Jeffrey</first><last>Flanigan</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1077–1086</pages>
       <url hash="be8012f3">N15-1114</url>
       <doi>10.3115/v1/N15-1114</doi>
@@ -1237,7 +1237,7 @@
     </paper>
     <paper id="118">
       <title>Injecting Logical Background Knowledge into Embeddings for Relation Extraction</title>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>1119–1129</pages>
@@ -1272,7 +1272,7 @@
       <title>High-Order Low-Rank Tensors for Semantic Role Labeling</title>
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Yuan</first><last>Zhang</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>1150–1160</pages>
@@ -1283,8 +1283,8 @@
     <paper id="122">
       <title>Lexical Event Ordering with an Edge-Factored Model</title>
       <author><first>Omri</first><last>Abend</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1161–1171</pages>
       <url hash="4da1622b">N15-1122</url>
       <doi>10.3115/v1/N15-1122</doi>
@@ -1302,7 +1302,7 @@
     <paper id="124">
       <title>Accurate Evaluation of Segment-level Machine Translation Metrics</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Nitika</first><last>Mathur</last></author>
       <pages>1183–1191</pages>
       <url hash="13384c92">N15-1124</url>
@@ -1312,9 +1312,9 @@
     <paper id="125">
       <title>Leveraging Small Multilingual Corpora for <fixed-case>SMT</fixed-case> Using Many Pivot Languages</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1192–1202</pages>
       <url hash="0b78d496">N15-1125</url>
       <doi>10.3115/v1/N15-1125</doi>
@@ -1325,7 +1325,7 @@
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>1203–1208</pages>
       <url hash="5ab598e8">N15-1126</url>
       <doi>10.3115/v1/N15-1126</doi>
@@ -1342,7 +1342,7 @@
     <paper id="128">
       <title>Distributed Representations of Words to Guide Bootstrapped Entity Classifiers</title>
       <author><first>Sonal</first><last>Gupta</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1215–1220</pages>
       <url hash="d96366da">N15-1128</url>
       <doi>10.3115/v1/N15-1128</doi>
@@ -1383,7 +1383,7 @@
       <author><first>Dhirendra</first><last>Singh</last></author>
       <author><first>Rudramurthy</first><last>V</last></author>
       <author><first>Hanumant</first><last>Redkar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1238–1243</pages>
       <url hash="9a5e5f82">N15-1132</url>
       <doi>10.3115/v1/N15-1132</doi>
@@ -1410,9 +1410,9 @@
     <paper id="135">
       <title>Mining for unambiguous instances to adapt part-of-speech taggers to new domains</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1256–1261</pages>
       <url hash="a8536b7d">N15-1135</url>
       <doi>10.3115/v1/N15-1135</doi>
@@ -1420,7 +1420,7 @@
     </paper>
     <paper id="136">
       <title>Clustering Sentences with Density Peaks for Multi-document Summarization</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Yunqing</first><last>Xia</last></author>
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Wenmin</first><last>Wang</last></author>
@@ -1431,7 +1431,7 @@
     </paper>
     <paper id="137">
       <title>Development of the Multilingual Semantic Annotation System</title>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Francesca</first><last>Bianchi</last></author>
       <author><first>Carmen</first><last>Dayrell</last></author>
       <author><first>Angela</first><last>D’Egidio</last></author>
@@ -1453,8 +1453,8 @@
     <paper id="139">
       <title>#<fixed-case>W</fixed-case>hy<fixed-case>IS</fixed-case>tayed, #<fixed-case>W</fixed-case>hy<fixed-case>IL</fixed-case>eft: Microblogging to Make Sense of Domestic Abuse</title>
       <author><first>Nicolas</first><last>Schrading</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
-      <author><first>Raymond</first><last>Ptucha</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="raymond-ptucha"><first>Raymond</first><last>Ptucha</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
       <pages>1281–1286</pages>
       <url hash="8925016a">N15-1139</url>
@@ -1464,7 +1464,7 @@
     <paper id="140">
       <title>Morphological Word-Embeddings</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1287–1292</pages>
       <url hash="1f0926f3">N15-1140</url>
       <doi>10.3115/v1/N15-1140</doi>
@@ -1474,7 +1474,7 @@
       <title>Recognizing Social Constructs from Textual Conversation</title>
       <author><first>Somak</first><last>Aditya</last></author>
       <author><first>Chitta</first><last>Baral</last></author>
-      <author><first>Nguyen</first><last>Ha Vo</last></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Ha Vo</last></author>
       <author><first>Joohyung</first><last>Lee</last></author>
       <author><first>Jieping</first><last>Ye</last></author>
       <author><first>Zaw</first><last>Naung</last></author>
@@ -1491,8 +1491,8 @@
     <paper id="142">
       <title>Two/Too Simple Adaptations of <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec for Syntax Problems</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>1299–1304</pages>
       <url hash="e5e9e5e5">N15-1142</url>
@@ -1502,7 +1502,7 @@
     <paper id="143">
       <title>Estimating Numerical Attributes by Bringing Together Fragmentary Clues</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1305–1310</pages>
       <url hash="a7a05dc3">N15-1143</url>
       <doi>10.3115/v1/N15-1143</doi>
@@ -1512,8 +1512,8 @@
       <title>Unsupervised <fixed-case>POS</fixed-case> Induction with Word Embeddings</title>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>1311–1316</pages>
       <url hash="0c2e02f4">N15-1144</url>
       <doi>10.3115/v1/N15-1144</doi>
@@ -1532,7 +1532,7 @@
     <paper id="146">
       <title><fixed-case>MPQA</fixed-case> 3.0: An Entity/Event-Level Sentiment Corpus</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>1323–1328</pages>
       <url hash="c5ad31f3">N15-1146</url>
       <doi>10.3115/v1/N15-1146</doi>
@@ -1569,7 +1569,7 @@
       <author><first>Andrew</first><last>Shin</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1345–1350</pages>
       <url hash="40205ce3">N15-1150</url>
       <doi>10.3115/v1/N15-1150</doi>
@@ -1586,10 +1586,10 @@
     </paper>
     <paper id="152">
       <title>Learning to parse with <fixed-case>IAA</fixed-case>-weighted loss</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Arne</first><last>Skjærholt</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1357–1361</pages>
       <url hash="a8595f7b">N15-1152</url>
       <doi>10.3115/v1/N15-1152</doi>
@@ -1599,8 +1599,8 @@
       <title>Exploiting Text and Network Context for Geolocation of Social Media Users</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
       <author><first>Duy</first><last>Vu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1362–1367</pages>
       <url hash="eb2342eb">N15-1153</url>
       <doi>10.3115/v1/N15-1153</doi>
@@ -1609,7 +1609,7 @@
     <paper id="154">
       <title>Discriminative Phrase Embedding for Paraphrase Identification</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1368–1373</pages>
       <url hash="c9de42e0">N15-1154</url>
       <doi>10.3115/v1/N15-1154</doi>
@@ -1618,7 +1618,7 @@
     <paper id="155">
       <title>Combining Word Embeddings and Feature Embeddings for Fine-grained Relation Extraction</title>
       <author><first>Mo</first><last>Yu</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>1374–1379</pages>
       <url hash="9ddb0e96">N15-1155</url>
@@ -1638,7 +1638,7 @@
     <paper id="157">
       <title>Simple task-specific bilingual word embeddings</title>
       <author><first>Stephan</first><last>Gouws</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1386–1390</pages>
       <url hash="055865c2">N15-1157</url>
       <doi>10.3115/v1/N15-1157</doi>
@@ -1665,7 +1665,7 @@
     </paper>
     <paper id="160">
       <title>Large-Scale Native Language Identification with Cross-Corpus Evaluation</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>1403–1409</pages>
       <url hash="93571e42">N15-1160</url>
@@ -1675,7 +1675,7 @@
     <paper id="161">
       <title><fixed-case>U</fixed-case>nediting: Detecting Disfluencies Without Careful Transcripts</title>
       <author><first>Victoria</first><last>Zayats</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>1410–1415</pages>
       <url hash="7444a5ea">N15-1161</url>
@@ -1713,8 +1713,8 @@
     <paper id="165">
       <title>Random Walks and Neural Network Language Models on Knowledge Bases</title>
       <author><first>Josu</first><last>Goikoetxea</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1434–1439</pages>
       <url hash="bfe45501">N15-1165</url>
       <doi>10.3115/v1/N15-1165</doi>
@@ -1743,7 +1743,7 @@
     <paper id="168">
       <title>Extracting Information about Medication Use from Veterinary Discussions</title>
       <author><first>Haibo</first><last>Ding</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1452–1458</pages>
       <url hash="25f43f53">N15-1168</url>
       <doi>10.3115/v1/N15-1168</doi>
@@ -1784,7 +1784,7 @@
     <paper id="172">
       <title>Echoes of Persuasion: The Effect of Euphony in Persuasive Communication</title>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>1483–1493</pages>
       <url hash="6f29adf1">N15-1172</url>
@@ -1797,7 +1797,7 @@
       <author><first>Huijuan</first><last>Xu</last></author>
       <author><first>Jeff</first><last>Donahue</last></author>
       <author><first>Marcus</first><last>Rohrbach</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Kate</first><last>Saenko</last></author>
       <pages>1494–1504</pages>
       <url hash="96c0631c">N15-1173</url>
@@ -1820,7 +1820,7 @@
       <author><first>Sebastian</first><last>Muehr</last></author>
       <author><first>Patrick</first><last>Lehnen</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1516–1526</pages>
       <url hash="129a971a">N15-1175</url>
       <doi>10.3115/v1/N15-1175</doi>
@@ -1829,7 +1829,7 @@
     <paper id="176">
       <title>Learning Translation Models from Monolingual Continuous Representations</title>
       <author><first>Kai</first><last>Zhao</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Michael</first><last>Auli</last></author>
       <pages>1527–1536</pages>
       <url hash="cb5d0153">N15-1176</url>
@@ -1839,7 +1839,7 @@
     <paper id="177">
       <title>A Corpus and Model Integrating Multiword Expressions and Supersenses</title>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1537–1547</pages>
       <url hash="67a2d7e9">N15-1177</url>
       <doi>10.3115/v1/N15-1177</doi>
@@ -1856,7 +1856,7 @@
     </paper>
     <paper id="179">
       <title>Do We Really Need Lexical Information? Towards a Top-down Approach to Sentiment Analysis of Product Reviews</title>
-      <author><first>Yulia</first><last>Otmakhova</last></author>
+      <author id="julia-otmakhova"><first>Yulia</first><last>Otmakhova</last></author>
       <author><first>Hyopil</first><last>Shin</last></author>
       <pages>1559–1568</pages>
       <url hash="e29cbb6f">N15-1179</url>
@@ -1884,7 +1884,7 @@
     <paper id="182">
       <title>Shared common ground influences information density in microblog texts</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
-      <author><first>Michael</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael</first><last>Frank</last></author>
       <pages>1587–1596</pages>
       <url hash="f68f5a47">N15-1182</url>
       <doi>10.3115/v1/N15-1182</doi>
@@ -1892,7 +1892,7 @@
     </paper>
     <paper id="183">
       <title>Hierarchic syntax improves reading time prediction</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>1597–1605</pages>
       <url hash="7d408ab4">N15-1183</url>
@@ -1904,9 +1904,9 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Jesse</first><last>Dodge</last></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1606–1615</pages>
       <url hash="917eed9f">N15-1184</url>
       <doi>10.3115/v1/N15-1184</doi>
@@ -1924,7 +1924,7 @@
     <paper id="186">
       <title>Unsupervised Morphology Induction Using Word Embeddings</title>
       <author><first>Radu</first><last>Soricut</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>1627–1637</pages>
       <url hash="dff5e791">N15-1186</url>
       <doi>10.3115/v1/N15-1186</doi>
@@ -1935,7 +1935,7 @@
     <meta>
       <booktitle>Proceedings of the 2015 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Student Research Workshop</booktitle>
       <url hash="0049c8c0">N15-2</url>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
       <editor><first>Smaranda</first><last>Muresan</last></editor>
       <editor><first>Shibamouli</first><last>Lahiri</last></editor>
       <editor><first>Karen</first><last>Mazidi</last></editor>
@@ -1961,7 +1961,7 @@
     </paper>
     <paper id="2">
       <title>Reliable Lexical Simplification for Non-Native Speakers</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <pages>9–16</pages>
       <url hash="cf393646">N15-2002</url>
       <doi>10.3115/v1/N15-2002</doi>
@@ -1990,7 +1990,7 @@
     <paper id="5">
       <title>Towards a Better Semantic Role Labeling of Complex Predicates</title>
       <author><first>Glorianna</first><last>Jagfeld</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>33–39</pages>
       <url hash="14779e30">N15-2005</url>
       <doi>10.3115/v1/N15-2005</doi>
@@ -2057,7 +2057,7 @@
     <paper id="13">
       <title>Relation Extraction from Community Generated Question-Answer Pairs</title>
       <author><first>Denis</first><last>Savenkov</last></author>
-      <author><first>Wei-Lwun</first><last>Lu</last></author>
+      <author id="wei-lun-lu"><first>Wei-Lwun</first><last>Lu</last></author>
       <author><first>Jeff</first><last>Dalton</last></author>
       <author><first>Eugene</first><last>Agichtein</last></author>
       <pages>96–102</pages>
@@ -2084,7 +2084,7 @@
     </paper>
     <paper id="16">
       <title>Discourse and Document-level Information for Evaluating Language Output Tasks</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>118–125</pages>
       <url hash="933c026a">N15-2016</url>
       <doi>10.3115/v1/N15-2016</doi>
@@ -2110,7 +2110,7 @@
     <paper id="19">
       <title>Semantics-based Graph Approach to Complex Question-Answering</title>
       <author><first>Tomasz</first><last>Jurczyk</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>140–146</pages>
       <url hash="667dd11a">N15-2019</url>
       <doi>10.3115/v1/N15-2019</doi>
@@ -2145,7 +2145,7 @@
     <paper id="23">
       <title>Computational Exploration to Linguistic Structures of Future: Classification and Categorization</title>
       <author><first>Aiming</first><last>Ni</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Jason</first><last>Shepard</last></author>
       <author><first>Phillip</first><last>Wolff</last></author>
       <pages>168–173</pages>
@@ -2158,9 +2158,9 @@
     <meta>
       <booktitle>Proceedings of the 2015 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Demonstrations</booktitle>
       <url hash="416844aa">N15-3</url>
-      <editor><first>Matt</first><last>Gerber</last></editor>
+      <editor id="matthew-gerber"><first>Matt</first><last>Gerber</last></editor>
       <editor><first>Catherine</first><last>Havasi</last></editor>
-      <editor><first>Finley</first><last>Lacatusu</last></editor>
+      <editor id="finley-lacatusu"><first>Finley</first><last>Lacatusu</last></editor>
       <doi>10.3115/v1/N15-3</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Denver, Colorado</address>
@@ -2176,7 +2176,7 @@
       <title>Two Practical <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory Parsers</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Tom</first><last>Hicks</last></author>
-      <author><first>Marco Antonio</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco Antonio</first><last>Valenzuela-Escárcega</last></author>
       <pages>1–5</pages>
       <url hash="9c4f5353">N15-3001</url>
       <doi>10.3115/v1/N15-3001</doi>
@@ -2207,7 +2207,7 @@
       <author><first>Xiangmin</first><last>Fan</last></author>
       <author><first>Muhsin</first><last>Menekse</last></author>
       <author><first>Jingtao</first><last>Wang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>16–20</pages>
       <url hash="cb4e4070">N15-3004</url>
       <doi>10.3115/v1/N15-3004</doi>
@@ -2216,7 +2216,7 @@
     <paper id="5">
       <title><fixed-case>RE</fixed-case>xtractor: a Robust Information Extractor</title>
       <author><first>Vincent</first><last>Kríž</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <pages>21–25</pages>
       <url hash="9a907d6d">N15-3005</url>
       <doi>10.3115/v1/N15-3005</doi>
@@ -2235,7 +2235,7 @@
     <paper id="7">
       <title><fixed-case>ICE</fixed-case>: Rapid Information Extraction Customization for <fixed-case>NLP</fixed-case> Novices</title>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>31–35</pages>
       <url hash="fd669420">N15-3007</url>
       <doi>10.3115/v1/N15-3007</doi>
@@ -2255,7 +2255,7 @@
       <author><first>Yusuke</first><last>Oda</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>41–45</pages>
       <url hash="0d77b798">N15-3009</url>
@@ -2264,7 +2264,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>ELCO</fixed-case>3: Entity Linking with Corpus Coherence Combining Open Source Annotators</title>
-      <author><first>Pablo</first><last>Ruiz</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Frédérique</first><last>Mélanie</last></author>
       <pages>46–50</pages>
@@ -2285,7 +2285,7 @@
     </paper>
     <paper id="12">
       <title>Visualizing Deep-Syntactic Parser Output</title>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Simon</first><last>Mille</last></author>
@@ -2298,7 +2298,7 @@
     <paper id="13">
       <title><fixed-case>WOLFE</fixed-case>: An <fixed-case>NLP</fixed-case>-friendly Declarative Machine Learning Stack</title>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Luke</first><last>Hewitt</last></author>
       <author><first>Jason</first><last>Naradowsky</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
@@ -2337,7 +2337,7 @@
       <title>Brahmi-Net: A transliteration and script conversion system for languages of the <fixed-case>I</fixed-case>ndian subcontinent</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Ratish</first><last>Puduppully</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>81–85</pages>
       <url hash="95a956d0">N15-3017</url>
       <doi>10.3115/v1/N15-3017</doi>
@@ -2351,7 +2351,7 @@
       <author><first>Nicholas</first><last>Andrews</last></author>
       <author><first>Jay</first><last>DeYoung</last></author>
       <author><first>Max</first><last>Thomas</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Travis</first><last>Wolfe</last></author>
       <author><first>Craig</first><last>Harman</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
@@ -2366,7 +2366,7 @@
       <author><first>Hubert</first><last>Soyer</last></author>
       <author><first>Goran</first><last>Topić</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>91–95</pages>
       <url hash="cc1588eb">N15-3019</url>
       <doi>10.3115/v1/N15-3019</doi>
@@ -2375,7 +2375,7 @@
     <paper id="20">
       <title>Online Readability and Text Complexity Analysis with <fixed-case>T</fixed-case>ext<fixed-case>E</fixed-case>valuator</title>
       <author><first>Diane</first><last>Napolitano</last></author>
-      <author><first>Kathleen</first><last>Sheehan</last></author>
+      <author id="kathleen-m-sheehan"><first>Kathleen</first><last>Sheehan</last></author>
       <author><first>Robert</first><last>Mundkowsky</last></author>
       <pages>96–100</pages>
       <url hash="086c7ccf">N15-3020</url>
@@ -2395,8 +2395,8 @@
     </paper>
     <paper id="22">
       <title><fixed-case>W</fixed-case>rite<fixed-case>A</fixed-case>head2: Mining Lexical Grammar Patterns for Assisted Writing</title>
-      <author><first>Jim</first><last>Chang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>106–110</pages>
       <url hash="9dde4ae0">N15-3022</url>
       <doi>10.3115/v1/N15-3022</doi>
@@ -2409,7 +2409,7 @@
       <author><first>Byungsoo</first><last>Kim</last></author>
       <author><first>Sangdo</first><last>Han</last></author>
       <author><first>Hyosup</first><last>Shim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>111–115</pages>
       <url hash="416488e3">N15-3023</url>
       <doi>10.3115/v1/N15-3023</doi>
@@ -2418,8 +2418,8 @@
     <paper id="24">
       <title>Using Word Semantics To Assist <fixed-case>E</fixed-case>nglish as a Second Language Learners</title>
       <author><first>Mahmoud</first><last>Azab</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>116–120</pages>
       <url hash="9aac968f">N15-3024</url>
       <doi>10.3115/v1/N15-3024</doi>
@@ -2445,7 +2445,7 @@
     </frontmatter>
     <paper id="1">
       <title>Hands-on Learning to Search for Structured Prediction</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>John</first><last>Langford</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>He</first><last>He</last></author>
@@ -2458,7 +2458,7 @@
     <paper id="2">
       <title>Crowdsourcing for <fixed-case>NLP</fixed-case></title>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
       <pages>2–3</pages>
       <url hash="2c89a006">N15-4002</url>
@@ -2477,7 +2477,7 @@
     </paper>
     <paper id="4">
       <title>Deep Learning and Continuous Representations for Natural Language Processing</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>6–8</pages>
@@ -2498,10 +2498,10 @@
     </paper>
     <paper id="6">
       <title>Getting the Roles Right: Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et in <fixed-case>NLP</fixed-case></title>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
       <pages>10–12</pages>
       <url hash="0dd19dc7">N15-4006</url>
       <doi>10.3115/v1/N15-4006</doi>
diff --git a/data/xml/N16.xml b/data/xml/N16.xml
index f5dd1a67e9..559b567642 100644
--- a/data/xml/N16.xml
+++ b/data/xml/N16.xml
@@ -6,7 +6,7 @@
       <url hash="44b79889">N16-1</url>
       <editor><first>Kevin</first><last>Knight</last></editor>
       <editor><first>Ani</first><last>Nenkova</last></editor>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <doi>10.18653/v1/N16-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -30,7 +30,7 @@
     <paper id="2">
       <title>Flexible Non-Terminals for Dependency Tree-to-Tree Reordering</title>
       <author><first>John</first><last>Richardson</last></author>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>11–19</pages>
@@ -42,7 +42,7 @@
       <title>Selecting Syntactic, Non-redundant Segments in Active Learning for Machine Translation</title>
       <author><first>Akiva</first><last>Miura</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>20–29</pages>
       <url hash="c894eb83">N16-1003</url>
@@ -89,8 +89,8 @@
       <title>A Low-Rank Approximation Approach to Learning Joint Embeddings of News Stories and Images for Timeline Summarization</title>
       <author><first>William Yang</first><last>Wang</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>58–68</pages>
       <url hash="330063d6">N16-1008</url>
       <doi>10.18653/v1/N16-1008</doi>
@@ -111,7 +111,7 @@
       <author><first>Wencan</first><last>Luo</last></author>
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
       <author><first>Zitao</first><last>Liu</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>80–85</pages>
       <url hash="ea7ae1a4">N16-1010</url>
       <doi>10.18653/v1/N16-1010</doi>
@@ -130,7 +130,7 @@
       <title>Abstractive Sentence Summarization with Attentive Recurrent Neural Networks</title>
       <author><first>Sumit</first><last>Chopra</last></author>
       <author><first>Michael</first><last>Auli</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <pages>93–98</pages>
       <url hash="8691a74a">N16-1012</url>
       <doi>10.18653/v1/N16-1012</doi>
@@ -140,7 +140,7 @@
       <title>Integer Linear Programming for Discourse Parsing</title>
       <author><first>Jérémy</first><last>Perret</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <pages>99–109</pages>
       <url hash="658ec4f6">N16-1013</url>
@@ -153,7 +153,7 @@
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>110–119</pages>
       <url hash="aee4c708">N16-1014</url>
       <doi>10.18653/v1/N16-1014</doi>
@@ -162,12 +162,12 @@
     <paper id="15">
       <title>Multi-domain Neural Network Language Generation for Spoken Dialogue Systems</title>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>120–129</pages>
       <url hash="7ff0e0d1">N16-1015</url>
       <doi>10.18653/v1/N16-1015</doi>
@@ -198,12 +198,12 @@
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>142–148</pages>
       <url hash="e3814cc5">N16-1018</url>
       <doi>10.18653/v1/N16-1018</doi>
@@ -215,8 +215,8 @@
       <author><first>Qiaozi</first><last>Gao</last></author>
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
-      <author><first>Song-Chun</first><last>Zhu</last></author>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="song-chun-zhu"><first>Song-Chun</first><last>Zhu</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <pages>149–159</pages>
       <url hash="ab76921f">N16-1019</url>
       <doi>10.18653/v1/N16-1019</doi>
@@ -235,9 +235,9 @@
     <paper id="21">
       <title>Bridge Correlational Neural Networks for Multilingual Multimodal Representation Learning</title>
       <author><first>Janarthanan</first><last>Rajendran</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
       <pages>171–181</pages>
       <url hash="a1aa228d">N16-1021</url>
       <doi>10.18653/v1/N16-1021</doi>
@@ -265,10 +265,10 @@
     </paper>
     <paper id="24">
       <title>Recurrent Neural Network Grammars</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>199–209</pages>
       <url hash="95b6014d">N16-1024</url>
       <doi>10.18653/v1/N16-1024</doi>
@@ -288,7 +288,7 @@
       <title><fixed-case>LSTM</fixed-case> <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Mike</first><last>Lewis</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>221–231</pages>
       <url hash="076a8d68">N16-1026</url>
       <doi>10.18653/v1/N16-1026</doi>
@@ -334,7 +334,7 @@
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Sandeep</first><last>Subramanian</last></author>
       <author><first>Kazuya</first><last>Kawakami</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>260–270</pages>
       <url hash="08531377">N16-1030</url>
       <doi>10.18653/v1/N16-1030</doi>
@@ -342,7 +342,7 @@
     </paper>
     <paper id="31">
       <title>Dynamic Feature Induction: The Last Gist to the State-of-the-Art</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>271–281</pages>
       <url hash="f198068d">N16-1031</url>
       <doi>10.18653/v1/N16-1031</doi>
@@ -350,9 +350,9 @@
     </paper>
     <paper id="32">
       <title>Drop-out Conditional Random Fields for <fixed-case>T</fixed-case>witter with Huge Mined Gazetteer</title>
-      <author><first>Eunsuk</first><last>Yang</last></author>
+      <author id="eun-suk-yang"><first>Eunsuk</first><last>Yang</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Yu-Seop</first><last>Kim</last></author>
       <pages>282–288</pages>
       <url hash="1b802ac8">N16-1032</url>
@@ -362,7 +362,7 @@
     <paper id="33">
       <title>Joint Extraction of Events and Entities within a Document Context</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>289–299</pages>
       <url hash="058d6323">N16-1033</url>
       <doi>10.18653/v1/N16-1033</doi>
@@ -372,7 +372,7 @@
       <title>Joint Event Extraction via Recurrent Neural Networks</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>300–309</pages>
       <url hash="609dc5d4">N16-1034</url>
       <doi>10.18653/v1/N16-1034</doi>
@@ -390,7 +390,7 @@
     </paper>
     <paper id="36">
       <title>Recurrent Memory Networks for Language Modeling</title>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <author><first>Arianna</first><last>Bisazza</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>321–331</pages>
@@ -401,7 +401,7 @@
     <paper id="37">
       <title>A Latent Variable Recurrent Neural Network for Discourse-Driven Language Models</title>
       <author><first>Yangfeng</first><last>Ji</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>332–342</pages>
       <url hash="3b7d3e13">N16-1037</url>
@@ -411,7 +411,7 @@
     <paper id="38">
       <title>Questioning Arbitrariness in Language: a Data-Driven Study of Conventional Iconicity</title>
       <author><first>Ekaterina</first><last>Abramova</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>343–352</pages>
       <url hash="1135659a">N16-1038</url>
       <doi>10.18653/v1/N16-1038</doi>
@@ -420,7 +420,7 @@
     <paper id="39">
       <title>Distinguishing Literal and Non-Literal Usage of <fixed-case>G</fixed-case>erman Particle Verbs</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>353–362</pages>
       <url hash="c195bcf4">N16-1039</url>
       <doi>10.18653/v1/N16-1039</doi>
@@ -447,7 +447,7 @@
     <paper id="42">
       <title>Grammatical error correction using neural machine translation</title>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>380–386</pages>
       <url hash="40738e84">N16-1042</url>
       <doi>10.18653/v1/N16-1042</doi>
@@ -456,9 +456,9 @@
     <paper id="43">
       <title>Multimodal Semantic Learning from Child-Directed Input</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>387–392</pages>
       <url hash="62ea541d">N16-1043</url>
       <doi>10.18653/v1/N16-1043</doi>
@@ -470,8 +470,8 @@
       <author><first>Kaisheng</first><last>Yao</last></author>
       <author><first>Hu</first><last>Chen</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Yi-Cheng</first><last>Pan</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="yi-cheng-pan"><first>Yi-Cheng</first><last>Pan</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <pages>393–399</pages>
       <url hash="c9567618">N16-1044</url>
       <doi>10.18653/v1/N16-1044</doi>
@@ -479,7 +479,7 @@
     </paper>
     <paper id="45">
       <title>Expectation-Regulated Neural Model for Event Mention Extraction</title>
-      <author><first>Ching-Yun</first><last>Chang</last></author>
+      <author id="ching-yun-chang"><first>Ching-Yun</first><last>Chang</last></author>
       <author><first>Zhiyang</first><last>Teng</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <pages>400–410</pages>
@@ -492,7 +492,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>411–416</pages>
       <url hash="83ad7921">N16-1046</url>
       <doi>10.18653/v1/N16-1046</doi>
@@ -501,7 +501,7 @@
     <paper id="47">
       <title>Psycholinguistic Features for Deceptive Role Detection in Werewolf</title>
       <author><first>Codruta</first><last>Girlea</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Eyal</first><last>Amir</last></author>
       <pages>417–422</pages>
       <url hash="685618e6">N16-1047</url>
@@ -510,8 +510,8 @@
     </paper>
     <paper id="48">
       <title>Individual Variation in the Choice of Referential Form</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>423–427</pages>
       <url hash="5a96725f">N16-1048</url>
@@ -522,7 +522,7 @@
       <title>Joint Learning Templates and Slots for Event Schema Induction</title>
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>428–434</pages>
       <url hash="17329881">N16-1049</url>
@@ -531,7 +531,7 @@
     </paper>
     <paper id="50">
       <title>Inferring Psycholinguistic Properties of Words</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>435–440</pages>
       <url hash="56130667">N16-1050</url>
@@ -554,7 +554,7 @@
       <title>Shift-Reduce <fixed-case>CCG</fixed-case> Parsing using Neural Network Models</title>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>447–453</pages>
       <url hash="17386945">N16-1052</url>
       <doi>10.18653/v1/N16-1052</doi>
@@ -563,7 +563,7 @@
     <paper id="53">
       <title>Online Multilingual Topic Models with Multi-Level Hyperpriors</title>
       <author><first>Kriste</first><last>Krstovski</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Michael J.</first><last>Kurtz</last></author>
       <pages>454–459</pages>
       <url hash="c284ab0b">N16-1053</url>
@@ -592,7 +592,7 @@
     </paper>
     <paper id="56">
       <title>Bidirectional <fixed-case>RNN</fixed-case> for Medical Event Detection in Electronic Health Records</title>
-      <author><first>Abhyuday N</first><last>Jagannatha</last></author>
+      <author id="abhyuday-jagannatha"><first>Abhyuday N</first><last>Jagannatha</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>473–482</pages>
       <url hash="da9441f3">N16-1056</url>
@@ -602,7 +602,7 @@
     <paper id="57">
       <title>The Sensitivity of Topic Coherence Evaluation to Topic Cardinality</title>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>483–487</pages>
       <url hash="56b55f5f">N16-1057</url>
       <doi>10.18653/v1/N16-1057</doi>
@@ -612,7 +612,7 @@
       <title>Transition-Based Syntactic Linearization with Lookahead Features</title>
       <author><first>Ratish</first><last>Puduppully</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>488–493</pages>
       <url hash="613aaec1">N16-1058</url>
       <doi>10.18653/v1/N16-1058</doi>
@@ -669,7 +669,7 @@
       <title>Learning Distributed Word Representations For Bidirectional <fixed-case>LSTM</fixed-case> Recurrent Neural Network</title>
       <author><first>Peilu</first><last>Wang</last></author>
       <author><first>Yao</first><last>Qian</last></author>
-      <author><first>Frank K.</first><last>Soong</last></author>
+      <author id="frank-k-soong"><first>Frank K.</first><last>Soong</last></author>
       <author><first>Lei</first><last>He</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>527–533</pages>
@@ -682,7 +682,7 @@
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <author><first>Heike</first><last>Adel</last></author>
       <author><first>Pankaj</first><last>Gupta</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>534–539</pages>
       <url hash="ea24391b">N16-1065</url>
       <doi>10.18653/v1/N16-1065</doi>
@@ -690,7 +690,7 @@
     </paper>
     <paper id="66">
       <title>Building <fixed-case>C</fixed-case>hinese Affective Resources in Valence-Arousal Dimensions</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Shuai</first><last>Hao</last></author>
       <author><first>Jin</first><last>Wang</last></author>
@@ -716,7 +716,7 @@
       <title>Structured Prediction with Output Embeddings for Semantic Image Annotation</title>
       <author><first>Ariadna</first><last>Quattoni</last></author>
       <author><first>Arnau</first><last>Ramisa</last></author>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Edgar</first><last>Simo-Serra</last></author>
       <author><first>Francesc</first><last>Moreno-Noguer</last></author>
       <pages>552–557</pages>
@@ -782,7 +782,7 @@
     <paper id="75">
       <title>Unsupervised Compound Splitting With Distributional Semantics Rivals Supervised Methods</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>617–622</pages>
       <url hash="0b5f68bf">N16-1075</url>
       <doi>10.18653/v1/N16-1075</doi>
@@ -792,7 +792,7 @@
       <title>Weighting Finite-State Transductions With Neural Context</title>
       <author><first>Pushpendre</first><last>Rastogi</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>623–633</pages>
       <url hash="24785365">N16-1076</url>
       <doi>10.18653/v1/N16-1076</doi>
@@ -803,7 +803,7 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>634–643</pages>
       <url hash="dc9de4f0">N16-1077</url>
       <doi>10.18653/v1/N16-1077</doi>
@@ -812,7 +812,7 @@
     <paper id="78">
       <title>Towards Unsupervised and Language-independent Compound Splitting using Inflectional Morphological Transformations</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>644–653</pages>
       <url hash="84f48c68">N16-1078</url>
       <doi>10.18653/v1/N16-1078</doi>
@@ -821,8 +821,8 @@
     <paper id="79">
       <title>Phonological Pun-derstanding</title>
       <author><first>Aaron</first><last>Jaech</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>654–663</pages>
       <url hash="9e97b058">N16-1079</url>
       <doi>10.18653/v1/N16-1079</doi>
@@ -832,7 +832,7 @@
       <title>A Joint Model of Orthography and Morphological Segmentation</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Tim</first><last>Vieira</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>664–669</pages>
       <url hash="b0ae4cc4">N16-1080</url>
       <doi>10.18653/v1/N16-1080</doi>
@@ -853,8 +853,8 @@
       <title>Visualizing and Understanding Neural Models in <fixed-case>NLP</fixed-case></title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Xinlei</first><last>Chen</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>681–691</pages>
       <url hash="35e6b665">N16-1082</url>
       <doi>10.18653/v1/N16-1082</doi>
@@ -871,9 +871,9 @@
     </paper>
     <paper id="84">
       <title>Joint Learning with Global Inference for Comment Classification in Community Question Answering</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>703–713</pages>
       <url hash="a4060808">N16-1084</url>
       <doi>10.18653/v1/N16-1084</doi>
@@ -904,9 +904,9 @@
     <paper id="87">
       <title>Generation from <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation using Tree Transducers</title>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>731–739</pages>
       <url hash="f3a6688d">N16-1087</url>
       <doi>10.18653/v1/N16-1087</doi>
@@ -933,9 +933,9 @@
     </paper>
     <paper id="90">
       <title>Inter-document Contextual Language model</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>762–766</pages>
       <url hash="1bc79336">N16-1090</url>
       <doi>10.18653/v1/N16-1090</doi>
@@ -945,7 +945,7 @@
       <title>Ultradense Word Embeddings by Orthogonal Transformation</title>
       <author><first>Sascha</first><last>Rothe</last></author>
       <author><first>Sebastian</first><last>Ebert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>767–777</pages>
       <url hash="cc3b3ec4">N16-1091</url>
       <doi>10.18653/v1/N16-1091</doi>
@@ -985,7 +985,7 @@
     <paper id="95">
       <title>Capturing Reliable Fine-Grained Sentiment Associations by Crowdsourcing and Best–Worst Scaling</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>811–817</pages>
       <url hash="a22ba79f">N16-1095</url>
       <doi>10.18653/v1/N16-1095</doi>
@@ -993,8 +993,8 @@
     </paper>
     <paper id="96">
       <title>Mapping Verbs in Different Languages to Knowledge Base Relations using Web Text as Interlingua</title>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>818–827</pages>
       <url hash="42d547dd">N16-1096</url>
       <doi>10.18653/v1/N16-1096</doi>
@@ -1004,7 +1004,7 @@
       <title>Comparing Convolutional Neural Networks to Traditional Models for Slot Filling</title>
       <author><first>Heike</first><last>Adel</last></author>
       <author><first>Benjamin</first><last>Roth</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>828–838</pages>
       <url hash="9224b9d6">N16-1097</url>
       <doi>10.18653/v1/N16-1097</doi>
@@ -1013,13 +1013,13 @@
     <paper id="98">
       <title>A Corpus and Cloze Evaluation for Deeper Understanding of Commonsense Stories</title>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Devi</first><last>Parikh</last></author>
       <author><first>Dhruv</first><last>Batra</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <author><first>Pushmeet</first><last>Kohli</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>839–849</pages>
       <url hash="542d720c">N16-1098</url>
       <doi>10.18653/v1/N16-1098</doi>
@@ -1029,7 +1029,7 @@
       <title>Dynamic Entity Representation with Max-pooling Improves Machine Reading</title>
       <author><first>Sosuke</first><last>Kobayashi</last></author>
       <author><first>Ran</first><last>Tian</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>850–855</pages>
       <url hash="8e654371">N16-1099</url>
@@ -1038,8 +1038,8 @@
     </paper>
     <paper id="100">
       <title>Speed-Constrained Tuning for Statistical Machine Translation Using <fixed-case>B</fixed-case>ayesian Optimization</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author><first>Aurelien</first><last>Waite</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
@@ -1060,12 +1060,12 @@
     </paper>
     <paper id="102">
       <title>Incorporating Structural Alignment Biases into an Attentional Neural Translation Model</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
       <author><first>Ekaterina</first><last>Vymolova</last></author>
       <author><first>Kaisheng</first><last>Yao</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>876–885</pages>
       <url hash="a1c5e5a6">N16-1102</url>
       <doi>10.18653/v1/N16-1102</doi>
@@ -1086,11 +1086,11 @@
     <paper id="104">
       <title>Effective Crowd Annotation for Relation Extraction</title>
       <author><first>Angli</first><last>Liu</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Jonathan</first><last>Bragg</last></author>
       <author><first>Christopher H.</first><last>Lin</last></author>
       <author><first>Xiao</first><last>Ling</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>897–906</pages>
       <url hash="dcb51990">N16-1104</url>
       <doi>10.18653/v1/N16-1104</doi>
@@ -1101,7 +1101,7 @@
       <author><first>Hee-Geun</first><last>Yoon</last></author>
       <author><first>Hyun-Je</first><last>Song</last></author>
       <author><first>Seong-Bae</first><last>Park</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>907–916</pages>
       <url hash="671120bf">N16-1105</url>
       <doi>10.18653/v1/N16-1105</doi>
@@ -1119,7 +1119,7 @@
     </paper>
     <paper id="107">
       <title><fixed-case>B</fixed-case>ayesian Supervised Domain Adaptation for Short Text Similarity</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>927–936</pages>
@@ -1138,11 +1138,11 @@
     </paper>
     <paper id="109">
       <title>An Attentional Model for Speech Translation Without Transcription</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>949–959</pages>
       <url hash="ae57d047">N16-1109</url>
       <doi>10.18653/v1/N16-1109</doi>
@@ -1150,9 +1150,9 @@
     </paper>
     <paper id="110">
       <title>Information Density and Quality Estimation Features as Translationese Indicators for Human Translation Classification</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>960–970</pages>
       <url hash="f623cb38">N16-1110</url>
       <doi>10.18653/v1/N16-1110</doi>
@@ -1162,7 +1162,7 @@
       <title>Interpretese vs. Translationese: The Uniqueness of Human Strategies in Simultaneous Interpretation</title>
       <author><first>He</first><last>He</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>971–976</pages>
       <url hash="713fce92">N16-1111</url>
       <doi>10.18653/v1/N16-1111</doi>
@@ -1194,8 +1194,8 @@
     <paper id="114">
       <title>Learning Global Features for Coreference Resolution</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>994–1004</pages>
       <url hash="ce8bc05c">N16-1114</url>
       <doi>10.18653/v1/N16-1114</doi>
@@ -1203,7 +1203,7 @@
     </paper>
     <paper id="115">
       <title>Search Space Pruning: A Simple Solution for Better Coreference Resolvers</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>1005–1011</pages>
       <url hash="6178334a">N16-1115</url>
@@ -1214,7 +1214,7 @@
       <title>Unsupervised Ranking Model for Entity Coreference Resolution</title>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1012–1018</pages>
       <url hash="521e277f">N16-1116</url>
       <doi>10.18653/v1/N16-1116</doi>
@@ -1225,7 +1225,7 @@
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Raman</first><last>Arora</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <pages>1019–1029</pages>
       <url hash="5946e1a1">N16-1117</url>
       <doi>10.18653/v1/N16-1117</doi>
@@ -1258,7 +1258,7 @@
       <title>Assessing Relative Sentence Complexity using an Incremental <fixed-case>CCG</fixed-case> Parser</title>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1051–1057</pages>
       <url hash="3d6f503d">N16-1120</url>
       <doi>10.18653/v1/N16-1120</doi>
@@ -1287,7 +1287,7 @@
     </paper>
     <paper id="123">
       <title>Fast and Easy Short Answer Grading with High Accuracy</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Cristobal</first><last>Salazar</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>1070–1075</pages>
@@ -1299,7 +1299,7 @@
       <title>Interlocking Phrases in Phrase-based Statistical Machine Translation</title>
       <author><first>Ye</first><last>Kyaw Thu</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1076–1081</pages>
       <url hash="8210ddeb">N16-1124</url>
       <doi>10.18653/v1/N16-1124</doi>
@@ -1308,12 +1308,12 @@
     <paper id="125">
       <title>Eyes Don’t Lie: Predicting Machine Translation Quality Using Eye Movement</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Houda</first><last>Bouamor</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1082–1088</pages>
       <url hash="6c3540d0">N16-1125</url>
       <doi>10.18653/v1/N16-1125</doi>
@@ -1331,8 +1331,8 @@
     </paper>
     <paper id="127">
       <title>Deep Lexical Segmentation and Syntactic Parsing in the Easy-First Dependency Framework</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <pages>1095–1101</pages>
       <url hash="307bec4c">N16-1127</url>
@@ -1342,7 +1342,7 @@
     <paper id="128">
       <title>Sentiment Composition of Words with Opposing Polarities</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif M.</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></author>
       <pages>1102–1108</pages>
       <url hash="74b030ba">N16-1128</url>
       <doi>10.18653/v1/N16-1128</doi>
@@ -1361,7 +1361,7 @@
       <title>Learning a <fixed-case>POS</fixed-case> tagger for <fixed-case>AAVE</fixed-case>-like language</title>
       <author><first>Anna</first><last>Jørgensen</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1115–1120</pages>
       <url hash="1074e8e3">N16-1130</url>
       <doi>10.18653/v1/N16-1130</doi>
@@ -1379,7 +1379,7 @@
     <paper id="132">
       <title>Bootstrapping Translation Detection and Sentence Extraction from Comparable Corpora</title>
       <author><first>Kriste</first><last>Krstovski</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>1127–1132</pages>
       <url hash="ba9cb713">N16-1132</url>
       <doi>10.18653/v1/N16-1132</doi>
@@ -1388,7 +1388,7 @@
     <paper id="133">
       <title>Discriminative Reranking for Grammatical Error Correction with Statistical Machine Translation</title>
       <author><first>Tomoya</first><last>Mizumoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1133–1138</pages>
       <url hash="2e394fa3">N16-1133</url>
       <doi>10.18653/v1/N16-1133</doi>
@@ -1397,7 +1397,7 @@
     <paper id="134">
       <title>Patterns of Wisdom: Discourse-Level Style in Multi-Sentence Quotations</title>
       <author><first>Kyle</first><last>Booten</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>1139–1144</pages>
       <url hash="2adf87c5">N16-1134</url>
       <doi>10.18653/v1/N16-1134</doi>
@@ -1414,7 +1414,7 @@
     </paper>
     <paper id="136">
       <title><fixed-case>MAWPS</fixed-case>: A Math Word Problem Repository</title>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Subhro</first><last>Roy</last></author>
       <author><first>Aida</first><last>Amini</last></author>
       <author><first>Nate</first><last>Kushman</last></author>
@@ -1446,7 +1446,7 @@
       <title><fixed-case>BIRA</fixed-case>: Improved Predictive Exchange Word Clustering</title>
       <author><first>Jon</first><last>Dehdari</last></author>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1169–1174</pages>
       <url hash="863f423e">N16-1139</url>
       <doi>10.18653/v1/N16-1139</doi>
@@ -1482,13 +1482,13 @@
     </paper>
     <paper id="143">
       <title>Automatic Prediction of Linguistic Decline in Writings of Subjects with Degenerative Dementia</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Travis A.</first><last>Johnson</last></author>
       <author><first>Laura</first><last>Wojtulewicz</last></author>
       <author><first>Amylou</first><last>Dueck</last></author>
       <author><first>Dona</first><last>Locke</last></author>
       <author><first>Richard</first><last>Caselli</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>1198–1207</pages>
       <url hash="2e3c2721">N16-1143</url>
       <doi>10.18653/v1/N16-1143</doi>
@@ -1496,9 +1496,9 @@
     </paper>
     <paper id="144">
       <title>Consensus Maximization Fusion of Probabilistic Information Extractors</title>
-      <author><first>Miguel</first><last>Rodríguez</last></author>
+      <author id="miguel-rodriguez-hernandez"><first>Miguel</first><last>Rodríguez</last></author>
       <author><first>Sean</first><last>Goldberg</last></author>
-      <author><first>Daisy Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last></author>
       <pages>1208–1216</pages>
       <url hash="7e416436">N16-1144</url>
       <doi>10.18653/v1/N16-1144</doi>
@@ -1518,8 +1518,8 @@
     <paper id="146">
       <title>Automatically Inferring Implicit Properties in Similes</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <pages>1223–1232</pages>
       <url hash="88006a62">N16-1146</url>
       <doi>10.18653/v1/N16-1146</doi>
@@ -1527,7 +1527,7 @@
     </paper>
     <paper id="147">
       <title>Visual Storytelling</title>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last></author>
       <author><first>Francis</first><last>Ferraro</last></author>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
       <author><first>Ishan</first><last>Misra</last></author>
@@ -1554,8 +1554,8 @@
       <author><first>Shanbo</first><last>Cheng</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Huadong</first><last>Chen</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1240–1249</pages>
       <url hash="aa0f0590">N16-1148</url>
       <doi>10.18653/v1/N16-1148</doi>
@@ -1564,8 +1564,8 @@
     <paper id="149">
       <title>Incorporating Side Information into Recurrent Neural Network Language Models</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1250–1255</pages>
       <url hash="b33c570c">N16-1149</url>
       <doi>10.18653/v1/N16-1149</doi>
@@ -1583,7 +1583,7 @@
     </paper>
     <paper id="151">
       <title><tex-math>K</tex-math>-Embeddings: Learning Conceptual Embeddings for Words using Context</title>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>D. Stott</first><last>Parker</last></author>
       <pages>1262–1267</pages>
       <url hash="c58ffa11">N16-1151</url>
@@ -1608,7 +1608,7 @@
       <author><first>Tommi</first><last>Jaakkola</last></author>
       <author><first>Kateryna</first><last>Tymoshenko</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>1279–1289</pages>
       <url hash="92cd4186">N16-1153</url>
       <doi>10.18653/v1/N16-1153</doi>
@@ -1625,10 +1625,10 @@
     </paper>
     <paper id="155">
       <title>Multilingual Language Processing From Bytes</title>
-      <author><first>Dan</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
       <author><first>Cliff</first><last>Brunk</last></author>
       <author><first>Oriol</first><last>Vinyals</last></author>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <pages>1296–1306</pages>
       <url hash="3c7a2d58">N16-1155</url>
       <doi>10.18653/v1/N16-1155</doi>
@@ -1668,9 +1668,9 @@
       <author><first>Sakshi</first><last>Gupta</last></author>
       <author><first>Raveesh</first><last>Motlani</last></author>
       <author><first>Piyush</first><last>Bansal</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Radhika</first><last>Mamidi</last></author>
-      <author><first>Dipti M.</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti M.</first><last>Sharma</last></author>
       <pages>1340–1345</pages>
       <url hash="d060bab4">N16-1159</url>
       <doi>10.18653/v1/N16-1159</doi>
@@ -1678,9 +1678,9 @@
     </paper>
     <paper id="160">
       <title>Bilingual Learning of Multi-sense Embeddings with Discrete Autoencoders</title>
-      <author><first>Simon</first><last>Šuster</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1346–1356</pages>
       <url hash="efaefd49">N16-1160</url>
       <doi>10.18653/v1/N16-1160</doi>
@@ -1693,10 +1693,10 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Guillaume</first><last>Lample</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>David</first><last>Mortensen</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="david-r-mortensen"><first>David</first><last>Mortensen</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1357–1366</pages>
       <url hash="239dbd16">N16-1161</url>
       <doi>10.18653/v1/N16-1161</doi>
@@ -1733,7 +1733,7 @@
     </paper>
     <paper id="165">
       <title>Cross-Domain Mining of Argumentative Text through Distant Supervision</title>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
       <author><first>Jonas</first><last>Köhler</last></author>
@@ -1745,7 +1745,7 @@
     </paper>
     <paper id="166">
       <title>A Study of the Impact of Persuasive Argumentation in Political Debates</title>
-      <author><first>Amparo Elizabeth</first><last>Cano-Basave</last></author>
+      <author id="amparo-elizabeth-cano-basave"><first>Amparo Elizabeth</first><last>Cano-Basave</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <pages>1405–1413</pages>
       <url hash="bab446bb">N16-1166</url>
@@ -1764,7 +1764,7 @@
     <paper id="168">
       <title>Using Context to Predict the Purpose of Argumentative Writing Revisions</title>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1424–1430</pages>
       <url hash="b0934d38">N16-1168</url>
       <doi>10.18653/v1/N16-1168</doi>
@@ -1793,7 +1793,7 @@
       <author><first>Ashequl</first><last>Qadir</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
       <pages>1452–1462</pages>
       <url hash="5bf28817">N16-1171</url>
       <doi>10.18653/v1/N16-1171</doi>
@@ -1821,10 +1821,10 @@
       <title>Hierarchical Attention Networks for Document Classification</title>
       <author><first>Zichao</first><last>Yang</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Alex</first><last>Smola</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1480–1489</pages>
       <url hash="95f14e9a">N16-1174</url>
       <doi>10.18653/v1/N16-1174</doi>
@@ -1854,7 +1854,7 @@
       <title>Dependency Sensitive Convolutional Neural Networks for Modeling Sentences and Documents</title>
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Honglak</first><last>Lee</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>1512–1521</pages>
       <url hash="29f2e9ba">N16-1177</url>
       <doi>10.18653/v1/N16-1177</doi>
@@ -1864,7 +1864,7 @@
       <title><fixed-case>MGNC</fixed-case>-<fixed-case>CNN</fixed-case>: A Simple Approach to Exploiting Multiple Word Embeddings for Sentence Classification</title>
       <author><first>Ye</first><last>Zhang</last></author>
       <author><first>Stephen</first><last>Roller</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>1522–1527</pages>
       <url hash="72357f02">N16-1178</url>
       <doi>10.18653/v1/N16-1178</doi>
@@ -1874,7 +1874,7 @@
       <title>Improving sentence compression by learning to predict gaze</title>
       <author><first>Sigrid</first><last>Klerke</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1528–1533</pages>
       <url hash="88c999b9">N16-1179</url>
       <doi>10.18653/v1/N16-1179</doi>
@@ -1886,7 +1886,7 @@
       <author><first>Anupam</first><last>Guha</last></author>
       <author><first>Snigdha</first><last>Chaturvedi</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1534–1544</pages>
       <url hash="28413a9c">N16-1180</url>
       <doi>10.18653/v1/N16-1180</doi>
@@ -1961,9 +1961,9 @@
     </paper>
     <paper id="5">
       <title>Combining syntactic patterns and <fixed-case>W</fixed-case>ikipedia’s hierarchy of hyperlinks to extract meronym relations</title>
-      <author><first>Debela Tesfaye</first><last>Gemechu</last></author>
+      <author id="debela-tesfaye-gemechu"><first>Debela Tesfaye</first><last>Gemechu</last></author>
       <author><first>Michael</first><last>Zock</last></author>
-      <author><first>Solomon</first><last>Teferra</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon</first><last>Teferra</last></author>
       <pages>29–36</pages>
       <url hash="bf9b37c5">N16-2005</url>
       <doi>10.18653/v1/N16-2005</doi>
@@ -2004,7 +2004,7 @@
       <author><first>Francesca</first><last>Delogu</last></author>
       <author><first>Clayton</first><last>Greenberg</last></author>
       <author><first>Mindaugas</first><last>Mozuraitis</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <pages>59–65</pages>
       <url hash="a6811c2a">N16-2009</url>
       <doi>10.18653/v1/N16-2009</doi>
@@ -2013,7 +2013,7 @@
     <paper id="10">
       <title>Explicit Argument Identification for Discourse Parsing In <fixed-case>H</fixed-case>indi: A Hybrid Pipeline</title>
       <author><first>Rohit</first><last>Jain</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>66–72</pages>
       <url hash="e5778fcf">N16-2010</url>
       <doi>10.18653/v1/N16-2010</doi>
@@ -2022,7 +2022,7 @@
     <paper id="11">
       <title>Exploring Fine-Grained Emotion Detection in Tweets</title>
       <author><first>Jasy Suet Yan</first><last>Liew</last></author>
-      <author><first>Howard R.</first><last>Turtle</last></author>
+      <author id="howard-r-turtle"><first>Howard R.</first><last>Turtle</last></author>
       <pages>73–80</pages>
       <url hash="236edcde">N16-2011</url>
       <doi>10.18653/v1/N16-2011</doi>
@@ -2040,7 +2040,7 @@
     </paper>
     <paper id="13">
       <title>Hateful Symbols or Hateful People? Predictive Features for Hate Speech Detection on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>88–93</pages>
       <url hash="472412f8">N16-2013</url>
@@ -2049,11 +2049,11 @@
     </paper>
     <paper id="14">
       <title>Non-decreasing Sub-modular Function for Comprehensible Summarization</title>
-      <author><first>Litton</first><last>J Kurisinkel</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton</first><last>J Kurisinkel</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
       <author><first>Vigneshwaran</first><last>Muralidaran</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
-      <author><first>Dipti</first><last>Misra Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra Sharma</last></author>
       <pages>94–101</pages>
       <url hash="0b8708a1">N16-2014</url>
       <doi>10.18653/v1/N16-2014</doi>
@@ -2062,7 +2062,7 @@
     <paper id="15">
       <title>Phylogenetic simulations over constraint-based grammar formalisms</title>
       <author><first>Andrew</first><last>Lamont</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <pages>102–108</pages>
       <url hash="89fe00cb">N16-2015</url>
       <doi>10.18653/v1/N16-2015</doi>
@@ -2092,7 +2092,7 @@
       <booktitle>Proceedings of the 2016 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Demonstrations</booktitle>
       <url hash="e51c86e0">N16-3</url>
       <editor><first>John</first><last>DeNero</last></editor>
-      <editor><first>Mark</first><last>Finlayson</last></editor>
+      <editor id="mark-finlayson"><first>Mark</first><last>Finlayson</last></editor>
       <editor><first>Sravana</first><last>Reddy</last></editor>
       <doi>10.18653/v1/N16-3</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2117,7 +2117,7 @@
       <title>Instant Feedback for Increasing the Presence of Solutions in Peer Reviews</title>
       <author id="huy-nguyen-pgh"><first>Huy</first><last>Nguyen</last></author>
       <author><first>Wenting</first><last>Xiong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>6–10</pages>
       <url hash="41e6e02d">N16-3002</url>
       <doi>10.18653/v1/N16-3002</doi>
@@ -2138,7 +2138,7 @@
       <title>i<fixed-case>A</fixed-case>ppraise: A Manual Machine Translation Evaluation Environment Supporting Eye-tracking</title>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <pages>17–21</pages>
       <url hash="596941a9">N16-3004</url>
       <doi>10.18653/v1/N16-3004</doi>
@@ -2172,7 +2172,7 @@
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Ari</first><last>Shapiro</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>32–36</pages>
       <url hash="9b7fa4e6">N16-3007</url>
       <doi>10.18653/v1/N16-3007</doi>
@@ -2182,7 +2182,7 @@
       <title><fixed-case>A</fixed-case>rg<fixed-case>R</fixed-case>ewrite: A Web-based Revision Assistant for Argumentative Writings</title>
       <author><first>Fan</first><last>Zhang</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Homa B.</first><last>Hashemi</last></author>
       <pages>37–41</pages>
       <url hash="6f8b5b0f">N16-3008</url>
@@ -2193,7 +2193,7 @@
       <title>Scaling Up Word Clustering</title>
       <author><first>Jon</first><last>Dehdari</last></author>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>42–46</pages>
       <url hash="d1cd5362">N16-3009</url>
       <doi>10.18653/v1/N16-3009</doi>
@@ -2201,7 +2201,7 @@
     </paper>
     <paper id="10">
       <title>Task Completion Platform: A self-serve multi-domain goal oriented dialogue platform</title>
-      <author><first>Paul</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul</first><last>Crook</last></author>
       <author><first>Alex</first><last>Marin</last></author>
       <author><first>Vipul</first><last>Agarwal</last></author>
       <author><first>Khushboo</first><last>Aggarwal</last></author>
@@ -2223,7 +2223,7 @@
       <author><first>Jean-Phillipe</first><last>Robichaud</last></author>
       <author><first>Alexandre</first><last>Rochette</last></author>
       <author><first>Logan</first><last>Stromberg</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>47–51</pages>
       <url hash="574092b8">N16-3010</url>
       <doi>10.18653/v1/N16-3010</doi>
@@ -2241,7 +2241,7 @@
     <paper id="12">
       <title><fixed-case>L</fixed-case>ingo<fixed-case>T</fixed-case>urk: managing crowdsourced tasks for psycholinguistics</title>
       <author><first>Florian</first><last>Pusse</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>57–61</pages>
       <url hash="81840cfa">N16-3012</url>
@@ -2274,7 +2274,7 @@
     <paper id="15">
       <title>Cross-media Event Extraction and Recommendation</title>
       <author><first>Di</first><last>Lu</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Fangbo</first><last>Tao</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <author><first>Rachel</first><last>Guan</last></author>
@@ -2284,10 +2284,10 @@
       <author><first>Hongzhi</first><last>Li</last></author>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Shih-fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-fu</first><last>Chang</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <author><first>William</first><last>Wallace</last></author>
-      <author><first>James</first><last>Hendler</last></author>
+      <author id="james-hendler"><first>James</first><last>Hendler</last></author>
       <author><first>Mei</first><last>Si</last></author>
       <author><first>Lance</first><last>Kaplan</last></author>
       <pages>72–76</pages>
@@ -2298,7 +2298,7 @@
     <paper id="16">
       <title><fixed-case>SODA</fixed-case>:Service Oriented Domain Adaptation Architecture for Microblog Categorization</title>
       <author><first>Himanshu Sharad</first><last>Bhatt</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Peddamuthu</first><last>Balaji</last></author>
       <author><first>Shourya</first><last>Roy</last></author>
       <author><first>Sharmistha</first><last>Jat</last></author>
@@ -2319,8 +2319,8 @@
       <author><first>Kevin</first><last>Kilgour</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>82–86</pages>
       <url hash="63a7f540">N16-3017</url>
       <doi>10.18653/v1/N16-3017</doi>
@@ -2342,10 +2342,10 @@
     </paper>
     <paper id="19">
       <title><fixed-case>K</fixed-case>athaa: A Visual Programming Framework for <fixed-case>NLP</fixed-case> Applications</title>
-      <author><first>Sharada Prasanna</first><last>Mohanty</last></author>
+      <author id="sharada-prasanna-mohanty"><first>Sharada Prasanna</first><last>Mohanty</last></author>
       <author><first>Nehal J</first><last>Wani</last></author>
-      <author><first>Manish</first><last>Srivastava</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Srivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>92–96</pages>
       <url hash="12a2868a">N16-3019</url>
       <doi>10.18653/v1/N16-3019</doi>
@@ -2353,7 +2353,7 @@
     </paper>
     <paper id="20">
       <title>“Why Should <fixed-case>I</fixed-case> Trust You?”: Explaining the Predictions of Any Classifier</title>
-      <author><first>Marco</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco</first><last>Ribeiro</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <author><first>Carlos</first><last>Guestrin</last></author>
       <pages>97–101</pages>
@@ -2367,7 +2367,7 @@
       <booktitle>Proceedings of the 2016 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="eeaf8c32">N16-4</url>
       <editor><first>Mohit</first><last>Bansal</last></editor>
-      <editor><first>Alexander M.</first><last>Rush</last></editor>
+      <editor id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></editor>
       <doi>10.18653/v1/N16-4</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -2381,8 +2381,8 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>S</fixed-case>emantics</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Woodley</first><last>Packard</last></author>
       <pages>1–5</pages>
       <url hash="8d5e9534">N16-4001</url>
@@ -2391,7 +2391,7 @@
     </paper>
     <paper id="2">
       <title>Multilingual Multimodal Language Processing Using Neural Networks</title>
-      <author><first>Mitesh M</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M</first><last>Khapra</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
       <pages>6–7</pages>
       <url hash="14ac8ca8">N16-4002</url>
@@ -2400,7 +2400,7 @@
     </paper>
     <paper id="3">
       <title>Question Answering with Knowledge Base, Web and Beyond</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Hao</first><last>Ma</last></author>
       <pages>8–10</pages>
       <url hash="8deb42b7">N16-4003</url>
@@ -2420,7 +2420,7 @@
     <paper id="5">
       <title>Scalable Statistical Relational Learning for <fixed-case>NLP</fixed-case></title>
       <author><first>William Yang</first><last>Wang</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>14–16</pages>
       <url hash="9a75c41f">N16-4005</url>
       <doi>10.18653/v1/N16-4005</doi>
@@ -2428,8 +2428,8 @@
     </paper>
     <paper id="6">
       <title>Statistical Machine Translation between Related Languages</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <pages>17–20</pages>
       <url hash="83be1f81">N16-4006</url>
diff --git a/data/xml/N18.xml b/data/xml/N18.xml
index 9422c64272..ff9da97de4 100644
--- a/data/xml/N18.xml
+++ b/data/xml/N18.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 2018 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</booktitle>
       <url hash="28b6ea08">N18-1</url>
-      <editor><first>Marilyn</first><last>Walker</last></editor>
+      <editor id="marilyn-walker"><first>Marilyn</first><last>Walker</last></editor>
       <editor><first>Heng</first><last>Ji</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <doi>10.18653/v1/N18-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New Orleans, Louisiana</address>
@@ -24,7 +24,7 @@
       <author><first>Yanru</first><last>Qu</last></author>
       <author><first>Liheng</first><last>Chen</last></author>
       <author><first>Jian</first><last>Shen</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Shaodian</first><last>Zhang</last></author>
       <author><first>Yimei</first><last>Gao</last></author>
       <author><first>Gen</first><last>Gu</last></author>
@@ -52,7 +52,7 @@
       <title>Joint Bootstrapping Machines for High Confidence Relation Extraction</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Benjamin</first><last>Roth</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>26–36</pages>
       <abstract>Semi-supervised bootstrapping techniques for relationship extraction from text iteratively expand a set of initial seed instances. Due to the lack of labeled data, a key challenge in bootstrapping is semantic drift: if a false positive instance is added during an iteration, then all following iterations are contaminated. We introduce BREX, a new bootstrapping method that protects against such contamination by highly effective confidence assessment. This is achieved by using entity and template seeds jointly (as opposed to just one as in previous work), by expanding entities and templates in parallel and in a mutually constraining fashion in each iteration and by introducing higherquality similarity measures for templates. Experimental results show that BREX achieves an F1 that is 0.13 (0.87 vs. 0.74) better than the state of the art for four relationships.</abstract>
       <url hash="14990d5d">N18-1003</url>
@@ -63,7 +63,7 @@
     <paper id="4">
       <title>A Deep Generative Model of Vowel Formant Typology</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>37–46</pages>
       <abstract>What makes some types of languages more probable than others? For instance, we know that almost all spoken languages contain the vowel phoneme /i/; why should that be? The field of linguistic typology seeks to answer these questions and, thereby, divine the mechanisms that underlie human language. In our work, we tackle the problem of vowel system typology, i.e., we propose a generative probability model of which vowels a language contains. In contrast to previous work, we work directly with the acoustic information—the first two formant values—rather than modeling discrete sets of symbols from the international phonetic alphabet. We develop a novel generative probability model and report results on over 200 languages.</abstract>
       <url hash="2fb72df8">N18-1004</url>
@@ -73,10 +73,10 @@
     </paper>
     <paper id="5">
       <title>Fortification of Neural Morphological Segmentation Models for Polysynthetic Minimal-Resource Languages</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Jesus Manuel</first><last>Mager Hois</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza-Ruiz</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza-Ruiz</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>47–57</pages>
       <abstract>Morphological segmentation for polysynthetic languages is challenging, because a word may consist of many individual morphemes and training data can be extremely scarce. Since neural sequence-to-sequence (seq2seq) models define the state of the art for morphological segmentation in high-resource settings and for (mostly) European languages, we first show that they also obtain competitive performance for Mexican polysynthetic languages in minimal-resource settings. We then propose two novel multi-task training approaches—one with, one without need for external unlabeled resources—, and two corresponding data augmentation methods, improving over the neural baseline for all languages. Finally, we explore cross-lingual transfer as a third way to fortify our neural model and show that we can train one single multi-lingual model for related languages while maintaining comparable or even improved performance, thus reducing the amount of parameters by close to 75%. We provide our morphological segmentation datasets for Mexicanero, Nahuatl, Wixarika and Yorem Nokki for future research.</abstract>
       <url hash="fa13b64d">N18-1005</url>
@@ -103,7 +103,7 @@
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>69–81</pages>
       <abstract>In conversational speech, the acoustic signal provides cues that help listeners disambiguate difficult parses. For automatically parsing spoken utterances, we introduce a model that integrates transcribed text and acoustic-prosodic features using a convolutional neural network over energy and pitch trajectories coupled with an attention-based recurrent neural network that accepts text and prosodic features. We find that different types of acoustic-prosodic features are individually helpful, and together give statistically significant improvements in parse and disfluency detection F1 scores over a strong text-only baseline. For this study with known sentence boundaries, error analyses show that the main benefit of acoustic-prosodic features is in sentences with disfluencies, attachment decisions are most improved, and transcription errors obscure gains from prosody.</abstract>
       <url hash="78f679ba">N18-1007</url>
@@ -139,7 +139,7 @@
       <author><first>Shivani</first><last>Poddar</last></author>
       <author><first>Byungsoo</first><last>Jeon</last></author>
       <author><first>Qinlan</first><last>Shen</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>103–116</pages>
       <abstract>We present a neural architecture for modeling argumentative dialogue that explicitly models the interplay between an Opinion Holder’s (OH’s) reasoning and a challenger’s argument, with the goal of predicting if the argument successfully changes the OH’s view. The model has two components: (1) vulnerable region detection, an attention model that identifies parts of the OH’s reasoning that are amenable to change, and (2) interaction encoding, which identifies the relationship between the content of the OH’s reasoning and that of the challenger’s argument. Based on evaluation on discussions from the Change My View forum on Reddit, the two components work together to predict an OH’s change in view, outperforming several baselines. A posthoc analysis suggests that sentences picked out by the attention model are addressed more frequently by successful arguments than by unsuccessful ones.</abstract>
@@ -150,7 +150,7 @@
     <paper id="11">
       <title>Automatic Focus Annotation: Bringing Formal Pragmatics Alive in Analyzing the Information Structure of Authentic Data</title>
       <author><first>Ramon</first><last>Ziai</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>117–128</pages>
       <abstract>Analyzing language in context, both from a theoretical and from a computational perspective, is receiving increased interest. Complementing the research in linguistics on discourse and information structure, in computational linguistics identifying discourse concepts was also shown to improve the performance of certain applications, for example, Short Answer Assessment systems (Ziai and Meurers, 2014). Building on the research that established detailed annotation guidelines for manual annotation of information structural concepts for written (Dipper et al., 2007; Ziai and Meurers, 2014) and spoken language data (Calhoun et al., 2010), this paper presents the first approach automating the analysis of focus in authentic written data. Our classification approach combines a range of lexical, syntactic, and semantic features to achieve an accuracy of 78.1% for identifying focus.</abstract>
       <url hash="db276356">N18-1011</url>
@@ -160,7 +160,7 @@
     <paper id="12">
       <title>Dear Sir or Madam, May <fixed-case>I</fixed-case> Introduce the <fixed-case>GYAFC</fixed-case> Dataset: Corpus, Benchmarks and Metrics for Formality Style Transfer</title>
       <author><first>Sudha</first><last>Rao</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>129–140</pages>
       <abstract>Style transfer is the task of automatically transforming a piece of text in one particular style into another. A major barrier to progress in this field has been a lack of training and evaluation datasets, as well as benchmarks and automatic metrics. In this work, we create the largest corpus for a particular stylistic transfer (formality) and show that techniques from the machine translation community can serve as strong baselines for future work. We also discuss challenges of using automatic metrics.</abstract>
       <attachment type="note" hash="4966978a">N18-1012.Notes.pdf</attachment>
@@ -259,7 +259,7 @@
       <title>Zero-Shot Question Generation from Knowledge Graphs for Unseen Predicates and Entity Types</title>
       <author><first>Hady</first><last>Elsahar</last></author>
       <author><first>Christophe</first><last>Gravier</last></author>
-      <author><first>Frederique</first><last>Laforest</last></author>
+      <author id="frederique-laforest"><first>Frederique</first><last>Laforest</last></author>
       <pages>218–228</pages>
       <abstract>We present a neural model for question generation from knowledge graphs triples in a “Zero-shot” setup, that is generating questions for predicate, subject types or object types that were not seen at training time. Our model leverages triples occurrences in the natural language corpus in a encoder-decoder architecture, paired with an original part-of-speech copy action mechanism to generate questions. Benchmark and human evaluation show that our model outperforms state-of-the-art on this task.</abstract>
       <url hash="bb3d75c4">N18-1020</url>
@@ -308,7 +308,7 @@
       <title>Neural Automated Essay Scoring and Coherence Modeling for Adversarially Crafted Input</title>
       <author><first>Youmna</first><last>Farag</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>263–271</pages>
       <abstract>We demonstrate that current state-of-the-art approaches to Automated Essay Scoring (AES) are not well-suited to capturing adversarially crafted input of grammatical but incoherent sequences of sentences. We develop a neural model of local coherence that can effectively learn connectedness features between sentences, and propose a framework for integrating and jointly training the local coherence model with a state-of-the-art AES model. We evaluate our approach against a number of baselines and experimentally demonstrate its effectiveness on both the AES task and the task of flagging adversarial input, further contributing to the development of an approach that strengthens the validity of neural essay scoring models.</abstract>
       <url hash="533d9c0e">N18-1024</url>
@@ -339,7 +339,7 @@
     <paper id="27">
       <title>Zero-Shot Sequence Labeling: Transferring Knowledge from Sentences to Tokens</title>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>293–302</pages>
       <abstract>Can attention- or gradient-based visualization techniques be used to infer token-level labels for binary sequence tagging problems, using networks trained only on sentence-level labels? We construct a neural network architecture based on soft attention, train it as a binary sentence classifier and evaluate against token-level annotation on four different datasets. Inferring token labels from a network provides a method for quantitatively evaluating what the model is learning, along with generating useful feedback in assistance systems. Our results indicate that attention-based methods are able to predict token-level labels more accurately, compared to gradient-based methods, sometimes even rivaling the supervised oracle network.</abstract>
       <url hash="0393e889">N18-1027</url>
@@ -400,7 +400,7 @@
     </paper>
     <paper id="31">
       <title>Improving Lexical Choice in Neural Machine Translation</title>
-      <author><first>Toan</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan</first><last>Nguyen</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>334–343</pages>
       <abstract>We explore two solutions to the problem of mistranslating rare words in neural machine translation. First, we argue that the standard output layer, which computes the inner product of a vector representing the context with all possible output word embeddings, rewards frequent words disproportionately, and we propose to fix the norms of both vectors to a constant value. Second, we integrate a simple lexical module which is jointly trained with the rest of the model. We evaluate our approaches on eight language pairs with data sizes ranging from 100k to 8M words, and achieve improvements of up to +4.3 BLEU, surpassing phrase-based translation in nearly all settings.</abstract>
@@ -412,9 +412,9 @@
     <paper id="32">
       <title>Universal Neural Machine Translation for Extremely Low Resource Languages</title>
       <author><first>Jiatao</first><last>Gu</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Jacob</first><last>Devlin</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>344–354</pages>
       <abstract>In this paper, we propose a new universal machine translation approach focusing on languages with a limited amount of parallel data. Our proposed approach utilizes a transfer-learning approach to share lexical and sentence level representations across multiple source languages into one target language. The lexical part is shared through a Universal Lexical Representation to support multi-lingual word-level sharing. The sentence-level sharing is represented by a model of experts from all source languages that share the source encoders with all other languages. This enables the low-resource language to utilize the lexical and sentence representations of the higher resource languages. Our approach is able to achieve 23 BLEU on Romanian-English WMT2016 using a tiny parallel corpus of 6k sentences, compared to the 18 BLEU of strong baseline system which uses multi-lingual training and back-translation. Furthermore, we show that the proposed approach can achieve almost 20 BLEU on the same dataset through fine-tuning a pre-trained multi-lingual system in a zero-shot setting.</abstract>
       <url hash="cae703da">N18-1032</url>
@@ -454,7 +454,7 @@
       <author><first>Lu</first><last>Wang</last></author>
       <author><first>Nicholas</first><last>Beauchamp</last></author>
       <author><first>Sarah</first><last>Shugars</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>375–385</pages>
       <abstract>Millions of conversations are generated every day on social media platforms. With limited attention, it is challenging for users to select which discussions they would like to participate in. Here we propose a new method for microblog conversation recommendation. While much prior work has focused on post-level recommendation, we exploit both the conversational context, and user content and behavior preferences. We propose a statistical model that jointly captures: (1) topics for representing user interests and conversation content, and (2) discourse modes for describing user replying behavior and conversation dynamics. Experimental results on two Twitter datasets demonstrate that our system outperforms methods that only model content without considering discourse.</abstract>
       <url hash="c921a72a">N18-1035</url>
@@ -505,8 +505,8 @@
     <paper id="39">
       <title>Comparatives, Quantifiers, Proportions: a Multi-Task Model for the Learning of Quantities from Vision</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Ionut-Teodor</first><last>Sorodoc</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>419–430</pages>
       <abstract>The present work investigates whether different quantification mechanisms (set comparison, vague quantification, and proportional estimation) can be jointly learned from visual scenes by a multi-task computational model. The motivation is that, in humans, these processes underlie the same cognitive, non-symbolic ability, which allows an automatic estimation and comparison of set magnitudes. We show that when information about lower-complexity tasks is available, the higher-level proportional task becomes more accurate than when performed in isolation. Moreover, the multi-task model is able to generalize to unseen combinations of target/non-target objects. Consistently with behavioral evidence showing the interference of absolute number in the proportional task, the multi-task model no longer works when asked to provide the number of target objects in the scene.</abstract>
       <url hash="d12d3989">N18-1039</url>
@@ -531,7 +531,7 @@
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Paraphrase Detection</title>
       <author><first>Fuad</first><last>Issa</last></author>
       <author><first>Marco</first><last>Damonte</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Xiaohui</first><last>Yan</last></author>
       <author><first>Yi</first><last>Chang</last></author>
       <pages>442–452</pages>
@@ -545,7 +545,7 @@
       <author><first>Fabio</first><last>Petroni</last></author>
       <author><first>Vassilis</first><last>Plachouras</last></author>
       <author><first>Timothy</first><last>Nugent</last></author>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <pages>453–462</pages>
       <abstract>The widespread use of word embeddings is associated with the recent successes of many natural language processing (NLP) systems. The key approach of popular models such as word2vec and GloVe is to learn dense vector representations from the context of words. More recently, other approaches have been proposed that incorporate different types of contextual information, including topics, dependency relations, n-grams, and sentiment. However, these models typically integrate only limited additional contextual information, and often in ad hoc ways. In this work, we introduce attr2vec, a novel framework for jointly learning embeddings for words and contextual attributes based on factorization machines. We perform experiments with different types of contextual information. Our experimental results on a text classification task demonstrate that using attr2vec to jointly learn embeddings for words and Part-of-Speech (POS) tags improves results compared to learning the embeddings independently. Moreover, we use attr2vec to train dependency-based embeddings and we show that they exhibit higher similarity between functionally related words compared to traditional approaches.</abstract>
       <url hash="6dde3bb1">N18-1042</url>
@@ -671,7 +671,7 @@
       <author><first>Palaash</first><last>Sawant</last></author>
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>572–582</pages>
       <abstract>Efficient word representations play an important role in solving various problems related to Natural Language Processing (NLP), data mining, text mining etc. The issue of data sparsity poses a great challenge in creating efficient word representation model for solving the underlying problem. The problem is more intensified in resource-poor scenario due to the absence of sufficient amount of corpus. In this work we propose to minimize the effect of data sparsity by leveraging bilingual word embeddings learned through a parallel corpus. We train and evaluate Long Short Term Memory (LSTM) based architecture for aspect level sentiment classification. The neural network architecture is further assisted by the hand-crafted features for the prediction. We show the efficacy of the proposed model against state-of-the-art methods in two experimental setups i.e. multi-lingual and cross-lingual.</abstract>
       <url hash="350dca54">N18-1053</url>
@@ -720,8 +720,8 @@
       <author><first>Ziang</first><last>Xie</last></author>
       <author><first>Guillaume</first><last>Genthial</last></author>
       <author><first>Stanley</first><last>Xie</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>619–628</pages>
       <abstract>Translation-based methods for grammar correction that directly map noisy, ungrammatical text to their clean counterparts are able to correct a broad range of errors; however, such techniques are bottlenecked by the need for a large parallel corpus of noisy and clean sentence pairs. In this paper, we consider synthesizing parallel data by noising a clean monolingual corpus. While most previous approaches introduce perturbations using features computed from local context windows, we instead develop error generation processes using a neural sequence transduction model trained to translate clean examples to their noisy counterparts. Given a corpus of clean examples, we propose beam search noising procedures to synthesize additional noisy examples that human evaluators were nearly unable to discriminate from nonsynthesized examples. Surprisingly, when trained on additional data synthesized using our best-performing noising scheme, our model approaches the same performance as when trained on additional nonsynthesized data.</abstract>
       <url hash="ec282973">N18-1057</url>
@@ -732,7 +732,7 @@
     <paper id="58">
       <title>Self-Training for Jointly Learning to Ask and Answer Questions</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>629–640</pages>
       <abstract>Building curious machines that can answer as well as ask questions is an important challenge for AI. The two tasks of question answering and question generation are usually tackled separately in the NLP literature. At the same time, both require significant amounts of supervised data which is hard to obtain in many domains. To alleviate these issues, we propose a self-training method for jointly learning to ask as well as answer questions, leveraging unlabeled text along with labeled question answer pairs for learning. We evaluate our approach on four benchmark datasets: SQUAD, MS MARCO, WikiQA and TrecQA, and show significant improvements over a number of established baselines on both question answering and question generation tasks. We also achieved new state-of-the-art results on two competitive answer sentence selection tasks: WikiQA and TrecQA.</abstract>
       <url hash="4a79e7ad">N18-1058</url>
@@ -769,7 +769,7 @@
       <author><first>Sabyasachi</first><last>Kamila</last></author>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>663–674</pages>
       <abstract>Temporal orientation refers to an individual’s tendency to connect to the psychological concepts of past, present or future, and it affects personality, motivation, emotion, decision making and stress coping processes. The study of the social media users’ psycho-demographic attributes from the perspective of human temporal orientation can be of utmost interest and importance to the business and administrative decision makers as it can provide an extra precious information for them to make informed decisions. In this paper, we propose a very first study to demonstrate the association between the sentiment view of the temporal orientation of the users and their different psycho-demographic attributes by analyzing their tweets. We first create a temporal orientation classifier in a minimally supervised way which classifies each tweet of the users in one of the three temporal categories, namely past, present, and future. A deep Bi-directional Long Short Term Memory (BLSTM) is used for the tweet classification task. Our tweet classifier achieves an accuracy of 78.27% when tested on a manually created test set. We then determine the users’ overall temporal orientation based on their tweets on the social media. The sentiment is added to the tweets at the fine-grained level where each temporal tweet is given a sentiment with either of the positive, negative or neutral. Our experiment reveals that depending upon the sentiment view of temporal orientation, a user’s attributes vary. We finally measure the correlation between the users’ sentiment view of temporal orientation and their different psycho-demographic factors using regression.</abstract>
@@ -779,9 +779,9 @@
     </paper>
     <paper id="62">
       <title>Querying Word Embeddings for Similarity and Relatedness</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
       <author><first>Robert</first><last>Zinkov</last></author>
-      <author><first>Michael</first><last>Jones</last></author>
+      <author id="michael-jones"><first>Michael</first><last>Jones</last></author>
       <pages>675–684</pages>
       <abstract>Word embeddings obtained from neural network models such as Word2Vec Skipgram have become popular representations of word meaning and have been evaluated on a variety of word similarity and relatedness norming data. Skipgram generates a set of word and context embeddings, the latter typically discarded after training. We demonstrate the usefulness of context embeddings in predicting asymmetric association between words from a recently published dataset of production norms (Jouravlev &amp; McRae, 2016). Our findings suggest that humans respond with words closer to the cue within the context embedding space (rather than the word embedding space), when asked to generate thematically related words.</abstract>
       <url hash="6106f487">N18-1062</url>
@@ -802,7 +802,7 @@
     </paper>
     <paper id="64">
       <title>Entity Commonsense Representation for Neural Abstractive Summarization</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Seonjae</first><last>Lim</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <pages>697–707</pages>
@@ -877,9 +877,9 @@
       <title>Automatic Stance Detection Using End-to-End Memory Networks</title>
       <author><first>Mitra</first><last>Mohtarami</last></author>
       <author><first>Ramy</first><last>Baly</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>767–776</pages>
       <abstract>We present an effective end-to-end memory network model that jointly (i) predicts whether a given document can be considered as relevant evidence for a given claim, and (ii) extracts snippets of evidence that can be used to reason about the factuality of the target claim. Our model combines the advantages of convolutional and recurrent neural networks as part of a memory network. We further introduce a similarity matrix at the inference level of the memory network in order to extract snippets of evidence for input claims more accurately. Our experiments on a public benchmark dataset, FakeNewsChallenge, demonstrate the effectiveness of our approach.</abstract>
@@ -890,7 +890,7 @@
     <paper id="71">
       <title>Collective Entity Disambiguation with Structured Gradient Tree Boosting</title>
       <author><first>Yi</first><last>Yang</last></author>
-      <author><first>Ozan</first><last>Irsoy</last></author>
+      <author id="ozan-irsoy"><first>Ozan</first><last>Irsoy</last></author>
       <author><first>Kazi Shefaet</first><last>Rahman</last></author>
       <pages>777–786</pages>
       <abstract>We present a gradient-tree-boosting-based structured learning model for jointly disambiguating named entities in a document. Gradient tree boosting is a widely used machine learning algorithm that underlies many top-performing natural language processing systems. Surprisingly, most works limit the use of gradient tree boosting as a tool for regular classification or regression problems, despite the structured nature of language. To the best of our knowledge, our work is the first one that employs the structured gradient tree boosting (SGTB) algorithm for collective entity disambiguation. By defining global features over previous disambiguation decisions and jointly modeling them with local features, our system is able to produce globally optimized entity assignments for mentions in a document. Exact inference is prohibitively expensive for our globally normalized model. To solve this problem, we propose Bidirectional Beam Search with Gold path (BiBSG), an approximate inference algorithm that is a variant of the standard beam search algorithm. BiBSG makes use of global information from both past and future to perform better local search. Experiments on standard benchmark datasets show that SGTB significantly improves upon published results. Specifically, SGTB outperforms the previous state-of-the-art neural system by near 1% absolute accuracy on the popular AIDA-CoNLL dataset.</abstract>
@@ -915,7 +915,7 @@
       <author><first>Lijun</first><last>Wu</last></author>
       <author><first>Li</first><last>Zhao</last></author>
       <author><first>Tao</first><last>Qin</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <author><first>Tie-Yan</first><last>Liu</last></author>
       <pages>799–808</pages>
       <abstract>Recurrent neural networks have achieved state-of-the-art results in many artificial intelligence tasks, such as language modeling, neural machine translation, speech recognition and so on. One of the key factors to these successes is big models. However, training such big models usually takes days or even weeks of time even if using tens of GPU cards. In this paper, we propose an efficient architecture to improve the efficiency of such RNN model training, which adopts the group strategy for recurrent layers, while exploiting the representation rearrangement strategy between layers as well as time steps. To demonstrate the advantages of our models, we conduct experiments on several datasets and tasks. The results show that our architecture achieves comparable or better accuracy comparing with baselines, with a much smaller number of parameters and at a much lower computational cost.</abstract>
@@ -978,7 +978,7 @@
       <title>Multimodal Named Entity Recognition for Short Social Media Posts</title>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <pages>852–860</pages>
       <abstract>We introduce a new task called Multimodal Named Entity Recognition (MNER) for noisy user-generated data such as tweets or Snapchat captions, which comprise short text with accompanying images. These social media posts often come in inconsistent or incomplete syntax and lexical notations with very limited surrounding textual contexts, bringing significant challenges for NER. To this end, we create a new dataset for MNER called SnapCaptions (Snapchat image-caption pairs submitted to public and crowd-sourced stories with fully annotated named entities). We then build upon the state-of-the-art Bi-LSTM word/character based NER models with 1) a deep image network which incorporates relevant visual context to augment textual information, and 2) a generic <i>modality-attention</i>
  module which learns to attenuate irrelevant modalities while amplifying
@@ -994,7 +994,7 @@
     <paper id="79">
       <title>Nested Named Entity Recognition Revisited</title>
       <author><first>Arzoo</first><last>Katiyar</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>861–871</pages>
       <abstract>We propose a novel recurrent neural network-based approach to simultaneously handle nested named entity recognition and nested entity mention detection. The model learns a hypergraph representation for nested entities using features extracted from a recurrent neural network. In evaluations on three standard data sets, we show that our approach significantly outperforms existing state-of-the-art methods, which are feature-based. The approach is also efficient: it operates linearly in the number of tokens and the number of possible output labels at any token. Finally, we present an extension of our model that jointly learns the head of each entity mention.</abstract>
       <url hash="c077ecb9">N18-1079</url>
@@ -1016,7 +1016,7 @@
       <title>Supervised Open Information Extraction</title>
       <author><first>Gabriel</first><last>Stanovsky</last></author>
       <author><first>Julian</first><last>Michael</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>885–895</pages>
       <abstract>We present data and methods that enable a supervised learning approach to Open Information Extraction (Open IE). Central to the approach is a novel formulation of Open IE as a sequence tagging problem, addressing challenges such as encoding multiple extractions for a predicate. We also develop a bi-LSTM transducer, extending recent deep Semantic Role Labeling models to extract Open IE tuples and provide confidence scores for tuning their precision-recall tradeoff. Furthermore, we show that the recently released Question-Answer Meaning Representation dataset can be automatically converted into an Open IE corpus which significantly increases the amount of available training data. Our supervised model outperforms the existing state-of-the-art Open IE systems on benchmark datasets.</abstract>
@@ -1069,7 +1069,7 @@
     <paper id="85">
       <title>Neural Particle Smoothing for Sampling from Conditional Sequence Models</title>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>929–941</pages>
       <abstract>We introduce neural particle smoothing, a sequential Monte Carlo method for sampling annotations of an input string from a given probability model. In contrast to conventional particle filtering algorithms, we train a proposal distribution that looks ahead to the end of the input string by means of a right-to-left LSTM. We demonstrate that this innovation can improve the quality of the sample. To motivate our formal choices, we explain how neural transduction models and our sampler can be viewed as low-dimensional but nonlinear approximations to working with HMMs over very large state spaces.</abstract>
       <attachment type="note" hash="75780600">N18-1085.Notes.pdf</attachment>
@@ -1080,7 +1080,7 @@
     <paper id="86">
       <title>Neural Syntactic Generative Models with Exact Marginalization</title>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>942–952</pages>
       <abstract>We present neural syntactic generative models with exact marginalization that support both dependency parsing and language modeling. Exact marginalization is made tractable through dynamic programming over shift-reduce parsing and minimal RNN-based feature sets. Our algorithms complement previous approaches by supporting batched training and enabling online computation of next word probabilities. For supervised dependency parsing, our model achieves a state-of-the-art result among generative approaches. We also report empirical results on unsupervised syntactic models and their role in language modeling. We find that our model formulation of latent dependencies with exact marginalization do not lead to better intrinsic language modeling performance than vanilla RNNs, and that parsing accuracy is not correlated with language modeling perplexity in stack-based models.</abstract>
       <url hash="066dbb26">N18-1086</url>
@@ -1105,7 +1105,7 @@
       <author><first>Wanxiang</first><last>Che</last></author>
       <author><first>Bing</first><last>Qin</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>965–975</pages>
       <abstract>We study the problem of analyzing tweets with universal dependencies (UD). We extend the UD guidelines to cover special constructions in tweets that affect tokenization, part-of-speech tagging, and labeled dependencies. Using the extended guidelines, we create a new tweet treebank for English (Tweebank v2) that is four times larger than the (unlabeled) Tweebank v1 introduced by Kong et al. (2014). We characterize the disagreements between our annotators and show that it is challenging to deliver consistent annotation due to ambiguity in understanding and explaining tweets. Nonetheless, using the new treebank, we build a pipeline system to parse raw tweets into UD. To overcome the annotation noise without sacrificing computational efficiency, we propose a new method to distill an ensemble of 20 transition-based parsers into a single one. Our parser achieves an improvement of 2.2 in LAS over the un-ensembled baseline and outperforms parsers that are state-of-the-art on other treebanks in both accuracy and speed.</abstract>
       <url hash="fed787fb">N18-1088</url>
@@ -1116,7 +1116,7 @@
       <title>Robust Multilingual Part-of-Speech Tagging via Adversarial Training</title>
       <author><first>Michihiro</first><last>Yasunaga</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>976–986</pages>
       <abstract>Adversarial training (AT) is a powerful regularization method for neural networks, aiming to achieve robustness to input perturbations. Yet, the specific effects of the robustness obtained from AT are still unclear in the context of natural language processing. In this paper, we propose and analyze a neural POS tagging model that exploits AT. In our experiments on the Penn Treebank WSJ corpus and the Universal Dependencies (UD) dataset (27 languages), we find that AT not only improves the overall tagging accuracy, but also 1) prevents over-fitting well in low resource languages and 2) boosts tagging accuracy for rare / unseen words. We also demonstrate that 3) the improved tagging performance by AT contributes to the downstream task of dependency parsing, and that 4) AT helps the model to learn cleaner word representations. 5) The proposed AT model is generally effective in different sequence labeling tasks. These positive results motivate further use of AT for natural language tasks.</abstract>
       <url hash="2e495106">N18-1089</url>
@@ -1125,10 +1125,10 @@
     </paper>
     <paper id="90">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing for <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Switching</title>
-      <author><first>Irshad</first><last>Bhat</last></author>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>987–998</pages>
       <abstract>Code-switching is a phenomenon of mixing grammatical structures of two or more languages under varied social constraints. The code-switching data differ so radically from the benchmark corpora used in NLP community that the application of standard technologies to these data degrades their performance sharply. Unlike standard corpora, these data often need to go through additional processes such as language identification, normalization and/or back-transliteration for their efficient processing. In this paper, we investigate these indispensable processes and other problems associated with syntactic parsing of code-switching data and propose methods to mitigate their effects. In particular, we study dependency parsing of code-switching data of Hindi and English multilingual speakers from Twitter. We present a treebank of Hindi-English code-switching tweets under Universal Dependencies scheme and propose a neural stacking model for parsing that efficiently leverages the part-of-speech tag and syntactic tree annotations in the code-switching treebank and the preexisting Hindi and English treebanks. We also present normalization and back-transliteration models with a decoding process tailored for code-switching data. Results show that our neural stacking parser is 1.5% LAS points better than the augmented parsing model and 3.8% LAS points better than the one which uses first-best normalization and/or back-transliteration.</abstract>
       <url hash="befb7c56">N18-1090</url>
@@ -1151,7 +1151,7 @@
       <title>Deep Generative Model for Joint Alignment and Word Representation</title>
       <author><first>Miguel</first><last>Rios</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>1011–1023</pages>
       <abstract>This work exploits translation data as a source of semantically relevant learning signal for models of word representation. In particular, we exploit equivalence through translation as a form of distributional context and jointly learn how to embed and align with a deep generative model. Our EmbedAlign model embeds words in their complete observed context and learns by marginalisation of latent lexical alignments. Besides, it embeds words as posterior probability densities, rather than point estimates, which allows us to compare words in context using a measure of overlap between distributions (e.g. KL divergence). We investigate our model’s performance on a range of lexical semantics tasks achieving competitive results on several standard benchmarks including natural language inference, paraphrasing, and text similarity.</abstract>
       <url hash="b330bae3">N18-1092</url>
@@ -1176,7 +1176,7 @@
     <paper id="94">
       <title>Exploring the Role of Prior Beliefs for Argument Persuasion</title>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1035–1045</pages>
       <abstract>Public debate forums provide a common platform for exchanging opinions on a topic of interest. While recent studies in natural language processing (NLP) have provided empirical evidence that the language of the debaters and their patterns of interaction play a key role in changing the mind of a reader, research in psychology has shown that prior beliefs can affect our interpretation of an argument and could therefore constitute a competing alternative explanation for resistance to changing one’s stance. To study the actual effect of language use vs. prior beliefs on persuasion, we provide a new dataset and propose a controlled setting that takes into consideration two reader-level factors: political and religious ideology. We find that prior beliefs affected by these reader-level factors play a more important role than language use effects and argue that it is important to account for them in NLP studies of persuasion.</abstract>
       <url hash="515760f9">N18-1094</url>
@@ -1201,7 +1201,7 @@
       <title>Author Commitment and Social Power: Automatic Belief Tagging to Infer the Social Context of Interactions</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Premkumar</first><last>Ganeshkumar</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1057–1068</pages>
       <abstract>Understanding how social power structures affect the way we interact with one another is of great interest to social scientists who want to answer fundamental questions about human behavior, as well as to computer scientists who want to build automatic methods to infer the social contexts of interactions. In this paper, we employ advancements in extra-propositional semantics extraction within NLP to study how author commitment reflects the social context of an interactions. Specifically, we investigate whether the level of commitment expressed by individuals in an organizational interaction reflects the hierarchical power structures they are part of. We find that subordinates use significantly more instances of non-commitment than superiors. More importantly, we also find that subordinates attribute propositions to other agents more often than superiors do — an aspect that has not been studied before. Finally, we show that enriching lexical features with commitment labels captures important distinctions in social meanings.</abstract>
       <url hash="a556c532">N18-1096</url>
@@ -1222,7 +1222,7 @@
       <title>Deep Temporal-Recurrent-Replicated-Softmax for Topical Trends over Time</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Subburam</first><last>Rajaram</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Bernt</first><last>Andrassy</last></author>
       <pages>1079–1089</pages>
       <abstract>Dynamic topic modeling facilitates the identification of topical trends over time in temporal collections of unstructured documents. We introduce a novel unsupervised neural dynamic topic model named as Recurrent Neural Network-Replicated Softmax Model (RNNRSM), where the discovered topics at each time influence the topic discovery in the subsequent time steps. We account for the temporal ordering of documents by explicitly modeling a joint distribution of latent topical dependencies over time, using distributional estimators with temporal recurrent connections. Applying RNN-RSM to 19 years of articles on NLP research, we demonstrate that compared to state-of-the art topic models, RNNRSM shows better generalization, topic interpretation, evolution and trends. We also introduce a metric (named as SPAN) to quantify the capability of dynamic topic model to capture word evolution in topics over time.</abstract>
@@ -1235,7 +1235,7 @@
       <title>Lessons from the <fixed-case>B</fixed-case>ible on Modern Topics: Low-Resource Multilingual Topic Model Evaluation</title>
       <author><first>Shudong</first><last>Hao</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>1090–1100</pages>
       <abstract>Multilingual topic models enable document analysis across languages through coherent multilingual summaries of the data. However, there is no standard and effective metric to evaluate the quality of multilingual topics. We introduce a new intrinsic evaluation of multilingual topic models that correlates well with human judgments of multilingual topic coherence as well as performance in downstream applications. Importantly, we also study evaluation for low-resource languages. Because standard metrics fail to accurately measure topic quality when robust external resources are unavailable, we propose an adaptation model that improves the accuracy and reliability of these metrics in low-resource settings.</abstract>
       <url hash="2f339f58">N18-1099</url>
@@ -1260,7 +1260,7 @@
       <title>A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference</title>
       <author><first>Adina</first><last>Williams</last></author>
       <author><first>Nikita</first><last>Nangia</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>1112–1122</pages>
       <abstract>This paper introduces the Multi-Genre Natural Language Inference (MultiNLI) corpus, a dataset designed for use in the development and evaluation of machine learning models for sentence understanding. At 433k examples, this resource is one of the largest corpora available for natural language inference (a.k.a. recognizing textual entailment), improving upon available resources in both its coverage and difficulty. MultiNLI accomplishes this by offering data from ten distinct genres of written and spoken English, making it possible to evaluate systems on nearly the full complexity of the language, while supplying an explicit setting for evaluating cross-genre domain adaptation. In addition, an evaluation using existing machine learning models designed for the Stanford NLI corpus shows that it represents a substantially more difficult task than does that corpus, despite the two showing similar levels of inter-annotator agreement.</abstract>
       <url hash="388df3db">N18-1101</url>
@@ -1294,7 +1294,7 @@
     <paper id="104">
       <title>Cross-Lingual <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing</title>
       <author><first>Marco</first><last>Damonte</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1146–1155</pages>
       <abstract>Abstract Meaning Representation (AMR) research has mostly focused on English. We show that it is possible to use AMR annotations for English as a semantic representation for sentences written in other languages. We exploit an AMR parser for English and parallel corpora to learn AMR parsers for Italian, Spanish, German and Chinese. Qualitative analysis show that the new parsers overcome structural differences between the languages. We further propose a method to evaluate the parsers that does not require gold standard data in the target languages. This method highly correlates with the gold standard evaluation, obtaining a Pearson correlation coefficient of 0.95.</abstract>
       <url hash="a54bffe1">N18-1104</url>
@@ -1306,7 +1306,7 @@
       <title>Sentences with Gapping: Parsing and Reconstructing Elided Predicates</title>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1156–1168</pages>
       <abstract>Sentences with gapping, such as Paul likes coffee and Mary tea, lack an overt predicate to indicate the relation between two or more arguments. Surface syntax representations of such sentences are often produced poorly by parsers, and even if correct, not well suited to downstream natural language understanding tasks such as relation extraction that are typically designed to extract information from sentences with canonical clause structure. In this paper, we present two methods for parsing to a Universal Dependencies graph representation that explicitly encodes the elided material with additional nodes and edges. We find that both methods can reconstruct elided material from dependency trees with high accuracy when the parser correctly predicts the existence of a gap. We further demonstrate that one of our methods can be applied to other languages based on a case study on Swedish.</abstract>
       <url hash="9c15a708">N18-1105</url>
@@ -1332,7 +1332,7 @@
       <author><first>Robert</first><last>Frank</last></author>
       <author><first>Pauli</first><last>Xu</last></author>
       <author><first>William</first><last>Merrill</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1181–1194</pages>
       <abstract>We present a graph-based Tree Adjoining Grammar (TAG) parser that uses BiLSTMs, highway connections, and character-level CNNs. Our best end-to-end parser, which jointly performs supertagging, POS tagging, and parsing, outperforms the previously reported best results by more than 2.2 LAS and UAS points. The graph-based parsing architecture allows for global inference and rich feature representations for TAG parsing, alleviating the fundamental trade-off between transition-based and graph-based parsing systems. We also demonstrate that the proposed parser achieves state-of-the-art performance in the downstream tasks of Parsing Evaluation using Textual Entailments (PETE) and Unbounded Dependency Recovery. This provides further support for the claim that TAG is a viable formalism for problems that require rich structural analysis of sentences.</abstract>
       <url hash="607e6074">N18-1107</url>
@@ -1344,9 +1344,9 @@
       <title>Colorless Green Recurrent Networks Dream Hierarchically</title>
       <author><first>Kristina</first><last>Gulordava</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1195–1205</pages>
       <abstract>Recurrent neural networks (RNNs) achieved impressive results in a variety of linguistic processing tasks, suggesting that they can induce non-trivial properties of language. We investigate to what extent RNNs learn to track abstract hierarchical syntactic structure. We test whether RNNs trained with a generic language modeling objective in four languages (Italian, English, Hebrew, Russian) can predict long-distance number agreement in various constructions. We include in our evaluation nonsensical sentences where RNNs cannot rely on semantic or lexical cues (“The colorless green ideas I ate with the chair sleep furiously”), and, for Italian, we compare model performance to human intuitions. Our language-model-trained RNNs make reliable predictions about long-distance agreement, and do not lag much behind human performance. We thus bring support to the hypothesis that RNNs are not just shallow-pattern extractors, but they also acquire deeper grammatical competence.</abstract>
       <url hash="b7ae0d9f">N18-1108</url>
@@ -1375,8 +1375,8 @@
     <paper id="110">
       <title>Early Text Classification Using Multi-Resolution Concept Representations</title>
       <author><first>Adrian Pastor</first><last>López-Monroy</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author><first>Hugo Jair</first><last>Escalante</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>1216–1225</pages>
@@ -1388,7 +1388,7 @@
     <paper id="111">
       <title>Multinomial Adversarial Networks for Multi-Domain Text Classification</title>
       <author><first>Xilun</first><last>Chen</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1226–1240</pages>
       <abstract>Many text classification tasks are known to be highly domain-dependent. Unfortunately, the availability of training data can vary drastically across domains. Worse still, for some domains there may not be any annotated data at all. In this work, we propose a multinomial adversarial network (MAN) to tackle this real-world problem of multi-domain text classification (MDTC) in which labeled data may exist for multiple domains, but in insufficient amounts to train effective classifiers for one or more of the domains. We provide theoretical justifications for the MAN framework, proving that different instances of MANs are essentially minimizers of various f-divergence metrics (Ali and Silvey, 1966) among multiple probability distributions. MANs are thus a theoretically sound generalization of traditional adversarial networks that discriminate over two distributions. More specifically, for the MDTC task, MAN learns features that are invariant across multiple domains by resorting to its ability to reduce the divergence among the feature distributions of each domain. We present experimental results showing that MANs significantly outperform the prior art on the MDTC task. We also show that MANs achieve state-of-the-art performance for domains with no labeled data.</abstract>
       <attachment type="software" hash="706e4c6e">N18-1111.Software.tgz</attachment>
@@ -1432,11 +1432,11 @@
     </paper>
     <paper id="115">
       <title>The Context-Dependent Additive Recurrent Neural Net</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Hung</first><last>Bui</last></author>
       <pages>1274–1283</pages>
       <abstract>Contextual sequence mapping is one of the fundamental problems in Natural Language Processing (NLP). Here, instead of relying solely on the information presented in the text, the learning agents have access to a strong external signal given to assist the learning process. In this paper, we propose a novel family of Recurrent Neural Network unit: the Context-dependent Additive Recurrent Neural Network (CARNN) that is designed specifically to address this type of problem. The experimental results on public datasets in the dialog problem (Babi dialog Task 6 and Frame), contextual language model (Switchboard and Penn Tree Bank) and question answering (Trec QA) show that our novel CARNN-based architectures outperform previous methods.</abstract>
@@ -1449,8 +1449,8 @@
       <author><first>Huadong</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1284–1293</pages>
       <abstract>Natural language sentences, being hierarchical, can be represented at different levels of granularity, like words, subwords, or characters. But most neural machine translation systems require the sentence to be represented as a sequence at a single level of granularity. It can be difficult to determine which granularity is better for a particular translation task. In this paper, we improve the model by incorporating multiple levels of granularity. Specifically, we propose (1) an encoder with character attention which augments the (sub)word-level representation with character-level information; (2) a decoder with multiple attentions that enable the representations from different levels of granularity to control the translation cooperatively. Experiments on three translation tasks demonstrate that our proposed models outperform the standard word-based model, the subword-based model, and a strong character-based model.</abstract>
       <url hash="cc97ebb2">N18-1116</url>
@@ -1497,7 +1497,7 @@
       <title>Guiding Neural Machine Translation with Retrieved Translation Pieces</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>1325–1335</pages>
@@ -1533,7 +1533,7 @@
     <paper id="123">
       <title>Neural Machine Translation for Bilingually Scarce Scenarios: a Deep Multi-Task Learning Approach</title>
       <author><first>Poorya</first><last>Zaremoodi</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1356–1365</pages>
       <abstract>Neural machine translation requires large amount of parallel training text to learn a reasonable quality translation model. This is particularly inconvenient for language pairs for which enough parallel text is not available. In this paper, we use monolingual linguistic resources in the source side to address this challenging problem based on a multi-task learning approach. More specifically, we scaffold the machine translation task on auxiliary tasks including semantic parsing, syntactic parsing, and named-entity recognition. This effectively injects semantic and/or syntactic knowledge into the translation model, which would otherwise require a large amount of training bitext to learn from. We empirically analyze and show the effectiveness of our multitask learning approach on three translation tasks: English-to-French, English-to-Farsi, and English-to-Vietnamese.</abstract>
       <url hash="6f6875e2">N18-1123</url>
@@ -1542,10 +1542,10 @@
     </paper>
     <paper id="124">
       <title>Self-Attentive Residual Decoder for Neural Machine Translation</title>
-      <author><first>Lesly</first><last>Miculicich Werlen</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich Werlen</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
       <author><first>Dhananjay</first><last>Ram</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>1366–1379</pages>
       <abstract>Neural sequence-to-sequence networks with attention have achieved remarkable performance for machine translation. One of the reasons for their effectiveness is their ability to capture relevant source-side contextual information at each time-step prediction through an attention mechanism. However, the target-side context is solely based on the sequence model which, in practice, is prone to a recency bias and lacks the ability to capture effectively non-sequential dependencies among words. To address this limitation, we propose a target-side-attentive residual recurrent network for decoding, where attention over previous words contributes directly to the prediction of the next word. The residual learning facilitates the flow of information from the distant past and is able to emphasize any of the previously translated words, hence it gains access to a wider context. The proposed model outperforms a neural MT baseline as well as a memory and self-attention network on three language pairs. The analysis of the attention learned by the decoder confirms that it emphasizes a wider context, and that it captures syntactic-like structures.</abstract>
       <attachment type="note" hash="eebad672">N18-1124.Notes.pdf</attachment>
@@ -1569,7 +1569,7 @@
     <paper id="126">
       <title>Context Sensitive Neural Lemmatization with <fixed-case>L</fixed-case>ematus</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1391–1400</pages>
       <abstract>The main motivation for developing contextsensitive lemmatizers is to improve performance on unseen and ambiguous words. Yet previous systems have not carefully evaluated whether the use of context actually helps in these cases. We introduce Lematus, a lemmatizer based on a standard encoder-decoder architecture, which incorporates character-level sentence context. We evaluate its lemmatization accuracy across 20 languages in both a full data setting and a lower-resource setting with 10k training examples in each language. In both settings, we show that including context significantly improves results against a context-free version of the model. Context helps more for ambiguous words than for unseen words, though the latter has a greater effect on overall performance differences between languages. We also compare to three previous context-sensitive lemmatization systems, which all use pre-extracted edit trees as well as hand-selected features and/or additional sources of information such as tagged training data. Without using any of these, our context-sensitive model outperforms the best competitor system (Lemming) in the fulldata setting, and performs on par in the lowerresource setting.</abstract>
       <url hash="4cf50646">N18-1126</url>
@@ -1580,7 +1580,7 @@
       <title>Modeling Noisiness to Recognize Named Entities using Multitask Neural Networks on Social Media</title>
       <author><first>Gustavo</first><last>Aguilar</last></author>
       <author><first>Adrian Pastor</first><last>López-Monroy</last></author>
-      <author><first>Fabio</first><last>González</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio</first><last>González</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>1401–1412</pages>
       <abstract>Recognizing named entities in a document is a key task in many NLP applications. Although current state-of-the-art approaches to this task reach a high performance on clean text (e.g. newswire genres), those algorithms dramatically degrade when they are moved to noisy environments such as social media domains. We present two systems that address the challenges of processing social media data using character-level phonetics and phonology, word embeddings, and Part-of-Speech tags as features. The first model is a multitask end-to-end Bidirectional Long Short-Term Memory (BLSTM)-Conditional Random Field (CRF) network whose output layer contains two CRF classifiers. The second model uses a multitask BLSTM network as feature extractor that transfers the learning to a CRF classifier for the final prediction. Our systems outperform the current F1 scores of the state of the art on the Workshop on Noisy User-generated Text 2017 dataset by 2.45% and 3.69%, establishing a more suitable approach for social media environments.</abstract>
@@ -1612,7 +1612,7 @@
       <title>Using Morphological Knowledge in Open-Vocabulary Neural Language Models</title>
       <author><first>Austin</first><last>Matthews</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1435–1445</pages>
       <abstract>Languages with productive morphology pose problems for language models that generate words from a fixed vocabulary. Although character-based models allow any possible word type to be generated, they are linguistically naïve: they must discover that words exist and are delimited by spaces—basic linguistic facts that are built in to the structure of word-based models. We introduce an open-vocabulary language model that incorporates more sophisticated linguistic knowledge by predicting words using a mixture of three generative processes: (1) by generating words as a sequence of characters, (2) by directly generating full word forms, and (3) by generating words as a sequence of morphemes that are combined using a hand-written morphological analyzer. Experiments on Finnish, Turkish, and Russian show that our model outperforms character sequence models and other strong baselines on intrinsic and extrinsic measures. Furthermore, we show that our model learns to exploit morphological knowledge encoded in the analyzer, and, as a byproduct, it can perform effective unsupervised morphological disambiguation.</abstract>
       <url hash="b4a053e8">N18-1130</url>
@@ -1634,7 +1634,7 @@
     <paper id="132">
       <title><fixed-case>DR</fixed-case>-<fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>: Dependent Reading Bidirectional <fixed-case>LSTM</fixed-case> for Natural Language Inference</title>
       <author><first>Reza</first><last>Ghaeini</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Vivek</first><last>Datla</last></author>
       <author><first>Joey</first><last>Liu</last></author>
       <author><first>Kathy</first><last>Lee</last></author>
@@ -1682,7 +1682,7 @@
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1492–1502</pages>
       <abstract>We present a new approach to learning a semantic parser from multiple datasets, even when the target semantic formalisms are drastically different and the underlying corpora do not overlap. We handle such “disjoint” data by treating annotations for unobserved formalisms as latent structured variables. Building on state-of-the-art baselines, we show improvements both in frame-semantic parsing and semantic dependency parsing by modeling them jointly.</abstract>
       <url hash="39519ad9">N18-1135</url>
@@ -1733,7 +1733,7 @@
       <author><first>Parag</first><last>Jain</last></author>
       <author><first>Anirban</first><last>Laha</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <pages>1539–1550</pages>
       <abstract>In this work, we focus on the task of generating natural language descriptions from a structured table of facts containing fields (such as nationality, occupation, etc) and values (such as Indian, actor, director, etc). One simple choice is to treat the table as a sequence of fields and values and then use a standard seq2seq model for this task. However, such a model is too generic and does not exploit task specific characteristics. For example, while generating descriptions from a table, a human would attend to information at two levels: (i) the fields (macro level) and (ii) the values within the field (micro level). Further, a human would continue attending to a field for a few timesteps till all the information from that field has been rendered and then never return back to this field (because there is nothing left to say about it). To capture this behavior we use (i) a fused bifocal attention mechanism which exploits and combines this micro and macro level information and (ii) a gated orthogonalization mechanism which tries to ensure that a field is remembered for a few time steps and then forgotten. We experiment with a recently released dataset which contains fact tables about people and their corresponding one line biographical descriptions in English. In addition, we also introduce two similar datasets for French and German. Our experiments show that the proposed model gives 21% relative improvement over a recently proposed state of the art method and 10% relative improvement over basic seq2seq models. The code and the datasets developed as a part of this work are publicly available on <url>https://github.com/PrekshaNema25/StructuredData_To_Descriptions</url></abstract>
       <url hash="7ecfee9f">N18-1139</url>
@@ -1742,8 +1742,8 @@
     </paper>
     <paper id="140">
       <title><fixed-case>C</fixed-case>li<fixed-case>CR</fixed-case>: a Dataset of Clinical Case Reports for Machine Reading Comprehension</title>
-      <author><first>Simon</first><last>Šuster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1551–1563</pages>
       <abstract>We present a new dataset for machine comprehension in the medical domain. Our dataset uses clinical case reports with around 100,000 gap-filling queries about these cases. We apply several baselines and state-of-the-art neural readers to the dataset, and observe a considerable gap in performance (20% F1) between the best human and machine readers. We analyze the skills required for successful answering and show how reader performance varies depending on the applicable skills. We find that inferences using domain knowledge and object tracking are the most frequently required skills, and that recognizing omitted information and spatio-temporal reasoning are the most difficult for the machines.</abstract>
       <url hash="81d8179e">N18-1140</url>
@@ -1781,7 +1781,7 @@
       <title>Supervised and Unsupervised Transfer Learning for Question Answering</title>
       <author><first>Yu-An</first><last>Chung</last></author>
       <author><first>Hung-Yi</first><last>Lee</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>1585–1594</pages>
       <abstract>Although transfer learning has been shown to be successful for tasks like object and speech recognition, its applicability to question answering (QA) has yet to be well-studied. In this paper, we conduct extensive experiments to investigate the transferability of knowledge learned from a source QA dataset to a target dataset using two QA models. The performance of both models on a TOEFL listening comprehension test (Tseng et al., 2016) and MCTest (Richardson et al., 2013) is significantly improved via a simple transfer learning technique from MovieQA (Tapaswi et al., 2016). In particular, one of the models achieves the state-of-the-art on all target datasets; for the TOEFL listening comprehension test, it outperforms the previous best model by 7%. Finally, we show that transfer learning is helpful even in unsupervised scenarios when correct answers for target QA dataset examples are not available.</abstract>
       <url hash="72a495db">N18-1143</url>
@@ -1790,10 +1790,10 @@
     </paper>
     <paper id="144">
       <title>Tracking State Changes in Procedural Text: a Challenge Dataset and Models for Process Paragraph Comprehension</title>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Lifu</first><last>Huang</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>1595–1604</pages>
       <abstract>We present a new dataset and models for comprehending paragraphs about processes (e.g., photosynthesis), an important genre of text describing a dynamic world. The new dataset, ProPara, is the first to contain natural (rather than machine-generated) text about a changing world along with a full annotation of entity states (location and existence) during those changes (81k datapoints). The end-task, tracking the location and existence of entities through the text, is challenging because the causal effects of actions are often implicit and need to be inferred. We find that previous models that have worked well on synthetic data achieve only mediocre performance on ProPara, and introduce two new neural models that exploit alternative mechanisms for state prediction, in particular using LSTM input encoding and span prediction. The new models improve accuracy by up to 19%. We are releasing the ProPara dataset and our models to the community.</abstract>
@@ -1817,8 +1817,8 @@
       <title>Deconfounded Lexicon Induction for Interpretable Social Science</title>
       <author><first>Reid</first><last>Pryzant</last></author>
       <author><first>Kelly</first><last>Shen</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Stefan</first><last>Wagner</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="stefan-wagner"><first>Stefan</first><last>Wagner</last></author>
       <pages>1615–1625</pages>
       <abstract>NLP algorithms are increasingly used in computational social science to take linguistic observations and predict outcomes like human preferences or actions. Making these social models transparent and interpretable often requires identifying features in the input that predict outcomes while also controlling for potential confounds. We formalize this need as a new task: inducing a lexicon that is predictive of a set of target variables yet uncorrelated to a set of confounding variables. We introduce two deep learning algorithms for the task. The first uses a bifurcated architecture to separate the explanatory power of the text and confounds. The second uses an adversarial discriminator to force confound-invariant text encodings. Both elicit lexicons from learned weights and attentional scores. We use them to induce lexicons that are predictive of timely responses to consumer complaints (controlling for product), enrollment from course descriptions (controlling for subject), and sales from product descriptions (controlling for seller). In each domain our algorithms pick words that are associated with <i>narrative persuasion</i>; more
  predictive and less confound-related than those of standard
@@ -1830,7 +1830,7 @@
     </paper>
     <paper id="147">
       <title>Detecting Denial-of-Service Attacks from Social Media Text: Applying <fixed-case>NLP</fixed-case> to Computer Security</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Ben</first><last>Fry</last></author>
       <author><first>James</first><last>McMasters</last></author>
       <pages>1626–1635</pages>
@@ -1842,7 +1842,7 @@
     <paper id="148">
       <title>The Importance of Calibration for Estimating Proportions from Annotations</title>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1636–1646</pages>
       <abstract>Estimating label proportions in a target corpus is a type of measurement that is useful for answering certain types of social-scientific questions. While past work has described a number of relevant approaches, nearly all are based on an assumption which we argue is invalid for many problems, particularly when dealing with human annotations. In this paper, we identify and differentiate between two relevant data generating scenarios (intrinsic vs. extrinsic labels), introduce a simple but novel method which emphasizes the importance of calibration, and then analyze and experimentally validate the appropriateness of various methods for each of the two scenarios.</abstract>
       <attachment type="note" hash="599f4db3">N18-1148.Notes.pdf</attachment>
@@ -1854,10 +1854,10 @@
       <title>A Dataset of Peer Reviews (<fixed-case>P</fixed-case>eer<fixed-case>R</fixed-case>ead): Collection, Insights and <fixed-case>NLP</fixed-case> Applications</title>
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Madeleine</first><last>van Zuylen</last></author>
       <author><first>Sebastian</first><last>Kohlmeier</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <pages>1647–1661</pages>
       <abstract>Peer reviewing is a central component in the scientific publishing process. We present the first public dataset of scientific peer reviews available for research purposes (PeerRead v1),1 providing an opportunity to study this important artifact. The dataset consists of 14.7K paper drafts and the corresponding accept/reject decisions in top-tier venues including ACL, NIPS and ICLR. The dataset also includes 10.7K textual peer reviews written by experts for a subset of the papers. We describe the data collection process and report interesting observed phenomena in the peer reviews. We also propose two novel NLP tasks based on this dataset and provide simple baseline models. In the first task, we show that simple models can predict whether a paper is accepted with up to 21% error reduction compared to the majority baseline. In the second task, we predict the numerical scores of review aspects and show that simple models can outperform the mean baseline for aspects with high variance such as ‘originality’ and ‘impact’.</abstract>
@@ -1965,7 +1965,7 @@
     <paper id="158">
       <title>Ranking Sentences for Extractive Summarization with Reinforcement Learning</title>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1747–1759</pages>
       <abstract>Single document summarization is the task of producing a shorter version of a document while preserving its principal information content. In this paper we conceptualize extractive summarization as a sentence ranking task and propose a novel training algorithm which globally optimizes the ROUGE evaluation metric through a reinforcement learning objective. We use our algorithm to train a neural summarization model on the CNN and DailyMail datasets and demonstrate experimentally that it outperforms state-of-the-art extractive and abstractive systems when evaluated automatically and by humans.</abstract>
@@ -1985,7 +1985,7 @@
     </paper>
     <paper id="160">
       <title>What’s This Movie About? A Joint Neural Network Architecture for Movie Content Analysis</title>
-      <author><first>Philip John</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip John</first><last>Gorinski</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1770–1781</pages>
       <abstract>This work takes a first step toward movie content analysis by tackling the novel task of movie overview generation. Overviews are natural language texts that give a first impression of a movie, describing aspects such as its genre, plot, mood, or artistic style. We create a dataset that consists of movie scripts, attribute-value pairs for the movies’ aspects, as well as overviews, which we extract from an online database. We present a novel end-to-end model for overview generation, consisting of a multi-label encoder for identifying screenplay attributes, and an LSTM decoder to generate natural language sentences conditioned on the identified attributes. Automatic and human evaluation show that the encoder is able to reliably assign good labels for the movie’s attributes, and the overviews provide descriptions of the movie’s content which are informative and faithful.</abstract>
@@ -1996,7 +1996,7 @@
     <paper id="161">
       <title>Which Scores to Predict in Sentence Regression for Text Summarization?</title>
       <author><first>Markus</first><last>Zopf</last></author>
-      <author><first>Eneldo</first><last>Loza Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo</first><last>Loza Mencía</last></author>
       <author><first>Johannes</first><last>Fürnkranz</last></author>
       <pages>1782–1791</pages>
       <abstract>The task of automatic text summarization is to generate a short text that summarizes the most important information in a given set of documents. Sentence regression is an emerging branch in automatic text summarizations. Its key idea is to estimate the importance of information via learned utility scores for individual sentences. These scores are then used for selecting sentences from the source documents, typically according to a greedy selection strategy. Recently proposed state-of-the-art models learn to predict ROUGE recall scores of individual sentences, which seems reasonable since the final summaries are evaluated according to ROUGE recall. In this paper, we show in extensive experiments that following this intuition leads to suboptimal results and that learning to predict ROUGE precision scores leads to better results. The crucial difference is to aim not at covering as much information as possible but at wasting as little space as possible in every greedy step.</abstract>
@@ -2035,7 +2035,7 @@
     <paper id="164">
       <title>Learning to Disentangle Interleaved Conversational Threads with a <fixed-case>S</fixed-case>iamese Hierarchical Network and Similarity Ranking</title>
       <author><first>Jyun-Yu</first><last>Jiang</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
       <author><first>Wei</first><last>Wang</last></author>
       <pages>1812–1822</pages>
@@ -2072,7 +2072,7 @@
     <paper id="167">
       <title><fixed-case>ELDEN</fixed-case>: Improved Entity Linking Using Densified Knowledge Graphs</title>
       <author><first>Priya</first><last>Radhakrishnan</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>1844–1853</pages>
       <abstract>Entity Linking (EL) systems aim to automatically map mentions of an entity in text to the corresponding entity in a Knowledge Graph (KG). Degree of connectivity of an entity in the KG directly affects an EL system’s ability to correctly link mentions in text to the entity in KG. This causes many EL systems to perform well for entities well connected to other entities in KG, bringing into focus the role of KG density in EL. In this paper, we propose Entity Linking using Densified Knowledge Graphs (ELDEN). ELDEN is an EL system which first densifies the KG with co-occurrence statistics from a large text corpus, and then uses the densified KG to train entity embeddings. Entity similarity measured using these trained entity embeddings result in improved EL. ELDEN outperforms state-of-the-art EL system on benchmark datasets. Due to such densification, ELDEN performs well for sparsely connected entities in the KG too. ELDEN’s approach is simple, yet effective. We have made ELDEN’s code and data publicly available.</abstract>
@@ -2087,7 +2087,7 @@
       <author><first>Hai</first><last>Ye</last></author>
       <author><first>Xin</first><last>Jiang</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <pages>1854–1864</pages>
       <abstract>In this paper, we propose to study the problem of court view generation from the fact description in a criminal case. The task aims to improve the interpretability of charge prediction systems and help automatic legal document generation. We formulate this task as a text-to-text natural language generation (NLG) problem. Sequence-to-sequence model has achieved cutting-edge performances in many NLG tasks. However, due to the non-distinctions of fact descriptions, it is hard for Seq2Seq model to generate charge-discriminative court views. In this work, we explore charge labels to tackle this issue. We propose a label-conditioned Seq2Seq model with attention for this problem, to decode court views conditioned on encoded charge labels. Experimental results show the effectiveness of our method.</abstract>
       <url hash="5b543332">N18-1168</url>
@@ -2114,7 +2114,7 @@
       <author><first>Mohit</first><last>Iyyer</last></author>
       <author><first>John</first><last>Wieting</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1875–1885</pages>
       <abstract>We propose syntactically controlled paraphrase networks (SCPNs) and use them to generate adversarial examples. Given a sentence and a target syntactic form (e.g., a constituency parse), SCPNs are trained to produce a paraphrase of the sentence with the desired syntax. We show it is possible to create training data for this task by first doing backtranslation at a very large scale, and then using a parser to label the syntactic transformations that naturally occur during this process. Such data allows us to train a neural encoder-decoder model with extra inputs to specify the target syntax. A combination of automated and human evaluations show that SCPNs generate paraphrases that follow their target specifications without decreasing paraphrase quality when compared to baseline (uncontrolled) paraphrase systems. Furthermore, they are more capable of generating syntactically adversarial examples that both (1) “fool” pretrained models and (2) improve the robustness of these models to syntactic variation when used to augment their training data.</abstract>
       <url hash="c37554e7">N18-1170</url>
@@ -2149,7 +2149,7 @@
       <title>Multi-Task Learning of Pairwise Sequence Classification Tasks over Disparate Label Spaces</title>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1896–1906</pages>
       <abstract>We combine multi-task learning and semi-supervised learning by inducing a joint embedding space between disparate label spaces and learning transfer functions between label embeddings, enabling us to jointly leverage unlabelled data and auxiliary, annotated datasets. We evaluate our approach on a variety of tasks with disparate label spaces. We outperform strong single and multi-task baselines and achieve a new state of the art for aspect-based and topic-based sentiment analysis.</abstract>
       <url hash="ab1b7ba5">N18-1172</url>
@@ -2159,7 +2159,7 @@
     </paper>
     <paper id="173">
       <title>Word Emotion Induction for Multiple Languages as a Deep Multi-Task Learning Problem</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>1907–1918</pages>
       <abstract>Predicting the emotional value of lexical items is a well-known problem in sentiment analysis. While research has focused on polarity for quite a long time, meanwhile this early focus has been shifted to more expressive emotion representation models (such as Basic Emotions or Valence-Arousal-Dominance). This change resulted in a proliferation of heterogeneous formats and, in parallel, often small-sized, non-interoperable resources (lexicons and corpus annotations). In particular, the limitations in size hampered the application of deep learning methods in this area because they typically require large amounts of input data. We here present a solution to get around this language data bottleneck by rephrasing word emotion induction as a multi-task learning problem. In this approach, the prediction of each independent emotion dimension is considered as an individual task and hidden layers are shared between these dimensions. We investigate whether multi-task learning is more advantageous than single-task learning for emotion prediction by comparing our model against a wide range of alternative emotion and polarity induction methods featuring 9 typologically diverse languages and a total of 15 conditions. Our model turns out to outperform each one of them. Against all odds, the proposed deep learning approach yields the largest gain on the smallest data sets, merely composed of one thousand samples.</abstract>
@@ -2171,7 +2171,7 @@
     <paper id="174">
       <title>Human Needs Categorization of Affective Events Using Labeled and Unlabeled Data</title>
       <author><first>Haibo</first><last>Ding</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1919–1929</pages>
       <abstract>We often talk about events that impact us positively or negatively. For example “I got a job” is good news, but “I lost my job” is bad news. When we discuss an event, we not only understand its affective polarity but also the reason why the event is beneficial or detrimental. For example, getting or losing a job has affective polarity primarily because it impacts us financially. Our work aims to categorize affective events based upon human need categories that often explain people’s motivations and desires: PHYSIOLOGICAL, HEALTH, LEISURE, SOCIAL, FINANCIAL, COGNITION, and FREEDOM. We create classification models based on event expressions as well as models that use contexts surrounding event mentions. We also design a co-training model that learns from unlabeled data by simultaneously training event expression and event context classifiers in an iterative learning process. Our results show that co-training performs well, producing substantially better results than the individual classifiers.</abstract>
       <url hash="deed86cf">N18-1174</url>
@@ -2197,7 +2197,7 @@
       <title>Linguistic Cues to Deception and Perceived Deception in Interview Dialogues</title>
       <author><first>Sarah Ita</first><last>Levitan</last></author>
       <author><first>Angel</first><last>Maredia</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>1941–1950</pages>
       <abstract>We explore deception detection in interview dialogues. We analyze a set of linguistic features in both truthful and deceptive responses to interview questions. We also study the perception of deception, identifying characteristics of statements that are perceived as truthful or deceptive by interviewers. Our analysis show significant differences between truthful and deceptive question responses, as well as variations in deception patterns across gender and native language. This analysis motivated our selection of features for machine learning experiments aimed at classifying globally deceptive speech. Our best classification performance is 72.74% F1-Score (about 17% better than human performance), which is achieved using a combination of linguistic features and individual traits.</abstract>
       <url hash="40b14761">N18-1176</url>
@@ -2219,9 +2219,9 @@
     </paper>
     <paper id="178">
       <title>Hierarchical Structured Model for Fine-to-Coarse Manifesto Text Analysis</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1964–1974</pages>
       <abstract>Election manifestos document the intentions, motives, and views of political parties. They are often used for analysing a party’s fine-grained position on a particular issue, as well as for coarse-grained positioning of a party on the left–right spectrum. In this paper we propose a two-stage model for automatically performing both levels of analysis over manifestos. In the first step we employ a hierarchical multi-task structured deep model to predict fine- and coarse-grained positions, and in the second step we perform post-hoc calibration of coarse-grained positions using probabilistic soft logic. We empirically show that the proposed model outperforms state-of-art approaches at both granularities using manifestos from twelve countries, written in ten different languages.</abstract>
       <url hash="f79e4ba6">N18-1178</url>
@@ -2246,7 +2246,7 @@
       <title>Assessing Language Proficiency from Eye Movements in Reading</title>
       <author><first>Yevgeni</first><last>Berzak</last></author>
       <author><first>Boris</first><last>Katz</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1986–1996</pages>
       <abstract>We present a novel approach for determining learners’ second language proficiency which utilizes behavioral traces of eye movements during reading. Our approach provides stand-alone eyetracking based English proficiency scores which reflect the extent to which the learner’s gaze patterns in reading are similar to those of native English speakers. We show that our scores correlate strongly with standardized English proficiency tests. We also demonstrate that gaze information can be used to accurately predict the outcomes of such tests. Our approach yields the strongest performance when the test taker is presented with a suite of sentences for which we have eyetracking data from other readers. However, it remains effective even using eyetracking with sentences for which eye movement data have not been previously collected. By deriving proficiency as an automatic byproduct of eye movements during ordinary reading, our approach offers a potentially valuable new tool for second language proficiency assessment. More broadly, our results open the door to future methods for inferring reader characteristics from the behavioral traces of reading.</abstract>
       <url hash="38816e98">N18-1180</url>
@@ -2256,7 +2256,7 @@
     <paper id="181">
       <title>Comparing Theories of Speaker Choice Using a Model of Classifier Production in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Meilin</first><last>Zhan</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1997–2005</pages>
       <abstract>Speakers often have more than one way to express the same meaning. What general principles govern speaker choice in the face of optionality when near semantically invariant alternation exists? Studies have shown that optional reduction in language is sensitive to contextual predictability, such that more predictable a linguistic unit is, the more likely it is to get reduced. Yet it is unclear whether these cases of speaker choice are driven by audience design versus toward facilitating production. Here we argue that for a different optionality phenomenon, namely classifier choice in Mandarin Chinese, Uniform Information Density and at least one plausible variant of availability-based production make opposite predictions regarding the relationship between the predictability of the upcoming material and speaker choices. In a corpus analysis of Mandarin Chinese, we show that the distribution of speaker choices supports the availability-based production account and not the Uniform Information Density.</abstract>
       <url hash="9b3bba09">N18-1181</url>
@@ -2266,7 +2266,7 @@
     <paper id="182">
       <title>Spotting Spurious Data with Neural Networks</title>
       <author><first>Hadi</first><last>Amiri</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>2006–2016</pages>
       <abstract>Automatic identification of spurious instances (those with potentially wrong labels in datasets) can improve the quality of existing language resources, especially when annotations are obtained through crowdsourcing or automatically generated based on coded rankings. In this paper, we present effective approaches inspired by queueing theory and psychology of learning to automatically identify spurious instances in datasets. Our approaches discriminate instances based on their “difficulty to learn,” determined by a downstream learner. Our methods can be applied to any dataset assuming the existence of a neural network model for the target task of the dataset. Our best approach outperforms competing state-of-the-art baselines and has a MAP of 0.85 and 0.22 in identifying spurious instances in synthetic and carefully-crowdsourced real-world datasets respectively.</abstract>
@@ -2289,7 +2289,7 @@
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Ana Valeria</first><last>González-Garduño</last></author>
       <author><first>Lea</first><last>Frermann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2028–2038</pages>
       <abstract>When learning POS taggers and syntactic chunkers for low-resource languages, different resources may be available, and often all we have is a small tag dictionary, motivating type-constrained unsupervised induction. Even small dictionaries can improve the performance of unsupervised induction algorithms. This paper shows that performance can be further improved by including data that is readily available or can be easily obtained for most languages, i.e., eye-tracking, speech, or keystroke logs (or any combination thereof). We project information from all these data sources into shared spaces, in which the union of words is represented. For English unsupervised POS induction, the additional information, which is not required at test time, leads to an average error reduction on Ontonotes domains of 1.5% over systems augmented with state-of-the-art word embeddings. On Penn Treebank the best model achieves 5.4% error reduction over a word embeddings baseline. We also achieve significant improvements for syntactic chunk induction. Our analysis shows that improvements are even bigger when the available tag dictionaries are smaller.</abstract>
       <url hash="022843c2">N18-1184</url>
@@ -2300,7 +2300,7 @@
       <title>Challenging Reading Comprehension on Daily Conversation: Passage Completion on Multiparty Dialog</title>
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Tomasz</first><last>Jurczyk</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>2039–2048</pages>
       <abstract>This paper presents a new corpus and a robust deep learning architecture for a task in reading comprehension, passage completion, on multiparty dialog. Given a dialog in text and a passage containing factual descriptions about the dialog where mentions of the characters are replaced by blanks, the task is to fill the blanks with the most appropriate character names that reflect the contexts in the dialog. Since there is no dataset that challenges the task of passage completion in this genre, we create a corpus by selecting transcripts from a TV show that comprise 1,681 dialogs, generating passages for each dialog through crowdsourcing, and annotating mentions of characters in both the dialog and the passages. Given this dataset, we build a deep neural model that integrates rich feature extraction from convolutional neural networks into sequence modeling in recurrent neural networks, optimized by utterance and dialog level attentions. Our model outperforms the previous state-of-the-art model on this task in a different genre using bidirectional LSTM, showing a 13.0+% improvement for longer dialogs. Our analysis shows the effectiveness of the attention mechanisms and suggests a direction to machine comprehension on multiparty dialog.</abstract>
       <url hash="b3d99285">N18-1185</url>
@@ -2321,8 +2321,8 @@
     <paper id="187">
       <title>Dialogue Learning with Human Teaching and Feedback in End-to-End Trainable Task-Oriented Dialogue Systems</title>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>Gokhan</first><last>Tür</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <author><first>Pararth</first><last>Shah</last></author>
       <author><first>Larry</first><last>Heck</last></author>
       <pages>2060–2069</pages>
@@ -2335,7 +2335,7 @@
       <title><fixed-case>LSDSCC</fixed-case>: a Large Scale Domain-Specific Conversational Corpus for Response Generation with Diversity Oriented Evaluation Metrics</title>
       <author><first>Zhen</first><last>Xu</last></author>
       <author><first>Nan</first><last>Jiang</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Wenge</first><last>Rong</last></author>
       <author><first>Bowen</first><last>Wu</last></author>
       <author><first>Baoxun</first><last>Wang</last></author>
@@ -2361,9 +2361,9 @@
     </paper>
     <paper id="190">
       <title>Factors Influencing the Surprising Instability of Word Embeddings</title>
-      <author><first>Laura</first><last>Wendlandt</last></author>
+      <author id="laura-burdick"><first>Laura</first><last>Wendlandt</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2092–2102</pages>
       <abstract>Despite the recent popularity of word embedding methods, there is only a small body of work exploring the limitations of these representations. In this paper, we consider one aspect of embedding spaces, namely their stability. We show that even relatively high frequency words (100-200 occurrences) are often unstable. We provide empirical evidence for how various factors contribute to the stability of word embeddings, and we analyze the effects of stability on downstream tasks.</abstract>
       <url hash="6a28c7fe">N18-1190</url>
@@ -2453,7 +2453,7 @@
     <paper id="198">
       <title>Object Counts! Bringing Explicit Detections Back into Image Captioning</title>
       <author><first>Josiah</first><last>Wang</last></author>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>2180–2193</pages>
       <abstract>The use of explicit object detectors as an intermediate step to image captioning – which used to constitute an essential stage in early work – is often bypassed in the currently dominant end-to-end approaches, where the language model is conditioned directly on a mid-level image embedding. We argue that explicit detections provide rich semantic information, and can thus be used as an interpretable representation to better understand why end-to-end image captioning systems work well. We provide an in-depth analysis of end-to-end image captioning by exploring a variety of cues that can be derived from such object detections. Our study reveals that end-to-end image captioning systems rely on matching image representations to generate captions, and that encoding the frequency, size and position of objects are complementary and all play a role in forming a good image representation. It also reveals that different object categories contribute in different ways towards image captioning.</abstract>
@@ -2480,7 +2480,7 @@
       <author><first>Max</first><last>Smith</last></author>
       <author><first>Noriyuki</first><last>Kojima</last></author>
       <author><first>Jia</first><last>Deng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2206–2216</pages>
       <abstract>We propose a new model for speaker naming in movies that leverages visual, textual, and acoustic modalities in an unified optimization framework. To evaluate the performance of our model, we introduce a new dataset consisting of six episodes of the Big Bang Theory TV show and eighteen full movies covering different genres. Our experiments show that our multimodal model significantly outperforms several competitive baselines on the average weighted F-score metric. To demonstrate the effectiveness of our framework, we design an end-to-end memory network model that leverages our speaker naming model and achieves state-of-the-art results on the subtitles task of the MovieQA 2017 Challenge.</abstract>
       <url hash="26e49beb">N18-1200</url>
@@ -2490,7 +2490,7 @@
     <paper id="201">
       <title>Stacking with Auxiliary Features for Visual Question Answering</title>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>2217–2226</pages>
       <abstract>Visual Question Answering (VQA) is a well-known and challenging task that requires systems to jointly reason about natural language and vision. Deep learning models in various forms have been the standard for solving VQA. However, some of these VQA models are better at certain types of image-question pairs than other models. Ensembling VQA models intelligently to leverage their diverse expertise is, therefore, advantageous. Stacking With Auxiliary Features (SWAF) is an intelligent ensembling technique which learns to combine the results of multiple models using features of the current problem as context. We propose four categories of auxiliary features for ensembling for VQA. Three out of the four categories of features can be inferred from an image-question pair and do not require querying the component models. The fourth category of auxiliary features uses model-specific explanations. In this paper, we describe how we use these various categories of auxiliary features to improve performance for VQA. Using SWAF to effectively ensemble three recent systems, we obtain a new state-of-the-art. Our work also highlights the advantages of explainable AI models.</abstract>
       <url hash="2366057c">N18-1201</url>
@@ -2499,13 +2499,13 @@
     </paper>
     <paper id="202">
       <title>Deep Contextualized Word Representations</title>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Mark</first><last>Neumann</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>Christopher</first><last>Clark</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2227–2237</pages>
       <abstract>We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Our word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus. We show that these representations can be easily added to existing models and significantly improve the state of the art across six challenging NLP problems, including question answering, textual entailment and sentiment analysis. We also present an analysis showing that exposing the deep internals of the pre-trained network is crucial, allowing downstream models to mix different types of semi-supervision signals.</abstract>
       <attachment type="note" hash="a00be88e">N18-1202.Notes.pdf</attachment>
@@ -2531,7 +2531,7 @@
       <title>Neural Text Generation in Stories Using Entity Representations as Context</title>
       <author><first>Elizabeth</first><last>Clark</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2250–2260</pages>
       <abstract>We introduce an approach to neural text generation that explicitly represents entities mentioned in the text. Entity representations are vectors that are updated as the text proceeds; they are designed specifically for narrative text like fiction or news stories. Our experiments demonstrate that modeling entities offers a benefit in two automatic evaluations: mention generation (in which a model chooses which entity to mention next and which words to use in the mention) and selection between a correct next sentence and a distractor from later in the same story. We also conduct a human evaluation on automatically generated text in story contexts; this study supports our emphasis on entities and suggests directions for further research.</abstract>
       <url hash="b228c9b3">N18-1204</url>
@@ -2558,9 +2558,9 @@
     <meta>
       <booktitle>Proceedings of the 2018 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers)</booktitle>
       <url hash="77ab36c3">N18-2</url>
-      <editor><first>Marilyn</first><last>Walker</last></editor>
+      <editor id="marilyn-walker"><first>Marilyn</first><last>Walker</last></editor>
       <editor><first>Heng</first><last>Ji</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <doi>10.18653/v1/N18-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New Orleans, Louisiana</address>
@@ -2611,10 +2611,10 @@
       <title>Integrating Stance Detection and Fact Checking in a Unified Corpus</title>
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>21–27</pages>
       <abstract>A reasonable approach for fact checking a claim involves retrieving potentially relevant documents from different sources (e.g., news websites, social media, etc.), determining the stance of each document with respect to the claim, and finally making a prediction about the claim’s factuality by aggregating the strength of the stances, while taking the reliability of the source into account. Moreover, a fact checking system should be able to explain its decision by providing relevant extracts (rationales) from the documents. Yet, this setup is not directly supported by existing datasets, which treat fact checking, document retrieval, source credibility, stance detection and rationale extraction as independent tasks. In this paper, we support the interdependencies between these tasks as annotations in the same corpus. We implement this setup on an Arabic fact checking corpus, the first of its kind.</abstract>
       <url hash="950eeb12">N18-2004</url>
@@ -2650,7 +2650,7 @@
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Qiao</first><last>Jin</last></author>
       <author><first>Zhilin</first><last>Yang</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>42–48</pages>
       <abstract>Many problems in NLP require aggregating information from multiple mentions of the same entity which may be far apart in the text. Existing Recurrent Neural Network (RNN) layers are biased towards short-term dependencies and hence not suited to such tasks. We present a recurrent layer which is instead biased towards coreferent dependencies. The layer uses coreference annotations extracted from an external system to connect entity mentions belonging to the same cluster. Incorporating this layer into a state-of-the-art reading comprehension model improves performance on three datasets – Wikihop, LAMBADA and the bAbi AI tasks – with large gains when training data is scarce.</abstract>
@@ -2661,7 +2661,7 @@
     <paper id="8">
       <title>Automatic Dialogue Generation with Expressed Emotions</title>
       <author><first>Chenyang</first><last>Huang</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
       <author><first>Nouha</first><last>Dziri</last></author>
       <pages>49–54</pages>
@@ -2718,7 +2718,7 @@
     </paper>
     <paper id="13">
       <title>Sentence Simplification with Memory-Augmented Neural Networks</title>
-      <author><first>Tu</first><last>Vu</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
       <author><first>Baotian</first><last>Hu</last></author>
       <author><first>Tsendsuren</first><last>Munkhdalai</last></author>
       <author><first>Hong</first><last>Yu</last></author>
@@ -2731,7 +2731,7 @@
     <paper id="14">
       <title>A Corpus of Non-Native Written <fixed-case>E</fixed-case>nglish Annotated for Metaphor</title>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
-      <author><first>Chee Wee (Ben)</first><last>Leong</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee (Ben)</first><last>Leong</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <pages>86–91</pages>
       <abstract>We present a corpus of 240 argumentative essays written by non-native speakers of English annotated for metaphor. The corpus is made publicly available. We provide benchmark performance of state-of-the-art systems on this new corpus, and explore the relationship between writing proficiency and metaphor use.</abstract>
@@ -2770,8 +2770,8 @@
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>107–112</pages>
       <abstract>Large-scale datasets for natural language inference are created by presenting crowd workers with a sentence (premise), and asking them to generate three new sentences (hypotheses) that it entails, contradicts, or is logically neutral with respect to. We show that, in a significant portion of such data, this protocol leaves clues that make it possible to identify the label by looking only at the hypothesis, without observing the premise. Specifically, we show that a simple text categorization model can correctly classify the hypothesis alone in about 67% of SNLI (Bowman et. al, 2015) and 53% of MultiNLI (Williams et. al, 2017). Our analysis reveals that specific linguistic phenomena such as negation and vagueness are highly correlated with certain inference classes. Our findings suggest that the success of natural language inference models to date has been overestimated, and that the task remains a hard open problem.</abstract>
       <url hash="6cfcfb08">N18-2017</url>
@@ -2781,7 +2781,7 @@
     <paper id="18">
       <title>Humor Recognition Using Deep Learning</title>
       <author><first>Peng-Yu</first><last>Chen</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>113–117</pages>
       <abstract>Humor is an essential but most fascinating element in personal communication. How to build computational models to discover the structures of humor, recognize humor and even generate humor remains a challenge and there have been yet few attempts on it. In this paper, we construct and collect four datasets with distinct joke types in both English and Chinese and conduct learning experiments on humor recognition. We implement a Convolutional Neural Network (CNN) with extensive filter size, number and Highway Networks to increase the depth of networks. Results show that our model outperforms in recognition of different types of humor with benchmarks collected in both English and Chinese languages on accuracy, precision, and recall in comparison to previous works.</abstract>
       <url hash="7fc68d18">N18-2018</url>
@@ -2848,7 +2848,7 @@
     <paper id="24">
       <title>Analogies in Complex Verb Meaning Shifts: the Effect of Affect in Semantic Similarity Models</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>150–156</pages>
       <abstract>We present a computational model to detect and distinguish analogies in meaning shifts between German base and complex verbs. In contrast to corpus-based studies, a novel dataset demonstrates that “regular” shifts represent the smallest class. Classification experiments relying on a standard similarity model successfully distinguish between four types of shifts, with verb classes boosting the performance, and affective features for abstractness, emotion and sentiment representing the most salient indicators.</abstract>
       <url hash="8310cf4f">N18-2024</url>
@@ -2879,7 +2879,7 @@
     <paper id="27">
       <title>Diachronic Usage Relatedness (<fixed-case>DUR</fixed-case>el): A Framework for the Annotation of Lexical Semantic Change</title>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Stefanie</first><last>Eckmann</last></author>
       <pages>169–174</pages>
       <abstract>We propose a framework that extends synchronic polysemy annotation to diachronic changes in lexical meaning, to counteract the lack of resources for evaluating computational models of lexical semantic change. Our framework exploits an intuitive notion of semantic relatedness, and distinguishes between innovative and reductive meaning changes with high inter-annotator agreement. The resulting test set for German comprises ratings from five annotators for the relatedness of 1,320 use pairs across 22 target words.</abstract>
@@ -2918,7 +2918,7 @@
       <author><first>Fabienne</first><last>Braune</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Tobias</first><last>Eder</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>188–193</pages>
       <abstract>Bilingual word embeddings are useful for bilingual lexicon induction, the task of mining translations of given words. Many studies have shown that bilingual word embeddings perform well for bilingual lexicon induction but they focused on frequent words in general domains. For many applications, bilingual lexicon induction of rare and domain-specific words is of critical importance. Therefore, we design a new task to evaluate bilingual word embeddings on rare words in different domains. We show that state-of-the-art approaches fail on this task and present simple new techniques to improve bilingual word embeddings for mining rare words. We release new gold standard datasets and code to stimulate research on this task.</abstract>
       <url hash="f53d2d5e">N18-2030</url>
@@ -2938,7 +2938,7 @@
     <paper id="32">
       <title>Introducing Two <fixed-case>V</fixed-case>ietnamese Datasets for Evaluating Semantic Models of (Dis-)Similarity and Relatedness</title>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>199–205</pages>
       <abstract>We present two novel datasets for the low-resource language Vietnamese to assess models of semantic similarity: ViCon comprises pairs of synonyms and antonyms across word classes, thus offering data to distinguish between similarity and dissimilarity. ViSim-400 provides degrees of similarity across five semantic relations, as rated by human judges. The two datasets are verified through standard co-occurrence and neural network models, showing results comparable to the respective English datasets.</abstract>
@@ -2949,7 +2949,7 @@
     <paper id="33">
       <title>Lexical Substitution for Evaluating Compositional Distributional Models</title>
       <author><first>Maja</first><last>Buljan</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>206–211</pages>
       <abstract>Compositional Distributional Semantic Models (CDSMs) model the meaning of phrases and sentences in vector space. They have been predominantly evaluated on limited, artificial tasks such as semantic sentence similarity on hand-constructed datasets. This paper argues for lexical substitution (LexSub) as a means to evaluate CDSMs. LexSub is a more natural task, enables us to evaluate meaning composition at the level of individual words, and provides a common ground to compare CDSMs with dedicated LexSub models. We create a LexSub dataset for CDSM evaluation from a corpus with manual “all-words” LexSub annotation. Our experiments indicate that the Practical Lexical Function CDSM outperforms simple component-wise CDSMs and performs on par with the context2vec LexSub model using the same context.</abstract>
@@ -3006,7 +3006,7 @@
       <title>Sluice Resolution without Hand-Crafted Features over Brittle Syntax Trees</title>
       <author><first>Ola</first><last>Rønning</last></author>
       <author><first>Daniel</first><last>Hardt</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>236–241</pages>
       <abstract>Sluice resolution in English is the problem of finding antecedents of <i>wh</i>-fronted ellipses. Previous work has relied on hand-crafted features over syntax trees that scale poorly to other languages and domains; in particular, to dialogue, which is one of the most interesting applications of sluice resolution. Syntactic information is arguably important for sluice resolution, but we show that multi-task learning with partial parsing as auxiliary tasks effectively closes the gap and buys us an additional 9% error reduction over previous work. Since we are not directly relying on features from partial parsers, our system is more robust to domain shifts, giving a 26% error reduction on embedded sluices in dialogue.</abstract>
       <attachment type="dataset" hash="5a09e6fe">N18-2038.Datasets.zip</attachment>
@@ -3049,8 +3049,8 @@
       <title>Letting Emotions Flow: Success Prediction by Modeling the Flow of Emotions in Books</title>
       <author><first>Suraj</first><last>Maharjan</last></author>
       <author><first>Sudipta</first><last>Kar</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>259–265</pages>
       <abstract>Books have the power to make us feel happiness, sadness, pain, surprise, or sorrow. An author’s dexterity in the use of these emotions captivates readers and makes it difficult for them to put the book down. In this paper, we model the flow of emotions over a book using recurrent neural networks and quantify its usefulness in predicting success in books. We obtained the best weighted F1-score of 69% for predicting books’ success in a multitask setting (simultaneously predicting success and genre of books).</abstract>
@@ -3077,7 +3077,7 @@
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Amit</first><last>Sheth</last></author>
       <pages>271–277</pages>
       <abstract>In recent past, social media has emerged as an active platform in the context of healthcare and medicine. In this paper, we present a study where medical user’s opinions on health-related issues are analyzed to capture the medical sentiment at a blog level. The medical sentiments can be studied in various facets such as medical condition, treatment, and medication that characterize the overall health status of the user. Considering these facets, we treat analysis of this information as a multi-task classification problem. In this paper, we adopt a novel adversarial learning approach for our multi-task learning framework to learn the sentiment’s strengths expressed in a medical blog. Our evaluation shows promising results for our target tasks.</abstract>
@@ -3088,8 +3088,8 @@
     <paper id="45">
       <title>Recurrent Entity Networks with Delayed Memory Update for Targeted Aspect-Based Sentiment Analysis</title>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>278–283</pages>
       <abstract>While neural networks have been shown to achieve impressive results for sentence-level sentiment analysis, targeted aspect-based sentiment analysis (TABSA) — extraction of fine-grained opinion polarity w.r.t. a pre-defined set of aspects — remains a difficult task. Motivated by recent advances in memory-augmented models for machine reading, we propose a novel architecture, utilising external “memory chains” with a delayed memory update mechanism to track entities. On a TABSA task, the proposed model demonstrates substantial improvements over state-of-the-art approaches, including those using external knowledge bases.</abstract>
       <url hash="aeeb6733">N18-2045</url>
@@ -3159,7 +3159,7 @@
       <author><first>Vassilis</first><last>Plachouras</last></author>
       <author><first>Fabio</first><last>Petroni</last></author>
       <author><first>Timothy</first><last>Nugent</last></author>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <pages>315–320</pages>
       <abstract>Taxonomies are often used to look up the concepts they contain in text documents (for instance, to classify a document). The more comprehensive the taxonomy, the higher recall the application has that uses the taxonomy. In this paper, we explore automatic taxonomy augmentation with paraphrases. We compare two state-of-the-art paraphrase models based on Moses, a statistical Machine Translation system, and a sequence-to-sequence neural network, trained on a paraphrase datasets with respect to their abilities to add novel nodes to an existing taxonomy from the risk domain. We conduct component-based and task-based evaluations. Our results show that paraphrasing is a viable method to enrich a taxonomy with more terms, and that Moses consistently outperforms the sequence-to-sequence neural model. To the best of our knowledge, this is the first approach to augment taxonomies with paraphrases.</abstract>
       <url hash="3f510cc6">N18-2051</url>
@@ -3169,7 +3169,7 @@
     <paper id="52">
       <title>A Laypeople Study on Terminology Identification across Domains and Task Definitions</title>
       <author><first>Anna</first><last>Hätty</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>321–326</pages>
       <abstract>This paper introduces a new dataset of term annotation. Given that even experts vary significantly in their understanding of termhood, and that term identification is mostly performed as a binary task, we offer a novel perspective to explore the common, natural understanding of what constitutes a term: Laypeople annotate single-word and multi-word terms, across four domains and across four task definitions. Analyses based on inter-annotator agreement offer insights into differences in term specificity, term granularity and subtermhood.</abstract>
       <url hash="a47b9523">N18-2052</url>
@@ -3190,8 +3190,8 @@
     </paper>
     <paper id="54">
       <title>Cross-language Article Linking Using Cross-Encyclopedia Entity Embedding</title>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>334–339</pages>
       <abstract>Cross-language article linking (CLAL) is the task of finding corresponding article pairs of different languages across encyclopedias. This task is a difficult disambiguation problem in which one article must be selected among several candidate articles with similar titles and contents. Existing works focus on engineering text-based or link-based features for this task, which is a time-consuming job, and some of these features are only applicable within the same encyclopedia. In this paper, we address these problems by proposing cross-encyclopedia entity embedding. Unlike other works, our proposed method does not rely on known cross-language pairs. We apply our method to CLAL between English Wikipedia and Chinese Baidu Baike. Our features improve performance relative to the baseline by 29.62%. Tested 30 times, our system achieved an average improvement of 2.76% over the current best system (26.86% over baseline), a statistically significant result.</abstract>
       <url hash="94a66088">N18-2054</url>
@@ -3200,7 +3200,7 @@
     </paper>
     <paper id="55">
       <title>Identifying the Most Dominant Event in a News Article by Mining Event Coreference Relations</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Kaushik</first><last>Raju</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>340–345</pages>
@@ -3235,7 +3235,7 @@
       <title>Semi-Supervised Event Extraction with Paraphrase Clusters</title>
       <author><first>James</first><last>Ferguson</last></author>
       <author><first>Colin</first><last>Lockard</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>359–364</pages>
       <abstract>Supervised event extraction systems are limited in their accuracy due to the lack of available training data. We present a method for self-training event extraction systems by bootstrapping additional training data. This is done by taking advantage of the occurrence of multiple mentions of the same event instances across newswire articles from multiple sources. If our system can make a high-confidence extraction of some mentions in such a cluster, it can then acquire diverse training examples by adding the other mentions as well. Our experiments show significant performance improvements on multiple event extractors over ACE 2005 and TAC-KBP 2015 datasets.</abstract>
@@ -3260,9 +3260,9 @@
       <title>Syntactic Patterns Improve Information Extraction for Medical Search</title>
       <author><first>Roma</first><last>Patel</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>371–377</pages>
       <abstract>Medical professionals search the published literature by specifying the type of patients, the medical intervention(s) and the outcome measure(s) of interest. In this paper we demonstrate how features encoding syntactic patterns improve the performance of state-of-the-art sequence tagging models (both neural and linear) for information extraction of these medically relevant categories. We present an analysis of the type of patterns exploited and of the semantic space induced for these, i.e., the distributed representations learned for identified multi-token patterns. We show that these learned representations differ substantially from those of the constituent unigrams, suggesting that the patterns capture contextual information that is otherwise lost.</abstract>
       <url hash="d4cc6dbe">N18-2060</url>
@@ -3271,7 +3271,7 @@
     </paper>
     <paper id="61">
       <title>Syntactically Aware Neural Architectures for Definition Extraction</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>378–385</pages>
       <abstract>Automatically identifying definitional knowledge in text corpora (Definition Extraction or DE) is an important task with direct applications in, among others, Automatic Glossary Generation, Taxonomy Learning, Question Answering and Semantic Search. It is generally cast as a binary classification problem between definitional and non-definitional sentences. In this paper we present a set of neural architectures combining Convolutional and Recurrent Neural Networks, which are further enriched by incorporating linguistic information via syntactic dependencies. Our experimental results in the task of sentence classification, on two benchmarking DE datasets (one generic, one domain-specific), show that these models obtain consistent state of the art results. Furthermore, we demonstrate that models trained on clean Wikipedia-like definitions can successfully be applied to more noisy domain-specific corpora.</abstract>
@@ -3362,7 +3362,7 @@
     </paper>
     <paper id="69">
       <title>Defoiling Foiled Image Captions</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>433–438</pages>
@@ -3375,7 +3375,7 @@
     <paper id="70">
       <title>Pragmatically Informative Image Captioning with Character-Level Inference</title>
       <author><first>Reuben</first><last>Cohn-Gordon</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>439–443</pages>
       <abstract>We combine a neural image captioner with a Rational Speech Acts (RSA) model to make a system that is pragmatically informative: its objective is to produce captions that are not merely true but also distinguish their inputs from similar images. Previous attempts to combine RSA with neural image captioning require an inference which normalizes over the entire set of possible utterances. This poses a serious problem of efficiency, previously solved by sampling a small subset of possible utterances. We instead solve this problem by implementing a version of RSA which operates at the level of characters (“a”, “b”, “c”, ...) during the unrolling of the caption. We find that the utterance-level effect of referential captions can be obtained with only character-level decisions. Finally, we introduce an automatic method for testing the performance of pragmatic speaker models, and show that our model outperforms a non-pragmatic baseline as well as a word-level RSA captioner.</abstract>
@@ -3444,8 +3444,8 @@
     <paper id="76">
       <title>What’s in a Domain? Learning Domain-Robust Text Representations using Adversarial Training</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>474–479</pages>
       <abstract>Most real world language problems require learning from heterogenous corpora, raising the problem of learning robust models which generalise well to both similar (<i>in domain</i>)
  and dissimilar (<i>out of domain</i>) instances to those seen in
@@ -3491,7 +3491,7 @@
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>493–499</pages>
       <abstract>We address the problem of simultaneous translation by modifying the Neural MT decoder to operate with dynamically built encoder and attention. We propose a tunable agent which decides the best segmentation strategy for a user-defined BLEU loss and Average Proportion (AP) constraint. Our agent outperforms previously proposed Wait-if-diff and Wait-if-worse agents (Cho and Esipova, 2016) on BLEU with a lower latency. Secondly we proposed data-driven changes to Neural MT training to better match the incremental decoding framework.</abstract>
       <attachment type="note" hash="bedee891">N18-2079.Notes.pdf</attachment>
@@ -3511,7 +3511,7 @@
     <paper id="81">
       <title>Neural Machine Translation Decoding with Terminology Constraints</title>
       <author><first>Eva</first><last>Hasler</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>506–512</pages>
@@ -3524,7 +3524,7 @@
       <title>On the Evaluation of Semantic Phenomena in Neural Machine Translation Using Natural Language Inference</title>
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>513–523</pages>
       <abstract>We propose a process for investigating the extent to which sentence representations arising from neural machine translation (NMT) systems encode distinct semantic phenomena. We use these representations as features to train a natural language inference (NLI) classifier based on datasets recast from existing semantic annotations. In applying this process to a representative NMT system, we find its encoder appears most suited to supporting inferences at the syntax-semantics interface, as compared to anaphora resolution requiring world knowledge. We conclude with a discussion on the merits and potential deficiencies of the existing process, and how it may be improved and extended as a broader framework for evaluating semantic coverage</abstract>
@@ -3560,8 +3560,8 @@
     <paper id="85">
       <title>Are All Languages Equally Hard to Language-Model?</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>536–541</pages>
       <abstract>For general modeling methods applied to diverse languages, a natural question is: how well should we expect our models to work on languages with differing typological profiles? In this work, we develop an evaluation framework for fair cross-linguistic comparison of language models, using translated text so that all models are asked to predict approximately the same information. We then conduct a study on 21 languages, demonstrating that in some languages, the textual expression of the information is harder to predict with both n-gram and LSTM language models. We show complex inflectional morphology to be a cause of performance differences among languages.</abstract>
@@ -3586,8 +3586,8 @@
       <title>Unsupervised Disambiguation of Syncretism in Inflected Lexicons</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>548–553</pages>
       <abstract>Lexical ambiguity makes it difficult to compute useful statistics of a corpus. A given word form might represent any of several morphological feature bundles. One can, however, use unsupervised learning (as in EM) to fit a model that probabilistically disambiguates word forms. We present such an approach, which employs a neural network to smoothly model a prior distribution over feature bundles (even rare ones). Although this basic model does not consider a token’s context, that very property allows it to operate on a simple list of unigram type counts, partitioning each count among different analyses of that unigram. We discuss evaluation metrics for this novel task and report results on 5 languages.</abstract>
       <url hash="3d2aca63">N18-2087</url>
@@ -3613,7 +3613,7 @@
       <author><first>Gabriel</first><last>Stanovsky</last></author>
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>560–568</pages>
       <abstract>We introduce Question-Answer Meaning Representations (QAMRs), which represent the predicate-argument structure of a sentence as a set of question-answer pairs. We develop a crowdsourcing scheme to show that QAMRs can be labeled with very little training, and gather a dataset with over 5,000 sentences and 100,000 questions. A qualitative analysis demonstrates that the crowd-generated question-answer pairs cover the vast majority of predicate-argument relationships in existing datasets (including PropBank, NomBank, and QA-SRL) along with many previously under-resourced ones, including implicit arguments and relations. We also report baseline models for question generation and answering, and summarize a recent approach for using QAMR labels to improve an Open IE system. These results suggest the freely available QAMR data and annotation scheme should support significant future work.</abstract>
       <url hash="736e2089">N18-2089</url>
@@ -3660,7 +3660,7 @@
       <author><first>Zifan</first><last>Li</last></author>
       <author><first>Zilin</first><last>Zhang</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>588–594</pages>
       <abstract>Interacting with relational databases through natural language helps users with any background easily query and analyze a vast amount of data. This requires a system that understands users’ questions and converts them to SQL queries automatically. In this paper, we present a novel approach TypeSQL which formats the problem as a slot filling task in a more reasonable way. In addition, TypeSQL utilizes type information to better understand rare entities and numbers in the questions. We experiment this idea on the WikiSQL dataset and outperform the prior art by 6% in much shorter time. We also show that accessing the content of databases can significantly improve the performance when users’ queries are not well-formed. TypeSQL can reach 82.6% accuracy, a 17.5% absolute improvement compared to the previous content-sensitive model.</abstract>
       <url hash="9f4efcdc">N18-2093</url>
@@ -3671,7 +3671,7 @@
       <title>Community Member Retrieval on Social Media Using Textual Information</title>
       <author><first>Aaron</first><last>Jaech</last></author>
       <author><first>Shobhit</first><last>Hathi</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>595–601</pages>
       <abstract>This paper addresses the problem of community membership detection using only text features in a scenario where a small number of positive labeled examples defines the community. The solution introduces an unsupervised proxy task for learning user embeddings: user re-identification. Experiments with 16 different communities show that the resulting embeddings are more effective for community membership identification than common unsupervised representations.</abstract>
       <url hash="093dbbe5">N18-2094</url>
@@ -3684,7 +3684,7 @@
       <author><first>Yinfei</first><last>Yang</last></author>
       <author><first>Jun</first><last>Zhou</last></author>
       <author><first>Xiaolong</first><last>Li</last></author>
-      <author><first>Forrest Sheng</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest Sheng</first><last>Bao</last></author>
       <pages>602–607</pages>
       <abstract>With the growing amount of reviews in e-commerce websites, it is critical to assess the helpfulness of reviews and recommend them accordingly to consumers. Recent studies on review helpfulness require plenty of labeled samples for each domain/category of interests. However, such an approach based on close-world assumption is not always practical, especially for domains with limited reviews or the “out-of-vocabulary” problem. Therefore, we propose a convolutional neural network (CNN) based model which leverages both word-level and character-based representations. To transfer knowledge between domains, we further extend our model to jointly model different domains with auxiliary domain discriminators. On the Amazon product review dataset, our approach significantly outperforms the state of the art in terms of both accuracy and cross-domain robustness.</abstract>
       <url hash="58299a62">N18-2095</url>
@@ -3707,7 +3707,7 @@
       <author><first>Arman</first><last>Cohan</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Doo Soon</first><last>Kim</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Nazli</first><last>Goharian</last></author>
@@ -3723,7 +3723,7 @@
       <author><first>Anirban</first><last>Laha</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
       <author><first>Preksha</first><last>Nema</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Shreyas</first><last>Shetty</last></author>
       <pages>622–627</pages>
       <abstract>Structured data summarization involves generation of natural language summaries from structured input data. In this work, we consider summarizing structured data occurring in the form of tables as they are prevalent across a wide variety of domains. We formulate the standard table summarization problem, which deals with tables conforming to a single predefined schema. To this end, we propose a mixed hierarchical attention based encoder-decoder model which is able to leverage the structure in addition to the content of the tables. Our experiments on the publicly available weathergov dataset show around 18 BLEU (around 30%) improvement over the current state-of-the-art.</abstract>
@@ -3736,7 +3736,7 @@
       <author><first>Youxuan</first><last>Jiang</last></author>
       <author><first>Catherine</first><last>Finegan-Dollak</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <pages>628–633</pages>
       <abstract>Most summarization research focuses on summarizing the entire given text, but in practice readers are often interested in only one aspect of the document or conversation. We propose targeted summarization as an umbrella category for summarization tasks that intentionally consider only parts of the input data. This covers query-based summarization, update summarization, and a new task we propose where the goal is to summarize a particular aspect of a document. However, collecting data for this new task is hard because directly asking annotators (e.g., crowd workers) to write summaries leads to data with low accuracy when there are a large number of facts to include. We introduce a novel crowdsourcing workflow, Pin-Refine, that allows us to collect high-quality summaries for our task, a necessary step for the development of automatic systems.</abstract>
       <attachment type="dataset" hash="a2cdf112">N18-2099.Datasets.zip</attachment>
@@ -3748,7 +3748,7 @@
       <title><fixed-case>K</fixed-case>ey2<fixed-case>V</fixed-case>ec: Automatic Ranked Keyphrase Extraction from Scientific Articles using Phrase Embeddings</title>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <author><first>John</first><last>Kuriakose</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Roger</first><last>Zimmermann</last></author>
       <pages>634–639</pages>
       <abstract>Keyphrase extraction is a fundamental task in natural language processing that facilitates mapping of documents to a set of representative phrases. In this paper, we present an unsupervised technique (Key2Vec) that leverages phrase embeddings for ranking keyphrases extracted from scientific articles. Specifically, we propose an effective way of processing text documents for training multi-word phrase embeddings that are used for thematic representation of scientific articles and ranking of keyphrases extracted from them using theme-weighted PageRank. Evaluations are performed on benchmark datasets producing state-of-the-art results.</abstract>
@@ -3763,7 +3763,7 @@
       <author><first>Hady</first><last>Elsahar</last></author>
       <author><first>Pavlos</first><last>Vougiouklis</last></author>
       <author><first>Christophe</first><last>Gravier</last></author>
-      <author><first>Frédérique</first><last>Laforest</last></author>
+      <author id="frederique-laforest"><first>Frédérique</first><last>Laforest</last></author>
       <author><first>Jonathon</first><last>Hare</last></author>
       <author><first>Elena</first><last>Simperl</last></author>
       <pages>640–645</pages>
@@ -3841,7 +3841,7 @@
       <title>Higher-Order Coreference Resolution with Coarse-to-Fine Inference</title>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Luheng</first><last>He</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>687–692</pages>
       <abstract>We introduce a fully-differentiable approximation to higher-order inference for coreference resolution. Our approach uses the antecedent distribution from a span-ranking architecture as an attention mechanism to iteratively refine span representations. This enables the model to softly consider multiple hops in the predicted clusters. To alleviate the computational cost of this iterative process, we introduce a coarse-to-fine approach that incorporates a less accurate but more efficient bilinear factor, enabling more aggressive pruning without hurting accuracy. Compared to the existing state-of-the-art span-ranking approach, our model significantly improves accuracy on the English OntoNotes benchmark, while being far more computationally efficient.</abstract>
       <url hash="2ebc6900">N18-2108</url>
@@ -3887,9 +3887,9 @@
       <author><first>Paweł</first><last>Budzianowski</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>714–719</pages>
       <abstract>Reinforcement learning (RL) is a promising approach to solve dialogue policy optimisation. Traditional RL algorithms, however, fail to scale to large domains due to the curse of dimensionality. We propose a novel Dialogue Management architecture, based on Feudal RL, which decomposes the decision into two steps; a first step where a master policy selects a subset of primitive actions, and a second step where a primitive action is chosen from the selected subset. The structural information included in the domain ontology is used to abstract the dialogue state space, taking the decisions at each step using different parts of the abstracted state. This, combined with an information sharing mechanism between slots, increases the scalability to large domains. We show that an implementation of this approach, based on Deep-Q Networks, significantly outperforms previous state of the art in several dialogue domains and environments, without the need of any additional reward signal.</abstract>
       <url hash="4df78d86">N18-2112</url>
@@ -3899,7 +3899,7 @@
     <paper id="113">
       <title>Evaluating Historical Text Normalization Systems: How Well Do They Generalize?</title>
       <author><first>Alexander</first><last>Robertson</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>720–725</pages>
       <abstract>We highlight several issues in the evaluation of historical text normalization systems that make it hard to tell how well these systems would actually work in practice—i.e., for new datasets or languages; in comparison to more naïve systems; or as a preprocessing step for downstream NLP tools. We illustrate these issues and exemplify our proposed evaluation practices by comparing two neural models against a naïve baseline system. We show that the neural models generalize well to unseen words in tests on five languages; nevertheless, they provide no clear benefit over the naïve baseline for downstream POS tagging of an English historical collection. We conclude that future work should include more rigorous evaluation, including both intrinsic and extrinsic measures where possible.</abstract>
       <url hash="3b703d81">N18-2113</url>
@@ -3922,7 +3922,7 @@
       <author><first>Po-Sen</first><last>Huang</last></author>
       <author><first>Chenglong</first><last>Wang</last></author>
       <author><first>Rishabh</first><last>Singh</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <pages>732–738</pages>
       <abstract>In conventional supervised training, a model is trained to fit all the training examples. However, having a monolithic model may not always be the best strategy, as examples could vary widely. In this work, we explore a different learning protocol that treats each example as a unique pseudo-task, by reducing the original learning problem to a few-shot meta-learning scenario with the help of a domain-dependent relevance function. When evaluated on the WikiSQL dataset, our approach leads to faster convergence and achieves 1.1%–5.4% absolute accuracy gains over the non-meta-learning counterparts.</abstract>
@@ -3945,7 +3945,7 @@
       <title>Role-specific Language Models for Processing Recorded Neuropsychological Exams</title>
       <author><first>Tuka</first><last>Al Hanai</last></author>
       <author><first>Rhoda</first><last>Au</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>746–752</pages>
       <abstract>Neuropsychological examinations are an important screening tool for the presence of cognitive conditions (e.g. Alzheimer’s, Parkinson’s Disease), and require a trained tester to conduct the exam through spoken interactions with the subject. While audio is relatively easy to record, it remains a challenge to automatically diarize (who spoke when?), decode (what did they say?), and assess a subject’s cognitive health. This paper demonstrates a method to determine the cognitive health (impaired or not) of 92 subjects, from audio that was diarized using an automatic speech recognition system trained on TED talks and on the structured language used by testers and subjects. Using leave-one-out cross validation and logistic regression modeling we show that even with noisily decoded data (81% WER) we can still perform accurate enough diarization (0.02% confusion rate) to determine the cognitive state of a subject (0.76 AUC).</abstract>
       <url hash="acf68ccf">N18-2117</url>
@@ -3979,7 +3979,7 @@
     </paper>
     <paper id="120">
       <title>Learning to Color from Language</title>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Larry</first><last>Davis</last></author>
@@ -4050,8 +4050,8 @@
     <meta>
       <booktitle>Proceedings of the 2018 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)</booktitle>
       <url hash="a1b90050">N18-3</url>
-      <editor><first>Srinivas</first><last>Bangalore</last></editor>
-      <editor><first>Jennifer</first><last>Chu-Carroll</last></editor>
+      <editor id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></editor>
+      <editor id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></editor>
       <editor><first>Yunyao</first><last>Li</last></editor>
       <doi>10.18653/v1/N18-3</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4068,7 +4068,7 @@
       <title>Scalable Wide and Deep Learning for Computer Assisted Coding</title>
       <author><first>Marilisa</first><last>Amoia</last></author>
       <author><first>Frank</first><last>Diehl</last></author>
-      <author><first>Jesus</first><last>Gimenez</last></author>
+      <author id="jesus-gimenez"><first>Jesus</first><last>Gimenez</last></author>
       <author><first>Joel</first><last>Pinto</last></author>
       <author><first>Raphael</first><last>Schumann</last></author>
       <author><first>Fabian</first><last>Stemmer</last></author>
@@ -4084,8 +4084,8 @@
     <paper id="2">
       <title>Neural Network based Extreme Classification and Similarity Models for Product Matching</title>
       <author><first>Kashif</first><last>Shah</last></author>
-      <author><first>Selcuk</first><last>Kopru</last></author>
-      <author><first>Jean-David</first><last>Ruvini</last></author>
+      <author id="selcuk-kopru"><first>Selcuk</first><last>Kopru</last></author>
+      <author id="jean-david-ruvini"><first>Jean-David</first><last>Ruvini</last></author>
       <pages>8–15</pages>
       <abstract>Matching a seller listed item to an appropriate product has become a fundamental and one of the most significant step for e-commerce platforms for product based experience. It has a huge impact on making the search effective, search engine optimization, providing product reviews and product price estimation etc. along with many other advantages for a better user experience. As significant and vital it has become, the challenge to tackle the complexity has become huge with the exponential growth of individual and business sellers trading millions of products everyday. We explored two approaches; classification based on shallow neural network and similarity based on deep siamese network. These models outperform the baseline by more than 5% in term of accuracy and are capable of extremely efficient training and inference.</abstract>
       <url hash="69d7ab91">N18-3002</url>
@@ -4098,7 +4098,7 @@
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Dongchan</first><last>Kim</last></author>
       <author><first>Joo-Kyung</first><last>Kim</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>16–24</pages>
       <abstract>Intelligent personal digital assistants (IPDAs), a popular real-life application with spoken language understanding capabilities, can cover potentially thousands of overlapping domains for natural language understanding, and the task of finding the best domain to handle an utterance becomes a challenging problem on a large scale. In this paper, we propose a set of efficient and scalable shortlisting-reranking neural models for effective large-scale domain classification for IPDAs. The shortlisting stage focuses on efficiently trimming all domains down to a list of k-best candidate domains, and the reranking stage performs a list-wise reranking of the initial k-best domains with additional contextual information. We show the effectiveness of our approach with extensive experiments on 1,500 IPDA domains.</abstract>
       <url hash="2f6b398e">N18-3003</url>
@@ -4135,9 +4135,9 @@
     <paper id="6">
       <title>Bootstrapping a Neural Conversational Agent with Dialogue Self-Play, Crowdsourcing and On-Line Reinforcement Learning</title>
       <author><first>Pararth</first><last>Shah</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>Gokhan</first><last>Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tür</last></author>
       <pages>41–51</pages>
       <abstract>End-to-end neural models show great promise towards building conversational agents that are trained from data and on-line experience using supervised and reinforcement learning. However, these models require a large corpus of dialogues to learn effectively. For goal-oriented dialogues, such datasets are expensive to collect and annotate, since each task involves a separate schema and database of entities. Further, the Wizard-of-Oz approach commonly used for dialogue collection does not provide sufficient coverage of salient dialogue flows, which is critical for guaranteeing an acceptable task completion rate in consumer-facing conversational agents. In this paper, we study a recently proposed approach for building an agent for arbitrary tasks by combining dialogue self-play and crowd-sourcing to generate fully-annotated dialogues with diverse and natural utterances. We discuss the advantages of this approach for industry applications of conversational agents, wherein an agent can be rapidly bootstrapped to deploy in front of users and further optimized via interactive learning from actual users of the system.</abstract>
       <url hash="6c9e2991">N18-3006</url>
@@ -4148,7 +4148,7 @@
     <paper id="7">
       <title>Quality Estimation for Automatically Generated Titles of e<fixed-case>C</fixed-case>ommerce Browse Pages</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
       <pages>52–59</pages>
       <abstract>At eBay, we are automatically generating a large amount of natural language titles for eCommerce browse pages using machine translation (MT) technology. While automatic approaches can generate millions of titles very fast, they are prone to errors. We therefore develop quality estimation (QE) methods which can automatically detect titles with low quality in order to prevent them from going live. In this paper, we present different approaches: The first one is a Random Forest (RF) model that explores hand-crafted, robust features, which are a mix of established features commonly used in Machine Translation Quality Estimation (MTQE) and new features developed specifically for our task. The second model is based on Siamese Networks (SNs) which embed the metadata input sequence and the generated title in the same space and do not require hand-crafted features at all. We thoroughly evaluate and compare those approaches on in-house data. While the RF models are competitive for scenarios with smaller amounts of training data and somewhat more robust, they are clearly outperformed by the SN models when the amount of training data is larger.</abstract>
@@ -4158,7 +4158,7 @@
     </paper>
     <paper id="8">
       <title>Atypical Inputs in Educational Applications</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Anastassia</first><last>Loukina</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
@@ -4188,7 +4188,7 @@
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Marina</first><last>Danilevsky</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Huaiyu</first><last>Zhu</last></author>
       <pages>76–83</pages>
       <abstract>The rise of enterprise applications over unstructured and semi-structured documents poses new challenges to text understanding systems across multiple dimensions. We present SystemT, a declarative text understanding system that addresses these challenges and has been deployed in a wide range of enterprise applications. We highlight the design considerations and decisions behind SystemT in addressing the needs of the enterprise setting. We also summarize the impact of SystemT on business and education.</abstract>
@@ -4213,12 +4213,12 @@
       <author><first>Kyle</first><last>Lo</last></author>
       <author><first>Tyler</first><last>Murray</last></author>
       <author><first>Hsu-Han</first><last>Ooi</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Joanna</first><last>Power</last></author>
       <author><first>Sam</first><last>Skjonsberg</last></author>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Chris</first><last>Wilhelm</last></author>
-      <author id="zheng-yuan"><first>Zheng</first><last>Yuan</last></author>
+      <author><first>Zheng</first><last>Yuan</last></author>
       <author><first>Madeleine</first><last>van Zuylen</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>84–91</pages>
@@ -4244,7 +4244,7 @@
       <title>Accelerating <fixed-case>NMT</fixed-case> Batched Beam Decoding with <fixed-case>LMBR</fixed-case> Posteriors for Deployment</title>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
       <author><first>William</first><last>Tambellini</last></author>
-      <author><first>Adrià</first><last>De Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>De Gispert</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>106–113</pages>
@@ -4267,7 +4267,7 @@
     </paper>
     <paper id="15">
       <title>From dictations to clinical reports using machine translation</title>
-      <author><first>Gregory</first><last>Finley</last></author>
+      <author id="gregory-finley"><first>Gregory</first><last>Finley</last></author>
       <author><first>Wael</first><last>Salloum</last></author>
       <author><first>Najmeh</first><last>Sadoughi</last></author>
       <author><first>Erik</first><last>Edwards</last></author>
@@ -4275,7 +4275,7 @@
       <author><first>Nico</first><last>Axtmann</last></author>
       <author><first>Michael</first><last>Brenndoerfer</last></author>
       <author><first>Mark</first><last>Miller</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>121–128</pages>
       <abstract>A typical workflow to document clinical encounters entails dictating a summary, running speech recognition, and post-processing the resulting text into a formatted letter. Post-processing entails a host of transformations including punctuation restoration, truecasing, marking sections and headers, converting dates and numerical expressions, parsing lists, etc. In conventional implementations, most of these tasks are accomplished by individual modules. We introduce a novel holistic approach to post-processing that relies on machine callytranslation. We show how this technique outperforms an alternative conventional system—even learning to correct speech recognition errors during post-processing—while being much simpler to maintain.</abstract>
       <url hash="307979b8">N18-3015</url>
@@ -4298,7 +4298,7 @@
       <title>Selecting Machine-Translated Data for Quick Bootstrapping of a Natural Language Understanding System</title>
       <author><first>Judith</first><last>Gaspers</last></author>
       <author><first>Penny</first><last>Karanasou</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <pages>137–144</pages>
       <abstract>This paper investigates the use of Machine Translation (MT) to bootstrap a Natural Language Understanding (NLU) system for a new language for the use case of a large-scale voice-controlled device. The goal is to decrease the cost and time needed to get an annotated corpus for the new language, while still having a large enough coverage of user requests. Different methods of filtering MT data in order to keep utterances that improve NLU performance and language-specific post-processing methods are investigated. These methods are tested in a large-scale NLU task with translating around 10 millions training utterances from English to German. The results show a large improvement for using MT data over a grammar-based and over an in-house data collection baseline, while reducing the manual effort greatly. Both filtering and post-processing approaches improve results further.</abstract>
       <url hash="7e31ed3d">N18-3017</url>
@@ -4345,8 +4345,8 @@
     </paper>
     <paper id="21">
       <title>A Novel Approach to Part Name Discovery in Noisy Text</title>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
-      <author><first>Daniel</first><last>Whyatt</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="daniel-whyatt"><first>Daniel</first><last>Whyatt</last></author>
       <author><first>Anne</first><last>Kao</last></author>
       <pages>170–176</pages>
       <abstract>As a specialized example of information extraction, part name extraction is an area that presents unique challenges. Part names are typically multi-word terms longer than two words. There is little consistency in how terms are described in noisy free text, with variations spawned by typos, ad hoc abbreviations, acronyms, and incomplete names. This makes search and analyses of parts in these data extremely challenging. In this paper, we present our algorithm, PANDA (Part Name Discovery Analytics), based on a unique method that exploits statistical, linguistic and machine learning techniques to discover part names in noisy text such as that in manufacturing quality documentation, supply chain management records, service communication logs, and maintenance reports. Experiments show that PANDA is scalable and outperforms existing techniques significantly.</abstract>
@@ -4425,7 +4425,7 @@
       <title>Document-based Recommender System for Job Postings using Dense Representations</title>
       <author><first>Ahmed</first><last>Elsafty</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>216–224</pages>
       <abstract>Job boards and professional social networks heavily use recommender systems in order to better support users in exploring job advertisements. Detecting the similarity between job advertisements is important for job recommendation systems as it allows, for example, the application of item-to-item based recommendations. In this work, we research the usage of dense vector representations to enhance a large-scale job recommendation system and to rank German job advertisements regarding their similarity. We follow a two-folded evaluation scheme: (1) we exploit historic user interactions to automatically create a dataset of similar jobs that enables an offline evaluation. (2) In addition, we conduct an online A/B test and evaluate the best performing method on our platform reaching more than 1 million users. We achieve the best results by combining job titles with full-text job descriptions. In particular, this method builds dense document representation using words of the titles to weigh the importance of words of the full-text description. In the online evaluation, this approach allows us to increase the click-through rate on job recommendations for active users by 8.0%.</abstract>
       <url hash="64cc1231">N18-3027</url>
@@ -4437,7 +4437,7 @@
     <meta>
       <booktitle>Proceedings of the 2018 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Student Research Workshop</booktitle>
       <url hash="afb35b52">N18-4</url>
-      <editor><first>Silvio Ricardo</first><last>Cordeiro</last></editor>
+      <editor id="silvio-cordeiro"><first>Silvio Ricardo</first><last>Cordeiro</last></editor>
       <editor><first>Shereen</first><last>Oraby</last></editor>
       <editor><first>Umashanthi</first><last>Pavalanathan</last></editor>
       <editor><first>Kyeongmin</first><last>Rim</last></editor>
@@ -4467,7 +4467,7 @@
       <title>Combining Abstractness and Language-specific Theoretical Indicators for Detecting Non-Literal Usage of <fixed-case>E</fixed-case>stonian Particle Verbs</title>
       <author><first>Eleri</first><last>Aedmaa</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>9–16</pages>
       <abstract>This paper presents two novel datasets and a random-forest classifier to automatically predict literal vs. non-literal language usage for a highly frequent type of multi-word expression in a low-resource language, i.e., Estonian. We demonstrate the value of language-specific indicators induced from theoretical linguistic research, which outperform a high majority baseline when combined with language-independent features of non-literal language (such as abstractness).</abstract>
       <url hash="085ea058">N18-4002</url>
@@ -4488,7 +4488,7 @@
       <author><first>Ali</first><last>Emami</last></author>
       <author><first>Adam</first><last>Trischler</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>25–31</pages>
       <abstract>We introduce an automatic system that performs well on two common-sense reasoning tasks, the Winograd Schema Challenge (WSC) and the Choice of Plausible Alternatives (COPA). Problem instances from these tasks require diverse, complex forms of inference and knowledge to solve. Our method uses a knowledge-hunting module to gather text from the web, which serves as evidence for candidate problem resolutions. Given an input problem, our system generates relevant queries to send to a search engine. It extracts and classifies knowledge from the returned results and weighs it to make a resolution. Our approach improves F1 performance on the WSC by 0.16 over the previous best and is competitive with the state-of-the-art on COPA, demonstrating its general applicability.</abstract>
       <url hash="cb81b1cf">N18-4004</url>
@@ -4518,7 +4518,7 @@
     </paper>
     <paper id="7">
       <title>Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets</title>
-      <author><first>Prathusha</first><last>Kameswara Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>Kameswara Sarma</last></author>
       <pages>46–53</pages>
       <abstract>This research proposal describes two algorithms that are aimed at learning word embeddings for data sparse and sentiment rich data sets. The goal is to use word embeddings adapted for domain specific data sets in downstream applications such as sentiment classification. The first approach learns word embeddings in a supervised fashion via SWESA (Supervised Word Embeddings for Sentiment Analysis), an algorithm for sentiment analysis on data sets that are of modest size. SWESA leverages document labels to jointly learn polarity-aware word embeddings and a classifier to classify unseen documents. In the second approach domain adapted (DA) word embeddings are learned by exploiting the specificity of domain specific data sets and the breadth of generic word embeddings. The new embeddings are formed by aligning corresponding word vectors using Canonical Correlation Analysis (CCA) or the related nonlinear Kernel CCA. Experimental results on binary sentiment classification tasks using both approaches for standard data sets are presented.</abstract>
       <url hash="45e49859">N18-4007</url>
@@ -4563,7 +4563,7 @@
       <title>Towards Generating Personalized Hospitalization Summaries</title>
       <author><first>Sabita</first><last>Acharya</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Andrew</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew</first><last>Boyd</last></author>
       <author><first>Richard</first><last>Cameron</last></author>
       <author><first>Karen</first><last>Dunn Lopez</last></author>
       <author><first>Pamela</first><last>Martyn-Nemeth</last></author>
@@ -4589,7 +4589,7 @@
     <paper id="13">
       <title><fixed-case>L</fixed-case>ist<fixed-case>O</fixed-case>ps: A Diagnostic Dataset for Latent Tree Learning</title>
       <author><first>Nikita</first><last>Nangia</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>92–99</pages>
       <abstract>Latent tree learning models learn to parse a sentence without syntactic supervision, and use that parse to build the sentence representation. Existing work on such models has shown that, while they perform well on tasks like sentence classification, they do not learn grammars that conform to any plausible semantic or syntactic formalism (Williams et al., 2018a). Studying the parsing ability of such models in natural language can be challenging due to the inherent complexities of natural language, like having several valid parses for a single sentence. In this paper we introduce ListOps, a toy dataset created to study the parsing ability of latent tree models. ListOps sequences are in the style of prefix arithmetic. The dataset is designed to have a single correct parsing strategy that a system needs to learn to succeed at the task. We show that the current leading latent tree models are unable to learn to parse and succeed at ListOps. These models achieve accuracies worse than purely sequential RNNs.</abstract>
       <url hash="bb25b528">N18-4013</url>
@@ -4632,7 +4632,7 @@
     <paper id="17">
       <title>Training a Ranking Function for Open-Domain Question Answering</title>
       <author><first>Phu Mon</first><last>Htut</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <pages>120–127</pages>
       <abstract>In recent years, there have been amazing advances in deep learning methods for machine reading. In machine reading, the machine reader has to extract the answer from the given ground truth paragraph. Recently, the state-of-the-art machine reading models achieve human level performance in SQuAD which is a reading comprehension-style question answering (QA) task. The success of machine reading has inspired researchers to combine Information Retrieval with machine reading to tackle open-domain QA. However, these systems perform poorly compared to reading comprehension-style QA because it is difficult to retrieve the pieces of paragraphs that contain the answer to the question. In this study, we propose two neural network rankers that assign scores to different passages based on their likelihood of containing the answer to a given question. Additionally, we analyze the relative importance of semantic similarity and word level relevance matching in open-domain QA.</abstract>
@@ -4646,7 +4646,7 @@
       <author><first>Aditya</first><last>Bohra</last></author>
       <author><first>Vinay</first><last>Singh</last></author>
       <author><first>Syed Sarfaraz</first><last>Akhtar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>128–135</pages>
       <abstract>Emotion Prediction is a Natural Language Processing (NLP) task dealing with detection and classification of emotions in various monolingual and bilingual texts. While some work has been done on code-mixed social media text and in emotion prediction separately, our work is the first attempt which aims at identifying the emotion associated with Hindi-English code-mixed social media text. In this paper, we analyze the problem of emotion identification in code-mixed content and present a Hindi-English code-mixed corpus extracted from twitter and annotated with the associated emotion. For every tweet in the dataset, we annotate the source language of all the words present, and also the causal language of the expressed emotion. Finally, we propose a supervised classification system which uses various machine learning techniques for detecting the emotion associated with the text using a variety of character level, word level, and lexicon based features.</abstract>
       <url hash="5a09cd5a">N18-4018</url>
@@ -4658,8 +4658,8 @@
       <author><first>McKenna</first><last>Tornblad</last></author>
       <author><first>Luke</first><last>Lapresi</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
-      <author><first>Raymond</first><last>Ptucha</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="raymond-ptucha"><first>Raymond</first><last>Ptucha</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <pages>136–143</pages>
       <abstract>While labor issues and quality assurance in crowdwork are increasingly studied, how annotators make sense of texts and how they are personally impacted by doing so are not. We study these questions via a narrative-sorting annotation task, where carefully selected (by sequentiality, topic, emotional content, and length) collections of tweets serve as examples of everyday storytelling. As readers process these narratives, we measure their facial expressions, galvanic skin response, and self-reported reactions. From the perspective of annotator well-being, a reassuring outcome was that the sorting task did not cause a measurable stress response, however readers reacted to humor. In terms of sensemaking, readers were more confident when sorting sequential, target-topical, and highly emotional tweets. As crowdsourcing becomes more common, this research sheds light onto the perceptive capabilities and emotional impact of human readers.</abstract>
       <url hash="3a149396">N18-4019</url>
@@ -4722,7 +4722,7 @@
     </paper>
     <paper id="3">
       <title>An automated medical scribe for documenting clinical encounters</title>
-      <author><first>Gregory</first><last>Finley</last></author>
+      <author id="gregory-finley"><first>Gregory</first><last>Finley</last></author>
       <author><first>Erik</first><last>Edwards</last></author>
       <author><first>Amanda</first><last>Robinson</last></author>
       <author><first>Michael</first><last>Brenndoerfer</last></author>
@@ -4730,7 +4730,7 @@
       <author><first>James</first><last>Fone</last></author>
       <author><first>Nico</first><last>Axtmann</last></author>
       <author><first>Mark</first><last>Miller</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>11–15</pages>
       <abstract>A medical scribe is a clinical professional who charts patient–physician encounters in real time, relieving physicians of most of their administrative burden and substantially increasing productivity and job satisfaction. We present a complete implementation of an automated medical scribe. Our system can serve either as a scalable, standardized, and economical alternative to human scribes; or as an assistive tool for them, providing a first draft of a report along with a convenient means to modify it. This solution is, to our knowledge, the first automated scribe ever presented and relies upon multiple speech and language technologies, including speaker diarization, medical speech recognition, knowledge extraction, and natural language generation.</abstract>
       <url hash="48940bd9">N18-5003</url>
@@ -4739,7 +4739,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>CL</fixed-case> Scholar: The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Knowledge Graph Miner</title>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <author><first>Pradeep</first><last>Dogga</last></author>
       <author><first>Sohan</first><last>Patro</last></author>
       <author><first>Dhiraj</first><last>Barnwal</last></author>
@@ -4776,8 +4776,8 @@
       <author><first>Israa</first><last>Jaradat</last></author>
       <author><first>Pepa</first><last>Gencheva</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>26–30</pages>
       <abstract>We present ClaimRank, an online system for detecting check-worthy claims. While originally trained on political debates, the system can work for any kind of text, e.g., interviews or just regular news articles. Its aim is to facilitate manual fact-checking efforts by prioritizing the claims that fact-checkers should consider first. ClaimRank supports both Arabic and English, it is trained on actual annotations from nine reputable fact-checking organizations (PolitiFact, FactCheck, ABC, CNN, NPR, NYT, Chicago Tribune, The Guardian, and Washington Post), and thus it can mimic the claim selection strategies for each and any of them, as well as for the union of them all.</abstract>
       <url hash="4f37e4bc">N18-5006</url>
@@ -4841,7 +4841,7 @@
       <title>Madly Ambiguous: A Game for Learning about Structural Ambiguity and Why It’s Hard for Computers</title>
       <author><first>Ajda</first><last>Gokcen</last></author>
       <author><first>Ethan</first><last>Hill</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>51–55</pages>
       <abstract>Madly Ambiguous is an open source, online game aimed at teaching audiences of all ages about structural ambiguity and why it’s hard for computers. After a brief introduction to structural ambiguity, users are challenged to complete a sentence in a way that tricks the computer into guessing an incorrect interpretation. Behind the scenes are two different NLP-based methods for classifying the user’s input, one representative of classic rule-based approaches to disambiguation and the other representative of recent neural network approaches. Qualitative feedback from the system’s use in online, classroom, and science museum settings indicates that it is engaging and successful in conveying the intended take home messages. A demo of Madly Ambiguous can be played at <url>http://madlyambiguous.osu.edu</url>.</abstract>
       <url hash="48d7c358">N18-5011</url>
@@ -4876,7 +4876,7 @@
     <paper id="14">
       <title>Generating Continuous Representations of Medical Texts</title>
       <author><first>Graham</first><last>Spinks</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>66–70</pages>
       <abstract>We present an architecture that generates medical texts while learning an informative, continuous representation with discriminative features. During training the input to the system is a dataset of captions for medical X-Rays. The acquired continuous representations are of particular interest for use in many machine learning techniques where the discrete and high-dimensional nature of textual input is an obstacle. We use an Adversarially Regularized Autoencoder to create realistic text in both an unconditional and conditional setting. We show that this technique is applicable to medical texts which often contain syntactic and domain-specific shorthands. A quantitative evaluation shows that we achieve a lower model perplexity than a traditional LSTM generator.</abstract>
       <url hash="b0c5ef5b">N18-5014</url>
@@ -4914,7 +4914,7 @@
       <author><first>Yu-Wen</first><last>Liu</last></author>
       <author><first>Liang-Chih</first><last>Liu</last></author>
       <author><first>Chuan-Ju</first><last>Wang</last></author>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <pages>81–85</pages>
       <abstract>This paper presents a web-based information system, RiskFinder, for facilitating the analyses of soft and hard information in financial reports. In particular, the system broadens the analyses from the word level to sentence level, which makes the system useful for practitioner communities and unprecedented among financial academics. The proposed system has four main components: 1) a Form 10-K risk-sentiment dataset, consisting of a set of risk-labeled financial sentences and pre-trained sentence embeddings; 2) metadata, including basic information on each company that published the Form 10-K financial report as well as several relevant financial measures; 3) an interface that highlights risk-related sentences in the financial reports based on the latest sentence embedding techniques; 4) a visualization of financial time-series data for a corresponding company. This paper also conducts some case studies to showcase that the system can be of great help in capturing valuable insight within large amounts of textual information. The system is now online available at <url>https://cfda.csie.org/RiskFinder/</url>.</abstract>
       <url hash="3ca4504d">N18-5017</url>
@@ -4927,7 +4927,7 @@
       <author><first>David</first><last>Salter</last></author>
       <author><first>Luke</first><last>DeLuccia</last></author>
       <author><first>Kilho</first><last>Son</last></author>
-      <author><first>Mohamed R.</first><last>Amer</last></author>
+      <author id="mohamed-r-amer"><first>Mohamed R.</first><last>Amer</last></author>
       <author><first>Amir</first><last>Tamrakar</last></author>
       <pages>86–90</pages>
       <abstract>We demonstrate an intelligent conversational agent system designed for advancing human-machine collaborative tasks. The agent is able to interpret a user’s communicative intent from both their verbal utterances and non-verbal behaviors, such as gestures. The agent is also itself able to communicate both with natural language and gestures, through its embodiment as an avatar thus facilitating natural symmetric multi-modal interactions. We demonstrate two intelligent agents with specialized skills in the Blocks World as use-cases of our system.</abstract>
@@ -4956,8 +4956,8 @@
       <author><first>Elizabeth</first><last>Clark</last></author>
       <author><first>Ari</first><last>Holtzman</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>96–100</pages>
       <abstract>We present Sounding Board, a social chatbot that won the 2017 Amazon Alexa Prize. The system architecture consists of several components including spoken language processing, dialogue management, language generation, and content management, with emphasis on user-centric and content-driven design. We also share insights gained from large-scale online logs based on 160,000 conversations with real-world users.</abstract>
       <url hash="ea6d9206">N18-5020</url>
@@ -4970,7 +4970,7 @@
       <booktitle>Proceedings of the 2018 Conference of the North <fixed-case>A</fixed-case>merican Chapter of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="d59437ff">N18-6</url>
       <editor><first>Mohit</first><last>Bansal</last></editor>
-      <editor><first>Rebecca</first><last>Passonneau</last></editor>
+      <editor id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></editor>
       <doi>10.18653/v1/N18-6</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New Orleans, Louisiana</address>
@@ -5017,8 +5017,8 @@
     </paper>
     <paper id="4">
       <title>The interplay between lexical resources and Natural Language Processing</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>17–23</pages>
       <abstract>Incorporating linguistic, world and common sense knowledge into AI/NLP systems is currently an important research area, with several open problems and challenges. At the same time, processing and storing this knowledge in lexical resources is not a straightforward task. We propose to address these complementary goals from two methodological perspectives: the use of NLP methods to help the process of constructing and enriching lexical resources and the use of lexical resources for improving NLP applications. This tutorial may be useful for two main types of audience: those working on language resources who are interested in becoming acquainted with automatic NLP techniques, with the end goal of speeding and/or easing up the process of resource curation; and on the other hand, researchers in NLP who would like to benefit from the knowledge of lexical resources to improve their systems and models.</abstract>
diff --git a/data/xml/N19.xml b/data/xml/N19.xml
index 140a8fda19..cdbd2c02ed 100644
--- a/data/xml/N19.xml
+++ b/data/xml/N19.xml
@@ -31,11 +31,11 @@
     <paper id="2">
       <title>The emergence of number and syntax units in <fixed-case>LSTM</fixed-case> language models</title>
       <author><first>Yair</first><last>Lakretz</last></author>
-      <author><first>German</first><last>Kruszewski</last></author>
-      <author><first>Theo</first><last>Desbordes</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
+      <author id="theo-desbordes"><first>Theo</first><last>Desbordes</last></author>
       <author><first>Dieuwke</first><last>Hupkes</last></author>
       <author><first>Stanislas</first><last>Dehaene</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>11–20</pages>
       <abstract>Recent work has shown that LSTMs trained on a generic language modeling objective capture syntax-sensitive generalizations such as long-distance number agreement. We have however no mechanistic understanding of how they accomplish this remarkable feat. Some have conjectured it depends on heuristics that do not truly take hierarchical structure into account. We present here a detailed study of the inner mechanics of number tracking in LSTMs at the single neuron level. We discover that long-distance number information is largely managed by two “number units”. Importantly, the behaviour of these units is partially controlled by other units independently shown to track syntactic structure. We conclude that LSTMs are, to some extent, implementing genuinely syntactic processing mechanisms, paving the way to a more general understanding of grammatical encoding in LSTMs.</abstract>
       <url hash="4ee63d7e">N19-1002</url>
@@ -61,7 +61,7 @@
       <author><first>Takashi</first><last>Morita</last></author>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>32–42</pages>
       <abstract>We investigate the extent to which the behavior of neural network language models reflects incremental representations of syntactic state. To do so, we employ experimental methodologies which were originally developed in the field of psycholinguistics to study syntactic representation in the human mind. We examine neural network model behavior on sets of artificial sentences containing a variety of syntactically complex structures. These sentences not only test whether the networks have a representation of syntactic state, they also reveal the specific lexical cues that networks use to update these states. We test four models: two publicly available LSTM sequence models of English (Jozefowicz et al., 2016; Gulordava et al., 2018) trained on large datasets; an RNN Grammar (Dyer et al., 2016) trained on a small, parsed dataset; and an LSTM trained on the same small corpus as the RNNG. We find evidence for basic syntactic state representations in all models, but only the models trained on large datasets are sensitive to subtle lexical cues signaling changes in syntactic state.</abstract>
       <url hash="88ac5627">N19-1004</url>
@@ -72,7 +72,7 @@
     <paper id="5">
       <title>Understanding language-elicited <fixed-case>EEG</fixed-case> data by predicting it from a fine-tuned language model</title>
       <author><first>Dan</first><last>Schwartz</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>43–57</pages>
       <abstract>Electroencephalography (EEG) recordings of brain activity taken while participants read or listen to language are widely used within the cognitive neuroscience and psycholinguistics communities as a tool to study language comprehension. Several time-locked stereotyped EEG responses to word-presentations – known collectively as event-related potentials (ERPs) – are thought to be markers for semantic or syntactic processes that take place during comprehension. However, the characterization of each individual ERP in terms of what features of a stream of language trigger the response remains controversial. Improving this characterization would make ERPs a more useful tool for studying language comprehension. We take a step towards better understanding the ERPs by finetuning a language model to predict them. This new approach to analysis shows for the first time that all of the ERPs are predictable from embeddings of a stream of language. Prior work has only found two of the ERPs to be predictable. In addition to this analysis, we examine which ERPs benefit from sharing parameters during joint training. We find that two pairs of ERPs previously identified in the literature as being related to each other benefit from joint training, while several other pairs of ERPs that benefit from joint training are suggestive of potential relationships. Extensions of this analysis that further examine what kinds of information in the model embeddings relate to each ERP have the potential to elucidate the processes involved in human language comprehension.</abstract>
       <url hash="2cd2eebf">N19-1005</url>
@@ -86,7 +86,7 @@
       <author><first>Herman</first><last>Kamper</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>58–68</pages>
       <abstract>We present a simple approach to improve direct speech-to-text translation (ST) when the source language is low-resource: we pre-train the model on a high-resource automatic speech recognition (ASR) task, and then fine-tune its parameters for ST. We demonstrate that our approach is effective by pre-training on 300 hours of English ASR data to improve Spanish English ST from 10.8 to 20.2 BLEU when only 20 hours of Spanish-English ST training data are available. Through an ablation study, we find that the pre-trained encoder (acoustic model) accounts for most of the improvement, despite the fact that the shared language in these tasks is the target language text, not the source language audio. Applying this insight, we show that pre-training on ASR helps ST even when the ASR language differs from both source and target ST languages: pre-training on French ASR also improves Spanish-English ST. Finally, we show that the approach improves performance on a true low-resource task: pre-training on a combination of English ASR and French ASR improves Mboshi-French ST, where only 4 hours of data are available, from 3.5 to 7.1 BLEU.</abstract>
       <url hash="e7ba2d4c">N19-1006</url>
@@ -109,7 +109,7 @@
     <paper id="8">
       <title>Giving Attention to the Unexpected: Using Prosody Innovations in Disfluency Detection</title>
       <author><first>Vicky</first><last>Zayats</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>86–95</pages>
       <abstract>Disfluencies in spontaneous speech are known to be associated with prosodic disruptions. However, most algorithms for disfluency detection use only word transcripts. Integrating prosodic cues has proved difficult because of the many sources of variability affecting the acoustic correlates. This paper introduces a new approach to extracting acoustic-prosodic cues using text-based distributional prediction of acoustic cues to derive vector z-score features (innovations). We explore both early and late fusion techniques for integrating text and prosody, showing gains over a high-accuracy text-only model.</abstract>
       <url hash="df1260be">N19-1008</url>
@@ -169,7 +169,7 @@
     <paper id="13">
       <title><fixed-case>A</fixed-case>nswer-based <fixed-case>A</fixed-case>dversarial <fixed-case>T</fixed-case>raining for <fixed-case>G</fixed-case>enerating <fixed-case>C</fixed-case>larification <fixed-case>Q</fixed-case>uestions</title>
       <author><first>Sudha</first><last>Rao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>143–155</pages>
       <abstract>We present an approach for generating clarification questions with the goal of eliciting new information that would make the given textual context more complete. We propose that modeling hypothetical answers (to clarification questions) as latent variables can guide our approach into generating more useful clarification questions. We develop a Generative Adversarial Network (GAN) where the generator is a sequence-to-sequence model and the discriminator is a utility function that models the value of updating the context with the answer to the clarification question. We evaluate on two datasets, using both automatic metrics and human judgments of usefulness, specificity and relevance, showing that our approach outperforms both a retrieval-based model and ablations that exclude the utility model and the adversarial training.</abstract>
       <url hash="a35dfddf">N19-1013</url>
@@ -232,7 +232,7 @@
     <paper id="18">
       <title>Discontinuous Constituency Parsing with a Stack-Free Transition System and a Dynamic Oracle</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>204–217</pages>
       <abstract>We introduce a novel transition system for discontinuous constituency parsing. Instead of storing subtrees in a stack –i.e. a data structure with linear-time sequential access– the proposed system uses a set of parsing items, with constant-time random access. This change makes it possible to construct any discontinuous constituency tree in exactly <tex-math>4n–2</tex-math> transitions for a sentence of length n. At each parsing step, the parser considers every item in the set to be combined with a focus item and to construct a new constituent in a bottom-up fashion. The parsing strategy is based on the assumption that most syntactic structures can be parsed incrementally and that the set –the memory of the parser– remains reasonably small on average. Moreover, we introduce a provably correct dynamic oracle for the new transition system, and present the first experiments in discontinuous constituency parsing using a dynamic oracle. Our parser obtains state-of-the-art results on three English and German discontinuous treebanks.</abstract>
       <url hash="78e32d04">N19-1018</url>
@@ -252,7 +252,7 @@
     <paper id="20">
       <title><fixed-case>CCG</fixed-case> Parsing Algorithm with Incremental Tree Rotation</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>228–239</pages>
       <abstract>The main obstacle to incremental sentence processing arises from right-branching constituent structures, which are present in the majority of English sentences, as well as optional constituents that adjoin on the right, such as right adjuncts and right conjuncts. In CCG, many right-branching derivations can be replaced by semantically equivalent left-branching incremental derivations. The problem of right-adjunction is more resistant to solution, and has been tackled in the past using revealing-based approaches that often rely either on the higher-order unification over lambda terms (Pareschi and Steedman,1987) or heuristics over dependency representations that do not cover the whole CCGbank (Ambati et al., 2015). We propose a new incremental parsing algorithm for CCG following the same revealing tradition of work but having a purely syntactic approach that does not depend on access to a distinct level of semantic representation. This algorithm can cover the whole CCGbank, with greater incrementality and accuracy than previous proposals.</abstract>
       <url hash="6cec6c5f">N19-1020</url>
@@ -286,7 +286,7 @@
     <paper id="23">
       <title>Evaluating Composition Models for Verb Phrase Elliptical Sentence Embeddings</title>
       <author><first>Gijs</first><last>Wijnholds</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>261–271</pages>
       <abstract>Ellipsis is a natural language phenomenon where part of a sentence is missing and its information must be recovered from its surrounding context, as in “Cats chase dogs and so do foxes.”. Formal semantics has different methods for resolving ellipsis and recovering the missing information, but the problem has not been considered for distributional semantics, where words have vector embeddings and combinations thereof provide embeddings for sentences. In elliptical sentences these combinations go beyond linear as copying of elided information is necessary. In this paper, we develop different models for embedding VP-elliptical sentences. We extend existing verb disambiguation and sentence similarity datasets to ones containing elliptical phrases and evaluate our models on these datasets for a variety of non-linear combinations and their linear counterparts. We compare results of these compositional models to state of the art holistic sentence encoders. Our results show that non-linear addition and a non-linear tensor-based composition outperform the naive non-compositional baselines and the linear models, and that sentence encoders perform well on sentence similarity, but not on verb disambiguation.</abstract>
       <url hash="c029f150">N19-1023</url>
@@ -297,8 +297,8 @@
       <title>Neural Finite-State Transducers: Beyond Rational Relations</title>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <author><first>Hao</first><last>Zhu</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>272–283</pages>
       <abstract>We introduce neural finite state transducers (NFSTs), a family of string transduction models defining joint and conditional probability distributions over pairs of strings. The probability of a string pair is obtained by marginalizing over all its accepting paths in a finite state transducer. In contrast to ordinary weighted FSTs, however, each path is scored using an arbitrary function such as a recurrent neural network, which breaks the usual conditional independence assumption (Markov property). NFSTs are more powerful than previous finite-state models with neural features (Rastogi et al., 2016.) We present training and inference algorithms for locally and globally normalized variants of NFSTs. In experiments on different transduction tasks, they compete favorably against seq2seq models while offering interpretable paths that correspond to hard monotonic alignments.</abstract>
       <url hash="c9234c48">N19-1024</url>
@@ -405,7 +405,7 @@
     <paper id="33">
       <title>Vector of Locally-Aggregated Word Embeddings (<fixed-case>VLAWE</fixed-case>): A Novel Document-level Representation</title>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <pages>363–369</pages>
       <abstract>In this paper, we propose a novel representation for text documents based on aggregating word embedding vectors into document embeddings. Our approach is inspired by the Vector of Locally-Aggregated Descriptors used for image representation, and it works as follows. First, the word embeddings gathered from a collection of documents are clustered by k-means in order to learn a codebook of semnatically-related word embeddings. Each word embedding is then associated to its nearest cluster centroid (codeword). The Vector of Locally-Aggregated Word Embeddings (VLAWE) representation of a document is then computed by accumulating the differences between each codeword vector and each word vector (from the document) associated to the respective codeword. We plug the VLAWE representation, which is learned in an unsupervised manner, into a classifier and show that it is useful for a diverse set of text classification tasks. We compare our approach with a broad range of recent state-of-the-art methods, demonstrating the effectiveness of our approach. Furthermore, we obtain a considerable improvement on the Movie Review data set, reporting an accuracy of 93.3%, which represents an absolute gain of 10% over the state-of-the-art approach.</abstract>
       <url hash="b4de5c5c">N19-1033</url>
@@ -419,7 +419,7 @@
       <author><first>Deepanway</first><last>Ghosal</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>370–379</pages>
       <abstract>Related tasks often have inter-dependence on each other and perform better when solved in a joint framework. In this paper, we present a deep multi-task learning framework that jointly performs sentiment and emotion analysis both. The multi-modal inputs (i.e. text, acoustic and visual frames) of a video convey diverse and distinctive information, and usually do not have equal contribution in the decision making. We propose a context-level inter-modal attention framework for simultaneously predicting the sentiment and expressed emotions of an utterance. We evaluate our proposed approach on CMU-MOSEI dataset for multi-modal sentiment and emotion analysis. Evaluation results suggest that multi-task learning framework offers improvement over the single-task framework. The proposed approach reports new state-of-the-art performance for both sentiment analysis and emotion analysis.</abstract>
       <url hash="86d9b1a3">N19-1034</url>
@@ -475,7 +475,7 @@
     </paper>
     <paper id="39">
       <title>Simplified Neural Unsupervised Domain Adaptation</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>414–419</pages>
       <abstract>Unsupervised domain adaptation (UDA) is the task of training a statistical model on labeled data from a source domain to achieve better performance on data from a target domain, with access to only unlabeled data in the target domain. Existing state-of-the-art UDA approaches use neural networks to learn representations that are trained to predict the values of subset of important features called “pivot features” on combined data from the source and target domains. In this work, we show that it is possible to improve on existing neural domain adaptation algorithms by 1) jointly training the representation learner with the task learner; and 2) removing the need for heuristically-selected “pivot features.” Our results show competitive performance with a simpler model.</abstract>
       <url hash="7397afd5">N19-1039</url>
@@ -508,7 +508,7 @@
     <paper id="42">
       <title>Lost in Machine Translation: A Method to Reduce Meaning Loss</title>
       <author><first>Reuben</first><last>Cohn-Gordon</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>437–441</pages>
       <abstract>A desideratum of high-quality translation systems is that they preserve meaning, in the sense that two sentences with different meanings should not translate to one and the same sentence in another language. However, state-of-the-art systems often fail in this regard, particularly in cases where the source and target languages partition the “meaning space” in different ways. For instance, “I cut my finger.” and “I cut my finger off.” describe different states of the world but are translated to French (by both Fairseq and Google Translate) as “Je me suis coupé le doigt.”, which is ambiguous as to whether the finger is detached. More generally, translation systems are typically many-to-one (non-injective) functions from source to target language, which in many cases results in important distinctions in meaning being lost in translation. Building on Bayesian models of informative utterance production, we present a method to define a less ambiguous translation system in terms of an underlying pre-trained neural sequence-to-sequence model. This method increases injectivity, resulting in greater preservation of meaning as measured by improvement in cycle-consistency, without impeding translation quality (measured by BLEU score).</abstract>
       <url hash="c9314908">N19-1042</url>
@@ -557,7 +557,7 @@
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Xintong</first><last>Li</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
       <pages>466–477</pages>
       <abstract>Multilayer architectures are currently the gold standard for large-scale neural machine translation. Existing works have explored some methods for understanding the hidden representations, however, they have not sought to improve the translation quality rationally according to their understanding. Towards understanding for performance improvement, we first artificially construct a sequence of nested relative tasks and measure the feature generalization ability of the learned hidden representation over these tasks. Based on our understanding, we then propose to regularize the layer-wise representations with all tree-induced tasks. To overcome the computational bottleneck resulting from the large number of regularization terms, we design efficient approximation methods by selecting a few coarse-to-fine tasks for regularization. Extensive experiments on two widely-used datasets demonstrate the proposed methods only lead to small extra overheads in training but no additional overheads in testing, and achieve consistent improvements (up to +1.3 BLEU) compared to the state-of-the-art translation model.</abstract>
@@ -582,7 +582,7 @@
     <paper id="48">
       <title>Attentive Mimicking: Better Word Embeddings by Attending to Informative Contexts</title>
       <author><first>Timo</first><last>Schick</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>489–494</pages>
       <abstract>Learning high-quality embeddings for rare words is a hard problem because of sparse context information. Mimicking (Pinter et al., 2017) has been proposed as a solution: given embeddings learned by a standard algorithm, a model is first trained to reproduce embeddings of frequent words from their surface form and then used to compute embeddings for rare words. In this paper, we introduce attentive mimicking: the mimicking model is given access not only to a word’s surface form, but also to all available contexts and learns to attend to the most informative and reliable contexts for computing an embedding. In an evaluation on four tasks, we show that attentive mimicking outperforms previous work for both rare and medium-frequency words. Thus, compared to previous work, attentive mimicking improves embeddings for a much larger part of the vocabulary, including the medium-frequency range.</abstract>
       <url hash="e3fd68d1">N19-1048</url>
@@ -605,7 +605,7 @@
     <paper id="50">
       <title>Big <fixed-case>B</fixed-case>i<fixed-case>RD</fixed-case>: A Large, Fine-Grained, Bigram Relatedness Dataset for Examining Semantic Composition</title>
       <author><first>Shima</first><last>Asaadi</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>505–516</pages>
       <abstract>Bigrams (two-word sequences) hold a special place in semantic composition research since they are the smallest unit formed by composing words. A semantic relatedness dataset that includes bigrams will thus be useful in the development of automatic methods of semantic composition. However, existing relatedness datasets only include pairs of unigrams (single words). Further, existing datasets were created using rating scales and thus suffer from limitations such as in consistent annotations and scale region bias. In this paper, we describe how we created a large, fine-grained, bigram relatedness dataset (BiRD), using a comparative annotation technique called Best–Worst Scaling. Each of BiRD’s 3,345 English term pairs involves at least one bigram. We show that the relatedness scores obtained are highly reliable (split-half reliability r= 0.937). We analyze the data to obtain insights into bigram semantic relatedness. Finally, we present benchmark experiments on using the relatedness dataset as a testbed to evaluate simple unsupervised measures of semantic composition. BiRD is made freely available to foster further research on how meaning can be represented and how meaning can be composed.</abstract>
@@ -658,7 +658,7 @@
       <title><fixed-case>IMHO</fixed-case> Fine-Tuning Improves Claim Detection</title>
       <author><first>Tuhin</first><last>Chakrabarty</last></author>
       <author><first>Christopher</first><last>Hidey</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>558–563</pages>
       <abstract>Claims are the central component of an argument. Detecting claims across different domains or data sets can often be challenging due to their varying conceptualization. We propose to alleviate this problem by fine-tuning a language model using a Reddit corpus of 5.5 million opinionated claims. These claims are self-labeled by their authors using the internet acronyms IMO/IMHO (in my (humble) opinion). Empirical results show that using this approach improves the state of art performance across four benchmark argumentation data sets by an average of 4 absolute F1 points in claim detection. As these data sets include diverse domains such as social media and student essays this improvement demonstrates the robustness of fine-tuning on this novel corpus.</abstract>
       <url hash="fb741808">N19-1054</url>
@@ -694,7 +694,7 @@
     <paper id="57">
       <title>Improving Dialogue State Tracking by Discerning the Relevant Context</title>
       <author><first>Sanuj</first><last>Sharma</last></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>576–581</pages>
       <abstract>A typical conversation comprises of multiple turns between participants where they go back and forth between different topics. At each user turn, dialogue state tracking (DST) aims to estimate user’s goal by processing the current utterance. However, in many turns, users implicitly refer to the previous goal, necessitating the use of relevant dialogue history. Nonetheless, distinguishing relevant history is challenging and a popular method of using dialogue recency for that is inefficient. We, therefore, propose a novel framework for DST that identifies relevant historical context by referring to the past utterances where a particular slot-value changes and uses that together with weighted system utterance to identify the relevant context. Specifically, we use the current user utterance and the most recent system utterance to determine the relevance of a system utterance. Empirical analyses show that our method improves joint goal accuracy by 2.75% and 2.36% on WoZ 2.0 and Multi-WoZ restaurant domain datasets respectively over the previous state-of-the-art GLAD model.</abstract>
@@ -754,7 +754,7 @@
       <title><fixed-case>B</fixed-case>lack is to Criminal as <fixed-case>C</fixed-case>aucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings</title>
       <author><first>Thomas</first><last>Manzini</last></author>
       <author><first>Lim</first><last>Yao Chong</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>615–621</pages>
       <abstract>Online texts - across genres, registers, domains, and styles - are riddled with human stereotypes, expressed in overt or subtle ways. Word embeddings, trained on these texts, perpetuate and amplify these stereotypes, and propagate biases to machine learning models that use word embeddings as features. In this work, we propose a method to debias word embeddings in multiclass settings such as race and religion, extending the work of (Bolukbasi et al., 2016) from the binary setting, such as binary gender. Next, we propose a novel methodology for the evaluation of multiclass debiasing. We demonstrate that our multiclass debiasing is robust and maintains the efficacy in standard NLP tasks.</abstract>
@@ -769,7 +769,7 @@
       <author><first>Chandler</first><last>May</last></author>
       <author><first>Alex</first><last>Wang</last></author>
       <author><first>Shikha</first><last>Bordia</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Rachel</first><last>Rudinger</last></author>
       <pages>622–628</pages>
       <abstract>The Word Embedding Association Test shows that GloVe and word2vec word embeddings exhibit human-like implicit biases based on gender, race, and other social constructs (Caliskan et al., 2017). Meanwhile, research on learning reusable text representations has begun to explore sentence-level texts, with some sentence encoders seeing enthusiastic adoption. Accordingly, we extend the Word Embedding Association Test to measure bias in sentence encoders. We then test several sentence encoders, including state-of-the-art methods such as ELMo and BERT, for the social biases studied in prior work and two important biases that are difficult or impossible to test at the word level. We observe mixed results including suspicious patterns of sensitivity that suggest the test’s assumptions may not hold in general. We conclude by proposing directions for future work on measuring bias in sentence encoders.</abstract>
@@ -799,7 +799,7 @@
       <title><fixed-case>C</fixed-case>ombining <fixed-case>S</fixed-case>entiment <fixed-case>L</fixed-case>exica with a <fixed-case>M</fixed-case>ulti-<fixed-case>V</fixed-case>iew <fixed-case>V</fixed-case>ariational <fixed-case>A</fixed-case>utoencoder</title>
       <author><first>Alexander Miserlis</first><last>Hoyle</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>635–640</pages>
@@ -863,7 +863,7 @@
     <paper id="70">
       <title>Keyphrase Generation: A Text Summarization Struggle</title>
       <author><first>Erion</first><last>Çano</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>666–672</pages>
       <abstract>Authors’ keyphrases assigned to scientific articles are essential for recognizing content and topic aspects. Most of the proposed supervised and unsupervised methods for keyphrase generation are unable to produce terms that are valuable but do not appear in the text. In this paper, we explore the possibility of considering the keyphrase string as an abstractive summary of the title and the abstract. First, we collect, process and release a large dataset of scientific paper metadata that contains 2.2 million records. Then we experiment with popular text summarization neural architectures. Despite using advanced deep learning models, large quantities of data and many days of computation, our systematic evaluation on four test datasets reveals that the explored text summarization methods could not produce better keyphrases than the simpler unsupervised methods, or the existing supervised ones.</abstract>
       <url hash="778b3792">N19-1070</url>
@@ -931,8 +931,8 @@
       <author><first>Jungo</first><last>Kasai</last></author>
       <author><first>Dan</first><last>Friedman</last></author>
       <author><first>Robert</first><last>Frank</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>701–709</pages>
       <abstract>We introduce a new syntax-aware model for dependency-based semantic role labeling that outperforms syntax-agnostic models for English and Spanish. We use a BiLSTM to tag the text with supertags extracted from dependency parses, and we feed these supertags, along with words and parts of speech, into a deep highway BiLSTM for semantic role labeling. Our model combines the strengths of earlier models that performed SRL on the basis of a full dependency parse with more recent models that use no syntactic information at all. Our local and non-ensemble model achieves state-of-the-art performance on the CoNLL 09 English and Spanish datasets. SRL models benefit from syntactic information, and we show that supertagging is a simple, powerful, and robust way to incorporate syntax into a neural SRL system.</abstract>
       <url hash="713a7501">N19-1075</url>
@@ -1139,7 +1139,7 @@
       <author><first>Hitesh</first><last>Golchha</last></author>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>851–860</pages>
       <abstract>In this paper, we propose an effective deep learning framework for inducing courteous behavior in customer care responses. The interaction between a customer and the customer care representative contributes substantially to the overall customer experience. Thus it is imperative for customer care agents and chatbots engaging with humans to be personal, cordial and emphatic to ensure customer satisfaction and retention. Our system aims at automatically transforming neutral customer care responses into courteous replies. Along with stylistic transfer (of courtesy), our system ensures that responses are coherent with the conversation history, and generates courteous expressions consistent with the emotional state of the customer. Our technique is based on a reinforced pointer-generator model for the sequence to sequence task. The model is also conditioned on a hierarchically encoded and emotionally aware conversational context. We use real interactions on Twitter between customer care professionals and aggrieved customers to create a large conversational dataset having both forms of agent responses: ‘generic’ and ‘courteous’. We perform quantitative and qualitative analyses on established and task-specific metrics, both automatic and human evaluation based. Our evaluation shows that the proposed models can generate emotionally-appropriate courteous expressions while preserving the content. Experimental results also prove that our proposed approach performs better than the baseline models.</abstract>
       <url hash="f6b8f7ed">N19-1091</url>
@@ -1392,11 +1392,11 @@
     </paper>
     <paper id="112">
       <title>Linguistic Knowledge and Transferability of Contextual Representations</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1073–1094</pages>
       <abstract>Contextual word representations derived from large-scale neural language models are successful across a diverse set of NLP tasks, suggesting that they encode useful and transferable features of language. To shed light on the linguistic knowledge they capture, we study the representations produced by several recent pretrained contextualizers (variants of ELMo, the OpenAI transformer language model, and BERT) with a suite of sixteen diverse probing tasks. We find that linear models trained on top of frozen contextual representations are competitive with state-of-the-art task-specific models in many cases, but fail on tasks requiring fine-grained linguistic knowledge (e.g., conjunct identification). To investigate the transferability of contextual word representations, we quantify differences in the transferability of individual layers within contextualizers, especially between recurrent neural networks (RNNs) and transformers. For instance, higher layers of RNNs are more task-specific, while transformer layers do not exhibit the same monotonic trend. In addition, to better understand what makes contextual word representations transferable, we compare language model pretraining with eleven supervised pretraining tasks. For any given task, pretraining on a closely related task yields better performance than language model pretraining (which is better on average) when the pretraining dataset is fixed. However, language model pretraining on more data gives the best results.</abstract>
       <url hash="9368583d">N19-1112</url>
@@ -1417,10 +1417,10 @@
     <paper id="114">
       <title>Unsupervised Recurrent Neural Network Grammars</title>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <author><first>Lei</first><last>Yu</last></author>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Gábor</first><last>Melis</last></author>
       <pages>1105–1117</pages>
       <abstract>Recurrent neural network grammars (RNNG) are generative models of language which jointly model syntax and surface structure by incrementally generating a syntax tree and sentence in a top-down, left-to-right order. Supervised RNNGs achieve strong language modeling and parsing performance, but require an annotated corpus of parse trees. In this work, we experiment with unsupervised learning of RNNGs. Since directly marginalizing over the space of latent trees is intractable, we instead apply amortized variational inference. To maximize the evidence lower bound, we develop an inference network parameterized as a neural CRF constituency parser. On language modeling, unsupervised RNNGs perform as well their supervised counterparts on benchmarks in English and Chinese. On constituency grammar induction, they are competitive with recent neural language models that induce tree structures from words through attention mechanisms.</abstract>
@@ -1432,7 +1432,7 @@
     <paper id="115">
       <title>Cooperative Learning of Disjoint Syntax and Semantics</title>
       <author><first>Serhii</first><last>Havrylov</last></author>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
       <pages>1118–1128</pages>
       <abstract>There has been considerable attention devoted to models that learn to jointly infer an expression’s syntactic structure and its semantics. Yet, Nangia and Bowman (2018) has recently shown that the current best systems fail to learn the correct parsing strategy on mathematical expressions generated from a simple context-free grammar. In this work, we present a recursive model inspired by Choi et al. (2018) that reaches near perfect accuracy on this task. Our model is composed of two separated modules for syntax and semantics. They are cooperatively trained with standard continuous and discrete optimisation schemes. Our model does not require any linguistic structure for supervision, and its recursive nature allows for out-of-domain generalisation. Additionally, our approach performs competitively on several natural language tasks, such as Natural Language Inference and Sentiment Analysis.</abstract>
@@ -1486,7 +1486,7 @@
       <author><first>Otilia</first><last>Stretcu</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Barnabas</first><last>Poczos</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1162–1172</pages>
       <abstract>Current state-of-the-art NMT systems use large neural networks that are not only slow to train, but also often require many heuristics and optimization tricks, such as specialized learning rate schedules and large batch sizes. This is undesirable as it requires extensive hyperparameter tuning. In this paper, we propose a curriculum learning framework for NMT that reduces training time, reduces the need for specialized heuristics or large batch sizes, and results in overall better performance. Our framework consists of a principled way of deciding which training samples are shown to the model at different times during training, based on the estimated difficulty of a sample and the current competence of the model. Filtering training samples in this manner prevents the model from getting stuck in bad local optima, making it converge faster and reach a better solution than the common approach of uniformly sampling training examples. Furthermore, the proposed method can be easily applied to existing NMT models by simply modifying their input data pipelines. We show that our framework can help improve the training time and the performance of both recurrent neural network models and Transformers, achieving up to a 70% decrease in training time, while at the same time obtaining accuracy improvements of up to 2.2 BLEU.</abstract>
       <url hash="1851f7f5">N19-1119</url>
@@ -1509,7 +1509,7 @@
     <paper id="121">
       <title>Consistency by Agreement in Zero-Shot Neural Machine Translation</title>
       <author><first>Maruan</first><last>Al-Shedivat</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>1184–1197</pages>
       <abstract>Generalization and reliability of multilingual translation often highly depend on the amount of available parallel data for each language pair of interest. In this paper, we focus on zero-shot generalization—a challenging setup that tests models on translation directions they have not been optimized for at training time. To solve the problem, we (i) reformulate multilingual translation as probabilistic inference, (ii) define the notion of zero-shot consistency and show why standard training often results in models unsuitable for zero-shot tasks, and (iii) introduce a consistent agreement-based training method that encourages the model to produce equivalent translations of parallel sentences in auxiliary languages. We test our multilingual NMT models on multiple public zero-shot translation benchmarks (IWSLT17, UN corpus, Europarl) and show that agreement-based learning often results in 2-3 BLEU zero-shot improvement over strong baselines without any loss in performance on supervised translation directions.</abstract>
       <url hash="90db10e8">N19-1121</url>
@@ -1561,12 +1561,12 @@
     <paper id="125">
       <title>Jointly Optimizing Diversity and Relevance in Neural Response Generation</title>
       <author><first>Xiang</first><last>Gao</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>1229–1238</pages>
       <abstract>Although recent neural conversation models have shown great potential, they often generate bland and generic responses. While various approaches have been explored to diversify the output of the conversation model, the improvement often comes at the cost of decreased relevance. In this paper, we propose a SpaceFusion model to jointly optimize diversity and relevance that essentially fuses the latent space of a sequence-to-sequence model and that of an autoencoder model by leveraging novel regularization terms. As a result, our approach induces a latent space in which the distance and direction from the predicted response vector roughly match the relevance and diversity, respectively. This property also lends itself well to an intuitive visualization of the latent space. Both automatic and human evaluation results demonstrate that the proposed approach brings significant improvement compared to strong baselines in both diversity and relevance.</abstract>
       <url hash="eea2eeda">N19-1125</url>
@@ -1601,7 +1601,7 @@
     <paper id="128">
       <title><fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>: the Word-in-Context Dataset for Evaluating Context-Sensitive Meaning Representations</title>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>1267–1273</pages>
       <abstract>By design, word embeddings are unable to model the dynamic nature of words’ semantics, i.e., the property of words to correspond to potentially different meanings. To address this limitation, dozens of specialized meaning representation techniques such as sense or contextualized embeddings have been proposed. However, despite the popularity of research on this topic, very few evaluation benchmarks exist that specifically focus on the dynamic semantics of words. In this paper we show that existing models have surpassed the performance ceiling of the standard evaluation dataset for the purpose, i.e., Stanford Contextual Word Similarity, and highlight its shortcomings. To address the lack of a suitable benchmark, we put forward a large-scale Word in Context dataset, called WiC, based on annotations curated by experts, for generic evaluation of context-sensitive representations. WiC is released in <url>https://pilehvar.github.io/wic/</url>.</abstract>
       <url hash="46292ddb">N19-1128</url>
@@ -1674,9 +1674,9 @@
     </paper>
     <paper id="134">
       <title>Adaptation of Hierarchical Structured Models for Speech Act Recognition in Asynchronous Conversation</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
       <author><first>Thanh-Tung</first><last>Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>1326–1336</pages>
       <abstract>We address the problem of speech act recognition (SAR) in asynchronous conversations (forums, emails). Unlike synchronous conversations (e.g., meetings, phone), asynchronous domains lack large labeled datasets to train an effective SAR model. In this paper, we propose methods to effectively leverage abundant unlabeled conversational data and the available labeled data from synchronous domains. We carry out our research in three main steps. First, we introduce a neural architecture based on hierarchical LSTMs and conditional random fields (CRF) for SAR, and show that our method outperforms existing methods when trained on in-domain data only. Second, we improve our initial SAR models by semi-supervised learning in the form of pretrained word embeddings learned from a large unlabeled conversational corpus. Finally, we employ adversarial training to improve the results further by leveraging the labeled data from synchronous domains and by explicitly modeling the distributional shift in two domains.</abstract>
       <url hash="11d3a4cc">N19-1134</url>
@@ -1710,7 +1710,7 @@
     <paper id="137">
       <title>Multi-Channel Convolutional Neural Network for <fixed-case>T</fixed-case>witter Emotion and Sentiment Recognition</title>
       <author><first>Jumayel</first><last>Islam</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Lu</first><last>Xiao</last></author>
       <pages>1355–1365</pages>
       <abstract>The advent of micro-blogging sites has paved the way for researchers to collect and analyze huge volumes of data in recent years. Twitter, being one of the leading social networking sites worldwide, provides a great opportunity to its users for expressing their states of mind via short messages which are called tweets. The urgency of identifying emotions and sentiments conveyed through tweets has led to several research works. It provides a great way to understand human psychology and impose a challenge to researchers to analyze their content easily. In this paper, we propose a novel use of a multi-channel convolutional neural architecture which can effectively use different emotion and sentiment indicators such as hashtags, emoticons and emojis that are present in the tweets and improve the performance of emotion and sentiment identification. We also investigate the incorporation of different lexical features in the neural network model and its effect on the emotion and sentiment identification task. We analyze our model on some standard datasets and compare its effectiveness with existing techniques.</abstract>
@@ -1775,7 +1775,7 @@
       <author><first>Mareike</first><last>Hartmann</last></author>
       <author><first>Tallulah</first><last>Jansen</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1401–1407</pages>
       <abstract>In online discussion fora, speakers often make arguments for or against something, say birth control, by highlighting certain aspects of the topic. In social science, this is referred to as issue framing. In this paper, we introduce a new issue frame annotated corpus of online discussions. We explore to what extent models trained to detect issue frames in newswire and social media can be transferred to the domain of discussion fora, using a combination of multi-task and adversarial training, assuming only unlabeled training data in the target domain.</abstract>
       <url hash="526d100d">N19-1142</url>
@@ -1795,8 +1795,8 @@
     <paper id="144">
       <title>Predicting the Type and Target of Offensive Posts in Social Media</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Noura</first><last>Farra</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
@@ -1831,7 +1831,7 @@
     </paper>
     <paper id="147">
       <title>Relation Extraction using Explicit Context Conditioning</title>
-      <author><first>Gaurav</first><last>Singh</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav</first><last>Singh</last></author>
       <author><first>Parminder</first><last>Bhatia</last></author>
       <pages>1442–1447</pages>
       <abstract>Relation extraction (RE) aims to label relations between groups of marked entities in raw text. Most current RE models learn context-aware representations of the target entities that are then used to establish relation between them. This works well for intra-sentence RE, and we call them first-order relations. However, this methodology can sometimes fail to capture complex and long dependencies. To address this, we hypothesize that at times the target entities can be connected via a context token. We refer to such indirect relations as second-order relations, and describe an efficient implementation for computing them. These second-order relation scores are then combined with first-order relation scores to obtain final relation scores. Our empirical results show that the proposed method leads to state-of-the-art performance over two biomedical datasets.</abstract>
@@ -1843,7 +1843,7 @@
       <title>Conversation Model Fine-Tuning for Classifying Client Utterances in Counseling Dialogues</title>
       <author><first>Sungjoon</first><last>Park</last></author>
       <author><first>Donghyun</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>1448–1459</pages>
       <abstract>The recent surge of text-based online counseling applications enables us to collect and analyze interactions between counselors and clients. A dataset of those interactions can be used to learn to automatically classify the client utterances into categories that help counselors in diagnosing client status and predicting counseling outcome. With proper anonymization, we collect counselor-client dialogues, define meaningful categories of client utterances with professional counselors, and develop a novel neural network model for classifying the client utterances. The central idea of our model, ConvMFiT, is a pre-trained conversation model which consists of a general language model built from an out-of-domain corpus and two role-specific language models built from unlabeled in-domain dialogues. The classification result shows that ConvMFiT outperforms state-of-the-art comparison models. Further, the attention weights in the learned model confirm that the model finds expected linguistic patterns for each category.</abstract>
       <url hash="ec6504f4">N19-1148</url>
@@ -1852,10 +1852,10 @@
     </paper>
     <paper id="149">
       <title>Using Similarity Measures to Select Pretraining Data for <fixed-case>NER</fixed-case></title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>1460–1470</pages>
       <abstract>Word vectors and Language Models (LMs) pretrained on a large amount of unlabelled data can dramatically improve various Natural Language Processing (NLP) tasks. However, the measure and impact of similarity between pretraining data and target task data are left to intuition. We propose three cost-effective measures to quantify different aspects of similarity between source pretraining and target task data. We demonstrate that these measures are good predictors of the usefulness of pretrained models for Named Entity Recognition (NER) over 30 data pairs. Results also suggest that pretrained LMs are more effective and more predictable than pretrained word vectors, but pretrained word vectors are better when pretraining data is dissimilar.</abstract>
       <url hash="29b66e78">N19-1149</url>
@@ -1867,7 +1867,7 @@
       <author><first>Yinfei</first><last>Yang</last></author>
       <author><first>Oshin</first><last>Agarwal</last></author>
       <author><first>Chris</first><last>Tar</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>1471–1480</pages>
       <abstract>Modern NLP systems require high-quality annotated data. For specialized domains, expert annotations may be prohibitively expensive; the alternative is to rely on crowdsourcing to reduce costs at the risk of introducing noise. In this paper we demonstrate that directly modeling instance difficulty can be used to improve model performance and to route instances to appropriate annotators. Our difficulty prediction model combines two learned representations: a ‘universal’ encoder trained on out of domain data, and a task-specific encoder. Experiments on a complex biomedical information extraction task using expert and lay annotators show that: (i) simply excluding from the training data instances predicted to be difficult yields a small boost in performance; (ii) using difficulty scores to weight instances during training provides further, consistent gains; (iii) assigning instances predicted to be difficult to domain experts is an effective strategy for task routing. Further, our experiments confirm the expectation that for such domain-specific tasks expert annotations are of much higher quality and preferable to obtain if practical and that augmenting small amounts of expert data with a larger set of lay annotations leads to further improvements in model performance.</abstract>
@@ -1880,7 +1880,7 @@
       <author><first>Mario Ezra</first><last>Aragón</last></author>
       <author><first>Adrian Pastor</first><last>López-Monroy</last></author>
       <author><first>Luis Carlos</first><last>González-Gurrola</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>1481–1486</pages>
       <abstract>Nowadays social media platforms are the most popular way for people to share information, from work issues to personal matters. For example, people with health disorders tend to share their concerns for advice, support or simply to relieve suffering. This provides a great opportunity to proactively detect these users and refer them as soon as possible to professional help. We propose a new representation called Bag of Sub-Emotions (BoSE), which represents social media documents by a set of fine-grained emotions automatically generated using a lexical resource of emotions and subword embeddings. The proposed representation is evaluated in the task of depression detection. The results are encouraging; the usage of fine-grained emotions improved the results from a representation based on the core emotions and obtained competitive results in comparison to state of the art approaches.</abstract>
       <url hash="f426431a">N19-1151</url>
@@ -1890,8 +1890,8 @@
     <paper id="152">
       <title>A Silver Standard Corpus of Human Phenotype-Gene Relations</title>
       <author><first>Diana</first><last>Sousa</last></author>
-      <author><first>Andre</first><last>Lamurias</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>1487–1492</pages>
       <abstract>Human phenotype-gene relations are fundamental to fully understand the origin of some phenotypic abnormalities and their associated diseases. Biomedical literature is the most comprehensive source of these relations, however, we need Relation Extraction tools to automatically recognize them. Most of these tools require an annotated corpus and to the best of our knowledge, there is no corpus available annotated with human phenotype-gene relations. This paper presents the Phenotype-Gene Relations (PGR) corpus, a silver standard corpus of human phenotype and gene annotations and their relations. The corpus consists of 1712 abstracts, 5676 human phenotype annotations, 13835 gene annotations, and 4283 relations. We generated this corpus using Named-Entity Recognition tools, whose results were partially evaluated by eight curators, obtaining a precision of 87.01%. By using the corpus we were able to obtain promising results with two state-of-the-art deep learning tools, namely 78.05% of precision. The PGR corpus was made publicly available to the research community.</abstract>
       <url hash="ac37382d">N19-1152</url>
@@ -1901,7 +1901,7 @@
     </paper>
     <paper id="153">
       <title>Improving Lemmatization of Non-Standard Languages with Joint Learning</title>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <pages>1493–1503</pages>
@@ -1916,7 +1916,7 @@
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1504–1516</pages>
       <abstract>Recent work has shown that contextualized word representations derived from neural machine translation are a viable alternative to such from simple word predictions tasks. This is because the internal understanding that needs to be built in order to be able to translate from one language to another is much more comprehensive. Unfortunately, computational and memory limitations as of present prevent NMT models from using large word vocabularies, and thus alternatives such as subword units (BPE and morphological segmentations) and characters have been used. Here we study the impact of using different kinds of units on the quality of the resulting representations when used to model morphology, syntax, and semantics. We found that while representations derived from subwords are slightly better for modeling syntax, character-based representations are superior for modeling morphology and are also more robust to noisy input.</abstract>
       <url hash="a141c3a4">N19-1154</url>
@@ -1949,8 +1949,8 @@
     </paper>
     <paper id="157">
       <title>Quantifying the morphosyntactic content of Brown Clusters</title>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>1541–1550</pages>
       <abstract>Brown and Exchange word clusters have long been successfully used as word representations in Natural Language Processing (NLP) systems. Their success has been attributed to their seeming ability to represent both semantic and syntactic information. Using corpora representing several language families, we test the hypothesis that Brown and Exchange word clusters are highly effective at encoding morphosyntactic information. Our experiments show that word clusters are highly capable at distinguishing Parts of Speech. We show that increases in Average Mutual Information, the clustering algorithms’ optimization goal, are highly correlated with improvements in encoding of morphosyntactic information. Our results provide empirical evidence that downstream NLP systems addressing tasks dependent on morphosyntactic information can benefit from word cluster features.</abstract>
@@ -1962,7 +1962,7 @@
     <paper id="158">
       <title>Analyzing <fixed-case>B</fixed-case>ayesian Crosslingual Transfer in Topic Models</title>
       <author><first>Shudong</first><last>Hao</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>1551–1565</pages>
       <abstract>We introduce a theoretical analysis of crosslingual transfer in probabilistic topic models. By formulating posterior inference through Gibbs sampling as a process of language transfer, we propose a new measure that quantifies the loss of knowledge across languages during this process. This measure enables us to derive a PAC-Bayesian bound that elucidates the factors affecting model quality, both during training and in downstream applications. We provide experimental validation of the analysis on a diverse set of five languages, and discuss best practices for data collection and model design based on our analysis.</abstract>
       <url hash="3cb60757">N19-1158</url>
@@ -2053,7 +2053,7 @@
     <paper id="165">
       <title>Text Processing Like Humans Do: Visually Attacking and Shielding <fixed-case>NLP</fixed-case> Systems</title>
       <author><first>Steffen</first><last>Eger</last></author>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
       <author><first>Andreas</first><last>Rücklé</last></author>
       <author><first>Ji-Ung</first><last>Lee</last></author>
       <author><first>Claudia</first><last>Schulz</last></author>
@@ -2106,7 +2106,7 @@
     </paper>
     <paper id="169">
       <title>Unifying Human and Statistical Evaluation for Natural Language Generation</title>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <author><first>Hugh</first><last>Zhang</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <pages>1689–1701</pages>
@@ -2132,7 +2132,7 @@
     <paper id="171">
       <title><fixed-case>A</fixed-case>n Empirical Investigation of Global and Local Normalization for Recurrent Neural Sequence Models Using a Continuous Relaxation to Beam Search</title>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>1724–1733</pages>
       <abstract>Globally normalized neural sequence models are considered superior to their locally normalized equivalents because they may ameliorate the effects of label bias. However, when considering high-capacity neural parametrizations that condition on the whole input sequence, both model classes are theoretically equivalent in terms of the distributions they are capable of representing. Thus, the practical advantage of global normalization in the context of modern neural methods remains unclear. In this paper, we attempt to shed light on this problem through an empirical study. We extend an approach for search-aware training via a continuous relaxation of beam search (Goyal et al., 2017b) in order to enable training of globally normalized recurrent sequence models through simple backpropagation. We then use this technique to conduct an empirical study of the interaction between global normalization, high-capacity encoders, and search-aware optimization. We observe that in the context of inexact search, globally normalized neural models are still more effective than their locally normalized counterparts. Further, since our training approach is sensitive to warm-starting with pre-trained models, we also propose a novel initialization strategy based on self-normalization for pre-training globally normalized models. We perform analysis of our approach on two tasks: CCG supertagging and Machine Translation, and demonstrate the importance of global normalization under different conditions while using search-aware training.</abstract>
@@ -2165,7 +2165,7 @@
     <paper id="174">
       <title>Fixed That for You: Generating Contrastive Claims with Semantic Edits</title>
       <author><first>Christopher</first><last>Hidey</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>1756–1767</pages>
       <abstract>Understanding contrastive opinions is a key component of argument generation. Central to an argument is the claim, a statement that is in dispute. Generating a counter-argument then requires generating a response in contrast to the main claim of the original argument. To generate contrastive claims, we create a corpus of Reddit comment pairs self-labeled by posters using the acronym FTFY (fixed that for you). We then train neural models on these pairs to edit the original claim and produce a new claim with a different view. We demonstrate significant improvement over a sequence-to-sequence baseline in BLEU score and a human evaluation for fluency, coherence, and contrast.</abstract>
       <url hash="6b6a39d1">N19-1174</url>
@@ -2176,7 +2176,7 @@
       <title>Box of Lies: Multimodal Deception Detection in Dialogues</title>
       <author><first>Felix</first><last>Soldner</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1768–1777</pages>
       <abstract>Deception often takes place during everyday conversations, yet conversational dialogues remain largely unexplored by current work on automatic deception detection. In this paper, we address the task of detecting multimodal deceptive cues during conversational dialogues. We introduce a multimodal dataset containing deceptive conversations between participants playing the Box of Lies game from The Tonight Show Starring Jimmy Fallon, in which they try to guess whether an object description provided by their opponent is deceptive or not. We conduct annotations of multimodal communication behaviors, including facial and linguistic behaviors, and derive several learning features based on these annotations. Initial classification experiments show promising results, performing well above both a random and a human baseline, and reaching up to 69% accuracy in distinguishing deceptive and truthful behaviors.</abstract>
       <url hash="b8791133">N19-1175</url>
@@ -2185,12 +2185,12 @@
     </paper>
     <paper id="176">
       <title>A Crowdsourced Corpus of Multiple Judgments and Disagreement on Anaphoric Interpretation</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>1778–1789</pages>
       <abstract>We present a corpus of anaphoric information (coreference) crowdsourced through a game-with-a-purpose. The corpus, containing annotations for about 108,000 markables, is one of the largest corpora for coreference for English, and one of the largest crowdsourced NLP corpora, but its main feature is the large number of judgments per markable: 20 on average, and over 2.2M in total. This characteristic makes the corpus a unique resource for the study of disagreements on anaphoric interpretation. A second distinctive feature is its rich annotation scheme, covering singletons, expletives, and split-antecedent plurals. Finally, the corpus also comes with labels inferred using a recently proposed probabilistic model of annotation for coreference. The labels are of high quality and make it possible to successfully train a state of the art coreference resolver, including training on singletons and non-referring expressions. The annotation model can also result in more than one label, or no label, being proposed for a markable, thus serving as a baseline method for automatically identifying ambiguous markables. A preliminary analysis of the results is presented.</abstract>
       <url hash="2cc57360">N19-1176</url>
@@ -2222,7 +2222,7 @@
     <paper id="179">
       <title>Modeling Document-level Causal Structures for Event Causal Relation Identification</title>
       <author><first>Lei</first><last>Gao</last></author>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>1808–1817</pages>
       <abstract>We aim to comprehensively identify all the event causal relations in a document, both within a sentence and across sentences, which is important for reconstructing pivotal event structures. The challenges we identified are two: 1) event causal relations are sparse among all possible event pairs in a document, in addition, 2) few causal relations are explicitly stated. Both challenges are especially true for identifying causal relations between events across sentences. To address these challenges, we model rich aspects of document-level causal structures for achieving comprehensive causal relation identification. The causal structures include heavy involvements of document-level main events in causal relations as well as several types of fine-grained constraints that capture implications from certain sentential syntactic relations and discourse relations as well as interactions between event causal relations and event coreference relations. Our experimental results show that modeling the global and fine-grained aspects of causal structures using Integer Linear Programming (ILP) greatly improves the performance of causal relation identification, especially in identifying cross-sentence causal relations.</abstract>
@@ -2296,9 +2296,9 @@
     </paper>
     <paper id="186">
       <title>Word Embedding-Based Automatic <fixed-case>MT</fixed-case> Evaluation Metric using Word Position Information</title>
-      <author><first>Hiroshi</first><last>Echizen’ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen’ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1874–1883</pages>
       <abstract>We propose a new automatic evaluation metric for machine translation. Our proposed metric is obtained by adjusting the Earth Mover’s Distance (EMD) to the evaluation task. The EMD measure is used to obtain the distance between two probability distributions consisting of some signatures having a feature and a weight. We use word embeddings, sentence-level tf-idf, and cosine similarity between two word embeddings, respectively, as the features, weight, and the distance between two features. Results show that our proposed metric can evaluate machine translation based on word meaning. Moreover, for distance, cosine similarity and word position information are used to address word-order differences. We designate this metric as Word Embedding-Based automatic MT evaluation using Word Position Information (WE_WPI). A meta-evaluation using WMT16 metrics shared task set indicates that our WE_WPI achieves the highest correlation with human judgment among several representative metrics.</abstract>
       <url hash="936ef680">N19-1186</url>
@@ -2321,7 +2321,7 @@
       <author><first>Geert</first><last>Heyman</last></author>
       <author><first>Bregt</first><last>Verreet</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1890–1902</pages>
       <abstract>Recent research has discovered that a shared bilingual word embedding space can be induced by projecting monolingual word embedding spaces from two languages using a self-learning paradigm without any bilingual supervision. However, it has also been shown that for distant language pairs such fully unsupervised self-learning methods are unstable and often get stuck in poor local optima due to reduced isomorphism between starting monolingual spaces. In this work, we propose a new robust framework for learning unsupervised multilingual word embeddings that mitigates the instability issues. We learn a shared multilingual embedding space for a variable number of languages by incrementally adding new languages one by one to the current multilingual space. Through the gradual language addition the method can leverage the interdependencies between the new language and all other languages in the current multilingual space. We find that it is beneficial to project more distant languages later in the iterative process. Our fully unsupervised multilingual embedding spaces yield results that are on par with the state-of-the-art methods in the bilingual lexicon induction (BLI) task, and simultaneously obtain state-of-the-art scores on two downstream tasks: multilingual document classification and multilingual dependency parsing, outperforming even supervised baselines. This finding also accentuates the need to establish evaluation protocols for cross-lingual word embeddings beyond the omnipresent intrinsic BLI task in future work.</abstract>
       <url hash="e71e7891">N19-1188</url>
@@ -2369,8 +2369,8 @@
       <author><first>Hao-Ran</first><last>Wei</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Ran</first><last>Wang</last></author>
-      <author><first>Xin-yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1932–1941</pages>
       <abstract>Current predominant neural machine translation (NMT) models often have a deep structure with large amounts of parameters, making these models hard to train and easily suffering from over-fitting. A common practice is to utilize a validation set to evaluate the training process and select the best checkpoint. Average and ensemble techniques on checkpoints can lead to further performance improvement. However, as these methods do not affect the training process, the system performance is restricted to the checkpoints generated in original training procedure. In contrast, we propose an online knowledge distillation method. Our method on-the-fly generates a teacher model from checkpoints, guiding the training process to obtain better performance. Experiments on several datasets and language pairs show steady improvement over a strong self-attention-based baseline system. We also provide analysis on data-limited setting against over-fitting. Furthermore, our method leads to an improvement in a machine reading experiment as well.</abstract>
       <url hash="20d76f06">N19-1192</url>
@@ -2437,7 +2437,7 @@
       <author><first>Soham</first><last>Ghosh</last></author>
       <author><first>Anuva</first><last>Agarwal</last></author>
       <author><first>Zarana</first><last>Parekh</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>1984–1990</pages>
       <abstract>The task of retrieving clips within videos based on a given natural language query requires cross-modal reasoning over multiple frames. Prior approaches such as sliding window classifiers are inefficient, while text-clip similarity driven ranking-based approaches such as segment proposal networks are far more complicated. In order to select the most relevant video clip corresponding to the given text description, we propose a novel extractive approach that predicts the start and end frames by leveraging cross-modal interactions between the text and video - this removes the need to retrieve and re-rank multiple proposal segments. Using recurrent networks we encode the two modalities into a joint representation which is then used in different variants of start-end frame predictor networks. Through extensive experimentation and ablative analysis, we demonstrate that our simple and elegant approach significantly outperforms state of the art on two datasets and has comparable performance on a third.</abstract>
       <url hash="d500f7ef">N19-1198</url>
@@ -2471,8 +2471,8 @@
     <paper id="201">
       <title>Subword-Level Language Identification for Intra-Word Code-Switching</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>2005–2011</pages>
       <abstract>Language identification for code-switching (CS), the phenomenon of alternating between two or more languages in conversations, has traditionally been approached under the assumption of a single language per token. However, if at least one language is morphologically rich, a large number of words can be composed of morphemes from more than one language (intra-word CS). In this paper, we extend the language identification task to the subword-level, such that it includes splitting mixed words while tagging each part with a language ID. We further propose a model for this task, which is based on a segmental recurrent neural network. In experiments on a new Spanish–Wixarika dataset and on an adapted German–Turkish dataset, our proposed model performs slightly better than or roughly on par with our best baseline, respectively. Considering only mixed words, however, it strongly outperforms all baselines.</abstract>
       <url hash="0e3ece6f">N19-1201</url>
@@ -2482,10 +2482,10 @@
     </paper>
     <paper id="202">
       <title><fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>C</fixed-case>: a <fixed-case>M</fixed-case>ultilingual <fixed-case>S</fixed-case>peech <fixed-case>T</fixed-case>ranslation <fixed-case>C</fixed-case>orpus</title>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>2012–2017</pages>
       <abstract>Current research on spoken language translation (SLT) has to confront with the scarcity of sizeable and publicly available training corpora. This problem hinders the adoption of neural end-to-end approaches, which represent the state of the art in the two parent tasks of SLT: automatic speech recognition and machine translation. To fill this gap, we created MuST-C, a multilingual speech translation corpus whose size and quality will facilitate the training of end-to-end systems for SLT from English into 8 languages. For each target language, MuST-C comprises at least 385 hours of audio recordings from English TED Talks, which are automatically aligned at the sentence level with their manual transcriptions and translations. Together with a description of the corpus creation methodology (scalable to add new data and cover new languages), we provide an empirical verification of its quality and SLT results computed with a state-of-the-art approach on each language direction.</abstract>
@@ -2499,9 +2499,9 @@
       <title>Contextualization of Morphological Inflection</title>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>2018–2024</pages>
       <abstract>Critical to natural language generation is the production of correctly inflected text. In this paper, we isolate the task of predicting a fully inflected sentence from its partially lemmatized version. Unlike traditional morphological inflection or surface realization, our task input does not provide “gold” tags that specify what morphological features to realize on each lemmatized word; rather, such features must be inferred from sentential context. We develop a neural hybrid graphical model that explicitly reconstructs morphological features before predicting the inflected forms, and compare this to a system that directly predicts the inflected forms without relying on any morphological annotation. We experiment on several typologically diverse languages from the Universal Dependencies treebanks, showing the utility of incorporating linguistically-motivated latent variables into NLP models.</abstract>
       <url hash="0e455fa5">N19-1203</url>
@@ -2513,7 +2513,7 @@
       <title>A Robust Abstractive System for Cross-Lingual Summarization</title>
       <author><first>Jessica</first><last>Ouyang</last></author>
       <author><first>Boya</first><last>Song</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>2025–2031</pages>
       <abstract>We present a robust neural abstractive summarization system for cross-lingual summarization. We construct summarization corpora for documents automatically translated from three low-resource languages, Somali, Swahili, and Tagalog, using machine translation and the New York Times summarization corpus. We train three language-specific abstractive summarizers and evaluate on documents originally written in the source languages, as well as on a fourth, unseen language: Arabic. Our systems achieve significantly higher fluency than a standard copy-attention summarizer on automatically translated input documents, as well as comparable content selection.</abstract>
       <url hash="205d9c46">N19-1204</url>
@@ -2525,8 +2525,8 @@
       <author><first>Chunpeng</first><last>Ma</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>2032–2037</pages>
       <abstract>The explicit use of syntactic information has been proved useful for neural machine translation (NMT). However, previous methods resort to either tree-structured neural networks or long linearized sequences, both of which are inefficient. Neural syntactic distance (NSD) enables us to represent a constituent tree using a sequence whose length is identical to the number of words in the sentence. NSD has been used for constituent parsing, but not in machine translation. We propose five strategies to improve NMT with NSD. Experiments show that it is not trivial to improve NMT with NSD; however, the proposed strategies are shown to improve translation performance of the baseline model (+2.1 (En–Ja), +1.3 (Ja–En), +1.2 (En–Ch), and +1.0 (Ch–En) BLEU).</abstract>
       <url hash="f0f97aea">N19-1205</url>
@@ -2590,8 +2590,8 @@
     <paper id="210">
       <title>Short-Term Meaning Shift: A Distributional Exploration</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>2069–2075</pages>
       <abstract>We present the first exploration of meaning shift over short periods of time in online communities using distributional representations. We create a small annotated dataset and use it to assess the performance of a standard model for meaning shift detection on short-term meaning shift. We find that the model has problems distinguishing meaning shift from referential phenomena, and propose a measure of contextual variability to remedy this.</abstract>
       <url hash="eb31c2a1">N19-1210</url>
@@ -2663,8 +2663,8 @@
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Abdelrhman</first><last>Saleh</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>2109–2116</pages>
       <abstract>In the context of fake news, bias, and propaganda, we study two important but relatively under-explored problems: (i) trustworthiness estimation (on a 3-point scale) and (ii) political ideology detection (left/right bias on a 7-point scale) of entire news outlets, as opposed to evaluating individual articles. In particular, we propose a multi-task ordinal regression framework that models the two problems jointly. This is motivated by the observation that hyper-partisanship is often linked to low trustworthiness, e.g., appealing to emotions rather than sticking to the facts, while center media tend to be generally more impartial and trustworthy. We further use several auxiliary tasks, modeling centrality, hyper-partisanship, as well as left-vs.-right bias on a coarse-grained scale. The evaluation results show sizable performance gains by the joint models over models that target the problems in isolation.</abstract>
       <url hash="b7d21ab8">N19-1216</url>
@@ -2757,7 +2757,7 @@
       <title>A Crowdsourced Frame Disambiguation Corpus with Ambiguity</title>
       <author><first>Anca</first><last>Dumitrache</last></author>
       <author><first>Lora</first><last>Aroyo</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>2164–2170</pages>
       <abstract>We present a resource for the task of FrameNet semantic frame disambiguation of over 5,000 word-sentence pairs from the Wikipedia corpus. The annotations were collected using a novel crowdsourcing approach with multiple workers per sentence to capture inter-annotator disagreement. In contrast to the typical approach of attributing the best single frame to each word, we provide a list of frames with disagreement-based scores that express the confidence with which each frame applies to the word. This is based on the idea that inter-annotator disagreement is at least partly caused by ambiguity that is inherent to the text and frames. We have found many examples where the semantics of individual frames overlap sufficiently to make them acceptable alternatives for interpreting a sentence. We have argued that ignoring this ambiguity creates an overly arbitrary target for training and evaluating natural language processing systems - if humans cannot agree, why would we expect the correct answer from a machine to be any different? To process this data we also utilized an expanded lemma-set provided by the Framester system, which merges FN with WordNet to enhance coverage. Our dataset includes annotations of 1,000 sentence-word pairs whose lemmas are not part of FN. Finally we present metrics for evaluating frame disambiguation systems that account for ambiguity.</abstract>
       <url hash="bdae8c83">N19-1224</url>
@@ -2769,9 +2769,9 @@
     </paper>
     <paper id="225">
       <title>Inoculation by Fine-Tuning: A Method for Analyzing Challenge Datasets</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2171–2179</pages>
       <abstract>Several datasets have recently been constructed to expose brittleness in models trained on existing benchmarks. While model performance on these challenge datasets is significantly lower compared to the original benchmark, it is unclear what particular weaknesses they reveal. For example, a challenge dataset may be difficult because it targets phenomena that current models cannot capture, or because it simply exploits blind spots in a model’s specific training set. We introduce inoculation by fine-tuning, a new analysis method for studying challenge datasets by exposing models (the metaphorical patient) to a small amount of data from the challenge dataset (a metaphorical pathogen) and assessing how well they can adapt. We apply our method to analyze the NLI “stress tests” (Naik et al., 2018) and the Adversarial SQuAD dataset (Jia and Liang, 2017). We show that after slight exposure, some of these datasets are no longer challenging, while others remain difficult. Our results indicate that failures on challenge datasets may lead to very different conclusions about models, training datasets, and the challenge datasets themselves.</abstract>
       <url hash="47a6a8b5">N19-1225</url>
@@ -2851,7 +2851,7 @@
       <author><first>Yukun</first><last>Feng</last></author>
       <author><first>Brian</first><last>Joseph</last></author>
       <author><first>Béatrice</first><last>Joyeux-Prunel</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>2223–2234</pages>
       <abstract>Scholars in inter-disciplinary fields like the Digital Humanities are increasingly interested in semantic annotation of specialized corpora. Yet, under-resourced languages, imperfect or noisily structured data, and user-specific classification tasks make it difficult to meet their needs using off-the-shelf models. Manual annotation of large corpora from scratch, meanwhile, can be prohibitively expensive. Thus, we propose an active learning solution for named entity recognition, attempting to maximize a custom model’s improvement per additional unit of manual annotation. Our system robustly handles any domain or user-defined label set and requires no external resources, enabling quality named entity recognition for Humanities corpora where such resources are not available. Evaluating on typologically disparate languages and datasets, we reduce required annotation by 20-60% and greatly outperform a competitive active learning baseline.</abstract>
       <url hash="edd74903">N19-1231</url>
@@ -2896,8 +2896,8 @@
       <title>Neural Text Generation from Rich Semantic Representations</title>
       <author><first>Valerie</first><last>Hajdik</last></author>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>2259–2266</pages>
       <abstract>We propose neural models to generate high-quality text from structured representations based on Minimal Recursion Semantics (MRS). MRS is a rich semantic representation that encodes more precise semantic detail than other representations such as Abstract Meaning Representation (AMR). We show that a sequence-to-sequence model that maps a linearization of Dependency MRS, a graph-based representation of MRS, to text can achieve a BLEU score of 66.11 when trained on gold data. The performance of the model can be improved further using a high-precision, broad coverage grammar-based parser to generate a large silver training corpus, achieving a final BLEU score of 77.17 on the full test set, and 83.37 on the subset of test data most closely matching the silver data domain. Our results suggest that MRS-based representations are a good choice for applications that need both structured semantics and the ability to produce natural language text as output.</abstract>
       <url hash="d8b217c5">N19-1235</url>
@@ -2930,7 +2930,7 @@
     </paper>
     <paper id="238">
       <title><fixed-case>T</fixed-case>ext <fixed-case>G</fixed-case>eneration from <fixed-case>K</fixed-case>nowledge <fixed-case>G</fixed-case>raphs with <fixed-case>G</fixed-case>raph <fixed-case>T</fixed-case>ransformers</title>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Dhanush</first><last>Bekal</last></author>
       <author><first>Yi</first><last>Luan</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
@@ -2944,7 +2944,7 @@
     <paper id="239">
       <title>Open Information Extraction from Question-Answer Pairs</title>
       <author><first>Nikita</first><last>Bhutani</last></author>
-      <author><first>Yoshihiko</first><last>Suhara</last></author>
+      <author id="yoshi-suhara"><first>Yoshihiko</first><last>Suhara</last></author>
       <author><first>Wang-Chiew</first><last>Tan</last></author>
       <author><first>Alon</first><last>Halevy</last></author>
       <author><first>H. V.</first><last>Jagadish</last></author>
@@ -2979,7 +2979,7 @@
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Lei</first><last>Shu</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>2324–2335</pages>
       <abstract>Question-answering plays an important role in e-commerce as it allows potential customers to actively seek crucial information about products or services to help their purchase decision making. Inspired by the recent success of machine reading comprehension (MRC) on formal documents, this paper explores the potential of turning customer reviews into a large source of knowledge that can be exploited to answer user questions. We call this problem Review Reading Comprehension (RRC). To the best of our knowledge, no existing work has been done on RRC. In this work, we first build an RRC dataset called ReviewRC based on a popular benchmark for aspect-based sentiment analysis. Since ReviewRC has limited training examples for RRC (and also for aspect-based sentiment analysis), we then explore a novel post-training approach on the popular language model BERT to enhance the performance of fine-tuning of BERT for RRC. To show the generality of the approach, the proposed post-training is also applied to some other review-based tasks such as aspect extraction and aspect sentiment classification in aspect-based sentiment analysis. Experimental results demonstrate that the proposed post-training is highly effective.</abstract>
       <url hash="cf993df7">N19-1242</url>
@@ -3004,12 +3004,12 @@
     <paper id="244">
       <title>Be Consistent! Improving Procedural Text Comprehension using Label Consistency</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Antoine</first><last>Bosselut</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>2347–2356</pages>
       <abstract>Our goal is procedural text comprehension, namely tracking how the properties of entities (e.g., their location) change with time given a procedural text (e.g., a paragraph about photosynthesis, a recipe). This task is challenging as the world is changing throughout the text, and despite recent advances, current systems still struggle with this task. Our approach is to leverage the fact that, for many procedural texts, multiple independent descriptions are readily available, and that predictions from them should be consistent (label consistency). We present a new learning framework that leverages label consistency during training, allowing consistency bias to be built into the model. Evaluation on a standard benchmark dataset for procedural text, ProPara (Dalvi et al., 2018), shows that our approach significantly improves prediction performance (F1) over prior state-of-the-art systems.</abstract>
       <url hash="eba535ac">N19-1244</url>
@@ -3021,7 +3021,7 @@
       <author><first>Aida</first><last>Amini</last></author>
       <author><first>Saadia</first><last>Gabriel</last></author>
       <author><first>Shanchuan</first><last>Lin</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>2357–2367</pages>
@@ -3047,7 +3047,7 @@
     </paper>
     <paper id="247">
       <title>An Encoding Strategy Based Word-Character <fixed-case>LSTM</fixed-case> for <fixed-case>C</fixed-case>hinese <fixed-case>NER</fixed-case></title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Tongge</first><last>Xu</last></author>
       <author><first>Qinghua</first><last>Xu</last></author>
       <author><first>Jiayu</first><last>Song</last></author>
@@ -3076,7 +3076,7 @@
       <title><fixed-case>SC</fixed-case>-<fixed-case>LSTM</fixed-case>: Learning Task-Specific Representations in Multi-Task Learning for Sequence Labeling</title>
       <author><first>Peng</first><last>Lu</last></author>
       <author><first>Ting</first><last>Bai</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>2396–2406</pages>
       <abstract>Multi-task learning (MTL) has been studied recently for sequence labeling. Typically, auxiliary tasks are selected specifically in order to improve the performance of a target task. Jointly learning multiple tasks in a way that benefit all of them simultaneously can increase the utility of MTL. In order to do so, we propose a new LSTM cell which contains both shared parameters that can learn from all tasks, and task-specific parameters that can learn task-specific information. We name it a Shared-Cell Long-Short Term Memory SC-LSTM. Experimental results on three sequence labeling benchmarks (named-entity recognition, text chunking, and part-of-speech tagging) demonstrate the effectiveness of our SC-LSTM cell.</abstract>
       <url hash="3475d65c">N19-1249</url>
@@ -3099,7 +3099,7 @@
       <author><first>Ophélie</first><last>Lacroix</last></author>
       <author><first>Marek</first><last>Rei</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2418–2427</pages>
       <abstract>While rule-based detection of subject-verb agreement (SVA) errors is sensitive to syntactic parsing errors and irregularities and exceptions to the main rules, neural sequential labelers have a tendency to overfit their training data. We observe that rule-based error generation is less sensitive to syntactic parsing errors and irregularities than error detection and explore a simple, yet efficient approach to getting the best of both worlds: We train neural sequential labelers on the combination of large volumes of silver standard data, obtained through rule-based error generation, and gold standard data. We show that our simple protocol leads to more robust detection of SVA errors on both in-domain and out-of-domain data, as well as in the context of other errors and long-distance dependencies; and across four standard benchmarks, the induced model on average achieves a new state of the art.</abstract>
       <url hash="827b874e">N19-1251</url>
@@ -3120,10 +3120,10 @@
     </paper>
     <paper id="253">
       <title>On Difficulties of Cross-Lingual Transfer with Order Differences: A Case Study on Dependency Parsing</title>
-      <author><first>Wasi</first><last>Ahmad</last></author>
+      <author id="wasi-ahmad"><first>Wasi</first><last>Ahmad</last></author>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <pages>2440–2452</pages>
@@ -3150,7 +3150,7 @@
       <author><first>Hong-You</first><last>Chen</last></author>
       <author><first>Chin-Hua</first><last>Hu</last></author>
       <author><first>Leila</first><last>Wehbe</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>2465–2474</pages>
       <abstract>Unsupervised document representation learning is an important task providing pre-trained features for NLP applications. Unlike most previous work which learn the embedding based on self-prediction of the surface of text, we explicitly exploit the inter-document information and directly model the relations of documents in embedding space with a discriminative network and a novel objective. Extensive experiments on both small and large public datasets show the competitiveness of the proposed method. In evaluations on standard document classification, our model has errors that are 5 to 13% lower than state-of-the-art unsupervised embedding models. The reduction in error is even more pronounced in scarce label setting.</abstract>
       <url hash="2f2582a4">N19-1255</url>
@@ -3173,7 +3173,7 @@
     <paper id="257">
       <title><fixed-case>Z</fixed-case>ero-Shot Cross-Lingual Opinion Target Extraction</title>
       <author><first>Soufian</first><last>Jebbara</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>2486–2495</pages>
       <abstract>Aspect-based sentiment analysis involves the recognition of so called opinion target expressions (OTEs). To automatically extract OTEs, supervised learning algorithms are usually employed which are trained on manually annotated corpora. The creation of these corpora is labor-intensive and sufficiently large datasets are therefore usually only available for a very narrow selection of languages and domains. In this work, we address the lack of available annotated data for specific languages by proposing a zero-shot cross-lingual approach for the extraction of opinion target expressions. We leverage multilingual word embeddings that share a common vector space across various languages and incorporate these into a convolutional neural network architecture for OTE extraction. Our experiments with 5 languages give promising results: We can successfully train a model on annotated data of a source language and perform accurate prediction on a target language without ever using any annotated samples in that target language. Depending on the source and target language pairs, we reach performances in a zero-shot regime of up to 77% of a model trained on target language data. Furthermore, we can increase this performance up to 87% of a baseline model trained on target language data by performing cross-lingual learning from multiple source languages.</abstract>
       <url hash="277f240f">N19-1257</url>
@@ -3199,9 +3199,9 @@
       <title>Target-oriented Opinion Words Extraction with Target-fused Neural Sequence Labeling</title>
       <author><first>Zhifang</first><last>Fan</last></author>
       <author><first>Zhen</first><last>Wu</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>2509–2518</pages>
       <abstract>Opinion target extraction and opinion words extraction are two fundamental subtasks in Aspect Based Sentiment Analysis (ABSA). Recently, many methods have made progress on these two tasks. However, few works aim at extracting opinion targets and opinion words as pairs. In this paper, we propose a novel sequence labeling subtask for ABSA named TOWE (Target-oriented Opinion Words Extraction), which aims at extracting the corresponding opinion words for a given opinion target. A target-fused sequence labeling neural network model is designed to perform this task. The opinion target information is well encoded into context by an Inward-Outward LSTM. Then left and right contexts of the opinion target and the global context are combined to find the corresponding opinion words. We build four datasets for TOWE based on several popular ABSA benchmarks from laptop and restaurant reviews. The experimental results show that our proposed model outperforms the other compared methods significantly. We believe that our work may not only be helpful for downstream sentiment analysis task, but can also be used for pair-wise opinion summarization.</abstract>
       <url hash="6eb42bf6">N19-1259</url>
@@ -3224,7 +3224,7 @@
       <title>Automatic learner summary assessment for reading comprehension</title>
       <author><first>Menglin</first><last>Xia</last></author>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>2532–2542</pages>
       <abstract>Automating the assessment of learner summary provides a useful tool for assessing learner reading comprehension. We present a summarization task for evaluating non-native reading comprehension and propose three novel approaches to automatically assess the learner summaries. We evaluate our models on two datasets we created and show that our models outperform traditional approaches that rely on exact word match on this task. Our best model produces quality assessments close to professional examiners.</abstract>
       <url hash="01290ebe">N19-1261</url>
@@ -3244,7 +3244,7 @@
     <paper id="263">
       <title>Text Generation with Exemplar-based Adaptive Decoding</title>
       <author><first>Hao</first><last>Peng</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
@@ -3272,9 +3272,9 @@
       <author><first>Aashish</first><last>Venkatesh</last></author>
       <author><first>Tim</first><last>Baumgärtner</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>2578–2587</pages>
       <abstract>We propose a grounded dialogue state encoder which addresses a foundational issue on how to integrate visual grounding with dialogue system components. As a test-bed, we focus on the GuessWhat?! game, a two-player game where the goal is to identify an object in a complex visual scene by asking a sequence of yes/no questions. Our visually-grounded encoder leverages synergies between guessing and asking questions, as it is trained jointly using multi-task learning. We further enrich our model via a cooperative learning regime. We show that the introduction of both the joint architecture and cooperative learning lead to accuracy improvements over the baseline system. We compare our approach to an alternative system which extends the baseline with reinforcement learning. Our in-depth analysis shows that the linguistic skills of the two models differ dramatically, despite approaching comparable performance levels. This points at the importance of analyzing the linguistic output of competing systems beyond numeric comparison solely based on task success.</abstract>
       <url hash="6785731d">N19-1265</url>
@@ -3334,7 +3334,7 @@
       <author><first>Kai</first><last>Sun</last></author>
       <author><first>Dian</first><last>Yu</last></author>
       <author><first>Dong</first><last>Yu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>2633–2643</pages>
       <abstract>Reading strategies have been shown to improve comprehension levels, especially for readers lacking adequate prior knowledge. Just as the process of knowledge accumulation is time-consuming for human readers, it is resource-demanding to impart rich general domain knowledge into a deep language model via pre-training. Inspired by reading strategies identified in cognitive science, and given limited computational resources - just a pre-trained model and a fixed number of training instances - we propose three general strategies aimed to improve non-extractive machine reading comprehension (MRC): (i) BACK AND FORTH READING that considers both the original and reverse order of an input sequence, (ii) HIGHLIGHTING, which adds a trainable embedding to the text embedding of tokens that are relevant to the question and candidate answers, and (iii) SELF-ASSESSMENT that generates practice questions and candidate answers directly from the text in an unsupervised manner. By fine-tuning a pre-trained language model (Radford et al., 2018) with our proposed strategies on the largest general domain multiple-choice MRC dataset RACE, we obtain a 5.8% absolute increase in accuracy over the previous best result achieved by the same pre-trained model fine-tuned on RACE without the use of strategies. We further fine-tune the resulting model on a target MRC task, leading to an absolute improvement of 6.2% in average accuracy over previous state-of-the-art approaches on six representative non-extractive MRC datasets from different domains (i.e., ARC, OpenBookQA, MCTest, SemEval-2018 Task 11, ROCStories, and MultiRC). These results demonstrate the effectiveness of our proposed strategies and the versatility and general applicability of our fine-tuned models that incorporate these strategies. Core code is available at <url>https://github.com/nlpdata/strategy/</url>.</abstract>
       <url hash="164121b2">N19-1270</url>
@@ -3372,8 +3372,8 @@
       <author><first>Pradeep</first><last>Dasigi</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>Shikhar</first><last>Murty</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2669–2680</pages>
       <abstract>Training semantic parsers from question-answer pairs typically involves searching over an exponentially large space of logical forms, and an unguided search can easily be misled by spurious logical forms that coincidentally evaluate to the correct answer. We propose a novel iterative training algorithm that alternates between searching for consistent logical forms and maximizing the marginal likelihood of the retrieved ones. This training scheme lets us iteratively train models that provide guidance to subsequent ones to search for logical forms of increasing complexity, thus dealing with the problem of spuriousness. We evaluate these techniques on two hard datasets: WikiTableQuestions (WTQ) and Cornell Natural Language Visual Reasoning (NLVR), and show that our training algorithm outperforms the previous best systems, on WTQ in a comparable setting, and on NLVR with significantly less supervision.</abstract>
       <url hash="25daa9ab">N19-1273</url>
@@ -3398,7 +3398,7 @@
       <author><first>Shiva</first><last>Taslimipoor</last></author>
       <author><first>Samaneh</first><last>Kouchaki</last></author>
       <author><first>Le An</first><last>Ha</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>2692–2698</pages>
       <abstract>We introduce a new method to tag Multiword Expressions (MWEs) using a linguistically interpretable language-independent deep learning architecture. We specifically target discontinuity, an under-explored aspect that poses a significant challenge to computational treatment of MWEs. Two neural architectures are explored: Graph Convolutional Network (GCN) and multi-head self-attention. GCN leverages dependency parse information, and self-attention attends to long-range relations. We finally propose a combined model that integrates complementary information from both, through a gating mechanism. The experiments on a standard multilingual dataset for verbal MWEs show that our model outperforms the baselines not only in the case of discontinuous MWEs but also in overall F-score.</abstract>
       <url hash="668e8d28">N19-1275</url>
@@ -3409,7 +3409,7 @@
       <title>Incorporating Word Attention into Character-Based Word Segmentation</title>
       <author><first>Shohei</first><last>Higashiyama</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Masao</first><last>Ideuchi</last></author>
       <author><first>Yoshiaki</first><last>Oida</last></author>
       <author><first>Yohei</first><last>Sakamoto</last></author>
@@ -3424,7 +3424,7 @@
       <title><fixed-case>VCWE</fixed-case>: Visual Character-Enhanced Word Embeddings</title>
       <author><first>Chi</first><last>Sun</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2710–2719</pages>
       <abstract>Chinese is a logographic writing system, and the shape of Chinese characters contain rich syntactic and semantic information. In this paper, we propose a model to learn Chinese word embeddings via three-level composition: (1) a convolutional neural network to extract the intra-character compositionality from the visual shape of a character; (2) a recurrent neural network with self-attention to compose character representation into word embeddings; (3) the Skip-Gram framework to capture non-compositionality directly from the contextual information. Evaluations demonstrate the superior performance of our model on four tasks: word similarity, sentiment analysis, named entity recognition and part-of-speech tagging.</abstract>
       <url hash="363725b5">N19-1277</url>
@@ -3446,7 +3446,7 @@
     <paper id="279">
       <title>Improving Cross-Domain <fixed-case>C</fixed-case>hinese Word Segmentation with Word Embeddings</title>
       <author><first>Yuxiao</first><last>Ye</last></author>
-      <author><first>Weikang</first><last>Li</last></author>
+      <author id="weigang-li"><first>Weikang</first><last>Li</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Likun</first><last>Qiu</last></author>
       <author><first>Jian</first><last>Sun</last></author>
@@ -3460,7 +3460,7 @@
       <title>Neural Semi-<fixed-case>M</fixed-case>arkov Conditional Random Fields for Robust Character-Based Part-of-Speech Tagging</title>
       <author><first>Apostolos</first><last>Kemos</last></author>
       <author><first>Heike</first><last>Adel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>2736–2743</pages>
       <abstract>Character-level models of tokens have been shown to be effective at dealing with within-token noise and out-of-vocabulary words. However, they often still rely on correct token boundaries. In this paper, we propose to eliminate the need for tokenizers with an end-to-end character-level semi-Markov conditional random field. It uses neural networks for its character and segment representations. We demonstrate its effectiveness in multilingual settings and when token boundaries are noisy: It matches state-of-the-art part-of-speech taggers for various languages and significantly outperforms them on a noisy English version of a benchmark dataset. Our code and the noisy dataset are publicly available at <url>http://cistern.cis.lmu.de/semiCRF</url>.</abstract>
       <url hash="c5e310ca">N19-1280</url>
@@ -3503,7 +3503,7 @@
       <title>A Dynamic Speaker Model for Conversational Interactions</title>
       <author><first>Hao</first><last>Cheng</last></author>
       <author><first>Hao</first><last>Fang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>2772–2785</pages>
       <abstract>Individual differences in speakers are reflected in their language use as well as in their interests and opinions. Characterizing these differences can be useful in human-computer interaction, as well as analysis of human-human conversations. In this work, we introduce a neural model for learning a dynamically updated speaker embedding in a conversational context. Initial model training is unsupervised, using context-sensitive language generation as an objective, with the context being the conversation history. Further fine-tuning can leverage task-dependent supervised training. The learned neural representation of speakers is shown to be useful for content ranking in a socialbot and dialog act prediction in human-human conversations.</abstract>
       <url hash="9ed24849">N19-1284</url>
@@ -3514,7 +3514,7 @@
       <title>Fluent Translations from Disfluent Speech in End-to-End Speech Translation</title>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>2786–2792</pages>
       <abstract>Spoken language translation applications for speech suffer due to conversational speech phenomena, particularly the presence of disfluencies. With the rise of end-to-end speech translation models, processing steps such as disfluency removal that were previously an intermediate step between speech recognition and machine translation need to be incorporated into model architectures. We use a sequence-to-sequence model to translate from noisy, disfluent speech to fluent text with disfluencies removed using the recently collected ‘copy-edited’ references for the Fisher Spanish-English dataset. We are able to directly generate fluent translations and introduce considerations about how to evaluate success on this task. This work provides a baseline for a new task, implicitly removing disfluencies in end-to-end translation of conversational speech.</abstract>
       <url hash="6f270e4d">N19-1285</url>
@@ -3526,7 +3526,7 @@
       <author><first>Van-Hien</first><last>Tran</last></author>
       <author><first>Van-Thuy</first><last>Phi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>2793–2798</pages>
       <abstract>Recently, relation classification has gained much success by exploiting deep neural networks. In this paper, we propose a new model effectively combining Segment-level Attention-based Convolutional Neural Networks (SACNNs) and Dependency-based Recurrent Neural Networks (DepRNNs). While SACNNs allow the model to selectively focus on the important information segment from the raw sequence, DepRNNs help to handle the long-distance relations from the shortest dependency path of relation entities. Experiments on the SemEval-2010 Task 8 dataset show that our model is comparable to the state-of-the-art without using any external lexical features.</abstract>
       <url hash="4c852a3b">N19-1286</url>
@@ -3537,8 +3537,8 @@
       <title>Document-Level Event Factuality Identification via Adversarial Neural Network</title>
       <author><first>Zhong</first><last>Qian</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2799–2809</pages>
       <abstract>Document-level event factuality identification is an important subtask in event factuality and is crucial for discourse understanding in Natural Language Processing (NLP). Previous studies mainly suffer from the scarcity of suitable corpus and effective methods. To solve these two issues, we first construct a corpus annotated with both document- and sentence-level event factuality information on both English and Chinese texts. Then we present an LSTM neural network based on adversarial training with both intra- and inter-sequence attentions to identify document-level event factuality. Experimental results show that our neural network model can outperform various baselines on the constructed corpus.</abstract>
       <url hash="69ad89c4">N19-1287</url>
@@ -3673,8 +3673,8 @@
     <paper id="298">
       <title>A Richer-but-Smarter Shortest Dependency Path with Attentive Augmentation for Relation Extraction</title>
       <author><first>Duy-Cat</first><last>Can</last></author>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
-      <author><first>Quang-Thuy</first><last>Ha</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="quang-thuy-ha"><first>Quang-Thuy</first><last>Ha</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
       <pages>2902–2912</pages>
       <abstract>To extract the relationship between two entities in a sentence, two common approaches are (1) using their shortest dependency path (SDP) and (2) using an attention model to capture a context-based representation of the sentence. Each approach suffers from its own disadvantage of either missing or redundant information. In this work, we propose a novel model that combines the advantages of these two approaches. This is based on the basic information in the SDP enhanced with information selected by several attention mechanisms with kernel filters, namely RbSP (Richer-but-Smarter SDP). To exploit the representation behind the RbSP structure effectively, we develop a combined deep neural model with a LSTM network on word sequences and a CNN on RbSP. Experimental results on the SemEval-2010 dataset demonstrate improved performance over competitive baselines. The data and source code are available at <url>https://github.com/catcd/RbSP</url>.</abstract>
@@ -3699,7 +3699,7 @@
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>2924–2936</pages>
       <abstract>In this paper we study yes/no questions that are naturally occurring — meaning that they are generated in unprompted and unconstrained settings. We build a reading comprehension dataset, BoolQ, of such questions, and show that they are unexpectedly challenging. They often query for complex, non-factoid information, and require difficult entailment-like inference to solve. We also explore the effectiveness of a range of transfer learning baselines. We find that transferring from entailment data is more effective than transferring from paraphrase or extractive QA data, and that it, surprisingly, continues to be very beneficial even when starting from massive pre-trained language models such as BERT. Our best method trains BERT on MultiNLI and then re-trains it on our train set. It achieves 80.4% accuracy compared to 90% accuracy of human annotators (and 62% majority-baseline), leaving a significant gap for future work.</abstract>
@@ -3752,7 +3752,7 @@
       <author><first>James</first><last>Zou</last></author>
       <author><first>Jesse</first><last>Shapiro</last></author>
       <author><first>Matthew</first><last>Gentzkow</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>2970–3005</pages>
       <abstract>We provide an NLP framework to uncover four linguistic dimensions of political polarization in social media: topic choice, framing, affect and illocutionary force. We quantify these aspects with existing lexical methods, and propose clustering of tweet embeddings as a means to identify salient topics for analysis across events; human evaluations show that our approach generates more cohesive topics than traditional LDA-based models. We apply our methods to study 4.4M tweets on 21 mass shootings. We provide evidence that the discussion of these events is highly polarized politically and that this polarization is primarily driven by partisan differences in framing rather than topic choice. We identify framing devices, such as grounding and the contrasting use of the terms “terrorist” and “crazy”, that contribute to polarization. Results pertaining to topic choice, affect and illocutionary force suggest that Republicans focus more on the shooter and event-specific facts (news) while Democrats focus more on the victims and call for policy changes. Our work contributes to a deeper understanding of the way group divisions manifest in language and to computational methods for studying them.</abstract>
       <url hash="c32776f5">N19-1304</url>
@@ -3806,7 +3806,7 @@
       <author><first>Dave</first><last>Wadden</last></author>
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Amy</first><last>Shah</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>3036–3046</pages>
       <abstract>We introduce a general framework for several information extraction tasks that share span representations using dynamically constructed span graphs. The graphs are dynamically constructed by selecting the most confident entity spans and linking these nodes with confidence-weighted relation types and coreferences. The dynamic span graph allow coreference and relation type confidences to propagate through the graph to iteratively refine the span representations. This is unlike previous multi-task frameworks for information extraction in which the only interaction between tasks is in the shared first-layer LSTM. Our framework significantly outperforms state-of-the-art on multiple information extraction tasks across multiple datasets reflecting different domains. We further observe that the span enumeration approach is good at detecting nested span entities, with significant F1 score improvement on the ACE dataset.</abstract>
@@ -3819,7 +3819,7 @@
       <title><fixed-case>O</fixed-case>pen<fixed-case>C</fixed-case>eres: <fixed-case>W</fixed-case>hen Open Information Extraction Meets the Semi-Structured Web</title>
       <author><first>Colin</first><last>Lockard</last></author>
       <author><first>Prashant</first><last>Shiralkar</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <pages>3047–3056</pages>
       <abstract>Open Information Extraction (OpenIE), the problem of harvesting triples from natural language text whose predicate relations are not aligned to any pre-defined ontology, has been a popular subject of research for the last decade. However, this research has largely ignored the vast quantity of facts available in semi-structured webpages. In this paper, we define the problem of OpenIE from semi-structured websites to extract such facts, and present an approach for solving it. We also introduce a labeled evaluation dataset to motivate research in this area. Given a semi-structured website and a set of seed facts for some relations existing on its pages, we employ a semi-supervised label propagation technique to automatically create training data for the relations present on the site. We then use this training data to learn a classifier for relation extraction. Experimental results of this method on our new benchmark dataset obtained a precision of over 70%. A larger scale extraction experiment on 31 websites in the movie vertical resulted in the extraction of over 2 million triples.</abstract>
       <url hash="eb5c2c10">N19-1309</url>
@@ -3841,7 +3841,7 @@
       <title>Neural Machine Translation of Text from Non-Native Speakers</title>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Alison</first><last>Lui</last></author>
-      <author><first>Toan Q.</first><last>Nguyen</last></author>
+      <author id="toan-q-nguyen"><first>Toan Q.</first><last>Nguyen</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <pages>3070–3080</pages>
       <abstract>Neural Machine Translation (NMT) systems are known to degrade when confronted with noisy data, especially when the system is trained only on clean data. In this paper, we show that augmenting training data with sentences containing artificially-introduced grammatical errors can make the system more robust to such errors. In combination with an automatic grammar error correction system, we can recover 1.0 BLEU out of 2.4 BLEU lost due to grammatical errors. We also present a set of Spanish translations of the JFLEG grammar error correction corpus, which allows for testing NMT robustness to real grammatical errors.</abstract>
@@ -3864,8 +3864,8 @@
     <paper id="313">
       <title>Selective Attention for Context-aware Neural Machine Translation</title>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>3092–3102</pages>
       <abstract>Despite the progress made in sentence-level NMT, current systems still fall short at achieving fluent, good quality translation for a full document. Recent works in context-aware NMT consider only a few previous sentences as context and may not scale to entire documents. To this end, we propose a novel and scalable top-down approach to hierarchical attention for context-aware NMT which uses sparse attention to selectively focus on relevant sentences in the document context and then attends to key words in those sentences. We also propose single-level attention approaches based on sentence or word-level information in the context. The document-level context representation, produced from these attention modules, is integrated into the encoder or decoder of the Transformer model depending on whether we use monolingual or bilingual context. Our experiments and evaluation on English-German datasets in different document MT settings show that our selective attention approach not only significantly outperforms context-agnostic baselines but also surpasses context-aware baselines in most cases.</abstract>
       <url hash="8ed8f169">N19-1313</url>
@@ -4023,9 +4023,9 @@
       <title>Exploiting Noisy Data in Distant Supervision Relation Classification</title>
       <author><first>Kaijia</first><last>Yang</last></author>
       <author><first>Liang</first><last>He</last></author>
-      <author><first>Xin-yu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xin-yu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>3216–3225</pages>
       <abstract>Distant supervision has obtained great progress on relation classification task. However, it still suffers from noisy labeling problem. Different from previous works that underutilize noisy data which inherently characterize the property of classification, in this paper, we propose RCEND, a novel framework to enhance Relation Classification by Exploiting Noisy Data. First, an instance discriminator with reinforcement learning is designed to split the noisy data into correctly labeled data and incorrectly labeled data. Second, we learn a robust relation classifier in semi-supervised learning way, whereby the correctly and incorrectly labeled data are treated as labeled and unlabeled data respectively. The experimental results show that our method outperforms the state-of-the-art models.</abstract>
       <url hash="d9decd35">N19-1325</url>
@@ -4037,7 +4037,7 @@
       <author><first>Aleksandra</first><last>Piktus</last></author>
       <author><first>Necati Bora</first><last>Edizel</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Rui</first><last>Ferreira</last></author>
       <author><first>Fabrizio</first><last>Silvestri</last></author>
       <pages>3226–3234</pages>
@@ -4049,10 +4049,10 @@
     <paper id="327">
       <title>Learning Relational Representations by Analogy using Hierarchical <fixed-case>S</fixed-case>iamese Networks</title>
       <author><first>Gaetano</first><last>Rossiello</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Robert</first><last>Farrell</last></author>
-      <author><first>Nicolas</first><last>Fauceglia</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="nicolas-r-fauceglia"><first>Nicolas</first><last>Fauceglia</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <pages>3235–3245</pages>
       <abstract>We address relation extraction as an analogy problem by proposing a novel approach to learn representations of relations expressed by their textual mentions. In our assumption, if two pairs of entities belong to the same relation, then those two pairs are analogous. Following this idea, we collect a large set of analogous pairs by matching triples in knowledge bases with web-scale corpora through distant supervision. We leverage this dataset to train a hierarchical siamese network in order to learn entity-entity embeddings which encode relational information through the different linguistic paraphrasing expressing the same relation. We evaluate our model in a one-shot learning task by showing a promising generalization capability in order to classify unseen relation types, which makes this approach suitable to perform automatic knowledge base population with minimal supervision. Moreover, the model can be used to generate pre-trained embeddings which provide a valuable signal when integrated into an existing neural-based model by outperforming the state-of-the-art methods on a downstream relation extraction task.</abstract>
       <url hash="0a3d0f8b">N19-1327</url>
@@ -4095,7 +4095,7 @@
     <paper id="331">
       <title>Continual Learning for Sentence Representations Using Conceptors</title>
       <author><first>Tianlin</first><last>Liu</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>3274–3279</pages>
       <abstract>Distributed representations of sentences have become ubiquitous in natural language processing tasks. In this paper, we consider a continual learning scenario for sentence representations: Given a sequence of corpora, we aim to optimize the sentence encoder with respect to the new corpus while maintaining its accuracy on the old corpora. To address this problem, we propose to initialize sentence encoders with the help of corpus-independent features, and then sequentially update sentence encoders using Boolean operations of conceptor matrices to learn corpus-dependent features. We evaluate our approach on semantic textual similarity tasks and show that our proposed sentence encoder can continually learn features from new corpora while retaining its competence on previously encountered corpora.</abstract>
@@ -4136,7 +4136,7 @@
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>3302–3312</pages>
       <abstract>State-of-the-art LSTM language models trained on large corpora learn sequential contingencies in impressive detail, and have been shown to acquire a number of non-local grammatical dependencies with some success. Here we investigate whether supervision with hierarchical structure enhances learning of a range of grammatical dependencies, a question that has previously been addressed only for subject-verb agreement. Using controlled experimental methods from psycholinguistics, we compare the performance of word-based LSTM models versus Recurrent Neural Network Grammars (RNNGs) (Dyer et al. 2016) which represent hierarchical syntactic structure and use neural control to deploy it in left-to-right processing, on two classes of non-local grammatical dependencies in English—Negative Polarity licensing and Filler-Gap Dependencies—tested in a range of configurations. Using the same training data for both models, we find that the RNNG outperforms the LSTM on both types of grammatical dependencies and even learns many of the Island Constraints on the filler-gap dependency. Structural supervision thus provides data efficiency advantages over purely string-based training of neural language models in acquiring human-like generalizations about non-local grammatical dependencies.</abstract>
       <url hash="f88ff96b">N19-1334</url>
@@ -4202,7 +4202,7 @@
       <title>Better, Faster, Stronger Sequence Tagging Constituent Parsers</title>
       <author><first>David</first><last>Vilares</last></author>
       <author><first>Mostafa</first><last>Abdou</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>3372–3383</pages>
       <abstract>Sequence tagging models for constituent parsing are faster, but less accurate than other types of parsers. In this work, we address the following weaknesses of such constituent parsers: (a) high error rates around closing brackets of long constituents, (b) large label sets, leading to sparsity, and (c) error propagation arising from greedy decoding. To effectively close brackets, we train a model that learns to switch between tagging schemes. To reduce sparsity, we decompose the label set and use multi-task learning to jointly learn to predict sublabels. Finally, we mitigate issues from greedy decoding through auxiliary losses and sentence-level fine-tuning with policy gradient. Combining these techniques, we clearly surpass the performance of sequence tagging constituent parsers on the English and Chinese Penn Treebanks, and reduce their parsing time even further. On the SPMRL datasets, we observe even greater improvements across the board, including a new state of the art on Basque, Hebrew, Polish and Swedish.</abstract>
       <url hash="b153eb64">N19-1341</url>
@@ -4223,7 +4223,7 @@
       <title>Decomposed Local Models for Coordinate Structure Parsing</title>
       <author><first>Hiroki</first><last>Teranishi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>3394–3403</pages>
       <abstract>We propose a simple and accurate model for coordination boundary identification. Our model decomposes the task into three sub-tasks during training; finding a coordinator, identifying inside boundaries of a pair of conjuncts, and selecting outside boundaries of it. For inference, we make use of probabilities of coordinators and conjuncts in the CKY parsing to find the optimal combination of coordinate structures. Experimental results demonstrate that our model achieves state-of-the-art results, ensuring that the global structure of coordinations is consistent.</abstract>
       <url hash="e6ca9756">N19-1343</url>
@@ -4318,7 +4318,7 @@
     <paper id="351">
       <title>Mining Discourse Markers for Unsupervised Sentence Representation Learning</title>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Tim</first><last>Van De Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van De Cruys</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>3477–3486</pages>
@@ -4333,7 +4333,7 @@
       <author><first>Wenhu</first><last>Chen</last></author>
       <author><first>Yu</first><last>Su</last></author>
       <author><first>Yilin</first><last>Shen</last></author>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Xifeng</first><last>Yan</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>3487–3497</pages>
@@ -4396,7 +4396,7 @@
     <paper id="357">
       <title><fixed-case>A</fixed-case>ttention is not <fixed-case>E</fixed-case>xplanation</title>
       <author><first>Sarthak</first><last>Jain</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>3543–3556</pages>
       <abstract>Attention mechanisms have seen wide adoption in neural NLP models. In addition to improving predictive performance, these are often touted as affording transparency: models equipped with attention provide a distribution over attended-to input units, and this is often presented (at least implicitly) as communicating the relative importance of inputs. However, it is unclear what relationship exists between attention weights and model outputs. In this work we perform extensive experiments across a variety of NLP tasks that aim to assess the degree to which attention weights provide meaningful “explanations” for predictions. We find that they largely do not. For example, learned attention weights are frequently uncorrelated with gradient-based measures of feature importance, and one can identify very different attention distributions that nonetheless yield equivalent predictions. Our findings show that standard attention modules do not provide meaningful explanations and should not be treated as though they do.</abstract>
       <url hash="7dd3a9ed">N19-1357</url>
@@ -4429,8 +4429,8 @@
     </paper>
     <paper id="360">
       <title>Context Dependent Semantic Parsing over Temporally Structured Data</title>
-      <author><first>Charles</first><last>Chen</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>3576–3585</pages>
       <abstract>We describe a new semantic parsing setting that allows users to query the system using both natural language questions and actions within a graphical user interface. Multiple time series belonging to an entity of interest are stored in a database and the user interacts with the system to obtain a better understanding of the entity’s state and behavior, entailing sequences of actions and questions whose answers may depend on previous factual or navigational interactions. We design an LSTM-based encoder-decoder architecture that models context dependency through copying mechanisms and multiple levels of attention over inputs and previous outputs. When trained to predict tokens using supervised learning, the proposed architecture substantially outperforms standard sequence generation baselines. Training the architecture using policy gradient leads to further improvements in performance, reaching a sequence-level accuracy of 88.7% on artificial data and 74.8% on real data.</abstract>
       <url hash="e624e949">N19-1360</url>
@@ -4455,8 +4455,8 @@
       <author><first>Mandar</first><last>Joshi</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>3597–3608</pages>
       <abstract>Reasoning about implied relationships (e.g. paraphrastic, common sense, encyclopedic) between pairs of words is crucial for many cross-sentence inference problems. This paper proposes new methods for learning and using embeddings of word pairs that implicitly represent background knowledge about such relationships. Our pairwise embeddings are computed as a compositional function of each word’s representation, which is learned by maximizing the pointwise mutual information (PMI) with the contexts in which the the two words co-occur. We add these representations to the cross-sentence attention layer of existing inference models (e.g. BiDAF for QA, ESIM for NLI), instead of extending or replacing existing word embeddings. Experiments show a gain of 2.7% on the recently released SQuAD 2.0 and 1.3% on MultiNLI. Our representations also aid in better generalization with gains of around 6-7% on adversarial SQuAD datasets, and 8.8% on the adversarial entailment test set by Glockner et al. (2018).</abstract>
       <url hash="a34e3b80">N19-1362</url>
@@ -4468,7 +4468,7 @@
       <author><first>Ashutosh</first><last>Kumar</last></author>
       <author><first>Satwik</first><last>Bhattamishra</last></author>
       <author><first>Manik</first><last>Bhandari</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>3609–3619</pages>
       <abstract>Inducing diversity in the task of paraphrasing is an important problem in NLP with applications in data augmentation and conversational agents. Previous paraphrasing approaches have mainly focused on the issue of generating semantically similar paraphrases while paying little attention towards diversity. In fact, most of the methods rely solely on top-k beam search sequences to obtain a set of paraphrases. The resulting set, however, contains many structurally similar sentences. In this work, we focus on the task of obtaining highly diverse paraphrases while not compromising on paraphrasing quality. We provide a novel formulation of the problem in terms of monotone submodular function maximization, specifically targeted towards the task of paraphrasing. Additionally, we demonstrate the effectiveness of our method for data augmentation on multiple tasks such as intent classification and paraphrase recognition. In order to drive further research, we have made the source code available.</abstract>
       <url hash="e74e296a">N19-1363</url>
@@ -4482,8 +4482,8 @@
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Jiaao</first><last>Chen</last></author>
       <author><first>Zichao</first><last>Yang</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3620–3630</pages>
       <abstract>Modeling what makes a request persuasive - eliciting the desired response from a reader - is critical to the study of propaganda, behavioral economics, and advertising. Yet current models can’t quantify the persuasiveness of requests or extract successful persuasive strategies. Building on theories of persuasion, we propose a neural network to quantify persuasiveness and identify the persuasive strategies in advocacy requests. Our semi-supervised hierarchical neural network model is supervised by the number of people persuaded to take actions and partially supervised at the sentence level with human-labeled rhetorical strategies. Our method outperforms several baselines, uncovers persuasive strategies - offering increased interpretability of persuasive speech - and has applications for other situations with document-level supervision but only partial sentence supervision.</abstract>
       <url hash="bbd0c86f">N19-1364</url>
@@ -4501,7 +4501,7 @@
       <author><first>Olivia</first><last>Li</last></author>
       <author><first>Sandhini</first><last>Agarwal</last></author>
       <author><first>Joshua D.</first><last>Greene</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <author><first>Lauri</first><last>Karttunen</last></author>
       <pages>3631–3648</pages>
@@ -4513,7 +4513,7 @@
     <paper id="366">
       <title>Structural Neural Encoders for <fixed-case>AMR</fixed-case>-to-text Generation</title>
       <author><first>Marco</first><last>Damonte</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>3649–3658</pages>
       <abstract>AMR-to-text generation is a problem recently introduced to the NLP community, in which the goal is to generate sentences from Abstract Meaning Representation (AMR) graphs. Sequence-to-sequence models can be used to this end by converting the AMR graphs to strings. Approaching the problem while working directly with graphs requires the use of graph-to-sequence models that encode the AMR graph into a vector representation. Such encoding has been shown to be beneficial in the past, and unlike sequential encoding, it allows us to explicitly capture reentrant structures in the AMR graphs. We investigate the extent to which reentrancies (nodes with multiple parents) have an impact on AMR-to-text generation by comparing graph encoders to tree encoders, where reentrancies are not preserved. We show that improvements in the treatment of reentrancies and long-range dependencies contribute to higher overall scores for graph encoders. Our best model achieves 24.40 BLEU on LDC2015E86, outperforming the state of the art by 1.1 points and 24.54 BLEU on LDC2017T10, outperforming the state of the art by 1.24 points.</abstract>
       <url hash="818d9d96">N19-1366</url>
@@ -4523,7 +4523,7 @@
     </paper>
     <paper id="367">
       <title>Multilingual prediction of <fixed-case>A</fixed-case>lzheimer’s disease through domain adaptation and concept-based language modelling</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Nicklas</first><last>Linz</last></author>
       <author><first>Bai</first><last>Li</last></author>
       <author><first>Kristina</first><last>Lundholm Fors</last></author>
@@ -4554,7 +4554,7 @@
       <title><fixed-case>NLP</fixed-case> Whack-A-Mole: <fixed-case>C</fixed-case>hallenges in Cross-Domain Temporal Expression Extraction</title>
       <author><first>Amy</first><last>Olex</last></author>
       <author><first>Luke</first><last>Maffey</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>3682–3692</pages>
       <abstract>Incorporating domain knowledge is vital in building successful natural language processing (NLP) applications. Many times, cross-domain application of a tool results in poor performance as the tool does not account for domain-specific attributes. The clinical domain is challenging in this aspect due to specialized medical terms and nomenclature, shorthand notation, fragmented text, and a variety of writing styles used by different medical units. Temporal resolution is an NLP task that, in general, is domain-agnostic because temporal information is represented using a limited lexicon. However, domain-specific aspects of temporal resolution are present in clinical texts. Here we explore parsing issues that arose when running our system, a tool built on Newswire text, on clinical notes in the THYME corpus. Many parsing issues were straightforward to correct; however, a few code changes resulted in a cascading series of parsing errors that had to be resolved before an improvement in performance was observed, revealing the complexity temporal resolution and rule-based parsing. Our system now outperforms current state-of-the-art systems on the THYME corpus with little change in its performance on Newswire texts.</abstract>
       <url hash="a68eb4f8">N19-1369</url>
@@ -4577,7 +4577,7 @@
       <author><first>Eric</first><last>Lehman</last></author>
       <author><first>Jay</first><last>DeYoung</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>3705–3717</pages>
       <abstract>How do we know if a particular medical treatment actually works? Ideally one would consult all available evidence from relevant clinical trials. Unfortunately, such results are primarily disseminated in natural language scientific articles, imposing substantial burden on those trying to make sense of them. In this paper, we present a new task and corpus for making this unstructured published scientific evidence actionable. The task entails inferring reported findings from a full-text article describing randomized controlled trials (RCT) with respect to a given intervention, comparator, and outcome of interest, e.g., inferring if a given article provides evidence supporting the use of aspirin to reduce risk of stroke, as compared to placebo. We present a new corpus for this task comprising 10,000+ prompts coupled with full-text articles describing RCTs. Results using a suite of baseline models — ranging from heuristic (rule-based) approaches to attentive neural architectures — demonstrate the difficulty of the task, which we believe largely owes to the lengthy, technical input texts. To facilitate further work on this important, challenging problem we make the corpus, documentation, a website and leaderboard, and all source code for baselines and evaluation publicly available.</abstract>
       <url hash="547ffe6d">N19-1371</url>
@@ -4598,7 +4598,7 @@
     <paper id="373">
       <title><fixed-case>D</fixed-case>ialogue <fixed-case>A</fixed-case>ct <fixed-case>C</fixed-case>lassification with <fixed-case>C</fixed-case>ontext-<fixed-case>A</fixed-case>ware <fixed-case>S</fixed-case>elf-<fixed-case>A</fixed-case>ttention</title>
       <author><first>Vipul</first><last>Raheja</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>3727–3733</pages>
       <abstract>Recent work in Dialogue Act classification has treated the task as a sequence labeling problem using hierarchical deep neural networks. We build on this prior work by leveraging the effectiveness of a context-aware self-attention mechanism coupled with a hierarchical recurrent neural network. We conduct extensive evaluations on standard Dialogue Act classification datasets and show significant improvement over state-of-the-art results on the Switchboard Dialogue Act (SwDA) Corpus. We also investigate the impact of different utterance-level representation learning methods and show that our method is effective at capturing utterance-level semantic text representations while maintaining high accuracy.</abstract>
       <url hash="cbc7bdc2">N19-1373</url>
@@ -4623,7 +4623,7 @@
       <author><first>Revanth</first><last>Gangi Reddy</last></author>
       <author><first>Danish</first><last>Contractor</last></author>
       <author><first>Dinesh</first><last>Raghu</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>3744–3754</pages>
       <abstract>Recent end-to-end task oriented dialog systems use memory architectures to incorporate external knowledge in their dialogs. Current work makes simplifying assumptions about the structure of the knowledge base, such as the use of triples to represent knowledge, and combines dialog utterances (context) as well as knowledge base (KB) results as part of the same memory. This causes an explosion in the memory size, and makes the reasoning over memory harder. In addition, such a memory design forces hierarchical properties of the data to be fit into a triple structure of memory. This requires the memory reader to infer relationships across otherwise connected attributes. In this paper we relax the strong assumptions made by existing architectures and separate memories used for modeling dialog context and KB results. Instead of using triples to store KB results, we introduce a novel multi-level memory architecture consisting of cells for each query and their corresponding results. The multi-level memory first addresses queries, followed by results and finally each key-value pair within a result. We conduct detailed experiments on three publicly available task oriented dialog data sets and we find that our method conclusively outperforms current state-of-the-art models. We report a 15-25% increase in both entity F1 and BLEU scores.</abstract>
       <url hash="09802fab">N19-1375</url>
@@ -4646,7 +4646,7 @@
     <paper id="377">
       <title>Top-Down Structurally-Constrained Neural Response Generation with Lexicalized Probabilistic Context-Free Grammar</title>
       <author><first>Wenchao</first><last>Du</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>3762–3771</pages>
       <abstract>We consider neural language generation under a novel problem setting: generating the words of a sentence according to the order of their first appearance in its lexicalized PCFG parse tree, in a depth-first, left-to-right manner. Unlike previous tree-based language generation methods, our approach is both (i) top-down and (ii) explicitly generating syntactic structure at the same time. In addition, our method combines neural model with symbolic approach: word choice at each step is constrained by its predicted syntactic function. We applied our model to the task of dialog response generation, and found it significantly improves over sequence-to-sequence baseline, in terms of diversity and relevance. We also investigated the effect of lexicalization on language generation, and found that lexicalization schemes that give priority to content words have certain advantages over those focusing on dependency relations.</abstract>
       <url hash="f600048f">N19-1377</url>
@@ -4657,9 +4657,9 @@
       <title>What do Entity-Centric Models Learn? Insights from Entity Linking in Multi-Party Dialogue</title>
       <author><first>Laura</first><last>Aina</last></author>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Ionut-Teodor</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></author>
       <author><first>Matthijs</first><last>Westera</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>3772–3783</pages>
       <abstract>Humans use language to refer to entities in the external world. Motivated by this, in recent years several models that incorporate a bias towards learning entity representations have been proposed. Such entity-centric models have shown empirical success, but we still know little about why. In this paper we analyze the behavior of two recently proposed entity-centric models in a referential task, Entity Linking in Multi-party Dialogue (SemEval 2018 Task 4). We show that these models outperform the state of the art on this task, and that they do better on lower frequency entities than a counterpart model that is not entity-centric, with the same model size. We argue that making models entity-centric naturally fosters good architectural decisions. However, we also show that these models do not really build entity representations and that they make poor use of linguistic context. These negative results underscore the need for model analysis, to test whether the motivations for particular architectures are borne out in how models behave when deployed.</abstract>
       <url hash="6e993e6d">N19-1378</url>
@@ -4671,7 +4671,7 @@
       <author><first>Han</first><last>Li</last></author>
       <author><first>Jihwan</first><last>Lee</last></author>
       <author><first>Sidharth</first><last>Mudgal</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <pages>3784–3794</pages>
       <abstract>Domain classification is the task to map spoken language utterances to one of the natural language understanding domains in intelligent personal digital assistants (IPDAs). This is observed in mainstream IPDAs in industry and third-party domains are developed to enhance the capability of the IPDAs. As more and more new domains are developed very frequently, how to continuously accommodate the new domains still remains challenging. Moreover, if one wants to use personalized information dynamically for better domain classification, it is infeasible to directly adopt existing continual learning approaches. In this paper, we propose CoNDA, a neural-based approach for continuous domain adaption with normalization and regularization. Unlike existing methods that often conduct full model parameter update, CoNDA only updates the necessary parameters in the model for the new domains. Empirical evaluation shows that CoNDA achieves high accuracy on both the accommodated new domains and the existing known domains for which input samples come with personal information, and outperforms the baselines by a large margin.</abstract>
@@ -4696,7 +4696,7 @@
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Ehsan</first><last>Kamalloo</last></author>
       <author><first>Kory</first><last>Mathewson</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <pages>3806–3812</pages>
       <abstract>Evaluating open-domain dialogue systems is difficult due to the diversity of possible correct answers. Automatic metrics such as BLEU correlate weakly with human annotations, resulting in a significant bias across different models and datasets. Some researchers resort to human judgment experimentation for assessing response quality, which is expensive, time consuming, and not scalable. Moreover, judges tend to evaluate a small number of dialogues, meaning that minor differences in evaluation configuration may lead to dissimilar results. In this paper, we present interpretable metrics for evaluating topic coherence by making use of distributed sentence representations. Furthermore, we introduce calculable approximations of human judgment based on conversational coherence by adopting state-of-the-art entailment techniques. Results show that our metrics can be used as a surrogate for human judgment, making it easy to evaluate dialogue systems on large-scale datasets and allowing an unbiased estimate for the quality of the responses.</abstract>
       <url hash="4c1e491b">N19-1381</url>
@@ -4706,7 +4706,7 @@
     <paper id="382">
       <title>On Knowledge distillation from complex networks for response prediction</title>
       <author><first>Siddhartha</first><last>Arora</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Harish G.</first><last>Ramaswamy</last></author>
       <pages>3813–3822</pages>
       <abstract>Recent advances in Question Answering have lead to the development of very complex models which compute rich representations for query and documents by capturing all pairwise interactions between query and document words. This makes these models expensive in space and time, and in practice one has to restrict the length of the documents that can be fed to these models. Such models have also been recently employed for the task of predicting dialog responses from available background documents (e.g., Holl-E dataset). However, here the documents are longer, thereby rendering these complex models infeasible except in select restricted settings. In order to overcome this, we use standard simple models which do not capture all pairwise interactions, but learn to emulate certain characteristics of a complex teacher network. Specifically, we first investigate the conicity of representations learned by a complex model and observe that it is significantly lower than that of simpler models. Based on this insight, we modify the simple architecture to mimic this characteristic. We go further by using knowledge distillation approaches, where the simple model acts as a student and learns to match the output from the complex teacher network. We experiment with the Holl-E dialog data set and show that by mimicking characteristics and matching outputs from a teacher, even a simple network can give improved performance.</abstract>
@@ -4738,7 +4738,7 @@
     <paper id="385">
       <title>Low-Resource Syntactic Transfer with Unsupervised Source Reordering</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>3845–3856</pages>
       <abstract>We describe a cross-lingual transfer method for dependency parsing that takes into account the problem of word order differences between source and target languages. Our model only relies on the Bible, a considerably smaller parallel data than the commonly used parallel data in transfer methods. We use the concatenation of projected trees from the Bible corpus, and the gold-standard treebanks in multiple source languages along with cross-lingual word representations. We demonstrate that reordering the source treebanks before training on them for a target language improves the accuracy of languages outside the European language family. Our experiments on 68 treebanks (38 languages) in the Universal Dependencies corpus achieve a high accuracy for all languages. Among them, our experiments on 16 treebanks of 12 non-European languages achieve an average UAS absolute improvement of 3.3% over a state-of-the-art method.</abstract>
       <url hash="d422a050">N19-1385</url>
@@ -4747,8 +4747,8 @@
     </paper>
     <paper id="386">
       <title>Revisiting Adversarial Autoencoder for Unsupervised Word Translation with Cycle Consistency and Improved Training</title>
-      <author><first>Tasnim</first><last>Mohiuddin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>3857–3867</pages>
       <abstract>Adversarial training has shown impressive success in learning bilingual dictionary without any parallel data by mapping monolingual embeddings to a shared space. However, recent work has shown superior performance for non-adversarial methods in more challenging language pairs. In this work, we revisit adversarial autoencoder for unsupervised word translation and propose two novel extensions to it that yield more stable training and improved results. Our method includes regularization terms to enforce cycle consistency and input reconstruction, and puts the target encoders as an adversary against the corresponding discriminator. Extensive experimentations with European, non-European and low-resource languages show that our method is more robust and achieves better performance than recently proposed adversarial and non-adversarial approaches.</abstract>
       <url hash="6d23b414">N19-1386</url>
@@ -4759,7 +4759,7 @@
       <title>Addressing word-order Divergence in Multilingual Neural Machine Translation for extremely Low Resource Languages</title>
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3868–3873</pages>
       <abstract>Transfer learning approaches for Neural Machine Translation (NMT) train a NMT model on an assisting language-target language pair (parent model) which is later fine-tuned for the source language-target language pair of interest (child model), with the target language being the same. In many cases, the assisting language has a different word order from the source language. We show that divergent word order adversely limits the benefits from transfer learning when little to no parallel corpus between the source and target language is available. To bridge this divergence, we propose to pre-order the assisting language sentences to match the word order of the source language and train the parent model. Our experiments on many language pairs show that bridging the word order gap leads to significant improvement in the translation quality in extremely low-resource scenarios.</abstract>
       <url hash="565aa35f">N19-1387</url>
@@ -4801,7 +4801,7 @@
     <paper id="391">
       <title>Context-Aware Cross-Lingual Mapping</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>3906–3911</pages>
       <abstract>Cross-lingual word vectors are typically obtained by fitting an orthogonal matrix that maps the entries of a bilingual dictionary from a source to a target vector space. Word vectors, however, are most commonly used for sentence or document-level representations that are calculated as the weighted average of word embeddings. In this paper, we propose an alternative to word-level mapping that better reflects sentence-level cross-lingual similarity. We incorporate context in the transformation matrix by directly mapping the averaged embeddings of aligned sentences in a parallel corpus. We also implement cross-lingual mapping of deep contextualized word embeddings using parallel sentences with word alignments. In our experiments, both approaches resulted in cross-lingual sentence embeddings that outperformed context-independent word mapping in sentence translation retrieval. Furthermore, the sentence-level transformation could be used for word-level mapping without loss in word translation quality.</abstract>
       <url hash="b5ff30ec">N19-1391</url>
@@ -4812,7 +4812,7 @@
       <title>Polyglot Contextual Representations Improve Crosslingual Transfer</title>
       <author><first>Phoebe</first><last>Mulcaire</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>3912–3918</pages>
       <abstract>We introduce Rosita, a method to produce multilingual contextual word representations by training a single language model on text from multiple languages. Our method combines the advantages of contextual word representations with those of multilingual representation learning. We produce language models from dissimilar language pairs (English/Arabic and English/Chinese) and use them in dependency parsing, semantic role labeling, and named entity recognition, with comparisons to monolingual and non-contextual variants. Our results provide further evidence for the benefits of polyglot learning, in which representations are shared across multiple languages.</abstract>
       <url hash="17a553e9">N19-1392</url>
@@ -4824,7 +4824,7 @@
       <author><first>Manon</first><last>Scholivet</last></author>
       <author><first>Franck</first><last>Dary</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <pages>3919–3930</pages>
       <abstract>The existence of universal models to describe the syntax of languages has been debated for decades. The availability of resources such as the Universal Dependencies treebanks and the World Atlas of Language Structures make it possible to study the plausibility of universal grammar from the perspective of dependency parsing. Our work investigates the use of high-level language descriptions in the form of typological features for multilingual dependency parsing. Our experiments on multilingual parsing for 40 languages show that typological information can indeed guide parsers to share information between similar languages beyond simple language identification.</abstract>
@@ -4856,7 +4856,7 @@
     <paper id="396">
       <title><fixed-case>U</fixed-case>nderstanding the <fixed-case>B</fixed-case>ehaviour of <fixed-case>N</fixed-case>eural <fixed-case>A</fixed-case>bstractive <fixed-case>S</fixed-case>ummarizers using <fixed-case>C</fixed-case>ontrastive <fixed-case>E</fixed-case>xamples</title>
       <author><first>Krtin</first><last>Kumar</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3949–3954</pages>
       <abstract>Neural abstractive summarizers generate summary texts using a language model conditioned on the input source text, and have recently achieved high ROUGE scores on benchmark summarization datasets. We investigate how they achieve this performance with respect to human-written gold-standard abstracts, and whether the systems are able to understand deeper syntactic and semantic structures. We generate a set of contrastive summaries which are perturbed, deficient versions of human-written summaries, and test whether existing neural summarizers score them more highly than the human-written summaries. We analyze their performance on different datasets and find that these systems fail to understand the source text, in a majority of the cases.</abstract>
       <url hash="8b2a5350">N19-1396</url>
@@ -4865,12 +4865,12 @@
     </paper>
     <paper id="397">
       <title>Jointly Extracting and Compressing Documents with Summary State Representations</title>
-      <author><first>Afonso</first><last>Mendes</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
       <author><first>Shashi</first><last>Narayan</last></author>
       <author><first>Sebastião</first><last>Miranda</last></author>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>3955–3966</pages>
       <abstract>We present a new neural model for text summarization that first extracts sentences from a document and then compresses them. The pro-posed model offers a balance that sidesteps thedifficulties in abstractive methods while gener-ating more concise summaries than extractivemethods. In addition, our model dynamically determines the length of the output summary based on the gold summaries it observes during training and does not require length constraints typical to extractive summarization. The model achieves state-of-the-art results on the CNN/DailyMail and Newsroom datasets, improving over current extractive and abstractive methods. Human evaluations demonstratethat our model generates concise and informa-tive summaries. We also make available a new dataset of oracle compressive summaries derived automatically from the CNN/DailyMailreference summaries.</abstract>
       <url hash="81eb867f">N19-1397</url>
@@ -4883,7 +4883,7 @@
       <author><first>Sanjeev Kumar</first><last>Karn</last></author>
       <author><first>Mark</first><last>Buckley</last></author>
       <author><first>Ulli</first><last>Waltinger</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3967–3977</pages>
       <abstract>In this work, we define the task of teaser generation and provide an evaluation benchmark and baseline systems for the process of generating teasers. A teaser is a short reading suggestion for an article that is illustrative and includes curiosity-arousing elements to entice potential readers to read particular news items. Teasers are one of the main vehicles for transmitting news to social media users. We compile a novel dataset of teasers by systematically accumulating tweets and selecting those that conform to the teaser definition. We have compared a number of neural abstractive architectures on the task of teaser generation and the overall best performing system is See et al. seq2seq with pointer network.</abstract>
       <url hash="f5364c63">N19-1398</url>
@@ -4917,7 +4917,7 @@
     <paper id="401">
       <title>Positional Encoding to Control Output Sequence Length</title>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>3999–4004</pages>
       <abstract>Neural encoder-decoder models have been successful in natural language generation tasks. However, real applications of abstractive summarization must consider an additional constraint that a generated summary should not exceed a desired length. In this paper, we propose a simple but effective extension of a sinusoidal positional encoding (Vaswani et al., 2017) so that a neural encoder-decoder model preserves the length constraint. Unlike previous studies that learn length embeddings, the proposed method can generate a text of any length even if the target length is unseen in training data. The experimental results show that the proposed method is able not only to control generation length but also improve ROUGE scores.</abstract>
       <url hash="6ed95c97">N19-1401</url>
@@ -4936,7 +4936,7 @@
     </paper>
     <paper id="403">
       <title>Using Natural Language Relations between Answer Choices for Machine Comprehension</title>
-      <author><first>Rajkumar</first><last>Pujari</last></author>
+      <author id="rajkumar-pujari"><first>Rajkumar</first><last>Pujari</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>4010–4015</pages>
       <abstract>While evaluating an answer choice for Reading Comprehension task, other answer choices available for the question and the answers of related questions about the same paragraph often provide valuable information. In this paper, we propose a method to leverage the natural language relations between the answer choices, such as entailment and contradiction, to improve the performance of machine comprehension. We use a stand-alone question answering (QA) system to perform QA task and a Natural Language Inference (NLI) system to identify the relations between the choice pairs. Then we perform inference using an Integer Linear Programming (ILP)-based relational framework to re-evaluate the decisions made by the standalone QA system in light of the relations identified by the NLI system. We also propose a multitask learning model that learns both the tasks jointly.</abstract>
@@ -5115,7 +5115,7 @@
     <paper id="418">
       <title>Data Augmentation for Context-Sensitive Neural Lemmatization Using Inflection Tables and Raw Text</title>
       <author><first>Toms</first><last>Bergmanis</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>4119–4128</pages>
       <abstract>Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. Using context can help, both for unseen and ambiguous words. Yet most context-sensitive approaches require full lemma-annotated sentences for training, which may be scarce or unavailable in low-resource languages. In addition (as shown here), in a low-resource setting, a lemmatizer can learn more from n labeled examples of distinct words (types) than from n (contiguous) labeled tokens, since the latter contain far fewer distinct types. To combine the efficiency of type-based learning with the benefits of context, we propose a way to train a context-sensitive lemmatizer with little or no labeled corpus data, using inflection tables from the UniMorph project and raw text examples from Wikipedia that provide sentence contexts for the unambiguous UniMorph examples. Despite these being unambiguous examples, the model successfully generalizes from them, leading to improved results (both overall, and especially on unseen words) in comparison to a baseline that does not use context.</abstract>
       <url hash="5b5287c8">N19-1418</url>
@@ -5126,7 +5126,7 @@
     <paper id="419">
       <title><fixed-case>A</fixed-case> Structural Probe for Finding Syntax in Word Representations</title>
       <author><first>John</first><last>Hewitt</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>4129–4138</pages>
       <abstract>Recent work has improved our ability to detect linguistic knowledge in word representations. However, current methods for detecting syntactic knowledge do not test whether syntax trees are represented in their entirety. In this work, we propose a structural probe, which evaluates whether syntax trees are embedded in a linear transformation of a neural network’s word representation space. The probe identifies a linear transformation under which squared L2 distance encodes the distance between words in the parse tree, and one in which squared L2 norm encodes depth in the parse tree. Using our probe, we show that such transformations exist for both ELMo and BERT but not in baselines, providing evidence that entire syntax trees are embedded implicitly in deep models’ vector geometry.</abstract>
       <url hash="33672bf1">N19-1419</url>
@@ -5161,7 +5161,7 @@
     <paper id="422">
       <title>Probing the Need for Visual Context in Multimodal Machine Translation</title>
       <author><first>Ozan</first><last>Caglayan</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <pages>4159–4170</pages>
@@ -5189,7 +5189,7 @@
       <title>What’s in a Name? <fixed-case>R</fixed-case>educing Bias in Bios without Access to Protected Attributes</title>
       <author><first>Alexey</first><last>Romanov</last></author>
       <author><first>Maria</first><last>De-Arteaga</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Jennifer</first><last>Chayes</last></author>
       <author><first>Christian</first><last>Borgs</last></author>
       <author><first>Alexandra</first><last>Chouldechova</last></author>
@@ -5238,7 +5238,7 @@
     <paper id="2">
       <title>Locale-agnostic Universal Domain Classification Model in Spoken Language Understanding</title>
       <author><first>Jihwan</first><last>Lee</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <pages>9–15</pages>
       <abstract>In this paper, we introduce an approach for leveraging available data across multiple locales sharing the same language to 1) improve domain classification model accuracy in Spoken Language Understanding and user experience even if new locales do not have sufficient data and 2) reduce the cost of scaling the domain classifier to a large number of locales. We propose a locale-agnostic universal domain classification model based on selective multi-task learning that learns a joint representation of an utterance over locales with different sets of domains and allows locales to share knowledge selectively depending on the domains. The experimental results demonstrate the effectiveness of our approach on domain classification task in the scenario of multiple locales with imbalanced data and disparate domain sets. The proposed approach outperforms other baselines models especially when classifying locale-specific domains and also low-resourced domains.</abstract>
@@ -5271,7 +5271,7 @@
       <author><first>Savitha</first><last>Ramasamy</last></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last></author>
       <author><first>Wai Leng</first><last>Chow</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>24–31</pages>
       <abstract>Data for human-human spoken dialogues for research and development are currently very limited in quantity, variety, and sources; such data are even scarcer in healthcare. In this work, we investigate fast prototyping of a dialogue comprehension system by leveraging on minimal nurse-to-patient conversations. We propose a framework inspired by nurse-initiated clinical symptom monitoring conversations to construct a simulated human-human dialogue dataset, embodying linguistic characteristics of spoken interactions like thinking aloud, self-contradiction, and topic drift. We then adopt an established bidirectional attention pointer network on this simulated dataset, achieving more than 80% F1 score on a held-out test set from real-world nurse-to-patient conversations. The ability to automatically comprehend conversations in the healthcare domain by exploiting only limited data has implications for improving clinical workflows through red flag symptom detection and triaging capabilities. We demonstrate the feasibility for efficient and effective extraction, retrieval and comprehension of symptom checking information discussed in multi-turn human-human spoken conversations.</abstract>
       <url hash="c5b27aaf">N19-2004</url>
@@ -5343,7 +5343,7 @@
       <author><first>Yao</first><last>Wan</last></author>
       <author><first>Xiuming</first><last>Pan</last></author>
       <author><first>Yu</first><last>Gong</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>64–72</pages>
       <abstract>Nowadays, more and more customers browse and purchase products in favor of using mobile E-Commerce Apps such as Taobao and Amazon. Since merchants are usually inclined to describe redundant and over-informative product titles to attract attentions from customers, it is important to concisely display short product titles on limited screen of mobile phones. To address this discrepancy, previous studies mainly consider textual information of long product titles and lacks of human-like view during training and evaluation process. In this paper, we propose a Multi-Modal Generative Adversarial Network (MM-GAN) for short product title generation in E-Commerce, which innovatively incorporates image information and attribute tags from product, as well as textual information from original long titles. MM-GAN poses short title generation as a reinforcement learning process, where the generated titles are evaluated by the discriminator in a human-like view. Extensive experiments on a large-scale E-Commerce dataset demonstrate that our algorithm outperforms other state-of-the-art methods. Moreover, we deploy our model into a real-world online E-Commerce environment and effectively boost the performance of click through rate and click conversion rate by 1.66% and 1.87%, respectively.</abstract>
       <url hash="4d3c9243">N19-2009</url>
@@ -5379,8 +5379,8 @@
       <title>Active Learning for New Domains in Natural Language Understanding</title>
       <author><first>Stanislav</first><last>Peshterliev</last></author>
       <author><first>John</first><last>Kearney</last></author>
-      <author><first>Abhyuday</first><last>Jagannatha</last></author>
-      <author><first>Imre</first><last>Kiss</last></author>
+      <author id="abhyuday-jagannatha"><first>Abhyuday</first><last>Jagannatha</last></author>
+      <author id="imre-kiss"><first>Imre</first><last>Kiss</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
       <pages>90–96</pages>
       <abstract>We explore active learning (AL) for improving the accuracy of new domains in a natural language understanding (NLU) system. We propose an algorithm called Majority-CRF that uses an ensemble of classification models to guide the selection of relevant utterances, as well as a sequence labeling model to help prioritize informative examples. Experiments with three domains show that Majority-CRF achieves 6.6%-9% relative error rate reduction compared to random sampling with the same annotation budget, and statistically significant improvements compared to other AL approaches. Additionally, case studies with human-in-the-loop AL on six new domains show 4.6%-9% improvement on an existing NLU system.</abstract>
@@ -5402,7 +5402,7 @@
     </paper>
     <paper id="14">
       <title>Are the Tools up to the Task? an Evaluation of Commercial Dialog Tools in Developing Conversational Enterprise-grade Dialog Systems</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <author><first>Meghan</first><last>Hickey</last></author>
       <author><first>Carmi</first><last>Rothberg</last></author>
       <author><first>David</first><last>Nahamoo</last></author>
@@ -5441,13 +5441,13 @@
     </paper>
     <paper id="17">
       <title>Extraction of Message Sequence Charts from Software Use-Case Descriptions</title>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>130–137</pages>
       <abstract>Software Requirement Specification documents provide natural language descriptions of the core functional requirements as a set of use-cases. Essentially, each use-case contains a set of actors and sequences of steps describing the interactions among them. Goals of use-case reviews and analyses include their correctness, completeness, detection of ambiguities, prototyping, verification, test case generation and traceability. Message Sequence Chart (MSC) have been proposed as a expressive, rigorous yet intuitive visual representation of use-cases. In this paper, we describe a linguistic knowledge-based approach to extract MSCs from use-cases. Compared to existing techniques, we extract richer constructs of the MSC notation such as timers, conditions and alt-boxes. We apply this tool to extract MSCs from several real-life software use-case descriptions and show that it performs better than the existing techniques. We also discuss the benefits and limitations of the extracted MSCs to meet the above goals.</abstract>
       <url hash="cd1f3f05">N19-2017</url>
@@ -5482,7 +5482,7 @@
       <author><first>Lahari</first><last>Poddar</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
       <author><first>William</first><last>Brendel</last></author>
-      <author><first>Luis</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luis</first><last>Marujo</last></author>
       <author><first>Sergey</first><last>Tulyakov</last></author>
       <author><first>Pradeep</first><last>Karuturi</last></author>
       <pages>157–165</pages>
@@ -5494,9 +5494,9 @@
     <paper id="21">
       <title>Robust Semantic Parsing with Adversarial Learning for Domain Generalization</title>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <pages>166–173</pages>
       <abstract>This paper addresses the issue of generalization for Semantic Parsing in an adversarial framework. Building models that are more robust to inter-document variability is crucial for the integration of Semantic Parsing technologies in real applications. The underlying question throughout this study is whether adversarial learning can be used to train models on a higher level of abstraction in order to increase their robustness to lexical and stylistic variations. We propose to perform Semantic Parsing with a domain classification adversarial task, covering various use-cases with or without explicit knowledge of the domain. The strategy is first evaluated on a French corpus of encyclopedic documents, annotated with FrameNet, in an information retrieval perspective. This corpus constitutes a new public benchmark, gathering documents from various thematic domains and various sources. We show that adversarial learning yields improved results when using explicit domain classification as the adversarial task. We also propose an unsupervised domain discovery approach that yields equivalent improvements. The latter is also evaluated on a PropBank Semantic Role Labeling task on the CoNLL-2005 benchmark and is shown to increase the model’s generalization capabilities on out-of-domain data.</abstract>
       <url hash="2ea7c865">N19-2021</url>
@@ -5561,7 +5561,7 @@
       <title>In Other News: a Bi-style Text-to-speech Model for Synthesizing Newscaster Voice with Limited Data</title>
       <author><first>Nishant</first><last>Prateek</last></author>
       <author><first>Mateusz</first><last>Łajszczak</last></author>
-      <author><first>Roberto</first><last>Barra-Chicote</last></author>
+      <author id="roberto-barra-chicote"><first>Roberto</first><last>Barra-Chicote</last></author>
       <author><first>Thomas</first><last>Drugman</last></author>
       <author><first>Jaime</first><last>Lorenzo-Trueba</last></author>
       <author><first>Thomas</first><last>Merritt</last></author>
@@ -5603,7 +5603,7 @@
       <url hash="18980fd1">N19-3</url>
       <editor><first>Sudipta</first><last>Kar</last></editor>
       <editor><first>Farah</first><last>Nadeem</last></editor>
-      <editor><first>Laura</first><last>Burdick</last></editor>
+      <editor id="laura-burdick"><first>Laura</first><last>Burdick</last></editor>
       <editor><first>Greg</first><last>Durrett</last></editor>
       <editor><first>Na-Rae</first><last>Han</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -5630,7 +5630,7 @@
     <paper id="2">
       <title>Identifying and Reducing Gender Bias in Word-Level Language Models</title>
       <author><first>Shikha</first><last>Bordia</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>7–15</pages>
       <abstract>Many text corpora exhibit socially problematic biases, which can be propagated or amplified in the models trained on such data. For example, doctor cooccurs more frequently with male pronouns than female pronouns. In this study we (i) propose a metric to measure gender bias; (ii) measure bias in a text corpus and the text generated from a recurrent neural network language model trained on the text corpus; (iii) propose a regularization loss term for the language model that minimizes the projection of encoder-trained embeddings onto an embedding subspace that encodes gender; (iv) finally, evaluate efficacy of our proposed method on reducing gender bias. We find this regularization method to be effective in reducing gender bias up to an optimal weight assigned to the loss term, beyond which the model becomes unstable as the perplexity increases. We replicate this study on three training corpora—Penn Treebank, WikiText-2, and CNN/Daily Mail—resulting in similar conclusions.</abstract>
       <url hash="6853b895">N19-3002</url>
@@ -5691,7 +5691,7 @@
     <paper id="7">
       <title>A Bag-of-concepts Model Improves Relation Extraction in a Narrow Knowledge Domain with Limited Data</title>
       <author><first>Jiyu</first><last>Chen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Zenan</first><last>Zhai</last></author>
       <pages>43–52</pages>
       <abstract>This paper focuses on a traditional relation extraction task in the context of limited annotated data and a narrow knowledge domain. We explore this task with a clinical corpus consisting of 200 breast cancer follow-up treatment letters in which 16 distinct types of relations are annotated. We experiment with an approach to extracting typed relations called window-bounded co-occurrence (WBC), which uses an adjustable context window around entity mentions of a relevant type, and compare its performance with a more typical intra-sentential co-occurrence baseline. We further introduce a new bag-of-concepts (BoC) approach to feature engineering based on the state-of-the-art word embeddings and word synonyms. We demonstrate the competitiveness of BoC by comparing with methods of higher complexity, and explore its effectiveness on this small dataset.</abstract>
@@ -5766,7 +5766,7 @@
       <title>Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding</title>
       <author><first>Hwa-Yeon</first><last>Kim</last></author>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
-      <author><first>Young-Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Kil</first><last>Kim</last></author>
       <pages>97–102</pages>
       <abstract>One of the main challenges in Spoken Language Understanding (SLU) is dealing with ‘open-vocabulary’ slots. Recently, SLU models based on neural network were proposed, but it is still difficult to recognize the slots of unknown words or ‘open-vocabulary’ slots because of the high cost of creating a manually tagged SLU dataset. This paper proposes data noising, which reflects the characteristics of the ‘open-vocabulary’ slots, for data augmentation. We applied it to an attention based bi-directional recurrent neural network (Liu and Lane, 2016) and experimented with three datasets: Airline Travel Information System (ATIS), Snips, and MIT-Restaurant. We achieved performance improvements of up to 0.57% and 3.25 in intent prediction (accuracy) and slot filling (f1-score), respectively. Our method is advantageous because it does not require additional memory and it can be applied simultaneously with the training process of the model.</abstract>
       <url hash="f39b3ad1">N19-3014</url>
@@ -5776,7 +5776,7 @@
     <paper id="15">
       <title>Expectation and Locality Effects in the Prediction of Disfluent Fillers and Repairs in <fixed-case>E</fixed-case>nglish Speech</title>
       <author><first>Samvit</first><last>Dammalapati</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
       <pages>103–109</pages>
       <abstract>This study examines the role of three influential theories of language processing, <i>viz.</i>, Surprisal Theory, Uniform Information Density (UID) hypothesis and Dependency Locality Theory (DLT), in predicting disfluencies in speech production. To this end, we incorporate features based on lexical surprisal, word duration and DLT integration and storage costs into logistic regression classifiers aimed to predict disfluencies in the Switchboard corpus of English conversational speech. We find that disfluencies occur in the face of upcoming difficulties and speakers tend to handle this by lessening cognitive load before disfluencies occur. Further, we see that reparandums behave differently from disfluent fillers possibly due to the lessening of the cognitive load also happening in the word choice of the reparandum, i.e., in the disfluency itself. While the UID hypothesis does not seem to play a significant role in disfluency prediction, lexical surprisal and DLT costs do give promising results in explaining language production. Further, we also find that as a means to lessen cognitive load for upcoming difficulties speakers take more time on words preceding disfluencies, making duration a key element in understanding disfluencies.</abstract>
@@ -5798,7 +5798,7 @@
     <paper id="17">
       <title>A Pregroup Representation of Word Order Alternation Using <fixed-case>H</fixed-case>indi Syntax</title>
       <author><first>Alok</first><last>Debnath</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>125–135</pages>
       <abstract>Pregroup calculus has been used for the representation of free word order languages (Sanskrit and Hungarian), using a construction called precyclicity. However, restricted word order alternation has not been handled before. This paper aims at introducing and formally expressing three methods of representing word order alternation in the pregroup representation of any language. This paper describes the word order alternation patterns of Hindi, and creates a basic pregroup representation for the language. In doing so, the shortcoming of correct reductions for ungrammatical sentences due to the current apparatus is highlighted, and the aforementioned methods are invoked for a grammatically accurate representation of restricted word order alternation. The replicability of these methods is explained in the representation of adverbs and prepositional phrases in English.</abstract>
       <url hash="dcf4e6f0">N19-3017</url>
@@ -5852,7 +5852,7 @@
     </frontmatter>
     <paper id="1">
       <title>Abbreviation Explorer - an interactive system for pre-evaluation of Unsupervised Abbreviation Disambiguation</title>
-      <author><first>Manuel R.</first><last>Ciosici</last></author>
+      <author id="manuel-r-ciosici"><first>Manuel R.</first><last>Ciosici</last></author>
       <author><first>Ira</first><last>Assent</last></author>
       <pages>1–5</pages>
       <abstract>We present Abbreviation Explorer, a system that supports interactive exploration of abbreviations that are challenging for Unsupervised Abbreviation Disambiguation (UAD). Abbreviation Explorer helps to identify long-forms that are easily confused, and to pinpoint likely causes such as limitations of normalization, language switching, or inconsistent typing. It can also support determining which long-forms would benefit from additional input text for unsupervised abbreviation disambiguation. The system provides options for creating corrective rules that merge redundant long-forms with identical meaning. The identified rules can be easily applied to the already existing vector spaces used by UAD to improve disambiguation performance, while also avoiding the cost of retraining.</abstract>
@@ -5876,10 +5876,10 @@
       <title>Enabling Search and Collaborative Assembly of Causal Interactions Extracted from Multilingual and Multi-domain Free Text</title>
       <author><first>George C. G.</first><last>Barbosa</last></author>
       <author><first>Zechy</first><last>Wong</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Dane</first><last>Bell</last></author>
       <author><first>Rebecca</first><last>Sharp</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>12–17</pages>
       <abstract>Many of the most pressing current research problems (e.g., public health, food security, or climate change) require multi-disciplinary collaborations. In order to facilitate this process, we propose a system that incorporates multi-domain extractions of causal interactions into a single searchable knowledge graph. Our system enables users to search iteratively over direct and indirect connections in this knowledge graph, and collaboratively build causal models in real time. To enable the aggregation of causal information from multiple languages, we extend an open-domain machine reader to Portuguese. The new Portuguese reader extracts over 600 thousand causal statements from 120 thousand Portuguese publications with a precision of 62%, which demonstrates the value of mining multilingual scientific information.</abstract>
@@ -5901,7 +5901,7 @@
     <paper id="5">
       <title>Learning to Respond to Mixed-code Queries using Bilingual Word Embeddings</title>
       <author><first>Chia-Fang</first><last>Ho</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
       <author><first>Chingyu</first><last>Yang</last></author>
       <pages>24–28</pages>
@@ -5945,7 +5945,7 @@
       <author><first>Benjamin</first><last>Gyori</last></author>
       <author><first>Keith</first><last>Alcock</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Vikas</first><last>Yadav</last></author>
       <author><first>John</first><last>Bachman</last></author>
@@ -5999,7 +5999,7 @@
       <author><first>Daphne</first><last>Ippolito</last></author>
       <author><first>Arun</first><last>Kirubarajan</last></author>
       <author><first>Jai</first><last>Thirani</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>60–65</pages>
       <abstract>Open-domain dialog systems (i.e. chatbots) are difficult to evaluate. The current best practice for analyzing and comparing these dialog systems is the use of human judgments. However, the lack of standardization in evaluation procedures, and the fact that model parameters and code are rarely published hinder systematic human evaluation experiments. We introduce a unified framework for human evaluation of chatbots that augments existing tools and provides a web-based hub for researchers to share and compare their dialog systems. Researchers can submit their trained models to the ChatEval web interface and obtain comparisons with baselines and prior work. The evaluation code is open-source to ensure standardization and transparency. In addition, we introduce open-source baseline models and evaluation datasets. ChatEval can be found at <url>https://chateval.org</url>.</abstract>
@@ -6040,7 +6040,7 @@
       <author><first>Wei</first><last>Fang</last></author>
       <author><first>Brian</first><last>Xu</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>78–83</pages>
       <abstract>We present FAKTA which is a unified framework that integrates various components of a fact-checking process: document retrieval from media sources with various types of reliability, stance detection of documents with respect to given claims, evidence extraction, and linguistic analysis. FAKTA predicts the factuality of given claims and provides evidence at the document and sentence level to explain its predictions.</abstract>
       <url hash="b9a3d462">N19-4014</url>
@@ -6051,7 +6051,7 @@
       <title>i<fixed-case>C</fixed-case>omposer: An Automatic Songwriting System for <fixed-case>C</fixed-case>hinese Popular Music</title>
       <author><first>Hsin-Pei</first><last>Lee</last></author>
       <author><first>Jhih-Sheng</first><last>Fang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>84–88</pages>
       <abstract>In this paper, we introduce iComposer, an interactive web-based songwriting system designed to assist human creators by greatly simplifying music production. iComposer automatically creates melodies to accompany any given text. It also enables users to generate a set of lyrics given arbitrary melodies. iComposer is based on three sequence-to-sequence models, which are used to predict melody, rhythm, and lyrics, respectively. Songs generated by iComposer are compared with human-composed and randomly-generated ones in a subjective test, the experimental results of which demonstrate the capability of the proposed system to write pleasing melodies and meaningful lyrics at a level similar to that of humans.</abstract>
       <url hash="61b9af05">N19-4015</url>
@@ -6073,7 +6073,7 @@
       <title><fixed-case>LT</fixed-case> Expertfinder: An Evaluation Framework for Expert Finding Methods</title>
       <author><first>Tim</first><last>Fischer</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>98–104</pages>
       <abstract>Expert finding is the task of ranking persons for a predefined topic or search query. Finding experts for a specified area is an important task and has attracted much attention in the information retrieval community. Most approaches for this task are evaluated in a supervised fashion, which depend on predefined topics of interest as well as gold standard expert rankings. Famous representatives of such datasets are enriched versions of DBLP provided by the ArnetMiner projet or the W3C Corpus of TREC. However, manually ranking experts can be considered highly subjective and detailed rankings are hardly distinguishable. Evaluating these datasets does not necessarily guarantee a good or bad performance of the system. Particularly for dynamic systems, where topics are not predefined but formulated as a search query, we believe a more informative approach is to perform user studies for directly comparing different methods in the same view. In order to accomplish this in a user-friendly way, we present the LT Expert Finder web-application, which is equipped with various query-based expert finding methods that can be easily extended, a detailed expert profile view, detailed evidence in form of relevant documents and statistics, and an evaluation component that allows the qualitative comparison between different rankings.</abstract>
       <url hash="90cc02dd">N19-4017</url>
@@ -6098,7 +6098,7 @@
       <author><first>Ying</first><last>Lin</last></author>
       <author><first>Joseph</first><last>Hoover</last></author>
       <author><first>Spencer</first><last>Whitehead</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Morteza</first><last>Dehghani</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>110–115</pages>
@@ -6135,9 +6135,9 @@
     </paper>
     <paper id="22">
       <title>Visualizing Inferred Morphotactic Systems</title>
-      <author><first>Haley</first><last>Lepp</last></author>
+      <author id="haley-lepp"><first>Haley</first><last>Lepp</last></author>
       <author><first>Olga</first><last>Zamaraeva</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>127–131</pages>
       <abstract>We present a web-based system that facilitates the exploration of complex morphological patterns found in morphologically very rich languages. The need for better understanding of such patterns is urgent for linguistics and important for cross-linguistically applicable natural language processing. In this paper we give an overview of the system architecture and describe a sample case study on Abui [abz], a Trans-New Guinea language spoken in Indonesia.</abstract>
       <url hash="cf88370a">N19-4022</url>
@@ -6148,11 +6148,11 @@
       <title>A <fixed-case>R</fixed-case>esearch <fixed-case>P</fixed-case>latform for <fixed-case>M</fixed-case>ulti-<fixed-case>R</fixed-case>obot <fixed-case>D</fixed-case>ialogue with <fixed-case>H</fixed-case>umans</title>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Stephen</first><last>Nogar</last></author>
-      <author><first>Cory J.</first><last>Hayes</last></author>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="cory-hayes"><first>Cory J.</first><last>Hayes</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>Jesse</first><last>Bloecker</last></author>
       <author><first>Eric</first><last>Holder</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>132–137</pages>
       <abstract>This paper presents a research platform that supports spoken dialogue interaction with multiple robots. The demonstration showcases our crafted MultiBot testing scenario in which users can verbally issue search, navigate, and follow instructions to two robotic teammates: a simulated ground robot and an aerial robot. This flexible language and robotic platform takes advantage of existing tools for speech recognition and dialogue management that are compatible with new domains, and implements an inter-agent communication protocol (tactical behavior specification), where verbal instructions are encoded for tasks assigned to the appropriate robot.</abstract>
       <url hash="7d39e661">N19-4023</url>
@@ -6202,7 +6202,7 @@
     </paper>
     <paper id="2">
       <title>Deep Learning for Natural Language Inference</title>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <pages>6–8</pages>
       <abstract>This tutorial discusses cutting-edge research on NLI, including recent advance on dataset development, cutting-edge deep learning models, and highlights from recent research on using NLI to understand capabilities and limits of deep learning models for language understanding and reasoning.</abstract>
@@ -6226,7 +6226,7 @@
     <paper id="4">
       <title>Transfer Learning in Natural Language Processing</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Thomas</first><last>Wolf</last></author>
       <pages>15–18</pages>
@@ -6240,7 +6240,7 @@
       <title>Language Learning and Processing in People and Machines</title>
       <author><first>Aida</first><last>Nematzadeh</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>19–21</pages>
       <abstract>The goal of this tutorial is to bring the fields of computational linguistics and computational cognitive science closer: we will introduce different stages of language acquisition and their parallel problems in NLP. As an example, one of the early challenges children face is mapping the meaning of word labels (such as “cat”) to their referents (the furry animal in the living room). Word learning is similar to the word alignment problem in machine translation. We explain the current computational models of language acquisition, their limitations, and how the insights from these models can be incorporated into NLP applications. Moreover, we discuss how we can take advantage of the cognitive science of language in computational linguistics: for example, by designing cognitively-motivated evaluations task or buildings language-learning inductive biases into our models.</abstract>
       <url hash="18ead548">N19-5005</url>
diff --git a/data/xml/O00.xml b/data/xml/O00.xml
index e4f7bf85d1..7a211ab892 100644
--- a/data/xml/O00.xml
+++ b/data/xml/O00.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of Research on Computational Linguistics Conference <fixed-case>XIII</fixed-case></booktitle>
       <url hash="6b778550">O00-1</url>
       <editor><first>Lee-Feng</first><last>Chien</last></editor>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
       <month>August</month>
@@ -32,9 +32,9 @@
     </paper>
     <paper id="3">
       <title>The Improving Techniques for Disambiguating Non-alphabet Sense Categories</title>
-      <author><first>Feng-Long</first><last>Hwang</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
-      <author><first>Min-Jer</first><last>Wu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Hwang</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-jer-wu"><first>Min-Jer</first><last>Wu</last></author>
       <pages>67–86</pages>
       <url hash="721308a3">O00-1003</url>
       <bibkey>hwang-etal-2000-improving</bibkey>
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>Building A <fixed-case>C</fixed-case>hinese Text Summarizer with Phrasal Chunks and Domain Knowledge</title>
       <author><first>Weiquan</first><last>Liu</last></author>
-      <author><first>Joe</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe</first><last>Zhou</last></author>
       <pages>87–96</pages>
       <url hash="37a4a1c8">O00-1004</url>
       <bibkey>liu-zhou-2000-building</bibkey>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="10">
       <title>具有累進學習能力之貝氏預測法則在汽車語音辨識之應用 (<fixed-case>B</fixed-case>ayesian Predictive Classification with Incremental Learning Capability for Car Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <author><first>Guo-Hong</first><last>Liao</last></author>
       <pages>179–197</pages>
       <bibkey>chien-liao-2000-ju</bibkey>
@@ -99,7 +99,7 @@
     <paper id="11">
       <title>結合麥克風陣列及模型調整技術之遠距離語音辨識系統 (Far-Distant Speech Recognition System Using Combined Techniques of Microphone Array and Model Adaptation)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jain-Ray</first><last>Lai</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>199–213</pages>
       <url hash="50c5941b">O00-1011</url>
       <bibkey>lai-chien-2000-jie</bibkey>
@@ -128,8 +128,8 @@
     </frontmatter>
     <paper id="1">
       <title>漢語動詞辭彙語意分析：表達模式與研究方法 (A Lexical-Semantic Analysis of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Verbs: Representation and Methodology) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>1–18</pages>
       <url hash="11abdaa9">O00-2001</url>
@@ -139,10 +139,10 @@
       <title>The Module-Attribute Representation of Verbal Semantics: From Semantic to Argument Structure</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Mei-Chun</first><last>Liu</last></author>
-      <author><first>Mei-Chih</first><last>Tsai</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="mei-chun-liu"><first>Mei-Chun</first><last>Liu</last></author>
+      <author id="mei-chih-tsai"><first>Mei-Chih</first><last>Tsai</last></author>
       <pages>19–46</pages>
       <url hash="ed4c59b6">O00-2002</url>
       <bibkey>huang-etal-2000-module</bibkey>
@@ -151,17 +151,17 @@
       <title>What Can Near Synonyms Tell Us</title>
       <author><first>Lian-Cheng</first><last>Chief</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Mei-Chih</first><last>Tsai</last></author>
-      <author><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="mei-chih-tsai"><first>Mei-Chih</first><last>Tsai</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
       <pages>47–60</pages>
       <url hash="03f95093">O00-2003</url>
       <bibkey>chief-etal-2000-near</bibkey>
     </paper>
     <paper id="4">
       <title>Alternation Across Semantic Fields: A Study on <fixed-case>M</fixed-case>andarin Verbs of Emotion</title>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>61–80</pages>
       <url hash="d072c50d">O00-2004</url>
@@ -169,9 +169,9 @@
     </paper>
     <paper id="5">
       <title>When Endpoint Meets Endpoint: A Corpus-based Lexical Semantic Study of <fixed-case>M</fixed-case>andarin Verbs of Throwing</title>
-      <author><first>Mei-Chun</first><last>Liu</last></author>
+      <author id="mei-chun-liu"><first>Mei-Chun</first><last>Liu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Charles</first><last>Lee</last></author>
+      <author id="charles-c-lee"><first>Charles</first><last>Lee</last></author>
       <author><first>Ching-Yi</first><last>Lee</last></author>
       <pages>81–96</pages>
       <url hash="b8e7129c">O00-2005</url>
@@ -191,7 +191,7 @@
     </frontmatter>
     <paper id="1">
       <title>Adaptive Word Sense Disambiguation Using Lexical Knowledge in a Machine-readable Dictionary</title>
-      <author><first>Jen Nan</first><last>Chen</last></author>
+      <author id="jen-nan-chen"><first>Jen Nan</first><last>Chen</last></author>
       <pages>1–42</pages>
       <url hash="3a0b3736">O00-3001</url>
       <bibkey>chen-2000-adaptive</bibkey>
@@ -206,15 +206,15 @@
     </paper>
     <paper id="3">
       <title><fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Cross-Language Information Retrieval: An Interlingua Approach</title>
-      <author><first>Md. Maruf</first><last>Hasan</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="md-maruf-hasan"><first>Md. Maruf</first><last>Hasan</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>59–86</pages>
       <url hash="6dd3cd7e">O00-3003</url>
       <bibkey>hasan-matsumoto-2000-japanese</bibkey>
     </paper>
     <paper id="4">
       <title>Compiling <fixed-case>T</fixed-case>aiwanese Learner Corpus of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Rebecca Hsue-Hueh</first><last>Shih</last></author>
+      <author id="hsue-hueh-shih"><first>Rebecca Hsue-Hueh</first><last>Shih</last></author>
       <pages>87–100</pages>
       <url hash="ef22b1d7">O00-3004</url>
       <bibkey>shih-2000-compiling</bibkey>
diff --git a/data/xml/O01.xml b/data/xml/O01.xml
index c7d0a64524..5e19c0cb30 100644
--- a/data/xml/O01.xml
+++ b/data/xml/O01.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of Research on Computational Linguistics Conference <fixed-case>XIV</fixed-case></booktitle>
       <url hash="b8954ba2">O01-1</url>
       <editor><first>Chung-Hsien</first><last>Wu</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Tainan, Taiwan</address>
       <month>August</month>
@@ -17,8 +17,8 @@
     </frontmatter>
     <paper id="1">
       <title>新聞文件摘要之研究 (Text Summarization on News) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hsiang-Pin</first><last>Lee</last></author>
-      <author><first>Su-Jin</first><last>Ker</last></author>
+      <author id="hsiang-pin-lee"><first>Hsiang-Pin</first><last>Lee</last></author>
+      <author id="sue-j-ker"><first>Su-Jin</first><last>Ker</last></author>
       <pages>23–42</pages>
       <url hash="fcbf74bb">O01-1001</url>
       <bibkey>lee-ker-2001-xin</bibkey>
@@ -34,7 +34,7 @@
     <paper id="3">
       <title>多篇文件自動摘要系統 (Multi-Document Summarization System) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jian-Cheng</first><last>Shen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>65–87</pages>
       <url hash="206aadf1">O01-1003</url>
       <bibkey>shen-chang-2001-duo</bibkey>
@@ -66,9 +66,9 @@
     <paper id="7">
       <title>Optimiztion of <fixed-case>HMM</fixed-case> by the Tabu Search Algorithm</title>
       <author><first>Xiao-dan</first><last>Mei</last></author>
-      <author><first>Sheng-he</first><last>Sun</last></author>
+      <author id="sheng-he-sun"><first>Sheng-he</first><last>Sun</last></author>
       <author><first>Jeng-shuang</first><last>Pan</last></author>
-      <author><first>Tsong-yi</first><last>Chen</last></author>
+      <author id="tsong-yi-chen"><first>Tsong-yi</first><last>Chen</last></author>
       <pages>147–153</pages>
       <url hash="04914e1f">O01-1007</url>
       <bibkey>mei-etal-2001-optimiztion</bibkey>
@@ -84,10 +84,10 @@
     </paper>
     <paper id="9">
       <title>中文語料庫構建及管理系統設計 (Design of Management System for <fixed-case>C</fixed-case>hinese Corpus Construction) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Chang-Hua</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>175–191</pages>
       <url hash="234f2c60">O01-1009</url>
       <bibkey>ma-etal-2001-zhong</bibkey>
@@ -95,9 +95,9 @@
     <paper id="10">
       <title>Design, Compilation and Processing of <fixed-case>CUC</fixed-case>all: A Set of <fixed-case>C</fixed-case>antonese Spoken Language Corpora Collected Over Telephone Networks</title>
       <author><first>W.K.</first><last>Lo</last></author>
-      <author><first>P.C.</first><last>Ching</last></author>
+      <author id="p-c-ching"><first>P.C.</first><last>Ching</last></author>
       <author><first>Tan</first><last>Lee</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>193–212</pages>
       <url hash="c297d855">O01-1010</url>
       <bibkey>lo-etal-2001-design</bibkey>
@@ -112,17 +112,17 @@
     </paper>
     <paper id="12">
       <title>中文動詞自動分類研究 (Automatic Classification of <fixed-case>C</fixed-case>hinese Unknown Verbs) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hui-hsin</first><last>Tseng</last></author>
+      <author id="huihsin-tseng"><first>Hui-hsin</first><last>Tseng</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>253–272</pages>
       <url hash="c536bac7">O01-1012</url>
       <bibkey>tseng-etal-2001-zhong</bibkey>
     </paper>
     <paper id="13">
       <title>統計式片語翻譯模型(A Statistical Model of Terminology Translation) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Ta-wei</first><last>Yu</last></author>
       <pages>273–297</pages>
       <url hash="60af66cb">O01-1013</url>
@@ -130,7 +130,7 @@
     </paper>
     <paper id="14">
       <title>Metaphor, Inference, and Conceptualisation : On the Development of <fixed-case>V</fixed-case>-diao Construction in <fixed-case>M</fixed-case>andarin</title>
-      <author><first>Wei-lun</first><last>Lu</last></author>
+      <author id="wei-lun-lu"><first>Wei-lun</first><last>Lu</last></author>
       <pages>299–316</pages>
       <url hash="ffa9ff78">O01-1014</url>
       <bibkey>lu-2001-metaphor</bibkey>
@@ -159,7 +159,7 @@
       <title>Improving Translation Selection with a New Translation Model Trained by Independent Monolingual Corpora</title>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Yuan</first><last>Ding</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>1–26</pages>
       <url hash="aaa2340f">O01-2001</url>
       <bibkey>zhou-etal-2001-improving</bibkey>
@@ -167,7 +167,7 @@
     <paper id="2">
       <title>The Use of Clustering Techniques for Language Modeling <fixed-case>V</fixed-case> Application to <fixed-case>A</fixed-case>sian Language</title>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Joshua T.</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua T.</first><last>Goodman</last></author>
       <author><first>Jiangbo</first><last>Miao</last></author>
       <pages>27–60</pages>
       <url hash="41e5569b">O01-2002</url>
@@ -183,11 +183,11 @@
     </paper>
     <paper id="4">
       <title>Automatic Translation Template Acquisition Based on Bilingual Structure Alignment</title>
-      <author><first>Yajuan</first><last>Lu</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lu</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>83–108</pages>
       <url hash="e9a04443">O01-2004</url>
       <bibkey>lu-etal-2001-automatic</bibkey>
@@ -216,7 +216,7 @@
     </frontmatter>
     <paper id="1">
       <title>Metaphorical Transfer and Pragmatic Strengthening: On the Development of <fixed-case>V</fixed-case>-diao in <fixed-case>M</fixed-case>andarin</title>
-      <author><first>Louis Wei-lun</first><last>Lu</last></author>
+      <author id="wei-lun-lu"><first>Louis Wei-lun</first><last>Lu</last></author>
       <pages>1–10</pages>
       <url hash="fbf48480">O01-3001</url>
       <bibkey>lu-2001-metaphorical</bibkey>
@@ -240,9 +240,9 @@
     </paper>
     <paper id="4">
       <title>統計式片語翻譯模型 (Statistical Translation Model for Phrases) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>David</first><last>Yu</last></author>
-      <author><first>Chun-Jun</first><last>Lee</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jun</first><last>Lee</last></author>
       <pages>43–64</pages>
       <url hash="4e9cc5b2">O01-3004</url>
       <bibkey>chang-etal-2001-tong</bibkey>
diff --git a/data/xml/O02.xml b/data/xml/O02.xml
index 29d5200a54..f0000f8c71 100644
--- a/data/xml/O02.xml
+++ b/data/xml/O02.xml
@@ -13,17 +13,17 @@
     </frontmatter>
     <paper id="1">
       <title>以構詞律與相似法為本的中文動詞自動分類研究 (A Hybrid Approach for Automatic Classification of <fixed-case>C</fixed-case>hinese Unknown Verbs) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hui-Hsin</first><last>Tseng</last></author>
+      <author id="huihsin-tseng"><first>Hui-Hsin</first><last>Tseng</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>1–28</pages>
       <bibkey>tseng-etal-2002-yi</bibkey>
     </paper>
     <paper id="2">
       <title>Word Sense Disambiguation and Sense-Based <fixed-case>NV</fixed-case> Event Frame Identifier</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <author><first>Jeng-Woei</first><last>Su</last></author>
       <pages>29–46</pages>
       <url hash="34a97495">O02-1002</url>
@@ -32,7 +32,7 @@
     <paper id="3">
       <title>一種基於知網的語義排歧模型研究 (A Study of Semantic Disambiguation Based on <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
-      <author><first>Tangqiu</first><last>Li</last></author>
+      <author id="tangqiu-li"><first>Tangqiu</first><last>Li</last></author>
       <pages>47–78</pages>
       <url hash="61513915">O02-1003</url>
       <bibkey>yang-li-2002-yi</bibkey>
@@ -41,7 +41,7 @@
       <title>基於文本概念和k<fixed-case>NN</fixed-case>的跨語種文本過濾 (Cross-Language Text Filtering Based on Text Concepts and k<fixed-case>NN</fixed-case>) [In <fixed-case>C</fixed-case>hinese</title>
       <author><first>Weifeng</first><last>Su</last></author>
       <author><first>Shaozi</first><last>Li</last></author>
-      <author><first>Tanqiu</first><last>Li</last></author>
+      <author id="tangqiu-li"><first>Tanqiu</first><last>Li</last></author>
       <author><first>Wenjian</first><last>You</last></author>
       <pages>79–90</pages>
       <url hash="cf8d3cc6">O02-1004</url>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="2">
       <title>A Study on Word Similarity using Context Vector Models</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Jia-Ming</first><last>You</last></author>
       <pages>37–58</pages>
       <url hash="44238bd5">O02-2002</url>
diff --git a/data/xml/O03.xml b/data/xml/O03.xml
index 8a8747acd6..4bc299a35c 100644
--- a/data/xml/O03.xml
+++ b/data/xml/O03.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of Research on Computational Linguistics Conference <fixed-case>XV</fixed-case></booktitle>
       <url hash="0fb1d8a8">O03-1</url>
-      <editor><first>Jason J.</first><last>Chang</last></editor>
+      <editor id="jason-s-chang"><first>Jason J.</first><last>Chang</last></editor>
       <editor><first>Hsien-Chin</first><last>Liou</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Hsinchu, Taiwan</address>
@@ -51,14 +51,14 @@
     </paper>
     <paper id="5">
       <title>從語域及借詞觀點探討台語文寫作風格 (Discussion on <fixed-case>T</fixed-case>aiwanese Writing Style from The Viewpoint of Register and Loanword) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Un-Gian</first><last>Iunn</last></author>
+      <author id="un-gian-iunn"><first>Un-Gian</first><last>Iunn</last></author>
       <pages>73–86</pages>
       <url hash="e76bb74e">O03-1005</url>
       <bibkey>iunn-2003-cong</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>ECONOMY</fixed-case> <fixed-case>IS</fixed-case> <fixed-case>A</fixed-case> <fixed-case>PERSON</fixed-case>: A <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Corpora and Ontological-based Comparison Using the <fixed-case>C</fixed-case>onceptual <fixed-case>M</fixed-case>apping <fixed-case>M</fixed-case>odel</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>87–110</pages>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="8">
       <title>文件自我擴展於自動分類之應用 (Application of Document Self-Expansion to Text Categorization) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Da-Wei</first><last>Juang</last></author>
       <pages>129–141</pages>
       <url hash="f0557781">O03-1008</url>
@@ -85,7 +85,7 @@
       <title>Auto-Discovery of <fixed-case>NVEF</fixed-case> Word-Pairs in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
       <author><first>Gladys</first><last>Hsieh</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>143–160</pages>
       <url hash="39beeca2">O03-1009</url>
       <bibkey>tsai-etal-2003-auto</bibkey>
@@ -93,7 +93,7 @@
     <paper id="10">
       <title>Reliable and Cost-Effective <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-Tagging</title>
       <author><first>Yu-Fang</first><last>Tsai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>161–173</pages>
       <url hash="bbd4ad8b">O03-1010</url>
       <bibkey>tsai-chen-2003-reliable</bibkey>
@@ -102,16 +102,16 @@
       <title><fixed-case>C</fixed-case>hinese Word Auto-Confirmation Agent</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
       <author><first>Cheng-Lung</first><last>Sung</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>175–191</pages>
       <url hash="596ea0d2">O03-1011</url>
       <bibkey>tsai-etal-2003-chinese</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>M</fixed-case>encius: A <fixed-case>C</fixed-case>hinese Named Entity Recognizer Using Hybrid Model</title>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>193–209</pages>
       <url hash="2ae501a6">O03-1012</url>
       <bibkey>tsai-etal-2003-mencius</bibkey>
@@ -171,14 +171,14 @@
     </paper>
     <paper id="3">
       <title>Extracting Verb-Noun Collocations from Text</title>
-      <author><first>Jia Yan</first><last>Jian</last></author>
+      <author id="jia-yan-jian"><first>Jia Yan</first><last>Jian</last></author>
       <pages>295–302</pages>
       <url hash="c2f847cb">O03-2003</url>
       <bibkey>jian-2003-extracting</bibkey>
     </paper>
     <paper id="4">
       <title>Bilingual Sentence Alignment Based on Punctuation Marks</title>
-      <author><first>Kevin C.</first><last>Yeh</last></author>
+      <author id="kevin-c-yeh"><first>Kevin C.</first><last>Yeh</last></author>
       <pages>303–312</pages>
       <url hash="c448390a">O03-2004</url>
       <bibkey>yeh-2003-bilingual</bibkey>
@@ -207,9 +207,9 @@
     </paper>
     <paper id="2">
       <title>Interleaving Text and Punctuations for Bilingual Sub-sentential Alignment</title>
-      <author><first>Wen-Chi</first><last>Hsie</last></author>
-      <author><first>Kevin</first><last>Yeh</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="wen-chi-hsien"><first>Wen-Chi</first><last>Hsie</last></author>
+      <author id="kevin-c-yeh"><first>Kevin</first><last>Yeh</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <pages>327–333</pages>
       <url hash="5c6c91b9">O03-3002</url>
@@ -218,16 +218,16 @@
     <paper id="3">
       <title>Restoration of Case Information in All-Cap <fixed-case>E</fixed-case>nglish Broadcast Transcription</title>
       <author><first>Yu-Ting</first><last>Liang</last></author>
-      <author><first>Jian-Chen</first><last>Wu</last></author>
+      <author id="jian-chen-wu"><first>Jian-Chen</first><last>Wu</last></author>
       <pages>335–337</pages>
       <url hash="38082c0d">O03-3003</url>
       <bibkey>liang-wu-2003-restoration</bibkey>
     </paper>
     <paper id="4">
       <title>Using Punctuations and Lengths for Bilingual Sub-sentential Alignment</title>
-      <author><first>Wen-Chi</first><last>Hsien</last></author>
-      <author><first>Kevin</first><last>Yeh</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="wen-chi-hsien"><first>Wen-Chi</first><last>Hsien</last></author>
+      <author id="kevin-c-yeh"><first>Kevin</first><last>Yeh</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <pages>339–345</pages>
       <url hash="c27672b8">O03-3004</url>
@@ -235,16 +235,16 @@
     </paper>
     <paper id="5">
       <title><fixed-case>T</fixed-case>otal<fixed-case>R</fixed-case>ecall: A Bilingual Concordance in National Digital Learning Project - <fixed-case>CANDLE</fixed-case></title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Wen-Chi</first><last>Shei</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>347–353</pages>
       <url hash="a1702ce5">O03-3005</url>
       <bibkey>wu-etal-2003-totalrecall</bibkey>
     </paper>
     <paper id="6">
       <title>Unsupervised Word Segmentation Without Dictionary</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Tracy</first><last>Lin</last></author>
       <pages>355–359</pages>
       <url hash="5a9f1b5a">O03-3006</url>
@@ -252,9 +252,9 @@
     </paper>
     <paper id="7">
       <title>盲胞有聲書語音查詢系統 (A Speech-enabled Talking Book Retrieval System for the Blind) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Cheng-Yuan</first><last>Lin</last></author>
+      <author id="cheng-yuan-lin"><first>Cheng-Yuan</first><last>Lin</last></author>
       <author><first>Ming-Feng</first><last>Hsieh</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>361–367</pages>
       <url hash="df65d238">O03-3007</url>
       <bibkey>lin-etal-2003-mang</bibkey>
@@ -263,7 +263,7 @@
       <title>線上新聞語音檢索系統 (Online New Retrieval Based on Speech Input) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jiang-Chun</first><last>Chen</last></author>
       <author><first>Jui-Lin</first><last>Lo</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>369–376</pages>
       <url hash="79644072">O03-3008</url>
       <bibkey>chen-etal-2003-xian</bibkey>
@@ -303,7 +303,7 @@
     </paper>
     <paper id="4">
       <title>Extension of <fixed-case>Z</fixed-case>ipf’s Law to Word and Character N-grams for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Le Quan</first><last>Ha</last></author>
+      <author id="le-quan-ha"><first>Le Quan</first><last>Ha</last></author>
       <author id="elvira-i-sicilia-garcia"><first>E. I.</first><last>Sicilia-Garcia</last></author>
       <author><first>Ji</first><last>Ming</last></author>
       <author id="francis-j-smith"><first>F. J.</first><last>Smith</last></author>
@@ -337,7 +337,7 @@
       <author><first>Hua-Ping</first><last>Zhang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Hong-Kui</first><last>Yu</last></author>
-      <author><first>Xue-Qi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xue-Qi</first><last>Cheng</last></author>
       <author><first>Shuo</first><last>Bai</last></author>
       <pages>29–60</pages>
       <url hash="31d1444d">O03-5002</url>
@@ -345,7 +345,7 @@
     </paper>
     <paper id="3">
       <title>Building A <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Via Class-Based Translation Model</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Tracy</first><last>Lin</last></author>
       <author><first>Geeng-Neng</first><last>You</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
@@ -364,7 +364,7 @@
     </paper>
     <paper id="5">
       <title>從詞網出發的中文複合名詞的語意表達 (Learning the Semantic Meaning of a <fixed-case>C</fixed-case>hinese Compound from the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Sue-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sue-Jin</first><last>Ker</last></author>
       <pages>93-108</pages>
       <url hash="e351bc89">O03-5005</url>
       <bibkey>ker-2003-cong</bibkey>
diff --git a/data/xml/O04.xml b/data/xml/O04.xml
index 3ee5485ad2..f177e4a86a 100644
--- a/data/xml/O04.xml
+++ b/data/xml/O04.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 16th Conference on Computational Linguistics and Speech Processing</booktitle>
       <url hash="facb9185">O04-1</url>
       <editor><first>Lee-Feng</first><last>Chien</last></editor>
-      <editor><first>Hsin-Min</first><last>Wang</last></editor>
+      <editor id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
       <month>September</month>
@@ -28,7 +28,7 @@
       <title>聚集事後機率線性迴歸調適演算法應用於語音辨識 (Aggregate a Posteriori Linear Regression for Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chih-Hsien</first><last>Huang</last></author>
       <author><first>Yii-Kai</first><last>Wang</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>11–20</pages>
       <url hash="81ba598b">O04-1002</url>
       <bibkey>huang-etal-2004-ju</bibkey>
@@ -45,7 +45,7 @@
     <paper id="4">
       <title>A Noise Estimator with Rapid Adaptation in Variable-Level Noisy Environments</title>
       <author><first>Bing-Fei</first><last>Wu</last></author>
-      <author><first>Kun-Ching</first><last>Wang</last></author>
+      <author id="kun-ching-wang"><first>Kun-Ching</first><last>Wang</last></author>
       <author><first>Lung-Yi</first><last>Kuo</last></author>
       <pages>33–38</pages>
       <url hash="4a107878">O04-1004</url>
@@ -86,7 +86,7 @@
       <title>Applying Meaningful Word-Pair Identifier to the <fixed-case>C</fixed-case>hinese Syllable-to-Word Conversion Problem</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
       <author><first>Tien-Jien</first><last>Chiang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>79–88</pages>
       <url hash="481e47ac">O04-1009</url>
       <bibkey>tsai-etal-2004-applying</bibkey>
@@ -111,7 +111,7 @@
       <title>利用自然語言處理技術自動產生英文克漏詞試題之研究 (A Study on Natural Language Processing Aided Grneration of Multiple-Choice Cloze Items) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chun-Hung</first><last>Wang</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <pages>111–120</pages>
       <url hash="2c639e20">O04-1012</url>
       <bibkey>wang-etal-2004-li-yong</bibkey>
@@ -127,9 +127,9 @@
     </paper>
     <paper id="14">
       <title>現代漢語複合動詞之詞首詞尾研究 (Compositional Semantics of <fixed-case>M</fixed-case>andarin Affix Verbs) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Chih-ming</first><last>Chiu</last></author>
+      <author id="chih-ming-chiu"><first>Chih-ming</first><last>Chiu</last></author>
       <author><first>Ji-Chin</first><last>Lo</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>131–139</pages>
       <url hash="2a443ae6">O04-1014</url>
       <bibkey>chiu-etal-2004-xian</bibkey>
@@ -138,7 +138,7 @@
       <title>語法規律的抽取及普遍化與精確化的研究 (Grammar Extraction, Generalization and Specialization) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Duen-Chi</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>141–150</pages>
       <url hash="199ff916">O04-1015</url>
       <bibkey>hsieh-etal-2004-yu</bibkey>
@@ -161,14 +161,14 @@
     <paper id="18">
       <title>Functional Distinction between Zai and Zhengzai in <fixed-case>M</fixed-case>andarin: Evidence from Collocations</title>
       <author><first>Tsi-chun</first><last>Lin</last></author>
-      <author><first>Mei-chun</first><last>Liu</last></author>
+      <author id="mei-chun-liu"><first>Mei-chun</first><last>Liu</last></author>
       <pages>169–175</pages>
       <url hash="27f0362c">O04-1018</url>
       <bibkey>lin-liu-2004-functional</bibkey>
     </paper>
     <paper id="19">
       <title>中文手機新聞簡訊自動摘要 (Automated Summarization for <fixed-case>C</fixed-case>hinese News Brief Service in Cellular Phones) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>177–189</pages>
       <url hash="3d929804">O04-1019</url>
       <bibkey>tseng-2004-zhong</bibkey>
@@ -176,9 +176,9 @@
     <paper id="20">
       <title>Using the Web as Corpus for Un-supervised Learning in Question Answering</title>
       <author><first>Yi-Chia</first><last>Wang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Tyne</first><last>Liang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>191–198</pages>
       <url hash="de4ff02f">O04-1020</url>
       <bibkey>wang-etal-2004-using</bibkey>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="22">
       <title>具相關資訊回饋能力之貝氏混合式機率檢索模型 (Using Relevance Feedback in <fixed-case>B</fixed-case>ayesian Probabilistic Mixture Retrieval Model) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <author><first>Duen-Chi</first><last>Yang</last></author>
       <pages>209–218</pages>
       <url hash="d8aa4deb">O04-1022</url>
@@ -212,13 +212,13 @@
     </paper>
     <paper id="24">
       <title>華台雙語發音變異性之語音辨識研究及<fixed-case>PDA</fixed-case>之應用 (The study of pronunciation variations in <fixed-case>M</fixed-case>andarin and <fixed-case>T</fixed-case>aiwanese and its application in <fixed-case>PDA</fixed-case>) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Dau-cheng</first><last>Lyu</last></author>
+      <author id="dau-cheng-lyu"><first>Dau-cheng</first><last>Lyu</last></author>
       <author><first>Hong-Wen</first><last>Hsien</last></author>
       <author><first>Yung-Xian</first><last>Lee</last></author>
       <author><first>Zhong-Ing</first><last>Liou</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <author><first>Yung-Jien</first><last>Chiang</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <pages>229–238</pages>
       <url hash="e31d389f">O04-1024</url>
       <bibkey>lyu-etal-2004-hua</bibkey>
@@ -227,8 +227,8 @@
       <title>以語音辨識與評分輔助口說英文學習 (Spoken <fixed-case>E</fixed-case>nglish Learning Based on Speech Recognition and Assessment) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jiang-Chun</first><last>Chen</last></author>
       <author><first>Jui-Lin</first><last>Lo</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
       <pages>239–248</pages>
       <url hash="d29e7461">O04-1025</url>
       <bibkey>chen-etal-2004-yi</bibkey>
@@ -243,9 +243,9 @@
     </paper>
     <paper id="27">
       <title>Collocational Translation Memory Extraction Based on Statistical and Linguistic Information</title>
-      <author><first>Jia-Yan</first><last>Jian</last></author>
+      <author id="jia-yan-jian"><first>Jia-Yan</first><last>Jian</last></author>
       <author><first>Yu-Chia</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>257–264</pages>
       <url hash="4e176695">O04-1027</url>
       <bibkey>jian-etal-2004-collocational</bibkey>
@@ -262,7 +262,7 @@
     <paper id="29">
       <title>結合統計與語言訊息的混合式中英雙語句對應演算法 (Combining Linguistic and Statistical Information in <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Bilingual Sentence Alignment) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Chun</first><last>Lin</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <pages>273–283</pages>
       <url hash="80897979">O04-1029</url>
       <bibkey>lin-gao-2004-jie</bibkey>
@@ -286,10 +286,10 @@
     <paper id="32">
       <title>The Construction of a <fixed-case>C</fixed-case>hinese Named Entity Tagged Corpus: <fixed-case>CNEC</fixed-case>1.0</title>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Chiu-Chen</first><last>Hsieh</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>305–313</pages>
       <url hash="6f7f8408">O04-1032</url>
       <bibkey>shih-etal-2004-construction</bibkey>
@@ -305,7 +305,7 @@
     </paper>
     <paper id="34">
       <title>仿趙氏音高尺度之基週軌跡正規化方法及其應用 (A Pitch-Contour Normalization Method Following Zhao’s Pitch Scale and Its Application) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
       <author><first>Hsiao-Fen</first><last>Chang</last></author>
       <author><first>Chun Hsin</first><last>Wu</last></author>
       <pages>325–334</pages>
@@ -314,10 +314,10 @@
     </paper>
     <paper id="35">
       <title>基於反轉檔查找與最佳片段選取演算法的中文語音合成系統 (A <fixed-case>M</fixed-case>andarin Text-to-speech System based on Inverted File Indexing and Unit Selection) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Cheng Yuan</first><last>Lin</last></author>
+      <author id="cheng-yuan-lin"><first>Cheng Yuan</first><last>Lin</last></author>
       <author><first>Ming-Feng</first><last>Hsieh</last></author>
       <author><first>Kuan-Ting</first><last>Chen</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>335–344</pages>
       <url hash="1e7c093b">O04-1035</url>
       <bibkey>lin-etal-2004-ji</bibkey>
@@ -325,7 +325,7 @@
     <paper id="36">
       <title>Improved Prosody Module in a Text-to-Speech System</title>
       <author><first>Wen-Wei</first><last>Liao</last></author>
-      <author><first>Jia-Lin</first><last>Shen</last></author>
+      <author id="jia-lin-shen"><first>Jia-Lin</first><last>Shen</last></author>
       <pages>345–354</pages>
       <url hash="e627fd5a">O04-1036</url>
       <bibkey>liao-shen-2004-improved</bibkey>
@@ -365,7 +365,7 @@
     <paper id="1">
       <title>Bilingual Collocation Extraction Based on Syntactic and Statistical Analyses</title>
       <author><first>Chien-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>1–20</pages>
       <url hash="8356b9fc">O04-2001</url>
       <bibkey>wu-chang-2004-bilingual</bibkey>
@@ -382,18 +382,18 @@
       <title>Auto-Generation of <fixed-case>NVEF</fixed-case> Knowledge in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Jia-Lin</first><last>Tsai</last></author>
       <author><first>Gladys</first><last>Hsieh</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>41–64</pages>
       <url hash="0c1311a6">O04-2003</url>
       <bibkey>tsai-etal-2004-auto</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>M</fixed-case>encius: A <fixed-case>C</fixed-case>hinese Named Entity Recognizer Using the Maximum Entropy-based Hybrid Model</title>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Cheng-Wei</first><last>Lee</last></author>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>65–82</pages>
       <url hash="e3a3d1bc">O04-2004</url>
       <bibkey>tsai-etal-2004-mencius</bibkey>
@@ -401,7 +401,7 @@
     <paper id="5">
       <title>Reliable and Cost-Effective Pos-Tagging</title>
       <author><first>Yu-Fang</first><last>Tsai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>83–96</pages>
       <url hash="8d750660">O04-2005</url>
       <bibkey>tsai-chen-2004-reliable</bibkey>
@@ -416,7 +416,7 @@
     <paper id="7">
       <title>The Properties and Further Applications of <fixed-case>C</fixed-case>hinese Frequent Strings</title>
       <author><first>Yih-Jeng</first><last>Lin</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <pages>113–128</pages>
       <url hash="376ca533">O04-2007</url>
       <bibkey>lin-yu-2004-properties</bibkey>
@@ -435,9 +435,9 @@
     </frontmatter>
     <paper id="1">
       <title>Toward Constructing A Multilingual Speech Corpus for <fixed-case>T</fixed-case>aiwanese (<fixed-case>M</fixed-case>in-nan), <fixed-case>H</fixed-case>akka, and <fixed-case>M</fixed-case>andarin</title>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <author><first>Min-Siong</first><last>Liang</last></author>
-      <author><first>Yuang-Chin</first><last>Chiang</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-Chin</first><last>Chiang</last></author>
       <pages>1–12</pages>
       <url hash="2c86ecf5">O04-3001</url>
       <bibkey>lyu-etal-2004-toward</bibkey>
@@ -454,7 +454,7 @@
     </paper>
     <paper id="3">
       <title>Latent Semantic Language Modeling and Smoothing</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <author><first>Meng-Sung</first><last>Wu</last></author>
       <author><first>Hua-Jui</first><last>Peng</last></author>
       <pages>29–44</pages>
diff --git a/data/xml/O05.xml b/data/xml/O05.xml
index d235665e3d..7d65516f1e 100644
--- a/data/xml/O05.xml
+++ b/data/xml/O05.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 17th Conference on Computational Linguistics and Speech Processing</booktitle>
       <url hash="7d2d4469">O05-1</url>
       <editor><first>Chung-Hsien</first><last>Wu</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <editor><first>Wen-Hsiang</first><last>Lu</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Tainan, Taiwan</address>
@@ -39,7 +39,7 @@
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <author><first>Zhi-Xian</first><last>Zhuang</last></author>
       <author><first>Zi-He</first><last>Chen</last></author>
-      <author><first>Yau-Tarng</first><last>Juang</last></author>
+      <author id="yau-tarng-juang"><first>Yau-Tarng</first><last>Juang</last></author>
       <pages>37–53</pages>
       <url hash="3b95462e">O05-1003</url>
       <bibkey>liao-etal-2005-jie</bibkey>
@@ -89,8 +89,8 @@
     </paper>
     <paper id="9">
       <title>日本學生學習華語的聲調偏誤分析:以二字調為例 (Tonal errors of <fixed-case>J</fixed-case>apanese students learning <fixed-case>C</fixed-case>hinese: A study of disyllabic words) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ke-Jia</first><last>Zhang</last></author>
-      <author><first>Li-Mei</first><last>Chen</last></author>
+      <author id="ke-jia-zhang"><first>Ke-Jia</first><last>Zhang</last></author>
+      <author id="li-mei-chen"><first>Li-Mei</first><last>Chen</last></author>
       <pages>125–139</pages>
       <url hash="d1bf0b16">O05-1009</url>
       <bibkey>zhang-chen-2005-ri</bibkey>
@@ -100,7 +100,7 @@
       <author><first>Chia-Yin</first><last>Chen</last></author>
       <author><first>Ming Hsien</first><last>Ko</last></author>
       <author><first>Tzu-Wei</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>155–163</pages>
       <url hash="7b943a81">O05-1010</url>
       <bibkey>chen-etal-2005-fast</bibkey>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="12">
       <title>使用韻律階層及大量詞彙的中文文轉音系統 (A <fixed-case>M</fixed-case>andarin Text-to-Speech System Using Prosodic Hierarchy and a Large Number of Words) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Tang-Yu</first><last>Zhang</last></author>
       <author><first>Tsan-Huang</first><last>Shiu</last></author>
       <author><first>Yu-Her</first><last>Tsai</last></author>
@@ -136,7 +136,7 @@
     </paper>
     <paper id="14">
       <title>閩南語語句基週軌跡產生: 兩種模型之混合與比較 (<fixed-case>M</fixed-case>in-<fixed-case>N</fixed-case>an Sentence Pitch-contour Generation: Mixing and Comparison of Two Kinds of Models) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Wei</first><last>Huang</last></author>
       <pages>213–225</pages>
       <url hash="7129d60b">O05-1014</url>
@@ -147,16 +147,16 @@
       <author><first>Bin</first><last>Yan</last></author>
       <author><first>Zhe-Ming</first><last>Lu</last></author>
       <author><first>Jeng-Shyang</first><last>Pan</last></author>
-      <author><first>Sheng-He</first><last>Sun</last></author>
+      <author id="sheng-he-sun"><first>Sheng-He</first><last>Sun</last></author>
       <pages>239–246</pages>
       <url hash="5b2a69f1">O05-1015</url>
       <bibkey>yan-etal-2005-statistical</bibkey>
     </paper>
     <paper id="16">
       <title>A Probe into Ambiguities of Determinative-Measure Compounds</title>
-      <author><first>Shih-Min</first><last>Li</last></author>
+      <author id="shih-min-li"><first>Shih-Min</first><last>Li</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>247–255</pages>
       <url hash="3a931730">O05-1016</url>
       <bibkey>li-etal-2005-probe</bibkey>
@@ -166,8 +166,8 @@
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
       <author><first>Chia-Wei</first><last>Wu</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>257–271</pages>
       <url hash="1e0672ce">O05-1017</url>
       <bibkey>wu-etal-2005-applying</bibkey>
@@ -182,10 +182,10 @@
     </paper>
     <paper id="19">
       <title>台語變調系統實作研究 (A Study on Implementation of <fixed-case>T</fixed-case>aiwanese Tone Sandhi System) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ún-giân</first><last>Iû</last></author>
+      <author id="un-gian-iunn"><first>Ún-giân</first><last>Iû</last></author>
       <author><first>Sheng-an</first><last>Li</last></author>
-      <author><first>Kiãt-gãk</first><last>Lâu</last></author>
-      <author><first>Cheng-yan</first><last>Kao</last></author>
+      <author id="kiat-gak-lau"><first>Kiãt-gãk</first><last>Lâu</last></author>
+      <author id="cheng-yan-kao"><first>Cheng-yan</first><last>Kao</last></author>
       <pages>293–304</pages>
       <url hash="f9602d89">O05-1019</url>
       <bibkey>iu-etal-2005-tai</bibkey>
@@ -193,8 +193,8 @@
     <paper id="20">
       <title>利用雙語學術名詞庫抽取中英字詞互譯及詞義解歧 (Sense Extraction and Disambiguation for <fixed-case>C</fixed-case>hinese Words from Bilingual Terminology Bank) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>305–316</pages>
       <url hash="61419840">O05-1020</url>
       <bibkey>bai-etal-2005-li</bibkey>
@@ -202,7 +202,7 @@
     <paper id="21">
       <title>利用向量支撐機辨識中文基底名詞組的初步研究 (A Preliminary Study on <fixed-case>C</fixed-case>hinese Base <fixed-case>NP</fixed-case> Detection using <fixed-case>SVM</fixed-case>) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsi-Wei</first><last>Chang</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <pages>317–331</pages>
       <url hash="4f7ed45d">O05-1021</url>
@@ -219,7 +219,7 @@
     <paper id="23">
       <title>國語廣播新聞語料轉述系統之效能評估 (Evaluation of <fixed-case>M</fixed-case>andarin Broadcast News Transcription System) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Lung-Hsun</first><last>Chang</last></author>
-      <author><first>Yih-ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-ru</first><last>Wang</last></author>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <pages>347–360</pages>
       <url hash="a0eb909b">O05-1023</url>
@@ -246,7 +246,7 @@
     <paper id="26">
       <title>基於統計與佚代的中英雙語詞及小句對應演算法 (An Iterative Algorithm for Bilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Word and Clause Alignment based on Statistics) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Tzu-Huan</first><last>Huang</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <pages>385–395</pages>
       <url hash="319226f3">O05-1026</url>
       <bibkey>huang-gao-2005-ji</bibkey>
@@ -254,7 +254,7 @@
     <paper id="27">
       <title>電視新聞語料場景的自動切割與分類 (Automatic Scene Segmentation and Classification in Television News Database) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Bo-Syuan</first><last>Ziang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <author><first>Bor-Ho</first><last>Yaung</last></author>
       <author><first>Hong-Wen</first><last>Hsien</last></author>
       <pages>397–409</pages>
@@ -303,7 +303,7 @@
     </paper>
     <paper id="2">
       <title>Reduced N-Grams for <fixed-case>C</fixed-case>hinese Evaluation</title>
-      <author><first>Le Quan</first><last>Ha</last></author>
+      <author id="le-quan-ha"><first>Le Quan</first><last>Ha</last></author>
       <author><first>R.</first><last>Seymour</last></author>
       <author id="philip-hanna"><first>P.</first><last>Hanna</last></author>
       <author id="francis-j-smith"><first>F. J.</first><last>Smith</last></author>
@@ -316,7 +316,7 @@
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Ming-Jun</first><last>Chen</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>35–52</pages>
       <url hash="885836d2">O05-2003</url>
       <bibkey>yeh-etal-2005-automated</bibkey>
@@ -324,8 +324,8 @@
     <paper id="4">
       <title><fixed-case>C</fixed-case>hinese Main Verb Identification: From Specification to Realization</title>
       <author><first>Bing-Gong</first><last>Ding</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
-      <author><first>De-Gen</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="degen-huang"><first>De-Gen</first><last>Huang</last></author>
       <pages>53–94</pages>
       <url hash="ec241d15">O05-2004</url>
       <bibkey>ding-etal-2005-chinese</bibkey>
@@ -333,7 +333,7 @@
     <paper id="5">
       <title>Aligning Parallel Bilingual Corpora Statistically with Punctuation Criteria</title>
       <author><first>Thomas C.</first><last>Chuang</last></author>
-      <author><first>Kevin C.</first><last>Yeh</last></author>
+      <author id="kevin-c-yeh"><first>Kevin C.</first><last>Yeh</last></author>
       <pages>95–122</pages>
       <url hash="a33c9662">O05-2005</url>
       <bibkey>chuang-yeh-2005-aligning</bibkey>
@@ -341,7 +341,7 @@
     <paper id="6">
       <title>Similarity Based <fixed-case>C</fixed-case>hinese Synonym Collocation Extraction</title>
       <author><first>Wanyin</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>123–144</pages>
       <url hash="afb42abc">O05-2006</url>
@@ -361,8 +361,8 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic Segmentation and Labeling for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Speech Corpora for Concatenation-based <fixed-case>TTS</fixed-case></title>
-      <author><first>Cheng-Yuan</first><last>Lin</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="cheng-yuan-lin"><first>Cheng-Yuan</first><last>Lin</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <author><first>Kuan-Ting</first><last>Chen</last></author>
       <pages>145–166</pages>
       <url hash="23127159">O05-3001</url>
@@ -385,7 +385,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>MATBN</fixed-case>: A <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Broadcast News Corpus</title>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <author><first>Jen-Wei</first><last>Kuo</last></author>
       <author><first>Shih-Sian</first><last>Cheng</last></author>
@@ -399,7 +399,7 @@
       <author><first>Chung-Hsien</first><last>Yang</last></author>
       <author><first>Jhing-Fa</first><last>Wang</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>237–250</pages>
       <url hash="b1f9e772">O05-3005</url>
       <bibkey>wang-etal-2005-taicar</bibkey>
@@ -407,7 +407,7 @@
     <paper id="6">
       <title>Design and Development of a Bilingual Reading Comprehension Corpus</title>
       <author><first>Kui</first><last>Xu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>251–276</pages>
       <url hash="9af76b37">O05-3006</url>
       <bibkey>xu-meng-2005-design</bibkey>
@@ -438,7 +438,7 @@
       <title>Using Lexical Constraints to Enhance the Quality of Computer-Generated Multiple-Choice Cloze Items</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Chun-Hung</first><last>Wang</last></author>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
       <pages>303-328</pages>
       <url hash="f9272ff7">O05-4001</url>
       <bibkey>liu-etal-2005-using</bibkey>
@@ -446,9 +446,9 @@
     <paper id="2">
       <title>Collocational Translation Memory Extraction Based on Statistical and Linguistic Information</title>
       <author><first>Thomas C.</first><last>Chuang</last></author>
-      <author><first>Jia-Yan</first><last>Jian</last></author>
+      <author id="jia-yan-jian"><first>Jia-Yan</first><last>Jian</last></author>
       <author><first>Yu-Chia</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>329–346</pages>
       <url hash="1c871d00">O05-4002</url>
       <bibkey>chuang-etal-2005-collocational</bibkey>
@@ -465,19 +465,19 @@
     </paper>
     <paper id="4">
       <title>Modeling Pronunciation Variation for Bi-Lingual <fixed-case>M</fixed-case>andarin/<fixed-case>T</fixed-case>aiwanese Speech Recognition</title>
-      <author><first>Dau-Cheng</first><last>Lyu</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
-      <author><first>Yuang-Chin</first><last>Chiang</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="dau-cheng-lyu"><first>Dau-Cheng</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-Chin</first><last>Chiang</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>363–380</pages>
       <url hash="e8de06c6">O05-4004</url>
       <bibkey>lyu-etal-2005-modeling</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation by Classification of Characters</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>381–396</pages>
       <url hash="4e37ec07">O05-4005</url>
       <bibkey>goh-etal-2005-chinese</bibkey>
@@ -485,7 +485,7 @@
     <paper id="6">
       <title>The Design and Construction of the <fixed-case>P</fixed-case>oly<fixed-case>U</fixed-case> Shallow Treebank</title>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yin</first><last>Li</last></author>
       <author><first>Wanyin</first><last>Li</last></author>
       <pages>397–416</pages>
@@ -509,16 +509,16 @@
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Chun-Ling</first><last>Chen</last></author>
       <author><first>Cui-Xia</first><last>Weng</last></author>
-      <author><first>Hsiang-Ping</first><last>Lee</last></author>
+      <author id="hsiang-pin-lee"><first>Hsiang-Ping</first><last>Lee</last></author>
       <author><first>Yong-Xiang</first><last>Chen</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>417–430</pages>
       <url hash="e7d63f32">O05-5001</url>
       <bibkey>huang-etal-2005-sinica</bibkey>
     </paper>
     <paper id="2">
       <title>From Frame to Subframe: Collocational Asymmetry in <fixed-case>M</fixed-case>andarin Verbs of Conversation</title>
-      <author><first>Mei-Chun</first><last>Liu</last></author>
+      <author id="mei-chun-liu"><first>Mei-Chun</first><last>Liu</last></author>
       <author><first>Chun Edison</first><last>Chang</last></author>
       <pages>431–444</pages>
       <url hash="4b5cd716">O05-5002</url>
@@ -526,9 +526,9 @@
     </paper>
     <paper id="3">
       <title>Feature Representations and Logical Compatibility between Temporal Adverbs and Aspects</title>
-      <author><first>Shih-Min</first><last>Li</last></author>
+      <author id="shih-min-li"><first>Shih-Min</first><last>Li</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>445–458</pages>
       <url hash="0c0d4dd1">O05-5003</url>
       <bibkey>li-etal-2005-feature</bibkey>
@@ -544,7 +544,7 @@
       <title>An Unsupervised Approach to <fixed-case>C</fixed-case>hinese Word Sense Disambiguation Based on Hownet</title>
       <author><first>Hao</first><last>Chen</last></author>
       <author><first>Tingting</first><last>He</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Changqin</first><last>Quan</last></author>
       <pages>473–482</pages>
       <url hash="999ef4b3">O05-5005</url>
@@ -552,7 +552,7 @@
     </paper>
     <paper id="6">
       <title>以句式為本的多義詞詞義辨識 (Word Sense Disambiguation Based on Syntactic Construction) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Mei-Chih</first><last>Tsai</last></author>
+      <author id="mei-chih-tsai"><first>Mei-Chih</first><last>Tsai</last></author>
       <pages>483–494</pages>
       <url hash="296048e2">O05-5006</url>
       <bibkey>tsai-2005-yi</bibkey>
@@ -575,15 +575,15 @@
     </paper>
     <paper id="9">
       <title>A Synchronous Corpus-Based Study on the Usage and Perception of Judgement Terms in the Pan-<fixed-case>C</fixed-case>hinese Context</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>519–532</pages>
       <url hash="50b1b38e">O05-5009</url>
       <bibkey>kwong-tsou-2005-synchronous</bibkey>
     </paper>
     <paper id="10">
       <title>《人民日報》語料庫命名實体分類的研究 (The <fixed-case>C</fixed-case>hinese Named Entity Categorization Based on the People’s Daily Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>YingJu</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>YingJu</first><last>Xia</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Fumihito</first><last>Nishino</last></author>
       <pages>533–542</pages>
@@ -602,7 +602,7 @@
     </paper>
     <paper id="12">
       <title>Source Domains as Concept Domains in Metaphorical Expressions</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>553–570</pages>
diff --git a/data/xml/O06.xml b/data/xml/O06.xml
index 8903799725..15d7690e10 100644
--- a/data/xml/O06.xml
+++ b/data/xml/O06.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 18th Conference on Computational Linguistics and Speech Processing</booktitle>
       <url hash="1fc78972">O06-1</url>
-      <editor><first>Yih-Ru</first><last>Wang</last></editor>
+      <editor id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></editor>
       <editor><first>Zaho-Ming</first><last>Gao</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Hsinchu, Taiwan</address>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>中文動詞名物化判斷的統計式模型設計 (A Stochastic Model for Prediction of Deverbal Nouns in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>29–40</pages>
       <url hash="0a4d6dfd">O06-1002</url>
@@ -34,7 +34,7 @@
     </paper>
     <paper id="3">
       <title>大規模詞彙語意關係自動標示之初步研究: 以中文詞網(<fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet)為例 (A Preliminary Study on Large-scale Automatic Labeling of Lexical Semantic Relations: A Case study of <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Petr</first><last>Šimon</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>41–51</pages>
@@ -54,14 +54,14 @@
       <title>Improve Parsing Performance by Self-Learning</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Duen-Chi</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>63–76</pages>
       <url hash="f2a61a7d">O06-1005</url>
       <bibkey>hsieh-etal-2006-improve</bibkey>
     </paper>
     <paper id="6">
       <title>國語雙字語詞聲調評分系統 (A Scoring System for <fixed-case>M</fixed-case>andarin Tones Uttered in Disyllabic Words) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Shih-Yan</first><last>Sun</last></author>
       <author><first>Hsiao-Fen</first><last>Chang</last></author>
       <pages>77–89</pages>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="7">
       <title>一種用於網路電話之遺失封包補償方法 (A Packet Loss Concealment Method for Voice over <fixed-case>IP</fixed-case> ) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Zia-Sin</first><last>Chen</last></author>
       <pages>91–110</pages>
       <url hash="f13b45d9">O06-1007</url>
@@ -123,7 +123,7 @@
       <title>鑑別性事前資訊應用於強健性語音辨識 (Robust Speech Recognition Using Discriminative Prior Statistics) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chuan-Wei</first><last>Ting</last></author>
       <author><first>Bo-Shu</first><last>Wu</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>189–204</pages>
       <url hash="466671a0">O06-1013</url>
       <bibkey>ting-etal-2006-jian</bibkey>
@@ -135,7 +135,7 @@
       <author><first>Zi-He</first><last>Chen</last></author>
       <author><first>Zhi-Ren</first><last>Zeng</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Yau-Tang</first><last>Juang</last></author>
+      <author id="yau-tarng-juang"><first>Yau-Tang</first><last>Juang</last></author>
       <pages>205–219</pages>
       <url hash="94c24ba6">O06-1014</url>
       <bibkey>chang-etal-2006-jie</bibkey>
@@ -189,8 +189,8 @@
     </paper>
     <paper id="21">
       <title>Learning to Parse Bilingual Sentences Using Bilingual Corpus and Monolingual <fixed-case>CFG</fixed-case></title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>329–351</pages>
       <url hash="901104b8">O06-1021</url>
       <bibkey>huang-chang-2006-learning</bibkey>
@@ -236,7 +236,7 @@
       <title>Modeling <fixed-case>C</fixed-case>antonese Pronunciation Variations for Large-Vocabulary Continuous Speech Recognition</title>
       <author><first>Tan</first><last>Lee</last></author>
       <author><first>Patgi</first><last>Kam</last></author>
-      <author><first>Frank K.</first><last>Soong</last></author>
+      <author id="frank-k-soong"><first>Frank K.</first><last>Soong</last></author>
       <pages>17–36</pages>
       <url hash="1db4d63d">O06-2002</url>
       <bibkey>lee-etal-2006-modeling</bibkey>
@@ -244,8 +244,8 @@
     <paper id="3">
       <title>A Maximum Entropy Approach for Semantic Language Modeling</title>
       <author><first>Chuang-Hua</first><last>Chueh</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>37–56</pages>
       <url hash="a15d3855">O06-2003</url>
       <bibkey>chueh-etal-2006-maximum</bibkey>
@@ -274,7 +274,7 @@
     <paper id="6">
       <title>Voice Activity Detection Based on Auto-Correlation Function Using Wavelet Transform and Teager Energy Operator</title>
       <author><first>Bing-Fei</first><last>Wu</last></author>
-      <author><first>Kun-Ching</first><last>Wang</last></author>
+      <author id="kun-ching-wang"><first>Kun-Ching</first><last>Wang</last></author>
       <pages>87–100</pages>
       <url hash="44fc1274">O06-2006</url>
       <bibkey>wu-wang-2006-voice</bibkey>
@@ -294,7 +294,7 @@
     <paper id="1">
       <title>Two-Fold Filtering for <fixed-case>C</fixed-case>hinese Subcategorization Acquisition with Diathesis Alternations Used as Heuristic Information</title>
       <author><first>Xiwu</first><last>Han</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>101–114</pages>
       <url hash="f20faef3">O06-3001</url>
       <bibkey>han-zhao-2006-two</bibkey>
@@ -302,9 +302,9 @@
     <paper id="2">
       <title><fixed-case>C</fixed-case>hinese Chunking Based on Maximum Entropy <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Guang-Lu</first><last>Sun</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
-      <author><first>Xiao-Long</first><last>Wang</last></author>
-      <author><first>Zhi-Ming</first><last>Xu</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="xiao-long-wang"><first>Xiao-Long</first><last>Wang</last></author>
+      <author id="zhiming-xu"><first>Zhi-Ming</first><last>Xu</last></author>
       <pages>115–136</pages>
       <url hash="efa168ae">O06-3002</url>
       <bibkey>sun-etal-2006-chinese</bibkey>
@@ -313,7 +313,7 @@
       <title>A Structural-Based Approach to <fixed-case>C</fixed-case>antonese-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Yan</first><last>Wu</last></author>
       <author><first>Xiukun</first><last>Li</last></author>
-      <author><first>Caesar</first><last>Lun</last></author>
+      <author id="suen-caesar-lun"><first>Caesar</first><last>Lun</last></author>
       <pages>137–158</pages>
       <url hash="6dbfff79">O06-3003</url>
       <bibkey>wu-etal-2006-structural</bibkey>
@@ -351,7 +351,7 @@
       <title>An Empirical Study of Word Error Minimization Approaches for <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition</title>
       <author><first>Jen-Wei</first><last>Kuo</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>201–222</pages>
       <url hash="11128bd9">O06-4002</url>
@@ -360,26 +360,26 @@
     <paper id="3">
       <title>Sense Extraction and Disambiguation for <fixed-case>C</fixed-case>hinese Words from Bilingual Terminology Bank</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>223–244</pages>
       <url hash="c27970f8">O06-4003</url>
       <bibkey>bai-etal-2006-sense</bibkey>
     </paper>
     <paper id="4">
       <title>A Probe into Ambiguities of Determinative-Measure Compounds</title>
-      <author><first>Shih-Min</first><last>Li</last></author>
+      <author id="shih-min-li"><first>Shih-Min</first><last>Li</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Chia-Hung</first><last>Tai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="chia-hung-tai"><first>Chia-Hung</first><last>Tai</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>245–280</pages>
       <url hash="5fb1bea1">O06-4004</url>
       <bibkey>li-etal-2006-probe</bibkey>
     </paper>
     <paper id="5">
       <title>Tonal Errors of <fixed-case>J</fixed-case>apanese Students Learning <fixed-case>C</fixed-case>hinese: A Study of Disyllabic Words</title>
-      <author><first>Ke-Jia</first><last>Chang</last></author>
-      <author><first>Li-Mei</first><last>Chen</last></author>
+      <author id="ke-jia-zhang"><first>Ke-Jia</first><last>Chang</last></author>
+      <author id="li-mei-chen"><first>Li-Mei</first><last>Chen</last></author>
       <author><first>Nien-Chen</first><last>Lee</last></author>
       <pages>281–296</pages>
       <url hash="a11f9a31">O06-4005</url>
@@ -387,10 +387,10 @@
     </paper>
     <paper id="6">
       <title>Performance Analysis and Visualization of Machine Translation Evaluation</title>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Yunqian</first><last>Qu</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="yun-qian-qu"><first>Yunqian</first><last>Qu</last></author>
       <author><first>Qiang</first><last>Lv</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <author><first>Jing</first><last>Zhang</last></author>
       <pages>297–314</pages>
       <url hash="cd1d3260">O06-4006</url>
@@ -420,7 +420,7 @@
     <paper id="2">
       <title>Multiply Quantified Internally Headed Relative Clause in <fixed-case>J</fixed-case>apanese: A Skolem Term Based Approach</title>
       <author><first>Rui</first><last>Otake</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>333-348</pages>
       <url hash="10f26540">O06-5002</url>
       <bibkey>otake-yoshimoto-2006-multiply</bibkey>
@@ -429,7 +429,7 @@
       <title>Data Management in <fixed-case>QRL</fixed-case>ex, an Online Aid System for Volunteer Translators’</title>
       <author><first>Youcef</first><last>Bey</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>349–376</pages>
       <url hash="a2ebcf96">O06-5003</url>
       <bibkey>bey-etal-2006-data</bibkey>
@@ -445,7 +445,7 @@
       <title>A Pragmatic <fixed-case>C</fixed-case>hinese Word Segmentation Approach Based on Mixing Models</title>
       <author><first>Wei</first><last>Jiang</last></author>
       <author><first>Yi</first><last>Guan</last></author>
-      <author><first>Xiao-Long</first><last>Wang</last></author>
+      <author id="xiao-long-wang"><first>Xiao-Long</first><last>Wang</last></author>
       <pages>393–416</pages>
       <url hash="f8cda8c7">O06-5005</url>
       <bibkey>jiang-etal-2006-pragmatic</bibkey>
diff --git a/data/xml/O07.xml b/data/xml/O07.xml
index bbed4fc677..9b554220ed 100644
--- a/data/xml/O07.xml
+++ b/data/xml/O07.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 19th Conference on Computational Linguistics and Speech Processing</booktitle>
       <url hash="3eaeedc4">O07-1</url>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <editor><first>Berlin</first><last>Chen</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
@@ -28,7 +28,7 @@
       <title>貝氏主題混合資訊檢索模型 (<fixed-case>B</fixed-case>ayesian Topic Mixture Model for Information Retrieval) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Meng-Sung</first><last>Wu</last></author>
       <author><first>Hsuan-Jui</first><last>Hsu</last></author>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>21–35</pages>
       <url hash="7a06a445">O07-1002</url>
       <bibkey>wu-etal-2007-bei</bibkey>
@@ -36,9 +36,9 @@
     <paper id="3">
       <title><fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Cross-Language Information Retrieval Based on Extension of Dictionaries and Transliteration</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Tzong-Han Richard</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han Richard</first><last>Tsai</last></author>
       <author><first>Hsu-Chun</first><last>Yen</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>37–44</pages>
       <url hash="686e6ee0">O07-1003</url>
       <bibkey>wang-etal-2007-korean</bibkey>
@@ -46,7 +46,7 @@
     <paper id="4">
       <title>加成性雜訊環境下運用特徵參數統計補償法於強健性語音辨識 (Feature Statistics Compensation for Robust Speech Recognition in Additive Noise Environments) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Tsung-hsueh</first><last>Hsieh</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>45–59</pages>
       <url hash="f8ac7a96">O07-1004</url>
       <bibkey>hsieh-hung-2007-jia</bibkey>
@@ -70,15 +70,15 @@
     </paper>
     <paper id="7">
       <title>端點偵測技術在強健語音參數擷取之研究 (Study of the Voice Activity Detection Techniques for Robust Speech Feature Extraction) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Wen-Hsiang</first><last>Tu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-Hsiang</first><last>Tu</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>89–102</pages>
       <url hash="d6f3478c">O07-1007</url>
       <bibkey>tu-hung-2007-duan</bibkey>
     </paper>
     <paper id="8">
       <title>從不同韻律格式驗證階層式韻律架構並兼論對語音科技的應用 (One Base Form of Discourse Prosody Goes a Long Way– Evidence of Sytle Dependent Contribution and Possible Applilcation to Technology Development) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Chiu-Yu</first><last>Tseng</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-Yu</first><last>Tseng</last></author>
       <author><first>Zhao-Yu</first><last>Su</last></author>
       <pages>103–115</pages>
       <url hash="8f0af4b4">O07-1008</url>
@@ -86,10 +86,10 @@
     </paper>
     <paper id="9">
       <title>多語聲學單位分類之最佳化研究 (The Study of Acoustic Model Clustering in Multilingual Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Dau-cheng</first><last>Lyu</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="dau-cheng-lyu"><first>Dau-cheng</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <author><first>Yung-Jien</first><last>Chiang</last></author>
-      <author><first>Chun-nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-nan</first><last>Hsu</last></author>
       <pages>117–130</pages>
       <url hash="c2f8ae54">O07-1009</url>
       <bibkey>lyu-etal-2007-duo</bibkey>
@@ -97,7 +97,7 @@
     <paper id="10">
       <title>詞義辨識:機器學習演算法特徵的選取與組合 (Feature Selections in Word Sense Disambiguation) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Shao Hang</first><last>Kao</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <pages>131–144</pages>
       <url hash="6c3a334b">O07-1010</url>
       <bibkey>kao-gao-2007-ci</bibkey>
@@ -106,7 +106,7 @@
       <title>Word Translation Disambiguation via Dependency (利用依存關係之辭彙翻譯)</title>
       <author><first>Meng-Chin</first><last>Hsiao</last></author>
       <author><first>Kun-Ju</first><last>Yang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>145–159</pages>
       <url hash="51d6b5be">O07-1011</url>
       <bibkey>hsiao-etal-2007-word</bibkey>
@@ -114,9 +114,9 @@
     <paper id="12">
       <title>Knowledge Representation for Interrogatives in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>You-Shan</first><last>Chung</last></author>
+      <author id="you-shan-chung"><first>You-Shan</first><last>Chung</last></author>
       <author><first>Yueh-Yin</first><last>Shih</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>161–175</pages>
       <url hash="7df887ac">O07-1012</url>
       <bibkey>huang-etal-2007-knowledge</bibkey>
@@ -133,7 +133,7 @@
     <paper id="14">
       <title>基於統計方法之中文搭配詞自動擷取 (<fixed-case>C</fixed-case>hinese Collocation Extracting Automation Based on Statistical Methods) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Tsui-Yun</first><last>Chang</last></author>
-      <author><first>Su-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Su-Jin</first><last>Ker</last></author>
       <pages>191–203</pages>
       <url hash="e4d127b6">O07-1014</url>
       <bibkey>chang-ker-2007-ji</bibkey>
@@ -150,16 +150,16 @@
     <paper id="16">
       <title>混合語言之語音的語言辨認 (Language Identification on Code-Switching Speech) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chyng-Leei</first><last>Chu</last></author>
-      <author><first>Dau-cheng</first><last>Lyu</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="dau-cheng-lyu"><first>Dau-cheng</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <pages>219–231</pages>
       <url hash="0fc6e888">O07-1016</url>
       <bibkey>chu-etal-2007-hun</bibkey>
     </paper>
     <paper id="17">
       <title>基於<fixed-case>HNM</fixed-case> 之國語音節信號的合成方法 (An <fixed-case>HNM</fixed-case> Based Method for Synthesizing <fixed-case>M</fixed-case>andarin Syllable Signal) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-yan</first><last>Gu</last></author>
-      <author><first>Yen-zuo</first><last>Zhou</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
+      <author id="yan-zuo-zhou"><first>Yen-zuo</first><last>Zhou</last></author>
       <pages>233–243</pages>
       <url hash="f302b752">O07-1017</url>
       <bibkey>gu-zhou-2007-ji</bibkey>
@@ -176,7 +176,7 @@
     <meta>
       <booktitle><fixed-case>ROCLING</fixed-case> 2007 Poster Papers</booktitle>
       <url hash="f18a68b4">O07-2</url>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <editor><first>Berlin</first><last>Chen</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
@@ -189,7 +189,7 @@
     </frontmatter>
     <paper id="1">
       <title>中文詞彙語意資料的整合與擷取：詞彙語意學的觀點 (Extraction and Integration of <fixed-case>C</fixed-case>hinese Lexical Semantic Information) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <pages>257–271</pages>
       <url hash="7835b00f">O07-2001</url>
       <bibkey>gao-2007-zhong</bibkey>
@@ -198,7 +198,7 @@
       <title>中文單詞之韻律模式研究 (A Study on Prosodic Modeling for Isolated <fixed-case>M</fixed-case>andarin Words) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chi-Feng</first><last>Chen</last></author>
       <author><first>Chen-Yu</first><last>Chiang</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <pages>273–286</pages>
       <url hash="10d2770b">O07-2002</url>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="3">
       <title>以中文十億詞語料庫為基礎之兩岸詞彙對比研究 (A Study of Lexical Differences between <fixed-case>C</fixed-case>hina and <fixed-case>T</fixed-case>aiwan based on the <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jia-Fei</first><last>Hung</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hung</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Ming-Wei</first><last>Xu</last></author>
       <pages>287–301</pages>
@@ -215,7 +215,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>VOT</fixed-case> productions of word-initial stops in <fixed-case>M</fixed-case>andarin and <fixed-case>E</fixed-case>nglish: A cross-language study</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Kuan-Yi</first><last>Chao</last></author>
       <author><first>Jui-Feng</first><last>Peng</last></author>
       <pages>303–317</pages>
@@ -224,16 +224,16 @@
     </paper>
     <paper id="5">
       <title>台灣共通語言 (<fixed-case>T</fixed-case>aiwan Common Language) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <pages>319–333</pages>
       <url hash="476720b2">O07-2005</url>
       <bibkey>yu-2007-tai</bibkey>
     </paper>
     <paper id="6">
       <title>中文詞義全文標記語料庫之設計與雛形製作 (Design and Prototype of a Fully Sense-tagged Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Su-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Su-Jin</first><last>Ker</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Jia-Fei</first><last>Hung</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hung</last></author>
       <author><first>Shih-yin</first><last>Liu</last></author>
       <author><first>Hui-Ling</first><last>Chien</last></author>
       <author><first>I-Li</first><last>Su</last></author>
@@ -271,7 +271,7 @@
     <paper id="10">
       <title>應用文件重排序與局部查詢擴展於中文文件檢索之研究 (Improving Retrieval Effectiveness by Document Reranking and Local Expansion) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wen-Chi</first><last>Wang</last></author>
-      <author><first>Bor-Shen</first><last>Lin</last></author>
+      <author id="bor-shen-lin"><first>Bor-Shen</first><last>Lin</last></author>
       <pages>391–405</pages>
       <url hash="16817b3f">O07-2010</url>
       <bibkey>wang-lin-2007-ying</bibkey>
@@ -279,7 +279,7 @@
     <paper id="11">
       <title>針對數學與科學教育領域之電腦輔助英中試題翻譯系統 (An Exploration of Computer Assisted Translation of Test Items for Mathematics and Sciences) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Shin</first><last>Lu</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Chun-Yen</first><last>Chang</last></author>
       <pages>407–421</pages>
@@ -289,7 +289,7 @@
     <paper id="12">
       <title>Word sense induction using independent component analysis</title>
       <author><first>Petr</first><last>Šimon</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <pages>423–433</pages>
       <url hash="2b2207c1">O07-2012</url>
       <bibkey>simon-hong-2007-word</bibkey>
@@ -385,15 +385,15 @@
       <author><first>Yi</first><last>Hu</last></author>
       <author><first>Ruzhan</first><last>Lu</last></author>
       <author><first>Yuquan</first><last>Chen</last></author>
-      <author><first>Jianyong</first><last>Duan</last></author>
+      <author id="jianyong-duan"><first>Jianyong</first><last>Duan</last></author>
       <pages>107–126</pages>
       <url hash="e395b35d">O07-4001</url>
       <bibkey>hu-etal-2007-using-generative</bibkey>
     </paper>
     <paper id="2">
       <title>An Empirical Study of Non-Stationary Ngram Model and its Smoothing Techniques</title>
-      <author><first>Jinghui</first><last>Xiao</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="jinghui-xiao"><first>Jinghui</first><last>Xiao</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <pages>127–154</pages>
       <url hash="01f12e6c">O07-4002</url>
@@ -419,7 +419,7 @@
       <title>Improve Parsing Performance by Self-Learning</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Duen-Chi</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>195–216</pages>
       <url hash="4b714234">O07-4005</url>
       <bibkey>hsieh-etal-2007-improve</bibkey>
@@ -458,7 +458,7 @@
     <paper id="2">
       <title>A Novel Characterization of the Alternative Hypothesis Using Kernel Discriminant Analysis for <fixed-case>LLR</fixed-case>-Based Speaker Verification</title>
       <author><first>Yi-Hsiang</first><last>Chao</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Ruei-Chuan</first><last>Chang</last></author>
       <pages>255–272</pages>
       <url hash="2c891280">O07-5002</url>
@@ -469,7 +469,7 @@
       <author><first>Nengheng</first><last>Zheng</last></author>
       <author><first>Tan</first><last>Lee</last></author>
       <author><first>Ning</first><last>Wang</last></author>
-      <author><first>P. C.</first><last>Ching</last></author>
+      <author id="p-c-ching"><first>P. C.</first><last>Ching</last></author>
       <pages>273–290</pages>
       <url hash="2abbafc6">O07-5003</url>
       <bibkey>zheng-etal-2007-integrating</bibkey>
@@ -478,7 +478,7 @@
       <title>Performance of Discriminative <fixed-case>HMM</fixed-case> Training in Noise</title>
       <author><first>Jun</first><last>Du</last></author>
       <author><first>Peng</first><last>Liu</last></author>
-      <author><first>Frank K.</first><last>Soong</last></author>
+      <author id="frank-k-soong"><first>Frank K.</first><last>Soong</last></author>
       <author><first>Jian-Lai</first><last>Zhou</last></author>
       <author><first>Ren-Hua</first><last>Wang</last></author>
       <pages>291–302</pages>
@@ -490,15 +490,15 @@
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
       <author><first>Masahide</first><last>Mizushima</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>303–324</pages>
       <url hash="b1c5fd93">O07-5005</url>
       <bibkey>takezawa-etal-2007-multilingual</bibkey>
     </paper>
     <paper id="6">
       <title>Exploiting <fixed-case>P</fixed-case>inyin Constraints in <fixed-case>P</fixed-case>inyin-to-Character Conversion Task: a Class-Based Maximum Entropy <fixed-case>M</fixed-case>arkov Model Approach</title>
-      <author><first>Jinghui</first><last>Xiao</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="jinghui-xiao"><first>Jinghui</first><last>Xiao</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <pages>325–348</pages>
       <url hash="821e0c0c">O07-5006</url>
@@ -518,19 +518,19 @@
     </frontmatter>
     <paper id="1">
       <title>Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>S</fixed-case>outhern-<fixed-case>M</fixed-case>in Tone Sandhi Using Rule-Based Methods</title>
-      <author><first>Un-Gian</first><last>Iunn</last></author>
-      <author><first>Kiat-Gak</first><last>Lau</last></author>
+      <author id="un-gian-iunn"><first>Un-Gian</first><last>Iunn</last></author>
+      <author id="kiat-gak-lau"><first>Kiat-Gak</first><last>Lau</last></author>
       <author><first>Hong-Giau</first><last>Tan-Tenn</last></author>
       <author><first>Sheng-An</first><last>Lee</last></author>
-      <author><first>Cheng-Yan</first><last>Kao</last></author>
+      <author id="cheng-yan-kao"><first>Cheng-Yan</first><last>Kao</last></author>
       <pages>349–370</pages>
       <url hash="6673e4b8">O07-6001</url>
       <bibkey>iunn-etal-2007-modeling</bibkey>
     </paper>
     <paper id="2">
       <title>A System Framework for Integrated Synthesis of <fixed-case>M</fixed-case>andarin, <fixed-case>M</fixed-case>in-<fixed-case>N</fixed-case>an, and <fixed-case>H</fixed-case>akka Speech</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
-      <author><first>Yan-Zuo</first><last>Zhou</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="yan-zuo-zhou"><first>Yan-Zuo</first><last>Zhou</last></author>
       <author><first>Huang-Liang</first><last>Liau</last></author>
       <pages>371–390</pages>
       <url hash="4d615d97">O07-6002</url>
@@ -543,7 +543,7 @@
       <author><first>Xiang-Rui</first><last>Zhong</last></author>
       <author><first>Zhen-Feng</first><last>Liang</last></author>
       <author><first>Hsiu-Min</first><last>Yu</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <pages>391–410</pages>
       <url hash="39452fb0">O07-6003</url>
@@ -559,7 +559,7 @@
     <paper id="5">
       <title>Automatic Pronunciation Assessment for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: Approaches and System Overview</title>
       <author><first>Jiang-Chun</first><last>Chen</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <author><first>Te-Lu</first><last>Tsai</last></author>
       <pages>443–458</pages>
       <url hash="3f23903c">O07-6005</url>
@@ -568,7 +568,7 @@
     <paper id="6">
       <title>A Knowledge-Based Approach for Unsupervised <fixed-case>C</fixed-case>hinese Coreference Resolution</title>
       <author><first>Grace</first><last>Ngai</last></author>
-      <author><first>Chi-Shing</first><last>Wang</last></author>
+      <author id="chi-shing-wang"><first>Chi-Shing</first><last>Wang</last></author>
       <pages>459–484</pages>
       <url hash="d679ab64">O07-6006</url>
       <bibkey>ngai-wang-2007-knowledge</bibkey>
diff --git a/data/xml/O08.xml b/data/xml/O08.xml
index ca28cacd29..d8159b5484 100644
--- a/data/xml/O08.xml
+++ b/data/xml/O08.xml
@@ -17,37 +17,37 @@
     </frontmatter>
     <paper id="1">
       <title>Measuring Text Readability by Lexical Relations Retrieved from <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Shu-yen</first><last>Lin</last></author>
-      <author><first>Cheng-chao</first><last>Su</last></author>
-      <author><first>Yu-da</first><last>Lai</last></author>
-      <author><first>Li-chin</first><last>Yang</last></author>
-      <author><first>Shu-kai</first><last>Hsieh</last></author>
+      <author id="shu-yen-lin"><first>Shu-yen</first><last>Lin</last></author>
+      <author id="cheng-chao-su"><first>Cheng-chao</first><last>Su</last></author>
+      <author id="yu-da-lai"><first>Yu-da</first><last>Lai</last></author>
+      <author id="li-chin-yang"><first>Li-chin</first><last>Yang</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-kai</first><last>Hsieh</last></author>
       <pages>1–17</pages>
       <url hash="6db9fb11">O08-1001</url>
       <bibkey>lin-etal-2008-measuring</bibkey>
     </paper>
     <paper id="2">
       <title>A Semantic Composition Method for Deriving Sense Representations of Determinative-Measure Compounds in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
-      <author><first>Chia-hung</first><last>Tai</last></author>
+      <author id="chia-hung-tai"><first>Chia-hung</first><last>Tai</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>18–37</pages>
       <url hash="669b98f3">O08-1002</url>
       <bibkey>tai-etal-2008-semantic-composition</bibkey>
     </paper>
     <paper id="3">
       <title>A Thesaurus-Based Semantic Classification of <fixed-case>E</fixed-case>nglish Collocations</title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
-      <author><first>Chiung-hui</first><last>Tseng</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
+      <author id="chiung-hui-tseng"><first>Chiung-hui</first><last>Tseng</last></author>
       <author><first>Kate H.</first><last>Kao</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>38–52</pages>
       <url hash="2128857b">O08-1003</url>
       <bibkey>huang-etal-2008-thesaurus</bibkey>
     </paper>
     <paper id="4">
       <title>以<fixed-case>F</fixed-case>ujisaki模型驗證連續語流中字調及韻律詞對應於階層性韻律架構<fixed-case>HPG</fixed-case>的意義 (<fixed-case>M</fixed-case>andarin Discourse Prosody Other than Tones and Intonation – Decomposing the F0 Constitution by Prosodic Hierarchy with the Fujisaki Model) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Chiu-Yu</first><last>Tseng</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-Yu</first><last>Tseng</last></author>
       <author><first>Zhao-Yu</first><last>Su</last></author>
       <pages>53–65</pages>
       <url hash="a52b1456">O08-1004</url>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="5">
       <title>基於<fixed-case>ANN</fixed-case>之頻譜演進模型及其於國語語音合成之應用 (An <fixed-case>ANN</fixed-case> based Spectrum-progression Model and Its Application to <fixed-case>M</fixed-case>andarin Speech Synthesis) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Chang-Yi</first><last>Wu</last></author>
       <pages>66–77</pages>
       <url hash="f59c884b">O08-1005</url>
@@ -73,8 +73,8 @@
     <paper id="7">
       <title>調變頻譜正規化法使用於強健語音辨識之研究 (Study of Modulation Spectrum Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chih-Cheng</first><last>Wang</last></author>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>93–107</pages>
       <url hash="310cc4f8">O08-1007</url>
       <bibkey>wang-etal-2008-diao</bibkey>
@@ -94,14 +94,14 @@
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Ting-Shuo</first><last>Yo</last></author>
       <author><first>Petr</first><last>Šimon</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>123–136</pages>
       <url hash="ea0ef253">O08-1009</url>
       <bibkey>huang-etal-2008-realistic</bibkey>
     </paper>
     <paper id="10">
       <title>國台語無聲調拼音輸入法實作 (An Implementation of Toneless Input for <fixed-case>M</fixed-case>andarin and <fixed-case>T</fixed-case>aiwanese) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Cheng-Rong</first><last>Tsai</last></author>
       <pages>137–150</pages>
       <url hash="f83d2d08">O08-1010</url>
@@ -117,11 +117,11 @@
     </paper>
     <paper id="12">
       <title>利用統計方法及中文訓練資料處理台語文詞性標記 (Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>POS</fixed-case> tagging with statistical methods and <fixed-case>M</fixed-case>andarin training data) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Un-Gian</first><last>Iunn</last></author>
-      <author><first>Chia-hung</first><last>Tai</last></author>
-      <author><first>Kiat-gak</first><last>Lau</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Cheng Yan</first><last>Kao</last></author>
+      <author id="un-gian-iunn"><first>Un-Gian</first><last>Iunn</last></author>
+      <author id="chia-hung-tai"><first>Chia-hung</first><last>Tai</last></author>
+      <author id="kiat-gak-lau"><first>Kiat-gak</first><last>Lau</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="cheng-yan-kao"><first>Cheng Yan</first><last>Kao</last></author>
       <pages>166–179</pages>
       <url hash="15333e28">O08-1012</url>
       <bibkey>iunn-etal-2008-li</bibkey>
@@ -129,8 +129,8 @@
     <paper id="13">
       <title>中文名詞組的辨識：監督式與半監督式學習法的實驗 (<fixed-case>C</fixed-case>hinese <fixed-case>NP</fixed-case> Chunking: Experiments with Supervised,and Semisupervised Learning) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yen Hsi</first><last>Lin</last></author>
-      <author><first>Zhao Ming</first><last>Gao</last></author>
-      <author><first>Cheng Yan</first><last>Kao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao Ming</first><last>Gao</last></author>
+      <author id="cheng-yan-kao"><first>Cheng Yan</first><last>Kao</last></author>
       <pages>180–193</pages>
       <url hash="51d60c30">O08-1013</url>
       <bibkey>lin-etal-2008-zhong</bibkey>
@@ -138,8 +138,8 @@
     <paper id="14">
       <title>強健性語音辨識中能量相關特徵之改良式正規化技術的研究 (Study of the Improved Normalization Techniques of Energy-Related Features for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chi-an</first><last>Pan</last></author>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>194–208</pages>
       <url hash="b693b326">O08-1014</url>
       <bibkey>pan-etal-2008-qiang</bibkey>
@@ -154,16 +154,16 @@
     </paper>
     <paper id="16">
       <title>Robust Voice Activity Detection Based on Discrete Wavelet Transform</title>
-      <author><first>Kun-Ching</first><last>Wang</last></author>
+      <author id="kun-ching-wang"><first>Kun-Ching</first><last>Wang</last></author>
       <pages>216–228</pages>
       <url hash="38983fbb">O08-1016</url>
       <bibkey>wang-2008-robust</bibkey>
     </paper>
     <paper id="17">
       <title>組合式倒頻譜統計正規化法於強健性語音辨識之研究 (Associative Cepstral Statistics Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
       <author><first>Kuang-chieh</first><last>Wu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>229–243</pages>
       <url hash="13cd4ebd">O08-1017</url>
       <bibkey>tu-etal-2008-zu</bibkey>
@@ -207,16 +207,16 @@
     <paper id="4">
       <title>Automatic labeling of troponymy for <fixed-case>C</fixed-case>hinese verbs</title>
       <author><first>Chiao-Shan</first><last>Lo</last></author>
-      <author><first>Yi-Rung</first><last>Chen</last></author>
+      <author id="yi-rong-chen"><first>Yi-Rung</first><last>Chen</last></author>
       <author><first>Chih-Yu</first><last>Lin</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>284–292</pages>
       <url hash="bf7be5a0">O08-2004</url>
       <bibkey>lo-etal-2008-automatic</bibkey>
     </paper>
     <paper id="5">
       <title>電腦輔助中學程度漢英翻譯習作環境之建置 (Computer Assisted Learning of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Translation for Middle Schoolers) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Min Hua</first><last>Lai</last></author>
+      <author id="min-hua-lai"><first>Min Hua</first><last>Lai</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <pages>293–307</pages>
       <url hash="2f73eeca">O08-2005</url>
@@ -242,7 +242,7 @@
       <title>多領域文件集之詞彙概念擴展與知識架構之建立 (Conceptual Expansion and Ontological Mapping of Multi-domain Documents) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yong-Xiang</first><last>Chen</last></author>
       <author><first>Xiu-Ling</first><last>Ke</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>338–350</pages>
       <url hash="e08c0dec">O08-2008</url>
@@ -261,7 +261,7 @@
       <author><first>Chih-Hao</first><last>Yeh</last></author>
       <author><first>Wei-Chi</first><last>Tsai</last></author>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>366–372</pages>
       <url hash="ccf87597">O08-2010</url>
       <bibkey>yeh-etal-2008-generating</bibkey>
@@ -282,7 +282,7 @@
       <title>Exploring Shallow Answer Ranking Features in Cross-Lingual and Monolingual Factoid Question Answering</title>
       <author><first>Cheng-Wei</first><last>Lee</last></author>
       <author><first>Yi-Hsun</first><last>Lee</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>1–26</pages>
       <url hash="48248220">O08-3001</url>
       <bibkey>lee-etal-2008-exploring</bibkey>
@@ -290,9 +290,9 @@
     <paper id="2">
       <title>Two Approaches for Multilingual Question Answering: Merging Passages vs. Merging Answers</title>
       <author><first>Rita M.</first><last>Aceves-Pérez</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
-      <author><first>Luis</first><last>Villaseñor-Pineda</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="luis-villasenor-pineda"><first>Luis</first><last>Villaseñor-Pineda</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>27–40</pages>
       <url hash="c7eba7a2">O08-3002</url>
       <bibkey>aceves-perez-etal-2008-two</bibkey>
@@ -357,7 +357,7 @@
     <paper id="2">
       <title>A Study on Consistency Checking Method of Part-Of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora</title>
       <author><first>Hu</first><last>Zhang</last></author>
-      <author><first>Jiaheng</first><last>Zheng</last></author>
+      <author id="jiaheng-zheng"><first>Jiaheng</first><last>Zheng</last></author>
       <pages>157–170</pages>
       <url hash="0d84d32e">O08-4002</url>
       <bibkey>zhang-zheng-2008-study</bibkey>
@@ -366,7 +366,7 @@
       <title>Constructing a Temporal Relation Tagged Corpus of <fixed-case>C</fixed-case>hinese Based on Dependency Structure Analysis</title>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>171–196</pages>
       <url hash="69bedc22">O08-4003</url>
       <bibkey>cheng-etal-2008-constructing</bibkey>
@@ -381,7 +381,7 @@
     <paper id="5">
       <title>A Cross-Linguistic Study of Voice Onset Time in Stop Consonant Productions</title>
       <author><first>Kuan-Yi</first><last>Chao</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <pages>215–232</pages>
       <url hash="3d6cc0de">O08-4005</url>
       <bibkey>chao-chen-2008-cross</bibkey>
@@ -389,8 +389,8 @@
     <paper id="6">
       <title>Data Driven Approaches to Phonetic Transcription with Integration of Automatic Speech Recognition and Grapheme-to-Phoneme for Spoken Buddhist Sutra</title>
       <author><first>Min-Siong</first><last>Liang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
-      <author><first>Yuang-Chin</first><last>Chiang</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-Chin</first><last>Chiang</last></author>
       <pages>233-254</pages>
       <url hash="418366bb">O08-4006</url>
       <bibkey>liang-etal-2008-data</bibkey>
@@ -409,7 +409,7 @@
     <paper id="1">
       <title>Knowledge Representation and Sense Disambiguation for Interrogatives in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>255-278</pages>
       <url hash="03ea3e5f">O08-5001</url>
       <bibkey>huang-chen-2008-knowledge</bibkey>
@@ -434,8 +434,8 @@
     </paper>
     <paper id="4">
       <title>An <fixed-case>HNM</fixed-case> Based Scheme for Synthesizing <fixed-case>M</fixed-case>andarin Syllable Signal</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
-      <author><first>Yan-Zuo</first><last>Zhou</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="yan-zuo-zhou"><first>Yan-Zuo</first><last>Zhou</last></author>
       <pages>327–342</pages>
       <url hash="6e4503a6">O08-5004</url>
       <bibkey>gu-zhou-2008-hnm</bibkey>
@@ -452,10 +452,10 @@
     </paper>
     <paper id="6">
       <title>Acoustic Model Optimization for Multilingual Speech Recognition</title>
-      <author><first>Dau-Cheng</first><last>Lyu</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
-      <author><first>Yuang-Chin</first><last>Chiang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="dau-cheng-lyu"><first>Dau-Cheng</first><last>Lyu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-Chin</first><last>Chiang</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <pages>363–386</pages>
       <url hash="816d7189">O08-5006</url>
       <bibkey>lyu-etal-2008-acoustic</bibkey>
@@ -484,10 +484,10 @@
     </paper>
     <paper id="2">
       <title>Corpus Cleanup of Mistaken Agreement Using Word Sense Disambiguation</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Jui-Feng</first><last>Yeh</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>405–420</pages>
       <url hash="1283e47f">O08-6002</url>
       <bibkey>yu-etal-2008-corpus</bibkey>
diff --git a/data/xml/O09.xml b/data/xml/O09.xml
index cbea71080c..a84abb9450 100644
--- a/data/xml/O09.xml
+++ b/data/xml/O09.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 21st Conference on Computational Linguistics and Speech Processing</booktitle>
       <url hash="ccb1192c">O09-1</url>
       <editor><first>June-Jei</first><last>Kuo</last></editor>
-      <editor><first>Jeih-Weih</first><last>Hung</last></editor>
+      <editor id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taichung, Taiwan</address>
       <month>September</month>
@@ -43,7 +43,7 @@
     <paper id="4">
       <title>強健性語音辨識中分頻段調變頻譜補償之研究 (A Study of Sub-band Modulation Spectrum Compensation for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Sheng-yuan</first><last>Huang</last></author>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
       <author><first>Jeih-weih</first><last>Hung</last></author>
       <pages>39–52</pages>
       <url hash="29e71d32">O09-1004</url>
@@ -74,7 +74,7 @@
     <paper id="7">
       <title>中英文專利文書之文句對列 (Sentence alignment of <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese patent documents) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Kan-Wen</first><last>Tien</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <pages>85–100</pages>
       <url hash="32b62352">O09-1007</url>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="8">
       <title>意見持有者辨識之研究 (A Study on Identification of Opinion Holders) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Chia-Ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-Ying</first><last>Lee</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>101–114</pages>
@@ -92,7 +92,7 @@
     <paper id="9">
       <title>Tonal effects on voice onset time: Stops in <fixed-case>M</fixed-case>andarin and <fixed-case>H</fixed-case>akka (聲調對嗓音起始時間的影響：以國語和客語為研究對象)</title>
       <author><first>Jui-Feng</first><last>Peng</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Yi-Yun</first><last>Lin</last></author>
       <pages>115–124</pages>
       <url hash="d7f235cb">O09-1009</url>
@@ -102,7 +102,7 @@
       <title>Latent Prosody Model-Assisted <fixed-case>M</fixed-case>andarin Accent Identification</title>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <author><first>Shuan-Chen</first><last>Yeh</last></author>
-      <author><first>Ming-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></author>
       <author><first>Wei-Hsiung</first><last>Ting</last></author>
       <author><first>Sen-Chia</first><last>Chang</last></author>
       <pages>125–136</pages>
@@ -112,14 +112,14 @@
     <paper id="11">
       <title>高解析度之國語類音素單元端點自動標示 (Sample-based Phone-like Unit Automatic Labeling in <fixed-case>M</fixed-case>andarin Speech) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>You-Yu</first><last>Lin</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>137–150</pages>
       <url hash="efee84dc">O09-1011</url>
       <bibkey>lin-wang-2009-gao</bibkey>
     </paper>
     <paper id="12">
       <title>基於離散倒頻譜之頻譜包絡估計架構及其於語音轉換之應用 (A Discrete-cepstrum Based Spectrum-envelope Estimation Scheme and Its Application to Voice Transformation) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Song-Fong</first><last>Tsai</last></author>
       <pages>151–164</pages>
       <url hash="20d8efb4">O09-1012</url>
@@ -131,7 +131,7 @@
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Wei-Ti</first><last>Kuo</last></author>
       <author><first>Ying-Tse</first><last>Sun</last></author>
-      <author><first>Min-Hua</first><last>Lai</last></author>
+      <author id="min-hua-lai"><first>Min-Hua</first><last>Lai</last></author>
       <pages>165–178</pages>
       <url hash="18f25a5c">O09-1013</url>
       <bibkey>huang-etal-2009-dian</bibkey>
@@ -154,9 +154,9 @@
     </paper>
     <paper id="16">
       <title>Minimally Supervised Question Classification and Answering based on <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Joseph</first><last>Chang</last></author>
+      <author id="joseph-z-chang"><first>Joseph</first><last>Chang</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <pages>209–222</pages>
       <url hash="305765f5">O09-1016</url>
       <bibkey>chang-etal-2009-minimally</bibkey>
@@ -183,7 +183,7 @@
     <paper id="19">
       <title>強健性語音辨識中基於小波轉換之分頻統計補償技術的研究 (A Study of Sub-band Feature Statistics Compensation Techniques Based on a Discrete Wavelet Transform for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hao-teng</first><last>Fan</last></author>
-      <author><first>Wen-Hsiang</first><last>Tu</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-Hsiang</first><last>Tu</last></author>
       <author><first>Jeih-weih</first><last>Hung</last></author>
       <pages>251–264</pages>
       <url hash="b72d9fd8">O09-1019</url>
@@ -192,7 +192,7 @@
     <paper id="20">
       <title>併合式倒頻譜統計正規化技術於強健性語音辨識之研究 (A Study of Hybrid-based Cepstral Statistics Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Guan-min</first><last>He</last></author>
-      <author><first>Wen-Hsiang</first><last>Tu</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-Hsiang</first><last>Tu</last></author>
       <author><first>Jeih-weih</first><last>Hung</last></author>
       <pages>265–278</pages>
       <url hash="cec0d1ac">O09-1020</url>
@@ -214,7 +214,7 @@
     </frontmatter>
     <paper id="1">
       <title>專利雙語語料之中、英對照詞自動擷取 (Automatic Term Pair Extraction from Bilingual Patent Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Ze-Jing</first><last>Chuang</last></author>
       <pages>279–292</pages>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="9">
       <title>Voice Activity Detection Using Spectral Entropy in Bark-Scale Wavelet Domain</title>
-      <author><first>Kun-ching</first><last>Wang</last></author>
+      <author id="kun-ching-wang"><first>Kun-ching</first><last>Wang</last></author>
       <author><first>Tzuen-lin</first><last>Hou</last></author>
       <author><first>Chuin-li</first><last>Chin</last></author>
       <pages>385–398</pages>
@@ -294,12 +294,12 @@
     </paper>
     <paper id="10">
       <title>讓格書寫以及台華互譯初探 (<fixed-case>L</fixed-case>ang<fixed-case>G</fixed-case>eh Orthography and an Initial Study of Statistical Translation Between <fixed-case>T</fixed-case>aiwanese and <fixed-case>M</fixed-case>andarin) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Yuang-Chin</first><last>Chiang</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-Chin</first><last>Chiang</last></author>
       <author><first>Pei-Chi</first><last>Yang</last></author>
       <author><first>Shu-Chin</first><last>Lin</last></author>
       <author><first>Chun-huang</first><last>Chang</last></author>
       <author><first>Ming-Tat</first><last>Ko</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <author><first>Meng Chang</first><last>Chen</last></author>
       <pages>399–414</pages>
       <url hash="247e8c3a">O09-2010</url>
@@ -319,44 +319,44 @@
     </frontmatter>
     <paper id="1">
       <title>Fertility-based Source-Language-biased Inversion Transduction Grammar for Word Alignment</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>1–18</pages>
       <url hash="735e8cc0">O09-3001</url>
       <bibkey>huang-chang-2009-fertility</bibkey>
     </paper>
     <paper id="2">
       <title>Automatic Sense Derivation for Determinative-Measure Compounds under the Framework of <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
-      <author><first>Chia-Hung</first><last>Tai</last></author>
+      <author id="chia-hung-tai"><first>Chia-Hung</first><last>Tai</last></author>
       <author><first>Jia-Zen</first><last>Fan</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>19–44</pages>
       <url hash="44e97774">O09-3002</url>
       <bibkey>tai-etal-2009-automatic</bibkey>
     </paper>
     <paper id="3">
       <title>Assessing Text Readability Using Hierarchical Lexical Relations Retrieved from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Shu-Yen</first><last>Lin</last></author>
-      <author><first>Cheng-Chao</first><last>Su</last></author>
-      <author><first>Yu-Da</first><last>Lai</last></author>
-      <author><first>Li-Chin</first><last>Yang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-yen-lin"><first>Shu-Yen</first><last>Lin</last></author>
+      <author id="cheng-chao-su"><first>Cheng-Chao</first><last>Su</last></author>
+      <author id="yu-da-lai"><first>Yu-Da</first><last>Lai</last></author>
+      <author id="li-chin-yang"><first>Li-Chin</first><last>Yang</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>45–84</pages>
       <url hash="5969ec8c">O09-3003</url>
       <bibkey>lin-etal-2009-assessing</bibkey>
     </paper>
     <paper id="4">
       <title>Summarization Assistant for News Brief Services on Cellular Phones</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>85–104</pages>
       <url hash="3a052870">O09-3004</url>
       <bibkey>tseng-2009-summarization</bibkey>
     </paper>
     <paper id="5">
       <title>Study of Associative Cepstral Statistics Normalization Techniques for Robust Speech Recognition in Additive Noise Environments</title>
-      <author><first>Wen-Hsiang</first><last>Tu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-Hsiang</first><last>Tu</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>105–132</pages>
       <url hash="462f282d">O09-3005</url>
       <bibkey>tu-hung-2009-study</bibkey>
@@ -367,7 +367,7 @@
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 14, Number 2, June 2009-Special Issue on Computer Assisted Language Learning</booktitle>
       <url hash="9aeb4773">O09-4</url>
       <editor><first>Chao-Lin</first><last>Liu</last></editor>
-      <editor><first>Zhao-Ming</first><last>Gao</last></editor>
+      <editor id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></editor>
       <month>June</month>
       <year>2009</year>
       <venue>ijclclp</venue>
@@ -378,7 +378,7 @@
     <paper id="1">
       <title>Speech-Based Interactive Games for Language Learning: Reading, Translation, and Question-Answering</title>
       <author><first>Yushi</first><last>Xu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <url hash="fd920a0c">O09-4001</url>
       <bibkey>xu-seneff-2009-speech</bibkey>
     </paper>
@@ -403,7 +403,7 @@
     </paper>
     <paper id="5">
       <title>A Corpus-based Study on Figurative Language through the <fixed-case>C</fixed-case>hinese Five Elements and Body Part Terms</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="02687534">O09-4005</url>
       <bibkey>chung-2009-corpus</bibkey>
     </paper>
@@ -412,8 +412,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 14, Number 3, September 2009</booktitle>
       <url hash="53aa1822">O09-5</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <month>September</month>
       <year>2009</year>
       <venue>ijclclp</venue>
@@ -423,20 +423,20 @@
     </frontmatter>
     <paper id="1">
       <title>Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>POS</fixed-case> Tagging Using Statistical Methods and <fixed-case>M</fixed-case>andarin Training Data</title>
-      <author><first>Un-Gian</first><last>Iunn</last></author>
+      <author id="un-gian-iunn"><first>Un-Gian</first><last>Iunn</last></author>
       <author><first>Jia-hung</first><last>Tai</last></author>
-      <author><first>Kiat-Gak</first><last>Lau</last></author>
-      <author><first>Cheng-yan</first><last>Kao</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="kiat-gak-lau"><first>Kiat-Gak</first><last>Lau</last></author>
+      <author id="cheng-yan-kao"><first>Cheng-yan</first><last>Kao</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <url hash="dc375731">O09-5001</url>
       <bibkey>iunn-etal-2009-modeling</bibkey>
     </paper>
     <paper id="2">
       <title>A Thesaurus-Based Semantic Classification of <fixed-case>E</fixed-case>nglish Collocations</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <author><first>Kate H.</first><last>Kao</last></author>
-      <author><first>Chiung-Hui</first><last>Tseng</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chiung-hui-tseng"><first>Chiung-Hui</first><last>Tseng</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="749ea709">O09-5002</url>
       <bibkey>huang-etal-2009-thesaurus</bibkey>
     </paper>
@@ -444,14 +444,14 @@
       <title>Automatic Recognition of <fixed-case>C</fixed-case>antonese-<fixed-case>E</fixed-case>nglish Code-Mixing Speech</title>
       <author><first>Joyce Y. C.</first><last>Chan</last></author>
       <author><first>Houwei</first><last>Cao</last></author>
-      <author><first>P. C.</first><last>Ching</last></author>
+      <author id="p-c-ching"><first>P. C.</first><last>Ching</last></author>
       <author><first>Tan</first><last>Lee</last></author>
       <url hash="0b4abb5a">O09-5003</url>
       <bibkey>chan-etal-2009-automatic</bibkey>
     </paper>
     <paper id="4">
       <title>Corpus, Lexicon, and Construction: A Quantitative Corpus Approach to <fixed-case>M</fixed-case>andarin Possessive Construction</title>
-      <author><first>Cheng-Hsien</first><last>Chen</last></author>
+      <author id="alvin-cheng-hsien-chen"><first>Cheng-Hsien</first><last>Chen</last></author>
       <url hash="694ce690">O09-5004</url>
       <bibkey>chen-2009-corpus</bibkey>
     </paper>
@@ -460,8 +460,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 14, Number 4, <fixed-case>D</fixed-case>ecember 2009</booktitle>
       <url hash="178dc1bc">O09-6</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <month>December</month>
       <year>2009</year>
       <venue>ijclclp</venue>
@@ -472,22 +472,22 @@
     <paper id="1">
       <title>Tonal Effects on Voice Onset Time</title>
       <author><first>Jui-Feng</first><last>Peng</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Chia-Cheng</first><last>Lee</last></author>
       <url hash="4dfea8a9">O09-6001</url>
       <bibkey>peng-etal-2009-tonal-effects</bibkey>
     </paper>
     <paper id="2">
       <title>A Discrete-cepstrum Based Spectrum-envelope Estimation Scheme and Its Example Application of Voice Transformation</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
-      <author><first>Sung-Feng</first><last>Tsai</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="sung-fung-tsai"><first>Sung-Feng</first><last>Tsai</last></author>
       <url hash="4dae43fc">O09-6002</url>
       <bibkey>gu-tsai-2009-discrete</bibkey>
     </paper>
     <paper id="3">
       <title>Identification of Opinion Holders</title>
       <author><first>Lun-Wei</first><last>Ku</last></author>
-      <author><first>Chia-Ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-Ying</first><last>Lee</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <url hash="710025c7">O09-6003</url>
       <bibkey>ku-etal-2009-identification</bibkey>
diff --git a/data/xml/O10.xml b/data/xml/O10.xml
index a5f68936b3..67a3287528 100644
--- a/data/xml/O10.xml
+++ b/data/xml/O10.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 22nd Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2010)</booktitle>
       <editor><first>Shih-Hung</first><last>Wu</last></editor>
-      <editor><first>Jeih-weih</first><last>Hung</last></editor>
+      <editor id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Nantou, Taiwan</address>
       <month>September</month>
@@ -80,7 +80,7 @@
       <title>基於對照表以及語言模型之簡繁字體轉換 (<fixed-case>C</fixed-case>hinese Characters Conversion System based on Lookup Table and Language Model) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Min-Hsiang</first><last>Li</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
       <pages>113–127</pages>
       <url hash="4416ac98">O10-1008</url>
@@ -91,17 +91,17 @@
       <author><first>Chao-Shainn</first><last>Huang</last></author>
       <author><first>Yu-Chi</first><last>Chang</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Yuan-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuan-Hsien</first><last>Tseng</last></author>
       <pages>128–142</pages>
       <url hash="42c212ed">O10-1009</url>
       <bibkey>huang-etal-2010-yi-gong</bibkey>
     </paper>
     <paper id="10">
       <title>Term Contributed Boundary Feature using Conditional Random Fields for <fixed-case>C</fixed-case>hinese Word Segmentation Task</title>
-      <author><first>Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Tian-Jian</first><last>Jiang</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Cheng-Lung</first><last>Sung</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>143–156</pages>
       <url hash="e9d8fa93">O10-1010</url>
       <bibkey>jiang-etal-2010-term</bibkey>
@@ -111,7 +111,7 @@
       <author><first>Jyun-Wei</first><last>Huang</last></author>
       <author><first>Chia Pei</first><last>Kao</last></author>
       <author><first>Chun-Yu</first><last>Chen</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <pages>157–171</pages>
       <url hash="185593f7">O10-1011</url>
       <bibkey>huang-etal-2010-ji</bibkey>
@@ -123,7 +123,7 @@
       <author><first>Chang-An</first><last>Shih</last></author>
       <author><first>Yen-Ching</first><last>Hsu</last></author>
       <author><first>Pei-Yu</first><last>Hsu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>172–183</pages>
       <url hash="9404f693">O10-1012</url>
       <bibkey>chen-etal-2010-classifying</bibkey>
@@ -134,7 +134,7 @@
       <author><first>Chien-Liang</first><last>Chen</last></author>
       <author><first>Chun-Chieh</first><last>Liu</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>184–198</pages>
       <url hash="4ff644e5">O10-1013</url>
       <bibkey>sun-etal-2010-zhong</bibkey>
@@ -160,9 +160,9 @@
     </paper>
     <paper id="16">
       <title>可變速中文文字轉語音系統 (Variable Speech Rate <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Text-to-Speech System) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Qi-Quan</first><last>Huang</last></author>
+      <author id="qi-quan-huang"><first>Qi-Quan</first><last>Huang</last></author>
       <author><first>Chen-Yu</first><last>Chiang</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Hsiu-Min</first><last>Yu</last></author>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <pages>222–235</pages>
@@ -172,7 +172,7 @@
     <paper id="17">
       <title>進階式調變頻譜補償法於強健性語音辨識之研究 (Advanced Modulation Spectrum Compensation Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wei-Jeih</first><last>Yeh</last></author>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
       <author><first>Jeih-weih</first><last>Hung</last></author>
       <pages>236–250</pages>
       <url hash="b294fa05">O10-1017</url>
@@ -193,10 +193,10 @@
     </frontmatter>
     <paper id="1">
       <title>Identifying Correction Rules for Auto Editing</title>
-      <author><first>Anta</first><last>Huang</last></author>
+      <author id="an-ta-huang"><first>Anta</first><last>Huang</last></author>
       <author><first>Tsung-Ting</first><last>Kuo</last></author>
       <author><first>Ying-Chun</first><last>Lai</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>251–265</pages>
       <url hash="ca128244">O10-2001</url>
       <bibkey>huang-etal-2010-identifying</bibkey>
@@ -204,24 +204,24 @@
     <paper id="2">
       <title>台灣學生英文寫作冠詞錯誤分析 (<fixed-case>E</fixed-case>nglish article errors in <fixed-case>T</fixed-case>aiwanese college students’ <fixed-case>EFL</fixed-case> writing)</title>
       <author><first>Neil Edward</first><last>Barrett</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <pages>266–280</pages>
       <url hash="aa037b84">O10-2002</url>
       <bibkey>barrett-chen-2010-tai</bibkey>
     </paper>
     <paper id="3">
       <title>結合<fixed-case>HMM</fixed-case> 頻譜模型與<fixed-case>ANN</fixed-case> 韻律模型之國語語音合成系統 (A <fixed-case>M</fixed-case>andarin Speech Synthesis System Combining <fixed-case>HMM</fixed-case> Spectrum Model and <fixed-case>ANN</fixed-case> Prosody Model) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Ming-Yen</first><last>Lai</last></author>
-      <author><first>Sung-Fung</first><last>Tsai</last></author>
+      <author id="sung-fung-tsai"><first>Sung-Fung</first><last>Tsai</last></author>
       <pages>281–295</pages>
       <url hash="f5629275">O10-2003</url>
       <bibkey>gu-etal-2010-jie</bibkey>
     </paper>
     <paper id="4">
       <title>美國專利書「獨立項數」之搭配詞初探: 以<fixed-case>L</fixed-case>exis<fixed-case>N</fixed-case>exis 法律資料庫為例 (Collocation Features of Independent Claim in <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Patent Documents: Information Retrieval from <fixed-case>L</fixed-case>exis<fixed-case>N</fixed-case>exis)</title>
-      <author><first>Hsin-Hung</first><last>Lin</last></author>
-      <author><first>Ching-yu</first><last>Hsieh</last></author>
+      <author id="darren-hsin-hung-lin"><first>Hsin-Hung</first><last>Lin</last></author>
+      <author id="shelley-ching-yu-hsieh"><first>Ching-yu</first><last>Hsieh</last></author>
       <pages>296–310</pages>
       <url hash="e27fb39f">O10-2004</url>
       <bibkey>lin-hsieh-2010-mei</bibkey>
@@ -244,7 +244,7 @@
     </paper>
     <paper id="7">
       <title>結合音長與發音特徵於<fixed-case>GTB</fixed-case> 之腔調化語音辨識 (Accented Speech Recognition based on Gradient Tree Boosting with Duration and Articulation Features) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ming-chin</first><last>Yen</last></author>
+      <author id="ming-chin-yen"><first>Ming-chin</first><last>Yen</last></author>
       <author><first>Po-San</first><last>Lai</last></author>
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <pages>335–349</pages>
@@ -254,7 +254,7 @@
     <paper id="8">
       <title>Discerning Emotions of Bloggers based on Topics – a Supervised Coreference Approach in <fixed-case>B</fixed-case>engali</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>350–363</pages>
       <url hash="8ce71d9a">O10-2008</url>
       <bibkey>das-bandyopadhyay-2010-discerning</bibkey>
@@ -262,17 +262,17 @@
     <paper id="9">
       <title>應用直方圖均化於統計式未知詞萃取之研究 (Histogram Equalization for Statistical Unknown Word Extraction) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yi-Cong</first><last>Chen</last></author>
-      <author><first>Bor-Shen</first><last>Lin</last></author>
+      <author id="bor-shen-lin"><first>Bor-Shen</first><last>Lin</last></author>
       <pages>364–378</pages>
       <url hash="3c1ff26a">O10-2009</url>
       <bibkey>chen-lin-2010-ying</bibkey>
     </paper>
     <paper id="10">
       <title>Qualia Modification in Noun-Noun Compounds: A Cross-Language Survey</title>
-      <author><first>Chih-yao</first><last>Lee</last></author>
+      <author id="chi-yao-lee"><first>Chih-yao</first><last>Lee</last></author>
       <author><first>Chia-hao</first><last>Chang</last></author>
       <author><first>Wei-chieh</first><last>Hsu</last></author>
-      <author><first>Shu-kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-kai</first><last>Hsieh</last></author>
       <pages>379–390</pages>
       <url hash="7f8fc3d6">O10-2010</url>
       <bibkey>lee-etal-2010-qualia</bibkey>
@@ -282,8 +282,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 15, Number 1, March 2010</booktitle>
       <url hash="69f879b2">O10-3</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>March</month>
       <year>2010</year>
       <venue>ijclclp</venue>
@@ -304,7 +304,7 @@
       <author><first>Min-Hsiang</first><last>Li</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Yi-Ching</first><last>Zeng</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
       <url hash="962bdf48">O10-3002</url>
       <bibkey>li-etal-2010-ji-yu</bibkey>
@@ -320,7 +320,7 @@
       <title>Cross-Validation and Minimum Generation Error based Decision Tree Pruning for <fixed-case>HMM</fixed-case>-based Speech Synthesis</title>
       <author><first>Heng</first><last>Lu</last></author>
       <author><first>Zhen-Hua</first><last>Ling</last></author>
-      <author><first>Li-Rong</first><last>Dai</last></author>
+      <author id="li-rong-dai"><first>Li-Rong</first><last>Dai</last></author>
       <author><first>Ren-Hua</first><last>Wang</last></author>
       <url hash="91df363a">O10-3004</url>
       <bibkey>lu-etal-2010-cross</bibkey>
@@ -330,8 +330,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 15, Number 2, June 2010</booktitle>
       <url hash="200a8c5e">O10-4</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>June</month>
       <year>2010</year>
       <venue>ijclclp</venue>
@@ -342,13 +342,13 @@
     <paper id="1">
       <title>A <fixed-case>P</fixed-case>unjabi to <fixed-case>H</fixed-case>indi Machine Transliteration System</title>
       <author><first>Gurpreet Singh</first><last>Josan</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <url hash="708be79c">O10-4001</url>
       <bibkey>josan-lehal-2010-punjabi</bibkey>
     </paper>
     <paper id="2">
       <title>A Posteriori Individual Word Language Models for <fixed-case>V</fixed-case>ietnamese Language</title>
-      <author><first>Le Quan</first><last>Ha</last></author>
+      <author id="le-quan-ha"><first>Le Quan</first><last>Ha</last></author>
       <author><first>Tran Thi Thu</first><last>Van</last></author>
       <author><first>Hoang Tien</first><last>Long</last></author>
       <author><first>Nguyen Huu</first><last>Tinh</last></author>
@@ -361,7 +361,7 @@
       <title>Improving the Template Generation for <fixed-case>C</fixed-case>hinese Character Error Detection with Confusion Sets</title>
       <author><first>Yong-Zhi</first><last>Chen</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
       <url hash="6836c74e">O10-4003</url>
       <bibkey>chen-etal-2010-improving</bibkey>
@@ -369,9 +369,9 @@
     <paper id="4">
       <title>以最佳化及機率分佈標記形聲字聲符之研究 (Annotating Phonetic Component of <fixed-case>C</fixed-case>hinese Characters Using Constrained Optimization and Pronunciation Distribution) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chia-Hui</first><last>Chang</last></author>
-      <author><first>Shu-Yen</first><last>Lin</last></author>
+      <author id="shu-yen-lin"><first>Shu-Yen</first><last>Lin</last></author>
       <author><first>Shu-Ying</first><last>Li</last></author>
-      <author><first>Meng-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Meng-Feng</first><last>Tsai</last></author>
       <author><first>Shu-Ping</first><last>Li</last></author>
       <author><first>Hsiang-Mei</first><last>Liao</last></author>
       <author><first>Chih-Wen</first><last>Sun</last></author>
@@ -384,8 +384,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 15, Number 3-4, September/<fixed-case>D</fixed-case>ecember 2010</booktitle>
       <url hash="199e3f3b">O10-5</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>September/December</month>
       <year>2010</year>
       <venue>ijclclp</venue>
@@ -403,7 +403,7 @@
     </paper>
     <paper id="2">
       <title>Word Sense Disambiguation Using Multiple Contextual Features</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <url hash="08994bf5">O10-5002</url>
@@ -419,10 +419,10 @@
     </paper>
     <paper id="4">
       <title>Discovering Correction Rules for Auto Editing</title>
-      <author><first>An-Ta</first><last>Huang</last></author>
+      <author id="an-ta-huang"><first>An-Ta</first><last>Huang</last></author>
       <author><first>Tsung-Ting</first><last>Kuo</last></author>
       <author><first>Ying-Chun</first><last>Lai</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <url hash="1cd66f22">O10-5004</url>
       <bibkey>huang-etal-2010-discovering</bibkey>
     </paper>
diff --git a/data/xml/O11.xml b/data/xml/O11.xml
index ade309c131..069b2c9f6d 100644
--- a/data/xml/O11.xml
+++ b/data/xml/O11.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 23rd Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2011)</booktitle>
       <url hash="b2c474cc">O11-1</url>
       <editor><first>Wei-Ho</first><last>Tsai</last></editor>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
       <month>September</month>
@@ -29,8 +29,8 @@
     <paper id="2">
       <title>Compensating the Speech Features via Discrete Cosine Transform for Robust Speech Recognition (基於離散餘弦轉換之語音特徵的強健性補償法)</title>
       <author><first>Hsin-Ju</first><last>Hsieh</last></author>
-      <author><first>Wen-hsiang</first><last>Tu</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="wen-hsiang-tu"><first>Wen-hsiang</first><last>Tu</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>21–42</pages>
       <url hash="bd1d0523">O11-1002</url>
       <bibkey>hsieh-etal-2011-compensating</bibkey>
@@ -54,11 +54,11 @@
     <paper id="5">
       <title>片語式機器翻譯中未知詞與落單字的問題探討 (Learning to Deal with the <fixed-case>OOV</fixed-case> Problem in Phrase-based <fixed-case>MT</fixed-case> System) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Chuan</first><last>Chiang</last></author>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <author><first>“He,Ho-Ching”</first><last>Yen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
       <author><first>Chun-Sheng</first><last>Chang</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
       <pages>79–93</pages>
       <url hash="531c1081">O11-1005</url>
@@ -76,11 +76,11 @@
     </paper>
     <paper id="7">
       <title>Unsupervised Overlapping Feature Selection for Conditional Random Fields Learning in <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Ting-hao</first><last>Yang</last></author>
-      <author><first>Tian-Jian</first><last>Jiang</last></author>
-      <author><first>Chan-hung</first><last>Kuo</last></author>
-      <author><first>Richard Tzong-han</first><last>Tsai</last></author>
-      <author><first>Wen-lian</first><last>Hsu</last></author>
+      <author id="ting-hao-yang"><first>Ting-hao</first><last>Yang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Tian-Jian</first><last>Jiang</last></author>
+      <author id="chan-hung-kuo"><first>Chan-hung</first><last>Kuo</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-lian</first><last>Hsu</last></author>
       <pages>109–122</pages>
       <url hash="2b0348ae">O11-1007</url>
       <bibkey>yang-etal-2011-unsupervised</bibkey>
@@ -97,8 +97,8 @@
     </paper>
     <paper id="9">
       <title>動補結構的及物性及修飾對象 (Transitivity of a <fixed-case>C</fixed-case>hinese Verb-result Compound and Affected Argument of the Result Verb) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>You-shan</first><last>Chung</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="you-shan-chung"><first>You-shan</first><last>Chung</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>139–150</pages>
       <url hash="3e2dc389">O11-1009</url>
       <bibkey>chung-chen-2011-dong</bibkey>
@@ -115,7 +115,7 @@
     <paper id="11">
       <title>聲符部件排序與形聲字發音規則探勘 (Pronunciation Rules Discovery for Picto-Phonetic <fixed-case>C</fixed-case>hinese Characters) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chia-Hui</first><last>Chang</last></author>
-      <author><first>Shu-Yen</first><last>Lin</last></author>
+      <author id="shu-yen-lin"><first>Shu-Yen</first><last>Lin</last></author>
       <pages>166–178</pages>
       <url hash="a46cfa92">O11-1011</url>
       <bibkey>chang-lin-2011-sheng</bibkey>
@@ -126,7 +126,7 @@
       <author><first>Jing-Chen</first><last>Yang</last></author>
       <author><first>Yu-Yun</first><last>Chang</last></author>
       <author><first>Yu-Wen</first><last>Liu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>179–193</pages>
       <url hash="763a1e7c">O11-1012</url>
       <bibkey>wang-etal-2011-frequency</bibkey>
@@ -150,7 +150,7 @@
       <author><first>Wen-Yi</first><last>Chu</last></author>
       <author><first>Yu-Chen</first><last>Kao</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Jeih-Weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-Weih</first><last>Hung</last></author>
       <pages>194–206</pages>
       <url hash="3d3d6ba1">O11-2001</url>
       <bibkey>chu-etal-2011-ji</bibkey>
@@ -166,8 +166,8 @@
     </paper>
     <paper id="3">
       <title>使用分段式<fixed-case>GMM</fixed-case> 及自動<fixed-case>GMM</fixed-case> 挑選之語音轉換方法 (A Voice Conversion Method Using Segmental <fixed-case>GMM</fixed-case>s and Automatic <fixed-case>GMM</fixed-case> Selection) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
-      <author><first>Sung-Fung</first><last>Tsai</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="sung-fung-tsai"><first>Sung-Fung</first><last>Tsai</last></author>
       <pages>216–226</pages>
       <url hash="650cba85">O11-2003</url>
       <bibkey>gu-tsai-2011-shi</bibkey>
@@ -220,7 +220,7 @@
     <paper id="9">
       <title>結合語言模型與網路知識源於列印前檢查 (Print Pickets Combined Language Models and Knowledge Resources in Web) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Jui</first><last>Huang</last></author>
-      <author><first>Ming-Chin</first><last>Yen</last></author>
+      <author id="ming-chin-yen"><first>Ming-Chin</first><last>Yen</last></author>
       <author><first>Guan-Huei</first><last>Wu</last></author>
       <author><first>Yao-Yi</first><last>Wang</last></author>
       <author><first>Jui-Feng</first><last>Yeh</last></author>
@@ -230,7 +230,7 @@
     <paper id="10">
       <title>Diagnosing Discoursal Organization in Learner Writing via Conjunctive Adverbials (診斷學習者英語寫作篇章結構：以篇章連接副詞為例)</title>
       <author><first>Tung-yu</first><last>Kao</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <pages>310–322</pages>
       <url hash="70198af4">O11-2010</url>
       <bibkey>kao-chen-2011-diagnosing</bibkey>
@@ -255,7 +255,7 @@
       <author><first>Chaio-Wen</first><last>Hsieh</last></author>
       <author><first>Wei-Hsuan</first><last>Lin</last></author>
       <author><first>Chun-Yi</first><last>Liu</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>349–360</pages>
       <url hash="ebe391e2">O11-2013</url>
       <bibkey>wu-etal-2011-duo</bibkey>
@@ -265,8 +265,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 16, Number 1-2, March/June 2011</booktitle>
       <url hash="44ed23e0">O11-3</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>March/June</month>
       <year>2011</year>
       <venue>ijclclp</venue>
@@ -310,8 +310,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 16, Number 3-4, September/<fixed-case>D</fixed-case>ecember 2011</booktitle>
       <url hash="2545a1b2">O11-4</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>September/December</month>
       <year>2011</year>
       <venue>ijclclp</venue>
@@ -322,20 +322,20 @@
     <paper id="1">
       <title><fixed-case>E</fixed-case>nglish Article Errors in <fixed-case>T</fixed-case>aiwanese College Students’ <fixed-case>EFL</fixed-case> Writing</title>
       <author><first>Neil Edward</first><last>Barrett</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <url hash="c340e196">O11-4001</url>
       <bibkey>barrett-chen-2011-english</bibkey>
     </paper>
     <paper id="2">
       <title>基於辭典辭彙釋義之多階層釋義關聯程度計量-以「目」字部為例 (A Measurement of Multi-Level Semantic Relations among <fixed-case>M</fixed-case>andarin Lexemes with Radical mu4: A Study based on Dictionary Explanations)</title>
-      <author><first>F. Y. August</first><last>Chao</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="f-y-august-chao"><first>F. Y. August</first><last>Chao</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="fdd0478a">O11-4002</url>
       <bibkey>chao-chung-2011-ji</bibkey>
     </paper>
     <paper id="3">
       <title>Histogram Equalization on Statistical Approaches for <fixed-case>C</fixed-case>hinese Unknown Word Extraction</title>
-      <author><first>Bor-Shen</first><last>Lin</last></author>
+      <author id="bor-shen-lin"><first>Bor-Shen</first><last>Lin</last></author>
       <author><first>Yi-Cong</first><last>Chen</last></author>
       <url hash="69bb2747">O11-4003</url>
       <bibkey>lin-chen-2011-histogram</bibkey>
@@ -349,8 +349,8 @@
     </paper>
     <paper id="5">
       <title>Characteristics of Independent Claim: A Corpus-Linguistic Approach to Contemporary <fixed-case>E</fixed-case>nglish Patents</title>
-      <author><first>Darren Hsin-Hung</first><last>Lin</last></author>
-      <author><first>Shelley Ching-Yu</first><last>Hsieh</last></author>
+      <author id="darren-hsin-hung-lin"><first>Darren Hsin-Hung</first><last>Lin</last></author>
+      <author id="shelley-ching-yu-hsieh"><first>Shelley Ching-Yu</first><last>Hsieh</last></author>
       <url hash="25c6a551">O11-4005</url>
       <bibkey>lin-hsieh-2011-characteristics</bibkey>
     </paper>
diff --git a/data/xml/O12.xml b/data/xml/O12.xml
index 84f9256c16..757a4f5573 100644
--- a/data/xml/O12.xml
+++ b/data/xml/O12.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 24th Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2012)</booktitle>
       <url hash="3d6261c8">O12-1</url>
-      <editor><first>Richard Tzong-Han</first><last>Tsai</last></editor>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Chung-Li, Taiwan</address>
       <month>September</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>改良式統計圖等化法強鍵性語音辨識之研究 (Improved Histogram Equalization Methods for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsin-Ju</first><last>Hsieh</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>1–2</pages>
       <url hash="7fea410d">O12-1001</url>
@@ -27,7 +27,7 @@
     </paper>
     <paper id="2">
       <title>以線性多變量迴歸來對映分段後音框之語音轉換方法 (A Voice Conversion Method Mapping Segmented Frames with Linear Multivariate Regression) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Jia-Wei</first><last>Chang</last></author>
       <author><first>Zan-Wei</first><last>Wang</last></author>
       <pages>3–14</pages>
@@ -36,7 +36,7 @@
     </paper>
     <paper id="3">
       <title>Acoustic Variability in the Speech of Children with Cerebral Palsy</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Han-chih</first><last>Ni</last></author>
       <author><first>Tzu-Wen</first><last>Kuo</last></author>
       <author><first>Kuei-Ling</first><last>Hsu</last></author>
@@ -47,7 +47,7 @@
     <paper id="4">
       <title>領域相關詞彙極性分析及文件情緒分類之研究 (Domain Dependent Word Polarity Analysis for Sentiment Classification) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ho-Cheng</first><last>Yu</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>30–31</pages>
       <url hash="7ec7af16">O12-1004</url>
@@ -63,9 +63,9 @@
     </paper>
     <paper id="6">
       <title>Associating Collocations with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses Using Hybrid Models</title>
-      <author><first>Yi-Chun</first><last>Chen</last></author>
+      <author id="yichun-chen"><first>Yi-Chun</first><last>Chen</last></author>
       <author><first>Tzu-Xi</first><last>Yen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>47–60</pages>
       <url hash="21b403bc">O12-1006</url>
       <bibkey>chen-etal-2012-associating</bibkey>
@@ -73,7 +73,7 @@
     <paper id="7">
       <title>Measuring Individual Differences in Word Recognition: The Role of Individual Lexical Behaviors</title>
       <author><first>Hsin-Ni</first><last>Lin</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Shiao-Hui</first><last>Chan</last></author>
       <pages>61–74</pages>
       <url hash="d778d1f9">O12-1007</url>
@@ -92,7 +92,7 @@
     <paper id="9">
       <title>基於決策樹演算法之台語連音變調預估模組 (A Prediction Module for <fixed-case>T</fixed-case>aiwanese Tone Sandhi Based on the Decision Tree Algorithm) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Neng-Huang</first><last>Pan</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Pei-Chun</first><last>Tsai</last></author>
       <pages>92–101</pages>
       <url hash="93c4ae70">O12-1009</url>
@@ -101,10 +101,10 @@
     <paper id="10">
       <title>台語文字與語音語料庫之建置 (Development of a <fixed-case>T</fixed-case>aiwanese Speech and Text Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Tzu-Yu</first><last>Liao</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
       <author><first>Ming-Tat</first><last>Ko</last></author>
-      <author><first>Yuang-chin</first><last>Chiang</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-chin</first><last>Chiang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <pages>102–111</pages>
       <url hash="68449242">O12-1010</url>
       <bibkey>liao-etal-2012-tai</bibkey>
@@ -133,15 +133,15 @@
       <author><first>Tung-Jia</first><last>Chang</last></author>
       <author><first>Chueh-An</first><last>Yen</last></author>
       <author><first>Chao-Ju</first><last>Chen</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>140–141</pages>
       <url hash="13a6e30d">O12-1013</url>
       <bibkey>lin-etal-2012-li</bibkey>
     </paper>
     <paper id="14">
       <title>Detecting and Correcting Syntactic Errors in Machine Translation Using Feature-Based <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>142–143</pages>
       <url hash="ecf00544">O12-1014</url>
       <bibkey>ma-mckeown-2012-detecting</bibkey>
@@ -206,8 +206,8 @@
       <title>Disambiguating Main <fixed-case>POS</fixed-case> tags for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Razieh</first><last>Ehsani</last></author>
       <author><first>Muzaffer Ege</first><last>Alper</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
-      <author><first>Eşref</first><last>Adali</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="esref-adali"><first>Eşref</first><last>Adali</last></author>
       <pages>202–213</pages>
       <url hash="8a74218a">O12-1021</url>
       <bibkey>ehsani-etal-2012-disambiguating</bibkey>
@@ -216,9 +216,9 @@
       <title>台語朗讀資料庫之自動切音技術應用於音文同步有聲書之建立 (Automatic Time Alignment for a <fixed-case>T</fixed-case>aiwanese Read Speech Corpus and its Application to Constructing Audiobooks with Text-Speech Synchronization) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wei-jay</first><last>Huang</last></author>
       <author><first>Jhih-rou</first><last>Lin</last></author>
-      <author><first>Ren-yuan</first><last>Lyu</last></author>
-      <author><first>Yuang-chin</first><last>Chiang</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-yuan</first><last>Lyu</last></author>
+      <author id="yuang-chin-chiang"><first>Yuang-chin</first><last>Chiang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <author><first>Ming-Tat</first><last>Ko</last></author>
       <pages>214–230</pages>
       <url hash="a4ba400f">O12-1022</url>
@@ -237,9 +237,9 @@
       <title>Translating Collocation using Monolingual and Parallel Corpus</title>
       <author><first>Ming-Zhuan</first><last>Jiang</last></author>
       <author><first>Tzu-Xi</first><last>Yen</last></author>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>246–260</pages>
       <url hash="607d8f4d">O12-1024</url>
       <bibkey>jiang-etal-2012-translating</bibkey>
@@ -250,7 +250,7 @@
       <author><first>Bilel</first><last>Elayeb</last></author>
       <author><first>Ibrahim</first><last>Bounhas</last></author>
       <author><first>Fabrice</first><last>Evrard</last></author>
-      <author><first>Narjès Bellamine Ben</first><last>Saoud</last></author>
+      <author id="narjes-bellamine-ben-saoud"><first>Narjès Bellamine Ben</first><last>Saoud</last></author>
       <pages>261–275</pages>
       <url hash="f98a92ae">O12-1025</url>
       <bibkey>khiroun-etal-2012-possibilistic</bibkey>
@@ -258,7 +258,7 @@
     <paper id="26">
       <title>利用關聯式規則解決台語文轉音系統中一詞多音之歧異 (Applying Association Rules in Solving the Polysemy Problem in a <fixed-case>C</fixed-case>hinese to <fixed-case>T</fixed-case>aiwanese <fixed-case>TTS</fixed-case> System) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yih-Jeng</first><last>Lin</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Wei-Lun</first><last>Li</last></author>
       <pages>276–291</pages>
       <url hash="f32482f9">O12-1026</url>
@@ -269,7 +269,7 @@
       <author><first>Yu-Hao</first><last>Lin</last></author>
       <author><first>Yu-Lan</first><last>Liu</last></author>
       <author><first>Tzu-Xi</first><last>Yen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>292–306</pages>
       <url hash="ad518564">O12-1027</url>
       <bibkey>lin-etal-2012-context</bibkey>
@@ -278,7 +278,7 @@
       <title>Implementation of <fixed-case>M</fixed-case>alayalam Morphological Analyzer Based on Hybrid Approach</title>
       <author><first>Vinod P.</first><last>M</last></author>
       <author><first>Jayan</first><last>V</last></author>
-      <author><first>Bhadran V.</first><last>K</last></author>
+      <author id="bhadran-v-k"><first>Bhadran V.</first><last>K</last></author>
       <pages>307–317</pages>
       <url hash="208f3614">O12-1028</url>
       <bibkey>m-etal-2012-implementation</bibkey>
@@ -290,7 +290,7 @@
       <author><first>Swapan</first><last>Debbarma</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>318–325</pages>
       <url hash="856df621">O12-1029</url>
       <bibkey>patra-etal-2012-light</bibkey>
@@ -299,9 +299,9 @@
       <title>台語關鍵詞辨識之實作與比較 (Implementation and Comparison of Keyword Spotting for <fixed-case>T</fixed-case>aiwanese) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chung-Che</first><last>Wang</last></author>
       <author><first>Che-Hsuan</first><last>Chou</last></author>
-      <author><first>Liang-Yu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liang-Yu</first><last>Chen</last></author>
       <author><first>Yu-Jhe</first><last>Li</last></author>
-      <author><first>Jyh-Shing</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Jang</last></author>
       <author><first>Hsun-Cheng</first><last>Hu</last></author>
       <author><first>Shih-Peng</first><last>Lin</last></author>
       <author><first>You-Lian</first><last>Huang</last></author>
@@ -328,7 +328,7 @@
     <paper id="33">
       <title>Collaborative Annotation and Visualization of Functional and Discourse Structures</title>
       <author><first>Hengbin</first><last>Yan</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
       <pages>366–374</pages>
       <url hash="fcf9470b">O12-1033</url>
       <bibkey>yan-webster-2012-collaborative</bibkey>
@@ -339,7 +339,7 @@
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Liang-Pu</first><last>Chen</last></author>
       <author><first>Wen-Tai</first><last>Hsieh</last></author>
-      <author><first>Seng-Cho T.</first><last>Chou</last></author>
+      <author id="seng-cho-t-chou"><first>Seng-Cho T.</first><last>Chou</last></author>
       <pages>375–384</pages>
       <url hash="1bfeeebb">O12-1034</url>
       <bibkey>yang-etal-2012-ji</bibkey>
@@ -349,8 +349,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 17, Number 1, March 2012</booktitle>
       <url hash="39f947ac">O12-2</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>March</month>
       <year>2012</year>
       <venue>ijclclp</venue>
@@ -370,8 +370,8 @@
     <paper id="2">
       <title>可變速中文文字轉語音系統 (Variable Speech Rate <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Text-to-Speech System) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chen-Yu</first><last>Chiang</last></author>
-      <author><first>Qi-Quan</first><last>Huang</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="qi-quan-huang"><first>Qi-Quan</first><last>Huang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Hsiu-Min</first><last>Yu</last></author>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <url hash="fe6cacac">O12-2002</url>
@@ -379,7 +379,7 @@
     </paper>
     <paper id="3">
       <title>The Polysemy Problem, an Important Issue in a <fixed-case>C</fixed-case>hinese to <fixed-case>T</fixed-case>aiwanese <fixed-case>TTS</fixed-case> System</title>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Yih-Jeng</first><last>Lin</last></author>
       <url hash="e6eac033">O12-2003</url>
       <bibkey>yu-lin-2012-polysemy</bibkey>
@@ -396,7 +396,7 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 17, Number 2, June 2012—Special Issue on Selected Papers from <fixed-case>ROCLING</fixed-case> <fixed-case>XXIII</fixed-case></booktitle>
       <url hash="a42977d8">O12-3</url>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
       <editor><first>Wei-Ho</first><last>Tsai</last></editor>
       <month>June</month>
       <year>2012</year>
@@ -407,8 +407,8 @@
     </frontmatter>
     <paper id="1">
       <title>Transitivity of a <fixed-case>C</fixed-case>hinese Verb-Result Compound and Affected Argument of the Result Verb</title>
-      <author><first>You-shan</first><last>Chung</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="you-shan-chung"><first>You-shan</first><last>Chung</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="03a4856f">O12-3001</url>
       <bibkey>chung-chen-2012-transitivity</bibkey>
     </paper>
@@ -426,7 +426,7 @@
       <author><first>Jing-Chen</first><last>Yang</last></author>
       <author><first>Yu-Yun</first><last>Chang</last></author>
       <author><first>Yu-Wen</first><last>Liu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <url hash="be30cd86">O12-3003</url>
       <bibkey>wang-etal-2012-frequency</bibkey>
     </paper>
@@ -441,8 +441,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 17, Number 3, September 2012</booktitle>
       <url hash="2b634309">O12-4</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>September</month>
       <year>2012</year>
       <venue>ijclclp</venue>
@@ -461,8 +461,8 @@
     <paper id="2">
       <title>聲符部件排序與形聲字發音規則探勘 (Phonetic Component Ranking and Pronunciation Rules Discovery for Picto-Phonetic <fixed-case>C</fixed-case>hinese Characters) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chia-Hui</first><last>Chang</last></author>
-      <author><first>Shu-Yen</first><last>Lin</last></author>
-      <author><first>Meng-Feng</first><last>Tsai</last></author>
+      <author id="shu-yen-lin"><first>Shu-Yen</first><last>Lin</last></author>
+      <author id="ming-feng-tsai"><first>Meng-Feng</first><last>Tsai</last></author>
       <author><first>Shu-Ping</first><last>Li</last></author>
       <author><first>Hsiang-Mei</first><last>Liao</last></author>
       <author><first>Norden E.</first><last>Huang</last></author>
@@ -471,12 +471,12 @@
     </paper>
     <paper id="3">
       <title>Enhancement of Feature Engineering for Conditional Random Field Learning in <fixed-case>C</fixed-case>hinese Word Segmentation Using Unlabeled Data</title>
-      <author><first>Mike Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Mike Tian-Jian</first><last>Jiang</last></author>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
-      <author><first>Ting-Hao</first><last>Yang</last></author>
-      <author><first>Chan-Hung</first><last>Kuo</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="ting-hao-yang"><first>Ting-Hao</first><last>Yang</last></author>
+      <author id="chan-hung-kuo"><first>Chan-Hung</first><last>Kuo</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="057097cb">O12-4003</url>
       <bibkey>jiang-etal-2012-enhancement</bibkey>
     </paper>
@@ -500,11 +500,11 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 17, Number 4, <fixed-case>D</fixed-case>ecember 2012-Special Issue on Selected Papers from <fixed-case>ROCLING</fixed-case> <fixed-case>XXIV</fixed-case></booktitle>
       <url hash="23737482">O12-5</url>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
-      <editor><first>Richard Tzong-Han</first><last>Tsai</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></editor>
       <editor><first>Chia-Ping</first><last>Chen</last></editor>
       <editor><first>Cheng-Zen</first><last>Yang</last></editor>
-      <editor><first>Shu-Kai</first><last>Hsieh</last></editor>
+      <editor id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></editor>
       <month>December</month>
       <year>2012</year>
       <venue>ijclclp</venue>
@@ -514,8 +514,8 @@
     </frontmatter>
     <paper id="1">
       <title>Detecting and Correcting Syntactic Errors in Machine Translation Using Feature-Based <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="14369cac">O12-5001</url>
       <bibkey>ma-mckeown-2012-detecting-correcting</bibkey>
     </paper>
@@ -530,7 +530,7 @@
     <paper id="3">
       <title>領域相關詞彙極性分析及文件情緒分類之研究 (Domain Dependent Word Polarity Analysis for Sentiment Classification) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ho-Cheng</first><last>Yu</last></author>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <url hash="b7d7524a">O12-5003</url>
       <bibkey>yu-etal-2012-ling-yu</bibkey>
@@ -542,14 +542,14 @@
       <author><first>Tung-Jia</first><last>Chang</last></author>
       <author><first>Chueh-An</first><last>Yen</last></author>
       <author><first>Chao-Ju</first><last>Chen</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <url hash="13a23d5e">O12-5004</url>
       <bibkey>lin-etal-2012-li-yong-ji</bibkey>
     </paper>
     <paper id="5">
       <title>語音辨識使用統計圖等化方法 (Speech Recognition Leveraging Histogram Equalization Methods) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsin-Ju</first><last>Hsieh</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="50129343">O12-5005</url>
       <bibkey>hsieh-etal-2012-yu</bibkey>
diff --git a/data/xml/O13.xml b/data/xml/O13.xml
index 892ebc6cbc..56c0688ccc 100644
--- a/data/xml/O13.xml
+++ b/data/xml/O13.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 25th Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2013)</booktitle>
       <url hash="eacc3aa5">O13-1</url>
       <editor><first>Hung-Duen</first><last>Yang</last></editor>
-      <editor><first>Wen-Lian</first><last>Hsu</last></editor>
+      <editor id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></editor>
       <editor><first>Chia-Ping</first><last>Chen</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Kaohsiung, Taiwan</address>
@@ -21,7 +21,7 @@
       <title>改良語句模型技術於節錄式語音摘要之研究 (Improved Sentence Modeling Techniques for Extractive Speech Summarization) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Wen-Lian</first><last>Hsu</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>5–21</pages>
@@ -32,7 +32,7 @@
       <title>分頻式調變頻譜分解於強健性語音辨識 (Sub-band modulation spectrum factorization in robust speech recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hao-teng</first><last>Fan</last></author>
       <author><first>Yi-zhang</first><last>Cai</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>22–36</pages>
       <url hash="c85bf9c9">O13-1002</url>
       <bibkey>fan-etal-2013-fen</bibkey>
@@ -41,9 +41,9 @@
       <title>使用語音評分技術輔助台語語料的驗證 (Using Speech Assessment Technique for the Validation of <fixed-case>T</fixed-case>aiwanese Speech Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Jhe</first><last>Li</last></author>
       <author><first>Chung-Che</first><last>Wang</last></author>
-      <author><first>Liang-Yu</first><last>Chen</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="liang-yu-chen"><first>Liang-Yu</first><last>Chen</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <pages>37–38</pages>
       <url hash="108b05f5">O13-1003</url>
       <bibkey>li-etal-2013-shi</bibkey>
@@ -58,18 +58,18 @@
     </paper>
     <paper id="5">
       <title>機器翻譯為本的中文拼字改錯系統 (<fixed-case>C</fixed-case>hinese Spelling Checker Based on Statistical Machine Translation)</title>
-      <author><first>Hsun-wen</first><last>Chiu</last></author>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-wen</first><last>Chiu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>53–55</pages>
       <url hash="e6b74078">O13-1005</url>
       <bibkey>chiu-etal-2013-ji</bibkey>
     </paper>
     <paper id="6">
       <title>Detecting <fixed-case>E</fixed-case>nglish Grammatical Errors based on Machine Translation</title>
-      <author><first>Jim</first><last>Chang</last></author>
-      <author><first>Jiancheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jiancheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>56–58</pages>
       <url hash="c2b40f24">O13-1006</url>
       <bibkey>chang-etal-2013-detecting</bibkey>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="10">
       <title>基於音段式<fixed-case>LMR</fixed-case> 對映之語音轉換方法的改進 (Improving of Segmental <fixed-case>LMR</fixed-case>-Mapping Based Voice Conversion Methods) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Jia-Wei</first><last>Chang</last></author>
       <pages>90–104</pages>
       <url hash="832586ca">O13-1010</url>
@@ -130,8 +130,8 @@
     </paper>
     <paper id="13">
       <title>A Semantic-Based Approach to Noun-Noun Compound Interpretation</title>
-      <author><first>You-shan</first><last>Chung</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="you-shan-chung"><first>You-shan</first><last>Chung</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>122–123</pages>
       <url hash="90452fb6">O13-1013</url>
       <bibkey>chung-chen-2013-semantic</bibkey>
@@ -148,7 +148,7 @@
       <title>雜訊環境下應用線性估測編碼於特徵時序列之強健性語音辨識 (Employing linear prediction coding in feature time sequences for robust speech recognition in noisy environments) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hao-teng</first><last>Fan</last></author>
       <author><first>Wen-yu</first><last>Tseng</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>139–140</pages>
       <url hash="c4052eed">O13-1015</url>
       <bibkey>fan-etal-2013-za</bibkey>
@@ -197,7 +197,7 @@
     <paper id="20">
       <title>主要漢字形聲字發音規則探勘與視覺化 (Primary <fixed-case>C</fixed-case>hinese Semantic-Phonetic Compounds Pronunciation Rules Mining and Visualization) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chien-Hui</first><last>Hsu</last></author>
-      <author><first>Meng-Feng</first><last>Tsai</last></author>
+      <author id="ming-feng-tsai"><first>Meng-Feng</first><last>Tsai</last></author>
       <author><first>Chia-Hui</first><last>Chang</last></author>
       <author><first>Hsiang-Mei</first><last>Liao</last></author>
       <author><first>Shu-Ping</first><last>Li</last></author>
@@ -209,7 +209,7 @@
     <paper id="21">
       <title>語料庫導向之方位短句於固定框架的共現概念統計分析 (A Corpus-driven Pattern Analysis in Locative Phrases: A Statistical Comparison of Co-appearing Concepts in Fixed Frames) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>August F.Y.</first><last>Chao</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>198–210</pages>
       <url hash="d09b14c6">O13-1021</url>
       <bibkey>chao-chung-2013-yu</bibkey>
@@ -217,7 +217,7 @@
     <paper id="22">
       <title>A simple real-word error detection and correction using local word bigram and trigram</title>
       <author><first>Pratip</first><last>Samanta</last></author>
-      <author><first>Bidyut B.</first><last>Chaudhuri</last></author>
+      <author id="bidyut-baran-chaudhuri"><first>Bidyut B.</first><last>Chaudhuri</last></author>
       <pages>211–220</pages>
       <url hash="7cd9c666">O13-1022</url>
       <bibkey>samanta-chaudhuri-2013-simple</bibkey>
@@ -226,7 +226,7 @@
       <title>結合關鍵詞驗證及語者驗證之雲端身份驗證系統 (A Cloud Speaker Authentication System Based on Keyword Verification and Speaker Verification) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yi-Chin</first><last>Chiu</last></author>
       <author><first>Chuan-Yen</first><last>Fan</last></author>
-      <author><first>Bor-Shen</first><last>Lin</last></author>
+      <author id="bor-shen-lin"><first>Bor-Shen</first><last>Lin</last></author>
       <pages>221–235</pages>
       <url hash="fab18478">O13-1023</url>
       <bibkey>chiu-etal-2013-jie</bibkey>
@@ -235,7 +235,7 @@
       <title>Causing Emotion in Collocation:An Exploratory Data Analysis</title>
       <author><first>Pei-Yu</first><last>Lu</last></author>
       <author><first>Yu-Yun</first><last>Chang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>236–249</pages>
       <url hash="a758cad4">O13-1024</url>
       <bibkey>lu-etal-2013-causing</bibkey>
@@ -243,15 +243,15 @@
     <paper id="25">
       <title>Observing Features of <fixed-case>PTT</fixed-case> Neologisms: A Corpus-driven Study with N-gram Model</title>
       <author><first>Tsun-Jui</first><last>Liu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
-      <author><first>Laurent</first><last>Prevot</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prevot</last></author>
       <pages>250–259</pages>
       <url hash="596ddd03">O13-1025</url>
       <bibkey>liu-etal-2013-observing</bibkey>
     </paper>
     <paper id="26">
       <title>Variability in vowel formant frequencies of children with cerebral palsy</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Yung-Chieh</first><last>Lin</last></author>
       <author><first>Wei Chen</first><last>Hsu</last></author>
       <author><first>Fang-hsin</first><last>Liao</last></author>
@@ -290,8 +290,8 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 18, Number 1, March 2013</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>March</month>
       <year>2013</year>
       <venue>rocling</venue>
@@ -308,9 +308,9 @@
     </paper>
     <paper id="2">
       <title>Learning to Find Translations and Transliterations on the Web based on Conditional Random Fields</title>
-      <author><first>Joseph Z.</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="joseph-z-chang"><first>Joseph Z.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <url hash="6dfd1545">O13-2002</url>
       <bibkey>chang-etal-2013-learning</bibkey>
     </paper>
@@ -323,7 +323,7 @@
     <paper id="4">
       <title>Emotion Co-referencing - Emotional Expression, Holder, and Topic</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="8052f8ec">O13-2004</url>
       <bibkey>das-bandyopadhyay-2013-emotion</bibkey>
     </paper>
@@ -331,7 +331,7 @@
   <volume id="3" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 18, Number 2, June 2013-Special Issue on <fixed-case>C</fixed-case>hinese Lexical Resources: Theories and Applications</booktitle>
-      <editor><first>Shu-Kai</first><last>Hsieh</last></editor>
+      <editor id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></editor>
       <month>June</month>
       <year>2013</year>
       <venue>rocling</venue>
@@ -350,15 +350,15 @@
     </paper>
     <paper id="2">
       <title>以中文十億詞語料庫為基礎之兩岸詞彙對比研究 (Cross-Strait Lexical Differences: A Comparative Study based on <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <url hash="c89099a4">O13-3002</url>
       <bibkey>hong-huang-2013-yi</bibkey>
     </paper>
     <paper id="3">
       <title>基於字典釋義關聯方法的同義詞概念擷取:以《同義詞詞林(擴展版)》為例 (A Definition-based Shared-concept Extraction within Groups of <fixed-case>C</fixed-case>hinese Synonyms: A Study Utilizing the Extended <fixed-case>C</fixed-case>hinese Synonym Forest) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>F. Y. August</first><last>Chao</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="f-y-august-chao"><first>F. Y. August</first><last>Chao</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="9149b3fa">O13-3003</url>
       <bibkey>chao-chung-2013-ji</bibkey>
     </paper>
@@ -398,7 +398,7 @@
     <paper id="3">
       <title>Acoustic Correlates of Contrastive Stress in Compound Words versus Verbal Phrase in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Weilin</first><last>Shen</last></author>
-      <author><first>Jacqueline</first><last>Vaissière</last></author>
+      <author id="jacqueline-vaissiere"><first>Jacqueline</first><last>Vaissière</last></author>
       <author><first>Frédéric</first><last>Isel</last></author>
       <url hash="21599e04">O13-4003</url>
       <bibkey>shen-etal-2013-acoustic</bibkey>
@@ -444,24 +444,24 @@
     </paper>
     <paper id="2">
       <title>Integrating Dictionary and Web N-grams for <fixed-case>C</fixed-case>hinese Spell Checking</title>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Hsun-wen</first><last>Chiu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-wen</first><last>Chiu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="11df4b8b">O13-5002</url>
       <bibkey>wu-etal-2013-integrating</bibkey>
     </paper>
     <paper id="3">
       <title>Correcting Serial Grammatical Errors based on N-grams and Syntax</title>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Jim</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="3253f1a2">O13-5003</url>
       <bibkey>wu-etal-2013-correcting</bibkey>
     </paper>
     <paper id="4">
       <title>A Semantic-Based Approach to Noun-Noun Compound Interpretation</title>
-      <author><first>You-shan</first><last>Chung</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="you-shan-chung"><first>You-shan</first><last>Chung</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="073ad673">O13-5004</url>
       <bibkey>chung-chen-2013-semantic-based</bibkey>
     </paper>
@@ -477,15 +477,15 @@
       <title>使用語音評分技術輔助台語語料的驗證 (Using Speech Assessment Technique for the Validation of <fixed-case>T</fixed-case>aiwanese Speech Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Jhe</first><last>Li</last></author>
       <author><first>Chung-Che</first><last>Wang</last></author>
-      <author><first>Liang-Yu</first><last>Chen</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="liang-yu-chen"><first>Liang-Yu</first><last>Chen</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <url hash="effa180b">O13-5006</url>
       <bibkey>li-etal-2013-shi-yong</bibkey>
     </paper>
     <paper id="7">
       <title>基於音段式<fixed-case>LMR</fixed-case>對映之語音轉換方法的改進 (Improving of Segmental <fixed-case>LMR</fixed-case>-Mapping Based Voice Conversion Method) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Jia-Wei</first><last>Chang</last></author>
       <url hash="04ba4478">O13-5007</url>
       <bibkey>gu-chang-2013-ji-yu</bibkey>
@@ -494,7 +494,7 @@
       <title>雜訊環境下應用線性估測編碼於特徵時序列之強健性語音辨識 (Employing Linear Prediction Coding in Feature Time Sequences for Robust Speech Recognition in Noisy Environments) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hao-teng</first><last>Fan</last></author>
       <author><first>Wen-yu</first><last>Tseng</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <url hash="ad1561c5">O13-5008</url>
       <bibkey>fan-etal-2013-za-xun</bibkey>
     </paper>
diff --git a/data/xml/O14.xml b/data/xml/O14.xml
index 209c712c56..247ffdd906 100644
--- a/data/xml/O14.xml
+++ b/data/xml/O14.xml
@@ -6,7 +6,7 @@
       <url hash="d407cc45">O14-1</url>
       <editor><first>Jing-Yang</first><last>Jou</last></editor>
       <editor><first>Chia-Hui</first><last>Chang</last></editor>
-      <editor><first>Hsin-Min</first><last>Wang</last></editor>
+      <editor id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Jhongli, Taiwan</address>
       <month>October</month>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>運用概念模型化技術於中文大詞彙連續語音辨識之語言模型調適 (Leveraging Concept Modeling Techniques for Language Model Adaptation in <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Po-Han</first><last>Hao</last></author>
-      <author><first>Su-Cheng</first><last>Chen</last></author>
+      <author id="ssu-cheng-chen"><first>Su-Cheng</first><last>Chen</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>1–2</pages>
       <url hash="549f6e95">O14-1001</url>
@@ -33,7 +33,7 @@
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>3–20</pages>
       <url hash="2b91f1e5">O14-1002</url>
       <bibkey>liu-etal-2014-tan</bibkey>
@@ -65,8 +65,8 @@
     <paper id="6">
       <title>中文轉客文文轉音系統中的客語斷詞處理之研究 (Research on <fixed-case>H</fixed-case>akka Word Segmentation Processes in <fixed-case>C</fixed-case>hinese-to-<fixed-case>H</fixed-case>akka <fixed-case>T</fixed-case>ext-to-<fixed-case>S</fixed-case>peech System )[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsin-Wei</first><last>Lin</last></author>
-      <author><first>Feng-Long</first><last>Huang</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Huang</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Yih-Jeng</first><last>Lin</last></author>
       <pages>58–77</pages>
       <url hash="b6d89966">O14-1006</url>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="7">
       <title>基於發音知識以建構頻譜<fixed-case>HMM</fixed-case> 之國語語音合成方法 (A <fixed-case>M</fixed-case>andarin Speech Synthesis Method Using Articulation-knowledge Based Spectral <fixed-case>HMM</fixed-case> Structure)[In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Ming-Yen</first><last>Lai</last></author>
       <author><first>Wei-Siang</first><last>Hong</last></author>
       <author><first>Yan-Hua</first><last>Chen</last></author>
@@ -85,8 +85,8 @@
     <paper id="8">
       <title>Some Prosodic Characteristics of <fixed-case>T</fixed-case>aiwan <fixed-case>E</fixed-case>nglish Accent</title>
       <author><first>Chao-yu</first><last>Su</last></author>
-      <author><first>Chiu-yu</first><last>Tseng</last></author>
-      <author><first>Jyh-Shing</first><last>Roger Jang</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-yu</first><last>Tseng</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing</first><last>Roger Jang</last></author>
       <pages>89–90</pages>
       <url hash="a7248160">O14-1008</url>
       <bibkey>su-etal-2014-prosodic</bibkey>
@@ -102,7 +102,7 @@
     <paper id="10">
       <title>Public Opinion Toward <fixed-case>CSSTA</fixed-case>: A Text Mining Approach</title>
       <author><first>Yi-An</first><last>Wu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>94–95</pages>
       <url hash="bbbb6319">O14-1010</url>
       <bibkey>wu-hsieh-2014-public</bibkey>
@@ -111,7 +111,7 @@
       <title>Towards automatic enrichment of standardized electronic dictionaries by semantic classes</title>
       <author><first>Bilel</first><last>Gargouri</last></author>
       <author><first>Imen</first><last>Elleuch</last></author>
-      <author><first>Abdelmajid-Lin</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid-Lin</first><last>Ben Hamadou</last></author>
       <pages>96–109</pages>
       <url hash="94a132a1">O14-1011</url>
       <bibkey>gargouri-etal-2014-towards</bibkey>
@@ -134,7 +134,7 @@
     <paper id="14">
       <title>Sketching the Dependency Relations of Words in <fixed-case>C</fixed-case>hinese</title>
       <author><last>Shih</last><first>Meng-Hsien</first></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>139–152</pages>
       <url hash="71b6fce5">O14-1014</url>
       <bibkey>shih-hsieh-2014-sketching</bibkey>
@@ -151,10 +151,10 @@
     <paper id="16">
       <title>學術論文簡介的自動文步分析與寫作提示 (Automatic Move Analysis of Research Articles for Assisting Writing)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Guan-Cheng</first><last>Huang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Hsiang-Ling</first><last>Hsu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>163–164</pages>
       <url hash="06b2b4b7">O14-1016</url>
       <bibkey>huang-etal-2014-xue</bibkey>
@@ -164,7 +164,7 @@
       <author><first>Jia-Guu</first><last>Lin</last></author>
       <author><first>Jyh-Bin</first><last>Shiau</last></author>
       <author><first>Chang</first><last>En Pu</last></author>
-      <author><first>Chia-Long</first><last>Wu</last></author>
+      <author id="chia-lung-wu"><first>Chia-Long</first><last>Wu</last></author>
       <pages>165–174</pages>
       <url hash="6e84e653">O14-1017</url>
       <bibkey>lin-etal-2014-yi</bibkey>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="19">
       <title>Testing Distributional Hypothesis in Patent Translation</title>
-      <author><first>Hsin-Hung</first><last>Lin</last></author>
+      <author id="darren-hsin-hung-lin"><first>Hsin-Hung</first><last>Lin</last></author>
       <author><first>Yves</first><last>Lepage</last></author>
       <pages>185–192</pages>
       <url hash="84964a17">O14-1019</url>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="20">
       <title>Spectrum Analysis of Cry Sounds in Preterm and Full-Term Infants</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Yu-Hsuan</first><last>Yang</last></author>
       <author><first>Chyi-Her</first><last>Lin</last></author>
       <author><first>Yuh-Jyh</first><last>Lin</last></author>
@@ -211,8 +211,8 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 19, Number 1, March 2014</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>March</month>
       <year>2014</year>
       <venue>ijclclp</venue>
@@ -224,15 +224,15 @@
     <paper id="1">
       <title>A Novel Approach for Handling Unknown Word Problem in <fixed-case>C</fixed-case>hinese-<fixed-case>V</fixed-case>ietnamese Machine Translation</title>
       <author><first>Phuoc</first><last>Tran</last></author>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <url hash="8ead386c">O14-2001</url>
       <bibkey>tran-dinh-2014-novel</bibkey>
     </paper>
     <paper id="2">
       <title>Joint Learning of Entity Linking Constraints Using a <fixed-case>M</fixed-case>arkov-<fixed-case>L</fixed-case>ogic Network</title>
       <author><first>Hong-Jie</first><last>Dai</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="836668a7">O14-2002</url>
       <bibkey>dai-etal-2014-joint</bibkey>
     </paper>
@@ -253,8 +253,8 @@
   <volume id="3" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 19, Number 2, June 2014</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>June</month>
       <year>2014</year>
       <venue>ijclclp</venue>
@@ -265,7 +265,7 @@
     </frontmatter>
     <paper id="1">
       <title>Social Metaphor Detection via Topical Analysis</title>
-      <author><first>Ting-Hao Kenneth</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao Kenneth</first><last>Huang</last></author>
       <url hash="6840e4b8">O14-3001</url>
       <bibkey>huang-2014-social</bibkey>
     </paper>
@@ -284,13 +284,13 @@
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="8b3efb6a">O14-3003</url>
       <bibkey>huang-etal-2014-resolving</bibkey>
     </paper>
     <paper id="4">
       <title>不同母語背景華語學習者的用詞特徵：以語料庫為本的研究 (Salient Linguistic Features of <fixed-case>C</fixed-case>hinese Learners with Different L1s: A Corpus-based Study) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Li-ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-ping</first><last>Chang</last></author>
       <url hash="2686db4e">O14-3004</url>
       <bibkey>chang-2014-bu</bibkey>
     </paper>
@@ -298,8 +298,8 @@
   <volume id="4" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 19, Number 3, September 2014</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></editor>
       <month>September</month>
       <year>2014</year>
       <venue>ijclclp</venue>
@@ -314,7 +314,7 @@
       <author><first>Sachi</first><last>Kato</last></author>
       <author><first>Hikari</first><last>Konishi</last></author>
       <author><first>Mizuho</first><last>Imada</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <url hash="86102e0f">O14-4001</url>
       <bibkey>asahara-etal-2014-bccwj</bibkey>
     </paper>
@@ -322,7 +322,7 @@
       <title>Transliteration Extraction from Classical <fixed-case>C</fixed-case>hinese Buddhist Literature Using Conditional Random Fields with Language Models</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
       <author><first>Karol Chia-Tien</first><last>Chang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <author><first>Jieh</first><last>Hsiang</last></author>
       <url hash="b54046f7">O14-4002</url>
       <bibkey>wang-etal-2014-transliteration</bibkey>
@@ -339,7 +339,7 @@
   <volume id="5" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 19, Number 4, <fixed-case>D</fixed-case>ecember 2014 - Special Issue on Selected Papers from <fixed-case>ROCLING</fixed-case> <fixed-case>XXVI</fixed-case></booktitle>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <editor><first>Hung-Yu</first><last>Kao</last></editor>
       <editor><first>Chia-Hui</first><last>Chang</last></editor>
       <month>December</month>
@@ -360,24 +360,24 @@
     <paper id="2">
       <title>Public Opinion Toward <fixed-case>CSSTA</fixed-case>: A Text Mining Approach</title>
       <author><first>Yi-An</first><last>Wu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <url hash="27e1a740">O14-5002</url>
       <bibkey>wu-hsieh-2014-public-opinion</bibkey>
     </paper>
     <paper id="3">
       <title>學術論文簡介的自動文步分析與寫作提示 (Automatic Move Analysis of Research Articles for Assisting Writing) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Guan-Cheng</first><last>Huang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Hsiang-Ling</first><last>Hsu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <url hash="3700644e">O14-5003</url>
       <bibkey>huang-etal-2014-xue-shu</bibkey>
     </paper>
     <paper id="4">
       <title>使用概念資訊於中文大詞彙連續語音辨識之研究 (Exploring Concept Information for <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Po-Han</first><last>Hao</last></author>
-      <author><first>Ssu-Cheng</first><last>Chen</last></author>
+      <author id="ssu-cheng-chen"><first>Ssu-Cheng</first><last>Chen</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="b97ec93b">O14-5004</url>
       <bibkey>hao-etal-2014-shi</bibkey>
@@ -385,14 +385,14 @@
     <paper id="5">
       <title>Some Prosodic Characteristics of <fixed-case>T</fixed-case>aiwan <fixed-case>E</fixed-case>nglish Accent</title>
       <author><first>Chao-yu</first><last>Su</last></author>
-      <author><first>Chiu-yu</first><last>Tseng</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="chiu-yu-tseng"><first>Chiu-yu</first><last>Tseng</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <url hash="83409904">O14-5005</url>
       <bibkey>su-etal-2014-prosodic-characteristics</bibkey>
     </paper>
     <paper id="6">
       <title>Quantitative Assessment of Cry in Term and Preterm Infants: Long-Time Average Spectrum Analysis</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <url hash="f3f0f172">O14-5006</url>
       <bibkey>chen-2014-quantitative</bibkey>
     </paper>
diff --git a/data/xml/O15.xml b/data/xml/O15.xml
index d8c076594e..2d9ee20165 100644
--- a/data/xml/O15.xml
+++ b/data/xml/O15.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 27th Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2015)</booktitle>
       <url hash="b8b2c3d8">O15-1</url>
       <editor><first>Sin-Horng</first><last>Chen</last></editor>
-      <editor><first>Hsin-Min</first><last>Wang</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Hsinchu, Taiwan</address>
       <month>October</month>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="2">
       <title>使用詞向量表示與概念資訊於中文大詞彙連續語音辨識之語言模型調適(Exploring Word Embedding and Concept Information for Language Model Adaptation in <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Ssu-Cheng</first><last>Chen</last></author>
+      <author id="ssu-cheng-chen"><first>Ssu-Cheng</first><last>Chen</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Hsiao-Tsung</first><last>Hung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
@@ -51,8 +51,8 @@
     <paper id="4">
       <title>以自然語言處理方法研發智慧型客語無聲調拼音輸入法 (Smart Toneless <fixed-case>P</fixed-case>inyin Input Method for <fixed-case>H</fixed-case>akka Based on Natural Language Processing) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsin-Wei</first><last>Lin</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
-      <author><first>Feng-Long</first><last>Huang</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Huang</last></author>
       <author><first>Jiun-Wei</first><last>Wei</last></author>
       <pages>27–42</pages>
       <url hash="8e825f3e">O15-1004</url>
@@ -63,7 +63,7 @@
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Chun-Ning</first><last>Chang</last></author>
       <author><first>Chu-Ting</first><last>Hsu</last></author>
-      <author><first>Wen-Hui</first><last>Cheng</last></author>
+      <author id="wen-huei-cheng"><first>Wen-Hui</first><last>Cheng</last></author>
       <author><first>Hongsu</first><last>Wang</last></author>
       <author><first>Wei-Yun</first><last>Chiu</last></author>
       <pages>43–57</pages>
@@ -77,7 +77,7 @@
       <author><first>Chao-Chun</first><last>Liang</last></author>
       <author><first>Kuang-Yi</first><last>Hsu</last></author>
       <author><first>Shen-Yun</first><last>Miao</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Lun-Wen</first><last>Ku</last></author>
       <author><first>Churn-Jung</first><last>Liau</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
@@ -108,7 +108,7 @@
       <title>基於貝氏定理自動分析語料庫與標定文步 (A <fixed-case>B</fixed-case>ayesian approach to determine move tags in corpus) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jia-Lien</first><last>Hsu</last></author>
       <author><first>Chiung-Wen</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>87–99</pages>
       <url hash="8275a585">O15-1009</url>
       <bibkey>hsu-etal-2015-ji</bibkey>
@@ -130,7 +130,7 @@
       <author><first>Ming-Han</first><last>Yang</last></author>
       <author><first>Hsiao-Tsung</first><last>Hung</last></author>
       <author><first>Yuwen</first><last>Hsiung</last></author>
-      <author><first>Yao-Ting</first><last>Hung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Hung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>103–120</pages>
       <url hash="9c3c11e5">O15-1011</url>
@@ -138,8 +138,8 @@
     </paper>
     <paper id="12">
       <title>透過語音特徵建構基於堆疊稀疏自編碼器演算法之婚姻治療中夫妻互動行為量表自動化評分系統(Automating Behavior Coding for Distressed Couples Interactions Based on Stacked Sparse Autoencoder Framework using Speech-acoustic Features)[In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Po-Hsuan</first><last>Chen</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="po-hsuan-chen"><first>Po-Hsuan</first><last>Chen</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <pages>121–122</pages>
       <url hash="a2a84bf6">O15-1012</url>
       <bibkey>chen-lee-2015-tou</bibkey>
@@ -186,7 +186,7 @@
       <title>基於 <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec 詞向量的網路情緒文和流行音樂媒合方法之研究(Matching <fixed-case>I</fixed-case>nternet Mood Essays with Pop-Music Based on <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Pin-Chu</first><last>Wen</last></author>
       <author><first>Yi-Lin</first><last>Tsai</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <pages>167–179</pages>
       <url hash="81db5e04">O15-1017</url>
       <bibkey>wen-etal-2015-ji</bibkey>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="22">
       <title>The word complexity measure (<fixed-case>WCM</fixed-case>) in early phonological development: A longitudinal study from birth to three years old</title>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Yi-Hsiang</first><last>Liu</last></author>
       <pages>233–247</pages>
       <url hash="6289be33">O15-1022</url>
@@ -252,7 +252,7 @@
     </paper>
     <paper id="25">
       <title>結合<fixed-case>ANN</fixed-case>、全域變異數與真實軌跡挑選之基週軌跡產生方法(A Pitch-contour Generation Method Combining <fixed-case>ANN</fixed-case> Prediction,Global Variance Matching, and Real-contour Selection)[In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-Yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-Yan</first><last>Gu</last></author>
       <author><first>Kai-Wei</first><last>Jiang</last></author>
       <author><first>Hao</first><last>Wang</last></author>
       <pages>277–288</pages>
@@ -263,7 +263,7 @@
       <title>運用<fixed-case>P</fixed-case>ython結合語音辨識及合成技術於自動化音文同步之實作(A Python Implementation of Automatic Speech-text Synchronization Using Speech Recognition and Text-to-Speech Technology)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>ChunHan</first><last>Lai</last></author>
       <author><first>Chao-Kai</first><last>Chang</last></author>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <pages>289–305</pages>
       <url hash="cc2ccd5c">O15-1026</url>
       <bibkey>lai-etal-2015-yun</bibkey>
@@ -281,8 +281,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 20, Number 1, June 2015-Special Issue on <fixed-case>C</fixed-case>hinese as a Foreign Language</booktitle>
       <editor><first>Lung-Hao</first><last>Lee</last></editor>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
-      <editor><first>Li-Ping</first><last>Chang</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></editor>
       <month>June</month>
       <year>2015</year>
       <venue>ijclclp</venue>
@@ -305,7 +305,7 @@
       <author><first>Qiao</first><last>Zhang</last></author>
       <author><first>Shuiyuan</first><last>Zhang</last></author>
       <author><first>Jianpeng</first><last>Hou</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <url hash="52525626">O15-2002</url>
       <bibkey>xiong-etal-2015-hanspeller</bibkey>
     </paper>
@@ -319,8 +319,8 @@
     <paper id="4">
       <title>Automatically Detecting Syntactic Errors in Sentences Writing by Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language</title>
       <author><first>Tao-Hsing</first><last>Chang</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <url hash="ffe86f26">O15-2004</url>
       <bibkey>chang-etal-2015-automatically</bibkey>
     </paper>
@@ -353,8 +353,8 @@
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 20, Number 2, <fixed-case>D</fixed-case>ecember 2015 - Special Issue on Selected Papers from <fixed-case>ROCLING</fixed-case> <fixed-case>XXVII</fixed-case></booktitle>
       <editor><first>Hung-Yu</first><last>Kao</last></editor>
-      <editor><first>Yih-Ru</first><last>Wang</last></editor>
-      <editor><first>Jen-Tzong</first><last>Chien</last></editor>
+      <editor id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzong</first><last>Chien</last></editor>
       <month>December</month>
       <year>2015</year>
       <venue>ijclclp</venue>
@@ -370,7 +370,7 @@
       <author><first>Kuang-Yi</first><last>Hsu</last></author>
       <author><first>Chien-Tsung</first><last>Huang</last></author>
       <author><first>Shen-Yun</first><last>Miao</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <author><first>Churn-Jung</first><last>Liau</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
@@ -397,7 +397,7 @@
       <author><first>Kai-Wun</first><last>Shih</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="9cc8260a">O15-3004</url>
       <bibkey>shih-etal-2015-jie</bibkey>
@@ -407,15 +407,15 @@
       <author><first>Ting-Hao</first><last>Chang</last></author>
       <author><first>Hsiao-Tsung</first><last>Hung</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="8d77193d">O15-3005</url>
       <bibkey>chang-etal-2015-diao-bian</bibkey>
     </paper>
     <paper id="6">
       <title>透過語音特徵建構基於堆疊稀疏自編碼器演算法之婚姻治療中夫妻互動行為量表自動化評分系統 (Automating Behavior Coding for Distressed Couples Interactions Based on Stacked Sparse Autoencoder Framework using Speech-acoustic Features) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Po-Hsuan</first><last>Chen</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="po-hsuan-chen"><first>Po-Hsuan</first><last>Chen</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <url hash="76eb738f">O15-3006</url>
       <bibkey>chen-lee-2015-tou-guo</bibkey>
     </paper>
diff --git a/data/xml/O16.xml b/data/xml/O16.xml
index b76f2e8123..9853619d67 100644
--- a/data/xml/O16.xml
+++ b/data/xml/O16.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 28th Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2016)</booktitle>
       <url hash="954b756f">O16-1</url>
       <editor><first>Chung-Hsien</first><last>Wu</last></editor>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
       <editor><first>Hung-Yu</first><last>Kao</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Tainan, Taiwan</address>
@@ -61,9 +61,9 @@
     <paper id="5">
       <title>「<fixed-case>V</fixed-case>到」結構的合分詞及語意區分(Word segmentation and sense representation for <fixed-case>V</fixed-case>-dao structure in <fixed-case>C</fixed-case>hinese)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Shi-Min</first><last>Li</last></author>
+      <author id="shih-min-li"><first>Shi-Min</first><last>Li</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Ying-Ni</first><last>Wang</last></author>
       <author><first>Qing-Long</first><last>Lin</last></author>
       <pages>22–34</pages>
@@ -84,7 +84,7 @@
     <paper id="7">
       <title>基於詞語分布均勻度的核心詞彙選擇之研究(A Study on Dispersion Measures for Core Vocabulary Compilation )[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Ying-Ni</first><last>Chien</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Ching-Lung</first><last>Lin</last></author>
@@ -95,7 +95,7 @@
     <paper id="8">
       <title>什麼時候「認真就輸了」？——語料庫中「認真」一詞的語意變化(Do We Lose When Being Serious? —<fixed-case>C</fixed-case>hange in Meaning of the Word “Renzen(認真)” in Corpora)</title>
       <author><first>Pei-Yi</first><last>Chen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>52–81</pages>
       <url hash="6c2e6828">O16-1008</url>
       <bibkey>chen-chung-2016-shi</bibkey>
@@ -107,7 +107,7 @@
       <author><first>Chia-Chen</first><last>Lee</last></author>
       <author><first>Shao-Man</first><last>Lee</last></author>
       <author><first>Guan-Wei</first><last>Li</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>82–99</pages>
       <url hash="56e4b6fa">O16-1009</url>
       <bibkey>huang-etal-2016-crowdsourcing</bibkey>
@@ -115,7 +115,7 @@
     <paper id="10">
       <title>基於相依詞向量的剖析結果重估與排序(N-best Parse Rescoring Based on Dependency-Based Word Embeddings)</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>100–102</pages>
       <url hash="f02e4726">O16-1010</url>
       <bibkey>hsieh-ma-2016-ji</bibkey>
@@ -134,8 +134,8 @@
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>115–128</pages>
       <url hash="075579c0">O16-1012</url>
@@ -146,7 +146,7 @@
       <author><first>Kuan-Hung</first><last>Chen</last></author>
       <author><first>Shu-Han</first><last>Liao</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>129–130</pages>
       <url hash="c6fd1f03">O16-1013</url>
       <bibkey>chen-etal-2016-ji</bibkey>
@@ -171,8 +171,8 @@
     </paper>
     <paper id="16">
       <title>Speech Intelligibility and the Production of Fricative and Affricate among <fixed-case>M</fixed-case>andarin-speaking Children with Cerebral Palsy</title>
-      <author><first>Chin-Ting</first><last>Liu</last></author>
-      <author><first>Li-mei</first><last>Chen</last></author>
+      <author id="chin-ting-liu"><first>Chin-Ting</first><last>Liu</last></author>
+      <author id="li-mei-chen"><first>Li-mei</first><last>Chen</last></author>
       <author><first>Yu-Ching</first><last>Lin</last></author>
       <author><first>Chia-Fang</first><last>Cheng</last></author>
       <author><first>Hui-chen</first><last>Chang</last></author>
@@ -183,7 +183,7 @@
     <paper id="17">
       <title>網路新興語言&amp;耍’之語意辨析：以批踢踢語料庫為本(On the semantic analysis of the verb shua3 in <fixed-case>T</fixed-case>aiwan <fixed-case>M</fixed-case>andarin: The <fixed-case>PTT</fixed-case> corpus-based study)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsueh-ying</first><last>Hu</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>164–180</pages>
       <url hash="41be6df6">O16-1017</url>
       <bibkey>hu-chung-2016-wang</bibkey>
@@ -209,7 +209,7 @@
     <paper id="20">
       <title>Computing Sentiment Scores of Verb Phrases for <fixed-case>V</fixed-case>ietnamese</title>
       <author><first>Thien Khai</first><last>Tran</last></author>
-      <author><first>Tuoi Thi</first><last>Phan</last></author>
+      <author id="tuoi-thi-phan"><first>Tuoi Thi</first><last>Phan</last></author>
       <pages>204–213</pages>
       <url hash="9fa6db4c">O16-1020</url>
       <bibkey>tran-phan-2016-computing</bibkey>
@@ -243,7 +243,7 @@
       <title>基於深層類神經網路及表示學習技術之文件可讀性分類(Classification of Text Readability Based on Deep Neural Network and Representation Learning Techniques)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
       <author><first>Hsiao-Tsung</first><last>Hung</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>255–270</pages>
       <url hash="c60f7ed2">O16-1024</url>
@@ -255,7 +255,7 @@
       <author><first>Jheng-Hua</first><last>Huang</last></author>
       <author><first>Rui-Jia</first><last>Zhong</last></author>
       <author><first>Liang-Pu</first><last>Chen</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <pages>271–283</pages>
       <url hash="5b762beb">O16-1025</url>
       <bibkey>hung-etal-2016-ming</bibkey>
@@ -270,7 +270,7 @@
     <paper id="27">
       <title>Sarcasm Detection in <fixed-case>C</fixed-case>hinese Using a Crowdsourced Corpus</title>
       <author><first>Shih-Kai</first><last>Lin</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>299–310</pages>
       <url hash="062bb5ad">O16-1027</url>
       <bibkey>lin-hsieh-2016-sarcasm</bibkey>
@@ -295,9 +295,9 @@
     </paper>
     <paper id="30">
       <title>中文近義詞的偵測與判別(Detection and Discrimination of <fixed-case>C</fixed-case>hinese Near-synonyms)[In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Shih-Min</first><last>Li</last></author>
+      <author id="shih-min-li"><first>Shih-Min</first><last>Li</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Ching-Lung</first><last>Lin</last></author>
       <pages>342–351</pages>
@@ -308,7 +308,7 @@
       <title>構建一個中文國小數學文字問題語料庫(Building a Corpus for Developing the <fixed-case>C</fixed-case>hinese Elementary School Math Word Problem Solver)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Shen-Yun</first><last>Miao</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>352–371</pages>
       <url hash="8c291e3c">O16-1031</url>
@@ -326,7 +326,7 @@
     <paper id="33">
       <title>基於多模態主動式學習法進行需備標記樣本之挑選用於候用校長評鑑之自動化評分系統建置(A Multimodal Active Learning Approach toward Identifying Samples to Label during the Development of Automatic Oral Presentation Assessment System for Pre-service Principals Certification Program)[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hung-Ching</first><last>Sun</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <pages>387–401</pages>
       <url hash="87139147">O16-1033</url>
       <bibkey>sun-lee-2016-ji</bibkey>
@@ -345,8 +345,8 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 21, Number 1, June 2016</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Kuang-Hua</first><last>Chen</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></editor>
       <month>June</month>
       <year>2016</year>
       <venue>ijclclp</venue>
@@ -369,7 +369,7 @@
       <author><first>Yung-Chun</first><last>Chang</last></author>
       <author><first>Chun-Han</first><last>Chu</last></author>
       <author><first>Chien Chin</first><last>Chen</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="247a154a">O16-2002</url>
       <bibkey>chang-etal-2016-linguistic</bibkey>
     </paper>
@@ -378,7 +378,7 @@
       <author><first>Yu-Yang</first><last>Huang</last></author>
       <author><first>Rui</first><last>Yan</last></author>
       <author><first>Tsung-Ting</first><last>Kuo</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <url hash="1e9e7cb9">O16-2003</url>
       <bibkey>huang-etal-2016-enriching</bibkey>
     </paper>
@@ -393,8 +393,8 @@
   <volume id="3" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 21, Number 2, <fixed-case>D</fixed-case>ecember 2016</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <month>December</month>
       <year>2016</year>
       <venue>ijclclp</venue>
@@ -406,7 +406,7 @@
     <paper id="1">
       <title>基於詞語分布均勻度的核心詞彙選擇 (A Study on Dispersion Measures for Core Vocabulary Compilation) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Ying-Ni</first><last>Chien</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Ching-Lung</first><last>Lin</last></author>
@@ -416,7 +416,7 @@
     <paper id="2">
       <title>N-best Rescoring for Parsing Based on Dependency-Based Word Embeddings</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <url hash="8778088e">O16-3002</url>
       <bibkey>hsieh-ma-2016-n</bibkey>
     </paper>
@@ -445,7 +445,7 @@
       <author><first>Kuan-Hung</first><last>Chen</last></author>
       <author><first>Shu-Han</first><last>Liao</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <url hash="9e62dda1">O16-3005</url>
       <bibkey>chen-etal-2016-ji-yu-zi</bibkey>
     </paper>
diff --git a/data/xml/O17.xml b/data/xml/O17.xml
index f3d6070a1b..9030efd6cd 100644
--- a/data/xml/O17.xml
+++ b/data/xml/O17.xml
@@ -54,7 +54,7 @@
       <title>A Novel Trajectory-based Spatial-Temporal Spectral Features for Speech Emotion Recognition</title>
       <author><first>Chun-Min</first><last>Chang</last></author>
       <author><first>Wei-Cheng</first><last>Lin</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <pages>52–52</pages>
       <url hash="e1249a61">O17-1005</url>
       <bibkey>chang-etal-2017-novel</bibkey>
@@ -72,14 +72,14 @@
     <paper id="7">
       <title>Exploring Lavender Tongue from Social Media Texts[In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hsiao-Han</first><last>Wu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>68–80</pages>
       <url hash="0296fbc8">O17-1007</url>
       <bibkey>wu-hsieh-2017-exploring</bibkey>
     </paper>
     <paper id="8">
       <title>手機平台 <fixed-case>APP</fixed-case> 之四縣客語輸入法的研發 (Research and Implementation of <fixed-case>S</fixed-case>ixian <fixed-case>H</fixed-case>akka <fixed-case>P</fixed-case>inyin Input Method for Mobile Cell <fixed-case>APP</fixed-case>) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Feng-Long</first><last>Huang</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Huang</last></author>
       <author><first>Kuei-Sen</first><last>Liu</last></author>
       <author><first>Sheng-Yi</first><last>Tseng</last></author>
       <pages>81–100</pages>
@@ -91,7 +91,7 @@
       <author><first>Shih-Kuang</first><last>Lee</last></author>
       <author><first>Syu-Siang</first><last>Wang</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
-      <author><first>Jeih-weih</first><last>Hung</last></author>
+      <author id="jeih-weih-hung"><first>Jeih-weih</first><last>Hung</last></author>
       <pages>101–113</pages>
       <url hash="6c084bc9">O17-1009</url>
       <bibkey>lee-etal-2017-duo</bibkey>
@@ -101,7 +101,7 @@
       <author><first>Yu-Ding</first><last>Lu</last></author>
       <author><first>Hung-Shin</first><last>Lee</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>114–115</pages>
       <url hash="7f9ee2d9">O17-1010</url>
       <bibkey>lu-etal-2017-ji</bibkey>
@@ -109,7 +109,7 @@
     <paper id="11">
       <title>探究不同領域文件之可讀性分析 (Exploring Readability Analysis on Multi-Domain Texts) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <pages>116–118</pages>
       <url hash="14cec447">O17-1011</url>
@@ -119,8 +119,8 @@
       <title>基於i-vector與<fixed-case>PLDA</fixed-case>並使用<fixed-case>GMM</fixed-case>-<fixed-case>HMM</fixed-case>強制對位之自動語者分段標記系統 (Speaker Diarization based on <fixed-case>I</fixed-case>-vector <fixed-case>PLDA</fixed-case> Scoring and using <fixed-case>GMM</fixed-case>-<fixed-case>HMM</fixed-case> Forced Alignment) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Cheng-Jo Ray</first><last>Chang</last></author>
       <author><first>Hung-Shin</first><last>Lee</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <pages>119–135</pages>
       <url hash="1e4b8bbd">O17-1012</url>
       <bibkey>chang-etal-2017-ji</bibkey>
@@ -131,7 +131,7 @@
       <author><first>Chun-Min</first><last>Chang</last></author>
       <author><first>Yu-Shuo</first><last>Liu</last></author>
       <author><first>Shiuan-Kai</first><last>Kao</last></author>
-      <author><first>Chi-Chun</first><last>Lee</last></author>
+      <author id="chi-chun-lee"><first>Chi-Chun</first><last>Lee</last></author>
       <pages>136–147</pages>
       <url hash="54481a7d">O17-1013</url>
       <bibkey>chou-etal-2017-amplifying</bibkey>
@@ -139,9 +139,9 @@
     <paper id="14">
       <title>Question Retrieval with Distributed Representations and Participant Reputation in Community Question Answering</title>
       <author><first>Sam</first><last>Weng</last></author>
-      <author><first>Kevin Chun-Kai</first><last>Wu</last></author>
+      <author id="chun-kai-wu"><first>Kevin Chun-Kai</first><last>Wu</last></author>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>148–148</pages>
       <url hash="eb64840c">O17-1014</url>
       <bibkey>weng-etal-2017-question</bibkey>
@@ -152,7 +152,7 @@
       <author><first>Ying-Wen</first><last>Chen</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>149–151</pages>
       <url hash="8ce049f8">O17-1015</url>
       <bibkey>lo-etal-2017-shi</bibkey>
@@ -160,7 +160,7 @@
     <paper id="16">
       <title>Toward Contextual Valence Shifters in <fixed-case>V</fixed-case>ietnamese Reviews</title>
       <author><first>Thien Khai</first><last>Tran</last></author>
-      <author><first>Tuoi Thi</first><last>Phan</last></author>
+      <author id="tuoi-thi-phan"><first>Tuoi Thi</first><last>Phan</last></author>
       <pages>152–159</pages>
       <url hash="a41d4c51">O17-1016</url>
       <bibkey>tran-phan-2017-toward</bibkey>
@@ -213,7 +213,7 @@
       <title>應用詞向量於語言樣式探勘之研究 (Mining Language Patterns Using Word Embeddings) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Xiang</first><last>Xiao</last></author>
       <author><first>Shao-Zhen</first><last>Ye</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <pages>230–243</pages>
       <url hash="57e5c54c">O17-1022</url>
@@ -239,7 +239,7 @@
     <paper id="25">
       <title><fixed-case>SUT</fixed-case> System Description for Anti-Spoofing 2017 Challenge</title>
       <author><first>Mohammad</first><last>Adiban</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <author><first>Noushin</first><last>Maghsoodi</last></author>
       <author><first>Sajjad</first><last>Shahsavari</last></author>
       <pages>264–275</pages>
@@ -249,7 +249,7 @@
     <paper id="26">
       <title><fixed-case>SUT</fixed-case> Submission for <fixed-case>NIST</fixed-case> 2016 Speaker Recognition Evaluation: Description and Analysis</title>
       <author><first>Hossein</first><last>Zeinali</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <author><first>Noushin</first><last>Maghsoodi</last></author>
       <pages>276–286</pages>
       <url hash="8f436d45">O17-1026</url>
@@ -295,7 +295,7 @@
       <author><first>Yu</first><last>Tsao</last></author>
       <author><first>Ying-Hui</first><last>Lai</last></author>
       <author><first>Hsiang-Ping</first><last>Hsu</last></author>
-      <author><first>Chia-Lung</first><last>Wu</last></author>
+      <author id="chia-lung-wu"><first>Chia-Lung</first><last>Wu</last></author>
       <pages>323–331</pages>
       <url hash="e100f664">O17-1030</url>
       <bibkey>wang-etal-2017-yi-ruan</bibkey>
@@ -324,8 +324,8 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 22, Number 1, June 2017</booktitle>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <month>June</month>
       <year>2017</year>
       <venue>ijclclp</venue>
@@ -340,15 +340,15 @@
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Kai-Wun</first><last>Shih</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="bf53eb9e">O17-2001</url>
       <bibkey>liu-etal-2017-dang</bibkey>
     </paper>
     <paper id="2">
       <title>反義詞「多」和「少」在數量名結構中的不對稱現象－－以語料庫為本的分析 (The Asymmetric Occurences of <i>Dou1</i> and <i>Shao3</i> in the [Numeral + Measure Word/Classifier + Noun] Construction: A Corpus-based Analysis) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wei-Yu</first><last>Chen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="e92c1833">O17-2002</url>
       <bibkey>chen-chung-2017-fan</bibkey>
     </paper>
@@ -365,7 +365,7 @@
   <volume id="3" type="proceedings">
     <meta>
       <booktitle>International Journal of Computational Linguistics &amp; <fixed-case>C</fixed-case>hinese Language Processing, Volume 22, Number 2, <fixed-case>D</fixed-case>ecember 2017-Special Issue on Selected Papers from <fixed-case>ROCLING</fixed-case> <fixed-case>XXIX</fixed-case></booktitle>
-      <editor><first>Chi-Chun (Jeremy)</first><last>Lee</last></editor>
+      <editor id="chi-chun-lee"><first>Chi-Chun (Jeremy)</first><last>Lee</last></editor>
       <editor><first>Cheng-Zen</first><last>Yang</last></editor>
       <month>December</month>
       <year>2017</year>
@@ -387,7 +387,7 @@
       <author><first>Tien-Hong</first><last>Lo</last></author>
       <author><first>Ying-Wen</first><last>Chen</last></author>
       <author><first>Kuan-Yu</first><last>Chen</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <url hash="c82040f2">O17-3002</url>
       <bibkey>lo-etal-2017-yu</bibkey>
@@ -395,9 +395,9 @@
     <paper id="3">
       <title>Question Retrieval with Distributed Representations and Participant Reputation in Community Question Answering</title>
       <author><first>Sam</first><last>Weng</last></author>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <url hash="3ca9cbe6">O17-3003</url>
       <bibkey>weng-etal-2017-question-retrieval</bibkey>
     </paper>
@@ -405,7 +405,7 @@
       <title>探究使用基於類神經網路之特徵於文本可讀性分類 (Exploring the Use of Neural Network based Features for Text Readability Classification) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <url hash="88c135f5">O17-3004</url>
       <bibkey>tseng-etal-2017-tan-jiu</bibkey>
     </paper>
@@ -419,12 +419,12 @@
     </paper>
     <paper id="6">
       <title>基於鑑別式自編碼解碼器之錄音回放攻擊偵測系統 (A Replay Spoofing Detection System Based on Discriminative Autoencoders) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Chia-Lung</first><last>Wu</last></author>
+      <author id="chia-lung-wu"><first>Chia-Lung</first><last>Wu</last></author>
       <author><first>Hsiang-Ping</first><last>Hsu</last></author>
       <author><first>Yu-Ding</first><last>Lu</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
       <author><first>Hung-Shin</first><last>Lee</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <url hash="68fc7d8e">O17-3006</url>
       <bibkey>wu-etal-2017-ji</bibkey>
     </paper>
diff --git a/data/xml/O18.xml b/data/xml/O18.xml
index bfa04542bc..816ff76bce 100644
--- a/data/xml/O18.xml
+++ b/data/xml/O18.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 30th Conference on Computational Linguistics and Speech Processing (<fixed-case>ROCLING</fixed-case> 2018)</booktitle>
       <url hash="622d78df">O18-1</url>
-      <editor><first>Chi-Chun (Jeremy)</first><last>Lee</last></editor>
+      <editor id="chi-chun-lee"><first>Chi-Chun (Jeremy)</first><last>Lee</last></editor>
       <editor><first>Cheng-Zen</first><last>Yang</last></editor>
-      <editor><first>Jen-Tzung</first><last>Chien</last></editor>
+      <editor id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Hsinchu, Taiwan</address>
       <month>October</month>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>基於數字文本相關之語者驗證系統的研究與實作 (Study and Implementation on Digit-related Speaker Verification) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chung-Hung</first><last>Chou</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <author><first>Shan-Wen</first><last>Hsiao</last></author>
       <pages>1–15</pages>
       <url hash="b3c44e8d">O18-1001</url>
@@ -30,7 +30,7 @@
       <title>Isolated and Ensemble Audio Preprocessing Methods for Detecting Adversarial Examples against Automatic Speech Recognition</title>
       <author><first>Krishan</first><last>Rajaratnam</last></author>
       <author><first>Kunal</first><last>Shah</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>16–30</pages>
       <url hash="11e9f99e">O18-1002</url>
       <bibkey>rajaratnam-etal-2018-isolated</bibkey>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>使用性別資訊於語者驗證系統之研究與實作 (A study and implementation on Speaker Verification System using Gender Information) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yu-Jui</first><last>Su</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <author><first>Po-Cheng</first><last>Chan</last></author>
       <pages>31–45</pages>
       <url hash="26cf4f5b">O18-1003</url>
@@ -58,7 +58,7 @@
     <paper id="5">
       <title>繁體中文依存句法剖析器 (Traditional <fixed-case>C</fixed-case>hinese Dependency Parser) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Yen-Hsuan</first><last>Lee</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>61–75</pages>
       <url hash="20517f91">O18-1005</url>
       <bibkey>lee-wang-2018-fan</bibkey>
@@ -97,7 +97,7 @@
       <author><first>Chen-Chou</first><last>Lo</last></author>
       <author><first>Hsin-Te</first><last>Hwang</last></author>
       <author><first>Yu</first><last>Tsao</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <pages>96–110</pages>
       <url hash="391da38b">O18-1009</url>
       <bibkey>huang-etal-2018-wavenet</bibkey>
@@ -115,7 +115,7 @@
     <paper id="11">
       <title>使用長短期記憶類神經網路建構中文語音辨識器之研究 (A study on <fixed-case>M</fixed-case>andarin speech recognition using Long Short-Term Memory neural network) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chien-hung</first><last>Lai</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>114–115</pages>
       <url hash="0adfb576">O18-1011</url>
       <bibkey>lai-wang-2018-shi</bibkey>
@@ -124,7 +124,7 @@
       <title>探索結合快速文本及卷積神經網路於可讀性模型之建立 (Exploring Combination of <fixed-case>F</fixed-case>ast<fixed-case>T</fixed-case>ext and Convolutional Neural Networks for Building Readability Models) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Hou-Chiang</first><last>Tseng</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
-      <author><first>Yao-Ting</first><last>Sung</last></author>
+      <author id="yao-ting-sung"><first>Yao-Ting</first><last>Sung</last></author>
       <pages>116–125</pages>
       <url hash="ae706a6d">O18-1012</url>
       <bibkey>tseng-etal-2018-tan</bibkey>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="15">
       <title>智慧手機客語拼音輸入法之研發-以臺灣海陸腔為例 (Research and Implementation of <fixed-case>H</fixed-case>akka <fixed-case>P</fixed-case>inyin Input Method for Mobile Cell - An Example of <fixed-case>T</fixed-case>aiwan <fixed-case>H</fixed-case>io<fixed-case>L</fixed-case>iuk Accent) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Feng-Long</first><last>Huang</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Huang</last></author>
       <author><first>Ming-Chan</first><last>Liu</last></author>
       <pages>142–156</pages>
       <url hash="937d361f">O18-1015</url>
@@ -156,17 +156,17 @@
     <paper id="16">
       <title>以深層類神經網路標記中文階層式多標籤語意概念 (Hierarchical Multi-Label <fixed-case>C</fixed-case>hinese Word Semantic Labeling using Deep Neural Network ) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wei-Chieh</first><last>Chou</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>157–157</pages>
       <url hash="e90cffc4">O18-1016</url>
       <bibkey>chou-wang-2018-yi</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>LENA</fixed-case> computerized automatic analysis of speech development from birth to three</title>
-      <author><first>Li-Mei</first><last>Chen</last></author>
+      <author id="li-mei-chen"><first>Li-Mei</first><last>Chen</last></author>
       <author><first>D. Kimbrough</first><last>Oller</last></author>
       <author><first>Chia-Cheng</first><last>Lee</last></author>
-      <author><first>Chin-Ting Jimbo</first><last>Liu</last></author>
+      <author id="chin-ting-liu"><first>Chin-Ting Jimbo</first><last>Liu</last></author>
       <pages>158–168</pages>
       <url hash="225f6c4a">O18-1017</url>
       <bibkey>chen-etal-2018-lena</bibkey>
@@ -260,7 +260,7 @@
       <author><first>Shih-Ying</first><last>Chang</last></author>
       <author><first>Tsu-Jin</first><last>Chiu</last></author>
       <author><first>Ming-Chiao</first><last>Tsai</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>276–285</pages>
       <url hash="f4b393a2">O18-1027</url>
       <bibkey>chen-etal-2018-jie</bibkey>
diff --git a/data/xml/O88.xml b/data/xml/O88.xml
index a5af2b066e..5dcf40e2a6 100644
--- a/data/xml/O88.xml
+++ b/data/xml/O88.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of Rocling <fixed-case>I</fixed-case> Computational Linguistics Conference <fixed-case>I</fixed-case></booktitle>
       <url hash="cb9e17e1">O88-1</url>
-      <editor><first>Keh-Jiann</first><last>Chen</last></editor>
+      <editor id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Nantou, Taiwan</address>
@@ -25,7 +25,7 @@
     </paper>
     <paper id="2">
       <title>漢語的時間詞組和語言剖析 (The Temporal Expressions of <fixed-case>M</fixed-case>andarin and Language Parsing) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Li-ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-ping</first><last>Chang</last></author>
       <pages>73–86</pages>
       <url hash="9990bbb8">O88-1002</url>
       <bibkey>chang-1988-han</bibkey>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="5">
       <title>A New Approach to Quality Text Generation</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Hwei-Ming</first><last>Kou</last></author>
       <pages>163–177</pages>
       <url hash="11d5ebec">O88-1005</url>
@@ -65,9 +65,9 @@
     <paper id="7">
       <title>The Parsing Environment for <fixed-case>M</fixed-case>andarin Syntax</title>
       <author><first>I-Peng</first><last>Lin</last></author>
-      <author><first>Shuan-fan</first><last>Huang</last></author>
+      <author id="shuan-fan-huang"><first>Shuan-fan</first><last>Huang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
-      <author><first>Ka-Wai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Ka-Wai</first><last>Chui</last></author>
       <pages>211-214</pages>
       <url hash="e7ffcd0e">O88-1007</url>
       <bibkey>lin-etal-1988-parsing</bibkey>
diff --git a/data/xml/O89.xml b/data/xml/O89.xml
index 3e36f4a100..d16cb9d9a0 100644
--- a/data/xml/O89.xml
+++ b/data/xml/O89.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of Rocling <fixed-case>II</fixed-case> Computational Linguistics Conference <fixed-case>II</fixed-case></booktitle>
       <url hash="d3c722d9">O89-1</url>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
-      <editor><first>Keh-Jiann</first><last>Chen</last></editor>
+      <editor id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Nantou, Taiwan</address>
       <month>September</month>
@@ -19,7 +19,7 @@
       <title>The Identification Of Thematic Roles In Parsing <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Li-ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-ping</first><last>Chang</last></author>
       <pages>123–145</pages>
       <url hash="fa8a1054">O89-1001</url>
       <bibkey>chen-etal-1989-identification</bibkey>
@@ -35,9 +35,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>NTUMT</fixed-case> Strategy for Prepositional Phrase Attachment</title>
-      <author><first>Ka-Wai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Ka-Wai</first><last>Chui</last></author>
       <author><first>Yia-ping</first><last>Lin</last></author>
-      <author><first>Shuan-Fan</first><last>Huang</last></author>
+      <author id="shuan-fan-huang"><first>Shuan-Fan</first><last>Huang</last></author>
       <author><first>I-Peng</first><last>Lin</last></author>
       <pages>163–186</pages>
       <url hash="d3ca89bf">O89-1003</url>
@@ -46,7 +46,7 @@
     <paper id="4">
       <title>Systemic Generation of <fixed-case>C</fixed-case>hinese Sentences</title>
       <author><first>Hwei-Ming</first><last>Kuo</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>189–212</pages>
       <url hash="2ebd35a3">O89-1004</url>
       <bibkey>kuo-chang-1989-systemic</bibkey>
@@ -77,7 +77,7 @@
     <paper id="8">
       <title>Parsing <fixed-case>E</fixed-case>nglish Conjunctions And Comparatives Using The Wait-And-See Strategy</title>
       <author><first>Rey-Long</first><last>Liu</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>291–310</pages>
       <url hash="128eddd0">O89-1008</url>
       <bibkey>liu-soo-1989-parsing</bibkey>
diff --git a/data/xml/O90.xml b/data/xml/O90.xml
index 09c79f96a3..a88918d696 100644
--- a/data/xml/O90.xml
+++ b/data/xml/O90.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of Rocling <fixed-case>III</fixed-case> Computational Linguistics Conference <fixed-case>III</fixed-case></booktitle>
       <url hash="ee87d55f">O90-1</url>
-      <editor><first>Jason J.</first><last>Chang</last></editor>
-      <editor><first>Von-Wun</first><last>Soo</last></editor>
+      <editor id="jason-s-chang"><first>Jason J.</first><last>Chang</last></editor>
+      <editor id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
       <month>September</month>
@@ -63,7 +63,7 @@
     <paper id="6">
       <title>詞彙訊息的層次表達與管理 (Hierarchical Representation of Word Information and Management) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>297–310</pages>
       <url hash="bb9a8ac7">O90-1006</url>
       <bibkey>chien-chen-1990-ci</bibkey>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="10">
       <title>An application of statistical optimization with dynamic programming to phonemic-input-to-character conversion for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>379–390</pages>
       <url hash="751664d2">O90-1010</url>
       <bibkey>sproat-1990-application</bibkey>
diff --git a/data/xml/O91.xml b/data/xml/O91.xml
index fc7b27214c..0ce975d27b 100644
--- a/data/xml/O91.xml
+++ b/data/xml/O91.xml
@@ -17,7 +17,7 @@
     <paper id="1">
       <title>連接詞的語法表達模式－以中文訊息格位語法(<fixed-case>ICG</fixed-case>)為本的表達形式 (The Grammar Representation of Conjunctions – a Representation Based on <fixed-case>ICG</fixed-case>) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Wen-Jen</first><last>Wei</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>79–95</pages>
       <url hash="919e0f7b">O91-1001</url>
       <bibkey>wei-chen-1991-lian</bibkey>
@@ -34,7 +34,7 @@
       <title>Determinative-Measure Compounds in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Formation Rules and Parser Implementation</title>
       <author><first>Ruo-ping</first><last>Mo</last></author>
       <author><first>Yao-Jung</first><last>Yang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>111–134</pages>
       <url hash="45e009ce">O91-1003</url>
@@ -42,9 +42,9 @@
     </paper>
     <paper id="4">
       <title>限制式滿足及機率最佳化的中文斷詞方法 (<fixed-case>C</fixed-case>hinese Word Segmentation based on Constraint satisfaction and Statistical Optimization) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Zhi-Da</first><last>Chen</last></author>
-      <author><first>Shun-Der</first><last>Chen</last></author>
+      <author id="shun-der-chen"><first>Shun-Der</first><last>Chen</last></author>
       <pages>147–165</pages>
       <url hash="7e8f879f">O91-1004</url>
       <bibkey>chang-etal-1991-xian</bibkey>
@@ -59,27 +59,27 @@
     <paper id="6">
       <title>Lexicon-Driven Transfer In <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Translation</title>
       <author><first>Chung-Teng</first><last>Sun</last></author>
-      <author><first>Jyun-Sheng</first><last>Chang</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-Sheng</first><last>Chang</last></author>
       <pages>193–214</pages>
       <url hash="e4e793c0">O91-1006</url>
       <bibkey>sun-chang-1991-lexicon</bibkey>
     </paper>
     <paper id="7">
       <title>Automatic <fixed-case>C</fixed-case>hinese Text Generation Based On Inference Trees</title>
-      <author><first>Hing-Lung</first><last>Lin</last></author>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>Hing-Cheung</first><last>Ho</last></author>
-      <author><first>Bong-Yeung</first><last>Lai</last></author>
-      <author><first>Suen Caesar</first><last>Lun</last></author>
+      <author id="hing-lung-lin"><first>Hing-Lung</first><last>Lin</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="hing-cheung-ho"><first>Hing-Cheung</first><last>Ho</last></author>
+      <author id="tom-bong-yeung-lai"><first>Bong-Yeung</first><last>Lai</last></author>
+      <author id="suen-caesar-lun"><first>Suen Caesar</first><last>Lun</last></author>
       <author><first>Chi-Yuen</first><last>Choi</last></author>
-      <author><first>Chun-yu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chun-yu</first><last>Kit</last></author>
       <pages>215–236</pages>
       <url hash="59f5d958">O91-1007</url>
       <bibkey>lin-etal-1991-automatic</bibkey>
     </paper>
     <paper id="8">
       <title>A Trace &amp; Unification Grammar For <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Hans Ulrich</first><last>Block</last></author>
+      <author id="hans-ulrich-block"><first>Hans Ulrich</first><last>Block</last></author>
       <author><first>Ping</first><last>Peng</last></author>
       <pages>237–255</pages>
       <url hash="7b06e385">O91-1008</url>
@@ -88,7 +88,7 @@
     <paper id="9">
       <title>Constructing A Phrase Structure Grammar By Incorporating Linguistic Knowledge And Statistical Log-Likelihood Ratio</title>
       <author><first>Keh-Yih</first><last>Su</last></author>
-      <author><first>Yu-Ling</first><last>Hsu</last></author>
+      <author id="yu-ling-una-hsu"><first>Yu-Ling</first><last>Hsu</last></author>
       <author><first>Claire</first><last>Saillard</last></author>
       <pages>257–275</pages>
       <url hash="991bfb52">O91-1009</url>
@@ -99,7 +99,7 @@
       <author><first>Hsien-Chin</first><last>Liou</last></author>
       <author><first>Hui-Li</first><last>Hsu</last></author>
       <author><first>Yong-Chang</first><last>Huang</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>277–302</pages>
       <url hash="3e772196">O91-1010</url>
       <bibkey>liou-etal-1991-development</bibkey>
@@ -107,7 +107,7 @@
     <paper id="11">
       <title>Training A Recurrent Neural Network to Parse Syntactically Ambiguous and Ill-Formed Sentences</title>
       <author><first>Ssu-Liang</first><last>Lin</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>303–317</pages>
       <url hash="780b8b9e">O91-1011</url>
       <bibkey>lin-soo-1991-training</bibkey>
diff --git a/data/xml/O92.xml b/data/xml/O92.xml
index 1cf153dc06..13d1bfd963 100644
--- a/data/xml/O92.xml
+++ b/data/xml/O92.xml
@@ -17,7 +17,7 @@
     <paper id="1">
       <title>Discrimination Oriented Probabilistic Tagging</title>
       <author><first>Yi-Chung</first><last>Lin</last></author>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>87–96</pages>
       <url hash="197cc4f3">O92-1001</url>
@@ -26,14 +26,14 @@
     <paper id="2">
       <title>Acquisition of Unbounded Dependency Using Explanation-Based Learning</title>
       <author><first>Rey-Long</first><last>Liu</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>99–119</pages>
       <url hash="c268d7d1">O92-1002</url>
       <bibkey>liu-soo-1992-acquisition</bibkey>
     </paper>
     <paper id="3">
       <title>Statistical Models for Word Segmentation And Unknown Word Resolution</title>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Jing-Shin</first><last>Chang</last></author>
       <author><first>Ming-Yu</first><last>Lin</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
@@ -44,7 +44,7 @@
     <paper id="4">
       <title>A Modular and Statistical Approach to Machine Translation</title>
       <author><first>Dah-Yih</first><last>Wang</last></author>
-      <author><first>Jyun-Sheng</first><last>Chang</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-Sheng</first><last>Chang</last></author>
       <pages>149-175</pages>
       <url hash="11e5b233">O92-1004</url>
       <bibkey>wang-chang-1992-modular</bibkey>
@@ -54,7 +54,7 @@
       <author><first>Marie Meili</first><last>Yeh</last></author>
       <author><first>Chih-Chen</first><last>Tang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>179–193</pages>
       <url hash="d9bbb4c0">O92-1005</url>
       <bibkey>yeh-etal-1992-han</bibkey>
@@ -69,10 +69,10 @@
     </paper>
     <paper id="7">
       <title>Reduplication In <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: Their Formation Rules, Syntactic Behavior And <fixed-case>ICG</fixed-case> Representation</title>
-      <author><first>Feng-yi</first><last>Chen</last></author>
+      <author id="feng-yi-chen"><first>Feng-yi</first><last>Chen</last></author>
       <author><first>Ruo-ping</first><last>Mo</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>217–233</pages>
       <url hash="daa0f933">O92-1007</url>
       <bibkey>chen-etal-1992-reduplication</bibkey>
diff --git a/data/xml/O93.xml b/data/xml/O93.xml
index f562ef8312..fba662665f 100644
--- a/data/xml/O93.xml
+++ b/data/xml/O93.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of Rocling <fixed-case>VI</fixed-case> Computational Linguistics Conference <fixed-case>VI</fixed-case></booktitle>
       <url hash="8e3e7b7d">O93-1</url>
-      <editor><first>Keh-Jiann</first><last>Chen</last></editor>
+      <editor id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Nantou, Taiwan</address>
@@ -18,7 +18,7 @@
     <paper id="1">
       <title>Automatic Clustering of <fixed-case>C</fixed-case>hinese Characters and Words</title>
       <author><first>Chao-Huang</first><last>Chang</last></author>
-      <author><first>Cheng-Der</first><last>Chen</last></author>
+      <author id="cheng-der-chen"><first>Cheng-Der</first><last>Chen</last></author>
       <pages>57–78</pages>
       <url hash="6c7ee89d">O93-1001</url>
       <bibkey>chang-chen-1993-automatic</bibkey>
@@ -33,7 +33,7 @@
     </paper>
     <paper id="3">
       <title>A Probabilistic Chunker</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>99-117</pages>
       <url hash="5f7f8ba8">O93-1003</url>
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>A Preliminary Study On Unknown Word Problem In <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Ming-Yu</first><last>Lin</last></author>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>119–141</pages>
       <url hash="1765bef6">O93-1004</url>
@@ -53,7 +53,7 @@
       <author><first>Sung-Chen</first><last>Lin</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
       <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>143–160</pages>
       <url hash="f0d9bd60">O93-1005</url>
       <bibkey>lin-etal-1993-guo</bibkey>
@@ -61,7 +61,7 @@
     <paper id="6">
       <title>Corpus-based Automatic Rule Selection in Designing a Grammar Checker</title>
       <author><first>Yuan-Ling</first><last>Liu</last></author>
-      <author><first>Shih-ping</first><last>Wang</last></author>
+      <author id="shih-ping-wang"><first>Shih-ping</first><last>Wang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>161–171</pages>
       <url hash="d9db1295">O93-1006</url>
@@ -70,16 +70,16 @@
     <paper id="7">
       <title>中文辭彙岐義之研究─斷詞與詞性標示 (The Resolution of Lexicon Ambiguity in <fixed-case>C</fixed-case>hinese - Segmentation and Tagging) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Tsai-Yen</first><last>Peng</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>173–193</pages>
       <url hash="d45c514c">O93-1007</url>
       <bibkey>peng-chang-1993-zhong</bibkey>
     </paper>
     <paper id="8">
       <title>從中文語料庫中自動選取連續國語語音特性平衡句的方法 (Automatic Selection of Phonetically Rich Sentences from A <fixed-case>C</fixed-case>hinese Text Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Yuan-Cheng</first><last>Chang</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>195–206</pages>
       <url hash="3a85ab4b">O93-1008</url>
       <bibkey>wang-etal-1993-cong</bibkey>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="10">
       <title>中文文件自動分類之研究 (A Study of Document Auto-Classification in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Un-Gian</first><last>Iunn</last></author>
+      <author id="un-gian-iunn"><first>Un-Gian</first><last>Iunn</last></author>
       <author><first>Ching-Cyun</first><last>Hsien</last></author>
       <author><first>Shu-Mei</first><last>Chen</last></author>
       <author><first>Keh-Jiann</first><last>Chen</last></author>
@@ -120,7 +120,7 @@
     <paper id="1">
       <title><fixed-case>FAWRMT</fixed-case>: With Special Emphasis On Grammar Designs And Partitioned Parsing</title>
       <author><first>Andy Wong Man</first><last>Hon</last></author>
-      <author><first>Suen Caesar</first><last>Lun</last></author>
+      <author id="suen-caesar-lun"><first>Suen Caesar</first><last>Lun</last></author>
       <pages>235–258</pages>
       <url hash="ebb4800a">O93-2001</url>
       <bibkey>hon-lun-1993-fawrmt</bibkey>
@@ -128,7 +128,7 @@
     <paper id="2">
       <title>Toward Discourse-guided Theta-grid Chart Parsing for Madarin <fixed-case>C</fixed-case>hinese – A Preliminary Report</title>
       <author><first>Koong H. C.</first><last>Lin</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>259–270</pages>
       <url hash="6f2be392">O93-2002</url>
       <bibkey>lin-soo-1993-toward</bibkey>
@@ -137,8 +137,8 @@
       <title>Developing a <fixed-case>C</fixed-case>hinese Module in <fixed-case>UNITRAN</fixed-case></title>
       <author><first>Zhibiao</first><last>Wu</last></author>
       <author><first>Loke Soo</first><last>Hsu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>271–284</pages>
       <url hash="52319577">O93-2003</url>
       <bibkey>wu-etal-1993-developing</bibkey>
diff --git a/data/xml/O94.xml b/data/xml/O94.xml
index a0a740f947..78551feb94 100644
--- a/data/xml/O94.xml
+++ b/data/xml/O94.xml
@@ -18,16 +18,16 @@
       <author><first>Yuan-Cheng</first><last>Chang</last></author>
       <author><first>Sung-Chen</first><last>Lin</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>17–34</pages>
       <bibkey>chang-etal-1994-guo</bibkey>
     </paper>
     <paper id="2">
       <title>Yanhui (宴會), a Softwre Based High Performance <fixed-case>M</fixed-case>andarin Text-To-Speech System</title>
       <author><first>John</first><last>Choi</last></author>
-      <author><first>Hsiao-Wuen</first><last>Hon</last></author>
-      <author><first>Jean-Luc</first><last>Lebrun</last></author>
+      <author id="hsiao-wuen-hon"><first>Hsiao-Wuen</first><last>Hon</last></author>
+      <author id="jean-luc-lebrun"><first>Jean-Luc</first><last>Lebrun</last></author>
       <author><first>Sun-Pin</first><last>Lee</last></author>
       <author><first>Gareth</first><last>Loudon</last></author>
       <author><first>Viet-Hoang</first><last>Phan</last></author>
@@ -52,9 +52,9 @@
     </paper>
     <paper id="5">
       <title>A Practical Tagger for <fixed-case>C</fixed-case>hinese Corpora</title>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Shing-Huan</first><last>Liu</last></author>
-      <author><first>Li-ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-ping</first><last>Chang</last></author>
       <author><first>Yeh-Hao</first><last>Chin</last></author>
       <pages>111-126</pages>
       <url hash="4cdb2386">O94-1005</url>
@@ -62,14 +62,14 @@
     </paper>
     <paper id="6">
       <title>Automatic Terminology Extraction For Thematic Corpus Based On Subterm Co-Occurrence</title>
-      <author><first>Chun-yu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chun-yu</first><last>Kit</last></author>
       <pages>127–134</pages>
       <url hash="9661c2c7">O94-1006</url>
       <bibkey>kit-1994-automatic</bibkey>
     </paper>
     <paper id="7">
       <title>An Estimation of the Entropy of <fixed-case>C</fixed-case>hinese – A New Approach to Constructing Class-based n-gram Models</title>
-      <author><first>Jyun-sheng</first><last>Chang</last></author>
+      <author id="jyun-sheng-chang"><first>Jyun-sheng</first><last>Chang</last></author>
       <author><first>Yuh-Juh</first><last>Lin</last></author>
       <pages>149–169</pages>
       <url hash="612602fd">O94-1007</url>
@@ -77,7 +77,7 @@
     </paper>
     <paper id="8">
       <title>Some Issues on Applying <fixed-case>SA</fixed-case>-class Bigram Language Models</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
       <author><first>Keh-Hwa</first><last>Shyu</last></author>
       <author><first>Eng-Fong</first><last>Huang</last></author>
       <author><first>Bor-Shenn</first><last>Jeng</last></author>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="9">
       <title>A Text Conversion System Between Simplified and Complex <fixed-case>C</fixed-case>hinese Characters Based on <fixed-case>OCR</fixed-case> Approaches</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
       <author><first>Keh-Hwa</first><last>Shyu</last></author>
       <author><first>Eng-Fong</first><last>Huang</last></author>
       <author><first>Bor-Shenn</first><last>Jeng</last></author>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="2">
       <title>使用新式注音鍵盤及複合馬可夫語言模型之中文輸入系統 (A <fixed-case>C</fixed-case>hinese-character Inputting System Using a New Type of Phonetic Keyboard and a Compound <fixed-case>M</fixed-case>arkov Language Model) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
       <author><first>Jr-yiau</first><last>Chen</last></author>
       <pages>253–262</pages>
       <url hash="bd85263a">O94-2002</url>
@@ -148,7 +148,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>C</fixed-case>hinese-Word Segmentation Based On Maximal-Matching And Bigram Techniques</title>
-      <author><first>Wing-Pong</first><last>Luk</last></author>
+      <author id="robert-wing-pong-luk"><first>Wing-Pong</first><last>Luk</last></author>
       <pages>273–282</pages>
       <url hash="057e9837">O94-2004</url>
       <bibkey>luk-1994-chinese</bibkey>
diff --git a/data/xml/O95.xml b/data/xml/O95.xml
index cee1198166..7ebab9f815 100644
--- a/data/xml/O95.xml
+++ b/data/xml/O95.xml
@@ -25,14 +25,14 @@
     <paper id="2">
       <title>適合大量中文文件全文檢索的索引及資料壓縮技術 (Full-text Indexing and Data Compression for <fixed-case>C</fixed-case>hinese Documents) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>Hung-yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
       <pages>31–42</pages>
       <url hash="f5cb5dba">O95-1002</url>
       <bibkey>chien-gu-1995-shi</bibkey>
     </paper>
     <paper id="3">
       <title>The New Generation <fixed-case>B</fixed-case>ehavior<fixed-case>T</fixed-case>ran: Design Philosophy And System Architecture</title>
-      <author><first>Yu-Ling Una</first><last>Hsu</last></author>
+      <author id="yu-ling-una-hsu"><first>Yu-Ling Una</first><last>Hsu</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>65–79</pages>
       <url hash="258591a6">O95-1003</url>
@@ -59,7 +59,7 @@
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Haihua</first><last>Pan</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Vincent</first><last>Lum</last></author>
       <pages>137–153</pages>
       <url hash="dd74aca3">O95-1006</url>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="8">
       <title>A Unifying Approach To Segmentation Of <fixed-case>C</fixed-case>hinese And Its Application To Text Retrieval</title>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Xiaobo</first><last>Ren</last></author>
       <author><first>Martin</first><last>Brisebois</last></author>
       <pages>175–190</pages>
diff --git a/data/xml/O96.xml b/data/xml/O96.xml
index a75a21e86d..440389e66d 100644
--- a/data/xml/O96.xml
+++ b/data/xml/O96.xml
@@ -18,16 +18,16 @@
     <paper id="1">
       <title>中文連音二字詞之語音合成 (Coarticulation of Two-Syllable Words in <fixed-case>M</fixed-case>andarin Speech Synthesis) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jun-Wen</first><last>Hwang</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Shyh-Yang</first><last>Hwang</last></author>
-      <author><first>Ming-Jer</first><last>Wu</last></author>
+      <author id="ming-jer-wu"><first>Ming-Jer</first><last>Wu</last></author>
       <pages>37–60</pages>
       <url hash="e24a7ea8">O96-1001</url>
       <bibkey>hwang-etal-1996-zhong</bibkey>
     </paper>
     <paper id="2">
       <title>時間比例基週波形內差 – 一個國語音節信號合成之新方法 (Time-Proportionated Interpolation of Pitch Waveforms – A New Method for <fixed-case>M</fixed-case>andarin Syllable-Signal Synthesis) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
       <author><first>Wen-lung</first><last>Shiu</last></author>
       <pages>61–84</pages>
       <url hash="7a76d179">O96-1002</url>
@@ -70,8 +70,8 @@
       <author><first>Sung-Chen</first><last>Lin</last></author>
       <author><first>Jyi-Lung</first><last>Tsai</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>159–182</pages>
       <url hash="8a21a4c4">O96-1006</url>
       <bibkey>lin-etal-1996-guo</bibkey>
@@ -87,15 +87,15 @@
     </paper>
     <paper id="8">
       <title>A Preliminary Study of Disambiguating <fixed-case>VO</fixed-case>-and <fixed-case>VN</fixed-case>-Constructions Using Selection Preferences</title>
-      <author><first>Kok-Wee</first><last>Gan</last></author>
+      <author id="kok-wee-gan"><first>Kok-Wee</first><last>Gan</last></author>
       <pages>233–253</pages>
       <url hash="da404829">O96-1008</url>
       <bibkey>gan-1996-preliminary</bibkey>
     </paper>
     <paper id="9">
       <title>語料庫在辭典編輯上的運用 (The Application of Language Corpus on Dictionary Editing) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>255–279</pages>
       <url hash="af0c855c">O96-1009</url>
@@ -103,18 +103,18 @@
     </paper>
     <paper id="10">
       <title>語料庫為本的語義訊息抽取與辨析以近義詞研究為例 (Synonym Discrimination Based on Corpus) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Mei-Chih</first><last>Tsai</last></author>
+      <author id="mei-chih-tsai"><first>Mei-Chih</first><last>Tsai</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>281–293</pages>
       <url hash="1be88a02">O96-1010</url>
       <bibkey>tsai-etal-1996-yu</bibkey>
     </paper>
     <paper id="11">
       <title>介詞翻譯法則的自動擷取 (Learning to Translate <fixed-case>E</fixed-case>nglish Prepositions) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Ruei-Hung</first><last>Hsu</last></author>
-      <author><first>Huey-Chyun</first><last>Chen</last></author>
+      <author id="huey-chyun-chen"><first>Huey-Chyun</first><last>Chen</last></author>
       <pages>295–320</pages>
       <url hash="f341e125">O96-1011</url>
       <bibkey>chang-etal-1996-jie</bibkey>
@@ -148,7 +148,7 @@
     <paper id="2">
       <title>Issues in Text-to-Speech Conversion for <fixed-case>M</fixed-case>andarin</title>
       <author><first>Chilin</first><last>Shih</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>37–86</pages>
       <url hash="064e8bb0">O96-2002</url>
       <bibkey>shih-sproat-1996-issues</bibkey>
@@ -157,7 +157,7 @@
       <title>A <fixed-case>M</fixed-case>andarin Text-to-Speech System</title>
       <author><first>Sin-Horng</first><last>Chen</last></author>
       <author><first>Shaw-Hwa</first><last>Hwang</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>87–100</pages>
       <url hash="3d6c8218">O96-2003</url>
       <bibkey>chen-etal-1996-mandarin</bibkey>
@@ -165,7 +165,7 @@
     <paper id="4">
       <title>An Overview of Corpus-Based Statistics-Oriented (<fixed-case>CBSO</fixed-case>) Techniques for Natural Language Processing</title>
       <author><first>Keh-Yih</first><last>Su</last></author>
-      <author><first>Tung-Hui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>Tung-Hui</first><last>Chiang</last></author>
       <author><first>Jing-Shin</first><last>Chang</last></author>
       <pages>101–158</pages>
       <url hash="489ba83d">O96-2004</url>
@@ -173,7 +173,7 @@
     </paper>
     <paper id="5">
       <title>A Hybrid Approach to Machine Translation System Design</title>
-      <author><first>Kuang-Hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>159–182</pages>
       <url hash="b5ed70a7">O96-2005</url>
@@ -181,7 +181,7 @@
     </paper>
     <paper id="6">
       <title>A Model for Robust <fixed-case>C</fixed-case>hinese Parser</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>183–204</pages>
       <url hash="1116bd28">O96-2006</url>
       <bibkey>chen-1996-model</bibkey>
diff --git a/data/xml/O97.xml b/data/xml/O97.xml
index 10a900b921..5fa274390f 100644
--- a/data/xml/O97.xml
+++ b/data/xml/O97.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 10th Research on Computational Linguistics International Conference</booktitle>
       <url hash="4b4e34dd">O97-1</url>
-      <editor><first>Keh-Jiann</first><last>Chen</last></editor>
+      <editor id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
-      <editor><first>Richard</first><last>Sproat</last></editor>
+      <editor id="richard-sproat"><first>Richard</first><last>Sproat</last></editor>
       <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
       <address>Taipei, Taiwan</address>
       <month>August</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Meaning Representation and Meaning Instantiation for <fixed-case>C</fixed-case>hinese Nominals</title>
       <author><first>Kathleen</first><last>Ahrens</last></author>
-      <author><first>Li-li</first><last>Chang</last></author>
+      <author id="li-li-chang"><first>Li-li</first><last>Chang</last></author>
       <author><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>4–18</pages>
@@ -36,7 +36,7 @@
     </paper>
     <paper id="3">
       <title>Towards a Representation of Verbal Semantics – An Approach Based on Near Synonyms</title>
-      <author><first>Mei-chih</first><last>Tsai</last></author>
+      <author id="mei-chih-tsai"><first>Mei-chih</first><last>Tsai</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
@@ -48,15 +48,15 @@
       <title>Word Sense Disambiguation Based on The Information Theory</title>
       <author><first>Ho</first><last>Lee</last></author>
       <author><first>Dae-Ho</first><last>Baek</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>49–58</pages>
       <url hash="208e7ac4">O97-1004</url>
       <bibkey>lee-etal-1997-word</bibkey>
     </paper>
     <paper id="5">
       <title>An Agreement Error Correction Method Based on a Multicriteria Approach: An Application to <fixed-case>A</fixed-case>rabic Language</title>
-      <author><last>Belguith Hadrich</last><first>Lamia</first></author>
-      <author><last>Ben Hamadou</last><first>Abdelmajid</first></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Belguith Hadrich</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <author><last>Aloulou</last><first>Chafik</first></author>
       <pages>59–75</pages>
       <url hash="c3312788">O97-1005</url>
@@ -113,16 +113,16 @@
     <paper id="12">
       <title>Analyzing the Complexity of a Domain With Respect To An Information Extraction Task</title>
       <author><first>Amit</first><last>Bagga</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <pages>175–194</pages>
       <url hash="c7bac6d3">O97-1012</url>
       <bibkey>bagga-biermann-1997-analyzing</bibkey>
     </paper>
     <paper id="13">
       <title>Human Judgment as a Basis for Evaluation of Discourse-Connective-based Full-text Abstraction in <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>Hing-Lung</first><last>Lin</last></author>
-      <author><first>Tom B. Y.</first><last>Lai</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="hing-lung-lin"><first>Hing-Lung</first><last>Lin</last></author>
+      <author id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></author>
       <pages>195–208</pages>
       <url hash="dfaebf23">O97-1013</url>
       <bibkey>tsou-etal-1997-human</bibkey>
@@ -131,7 +131,7 @@
       <title>An Assessment on Character-based <fixed-case>C</fixed-case>hinese News Filtering Using Latent Semantic Indexing</title>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Pey-Ching</first><last>Yang</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>209–223</pages>
       <url hash="0d3f4c28">O97-1014</url>
       <bibkey>wu-etal-1997-assessment</bibkey>
@@ -141,7 +141,7 @@
       <author><first>Tai-Hsuan</first><last>Ho</last></author>
       <author><first>Kae-Cherng</first><last>Yang</last></author>
       <author><first>Juei-Sung</first><last>Lin</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>287–299</pages>
       <url hash="67994bf4">O97-1015</url>
       <bibkey>ho-etal-1997-integrating</bibkey>
@@ -179,7 +179,7 @@
       <author><first>Kae-Cherng</first><last>Yang</last></author>
       <author><first>Tai-Hsuan</first><last>Ho</last></author>
       <author><first>Juei-Sung</first><last>Lin</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>335–344</pages>
       <url hash="6494ff0a">O97-1019</url>
       <bibkey>yang-etal-1997-truncation</bibkey>
@@ -188,14 +188,14 @@
       <title>Recognizing <fixed-case>K</fixed-case>orean Unknown Proper Nouns by Using Automatically Extracted Lexical Clues</title>
       <author><first>Bong-Rae</first><last>Park</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>345–356</pages>
       <url hash="d560cc43">O97-1020</url>
       <bibkey>park-etal-1997-recognizing</bibkey>
     </paper>
     <paper id="21">
       <title>Logical Operators and Quantifiers in Natural Language</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kazunori</first><last>Muraki</last></author>
       <pages>357–367</pages>
       <url hash="1d1229f9">O97-1021</url>
@@ -204,7 +204,7 @@
     <paper id="22">
       <title><fixed-case>C</fixed-case>hinese Text Compression Using <fixed-case>C</fixed-case>hinese Language Information Processing [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Jun</first><last>Gao</last></author>
-      <author><first>Xixian</first><last>Chen</last></author>
+      <author id="xixian-chen"><first>Xixian</first><last>Chen</last></author>
       <pages>368–379</pages>
       <url hash="c7721a82">O97-1022</url>
       <bibkey>gao-chen-1997-chinese</bibkey>
@@ -237,7 +237,7 @@
     <paper id="26">
       <title>Rejection in Speech Recognition Based on <fixed-case>CDCPM</fixed-case>s</title>
       <author><first>Mingxing</first><last>Xu</last></author>
-      <author><first>Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Fang</first><last>Zheng</last></author>
       <author><first>Wenhu</first><last>Wu</last></author>
       <pages>412–419</pages>
       <url hash="d4ab8afe">O97-1026</url>
@@ -267,10 +267,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation and Part-of-Speech Tagging in One Step</title>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>S. Caesar</first><last>Lun</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="suen-caesar-lun"><first>S. Caesar</first><last>Lun</last></author>
       <pages>229–236</pages>
       <url hash="3c5759b7">O97-2002</url>
       <bibkey>lai-etal-1997-chinese</bibkey>
@@ -278,15 +278,15 @@
     <paper id="3">
       <title>Corpus-Based <fixed-case>C</fixed-case>hinese Text Summarization System</title>
       <author><first>Jun-Jie</first><last>Li</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>237–241</pages>
       <url hash="ddc5baa8">O97-2003</url>
       <bibkey>li-choi-1997-corpus</bibkey>
     </paper>
     <paper id="4">
       <title>A Study on the Portability of a Grammatical Inference System</title>
-      <author><first>Hsue-Hueh</first><last>Shih</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="hsue-hueh-shih"><first>Hsue-Hueh</first><last>Shih</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>242–246</pages>
       <url hash="0d69a85b">O97-2004</url>
       <bibkey>shih-young-1997-study</bibkey>
@@ -338,7 +338,7 @@
     </paper>
     <paper id="10">
       <title>The Description of the Intra-State Feature Space in Speech Recognition</title>
-      <author><first>Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Fang</first><last>Zheng</last></author>
       <author><first>Mingxing</first><last>Xu</last></author>
       <author><first>Wenhu</first><last>Wu</last></author>
       <pages>272–276</pages>
@@ -374,7 +374,7 @@
     </frontmatter>
     <paper id="1">
       <title>Computational Tools and Resources for Linguistic Studies</title>
-      <author><first>Yu-Ling Una</first><last>Hsu</last></author>
+      <author id="yu-ling-una-hsu"><first>Yu-Ling Una</first><last>Hsu</last></author>
       <author><first>Jing-Shin</first><last>Chang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>1–40</pages>
@@ -397,8 +397,8 @@
     </paper>
     <paper id="4">
       <title>A Synchronous <fixed-case>C</fixed-case>hinese Language Corpus from Different Speech Communities: Construction and Applications</title>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>Hing-Lung</first><last>Lin</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="hing-lung-lin"><first>Hing-Lung</first><last>Lin</last></author>
       <author><first>Godfrey</first><last>Liu</last></author>
       <author><first>Terence</first><last>Chan</last></author>
       <author><first>Jerome</first><last>Hu</last></author>
@@ -455,17 +455,17 @@
     <paper id="3">
       <title>Segmentation Standard for <fixed-case>C</fixed-case>hinese Natural Language Processing</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Feng-Yi</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="feng-yi-chen"><first>Feng-Yi</first><last>Chen</last></author>
       <pages>47–62</pages>
       <url hash="3af5ba65">O97-4003</url>
       <bibkey>huang-etal-1997-segmentation</bibkey>
     </paper>
     <paper id="4">
       <title>Aligning More Words with High Precision for Small Bilingual Corpora</title>
-      <author><first>Sue J.</first><last>Ker</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>63–96</pages>
       <url hash="52580e8c">O97-4004</url>
       <bibkey>ker-chang-1997-aligning</bibkey>
diff --git a/data/xml/O98.xml b/data/xml/O98.xml
index 49f20bba0f..3f0d8f71cf 100644
--- a/data/xml/O98.xml
+++ b/data/xml/O98.xml
@@ -26,15 +26,15 @@
     <paper id="2">
       <title>以語境判定中文未知詞詞類的方法 (Guessing Parts-Of-Speech For <fixed-case>C</fixed-case>hinese Unknown Words Using Context Information) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Chao-Jan</first><last>Chen</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="chao-jan-chen"><first>Chao-Jan</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>47–62</pages>
       <url hash="656c958c">O98-1002</url>
       <bibkey>bai-etal-1998-yi</bibkey>
     </paper>
     <paper id="3">
       <title>應用動態、靜待辭典以加速鍵盤輸入中文之方法 (A Dynamic-and Static-Dictionaries Based Method for Accelerating <fixed-case>C</fixed-case>hinese-Character Inputting with Keyboard) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Hung-yan</first><last>Gu</last></author>
+      <author id="hung-yan-gu"><first>Hung-yan</first><last>Gu</last></author>
       <author><first>Chung-Chieh</first><last>Yang</last></author>
       <pages>73–86</pages>
       <url hash="918886c0">O98-1003</url>
@@ -43,26 +43,26 @@
     <paper id="4">
       <title>Quantitative Criteria for Computational <fixed-case>C</fixed-case>hinese Lexicography</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Zhao-ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-ming</first><last>Gao</last></author>
       <author><first>Claude C.C.</first><last>Shen</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>87–108</pages>
       <url hash="fadc72ef">O98-1004</url>
       <bibkey>huang-etal-1998-quantitative</bibkey>
     </paper>
     <paper id="5">
       <title>Speaker-Independent Continuous <fixed-case>M</fixed-case>andarin Speech Recognition Under Telephone Environments</title>
-      <author><first>Jia-Lin</first><last>Shen</last></author>
-      <author><first>Ying-Chieh</first><last>Tu</last></author>
-      <author><first>Po-Yu</first><last>Liang</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="jia-lin-shen"><first>Jia-Lin</first><last>Shen</last></author>
+      <author id="ying-chieh-tu"><first>Ying-Chieh</first><last>Tu</last></author>
+      <author id="po-yu-liang"><first>Po-Yu</first><last>Liang</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>119–137</pages>
       <url hash="f0a85249">O98-1005</url>
       <bibkey>shen-etal-1998-speaker</bibkey>
     </paper>
     <paper id="6">
       <title>A Large-Vocabulary <fixed-case>T</fixed-case>aiwanese (<fixed-case>M</fixed-case>in-nan) Speech Recognition System Based on Inter-syllabic Initial-Final Modeling and Lexicon-Tree Search</title>
-      <author><first>Ren-Yuan</first><last>Lyu</last></author>
+      <author id="ren-yuan-lyu"><first>Ren-Yuan</first><last>Lyu</last></author>
       <author><first>Yuang-Jin</first><last>Chiang</last></author>
       <author><first>Ren-Jou</first><last>Fang</last></author>
       <author><first>Wen-Ping</first><last>Hsieh</last></author>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="7">
       <title>Using Keyword Spotting and Utterance Verification to a Prank Call Rejection System</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
       <author><first>Eng-Fong</first><last>Huang</last></author>
       <author><first>Jung-Kuei</first><last>Chen</last></author>
       <pages>151–162</pages>
@@ -84,7 +84,7 @@
       <author><first>Chun-Liang</first><last>Chen</last></author>
       <author><first>Bo-Ren</first><last>Bai</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <pages>189–203</pages>
       <url hash="b575b880">O98-1008</url>
       <bibkey>chen-etal-1998-cpat</bibkey>
@@ -131,14 +131,14 @@
     <paper id="1">
       <title>結合統計與規則的多層次中文斷詞系統 (A hierarchical <fixed-case>C</fixed-case>hinese word segmentation system based on statistical and rule-based methods) [In <fixed-case>C</fixed-case>hinese]</title>
       <author><first>Chung-Chen</first><last>Chen</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>63–72</pages>
       <url hash="bca3dbf2">O98-2001</url>
       <bibkey>chen-hsu-1998-jie</bibkey>
     </paper>
     <paper id="2">
       <title>The Design of Sem-Syn Initial Grammar in <fixed-case>C</fixed-case>hinese Grammatical Inference</title>
-      <author><first>Hsue-Hueh</first><last>Shih</last></author>
+      <author id="hsue-hueh-shih"><first>Hsue-Hueh</first><last>Shih</last></author>
       <pages>109–118</pages>
       <url hash="36bc3db0">O98-2002</url>
       <bibkey>shih-1998-design</bibkey>
@@ -155,9 +155,9 @@
     <paper id="4">
       <title>A Way to Extract Unknown Words Without Dictionary from <fixed-case>C</fixed-case>hinese Corpus and Its Applications</title>
       <author><first>Yih-Jeng</first><last>Lin</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <author><first>Shyh-Yang</first><last>Hwang</last></author>
-      <author><first>Ming-Jer</first><last>Wu</last></author>
+      <author id="ming-jer-wu"><first>Ming-Jer</first><last>Wu</last></author>
       <pages>217–226</pages>
       <url hash="c1fedcdf">O98-2004</url>
       <bibkey>lin-etal-1998-way</bibkey>
@@ -177,14 +177,14 @@
     <paper id="1">
       <title>Analyzing the Performance of Message Understanding Systems</title>
       <author><first>Amit</first><last>Bagga</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <pages>1–26</pages>
       <url hash="fe0502d2">O98-3001</url>
       <bibkey>bagga-biermann-1998-analyzing</bibkey>
     </paper>
     <paper id="2">
       <title>Unknown Word Detection for <fixed-case>C</fixed-case>hinese by a Corpus-based Learning Method</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
       <pages>27–44</pages>
       <url hash="fc9f3780">O98-3002</url>
@@ -193,8 +193,8 @@
     <paper id="3">
       <title>Meaning Representation and Meaning Instantiation for <fixed-case>C</fixed-case>hinese Nominals</title>
       <author><first>Kathleen</first><last>Ahrens</last></author>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Ke-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Ke-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>45–60</pages>
       <url hash="737779b6">O98-3003</url>
@@ -202,9 +202,9 @@
     </paper>
     <paper id="4">
       <title>Towards a Representation of Verbal Semantics – An Approach Based on Near-Synonyms</title>
-      <author><first>Mei-Chih</first><last>Tsai</last></author>
+      <author id="mei-chih-tsai"><first>Mei-Chih</first><last>Tsai</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <pages>61–74</pages>
       <url hash="264bb533">O98-3004</url>
@@ -220,10 +220,10 @@
     </paper>
     <paper id="6">
       <title>Human Judgment as a Basis for Evaluation of Discourse-Connective-Based Full-Text Abstraction in <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>Hing-Lung</first><last>Lin</last></author>
-      <author><first>Tom B. Y.</first><last>Lai</last></author>
-      <author><first>Samuel W. K.</first><last>Chan</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="hing-lung-lin"><first>Hing-Lung</first><last>Lin</last></author>
+      <author id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W. K.</first><last>Chan</last></author>
       <pages>101–116</pages>
       <url hash="86043ff6">O98-3006</url>
       <bibkey>tsou-etal-1998-human</bibkey>
@@ -242,15 +242,15 @@
     </frontmatter>
     <paper id="1">
       <title>Senses and Texts</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>1–16</pages>
       <url hash="242f11e1">O98-4001</url>
       <bibkey>wilks-1998-senses</bibkey>
     </paper>
     <paper id="2">
       <title>Information Extraction: Beyond Document Retrieval</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>17–60</pages>
       <url hash="67b38683">O98-4002</url>
       <bibkey>gaizauskas-wilks-1998-information</bibkey>
@@ -259,7 +259,7 @@
       <title>An Assessment of Character-based <fixed-case>C</fixed-case>hinese News Filtering Using Latent Semantic Indexing</title>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Pey-Ching</first><last>Yang</last></author>
-      <author><first>Von-Wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-Wun</first><last>Soo</last></author>
       <pages>61–78</pages>
       <url hash="0bf5f0f9">O98-4003</url>
       <bibkey>wu-etal-1998-assessment</bibkey>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="5">
       <title>Statistical Analysis of <fixed-case>M</fixed-case>andarin Acoustic Units and Automatic Extraction of Phonetically Rich Sentences Based Upon a Very Large <fixed-case>C</fixed-case>hinese Text Corpus</title>
-      <author><first>Hsin-min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-min</first><last>Wang</last></author>
       <pages>93–114</pages>
       <url hash="56976f29">O98-4005</url>
       <bibkey>wang-1998-statistical</bibkey>
diff --git a/data/xml/O99.xml b/data/xml/O99.xml
index d76b5a162e..5f8661369d 100644
--- a/data/xml/O99.xml
+++ b/data/xml/O99.xml
@@ -35,9 +35,9 @@
     </paper>
     <paper id="3">
       <title>Semantic Classification for Patterns Containing Non-Text Symbols in <fixed-case>M</fixed-case>andarin Text</title>
-      <author><first>Feng-Long</first><last>Hwang</last></author>
-      <author><first>Ming-shing</first><last>Yu</last></author>
-      <author><first>Ming-Jer</first><last>Wu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Hwang</last></author>
+      <author id="ming-shing-yu"><first>Ming-shing</first><last>Yu</last></author>
+      <author id="ming-jer-wu"><first>Ming-Jer</first><last>Wu</last></author>
       <author><first>Shyh-Yang</first><last>Hwang</last></author>
       <pages>55–66</pages>
       <url hash="f08527ba">O99-1003</url>
@@ -45,15 +45,15 @@
     </paper>
     <paper id="4">
       <title>動詞詞構與語法功能互動初探 (An Explorative Study on the Interaction Between Verb Compound Constructions and Syntactic Functions) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Li-Li</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-Li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>67–85</pages>
       <url hash="35fc3219">O99-1004</url>
       <bibkey>chang-chen-1999-dong</bibkey>
     </paper>
     <paper id="5">
       <title>Semantic Representation of Verbal Information – A Case from <fixed-case>M</fixed-case>andarin Verbs of Judging</title>
-      <author><first>Mei-Chun</first><last>Liu</last></author>
+      <author id="mei-chun-liu"><first>Mei-Chun</first><last>Liu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Jia-Ying</first><last>Lee</last></author>
       <pages>87–100</pages>
@@ -62,15 +62,15 @@
     </paper>
     <paper id="6">
       <title>階層式文件自動分類之特徵選取研究 (A Study on Feature Selection in Hierarchical Text Classification) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Su-Jin</first><last>Ker</last></author>
-      <author><first>Jen-Nan</first><last>Chen</last></author>
+      <author id="sue-j-ker"><first>Su-Jin</first><last>Ker</last></author>
+      <author id="jen-nan-chen"><first>Jen-Nan</first><last>Chen</last></author>
       <pages>137–149</pages>
       <url hash="3dd8f405">O99-1006</url>
       <bibkey>ker-chen-1999-jie</bibkey>
     </paper>
     <paper id="7">
       <title>Automatically Controlled-Vocabulary Indexing for Text Retrieval</title>
-      <author><first>Kuang-Hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-Hua</first><last>Chen</last></author>
       <author><first>Chien-Tin</first><last>Wu</last></author>
       <pages>171–185</pages>
       <url hash="1acc1329">O99-1007</url>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="8">
       <title>A New Syllable-based Approach for Retrieving <fixed-case>M</fixed-case>andarin Spoken Documents Using Short Speech Queries</title>
-      <author><first>Hsin-min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-min</first><last>Wang</last></author>
       <pages>187–202</pages>
       <url hash="4afaddf3">O99-1008</url>
       <bibkey>wang-1999-new</bibkey>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="11">
       <title>音框同步之雜訊補償方法在汽車語音辨識之應用 (Frame Synchronous Noise Compensation for Car Speech Recognition) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <author><first>Ming-Shun</first><last>Lin</last></author>
       <pages>239–251</pages>
       <url hash="ca316ea5">O99-1011</url>
@@ -125,7 +125,7 @@
     </frontmatter>
     <paper id="1">
       <title>An Analytical Study of Transformational Tagging for <fixed-case>C</fixed-case>hinese Text</title>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <author><first>Chun Wah</first><last>Ip</last></author>
       <pages>101-122</pages>
       <url hash="82b6822e">O99-2001</url>
@@ -160,10 +160,10 @@
     </frontmatter>
     <paper id="1">
       <title>Telephony Based Speaker-Independent Continuous <fixed-case>M</fixed-case>andarin Syllable Recognition</title>
-      <author><first>Jia-lin</first><last>Shen</last></author>
-      <author><first>Ying-chieh</first><last>Tu</last></author>
-      <author><first>Po-yu</first><last>Liang</last></author>
-      <author><first>Lin-shan</first><last>Lee</last></author>
+      <author id="jia-lin-shen"><first>Jia-lin</first><last>Shen</last></author>
+      <author id="ying-chieh-tu"><first>Ying-chieh</first><last>Tu</last></author>
+      <author id="po-yu-liang"><first>Po-yu</first><last>Liang</last></author>
+      <author id="lin-shan-lee"><first>Lin-shan</first><last>Lee</last></author>
       <pages>1–24</pages>
       <url hash="89500f3b">O99-3001</url>
       <bibkey>shen-etal-1999-telephony</bibkey>
@@ -207,7 +207,7 @@
     <paper id="1">
       <title>A Model for Word Sense Disambiguation</title>
       <author><first>Juanzi</first><last>Li</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>1–20</pages>
       <url hash="4292e51a">O99-4001</url>
       <bibkey>li-huang-1999-model</bibkey>
@@ -223,7 +223,7 @@
     </paper>
     <paper id="3">
       <title>基於知網的常識知識標注 (General Knowledge Annotation Based on How-net) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Kok Wee</first><last>Gan</last></author>
+      <author id="kok-wee-gan"><first>Kok Wee</first><last>Gan</last></author>
       <author><first>Wai Mun</first><last>Tham</last></author>
       <pages>39–86</pages>
       <url hash="5f34edef">O99-4003</url>
@@ -231,9 +231,9 @@
     </paper>
     <paper id="4">
       <title>中文句結構樹資料庫的構建 (<fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank) [In <fixed-case>C</fixed-case>hinese]</title>
-      <author><first>Feng-Yi</first><last>Chen</last></author>
+      <author id="feng-yi-chen"><first>Feng-Yi</first><last>Chen</last></author>
       <author><first>Pi-Fang</first><last>Tsai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Hunag</last></author>
       <pages>87–104</pages>
       <url hash="32e2ac06">O99-4004</url>
diff --git a/data/xml/P00.xml b/data/xml/P00.xml
index a10ec20966..55b21a9aa6 100644
--- a/data/xml/P00.xml
+++ b/data/xml/P00.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: Processes that Shape Conversation and their Implications for Computational Linguistics</title>
-      <author><first>Susan E.</first><last>Brennan</last></author>
+      <author id="susan-e-brennan"><first>Susan E.</first><last>Brennan</last></author>
       <doi>10.3115/1075218.1075219</doi>
       <pages>1–11</pages>
       <url hash="0df26803">P00-1001</url>
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>Invited Talk: Generic <fixed-case>NLP</fixed-case> Technologies: Language, Knowledge and Information Extraction</title>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1075218.1075220</doi>
       <pages>12–22</pages>
       <url hash="3203670b">P00-1002</url>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="3">
       <title>Invited Talk: Spoken Language Technology: Where Do We Go From Here?</title>
-      <author><first>Roger K.</first><last>Moore</last></author>
+      <author id="roger-k-moore"><first>Roger K.</first><last>Moore</last></author>
       <doi>10.3115/1075218.1075221</doi>
       <pages>22–22</pages>
       <url hash="4aad6277">P00-1003</url>
@@ -38,8 +38,8 @@
     </paper>
     <paper id="4">
       <title>Translation with Cascaded Finite State Transducers</title>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1075218.1075222</doi>
       <pages>23–30</pages>
       <url hash="2efe0c21">P00-1004</url>
@@ -47,8 +47,8 @@
     </paper>
     <paper id="5">
       <title>Phrase-Pattern-based <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Machine Translation using Two Level Translation Pattern Selection</title>
-      <author><first>Jung-jae</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Young-Soog</first><last>Chae</last></author>
       <doi>10.3115/1075218.1075223</doi>
       <pages>31–36</pages>
@@ -99,7 +99,7 @@
     </paper>
     <paper id="11">
       <title>Tagging Unknown Proper Names Using Decision Trees</title>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Franck</first><last>Genet</last></author>
       <doi>10.3115/1075218.1075229</doi>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="12">
       <title>The Order of Prenominal Adjectives in Natural Language Generation</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <doi>10.3115/1075218.1075230</doi>
       <pages>85–92</pages>
       <url hash="55a60d75">P00-1012</url>
@@ -137,7 +137,7 @@
     <paper id="15">
       <title>A Unified Statistical Model for the Identification of <fixed-case>E</fixed-case>nglish <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case></title>
       <author><first>Endong</first><last>Xun</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <doi>10.3115/1075218.1075233</doi>
       <pages>109–116</pages>
@@ -155,7 +155,7 @@
     </paper>
     <paper id="17">
       <title>Using Existing Systems to Supplement Small Amounts of Annotated Grammatical Relations Training Data</title>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <doi>10.3115/1075218.1075235</doi>
       <pages>126–132</pages>
       <url hash="e1ce2172">P00-1017</url>
@@ -171,7 +171,7 @@
     </paper>
     <paper id="19">
       <title>Can Nominal Expressions Achieve Multiple Goals?: An Empirical Study</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <doi>10.3115/1075218.1075237</doi>
       <pages>142–149</pages>
       <url hash="b9fc4ab0">P00-1019</url>
@@ -180,7 +180,7 @@
     <paper id="20">
       <title>An Empirical Study of the Influence of Argument Conciseness on Argument Effectiveness</title>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <doi>10.3115/1075218.1075238</doi>
       <pages>150–157</pages>
       <url hash="da65a2ae">P00-1020</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="21">
       <title>Multi-Agent Explanation Strategies in Real-Time Domains</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Ian</first><last>Frank</last></author>
       <doi>10.3115/1075218.1075239</doi>
       <pages>158–165</pages>
@@ -197,8 +197,8 @@
     </paper>
     <paper id="22">
       <title>A Computational Approach to Zero-pronouns in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Antonio</first><last>Ferrández</last></author>
-      <author><first>Jesús</first><last>Peral</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrández</last></author>
+      <author id="jesus-peral"><first>Jesús</first><last>Peral</last></author>
       <doi>10.3115/1075218.1075240</doi>
       <pages>166–172</pages>
       <url hash="45e13544">P00-1022</url>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="23">
       <title>Coreference for <fixed-case>NLP</fixed-case> Applications</title>
-      <author><first>Thomas S.</first><last>Morton</last></author>
+      <author id="thomas-s-morton"><first>Thomas S.</first><last>Morton</last></author>
       <doi>10.3115/1075218.1075241</doi>
       <pages>173–180</pages>
       <url hash="f9a7614b">P00-1023</url>
@@ -214,8 +214,8 @@
     </paper>
     <paper id="24">
       <title>Learning Attribute Selections for Non-Pronominal Expressions</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <doi>10.3115/1075218.1075242</doi>
       <pages>181–190</pages>
       <url hash="1a367f87">P00-1024</url>
@@ -232,7 +232,7 @@
     </paper>
     <paper id="26">
       <title>A Morphologically Sensitive Clustering Algorithm for Identifying <fixed-case>A</fixed-case>rabic Roots</title>
-      <author><first>Anne N.</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne N.</first><last>De Roeck</last></author>
       <author><first>Waleed</first><last>Al-Fares</last></author>
       <doi>10.3115/1075218.1075244</doi>
       <pages>199–206</pages>
@@ -259,7 +259,7 @@
     <paper id="29">
       <title>Inducing Probabilistic Syllable Classes Using Multivariate Clustering</title>
       <author><first>Karin</first><last>Müller</last></author>
-      <author><first>Bernd</first><last>Möbius</last></author>
+      <author id="bernd-mobius"><first>Bernd</first><last>Möbius</last></author>
       <author><first>Detlef</first><last>Prescher</last></author>
       <doi>10.3115/1075218.1075247</doi>
       <pages>225–232</pages>
@@ -269,7 +269,7 @@
     <paper id="30">
       <title>Modeling Local Context for Pitch Accent Prediction</title>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <doi>10.3115/1075218.1075248</doi>
       <pages>233–240</pages>
       <url hash="af61410f">P00-1030</url>
@@ -278,7 +278,7 @@
     <paper id="31">
       <title>A New Statistical Approach To <fixed-case>C</fixed-case>hinese <fixed-case>P</fixed-case>inyin Input</title>
       <author><first>Zheng</first><last>Chen</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <doi>10.3115/1075218.1075249</doi>
       <pages>241–247</pages>
       <url hash="2270a68f">P00-1031</url>
@@ -288,7 +288,7 @@
       <title>Automatic Detecting/Correcting Errors in <fixed-case>C</fixed-case>hinese Text by an Approximate Word-Matching Algorithm</title>
       <author><first>Lei</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Haihua</first><last>Pan</last></author>
       <doi>10.3115/1075218.1075250</doi>
       <pages>248–254</pages>
@@ -297,8 +297,8 @@
     </paper>
     <paper id="33">
       <title>Dependency-based Syntactic Analysis of <fixed-case>C</fixed-case>hinese and Annotation of Parsed Corpus</title>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/1075218.1075251</doi>
       <pages>255–262</pages>
       <url hash="21a5b2f1">P00-1033</url>
@@ -307,8 +307,8 @@
     <paper id="34">
       <title>Part-of-Speech Tagging Based on Hidden <fixed-case>M</fixed-case>arkov Model Assuming Joint Independence</title>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <doi>10.3115/1075218.1075252</doi>
       <pages>263–269</pages>
       <url hash="30023f15">P00-1034</url>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="35">
       <title>Language Independent, Minimally Supervised Induction of Lexical Probabilities</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <doi>10.3115/1075218.1075253</doi>
       <pages>270–277</pages>
@@ -334,7 +334,7 @@
     <paper id="37">
       <title>An Improved Error Model for Noisy Channel Spelling Correction</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <doi>10.3115/1075218.1075255</doi>
       <pages>286–293</pages>
       <url hash="43b984c0">P00-1037</url>
@@ -342,8 +342,8 @@
     </paper>
     <paper id="38">
       <title>Query-Relevant Summarization using <fixed-case>FAQ</fixed-case>s</title>
-      <author><first>Adam</first><last>Berger</last></author>
-      <author><first>Vibhu O.</first><last>Mittal</last></author>
+      <author id="adam-berger"><first>Adam</first><last>Berger</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu O.</first><last>Mittal</last></author>
       <doi>10.3115/1075218.1075256</doi>
       <pages>294–301</pages>
       <url hash="43dda2ec">P00-1038</url>
@@ -371,8 +371,8 @@
     <paper id="41">
       <title>Headline Generation Based on Statistical Translation</title>
       <author><first>Michele</first><last>Banko</last></author>
-      <author><first>Vibhu O.</first><last>Mittal</last></author>
-      <author><first>Michael J.</first><last>Witbrock</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu O.</first><last>Mittal</last></author>
+      <author id="michael-j-witbrock"><first>Michael J.</first><last>Witbrock</last></author>
       <doi>10.3115/1075218.1075259</doi>
       <pages>318–325</pages>
       <url hash="496f715a">P00-1041</url>
@@ -404,7 +404,7 @@
       <title>Difficulty Indices for the Named Entity Task in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Chikashi</first><last>Nobata</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1075218.1075262</doi>
       <pages>344–351</pages>
       <url hash="3dcc1136">P00-1044</url>
@@ -412,7 +412,7 @@
     </paper>
     <paper id="45">
       <title>Memory-Efficient and Thread-Safe Quasi-Destructive Graph Unification</title>
-      <author><first>Marcel P.</first><last>van Lohuizen</last></author>
+      <author id="marcel-p-van-lohuizen"><first>Marcel P.</first><last>van Lohuizen</last></author>
       <doi>10.3115/1075218.1075263</doi>
       <pages>352–359</pages>
       <url hash="639c0891">P00-1045</url>
@@ -439,8 +439,8 @@
     <paper id="48">
       <title>Hidden <fixed-case>M</fixed-case>arkov Model-Based <fixed-case>K</fixed-case>orean Part-of-Speech Tagging Considering High Agglutinativity, Word-Spacing, and Lexical Correlativity</title>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <doi>10.3115/1075218.1075266</doi>
       <pages>384–391</pages>
       <url hash="7bc91217">P00-1048</url>
@@ -457,7 +457,7 @@
     <paper id="50">
       <title><fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment Based on Linguistic Comparison</title>
       <author><first>Jin-Xia</first><last>Huang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/1075218.1075268</doi>
       <pages>392–399</pages>
       <url hash="010423ce">P00-1050</url>
@@ -487,7 +487,7 @@
     </paper>
     <paper id="53">
       <title>A Hierarchical Account of Referential Accessibility</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
       <doi>10.3115/1075218.1075271</doi>
       <pages>416–424</pages>
@@ -496,7 +496,7 @@
     </paper>
     <paper id="54">
       <title>Lexical Transfer Using a Vector-Space Model</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <doi>10.3115/1075218.1075272</doi>
       <pages>425–431</pages>
       <url hash="2c542dc0">P00-1054</url>
@@ -505,7 +505,7 @@
     <paper id="55">
       <title>Using Confidence Bands for Parallel Texts Alignment</title>
       <author><first>António</first><last>Ribeiro</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <author><first>João</first><last>Mexia</last></author>
       <doi>10.3115/1075218.1075273</doi>
       <pages>432–439</pages>
@@ -514,8 +514,8 @@
     </paper>
     <paper id="56">
       <title>Improved Statistical Alignment Models</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1075218.1075274</doi>
       <pages>440–447</pages>
       <url hash="2f3000d2">P00-1056</url>
@@ -541,8 +541,8 @@
     </paper>
     <paper id="59">
       <title>Corpus-Based Lexical Choice in Natural Language Generation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/1075218.1075277</doi>
       <pages>464–471</pages>
       <url hash="61f9c472">P00-1059</url>
@@ -580,9 +580,9 @@
     </paper>
     <paper id="63">
       <title>Term Recognition Using Technical Dictionary Hierarchy</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
-      <author><first>KyungSoon</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="kyung-soon-lee"><first>KyungSoon</first><last>Lee</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/1075218.1075281</doi>
       <pages>496–503</pages>
       <url hash="8dbbee21">P00-1063</url>
@@ -601,7 +601,7 @@
     <paper id="65">
       <title>Automatic Labeling of Semantic Roles</title>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <doi>10.3115/1075218.1075283</doi>
       <pages>512–520</pages>
       <url hash="467b5332">P00-1065</url>
@@ -623,7 +623,7 @@
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Endong</first><last>Xun</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/1075218.1075285</doi>
       <pages>529–536</pages>
       <url hash="efee7311">P00-1067</url>
@@ -631,7 +631,7 @@
     </paper>
     <paper id="68">
       <title>Diagnostic Processing of <fixed-case>J</fixed-case>apanese for Computer-Assisted Second Language Learning</title>
-      <author><first>Jun’ichi</first><last>Kakegawa</last></author>
+      <author id="junichi-kakegawa"><first>Jun’ichi</first><last>Kakegawa</last></author>
       <author><first>Hisayuki</first><last>Kanda</last></author>
       <author><first>Eitaro</first><last>Fujioka</last></author>
       <author><first>Makoto</first><last>Itami</last></author>
@@ -645,7 +645,7 @@
       <title>Word Sense Disambiguation by Learning from Unlabeled Data</title>
       <author><first>Seong-Bae</first><last>Park</last></author>
       <author><first>Byoung-Tak</first><last>Zhang</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <doi>10.3115/1075218.1075287</doi>
       <pages>547–554</pages>
       <url hash="0cb0e603">P00-1069</url>
@@ -653,8 +653,8 @@
     </paper>
     <paper id="70">
       <title>Importance of Pronominal Anaphora Resolution in Question Answering Systems</title>
-      <author><first>José L.</first><last>Vicedo</last></author>
-      <author><first>Antonio</first><last>Ferrández</last></author>
+      <author id="jose-luis-vicedo"><first>José L.</first><last>Vicedo</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrández</last></author>
       <doi>10.3115/1075218.1075288</doi>
       <pages>555–562</pages>
       <url hash="6d426d79">P00-1070</url>
@@ -662,11 +662,11 @@
     </paper>
     <paper id="71">
       <title>The Structure and Performance of an Open-Domain Question Answering System</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Marius</first><last>Pasca</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Richard</first><last>Goodrum</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <doi>10.3115/1075218.1075289</doi>
@@ -677,7 +677,7 @@
     <paper id="72">
       <title>Dimension-Reduced Estimation of Word Co-occurrence Probability</title>
       <author><first>Kilyoun</first><last>Kim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/1075218.1075290</doi>
       <pages>571–578</pages>
       <url hash="95c5c362">P00-1072</url>
@@ -686,7 +686,7 @@
     <paper id="73">
       <title>Distribution-Based Pruning of Backoff Language Models</title>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Kai-Fu</first><last>Lee</last></author>
+      <author id="kai-fu-lee"><first>Kai-Fu</first><last>Lee</last></author>
       <doi>10.3115/1075218.1075291</doi>
       <pages>579–588</pages>
       <url hash="125d9745">P00-1073</url>
@@ -694,7 +694,7 @@
     </paper>
     <paper id="74">
       <title>Panel: Computational Linguistics Research on <fixed-case>P</fixed-case>hilippine Languages</title>
-      <author><first>Rachel Edita O.</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></author>
       <author><first>Allan</first><last>Borra</last></author>
       <doi>10.3115/1075218.1075292</doi>
       <pages>1–2</pages>
@@ -719,7 +719,7 @@
     </paper>
     <paper id="77">
       <title>Panel: Computational Linguistics in <fixed-case>I</fixed-case>ndia: An Overview</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Vineet</first><last>Chaitanya</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <doi>10.3115/1075218.1075295</doi>
@@ -740,7 +740,7 @@
     </paper>
     <paper id="79">
       <title>Panel: Computational Linguistics in <fixed-case>M</fixed-case>alaysia</title>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <doi>10.3115/1075218.1075297</doi>
       <pages>1–2</pages>
       <url hash="fbbb627f">P00-1079</url>
diff --git a/data/xml/P01.xml b/data/xml/P01.xml
index c4be77b3e9..cda1f07cfd 100644
--- a/data/xml/P01.xml
+++ b/data/xml/P01.xml
@@ -23,10 +23,10 @@
     <paper id="2">
       <title>Invited Talk: Processing Broadcast Audio for Information Access</title>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Claude</first><last>Barras</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="claude-barras"><first>Claude</first><last>Barras</last></author>
       <author><first>Langzhou</first><last>Chen</last></author>
       <author><first>Yannick</first><last>de Kercadio</last></author>
       <doi>10.3115/1073012.1073014</doi>
@@ -36,8 +36,8 @@
     </paper>
     <paper id="3">
       <title>Improvement of a Whole Sentence Maximum Entropy Language Model Using Grammatical Features</title>
-      <author><first>Fredy A.</first><last>Amaya</last></author>
-      <author><first>José Miguel</first><last>Benedí</last></author>
+      <author id="fredy-a-amaya"><first>Fredy A.</first><last>Amaya</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedí</last></author>
       <doi>10.3115/1073012.1073015</doi>
       <pages>10–17</pages>
       <url hash="7add2395">P01-1003</url>
@@ -45,7 +45,7 @@
     </paper>
     <paper id="4">
       <title>Low-cost, High-performance Translation Retrieval: Dumber is Better</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.3115/1073012.1073016</doi>
       <pages>18–25</pages>
       <url hash="50676750">P01-1004</url>
@@ -62,8 +62,8 @@
     </paper>
     <paper id="6">
       <title>Evaluation Tool for Rule-based Anaphora Resolution Methods</title>
-      <author><first>Catalina</first><last>Barbu</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="catalina-barbu"><first>Catalina</first><last>Barbu</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <doi>10.3115/1073012.1073018</doi>
       <pages>34–41</pages>
       <url hash="8a486668">P01-1006</url>
@@ -71,10 +71,10 @@
     </paper>
     <paper id="7">
       <title>Guided Parsing of Range Concatenation Languages</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <author><first>Pierre</first><last>Boullier</last></author>
       <author><first>Philippe</first><last>Deschamp</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <doi>10.3115/1073012.1073019</doi>
       <pages>42–49</pages>
       <url hash="7d15f0d6">P01-1007</url>
@@ -83,7 +83,7 @@
     <paper id="8">
       <title>Extracting Paraphrases from a Parallel Corpus</title>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/1073012.1073020</doi>
       <pages>50–57</pages>
       <url hash="ee9b16a6">P01-1008</url>
@@ -118,8 +118,8 @@
     </paper>
     <paper id="12">
       <title>Detecting Problematic Turns in Human-Machine Interactions: Rule-induction Versus Memory-based Learning Approaches</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
       <doi>10.3115/1073012.1073024</doi>
       <pages>82–89</pages>
@@ -140,7 +140,7 @@
       <author><first>Jill</first><last>Burstein</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Slava</first><last>Andreyev</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <doi>10.3115/1073012.1073026</doi>
       <pages>98–105</pages>
       <url hash="74422390">P01-1014</url>
@@ -148,13 +148,13 @@
     </paper>
     <paper id="15">
       <title>From <fixed-case>RAGS</fixed-case> to <fixed-case>RICHES</fixed-case>: Exploiting the Potential of a Flexible Generation Architecture</title>
-      <author><first>Lynne</first><last>Cahill</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>Daniel</first><last>Paiva</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="daniel-paiva"><first>Daniel</first><last>Paiva</last></author>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <doi>10.3115/1073012.1073027</doi>
       <pages>106–113</pages>
       <url hash="28b84ce0">P01-1015</url>
@@ -163,9 +163,9 @@
     <paper id="16">
       <title>Non-Verbal Cues for Discourse Structure</title>
       <author><first>Justine</first><last>Cassell</last></author>
-      <author><first>Yukiko</first><last>Nakano</last></author>
-      <author><first>Timothy W.</first><last>Bickmore</last></author>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko</first><last>Nakano</last></author>
+      <author id="timothy-w-bickmore"><first>Timothy W.</first><last>Bickmore</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <author><first>Charles</first><last>Rich</last></author>
       <doi>10.3115/1073012.1073028</doi>
       <pages>114–123</pages>
@@ -193,7 +193,7 @@
       <title>An Algebra for Semantic Construction in Constraint-based Grammars</title>
       <author><first>Ann</first><last>Copestake</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <doi>10.3115/1073012.1073031</doi>
       <pages>140–147</pages>
       <url hash="5d6598b0">P01-1019</url>
@@ -201,7 +201,7 @@
     </paper>
     <paper id="20">
       <title>A Machine Learning Approach to the Automatic Evaluation of Machine Translation</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <doi>10.3115/1073012.1073032</doi>
@@ -219,10 +219,10 @@
     </paper>
     <paper id="22">
       <title>Practical Issues in Compiling Typed Unification Grammars for Speech Recognition</title>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
-      <author><first>Christopher</first><last>Culy</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="chris-culy"><first>Christopher</first><last>Culy</last></author>
       <doi>10.3115/1073012.1073034</doi>
       <pages>164–171</pages>
       <url hash="40390649">P01-1022</url>
@@ -230,8 +230,8 @@
     </paper>
     <paper id="23">
       <title>Empirically Estimating Order Constraints for Content Planning in Generation</title>
-      <author><first>Pablo A.</first><last>Duboue</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="pablo-duboue"><first>Pablo A.</first><last>Duboue</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/1073012.1073035</doi>
       <pages>172–179</pages>
       <url hash="f7c94117">P01-1023</url>
@@ -266,10 +266,10 @@
     </paper>
     <paper id="27">
       <title>Refined Lexicon Models for Statistical Machine Translation using a Maximum Entropy Approach</title>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Franz J.</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="franz-josef-och"><first>Franz J.</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <doi>10.3115/1073012.1073039</doi>
       <pages>204–211</pages>
       <url hash="101c5155">P01-1027</url>
@@ -296,7 +296,7 @@
     <paper id="30">
       <title>Fast Decoding and Optimal Decoding for Machine Translation</title>
       <author><first>Ulrich</first><last>Germann</last></author>
-      <author><first>Michael</first><last>Jahr</last></author>
+      <author id="michael-e-jahr"><first>Michael</first><last>Jahr</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
@@ -319,7 +319,7 @@
       <title>Mapping Lexical Entries in a Verbs Database to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses</title>
       <author><first>Rebecca</first><last>Green</last></author>
       <author><first>Lisa</first><last>Pearl</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <doi>10.3115/1073012.1073044</doi>
       <pages>244–251</pages>
@@ -345,11 +345,11 @@
     </paper>
     <paper id="35">
       <title>Serial Combination of Rules and Statistics: A Case Study in <fixed-case>C</fixed-case>zech Tagging</title>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Pavel</first><last>Krbec</last></author>
-      <author><first>Pavel</first><last>Kveton</last></author>
-      <author><first>Karel</first><last>Oliva</last></author>
-      <author><first>Vladimir</first><last>Petkevic</last></author>
+      <author id="pavel-kveton"><first>Pavel</first><last>Kveton</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
+      <author id="vladimir-petkevic"><first>Vladimir</first><last>Petkevic</last></author>
       <doi>10.3115/1073012.1073047</doi>
       <pages>268–275</pages>
       <url hash="51aad76d">P01-1035</url>
@@ -357,8 +357,8 @@
     </paper>
     <paper id="36">
       <title>Topic-focus and Salience</title>
-      <author><first>Eva</first><last>Hajicová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <doi>10.3115/1073012.1073048</doi>
       <pages>276–281</pages>
       <url hash="81ea7baa">P01-1036</url>
@@ -366,15 +366,15 @@
     </paper>
     <paper id="37">
       <title>The Role of Lexico-Semantic Feedback in Open-Domain Textual Question-Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Marius</first><last>Pasca</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Razvan</first><last>Bunsecu</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
-      <author><first>Paul</first><last>Morarescu</last></author>
+      <author id="paul-morarescu"><first>Paul</first><last>Morarescu</last></author>
       <doi>10.3115/1073012.1073049</doi>
       <pages>282–289</pages>
       <url hash="55809a3c">P01-1037</url>
@@ -383,7 +383,7 @@
     <paper id="38">
       <title>Generation of <fixed-case>VP</fixed-case> Ellipsis: A Corpus-Based Approach</title>
       <author><first>Daniel</first><last>Hardt</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/1073012.1073050</doi>
       <pages>290–297</pages>
       <url hash="75353e75">P01-1038</url>
@@ -391,8 +391,8 @@
     </paper>
     <paper id="39">
       <title>Information Extraction from Voicemail</title>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <author><first>Mukund</first><last>Padmanabhan</last></author>
       <doi>10.3115/1073012.1073051</doi>
       <pages>298–305</pages>
@@ -401,8 +401,8 @@
     </paper>
     <paper id="40">
       <title>A Common Framework for Syntactic Annotation</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <doi>10.3115/1073012.1073052</doi>
       <pages>306–313</pages>
       <url hash="6b215f2f">P01-1040</url>
@@ -435,7 +435,7 @@
     <paper id="44">
       <title>Parsing with Treebank Grammars: Empirical Bounds, Theoretical Models, and the Structure of the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1073012.1073056</doi>
       <pages>338–345</pages>
       <url hash="a7f79d76">P01-1044</url>
@@ -443,8 +443,8 @@
     </paper>
     <paper id="45">
       <title>From Chunks to Function-Argument Structure: A Similarity-Based Approach</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <doi>10.3115/1073012.1073057</doi>
       <pages>346–353</pages>
       <url hash="1a8bf317">P01-1045</url>
@@ -471,8 +471,8 @@
     </paper>
     <paper id="48">
       <title>Predicting User Reactions to System Error</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
       <doi>10.3115/1073012.1073060</doi>
       <pages>370–377</pages>
@@ -482,7 +482,7 @@
     <paper id="49">
       <title>Building Semantic Perceptron Net for Topic Spotting</title>
       <author><first>Jimin</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <doi>10.3115/1073012.1073061</doi>
       <pages>378–385</pages>
       <url hash="ea3e8861">P01-1049</url>
@@ -498,8 +498,8 @@
     </paper>
     <paper id="51">
       <title>Error Profiling: Toward a Model of <fixed-case>E</fixed-case>nglish Acquisition for Deaf Learners</title>
-      <author><first>Lisa N.</first><last>Michaud</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="lisa-n-michaud"><first>Lisa N.</first><last>Michaud</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/1073012.1073063</doi>
       <pages>394–401</pages>
       <url hash="4e4ecab8">P01-1051</url>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="52">
       <title>Logic Form Transformation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and its Applicability to Question Answering</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <doi>10.3115/1073012.1073064</doi>
       <pages>402–409</pages>
@@ -537,7 +537,7 @@
       <author><first>Francis</first><last>Wolinski</last></author>
       <author><first>Georgios</first><last>Paliouras</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <doi>10.3115/1073012.1073067</doi>
       <pages>426–433</pages>
       <url hash="d85bb296">P01-1055</url>
@@ -545,9 +545,9 @@
     </paper>
     <paper id="56">
       <title>Evaluating a Trainable Sentence Planner for a Spoken Dialogue System</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Monica</first><last>Rogati</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <doi>10.3115/1073012.1073068</doi>
       <pages>434–441</pages>
       <url hash="e1ef0626">P01-1056</url>
@@ -577,7 +577,7 @@
       <title>Producing Biographical Summaries: Combining Linguistic Knowledge with Corpus Statistics</title>
       <author><first>Barry</first><last>Schiffman</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Kristian</first><last>Concepcion</last></author>
+      <author id="kristian-concepcion"><first>Kristian</first><last>Concepcion</last></author>
       <doi>10.3115/1073012.1073071</doi>
       <pages>458–465</pages>
       <url hash="e5875511">P01-1059</url>
@@ -603,8 +603,8 @@
     <paper id="62">
       <title>Incremental Construction of Compact Acyclic <fixed-case>NFA</fixed-case>s</title>
       <author><first>Kyriakos N.</first><last>Sgarbas</last></author>
-      <author><first>Nikos D.</first><last>Fakotakis</last></author>
-      <author><first>George K.</first><last>Kokkinakis</last></author>
+      <author id="nikos-fakotakis"><first>Nikos D.</first><last>Fakotakis</last></author>
+      <author id="george-kokkinakis"><first>George K.</first><last>Kokkinakis</last></author>
       <doi>10.3115/1073012.1073074</doi>
       <pages>482–489</pages>
       <url hash="0abe5566">P01-1062</url>
@@ -612,7 +612,7 @@
     </paper>
     <paper id="63">
       <title>A <fixed-case>B</fixed-case>ayesian Model For Morpheme and Paradigm Identification</title>
-      <author><first>Matthew G.</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew G.</first><last>Snover</last></author>
       <author><first>Michael R.</first><last>Brent</last></author>
       <doi>10.3115/1073012.1073075</doi>
       <pages>490–498</pages>
@@ -630,7 +630,7 @@
     </paper>
     <paper id="65">
       <title>A Generic Approach to Parallel Chart Parsing with an Application to <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case></title>
-      <author><first>Marcel P.</first><last>Van Lohuizen</last></author>
+      <author id="marcel-p-van-lohuizen"><first>Marcel P.</first><last>Van Lohuizen</last></author>
       <doi>10.3115/1073012.1073077</doi>
       <pages>507–514</pages>
       <url hash="21c63450">P01-1065</url>
@@ -638,9 +638,9 @@
     </paper>
     <paper id="66">
       <title>Quantitative and Qualitative Evaluation of Darpa Communicator Spoken Dialogue Systems</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Julie E.</first><last>Boland</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="julie-e-boland"><first>Julie E.</first><last>Boland</last></author>
       <doi>10.3115/1073012.1073078</doi>
       <pages>515–522</pages>
       <url hash="8e8beab0">P01-1066</url>
@@ -657,7 +657,7 @@
     </paper>
     <paper id="68">
       <title>Multi-Class Composite N-gram Language Model for Spoken Language Processing Using Multiple Word Clusters</title>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Shuntaro</first><last>Isogai</last></author>
       <author><first>Yoshinori</first><last>Sagisaka</last></author>
       <doi>10.3115/1073012.1073080</doi>
@@ -668,8 +668,8 @@
     <paper id="69">
       <title>Text Chunking using Regularized Winnow</title>
       <author><first>Tong</first><last>Zhang</last></author>
-      <author><first>Fred</first><last>Damerau</last></author>
-      <author><first>David</first><last>Johnson</last></author>
+      <author id="fred-damerau"><first>Fred</first><last>Damerau</last></author>
+      <author id="david-e-johnson"><first>David</first><last>Johnson</last></author>
       <doi>10.3115/1073012.1073081</doi>
       <pages>539–546</pages>
       <url hash="63ed1b0a">P01-1069</url>
diff --git a/data/xml/P02.xml b/data/xml/P02.xml
index fb159db204..449595e8de 100644
--- a/data/xml/P02.xml
+++ b/data/xml/P02.xml
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Parameter Estimation for Probabilistic Finite-State Transducers</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1073083.1073085</doi>
       <pages>1–8</pages>
       <url hash="6a860124">P02-1001</url>
@@ -26,7 +26,7 @@
     </paper>
     <paper id="2">
       <title>Sequential Conditional Generalized Iterative Scaling</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <doi>10.3115/1073083.1073086</doi>
       <pages>9–16</pages>
       <url hash="aec7705d">P02-1002</url>
@@ -44,9 +44,9 @@
     <paper id="4">
       <title>Machine-learned contexts for linguistic operations in <fixed-case>G</fixed-case>erman sentence realization</title>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <doi>10.3115/1073083.1073089</doi>
       <pages>25–32</pages>
       <url hash="d342a33f">P02-1004</url>
@@ -54,9 +54,9 @@
     </paper>
     <paper id="5">
       <title>Performance Issues and Error Analysis in an Open-Domain Question Answering System</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Marius</first><last>Pasca</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <doi>10.3115/1073083.1073091</doi>
       <pages>33–40</pages>
@@ -66,7 +66,7 @@
     <paper id="6">
       <title>Learning surface text patterns for a Question Answering System</title>
       <author><first>Deepak</first><last>Ravichandran</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.3115/1073083.1073092</doi>
       <pages>41–47</pages>
       <url hash="34c7710d">P02-1006</url>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="8">
       <title>Phonological Comprehension and the Compilation of <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1073083.1073095</doi>
       <pages>56–63</pages>
       <url hash="bbb86b98">P02-1008</url>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="11">
       <title>Resolving Pronominal Reference to Abstract Entities</title>
-      <author><first>Donna K.</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
       <doi>10.3115/1073083.1073099</doi>
       <pages>80–87</pages>
       <url hash="79546fc1">P02-1011</url>
@@ -114,8 +114,8 @@
     </paper>
     <paper id="12">
       <title>Pronominalization in Generated Discourse and Dialogue</title>
-      <author><first>Charles B.</first><last>Callaway</last></author>
-      <author><first>James C.</first><last>Lester</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
+      <author id="james-lester"><first>James C.</first><last>Lester</last></author>
       <doi>10.3115/1073083.1073100</doi>
       <pages>88–95</pages>
       <url hash="7d427106">P02-1012</url>
@@ -132,7 +132,7 @@
     <paper id="14">
       <title>Improving Machine Learning Approaches to Coreference Resolution</title>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.3115/1073083.1073102</doi>
       <pages>104–111</pages>
       <url hash="bfc1c5c1">P02-1014</url>
@@ -150,8 +150,8 @@
     <paper id="16">
       <title>Active Learning for Statistical Natural Language Parsing</title>
       <author><first>Min</first><last>Tang</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/1073083.1073105</doi>
       <pages>120–127</pages>
       <url hash="ad68c734">P02-1016</url>
@@ -160,7 +160,7 @@
     <paper id="17">
       <title>A Generative Constituent-Context Model for Improved Grammar Induction</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1073083.1073106</doi>
       <pages>128–135</pages>
       <url hash="90b9612f">P02-1017</url>
@@ -177,7 +177,7 @@
     <paper id="19">
       <title>Pronunciation Modeling for Improved Spelling Correction</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <doi>10.3115/1073083.1073109</doi>
       <pages>144–151</pages>
       <url hash="234108bb">P02-1019</url>
@@ -185,10 +185,10 @@
     </paper>
     <paper id="20">
       <title>Measuring Text Reuse</title>
-      <author><first>Paul</first><last>Clough</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Scott S.L.</first><last>Piao</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="scott-s-l-piao"><first>Scott S.L.</first><last>Piao</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/1073083.1073110</doi>
       <pages>152–159</pages>
       <url hash="22d76d17">P02-1020</url>
@@ -196,7 +196,7 @@
     </paper>
     <paper id="21">
       <title>Semi-Supervised Maximum Entropy Based Approach to Acronym and Abbreviation Normalization in Medical Texts</title>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <doi>10.3115/1073083.1073111</doi>
       <pages>160–167</pages>
       <url hash="9c87e666">P02-1021</url>
@@ -204,9 +204,9 @@
     </paper>
     <paper id="22">
       <title><fixed-case>GATE</fixed-case>: an Architecture for Development of Robust <fixed-case>HLT</fixed-case> applications</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
       <doi>10.3115/1073083.1073112</doi>
       <pages>168–175</pages>
@@ -225,7 +225,7 @@
     <paper id="24">
       <title>Exploring Asymmetric Clustering for Statistical Language Modeling</title>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Hang</first><last>Li</last></author>
       <doi>10.3115/1073083.1073115</doi>
@@ -237,7 +237,7 @@
       <title>A Study on Richer Syntactic Dependencies for Structured Language Modeling</title>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Ciprian</first><last>Chelba</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <doi>10.3115/1073083.1073116</doi>
       <pages>191–198</pages>
       <url hash="a34513c0">P02-1025</url>
@@ -268,7 +268,7 @@
       <author><first>Nobuhiro</first><last>Kaji</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <doi>10.3115/1073083.1073120</doi>
       <pages>215–222</pages>
       <url hash="12d49805">P02-1028</url>
@@ -276,7 +276,7 @@
     </paper>
     <paper id="29">
       <title>Inducing <fixed-case>G</fixed-case>erman Semantic Verb Classes from Purely Syntactic Subcategorisation Information</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <doi>10.3115/1073083.1073121</doi>
       <pages>223–230</pages>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="30">
       <title>Scaling Context Space</title>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Marc</first><last>Moens</last></author>
       <doi>10.3115/1073083.1073123</doi>
       <pages>231–238</pages>
@@ -295,7 +295,7 @@
     <paper id="31">
       <title>The Necessity of Parsing for Predicate Argument Recognition</title>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/1073083.1073124</doi>
       <pages>239–246</pages>
       <url hash="31a58af0">P02-1031</url>
@@ -304,8 +304,8 @@
     <paper id="32">
       <title>The Descent of Hierarchy, and Selection in Relational Semantics</title>
       <author><first>Barbara</first><last>Rosario</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
-      <author><first>Charles</first><last>Fillmore</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
+      <author id="charles-j-fillmore"><first>Charles</first><last>Fillmore</last></author>
       <doi>10.3115/1073083.1073125</doi>
       <pages>247–254</pages>
       <url hash="8b047c92">P02-1032</url>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="33">
       <title>An Unsupervised Method for Word Sense Tagging using Parallel Corpora</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <doi>10.3115/1073083.1073126</doi>
       <pages>255–262</pages>
@@ -322,7 +322,7 @@
     </paper>
     <paper id="34">
       <title>New Ranking Algorithms for Parsing and Tagging: Kernels over Discrete Structures, and the Voted Perceptron</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Nigel</first><last>Duffy</last></author>
       <doi>10.3115/1073083.1073128</doi>
       <pages>263–270</pages>
@@ -332,10 +332,10 @@
     <paper id="35">
       <title>Parsing the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal using a <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar and Discriminative Estimation Techniques</title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Tracy H.</first><last>King</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="tracy-holloway-king"><first>Tracy H.</first><last>King</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.3115/1073083.1073129</doi>
       <pages>271–278</pages>
@@ -361,8 +361,8 @@
     </paper>
     <paper id="38">
       <title>Discriminative Training and Maximum Entropy Models for Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1073083.1073133</doi>
       <pages>295–302</pages>
       <award>Best Paper</award>
@@ -381,7 +381,7 @@
     <paper id="40">
       <title><fixed-case>B</fixed-case>leu: a Method for Automatic Evaluation of Machine Translation</title>
       <author><first>Kishore</first><last>Papineni</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <author><first>Wei-Jing</first><last>Zhu</last></author>
       <doi>10.3115/1073083.1073135</doi>
@@ -393,7 +393,7 @@
     <paper id="41">
       <title>Coupling <fixed-case>CCG</fixed-case> and Hybrid Logic Dependency Semantics</title>
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <doi>10.3115/1073083.1073137</doi>
       <pages>319–326</pages>
       <url hash="7e89d19c">P02-1041</url>
@@ -403,7 +403,7 @@
       <title>Building Deep Dependency Structures using a Wide-Coverage <fixed-case>CCG</fixed-case> Parser</title>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/1073083.1073138</doi>
       <pages>327–334</pages>
       <url hash="77851f67">P02-1042</url>
@@ -412,7 +412,7 @@
     <paper id="43">
       <title>Generative Models for Statistical Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/1073083.1073139</doi>
       <pages>335–342</pages>
       <url hash="66c96a5c">P02-1043</url>
@@ -429,7 +429,7 @@
     </paper>
     <paper id="45">
       <title>Applying Co-Training to Reference Resolution</title>
-      <author><first>Christoph</first><last>Mueller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Mueller</last></author>
       <author><first>Stefan</first><last>Rapp</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <doi>10.3115/1073083.1073142</doi>
@@ -439,7 +439,7 @@
     </paper>
     <paper id="46">
       <title>Bootstrapping</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <doi>10.3115/1073083.1073143</doi>
       <pages>360–367</pages>
       <url hash="15662024">P02-1046</url>
@@ -456,14 +456,14 @@
     </paper>
     <paper id="48">
       <title><fixed-case>MATCH</fixed-case>: An Architecture for Multimodal Dialogue Systems</title>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Gunaranjan</first><last>Vasireddy</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="gunaranjan-vasireddy"><first>Gunaranjan</first><last>Vasireddy</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
-      <author><first>Preetam</first><last>Maloor</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
+      <author id="preetam-maloor"><first>Preetam</first><last>Maloor</last></author>
       <doi>10.3115/1073083.1073146</doi>
       <pages>376–383</pages>
       <url hash="12a46e79">P02-1048</url>
@@ -471,9 +471,9 @@
     </paper>
     <paper id="49">
       <title>What’s the Trouble: Automatically Identifying Problematic Dialogues in <fixed-case>DARPA</fixed-case> Communicator Dialogue Systems</title>
-      <author><first>Helen Wright</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen Wright</first><last>Hastie</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <doi>10.3115/1073083.1073147</doi>
       <pages>384–391</pages>
       <url hash="eb59a13f">P02-1049</url>
@@ -483,7 +483,7 @@
       <title>Evaluating Translational Correspondence using Annotation Projection</title>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <author><first>Okan</first><last>Kolak</last></author>
       <doi>10.3115/1073083.1073149</doi>
       <pages>392–399</pages>
@@ -492,7 +492,7 @@
     </paper>
     <paper id="51">
       <title>Translating Named Entities Using Monolingual and Bilingual Resources</title>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <doi>10.3115/1073083.1073150</doi>
       <pages>400–408</pages>
@@ -502,7 +502,7 @@
     <paper id="52">
       <title>Using Similarity Scoring to Improve the Bilingual Dictionary for Sub-sentential Alignment</title>
       <author><first>Katharina</first><last>Probst</last></author>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <doi>10.3115/1073083.1073151</doi>
       <pages>409–416</pages>
       <url hash="0aa0d9d6">P02-1052</url>
@@ -510,7 +510,7 @@
     </paper>
     <paper id="53">
       <title>Thumbs Up or Thumbs Down? Semantic Orientation Applied to Unsupervised Classification of Reviews</title>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <doi>10.3115/1073083.1073153</doi>
       <pages>417–424</pages>
       <url hash="a36fb9fb">P02-1053</url>
@@ -518,10 +518,10 @@
     </paper>
     <paper id="54">
       <title>Is It the Right Answer? Exploiting Web Redundancy for Answer Validation</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Roberto</first><last>Prevete</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <doi>10.3115/1073083.1073154</doi>
       <pages>425–432</pages>
       <url hash="5837b56b">P02-1054</url>
@@ -529,7 +529,7 @@
     </paper>
     <paper id="55">
       <title>Shallow Parsing on the Basis of Words Only: A Case Study</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Sabine</first><last>Buchholz</last></author>
       <doi>10.3115/1073083.1073156</doi>
       <pages>433–440</pages>
@@ -540,16 +540,16 @@
       <title>An Integrated Archictecture for Shallow and Deep Processing</title>
       <author><first>Berthold</first><last>Crysmann</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Stefan</first><last>Mueller</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Mueller</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Ulrich</first><last>Schaefer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schaefer</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Markus</first><last>Becker</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <doi>10.3115/1073083.1073157</doi>
       <pages>441–448</pages>
       <url hash="52196d24">P02-1056</url>
@@ -557,7 +557,7 @@
     </paper>
     <paper id="57">
       <title>A Noisy-Channel Model for Document Compression</title>
-      <author><first>Hal</first><last>Daume III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <doi>10.3115/1073083.1073159</doi>
       <pages>449–456</pages>
@@ -566,8 +566,8 @@
     </paper>
     <paper id="58">
       <title>From Single to Multi-document Summarization</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.3115/1073083.1073160</doi>
       <pages>457–464</pages>
       <url hash="951ded5a">P02-1058</url>
@@ -576,7 +576,7 @@
     <paper id="59">
       <title>Supervised Ranking in Open-Domain Text Summarization</title>
       <author><first>Tadashi</first><last>Nomoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1073083.1073161</doi>
       <pages>465–472</pages>
       <url hash="343ef10e">P02-1059</url>
@@ -584,7 +584,7 @@
     </paper>
     <paper id="60">
       <title>Named Entity Recognition using an <fixed-case>HMM</fixed-case>-based Chunk Tagger</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <doi>10.3115/1073083.1073163</doi>
       <pages>473–480</pages>
@@ -602,7 +602,7 @@
     </paper>
     <paper id="62">
       <title>Ranking Algorithms for Named Entity Extraction: Boosting and the <fixed-case>V</fixed-case>oted<fixed-case>P</fixed-case>erceptron</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.3115/1073083.1073165</doi>
       <pages>489–496</pages>
       <url hash="f24735b9">P02-1062</url>
@@ -611,8 +611,8 @@
     <paper id="63">
       <title>Revision Learning and its Application to Part-of-Speech Tagging</title>
       <author><first>Tetsuji</first><last>Nakagawa</last></author>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1073083.1073167</doi>
       <pages>497–504</pages>
       <url hash="161a0f3a">P02-1063</url>
diff --git a/data/xml/P03.xml b/data/xml/P03.xml
index 242b1d5230..76c39e40ce 100644
--- a/data/xml/P03.xml
+++ b/data/xml/P03.xml
@@ -16,7 +16,7 @@
     <paper id="1">
       <title>Offline Strategies for Online Question Answering: Answering Questions Before They Are Asked</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Abdessamad</first><last>Echihabi</last></author>
       <doi>10.3115/1075096.1075097</doi>
       <pages>1–7</pages>
@@ -26,7 +26,7 @@
     <paper id="2">
       <title>Using Predicate-Argument Structures for Information Extraction</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <author><first>John</first><last>Williams</last></author>
       <author><first>Paul</first><last>Aarseth</last></author>
       <doi>10.3115/1075096.1075098</doi>
@@ -45,8 +45,8 @@
     </paper>
     <paper id="4">
       <title>Fast Methods for Kernel-Based Text Analysis</title>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1075096.1075100</doi>
       <pages>24–31</pages>
       <url hash="761e1241">P03-1004</url>
@@ -141,8 +141,8 @@
       <author><first>Anette</first><last>Frank</last></author>
       <author><first>Markus</first><last>Becker</last></author>
       <author><first>Berthold</first><last>Crysmann</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <doi>10.3115/1075096.1075110</doi>
       <pages>104–111</pages>
       <url hash="a90a30d4">P03-1014</url>
@@ -167,7 +167,7 @@
     </paper>
     <paper id="17">
       <title>Constructing Semantic Space Models from Parsed Corpora</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <doi>10.3115/1075096.1075113</doi>
       <pages>128–135</pages>
@@ -185,7 +185,7 @@
     <paper id="19">
       <title>A Comparative Study on Reordering Constraints in Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1075096.1075115</doi>
       <pages>144–151</pages>
       <url hash="27e9e108">P03-1019</url>
@@ -193,10 +193,10 @@
     </paper>
     <paper id="20">
       <title>t<fixed-case>R</fixed-case>u<fixed-case>E</fixed-case>cas<fixed-case>I</fixed-case>ng</title>
-      <author><first>Lucian Vlad</first><last>Lita</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian Vlad</first><last>Lita</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
       <doi>10.3115/1075096.1075116</doi>
       <pages>152–159</pages>
       <url hash="6c2f5390">P03-1020</url>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="21">
       <title>Minimum Error Rate Training in Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <doi>10.3115/1075096.1075117</doi>
       <pages>160–167</pages>
       <url hash="c37da806">P03-1021</url>
@@ -213,7 +213,7 @@
     <paper id="22">
       <title>A Machine Learning Approach to Pronoun Resolution in Spoken Dialogue</title>
       <author><first>Michael</first><last>Strube</last></author>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <doi>10.3115/1075096.1075118</doi>
       <pages>168–175</pages>
       <url hash="c4dcb2ee">P03-1022</url>
@@ -222,9 +222,9 @@
     <paper id="23">
       <title>Coreference Resolution Using Competition Learning Approach</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <doi>10.3115/1075096.1075119</doi>
       <pages>176–183</pages>
       <url hash="839409cb">P03-1023</url>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="27">
       <title>Recognizing Expressions of Commonsense Psychology in <fixed-case>E</fixed-case>nglish Text</title>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <author><first>Abe</first><last>Kazemzadeh</last></author>
       <author><first>Anish</first><last>Nair</last></author>
       <author><first>Milena</first><last>Petrova</last></author>
@@ -271,7 +271,7 @@
       <title>Closing the Gap: Learning-Based Information Extraction Rivaling Knowledge-Engineering Methods</title>
       <author><first>Hai Leong</first><last>Chieu</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <doi>10.3115/1075096.1075124</doi>
       <pages>216–223</pages>
       <url hash="32133aee">P03-1028</url>
@@ -281,7 +281,7 @@
       <title>An Improved Extraction Pattern Representation Model for Automatic <fixed-case>IE</fixed-case> Pattern Acquisition</title>
       <author><first>Kiyoshi</first><last>Sudo</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1075096.1075125</doi>
       <pages>224–231</pages>
       <url hash="fdfb153b">P03-1029</url>
@@ -290,7 +290,7 @@
     <paper id="30">
       <title>Optimizing Story Link Detection is not Equivalent to Optimizing New Event Detection</title>
       <author><first>Ayman</first><last>Farahat</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Thorsten</first><last>Brants</last></author>
       <doi>10.3115/1075096.1075126</doi>
       <pages>232–239</pages>
@@ -310,8 +310,8 @@
     <paper id="32">
       <title>Extracting Key Semantic Terms from <fixed-case>C</fixed-case>hinese Speech Query for Web Searches</title>
       <author><first>Gang</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
-      <author><first>Yong-Cheng</first><last>Wang</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="yongcheng-wang"><first>Yong-Cheng</first><last>Wang</last></author>
       <doi>10.3115/1075096.1075128</doi>
       <pages>248–255</pages>
       <url hash="5367e214">P03-1032</url>
@@ -322,7 +322,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Shinichi</first><last>Ueno</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <doi>10.3115/1075096.1075129</doi>
       <pages>256–263</pages>
       <url hash="2be2c826">P03-1033</url>
@@ -330,7 +330,7 @@
     </paper>
     <paper id="34">
       <title>Integrating Discourse Markers into a Pipelined Natural Language Generation Architecture</title>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <doi>10.3115/1075096.1075130</doi>
       <pages>264–271</pages>
       <url hash="842a18bf">P03-1034</url>
@@ -340,7 +340,7 @@
       <title>Improved Source-Channel Models for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Mu</first><last>Li</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <doi>10.3115/1075096.1075131</doi>
       <pages>272–279</pages>
       <url hash="48f01688">P03-1035</url>
@@ -365,8 +365,8 @@
     <paper id="38">
       <title>Self-Organizing <fixed-case>M</fixed-case>arkov Models and Their Application to Part-of-Speech Tagging</title>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1075096.1075134</doi>
       <pages>296–302</pages>
       <url hash="85bad37a">P03-1038</url>
@@ -375,8 +375,8 @@
     <paper id="39">
       <title>Chunk-Based Statistical Translation</title>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <doi>10.3115/1075096.1075135</doi>
       <pages>303–310</pages>
       <url hash="68b4196a">P03-1039</url>
@@ -394,8 +394,8 @@
     <paper id="41">
       <title>Effective Phrase Translation Extraction from Alignment Models</title>
       <author><first>Ashish</first><last>Venugopal</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/1075096.1075137</doi>
       <pages>319–326</pages>
       <url hash="bbe0c78e">P03-1041</url>
@@ -416,7 +416,7 @@
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Jihong</first><last>Ding</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <doi>10.3115/1075096.1075139</doi>
       <pages>335–342</pages>
       <url hash="4c3622d0">P03-1043</url>
@@ -458,7 +458,7 @@
     </paper>
     <paper id="48">
       <title>Evaluation Challenges in Large-Scale Document Summarization</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Wai</first><last>Lam</last></author>
@@ -466,7 +466,7 @@
       <author><first>Hong</first><last>Qi</last></author>
       <author><first>Arda</first><last>Çelebi</last></author>
       <author><first>Danyu</first><last>Liu</last></author>
-      <author><first>Elliott</first><last>Drabek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott</first><last>Drabek</last></author>
       <doi>10.3115/1075096.1075144</doi>
       <pages>375–382</pages>
       <url hash="a7f2509f">P03-1048</url>
@@ -474,7 +474,7 @@
     </paper>
     <paper id="49">
       <title>Analysis of Source Identified Text Corpora: Exploring the Statistics of the Reused Text and Authorship</title>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <doi>10.3115/1075096.1075145</doi>
       <pages>383–390</pages>
       <url hash="0a1d48db">P03-1049</url>
@@ -483,7 +483,7 @@
     <paper id="50">
       <title>Unsupervised Learning of <fixed-case>A</fixed-case>rabic Stemming Using a Parallel Corpus</title>
       <author><first>Monica</first><last>Rogati</last></author>
-      <author><first>Scott</first><last>McCarley</last></author>
+      <author id="j-scott-mccarley"><first>Scott</first><last>McCarley</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
       <doi>10.3115/1075096.1075146</doi>
       <pages>391–398</pages>
@@ -494,9 +494,9 @@
       <title>Language Model Based <fixed-case>A</fixed-case>rabic Word Segmentation</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
       <author><first>Kishore</first><last>Papineni</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <doi>10.3115/1075096.1075147</doi>
       <pages>399–406</pages>
       <url hash="b3d773ec">P03-1051</url>
@@ -504,7 +504,7 @@
     </paper>
     <paper id="52">
       <title>Acquiring Vocabulary for Predictive Text Entry through Dynamic Reuse of a Small User Corpus</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Daichi</first><last>Hayakawa</last></author>
       <author><first>Masato</first><last>Takeichi</last></author>
       <doi>10.3115/1075096.1075148</doi>
@@ -523,7 +523,7 @@
     <paper id="54">
       <title>Accurate Unlexicalized Parsing</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1075096.1075150</doi>
       <pages>423–430</pages>
       <award>Best Paper</award>
@@ -541,8 +541,8 @@
     </paper>
     <paper id="56">
       <title>Is it Harder to Parse <fixed-case>C</fixed-case>hinese, or the <fixed-case>C</fixed-case>hinese Treebank?</title>
-      <author><first>Roger</first><last>Levy</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1075096.1075152</doi>
       <pages>439–446</pages>
       <url hash="aa72715d">P03-1056</url>
@@ -551,8 +551,8 @@
     <paper id="57">
       <title>Feedback Cleaning of Machine Translation Rules Using Automatic Evaluation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1075096.1075153</doi>
       <pages>447–454</pages>
       <url hash="cc20c214">P03-1057</url>
@@ -570,7 +570,7 @@
     </paper>
     <paper id="59">
       <title>Learning the Countability of <fixed-case>E</fixed-case>nglish Nouns from Corpus Data</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <doi>10.3115/1075096.1075155</doi>
       <pages>463–470</pages>
@@ -580,8 +580,8 @@
     <paper id="60">
       <title>A Syllable Based Word Recognition Model for <fixed-case>K</fixed-case>orean Noun Extraction</title>
       <author><first>Do-Gil</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Heui-Seok</first><last>Lim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="heui-seok-lim"><first>Heui-Seok</first><last>Lim</last></author>
       <doi>10.3115/1075096.1075156</doi>
       <pages>471–478</pages>
       <url hash="945cf8e0">P03-1060</url>
@@ -603,9 +603,9 @@
       <title>Learning to Predict Pitch Accents and Prosodic Boundaries in <fixed-case>D</fixed-case>utch</title>
       <author><first>Erwin</first><last>Marsi</last></author>
       <author><first>Martin</first><last>Reynaert</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <doi>10.3115/1075096.1075158</doi>
       <pages>489–496</pages>
       <url hash="8855d6e5">P03-1062</url>
@@ -623,7 +623,7 @@
     <paper id="64">
       <title>A <fixed-case>SN</fixed-case>o<fixed-case>W</fixed-case> Based Supertagger with Application to <fixed-case>NP</fixed-case> Chunking</title>
       <author><first>Libin</first><last>Shen</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/1075096.1075160</doi>
       <pages>505–512</pages>
       <url hash="9c8c0c9b">P03-1064</url>
@@ -635,7 +635,7 @@
       <author><first>Xiuhong</first><last>Zhang</last></author>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Yuankai</first><last>Jiang</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <doi>10.3115/1075096.1075161</doi>
       <pages>513–520</pages>
       <url hash="4c478ac3">P03-1065</url>
@@ -662,7 +662,7 @@
       <title>Towards a Resource for Lexical Semantics: A Large <fixed-case>G</fixed-case>erman Corpus with Extensive Semantic Annotation</title>
       <author><first>Katrin</first><last>Erk</last></author>
       <author><first>Andrea</first><last>Kowalski</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Manfred</first><last>Pinkal</last></author>
       <doi>10.3115/1075096.1075164</doi>
       <pages>537–544</pages>
@@ -679,7 +679,7 @@
     </paper>
     <paper id="70">
       <title>Towards a Model of Face-to-Face Grounding</title>
-      <author><first>Yukiko</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko</first><last>Nakano</last></author>
       <author><first>Gabe</first><last>Reinstein</last></author>
       <author><first>Tom</first><last>Stocky</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
@@ -691,9 +691,9 @@
     <paper id="71">
       <title>Discourse Segmentation of Multi-Party Conversation</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <doi>10.3115/1075096.1075167</doi>
       <pages>562–569</pages>
       <url hash="3df95939">P03-1071</url>
@@ -727,7 +727,7 @@
     </paper>
     <paper id="3">
       <title>On the Applicability of Global Index Grammars</title>
-      <author><first>José M.</first><last>Castaño</last></author>
+      <author id="jose-castano"><first>José M.</first><last>Castaño</last></author>
       <doi>10.3115/1075178.1075180</doi>
       <pages>15–22</pages>
       <url hash="9401366c">P03-2003</url>
@@ -759,7 +759,7 @@
     </paper>
     <paper id="7">
       <title>A Novel Approach to Semantic Indexing Based on Concept</title>
-      <author><first>Bo-Yeong</first><last>Kang</last></author>
+      <author id="bo-yeong-kang"><first>Bo-Yeong</first><last>Kang</last></author>
       <doi>10.3115/1075178.1075184</doi>
       <pages>44–49</pages>
       <url hash="cdbe15c6">P03-2007</url>
@@ -791,7 +791,7 @@
     </paper>
     <paper id="11">
       <title>Semantic Classification of <fixed-case>C</fixed-case>hinese Unknown Words</title>
-      <author><first>Huihsin</first><last>Tseng</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
       <doi>10.3115/1075178.1075188</doi>
       <pages>72–79</pages>
       <url hash="6e5aee90">P03-2011</url>
@@ -832,7 +832,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>K</fixed-case>iwi: A Multilingual Usage Consultation Tool based on <fixed-case>I</fixed-case>nternet Searching</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Masato</first><last>Yamamoto</last></author>
       <author><first>Hiroshi</first><last>Nakagawa</last></author>
       <doi>10.3115/1075178.1075192</doi>
@@ -862,10 +862,10 @@
     <paper id="19">
       <title>Integrating Information Extraction and Automatic Hyperlinking</title>
       <author><first>Stephan</first><last>Busemann</last></author>
-      <author><first>Witold</first><last>Drozdzynski</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="witold-drozdzynski"><first>Witold</first><last>Drozdzynski</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Ulrich</first><last>Schaefer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schaefer</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <doi>10.3115/1075178.1075195</doi>
@@ -875,7 +875,7 @@
     </paper>
     <paper id="20">
       <title>Automatic Collection of Related Terms from the Web</title>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Yasuhiro</first><last>Sasaki</last></author>
       <doi>10.3115/1075178.1075196</doi>
       <pages>121–124</pages>
@@ -885,8 +885,8 @@
     <paper id="21">
       <title>i<fixed-case>N</fixed-case>e<fixed-case>ATS</fixed-case>: Interactive Multi-Document Summarization</title>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.3115/1075178.1075197</doi>
       <pages>125–128</pages>
       <url hash="6fd4d76b">P03-2021</url>
@@ -917,12 +917,12 @@
     </paper>
     <paper id="24">
       <title>A Limited-Domain <fixed-case>E</fixed-case>nglish to <fixed-case>J</fixed-case>apanese Medical Speech Translator Built Using <fixed-case>REGULUS</fixed-case> 2</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Vol</first><last>Van Dalsem III</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <doi>10.3115/1075178.1075200</doi>
       <pages>137–140</pages>
       <url hash="01d4ee4e">P03-2024</url>
@@ -932,7 +932,7 @@
       <title>Bilingual Terminology Acquisition from Comparable Corpora and Phrasal Translation to Cross-Language Information Retrieval</title>
       <author><first>Fatiha</first><last>Sadat</last></author>
       <author><first>Masatoshi</first><last>Yoshikawa</last></author>
-      <author><first>Shunsuke</first><last>Uemura</last></author>
+      <author id="shunsuke-uemura"><first>Shunsuke</first><last>Uemura</last></author>
       <doi>10.3115/1075178.1075201</doi>
       <pages>141–144</pages>
       <url hash="37894d2d">P03-2025</url>
@@ -980,7 +980,7 @@
       <title>Word Sense Disambiguation Using Pairwise Alignment</title>
       <author><first>Koichi</first><last>Yamashita</last></author>
       <author><first>Keiichi</first><last>Yoshida</last></author>
-      <author><first>Yukihiro</first><last>Itoh</last></author>
+      <author id="yukihiro-itoh"><first>Yukihiro</first><last>Itoh</last></author>
       <doi>10.3115/1075178.1075205</doi>
       <pages>157–160</pages>
       <url hash="74096d3b">P03-2029</url>
@@ -988,7 +988,7 @@
     </paper>
     <paper id="30">
       <title>The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Data and Software</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Hiroaki</first><last>Sato</last></author>
       <doi>10.3115/1075178.1075206</doi>
       <pages>161–164</pages>
@@ -999,7 +999,7 @@
       <title>Automatic Acquisition of Named Entity Tagged Corpus from World Wide Web</title>
       <author><first>Joohui</first><last>An</last></author>
       <author><first>Seungwoo</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <doi>10.3115/1075178.1075207</doi>
       <pages>165–168</pages>
       <url hash="249b6465">P03-2031</url>
@@ -1019,10 +1019,10 @@
     <paper id="33">
       <title>A Debug Tool for Practical Grammar Development</title>
       <author><first>Akane</first><last>Yakushiji</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Naoki</first><last>Yoshinaga</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1075178.1075209</doi>
       <pages>173–176</pages>
       <url hash="e793c319">P03-2033</url>
@@ -1030,7 +1030,7 @@
     </paper>
     <paper id="34">
       <title>A Speech Interface for Open-Domain Question-Answering</title>
-      <author><first>Edward</first><last>Schofield</last></author>
+      <author id="edward-schofield"><first>Edward</first><last>Schofield</last></author>
       <author><first>Zhiping</first><last>Zheng</last></author>
       <doi>10.3115/1075178.1075210</doi>
       <pages>177–180</pages>
@@ -1051,7 +1051,7 @@
       <title>Comparison between <fixed-case>CFG</fixed-case> Filtering Techniques for <fixed-case>LTAG</fixed-case> and <fixed-case>HPSG</fixed-case></title>
       <author><first>Naoki</first><last>Yoshinaga</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1075178.1075212</doi>
       <pages>185–188</pages>
       <url hash="1ff4b7f7">P03-2036</url>
@@ -1060,7 +1060,7 @@
     <paper id="37">
       <title>Automatic Detection of Grammar Elements that Decrease Readability</title>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <doi>10.3115/1075178.1075213</doi>
       <pages>189–192</pages>
       <url hash="a2e44001">P03-2037</url>
@@ -1068,12 +1068,12 @@
     </paper>
     <paper id="38">
       <title>An Intelligent Procedure Assistant Built Using <fixed-case>REGULUS</fixed-case> 2 and <fixed-case>ALTERF</fixed-case></title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Jim</first><last>Hieronymus</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Greg</first><last>Aist</last></author>
-      <author><first>Susana</first><last>Early</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="gregory-aist"><first>Greg</first><last>Aist</last></author>
+      <author id="susana-early"><first>Susana</first><last>Early</last></author>
       <doi>10.3115/1075178.1075214</doi>
       <pages>193–196</pages>
       <url hash="794ee307">P03-2038</url>
@@ -1081,9 +1081,9 @@
     </paper>
     <paper id="39">
       <title><fixed-case>C</fixed-case>hinese Unknown Word Identification Using Character-based Tagging and Chunking</title>
-      <author><first>Chooi Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi Ling</first><last>Goh</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1075178.1075215</doi>
       <pages>197–200</pages>
       <url hash="47975bcb">P03-2039</url>
@@ -1091,11 +1091,11 @@
     </paper>
     <paper id="40">
       <title><fixed-case>T</fixed-case>otal<fixed-case>R</fixed-case>ecall: A Bilingual Concordance for Computer Assisted Translation and Language Learning</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Kevin C.</first><last>Yeh</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="kevin-c-yeh"><first>Kevin C.</first><last>Yeh</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <author><first>Wen-Chi</first><last>Shei</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <doi>10.3115/1075178.1075216</doi>
       <pages>201–204</pages>
       <url hash="06b0bbaf">P03-2040</url>
@@ -1103,7 +1103,7 @@
     </paper>
     <paper id="41">
       <title>Learning Non-Isomorphic Tree Mappings for Machine Translation</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1075178.1075217</doi>
       <pages>205–208</pages>
       <url hash="97327b36">P03-2041</url>
diff --git a/data/xml/P04.xml b/data/xml/P04.xml
index 67af34de73..8edef5c473 100644
--- a/data/xml/P04.xml
+++ b/data/xml/P04.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Optimization in Multimodal Interpretation</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <author><first>Pengyu</first><last>Hong</last></author>
       <author><first>Michelle X.</first><last>Zhou</last></author>
       <author><first>Zahar</first><last>Prasov</last></author>
@@ -43,7 +43,7 @@
     <paper id="4">
       <title>Analysis of Mixed Natural and Symbolic Input in Mathematical Dialogs</title>
       <author><first>Magdalena</first><last>Wolska</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <doi>10.3115/1218955.1218959</doi>
       <pages>25–32</pages>
       <url hash="c2d70c55">P04-1004</url>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="6">
       <title>Attention Shifting for Parsing Speech</title>
-      <author><first>Keith B.</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith B.</first><last>Hall</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.3115/1218955.1218961</doi>
       <pages>40–46</pages>
@@ -70,8 +70,8 @@
     <paper id="7">
       <title>Discriminative Language Modeling with Conditional Random Fields and the Perceptron Algorithm</title>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.3115/1218955.1218962</doi>
       <pages>47–54</pages>
@@ -82,7 +82,7 @@
       <title>Statistical Modeling for Unit Selection in Speech Synthesis</title>
       <author><first>Mehryar</first><last>Mohri</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <doi>10.3115/1218955.1218963</doi>
       <pages>55–62</pages>
       <url hash="d0465c10">P04-1008</url>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="9">
       <title>Developing a Flexible Spoken Dialog System Using Simulation</title>
-      <author><first>Grace</first><last>Chung</last></author>
+      <author id="grace-chung"><first>Grace</first><last>Chung</last></author>
       <doi>10.3115/1218955.1218964</doi>
       <pages>63–70</pages>
       <url hash="82797001">P04-1009</url>
@@ -99,11 +99,11 @@
     <paper id="10">
       <title>Data-Driven Strategies for an Automated Dialogue System</title>
       <author><first>Hilda</first><last>Hardy</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Min</first><last>Wu</last></author>
-      <author><first>Cristian</first><last>Ursu</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
-      <author><first>Alan</first><last>Biermann</last></author>
+      <author id="cristian-ursu"><first>Cristian</first><last>Ursu</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
+      <author id="alan-w-biermann"><first>Alan</first><last>Biermann</last></author>
       <author><first>R. Bryce</first><last>Inouye</last></author>
       <author><first>Ashley</first><last>McKenzie</last></author>
       <doi>10.3115/1218955.1218965</doi>
@@ -113,9 +113,9 @@
     </paper>
     <paper id="11">
       <title>Trainable Sentence Planning for Complex Information Presentations in Spoken Dialog Systems</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <doi>10.3115/1218955.1218966</doi>
       <pages>79–86</pages>
       <url hash="9a29bce5">P04-1011</url>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="12">
       <title>User Expertise Modeling and Adaptivity in a Speech-Based <fixed-case>E</fixed-case>-Mail System</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Kari</first><last>Kanto</last></author>
       <doi>10.3115/1218955.1218967</doi>
       <pages>87–94</pages>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="13">
       <title>Discriminative Training of a Neural Network Statistical Parser</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <doi>10.3115/1218955.1218968</doi>
       <pages>95–102</pages>
       <url hash="91055ec7">P04-1013</url>
@@ -141,7 +141,7 @@
     <paper id="14">
       <title>Parsing the <fixed-case>WSJ</fixed-case> Using <fixed-case>CCG</fixed-case> and Log-Linear Models</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <doi>10.3115/1218955.1218969</doi>
       <pages>103–110</pages>
       <url hash="87aae890">P04-1014</url>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="15">
       <title>Incremental Parsing with the Perceptron Algorithm</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <doi>10.3115/1218955.1218970</doi>
       <pages>111–118</pages>
@@ -170,8 +170,8 @@
       <title>Improving Pronoun Resolution by Incorporating Coreferential Information of Candidates</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <doi>10.3115/1218955.1218972</doi>
       <pages>127–134</pages>
       <url hash="6fb714d0">P04-1017</url>
@@ -179,11 +179,11 @@
     </paper>
     <paper id="18">
       <title>A Mention-Synchronous Coreference Resolution Algorithm Based On the Bell Tree</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/1218955.1218973</doi>
       <pages>135–142</pages>
       <url hash="aee1d887">P04-1018</url>
@@ -191,10 +191,10 @@
     </paper>
     <paper id="19">
       <title>Learning to Resolve Bridging References</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Rahul</first><last>Mehta</last></author>
       <author><first>Axel</first><last>Maroudas</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <doi>10.3115/1218955.1218974</doi>
       <pages>143–150</pages>
       <url hash="5f7c596a">P04-1019</url>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="22">
       <title>Collocation Translation Acquisition Using Monolingual Corpora</title>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <doi>10.3115/1218955.1218977</doi>
       <pages>167–174</pages>
@@ -249,7 +249,7 @@
     <paper id="25">
       <title>Extracting Regulatory Gene Expression Networks From Pubmed</title>
       <author><first>Jasmin</first><last>Saric</last></author>
-      <author><first>Lars J.</first><last>Jensen</last></author>
+      <author id="lars-juhl-jensen"><first>Lars J.</first><last>Jensen</last></author>
       <author><first>Peer</first><last>Bork</last></author>
       <author><first>Rossitza</first><last>Ouzounova</last></author>
       <author><first>Isabel</first><last>Rojas</last></author>
@@ -260,7 +260,7 @@
     </paper>
     <paper id="26">
       <title>Linguistic Profiling for Authorship Recognition and Verification</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <doi>10.3115/1218955.1218981</doi>
       <pages>199–206</pages>
       <url hash="e46629a3">P04-1026</url>
@@ -268,10 +268,10 @@
     </paper>
     <paper id="27">
       <title>An Empirical Study of Information Synthesis Task</title>
-      <author><first>Enrique</first><last>Amigo</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigo</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Victor</first><last>Peinado</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="victor-peinado"><first>Victor</first><last>Peinado</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <doi>10.3115/1218955.1218982</doi>
       <pages>207–214</pages>
@@ -280,7 +280,7 @@
     </paper>
     <paper id="28">
       <title>Mining Metalinguistic Activity in Corpora to Create Lexical Resources Using Information Extraction Techniques: the <fixed-case>MOP</fixed-case> System</title>
-      <author><first>Carlos Rodriguez</first><last>Penagos</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos Rodriguez</first><last>Penagos</last></author>
       <doi>10.3115/1218955.1218983</doi>
       <pages>215–222</pages>
       <url hash="c622c82c">P04-1028</url>
@@ -327,7 +327,7 @@
     <paper id="33">
       <title>Learning with Unlabeled Data for Text Categorization Using a Bootstrapping and a Feature Projection Technique</title>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <doi>10.3115/1218955.1218988</doi>
       <pages>255–262</pages>
       <url hash="9036e022">P04-1033</url>
@@ -354,10 +354,10 @@
     </paper>
     <paper id="36">
       <title>Finding Predominant Word Senses in Untagged Text</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <doi>10.3115/1218955.1218991</doi>
       <pages>279–286</pages>
       <award>Best Paper</award>
@@ -377,7 +377,7 @@
     <paper id="38">
       <title><fixed-case>C</fixed-case>hinese Verb Sense Discrimination Using an <fixed-case>EM</fixed-case> Clustering Model with Rich Linguistic Features</title>
       <author><first>Jinying</first><last>Chen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/1218955.1218993</doi>
       <pages>295–302</pages>
       <url hash="483a9e64">P04-1038</url>
@@ -385,7 +385,7 @@
     </paper>
     <paper id="39">
       <title>Relieving the data Acquisition Bottleneck in Word Sense Disambiguation</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <doi>10.3115/1218955.1218994</doi>
       <pages>303–310</pages>
       <url hash="8d122c38">P04-1039</url>
@@ -394,7 +394,7 @@
     <paper id="40">
       <title>Enriching the Output of a Parser Using Memory-based Learning</title>
       <author><first>Valentin</first><last>Jijkoun</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <doi>10.3115/1218955.1218995</doi>
       <pages>311–318</pages>
       <url hash="58425dd8">P04-1040</url>
@@ -405,7 +405,7 @@
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Michael</first><last>Burke</last></author>
       <author><first>Ruth</first><last>O’Donovan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <doi>10.3115/1218955.1218996</doi>
       <pages>319–326</pages>
@@ -414,8 +414,8 @@
     </paper>
     <paper id="42">
       <title>Deep Dependencies from Context-Free Statistical Parsers: Correcting the Surface Dependency Approximation</title>
-      <author><first>Roger</first><last>Levy</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <doi>10.3115/1218955.1218997</doi>
       <pages>327–334</pages>
       <url hash="624644ce">P04-1042</url>
@@ -440,8 +440,8 @@
     </paper>
     <paper id="45">
       <title>Predicting Student Emotions in Computer-Human Tutoring Dialogues</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
       <doi>10.3115/1218955.1219000</doi>
       <pages>351–358</pages>
       <url hash="3b8060b9">P04-1045</url>
@@ -460,7 +460,7 @@
       <author><first>Ruth</first><last>O’Donovan</last></author>
       <author><first>Michael</first><last>Burke</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <doi>10.3115/1218955.1219002</doi>
       <pages>367–374</pages>
@@ -470,7 +470,7 @@
     <paper id="48">
       <title>Inducing Frame Semantic Verb Classes from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>LDOCE</fixed-case></title>
       <author><first>Rebecca</first><last>Green</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <doi>10.3115/1218955.1219003</doi>
       <pages>375–382</pages>
@@ -489,9 +489,9 @@
     <paper id="50">
       <title>Evaluating Centering-Based Metrics of Coherence</title>
       <author><first>Nikiforos</first><last>Karamanis</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <doi>10.3115/1218955.1219005</doi>
       <pages>391–398</pages>
       <url hash="dd4c7ca8">P04-1050</url>
@@ -520,7 +520,7 @@
       <title>Discovering Relations among Named Entities from Large Corpora</title>
       <author><first>Takaaki</first><last>Hasegawa</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1218955.1219008</doi>
       <pages>415–422</pages>
       <url hash="8f7dc67d">P04-1053</url>
@@ -529,7 +529,7 @@
     <paper id="54">
       <title>Dependency Tree Kernels for Relation Extraction</title>
       <author><first>Aron</first><last>Culotta</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <doi>10.3115/1218955.1219009</doi>
       <pages>423–429</pages>
       <url hash="5472e946">P04-1054</url>
@@ -538,7 +538,7 @@
     <paper id="55">
       <title>Classifying Semantic Relations in Bioscience Texts</title>
       <author><first>Barbara</first><last>Rosario</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <doi>10.3115/1218955.1219010</doi>
       <pages>430–437</pages>
       <url hash="35da99f7">P04-1055</url>
@@ -546,8 +546,8 @@
     </paper>
     <paper id="56">
       <title>Collective Information Extraction with Relational <fixed-case>M</fixed-case>arkov Networks</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <doi>10.3115/1218955.1219011</doi>
       <pages>438–445</pages>
       <url hash="33a53465">P04-1056</url>
@@ -555,7 +555,7 @@
     </paper>
     <paper id="57">
       <title>Error Mining for Wide-Coverage Grammar Engineering</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <doi>10.3115/1218955.1219012</doi>
       <pages>446–453</pages>
       <url hash="ae2f4080">P04-1057</url>
@@ -564,7 +564,7 @@
     <paper id="58">
       <title>Alternative approaches for Generating Bodies of Grammar Rules</title>
       <author><first>Gabriel</first><last>Infante-Lopez</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <doi>10.3115/1218955.1219013</doi>
       <pages>454–461</pages>
       <url hash="6976b394">P04-1058</url>
@@ -575,7 +575,7 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Andi</first><last>Wu</last></author>
       <author><first>Mu</first><last>Li</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Hongqiao</first><last>Li</last></author>
       <author><first>Xinsong</first><last>Xia</last></author>
       <author><first>Haowei</first><last>Qin</last></author>
@@ -595,7 +595,7 @@
     <paper id="61">
       <title>Corpus-Based Induction of Syntactic Structure: Models of Dependency and Constituency</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <doi>10.3115/1218955.1219016</doi>
       <pages>478–485</pages>
       <url hash="2217832f">P04-1061</url>
@@ -603,8 +603,8 @@
     </paper>
     <paper id="62">
       <title>Annealing Techniques For Unsupervised Statistical Language Learning</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1218955.1219017</doi>
       <pages>486–493</pages>
       <url hash="ab3800a7">P04-1062</url>
@@ -620,9 +620,9 @@
     </paper>
     <paper id="64">
       <title>Aligning words using matrix factorisation</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <doi>10.3115/1218955.1219019</doi>
       <pages>502–509</pages>
       <url hash="d93c4122">P04-1064</url>
@@ -631,7 +631,7 @@
     <paper id="65">
       <title><fixed-case>FSA</fixed-case>: An Efficient and Flexible <fixed-case>C</fixed-case>++ Toolkit for Finite State Automata Using On-Demand Computation</title>
       <author><first>Stephan</first><last>Kanthak</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1218955.1219020</doi>
       <pages>510–517</pages>
       <url hash="49efca0e">P04-1065</url>
@@ -639,7 +639,7 @@
     </paper>
     <paper id="66">
       <title>Improving <fixed-case>IBM</fixed-case> Word Alignment Model 1</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <doi>10.3115/1218955.1219021</doi>
       <pages>518–525</pages>
       <url hash="d30e42fb">P04-1066</url>
@@ -647,7 +647,7 @@
     </paper>
     <paper id="67">
       <title>A Geometric View on Bilingual Lexicon Extraction from Comparable Corpora</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>J.M.</first><last>Renders</last></author>
       <author id="irina-matveeva"><first>I.</first><last>Matveeva</last></author>
       <author id="cyril-goutte"><first>C.</first><last>Goutte</last></author>
@@ -708,7 +708,7 @@
     <paper id="73">
       <title>Question Answering Using Constraint Satisfaction: <fixed-case>QA</fixed-case>-By-Dossier-With-Contraints</title>
       <author><first>John</first><last>Prager</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Krzysztof</first><last>Czuba</last></author>
       <doi>10.3115/1218955.1219028</doi>
       <pages>574–581</pages>
@@ -718,7 +718,7 @@
     <paper id="74">
       <title>Applying Machine Learning to <fixed-case>C</fixed-case>hinese Temporal Relation Resolution</title>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Chunfa</first><last>Yuan</last></author>
       <doi>10.3115/1218955.1219029</doi>
@@ -731,8 +731,8 @@
       <author><first>Dan</first><last>Shen</last></author>
       <author><first>Jie</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <doi>10.3115/1218955.1219030</doi>
       <pages>589–596</pages>
       <url hash="5e22f526">P04-1075</url>
@@ -742,7 +742,7 @@
       <title>Weakly Supervised Learning for Cross-document Person Name Disambiguation Supported by Information Extraction</title>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <doi>10.3115/1218955.1219031</doi>
       <pages>597–604</pages>
       <url hash="93f794b1">P04-1076</url>
@@ -750,8 +750,8 @@
     </paper>
     <paper id="77">
       <title>Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence and Skip-Bigram Statistics</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <doi>10.3115/1218955.1219032</doi>
       <pages>605–612</pages>
       <url hash="11eaaba9">P04-1077</url>
@@ -777,9 +777,9 @@
     </paper>
     <paper id="80">
       <title>Learning Word Sense With Feature Selection and Order Identification Capabilities</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <doi>10.3115/1218955.1219035</doi>
       <pages>629–636</pages>
       <url hash="2818d5a3">P04-1080</url>
@@ -824,9 +824,9 @@
     <paper id="85">
       <title>Identifying Agreement and Disagreement in Conversational Speech: Use of <fixed-case>B</fixed-case>ayesian Networks to Model Pragmatic Dependencies</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <doi>10.3115/1218955.1219040</doi>
       <pages>669–676</pages>
       <url hash="9cccc8eb">P04-1085</url>
@@ -834,7 +834,7 @@
     </paper>
     <paper id="86">
       <title>Using Conditional Random Fields to Predict Pitch Accents in Conversational Speech</title>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <author><first>Yasemin</first><last>Altun</last></author>
       <doi>10.3115/1218955.1219041</doi>
       <pages>677–683</pages>
@@ -889,7 +889,7 @@
     <paper id="3">
       <title>Searching for Topics in a Large Collection of Texts</title>
       <author><first>Martin</first><last>Holub</last></author>
-      <author><first>Jiří</first><last>Semecký</last></author>
+      <author id="jiri-semecky"><first>Jiří</first><last>Semecký</last></author>
       <author><first>Jiří</first><last>Diviš</last></author>
       <pages>13–18</pages>
       <url hash="0e98ed84">P04-2003</url>
@@ -976,7 +976,7 @@
       <title><fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 - An Innovative Computer-Assisted Translation System</title>
       <author><first>José</first><last>Esteban</last></author>
       <author><first>José</first><last>Lorenzo</last></author>
-      <author><first>Antonio S.</first><last>Valderrábanos</last></author>
+      <author id="antonio-s-valderrabanos"><first>Antonio S.</first><last>Valderrábanos</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <pages>94–97</pages>
       <url hash="97960d3b">P04-3001</url>
@@ -1000,10 +1000,10 @@
     </paper>
     <paper id="4">
       <title>Subsentential Translation Memory for Computer Assisted Writing and Translation</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Thomas C.</first><last>Chuang</last></author>
       <author><first>Wen-Chi</first><last>Shei</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>106–109</pages>
       <url hash="174f20ed">P04-3004</url>
       <bibkey>wu-etal-2004-subsentential</bibkey>
@@ -1034,7 +1034,7 @@
     </paper>
     <paper id="8">
       <title>Interactive grammar development with <fixed-case>WCDG</fixed-case></title>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Michael</first><last>Daum</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>122–125</pages>
@@ -1043,7 +1043,7 @@
     </paper>
     <paper id="9">
       <title>Wide Coverage Symbolic Surface Realization</title>
-      <author><first>Charles</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
       <pages>126–129</pages>
       <url hash="6aa3892e">P04-3009</url>
       <bibkey>callaway-2004-wide</bibkey>
@@ -1051,7 +1051,7 @@
     <paper id="10">
       <title>Part-of-Speech Tagging Considering Surface Form for an Agglutinative Language</title>
       <author><first>Do-Gil</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>130–133</pages>
       <url hash="4a1bc998">P04-3010</url>
       <bibkey>lee-rim-2004-part</bibkey>
@@ -1066,21 +1066,21 @@
     </paper>
     <paper id="12">
       <title>Corpus representativeness for syntactic information acquisition</title>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>138–141</pages>
       <url hash="f65f5d5e">P04-3012</url>
       <bibkey>bel-2004-corpus</bibkey>
     </paper>
     <paper id="13">
       <title>Exploiting Unannotated Corpora for Tagging and Chunking</title>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
       <pages>142–145</pages>
       <url hash="1c910a42">P04-3013</url>
       <bibkey>ando-2004-exploiting</bibkey>
     </paper>
     <paper id="14">
       <title>Improving Bitext Word Alignments via Syntax-based Reordering of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Elliott Franco</first><last>Drabek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott Franco</first><last>Drabek</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>146–149</pages>
       <url hash="a1d349cd">P04-3014</url>
@@ -1097,10 +1097,10 @@
     </paper>
     <paper id="16">
       <title>Knowledge intensive e-mail summarization in <fixed-case>CARPANTA</fixed-case></title>
-      <author><first>Laura</first><last>Alonso</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Bernardino</first><last>Casas</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="bernardino-casas"><first>Bernardino</first><last>Casas</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>154–157</pages>
       <url hash="5f6bdb61">P04-3016</url>
       <bibkey>alonso-etal-2004-knowledge</bibkey>
@@ -1108,48 +1108,48 @@
     <paper id="17">
       <title>Finding Anchor Verbs for Biomedical <fixed-case>IE</fixed-case> Using Predicate-Argument Structures</title>
       <author><first>Akane</first><last>Yakushiji</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>158–161</pages>
       <url hash="09435c6a">P04-3017</url>
       <bibkey>yakushiji-etal-2004-finding</bibkey>
     </paper>
     <paper id="18">
       <title>Resource Analysis for Question Answering</title>
-      <author><first>Lucian Vlad</first><last>Lita</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian Vlad</first><last>Lita</last></author>
       <author><first>Warren A.</first><last>Hunt</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>162–165</pages>
       <url hash="c0559b3c">P04-3018</url>
       <bibkey>lita-etal-2004-resource</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>TANGO</fixed-case>: Bilingual Collocational Concordancer</title>
-      <author><first>Jia-Yan</first><last>Jian</last></author>
+      <author id="jia-yan-jian"><first>Jia-Yan</first><last>Jian</last></author>
       <author><first>Yu-Chia</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>166–169</pages>
       <url hash="cfdb9f00">P04-3019</url>
       <bibkey>jian-etal-2004-tango</bibkey>
     </paper>
     <paper id="20">
       <title>Graph-based Ranking Algorithms for Sentence Extraction, Applied to Text Summarization</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>170–173</pages>
       <url hash="aa1fa501">P04-3020</url>
       <bibkey>mihalcea-2004-graph</bibkey>
     </paper>
     <paper id="21">
       <title>Compiling Boostexter Rules into a Finite-state Transducer</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>174–177</pages>
       <url hash="6fdf9d01">P04-3021</url>
       <bibkey>bangalore-2004-compiling</bibkey>
     </paper>
     <paper id="22">
       <title>Combining Lexical, Syntactic, and Semantic Features with Maximum Entropy Models for Information Extraction</title>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
       <pages>178–181</pages>
       <url hash="4adb397e">P04-3022</url>
       <bibkey>kambhatla-2004-combining</bibkey>
@@ -1185,8 +1185,8 @@
     </paper>
     <paper id="27">
       <title>Automatic clustering of collocation for detecting practical sense boundary</title>
-      <author><first>Saim</first><last>Shin</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="saim-shin"><first>Saim</first><last>Shin</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>198–201</pages>
       <url hash="ec970462">P04-3027</url>
       <bibkey>shin-choi-2004-automatic</bibkey>
@@ -1194,7 +1194,7 @@
     <paper id="28">
       <title>Co-training for Predicting Emotions with Spoken Dialogue Data</title>
       <author><first>Beatriz</first><last>Maeireizo</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <pages>202–205</pages>
       <url hash="78fd3a6e">P04-3028</url>
@@ -1211,7 +1211,7 @@
     <paper id="30">
       <title>Wysiwym with wider coverage</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>210–213</pages>
       <url hash="32c19295">P04-3030</url>
       <bibkey>power-evans-2004-wysiwym</bibkey>
@@ -1226,17 +1226,17 @@
     </paper>
     <paper id="32">
       <title><fixed-case>D</fixed-case>yna: A Language for Weighted Dynamic Programming</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Eric</first><last>Goldlust</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>218–221</pages>
       <url hash="42face46">P04-3032</url>
       <bibkey>eisner-etal-2004-dyna</bibkey>
     </paper>
     <paper id="33">
       <title><fixed-case>MATCH</fixed-case>kiosk: A Multimodal Interactive City Guide</title>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>222–225</pages>
       <url hash="2150dfc2">P04-3033</url>
       <bibkey>johnston-bangalore-2004-matchkiosk</bibkey>
@@ -1245,7 +1245,7 @@
       <title>Fragments and Text Categorization</title>
       <author><first>Jan</first><last>Blaták</last></author>
       <author><first>Eva</first><last>Mráková</last></author>
-      <author><first>Lubos</first><last>Popelínsky</last></author>
+      <author id="lubos-popelinsky"><first>Lubos</first><last>Popelínsky</last></author>
       <pages>226–229</pages>
       <url hash="75dcef44">P04-3034</url>
       <bibkey>blatak-etal-2004-fragments</bibkey>
diff --git a/data/xml/P05.xml b/data/xml/P05.xml
index 8930a297d5..420d78d3d0 100644
--- a/data/xml/P05.xml
+++ b/data/xml/P05.xml
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>A High-Performance Semi-Supervised Learning Method for Text Chunking</title>
-      <author><first>Rie</first><last>Ando</last></author>
+      <author id="rie-johnson"><first>Rie</first><last>Ando</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>1–9</pages>
       <url hash="48e70f06">P05-1001</url>
@@ -28,8 +28,8 @@
     </paper>
     <paper id="2">
       <title>Scaling Conditional Random Fields Using Error-Correcting Codes</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Andrew</first><last>Smith</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="andrew-smith"><first>Andrew</first><last>Smith</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>10–17</pages>
       <url hash="48a9d0ec">P05-1002</url>
@@ -38,8 +38,8 @@
     </paper>
     <paper id="3">
       <title>Logarithmic Opinion Pools for Conditional Random Fields</title>
-      <author><first>Andrew</first><last>Smith</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="andrew-smith"><first>Andrew</first><last>Smith</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>18–25</pages>
       <url hash="f3701613">P05-1003</url>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="4">
       <title>Supersense Tagging of Unknown Nouns Using Semantic Similarity</title>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>26–33</pages>
       <url hash="7ee72d3c">P05-1004</url>
       <doi>10.3115/1219840.1219844</doi>
@@ -65,8 +65,8 @@
     </paper>
     <paper id="6">
       <title>The Role of Semantic Roles in Disambiguating Verb Senses</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>42–49</pages>
       <url hash="f5df2003">P05-1006</url>
       <doi>10.3115/1219840.1219846</doi>
@@ -77,8 +77,8 @@
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <author><first>Davide</first><last>Fossati</last></author>
       <author><first>Dan</first><last>Yu</last></author>
-      <author><first>Susan</first><last>Haller</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="susan-haller"><first>Susan</first><last>Haller</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <pages>50–57</pages>
       <url hash="7b45056c">P05-1007</url>
       <doi>10.3115/1219840.1219847</doi>
@@ -86,8 +86,8 @@
     </paper>
     <paper id="8">
       <title>Empirically-based Control of Natural Language Generation</title>
-      <author><first>Daniel S.</first><last>Paiva</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="daniel-paiva"><first>Daniel S.</first><last>Paiva</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>58–65</pages>
       <url hash="7e069acd">P05-1008</url>
       <doi>10.3115/1219840.1219848</doi>
@@ -106,7 +106,7 @@
       <title>Probabilistic <fixed-case>CFG</fixed-case> with Latent Annotations</title>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>75–82</pages>
       <url hash="b3e17063">P05-1010</url>
       <doi>10.3115/1219840.1219850</doi>
@@ -115,7 +115,7 @@
     <paper id="11">
       <title>Probabilistic Disambiguation Models for Wide-Coverage <fixed-case>HPSG</fixed-case> Parsing</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>83–90</pages>
       <url hash="3e84b1df">P05-1011</url>
       <doi>10.3115/1219840.1219851</doi>
@@ -142,7 +142,7 @@
     </paper>
     <paper id="14">
       <title>The Distributional Inclusion Hypotheses and Lexical Entailment</title>
-      <author><first>Maayan</first><last>Geffet</last></author>
+      <author id="maayan-geffet"><first>Maayan</first><last>Geffet</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>107–114</pages>
       <url hash="7883e4b6">P05-1014</url>
@@ -170,7 +170,7 @@
       <title>Extracting Semantic Orientations of Words using Spin Model</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <author><first>Takashi</first><last>Inui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>133–140</pages>
       <url hash="8394de81">P05-1017</url>
       <doi>10.3115/1219840.1219857</doi>
@@ -205,7 +205,7 @@
       <title>Improving Pronoun Resolution Using Statistics-Based Semantic Compatibility Information</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>165–172</pages>
       <url hash="ef6ca82f">P05-1021</url>
       <doi>10.3115/1219840.1219861</doi>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="23">
       <title>Data-Defined Kernels for Parse Reranking Derived from Probabilistic Models</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>181–188</pages>
       <url hash="943de270">P05-1023</url>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="24">
       <title>Boosting-based Parse Reranking with Subtree Features</title>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Jun</first><last>Suzuki</last></author>
       <author><first>Hideki</first><last>Isozaki</last></author>
       <pages>189–196</pages>
@@ -242,7 +242,7 @@
     <paper id="25">
       <title>Automatic Measurement of Syntactic Development in Child Language</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
       <pages>197–204</pages>
       <url hash="1c8ebbab">P05-1025</url>
@@ -251,10 +251,10 @@
     </paper>
     <paper id="26">
       <title>Experiments with Interactive Question-Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <author><first>Andrew</first><last>Hickl</last></author>
       <author><first>John</first><last>Lehmann</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>205–214</pages>
       <url hash="6006978e">P05-1026</url>
       <doi>10.3115/1219840.1219866</doi>
@@ -271,10 +271,10 @@
     <paper id="28">
       <title>Exploring and Exploiting the Limited Utility of Captions in Recognizing Intention in Information Graphics</title>
       <author><first>Stephanie</first><last>Elzer</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <author><first>Daniel</first><last>Chester</last></author>
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
       <author><first>Keith</first><last>Trnka</last></author>
       <pages>223–230</pages>
@@ -285,7 +285,7 @@
     <paper id="29">
       <title>Scaling up from Dialogue to Multilogue: Some Principles and Benchmarks</title>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>231–238</pages>
       <url hash="abcc8113">P05-1029</url>
       <doi>10.3115/1219840.1219869</doi>
@@ -294,7 +294,7 @@
     <paper id="30">
       <title>Implications for Generating Clarification Requests in Task-Oriented Dialogues</title>
       <author><first>Verena</first><last>Rieser</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>239–246</pages>
       <url hash="574eaefb">P05-1030</url>
       <doi>10.3115/1219840.1219870</doi>
@@ -339,9 +339,9 @@
     </paper>
     <paper id="35">
       <title><fixed-case>QARLA</fixed-case>: A Framework for the Evaluation of Text Summarization Systems</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>280–289</pages>
       <url hash="1fda44f4">P05-1035</url>
@@ -360,7 +360,7 @@
     <paper id="37">
       <title>Digesting Virtual “Geek” Culture: The Summarization of Technical <fixed-case>I</fixed-case>nternet Relay Chats</title>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>298–305</pages>
       <url hash="ab045f0a">P05-1037</url>
       <doi>10.3115/1219840.1219877</doi>
@@ -386,7 +386,7 @@
     <paper id="40">
       <title>Detecting Errors in Discontinuous Structural Annotation</title>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <pages>322–329</pages>
       <url hash="44808be8">P05-1040</url>
       <doi>10.3115/1219840.1219880</doi>
@@ -406,7 +406,7 @@
     <paper id="42">
       <title>A Dynamic <fixed-case>B</fixed-case>ayesian Framework to Model Context and Memory in Edit Distance Learning: An Application to Pronunciation Classification</title>
       <author><first>Karim</first><last>Filali</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>338–345</pages>
       <url hash="3059a53f">P05-1042</url>
       <doi>10.3115/1219840.1219882</doi>
@@ -422,8 +422,8 @@
     </paper>
     <paper id="44">
       <title>Contrastive Estimation: Training Log-Linear Models on Unlabeled Data</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>354–362</pages>
       <url hash="6a6f7b8b">P05-1044</url>
       <doi>10.3115/1219840.1219884</doi>
@@ -431,9 +431,9 @@
     </paper>
     <paper id="45">
       <title>Incorporating Non-local Information into Information Extraction Systems by <fixed-case>G</fixed-case>ibbs Sampling</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
       <author><first>Trond</first><last>Grenager</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>363–370</pages>
       <url hash="d8e3a88d">P05-1045</url>
       <doi>10.3115/1219840.1219885</doi>
@@ -443,7 +443,7 @@
       <title>Unsupervised Learning of Field Segmentation Models for Information Extraction</title>
       <author><first>Trond</first><last>Grenager</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>371–378</pages>
       <url hash="47072447">P05-1046</url>
       <doi>10.3115/1219840.1219886</doi>
@@ -452,7 +452,7 @@
     <paper id="47">
       <title>A Semantic Approach to <fixed-case>IE</fixed-case> Pattern Induction</title>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Mark</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark</first><last>Greenwood</last></author>
       <pages>379–386</pages>
       <url hash="83ed4548">P05-1047</url>
       <doi>10.3115/1219840.1219887</doi>
@@ -469,9 +469,9 @@
     </paper>
     <paper id="49">
       <title>Word Sense Disambiguation Using Label Propagation Based Semi-Supervised Learning</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>395–402</pages>
       <url hash="17a35529">P05-1049</url>
       <doi>10.3115/1219840.1219889</doi>
@@ -479,8 +479,8 @@
     </paper>
     <paper id="50">
       <title>Domain Kernels for Word Sense Disambiguation</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>403–410</pages>
       <url hash="e9311a98">P05-1050</url>
@@ -490,7 +490,7 @@
     <paper id="51">
       <title>Improving Name Tagging by Reference Resolution and Relation Detection</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>411–418</pages>
       <url hash="859cb84e">P05-1051</url>
       <doi>10.3115/1219840.1219891</doi>
@@ -499,7 +499,7 @@
     <paper id="52">
       <title>Extracting Relations with Integrated Information Using Kernel Methods</title>
       <author><first>Shubin</first><last>Zhao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>419–426</pages>
       <url hash="017f5af8">P05-1052</url>
       <doi>10.3115/1219840.1219892</doi>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="53">
       <title>Exploring Various Knowledge in Relation Extraction</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Jie</first><last>Zhang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
@@ -519,7 +519,7 @@
     <paper id="54">
       <title>A Quantitative Analysis of Lexical Differences Between Genders in Telephone Conversations</title>
       <author><first>Constantinos</first><last>Boulis</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>435–442</pages>
       <url hash="97b6c71c">P05-1054</url>
       <doi>10.3115/1219840.1219894</doi>
@@ -537,9 +537,9 @@
     <paper id="56">
       <title>Using Conditional Random Fields for Sentence Boundary Detection in Speech</title>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>451–458</pages>
       <url hash="226453be">P05-1056</url>
       <doi>10.3115/1219840.1219896</doi>
@@ -576,7 +576,7 @@
     </paper>
     <paper id="60">
       <title>Multi-Field Information Extraction and Cross-Document Fusion</title>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>483–490</pages>
       <url hash="aae7a93e">P05-1060</url>
@@ -590,7 +590,7 @@
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Scott</first><last>Winters</last></author>
       <author><first>Yang</first><last>Jin</last></author>
-      <author><first>Pete</first><last>White</last></author>
+      <author id="peter-white"><first>Pete</first><last>White</last></author>
       <pages>491–498</pages>
       <url hash="0f58c3b8">P05-1061</url>
       <doi>10.3115/1219840.1219901</doi>
@@ -608,9 +608,9 @@
     </paper>
     <paper id="63">
       <title>Discriminative Syntactic Language Modeling for Speech Recognition</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <pages>507–514</pages>
       <url hash="7d1f7cbf">P05-1063</url>
       <doi>10.3115/1219840.1219903</doi>
@@ -627,8 +627,8 @@
     </paper>
     <paper id="65">
       <title>Reading Level Assessment Using Support Vector Machines and Statistical Language Models</title>
-      <author><first>Sarah</first><last>Schwarm</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="sarah-e-schwarm"><first>Sarah</first><last>Schwarm</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>523–530</pages>
       <url hash="702cc3ba">P05-1065</url>
       <doi>10.3115/1219840.1219905</doi>
@@ -636,9 +636,9 @@
     </paper>
     <paper id="66">
       <title>Clause Restructuring for Statistical Machine Translation</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Ivona</first><last>Kučerová</last></author>
+      <author id="ivona-kucerova"><first>Ivona</first><last>Kučerová</last></author>
       <pages>531–540</pages>
       <url hash="981f8977">P05-1066</url>
       <doi>10.3115/1219840.1219906</doi>
@@ -647,7 +647,7 @@
     <paper id="67">
       <title>Machine Translation Using Probabilistic Synchronous Dependency Insertion Grammars</title>
       <author><first>Yuan</first><last>Ding</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>541–548</pages>
       <url hash="d25626cd">P05-1067</url>
       <doi>10.3115/1219840.1219907</doi>
@@ -664,7 +664,7 @@
     </paper>
     <paper id="69">
       <title>A Localized Prediction Model for Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>557–564</pages>
       <url hash="32da38bb">P05-1069</url>
@@ -683,7 +683,7 @@
     <paper id="71">
       <title><fixed-case>A</fixed-case>rabic Tokenization, Part-of-Speech Tagging and Morphological Disambiguation in One Fell Swoop</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>573–580</pages>
       <url hash="ea33345d">P05-1071</url>
       <doi>10.3115/1219840.1219911</doi>
@@ -691,11 +691,11 @@
     </paper>
     <paper id="72">
       <title>Semantic Role Labeling Using Different Syntactic Views</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>James</first><last>Martin</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>581–588</pages>
       <url hash="51361879">P05-1072</url>
       <doi>10.3115/1219840.1219912</doi>
@@ -705,7 +705,7 @@
       <title>Joint Learning Improves Semantic Role Labeling</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Aria</first><last>Haghighi</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>589–596</pages>
       <url hash="500028d1">P05-1073</url>
       <doi>10.3115/1219840.1219913</doi>
@@ -732,7 +732,7 @@
       <title>Automatic Acquisition of Adjectival Subcategorization from Corpora</title>
       <author><first>Jeremy</first><last>Yallop</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>614–621</pages>
       <url hash="b8cad1de">P05-1076</url>
       <doi>10.3115/1219840.1219916</doi>
@@ -742,7 +742,7 @@
       <title>Randomized Algorithms and <fixed-case>NLP</fixed-case>: Using Locality Sensitive Hash Functions for High Speed Noun Clustering</title>
       <author><first>Deepak</first><last>Ravichandran</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>622–629</pages>
       <url hash="c1ccde4b">P05-1077</url>
       <doi>10.3115/1219840.1219917</doi>
@@ -844,14 +844,14 @@
     </paper>
     <paper id="12">
       <title>Phrase Linguistic Classification and Generalization for Improving Statistical Machine Translation</title>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <pages>67–72</pages>
       <url hash="5145fea5">P05-2012</url>
       <bibkey>de-gispert-2005-phrase</bibkey>
     </paper>
     <paper id="13">
       <title>Automatic Induction of a <fixed-case>CCG</fixed-case> Grammar for <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Ruken</first><last>Çakıcı</last></author>
+      <author id="ruket-cakici"><first>Ruken</first><last>Çakıcı</last></author>
       <pages>73–78</pages>
       <url hash="2bba926f">P05-2013</url>
       <bibkey>cakici-2005-automatic</bibkey>
@@ -872,14 +872,14 @@
     </paper>
     <paper id="16">
       <title>Dependency-Based Statistical Machine Translation</title>
-      <author><first>Heidi</first><last>Fox</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
       <pages>91–96</pages>
       <url hash="bbea4149">P05-2016</url>
       <bibkey>fox-2005-dependency</bibkey>
     </paper>
     <paper id="17">
       <title>Minimalist Parsing of Subjects Displaced from Embedded Clauses in Free Word Order Languages</title>
-      <author><first>Asad B.</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad B.</first><last>Sayeed</last></author>
       <pages>97–102</pages>
       <url hash="10d5a638">P05-2017</url>
       <bibkey>sayeed-2005-minimalist</bibkey>
@@ -901,7 +901,7 @@
     </paper>
     <paper id="20">
       <title>Learning Information Structure in the <fixed-case>P</fixed-case>rague <fixed-case>T</fixed-case>reebank</title>
-      <author><first>Oana</first><last>Postolache</last></author>
+      <author id="oana-postolache"><first>Oana</first><last>Postolache</last></author>
       <pages>115–120</pages>
       <url hash="9d90250e">P05-2020</url>
       <bibkey>postolache-2005-learning</bibkey>
@@ -923,7 +923,7 @@
     </paper>
     <paper id="23">
       <title>An Unsupervised System for Identifying <fixed-case>E</fixed-case>nglish Inclusions in <fixed-case>G</fixed-case>erman Text</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <pages>133–138</pages>
       <url hash="8660b3a6">P05-2023</url>
       <bibkey>alex-2005-unsupervised</bibkey>
@@ -969,7 +969,7 @@
     <paper id="1">
       <title>An Information-State Approach to Collaborative Reference</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>Natalia</first><last>Kariaeva</last></author>
+      <author id="natalia-kariaeva-rutgers"><first>Natalia</first><last>Kariaeva</last></author>
       <author><first>Anubha</first><last>Kothari</last></author>
       <author><first>Iris</first><last>Oved</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
@@ -1018,7 +1018,7 @@
       <title>Descriptive Question Answering in Encyclopedia</title>
       <author><first>Hyo-Jung O.</first><last>Lee</last></author>
       <author><first>Hyeon-Jin</first><last>Kim</last></author>
-      <author><first>Myung-Gil</first><last>Jang</last></author>
+      <author id="myung-gil-jang"><first>Myung-Gil</first><last>Jang</last></author>
       <pages>21–24</pages>
       <url hash="87157244">P05-3006</url>
       <doi>10.3115/1225753.1225759</doi>
@@ -1026,7 +1026,7 @@
     </paper>
     <paper id="7">
       <title>High Throughput Modularized <fixed-case>NLP</fixed-case> System for Clinical Text</title>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <author><first>James</first><last>Buntrock</last></author>
       <author><first>Patrick</first><last>Duffy</last></author>
       <pages>25–28</pages>
@@ -1036,8 +1036,8 @@
     </paper>
     <paper id="8">
       <title>A Voice Enabled Procedure Browser for the International Space Station</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth A.</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth A.</first><last>Hockey</last></author>
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
       <author><first>Kim</first><last>Farrell</last></author>
       <author><first>Jean-Michel</first><last>Renders</last></author>
@@ -1057,9 +1057,9 @@
     </paper>
     <paper id="10">
       <title>Learning Source-Target Surface Patterns for Web-based Terminology Translation</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Tracy</first><last>Lin</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>37–40</pages>
       <url hash="76babc65">P05-3010</url>
       <doi>10.3115/1225753.1225763</doi>
@@ -1076,10 +1076,10 @@
     </paper>
     <paper id="12">
       <title>Multimodal Generation in the <fixed-case>COMIC</fixed-case> Dialogue System</title>
-      <author><first>Mary E.</first><last>Foster</last></author>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="mary-ellen-foster"><first>Mary E.</first><last>Foster</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <pages>45–48</pages>
       <url hash="a7397886">P05-3012</url>
       <doi>10.3115/1225753.1225765</doi>
@@ -1087,7 +1087,7 @@
     </paper>
     <paper id="13">
       <title>Language Independent Extractive Summarization</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>49–52</pages>
       <url hash="43401106">P05-3013</url>
       <doi>10.3115/1225753.1225766</doi>
@@ -1095,8 +1095,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Word Sense Disambiguation for All Words in Unrestricted Text</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Andras</first><last>Csomai</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="andras-csomai"><first>Andras</first><last>Csomai</last></author>
       <pages>53–56</pages>
       <url hash="b3476635">P05-3014</url>
       <doi>10.3115/1225753.1225767</doi>
@@ -1124,10 +1124,10 @@
     </paper>
     <paper id="17">
       <title>Supporting Annotation Layers for Natural Language Processing</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Ariel</first><last>Schwartz</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="ariel-schwartz"><first>Ariel</first><last>Schwartz</last></author>
       <author><first>Brian</first><last>Wolf</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>65–68</pages>
       <url hash="06932378">P05-3017</url>
       <doi>10.3115/1225753.1225770</doi>
@@ -1164,13 +1164,13 @@
       <title>Automating Temporal Annotation with <fixed-case>TARSQI</fixed-case></title>
       <author><first>Marc</first><last>Verhagen</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Roser</first><last>Sauri</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Sauri</last></author>
       <author><first>Jessica</first><last>Littman</last></author>
       <author><first>Robert</first><last>Knippen</last></author>
-      <author><first>Seok B.</first><last>Jang</last></author>
+      <author id="seok-bae-jang"><first>Seok B.</first><last>Jang</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <author id="jon-phillips"><first>John</first><last>Phillips</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>81–84</pages>
       <url hash="0aff7968">P05-3021</url>
       <doi>10.3115/1225753.1225774</doi>
@@ -1178,15 +1178,15 @@
     </paper>
     <paper id="22">
       <title>Two Diverse Systems Built using Generic Components for Spoken Dialogue (Recent Progress on <fixed-case>TRIPS</fixed-case>)</title>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>George</first><last>Ferguson</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
-      <author><first>Scott</first><last>Stoness</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
+      <author id="scott-c-stoness"><first>Scott</first><last>Stoness</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
-      <author><first>Nathan</first><last>Chambers</last></author>
-      <author><first>Ellen</first><last>Campana</last></author>
-      <author><first>Gregory</first><last>Aist</last></author>
+      <author id="nathanael-chambers"><first>Nathan</first><last>Chambers</last></author>
+      <author id="ellen-campana"><first>Ellen</first><last>Campana</last></author>
+      <author id="gregory-aist"><first>Gregory</first><last>Aist</last></author>
       <pages>85–88</pages>
       <url hash="18e520de">P05-3022</url>
       <doi>10.3115/1225753.1225775</doi>
@@ -1194,16 +1194,16 @@
     </paper>
     <paper id="23">
       <title><fixed-case>T</fixed-case>ransonics: A Practical Speech-to-Speech Translator for <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>arsi Medical Dialogs</title>
-      <author><first>Robert</first><last>Belvin</last></author>
+      <author id="robert-s-belvin"><first>Robert</first><last>Belvin</last></author>
       <author><first>Emil</first><last>Ettelaie</last></author>
       <author><first>Sudeep</first><last>Gandhe</last></author>
-      <author><first>Panayiotis</first><last>Georgiou</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis</first><last>Georgiou</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Scott</first><last>Millward</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Howard</first><last>Neely</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>89–92</pages>
       <url hash="9869785b">P05-3023</url>
       <doi>10.3115/1225753.1225776</doi>
@@ -1211,7 +1211,7 @@
     </paper>
     <paper id="24">
       <title>The Wild Thing</title>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <author><first>Bo</first><last>Thiesson</last></author>
       <pages>93–96</pages>
       <url hash="d1d2db6d">P05-3024</url>
@@ -1231,7 +1231,7 @@
     <paper id="26">
       <title>Multi-Engine Machine Translation Guided by Explicit Word Matching</title>
       <author><first>Shyamsundar</first><last>Jayaraman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>101–104</pages>
       <url hash="899959ec">P05-3026</url>
       <doi>10.3115/1225753.1225779</doi>
@@ -1248,7 +1248,7 @@
     </paper>
     <paper id="28">
       <title>A Flexible Stand-Off Data Model with Query Language for Multi-Level Annotation</title>
-      <author><first>Christoph</first><last>Mueller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Mueller</last></author>
       <pages>109–112</pages>
       <url hash="85ef05f7">P05-3028</url>
       <doi>10.3115/1225753.1225781</doi>
diff --git a/data/xml/P06.xml b/data/xml/P06.xml
index 4e0d3156dc..472b60fb84 100644
--- a/data/xml/P06.xml
+++ b/data/xml/P06.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics</booktitle>
       <url hash="16657fd2">P06-1</url>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
-      <editor><first>Claire</first><last>Cardie</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="claire-cardie"><first>Claire</first><last>Cardie</last></editor>
       <editor><first>Pierre</first><last>Isabelle</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
@@ -29,8 +29,8 @@
     </paper>
     <paper id="2">
       <title>Going Beyond <fixed-case>AER</fixed-case>: An Extensive Analysis of Word Alignments and Their Impact on <fixed-case>MT</fixed-case></title>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>9–16</pages>
       <url hash="d4a97ef3">P06-1002</url>
       <doi>10.3115/1220175.1220177</doi>
@@ -40,7 +40,7 @@
       <title>Unsupervised Topic Modelling for Multi-Party Spoken Discourse</title>
       <author><first>Matthew</first><last>Purver</last></author>
       <author><first>Konrad P.</first><last>Körding</last></author>
-      <author><first>Thomas L.</first><last>Griffiths</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas L.</first><last>Griffiths</last></author>
       <author><first>Joshua B.</first><last>Tenenbaum</last></author>
       <pages>17–24</pages>
       <url hash="2a1b06d4">P06-1003</url>
@@ -69,7 +69,7 @@
       <title>Kernel-Based Pronoun Resolution with Structured Syntactic Knowledge</title>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>41–48</pages>
       <url hash="8563ba5b">P06-1006</url>
       <doi>10.3115/1220175.1220181</doi>
@@ -96,8 +96,8 @@
     </paper>
     <paper id="9">
       <title>Discriminative Word Alignment with Conditional Random Fields</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>65–72</pages>
       <url hash="43e91f52">P06-1009</url>
       <doi>10.3115/1220175.1220184</doi>
@@ -105,9 +105,9 @@
     </paper>
     <paper id="10">
       <title>Named Entity Transliteration with Comparable Corpora</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Tao</first><last>Tao</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>73–80</pages>
       <url hash="f566ae45">P06-1010</url>
       <doi>10.3115/1220175.1220185</doi>
@@ -115,7 +115,7 @@
     </paper>
     <paper id="11">
       <title>Extracting Parallel Sub-Sentential Fragments from Non-Parallel Corpora</title>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>81–88</pages>
       <url hash="ab8af1cd">P06-1011</url>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="16">
       <title>Modeling Commonality among Related Classes in Relation Extraction</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>121–128</pages>
@@ -171,9 +171,9 @@
     <paper id="17">
       <title>Relation Extraction Using Label Propagation Based Semi-Supervised Learning</title>
       <author><first>Jinxiu</first><last>Chen</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>129–136</pages>
       <url hash="25b86da8">P06-1017</url>
       <doi>10.3115/1220175.1220192</doi>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="19">
       <title>Partially Specified Signatures: A Vehicle for Grammar Modularity</title>
-      <author><first>Yael</first><last>Cohen-Sygal</last></author>
+      <author id="yael-cohen-sygal"><first>Yael</first><last>Cohen-Sygal</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>145–152</pages>
       <url hash="a18a785f">P06-1019</url>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="20">
       <title>Morphology-Syntax Interface for <fixed-case>T</fixed-case>urkish <fixed-case>LFG</fixed-case></title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>153–160</pages>
       <url hash="e4c7da47">P06-1020</url>
@@ -207,16 +207,16 @@
     </paper>
     <paper id="21">
       <title><fixed-case>PCFG</fixed-case>s with Syntactic and Prosodic Indicators of Speech Repairs</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
       <author><first>Lisa</first><last>Yung</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Anna</first><last>Krasnyanskaya</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Matthew</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
       <author><first>Robin</first><last>Stewart</last></author>
       <pages>161–168</pages>
       <url hash="bf09be6d">P06-1021</url>
@@ -227,7 +227,7 @@
       <title>Dependency Parsing of <fixed-case>J</fixed-case>apanese Spoken Monologue Based on Clause Boundaries</title>
       <author><first>Tomohiro</first><last>Ohno</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Takehiko</first><last>Maruyama</last></author>
       <author><first>Yasuyoshi</first><last>Inagaki</last></author>
       <pages>169–176</pages>
@@ -255,7 +255,7 @@
     <paper id="25">
       <title>Dependencies between Student State and Speech Recognition Problems in Spoken Tutoring Dialogues</title>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>193–200</pages>
       <url hash="2623c084">P06-1025</url>
       <doi>10.3115/1220175.1220200</doi>
@@ -263,9 +263,9 @@
     </paper>
     <paper id="26">
       <title>Learning the Structure of Task-Driven Human-Human Dialogs</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>201–208</pages>
       <url hash="35ee6045">P06-1026</url>
       <doi>10.3115/1220175.1220201</doi>
@@ -326,7 +326,7 @@
     <paper id="32">
       <title>Correcting <fixed-case>ESL</fixed-case> Errors Using Phrasal <fixed-case>SMT</fixed-case> Techniques</title>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <pages>249–256</pages>
       <url hash="1c352d9f">P06-1032</url>
@@ -347,7 +347,7 @@
       <title>Learning to Generate Naturalistic Utterances Using Reviews in Spoken Dialogue Systems</title>
       <author><first>Ryuichiro</first><last>Higashinaka</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <pages>265–272</pages>
       <url hash="3b259095">P06-1034</url>
       <doi>10.3115/1220175.1220209</doi>
@@ -355,7 +355,7 @@
     </paper>
     <paper id="35">
       <title>Measuring Language Divergence by Intra-Lexical Comparison</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <author><first>Simon</first><last>Kirby</last></author>
       <pages>273–280</pages>
       <url hash="116c5ed6">P06-1035</url>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="37">
       <title>Guiding a Constraint Dependency Parser with Supertags</title>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Tomas</first><last>By</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>289–296</pages>
@@ -392,7 +392,7 @@
     </paper>
     <paper id="39">
       <title><fixed-case>B</fixed-case>ayesian Query-Focused Summarization</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>305–312</pages>
       <url hash="9d069091">P06-1039</url>
@@ -401,7 +401,7 @@
     </paper>
     <paper id="40">
       <title>Expressing Implicit Semantic Relations without Supervision</title>
-      <author><first>Peter D.</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter D.</first><last>Turney</last></author>
       <pages>313–320</pages>
       <url hash="b74b06d7">P06-1040</url>
       <doi>10.3115/1220175.1220215</doi>
@@ -409,7 +409,7 @@
     </paper>
     <paper id="41">
       <title>Hybrid Parsing: Using Probabilistic Models as Predictors for a Symbolic Parser</title>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>321–328</pages>
       <url hash="ccfe67e6">P06-1041</url>
@@ -418,8 +418,8 @@
     </paper>
     <paper id="42">
       <title>Error Mining in Parsing Results</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>329–336</pages>
       <url hash="a093ea75">P06-1042</url>
       <doi>10.3115/1220175.1220217</doi>
@@ -458,7 +458,7 @@
     <paper id="46">
       <title>Scaling Distributional Similarity to Large Corpora</title>
       <author><first>James</first><last>Gorman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>361–368</pages>
       <url hash="5ccf6652">P06-1046</url>
       <doi>10.3115/1220175.1220221</doi>
@@ -468,7 +468,7 @@
       <title>Extractive Summarization using Inter- and Intra- Event Relevance</title>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Mingli</first><last>Wu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Chunfa</first><last>Yuan</last></author>
       <pages>369–376</pages>
@@ -488,7 +488,7 @@
     <paper id="49">
       <title>A Bottom-Up Approach to Sentence Ordering for Multi-Document Summarization</title>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
       <pages>385–392</pages>
       <url hash="a3fb8500">P06-1049</url>
@@ -498,8 +498,8 @@
     <paper id="50">
       <title>Learning Event Durations from Event Descriptions</title>
       <author><first>Feng</first><last>Pan</last></author>
-      <author><first>Rutu</first><last>Mulkar</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <pages>393–400</pages>
       <url hash="95269248">P06-1050</url>
       <doi>10.3115/1220175.1220225</doi>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="51">
       <title>Automatic Learning of Textual Entailments with Cross-Pair Similarities</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>401–408</pages>
       <url hash="1b36db80">P06-1051</url>
@@ -557,7 +557,7 @@
     <paper id="56">
       <title>Semi-Supervised Learning of Partial Cognates Using Bilingual Bootstrapping</title>
       <author><first>Oana</first><last>Frunza</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>441–448</pages>
       <url hash="9d494137">P06-1056</url>
       <doi>10.3115/1220175.1220231</doi>
@@ -567,7 +567,7 @@
       <title>Direct Word Sense Matching for Lexical Substitution</title>
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Oren</first><last>Glickman</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Efrat</first><last>Marmorshtein</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>449–456</pages>
@@ -579,7 +579,7 @@
       <title>An Equivalent Pseudoword Solution to <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
       <author><first>Zhimao</first><last>Lu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>457–464</pages>
@@ -592,7 +592,7 @@
       <author><first>Daisuke</first><last>Okanohara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>465–472</pages>
       <url hash="356b3a8f">P06-1059</url>
       <doi>10.3115/1220175.1220234</doi>
@@ -600,9 +600,9 @@
     </paper>
     <paper id="60">
       <title>Factorizing Complex Models: A Case Study in Mention Detection</title>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
       <pages>473–480</pages>
       <url hash="0c2f344e">P06-1060</url>
@@ -633,7 +633,7 @@
       <title><fixed-case>Q</fixed-case>uestion<fixed-case>B</fixed-case>ank: Creating a Corpus of Parse-Annotated Questions</title>
       <author><first>John</first><last>Judge</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>497–504</pages>
       <url hash="ee20e3e0">P06-1063</url>
       <doi>10.3115/1220175.1220238</doi>
@@ -649,8 +649,8 @@
     </paper>
     <paper id="65">
       <title>Improved Discriminative Bilingual Word Alignment</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Andreas</first><last>Bode</last></author>
       <pages>513–520</pages>
       <url hash="8c084e4d">P06-1065</url>
@@ -659,7 +659,7 @@
     </paper>
     <paper id="66">
       <title>Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
       <pages>521–528</pages>
@@ -669,7 +669,7 @@
     </paper>
     <paper id="67">
       <title>Distortion Models for Statistical Machine Translation</title>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Kishore</first><last>Papineni</last></author>
       <pages>529–536</pages>
       <url hash="42e407b2">P06-1067</url>
@@ -679,7 +679,7 @@
     <paper id="68">
       <title>A Study on Automatically Extracted Keywords in Text Categorization</title>
       <author><first>Anette</first><last>Hulth</last></author>
-      <author><first>Beáta B.</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta B.</first><last>Megyesi</last></author>
       <pages>537–544</pages>
       <url hash="556fee23">P06-1068</url>
       <doi>10.3115/1220175.1220243</doi>
@@ -697,7 +697,7 @@
     </paper>
     <paper id="70">
       <title>Exploiting Comparable Corpora and Bilingual Dictionaries for Cross-Language Text Categorization</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>553–560</pages>
       <url hash="a72c61bd">P06-1070</url>
@@ -716,8 +716,8 @@
     </paper>
     <paper id="72">
       <title>Annealing Structural Bias in Multilingual Weighted Grammar Induction</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>569–576</pages>
       <url hash="84afbbbd">P06-1072</url>
       <doi>10.3115/1220175.1220247</doi>
@@ -726,8 +726,8 @@
     <paper id="73">
       <title>Maximum Entropy Based Restoration of <fixed-case>A</fixed-case>rabic Diacritics</title>
       <author><first>Imed</first><last>Zitouni</last></author>
-      <author><first>Jeffrey S.</first><last>Sorensen</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey S.</first><last>Sorensen</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>577–584</pages>
       <url hash="9c96537f">P06-1073</url>
       <doi>10.3115/1220175.1220248</doi>
@@ -738,7 +738,7 @@
       <author><first>Yuanhua</first><last>Lv</last></author>
       <author><first>Le</first><last>Sun</last></author>
       <author><first>Junlin</first><last>Zhang</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Wan</first><last>Chen</last></author>
       <author><first>Wei</first><last>Zhang</last></author>
       <pages>585–592</pages>
@@ -787,7 +787,7 @@
       <title>Exploiting Syntactic Patterns as Clues in Zero-Anaphora Resolution</title>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>625–632</pages>
       <url hash="ccb4492a">P06-1079</url>
       <doi>10.3115/1220175.1220254</doi>
@@ -797,7 +797,7 @@
       <title>Self-Organizing n-gram Model for Automatic Word Spacing</title>
       <author><first>Seong-Bae</first><last>Park</last></author>
       <author><first>Yoon-Shik</first><last>Tae</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>633–640</pages>
       <url hash="f7cc57b2">P06-1080</url>
       <doi>10.3115/1220175.1220255</doi>
@@ -806,9 +806,9 @@
     <paper id="81">
       <title>Concept Unification of Terms in Different Languages for <fixed-case>IR</fixed-case></title>
       <author><first>Qing</first><last>Li</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <author><first>Yun</first><last>Jin</last></author>
-      <author><first>Bo-yeong</first><last>Kang</last></author>
+      <author id="bo-yeong-kang"><first>Bo-yeong</first><last>Kang</last></author>
       <pages>641–648</pages>
       <url hash="83ca2589">P06-1081</url>
       <doi>10.3115/1220175.1220256</doi>
@@ -816,7 +816,7 @@
     </paper>
     <paper id="82">
       <title>Word Alignment in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpus Using Recency-Vector Approach: Some Studies</title>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <author><first>Saumya</first><last>Agrawal</last></author>
       <pages>649–656</pages>
       <url hash="8ee66157">P06-1082</url>
@@ -844,8 +844,8 @@
     </paper>
     <paper id="85">
       <title>Contextual Dependencies in Unsupervised Word Segmentation</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Thomas L.</first><last>Griffiths</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas L.</first><last>Griffiths</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>673–680</pages>
       <url hash="c857a7ed">P06-1085</url>
@@ -855,7 +855,7 @@
     <paper id="86">
       <title><fixed-case>MAGEAD</fixed-case>: A Morphological Analyzer and Generator for the <fixed-case>A</fixed-case>rabic Dialects</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>681–688</pages>
       <url hash="6406cdf1">P06-1086</url>
       <doi>10.3115/1220175.1220261</doi>
@@ -873,7 +873,7 @@
     </paper>
     <paper id="88">
       <title>Multi-Tagging for Lexicalized-Grammar Parsing</title>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>David</first><last>Vadas</last></author>
       <pages>697–704</pages>
@@ -884,7 +884,7 @@
     <paper id="89">
       <title>Guessing Parts-of-Speech of Unknown Words Using Global Information</title>
       <author><first>Tetsuji</first><last>Nakagawa</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>705–712</pages>
       <url hash="c9c1dacd">P06-1089</url>
       <doi>10.3115/1220175.1220264</doi>
@@ -903,7 +903,7 @@
     </paper>
     <paper id="91">
       <title>A Discriminative Global Training Algorithm for Statistical <fixed-case>MT</fixed-case></title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>721–728</pages>
       <url hash="8fe34da9">P06-1091</url>
@@ -923,7 +923,7 @@
     <paper id="93">
       <title>Automatic Generation of Domain Models for Call-Centers from Noisy Transcriptions</title>
       <author><first>Shourya</first><last>Roy</last></author>
-      <author><first>L Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L Venkata</first><last>Subramaniam</last></author>
       <pages>737–744</pages>
       <url hash="94d687c8">P06-1093</url>
       <doi>10.3115/1220175.1220268</doi>
@@ -931,9 +931,9 @@
     </paper>
     <paper id="94">
       <title>Proximity in Context: An Empirically Grounded Computational Model of Proximity for Processing Topological Spatial Expressions</title>
-      <author><first>John D.</first><last>Kelleher</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
-      <author><first>Fintan J.</first><last>Costello</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="fintan-j-costello"><first>Fintan J.</first><last>Costello</last></author>
       <pages>745–752</pages>
       <url hash="5ed54e78">P06-1094</url>
       <doi>10.3115/1220175.1220269</doi>
@@ -943,9 +943,9 @@
       <title>Machine Learning of Temporal Relations</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>753–760</pages>
       <url hash="bd82b2c1">P06-1095</url>
       <doi>10.3115/1220175.1220270</doi>
@@ -954,7 +954,7 @@
     <paper id="96">
       <title>An End-to-End Discriminative Approach to Machine Translation</title>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Alexandre</first><last>Bouchard-Côté</last></author>
+      <author id="alexandre-bouchard-cote"><first>Alexandre</first><last>Bouchard-Côté</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <pages>761–768</pages>
@@ -964,7 +964,7 @@
     </paper>
     <paper id="97">
       <title>Semi-Supervised Training for Statistical Word Alignment</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>769–776</pages>
       <url hash="6ba214dc">P06-1097</url>
@@ -1002,8 +1002,8 @@
     <paper id="101">
       <title>Semantic Taxonomy Induction from Heterogenous Evidence</title>
       <author><first>Rion</first><last>Snow</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <pages>801–808</pages>
       <url hash="80c3c0b9">P06-1101</url>
       <doi>10.3115/1220175.1220276</doi>
@@ -1011,9 +1011,9 @@
     </paper>
     <paper id="102">
       <title>Names and Similarities on the Web: Fact Extraction in the Fast Lane</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <author><first>Dekang</first><last>Lin</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <author><first>Andrei</first><last>Lifchits</last></author>
       <author><first>Alpa</first><last>Jain</last></author>
       <pages>809–816</pages>
@@ -1035,7 +1035,7 @@
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Jie</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <pages>825–832</pages>
       <url hash="e68c3e9e">P06-1104</url>
       <doi>10.3115/1220175.1220279</doi>
@@ -1044,7 +1044,7 @@
     <paper id="105">
       <title><fixed-case>J</fixed-case>apanese Dependency Parsing Using Co-Occurrence Information and a Combination of Case Elements</title>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>833–840</pages>
       <url hash="1de9f40c">P06-1105</url>
       <doi>10.3115/1220175.1220280</doi>
@@ -1061,9 +1061,9 @@
     </paper>
     <paper id="107">
       <title>Discovering Asymmetric Entailment Relations between Verbs Using Selectional Preferences</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <pages>849–856</pages>
       <url hash="55bb9d08">P06-1107</url>
       <doi>10.3115/1220175.1220282</doi>
@@ -1072,7 +1072,7 @@
     <paper id="108">
       <title>Event Extraction in a Plot Advice Agent</title>
       <author><first>Harry</first><last>Halpin</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>857–864</pages>
       <url hash="5a80a854">P06-1108</url>
       <doi>10.3115/1220175.1220283</doi>
@@ -1088,7 +1088,7 @@
     </paper>
     <paper id="110">
       <title>Advances in Discriminative Parsing</title>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <author><first>I. Dan</first><last>Melamed</last></author>
       <pages>873–880</pages>
       <url hash="2f3cb54d">P06-1110</url>
@@ -1116,7 +1116,7 @@
     <paper id="113">
       <title>Question Answering with Lexical Chains Propagating Verb Arguments</title>
       <author><first>Adrian</first><last>Novischi</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>897–904</pages>
       <url hash="3fe6d8df">P06-1113</url>
       <doi>10.3115/1220175.1220288</doi>
@@ -1124,7 +1124,7 @@
     </paper>
     <paper id="114">
       <title>Methods for Using Textual Entailment in Open-Domain Question Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <author><first>Andrew</first><last>Hickl</last></author>
       <pages>905–912</pages>
       <url hash="72bb85bc">P06-1114</url>
@@ -1133,8 +1133,8 @@
     </paper>
     <paper id="115">
       <title>Using String-Kernels for Learning Semantic Parsers</title>
-      <author><first>Rohit J.</first><last>Kate</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="rohit-kate"><first>Rohit J.</first><last>Kate</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>913–920</pages>
       <url hash="bfbbac6a">P06-1115</url>
       <doi>10.3115/1220175.1220290</doi>
@@ -1160,7 +1160,7 @@
     </paper>
     <paper id="118">
       <title>Multilingual Legal Terminology on the Jibiki Platform: The <fixed-case>L</fixed-case>ex<fixed-case>ALP</fixed-case> Project</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Francis</first><last>Brunet-Manquat</last></author>
       <author><first>Elena</first><last>Chiocchetti</last></author>
       <pages>937–944</pages>
@@ -1171,9 +1171,9 @@
     <paper id="119">
       <title>Leveraging Reusability: Cost-Effective Lexical Acquisition for Large-Scale Ontology Translation</title>
       <author><first>G. Craig</first><last>Murray</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <pages>945–952</pages>
       <url hash="2b920c6f">P06-1119</url>
@@ -1183,7 +1183,7 @@
     <paper id="120">
       <title>Accurate Collocation Extraction Using a Multilingual Parser</title>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>953–960</pages>
       <url hash="cff5e2fb">P06-1120</url>
       <doi>10.3115/1220175.1220295</doi>
@@ -1233,7 +1233,7 @@
     <paper id="125">
       <title>A Phonetic-Based Approach to <fixed-case>C</fixed-case>hinese Chat Text Normalization</title>
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <pages>993–1000</pages>
       <url hash="e8c85213">P06-1125</url>
@@ -1269,7 +1269,7 @@
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
       <author><first>Takashi</first><last>Ninomiya</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1017–1024</pages>
       <url hash="e365f2e3">P06-1128</url>
       <doi>10.3115/1220175.1220303</doi>
@@ -1279,7 +1279,7 @@
       <title>Exploring Distributional Similarity Based Models for Query Spelling Correction</title>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>1025–1032</pages>
       <url hash="cf38e469">P06-1129</url>
@@ -1289,7 +1289,7 @@
     <paper id="130">
       <title>Robust <fixed-case>PCFG</fixed-case>-Based Generation Using Automatically Acquired <fixed-case>LFG</fixed-case> Approximations</title>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1033–1040</pages>
       <url hash="afa39754">P06-1130</url>
       <doi>10.3115/1220175.1220305</doi>
@@ -1297,8 +1297,8 @@
     </paper>
     <paper id="131">
       <title>Incremental Generation of Spatial Referring Expressions in Situated Dialog</title>
-      <author><first>John D.</first><last>Kelleher</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <pages>1041–1048</pages>
       <url hash="a23878b8">P06-1131</url>
       <doi>10.3115/1220175.1220306</doi>
@@ -1316,7 +1316,7 @@
     <paper id="133">
       <title>Are These Documents Written from Different Perspectives? A Test of Different Perspectives Based on Statistical Distribution Divergence</title>
       <author><first>Wei-Hao</first><last>Lin</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>1057–1064</pages>
       <url hash="94318283">P06-1133</url>
       <doi>10.3115/1220175.1220308</doi>
@@ -1324,8 +1324,8 @@
     </paper>
     <paper id="134">
       <title>Word Sense and Subjectivity</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1065–1072</pages>
       <url hash="b579b43b">P06-1134</url>
       <doi>10.3115/1220175.1220309</doi>
@@ -1334,8 +1334,8 @@
     <paper id="135">
       <title>Improving <fixed-case>QA</fixed-case> Accuracy by Question Inversion</title>
       <author><first>John</first><last>Prager</last></author>
-      <author><first>Pablo</first><last>Duboue</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <pages>1073–1080</pages>
       <url hash="40dccb33">P06-1135</url>
       <doi>10.3115/1220175.1220310</doi>
@@ -1381,7 +1381,7 @@
     <paper id="140">
       <title>Learning to Say It Well: Reranking Realizations by Predicted Synthesis Quality</title>
       <author><first>Crystal</first><last>Nakatsu</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>1113–1120</pages>
       <url hash="ecb24db7">P06-1140</url>
       <doi>10.3115/1220175.1220315</doi>
@@ -1390,7 +1390,7 @@
     <paper id="141">
       <title>An Effective Two-Stage Model for Exploiting Non-Local Dependencies in Named Entity Recognition</title>
       <author><first>Vijay</first><last>Krishnan</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1121–1128</pages>
       <url hash="fc87e01e">P06-1141</url>
       <doi>10.3115/1220175.1220316</doi>
@@ -1408,7 +1408,7 @@
     </paper>
     <paper id="143">
       <title><fixed-case>P</fixed-case>unjabi Machine Transliteration</title>
-      <author><first>M.G. Abbas</first><last>Malik</last></author>
+      <author id="m-g-abbas-malik"><first>M.G. Abbas</first><last>Malik</last></author>
       <pages>1137–1144</pages>
       <url hash="83c60b40">P06-1143</url>
       <doi>10.3115/1220175.1220318</doi>
@@ -1417,8 +1417,8 @@
     <paper id="144">
       <title>Multilingual Document Clustering: An Heuristic Approach Based on Cognate Named Entities</title>
       <author><first>Soto</first><last>Montalvo</last></author>
-      <author><first>Raquel</first><last>Martínez</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martínez</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <author><first>Víctor</first><last>Fresno</last></author>
       <pages>1145–1152</pages>
       <url hash="c6bd5bda">P06-1144</url>
@@ -1430,7 +1430,7 @@
       <author><first>Taichi</first><last>Noro</last></author>
       <author><first>Takashi</first><last>Inui</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1153–1160</pages>
       <url hash="45dfa52a">P06-1145</url>
       <doi>10.3115/1220175.1220320</doi>
@@ -1438,7 +1438,7 @@
     </paper>
     <paper id="146">
       <title>Optimal Constituent Alignment with Edge Covers for Semantic Projection</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>1161–1168</pages>
       <url hash="02fa34cc">P06-1146</url>
@@ -1448,7 +1448,7 @@
     <paper id="147">
       <title>Utilizing Co-Occurrence of Answers in Question Answering</title>
       <author><first>Min</first><last>Wu</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>1169–1176</pages>
       <url hash="c4dce65a">P06-1147</url>
       <doi>10.3115/1220175.1220322</doi>
@@ -1472,11 +1472,11 @@
     </frontmatter>
     <paper id="1">
       <title>Using Machine Learning Techniques to Build a Comma Checker for <fixed-case>B</fixed-case>asque</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Bertol</first><last>Arrieta</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
       <author><first>Eli</first><last>Izagirre</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
       <pages>1–8</pages>
       <url hash="c2e61e8f">P06-2001</url>
       <bibkey>alegria-etal-2006-using</bibkey>
@@ -1485,18 +1485,18 @@
       <title>A Rote Extractor with Edit Distance-Based Generalisation and Multi-Corpora Precision Calculation</title>
       <author><first>Enrique</first><last>Alfonseca</last></author>
       <author><first>Pablo</first><last>Castells</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
-      <author><first>Maria</first><last>Ruiz-Casado</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
+      <author id="maria-ruiz-casado"><first>Maria</first><last>Ruiz-Casado</last></author>
       <pages>9–16</pages>
       <url hash="893b13cb">P06-2002</url>
       <bibkey>alfonseca-etal-2006-rote</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>MT</fixed-case> Evaluation: Human-Like vs. Human Acceptable</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>17–24</pages>
       <url hash="19c2e803">P06-2003</url>
       <bibkey>amigo-etal-2006-mt</bibkey>
@@ -1504,14 +1504,14 @@
     <paper id="4">
       <title>The Effect of Corpus Size in Combining Supervised and Unsupervised Training for Disambiguation</title>
       <author><first>Michaela</first><last>Atterer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>25–32</pages>
       <url hash="639a4660">P06-2004</url>
       <bibkey>atterer-schutze-2006-effect</bibkey>
     </paper>
     <paper id="5">
       <title>A Phrase-Based Statistical Model for <fixed-case>SMS</fixed-case> Text Normalization</title>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Juan</first><last>Xiao</last></author>
       <author><first>Jian</first><last>Su</last></author>
@@ -1521,8 +1521,8 @@
     </paper>
     <paper id="6">
       <title>Evaluating the Accuracy of an Unlexicalized Statistical Parser on the <fixed-case>PARC</fixed-case> <fixed-case>D</fixed-case>ep<fixed-case>B</fixed-case>ank</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>41–48</pages>
       <url hash="8f0944ca">P06-2006</url>
       <bibkey>briscoe-carroll-2006-evaluating</bibkey>
@@ -1539,8 +1539,8 @@
     </paper>
     <paper id="8">
       <title>Towards Conversational <fixed-case>QA</fixed-case>: Automatic Identification of Problematic Situations and User Intent</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Tyler</first><last>Baldwin</last></author>
       <pages>57–64</pages>
       <url hash="68c23117">P06-2008</url>
@@ -1576,9 +1576,9 @@
     <paper id="12">
       <title>Unsupervised Relation Disambiguation Using Spectral Clustering</title>
       <author><first>Jinxiu</first><last>Chen</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>89–96</pages>
       <url hash="24d93955">P06-2012</url>
       <bibkey>chen-etal-2006-unsupervised</bibkey>
@@ -1626,8 +1626,8 @@
     </paper>
     <paper id="18">
       <title>Using Machine-Learning to Assign Function Labels to Parser Output for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>136–143</pages>
       <url hash="65e4895c">P06-2018</url>
       <bibkey>chrupala-van-genabith-2006-using</bibkey>
@@ -1642,16 +1642,16 @@
     </paper>
     <paper id="20">
       <title>Topic-Focused Multi-Document Summarization Using an Approximate Oracle Score</title>
-      <author><first>John M.</first><last>Conroy</last></author>
-      <author><first>Judith D.</first><last>Schlesinger</last></author>
-      <author><first>Dianne P.</first><last>O’Leary</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
+      <author id="judith-d-schlesinger"><first>Judith D.</first><last>Schlesinger</last></author>
+      <author id="dianne-p-oleary"><first>Dianne P.</first><last>O’Leary</last></author>
       <pages>152–159</pages>
       <url hash="e5015d24">P06-2020</url>
       <bibkey>conroy-etal-2006-topic</bibkey>
     </paper>
     <paper id="21">
       <title>Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Automatically Deduce Relations between Words in Noun-Noun Compounds</title>
-      <author><first>Fintan J.</first><last>Costello</last></author>
+      <author id="fintan-j-costello"><first>Fintan J.</first><last>Costello</last></author>
       <author><first>Tony</first><last>Veale</last></author>
       <author><first>Simon</first><last>Dunne</last></author>
       <pages>160–167</pages>
@@ -1663,16 +1663,16 @@
       <author><first>Cassandre</first><last>Creswell</last></author>
       <author><first>Matthew J.</first><last>Beal</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Thomas L.</first><last>Cornell</last></author>
+      <author id="thomas-l-cornell"><first>Thomas L.</first><last>Cornell</last></author>
       <author><first>Lars</first><last>Nilsson</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>168–175</pages>
       <url hash="355caec9">P06-2022</url>
       <bibkey>creswell-etal-2006-automatically</bibkey>
     </paper>
     <paper id="23">
       <title>A Bio-Inspired Approach for Multi-Word Expression Extraction</title>
-      <author><first>Jianyong</first><last>Duan</last></author>
+      <author id="jianyong-duan"><first>Jianyong</first><last>Duan</last></author>
       <author><first>Ruzhan</first><last>Lu</last></author>
       <author><first>Weilin</first><last>Wu</last></author>
       <author><first>Yi</first><last>Hu</last></author>
@@ -1691,8 +1691,8 @@
     <paper id="25">
       <title>A Modified Joint Source-Channel Model for Transliteration</title>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>191–198</pages>
       <url hash="4ef01627">P06-2025</url>
       <bibkey>ekbal-etal-2006-modified</bibkey>
@@ -1710,7 +1710,7 @@
       <title>Automatic Creation of Domain Templates</title>
       <author><first>Elena</first><last>Filatova</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>207–214</pages>
       <url hash="37cd2b4e">P06-2027</url>
       <bibkey>filatova-etal-2006-automatic</bibkey>
@@ -1718,16 +1718,16 @@
     <paper id="28">
       <title>Using Lexical Dependency and Ontological Knowledge to Improve a Detailed Syntactic and Semantic Tagger of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>215–222</pages>
       <url hash="890228b9">P06-2028</url>
       <bibkey>finch-etal-2006-using</bibkey>
     </paper>
     <paper id="29">
       <title>The Benefit of Stochastic <fixed-case>PP</fixed-case> Attachment to a Rule-Based Parser</title>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>223–230</pages>
       <url hash="95de9cef">P06-2029</url>
@@ -1760,7 +1760,7 @@
     <paper id="33">
       <title>Conceptual Coherence in the Generation of Referring Expressions</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>255–262</pages>
       <url hash="513678a8">P06-2033</url>
       <bibkey>gatt-van-deemter-2006-conceptual</bibkey>
@@ -1768,7 +1768,7 @@
     <paper id="34">
       <title>Discriminative Reranking for Semantic Parsing</title>
       <author><first>Ruifang</first><last>Ge</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>263–270</pages>
       <url hash="1b2ca91b">P06-2034</url>
       <bibkey>ge-mooney-2006-discriminative</bibkey>
@@ -1792,8 +1792,8 @@
     </paper>
     <paper id="37">
       <title>Low-Cost Enrichment of <fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Automatically Translated Glosses: Combining General and Specialized Models</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>287–294</pages>
       <url hash="3503d577">P06-2037</url>
       <bibkey>gimenez-marquez-2006-low</bibkey>
@@ -1801,7 +1801,7 @@
     <paper id="38">
       <title>Speeding Up Full Syntactic Parsing by Leveraging Partial Parsing Decisions</title>
       <author><first>Elliot</first><last>Glaysher</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>295–300</pages>
       <url hash="92f457cc">P06-2038</url>
       <bibkey>glaysher-moldovan-2006-speeding</bibkey>
@@ -1809,14 +1809,14 @@
     <paper id="39">
       <title>Parsing Aligned Parallel Corpus by Projecting Syntactic Relations from Annotated Source Corpus</title>
       <author><first>Shailly</first><last>Goyal</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>301–308</pages>
       <url hash="50c37237">P06-2039</url>
       <bibkey>goyal-chatterjee-2006-parsing</bibkey>
     </paper>
     <paper id="40">
       <title>Reduced n-gram Models for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Corpora</title>
-      <author><first>Le Q</first><last>Ha</last></author>
+      <author id="le-quan-ha"><first>Le Q</first><last>Ha</last></author>
       <author id="philip-hanna"><first>P</first><last>Hanna</last></author>
       <author><first>D W</first><last>Stewart</last></author>
       <author id="francis-j-smith"><first>F J</first><last>Smith</last></author>
@@ -1846,7 +1846,7 @@
     <paper id="43">
       <title>Improving <fixed-case>E</fixed-case>nglish Subcategorization Acquisition with Diathesis Alternations as Heuristic Information</title>
       <author><first>Xiwu</first><last>Han</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Xingshang</first><last>Fu</last></author>
       <pages>331–336</pages>
       <url hash="622d6920">P06-2043</url>
@@ -1874,7 +1874,7 @@
     <paper id="46">
       <title><fixed-case>J</fixed-case>apanese Idiom Recognition: Drawing a Line between Literal and Idiomatic Meanings</title>
       <author><first>Chikara</first><last>Hashimoto</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <pages>353–360</pages>
       <url hash="9d27bcd1">P06-2046</url>
@@ -1882,7 +1882,7 @@
     </paper>
     <paper id="47">
       <title>Graph Branch Algorithm: An Optimum Tree Search Method for Scored Dependency Graph with Arc Co-Occurrence Constraints</title>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <pages>361–368</pages>
       <url hash="f6061200">P06-2047</url>
       <bibkey>hirakawa-2006-graph</bibkey>
@@ -1905,7 +1905,7 @@
     </paper>
     <paper id="50">
       <title>When Conset Meets Synset: A Preliminary Survey of an Ontological Lexical Resource Based on <fixed-case>C</fixed-case>hinese Characters</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>385–390</pages>
       <url hash="17dde7c0">P06-2050</url>
@@ -1934,7 +1934,7 @@
       <author><first>Sanaz</first><last>Jabbari</last></author>
       <author><first>Ben</first><last>Allison</last></author>
       <author><first>David</first><last>Guthrie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <pages>407–411</pages>
       <url hash="9e79931d">P06-2053</url>
       <bibkey>jabbari-etal-2006-towards</bibkey>
@@ -1942,7 +1942,7 @@
     <paper id="54">
       <title>Exploiting Non-Local Features for Spoken Language Understanding</title>
       <author><first>Minwoo</first><last>Jeong</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>412–419</pages>
       <url hash="c3e60630">P06-2054</url>
       <bibkey>jeong-lee-2006-exploiting</bibkey>
@@ -1950,7 +1950,7 @@
     <paper id="55">
       <title>Analysis and Repair of Name Tagger Errors</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>420–427</pages>
       <url hash="3c6d3b48">P06-2055</url>
       <bibkey>ji-grishman-2006-analysis</bibkey>
@@ -1958,7 +1958,7 @@
     <paper id="56">
       <title>Unsupervised Segmentation of <fixed-case>C</fixed-case>hinese Text by Use of Branching Entropy</title>
       <author><first>Zhihui</first><last>Jin</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>428–435</pages>
       <url hash="77059fd8">P06-2056</url>
       <bibkey>jin-tanaka-ishii-2006-unsupervised</bibkey>
@@ -1989,7 +1989,7 @@
     </paper>
     <paper id="60">
       <title>Minority Vote: At-Least-N Voting Improves Recall for Extracting Relations</title>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
       <pages>460–466</pages>
       <url hash="d0853d6d">P06-2060</url>
       <bibkey>kambhatla-2006-minority</bibkey>
@@ -1998,7 +1998,7 @@
       <title>Integration of Speech to Computer-Assisted Translation Using Finite-State Automata</title>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>467–474</pages>
       <url hash="cf048cec">P06-2061</url>
       <bibkey>khadivi-etal-2006-integration</bibkey>
@@ -2013,7 +2013,7 @@
     <paper id="63">
       <title>Automatic Identification of Pro and Con Reasons in Online Reviews</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>483–490</pages>
       <url hash="edeb7a27">P06-2063</url>
       <bibkey>kim-hovy-2006-automatic</bibkey>
@@ -2021,7 +2021,7 @@
     <paper id="64">
       <title>Interpreting Semantic Relations in Noun Compounds via Verb Semantics</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>491–498</pages>
       <url hash="f22605df">P06-2064</url>
       <bibkey>kim-baldwin-2006-interpreting</bibkey>
@@ -2078,8 +2078,8 @@
     <paper id="71">
       <title>Discriminating Image Senses by Clustering with Multimodal Features</title>
       <author><first>Nicolas</first><last>Loeff</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
-      <author><first>David A.</first><last>Forsyth</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="david-forsyth"><first>David A.</first><last>Forsyth</last></author>
       <pages>547–554</pages>
       <url hash="8a42cca3">P06-2071</url>
       <bibkey>loeff-etal-2006-discriminating</bibkey>
@@ -2094,9 +2094,9 @@
     </paper>
     <paper id="73">
       <title>Segmented and Unsegmented Dialogue-Act Annotation with Statistical Dialogue Models</title>
-      <author><first>Carlos D.</first><last>Martínez Hinarejos</last></author>
-      <author><first>Ramón</first><last>Granell</last></author>
-      <author><first>José Miguel</first><last>Benedí</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos D.</first><last>Martínez Hinarejos</last></author>
+      <author id="ramon-granell"><first>Ramón</first><last>Granell</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedí</last></author>
       <pages>563–570</pages>
       <url hash="41177c72">P06-2073</url>
       <bibkey>martinez-hinarejos-etal-2006-segmented</bibkey>
@@ -2105,7 +2105,7 @@
       <title><fixed-case>ARE</fixed-case>: Instance Splitting Strategies for Dependency Relation-Based Information Extraction</title>
       <author><first>Mstislav</first><last>Maslennikov</last></author>
       <author><first>Hai-Kiat</first><last>Goh</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>571–578</pages>
       <url hash="0e4f2381">P06-2074</url>
       <bibkey>maslennikov-etal-2006-instance</bibkey>
@@ -2114,7 +2114,7 @@
       <title>Integrating Pattern-Based and Distributional Similarity Methods for Lexical Entailment Acquisition</title>
       <author><first>Shachar</first><last>Mirkin</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Maayan</first><last>Geffet</last></author>
+      <author id="maayan-geffet"><first>Maayan</first><last>Geffet</last></author>
       <pages>579–586</pages>
       <url hash="7567bb66">P06-2075</url>
       <bibkey>mirkin-etal-2006-integrating</bibkey>
@@ -2142,7 +2142,7 @@
     <paper id="78">
       <title>An Automatic Method for Summary Evaluation Using Multiple Evaluation Results by a Manual Method</title>
       <author><first>Hidetsugu</first><last>Nanba</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>603–610</pages>
       <url hash="de0fe56b">P06-2078</url>
       <bibkey>nanba-okumura-2006-automatic</bibkey>
@@ -2158,7 +2158,7 @@
     </paper>
     <paper id="80">
       <title>Semantic Parsing with Structured <fixed-case>SVM</fixed-case> Ensemble Classification Models</title>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <author><first>Xuan-Hieu</first><last>Phan</last></author>
       <pages>619–626</pages>
@@ -2167,7 +2167,7 @@
     </paper>
     <paper id="81">
       <title>Whose Thumb Is It Anyway? Classifying Author Personality from Weblog Text</title>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Scott</first><last>Nowson</last></author>
       <pages>627–634</pages>
       <url hash="c7ff81a4">P06-2081</url>
@@ -2182,7 +2182,7 @@
     </paper>
     <paper id="83">
       <title>A Term Recognition Approach to Acronym Recognition</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>643–650</pages>
       <url hash="055deb26">P06-2083</url>
@@ -2217,7 +2217,7 @@
       <author><first>Patrick</first><last>Ruch</last></author>
       <author><first>Imad</first><last>Tbahriti</last></author>
       <author><first>Julien</first><last>Gobeill</last></author>
-      <author><first>Alan R.</first><last>Aronson</last></author>
+      <author id="alan-r-aronson"><first>Alan R.</first><last>Aronson</last></author>
       <pages>675–682</pages>
       <url hash="b107fb0a">P06-2087</url>
       <bibkey>ruch-etal-2006-argumentative</bibkey>
@@ -2234,7 +2234,7 @@
     <paper id="89">
       <title>A Best-First Probabilistic Shift-Reduce Parser</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>691–698</pages>
       <url hash="7cf84030">P06-2089</url>
       <bibkey>sagae-lavie-2006-best</bibkey>
@@ -2243,7 +2243,7 @@
       <title>Implementing a Characterization of Genre for Automatic Genre Identification of Web Pages</title>
       <author><first>Marina</first><last>Santini</last></author>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>699–706</pages>
       <url hash="e0ca997a">P06-2090</url>
       <bibkey>santini-etal-2006-implementing</bibkey>
@@ -2253,7 +2253,7 @@
       <author><first>Manabu</first><last>Sato</last></author>
       <author><first>Daisuke</first><last>Bekki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>707–714</pages>
       <url hash="e7f214c5">P06-2091</url>
       <bibkey>sato-etal-2006-translating</bibkey>
@@ -2268,7 +2268,7 @@
     <paper id="93">
       <title>Continuous Space Language Models for Statistical Machine Translation</title>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Daniel</first><last>Dechelotte</last></author>
+      <author id="daniel-dechelotte"><first>Daniel</first><last>Dechelotte</last></author>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
       <pages>723–730</pages>
       <url hash="492cd64b">P06-2093</url>
@@ -2285,7 +2285,7 @@
       <title>Using Comparable Corpora to Solve Problems Difficult for Human Translators</title>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <pages>739–746</pages>
       <url hash="d8b17523">P06-2095</url>
       <bibkey>sharoff-etal-2006-using-comparable</bibkey>
@@ -2293,9 +2293,9 @@
     <paper id="96">
       <title>Adding Syntax to Dynamic Programming for Aligning Comparable Texts for the Generation of Paraphrases</title>
       <author><first>Siwei</first><last>Shen</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Agam</first><last>Patel</last></author>
-      <author><first>Güneş</first><last>Erkan</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
       <pages>747–754</pages>
       <url hash="d0ee700c">P06-2096</url>
       <bibkey>shen-etal-2006-adding</bibkey>
@@ -2311,7 +2311,7 @@
     <paper id="98">
       <title>Exact Decoding for Jointly Labeling and Chunking Sequences</title>
       <author><first>Nobuyuki</first><last>Shimizu</last></author>
-      <author><first>Andrew</first><last>Haas</last></author>
+      <author id="andrew-haas"><first>Andrew</first><last>Haas</last></author>
       <pages>763–770</pages>
       <url hash="7d9b114c">P06-2098</url>
       <bibkey>shimizu-haas-2006-exact</bibkey>
@@ -2328,16 +2328,16 @@
       <title>Morphological Richness Offsets Resource Demand – Experiences in Constructing a <fixed-case>POS</fixed-case> Tagger for <fixed-case>H</fixed-case>indi</title>
       <author><first>Smriti</first><last>Singh</last></author>
       <author><first>Kuhoo</first><last>Gupta</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>779–786</pages>
       <url hash="3f8d04e9">P06-2100</url>
       <bibkey>singh-etal-2006-morphological</bibkey>
     </paper>
     <paper id="101">
       <title>Minimum Risk Annealing for Training Log-Linear Models</title>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>787–794</pages>
       <url hash="047c289d">P06-2101</url>
       <bibkey>smith-eisner-2006-minimum</bibkey>
@@ -2345,7 +2345,7 @@
     <paper id="102">
       <title>Unsupervised Induction of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Verb Classes Using Syntactic Frames and <fixed-case>LSA</fixed-case></title>
       <author><first>Neal</first><last>Snider</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>795–802</pages>
       <url hash="809ff2be">P06-2102</url>
       <bibkey>snider-diab-2006-unsupervised-induction</bibkey>
@@ -2361,7 +2361,7 @@
     <paper id="104">
       <title>A Comparison of Alternative Parse Tree Paths for Labeling Semantic Roles</title>
       <author><first>Reid</first><last>Swanson</last></author>
-      <author><first>Andrew S.</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew S.</first><last>Gordon</last></author>
       <pages>811–818</pages>
       <url hash="0836d830">P06-2104</url>
       <bibkey>swanson-gordon-2006-comparison</bibkey>
@@ -2369,7 +2369,7 @@
     <paper id="105">
       <title>A Logic-Based Semantic Approach to Recognizing Textual Entailment</title>
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>819–826</pages>
       <url hash="40e7f74b">P06-2105</url>
       <bibkey>tatu-moldovan-2006-logic</bibkey>
@@ -2379,13 +2379,13 @@
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Virach</first><last>Sornlertlamvanich</last></author>
       <author><first>Thatsanee</first><last>Charoenporn</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>YingJu</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>YingJu</first><last>Xia</last></author>
       <author><first>Hao</first><last>Yu</last></author>
-      <author><first>Laurent</first><last>Prevot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prevot</last></author>
       <author><first>Kiyoaki</first><last>Shirai</last></author>
       <pages>827–834</pages>
       <url hash="de1ce513">P06-2106</url>
@@ -2394,7 +2394,7 @@
     <paper id="107">
       <title>Statistical Phrase-Based Models for Interactive Computer-Assisted Translation</title>
       <author><first>Jesús</first><last>Tomás</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>835–841</pages>
       <url hash="4da4e353">P06-2107</url>
       <bibkey>tomas-casacuberta-2006-statistical</bibkey>
@@ -2411,7 +2411,7 @@
       <author><first>Yuya</first><last>Unno</last></author>
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>850–857</pages>
       <url hash="bb0cbd88">P06-2109</url>
       <bibkey>unno-etal-2006-trimming</bibkey>
@@ -2426,8 +2426,8 @@
     </paper>
     <paper id="111">
       <title>Finding Synonyms Using Automatic Word Alignment and Measures of Distributional Similarity</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>866–873</pages>
       <url hash="963c6250">P06-2111</url>
       <bibkey>van-der-plas-tiedemann-2006-finding</bibkey>
@@ -2446,7 +2446,7 @@
       <author><first>Ye-Yi</first><last>Wang</last></author>
       <author><first>Alex</first><last>Acero</last></author>
       <author><first>Milind</first><last>Mahajan</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>882–889</pages>
       <url hash="c980aa08">P06-2113</url>
       <bibkey>wang-etal-2006-combining</bibkey>
@@ -2489,7 +2489,7 @@
       <title>Aligning Features with Sense Distinction Dimensions</title>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Jinying</first><last>Chen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>921–928</pages>
       <url hash="e0200a99">P06-2118</url>
       <bibkey>xue-etal-2006-aligning</bibkey>
@@ -2497,7 +2497,7 @@
     <paper id="119">
       <title>Word Sense Disambiguation Using Lexical Cohesion in the Context</title>
       <author><first>Dongqiang</first><last>Yang</last></author>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <pages>929–936</pages>
       <url hash="de7e4ab5">P06-2119</url>
       <bibkey>yang-powers-2006-word</bibkey>
@@ -2513,7 +2513,7 @@
     </paper>
     <paper id="121">
       <title><fixed-case>HAL</fixed-case>-Based Cascaded Model for Variable-Length Semantic Pattern Induction from Psychiatry Web Resources</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Fong-Lin</first><last>Jang</last></author>
       <pages>945–952</pages>
@@ -2532,7 +2532,7 @@
       <title>Subword-Based Tagging for Confidence-Dependent <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>961–968</pages>
       <url hash="979ec7e6">P06-2123</url>
       <bibkey>zhang-etal-2006-subword-based</bibkey>
@@ -2540,7 +2540,7 @@
     <paper id="124">
       <title><fixed-case>B</fixed-case>i<fixed-case>TAM</fixed-case>: Bilingual Topic <fixed-case>A</fixed-case>d<fixed-case>M</fixed-case>ixture Models for Word Alignment</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <pages>969–976</pages>
       <url hash="b0d46ed1">P06-2124</url>
       <bibkey>zhao-xing-2006-bitam</bibkey>
@@ -2581,14 +2581,14 @@
     </paper>
     <paper id="2">
       <title>Unsupervised Part-of-Speech Tagging Employing Efficient Graph Clustering</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>7–12</pages>
       <url hash="5a9155c4">P06-3002</url>
       <bibkey>biemann-2006-unsupervised</bibkey>
     </paper>
     <paper id="3">
       <title>Sub-Sentential Alignment Using Substring Co-Occurrence Counts</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <pages>13–18</pages>
       <url hash="2f80bcfc">P06-3003</url>
       <bibkey>cromieres-2006-sub</bibkey>
@@ -2682,7 +2682,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case>/<fixed-case>ACL</fixed-case> 2006 Interactive Presentation Sessions</booktitle>
       <url hash="2008f3d7">P06-4</url>
-      <editor><first>James</first><last>Curran</last></editor>
+      <editor id="james-r-curran"><first>James</first><last>Curran</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
       <month>July</month>
@@ -2698,7 +2698,7 @@
       <title><fixed-case>FAST</fixed-case> – An Automatic Generation System for Grammar Tests</title>
       <author><first>Chia-Yin</first><last>Chen</last></author>
       <author><first>Hsien-Chin</first><last>Liou</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>1–4</pages>
       <url hash="4698f11b">P06-4001</url>
       <doi>10.3115/1225403.1225404</doi>
@@ -2706,8 +2706,8 @@
     </paper>
     <paper id="2">
       <title>Is It Correct? – Towards Web-Based Evaluation of Automatic Natural Language Phrase Generation</title>
-      <author><first>Calkin S.</first><last>Montero</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="calkin-s-montero"><first>Calkin S.</first><last>Montero</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>5–8</pages>
       <url hash="c727e412">P06-4002</url>
       <doi>10.3115/1225403.1225405</doi>
@@ -2715,12 +2715,12 @@
     </paper>
     <paper id="3">
       <title><fixed-case>L</fixed-case>e<fixed-case>XF</fixed-case>low: A System for Cross-Fertilization of Computational Lexicons</title>
-      <author><first>Maurizio</first><last>Tesconi</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tesconi</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>9–12</pages>
       <url hash="f481f369">P06-4003</url>
       <doi>10.3115/1225403.1225406</doi>
@@ -2746,8 +2746,8 @@
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>17–20</pages>
       <url hash="fc570e26">P06-4005</url>
       <doi>10.3115/1225403.1225408</doi>
@@ -2766,7 +2766,7 @@
       <author><first>Andrew</first><last>Hickl</last></author>
       <author><first>Patrick</first><last>Wang</last></author>
       <author><first>John</first><last>Lehmann</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>25–28</pages>
       <url hash="086d8e86">P06-4007</url>
       <doi>10.3115/1225403.1225410</doi>
@@ -2775,11 +2775,11 @@
     <paper id="8">
       <title><fixed-case>K</fixed-case>-<fixed-case>QARD</fixed-case>: A Practical <fixed-case>K</fixed-case>orean Question Answering Framework for Restricted Domain</title>
       <author><first>Young-In</first><last>Song</last></author>
-      <author><first>HooJung</first><last>Chung</last></author>
+      <author id="hoojung-chung"><first>HooJung</first><last>Chung</last></author>
       <author><first>Kyoung-Soo</first><last>Han</last></author>
-      <author><first>JooYoung</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Jae-Won</first><last>Lee</last></author>
+      <author id="joo-young-lee"><first>JooYoung</first><last>Lee</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="jae-won-lee"><first>Jae-Won</first><last>Lee</last></author>
       <pages>29–32</pages>
       <url hash="f049dc34">P06-4008</url>
       <doi>10.3115/1225403.1225411</doi>
@@ -2787,7 +2787,7 @@
     </paper>
     <paper id="9">
       <title>An Intermediate Representation for the Interpretation of Temporal Expressions</title>
-      <author><first>Paweł</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Paweł</first><last>Mazur</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>33–36</pages>
       <url hash="a3aed4c2">P06-4009</url>
@@ -2805,10 +2805,10 @@
     </paper>
     <paper id="11">
       <title>Computational Analysis of Move Structures in Academic Abstracts</title>
-      <author><first>Jien-Chen</first><last>Wu</last></author>
+      <author id="jian-chen-wu"><first>Jien-Chen</first><last>Wu</last></author>
       <author><first>Yu-Chia</first><last>Chang</last></author>
       <author><first>Hsien-Chin</first><last>Liou</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>41–44</pages>
       <url hash="11ba8344">P06-4011</url>
       <doi>10.3115/1225403.1225414</doi>
@@ -2816,8 +2816,8 @@
     </paper>
     <paper id="12">
       <title><fixed-case>L</fixed-case>ex<fixed-case>N</fixed-case>et: A Graphical Environment for Graph-Based <fixed-case>NLP</fixed-case></title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Güneş</first><last>Erkan</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
       <author><first>Anthony</first><last>Fader</last></author>
       <author><first>Patrick</first><last>Jordan</last></author>
       <author><first>Siwei</first><last>Shen</last></author>
@@ -2832,8 +2832,8 @@
       <author><first>Marita</first><last>Ailomaa</last></author>
       <author><first>Miroslav</first><last>Melichar</last></author>
       <author><first>Agnes</first><last>Lisowska</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <pages>49–52</pages>
       <url hash="70db34ea">P06-4013</url>
       <doi>10.3115/1225403.1225416</doi>
@@ -2853,9 +2853,9 @@
       <author><first>Tilman</first><last>Becker</last></author>
       <author><first>Peter</first><last>Poller</last></author>
       <author><first>Jan</first><last>Schehl</last></author>
-      <author><first>Nate</first><last>Blaylock</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
       <author><first>Ciprian</first><last>Gerstenberger</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <pages>57–60</pages>
       <url hash="19629601">P06-4015</url>
       <doi>10.3115/1225403.1225418</doi>
@@ -2863,7 +2863,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>T</fixed-case>wic<fixed-case>P</fixed-case>en: Hand-held Scanner and Translation Software for non-Native Readers</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>61–64</pages>
       <url hash="1cda2b8d">P06-4016</url>
       <doi>10.3115/1225403.1225419</doi>
@@ -2891,7 +2891,7 @@
     <paper id="19">
       <title><fixed-case>O</fixed-case>utilex, a Linguistic Platform for Text Processing</title>
       <author><first>Olivier</first><last>Blanc</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>73–76</pages>
       <url hash="900cd285">P06-4019</url>
       <doi>10.3115/1225403.1225422</doi>
@@ -2899,8 +2899,8 @@
     </paper>
     <paper id="20">
       <title>The Second Release of the <fixed-case>RASP</fixed-case> System</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Rebecca</first><last>Watson</last></author>
       <pages>77–80</pages>
       <url hash="00ab603d">P06-4020</url>
diff --git a/data/xml/P07.xml b/data/xml/P07.xml
index 8f77114dba..a63d82ec20 100644
--- a/data/xml/P07.xml
+++ b/data/xml/P07.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics</booktitle>
       <url hash="a1f67da6">P07-1</url>
       <editor><first>Annie</first><last>Zaenen</last></editor>
-      <editor><first>Antal</first><last>van den Bosch</last></editor>
+      <editor id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -19,14 +19,14 @@
     <paper id="1">
       <title>Guiding Statistical Word Alignment Models With Prior Knowledge</title>
       <author><first>Yonggang</first><last>Deng</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <pages>1–8</pages>
       <url hash="c20f947c">P07-1001</url>
       <bibkey>deng-gao-2007-guiding</bibkey>
     </paper>
     <paper id="2">
       <title>A Discriminative Syntactic Word Order Model for Machine Translation</title>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>9–16</pages>
       <url hash="dd30726c">P07-1002</url>
@@ -43,7 +43,7 @@
     <paper id="4">
       <title>Transductive learning for statistical machine translation</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>25–32</pages>
       <url hash="153e4ef2">P07-1004</url>
@@ -62,7 +62,7 @@
       <title>Learning Expressive Models for Word Sense Disambiguation</title>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <pages>41–48</pages>
       <url hash="9b253c14">P07-1006</url>
       <bibkey>specia-etal-2007-learning</bibkey>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="9">
       <title>A <fixed-case>B</fixed-case>ayesian Model for Discovering Typological Implications</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Lyle</first><last>Campbell</last></author>
       <pages>65–72</pages>
       <url hash="39b082a2">P07-1009</url>
@@ -94,7 +94,7 @@
     <paper id="10">
       <title>A discriminative language model with pseudo-negative samples</title>
       <author><first>Daisuke</first><last>Okanohara</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>73–80</pages>
       <url hash="ed68b580">P07-1010</url>
       <bibkey>okanohara-tsujii-2007-discriminative</bibkey>
@@ -111,10 +111,10 @@
     </paper>
     <paper id="101">
       <title>On the role of context and prosody in the interpretation of ‘okay’</title>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Stefan</first><last>Benus</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="stefan-benus"><first>Stefan</first><last>Benus</last></author>
       <author><first>Héctor</first><last>Chávez</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Lauren</first><last>Wilcox</last></author>
       <pages>800–807</pages>
       <url hash="fc519e8a">P07-1101</url>
@@ -123,14 +123,14 @@
     <paper id="102">
       <title>Predicting Success in Dialogue</title>
       <author><first>David</first><last>Reitter</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>808–815</pages>
       <url hash="0878bbed">P07-1102</url>
       <bibkey>reitter-moore-2007-predicting</bibkey>
     </paper>
     <paper id="103">
       <title>Resolving It, This, and That in Unrestricted Multi-Party Dialog</title>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <pages>816–823</pages>
       <url hash="73aa3a45">P07-1103</url>
       <bibkey>muller-2007-resolving</bibkey>
@@ -148,7 +148,7 @@
     <paper id="105">
       <title>Grammar Approximation by Representative Sublanguage: A New Model for Language Learning</title>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>832–839</pages>
       <url hash="f0b72a71">P07-1105</url>
       <bibkey>muresan-rambow-2007-grammar</bibkey>
@@ -192,15 +192,15 @@
       <author><first>Gao</first><last>Cong</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Zhongyang</first><last>Xiong</last></author>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>81–88</pages>
       <url hash="00a527a6">P07-1011</url>
       <bibkey>sun-etal-2007-detecting</bibkey>
     </paper>
     <paper id="110">
       <title>Benefits of the ‘Massively Parallel Rosetta Stone’: Cross-Language Information Retrieval with over 30 Languages</title>
-      <author><first>Peter</first><last>Chew</last></author>
+      <author id="peter-a-chew"><first>Peter</first><last>Chew</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <pages>872–879</pages>
       <url hash="6bd90811">P07-1110</url>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="112">
       <title>Automatic Acquisition of Ranked Qualia Structures from the Web</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Johanna</first><last>Wenderoth</last></author>
       <pages>888–895</pages>
       <url hash="b77b026e">P07-1112</url>
@@ -234,7 +234,7 @@
     </paper>
     <paper id="114">
       <title>Words and Echoes: Assessing and Mitigating the Non-Randomness Problem in Word Frequency Distribution Modeling</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Stefan</first><last>Evert</last></author>
       <pages>904–911</pages>
       <url hash="56db663d">P07-1114</url>
@@ -243,7 +243,7 @@
     <paper id="115">
       <title>A System for Large-Scale Acquisition of Verbal, Nominal and Adjectival Subcategorization Frames from Corpora</title>
       <author><first>Judita</first><last>Preiss</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>912–919</pages>
       <url hash="a900da35">P07-1115</url>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="117">
       <title>Using Mazurkiewicz Trace Languages for Partition-Based Morphology</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>928–935</pages>
       <url hash="466498d2">P07-1117</url>
       <bibkey>barthelemy-2007-using</bibkey>
@@ -267,7 +267,7 @@
       <title>Much ado about nothing: A social network model of <fixed-case>R</fixed-case>ussian paradigmatic gaps</title>
       <author><first>Robert</first><last>Daland</last></author>
       <author><first>Andrea D.</first><last>Sims</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <pages>936–943</pages>
       <url hash="48aa03e9">P07-1118</url>
       <bibkey>daland-etal-2007-much</bibkey>
@@ -299,7 +299,7 @@
     <paper id="121">
       <title>Learning Synchronous Grammars for Semantic Parsing with Lambda Calculus</title>
       <author><first>Yuk Wah</first><last>Wong</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>960–967</pages>
       <url hash="9d6b2445">P07-1121</url>
       <bibkey>wong-mooney-2007-learning</bibkey>
@@ -315,9 +315,9 @@
     </paper>
     <paper id="123">
       <title>Learning Multilingual Subjective Language via Cross-Lingual Projections</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>976–983</pages>
       <url hash="4cf50131">P07-1123</url>
       <bibkey>mihalcea-etal-2007-learning</bibkey>
@@ -333,7 +333,7 @@
     <paper id="125">
       <title>Weakly Supervised Learning for Hedge Classification in Scientific Literature</title>
       <author><first>Ben</first><last>Medlock</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>992–999</pages>
       <url hash="40414331">P07-1125</url>
       <bibkey>medlock-briscoe-2007-weakly</bibkey>
@@ -341,7 +341,7 @@
     <paper id="126">
       <title>Text Analysis for Automatic Image Annotation</title>
       <author><first>Koen</first><last>Deschacht</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1000–1007</pages>
       <url hash="0d2d2e05">P07-1126</url>
       <bibkey>deschacht-moens-2007-text</bibkey>
@@ -358,17 +358,17 @@
     <paper id="128">
       <title>Combining Multiple Knowledge Sources for Dialogue Segmentation in Multimedia Archives</title>
       <author><first>Pei-Yun</first><last>Hsueh</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>1016–1023</pages>
       <url hash="448b7234">P07-1128</url>
       <bibkey>hsueh-moore-2007-combining</bibkey>
     </paper>
     <paper id="129">
       <title>Topic Analysis for Psychiatric Document Retrieval</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Chia-Ling</first><last>Lin</last></author>
       <pages>1024–1031</pages>
       <url hash="32d79f70">P07-1129</url>
@@ -386,7 +386,7 @@
     <paper id="130">
       <title>What to be? - Electronic Career Guidance Based on Semantic Relatedness</title>
       <author><first>Iryna</first><last>Gurevych</last></author>
-      <author><first>Christof</first><last>Müller</last></author>
+      <author id="christof-muller"><first>Christof</first><last>Müller</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
       <pages>1032–1039</pages>
       <url hash="6875a9c0">P07-1130</url>
@@ -394,9 +394,9 @@
     </paper>
     <paper id="131">
       <title>Extracting Social Networks and Biographical Facts From Conversational Speech Transcripts</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>1040–1047</pages>
       <url hash="e7270524">P07-1131</url>
       <bibkey>jing-etal-2007-extracting</bibkey>
@@ -413,9 +413,9 @@
     </paper>
     <paper id="15">
       <title>Multilingual Transliteration Using Feature based Phonetic Method</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
-      <author><first>Kyoung-Young</first><last>Kim</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="kyoung-young-kim"><first>Kyoung-Young</first><last>Kim</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>112–119</pages>
       <url hash="940e6be9">P07-1015</url>
       <bibkey>yoon-etal-2007-multilingual</bibkey>
@@ -423,7 +423,7 @@
     <paper id="16">
       <title>Semantic Transliteration of Personal Names</title>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Khe Chai</first><last>Sim</last></author>
+      <author id="khe-chai-sim"><first>Khe Chai</first><last>Sim</last></author>
       <author><first>Jin-Shea</first><last>Kuo</last></author>
       <author><first>Minghui</first><last>Dong</last></author>
       <pages>120–127</pages>
@@ -442,7 +442,7 @@
     <paper id="18">
       <title>Assisting Translators in Indirect Lexical Transfer</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Olga</first><last>Mudraya</last></author>
       <pages>136–143</pages>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="20">
       <title>Statistical Machine Translation through Global Lexical Selection and Sentence Reconstruction</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Patrick</first><last>Haffner</last></author>
       <author><first>Stephan</first><last>Kanthak</last></author>
       <pages>152–159</pages>
@@ -498,7 +498,7 @@
     </paper>
     <paper id="25">
       <title>Generalizing semantic role annotations across syntactically similar verbs</title>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <author><first>Reid</first><last>Swanson</last></author>
       <pages>192–199</pages>
       <url hash="b63d0507">P07-1025</url>
@@ -508,9 +508,9 @@
       <title>A Grammar-driven Convolution Tree Kernel for Semantic Role Classification</title>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>200–207</pages>
@@ -552,7 +552,7 @@
     <paper id="31">
       <title>Adding Noun Phrase Structure to the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>David</first><last>Vadas</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>240–247</pages>
       <url hash="6f772cb7">P07-1031</url>
       <bibkey>vadas-curran-2007-adding</bibkey>
@@ -560,14 +560,14 @@
     <paper id="32">
       <title>Formalism-Independent Parser Evaluation with <fixed-case>CCG</fixed-case> and <fixed-case>D</fixed-case>ep<fixed-case>B</fixed-case>ank</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>248–255</pages>
       <url hash="eefad845">P07-1032</url>
       <bibkey>clark-curran-2007-formalism</bibkey>
     </paper>
     <paper id="33">
       <title>Frustratingly Easy Domain Adaptation</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>256–263</pages>
       <url hash="4733dae5">P07-1033</url>
       <bibkey>daume-iii-2007-frustratingly</bibkey>
@@ -575,16 +575,16 @@
     <paper id="34">
       <title>Instance Weighting for Domain Adaptation in <fixed-case>NLP</fixed-case></title>
       <author><first>Jing</first><last>Jiang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>264–271</pages>
       <url hash="6b54f4e7">P07-1034</url>
       <bibkey>jiang-zhai-2007-instance</bibkey>
     </paper>
     <paper id="35">
       <title>The Infinite Tree</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
       <author><first>Trond</first><last>Grenager</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>272–279</pages>
       <url hash="7daffc75">P07-1035</url>
       <bibkey>finkel-etal-2007-infinite</bibkey>
@@ -592,7 +592,7 @@
     <paper id="36">
       <title>Guiding Semi-Supervision with Constraint-Driven Learning</title>
       <author><first>Ming-Wei</first><last>Chang</last></author>
-      <author><first>Lev</first><last>Ratinov</last></author>
+      <author id="lev-ratinov"><first>Lev</first><last>Ratinov</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>280–287</pages>
       <url hash="1927617b">P07-1036</url>
@@ -600,8 +600,8 @@
     </paper>
     <paper id="37">
       <title>Supertagged Phrase-Based Statistical Machine Translation</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>288–295</pages>
       <url hash="cda51a3e">P07-1037</url>
@@ -626,9 +626,9 @@
     </paper>
     <paper id="40">
       <title>Improved Word-Level System Combination for Machine Translation</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>312–319</pages>
       <url hash="f59ec5a1">P07-1040</url>
       <bibkey>rosti-etal-2007-improved</bibkey>
@@ -678,7 +678,7 @@
     <paper id="46">
       <title>The Utility of a Graphical Representation of Discourse Structure in Spoken Dialogue Systems</title>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>360–367</pages>
       <url hash="da48ab24">P07-1046</url>
       <bibkey>rotaru-litman-2007-utility</bibkey>
@@ -686,7 +686,7 @@
     <paper id="47">
       <title>Automated Vocabulary Acquisition and Interpretation in Multimodal Conversational Systems</title>
       <author><first>Yi</first><last>Liu</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Rong</first><last>Jin</last></author>
       <pages>368–375</pages>
       <url hash="eed3d9f3">P07-1047</url>
@@ -694,8 +694,8 @@
     </paper>
     <paper id="48">
       <title>A Multimodal Interface for Access to Content in the Home</title>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <author><first>Michelle</first><last>Levine</last></author>
       <author><first>Bernard</first><last>Renger</last></author>
       <pages>376–383</pages>
@@ -711,7 +711,7 @@
     </paper>
     <paper id="50">
       <title>K-best Spanning Tree Parsing</title>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>392–399</pages>
       <url hash="03e33d21">P07-1050</url>
       <bibkey>hall-2007-k</bibkey>
@@ -787,9 +787,9 @@
     <paper id="59">
       <title>Statistical Machine Translation for Query Expansion in Answer Retrieval</title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Alexander</first><last>Vasserman</last></author>
+      <author id="alexander-vasserman"><first>Alexander</first><last>Vasserman</last></author>
       <author><first>Ioannis</first><last>Tsochantaridis</last></author>
-      <author><first>Vibhu</first><last>Mittal</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu</first><last>Mittal</last></author>
       <author><first>Yi</first><last>Liu</last></author>
       <pages>464–471</pages>
       <url hash="b5385dc2">P07-1059</url>
@@ -797,7 +797,7 @@
     </paper>
     <paper id="60">
       <title>A Computational Model of Text Reuse in Ancient Literary Texts</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>472–479</pages>
       <url hash="76ac7e58">P07-1060</url>
       <bibkey>lee-2007-computational</bibkey>
@@ -819,8 +819,8 @@
     </paper>
     <paper id="63">
       <title><fixed-case>PERSONAGE</fixed-case>: Personality Generation for Dialogue</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>496–503</pages>
       <url hash="274bdd55">P07-1063</url>
       <bibkey>mairesse-walker-2007-personage</bibkey>
@@ -830,7 +830,7 @@
       <author><first>Igor</first><last>Malioutov</last></author>
       <author><first>Alex</first><last>Park</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>504–511</pages>
       <url hash="82c0a9d1">P07-1064</url>
       <bibkey>malioutov-etal-2007-making</bibkey>
@@ -869,7 +869,7 @@
     </paper>
     <paper id="69">
       <title>Generating a Table-of-Contents</title>
-      <author><first>S. R. K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S. R. K.</first><last>Branavan</last></author>
       <author><first>Pawan</first><last>Deshpande</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>544–551</pages>
@@ -895,15 +895,15 @@
     </paper>
     <paper id="72">
       <title>Improving the Interpretation of Noun Phrases with Cross-linguistic Information</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>568–575</pages>
       <url hash="ff8f4796">P07-1072</url>
       <bibkey>girju-2007-improving</bibkey>
     </paper>
     <paper id="73">
       <title>Learning to Extract Relations from the Web using Minimal Supervision</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>576–583</pages>
       <url hash="039c1fab">P07-1073</url>
       <bibkey>bunescu-mooney-2007-learning</bibkey>
@@ -920,7 +920,7 @@
     <paper id="75">
       <title>A Multi-resolution Framework for Information Extraction from Free Text</title>
       <author><first>Mstislav</first><last>Maslennikov</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>592–599</pages>
       <url hash="826f8faa">P07-1075</url>
       <bibkey>maslennikov-chua-2007-multi</bibkey>
@@ -935,7 +935,7 @@
     </paper>
     <paper id="77">
       <title>Beyond Projectivity: Multilingual Evaluation of Constraints and Measures on Non-Projective Structures</title>
-      <author><first>Jiří</first><last>Havelka</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
       <pages>608–615</pages>
       <url hash="4cbafda1">P07-1077</url>
       <bibkey>havelka-2007-beyond</bibkey>
@@ -952,7 +952,7 @@
       <title><fixed-case>HPSG</fixed-case> Parsing with Shallow Dependency Constraints</title>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>624–631</pages>
       <url hash="876c8693">P07-1079</url>
       <bibkey>sagae-etal-2007-hpsg</bibkey>
@@ -960,7 +960,7 @@
     <paper id="80">
       <title>Constituent Parsing with Incremental Sigmoid Belief Networks</title>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>632–639</pages>
       <url hash="6c9a47e3">P07-1080</url>
       <bibkey>titov-henderson-2007-constituent</bibkey>
@@ -994,7 +994,7 @@
     <paper id="84">
       <title>Bilingual Terminology Mining - Using Brain, not brawn comparable corpora</title>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Koichi</first><last>Takeuchi</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <pages>664–671</pages>
@@ -1022,7 +1022,7 @@
       <author><first>Jie</first><last>Tang</last></author>
       <author><first>Hang</first><last>Li</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>688–695</pages>
       <url hash="f89c4ae8">P07-1087</url>
       <bibkey>zhu-etal-2007-unified</bibkey>
@@ -1069,7 +1069,7 @@
     </paper>
     <paper id="92">
       <title>Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>728–735</pages>
       <url hash="b9a262a7">P07-1092</url>
@@ -1084,7 +1084,7 @@
     </paper>
     <paper id="94">
       <title>A fully <fixed-case>B</fixed-case>ayesian approach to unsupervised part-of-speech tagging</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Tom</first><last>Griffiths</last></author>
       <pages>744–751</pages>
       <url hash="1147bd8b">P07-1094</url>
@@ -1092,9 +1092,9 @@
     </paper>
     <paper id="95">
       <title>Computationally Efficient <fixed-case>M</fixed-case>-Estimation of Log-Linear Structure Models</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Douglas L.</first><last>Vail</last></author>
-      <author><first>John D.</first><last>Lafferty</last></author>
+      <author id="john-lafferty"><first>John D.</first><last>Lafferty</last></author>
       <pages>752–759</pages>
       <url hash="2edfdcac">P07-1095</url>
       <bibkey>smith-etal-2007-computationally</bibkey>
@@ -1103,14 +1103,14 @@
       <title>Guided Learning for Bidirectional Sequence Classification</title>
       <author><first>Libin</first><last>Shen</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>760–767</pages>
       <url hash="cee21b2e">P07-1096</url>
       <bibkey>shen-etal-2007-guided</bibkey>
     </paper>
     <paper id="97">
       <title>Different Structures for Evaluating Answers to Complex Questions: Pyramids Won’t Topple, and Neither Will Human Assessors</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>768–775</pages>
       <url hash="ffe52f5c">P07-1097</url>
@@ -1120,7 +1120,7 @@
       <title>Exploiting Syntactic and Shallow Semantic Kernels for Question Answer Classification</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Silvia</first><last>Quarteroni</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>776–783</pages>
       <url hash="7307f3ef">P07-1098</url>
@@ -1130,7 +1130,7 @@
       <title>Language-independent Probabilistic Answer Ranking for Question Answering</title>
       <author><first>Jeongwoo</first><last>Ko</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>784–791</pages>
       <url hash="e88ff8c4">P07-1099</url>
       <bibkey>ko-etal-2007-language</bibkey>
@@ -1173,7 +1173,7 @@
       <author><first>Akitoshi</first><last>Okumura</last></author>
       <author><first>Takahiro</first><last>Ikeda</last></author>
       <author><first>Toshihiro</first><last>Nishizawa</last></author>
-      <author><first>Shin-ichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Shin-ichi</first><last>Ando</last></author>
       <author><first>Fumihiro</first><last>Adachi</last></author>
       <pages>9–12</pages>
       <url hash="a3d133f8">P07-2003</url>
@@ -1197,19 +1197,19 @@
     </paper>
     <paper id="6">
       <title>Multilingual Ontological Analysis of <fixed-case>E</fixed-case>uropean Directives</title>
-      <author><first>Gianmaria</first><last>Ajani</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
-      <author><first>Piercarlo</first><last>Rossi</last></author>
+      <author id="gianmaria-ajani"><first>Gianmaria</first><last>Ajani</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="piercarlo-rossi"><first>Piercarlo</first><last>Rossi</last></author>
       <pages>21–24</pages>
       <url hash="582ca6d1">P07-2006</url>
       <bibkey>ajani-etal-2007-multilingual</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>NICT</fixed-case>-<fixed-case>ATR</fixed-case> Speech-to-Speech Translation System</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tohru</first><last>Shimizu</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tohru-shimizu"><first>Tohru</first><last>Shimizu</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>25–28</pages>
       <url hash="453e2bbc">P07-2007</url>
@@ -1218,14 +1218,14 @@
     <paper id="8">
       <title>zipf<fixed-case>R</fixed-case>: Word Frequency Modeling in <fixed-case>R</fixed-case></title>
       <author><first>Stefan</first><last>Evert</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>29–32</pages>
       <url hash="2bec8818">P07-2008</url>
       <bibkey>evert-baroni-2007-zipfr</bibkey>
     </paper>
     <paper id="9">
       <title>Linguistically Motivated Large-Scale <fixed-case>NLP</fixed-case> with <fixed-case>C</fixed-case>&amp;<fixed-case>C</fixed-case> and Boxer</title>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>33–36</pages>
@@ -1236,17 +1236,17 @@
       <title>Don’t worry about metaphor: affect detection for conversational agents</title>
       <author><first>Catherine</first><last>Smith</last></author>
       <author><first>Timothy</first><last>Rumbell</last></author>
-      <author><first>John</first><last>Barnden</last></author>
-      <author><first>Robert</first><last>Hendley</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>Alan</first><last>Wallington</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
+      <author id="robert-j-hendley"><first>Robert</first><last>Hendley</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="alan-wallington"><first>Alan</first><last>Wallington</last></author>
       <pages>37–40</pages>
       <url hash="163a73c7">P07-2010</url>
       <bibkey>smith-etal-2007-dont</bibkey>
     </paper>
     <paper id="11">
       <title>An efficient algorithm for building a distributional thesaurus (and other <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine developments)</title>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <pages>41–44</pages>
       <url hash="0ca1441b">P07-2011</url>
@@ -1254,15 +1254,15 @@
     </paper>
     <paper id="12">
       <title>Semantic enrichment of journal articles using chemical named entity recognition</title>
-      <author><first>Colin R.</first><last>Batchelor</last></author>
-      <author><first>Peter T.</first><last>Corbett</last></author>
+      <author id="colin-batchelor"><first>Colin R.</first><last>Batchelor</last></author>
+      <author id="peter-corbett"><first>Peter T.</first><last>Corbett</last></author>
       <pages>45–48</pages>
       <url hash="a1be1fe3">P07-2012</url>
       <bibkey>batchelor-corbett-2007-semantic</bibkey>
     </paper>
     <paper id="13">
       <title>An <fixed-case>API</fixed-case> for Measuring the Relatedness of Words in <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>49–52</pages>
       <url hash="3bab0ef9">P07-2013</url>
@@ -1277,9 +1277,9 @@
     </paper>
     <paper id="15">
       <title>Support Vector Machines for Query-focused Summarization trained and evaluated on Pyramid data</title>
-      <author><first>Maria</first><last>Fuentes</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes</last></author>
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <pages>57–60</pages>
       <url hash="40246ebc">P07-2015</url>
       <bibkey>fuentes-etal-2007-support</bibkey>
@@ -1287,8 +1287,8 @@
     <paper id="16">
       <title>A Joint Statistical Model for Simultaneous Word Spacing and Spelling Error Correction for <fixed-case>K</fixed-case>orean</title>
       <author><first>Hyungjong</first><last>Noh</last></author>
-      <author><first>Jeong-Won</first><last>Cha</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeong-Won</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>61–64</pages>
       <url hash="b08d0ac4">P07-2016</url>
       <bibkey>noh-etal-2007-joint</bibkey>
@@ -1306,8 +1306,8 @@
       <title>Rethinking <fixed-case>C</fixed-case>hinese Word Segmentation: Tokenization, Character Classification, or Wordbreak Identification</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Petr</first><last>Šimon</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <pages>69–72</pages>
       <url hash="61777de0">P07-2018</url>
       <bibkey>huang-etal-2007-rethinking</bibkey>
@@ -1316,7 +1316,7 @@
       <title>A Feature Based Approach to Leveraging Context for Classifying Newsgroup Style Discussion Segments</title>
       <author><first>Yi-Chia</first><last>Wang</last></author>
       <author><first>Mahesh</first><last>Joshi</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>73–76</pages>
       <url hash="32c7b9c3">P07-2019</url>
       <bibkey>wang-etal-2007-feature</bibkey>
@@ -1355,7 +1355,7 @@
     </paper>
     <paper id="24">
       <title>Generating Usable Formats for Metadata and Annotations in a Large Meeting Corpus</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <pages>93–96</pages>
       <url hash="9a21dfe7">P07-2024</url>
@@ -1375,7 +1375,7 @@
       <title>Minimum <fixed-case>B</fixed-case>ayes Risk Decoding for <fixed-case>BLEU</fixed-case></title>
       <author><first>Nicola</first><last>Ehling</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>101–104</pages>
       <url hash="dcee3a08">P07-2026</url>
       <bibkey>ehling-etal-2007-minimum</bibkey>
@@ -1384,25 +1384,25 @@
       <title>Disambiguating Between Generic and Referential “You” in Dialog</title>
       <author><first>Surabhi</first><last>Gupta</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>105–108</pages>
       <url hash="82abd99e">P07-2027</url>
       <bibkey>gupta-etal-2007-disambiguating</bibkey>
     </paper>
     <paper id="28">
       <title>On the formalization of Invariant Mappings for Metaphor Interpretation</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>John</first><last>Barnden</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>Alan</first><last>Wallington</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="alan-wallington"><first>Alan</first><last>Wallington</last></author>
       <pages>109–112</pages>
       <url hash="e329279f">P07-2028</url>
       <bibkey>agerri-etal-2007-formalization</bibkey>
     </paper>
     <paper id="29">
       <title>Real-Time Correction of Closed-Captions</title>
-      <author><first>Patrick</first><last>Cardinal</last></author>
-      <author><first>Gilles</first><last>Boulianne</last></author>
+      <author id="patrick-cardinal"><first>Patrick</first><last>Cardinal</last></author>
+      <author id="gilles-boulianne"><first>Gilles</first><last>Boulianne</last></author>
       <author><first>Michel</first><last>Comeau</last></author>
       <author><first>Maryse</first><last>Boisvert</last></author>
       <pages>113–116</pages>
@@ -1420,7 +1420,7 @@
     </paper>
     <paper id="31">
       <title>Predicting Evidence of Understanding by Monitoring User’s Task Manipulation in Multimodal Conversations</title>
-      <author><first>Yukiko</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko</first><last>Nakano</last></author>
       <author><first>Kazuyoshi</first><last>Murata</last></author>
       <author><first>Mika</first><last>Enomoto</last></author>
       <author><first>Yoshiko</first><last>Arimoto</last></author>
@@ -1473,7 +1473,7 @@
     </paper>
     <paper id="37">
       <title>A Linguistic Service Ontology for Language Infrastructures</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>145–148</pages>
       <url hash="76cb5a36">P07-2037</url>
       <bibkey>hayashi-2007-linguistic</bibkey>
@@ -1489,7 +1489,7 @@
       <title>Automatic Discovery of Named Entity Variants: Grammar-driven Approaches to Non-Alphabetical Transliterations</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Petr</first><last>Šimon</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>153–156</pages>
       <url hash="e2bb2051">P07-2039</url>
       <bibkey>huang-etal-2007-automatic</bibkey>
@@ -1516,23 +1516,23 @@
     </paper>
     <paper id="42">
       <title>Extracting Hypernym Pairs from the Web</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>165–168</pages>
       <url hash="4252da67">P07-2042</url>
       <bibkey>tjong-kim-sang-2007-extracting</bibkey>
     </paper>
     <paper id="43">
       <title>An <fixed-case>OWL</fixed-case> Ontology for <fixed-case>HPSG</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>169–172</pages>
       <url hash="044c6d2a">P07-2043</url>
       <bibkey>wilcock-2007-owl</bibkey>
     </paper>
     <paper id="44">
       <title>Classifying Temporal Relations Between Events</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Shan</first><last>Wang</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>173–176</pages>
       <url hash="a1d8b3ba">P07-2044</url>
       <bibkey>chambers-etal-2007-classifying</bibkey>
@@ -1549,8 +1549,8 @@
       <author><first>Wade</first><last>Shen</last></author>
       <author><first>Christine</first><last>Moran</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Alexandra</first><last>Constantin</last></author>
       <author><first>Evan</first><last>Herbst</last></author>
       <pages>177–180</pages>
@@ -1560,7 +1560,7 @@
     <paper id="46">
       <title>Boosting Statistical Machine Translation by Lemmatization and Linear Interpolation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>181–184</pages>
       <url hash="90121be0">P07-2046</url>
       <bibkey>zhang-sumita-2007-boosting</bibkey>
@@ -1570,15 +1570,15 @@
       <author><first>Maofu</first><last>Liu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Mingli</first><last>Wu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>185–188</pages>
       <url hash="b25bf6ac">P07-2047</url>
       <bibkey>liu-etal-2007-extractive</bibkey>
     </paper>
     <paper id="48">
       <title>Machine Translation between <fixed-case>T</fixed-case>urkic Languages</title>
-      <author><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
-      <author><first>Eşref</first><last>Adali</last></author>
+      <author id="ahmet-cuneyd-tantug"><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
+      <author id="esref-adali"><first>Eşref</first><last>Adali</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>189–192</pages>
       <url hash="106c66f0">P07-2048</url>
@@ -1588,7 +1588,7 @@
       <title>Measuring Importance and Query Relevance in Topic-focused Multi-document Summarization</title>
       <author><first>Surabhi</first><last>Gupta</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>193–196</pages>
       <url hash="51b4736c">P07-2049</url>
       <bibkey>gupta-etal-2007-measuring</bibkey>
@@ -1598,7 +1598,7 @@
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <author><first>Toshiyuki</first><last>Wakita</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>197–200</pages>
       <url hash="7d5cda74">P07-2050</url>
       <bibkey>tsuchiya-etal-2007-expanding</bibkey>
@@ -1620,7 +1620,7 @@
     </paper>
     <paper id="53">
       <title>Poster paper: <fixed-case>H</fixed-case>un<fixed-case>P</fixed-case>os – an open source trigram tagger</title>
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <author><first>Csaba</first><last>Oravecz</last></author>
       <pages>209–212</pages>
@@ -1629,8 +1629,8 @@
     </paper>
     <paper id="54">
       <title>Extending <fixed-case>MARIE</fixed-case>: an N-gram-based <fixed-case>SMT</fixed-case> decoder</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <pages>213–216</pages>
       <url hash="8018fca3">P07-2054</url>
       <bibkey>crego-marino-2007-extending</bibkey>
@@ -1645,7 +1645,7 @@
     </paper>
     <paper id="56">
       <title>Automatic Part-of-Speech Tagging for <fixed-case>B</fixed-case>engali: An Approach for Morphologically Rich Languages in a Poor Resource Scenario</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
       <pages>221–224</pages>
@@ -1656,7 +1656,7 @@
       <title><fixed-case>J</fixed-case>apanese Dependency Parsing Using Sequential Labeling for Semi-spoken Language</title>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <pages>225–228</pages>
       <url hash="d5c896e2">P07-2057</url>
       <bibkey>imamura-etal-2007-japanese</bibkey>
@@ -1666,9 +1666,9 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> 2007 Student Research Workshop</booktitle>
       <url hash="bdc22e23">P07-3</url>
-      <editor><first>Chris</first><last>Biemann</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
       <editor><first>Violeta</first><last>Seretan</last></editor>
-      <editor><first>Ellen</first><last>Riloff</last></editor>
+      <editor id="ellen-riloff"><first>Ellen</first><last>Riloff</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
diff --git a/data/xml/P08.xml b/data/xml/P08.xml
index 81d3d3cae8..d5a2801ee3 100644
--- a/data/xml/P08.xml
+++ b/data/xml/P08.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of ACL-08: HLT</booktitle>
       <url hash="b9197d07">P08-1</url>
-      <editor><first>Johanna D.</first><last>Moore</last></editor>
+      <editor id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></editor>
       <editor><first>Simone</first><last>Teufel</last></editor>
-      <editor><first>James</first><last>Allan</last></editor>
-      <editor><first>Sadaoki</first><last>Furui</last></editor>
+      <editor id="james-allan"><first>James</first><last>Allan</last></editor>
+      <editor id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
       <month>June</month>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="3">
       <title>Weakly-Supervised Acquisition of Open-Domain Classes and Class Attributes from Web Documents and Query Logs</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>19–27</pages>
       <url hash="105a4f10">P08-1003</url>
@@ -61,10 +61,10 @@
     <paper id="6">
       <title>Task-oriented Evaluation of Syntactic Parsers and Their Representations</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>46–54</pages>
       <url hash="b675cdc3">P08-1006</url>
       <bibkey>miyao-etal-2008-task</bibkey>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="8">
       <title>Contradictions and Justifications: Extensions to the Textual Entailment Task</title>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <pages>63–71</pages>
       <url hash="27f14d18">P08-1008</url>
       <bibkey>voorhees-2008-contradictions</bibkey>
@@ -95,7 +95,7 @@
       <title>Phrase Table Training for Precision and Recall: What Makes a Good Phrase and a Good Phrase Pair?</title>
       <author><first>Yonggang</first><last>Deng</last></author>
       <author><first>Jia</first><last>Xu</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <pages>81–88</pages>
       <url hash="f800bec9">P08-1010</url>
       <bibkey>deng-etal-2008-phrase</bibkey>
@@ -115,7 +115,7 @@
       <title><fixed-case>B</fixed-case>ayesian Learning of Non-Compositional Phrases with Synchronous Parsing</title>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <pages>97–105</pages>
       <url hash="fff2ce03">P08-1012</url>
@@ -142,7 +142,7 @@
     <paper id="15">
       <title>Grounded Language Modeling for Automatic Speech Recognition of Sports Video</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>121–129</pages>
       <url hash="e33cd812">P08-1015</url>
       <bibkey>fleischman-roy-2008-grounded</bibkey>
@@ -165,7 +165,7 @@
       <title>Selecting Query Term Alternations for Web Search by Exploiting Query Contexts</title>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Stephen</first><last>Robertson</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>148–155</pages>
       <url hash="63f87fc2">P08-1018</url>
       <bibkey>cao-etal-2008-selecting</bibkey>
@@ -174,7 +174,7 @@
       <title>Searching Questions by Identifying Question Topic and Question Focus</title>
       <author><first>Huizhong</first><last>Duan</last></author>
       <author><first>Yunbo</first><last>Cao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <pages>156–164</pages>
       <url hash="7c07ab24">P08-1019</url>
@@ -182,16 +182,16 @@
     </paper>
     <paper id="20">
       <title>Trainable Generation of Big-Five Personality Styles through Data-Driven Parameter Estimation</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>165–173</pages>
       <url hash="05e6b875">P08-1020</url>
       <bibkey>mairesse-walker-2008-trainable</bibkey>
     </paper>
     <paper id="21">
       <title>Correcting Misuse of Verb Forms</title>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>174–182</pages>
       <url hash="1d65830c">P08-1021</url>
       <bibkey>lee-seneff-2008-correcting</bibkey>
@@ -199,8 +199,8 @@
     <paper id="22">
       <title><fixed-case>H</fixed-case>ypertagging: Supertagging for Surface Realization with <fixed-case>CCG</fixed-case></title>
       <author><first>Dominic</first><last>Espinosa</last></author>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <pages>183–191</pages>
       <url hash="63712d5a">P08-1022</url>
       <bibkey>espinosa-etal-2008-hypertagging</bibkey>
@@ -216,8 +216,8 @@
     </paper>
     <paper id="24">
       <title>A Discriminative Latent Variable Model for Statistical Machine Translation</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>200–208</pages>
       <url hash="5880daa9">P08-1024</url>
@@ -260,7 +260,7 @@
       <title>Exploiting Feature Hierarchy for Transfer Learning in Named Entity Recognition</title>
       <author><first>Andrew</first><last>Arnold</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>245–253</pages>
       <url hash="65160f3f">P08-1029</url>
       <bibkey>arnold-etal-2008-exploiting</bibkey>
@@ -268,14 +268,14 @@
     <paper id="30">
       <title>Refining Event Extraction through Cross-Document Inference</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>254–262</pages>
       <url hash="759b45c0">P08-1030</url>
       <bibkey>ji-grishman-2008-refining</bibkey>
     </paper>
     <paper id="31">
       <title>Learning Document-Level Semantic Properties from Free-Text Annotations</title>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
       <author><first>Harr</first><last>Chen</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
@@ -323,16 +323,16 @@
     </paper>
     <paper id="37">
       <title>Improving Parsing and <fixed-case>PP</fixed-case> Attachment Performance with Sense Information</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>317–325</pages>
       <url hash="f4369d4a">P08-1037</url>
       <bibkey>agirre-etal-2008-improving</bibkey>
     </paper>
     <paper id="38">
       <title>A Logical Basis for the <fixed-case>D</fixed-case> Combinator and Normal Form in <fixed-case>CCG</fixed-case></title>
-      <author><first>Frederick</first><last>Hoyt</last></author>
+      <author id="frederick-m-hoyt"><first>Frederick</first><last>Hoyt</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>326–334</pages>
       <url hash="dc185918">P08-1038</url>
@@ -341,7 +341,7 @@
     <paper id="39">
       <title>Parsing Noun Phrase Structure with <fixed-case>CCG</fixed-case></title>
       <author><first>David</first><last>Vadas</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>335–343</pages>
       <url hash="990e4db4">P08-1039</url>
       <bibkey>vadas-curran-2008-parsing</bibkey>
@@ -357,7 +357,7 @@
     <paper id="41">
       <title>Summarizing Emails with Conversational Cohesion and Subjectivity</title>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <author><first>Xiaodong</first><last>Zhou</last></author>
       <pages>353–361</pages>
       <url hash="ede123a7">P08-1041</url>
@@ -380,9 +380,9 @@
     </paper>
     <paper id="44">
       <title>Which Words Are Hard to Recognize? Prosodic, Lexical, and Disfluency Factors that Increase <fixed-case>ASR</fixed-case> Error Rates</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>380–388</pages>
       <url hash="9008f082">P08-1044</url>
       <bibkey>goldwater-etal-2008-words</bibkey>
@@ -391,7 +391,7 @@
       <title>Name Translation in Statistical Machine Translation - Learning When to Transliterate</title>
       <author><first>Ulf</first><last>Hermjakob</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>389–397</pages>
       <url hash="b2c30b8f">P08-1045</url>
       <bibkey>hermjakob-etal-2008-name</bibkey>
@@ -405,7 +405,7 @@
     </paper>
     <paper id="47">
       <title>Inducing Gazetteers for Named Entity Recognition by Large-Scale Clustering of Dependency Relations</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>407–415</pages>
       <url hash="aa85a859">P08-1047</url>
@@ -414,7 +414,7 @@
     <paper id="48">
       <title>Evaluating <fixed-case>R</fixed-case>oget‘s Thesauri</title>
       <author><first>Alistair</first><last>Kennedy</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>416–424</pages>
       <url hash="6bbe1d0c">P08-1048</url>
       <bibkey>kennedy-szpakowicz-2008-evaluating</bibkey>
@@ -437,7 +437,7 @@
     </paper>
     <paper id="51">
       <title>Collecting a Why-Question Corpus for Development and Evaluation of an Automatic <fixed-case>QA</fixed-case>-System</title>
-      <author><first>Joanna</first><last>Mrozinski</last></author>
+      <author id="joanna-mrozinski"><first>Joanna</first><last>Mrozinski</last></author>
       <author><first>Edward</first><last>Whittaker</last></author>
       <author><first>Sadaoki</first><last>Furui</last></author>
       <pages>443–451</pages>
@@ -446,8 +446,8 @@
     </paper>
     <paper id="52">
       <title>Solving Relational Similarity Problems Using the Web as a Corpus</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>452–460</pages>
       <url hash="784b335b">P08-1052</url>
       <bibkey>nakov-hearst-2008-solving</bibkey>
@@ -455,7 +455,7 @@
     <paper id="53">
       <title>Combining Speech Retrieval Results with Generalized Additive Models</title>
       <author><first>J. Scott</first><last>Olsson</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <pages>461–469</pages>
       <url hash="e09d2722">P08-1053</url>
       <bibkey>olsson-oard-2008-combining</bibkey>
@@ -470,8 +470,8 @@
     </paper>
     <paper id="55">
       <title>Intensional Summaries as Cooperative Responses in Dialogue: Automation and Evaluation</title>
-      <author><first>Joseph</first><last>Polifroni</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>479–487</pages>
       <url hash="f1832654">P08-1055</url>
       <bibkey>polifroni-walker-2008-intensional</bibkey>
@@ -487,7 +487,7 @@
     </paper>
     <paper id="57">
       <title>Combining <fixed-case>EM</fixed-case> Training and the <fixed-case>MDL</fixed-case> Principle for an Automatic Verb Classification Incorporating Selectional Preferences</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Christian</first><last>Hying</last></author>
       <author><first>Christian</first><last>Scheible</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
@@ -544,8 +544,8 @@
     <paper id="63">
       <title>Robustness and Generalization of Role Sets: <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank vs. <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et</title>
       <author><first>Beñat</first><last>Zapirain</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>550–558</pages>
       <url hash="5f1c3f63">P08-1063</url>
       <bibkey>zapirain-etal-2008-robustness</bibkey>
@@ -554,9 +554,9 @@
       <title>A Tree Sequence Alignment-based Tree-to-Tree Translation Model</title>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Hongfei</first><last>Jiang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>559–567</pages>
       <url hash="b9c327ce">P08-1064</url>
@@ -575,7 +575,7 @@
       <title>A New String-to-Dependency Machine Translation Algorithm with a Target Dependency Language Model</title>
       <author><first>Libin</first><last>Shen</last></author>
       <author><first>Jinxi</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>577–585</pages>
       <url hash="27e80b27">P08-1066</url>
       <bibkey>shen-etal-2008-new</bibkey>
@@ -591,7 +591,7 @@
       <title>Simple Semi-supervised Dependency Parsing</title>
       <author><first>Terry</first><last>Koo</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>595–603</pages>
       <url hash="726401f9">P08-1068</url>
       <bibkey>koo-etal-2008-simple</bibkey>
@@ -600,7 +600,7 @@
       <title>Optimal <tex-math>k</tex-math>-arization of Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
       <author><first>Rebecca</first><last>Nesson</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>604–612</pages>
       <url hash="31543bb3">P08-1069</url>
       <bibkey>nesson-etal-2008-optimal</bibkey>
@@ -617,7 +617,7 @@
     <paper id="71">
       <title>Assessing Dialog System User Simulation Evaluation Measures Using Human Judges</title>
       <author><first>Hua</first><last>Ai</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>622–629</pages>
       <url hash="ee79880f">P08-1071</url>
       <bibkey>ai-litman-2008-assessing</bibkey>
@@ -625,8 +625,8 @@
     <paper id="72">
       <title>Robust Dialog Management with N-Best Hypotheses Using Dialog Examples and Agenda</title>
       <author><first>Cheongjae</first><last>Lee</last></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>630–637</pages>
       <url hash="22ebf55b">P08-1072</url>
       <bibkey>lee-etal-2008-robust</bibkey>
@@ -641,8 +641,8 @@
     </paper>
     <paper id="74">
       <title>Phrase Chunking Using Entropy Guided Transformation Learning</title>
-      <author><first>Ruy Luiz</first><last>Milidiú</last></author>
-      <author><first>Cícero Nogueira</first><last>dos Santos</last></author>
+      <author id="ruy-luiz-milidiu"><first>Ruy Luiz</first><last>Milidiú</last></author>
+      <author id="cicero-dos-santos"><first>Cícero Nogueira</first><last>dos Santos</last></author>
       <author><first>Julio C.</first><last>Duarte</last></author>
       <pages>647–655</pages>
       <url hash="ee4b8ea9">P08-1074</url>
@@ -650,8 +650,8 @@
     </paper>
     <paper id="75">
       <title>Learning Bigrams from Unigrams</title>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
-      <author><first>Andrew B.</first><last>Goldberg</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew B.</first><last>Goldberg</last></author>
       <author><first>Michael</first><last>Rabbat</last></author>
       <author><first>Robert</first><last>Nowak</last></author>
       <pages>656–664</pages>
@@ -694,9 +694,9 @@
     </paper>
     <paper id="80">
       <title>Improving Search Results Quality by Customizing Summary Lengths</title>
-      <author><first>Michael</first><last>Kaisser</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
-      <author><first>John B.</first><last>Lowe</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
       <pages>701–709</pages>
       <url hash="7d6871cf">P08-1080</url>
       <bibkey>kaisser-etal-2008-improving</bibkey>
@@ -705,7 +705,7 @@
       <title>Using Conditional Random Fields to Extract Contexts and Answers of Questions from Online Forums</title>
       <author><first>Shilin</first><last>Ding</last></author>
       <author><first>Gao</first><last>Cong</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Xiaoyan</first><last>Zhu</last></author>
       <pages>710–718</pages>
       <url hash="9bed3649">P08-1081</url>
@@ -785,15 +785,15 @@
     </paper>
     <paper id="90">
       <title>Unsupervised Learning of Narrative Event Chains</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>789–797</pages>
       <url hash="883d178c">P08-1090</url>
       <bibkey>chambers-jurafsky-2008-unsupervised</bibkey>
     </paper>
     <paper id="91">
       <title>Semantic Role Labeling Systems for <fixed-case>A</fixed-case>rabic using Kernel Methods</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
       <pages>798–806</pages>
@@ -803,7 +803,7 @@
     <paper id="92">
       <title>An Unsupervised Approach to Biography Production Using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Fadi</first><last>Biadsy</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Elena</first><last>Filatova</last></author>
       <pages>807–815</pages>
       <url hash="a6bf306a">P08-1092</url>
@@ -812,7 +812,7 @@
     <paper id="93">
       <title>Generating Impact-Based Summaries for Scientific Literature</title>
       <author><first>Qiaozhu</first><last>Mei</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>816–824</pages>
       <url hash="e17f4064">P08-1093</url>
       <bibkey>mei-zhai-2008-generating</bibkey>
@@ -838,7 +838,7 @@
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>Jun</first><last>Lang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>843–851</pages>
@@ -866,7 +866,7 @@
     </paper>
     <paper id="99">
       <title>Generalized Expectation Criteria for Semi-Supervised Learning of Conditional Random Fields</title>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>870–878</pages>
       <url hash="6fe6ac34">P08-1099</url>
@@ -893,7 +893,7 @@
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <pages>897–904</pages>
       <url hash="44d27b2e">P08-1102</url>
       <bibkey>jiang-etal-2008-cascaded</bibkey>
@@ -922,15 +922,15 @@
     <paper id="105">
       <title>Credibility Improves Topical Blog Post Retrieval</title>
       <author><first>Wouter</first><last>Weerkamp</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>923–931</pages>
       <url hash="d74a8ddd">P08-1105</url>
       <bibkey>weerkamp-de-rijke-2008-credibility</bibkey>
     </paper>
     <paper id="106">
       <title>Linguistically Motivated Features for Enhanced Back-of-the-Book Indexing</title>
-      <author><first>Andras</first><last>Csomai</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="andras-csomai"><first>Andras</first><last>Csomai</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>932–940</pages>
       <url hash="f1cc5f5d">P08-1106</url>
       <bibkey>csomai-mihalcea-2008-linguistically</bibkey>
@@ -938,7 +938,7 @@
     <paper id="107">
       <title>Resolving Personal Names in Email Using Context Expansion</title>
       <author><first>Tamer</first><last>Elsayed</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <author><first>Galileo</first><last>Namata</last></author>
       <pages>941–949</pages>
       <url hash="d2377438">P08-1107</url>
@@ -954,9 +954,9 @@
     </paper>
     <paper id="109">
       <title>Efficient, Feature-based, Conditional Random Field Parsing</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
       <author><first>Alex</first><last>Kleeman</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>959–967</pages>
       <url hash="58da7ce1">P08-1109</url>
       <bibkey>finkel-etal-2008-efficient</bibkey>
@@ -964,15 +964,15 @@
     <paper id="110">
       <title>A Deductive Approach to Dependency Parsing</title>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>968–976</pages>
       <url hash="74bfc6d5">P08-1110</url>
       <bibkey>gomez-rodriguez-etal-2008-deductive</bibkey>
     </paper>
     <paper id="111">
       <title>Evaluating a Crosslinguistic Grammar Resource: A Case Study of <fixed-case>W</fixed-case>ambaya</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>977–985</pages>
       <url hash="03deffc4">P08-1111</url>
       <bibkey>bender-2008-evaluating</bibkey>
@@ -980,7 +980,7 @@
     <paper id="112">
       <title>Better Alignments = Better Translations?</title>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>João V.</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João V.</first><last>Graça</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <pages>986–993</pages>
       <url hash="02cc9298">P08-1112</url>
@@ -991,7 +991,7 @@
       <author><first>Dekang</first><last>Lin</last></author>
       <author><first>Shaojun</first><last>Zhao</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>994–1002</pages>
       <url hash="aa98c69b">P08-1113</url>
       <bibkey>lin-etal-2008-mining</bibkey>
@@ -1006,7 +1006,7 @@
     </paper>
     <paper id="115">
       <title>Generalizing Word Lattice Translation</title>
-      <author><first>Christopher</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Christopher</first><last>Dyer</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>1012–1020</pages>
@@ -1037,9 +1037,9 @@
     </paper>
     <paper id="118">
       <title>Finding Contradictions in Text</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Anna N.</first><last>Rafferty</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="anna-n-rafferty"><first>Anna N.</first><last>Rafferty</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1039–1047</pages>
       <url hash="9c1e95bb">P08-1118</url>
       <bibkey>de-marneffe-etal-2008-finding</bibkey>
@@ -1047,8 +1047,8 @@
     <paper id="119">
       <title>Semantic Class Learning from the Web with Hyponym Pattern Linkage Graphs</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1048–1056</pages>
       <url hash="ebb02ab8">P08-1119</url>
       <bibkey>kozareva-etal-2008-semantic</bibkey>
@@ -1058,10 +1058,10 @@
     <meta>
       <booktitle>Proceedings of ACL-08: HLT, Short Papers</booktitle>
       <url hash="52d6a17c">P08-2</url>
-      <editor><first>Johanna D.</first><last>Moore</last></editor>
+      <editor id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></editor>
       <editor><first>Simone</first><last>Teufel</last></editor>
-      <editor><first>James</first><last>Allan</last></editor>
-      <editor><first>Sadaoki</first><last>Furui</last></editor>
+      <editor id="james-allan"><first>James</first><last>Allan</last></editor>
+      <editor id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
       <month>June</month>
@@ -1075,7 +1075,7 @@
     <paper id="1">
       <title>Language Dynamics and Capitalization using Maximum Entropy</title>
       <author><first>Fernando</first><last>Batista</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>1–4</pages>
       <url hash="9bb7a5d7">P08-2001</url>
@@ -1084,7 +1084,7 @@
     <paper id="2">
       <title>Surprising Parser Actions and Reading Difficulty</title>
       <author><first>Marisa Ferrara</first><last>Boston</last></author>
-      <author><first>John T.</first><last>Hale</last></author>
+      <author id="john-hale"><first>John T.</first><last>Hale</last></author>
       <author><first>Reinhold</first><last>Kliegl</last></author>
       <author><first>Shravan</first><last>Vasishth</last></author>
       <pages>5–8</pages>
@@ -1094,7 +1094,7 @@
     <paper id="3">
       <title>Improving the Performance of the Random Walk Model for Answering Complex Questions</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>9–12</pages>
       <url hash="3ea6a2ac">P08-2003</url>
       <bibkey>chali-joty-2008-improving</bibkey>
@@ -1109,8 +1109,8 @@
     <paper id="5">
       <title>Extractive Summaries for Educational Science Content</title>
       <author><first>Sebastian</first><last>de la Chica</last></author>
-      <author><first>Faisal</first><last>Ahmad</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="faisal-ahmad"><first>Faisal</first><last>Ahmad</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>17–20</pages>
       <url hash="1b088426">P08-2005</url>
@@ -1135,7 +1135,7 @@
     <paper id="8">
       <title>Novel Semantic Features for Verb Sense Disambiguation</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>29–32</pages>
       <url hash="a31475f8">P08-2008</url>
       <bibkey>dligach-palmer-2008-novel</bibkey>
@@ -1143,7 +1143,7 @@
     <paper id="9">
       <title><fixed-case>I</fixed-case>celandic Data Driven Part of Speech Tagging</title>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Joel</first><last>Wallenberg</last></author>
+      <author id="joel-wallenberg"><first>Joel</first><last>Wallenberg</last></author>
       <pages>33–36</pages>
       <url hash="eed7ae02">P08-2009</url>
       <bibkey>dredze-wallenberg-2008-icelandic</bibkey>
@@ -1166,8 +1166,8 @@
     </paper>
     <paper id="12">
       <title>Enforcing Transitivity in Coreference Resolution</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>45–48</pages>
       <url hash="3c5f6976">P08-2012</url>
       <bibkey>finkel-manning-2008-enforcing</bibkey>
@@ -1198,7 +1198,7 @@
     </paper>
     <paper id="16">
       <title>Combined One Sense Disambiguation of Abbreviations</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Ariel</first><last>Kass</last></author>
       <author><first>Ariel</first><last>Peretz</last></author>
       <pages>61–64</pages>
@@ -1208,7 +1208,7 @@
     <paper id="17">
       <title>Assessing the Costs of Sampling Methods in Active Learning for Annotation</title>
       <author><first>Robbie</first><last>Haertel</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>James</first><last>Carroll</last></author>
       <author><first>Peter</first><last>McClanahan</last></author>
@@ -1226,7 +1226,7 @@
     </paper>
     <paper id="19">
       <title>Mixture Model <fixed-case>POMDP</fixed-case>s for Efficient Handling of Uncertainty in Dialogue Management</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>73–76</pages>
       <url hash="a4c55011">P08-2019</url>
@@ -1234,13 +1234,13 @@
     </paper>
     <paper id="20">
       <title>Recent Improvements in the <fixed-case>CMU</fixed-case> Large Scale <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case> System</title>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
       <author><first>Kay</first><last>Rottmann</last></author>
       <author><first>Mohamed</first><last>Noamany</last></author>
       <author><first>Quin</first><last>Gao</last></author>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>77–80</pages>
       <url hash="134da80b">P08-2020</url>
       <bibkey>hildebrand-etal-2008-recent</bibkey>
@@ -1248,8 +1248,8 @@
     <paper id="21">
       <title>Machine Translation System Combination using <fixed-case>ITG</fixed-case>-based Alignments</title>
       <author><first>Damianos</first><last>Karakos</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Markus</first><last>Dreyer</last></author>
       <pages>81–84</pages>
       <url hash="b78286f5">P08-2021</url>
@@ -1269,7 +1269,7 @@
       <author><first>Peng</first><last>Zhang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Yuexian</first><last>Hou</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>89–92</pages>
       <url hash="30758b3e">P08-2023</url>
       <bibkey>li-etal-2008-novel</bibkey>
@@ -1300,7 +1300,7 @@
     </paper>
     <paper id="27">
       <title>A Unified Syntactic Model for Parsing Fluent and Disfluent Speech</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>105–108</pages>
       <url hash="baf053e2">P08-2027</url>
@@ -1309,7 +1309,7 @@
     <paper id="28">
       <title>The Good, the Bad, and the Unknown: Morphosyllabic Sentiment Tagging of Unseen Words</title>
       <author><first>Karo</first><last>Moilanen</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>109–112</pages>
       <url hash="6d708fbd">P08-2028</url>
       <bibkey>moilanen-pulman-2008-good</bibkey>
@@ -1324,10 +1324,10 @@
     </paper>
     <paper id="30">
       <title><fixed-case>A</fixed-case>rabic Morphological Tagging, Diacritization, and Lemmatization Using Lexeme Models and Feature Ranking</title>
-      <author><first>Ryan</first><last>Roth</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Cynthia</first><last>Rudin</last></author>
       <pages>117–120</pages>
       <url hash="4c1389c9">P08-2030</url>
@@ -1336,7 +1336,7 @@
     <paper id="31">
       <title>Using Automatically Transcribed Dialogs to Learn User Models in a Spoken Dialog System</title>
       <author><first>Umar</first><last>Syed</last></author>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>121–124</pages>
       <url hash="9f61413e">P08-2031</url>
       <bibkey>syed-williams-2008-using</bibkey>
@@ -1345,7 +1345,7 @@
       <title>Robust Extraction of Named Entity Including Unfamiliar Word</title>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
       <author><first>Shinya</first><last>Hida</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>125–128</pages>
       <url hash="c06769b5">P08-2032</url>
       <bibkey>tsuchiya-etal-2008-robust</bibkey>
@@ -1353,7 +1353,7 @@
     <paper id="33">
       <title>In-Browser Summarisation: Generating Elaborative Summaries Biased Towards the Reading Context</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>129–132</pages>
       <url hash="e53ff011">P08-2033</url>
       <bibkey>wan-paris-2008-browser</bibkey>
@@ -1362,7 +1362,7 @@
       <title>Lyric-based Song Sentiment Classification with Sentiment Vector Space Model</title>
       <author><first>Yunqing</first><last>Xia</last></author>
       <author><first>Linlin</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Mingxing</first><last>Xu</last></author>
       <pages>133–136</pages>
       <url hash="5d5c7854">P08-2034</url>
@@ -1385,7 +1385,7 @@
     </paper>
     <paper id="37">
       <title>Event Matching Using the Transitive Closure of Dependency Relations</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
       <pages>145–148</pages>
       <url hash="3ccc2eb1">P08-2037</url>
@@ -1393,9 +1393,9 @@
     </paper>
     <paper id="38">
       <title>A Linguistically Annotated Reordering Model for <fixed-case>BTG</fixed-case>-based Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>149–152</pages>
       <url hash="3569a557">P08-2038</url>
@@ -1405,7 +1405,7 @@
       <title>Segmentation for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation</title>
       <author><first>Ibrahim</first><last>Badr</last></author>
       <author><first>Rabih</first><last>Zbib</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>153–156</pages>
       <url hash="49ddfe94">P08-2039</url>
       <bibkey>badr-etal-2008-segmentation</bibkey>
@@ -1414,7 +1414,7 @@
       <title>Exploiting N-best Hypotheses for <fixed-case>SMT</fixed-case> Self-Enhancement</title>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>157–160</pages>
       <url hash="2100c815">P08-2040</url>
@@ -1432,7 +1432,7 @@
     <paper id="42">
       <title>Unsupervised Learning of Acoustic Sub-word Units</title>
       <author><first>Balakrishnan</first><last>Varadarajan</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>165–168</pages>
       <url hash="3b28daa9">P08-2042</url>
@@ -1441,8 +1441,8 @@
     <paper id="43">
       <title>High Frequency Word Entrainment in Spoken Dialogue</title>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>169–172</pages>
       <url hash="fe8d6f14">P08-2043</url>
       <bibkey>nenkova-etal-2008-high</bibkey>
@@ -1458,7 +1458,7 @@
     <paper id="45">
       <title>Learning Semantic Links from a Corpus of Parallel Temporal and Causal Relations</title>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>177–180</pages>
       <url hash="10d3a536">P08-2045</url>
       <bibkey>bethard-martin-2008-learning</bibkey>
@@ -1466,8 +1466,8 @@
     <paper id="46">
       <title>Evolving New Lexical Association Measures Using Genetic Programming</title>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
-      <author><first>Saša</first><last>Petrović</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="sasa-petrovic"><first>Saša</first><last>Petrović</last></author>
       <author><first>Ivan</first><last>Sikirić</last></author>
       <pages>181–184</pages>
       <url hash="9b2b3952">P08-2046</url>
@@ -1492,7 +1492,7 @@
     </paper>
     <paper id="49">
       <title>Query-based Sentence Fusion is Better Defined and Leads to More Preferred Results than Generic Sentence Fusion</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
       <author><first>Paul</first><last>van Pelt</last></author>
       <pages>193–196</pages>
@@ -1501,7 +1501,7 @@
     </paper>
     <paper id="50">
       <title>Intrinsic vs. Extrinsic Evaluation Measures for Referring Expression Generation</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>197–200</pages>
       <url hash="88790a03">P08-2050</url>
@@ -1518,7 +1518,7 @@
     <paper id="52">
       <title><fixed-case>F</fixed-case>ast<fixed-case>S</fixed-case>um: Fast and Accurate Query-based Multi-document Summarization</title>
       <author><first>Frank</first><last>Schilder</last></author>
-      <author><first>Ravikumar</first><last>Kondadadi</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravikumar</first><last>Kondadadi</last></author>
       <pages>205–208</pages>
       <url hash="d40cd888">P08-2052</url>
       <bibkey>schilder-kondadadi-2008-fastsum</bibkey>
@@ -1533,7 +1533,7 @@
     </paper>
     <paper id="54">
       <title>Unlexicalised Hidden Variable Models of Split Dependency Grammars</title>
-      <author><first>Gabriele Antonio</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele Antonio</first><last>Musillo</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>213–216</pages>
       <url hash="d98d491d">P08-2054</url>
@@ -1551,7 +1551,7 @@
       <title>Adapting a <fixed-case>WSJ</fixed-case>-Trained Parser to Grammatically Noisy Text</title>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>221–224</pages>
       <url hash="ab9b167a">P08-2056</url>
       <bibkey>foster-etal-2008-adapting</bibkey>
@@ -1559,8 +1559,8 @@
     <paper id="57">
       <title>Enriching Spoken Language Translation with Dialog Acts</title>
       <author><first>Vivek Kumar</first><last>Rangarajan Sridhar</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>225–228</pages>
       <url hash="dbd0459b">P08-2057</url>
       <bibkey>rangarajan-sridhar-etal-2008-enriching</bibkey>
@@ -1571,7 +1571,7 @@
       <author><first>Hyunjung</first><last>Lee</last></author>
       <author><first>Choong-Nyoung</first><last>Seon</last></author>
       <author><first>Harksoo</first><last>Kim</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>229–232</pages>
       <url hash="a6230e1f">P08-2058</url>
       <bibkey>kim-etal-2008-speakers</bibkey>
@@ -1594,10 +1594,10 @@
     </paper>
     <paper id="61">
       <title>Extracting a Representation from Text for Semantic Analysis</title>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>241–244</pages>
       <url hash="2505f813">P08-2061</url>
       <bibkey>nielsen-etal-2008-extracting</bibkey>
@@ -1613,7 +1613,7 @@
     </paper>
     <paper id="63">
       <title>Choosing Sense Distinctions for <fixed-case>WSD</fixed-case>: Psycholinguistic Evidence</title>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <pages>249–252</pages>
       <url hash="9fedbd55">P08-2063</url>
       <bibkey>brown-2008-choosing</bibkey>
@@ -1630,7 +1630,7 @@
     <paper id="65">
       <title>Multi-domain Sentiment Classification</title>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>257–260</pages>
       <url hash="fdc33b1b">P08-2065</url>
       <bibkey>li-zong-2008-multi</bibkey>
@@ -1638,7 +1638,7 @@
     <paper id="66">
       <title>Evaluating Word Prediction: Framing Keystroke Savings</title>
       <author><first>Keith</first><last>Trnka</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>261–264</pages>
       <url hash="4d7297c3">P08-2066</url>
       <bibkey>trnka-mccoy-2008-evaluating</bibkey>
@@ -1647,7 +1647,7 @@
       <title>Pairwise Document Similarity in Large Collections with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe</title>
       <author><first>Tamer</first><last>Elsayed</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>265–268</pages>
       <url hash="63ffc957">P08-2067</url>
       <bibkey>elsayed-etal-2008-pairwise</bibkey>
@@ -1739,7 +1739,7 @@
     </paper>
     <paper id="9">
       <title>An Unsupervised Vector Approach to Biomedical Term Disambiguation: Integrating <fixed-case>UMLS</fixed-case> and <fixed-case>M</fixed-case>edline</title>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>49–54</pages>
       <url hash="10df6ba3">P08-3009</url>
       <bibkey>mcinnes-2008-unsupervised</bibkey>
@@ -1782,7 +1782,7 @@
     </frontmatter>
     <paper id="1">
       <title>Demonstration of a <fixed-case>POMDP</fixed-case> Voice Dialer</title>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>1–4</pages>
       <url hash="99cf537f">P08-4001</url>
       <bibkey>williams-2008-demonstration</bibkey>
@@ -1798,11 +1798,11 @@
     <paper id="3">
       <title><fixed-case>BART</fixed-case>: A Modular Toolkit for Coreference Resolution</title>
       <author><first>Yannick</first><last>Versley</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Alan</first><last>Jern</last></author>
-      <author><first>Jason</first><last>Smith</last></author>
+      <author id="jason-smith"><first>Jason</first><last>Smith</last></author>
       <author><first>Xiaofeng</first><last>Yang</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>9–12</pages>
@@ -1819,7 +1819,7 @@
     <paper id="5">
       <title>Interactive <fixed-case>ASR</fixed-case> Error Correction for Touchscreen Devices</title>
       <author><first>David</first><last>Huggins-Daines</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <pages>17–19</pages>
       <url hash="6c2c9d48">P08-4005</url>
       <bibkey>huggins-daines-rudnicky-2008-interactive</bibkey>
@@ -1836,7 +1836,7 @@
       <author><first>Moonyoung</first><last>Kang</last></author>
       <author><first>Sourish</first><last>Chaudhuri</last></author>
       <author><first>Mahesh</first><last>Joshi</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>24–27</pages>
       <url hash="7d380a54">P08-4007</url>
       <bibkey>kang-etal-2008-side</bibkey>
@@ -1845,7 +1845,7 @@
       <title><fixed-case>M</fixed-case>odel<fixed-case>T</fixed-case>alker <fixed-case>V</fixed-case>oice <fixed-case>R</fixed-case>ecorder—<fixed-case>A</fixed-case>n Interface System for Recording a Corpus of Speech for Synthesis</title>
       <author><first>Debra</first><last>Yarrington</last></author>
       <author><first>John</first><last>Gray</last></author>
-      <author><first>Chris</first><last>Pennington</last></author>
+      <author id="christopher-pennington"><first>Chris</first><last>Pennington</last></author>
       <author><first>H. Timothy</first><last>Bunnell</last></author>
       <author><first>Allegra</first><last>Cornaglia</last></author>
       <author><first>Jason</first><last>Lilley</last></author>
@@ -1857,7 +1857,7 @@
     </paper>
     <paper id="9">
       <title>The <fixed-case>Q</fixed-case>u<fixed-case>AL</fixed-case>i<fixed-case>M</fixed-case> Question Answering Demo: Supplementing Answers with Paragraphs drawn from <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Michael</first><last>Kaisser</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
       <pages>32–35</pages>
       <url hash="1035862f">P08-4009</url>
       <bibkey>kaisser-2008-qualim</bibkey>
@@ -1868,7 +1868,7 @@
       <booktitle>Tutorial Abstracts of ACL-08: HLT</booktitle>
       <url hash="df49111d">P08-5</url>
       <editor><first>Ani</first><last>Nenkova</last></editor>
-      <editor><first>Marilyn</first><last>Walker</last></editor>
+      <editor id="marilyn-walker"><first>Marilyn</first><last>Walker</last></editor>
       <editor><first>Eugene</first><last>Agichtein</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
@@ -1893,7 +1893,7 @@
       <title>Building Practical Spoken Dialog Systems</title>
       <author><first>Antoine</first><last>Raux</last></author>
       <author><first>Brian</first><last>Langner</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>2</pages>
       <url hash="40631f22">P08-5002</url>
@@ -1902,7 +1902,7 @@
     <paper id="3">
       <title>Semi-Supervised Learning for Natural Language Processing</title>
       <author><first>John</first><last>Blitzer</last></author>
-      <author><first>Xiaojin Jerry</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin Jerry</first><last>Zhu</last></author>
       <pages>3</pages>
       <url hash="7c2178f3">P08-5003</url>
       <bibkey>blitzer-zhu-2008-semi</bibkey>
@@ -1916,7 +1916,7 @@
     </paper>
     <paper id="5">
       <title>Speech Technology: From Research to the Industry of Human-Machine Communication</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <pages>5</pages>
       <url hash="143a64b8">P08-5005</url>
       <bibkey>pieraccini-2008-speech</bibkey>
diff --git a/data/xml/P09.xml b/data/xml/P09.xml
index 079b46f867..2d9d3760c9 100644
--- a/data/xml/P09.xml
+++ b/data/xml/P09.xml
@@ -6,7 +6,7 @@
       <url hash="ed552c42">P09-1</url>
       <editor><first>Keh-Yih</first><last>Su</last></editor>
       <editor><first>Jian</first><last>Su</last></editor>
-      <editor><first>Janyce</first><last>Wiebe</last></editor>
+      <editor id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -33,7 +33,7 @@
     <paper id="2">
       <title>Investigations on Word Senses and Word Usages</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Nicholas</first><last>Gaylord</last></author>
       <pages>10–18</pages>
       <url hash="c3d9461f">P09-1002</url>
@@ -42,8 +42,8 @@
     <paper id="3">
       <title>A Comparative Study on Generalization of Semantic Roles in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>19–27</pages>
       <url hash="929b1ec3">P09-1003</url>
       <bibkey>matsubayashi-etal-2009-comparative</bibkey>
@@ -59,8 +59,8 @@
     </paper>
     <paper id="5">
       <title><fixed-case>B</fixed-case>rutus: A Semantic Role Labeling System Incorporating <fixed-case>CCG</fixed-case>, <fixed-case>CFG</fixed-case>, and Dependency Features</title>
-      <author><first>Stephen</first><last>Boxwell</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="stephen-boxwell"><first>Stephen</first><last>Boxwell</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <pages>37–45</pages>
       <url hash="017e854a">P09-1005</url>
@@ -68,7 +68,7 @@
     </paper>
     <paper id="6">
       <title>Exploiting Heterogeneous Treebanks for Parsing</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <pages>46–54</pages>
@@ -79,15 +79,15 @@
       <title>Cross Language Dependency Parsing using a Bilingual Lexicon</title>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>55–63</pages>
       <url hash="544ab07c">P09-1007</url>
       <bibkey>zhao-etal-2009-cross</bibkey>
     </paper>
     <paper id="8">
       <title>Topological Field Parsing of <fixed-case>G</fixed-case>erman</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>64–72</pages>
       <url hash="ea3b11a3">P09-1008</url>
@@ -104,9 +104,9 @@
     </paper>
     <paper id="10">
       <title>Reinforcement Learning for Mapping Instructions to Actions</title>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
       <author><first>Harr</first><last>Chen</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>82–90</pages>
       <url hash="d449f36b">P09-1010</url>
@@ -115,7 +115,7 @@
     <paper id="11">
       <title>Learning Semantic Correspondences with Less Supervision</title>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Michael</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael</first><last>Jordan</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>91–99</pages>
       <url hash="b07519bd">P09-1011</url>
@@ -133,7 +133,7 @@
     <paper id="13">
       <title>Knowing the Unseen: Estimating Vocabulary Size over Unseen Samples</title>
       <author><first>Suma</first><last>Bhat</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>109–117</pages>
       <url hash="7d275dd5">P09-1013</url>
       <bibkey>bhat-sproat-2009-knowing</bibkey>
@@ -185,8 +185,8 @@
       <title>Efficient Minimum Error Rate Training and Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding for Translation Hypergraphs and Lattices</title>
       <author><first>Shankar</first><last>Kumar</last></author>
       <author><first>Wolfgang</first><last>Macherey</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>163–171</pages>
       <url hash="e03a87b6">P09-1019</url>
       <bibkey>kumar-etal-2009-efficient</bibkey>
@@ -196,15 +196,15 @@
       <author><first>Hui</first><last>Zhang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>172–180</pages>
       <url hash="3ee61b39">P09-1020</url>
       <bibkey>zhang-etal-2009-forest</bibkey>
     </paper>
     <paper id="21">
       <title>Active Learning for Multilingual Statistical Machine Translation</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>181–189</pages>
       <url hash="6f9075af">P09-1021</url>
@@ -220,7 +220,7 @@
     <paper id="23">
       <title>Summarizing Definition from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Shiren</first><last>Ye</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Jie</first><last>Lu</last></author>
       <pages>199–207</pages>
       <url hash="79a7cbfc">P09-1023</url>
@@ -278,11 +278,11 @@
     <paper id="30">
       <title>Compiling a Massive, Multilingual Dictionary via Probabilistic Inference</title>
       <author><first/><last>Mausam</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
-      <author><first>Michael</first><last>Skinner</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
+      <author id="michael-skinner"><first>Michael</first><last>Skinner</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>262–270</pages>
       <url hash="070b2fde">P09-1030</url>
       <bibkey>mausam-etal-2009-compiling</bibkey>
@@ -306,25 +306,25 @@
     <paper id="33">
       <title>Abstraction and Generalisation in Semantic Role Labels: <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et or both?</title>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Lonneke</first><last>Van Der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>Van Der Plas</last></author>
       <pages>288–296</pages>
       <url hash="f661acee">P09-1033</url>
       <bibkey>merlo-van-der-plas-2009-abstraction</bibkey>
     </paper>
     <paper id="34">
       <title>Robust Machine Translation Evaluation with Entailment Features</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>297–305</pages>
       <url hash="68039d34">P09-1034</url>
       <bibkey>pado-etal-2009-robust</bibkey>
     </paper>
     <paper id="35">
       <title>The Contribution of Linguistic Features to Automatic Machine Translation Evaluation</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>306–314</pages>
@@ -333,9 +333,9 @@
     </paper>
     <paper id="36">
       <title>A Syntax-Driven Bracketing Model for Phrase-Based Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>315–323</pages>
       <url hash="7201d4e2">P09-1036</url>
@@ -362,9 +362,9 @@
     </paper>
     <paper id="39">
       <title>Concise Integer Linear Programming Formulations for Dependency Parsing</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>342–350</pages>
       <url hash="d46d0f9c">P09-1039</url>
       <bibkey>martins-etal-2009-concise</bibkey>
@@ -379,7 +379,7 @@
     <paper id="41">
       <title>Semi-supervised Learning of Dependency Parsers using Generalized Expectation Criteria</title>
       <author><first>Gregory</first><last>Druck</last></author>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>360–368</pages>
       <url hash="e7d4cfd3">P09-1041</url>
@@ -414,7 +414,7 @@
     <paper id="45">
       <title>Reducing Semantic Drift with Bagging and Distributional Similarity</title>
       <author><first>Tara</first><last>McIntosh</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>396–404</pages>
       <url hash="f6e43057">P09-1045</url>
       <bibkey>mcintosh-curran-2009-reducing</bibkey>
@@ -424,7 +424,7 @@
       <author><first>Katsumasa</first><last>Yoshikawa</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>405–413</pages>
       <url hash="94dfd362">P09-1046</url>
       <bibkey>yoshikawa-etal-2009-jointly</bibkey>
@@ -432,7 +432,7 @@
     <paper id="47">
       <title>Profile Based Cross-Document Coreference Using Kernelized Fuzzy Relational Clustering</title>
       <author><first>Jian</first><last>Huang</last></author>
-      <author><first>Sarah M.</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah M.</first><last>Taylor</last></author>
       <author><first>Jonathan L.</first><last>Smith</last></author>
       <author><first>Konstantinos A.</first><last>Fotiadis</last></author>
       <author><first>C. Lee</first><last>Giles</last></author>
@@ -443,18 +443,18 @@
     <paper id="48">
       <title>Who, What, When, Where, Why? Comparing Multiple Approaches to the Cross-Lingual 5<fixed-case>W</fixed-case> Task</title>
       <author><first>Kristen</first><last>Parton</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <author><first>Bob</first><last>Coyne</last></author>
-      <author><first>Mona T.</first><last>Diab</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Wei Yun</first><last>Ma</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="wei-yun-ma"><first>Wei Yun</first><last>Ma</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Sara</first><last>Stolbach</last></author>
       <author><first>Ang</first><last>Sun</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Sibel</first><last>Yaman</last></author>
       <pages>423–431</pages>
@@ -463,7 +463,7 @@
     </paper>
     <paper id="49">
       <title>Bilingual Co-Training for Monolingual Hyponymy-Relation Acquisition</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>432–440</pages>
@@ -472,8 +472,8 @@
     </paper>
     <paper id="50">
       <title>Automatic Set Instance Extraction using the Web</title>
-      <author><first>Richard C.</first><last>Wang</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="richard-c-wang"><first>Richard C.</first><last>Wang</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>441–449</pages>
       <url hash="0728b9ea">P09-1050</url>
       <bibkey>wang-cohen-2009-automatic</bibkey>
@@ -500,7 +500,7 @@
     <paper id="53">
       <title>Paraphrase Identification as Probabilistic Quasi-Synchronous Recognition</title>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>468–476</pages>
       <url hash="2914425d">P09-1053</url>
       <bibkey>das-smith-2009-paraphrase</bibkey>
@@ -508,7 +508,7 @@
     <paper id="54">
       <title>Stochastic Gradient Descent Training for <fixed-case>L</fixed-case>1-regularized Log-linear Models with Cumulative Penalty</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>477–485</pages>
       <url hash="42088c69">P09-1054</url>
@@ -542,7 +542,7 @@
       <title>An Error-Driven Word-Character Hybrid Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Canasai</first><last>Kruengkrai</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Yiou</first><last>Wang</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
@@ -588,7 +588,7 @@
     <paper id="63">
       <title>Improving Tree-to-Tree Translation with Packed Forests</title>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>558–566</pages>
       <url hash="3fdbb194">P09-1063</url>
@@ -627,16 +627,16 @@
     <paper id="67">
       <title>Variational Decoding for Statistical Machine Translation</title>
       <author><first>Zhifei</first><last>Li</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>593–601</pages>
       <url hash="066e7d8b">P09-1067</url>
       <bibkey>li-etal-2009-variational</bibkey>
     </paper>
     <paper id="68">
       <title>Unsupervised Learning of Narrative Schemas and their Participants</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>602–610</pages>
       <url hash="b448f7fa">P09-1068</url>
       <bibkey>chambers-jurafsky-2009-unsupervised</bibkey>
@@ -644,7 +644,7 @@
     <paper id="69">
       <title>Learning a Compositional Semantic Parser using an Existing Syntactic Parser</title>
       <author><first>Ruifang</first><last>Ge</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>611–619</pages>
       <url hash="9c5e5aaa">P09-1069</url>
       <bibkey>ge-mooney-2009-learning</bibkey>
@@ -652,7 +652,7 @@
     <paper id="70">
       <title>Latent Variable Models of Concept-Attribute Attachment</title>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>620–628</pages>
       <url hash="5e7a8925">P09-1070</url>
       <bibkey>reisinger-pasca-2009-latent</bibkey>
@@ -660,17 +660,17 @@
     <paper id="71">
       <title>The <fixed-case>C</fixed-case>hinese Aspect Generation Based on Aspect Selection Functions</title>
       <author><first>Guowen</first><last>Yang</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <pages>629–637</pages>
       <url hash="f3a55245">P09-1071</url>
       <bibkey>yang-bateman-2009-chinese</bibkey>
     </paper>
     <paper id="72">
       <title>Quantitative modeling of the neural representation of adjective-noun phrases to account for f<fixed-case>MRI</fixed-case> activation</title>
-      <author><first>Kai-min K.</first><last>Chang</last></author>
+      <author id="kai-min-kevin-chang"><first>Kai-min K.</first><last>Chang</last></author>
       <author><first>Vladimir L.</first><last>Cherkassky</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
-      <author><first>Marcel Adam</first><last>Just</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="marcel-adam-just"><first>Marcel Adam</first><last>Just</last></author>
       <pages>638–646</pages>
       <url hash="13a31adf">P09-1072</url>
       <bibkey>chang-etal-2009-quantitative</bibkey>
@@ -679,7 +679,7 @@
       <title>Capturing Salience with a Trainable Cache Model for Zero-anaphora Resolution</title>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>647–655</pages>
       <url hash="7d918a91">P09-1073</url>
       <bibkey>iida-etal-2009-capturing</bibkey>
@@ -688,8 +688,8 @@
       <title>Conundrums in Noun Phrase Coreference Resolution: Making Sense of the State-of-the-Art</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <author><first>Nathan</first><last>Gilbert</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>656–664</pages>
       <url hash="153c7750">P09-1074</url>
       <bibkey>stoyanov-etal-2009-conundrums</bibkey>
@@ -704,7 +704,7 @@
     </paper>
     <paper id="76">
       <title>Genre distinctions for discourse in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>674–682</pages>
       <url hash="b7d4b42e">P09-1076</url>
       <bibkey>webber-2009-genre</bibkey>
@@ -722,7 +722,7 @@
       <title>A Framework of Feature Selection Methods for Text Categorization</title>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Rui</first><last>Xia</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>692–700</pages>
       <url hash="2c32344f">P09-1078</url>
@@ -774,7 +774,7 @@
     <paper id="84">
       <title>What lies beneath: Semantic and syntactic analysis of manually reconstructed spontaneous speech</title>
       <author><first>Erin</first><last>Fitzgerald</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <pages>746–754</pages>
       <url hash="ff230b1e">P09-1084</url>
@@ -782,9 +782,9 @@
     </paper>
     <paper id="85">
       <title>Discriminative Lexicon Adaptation for Improved Character Accuracy - A New Direction in <fixed-case>C</fixed-case>hinese Language Modeling</title>
-      <author><first>Yi-cheng</first><last>Pan</last></author>
-      <author><first>Lin-shan</first><last>Lee</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="yi-cheng-pan"><first>Yi-cheng</first><last>Pan</last></author>
+      <author id="lin-shan-lee"><first>Lin-shan</first><last>Lee</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>755–763</pages>
       <url hash="dce44acf">P09-1085</url>
       <bibkey>pan-etal-2009-discriminative</bibkey>
@@ -801,16 +801,16 @@
     <paper id="87">
       <title>Quadratic-Time Dependency Parsing for Machine Translation</title>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>773–781</pages>
       <url hash="64226dff">P09-1087</url>
       <bibkey>galley-manning-2009-quadratic</bibkey>
     </paper>
     <paper id="88">
       <title>A <fixed-case>G</fixed-case>ibbs Sampler for Phrasal Synchronous Grammar Induction</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>782–790</pages>
       <url hash="f7a2f4dd">P09-1088</url>
@@ -833,7 +833,7 @@
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <author><first>Hansraj</first><last>Choudhary</last></author>
       <author><first>Avishek</first><last>Ghosh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>800–808</pages>
       <url hash="fc3112d2">P09-1090</url>
       <bibkey>ramanathan-etal-2009-case</bibkey>
@@ -842,7 +842,7 @@
       <title>Dependency Based <fixed-case>C</fixed-case>hinese Sentence Realization</title>
       <author><first>Wei</first><last>He</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Yuqing</first><last>Guo</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>809–816</pages>
       <url hash="6ad2aa7d">P09-1091</url>
@@ -885,11 +885,11 @@
     </paper>
     <paper id="96">
       <title><fixed-case>SMS</fixed-case> based Interface for <fixed-case>FAQ</fixed-case> Retrieval</title>
-      <author><first>Govind</first><last>Kothari</last></author>
+      <author id="govind-kothari"><first>Govind</first><last>Kothari</last></author>
       <author><first>Sumit</first><last>Negi</last></author>
-      <author><first>Tanveer A.</first><last>Faruquie</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer A.</first><last>Faruquie</last></author>
       <author><first>Venkatesan T.</first><last>Chakaravarthy</last></author>
-      <author><first>L. Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L. Venkata</first><last>Subramaniam</last></author>
       <pages>852–860</pages>
       <url hash="436518c4">P09-1096</url>
       <bibkey>kothari-etal-2009-sms</bibkey>
@@ -915,7 +915,7 @@
     </paper>
     <paper id="99">
       <title>Comparing Objective and Subjective Measures of Usability in a Human-Robot Dialogue System</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <author><first>Manuel</first><last>Giuliani</last></author>
       <author><first>Alois</first><last>Knoll</last></author>
       <pages>879–887</pages>
@@ -925,7 +925,7 @@
     <paper id="100">
       <title>Setting Up User Action Probabilities in User Simulations for Dialog System Development</title>
       <author><first>Hua</first><last>Ai</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>888–896</pages>
       <url hash="a3cd6c08">P09-1100</url>
       <bibkey>ai-litman-2009-setting</bibkey>
@@ -940,8 +940,8 @@
     <paper id="102">
       <title>Robust Approach to Abbreviating Terms: A Discriminative Latent Variable Model with Global Information</title>
       <author><first>Xu</first><last>Sun</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>905–913</pages>
       <url hash="1d721a60">P09-1102</url>
       <bibkey>sun-etal-2009-robust</bibkey>
@@ -950,7 +950,7 @@
       <title>A non-contiguous Tree Sequence Alignment-based Model for Statistical Machine Translation</title>
       <author><first>Jun</first><last>Sun</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>914–922</pages>
       <url hash="b424ed6f">P09-1103</url>
       <bibkey>sun-etal-2009-non</bibkey>
@@ -977,7 +977,7 @@
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
       <pages>941–948</pages>
       <url hash="9c93d724">P09-1106</url>
       <bibkey>chen-etal-2009-comparative</bibkey>
@@ -1005,15 +1005,15 @@
       <author><first>Kazuo</first><last>Hara</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
       <author><first>Hideharu</first><last>Okuma</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>967–975</pages>
       <url hash="35be6097">P09-1109</url>
       <bibkey>hara-etal-2009-coordinate</bibkey>
     </paper>
     <paper id="110">
       <title>Learning Context-Dependent Mappings from Sentences to Logical Form</title>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>976–984</pages>
       <url hash="f29c636f">P09-1110</url>
       <bibkey>zettlemoyer-collins-2009-learning</bibkey>
@@ -1039,7 +1039,7 @@
       <author><first>Mike</first><last>Mintz</last></author>
       <author><first>Steven</first><last>Bills</last></author>
       <author><first>Rion</first><last>Snow</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>1003–1011</pages>
       <url hash="77319c3a">P09-1113</url>
       <bibkey>mintz-etal-2009-distant</bibkey>
@@ -1054,7 +1054,7 @@
     <paper id="115">
       <title>Unsupervised Relation Extraction by Mining <fixed-case>W</fixed-case>ikipedia Texts Using Information from the Web</title>
       <author><first>Yulan</first><last>Yan</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Yutaka</first><last>Matsuo</last></author>
       <author><first>Zhenglu</first><last>Yang</last></author>
       <author><first>Mitsuru</first><last>Ishizuka</last></author>
@@ -1082,7 +1082,7 @@
       <title>Word or Phrase? Learning Which Unit to Stress for Information Retrieval</title>
       <author><first>Young-In</first><last>Song</last></author>
       <author><first>Jung-Tae</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>1048–1056</pages>
       <url hash="c205acfc">P09-1118</url>
       <bibkey>song-etal-2009-word</bibkey>
@@ -1091,7 +1091,7 @@
       <title>A Generative Blog Post Retrieval Model that Uses Query Expansion based on External Collections</title>
       <author><first>Wouter</first><last>Weerkamp</last></author>
       <author><first>Krisztian</first><last>Balog</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>1057–1065</pages>
       <url hash="9168eab4">P09-1119</url>
       <bibkey>weerkamp-etal-2009-generative</bibkey>
@@ -1109,7 +1109,7 @@
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>John</first><last>Blitzer</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1075–1083</pages>
       <url hash="08049a0f">P09-1121</url>
       <bibkey>gao-etal-2009-exploiting</bibkey>
@@ -1121,7 +1121,7 @@
       <url hash="ab777239">P09-2</url>
       <editor><first>Keh-Yih</first><last>Su</last></editor>
       <editor><first>Jian</first><last>Su</last></editor>
-      <editor><first>Janyce</first><last>Wiebe</last></editor>
+      <editor id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -1136,8 +1136,8 @@
     </frontmatter>
     <paper id="1">
       <title>Variational Inference for Grammar Induction with Prior Knowledge</title>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1–4</pages>
       <url hash="88c1e42d">P09-2001</url>
       <bibkey>cohen-smith-2009-variational</bibkey>
@@ -1147,7 +1147,7 @@
       <author><first>Hideharu</first><last>Okuma</last></author>
       <author><first>Kazuo</first><last>Hara</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>5–8</pages>
       <url hash="b5adfa15">P09-2002</url>
       <bibkey>okuma-etal-2009-bypassed</bibkey>
@@ -1171,17 +1171,17 @@
     </paper>
     <paper id="5">
       <title>Hybrid Approach to User Intention Modeling for Dialog Simulation</title>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Cheongjae</first><last>Lee</last></author>
       <author><first>Kyungduk</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>17–20</pages>
       <url hash="aac493d1">P09-2005</url>
       <bibkey>jung-etal-2009-hybrid</bibkey>
     </paper>
     <paper id="6">
       <title>Homophones and Tonal Patterns in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>21–24</pages>
       <url hash="65acefd1">P09-2006</url>
       <bibkey>kwong-2009-homophones</bibkey>
@@ -1190,7 +1190,7 @@
       <title>Capturing Errors in Written <fixed-case>C</fixed-case>hinese Words</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Kan-Wen</first><last>Tien</last></author>
-      <author><first>Min-Hua</first><last>Lai</last></author>
+      <author id="min-hua-lai"><first>Min-Hua</first><last>Lai</last></author>
       <author><first>Yi-Hsuan</first><last>Chuang</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <pages>25–28</pages>
@@ -1203,8 +1203,8 @@
       <author><first>Do-Gil</first><last>Lee</last></author>
       <author><first>Jung-Tae</first><last>Lee</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>29–32</pages>
       <url hash="ef0dc18d">P09-2008</url>
       <bibkey>cho-etal-2009-novel</bibkey>
@@ -1214,14 +1214,14 @@
       <author><first>Navanath</first><last>Saharia</last></author>
       <author><first>Dhrubajyoti</first><last>Das</last></author>
       <author><first>Utpal</first><last>Sharma</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>33–36</pages>
       <url hash="26f48c1f">P09-2009</url>
       <bibkey>saharia-etal-2009-part</bibkey>
     </paper>
     <paper id="10">
       <title>Improving data-driven dependency parsing using large-scale <fixed-case>LFG</fixed-case> grammars</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Kathrin</first><last>Spreyer</last></author>
       <pages>37–40</pages>
@@ -1230,7 +1230,7 @@
     </paper>
     <paper id="11">
       <title>Incremental Parsing with Monotonic Adjoining Operation</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>41–44</pages>
       <url hash="51ef28f6">P09-2011</url>
@@ -1255,7 +1255,7 @@
     <paper id="14">
       <title>Comparing the Accuracy of <fixed-case>CCG</fixed-case> and <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Parsers</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>53–56</pages>
       <url hash="19338be8">P09-2014</url>
       <bibkey>clark-curran-2009-comparing</bibkey>
@@ -1293,7 +1293,7 @@
       <author><first>Lili</first><last>Kotlerman</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Idan</first><last>Szpektor</last></author>
-      <author><first>Maayan</first><last>Zhitomirsky-Geffet</last></author>
+      <author id="maayan-geffet"><first>Maayan</first><last>Zhitomirsky-Geffet</last></author>
       <pages>69–72</pages>
       <url hash="b9391c5f">P09-2018</url>
       <bibkey>kotlerman-etal-2009-directional</bibkey>
@@ -1301,8 +1301,8 @@
     <paper id="19">
       <title>Generalizing over Lexical Features: Selectional Preferences for Semantic Role Classification</title>
       <author><first>Beñat</first><last>Zapirain</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>73–76</pages>
       <url hash="a1ec69b2">P09-2019</url>
       <bibkey>zapirain-etal-2009-generalizing</bibkey>
@@ -1311,15 +1311,15 @@
       <title>A Syntactic and Lexical-Based Discourse Segmenter</title>
       <author><first>Milan</first><last>Tofiloski</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>77–80</pages>
       <url hash="1cca1855">P09-2020</url>
       <bibkey>tofiloski-etal-2009-syntactic</bibkey>
     </paper>
     <paper id="21">
       <title>Realistic Grammar Error Simulation using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>81–84</pages>
       <url hash="28a4810d">P09-2021</url>
       <bibkey>lee-lee-2009-realistic</bibkey>
@@ -1336,7 +1336,7 @@
     <paper id="23">
       <title>Predicting Barge-in Utterance Errors by using Implicitly-Supervised <fixed-case>ASR</fixed-case> Accuracy and Barge-in Rate per User</title>
       <author><first>Kazunori</first><last>Komatani</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <pages>89–92</pages>
       <url hash="b0cec259">P09-2023</url>
       <bibkey>komatani-rudnicky-2009-predicting</bibkey>
@@ -1359,9 +1359,9 @@
     <paper id="26">
       <title>Leveraging Structural Relations for Fluent Compressions at Multiple Compression Rates</title>
       <author><first>Sourish</first><last>Chaudhuri</last></author>
-      <author><first>Naman K.</first><last>Gupta</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="naman-k-gupta"><first>Naman K.</first><last>Gupta</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>101–104</pages>
       <url hash="63af40cd">P09-2026</url>
       <bibkey>chaudhuri-etal-2009-leveraging</bibkey>
@@ -1376,7 +1376,7 @@
     </paper>
     <paper id="28">
       <title>Using Generation for Grammar Analysis and Error Detection</title>
-      <author><first>Michael</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael</first><last>Goodman</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>109–112</pages>
       <url hash="06317621">P09-2028</url>
@@ -1386,7 +1386,7 @@
       <title>An Integrated Multi-document Summarization Approach based on Word Hierarchical Representation</title>
       <author><first>You</first><last>Ouyang</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>113–116</pages>
       <url hash="5f292272">P09-2029</url>
       <bibkey>ouyang-etal-2009-integrated</bibkey>
@@ -1404,7 +1404,7 @@
       <title>Reducing <fixed-case>SMT</fixed-case> Rule Table with Monolingual Key Phrase</title>
       <author><first>Zhongjun</first><last>He</last></author>
       <author><first>Yao</first><last>Meng</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>121–124</pages>
@@ -1414,8 +1414,8 @@
     <paper id="32">
       <title>A Statistical Machine Translation Model Based on a Synthetic Synchronous Grammar</title>
       <author><first>Hongfei</first><last>Jiang</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <author><first>Bo</first><last>Wang</last></author>
       <pages>125–128</pages>
@@ -1460,7 +1460,7 @@
     </paper>
     <paper id="37">
       <title>Hidden <fixed-case>M</fixed-case>arkov Tree Model in Dependency-based Machine Translation</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <pages>145–148</pages>
       <url hash="3cd59705">P09-2037</url>
@@ -1469,7 +1469,7 @@
     <paper id="38">
       <title>Word to Sentence Level Emotion Tagging for <fixed-case>B</fixed-case>engali Blogs</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>149–152</pages>
       <url hash="6645dc68">P09-2038</url>
       <bibkey>das-bandyopadhyay-2009-word</bibkey>
@@ -1484,18 +1484,18 @@
     </paper>
     <paper id="40">
       <title>Opinion and Generic Question Answering Systems: a Performance Analysis</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>157–160</pages>
       <url hash="2582b78e">P09-2040</url>
       <bibkey>balahur-etal-2009-opinion</bibkey>
     </paper>
     <paper id="41">
       <title>Automatic Satire Detection: Are You Having a Laugh?</title>
-      <author><first>Clint</first><last>Burfoot</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="clint-burfoot"><first>Clint</first><last>Burfoot</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>161–164</pages>
       <url hash="882419b6">P09-2041</url>
       <bibkey>burfoot-baldwin-2009-automatic</bibkey>
@@ -1504,7 +1504,7 @@
       <title>Hierarchical Multi-Label Text Categorization with Global Margin Maximization</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Wenjun</first><last>Gao</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>165–168</pages>
       <url hash="a793b081">P09-2042</url>
       <bibkey>qiu-etal-2009-hierarchical</bibkey>
@@ -1512,7 +1512,7 @@
     <paper id="43">
       <title>Toward finer-grained sentiment identification in product reviews through linguistic and ontological analyses</title>
       <author><first>Hye-Jin</first><last>Min</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>169–172</pages>
       <url hash="e4dd2189">P09-2043</url>
       <bibkey>min-park-2009-toward</bibkey>
@@ -1536,18 +1536,18 @@
     <paper id="46">
       <title>An Ontology-Based Approach for Key Phrase Extraction</title>
       <author><first>Chau</first><last>Q. Nguyen</last></author>
-      <author><first>Tuoi</first><last>T. Phan</last></author>
+      <author id="tuoi-thi-phan"><first>Tuoi</first><last>T. Phan</last></author>
       <pages>181–184</pages>
       <url hash="b654da50">P09-2046</url>
       <bibkey>q-nguyen-t-phan-2009-ontology</bibkey>
     </paper>
     <paper id="47">
       <title>Query Segmentation Based on Eigenspace Similarity</title>
-      <author><first>Chao</first><last>Zhang</last></author>
+      <author id="chao-zhang-tu"><first>Chao</first><last>Zhang</last></author>
       <author><first>Nan</first><last>Sun</last></author>
       <author><first>Xia</first><last>Hu</last></author>
       <author><first>Tingzhu</first><last>Huang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>185–188</pages>
       <url hash="d01cf54a">P09-2047</url>
       <bibkey>zhang-etal-2009-query</bibkey>
@@ -1584,7 +1584,7 @@
     </paper>
     <paper id="51">
       <title>Mining Association Language Patterns for Negative Life Event Classification</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chien-Lung</first><last>Chan</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Chao-Cheng</first><last>Lin</last></author>
@@ -1606,7 +1606,7 @@
     </paper>
     <paper id="53">
       <title>Play the Language: Play Coreference</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Pavel</first><last>Schlesinger</last></author>
       <pages>209–212</pages>
@@ -1616,8 +1616,8 @@
     <paper id="54">
       <title><fixed-case>C</fixed-case>hinese Term Extraction Using Different Types of Relevance</title>
       <author><first>Yuhang</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Dequan</first><last>Zheng</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <pages>213–216</pages>
@@ -1626,8 +1626,8 @@
     </paper>
     <paper id="55">
       <title>i<fixed-case>C</fixed-case>hi: a bilingual dictionary generating tool</title>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Shoichi</first><last>Yokoyama</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="shoichi-yokoyama"><first>Shoichi</first><last>Yokoyama</last></author>
       <pages>217–220</pages>
       <url hash="537f60cd">P09-2055</url>
       <bibkey>varga-yokoyama-2009-ichi</bibkey>
@@ -1635,14 +1635,14 @@
     <paper id="56">
       <title><fixed-case>CAT</fixed-case>i<fixed-case>B</fixed-case>: The <fixed-case>C</fixed-case>olumbia <fixed-case>A</fixed-case>rabic Treebank</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <pages>221–224</pages>
       <url hash="0cedfaaf">P09-2056</url>
       <bibkey>habash-roth-2009-catib</bibkey>
     </paper>
     <paper id="57">
       <title>A Beam-Search Extraction Algorithm for Comparable Data</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>225–228</pages>
       <url hash="992c8d11">P09-2057</url>
       <bibkey>tillmann-2009-beam</bibkey>
@@ -1659,7 +1659,7 @@
       <title>Bridging Morpho-Syntactic Gap between Source and Target Sentences for <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Statistical Machine Translation</title>
       <author><first>Gumwon</first><last>Hong</last></author>
       <author><first>Seung-Wook</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>233–236</pages>
       <url hash="4260d6a7">P09-2059</url>
       <bibkey>hong-etal-2009-bridging</bibkey>
@@ -1684,7 +1684,7 @@
     </paper>
     <paper id="62">
       <title>Syntax is from <fixed-case>M</fixed-case>ars while Semantics from <fixed-case>V</fixed-case>enus! Insights from Spectral Analysis of Distributional Similarity Networks</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Animesh</first><last>Mukherjee</last></author>
       <pages>245–248</pages>
@@ -1703,7 +1703,7 @@
     </paper>
     <paper id="64">
       <title>Prediction of Thematic Rank for Structured Semantic Role Labeling</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Meng</first><last>Wang</last></author>
       <pages>253–256</pages>
@@ -1712,8 +1712,8 @@
     </paper>
     <paper id="65">
       <title>Transfer Learning, Feature Selection and Word Sense Disambiguation</title>
-      <author><first>Paramveer S.</first><last>Dhillon</last></author>
-      <author><first>Lyle H.</first><last>Ungar</last></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer S.</first><last>Dhillon</last></author>
+      <author id="lyle-ungar"><first>Lyle H.</first><last>Ungar</last></author>
       <pages>257–260</pages>
       <url hash="f8733dfa">P09-2065</url>
       <bibkey>dhillon-ungar-2009-transfer</bibkey>
@@ -1728,16 +1728,16 @@
     </paper>
     <paper id="67">
       <title>Automatic Story Segmentation using a <fixed-case>B</fixed-case>ayesian Decision Framework for Statistical Models of Lexical Chain Features</title>
-      <author><first>Wai-Kit</first><last>Lo</last></author>
+      <author id="wai-kit-lo"><first>Wai-Kit</first><last>Lo</last></author>
       <author><first>Wenying</first><last>Xiong</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>265–268</pages>
       <url hash="0cc8078a">P09-2067</url>
       <bibkey>lo-etal-2009-automatic</bibkey>
     </paper>
     <paper id="68">
       <title>Investigating Pitch Accent Recognition in Non-native Speech</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>269–272</pages>
       <url hash="0eb17330">P09-2068</url>
       <bibkey>levow-2009-investigating</bibkey>
@@ -1746,14 +1746,14 @@
       <title>A Stochastic Finite-State Morphological Parser for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Haşim</first><last>Sak</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>273–276</pages>
       <url hash="a9e937a4">P09-2069</url>
       <bibkey>sak-etal-2009-stochastic</bibkey>
     </paper>
     <paper id="70">
       <title>Parsing Speech Repair without Specialized Grammar Symbols</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>Luan</first><last>Nguyen</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>277–280</pages>
@@ -1763,8 +1763,8 @@
     <paper id="71">
       <title>Efficient Inference of <fixed-case>CRF</fixed-case>s for Large-Scale Natural Language Data</title>
       <author><first>Minwoo</first><last>Jeong</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>281–284</pages>
       <url hash="de658e1b">P09-2071</url>
       <bibkey>jeong-etal-2009-efficient</bibkey>
@@ -1788,7 +1788,7 @@
     </paper>
     <paper id="74">
       <title><fixed-case>M</fixed-case>arkov Random Topic Fields</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>293–296</pages>
       <url hash="71d10d66">P09-2074</url>
       <bibkey>daume-iii-2009-markov</bibkey>
@@ -1807,12 +1807,12 @@
       <title>Validating the web-based evaluation of <fixed-case>NLG</fixed-case> systems</title>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <author><first>Sara</first><last>Dalzel-Job</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>301–304</pages>
       <url hash="37730e5d">P09-2076</url>
       <bibkey>koller-etal-2009-validating</bibkey>
@@ -1826,7 +1826,7 @@
     </paper>
     <paper id="78">
       <title>The Lie Detector: Explorations in the Automatic Recognition of Deceptive Language</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>309–312</pages>
       <url hash="e355fd35">P09-2078</url>
@@ -1835,7 +1835,7 @@
     <paper id="79">
       <title>Generalizing Dependency Features for Opinion Mining</title>
       <author><first>Mahesh</first><last>Joshi</last></author>
-      <author><first>Carolyn</first><last>Penstein-Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein-Rosé</last></author>
       <pages>313–316</pages>
       <url hash="05cab7aa">P09-2079</url>
       <bibkey>joshi-penstein-rose-2009-generalizing</bibkey>
@@ -1844,7 +1844,7 @@
       <title>Graph Ranking for Sentiment Transfer</title>
       <author><first>Qiong</first><last>Wu</last></author>
       <author><first>Songbo</first><last>Tan</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>317–320</pages>
       <url hash="95335213">P09-2080</url>
       <bibkey>wu-etal-2009-graph</bibkey>
@@ -1853,7 +1853,7 @@
       <title>The Contribution of Stylistic Information to Content-based Mobile Spam Filtering</title>
       <author><first>Dae-Neung</first><last>Sohn</last></author>
       <author><first>Jung-Tae</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>321–324</pages>
       <url hash="1e95d9cf">P09-2081</url>
       <bibkey>sohn-etal-2009-contribution</bibkey>
@@ -1870,25 +1870,25 @@
     <paper id="83">
       <title>Do Automatic Annotation Techniques Have Any Impact on Supervised Complex Question Answering?</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid</first><last>Hasan</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="sadid-a-hasan"><first>Sadid</first><last>Hasan</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>329–332</pages>
       <url hash="636bf8e9">P09-2083</url>
       <bibkey>chali-etal-2009-automatic</bibkey>
     </paper>
     <paper id="84">
       <title>Where’s the Verb? Correcting Machine Translation During Question Answering</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>333–336</pages>
       <url hash="0a1828ae">P09-2084</url>
       <bibkey>ma-mckeown-2009-wheres</bibkey>
     </paper>
     <paper id="85">
       <title>A Note on the Implementation of Hierarchical <fixed-case>D</fixed-case>irichlet Processes</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>337–340</pages>
       <url hash="74573abb">P09-2085</url>
@@ -1906,14 +1906,14 @@
     <paper id="87">
       <title>Modeling Morphologically Rich Languages Using Split Words and Unstructured Dependencies</title>
       <author><first>Deniz</first><last>Yuret</last></author>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>345–348</pages>
       <url hash="a6e87340">P09-2087</url>
       <bibkey>yuret-bicici-2009-modeling</bibkey>
     </paper>
     <paper id="88">
       <title>Improved Smoothing for N-gram Language Models Based on Ordinary Counts</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>349–352</pages>
       <url hash="f9f22b1d">P09-2088</url>
@@ -1922,21 +1922,21 @@
     <paper id="89">
       <title>Updating a Name Tagger Using Contemporary Unlabeled Data</title>
       <author><first>Cristina</first><last>Mota</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>353–356</pages>
       <url hash="1fc6ac28">P09-2089</url>
       <bibkey>mota-grishman-2009-updating</bibkey>
     </paper>
     <paper id="90">
       <title><fixed-case>A</fixed-case>rabic Cross-Document Coreference Resolution</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Tamer</first><last>Elsayed</last></author>
       <author><first>Nikesh</first><last>Garera</last></author>
       <author><first>David</first><last>Alexander</last></author>
       <author><first>Tan</first><last>Xu</last></author>
-      <author><first>Doug</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Doug</first><last>Oard</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <pages>357–360</pages>
       <url hash="fec5994e">P09-2090</url>
       <bibkey>sayeed-etal-2009-arabic</bibkey>
@@ -1945,7 +1945,7 @@
       <title>The Impact of Query Refinement in the Web People Search Task</title>
       <author><first>Javier</first><last>Artiles</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <pages>361–364</pages>
       <url hash="9df1bb4c">P09-2091</url>
       <bibkey>artiles-etal-2009-impact</bibkey>
@@ -1954,7 +1954,7 @@
       <title>Composite Kernels For Relation Extraction</title>
       <author><first>Frank</first><last>Reichartz</last></author>
       <author><first>Hannes</first><last>Korte</last></author>
-      <author><first>Gerhard</first><last>Paass</last></author>
+      <author id="gerhard-paass"><first>Gerhard</first><last>Paass</last></author>
       <pages>365–368</pages>
       <url hash="85c6db2e">P09-2092</url>
       <bibkey>reichartz-etal-2009-composite</bibkey>
@@ -1974,8 +1974,8 @@
       <url hash="78a26dc9">P09-3</url>
       <editor><first>Brian</first><last>Roark</last></editor>
       <editor><first>Grace</first><last>Ngai</last></editor>
-      <editor><first>Davis Muhajereen D.</first><last>Dimalen</last></editor>
-      <editor><first>Jenny Rose</first><last>Finkel</last></editor>
+      <editor id="davis-muhajereen-d-dimalen"><first>Davis Muhajereen D.</first><last>Dimalen</last></editor>
+      <editor id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></editor>
       <editor><first>Blaise</first><last>Thomson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -1997,9 +1997,9 @@
     </paper>
     <paper id="2">
       <title>Insights into Non-projectivity in <fixed-case>H</fixed-case>indi</title>
-      <author><first>Prashanth</first><last>Mannem</last></author>
-      <author><first>Himani</first><last>Chaudhry</last></author>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
+      <author id="himani-chaudhry"><first>Himani</first><last>Chaudhry</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <pages>10–17</pages>
       <url hash="5d07ed9e">P09-3002</url>
       <bibkey>mannem-etal-2009-insights</bibkey>
@@ -2074,7 +2074,7 @@
     </paper>
     <paper id="12">
       <title>Creating a Gold Standard for Sentence Clustering in Multi-Document Summarization</title>
-      <author><first>Johanna</first><last>Geiss</last></author>
+      <author id="johanna-geiss"><first>Johanna</first><last>Geiss</last></author>
       <pages>96–104</pages>
       <url hash="a6406076">P09-3012</url>
       <bibkey>geiss-2009-creating</bibkey>
@@ -2084,8 +2084,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case>-<fixed-case>IJCNLP</fixed-case> 2009 Software Demonstrations</booktitle>
       <url hash="e1c91b8c">P09-4</url>
-      <editor><first>Gary Geunbae</first><last>Lee</last></editor>
-      <editor><first>Sabine</first><last>Schulte im Walde</last></editor>
+      <editor id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></editor>
+      <editor id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -2112,12 +2112,12 @@
     </paper>
     <paper id="2">
       <title><fixed-case>LX</fixed-case>-Center: a center of online linguistic services</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
       <author><first>Eduardo</first><last>Ferreira</last></author>
       <author><first>Pedro</first><last>Martins</last></author>
       <author><first>Filipe</first><last>Nunes</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Sara</first><last>Silveira</last></author>
       <pages>5–8</pages>
       <url hash="a6057e92">P09-4002</url>
@@ -2125,8 +2125,8 @@
     </paper>
     <paper id="3">
       <title>A Tool for Deep Semantic Encoding of Narrative Texts</title>
-      <author><first>David K.</first><last>Elson</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="david-elson"><first>David K.</first><last>Elson</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>9–12</pages>
       <url hash="18c8e00b">P09-4003</url>
       <bibkey>elson-mckeown-2009-tool</bibkey>
@@ -2148,9 +2148,9 @@
     </paper>
     <paper id="6">
       <title><fixed-case>MARS</fixed-case>: Multilingual Access and Retrieval System with Enhanced Query Translation and Document Retrieval</title>
-      <author><first>Lianhau</first><last>Lee</last></author>
-      <author><first>Aiti</first><last>Aw</last></author>
-      <author><first>Thuy</first><last>Vu</last></author>
+      <author id="lianhau-lee"><first>Lianhau</first><last>Lee</last></author>
+      <author id="aiti-aw"><first>Aiti</first><last>Aw</last></author>
+      <author id="thuy-vu"><first>Thuy</first><last>Vu</last></author>
       <author><first>Sharifah Aljunied</first><last>Mahani</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
@@ -2162,21 +2162,21 @@
       <title>Demonstration of <fixed-case>J</fixed-case>oshua: An Open Source Toolkit for Parsing-based Machine Translation</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>Wren N. G.</first><last>Thornton</last></author>
       <author><first>Jonathan</first><last>Weese</last></author>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <pages>25–28</pages>
       <url hash="e3ed8a55">P09-4007</url>
       <bibkey>li-etal-2009-demonstration</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>W</fixed-case>iki<fixed-case>BABEL</fixed-case>: A <fixed-case>W</fixed-case>iki-style Platform for Creation of Parallel Data</title>
-      <author><first>A</first><last>Kumaran</last></author>
-      <author><first>K</first><last>Saravanan</last></author>
+      <author id="a-kumaran"><first>A</first><last>Kumaran</last></author>
+      <author id="k-saravanan"><first>K</first><last>Saravanan</last></author>
       <author><first>Naren</first><last>Datha</last></author>
       <author><first>B</first><last>Ashok</last></author>
       <author><first>Vikram</first><last>Dendi</last></author>
@@ -2205,7 +2205,7 @@
       <author><first>Sebastian</first><last>Varges</last></author>
       <author><first>Silvia</first><last>Quarteroni</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Alexei V.</first><last>Ivanov</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei V.</first><last>Ivanov</last></author>
       <author><first>Pierluigi</first><last>Roberti</last></author>
       <pages>41–44</pages>
       <url hash="839fb0c2">P09-4011</url>
@@ -2216,8 +2216,8 @@
     <meta>
       <booktitle>Tutorial Abstracts of <fixed-case>ACL</fixed-case>-<fixed-case>IJCNLP</fixed-case> 2009</booktitle>
       <url hash="bb9a091b">P09-5</url>
-      <editor><first>Diana</first><last>McCarthy</last></editor>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -2232,7 +2232,7 @@
     <paper id="1">
       <title>Fundamentals of <fixed-case>C</fixed-case>hinese Language Processing</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>1</pages>
       <url hash="37c61d58">P09-5001</url>
       <bibkey>huang-lu-2009-fundamentals</bibkey>
@@ -2247,7 +2247,7 @@
     </paper>
     <paper id="3">
       <title>Semantic Role Labeling: Past, Present and Future</title>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>3</pages>
       <url hash="a7c2798b">P09-5003</url>
       <bibkey>marquez-2009-semantic</bibkey>
@@ -2268,8 +2268,8 @@
     </paper>
     <paper id="6">
       <title>State-of-the-art <fixed-case>NLP</fixed-case> Approaches to Coreference Resolution: Theory and Practical Recipes</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>6</pages>
       <url hash="68083699">P09-5006</url>
       <bibkey>ponzetto-poesio-2009-state</bibkey>
diff --git a/data/xml/P10.xml b/data/xml/P10.xml
index 06ae476425..a2da7c64d0 100644
--- a/data/xml/P10.xml
+++ b/data/xml/P10.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics</booktitle>
       <url hash="2736e8be">P10-1</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>Sandra</first><last>Carberry</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="sandra-carberry"><first>Sandra</first><last>Carberry</last></editor>
       <editor><first>Stephen</first><last>Clark</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title>Efficient Third-Order Dependency Parsers</title>
       <author><first>Terry</first><last>Koo</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>1–11</pages>
       <url hash="f797c22e">P10-1001</url>
       <bibkey>koo-collins-2010-efficient</bibkey>
@@ -37,7 +37,7 @@
     <paper id="3">
       <title>Bitext Dependency Parsing with Bilingual Subtree Constraints</title>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>21–29</pages>
       <url hash="fe47ed79">P10-1003</url>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="8">
       <title>Learning to Adapt to Unknown Users: Referring Expression Generation in Spoken Dialogue Systems</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>69–78</pages>
       <url hash="c70a5ded">P10-1008</url>
@@ -92,7 +92,7 @@
     </paper>
     <paper id="10">
       <title>The Human Language Project: Building a Universal Corpus of the World’s Languages</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <pages>88–97</pages>
       <url hash="cdc6e0bf">P10-1010</url>
@@ -108,8 +108,8 @@
     </paper>
     <paper id="12">
       <title>Automatic Evaluation Method for Machine Translation Using Noun-Phrase Chunking</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>108–117</pages>
       <url hash="8c67d8ef">P10-1012</url>
       <bibkey>echizen-ya-araki-2010-automatic</bibkey>
@@ -117,7 +117,7 @@
     <paper id="13">
       <title>Open Information Extraction Using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Fei</first><last>Wu</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>118–127</pages>
       <url hash="31f99ec8">P10-1013</url>
       <bibkey>wu-weld-2010-open</bibkey>
@@ -128,7 +128,7 @@
       <author><first>Rajasekar</first><last>Krishnamurthy</last></author>
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Sriram</first><last>Raghavan</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Shivakumar</first><last>Vaithyanathan</last></author>
       <pages>128–137</pages>
       <url hash="b5cdccb8">P10-1014</url>
@@ -136,9 +136,9 @@
     </paper>
     <paper id="15">
       <title>Extracting Social Networks from Literary Fiction</title>
-      <author><first>David</first><last>Elson</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
       <author><first>Nicholas</first><last>Dames</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>138–147</pages>
       <url hash="00e4f93b">P10-1015</url>
       <bibkey>elson-etal-2010-extracting</bibkey>
@@ -162,8 +162,8 @@
     </paper>
     <paper id="18">
       <title>“Was It Good? It Was Provocative.” Learning the Meaning of Scalar Adjectives</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>167–176</pages>
       <url hash="c460fca0">P10-1018</url>
@@ -171,15 +171,15 @@
     </paper>
     <paper id="19">
       <title>Importance-Driven Turn-Bidding for Spoken Dialogue Systems</title>
-      <author><first>Ethan</first><last>Selfridge</last></author>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <pages>177–185</pages>
       <url hash="6fee2b69">P10-1019</url>
       <bibkey>selfridge-heeman-2010-importance</bibkey>
     </paper>
     <paper id="20">
       <title>Entity-Based Local Coherence Modelling Using Topological Fields</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>186–195</pages>
       <url hash="89ae3190">P10-1020</url>
@@ -198,7 +198,7 @@
     <paper id="22">
       <title>Rebanking <fixed-case>CCG</fixed-case>bank for Improved <fixed-case>NP</fixed-case> Interpretation</title>
       <author><first>Matthew</first><last>Honnibal</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>207–215</pages>
       <url hash="f9cdea72">P10-1022</url>
@@ -207,7 +207,7 @@
     <paper id="23">
       <title><fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et: Building a Very Large Multilingual Semantic Network</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>216–225</pages>
       <url hash="016dbc1b">P10-1023</url>
       <bibkey>navigli-ponzetto-2010-babelnet</bibkey>
@@ -225,14 +225,14 @@
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Cristina</first><last>Giannone</last></author>
       <author><first>Paolo</first><last>Annesi</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>237–246</pages>
       <url hash="80ef3984">P10-1025</url>
       <bibkey>croce-etal-2010-towards</bibkey>
     </paper>
     <paper id="26">
       <title>A <fixed-case>B</fixed-case>ayesian Method for Robust Estimation of Distributional Similarities</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <author><first>Kow</first><last>Kuroda</last></author>
       <author><first>Masaki</first><last>Murata</last></author>
@@ -245,7 +245,7 @@
       <title>Recommendation in <fixed-case>I</fixed-case>nternet Forums and Blogs</title>
       <author><first>Jia</first><last>Wang</last></author>
       <author><first>Qing</first><last>Li</last></author>
-      <author><first>Yuanzhu Peter</first><last>Chen</last></author>
+      <author id="yuanzhu-peter-chen"><first>Yuanzhu Peter</first><last>Chen</last></author>
       <author><first>Zhangxi</first><last>Lin</last></author>
       <pages>257–265</pages>
       <url hash="a051d3e7">P10-1027</url>
@@ -264,16 +264,16 @@
     <paper id="29">
       <title>Inducing Domain-Specific Semantic Class Taggers from (Almost) Nothing</title>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>275–285</pages>
       <url hash="ef70e7f5">P10-1029</url>
       <bibkey>huang-riloff-2010-inducing</bibkey>
     </paper>
     <paper id="30">
       <title>Learning 5000 Relational Extractors</title>
-      <author><first>Raphael</first><last>Hoffmann</last></author>
+      <author id="raphael-hoffmann"><first>Raphael</first><last>Hoffmann</last></author>
       <author><first>Congle</first><last>Zhang</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>286–295</pages>
       <url hash="2add4ab2">P10-1030</url>
       <bibkey>hoffmann-etal-2010-learning</bibkey>
@@ -290,7 +290,7 @@
       <title>Exploring Syntactic Structural Features for Sub-Tree Alignment Using Bilingual Tree Kernels</title>
       <author><first>Jun</first><last>Sun</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>306–315</pages>
       <url hash="beaa49ee">P10-1032</url>
       <bibkey>sun-etal-2010-exploring</bibkey>
@@ -308,7 +308,7 @@
       <title>Fine-Grained Tree-to-String Translation Rule Extraction</title>
       <author><first>Xianchao</first><last>Wu</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>325–334</pages>
       <url hash="038f7fb5">P10-1034</url>
       <bibkey>wu-etal-2010-fine</bibkey>
@@ -327,7 +327,7 @@
       <author><first>Jessika</first><last>Roesner</last></author>
       <author><first>Tim</first><last>Dawborn</last></author>
       <author><first>James</first><last>Haggerty</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>345–355</pages>
       <url hash="d48686d1">P10-1036</url>
@@ -359,8 +359,8 @@
     </paper>
     <paper id="40">
       <title>Word Representations: A Simple and General Method for Semi-Supervised Learning</title>
-      <author><first>Joseph</first><last>Turian</last></author>
-      <author><first>Lev-Arie</first><last>Ratinov</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
+      <author id="lev-ratinov"><first>Lev-Arie</first><last>Ratinov</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <pages>384–394</pages>
       <award>ACL 2020 Test of Time Award (10 year)</award>
@@ -369,8 +369,8 @@
     </paper>
     <paper id="41">
       <title>Identifying Text Polarity Using Random Walks</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>395–403</pages>
       <url hash="8b7af21d">P10-1041</url>
       <bibkey>hassan-radev-2010-identifying</bibkey>
@@ -387,8 +387,8 @@
       <title>Employing Personal/Impersonal Views in Supervised and Semi-Supervised Sentiment Classification</title>
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <pages>414–423</pages>
       <url hash="1f412ce6">P10-1043</url>
       <bibkey>li-etal-2010-employing</bibkey>
@@ -411,8 +411,8 @@
     </paper>
     <paper id="46">
       <title>Improving the Use of Pseudo-Words for Evaluating Selectional Preferences</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>445–453</pages>
       <url hash="82d218e5">P10-1046</url>
       <bibkey>chambers-jurafsky-2010-improving</bibkey>
@@ -429,7 +429,7 @@
       <title><fixed-case>H</fixed-case>indi-to-<fixed-case>U</fixed-case>rdu Machine Translation through Transliteration</title>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>465–474</pages>
       <url hash="7b36db30">P10-1048</url>
@@ -439,7 +439,7 @@
       <title>Training Phrase Translation Models with Leaving-One-Out</title>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>475–484</pages>
       <url hash="782c2f3b">P10-1049</url>
       <bibkey>wuebker-etal-2010-training</bibkey>
@@ -481,7 +481,7 @@
     </paper>
     <paper id="54">
       <title>Optimal Rank Reduction for Linear Context-Free Rewriting Systems with Fan-Out Two</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>525–533</pages>
       <url hash="9358f919">P10-1054</url>
@@ -508,7 +508,7 @@
     <paper id="57">
       <title>Identifying Non-Explicit Citing Sentences for Citation-Based Summarization.</title>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>555–564</pages>
       <url hash="c81f562c">P10-1057</url>
       <bibkey>qazvinian-radev-2010-identifying</bibkey>
@@ -533,7 +533,7 @@
     <paper id="60">
       <title>Generating Focused Topic-Specific Sentiment Lexicons</title>
       <author><first>Valentin</first><last>Jijkoun</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Wouter</first><last>Weerkamp</last></author>
       <pages>585–594</pages>
       <url hash="1587c551">P10-1060</url>
@@ -550,7 +550,7 @@
     </paper>
     <paper id="62">
       <title>Error Detection for Statistical Machine Translation Using Linguistic Features</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>604–611</pages>
@@ -569,7 +569,7 @@
       <title>Bridging <fixed-case>SMT</fixed-case> and <fixed-case>TM</fixed-case> with Translation Recommendation</title>
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Yanjun</first><last>Ma</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>622–630</pages>
       <url hash="07b6e15e">P10-1064</url>
@@ -578,7 +578,7 @@
     <paper id="65">
       <title>On Jointly Recognizing and Aligning Bilingual Named Entities</title>
       <author><first>Yufeng</first><last>Chen</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>631–639</pages>
       <url hash="930403f9">P10-1065</url>
@@ -596,7 +596,7 @@
     <paper id="67">
       <title>Comparable Entity Mining from Comparative Questions</title>
       <author><first>Shasha</first><last>Li</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Young-In</first><last>Song</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
       <pages>650–658</pages>
@@ -613,7 +613,7 @@
     <paper id="69">
       <title>Temporal Information Processing of a New Language: Fast Porting with Minimal Resources</title>
       <author><first>Francisco</first><last>Costa</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>671–677</pages>
       <url hash="e6970e04">P10-1069</url>
       <bibkey>costa-branco-2010-temporal</bibkey>
@@ -621,7 +621,7 @@
     <paper id="70">
       <title>A Taxonomy, Dataset, and Classifier for Automatic Noun Compound Interpretation</title>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>678–687</pages>
       <url hash="40d871cf">P10-1070</url>
       <bibkey>tratz-hovy-2010-taxonomy</bibkey>
@@ -643,17 +643,17 @@
     </paper>
     <paper id="73">
       <title>Kernel Based Discourse Relation Recognition with Temporal Ordering Information</title>
-      <author><first>WenTing</first><last>Wang</last></author>
+      <author id="wen-ting-wang"><first>WenTing</first><last>Wang</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>710–719</pages>
       <url hash="2d4713a7">P10-1073</url>
       <bibkey>wang-etal-2010-kernel</bibkey>
     </paper>
     <paper id="74">
       <title>Hierarchical Joint Learning: Improving Joint Parsing and Named Entity Recognition with Non-Jointly Labeled Data</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>720–728</pages>
       <url hash="10963207">P10-1074</url>
       <bibkey>finkel-manning-2010-hierarchical</bibkey>
@@ -696,8 +696,8 @@
       <title>A Hybrid Rule/Model-Based Finite-State Framework for Normalizing <fixed-case>SMS</fixed-case> Messages</title>
       <author><first>Richard</first><last>Beaufort</last></author>
       <author><first>Sophie</first><last>Roekhaut</last></author>
-      <author><first>Louise-Amélie</first><last>Cougnon</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="louise-amelie-cougnon"><first>Louise-Amélie</first><last>Cougnon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>770–779</pages>
       <url hash="328a2543">P10-1079</url>
       <bibkey>beaufort-etal-2010-hybrid</bibkey>
@@ -713,7 +713,7 @@
     <paper id="81">
       <title>Using Document Level Cross-Event Inference to Improve Event Extraction</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>789–797</pages>
       <url hash="c18da458">P10-1081</url>
       <bibkey>liao-grishman-2010-using</bibkey>
@@ -728,7 +728,7 @@
     <paper id="83">
       <title>Learning to Follow Navigational Directions</title>
       <author><first>Adam</first><last>Vogel</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>806–814</pages>
       <url hash="780f82fb">P10-1083</url>
       <bibkey>vogel-jurafsky-2010-learning</bibkey>
@@ -736,7 +736,7 @@
     <paper id="84">
       <title>A Hybrid Hierarchical Model for Multi-Document Summarization</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>815–824</pages>
       <url hash="49c9b985">P10-1084</url>
       <bibkey>celikyilmaz-hakkani-tur-2010-hybrid</bibkey>
@@ -838,7 +838,7 @@
     <paper id="96">
       <title><fixed-case>B</fixed-case>ayesian Synchronous Tree-Substitution Grammar Induction and Its Application to Sentence Compression</title>
       <author><first>Elif</first><last>Yamangil</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>937–947</pages>
       <url hash="599fa80a">P10-1096</url>
       <bibkey>yamangil-shieber-2010-bayesian</bibkey>
@@ -965,7 +965,7 @@
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1087–1097</pages>
       <url hash="d094321a">P10-1111</url>
       <bibkey>seeker-etal-2010-hard</bibkey>
@@ -980,8 +980,8 @@
     </paper>
     <paper id="113">
       <title>Joint Syntactic and Semantic Parsing of <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>1108–1117</pages>
       <url hash="8972232e">P10-1113</url>
@@ -999,7 +999,7 @@
       <title>Cross-Lingual Latent Topic Extraction</title>
       <author><first>Duo</first><last>Zhang</last></author>
       <author><first>Qiaozhu</first><last>Mei</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>1128–1137</pages>
       <url hash="82ed7fb0">P10-1115</url>
       <bibkey>zhang-etal-2010-cross</bibkey>
@@ -1033,7 +1033,7 @@
     <paper id="119">
       <title>A Rational Model of Eye Movement Control in Reading</title>
       <author><first>Klinton</first><last>Bicknell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1168–1178</pages>
       <url hash="2ec4658d">P10-1119</url>
       <bibkey>bicknell-levy-2010-rational</bibkey>
@@ -1068,7 +1068,7 @@
       <title>Assessing the Role of Discourse References in Entailment Inference</title>
       <author><first>Shachar</first><last>Mirkin</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>1209–1219</pages>
       <url hash="297acb4a">P10-1123</url>
       <bibkey>mirkin-etal-2010-assessing</bibkey>
@@ -1086,8 +1086,8 @@
       <title>Modeling Semantic Relevance for Question-Answer Pairs in Web Social Communities</title>
       <author><first>Baoxun</first><last>Wang</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Lin</first><last>Sun</last></author>
       <pages>1230–1238</pages>
       <url hash="6f1d21e2">P10-1125</url>
@@ -1104,7 +1104,7 @@
     <paper id="127">
       <title>Generating Image Descriptions Using Dependency Relational Patterns</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>1250–1258</pages>
       <url hash="39e894c9">P10-1127</url>
       <bibkey>aker-gaizauskas-2010-generating</bibkey>
@@ -1120,8 +1120,8 @@
     </paper>
     <paper id="129">
       <title>Reading between the Lines: Learning to Map High-Level Instructions to Commands</title>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>1268–1277</pages>
       <url hash="c8b34f2c">P10-1129</url>
@@ -1129,9 +1129,9 @@
     </paper>
     <paper id="130">
       <title>Profiting from Mark-Up: Hyper-Text Annotations for Guided Parsing</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <pages>1278–1287</pages>
       <url hash="74114416">P10-1130</url>
       <bibkey>spitkovsky-etal-2010-profiting</bibkey>
@@ -1164,7 +1164,7 @@
     <paper id="134">
       <title>Learning Word-Class Lattices for Definition and Hypernym Extraction</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>1318–1327</pages>
       <url hash="51fc51d9">P10-1134</url>
       <bibkey>navigli-velardi-2010-learning</bibkey>
@@ -1186,9 +1186,9 @@
     </paper>
     <paper id="137">
       <title>Multilingual Pseudo-Relevance Feedback: Performance Study of Assisting Languages</title>
-      <author><first>Manoj Kumar</first><last>Chinnakotla</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj Kumar</first><last>Chinnakotla</last></author>
       <author><first>Karthik</first><last>Raman</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1346–1356</pages>
       <url hash="2b4fbf17">P10-1137</url>
       <bibkey>chinnakotla-etal-2010-multilingual</bibkey>
@@ -1207,7 +1207,7 @@
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Lanjun</first><last>Zhou</last></author>
       <author><first>Shi</first><last>Feng</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1367–1375</pages>
       <url hash="eb6d1801">P10-1139</url>
       <bibkey>li-etal-2010-unified</bibkey>
@@ -1237,8 +1237,8 @@
     </paper>
     <paper id="143">
       <title>Unsupervised Event Coreference Resolution with Rich Linguistic Features</title>
-      <author><first>Cosmin</first><last>Bejan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin</first><last>Bejan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>1412–1422</pages>
       <url hash="3585dea0">P10-1143</url>
       <bibkey>bejan-harabagiu-2010-unsupervised</bibkey>
@@ -1246,7 +1246,7 @@
     <paper id="144">
       <title>Coreference Resolution across Corpora: Languages, Coding Schemes, and Preprocessing Information</title>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1423–1432</pages>
       <url hash="df9c65ac">P10-1144</url>
       <bibkey>recasens-hovy-2010-coreference</bibkey>
@@ -1278,14 +1278,14 @@
       <title>Detecting Experiences from Weblogs</title>
       <author><first>Keun Chan</first><last>Park</last></author>
       <author><first>Yoonjae</first><last>Jeong</last></author>
-      <author><first>Sung Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung Hyon</first><last>Myaeng</last></author>
       <pages>1464–1472</pages>
       <url hash="71051478">P10-1148</url>
       <bibkey>park-etal-2010-detecting</bibkey>
     </paper>
     <paper id="149">
       <title>Experiments in Graph-Based Semi-Supervised Learning Methods for Class-Instance Acquisition</title>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <pages>1473–1481</pages>
       <url hash="441c4bac">P10-1149</url>
@@ -1294,7 +1294,7 @@
     <paper id="150">
       <title>Learning Arguments and Supertypes of Semantic Relations Using Recursive Patterns</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1482–1491</pages>
       <url hash="cc3d20b6">P10-1150</url>
       <bibkey>kozareva-hovy-2010-learning</bibkey>
@@ -1309,8 +1309,8 @@
     </paper>
     <paper id="152">
       <title><fixed-case>V</fixed-case>iterbi Training for <fixed-case>PCFG</fixed-case>s: Hardness Results and Competitiveness of Uniform Initialization</title>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1502–1511</pages>
       <url hash="137335a7">P10-1152</url>
       <bibkey>cohen-smith-2010-viterbi</bibkey>
@@ -1327,7 +1327,7 @@
     </paper>
     <paper id="154">
       <title>Knowledge-Rich Word Sense Disambiguation Rivaling Supervised Systems</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>1522–1531</pages>
       <url hash="74d98f07">P10-1154</url>
@@ -1335,10 +1335,10 @@
     </paper>
     <paper id="155">
       <title>All Words Domain Adapted <fixed-case>WSD</fixed-case>: Finding a Middle Ground between Supervision and Unsupervision</title>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Anup</first><last>Kulkarni</last></author>
       <author><first>Saurabh</first><last>Sohoney</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1532–1541</pages>
       <url hash="02887ada">P10-1155</url>
       <bibkey>khapra-etal-2010-words</bibkey>
@@ -1346,20 +1346,20 @@
     <paper id="156">
       <title>Combining Orthogonal Monolingual and Multilingual Sources of Evidence for All Words <fixed-case>WSD</fixed-case></title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1542–1551</pages>
       <url hash="81869530">P10-1156</url>
       <bibkey>guo-diab-2010-combining</bibkey>
     </paper>
     <paper id="157">
       <title>Phrase-Based Statistical Language Generation Using Graphical Models and Active Learning</title>
-      <author><first>François</first><last>Mairesse</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>1552–1561</pages>
       <url hash="cb190f5b">P10-1157</url>
       <bibkey>mairesse-etal-2010-phrase</bibkey>
@@ -1382,8 +1382,8 @@
     </paper>
     <paper id="160">
       <title>Beyond <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank: A Study of Implicit Arguments for Nominal Predicates</title>
-      <author><first>Matthew</first><last>Gerber</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Gerber</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1583–1592</pages>
       <url hash="70d4349c">P10-1160</url>
       <bibkey>gerber-chai-2010-beyond</bibkey>
@@ -1393,8 +1393,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> 2010 Conference Short Papers</booktitle>
       <url hash="cd5461ca">P10-2</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>Sandra</first><last>Carberry</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="sandra-carberry"><first>Sandra</first><last>Carberry</last></editor>
       <editor><first>Stephen</first><last>Clark</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1409,9 +1409,9 @@
     </frontmatter>
     <paper id="1">
       <title>Paraphrase Lattice for Statistical Machine Translation</title>
-      <author><first>Takashi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1–5</pages>
       <url hash="6434f4b7">P10-2001</url>
       <bibkey>onishi-etal-2010-paraphrase</bibkey>
@@ -1422,7 +1422,7 @@
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>6–11</pages>
       <url hash="cb2d7a9e">P10-2002</url>
       <bibkey>cui-etal-2010-joint</bibkey>
@@ -1431,7 +1431,7 @@
       <title>Learning Lexicalized Reordering Models from Reordering Graphs</title>
       <author><first>Jinsong</first><last>Su</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>12–16</pages>
@@ -1441,7 +1441,7 @@
     <paper id="4">
       <title>Filtering Syntactic Constraints for Statistical Machine Translation</title>
       <author><first>Hailong</first><last>Cao</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>17–21</pages>
       <url hash="722b0023">P10-2004</url>
       <bibkey>cao-sumita-2010-filtering</bibkey>
@@ -1458,7 +1458,7 @@
     <paper id="6">
       <title>Efficient Path Counting Transducers for Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding of Statistical Machine Translation Lattices</title>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>27–32</pages>
       <url hash="99335eb2">P10-2006</url>
@@ -1476,15 +1476,15 @@
       <title>Authorship Attribution Using Probabilistic Context-Free Grammars</title>
       <author><first>Sindhu</first><last>Raghavan</last></author>
       <author><first>Adriana</first><last>Kovashka</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>38–42</pages>
       <url hash="b96b378a">P10-2008</url>
       <bibkey>raghavan-etal-2010-authorship</bibkey>
     </paper>
     <paper id="9">
       <title>The Impact of Interpretation Problems on Tutorial Dialogue</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Natalie</first><last>Steinhauser</last></author>
       <author><first>Gwendolyn</first><last>Campbell</last></author>
       <pages>43–48</pages>
@@ -1494,7 +1494,7 @@
     <paper id="10">
       <title>The Prevalence of Descriptive Referring Expressions in News and Narrative</title>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>49–54</pages>
       <url hash="2f942ba0">P10-2010</url>
       <bibkey>hervas-finlayson-2010-prevalence</bibkey>
@@ -1502,7 +1502,7 @@
     <paper id="11">
       <title>Preferences versus Adaptation during Referring Expression Generation</title>
       <author><first>Martijn</first><last>Goudbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>55–59</pages>
       <url hash="e1cf1549">P10-2011</url>
       <bibkey>goudbeek-krahmer-2010-preferences</bibkey>
@@ -1516,17 +1516,17 @@
     </paper>
     <paper id="13">
       <title>The Manually Annotated Sub-Corpus: A Community Resource for and by the People</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>68–73</pages>
       <url hash="8d6e264a">P10-2013</url>
       <bibkey>ide-etal-2010-manually</bibkey>
     </paper>
     <paper id="14">
       <title>Correcting Errors in a Treebank Based on Synchronous Tree Substitution Grammar</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>74–79</pages>
       <url hash="3d0beb95">P10-2014</url>
@@ -1536,7 +1536,7 @@
       <title>Evaluating Machine Translations Using m<fixed-case>NCD</fixed-case></title>
       <author><first>Marcus</first><last>Dobrinkat</last></author>
       <author><first>Tero</first><last>Tapiovaara</last></author>
-      <author><first>Jaakko</first><last>Väyrynen</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko</first><last>Väyrynen</last></author>
       <author><first>Kimmo</first><last>Kettunen</last></author>
       <pages>80–85</pages>
       <url hash="b2636e57">P10-2015</url>
@@ -1544,7 +1544,7 @@
     </paper>
     <paper id="16">
       <title>Tackling Sparse Data Issue in Machine Translation Evaluation</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Kamil</first><last>Kos</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <pages>86–91</pages>
@@ -1554,7 +1554,7 @@
     <paper id="17">
       <title>Exemplar-Based Models for Word Meaning in Context</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>92–97</pages>
       <url hash="17f7a075">P10-2017</url>
       <bibkey>erk-pado-2010-exemplar</bibkey>
@@ -1563,14 +1563,14 @@
       <title>A Structured Model for Joint Learning of Argument Roles and Predicate Senses</title>
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>98–102</pages>
       <url hash="cd9fb4bb">P10-2018</url>
       <bibkey>watanabe-etal-2010-structured</bibkey>
     </paper>
     <paper id="19">
       <title>Semantics-Driven Shallow Parsing for <fixed-case>C</fixed-case>hinese Semantic Role Labeling</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>103–108</pages>
       <url hash="6dbf2661">P10-2019</url>
       <bibkey>sun-2010-semantics</bibkey>
@@ -1584,10 +1584,10 @@
     </paper>
     <paper id="21">
       <title>Automatic Collocation Suggestion in Academic Writing</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Yu-Chia</first><last>Chang</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>115–119</pages>
       <url hash="342f136d">P10-2021</url>
       <bibkey>wu-etal-2010-automatic-collocation</bibkey>
@@ -1595,7 +1595,7 @@
     <paper id="22">
       <title>Event-Based Hyperspace Analogue to Language for Query Expansion</title>
       <author><first>Tingxu</first><last>Yan</last></author>
-      <author><first>Tamsin</first><last>Maxwell</last></author>
+      <author id="k-tamsin-maxwell"><first>Tamsin</first><last>Maxwell</last></author>
       <author><first>Dawei</first><last>Song</last></author>
       <author><first>Yuexian</first><last>Hou</last></author>
       <author><first>Peng</first><last>Zhang</last></author>
@@ -1606,8 +1606,8 @@
     <paper id="23">
       <title>Automatically Generating Term Frequency Induced Taxonomies</title>
       <author><first>Karin</first><last>Murthy</last></author>
-      <author><first>Tanveer A</first><last>Faruquie</last></author>
-      <author><first>L Venkata</first><last>Subramaniam</last></author>
+      <author id="tanveer-a-faruquie"><first>Tanveer A</first><last>Faruquie</last></author>
+      <author id="l-venkata-subramaniam"><first>L Venkata</first><last>Subramaniam</last></author>
       <author><first>Hima Prasad</first><last>K</last></author>
       <author><first>Mukesh</first><last>Mohania</last></author>
       <pages>126–131</pages>
@@ -1633,7 +1633,7 @@
     <paper id="26">
       <title>Better Filtration and Augmentation for Hierarchical Phrase-Based Translation Rules</title>
       <author><first>Zhiyang</first><last>Wang</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
       <pages>142–146</pages>
@@ -1643,7 +1643,7 @@
     <paper id="27">
       <title>Fixed Length Word Suffix for Factored Statistical Machine Translation</title>
       <author><first>Narges</first><last>Sharif Razavian</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>147–150</pages>
       <url hash="962008db">P10-2027</url>
       <bibkey>sharif-razavian-vogel-2010-fixed</bibkey>
@@ -1659,9 +1659,9 @@
     <paper id="29">
       <title>Coreference Resolution with Reconcile</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Nathan</first><last>Gilbert</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>David</first><last>Buttler</last></author>
       <author><first>David</first><last>Hysom</last></author>
       <pages>156–161</pages>
@@ -1679,16 +1679,16 @@
     </paper>
     <paper id="31">
       <title>Improving <fixed-case>C</fixed-case>hinese Semantic Role Labeling with Rich Syntactic Features</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>168–172</pages>
       <url hash="c2e11772">P10-2031</url>
       <bibkey>sun-2010-improving</bibkey>
     </paper>
     <paper id="32">
       <title>Balancing User Effort and Translation Error in Interactive Machine Translation via Confidence Measures</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>173–177</pages>
       <url hash="1417be9c">P10-2032</url>
       <bibkey>gonzalez-rubio-etal-2010-balancing</bibkey>
@@ -1714,7 +1714,7 @@
     <paper id="35">
       <title>Tree-Based Deterministic Dependency Parsing — An Application to <fixed-case>N</fixed-case>ivre’s Method —</title>
       <author><first>Kotaro</first><last>Kitagawa</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>189–193</pages>
       <url hash="01b7e8b7">P10-2035</url>
       <bibkey>kitagawa-tanaka-ishii-2010-tree</bibkey>
@@ -1723,7 +1723,7 @@
       <title>Sparsity in Dependency Grammar Induction</title>
       <author><first>Jennifer</first><last>Gillenwater</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <author><first>Ben</first><last>Taskar</last></author>
       <pages>194–199</pages>
@@ -1741,7 +1741,7 @@
     </paper>
     <paper id="38">
       <title>Simple Semi-Supervised Training of Part-Of-Speech Taggers</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>205–208</pages>
       <url hash="b82313e8">P10-2038</url>
       <bibkey>sogaard-2010-simple</bibkey>
@@ -1767,16 +1767,16 @@
     </paper>
     <paper id="41">
       <title>Intelligent Selection of Language Model Training Data</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>220–224</pages>
       <url hash="00924ad2">P10-2041</url>
       <bibkey>moore-lewis-2010-intelligent</bibkey>
     </paper>
     <paper id="42">
       <title>Blocked Inference in <fixed-case>B</fixed-case>ayesian Tree Substitution Grammars</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>225–230</pages>
       <url hash="622dda35">P10-2042</url>
       <bibkey>cohn-blunsom-2010-blocked</bibkey>
@@ -1792,15 +1792,15 @@
     <paper id="44">
       <title>Optimizing Question Answering Accuracy by Maximizing Log-Likelihood</title>
       <author><first>Matthias H.</first><last>Heie</last></author>
-      <author><first>Edward W. D.</first><last>Whittaker</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="edward-w-d-whittaker"><first>Edward W. D.</first><last>Whittaker</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>236–240</pages>
       <url hash="66a221a6">P10-2044</url>
       <bibkey>heie-etal-2010-optimizing</bibkey>
     </paper>
     <paper id="45">
       <title>Generating Entailment Rules from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Roni</first><last>Ben Aharon</last></author>
+      <author id="roni-ben-aharon"><first>Roni</first><last>Ben Aharon</last></author>
       <author><first>Idan</first><last>Szpektor</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>241–246</pages>
@@ -1827,7 +1827,7 @@
     <paper id="48">
       <title>Cross Lingual Adaptation: An Experiment on Sentiment Classifications</title>
       <author><first>Bin</first><last>Wei</last></author>
-      <author><first>Christopher</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Christopher</first><last>Pal</last></author>
       <pages>258–262</pages>
       <url hash="c7f0d545">P10-2048</url>
       <bibkey>wei-pal-2010-cross</bibkey>
@@ -1843,16 +1843,16 @@
     <paper id="50">
       <title>Hierarchical Sequential Learning for Extracting Opinions and Their Attributes</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>269–274</pages>
       <url hash="2ecfe623">P10-2050</url>
       <bibkey>choi-cardie-2010-hierarchical</bibkey>
     </paper>
     <paper id="51">
       <title>Jointly Optimizing a Two-Step Conditional Random Field Model for Machine Transliteration and Its Fast Decoding Algorithm</title>
-      <author><first>Dong</first><last>Yang</last></author>
-      <author><first>Paul</first><last>Dixon</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last></author>
+      <author id="paul-dixon"><first>Paul</first><last>Dixon</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>275–280</pages>
       <url hash="fdcbd48d">P10-2051</url>
       <bibkey>yang-etal-2010-jointly</bibkey>
@@ -1861,7 +1861,7 @@
       <title><fixed-case>A</fixed-case>rabic Named Entity Recognition: Using Features Extracted from Noisy Data</title>
       <author><first>Yassine</first><last>Benajiba</last></author>
       <author><first>Imed</first><last>Zitouni</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>281–285</pages>
       <url hash="4728718f">P10-2052</url>
@@ -1870,7 +1870,7 @@
     <paper id="53">
       <title>Extracting Sequences from the Web</title>
       <author><first>Anthony</first><last>Fader</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>286–290</pages>
       <url hash="981649ea">P10-2053</url>
@@ -1905,7 +1905,7 @@
     </paper>
     <paper id="57">
       <title>Decision Detection Using Hierarchical Graphical Models</title>
-      <author><first>Trung H.</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung H.</first><last>Bui</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>307–312</pages>
       <url hash="5c665fcf">P10-2057</url>
@@ -1913,7 +1913,7 @@
     </paper>
     <paper id="58">
       <title>Using Speech to Reply to <fixed-case>SMS</fixed-case> Messages While Driving: An In-Car Simulator User Study</title>
-      <author><first>Yun-Cheng</first><last>Ju</last></author>
+      <author id="yun-cheng-ju"><first>Yun-Cheng</first><last>Ju</last></author>
       <author><first>Tim</first><last>Paek</last></author>
       <pages>313–317</pages>
       <url hash="8b46f61c">P10-2058</url>
@@ -1949,7 +1949,7 @@
       <title>Automatically Generating Annotator Rationales to Improve Sentiment Classification</title>
       <author><first>Ainur</first><last>Yessenalina</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>336–341</pages>
       <url hash="e681c661">P10-2062</url>
       <bibkey>yessenalina-etal-2010-automatically</bibkey>
@@ -1971,9 +1971,9 @@
     </paper>
     <paper id="65">
       <title>Using Parse Features for Preposition Selection and Error Detection</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>353–358</pages>
       <url hash="71bbd889">P10-2065</url>
       <bibkey>tetreault-etal-2010-using</bibkey>
@@ -1983,7 +1983,7 @@
       <author><first>Xiao-Li</first><last>Li</last></author>
       <author><first>Lei</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>See-Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See-Kiong</first><last>Ng</last></author>
       <pages>359–364</pages>
       <url hash="23d6f77a">P10-2066</url>
       <bibkey>li-etal-2010-distributional</bibkey>
@@ -1991,8 +1991,8 @@
     <paper id="67">
       <title>Active Learning-Based Elicitation for Semi-Supervised Word Alignment</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>365–370</pages>
       <url hash="3598e4df">P10-2067</url>
       <bibkey>ambati-etal-2010-active-learning</bibkey>
@@ -2008,8 +2008,8 @@
     </paper>
     <paper id="69">
       <title>Learning Better Data Representation Using Inference-Driven Metric Learning</title>
-      <author><first>Paramveer S.</first><last>Dhillon</last></author>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="paramveer-s-dhillon"><first>Paramveer S.</first><last>Dhillon</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Koby</first><last>Crammer</last></author>
       <pages>377–381</pages>
       <url hash="4950d21f">P10-2069</url>
@@ -2019,7 +2019,7 @@
       <title>Wrapping up a Summary: From Representation to Generation</title>
       <author><first>Josef</first><last>Steinberger</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
       <author><first>Nello</first><last>Cristianini</last></author>
       <pages>382–386</pages>
@@ -2034,8 +2034,8 @@
       <editor><first>Seniz</first><last>Demir</last></editor>
       <editor><first>Jan</first><last>Raab</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Marketa</first><last>Lopatkova</last></editor>
-      <editor><first>Tomek</first><last>Strzalkowski</last></editor>
+      <editor id="marketa-lopatkova"><first>Marketa</first><last>Lopatkova</last></editor>
+      <editor id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -2159,7 +2159,7 @@
     </paper>
     <paper id="16">
       <title>Adapting Self-Training for Semantic Role Labeling</title>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
       <pages>91–96</pages>
       <url hash="8f5d474f">P10-3016</url>
       <bibkey>samad-zadeh-kaljahi-2010-adapting</bibkey>
@@ -2190,7 +2190,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> 2010 System Demonstrations</booktitle>
       <url hash="36aaabf4">P10-4</url>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -2203,11 +2203,11 @@
     </frontmatter>
     <paper id="1">
       <title>Grammar Prototyping and Testing with the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix Customization System</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Scott</first><last>Drellishak</last></author>
       <author><first>Antske</first><last>Fokkens</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
-      <author><first>Daniel P.</first><last>Mills</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="daniel-p-mills"><first>Daniel P.</first><last>Mills</last></author>
       <author><first>Laurie</first><last>Poulson</last></author>
       <author><first>Safiyyah</first><last>Saleem</last></author>
       <pages>1–6</pages>
@@ -2216,12 +2216,12 @@
     </paper>
     <paper id="2">
       <title>cdec: A Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
       <author><first>Jonathan</first><last>Weese</last></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Hendra</first><last>Setiawan</last></author>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
@@ -2231,12 +2231,12 @@
     </paper>
     <paper id="3">
       <title>Beetle <fixed-case>II</fixed-case>: A System for Tutoring and Computational Linguistics Experimentation</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Natalie</first><last>Steinhauser</last></author>
       <author><first>Gwendolyn</first><last>Campbell</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <pages>13–18</pages>
       <url hash="9d914499">P10-4003</url>
       <bibkey>dzikovska-etal-2010-beetle</bibkey>
@@ -2244,15 +2244,15 @@
     <paper id="4">
       <title><fixed-case>G</fixed-case>ern<fixed-case>E</fixed-case>di<fixed-case>T</fixed-case>: A Graphical Tool for <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Development</title>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>19–24</pages>
       <url hash="0bdc14b0">P10-4004</url>
       <bibkey>henrich-hinrichs-2010-gernedit-graphical</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht: Web-Based <fixed-case>LRT</fixed-case> Services for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
-      <author><first>Marie</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></author>
       <author><first>Thomas</first><last>Zastrow</last></author>
       <pages>25–29</pages>
       <url hash="40389494">P10-4005</url>
@@ -2268,7 +2268,7 @@
     </paper>
     <paper id="7">
       <title>Talking <fixed-case>NPC</fixed-case>s in a Virtual Game World</title>
-      <author><first>Tina</first><last>Klüwer</last></author>
+      <author id="tina-kluwer"><first>Tina</first><last>Klüwer</last></author>
       <author><first>Peter</first><last>Adolphs</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
@@ -2280,7 +2280,7 @@
     <paper id="8">
       <title>An Open-Source Package for Recognizing Textual Entailment</title>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>42–47</pages>
       <url hash="d63ef4cd">P10-4008</url>
       <bibkey>kouylekov-negri-2010-open</bibkey>
@@ -2298,7 +2298,7 @@
       <author><first>Simon</first><last>King</last></author>
       <author><first>Hui</first><last>Liang</last></author>
       <author><first>Keiichiro</first><last>Oura</last></author>
-      <author><first>Lakshmi</first><last>Saheer</last></author>
+      <author id="lakshmi-s"><first>Lakshmi</first><last>Saheer</last></author>
       <author><first>Matt</first><last>Shannon</last></author>
       <author><first>Sayaki</first><last>Shiota</last></author>
       <author><first>Jilei</first><last>Tian</last></author>
@@ -2308,7 +2308,7 @@
     </paper>
     <paper id="10">
       <title>Hunting for the Black Swan: Risk Mining from Text</title>
-      <author><first>Jochen</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen</first><last>Leidner</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <pages>54–59</pages>
       <url hash="fa998c22">P10-4010</url>
@@ -2317,7 +2317,7 @@
     <paper id="11">
       <title>Speech-Driven Access to the Deep Web on Mobile Devices</title>
       <author><first>Taniya</first><last>Mishra</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>60–65</pages>
       <url hash="fe6ca08d">P10-4011</url>
       <bibkey>mishra-bangalore-2010-speech</bibkey>
@@ -2333,8 +2333,8 @@
     </paper>
     <paper id="13">
       <title>Demonstration of a Prototype for a Conversational Companion for Reminiscing about Images</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Roberta</first><last>Catizone</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
       <author><first>Alexiei</first><last>Dingli</last></author>
       <author><first>Weiwei</first><last>Cheng</last></author>
       <pages>72–77</pages>
@@ -2354,7 +2354,7 @@
     <meta>
       <booktitle>Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="0bffad6f">P10-5</url>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <editor><first>Haifeng</first><last>Wang</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
@@ -2370,7 +2370,7 @@
       <title>Wide-Coverage <fixed-case>NLP</fixed-case> with Linguistically Expressive Grammars</title>
       <author><first>Julia</first><last>Hockenmaier</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1</pages>
       <url hash="7954ab28">P10-5001</url>
       <bibkey>hockenmaier-etal-2010-wide</bibkey>
@@ -2385,7 +2385,7 @@
     </paper>
     <paper id="3">
       <title>Discourse Structure: Theory, Practice and Use</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Markus</first><last>Egg</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <pages>3</pages>
@@ -2394,21 +2394,21 @@
     </paper>
     <paper id="4">
       <title>Annotation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>4</pages>
       <url hash="a2874565">P10-5004</url>
       <bibkey>hovy-2010-annotation</bibkey>
     </paper>
     <paper id="5">
       <title>From Structured Prediction to Inverse Reinforcement Learning</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>5</pages>
       <url hash="4479fcef">P10-5005</url>
       <bibkey>daume-iii-2010-structured</bibkey>
     </paper>
     <paper id="6">
       <title>Semantic Parsing: The Task, the State of the Art and the Future</title>
-      <author><first>Rohit J.</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit J.</first><last>Kate</last></author>
       <author><first>Yuk Wah</first><last>Wong</last></author>
       <pages>6</pages>
       <url hash="38be5912">P10-5006</url>
diff --git a/data/xml/P11.xml b/data/xml/P11.xml
index 91cba3e629..feacd2d9ee 100644
--- a/data/xml/P11.xml
+++ b/data/xml/P11.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="814f30e4">P11-1</url>
       <editor><first>Dekang</first><last>Lin</last></editor>
-      <editor><first>Yuji</first><last>Matsumoto</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -20,7 +20,7 @@
     <paper id="1">
       <title>A Word-Class Approach to Labeling <fixed-case>PSCFG</fixed-case> Rules for Machine Translation</title>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1–11</pages>
       <url hash="ca4b09a6">P11-1001</url>
       <bibkey>zollmann-vogel-2011-word</bibkey>
@@ -37,7 +37,7 @@
       <title>Effective Use of Function Words for Rule Generalization in Forest-Based Translation</title>
       <author><first>Xianchao</first><last>Wu</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>22–31</pages>
       <url hash="7ae5824c">P11-1003</url>
       <bibkey>wu-etal-2011-effective</bibkey>
@@ -77,8 +77,8 @@
     </paper>
     <paper id="8">
       <title>Exact Decoding of Syntactic Translation Models through <fixed-case>L</fixed-case>agrangian Relaxation</title>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>72–82</pages>
       <url hash="feb55062">P11-1008</url>
       <bibkey>rush-collins-2011-exact</bibkey>
@@ -102,8 +102,8 @@
     <paper id="11">
       <title>Joint Annotation of Search Queries</title>
       <author><first>Michael</first><last>Bendersky</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>102–111</pages>
       <url hash="1ee55433">P11-1011</url>
       <attachment type="dataset" hash="03b53962">P11-1011.Datasets.zip</attachment>
@@ -114,7 +114,7 @@
       <author><first>Peng</first><last>Cai</last></author>
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Aoying</first><last>Zhou</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>112–122</pages>
       <url hash="bc8db4c1">P11-1012</url>
       <bibkey>cai-etal-2011-query</bibkey>
@@ -131,19 +131,19 @@
     <paper id="14">
       <title>Using Multiple Sources to Construct a Sentiment Sensitive Thesaurus for Cross-Domain Sentiment Classification</title>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>132–141</pages>
       <url hash="ec685d98">P11-1014</url>
       <bibkey>bollegala-etal-2011-using</bibkey>
     </paper>
     <paper id="15">
       <title>Learning Word Vectors for Sentiment Analysis</title>
-      <author><first>Andrew L.</first><last>Maas</last></author>
+      <author id="andrew-maas"><first>Andrew L.</first><last>Maas</last></author>
       <author><first>Raymond E.</first><last>Daly</last></author>
       <author><first>Peter T.</first><last>Pham</last></author>
       <author><first>Dan</first><last>Huang</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>142–150</pages>
       <url hash="d957ea5d">P11-1015</url>
@@ -155,7 +155,7 @@
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <author><first>Xiaohua</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>151–160</pages>
       <url hash="2f8c304f">P11-1016</url>
       <bibkey>jiang-etal-2011-target</bibkey>
@@ -171,9 +171,9 @@
     </paper>
     <paper id="18">
       <title>Multi-Modal Annotation of Quest Games in Second Life</title>
-      <author><first>Sharon</first><last>Gower Small</last></author>
-      <author><first>Jennifer</first><last>Strommer-Galley</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Gower Small</last></author>
+      <author id="jennifer-stromer-galley"><first>Jennifer</first><last>Strommer-Galley</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <pages>171–179</pages>
       <url hash="7a12e6ff">P11-1018</url>
       <bibkey>gower-small-etal-2011-multi</bibkey>
@@ -181,7 +181,7 @@
     <paper id="19">
       <title>A New Dataset and Method for Automatically Grading <fixed-case>ESOL</fixed-case> Texts</title>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Ben</first><last>Medlock</last></author>
       <pages>180–189</pages>
       <url hash="6efc99fc">P11-1019</url>
@@ -190,7 +190,7 @@
     <paper id="20">
       <title>Collecting Highly Parallel Data for Paraphrase Evaluation</title>
       <author><first>David</first><last>Chen</last></author>
-      <author><first>William</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William</first><last>Dolan</last></author>
       <pages>190–200</pages>
       <url hash="7f3652ae">P11-1020</url>
       <attachment type="dataset" hash="1f683c88">P11-1020.Datasets.txt</attachment>
@@ -210,7 +210,7 @@
       <title><fixed-case>G</fixed-case>oodness: A Method for Measuring Machine Translation Confidence</title>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>211–219</pages>
       <url hash="19c5b6bc">P11-1022</url>
       <bibkey>bach-etal-2011-goodness</bibkey>
@@ -257,7 +257,7 @@
     </paper>
     <paper id="28">
       <title>Learning to Win by Reading Manuals in a <fixed-case>M</fixed-case>onte-<fixed-case>C</fixed-case>arlo Framework</title>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
       <author><first>David</first><last>Silver</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>268–277</pages>
@@ -275,7 +275,7 @@
       <title>Local Histograms of Character N-grams for Authorship Attribution</title>
       <author><first>Hugo Jair</first><last>Escalante</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>288–298</pages>
       <url hash="e0693fea">P11-1030</url>
       <bibkey>escalante-etal-2011-local</bibkey>
@@ -283,7 +283,7 @@
     <paper id="31">
       <title>Word Maturity: Computational Modeling of Word Knowledge</title>
       <author><first>Kirill</first><last>Kireyev</last></author>
-      <author><first>Thomas K</first><last>Landauer</last></author>
+      <author id="thomas-landauer"><first>Thomas K</first><last>Landauer</last></author>
       <pages>299–308</pages>
       <url hash="e191e5d0">P11-1031</url>
       <bibkey>kireyev-landauer-2011-word</bibkey>
@@ -292,7 +292,7 @@
       <title>Finding Deceptive Opinion Spam by Any Stretch of the Imagination</title>
       <author><first>Myle</first><last>Ott</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Jeffrey T.</first><last>Hancock</last></author>
       <pages>309–319</pages>
       <award>ACL 2021 Test of Time Award (10 year)</award>
@@ -304,8 +304,8 @@
       <title>Joint Bilingual Sentiment Classification with Unlabeled Parallel Corpora</title>
       <author><first>Bin</first><last>Lu</last></author>
       <author><first>Chenhao</first><last>Tan</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>320–330</pages>
       <url hash="dfd3baff">P11-1033</url>
       <bibkey>lu-etal-2011-joint</bibkey>
@@ -321,7 +321,7 @@
     <paper id="35">
       <title>Contrasting Opposing Views of News Articles on Contentious Issues</title>
       <author><first>Souneil</first><last>Park</last></author>
-      <author><first>KyungSoon</first><last>Lee</last></author>
+      <author id="kyung-soon-lee"><first>KyungSoon</first><last>Lee</last></author>
       <author><first>Junehwa</first><last>Song</last></author>
       <pages>340–349</pages>
       <url hash="823b267b">P11-1035</url>
@@ -349,7 +349,7 @@
     <paper id="38">
       <title>Lexical Normalisation of Short Text Messages: Makn Sens a #twitter</title>
       <author><first>Bo</first><last>Han</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>368–378</pages>
       <url hash="460d1e47">P11-1038</url>
       <attachment type="software" hash="f8a0d66f">P11-1038.Software.tar.bz2</attachment>
@@ -358,7 +358,7 @@
     </paper>
     <paper id="39">
       <title>Topical Keyphrase Extraction from <fixed-case>T</fixed-case>witter</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Jing</first><last>Jiang</last></author>
       <author><first>Jing</first><last>He</last></author>
       <author><first>Yang</first><last>Song</last></author>
@@ -389,10 +389,10 @@
     </paper>
     <paper id="42">
       <title>Unsupervised Word Alignment with Arbitrary Features</title>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>409–419</pages>
       <url hash="3e45dd1a">P11-1042</url>
       <bibkey>dyer-etal-2011-unsupervised</bibkey>
@@ -408,7 +408,7 @@
     <paper id="44">
       <title>An Algorithm for Unsupervised Transliteration Mining with an Application to Word Alignment</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>430–439</pages>
       <url hash="04844e9f">P11-1044</url>
@@ -419,7 +419,7 @@
       <title>Beam-Width Prediction for Efficient Context-Free Parsing</title>
       <author><first>Nathan</first><last>Bodenstab</last></author>
       <author><first>Aaron</first><last>Dunlop</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>440–449</pages>
       <url hash="6d00d0e2">P11-1045</url>
@@ -430,7 +430,7 @@
       <author><first>Pierluigi</first><last>Crescenzi</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <author><first>Andrea</first><last>Marino</last></author>
-      <author><first>Gianluca</first><last>Rossi</last></author>
+      <author id="gianluca-de-rossi"><first>Gianluca</first><last>Rossi</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>450–459</pages>
       <url hash="36e3ebd2">P11-1046</url>
@@ -455,7 +455,7 @@
     <paper id="49">
       <title>Jointly Learning to Extract and Compress</title>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
-      <author><first>Dan</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>481–490</pages>
       <url hash="65721174">P11-1049</url>
@@ -464,7 +464,7 @@
     <paper id="50">
       <title>Discovery of Topically Coherent Sentences for Extractive Summarization</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <pages>491–499</pages>
       <url hash="3bd9a9d4">P11-1050</url>
       <bibkey>celikyilmaz-hakkani-tur-2011-discovery</bibkey>
@@ -472,7 +472,7 @@
     <paper id="51">
       <title>Coherent Citation-Based Summarization of Scientific Papers</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>500–509</pages>
       <url hash="6faaa68b">P11-1051</url>
       <bibkey>abu-jbara-radev-2011-coherent</bibkey>
@@ -480,7 +480,7 @@
     <paper id="52">
       <title>A Class of Submodular Functions for Document Summarization</title>
       <author><first>Hui</first><last>Lin</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>510–520</pages>
       <url hash="c832beb6">P11-1052</url>
       <bibkey>lin-bilmes-2011-class</bibkey>
@@ -488,7 +488,7 @@
     <paper id="53">
       <title>Semi-supervised Relation Extraction with Large-scale Word Clustering</title>
       <author><first>Ang</first><last>Sun</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
       <pages>521–529</pages>
       <url hash="bdc0619b">P11-1053</url>
@@ -506,11 +506,11 @@
     </paper>
     <paper id="55">
       <title>Knowledge-Based Weak Supervision for Information Extraction of Overlapping Relations</title>
-      <author><first>Raphael</first><last>Hoffmann</last></author>
+      <author id="raphael-hoffmann"><first>Raphael</first><last>Hoffmann</last></author>
       <author><first>Congle</first><last>Zhang</last></author>
       <author><first>Xiao</first><last>Ling</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <pages>541–550</pages>
       <url hash="30738c1c">P11-1055</url>
       <bibkey>hoffmann-etal-2011-knowledge</bibkey>
@@ -525,10 +525,10 @@
     </paper>
     <paper id="57">
       <title>Together We Can: Bilingual Bootstrapping for <fixed-case>WSD</fixed-case></title>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Salil</first><last>Joshi</last></author>
       <author><first>Arindam</first><last>Chatterjee</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>561–569</pages>
       <url hash="ce228eee">P11-1057</url>
       <bibkey>khapra-etal-2011-together</bibkey>
@@ -536,7 +536,7 @@
     <paper id="58">
       <title>Which Noun Phrases Denote Which Concepts?</title>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>570–580</pages>
       <url hash="141e799f">P11-1058</url>
       <bibkey>krishnamurthy-mitchell-2011-noun</bibkey>
@@ -544,7 +544,7 @@
     <paper id="59">
       <title>Semantic Representation of Negation Using Focus Detection</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>581–589</pages>
       <url hash="9d211bf7">P11-1059</url>
       <bibkey>blanco-moldovan-2011-semantic</bibkey>
@@ -552,7 +552,7 @@
     <paper id="60">
       <title>Learning Dependency-Based Compositional Semantics</title>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Michael</first><last>Jordan</last></author>
+      <author id="michael-i-jordan"><first>Michael</first><last>Jordan</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>590–599</pages>
       <url hash="0df9f12d">P11-1060</url>
@@ -594,7 +594,7 @@
       <title>An Unsupervised Model for Joint Phrase Alignment and Extraction</title>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Shinsuke</first><last>Mori</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
       <pages>632–641</pages>
@@ -604,7 +604,7 @@
     <paper id="65">
       <title>Learning Hierarchical Translation Structure with Linguistic Annotations</title>
       <author><first>Markos</first><last>Mylonakis</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>642–652</pages>
       <url hash="b7e13540">P11-1065</url>
       <bibkey>mylonakis-simaan-2011-learning</bibkey>
@@ -685,7 +685,7 @@
     <paper id="74">
       <title>N-Best Rescoring Based on Pitch-accent Patterns</title>
       <author><first>Je Hun</first><last>Jeon</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <pages>732–741</pages>
       <url hash="ff2ca298">P11-1074</url>
@@ -702,7 +702,7 @@
     <paper id="76">
       <title>Learning to Grade Short Answer Questions using Semantic Similarity Measures and Dependency Graph Alignments</title>
       <author><first>Michael</first><last>Mohler</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <author><first>Rada</first><last>Mihalcea</last></author>
       <pages>752–762</pages>
       <url hash="35996b4c">P11-1076</url>
@@ -712,7 +712,7 @@
     <paper id="77">
       <title>Age Prediction in Blogs: A Study of Style, Content, and Online Behavior in Pre- and Post-Social Media Generations</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>763–772</pages>
       <url hash="2715546e">P11-1077</url>
       <bibkey>rosenthal-mckeown-2011-age</bibkey>
@@ -730,7 +730,7 @@
     <paper id="79">
       <title>Bootstrapping coreference resolution using word associations</title>
       <author><first>Hamidreza</first><last>Kobdani</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Michael</first><last>Schiehlen</last></author>
       <author><first>Hans</first><last>Kamp</last></author>
       <pages>783–792</pages>
@@ -740,7 +740,7 @@
     <paper id="80">
       <title>Large-Scale Cross-Document Coreference Using Distributed Inference and Hierarchical Models</title>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>793–803</pages>
@@ -750,7 +750,7 @@
     <paper id="81">
       <title>A Cross-Lingual <fixed-case>ILP</fixed-case> Solution to Zero Anaphora Resolution</title>
       <author><first>Ryu</first><last>Iida</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>804–813</pages>
       <url hash="12c5f46e">P11-1081</url>
       <bibkey>iida-poesio-2011-cross</bibkey>
@@ -784,7 +784,7 @@
       <title>Learning to Transform and Select Elementary Trees for Improved Syntax-based Machine Translations</title>
       <author><first>Bing</first><last>Zhao</last></author>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Liu</first><last>Li</last></author>
       <pages>846–855</pages>
       <url hash="f6e39d63">P11-1085</url>
@@ -802,8 +802,8 @@
     </paper>
     <paper id="87">
       <title>A Hierarchical <fixed-case>P</fixed-case>itman-<fixed-case>Y</fixed-case>or Process <fixed-case>HMM</fixed-case> for Unsupervised Part of Speech Induction</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>865–874</pages>
       <url hash="b4f92cf4">P11-1087</url>
       <bibkey>blunsom-cohn-2011-hierarchical</bibkey>
@@ -811,16 +811,16 @@
     <paper id="88">
       <title>Using Deep Morphology to Improve Automatic Error Detection in <fixed-case>A</fixed-case>rabic Handwriting Recognition</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <pages>875–884</pages>
       <url hash="6ee190e8">P11-1088</url>
       <bibkey>habash-roth-2011-using</bibkey>
     </paper>
     <paper id="89">
       <title>A Discriminative Model for Joint Morphological Disambiguation and Dependency Parsing</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Jason</first><last>Naradowsky</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>885–894</pages>
       <url hash="aa7d3b80">P11-1089</url>
       <bibkey>lee-etal-2011-discriminative</bibkey>
@@ -861,8 +861,8 @@
     </paper>
     <paper id="94">
       <title>Automated Whole Sentence Grammar Correction Using a Noisy Channel Model</title>
-      <author><first>Y. Albert</first><last>Park</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="y-albert-park"><first>Y. Albert</first><last>Park</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>934–944</pages>
       <url hash="5e1d0858">P11-1094</url>
       <bibkey>park-levy-2011-automated</bibkey>
@@ -877,7 +877,7 @@
     </paper>
     <paper id="96">
       <title>Simple supervised document geolocation with geodesic grids</title>
-      <author><first>Benjamin</first><last>Wing</last></author>
+      <author id="benjamin-wing"><first>Benjamin</first><last>Wing</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>955–964</pages>
       <url hash="9a4de9d6">P11-1096</url>
@@ -885,18 +885,18 @@
     </paper>
     <paper id="97">
       <title><fixed-case>P</fixed-case>iggyback: Using Search Engines for Robust Cross-Domain Named Entity Recognition</title>
-      <author><first>Stefan</first><last>Rüd</last></author>
+      <author id="stefan-rued"><first>Stefan</first><last>Rüd</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <author><first>Jens</first><last>Müller</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>965–975</pages>
       <url hash="41ff3f4b">P11-1097</url>
       <bibkey>rud-etal-2011-piggyback</bibkey>
     </paper>
     <paper id="98">
       <title>Template-Based Information Extraction without the Templates</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>976–986</pages>
       <url hash="cd81f349">P11-1098</url>
       <bibkey>chambers-jurafsky-2011-template</bibkey>
@@ -920,7 +920,7 @@
     </paper>
     <paper id="101">
       <title>Underspecifying and Predicting Voice for Surface Realisation Ranking</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>1007–1017</pages>
@@ -930,7 +930,7 @@
     <paper id="102">
       <title>Recognizing Authority in Dialogue with an Integer Linear Programming Constrained Model</title>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Carolyn</first><last>Penstein Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein Rosé</last></author>
       <pages>1018–1026</pages>
       <url hash="6e770f6f">P11-1102</url>
       <bibkey>mayfield-penstein-rose-2011-recognizing</bibkey>
@@ -958,14 +958,14 @@
       <title>A Joint Sequence Translation Model with Integrated Reordering</title>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1045–1054</pages>
       <url hash="0630461d">P11-1105</url>
       <bibkey>durrani-etal-2011-joint</bibkey>
     </paper>
     <paper id="106">
       <title>Integrating surprisal and uncertain-input models in online sentence comprehension: formal techniques and empirical results</title>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1055–1065</pages>
       <url hash="f7eaaf44">P11-1106</url>
       <bibkey>levy-2011-integrating</bibkey>
@@ -993,7 +993,7 @@
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>1087–1097</pages>
       <url hash="0a18e008">P11-1109</url>
@@ -1002,7 +1002,7 @@
     <paper id="110">
       <title>Learning From Collective Human Behavior to Introduce Diversity in Lexical Choice</title>
       <author><first>Vahed</first><last>Qazvinian</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>1098–1108</pages>
       <url hash="6bc06efd">P11-1110</url>
       <bibkey>qazvinian-radev-2011-learning</bibkey>
@@ -1028,9 +1028,9 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Jianfeng</first><last>Zhang</last></author>
       <author><first>Bin</first><last>Ma</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>1127–1136</pages>
       <url hash="287f01b1">P11-1113</url>
       <bibkey>hong-etal-2011-using</bibkey>
@@ -1038,7 +1038,7 @@
     <paper id="114">
       <title>Peeling Back the Layers: Detecting Event Role Fillers in Secondary Contexts</title>
       <author><first>Ruihong</first><last>Huang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1137–1147</pages>
       <url hash="ce216c07">P11-1114</url>
       <bibkey>huang-riloff-2011-peeling</bibkey>
@@ -1046,7 +1046,7 @@
     <paper id="115">
       <title>Knowledge Base Population: Successful Approaches and Challenges</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>1148–1158</pages>
       <url hash="e95604d6">P11-1115</url>
       <bibkey>ji-grishman-2011-knowledge</bibkey>
@@ -1057,7 +1057,7 @@
       <author><first>Shuming</first><last>Shi</last></author>
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Shuqi</first><last>Sun</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>1159–1168</pages>
       <url hash="fa5d7632">P11-1116</url>
       <bibkey>zhang-etal-2011-nonlinear</bibkey>
@@ -1065,7 +1065,7 @@
     <paper id="117">
       <title>A Pronoun Anaphora Resolution System based on Factorial Hidden <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Dingcheng</first><last>Li</last></author>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>1169–1178</pages>
       <url hash="e04094f3">P11-1117</url>
@@ -1082,11 +1082,11 @@
     </paper>
     <paper id="119">
       <title>An Affect-Enriched Dialogue Act Classification Model for Task-Oriented Dialogue</title>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>Joseph</first><last>Grafsgaard</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="joseph-f-grafsgaard"><first>Joseph</first><last>Grafsgaard</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>1190–1199</pages>
       <url hash="b93f142a">P11-1119</url>
       <bibkey>boyer-etal-2011-affect</bibkey>
@@ -1094,7 +1094,7 @@
     <paper id="120">
       <title>Fine-Grained Class Label Markup of Search Queries</title>
       <author><first>Joseph</first><last>Reisinger</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>1200–1209</pages>
       <url hash="dd16d0cf">P11-1120</url>
       <bibkey>reisinger-pasca-2011-fine</bibkey>
@@ -1110,7 +1110,7 @@
     </paper>
     <paper id="122">
       <title>Crowdsourcing Translation: Professional Quality from Non-Professionals</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>1220–1229</pages>
       <url hash="bdca1cc3">P11-1122</url>
@@ -1119,7 +1119,7 @@
     </paper>
     <paper id="123">
       <title>A Statistical Tree Annotator and Its Applications</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Bing</first><last>Zhao</last></author>
       <pages>1230–1238</pages>
       <url hash="e46a60b2">P11-1123</url>
@@ -1130,7 +1130,7 @@
       <author><first>Yanjun</first><last>Ma</last></author>
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1239–1248</pages>
       <url hash="7161f225">P11-1124</url>
       <bibkey>ma-etal-2011-consistent</bibkey>
@@ -1138,7 +1138,7 @@
     <paper id="125">
       <title>Machine Translation System Combination by Confusion Forest</title>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1249–1257</pages>
       <url hash="452297d6">P11-1125</url>
       <bibkey>watanabe-sumita-2011-machine</bibkey>
@@ -1154,9 +1154,9 @@
     </paper>
     <paper id="127">
       <title>Minimum <fixed-case>B</fixed-case>ayes-risk System Combination</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>1268–1277</pages>
       <url hash="7374170d">P11-1127</url>
       <bibkey>gonzalez-rubio-etal-2011-minimum</bibkey>
@@ -1165,14 +1165,14 @@
       <title>Adjoining Tree-to-String Translation</title>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <pages>1278–1287</pages>
       <url hash="486facac">P11-1128</url>
       <bibkey>liu-etal-2011-adjoining</bibkey>
     </paper>
     <paper id="129">
       <title>Enhancing Language Models in Statistical Machine Translation with Backward N-grams and Mutual Information Triggers</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>1288–1297</pages>
@@ -1181,7 +1181,7 @@
     </paper>
     <paper id="130">
       <title>Translating from Morphologically Complex Languages: A Paraphrase-Based Approach</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>1298–1307</pages>
       <url hash="ff114b64">P11-1130</url>
@@ -1191,7 +1191,7 @@
       <title>Gappy Phrasal Alignment By Agreement</title>
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>1308–1317</pages>
       <url hash="6dbd8745">P11-1131</url>
       <bibkey>bansal-etal-2011-gappy</bibkey>
@@ -1215,7 +1215,7 @@
     <paper id="134">
       <title>Using Bilingual Parallel Corpora for Cross-Lingual Textual Entailment</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>1336–1345</pages>
       <url hash="4603f894">P11-1134</url>
@@ -1225,7 +1225,7 @@
       <title>Using Large Monolingual and Bilingual Corpora to Improve Coordination Disambiguation</title>
       <author><first>Shane</first><last>Bergsma</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>1346–1355</pages>
       <url hash="9b97c3d3">P11-1135</url>
       <attachment type="dataset" hash="34c13163">P11-1135.Datasets.tar.bz2</attachment>
@@ -1244,15 +1244,15 @@
     <paper id="137">
       <title>Discovering Sociolinguistic Associations with Structured Sparsity</title>
       <author><first>Jacob</first><last>Eisenstein</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <pages>1365–1374</pages>
       <url hash="abb155db">P11-1137</url>
       <bibkey>eisenstein-etal-2011-discovering</bibkey>
     </paper>
     <paper id="138">
       <title>Local and Global Algorithms for Disambiguation to <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Lev</first><last>Ratinov</last></author>
+      <author id="lev-ratinov"><first>Lev</first><last>Ratinov</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <author><first>Doug</first><last>Downey</last></author>
       <author><first>Mike</first><last>Anderson</last></author>
@@ -1262,7 +1262,7 @@
     </paper>
     <paper id="139">
       <title>A Stacked Sub-Word Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and Part-of-Speech Tagging</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>1385–1394</pages>
       <url hash="474b1f1a">P11-1139</url>
       <bibkey>sun-2011-stacked</bibkey>
@@ -1272,8 +1272,8 @@
       <author><first>Klaus</first><last>Macherey</last></author>
       <author><first>Andrew</first><last>Dai</last></author>
       <author><first>David</first><last>Talbot</last></author>
-      <author><first>Ashok</first><last>Popat</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="ashok-popat"><first>Ashok</first><last>Popat</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>1395–1404</pages>
       <url hash="22bd9b39">P11-1140</url>
       <bibkey>macherey-etal-2011-language</bibkey>
@@ -1287,8 +1287,8 @@
     </paper>
     <paper id="142">
       <title>A Simple Measure to Assess Non-response</title>
-      <author><first>Anselmo</first><last>Peñas</last></author>
-      <author><first>Alvaro</first><last>Rodrigo</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
+      <author id="alvaro-rodrigo"><first>Alvaro</first><last>Rodrigo</last></author>
       <pages>1415–1424</pages>
       <url hash="ed48c0b1">P11-1142</url>
       <bibkey>penas-rodrigo-2011-simple</bibkey>
@@ -1304,7 +1304,7 @@
     <paper id="144">
       <title>Semi-Supervised Frame-Semantic Parsing for Unknown Predicates</title>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1435–1444</pages>
       <url hash="dfc51719">P11-1144</url>
       <bibkey>das-smith-2011-semi</bibkey>
@@ -1320,7 +1320,7 @@
     <paper id="146">
       <title>Unsupervised Learning of Semantic Relation Composition</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>1456–1465</pages>
       <url hash="3f51814c">P11-1146</url>
       <bibkey>blanco-moldovan-2011-unsupervised</bibkey>
@@ -1329,15 +1329,15 @@
       <title>Unsupervised Discovery of Domain-Specific Knowledge from Text</title>
       <author><first>Dirk</first><last>Hovy</last></author>
       <author><first>Chunliang</first><last>Zhang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <pages>1466–1475</pages>
       <url hash="a6cb1857">P11-1147</url>
       <bibkey>hovy-etal-2011-unsupervised</bibkey>
     </paper>
     <paper id="148">
       <title>Latent Semantic Word Sense Induction and Disambiguation</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <pages>1476–1485</pages>
       <url hash="f2a8da6c">P11-1148</url>
@@ -1358,23 +1358,23 @@
       <author><first>Jianxing</first><last>Yu</last></author>
       <author><first>Zheng-Jun</first><last>Zha</last></author>
       <author><first>Meng</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1496–1505</pages>
       <url hash="622e2f1c">P11-1150</url>
       <bibkey>yu-etal-2011-aspect</bibkey>
     </paper>
     <paper id="151">
       <title>Collective Classification of Congressional Floor-Debate Transcripts</title>
-      <author><first>Clinton</first><last>Burfoot</last></author>
+      <author id="clint-burfoot"><first>Clinton</first><last>Burfoot</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1506–1515</pages>
       <url hash="7561dde2">P11-1151</url>
       <bibkey>burfoot-etal-2011-collective</bibkey>
     </paper>
     <paper id="152">
       <title>Integrating history-length interpolation and classes in language modeling</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1516–1525</pages>
       <url hash="cb923b10">P11-1152</url>
       <bibkey>schutze-2011-integrating</bibkey>
@@ -1383,7 +1383,7 @@
       <title>Structural Topic Model for Latent Topical Structure Analysis</title>
       <author><first>Hongning</first><last>Wang</last></author>
       <author><first>Duo</first><last>Zhang</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>1526–1535</pages>
       <url hash="99d8054b">P11-1153</url>
       <bibkey>wang-etal-2011-structural</bibkey>
@@ -1393,7 +1393,7 @@
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Karl</first><last>Grieser</last></author>
       <author><first>David</first><last>Newman</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1536–1545</pages>
       <url hash="df55e3dc">P11-1154</url>
       <bibkey>lau-etal-2011-automatic</bibkey>
@@ -1417,8 +1417,8 @@
     </paper>
     <paper id="157">
       <title>Effective Measures of Domain Similarity for Parsing</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1566–1576</pages>
       <url hash="cd8ecb6a">P11-1157</url>
       <bibkey>plank-van-noord-2011-effective</bibkey>
@@ -1435,15 +1435,15 @@
       <title>Improving <fixed-case>A</fixed-case>rabic Dependency Parsing with Form-based and Functional Morphological Features</title>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1586–1596</pages>
       <url hash="5d7f9630">P11-1159</url>
       <bibkey>marton-etal-2011-improving</bibkey>
     </paper>
     <paper id="160">
       <title>Partial Parsing from Bitext Projections</title>
-      <author><first>Prashanth</first><last>Mannem</last></author>
-      <author><first>Aswarth</first><last>Dara</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth</first><last>Dara</last></author>
       <pages>1597–1606</pages>
       <url hash="657faf6e">P11-1160</url>
       <attachment type="software" hash="faa4abdc">P11-1160.Software.tar.bz2</attachment>
@@ -1451,7 +1451,7 @@
     </paper>
     <paper id="161">
       <title>Ranking Class Labels Using Query Sessions</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>1607–1615</pages>
       <url hash="27785666">P11-1161</url>
       <bibkey>pasca-2011-ranking</bibkey>
@@ -1459,7 +1459,7 @@
     <paper id="162">
       <title>Insights from Network Structure for Text Mining</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1616–1625</pages>
       <url hash="38ed8713">P11-1162</url>
       <bibkey>kozareva-hovy-2011-insights</bibkey>
@@ -1468,7 +1468,7 @@
       <title>Event Extraction as Dependency Parsing</title>
       <author><first>David</first><last>McClosky</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1626–1635</pages>
       <url hash="53410b81">P11-1163</url>
       <bibkey>mcclosky-etal-2011-event</bibkey>
@@ -1487,8 +1487,8 @@
       <booktitle>Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies</booktitle>
       <url hash="04ae7f14">P11-2</url>
       <editor><first>Dekang</first><last>Lin</last></editor>
-      <editor><first>Yuji</first><last>Matsumoto</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -1502,7 +1502,7 @@
     <paper id="1">
       <title>Lexicographic Semirings for Exact Automata Encoding of Sequence Models</title>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
       <pages>1–5</pages>
       <url hash="ec7e27dd">P11-2001</url>
@@ -1511,7 +1511,7 @@
     <paper id="2">
       <title>Good Seed Makes a Good Crop: Accelerating Active Learning Using Language Modeling</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>6–10</pages>
       <url hash="f2a57e56">P11-2002</url>
       <bibkey>dligach-palmer-2011-good</bibkey>
@@ -1519,7 +1519,7 @@
     <paper id="3">
       <title>Temporal Restricted Boltzmann Machines for Dependency Parsing</title>
       <author><first>Nikhil</first><last>Garg</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>11–17</pages>
       <url hash="21c4cc25">P11-2003</url>
       <bibkey>garg-henderson-2011-temporal</bibkey>
@@ -1552,7 +1552,7 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>A</fixed-case>rabic Online Commentary Dataset: an Annotated Dataset of Informal <fixed-case>A</fixed-case>rabic with High Dialectal Content</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>37–41</pages>
       <url hash="e8c20b91">P11-2007</url>
@@ -1565,19 +1565,19 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Daniel</first><last>Mills</last></author>
+      <author id="daniel-p-mills"><first>Daniel</first><last>Mills</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Michael</first><last>Heilman</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>42–47</pages>
       <url hash="dfc52291">P11-2008</url>
       <bibkey>gimpel-etal-2011-part</bibkey>
     </paper>
     <paper id="9">
       <title>Semi-supervised condensed nearest neighbor for part-of-speech tagging</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>48–52</pages>
       <url hash="5df6ad00">P11-2009</url>
       <bibkey>sogaard-2011-semi</bibkey>
@@ -1642,7 +1642,7 @@
     </paper>
     <paper id="16">
       <title>That’s What She Said: Double Entendre Identification</title>
-      <author><first>Chloé</first><last>Kiddon</last></author>
+      <author id="chloe-kiddon"><first>Chloé</first><last>Kiddon</last></author>
       <author><first>Yuriy</first><last>Brun</last></author>
       <pages>89–94</pages>
       <url hash="d80490f5">P11-2016</url>
@@ -1666,7 +1666,7 @@
     </paper>
     <paper id="19">
       <title>Subjective Natural Language Problems: Motivations, Applications, Characterizations, and Implications</title>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <pages>107–112</pages>
       <url hash="b07908a2">P11-2019</url>
       <bibkey>ovesdotter-alm-2011-subjective</bibkey>
@@ -1674,8 +1674,8 @@
     <paper id="20">
       <title>Entrainment in Speech Preceding Backchannels.</title>
       <author><first>Rivka</first><last>Levitan</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>113–117</pages>
       <url hash="bdce116d">P11-2020</url>
       <bibkey>levitan-etal-2011-entrainment</bibkey>
@@ -1683,7 +1683,7 @@
     <paper id="21">
       <title>Question Detection in Spoken Conversations Using Textual Conversations</title>
       <author><first>Anna</first><last>Margolis</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>118–124</pages>
       <url hash="14c4aebc">P11-2021</url>
       <bibkey>margolis-ostendorf-2011-question</bibkey>
@@ -1718,8 +1718,8 @@
     <paper id="25">
       <title>A Corpus of Scope-disambiguated <fixed-case>E</fixed-case>nglish Text</title>
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <pages>141–146</pages>
       <url hash="e862eda9">P11-2025</url>
       <bibkey>manshadi-etal-2011-corpus</bibkey>
@@ -1727,15 +1727,15 @@
     <paper id="26">
       <title>From Bilingual Dictionaries to Interlingual Document Representations</title>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
       <pages>147–152</pages>
       <url hash="5676fa52">P11-2026</url>
       <bibkey>jagarlamudi-etal-2011-bilingual</bibkey>
     </paper>
     <paper id="27">
       <title><fixed-case>AM</fixed-case>-<fixed-case>FM</fixed-case>: A Semantic Framework for Translation Quality Assessment</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>153–158</pages>
       <url hash="d318e718">P11-2027</url>
@@ -1744,7 +1744,7 @@
     <paper id="28">
       <title>Automatic Evaluation of <fixed-case>C</fixed-case>hinese Translation Output: Word-Level or Character-Level?</title>
       <author><first>Maoxi</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>159–164</pages>
       <url hash="ad955fe8">P11-2028</url>
@@ -1761,17 +1761,17 @@
     <paper id="30">
       <title>Word Alignment via Submodular Maximization over Matroids</title>
       <author><first>Hui</first><last>Lin</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>170–175</pages>
       <url hash="39a7dee0">P11-2030</url>
       <bibkey>lin-bilmes-2011-word</bibkey>
     </paper>
     <paper id="31">
       <title>Better Hypothesis Testing for Statistical Machine Translation: Controlling for Optimizer Instability</title>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>176–181</pages>
       <url hash="8437abf0">P11-2031</url>
       <bibkey>clark-etal-2011-better</bibkey>
@@ -1779,7 +1779,7 @@
     <paper id="32">
       <title><fixed-case>B</fixed-case>ayesian Word Alignment for Statistical Machine Translation</title>
       <author><first>Coşkun</first><last>Mermer</last></author>
-      <author><first>Murat</first><last>Saraçlar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraçlar</last></author>
       <pages>182–187</pages>
       <url hash="46949f34">P11-2032</url>
       <attachment type="software" hash="a1f3a916">P11-2032.Software.txt</attachment>
@@ -1796,8 +1796,8 @@
     <paper id="34">
       <title>Reversible Stochastic Attribute-Value Grammars</title>
       <author><first>Daniël</first><last>de Kok</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>194–199</pages>
       <url hash="bcd82ce5">P11-2034</url>
       <bibkey>de-kok-etal-2011-reversible</bibkey>
@@ -1839,14 +1839,14 @@
       <title>Query Snowball: A Co-occurrence-based Approach to Multi-document Summarization for Question Answering</title>
       <author><first>Hajime</first><last>Morita</last></author>
       <author><first>Tetsuya</first><last>Sakai</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>223–229</pages>
       <url hash="8c4abaeb">P11-2039</url>
       <bibkey>morita-etal-2011-query</bibkey>
     </paper>
     <paper id="40">
       <title>Discrete vs. Continuous Rating Scales for Language Evaluation in <fixed-case>NLP</fixed-case></title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <pages>230–235</pages>
       <url hash="15330596">P11-2040</url>
@@ -1881,7 +1881,7 @@
     <paper id="44">
       <title>Optimal and Syntactically-Informed Decoding for Monolingual Phrase-Based Alignment</title>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>254–259</pages>
       <url hash="d9a17c60">P11-2044</url>
       <bibkey>thadani-mckeown-2011-optimal</bibkey>
@@ -1889,7 +1889,7 @@
     <paper id="45">
       <title>Can Document Selection Help Semi-supervised Learning? A Case Study On Event Extraction</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>260–265</pages>
       <url hash="b82504bf">P11-2045</url>
       <bibkey>liao-grishman-2011-document</bibkey>
@@ -1898,8 +1898,8 @@
       <title>Relation Guided Bootstrapping of Semantic Lexicons</title>
       <author><first>Tara</first><last>McIntosh</last></author>
       <author><first>Lars</first><last>Yencken</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>266–270</pages>
       <url hash="3a1a0566">P11-2046</url>
       <bibkey>mcintosh-etal-2011-relation</bibkey>
@@ -1908,7 +1908,7 @@
       <title>Model-Portability Experiments for Textual Temporal Analysis</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>271–276</pages>
       <url hash="7fda6154">P11-2047</url>
       <bibkey>kolomiyets-etal-2011-model</bibkey>
@@ -1934,7 +1934,7 @@
       <title>Coreference for Learning to Extract Relations: Yes <fixed-case>V</fixed-case>irginia, Coreference Matters</title>
       <author><first>Ryan</first><last>Gabbard</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>288–293</pages>
       <url hash="44d99264">P11-2050</url>
       <bibkey>gabbard-etal-2011-coreference</bibkey>
@@ -1942,16 +1942,16 @@
     <paper id="51">
       <title>Corpus Expansion for Statistical Machine Translation with Semantic Role Label Substitution Rules</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>294–298</pages>
       <url hash="040b0578">P11-2051</url>
       <bibkey>gao-vogel-2011-corpus</bibkey>
     </paper>
     <paper id="52">
       <title>Scaling up Automatic Cross-Lingual Semantic Role Annotation</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>299–304</pages>
       <url hash="f6c31a66">P11-2052</url>
       <attachment type="dataset" hash="60e669ea">P11-2052.Datasets.txt</attachment>
@@ -1960,10 +1960,10 @@
     <paper id="53">
       <title>Towards Tracking Semantic Change by Visual Analytics</title>
       <author><first>Christian</first><last>Rohrdantz</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Thomas</first><last>Mayer</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Daniel A.</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel A.</first><last>Keim</last></author>
       <author><first>Frans</first><last>Plank</last></author>
       <pages>305–310</pages>
       <url hash="7d4242eb">P11-2053</url>
@@ -1973,8 +1973,8 @@
     <paper id="54">
       <title>Improving Classification of Medical Assertions in Clinical Notes</title>
       <author><first>Youngjun</first><last>Kim</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Stéphane</first><last>Meystre</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="stephane-meystre"><first>Stéphane</first><last>Meystre</last></author>
       <pages>311–316</pages>
       <url hash="01ae9a29">P11-2054</url>
       <bibkey>kim-etal-2011-improving</bibkey>
@@ -1982,7 +1982,7 @@
     <paper id="55">
       <title><fixed-case>P</fixed-case>ara<fixed-case>S</fixed-case>ense or How to Use Parallel Corpora for Word Sense Disambiguation</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Martine</first><last>De Cock</last></author>
       <pages>317–322</pages>
       <url hash="38c3a2e4">P11-2055</url>
@@ -1994,7 +1994,7 @@
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>323–328</pages>
       <url hash="50994db4">P11-2056</url>
       <bibkey>hovy-etal-2011-models</bibkey>
@@ -2021,7 +2021,7 @@
       <author><first>Marjorie</first><last>Freedman</last></author>
       <author><first>Alex</first><last>Baron</last></author>
       <author><first>Vasin</first><last>Punyakanok</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>341–345</pages>
       <url hash="fcfd582b">P11-2059</url>
       <bibkey>freedman-etal-2011-language</bibkey>
@@ -2029,7 +2029,7 @@
     <paper id="60">
       <title>Automatic Detection and Correction of Errors in Dependency Treebanks</title>
       <author><first>Alexander</first><last>Volokh</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>346–350</pages>
       <url hash="0a703bb7">P11-2060</url>
       <bibkey>volokh-neumann-2011-automatic</bibkey>
@@ -2037,7 +2037,7 @@
     <paper id="61">
       <title>Temporal Evaluation</title>
       <author><first>Naushad</first><last>UzZaman</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>351–356</pages>
       <url hash="a15e2937">P11-2061</url>
       <attachment type="software" hash="7810f55c">P11-2061.Software.zip</attachment>
@@ -2054,7 +2054,7 @@
     <paper id="63">
       <title><fixed-case>NULEX</fixed-case>: An Open-License Broad Coverage Lexicon</title>
       <author><first>Clifton</first><last>McFate</last></author>
-      <author><first>Kenneth</first><last>Forbus</last></author>
+      <author id="kenneth-forbus"><first>Kenneth</first><last>Forbus</last></author>
       <pages>363–367</pages>
       <url hash="2f705ace">P11-2063</url>
       <attachment type="dataset" hash="1fa6e64f">P11-2063.Datasets.tar.bz2</attachment>
@@ -2062,14 +2062,14 @@
     </paper>
     <paper id="64">
       <title><fixed-case>E</fixed-case>ven the Abstract have Color: Consensus in Word-Colour Associations</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>368–373</pages>
       <url hash="1332152e">P11-2064</url>
       <bibkey>mohammad-2011-even</bibkey>
     </paper>
     <paper id="65">
       <title>Detection of Agreement and Disagreement in Broadcast Conversations</title>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <author><first>Sibel</first><last>Yaman</last></author>
       <author><first>Kristin</first><last>Precoda</last></author>
       <author><first>Colleen</first><last>Richey</last></author>
@@ -2081,8 +2081,8 @@
     <paper id="66">
       <title>Dealing with Spurious Ambiguity in Learning <fixed-case>ITG</fixed-case>-based Word Alignment</title>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>379–383</pages>
       <url hash="bdfd5ed5">P11-2066</url>
       <bibkey>huang-etal-2011-dealing</bibkey>
@@ -2099,9 +2099,9 @@
     </paper>
     <paper id="68">
       <title>Improving On-line Handwritten Recognition using Translation Models in Multimodal Interactive Machine Translation</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>389–394</pages>
       <url hash="b13dc2bf">P11-2068</url>
       <bibkey>alabau-etal-2011-improving</bibkey>
@@ -2127,7 +2127,7 @@
     </paper>
     <paper id="71">
       <title>Domain Adaptation for Machine Translation by Mining Unseen Words</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
       <pages>407–412</pages>
       <url hash="12707191">P11-2071</url>
@@ -2169,9 +2169,9 @@
     </paper>
     <paper id="76">
       <title>Reordering Constraint Based on Document-Level Context</title>
-      <author><first>Takashi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takashi</first><last>Onishi</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>434–438</pages>
       <url hash="0e615178">P11-2076</url>
       <bibkey>onishi-etal-2011-reordering</bibkey>
@@ -2188,7 +2188,7 @@
       <title>On-line Language Model Biasing for Statistical Machine Translation</title>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>445–449</pages>
       <url hash="251dd461">P11-2078</url>
       <bibkey>ananthakrishnan-etal-2011-line</bibkey>
@@ -2197,9 +2197,9 @@
       <title>Reordering Modeling using Weighted Alignment Matrices</title>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Tiago</first><last>Luís</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>450–454</pages>
       <url hash="d3b61c4e">P11-2079</url>
       <bibkey>ling-etal-2011-reordering</bibkey>
@@ -2224,16 +2224,16 @@
     <paper id="82">
       <title>“<fixed-case>I</fixed-case> Thou Thee, Thou Traitor”: Predicting Formal vs. Informal Address in <fixed-case>E</fixed-case>nglish Literature</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>467–472</pages>
       <url hash="f093f423">P11-2082</url>
       <bibkey>faruqui-pado-2011-thou</bibkey>
     </paper>
     <paper id="83">
       <title>Clustering Comparable Corpora For Bilingual Lexicon Extraction</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author><first>Bo</first><last>Li</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>473–478</pages>
       <url hash="cee72fce">P11-2083</url>
       <bibkey>li-etal-2011-clustering</bibkey>
@@ -2242,7 +2242,7 @@
       <title>Identifying Word Translations from Comparable Corpora Using Latent Topic Models</title>
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Wim</first><last>De Smet</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>479–484</pages>
       <url hash="60df761b">P11-2084</url>
       <bibkey>vulic-etal-2011-identifying</bibkey>
@@ -2253,7 +2253,7 @@
       <author><first>Lixing</first><last>Xie</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Liyun</first><last>Ru</last></author>
       <pages>485–490</pages>
       <url hash="5dfd1526">P11-2085</url>
@@ -2262,7 +2262,7 @@
     <paper id="86">
       <title>Automatic Assessment of Coverage Quality in Intelligence Reports</title>
       <author><first>Samuel</first><last>Brody</last></author>
-      <author><first>Paul</first><last>Kantor</last></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last></author>
       <pages>491–495</pages>
       <url hash="4549e58b">P11-2086</url>
       <bibkey>brody-kantor-2011-automatic</bibkey>
@@ -2271,7 +2271,7 @@
       <title>Putting it Simply: a Context-Aware Approach to Lexical Simplification</title>
       <author><first>Or</first><last>Biran</last></author>
       <author><first>Samuel</first><last>Brody</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <pages>496–501</pages>
       <url hash="ae774e6a">P11-2087</url>
       <bibkey>biran-etal-2011-putting</bibkey>
@@ -2279,7 +2279,7 @@
     <paper id="88">
       <title>Automatically Predicting Peer-Review Helpfulness</title>
       <author><first>Wenting</first><last>Xiong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>502–507</pages>
       <url hash="280223b9">P11-2088</url>
       <attachment type="dataset" hash="efb61885">P11-2088.Datasets.zip</attachment>
@@ -2288,8 +2288,8 @@
     <paper id="89">
       <title>They Can Help: Using Crowdsourcing to Improve the Evaluation of Grammatical Error Detection Systems</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Alla</first><last>Rozovskaya</last></author>
       <pages>508–513</pages>
       <url hash="4973c3e3">P11-2089</url>
@@ -2314,8 +2314,8 @@
     </paper>
     <paper id="92">
       <title>Improved Modeling of Out-Of-Vocabulary Words Using Morphological Classes</title>
-      <author><first>Thomas</first><last>Mueller</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>524–528</pages>
       <url hash="6d5bbe07">P11-2092</url>
       <bibkey>mueller-schuetze-2011-improved</bibkey>
@@ -2334,7 +2334,7 @@
       <title>Nonparametric <fixed-case>B</fixed-case>ayesian Machine Transliteration with Synchronous <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars</title>
       <author><first>Yun</first><last>Huang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>534–539</pages>
       <url hash="9ad79908">P11-2094</url>
       <bibkey>huang-etal-2011-nonparametric</bibkey>
@@ -2350,7 +2350,7 @@
     <paper id="96">
       <title>An Empirical Evaluation of Data-Driven Paraphrase Generation Techniques</title>
       <author><first>Donald</first><last>Metzler</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Chunliang</first><last>Zhang</last></author>
       <pages>546–551</pages>
       <url hash="bcf133d4">P11-2096</url>
@@ -2378,7 +2378,7 @@
       <author><first>Paula</first><last>Carvalho</last></author>
       <author><first>Luís</first><last>Sarmento</last></author>
       <author><first>Jorge</first><last>Teixeira</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <pages>564–568</pages>
       <url hash="fa55876d">P11-2099</url>
       <bibkey>carvalho-etal-2011-liars</bibkey>
@@ -2411,7 +2411,7 @@
     <paper id="103">
       <title>Subjectivity and Sentiment Analysis of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Mohammed</first><last>Korayem</last></author>
       <pages>587–591</pages>
       <url hash="65144598">P11-2103</url>
@@ -2419,10 +2419,10 @@
     </paper>
     <paper id="104">
       <title>Identifying the Semantic Orientation of Foreign Words</title>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>592–597</pages>
       <url hash="912dcb8b">P11-2104</url>
       <bibkey>hassan-etal-2011-identifying</bibkey>
@@ -2430,7 +2430,7 @@
     <paper id="105">
       <title>Hierarchical Text Classification with Latent Concepts</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <author><first>Zhao</first><last>Liu</last></author>
       <author><first>Jinlong</first><last>Zhou</last></author>
       <pages>598–602</pages>
@@ -2449,7 +2449,7 @@
     <paper id="107">
       <title>Predicting Relative Prominence in Noun-Noun Compounds</title>
       <author><first>Taniya</first><last>Mishra</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>609–613</pages>
       <url hash="2b165c08">P11-2107</url>
       <bibkey>mishra-bangalore-2011-predicting</bibkey>
@@ -2457,7 +2457,7 @@
     <paper id="108">
       <title>Contrasting Multi-Lingual Prosodic Cues to Predict Verbal Feedback for Rapport</title>
       <author><first>Siwei</first><last>Wang</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>614–619</pages>
       <url hash="86a6ac36">P11-2108</url>
       <bibkey>wang-levow-2011-contrasting</bibkey>
@@ -2465,7 +2465,7 @@
     <paper id="109">
       <title>Generalized Interpolation in Decision Tree <fixed-case>LM</fixed-case></title>
       <author><first>Denis</first><last>Filimonov</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>620–624</pages>
       <url hash="824cdb45">P11-2109</url>
       <bibkey>filimonov-harper-2011-generalized</bibkey>
@@ -2522,9 +2522,9 @@
     </paper>
     <paper id="116">
       <title>Does Size Matter – How Much Data is Required to Train a <fixed-case>REG</fixed-case> Algorithm?</title>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Ruud</first><last>Koolen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>660–664</pages>
       <url hash="ed42804f">P11-2116</url>
@@ -2532,7 +2532,7 @@
     </paper>
     <paper id="117">
       <title><fixed-case>S</fixed-case>imple <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia: A New Text Simplification Task</title>
-      <author><first>William</first><last>Coster</last></author>
+      <author id="william-coster"><first>William</first><last>Coster</last></author>
       <author><first>David</first><last>Kauchak</last></author>
       <pages>665–669</pages>
       <url hash="33ad8595">P11-2117</url>
@@ -2542,7 +2542,7 @@
     <paper id="118">
       <title>A Hierarchical Model of Web Summaries</title>
       <author><first>Yves</first><last>Petinot</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
       <pages>670–675</pages>
       <url hash="72354e15">P11-2118</url>
@@ -2559,15 +2559,15 @@
     </paper>
     <paper id="120">
       <title>Data point selection for cross-language adaptation of dependency parsers</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>682–686</pages>
       <url hash="cea5c6dd">P11-2120</url>
       <bibkey>sogaard-2011-data</bibkey>
     </paper>
     <paper id="121">
       <title>Getting the Most out of Transition-based Dependency Parsing</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>687–692</pages>
       <url hash="aa58d24b">P11-2121</url>
       <bibkey>choi-palmer-2011-getting</bibkey>
@@ -2583,9 +2583,9 @@
     </paper>
     <paper id="123">
       <title>Improving Dependency Parsing with Semantic Classes</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>699–703</pages>
       <url hash="006bcdb9">P11-2123</url>
@@ -2601,7 +2601,7 @@
     </paper>
     <paper id="125">
       <title>An Ensemble Model that Combines Syntactic and Semantic Clustering for Discriminative Dependency Parsing</title>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Marzieh</first><last>Razavi</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>710–714</pages>
@@ -2640,8 +2640,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> 2011 Student Session</booktitle>
       <url hash="feb453cd">P11-3</url>
-      <editor><first>Sasa</first><last>Petrovic</last></editor>
-      <editor><first>Ethan</first><last>Selfridge</last></editor>
+      <editor id="sasa-petrovic"><first>Sasa</first><last>Petrovic</last></editor>
+      <editor id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></editor>
       <editor><first>Emily</first><last>Pitler</last></editor>
       <editor><first>Miles</first><last>Osborne</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
@@ -2688,7 +2688,7 @@
     </paper>
     <paper id="5">
       <title>Extracting and Classifying <fixed-case>U</fixed-case>rdu Multiword Expressions</title>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Sebastian</first><last>Sulger</last></author>
       <pages>24–29</pages>
       <url hash="132ce5e8">P11-3005</url>
@@ -2704,7 +2704,7 @@
     </paper>
     <paper id="7">
       <title>Syntax-based Statistical Machine Translation using Tree Automata and Tree Transducers</title>
-      <author><first>Daniel Emilio</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel Emilio</first><last>Beck</last></author>
       <pages>36–40</pages>
       <url hash="77ce1b28">P11-3007</url>
       <bibkey>beck-2011-syntax</bibkey>
@@ -2834,15 +2834,15 @@
     <paper id="1">
       <title><fixed-case>H</fixed-case>indi to <fixed-case>P</fixed-case>unjabi Machine Translation System</title>
       <author><first>Vishal</first><last>Goyal</last></author>
-      <author><first>Gurpreet</first><last>Singh Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet</first><last>Singh Lehal</last></author>
       <pages>1–6</pages>
       <url hash="9f678551">P11-4001</url>
       <bibkey>goyal-singh-lehal-2011-hindi</bibkey>
     </paper>
     <paper id="2">
       <title>The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Searchbench</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
       <author><first>Christian</first><last>Spurk</last></author>
       <author><first>Jörg</first><last>Steffen</last></author>
       <author><first>Rui</first><last>Wang</last></author>
@@ -2860,7 +2860,7 @@
     </paper>
     <paper id="4">
       <title>A Mobile Touchable Application for Online Topic Graph Extraction and Exploration of Web Content</title>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <author><first>Sven</first><last>Schmeier</last></author>
       <pages>20–25</pages>
       <url hash="8d8ec78d">P11-4004</url>
@@ -2868,10 +2868,10 @@
     </paper>
     <paper id="5">
       <title><fixed-case>E</fixed-case>d<fixed-case>I</fixed-case>t: A Broad-Coverage Grammar Checker Using Pattern Grammar</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>26–31</pages>
       <url hash="8763908c">P11-4005</url>
       <bibkey>huang-etal-2011-edit</bibkey>
@@ -2881,7 +2881,7 @@
       <author><first>Cheng-Te</first><last>Li</last></author>
       <author><first>Chien-Yuan</first><last>Wang</last></author>
       <author><first>Chien-Lin</first><last>Tseng</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>32–37</pages>
       <url hash="75aa8d96">P11-4006</url>
       <bibkey>li-etal-2011-memetube</bibkey>
@@ -2913,7 +2913,7 @@
     <paper id="9">
       <title>Dr Sentiment Knows Everything!</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>50–55</pages>
       <url hash="06566180">P11-4009</url>
       <bibkey>das-bandyopadhyay-2011-dr</bibkey>
@@ -2928,18 +2928,18 @@
     <paper id="11">
       <title>Prototyping virtual instructors from human-human corpora</title>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <pages>62–67</pages>
       <url hash="e97b38dc">P11-4011</url>
       <bibkey>benotti-denis-2011-prototyping</bibkey>
     </paper>
     <paper id="12">
       <title>An Interactive Machine Translation System with Online Learning</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>68–73</pages>
       <url hash="f7ad2c70">P11-4012</url>
       <bibkey>ortiz-martinez-etal-2011-interactive</bibkey>
@@ -2956,7 +2956,7 @@
     </paper>
     <paper id="14">
       <title>A Speech-based Just-in-Time Retrieval System using Semantic Search</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Majid</first><last>Yazdani</last></author>
       <author><first>Alexandre</first><last>Nanchen</last></author>
       <author><first>Philip N.</first><last>Garner</last></author>
@@ -2967,10 +2967,10 @@
     <paper id="15">
       <title><fixed-case>MACAON</fixed-case> An <fixed-case>NLP</fixed-case> Tool Suite for Processing Word Lattices</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Jean-François</first><last>Rey</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>86–91</pages>
       <url hash="73d608b1">P11-4015</url>
       <bibkey>nasr-etal-2011-macaon</bibkey>
@@ -2996,7 +2996,7 @@
     <paper id="18">
       <title>An Efficient Indexer for Large N-Gram Corpora</title>
       <author><first>Hakan</first><last>Ceylan</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>103–108</pages>
       <url hash="0d9823c5">P11-4018</url>
       <bibkey>ceylan-mihalcea-2011-efficient</bibkey>
@@ -3004,7 +3004,7 @@
     <paper id="19">
       <title><fixed-case>S</fixed-case>ystem<fixed-case>T</fixed-case>: A Declarative Information Extraction System</title>
       <author><first>Yunyao</first><last>Li</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Laura</first><last>Chiticariu</last></author>
       <pages>109–114</pages>
       <url hash="d0491f65">P11-4019</url>
@@ -3014,8 +3014,8 @@
       <title><fixed-case>S</fixed-case>ci<fixed-case>S</fixed-case>umm: A Multi-Document Summarization System for Scientific Articles</title>
       <author><first>Nitin</first><last>Agarwal</last></author>
       <author><first>Ravi Shankar</first><last>Reddy</last></author>
-      <author><first>Kiran</first><last>Gvr</last></author>
-      <author><first>Carolyn Penstein</first><last>Rosé</last></author>
+      <author id="kiran-gvr"><first>Kiran</first><last>Gvr</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></author>
       <pages>115–120</pages>
       <url hash="60ed1886">P11-4020</url>
       <bibkey>agarwal-etal-2011-scisumm</bibkey>
@@ -3023,7 +3023,7 @@
     <paper id="21">
       <title><fixed-case>C</fixed-case>lairlib: A Toolkit for Natural Language Processing, Information Retrieval, and Network Analysis</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>121–126</pages>
       <url hash="d7d27321">P11-4021</url>
       <bibkey>abu-jbara-radev-2011-clairlib</bibkey>
@@ -3031,8 +3031,8 @@
     <paper id="22">
       <title><fixed-case>C</fixed-case>-Feel-It: A Sentiment Analyzer for Micro-blogs</title>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Balamurali</first><last>AR</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>AR</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Rajat</first><last>Mohanty</last></author>
       <pages>127–132</pages>
       <url hash="b239d592">P11-4022</url>
@@ -3044,7 +3044,7 @@
       <author><first>Cheng-Lun</first><last>Yang</last></author>
       <author><first>Bo-Nian</first><last>Chen</last></author>
       <author><first>Yen-Kai</first><last>Wang</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>133–138</pages>
       <url hash="f30362e1">P11-4023</url>
       <bibkey>weng-etal-2011-imass</bibkey>
@@ -3077,7 +3077,7 @@
     </frontmatter>
     <paper id="1">
       <title>Beyond Structured Prediction: Inverse Reinforcement Learning</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1</pages>
       <url hash="99f4fee8">P11-5001</url>
       <bibkey>daume-iii-2011-beyond</bibkey>
@@ -3086,7 +3086,7 @@
       <title>Formal and Empirical Grammatical Inference</title>
       <author><first>Jeffrey</first><last>Heinz</last></author>
       <author><first>Colin</first><last>de la Higuera</last></author>
-      <author><first>Menno</first><last>van Zannen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zannen</last></author>
       <pages>2</pages>
       <url hash="6366394a">P11-5002</url>
       <bibkey>heinz-etal-2011-formal</bibkey>
@@ -3102,7 +3102,7 @@
     </paper>
     <paper id="4">
       <title>Web Search Queries as a Corpus</title>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>4</pages>
       <url hash="12b94e72">P11-5004</url>
       <bibkey>pasca-2011-web</bibkey>
@@ -3111,15 +3111,15 @@
       <title>Rich Prior Knowledge in Learning for Natural Language Processing</title>
       <author><first>Gregory</first><last>Druck</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>João</first><last>Graça</last></author>
+      <author id="joao-graca"><first>João</first><last>Graça</last></author>
       <pages>5</pages>
       <url hash="7077f5e8">P11-5005</url>
       <bibkey>druck-etal-2011-rich</bibkey>
     </paper>
     <paper id="6">
       <title>Dual Decomposition for Natural Language Processing</title>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <pages>6</pages>
       <url hash="618bcc55">P11-5006</url>
       <bibkey>collins-rush-2011-dual</bibkey>
diff --git a/data/xml/P12.xml b/data/xml/P12.xml
index cea56821b4..8a416c8e9d 100644
--- a/data/xml/P12.xml
+++ b/data/xml/P12.xml
@@ -5,10 +5,10 @@
       <booktitle>Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <url hash="72fe65ea">P12-1</url>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>Chin-Yew</first><last>Lin</last></editor>
+      <editor id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></editor>
       <editor><first>Miles</first><last>Osborne</last></editor>
-      <editor><first>Gary Geunbae</first><last>Lee</last></editor>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
       <month>July</month>
@@ -34,7 +34,7 @@
       <title>Joint Feature Selection in Distributed Stochastic Learning for Large-Scale Discriminative Training in <fixed-case>SMT</fixed-case></title>
       <author><first>Patrick</first><last>Simianer</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>11–21</pages>
       <url hash="23904ae1">P12-1002</url>
       <bibkey>simianer-etal-2012-joint</bibkey>
@@ -52,15 +52,15 @@
     </paper>
     <paper id="4">
       <title>Probabilistic Integration of Partial Lexical Information for Noise Robust Haptic Voice Recognition</title>
-      <author><first>Khe Chai</first><last>Sim</last></author>
+      <author id="khe-chai-sim"><first>Khe Chai</first><last>Sim</last></author>
       <pages>31–39</pages>
       <url hash="1357233a">P12-1004</url>
       <bibkey>sim-2012-probabilistic</bibkey>
     </paper>
     <paper id="5">
       <title>A Nonparametric <fixed-case>B</fixed-case>ayesian Approach to Acoustic Model Discovery</title>
-      <author><first>Chia-ying</first><last>Lee</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="chia-ying-lee"><first>Chia-ying</first><last>Lee</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>40–49</pages>
       <url hash="af944b65">P12-1005</url>
       <bibkey>lee-glass-2012-nonparametric</bibkey>
@@ -103,14 +103,14 @@
       <title>Extracting Narrative Timelines as Temporal Dependency Structures</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>88–97</pages>
       <url hash="f7cc6812">P12-1010</url>
       <bibkey>kolomiyets-etal-2012-extracting</bibkey>
     </paper>
     <paper id="11">
       <title>Labeling Documents with Timestamps: Learning from their Time Expressions</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>98–106</pages>
       <url hash="e9f4de81">P12-1011</url>
       <bibkey>chambers-2012-labeling</bibkey>
@@ -118,9 +118,9 @@
     <paper id="12">
       <title>Temporally Anchored Relation Extraction</title>
       <author><first>Guillermo</first><last>Garrido</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Bernardo</first><last>Cabaleiro</last></author>
-      <author><first>Álvaro</first><last>Rodrigo</last></author>
+      <author id="alvaro-rodrigo"><first>Álvaro</first><last>Rodrigo</last></author>
       <pages>107–116</pages>
       <url hash="bcf15c3f">P12-1012</url>
       <bibkey>garrido-etal-2012-temporally</bibkey>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="14">
       <title>Learning High-Level Planning from Text</title>
-      <author><first>S.R.K.</first><last>Branavan</last></author>
+      <author id="s-r-k-branavan"><first>S.R.K.</first><last>Branavan</last></author>
       <author><first>Nate</first><last>Kushman</last></author>
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
@@ -148,9 +148,9 @@
     <paper id="15">
       <title>Distributional Semantics in Technicolor</title>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Nam-Khanh</first><last>Tran</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="nam-khanh-tran"><first>Nam-Khanh</first><last>Tran</last></author>
       <pages>136–145</pages>
       <url hash="87509e74">P12-1015</url>
       <bibkey>bruni-etal-2012-distributional</bibkey>
@@ -167,7 +167,7 @@
       <title>Deciphering Foreign Language by Combining Language Models and Context Vectors</title>
       <author><first>Malte</first><last>Nuhn</last></author>
       <author><first>Arne</first><last>Mauser</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>156–164</pages>
       <url hash="85b18d8d">P12-1017</url>
       <bibkey>nuhn-etal-2012-deciphering</bibkey>
@@ -186,7 +186,7 @@
       <title>Fast Syntactic Analysis for Statistical Language Modeling via Substructure Sharing and Uptraining</title>
       <author><first>Ariya</first><last>Rastrow</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>175–183</pages>
       <url hash="d6deb993">P12-1019</url>
       <bibkey>rastrow-etal-2012-fast</bibkey>
@@ -194,7 +194,7 @@
     <paper id="20">
       <title>Bootstrapping a Unified Model of Lexical and Phonetic Acquisition</title>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>184–193</pages>
       <url hash="92bbc6ff">P12-1020</url>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="22">
       <title>Discriminative Strategies to Integrate Multiword Expression Recognition and Parsing</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Anthony</first><last>Sigogne</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <pages>204–212</pages>
@@ -229,18 +229,18 @@
     </paper>
     <paper id="24">
       <title>Spectral Learning of Latent-Variable <fixed-case>PCFG</fixed-case>s</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Dean P.</first><last>Foster</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="dean-foster"><first>Dean P.</first><last>Foster</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>223–231</pages>
       <url hash="a547bb94">P12-1024</url>
       <bibkey>cohen-etal-2012-spectral</bibkey>
     </paper>
     <paper id="25">
       <title>Reducing Approximation and Estimation Errors for <fixed-case>C</fixed-case>hinese Lexical Processing with Heterogeneous Annotations</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>232–241</pages>
       <url hash="5af6a0d8">P12-1025</url>
@@ -249,7 +249,7 @@
     </paper>
     <paper id="26">
       <title>Capturing Paradigmatic and Syntagmatic Lexical Relations: Towards Accurate <fixed-case>C</fixed-case>hinese Part-of-Speech Tagging</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <pages>242–252</pages>
       <url hash="7509d5df">P12-1026</url>
@@ -268,8 +268,8 @@
       <title>Verb Classification using Distributional Similarity in Syntactic and Semantic Structures</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>263–272</pages>
       <url hash="d35e4f6b">P12-1028</url>
       <bibkey>croce-etal-2012-verb</bibkey>
@@ -332,7 +332,7 @@
     <paper id="35">
       <title>A Joint Model for Discovery of Aspects in Utterances</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>330–338</pages>
       <url hash="4610c615">P12-1035</url>
       <bibkey>celikyilmaz-hakkani-tur-2012-joint</bibkey>
@@ -348,7 +348,7 @@
     <paper id="37">
       <title>Learning to “Read Between the Lines” using <fixed-case>B</fixed-case>ayesian Logic Programs</title>
       <author><first>Sindhu</first><last>Raghavan</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Hyeonseo</first><last>Ku</last></author>
       <pages>349–358</pages>
       <url hash="ad42997e">P12-1037</url>
@@ -358,8 +358,8 @@
       <title>Collective Generation of Natural Image Descriptions</title>
       <author><first>Polina</first><last>Kuznetsova</last></author>
       <author><first>Vicente</first><last>Ordonez</last></author>
-      <author><first>Alexander</first><last>Berg</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="alexander-berg"><first>Alexander</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>359–368</pages>
       <url hash="31d27744">P12-1038</url>
@@ -394,8 +394,8 @@
       <title>Subgroup Detection in Ideological Discussions</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>399–409</pages>
       <url hash="a091ba7f">P12-1042</url>
       <bibkey>abu-jbara-etal-2012-subgroup</bibkey>
@@ -462,7 +462,7 @@
     <paper id="49">
       <title>A Statistical Model for Unsupervised and Semi-supervised Transliteration Mining</title>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>469–477</pages>
       <url hash="a496cf4b">P12-1049</url>
@@ -478,9 +478,9 @@
     </paper>
     <paper id="51">
       <title>Semantic Parsing with <fixed-case>B</fixed-case>ayesian Tree Transducers</title>
-      <author><first>Bevan</first><last>Jones</last></author>
+      <author id="bevan-jones"><first>Bevan</first><last>Jones</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>488–496</pages>
       <url hash="fb66d1e7">P12-1051</url>
       <bibkey>jones-etal-2012-semantic</bibkey>
@@ -488,7 +488,7 @@
     <paper id="52">
       <title>Dependency Hashing for n-best <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Dominick</first><last>Ng</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>497–505</pages>
       <url hash="0effd45d">P12-1052</url>
       <bibkey>ng-curran-2012-dependency</bibkey>
@@ -573,7 +573,7 @@
       <author><first>Wen</first><last>Chan</last></author>
       <author><first>Xiangdong</first><last>Zhou</last></author>
       <author><first>Wei</first><last>Wang</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>582–591</pages>
       <url hash="525c17e2">P12-1061</url>
       <bibkey>chan-etal-2012-community</bibkey>
@@ -588,10 +588,10 @@
     </paper>
     <paper id="63">
       <title>Computational Approaches to Sentence Completion</title>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
-      <author><first>John C.</first><last>Platt</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
-      <author><first>Christopher J.C.</first><last>Burges</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="john-c-platt"><first>John C.</first><last>Platt</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-j-c-burges"><first>Christopher J.C.</first><last>Burges</last></author>
       <author><first>Ainur</first><last>Yessenalina</last></author>
       <author><first>Qiang</first><last>Liu</last></author>
       <pages>601–610</pages>
@@ -605,7 +605,7 @@
       <author><first>Bo</first><last>Long</last></author>
       <author><first>Jean-Francois</first><last>Crespo</last></author>
       <author><first>Anlei</first><last>Dong</last></author>
-      <author><first>Sathiya</first><last>Keerthi</last></author>
+      <author id="sathiya-keerthi-selvaraj"><first>Sathiya</first><last>Keerthi</last></author>
       <author><first>Su-Lin</first><last>Wu</last></author>
       <pages>611–619</pages>
       <url hash="682fd712">P12-1064</url>
@@ -649,7 +649,7 @@
       <author><first>Katsuhiko</first><last>Hayashi</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>657–665</pages>
       <url hash="16c63126">P12-1069</url>
       <bibkey>hayashi-etal-2012-head</bibkey>
@@ -675,7 +675,7 @@
       <title>A Probabilistic Model for Canonicalizing Named Entity Mentions</title>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Yanchuan</first><last>Sim</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>685–693</pages>
       <url hash="37747d06">P12-1072</url>
       <bibkey>yogatama-etal-2012-probabilistic</bibkey>
@@ -691,7 +691,7 @@
     </paper>
     <paper id="74">
       <title>A Computational Approach to the Automation of Creative Naming</title>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>703–711</pages>
       <url hash="276cf8fa">P12-1074</url>
@@ -720,7 +720,7 @@
       <author><first>Rémy</first><last>Kessler</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Caroline</first><last>Hagège</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>André</first><last>Bittar</last></author>
       <pages>730–739</pages>
       <url hash="3f9cb378">P12-1077</url>
@@ -739,7 +739,7 @@
     <paper id="79">
       <title>A Topic Similarity Model for Hierarchical Phrase-based Translation</title>
       <author><first>Xinyan</first><last>Xiao</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
@@ -767,7 +767,7 @@
       <title>Semi-supervised Dependency Parsing using Lexical Affinities</title>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>777–785</pages>
       <url hash="45effed7">P12-1082</url>
       <bibkey>mirroshandel-etal-2012-semi</bibkey>
@@ -793,7 +793,7 @@
       <title>Structuring <fixed-case>E</fixed-case>-Commerce Inventory</title>
       <author><first>Karin</first><last>Mauge</last></author>
       <author><first>Khash</first><last>Rohanimanesh</last></author>
-      <author><first>Jean-David</first><last>Ruvini</last></author>
+      <author id="jean-david-ruvini"><first>Jean-David</first><last>Ruvini</last></author>
       <pages>805–814</pages>
       <url hash="526b6911">P12-1085</url>
       <bibkey>mauge-etal-2012-structuring</bibkey>
@@ -830,7 +830,7 @@
     <paper id="89">
       <title>Discriminative Learning for Joint Template Filling</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>845–853</pages>
       <url hash="ee57f64d">P12-1089</url>
       <bibkey>minkov-zettlemoyer-2012-discriminative</bibkey>
@@ -847,7 +847,7 @@
     <paper id="91">
       <title>Modeling Sentences in the Latent Space</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>864–872</pages>
       <url hash="7790d409">P12-1091</url>
       <revision id="1" href="P12-1091v1" hash="4a9c6a97"/>
@@ -856,10 +856,10 @@
     </paper>
     <paper id="92">
       <title>Improving Word Representations via Global Context and Multiple Word Prototypes</title>
-      <author><first>Eric</first><last>Huang</last></author>
+      <author id="eric-h-huang"><first>Eric</first><last>Huang</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <pages>873–882</pages>
       <url hash="4fad8e42">P12-1092</url>
       <bibkey>huang-etal-2012-improving</bibkey>
@@ -868,7 +868,7 @@
       <title>Exploiting Social Information in Grounded Language Learning via Grammatical Reduction</title>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Katherine</first><last>Demuth</last></author>
-      <author><first>Michael</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael</first><last>Frank</last></author>
       <pages>883–891</pages>
       <url hash="49ad9afb">P12-1093</url>
       <bibkey>johnson-etal-2012-exploiting</bibkey>
@@ -885,7 +885,7 @@
     </paper>
     <paper id="95">
       <title>Modeling the Translation of Predicate-Argument Structure for <fixed-case>SMT</fixed-case></title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>902–911</pages>
@@ -923,7 +923,7 @@
       <title>Mixing Multiple Translation Models in Statistical Machine Translation</title>
       <author><first>Majid</first><last>Razmara</last></author>
       <author><first>George</first><last>Foster</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>940–949</pages>
       <url hash="6c2b4fa1">P12-1099</url>
@@ -950,7 +950,7 @@
     <paper id="102">
       <title>Text Segmentation by Language Using Minimum Description Length</title>
       <author><first>Hiroshi</first><last>Yamaguchi</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>969–978</pages>
       <url hash="13797ce7">P12-1102</url>
       <bibkey>yamaguchi-tanaka-ishii-2012-text</bibkey>
@@ -978,7 +978,7 @@
       <title>Polarity Consistency Checking for Sentiment Dictionaries</title>
       <author><first>Eduard</first><last>Dragut</last></author>
       <author><first>Hong</first><last>Wang</last></author>
-      <author><first>Clement</first><last>Yu</last></author>
+      <author id="clement-t-yu"><first>Clement</first><last>Yu</last></author>
       <author><first>Prasad</first><last>Sistla</last></author>
       <author><first>Weiyi</first><last>Meng</last></author>
       <pages>997–1005</pages>
@@ -998,8 +998,8 @@
     <paper id="107">
       <title>Sentence Simplification by Monolingual Machine Translation</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>1015–1024</pages>
       <url hash="e9451f04">P12-1107</url>
       <bibkey>wubben-etal-2012-sentence</bibkey>
@@ -1010,7 +1010,7 @@
       <author><first>Jeong-Woo</first><last>Son</last></author>
       <author><first>Tae-Gil</first><last>Noh</last></author>
       <author><first>Seong-Bae</first><last>Park</last></author>
-      <author><first>Sang-Jo</first><last>Lee</last></author>
+      <author id="sang-jo-lee"><first>Sang-Jo</first><last>Lee</last></author>
       <pages>1025–1034</pages>
       <url hash="c3651fb8">P12-1108</url>
       <bibkey>song-etal-2012-cost</bibkey>
@@ -1029,7 +1029,7 @@
       <author><first>Jun</first><last>Hatori</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1045–1053</pages>
       <url hash="2daae0fd">P12-1110</url>
       <bibkey>hatori-etal-2012-incremental</bibkey>
@@ -1037,7 +1037,7 @@
     <paper id="111">
       <title>Exploring Deterministic Constraints: from a Constrained <fixed-case>E</fixed-case>nglish <fixed-case>POS</fixed-case> Tagger to an Efficient <fixed-case>ILP</fixed-case> Solution to <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Qiuye</first><last>Zhao</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <pages>1054–1062</pages>
       <url hash="10dd8c74">P12-1111</url>
       <bibkey>zhao-marcus-2012-exploring</bibkey>
@@ -1048,10 +1048,10 @@
       <booktitle>Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <url hash="b8f17125">P12-2</url>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>Chin-Yew</first><last>Lin</last></editor>
+      <editor id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></editor>
       <editor><first>Miles</first><last>Osborne</last></editor>
-      <editor><first>Gary Geunbae</first><last>Lee</last></editor>
-      <editor><first>Jong C.</first><last>Park</last></editor>
+      <editor id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></editor>
+      <editor id="jong-c-park"><first>Jong C.</first><last>Park</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
       <month>July</month>
@@ -1065,7 +1065,7 @@
     <paper id="1">
       <title>Higher-order Constituent Parsing and Parser Combination</title>
       <author><first>Xiao</first><last>Chen</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>1–5</pages>
       <url hash="2a7b7129">P12-2001</url>
       <bibkey>chen-kit-2012-higher</bibkey>
@@ -1082,7 +1082,7 @@
     <paper id="3">
       <title>A Comparison of <fixed-case>C</fixed-case>hinese Parsers for <fixed-case>S</fixed-case>tanford Dependencies</title>
       <author><first>Wanxiang</first><last>Che</last></author>
-      <author><first>Valentin</first><last>Spitkovsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin</first><last>Spitkovsky</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>11–16</pages>
       <url hash="cc2737a8">P12-2003</url>
@@ -1107,7 +1107,7 @@
     <paper id="6">
       <title>Fast and Scalable Decoding with Language Model Look-Ahead for Phrase-based Statistical Machine Translation</title>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Richard</first><last>Zens</last></author>
       <pages>28–32</pages>
       <url hash="03dece73">P12-2006</url>
@@ -1115,10 +1115,10 @@
     </paper>
     <paper id="7">
       <title>Head-Driven Hierarchical Phrase-based Translation</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>33–37</pages>
       <url hash="7f67a6f7">P12-2007</url>
       <bibkey>li-etal-2012-head</bibkey>
@@ -1133,7 +1133,7 @@
     </paper>
     <paper id="9">
       <title>A Novel Burst-based Text Representation Model for Scalable Event Detection</title>
-      <author><first>Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Xin</first><last>Zhao</last></author>
       <author><first>Rishan</first><last>Chen</last></author>
       <author><first>Kai</first><last>Fan</last></author>
       <author><first>Hongfei</first><last>Yan</last></author>
@@ -1164,7 +1164,7 @@
       <title>Self-Disclosure and Relationship Strength in <fixed-case>T</fixed-case>witter Conversations</title>
       <author><first>JinYeong</first><last>Bak</last></author>
       <author><first>Suin</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>60–64</pages>
       <url hash="f563d9b6">P12-2012</url>
       <attachment type="presentation" hash="97a41399">P12-2012.Presentation.pdf</attachment>
@@ -1174,7 +1174,7 @@
       <title>Genre Independent Subgroup Detection in Online Discussion Threads: A Study of Implicit Attitude using Textual Latent Semantics</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>65–69</pages>
       <url hash="31dbd200">P12-2013</url>
       <attachment type="dataset" hash="43e00847">P12-2013.Datasets.zip</attachment>
@@ -1183,8 +1183,8 @@
     <paper id="14">
       <title>Learning to Temporally Order Medical Events in Clinical Text</title>
       <author><first>Preethi</first><last>Raghavan</last></author>
-      <author><first>Albert</first><last>Lai</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert</first><last>Lai</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>70–74</pages>
       <url hash="fa76c3b0">P12-2014</url>
       <bibkey>raghavan-etal-2012-learning-temporally</bibkey>
@@ -1206,7 +1206,7 @@
     </paper>
     <paper id="17">
       <title>Using Rejuvenation to Improve Particle Filtering for <fixed-case>B</fixed-case>ayesian Word Segmentation</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>85–89</pages>
       <url hash="20064e34">P12-2017</url>
@@ -1214,8 +1214,8 @@
     </paper>
     <paper id="18">
       <title>Baselines and Bigrams: Simple, Good Sentiment and Topic Classification</title>
-      <author><first>Sida</first><last>Wang</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>90–94</pages>
       <url hash="7679a3ee">P12-2018</url>
       <bibkey>wang-manning-2012-baselines</bibkey>
@@ -1243,7 +1243,7 @@
       <title>Robust Conversion of <fixed-case>CCG</fixed-case> Derivations to Phrase Structure Trees</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>105–109</pages>
       <url hash="fc20e6eb">P12-2021</url>
       <attachment type="software" hash="ac0cb2c9">P12-2021.Software.zip</attachment>
@@ -1252,7 +1252,7 @@
     <paper id="22">
       <title>Estimating Compact Yet Rich Tree Insertion Grammars</title>
       <author><first>Elif</first><last>Yamangil</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>110–114</pages>
       <url hash="6dced041">P12-2022</url>
       <bibkey>yamangil-shieber-2012-estimating</bibkey>
@@ -1269,7 +1269,7 @@
     <paper id="24">
       <title>Detecting Semantic Equivalence and Information Disparity in Cross-lingual Documents</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>120–124</pages>
       <url hash="a79d59da">P12-2024</url>
@@ -1285,9 +1285,9 @@
     </paper>
     <paper id="26">
       <title>Learning to Find Translations and Transliterations on the Web</title>
-      <author><first>Joseph Z.</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Roger Jyh-Shing</first><last>Jang</last></author>
+      <author id="joseph-z-chang"><first>Joseph Z.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Roger Jyh-Shing</first><last>Jang</last></author>
       <pages>130–134</pages>
       <url hash="e7fd7401">P12-2026</url>
       <bibkey>chang-etal-2012-learning</bibkey>
@@ -1303,7 +1303,7 @@
     <paper id="28">
       <title>Learning the Latent Semantics of a Concept from its Definition</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>140–144</pages>
       <url hash="83ba114b">P12-2028</url>
       <attachment type="dataset" hash="24377b3e">P12-2028.Datasets.zip</attachment>
@@ -1312,7 +1312,7 @@
     <paper id="29">
       <title>Unsupervised Semantic Role Induction with Global Role Ordering</title>
       <author><first>Nikhil</first><last>Garg</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>145–149</pages>
       <url hash="f6093280">P12-2029</url>
       <bibkey>garg-henderson-2012-unsupervised</bibkey>
@@ -1339,7 +1339,7 @@
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Adinoyi</first><last>Omuya</last></author>
       <author><first>Aaron</first><last>Harnly</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>161–165</pages>
       <url hash="597fe604">P12-2032</url>
       <bibkey>agarwal-etal-2012-comprehensive</bibkey>
@@ -1393,7 +1393,7 @@
     </paper>
     <paper id="38">
       <title>Native Language Detection with Tree Substitution Grammars</title>
-      <author><first>Benjamin</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Benjamin</first><last>Swanson</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>193–197</pages>
       <url hash="ea87cb6e">P12-2038</url>
@@ -1403,14 +1403,14 @@
       <title>Tense and Aspect Error Correction for <fixed-case>ESL</fixed-case> Learners Using Global Context</title>
       <author><first>Toshikazu</first><last>Tajiri</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>198–202</pages>
       <url hash="642662e0">P12-2039</url>
       <bibkey>tajiri-etal-2012-tense</bibkey>
     </paper>
     <paper id="40">
       <title>Movie-<fixed-case>D</fixed-case>i<fixed-case>C</fixed-case>: a Movie Dialogue Corpus for Research and Development</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>203–207</pages>
       <url hash="740aab87">P12-2040</url>
       <bibkey>banchs-2012-movie</bibkey>
@@ -1442,7 +1442,7 @@
     <paper id="44">
       <title>Extracting and modeling durations for habits and events from <fixed-case>T</fixed-case>witter</title>
       <author><first>Jennifer</first><last>Williams</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <pages>223–227</pages>
       <url hash="2985dd5d">P12-2044</url>
       <bibkey>williams-katz-2012-extracting</bibkey>
@@ -1452,7 +1452,7 @@
       <author><first>Joel</first><last>Nothman</last></author>
       <author><first>Matthew</first><last>Honnibal</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>228–232</pages>
       <url hash="def2116c">P12-2045</url>
       <bibkey>nothman-etal-2012-event</bibkey>
@@ -1470,7 +1470,7 @@
     <paper id="47">
       <title>Using Search-Logs to Improve Query Tagging</title>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <pages>238–242</pages>
@@ -1486,8 +1486,8 @@
     </paper>
     <paper id="49">
       <title>A Corpus of Textual Revisions in Second Language Writing</title>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
       <pages>248–252</pages>
       <url hash="7929b266">P12-2049</url>
       <bibkey>lee-webster-2012-corpus</bibkey>
@@ -1497,7 +1497,7 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Behrang</first><last>Mohit</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>253–258</pages>
       <url hash="6830dff1">P12-2050</url>
       <attachment type="dataset" hash="b6a234b5">P12-2050.Datasets.zip</attachment>
@@ -1505,8 +1505,8 @@
     </paper>
     <paper id="51">
       <title>Word Epoch Disambiguation: Finding How Words Change Over Time</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <pages>259–263</pages>
       <url hash="f053e2b2">P12-2051</url>
       <bibkey>mihalcea-nastase-2012-word</bibkey>
@@ -1527,7 +1527,7 @@
       <author><first>Pei</first><last>Yang</last></author>
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Qi</first><last>Tan</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>270–274</pages>
       <url hash="9bc5f781">P12-2053</url>
       <bibkey>yang-etal-2012-information</bibkey>
@@ -1555,9 +1555,9 @@
       <title>Enhancing Statistical Machine Translation with Character Alignment</title>
       <author><first>Ning</first><last>Xi</last></author>
       <author><first>Guangchao</first><last>Tang</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>285–290</pages>
       <url hash="69a8ea33">P12-2056</url>
       <bibkey>xi-etal-2012-enhancing</bibkey>
@@ -1568,7 +1568,7 @@
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>291–295</pages>
       <url hash="432f80c6">P12-2057</url>
       <bibkey>lee-etal-2012-translation</bibkey>
@@ -1577,15 +1577,15 @@
       <title>Heuristic Cube Pruning in Linear Time</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>296–300</pages>
       <url hash="e956f6be">P12-2058</url>
       <bibkey>gesmundo-etal-2012-heuristic</bibkey>
     </paper>
     <paper id="59">
       <title>Combining Word-Level and Character-Level Models for Machine Translation Between Closely-Related Languages</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>301–305</pages>
       <url hash="53023496">P12-2059</url>
       <bibkey>nakov-tiedemann-2012-combining</bibkey>
@@ -1602,7 +1602,7 @@
       <title>Post-ordering by Parsing for <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Statistical Machine Translation</title>
       <author><first>Isao</first><last>Goto</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>311–316</pages>
       <url hash="018ba03e">P12-2061</url>
       <bibkey>goto-etal-2012-post</bibkey>
@@ -1619,10 +1619,10 @@
     </paper>
     <paper id="63">
       <title>Unsupervised Morphology Rivals Supervised Morphology for <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case></title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <author><first>Jacob</first><last>Devlin</last></author>
       <author><first>Michael</first><last>Kayser</last></author>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>322–327</pages>
       <url hash="55689cdc">P12-2063</url>
@@ -1630,7 +1630,7 @@
     </paper>
     <paper id="64">
       <title>A Meta Learning Approach to Grammatical Error Correction</title>
-      <author><first>Hongsuck</first><last>Seo</last></author>
+      <author id="hongsuck-seo"><first>Hongsuck</first><last>Seo</last></author>
       <author><first>Jonghoon</first><last>Lee</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Kyusong</first><last>Lee</last></author>
@@ -1653,7 +1653,7 @@
       <author><first>Zhaopeng</first><last>Tu</last></author>
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
       <pages>338–343</pages>
@@ -1666,7 +1666,7 @@
       <author><first>San-Chuan</first><last>Hung</last></author>
       <author><first>Wei-Shih</first><last>Lin</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <author><first>Wei-Fen</first><last>Lin</last></author>
       <pages>344–348</pages>
       <url hash="e73a61f8">P12-2067</url>
@@ -1679,7 +1679,7 @@
       <author><first>Katsumasa</first><last>Yoshikawa</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>349–353</pages>
       <url hash="1fcdba29">P12-2068</url>
       <bibkey>yoshikawa-etal-2012-sentence</bibkey>
@@ -1695,17 +1695,17 @@
     <paper id="70">
       <title>Assessing the Effect of Inconsistent Assessors on Summarization Evaluation</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Peter A.</first><last>Rankel</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>John M.</first><last>Conroy</last></author>
+      <author id="peter-a-rankel"><first>Peter A.</first><last>Rankel</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
       <pages>359–362</pages>
       <url hash="1506fc39">P12-2070</url>
       <bibkey>owczarzak-etal-2012-assessing</bibkey>
     </paper>
     <paper id="71">
       <title>Fast and Robust Part-of-Speech Tagging Using Dynamic Model Selection</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>363–367</pages>
       <url hash="816b49a8">P12-2071</url>
       <bibkey>choi-palmer-2012-fast</bibkey>
@@ -1713,7 +1713,7 @@
     <paper id="72">
       <title>Lemmatisation as a Tagging Task</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <pages>368–372</pages>
       <url hash="1e65a53b">P12-2072</url>
       <bibkey>gesmundo-samardzic-2012-lemmatisation</bibkey>
@@ -1737,7 +1737,7 @@
     <paper id="75">
       <title>Unsupervized Word Segmentation: the Case for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>383–387</pages>
       <url hash="2463723f">P12-2075</url>
       <bibkey>magistry-sagot-2012-unsupervized</bibkey>
@@ -1773,7 +1773,7 @@
       <author><first>Wei-Jie</first><last>Huang</last></author>
       <author><first>Chia-Ru</first><last>Chou</last></author>
       <author><first>Yu-Lin</first><last>Tzeng</last></author>
-      <author><first>Chia-Ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-Ying</first><last>Lee</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <pages>1–6</pages>
       <url hash="fbd361f1">P12-3001</url>
@@ -1810,22 +1810,22 @@
     <paper id="5">
       <title>langid.py: An Off-the-shelf Language Identification Tool</title>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>25–30</pages>
       <url hash="9f5cd9c4">P12-3005</url>
       <bibkey>lui-baldwin-2012-langid</bibkey>
     </paper>
     <paper id="6">
       <title>Personalized Normalization for a Multilingual Chat System</title>
-      <author><first>Ai Ti</first><last>Aw</last></author>
-      <author><first>Lian Hau</first><last>Lee</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
+      <author id="lianhau-lee"><first>Lian Hau</first><last>Lee</last></author>
       <pages>31–36</pages>
       <url hash="ef54edde">P12-3006</url>
       <bibkey>aw-lee-2012-personalized</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>IRIS</fixed-case>: a Chat-oriented Dialogue System based on the Vector Space Model</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>37–42</pages>
       <url hash="9f4d695e">P12-3007</url>
@@ -1833,16 +1833,16 @@
     </paper>
     <paper id="8">
       <title><fixed-case>L</fixed-case>ets<fixed-case>MT</fixed-case>!: Cloud-Based Platform for Do-It-Yourself Machine Translation</title>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>43–48</pages>
       <url hash="befec908">P12-3008</url>
       <bibkey>vasiljevs-etal-2012-letsmt</bibkey>
     </paper>
     <paper id="9">
       <title>A Web-based Evaluation Framework for Spatial Instruction-Giving Systems</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <pages>49–54</pages>
@@ -1853,8 +1853,8 @@
       <title><fixed-case>DOMCAT</fixed-case>: A Bilingual Concordancer for Domain-Specific Computer Assisted Translation</title>
       <author><first>Ming-Hong</first><last>Bai</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>55–60</pages>
       <url hash="fe93ccd3">P12-3010</url>
       <bibkey>bai-etal-2012-domcat</bibkey>
@@ -1862,10 +1862,10 @@
     <paper id="11">
       <title>The <fixed-case>O</fixed-case>pen<fixed-case>G</fixed-case>rm open-source finite-state grammar software libraries</title>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <author><first>Terry</first><last>Tai</last></author>
       <pages>61–66</pages>
       <url hash="c4312e1f">P12-3011</url>
@@ -1874,7 +1874,7 @@
     <paper id="12">
       <title>Multilingual <fixed-case>WSD</fixed-case> with Just a Few Lines of Code: the <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et <fixed-case>API</fixed-case></title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>67–72</pages>
       <url hash="2983bfcb">P12-3012</url>
       <bibkey>navigli-ponzetto-2012-multilingual</bibkey>
@@ -1907,12 +1907,12 @@
     </paper>
     <paper id="16">
       <title><fixed-case>ACCURAT</fixed-case> Toolkit for Multi-Level Alignment and Information Extraction from Comparable Corpora</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <author><first>Fangzhong</first><last>Su</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Bogdan</first><last>Babych</last></author>
       <pages>91–96</pages>
       <url hash="3b0f2722">P12-3016</url>
@@ -1940,7 +1940,7 @@
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Laura</first><last>Chiticariu</last></author>
       <author><first>Huahai</first><last>Yang</last></author>
-      <author><first>Frederick</first><last>Reiss</last></author>
+      <author id="frederick-reiss"><first>Frederick</first><last>Reiss</last></author>
       <author><first>Arnaldo</first><last>Carreno-fuentes</last></author>
       <pages>109–114</pages>
       <url hash="5b54ca95">P12-3019</url>
@@ -1949,10 +1949,10 @@
     <paper id="20">
       <title>A System for Real-time <fixed-case>T</fixed-case>witter Sentiment Analysis of 2012 <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Presidential Election Cycle</title>
       <author><first>Hao</first><last>Wang</last></author>
-      <author><first>Dogan</first><last>Can</last></author>
+      <author id="dogan-can"><first>Dogan</first><last>Can</last></author>
       <author><first>Abe</first><last>Kazemzadeh</last></author>
       <author><first>François</first><last>Bar</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>115–120</pages>
       <url hash="5f058c88">P12-3020</url>
       <bibkey>wang-etal-2012-system</bibkey>
@@ -1970,7 +1970,7 @@
       <title><fixed-case>A</fixed-case>kamon: An Open Source Toolkit for Tree/Forest-Based Statistical Machine Translation</title>
       <author><first>Xianchao</first><last>Wu</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>127–132</pages>
       <url hash="511ba892">P12-3022</url>
       <bibkey>wu-etal-2012-akamon</bibkey>
@@ -1978,16 +1978,16 @@
     <paper id="23">
       <title>Subgroup Detector: A System for Detecting Subgroups in Online Discussions</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>133–138</pages>
       <url hash="cc4d193a">P12-3023</url>
       <bibkey>abu-jbara-radev-2012-subgroup</bibkey>
     </paper>
     <paper id="24">
       <title>A Graphical Interface for <fixed-case>MT</fixed-case> Evaluation and Error Analysis</title>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>139–144</pages>
       <url hash="ac02954e">P12-3024</url>
       <bibkey>gonzalez-etal-2012-graphical</bibkey>
@@ -1997,7 +1997,7 @@
       <author><first>Wan-Yu</first><last>Lin</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author><first>Chun-Chao</first><last>Yen</last></author>
-      <author><first>Shou-de</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-de</first><last>Lin</last></author>
       <pages>145–150</pages>
       <url hash="a88d7e8f">P12-3025</url>
       <bibkey>lin-etal-2012-online</bibkey>
@@ -2012,11 +2012,11 @@
     </paper>
     <paper id="27">
       <title><fixed-case>FLOW</fixed-case>: A First-Language-Oriented Writing Assistant System</title>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
-      <author><first>Hung-Ting</first><last>Hsieh</last></author>
-      <author><first>Ting-Hui</first><last>Kao</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="hung-ting-hsieh"><first>Hung-Ting</first><last>Hsieh</last></author>
+      <author id="ting-hui-kao"><first>Ting-Hui</first><last>Kao</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>157–162</pages>
       <url hash="655dddf9">P12-3027</url>
       <bibkey>chen-etal-2012-flow</bibkey>
@@ -2026,7 +2026,7 @@
       <author><first>Wen-Tai</first><last>Hsieh</last></author>
       <author><first>Chen-Ming</first><last>Wu</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
-      <author><first>Seng-cho T.</first><last>Chou</last></author>
+      <author id="seng-cho-t-chou"><first>Seng-cho T.</first><last>Chou</last></author>
       <pages>163–168</pages>
       <url hash="352826a9">P12-3028</url>
       <bibkey>hsieh-etal-2012-social</bibkey>
@@ -2062,7 +2062,7 @@
     <paper id="1">
       <title>Qualitative Modeling of Spatial Prepositions and Motion Expressions</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1</pages>
       <url hash="f6e2f632">P12-4001</url>
       <attachment type="presentation" hash="3a26b317">P12-4001.Presentation.pdf</attachment>
@@ -2078,7 +2078,7 @@
     </paper>
     <paper id="3">
       <title>Topic Models, Latent Space Models, Sparse Coding, and All That: A Systematic Understanding of Probabilistic Semantic Extraction in Large Corpus</title>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>3</pages>
       <url hash="b6f54cd8">P12-4003</url>
       <attachment type="presentation" hash="1fef3536">P12-4003.Presentation.pdf</attachment>
@@ -2086,9 +2086,9 @@
     </paper>
     <paper id="4">
       <title>Multilingual Subjectivity and Sentiment Analysis</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>4</pages>
       <url hash="763e13a0">P12-4004</url>
       <attachment type="presentation" hash="47b1df77">P12-4004.Presentation.pdf</attachment>
@@ -2098,7 +2098,7 @@
       <title>Deep Learning for <fixed-case>NLP</fixed-case> (without Magic)</title>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>5</pages>
       <url hash="7e1da11f">P12-4005</url>
       <attachment type="presentation" hash="2e4bcb6e">P12-4005.Presentation.pdf</attachment>
@@ -2106,8 +2106,8 @@
     </paper>
     <paper id="6">
       <title>Graph-based Semi-Supervised Learning Algorithms for <fixed-case>NLP</fixed-case></title>
-      <author><first>Amar</first><last>Subramanya</last></author>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="amarnag-subramanya"><first>Amar</first><last>Subramanya</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <pages>6</pages>
       <url hash="cf5cc559">P12-4006</url>
       <attachment type="presentation" hash="e2ed1206">P12-4006.Presentation.pdf</attachment>
diff --git a/data/xml/P13.xml b/data/xml/P13.xml
index 80680b5b12..4c04a1404c 100644
--- a/data/xml/P13.xml
+++ b/data/xml/P13.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <url hash="0248321d">P13-1</url>
-      <editor><first>Hinrich</first><last>Schuetze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></editor>
       <editor><first>Pascale</first><last>Fung</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Integrating Translation Memory into Phrase-Based Machine Translation during Decoding</title>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>11–21</pages>
       <url hash="bfa2de0d">P13-1002</url>
@@ -42,7 +42,7 @@
     </paper>
     <paper id="4">
       <title>Modelling Annotator Bias with Multi-task <fixed-case>G</fixed-case>aussian Processes: An Application to Machine Translation Quality Estimation</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>32–42</pages>
       <url hash="8d8b72df">P13-1004</url>
@@ -52,7 +52,7 @@
       <title>Smoothed marginal distribution constraints for language modeling</title>
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>43–52</pages>
       <url hash="5c0a565a">P13-1005</url>
       <bibkey>roark-etal-2013-smoothed</bibkey>
@@ -69,14 +69,14 @@
       <title>Plurality, Negation, and Quantification:Towards Comprehensive Quantifier Scope Disambiguation</title>
       <author><first>Mehdi</first><last>Manshadi</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>64–72</pages>
       <url hash="56e1ce41">P13-1007</url>
       <bibkey>manshadi-etal-2013-plurality</bibkey>
     </paper>
     <paper id="8">
       <title>Joint Event Extraction via Structured Prediction with Global Features</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <pages>73–82</pages>
@@ -103,7 +103,7 @@
       <title>Recognizing Rare Social Phenomena in Conversation: Empowerment Detection in Support Group Chatrooms</title>
       <author><first>Elijah</first><last>Mayfield</last></author>
       <author><first>David</first><last>Adamson</last></author>
-      <author><first>Carolyn</first><last>Penstein Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein Rosé</last></author>
       <pages>104–113</pages>
       <url hash="86291242">P13-1011</url>
       <bibkey>mayfield-etal-2013-recognizing</bibkey>
@@ -149,7 +149,7 @@
       <title>Distortion Model Considering Rich Context for Statistical Machine Translation</title>
       <author><first>Isao</first><last>Goto</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>155–165</pages>
@@ -171,8 +171,8 @@
       <title>Microblogs as Parallel Corpora</title>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Guang</first><last>Xiang</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>176–186</pages>
       <url hash="1f99acc5">P13-1018</url>
@@ -189,8 +189,8 @@
     </paper>
     <paper id="20">
       <title>Fast and Robust Compressive Summarization with Dual Decomposition and Multi-Task Learning</title>
-      <author><first>Miguel</first><last>Almeida</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="miguel-b-almeida"><first>Miguel</first><last>Almeida</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <pages>196–206</pages>
       <url hash="b779b355">P13-1020</url>
       <bibkey>almeida-martins-2013-fast</bibkey>
@@ -207,7 +207,7 @@
     <paper id="22">
       <title>Adapting Discriminative Reranking to Grounded Language Learning</title>
       <author><first>Joohyun</first><last>Kim</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>218–227</pages>
       <url hash="af3c7b70">P13-1022</url>
       <bibkey>kim-mooney-2013-adapting</bibkey>
@@ -225,7 +225,7 @@
       <author><first>Weiwei</first><last>Guo</last></author>
       <author><first>Hao</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>239–249</pages>
       <url hash="aeb645ac">P13-1024</url>
       <bibkey>guo-etal-2013-linking</bibkey>
@@ -234,7 +234,7 @@
       <title>A computational approach to politeness with application to social factors</title>
       <author><first>Cristian</first><last>Danescu-Niculescu-Mizil</last></author>
       <author><first>Moritz</first><last>Sudhof</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Jure</first><last>Leskovec</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>250–259</pages>
@@ -276,10 +276,10 @@
     </paper>
     <paper id="30">
       <title>A Context Free <fixed-case>TAG</fixed-case> Variant</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Elif</first><last>Yamangil</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>302–310</pages>
       <url hash="6e24940e">P13-1030</url>
       <bibkey>swanson-etal-2013-context</bibkey>
@@ -287,9 +287,9 @@
     <paper id="31">
       <title>Fast and Adaptive Online Training of Feature-Rich Translation Models</title>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Sida</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>311–321</pages>
       <url hash="a90f1890">P13-1031</url>
       <bibkey>green-etal-2013-fast</bibkey>
@@ -298,7 +298,7 @@
       <title>Advancements in Reordering Models for Statistical Machine Translation</title>
       <author><first>Minwei</first><last>Feng</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>322–332</pages>
       <url hash="582dd60f">P13-1032</url>
       <bibkey>feng-etal-2013-advancements</bibkey>
@@ -306,7 +306,7 @@
     <paper id="33">
       <title>A <fixed-case>M</fixed-case>arkov Model of Machine Translation using Non-parametric <fixed-case>B</fixed-case>ayesian Inference</title>
       <author><first>Yang</first><last>Feng</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>333–342</pages>
       <url hash="24c5ffa6">P13-1033</url>
       <bibkey>feng-cohn-2013-markov</bibkey>
@@ -323,7 +323,7 @@
       <title>Learning Latent Personas of Film Characters</title>
       <author><first>David</first><last>Bamman</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>352–361</pages>
       <url hash="0867cfdf">P13-1035</url>
       <bibkey>bamman-etal-2013-learning</bibkey>
@@ -338,7 +338,7 @@
     <paper id="37">
       <title>Automatic Interpretation of the <fixed-case>E</fixed-case>nglish Possessive</title>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>372–381</pages>
       <url hash="caab3603">P13-1037</url>
       <bibkey>tratz-hovy-2013-automatic</bibkey>
@@ -348,7 +348,7 @@
       <author><first>Katsuma</first><last>Narisawa</last></author>
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Junta</first><last>Mizuno</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>382–391</pages>
       <url hash="b5038b65">P13-1038</url>
@@ -356,7 +356,7 @@
     </paper>
     <paper id="39">
       <title>Probabilistic Domain Modelling With Contextualized Distributional Semantic Vectors</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>392–401</pages>
       <url hash="af71baa3">P13-1039</url>
@@ -365,8 +365,8 @@
     <paper id="40">
       <title>Extracting bilingual terminologies from comparable corpora</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Monica</first><last>Paramita</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="monica-lestari-paramita"><first>Monica</first><last>Paramita</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
       <pages>402–411</pages>
       <url hash="60d69069">P13-1040</url>
       <bibkey>aker-etal-2013-extracting</bibkey>
@@ -374,16 +374,16 @@
     <paper id="41">
       <title>The Haves and the Have-Nots: Leveraging Unlabelled Corpora for Sentiment Analysis</title>
       <author><first>Kashyap</first><last>Popat</last></author>
-      <author><first>Balamurali</first><last>A.R</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>A.R</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>412–422</pages>
       <url hash="4a48de54">P13-1041</url>
       <bibkey>popat-etal-2013-haves</bibkey>
     </paper>
     <paper id="42">
       <title>Large-scale Semantic Parsing via Schema Matching and Lexicon Extension</title>
-      <author><first>Qingqing</first><last>Cai</last></author>
+      <author id="qingqing-cai"><first>Qingqing</first><last>Cai</last></author>
       <author><first>Alexander</first><last>Yates</last></author>
       <pages>423–433</pages>
       <url hash="496a108e">P13-1042</url>
@@ -402,8 +402,8 @@
     </paper>
     <paper id="44">
       <title>Nonconvex Global Optimization for Latent-Variable Models</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>444–454</pages>
       <url hash="78ec0f95">P13-1044</url>
       <bibkey>gormley-eisner-2013-nonconvex</bibkey>
@@ -412,8 +412,8 @@
       <title>Parsing with Compositional Vector Grammars</title>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <pages>455–465</pages>
       <url hash="cf1f2ac4">P13-1045</url>
       <bibkey>socher-etal-2013-parsing</bibkey>
@@ -421,26 +421,26 @@
     <paper id="46">
       <title>Discriminative state tracking for spoken dialog systems</title>
       <author><first>Angeliki</first><last>Metallinou</last></author>
-      <author><first>Dan</first><last>Bohus</last></author>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>466–475</pages>
       <url hash="102458c6">P13-1046</url>
       <bibkey>metallinou-etal-2013-discriminative</bibkey>
     </paper>
     <paper id="47">
       <title>Leveraging Synthetic Discourse Data via Multi-task Learning for Implicit Discourse Relation Recognition</title>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yu</first><last>Xu</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>476–485</pages>
       <url hash="4dd7628b">P13-1047</url>
       <bibkey>lan-etal-2013-leveraging</bibkey>
     </paper>
     <paper id="48">
       <title>Combining Intra- and Multi-sentential Rhetorical Parsing for Document-level Discourse Analysis</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <pages>486–496</pages>
       <url hash="81a5cf6b">P13-1048</url>
@@ -456,9 +456,9 @@
     </paper>
     <paper id="50">
       <title>Feature-Based Selection of Dependency Paths in Ad Hoc Information Retrieval</title>
-      <author><first>K. Tamsin</first><last>Maxwell</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="k-tamsin-maxwell"><first>K. Tamsin</first><last>Maxwell</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <pages>507–516</pages>
       <url hash="6ab38d3c">P13-1050</url>
       <bibkey>maxwell-etal-2013-feature</bibkey>
@@ -468,8 +468,8 @@
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>517–527</pages>
       <url hash="6258be29">P13-1051</url>
       <bibkey>popel-etal-2013-coordination</bibkey>
@@ -486,7 +486,7 @@
     <paper id="53">
       <title>Collective Annotation of Linguistic Resources: Basic Principles and a Formal Model</title>
       <author><first>Ulle</first><last>Endriss</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>539–549</pages>
       <url hash="7b2a107e">P13-1053</url>
       <bibkey>endriss-fernandez-2013-collective</bibkey>
@@ -495,16 +495,16 @@
       <title><fixed-case>P</fixed-case>ar<fixed-case>G</fixed-case>ram<fixed-case>B</fixed-case>ank: The <fixed-case>P</fixed-case>ar<fixed-case>G</fixed-case>ram Parallel Treebank</title>
       <author><first>Sebastian</first><last>Sulger</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Tibor</first><last>Laczkó</last></author>
       <author><first>György</first><last>Rákosi</last></author>
-      <author><first>Cheikh Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh Bamba</first><last>Dione</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <author><first>Agnieszka</first><last>Patejuk</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>I Wayan</first><last>Arka</last></author>
       <author><first>Meladel</first><last>Mistica</last></author>
       <pages>550–560</pages>
@@ -539,8 +539,8 @@
     <paper id="58">
       <title>Using subcategorization knowledge to improve case prediction for translation to <fixed-case>G</fixed-case>erman</title>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>593–603</pages>
       <url hash="f27c1fde">P13-1058</url>
       <bibkey>weller-etal-2013-using</bibkey>
@@ -550,8 +550,8 @@
       <author><first>Haibo</first><last>Li</last></author>
       <author><first>Jing</first><last>Zheng</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
-      <author><first>Wen</first><last>Wang</last></author>
+      <author><first>Qi</first><last>Li</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
       <pages>604–614</pages>
       <url hash="d7186e0b">P13-1059</url>
       <bibkey>li-etal-2013-name</bibkey>
@@ -559,7 +559,7 @@
     <paper id="60">
       <title>Decipherment Complexity in 1:1 Substitution Ciphers</title>
       <author><first>Malte</first><last>Nuhn</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>615–621</pages>
       <url hash="0b8e6b02">P13-1060</url>
       <bibkey>nuhn-ney-2013-decipherment</bibkey>
@@ -567,7 +567,7 @@
     <paper id="61">
       <title>Non-Monotonic Sentence Alignment via Semisupervised Learning</title>
       <author><first>Xiaojun</first><last>Quan</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Yan</first><last>Song</last></author>
       <pages>622–630</pages>
       <url hash="e088f944">P13-1061</url>
@@ -594,7 +594,7 @@
     </paper>
     <paper id="64">
       <title>Bridging Languages through Etymology: The case of cross language text categorization</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>651–659</pages>
       <url hash="802332eb">P13-1064</url>
@@ -625,9 +625,9 @@
     </paper>
     <paper id="68">
       <title>Large tagset labeling using Feed Forward Neural Networks. Case study on <fixed-case>R</fixed-case>omanian Language</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <pages>692–700</pages>
       <url hash="39a156eb">P13-1068</url>
       <bibkey>boros-etal-2013-large</bibkey>
@@ -686,7 +686,7 @@
       <title>Discriminative Learning with Natural Annotations: Word Segmentation as a Case Study</title>
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Meng</first><last>Sun</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Yating</first><last>Yang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>761–769</pages>
@@ -705,8 +705,8 @@
     </paper>
     <paper id="77">
       <title>An Infinite Hierarchical <fixed-case>B</fixed-case>ayesian Model of Phrasal Translation</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>780–790</pages>
       <url hash="dc81e385">P13-1077</url>
       <bibkey>cohn-haffari-2013-infinite</bibkey>
@@ -715,8 +715,8 @@
       <title>Additive Neural Networks for Statistical Machine Translation</title>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>791–801</pages>
       <url hash="4de5f171">P13-1078</url>
       <bibkey>liu-etal-2013-additive</bibkey>
@@ -725,8 +725,8 @@
       <title>Hierarchical Phrase Table Combination for Machine Translation</title>
       <author><first>Conghui</first><last>Zhu</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>802–810</pages>
       <url hash="19514efc">P13-1079</url>
       <bibkey>zhu-etal-2013-hierarchical</bibkey>
@@ -744,7 +744,7 @@
     <paper id="81">
       <title>Enlisting the Ghost: Modeling Empty Categories for Machine Translation</title>
       <author><first>Bing</first><last>Xiang</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>822–831</pages>
       <url hash="7e0f3d63">P13-1081</url>
@@ -763,9 +763,9 @@
       <title>Part-of-Speech Induction in Dependency Trees for Statistical Machine Translation</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>841–851</pages>
       <url hash="9008e4b7">P13-1083</url>
       <bibkey>tamura-etal-2013-part</bibkey>
@@ -792,7 +792,7 @@
     <paper id="86">
       <title>Semantic Frames to Predict Stock Price Movement</title>
       <author><first>Boyi</first><last>Xie</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Leon</first><last>Wu</last></author>
       <author><first>Germán G.</first><last>Creamer</last></author>
       <pages>873–883</pages>
@@ -812,7 +812,7 @@
     <paper id="88">
       <title>The Role of Syntax in Vector Space Models of Compositional Semantics</title>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>894–904</pages>
       <url hash="dad633f7">P13-1088</url>
       <bibkey>hermann-blunsom-2013-role</bibkey>
@@ -829,9 +829,9 @@
     <paper id="90">
       <title>Semi-Supervised Semantic Tagging of Conversational Understanding using <fixed-case>M</fixed-case>arkov Topic Regression</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>914–923</pages>
       <url hash="175e8b67">P13-1090</url>
       <bibkey>celikyilmaz-etal-2013-semi</bibkey>
@@ -842,7 +842,7 @@
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Daniel</first><last>Bauer</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Bevan</first><last>Jones</last></author>
+      <author id="bevan-jones"><first>Bevan</first><last>Jones</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <pages>924–932</pages>
       <url hash="ed6ea0b1">P13-1091</url>
@@ -884,7 +884,7 @@
     <paper id="96">
       <title>Utterance-Level Multimodal Sentiment Analysis</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <pages>973–982</pages>
       <url hash="10518efd">P13-1096</url>
@@ -893,8 +893,8 @@
     <paper id="97">
       <title>Probabilistic Sense Sentiment Similarity through Hidden Emotions</title>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>Man</first><last>Lan</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>983–992</pages>
       <url hash="3f6082f2">P13-1097</url>
       <bibkey>mohtarami-etal-2013-probabilistic</bibkey>
@@ -902,8 +902,8 @@
     <paper id="98">
       <title>A user-centric model of voting intention from Social Media</title>
       <author><first>Vasileios</first><last>Lampos</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>993–1003</pages>
       <url hash="25b863f6">P13-1098</url>
       <attachment type="poster" hash="4f62d599">P13-1098.Poster.pdf</attachment>
@@ -932,14 +932,14 @@
       <author><first>Hajime</first><last>Morita</last></author>
       <author><first>Ryohei</first><last>Sasano</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1023–1032</pages>
       <url hash="8a9fc4ef">P13-1101</url>
       <bibkey>morita-etal-2013-subtree</bibkey>
     </paper>
     <paper id="102">
       <title>The effect of non-tightness on <fixed-case>B</fixed-case>ayesian estimation of <fixed-case>PCFG</fixed-case>s</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>1033–1041</pages>
       <url hash="dc2f4d83">P13-1102</url>
@@ -960,7 +960,7 @@
     </paper>
     <paper id="104">
       <title>Transition-based Dependency Parsing with Selectional Branching</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>1052–1062</pages>
       <url hash="6cad1370">P13-1104</url>
@@ -969,7 +969,7 @@
     <paper id="105">
       <title>Bilingually-Guided Monolingual Dependency Grammar Induction</title>
       <author><first>Kai</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Wenbin</first><last>Jiang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>1063–1072</pages>
@@ -980,7 +980,7 @@
       <title>Joint Word Alignment and Bilingual Named Entity Recognition Using Dual Decomposition</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
       <author><first>Wanxiang</first><last>Che</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1073–1082</pages>
       <url hash="c167d52e">P13-1106</url>
       <bibkey>wang-etal-2013-joint</bibkey>
@@ -1001,8 +1001,8 @@
     <paper id="108">
       <title>Learning to Extract International Relations from Political Context</title>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Brandon M.</first><last>Stewart</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="brandon-m-stewart"><first>Brandon M.</first><last>Stewart</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1094–1104</pages>
       <url hash="f36aaf5d">P13-1108</url>
       <bibkey>oconnor-etal-2013-learning</bibkey>
@@ -1011,7 +1011,7 @@
       <title>Graph Propagation for Paraphrasing Out-of-Vocabulary Words in Statistical Machine Translation</title>
       <author><first>Majid</first><last>Razmara</last></author>
       <author><first>Maryam</first><last>Siahbani</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>1105–1115</pages>
       <url hash="d01fbffb">P13-1109</url>
@@ -1031,7 +1031,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1127–1136</pages>
       <url hash="7ca539e7">P13-1111</url>
       <bibkey>zhai-etal-2013-handling</bibkey>
@@ -1076,7 +1076,7 @@
     <paper id="116">
       <title><fixed-case>I</fixed-case>mp<fixed-case>A</fixed-case>r: A Deterministic Algorithm for Implicit Semantic Role Labelling</title>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>1180–1189</pages>
       <url hash="75c09455">P13-1116</url>
       <bibkey>laparra-rigau-2013-impar</bibkey>
@@ -1091,9 +1091,9 @@
     </paper>
     <paper id="118">
       <title><fixed-case>DE</fixed-case>riv<fixed-case>B</fixed-case>ase: Inducing and Evaluating a Derivational Morphology Resource for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Britta</first><last>Zeller</last></author>
+      <author id="britta-zeller"><first>Britta</first><last>Zeller</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>1201–1211</pages>
       <url hash="311951d8">P13-1118</url>
       <bibkey>zeller-etal-2013-derivbase</bibkey>
@@ -1120,7 +1120,7 @@
     </paper>
     <paper id="121">
       <title>Towards Robust Abstractive Multi-Document Summarization: A Caseframe Analysis of Centrality and Domain</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <pages>1233–1242</pages>
       <url hash="26570e86">P13-1121</url>
@@ -1138,7 +1138,7 @@
     <paper id="123">
       <title>Conditional Random Fields for Responsive Surface Realisation using Global Features</title>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>1254–1263</pages>
@@ -1158,7 +1158,7 @@
     <paper id="125">
       <title>Cut the noise: Mutually reinforcing reordering and alignments for improved machine translation</title>
       <author><first>Karthik</first><last>Visweswariah</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Ananthakrishnan</first><last>Ramanathan</last></author>
       <pages>1275–1284</pages>
       <url hash="0f5ba36c">P13-1125</url>
@@ -1208,7 +1208,7 @@
       <title>Language Acquisition and Probabilistic Models: keeping it simple</title>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
-      <author><first>Robert</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert</first><last>Berwick</last></author>
       <author><first>Igor</first><last>Malioutov</last></author>
       <pages>1321–1330</pages>
       <url hash="01e984b6">P13-1130</url>
@@ -1252,9 +1252,9 @@
     </paper>
     <paper id="135">
       <title>Dirt Cheap Web-Scale Parallel Text from the <fixed-case>C</fixed-case>ommon <fixed-case>C</fixed-case>rawl</title>
-      <author><first>Jason R.</first><last>Smith</last></author>
-      <author><first>Herve</first><last>Saint-Amand</last></author>
-      <author><first>Magdalena</first><last>Plamada</last></author>
+      <author id="jason-smith"><first>Jason R.</first><last>Smith</last></author>
+      <author id="herve-saint-amand"><first>Herve</first><last>Saint-Amand</last></author>
+      <author id="magdalena-plamada"><first>Magdalena</first><last>Plamada</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
@@ -1267,8 +1267,8 @@
       <author><first>Lu</first><last>Wang</last></author>
       <author><first>Hema</first><last>Raghavan</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1384–1394</pages>
       <url hash="6f3661cb">P13-1136</url>
       <bibkey>wang-etal-2013-sentence</bibkey>
@@ -1276,15 +1276,15 @@
     <paper id="137">
       <title>Domain-Independent Abstract Generation for Focused Meeting Summarization</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1395–1405</pages>
       <url hash="02ee6879">P13-1137</url>
       <bibkey>wang-cardie-2013-domain</bibkey>
     </paper>
     <paper id="138">
       <title>A Statistical <fixed-case>NLG</fixed-case> Framework for Aggregated Planning and Realization</title>
-      <author><first>Ravi</first><last>Kondadadi</last></author>
-      <author><first>Blake</first><last>Howald</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravi</first><last>Kondadadi</last></author>
+      <author id="blake-howald"><first>Blake</first><last>Howald</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <pages>1406–1415</pages>
       <url hash="b011d016">P13-1138</url>
@@ -1301,7 +1301,7 @@
     <paper id="140">
       <title>Learning a Phrase-based Translation Model from Monolingual Data with Application to Domain Adaptation</title>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1425–1434</pages>
       <url hash="d6122954">P13-1140</url>
       <bibkey>zhang-zong-2013-learning</bibkey>
@@ -1309,7 +1309,7 @@
     <paper id="141">
       <title><fixed-case>S</fixed-case>ense<fixed-case>S</fixed-case>potting: Never let your parallel data tie you to an old domain</title>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Katharine</first><last>Henry</last></author>
       <author><first>Ann</first><last>Irvine</last></author>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
@@ -1320,7 +1320,7 @@
     </paper>
     <paper id="142">
       <title><fixed-case>BRAINSUP</fixed-case>: Brainstorming Support for Creative Sentence Generation</title>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>1446–1455</pages>
@@ -1346,15 +1346,15 @@
     <paper id="145">
       <title>Argument Inference from Relevant Event Mentions in <fixed-case>C</fixed-case>hinese Argument Extraction</title>
       <author><first>Peifeng</first><last>Li</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1477–1487</pages>
       <url hash="1ad71432">P13-1145</url>
       <bibkey>li-etal-2013-argument</bibkey>
     </paper>
     <paper id="146">
       <title>Fine-grained Semantic Typing of Emerging Entities</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
       <author><first>Tomasz</first><last>Tylenda</last></author>
       <author><first>Gerhard</first><last>Weikum</last></author>
       <pages>1488–1497</pages>
@@ -1363,7 +1363,7 @@
     </paper>
     <paper id="147">
       <title>Embedding Semantic Similarity in Tree Kernels for Domain Adaptation of Relation Extraction</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>1498–1507</pages>
       <url hash="46406167">P13-1147</url>
@@ -1371,7 +1371,7 @@
     </paper>
     <paper id="148">
       <title>A joint model of word segmentation and phonological variation for <fixed-case>E</fixed-case>nglish word-final /t/-deletion</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Katherine</first><last>Demuth</last></author>
       <pages>1508–1516</pages>
@@ -1383,7 +1383,7 @@
       <author><first>Angeliki</first><last>Lazaridou</last></author>
       <author><first>Marco</first><last>Marelli</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1517–1526</pages>
       <url hash="e7ce2d42">P13-1149</url>
       <bibkey>lazaridou-etal-2013-compositional</bibkey>
@@ -1405,7 +1405,7 @@
     </paper>
     <paper id="152">
       <title>Combining Referring Expression Generation and Surface Realization: A Corpus-Based Investigation of Architectures</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>1547–1557</pages>
       <url hash="bb0027ca">P13-1152</url>
@@ -1422,14 +1422,14 @@
       <title>Beam Search for Solving Substitution Ciphers</title>
       <author><first>Malte</first><last>Nuhn</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1568–1576</pages>
       <url hash="725b1fc0">P13-1154</url>
       <bibkey>nuhn-etal-2013-beam</bibkey>
     </paper>
     <paper id="155">
       <title>Social Text Normalization using Contextual Graph Random Walks</title>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Arul</first><last>Menezes</last></author>
       <pages>1577–1586</pages>
       <url hash="e96f0a00">P13-1155</url>
@@ -1437,8 +1437,8 @@
     </paper>
     <paper id="156">
       <title>Integrating Phrase-based Reordering Features into a Chart-based Decoder for Machine Translation</title>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1587–1596</pages>
       <url hash="a541e992">P13-1156</url>
       <bibkey>nguyen-vogel-2013-integrating</bibkey>
@@ -1454,7 +1454,7 @@
     <paper id="158">
       <title>Paraphrase-Driven Learning for Open Question Answering</title>
       <author><first>Anthony</first><last>Fader</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>1608–1618</pages>
       <url hash="dd3f7852">P13-1158</url>
@@ -1462,13 +1462,13 @@
     </paper>
     <paper id="159">
       <title>Aid is Out There: Looking for Help from Tweets during a Large Scale Disaster</title>
-      <author><first>István</first><last>Varga</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
       <author><first>Motoki</first><last>Sano</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <author><first>Takao</first><last>Kawai</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Stijn</first><last>De Saeger</last></author>
       <pages>1619–1629</pages>
       <url hash="5375e195">P13-1159</url>
@@ -1486,7 +1486,7 @@
     <paper id="161">
       <title>Joint Inference for Fine-grained Opinion Extraction</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1640–1649</pages>
       <url hash="c975736d">P13-1161</url>
       <bibkey>yang-cardie-2013-joint</bibkey>
@@ -1495,21 +1495,21 @@
       <title>Linguistic Models for Analyzing and Detecting Biased Language</title>
       <author><first>Marta</first><last>Recasens</last></author>
       <author><first>Cristian</first><last>Danescu-Niculescu-Mizil</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1650–1659</pages>
       <url hash="dd55e91e">P13-1162</url>
       <bibkey>recasens-etal-2013-linguistic</bibkey>
     </paper>
     <paper id="163">
       <title>Evaluating a City Exploration Dialogue System with Integrated Question-Answering and Pedestrian Navigation</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Phil</first><last>Bartie</last></author>
       <author><first>Tiphaine</first><last>Dalmas</last></author>
       <author><first>Anna</first><last>Dickinson</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>William</first><last>Mackaness</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>1660–1668</pages>
       <url hash="5017f57d">P13-1163</url>
       <bibkey>janarthanam-etal-2013-evaluating</bibkey>
@@ -1519,8 +1519,8 @@
       <author><first>Svitlana</first><last>Volkova</last></author>
       <author><first>Pallavi</first><last>Choudhury</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1669–1679</pages>
       <url hash="ac821d40">P13-1164</url>
       <bibkey>volkova-etal-2013-lightly</bibkey>
@@ -1577,7 +1577,7 @@
     </paper>
     <paper id="170">
       <title>Why-Question Answering using Intra- and Inter-Sentential Causal Relations</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Chikara</first><last>Hashimoto</last></author>
       <author><first>Motoki</first><last>Sano</last></author>
@@ -1589,9 +1589,9 @@
     </paper>
     <paper id="171">
       <title>Question Answering Using Enhanced Lexical Semantic Models</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <author><first>Andrzej</first><last>Pastusiak</last></author>
       <pages>1744–1753</pages>
       <url hash="4335eb2c">P13-1171</url>
@@ -1633,9 +1633,9 @@
     <meta>
       <booktitle>Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <url hash="524835d9">P13-2</url>
-      <editor><first>Hinrich</first><last>Schuetze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></editor>
       <editor><first>Pascale</first><last>Fung</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -1664,9 +1664,9 @@
     </paper>
     <paper id="3">
       <title>A Tale about <fixed-case>PRO</fixed-case> and Monsters</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>12–17</pages>
       <url hash="ccbe2f01">P13-2003</url>
       <bibkey>nakov-etal-2013-tale</bibkey>
@@ -1706,7 +1706,7 @@
     <paper id="7">
       <title>Natural Language Models for Predicting Programming Comments</title>
       <author><first>Dana</first><last>Movshovitz-Attias</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>35–40</pages>
       <url hash="1bebf2f0">P13-2007</url>
       <bibkey>movshovitz-attias-cohen-2013-natural</bibkey>
@@ -1732,10 +1732,10 @@
     </paper>
     <paper id="10">
       <title>A relatedness benchmark to test the role of determiners in compositional distributional semantics</title>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Marco</first><last>Marelli</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>53–57</pages>
       <url hash="4d0727c5">P13-2010</url>
       <bibkey>bernardi-etal-2013-relatedness</bibkey>
@@ -1748,7 +1748,7 @@
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Lanjun</first><last>Zhou</last></author>
       <author><first>Yulan</first><last>He</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>58–62</pages>
       <url hash="e4e7014e">P13-2011</url>
       <bibkey>wei-etal-2013-empirical</bibkey>
@@ -1773,7 +1773,7 @@
     <paper id="13">
       <title>Aggregated Word Pair Features for Implicit Discourse Relation Disambiguation</title>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>69–73</pages>
       <url hash="d81a4908">P13-2013</url>
       <bibkey>biran-mckeown-2013-aggregated</bibkey>
@@ -1782,7 +1782,7 @@
       <title>Implicatures and Nested Beliefs in Approximate Decentralized-<fixed-case>POMDP</fixed-case>s</title>
       <author><first>Adam</first><last>Vogel</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>74–80</pages>
       <url hash="e30e6dc3">P13-2014</url>
       <bibkey>vogel-etal-2013-implicatures</bibkey>
@@ -1790,7 +1790,7 @@
     <paper id="15">
       <title>Domain-Specific Coreference Resolution with Lexicalized Features</title>
       <author><first>Nathan</first><last>Gilbert</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>81–86</pages>
       <url hash="5c890a5b">P13-2015</url>
       <bibkey>gilbert-riloff-2013-domain</bibkey>
@@ -1812,12 +1812,12 @@
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Oscar</first><last>Täckström</last></author>
       <author><first>Claudia</first><last>Bedini</last></author>
-      <author><first>Núria</first><last>Bertomeu Castelló</last></author>
+      <author id="nuria-bertomeu"><first>Núria</first><last>Bertomeu Castelló</last></author>
       <author><first>Jungmee</first><last>Lee</last></author>
       <pages>92–97</pages>
       <url hash="6502fbd8">P13-2017</url>
@@ -1828,7 +1828,7 @@
       <title>An Empirical Examination of Challenges in <fixed-case>C</fixed-case>hinese Parsing</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Daniel</first><last>Tse</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>98–103</pages>
       <url hash="71dda972">P13-2018</url>
@@ -1856,7 +1856,7 @@
       <title>Arguments and Modifiers from the Learner’s Perspective</title>
       <author><first>Leon</first><last>Bergen</last></author>
       <author><first>Edward</first><last>Gibson</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <pages>115–119</pages>
       <url hash="9c7b2fff">P13-2021</url>
       <bibkey>bergen-etal-2013-arguments</bibkey>
@@ -1865,7 +1865,7 @@
       <title>Benefactive/Malefactive Event and Writer Attitude Annotation</title>
       <author><first>Lingjia</first><last>Deng</last></author>
       <author><first>Yoonjung</first><last>Choi</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>120–125</pages>
       <url hash="0c41e3da">P13-2022</url>
       <bibkey>deng-etal-2013-benefactive</bibkey>
@@ -1880,9 +1880,9 @@
     </paper>
     <paper id="24">
       <title>A Decade of Automatic Content Evaluation of News Summaries: Reassessing the State of the Art</title>
-      <author><first>Peter A.</first><last>Rankel</last></author>
-      <author><first>John M.</first><last>Conroy</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="peter-a-rankel"><first>Peter A.</first><last>Rankel</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>131–136</pages>
       <url hash="553b9882">P13-2024</url>
@@ -1897,7 +1897,7 @@
     </paper>
     <paper id="26">
       <title>Automated Pyramid Scoring of Summaries using Distributional Semantics</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Emily</first><last>Chen</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
       <author><first>Dolores</first><last>Perin</last></author>
@@ -1909,15 +1909,15 @@
       <title>Are Semantically Coherent Topic Models Useful for Ad Hoc Information Retrieval?</title>
       <author><first>Romain</first><last>Deveaud</last></author>
       <author><first>Eric</first><last>SanJuan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>148–152</pages>
       <url hash="bfcf7a60">P13-2027</url>
       <bibkey>deveaud-etal-2013-semantically</bibkey>
     </paper>
     <paper id="28">
       <title>Post-Retrieval Clustering Using Third-Order Similarity Measures</title>
-      <author><first>José G.</first><last>Moreno</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="jose-g-moreno"><first>José G.</first><last>Moreno</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
       <pages>153–158</pages>
       <url hash="49ee7164">P13-2028</url>
@@ -1984,7 +1984,7 @@
       <author><first>Naresh Kumar</first><last>Elluru</last></author>
       <author><first>Anandaswarup</first><last>Vadapalli</last></author>
       <author><first>Raghavendra</first><last>Elluru</last></author>
-      <author><first>Hema</first><last>Murthy</last></author>
+      <author id="hema-a-murthy"><first>Hema</first><last>Murthy</last></author>
       <author><first>Kishore</first><last>Prahallad</last></author>
       <pages>196–200</pages>
       <url hash="a6dc7870">P13-2035</url>
@@ -2021,7 +2021,7 @@
     <paper id="39">
       <title><fixed-case>T</fixed-case>opic<fixed-case>S</fixed-case>pam: a Topic-Model based approach for spam detection</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Sujian</first><last>Li</last></author>
       <pages>217–221</pages>
       <url hash="12c8b11b">P13-2039</url>
@@ -2037,7 +2037,7 @@
     </paper>
     <paper id="41">
       <title>Unsupervised joke generation from big data</title>
-      <author><first>Saša</first><last>Petrović</last></author>
+      <author id="sasa-petrovic"><first>Saša</first><last>Petrović</last></author>
       <author><first>David</first><last>Matthews</last></author>
       <pages>228–232</pages>
       <url hash="1c8081fc">P13-2041</url>
@@ -2046,7 +2046,7 @@
     <paper id="42">
       <title>Modeling of term-distance and term-occurrence information for improving n-gram language model performance</title>
       <author><first>Tze Yuang</first><last>Chong</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Eng Siong</first><last>Chng</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>233–237</pages>
@@ -2076,7 +2076,7 @@
       <title>Random Walk Factoid Annotation for Collective Discourse</title>
       <author><first>Ben</first><last>King</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Robert</first><last>Mankoff</last></author>
       <pages>249–254</pages>
       <url hash="2f54de54">P13-2045</url>
@@ -2085,8 +2085,8 @@
     <paper id="46">
       <title>Identifying <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>ungarian Light Verb Constructions: A Contrastive Approach</title>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>255–261</pages>
       <url hash="b4e034c7">P13-2046</url>
       <bibkey>vincze-etal-2013-identifying</bibkey>
@@ -2105,7 +2105,7 @@
       <title><fixed-case>I</fixed-case>ndo<fixed-case>N</fixed-case>et: A Multilingual Lexical Knowledge Network for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Brijesh</first><last>Bhatt</last></author>
       <author><first>Lahari</first><last>Poddar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>268–272</pages>
       <url hash="128cfd68">P13-2048</url>
       <bibkey>bhatt-etal-2013-indonet</bibkey>
@@ -2145,7 +2145,7 @@
       <author><first>Anna Lisa</first><last>Gentile</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Eva</first><last>Blomqvist</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <pages>289–293</pages>
       <url hash="e5e91e48">P13-2052</url>
       <bibkey>zhang-etal-2013-mining</bibkey>
@@ -2156,14 +2156,14 @@
       <author><first>Lay-Ki</first><last>Soon</last></author>
       <author><first>Tek Yong</first><last>Lim</last></author>
       <author><first>Enya Kong</first><last>Tang</last></author>
-      <author><first>Bali</first><last>Ranaivo-Malançon</last></author>
+      <author id="bali-ranaivo-malancon"><first>Bali</first><last>Ranaivo-Malançon</last></author>
       <pages>294–299</pages>
       <url hash="8d0a5c9f">P13-2053</url>
       <bibkey>lim-etal-2013-context</bibkey>
     </paper>
     <paper id="54">
       <title><fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish versus <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish: An Empirical Comparison</title>
-      <author><first>Kyumars</first><last>Sheykh Esmaili</last></author>
+      <author id="kyumars-sheykh-esmaili"><first>Kyumars</first><last>Sheykh Esmaili</last></author>
       <author><first>Shahin</first><last>Salavati</last></author>
       <pages>300–305</pages>
       <url hash="323ddbfd">P13-2054</url>
@@ -2173,7 +2173,7 @@
       <title>Enhanced and Portable Dependency Projection Algorithms Using Interlinear Glossed Text</title>
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William D.</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William D.</first><last>Lewis</last></author>
       <pages>306–311</pages>
       <url hash="d0178e6a">P13-2055</url>
       <bibkey>georgi-etal-2013-enhanced</bibkey>
@@ -2181,7 +2181,7 @@
     <paper id="56">
       <title>Cross-lingual Projections between Languages from Different Families</title>
       <author><first>Mo</first><last>Yu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Yalong</first><last>Bai</last></author>
       <author><first>Hao</first><last>Tian</last></author>
       <author><first>Dianhai</first><last>Yu</last></author>
@@ -2209,8 +2209,8 @@
     </paper>
     <paper id="59">
       <title>Sign Language Lexical Recognition With Propositional Dynamic Logic</title>
-      <author><first>Arturo</first><last>Curiel</last></author>
-      <author><first>Christophe</first><last>Collet</last></author>
+      <author id="arturo-curiel"><first>Arturo</first><last>Curiel</last></author>
+      <author id="christophe-collet"><first>Christophe</first><last>Collet</last></author>
       <pages>328–333</pages>
       <url hash="0b3f6e58">P13-2059</url>
       <bibkey>curiel-collet-2013-sign</bibkey>
@@ -2237,7 +2237,7 @@
     <paper id="62">
       <title>Automatically Predicting Sentence Translation Difficulty</title>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <pages>346–351</pages>
       <url hash="db2cca90">P13-2062</url>
@@ -2265,7 +2265,7 @@
     <paper id="65">
       <title>Stem Translation with Affix-Based Rule Selection for Agglutinative Languages</title>
       <author><first>Zhiyang</first><last>Wang</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Meng</first><last>Sun</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>364–369</pages>
@@ -2276,7 +2276,7 @@
       <title>A Novel Translation Framework Based on <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory</title>
       <author><first>Mei</first><last>Tu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>370–374</pages>
       <url hash="f0f0e9a4">P13-2066</url>
       <bibkey>tu-etal-2013-novel</bibkey>
@@ -2294,9 +2294,9 @@
     <paper id="68">
       <title>Bilingual Lexical Cohesion Trigger Model for Document-Level Machine Translation</title>
       <author><first>Guosheng</first><last>Ben</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Zhiyang</first><last>Teng</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>382–386</pages>
       <url hash="0030cc5e">P13-2068</url>
@@ -2313,7 +2313,7 @@
     <paper id="70">
       <title>A Tightly-coupled Unsupervised Clustering and Bilingual Alignment Model for Transliteration</title>
       <author><first>Tingting</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Chunyue</first><last>Zhang</last></author>
       <pages>393–398</pages>
@@ -2323,7 +2323,7 @@
     <paper id="71">
       <title>Can <fixed-case>M</fixed-case>arkov Models Over Minimal Translation Units Help Phrase-Based <fixed-case>SMT</fixed-case>?</title>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <author><first>Hieu</first><last>Hoang</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
@@ -2334,7 +2334,7 @@
     <paper id="72">
       <title>Learning Non-linear Features for Machine Translation Using Gradient Boosting Machines</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Byung-Gyu</first><last>Ahn</last></author>
+      <author id="byung-gyu-ahn"><first>Byung-Gyu</first><last>Ahn</last></author>
       <pages>406–411</pages>
       <url hash="9387d2aa">P13-2072</url>
       <bibkey>toutanova-ahn-2013-learning</bibkey>
@@ -2378,14 +2378,14 @@
       <title>Latent Semantic Tensor Indexing for Community-based Question Answering</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Le</first><last>Tian</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>434–439</pages>
       <url hash="9d8420c2">P13-2077</url>
       <bibkey>qiu-etal-2013-latent</bibkey>
     </paper>
     <paper id="78">
       <title>Measuring semantic content in distributional vectors</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Mohan</first><last>Ganesalingam</last></author>
       <pages>440–445</pages>
       <url hash="d0079b22">P13-2078</url>
@@ -2413,7 +2413,7 @@
     <paper id="81">
       <title>Sentence Level Dialect Identification in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>456–461</pages>
       <url hash="2c5c4771">P13-2081</url>
       <bibkey>elfardy-diab-2013-sentence</bibkey>
@@ -2433,7 +2433,7 @@
       <author><first>Huiying</first><last>Li</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>467–473</pages>
       <url hash="43bd7a52">P13-2083</url>
       <bibkey>goyal-etal-2013-structured-distributional</bibkey>
@@ -2484,7 +2484,7 @@
       <title>Generating Recommendation Dialogs by Extracting Information from User Reviews</title>
       <author><first>Kevin</first><last>Reschke</last></author>
       <author><first>Adam</first><last>Vogel</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>499–504</pages>
       <url hash="cc14d04f">P13-2089</url>
       <bibkey>reschke-etal-2013-generating</bibkey>
@@ -2502,7 +2502,7 @@
       <title>Joint Modeling of News Reader’s and Comment Writer’s Emotions</title>
       <author><first>Huanhuan</first><last>Liu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <pages>511–515</pages>
@@ -2511,8 +2511,8 @@
     </paper>
     <paper id="92">
       <title>An annotated corpus of quoted opinions in news articles</title>
-      <author><first>Tim</first><last>O’Keefe</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="tim-okeefe"><first>Tim</first><last>O’Keefe</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Peter</first><last>Ashwell</last></author>
       <author><first>Irena</first><last>Koprinska</last></author>
       <pages>516–520</pages>
@@ -2525,7 +2525,7 @@
       <author><first>Tao</first><last>Wang</last></author>
       <author><first>Xuelei</first><last>Hu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>521–525</pages>
       <url hash="74323100">P13-2093</url>
       <bibkey>xia-etal-2013-dual</bibkey>
@@ -2539,8 +2539,8 @@
     </paper>
     <paper id="95">
       <title>Extracting Definitions and Hypernym Relations relying on Syntactic Dependencies and Support Vector Machines</title>
-      <author><first>Guido</first><last>Boella</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <pages>532–537</pages>
       <url hash="c4350cbf">P13-2095</url>
       <bibkey>boella-di-caro-2013-extracting</bibkey>
@@ -2549,16 +2549,16 @@
       <title>Neighbors Help: Bilingual Unsupervised <fixed-case>WSD</fixed-case> Using Context</title>
       <author><first>Sudha</first><last>Bhingardive</last></author>
       <author><first>Samiulla</first><last>Shaikh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>538–542</pages>
       <url hash="2d255e9c">P13-2096</url>
       <bibkey>bhingardive-etal-2013-neighbors</bibkey>
     </paper>
     <paper id="97">
       <title>Reducing Annotation Effort for Quality Estimation via Active Learning</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>543–548</pages>
       <url hash="4d0a671e">P13-2097</url>
       <bibkey>beck-etal-2013-reducing</bibkey>
@@ -2567,10 +2567,10 @@
       <title>Reranking with Linguistic and Semantic Features for <fixed-case>A</fixed-case>rabic Optical Character Recognition</title>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <author><first>Noura</first><last>Farra</last></author>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>549–555</pages>
       <url hash="7187ff32">P13-2098</url>
       <bibkey>tomeh-etal-2013-reranking</bibkey>
@@ -2604,7 +2604,7 @@
       <title>A System for Summarizing Scientific Topics Starting from Keywords</title>
       <author><first>Rahul</first><last>Jha</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>572–577</pages>
       <url hash="9e6a3d7f">P13-2102</url>
       <bibkey>jha-etal-2013-system</bibkey>
@@ -2628,7 +2628,7 @@
       <title>Iterative Transformation of Annotation Guidelines for Constituency Parsing</title>
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Wenbin</first><last>Jiang</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>591–596</pages>
       <url hash="ab102e90">P13-2105</url>
@@ -2637,7 +2637,7 @@
     <paper id="106">
       <title>Nonparametric <fixed-case>B</fixed-case>ayesian Inference and Efficient Parsing for Tree-adjoining Grammars</title>
       <author><first>Elif</first><last>Yamangil</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>597–603</pages>
       <url hash="9eb17ab5">P13-2106</url>
       <bibkey>yamangil-shieber-2013-nonparametric</bibkey>
@@ -2646,7 +2646,7 @@
       <title>Using <fixed-case>CCG</fixed-case> categories to improve <fixed-case>H</fixed-case>indi dependency parsing</title>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>604–609</pages>
       <url hash="0a4b0971">P13-2107</url>
       <bibkey>ambati-etal-2013-using</bibkey>
@@ -2654,16 +2654,16 @@
     <paper id="108">
       <title>The Effect of Higher-Order Dependency Features in Discriminative Phrase-Structure Parsing</title>
       <author><first>Greg</first><last>Coppola</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>610–616</pages>
       <url hash="ecbcba61">P13-2108</url>
       <bibkey>coppola-steedman-2013-effect</bibkey>
     </paper>
     <paper id="109">
       <title>Turning on the Turbo: Fast Third-Order Non-Projective Turbo Parsers</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Miguel</first><last>Almeida</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="miguel-b-almeida"><first>Miguel</first><last>Almeida</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>617–622</pages>
       <url hash="f8183a41">P13-2109</url>
       <bibkey>martins-etal-2013-turning</bibkey>
@@ -2671,7 +2671,7 @@
     <paper id="110">
       <title>A Lattice-based Framework for Joint <fixed-case>C</fixed-case>hinese Word Segmentation, <fixed-case>POS</fixed-case> Tagging and Parsing</title>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>623–627</pages>
       <url hash="7f7b039d">P13-2110</url>
@@ -2688,7 +2688,7 @@
     </paper>
     <paper id="112">
       <title>Simpler unsupervised <fixed-case>POS</fixed-case> tagging with bilingual projections</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
@@ -2698,15 +2698,15 @@
     </paper>
     <paper id="113">
       <title>Part-of-speech tagging with antagonistic adversaries</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>640–644</pages>
       <url hash="c9a318e8">P13-2113</url>
       <bibkey>sogaard-2013-part</bibkey>
     </paper>
     <paper id="114">
       <title>Temporal Signals Help Label Temporal Relations</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>645–650</pages>
       <url hash="735649a4">P13-2114</url>
       <bibkey>derczynski-gaizauskas-2013-temporal</bibkey>
@@ -2714,7 +2714,7 @@
     <paper id="115">
       <title>Diverse Keyword Extraction from Conversations</title>
       <author><first>Maryam</first><last>Habibi</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>651–657</pages>
       <url hash="3fa0ef4f">P13-2115</url>
       <bibkey>habibi-popescu-belis-2013-diverse</bibkey>
@@ -2731,9 +2731,9 @@
     <paper id="117">
       <title>Filling Knowledge Base Gaps for Distant Supervision of Relation Extraction</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Raphael</first><last>Hoffmann</last></author>
+      <author id="raphael-hoffmann"><first>Raphael</first><last>Hoffmann</last></author>
       <author><first>Le</first><last>Zhao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>665–670</pages>
       <url hash="2d3cbe54">P13-2117</url>
       <bibkey>xu-etal-2013-filling</bibkey>
@@ -2741,7 +2741,7 @@
     <paper id="118">
       <title>Joint Apposition Extraction with Syntactic and Semantic Constraints</title>
       <author><first>Will</first><last>Radford</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>671–677</pages>
       <url hash="7e7734c0">P13-2118</url>
       <bibkey>radford-curran-2013-joint</bibkey>
@@ -2758,7 +2758,7 @@
     </paper>
     <paper id="120">
       <title>Mapping Source to Target Strings without Alignment by Analogical Learning: A Case Study with Transliteration</title>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>684–689</pages>
       <url hash="1012b3e5">P13-2120</url>
       <bibkey>langlais-2013-mapping</bibkey>
@@ -2767,7 +2767,7 @@
       <title>Scalable Modified <fixed-case>K</fixed-case>neser-<fixed-case>N</fixed-case>ey Language Model Estimation</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Ivan</first><last>Pouzyrevsky</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>690–696</pages>
       <url hash="ce961214">P13-2121</url>
@@ -2776,9 +2776,9 @@
     <paper id="122">
       <title>Incremental Topic-Based Translation Model Adaptation for Conversational Spoken Language Translation</title>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>697–701</pages>
       <url hash="e877ec30">P13-2122</url>
       <bibkey>hewavitharana-etal-2013-incremental</bibkey>
@@ -2797,7 +2797,7 @@
       <title>A Learner Corpus-based Approach to Verb Suggestion for <fixed-case>ESL</fixed-case></title>
       <author><first>Yu</first><last>Sawai</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>708–713</pages>
       <url hash="35fcd246">P13-2124</url>
       <bibkey>sawai-etal-2013-learner</bibkey>
@@ -2822,18 +2822,18 @@
     </paper>
     <paper id="127">
       <title>Annotation of regular polysemy and underspecification</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Bolette</first><last>Sandford Pedersen</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Sandford Pedersen</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>725–730</pages>
       <url hash="558dd519">P13-2127</url>
       <bibkey>martinez-alonso-etal-2013-annotation</bibkey>
     </paper>
     <paper id="128">
       <title>Derivational Smoothing for Syntactic Distributional Semantics</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Britta</first><last>Zeller</last></author>
+      <author id="britta-zeller"><first>Britta</first><last>Zeller</last></author>
       <pages>731–735</pages>
       <url hash="a5aa60df">P13-2128</url>
       <bibkey>pado-etal-2013-derivational</bibkey>
@@ -2841,7 +2841,7 @@
     <paper id="129">
       <title>Diathesis alternation approximation for verb clustering</title>
       <author><first>Lin</first><last>Sun</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>736–741</pages>
       <url hash="2af42762">P13-2129</url>
@@ -2850,7 +2850,7 @@
     <paper id="130">
       <title>Outsourcing <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et to the Crowd</title>
       <author><first>Marco</first><last>Fossati</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <pages>742–747</pages>
       <url hash="525f25bc">P13-2130</url>
@@ -2877,7 +2877,7 @@
       <title>Context Vector Disambiguation for Bilingual Lexicon Extraction from Comparable Corpora</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>759–764</pages>
       <url hash="c9596b69">P13-2133</url>
       <bibkey>bouamor-etal-2013-context</bibkey>
@@ -2885,18 +2885,18 @@
     <paper id="134">
       <title>The Effects of Lexical Resource Quality on Preference Violation Detection</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>765–770</pages>
       <url hash="4bed4893">P13-2134</url>
       <bibkey>dunietz-etal-2013-effects</bibkey>
     </paper>
     <paper id="135">
       <title>Exploiting Qualitative Information from Automatic Word Alignment for Cross-lingual <fixed-case>NLP</fixed-case> Tasks</title>
-      <author><first>José G.C.</first><last>de Souza</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.C.</first><last>de Souza</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>771–776</pages>
       <url hash="a1a51456">P13-2135</url>
       <bibkey>de-souza-etal-2013-exploiting</bibkey>
@@ -2904,7 +2904,7 @@
     <paper id="136">
       <title>An Information Theoretic Approach to Bilingual Word Clustering</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>777–783</pages>
       <url hash="ece6a04f">P13-2136</url>
       <bibkey>faruqui-dyer-2013-information</bibkey>
@@ -2912,8 +2912,8 @@
     <paper id="137">
       <title>Building and Evaluating a Distributional Memory for <fixed-case>C</fixed-case>roatian</title>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>784–789</pages>
       <url hash="1378c829">P13-2137</url>
       <bibkey>snajder-etal-2013-building</bibkey>
@@ -2922,8 +2922,8 @@
       <title>Generalizing Image Captions for Image-Text Parallel Corpus</title>
       <author><first>Polina</first><last>Kuznetsova</last></author>
       <author><first>Vicente</first><last>Ordonez</last></author>
-      <author><first>Alexander</first><last>Berg</last></author>
-      <author><first>Tamara</first><last>Berg</last></author>
+      <author id="alexander-berg"><first>Alexander</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara</first><last>Berg</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>790–796</pages>
       <url hash="79bd0c67">P13-2138</url>
@@ -2980,8 +2980,8 @@
       <title>Identifying Opinion Subgroups in <fixed-case>A</fixed-case>rabic Online Discussions</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Ben</first><last>King</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>829–835</pages>
       <url hash="abef009a">P13-2144</url>
       <bibkey>abu-jbara-etal-2013-identifying</bibkey>
@@ -2989,10 +2989,10 @@
     <paper id="145">
       <title>Extracting Events with Informal Temporal References in Personal Histories in Online Communities</title>
       <author><first>Miaomiao</first><last>Wen</last></author>
-      <author><first>Zeyu</first><last>Zheng</last></author>
+      <author id="ze-yu-zheng"><first>Zeyu</first><last>Zheng</last></author>
       <author><first>Hyeju</first><last>Jang</last></author>
       <author><first>Guang</first><last>Xiang</last></author>
-      <author><first>Carolyn</first><last>Penstein Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein Rosé</last></author>
       <pages>836–842</pages>
       <url hash="b91f7030">P13-2145</url>
       <bibkey>wen-etal-2013-extracting</bibkey>
@@ -3000,7 +3000,7 @@
     <paper id="146">
       <title>Multimodal <fixed-case>DBN</fixed-case> for Predicting High-Quality Answers in c<fixed-case>QA</fixed-case> portals</title>
       <author><first>Haifeng</first><last>Hu</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Baoxun</first><last>Wang</last></author>
       <author><first>Ming</first><last>Liu</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
@@ -3011,7 +3011,7 @@
     <paper id="147">
       <title>Bi-directional Inter-dependencies of Subjective Expressions and Targets and their Value for a Joint Model</title>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>848–854</pages>
       <url hash="3bd8175c">P13-2147</url>
       <bibkey>klinger-cimiano-2013-bi</bibkey>
@@ -3019,7 +3019,7 @@
     <paper id="148">
       <title>Identifying Sentiment Words Using an Optimization-based Model without Seed Words</title>
       <author><first>Hongliang</first><last>Yu</last></author>
-      <author><first>Zhi-Hong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhi-Hong</first><last>Deng</last></author>
       <author><first>Shiyingxue</first><last>Li</last></author>
       <pages>855–859</pages>
       <url hash="1909f5bc">P13-2148</url>
@@ -3029,7 +3029,7 @@
       <title>Detecting Turnarounds in Sentiment Analysis: Thwarting</title>
       <author><first>Ankit</first><last>Ramteke</last></author>
       <author><first>Akshat</first><last>Malu</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>J. Saketha</first><last>Nath</last></author>
       <pages>860–865</pages>
       <url hash="bec4a832">P13-2149</url>
@@ -3053,7 +3053,7 @@
     </paper>
     <paper id="152">
       <title>Word surprisal predicts N400 amplitude during reading</title>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <author><first>Leun J.</first><last>Otten</last></author>
       <author><first>Giulia</first><last>Galli</last></author>
       <author><first>Gabriella</first><last>Vigliocco</last></author>
@@ -3064,7 +3064,7 @@
     <paper id="153">
       <title>Computerized Analysis of a Verbal Fluency Test</title>
       <author><first>James O.</first><last>Ryan</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <author><first>Susan</first><last>Marino</last></author>
       <author><first>Charles</first><last>Bernick</last></author>
       <author><first>Sarah</first><last>Banks</last></author>
@@ -3089,9 +3089,9 @@
       <editor><first>Anik</first><last>Dey</last></editor>
       <editor><first>Sebastian</first><last>Krause</last></editor>
       <editor><first>Ivelina</first><last>Nikolova</last></editor>
-      <editor><first>Eva</first><last>Vecchi</last></editor>
+      <editor id="eva-maria-vecchi"><first>Eva</first><last>Vecchi</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
-      <editor><first>Preslav I.</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav I.</first><last>Nakov</last></editor>
       <editor><first>Feiyu</first><last>Xu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
@@ -3106,7 +3106,7 @@
     <paper id="1">
       <title>Categorization of <fixed-case>T</fixed-case>urkish News Documents with Morphological Analysis</title>
       <author><first>Burak Kerim</first><last>Akkuş</last></author>
-      <author><first>Ruket</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
       <pages>1–8</pages>
       <url hash="3f856032">P13-3001</url>
       <bibkey>akkus-cakici-2013-categorization</bibkey>
@@ -3136,7 +3136,7 @@
       <title>Survey on parsing three dependency representations for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Angelina</first><last>Ivanova</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>31–37</pages>
       <url hash="60a550d7">P13-3005</url>
       <bibkey>ivanova-etal-2013-survey</bibkey>
@@ -3161,7 +3161,7 @@
       <title>Automated Collocation Suggestion for <fixed-case>J</fixed-case>apanese Second Language Learners</title>
       <author><first>Lis</first><last>Pereira</last></author>
       <author><first>Erlyn</first><last>Manguilimotan</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>52–58</pages>
       <url hash="a9dab496">P13-3008</url>
       <bibkey>pereira-etal-2013-automated</bibkey>
@@ -3175,7 +3175,7 @@
     </paper>
     <paper id="10">
       <title>Topic Modeling Based Classification of Clinical Reports</title>
-      <author><first>Efsun</first><last>Sarioglu</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu</last></author>
       <author><first>Kabir</first><last>Yadav</last></author>
       <author><first>Hyeong-Ah</first><last>Choi</last></author>
       <pages>67–73</pages>
@@ -3223,7 +3223,7 @@
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Akiko</first><last>Murakami</last></author>
       <author><first>Miki</first><last>Enoki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>110–116</pages>
       <url hash="cd50da74">P13-3016</url>
@@ -3263,7 +3263,7 @@
     <paper id="21">
       <title>Simple, readable sub-sentences</title>
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>142–149</pages>
       <url hash="ca19ead3">P13-3021</url>
       <bibkey>klerke-sogaard-2013-simple</bibkey>
@@ -3278,7 +3278,7 @@
     <paper id="23">
       <title>Robust multilingual statistical morphological generation models</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>158–164</pages>
       <url hash="30d5e993">P13-3023</url>
       <bibkey>dusek-jurcicek-2013-robust</bibkey>
@@ -3322,7 +3322,7 @@
       <author><first>Seid Muhie</first><last>Yimam</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1–6</pages>
       <url hash="4f486fcb">P13-4001</url>
       <bibkey>yimam-etal-2013-webanno</bibkey>
@@ -3331,7 +3331,7 @@
       <title>A Stacking-based Approach to <fixed-case>T</fixed-case>witter User Geolocation Prediction</title>
       <author><first>Bo</first><last>Han</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>7–12</pages>
       <url hash="6d187059">P13-4002</url>
       <bibkey>han-etal-2013-stacking</bibkey>
@@ -3339,7 +3339,7 @@
     <paper id="3">
       <title>An Open Source Toolkit for Quantitative Historical Linguistics</title>
       <author><first>Johann-Mattis</first><last>List</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>13–18</pages>
       <url hash="2ccce806">P13-4003</url>
       <bibkey>list-moran-2013-open</bibkey>
@@ -3347,9 +3347,9 @@
     <paper id="4">
       <title><fixed-case>A</fixed-case>nno<fixed-case>M</fixed-case>arket: An Open Cloud Platform for <fixed-case>NLP</fixed-case></title>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Marin</first><last>Dimitrov</last></author>
       <pages>19–24</pages>
       <url hash="9755b66e">P13-4004</url>
@@ -3357,17 +3357,17 @@
     </paper>
     <paper id="5">
       <title>Detecting Event-Related Links and Sentiments from Social Media Texts</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>25–30</pages>
       <url hash="b9a10dc1">P13-4005</url>
       <bibkey>balahur-tanev-2013-detecting</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>DISSECT</fixed-case> - <fixed-case>DIS</fixed-case>tributional <fixed-case>SE</fixed-case>mantics Composition Toolkit</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>31–36</pages>
       <url hash="fba87741">P13-4006</url>
       <bibkey>dinu-etal-2013-dissect</bibkey>
@@ -3387,7 +3387,7 @@
       <title>Extending an interoperable platform to facilitate the creation of multilingual and multimodal <fixed-case>NLP</fixed-case> applications</title>
       <author><first>Georgios</first><last>Kontonatsios</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Claudiu</first><last>Mihăilă</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
@@ -3399,7 +3399,7 @@
       <title><fixed-case>F</fixed-case>udan<fixed-case>NLP</fixed-case>: A Toolkit for <fixed-case>C</fixed-case>hinese Natural Language Processing</title>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>49–54</pages>
       <url hash="ed28ba19">P13-4009</url>
       <bibkey>qiu-etal-2013-fudannlp</bibkey>
@@ -3418,10 +3418,10 @@
     <paper id="11">
       <title>Meet <fixed-case>EDGAR</fixed-case>, a tutoring agent at <fixed-case>MONSERRATE</fixed-case></title>
       <author><first>Pedro</first><last>Fialho</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Sérgio</first><last>Curto</last></author>
       <author><first>Pedro</first><last>Cláudio</last></author>
-      <author><first>Ângela</first><last>Costa</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
       <author><first>Alberto</first><last>Abad</last></author>
       <author><first>Hugo</first><last>Meinedo</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
@@ -3452,8 +3452,8 @@
       <title><fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st - A translation quality estimation framework</title>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
-      <author><first>Jose G.C.</first><last>de Souza</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="jose-g-c-de-souza"><first>Jose G.C.</first><last>de Souza</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>79–84</pages>
       <url hash="1a2e1845">P13-4014</url>
       <bibkey>specia-etal-2013-quest</bibkey>
@@ -3496,7 +3496,7 @@
     <paper id="19">
       <title>A Visual Analytics System for Cluster Exploration</title>
       <author><first>Andreas</first><last>Lamprecht</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Christian</first><last>Rohrdantz</last></author>
       <author><first>Tina</first><last>Bögel</last></author>
       <pages>109–114</pages>
@@ -3546,10 +3546,10 @@
     <paper id="24">
       <title><fixed-case>L</fixed-case>inggle: a Web-scale Linguistic Search Engine for Words in Context</title>
       <author><first>Joanne</first><last>Boisson</last></author>
-      <author><first>Ting-Hui</first><last>Kao</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="ting-hui-kao"><first>Ting-Hui</first><last>Kao</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>139–144</pages>
       <url hash="7b7d9c43">P13-4024</url>
       <bibkey>boisson-etal-2013-linggle</bibkey>
@@ -3565,13 +3565,13 @@
     </paper>
     <paper id="26">
       <title><fixed-case>PATHS</fixed-case>: A System for Accessing Cultural Heritage Collections</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <author><first>Samuel</first><last>Fernando</last></author>
       <author><first>Paula</first><last>Goodale</last></author>
-      <author><first>Mark</first><last>Hall</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="mark-hall"><first>Mark</first><last>Hall</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>151–156</pages>
       <url hash="f9992c0d">P13-4026</url>
@@ -3591,8 +3591,8 @@
       <author><first>Vasile</first><last>Rus</last></author>
       <author><first>Mihai</first><last>Lintean</last></author>
       <author><first>Rajendra</first><last>Banjade</last></author>
-      <author><first>Nobal</first><last>Niraula</last></author>
-      <author><first>Dan</first><last>Stefanescu</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal</first><last>Niraula</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Stefanescu</last></author>
       <pages>163–168</pages>
       <url hash="ddb82195">P13-4028</url>
       <bibkey>rus-etal-2013-semilar</bibkey>
@@ -3602,8 +3602,8 @@
       <author><first>Kapila</first><last>Ponnamperuma</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <author><first>Cheng</first><last>Zeng</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>René</first><last>van der Wal</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="rene-van-der-wal"><first>René</first><last>van der Wal</last></author>
       <pages>169–174</pages>
       <url hash="abffcece">P13-4029</url>
       <bibkey>ponnamperuma-etal-2013-tag2blog</bibkey>
@@ -3611,19 +3611,19 @@
     <paper id="30">
       <title><fixed-case>T</fixed-case>rans<fixed-case>D</fixed-case>oop: A Map-Reduce based Crowdsourced Translation for Complex Domain</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Shourya</first><last>Roy</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>175–180</pages>
       <url hash="92a774f2">P13-4030</url>
       <bibkey>kunchukuttan-etal-2013-transdoop</bibkey>
     </paper>
     <paper id="31">
       <title>t<fixed-case>SEARCH</fixed-case>: Flexible and Fast Search over Automatic Translations for Improved Quality/Error Analysis</title>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
       <author><first>Laura</first><last>Mascarell</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>181–186</pages>
       <url hash="ffaf8907">P13-4031</url>
       <bibkey>gonzalez-etal-2013-tsearch</bibkey>
@@ -3632,7 +3632,7 @@
       <title><fixed-case>VSEM</fixed-case>: An open library for visual semantics representation</title>
       <author><first>Elia</first><last>Bruni</last></author>
       <author><first>Ulisse</first><last>Bordignon</last></author>
-      <author><first>Adam</first><last>Liska</last></author>
+      <author id="adam-liska"><first>Adam</first><last>Liska</last></author>
       <author><first>Jasper</first><last>Uijlings</last></author>
       <author><first>Irina</first><last>Sergienya</last></author>
       <pages>187–192</pages>
@@ -3643,7 +3643,7 @@
       <title><fixed-case>D</fixed-case>ocent: A Document-Level Decoder for Phrase-Based Statistical Machine Translation</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>193–198</pages>
       <url hash="0e19f244">P13-4033</url>
@@ -3653,7 +3653,7 @@
       <title>Mr. <fixed-case>MIRA</fixed-case>: Open-Source Large-Margin Structured Learning on <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe</title>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Ke</first><last>Wu</last></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>199–204</pages>
@@ -3666,7 +3666,7 @@
       <booktitle>Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Tutorials)</booktitle>
       <url hash="fdba7fd9">P13-5</url>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Keith</first><last>Hall</last></editor>
+      <editor id="keith-hall"><first>Keith</first><last>Hall</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -3680,7 +3680,7 @@
     <paper id="1">
       <title>Visual Features for Linguists: Basic image analysis techniques for multimodally-curious <fixed-case>NLP</fixed-case>ers</title>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1</pages>
       <url hash="7705abba">P13-5001</url>
       <bibkey>bruni-baroni-2013-visual</bibkey>
@@ -3689,7 +3689,7 @@
       <title>Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars</title>
       <author><first>Yoav</first><last>Artzi</last></author>
       <author><first>Nicholas</first><last>FitzGerald</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2</pages>
       <url hash="eba9ae7a">P13-5002</url>
       <bibkey>artzi-etal-2013-semantic</bibkey>
@@ -3703,7 +3703,7 @@
     </paper>
     <paper id="4">
       <title>Exploiting Social Media for Natural Language Processing: Bridging the Gap between Language-centric and Real-world Applications</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Andrea</first><last>Zielinski</last></author>
       <pages>5–6</pages>
       <url hash="81e8d4d1">P13-5004</url>
diff --git a/data/xml/P14.xml b/data/xml/P14.xml
index acdf203fda..f2f5202bdc 100644
--- a/data/xml/P14.xml
+++ b/data/xml/P14.xml
@@ -53,7 +53,7 @@
     <paper id="4">
       <title>Discovering Latent Structure in Task-Oriented Dialogues</title>
       <author><first>Ke</first><last>Zhai</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <pages>36–46</pages>
       <url hash="885b81dc">P14-1004</url>
       <doi>10.3115/v1/P14-1004</doi>
@@ -73,7 +73,7 @@
     <paper id="6">
       <title>Multilingual Models for Compositional Distributed Semantics</title>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>58–68</pages>
       <url hash="c8ff6c36">P14-1006</url>
       <doi>10.3115/v1/P14-1006</doi>
@@ -83,7 +83,7 @@
     <paper id="7">
       <title>Simple Negation Scope Resolution through Deep Parsing: A Semantic Solution to a Semantic Problem</title>
       <author><first>Woodley</first><last>Packard</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Jonathon</first><last>Read</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
@@ -109,8 +109,8 @@
     <paper id="9">
       <title>A practical and linguistically-motivated approach to compositional distributional semantics</title>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>90–99</pages>
       <url hash="fc835f9c">P14-1009</url>
       <doi>10.3115/v1/P14-1009</doi>
@@ -136,7 +136,7 @@
       <author><first>Shujie</first><last>Liu</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>111–121</pages>
       <url hash="a4a0dd5e">P14-1011</url>
       <doi>10.3115/v1/P14-1011</doi>
@@ -164,7 +164,7 @@
       <author><first>Qiming</first><last>Chen</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <pages>133–143</pages>
       <url hash="4873275c">P14-1013</url>
       <doi>10.3115/v1/P14-1013</doi>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="15">
       <title>Unsupervised Solution Post Identification from Discussion Forums</title>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <author><first>Karthik</first><last>Visweswariah</last></author>
       <pages>155–164</pages>
       <url hash="f59d5d8d">P14-1015</url>
@@ -199,7 +199,7 @@
       <title>Weakly Supervised User Profile Extraction from <fixed-case>T</fixed-case>witter</title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>165–174</pages>
       <url hash="2e2bf6b6">P14-1016</url>
       <doi>10.3115/v1/P14-1016</doi>
@@ -281,9 +281,9 @@
     </paper>
     <paper id="23">
       <title>Don’t count, predict! A systematic comparison of context-counting vs. context-predicting semantic vectors</title>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <pages>238–247</pages>
       <url hash="fc001217">P14-1023</url>
       <doi>10.3115/v1/P14-1023</doi>
@@ -295,8 +295,8 @@
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Leonid</first><last>Boytsov</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>248–258</pages>
       <url hash="4344a1df">P14-1024</url>
       <doi>10.3115/v1/P14-1024</doi>
@@ -308,9 +308,9 @@
       <title>Learning Word Sense Distributions, Detecting Unattested Senses and Identifying Novel Senses Using Topic Models</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>259–270</pages>
       <url hash="d99f4c2c">P14-1025</url>
       <doi>10.3115/v1/P14-1025</doi>
@@ -322,7 +322,7 @@
       <title>Learning to Automatically Solve Algebra Word Problems</title>
       <author><first>Nate</first><last>Kushman</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>271–281</pages>
       <url hash="4c58d438">P14-1026</url>
@@ -347,7 +347,7 @@
       <title>Max-Margin Tensor Neural Network for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Wenzhe</first><last>Pei</last></author>
       <author><first>Tao</first><last>Ge</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>293–303</pages>
       <url hash="b0d1e5d1">P14-1028</url>
       <doi>10.3115/v1/P14-1028</doi>
@@ -359,7 +359,7 @@
       <title>An Empirical Study on the Effect of Negation Words on Sentiment</title>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Hongyu</first><last>Guo</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>304–313</pages>
       <url hash="eda80382">P14-1029</url>
@@ -382,7 +382,7 @@
     <paper id="31">
       <title>Context-aware Learning for Sentence-level Sentiment Analysis with Posterior Regularization</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>325–335</pages>
       <url hash="70a72bcb">P14-1031</url>
       <doi>10.3115/v1/P14-1031</doi>
@@ -431,7 +431,7 @@
       <title>A <fixed-case>B</fixed-case>ayesian Mixed Effects Model of Literary Character</title>
       <author><first>David</first><last>Bamman</last></author>
       <author><first>Ted</first><last>Underwood</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>370–379</pages>
       <url hash="af806b7a">P14-1035</url>
       <doi>10.3115/v1/P14-1035</doi>
@@ -444,7 +444,7 @@
       <author><first>Yunbo</first><last>Cao</last></author>
       <author><first>Xiaojiang</first><last>Huang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>380–390</pages>
       <url hash="60c6debf">P14-1036</url>
       <doi>10.3115/v1/P14-1036</doi>
@@ -463,7 +463,7 @@
     </paper>
     <paper id="38">
       <title>Incremental Joint Extraction of Entity Mentions and Relations</title>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>402–412</pages>
       <url hash="b6930be1">P14-1038</url>
@@ -474,7 +474,7 @@
     <paper id="39">
       <title>That’s Not What <fixed-case>I</fixed-case> Meant! Using Parsers to Avoid Structural Ambiguities in Generated Text</title>
       <author><first>Manjuan</first><last>Duan</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>413–423</pages>
       <url hash="1d439016">P14-1039</url>
       <doi>10.3115/v1/P14-1039</doi>
@@ -508,7 +508,7 @@
     </paper>
     <paper id="42">
       <title>Grammatical Relations in <fixed-case>C</fixed-case>hinese: <fixed-case>GB</fixed-case>-Ground Extraction and Data-Driven Parsing</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Yantao</first><last>Du</last></author>
       <author><first>Xin</first><last>Kou</last></author>
       <author><first>Shuoyang</first><last>Ding</last></author>
@@ -547,7 +547,7 @@
     <paper id="45">
       <title>Predicting the relevance of distributional semantic similarity with contextual information</title>
       <author><first>Philippe</first><last>Muller</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Clémentine</first><last>Adam</last></author>
       <pages>479–488</pages>
       <url hash="1ee27fc7">P14-1045</url>
@@ -559,9 +559,9 @@
     <paper id="46">
       <title>Interpretable Semantic Vectors from a Joint Model of Brain- and Text- Based Meaning</title>
       <author><first>Alona</first><last>Fyshe</last></author>
-      <author><first>Partha P.</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha P.</first><last>Talukdar</last></author>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>489–499</pages>
       <url hash="89ced4e6">P14-1046</url>
       <doi>10.3115/v1/P14-1046</doi>
@@ -571,7 +571,7 @@
       <title>Single-Agent vs. Multi-Agent Techniques for Concurrent Reinforcement Learning of Negotiation Dialogue Policies</title>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Claire</first><last>Nelson</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>500–510</pages>
       <url hash="af4dca10">P14-1047</url>
       <doi>10.3115/v1/P14-1047</doi>
@@ -591,8 +591,8 @@
     <paper id="49">
       <title>Negation Focus Identification with Contextual Discourse Information</title>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>522–530</pages>
       <url hash="9d918495">P14-1049</url>
       <doi>10.3115/v1/P14-1049</doi>
@@ -615,7 +615,7 @@
     <paper id="51">
       <title><fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>ew: A Semi-Supervised Framework for Generating Domain-Specific Lexicons and Sentiment Analysis</title>
       <author><first>Zhe</first><last>Zhang</last></author>
-      <author><first>Munindar P.</first><last>Singh</last></author>
+      <author id="munindar-p-singh"><first>Munindar P.</first><last>Singh</last></author>
       <pages>542–551</pages>
       <url hash="c783b9d4">P14-1051</url>
       <doi>10.3115/v1/P14-1051</doi>
@@ -655,11 +655,11 @@
     </paper>
     <paper id="55">
       <title>Bilingual Active Learning for Relation Classification via Pseudo Parallel Corpora</title>
-      <author><first>Longhua</first><last>Qian</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
       <author><first>Haotian</first><last>Hui</last></author>
       <author><first>Ya’nan</first><last>Hu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>582–592</pages>
       <url hash="c45671c3">P14-1055</url>
       <doi>10.3115/v1/P14-1055</doi>
@@ -691,8 +691,8 @@
     <paper id="58">
       <title>Learning to Predict Distributions of Words Across Domains</title>
       <author><first>Danushka</first><last>Bollegala</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>613–623</pages>
       <url hash="abd60ab0">P14-1058</url>
       <doi>10.3115/v1/P14-1058</doi>
@@ -701,8 +701,8 @@
     </paper>
     <paper id="59">
       <title>How to make words with vectors: Phrase generation in distributional semantics</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>624–633</pages>
       <url hash="a74eb4cf">P14-1059</url>
       <doi>10.3115/v1/P14-1059</doi>
@@ -711,7 +711,7 @@
     <paper id="60">
       <title>Vector space semantics with frequency-driven motifs</title>
       <author><first>Shashank</first><last>Srivastava</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>634–643</pages>
       <url hash="da4a9ab9">P14-1060</url>
       <doi>10.3115/v1/P14-1060</doi>
@@ -721,8 +721,8 @@
     <paper id="61">
       <title>Lexical Inference over Multi-Word Predicates: A Distributional Approach</title>
       <author><first>Omri</first><last>Abend</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>644–654</pages>
       <url hash="12c7e681">P14-1061</url>
       <doi>10.3115/v1/P14-1061</doi>
@@ -731,8 +731,8 @@
     <paper id="62">
       <title>A Convolutional Neural Network for Modelling Sentences</title>
       <author><first>Nal</first><last>Kalchbrenner</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>655–665</pages>
       <url hash="f0d3fe77">P14-1062</url>
       <doi>10.3115/v1/P14-1062</doi>
@@ -742,7 +742,7 @@
     <paper id="63">
       <title>Online Learning in Tensor Space</title>
       <author><first>Yuan</first><last>Cao</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>666–675</pages>
       <url hash="d9e857f8">P14-1063</url>
       <doi>10.3115/v1/P14-1063</doi>
@@ -752,7 +752,7 @@
     <paper id="64">
       <title>Graph-based Semi-Supervised Learning of Translation Models from Monolingual Data</title>
       <author><first>Avneesh</first><last>Saluja</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>676–686</pages>
@@ -763,10 +763,10 @@
     </paper>
     <paper id="65">
       <title>Using Discourse Structure Improves Machine Translation Evaluation</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>687–698</pages>
       <url hash="64ea1890">P14-1065</url>
       <doi>10.3115/v1/P14-1065</doi>
@@ -777,7 +777,7 @@
       <title>Learning Continuous Phrase Representations for Translation Modeling</title>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Li</first><last>Deng</last></author>
       <pages>699–709</pages>
       <url hash="61a4fc1d">P14-1066</url>
@@ -788,8 +788,8 @@
       <title>Adaptive Quality Estimation for Machine Translation</title>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>710–720</pages>
       <url hash="bb1f9255">P14-1067</url>
       <doi>10.3115/v1/P14-1067</doi>
@@ -817,8 +817,8 @@
     </paper>
     <paper id="70">
       <title>Strategies for Contiguous Multiword Expression Analysis and Dependency Parsing</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>743–753</pages>
       <url hash="744ec4bf">P14-1070</url>
       <doi>10.3115/v1/P14-1070</doi>
@@ -849,7 +849,7 @@
     <paper id="73">
       <title>Robust Entity Clustering via Phylogenetic Inference</title>
       <author><first>Nicholas</first><last>Andrews</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>775–785</pages>
       <url hash="c9e7ac19">P14-1073</url>
@@ -862,7 +862,7 @@
     <paper id="74">
       <title>Linguistic Structured Sparsity in Text Categorization</title>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>786–796</pages>
       <url hash="192659e2">P14-1074</url>
       <doi>10.3115/v1/P14-1074</doi>
@@ -883,7 +883,7 @@
       <title>Robust Domain Adaptation for Relation Extraction via Clustering Consistency</title>
       <author><first>Minh Luan</first><last>Nguyen</last></author>
       <author><first>Ivor W.</first><last>Tsang</last></author>
-      <author><first>Kian Ming A.</first><last>Chai</last></author>
+      <author id="kian-ming-a-chai"><first>Kian Ming A.</first><last>Chai</last></author>
       <author><first>Hai Leong</first><last>Chieu</last></author>
       <pages>807–817</pages>
       <url hash="bdf8dc33">P14-1076</url>
@@ -921,8 +921,8 @@
       <author><first>Deli</first><last>Zhao</last></author>
       <author><first>Qiang</first><last>Zhou</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
-      <author><first>Thomas Fang</first><last>Zheng</last></author>
-      <author><first>Edward Y.</first><last>Chang</last></author>
+      <author id="fang-zheng"><first>Thomas Fang</first><last>Zheng</last></author>
+      <author id="edward-y-chang"><first>Edward Y.</first><last>Chang</last></author>
       <pages>839–849</pages>
       <url hash="1949eecf">P14-1079</url>
       <doi>10.3115/v1/P14-1079</doi>
@@ -934,7 +934,7 @@
       <title>Enhancing Grammatical Cohesion: Generating Transitional Expressions for <fixed-case>SMT</fixed-case></title>
       <author><first>Mei</first><last>Tu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>850–860</pages>
       <url hash="c67306f0">P14-1080</url>
       <doi>10.3115/v1/P14-1080</doi>
@@ -944,9 +944,9 @@
     <paper id="81">
       <title>Adaptive <fixed-case>HTER</fixed-case> Estimation for Document-Specific <fixed-case>MT</fixed-case> Post-Editing</title>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Jian-Ming</first><last>Xu</last></author>
+      <author id="jian-ming-xu"><first>Jian-Ming</first><last>Xu</last></author>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <pages>861–870</pages>
       <url hash="9acb8a10">P14-1081</url>
       <doi>10.3115/v1/P14-1081</doi>
@@ -956,7 +956,7 @@
     <paper id="82">
       <title>Translation Assistance by Translation of <fixed-case>L</fixed-case>1 Fragments in an <fixed-case>L</fixed-case>2 Context</title>
       <author><first>Maarten</first><last>van Gompel</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>871–880</pages>
       <url hash="ae97d3b9">P14-1082</url>
       <doi>10.3115/v1/P14-1082</doi>
@@ -991,7 +991,7 @@
     <paper id="85">
       <title>Hierarchical Summarization: Scaling Up Multi-Document Summarization</title>
       <author><first>Janara</first><last>Christensen</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Gagan</first><last>Bansal</last></author>
       <author><first/><last>Mausam</last></author>
       <pages>902–912</pages>
@@ -1014,7 +1014,7 @@
     </paper>
     <paper id="87">
       <title>Exploiting Timelines to Enhance Multi-document Summarization</title>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Yan</first><last>Chen</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
@@ -1063,7 +1063,7 @@
       <author><first>Junwei</first><last>Bao</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>967–976</pages>
       <url hash="ba3ee189">P14-1091</url>
       <doi>10.3115/v1/P14-1091</doi>
@@ -1073,7 +1073,7 @@
     </paper>
     <paper id="92">
       <title>Discourse Complements Lexical Semantics for Non-factoid Answer Reranking</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <pages>977–986</pages>
@@ -1089,8 +1089,8 @@
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Motoki</first><last>Sano</last></author>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Yutaka</first><last>Kidawara</last></author>
       <pages>987–997</pages>
       <url hash="fcb8a0ad">P14-1093</url>
@@ -1103,9 +1103,9 @@
     <paper id="94">
       <title>Cross-narrative Temporal Ordering of Medical Events</title>
       <author><first>Preethi</first><last>Raghavan</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
-      <author><first>Albert M.</first><last>Lai</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
+      <author id="albert-m-lai"><first>Albert M.</first><last>Lai</last></author>
       <pages>998–1008</pages>
       <url hash="c31a8301">P14-1094</url>
       <doi>10.3115/v1/P14-1094</doi>
@@ -1114,8 +1114,8 @@
     </paper>
     <paper id="95">
       <title>Language-Aware Truth Assessment of Fact Candidates</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>1009–1019</pages>
       <url hash="c193a7f2">P14-1095</url>
       <doi>10.3115/v1/P14-1095</doi>
@@ -1127,7 +1127,7 @@
       <author><first>Sunny</first><last>Mitra</last></author>
       <author><first>Ritwik</first><last>Mitra</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Animesh</first><last>Mukherjee</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>1020–1029</pages>
@@ -1140,8 +1140,8 @@
     <paper id="97">
       <title>A Step-wise Usage-based Method for Inducing Polysemy-aware Verb Classes</title>
       <author><first>Daisuke</first><last>Kawahara</last></author>
-      <author><first>Daniel W.</first><last>Peterson</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="daniel-peterson"><first>Daniel W.</first><last>Peterson</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1030–1040</pages>
       <url hash="1b1fe343">P14-1097</url>
       <doi>10.3115/v1/P14-1097</doi>
@@ -1163,8 +1163,8 @@
     </paper>
     <paper id="99">
       <title>A Provably Correct Learning Algorithm for Latent-Variable <fixed-case>PCFG</fixed-case>s</title>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>1052–1061</pages>
       <url hash="3e6b2be7">P14-1099</url>
       <doi>10.3115/v1/P14-1099</doi>
@@ -1173,9 +1173,9 @@
     </paper>
     <paper id="100">
       <title>Spectral Unsupervised Parsing with Additive Tree Metrics</title>
-      <author><first>Ankur P.</first><last>Parikh</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="ankur-parikh"><first>Ankur P.</first><last>Parikh</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <pages>1062–1072</pages>
       <url hash="10dedbed">P14-1100</url>
       <doi>10.3115/v1/P14-1100</doi>
@@ -1187,8 +1187,8 @@
     <paper id="101">
       <title>Weak semantic context helps phonetic learning in a model of infant language acquisition</title>
       <author><first>Stella</first><last>Frank</last></author>
-      <author><first>Naomi H.</first><last>Feldman</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="naomi-feldman"><first>Naomi H.</first><last>Feldman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1073–1083</pages>
       <url hash="d5160f5e">P14-1101</url>
       <doi>10.3115/v1/P14-1101</doi>
@@ -1198,7 +1198,7 @@
     </paper>
     <paper id="102">
       <title>Bootstrapping into Filler-Gap: An Acquisition Story</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
       <pages>1084–1093</pages>
       <url hash="47042c81">P14-1102</url>
@@ -1211,7 +1211,7 @@
       <title>Nonparametric Learning of Phonological Constraints in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
       <author><first>Klinton</first><last>Bicknell</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>1094–1103</pages>
       <url hash="f2e1f286">P14-1103</url>
       <doi>10.3115/v1/P14-1103</doi>
@@ -1245,10 +1245,10 @@
     </paper>
     <paper id="106">
       <title>A Unified Model for Soft Linguistic Reordering Constraints in Statistical Machine Translation</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1123–1133</pages>
       <url hash="a24cc99e">P14-1106</url>
       <doi>10.3115/v1/P14-1106</doi>
@@ -1304,7 +1304,7 @@
     </paper>
     <paper id="111">
       <title>Low-Resource Semantic Role Labeling</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
@@ -1316,7 +1316,7 @@
     <paper id="112">
       <title>Joint Syntactic and Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>1188–1198</pages>
       <url hash="42b67370">P14-1112</url>
       <doi>10.3115/v1/P14-1112</doi>
@@ -1339,9 +1339,9 @@
     </paper>
     <paper id="114">
       <title>Probabilistic Soft Logic for Semantic Textual Similarity</title>
-      <author><first>Islam</first><last>Beltagy</last></author>
+      <author id="islam-beltagy"><first>Islam</first><last>Beltagy</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>1210–1219</pages>
       <url hash="f59bec15">P14-1114</url>
       <doi>10.3115/v1/P14-1114</doi>
@@ -1351,7 +1351,7 @@
       <title>Abstractive Summarization of Spoken and Written Conversations Based on Phrasal Queries</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <pages>1220–1230</pages>
       <url hash="220fe32d">P14-1115</url>
       <doi>10.3115/v1/P14-1115</doi>
@@ -1361,7 +1361,7 @@
     <paper id="116">
       <title>Comparing Multi-label Classification with Reinforcement Learning for Summarisation of Time-series Data</title>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>1231–1240</pages>
       <url hash="b31e399b">P14-1116</url>
@@ -1383,7 +1383,7 @@
       <author><first>Aliaksei</first><last>Severyn</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Katja</first><last>Filippova</last></author>
       <pages>1252–1261</pages>
       <url hash="a3425c59">P14-1118</url>
@@ -1436,7 +1436,7 @@
       <title>Shallow Analysis Based Assessment of Syntactic Complexity for Automated Speech Scoring</title>
       <author><first>Suma</first><last>Bhat</last></author>
       <author><first>Huichao</first><last>Xue</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <pages>1305–1315</pages>
       <url hash="c4db1472">P14-1123</url>
       <doi>10.3115/v1/P14-1123</doi>
@@ -1446,7 +1446,7 @@
     <paper id="124">
       <title>Can You Repeat That? Using Word Repetition to Improve Spoken Term Detection</title>
       <author><first>Jonathan</first><last>Wintrode</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>1316–1325</pages>
       <url hash="47678c6d">P14-1124</url>
       <doi>10.3115/v1/P14-1124</doi>
@@ -1480,7 +1480,7 @@
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
       <author><first>Thomas</first><last>Lippincott</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1349–1359</pages>
       <url hash="65fc12e3">P14-1127</url>
       <doi>10.3115/v1/P14-1127</doi>
@@ -1505,8 +1505,8 @@
       <author><first>Rabih</first><last>Zbib</last></author>
       <author><first>Zhongqiang</first><last>Huang</last></author>
       <author><first>Thomas</first><last>Lamar</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>1370–1380</pages>
       <url hash="1e4d1b9a">P14-1129</url>
       <doi>10.3115/v1/P14-1129</doi>
@@ -1529,7 +1529,7 @@
     <paper id="131">
       <title><fixed-case>C</fixed-case>o<fixed-case>S</fixed-case>im<fixed-case>R</fixed-case>ank: A Flexible &amp; Efficient Graph-Theoretic Similarity Measure</title>
       <author><first>Sascha</first><last>Rothe</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1392–1402</pages>
       <url hash="da3632d6">P14-1131</url>
       <doi>10.3115/v1/P14-1131</doi>
@@ -1541,7 +1541,7 @@
       <title>Is this a wampimuk? Cross-modal mapping between distributional semantics and the visual world</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>1403–1414</pages>
       <url hash="ffb9d968">P14-1132</url>
       <doi>10.3115/v1/P14-1132</doi>
@@ -1564,9 +1564,9 @@
       <title>A Discriminative Graph-Based Parser for the <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation</title>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1426–1436</pages>
       <url hash="83224b58">P14-1134</url>
       <doi>10.3115/v1/P14-1134</doi>
@@ -1578,7 +1578,7 @@
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Yoav</first><last>Artzi</last></author>
       <author><first>Jesse</first><last>Dodge</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1437–1447</pages>
       <url hash="75217904">P14-1135</url>
       <doi>10.3115/v1/P14-1135</doi>
@@ -1602,7 +1602,7 @@
     </paper>
     <paper id="137">
       <title>A Sense-Based Translation Model for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>1459–1469</pages>
       <url hash="31b51dd4">P14-1137</url>
@@ -1614,7 +1614,7 @@
       <title>Recurrent Neural Networks for Word Alignment Model</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1470–1480</pages>
       <url hash="0b102cdb">P14-1138</url>
       <doi>10.3115/v1/P14-1138</doi>
@@ -1625,9 +1625,9 @@
     <paper id="139">
       <title>A Constrained <fixed-case>V</fixed-case>iterbi Relaxation for Bidirectional Word Alignment</title>
       <author><first>Yin-Wen</first><last>Chang</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
       <author><first>John</first><last>DeNero</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>1481–1490</pages>
       <url hash="b4390e43">P14-1139</url>
       <doi>10.3115/v1/P14-1139</doi>
@@ -1651,7 +1651,7 @@
       <title>Predicting Instructor’s Intervention in <fixed-case>MOOC</fixed-case> forums</title>
       <author><first>Snigdha</first><last>Chaturvedi</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1501–1511</pages>
       <url hash="842f9a9c">P14-1141</url>
       <doi>10.3115/v1/P14-1141</doi>
@@ -1724,8 +1724,8 @@
       <title>Towards a General Rule for Identifying Deceptive Opinion Spam</title>
       <author><first>Jiwei</first><last>Li</last></author>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1566–1576</pages>
       <url hash="4b6fc8a7">P14-1147</url>
       <doi>10.3115/v1/P14-1147</doi>
@@ -1763,9 +1763,9 @@
     </paper>
     <paper id="2">
       <title>Biases in Predicting the Human Language Model</title>
-      <author><first>Alex B.</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex B.</first><last>Fine</last></author>
       <author><first>Austin F.</first><last>Frank</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>7–12</pages>
       <url hash="065949aa">P14-2002</url>
@@ -1777,7 +1777,7 @@
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Lanbo</first><last>She</last></author>
       <author><first>Rui</first><last>Fang</last></author>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <pages>13–18</pages>
       <url hash="74e78e5f">P14-2003</url>
       <doi>10.3115/v1/P14-2003</doi>
@@ -1786,7 +1786,7 @@
     <paper id="4">
       <title>A Composite Kernel Approach for Dialog Topic Tracking with Structured Domain Knowledge from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>19–23</pages>
       <url hash="94660a1e">P14-2004</url>
@@ -1796,10 +1796,10 @@
     </paper>
     <paper id="5">
       <title>An Extension of <fixed-case>BLANC</fixed-case> to System Mentions</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>24–29</pages>
       <url hash="2615318e">P14-2005</url>
       <doi>10.3115/v1/P14-2005</doi>
@@ -1808,10 +1808,10 @@
     </paper>
     <paper id="6">
       <title>Scoring Coreference Partitions of Predicted Mentions: A Reference Implementation</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Vincent</first><last>Ng</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>30–35</pages>
@@ -1824,7 +1824,7 @@
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
       <author><first>Nivvedan</first><last>Senthamilselvan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>36–41</pages>
       <url hash="dec4a191">P14-2007</url>
       <doi>10.3115/v1/P14-2007</doi>
@@ -1834,7 +1834,7 @@
     <paper id="8">
       <title>Improving Citation Polarity Classification with Product Reviews</title>
       <author><first>Charles</first><last>Jochim</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>42–48</pages>
       <url hash="108ea71c">P14-2008</url>
       <doi>10.3115/v1/P14-2008</doi>
@@ -1876,7 +1876,7 @@
     <paper id="12">
       <title>Employing Word Representations and Regularization for Domain Adaptation of Relation Extraction</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>68–74</pages>
       <url hash="c45fd2f1">P14-2012</url>
       <doi>10.3115/v1/P14-2012</doi>
@@ -1886,7 +1886,7 @@
     <paper id="13">
       <title>Graph Ranking for Collective Named Entity Disambiguation</title>
       <author><first>Ayman</first><last>Alhelbawy</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>75–80</pages>
       <url hash="105e224e">P14-2013</url>
       <doi>10.3115/v1/P14-2013</doi>
@@ -1896,11 +1896,11 @@
     <paper id="14">
       <title>Descending-Path Convolution Kernel for Syntactic Structures</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Alvin</first><last>Kho</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>81–86</pages>
       <url hash="65445c85">P14-2014</url>
@@ -1920,8 +1920,8 @@
     </paper>
     <paper id="16">
       <title>Automatic Detection of Multilingual Dictionaries on the Web</title>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>93–98</pages>
       <url hash="c17e7721">P14-2016</url>
       <doi>10.3115/v1/P14-2016</doi>
@@ -1930,8 +1930,8 @@
     </paper>
     <paper id="17">
       <title>Automatic Detection of Cognates Using Orthographic Alignment</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>99–105</pages>
       <url hash="c5f13b96">P14-2017</url>
       <doi>10.3115/v1/P14-2017</doi>
@@ -1944,7 +1944,7 @@
       <title>Automatically constructing <fixed-case>W</fixed-case>ordnet Synsets</title>
       <author><first>Khang Nhut</first><last>Lam</last></author>
       <author><first>Feras</first><last>Al Tarouti</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>106–111</pages>
       <url hash="678bb5fc">P14-2018</url>
       <doi>10.3115/v1/P14-2018</doi>
@@ -1974,7 +1974,7 @@
     <paper id="21">
       <title>Robust Logistic Regression using Shift Parameters</title>
       <author><first>Julie</first><last>Tibshirani</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>124–129</pages>
       <url hash="42cef4ec">P14-2021</url>
       <doi>10.3115/v1/P14-2021</doi>
@@ -1984,7 +1984,7 @@
       <title>Faster Phrase-Based Decoding by Refining Feature State</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Michael</first><last>Kayser</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>130–135</pages>
       <url hash="2be09be9">P14-2022</url>
       <doi>10.3115/v1/P14-2022</doi>
@@ -2013,7 +2013,7 @@
     <paper id="25">
       <title>Simple extensions and <fixed-case>POS</fixed-case> Tags for a reparameterised <fixed-case>IBM</fixed-case> Model 2</title>
       <author><first>Douwe</first><last>Gelling</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>150–154</pages>
       <url hash="222b5fea">P14-2025</url>
       <doi>10.3115/v1/P14-2025</doi>
@@ -2023,7 +2023,7 @@
       <title>Dependency-based Pre-ordering for <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Jingsheng</first><last>Cai</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <pages>155–160</pages>
       <url hash="947ef257">P14-2026</url>
@@ -2060,7 +2060,7 @@
       <author><first>Nitin</first><last>Madnani</last></author>
       <author><first>Melissa</first><last>Lopez</last></author>
       <author><first>Matthew</first><last>Mulholland</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>174–180</pages>
       <url hash="b479c497">P14-2029</url>
       <doi>10.3115/v1/P14-2029</doi>
@@ -2096,7 +2096,7 @@
       <title>Two Knives Cut Better Than One: <fixed-case>C</fixed-case>hinese Word Segmentation with Dual Decomposition</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
       <author><first>Rob</first><last>Voigt</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>193–198</pages>
       <url hash="213d310b">P14-2032</url>
       <doi>10.3115/v1/P14-2032</doi>
@@ -2117,7 +2117,7 @@
       <title>Word Segmentation of Informal <fixed-case>A</fixed-case>rabic with Domain Adaptation</title>
       <author><first>Will</first><last>Monroe</last></author>
       <author><first>Spence</first><last>Green</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>206–211</pages>
       <url hash="aa0d0067">P14-2034</url>
       <doi>10.3115/v1/P14-2034</doi>
@@ -2130,7 +2130,7 @@
       <title>Resolving Lexical Ambiguity in Tensor Regression Models of Meaning</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
       <author><first>Nal</first><last>Kalchbrenner</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>212–217</pages>
       <url hash="571f38f6">P14-2035</url>
       <doi>10.3115/v1/P14-2035</doi>
@@ -2140,7 +2140,7 @@
     <paper id="36">
       <title>A Novel Content Enriching Model for Microblog Using News Corpus</title>
       <author><first>Yunlun</first><last>Yang</last></author>
-      <author><first>Zhihong</first><last>Deng</last></author>
+      <author id="zhi-hong-deng"><first>Zhihong</first><last>Deng</last></author>
       <author><first>Hongliang</first><last>Yu</last></author>
       <pages>218–223</pages>
       <url hash="0a1a9fee">P14-2036</url>
@@ -2152,7 +2152,7 @@
       <title>Learning Bilingual Word Representations by Marginalizing Alignments</title>
       <author><first>Tomáš</first><last>Kočiský</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>224–229</pages>
       <url hash="565488c0">P14-2037</url>
       <doi>10.3115/v1/P14-2037</doi>
@@ -2161,7 +2161,7 @@
     <paper id="38">
       <title>Detecting Retries of Voice Search Queries</title>
       <author><first>Rivka</first><last>Levitan</last></author>
-      <author><first>David</first><last>Elson</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
       <pages>230–235</pages>
       <url hash="a1bc5d14">P14-2038</url>
       <doi>10.3115/v1/P14-2038</doi>
@@ -2213,9 +2213,9 @@
     </paper>
     <paper id="43">
       <title>Part-of-Speech Tagging using Conditional Random Fields: Exploiting Sub-Label Dependencies for Improved Accuracy</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Teemu</first><last>Ruokolainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <pages>259–264</pages>
       <url hash="ff966e4b">P14-2043</url>
@@ -2228,7 +2228,7 @@
       <author><first>Kairit</first><last>Sirts</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>265–271</pages>
       <url hash="42df14fe">P14-2044</url>
       <doi>10.3115/v1/P14-2044</doi>
@@ -2238,7 +2238,7 @@
     <paper id="45">
       <title>Improving the Recognizability of Syntactic Relations Using Contextualized Examples</title>
       <author><first>Aditi</first><last>Muralidharan</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>272–277</pages>
       <url hash="37f87429">P14-2045</url>
       <doi>10.3115/v1/P14-2045</doi>
@@ -2283,7 +2283,7 @@
     </paper>
     <paper id="49">
       <title>Improving sparse word similarity models with asymmetric measures</title>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
       <pages>296–301</pages>
       <url hash="02ff612f">P14-2049</url>
       <doi>10.3115/v1/P14-2049</doi>
@@ -2319,7 +2319,7 @@
       <author><first>Yuta</first><last>Kikuchi</last></author>
       <author><first>Tsutomu</first><last>Hirao</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>315–320</pages>
       <url hash="3c9dfd92">P14-2052</url>
@@ -2331,7 +2331,7 @@
     <paper id="53">
       <title>Linguistic Considerations in Automatic Question Generation</title>
       <author><first>Karen</first><last>Mazidi</last></author>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
       <pages>321–326</pages>
       <url hash="ecebdd7d">P14-2053</url>
       <doi>10.3115/v1/P14-2053</doi>
@@ -2363,7 +2363,7 @@
     <paper id="56">
       <title>Predicting Power Relations between Participants in Written Dialog from a Single Thread</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>339–344</pages>
       <url hash="65930d1c">P14-2056</url>
       <doi>10.3115/v1/P14-2056</doi>
@@ -2384,7 +2384,7 @@
     </paper>
     <paper id="58">
       <title>Automation and Evaluation of the Keyword Method for Second Language Learning</title>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>352–357</pages>
@@ -2408,7 +2408,7 @@
     <paper id="60">
       <title>Hippocratic Abbreviation Expansion</title>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>364–369</pages>
       <url hash="07f14fd0">P14-2060</url>
       <doi>10.3115/v1/P14-2060</doi>
@@ -2418,10 +2418,10 @@
     </paper>
     <paper id="61">
       <title>Unsupervised Feature Learning for Visual Sign Language Identification</title>
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
       <author><first>Onno</first><last>Crasborn</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Sebastian</first><last>Drude</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="sebastian-drude"><first>Sebastian</first><last>Drude</last></author>
       <author><first>Tom</first><last>Heskes</last></author>
       <pages>370–376</pages>
       <url hash="af72e5c1">P14-2061</url>
@@ -2433,8 +2433,8 @@
     <paper id="62">
       <title>Experiments with crowdsourced re-annotation of a <fixed-case>POS</fixed-case> tagging data set</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>377–382</pages>
       <url hash="b143e5d0">P14-2062</url>
       <doi>10.3115/v1/P14-2062</doi>
@@ -2445,7 +2445,7 @@
     <paper id="63">
       <title>Building Sentiment Lexicons for All Major Languages</title>
       <author><first>Yanqing</first><last>Chen</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>383–389</pages>
       <url hash="54963f89">P14-2063</url>
       <doi>10.3115/v1/P14-2063</doi>
@@ -2467,8 +2467,8 @@
     <paper id="65">
       <title>The <fixed-case>V</fixed-case>erb<fixed-case>C</fixed-case>orner Project: Findings from Phase 1 of crowd-sourcing a semantic decomposition of verbs</title>
       <author><first>Joshua K.</first><last>Hartshorne</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>397–402</pages>
       <url hash="a1d5f927">P14-2065</url>
       <doi>10.3115/v1/P14-2065</doi>
@@ -2492,7 +2492,7 @@
     <paper id="67">
       <title>Determiner-Established Deixis to Communicative Artifacts in Pedagogical Text</title>
       <author><first>Shomir</first><last>Wilson</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>409–414</pages>
       <url hash="4b8d3df4">P14-2067</url>
       <doi>10.3115/v1/P14-2067</doi>
@@ -2545,7 +2545,7 @@
     <paper id="72">
       <title>Cross-cultural Deception Detection</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>440–445</pages>
       <url hash="a772ebd3">P14-2072</url>
       <doi>10.3115/v1/P14-2072</doi>
@@ -2604,7 +2604,7 @@
     </paper>
     <paper id="78">
       <title>Mutual Disambiguation for Entity Linking</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Marie-Jean</first><last>Meurs</last></author>
       <author><first>Ludovic</first><last>Jean-Louis</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
@@ -2626,7 +2626,7 @@
       <title>Learning Translational and Knowledge-based Similarities from Relevance Rankings for Cross-Language Retrieval</title>
       <author><first>Shigehiko</first><last>Schamoni</last></author>
       <author><first>Felix</first><last>Hieber</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <pages>488–494</pages>
       <url hash="36a4b019">P14-2080</url>
@@ -2637,7 +2637,7 @@
     <paper id="81">
       <title>Two-Stage Hashing for Fast Document Retrieval</title>
       <author><first>Hao</first><last>Li</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>495–500</pages>
       <url hash="0fa97fb1">P14-2081</url>
@@ -2648,7 +2648,7 @@
       <title>An Annotation Framework for Dense Event Ordering</title>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Bill</first><last>McDowell</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>501–506</pages>
       <url hash="58d6e7d2">P14-2082</url>
@@ -2658,9 +2658,9 @@
     </paper>
     <paper id="83">
       <title>Linguistically debatable or just plain wrong?</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>507–511</pages>
       <url hash="b70b8f96">P14-2083</url>
       <doi>10.3115/v1/P14-2083</doi>
@@ -2669,7 +2669,7 @@
     </paper>
     <paper id="84">
       <title>Humans Require Context to Infer Ironic Intent (so Computers Probably do, too)</title>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Do Kook</first><last>Choe</last></author>
       <author><first>Laura</first><last>Kertz</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
@@ -2692,7 +2692,7 @@
     <paper id="86">
       <title>Combining Word Patterns and Discourse Markers for Paradigmatic Relation Classification</title>
       <author><first>Michael</first><last>Roth</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>524–530</pages>
       <url hash="7652d492">P14-2086</url>
       <doi>10.3115/v1/P14-2086</doi>
@@ -2733,7 +2733,7 @@
       <author><first>Yusuke</first><last>Oda</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>551–556</pages>
       <url hash="b82bf4ca">P14-2090</url>
@@ -2742,7 +2742,7 @@
     </paper>
     <paper id="91">
       <title>A joint inference of deep case analysis and zero subject generation for <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish statistical machine translation</title>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Hiroshi</first><last>Ichikawa</last></author>
       <author><first>Hideto</first><last>Kazawa</last></author>
       <pages>557–562</pages>
@@ -2767,7 +2767,7 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Hao</first><last>Liu</last></author>
       <author><first>Xing</first><last>Wang</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <pages>569–573</pages>
       <url hash="552db7e8">P14-2093</url>
       <doi>10.3115/v1/P14-2093</doi>
@@ -2777,7 +2777,7 @@
       <title>Refinements to Interactive Translation Prediction Based on Search Graphs</title>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Chara</first><last>Tsoukala</last></author>
-      <author><first>Herve</first><last>Saint-Amand</last></author>
+      <author id="herve-saint-amand"><first>Herve</first><last>Saint-Amand</last></author>
       <pages>574–578</pages>
       <url hash="0114c288">P14-2094</url>
       <doi>10.3115/v1/P14-2094</doi>
@@ -2796,8 +2796,8 @@
     <paper id="96">
       <title>Cross-language and Cross-encyclopedia Article Linking Using Mixed-language Topic Model and Hypernym Translation</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>586–591</pages>
       <url hash="47be98cd">P14-2096</url>
       <doi>10.3115/v1/P14-2096</doi>
@@ -2830,7 +2830,7 @@
       <author><first>Rohan</first><last>Ramanath</last></author>
       <author id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>605–610</pages>
       <url hash="d62a876f">P14-2099</url>
       <doi>10.3115/v1/P14-2099</doi>
@@ -2842,7 +2842,7 @@
       <author><first>Yu-Yang</first><last>Huang</last></author>
       <author><first>Rui</first><last>Yan</last></author>
       <author><first>Tsung-Ting</first><last>Kuo</last></author>
-      <author><first>Shou-De</first><last>Lin</last></author>
+      <author id="shou-de-lin"><first>Shou-De</first><last>Lin</last></author>
       <pages>611–617</pages>
       <url hash="463ffbef">P14-2100</url>
       <doi>10.3115/v1/P14-2100</doi>
@@ -2850,7 +2850,7 @@
     </paper>
     <paper id="101">
       <title>Automatic Labelling of Topic Models Learned from <fixed-case>T</fixed-case>witter by Summarisation</title>
-      <author><first>Amparo Elizabeth</first><last>Cano Basave</last></author>
+      <author id="amparo-elizabeth-cano-basave"><first>Amparo Elizabeth</first><last>Cano Basave</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>618–624</pages>
@@ -2863,7 +2863,7 @@
       <title>Stochastic Contextual Edit Distance and Probabilistic <fixed-case>FST</fixed-case>s</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>625–630</pages>
       <url hash="2337835a">P14-2102</url>
       <doi>10.3115/v1/P14-2102</doi>
@@ -2893,9 +2893,9 @@
     </paper>
     <paper id="105">
       <title>Semantic Parsing for Single-Relation Question Answering</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>643–648</pages>
       <url hash="92870bc7">P14-2105</url>
       <doi>10.3115/v1/P14-2105</doi>
@@ -2906,10 +2906,10 @@
     <paper id="106">
       <title>On <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Semantic Classes and Dependency Parsing</title>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>649–655</pages>
       <url hash="75ffb632">P14-2106</url>
       <doi>10.3115/v1/P14-2106</doi>
@@ -2928,8 +2928,8 @@
     <paper id="108">
       <title>The <fixed-case>P</fixed-case>enn Parsed Corpus of <fixed-case>M</fixed-case>odern <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish: First Parsing Results and Analysis</title>
       <author><first>Seth</first><last>Kulick</last></author>
-      <author><first>Anthony</first><last>Kroch</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
+      <author id="anthony-kroch"><first>Anthony</first><last>Kroch</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
       <pages>662–667</pages>
       <url hash="1c359cc5">P14-2108</url>
       <doi>10.3115/v1/P14-2108</doi>
@@ -2941,9 +2941,9 @@
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Justin</first><last>Mott</last></author>
-      <author><first>Anthony</first><last>Kroch</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="anthony-kroch"><first>Anthony</first><last>Kroch</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>668–673</pages>
       <url hash="a8192cf4">P14-2109</url>
       <doi>10.3115/v1/P14-2109</doi>
@@ -2964,7 +2964,7 @@
     </paper>
     <paper id="111">
       <title>Normalizing tweets with edit scripts and recurrent neural embeddings</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>680–686</pages>
       <url hash="e198d39a">P14-2111</url>
       <doi>10.3115/v1/P14-2111</doi>
@@ -2986,7 +2986,7 @@
     <paper id="113">
       <title>A Piece of My Mind: A Sentiment Analysis Approach for Online Dispute Detection</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>693–699</pages>
       <url hash="52ee6161">P14-2113</url>
       <doi>10.3115/v1/P14-2113</doi>
@@ -2997,7 +2997,7 @@
     <paper id="114">
       <title>A Simple <fixed-case>B</fixed-case>ayesian Modelling Approach to Event Extraction from <fixed-case>T</fixed-case>witter</title>
       <author><first>Deyu</first><last>Zhou</last></author>
-      <author><first>Liangyu</first><last>Chen</last></author>
+      <author id="liang-yu-chen"><first>Liangyu</first><last>Chen</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <pages>700–705</pages>
       <url hash="b6f8bac8">P14-2114</url>
@@ -3060,7 +3060,7 @@
       <author><first>Maria</first><last>Pershina</last></author>
       <author><first>Bonan</first><last>Min</last></author>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>732–738</pages>
       <url hash="cb6c4ce3">P14-2119</url>
       <doi>10.3115/v1/P14-2119</doi>
@@ -3096,7 +3096,7 @@
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>752–758</pages>
       <url hash="eb85d948">P14-2122</url>
       <doi>10.3115/v1/P14-2122</doi>
@@ -3107,7 +3107,7 @@
     <paper id="123">
       <title><fixed-case>EM</fixed-case> Decipherment for Large Vocabularies</title>
       <author><first>Malte</first><last>Nuhn</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>759–764</pages>
       <url hash="9792b2b3">P14-2123</url>
       <doi>10.3115/v1/P14-2123</doi>
@@ -3132,7 +3132,7 @@
       <author><first>Heba</first><last>Elfardy</last></author>
       <author><first>Linda</first><last>Alamir-Salloum</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>772–778</pages>
       <url hash="06aa93e3">P14-2125</url>
       <doi>10.3115/v1/P14-2125</doi>
@@ -3144,7 +3144,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>779–784</pages>
       <url hash="2d15e4bf">P14-2126</url>
       <doi>10.3115/v1/P14-2126</doi>
@@ -3156,7 +3156,7 @@
       <author><first>Kai</first><last>Zhao</last></author>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Haitao</first><last>Mi</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <pages>785–790</pages>
       <url hash="6b4b936c">P14-2127</url>
       <doi>10.3115/v1/P14-2127</doi>
@@ -3234,8 +3234,8 @@
     <paper id="134">
       <title>Distributed Representations of Geographically Situated Language</title>
       <author><first>David</first><last>Bamman</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>828–834</pages>
       <url hash="f78339b7">P14-2134</url>
       <doi>10.3115/v1/P14-2134</doi>
@@ -3260,7 +3260,7 @@
       <title>Bilingual Event Extraction: a Case Study on Trigger Type Determination</title>
       <author><first>Zhu</first><last>Zhu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Rui</first><last>Xia</last></author>
       <pages>842–847</pages>
       <url hash="0bc03303">P14-2136</url>
@@ -3294,7 +3294,7 @@
       <title>Cross-lingual Opinion Analysis via Negative Transfer Detection</title>
       <author><first>Lin</first><last>Gui</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Jun</first><last>Xu</last></author>
       <author><first>Jian</first><last>Xu</last></author>
       <author><first>Bin</first><last>Liu</last></author>
@@ -3328,7 +3328,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>B</fixed-case>ayesian Kernel Methods for Natural Language Processing</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <pages>1–9</pages>
       <url hash="5971a849">P14-3001</url>
       <doi>10.3115/v1/P14-3001</doi>
@@ -3355,7 +3355,7 @@
     </paper>
     <paper id="4">
       <title>Analyzing Positions and Topics in Political Discussions of the <fixed-case>G</fixed-case>erman Bundestag</title>
-      <author><first>Cäcilia</first><last>Zirn</last></author>
+      <author id="cacilia-zirn"><first>Cäcilia</first><last>Zirn</last></author>
       <pages>26–33</pages>
       <url hash="2e568f14">P14-3004</url>
       <doi>10.3115/v1/P14-3004</doi>
@@ -3365,7 +3365,7 @@
     <paper id="5">
       <title>A Mapping-Based Approach for General Formal Human Computer Interaction Using Natural Language</title>
       <author><first>Vincent</first><last>Letard</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Gabriel</first><last>Illouz</last></author>
       <pages>34–40</pages>
       <url hash="c9253b2a">P14-3005</url>
@@ -3375,7 +3375,7 @@
     <paper id="6">
       <title>An Exploration of Embeddings for Generalized Phrases</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>41–47</pages>
       <url hash="4b96d523">P14-3006</url>
       <doi>10.3115/v1/P14-3006</doi>
@@ -3419,7 +3419,7 @@
     <paper id="11">
       <title>Open Information Extraction for <fixed-case>S</fixed-case>panish Language based on Syntactic Constraints</title>
       <author><first>Alisa</first><last>Zhila</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>78–85</pages>
       <url hash="1fc07d0e">P14-3011</url>
       <doi>10.3115/v1/P14-3011</doi>
@@ -3447,7 +3447,7 @@
     <meta>
       <booktitle>Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations</booktitle>
       <url hash="4e40d9d3">P14-5</url>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
       <editor><last>Zhu</last><first>Jingbo</first></editor>
       <doi>10.3115/v1/P14-5</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3462,11 +3462,11 @@
     </frontmatter>
     <paper id="1">
       <title>Cross-Lingual Information to the Rescue in Keyword Extraction</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <pages>1–6</pages>
       <url hash="0f29640b">P14-5001</url>
       <doi>10.3115/v1/P14-5001</doi>
@@ -3488,7 +3488,7 @@
       <title>Open-Source Tools for Morphology, Lemmatization, <fixed-case>POS</fixed-case> Tagging and Named Entity Recognition</title>
       <author><first>Jana</first><last>Straková</last></author>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>13–18</pages>
       <url hash="d4bc2897">P14-5003</url>
       <doi>10.3115/v1/P14-5003</doi>
@@ -3497,7 +3497,7 @@
     <paper id="4">
       <title>Community Evaluation and Exchange of Word Vectors at wordvectors.org</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>19–24</pages>
       <url hash="ae802948">P14-5004</url>
       <doi>10.3115/v1/P14-5004</doi>
@@ -3508,7 +3508,7 @@
       <author><first>Xianjun</first><last>Dai</last></author>
       <author><first>Yuanchao</first><last>Liu</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>25–30</pages>
       <url hash="1facb1fb">P14-5005</url>
       <doi>10.3115/v1/P14-5005</doi>
@@ -3538,7 +3538,7 @@
       <author><first>Iadh</first><last>Ounis</last></author>
       <author><first>Yulan</first><last>He</last></author>
       <author><first>Tom</first><last>Jackson</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <author><first>Ann</first><last>O’Brien</last></author>
       <pages>37–42</pages>
       <url hash="f54f905f">P14-5007</url>
@@ -3547,13 +3547,13 @@
     </paper>
     <paper id="8">
       <title>The Excitement Open Platform for Textual Inferences</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Roberto</first><last>Zanoli</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Kathrin</first><last>Eichler</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <author><first>Tae-Gil</first><last>Noh</last></author>
-      <author><first>Sebastian</first><last>Pado</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Pado</last></author>
       <author><first>Asher</first><last>Stern</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <pages>43–48</pages>
@@ -3566,8 +3566,8 @@
       <author><first>Morgan</first><last>Ulinski</last></author>
       <author><first>Anusha</first><last>Balakrishnan</last></author>
       <author><first>Bob</first><last>Coyne</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>49–54</pages>
       <url hash="1aadc27b">P14-5009</url>
       <doi>10.3115/v1/P14-5009</doi>
@@ -3575,10 +3575,10 @@
     </paper>
     <paper id="10">
       <title>The <fixed-case>S</fixed-case>tanford <fixed-case>C</fixed-case>ore<fixed-case>NLP</fixed-case> Natural Language Processing Toolkit</title>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Jenny</first><last>Finkel</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny</first><last>Finkel</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>David</first><last>McClosky</last></author>
       <pages>55–60</pages>
@@ -3610,7 +3610,7 @@
     <paper id="13">
       <title>A Rule-Augmented Statistical Phrase-based Translation System</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <author><first>Nhung T. H.</first><last>Nguyen</last></author>
       <pages>73–78</pages>
       <url hash="14e77e17">P14-5013</url>
@@ -3620,7 +3620,7 @@
     <paper id="14">
       <title><fixed-case>K</fixed-case>yoto<fixed-case>EBMT</fixed-case>: An Example-Based Dependency-to-Dependency Translation Framework</title>
       <author><first>John</first><last>Richardson</last></author>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>79–84</pages>
@@ -3643,7 +3643,7 @@
     <paper id="16">
       <title>Automatic Annotation Suggestions and Custom Annotation Layers in <fixed-case>W</fixed-case>eb<fixed-case>A</fixed-case>nno</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>91–96</pages>
@@ -3656,7 +3656,7 @@
       <author><first>Binyang</first><last>Li</last></author>
       <author><first>Lanjun</first><last>Zhou</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
-      <author><first>Kam-fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-fai</first><last>Wong</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Yunqing</first><last>Xia</last></author>
       <pages>97–102</pages>
@@ -3700,8 +3700,8 @@
       <title>Simplified Dependency Annotations with <fixed-case>GFL</fixed-case>-<fixed-case>W</fixed-case>eb</title>
       <author><first>Michael T.</first><last>Mordowanec</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>121–126</pages>
       <url hash="a8485ca5">P14-5021</url>
       <doi>10.3115/v1/P14-5021</doi>
@@ -3712,7 +3712,7 @@
     <meta>
       <booktitle>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics: Tutorials</booktitle>
       <url hash="35a6ee1d">P14-6</url>
-      <editor><first>Alex</first><last>Fraser</last></editor>
+      <editor id="alexander-fraser"><first>Alex</first><last>Fraser</last></editor>
       <editor id="yang-liu-ict"><first>Yang</first><last>Liu</last></editor>
       <doi>10.3115/v1/P14-6</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3727,8 +3727,8 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>G</fixed-case>aussian Processes for Natural Language Processing</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Neil</first><last>Lawrence</last></author>
       <pages>1–3</pages>
       <url hash="4c03ed37">P14-6001</url>
@@ -3767,10 +3767,10 @@
     </paper>
     <paper id="5">
       <title>New Directions in Vector Space Models of Meaning</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>8</pages>
       <url hash="ed1069b4">P14-6005</url>
       <doi>10.3115/v1/P14-6005</doi>
@@ -3780,8 +3780,8 @@
     </paper>
     <paper id="6">
       <title>Structured Belief Propagation for <fixed-case>NLP</fixed-case></title>
-      <author><first>Matthew</first><last>Gormley</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>9–10</pages>
       <url hash="183a563d">P14-6006</url>
       <doi>10.3115/v1/P14-6006</doi>
@@ -3789,7 +3789,7 @@
     </paper>
     <paper id="7">
       <title>Semantics, Discourse and Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
       <pages>11–12</pages>
       <url hash="dd95e92d">P14-6007</url>
diff --git a/data/xml/P15.xml b/data/xml/P15.xml
index 0494fd799f..b33543bf5c 100644
--- a/data/xml/P15.xml
+++ b/data/xml/P15.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</booktitle>
       <url hash="4274a9bc">P15-1</url>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
       <doi>10.3115/v1/P15-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -31,9 +31,9 @@
     </paper>
     <paper id="2">
       <title>Addressing the Rare Word Problem in Neural Machine Translation</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Ilya</first><last>Sutskever</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Oriol</first><last>Vinyals</last></author>
       <author><first>Wojciech</first><last>Zaremba</last></author>
       <pages>11–19</pages>
@@ -61,8 +61,8 @@
       <author><first>Jacob</first><last>Devlin</last></author>
       <author><first>Thomas</first><last>Lamar</last></author>
       <author><first>Rabih</first><last>Zbib</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>31–41</pages>
       <url hash="8b9c162d">P15-1004</url>
       <doi>10.3115/v1/P15-1004</doi>
@@ -79,11 +79,11 @@
     </paper>
     <paper id="6">
       <title>Text to 3<fixed-case>D</fixed-case> Scene Generation with Rich Lexical Grounding</title>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Will</first><last>Monroe</last></author>
       <author><first>Manolis</first><last>Savva</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>53–62</pages>
       <url hash="9fec7219">P15-1006</url>
       <doi>10.3115/v1/P15-1006</doi>
@@ -92,7 +92,7 @@
     <paper id="7">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>G</fixed-case>ran<fixed-case>CNN</fixed-case>: An Architecture for General Matching of Text Chunks on Multiple Levels of Granularity</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>63–73</pages>
       <url hash="a1eed73e">P15-1007</url>
       <doi>10.3115/v1/P15-1007</doi>
@@ -101,7 +101,7 @@
     <paper id="8">
       <title>Weakly Supervised Models of Aspect-Sentiment for Online Course Discussion Forums</title>
       <author><first>Arti</first><last>Ramesh</last></author>
-      <author><first>Shachi</first><last>H. Kumar</last></author>
+      <author id="shachi-h-kumar"><first>Shachi</first><last>H. Kumar</last></author>
       <author><first>James</first><last>Foulds</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
       <pages>74–83</pages>
@@ -152,7 +152,7 @@
       <author><first>James</first><last>Foulds</last></author>
       <author><first>Bert</first><last>Huang</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>116–125</pages>
       <url hash="cab94f05">P15-1012</url>
       <doi>10.3115/v1/P15-1012</doi>
@@ -174,7 +174,7 @@
       <author><first>Jiafeng</first><last>Guo</last></author>
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>136–145</pages>
       <url hash="e1cacdcc">P15-1014</url>
       <doi>10.3115/v1/P15-1014</doi>
@@ -219,7 +219,7 @@
       <author><first>Vidhoon</first><last>Viswanathan</last></author>
       <author><first>Nazneen Fatema</first><last>Rajani</last></author>
       <author><first>Yinon</first><last>Bentor</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>177–187</pages>
       <url hash="ade5d91c">P15-1018</url>
       <doi>10.3115/v1/P15-1018</doi>
@@ -241,7 +241,7 @@
       <author><first>Yusuke</first><last>Oda</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>198–207</pages>
       <url hash="1b937999">P15-1020</url>
@@ -258,8 +258,8 @@
     </paper>
     <paper id="22">
       <title>Online Multitask Learning for Machine Translation Quality Estimation</title>
-      <author><first>José G.</first><last>C. de Souza</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Elisa</first><last>Ricci</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>219–228</pages>
@@ -270,7 +270,7 @@
     <paper id="23">
       <title>A Context-Aware Topic Model for Statistical Machine Translation</title>
       <author><first>Jinsong</first><last>Su</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Xianpei</first><last>Han</last></author>
       <author><first>Hongyu</first><last>Lin</last></author>
@@ -285,7 +285,7 @@
       <title>Learning Answer-Entailing Structures for Machine Comprehension</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Kumar</first><last>Dubey</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Matthew</first><last>Richardson</last></author>
       <pages>239–249</pages>
       <url hash="d6002e05">P15-1024</url>
@@ -318,8 +318,8 @@
     <paper id="27">
       <title>Hubness and Pollution: Delving into Cross-Space Mapping for Zero-Shot Learning</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>270–280</pages>
       <url hash="ae8e7386">P15-1027</url>
       <doi>10.3115/v1/P15-1027</doi>
@@ -328,8 +328,8 @@
     <paper id="28">
       <title>A Generalisation of Lexical Functions for Composition in Distributional Semantics</title>
       <author><first>Antoine</first><last>Bride</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>281–291</pages>
       <url hash="58f99f2c">P15-1028</url>
       <doi>10.3115/v1/P15-1028</doi>
@@ -338,7 +338,7 @@
     </paper>
     <paper id="29">
       <title>Simple Learning and Compositional Application of Perceptually Grounded Word Meanings for Incremental Reference Resolution</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>292–301</pages>
       <url hash="bedb8015">P15-1029</url>
@@ -358,7 +358,7 @@
       <title>An Effective Neural Network Model for Graph-based Dependency Parsing</title>
       <author><first>Wenzhe</first><last>Pei</last></author>
       <author><first>Tao</first><last>Ge</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>313–322</pages>
       <url hash="1df02ead">P15-1031</url>
       <doi>10.3115/v1/P15-1031</doi>
@@ -368,7 +368,7 @@
       <title>Structured Training for Neural Network Transition-Based Parsing</title>
       <author><first>David</first><last>Weiss</last></author>
       <author><first>Chris</first><last>Alberti</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <pages>323–333</pages>
       <url hash="78cc6e09">P15-1032</url>
@@ -378,11 +378,11 @@
     </paper>
     <paper id="33">
       <title>Transition-Based Dependency Parsing with Stack Long Short-Term Memory</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Austin</first><last>Matthews</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>334–343</pages>
       <url hash="eb2af94d">P15-1033</url>
       <doi>10.3115/v1/P15-1033</doi>
@@ -392,7 +392,7 @@
       <title>Leveraging Linguistic Structure For Open Domain Information Extraction</title>
       <author><first>Gabor</first><last>Angeli</last></author>
       <author><first>Melvin Jose</first><last>Johnson Premkumar</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>344–354</pages>
       <url hash="ea44fd8e">P15-1034</url>
       <doi>10.3115/v1/P15-1034</doi>
@@ -401,7 +401,7 @@
     <paper id="35">
       <title>Joint Information Extraction and Reasoning: A Scalable Statistical Relational Learning Approach</title>
       <author><first>William Yang</first><last>Wang</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>355–364</pages>
       <url hash="a037826b">P15-1035</url>
       <doi>10.3115/v1/P15-1035</doi>
@@ -409,8 +409,8 @@
     </paper>
     <paper id="36">
       <title>A Knowledge-Intensive Model for Prepositional Phrase Attachment</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>365–375</pages>
       <url hash="86cc4c48">P15-1036</url>
       <doi>10.3115/v1/P15-1036</doi>
@@ -428,9 +428,9 @@
     </paper>
     <paper id="38">
       <title>It Depends: Dependency Parser Comparison Using A Web-based Evaluation Tool</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>387–396</pages>
       <url hash="4f6d3e96">P15-1038</url>
       <doi>10.3115/v1/P15-1038</doi>
@@ -455,7 +455,7 @@
       <author><first>Cláudia</first><last>Pinto</last></author>
       <author><first>Helena</first><last>Figueira</last></author>
       <author><first>Pedro</first><last>Mendes</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>408–418</pages>
       <url hash="00c45047">P15-1040</url>
       <doi>10.3115/v1/P15-1040</doi>
@@ -476,10 +476,10 @@
     </paper>
     <paper id="42">
       <title>Learning Bilingual Sentiment Word Embeddings for Cross-language Sentiment Classification</title>
-      <author><first>HuiWei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>HuiWei</first><last>Zhou</last></author>
       <author><first>Long</first><last>Chen</last></author>
       <author><first>Fulin</first><last>Shi</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>430–440</pages>
       <url hash="722c82f1">P15-1042</url>
       <doi>10.3115/v1/P15-1042</doi>
@@ -491,7 +491,7 @@
       <author><first>Catherine</first><last>Finegan-Dollak</last></author>
       <author><first>Ben</first><last>King</last></author>
       <author><first>Reed</first><last>Coke</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>441–450</pages>
       <url hash="5c3879d2">P15-1043</url>
       <doi>10.3115/v1/P15-1043</doi>
@@ -500,7 +500,7 @@
     <paper id="44">
       <title>Training a Natural Language Generator From Unaligned Data</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>451–461</pages>
       <url hash="72014180">P15-1044</url>
       <doi>10.3115/v1/P15-1044</doi>
@@ -511,7 +511,7 @@
       <author><first>Rui</first><last>Sun</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Meishan</first><last>Zhang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>462–472</pages>
       <url hash="cf92afeb">P15-1045</url>
       <doi>10.3115/v1/P15-1045</doi>
@@ -521,7 +521,7 @@
       <title>New Transfer Learning Techniques for Disparate Label Sets</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Minwoo</first><last>Jeong</last></author>
       <pages>473–482</pages>
       <url hash="c1d63801">P15-1046</url>
@@ -533,7 +533,7 @@
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>483–494</pages>
       <url hash="8f09c619">P15-1047</url>
       <doi>10.3115/v1/P15-1047</doi>
@@ -545,7 +545,7 @@
       <author><first>Shuangzhi</first><last>Wu</last></author>
       <author><first>Dongdong</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>495–503</pages>
       <url hash="aaf4eecc">P15-1048</url>
       <doi>10.3115/v1/P15-1048</doi>
@@ -607,7 +607,7 @@
       <author><first>Ramakrishna</first><last>Bairi</last></author>
       <author><first>Rishabh</first><last>Iyer</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
-      <author><first>Jeff</first><last>Bilmes</last></author>
+      <author id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></author>
       <pages>553–563</pages>
       <url hash="5724cce1">P15-1054</url>
       <doi>10.3115/v1/P15-1054</doi>
@@ -618,7 +618,7 @@
       <author><first>Nikos</first><last>Voskarides</last></author>
       <author><first>Edgar</first><last>Meij</last></author>
       <author><first>Manos</first><last>Tsagkias</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Wouter</first><last>Weerkamp</last></author>
       <pages>564–574</pages>
       <url hash="ada8520f">P15-1055</url>
@@ -631,7 +631,7 @@
       <author><first>Wenzhe</first><last>Pei</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>575–585</pages>
       <url hash="32d469b6">P15-1056</url>
@@ -645,7 +645,7 @@
       <author><first>Hongzhao</first><last>Huang</last></author>
       <author><first>Xiaoman</first><last>Pan</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Zhen</first><last>Wen</last></author>
@@ -693,7 +693,7 @@
     </paper>
     <paper id="61">
       <title>Classifying Relations by Ranking with Convolutional Neural Networks</title>
-      <author><first>Cícero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>626–634</pages>
@@ -704,8 +704,8 @@
     <paper id="62">
       <title>Semantic Representations for Domain Adaptation: A Case Study on the Tree Kernel-based Method for Relation Extraction</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>635–644</pages>
       <url hash="ce5f47cd">P15-1062</url>
       <doi>10.3115/v1/P15-1062</doi>
@@ -725,8 +725,8 @@
     <paper id="64">
       <title>Negation and Speculation Identification in <fixed-case>C</fixed-case>hinese Language</title>
       <author><first>Bowei</first><last>Zou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>656–665</pages>
       <url hash="aa683b30">P15-1064</url>
       <doi>10.3115/v1/P15-1064</doi>
@@ -736,7 +736,7 @@
       <title>Learning Relational Features with Backward Random Walks</title>
       <author><first>Ni</first><last>Lao</last></author>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>666–675</pages>
       <url hash="2cc25392">P15-1065</url>
       <doi>10.3115/v1/P15-1065</doi>
@@ -777,7 +777,7 @@
     </paper>
     <paper id="69">
       <title>Knowledge Portability with Semantic Expansion of Ontology Labels</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>708–718</pages>
@@ -806,7 +806,7 @@
     </paper>
     <paper id="72">
       <title>A Unified Multilingual Semantic Representation of Concepts</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>741–751</pages>
@@ -825,7 +825,7 @@
     </paper>
     <paper id="74">
       <title>Vector-space calculation of semantic surprisal for predicting word pronunciation duration</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Stefan</first><last>Fischer</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>763–773</pages>
@@ -857,7 +857,7 @@
       <title><fixed-case>G</fixed-case>aussian <fixed-case>LDA</fixed-case> for Topic Models with Word Embeddings</title>
       <author><first>Rajarshi</first><last>Das</last></author>
       <author><first>Manzil</first><last>Zaheer</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>795–804</pages>
       <url hash="135ea052">P15-1077</url>
       <doi>10.3115/v1/P15-1077</doi>
@@ -865,10 +865,10 @@
     </paper>
     <paper id="78">
       <title>Pairwise Neural Machine Translation Evaluation</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>805–814</pages>
       <url hash="ce9ceb14">P15-1078</url>
       <doi>10.3115/v1/P15-1078</doi>
@@ -888,8 +888,8 @@
       <title>Non-linear Learning for Statistical Machine Translation</title>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Huadong</first><last>Chen</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>825–835</pages>
       <url hash="85865c01">P15-1080</url>
       <doi>10.3115/v1/P15-1080</doi>
@@ -900,7 +900,7 @@
       <author><first>Qing</first><last>Dou</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>836–845</pages>
       <url hash="5098f5c1">P15-1081</url>
       <doi>10.3115/v1/P15-1081</doi>
@@ -909,7 +909,7 @@
     </paper>
     <paper id="82">
       <title>Non-projective Dependency-based Pre-Reordering with Recurrent Neural Network for Machine Translation</title>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <pages>846–856</pages>
       <url hash="8356019b">P15-1082</url>
@@ -939,7 +939,7 @@
     <paper id="85">
       <title>Language to Code: Learning Semantic Parsers for If-This-Then-That Recipes</title>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <pages>878–888</pages>
       <url hash="1ab5dc8a">P15-1085</url>
@@ -958,7 +958,7 @@
     </paper>
     <paper id="87">
       <title>The <fixed-case>NL</fixed-case>2<fixed-case>KR</fixed-case> Platform for building Natural Language Translation Systems</title>
-      <author><first>Nguyen</first><last>Vo</last></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Vo</last></author>
       <author><first>Arindam</first><last>Mitra</last></author>
       <author><first>Chitta</first><last>Baral</last></author>
       <pages>899–908</pages>
@@ -1017,7 +1017,7 @@
       <author><first>Hiroki</first><last>Ouchi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>961–970</pages>
       <url hash="5a257db4">P15-1093</url>
       <doi>10.3115/v1/P15-1093</doi>
@@ -1025,10 +1025,10 @@
     </paper>
     <paper id="94">
       <title>Jointly optimizing word representations for lexical and sentential tasks with the <fixed-case>C</fixed-case>-<fixed-case>PHRASE</fixed-case> model</title>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>971–981</pages>
       <url hash="5378b8df">P15-1094</url>
       <doi>10.3115/v1/P15-1094</doi>
@@ -1038,7 +1038,7 @@
       <title>Robust Subgraph Generation Improves <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing</title>
       <author><first>Keenon</first><last>Werling</last></author>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>982–991</pages>
       <url hash="a9c394a1">P15-1095</url>
       <doi>10.3115/v1/P15-1095</doi>
@@ -1046,7 +1046,7 @@
     </paper>
     <paper id="96">
       <title>Environment-Driven Lexicon Induction for High-Level Instructions</title>
-      <author><first>Dipendra Kumar</first><last>Misra</last></author>
+      <author id="dipendra-misra"><first>Dipendra Kumar</first><last>Misra</last></author>
       <author><first>Kejia</first><last>Tao</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <author><first>Ashutosh</first><last>Saxena</last></author>
@@ -1089,7 +1089,7 @@
     </paper>
     <paper id="100">
       <title>Sparse, Contextually Informed Models for Irony Detection: Exploiting User Communities, Entities and Sentiment</title>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Do Kook</first><last>Choe</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>1035–1044</pages>
@@ -1102,7 +1102,7 @@
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Lei</first><last>Huang</last></author>
       <author><first>Rong</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1045–1053</pages>
       <url hash="74acbec8">P15-1101</url>
       <doi>10.3115/v1/P15-1101</doi>
@@ -1112,7 +1112,7 @@
       <title>Co-training for Semi-supervised Sentiment Classification Based on Dual-view Bags-of-words Representation</title>
       <author><first>Rui</first><last>Xia</last></author>
       <author><first>Cheng</first><last>Wang</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
       <author><first>Tao</first><last>Li</last></author>
       <pages>1054–1063</pages>
       <url hash="069604c3">P15-1102</url>
@@ -1130,10 +1130,10 @@
     </paper>
     <paper id="104">
       <title>Learning Word Representations from Scarce and Noisy Data with Embedding Subspaces</title>
-      <author><first>Ramon</first><last>F. Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramon</first><last>F. Astudillo</last></author>
       <author><first>Silvio</first><last>Amir</last></author>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Mário</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário</first><last>Silva</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>1074–1084</pages>
       <url hash="b25d9f31">P15-1104</url>
@@ -1153,7 +1153,7 @@
     <paper id="106">
       <title>Driving <fixed-case>ROVER</fixed-case> with Segment-based <fixed-case>ASR</fixed-case> Quality Estimation</title>
       <author><first>Shahab</first><last>Jalalvand</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1095–1105</pages>
@@ -1164,8 +1164,8 @@
     <paper id="107">
       <title>A Hierarchical Neural Autoencoder for Paragraphs and Documents</title>
       <author><first>Jiwei</first><last>Li</last></author>
-      <author><first>Thang</first><last>Luong</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1106–1115</pages>
       <url hash="bb1384fc">P15-1107</url>
       <doi>10.3115/v1/P15-1107</doi>
@@ -1175,8 +1175,8 @@
       <title>Joint Dependency Parsing and Multiword Expression Tokenization</title>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>José</first><last>Deulofeu</last></author>
-      <author><first>André</first><last>Valli</last></author>
+      <author id="jose-deulofeu"><first>José</first><last>Deulofeu</last></author>
+      <author id="andre-valli"><first>André</first><last>Valli</last></author>
       <pages>1116–1126</pages>
       <url hash="fef98798">P15-1108</url>
       <doi>10.3115/v1/P15-1108</doi>
@@ -1206,7 +1206,7 @@
     <paper id="111">
       <title>Identifying Cascading Errors using Constraints in Dependency Parsing</title>
       <author><first>Dominick</first><last>Ng</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>1148–1158</pages>
       <url hash="eab48e18">P15-1111</url>
       <doi>10.3115/v1/P15-1111</doi>
@@ -1217,7 +1217,7 @@
       <author><first>Chenxi</first><last>Zhu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Xinchi</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1159–1168</pages>
       <url hash="551cff3a">P15-1112</url>
       <doi>10.3115/v1/P15-1112</doi>
@@ -1226,7 +1226,7 @@
     <paper id="113">
       <title>Transition-based Neural Constituent Parsing</title>
       <author><first>Taro</first><last>Watanabe</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1169–1179</pages>
       <url hash="7aa14d6a">P15-1113</url>
       <doi>10.3115/v1/P15-1113</doi>
@@ -1263,7 +1263,7 @@
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1213–1222</pages>
       <url hash="5880cdc1">P15-1117</url>
       <doi>10.3115/v1/P15-1117</doi>
@@ -1292,7 +1292,7 @@
     </paper>
     <paper id="120">
       <title>Can Natural Language Processing Become Natural Language Coaching?</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>1245–1252</pages>
       <url hash="8d045719">P15-1120</url>
       <doi>10.3115/v1/P15-1120</doi>
@@ -1330,7 +1330,7 @@
     <paper id="124">
       <title>Model-based Word Embeddings from Decompositions of Count Matrices</title>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Daniel</first><last>Hsu</last></author>
       <pages>1282–1291</pages>
       <url hash="cb8048c0">P15-1124</url>
@@ -1343,7 +1343,7 @@
       <author><first>Poyao</first><last>Huang</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
       <author><first>Yingkai</first><last>Gao</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1292–1300</pages>
       <url hash="93585336">P15-1125</url>
       <doi>10.3115/v1/P15-1125</doi>
@@ -1353,7 +1353,7 @@
     <paper id="126">
       <title>Orthogonality of Syntax and Semantics within Distributional Spaces</title>
       <author><first>Jeff</first><last>Mitchell</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1301–1310</pages>
       <url hash="877f8e95">P15-1126</url>
       <doi>10.3115/v1/P15-1126</doi>
@@ -1365,7 +1365,7 @@
       <title>Scalable Semantic Parsing with Partial Ontologies</title>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1311–1320</pages>
       <url hash="ab1cf7d0">P15-1127</url>
       <doi>10.3115/v1/P15-1127</doi>
@@ -1373,7 +1373,7 @@
     </paper>
     <paper id="128">
       <title>Semantic Parsing via Staged Query Graph Generation: Question Answering with Knowledge Base</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
@@ -1398,7 +1398,7 @@
       <title>Predicting Polarities of Tweets by Composing Word Embeddings with Long Short-Term Memory</title>
       <author><first>Xin</first><last>Wang</last></author>
       <author><first>Yuanchao</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Baoxun</first><last>Wang</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <pages>1343–1353</pages>
@@ -1420,7 +1420,7 @@
       <author><first>Qiao</first><last>Qian</last></author>
       <author><first>Bo</first><last>Tian</last></author>
       <author><first>Minlie</first><last>Huang</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Xuan</first><last>Zhu</last></author>
       <author><first>Xiaoyan</first><last>Zhu</last></author>
       <pages>1365–1374</pages>
@@ -1431,8 +1431,8 @@
     </paper>
     <paper id="133">
       <title>A convex and feature-rich discriminative approach to dependency grammar induction</title>
-      <author><first>Édouard</first><last>Grave</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="edouard-grave"><first>Édouard</first><last>Grave</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <pages>1375–1384</pages>
       <url hash="cfe0ab09">P15-1133</url>
       <doi>10.3115/v1/P15-1133</doi>
@@ -1460,7 +1460,7 @@
     <paper id="136">
       <title>Entity-Centric Coreference Resolution with Model Stacking</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1405–1415</pages>
       <url hash="0683bdf6">P15-1136</url>
       <doi>10.3115/v1/P15-1136</doi>
@@ -1469,8 +1469,8 @@
     <paper id="137">
       <title>Learning Anaphoricity and Antecedent Ranking Features for Coreference Resolution</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <pages>1416–1426</pages>
       <url hash="a8bbfdbc">P15-1137</url>
@@ -1479,7 +1479,7 @@
     </paper>
     <paper id="138">
       <title>Transferring Coreference Resolvers with Posterior Regularization</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>1427–1437</pages>
       <url hash="88d386e1">P15-1138</url>
       <doi>10.3115/v1/P15-1138</doi>
@@ -1499,7 +1499,7 @@
     <paper id="140">
       <title><fixed-case>KB</fixed-case>-<fixed-case>LDA</fixed-case>: Jointly Learning a Knowledge Base of Hierarchy, Relations, and Facts</title>
       <author><first>Dana</first><last>Movshovitz-Attias</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>1449–1459</pages>
       <url hash="c1151922">P15-1140</url>
       <doi>10.3115/v1/P15-1140</doi>
@@ -1510,10 +1510,10 @@
       <title>A Computationally Efficient Algorithm for Learning Topical Collocation Models</title>
       <author><first>Zhendong</first><last>Zhao</last></author>
       <author><first>Lan</first><last>Du</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
-      <author><first>John K</first><last>Pate</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="john-k-pate"><first>John K</first><last>Pate</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>1460–1469</pages>
       <url hash="da9ace76">P15-1141</url>
@@ -1545,8 +1545,8 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1491–1500</pages>
       <url hash="dc50a303">P15-1144</url>
       <doi>10.3115/v1/P15-1144</doi>
@@ -1584,7 +1584,7 @@
     <paper id="147">
       <title>Parsing as Reduction</title>
       <author><first>Daniel</first><last>Fernández-González</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>1523–1533</pages>
       <url hash="b456df22">P15-1147</url>
       <doi>10.3115/v1/P15-1147</doi>
@@ -1604,7 +1604,7 @@
     <paper id="149">
       <title>A Data-Driven, Factorization Parser for <fixed-case>CCG</fixed-case> Dependency Structures</title>
       <author><first>Yantao</first><last>Du</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>1545–1555</pages>
       <url hash="4a78abf9">P15-1149</url>
@@ -1617,7 +1617,7 @@
       <title>Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks</title>
       <author><first>Kai Sheng</first><last>Tai</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1556–1566</pages>
       <url hash="5a65f30e">P15-1150</url>
       <doi>10.3115/v1/P15-1150</doi>
@@ -1652,7 +1652,7 @@
       <author><first>Yi</first><last>Liao</last></author>
       <author><first>Wai</first><last>Lam</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>1587–1597</pages>
       <url hash="26f6616a">P15-1153</url>
       <doi>10.3115/v1/P15-1153</doi>
@@ -1660,7 +1660,7 @@
     </paper>
     <paper id="154">
       <title>Joint Graphical Models for Date Selection in Timeline Summarization</title>
-      <author><first>Giang</first><last>Tran</last></author>
+      <author id="giang-binh-tran"><first>Giang</first><last>Tran</last></author>
       <author><first>Eelco</first><last>Herder</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <pages>1598–1607</pages>
@@ -1671,7 +1671,7 @@
     <paper id="155">
       <title>Predicting Salient Updates for Disaster Summarization</title>
       <author><first>Chris</first><last>Kedzie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Fernando</first><last>Diaz</last></author>
       <pages>1608–1617</pages>
       <url hash="eeb8f37b">P15-1155</url>
@@ -1702,8 +1702,8 @@
       <title>Why discourse affects speakers’ choice of referring expressions</title>
       <author><first>Naho</first><last>Orita</last></author>
       <author><first>Eliana</first><last>Vornov</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1639–1649</pages>
       <url hash="cfbf5d74">P15-1158</url>
       <doi>10.3115/v1/P15-1158</doi>
@@ -1724,7 +1724,7 @@
       <title>Who caught a cold ? - Identifying the subject of a symptom</title>
       <author><first>Shin</first><last>Kanouchi</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Hiroshi</first><last>Ishikawa</last></author>
       <pages>1660–1670</pages>
@@ -1736,7 +1736,7 @@
       <title>Weakly Supervised Role Identification in Teamwork Interactions</title>
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Miaomiao</first><last>Wen</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1671–1680</pages>
       <url hash="520d9dea">P15-1161</url>
       <doi>10.3115/v1/P15-1161</doi>
@@ -1745,9 +1745,9 @@
     <paper id="162">
       <title>Deep Unordered Composition Rivals Syntactic Methods for Text Classification</title>
       <author><first>Mohit</first><last>Iyyer</last></author>
-      <author><first>Varun</first><last>Manjunatha</last></author>
+      <author id="varun-manjunatha"><first>Varun</first><last>Manjunatha</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1681–1691</pages>
       <url hash="bba13cb6">P15-1162</url>
       <doi>10.3115/v1/P15-1162</doi>
@@ -1776,12 +1776,12 @@
     </paper>
     <paper id="165">
       <title>Inverted indexing for cross-lingual <fixed-case>NLP</fixed-case></title>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>1713–1722</pages>
       <url hash="71ab13db">P15-1165</url>
       <doi>10.3115/v1/P15-1165</doi>
@@ -1802,7 +1802,7 @@
     <paper id="167">
       <title>Accurate Linear-Time <fixed-case>C</fixed-case>hinese Word Segmentation via Embedding Matching</title>
       <author><first>Jianqiang</first><last>Ma</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>1733–1743</pages>
       <url hash="b8f2a844">P15-1167</url>
       <doi>10.3115/v1/P15-1167</doi>
@@ -1814,7 +1814,7 @@
       <author><first>Xinchi</first><last>Chen</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Chenxi</first><last>Zhu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1744–1753</pages>
       <url hash="56c85da3">P15-1168</url>
       <doi>10.3115/v1/P15-1168</doi>
@@ -1822,7 +1822,7 @@
     </paper>
     <paper id="169">
       <title>An analysis of the user occupational class through <fixed-case>T</fixed-case>witter content</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Vasileios</first><last>Lampos</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>1754–1764</pages>
@@ -1865,7 +1865,7 @@
     <paper id="173">
       <title><fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>xtend: Extending Word Embeddings to Embeddings for Synsets and Lexemes</title>
       <author><first>Sascha</first><last>Rothe</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1793–1803</pages>
       <url hash="e89fc17c">P15-1173</url>
       <doi>10.3115/v1/P15-1173</doi>
@@ -1884,7 +1884,7 @@
     <meta>
       <booktitle>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</booktitle>
       <url hash="19f8dc2f">P15-2</url>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
       <doi>10.3115/v1/P15-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1900,7 +1900,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Framework for the Construction of Monolingual and Cross-lingual Word Similarity Datasets</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>1–7</pages>
@@ -1913,7 +1913,7 @@
       <title>On metric embedding for boosting semantic similarity computations</title>
       <author><first>Julien</first><last>Subercaze</last></author>
       <author><first>Christophe</first><last>Gravier</last></author>
-      <author><first>Frederique</first><last>Laforest</last></author>
+      <author id="frederique-laforest"><first>Frederique</first><last>Laforest</last></author>
       <pages>8–14</pages>
       <url hash="660f813b">P15-2002</url>
       <doi>10.3115/v1/P15-2002</doi>
@@ -1932,9 +1932,9 @@
     </paper>
     <paper id="4">
       <title>A Multitask Objective to Inject Lexical Contrast into Distributional Semantics</title>
-      <author><first>Nghia The</first><last>Pham</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>21–26</pages>
       <url hash="2116ef79">P15-2004</url>
       <doi>10.3115/v1/P15-2004</doi>
@@ -1945,7 +1945,7 @@
       <author><first>Shoushan</first><last>Li</last></author>
       <author><first>Lei</first><last>Huang</last></author>
       <author><first>Jingjing</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>27–31</pages>
       <url hash="69bd9664">P15-2005</url>
       <doi>10.3115/v1/P15-2005</doi>
@@ -1964,7 +1964,7 @@
       <author><first>Yinfei</first><last>Yang</last></author>
       <author><first>Yaowei</first><last>Yan</last></author>
       <author><first>Minghui</first><last>Qiu</last></author>
-      <author><first>Forrest</first><last>Bao</last></author>
+      <author id="forrest-bao"><first>Forrest</first><last>Bao</last></author>
       <pages>38–44</pages>
       <url hash="a852b990">P15-2007</url>
       <doi>10.3115/v1/P15-2007</doi>
@@ -2007,7 +2007,7 @@
     <paper id="11">
       <title>Simplifying Lexical Simplification: Do We Need Simplified Corpora?</title>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <pages>63–68</pages>
       <url hash="fc182ace">P15-2011</url>
       <doi>10.3115/v1/P15-2011</doi>
@@ -2015,9 +2015,9 @@
     </paper>
     <paper id="12">
       <title><fixed-case>Z</fixed-case>oom: a corpus of natural language descriptions of map locations</title>
-      <author><first>Romina</first><last>Altamirano</last></author>
-      <author><first>Thiago</first><last>Ferreira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="romina-altamirano"><first>Romina</first><last>Altamirano</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Ferreira</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
       <pages>69–75</pages>
       <url hash="d782c94c">P15-2012</url>
@@ -2026,7 +2026,7 @@
     </paper>
     <paper id="13">
       <title>Generating overspecified referring expressions: the role of discrimination</title>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Michelle</first><last>Galindo</last></author>
       <author><first>Douglas</first><last>Iacovelli</last></author>
       <pages>76–82</pages>
@@ -2036,7 +2036,7 @@
     </paper>
     <paper id="14">
       <title>Using prosodic annotations to improve coreference resolution of spoken text</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Arndt</first><last>Riester</last></author>
       <pages>83–88</pages>
       <url hash="72970607">P15-2014</url>
@@ -2055,7 +2055,7 @@
     <paper id="16">
       <title><fixed-case>I</fixed-case> do not disagree: leveraging monolingual alignment to detect disagreement in dialogue</title>
       <author><first>Ajda</first><last>Gokcen</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>94–99</pages>
       <url hash="6234a2ee">P15-2016</url>
       <doi>10.3115/v1/P15-2016</doi>
@@ -2069,7 +2069,7 @@
       <author><first>Saurabh</first><last>Gupta</last></author>
       <author><first>Li</first><last>Deng</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
       <pages>100–105</pages>
       <url hash="ab507871">P15-2017</url>
@@ -2081,7 +2081,7 @@
       <author><first>Semih</first><last>Yagcioglu</last></author>
       <author><first>Erkut</first><last>Erdem</last></author>
       <author><first>Aykut</first><last>Erdem</last></author>
-      <author><first>Ruket</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
       <pages>106–111</pages>
       <url hash="8b78eb5f">P15-2018</url>
       <doi>10.3115/v1/P15-2018</doi>
@@ -2090,7 +2090,7 @@
     </paper>
     <paper id="19">
       <title>Learning language through pictures</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>112–118</pages>
@@ -2113,7 +2113,7 @@
     <paper id="21">
       <title>Lexicon Stratification for Translating Out-of-Vocabulary Words</title>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>125–131</pages>
       <url hash="2d82e7d2">P15-2021</url>
       <doi>10.3115/v1/P15-2021</doi>
@@ -2145,7 +2145,7 @@
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Akira</first><last>Fujita</last></author>
       <author><first>Naoya</first><last>Todo</last></author>
-      <author><first>Noriko H.</first><last>Arai</last></author>
+      <author id="noriko-h-arai"><first>Noriko H.</first><last>Arai</last></author>
       <pages>145–149</pages>
       <url hash="8390c173">P15-2024</url>
       <doi>10.3115/v1/P15-2024</doi>
@@ -2162,9 +2162,9 @@
     </paper>
     <paper id="26">
       <title>Exploring the Planet of the <fixed-case>APE</fixed-case>s: a Comparative Study of State-of-the-art Methods for <fixed-case>MT</fixed-case> Automatic Post-Editing</title>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>156–161</pages>
       <url hash="6400a4b3">P15-2026</url>
@@ -2175,7 +2175,7 @@
     <paper id="27">
       <title>Efficient Learning for Undirected Topic Models</title>
       <author><first>Jiatao</first><last>Gu</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>162–167</pages>
       <url hash="f4fedceb">P15-2027</url>
       <doi>10.3115/v1/P15-2027</doi>
@@ -2204,7 +2204,7 @@
     <paper id="30">
       <title>Non-Linear Text Regression with a Deep Convolutional Neural Network</title>
       <author><first>Zsolt</first><last>Bitvai</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>180–185</pages>
       <url hash="7088b839">P15-2030</url>
       <doi>10.3115/v1/P15-2030</doi>
@@ -2223,7 +2223,7 @@
       <title>Pre-training of Hidden-Unit <fixed-case>CRF</fixed-case>s</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>192–198</pages>
       <url hash="03cc3b83">P15-2032</url>
       <doi>10.3115/v1/P15-2032</doi>
@@ -2252,9 +2252,9 @@
     <paper id="35">
       <title>Measuring idiosyncratic interests in children with autism</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>212–217</pages>
       <url hash="2a18636b">P15-2035</url>
       <doi>10.3115/v1/P15-2035</doi>
@@ -2265,9 +2265,9 @@
       <author><first>Meghana</first><last>Kshirsagar</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>218–224</pages>
       <url hash="e46b867d">P15-2036</url>
       <doi>10.3115/v1/P15-2036</doi>
@@ -2308,7 +2308,7 @@
     <paper id="40">
       <title><fixed-case>KL</fixed-case>cpos3 - a Language Similarity Measure for Delexicalized Parser Transfer</title>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>243–249</pages>
       <url hash="5f9edd53">P15-2040</url>
       <doi>10.3115/v1/P15-2040</doi>
@@ -2337,7 +2337,7 @@
       <title>Synthetic Word Parsing Improves <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Fei</first><last>Cheng</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>262–267</pages>
       <url hash="47a87040">P15-2043</url>
       <doi>10.3115/v1/P15-2043</doi>
@@ -2345,9 +2345,9 @@
     </paper>
     <paper id="44">
       <title>If all you have is a bit of the <fixed-case>B</fixed-case>ible: Learning <fixed-case>POS</fixed-case> taggers for truly low-resource languages</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>268–272</pages>
       <url hash="6e0e17f1">P15-2044</url>
       <doi>10.3115/v1/P15-2044</doi>
@@ -2356,8 +2356,8 @@
     <paper id="45">
       <title>Improving distant supervision using inference learning</title>
       <author><first>Roland</first><last>Roller</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>273–278</pages>
       <url hash="325a72ad">P15-2045</url>
@@ -2393,7 +2393,7 @@
     <paper id="48">
       <title>Embedding Methods for Fine Grained Entity Type Classification</title>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <author><first>Nevena</first><last>Lazic</last></author>
       <pages>291–296</pages>
       <url hash="19c70d3b">P15-2048</url>
@@ -2432,7 +2432,7 @@
     <paper id="52">
       <title>The Users Who Say ‘Ni’: Audience Identification in <fixed-case>C</fixed-case>hinese-language Restaurant Reviews</title>
       <author><first>Rob</first><last>Voigt</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>314–319</pages>
       <url hash="898e8972">P15-2052</url>
       <doi>10.3115/v1/P15-2052</doi>
@@ -2508,7 +2508,7 @@
       <title>Document Level Time-anchoring for <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ine Extraction</title>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>358–364</pages>
       <url hash="a48fde62">P15-2059</url>
       <doi>10.3115/v1/P15-2059</doi>
@@ -2517,7 +2517,7 @@
     <paper id="60">
       <title>Event Detection and Domain Adaptation with Convolutional Neural Networks</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>365–371</pages>
       <url hash="f20d847e">P15-2060</url>
       <doi>10.3115/v1/P15-2060</doi>
@@ -2527,7 +2527,7 @@
       <title>Seed-Based Event Trigger Labeling: How far can event descriptions get us?</title>
       <author><first>Ofer</first><last>Bronstein</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>372–376</pages>
@@ -2607,7 +2607,7 @@
     <paper id="69">
       <title><fixed-case>TR</fixed-case>9856: A Multi-word Term Relatedness Benchmark</title>
       <author><first>Ran</first><last>Levy</last></author>
-      <author><first>Liat</first><last>Ein-Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein-Dor</last></author>
       <author><first>Shay</first><last>Hummel</last></author>
       <author><first>Ruty</first><last>Rinott</last></author>
       <author><first>Noam</first><last>Slonim</last></author>
@@ -2631,8 +2631,8 @@
     </paper>
     <paper id="71">
       <title>Automatic Discrimination between Cognates and Borrowings</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>431–437</pages>
       <url hash="bf484b25">P15-2071</url>
       <doi>10.3115/v1/P15-2071</doi>
@@ -2641,10 +2641,10 @@
     <paper id="72">
       <title>The Media Frames Corpus: Annotations of Frames Across Issues</title>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Amber E.</first><last>Boydstun</last></author>
-      <author><first>Justin H.</first><last>Gross</last></author>
+      <author id="amber-boydstun"><first>Amber E.</first><last>Boydstun</last></author>
+      <author id="justin-h-gross"><first>Justin H.</first><last>Gross</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>438–444</pages>
       <url hash="28a5d574">P15-2072</url>
       <doi>10.3115/v1/P15-2072</doi>
@@ -2661,7 +2661,7 @@
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>445–450</pages>
       <url hash="76323bb4">P15-2073</url>
       <doi>10.3115/v1/P15-2073</doi>
@@ -2669,8 +2669,8 @@
     </paper>
     <paper id="74">
       <title><fixed-case>T</fixed-case>ibetan Unknown Word Identification from News Corpora for Supporting Lexicon-based <fixed-case>T</fixed-case>ibetan Word Segmentation</title>
-      <author><first>Minghua</first><last>Nuo</last></author>
-      <author><first>Huidan</first><last>Liu</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
       <author><first>Congjun</first><last>Long</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <pages>451–457</pages>
@@ -2691,7 +2691,7 @@
     <paper id="76">
       <title>Non-distributional Word Vector Representations</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>464–469</pages>
       <url hash="26554e98">P15-2076</url>
       <doi>10.3115/v1/P15-2076</doi>
@@ -2710,7 +2710,7 @@
       <title>Dependency length minimisation effects in short spans: a large-scale analysis of adjective placement in complex noun phrases</title>
       <author><first>Kristina</first><last>Gulordava</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <pages>477–482</pages>
       <url hash="564ed170">P15-2078</url>
       <doi>10.3115/v1/P15-2078</doi>
@@ -2719,7 +2719,7 @@
     <paper id="79">
       <title>Tagging Performance Correlates with Author Age</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>483–488</pages>
       <url hash="a3acb207">P15-2079</url>
       <doi>10.3115/v1/P15-2079</doi>
@@ -2741,9 +2741,9 @@
       <title>The Fixed-Size Ordinally-Forgetting Encoding Method for Neural Network Language Models</title>
       <author><first>ShiLiang</first><last>Zhang</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
-      <author><first>MingBin</first><last>Xu</last></author>
+      <author id="mingbin-xu"><first>MingBin</first><last>Xu</last></author>
       <author><first>JunFeng</first><last>Hou</last></author>
-      <author><first>LiRong</first><last>Dai</last></author>
+      <author id="li-rong-dai"><first>LiRong</first><last>Dai</last></author>
       <pages>495–500</pages>
       <url hash="76bb3c92">P15-2081</url>
       <doi>10.3115/v1/P15-2081</doi>
@@ -2782,8 +2782,8 @@
     <paper id="85">
       <title>Point Process Modelling of Rumour Dynamics in Social Media</title>
       <author><first>Michal</first><last>Lukasik</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>518–523</pages>
       <url hash="47fb89e9">P15-2085</url>
       <doi>10.3115/v1/P15-2085</doi>
@@ -2801,7 +2801,7 @@
     <paper id="87">
       <title><fixed-case>MT</fixed-case> Quality Estimation for Computer-assisted Translation: Does it Really Help?</title>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>530–535</pages>
       <url hash="1199f32d">P15-2087</url>
@@ -2824,7 +2824,7 @@
       <title>Learning Word Reorderings for Hierarchical Phrase-based Statistical Machine Translation</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>542–548</pages>
       <url hash="8f4d9849">P15-2089</url>
@@ -2835,7 +2835,7 @@
       <title><fixed-case>UNRAVEL</fixed-case>—<fixed-case>A</fixed-case> Decipherment Toolkit</title>
       <author><first>Malte</first><last>Nuhn</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>549–553</pages>
       <url hash="fe3a95ee">P15-2090</url>
       <doi>10.3115/v1/P15-2090</doi>
@@ -2877,7 +2877,7 @@
       <author><first>Akiva</first><last>Miura</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>573–577</pages>
       <url hash="765d97ea">P15-2094</url>
@@ -2896,7 +2896,7 @@
     <paper id="96">
       <title>Automatic Identification of Age-Appropriate Ratings of Song Lyrics</title>
       <author><first>Anggi</first><last>Maulidyani</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>583–587</pages>
       <url hash="2e405010">P15-2096</url>
       <doi>10.3115/v1/P15-2096</doi>
@@ -2907,7 +2907,7 @@
       <author><first>Courtney</first><last>Napoles</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>588–593</pages>
       <url hash="d73958e9">P15-2097</url>
       <doi>10.3115/v1/P15-2097</doi>
@@ -2930,7 +2930,7 @@
     <paper id="99">
       <title>Automatic Detection of Sentence Fragments</title>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>599–603</pages>
       <url hash="b68d16d4">P15-2099</url>
       <doi>10.3115/v1/P15-2099</doi>
@@ -2940,9 +2940,9 @@
       <title>A Computational Approach to Automatic Prediction of Drunk-Texting</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Balamurali</first><last>AR</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark J.</first><last>Carman</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>AR</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark J.</first><last>Carman</last></author>
       <pages>604–608</pages>
       <url hash="1ec6192b">P15-2100</url>
       <doi>10.3115/v1/P15-2100</doi>
@@ -2986,8 +2986,8 @@
     <paper id="104">
       <title><fixed-case>T</fixed-case>witter User Geolocation Using a Unified Text and Network Prediction Model</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>630–636</pages>
       <url hash="8057b896">P15-2104</url>
       <doi>10.3115/v1/P15-2104</doi>
@@ -2995,15 +2995,15 @@
     </paper>
     <paper id="105">
       <title>Automatic Keyword Extraction on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Luís</first><last>Marujo</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
-      <author><first>João</first><last>Neto</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
+      <author id="joao-p-neto"><first>João</first><last>Neto</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>637–643</pages>
       <url hash="cb5da123">P15-2105</url>
       <doi>10.3115/v1/P15-2105</doi>
@@ -3013,10 +3013,10 @@
     <paper id="106">
       <title>Towards a Contextual Pragmatic Model to Detect Irony in Tweets</title>
       <author><first>Jihen</first><last>Karoui</last></author>
-      <author><first>Farah</first><last>Benamara Zitoune</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara Zitoune</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>644–650</pages>
       <url hash="0719d119">P15-2106</url>
       <doi>10.3115/v1/P15-2106</doi>
@@ -3047,7 +3047,7 @@
       <title>The Discovery of Natural Typing Annotations: User-produced Potential <fixed-case>C</fixed-case>hinese Word Delimiters</title>
       <author><first>Dakui</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Mao</last></author>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
       <author><first>Hanshi</first><last>Wang</last></author>
       <author><first>Chuyuan</first><last>Wei</last></author>
       <author><first>Shiping</first><last>Tang</last></author>
@@ -3060,7 +3060,7 @@
       <title>One Tense per Scene: Predicting Tense in <fixed-case>C</fixed-case>hinese Conversations</title>
       <author><first>Tao</first><last>Ge</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>668–673</pages>
       <url hash="0ba44d67">P15-2110</url>
@@ -3083,7 +3083,7 @@
       <title>Rhetoric Map of an Answer to Compound Queries</title>
       <author><first>Boris</first><last>Galitsky</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
-      <author><first>Sergey O.</first><last>Kuznetsov</last></author>
+      <author id="sergey-o-kuznetsov"><first>Sergey O.</first><last>Kuznetsov</last></author>
       <pages>681–686</pages>
       <url hash="5778a1c4">P15-2112</url>
       <doi>10.3115/v1/P15-2112</doi>
@@ -3094,9 +3094,9 @@
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>687–693</pages>
       <url hash="c00025f5">P15-2113</url>
@@ -3105,7 +3105,7 @@
     </paper>
     <paper id="114">
       <title>Learning Hybrid Representations to Retrieve Semantically Equivalent Questions</title>
-      <author><first>Cícero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
       <author><first>Luciano</first><last>Barbosa</last></author>
       <author><first>Dasha</first><last>Bogdanova</last></author>
       <author><first>Bianca</first><last>Zadrozny</last></author>
@@ -3128,7 +3128,7 @@
     <paper id="116">
       <title>A Long Short-Term Memory Model for Answer Sentence Selection in Question Answering</title>
       <author><first>Di</first><last>Wang</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>707–712</pages>
       <url hash="34ef0e38">P15-2116</url>
       <doi>10.3115/v1/P15-2116</doi>
@@ -3149,7 +3149,7 @@
     <paper id="118">
       <title>Bilingual Word Embeddings from Non-Parallel Document-Aligned Data Applied to Bilingual Lexicon Induction</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>719–725</pages>
       <url hash="aa2ab11e">P15-2118</url>
       <doi>10.3115/v1/P15-2118</doi>
@@ -3211,7 +3211,7 @@
       <title>Harnessing Context Incongruity for Sarcasm Detection</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Vinita</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>757–762</pages>
       <url hash="459f15a0">P15-2124</url>
       <doi>10.3115/v1/P15-2124</doi>
@@ -3222,7 +3222,7 @@
       <author><first>Zhongqing</first><last>Wang</last></author>
       <author><first>Sophia</first><last>Lee</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>763–768</pages>
       <url hash="7cfdfcfe">P15-2125</url>
       <doi>10.3115/v1/P15-2125</doi>
@@ -3234,7 +3234,7 @@
       <author><first>Mohammad</first><last>Al Boni</last></author>
       <author><first>Keira</first><last>Zhou</last></author>
       <author><first>Hongning</first><last>Wang</last></author>
-      <author><first>Matthew S.</first><last>Gerber</last></author>
+      <author id="matthew-gerber"><first>Matthew S.</first><last>Gerber</last></author>
       <pages>769–774</pages>
       <url hash="f09f255e">P15-2126</url>
       <doi>10.3115/v1/P15-2126</doi>
@@ -3246,7 +3246,7 @@
       <author><first>Cen-Chieh</first><last>Chen</last></author>
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Chien Chin</first><last>Chen</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>775–780</pages>
       <url hash="dce32a6a">P15-2127</url>
       <doi>10.3115/v1/P15-2127</doi>
@@ -3262,7 +3262,7 @@
     </paper>
     <paper id="129">
       <title>Predicting Valence-Arousal Ratings of Words Using a Weighted Graph Method</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Jin</first><last>Wang</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <author><first>Xue-jie</first><last>Zhang</last></author>
@@ -3276,11 +3276,11 @@
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>794–799</pages>
       <url hash="a51dfaac">P15-2130</url>
       <doi>10.3115/v1/P15-2130</doi>
@@ -3300,7 +3300,7 @@
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
       <author><first>Xiaohu</first><last>Liu</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>806–811</pages>
       <url hash="605d21ba">P15-2132</url>
       <doi>10.3115/v1/P15-2132</doi>
@@ -3321,7 +3321,7 @@
       <title>A Simultaneous Recognition Framework for the Spoken Language Understanding Module of Intelligent Personal Assistant Software on Smart Phones</title>
       <author><first>Changsu</first><last>Lee</last></author>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>818–822</pages>
       <url hash="b9974242">P15-2134</url>
       <doi>10.3115/v1/P15-2134</doi>
@@ -3329,8 +3329,8 @@
     </paper>
     <paper id="135">
       <title>A Deeper Exploration of the Standard <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case> Approach to Text Simplification and its Evaluation</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Hannah</first><last>Béchara</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="hannah-bechara"><first>Hannah</first><last>Béchara</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>823–828</pages>
       <url hash="87266492">P15-2135</url>
@@ -3362,7 +3362,7 @@
     <paper id="138">
       <title>Unsupervised extractive summarization via coverage maximization with syntactic and semantic concepts</title>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>840–844</pages>
       <url hash="6c882ea7">P15-2138</url>
       <doi>10.3115/v1/P15-2138</doi>
@@ -3370,8 +3370,8 @@
     </paper>
     <paper id="139">
       <title>Low Resource Dependency Parsing: Cross-lingual Parameter Sharing in a Neural Network Parser</title>
-      <author><first>Long</first><last>Duong</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>845–850</pages>
@@ -3383,7 +3383,7 @@
       <title>Semantic Structure Analysis of Noun Phrases using <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation</title>
       <author><first>Yuichiro</first><last>Sawai</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>851–856</pages>
       <url hash="c47841b9">P15-2140</url>
       <doi>10.3115/v1/P15-2140</doi>
@@ -3393,7 +3393,7 @@
       <title>Boosting Transition-based <fixed-case>AMR</fixed-case> Parsing with Refined Actions and Auxiliary Analyzers</title>
       <author><first>Chuan</first><last>Wang</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <pages>857–862</pages>
       <url hash="0e668554">P15-2141</url>
       <doi>10.3115/v1/P15-2141</doi>
@@ -3402,7 +3402,7 @@
     <paper id="142">
       <title>Generative Incremental Dependency Parsing with Neural Networks</title>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>863–869</pages>
       <url hash="7e5aabb4">P15-2142</url>
       <doi>10.3115/v1/P15-2142</doi>
@@ -3437,8 +3437,8 @@
       <editor><first>Kuan-Yu</first><last>Chen</last></editor>
       <editor><first>Angelina</first><last>Ivanova</last></editor>
       <editor><first>Ellie</first><last>Pavlick</last></editor>
-      <editor><first>Emily</first><last>Bender</last></editor>
-      <editor><first>Chin-Yew</first><last>Lin</last></editor>
+      <editor id="emily-m-bender"><first>Emily</first><last>Bender</last></editor>
+      <editor id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <doi>10.3115/v1/P15-3</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3466,9 +3466,9 @@
       <title>Leveraging Compounds to Improve Noun Phrase Translation from <fixed-case>C</fixed-case>hinese and <fixed-case>G</fixed-case>erman</title>
       <author><first>Xiao</first><last>Pu</last></author>
       <author><first>Laura</first><last>Mascarell</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
-      <author><first>Ngoc-Quang</first><last>Luong</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc-Quang</first><last>Luong</last></author>
       <author><first>Martin</first><last>Volk</last></author>
       <pages>8–15</pages>
       <url hash="a4b4a901">P15-3002</url>
@@ -3486,7 +3486,7 @@
     <paper id="4">
       <title>Transition-based Dependency <fixed-case>DAG</fixed-case> Parsing Using Dynamic Oracles</title>
       <author><first>Alper</first><last>Tokgöz</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <pages>22–27</pages>
       <url hash="2cbb16a7">P15-3004</url>
       <doi>10.3115/v1/P15-3004</doi>
@@ -3497,7 +3497,7 @@
       <author><first>Yoshiaki</first><last>Kitagawa</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Hiroshi</first><last>Ishikawa</last></author>
       <pages>28–34</pages>
       <url hash="fb923d96">P15-3005</url>
@@ -3553,7 +3553,7 @@
       <title><fixed-case>IMI</fixed-case> — A Multilingual Semantic Annotation Environment</title>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Luís</first><last>Morgado da Costa</last></author>
-      <author><first>Tuấn Anh</first><last>Lê</last></author>
+      <author id="tuan-anh-le"><first>Tuấn Anh</first><last>Lê</last></author>
       <pages>7–12</pages>
       <url hash="c6cbc786">P15-4002</url>
       <doi>10.3115/v1/P15-4002</doi>
@@ -3574,7 +3574,7 @@
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>19–24</pages>
       <url hash="fb639eda">P15-4004</url>
       <doi>10.3115/v1/P15-4004</doi>
@@ -3679,7 +3679,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>LEX</fixed-case>enstein: A Framework for Lexical Simplification</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>85–90</pages>
       <url hash="430a5905">P15-4015</url>
@@ -3695,7 +3695,7 @@
       <author><first>Kai</first><last>Hakala</last></author>
       <author><first>Chen</first><last>Li</last></author>
       <author><first>Pontus</first><last>Stenetorp</last></author>
-      <author><first>Lars Juhl</first><last>Jensen</last></author>
+      <author id="lars-juhl-jensen"><first>Lars Juhl</first><last>Jensen</last></author>
       <pages>91–96</pages>
       <url hash="4aab6dc6">P15-4016</url>
       <doi>10.3115/v1/P15-4016</doi>
@@ -3703,8 +3703,8 @@
     </paper>
     <paper id="17">
       <title>A Data Sharing and Annotation Service Infrastructure</title>
-      <author><first>Stelios</first><last>Piperidis</last></author>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Juli</first><last>Bakagianni</last></author>
       <author><first>Sokratis</first><last>Sofianopoulos</last></author>
       <pages>97–102</pages>
@@ -3717,7 +3717,7 @@
       <author><first>Eugen</first><last>Ruppert</last></author>
       <author><first>Manuel</first><last>Kaufmann</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>103–108</pages>
       <url hash="1532fb9a">P15-4018</url>
       <doi>10.3115/v1/P15-4018</doi>
@@ -3740,8 +3740,8 @@
     <paper id="20">
       <title>Multi-level Translation Quality Prediction with <fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st++</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>115–120</pages>
       <url hash="7ded1856">P15-4020</url>
       <doi>10.3115/v1/P15-4020</doi>
@@ -3758,8 +3758,8 @@
     </paper>
     <paper id="22">
       <title>A Domain-independent Rule-based Framework for Event Extraction</title>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Thomas</first><last>Hicks</last></author>
       <pages>127–132</pages>
@@ -3780,10 +3780,10 @@
     <paper id="24">
       <title><fixed-case>W</fixed-case>rite<fixed-case>A</fixed-case>head: Mining Grammar Patterns in Corpora for Assisted Writing</title>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jim</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
       <author><first>Joanne</first><last>Boisson</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>139–144</pages>
       <url hash="e1e7a870">P15-4024</url>
       <doi>10.3115/v1/P15-4024</doi>
@@ -3805,7 +3805,7 @@
     <meta>
       <booktitle>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing: Tutorial Abstracts</booktitle>
       <url hash="f80e6e98">P15-5</url>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Kevin</first><last>Duh</last></editor>
       <doi>10.3115/v1/P15-5</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3831,8 +3831,8 @@
     </paper>
     <paper id="2">
       <title>Structured Belief Propagation for <fixed-case>NLP</fixed-case></title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>5–6</pages>
       <url hash="85b900f5">P15-5002</url>
       <doi>10.3115/v1/P15-5002</doi>
@@ -3840,8 +3840,8 @@
     </paper>
     <paper id="3">
       <title>Sentiment and Belief: How to Think about, Represent, and Annotate Private States</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>7–11</pages>
       <url hash="698ec06c">P15-5003</url>
       <doi>10.3115/v1/P15-5003</doi>
@@ -3863,7 +3863,7 @@
       <author><first>Guillaume</first><last>Bouchard</last></author>
       <author><first>Jason</first><last>Naradowsky</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
       <pages>16–18</pages>
       <url hash="3eea5faf">P15-5005</url>
diff --git a/data/xml/P16.xml b/data/xml/P16.xml
index e5e7833be9..542704f06f 100644
--- a/data/xml/P16.xml
+++ b/data/xml/P16.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</booktitle>
       <url hash="84256c85">P16-1</url>
       <editor><first>Katrin</first><last>Erk</last></editor>
-      <editor><first>Noah A.</first><last>Smith</last></editor>
+      <editor id="noah-a-smith"><first>Noah A.</first><last>Smith</last></editor>
       <doi>10.18653/v1/P16-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -69,7 +69,7 @@
       <author><first>Di</first><last>Lu</last></author>
       <author><first>Xiaoman</first><last>Pan</last></author>
       <author><first>Nima</first><last>Pourdamghani</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <pages>54–65</pages>
@@ -82,8 +82,8 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>John</first><last>DeNero</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <pages>66–75</pages>
       <url hash="9425f6eb">P16-1007</url>
       <doi>10.18653/v1/P16-1007</doi>
@@ -124,7 +124,7 @@
     <paper id="11">
       <title>Incremental Acquisition of Verb Hypothesis Space towards Physical World Interaction</title>
       <author><first>Lanbo</first><last>She</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>108–117</pages>
       <url hash="cbea70a6">P16-1011</url>
       <doi>10.18653/v1/P16-1011</doi>
@@ -133,7 +133,7 @@
     <paper id="12">
       <title>Language Transfer Learning for Supervised Lexical Substitution</title>
       <author><first>Gerold</first><last>Hintz</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>118–129</pages>
       <url hash="f20384f3">P16-1012</url>
       <doi>10.18653/v1/P16-1012</doi>
@@ -145,7 +145,7 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>130–139</pages>
       <url hash="ffae873b">P16-1013</url>
       <doi>10.18653/v1/P16-1013</doi>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="16">
       <title>A Transition-Based System for Joint Lexical and Syntactic Analysis</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>161–171</pages>
       <url hash="78757fba">P16-1016</url>
@@ -186,7 +186,7 @@
     <paper id="17">
       <title>Neural Greedy Constituent Parsing with Dynamic Oracles</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>172–182</pages>
       <url hash="2e5d249e">P16-1017</url>
       <doi>10.18653/v1/P16-1017</doi>
@@ -205,9 +205,9 @@
     </paper>
     <paper id="19">
       <title>Idiom Token Classification using Sentential Distributed Semantics</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>194–204</pages>
       <url hash="78725e50">P16-1019</url>
       <doi>10.18653/v1/P16-1019</doi>
@@ -229,7 +229,7 @@
       <author><first>Qinlan</first><last>Shen</last></author>
       <author><first>Michael</first><last>Miller</last></author>
       <author><first>Seungwhan</first><last>Moon</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>216–225</pages>
       <url hash="ee211cb5">P16-1021</url>
       <doi>10.18653/v1/P16-1021</doi>
@@ -251,7 +251,7 @@
     <paper id="23">
       <title>Intrinsic Subspace Evaluation of Word Embedding Representations</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>236–246</pages>
       <url hash="54e0df42">P16-1023</url>
       <doi>10.18653/v1/P16-1023</doi>
@@ -272,9 +272,9 @@
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Xiaocheng</first><last>Feng</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <pages>258–268</pages>
       <url hash="5030ef7e">P16-1025</url>
       <doi>10.18653/v1/P16-1025</doi>
@@ -293,7 +293,7 @@
     <paper id="27">
       <title>Using Sentence-Level <fixed-case>LSTM</fixed-case> Language Models for Script Inference</title>
       <author><first>Karl</first><last>Pichotta</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <pages>279–289</pages>
       <url hash="c1cb900e">P16-1027</url>
       <doi>10.18653/v1/P16-1027</doi>
@@ -332,7 +332,7 @@
       <title>Bi-Transferring Deep Neural Networks for Domain Adaptation</title>
       <author><first>Guangyou</first><last>Zhou</last></author>
       <author><first>Zhiwen</first><last>Xie</last></author>
-      <author><first>Jimmy Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Jimmy Xiangji</first><last>Huang</last></author>
       <author><first>Tingting</first><last>He</last></author>
       <pages>322–332</pages>
       <url hash="da6d847e">P16-1031</url>
@@ -343,7 +343,7 @@
       <title>Document-level Sentiment Inference with Social, Faction, and Discourse Context</title>
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Hannah</first><last>Rashkin</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>333–343</pages>
       <url hash="c8f5b496">P16-1032</url>
@@ -367,7 +367,7 @@
     <paper id="34">
       <title>Dependency Parsing with Bounded Block Degree and Well-nestedness via <fixed-case>L</fixed-case>agrangian Relaxation and Branch-and-Bound</title>
       <author><first>Caio</first><last>Corro</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Mathieu</first><last>Lacroix</last></author>
       <author><first>Antoine</first><last>Rozenknop</last></author>
       <author><first>Roberto</first><last>Wolfler Calvo</last></author>
@@ -390,8 +390,8 @@
       <title>Together we stand: <fixed-case>S</fixed-case>iamese Networks for Similar Question Retrieval</title>
       <author><first>Arpita</first><last>Das</last></author>
       <author><first>Harish</first><last>Yenala</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>378–387</pages>
       <url hash="b64c77c0">P16-1036</url>
       <doi>10.18653/v1/P16-1036</doi>
@@ -401,7 +401,7 @@
       <title>News Citation Recommendation with Implicit and Explicit Semantics</title>
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>388–398</pages>
       <url hash="748eb170">P16-1037</url>
       <doi>10.18653/v1/P16-1037</doi>
@@ -452,7 +452,7 @@
       <title>Combining Natural Logic and Shallow Reasoning for Question Answering</title>
       <author><first>Gabor</first><last>Angeli</last></author>
       <author><first>Neha Nayak</first><last>Kennard</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>442–452</pages>
       <url hash="fe8db93b">P16-1042</url>
       <doi>10.18653/v1/P16-1042</doi>
@@ -461,7 +461,7 @@
     <paper id="43">
       <title>Easy Questions First? A Case Study on Curriculum Learning for Question Answering</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>453–463</pages>
       <url hash="49dcdb66">P16-1043</url>
       <doi>10.18653/v1/P16-1043</doi>
@@ -470,7 +470,7 @@
     <paper id="44">
       <title>Improved Representation Learning for Question Answer Matching</title>
       <author><first>Ming</first><last>Tan</last></author>
-      <author><first>Cicero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>dos Santos</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>464–473</pages>
@@ -481,8 +481,8 @@
     <paper id="45">
       <title>Tables as Semi-structured Knowledge for Question Answering</title>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Peter</first><last>Turney</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>474–483</pages>
       <url hash="5d9f4363">P16-1045</url>
       <doi>10.18653/v1/P16-1045</doi>
@@ -502,7 +502,7 @@
       <title>Neural Networks For Negation Scope Detection</title>
       <author><first>Federico</first><last>Fancellu</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>495–504</pages>
       <url hash="4889e1a6">P16-1047</url>
       <doi>10.18653/v1/P16-1047</doi>
@@ -511,7 +511,7 @@
     <paper id="48">
       <title><fixed-case>CSE</fixed-case>: Conceptual Sentence Embeddings based on Attention Model</title>
       <author><first>Yashen</first><last>Wang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Chong</first><last>Feng</last></author>
       <author><first>Qiang</first><last>Zhou</last></author>
       <author><first>Jiahui</first><last>Gu</last></author>
@@ -538,7 +538,7 @@
     <paper id="50">
       <title>Investigating the Sources of Linguistic Alignment in Conversation</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
-      <author><first>Michael C.</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
       <pages>526–536</pages>
       <url hash="84be4ef1">P16-1050</url>
       <doi>10.18653/v1/P16-1050</doi>
@@ -574,8 +574,8 @@
     </paper>
     <paper id="54">
       <title>Towards more variation in text generation: Developing and evaluating variation models for choice of referential form</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>568–577</pages>
       <url hash="7f9e0d60">P16-1054</url>
@@ -593,8 +593,8 @@
     </paper>
     <paper id="56">
       <title>Generating Factoid Questions With Recurrent Neural Networks: The 30<fixed-case>M</fixed-case> Factoid Question-Answer Corpus</title>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
-      <author><first>Alberto</first><last>García-Durán</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="alberto-garcia-duran"><first>Alberto</first><last>García-Durán</last></author>
       <author><first>Caglar</first><last>Gulcehre</last></author>
       <author><first>Sungjin</first><last>Ahn</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
@@ -608,8 +608,8 @@
     <paper id="57">
       <title>Latent Predictor Networks for Code Generation</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
       <author><first>Tomáš</first><last>Kočiský</last></author>
       <author><first>Fumin</first><last>Wang</last></author>
@@ -621,7 +621,7 @@
     </paper>
     <paper id="58">
       <title>Easy Things First: Installments Improve Referring Expression Generation for Objects in Photographs</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>610–620</pages>
       <url hash="f3f12b3d">P16-1058</url>
@@ -633,7 +633,7 @@
       <author><first>Amir</first><last>Globerson</last></author>
       <author><first>Nevena</first><last>Lazic</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <pages>621–631</pages>
@@ -643,7 +643,7 @@
     </paper>
     <paper id="60">
       <title>Which Coreference Evaluation Metric Do You Trust? A Proposal for a Link-based Entity Aware Metric</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>632–642</pages>
       <url hash="025a4897">P16-1060</url>
@@ -654,7 +654,7 @@
     <paper id="61">
       <title>Improving Coreference Resolution by Learning Entity-Level Distributed Representations</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>643–653</pages>
       <url hash="804f66fc">P16-1061</url>
       <doi>10.18653/v1/P16-1061</doi>
@@ -666,7 +666,7 @@
       <author><first>Reed</first><last>Coke</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Xiangyi</first><last>Ye</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>654–665</pages>
       <url hash="66051360">P16-1062</url>
       <doi>10.18653/v1/P16-1062</doi>
@@ -676,7 +676,7 @@
     <paper id="63">
       <title>Generative Topic Embedding: a Continuous Representation of Documents</title>
       <author><first>Shaohua</first><last>Li</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Jun</first><last>Zhu</last></author>
       <author><first>Chunyan</first><last>Miao</last></author>
       <pages>666–675</pages>
@@ -768,7 +768,7 @@
       <title>Extracting token-level signals of syntactic processing from f<fixed-case>MRI</fixed-case> - with an application to <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> induction</title>
       <author><first>Joachim</first><last>Bingel</last></author>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>747–755</pages>
       <url hash="ce864a89">P16-1071</url>
       <doi>10.18653/v1/P16-1071</doi>
@@ -808,7 +808,7 @@
       <title>Constrained Multi-Task Learning for Automated Essay Scoring</title>
       <author><first>Ronan</first><last>Cummins</last></author>
       <author><first>Meng</first><last>Zhang</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>789–799</pages>
       <url hash="f41f29ff">P16-1075</url>
       <doi>10.18653/v1/P16-1075</doi>
@@ -856,8 +856,8 @@
       <author><first>Lucie</first><last>Flekova</last></author>
       <author><first>Jordan</first><last>Carpenter</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <pages>843–854</pages>
       <url hash="e9a2acd6">P16-1080</url>
       <doi>10.18653/v1/P16-1080</doi>
@@ -879,7 +879,7 @@
       <author><first>Jonathan</first><last>Gordon</last></author>
       <author><first>Linhong</first><last>Zhu</last></author>
       <author><first>Aram</first><last>Galstyan</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Gully</first><last>Burns</last></author>
       <pages>866–875</pages>
       <url hash="163f7287">P16-1082</url>
@@ -901,9 +901,9 @@
       <title>How well do Computers Solve Math Word Problems? Large-Scale Dataset Construction and Evaluation</title>
       <author><first>Danqing</first><last>Huang</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Jian</first><last>Yin</last></author>
-      <author><first>Wei-Ying</first><last>Ma</last></author>
+      <author id="wei-ying-ma"><first>Wei-Ying</first><last>Ma</last></author>
       <pages>887–896</pages>
       <url hash="a585611c">P16-1084</url>
       <doi>10.18653/v1/P16-1084</doi>
@@ -923,7 +923,7 @@
       <title>Text Understanding with the Attention Sum Reader Network</title>
       <author><first>Rudolf</first><last>Kadlec</last></author>
       <author><first>Martin</first><last>Schmid</last></author>
-      <author><first>Ondrej</first><last>Bajgar</last></author>
+      <author id="ondrej-bajgar"><first>Ondrej</first><last>Bajgar</last></author>
       <author><first>Jan</first><last>Kleindienst</last></author>
       <pages>908–918</pages>
       <url hash="759e0a55">P16-1086</url>
@@ -933,7 +933,7 @@
     <paper id="87">
       <title>Investigating <fixed-case>LSTM</fixed-case>s for Joint Extraction of Opinion Entities and Relations</title>
       <author><first>Arzoo</first><last>Katiyar</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>919–929</pages>
       <url hash="2c17bdb4">P16-1087</url>
       <doi>10.18653/v1/P16-1087</doi>
@@ -941,7 +941,7 @@
     </paper>
     <paper id="88">
       <title>Transition-Based Left-Corner Parsing for Identifying <fixed-case>PTB</fixed-case>-Style Nonlocal Dependencies</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>930–940</pages>
       <url hash="4fa8775b">P16-1088</url>
@@ -952,7 +952,7 @@
       <title><fixed-case>S</fixed-case>iamese <fixed-case>CBOW</fixed-case>: Optimizing Word Embeddings for Sentence Representations</title>
       <author><first>Tom</first><last>Kenter</last></author>
       <author><first>Alexey</first><last>Borisov</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>941–951</pages>
       <url hash="bbba3e8d">P16-1089</url>
       <doi>10.18653/v1/P16-1089</doi>
@@ -971,7 +971,7 @@
     <paper id="91">
       <title>Exploring Convolutional and Recurrent Neural Networks in Sequential Labelling for Dialogue Topic Tracking</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>963–973</pages>
       <url hash="046d43e6">P16-1091</url>
@@ -989,7 +989,7 @@
     </paper>
     <paper id="93">
       <title>A <fixed-case>CALL</fixed-case> System for Learning Preposition Usage</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Donald</first><last>Sturgeon</last></author>
       <author><first>Mengqi</first><last>Luo</last></author>
       <pages>984–993</pages>
@@ -1004,7 +1004,7 @@
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Georgios</first><last>Spithourakis</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>994–1003</pages>
       <url hash="2c2cd229">P16-1094</url>
       <doi>10.18653/v1/P16-1094</doi>
@@ -1047,7 +1047,7 @@
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
       <author><first>Jifan</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1034–1043</pages>
       <url hash="de49f560">P16-1098</url>
       <doi>10.18653/v1/P16-1098</doi>
@@ -1057,7 +1057,7 @@
       <title>Understanding Discourse on Work and Job-Related Well-Being in Public Social Media</title>
       <author><first>Tong</first><last>Liu</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Megan</first><last>Lytle</last></author>
       <author><first>Ann</first><last>Marie White</last></author>
       <author><first>Henry</first><last>Kautz</last></author>
@@ -1068,8 +1068,8 @@
     </paper>
     <paper id="100">
       <title>Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character Models</title>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1054–1063</pages>
       <url hash="1cc7ae10">P16-1100</url>
       <doi>10.18653/v1/P16-1100</doi>
@@ -1078,7 +1078,7 @@
     <paper id="101">
       <title>End-to-end Sequence Labeling via Bi-directional <fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case>s-<fixed-case>CRF</fixed-case></title>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1064–1074</pages>
       <url hash="09c9f844">P16-1101</url>
       <doi>10.18653/v1/P16-1101</doi>
@@ -1100,8 +1100,8 @@
       <title>Synthesizing Compound Words for Machine Translation</title>
       <author><first>Austin</first><last>Matthews</last></author>
       <author><first>Eva</first><last>Schlinger</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1085–1094</pages>
       <url hash="045c8a10">P16-1103</url>
       <doi>10.18653/v1/P16-1103</doi>
@@ -1114,7 +1114,7 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Seema</first><last>Nagar</last></author>
       <author><first>Kuntal</first><last>Dey</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1095–1104</pages>
       <url hash="a483cf5b">P16-1104</url>
       <doi>10.18653/v1/P16-1104</doi>
@@ -1140,7 +1140,7 @@
     <paper id="107">
       <title>Context-aware Argumentative Relation Mining</title>
       <author id="huy-nguyen-pgh"><first>Huy</first><last>Nguyen</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1127–1137</pages>
       <url hash="3f1a5dfc">P16-1107</url>
       <doi>10.18653/v1/P16-1107</doi>
@@ -1180,8 +1180,8 @@
       <title>Predicting the Rise and Fall of Scientific Topics from Trends in their Rhetorical Framing</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>William L.</first><last>Hamilton</last></author>
-      <author><first>Dan</first><last>McFarland</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-mcfarland"><first>Dan</first><last>McFarland</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1170–1180</pages>
       <url hash="5274ab1c">P16-1111</url>
       <doi>10.18653/v1/P16-1111</doi>
@@ -1218,8 +1218,8 @@
     <paper id="115">
       <title>Resolving References to Objects in Photographs using the Words-As-Classifiers Model</title>
       <author><first>David</first><last>Schlangen</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>1213–1223</pages>
       <url hash="e12ed28d">P16-1115</url>
       <doi>10.18653/v1/P16-1115</doi>
@@ -1229,9 +1229,9 @@
       <title><fixed-case>RBPB</fixed-case>: Regularization-Based Pattern Balancing Method for Event Extraction</title>
       <author><first>Lei</first><last>Sha</last></author>
       <author><first>Jing</first><last>Liu</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Sujian</first><last>Li</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>1224–1234</pages>
       <url hash="0012d39a">P16-1116</url>
@@ -1270,7 +1270,7 @@
     <paper id="120">
       <title>Bilingual Segmented Topic Model</title>
       <author><first>Akihiro</first><last>Tamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1266–1276</pages>
       <url hash="5cd80fcc">P16-1120</url>
       <doi>10.18653/v1/P16-1120</doi>
@@ -1279,7 +1279,7 @@
     <paper id="121">
       <title>Learning Semantically and Additively Compositional Distributional Representations</title>
       <author><first>Ran</first><last>Tian</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>1277–1287</pages>
       <url hash="ed1c5f50">P16-1121</url>
@@ -1313,7 +1313,7 @@
       <author><first>Jing</first><last>Liu</last></author>
       <author><first>Yuanfei</first><last>Luo</last></author>
       <author><first>Bin</first><last>Wang</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>1308–1318</pages>
       <url hash="886173e7">P16-1124</url>
       <doi>10.18653/v1/P16-1124</doi>
@@ -1332,16 +1332,16 @@
       <title>The Creation and Analysis of a Website Privacy Policy Corpus</title>
       <author><first>Shomir</first><last>Wilson</last></author>
       <author><first>Florian</first><last>Schaub</last></author>
-      <author><first>Aswarth Abhilash</first><last>Dara</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth Abhilash</first><last>Dara</last></author>
       <author><first>Frederick</first><last>Liu</last></author>
       <author><first>Sushain</first><last>Cherivirala</last></author>
       <author><first>Pedro</first><last>Giovanni Leon</last></author>
       <author><first>Mads</first><last>Schaarup Andersen</last></author>
       <author><first>Sebastian</first><last>Zimmeck</last></author>
-      <author><first>Kanthashree Mysore</first><last>Sathyendra</last></author>
+      <author id="kanthashree-mysore-sathyendra"><first>Kanthashree Mysore</first><last>Sathyendra</last></author>
       <author><first>N. Cameron</first><last>Russell</last></author>
       <author><first>Thomas B.</first><last>Norton</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Joel</first><last>Reidenberg</last></author>
       <author><first>Norman</first><last>Sadeh</last></author>
       <pages>1330–1340</pages>
@@ -1362,7 +1362,7 @@
     <paper id="128">
       <title>Learning Word Meta-Embeddings</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1351–1360</pages>
       <url hash="528b7855">P16-1128</url>
       <doi>10.18653/v1/P16-1128</doi>
@@ -1371,7 +1371,7 @@
     <paper id="129">
       <title>Towards Constructing Sports News from Live Text Commentary</title>
       <author><first>Jianmin</first><last>Zhang</last></author>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>1361–1371</pages>
       <url hash="60fb881f">P16-1129</url>
@@ -1384,7 +1384,7 @@
       <title>A Continuous Space Rule Selection Model for Syntax-based Statistical Machine Translation</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>1372–1381</pages>
@@ -1409,8 +1409,8 @@
       <author><first>Yue</first><last>Zhang</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Junsheng</first><last>Zhou</last></author>
-      <author><first>Xin-Yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-Yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1393–1402</pages>
       <url hash="de7f557f">P16-1132</url>
       <doi>10.18653/v1/P16-1132</doi>
@@ -1442,7 +1442,7 @@
     <paper id="135">
       <title>Identifying Causal Relations Using Parallel <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Christopher</first><last>Hidey</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>1424–1433</pages>
       <url hash="2b8fd15a">P16-1135</url>
       <doi>10.18653/v1/P16-1135</doi>
@@ -1453,8 +1453,8 @@
     <paper id="136">
       <title>Compositional Learning of Embeddings for Relation Paths in Knowledge Base and Text</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Hoifung</first><last>Poon</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>1434–1444</pages>
@@ -1485,11 +1485,11 @@
     </paper>
     <paper id="139">
       <title>A Fast Unified Model for Parsing and Sentence Understanding</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Jon</first><last>Gauthier</last></author>
       <author><first>Abhinav</first><last>Rastogi</last></author>
       <author><first>Raghav</first><last>Gupta</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>1466–1477</pages>
       <url hash="4d03d8ce">P16-1139</url>
@@ -1500,7 +1500,7 @@
       <title>Investigating Language Universal and Specific Properties in Word Embeddings</title>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1478–1488</pages>
       <url hash="578a8b32">P16-1140</url>
       <doi>10.18653/v1/P16-1140</doi>
@@ -1510,7 +1510,7 @@
       <title>Diachronic Word Embeddings Reveal Statistical Laws of Semantic Change</title>
       <author><first>William L.</first><last>Hamilton</last></author>
       <author><first>Jure</first><last>Leskovec</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>1489–1501</pages>
       <url hash="1e42c1ef">P16-1141</url>
       <doi>10.18653/v1/P16-1141</doi>
@@ -1527,10 +1527,10 @@
     </paper>
     <paper id="143">
       <title><fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>em<fixed-case>T</fixed-case>m: A Semantic Dataset Based on All-words Unsupervised Sense Distribution Learning</title>
-      <author><first>Andrew</first><last>Bennett</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="andrew-bennett"><first>Andrew</first><last>Bennett</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>1513–1524</pages>
       <url hash="b529122e">P16-1143</url>
@@ -1540,14 +1540,14 @@
     <paper id="144">
       <title>The <fixed-case>LAMBADA</fixed-case> dataset: Word prediction requiring a broad discourse context</title>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Ngoc Quan</first><last>Pham</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc Quan</first><last>Pham</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>1525–1534</pages>
       <url hash="528bce7a">P16-1144</url>
       <doi>10.18653/v1/P16-1144</doi>
@@ -1571,7 +1571,7 @@
     <paper id="146">
       <title>Optimizing Spectral Learning for Parsing</title>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1546–1556</pages>
       <url hash="4e53cd91">P16-1146</url>
       <doi>10.18653/v1/P16-1146</doi>
@@ -1627,7 +1627,7 @@
     </paper>
     <paper id="152">
       <title>Learning Structured Predictors from Bandit Feedback for Interactive <fixed-case>NLP</fixed-case></title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Julia</first><last>Kreutzer</last></author>
       <author><first>Christopher</first><last>Lo</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
@@ -1644,7 +1644,7 @@
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Lihong</first><last>Li</last></author>
       <author><first>Li</first><last>Deng</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>1621–1630</pages>
       <url hash="d24ca144">P16-1153</url>
       <doi>10.18653/v1/P16-1153</doi>
@@ -1656,7 +1656,7 @@
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Zhengdong</first><last>Lu</last></author>
       <author><first>Hang</first><last>Li</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>1631–1640</pages>
       <url hash="d179b3fa">P16-1154</url>
       <doi>10.18653/v1/P16-1154</doi>
@@ -1675,8 +1675,8 @@
     <paper id="156">
       <title>Morphological Smoothing and Extrapolation of Word Embeddings</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1651–1660</pages>
       <url hash="b3b66c68">P16-1156</url>
       <doi>10.18653/v1/P16-1156</doi>
@@ -1687,7 +1687,7 @@
       <title>Cross-lingual Models of Word Embeddings: An Empirical Comparison</title>
       <author><first>Shyam</first><last>Upadhyay</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>1661–1670</pages>
       <url hash="9288eae6">P16-1157</url>
@@ -1698,8 +1698,8 @@
       <title>Take and Took, Gaggle and Goose, Book and Read: Evaluating the Utility of Vector Differences for Lexical Relation Learning</title>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1671–1682</pages>
       <url hash="def386c9">P16-1158</url>
       <doi>10.18653/v1/P16-1158</doi>
@@ -1732,8 +1732,8 @@
     <paper id="161">
       <title>Target-Side Context for Discriminative Models in Statistical Machine Translation</title>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Marcin</first><last>Junczys-Dowmunt</last></author>
       <pages>1704–1714</pages>
       <url hash="c739bd22">P16-1161</url>
@@ -1757,7 +1757,7 @@
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1726–1735</pages>
       <url hash="64bded35">P16-1163</url>
       <doi>10.18653/v1/P16-1163</doi>
@@ -1767,7 +1767,7 @@
       <title>Model Architectures for Quotation Detection</title>
       <author><first>Christian</first><last>Scheible</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>1736–1745</pages>
       <url hash="4d115fb0">P16-1164</url>
       <doi>10.18653/v1/P16-1164</doi>
@@ -1776,7 +1776,7 @@
     </paper>
     <paper id="165">
       <title>Speech Act Modeling of Written Asynchronous Conversations with Task-Specific Embeddings and Conditional Structured Models</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Enamul</first><last>Hoque</last></author>
       <pages>1746–1756</pages>
       <url hash="43e492ed">P16-1165</url>
@@ -1821,7 +1821,7 @@
       <author><first>Yuntian</first><last>Deng</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Zhicheng</first><last>Yan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1791–1801</pages>
       <url hash="978d3f63">P16-1169</url>
       <doi>10.18653/v1/P16-1169</doi>
@@ -1845,7 +1845,7 @@
       <author><first>Qiaozi</first><last>Gao</last></author>
       <author><first>Malcolm</first><last>Doering</last></author>
       <author><first>Shaohua</first><last>Yang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1814–1824</pages>
       <url hash="ca8b8580">P16-1171</url>
       <doi>10.18653/v1/P16-1171</doi>
@@ -1884,7 +1884,7 @@
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Rebecca</first><last>Knowles</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1859–1869</pages>
       <url hash="1f6f4977">P16-1175</url>
       <doi>10.18653/v1/P16-1175</doi>
@@ -1907,7 +1907,7 @@
       <author><first>Hadi</first><last>Amiri</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>1882–1892</pages>
       <url hash="51f58b4b">P16-1177</url>
       <doi>10.18653/v1/P16-1177</doi>
@@ -1927,8 +1927,8 @@
     <paper id="179">
       <title>Alleviating Poor Context with Background Knowledge for Named Entity Disambiguation</title>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1903–1912</pages>
       <url hash="df13cdab">P16-1179</url>
       <doi>10.18653/v1/P16-1179</doi>
@@ -1938,7 +1938,7 @@
       <title>Mining Paraphrasal Typed Templates from a Plain Text Corpus</title>
       <author><first>Or</first><last>Biran</last></author>
       <author><first>Terra</first><last>Blevins</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>1913–1923</pages>
       <url hash="292148db">P16-1180</url>
       <doi>10.18653/v1/P16-1180</doi>
@@ -1947,7 +1947,7 @@
     <paper id="181">
       <title>How to Train Dependency Parsers with Inexact Search for Joint Sentence Boundary Detection and Parsing of Entire Documents</title>
       <author><first>Anders</first><last>Björkelund</last></author>
-      <author><first>Agnieszka</first><last>Faleńska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Faleńska</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>1924–1934</pages>
@@ -2010,7 +2010,7 @@
     </paper>
     <paper id="187">
       <title>Predicting the Compositionality of Nominal Compounds: Giving Word Embeddings a Hard Time</title>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
@@ -2042,7 +2042,7 @@
     <paper id="190">
       <title>Jointly Learning to Embed and Predict with Multiple Languages</title>
       <author><first>Daniel C.</first><last>Ferreira</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
       <pages>2019–2028</pages>
       <url hash="d3d5c244">P16-1190</url>
@@ -2072,8 +2072,8 @@
     </paper>
     <paper id="193">
       <title>A Vector Space for Distributional Semantics for Entailment</title>
-      <author><first>James</first><last>Henderson</last></author>
-      <author><first>Diana</first><last>Popa</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana</first><last>Popa</last></author>
       <pages>2052–2062</pages>
       <url hash="c7241fb7">P16-1193</url>
       <doi>10.18653/v1/P16-1193</doi>
@@ -2095,7 +2095,7 @@
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Alvin</first><last>Cheung</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2073–2083</pages>
       <url hash="490eb1c6">P16-1195</url>
       <doi>10.18653/v1/P16-1195</doi>
@@ -2138,7 +2138,7 @@
       <author><first>Ming</first><last>Liao</last></author>
       <author><first>Wei</first><last>Gao</last></author>
       <author><first>Yulan</first><last>He</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2114–2123</pages>
       <url hash="7b44b2c7">P16-1199</url>
       <doi>10.18653/v1/P16-1199</doi>
@@ -2213,7 +2213,7 @@
       <title>A New Psychometric-inspired Evaluation Metric for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Peng</first><last>Qian</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2185–2194</pages>
       <url hash="ce89e813">P16-1206</url>
       <doi>10.18653/v1/P16-1206</doi>
@@ -2251,7 +2251,7 @@
       <title>Domain Adaptation for Authorship Attribution: Improved Structural Correspondence Learning</title>
       <author><first>Upendra</first><last>Sapkota</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>2226–2235</pages>
       <url hash="942ba089">P16-1210</url>
@@ -2261,7 +2261,7 @@
     <paper id="211">
       <title>A Corpus-Based Analysis of Canonical Word Order of <fixed-case>J</fixed-case>apanese Double Object Constructions</title>
       <author><first>Ryohei</first><last>Sasano</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2236–2244</pages>
       <url hash="47816f75">P16-1211</url>
       <doi>10.18653/v1/P16-1211</doi>
@@ -2284,8 +2284,8 @@
     </paper>
     <paper id="213">
       <title>One for All: Towards Language Independent Named Entity Linking</title>
-      <author><first>Avirup</first><last>Sil</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>2255–2264</pages>
       <url hash="8dcbff15">P16-1213</url>
       <doi>10.18653/v1/P16-1213</doi>
@@ -2305,7 +2305,7 @@
     <paper id="215">
       <title>Composing Distributed Representations of Relational Patterns</title>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>2276–2286</pages>
       <url hash="95941c34">P16-1215</url>
@@ -2334,7 +2334,7 @@
     <paper id="218">
       <title>Graph-based Dependency Parsing with Bidirectional <fixed-case>LSTM</fixed-case></title>
       <author><first>Wenhui</first><last>Wang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>2306–2315</pages>
       <url hash="a1ee9e49">P16-1218</url>
       <doi>10.18653/v1/P16-1218</doi>
@@ -2386,7 +2386,7 @@
       <title>A Thorough Examination of the <fixed-case>CNN</fixed-case>/<fixed-case>D</fixed-case>aily <fixed-case>M</fixed-case>ail Reading Comprehension Task</title>
       <author><first>Danqi</first><last>Chen</last></author>
       <author><first>Jason</first><last>Bolton</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2358–2367</pages>
       <url hash="397a26ef">P16-1223</url>
       <doi>10.18653/v1/P16-1223</doi>
@@ -2394,9 +2394,9 @@
     </paper>
     <paper id="224">
       <title>Learning Language Games through Interaction</title>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>2368–2378</pages>
       <url hash="21f22621">P16-1224</url>
       <doi>10.18653/v1/P16-1224</doi>
@@ -2405,7 +2405,7 @@
     <paper id="225">
       <title>Finding Non-Arbitrary Form-Meaning Systematicity Using String-Metric Learning for Kernel Regression</title>
       <author><first>E. Dario</first><last>Gutiérrez</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Benjamin</first><last>Bergen</last></author>
       <pages>2379–2388</pages>
       <url hash="a1278d17">P16-1225</url>
@@ -2437,8 +2437,8 @@
       <author><first>Zhiting</first><last>Hu</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>2410–2420</pages>
       <url hash="ecef3c38">P16-1228</url>
       <doi>10.18653/v1/P16-1228</doi>
@@ -2457,13 +2457,13 @@
     <paper id="230">
       <title>On-line Active Reward Learning for Policy Optimisation in Spoken Dialogue Systems</title>
       <author><first>Pei-Hao</first><last>Su</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>David</first><last>Vandyke</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>2431–2441</pages>
       <url hash="6321c941">P16-1230</url>
       <doi>10.18653/v1/P16-1230</doi>
@@ -2478,7 +2478,7 @@
       <author><first>Alessandro</first><last>Presta</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>2442–2452</pages>
       <url hash="83181131">P16-1231</url>
       <doi>10.18653/v1/P16-1231</doi>
@@ -2490,7 +2490,7 @@
       <booktitle>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</booktitle>
       <url hash="8f2d9b35">P16-2</url>
       <editor><first>Katrin</first><last>Erk</last></editor>
-      <editor><first>Noah A.</first><last>Smith</last></editor>
+      <editor id="noah-a-smith"><first>Noah A.</first><last>Smith</last></editor>
       <doi>10.18653/v1/P16-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -2505,7 +2505,7 @@
     <paper id="1">
       <title>Transition-based dependency parsing with topological fields</title>
       <author><first>Daniël</first><last>de Kok</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>1–7</pages>
       <url hash="4b68f584">P16-2001</url>
       <doi>10.18653/v1/P16-2001</doi>
@@ -2515,7 +2515,7 @@
       <title>Scalable Semi-Supervised Query Classification Using Matrix Sketching</title>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>8–13</pages>
       <url hash="743ac8a2">P16-2002</url>
       <doi>10.18653/v1/P16-2002</doi>
@@ -2571,7 +2571,7 @@
     <paper id="8">
       <title>Sequence-to-Sequence Generation for Spoken Dialogue via Deep Syntax Trees and Strings</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>45–51</pages>
       <url hash="67521c54">P16-2008</url>
       <doi>10.18653/v1/P16-2008</doi>
@@ -2590,7 +2590,7 @@
       <title>Joint Word Segmentation and Phonetic Category Induction</title>
       <author><first>Micha</first><last>Elsner</last></author>
       <author><first>Stephanie</first><last>Antetomaso</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <pages>59–65</pages>
       <url hash="66955258">P16-2010</url>
       <doi>10.18653/v1/P16-2010</doi>
@@ -2632,7 +2632,7 @@
       <title>Cross-lingual projection for class-based language models</title>
       <author><first>Beat</first><last>Gfeller</last></author>
       <author><first>Vlad</first><last>Schogol</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>83–88</pages>
       <url hash="ba7c57a1">P16-2014</url>
       <doi>10.18653/v1/P16-2014</doi>
@@ -2663,8 +2663,8 @@
     <paper id="17">
       <title>Semantic classifications for detection of verb metaphors</title>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
-      <author><first>Chee Wee</first><last>Leong</last></author>
-      <author><first>E. Dario</first><last>Gutierrez</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
+      <author id="e-dario-gutierrez"><first>E. Dario</first><last>Gutierrez</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <pages>101–106</pages>
@@ -2675,7 +2675,7 @@
     <paper id="18">
       <title>Recognizing Salient Entities in Shopping Queries</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Ke</first><last>Zhai</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
       <pages>107–111</pages>
@@ -2687,7 +2687,7 @@
       <title>Leveraging Lexical Resources for Learning Entity Embeddings in Multi-Relational Data</title>
       <author><first>Teng</first><last>Long</last></author>
       <author><first>Ryan</first><last>Lowe</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Doina</first><last>Precup</last></author>
       <pages>112–117</pages>
       <url hash="1699df47">P16-2019</url>
@@ -2709,7 +2709,7 @@
       <title>Vocabulary Manipulation for Neural Machine Translation</title>
       <author><first>Haitao</first><last>Mi</last></author>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <pages>124–129</pages>
       <url hash="7c021d46">P16-2021</url>
       <doi>10.18653/v1/P16-2021</doi>
@@ -2732,8 +2732,8 @@
     <paper id="23">
       <title>Improving cross-domain n-gram language modelling with skipgrams</title>
       <author><first>Louis</first><last>Onrust</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>137–142</pages>
       <url hash="f9da899f">P16-2023</url>
       <doi>10.18653/v1/P16-2023</doi>
@@ -2760,7 +2760,7 @@
     <paper id="26">
       <title>How Naked is the Naked Truth? A Multilingual Lexicon of Nominal Compound Compositionality</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
@@ -2771,8 +2771,8 @@
     </paper>
     <paper id="27">
       <title>An Open Web Platform for Rule-Based Speech-to-Sign Translation</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sarah</first><last>Ebling</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Irene</first><last>Strasly</last></author>
@@ -2786,7 +2786,7 @@
       <title>Word Alignment without <fixed-case>NULL</fixed-case> Words</title>
       <author><first>Philip</first><last>Schulz</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>169–174</pages>
       <url hash="90e1fbaa">P16-2028</url>
       <doi>10.18653/v1/P16-2028</doi>
@@ -2821,7 +2821,7 @@
       <author><first>Ivan</first><last>Vulić</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>188–194</pages>
       <url hash="2ea6ae77">P16-2031</url>
       <doi>10.18653/v1/P16-2031</doi>
@@ -2839,9 +2839,9 @@
     </paper>
     <paper id="33">
       <title>The Value of Semantic Parse Labeling for Knowledge Base Question Answering</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Matthew</first><last>Richardson</last></author>
-      <author><first>Chris</first><last>Meek</last></author>
+      <author id="christopher-meek"><first>Chris</first><last>Meek</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Jina</first><last>Suh</last></author>
       <pages>201–206</pages>
@@ -2868,8 +2868,8 @@
     <paper id="35">
       <title>The red one!: On learning to refer to things based on discriminative properties</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>213–218</pages>
       <url hash="a6a19776">P16-2035</url>
       <doi>10.18653/v1/P16-2035</doi>
@@ -2887,7 +2887,7 @@
     <paper id="37">
       <title>Dimensional Sentiment Analysis Using a Regional <fixed-case>CNN</fixed-case>-<fixed-case>LSTM</fixed-case> Model</title>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
       <author><first>Xuejie</first><last>Zhang</last></author>
       <pages>225–230</pages>
@@ -2897,7 +2897,7 @@
     </paper>
     <paper id="38">
       <title>Deep multi-task learning with low level tasks supervised at lower layers</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <pages>231–235</pages>
       <url hash="e74b596b">P16-2038</url>
@@ -2918,8 +2918,8 @@
       <title>An Entity-Focused Approach to Generating Company Descriptions</title>
       <author><first>Gavin</first><last>Saldanha</last></author>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>243–248</pages>
       <url hash="4bbec3ef">P16-2040</url>
       <doi>10.18653/v1/P16-2040</doi>
@@ -2939,7 +2939,7 @@
     <paper id="42">
       <title>Automatic Semantic Classification of <fixed-case>G</fixed-case>erman Preposition Types: Comparing Hard and Soft Clustering Approaches across Features</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>256–263</pages>
       <url hash="13735490">P16-2042</url>
       <doi>10.18653/v1/P16-2042</doi>
@@ -2962,7 +2962,7 @@
       <author><first>Zhong</first><last>Zhou</last></author>
       <author><first>Dylan</first><last>Fitzpatrick</last></author>
       <author><first>Michael</first><last>Muehl</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>269–274</pages>
       <url hash="04b73286">P16-2044</url>
       <doi>10.18653/v1/P16-2044</doi>
@@ -2981,9 +2981,9 @@
     <paper id="46">
       <title>A Neural Network based Approach to Automatic Post-Editing</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>281–286</pages>
       <url hash="974aaa11">P16-2046</url>
       <doi>10.18653/v1/P16-2046</doi>
@@ -2992,7 +2992,7 @@
     <paper id="47">
       <title>An Unsupervised Method for Automatic Translation Memory Cleaning</title>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Eduard</first><last>Barbu</last></author>
       <pages>287–292</pages>
@@ -3004,7 +3004,7 @@
       <title>Exponentially Decaying Bag-of-Words Input Features for Feed-Forward Neural Network in Statistical Machine Translation</title>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>293–298</pages>
       <url hash="b42f1f4d">P16-2048</url>
       <doi>10.18653/v1/P16-2048</doi>
@@ -3027,7 +3027,7 @@
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Ilja</first><last>Croijmans</last></author>
       <author><first>Asifa</first><last>Majid</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>306–312</pages>
       <url hash="7906bb78">P16-2050</url>
       <doi>10.18653/v1/P16-2050</doi>
@@ -3036,8 +3036,8 @@
     <paper id="51">
       <title>Exploring Stylistic Variation with Age and Income on <fixed-case>T</fixed-case>witter</title>
       <author><first>Lucie</first><last>Flekova</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>313–319</pages>
       <url hash="75a9add3">P16-2051</url>
       <doi>10.18653/v1/P16-2051</doi>
@@ -3047,7 +3047,7 @@
       <title>Finding Optimists and Pessimists on <fixed-case>T</fixed-case>witter</title>
       <author><first>Xianzhi</first><last>Ruan</last></author>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>320–325</pages>
       <url hash="37c27113">P16-2052</url>
       <doi>10.18653/v1/P16-2052</doi>
@@ -3075,7 +3075,7 @@
     <paper id="55">
       <title>Text Simplification as Tree Labeling</title>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>337–343</pages>
       <url hash="149dadb7">P16-2055</url>
       <doi>10.18653/v1/P16-2055</doi>
@@ -3085,7 +3085,7 @@
       <title>Bootstrapped Text-level Named Entity Recognition for Literature</title>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Adam</first><last>Hammond</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>344–350</pages>
       <url hash="65383fc5">P16-2056</url>
       <doi>10.18653/v1/P16-2056</doi>
@@ -3101,8 +3101,8 @@
     </paper>
     <paper id="58">
       <title>Character-based Neural Machine Translation</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>357–361</pages>
       <url hash="c5cb0b88">P16-2058</url>
       <doi>10.18653/v1/P16-2058</doi>
@@ -3141,7 +3141,7 @@
     <paper id="62">
       <title>A Latent Concept Topic Model for Robust Topic Inference Using Word Embeddings</title>
       <author><first>Weihua</first><last>Hu</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>380–386</pages>
       <url hash="ede8d518">P16-2062</url>
       <doi>10.18653/v1/P16-2062</doi>
@@ -3162,9 +3162,9 @@
       <author><first>Michal</first><last>Lukasik</last></author>
       <author><first>P. K.</first><last>Srijith</last></author>
       <author><first>Duy</first><last>Vu</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>393–398</pages>
       <url hash="e1d3e3d2">P16-2064</url>
       <doi>10.18653/v1/P16-2064</doi>
@@ -3173,7 +3173,7 @@
     <paper id="65">
       <title>Hunting for Troll Comments in News Community Forums</title>
       <author><first>Todor</first><last>Mihaylov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>399–405</pages>
       <url hash="13cb5d74">P16-2065</url>
       <doi>10.18653/v1/P16-2065</doi>
@@ -3191,8 +3191,8 @@
     </paper>
     <paper id="67">
       <title>Multilingual Part-of-Speech Tagging with Bidirectional Long Short-Term Memory Models and Auxiliary Loss</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <pages>412–418</pages>
       <url hash="41a1d380">P16-2067</url>
@@ -3213,7 +3213,7 @@
       <title>One model, two languages: training bilingual parsers with harmonized treebanks</title>
       <author><first>David</first><last>Vilares</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <pages>425–431</pages>
       <url hash="724efd5b">P16-2069</url>
       <doi>10.18653/v1/P16-2069</doi>
@@ -3258,7 +3258,7 @@
     <paper id="74">
       <title>Integrating Distributional Lexical Contrast into Word Embeddings for Antonym-Synonym Distinction</title>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>454–459</pages>
       <url hash="35e38309">P16-2074</url>
@@ -3267,9 +3267,9 @@
     </paper>
     <paper id="75">
       <title>Machine Translation Evaluation Meets Community Question Answering</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>460–466</pages>
       <url hash="1b4047fc">P16-2075</url>
       <doi>10.18653/v1/P16-2075</doi>
@@ -3279,7 +3279,7 @@
       <title>Science Question Answering using Instructional Materials</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Kumar</first><last>Dubey</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>467–473</pages>
       <url hash="35f89af4">P16-2076</url>
       <doi>10.18653/v1/P16-2076</doi>
@@ -3310,7 +3310,7 @@
     <paper id="79">
       <title>Machine Comprehension using Rich Semantic Representations</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>486–492</pages>
       <url hash="deb24cb5">P16-2079</url>
       <doi>10.18653/v1/P16-2079</doi>
@@ -3330,7 +3330,7 @@
     <paper id="81">
       <title>Semantics-Driven Recognition of Collocations Using Word Embeddings</title>
       <author><first>Sara</first><last>Rodríguez-Fernández</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Roberto</first><last>Carlini</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>499–505</pages>
@@ -3350,7 +3350,7 @@
     <paper id="83">
       <title>Word Embedding Calculus in Meaningful Ultradense Subspaces</title>
       <author><first>Sascha</first><last>Rothe</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>512–517</pages>
       <url hash="da7e8f5f">P16-2083</url>
       <doi>10.18653/v1/P16-2083</doi>
@@ -3379,7 +3379,7 @@
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Taku</first><last>Komura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>531–536</pages>
       <url hash="41adc283">P16-2086</url>
       <doi>10.18653/v1/P16-2086</doi>
@@ -3418,8 +3418,8 @@
     </paper>
     <paper id="90">
       <title>Single-Model Encoder-Decoder with Explicit Morphological Representation for Reinflection</title>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>555–560</pages>
       <url hash="e5d46901">P16-2090</url>
       <doi>10.18653/v1/P16-2090</doi>
@@ -3427,9 +3427,9 @@
     </paper>
     <paper id="91">
       <title>Joint part-of-speech and dependency projection from multiple sources</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>561–566</pages>
       <url hash="f24e1793">P16-2091</url>
       <doi>10.18653/v1/P16-2091</doi>
@@ -3458,7 +3458,7 @@
       <author><first>Maria</first><last>Barrett</last></author>
       <author><first>Joachim</first><last>Bingel</last></author>
       <author><first>Frank</first><last>Keller</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>579–584</pages>
       <url hash="22881a7b">P16-2094</url>
       <doi>10.18653/v1/P16-2094</doi>
@@ -3477,7 +3477,7 @@
     <paper id="96">
       <title>The Social Impact of Natural Language Processing</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Shannon L.</first><last>Spruit</last></author>
+      <author id="shannon-l-spruit"><first>Shannon L.</first><last>Spruit</last></author>
       <pages>591–598</pages>
       <url hash="1de5b5f3">P16-2096</url>
       <doi>10.18653/v1/P16-2096</doi>
@@ -3566,8 +3566,8 @@
     </paper>
     <paper id="6">
       <title>Significance of an Accurate Sandhi-Splitter in Shallow Parsing of <fixed-case>D</fixed-case>ravidian Languages</title>
-      <author><first>Devadath</first><last>V V</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="devadath-v-v"><first>Devadath</first><last>V V</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>37–42</pages>
       <url hash="dda7b6ad">P16-3006</url>
       <doi>10.18653/v1/P16-3006</doi>
@@ -3577,7 +3577,7 @@
       <title>Improving Topic Model Clustering of Newspaper Comments for Summarisation</title>
       <author><first>Clare</first><last>Llewellyn</last></author>
       <author><first>Claire</first><last>Grover</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>43–50</pages>
       <url hash="2a0a4a82">P16-3007</url>
       <doi>10.18653/v1/P16-3007</doi>
@@ -3594,7 +3594,7 @@
     <paper id="9">
       <title>Robust Co-occurrence Quantification for Lexical Distributional Semantics</title>
       <author><first>Dmitrijs</first><last>Milajevs</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>58–64</pages>
       <url hash="02c5316d">P16-3009</url>
@@ -3628,7 +3628,7 @@
     <paper id="13">
       <title>Improving Dependency Parsing Using Sentence Clause Charts</title>
       <author><first>Vincent</first><last>Kríž</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <pages>86–92</pages>
       <url hash="f4db1978">P16-3013</url>
       <doi>10.18653/v1/P16-3013</doi>
@@ -3690,7 +3690,7 @@
       <title><fixed-case>QA</fixed-case>-It: Classifying Non-Referential It for Question Answer Pairs</title>
       <author><first>Timothy</first><last>Lee</last></author>
       <author><first>Alex</first><last>Lutz</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>132–137</pages>
       <url hash="d4acccfe">P16-3020</url>
       <doi>10.18653/v1/P16-3020</doi>
@@ -3699,7 +3699,7 @@
     <paper id="21">
       <title>Building a Corpus for <fixed-case>J</fixed-case>apanese Wikification with Fine-Grained Entity Classes</title>
       <author><first>Davaajav</first><last>Jargalsaikhan</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Koji</first><last>Matsuda</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>138–144</pages>
@@ -3722,7 +3722,7 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>ACL</fixed-case>-2016 System Demonstrations</booktitle>
       <url hash="802a1706">P16-4</url>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
       <doi>10.18653/v1/P16-4</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3748,7 +3748,7 @@
       <title>Online Information Retrieval for Language Learning</title>
       <author><first>Maria</first><last>Chinkina</last></author>
       <author><first>Madeeswaran</first><last>Kannan</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>7–12</pages>
       <url hash="574be6ec">P16-4002</url>
       <doi>10.18653/v1/P16-4002</doi>
@@ -3757,7 +3757,7 @@
     <paper id="3">
       <title>Terminology Extraction with Term Variant Detection</title>
       <author><first>Damien</first><last>Cram</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>13–18</pages>
       <url hash="326860eb">P16-4003</url>
       <doi>10.18653/v1/P16-4003</doi>
@@ -3790,7 +3790,7 @@
     <paper id="6">
       <title>A Web-framework for <fixed-case>ODIN</fixed-case> Annotation</title>
       <author><first>Ryan</first><last>Georgi</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>31–36</pages>
       <url hash="40b41b1f">P16-4006</url>
@@ -3818,9 +3818,9 @@
     <paper id="8">
       <title><fixed-case>T</fixed-case>ransc<fixed-case>R</fixed-case>ater: a Tool for Automatic Speech Recognition Quality Estimation</title>
       <author><first>Shahab</first><last>Jalalvand</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Daniele</first><last>Falavigna</last></author>
       <author><first>Mohammed R. H.</first><last>Qwaider</last></author>
       <pages>43–48</pages>
@@ -3831,9 +3831,9 @@
     <paper id="9">
       <title><fixed-case>TM</fixed-case>op: a Tool for Unsupervised Translation Memory Cleaning</title>
       <author><first>Masoud</first><last>Jalili Sabet</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>49–54</pages>
       <url hash="ede60a34">P16-4009</url>
@@ -3861,7 +3861,7 @@
     <paper id="12">
       <title><fixed-case>O</fixed-case>pen<fixed-case>D</fixed-case>ial: A Toolkit for Developing Spoken Dialogue Systems with Probabilistic Rules</title>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>67–72</pages>
       <url hash="5f5fb06c">P16-4012</url>
       <doi>10.18653/v1/P16-4012</doi>
@@ -3905,7 +3905,7 @@
       <title><fixed-case>M</fixed-case>e<fixed-case>TA</fixed-case>: A Unified Toolkit for Text Retrieval and Analysis</title>
       <author><first>Sean</first><last>Massung</last></author>
       <author><first>Chase</first><last>Geigle</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>91–96</pages>
       <url hash="6973438d">P16-4016</url>
       <doi>10.18653/v1/P16-4016</doi>
@@ -3943,7 +3943,7 @@
     </paper>
     <paper id="20">
       <title>Personalized Exercises for Preposition Learning</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Mengqi</first><last>Luo</last></author>
       <pages>115–120</pages>
       <url hash="6a4f7e1e">P16-4020</url>
@@ -3953,8 +3953,8 @@
     <paper id="21">
       <title>My Science <fixed-case>T</fixed-case>utor—<fixed-case>L</fixed-case>earning Science with a Conversational Virtual Tutor</title>
       <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Ron</first><last>Cole</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="ronald-cole"><first>Ron</first><last>Cole</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <pages>121–126</pages>
       <url hash="8c1ec7d1">P16-4021</url>
       <doi>10.18653/v1/P16-4021</doi>
@@ -3963,8 +3963,8 @@
     <paper id="22">
       <title>pigeo: A Python Geotagging Tool</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>127–132</pages>
       <url hash="470ddcfc">P16-4022</url>
       <doi>10.18653/v1/P16-4022</doi>
@@ -3975,7 +3975,7 @@
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Rebecca</first><last>Knowles</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>133–138</pages>
       <url hash="44339234">P16-4023</url>
       <doi>10.18653/v1/P16-4023</doi>
@@ -3983,7 +3983,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>R</fixed-case>oleo: Visualising Thematic Fit Spaces on the Web</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Xudong</first><last>Hong</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>139–144</pages>
@@ -4012,7 +4012,7 @@
     <paper id="27">
       <title><fixed-case>L</fixed-case>i<fixed-case>M</fixed-case>o<fixed-case>SIN</fixed-case>e Pipeline: Multilingual <fixed-case>UIMA</fixed-case>-based <fixed-case>NLP</fixed-case> Platform</title>
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Gianni</first><last>Barlacchi</last></author>
       <author><first>Francisco J.</first><last>Valverde Albacete</last></author>
       <author><first>Manos</first><last>Tsagkias</last></author>
@@ -4033,7 +4033,7 @@
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Franziska</first><last>Lehmann</last></author>
       <author><first>Uli</first><last>Fahrer</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Kathrin</first><last>Ballweg</last></author>
       <pages>163–168</pages>
       <url hash="7e169ce3">P16-4028</url>
@@ -4067,7 +4067,7 @@ All material associated to the tutorial will be available at <url>http://multimo
       <title><fixed-case>NLP</fixed-case> Approaches to Computational Argumentation</title>
       <author><first>Noam</first><last>Slonim</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <abstract>Argumentation and debating represent primary intellectual activities of the human mind. People in all societies argue and debate, not only to convince others of their own opinions but also in order to explore the differences between multiple perspectives and conceptualizations, and to learn from this exploration. The process of reaching a resolution on controversial topics typically does not follow a simple sequence of purely logical steps. Rather it involves a wide variety of complex and interwoven actions. Presumably, pros and cons are identified, considered, and weighed, via cognitive processes that often involve persuasion and emotions, which are inherently harder to formalize from a computational perspective.
 
@@ -4101,7 +4101,7 @@ For each of these, the state of the art and open challenges are presented. The t
     </paper>
     <paper id="4">
       <title>Semantic Representations of Word Senses and Concepts</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Ignacio</first><last>Iacobacci</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <author><first>Mohammad</first><last>Taher Pilehvar</last></author>
@@ -4112,9 +4112,9 @@ This tutorial will first provide a brief overview of the recent literature conce
     </paper>
     <paper id="5">
       <title>Neural Machine Translation</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <abstract>Neural Machine Translation (NMT) is a simple new architecture for getting machines to learn to translate. Despite being relatively new (Kalchbrenner and Blunsom, 2013; Cho et al., 2014; Sutskever et al., 2014), NMT has already shown promising results, achieving state-of-the-art performances for various language pairs (Luong et al, 2015a; Jean et al, 2015; Luong et al, 2015b; Sennrich et al., 2016; Luong and Manning, 2016). While many of these NMT papers were presented to the ACL community, research and practice of NMT are only at their beginning stage. This tutorial would be a great opportunity for the whole community of machine translation and natural language processing to learn more about a very promising new approach to MT. This tutorial has four parts.
 
 In the first part, we start with an overview of MT approaches, including: (a) traditional methods that have been dominant over the past twenty years and (b) recent hybrid models with the use of neural network components. From these, we motivate why an end-to-end approach like neural machine translation is needed. The second part introduces a basic instance of NMT. We start out with a discussion of recurrent neural networks, including the back-propagation-through-time algorithm and stochastic gradient descent optimizers, as these are the foundation on which NMT builds. We then describe in detail the basic sequence-to-sequence architecture of NMT (Cho et al., 2014; Sutskever et al., 2014), the maximum likelihood training approach, and a simple beam-search decoder to produce translations.
@@ -4148,7 +4148,7 @@ The goal of this tutorial is to offer an introduction to the basic concepts of g
     </paper>
     <paper id="8">
       <title><fixed-case>M</fixed-case>eta<fixed-case>N</fixed-case>et: Repository, Identification System, and Applications</title>
-      <author><first>Miriam R L</first><last>Petruck</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last></author>
       <author><first>Ellen K</first><last>Dodge</last></author>
       <abstract>The ubiquity of metaphor in language (Lakoff and Johnson 1980) has served as impetus for cognitive linguistic approaches to the study of language, mind, and the study of mind (e.g. Thibodeau &amp; Boroditsky 2011). While native speakers use metaphor naturally and easily, the treatment and interpretation of metaphor in computational systems remains challenging because such systems have not succeeded in developing ways to recognize the semantic elements that define metaphor. This tutorial demonstrates MetaNet's frame-based semantic analyses, and their informing of MetaNet's automatic metaphor identification system. Participants will gain a complete understanding of the theoretical basis and the practical workings of MetaNet, and acquire relevant information about the Frame Semantics basis of that knowledge base and the way that FrameNet handles the widespread phenomenon of metaphor in language. The tutorial is geared to researchers and practitioners of language technology, not necessarily experts in metaphor analysis or knowledgeable about either FrameNet or MetaNet, but who are interested in natural language processing tasks that involve automatic metaphor processing, or could benefit from exposure to tools and resources that support frame-based deep semantic, analyses of language, including metaphor as a widespread phenomenon in human language.</abstract>
       <bibkey>petruck-dodge-2016-metanet</bibkey>
diff --git a/data/xml/P17.xml b/data/xml/P17.xml
index 7975ee129d..38366e7331 100644
--- a/data/xml/P17.xml
+++ b/data/xml/P17.xml
@@ -21,7 +21,7 @@
       <title>Adversarial Multi-task Learning for Text Classification</title>
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1–10</pages>
       <url hash="89b5abc0">P17-1001</url>
       <doi>10.18653/v1/P17-1001</doi>
@@ -46,8 +46,8 @@
       <title>Neural Symbolic Machines: Learning Semantic Parsers on <fixed-case>F</fixed-case>reebase with Weak Supervision</title>
       <author><first>Chen</first><last>Liang</last></author>
       <author><first>Jonathan</first><last>Berant</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
-      <author><first>Kenneth D.</first><last>Forbus</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
+      <author id="kenneth-forbus"><first>Kenneth D.</first><last>Forbus</last></author>
       <author><first>Ni</first><last>Lao</last></author>
       <pages>23–33</pages>
       <url hash="a59d8d09">P17-1003</url>
@@ -88,7 +88,7 @@
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Roi</first><last>Reichart</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>56–68</pages>
       <url hash="74979b09">P17-1006</url>
@@ -139,7 +139,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Yiming</first><last>Cui</last></author>
       <author><first>Qingyu</first><last>Yin</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Shijin</first><last>Wang</last></author>
       <author><first>Guoping</first><last>Hu</last></author>
       <pages>102–111</pages>
@@ -196,7 +196,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Mark</first><last>Yatskar</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>146–157</pages>
       <url hash="5d4cc03a">P17-1014</url>
       <attachment type="presentation" hash="096882b2">P17-1014.Presentation.pdf</attachment>
@@ -209,8 +209,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Program Induction by Rationale Generation: Learning to Solve and Explain Algebraic Word Problems</title>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>158–167</pages>
       <url hash="11f7da7b">P17-1015</url>
       <doi>10.18653/v1/P17-1015</doi>
@@ -248,7 +248,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Wenhui</first><last>Wang</last></author>
       <author><first>Nan</first><last>Yang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>189–198</pages>
       <url hash="a8bacf81">P17-1018</url>
@@ -312,7 +312,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="23">
       <title>Obtaining referential word meanings from visual and distributional information: Experiments on object naming</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>243–254</pages>
       <url hash="60eabed7">P17-1023</url>
@@ -326,10 +326,10 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Ravi</first><last>Shekhar</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Yauhen</first><last>Klimovich</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Moin</first><last>Nabi</last></author>
       <author><first>Enver</first><last>Sangineto</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>255–265</pages>
       <url hash="7ef57d9e">P17-1024</url>
       <doi>10.18653/v1/P17-1024</doi>
@@ -352,7 +352,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title><fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing with a Supertag and Dependency Factored Model</title>
       <author><first>Masashi</first><last>Yoshikawa</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>277–287</pages>
       <url hash="90ff11fb">P17-1026</url>
       <doi>10.18653/v1/P17-1026</doi>
@@ -374,7 +374,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="28">
       <title>Aggregating and Predicting Sequence Labels from Crowd Annotations</title>
       <author><first>An Thanh</first><last>Nguyen</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
@@ -416,7 +416,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Learning attention for historical text normalization by learning to pronounce</title>
       <author><first>Marcel</first><last>Bollmann</last></author>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>332–344</pages>
       <url hash="9560b963">P17-1031</url>
       <doi>10.18653/v1/P17-1031</doi>
@@ -430,7 +430,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>345–354</pages>
       <url hash="209d2347">P17-1032</url>
       <doi>10.18653/v1/P17-1032</doi>
@@ -441,8 +441,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="33">
       <title>Topically Driven Neural Language Model</title>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>355–365</pages>
       <url hash="29754809">P17-1033</url>
       <doi>10.18653/v1/P17-1033</doi>
@@ -467,7 +467,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Learning Cognitive Features from Gaze Data for Sentiment and Sarcasm Classification using Convolutional Neural Network</title>
       <author><first>Abhijit</first><last>Mishra</last></author>
       <author><first>Kuntal</first><last>Dey</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>377–387</pages>
       <url hash="28ae9c7c">P17-1035</url>
       <doi>10.18653/v1/P17-1035</doi>
@@ -492,7 +492,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Other Topics You May Also Agree or Disagree: Modeling Inter-Topic Preferences using Tweets and Matrix Factorization</title>
       <author><first>Akira</first><last>Sasaki</last></author>
       <author><first>Kazuaki</first><last>Hanawa</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>398–408</pages>
       <url hash="c1b8df36">P17-1037</url>
@@ -555,8 +555,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="42">
       <title>Learning bilingual word embeddings with (almost) no bilingual data</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>451–462</pages>
       <url hash="f1baef47">P17-1042</url>
       <attachment type="presentation" hash="d74e2599">P17-1042.Presentation.pdf</attachment>
@@ -568,7 +568,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="43">
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing using <fixed-case>LSTM</fixed-case> Recurrent Neural Networks</title>
       <author><first>William</first><last>Foland</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>463–472</pages>
       <url hash="3cc15ce5">P17-1043</url>
       <doi>10.18653/v1/P17-1043</doi>
@@ -581,7 +581,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>473–483</pages>
       <url hash="10568783">P17-1044</url>
       <doi>10.18653/v1/P17-1044</doi>
@@ -596,7 +596,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Xiujun</first><last>Li</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Yun-Nung</first><last>Chen</last></author>
-      <author><first>Faisal</first><last>Ahmed</last></author>
+      <author id="faisal-ahmad"><first>Faisal</first><last>Ahmed</last></author>
       <author><first>Li</first><last>Deng</last></author>
       <pages>484–495</pages>
       <url hash="fee6e09c">P17-1045</url>
@@ -622,7 +622,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="47">
       <title>Learning Word-Like Units from Joint Audio-Visual Analysis</title>
       <author><first>David</first><last>Harwath</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>506–517</pages>
       <url hash="355aa6ac">P17-1047</url>
       <doi>10.18653/v1/P17-1047</doi>
@@ -634,7 +634,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Joint <fixed-case>CTC</fixed-case>/attention decoding for end-to-end speech recognition</title>
       <author><first>Takaaki</first><last>Hori</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
-      <author><first>John</first><last>Hershey</last></author>
+      <author id="john-r-hershey"><first>John</first><last>Hershey</last></author>
       <pages>518–529</pages>
       <url hash="07bc4085">P17-1048</url>
       <doi>10.18653/v1/P17-1048</doi>
@@ -684,7 +684,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="52">
       <title>Deep Pyramid Convolutional Neural Networks for Text Categorization</title>
-      <author><first>Rie</first><last>Johnson</last></author>
+      <author id="rie-johnson"><first>Rie</first><last>Johnson</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>562–570</pages>
       <url hash="ab99b5c7">P17-1052</url>
@@ -698,7 +698,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Kazi Saidul</first><last>Hasan</last></author>
-      <author><first>Cicero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>dos Santos</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>571–581</pages>
@@ -752,7 +752,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Gabriel</first><last>Doyle</last></author>
       <author><first>Amir</first><last>Goldberg</last></author>
       <author><first>Sameer</first><last>Srivastava</last></author>
-      <author><first>Michael</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael</first><last>Frank</last></author>
       <pages>603–612</pages>
       <url hash="bec9e531">P17-1056</url>
       <doi>10.18653/v1/P17-1056</doi>
@@ -762,7 +762,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="57">
       <title>Representations of language in a model of visually grounded speech signal</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Lieke</first><last>Gelderloos</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>613–622</pages>
@@ -826,9 +826,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="62">
       <title>Hybrid Code Networks: practical and efficient end-to-end dialog control with supervised and reinforcement learning</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
-      <author><first>Kavosh</first><last>Asadi</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
+      <author id="kavosh-asadi-atui"><first>Kavosh</first><last>Asadi</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>665–677</pages>
       <url hash="f8d00879">P17-1062</url>
       <doi>10.18653/v1/P17-1062</doi>
@@ -851,12 +851,12 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="64">
       <title>Modeling Source Syntax for Neural Machine Translation</title>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
       <author><first>Muhua</first><last>Zhu</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>688–697</pages>
       <url hash="1e361a27">P17-1064</url>
       <doi>10.18653/v1/P17-1064</doi>
@@ -882,7 +882,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Detect Rumors in Microblog Posts Using Propagation Structure via Kernel Learning</title>
       <author><first>Jing</first><last>Ma</last></author>
       <author><first>Wei</first><last>Gao</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>708–717</pages>
       <url hash="074d65b5">P17-1066</url>
       <attachment type="presentation" hash="b86700db">P17-1066.Presentation.pdf</attachment>
@@ -894,7 +894,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="67">
       <title><fixed-case>E</fixed-case>mo<fixed-case>N</fixed-case>et: Fine-Grained Emotion Detection with Gated Recurrent Neural Networks</title>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>718–728</pages>
       <url hash="fac7f29a">P17-1067</url>
       <doi>10.18653/v1/P17-1067</doi>
@@ -904,10 +904,10 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="68">
       <title>Beyond Binary Labels: Political Ideology Prediction of <fixed-case>T</fixed-case>witter Users</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Ye</first><last>Liu</last></author>
       <author><first>Daniel</first><last>Hopkins</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>729–740</pages>
       <url hash="41a9cfb6">P17-1068</url>
       <attachment type="presentation" hash="c22f5341">P17-1068.Presentation.pdf</attachment>
@@ -918,7 +918,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="69">
       <title>Leveraging Behavioral and Social Information for Weakly Supervised Collective Classification of Political Discourse on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Di</first><last>Jin</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>741–752</pages>
@@ -959,7 +959,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Friendships, Rivalries, and Trysts: Characterizing Relations between Ideas in Texts</title>
       <author><first>Chenhao</first><last>Tan</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>773–783</pages>
       <url hash="36f1736b">P17-1072</url>
       <doi>10.18653/v1/P17-1072</doi>
@@ -984,7 +984,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Automatic Annotation and Evaluation of Error Types for Grammatical Error Correction</title>
       <author><first>Christopher</first><last>Bryant</last></author>
       <author><first>Mariano</first><last>Felice</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>793–805</pages>
       <url hash="1caa6c12">P17-1074</url>
       <doi>10.18653/v1/P17-1074</doi>
@@ -997,7 +997,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Saku</first><last>Sugawara</last></author>
       <author><first>Yusuke</first><last>Kido</last></author>
       <author><first>Hikaru</first><last>Yokono</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>806–817</pages>
       <url hash="d611f69a">P17-1075</url>
       <doi>10.18653/v1/P17-1075</doi>
@@ -1019,7 +1019,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="77">
       <title>Semantic Dependency Parsing via Book Embedding</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Junjie</first><last>Cao</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>828–838</pages>
@@ -1063,7 +1063,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Fahim</first><last>Dalvi</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>861–872</pages>
       <url hash="96aab525">P17-1080</url>
       <doi>10.18653/v1/P17-1080</doi>
@@ -1090,7 +1090,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>A Multidimensional Lexicon for Interpersonal Stancetaking</title>
       <author><first>Umashanthi</first><last>Pavalanathan</last></author>
       <author><first>Jim</first><last>Fitzpatrick</last></author>
-      <author><first>Scott</first><last>Kiesling</last></author>
+      <author id="scott-f-kiesling"><first>Scott</first><last>Kiesling</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>884–895</pages>
       <url hash="eaf0ebc3">P17-1082</url>
@@ -1115,7 +1115,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="84">
       <title>Apples to Apples: Learning Semantics of Common Entities Through a Novel Comprehension Task</title>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>906–916</pages>
       <url hash="d75950c6">P17-1084</url>
       <doi>10.18653/v1/P17-1084</doi>
@@ -1126,7 +1126,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="85">
       <title>Going out on a limb: Joint Extraction of Entity Mentions and Relations without Dependency Trees</title>
       <author><first>Arzoo</first><last>Katiyar</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>917–928</pages>
       <url hash="5b89acdf">P17-1085</url>
       <doi>10.18653/v1/P17-1085</doi>
@@ -1136,10 +1136,10 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="86">
       <title>Naturalizing a Programming Language via Interactive Learning</title>
-      <author><first>Sida I.</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida I.</first><last>Wang</last></author>
       <author><first>Samuel</first><last>Ginn</last></author>
       <author><first>Percy</first><last>Liang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>929–938</pages>
       <url hash="ea8d56b7">P17-1086</url>
       <doi>10.18653/v1/P17-1086</doi>
@@ -1151,8 +1151,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Semantic Word Clusters Using Signed Spectral Clustering</title>
       <author><first>João</first><last>Sedoc</last></author>
       <author><first>Jean</first><last>Gallier</last></author>
-      <author><first>Dean</first><last>Foster</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="dean-foster"><first>Dean</first><last>Foster</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>939–949</pages>
       <url hash="7903b034">P17-1087</url>
       <doi>10.18653/v1/P17-1087</doi>
@@ -1174,7 +1174,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Qizhe</first><last>Xie</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Zihang</first><last>Dai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>950–962</pages>
       <url hash="0388693e">P17-1088</url>
       <doi>10.18653/v1/P17-1088</doi>
@@ -1187,7 +1187,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Alvin</first><last>Cheung</last></author>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>963–973</pages>
       <url hash="8792abb9">P17-1089</url>
       <doi>10.18653/v1/P17-1089</doi>
@@ -1212,7 +1212,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Argument Mining with Structured <fixed-case>SVM</fixed-case>s and <fixed-case>RNN</fixed-case>s</title>
       <author><first>Vlad</first><last>Niculae</last></author>
       <author><first>Joonsuk</first><last>Park</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>985–995</pages>
       <url hash="b8002b5f">P17-1091</url>
       <doi>10.18653/v1/P17-1091</doi>
@@ -1223,7 +1223,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="92">
       <title>Neural Discourse Structure for Text Categorization</title>
       <author><first>Yangfeng</first><last>Ji</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>996–1005</pages>
       <url hash="02257434">P17-1092</url>
       <doi>10.18653/v1/P17-1092</doi>
@@ -1238,7 +1238,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1006–1017</pages>
       <url hash="088377c8">P17-1093</url>
       <doi>10.18653/v1/P17-1093</doi>
@@ -1262,7 +1262,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Nicholas</first><last>Andrews</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1029–1039</pages>
       <url hash="bc7ccec2">P17-1095</url>
       <doi>10.18653/v1/P17-1095</doi>
@@ -1276,7 +1276,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Zhilin</first><last>Yang</last></author>
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>1040–1050</pages>
       <url hash="2cd8bebc">P17-1096</url>
       <doi>10.18653/v1/P17-1096</doi>
@@ -1316,9 +1316,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="98">
       <title>Diversity driven attention model for query-based abstractive summarization</title>
       <author><first>Preksha</first><last>Nema</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Anirban</first><last>Laha</last></author>
-      <author><first>Balaraman</first><last>Ravindran</last></author>
+      <author id="balaraman-ravindran"><first>Balaraman</first><last>Ravindran</last></author>
       <pages>1063–1072</pages>
       <url hash="69ba053f">P17-1098</url>
       <doi>10.18653/v1/P17-1098</doi>
@@ -1329,8 +1329,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="99">
       <title>Get To The Point: Summarization with Pointer-Generator Networks</title>
       <author><first>Abigail</first><last>See</last></author>
-      <author><first>Peter J.</first><last>Liu</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="peter-j-liu"><first>Peter J.</first><last>Liu</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1073–1083</pages>
       <url hash="0bbc6a0e">P17-1099</url>
       <doi>10.18653/v1/P17-1099</doi>
@@ -1378,7 +1378,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Towards an Automatic <fixed-case>T</fixed-case>uring Test: Learning to Evaluate Dialogue Responses</title>
       <author><first>Ryan</first><last>Lowe</last></author>
       <author><first>Michael</first><last>Noseworthy</last></author>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
       <author><first>Nicolas</first><last>Angelard-Gontier</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
@@ -1455,7 +1455,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="109">
       <title>Probabilistic Typology: Deep Generative Models of Vowel Inventories</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1182–1192</pages>
       <url hash="81ab77b4">P17-1109</url>
       <doi>10.18653/v1/P17-1109</doi>
@@ -1468,7 +1468,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Xinchi</first><last>Chen</last></author>
       <author><first>Zhan</first><last>Shi</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1193–1203</pages>
       <url hash="760759b1">P17-1110</url>
       <doi>10.18653/v1/P17-1110</doi>
@@ -1491,7 +1491,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="112">
       <title>Robust Incremental Neural Semantic Graph Parsing</title>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>1215–1226</pages>
       <url hash="8f47172e">P17-1112</url>
       <doi>10.18653/v1/P17-1112</doi>
@@ -1515,7 +1515,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="114">
       <title>A Local Detection Approach for Named Entity Recognition and Mention Detection</title>
-      <author><first>Mingbin</first><last>Xu</last></author>
+      <author id="mingbin-xu"><first>Mingbin</first><last>Xu</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
       <author><first>Sedtawut</first><last>Watcharawittayakul</last></author>
       <pages>1237–1247</pages>
@@ -1569,10 +1569,10 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Enriching Complex Networks with Word Embeddings for Detecting Mild Cognitive Impairment from Speech Transcripts</title>
       <author><first>Leandro</first><last>Santos</last></author>
       <author><first>Edilson Anselmo</first><last>Corrêa Júnior</last></author>
-      <author><first>Osvaldo</first><last>Oliveira Jr</last></author>
-      <author><first>Diego</first><last>Amancio</last></author>
+      <author id="osvaldo-novais-oliveira-jr"><first>Osvaldo</first><last>Oliveira Jr</last></author>
+      <author id="diego-raphael-amancio"><first>Diego</first><last>Amancio</last></author>
       <author><first>Letícia</first><last>Mansur</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>1284–1296</pages>
       <url hash="909e081b">P17-1118</url>
       <doi>10.18653/v1/P17-1118</doi>
@@ -1603,7 +1603,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="121">
       <title>A Neural Local Coherence Model</title>
       <author><first>Dat</first><last>Tien Nguyen</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>1320–1330</pages>
       <url hash="44db4013">P17-1121</url>
       <doi>10.18653/v1/P17-1121</doi>
@@ -1614,7 +1614,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="122">
       <title>Data-Driven Broad-Coverage Grammars for Opinionated Natural Language Generation (<fixed-case>ONLG</fixed-case>)</title>
       <author><first>Tomer</first><last>Cagan</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <pages>1331–1341</pages>
       <url hash="e822e049">P17-1122</url>
@@ -1627,7 +1627,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Learning to Ask: Neural Question Generation for Reading Comprehension</title>
       <author><first>Xinya</first><last>Du</last></author>
       <author><first>Junru</first><last>Shao</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1342–1352</pages>
       <url hash="18bc4a39">P17-1123</url>
       <doi>10.18653/v1/P17-1123</doi>
@@ -1708,7 +1708,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="129">
       <title>A Constituent-Centric Neural Architecture for Reading Comprehension</title>
       <author><first>Pengtao</first><last>Xie</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1405–1414</pages>
       <url hash="f1a32efb">P17-1129</url>
       <doi>10.18653/v1/P17-1129</doi>
@@ -1728,7 +1728,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="131">
       <title>Understanding and Predicting Empathic Behavior in Counseling Therapy</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
       <author><first>Lawrence</first><last>An</last></author>
@@ -1741,7 +1741,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="132">
       <title>Leveraging Knowledge Bases in <fixed-case>LSTM</fixed-case>s for Improving Machine Reading</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1436–1446</pages>
       <url hash="517d4efe">P17-1132</url>
       <doi>10.18653/v1/P17-1132</doi>
@@ -1762,7 +1762,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="134">
       <title>Unsupervised Text Segmentation Based on Native Language Characteristics</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Lan</first><last>Du</last></author>
@@ -1776,8 +1776,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="135">
       <title>Weakly Supervised Cross-Lingual Named Entity Recognition via Effective Annotation and Representation Projection</title>
       <author><first>Jian</first><last>Ni</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>1470–1480</pages>
       <url hash="67f9e3f3">P17-1135</url>
       <doi>10.18653/v1/P17-1135</doi>
@@ -1787,7 +1787,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="136">
       <title>Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks</title>
       <author><first>Abhisek</first><last>Chakrabarty</last></author>
-      <author><first>Onkar Arun</first><last>Pandit</last></author>
+      <author id="onkar-arun-pandit"><first>Onkar Arun</first><last>Pandit</last></author>
       <author><first>Utpal</first><last>Garain</last></author>
       <pages>1481–1491</pages>
       <url hash="26e1ae08">P17-1136</url>
@@ -1800,8 +1800,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="137">
       <title>Learning to Create and Reuse Words in Open-Vocabulary Neural Language Modeling</title>
       <author><first>Kazuya</first><last>Kawakami</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>1492–1502</pages>
       <url hash="0af3b7d4">P17-1137</url>
       <doi>10.18653/v1/P17-1137</doi>
@@ -1812,7 +1812,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="138">
       <title>Bandit Structured Prediction for Neural Sequence-to-Sequence Learning</title>
       <author><first>Julia</first><last>Kreutzer</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <pages>1503–1513</pages>
       <url hash="c5027f5b">P17-1138</url>
@@ -1848,7 +1848,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="141">
       <title>Lexically Constrained Decoding for Sequence Generation Using Grid Beam Search</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>1535–1546</pages>
       <url hash="e0f68e13">P17-1141</url>
@@ -1887,7 +1887,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Fan</first><last>Zhang</last></author>
       <author><first>Homa B.</first><last>Hashemi</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>1568–1578</pages>
       <url hash="852330cc">P17-1144</url>
       <doi>10.18653/v1/P17-1144</doi>
@@ -1898,7 +1898,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title><fixed-case>W</fixed-case>atset: Automatic Induction of Synsets from a Graph of Synonyms</title>
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1579–1590</pages>
       <url hash="13f99755">P17-1145</url>
       <doi>10.18653/v1/P17-1145</doi>
@@ -1909,7 +1909,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Neural Modeling of Multi-Predicate Interactions for <fixed-case>J</fixed-case>apanese Predicate Argument Structure Analysis</title>
       <author><first>Hiroki</first><last>Ouchi</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1591–1600</pages>
       <url hash="8d32a8e8">P17-1146</url>
       <doi>10.18653/v1/P17-1146</doi>
@@ -1920,8 +1920,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title><fixed-case>T</fixed-case>rivia<fixed-case>QA</fixed-case>: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension</title>
       <author><first>Mandar</first><last>Joshi</last></author>
       <author><first>Eunsol</first><last>Choi</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1601–1611</pages>
       <url hash="6d6408f8">P17-1147</url>
       <doi>10.18653/v1/P17-1147</doi>
@@ -1955,7 +1955,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="150">
       <title>Interactive Learning of Grounded Verb Semantics towards Human-Robot Communication</title>
       <author><first>Lanbo</first><last>She</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>1634–1644</pages>
       <url hash="b7bbb9bc">P17-1150</url>
       <doi>10.18653/v1/P17-1150</doi>
@@ -1965,7 +1965,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="151">
       <title>Multimodal Word Distributions</title>
       <author><first>Ben</first><last>Athiwaratkun</last></author>
-      <author><first>Andrew</first><last>Wilson</last></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last></author>
       <pages>1645–1656</pages>
       <url hash="477b3417">P17-1151</url>
       <doi>10.18653/v1/P17-1151</doi>
@@ -1979,7 +1979,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Zhen-Hua</first><last>Ling</last></author>
       <author><first>Si</first><last>Wei</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>1657–1668</pages>
       <url hash="9f7c89ae">P17-1152</url>
       <doi>10.18653/v1/P17-1152</doi>
@@ -1993,7 +1993,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Victor R.</first><last>Martínez</last></author>
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
       <author><first>Karan</first><last>Singla</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>1669–1678</pages>
       <url hash="e7b35cd9">P17-1153</url>
       <doi>10.18653/v1/P17-1153</doi>
@@ -2089,7 +2089,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="161">
       <title>Semi-supervised sequence tagging with bidirectional language models</title>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Russell</first><last>Power</last></author>
@@ -2127,7 +2127,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>1777–1788</pages>
       <url hash="5529e315">P17-1163</url>
       <doi>10.18653/v1/P17-1163</doi>
@@ -2150,9 +2150,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Topical Coherence in <fixed-case>LDA</fixed-case>-based Models through Induced Segmentation</title>
       <author><first>Hesam</first><last>Amoualian</last></author>
       <author><first>Wei</first><last>Lu</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Georgios</first><last>Balikas</last></author>
-      <author><first>Massih R.</first><last>Amini</last></author>
+      <author id="massih-r-amini"><first>Massih R.</first><last>Amini</last></author>
       <author><first>Marianne</first><last>Clausel</last></author>
       <pages>1799–1809</pages>
       <url hash="a2cf3b6f">P17-1165</url>
@@ -2163,7 +2163,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="166">
       <title>Jointly Extracting Relations with Class Ties via Effective Deep Ranking</title>
       <author><first>Hai</first><last>Ye</last></author>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <author><first>Zhunchen</first><last>Luo</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
       <pages>1810–1820</pages>
@@ -2175,7 +2175,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="167">
       <title>Search-based Neural Structured Learning for Sequential Question Answering</title>
       <author><first>Mohit</first><last>Iyyer</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <pages>1821–1831</pages>
       <url hash="3656b490">P17-1167</url>
@@ -2189,7 +2189,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Hanxiao</first><last>Liu</last></author>
       <author><first>Zhilin</first><last>Yang</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>1832–1846</pages>
       <url hash="6750ad55">P17-1168</url>
@@ -2215,7 +2215,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="170">
       <title>Towards a Seamless Integration of Word Senses into Downstream <fixed-case>NLP</fixed-case> Applications</title>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
       <pages>1857–1869</pages>
@@ -2240,7 +2240,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Learning to Skim Text</title>
       <author><first>Adams Wei</first><last>Yu</last></author>
       <author><first>Hongrae</first><last>Lee</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <pages>1880–1890</pages>
       <url hash="452943d8">P17-1172</url>
       <doi>10.18653/v1/P17-1172</doi>
@@ -2288,7 +2288,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Yun</first><last>Chen</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Yong</first><last>Cheng</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>1925–1935</pages>
       <url hash="96258258">P17-1176</url>
       <doi>10.18653/v1/P17-1176</doi>
@@ -2300,7 +2300,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Huadong</first><last>Chen</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>1936–1945</pages>
       <url hash="cde55045">P17-1177</url>
       <doi>10.18653/v1/P17-1177</doi>
@@ -2362,9 +2362,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="182">
       <title>One-Shot Neural Cross-Lingual Transfer for Paradigm Completion</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1993–2003</pages>
       <url hash="601b1049">P17-1182</url>
       <doi>10.18653/v1/P17-1182</doi>
@@ -2411,7 +2411,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Deep Multitask Learning for Semantic Dependency Parsing</title>
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2037–2048</pages>
       <url hash="b72cb961">P17-1186</url>
       <doi>10.18653/v1/P17-1186</doi>
@@ -2448,7 +2448,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>A Progressive Learning Approach to <fixed-case>C</fixed-case>hinese <fixed-case>SRL</fixed-case> Using Heterogeneous Data</title>
       <author><first>Qiaolin</first><last>Xia</last></author>
       <author><first>Lei</first><last>Sha</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>2069–2077</pages>
       <url hash="19e59a9d">P17-1189</url>
@@ -2470,8 +2470,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Ontology-Aware Token Embeddings for Prepositional Phrase Attachment</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2089–2098</pages>
       <url hash="e92638bc">P17-1191</url>
       <doi>10.18653/v1/P17-1191</doi>
@@ -2481,7 +2481,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="192">
       <title>Identifying 1950s <fixed-case>A</fixed-case>merican Jazz Musicians: Fine-Grained <fixed-case>I</fixed-case>s<fixed-case>A</fixed-case> Extraction via Modifier Composition</title>
       <author><first>Ellie</first><last>Pavlick</last></author>
-      <author><first>Marius</first><last>Paşca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Paşca</last></author>
       <pages>2099–2109</pages>
       <url hash="7767e31c">P17-1192</url>
       <doi>10.18653/v1/P17-1192</doi>
@@ -2494,7 +2494,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Parsing to 1-Endpoint-Crossing, Pagenumber-2 Graphs</title>
       <author><first>Junjie</first><last>Cao</last></author>
       <author><first>Sheng</first><last>Huang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>2110–2120</pages>
       <url hash="2667fea7">P17-1193</url>
@@ -2520,7 +2520,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Takumi</first><last>Ito</last></author>
       <author><first>Hidenao</first><last>Iwane</last></author>
       <author><first>Hirokazu</first><last>Anai</last></author>
-      <author><first>Noriko H.</first><last>Arai</last></author>
+      <author id="noriko-h-arai"><first>Noriko H.</first><last>Arai</last></author>
       <pages>2131–2141</pages>
       <url hash="2fb9cb70">P17-1195</url>
       <doi>10.18653/v1/P17-1195</doi>
@@ -2572,7 +2572,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="3">
       <title>Lexical Features in Coreference Resolution: To be Used With Caution</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>14–19</pages>
       <url hash="0286fd18">P17-2003</url>
@@ -2584,7 +2584,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="4">
       <title>Alternative Objective Functions for Training <fixed-case>MT</fixed-case> Evaluation Metrics</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>20–25</pages>
       <url hash="0e955d73">P17-2004</url>
       <doi>10.18653/v1/P17-2004</doi>
@@ -2606,7 +2606,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="6">
       <title>Vector space models for evaluating semantic fluency in autism</title>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
       <author><first>Douglas</first><last>Gliner</last></author>
       <pages>32–37</pages>
@@ -2643,7 +2643,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Incorporating Dialectal Variability for Socially Equitable Language Identification</title>
       <author><first>David</first><last>Jurgens</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>51–57</pages>
       <url hash="ae1bc7cd">P17-2009</url>
       <doi>10.18653/v1/P17-2009</doi>
@@ -2656,7 +2656,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Evaluating Compound Splitters Extrinsically with Textual Entailment</title>
       <author><first>Glorianna</first><last>Jagfeld</last></author>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>58–63</pages>
       <url hash="9ff53a4e">P17-2010</url>
       <doi>10.18653/v1/P17-2010</doi>
@@ -2690,11 +2690,11 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="13">
       <title>On the Distribution of Lexical Features at Multiple Levels of Analysis</title>
       <author><first>Fatemeh</first><last>Almodaresi</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Vivek</first><last>Kulkarni</last></author>
       <author><first>Mohsen</first><last>Zakeri</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>79–84</pages>
       <url hash="b7181c98">P17-2013</url>
       <doi>10.18653/v1/P17-2013</doi>
@@ -2705,9 +2705,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="14">
       <title>Exploring Neural Text Simplification Models</title>
       <author><first>Sergiu</first><last>Nisioi</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>85–91</pages>
       <url hash="c945a88e">P17-2014</url>
       <doi>10.18653/v1/P17-2014</doi>
@@ -2728,9 +2728,9 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="16">
       <title>Sentence Alignment Methods for Improving Text Simplification Systems</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Marc</first><last>Franco-Salvador</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Heiner</first><last>Stuckenschmidt</last></author>
       <pages>97–102</pages>
@@ -2744,7 +2744,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Understanding Task Design Trade-offs in Crowdsourced Paraphrase Collection</title>
       <author><first>Youxuan</first><last>Jiang</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>Walter S.</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter S.</first><last>Lasecki</last></author>
       <pages>103–109</pages>
       <url hash="5ae8a56e">P17-2017</url>
       <doi>10.18653/v1/P17-2017</doi>
@@ -2757,7 +2757,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="18">
       <title>Arc-swift: A Novel Transition System for Dependency Parsing</title>
       <author><first>Peng</first><last>Qi</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>110–117</pages>
       <url hash="f079e521">P17-2018</url>
       <doi>10.18653/v1/P17-2018</doi>
@@ -2782,7 +2782,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Derui</first><last>Zhu</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>125–131</pages>
       <url hash="f1051c9b">P17-2020</url>
       <doi>10.18653/v1/P17-2020</doi>
@@ -2807,8 +2807,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Jiaqi</first><last>Wu</last></author>
       <author><first>Shereen</first><last>Oraby</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>141–147</pages>
       <url hash="03872cd1">P17-2022</url>
       <doi>10.18653/v1/P17-2022</doi>
@@ -2832,7 +2832,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Exploiting Domain Knowledge via Grouped Weight Sharing with Application to Text Categorization</title>
       <author><first>Ye</first><last>Zhang</last></author>
       <author><first>Matthew</first><last>Lease</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>155–160</pages>
       <url hash="166270a1">P17-2024</url>
       <doi>10.18653/v1/P17-2024</doi>
@@ -2915,7 +2915,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="31">
       <title>Attention Strategies for Multi-Source Sequence-to-Sequence Learning</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <pages>196–202</pages>
       <url hash="17167c3f">P17-2031</url>
       <doi>10.18653/v1/P17-2031</doi>
@@ -2937,8 +2937,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="33">
       <title>A Neural Model for User Geolocation and Lexical Dialectology</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>209–216</pages>
       <url hash="33376138">P17-2033</url>
       <doi>10.18653/v1/P17-2033</doi>
@@ -2964,7 +2964,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Neural Architecture for Temporal Relation Extraction: A <fixed-case>B</fixed-case>i-<fixed-case>LSTM</fixed-case> Approach for Detecting Narrative Containers</title>
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>224–230</pages>
       <url hash="d6311668">P17-2035</url>
@@ -2990,7 +2990,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Cross-lingual and cross-domain discourse segmentation of entire documents</title>
       <author><first>Chloé</first><last>Braud</last></author>
       <author><first>Ophélie</first><last>Lacroix</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>237–243</pages>
       <url hash="225e3473">P17-2037</url>
       <doi>10.18653/v1/P17-2037</doi>
@@ -3076,7 +3076,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Shun</first><last>Hasegawa</last></author>
       <author><first>Yuta</first><last>Kikuchi</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>281–286</pages>
       <url hash="f9c14c74">P17-2044</url>
       <doi>10.18653/v1/P17-2044</doi>
@@ -3185,7 +3185,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="54">
       <title>Multi-Task Learning of Keyphrase Boundary Classification</title>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>341–346</pages>
       <url hash="525c1c1d">P17-2054</url>
       <doi>10.18653/v1/P17-2054</doi>
@@ -3234,7 +3234,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="58">
       <title>Differentiable Scheduled Sampling for Credit Assignment</title>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>366–371</pages>
       <url hash="2f5576df">P17-2058</url>
@@ -3256,7 +3256,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Wenpeng</first><last>Hu</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>378–384</pages>
       <url hash="4ec8ca04">P17-2060</url>
       <doi>10.18653/v1/P17-2060</doi>
@@ -3288,7 +3288,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="63">
       <title>Feature Hashing for Language and Dialect Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>399–403</pages>
       <url hash="d4b31e58">P17-2063</url>
@@ -3345,7 +3345,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title><fixed-case>E</fixed-case>nglish Multiword Expression-aware Dependency Parsing Including Named Entities</title>
       <author><first>Akihiko</first><last>Kato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>427–432</pages>
       <url hash="d822d5f6">P17-2068</url>
       <doi>10.18653/v1/P17-2068</doi>
@@ -3357,7 +3357,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>433–440</pages>
       <url hash="0e3d685c">P17-2069</url>
       <doi>10.18653/v1/P17-2069</doi>
@@ -3391,7 +3391,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Methodical Evaluation of <fixed-case>A</fixed-case>rabic Word Embeddings</title>
       <author><first>Mohammed</first><last>Elrazzaz</last></author>
       <author><first>Shady</first><last>Elbassuoni</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <author><first>Chadi</first><last>Helwe</last></author>
       <pages>454–458</pages>
       <url hash="f1c0d915">P17-2072</url>
@@ -3414,7 +3414,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="74">
       <title>Best-Worst Scaling More Reliable than Rating Scales: A Case Study on Sentiment Intensity Annotation</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>465–470</pages>
       <url hash="4b7cf607">P17-2074</url>
       <doi>10.18653/v1/P17-2074</doi>
@@ -3423,11 +3423,11 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="75">
       <title>Demographic Inference on <fixed-case>T</fixed-case>witter using Recursive Neural Networks</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Qiongkai</first><last>Xu</last></author>
       <author><first>Lizhen</first><last>Qu</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>471–477</pages>
       <url hash="c78f4fdf">P17-2075</url>
       <doi>10.18653/v1/P17-2075</doi>
@@ -3438,7 +3438,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title><fixed-case>T</fixed-case>witter Demographic Classification Using Deep Multi-modal Multi-task Learning</title>
       <author><first>Prashanth</first><last>Vijayaraghavan</last></author>
       <author><first>Soroush</first><last>Vosoughi</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>478–483</pages>
       <url hash="ad5dc446">P17-2076</url>
       <doi>10.18653/v1/P17-2076</doi>
@@ -3463,7 +3463,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="78">
       <title>Parser Adaptation for Social Media by Integrating Normalization</title>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>491–497</pages>
       <url hash="4b1d7791">P17-2078</url>
       <doi>10.18653/v1/P17-2078</doi>
@@ -3497,7 +3497,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Shuzi</first><last>Niu</last></author>
       <author><first>Yang</first><last>Zhao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <author><first>Guoping</first><last>Long</last></author>
       <pages>504–509</pages>
       <url hash="4413e1d6">P17-2080</url>
@@ -3529,8 +3529,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="83">
       <title>A Generative Attentional Neural Network Model for Dialogue Act Classification</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
       <pages>524–529</pages>
       <url hash="a99acf63">P17-2083</url>
@@ -3551,7 +3551,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="85">
       <title>List-only Entity Linking</title>
       <author><first>Ying</first><last>Lin</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>536–541</pages>
       <url hash="db17e714">P17-2085</url>
@@ -3563,7 +3563,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Improving Native Language Identification by Using Spelling Errors</title>
       <author><first>Lingzhen</first><last>Chen</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <pages>542–546</pages>
       <url hash="9eb808ac">P17-2086</url>
       <doi>10.18653/v1/P17-2086</doi>
@@ -3596,7 +3596,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>560–566</pages>
       <url hash="b90f95e6">P17-2089</url>
       <doi>10.18653/v1/P17-2089</doi>
@@ -3632,7 +3632,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Xiaohua</first><last>Liu</last></author>
       <author><first>Hang</first><last>Li</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>580–586</pages>
       <url hash="f89d880b">P17-2092</url>
       <doi>10.18653/v1/P17-2092</doi>
@@ -3642,7 +3642,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="93">
       <title>Model Transfer for Tagging Low-resource Languages using a Bilingual Dictionary</title>
       <author><first>Meng</first><last>Fang</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>587–593</pages>
       <url hash="d7a7ff4c">P17-2093</url>
       <doi>10.18653/v1/P17-2093</doi>
@@ -3653,7 +3653,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="94">
       <title><fixed-case>E</fixed-case>uro<fixed-case>S</fixed-case>ense: Automatic Harvesting of Multilingual Sense Annotations from Parallel Text</title>
       <author><first>Claudio</first><last>Delli Bovi</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>594–600</pages>
@@ -3669,7 +3669,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>601–607</pages>
       <url hash="25b7a28d">P17-2095</url>
       <doi>10.18653/v1/P17-2095</doi>
@@ -3768,8 +3768,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Allegra</first><last>Larche</last></author>
       <author><first>Anthony</first><last>Janocko</last></author>
       <author><first>Kevin</first><last>Zembroski</last></author>
-      <author><first>H Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>654–658</pages>
       <url hash="1499d893">P17-2103</url>
       <doi>10.18653/v1/P17-2103</doi>
@@ -3815,7 +3815,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="107">
       <title>How (not) to train a dependency parser: The curious case of jackknifing part-of-speech taggers</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Natalie</first><last>Schluter</last></author>
       <pages>679–684</pages>
       <url hash="f1ac8104">P17-2107</url>
@@ -3831,7 +3831,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <editor><first>Allyson</first><last>Ettinger</last></editor>
       <editor><first>Spandana</first><last>Gella</last></editor>
       <editor><first>Matthieu</first><last>Labeau</last></editor>
-      <editor><first>Cecilia Ovesdotter</first><last>Alm</last></editor>
+      <editor id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
       <editor><first>Mark</first><last>Dredze</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3919,7 +3919,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <title>Text-based Speaker Identification on Multiparty Dialogues Using Multi-document Convolutional Neural Networks</title>
       <author><first>Kaixin</first><last>Ma</last></author>
       <author><first>Catherine</first><last>Xiao</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>49–55</pages>
       <url hash="45817342">P17-3009</url>
       <bibkey>ma-etal-2017-text</bibkey>
@@ -3955,7 +3955,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="13">
       <title>Word Embedding for Response-To-Text Assessment of Evidence</title>
       <author><first>Haoran</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>75–81</pages>
       <url hash="210b9385">P17-3013</url>
       <bibkey>zhang-litman-2017-word</bibkey>
@@ -3970,7 +3970,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="15">
       <title><fixed-case>S</fixed-case>occ<fixed-case>E</fixed-case>val: An Annotation Schema for Rating Soccer Players</title>
       <author><first>Jose</first><last>Ramirez</last></author>
-      <author><first>Matthew</first><last>Garber</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Garber</last></author>
       <author><first>Xinhao</first><last>Wang</last></author>
       <pages>89–94</pages>
       <url hash="9c73a81f">P17-3015</url>
@@ -3978,7 +3978,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="16">
       <title>Accent Adaptation for the Air Traffic Control Domain</title>
-      <author><first>Matthew</first><last>Garber</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Garber</last></author>
       <author><first>Meital</first><last>Singer</last></author>
       <author><first>Christopher</first><last>Ward</last></author>
       <pages>95–99</pages>
@@ -4004,7 +4004,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="19">
       <title>Fast Forward Through Opportunistic Incremental Meaning Representation Construction</title>
       <author><first>Petr</first><last>Babkin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>114–119</pages>
       <url hash="68dfb0b3">P17-3019</url>
       <bibkey>babkin-nirenburg-2017-fast</bibkey>
@@ -4065,7 +4065,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Anita</first><last>Ramm</last></author>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Annemarie</first><last>Friedrich</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1–6</pages>
       <url hash="73bb6239">P17-4001</url>
       <attachment type="poster" hash="41b81ae3">P17-4001.Poster.pdf</attachment>
@@ -4073,17 +4073,17 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="2">
       <title>Automating Biomedical Evidence Synthesis: <fixed-case>R</fixed-case>obot<fixed-case>R</fixed-case>eviewer</title>
-      <author><first>Iain</first><last>Marshall</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
       <author><first>Joël</first><last>Kuiper</last></author>
       <author><first>Edward</first><last>Banner</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>7–12</pages>
       <url hash="2ed0b580">P17-4002</url>
       <bibkey>marshall-etal-2017-automating</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>B</fixed-case>enben: A <fixed-case>C</fixed-case>hinese Intelligent Conversational Robot</title>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Bing</first><last>Qin</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
@@ -4146,11 +4146,11 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="9">
       <title>Interactive Visual Analysis of Transcribed Multi-Party Discourse</title>
       <author><first>Mennatallah</first><last>El-Assady</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Valentin</first><last>Gold</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <author><first>Katharina</first><last>Holzinger</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <pages>49–54</pages>
       <url hash="da589dce">P17-4009</url>
       <bibkey>el-assady-etal-2017-interactive</bibkey>
@@ -4195,7 +4195,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Yoon</first><last>Kim</last></author>
       <author><first>Yuntian</first><last>Deng</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>67–72</pages>
       <url hash="0a9ffca2">P17-4012</url>
       <bibkey>klein-etal-2017-opennmt</bibkey>
@@ -4203,7 +4203,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <paper id="13">
       <title><fixed-case>P</fixed-case>y<fixed-case>D</fixed-case>ial: A Multi-domain Statistical Dialogue System Toolkit</title>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
       <author><first>Dongho</first><last>Kim</last></author>
@@ -4211,8 +4211,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Paweł</first><last>Budzianowski</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>73–78</pages>
       <url hash="8dbfc319">P17-4013</url>
       <bibkey>ultes-etal-2017-pydial</bibkey>
@@ -4256,8 +4256,8 @@ two word-vectors results in a vector that is only a small angle away from the ve
     </paper>
     <paper id="18">
       <title>Swanson linking revisited: Accelerating literature-based discovery across domains using a conceptual influence graph</title>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>103–108</pages>
       <url hash="f9dfc795">P17-4018</url>
@@ -4297,7 +4297,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
     <meta>
       <booktitle>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="a3f062a7">P17-5</url>
-      <editor><first>Maja</first><last>Popović</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popović</last></editor>
       <editor><first>Jordan</first><last>Boyd-Graber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada</address>
@@ -4314,7 +4314,7 @@ two word-vectors results in a vector that is only a small angle away from the ve
       <author><first>Hoifung</first><last>Poon</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>1–2</pages>
       <url hash="8059b2aa">P17-5001</url>
       <abstract>We will introduce precision medicine and showcase the vast opportunities for NLP in this burgeoning field with great societal impact. We will review pressing NLP problems, state-of-the art methods, and important applications, as well as datasets, medical resources, and practical issues. The tutorial will provide an accessible overview of biomedicine, and does not presume knowledge in biology or healthcare. The ultimate goal is to reduce the entry barrier for NLP researchers to contribute to this exciting domain.</abstract>
@@ -4334,7 +4334,7 @@ This tutorial builds upon a recent course taught at Carnegie Mellon University d
     <paper id="3">
       <title>Deep Learning for Semantic Composition</title>
       <author><first>Xiaodan</first><last>Zhu</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <pages>6–7</pages>
       <url hash="4b8be4fe">P17-5003</url>
       <video href="P17-5003.mp4"/>
@@ -4347,7 +4347,7 @@ In this tutorial, we will cover the fundamentals and the state-of-the-art resear
       <title>Deep Learning for Dialogue Systems</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <pages>8–14</pages>
       <url hash="ca2d3fe8">P17-5004</url>
       <video href="P17-5004.mp4"/>
diff --git a/data/xml/P18.xml b/data/xml/P18.xml
index b81ed978c4..a64310f99c 100644
--- a/data/xml/P18.xml
+++ b/data/xml/P18.xml
@@ -19,8 +19,8 @@
     <paper id="1">
       <title>Probabilistic <fixed-case>F</fixed-case>ast<fixed-case>T</fixed-case>ext for Multi-Sense Word Embeddings</title>
       <author><first>Ben</first><last>Athiwaratkun</last></author>
-      <author><first>Andrew</first><last>Wilson</last></author>
-      <author><first>Anima</first><last>Anandkumar</last></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last></author>
       <pages>1–11</pages>
       <abstract>We introduce Probabilistic FastText, a new model for word embeddings that can capture multiple word senses, sub-word structure, and uncertainty information. In particular, we represent each word with a Gaussian mixture density, where the mean of a mixture component is given by the sum of n-grams. This representation allows the model to share the “strength” across sub-word structures (e.g. Latin roots), producing accurate representations of rare, misspelt, or even unseen words. Moreover, each component of the mixture can capture a different word sense. Probabilistic FastText outperforms both FastText, which has no probabilistic model, and dictionary-level probabilistic embeddings, which do not incorporate subword structures, on several word-similarity benchmarks, including English RareWord and foreign language datasets. We also achieve state-of-art performance on benchmarks that measure ability to discern different meanings. Thus, our model is the first to achieve best of both the worlds: multi-sense representations while having enriched semantics on rare words.</abstract>
       <url hash="a84bb478">P18-1001</url>
@@ -35,7 +35,7 @@
       <author><first>Nikunj</first><last>Saunshi</last></author>
       <author><first>Yingyu</first><last>Liang</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
-      <author><first>Brandon</first><last>Stewart</last></author>
+      <author id="brandon-m-stewart"><first>Brandon</first><last>Stewart</last></author>
       <author><first>Sanjeev</first><last>Arora</last></author>
       <pages>12–22</pages>
       <abstract>Motivations like domain adaptation, transfer learning, and feature learning have fueled interest in inducing embeddings for rare or unseen words, n-grams, synsets, and other textual features. This paper introduces a la carte embedding, a simple and general alternative to the usual word2vec-based approaches for building such representations that is based upon recent theoretical results for GloVe-like embeddings. Our method relies mainly on a linear transformation that is efficiently learnable using pretrained word vectors and linear regression. This transform is applicable on the fly in the future when a new text feature or rare word is encountered, even if only a single usage example is available. We introduce a new dataset showing how the a la carte method requires fewer examples of words in context to learn high-quality embeddings and we obtain state-of-the-art results on a nonce task and some unsupervised document classification tasks.</abstract>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="7">
       <title>Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates</title>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <pages>66–75</pages>
       <abstract>Subword units are an effective way to alleviate the open vocabulary problems in neural machine translation (NMT). While sentences are usually converted into unique subword sequences, subword segmentation is potentially ambiguous and multiple segmentations are possible even with the same vocabulary. The question addressed in this paper is whether it is possible to harness the segmentation ambiguity as a noise to improve the robustness of NMT. We present a simple regularization method, subword regularization, which trains the model with multiple subword segmentations probabilistically sampled during training. In addition, for better subword sampling, we propose a new subword segmentation algorithm based on a unigram language model. We experiment with multiple corpora and report consistent improvements especially on low resource and out-of-domain settings.</abstract>
       <url hash="485f9836">P18-1007</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="8">
       <title>The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation</title>
-      <author><first>Mia Xu</first><last>Chen</last></author>
+      <author id="mia-xu-chen"><first>Mia Xu</first><last>Chen</last></author>
       <author><first>Orhan</first><last>Firat</last></author>
       <author><first>Ankur</first><last>Bapna</last></author>
       <author><first>Melvin</first><last>Johnson</last></author>
@@ -124,7 +124,7 @@
       <author><first>Niki</first><last>Parmar</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
-      <author><first>Lukasz</first><last>Kaiser</last></author>
+      <author id="lukasz-kaiser"><first>Lukasz</first><last>Kaiser</last></author>
       <author><first>Zhifeng</first><last>Chen</last></author>
       <author><first>Yonghui</first><last>Wu</last></author>
       <author><first>Macduff</first><last>Hughes</last></author>
@@ -132,8 +132,8 @@
       <abstract>The past year has witnessed rapid advances in sequence-to-sequence (seq2seq) modeling for Machine Translation (MT). The classic RNN-based approaches to MT were first out-performed by the convolutional seq2seq model, which was then out-performed by the more recent Transformer model. Each of these new approaches consists of a fundamental architecture accompanied by a set of modeling and training techniques that are in principle applicable to other seq2seq architectures. In this paper, we tease apart the new architectures and their accompanying techniques in two ways. First, we identify several key modeling and training techniques, and apply them to the RNN architecture, yielding a new RNMT+ model that outperforms all of the three fundamental architectures on the benchmark WMT’14 English to French and English to German tasks. Second, we analyze the properties of each fundamental seq2seq architecture and devise new hybrid architectures intended to combine their strengths. Our hybrid models obtain further improvements, outperforming the RNMT+ model on both benchmark datasets.</abstract>
       <url hash="3701fe51">P18-1008</url>
       <attachment type="note" hash="90615a6e">P18-1008.Notes.pdf</attachment>
-      <video href="P18-1008.mp4"/>
       <attachment type="presentation" hash="b42dff13">P18-1008.Presentation.pdf</attachment>
+      <video href="P18-1008.mp4"/>
       <doi>10.18653/v1/P18-1008</doi>
       <bibkey>chen-etal-2018-best</bibkey>
     </paper>
@@ -142,7 +142,7 @@
       <author><first>Eunsol</first><last>Choi</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>87–96</pages>
       <abstract>We introduce a new entity typing task: given a sentence with an entity mention, the goal is to predict a set of free-form phrases (e.g. skyscraper, songwriter, or criminal) that describe appropriate types for the target entity. This formulation allows us to use a new type of distant supervision at large scale: head words, which indicate the type of the noun phrases they appear in. We show that these ultra-fine types can be crowd-sourced, and introduce new evaluation sets that are much more diverse and fine-grained than existing benchmarks. We present a model that can predict ultra-fine types, and is trained using a multitask objective that pools our new head-word supervision with prior supervision from entity linking. Experimental results demonstrate that our model is effective in predicting entity types at varying granularity; it achieves state of the art performance on an existing fine-grained entity typing benchmark, and sets baselines for our newly-introduced datasets.</abstract>
       <url hash="4eb503b8">P18-1009</url>
@@ -186,14 +186,14 @@
       <title>Towards Understanding the Geometry of Knowledge Graph Embeddings</title>
       <author><first/><last>Chandrahas</last></author>
       <author><first>Aditya</first><last>Sharma</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>122–131</pages>
       <abstract>Knowledge Graph (KG) embedding has emerged as a very active area of research over the last few years, resulting in the development of several embedding methods. These KG embedding methods represent KG entities and relations as vectors in a high-dimensional space. Despite this popularity and effectiveness of KG embeddings in various tasks (e.g., link prediction), geometric understanding of such embeddings (i.e., arrangement of entity and relation vectors in vector space) is unexplored – we fill this gap in the paper. We initiate a study to analyze the geometry of KG embeddings and correlate it with task performance and other hyperparameters. To the best of our knowledge, this is the first study of its kind. Through extensive experiments on real-world datasets, we discover several insights. For example, we find that there are sharp differences between the geometry of embeddings learnt by different classes of KG embeddings methods. We hope that this initial study will inspire other follow-up research on this important but unexplored problem.</abstract>
       <url hash="72d664e7">P18-1012</url>
       <attachment type="note" hash="066cbc01">P18-1012.Notes.pdf</attachment>
       <attachment type="software" hash="497eeb3b">P18-1012.Software.zip</attachment>
-      <video href="P18-1012.mp4"/>
       <attachment type="presentation" hash="a27603ef">P18-1012.Presentation.pdf</attachment>
+      <video href="P18-1012.mp4"/>
       <doi>10.18653/v1/P18-1012</doi>
       <bibkey>chandrahas-etal-2018-towards</bibkey>
     </paper>
@@ -254,7 +254,7 @@
     </paper>
     <paper id="17">
       <title>Obtaining Reliable Human Ratings of Valence, Arousal, and Dominance for 20,000 <fixed-case>E</fixed-case>nglish Words</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>174–184</pages>
       <abstract>Words play a central role in language and thought. Factor analysis studies have shown that the primary dimensions of meaning are valence, arousal, and dominance (VAD). We present the NRC VAD Lexicon, which has human ratings of valence, arousal, and dominance for more than 20,000 English words. We use Best–Worst Scaling to obtain fine-grained scores and address issues of annotation consistency that plague traditional rating scale methods of annotation. We show that the ratings obtained are vastly more reliable than those in existing lexicons. We also show that there exist statistically significant differences in the shared understanding of valence, arousal, and dominance across demographic variables such as age, gender, and personality.</abstract>
       <url hash="f82a65e1">P18-1017</url>
@@ -270,16 +270,16 @@
       <author><first>Vivek</first><last>Srikumar</last></author>
       <author><first>Jakob</first><last>Prange</last></author>
       <author><first>Austin</first><last>Blodgett</last></author>
-      <author><first>Sarah R.</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah R.</first><last>Moeller</last></author>
       <author><first>Aviram</first><last>Stern</last></author>
-      <author><first>Adi</first><last>Bitan</last></author>
+      <author id="adi-shalev"><first>Adi</first><last>Bitan</last></author>
       <author><first>Omri</first><last>Abend</last></author>
       <pages>185–196</pages>
       <abstract>Semantic relations are often signaled with prepositional or possessive marking—but extreme polysemy bedevils their analysis and automatic interpretation. We introduce a new annotation scheme, corpus, and task for the disambiguation of prepositions and possessives in English. Unlike previous approaches, our annotations are comprehensive with respect to types and tokens of these markers; use broadly applicable supersense classes rather than fine-grained dictionary definitions; unite prepositions and possessives under the same class inventory; and distinguish between a marker’s lexical contribution and the role it marks in the context of a predicate or scene. Strong interannotator agreement rates, as well as encouraging disambiguation results with established supervised methods, speak to the viability of the scheme and task.</abstract>
       <url hash="27525c95">P18-1018</url>
       <attachment type="note" hash="906e88f7">P18-1018.Notes.pdf</attachment>
-      <video href="P18-1018.mp4"/>
       <attachment type="presentation" hash="53961198">P18-1018.Presentation.pdf</attachment>
+      <video href="P18-1018.mp4"/>
       <doi>10.18653/v1/P18-1018</doi>
       <bibkey>schneider-etal-2018-comprehensive</bibkey>
     </paper>
@@ -289,9 +289,9 @@
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <author><first>Roma</first><last>Patel</last></author>
       <author><first>Yinfei</first><last>Yang</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <pages>197–207</pages>
       <abstract>We present a corpus of 5,000 richly annotated abstracts of medical articles describing clinical randomized controlled trials. Annotations include demarcations of text spans that describe the Patient population enrolled, the Interventions studied and to what they were Compared, and the Outcomes measured (the ‘PICO’ elements). These spans are further annotated at a more granular level, e.g., individual interventions within them are marked and mapped onto a structured medical vocabulary. We acquired annotations from a diverse set of workers with varying levels of expertise and cost. We describe our data collection process and the corpus itself in detail. We then outline a set of challenging NLP tasks that would aid searching of the medical literature and the practice of evidence-based medicine.</abstract>
       <url hash="18e0ad8b">P18-1019</url>
@@ -356,7 +356,7 @@
       <title><fixed-case>L</fixed-case>ink<fixed-case>NB</fixed-case>ed: Multi-Graph Representation Learning with Entity Linkage</title>
       <author><first>Rakshit</first><last>Trivedi</last></author>
       <author><first>Bunyamin</first><last>Sisman</last></author>
-      <author><first>Xin Luna</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin Luna</first><last>Dong</last></author>
       <author><first>Christos</first><last>Faloutsos</last></author>
       <author><first>Jun</first><last>Ma</last></author>
       <author><first>Hongyuan</first><last>Zha</last></author>
@@ -383,9 +383,9 @@
     </paper>
     <paper id="26">
       <title>Graph-to-Sequence Learning using Gated Graph Neural Networks</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>273–283</pages>
       <abstract>Many NLP applications can be framed as a graph-to-sequence learning problem. Previous work proposing neural architectures on graph-to-sequence obtained promising results compared to grammar-based approaches but still rely on linearisation heuristics and/or standard recurrent networks to achieve the best performance. In this work propose a new model that encodes the full structural information contained in the graph. Our architecture couples the recently proposed Gated Graph Neural Networks with an input transformation that allows nodes and edges to have their own hidden representations, while tackling the parameter explosion problem present in previous work. Experimental results shows that our model outperforms strong baselines in generation from AMR graphs and syntax-based neural machine translation.</abstract>
       <url hash="daa00558">P18-1026</url>
@@ -398,7 +398,7 @@
       <author><first>Urvashi</first><last>Khandelwal</last></author>
       <author><first>He</first><last>He</last></author>
       <author><first>Peng</first><last>Qi</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>284–294</pages>
       <abstract>We know very little about how neural language models (LM) use prior linguistic context. In this paper, we investigate the role of context in an LSTM LM, through ablation studies. Specifically, we analyze the increase in perplexity when prior context words are shuffled, replaced, or dropped. On two standard datasets, Penn Treebank and WikiText-2, we find that the model is capable of using about 200 tokens of context on average, but sharply distinguishes nearby context (recent 50 tokens) from the distant history. The model is highly sensitive to the order of words within the most recent sentence, but ignores word order in the long-range context (beyond 50 tokens), suggesting the distant past is modeled only as a rough semantic field or topic. We further find that the neural caching model (Grave et al., 2017b) especially helps the LSTM to copy words from within this distant context. Overall, our analysis not only provides a better understanding of how neural LMs use their context, but also sheds light on recent success from cache-based models.</abstract>
       <url hash="37871b8f">P18-1027</url>
@@ -411,7 +411,7 @@
       <title>Bridging <fixed-case>CNN</fixed-case>s, <fixed-case>RNN</fixed-case>s, and Weighted Finite-State Machines</title>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>295–305</pages>
       <abstract>Recurrent and convolutional neural networks comprise two distinct families of models that have proven to be useful for encoding natural language utterances. In this paper we present SoPa, a new model that aims to bridge these two approaches. SoPa combines neural representation learning with weighted finite-state automata (WFSAs) to learn a soft version of traditional surface patterns. We show that SoPa is an extension of a one-layer CNN, and that such CNNs are equivalent to a restricted version of SoPa, and accordingly, to a restricted form of WFSA. Empirically, on three text classification tasks, SoPa is comparable or better than both a BiLSTM (RNN) baseline and a CNN baseline, and is particularly useful in small data settings.</abstract>
       <url hash="7fef99b1">P18-1028</url>
@@ -424,7 +424,7 @@
       <title>Zero-shot Learning of Classifiers from Natural Language Quantification</title>
       <author><first>Shashank</first><last>Srivastava</last></author>
       <author><first>Igor</first><last>Labutov</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>306–316</pages>
       <abstract>Humans can efficiently learn new concepts using language. We present a framework through which a set of explanations of a concept can be used to learn a classifier without access to any labeled examples. We use semantic parsing to map explanations to probabilistic assertions grounded in latent class labels and observed attributes of unlabeled data, and leverage the differential semantics of linguistic quantifiers (e.g., ‘usually’ vs ‘always’) to drive model training. Experiments on three domains show that the learned classifiers outperform previous approaches for learning with limited data, and are comparable with fully supervised classifiers trained from a small number of labeled examples.</abstract>
       <url hash="9843471d">P18-1029</url>
@@ -458,7 +458,7 @@
     <paper id="32">
       <title>Evaluating neural network explanation methods using hybrid documents and morphosyntactic agreement</title>
       <author><first>Nina</first><last>Poerner</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Benjamin</first><last>Roth</last></author>
       <pages>340–350</pages>
       <abstract>The behavior of deep neural networks (DNNs) is hard to understand. This makes it necessary to explore post hoc explanation methods. We conduct the first comprehensive evaluation of explanation methods for NLP. To this end, we design two novel evaluation paradigms that cover two important classes of NLP problems: small context and large context problems. Both paradigms require no manual annotation and are therefore broadly applicable. We also introduce LIMSSE, an explanation method inspired by LIME that is designed for NLP. We show empirically that LIMSSE, LRP and DeepLIFT are the most effective explanation methods and recommend them for explaining DNNs in NLP.</abstract>
@@ -476,7 +476,7 @@
       <author><first>Karthik</first><last>Ramanathan</last></author>
       <author><first>Sesh</first><last>Sadasivam</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>351–360</pages>
       <abstract>To be informative, an evaluation must measure how well systems generalize to realistic unseen data. We identify limitations of and propose improvements to current evaluations of text-to-SQL systems. First, we compare human-generated and automatically generated questions, characterizing properties of queries necessary for real-world applications. To facilitate evaluation on multiple datasets, we release standardized and improved versions of seven existing datasets and one new text-to-SQL dataset. Second, we show that the current division of data into training and test sets measures robustness to variations in the way questions are asked, but only partially tests how well systems generalize to new queries; therefore, we propose a complementary dataset split for evaluation of future work. Finally, we demonstrate how the common practice of anonymizing variables during evaluation removes an important challenge of the task. Our observations highlight key difficulties, and our methodology enables effective measurement of future development.</abstract>
       <url hash="e3675b6c">P18-1033</url>
@@ -517,8 +517,8 @@
     </paper>
     <paper id="36">
       <title>Character-Level Models versus Morphology in Semantic Role Labeling</title>
-      <author><first>Gözde Gül</first><last>Şahin</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>Şahin</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>386–396</pages>
       <abstract>Character-level models have become a popular approach specially for their accessibility and ability to handle unseen data. However, little is known on their ability to reveal the underlying morphological structure of a word, which is a crucial skill for high-level semantic analysis tasks, such as semantic role labeling (SRL). In this work, we train various types of SRL models that use word, character and morphology level information and analyze how performance of characters compare to words and morphology for several languages. We conduct an in-depth error analysis for each morphological typology and analyze the strengths and limitations of character-level models that relate to out-of-domain data, training data size, long range dependencies and model complexity. Our exhaustive analyses shed light on important characteristics of character-level models and their semantic capability.</abstract>
       <url hash="bb8aa103">P18-1036</url>
@@ -541,7 +541,7 @@
     <paper id="38">
       <title>Accurate <fixed-case>SHRG</fixed-case>-Based Semantic Parsing</title>
       <author><first>Yufei</first><last>Chen</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>408–418</pages>
       <abstract>We demonstrate that an SHRG-based parser can produce semantic graphs much more accurately than previously shown, by relating synchronous production rules to the syntacto-semantic composition process. Our parser achieves an accuracy of 90.35 for EDS (89.51 for DMRS) in terms of elementary dependency match, which is a 4.87 (5.45) point improvement over the best existing data-driven model, indicating, in our view, the importance of linguistically-informed derivation for data-driven semantic parsing. This accuracy is equivalent to that of English Resource Grammar guided models, suggesting that (recurrent) neural network models are able to effectively learn deep linguistic knowledge from annotations.</abstract>
@@ -554,8 +554,8 @@
     <paper id="39">
       <title>Using Intermediate Representations to Solve Math Word Problems</title>
       <author><first>Danqing</first><last>Huang</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Qingyu</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Yin</last></author>
       <pages>419–428</pages>
@@ -569,7 +569,7 @@
     <paper id="40">
       <title>Discourse Representation Structure Parsing</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>429–439</pages>
       <abstract>We introduce an open-domain neural semantic parser which generates formal meaning representations in the style of Discourse Representation Theory (DRT; Kamp and Reyle 1993). We propose a method which transforms Discourse Representation Structures (DRSs) to trees and develop a structure-aware model which decomposes the decoding process into three stages: basic DRS structure prediction, condition prediction (i.e., predicates and relations), and referent prediction (i.e., variables). Experimental results on the Groningen Meaning Bank (GMB) show that our model outperforms competitive baselines by a wide margin.</abstract>
@@ -614,7 +614,7 @@
       <author><first>Hannah</first><last>Rashkin</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Emily</first><last>Allaway</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>463–473</pages>
       <abstract>We investigate a new commonsense inference task: given an event described in a short free-form text (“X drinks coffee in the morning”), a system reasons about the likely intents (“X wants to stay awake”) and reactions (“X feels alert”) of the event’s participants. To support this study, we construct a new crowdsourced corpus of 25,000 event phrases covering a diverse range of everyday events and situations. We report baseline performance on this task, demonstrating that neural encoder-decoder models can successfully compose embedding representations of previously unseen events and reason about the likely intents and reactions of the event participants. In addition, we demonstrate how commonsense inference on people’s intents and reactions can help unveil the implicit gender inequality prevalent in modern movie scripts.</abstract>
@@ -638,7 +638,7 @@
     </paper>
     <paper id="45">
       <title>Improving Event Coreference Resolution by Modeling Correlations between Event Coreference Chains and Document Topic Structures</title>
-      <author><first>Prafulla Kumar</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla Kumar</first><last>Choubey</last></author>
       <author><first>Ruihong</first><last>Huang</last></author>
       <pages>485–495</pages>
       <abstract>This paper proposes a novel approach for event coreference resolution that models correlations between event coreference chains and document topical structures through an Integer Linear Programming formulation. We explicitly model correlations between the main event chains of a document with topic transition sentences, inter-coreference chain correlations, event mention distributional characteristics and sub-event structure, and use them with scores obtained from a local coreference relation classifier for jointly resolving multiple event chains in a document. Our experiments across KBP 2016 and 2017 datasets suggest that each of the structures contribute to improving event coreference resolution performance.</abstract>
@@ -678,8 +678,8 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Wenxuan</first><last>Zhou</last></author>
       <author><first>Jingli</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>515–526</pages>
       <abstract>Due to the ability of encoding and mapping semantic information into a high-dimensional latent feature space, neural networks have been successfully used for detecting events to a certain extent. However, such a feature space can be easily contaminated by spurious features inherent in event detection. In this paper, we propose a self-regulated learning approach by utilizing a generative adversarial network to generate spurious features. On the basis, we employ a recurrent network to eliminate the fakes. Detailed experiments on the ACE 2005 and TAC-KBP 2015 corpora show that our proposed method is highly effective and adaptable.</abstract>
       <url hash="45849e79">P18-1048</url>
@@ -726,8 +726,8 @@
     </paper>
     <paper id="52">
       <title>Coherence Modeling of Asynchronous Conversations: A Neural Entity Grid Approach</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Muhammad Tasnim</first><last>Mohiuddin</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="muhammad-tasnim-mohiuddin"><first>Muhammad Tasnim</first><last>Mohiuddin</last></author>
       <author><first>Dat</first><last>Tien Nguyen</last></author>
       <pages>558–568</pages>
       <abstract>We propose a novel coherence model for written asynchronous conversations (e.g., forums, emails), and show its applications in coherence assessment and thread reconstruction tasks. We conduct our research in two steps. First, we propose improvements to the recently proposed neural entity grid model by lexicalizing its entity transitions. Then, we extend the model to asynchronous conversations by incorporating the underlying conversational structure in the entity grid representation and feature computation. Our model achieves state of the art results on standard coherence assessment tasks in monologue and conversations outperforming existing models. We also demonstrate its effectiveness in reconstructing thread structures.</abstract>
@@ -741,7 +741,7 @@
       <title>Deep Reinforcement Learning for <fixed-case>C</fixed-case>hinese Zero Pronoun Resolution</title>
       <author><first>Qingyu</first><last>Yin</last></author>
       <author><first>Yu</first><last>Zhang</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>William Yang</first><last>Wang</last></author>
       <pages>569–578</pages>
@@ -786,14 +786,14 @@
     </paper>
     <paper id="57">
       <title><fixed-case>T</fixed-case>utorial<fixed-case>B</fixed-case>ank: A Manually-Collected Corpus for Prerequisite Chains, Survey Extraction and Resource Recommendation</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Irene</first><last>Li</last></author>
       <author><first>Prawat</first><last>Trairatvorakul</last></author>
       <author><first>Yijiao</first><last>He</last></author>
       <author><first>Weitai</first><last>Ting</last></author>
       <author><first>Robert</first><last>Tung</last></author>
       <author><first>Caitlin</first><last>Westerfield</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>611–620</pages>
       <abstract>The field of Natural Language Processing (NLP) is growing rapidly, with new research published daily along with an abundance of tutorials, codebases and other online resources. In order to learn this dynamic field or stay up-to-date on the latest research, students as well as educators and researchers must constantly sift through multiple sources to find valuable, relevant information. To address this situation, we introduce TutorialBank, a new, publicly available dataset which aims to facilitate NLP education and research. We have manually collected and categorized over 5,600 resources on NLP as well as the related fields of Artificial Intelligence (AI), Machine Learning (ML) and Information Retrieval (IR). Our dataset is notably the largest manually-picked corpus of resources intended for NLP education which does not include only academic papers. Additionally, we have created both a search engine and a command-line tool for the resources and have annotated the corpus to include lists of research topics, relevant resources for each topic, prerequisite relations among topics, relevant sub-parts of individual resources, among other annotations. We are releasing the dataset and present several avenues for further research.</abstract>
       <url hash="494bf057">P18-1057</url>
@@ -846,7 +846,7 @@
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Shaohan</first><last>Huang</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>654–663</pages>
       <abstract>Sentence scoring and sentence selection are two main steps in extractive document summarization systems. However, previous works treat them as two separated subtasks. In this paper, we present a novel end-to-end neural network framework for extractive document summarization by jointly learning to score and select sentences. It first reads the document sentences with a hierarchical encoder to obtain the representation of sentences. Then it builds the output summary by extracting sentences one by one. Different from previous methods, our approach integrates the selection strategy into the scoring model, which directly predicts the relative importance given previously selected sentences. Experiments on the CNN/Daily Mail dataset show that the proposed framework significantly outperforms the state-of-the-art extractive summarization models.</abstract>
       <url hash="f71f4903">P18-1061</url>
@@ -906,9 +906,9 @@
     </paper>
     <paper id="66">
       <title>Mining Cross-Cultural Differences and Similarities in Social Media</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
       <pages>709–719</pages>
       <abstract>Cross-cultural differences and similarities are common in cross-lingual natural language understanding, especially for research in social media. For instance, people of distinct cultures often hold different opinions on a single named entity. Also, understanding slang terms across languages requires knowledge of cross-cultural similarities. In this paper, we study the problem of computing such cross-cultural differences and similarities. We present a lightweight yet effective approach, and evaluate it on two novel tasks: 1) mining cross-cultural differences of named entities and 2) finding similar terms for slang across languages. Experimental results show that our framework substantially outperforms a number of baseline methods on both tasks. The framework could be useful for machine translation applications and research in computational social science.</abstract>
@@ -919,7 +919,7 @@
     </paper>
     <paper id="67">
       <title>Classification of Moral Foundations in Microblog Political Discourse</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>720–730</pages>
       <abstract>Previous works in computer science, as well as political and social science, have shown correlation in text between political ideologies and the moral foundations expressed within that text. Additional work has shown that policy frames, which are used by politicians to bias the public towards their stance on an issue, are also correlated with political ideology. Based on these associations, this work takes a first step towards modeling both the language and how politicians frame issues on Twitter, in order to predict the moral foundations that are used by politicians to express their stances on issues. The contributions of this work includes a dataset annotated for the moral foundations, annotation guidelines, and probabilistic graphical models which show the usefulness of jointly modeling abstract political slogans, as opposed to the unigrams of previous works, with policy frames for the prediction of the morality underlying political tweets.</abstract>
@@ -980,7 +980,7 @@
     </paper>
     <paper id="72">
       <title>On the Limitations of Unsupervised Bilingual Dictionary Induction</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <pages>778–788</pages>
@@ -994,8 +994,8 @@
     <paper id="73">
       <title>A robust self-learning method for fully unsupervised cross-lingual mappings of word embeddings</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>789–798</pages>
       <abstract>Recent work has managed to learn cross-lingual word embeddings without parallel data by mapping monolingual embeddings to a shared space through adversarial training. However, their evaluation has focused on favorable conditions, using comparable corpora or closely-related languages, and we show that they often fail in more realistic scenarios. This work proposes an alternative approach based on a fully unsupervised initialization that explicitly exploits the structural similarity of the embeddings, and a robust self-learning algorithm that iteratively improves this solution. Our method succeeds in all tested scenarios and obtains the best published results in standard datasets, even surpassing previous supervised systems. Our implementation is released as an open source project at <url>https://github.com/artetxem/vecmap</url>.</abstract>
       <url hash="2c9893fe">P18-1073</url>
@@ -1022,8 +1022,8 @@
       <title>Two Methods for Domain Adaptation of Bilingual Tasks: Delightfully Simple and Broadly Applicable</title>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Fabienne</first><last>Braune</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>810–820</pages>
       <abstract>Bilingual tasks, such as bilingual lexicon induction and cross-lingual classification, are crucial for overcoming data sparsity in the target language. Resources required for such tasks are often out-of-domain, thus domain adaptation is an important problem here. We make two contributions. First, we test a delightfully simple method for domain adaptation of bilingual word embeddings. We evaluate these embeddings on two bilingual tasks involving different domains: cross-lingual twitter sentiment classification and medical bilingual lexicon induction. Second, we tailor a broadly applicable semi-supervised classification method from computer vision to these tasks. We show that this method also helps in low-resource setups. Using both methods together we achieve large improvements over our baselines, by using only additional unlabeled data.</abstract>
       <url hash="9748ac96">P18-1075</url>
@@ -1071,15 +1071,15 @@
     </paper>
     <paper id="79">
       <title>Semantically Equivalent Adversarial Rules for Debugging <fixed-case>NLP</fixed-case> models</title>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <author><first>Carlos</first><last>Guestrin</last></author>
       <pages>856–865</pages>
       <abstract>Complex machine learning models for NLP are often brittle, making different predictions for input instances that are extremely similar semantically. To automatically detect this behavior for individual instances, we present semantically equivalent adversaries (SEAs) – semantic-preserving perturbations that induce changes in the model’s predictions. We generalize these adversaries into semantically equivalent adversarial rules (SEARs) – simple, universal replacement rules that induce adversaries on many instances. We demonstrate the usefulness and flexibility of SEAs and SEARs by detecting bugs in black-box state-of-the-art models for three domains: machine comprehension, visual question-answering, and sentiment analysis. Via user studies, we demonstrate that we generate high-quality local adversaries for more instances than humans, and that SEARs induce four times as many mistakes as the bugs discovered by human experts. SEARs are also actionable: retraining models using data augmentation significantly reduces bugs, while maintaining accuracy.</abstract>
       <url hash="c0777e5b">P18-1079</url>
       <attachment type="note" hash="53e951a8">P18-1079.Notes.pdf</attachment>
-      <video href="P18-1079.mp4"/>
       <attachment type="presentation" hash="e54b2174">P18-1079.Presentation.pdf</attachment>
+      <video href="P18-1079.mp4"/>
       <doi>10.18653/v1/P18-1079</doi>
       <bibkey>ribeiro-etal-2018-semantically</bibkey>
     </paper>
@@ -1088,7 +1088,7 @@
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>866–876</pages>
       <abstract>Style transfer is the task of rephrasing the text to contain specific stylistic properties without changing the intent or affect within the context. This paper introduces a new method for automatic style transfer. We first learn a latent representation of the input sentence which is grounded in a language translation model in order to better preserve the meaning of the sentence while reducing stylistic properties. Then adversarial generation techniques are used to make the output match the desired style. We evaluate this technique on three different style transformations: sentiment, gender and political slant. Compared to two state-of-the-art style transfer modeling techniques we show improvements both in automatic evaluation of style transfer and in manual evaluation of meaning preservation and fluency.</abstract>
       <url hash="2cc39ade">P18-1080</url>
@@ -1153,7 +1153,7 @@
     </paper>
     <paper id="85">
       <title>Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search</title>
-      <author><first>Jamie</first><last>Kiros</last></author>
+      <author id="jamie-kiros"><first>Jamie</first><last>Kiros</last></author>
       <author><first>William</first><last>Chan</last></author>
       <author><first>Geoffrey</first><last>Hinton</last></author>
       <pages>922–933</pages>
@@ -1167,7 +1167,7 @@
       <title>What Action Causes This? Towards Naive Physical Action-Effect Prediction</title>
       <author><first>Qiaozi</first><last>Gao</last></author>
       <author><first>Shaohua</first><last>Yang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <pages>934–945</pages>
       <abstract>Despite recent advances in knowledge representation, automated reasoning, and machine learning, artificial agents still lack the ability to understand basic action-effect relations regarding the physical world, for example, the action of cutting a cucumber most likely leads to the state where the cucumber is broken apart into smaller pieces. If artificial agents (e.g., robots) ever become our partners in joint tasks, it is critical to empower them with such action-effect understanding so that they can reason about the state of the world and plan for actions. Towards this goal, this paper introduces a new task on naive physical action-effect prediction, which addresses the relations between concrete actions (expressed in the form of verb-noun pairs) and their effects on the state of the physical world as depicted by images. We collected a dataset for this task and developed an approach that harnesses web image data through distant supervision to facilitate learning for action-effect prediction. Our empirical results have shown that web data can be used to complement a small number of seed examples (e.g., three examples for each action) for model learning. This opens up possibilities for agents to learn physical action-effect relations for tasks at hand through communication with humans with a few examples.</abstract>
@@ -1208,8 +1208,8 @@
     <paper id="89">
       <title>Identifying Transferable Information Across Domains for Cross-domain Sentiment Classification</title>
       <author><first>Raksha</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Himanshu Sharad</first><last>Bhatt</last></author>
       <pages>968–978</pages>
       <abstract>Getting manually labeled data in each domain is always an expensive and a time consuming task. Cross-domain sentiment analysis has emerged as a demanding concept where a labeled source domain facilitates a sentiment classifier for an unlabeled target domain. However, polarity orientation (positive or negative) and the significance of a word to express an opinion often differ from one domain to another domain. Owing to these differences, cross-domain sentiment classification is still a challenging task. In this paper, we propose that words that do not change their polarity and significance represent the transferable (usable) information across domains for cross-domain sentiment classification. We present a novel approach based on χ2 test and cosine-similarity between context vector of words to identify polarity preserving significant words across domains. Furthermore, we show that a weighted ensemble of the classifiers enhances the cross-domain classification performance.</abstract>
@@ -1255,7 +1255,7 @@
     <paper id="92">
       <title>Working Memory Networks: Augmenting Memory Networks with a Relational Reasoning Module</title>
       <author><first>Juan</first><last>Pavez</last></author>
-      <author><first>Héctor</first><last>Allende</last></author>
+      <author id="hector-allende-cid"><first>Héctor</first><last>Allende</last></author>
       <author><first>Héctor</first><last>Allende-Cid</last></author>
       <pages>1000–1009</pages>
       <abstract>During the last years, there has been a lot of interest in achieving some kind of complex reasoning using deep neural networks. To do that, models like Memory Networks (MemNNs) have combined external memory storages and attention mechanisms. These architectures, however, lack of more complex reasoning mechanisms that could allow, for instance, relational reasoning. Relation Networks (RNs), on the other hand, have shown outstanding results in relational reasoning tasks. Unfortunately, their computational cost grows quadratically with the number of memories, something prohibitive for larger problems. To solve these issues, we introduce the Working Memory Network, a MemNN architecture with a novel working memory storage and reasoning module. Our model retains the relational reasoning abilities of the RN while reducing its computational complexity from quadratic to linear. We tested our model on the text QA dataset bAbI and the visual QA dataset NLVR. In the jointly trained bAbI-10k, we set a new state-of-the-art, achieving a mean error of less than 0.5%. Moreover, a simple ensemble of two of our models solves all 20 tasks in the joint version of the benchmark.</abstract>
@@ -1268,7 +1268,7 @@
     <paper id="93">
       <title>Reasoning with Sarcasm by Reading In-Between</title>
       <author><first>Yi</first><last>Tay</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <pages>1010–1020</pages>
@@ -1308,7 +1308,7 @@
     <paper id="96">
       <title>Strong Baselines for Neural Semi-Supervised Learning under Domain Shift</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1044–1054</pages>
       <abstract>Novel neural models have been proposed in recent years for learning under domain shift. Most models, however, only evaluate on a single task, on proprietary datasets, or compare to weak baselines, which makes comparison of models difficult. In this paper, we re-evaluate classic general-purpose bootstrapping approaches in the context of neural networks under domain shifts vs. recent neural approaches and propose a novel multi-task tri-training method that reduces the time and space complexity of classic tri-training. Extensive experiments on two benchmarks for part-of-speech tagging and sentiment analysis are negative: while our novel method establishes a new state-of-the-art for sentiment analysis, it does not fare consistently the best. More importantly, we arrive at the somewhat surprising conclusion that classic tri-training, with some additions, outperforms the state-of-the-art for NLP. Hence classic approaches constitute an important and strong baseline.</abstract>
       <url hash="31a011c8">P18-1096</url>
@@ -1333,7 +1333,7 @@
     <paper id="98">
       <title>A Neural Architecture for Automated <fixed-case>ICD</fixed-case> Coding</title>
       <author><first>Pengtao</first><last>Xie</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>1066–1076</pages>
       <abstract>The International Classification of Diseases (ICD) provides a hierarchy of diagnostic codes for classifying diseases. Medical coding – which assigns a subset of ICD codes to a patient visit – is a mandatory process that is crucial for patient care and billing. Manual coding is time-consuming, expensive, and error prone. In this paper, we build a neural architecture for automated coding. It takes the diagnosis descriptions (DDs) of a patient as inputs and selects the most relevant ICD codes. This architecture contains four major ingredients: (1) tree-of-sequences LSTM encoding of code descriptions (CDs), (2) adversarial learning for reconciling the different writing styles of DDs and CDs, (3) isotonic constraints for incorporating the importance order among the assigned codes, and (4) attentional matching for performing many-to-one and one-to-many mappings from DDs to CDs. We demonstrate the effectiveness of the proposed methods on a clinical datasets with 59K patient visits.</abstract>
       <url hash="f270be80">P18-1098</url>
@@ -1344,7 +1344,7 @@
     <paper id="99">
       <title>Domain Adaptation with Adversarial Training and Graph Embeddings</title>
       <author><first>Firoj</first><last>Alam</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Muhammad</first><last>Imran</last></author>
       <pages>1077–1087</pages>
       <abstract>The success of deep neural networks (DNNs) is heavily dependent on the availability of labeled data. However, obtaining labeled data is a big challenge in many real-world problems. In such scenarios, a DNN model can leverage labeled and unlabeled data from a related domain, but it has to deal with the shift in data distributions between the source and the target domains. In this paper, we study the problem of classifying social media posts during a crisis event (e.g., Earthquake). For that, we use labeled and unlabeled data from past similar events (e.g., Flood) and unlabeled data for the current event. We propose a novel model that performs adversarial learning based domain adaptation to deal with distribution drifts and graph based semi-supervised learning to leverage unlabeled data within a single unified deep learning framework. Our experiments with two real-world crisis datasets collected from Twitter demonstrate significant improvements over several baselines.</abstract>
@@ -1389,7 +1389,7 @@
       <author><first>Yixing</first><last>Fan</last></author>
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>1108–1117</pages>
       <abstract>In conversation, a general response (e.g., “I don’t know”) could correspond to a large variety of input utterances. Previous generative conversational models usually employ a single model to learn the relationship between different utterance-response pairs, thus tend to favor general and trivial responses which appear frequently. To address this problem, we propose a novel controlled response generation mechanism to handle different utterance-response relationships in terms of specificity. Specifically, we introduce an explicit specificity control variable into a sequence-to-sequence model, which interacts with the usage representation of words through a Gaussian Kernel layer, to guide the model to generate responses at different specificity levels. We describe two ways to acquire distant labels for the specificity control variable in learning. Empirical studies show that our model can significantly outperform the state-of-the-art response generation models under both automatic and human evaluations.</abstract>
       <url hash="a1037a36">P18-1102</url>
@@ -1405,15 +1405,15 @@
       <author><first>Daxiang</first><last>Dong</last></author>
       <author><first>Yi</first><last>Liu</last></author>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Dianhai</first><last>Yu</last></author>
       <author><first>Hua</first><last>Wu</last></author>
       <pages>1118–1127</pages>
       <abstract>Human generates responses relying on semantic and functional dependencies, including coreference relation, among dialogue elements and their context. In this paper, we investigate matching a response with its multi-turn context using dependency information based entirely on attention. Our solution is inspired by the recently proposed Transformer in machine translation (Vaswani et al., 2017) and we extend the attention mechanism in two ways. First, we construct representations of text segments at different granularities solely with stacked self-attention. Second, we try to extract the truly matched segment pairs with attention across the context and response. We jointly introduce those two kinds of attention in one uniform neural network. Experiments on two large-scale multi-turn response selection tasks show that our proposed model significantly outperforms the state-of-the-art models.</abstract>
       <url hash="500e4365">P18-1103</url>
       <attachment type="software" hash="dd1ac09a">P18-1103.Software.zip</attachment>
-      <video href="P18-1103.mp4"/>
       <attachment type="presentation" hash="4eaacffc">P18-1103.Presentation.pdf</attachment>
+      <video href="P18-1103.mp4"/>
       <doi>10.18653/v1/P18-1103</doi>
       <bibkey>zhou-etal-2018-multi</bibkey>
     </paper>
@@ -1432,13 +1432,13 @@
     <paper id="105">
       <title><fixed-case>T</fixed-case>aylor’s law for Human Linguistic Sequences</title>
       <author><first>Tatsuru</first><last>Kobayashi</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>1138–1148</pages>
       <abstract>Taylor’s law describes the fluctuation characteristics underlying a system in which the variance of an event within a time span grows by a power law with respect to the mean. Although Taylor’s law has been applied in many natural and social systems, its application for language has been scarce. This article describes a new way to quantify Taylor’s law in natural language and conducts Taylor analysis of over 1100 texts across 14 languages. We found that the Taylor exponents of natural language written texts exhibit almost the same value. The exponent was also compared for other language-related data, such as the child-directed speech, music, and programming languages. The results show how the Taylor exponent serves to quantify the fundamental structural complexity underlying linguistic time series. The article also shows the applicability of these findings in evaluating language models.</abstract>
       <url hash="2d6c9cc2">P18-1105</url>
       <attachment type="note" hash="70df51c2">P18-1105.Notes.pdf</attachment>
-      <video href="P18-1105.mp4"/>
       <attachment type="presentation" hash="317c0a34">P18-1105.Presentation.pdf</attachment>
+      <video href="P18-1105.mp4"/>
       <doi>10.18653/v1/P18-1105</doi>
       <bibkey>kobayashi-tanaka-ishii-2018-taylors</bibkey>
     </paper>
@@ -1499,7 +1499,7 @@
     <paper id="110">
       <title>Extending a Parser to Distant Domains Using a Few Dozen Partially Annotated Examples</title>
       <author><first>Vidur</first><last>Joshi</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Mark</first><last>Hopkins</last></author>
       <pages>1190–1199</pages>
       <abstract>We revisit domain adaptation for parsers in the neural era. First we show that recent advances in word representations greatly diminish the need for domain adaptation when the target domain is syntactically similar to the source domain. As evidence, we train a parser on the Wall Street Journal alone that achieves over 90% F1 on the Brown corpus. For more syntactically distant domains, we provide a simple way to adapt a parser using only dozens of partial annotations. For instance, we increase the percentage of error-free geometry-domain parses in a held-out set from 45% to 73% using approximately five dozen training examples. In the process, we demonstrate a new state-of-the-art single model result on the Wall Street Journal test set of 94.3%. This is an absolute increase of 1.7% over the previous state-of-the-art of 92.6%.</abstract>
@@ -1565,7 +1565,7 @@
       <title>A Stochastic Decoder for Neural Machine Translation</title>
       <author><first>Philip</first><last>Schulz</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1243–1252</pages>
       <abstract>The process of translation is ambiguous, in that there are typically many valid translations for a given sentence. This gives rise to significant variation in parallel corpora, however, most current models of machine translation do not account for this variation, instead treating the problem as a deterministic process. To this end, we present a deep generative model of machine translation which incorporates a chain of latent variables, in order to account for local lexical and syntactic variation in parallel corpora. We provide an in-depth analysis of the pitfalls encountered in variational inference for training deep generative models. Experiments on several different language pairs demonstrate that the model consistently improves over strong baselines.</abstract>
       <url hash="6a159e01">P18-1115</url>
@@ -1579,8 +1579,8 @@
       <author><first>Chunpeng</first><last>Ma</last></author>
       <author><first>Akihiro</first><last>Tamura</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1253–1263</pages>
       <abstract>Tree-based neural machine translation (NMT) approaches, although achieved impressive performance, suffer from a major drawback: they only use the 1-best parse tree to direct the translation, which potentially introduces translation mistakes due to parsing errors. For statistical machine translation (SMT), forest-based methods have been proven to be effective for solving this problem, while for NMT this kind of approach has not been attempted. This paper proposes a forest-based NMT method that translates a linearized packed forest under a simple sequence-to-sequence framework (i.e., a forest-to-sequence NMT model). The BLEU score of the proposed method is higher than that of the sequence-to-sequence NMT, tree-based NMT, and forest-based SMT systems.</abstract>
       <url hash="7acfdfbb">P18-1116</url>
@@ -1609,7 +1609,7 @@
     <paper id="118">
       <title>Document Context Neural Machine Translation with Memory Networks</title>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1275–1284</pages>
       <abstract>We present a document-level neural machine translation model which takes both source and target document context into account using memory networks. We model the problem as a structured prediction problem with interdependencies among the observed and hidden variables, i.e., the source sentences and their unobserved target translations in the document. The resulting structured prediction problem is tackled with a neural translation model equipped with two memory components, one each for the source and target side, to capture the documental interdependencies. We train the model end-to-end, and propose an iterative decoding algorithm based on block coordinate descent. Experimental results of English translations from French, German, and Estonian documents show that our model is effective in exploiting both source and target document context, and statistically significantly outperforms the previous work in terms of BLEU and METEOR.</abstract>
       <url hash="d06ccb47">P18-1118</url>
@@ -1635,7 +1635,7 @@
     <paper id="120">
       <title>Learning Prototypical Goal Activities for Locations</title>
       <author><first>Tianyu</first><last>Jiang</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1297–1307</pages>
       <abstract>People go to different places to engage in activities that reflect their goals. For example, people go to restaurants to eat, libraries to study, and churches to pray. We refer to an activity that represents a common reason why people typically go to a location as a prototypical goal activity (goal-act). Our research aims to learn goal-acts for specific locations using a text corpus and semi-supervised learning. First, we extract activities and locations that co-occur in goal-oriented syntactic patterns. Next, we create an activity profile matrix and apply a semi-supervised label propagation algorithm to iteratively revise the activity strengths for different locations using a small set of labeled data. We show that this approach outperforms several baseline methods when judged against goal-acts identified by human annotators.</abstract>
       <url hash="eabdfcec">P18-1120</url>
@@ -1675,7 +1675,7 @@
       <author><first>Gaurav</first><last>Pandey</last></author>
       <author><first>Danish</first><last>Contractor</last></author>
       <author><first>Vineet</first><last>Kumar</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>1329–1338</pages>
       <abstract>In this paper we present the Exemplar Encoder-Decoder network (EED), a novel conversation model that learns to utilize <i>similar</i> examples from training data to generate responses. Similar conversation examples (context-response pairs) from training data are retrieved using a traditional TF-IDF based retrieval model and the corresponding responses are used by our decoder to generate the ground truth response. The contribution of each retrieved response is weighed by the similarity of corresponding context with the input context. As a result, our model learns to assign higher similarity scores to those retrieved contexts whose responses are crucial for generating the final response. We present detailed experiments on two large data sets and we find that our method out-performs state of the art sequence to sequence generative models on several recently proposed evaluation metrics.</abstract>
       <url hash="281a66d6">P18-1123</url>
@@ -1717,13 +1717,13 @@
     <paper id="126">
       <title>Are <fixed-case>BLEU</fixed-case> and Meaning Representation in Opposition?</title>
       <author><first>Ondřej</first><last>Cífka</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>1362–1371</pages>
       <abstract>One of possible ways of obtaining continuous-space sentence representations is by training neural machine translation (NMT) systems. The recent attention mechanism however removes the single point in the neural network from which the source sentence representation can be extracted. We propose several variations of the attentive NMT architecture bringing this meeting point back. Empirical evaluation suggests that the better the translation quality, the worse the learned sentence representations serve in a wide range of classification and similarity tasks.</abstract>
       <url hash="716d3b10">P18-1126</url>
       <attachment type="note" hash="e9e3aa09">P18-1126.Notes.pdf</attachment>
-      <video href="P18-1126.mp4"/>
       <attachment type="presentation" hash="ad3c314b">P18-1126.Presentation.pdf</attachment>
+      <video href="P18-1126.mp4"/>
       <doi>10.18653/v1/P18-1126</doi>
       <bibkey>cifka-bojar-2018-bleu</bibkey>
     </paper>
@@ -1775,13 +1775,13 @@
       <author><first>Jingzhou</first><last>Liu</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1403–1414</pages>
       <abstract>We introduce a novel architecture for dependency parsing: stack-pointer networks (StackPtr). Combining pointer networks (Vinyals et al., 2015) with an internal stack, the proposed model first reads and encodes the whole sentence, then builds the dependency tree top-down (from root-to-leaf) in a depth-first fashion. The stack tracks the status of the depth-first search and the pointer networks select one child for the word at the top of the stack at each step. The StackPtr parser benefits from the information of whole sentence and all previously derived subtree structures, and removes the left-to-right restriction in classical transition-based parsers. Yet the number of steps for building any (non-projective) parse tree is linear in the length of the sentence just as other transition-based parsers, yielding an efficient decoding algorithm with <tex-math>O(n^2)</tex-math> time complexity. We evaluate our model on 29 treebanks spanning 20 languages and different dependency annotation schemas, and achieve state-of-the-art performances on 21 of them</abstract>
       <url hash="d8e9fb26">P18-1130</url>
       <attachment type="note" hash="d32d7eab">P18-1130.Notes.zip</attachment>
-      <video href="P18-1130.mp4"/>
       <attachment type="presentation" hash="71eaa359">P18-1130.Presentation.pdf</attachment>
+      <video href="P18-1130.mp4"/>
       <doi>10.18653/v1/P18-1130</doi>
       <bibkey>ma-etal-2018-stack</bibkey>
     </paper>
@@ -1800,11 +1800,11 @@
     <paper id="132">
       <title><fixed-case>LSTM</fixed-case>s Can Learn Syntax-Sensitive Dependencies Well, But Modeling Structure Makes Them Better</title>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>1426–1436</pages>
       <abstract>Language exhibits hierarchical structure, but recent work using a subject-verb agreement diagnostic argued that state-of-the-art language models, LSTMs, fail to learn long-range syntax sensitive dependencies. Using the same diagnostic, we show that, in fact, LSTMs do succeed in learning such dependencies—provided they have enough capacity. We then explore whether models that have access to explicit syntactic information learn agreement more effectively, and how the way in which this structural information is incorporated into the model impacts performance. We find that the mere presence of syntactic information does not improve accuracy, but when model architecture is determined by syntax, number agreement is improved. Further, we find that the choice of how syntactic structure is built affects how well number agreement is learned: top-down construction outperforms left-corner and bottom-up variants in capturing non-local structural dependencies.</abstract>
       <url hash="c089094b">P18-1132</url>
@@ -1869,7 +1869,7 @@
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>1479–1488</pages>
       <abstract>Sequence to sequence (Seq2Seq) models have been widely used for response generation in the area of conversation. However, the requirements for different conversation scenarios are distinct. For example, customer service requires the generated responses to be specific and accurate, while chatbot prefers diverse responses so as to attract different users. The current Seq2Seq model fails to meet these diverse requirements, by using a general average likelihood as the optimization criteria. As a result, it usually generates safe and commonplace responses, such as ‘I don’t know’. In this paper, we propose two tailored optimization criteria for Seq2Seq to different conversation scenarios, i.e., the maximum generated likelihood for specific-requirement scenario, and the conditional value-at-risk for diverse-requirement scenario. Experimental results on the Ubuntu dialogue corpus (Ubuntu service scenario) and Chinese Weibo dataset (social chatbot scenario) show that our proposed models not only satisfies diverse requirements for different scenarios, but also yields better performances against traditional Seq2Seq models in terms of both metric-based and human evaluations.</abstract>
       <url hash="6f641bd6">P18-1137</url>
@@ -1922,8 +1922,8 @@
       <author><first>Philipp</first><last>Dufter</last></author>
       <author><first>Mengjie</first><last>Zhao</last></author>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1520–1530</pages>
       <abstract>We present a new method for estimating vector space representations of words: embedding learning by concept induction. We test this method on a highly parallel corpus and learn semantic representations of words in 1259 different languages in a single common space. An extensive experimental evaluation on crosslingual word similarity and sentiment analysis indicates that concept-based multilingual embedding learning performs better than previous approaches.</abstract>
       <url hash="c51983ff">P18-1141</url>
@@ -1951,7 +1951,7 @@
       <author><first>Gayatri</first><last>Bhat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
       <pages>1543–1553</pages>
       <abstract>Training language models for Code-mixed (CM) language is known to be a difficult problem because of lack of data compounded by the increased confusability due to the presence of more than one language. We present a computational technique for creation of grammatically valid artificial CM data based on the Equivalence Constraint Theory. We show that when training examples are sampled appropriately from this synthetic data and presented in certain order (aka training curriculum) along with monolingual and real CM data, it can significantly reduce the perplexity of an RNN-based language model. We also show that randomly generated CM data does not help in decreasing the perplexity of the LMs.</abstract>
@@ -1988,7 +1988,7 @@
       <title>Higher-order Relation Schema Induction using Tensor Factorization with Back-off and Aggregation</title>
       <author><first>Madhav</first><last>Nimishakavi</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1575–1584</pages>
       <abstract>Relation Schema Induction (RSI) is the problem of identifying type signatures of arguments of relations from unlabeled text. Most of the previous work in this area have focused only on binary RSI, i.e., inducing only the subject and object type signatures per relation. However, in practice, many relations are high-order, i.e., they have more than two arguments and inducing type signatures of all arguments is necessary. For example, in the sports domain, inducing a schema win(WinningPlayer, OpponentPlayer, Tournament, Location) is more informative than inducing just win(WinningPlayer, OpponentPlayer). We refer to this problem as Higher-order Relation Schema Induction (HRSI). In this paper, we propose Tensor Factorization with Back-off and Aggregation (TFBA), a novel framework for the HRSI problem. To the best of our knowledge, this is the first attempt at inducing higher-order relation schemata from unlabeled text. Using the experimental analysis on three real world datasets we show how TFBA helps in dealing with sparsity and induce higher-order schemata.</abstract>
       <url hash="49af9813">P18-1146</url>
@@ -1998,8 +1998,8 @@
     </paper>
     <paper id="147">
       <title>Discovering Implicit Knowledge with Unary Relations</title>
-      <author><first>Michael</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>1585–1594</pages>
       <abstract>State-of-the-art relation extraction approaches are only able to recognize relationships between mentions of entity arguments stated explicitly in the text and typically localized to the same sentence. However, the vast majority of relations are either implicit or not sententially localized. This is a major problem for Knowledge Base Population, severely limiting recall. In this paper we propose a new methodology to identify relations between two entities, consisting of detecting a very large number of unary relations, and using them to infer missing entities. We describe a deep learning architecture able to learn thousands of such relations very efficiently by using a common deep learning based representation. Our approach largely outperforms state of the art relation extraction technology on a newly introduced web scale knowledge base population benchmark, that we release to the research community.</abstract>
       <url hash="503d3beb">P18-1147</url>
@@ -2023,7 +2023,7 @@
       <author><first>Shikhar</first><last>Vashishth</last></author>
       <author><first>Shib Sankar</first><last>Dasgupta</last></author>
       <author><first>Swayambhu Nath</first><last>Ray</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>1605–1615</pages>
       <abstract>Document date is essential for many important tasks, such as document retrieval, summarization, event detection, etc. While existing approaches for these tasks assume accurate knowledge of the document date, this is not always available, especially for arbitrary documents from the Web. Document Dating is a challenging problem which requires inference over the temporal structure of the document. Prior document dating systems have largely relied on handcrafted features while ignoring such document-internal structures. In this paper, we propose NeuralDater, a Graph Convolutional Network (GCN) based document dating approach which jointly exploits syntactic and temporal graph structures of document in a principled way. To the best of our knowledge, this is the first application of deep learning for the problem of document dating. Through extensive experiments on real-world datasets, we find that NeuralDater significantly outperforms state-of-the-art baseline by 19% absolute (45% relative) accuracy points.</abstract>
       <url hash="2b0827e7">P18-1149</url>
@@ -2046,7 +2046,7 @@
     </paper>
     <paper id="151">
       <title><fixed-case>GTR</fixed-case>-<fixed-case>LSTM</fixed-case>: A Triple Encoder for Sentence Generation from <fixed-case>RDF</fixed-case> Data</title>
-      <author><first>Bayu Distiawan</first><last>Trisedya</last></author>
+      <author id="bayu-distiawan"><first>Bayu Distiawan</first><last>Trisedya</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Wei</first><last>Wang</last></author>
@@ -2089,7 +2089,7 @@
       <title>Learning to Generate Move-by-Move Commentary for Chess Games from Large-Scale Social Forum Data</title>
       <author><first>Harsh</first><last>Jhamtani</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Taylor</first><last>Berg-Kirkpatrick</last></author>
       <pages>1661–1671</pages>
@@ -2103,7 +2103,7 @@
       <title>From Credit Assignment to Entropy Regularization: Two New Algorithms for Neural Sequence Prediction</title>
       <author><first>Zihang</first><last>Dai</last></author>
       <author><first>Qizhe</first><last>Xie</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1672–1682</pages>
       <abstract>In this work, we study the credit assignment problem in reward augmented maximum likelihood (RAML) learning, and establish a theoretical equivalence between the token-level counterpart of RAML and the entropy regularized reinforcement learning. Inspired by the connection, we propose two sequence prediction algorithms, one extending RAML with fine-grained credit assignment and the other improving Actor-Critic with a systematic entropy regularization. On two benchmark datasets, we show the proposed algorithms outperform RAML and Actor-Critic respectively, providing new alternatives to sequence prediction.</abstract>
       <url hash="29ae0821">P18-1155</url>
@@ -2116,7 +2116,7 @@
       <title><fixed-case>D</fixed-case>uo<fixed-case>RC</fixed-case>: Towards Complex Language Understanding with Paraphrased Reading Comprehension</title>
       <author><first>Amrita</first><last>Saha</last></author>
       <author><first>Rahul</first><last>Aralikatte</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <author><first>Karthik</first><last>Sankaranarayanan</last></author>
       <pages>1683–1693</pages>
       <abstract>We propose DuoRC, a novel dataset for Reading Comprehension (RC) that motivates several new challenges for neural approaches in language understanding beyond those offered by existing RC datasets. DuoRC contains 186,089 unique question-answer pairs created from a collection of 7680 pairs of movie plots where each pair in the collection reflects two versions of the same movie - one from Wikipedia and the other from IMDb - written by two different authors. We asked crowdsourced workers to create questions from one version of the plot and a different set of workers to extract or synthesize answers from the other version. This unique characteristic of DuoRC where questions and answers are created from different versions of a document narrating the same underlying story, ensures by design, that there is very little lexical overlap between the questions created from one version and the segments containing the answer in the other version. Further, since the two versions have different levels of plot detail, narration style, vocabulary, etc., answering questions from the second version requires deeper language understanding and incorporating external background knowledge. Additionally, the narrative style of passages arising from movie plots (as opposed to typical descriptive passages in existing datasets) exhibits the need to perform complex reasoning over events across multiple sentences. Indeed, we observe that state-of-the-art neural RC models which have achieved near human performance on the SQuAD dataset, even when coupled with traditional NLP techniques to address the challenges presented in DuoRC exhibit very poor performance (F1 score of 37.42% on DuoRC v/s 86% on SQuAD dataset). This opens up several interesting research avenues wherein DuoRC could complement other RC datasets to explore novel neural approaches for studying language understanding.</abstract>
@@ -2219,10 +2219,10 @@
     <paper id="164">
       <title>Attention Focusing for Neural Machine Translation by Bridging Source and Target Embeddings</title>
       <author><first>Shaohui</first><last>Kuang</last></author>
-      <author><first>Junhui</first><last>Li</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Weihua</first><last>Luo</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <pages>1767–1776</pages>
       <abstract>In neural machine translation, a source sequence of words is encoded into a vector from which a target sequence is generated in the decoding phase. Differently from statistical machine translation, the associations between source words and their possible target counterparts are not explicitly stored. Source and target words are at the two ends of a long information processing procedure, mediated by hidden states at both the source encoding and the target decoding phases. This makes it possible that a source word is incorrectly translated into a target word that is not any of its admissible equivalent counterparts in the target language. In this paper, we seek to somewhat shorten the distance between source and target words in that procedure, and thus strengthen their association, by means of a method we term bridging source and target word embeddings. We experiment with three strategies: (1) a source-side bridging model, where source word embeddings are moved one step closer to the output target sequence; (2) a target-side bridging model, which explores the more relevant source word embeddings for the prediction of the target sequence; and (3) a direct bridging model, which directly connects source and target word embeddings seeking to minimize errors in the translation of ones by the others. Experiments and analysis presented in this paper demonstrate that the proposed bridging models are able to significantly improve quality of both sentence translation, in general, and alignment and translation of individual source words with target words, in particular.</abstract>
       <url hash="132a2c64">P18-1164</url>
@@ -2246,7 +2246,7 @@
     <paper id="166">
       <title>Accelerating Neural Transformer via an Average Attention Network</title>
       <author><first>Biao</first><last>Zhang</last></author>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Jinsong</first><last>Su</last></author>
       <pages>1789–1798</pages>
       <abstract>With parallelizable attention networks, the neural Transformer is very fast to train. However, due to the auto-regressive architecture and self-attention in the decoder, the decoding procedure becomes slow. To alleviate this issue, we propose an average attention network as an alternative to the self-attention network in the decoder of the neural Transformer. The average attention network consists of two layers, with an average layer that models dependencies on previous positions and a gating layer that is stacked over the average layer to enhance the expressiveness of the proposed attention network. We apply this network on the decoder part of the neural Transformer to replace the original target-side self-attention model. With masking tricks and dynamic programming, our model enables the neural Transformer to decode sentences over four times faster than its original version with almost no loss in training time and translation performance. We conduct a series of experiments on WMT17 translation tasks, where on 6 different language pairs, we obtain robust and consistent speed-ups in decoding.</abstract>
@@ -2341,13 +2341,13 @@
       <title>Backpropagating through Structured Argmax using a <fixed-case>SPIGOT</fixed-case></title>
       <author><first>Hao</first><last>Peng</last></author>
       <author><first>Sam</first><last>Thomson</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1863–1873</pages>
       <abstract>We introduce structured projection of intermediate gradients (SPIGOT), a new method for backpropagating through neural networks that include hard-decision structured predictions (e.g., parsing) in intermediate layers. SPIGOT requires no marginal inference, unlike structured attention networks and reinforcement learning-inspired solutions. Like so-called straight-through estimators, SPIGOT defines gradient-like quantities associated with intermediate nondifferentiable operations, allowing backpropagation before and after them; SPIGOT’s proxy aims to ensure that, after a parameter update, the intermediate structure will remain well-formed. We experiment on two structured NLP pipelines: syntactic-then-semantic dependency parsing, and semantic parsing followed by sentiment classification. We show that training with SPIGOT leads to a larger improvement on the downstream task than a modularly-trained pipeline, the straight-through estimator, and structured attention, reaching a new state of the art on semantic dependency parsing.</abstract>
       <url hash="e1fc2bc1">P18-1173</url>
       <attachment type="note" hash="ec8041e5">P18-1173.Notes.pdf</attachment>
-      <video href="P18-1173.mp4"/>
       <attachment type="presentation" hash="bc2aed51">P18-1173.Presentation.pdf</attachment>
+      <video href="P18-1173.mp4"/>
       <doi>10.18653/v1/P18-1173</doi>
       <bibkey>peng-etal-2018-backpropagating</bibkey>
     </paper>
@@ -2355,7 +2355,7 @@
       <title>Learning How to Actively Learn: A Deep Imitation Learning Approach</title>
       <author><first>Ming</first><last>Liu</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>1874–1883</pages>
       <abstract>Heuristic-based active learning (AL) methods are limited when the data distribution of the underlying learning problems vary. We introduce a method that learns an AL “policy” using “imitation learning” (IL). Our IL-based approach makes use of an efficient and effective “algorithmic expert”, which provides the policy learner with good actions in the encountered AL situations. The AL strategy is then learned with a feedforward network, mapping situations to most informative query datapoints. We evaluate our method on two different tasks: text classification and named entity recognition. Experimental results show that our IL-based AL strategy is more effective than strong previous methods using heuristics and reinforcement learning.</abstract>
       <url hash="b7e43836">P18-1174</url>
@@ -2398,7 +2398,7 @@
     <paper id="177">
       <title>Harvesting Paragraph-level Question-Answer Pairs from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Xinya</first><last>Du</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>1907–1917</pages>
       <abstract>We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia.</abstract>
       <url hash="ac441db1">P18-1177</url>
@@ -2428,14 +2428,14 @@
     <paper id="179">
       <title>Language Generation via <fixed-case>DAG</fixed-case> Transduction</title>
       <author><first>Yajie</first><last>Ye</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>1928–1937</pages>
       <abstract>A DAG automaton is a formal device for manipulating graphs. By augmenting a DAG automaton with transduction rules, a DAG transducer has potential applications in fundamental NLP tasks. In this paper, we propose a novel DAG transducer to perform graph-to-program transformation. The target structure of our transducer is a program licensed by a declarative programming language rather than linguistic structures. By executing such a program, we can easily get a surface string. Our transducer is designed especially for natural language generation (NLG) from type-logical semantic graphs. Taking Elementary Dependency Structures, a format of English Resource Semantics, as input, our NLG system achieves a BLEU-4 score of 68.07. This remarkable result demonstrates the feasibility of applying a DAG transducer to resolve NLG, as well as the effectiveness of our design.</abstract>
       <url hash="079820d7">P18-1179</url>
       <attachment type="software" hash="dce33220">P18-1179.Software.zip</attachment>
-      <video href="P18-1179.mp4"/>
       <attachment type="presentation" hash="a251bae4">P18-1179.Presentation.pdf</attachment>
+      <video href="P18-1179.mp4"/>
       <doi>10.18653/v1/P18-1179</doi>
       <bibkey>ye-etal-2018-language</bibkey>
     </paper>
@@ -2455,26 +2455,26 @@
     <paper id="181">
       <title>Deep-speare: A joint neural model of poetic language, meter and rhyme</title>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Adam</first><last>Hammond</last></author>
       <pages>1948–1958</pages>
       <abstract>In this paper, we propose a joint architecture that captures language, rhyme and meter for sonnet modelling. We assess the quality of generated poems using crowd and expert judgements. The stress and rhyme models perform very well, as generated poems are largely indistinguishable from human-written poems. Expert evaluation, however, reveals that a vanilla language model captures meter implicitly, and that machine-generated poems still underperform in terms of readability and emotion. Our research shows the importance expert evaluation for poetry generation, and that future research should look beyond rhyme/meter and focus on poetic language.</abstract>
       <url hash="454c161d">P18-1181</url>
       <attachment type="note" hash="1ed3ad8d">P18-1181.Notes.pdf</attachment>
-      <video href="P18-1181.mp4"/>
       <attachment type="presentation" hash="f3ed4c1b">P18-1181.Presentation.pdf</attachment>
+      <video href="P18-1181.mp4"/>
       <doi>10.18653/v1/P18-1181</doi>
       <bibkey>lau-etal-2018-deep</bibkey>
     </paper>
     <paper id="182">
       <title><fixed-case>N</fixed-case>eural<fixed-case>REG</fixed-case>: An end-to-end approach to referring expression generation</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Diego</first><last>Moussallem</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>1959–1969</pages>
       <abstract>Traditionally, Referring Expression Generation (REG) models first decide on the form and then on the content of references to discourse entities in text, typically relying on features such as salience and grammatical function. In this paper, we present a new approach (NeuralREG), relying on deep neural networks, which makes decisions about form and content in one go without explicit feature extraction. Using a delexicalized version of the WebNLG corpus, we show that the neural model substantially improves over two strong baselines.</abstract>
       <url hash="c2d580db">P18-1182</url>
@@ -2486,13 +2486,13 @@
     <paper id="183">
       <title>Stock Movement Prediction from Tweets and Historical Prices</title>
       <author><first>Yumo</first><last>Xu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>1970–1979</pages>
       <abstract>Stock movement prediction is a challenging problem: the market is highly stochastic, and we make temporally-dependent predictions from chaotic data. We treat these three complexities and present a novel deep generative model jointly exploiting text and price signals for this task. Unlike the case with discriminative or topic modeling, our model introduces recurrent, continuous latent variables for a better treatment of stochasticity, and uses neural variational inference to address the intractable posterior inference. We also provide a hybrid objective with temporal auxiliary to flexibly capture predictive dependencies. We demonstrate the state-of-the-art performance of our proposed model on a new stock movement prediction dataset which we collected.</abstract>
       <url hash="2955e628">P18-1183</url>
       <attachment type="note" hash="7aaf4b5b">P18-1183.Notes.pdf</attachment>
-      <video href="P18-1183.mp4"/>
       <attachment type="presentation" hash="8198efe9">P18-1183.Presentation.pdf</attachment>
+      <video href="P18-1183.mp4"/>
       <doi>10.18653/v1/P18-1183</doi>
       <bibkey>xu-cohen-2018-stock</bibkey>
     </paper>
@@ -2500,7 +2500,7 @@
       <title>Rumor Detection on <fixed-case>T</fixed-case>witter with Tree-structured Recursive Neural Networks</title>
       <author><first>Jing</first><last>Ma</last></author>
       <author><first>Wei</first><last>Gao</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>1980–1989</pages>
       <abstract>Automatic rumor detection is technically very challenging. In this work, we try to learn discriminative features from tweets content by following their non-sequential propagation structure and generate more powerful representations for identifying different type of rumors. We propose two recursive neural models based on a bottom-up and a top-down tree-structured neural networks for rumor representation learning and classification, which naturally conform to the propagation layout of tweets. Results on two public Twitter datasets demonstrate that our recursive neural models 1) achieve much better performance than state-of-the-art approaches; 2) demonstrate superior capacity on detecting rumors at very early stage.</abstract>
       <url hash="a92a5852">P18-1184</url>
@@ -2513,7 +2513,7 @@
       <title>Visual Attention Model for Name Tagging in Multimodal Social Media</title>
       <author><first>Di</first><last>Lu</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <author><first>Ning</first><last>Zhang</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>1990–1999</pages>
@@ -2528,7 +2528,7 @@
       <title>Multimodal Named Entity Disambiguation for Noisy Social Media Posts</title>
       <author><first>Seungwhan</first><last>Moon</last></author>
       <author><first>Leonardo</first><last>Neves</last></author>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <pages>2000–2008</pages>
       <abstract>We introduce the new Multimodal Named Entity Disambiguation (MNED) task for multimodal social media posts such as Snapchat or Instagram captions, which are composed of short captions with accompanying images. Social media posts bring significant challenges for disambiguation tasks because 1) ambiguity not only comes from polysemous entities, but also from inconsistent or incomplete notations, 2) very limited context is provided with surrounding words, and 3) there are many emerging entities often unseen during training. To this end, we build a new dataset called SnapCaptionsKB, a collection of Snapchat image captions submitted to public and crowd-sourced stories, with named entity mentions fully annotated and linked to entities in an external knowledge base. We then build a deep zeroshot multimodal network for MNED that 1) extracts contexts from both text and image, and 2) predicts correct entity in the knowledge graph embeddings space, allowing for zeroshot disambiguation of entities unseen in training set as well. The proposed model significantly outperforms the state-of-the-art text-only NED models, showing efficacy and potentials of the MNED task.</abstract>
       <url hash="b8465ef6">P18-1186</url>
@@ -2539,14 +2539,14 @@
     <paper id="187">
       <title>Semi-supervised User Geolocation via Graph Convolutional Networks</title>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>2009–2019</pages>
       <abstract>Social media user geolocation is vital to many applications such as event detection. In this paper, we propose GCN, a multiview geolocation model based on Graph Convolutional Networks, that uses both text and network context. We compare GCN to the state-of-the-art, and to two baselines we propose, and show that our model achieves or is competitive with the state-of-the-art over three benchmark geolocation datasets when sufficient supervision is available. We also evaluate GCN under a minimal supervision scenario, and show it outperforms baselines. We find that highway network gates are essential for controlling the amount of useful neighbourhood expansion in GCN.</abstract>
       <url hash="844c0d8d">P18-1187</url>
       <attachment type="note" hash="cbc4c8cf">P18-1187.Notes.pdf</attachment>
-      <video href="P18-1187.mp4"/>
       <attachment type="presentation" hash="42c65759">P18-1187.Presentation.pdf</attachment>
+      <video href="P18-1187.mp4"/>
       <doi>10.18653/v1/P18-1187</doi>
       <bibkey>rahimi-etal-2018-semi</bibkey>
     </paper>
@@ -2555,7 +2555,7 @@
       <author><first>Shashi</first><last>Narayan</last></author>
       <author><first>Ronald</first><last>Cardenas</last></author>
       <author><first>Nikos</first><last>Papasarantopoulos</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <author><first>Jiangsheng</first><last>Yu</last></author>
       <author><first>Yi</first><last>Chang</last></author>
@@ -2563,8 +2563,8 @@
       <abstract>Document modeling is essential to a variety of natural language understanding tasks. We propose to use external information to improve document modeling for problems that can be framed as sentence extraction. We develop a framework composed of a hierarchical document encoder and an attention-based extractor with attention over external information. We evaluate our model on extractive document summarization (where the external information is image captions and the title of the document) and answer selection (where the external information is a question). We show that our model consistently outperforms strong baselines, in terms of both informativeness and fluency (for CNN document summarization) and achieves state-of-the-art results for answer selection on WikiQA and NewsQA.</abstract>
       <url hash="28da2122">P18-1188</url>
       <attachment type="note" hash="68738463">P18-1188.Notes.pdf</attachment>
-      <video href="P18-1188.mp4"/>
       <attachment type="presentation" hash="1e204689">P18-1188.Presentation.pdf</attachment>
+      <video href="P18-1188.mp4"/>
       <doi>10.18653/v1/P18-1188</doi>
       <bibkey>narayan-etal-2018-document</bibkey>
       <erratum id="1" hash="8eecd4c3" date="2022-09-20">P18-1188e1</erratum>
@@ -2573,13 +2573,13 @@
       <title>Neural Models for Documents with Metadata</title>
       <author><first>Dallas</first><last>Card</last></author>
       <author><first>Chenhao</first><last>Tan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2031–2040</pages>
       <abstract>Most real-world document collections involve various types of metadata, such as author, source, and date, and yet the most commonly-used approaches to modeling text corpora ignore this information. While specialized models have been developed for particular applications, few are widely used in practice, as customization typically requires derivation of a custom inference algorithm. In this paper, we build on recent advances in variational inference methods and propose a general neural framework, based on topic models, to enable flexible incorporation of metadata and allow for rapid exploration of alternative models. Our approach achieves strong performance, with a manageable tradeoff between perplexity, coherence, and sparsity. Finally, we demonstrate the potential of our framework through an exploration of a corpus of articles about US immigration.</abstract>
       <url hash="97b9e84a">P18-1189</url>
       <attachment type="note" hash="b70cb502">P18-1189.Notes.pdf</attachment>
-      <video href="P18-1189.mp4"/>
       <attachment type="presentation" hash="eb197400">P18-1189.Presentation.pdf</attachment>
+      <video href="P18-1189.mp4"/>
       <doi>10.18653/v1/P18-1189</doi>
       <bibkey>card-etal-2018-neural</bibkey>
     </paper>
@@ -2605,13 +2605,13 @@
       <author><first>Nicholas</first><last>FitzGerald</last></author>
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Luheng</first><last>He</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2051–2060</pages>
       <abstract>We present a new large-scale corpus of Question-Answer driven Semantic Role Labeling (QA-SRL) annotations, and the first high-quality QA-SRL parser. Our corpus, QA-SRL Bank 2.0, consists of over 250,000 question-answer pairs for over 64,000 sentences across 3 domains and was gathered with a new crowd-sourcing scheme that we show has high precision and good recall at modest cost. We also present neural models for two QA-SRL subtasks: detecting argument spans for a predicate and generating questions to label the semantic relationship. The best models achieve question accuracy of 82.6% and span-level accuracy of 77.6% (under human evaluation) on the full pipelined QA-SRL prediction task. They can also, as we show, be used to gather additional annotations at low cost.</abstract>
       <url hash="148c61b7">P18-1191</url>
       <attachment type="note" hash="5802ff60">P18-1191.Notes.pdf</attachment>
-      <video href="P18-1191.mp4"/>
       <attachment type="presentation" hash="80cb67c8">P18-1191.Presentation.pdf</attachment>
+      <video href="P18-1191.mp4"/>
       <doi>10.18653/v1/P18-1191</doi>
       <bibkey>fitzgerald-etal-2018-large</bibkey>
     </paper>
@@ -2661,14 +2661,14 @@
     <paper id="195">
       <title>Token-level and sequence-level loss smoothing for <fixed-case>RNN</fixed-case> language models</title>
       <author><first>Maha</first><last>Elbayad</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Jakob</first><last>Verbeek</last></author>
       <pages>2094–2103</pages>
       <abstract>Despite the effectiveness of recurrent neural network language models, their maximum likelihood estimation suffers from two limitations. It treats all sentences that do not match the ground truth as equally poor, ignoring the structure of the output space. Second, it suffers from ’exposure bias’: during training tokens are predicted given ground-truth sequences, while at test time prediction is conditioned on generated output sequences. To overcome these limitations we build upon the recent reward augmented maximum likelihood approach that encourages the model to predict sentences that are close to the ground truth according to a given performance metric. We extend this approach to token-level loss smoothing, and propose improvements to the sequence-level smoothing approach. Our experiments on two different tasks, image captioning and machine translation, show that token-level and sequence-level loss smoothing are complementary, and significantly improve results.</abstract>
       <url hash="b5503f3a">P18-1195</url>
       <attachment type="note" hash="f6220faa">P18-1195.Notes.pdf</attachment>
-      <video href="P18-1195.mp4"/>
       <attachment type="presentation" hash="c004cb40">P18-1195.Presentation.pdf</attachment>
+      <video href="P18-1195.mp4"/>
       <doi>10.18653/v1/P18-1195</doi>
       <bibkey>elbayad-etal-2018-token</bibkey>
     </paper>
@@ -2699,16 +2699,16 @@
     <paper id="198">
       <title>What you can cram into a single $&amp;!#* vector: Probing sentence embeddings for linguistic properties</title>
       <author><first>Alexis</first><last>Conneau</last></author>
-      <author><first>German</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
       <author><first>Guillaume</first><last>Lample</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>2126–2136</pages>
       <abstract>Although much effort has recently been devoted to training high-quality sentence embeddings, we still have a poor understanding of what they are capturing. “Downstream” tasks, often based on sentence classification, are commonly used to evaluate the quality of sentence representations. The complexity of the tasks makes it however difficult to infer what kind of information is present in the representations. We introduce here 10 probing tasks designed to capture simple linguistic features of sentences, and we use them to study embeddings generated by three different encoders trained in eight distinct ways, uncovering intriguing properties of both encoders and training methods.</abstract>
       <url hash="4eb8eb26">P18-1198</url>
       <attachment type="note" hash="002202d0">P18-1198.Notes.pdf</attachment>
-      <video href="P18-1198.mp4"/>
       <attachment type="presentation" hash="fc75646f">P18-1198.Presentation.pdf</attachment>
+      <video href="P18-1198.mp4"/>
       <doi>10.18653/v1/P18-1198</doi>
       <bibkey>conneau-etal-2018-cram</bibkey>
     </paper>
@@ -2745,7 +2745,7 @@
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>2160–2170</pages>
       <abstract>Most previous supervised event extraction methods have relied on features derived from manual annotations, and thus cannot be applied to new event types without extra annotation effort. We take a fresh look at event extraction and model it as a generic grounding problem: mapping each event mention to a specific type in a target event ontology. We design a transferable architecture of structural and compositional neural networks to jointly represent and map event mentions and types into a shared semantic space. Based on this new framework, we can select, for each event mention, the event type which is semantically closest in this space as its type. By leveraging manual annotations available for a small set of existing event types, our framework can be applied to new unseen event types without additional manual annotations. When tested on 23 unseen event types, our zero-shot framework, without manual annotations, achieved performance comparable to a supervised model trained from 3,000 sentences annotated with 500 event mentions.</abstract>
       <url hash="17ea6d7e">P18-1201</url>
@@ -2772,7 +2772,7 @@
       <author><first>Xiujun</first><last>Li</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Jingjing</first><last>Liu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2182–2192</pages>
       <abstract>Training a task-completion dialogue agent via reinforcement learning (RL) is costly because it requires many interactions with real users. One common alternative is to use a user simulator. However, a user simulator usually lacks the language complexity of human interlocutors and the biases in its design may tend to degrade the agent. To address these issues, we present Deep Dyna-Q, which to our knowledge is the first deep RL framework that integrates planning for task-completion dialogue policy learning. We incorporate into the dialogue agent a model of the environment, referred to as the world model, to mimic real user response and generate simulated experience. During dialogue policy learning, the world model is constantly updated with real user experience to approach real user behavior, and in turn, the dialogue agent is optimized using both real experience and simulated experience. The effectiveness of our approach is demonstrated on a movie-ticket booking task in both simulated and human-in-the-loop settings.</abstract>
       <url hash="c7dfe8d0">P18-1203</url>
@@ -2816,7 +2816,7 @@
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Dongchan</first><last>Kim</last></author>
       <author><first>Anjishnu</first><last>Kumar</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <pages>2214–2224</pages>
       <abstract>In this paper, we explore the task of mapping spoken language utterances to one of thousands of natural language understanding domains in intelligent personal digital assistants (IPDAs). This scenario is observed in mainstream IPDAs in industry that allow third parties to develop thousands of new domains to augment built-in first party domains to rapidly increase domain coverage and overall IPDA capabilities. We propose a scalable neural model architecture with a shared encoder, a novel attention mechanism that incorporates personalization information and domain-specific classifiers that solves the problem efficiently. Our architecture is designed to efficiently accommodate incremental domain additions achieving two orders of magnitude speed up compared to full model retraining. We consider the practical constraints of real-time production systems, and design to minimize memory footprint and runtime latency. We demonstrate that incorporating personalization significantly improves domain classification accuracy in a setting with thousands of overlapping domains.</abstract>
       <url hash="e76e0cce">P18-1206</url>
@@ -2875,13 +2875,13 @@
       <author><first>Hannah</first><last>Rohde</last></author>
       <author><first>Alexander</first><last>Johnson</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>2257–2267</pages>
       <abstract>Theories of discourse coherence posit relations between discourse segments as a key feature of coherent text. Our prior work suggests that multiple discourse relations can be simultaneously operative between two segments for reasons not predicted by the literature. Here we test how this joint presence can lead participants to endorse seemingly divergent conjunctions (e.g., BUT and SO) to express the link they see between two segments. These apparent divergences are not symptomatic of participant naivety or bias, but arise reliably from the concurrent availability of multiple relations between segments – some available through explicit signals and some via inference. We believe that these new results can both inform future progress in theoretical work on discourse coherence and lead to higher levels of performance in discourse parsing.</abstract>
       <url hash="5f7e866d">P18-1210</url>
       <attachment type="note" hash="2527de9b">P18-1210.Notes.pdf</attachment>
-      <video href="P18-1210.mp4"/>
       <attachment type="presentation" hash="08ee8bc7">P18-1210.Presentation.pdf</attachment>
+      <video href="P18-1210.mp4"/>
       <doi>10.18653/v1/P18-1210</doi>
       <bibkey>rohde-etal-2018-discourse</bibkey>
     </paper>
@@ -2932,7 +2932,7 @@
       <author><first>Chenliang</first><last>Li</last></author>
       <author><first>Wei</first><last>Zhou</last></author>
       <author><first>Feng</first><last>Ji</last></author>
-      <author><first>Yu</first><last>Duan</last></author>
+      <author id="yuguang-duan"><first>Yu</first><last>Duan</last></author>
       <author><first>Haiqing</first><last>Chen</last></author>
       <pages>2300–2310</pages>
       <abstract>In the era of big data, focused analysis for diverse topics with a short response time becomes an urgent demand. As a fundamental task, information filtering therefore becomes a critical necessity. In this paper, we propose a novel deep relevance model for zero-shot document filtering, named DAZER. DAZER estimates the relevance between a document and a category by taking a small set of seed words relevant to the category. With pre-trained word embeddings from a large external corpus, DAZER is devised to extract the relevance signals by modeling the hidden feature interactions in the word embedding space. The relevance signals are extracted through a gated convolutional process. The gate mechanism controls which convolution filters output the relevance signals in a category dependent manner. Experiments on two document collections of two different tasks (i.e., topic categorization and sentiment analysis) demonstrate that DAZER significantly outperforms the existing alternative solutions, including the state-of-the-art deep relevance ranking models.</abstract>
@@ -2974,7 +2974,7 @@
       <author><first>Qianqian</first><last>Xie</last></author>
       <author><first>Yanchun</first><last>Zhang</last></author>
       <author><first>Hua</first><last>Wang</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Jimin</first><last>Huang</last></author>
       <author><first>Gang</first><last>Tian</last></author>
       <pages>2332–2340</pages>
@@ -3007,7 +3007,7 @@
       <author><first>Kevin</first><last>Patel</last></author>
       <author><first>Samarth</first><last>Agrawal</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2352–2362</pages>
       <abstract>Predicting a reader’s rating of text quality is a challenging task that involves estimating different subjective aspects of the text, like structure, clarity, etc. Such subjective aspects are better handled using cognitive information. One such source of cognitive information is gaze behaviour. In this paper, we show that gaze behaviour does indeed help in effectively predicting the rating of text quality. To do this, we first we model text quality as a function of three properties - organization, coherence and cohesion. Then, we demonstrate how capturing gaze behaviour helps in predicting each of these properties, and hence the overall quality, by reporting improvements obtained by adding gaze features to traditional textual features for score prediction. We also hypothesize that if a reader has fully understood the text, the corresponding gaze behaviour would give a better indication of the assigned rating, as opposed to partial understanding. Our experiments validate this hypothesis by showing greater agreement between the given rating and the predicted rating when the reader has a full understanding of the text.</abstract>
       <url hash="85cb0755">P18-1219</url>
@@ -3019,7 +3019,7 @@
     <paper id="220">
       <title>Multi-Input Attention for Unsupervised <fixed-case>OCR</fixed-case> Correction</title>
       <author><first>Rui</first><last>Dong</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>2363–2372</pages>
       <abstract>We propose a novel approach to OCR post-correction that exploits repeated texts in large corpora both as a source of noisy target outputs for unsupervised training and as a source of evidence when decoding. A sequence-to-sequence model with attention is applied for single-input correction, and a new decoder with multi-input attention averaging is developed to search for consensus among multiple sequences. We design two ways of training the correction model without human annotation, either training to match noisily observed textual variants or bootstrapping from a uniform error model. On two corpora of historical newspapers and books, we show that these unsupervised techniques cut the character and word error rates nearly in half on single inputs and, with the addition of multi-input decoding, can rival supervised methods.</abstract>
       <url hash="8b4bc11a">P18-1220</url>
@@ -3029,7 +3029,7 @@
     </paper>
     <paper id="221">
       <title>Building Language Models for Text with Named Entities</title>
-      <author><first>Md Rizwan</first><last>Parvez</last></author>
+      <author id="md-rizwan-parvez"><first>Md Rizwan</first><last>Parvez</last></author>
       <author><first>Saikat</first><last>Chakraborty</last></author>
       <author><first>Baishakhi</first><last>Ray</last></author>
       <author><first>Kai-Wei</first><last>Chang</last></author>
@@ -3044,7 +3044,7 @@
       <title>hyperdoc2vec: Distributed Representations of Hypertext Documents</title>
       <author><first>Jialong</first><last>Han</last></author>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Shuming</first><last>Shi</last></author>
       <author><first>Haisong</first><last>Zhang</last></author>
       <pages>2384–2394</pages>
@@ -3072,7 +3072,7 @@
       <author><first>Qian</first><last>Chen</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Zhen-Hua</first><last>Ling</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Si</first><last>Wei</last></author>
       <pages>2406–2417</pages>
       <abstract>Modeling natural language inference is a very challenging task. With the availability of large annotated data, it has recently become feasible to train complex models such as neural-network-based inference models, which have shown to achieve the state-of-the-art performance. Although there exist relatively large annotated data, can machines learn all knowledge needed to perform natural language inference (NLI) from these data? If not, how can neural-network-based NLI models benefit from external knowledge and how to build NLI models to leverage it? In this paper, we enrich the state-of-the-art neural natural language inference models with external knowledge. We demonstrate that the proposed models improve neural NLI models to achieve the state-of-the-art performance on the SNLI and MultiNLI datasets.</abstract>
@@ -3086,7 +3086,7 @@
       <author><first>Dongyeop</first><last>Kang</last></author>
       <author><first>Tushar</first><last>Khot</last></author>
       <author><first>Ashish</first><last>Sabharwal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>2418–2428</pages>
       <abstract>We consider the problem of learning textual entailment models with limited supervision (5K-10K training examples), and present two complementary approaches for it. First, we propose knowledge-guided adversarial example generators for incorporating large lexical resources in entailment models via only a handful of rule templates. Second, to make the entailment model—a discriminator—more robust, we propose the first GAN-style approach for training it using a natural language example generator that iteratively adjusts to the discriminator’s weaknesses. We demonstrate effectiveness using two entailment datasets, where the proposed methods increase accuracy by 4.7% on SciTail and by 2.8% on a 1% sub-sample of SNLI. Notably, even a single hand-written rule, negate, improves the accuracy of negation examples in SNLI by 6.1%.</abstract>
       <url hash="dbea3450">P18-1225</url>
@@ -3101,7 +3101,7 @@
       <author><first>Jeongmin</first><last>Byun</last></author>
       <author><first>Sion</first><last>Baek</last></author>
       <author><first>Yongseok</first><last>Cho</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>2429–2438</pages>
       <abstract>Research on distributed word representations is focused on widely-used languages such as English. Although the same methods can be used for other languages, language-specific knowledge can enhance the accuracy and richness of word vector representations. In this paper, we look at improving distributed word representations for Korean using knowledge about the unique linguistic structure of Korean. Specifically, we decompose Korean words into the jamo-level, beyond the character-level, allowing a systematic use of subword information. To evaluate the vectors, we develop Korean test sets for word similarity and analogy and make them publicly available. The results show that our simple method outperforms word2vec and character-level Skip-Grams on semantic and syntactic similarity and analogy tasks and contributes positively toward downstream NLP tasks such as sentiment analysis.</abstract>
       <url hash="577ef475">P18-1226</url>
@@ -3156,7 +3156,7 @@
       <author><first>Fuli</first><last>Luo</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
       <author><first>Qiaolin</first><last>Xia</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>2473–2482</pages>
       <abstract>Word Sense Disambiguation (WSD) aims to identify the correct meaning of polysemous words in the particular context. Lexical resources like WordNet which are proved to be of great help for WSD in the knowledge-based methods. However, previous neural networks for WSD always rely on massive labeled data (context), ignoring lexical resources like glosses (sense definitions). In this paper, we integrate the context and glosses of the target word into a unified framework in order to make full use of both labeled data and lexical knowledge. Therefore, we propose GAS: a gloss-augmented WSD neural network which jointly encodes the context and glosses of the target word. GAS models the semantic relationship between the context and the gloss in an improved memory network framework, which breaks the barriers of the previous supervised methods and knowledge-based methods. We further extend the original gloss of word sense via its semantic relations in WordNet to enrich the gloss information. The experimental results show that our model outperforms the state-of-the-art systems on several English all-words WSD datasets.</abstract>
@@ -3169,7 +3169,7 @@
       <title>Bilingual Sentiment Embeddings: Joint Projection of Sentiment Across Languages</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>2483–2493</pages>
       <abstract>Sentiment analysis in low-resource languages suffers from a lack of annotated corpora to estimate high-performing models. Machine translation and bilingual word embeddings provide some relief through cross-lingual sentiment approaches. However, they either require large amounts of parallel data or do not sufficiently capture sentiment information. We introduce Bilingual Sentiment Embeddings (BLSE), which jointly represent sentiment information in a source and target language. This model only requires a small bilingual lexicon, a source-language corpus annotated for sentiment, and monolingual word embeddings for each language. We perform experiments on three language combinations (Spanish, Catalan, Basque) for sentence-level cross-lingual sentiment classification and find that our model significantly outperforms state-of-the-art methods on four out of six experimental setups, as well as capturing complementary information to machine translation. Our analysis of the resulting embedding space provides evidence that it represents sentiment information in the resource-poor target language without any annotated data in that language.</abstract>
       <url hash="d2fa428e">P18-1231</url>
@@ -3195,7 +3195,7 @@
       <author><first>Minlong</first><last>Peng</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Yu-gang</first><last>Jiang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2505–2513</pages>
       <abstract>The task of adopting a model with good performance to a target domain that is different from the source domain used for training has received considerable attention in sentiment analysis. Most existing approaches mainly focus on learning representations that are domain-invariant in both the source and target domains. Few of them pay attention to domain-specific information, which should also be informative. In this work, we propose a method to simultaneously extract domain specific and invariant representations and train a classifier on each of the representation, respectively. And we introduce a few target domain labeled data for learning domain-specific information. To effectively utilize the target domain labeled data, we train the domain invariant representation based classifier with both the source and target domain labeled data and train the domain-specific representation based classifier with only the target domain labeled data. These two classifiers then boost each other in a co-training style. Extensive sentiment analysis experiments demonstrated that the proposed method could achieve better performance than state-of-the-art methods.</abstract>
       <url hash="4df14a55">P18-1233</url>
@@ -3216,7 +3216,7 @@
     </paper>
     <paper id="235">
       <title>A Helping Hand: Transfer Learning for Deep Sentiment Analysis</title>
-      <author><first>Xin</first><last>Dong</last></author>
+      <author id="xin-luna-dong"><first>Xin</first><last>Dong</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <pages>2524–2534</pages>
       <abstract>Deep convolutional neural networks excel at sentiment polarity classification, but tend to require substantial amounts of training data, which moreover differs quite significantly between domains. In this work, we present an approach to feed generic cues into the training process of such networks, leading to better generalization abilities given limited training data. We propose to induce sentiment embeddings via supervision on extrinsic data, which are then fed into the model via a dedicated memory-based component. We observe significant gains in effectiveness on a range of different datasets in seven different languages.</abstract>
@@ -3226,7 +3226,7 @@
     </paper>
     <paper id="236">
       <title>Cold-Start Aware User and Product Attention for Sentiment Classification</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Jihyeok</first><last>Kim</last></author>
       <author><first>Sua</first><last>Sung</last></author>
       <author><first>Seung-won</first><last>Hwang</last></author>
@@ -3239,7 +3239,7 @@
     </paper>
     <paper id="237">
       <title>Modeling Deliberative Argumentation Strategies on <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Kevin</first><last>Lang</last></author>
       <author><first>Jakob</first><last>Herpel</last></author>
@@ -3270,21 +3270,21 @@
       <author><first>Daphne</first><last>Ippolito</last></author>
       <author><first>Brendan</first><last>Callahan</last></author>
       <author><first>Reno</first><last>Kriz</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>2566–2576</pages>
       <abstract>We conduct the most comprehensive study to date into translating words via images. To facilitate research on the task, we introduce a large-scale multilingual corpus of images, each labeled with the word it represents. Past datasets have been limited to only a few high-resource languages and unrealistically easy translation settings. In contrast, we have collected by far the largest available dataset for this task, with images for approximately 10,000 words in each of 100 languages. We run experiments on a dozen high resource languages and 20 low resources languages, demonstrating the effect of word concreteness and part-of-speech on translation quality. %We find that while image features work best for concrete nouns, they are sometimes effective on other parts of speech. To improve image-based translation, we introduce a novel method of predicting word concreteness from images, which improves on a previous state-of-the-art unsupervised technique. This allows us to predict when image-based translation may be effective, enabling consistent improvements to a state-of-the-art text-based word translation system. Our code and the Massively Multilingual Image Dataset (MMID) are available at <url>http://multilingual-images.org/</url>.</abstract>
       <url hash="17d09072">P18-1239</url>
       <attachment type="poster" hash="9355d1a3">P18-1239.Poster.pdf</attachment>
-      <doi>10.18653/v1/P18-1239</doi>
       <attachment type="notes" hash="c31a0581">P18-1239.Notes.pdf</attachment>
+      <doi>10.18653/v1/P18-1239</doi>
       <bibkey>hewitt-etal-2018-learning</bibkey>
     </paper>
     <paper id="240">
       <title>On the Automatic Generation of Medical Imaging Reports</title>
       <author><first>Baoyu</first><last>Jing</last></author>
       <author><first>Pengtao</first><last>Xie</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>2577–2586</pages>
       <abstract>Medical imaging is widely used in clinical practice for diagnosis and treatment. Report-writing can be error-prone for unexperienced physicians, and time-consuming and tedious for experienced physicians. To address these issues, we study the automatic generation of medical imaging reports. This task presents several challenges. First, a complete report contains multiple heterogeneous forms of information, including findings and tags. Second, abnormal regions in medical images are difficult to identify. Third, the reports are typically long, containing multiple sentences. To cope with these challenges, we (1) build a multi-task learning framework which jointly performs the prediction of tags and the generation of paragraphs, (2) propose a co-attention mechanism to localize regions containing abnormalities and generate narrations for them, (3) develop a hierarchical LSTM model to generate long paragraphs. We demonstrate the effectiveness of the proposed methods on two publicly available dataset.</abstract>
       <url hash="58199865">P18-1240</url>
@@ -3310,7 +3310,7 @@
     <paper id="242">
       <title>Think Visually: Question Answering through Virtual Imagery</title>
       <author><first>Ankit</first><last>Goyal</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Jia</first><last>Deng</last></author>
       <pages>2598–2608</pages>
       <abstract>In this paper, we study the problem of geometric reasoning (a form of visual reasoning) in the context of question-answering. We introduce Dynamic Spatial Memory Network (DSMN), a new deep network architecture that specializes in answering questions that admit latent visual representations, and learns to generate and reason over such representations. Further, we propose two synthetic benchmarks, FloorPlanQA and ShapeIntersection, to evaluate the geometric reasoning capability of QA systems. Experimental results validate the effectiveness of our proposed DSMN for visual thinking tasks.</abstract>
@@ -3338,7 +3338,7 @@
       <author><first>Takaaki</first><last>Hori</last></author>
       <author><first>Shinji</first><last>Watanabe</last></author>
       <author><first>Jonathan</first><last>Le Roux</last></author>
-      <author><first>John R.</first><last>Hershey</last></author>
+      <author id="john-r-hershey"><first>John R.</first><last>Hershey</last></author>
       <pages>2620–2630</pages>
       <abstract>Recently, there has been growing interest in multi-speaker speech recognition, where the utterances of multiple speakers are recognized from their mixture. Promising techniques have been proposed for this task, but earlier works have required additional training data such as isolated source signals or senone alignments for effective learning. In this paper, we propose a new sequence-to-sequence framework to directly decode multiple label sequences from a single speech sequence by unifying source separation and speech recognition functions in an end-to-end manner. We further propose a new objective function to improve the contrast between the hidden vectors to avoid generating similar hypotheses. Experimental results show that the model is directly able to learn a mapping from a speech mixture to multiple label sequences, achieving 83.1% relative improvement compared to a model trained without the proposed objective. Interestingly, the results are comparable to those produced by previous end-to-end works featuring explicit separation and recognition modules.</abstract>
       <url hash="90986db7">P18-1244</url>
@@ -3351,7 +3351,7 @@
       <title>A Structured Variational Autoencoder for Contextual Morphological Inflection</title>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Jason</first><last>Naradowsky</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <pages>2631–2641</pages>
       <abstract>Statistical morphological inflectors are typically trained on fully supervised, type-level data. One remaining open research question is the following: How can we effectively exploit raw, token-level data to improve their performance? To this end, we introduce a novel generative latent-variable model for the semi-supervised learning of inflection generation. To enable posterior inference over the latent variables, we derive an efficient variational inference procedure based on the wake-sleep algorithm. We experiment on 23 languages, using the Universal Dependencies corpora in a simulated low-resource setting, and find improvements of over 10% absolute accuracy in some cases.</abstract>
@@ -3381,7 +3381,7 @@
     <paper id="247">
       <title>Neural Factor Graph Models for Cross-lingual Morphological Tagging</title>
       <author><first>Chaitanya</first><last>Malaviya</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>2653–2663</pages>
       <abstract>Morphological analysis involves predicting the syntactic traits of a word (e.g. POS: Noun, Case: Acc, Gender: Fem). Previous work in morphological tagging improves performance for low-resource languages (LRLs) through cross-lingual training with a high-resource language (HRL) from the same family, but is limited by the strict, often false, assumption that tag sets exactly overlap between the HRL and LRL. In this paper we propose a method for cross-lingual morphological tagging that aims to improve information sharing between languages by relaxing this assumption. The proposed model uses factorial conditional random fields with neural network potentials, making it possible to (1) utilize the expressive power of neural network representations to smooth over superficial differences in the surface forms, (2) model pairwise and transitive relationships between tags, and (3) accurately generate tag sets that are unseen or rare in the training data. Experiments on four languages from the Universal Dependencies Treebank demonstrate superior tagging accuracies over existing cross-lingual approaches.</abstract>
@@ -3418,7 +3418,7 @@
       <title>Pre- and In-Parsing Models for Neural Empty Category Detection</title>
       <author><first>Yufei</first><last>Chen</last></author>
       <author><first>Yuanyuan</first><last>Zhao</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>2687–2696</pages>
       <abstract>Motivated by the positive impact of empty category on syntactic parsing, we study neural models for pre- and in-parsing detection of empty category, which has not previously been investigated. We find several non-obvious facts: (a) BiLSTM can capture non-local contextual information which is essential for detecting empty categories, (b) even with a BiLSTM, syntactic information is still able to enhance the detection, and (c) automatic detection of empty categories improves parsing quality for overt words. Our neural ECD models outperform the prior state-of-the-art by significant margins.</abstract>
@@ -3472,8 +3472,8 @@
     </paper>
     <paper id="254">
       <title>Finding syntax in human encephalography with beam search</title>
-      <author><first>John</first><last>Hale</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
       <pages>2727–2736</pages>
@@ -3486,13 +3486,13 @@
     <paper id="255">
       <title>Learning to Ask Good Questions: Ranking Clarification Questions using Neural Expected Value of Perfect Information</title>
       <author><first>Sudha</first><last>Rao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>2737–2746</pages>
       <abstract>Inquiry is fundamental to communication, and machines cannot effectively collaborate with humans unless they can ask questions. In this work, we build a neural network model for the task of ranking clarification questions. Our model is inspired by the idea of expected value of perfect information: a good question is one whose expected answer will be useful. We study this problem using data from StackExchange, a plentiful online resource in which people routinely ask clarifying questions to posts so that they can better offer assistance to the original poster. We create a dataset of clarification questions consisting of 77K posts paired with a clarification question (and answer) from three domains of StackExchange: askubuntu, unix and superuser. We evaluate our model on 500 samples of this dataset against expert human judgments and demonstrate significant improvements over controlled baselines.</abstract>
       <url hash="13f619ea">P18-1255</url>
       <attachment type="note" hash="a070bfc3">P18-1255.Notes.pdf</attachment>
-      <video href="P18-1255.mp4"/>
       <attachment type="presentation" hash="0ecddd68">P18-1255.Presentation.pdf</attachment>
+      <video href="P18-1255.mp4"/>
       <doi>10.18653/v1/P18-1255</doi>
       <bibkey>rao-daume-iii-2018-learning</bibkey>
     </paper>
@@ -3501,7 +3501,7 @@
       <author><first>Andre</first><last>Cianflone</last></author>
       <author><first>Yulan</first><last>Feng</last></author>
       <author><first>Jad</first><last>Kabbara</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>2747–2755</pages>
       <abstract>We introduce the novel task of predicting adverbial presupposition triggers, which is useful for natural language generation tasks such as summarization and dialogue systems. We introduce two new corpora, derived from the Penn Treebank and the Annotated English Gigaword dataset and investigate the use of a novel attention mechanism tailored to this task. Our attention mechanism augments a baseline recurrent neural network without the need for additional trainable parameters, minimizing the added computational cost of our mechanism. We demonstrate that this model statistically outperforms our baselines.</abstract>
       <url hash="b016d196">P18-1256</url>
@@ -3553,7 +3553,7 @@
       <title>Deep <fixed-case>RNN</fixed-case>s Encode Soft Hierarchical Syntax</title>
       <author><first>Terra</first><last>Blevins</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>14–19</pages>
       <abstract>We present a set of experiments to demonstrate that deep recurrent neural networks (RNNs) learn internal representations that capture soft hierarchical notions of syntax from highly varied supervision. We consider four syntax tasks at different depths of the parse tree; for each word, we predict its part of speech as well as the first (parent), second (grandparent) and third level (great-grandparent) constituent labels that appear above it. These predictions are made from representations produced at different depths in networks that are pretrained with one of four objectives: dependency parsing, semantic role labeling, machine translation, or language modeling. In every case, we find a correspondence between network depth and syntactic depth, suggesting that a soft syntactic hierarchy emerges. This effect is robust across all conditions, indicating that the models encode significant amounts of syntax even in the absence of an explicit syntactic training supervision.</abstract>
       <url hash="0f479934">P18-2003</url>
@@ -3575,8 +3575,8 @@
     <paper id="5">
       <title>Towards Robust and Privacy-preserving Text Representations</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>25–30</pages>
       <abstract>Written text often provides sufficient clues to identify the author, their gender, age, and other important attributes. Consequently, the authorship of training and evaluation corpora can have unforeseen impacts, including differing model performance for different user groups, as well as privacy implications. In this paper, we propose an approach to explicitly obscure important author characteristics at training time, such that representations learned are invariant to these attributes. Evaluating on two tasks, we show that this leads to increased privacy in the learned representations, as well as more robust models to varying evaluation conditions, including out-of-domain corpora.</abstract>
       <url hash="a351092b">P18-2005</url>
@@ -3599,7 +3599,7 @@
     </paper>
     <paper id="7">
       <title>Domain Adapted Word Embeddings for Improved Sentiment Classification</title>
-      <author><first>Prathusha</first><last>K Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>K Sarma</last></author>
       <author><first>Yingyu</first><last>Liang</last></author>
       <author><first>Bill</first><last>Sethares</last></author>
       <pages>37–42</pages>
@@ -3612,11 +3612,11 @@
     </paper>
     <paper id="8">
       <title>Active learning for deep semantic parsing</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Hadi</first><last>Afshar</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Glen</first><last>Pink</last></author>
-      <author><first>Philip</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>43–48</pages>
       <abstract>Semantic parsing requires training data that is expensive and slow to collect. We apply active learning to both traditional and “overnight” data collection approaches. We show that it is possible to obtain good training hyperparameters from seed data which is only a small fraction of the full dataset. We show that uncertainty sampling based on least confidence score is competitive in traditional data collection but not applicable for overnight collection. We propose several active learning strategies for overnight data collection and show that different example selection strategies per domain perform best.</abstract>
@@ -3628,7 +3628,7 @@
     </paper>
     <paper id="9">
       <title>Learning Thematic Similarity Metric from Article Sections Using Triplet Networks</title>
-      <author><first>Liat</first><last>Ein Dor</last></author>
+      <author id="liat-ein-dor"><first>Liat</first><last>Ein Dor</last></author>
       <author><first>Yosi</first><last>Mass</last></author>
       <author><first>Alon</first><last>Halfon</last></author>
       <author><first>Elad</first><last>Venezian</last></author>
@@ -3647,8 +3647,8 @@
       <author><first>Dmitry</first><last>Ustalov</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Andrey</first><last>Kutuzov</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>55–62</pages>
       <abstract>We use dependency triples automatically extracted from a Web-scale corpus to perform unsupervised semantic frame induction. We cast the frame induction problem as a triclustering problem that is a generalization of clustering for triadic data. Our replicable benchmarks demonstrate that the proposed graph-based approach, Triframes, shows state-of-the art results on this task on a FrameNet-derived dataset and performing on par with competitive methods on a verb class clustering task.</abstract>
       <url hash="b8b75ecf">P18-2010</url>
@@ -3662,9 +3662,9 @@
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>63–68</pages>
       <abstract>Identification of distinct and independent participants (entities of interest) in a narrative is an important task for many NLP applications. This task becomes challenging because these participants are often referred to using multiple aliases. In this paper, we propose an approach based on linguistic knowledge for identification of aliases mentioned using proper nouns, pronouns or noun phrases with common noun headword. We use Markov Logic Network (MLN) to encode the linguistic knowledge for identification of aliases. We evaluate on four diverse history narratives of varying complexity. Our approach performs better than the state-of-the-art approach as well as a combination of standard named entity recognition and coreference resolution techniques.</abstract>
       <url hash="60f00ff0">P18-2011</url>
@@ -3714,7 +3714,7 @@
       <author><first>Van-Thuy</first><last>Phi</last></author>
       <author><first>Joan</first><last>Santoso</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>89–95</pages>
       <abstract>This paper addresses the tasks of automatic seed selection for bootstrapping relation extraction, and noise reduction for distantly supervised relation extraction. We first point out that these tasks are related. Then, inspired by ranking relation instances and patterns computed by the HITS algorithm, and selecting cluster centroids using the K-means, LSA, or NMF method, we propose methods for selecting the initial seeds from an existing resource, or reducing the level of noise in the distantly labeled data. Experiments show that our proposed methods achieve a better performance than the baseline systems in both tasks.</abstract>
       <url hash="5eddd072">P18-2015</url>
@@ -3723,9 +3723,9 @@
     </paper>
     <paper id="16">
       <title>Automatic Extraction of Commonsense <fixed-case>L</fixed-case>ocated<fixed-case>N</fixed-case>ear Knowledge</title>
-      <author><first>Frank F.</first><last>Xu</last></author>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>96–101</pages>
       <abstract>LocatedNear relation is a kind of commonsense knowledge describing two physical objects that are typically found near each other in real life. In this paper, we study how to automatically extract such relationship through a sentence-level relation classifier and aggregating the scores of entity pairs from a large corpus. Also, we release two benchmark datasets for evaluation and future research.</abstract>
       <url hash="0f947f58">P18-2016</url>
@@ -3736,10 +3736,10 @@
     <paper id="17">
       <title>Neural Coreference Resolution with Deep Biaffine Attention by Joint Mention Detection and Mention Clustering</title>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Cícero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>Nogueira dos Santos</last></author>
       <author><first>Michihiro</first><last>Yasunaga</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>102–107</pages>
       <abstract>Coreference resolution aims to identify in a text all mentions that refer to the same real world entity. The state-of-the-art end-to-end neural coreference model considers all text spans in a document as potential mentions and learns to link an antecedent for each possible mention. In this paper, we propose to improve the end-to-end coreference resolution system by (1) using a biaffine attention model to get antecedent scores for each possible mention, and (2) jointly optimizing the mention detection accuracy and mention clustering accuracy given the mention cluster labels. Our model achieves the state-of-the-art performance on the CoNLL-2012 shared task English test set.</abstract>
       <url hash="4e2a7048">P18-2017</url>
@@ -3761,7 +3761,7 @@
       <title>Some of Them Can be Guessed! Exploring the Effect of Linguistic Context in Predicting Quantifiers</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Shane</first><last>Steinert-Threlkeld</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Jakub</first><last>Szymanik</last></author>
       <pages>114–119</pages>
       <abstract>We study the role of linguistic context in predicting quantifiers (‘few’, ‘all’). We collect crowdsourced data from human participants and test various models in a local (single-sentence) and a global context (multi-sentence) condition. Models significantly out-perform humans in the former setting and are only slightly better in the latter. While human performance improves with more linguistic context (especially on proportional quantifiers), model performance suffers. Models are very effective in exploiting lexical and morpho-syntactic patterns; humans are better at genuinely understanding the meaning of the (global) context.</abstract>
@@ -3773,7 +3773,7 @@
     <paper id="20">
       <title>A Named Entity Recognition Shootout for <fixed-case>G</fixed-case>erman</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>120–125</pages>
       <abstract>We ask how to practically build a model for German named entity recognition (NER) that performs at the state of the art for both contemporary and historical texts, i.e., a big-data and a small-data scenario. The two best-performing model families are pitted against each other (linear-chain CRFs and BiLSTM) to observe the trade-off between expressiveness and data requirements. BiLSTM outperforms the CRF when large datasets are available and performs inferior for the smallest dataset. BiLSTMs profit substantially from transfer learning, which enables them to be trained on multiple corpora, resulting in a new state-of-the-art model for German NER on two contemporary German corpora (CoNLL 2003 and GermEval 2014) and two historic corpora.</abstract>
       <url hash="52c050cf">P18-2020</url>
@@ -3783,11 +3783,11 @@
     </paper>
     <paper id="21">
       <title>A dataset for identifying actionable feedback in collaborative software development</title>
-      <author><first>Benjamin S.</first><last>Meyers</last></author>
+      <author id="benjamin-s-meyers"><first>Benjamin S.</first><last>Meyers</last></author>
       <author><first>Nuthan</first><last>Munaiah</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Andrew</first><last>Meneely</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Josephine</first><last>Wolff</last></author>
       <author><first>Pradeep K.</first><last>Murukannaiah</last></author>
       <pages>126–131</pages>
@@ -3799,9 +3799,9 @@
     <paper id="22">
       <title><fixed-case>SNAG</fixed-case>: Spoken Narratives and Gaze Dataset</title>
       <author><first>Preethi</first><last>Vaidyanathan</last></author>
-      <author><first>Emily T.</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily T.</first><last>Prud’hommeaux</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
-      <author><first>Cecilia O.</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia O.</first><last>Alm</last></author>
       <pages>132–137</pages>
       <abstract>Humans rely on multiple sensory modalities when examining and reasoning over images. In this paper, we describe a new multimodal dataset that consists of gaze measurements and spoken descriptions collected in parallel during an image inspection task. The task was performed by multiple participants on 100 general-domain images showing everyday objects and activities. We demonstrate the usefulness of the dataset by applying an existing visual-linguistic data fusion framework in order to label important image regions with appropriate linguistic labels.</abstract>
       <url hash="58dad69d">P18-2022</url>
@@ -3886,7 +3886,7 @@
       <title>A Language Model based Evaluator for Sentence Compression</title>
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Zhiyuan</first><last>Luo</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>170–175</pages>
       <abstract>We herein present a language-model-based evaluator for deletion-based sentence compression and view this task as a series of deletion-and-evaluation operations using the evaluator. More specifically, the evaluator is a syntactic neural language model that is first built by learning the syntactic and structural collocation among words. Subsequently, a series of trial-and-error deletion operations are conducted on the source sentences via a reinforcement learning framework to obtain the best target compression. An empirical study shows that the proposed model can effectively generate more readable compression, comparable or superior to several strong baselines. Furthermore, we introduce a 200-sentence test set for a large-scale dataset, setting a new baseline for the future research.</abstract>
       <url hash="ffcf2bc9">P18-2028</url>
@@ -3908,9 +3908,9 @@
     </paper>
     <paper id="30">
       <title>Content-based Popularity Prediction of Online Petitions Using a Deep Regression Model</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>182–188</pages>
       <abstract>Online petitions are a cost-effective way for citizens to collectively engage with policy-makers in a democracy. Predicting the popularity of a petition — commonly measured by its signature count — based on its textual content has utility for policymakers as well as those posting the petition. In this work, we model this task using CNN regression with an auxiliary ordinal regression objective. We demonstrate the effectiveness of our proposed approach using UK and US government petition datasets.</abstract>
       <url hash="de002702">P18-2030</url>
@@ -3919,7 +3919,7 @@
     </paper>
     <paper id="31">
       <title>Fighting Offensive Language on Social Media with Unsupervised Text Style Transfer</title>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Igor</first><last>Melnyk</last></author>
       <author><first>Inkit</first><last>Padhi</last></author>
       <pages>189–194</pages>
@@ -3932,7 +3932,7 @@
       <title>Diachronic degradation of language models: Insights from social media</title>
       <author><first>Kokil</first><last>Jaidka</last></author>
       <author><first>Niyati</first><last>Chhaya</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>195–200</pages>
       <abstract>Natural languages change over time because they evolve to the needs of their users and the socio-technological environment. This study investigates the diachronic accuracy of pre-trained language models for downstream tasks in machine learning and user profiling. It asks the question: given that the social media platform and its users remain the same, how is language changing over time? How can these differences be used to track the changes in the affect around a particular topic? To our knowledge, this is the first study to show that it is possible to measure diachronic semantic drifts within social media and within the span of a few years.</abstract>
       <url hash="2e046485">P18-2032</url>
@@ -3947,9 +3947,9 @@
       <author><first>Baolin</first><last>Peng</last></author>
       <author><first>Huaixiao</first><last>Tou</last></author>
       <author><first>Ting</first><last>Chen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Kam-fai</first><last>Wong</last></author>
-      <author><first>Xiangying</first><last>Dai</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="kam-fai-wong"><first>Kam-fai</first><last>Wong</last></author>
+      <author id="xiang-dai"><first>Xiangying</first><last>Dai</last></author>
       <pages>201–207</pages>
       <abstract>In this paper, we make a move to build a dialogue system for automatic diagnosis. We first build a dataset collected from an online medical forum by extracting symptoms from both patients’ self-reports and conversational data between patients and doctors. Then we propose a task-oriented dialogue system framework to make diagnosis for patients automatically, which can converse with patients to collect additional symptoms beyond their self-reports. Experimental results on our dataset show that additional symptoms extracted from conversation can greatly improve the accuracy for disease identification and our dialogue system is able to collect these symptoms automatically and make a better diagnosis.</abstract>
       <url hash="c46781de">P18-2033</url>
@@ -3965,7 +3965,7 @@
       <author><first>Wei</first><last>Zhou</last></author>
       <author><first>Jun</first><last>Huang</last></author>
       <author><first>Haiqing</first><last>Chen</last></author>
-      <author><first>Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>Bruce</first><last>Croft</last></author>
       <author><first>Wei</first><last>Lin</last></author>
       <pages>208–213</pages>
       <abstract>Building multi-turn information-seeking conversation systems is an important and challenging research topic. Although several advanced neural text matching models have been proposed for this task, they are generally not efficient for industrial applications. Furthermore, they rely on a large amount of labeled data, which may not be available in real-world applications. To alleviate these problems, we study transfer learning for multi-turn information seeking conversations in this paper. We first propose an efficient and effective multi-turn conversation model based on convolutional neural networks. After that, we extend our model to adapt the knowledge learned from a resource-rich domain to enhance the performance. Finally, we deployed our model in an industrial chatbot called AliMe Assist and observed a significant improvement over the existing online model.</abstract>
@@ -3976,8 +3976,8 @@
     <paper id="35">
       <title>A Multi-task Approach to Learning Multilingual Representations</title>
       <author><first>Karan</first><last>Singla</last></author>
-      <author><first>Dogan</first><last>Can</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="dogan-can"><first>Dogan</first><last>Can</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>214–220</pages>
       <abstract>We present a novel multi-task modeling approach to learning multilingual distributed representations of text. Our system learns word and sentence embeddings jointly by training a multilingual skip-gram model together with a cross-lingual sentence similarity model. Our architecture can transparently use both monolingual and sentence aligned bilingual corpora to learn multilingual embeddings, thus covering a vocabulary significantly larger than the vocabulary of the bilingual corpora alone. Our model shows competitive performance in a standard cross-lingual document classification task. We also show the effectiveness of our method in a limited resource scenario.</abstract>
       <url hash="c6ed25ef">P18-2035</url>
@@ -3987,7 +3987,7 @@
     </paper>
     <paper id="36">
       <title>Characterizing Departures from Linearity in Word Translation</title>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <author><first>Raphael</first><last>Flauger</last></author>
       <pages>221–227</pages>
       <abstract>We investigate the behavior of maps learned by machine translation methods. The maps translate words by projecting between word embedding spaces of different languages. We locally approximate these maps using linear maps, and find that they vary across the word embedding space. This demonstrates that the underlying maps are non-linear. Importantly, we show that the locally linear maps vary by an amount that is tightly correlated with the distance between the neighborhoods on which they are trained. Our results can be used to test non-linear methods, and to drive the design of more accurate maps for word translation.</abstract>
@@ -4083,7 +4083,7 @@
     <paper id="44">
       <title><fixed-case>CNN</fixed-case> for Text-Based Multiple Choice Question Answering</title>
       <author><first>Akshay</first><last>Chaturvedi</last></author>
-      <author><first>Onkar</first><last>Pandit</last></author>
+      <author id="onkar-arun-pandit"><first>Onkar</first><last>Pandit</last></author>
       <author><first>Utpal</first><last>Garain</last></author>
       <pages>272–277</pages>
       <abstract>The task of Question Answering is at the very core of machine comprehension. In this paper, we propose a Convolutional Neural Network (CNN) model for text-based multiple choice question answering where questions are based on a particular article. Given an article and a multiple choice question, our model assigns a score to each question-option tuple and chooses the final option accordingly. We test our model on Textbook Question Answering (TQA) and SciQ dataset. Our model outperforms several LSTM-based baseline models on the two datasets.</abstract>
@@ -4096,8 +4096,8 @@
     <paper id="45">
       <title>Narrative Modeling with Memory Chains and Semantic Supervision</title>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>278–284</pages>
       <abstract>Story comprehension requires a deep semantic understanding of the narrative, making it a challenging task. Inspired by previous studies on ROC Story Cloze Test, we propose a novel method, tracking various semantic aspects with external neural memory chains while encouraging each to focus on a particular semantic aspect. Evaluated on the task of story ending prediction, our model demonstrates superior performance to a collection of competitive baselines, setting a new state of the art.</abstract>
       <url hash="f70ad355">P18-2045</url>
@@ -4136,7 +4136,7 @@
       <title>Dynamic Sentence Sampling for Efficient Training of Neural Machine Translation</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>298–304</pages>
       <abstract>Traditional Neural machine translation (NMT) involves a fixed training procedure where each sentence is sampled once during each epoch. In reality, some sentences are well-learned during the initial few epochs; however, using this approach, the well-learned sentences would continue to be trained along with those sentences that were not well learned for 10-30 epochs, which results in a wastage of time. Here, we propose an efficient method to dynamically sample the sentences in order to accelerate the NMT training. In this approach, a weight is assigned to each sentence based on the measured difference between the training costs of two iterations. Further, in each epoch, a certain percentage of sentences are dynamically sampled according to their weights. Empirical results based on the NIST Chinese-to-English and the WMT English-to-German tasks show that the proposed method can significantly accelerate the NMT training and improve the NMT performance.</abstract>
       <url hash="d75801fe">P18-2048</url>
@@ -4170,7 +4170,7 @@
       <title>Multi-representation ensembles and delayed <fixed-case>SGD</fixed-case> updates improve syntax-based <fixed-case>NMT</fixed-case></title>
       <author><first>Danielle</first><last>Saunders</last></author>
       <author><first>Felix</first><last>Stahlberg</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>319–325</pages>
       <abstract>We explore strategies for incorporating target syntax into Neural Machine Translation. We specifically focus on syntax in ensembles containing multiple sentence representations. We formulate beam search over such ensembles using WFSTs, and describe a delayed SGD update training procedure that is especially effective for long representations like linearized syntax. Our approach gives state-of-the-art performance on a difficult Japanese-English task.</abstract>
@@ -4255,13 +4255,13 @@
       <author><first>Luheng</first><last>He</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>364–369</pages>
       <abstract>Recent BIO-tagging-based neural semantic role labeling models are very high performing, but assume gold predicates as part of the input and cannot incorporate span-level features. We propose an end-to-end approach for jointly predicting all predicates, arguments spans, and the relations between them. The model makes independent decisions about what relationship, if any, holds between every possible word-span pair, and learns contextualized span representations that provide rich, shared input features for each decision. Experiments demonstrate that this approach sets a new state of the art on PropBank SRL without gold predicates.</abstract>
       <url hash="08d25279">P18-2058</url>
       <attachment type="note" hash="8ae5f78e">P18-2058.Notes.pdf</attachment>
-      <video href="P18-2058.mp4"/>
       <attachment type="presentation" hash="28c92dc1">P18-2058.Presentation.pdf</attachment>
+      <video href="P18-2058.mp4"/>
       <doi>10.18653/v1/P18-2058</doi>
       <bibkey>he-etal-2018-jointly</bibkey>
     </paper>
@@ -4269,13 +4269,13 @@
       <title>Sparse and Constrained Attention for Neural Machine Translation</title>
       <author><first>Chaitanya</first><last>Malaviya</last></author>
       <author><first>Pedro</first><last>Ferreira</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>370–376</pages>
       <abstract>In neural machine translation, words are sometimes dropped from the source or generated repeatedly in the translation. We explore novel strategies to address the coverage problem that change only the attention transformation. Our approach allocates fertilities to source words, used to bound the attention each word can receive. We experiment with various sparse and constrained attention transformations and propose a new one, constrained sparsemax, shown to be differentiable and sparse. Empirical evaluation is provided in three languages pairs.</abstract>
       <url hash="f5145462">P18-2059</url>
       <attachment type="note" hash="2896928d">P18-2059.Notes.pdf</attachment>
-      <video href="P18-2059.mp4"/>
       <attachment type="presentation" hash="268cc0cb">P18-2059.Presentation.pdf</attachment>
+      <video href="P18-2059.mp4"/>
       <doi>10.18653/v1/P18-2059</doi>
       <bibkey>malaviya-etal-2018-sparse</bibkey>
     </paper>
@@ -4285,7 +4285,7 @@
       <author><first>Derui</first><last>Zhu</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Zixuan</first><last>Gan</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>377–382</pages>
       <abstract>Attention-based neural machine translation (NMT) models selectively focus on specific source positions to produce a translation, which brings significant improvements over pure encoder-decoder sequence-to-sequence models. This work investigates NMT while replacing the attention component. We study a neural hidden Markov model (HMM) consisting of neural network-based alignment and lexicon models, which are trained jointly using the forward-backward algorithm. We show that the attention component can be effectively replaced by the neural network alignment model and the neural HMM approach is able to provide comparable performance with the state-of-the-art attention-based models on the WMT 2017 German↔English and Chinese→English translation tasks.</abstract>
       <url hash="b850755f">P18-2060</url>
@@ -4300,7 +4300,7 @@
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Ian</first><last>Matroos</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>383–389</pages>
       <abstract>Gender prediction has typically focused on lexical and social network features, yielding good performance, but making systems highly language-, topic-, and platform dependent. Cross-lingual embeddings circumvent some of these limitations, but capture gender-specific style less. We propose an alternative: bleaching text, i.e., transforming lexical strings into more abstract features. This study provides evidence that such features allow for better transfer across languages. Moreover, we present a first study on the ability of humans to perform cross-lingual gender prediction. We find that human predictive power proves similar to that of our bleached models, and both perform better than lexical models.</abstract>
       <url hash="0f0cf0b8">P18-2061</url>
@@ -4324,8 +4324,8 @@
     <paper id="63">
       <title>Neural Cross-Lingual Coreference Resolution And Its Application To Entity Linking</title>
       <author><first>Gourab</first><last>Kundu</last></author>
-      <author><first>Avi</first><last>Sil</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Wael</first><last>Hamza</last></author>
       <pages>395–400</pages>
       <abstract>We propose an entity-centric neural crosslingual coreference model that builds on multi-lingual embeddings and language independent features. We perform both intrinsic and extrinsic evaluations of our model. In the intrinsic evaluation, we show that our model, when trained on English and tested on Chinese and Spanish, achieves competitive results to the models trained directly on Chinese and Spanish respectively. In the extrinsic evaluation, we show that our English model helps achieve superior entity linking accuracy on Chinese and Spanish test sets than the top 2015 TAC system without using any annotated data from Chinese or Spanish.</abstract>
@@ -4338,7 +4338,7 @@
       <title>Judicious Selection of Training Data in Assisting Language for Multilingual Neural <fixed-case>NER</fixed-case></title>
       <author><first>Rudra</first><last>Murthy</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>401–406</pages>
       <abstract>Multilingual learning for Neural Named Entity Recognition (NNER) involves jointly training a neural network for multiple languages. Typically, the goal is improving the NER performance of one of the languages (the primary language) using the other assisting languages. We show that the divergence in the tag distributions of the common named entities between the primary and assisting languages can reduce the effectiveness of multilingual learning. To alleviate this problem, we propose a metric based on symmetric KL divergence to filter out the highly divergent training instances in the assisting language. We empirically show that our data selection strategy improves NER performance in many languages, including those with very limited training data.</abstract>
       <url hash="ea6d8d7a">P18-2064</url>
@@ -4364,7 +4364,7 @@
       <author><first>Yue</first><last>Zhao</last></author>
       <author><first>Xiaolong</first><last>Jin</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>414–419</pages>
       <abstract>Document-level information is very important for event detection even at sentence level. In this paper, we propose a novel Document Embedding Enhanced Bi-RNN model, called DEEB-RNN, to detect events in sentences. This model first learns event detection oriented embeddings of documents through a hierarchical and supervised attention based RNN, which pays word-level attention to event triggers and sentence-level attention to those sentences containing events. It then uses the learned document embedding to enhance another bidirectional RNN model to identify event triggers and their types in sentences. Through experiments on the ACE-2005 dataset, we demonstrate the effectiveness and merits of the proposed DEEB-RNN model via comparison with state-of-the-art methods.</abstract>
       <url hash="ea9082d4">P18-2066</url>
@@ -4402,7 +4402,7 @@
       <title>Large-Scale Multi-Domain Belief Tracking with Knowledge Sharing</title>
       <author><first>Osman</first><last>Ramadan</last></author>
       <author><first>Paweł</first><last>Budzianowski</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>432–437</pages>
       <abstract>Robust dialogue belief tracking is a key component in maintaining good quality dialogue systems. The tasks that dialogue systems are trying to solve are becoming increasingly complex, requiring scalability to multi-domain, semantically rich dialogues. However, most current approaches have difficulty scaling up with domains because of the dependency of the model parameters on the dialogue ontology. In this paper, a novel approach is introduced that fully utilizes semantic similarity between dialogue utterances and the ontology terms, allowing the information to be shared across domains. The evaluation is performed on a recently collected multi-domain dialogues dataset, one order of magnitude larger than currently available corpora. Our model demonstrates great capability in handling multi-domain dialogues, simultaneously outperforming existing state-of-the-art models in single-domain dialogue tracking tasks.</abstract>
       <url hash="a682c0c7">P18-2069</url>
@@ -4444,13 +4444,13 @@
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Peter</first><last>Anderson</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>450–455</pages>
       <abstract>Because obtaining training data is often the most difficult part of an NLP or ML project, we develop methods for predicting how much data is required to achieve a desired test accuracy by extrapolating results from models trained on a small pilot training dataset. We model how accuracy varies as a function of training size on subsets of the pilot data, and use that model to predict how much training data would be required to achieve the desired accuracy. We introduce a new performance extrapolation task to evaluate how well different extrapolations predict accuracy on larger training sets. We show that details of hyperparameter optimisation and the extrapolation models can have dramatic effects in a document classification task. We believe this is an important first step in developing methods for estimating the resources required to meet specific engineering performance targets.</abstract>
       <url hash="6bd188cb">P18-2072</url>
       <attachment type="note" hash="8515f885">P18-2072.Notes.pdf</attachment>
-      <video href="P18-2072.mp4"/>
       <attachment type="presentation" hash="3f05a72b">P18-2072.Presentation.pdf</attachment>
+      <video href="P18-2072.mp4"/>
       <doi>10.18653/v1/P18-2072</doi>
       <bibkey>johnson-etal-2018-predicting</bibkey>
     </paper>
@@ -4471,13 +4471,13 @@
     <paper id="74">
       <title>Do Neural Network Cross-Modal Mappings Really Bridge Modalities?</title>
       <author><first>Guillem</first><last>Collell</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>462–468</pages>
       <abstract>Feed-forward networks are widely used in cross-modal applications to bridge modalities by mapping distributed vectors of one modality to the other, or to a shared space. The predicted vectors are then used to perform e.g., retrieval or labeling. Thus, the success of the whole system relies on the ability of the mapping to make the neighborhood structure (i.e., the pairwise similarities) of the predicted vectors akin to that of the target vectors. However, whether this is achieved has not been investigated yet. Here, we propose a new similarity measure and two ad hoc experiments to shed light on this issue. In three cross-modal benchmarks we learn a large number of language-to-vision and vision-to-language neural network mappings (up to five layers) using a rich diversity of image and text features and loss functions. Our results reveal that, surprisingly, the neighborhood structure of the predicted vectors consistently resembles more that of the input vectors than that of the target vectors. In a second experiment, we further show that untrained nets do not significantly disrupt the neighborhood (i.e., semantic) structure of the input vectors.</abstract>
       <url hash="74fb30e1">P18-2074</url>
       <attachment type="note" hash="bf4b518e">P18-2074.Notes.pdf</attachment>
-      <video href="P18-2074.mp4"/>
       <attachment type="presentation" hash="323b1b15">P18-2074.Presentation.pdf</attachment>
+      <video href="P18-2074.mp4"/>
       <doi>10.18653/v1/P18-2074</doi>
       <bibkey>collell-moens-2018-neural</bibkey>
     </paper>
@@ -4509,7 +4509,7 @@
     <paper id="77">
       <title>Simpler but More Accurate Semantic Dependency Parsing</title>
       <author><first>Timothy</first><last>Dozat</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>484–490</pages>
       <abstract>While syntactic dependency annotations concentrate on the surface or functional structure of a sentence, semantic dependency annotations aim to capture between-word relationships that are more closely related to the meaning of a sentence, using graph-structured representations. We extend the LSTM-based syntactic parser of Dozat and Manning (2017) to train on and generate these graph structures. The resulting system on its own achieves state-of-the-art performance, beating the previous, substantially more complex state-of-the-art system by 0.6% labeled F1. Adding linguistically richer input representations pushes the margin even higher, allowing us to beat it by 1.9% labeled F1.</abstract>
       <url hash="45c6e9dd">P18-2077</url>
@@ -4521,7 +4521,7 @@
       <title>Simplified Abugidas</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>491–495</pages>
       <abstract>An abugida is a writing system where the consonant letters represent syllables with a default vowel and other vowels are denoted by diacritics. We investigate the feasibility of recovering the original text written in an abugida after omitting subordinate diacritics and merging consonant letters with similar phonetic values. This is crucial for developing more efficient input methods by reducing the complexity in abugidas. Four abugidas in the southern Brahmic family, i.e., Thai, Burmese, Khmer, and Lao, were studied using a newswire 20,000-sentence dataset. We compared the recovery performance of a support vector machine and an LSTM-based recurrent neural network, finding that the abugida graphemes could be recovered with 94% - 97% accuracy at the top-1 level and 98% - 99% at the top-4 level, even after omitting most diacritics (10 - 30 types) and merging the remaining 30 - 50 characters into 21 graphemes.</abstract>
       <url hash="d4b5e020">P18-2078</url>
@@ -4546,7 +4546,7 @@
     <paper id="80">
       <title>Automated essay scoring with string kernels and word embeddings</title>
       <author><first>Mădălina</first><last>Cozma</last></author>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <pages>503–509</pages>
       <abstract>In this work, we present an approach based on combining string kernels and word embeddings for automatic essay scoring. String kernels capture the similarity among strings based on counting common character n-grams, which are a low-level yet powerful type of feature, demonstrating state-of-the-art results in various text classification tasks such as Arabic dialect identification or native language identification. To our best knowledge, we are the first to apply string kernels to automatically score essays. We are also the first to combine them with a high-level semantic feature representation, namely the bag-of-super-word-embeddings. We report the best performance on the Automated Student Assessment Prize data set, in both in-domain and cross-domain settings, surpassing recent state-of-the-art deep learning approaches.</abstract>
@@ -4615,7 +4615,7 @@
       <title>End-Task Oriented Textual Entailment via Deep Explorations of Inter-Sentence Interactions</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>540–545</pages>
       <abstract>This work deals with SciTail, a natural entailment challenge derived from a multi-choice question answering problem. The premises and hypotheses in SciTail were generated with no awareness of each other, and did not specifically aim at the entailment task. This makes it more challenging than other entailment data sets and more directly useful to the end-task – question answering. We propose DEISTE (deep explorations of inter-sentence interactions for textual entailment) for this entailment task. Given word-to-word interactions between the premise-hypothesis pair (P, H), DEISTE consists of: (i) a parameter-dynamic convolution to make important words in P and H play a dominant role in learnt representations; and (ii) a position-aware attentive convolution to encode the representation and position information of the aligned word pairs. Experiments show that DEISTE gets ≈5% improvement over prior state of the art and that the pretrained DEISTE on SciTail generalizes well on RTE-5.</abstract>
       <url hash="8c822319">P18-2086</url>
@@ -4664,7 +4664,7 @@
     <paper id="90">
       <title><fixed-case>GNEG</fixed-case>: Graph-Based Negative Sampling for word2vec</title>
       <author><first>Zheng</first><last>Zhang</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>566–571</pages>
       <abstract>Negative sampling is an important component in word2vec for distributed word representation learning. We hypothesize that taking into account global, corpus-level information and generating a different noise distribution for each target word better satisfies the requirements of negative examples for each training word than the original frequency-based distribution. In this purpose we pre-compute word co-occurrence statistics from the corpus and apply to it network algorithms such as random walk. We test this hypothesis through a set of experiments whose results show that our approach boosts the word analogy task by about 5% and improves the performance on word similarity tasks by about 1% compared to the skip-gram negative sampling baseline.</abstract>
       <url hash="58fa9761">P18-2090</url>
@@ -4716,7 +4716,7 @@
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Lei</first><last>Shu</last></author>
-      <author><first>Philip S.</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip S.</first><last>Yu</last></author>
       <pages>592–598</pages>
       <abstract>One key task of fine-grained sentiment analysis of product reviews is to extract product aspects or features that users have expressed opinions on. This paper focuses on supervised aspect extraction using deep learning. Unlike other highly sophisticated supervised deep learning models, this paper proposes a novel and yet simple CNN model employing two types of pre-trained embeddings for aspect extraction: general-purpose embeddings and domain-specific embeddings. Without using any additional supervision, this model achieves surprisingly good results, outperforming state-of-the-art sophisticated existing methods. To our knowledge, this paper is the first to report such double embeddings based CNN model for aspect extraction and achieve very good results.</abstract>
       <url hash="10fa390c">P18-2094</url>
@@ -4727,7 +4727,7 @@
     <paper id="95">
       <title>Will it Blend? Blending Weak and Strong Labeled Data in a Neural Network for Argumentation Mining</title>
       <author><first>Eyal</first><last>Shnarch</last></author>
-      <author><first>Carlos</first><last>Alzate</last></author>
+      <author id="carlos-alzate"><first>Carlos</first><last>Alzate</last></author>
       <author><first>Lena</first><last>Dankin</last></author>
       <author><first>Martin</first><last>Gleize</last></author>
       <author><first>Yufang</first><last>Hou</last></author>
@@ -4851,7 +4851,7 @@
       <title>Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation</title>
       <author><first>Poorya</first><last>Zaremoodi</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>656–661</pages>
       <abstract>Neural Machine Translation (NMT) is notorious for its need for large amounts of bilingual data. An effective approach to compensate for this requirement is Multi-Task Learning (MTL) to leverage different linguistic resources as a source of inductive bias. Current MTL architectures are based on the Seq2Seq transduction, and (partially) share different components of the models among the tasks. However, this MTL approach often suffers from task interference and is not able to fully capture commonalities among subsets of tasks. We address this issue by extending the recurrent units with multiple “blocks” along with a trainable “routing network”. The routing network enables adaptive collaboration by dynamic sharing of blocks conditioned on the task at hand, input, and model state. Empirical evaluation of two low-resource translation tasks, English to Vietnamese and Farsi, show +1 BLEU score improvements compared to strong baselines.</abstract>
       <url hash="d7969fe1">P18-2104</url>
@@ -4879,7 +4879,7 @@
       <title>Polyglot Semantic Role Labeling</title>
       <author><first>Phoebe</first><last>Mulcaire</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>667–672</pages>
       <abstract>Previous approaches to multilingual semantic dependency parsing treat languages independently, without exploiting the similarities between semantic structures across languages. We experiment with a new approach where we combine resources from different languages in the CoNLL 2009 shared task to build a single polyglot semantic dependency parser. Notwithstanding the absence of parallel data, and the dissimilarity in annotations between languages, our approach results in improvement in parsing performance on several languages over a monolingual baseline. Analysis of the polyglot models’ performance provides a new understanding of the similarities and differences between languages in the shared task.</abstract>
       <url hash="4478c6c1">P18-2106</url>
@@ -4931,7 +4931,7 @@
     <paper id="110">
       <title>Examining Temporality in Document Classification</title>
       <author><first>Xiaolei</first><last>Huang</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>694–699</pages>
       <abstract>Many corpora span broad periods of time. Language processing models trained during one time period may not work well in future time periods, and the best model may depend on specific times of year (e.g., people might describe hotels differently in reviews during the winter versus the summer). This study investigates how document classifiers trained on documents from certain time intervals perform on documents from other time intervals, considering both seasonal intervals (intervals that repeat across years, e.g., winter) and non-seasonal intervals (e.g., specific years). We show experimentally that classification performance varies over time, and that performance can be improved by using a standard domain adaptation approach to adjust for changes in time.</abstract>
       <url hash="3966b4ba">P18-2110</url>
@@ -4943,7 +4943,7 @@
     <paper id="111">
       <title>Personalized Language Model for Query Auto-Completion</title>
       <author><first>Aaron</first><last>Jaech</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>700–705</pages>
       <abstract>Query auto-completion is a search engine feature whereby the system suggests completed queries as the user types. Recently, the use of a recurrent neural network language model was suggested as a method of generating query completions. We show how an adaptable language model can be used to generate personalized completions and how the model can use online updating to make predictions for users not seen during training. The personalized predictions are significantly better than a baseline that uses no user information.</abstract>
       <url hash="327a60d3">P18-2111</url>
@@ -4966,14 +4966,14 @@
     </paper>
     <paper id="113">
       <title>Learning Simplifications for Specific Target Audiences</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>712–718</pages>
       <abstract>Text simplification (TS) is a monolingual text-to-text transformation task where an original (complex) text is transformed into a target (simpler) text. Most recent work is based on sequence-to-sequence neural models similar to those used for machine translation (MT). Different from MT, TS data comprises more elaborate transformations, such as sentence splitting. It can also contain multiple simplifications of the same original text targeting different audiences, such as school grade levels. We explore these two features of TS to build models tailored for specific grade levels. Our approach uses a standard sequence-to-sequence architecture where the original sequence is annotated with information about the target audience and/or the (predicted) type of simplification operation. We show that it outperforms state-of-the-art TS approaches (up to 3 and 12 BLEU and SARI points, respectively), including when training data for the specific complex-simple combination of grade levels is not available, i.e. zero-shot learning.</abstract>
       <url hash="3b8ffe03">P18-2113</url>
       <attachment type="note" hash="9413a55f">P18-2113.Notes.pdf</attachment>
-      <video href="P18-2113.mp4"/>
       <attachment type="presentation" hash="8a7c4d46">P18-2113.Presentation.pdf</attachment>
+      <video href="P18-2113.mp4"/>
       <doi>10.18653/v1/P18-2113</doi>
       <bibkey>scarton-specia-2018-learning</bibkey>
     </paper>
@@ -5008,7 +5008,7 @@
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Kenton</first><last>Lee</last></author>
       <author><first>Nicholas</first><last>FitzGerald</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>732–739</pages>
       <abstract>LSTMs were introduced to combat vanishing gradients in simple RNNs by augmenting them with gated additive recurrent connections. We present an alternative view to explain the success of LSTMs: the gates themselves are versatile recurrent models that provide more representational power than previously appreciated. We do this by decoupling the LSTM’s gates from the embedded simple RNN, producing a new class of RNNs where the recurrence computes an element-wise weighted sum of context-independent functions of the input. Ablations on a range of problems demonstrate that the gating mechanism alone performs as well as an LSTM in most settings, strongly suggesting that the gates are doing much more in practice than just alleviating vanishing gradients.</abstract>
       <url hash="ca252f45">P18-2116</url>
@@ -5047,7 +5047,7 @@
     <paper id="119">
       <title>Tackling the Story Ending Biases in The Story Cloze Test</title>
       <author><first>Rishi</first><last>Sharma</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
       <pages>752–757</pages>
@@ -5074,15 +5074,15 @@
     </paper>
     <paper id="121">
       <title>Pretraining Sentiment Classifiers with Unlabeled Dialog Data</title>
-      <author><first>Toru</first><last>Shimizu</last></author>
+      <author id="tohru-shimizu"><first>Toru</first><last>Shimizu</last></author>
       <author><first>Nobuyuki</first><last>Shimizu</last></author>
       <author><first>Hayato</first><last>Kobayashi</last></author>
       <pages>764–770</pages>
       <abstract>The huge cost of creating labeled training data is a common problem for supervised learning tasks such as sentiment classification. Recent studies showed that pretraining with unlabeled data via a language model can improve the performance of classification models. In this paper, we take the concept a step further by using a conditional language model, instead of a language model. Specifically, we address a sentiment classification task for a tweet analysis service as a case study and propose a pretraining strategy with unlabeled dialog data (tweet-reply pairs) via an encoder-decoder model. Experimental results show that our strategy can improve the performance of sentiment classifiers and outperform several state-of-the-art strategies including language model pretraining.</abstract>
       <url hash="21ab1cd9">P18-2121</url>
       <attachment type="note" hash="17c6b5cb">P18-2121.Notes.pdf</attachment>
-      <video href="P18-2121.mp4"/>
       <attachment type="presentation" hash="75b31d7f">P18-2121.Presentation.pdf</attachment>
+      <video href="P18-2121.mp4"/>
       <doi>10.18653/v1/P18-2121</doi>
       <bibkey>shimizu-etal-2018-pretraining</bibkey>
     </paper>
@@ -5102,7 +5102,7 @@
     <paper id="123">
       <title>Cross-Target Stance Classification with Self-Attention Networks</title>
       <author><first>Chang</first><last>Xu</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Surya</first><last>Nepal</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
       <pages>778–783</pages>
@@ -5147,7 +5147,7 @@
       <editor><first>Jeniya</first><last>Tabassum</last></editor>
       <editor><first>Rob</first><last>Voigt</last></editor>
       <editor><first>Wanxiang</first><last>Che</last></editor>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
       <editor><first>Malvina</first><last>Nissim</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Melbourne, Australia</address>
@@ -5212,7 +5212,7 @@
     </paper>
     <paper id="6">
       <title>Recognizing Complex Entity Mentions: A Review and Future Directions</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <pages>37–44</pages>
       <abstract>Standard named entity recognizers can effectively recognize entity mentions that consist of contiguous tokens and do not overlap with each other. However, in practice, there are many domains, such as the biomedical domain, in which there are nested, overlapping, and discontinuous entity mentions. These complex mentions cannot be directly recognized by conventional sequence tagging models because they may break the assumptions based on which sequence tagging techniques are built. We review the existing methods which are revised to tackle complex entity mentions and categorize them as tokenlevel and sentence-level approaches. We then identify the research gap, and discuss some directions that we are exploring.</abstract>
       <url hash="ac6b2a5a">P18-3006</url>
@@ -5332,7 +5332,7 @@
     <paper id="17">
       <title>Exploring Chunk Based Templates for Generating a subset of <fixed-case>E</fixed-case>nglish Text</title>
       <author><first>Nikhilesh</first><last>Bhatnagar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Radhika</first><last>Mamidi</last></author>
       <pages>120–126</pages>
       <abstract>Natural Language Generation (NLG) is a research task which addresses the automatic generation of natural language text representative of an input non-linguistic collection of knowledge. In this paper, we address the task of the generation of grammatical sentences in an isolated context given a partial bag-of-words which the generated sentence must contain. We view the task as a search problem (a problem of choice) involving combinations of smaller chunk based templates extracted from a training corpus to construct a complete sentence. To achieve that, we propose a fitness function which we use in conjunction with an evolutionary algorithm as the search procedure to arrive at a potentially grammatical sentence (modeled by the fitness score) which satisfies the input constraints.</abstract>
@@ -5353,7 +5353,7 @@
     </paper>
     <paper id="19">
       <title>Alignment Analysis of Sequential Segmentation of Lexicons to Improve Automatic Cognate Detection</title>
-      <author><first>Pranav</first><last>A</last></author>
+      <author id="pranav-a"><first>Pranav</first><last>A</last></author>
       <pages>134–140</pages>
       <abstract>Ranking functions in information retrieval are often used in search engines to extract the relevant answers to the query. This paper makes use of this notion of information retrieval and applies onto the problem domain of cognate detection. The main contributions of this paper are: (1) positional tokenization, which incorporates the sequential notion; (2) graphical error modelling, which calculates the morphological shifts. The current research work only distinguishes whether a pair of words are cognates or not. However, we also study if we could predict a possible cognate from the given input. Our study shows that language modelling based retrieval functions with positional tokenization and error modelling tend to give better results than competing baselines.</abstract>
       <url hash="957308f1">P18-3019</url>
@@ -5372,7 +5372,7 @@
     <paper id="21">
       <title>Automatic Spelling Correction for Resource-Scarce Languages using Deep Learning</title>
       <author><first>Pravallika</first><last>Etoori</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
       <author><first>Radhika</first><last>Mamidi</last></author>
       <pages>146–152</pages>
       <abstract>Spelling correction is a well-known task in Natural Language Processing (NLP). Automatic spelling correction is important for many NLP applications like web search engines, text summarization, sentiment analysis etc. Most approaches use parallel data of noisy and correct word mappings from different sources as training data for automatic spelling correction. Indic languages are resource-scarce and do not have such parallel data due to low volume of queries and non-existence of such prior implementations. In this paper, we show how to build an automatic spelling corrector for resource-scarce languages. We propose a sequence-to-sequence deep learning model which trains end-to-end. We perform experiments on synthetic datasets created for Indic languages, Hindi and Telugu, by incorporating the spelling mistakes committed at character level. A comparative evaluation shows that our model is competitive with the existing spell checking and correction techniques for Indic languages.</abstract>
@@ -5385,7 +5385,7 @@
       <author><first>Payal</first><last>Khullar</last></author>
       <author><first>Konigari</first><last>Rachna</last></author>
       <author><first>Mukul</first><last>Hase</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>153–158</pages>
       <abstract>This paper presents a system that automatically generates multiple, natural language questions using relative pronouns and relative adverbs from complex English sentences. Our system is syntax-based, runs on dependency parse information of a single-sentence input, and achieves high accuracy in terms of syntactic correctness, semantic adequacy, fluency and uniqueness. One of the key advantages of our system, in comparison with other rule-based approaches, is that we nearly eliminate the chances of getting a wrong wh-word in the generated question, by fetching the requisite wh-word from the input sentence itself. Depending upon the input, we generate both factoid and descriptive type questions. To the best of our information, the exploitation of wh-pronouns and wh-adverbs to generate questions is novel in the Automatic Question Generation task.</abstract>
       <url hash="8e344a83">P18-3022</url>
@@ -5428,7 +5428,7 @@
       <title><fixed-case>N</fixed-case>ovel<fixed-case>P</fixed-case>erspective: Identifying Point of View Characters</title>
       <author><first>Lyndon</first><last>White</last></author>
       <author><first>Roberto</first><last>Togneri</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Mohammed</first><last>Bennamoun</last></author>
       <pages>7–12</pages>
       <abstract>We present NovelPerspective: a tool to allow consumers to subset their digital literature, based on point of view (POV) character. Many novels have multiple main characters each with their own storyline running in parallel. A well-known example is George R. R. Martin’s novel: “A Game of Thrones”, and others from that series. Our tool detects the main character that each section is from the POV of, and allows the user to generate a new ebook with only those sections. This gives consumers new options in how they consume their media; allowing them to pursue the storylines sequentially, or skip chapters about characters they find boring. We present two heuristic-based baselines, and two machine learning based methods for the detection of the main character.</abstract>
@@ -5464,8 +5464,8 @@
       <author><first>Pasquale</first><last>Minervini</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Johannes</first><last>Welbl</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
-      <author><first>Matko</first><last>Bošnjak</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="matko-bosnjak"><first>Matko</first><last>Bošnjak</last></author>
       <author><first>Jeff</first><last>Mitchell</last></author>
       <author><first>Thomas</first><last>Demeester</last></author>
       <author><first>Tim</first><last>Dettmers</last></author>
@@ -5491,9 +5491,9 @@
     </paper>
     <paper id="7">
       <title><fixed-case>N</fixed-case>ext<fixed-case>G</fixed-case>en <fixed-case>AML</fixed-case>: Distributed Deep Learning based Language Technologies to Augment Anti Money Laundering Investigation</title>
-      <author><first>Jingguang</first><last>Han</last></author>
+      <author id="jingguang-han"><first>Jingguang</first><last>Han</last></author>
       <author><first>Utsab</first><last>Barman</last></author>
-      <author><first>Jeremiah</first><last>Hayes</last></author>
+      <author id="jer-hayes"><first>Jeremiah</first><last>Hayes</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Edward</first><last>Burgin</last></author>
       <author><first>Dadong</first><last>Wan</last></author>
@@ -5532,7 +5532,7 @@
       <title>Sentence Suggestion of <fixed-case>J</fixed-case>apanese Functional Expressions for <fixed-case>C</fixed-case>hinese-speaking Learners</title>
       <author><first>Jun</first><last>Liu</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>56–61</pages>
       <abstract>We present a computer-assisted learning system, Jastudy, which is particularly designed for Chinese-speaking learners of Japanese as a second language (JSL) to learn Japanese functional expressions with suggestion of appropriate example sentences. The system automatically recognizes Japanese functional expressions using a free Japanese morphological analyzer MeCab, which is retrained on a new Conditional Random Fields (CRF) model. In order to select appropriate example sentences, we apply a pairwise-based machine learning tool, Support Vector Machine for Ranking (SVMrank) to estimate the complexity of the example sentences using Japanese–Chinese homographs as an important feature. In addition, we cluster the example sentences that contain Japanese functional expressions with two or more meanings and usages, based on part-of-speech, conjugation forms of verbs and semantic attributes, using the K-means clustering algorithm in Scikit-Learn. Experimental results demonstrate the effectiveness of our approach.</abstract>
       <url hash="92f03912">P18-4010</url>
@@ -5553,12 +5553,12 @@
     </paper>
     <paper id="12">
       <title><fixed-case>SANTO</fixed-case>: A Web-based Annotation Tool for Ontology-driven Slot Filling</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Hendrik</first><last>ter Horst</last></author>
       <author><first>Frank</first><last>Grimm</last></author>
       <author><first>Tim</first><last>Diekmann</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>68–73</pages>
       <abstract>Supervised machine learning algorithms require training data whose generation for complex relation extraction tasks tends to be difficult. Being optimized for relation extraction at sentence level, many annotation tools lack in facilitating the annotation of relational structures that are widely spread across the text. This leads to non-intuitive and cumbersome visualizations, making the annotation process unnecessarily time-consuming. We propose SANTO, an easy-to-use, domain-adaptive annotation tool specialized for complex slot filling tasks which may involve problems of cardinality and referential grounding. The web-based architecture enables fast and clearly structured annotation for multiple users in parallel. Relational structures are formulated as templates following the conceptualization of an underlying ontology. Further, import and export procedures of standard formats enable interoperability with external sources and tools.</abstract>
       <url hash="54cd5626">P18-4012</url>
@@ -5598,16 +5598,16 @@
     </paper>
     <paper id="16">
       <title><fixed-case>S</fixed-case>cout<fixed-case>B</fixed-case>ot: A Dialogue System for Collaborative Navigation</title>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>Felix</first><last>Gervits</last></author>
-      <author><first>Cory J.</first><last>Hayes</last></author>
+      <author id="cory-hayes"><first>Cory J.</first><last>Hayes</last></author>
       <author><first>Pooja</first><last>Moolchandani</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>John G.</first><last>Rogers III</last></author>
       <author><first>Carlos</first><last>Sanchez Amaro</last></author>
       <author><first>Matthew</first><last>Marge</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>93–98</pages>
       <abstract>ScoutBot is a dialogue interface to physical and simulated robots that supports collaborative exploration of environments. The demonstration will allow users to issue unconstrained spoken language commands to ScoutBot. ScoutBot will prompt for clarification if the user’s instruction needs additional input. It is trained on human-robot dialogue collected from Wizard-of-Oz experiments, where robot responses were initiated by a human wizard in previous interactions. The demonstration will show a simulated ground robot (Clearpath Jackal) in a simulated environment supported by ROS (Robot Operating System).</abstract>
       <url hash="68ce707b">P18-4016</url>
@@ -5618,8 +5618,8 @@
       <title>The <fixed-case>SUMMA</fixed-case> Platform: A Scalable Infrastructure for Multi-lingual Multi-media Monitoring</title>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Renārs</first><last>Liepins</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
       <author><first>Sebastião</first><last>Miranda</last></author>
       <author><first>David</first><last>Nogueira</last></author>
       <pages>99–104</pages>
@@ -5661,7 +5661,7 @@
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Alham Fikri</first><last>Aji</last></author>
       <author><first>Nikolay</first><last>Bogoychev</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>116–121</pages>
       <abstract>We present Marian, an efficient and self-contained Neural Machine Translation framework with an integrated automatic differentiation engine based on dynamic computation graphs. Marian is written entirely in C++. We describe the design of the encoder-decoder framework and demonstrate that a research-friendly toolkit can achieve high training and translation speed.</abstract>
@@ -5679,7 +5679,7 @@
       <author><first>Nickolay</first><last>Bushkov</last></author>
       <author><first>Olga</first><last>Gureenkova</last></author>
       <author><first>Taras</first><last>Khakhulin</last></author>
-      <author><first>Yuri</first><last>Kuratov</last></author>
+      <author id="yurii-kuratov"><first>Yuri</first><last>Kuratov</last></author>
       <author><first>Denis</first><last>Kuznetsov</last></author>
       <author><first>Alexey</first><last>Litinsky</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
@@ -5701,7 +5701,7 @@
       <title><fixed-case>RETURNN</fixed-case> as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition</title>
       <author><first>Albert</first><last>Zeyer</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>128–133</pages>
       <abstract>We compare the fast training and decoding speed of RETURNN of attention models for translation, due to fast CUDA LSTM kernels, and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1% BLEU improvement absolute and it allows to train deeper recurrent encoder networks. Promising preliminary results on max. expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop to experiment with alternative architectures, and its generality allows to use it on a wide range of applications.</abstract>
       <url hash="d826553f">P18-4022</url>
@@ -5751,7 +5751,7 @@
     </frontmatter>
     <paper id="1">
       <title>100 Things You Always Wanted to Know about Semantics &amp; Pragmatics But Were Afraid to Ask</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>1</pages>
       <abstract>Meaning is a fundamental concept in Natural Language Processing (NLP), given its aim to build systems that mean what they say to you, and understand what you say to them. In order for NLP to scale beyond partial, task-specific solutions, it must be informed by what is known about how humans use language to express and understand communicative intents. The purpose of this tutorial is to present a selection of useful information about semantics and pragmatics, as understood in linguistics, in a way that’s accessible to and useful for NLP practitioners with minimal (or even no) prior training in linguistics. The tutorial content is based on a manuscript in progress I am co-authoring with Prof. Alex Lascarides of the University of Edinburgh.</abstract>
       <url hash="5ecf1a30">P18-5001</url>
@@ -5805,7 +5805,7 @@
       <author><first>Pradeep</first><last>Dasigi</last></author>
       <author><first>Srinivasan</first><last>Iyer</last></author>
       <author><first>Alane</first><last>Suhr</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>17–18</pages>
       <abstract>Semantic parsing, the study of translating natural language utterances into machine-executable programs, is a well-established research area and has applications in question answering, instruction following, voice assistants, and code generation. In the last two years, the models used for semantic parsing have changed dramatically with the introduction of neural encoder-decoder methods that allow us to rethink many of the previous assumptions underlying semantic parsing. We aim to inform those already interested in semantic parsing research of these new developments in the field, as well as introduce the topic as an exciting research area to those who are unfamiliar with it. Current approaches for neural semantic parsing share several similarities with neural machine translation, but the key difference between the two fields is that semantic parsing translates natural language into a formal language, while machine translation translates it into a different natural language. The formal language used in semantic parsing allows for constrained decoding, where the model is constrained to only produce outputs that are valid formal statements. We will describe the various approaches researchers have taken to do this. We will also discuss the choice of formal languages used by semantic parsers, and describe why much recent work has chosen to use standard programming languages instead of more linguistically-motivated representations. We will then describe a particularly challenging setting for semantic parsing, where there is additional context or interaction that the parser must take into account when translating natural language to formal language, and give an overview of recent work in this direction. Finally, we will introduce some tools available in AllenNLP for doing semantic parsing research.</abstract>
       <url hash="99ea8f14">P18-5006</url>
@@ -5825,10 +5825,10 @@
     </paper>
     <paper id="8">
       <title>Multi-lingual Entity Discovery and Linking</title>
-      <author><first>Avi</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avi</first><last>Sil</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Silviu-Petru</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu-Petru</first><last>Cucerzan</last></author>
       <pages>22–29</pages>
       <abstract>The primary goals of this tutorial are to review the framework of cross-lingual EL and motivate it as a broad paradigm for the Information Extraction task. We will start by discussing the traditional EL techniques and metrics and address questions relevant to the adequacy of these to across domains and languages. We will then present more recent approaches such as Neural EL, discuss the basic building blocks of a state-of-the-art neural EL system and analyze some of the current results on English EL. We will then proceed to Cross-lingual EL and discuss methods that work across languages. In particular, we will discuss and compare multiple methods that make use of multi-lingual word embeddings. We will also present EL methods that work for both name tagging and linking in very low resource languages. Finally, we will discuss the uses of cross-lingual EL in a variety of applications like search engines and commercial product selling applications. Also, contrary to the 2014 EL tutorial, we will also focus on Entity Discovery which is an essential component of EL.</abstract>
       <url hash="de8ae3c5">P18-5008</url>
diff --git a/data/xml/P19.xml b/data/xml/P19.xml
index 3b36f126f1..2ed5396792 100644
--- a/data/xml/P19.xml
+++ b/data/xml/P19.xml
@@ -5,8 +5,8 @@
       <booktitle>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</booktitle>
       <url hash="8d8cdc49">P19-1</url>
       <editor><first>Anna</first><last>Korhonen</last></editor>
-      <editor><first>David</first><last>Traum</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="david-traum"><first>David</first><last>Traum</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>July</month>
@@ -67,7 +67,7 @@
       <title>Do Neural Dialog Systems Use the Conversation History Effectively? An Empirical Study</title>
       <author><first>Chinnadhurai</first><last>Sankar</last></author>
       <author><first>Sandeep</first><last>Subramanian</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <author><first>Sarath</first><last>Chandar</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
       <pages>32–37</pages>
@@ -80,7 +80,7 @@
     <paper id="5">
       <title>Boosting Dialog Response Generation</title>
       <author><first>Wenchao</first><last>Du</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>38–43</pages>
       <abstract>Neural models have become one of the most important approaches to dialog response generation. However, they still tend to generate the most common and generic responses in the corpus all the time. To address this problem, we designed an iterative training process and ensemble method based on boosting. We combined our method with different training and decoding paradigms as the base model, including mutual-information-based decoding and reward-augmented maximum likelihood learning. Empirical results show that our approach can significantly improve the diversity and relevance of the responses generated by all base models, backed by objective measurements and human evaluation.</abstract>
       <url hash="d672a301">P19-1005</url>
@@ -107,7 +107,7 @@
       <title>Semantic Parsing with Dual Learning</title>
       <author><first>Ruisheng</first><last>Cao</last></author>
       <author><first>Su</first><last>Zhu</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <author><first>Jieyu</first><last>Li</last></author>
       <author><first>Kai</first><last>Yu</last></author>
       <pages>51–64</pages>
@@ -174,7 +174,7 @@
     </paper>
     <paper id="12">
       <title>The (Non-)Utility of Structural Features in <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-based Dependency Parsers</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>117–128</pages>
       <abstract>Classical non-neural dependency parsers put considerable effort on the design of feature functions. Especially, they benefit from information coming from structural features, such as features drawn from neighboring tokens in the dependency tree. In contrast, their BiLSTM-based successors achieve state-of-the-art performance without explicit information about the structural context. In this paper we aim to answer the question: How much structural context are the BiLSTM representations able to capture implicitly? We show that features drawn from partial subtrees become redundant when the BiLSTMs are used. We provide a deep insight into information flow in transition- and graph-based neural architectures to demonstrate where the implicit information comes from when the parsers make their decisions. Finally, with model ablations we demonstrate that the structural context is not only present in the models, but it significantly influences their performance.</abstract>
@@ -214,7 +214,7 @@
       <title>Massively Multilingual Transfer for <fixed-case>NER</fixed-case></title>
       <author><first>Afshin</first><last>Rahimi</last></author>
       <author><first>Yuan</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>151–164</pages>
       <abstract>In cross-lingual transfer, NLP models over one or more source languages are applied to a low-resource target language. While most prior work has used a single source model or a few carefully selected models, here we consider a “massive” setting with many such models. This setting raises the problem of poor transfer, particularly from distant languages. We propose two techniques for modulating the transfer, suitable for zero-shot or few-shot learning, respectively. Evaluating on named entity recognition, we show that our techniques are much more effective than strong baselines, including standard ensembling, and our unsupervised method rivals oracle selection of the single best individual model.</abstract>
       <url hash="8d4849be">P19-1015</url>
@@ -258,7 +258,7 @@
       <author><first>Barun</first><last>Patra</last></author>
       <author><first>Joel Ruben Antony</first><last>Moniz</last></author>
       <author><first>Sarthak</first><last>Garg</last></author>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <pages>184–193</pages>
       <abstract>Recent work on bilingual lexicon induction (BLI) has frequently depended either on aligned bilingual lexicons or on distribution matching, often with an assumption about the isometry of the two spaces. We propose a technique to quantitatively estimate this assumption of the isometry between two embedding spaces and empirically show that this assumption weakens as the languages in question become increasingly etymologically distant. We then propose Bilingual Lexicon Induction with Semi-Supervision (BLISS) — a semi-supervised approach that relaxes the isometric assumption while leveraging both limited aligned bilingual lexicons and a larger set of unaligned word embeddings, as well as a novel hubness filtering technique. Our proposed method obtains state of the art results on 15 of 18 language pairs on the MUSE dataset, and does particularly well when the embedding spaces don’t appear to be isometric. In addition, we also show that adding supervision stabilizes the learning procedure, and is effective even with minimal supervision.</abstract>
@@ -271,8 +271,8 @@
     <paper id="19">
       <title>An Effective Approach to Unsupervised Machine Translation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>194–203</pages>
       <abstract>While machine translation has traditionally relied on large amounts of parallel corpora, a recent research line has managed to train both Neural Machine Translation (NMT) and Statistical Machine Translation (SMT) systems using monolingual corpora only. In this paper, we identify and address several deficiencies of existing unsupervised SMT approaches by exploiting subword information, developing a theoretically well founded unsupervised tuning method, and incorporating a joint refinement procedure. Moreover, we use our improved SMT system to initialize a dual NMT model, which is further fine-tuned through on-the-fly back-translation. Together, we obtain large improvements over the previous state-of-the-art in unsupervised machine translation. For instance, we get 22.5 BLEU points in English-to-German WMT 2014, 5.5 points more than the previous best unsupervised system, and 0.5 points more than the (supervised) shared task winner back in 2014.</abstract>
       <url hash="d450f4bc">P19-1019</url>
@@ -308,7 +308,7 @@
       <title>Domain Adaptive Inference for Neural Machine Translation</title>
       <author><first>Danielle</first><last>Saunders</last></author>
       <author><first>Felix</first><last>Stahlberg</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>222–228</pages>
       <abstract>We investigate adaptive ensemble weighting for Neural Machine Translation, addressing the case of improving performance on a new and potentially unknown domain without sacrificing performance on the original domain. We adapt sequentially across two Spanish-English and three English-German tasks, comparing unregularized fine-tuning, L2 and Elastic Weight Consolidation. We then report a novel scheme for adaptive NMT ensemble decoding by extending Bayesian Interpolation with source information, and report strong improvements across test domains without access to the domain label.</abstract>
@@ -319,7 +319,7 @@
     </paper>
     <paper id="23">
       <title>Neural Relation Extraction for Knowledge Base Enrichment</title>
-      <author><first>Bayu Distiawan</first><last>Trisedya</last></author>
+      <author id="bayu-distiawan"><first>Bayu Distiawan</first><last>Trisedya</last></author>
       <author><first>Gerhard</first><last>Weikum</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
       <author><first>Rui</first><last>Zhang</last></author>
@@ -403,7 +403,7 @@
       <title>You Only Need Attention to Traverse Trees</title>
       <author><first>Mahtab</first><last>Ahmed</last></author>
       <author><first>Muhammad Rifayat</first><last>Samee</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>316–322</pages>
       <abstract>In recent NLP research, a topic of interest is universal sentence encoding, sentence representations that can be used in any supervised task. At the word sequence level, fully attention-based models suffer from two problems: a quadratic increase in memory consumption with respect to the sentence length and an inability to capture and use syntactic information. Recursive neural nets can extract very good syntactic information by traversing a tree structure. To this end, we propose Tree Transformer, a model that captures phrase level syntax for constituency trees as well as word-level dependencies for dependency trees by doing recursive traversal only with attention. Evaluation of this model on four tasks gets noteworthy results compared to the standard transformer and LSTM-based models as well as tree-structured LSTMs. Ablation studies to find whether positional information is inherently encoded in the trees and which type of attention is suitable for doing the recursive traversal are provided.</abstract>
       <url hash="7d82b714">P19-1030</url>
@@ -426,7 +426,7 @@
     <paper id="32">
       <title>Adaptive Attention Span in Transformers</title>
       <author><first>Sainbayar</first><last>Sukhbaatar</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
       <pages>331–335</pages>
@@ -454,7 +454,7 @@
       <title>Automatic Domain Adaptation Outperforms Manual Domain Adaptation for Predicting Financial Outcomes</title>
       <author><first>Marina</first><last>Sedinkina</last></author>
       <author><first>Nikolas</first><last>Breitkopf</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>346–359</pages>
       <abstract>In this paper, we automatically create sentiment dictionaries for predicting financial outcomes. We compare three approaches: (i) manual adaptation of the domain-general dictionary H4N, (ii) automatic adaptation of H4N and (iii) a combination consisting of first manual, then automatic adaptation. In our experiments, we demonstrate that the automatically adapted sentiment dictionary outperforms the previous state of the art in predicting the financial outcomes excess return and volatility. In particular, automatic adaptation performs better than manual adaptation. In our analysis, we find that annotation based on an expert’s a priori belief about a word’s meaning can be incorrect – annotation should be performed based on the word’s contexts in the target domain instead.</abstract>
       <url hash="4c641b70">P19-1034</url>
@@ -552,7 +552,7 @@
     <paper id="43">
       <title>This Email Could Save Your Life: Introducing the Task of Email Subject Line Generation</title>
       <author><first>Rui</first><last>Zhang</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>446–456</pages>
       <abstract>Given the overwhelming number of emails, an effective subject line becomes essential to better inform the recipient of the email’s content. In this paper, we propose and study the task of <i>email subject line generation</i>: automatically generating an email subject line from the email body. We create the first dataset for this task and find that email subject line generation favor extremely abstractive summary which differentiates it from news headline generation or news single document summarization. We then develop a novel deep learning method and compare it to several baselines as well as recent state-of-the-art text summarization systems. We also investigate the efficacy of several automatic metrics based on correlations with human judgments and propose a new automatic evaluation metric. Our system outperforms competitive baselines given both automatic and human evaluations. To our knowledge, this is the first work to tackle the problem of effective email subject line generation.</abstract>
       <url hash="577c59dd">P19-1043</url>
@@ -576,7 +576,7 @@
       <title>Adversarial Attention Modeling for Multi-dimensional Emotion Regression</title>
       <author><first>Suyang</first><last>Zhu</last></author>
       <author><first>Shoushan</first><last>Li</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>471–480</pages>
       <abstract>In this paper, we propose a neural network-based approach, namely Adversarial Attention Network, to the task of multi-dimensional emotion regression, which automatically rates multiple emotion dimension scores for an input text. Especially, to determine which words are valuable for a particular emotion dimension, an attention layer is trained to weight the words in an input sequence. Furthermore, adversarial training is employed between two attention layers to learn better word weights via a discriminator. In particular, a shared attention layer is incorporated to learn public word weights between two emotion dimensions. Empirical evaluation on the EMOBANK corpus shows that our approach achieves notable improvements in r-values on both EMOBANK Reader’s and Writer’s multi-dimensional emotion regression tasks in all domains over the state-of-the-art baselines.</abstract>
       <url hash="fc5d6b43">P19-1045</url>
@@ -597,7 +597,7 @@
     <paper id="47">
       <title>Modeling Financial Analysts’ Decision Making via the Pragmatics and Semantics of Earnings Calls</title>
       <author><first>Katherine</first><last>Keith</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>493–503</pages>
       <abstract>Every fiscal quarter, companies hold earnings calls in which company executives respond to questions from analysts. After these calls, analysts often change their price target recommendations, which are used in equity re- search reports to help investors make deci- sions. In this paper, we examine analysts’ decision making behavior as it pertains to the language content of earnings calls. We identify a set of 20 pragmatic features of analysts’ questions which we correlate with analysts’ pre-call investor recommendations. We also analyze the degree to which semantic and pragmatic features from an earnings call complement market data in predicting analysts’ post-call changes in price targets. Our results show that earnings calls are moderately predictive of analysts’ decisions even though these decisions are influenced by a number of other factors including private communication with company executives and market conditions. A breakdown of model errors indicates disparate performance on calls from different market sectors.</abstract>
       <url hash="f3f0da25">P19-1047</url>
@@ -620,7 +620,7 @@
     <paper id="49">
       <title>Decompositional Argument Mining: A General Purpose Approach for Argument Graph Construction</title>
       <author><first>Debela</first><last>Gemechu</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>516–526</pages>
       <abstract>This work presents an approach decomposing propositions into four functional components and identify the patterns linking those components to determine argument structure. The entities addressed by a proposition are target concepts and the features selected to make a point about the target concepts are aspects. A line of reasoning is followed by providing evidence for the points made about the target concepts via aspects. Opinions on target concepts and opinions on aspects are used to support or attack the ideas expressed by target concepts and aspects. The relations between aspects, target concepts, opinions on target concepts and aspects are used to infer the argument relations. Propositions are connected iteratively to form a graph structure. The approach is generic in that it is not tuned for a specific corpus and evaluated on three different corpora from the literature: AAEC, AMT, US2016G1tv and achieved an F score of 0.79, 0.77 and 0.64, respectively.</abstract>
       <url hash="25fc6ace">P19-1049</url>
@@ -634,7 +634,7 @@
       <author><first>Navonil</first><last>Majumder</last></author>
       <author><first>Gautam</first><last>Naik</last></author>
       <author><first>Erik</first><last>Cambria</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>527–536</pages>
       <abstract>Emotion recognition in conversations is a challenging task that has recently gained popularity due to its potential applications. Until now, however, a large-scale multimodal multi-party emotional conversational database containing more than two speakers per dialogue was missing. Thus, we propose the Multimodal EmotionLines Dataset (MELD), an extension and enhancement of EmotionLines. MELD contains about 13,000 utterances from 1,433 dialogues from the TV-series Friends. Each utterance is annotated with emotion and sentiment labels, and encompasses audio, visual and textual modalities. We propose several strong multimodal baselines and show the importance of contextual and multimodal information for emotion recognition in conversations. The full dataset is available for use at <url>http://affective-meld.github.io</url>.</abstract>
       <url hash="93893a69">P19-1050</url>
@@ -646,7 +646,7 @@
       <author><first>Minghao</first><last>Hu</last></author>
       <author><first>Yuxing</first><last>Peng</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <author><first>Yiwei</first><last>Lv</last></author>
       <pages>537–546</pages>
       <abstract>Open-domain targeted sentiment analysis aims to detect opinion targets along with their sentiment polarities from a sentence. Prior work typically formulates this task as a sequence tagging problem. However, such formulation suffers from problems such as huge search space and sentiment inconsistency. To address these problems, we propose a span-based extract-then-classify framework, where multiple opinion targets are directly extracted from the sentence under the supervision of target span boundaries, and corresponding polarities are then classified using their span representations. We further investigate three approaches under this framework, namely the pipeline, joint, and collapsed models. Experiments on three benchmark datasets show that our approach consistently outperforms the sequence tagging baseline. Moreover, we find that the pipeline model achieves the best performance compared with the other two models.</abstract>
@@ -720,7 +720,7 @@
     <paper id="57">
       <title>A Corpus for Modeling User and Language Effects in Argumentation on Online Debating</title>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>602–607</pages>
       <abstract>Existing argumentation datasets have succeeded in allowing researchers to develop computational methods for analyzing the content, structure and linguistic features of argumentative text. They have been much less successful in fostering studies of the effect of “user” traits — characteristics and beliefs of the participants — on the debate/argument outcome as this type of user information is generally not available. This paper presents a dataset of 78,376 debates generated over a 10-year period along with surprisingly comprehensive participant profiles. We also complete an example study using the dataset to analyze the effect of selected user traits on the debate outcome in comparison to the linguistic features typically employed in studies of this kind.</abstract>
       <url hash="b04a3bb0">P19-1057</url>
@@ -732,8 +732,8 @@
       <author><first>Sheng</first><last>Xu</last></author>
       <author><first>Peifeng</first><last>Li</last></author>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>608–618</pages>
       <abstract>In the literature, most of the previous studies on English implicit discourse relation recognition only use sentence-level representations, which cannot provide enough semantic information in Chinese due to its unique paratactic characteristics. In this paper, we propose a topic tensor network to recognize Chinese implicit discourse relations with both sentence-level and topic-level representations. In particular, besides encoding arguments (discourse units) using a gated convolutional network to obtain sentence-level representations, we train a simplified topic model to infer the latent topic-level representations. Moreover, we feed the two pairs of representations to two factored tensor networks, respectively, to capture both the sentence-level interactions and topic-level relevance using multi-slice tensors. Experimentation on CDTB, a Chinese discourse corpus, shows that our proposed model significantly outperforms several state-of-the-art baselines in both micro and macro F1-scores.</abstract>
       <url hash="9e1a7fcc">P19-1058</url>
@@ -743,7 +743,7 @@
     <paper id="59">
       <title>Learning from Omission</title>
       <author><first>Bill</first><last>McDowell</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>619–628</pages>
       <abstract>Pragmatic reasoning allows humans to go beyond the literal meaning when interpret- ing language in context. Previous work has shown that such reasoning can improve the performance of already-trained language understanding systems. Here, we explore whether pragmatic reasoning during training can improve the quality of learned meanings. Our experiments on reference game data show that end-to-end pragmatic training produces more accurate utterance interpretation models, especially when data is sparse and language is complex.</abstract>
       <url hash="67a08fdd">P19-1059</url>
@@ -765,7 +765,7 @@
       <author><first>Sonia</first><last>Badene</last></author>
       <author><first>Kate</first><last>Thompson</last></author>
       <author><first>Jean-Pierre</first><last>Lorré</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>640–645</pages>
       <abstract>This paper investigates the advantages and limits of data programming for the task of learning discourse structure. The data programming paradigm implemented in the Snorkel framework allows a user to label training data using expert-composed heuristics, which are then transformed via the “generative step” into probability distributions of the class labels given the training candidates. These results are later generalized using a discriminative model. Snorkel’s attractive promise to create a large amount of annotated data from a smaller set of training data by unifying the output of a set of heuristics has yet to be used for computationally difficult tasks, such as that of discourse attachment, in which one must decide where a given discourse unit attaches to other units in a text in order to form a coherent discourse structure. Although approaching this problem using Snorkel requires significant modifications to the structure of the heuristics, we show that weak supervision methods can be more than competitive with classical supervised learning approaches to the attachment problem.</abstract>
       <url hash="d972e4b7">P19-1061</url>
@@ -787,7 +787,7 @@
     </paper>
     <paper id="63">
       <title>Know What You Don’t Know: Modeling a Pragmatic Speaker that Refers to Objects of Unknown Categories</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>654–659</pages>
       <abstract>Zero-shot learning in Language &amp; Vision is the task of correctly labelling (or naming) objects of novel categories. Another strand of work in L&amp;V aims at pragmatically informative rather than “correct” object descriptions, e.g. in reference games. We combine these lines of research and model zero-shot reference games, where a speaker needs to successfully refer to a novel object in an image. Inspired by models of “rational speech acts”, we extend a neural generator to become a pragmatic speaker reasoning about uncertain object categories. As a result of this reasoning, the generator produces fewer nouns and names of distractor categories as compared to a literal speaker. We show that this conversational strategy for dealing with novel objects often improves communicative success, in terms of resolution accuracy of an automatic listener.</abstract>
@@ -813,7 +813,7 @@
       <author><first>Kevin</first><last>Bowden</last></author>
       <author><first>Jiaqi</first><last>Wu</last></author>
       <author><first>Wen</first><last>Cui</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>666–672</pages>
       <abstract>Discourse relation identification has been an active area of research for many years, and the challenge of identifying implicit relations remains largely an unsolved task, especially in the context of an open-domain dialogue system. Previous work primarily relies on a corpora of formal text which is inherently non-dialogic, i.e., news and journals. This data however is not suitable to handle the nuances of informal dialogue nor is it capable of navigating the plethora of valid topics present in open-domain dialogue. In this paper, we designed a novel discourse relation identification pipeline specifically tuned for open-domain dialogue systems. We firstly propose a method to automatically extract the implicit discourse relation argument pairs and labels from a dataset of dialogic turns, resulting in a novel corpus of discourse relation pairs; the first of its kind to attempt to identify the discourse relations connecting the dialogic turns in open-domain discourse. Moreover, we have taken the first steps to leverage the dialogue features unique to our task to further improve the identification of such relations by performing feature ablation and incorporating dialogue features to enhance the state-of-the-art model.</abstract>
       <url hash="1f2dae52">P19-1065</url>
@@ -838,7 +838,7 @@
       <author><first>Teng</first><last>Long</last></author>
       <author><first>Avishek Joey</first><last>Bose</last></author>
       <author><first>Yanshuai</first><last>Cao</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>678–687</pages>
       <abstract>Coherence is an important aspect of text quality and is crucial for ensuring its readability. One important limitation of existing coherence models is that training on one domain does not easily generalize to unseen categories of text. Previous work advocates for generative models for cross-domain generalization, because for discriminative models, the space of incoherent sentence orderings to discriminate against during training is prohibitively large. In this work, we propose a local discriminative neural model with a much smaller negative sampling space that can efficiently learn against incorrect orderings. The proposed coherence model is simple in structure, yet it significantly outperforms previous state-of-art methods on a standard benchmark dataset on the Wall Street Journal corpus, as well as in multiple new challenging settings of transfer to unseen categories of discourse on Wikipedia articles.</abstract>
       <url hash="95e08554">P19-1067</url>
@@ -847,7 +847,7 @@
     </paper>
     <paper id="68">
       <title><fixed-case>MOROCO</fixed-case>: The <fixed-case>M</fixed-case>oldavian and <fixed-case>R</fixed-case>omanian Dialectal Corpus</title>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <pages>688–698</pages>
       <abstract>In this work, we introduce the MOldavian and ROmanian Dialectal COrpus (MOROCO), which is freely available for download at <url>https://github.com/butnaruandrei/MOROCO</url>. The corpus contains 33564 samples of text (with over 10 million tokens) collected from the news domain. The samples belong to one of the following six topics: culture, finance, politics, science, sports and tech. The data set is divided into 21719 samples for training, 5921 samples for validation and another 5924 samples for testing. For each sample, we provide corresponding dialectal and category labels. This allows us to perform empirical studies on several classification tasks such as (i) binary discrimination of Moldavian versus Romanian text samples, (ii) intra-dialect multi-class categorization by topic and (iii) cross-dialect multi-class categorization by topic. We perform experiments using a shallow approach based on string kernels, as well as a novel deep approach based on character-level convolutional neural networks containing Squeeze-and-Excitation blocks. We also present and analyze the most discriminative features of our best performing model, before and after named entity removal.</abstract>
@@ -895,7 +895,7 @@
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>732–746</pages>
       <abstract>We perform an interdisciplinary large-scale evaluation for detecting lexical semantic divergences in a diachronic and in a synchronic task: semantic sense changes across time, and semantic sense changes across domains. Our work addresses the superficialness and lack of comparison in assessing models of diachronic lexical change, by bringing together and extending benchmark models on a common state-of-the-art evaluation task. In addition, we demonstrate that the same evaluation task and modelling approaches can successfully be utilised for the synchronic detection of domain-specific sense divergences in the field of term extraction.</abstract>
       <url hash="77329c31">P19-1072</url>
@@ -905,9 +905,9 @@
     <paper id="73">
       <title><fixed-case>E</fixed-case>rrudite: Scalable, Reproducible, and Testable Error Analysis</title>
       <author><first>Tongshuang</first><last>Wu</last></author>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Jeffrey</first><last>Heer</last></author>
-      <author><first>Daniel</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel</first><last>Weld</last></author>
       <pages>747–763</pages>
       <abstract>Though error analysis is crucial to understanding and improving NLP models, the common practice of manual, subjective categorization of a small sample of errors can yield biased and incomplete conclusions. This paper codifies model and task agnostic principles for informative error analysis, and presents Errudite, an interactive tool for better supporting this process. First, error groups should be precisely defined for reproducibility; Errudite supports this with an expressive domain-specific language. Second, to avoid spurious conclusions, a large set of instances should be analyzed, including both positive and negative examples; Errudite enables systematic grouping of relevant instances with filtering queries. Third, hypotheses about the cause of errors should be explicitly tested; Errudite supports this via automated counterfactual rewriting. We validate our approach with a user study, finding that Errudite (1) enables users to perform high quality and reproducible error analyses with less effort, (2) reveals substantial ambiguities in prior published error analyses practices, and (3) enhances the error analysis experience by allowing users to test and revise prior beliefs.</abstract>
       <url hash="c706c000">P19-1073</url>
@@ -963,9 +963,9 @@
       <author><first>Chris</first><last>Madge</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <author><first>Silviu</first><last>Paun</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>797–807</pages>
       <abstract>One of the key steps in language resource creation is the identification of the text segments to be annotated, or markables, which depending on the task may vary from nominal chunks for named entity resolution to (potentially nested) noun phrases in coreference resolution (or mentions) to larger text segments in text segmentation. Markable identification is typically carried out semi-automatically, by running a markable identifier and correcting its output by hand–which is increasingly done via annotators recruited through crowdsourcing and aggregating their responses. In this paper, we present a method for identifying markables for coreference annotation that combines high-performance automatic markable detectors with checking with a Game-With-A-Purpose (GWAP) and aggregation using a Bayesian annotation model. The method was evaluated both on news data and data from a variety of other genres and results in an improvement on F1 of mention boundaries of over seven percentage points when compared with a state-of-the-art, domain-independent automatic mention detector, and almost three points over an in-domain mention detector. One of the key contributions of our proposal is its applicability to the case in which markables are nested, as is the case with coreference markables; but the GWAP and several of the proposed markable detectors are task and language-independent and are thus applicable to a variety of other annotation scenarios.</abstract>
       <url hash="ae2eb41c">P19-1077</url>
@@ -1005,7 +1005,7 @@
       <author><first>Anusha</first><last>Balakrishnan</last></author>
       <author><first>Jinfeng</first><last>Rao</last></author>
       <author><first>Kartikeya</first><last>Upasani</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Rajen</first><last>Subba</last></author>
       <pages>831–844</pages>
       <abstract>Generating fluent natural language responses from structured semantic representations is a critical step in task-oriented conversational systems. Avenues like the E2E NLG Challenge have encouraged the development of neural approaches, particularly sequence-to-sequence (Seq2Seq) models for this problem. The semantic representations used, however, are often underspecified, which places a higher burden on the generation model for sentence planning, and also limits the extent to which generated responses can be controlled in a live system. In this paper, we (1) propose using tree-structured semantic representations, like those used in traditional rule-based NLG systems, for better discourse-level structuring and sentence-level planning; (2) introduce a challenging dataset using this representation for the weather domain; (3) introduce a constrained decoding approach for Seq2Seq models that leverages this representation to improve semantic correctness; and (4) demonstrate promising results on our dataset and the E2E dataset.</abstract>
@@ -1058,9 +1058,9 @@
       <title>Don’t Take the Premise for Granted: Mitigating Artifacts in Natural Language Inference</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
       <author><first>Adam</first><last>Poliak</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>877–891</pages>
       <abstract>Natural Language Inference (NLI) datasets often contain hypothesis-only biases—artifacts that allow models to achieve non-trivial performance without learning whether a premise entails a hypothesis. We propose two probabilistic methods to build models that are more robust to such biases and better transfer across datasets. In contrast to standard approaches to NLI, our methods predict the probability of a premise given a hypothesis and NLI label, discouraging models from ignoring the premise. We evaluate our methods on synthetic and existing NLI datasets by training on datasets containing biases and testing on datasets containing no (or different) hypothesis-only biases. Our results indicate that these methods can make NLI models more robust to dataset-specific artifacts, transferring better than a baseline architecture in 9 out of 12 NLI datasets. Additionally, we provide an extensive analysis of the interplay of our methods with known biases in NLI datasets, as well as the effects of encouraging models to ignore biases and fine-tuning on target datasets.</abstract>
       <url hash="c793a21f">P19-1084</url>
@@ -1088,7 +1088,7 @@
     <paper id="86">
       <title><fixed-case>S</fixed-case>her<fixed-case>LI</fixed-case>i<fixed-case>C</fixed-case>: A Typed Event-Focused Lexical Inference Benchmark for Evaluating Natural Language Inference</title>
       <author><first>Martin</first><last>Schmitt</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>902–914</pages>
       <abstract>We present SherLIiC, a testbed for lexical inference in context (LIiC), consisting of 3985 manually annotated inference rule candidates (InfCands), accompanied by (i) ~960k unlabeled InfCands, and (ii) ~190k typed textual relations between Freebase entities extracted from the large entity-linked corpus ClueWeb09. Each InfCand consists of one of these relations, expressed as a lemmatized dependency path, and two argument placeholders, each linked to one or more Freebase types. Due to our candidate selection process based on strong distributional evidence, SherLIiC is much harder than existing testbeds because distributional evidence is of little utility in the classification of InfCands. We also show that, due to its construction, many of SherLIiC’s correct InfCands are novel and missing from existing rule bases. We evaluate a large number of strong baselines on SherLIiC, ranging from semantic vector space models to state of the art neural models of natural language inference (NLI). We show that SherLIiC poses a tough challenge to existing NLI systems.</abstract>
       <url hash="315db90c">P19-1086</url>
@@ -1116,7 +1116,7 @@
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <author><first>Xinyi</first><last>Wu</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>926–935</pages>
       <abstract>The quality of a counseling intervention relies highly on the active collaboration between clients and counselors. In this paper, we explore several linguistic aspects of the collaboration process occurring during counseling conversations. Specifically, we address the differences between high-quality and low-quality counseling. Our approach examines participants’ turn-by-turn interaction, their linguistic alignment, the sentiment expressed by speakers during the conversation, as well as the different topics being discussed. Our results suggest important language differences in low- and high-quality counseling, which we further use to derive linguistic features able to capture the differences between the two groups. These features are then used to build automatic classifiers that can predict counseling quality with accuracies of up to 88%.</abstract>
       <url hash="74a2fb41">P19-1088</url>
@@ -1270,7 +1270,7 @@
       <author><first>Takuya</first><last>Makino</last></author>
       <author><first>Tomoya</first><last>Iwakura</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>1039–1048</pages>
       <abstract>We propose a global optimization method under length constraint (GOLC) for neural text summarization models. GOLC increases the probabilities of generating summaries that have high evaluation scores, ROUGE in this paper, within a desired length. We compared GOLC with two optimization methods, a maximum log-likelihood and a minimum risk training, on CNN/Daily Mail and a Japanese single document summarization data set of The Mainichi Shimbun Newspapers. The experimental results show that a state-of-the-art neural summarization model optimized with GOLC generates fewer overlength summaries while maintaining the fastest processing speed; only 6.70% overlength summaries on CNN/Daily and 7.8% on long summary of Mainichi, compared to the approximately 20% to 50% on CNN/Daily Mail and 10% to 30% on Mainichi with the other optimization methods. We also demonstrate the importance of the generation of in-length summaries for post-editing with the dataset Mainich that is created with strict length constraints. The ex- perimental results show approximately 30% to 40% improved post-editing time by use of in-length summaries.</abstract>
       <url hash="a3f5df62">P19-1099</url>
@@ -1284,7 +1284,7 @@
       <author><first>Pengfei</first><last>Liu</last></author>
       <author><first>Danqing</first><last>Wang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>1049–1058</pages>
       <abstract>The recent years have seen remarkable success in the use of deep neural networks on text summarization. However, there is no clear understanding of why they perform so well, or how they might be improved. In this paper, we seek to better understand how neural extractive summarization systems could benefit from different types of model architectures, transferable knowledge and learning schemas. Besides, we find an effective way to improve the current framework and achieve the state-of-the-art result on CNN/DailyMail by a large margin based on our observations and analysis. Hopefully, our work could provide more hints for future research on extractive summarization.</abstract>
       <url hash="d591aef3">P19-1100</url>
@@ -1305,11 +1305,11 @@
     </paper>
     <paper id="102">
       <title>Multi-News: A Large-Scale Multi-Document Summarization Dataset and Abstractive Hierarchical Model</title>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>Irene</first><last>Li</last></author>
       <author><first>Tianwei</first><last>She</last></author>
       <author><first>Suyi</first><last>Li</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>1074–1084</pages>
       <abstract>Automatic generation of summaries from multiple news articles is a valuable tool as the number of online publications grows rapidly. Single document summarization (SDS) systems have benefited from advances in neural encoder-decoder model thanks to the availability of large datasets. However, multi-document summarization (MDS) of news articles has been limited to datasets of a couple of hundred examples. In this paper, we introduce Multi-News, the first large-scale MDS news dataset. Additionally, we propose an end-to-end model which incorporates a traditional extractive summarization model with a standard SDS model and achieves competitive results on MDS datasets. We benchmark several methods on Multi-News and hope that this work will promote advances in summarization in the multi-document setting.</abstract>
       <url hash="84c79d84">P19-1102</url>
@@ -1361,7 +1361,7 @@
       <author><first>Tirthankar</first><last>Ghosal</last></author>
       <author><first>Rajeev</first><last>Verma</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1120–1130</pages>
       <abstract>Automatically validating a research artefact is one of the frontiers in Artificial Intelligence (AI) that directly brings it close to competing with human intellect and intuition. Although criticised sometimes, the existing peer review system still stands as the benchmark of research validation. The present-day peer review process is not straightforward and demands profound domain knowledge, expertise, and intelligence of human reviewer(s), which is somewhat elusive with the current state of AI. However, the peer review texts, which contains rich sentiment information of the reviewer, reflecting his/her overall attitude towards the research in the paper, could be a valuable entity to predict the acceptance or rejection of the manuscript under consideration. Here in this work, we investigate the role of reviewer sentiment embedded within peer review texts to predict the peer review outcome. Our proposed deep neural architecture takes into account three channels of information: the paper, the corresponding reviews, and review’s polarity to predict the overall recommendation score as well as the final decision. We achieve significant performance improvement over the baselines (∼ 29% error reduction) proposed in a recently released dataset of peer reviews. An AI of this kind could assist the editors/program chairs as an additional layer of confidence, especially when non-responding/missing reviewers are frequent in present day peer review.</abstract>
       <url hash="664649c8">P19-1106</url>
@@ -1385,7 +1385,7 @@
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>1142–1147</pages>
       <abstract>Personal health mention detection deals with predicting whether or not a given sentence is a report of a health condition. Past work mentions errors in this prediction when symptom words, i.e., names of symptoms of interest, are used in a figurative sense. Therefore, we combine a state-of-the-art figurative usage detection with CNN-based personal health mention detection. To do so, we present two methods: a pipeline-based approach and a feature augmentation-based approach. The introduction of figurative usage detection results in an average improvement of 2.21% F-score of personal health mention detection, in the case of the feature augmentation-based approach. This paper demonstrates the promise of using figurative usage detection to improve personal health mention detection.</abstract>
       <url hash="cccd70e9">P19-1108</url>
@@ -1418,10 +1418,10 @@
     <paper id="111">
       <title>Poetry to Prose Conversion in <fixed-case>S</fixed-case>anskrit as a Linearisation Task: A Case for Low-Resource Languages</title>
       <author><first>Amrith</first><last>Krishna</last></author>
-      <author><first>Vishnu</first><last>Sharma</last></author>
+      <author id="vishnu-dutt-sharma"><first>Vishnu</first><last>Sharma</last></author>
       <author><first>Bishal</first><last>Santra</last></author>
       <author><first>Aishik</first><last>Chakraborty</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>1160–1166</pages>
       <abstract>The word ordering in a Sanskrit verse is often not aligned with its corresponding prose order. Conversion of the verse to its corresponding prose helps in better comprehension of the construction. Owing to the resource constraints, we formulate this task as a word ordering (linearisation) task. In doing so, we completely ignore the word arrangement at the verse side. kāvya guru, the approach we propose, essentially consists of a pipeline of two pretraining steps followed by a seq2seq model. The first pretraining step learns task-specific token embeddings from pretrained embeddings. In the next step, we generate multiple possible hypotheses for possible word arrangements of the input %using another pretraining step. We then use them as inputs to a neural seq2seq model for the final prediction. We empirically show that the hypotheses generated by our pretraining step result in predictions that consistently outperform predictions based on the original order in the verse. Overall, kāvya guru outperforms current state of the art models in linearisation for the poetry to prose conversion task in Sanskrit.</abstract>
@@ -1458,7 +1458,7 @@
     <paper id="114">
       <title>Context-specific Language Modeling for Human Trafficking Detection from Online Advertisements</title>
       <author><first>Saeideh</first><last>Shahrokh Esfahani</last></author>
-      <author><first>Michael J.</first><last>Cafarella</last></author>
+      <author id="michael-j-cafarella"><first>Michael J.</first><last>Cafarella</last></author>
       <author><first>Maziyar</first><last>Baran Pouyan</last></author>
       <author><first>Gregory</first><last>DeAngelo</last></author>
       <author><first>Elena</first><last>Eneva</last></author>
@@ -1473,8 +1473,8 @@
       <title>Self-Attentional Models for Lattice Inputs</title>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>1185–1197</pages>
       <abstract>Lattices are an efficient and effective method to encode ambiguity of upstream systems in natural language processing tasks, for example to compactly capture multiple speech recognition hypotheses, or to represent multiple linguistic analyses. Previous work has extended recurrent neural networks to model lattice inputs and achieved improvements in various tasks, but these models suffer from very slow computation speeds. This paper extends the recently proposed paradigm of self-attention to handle lattice inputs. Self-attention is a sequence modeling technique that relates inputs to one another by computing pairwise similarities and has gained popularity for both its strong results and its computational efficiency. To extend such models to handle lattices, we introduce probabilistic reachability masks that incorporate lattice structure into the model and support lattice scores if available. We also propose a method for adapting positional embeddings to lattice structures. We apply the proposed model to a speech translation task and find that it outperforms all examined baselines while being much faster to compute than previous neural lattice models during both training and inference.</abstract>
       <url hash="bbe7ede5">P19-1115</url>
@@ -1499,7 +1499,7 @@
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jingfang</first><last>Xu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>1213–1223</pages>
       <abstract>Multilingual neural machine translation (Multi-NMT) with one encoder-decoder model has made remarkable progress due to its simple deployment. However, this multilingual translation paradigm does not make full use of language commonality and parameter sharing between encoder and decoder. Furthermore, this kind of paradigm cannot outperform the individual models trained on bilingual corpus in most cases. In this paper, we propose a compact and language-sensitive method for multilingual translation. To maximize parameter sharing, we first present a universal representor to replace both encoder and decoder models. To make the representor sensitive for specific languages, we further introduce language-sensitive embedding, attention, and discriminator with the ability to enhance model performance. We verify our methods on various translation scenarios, including one-to-many, many-to-many and zero-shot. Extensive experiments demonstrate that our proposed methods remarkably outperform strong standard multilingual translation systems on WMT and IWSLT datasets. Moreover, we find that our model is especially helpful in low-resource and zero-shot translation scenarios.</abstract>
       <url hash="925ed253">P19-1117</url>
@@ -1509,7 +1509,7 @@
     <paper id="118">
       <title>Unsupervised Parallel Sentence Extraction with Parallel Segment Detection Helps Machine Translation</title>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>1224–1234</pages>
       <abstract>Mining parallel sentences from comparable corpora is important. Most previous work relies on supervised systems, which are trained on parallel data, thus their applicability is problematic in low-resource scenarios. Recent developments in building unsupervised bilingual word embeddings made it possible to mine parallel sentences based on cosine similarities of source and target language words. We show that relying only on this information is not enough, since sentences often have similar words but different meanings. We detect continuous parallel segments in sentence pair candidates and rely on them when mining parallel sentences. We show better mining accuracy on three language pairs in a standard shared task on artificial data. We also provide the first experiments showing that parallel sentences mined from real life sources improve unsupervised MT. Our code is available, we hope it will be used to support low-resource MT research.</abstract>
       <url hash="0ec837c6">P19-1118</url>
@@ -1522,8 +1522,8 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>1235–1245</pages>
       <abstract>Unsupervised bilingual word embedding (UBWE), together with other technologies such as back-translation and denoising, has helped unsupervised neural machine translation (UNMT) achieve remarkable results in several language pairs. In previous methods, UBWE is first trained using non-parallel monolingual corpora and then this pre-trained UBWE is used to initialize the word embedding in the encoder and decoder of UNMT. That is, the training of UBWE and UNMT are separate. In this paper, we first empirically investigate the relationship between UBWE and UNMT. The empirical findings show that the performance of UNMT is significantly affected by the performance of UBWE. Thus, we propose two methods that train UNMT with UBWE agreement. Empirical results on several language pairs show that the proposed methods significantly outperform conventional UNMT.</abstract>
       <url hash="987381b0">P19-1119</url>
@@ -1534,7 +1534,7 @@
       <title>Effective Cross-lingual Transfer of Neural Machine Translation Models without Shared Vocabularies</title>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1246–1257</pages>
       <abstract>Transfer learning or multilingual model is essential for low-resource neural machine translation (NMT), but the applicability is limited to cognate languages by sharing their vocabularies. This paper shows effective techniques to transfer a pretrained NMT model to a new, unrelated language without shared vocabularies. We relieve the vocabulary mismatch by using cross-lingual word embedding, train a more language-agnostic encoder by injecting artificial noises, and generate synthetic data easily from the pretraining data without back-translation. Our methods do not require restructuring the vocabulary or retraining the model. We improve plain NMT transfer by up to +5.1% BLEU in five low-resource translation tasks, outperforming multilingual joint training by a large margin. We also provide extensive ablation studies on pretrained embedding, synthetic data, vocabulary size, and parameter freezing for a better understanding of NMT transfer.</abstract>
       <url hash="b25ea728">P19-1120</url>
@@ -1546,7 +1546,7 @@
       <author><first>Jiatao</first><last>Gu</last></author>
       <author><first>Yong</first><last>Wang</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Victor O.K.</first><last>Li</last></author>
+      <author id="victor-o-k-li"><first>Victor O.K.</first><last>Li</last></author>
       <pages>1258–1268</pages>
       <abstract>Zero-shot translation, translating between language pairs on which a Neural Machine Translation (NMT) system has never been trained, is an emergent property when training the system in multilingual settings. However, naive training for zero-shot NMT easily fails, and is sensitive to hyper-parameter setting. The performance typically lags far behind the more conventional pivot-based approach which translates twice using a third language as a pivot. In this work, we address the degeneracy problem due to capturing spurious correlations by quantitatively analyzing the mutual information between language IDs of the source and decoded sentences. Inspired by this analysis, we propose to use two simple but effective approaches: (1) decoder pre-training; (2) back-translation. These methods show significant improvement (4 22 BLEU points) over the vanilla zero-shot translation on three challenging multilingual datasets, and achieve similar or better results than the pivot-based approach.</abstract>
       <url hash="4adbf392">P19-1121</url>
@@ -1622,7 +1622,7 @@
     </paper>
     <paper id="127">
       <title>Global Textual Relation Embedding for Relational Understanding</title>
-      <author id="zhiyu-chen"><first>Zhiyu</first><last>Chen</last></author>
+      <author><first>Zhiyu</first><last>Chen</last></author>
       <author><first>Hanwen</first><last>Zha</last></author>
       <author><first>Honglei</first><last>Liu</last></author>
       <author><first>Wenhu</first><last>Chen</last></author>
@@ -1640,7 +1640,7 @@
       <author><first>Yankai</first><last>Lin</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
       <author><first>Jie</first><last>Fu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
       <pages>1331–1339</pages>
       <abstract>In this paper, we propose a novel graph neural network with generated parameters (GP-GNNs). The parameters in the propagation module, i.e. the transition matrices used in message passing procedure, are produced by a generator taking natural language sentences as inputs. We verify GP-GNNs in relation extraction from text, both on bag- and instance-settings. Experimental results on a human-annotated dataset and two distantly supervised datasets show that multi-hop reasoning mechanism yields significant improvements. We also perform a qualitative analysis to demonstrate that our model could discover more accurate relations by multi-hop relational reasoning.</abstract>
@@ -1667,7 +1667,7 @@
     <paper id="130">
       <title>Exploiting Entity <fixed-case>BIO</fixed-case> Tag Embeddings and Multi-task Learning for Relation Extraction with Imbalanced Data</title>
       <author><first>Wei</first><last>Ye</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Rui</first><last>Xie</last></author>
       <author><first>Zhonghao</first><last>Sheng</last></author>
       <author><first>Long</first><last>Chen</last></author>
@@ -1685,7 +1685,7 @@
       <author><first>Yuanbin</first><last>Wu</last></author>
       <author><first>Ming</first><last>Gong</last></author>
       <author><first>Daxin</first><last>Jiang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Shiliang</first><last>Sun</last></author>
       <author><first>Nan</first><last>Duan</last></author>
       <pages>1361–1370</pages>
@@ -1748,7 +1748,7 @@
       <title><fixed-case>G</fixed-case>raph<fixed-case>R</fixed-case>el: Modeling Text as Relational Graphs for Joint Entity and Relation Extraction</title>
       <author><first>Tsu-Jui</first><last>Fu</last></author>
       <author><first>Peng-Hsuan</first><last>Li</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <pages>1409–1418</pages>
       <abstract>In this paper, we present GraphRel, an end-to-end relation extraction model which uses graph convolutional networks (GCNs) to jointly learn named entities and relations. In contrast to previous baselines, we consider the interaction between named entities and relations via a 2nd-phase relation-weighted GCN to better extract relations. Linear and dependency structures are both used to extract both sequential and regional features of the text, and a complete word graph is further utilized to extract implicit features among all word pairs of the text. With the graph-based approach, the prediction for overlapping relations is substantially improved over previous sequential approaches. We evaluate GraphRel on two public datasets: NYT and WebNLG. Results show that GraphRel maintains high precision while increasing recall substantially. Also, GraphRel outperforms previous work by 3.2% and 5.8% (F1 score), achieving a new state-of-the-art for relation extraction.</abstract>
       <url hash="beabe36d">P19-1136</url>
@@ -1779,9 +1779,9 @@
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Nan</first><last>Du</last></author>
       <author><first>Xian</first><last>Wu</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
       <author><first>Fenglong</first><last>Ma</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>1430–1440</pages>
       <abstract>This paper presents a novel framework, MGNER, for Multi-Grained Named Entity Recognition where multiple entities or entity mentions in a sentence could be non-overlapping or totally nested. Different from traditional approaches regarding NER as a sequential labeling task and annotate entities consecutively, MGNER detects and recognizes entities on multiple granularities: it is able to recognize named entities without explicitly assuming non-overlapping or totally nested structures. MGNER consists of a Detector that examines all possible word segments and a Classifier that categorizes entities. In addition, contextual information and a self-attention mechanism are utilized throughout the framework to improve the NER performance. Experimental results show that MGNER outperforms current state-of-the-art baselines up to 4.4% in terms of the F1 score among nested/non-overlapping NER tasks.</abstract>
       <url hash="b216daeb">P19-1138</url>
@@ -1810,7 +1810,7 @@
       <author><first>Chengjiang</first><last>Li</last></author>
       <author><first>Zhiyuan</first><last>Liu</last></author>
       <author><first>Juanzi</first><last>Li</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>1452–1461</pages>
       <abstract>Entity alignment typically suffers from the issues of structural heterogeneity and limited seed alignments. In this paper, we propose a novel Multi-channel Graph Neural Network model (MuGNN) to learn alignment-oriented knowledge graph (KG) embeddings by robustly encoding two KGs via multiple channels. Each channel encodes KGs via different relation weighting schemes with respect to self-attention towards KG completion and cross-KG attention for pruning exclusive entities respectively, which are further combined via pooling techniques. Moreover, we also infer and transfer rule knowledge for completing two KGs consistently. MuGNN is expected to reconcile the structural differences of two KGs, and thus make better use of seed alignments. Extensive experiments on five publicly available datasets demonstrate our superior performance (5% Hits@1 up on average). Source code and data used in the experiments can be accessed at <url>https://github.com/thunlp/MuGNN</url> .</abstract>
       <url hash="43d79483">P19-1140</url>
@@ -1844,7 +1844,7 @@
     </paper>
     <paper id="143">
       <title>Training Hybrid Language Models by Marginalizing over Segmentations</title>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Sainbayar</first><last>Sukhbaatar</last></author>
       <author><first>Piotr</first><last>Bojanowski</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
@@ -1859,7 +1859,7 @@
       <author><first>Hongyin</first><last>Luo</last></author>
       <author><first>Lan</first><last>Jiang</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>1483–1493</pages>
       <abstract>Common language models typically predict the next word given the context. In this work, we propose a method that improves language modeling by learning to align the given context and the following phrase. The model does not require any linguistic annotation of phrase segmentation. Instead, we define syntactic heights and phrase segmentation rules, enabling the model to automatically induce phrases, recognize their task-specific heads, and generate phrase embeddings in an unsupervised learning manner. Our method can easily be applied to language models with different network architectures since an independent module is used for phrase induction and context-phrase alignment, and no change is required in the underlying language modeling network. Experiments have shown that our model outperformed several strong baseline models on different data sets. We achieved a new state-of-the-art performance of 17.4 perplexity on the Wikitext-103 dataset. Additionally, visualizing the outputs of the phrase induction module showed that our model is able to learn approximate phrase-level structural knowledge without any annotation.</abstract>
       <url hash="b1eaecdb">P19-1144</url>
@@ -1870,7 +1870,7 @@
       <title>Lightweight and Efficient Neural Natural Language Processing with Quaternion Networks</title>
       <author><first>Yi</first><last>Tay</last></author>
       <author><first>Aston</first><last>Zhang</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Jinfeng</first><last>Rao</last></author>
       <author><first>Shuai</first><last>Zhang</last></author>
       <author><first>Shuohang</first><last>Wang</last></author>
@@ -1886,7 +1886,7 @@
       <title>Sparse Sequence-to-Sequence Models</title>
       <author><first>Ben</first><last>Peters</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>1504–1519</pages>
       <abstract>Sequence-to-sequence models are a powerful workhorse of NLP. Most variants employ a softmax transformation in both their attention mechanism and output layer, leading to dense alignments and strictly positive output probabilities. This density is wasteful, making models less interpretable and assigning probability mass to many implausible outputs. In this paper, we propose sparse sequence-to-sequence models, rooted in a new family of <tex-math>\alpha</tex-math>-entmax transformations, which includes softmax and sparsemax as particular cases, and is sparse for any <tex-math>\alpha &gt; 1</tex-math>. We provide fast algorithms to evaluate these transformations and their gradients, which scale well for large vocabulary sizes. Our models are able to produce sparse alignments and to assign nonzero probability to a short list of plausible outputs, sometimes rendering beam search exact. Experiments on morphological inflection and machine translation reveal consistent gains over dense models.</abstract>
       <url hash="9d702af0">P19-1146</url>
@@ -1899,7 +1899,7 @@
       <author><first>Minhao</first><last>Cheng</last></author>
       <author><first>Da-Cheng</first><last>Juan</last></author>
       <author><first>Wei</first><last>Wei</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <author><first>Cho-Jui</first><last>Hsieh</last></author>
       <pages>1520–1529</pages>
       <abstract>This work examines the robustness of self-attentive neural networks against adversarial input perturbations. Specifically, we investigate the attention and feature extraction mechanisms of state-of-the-art recurrent neural networks and self-attentive architectures for sentiment analysis, entailment and machine translation under adversarial attacks. We also propose a novel attack algorithm for generating more natural adversarial examples that could mislead neural models but not humans. Experimental results show that, compared to recurrent neural models, self-attentive models are more robust against adversarial perturbation. In addition, we provide theoretical explanations for their superior robustness to support our claims.</abstract>
@@ -2004,7 +2004,7 @@
     <paper id="156">
       <title>Better Character Language Modeling through Morphology</title>
       <author><first>Terra</first><last>Blevins</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1606–1613</pages>
       <abstract>We incorporate morphological supervision into character language models (CLMs) via multitasking and show that this addition improves bits-per-character (BPC) performance across 24 languages, even when the morphology data and language modeling data are disjoint. Analyzing the CLMs shows that inflected words benefit more from explicitly modeling morphology than uninflected words, and that morphological supervision improves performance even as the amount of language modeling data grows. We then transfer morphological supervision across languages to improve performance in the low-resource setting.</abstract>
       <url hash="217b7001">P19-1156</url>
@@ -2015,7 +2015,7 @@
       <title>Historical Text Normalization with Delayed Rewards</title>
       <author><first>Simon</first><last>Flachs</last></author>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1614–1619</pages>
       <abstract>Training neural sequence-to-sequence models with simple token-level log-likelihood is now a standard approach to historical text normalization, albeit often outperformed by phrase-based models. Policy gradient training enables direct optimization for exact matches, and while the small datasets in historical text normalization are prohibitive of from-scratch reinforcement learning, we show that policy gradient fine-tuning leads to significant improvements across the board. Policy gradient training, in particular, leads to more accurate normalizations for long or unseen words.</abstract>
       <url hash="9b7aeac6">P19-1157</url>
@@ -2026,7 +2026,7 @@
       <title>Stochastic Tokenization with a Language Model for Neural Text Classification</title>
       <author><first>Tatsuya</first><last>Hiraoka</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1620–1629</pages>
       <abstract>For unsegmented languages such as Japanese and Chinese, tokenization of a sentence has a significant impact on the performance of text classification. Sentences are usually segmented with words or subwords by a morphological analyzer or byte pair encoding and then encoded with word (or subword) representations for neural networks. However, segmentation is potentially ambiguous, and it is unclear whether the segmented tokens achieve the best performance for the target task. In this paper, we propose a method to simultaneously learn tokenization and text classification to address these problems. Our model incorporates a language model for unsupervised tokenization into a text classifier and then trains both models simultaneously. To make the model robust against infrequent tokens, we sampled segmentation for each sentence stochastically during training, which resulted in improved performance of text classification. We conducted experiments on sentiment analysis as a text classification task and show that our method achieves better performance than previous methods.</abstract>
       <url hash="0769836a">P19-1158</url>
@@ -2066,8 +2066,8 @@
     <paper id="161">
       <title>Counterfactual Data Augmentation for Mitigating Gender Stereotypes in Languages with Rich Morphology</title>
       <author><first>Ran</first><last>Zmigrod</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <pages>1651–1661</pages>
       <abstract>Gender stereotypes are manifest in most of the world’s languages and are consequently propagated or amplified by NLP systems. Although research has focused on mitigating gender stereotypes in English, the approaches that are commonly employed produce ungrammatical sentences in morphologically rich languages. We present a novel approach for converting between masculine-inflected and feminine-inflected sentences in such languages. For Spanish and Hebrew, our approach achieves F1 scores of 82% and 73% at the level of tags and accuracies of 90% and 87% at the level of forms. By evaluating our approach using four different languages, we show that, on average, it reduces gender stereotyping by a factor of 2.5 without any sacrifice to grammaticality.</abstract>
@@ -2098,7 +2098,7 @@
       <author><first>Dallas</first><last>Card</last></author>
       <author><first>Saadia</first><last>Gabriel</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1668–1678</pages>
       <abstract>We investigate how annotators’ insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose *dialect* and *race priming* as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet’s dialect they are significantly less likely to label the tweet as offensive.</abstract>
       <url hash="216c87cb">P19-1163</url>
@@ -2109,8 +2109,8 @@
     <paper id="164">
       <title>Evaluating Gender Bias in Machine Translation</title>
       <author><first>Gabriel</first><last>Stanovsky</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1679–1684</pages>
       <abstract>We present the first challenge set and evaluation protocol for the analysis of gender bias in machine translation (MT). Our approach uses two recent coreference resolution datasets composed of English sentences which cast participants into non-stereotypical gender roles (e.g., “The doctor asked the nurse to help her in the operation”). We devise an automatic gender bias evaluation method for eight target languages with grammatical gender, based on morphological analysis (e.g., the use of female inflection for the word “doctor”). Our analyses show that four popular industrial MT systems and two recent state-of-the-art academic MT models are significantly prone to gender-biased translation errors for all tested target languages. Our data and code are publicly available at <url>https://github.com/gabrielStanovsky/mt_gender</url>.</abstract>
       <url hash="68e5302a">P19-1164</url>
@@ -2145,7 +2145,7 @@
       <title>Unsupervised Discovery of Gendered Language through Latent-Variable Modeling</title>
       <author><first>Alexander Miserlis</first><last>Hoyle</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <pages>1706–1716</pages>
@@ -2200,7 +2200,7 @@
     <paper id="171">
       <title>Meaning to Form: Measuring Systematicity as Information</title>
       <author><first>Tiago</first><last>Pimentel</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Damian</first><last>Blasi</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
@@ -2239,7 +2239,7 @@
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1787–1799</pages>
       <abstract>The reordering model plays an important role in phrase-based statistical machine translation. However, there are few works that exploit the reordering information in neural machine translation. In this paper, we propose a reordering mechanism to learn the reordering embedding of a word based on its contextual information. These learned reordering embeddings are stacked together with self-attention networks to learn sentence representation for machine translation. The reordering mechanism can be easily integrated into both the encoder and the decoder in the Transformer translation system. Experimental results on WMT’14 English-to-German, NIST Chinese-to-English, and WAT Japanese-to-English translation tasks demonstrate that the proposed methods can significantly improve the performance of the Transformer.</abstract>
       <url hash="3ddfdc57">P19-1174</url>
@@ -2290,7 +2290,7 @@
       <title>Self-Supervised Neural Machine Translation</title>
       <author><first>Dana</first><last>Ruiter</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1828–1834</pages>
       <abstract>We present a simple new method where an emergent NMT system is used for simultaneously selecting training data and learning internal NMT representations. This is done in a self-supervised way without parallel data, in such a way that both tasks enhance each other during training. The method is language independent, introduces no additional hyper-parameters, and achieves BLEU scores of 29.21 (en2fr) and 27.36 (fr2en) on newstest2014 using English and French Wikipedia data for training.</abstract>
       <url hash="aca8a68c">P19-1178</url>
@@ -2302,7 +2302,7 @@
       <title>Exploring Phoneme-Level Speech Representations for End-to-End Speech Translation</title>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>1835–1841</pages>
       <abstract>Previous work on end-to-end translation from speech has primarily used frame-level features as speech representations, which creates longer, sparser sequences than text. We show that a naive method to create compressed phoneme-like speech representations is far more effective and efficient for translation than traditional frame-level speech features. Specifically, we generate phoneme labels for speech frames and average consecutive frames with the same label to create shorter, higher-level source sequences for translation. We see improvements of up to 5 BLEU on both our high and low resource language pairs, with a reduction in training time of 60%. Our improvements hold across multiple data sizes and two language pairs.</abstract>
       <url hash="db586366">P19-1179</url>
@@ -2312,7 +2312,7 @@
     </paper>
     <paper id="180">
       <title>Visually Grounded Neural Syntax Acquisition</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Jiayuan</first><last>Mao</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
@@ -2343,7 +2343,7 @@
       <author><first>Hao</first><last>Tan</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
       <author><first>Zhe</first><last>Lin</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <pages>1873–1883</pages>
       <abstract>Describing images with text is a fundamental problem in vision-language research. Current studies in this domain mostly focus on single image captioning. However, in various real applications (e.g., image editing, difference interpretation, and retrieval), generating relational captions for two images, can also be very useful. This important problem has not been explored mostly due to lack of datasets and effective models. To push forward the research in this direction, we first introduce a new language-guided image editing dataset that contains a large number of real image pairs with corresponding editing instructions. We then propose a new relational speaker model based on an encoder-decoder architecture with static relational attention and sequential multi-head attention. We also extend the model with dynamic relational attention, which calculates visual alignment while decoding. Our models are evaluated on our newly collected and two public datasets consisting of image pairs annotated with relationship sentences. Experimental results, based on both automatic and human evaluation, demonstrate that our model outperforms all baselines and existing methods on all the datasets.</abstract>
@@ -2373,7 +2373,7 @@
       <author><first>Ece</first><last>Takmaz</last></author>
       <author><first>Lieke</first><last>Gelderloos</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>1895–1910</pages>
       <abstract>This paper introduces the PhotoBook dataset, a large-scale collection of visually-grounded, task-oriented dialogues in English designed to investigate shared dialogue history accumulating during conversation. Taking inspiration from seminal work on dialogue analysis, we propose a data-collection task formulated as a collaborative game prompting two online participants to refer to images utilising both their visual context as well as previously established referring expressions. We provide a detailed description of the task setup and a thorough analysis of the 2,500 dialogues collected. To further illustrate the novel features of the dataset, we propose a baseline model for reference resolution which uses a simple method to take into account shared information accumulated in a reference chain. Our results show that this information is particularly important to resolve later descriptions and underline the need to develop more sophisticated models of common ground in dialogue interaction.</abstract>
       <url hash="e847549f">P19-1184</url>
@@ -2397,8 +2397,8 @@
     <paper id="186">
       <title>Semi-supervised Stochastic Multi-Domain Learning using Variational Inference</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>1923–1934</pages>
       <abstract>Supervised models of NLP rely on large collections of text which closely resemble the intended testing setting. Unfortunately matching text is often not available in sufficient quantity, and moreover, within any domain of text, data is often highly heterogenous. In this paper we propose a method to distill the important domain signal as part of a multi-domain learning system, using a latent variable model in which parts of a neural model are stochastically gated based on the inferred domain. We compare the use of discrete versus continuous latent variables, operating in a domain-supervised or a domain semi-supervised setting, where the domain is known only for a subset of training inputs. We show that our model leads to substantial performance improvements over competitive benchmark domain adaptation methods, including methods using adversarial learning.</abstract>
       <url hash="1fd9b0c5">P19-1186</url>
@@ -2444,7 +2444,7 @@
     <paper id="190">
       <title>Generating Long and Informative Reviews with Aspect-Aware Coarse-to-Fine Decoding</title>
       <author><first>Junyi</first><last>Li</last></author>
-      <author><first>Wayne Xin</first><last>Zhao</last></author>
+      <author id="wayne-xin-zhao"><first>Wayne Xin</first><last>Zhao</last></author>
       <author><first>Ji-Rong</first><last>Wen</last></author>
       <author><first>Yang</first><last>Song</last></author>
       <pages>1969–1979</pages>
@@ -2505,7 +2505,7 @@
       <author><first>Pengcheng</first><last>Yang</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <author><first>Yutong</first><last>Tan</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Xu</first><last>Sun</last></author>
       <pages>2013–2022</pages>
@@ -2620,7 +2620,7 @@
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Ichiro</first><last>Kobayashi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
       <pages>2102–2113</pages>
       <abstract>We propose a data-to-text generation model with two modules, one for tracking and the other for text generation. Our tracking module selects and keeps track of salient information and memorizes which record has been mentioned. Our generation module generates a summary conditioned on the state of tracking module. Our proposed model is considered to simulate the human-like writing process that gradually selects the information by determining the intermediate variables while writing the summary. In addition, we also explore the effectiveness of the writer information for generations. Experimental results show that our proposed model outperforms existing models in all evaluation metrics even without writer information. Incorporating writer information further improves the performance, contributing to content planning and surface realization.</abstract>
@@ -2731,7 +2731,7 @@
     </paper>
     <paper id="211">
       <title>Adversarial Domain Adaptation Using Artificial Titles for Abstractive Title Generation</title>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <author><first>Yan-Ying</first><last>Chen</last></author>
       <pages>2197–2203</pages>
       <abstract>A common issue in training a deep learning, abstractive summarization model is lack of a large set of training summaries. This paper examines techniques for adapting from a labeled source domain to an unlabeled target domain in the context of an encoder-decoder model for text generation. In addition to adversarial domain adaptation (ADA), we introduce the use of artificial titles and sequential training to capture the grammatical style of the unlabeled target domain. Evaluation on adapting to/from news articles and Stack Exchange posts indicates that the use of these techniques can boost performance for both unsupervised adaptation as well as fine-tuning with limited target data.</abstract>
@@ -2793,7 +2793,7 @@
       <author><first>Yang</first><last>Zhao</last></author>
       <author><first>Xiaoyu</first><last>Shen</last></author>
       <author><first>Wei</first><last>Bi</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>2235–2240</pages>
       <abstract>Multi-sentence compression (MSC) aims to generate a grammatical but reduced compression from multiple input sentences while retaining their key information. Previous dominating approach for MSC is the extraction-based word graph approach. A few variants further leveraged lexical substitution to yield more abstractive compression. However, two limitations exist. First, the word graph approach that simply concatenates fragments from multiple sentences may yield non-fluent or ungrammatical compression. Second, lexical substitution is often inappropriate without the consideration of context information. To tackle the above-mentioned issues, we present a neural rewriter for multi-sentence compression that does not need any parallel corpus. Empirical studies have shown that our approach achieves comparable results upon automatic evaluation and improves the grammaticality of compression based on human evaluation. A parallel corpus with more than 140,000 (sentence group, compression) pairs is also constructed as a by-product for future research.</abstract>
       <url hash="2bbebcbb">P19-1216</url>
@@ -2853,7 +2853,7 @@
       <author><first>Minghao</first><last>Hu</last></author>
       <author><first>Yuxing</first><last>Peng</last></author>
       <author><first>Zhen</first><last>Huang</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>2285–2295</pages>
       <abstract>This paper considers the reading comprehension task in which multiple documents are given as input. Prior work has shown that a pipeline of retriever, reader, and reranker can improve the overall performance. However, the pipeline system is inefficient since the input is re-encoded within each module, and is unable to leverage upstream components to help downstream training. In this work, we present RE<tex-math>^3</tex-math>QA, a unified question answering model that combines context retrieving, reading comprehension, and answer reranking to predict the final answer. Unlike previous pipelined approaches, RE<tex-math>^3</tex-math>QA shares contextualized text representation across different components, and is carefully designed to use high-quality upstream outputs (e.g., retrieved context or candidate answers) for directly supervising downstream modules (e.g., the reader or the reranker). As a result, the whole network can be trained end-to-end to avoid the context inconsistency problem. Experiments show that our model outperforms the pipelined baseline and achieves state-of-the-art results on two versions of TriviaQA and two variants of SQuAD.</abstract>
       <url hash="b85d6c90">P19-1221</url>
@@ -2873,7 +2873,7 @@
     <paper id="223">
       <title><fixed-case>E</fixed-case>3: Entailment-driven Extracting and Editing for Conversational Machine Reading</title>
       <author><first>Victor</first><last>Zhong</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>2310–2320</pages>
       <abstract>Conversational machine reading systems help users answer high-level questions (e.g. determine if they qualify for particular government benefits) when they do not know the exact rules by which the determination is made (e.g. whether they need certain income levels or veteran status). The key challenge is that these rules are only provided in the form of a procedural text (e.g. guidelines from government website) which the system must read to figure out what to ask the user. We present a new conversational machine reading model that jointly extracts a set of decision rules from the procedural text while reasoning about which are entailed by the conversational history and which still need to be edited to create questions for the user. On the recently introduced ShARC conversational machine reading dataset, our Entailment-driven Extract and Edit network (E3) achieves a new state-of-the-art, outperforming existing systems as well as a new BERT-based baseline. In addition, by explicitly highlighting which information still needs to be gathered, E3 provides a more explainable alternative to prior work. We release source code for our models and experiments at <url>https://github.com/vzhong/e3</url>.</abstract>
       <url hash="30337cbb">P19-1223</url>
@@ -2940,8 +2940,8 @@
     <paper id="228">
       <title>Compound Probabilistic Context-Free Grammars for Grammar Induction</title>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>2369–2385</pages>
       <abstract>We study a formalization of the grammar induction problem that models sentences as being generated by a compound probabilistic context free grammar. In contrast to traditional formulations which learn a single stochastic grammar, our context-free rule probabilities are modulated by a per-sentence continuous latent variable, which induces marginal dependencies beyond the traditional context-free assumptions. Inference in this context-dependent grammar is performed by collapsed variational inference, in which an amortized variational posterior is placed on the continuous variable, and the latent trees are marginalized with dynamic programming. Experiments on English and Chinese show the effectiveness of our approach compared to recent state-of-the-art methods for grammar induction from words with neural language models.</abstract>
       <url hash="c4473d80">P19-1228</url>
@@ -2978,7 +2978,7 @@
       <author><first>Xiaoyu</first><last>Xing</last></author>
       <author><first>Qi</first><last>Zhang</last></author>
       <author><first>Jinlan</first><last>Fu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>2409–2419</pages>
       <abstract>In this work, we explore the way to perform named entity recognition (NER) using only unlabeled data and named entity dictionaries. To this end, we formulate the task as a positive-unlabeled (PU) learning problem and accordingly propose a novel PU learning algorithm to perform the task. We prove that the proposed algorithm can unbiasedly and consistently estimate the task loss as if there is fully labeled data. A key feature of the proposed method is that it does not require the dictionaries to label every entity within a sentence, and it even does not require the dictionaries to label all of the words constituting an entity. This greatly reduces the requirement on the quality of the dictionaries and makes our method generalize well with quite simple dictionaries. Empirical studies on four public NER datasets demonstrate the effectiveness of our proposed method. We have published the source code at <url>https://github.com/v-mipeng/LexiconNER</url>.</abstract>
       <url hash="216206fb">P19-1231</url>
@@ -2988,7 +2988,7 @@
     <paper id="232">
       <title>Multi-Task Semantic Dependency Parsing with Policy Gradient for Learning Easy-First Strategies</title>
       <author><first>Shuhei</first><last>Kurita</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>2420–2430</pages>
       <abstract>In Semantic Dependency Parsing (SDP), semantic relations form directed acyclic graphs, rather than trees. We propose a new iterative predicate selection (IPS) algorithm for SDP. Our IPS algorithm combines the graph-based and transition-based parsing approaches in order to handle multiple semantic head words. We train the IPS model using a combination of multi-task learning and task-specific policy gradient training. Trained this way, IPS achieves a new state of the art on the SemEval 2015 Task 18 datasets. Furthermore, we observe that policy gradient training learns an easy-first strategy.</abstract>
       <url hash="2dddfeaf">P19-1232</url>
@@ -3001,7 +3001,7 @@
       <author><first>Yijin</first><last>Liu</last></author>
       <author><first>Fandong</first><last>Meng</last></author>
       <author><first>Jinchao</first><last>Zhang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Jie</first><last>Zhou</last></author>
       <pages>2431–2441</pages>
@@ -3014,7 +3014,7 @@
       <title>Unsupervised Learning of <fixed-case>PCFG</fixed-case>s with Normalizing Flow</title>
       <author><first>Lifeng</first><last>Jin</last></author>
       <author><first>Finale</first><last>Doshi-Velez</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>2442–2452</pages>
@@ -3048,7 +3048,7 @@
       <title>Graph-based Dependency Parsing with Graph Neural Networks</title>
       <author><first>Tao</first><last>Ji</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>2475–2485</pages>
       <abstract>We investigate the problem of efficiently incorporating high-order features into neural graph-based dependency parsing. Instead of explicitly extracting high-order features from intermediate parse trees, we develop a more powerful dependency tree node representation which captures high-order information concisely and efficiently. We use graph neural networks (GNNs) to learn the representations and discuss several new configurations of GNN’s updating and aggregation functions. Experiments on PTB show that our parser achieves the best UAS and LAS on PTB (96.0%, 94.3%) among systems without using any external resources.</abstract>
       <url hash="1e5b1b5f">P19-1237</url>
@@ -3059,8 +3059,8 @@
       <title>Wide-Coverage Neural <fixed-case>A</fixed-case>* Parsing for <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars</title>
       <author><first>John</first><last>Torr</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>2486–2505</pages>
       <abstract>Minimalist Grammars (Stabler, 1997) are a computationally oriented, and rigorous formalisation of many aspects of Chomsky’s (1995) Minimalist Program. This paper presents the first ever application of this formalism to the task of realistic wide-coverage parsing. The parser uses a linguistically expressive yet highly constrained grammar, together with an adaptation of the A* search algorithm currently used in CCG parsing (Lewis and Steedman, 2014; Lewis et al., 2016), with supertag probabilities provided by a bi-LSTM neural network supertagger trained on MGbank, a corpus of MG derivation trees. We report on some promising initial experimental results for overall dependency recovery as well as on the recovery of certain unbounded long distance dependencies. Finally, although like other MG parsers, ours has a high order polynomial worst case time complexity, we show that in practice its expected time complexity is cubic in the length of the sentence. The parser is publicly available.</abstract>
       <url hash="6ee7b4c2">P19-1238</url>
@@ -3096,7 +3096,7 @@
       <title>#<fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>oo? Detection of Personal Recollections of Sexual Harassment on Social Media</title>
       <author><first>Arijit</first><last>Ghosh Chowdhury</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <pages>2527–2537</pages>
       <abstract>The availability of large-scale online social data, coupled with computational methods can help us answer fundamental questions relat- ing to our social lives, particularly our health and well-being. The #MeToo trend has led to people talking about personal experiences of harassment more openly. This work at- tempts to aggregate such experiences of sex- ual abuse to facilitate a better understanding of social media constructs and to bring about social change. It has been found that disclo- sure of abuse has positive psychological im- pacts. Hence, we contend that such informa- tion can leveraged to create better campaigns for social change by analyzing how users react to these stories and to obtain a better insight into the consequences of sexual abuse. We use a three part Twitter-Specific Social Media Lan- guage Model to segregate personal recollec- tions of sexual harassment from Twitter posts. An extensive comparison with state-of-the-art generic and specific models along with a de- tailed error analysis explores the merit of our proposed model.</abstract>
@@ -3108,7 +3108,7 @@
       <title>Multi-task Pairwise Neural Ranking for Hashtag Segmentation</title>
       <author><first>Mounica</first><last>Maddela</last></author>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <pages>2538–2549</pages>
       <abstract>Hashtags are often employed on social media and beyond to add metadata to a textual utterance with the goal of increasing discoverability, aiding search, or providing additional semantics. However, the semantic content of hashtags is not straightforward to infer as these represent ad-hoc conventions which frequently include multiple words joined together and can include abbreviations and unorthodox spellings. We build a dataset of 12,594 hashtags split into individual segments and propose a set of approaches for hashtag segmentation by framing it as a pairwise ranking problem between candidate segmentations. Our novel neural approaches demonstrate 24.6% error reduction in hashtag segmentation accuracy compared to the current state-of-the-art method. Finally, we demonstrate that a deeper understanding of hashtag semantics obtained through segmentation is useful for downstream applications such as sentiment analysis, for which we achieved a 2.6% increase in average recall on the SemEval 2017 sentiment analysis dataset.</abstract>
       <url hash="bfde6af9">P19-1242</url>
@@ -3130,8 +3130,8 @@
       <title>Sentence-Level Evidence Embedding for Claim Verification with Hierarchical Attention Networks</title>
       <author><first>Jing</first><last>Ma</last></author>
       <author><first>Wei</first><last>Gao</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2561–2571</pages>
       <abstract>Claim verification is generally a task of verifying the veracity of a given claim, which is critical to many downstream applications. It is cumbersome and inefficient for human fact-checkers to find consistent pieces of evidence, from which solid verdict could be inferred against the claim. In this paper, we propose a novel end-to-end hierarchical attention network focusing on learning to represent coherent evidence as well as their semantic relatedness with the claim. Our model consists of three main components: 1) A coherence-based attention layer embeds coherent evidence considering the claim and sentences from relevant articles; 2) An entailment-based attention layer attends on sentences that can semantically infer the claim on top of the first attention; and 3) An output layer predicts the verdict based on the embedded evidence. Experimental results on three public benchmark datasets show that our proposed model outperforms a set of state-of-the-art baselines.</abstract>
       <url hash="2a3f25b3">P19-1244</url>
@@ -3141,7 +3141,7 @@
     <paper id="245">
       <title>Predicting Human Activities from User-Generated Content</title>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>2572–2582</pages>
       <abstract>The activities we do are linked to our interests, personality, political preferences, and decisions we make about the future. In this paper, we explore the task of predicting human activities from user-generated content. We collect a dataset containing instances of social media users writing about a range of everyday activities. We then use a state-of-the-art sentence embedding framework tailored to recognize the semantics of human activities and perform an automatic clustering of these activities. We train a neural network model to make predictions about which clusters contain activities that were performed by a given user based on the text of their previous posts and self-description. Additionally, we explore the degree to which incorporating inferred user traits into our model helps with this prediction task.</abstract>
       <url hash="c0ca4b11">P19-1245</url>
@@ -3173,7 +3173,7 @@
       <title>Fine-Grained Spoiler Detection from Large-Scale Review Corpora</title>
       <author><first>Mengting</first><last>Wan</last></author>
       <author><first>Rishabh</first><last>Misra</last></author>
-      <author><first>Ndapa</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapa</first><last>Nakashole</last></author>
       <author><first>Julian</first><last>McAuley</last></author>
       <pages>2605–2610</pages>
       <abstract>This paper presents computational approaches for automatically detecting critical plot twists in reviews of media products. First, we created a large-scale book review dataset that includes fine-grained spoiler annotations at the sentence-level, as well as book and (anonymized) user information. Second, we carefully analyzed this dataset, and found that: spoiler language tends to be book-specific; spoiler distributions vary greatly across books and review authors; and spoiler sentences tend to jointly appear in the latter part of reviews. Third, inspired by these findings, we developed an end-to-end neural network architecture to detect spoiler sentences in review corpora. Quantitative and qualitative results demonstrate that the proposed method substantially outperforms existing baselines.</abstract>
@@ -3196,7 +3196,7 @@
       <title>Dataset Creation for Ranking Constructive News Comments</title>
       <author><first>Soichiro</first><last>Fujita</last></author>
       <author><first>Hayato</first><last>Kobayashi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>2619–2626</pages>
       <abstract>Ranking comments on an online news service is a practically important task for the service provider, and thus there have been many studies on this task. However, most of them considered users’ positive feedback, such as “Like”-button clicks, as a quality measure. In this paper, we address directly evaluating the quality of comments on the basis of “constructiveness,” separately from user feedback. To this end, we create a new dataset including 100K+ Japanese comments with constructiveness scores (C-scores). Our experiments clarify that C-scores are not always related to users’ positive feedback, and the performance of pairwise ranking models tends to be enhanced by the variation of comments rather than articles.</abstract>
       <url hash="5a722d27">P19-1250</url>
@@ -3270,10 +3270,10 @@
     <paper id="256">
       <title>A Simple Recipe towards Reducing Hallucination in Neural Surface Realisation</title>
       <author><first>Feng</first><last>Nie</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
       <author><first>Rong</first><last>Pan</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>2673–2679</pages>
       <abstract>Recent neural language generation systems often <i>hallucinate</i> contents (i.e., producing irrelevant or contradicted facts), especially when trained on loosely corresponding pairs of the input structure and text. To mitigate this issue, we propose to integrate a language understanding module for data refinement with self-training iterations to effectively induce strong equivalence between the input data and the paired text. Experiments on the E2E challenge dataset show that our proposed framework can reduce more than 50% relative unaligned noise from the original data-text pairs. A vanilla sequence-to-sequence neural NLG model trained on the refined data has improved on content correctness compared with the current state-of-the-art ensemble generator.</abstract>
       <url hash="b96018f5">P19-1256</url>
@@ -3284,7 +3284,7 @@
     <paper id="257">
       <title>Cross-Modal Commentator: Automatic Machine Commenting Based on Cross-Modal Information</title>
       <author><first>Pengcheng</first><last>Yang</last></author>
-      <author id="zhihan-zhang"><first>Zhihan</first><last>Zhang</last></author>
+      <author><first>Zhihan</first><last>Zhang</last></author>
       <author><first>Fuli</first><last>Luo</last></author>
       <author><first>Lei</first><last>Li</last></author>
       <author><first>Chengyang</first><last>Huang</last></author>
@@ -3327,7 +3327,7 @@
       <title>Multi-hop Reading Comprehension across Multiple Documents by Reasoning over Heterogeneous Graphs</title>
       <author><first>Ming</first><last>Tu</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
-      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
+      <author><first>Jing</first><last>Huang</last></author>
       <author><first>Yun</first><last>Tang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
@@ -3380,7 +3380,7 @@
       <title>Sentence Mover’s Similarity: Automatic Evaluation for Multi-Sentence Texts</title>
       <author><first>Elizabeth</first><last>Clark</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2748–2760</pages>
       <abstract>For evaluating machine-generated texts, automatic methods hold the promise of avoiding collection of human judgments, which can be expensive and time-consuming. The most common automatic metrics, like BLEU and ROUGE, depend on exact word matching, an inflexible approach for measuring semantic similarity. We introduce methods based on sentence mover’s similarity; our automatic metrics evaluate text in a continuous space using word and sentence embeddings. We find that sentence-based metrics correlate with human judgments significantly better than ROUGE, both on machine-generated summaries (average length of 3.4 sentences) and human-authored essays (average length of 7.5). We also show that sentence mover’s similarity can be used as a reward when learning a generation model via reinforcement learning; we present both automatic and human evaluations of summaries learned in this way, finding that our approach outperforms ROUGE.</abstract>
       <url hash="bcdd2058">P19-1264</url>
@@ -3447,8 +3447,8 @@
     <paper id="269">
       <title>Putting Evaluation in Context: Contextual Embeddings Improve Machine Translation Evaluation</title>
       <author><first>Nitika</first><last>Mathur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>2799–2808</pages>
       <abstract>Accurate, automatic evaluation of machine translation is critical for system tuning, and evaluating progress in the field. We proposed a simple unsupervised metric, and additional supervised metrics which rely on contextual word embeddings to encode the translation and reference sentences. We find that these models rival or surpass all existing metrics in the WMT 2017 sentence-level and system-level tracks, and our trained model has a substantially higher correlation with human judgements than all existing metrics on the WMT 2017 to-English sentence level dataset.</abstract>
       <url hash="435775a4">P19-1269</url>
@@ -3461,7 +3461,7 @@
       <author><first>Xingshan</first><last>Zeng</last></author>
       <author><first>Jing</first><last>Li</last></author>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>2809–2818</pages>
       <abstract>As the online world continues its exponential growth, interpersonal communication has come to play an increasingly central role in opinion formation and change. In order to help users better engage with each other online, we study a challenging problem of re-entry prediction foreseeing whether a user will come back to a conversation they once participated in. We hypothesize that both the context of the ongoing conversations and the users’ previous chatting history will affect their continued interests in future engagement. Specifically, we propose a neural framework with three main layers, each modeling context, user history, and interactions between them, to explore how the conversation context and user chatting history jointly result in their re-entry behavior. We experiment with two large-scale datasets collected from Twitter and Reddit. Results show that our proposed framework with bi-attention achieves an F1 score of 61.1 on Twitter conversations, outperforming the state-of-the-art methods from previous work.</abstract>
       <url hash="8b5685e1">P19-1270</url>
@@ -3485,7 +3485,7 @@
     <paper id="272">
       <title>Categorizing and Inferring the Relationship between the Text and Image of <fixed-case>T</fixed-case>witter Posts</title>
       <author><first>Alakananda</first><last>Vempala</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <pages>2830–2840</pages>
       <abstract>Text in social media posts is frequently accompanied by images in order to provide content, supply context, or to express feelings. This paper studies how the meaning of the entire tweet is composed through the relationship between its textual content and its image. We build and release a data set of image tweets annotated with four classes which express whether the text or the image provides additional information to the other modality. We show that by combining the text and image information, we can build a machine learning approach that accurately distinguishes between the relationship types. Further, we derive insights into how these relationships are materialized through text and image content analysis and how they are impacted by user demographic traits. These methods can be used in several downstream applications including pre-training image tagging models, collecting distantly supervised data for image captioning, and can be directly used in end-user applications to optimize screen estate.</abstract>
       <url hash="a5a163ca">P19-1272</url>
@@ -3496,8 +3496,8 @@
     </paper>
     <paper id="273">
       <title>Who Sides with Whom? Towards Computational Construction of Discourse Networks for Political Debates</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Erenay</first><last>Dayanik</last></author>
       <author><first>Sebastian</first><last>Haunss</last></author>
@@ -3512,7 +3512,7 @@
     </paper>
     <paper id="274">
       <title>Analyzing Linguistic Differences between Owner and Staff Attributed Tweets</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Rita</first><last>Devlin Marier</last></author>
       <pages>2848–2853</pages>
       <abstract>Research on social media has to date assumed that all posts from an account are authored by the same person. In this study, we challenge this assumption and study the linguistic differences between posts signed by the account owner or attributed to their staff. We introduce a novel data set of tweets posted by U.S. politicians who self-reported their tweets using a signature. We analyze the linguistic topics and style features that distinguish the two types of tweets. Predictive results show that we are able to predict owner and staff attributed tweets with good accuracy, even when not using any training data from that account.</abstract>
@@ -3536,7 +3536,7 @@
     <paper id="276">
       <title>Open Domain Event Extraction Using Neural Latent Variable Models</title>
       <author><first>Xiao</first><last>Liu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Yue</first><last>Zhang</last></author>
       <pages>2860–2871</pages>
       <abstract>We consider open domain event extraction, the task of extracting unconstraint types of events from news clusters. A novel latent variable neural model is constructed, which is scalable to very large corpus. A dataset is collected and manually annotated, with task-specific evaluation metrics being designed. Results show that the proposed unsupervised model gives better performance compared to the state-of-the-art method for event schema induction.</abstract>
@@ -3611,7 +3611,7 @@
     <paper id="282">
       <title>Is Attention Interpretable?</title>
       <author><first>Sofia</first><last>Serrano</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>2931–2951</pages>
       <abstract>Attention mechanisms have recently boosted performance on a range of NLP tasks. Because attention layers explicitly weight input components’ representations, it is also often assumed that attention can be used to identify information that models found important (e.g., specific contextualized word tokens). We test whether that assumption holds by manipulating attention weights in already-trained text classification models and analyzing the resulting differences in their predictions. While we observe some ways in which higher attention weights correlate with greater impact on model predictions, we also find many ways in which this does not hold, i.e., where gradient-based rankings of attention weights better predict their effects than their magnitudes. We conclude that while attention noisily predicts input components’ overall importance to a model, it is by no means a fail-safe indicator.</abstract>
       <url hash="abe1179e">P19-1282</url>
@@ -3621,7 +3621,7 @@
     </paper>
     <paper id="283">
       <title>Correlating Neural and Symbolic Representations of Language</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>2952–2962</pages>
       <abstract>Analysis methods which enable us to better understand the representations and functioning of neural models of language are increasingly needed as deep learning becomes the dominant approach in NLP. Here we present two methods based on Representational Similarity Analysis (RSA) and Tree Kernels (TK) which allow us to directly quantify how strongly the information encoded in neural activation patterns corresponds to information represented by symbolic structures such as syntax trees. We first validate our methods on the case of a simple synthetic language for arithmetic expressions with clearly defined syntax and semantics, and show that they exhibit the expected pattern of results. We then our methods to correlate neural representations of English sentences with their constituency parse trees.</abstract>
@@ -3646,8 +3646,8 @@
       <author><first>Zihang</first><last>Dai</last></author>
       <author><first>Zhilin</first><last>Yang</last></author>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
       <pages>2978–2988</pages>
       <abstract>Transformers have a potential of learning longer-term dependency, but are limited by a fixed-length context in the setting of language modeling. We propose a novel neural architecture Transformer-XL that enables learning dependency beyond a fixed length without disrupting temporal coherence. It consists of a segment-level recurrence mechanism and a novel positional encoding scheme. Our method not only enables capturing longer-term dependency, but also resolves the context fragmentation problem. As a result, Transformer-XL learns dependency that is 80% longer than RNNs and 450% longer than vanilla Transformers, achieves better performance on both short and long sequences, and is up to 1,800+ times faster than vanilla Transformers during evaluation. Notably, we improve the state-of-the-art results of bpc/perplexity to 0.99 on enwiki8, 1.08 on text8, 18.3 on WikiText-103, 21.8 on One Billion Word, and 54.5 on Penn Treebank (without finetuning). When trained only on WikiText-103, Transformer-XL manages to generate reasonably coherent, novel text articles with thousands of tokens. Our code, pretrained models, and hyperparameters are available in both Tensorflow and PyTorch.</abstract>
@@ -3662,7 +3662,7 @@
       <author><first>Junjie</first><last>Hu</last></author>
       <author><first>Mengzhou</first><last>Xia</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>2989–3001</pages>
       <abstract>It has been previously noted that neural machine translation (NMT) is very sensitive to domain shift. In this paper, we argue that this is a dual effect of the highly lexicalized nature of NMT, resulting in failure for sentences with large numbers of unknown words, and lack of supervision for domain-specific words. To remedy this problem, we propose an unsupervised adaptation method which fine-tunes a pre-trained out-of-domain NMT model using a pseudo-in-domain corpus. Specifically, we perform lexicon induction to extract an in-domain lexicon, and construct a pseudo-parallel in-domain corpus by performing word-for-word back-translation of monolingual in-domain target sentences. In five domains over twenty pairwise adaptation settings and two model architectures, our method achieves consistent improvements without using any in-domain parallel sentences, improving up to 14 BLEU over unadapted models, and up to 2 BLEU over strong back-translation baselines.</abstract>
       <url hash="670f6605">P19-1286</url>
@@ -3717,7 +3717,7 @@
     </paper>
     <paper id="290">
       <title>Look Harder: A Neural Machine Translation Model with Hard Attention</title>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Insoo</first><last>Chung</last></author>
       <author><first>Sangha</first><last>Kim</last></author>
       <pages>3037–3043</pages>
@@ -3742,7 +3742,7 @@
     <paper id="292">
       <title>A Simple and Effective Approach to Automatic Post-Editing with Transfer Learning</title>
       <author><first>Gonçalo M.</first><last>Correia</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>3050–3056</pages>
       <abstract>Automatic post-editing (APE) seeks to automatically refine the output of a black-box machine translation (MT) system through human post-edits. APE systems are usually trained by complementing human post-edited data with large, artificial data generated through back-translations, a time-consuming process often no easier than training a MT system from scratch. in this paper, we propose an alternative where we fine-tune pre-trained BERT models on both the encoder and decoder of an APE system, exploring several parameter sharing strategies. By only training on a dataset of 23K sentences for 3 hours on a single GPU we obtain results that are competitive with systems that were trained on 5M artificial sentences. When we add this artificial data our method obtains state-of-the-art results.</abstract>
       <url hash="5d7ad86d">P19-1292</url>
@@ -3764,10 +3764,10 @@
     </paper>
     <paper id="294">
       <title>Training Neural Machine Translation to Apply Terminology Constraints</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>3063–3068</pages>
       <abstract>This paper proposes a novel method to inject custom terminology into neural machine translation at run time. Previous works have mainly proposed modifications to the decoding algorithm in order to constrain the output to include run-time-provided target terms. While being effective, these constrained decoding methods add, however, significant computational overhead to the inference step, and, as we show in this paper, can be brittle when tested in realistic conditions. In this paper we approach the problem by training a neural MT system to learn how to use custom terminology when provided with the input. Comparative experiments show that our method is not only more effective than a state-of-the-art implementation of constrained decoding, but is also as fast as constraint-free decoding.</abstract>
       <url hash="fd968e31">P19-1294</url>
@@ -3793,9 +3793,9 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>3076–3082</pages>
       <abstract>The training objective of neural machine translation (NMT) is to minimize the loss between the words in the translated sentences and those in the references. In NMT, there is a natural correspondence between the source sentence and the target sentence. However, this relationship has only been represented using the entire neural network and the training objective is computed in word-level. In this paper, we propose a sentence-level agreement module to directly minimize the difference between the representation of source and target sentence. The proposed agreement module can be integrated into NMT as an additional training objective function and can also be used to enhance the representation of the source sentences. Empirical results on the NIST Chinese-to-English and WMT English-to-German tasks show the proposed agreement module can significantly improve the NMT performance.</abstract>
       <url hash="5ef14026">P19-1296</url>
@@ -3807,7 +3807,7 @@
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Kamal Kumar</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>3083–3089</pages>
       <abstract>In this paper, we propose a multilingual unsupervised NMT scheme which jointly trains multiple languages with a shared encoder and multiple decoders. Our approach is based on denoising autoencoding of each language and back-translating between English and multiple non-English languages. This results in a universal encoder which can encode any language participating in training into an inter-lingual representation, and language-specific decoders. Our experiments using only monolingual corpora show that multilingual unsupervised model performs better than the separately trained bilingual models achieving improvement of up to 1.48 BLEU points on WMT test sets. We also observe that even if we do not train the network for all possible translation directions, the network is still able to translate in a many-to-many fashion leveraging encoder’s ability to generate interlingual representation.</abstract>
       <url hash="b59684db">P19-1297</url>
@@ -3830,10 +3830,10 @@
     <paper id="299">
       <title>Multi-Source Cross-Lingual Model Transfer: Learning What to Share</title>
       <author><first>Xilun</first><last>Chen</last></author>
-      <author><first>Ahmed Hassan</first><last>Awadallah</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed Hassan</first><last>Awadallah</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Wei</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>3098–3112</pages>
       <abstract>Modern NLP applications have enjoyed a great boost utilizing neural networks models. Such deep neural models, however, are not applicable to most human languages due to the lack of annotated training data for various NLP tasks. Cross-lingual transfer learning (CLTL) is a viable method for building NLP models for a low-resource target language by leveraging labeled data from other (source) languages. In this work, we focus on the multilingual transfer setting where training data in multiple source languages is leveraged to further boost target language performance. Unlike most existing methods that rely only on language-invariant features for CLTL, our approach coherently utilizes both language-invariant and language-specific features at instance level. Our model leverages adversarial networks to learn language-invariant features, and mixture-of-experts models to dynamically exploit the similarity between the target language and each individual source language. This enables our model to learn effectively what to share between various languages in the multilingual setup. Moreover, when coupled with unsupervised multilingual embeddings, our model can operate in a zero-resource setting where neither target language training data nor cross-lingual resources are available. Our model achieves significant performance gains over prior art, as shown in an extensive set of experiments over multiple text classification and sequence tagging tasks including a large-scale industry dataset.</abstract>
       <url hash="e7db05fe">P19-1299</url>
@@ -3844,7 +3844,7 @@
       <title>Unsupervised Multilingual Word Embedding with Limited Resources using Neural Language Models</title>
       <author><first>Takashi</first><last>Wada</last></author>
       <author><first>Tomoharu</first><last>Iwata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>3113–3124</pages>
       <abstract>Recently, a variety of unsupervised methods have been proposed that map pre-trained word embeddings of different languages into the same space without any parallel data. These methods aim to find a linear transformation based on the assumption that monolingual word embeddings are approximately isomorphic between languages. However, it has been demonstrated that this assumption holds true only on specific conditions, and with limited resources, the performance of these methods decreases drastically. To overcome this problem, we propose a new unsupervised multilingual embedding method that does not rely on such assumption and performs well under resource-poor scenarios, namely when only a small amount of monolingual data (i.e., 50k sentences) are available, or when the domains of monolingual data are different across languages. Our proposed model, which we call ‘Multilingual Neural Language Models’, shares some of the network parameters among multiple languages, and encodes sentences of multiple languages into the same space. The model jointly learns word embeddings of different languages in the same space, and generates multilingual embeddings without any parallel data or pre-training. Our experiments on word alignment tasks have demonstrated that, on the low-resource condition, our model substantially outperforms existing unsupervised and even supervised methods trained with 500 bilingual pairs of words. Our model also outperforms unsupervised methods given different-domain corpora across languages. Our code is publicly available.</abstract>
       <url hash="31e7e9bf">P19-1300</url>
@@ -3929,10 +3929,10 @@
       <author><first>Caitlin</first><last>Westerfield</last></author>
       <author><first>Sungrok</first><last>Shim</last></author>
       <author><first>Garrett</first><last>Bingham</last></author>
-      <author><first>Alexander</first><last>Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Fabbri</last></author>
       <author><first>William</first><last>Hu</last></author>
       <author><first>Neha</first><last>Verma</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>3173–3179</pages>
       <abstract>In this paper, we propose to boost low-resource cross-lingual document retrieval performance with deep bilingual query-document representations. We match queries and documents in both source and target languages with four components, each of which is implemented as a term interaction-based deep neural network with cross-lingual word embeddings as input. By including query likelihood scores as extra features, our model effectively learns to rerank the retrieved documents by using a small number of relevance labels for low-resource language pairs. Due to the shared cross-lingual word embedding space, the model can also be directly applied to another language pair without any training label. Experimental results on the Material dataset show that our model outperforms the competitive translation-based baselines on English-Swahili, English-Tagalog, and English-Somali cross-lingual information retrieval tasks.</abstract>
       <url hash="09cc1749">P19-1306</url>
@@ -3977,7 +3977,7 @@
     </paper>
     <paper id="310">
       <title><fixed-case>JW</fixed-case>300: A Wide-Coverage Parallel Corpus for Low-Resource Languages</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <pages>3204–3210</pages>
       <abstract>Viable cross-lingual transfer critically depends on the availability of parallel texts. Shortage of such resources imposes a development and evaluation bottleneck in multilingual processing. We introduce JW300, a parallel corpus of over 300 languages with around 100 thousand parallel sentences per language pair on average. In this paper, we present the resource and showcase its utility in experiments with cross-lingual word embedding induction and multi-source part-of-speech projection.</abstract>
@@ -4052,7 +4052,7 @@
       <author><first>Dima</first><last>Puzyrev</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Animesh</first><last>Mukherjee</last></author>
       <pages>3263–3274</pages>
       <abstract>The compositionality degree of multiword expressions indicates to what extent the meaning of a phrase can be derived from the meaning of its constituents and their grammatical relations. Prediction of (non)-compositionality is a task that has been frequently addressed with distributional semantic models. We introduce a novel technique to blend hierarchical information with distributional information for predicting compositionality. In particular, we use hypernymy information of the multiword and its constituents encoded in the form of the recently introduced Poincaré embeddings in addition to the distributional information to detect compositionality for noun phrases. Using a weighted average of the distributional similarity and a Poincaré similarity function, we obtain consistent and substantial, statistically significant improvement across three gold standard datasets over state-of-the-art models based on distributional information only. Unlike traditional approaches that solely use an unsupervised setting, we have also framed the problem as a supervised task, obtaining comparable improvements. Further, we publicly release our Poincaré embeddings, which are trained on the output of handcrafted lexical-syntactic patterns on a large corpus.</abstract>
@@ -4073,8 +4073,8 @@
     </paper>
     <paper id="318">
       <title>Relational Word Embeddings</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>3286–3296</pages>
       <abstract>While word embeddings have been shown to implicitly encode various forms of attributional knowledge, the extent to which they capture relational information is far more limited. In previous work, this limitation has been addressed by incorporating relational knowledge from external knowledge bases when learning the word embedding. Such strategies may not be optimal, however, as they are limited by the coverage of available resources and conflate similarity with other forms of relatedness. As an alternative, in this paper we propose to encode relational knowledge in a separate word embedding, which is aimed to be complementary to a given standard word embedding. This relational word embedding is still learned from co-occurrence statistics, and can thus be used even when no external knowledge base is available. Our analysis shows that relational word vectors do indeed capture information that is complementary to what is encoded in standard word embeddings.</abstract>
@@ -4099,7 +4099,7 @@
       <author><first>Prateek</first><last>Yadav</last></author>
       <author><first>Piyush</first><last>Rai</last></author>
       <author><first>Chiranjib</first><last>Bhattacharyya</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>3308–3318</pages>
       <abstract>Word embeddings have been widely adopted across several NLP applications. Most existing word embedding methods utilize sequential context of a word to learn its embedding. While there have been some attempts at utilizing syntactic context of a word, such methods result in an explosion of the vocabulary size. In this paper, we overcome this problem by proposing SynGCN, a flexible Graph Convolution based method for learning word embeddings. SynGCN utilizes the dependency context of a word without increasing the vocabulary size. Word embeddings learned by SynGCN outperform existing methods on various intrinsic and extrinsic tasks and provide an advantage when used with ELMo. We also propose SemGCN, an effective framework for incorporating diverse semantic knowledge for further enhancing learned word representations. We make the source code of both models available to encourage reproducible research.</abstract>
       <url hash="26edfa15">P19-1320</url>
@@ -4143,7 +4143,7 @@
       <title>Putting Words in Context: <fixed-case>LSTM</fixed-case> Language Models and Lexical Ambiguity</title>
       <author><first>Laura</first><last>Aina</last></author>
       <author><first>Kristina</first><last>Gulordava</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>3342–3348</pages>
       <abstract>In neural network models of language, words are commonly represented using context-invariant representations (word embeddings) which are then put in context in the hidden layers. Since words are often ambiguous, representing the contextually relevant information is not trivial. We investigate how an LSTM language model deals with lexical ambiguity in English, designing a method to probe its hidden representations for lexical and contextual information about words. We find that both types of information are represented to a large extent, but also that there is room for improvement for contextual information.</abstract>
       <url hash="427e71f0">P19-1324</url>
@@ -4155,7 +4155,7 @@
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Mohammad</first><last>Dorgham</last></author>
       <author><first>Oleksiy</first><last>Oliynyk</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>3349–3355</pages>
       <abstract>Graph measures, such as node distances, are inefficient to compute. We explore dense vector representations as an effective way to approximate the same information. We introduce a simple yet efficient and effective approach for learning graph embeddings. Instead of directly operating on the graph structure, our method takes structural measures of pairwise node similarities into account and learns dense node representations reflecting user-defined graph distance measures, such as e.g. the shortest path distance or distance measures that take information beyond the graph structure into account. We demonstrate a speed-up of several orders of magnitude when predicting word similarity by vector operations on our embeddings as opposed to directly computing the respective path-based measures, while outperforming various other graph embeddings on semantic similarity and word sense disambiguation tasks.</abstract>
@@ -4203,8 +4203,8 @@
       <title>Exploring Numeracy in Word Embeddings</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>3374–3380</pages>
       <abstract>Word embeddings are now pervasive across NLP subfields as the de-facto method of forming text representataions. In this work, we show that existing embedding models are inadequate at constructing representations that capture salient aspects of mathematical meaning for numbers, which is important for language understanding. Numbers are ubiquitous and frequently appear in text. Inspired by cognitive studies on how humans perceive numbers, we develop an analysis framework to test how well word embeddings capture two essential properties of numbers: magnitude (e.g. 3&lt;4) and numeration (e.g. 3=three). Our experiments reveal that most models capture an approximate notion of magnitude, but are inadequate at capturing numeration. We hope that our observations provide a starting point for the development of methods which better capture numeracy in NLP systems.</abstract>
       <url hash="a4c2c6ae">P19-1329</url>
@@ -4229,7 +4229,7 @@
       <author><first>Yue</first><last>Dong</last></author>
       <author><first>Zichao</first><last>Li</last></author>
       <author><first>Mehdi</first><last>Rezagholizadeh</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3393–3402</pages>
       <abstract>We present the first sentence simplification model that learns explicit edit operations (ADD, DELETE, and KEEP) via a neural programmer-interpreter approach. Most current neural sentence simplification systems are variants of sequence-to-sequence models adopted from machine translation. These methods learn to simplify sentences as a byproduct of the fact that they are trained on complex-simple sentence pairs. By contrast, our neural programmer-interpreter is directly trained to predict explicit edit operations on targeted parts of the input sentence, resembling the way that humans perform simplification and revision. Our model outperforms previous state-of-the-art neural sentence simplification models (without external knowledge) by large margins on three benchmark text simplification corpora in terms of SARI (+0.95 WikiLarge, +1.89 WikiSmall, +1.41 Newsela), and is judged by humans to produce overall better and simpler output sentences.</abstract>
       <url hash="fe115747">P19-1331</url>
@@ -4254,7 +4254,7 @@
       <title>Transforming Complex Sentences into a Semantic Hierarchy</title>
       <author><first>Christina</first><last>Niklaus</last></author>
       <author><first>Matthias</first><last>Cetto</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>3415–3427</pages>
       <abstract>We present an approach for recursively splitting and rephrasing complex English sentences into a novel semantic hierarchy of simplified sentences, with each of them presenting a more regular structure that may facilitate a wide variety of artificial intelligence tasks, such as machine translation (MT) or information extraction (IE). Using a set of hand-crafted transformation rules, input sentences are recursively transformed into a two-layered hierarchical representation in the form of core sentences and accompanying contexts that are linked via rhetorical relations. In this way, the semantic relationship of the decomposed constituents is preserved in the output, maintaining its interpretability for downstream applications. Both a thorough manual analysis and automatic evaluation across three datasets from two different domains demonstrate that the proposed syntactic simplification approach outperforms the state of the art in structural text simplification. Moreover, an extrinsic evaluation shows that when applying our framework as a preprocessing step the performance of state-of-the-art Open IE systems can be improved by up to 346% in precision and 52% in recall. To enable reproducible research, all code is provided online.</abstract>
@@ -4266,7 +4266,7 @@
     </paper>
     <paper id="334">
       <title>Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in Natural Language Inference</title>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>3428–3448</pages>
@@ -4311,10 +4311,10 @@
     <paper id="337">
       <title>Scalable Syntax-Aware Language Models Using Knowledge Distillation</title>
       <author><first>Adhiguna</first><last>Kuncoro</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>3472–3484</pages>
       <abstract>Prior work has shown that, on small amounts of training data, syntactic neural language models learn structurally sensitive generalisations more successfully than sequential language models. However, their computational complexity renders scaling difficult, and it remains an open question whether structural biases are still necessary when sequential models have access to ever larger amounts of training data. To answer this question, we introduce an efficient knowledge distillation (KD) technique that transfers knowledge from a syntactic language model trained on a small corpus to an LSTM language model, hence enabling the LSTM to develop a more structurally sensitive representation of the larger training data it learns from. On targeted syntactic evaluations, we find that, while sequential LSTMs perform much better than previously reported, our proposed technique substantially improves on this baseline, yielding a new state of the art. Our findings and analysis affirm the importance of structural biases, even in models that learn from large amounts of data.</abstract>
       <url hash="4c41e88d">P19-1337</url>
@@ -4339,7 +4339,7 @@
       <author><first>Aparna</first><last>Garimella</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>3493–3498</pages>
       <abstract>Several linguistic studies have shown the prevalence of various lexical and grammatical patterns in texts authored by a person of a particular gender, but models for part-of-speech tagging and dependency parsing have still not adapted to account for these differences. To address this, we annotate the Wall Street Journal part of the Penn Treebank with the gender information of the articles’ authors, and build taggers and parsers trained on this data that show performance differences in text written by men and women. Further analyses reveal numerous part-of-speech tags and syntactic relations whose prediction performances benefit from the prevalence of a specific gender in the training data. The results underscore the importance of accounting for gendered differences in syntactic tasks, and outline future venues for developing more accurate taggers and parsers. We release our data to the research community.</abstract>
       <url hash="e25b82c0">P19-1339</url>
@@ -4362,7 +4362,7 @@
     <paper id="341">
       <title>A Multilingual <fixed-case>BPE</fixed-case> Embedding Space for Universal Sentiment Lexicon Induction</title>
       <author><first>Mengjie</first><last>Zhao</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>3506–3517</pages>
       <abstract>We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world’s languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show – across typologically diverse languages in PBC+ – good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages.</abstract>
       <url hash="5af8e7a9">P19-1341</url>
@@ -4415,7 +4415,7 @@
       <author><first>Xiaozhong</first><last>Liu</last></author>
       <author><first>Luo</first><last>Si</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>3548–3557</pages>
       <abstract>In the literature, existing studies on aspect sentiment classification (ASC) focus on individual non-interactive reviews. This paper extends the research to interactive reviews and proposes a new research task, namely Aspect Sentiment Classification towards Question-Answering (ASC-QA), for real-world applications. This new task aims to predict sentiment polarities for specific aspects from interactive QA style reviews. In particular, a high-quality annotated corpus is constructed for ASC-QA to facilitate corresponding research. On this basis, a Reinforced Bidirectional Attention Network (RBAN) approach is proposed to address two inherent challenges in ASC-QA, i.e., semantic matching between question and answer, and data noise. Experimental results demonstrate the great advantage of the proposed approach to ASC-QA against several state-of-the-art baselines.</abstract>
       <url hash="1953b28a">P19-1345</url>
@@ -4455,7 +4455,7 @@
       <title>Generating Question Relevant Captions to Aid Visual Question Answering</title>
       <author><first>Jialin</first><last>Wu</last></author>
       <author><first>Zeyuan</first><last>Hu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>3585–3594</pages>
       <abstract>Visual question answering (VQA) and image captioning require a shared body of general knowledge connecting language and vision. We present a novel approach to better VQA performance that exploits this connection by jointly generating captions that are targeted to help answer a specific visual question. The model is trained using an existing caption dataset by automatically determining question-relevant captions using an online gradient-based method. Experimental results on the VQA v2 challenge demonstrates that our approach obtains state-of-the-art VQA performance (e.g. 68.4% in the Test-standard set using a single model) by simultaneously generating question-relevant captions.</abstract>
       <url hash="43a14972">P19-1348</url>
@@ -4467,7 +4467,7 @@
       <title>Multi-grained Attention with Object-level Grounding for Visual Question Answering</title>
       <author><first>Pingping</first><last>Huang</last></author>
       <author><first>Jianhui</first><last>Huang</last></author>
-      <author><first>Yuqing</first><last>Guo</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
       <author><first>Min</first><last>Qiao</last></author>
       <author><first>Yong</first><last>Zhu</last></author>
       <pages>3595–3600</pages>
@@ -4480,9 +4480,9 @@
     <paper id="350">
       <title>Psycholinguistics Meets Continual Learning: Measuring Catastrophic Forgetting in Visual Question Answering</title>
       <author><first>Claudio</first><last>Greco</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>3601–3605</pages>
       <abstract>We study the issue of catastrophic forgetting in the context of neural multimodal approaches to Visual Question Answering (VQA). Motivated by evidence from psycholinguistics, we devise a set of linguistically-informed VQA tasks, which differ by the types of questions involved (Wh-questions and polar questions). We test what impact task difficulty has on continual learning, and whether the order in which a child acquires question types facilitates computational models. Our results show that dramatic forgetting is at play and that task difficulty and order matter. Two well-known current continual learning methods mitigate the problem only to a limiting degree.</abstract>
       <url hash="cbef0d73">P19-1350</url>
@@ -4558,8 +4558,8 @@
     <paper id="356">
       <title>What Does <fixed-case>BERT</fixed-case> Learn about the Structure of Language?</title>
       <author><first>Ganesh</first><last>Jawahar</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>3651–3657</pages>
       <abstract>BERT is a recent language representation model that has surprisingly performed well in diverse language understanding benchmarks. This result indicates the possibility that BERT networks capture structural information about language. In this work, we provide novel support for this claim by performing a series of experiments to unpack the elements of English language structure learned by BERT. Our findings are fourfold. BERT’s phrasal representation captures the phrase-level information in the lower layers. The intermediate layers of BERT compose a rich hierarchy of linguistic information, starting with surface features at the bottom, syntactic features in the middle followed by semantic features at the top. BERT requires deeper layers while tracking subject-verb agreement to handle long-term dependency problem. Finally, the compositional scheme underlying BERT mimics classical, tree-like structures.</abstract>
       <url hash="7e767aaf">P19-1356</url>
@@ -4583,7 +4583,7 @@
       <title>Learning from Dialogue after Deployment: Feed Yourself, Chatbot!</title>
       <author><first>Braden</first><last>Hancock</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
-      <author><first>Pierre-Emmanuel</first><last>Mazare</last></author>
+      <author id="pierre-emmanuel-mazare"><first>Pierre-Emmanuel</first><last>Mazare</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <pages>3667–3684</pages>
       <abstract>The majority of conversations a dialogue agent sees over its lifetime occur after it has already been trained and deployed, leaving a vast store of potential training signal untapped. In this work, we propose the self-feeding chatbot, a dialogue agent with the ability to extract new training examples from the conversations it participates in. As our agent engages in conversation, it also estimates user satisfaction in its responses. When the conversation appears to be going well, the user’s responses become new training examples to imitate. When the agent believes it has made a mistake, it asks for feedback; learning to predict the feedback that will be given improves the chatbot’s dialogue abilities further. On the PersonaChat chit-chat dataset with over 131k training examples, we find that learning from dialogue with a self-feeding chatbot significantly improves performance, regardless of the amount of traditional supervision.</abstract>
@@ -4597,7 +4597,7 @@
       <author><first>Xiaoqing</first><last>Zheng</last></author>
       <author><first>Lu</first><last>Liu</last></author>
       <author><first>Mu</first><last>Xu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>3685–3695</pages>
       <abstract>It is desirable for dialog systems to have capability to express specific emotions during a conversation, which has a direct, quantifiable impact on improvement of their usability and user satisfaction. After a careful investigation of real-life conversation data, we found that there are at least two ways to express emotions with language. One is to describe emotional states by explicitly using strong emotional words; another is to increase the intensity of the emotional experiences by implicitly combining neutral words in distinct ways. We propose an emotional dialogue system (EmoDS) that can generate the meaningful responses with a coherent structure for a post, and meanwhile express the desired emotion explicitly or implicitly within a unified framework. Experimental results showed EmoDS performed better than the baselines in BLEU, diversity and the quality of emotional expression.</abstract>
       <url hash="4a6dff4b">P19-1359</url>
@@ -4622,8 +4622,8 @@
       <author><first>Weikang</first><last>Wang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Qian</first><last>Li</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Zhifei</first><last>Li</last></author>
       <pages>3710–3720</pages>
       <abstract>Clarifying user needs is essential for existing task-oriented dialogue systems. However, in real-world applications, developers can never guarantee that all possible user demands are taken into account in the design phase. Consequently, existing systems will break down when encountering unconsidered user needs. To address this problem, we propose a novel incremental learning framework to design task-oriented dialogue systems, or for short Incremental Dialogue System (IDS), without pre-defining the exhaustive list of user needs. Specifically, we introduce an uncertainty estimation module to evaluate the confidence of giving correct responses. If there is high confidence, IDS will provide responses to users. Otherwise, humans will be involved in the dialogue process, and IDS can learn from human intervention through an online learning module. To evaluate our method, we propose a new dataset which simulates unanticipated user needs in the deployment stage. Experiments show that IDS is robust to unconsidered user actions, and can update itself online by smartly selecting only the most effective training data, and hence attains better performance with less annotation cost.</abstract>
@@ -4638,7 +4638,7 @@
       <author><first>Yanyan</first><last>Lan</last></author>
       <author><first>Liang</first><last>Pang</last></author>
       <author><first>Jiafeng</first><last>Guo</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>3721–3730</pages>
       <abstract>In multi-turn dialogue generation, response is usually related with only a few contexts. Therefore, an ideal model should be able to detect these relevant contexts and produce a suitable response accordingly. However, the widely used hierarchical recurrent encoder-decoder models just treat all the contexts indiscriminately, which may hurt the following response generation process. Some researchers try to use the cosine similarity or the traditional attention mechanism to find the relevant contexts, but they suffer from either insufficient relevance assumption or position bias problem. In this paper, we propose a new model, named ReCoSa, to tackle this problem. Firstly, a word level LSTM encoder is conducted to obtain the initial representation of each context. Then, the self-attention mechanism is utilized to update both the context and masked response representation. Finally, the attention weights between each context and response representations are computed and used in the further decoding process. Experimental results on both Chinese customer services dataset and English Ubuntu dialogue dataset show that ReCoSa significantly outperforms baseline models, in terms of both metric-based and human evaluations. Further analysis on attention shows that the detected relevant contexts by ReCoSa are highly coherent with human’s understanding, validating the correctness and interpretability of ReCoSa.</abstract>
       <url hash="dcb29386">P19-1362</url>
@@ -4687,7 +4687,7 @@
       <title>Retrieval-Enhanced Adversarial Training for Neural Response Generation</title>
       <author><first>Qingfu</first><last>Zhu</last></author>
       <author><first>Lei</first><last>Cui</last></author>
-      <author><first>Wei-Nan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Wei-Nan</first><last>Zhang</last></author>
       <author><first>Furu</first><last>Wei</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <pages>3763–3773</pages>
@@ -4789,13 +4789,13 @@
       <title>A Large-Scale Corpus for Conversation Disentanglement</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Sai R.</first><last>Gouravajhala</last></author>
-      <author><first>Joseph J.</first><last>Peper</last></author>
+      <author id="joseph-j-peper"><first>Joseph J.</first><last>Peper</last></author>
       <author><first>Vignesh</first><last>Athreya</last></author>
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Jatin</first><last>Ganhotra</last></author>
       <author><first>Siva Sankalp</first><last>Patel</last></author>
       <author><first>Lazaros C</first><last>Polymenakos</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <pages>3846–3856</pages>
       <abstract>Disentangling conversations mixed together in a single stream of messages is a difficult task, made harder by the lack of large manually annotated datasets. We created a new dataset of 77,563 messages manually annotated with reply-structure graphs that both disentangle conversations and define internal conversation structure. Our data is 16 times larger than all previously released datasets combined, the first to include adjudication of annotation disagreements, and the first to include context. We use our data to re-examine prior work, in particular, finding that 89% of conversations in a widely used dialogue corpus are either missing messages or contain extra messages. Our manually-annotated data presents an opportunity to develop robust data-driven methods for conversation disentanglement, which will help advance dialogue research.</abstract>
       <url hash="a1256dea">P19-1374</url>
@@ -4819,7 +4819,7 @@
       <title>Are we there yet? Encoder-decoder neural networks as cognitive models of <fixed-case>E</fixed-case>nglish past tense inflection</title>
       <author><first>Maria</first><last>Corkery</last></author>
       <author><first>Yevgen</first><last>Matusevych</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>3868–3877</pages>
       <abstract>The cognitive mechanisms needed to account for the English past tense have long been a subject of debate in linguistics and cognitive science. Neural network models were proposed early on, but were shown to have clear flaws. Recently, however, Kirov and Cotterell (2018) showed that modern encoder-decoder (ED) models overcome many of these flaws. They also presented evidence that ED models demonstrate humanlike performance in a nonce-word task. Here, we look more closely at the behaviour of their model in this task. We find that (1) the model exhibits instability across multiple simulations in terms of its correlation with human data, and (2) even when results are aggregated across simulations (treating each simulation as an individual human participant), the fit to the human data is not strong—worse than an older rule-based model. These findings hold up through several alternative training regimes and evaluation measures. Although other neural architectures might do better, we conclude that there is still insufficient evidence to claim that neural nets are a good cognitive model for this task.</abstract>
       <url hash="e3420d7f">P19-1376</url>
@@ -4829,7 +4829,7 @@
     <paper id="377">
       <title>A Spreading Activation Framework for Tracking Conceptual Complexity of Texts</title>
       <author><first>Ioana</first><last>Hulpuș</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Heiner</first><last>Stuckenschmidt</last></author>
       <pages>3878–3887</pages>
       <abstract>We propose an unsupervised approach for assessing conceptual complexity of texts, based on spreading activation. Using DBpedia knowledge graph as a proxy to long-term memory, mentioned concepts become activated and trigger further activation as the text is sequentially traversed. Drawing inspiration from psycholinguistic theories of reading comprehension, we model memory processes such as semantic priming, sentence wrap-up, and forgetting. We show that our models capture various aspects of conceptual text complexity and significantly outperform current state of the art.</abstract>
@@ -4862,7 +4862,7 @@
     <paper id="380">
       <title>Miss Tools and Mr Fruit: Emergent Communication in Agents Learning about Object Affordances</title>
       <author><first>Diane</first><last>Bouchacourt</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>3909–3918</pages>
       <abstract>Recent research studies communication emergence in communities of deep network agents assigned a joint task, hoping to gain insights on human language evolution. We propose here a new task capturing crucial aspects of the human environment, such as natural object affordances, and of human conversation, such as full symmetry among the participants. By conducting a thorough pragmatic and semantic analysis of the emergent protocol, we show that the agents solve the shared task through genuine bilateral, referential communication. However, the agents develop multiple idiolects, which makes us conclude that full symmetry is not a sufficient condition for a common language to emerge.</abstract>
       <url hash="a5fd3bf7">P19-1380</url>
@@ -4873,7 +4873,7 @@
     <paper id="381">
       <title><fixed-case>CNN</fixed-case>s found to jump around more skillfully than <fixed-case>RNN</fixed-case>s: Compositional Generalization in Seq2seq Convolutional Networks</title>
       <author><first>Roberto</first><last>Dessì</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>3919–3923</pages>
       <abstract>Lake and Baroni (2018) introduced the SCAN dataset probing the ability of seq2seq models to capture compositional generalizations, such as inferring the meaning of “jump around” 0-shot from the component words. Recurrent networks (RNNs) were found to completely fail the most challenging generalization cases. We test here a convolutional network (CNN) on these tasks, reporting hugely improved performance with respect to RNNs. Despite the big improvement, the CNN has however not induced systematic rules, suggesting that the difference between compositional and non-compositional behaviour is not clear-cut.</abstract>
       <url hash="70e0bc70">P19-1381</url>
@@ -4895,7 +4895,7 @@
     <paper id="383">
       <title>Is Word Segmentation Child’s Play in All Languages?</title>
       <author><first>Georgia R.</first><last>Loukatou</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Damian</first><last>Blasi</last></author>
       <author><first>Sabine</first><last>Stoll</last></author>
       <author><first>Alejandrina</first><last>Cristia</last></author>
@@ -4912,7 +4912,7 @@
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Sabine</first><last>Stoll</last></author>
       <author><first>Balthasar</first><last>Bickel</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>3938–3943</pages>
       <abstract>Embedding a clause inside another (“the girl [who likes cars [that run fast]] has arrived”) is a fundamental resource that has been argued to be a key driver of linguistic expressiveness. As such, it plays a central role in fundamental debates on what makes human language unique, and how they might have evolved. Empirical evidence on the prevalence and the limits of embeddings has however been based on either laboratory setups or corpus data of relatively limited size. We introduce here a collection of large, dependency-parsed written corpora in 17 languages, that allow us, for the first time, to capture clausal embedding through dependency graphs and assess their distribution. Our results indicate that there is no evidence for hard constraints on embedding depth: the tail of depth distributions is heavy. Moreover, although deeply embedded clauses tend to be shorter, suggesting processing load issues, complex sentences with many embeddings do not display a bias towards less deep embeddings. Taken together, the results suggest that deep embeddings are not disfavoured in written language. More generally, our study illustrates how resources and methods from latest-generation big-data NLP can provide new perspectives on fundamental questions in theoretical linguistics.</abstract>
       <url hash="d391b104">P19-1384</url>
@@ -4937,7 +4937,7 @@
       <author><first>Adam</first><last>Trischler</last></author>
       <author><first>Kaheer</first><last>Suleman</last></author>
       <author><first>Hannes</first><last>Schulz</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>3952–3961</pages>
       <abstract>We introduce a new benchmark for coreference resolution and NLI, KnowRef, that targets common-sense understanding and world knowledge. Previous coreference resolution tasks can largely be solved by exploiting the number and gender of the antecedents, or have been handcrafted and do not reflect the diversity of naturally occurring text. We present a corpus of over 8,000 annotated text passages with ambiguous pronominal anaphora. These instances are both challenging and realistic. We show that various coreference systems, whether rule-based, feature-rich, or neural, perform significantly worse on the task than humans, who display high inter-annotator agreement. To explain this performance gap, we show empirically that state-of-the art models often fail to capture context, instead relying on the gender or number of candidate antecedents to make a decision. We then use problem-specific insights to propose a data-augmentation trick called antecedent switching to alleviate this tendency in models. Finally, we show that antecedent switching yields promising results on other tasks as well: we use it to achieve state-of-the-art results on the GAP coreference task.</abstract>
       <url hash="2c35fda6">P19-1386</url>
@@ -4997,7 +4997,7 @@
     <paper id="391">
       <title>Crowdsourcing and Validating Event-focused Emotion Corpora for <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Enrica</first><last>Troiano</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>4005–4011</pages>
       <abstract>Sentiment analysis has a range of corpora available across multiple languages. For emotion analysis, the situation is more limited, which hinders potential research on crosslingual modeling and the development of predictive models for other languages. In this paper, we fill this gap for German by constructing deISEAR, a corpus designed in analogy to the well-established English ISEAR emotion dataset. Motivated by Scherer’s appraisal theory, we implement a crowdsourcing experiment which consists of two steps. In step 1, participants create descriptions of emotional events for a given emotion. In step 2, five annotators assess the emotion expressed by the texts. We show that transferring an emotion classification model from the original English ISEAR to the German crowdsourced deISEAR via machine translation does not, on average, cause a performance drop.</abstract>
@@ -5009,10 +5009,10 @@
     <paper id="392">
       <title>Pay Attention when you Pay the Bills. A Multilingual Corpus with Dependency-based and Semantic Annotation of Collocations.</title>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Marcos</first><last>García Salido</last></author>
-      <author><first>Susana</first><last>Sotelo</last></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García Salido</last></author>
+      <author id="susana-sotelo"><first>Susana</first><last>Sotelo</last></author>
       <author><first>Estela</first><last>Mosqueira</last></author>
-      <author><first>Margarita</first><last>Alonso-Ramos</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita</first><last>Alonso-Ramos</last></author>
       <pages>4012–4019</pages>
       <abstract>This paper presents a new multilingual corpus with semantic annotation of collocations in English, Portuguese, and Spanish. The whole resource contains 155k tokens and 1,526 collocations labeled in context. The annotated examples belong to three syntactic relations (adjective-noun, verb-object, and nominal compounds), and represent 58 lexical functions in the Meaning-Text Theory (e.g., Oper, Magn, Bon, etc.). Each collocation was annotated by three linguists and the final resource was revised by a team of experts. The resulting corpus can serve as a basis to evaluate different approaches for collocation identification, which in turn can be useful for different NLP tasks such as natural language understanding or natural language generation.</abstract>
       <url hash="50c6fd60">P19-1392</url>
@@ -5093,7 +5093,7 @@
       <title>Hubless Nearest Neighbor Search for Bilingual Lexicon Induction</title>
       <author><first>Jiaji</first><last>Huang</last></author>
       <author><first>Qiang</first><last>Qiu</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>4072–4080</pages>
       <abstract>Bilingual Lexicon Induction (BLI) is the task of translating words from corpora in two languages. Recent advances in BLI work by aligning the two word embedding spaces. Following that, a key step is to retrieve the nearest neighbor (NN) in the target space given the source word. However, a phenomenon called hubness often degrades the accuracy of NN. Hubness appears as some data points, called hubs, being extra-ordinarily close to many of the other data points. Reducing hubness is necessary for retrieval tasks. One successful example is Inverted SoFtmax (ISF), recently proposed to improve NN. This work proposes a new method, Hubless Nearest Neighbor (HNN), to mitigate hubness. HNN differs from NN by imposing an additional equal preference assumption. Moreover, the HNN formulation explains why ISF works as well as it does. Empirical results demonstrate that HNN outperforms NN, ISF and other state-of-the-art. For reproducibility and follow-ups, we have published all code.</abstract>
       <url hash="2e04ca86">P19-1399</url>
@@ -5113,10 +5113,10 @@
     </paper>
     <paper id="401">
       <title>Learning How to Active Learn by Dreaming</title>
-      <author><first>Thuy-Trang</first><last>Vu</last></author>
+      <author id="thuy-vu"><first>Thuy-Trang</first><last>Vu</last></author>
       <author><first>Ming</first><last>Liu</last></author>
       <author><first>Dinh</first><last>Phung</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>4091–4101</pages>
       <abstract>Heuristic-based active learning (AL) methods are limited when the data distribution of the underlying learning problems vary. Recent data-driven AL policy learning methods are also restricted to learn from closely related domains. We introduce a new sample-efficient method that learns the AL policy directly on the target domain of interest by using wake and dream cycles. Our approach interleaves between querying the annotation of the selected datapoints to update the underlying student learner and improving AL policy using simulation where the current student learner acts as an imperfect annotator. We evaluate our method on cross-domain and cross-lingual text classification and named entity recognition tasks. Experimental results show that our dream-based AL policy training strategy is more effective than applying the pretrained policy without further fine-tuning and better than the existing strong baseline methods that use heuristics or reinforcement learning.</abstract>
       <url hash="406fcff1">P19-1401</url>
@@ -5138,7 +5138,7 @@
     <paper id="403">
       <title>Neural Temporality Adaptation for Document Classification: Diachronic Word Embeddings and Domain Adaptation Models</title>
       <author><first>Xiaolei</first><last>Huang</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>4113–4123</pages>
       <abstract>Language usage can change across periods of time, but document classifiers models are usually trained and tested on corpora spanning multiple years without considering temporal variations. This paper describes two complementary ways to adapt classifiers to shifts across time. First, we show that diachronic word embeddings, which were originally developed to study language change, can also improve document classification, and we show a simple method for constructing this type of embedding. Second, we propose a time-driven neural classification model inspired by methods for domain adaptation. Experiments on six corpora show how these methods can make classifiers more robust over time.</abstract>
       <url hash="9ad47f95">P19-1403</url>
@@ -5184,7 +5184,7 @@
       <author><first>Ryan</first><last>Benmalek</last></author>
       <author><first>Madian</first><last>Khabsa</last></author>
       <author><first>Suma</first><last>Desu</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Michele</first><last>Banko</last></author>
       <pages>4157–4167</pages>
       <abstract>We introduce the Scratchpad Mechanism, a novel addition to the sequence-to-sequence (seq2seq) neural network architecture and demonstrate its effectiveness in improving the overall fluency of seq2seq models for natural language generation tasks. By enabling the decoder at each time step to write to all of the encoder output layers, Scratchpad can employ the encoder as a “scratchpad” memory to keep track of what has been generated so far and thereby guide future generation. We evaluate Scratchpad in the context of three well-studied natural language generation tasks — Machine Translation, Question Generation, and Text Summarization — and obtain state-of-the-art or comparable performance on standard datasets for each task. Qualitative assessments in the form of human judgements (question generation), attention visualization (MT), and sample output (summarization) provide further evidence of the ability of Scratchpad to generate fluent and expressive output.</abstract>
@@ -5194,9 +5194,9 @@
     </paper>
     <paper id="408">
       <title>Using Automatically Extracted Minimum Spans to Disentangle Coreference Evaluation from Boundary Detection</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Leo</first><last>Born</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>4168–4178</pages>
       <abstract>The common practice in coreference resolution is to identify and evaluate the maximum span of mentions. The use of maximum spans tangles coreference evaluation with the challenges of mention boundary detection like prepositional phrase attachment. To address this problem, minimum spans are manually annotated in smaller corpora. However, this additional annotation is costly and therefore, this solution does not scale to large corpora. In this paper, we propose the MINA algorithm for automatically extracting minimum spans to benefit from minimum span evaluation in all corpora. We show that the extracted minimum spans by MINA are consistent with those that are manually annotated by experts. Our experiments show that using minimum spans is in particular important in cross-dataset coreference evaluation, in which detected mention boundaries are noisier due to domain shift. We have integrated MINA into <url>https://github.com/ns-moosavi/coval</url> for reporting standard coreference scores based on both maximum and automatically detected minimum spans.</abstract>
@@ -5223,7 +5223,7 @@
     <paper id="410">
       <title>A Unified Linear-Time Framework for Sentence-Level Discourse Parsing</title>
       <author><first>Xiang</first><last>Lin</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
       <author><first>M Saiful</first><last>Bari</last></author>
       <pages>4190–4200</pages>
@@ -5250,7 +5250,7 @@
     <paper id="412">
       <title>Do You Know That Florence Is Packed with Visitors? Evaluating State-of-the-art Models of Speaker Commitment</title>
       <author><first>Nanjiang</first><last>Jiang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>4208–4213</pages>
       <abstract>When a speaker, Mary, asks “Do you know that Florence is packed with visitors?”, we take her to believe that Florence is packed with visitors, but not if she asks “Do you think that Florence is packed with visitors?”. Inferring speaker commitment (aka event factuality) is crucial for information extraction and question answering. Here, we explore the hypothesis that linguistic deficits drive the error patterns of existing speaker commitment models by analyzing the linguistic correlates of model error on a challenging naturalistic dataset. We evaluate two state-of-the-art speaker commitment models on the CommitmentBank, an English dataset of naturally occurring discourses. The CommitmentBank is annotated with speaker commitment towards the content of the complement (“Florence is packed with visitors” in our example) of clause-embedding verbs (“know”, “think”) under four entailment-canceling environments (negation, modal, question, conditional). A breakdown of items by linguistic features reveals asymmetrical error patterns: while the models achieve good performance on some classes (e.g., negation), they fail to generalize to the diverse linguistic constructions (e.g., conditionals) in natural language, highlighting directions for improvement.</abstract>
       <url hash="83ae9d16">P19-1412</url>
@@ -5272,7 +5272,7 @@
     </paper>
     <paper id="414">
       <title>Open-Domain Why-Question Answering with Adversarial Learning to Encode Answer Texts</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kazuma</first><last>Kadowaki</last></author>
       <author><first>Julien</first><last>Kloetzer</last></author>
       <author><first>Ryu</first><last>Iida</last></author>
@@ -5307,7 +5307,7 @@
       <author><first>Sameer</first><last>Singh</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>4249–4257</pages>
       <abstract>Multi-hop reading comprehension (RC) questions are challenging because they require reading and reasoning over multiple paragraphs. We argue that it can be difficult to construct large multi-hop RC datasets. For example, even highly compositional questions can be answered with a single hop if they target specific entity types, or the facts needed to answer them are redundant. Our analysis is centered on HotpotQA, where we show that single-hop reasoning can solve much more of the dataset than previously thought. We introduce a single-hop BERT-based RC model that achieves 67 F1—comparable to state-of-the-art multi-hop models. We also design an evaluation setting where humans are not shown all of the necessary paragraphs for the intended multi-hop reasoning but can still answer over 80% of questions. Together with detailed error analysis, these results suggest there should be an increasing focus on the role of evidence in multi-hop reasoning and possibly even a shift towards information retrieval style evaluations with large and diverse evidence collections.</abstract>
       <url hash="59ce82d9">P19-1416</url>
@@ -5469,8 +5469,8 @@
     <paper id="428">
       <title><fixed-case>A</fixed-case>uto<fixed-case>ML</fixed-case> Strategy Based on Grammatical Evolution: A Case Study about Knowledge Discovery from Text</title>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <author><first>Yudivián</first><last>Almeida-Cruz</last></author>
       <pages>4356–4365</pages>
       <abstract>The process of extracting knowledge from natural language text poses a complex problem that requires both a combination of machine learning techniques and proper feature selection. Recent advances in Automatic Machine Learning (AutoML) provide effective tools to explore large sets of algorithms, hyper-parameters and features to find out the most suitable combination of them. This paper proposes a novel AutoML strategy based on probabilistic grammatical evolution, which is evaluated on the health domain by facing the knowledge discovery challenge in Spanish text documents. Our approach achieves state-of-the-art results and provides interesting insights into the best combination of parameters and algorithms to use when dealing with this challenge. Source code is provided for the research community.</abstract>
@@ -5565,7 +5565,7 @@
       <author><first>Shiyu</first><last>Chang</last></author>
       <author><first>Mo</first><last>Yu</last></author>
       <author><first>Conghui</first><last>Zhu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>4418–4429</pages>
       <abstract>Natural Language Sentence Matching (NLSM) has gained substantial attention from both academics and the industry, and rich public datasets contribute a lot to this process. However, biased datasets can also hurt the generalization performance of trained models and give untrustworthy evaluation results. For many NLSM datasets, the providers select some pairs of sentences into the datasets, and this sampling procedure can easily bring unintended pattern, i.e., selection bias. One example is the QuoraQP dataset, where some content-independent naive features are unreasonably predictive. Such features are the reflection of the selection bias and termed as the “leakage features.” In this paper, we investigate the problem of selection bias on six NLSM datasets and find that four out of them are significantly biased. We further propose a training and evaluation framework to alleviate the bias. Experimental results on QuoraQP suggest that the proposed framework can improve the generalization ability of trained models, and give more trustworthy evaluation results for real-world adoptions.</abstract>
       <url hash="95c04a6e">P19-1435</url>
@@ -5579,7 +5579,7 @@
       <author><first>Minjoon</first><last>Seo</last></author>
       <author><first>Jinhyuk</first><last>Lee</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Ali</first><last>Farhadi</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>4430–4441</pages>
@@ -5617,7 +5617,7 @@
       <author><first>Jan</first><last>Hula</last></author>
       <author><first>Patrick</first><last>Xia</last></author>
       <author><first>Raghavendra</first><last>Pappagari</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Roma</first><last>Patel</last></author>
       <author><first>Najoung</first><last>Kim</last></author>
       <author><first>Ian</first><last>Tenney</last></author>
@@ -5626,15 +5626,15 @@
       <author><first>Shuning</first><last>Jin</last></author>
       <author><first>Berlin</first><last>Chen</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>4465–4476</pages>
       <abstract>Natural language understanding has recently seen a surge of progress with the use of sentence encoders like ELMo (Peters et al., 2018a) and BERT (Devlin et al., 2019) which are pretrained on variants of language modeling. We conduct the first large-scale systematic study of candidate pretraining tasks, comparing 19 different tasks both as alternatives and complements to language modeling. Our primary results support the use language modeling, especially when combined with pretraining on additional labeled-data tasks. However, our results are mixed across pretraining tasks and show some concerning trends: In ELMo’s pretrain-then-freeze paradigm, random baselines are worryingly strong and results vary strikingly across target tasks. In addition, fine-tuning BERT on an intermediate task often negatively impacts downstream transfer. In a more positive trend, we see modest gains from multitask training, suggesting the development of more sophisticated multitask and transfer learning techniques as an avenue for further research.</abstract>
       <url hash="a6365460">P19-1439</url>
       <attachment type="supplementary" hash="700c7cf0">P19-1439.Supplementary.pdf</attachment>
-      <doi>10.18653/v1/P19-1439</doi>
       <attachment type="poster" hash="f4b76b2f">P19-1439.Poster.pdf</attachment>
+      <doi>10.18653/v1/P19-1439</doi>
       <bibkey>wang-etal-2019-tell</bibkey>
     </paper>
     <paper id="440">
@@ -5665,7 +5665,7 @@
       <title><fixed-case>D</fixed-case>is<fixed-case>S</fixed-case>ent: Learning Sentence Representations from Explicit Discourse Relations</title>
       <author><first>Allen</first><last>Nie</last></author>
       <author><first>Erin</first><last>Bennett</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>4497–4510</pages>
       <abstract>Learning effective representations of sentences is one of the core missions of natural language understanding. Existing models either train on a vast amount of text, or require costly, manually curated sentence relation datasets. We show that with dependency parsing and rule-based rubrics, we can curate a high quality sentence relation task by leveraging explicit discourse relations. We show that our curated dataset provides an excellent signal for learning vector representations of sentence meaning, representing relations that can only be determined when the meanings of two sentences are combined. We demonstrate that the automatically curated corpus allows a bidirectional LSTM sentence encoder to yield high quality sentence embeddings and can serve as a supervised fine-tuning dataset for larger models such as BERT. Our fixed sentence embeddings achieve high performance on a variety of transfer tasks, including SentEval, and we achieve state-of-the-art results on Penn Discourse Treebank’s implicit relation prediction task.</abstract>
       <url hash="25c3e118">P19-1442</url>
@@ -5678,7 +5678,7 @@
       <author><first>Rui</first><last>Zhang</last></author>
       <author><first>Michihiro</first><last>Yasunaga</last></author>
       <author><first>Yi Chern</first><last>Tan</last></author>
-      <author><first>Xi Victoria</first><last>Lin</last></author>
+      <author id="xi-victoria-lin"><first>Xi Victoria</first><last>Lin</last></author>
       <author><first>Suyi</first><last>Li</last></author>
       <author><first>Heyang</first><last>Er</last></author>
       <author><first>Irene</first><last>Li</last></author>
@@ -5692,7 +5692,7 @@
       <author><first>Vincent</first><last>Zhang</last></author>
       <author><first>Caiming</first><last>Xiong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>4511–4523</pages>
       <abstract>We present SParC, a dataset for cross-domainSemanticParsing inContext that consists of 4,298 coherent question sequences (12k+ individual questions annotated with SQL queries). It is obtained from controlled user interactions with 200 complex databases over 138 domains. We provide an in-depth analysis of SParC and show that it introduces new challenges compared to existing datasets. SParC demonstrates complex contextual dependencies, (2) has greater semantic diversity, and (3) requires generalization to unseen domains due to its cross-domain nature and the unseen databases at test time. We experiment with two state-of-the-art text-to-SQL models adapted to the context-dependent, cross-domain setup. The best model obtains an exact match accuracy of 20.2% over all questions and less than10% over all interaction sequences, indicating that the cross-domain setting and the con-textual phenomena of the dataset present significant challenges for future research. The dataset, baselines, and leaderboard are released at <url>https://yale-lily.github.io/sparc</url>.</abstract>
       <url hash="613929d4">P19-1443</url>
@@ -5760,7 +5760,7 @@
     <paper id="449">
       <title>Human vs. Muppet: A Conservative Estimate of Human Performance on the <fixed-case>GLUE</fixed-case> Benchmark</title>
       <author><first>Nikita</first><last>Nangia</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>4566–4575</pages>
       <abstract>The GLUE benchmark (Wang et al., 2019b) is a suite of language understanding tasks which has seen dramatic progress in the past year, with average performance moving from 70.0 at launch to 83.9, state of the art at the time of writing (May 24, 2019). Here, we measure human performance on the benchmark, in order to learn whether significant headroom remains for further progress. We provide a conservative estimate of human performance on the benchmark through crowdsourcing: Our annotators are non-experts who must learn each task from a brief set of instructions and 20 examples. In spite of limited training, these annotators robustly outperform the state of the art on six of the nine GLUE tasks and achieve an average score of 87.1. Given the fast pace of progress however, the headroom we observe is quite limited. To reproduce the data-poor setting that our annotators must learn in, we also train the BERT model (Devlin et al., 2019) in limited-data regimes, and conclude that low-resource sentence classification remains a challenge for modern neural network approaches to text understanding.</abstract>
       <url hash="202237b5">P19-1449</url>
@@ -5784,8 +5784,8 @@
       <author><first>Tahira</first><last>Naseem</last></author>
       <author><first>Abhishek</first><last>Shah</last></author>
       <author><first>Hui</first><last>Wan</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <pages>4586–4592</pages>
       <abstract>Our work involves enriching the Stack-LSTM transition-based AMR parser (Ballesteros and Al-Onaizan, 2017) by augmenting training with Policy Learning and rewarding the Smatch score of sampled graphs. In addition, we also combined several AMR-to-text alignments with an attention mechanism and we supplemented the parser with pre-processed concept identification, named entities and contextualized embeddings. We achieve a highly competitive performance that is comparable to the best published results. We show an in-depth study ablating each of the new components of the parser.</abstract>
@@ -5833,7 +5833,7 @@
       <author><first>Devamanyu</first><last>Hazarika</last></author>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
       <author><first>Roger</first><last>Zimmermann</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Soujanya</first><last>Poria</last></author>
       <pages>4619–4629</pages>
       <abstract>Sarcasm is often expressed through several verbal and non-verbal cues, e.g., a change of tone, overemphasis in a word, a drawn-out syllable, or a straight looking face. Most of the recent work in sarcasm detection has been carried out on textual data. In this paper, we argue that incorporating multimodal cues can improve the automatic classification of sarcasm. As a first step towards enabling the development of multimodal approaches for sarcasm detection, we propose a new sarcasm dataset, Multimodal Sarcasm Detection Dataset (MUStARD), compiled from popular TV shows. MUStARD consists of audiovisual utterances annotated with sarcasm labels. Each utterance is accompanied by its context of historical utterances in the dialogue, which provides additional information on the scenario where the utterance occurs. Our initial results show that the use of multimodal information can reduce the relative error rate of sarcasm detection by up to 12.9% in F-score when compared to the use of individual modalities. The full dataset is publicly available for use at <url>https://github.com/soujanyaporia/MUStARD</url>.</abstract>
@@ -5846,7 +5846,7 @@
       <title>Determining Relative Argument Specificity and Stance for Complex Argumentative Structures</title>
       <author><first>Esin</first><last>Durmus</last></author>
       <author><first>Faisal</first><last>Ladhak</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>4630–4641</pages>
       <abstract>Systems for automatic argument generation and debate require the ability to (1) determine the stance of any claims employed in the argument and (2) assess the specificity of each claim relative to the argument context. Existing work on understanding claim specificity and stance, however, has been limited to the study of argumentative structures that are relatively shallow, most often consisting of a single claim that directly supports or opposes the argument thesis. In this paper, we tackle these tasks in the context of complex arguments on a diverse set of topics. In particular, our dataset consists of manually curated argument trees for 741 controversial topics covering 95,312 unique claims; lines of argument are generally of depth 2 to 6. We find that as the distance between a pair of claims increases along the argument path, determining the relative specificity of a pair of claims becomes easier and determining their relative stance becomes harder.</abstract>
       <url hash="4f8d688b">P19-1456</url>
@@ -5887,7 +5887,7 @@
     <paper id="460">
       <title>Recognising Agreement and Disagreement between Stances with Reason Comparing Networks</title>
       <author><first>Chang</first><last>Xu</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Surya</first><last>Nepal</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
       <pages>4665–4671</pages>
@@ -5899,7 +5899,7 @@
     <paper id="461">
       <title>Toward Comprehensive Understanding of a Sentiment Based on Human Motives</title>
       <author><first>Naoki</first><last>Otani</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>4672–4677</pages>
       <abstract>In sentiment detection, the natural language processing community has focused on determining holders, facets, and valences, but has paid little attention to the reasons for sentiment decisions. Our work considers human motives as the driver for human sentiments and addresses the problem of motive detection as the first step. Following a study in psychology, we define six basic motives that cover a wide range of topics appearing in review texts, annotate 1,600 texts in restaurant and laptop domains with the motives, and report the performance of baseline methods on this new dataset. We also show that cross-domain transfer learning boosts detection performance, which indicates that these universal motives exist across different domains.</abstract>
       <url hash="f3bb301c">P19-1461</url>
@@ -5974,7 +5974,7 @@
     <paper id="467">
       <title>Neural Network Alignment for Sentential Paraphrases</title>
       <author><first>Jessica</first><last>Ouyang</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>4724–4735</pages>
       <abstract>We present a monolingual alignment system for long, sentence- or clause-level alignments, and demonstrate that systems designed for word- or short phrase-based alignment are ill-suited for these longer alignments. Our system is capable of aligning semantically similar spans of arbitrary length. We achieve significantly higher recall on aligning phrases of four or more words and outperform state-of-the- art aligners on the long alignments in the MSR RTE corpus.</abstract>
       <url hash="7b3a2d80">P19-1467</url>
@@ -5984,9 +5984,9 @@
     <paper id="468">
       <title>Duality of Link Prediction and Entailment Graph Induction</title>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>4736–4746</pages>
       <abstract>Link prediction and entailment graph induction are often treated as different problems. In this paper, we show that these two problems are actually complementary. We train a link prediction model on a knowledge graph of assertions extracted from raw text. We propose an entailment score that exploits the new facts discovered by the link prediction model, and then form entailment graphs between relations. We further use the learned entailments to predict improved link prediction scores. Our results show that the two tasks can benefit from each other. The new entailment score outperforms prior state-of-the-art results on a standard entialment dataset and the new link prediction scores show improvements over the raw link prediction scores.</abstract>
       <url hash="ef54d3b8">P19-1468</url>
@@ -6021,7 +6021,7 @@
     <paper id="471">
       <title>Detecting Subevents using Discourse and Narrative Features</title>
       <author><first>Mohammed</first><last>Aldawsari</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>4780–4790</pages>
       <abstract>Recognizing the internal structure of events is a challenging language processing task of great importance for text understanding. We present a supervised model for automatically identifying when one event is a subevent of another. Building on prior work, we introduce several novel features, in particular discourse and narrative features, that significantly improve upon prior state-of-the-art performance. Error analysis further demonstrates the utility of these features. We evaluate our model on the only two annotated corpora with event hierarchies: HiEve and the Intelligence Community corpus. No prior system has been evaluated on both corpora. Our model outperforms previous systems on both corpora, achieving 0.74 BLANC F1 on the Intelligence Community corpus and 0.70 F1 on the HiEve corpus, respectively a 15 and 5 percentage point improvement over previous models.</abstract>
       <url hash="077ec539">P19-1471</url>
@@ -6062,7 +6062,7 @@
       <author><first>Shantanu</first><last>Acharya</last></author>
       <author><first>Alexander</first><last>Ossa</last></author>
       <author><first>Arne</first><last>Köhn</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>4811–4817</pages>
       <abstract>We introduce the use of Poincaré embeddings to improve existing state-of-the-art approaches to domain-specific taxonomy induction from text as a signal for both relocating wrong hyponym terms within a (pre-induced) taxonomy as well as for attaching disconnected terms in a taxonomy. This method substantially improves previous state-of-the-art results on the SemEval-2016 Task 13 on taxonomy extraction. We demonstrate the superiority of Poincaré embeddings over distributional semantic representations, supporting the hypothesis that they can better capture hierarchical lexical-semantic relationships than embeddings in the Euclidean space.</abstract>
@@ -6173,10 +6173,10 @@
       <title>Handling Divergent Reference Texts when Evaluating Table-to-Text Generation</title>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>4884–4895</pages>
       <abstract>Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric, PARENT, which aligns n-grams from the reference and generated texts to the semi-structured data before computing their precision and recall. Through a large scale human evaluation study of table-to-text models for WikiBio, we show that PARENT correlates with human judgments better than existing text generation metrics. We also adapt and evaluate the information extraction based evaluation proposed by Wiseman et al (2017), and show that PARENT has comparable correlation to it, while being easier to use. We show that PARENT is also applicable when the reference texts are elicited from humans using the data from the WebNLG challenge.</abstract>
       <url hash="613b5654">P19-1483</url>
@@ -6212,7 +6212,7 @@
       <title>Simple and Effective Curriculum Pointer-Generator Networks for Reading Comprehension over Long Narratives</title>
       <author><first>Yi</first><last>Tay</last></author>
       <author><first>Shuohang</first><last>Wang</last></author>
-      <author><first>Anh Tuan</first><last>Luu</last></author>
+      <author id="luu-anh-tuan"><first>Anh Tuan</first><last>Luu</last></author>
       <author><first>Jie</first><last>Fu</last></author>
       <author><first>Minh C.</first><last>Phan</last></author>
       <author><first>Xingdi</first><last>Yuan</last></author>
@@ -6255,7 +6255,7 @@
       <title>A Resource-Free Evaluation Metric for Cross-Lingual Word Embeddings Based on Graph Modularity</title>
       <author><first>Yoshinari</first><last>Fujinuma</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>4952–4962</pages>
       <abstract>Cross-lingual word embeddings encode the meaning of words from different languages into a shared low-dimensional space. An important requirement for many downstream tasks is that word similarity should be independent of language—i.e., word vectors within one language should not be more similar to each other than to words in another language. We measure this characteristic using modularity, a network measurement that measures the strength of clusters in a graph. Modularity has a moderate to strong correlation with three downstream tasks, even though modularity is based only on the structure of embeddings and does not require any external resources. We show through experiments that modularity can serve as an intrinsic validation metric to improve unsupervised cross-lingual word embeddings, particularly on distant language pairs in low-resource settings.</abstract>
       <url hash="bdc682c2">P19-1489</url>
@@ -6266,7 +6266,7 @@
     <paper id="490">
       <title>Multilingual and Cross-Lingual Graded Lexical Entailment</title>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <pages>4963–4974</pages>
       <abstract>Grounded in cognitive linguistics, graded lexical entailment (GR-LE) is concerned with fine-grained assertions regarding the directional hierarchical relationships between concepts on a continuous scale. In this paper, we present the first work on cross-lingual generalisation of GR-LE relation. Starting from HyperLex, the only available GR-LE dataset in English, we construct new monolingual GR-LE datasets for three other languages, and combine those to create a set of six cross-lingual GR-LE datasets termed CL-HYPERLEX. We next present a novel method dubbed CLEAR (Cross-Lingual Lexical Entailment Attract-Repel) for effectively capturing graded (and binary) LE, both monolingually in different languages as well as across languages (i.e., on CL-HYPERLEX). Coupled with a bilingual dictionary, CLEAR leverages taxonomic LE knowledge in a resource-rich language (e.g., English) and propagates it to other languages. Supported by cross-lingual LE transfer, CLEAR sets competitive baseline performance on three new monolingual GR-LE datasets and six cross-lingual GR-LE datasets. In addition, we show that CLEAR outperforms current state-of-the-art on binary cross-lingual LE detection by a wide margin for diverse language pairs.</abstract>
@@ -6278,11 +6278,11 @@
     </paper>
     <paper id="491">
       <title>What Kind of Language Is Hard to Language-Model?</title>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Kyle</first><last>Gorman</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>4975–4989</pages>
       <abstract>How language-agnostic are current state-of-the-art NLP tools? Are there some types of language that are easier to model with current methods? In prior work (Cotterell et al., 2018) we attempted to address this question for language modeling, and observed that recurrent neural network language models do not perform equally well over all the high-resource European languages found in the Europarl corpus. We speculated that inflectional morphology may be the primary culprit for the discrepancy. In this paper, we extend these earlier experiments to cover 69 languages from 13 language families using a multilingual Bible corpus. Methodologically, we introduce a new paired-sample multiplicative mixed-effects model to obtain language difficulty coefficients from at-least-pairwise parallel corpora. In other words, the model is aware of inter-sentence variation and can handle missing data. Exploiting this model, we show that “translationese” is not any easier to model than natively written language in a fair comparison. Trying to answer the question of what features difficult languages have in common, we try and fail to reproduce our earlier (Cotterell et al., 2018) observation about morphological complexity and instead reveal far simpler statistics of the data that seem to drive complexity in a much larger sample.</abstract>
       <url hash="6994ab78">P19-1491</url>
@@ -6297,9 +6297,9 @@
       <title>Analyzing the Limitations of Cross-lingual Word Embedding Mappings</title>
       <author><first>Aitor</first><last>Ormazabal</last></author>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>4990–4995</pages>
       <abstract>Recent research in cross-lingual word embeddings has almost exclusively focused on offline methods, which independently train word embeddings in different languages and map them to a shared space through linear transformations. While several authors have questioned the underlying isomorphism assumption, which states that word embeddings in different languages have approximately the same structure, it is not clear whether this is an inherent limitation of mapping approaches or a more general issue when learning cross-lingual embeddings. So as to answer this question, we experiment with parallel corpora, which allows us to compare offline mapping to an extension of skip-gram that jointly learns both embedding spaces. We observe that, under these ideal conditions, joint learning yields to more isomorphic embeddings, is less sensitive to hubness, and obtains stronger results in bilingual lexicon induction. We thus conclude that current mapping methods do have strong limitations, calling for further research to jointly learn cross-lingual embeddings with a weaker cross-lingual signal.</abstract>
       <url hash="7383c2a9">P19-1492</url>
@@ -6322,8 +6322,8 @@
     <paper id="494">
       <title>Bilingual Lexicon Induction through Unsupervised Machine Translation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>5002–5007</pages>
       <abstract>A recent research line has obtained strong results on bilingual lexicon induction by aligning independently trained word embeddings in two languages and using the resulting cross-lingual embeddings to induce word translation pairs through nearest neighbor or related retrieval methods. In this paper, we propose an alternative approach to this problem that builds on the recent work on unsupervised machine translation. This way, instead of directly inducing a bilingual lexicon from cross-lingual embeddings, we use them to build a phrase-table, combine it with a language model, and use the resulting machine translation system to generate a synthetic parallel corpus, from which we extract the bilingual lexicon using statistical word alignment techniques. As such, our method can work with any word embedding and cross-lingual mapping technique, and it does not require any additional resource besides the monolingual corpus used to train the embeddings. When evaluated on the exact same cross-lingual embeddings, our proposed method obtains an average improvement of 6 accuracy points over nearest neighbor and 4 points over CSLS retrieval, establishing a new state-of-the-art in the standard MUSE dataset.</abstract>
       <url hash="69b54ef0">P19-1494</url>
@@ -6333,7 +6333,7 @@
     </paper>
     <paper id="495">
       <title>Automatically Identifying Complaints in Social Media</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Mihaela</first><last>Gaman</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
       <pages>5008–5019</pages>
@@ -6377,7 +6377,7 @@
     <paper id="498">
       <title>Tree <fixed-case>LSTM</fixed-case>s with Convolution Units to Predict Stance and Rumor Veracity in Social Media Conversations</title>
       <author><first>Sumeet</first><last>Kumar</last></author>
-      <author><first>Kathleen</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen</first><last>Carley</last></author>
       <pages>5047–5058</pages>
       <abstract>Learning from social-media conversations has gained significant attention recently because of its applications in areas like rumor detection. In this research, we propose a new way to represent social-media conversations as binarized constituency trees that allows comparing features in source-posts and their replies effectively. Moreover, we propose to use convolution units in Tree LSTMs that are better at learning patterns in features obtained from the source and reply posts. Our Tree LSTM models employ multi-task (stance + rumor) learning and propagate the useful stance signal up in the tree for rumor classification at the root node. The proposed models achieve state-of-the-art performance, outperforming the current best model by 12% and 15% on F1-macro for rumor-veracity classification and stance classification tasks respectively.</abstract>
       <url hash="3dbf2f48">P19-1498</url>
@@ -6433,7 +6433,7 @@
     <paper id="503">
       <title>Simple Unsupervised Summarization by Contextual Matching</title>
       <author><first>Jiawei</first><last>Zhou</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>5101–5106</pages>
       <abstract>We propose an unsupervised method for sentence summarization using only language modeling. The approach employs two language models, one that is generic (i.e. pretrained), and the other that is specific to the target domain. We show that by using a product-of-experts criteria these are enough for maintaining continuous contextual matching while maintaining output fluency. Experiments on both abstractive and extractive sentence summarization data sets show promising results of our method without being exposed to any paired data.</abstract>
       <url hash="bcecdc13">P19-1503</url>
@@ -6457,7 +6457,7 @@
       <title>Morphological Irregularity Correlates with Frequency</title>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
       <pages>5117–5126</pages>
       <abstract>We present a study of morphological irregularity. Following recent work, we define an information-theoretic measure of irregularity based on the predictability of forms in a language. Using a neural transduction model, we estimate this quantity for the forms in 28 languages. We first present several validatory and exploratory analyses of irregularity. We then show that our analyses provide evidence for a correlation between irregularity and frequency: higher frequency items are more likely to be irregular and irregular items are more likely be highly frequent. To our knowledge, this result is the first of its breadth and confirms longstanding proposals from the linguistics literature. The correlation is more robust when aggregated at the level of whole paradigms—providing support for models of linguistic structure in which inflected forms are unified by abstract underlying stems or lexemes.</abstract>
       <url hash="af142045">P19-1505</url>
@@ -6483,8 +6483,8 @@
       <title>Relating Simple Sentence Representations in Deep Neural Networks and the Brain</title>
       <author><first>Sharmistha</first><last>Jat</last></author>
       <author><first>Hao</first><last>Tang</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>5137–5154</pages>
       <abstract>What is the relationship between sentence representations learned by deep recurrent models against those encoded by the brain? Is there any correspondence between hidden layers of these recurrent models and brain regions when processing sentences? Can these deep models be used to synthesize brain data which can then be utilized in other extrinsic tasks? We investigate these questions using sentences with simple syntax and semantics (e.g., The bone was eaten by the dog.). We consider multiple neural network architectures, including recently proposed ELMo and BERT. We use magnetoencephalography (MEG) brain recording data collected from human subjects when they were reading these simple sentences. Overall, we find that BERT’s activations correlate the best with MEG brain data. We also find that the deep network representation can be used to generate brain data from new sentences to augment existing brain data. To the best of our knowledge, this is the first work showing that the MEG brain recording when reading a word in a sentence can be used to distinguish earlier words in the sentence. Our exploration is also the first to use deep neural network representations to generate synthetic brain data and to show that it helps in improving subsequent stimuli decoding task accuracy.</abstract>
       <url hash="ce43a409">P19-1507</url>
@@ -6512,7 +6512,7 @@
       <author><first>Eugene</first><last>Kharitonov</last></author>
       <author><first>Alessandro</first><last>Lazaric</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>5166–5175</pages>
       <abstract>Sequence-processing neural networks led to remarkable progress on many NLP tasks. As a consequence, there has been increasing interest in understanding to what extent they process language as humans do. We aim here to uncover which biases such models display with respect to “natural” word-order constraints. We train models to communicate about paths in a simple gridworld, using miniature languages that reflect or violate various natural language trends, such as the tendency to avoid redundancy or to minimize long-distance dependencies. We study how the controlled characteristics of our miniature languages affect individual learning and their stability across multiple network generations. The results draw a mixed picture. On the one hand, neural networks show a strong tendency to avoid long-distance dependencies. On the other hand, there is no clear preference for the efficient, non-redundant encoding of information that is widely attested in natural language. We thus suggest inoculating a notion of “effort” into neural networks, as a possible way to make their linguistic behavior more human-like.</abstract>
       <url hash="fe31b3eb">P19-1509</url>
@@ -6524,11 +6524,11 @@
     <paper id="510">
       <title><fixed-case>NNE</fixed-case>: A Dataset for Nested Named Entity Recognition in <fixed-case>E</fixed-case>nglish Newswire</title>
       <author><first>Nicky</first><last>Ringland</last></author>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>5176–5181</pages>
       <abstract>Named entity recognition (NER) is widely used in natural language processing applications and downstream tasks. However, most NER tools target flat annotation from popular datasets, eschewing the semantic information available in nested entity mentions. We describe NNE—a fine-grained, nested named entity dataset over the full Wall Street Journal portion of the Penn Treebank (PTB). Our annotation comprises 279,795 mentions of 114 entity types with up to 6 layers of nesting. We hope the public release of this large dataset for English newswire will encourage development of new techniques for nested NER.</abstract>
       <url hash="7a3a1720">P19-1510</url>
@@ -6582,9 +6582,9 @@
       <title>Scaling up Open Tagging from Tens to Thousands: Comprehension Empowered Attribute Value Extraction from Product Title</title>
       <author><first>Huimin</first><last>Xu</last></author>
       <author><first>Wenting</first><last>Wang</last></author>
-      <author><first>Xin</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
       <author><first>Xinyu</first><last>Jiang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>5214–5223</pages>
       <abstract>Supplementing product information by extracting attribute values from title is a crucial task in e-Commerce domain. Previous studies treat each attribute only as an entity type and build one set of NER tags (e.g., BIO) for each of them, leading to a scalability issue which unfits to the large sized attribute system in real world e-Commerce. In this work, we propose a novel approach to support value extraction scaling up to thousands of attributes without losing performance: (1) We propose to regard attribute as a query and adopt only one global set of BIO tags for any attributes to reduce the burden of attribute tag or model explosion; (2) We explicitly model the semantic representations for attribute and title, and develop an attention mechanism to capture the interactive semantic relations in-between to enforce our framework to be attribute comprehensive. We conduct extensive experiments in real-life datasets. The results show that our model not only outperforms existing state-of-the-art NER tagging models, but also is robust and generates promising results for up to 8,906 attributes.</abstract>
       <url hash="24fddcac">P19-1514</url>
@@ -6606,7 +6606,7 @@
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5234–5245</pages>
       <abstract>The mining of adverse drug reaction (ADR) has a crucial role in the pharmacovigilance. The traditional ways of identifying ADR are reliable but time-consuming, non-scalable and offer a very limited amount of ADR relevant information. With the unprecedented growth of information sources in the forms of social media texts (Twitter, Blogs, Reviews etc.), biomedical literature, and Electronic Medical Records (EMR), it has become crucial to extract the most pertinent ADR related information from these free-form texts. In this paper, we propose a neural network inspired multi- task learning framework that can simultaneously extract ADRs from various sources. We adopt a novel adversarial learning-based approach to learn features across multiple ADR information sources. Unlike the other existing techniques, our approach is capable to extracting fine-grained information (such as ‘Indications’, ‘Symptoms’, ‘Finding’, ‘Disease’, ‘Drug’) which provide important cues in pharmacovigilance. We evaluate our proposed approach on three publicly available real- world benchmark pharmacovigilance datasets, a Twitter dataset from PSB 2016 Social Me- dia Shared Task, CADEC corpus and Medline ADR corpus. Experiments show that our unified framework achieves state-of-the-art performance on individual tasks associated with the different benchmark datasets. This establishes the fact that our proposed approach is generic, which enables it to achieve high performance on the diverse datasets.</abstract>
       <url hash="d46915a9">P19-1516</url>
@@ -6642,8 +6642,8 @@
       <author><first>Chenwei</first><last>Zhang</last></author>
       <author><first>Yaliang</first><last>Li</last></author>
       <author><first>Nan</first><last>Du</last></author>
-      <author id="wei-fan"><first>Wei</first><last>Fan</last></author>
-      <author><first>Philip</first><last>Yu</last></author>
+      <author><first>Wei</first><last>Fan</last></author>
+      <author id="philip-s-yu"><first>Philip</first><last>Yu</last></author>
       <pages>5259–5267</pages>
       <abstract>Being able to recognize words as slots and detect the intent of an utterance has been a keen issue in natural language understanding. The existing works either treat slot filling and intent detection separately in a pipeline manner, or adopt joint models which sequentially label slots while summarizing the utterance-level intent without explicitly preserving the hierarchical relationship among words, slots, and intents. To exploit the semantic hierarchy for effective modeling, we propose a capsule-based neural network model which accomplishes slot filling and intent detection via a dynamic routing-by-agreement schema. A re-routing schema is proposed to further synergize the slot filling performance using the inferred intent representation. Experiments on two real-world datasets show the effectiveness of our model when compared with other alternative model architectures, as well as existing natural language understanding services.</abstract>
       <url hash="2e3eda2d">P19-1519</url>
@@ -6679,7 +6679,7 @@
       <author><first>Dawei</first><last>Feng</last></author>
       <author><first>Linbo</first><last>Qiao</last></author>
       <author><first>Zhigang</first><last>Kan</last></author>
-      <author id="dongsheng-li"><first>Dongsheng</first><last>Li</last></author>
+      <author><first>Dongsheng</first><last>Li</last></author>
       <pages>5284–5294</pages>
       <abstract>Traditional approaches to the task of ACE event extraction usually depend on manually annotated data, which is often laborious to create and limited in size. Therefore, in addition to the difficulty of event extraction itself, insufficient training data hinders the learning process as well. To promote event extraction, we first propose an event extraction model to overcome the roles overlap problem by separating the argument prediction in terms of roles. Moreover, to address the problem of insufficient training data, we propose a method to automatically generate labeled data by editing prototypes and screen out generated samples by ranking the quality. Experiments on the ACE2005 dataset demonstrate that our extraction model can surpass most existing extraction methods. Besides, incorporating our generation method exhibits further significant improvement. It obtains new state-of-the-art results on the event extraction task, including pushing the F1 score of trigger classification to 81.1%, and the F1 score of argument classification to 58.9%.</abstract>
       <url hash="cc787227">P19-1522</url>
@@ -6700,8 +6700,8 @@
     <paper id="524">
       <title>Towards Improving Neural Named Entity Recognition with Gazetteers</title>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>5301–5307</pages>
       <abstract>Most of the recently proposed neural models for named entity recognition have been purely data-driven, with a strong emphasis on getting rid of the efforts for collecting external resources or designing hand-crafted features. This could increase the chance of overfitting since the models cannot access any supervision signal beyond the small amount of annotated data, limiting their power to generalize beyond the annotated entities. In this work, we show that properly utilizing external gazetteers could benefit segmental neural NER models. We add a simple module on the recently proposed hybrid semi-Markov CRF architecture and observe some promising results.</abstract>
       <url hash="b423f4b6">P19-1524</url>
@@ -6712,7 +6712,7 @@
     <paper id="525">
       <title>Span-Level Model for Relation Extraction</title>
       <author><first>Kalpit</first><last>Dixit</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>5308–5314</pages>
       <abstract>Relation Extraction is the task of identifying entity mention spans in raw text and then identifying relations between pairs of the entity mentions. Recent approaches for this span-level task have been token-level models which have inherent limitations. They cannot easily define and implement span-level features, cannot model overlapping entity mentions and have cascading errors due to the use of sequential decoding. To address these concerns, we present a model which directly models all possible spans and performs joint entity mention detection and relation extraction. We report a new state-of-the-art performance of 62.83 F1 (prev best was 60.49) on the ACE2005 dataset.</abstract>
       <url hash="50b698b1">P19-1525</url>
@@ -6734,7 +6734,7 @@
       <title>Neural Architectures for Nested <fixed-case>NER</fixed-case> through Linearization</title>
       <author><first>Jana</first><last>Straková</last></author>
       <author><first>Milan</first><last>Straka</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>5326–5331</pages>
       <abstract>We propose two neural network architectures for nested named entity recognition (NER), a setting in which named entities may overlap and also be labeled with more than one label. We encode the nested labels using a linearized scheme. In our first proposed approach, the nested labels are modeled as multilabels corresponding to the Cartesian product of the nested labels in a standard LSTM-CRF architecture. In the second one, the nested NER is viewed as a sequence-to-sequence problem, in which the input sequence consists of the tokens and output sequence of the labels, using hard attention on the word whose label is being predicted. The proposed methods outperform the nested NER state of the art on four corpora: ACE-2004, ACE-2005, GENIA and Czech CNEC. We also enrich our architectures with the recently published contextual embeddings: ELMo, BERT and Flair, reaching further improvements for the four nested entity corpora. In addition, we report flat NER state-of-the-art results for CoNLL-2002 Dutch and Spanish and for CoNLL-2003 English.</abstract>
       <url hash="6546d42f">P19-1527</url>
@@ -6768,7 +6768,7 @@
     </paper>
     <paper id="530">
       <title><fixed-case>PTB</fixed-case> Graph Parsing with Tree Approximation</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>5344–5349</pages>
       <abstract>The Penn Treebank (PTB) represents syntactic structures as graphs due to nonlocal dependencies. This paper proposes a method that approximates PTB graph-structured representations by trees. By our approximation method, we can reduce nonlocal dependency identification and constituency parsing into single tree-based parsing. An experimental result demonstrates that our approximation method with an off-the-shelf tree-based constituency parser significantly outperforms the previous methods in nonlocal dependency identification.</abstract>
@@ -6888,7 +6888,7 @@
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Xiaodong</first><last>Liu</last></author>
       <author><first>Xiang</first><last>Gao</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <pages>5427–5436</pages>
@@ -6902,7 +6902,7 @@
       <author><first>Hardik</first><last>Chauhan</last></author>
       <author><first>Mauajama</first><last>Firdaus</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>5437–5447</pages>
       <abstract>Multimodal dialogue systems have opened new frontiers in the traditional goal-oriented dialogue systems. The state-of-the-art dialogue systems are primarily based on unimodal sources, predominantly the text, and hence cannot capture the information present in the other sources such as videos, audios, images etc. With the availability of large scale multimodal dialogue dataset (MMD) (Saha et al., 2018) on the fashion domain, the visual appearance of the products is essential for understanding the intention of the user. Without capturing the information from both the text and image, the system will be incapable of generating correct and desirable responses. In this paper, we propose a novel position and attribute aware attention mechanism to learn enhanced image representation conditioned on the user utterance. Our evaluation shows that the proposed model can generate appropriate responses while preserving the position and attribute information. Experimental results also prove that our proposed approach attains superior performance compared to the baseline models, and outperforms the state-of-the-art approaches on text similarity based evaluation metrics.</abstract>
       <url hash="6e1c9b59">P19-1540</url>
@@ -6914,7 +6914,7 @@
       <author><first>He</first><last>Bai</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>5448–5453</pages>
       <abstract>Dialogue contexts are proven helpful in the spoken language understanding (SLU) system and they are typically encoded with explicit memory representations. However, most of the previous models learn the context memory with only one objective to maximizing the SLU performance, leaving the context memory under-exploited. In this paper, we propose a new dialogue logistic inference (DLI) task to consolidate the context memory jointly with SLU in the multi-task framework. DLI is defined as sorting a shuffled dialogue session into its original logical order and shares the same memory encoder and retrieval mechanism as the SLU model. Our experimental results show that various popular contextual SLU models can benefit from our approach, and improvements are quite impressive, especially in slot filling.</abstract>
       <url hash="30e55ef8">P19-1541</url>
@@ -6937,7 +6937,7 @@
     <paper id="543">
       <title>Reading Turn by Turn: Hierarchical Attention Architecture for Spoken Dialogue Comprehension</title>
       <author><first>Zhengyuan</first><last>Liu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>5460–5466</pages>
       <abstract>Comprehending multi-turn spoken conversations is an emerging research area, presenting challenges different from reading comprehension of passages due to the interactive nature of information exchange from at least two speakers. Unlike passages, where sentences are often the default semantic modeling unit, in multi-turn conversations, a turn is a topically coherent unit embodied with immediately relevant context, making it a linguistically intuitive segment for computationally modeling verbal interactions. Therefore, in this work, we propose a hierarchical attention neural network architecture, combining turn-level and word-level attention mechanisms, to improve spoken dialogue comprehension performance. Experiments are conducted on a multi-turn conversation dataset, where nurses inquire and discuss symptom information with patients. We empirically show that the proposed approach outperforms standard attention baselines, achieves more efficient learning outcomes, and is more robust to lengthy and out-of-distribution test samples.</abstract>
       <url hash="2cff57eb">P19-1543</url>
@@ -6984,7 +6984,7 @@
       <author><first>Darsh</first><last>Shah</last></author>
       <author><first>Raghav</first><last>Gupta</last></author>
       <author><first>Amir</first><last>Fayazi</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>5484–5490</pages>
       <abstract>Task-oriented dialog systems increasingly rely on deep learning-based slot filling models, usually needing extensive labeled training data for target domains. Often, however, little to no target domain training data may be available, or the training and target domain schemas may be misaligned, as is common for web forms on similar websites. Prior zero-shot slot filling models use slot descriptions to learn concepts, but are not robust to misaligned schemas. We propose utilizing both the slot description and a small number of examples of slot values, which may be easily available, to learn semantic representations of slots which are transferable across domains and robust to misaligned schemas. Our approach outperforms state-of-the-art models on two multi-domain datasets, especially in the low-data setting.</abstract>
       <url hash="f910d9d9">P19-1547</url>
@@ -7036,7 +7036,7 @@
       <title>Neural-based <fixed-case>C</fixed-case>hinese Idiom Recommendation for Enhancing Elegance in Essay Writing</title>
       <author><first>Yuanchao</first><last>Liu</last></author>
       <author><first>Bo</first><last>Pang</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>5522–5526</pages>
       <abstract>Although the proper use of idioms can enhance the elegance of writing, the active use of various expressions is a challenge because remembering idioms is difficult. In this study, we address the problem of idiom recommendation by leveraging a neural machine translation framework, in which we suppose that idioms are written with one pseudo target language. Two types of real-life datasets are collected to support this study. Experimental results show that the proposed approach achieves promising performance compared with other baseline methods.</abstract>
       <url hash="774ce74a">P19-1552</url>
@@ -7070,7 +7070,7 @@
       <author><first>Lijun</first><last>Wu</last></author>
       <author><first>Yingce</first><last>Xia</last></author>
       <author><first>Tao</first><last>Qin</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <author><first>Wengang</first><last>Zhou</last></author>
       <author><first>Tie-Yan</first><last>Liu</last></author>
       <pages>5539–5544</pages>
@@ -7083,7 +7083,7 @@
       <title>Reversing Gradients in Adversarial Domain Adaptation for Question Deduplication and Textual Entailment Tasks</title>
       <author><first>Anush</first><last>Kamath</last></author>
       <author><first>Sparsh</first><last>Gupta</last></author>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <pages>5545–5550</pages>
       <abstract>Adversarial domain adaptation has been recently proposed as an effective technique for textual matching tasks, such as question deduplication. Here we investigate the use of gradient reversal on adversarial domain adaptation to explicitly learn both shared and unshared (domain specific) representations between two textual domains. In doing so, gradient reversal learns features that explicitly compensate for domain mismatch, while still distilling domain specific knowledge that can improve target domain accuracy. We evaluate reversing gradients for adversarial adaptation on multiple domains, and demonstrate that it significantly outperforms other methods on question deduplication as well as on recognizing textual entailment (RTE) tasks, achieving up to 7% absolute boost in base model accuracy on some datasets.</abstract>
       <url hash="d4673a9c">P19-1556</url>
@@ -7145,7 +7145,7 @@
       <title>Combating Adversarial Misspellings with Robust Word Recognition</title>
       <author><first>Danish</first><last>Pruthi</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
-      <author><first>Zachary C.</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary C.</first><last>Lipton</last></author>
       <pages>5582–5591</pages>
       <abstract>To combat adversarial spelling mistakes, we propose placing a word recognition model in front of the downstream classifier. Our word recognition models build upon the RNN semi-character architecture, introducing several new backoff strategies for handling rare and unseen words. Trained to recognize words corrupted by random adds, drops, swaps, and keyboard mistakes, our method achieves 32% relative (and 3.3% absolute) error reduction over the vanilla semi-character model. Notably, our pipeline confers robustness on the downstream classifier, outperforming both adversarial training and off-the-shelf spell checkers. Against a BERT model fine-tuned for sentiment analysis, a single adversarially-chosen character attack lowers accuracy from 90.3% to 45.8%. Our defense restores accuracy to 75%. Surprisingly, better word recognition does not always entail greater robustness. Our analysis reveals that robustness also depends upon a quantity that we denote the sensitivity.</abstract>
       <url hash="d0c241a9">P19-1561</url>
@@ -7156,7 +7156,7 @@
       <title>An Empirical Investigation of Structured Output Modeling for Graph-based Neural Dependency Parsing</title>
       <author><first>Zhisong</first><last>Zhang</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>5592–5598</pages>
       <abstract>In this paper, we investigate the aspect of structured output modeling for the state-of-the-art graph-based neural dependency parser (Dozat and Manning, 2017). With evaluations on 14 treebanks, we empirically show that global output-structured models can generally obtain better performance, especially on the metric of sentence-level Complete Match. However, probably because neural models already learn good global views of the inputs, the improvement brought by structured output modeling is modest.</abstract>
       <url hash="438bda11">P19-1562</url>
@@ -7184,7 +7184,7 @@
       <title>Multimodal Transformer Networks for End-to-End Video-Grounded Dialogue Systems</title>
       <author><first>Hung</first><last>Le</last></author>
       <author><first>Doyen</first><last>Sahoo</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Steven</first><last>Hoi</last></author>
       <pages>5612–5623</pages>
       <abstract>Developing Video-Grounded Dialogue Systems (VGDS), where a dialogue is conducted based on visual and audio aspects of a given video, is significantly more challenging than traditional image or text-grounded dialogue systems because (1) feature space of videos span across multiple picture frames, making it difficult to obtain semantic information; and (2) a dialogue agent must perceive and process information from different modalities (audio, video, caption, etc.) to obtain a comprehensive understanding. Most existing work is based on RNNs and sequence-to-sequence architectures, which are not very effective for capturing complex long-term dependencies (like in videos). To overcome this, we propose Multimodal Transformer Networks (MTN) to encode videos and incorporate information from different modalities. We also propose query-aware attention through an auto-encoder to extract query-aware features from non-text modalities. We develop a training procedure to simulate token-level decoding to improve the quality of generated responses during inference. We get state of the art performance on Dialogue System Technology Challenge 7 (DSTC7). Our model also generalizes to another multimodal visual-grounded dialogue task, and obtains promising performance.</abstract>
@@ -7199,7 +7199,7 @@
       <author><first>Tiancheng</first><last>Zhao</last></author>
       <author><first>Chenyan</first><last>Xiong</last></author>
       <author><first>Xiaodan</first><last>Liang</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <author><first>Zhiting</first><last>Hu</last></author>
       <pages>5624–5634</pages>
       <abstract>Many real-world open-domain conversation applications have specific goals to achieve during open-ended chats, such as recommendation, psychotherapy, education, etc. We study the problem of imposing conversational goals on open-domain chat agents. In particular, we want a conversational system to chat naturally with human and proactively guide the conversation to a designated target subject. The problem is challenging as no public data is available for learning such a target-guided strategy. We propose a structured approach that introduces coarse-grained keywords to control the intended content of system responses. We then attain smooth conversation transition through turn-level supervised learning, and drive the conversation towards the target with discourse-level constraints. We further derive a keyword-augmented conversation dataset for the study. Quantitative and human evaluations show our system can produce meaningful and effective conversations, significantly improving over other approaches</abstract>
@@ -7243,7 +7243,7 @@
       <author><first>Sawan</first><last>Kumar</last></author>
       <author><first>Sharmistha</first><last>Jat</last></author>
       <author><first>Karan</first><last>Saxena</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
       <pages>5670–5681</pages>
       <abstract>Word Sense Disambiguation (WSD) is a long-standing but open problem in Natural Language Processing (NLP). WSD corpora are typically small in size, owing to an expensive annotation process. Current supervised WSD methods treat senses as discrete labels and also resort to predicting the Most-Frequent-Sense (MFS) for words unseen during training. This leads to poor performance on rare and unseen senses. To overcome this challenge, we propose Extended WSD Incorporating Sense Embeddings (EWISE), a supervised model to perform WSD by predicting over a continuous sense embedding space as opposed to a discrete label space. This allows EWISE to generalize over both seen and unseen senses, thus achieving generalized zero-shot learning. To obtain target sense embeddings, EWISE utilizes sense definitions. EWISE learns a novel sentence encoder for sense definitions by using WordNet relations and also ConvE, a recently proposed knowledge graph embedding method. We also compare EWISE against other sentence encoders pretrained on large corpora to generate definition embeddings. EWISE achieves new state-of-the-art WSD performance.</abstract>
       <url hash="8fcc5d79">P19-1568</url>
@@ -7254,7 +7254,7 @@
     <paper id="569">
       <title>Language Modelling Makes Sense: Propagating Representations through <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Full-Coverage Word Sense Disambiguation</title>
       <author><first>Daniel</first><last>Loureiro</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <pages>5682–5691</pages>
       <abstract>Contextual embeddings represent a new generation of semantic representations learned from Neural Language Modelling (NLM) that addresses the issue of meaning conflation hampering traditional word embeddings. In this work, we show that contextual embeddings can be used to achieve unprecedented gains in Word Sense Disambiguation (WSD) tasks. Our approach focuses on creating sense-level embeddings with full-coverage of WordNet, and without recourse to explicit knowledge of sense distributions or task-specific modelling. As a result, a simple Nearest Neighbors (k-NN) method using our representations is able to consistently surpass the performance of previous systems using powerful neural sequencing models. We also analyse the robustness of our approach when ignoring part-of-speech and lemma features, requiring disambiguation against the full sense inventory, and revealing shortcomings to be improved. Finally, we explore applications of our sense embeddings for concept-level analyses of contextual embeddings and their respective NLMs.</abstract>
       <url hash="89fc428e">P19-1569</url>
@@ -7317,10 +7317,10 @@
     <paper id="574">
       <title>Probing for Semantic Classes: Diagnosing the Meaning Content of Word Embeddings</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>T. J.</first><last>Hazen</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="timothy-j-hazen"><first>T. J.</first><last>Hazen</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>5740–5753</pages>
       <abstract>Word embeddings typically represent different meanings of a word in a single conflated vector. Empirical analysis of embeddings of ambiguous words is currently limited by the small size of manually annotated resources and by the fact that word senses are treated as unrelated individual concepts. We present a large dataset based on manual Wikipedia annotations and word senses, where word senses from different words are related by semantic classes. This is the basis for novel diagnostic tests for an embedding’s content: we probe word embeddings for semantic classes and analyze the embedding space by classifying embeddings into semantic classes. Our main findings are: (i) Information about a sense is generally represented well in a single-vector embedding – if the sense is frequent. (ii) A classifier can accurately predict whether a word is single-sense or multi-sense, based only on its embedding. (iii) Although rare senses are not well represented in single-vector embeddings, this does not have negative impact on an NLP application whose performance depends on frequent senses.</abstract>
       <url hash="8d3ef09c">P19-1574</url>
@@ -7332,7 +7332,7 @@
       <title>Deep Neural Model Inspection and Comparison via Functional Neuron Pathways</title>
       <author><first>James</first><last>Fiacco</last></author>
       <author><first>Samridhi</first><last>Choudhary</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>5754–5764</pages>
       <abstract>We introduce a general method for the interpretation and comparison of neural models. The method is used to factor a complex neural model into its functional components, which are comprised of sets of co-firing neurons that cut across layers of the network architecture, and which we call neural pathways. The function of these pathways can be understood by identifying correlated task level and linguistic heuristics in such a way that this knowledge acts as a lens for approximating what the network has learned to apply to its intended task. As a case study for investigating the utility of these pathways, we present an examination of pathways identified in models trained for two standard tasks, namely Named Entity Recognition and Recognizing Textual Entailment.</abstract>
       <url hash="533a1237">P19-1575</url>
@@ -7342,7 +7342,7 @@
     </paper>
     <paper id="576">
       <title>Collocation Classification with Unsupervised Relation Vectors</title>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>5765–5772</pages>
@@ -7354,7 +7354,7 @@
     </paper>
     <paper id="577">
       <title>Corpus-based Check-up for Thesaurus</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>5773–5779</pages>
       <abstract>In this paper we discuss the usefulness of applying a checking procedure to existing thesauri. The procedure is based on the analysis of discrepancies of corpus-based and thesaurus-based word similarities. We applied the procedure to more than 30 thousand words of the Russian wordnet and found some serious errors in word sense description, including inaccurate relationships and missing senses of ambiguous words.</abstract>
       <url hash="5b04b187">P19-1577</url>
@@ -7405,7 +7405,7 @@
       <title>Better <fixed-case>OOV</fixed-case> Translation with Bilingual Terminology Mining</title>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>5809–5815</pages>
       <abstract>Unseen words, also called out-of-vocabulary words (OOVs), are difficult for machine translation. In neural machine translation, byte-pair encoding can be used to represent OOVs, but they are still often incorrectly translated. We improve the translation of OOVs in NMT using easy-to-obtain monolingual data. We look for OOVs in the text to be translated and translate them using simple-to-construct bilingual word embeddings (BWEs). In our MT experiments we take the 5-best candidates, which is motivated by intrinsic mining experiments. Using all five of the proposed target language words as queries we mine target-language sentences. We then back-translate, forcing the back-translation of each of the five proposed target-language OOV-translation-candidates to be the original source-language OOV. We show that by using this synthetic data to fine-tune our system the translation of OOVs can be dramatically improved. In our experiments we use a system trained on Europarl and mine sentences containing medical terms from monolingual data.</abstract>
       <url hash="50c1135e">P19-1581</url>
@@ -7442,7 +7442,7 @@
       <author><first>Max</first><last>Friedrich</last></author>
       <author><first>Arne</first><last>Köhn</last></author>
       <author><first>Gregor</first><last>Wiedemann</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>5829–5839</pages>
       <abstract>De-identification is the task of detecting protected health information (PHI) in medical text. It is a critical step in sanitizing electronic health records (EHR) to be shared for research. Automatic de-identification classifiers can significantly speed up the sanitization process. However, obtaining a large and diverse dataset to train such a classifier that works well across many types of medical text poses a challenge as privacy laws prohibit the sharing of raw medical records. We introduce a method to create privacy-preserving shareable representations of medical text (i.e. they contain no PHI) that does not require expensive manual pseudonymization. These representations can be shared between organizations to create unified datasets for training de-identification models. Our representation allows training a simple LSTM-CRF de-identification model to an F1 score of 97.4%, which is comparable to a strong baseline that exposes private information in its representation. A robust, widely available de-identification classifier based on our representation could potentially enable studies for which de-identification would otherwise be too costly.</abstract>
       <url hash="a11527ed">P19-1584</url>
@@ -7517,7 +7517,7 @@
       <author><first>Suchin</first><last>Gururangan</last></author>
       <author><first>Tam</first><last>Dang</last></author>
       <author><first>Dallas</first><last>Card</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>5880–5894</pages>
       <abstract>We introduce VAMPIRE, a lightweight pretraining framework for effective text classification when data and computing resources are limited. We pretrain a unigram document model as a variational autoencoder on in-domain, unlabeled data and use its internal states as features in a downstream classifier. Empirically, we show the relative strength of VAMPIRE against computationally expensive contextual embeddings and other popular semi-supervised baselines under low resource settings. We also find that fine-tuning to in-domain data is crucial to achieving decent performance from contextual embeddings when working with limited supervision. We accompany this paper with code to pretrain and use VAMPIRE embeddings in downstream tasks.</abstract>
       <url hash="31842231">P19-1590</url>
@@ -7554,7 +7554,7 @@
     <paper id="593">
       <title>The Referential Reader: A Recurrent Entity Network for Anaphora Resolution</title>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <pages>5918–5925</pages>
       <abstract>We present a new architecture for storing and accessing entity mentions during online text processing. While reading the text, entity references are identified, and may be stored by either updating or overwriting a cell in a fixed-length memory. The update operation implies coreference with the other mentions that are stored in the same cell; the overwrite operation causes these mentions to be forgotten. By encoding the memory operations as differentiable gates, it is possible to train the model end-to-end, using both a supervised anaphora resolution objective as well as a supplementary language modeling objective. Evaluation on a dataset of pronoun-name anaphora demonstrates strong performance with purely incremental text processing.</abstract>
@@ -7577,10 +7577,10 @@
     <paper id="595">
       <title><fixed-case>BAM</fixed-case>! Born-Again Multi-Task Networks for Natural Language Understanding</title>
       <author><first>Kevin</first><last>Clark</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <author><first>Urvashi</first><last>Khandelwal</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Quoc V.</first><last>Le</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="quoc-le"><first>Quoc V.</first><last>Le</last></author>
       <pages>5931–5937</pages>
       <abstract>It can be challenging to train multi-task neural networks that outperform or even match their single-task counterparts. To help address this, we propose using knowledge distillation where single-task models teach a multi-task model. We enhance this training with teacher annealing, a novel method that gradually transitions the model from distillation to supervised learning, helping the multi-task model surpass its single-task teachers. We evaluate our approach by multi-task fine-tuning BERT on the GLUE benchmark. Our method consistently improves over standard single-task and multi-task training.</abstract>
       <url hash="1c4523b1">P19-1595</url>
@@ -7593,7 +7593,7 @@
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Vrindavan</first><last>Harrison</last></author>
       <author><first>Abteen</first><last>Ebrahimi</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>5938–5951</pages>
       <abstract>Neural natural language generation (NNLG) from structured meaning representations has become increasingly popular in recent years. While we have seen progress with generating syntactically correct utterances that preserve semantics, various shortcomings of NNLG systems are clear: new tasks require new training data which is not available or straightforward to acquire, and model outputs are simple and may be dull and repetitive. This paper addresses these two critical challenges in NNLG by: (1) scalably (and at no cost) creating training datasets of parallel meaning representations and reference texts with rich style markup by using data from freely available and naturally descriptive user reviews, and (2) systematically exploring how the style markup enables joint control of semantic and stylistic aspects of neural model output. We present YelpNLG, a corpus of 300,000 rich, parallel meaning representations and highly stylistically varied reference texts spanning different restaurant attributes, and describe a novel methodology that can be scalably reused to generate NLG datasets for other domains. The experiments show that the models control important aspects, including lexical choice of adjectives, output length, and sentiment, allowing the models to successfully hit multiple style targets without sacrificing semantics.</abstract>
       <url hash="45c84897">P19-1596</url>
@@ -7614,8 +7614,8 @@
     <paper id="598">
       <title><fixed-case>B</fixed-case>arack’s Wife Hillary: Using Knowledge Graphs for Fact-Aware Language Modeling</title>
       <author><first>Robert</first><last>Logan</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <pages>5962–5971</pages>
@@ -7642,7 +7642,7 @@
       <author><first>Fuli</first><last>Luo</last></author>
       <author><first>Pengcheng</first><last>Yang</last></author>
       <author><first>Wei</first><last>Wu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>5985–5996</pages>
       <abstract>The comprehensive descriptions for factual attribute-value tables, which should be accurate, informative and loyal, can be very helpful for end users to understand the structured data in this form. However previous neural generators might suffer from key attributes missing, less informative and groundless information problems, which impede the generation of high-quality comprehensive descriptions for tables. To relieve these problems, we first propose force attention (FA) method to encourage the generator to pay more attention to the uncovered attributes to avoid potential key attributes missing. Furthermore, we propose reinforcement learning for information richness to generate more informative as well as more loyal descriptions for tables. In our experiments, we utilize the widely used WIKIBIO dataset as a benchmark. Besides, we create WB-filter based on WIKIBIO to test our model in the simulated user-oriented scenarios, in which the generated descriptions should accord with particular user interests. Experimental results show that our model outperforms the state-of-the-art baselines on both automatic and human evaluation.</abstract>
@@ -7655,7 +7655,7 @@
       <author><first>Ning</first><last>Dai</last></author>
       <author><first>Jianze</first><last>Liang</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>5997–6007</pages>
       <abstract>Disentangling the content and style in the latent space is prevalent in unpaired text style transfer. However, two major issues exist in most of the current neural models. 1) It is difficult to completely strip the style information from the semantics for a sentence. 2) The recurrent neural network (RNN) based encoder and decoder, mediated by the latent representation, cannot well deal with the issue of the long-term dependency, resulting in poor preservation of non-stylistic semantic content. In this paper, we propose the Style Transformer, which makes no assumption about the latent representation of source sentence and equips the power of attention mechanism in Transformer to achieve better style transfer and better content preservation.</abstract>
       <url hash="0ca2c53d">P19-1601</url>
@@ -7670,8 +7670,8 @@
       <author><first>Lei</first><last>Li</last></author>
       <author><first>Lili</first><last>Mou</last></author>
       <author><first>Olga</first><last>Vechtomova</last></author>
-      <author><first>Xin-yu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xin-yu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>6008–6019</pages>
       <abstract>Variational auto-encoders (VAEs) are widely used in natural language generation due to the regularization of the latent space. However, generating sentences from the continuous latent space does not explicitly model the syntactic information. In this paper, we propose to generate sentences from disentangled syntactic and semantic spaces. Our proposed method explicitly models syntactic information in the VAE’s latent space by using the linearized tree sequence, leading to better performance of language generation. Additionally, the advantage of sampling in the disentangled syntactic and semantic latent spaces enables us to perform novel applications, such as the unsupervised paraphrase generation and syntax transfer generation. Experimental results show that our proposed model achieves similar or better performance in various tasks, compared with state-of-the-art related work.</abstract>
       <url hash="789f5b8d">P19-1602</url>
@@ -7684,7 +7684,7 @@
       <author><first>Damai</first><last>Dai</last></author>
       <author><first>Pengcheng</first><last>Yang</last></author>
       <author><first>Tianyu</first><last>Liu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <author><first>Xu</first><last>Sun</last></author>
       <pages>6020–6026</pages>
@@ -7718,8 +7718,8 @@
     <paper id="606">
       <title>Storyboarding of Recipes: Grounded Contextual Generation</title>
       <author><first>Khyathi</first><last>Chandu</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>6040–6046</pages>
       <abstract>Information need of humans is essentially multimodal in nature, enabling maximum exploitation of situated context. We introduce a dataset for sequential procedural (how-to) text generation from images in cooking domain. The dataset consists of 16,441 cooking recipes with 160,479 photos associated with different steps. We setup a baseline motivated by the best performing model in terms of human evaluation for the Visual Story Telling (ViST) task. In addition, we introduce two models to incorporate high level structure learnt by a Finite State Machine (FSM) in neural sequential generation process by: (1) Scaffolding Structure in Decoder (SSiD) (2) Scaffolding Structure in Loss (SSiL). Our best performing model (SSiL) achieves a METEOR score of 0.31, which is an improvement of 0.6 over the baseline model. We also conducted human evaluation of the generated grounded recipes, which reveal that 61% found that our proposed (SSiL) model is better than the baseline model in terms of overall recipes. We also discuss analysis of the output highlighting key important NLP issues for prospective directions.</abstract>
       <url hash="ac1523e3">P19-1606</url>
@@ -7798,7 +7798,7 @@
       <title>Multi-hop Reading Comprehension through Question Decomposition and Rescoring</title>
       <author><first>Sewon</first><last>Min</last></author>
       <author><first>Victor</first><last>Zhong</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>6097–6109</pages>
       <abstract>Multi-hop Reading Comprehension (RC) requires reasoning and aggregation across several paragraphs. We propose a system for multi-hop RC that decomposes a compositional question into simpler sub-questions that can be answered by off-the-shelf single-hop RC models. Since annotations for such decomposition are expensive, we recast subquestion generation as a span prediction problem and show that our method, trained using only 400 labeled examples, generates sub-questions that are as effective as human-authored sub-questions. We also introduce a new global rescoring approach that considers each decomposition (i.e. the sub-questions and their answers) to select the best final answer, greatly improving overall performance. Our experiments on HotpotQA show that this approach achieves the state-of-the-art results, while providing explainable evidence for its decision making in the form of sub-questions.</abstract>
@@ -7838,7 +7838,7 @@
       <author><first>Zaixiang</first><last>Zheng</last></author>
       <author><first>Jianbing</first><last>Zhang</last></author>
       <author><first>Xiaohui</first><last>Yan</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>6130–6139</pages>
       <abstract>Relation detection is a core step in many natural language process applications including knowledge base question answering. Previous efforts show that single-fact questions could be answered with high accuracy. However, one critical problem is that current approaches only get high accuracy for questions whose relations have been seen in the training data. But for unseen relations, the performance will drop rapidly. The main reason for this problem is that the representations for unseen relations are missing. In this paper, we propose a simple mapping method, named representation adapter, to learn the representation mapping for both seen and unseen relations based on previously learned relation embedding. We employ the adversarial objective and the reconstruction objective to improve the mapping performance. We re-organize the popular SimpleQuestion dataset to reveal and evaluate the problem of detecting unseen relations. Experiments show that our method can greatly improve the performance of unseen relations while the performance for those seen part is kept comparable to the state-of-the-art.</abstract>
       <url hash="af975275">P19-1616</url>
@@ -7852,7 +7852,7 @@
       <author><first>Yanru</first><last>Qu</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Lei</first><last>Li</last></author>
-      <author><first>Weinan</first><last>Zhang</last></author>
+      <author id="weinan-zhang"><first>Weinan</first><last>Zhang</last></author>
       <author><first>Yong</first><last>Yu</last></author>
       <pages>6140–6150</pages>
       <abstract>Text-based question answering (TBQA) has been studied extensively in recent years. Most existing approaches focus on finding the answer to a question within a single paragraph. However, many difficult questions require multiple supporting evidence from scattered text among two or more documents. In this paper, we propose Dynamically Fused Graph Network (DFGN), a novel method to answer those questions requiring multiple scattered evidence and reasoning over them. Inspired by human’s step-by-step reasoning behavior, DFGN includes a dynamic fusion layer that starts from the entities mentioned in the given query, explores along the entity graph dynamically built from the text, and gradually finds relevant supporting entities from the given documents. We evaluate DFGN on HotpotQA, a public TBQA dataset requiring multi-hop reasoning. DFGN achieves competitive results on the public board. Furthermore, our analysis shows DFGN produces interpretable reasoning chains.</abstract>
@@ -7866,7 +7866,7 @@
       <author><first>Pasquale</first><last>Minervini</last></author>
       <author><first>Jannes</first><last>Münchmeyer</last></author>
       <author><first>Ulf</first><last>Leser</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <pages>6151–6161</pages>
       <abstract>Rule-based models are attractive for various tasks because they inherently lead to interpretable and explainable decisions and can easily incorporate prior knowledge. However, such systems are difficult to apply to problems involving natural language, due to its large linguistic variability. In contrast, neural models can cope very well with ambiguity by learning distributed representations of words and their composition from data, but lead to models that are difficult to interpret. In this paper, we describe a model combining neural networks with logic programming in a novel manner for solving multi-hop reasoning tasks over natural language. Specifically, we propose to use an Prolog prover which we extend to utilize a similarity function over pretrained sentence encoders. We fine-tune the representations for the similarity function via backpropagation. This leads to a system that can apply rule-based reasoning to natural language, and induce domain-specific natural language rules from training data. We evaluate the proposed system on two different question answering tasks, showing that it outperforms two baselines – BiDAF (Seo et al., 2016a) and FastQA( Weissenborn et al., 2017) on a subset of the WikiHop corpus and achieves competitive results on the MedHop data set (Welbl et al., 2017).</abstract>
       <url hash="dadb47b6">P19-1618</url>
@@ -7893,7 +7893,7 @@
       <author><first>Daniel</first><last>Andor</last></author>
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Jacob</first><last>Devlin</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>6168–6173</pages>
       <abstract>We introduce a novel method of generating synthetic question answering corpora by combining models of question generation and answer extraction, and by filtering the results to ensure roundtrip consistency. By pretraining on the resulting corpora we obtain significant improvements on SQuAD2 and NQ, establishing a new state-of-the-art on the latter. Our synthetic data generation models, for both question generation and answer extraction, can be fully reproduced by finetuning a publicly available BERT model on the extractive subsets of SQuAD2 and NQ. We also describe a more powerful variant that does full sequence-to-sequence pretraining for question generation, obtaining exact match and F1 at less than 0.1% and 0.4% from human performance on SQuAD2.</abstract>
       <url hash="bbe0d6c4">P19-1620</url>
@@ -7903,7 +7903,7 @@
     </paper>
     <paper id="621">
       <title>Are Red Roses Red? Evaluating Consistency of Question-Answering Models</title>
-      <author><first>Marco Tulio</first><last>Ribeiro</last></author>
+      <author id="marco-tulio-ribeiro"><first>Marco Tulio</first><last>Ribeiro</last></author>
       <author><first>Carlos</first><last>Guestrin</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <pages>6174–6184</pages>
@@ -7989,7 +7989,7 @@
     <paper id="629">
       <title>Discourse Representation Parsing for Sentences and Documents</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <pages>6248–6262</pages>
       <abstract>We introduce a novel semantic parsing task based on Discourse Representation Theory (DRT; Kamp and Reyle 1993). Our model operates over Discourse Representation Tree Structures which we formally define for sentences and documents. We present a general framework for parsing discourse structures of arbitrary length and granularity. We achieve this with a neural model equipped with a supervised hierarchical attention mechanism and a linguistically-motivated copy strategy. Experimental results on sentence- and document-level benchmarks show that our model outperforms competitive baselines by a wide margin.</abstract>
@@ -8098,7 +8098,7 @@
     </paper>
     <paper id="638">
       <title>Encouraging Paragraph Embeddings to Remember Sentence Identity Improves Classification</title>
-      <author><first>Tu</first><last>Vu</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
       <author><first>Mohit</first><last>Iyyer</last></author>
       <pages>6331–6338</pages>
       <abstract>While paragraph embedding models are remarkably effective for downstream classification tasks, what they learn and encode into a single vector remains opaque. In this paper, we investigate a state-of-the-art paragraph embedding method proposed by Zhang et al. (2017) and discover that it cannot reliably tell whether a given sentence occurs in the input paragraph or not. We formulate a sentence content task to probe for this basic linguistic property and find that even a much simpler bag-of-words method has no trouble solving it. This result motivates us to replace the reconstruction-based objective of Zhang et al. (2017) with our sentence content probe objective in a semi-supervised setting. Despite its simplicity, our objective improves over paragraph reconstruction in terms of (1) downstream classification accuracies on benchmark datasets, (2) faster training, and (3) better generalization ability.</abstract>
@@ -8111,7 +8111,7 @@
       <title>A Multi-Task Architecture on Relevance-based Neural Query Translation</title>
       <author><first>Sheikh Muhammad</first><last>Sarwar</last></author>
       <author><first>Hamed</first><last>Bonab</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>6339–6344</pages>
       <abstract>We describe a multi-task learning approach to train a Neural Machine Translation (NMT) model with a Relevance-based Auxiliary Task (RAT) for search query translation. The translation process for Cross-lingual Information Retrieval (CLIR) task is usually treated as a black box and it is performed as an independent step. However, an NMT model trained on sentence-level parallel data is not aware of the vocabulary distribution of the retrieval corpus. We address this problem and propose a multi-task learning architecture that achieves 16% improvement over a strong baseline on Italian-English query-document dataset. We show using both quantitative and qualitative analysis that our model generates balanced and precise translations with the regularization effect it achieves from multi-task learning paradigm.</abstract>
       <url hash="0441a30f">P19-1639</url>
@@ -8159,9 +8159,9 @@
     <paper id="643">
       <title>Identifying Visible Actions in Lifestyle Vlogs</title>
       <author><first>Oana</first><last>Ignat</last></author>
-      <author><first>Laura</first><last>Burdick</last></author>
+      <author id="laura-burdick"><first>Laura</first><last>Burdick</last></author>
       <author><first>Jia</first><last>Deng</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>6406–6417</pages>
       <abstract>We consider the task of identifying human actions visible in online videos. We focus on the widely spread genre of lifestyle vlogs, which consist of videos of people performing actions while verbally describing them. Our goal is to identify if actions mentioned in the speech description of a video are visually present. We construct a dataset with crowdsourced manual annotations of visible actions, and introduce a multimodal algorithm that leverages information derived from visual and linguistic clues to automatically infer which actions are visible in a video.</abstract>
       <url hash="fb315e0e">P19-1643</url>
@@ -8188,8 +8188,8 @@
     <paper id="645">
       <title>Learning to Discover, Ground and Use Words with Segmental Neural Language Models</title>
       <author><first>Kazuya</first><last>Kawakami</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>6429–6441</pages>
       <abstract>We propose a segmental neural language model that combines the generalization power of neural networks with the ability to discover word-like units that are latent in unsegmented character sequences. In contrast to previous segmentation models that treat word segmentation as an isolated task, our model unifies word discovery, learning how words fit together to form sentences, and, by conditioning the model on visual context, how words’ meanings ground in representations of nonlinguistic modalities. Experiments show that the unconditional model learns predictive distributions better than character LSTM models, discovers words competitively with nonparametric Bayesian word segmentation models, and that modeling language conditional on visual context improves performance on both.</abstract>
       <url hash="5b0638a5">P19-1645</url>
@@ -8212,7 +8212,7 @@
     </paper>
     <paper id="647">
       <title>Symbolic Inductive Bias for Visually Grounded Learning of Spoken Language</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>6452–6462</pages>
       <abstract>A widespread approach to processing spoken language is to first automatically transcribe it into text. An alternative is to use an end-to-end approach: recent works have proposed to learn semantic embeddings of spoken language from images with spoken captions, without an intermediate transcription step. We propose to use multitask learning to exploit existing transcribed speech within the end-to-end setting. We describe a three-task architecture which combines the objectives of matching spoken captions with corresponding images, speech with text, and text with images. We show that the addition of the speech/text task leads to substantial performance improvements on image retrieval when compared to training the speech/image task in isolation. We conjecture that this is due to a strong inductive bias transcribed speech provides to the model, and offer supporting evidence for this.</abstract>
       <url hash="a0ac629c">P19-1647</url>
@@ -8278,7 +8278,7 @@
       <author><first>Zhihao</first><last>Fan</last></author>
       <author><first>Zhongyu</first><last>Wei</last></author>
       <author><first>Siyuan</first><last>Wang</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>6514–6524</pages>
       <abstract>Image Captioning aims at generating a short description for an image. Existing research usually employs the architecture of CNN-RNN that views the generation as a sequential decision-making process and the entire dataset vocabulary is used as decoding space. They suffer from generating high frequent n-gram with irrelevant words. To tackle this problem, we propose to construct an image-grounded vocabulary, based on which, captions are generated with limitation and guidance. In specific, a novel hierarchical structure is proposed to construct the vocabulary incorporating both visual information and relations among words. For generation, we propose a word-aware RNN cell incorporating vocabulary information into the decoding process directly. Reinforce algorithm is employed to train the generator using constraint vocabulary as action space. Experimental results on MS COCO and Flickr30k show the effectiveness of our framework compared to some state-of-the-art models.</abstract>
       <url hash="135f9e5f">P19-1652</url>
@@ -8288,7 +8288,7 @@
     <paper id="653">
       <title>Distilling Translations with Visual Awareness</title>
       <author><first>Julia</first><last>Ive</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>6525–6538</pages>
       <abstract>Previous work on multimodal machine translation has shown that visual information is only needed in very specific cases, for example in the presence of ambiguous words where the textual context is not sufficient. As a consequence, models tend to learn to ignore this information. We propose a translate-and-refine approach to this problem where images are only used by a second stage decoder. This approach is trained jointly to generate a good first draft translation and to improve over this draft by (i) making better use of the target language textual context (both left and right-side contexts) and (ii) making use of visual context. This approach leads to the state of the art results. Additionally, we show that it has the ability to recover from erroneous or missing words in the source language.</abstract>
@@ -8298,7 +8298,7 @@
     </paper>
     <paper id="654">
       <title><fixed-case>VIFIDEL</fixed-case>: Evaluating the Visual Fidelity of Image Descriptions</title>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>6539–6550</pages>
@@ -8340,7 +8340,7 @@
       <title>Show, Describe and Conclude: On Exploiting the Structure Information of Chest <fixed-case>X</fixed-case>-ray Reports</title>
       <author><first>Baoyu</first><last>Jing</last></author>
       <author><first>Zeya</first><last>Wang</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>6570–6580</pages>
       <abstract>Chest X-Ray (CXR) images are commonly used for clinical screening and diagnosis. Automatically writing reports for these images can considerably lighten the workload of radiologists for summarizing descriptive findings and conclusive impressions. The complex structures between and within sections of the reports pose a great challenge to the automatic report generation. Specifically, the section Impression is a diagnostic summarization over the section Findings; and the appearance of normality dominates each section over that of abnormality. Existing studies rarely explore and consider this fundamental structure information. In this work, we propose a novel framework which exploits the structure information between and within report sections for generating CXR imaging reports. First, we propose a two-stage strategy that explicitly models the relationship between Findings and Impression. Second, we design a novel co-operative multi-agent system that implicitly captures the imbalanced distribution between abnormality and normality. Experiments on two CXR report datasets show that our method achieves state-of-the-art performance in terms of various evaluation metrics. Our results expose that the proposed approach is able to generate high-quality medical reports through integrating the structure information.</abstract>
       <url hash="1f863b4e">P19-1657</url>
@@ -8352,7 +8352,7 @@
       <author><first>Ting-Yao</first><last>Hsu</last></author>
       <author><first>Chieh-Yang</first><last>Huang</last></author>
       <author><first>Yen-Chia</first><last>Hsu</last></author>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>6581–6586</pages>
       <abstract>We introduce the first dataset for human edits of machine-generated visual stories and explore how these collected edits may be used for the visual story post-editing task. The dataset ,VIST-Edit, includes 14,905 human-edited versions of 2,981 machine-generated visual stories. The stories were generated by two state-of-the-art visual storytelling models, each aligned to 5 human-edited versions. We establish baselines for the task, showing how a relatively small set of human edits can be leveraged to boost the performance of large visual storytelling models. We also discuss the weak correlation between automatic evaluation scores and human ratings, motivating the need for new automatic metrics.</abstract>
       <url hash="2308f3fc">P19-1658</url>
@@ -8450,7 +8450,7 @@
     <paper id="6">
       <title>Towards <fixed-case>T</fixed-case>urkish <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation</title>
       <author><first>Zahra</first><last>Azin</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>43–47</pages>
       <abstract>Using rooted, directed and labeled graphs, Abstract Meaning Representation (AMR) abstracts away from syntactic features such as word order and does not annotate every constituent in a sentence. AMR has been specified for English and was not supposed to be an Interlingua. However, several studies strived to overcome divergences in the annotations between English AMRs and those of their target languages by refining the annotation specification. Following this line of research, we have started to build the first Turkish AMR corpus by hand-annotating 100 sentences of the Turkish translation of the novel “The Little Prince” and comparing the results with the English AMRs available for the same corpus. The next step is to prepare the Turkish AMR annotation specification for training future annotators.</abstract>
       <url hash="bdaa78be">P19-2006</url>
@@ -8489,7 +8489,7 @@
       <author><first>Nina</first><last>Hosseini-Kivanani</last></author>
       <author><first>Juan Camilo</first><last>Vásquez-Correa</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
-      <author><first>Elmar</first><last>Nöth</last></author>
+      <author id="elmar-noth"><first>Elmar</first><last>Nöth</last></author>
       <pages>74–80</pages>
       <abstract>Speech deficits are common symptoms amongParkinson’s Disease (PD) patients. The automatic assessment of speech signals is promising for the evaluation of the neurological state and the speech quality of the patients. Recently, progress has been made in applying machine learning and computational methods to automatically evaluate the speech of PD patients. In the present study, we plan to analyze the speech signals of PD patients and healthy control (HC) subjects in three different languages: German, Spanish, and Czech, with the aim to identify biomarkers to discriminate between PD patients and HC subjects and to evaluate the neurological state of the patients. Therefore, the main contribution of this study is the automatic classification of PD patients and HC subjects in different languages with focusing on phonation, articulation, and prosody. We will focus on an intelligibility analysis based on automatic speech recognition systems trained on these three languages. This is one of the first studies done that considers the evaluation of the speech of PD patients in different languages. The purpose of this research proposal is to build a model that can discriminate PD and HC subjects even when the language used for train and test is different.</abstract>
       <url hash="cf21cac9">P19-2010</url>
@@ -8498,7 +8498,7 @@
     </paper>
     <paper id="11">
       <title>Natural Language Generation: Recently Learned Lessons, Directions for Semantic Representation-based Approaches, and the Case of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Language</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>81–88</pages>
       <abstract>This paper presents a more recent literature review on Natural Language Generation. In particular, we highlight the efforts for Brazilian Portuguese in order to show the available resources and the existent approaches for this language. We also focus on the approaches for generation from semantic representations (emphasizing the Abstract Meaning Representation formalism) as well as their advantages and limitations, including possible future directions.</abstract>
@@ -8537,7 +8537,7 @@
       <title>Paraphrases as Foreign Languages in Multilingual Neural Machine Translation</title>
       <author><first>Zhong</first><last>Zhou</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>113–122</pages>
       <abstract>Paraphrases, rewordings of the same semantic meaning, are useful for improving generalization and translation. Unlike previous works that only explore paraphrases at the word or phrase level, we use different translations of the whole training data that are consistent in structure as paraphrases at the corpus level. We treat paraphrases as foreign languages, tag source sentences with paraphrase labels, and train on parallel paraphrases in the style of multilingual Neural Machine Translation (NMT). Our multi-paraphrase NMT that trains only on two languages outperforms the multilingual baselines. Adding paraphrases improves the rare word translation and increases entropy and diversity in lexical choice. Adding the source paraphrases boosts performance better than adding the target ones, while adding both lifts performance further. We achieve a BLEU score of 57.2 for French-to-English translation using 24 corpus-level paraphrases of the Bible, which outperforms the multilingual baselines and is +34.7 above the single-source single-target NMT baseline.</abstract>
       <url hash="5ac03988">P19-2015</url>
@@ -8558,8 +8558,8 @@
     </paper>
     <paper id="17">
       <title>Unsupervised Pretraining for Neural Machine Translation Using Elastic Weight Consolidation</title>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>130–135</pages>
       <abstract>This work presents our ongoing research of unsupervised pretraining in neural machine translation (NMT). In our method, we initialize the weights of the encoder and decoder with two language models that are trained with monolingual data and then fine-tune the model on parallel data using Elastic Weight Consolidation (EWC) to avoid forgetting of the original language modeling task. We compare the regularization by EWC with the previous work that focuses on regularization by language modeling objectives. The positive result is that using EWC with the decoder achieves BLEU scores similar to the previous work. However, the model converges 2-3 times faster and does not require the original unlabeled training data during the fine-tuning stage. In contrast, the regularization using EWC is less effective if the original and new tasks are not closely related. We show that initializing the bidirectional NMT encoder with a left-to-right language model and forcing the model to remember the original left-to-right language modeling task limits the learning capacity of the encoder for the whole bidirectional context.</abstract>
       <url hash="38fa1224">P19-2017</url>
@@ -8603,7 +8603,7 @@
     <paper id="21">
       <title>From Brain Space to Distributional Space: The Perilous Journeys of f<fixed-case>MRI</fixed-case> Decoding</title>
       <author><first>Gosse</first><last>Minnema</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>155–161</pages>
       <abstract>Recent work in cognitive neuroscience has introduced models for predicting distributional word meaning representations from brain imaging data. Such models have great potential, but the quality of their predictions has not yet been thoroughly evaluated from a computational linguistics point of view. Due to the limited size of available brain imaging datasets, standard quality metrics (e.g. similarity judgments and analogies) cannot be used. Instead, we investigate the use of several alternative measures for evaluating the predicted distributional space against a corpus-derived distributional space. We show that a state-of-the-art decoder, while performing impressively on metrics that are commonly used in cognitive neuroscience, performs unexpectedly poorly on our metrics. To address this, we propose strategies for improving the model’s performance. Despite returning promising results, our experiments also demonstrate that much work remains to be done before distributional representations can reliably be predicted from brain data.</abstract>
       <url hash="15cdf899">P19-2021</url>
@@ -8614,7 +8614,7 @@
       <title>Towards Incremental Learning of Word Embeddings Using Context Informativeness</title>
       <author><first>Alexandre</first><last>Kabbach</last></author>
       <author><first>Kristina</first><last>Gulordava</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>162–168</pages>
       <abstract>In this paper, we investigate the task of learning word embeddings from very sparse data in an incremental, cognitively-plausible way. We focus on the notion of ‘informativeness’, that is, the idea that some content is more valuable to the learning process than other. We further highlight the challenges of online learning and argue that previous systems fall short of implementing incrementality. Concretely, we incorporate informativeness in a previously proposed model of nonce learning, using it for context selection and learning rate modulation. We test our system on the task of learning new words from definitions, as well as on the task of learning new words from potentially uninformative contexts. We demonstrate that informativeness is crucial to obtaining state-of-the-art performance in a truly incremental setup.</abstract>
       <url hash="cb9a2089">P19-2022</url>
@@ -8647,7 +8647,7 @@
       <author><first>Vamshi Krishna</first><last>Srirangam</last></author>
       <author><first>Appidi Abhinav</first><last>Reddy</last></author>
       <author><first>Vinay</first><last>Singh</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>183–189</pages>
       <abstract>Named Entity Recognition(NER) is one of the important tasks in Natural Language Processing(NLP) and also is a subtask of Information Extraction. In this paper we present our work on NER in Telugu-English code-mixed social media data. Code-Mixing, a progeny of multilingualism is a way in which multilingual people express themselves on social media by using linguistics units from different languages within a sentence or speech context. Entity Extraction from social media data such as tweets(twitter) is in general difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. We present a Telugu-English code-mixed corpus with the corresponding named entity tags. The named entities used to tag data are Person(‘Per’), Organization(‘Org’) and Location(‘Loc’). We experimented with the machine learning models Conditional Random Fields(CRFs), Decision Trees and BiLSTMs on our corpus which resulted in a F1-score of 0.96, 0.94 and 0.95 respectively.</abstract>
       <url hash="b62dbad3">P19-2025</url>
@@ -8658,7 +8658,7 @@
       <title>Joint Learning of Named Entity Recognition and Entity Linking</title>
       <author><first>Pedro Henrique</first><last>Martins</last></author>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>190–196</pages>
       <abstract>Named entity recognition (NER) and entity linking (EL) are two fundamentally related tasks, since in order to perform EL, first the mentions to entities have to be detected. However, most entity linking approaches disregard the mention detection part, assuming that the correct mentions have been previously detected. In this paper, we perform joint learning of NER and EL to leverage their relatedness and obtain a more robust and generalisable system. For that, we introduce a model inspired by the Stack-LSTM approach. We observe that, in fact, doing multi-task learning of NER and EL improves the performance in both tasks when comparing with models trained with individual objectives. Furthermore, we achieve results competitive with the state-of-the-art in both NER and EL.</abstract>
       <url hash="a27d410d">P19-2026</url>
@@ -8741,8 +8741,8 @@
     <paper id="33">
       <title>From Bilingual to Multilingual Neural Machine Translation by Incremental Training</title>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>236–242</pages>
       <abstract>Multilingual Neural Machine Translation approaches are based on the use of task specific models and the addition of one more language can only be done by retraining the whole system. In this work, we propose a new training schedule that allows the system to scale to more languages without modification of the previous components based on joint training and language-independent encoder/decoder modules allowing for zero-shot translation. This work in progress shows close results to state-of-the-art in the WMT task.</abstract>
       <url hash="64755f04">P19-2033</url>
@@ -8787,7 +8787,7 @@
       <title>Normalizing Non-canonical <fixed-case>T</fixed-case>urkish Texts Using Machine Translation Approaches</title>
       <author><first>Talha</first><last>Çolakoğlu</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
+      <author id="ahmet-cuneyd-tantug"><first>Ahmet Cüneyd</first><last>Tantuğ</last></author>
       <pages>267–272</pages>
       <abstract>With the growth of the social web, user-generated text data has reached unprecedented sizes. Non-canonical text normalization provides a way to exploit this as a practical source of training data for language processing systems. The state of the art in Turkish text normalization is composed of a token level pipeline of modules, heavily dependent on external linguistic resources and manually defined rules. Instead, we propose a fully automated, context-aware machine translation approach with fewer stages of processing. Experiments with various implementations of our approach show that we are able to surpass the current best-performing system by a large margin.</abstract>
       <url hash="2ee11481">P19-2037</url>
@@ -8799,7 +8799,7 @@
       <author><first>Arijit</first><last>Ghosh Chowdhury</last></author>
       <author><first>Aniket</first><last>Didolkar</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>273–280</pages>
       <abstract>The rapid widespread of social media has lead to some undesirable consequences like the rapid increase of hateful content and offensive language. Religious Hate Speech, in particular, often leads to unrest and sometimes aggravates to violence against people on the basis of their religious affiliations. The richness of the Arabic morphology and the limited available resources makes this task especially challenging. The current state-of-the-art approaches to detect hate speech in Arabic rely entirely on textual (lexical and semantic) cues. Our proposed methodology contends that leveraging Community-Interaction can better help us profile hate speech content on social media. Our proposed ARHNet (Arabic Religious Hate Speech Net) model incorporates both Arabic Word Embeddings and Social Network Graphs for the detection of religious hate speech.</abstract>
       <url hash="77edfdc0">P19-2038</url>
@@ -8812,7 +8812,7 @@
       <author><first>Rohan</first><last>Mishra</last></author>
       <author><first>Pradyumna Prakhar</first><last>Sinha</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>281–287</pages>
       <abstract>Analyzing polarities and sentiments inherent in political speeches and debates poses an important problem today. This experiment aims to address this issue by analyzing publicly-available Hansard transcripts of the debates conducted in the UK Parliament. Our proposed approach, which uses community-based graph information to augment hand-crafted features based on topic modeling and emotion detection on debate transcripts, currently surpasses the benchmark results on the same dataset. Such sentiment classification systems could prove to be of great use in today’s politically turbulent times, for public knowledge of politicians’ stands on various relevant issues proves vital for good governance and citizenship. The experiments also demonstrate that continuous feature representations learned from graphs can improve performance on sentiment classification tasks significantly.</abstract>
       <url hash="b8ca077f">P19-2039</url>
@@ -8835,7 +8835,7 @@
       <author><first>Hicham</first><last>El Boukkouri</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>295–301</pages>
       <abstract>Using pre-trained word embeddings in conjunction with Deep Learning models has become the “de facto” approach in Natural Language Processing (NLP). While this usually yields satisfactory results, off-the-shelf word embeddings tend to perform poorly on texts from specialized domains such as clinical reports. Moreover, training specialized word representations from scratch is often either impossible or ineffective due to the lack of large enough in-domain data. In this work, we focus on the clinical domain for which we study embedding strategies that rely on general-domain resources only. We show that by combining off-the-shelf contextual embeddings (ELMo) with static word2vec embeddings trained on a small in-domain corpus built from the task data, we manage to reach and sometimes outperform representations learned from a large corpus in the medical domain.</abstract>
       <url hash="51426742">P19-2041</url>
@@ -8865,7 +8865,7 @@
       <title>Improving Neural Entity Disambiguation with Graph Embeddings</title>
       <author><first>Özge</first><last>Sevgili</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>315–322</pages>
       <abstract>Entity Disambiguation (ED) is the task of linking an ambiguous entity mention to a corresponding entry in a knowledge base. Current methods have mostly focused on unstructured text data to learn representations of entities, however, there is structured information in the knowledge base itself that should be useful to disambiguate entities. In this work, we propose a method that uses graph embeddings for integrating structured information from the knowledge base with unstructured information from text-based representations. Our experiments confirm that graph embeddings trained on a graph of hyperlinks between Wikipedia articles improve the performances of simple feed-forward neural ED model and a state-of-the-art neural ED system.</abstract>
       <url hash="eb34789d">P19-2044</url>
@@ -8876,7 +8876,7 @@
       <title>Hierarchical Multi-label Classification of Text with Capsule Networks</title>
       <author><first>Rami</first><last>Aly</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>323–330</pages>
       <abstract>Capsule networks have been shown to demonstrate good performance on structured data in the area of visual inference. In this paper we apply and compare simple shallow capsule networks for hierarchical multi-label text classification and show that they can perform superior to other neural networks, such as CNNs and LSTMs, and non-neural network architectures such as SVMs. For our experiments, we use the established Web of Science (WOS) dataset and introduce a new real-world scenario dataset, the BlurbGenreCollection (BGC). Our results confirm the hypothesis that capsule networks are especially advantageous for rare events and structurally diverse categories, which we attribute to their ability to combine latent encoded information.</abstract>
       <url hash="07bf9a10">P19-2045</url>
@@ -8886,7 +8886,7 @@
     <paper id="46">
       <title>Convolutional Neural Networks for Financial Text Regression</title>
       <author><first>Neşat</first><last>Dereli</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <pages>331–337</pages>
       <abstract>Forecasting financial volatility of a publicly-traded company from its annual reports has been previously defined as a text regression problem. Recent studies use a manually labeled lexicon to filter the annual reports by keeping sentiment words only. In order to remove the lexicon dependency without decreasing the performance, we replace bag-of-words model word features by word embedding vectors. Using word vectors increases the number of parameters. Considering the increase in number of parameters and excessive lengths of annual reports, a convolutional neural network model is proposed and transfer learning is applied. Experimental results show that the convolutional neural network model provides more accurate volatility predictions than lexicon based models.</abstract>
       <url hash="77e56af3">P19-2046</url>
@@ -8920,7 +8920,7 @@
     <paper id="49">
       <title>Scheduled Sampling for Transformers</title>
       <author><first>Tsvetomila</first><last>Mihaylova</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>351–356</pages>
       <abstract>Scheduled sampling is a technique for avoiding one of the known problems in sequence-to-sequence generation: exposure bias. It consists of feeding the model a mix of the teacher forced embeddings and the model predictions from the previous step in training time. The technique has been used for improving model performance with recurrent neural networks (RNN). In the Transformer model, unlike the RNN, the generation of a new word attends to the full sentence generated so far, not only to the last word, and it is not straightforward to apply the scheduled sampling technique. We propose some structural changes to allow scheduled sampling to be applied to Transformer architectures, via a two-pass decoding strategy. Experiments on two language pairs achieve performance close to a teacher-forcing baseline and show that this technique is promising for further exploration.</abstract>
       <url hash="7d6e900e">P19-2049</url>
@@ -8930,7 +8930,7 @@
     <paper id="50">
       <title><fixed-case>BREAKING</fixed-case>! Presenting Fake News Corpus for Automated Fact Checking</title>
       <author><first>Archita</first><last>Pathak</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>357–362</pages>
       <abstract>Popular fake news articles spread faster than mainstream articles on the same topic which renders manual fact checking inefficient. At the same time, creating tools for automatic detection is as challenging due to lack of dataset containing articles which present fake or manipulated stories as compelling facts. In this paper, we introduce manually verified corpus of compelling fake and questionable news articles on the USA politics, containing around 700 articles from Aug-Nov, 2016. We present various analyses on this corpus and finally implement classification model based on linguistic features. This work is still in progress as we plan to extend the dataset in the future and use it for our approach towards automated fake news detection.</abstract>
       <doi>10.18653/v1/P19-2050</doi>
@@ -8954,7 +8954,7 @@
       <author><first>Yash Kumar</first><last>Lal</last></author>
       <author><first>Vaibhav</first><last>Kumar</last></author>
       <author><first>Mrinal</first><last>Dhar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>371–377</pages>
       <abstract>Code-mixing is the phenomenon of mixing the vocabulary and syntax of multiple languages in the same sentence. It is an increasingly common occurrence in today’s multilingual society and poses a big challenge when encountered in different downstream tasks. In this paper, we present a hybrid architecture for the task of Sentiment Analysis of English-Hindi code-mixed data. Our method consists of three components, each seeking to alleviate different issues. We first generate subword level representations for the sentences using a CNN architecture. The generated representations are used as inputs to a Dual Encoder Network which consists of two different BiLSTMs - the Collective and Specific Encoder. The Collective Encoder captures the overall sentiment of the sentence, while the Specific Encoder utilizes an attention mechanism in order to focus on individual sentiment-bearing sub-words. This, combined with a Feature Network consisting of orthographic features and specially trained word embeddings, achieves state-of-the-art results - 83.54% accuracy and 0.827 F1 score - on a benchmark dataset.</abstract>
@@ -9057,7 +9057,7 @@
     <meta>
       <booktitle>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: System Demonstrations</booktitle>
       <url hash="a4023c8e">P19-3</url>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Enrique</first><last>Alfonseca</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
@@ -9096,9 +9096,9 @@
       <author><first>Wolfgang</first><last>Jentner</last></author>
       <author><first>Fabian</first><last>Sperrle</last></author>
       <author><first>Rita</first><last>Sevastjanova</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <pages>13–18</pages>
       <abstract>We present a modular framework for the rapid-prototyping of linguistic, web-based, visual analytics applications. Our framework gives developers access to a rich set of machine learning and natural language processing steps, through encapsulating them into micro-services and combining them into a computational pipeline. This processing pipeline is auto-configured based on the requirements of the visualization front-end, making the linguistic processing and visualization design, detached independent development tasks. This paper describes the constellation and modality of our framework, which continues to support the efficient development of various human-in-the-loop, linguistic visual analytics research techniques and applications.</abstract>
       <url hash="d022bde8">P19-3003</url>
@@ -9178,7 +9178,7 @@
     </paper>
     <paper id="9">
       <title>An adaptable task-oriented dialog system for stand-alone embedded devices</title>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Vu Cong Duy</first><last>Hoang</last></author>
       <author><first>Tuyen Quang</first><last>Pham</last></author>
       <author><first>Yu-Heng</first><last>Hong</last></author>
@@ -9196,9 +9196,9 @@
     </paper>
     <paper id="10">
       <title><fixed-case>A</fixed-case>lpaca<fixed-case>T</fixed-case>ag: An Active Learning-based Crowd Annotation Framework for Sequence Tagging</title>
-      <author><first>Bill Yuchen</first><last>Lin</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Yuchen</first><last>Lin</last></author>
       <author><first>Dong-Ho</first><last>Lee</last></author>
-      <author><first>Frank F.</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank F.</first><last>Xu</last></author>
       <author><first>Ouyu</first><last>Lan</last></author>
       <author><first>Xiang</first><last>Ren</last></author>
       <pages>58–63</pages>
@@ -9209,7 +9209,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>C</fixed-case>onv<fixed-case>L</fixed-case>ab: Multi-Domain End-to-End Dialog System Platform</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Qi</first><last>Zhu</last></author>
       <author><first>Ryuichi</first><last>Takanobu</last></author>
       <author><first>Zheng</first><last>Zhang</last></author>
@@ -9229,12 +9229,12 @@
     <paper id="12">
       <title>Demonstration of a Neural Machine Translation System with Online Learning for Translators</title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Amando</first><last>Estela Pastor</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
       <author><first>Alexander</first><last>Helle</last></author>
-      <author><first>Álvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Manuel</first><last>Herranz Pérez</last></author>
       <pages>70–74</pages>
       <abstract>We present a demonstration of our system, which implements online learning for neural machine translation in a production environment. These techniques allow the system to continuously learn from the corrections provided by the translators. We implemented an end-to-end platform integrating our machine translation servers to one of the most common user interfaces for professional translators: SDL Trados Studio. We pretend to save post-editing effort as the machine is continuously learning from its mistakes and adapting the models to a specific domain or user style.</abstract>
@@ -9245,7 +9245,7 @@
     <paper id="13">
       <title><fixed-case>FASTD</fixed-case>ial: Abstracting Dialogue Policies for Fast Development of Task Oriented Agents</title>
       <author><first>Serra Sinem</first><last>Tekiroglu</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
       <pages>75–80</pages>
       <abstract>We present a novel abstraction framework called FASTDial for designing task oriented dialogue agents, built on top of the OpenDial toolkit. This framework is meant to facilitate prototyping and development of dialogue systems from scratch also by non tech savvy especially when limited training data is available. To this end, we use a generic and simple frame-slots data-structure with pre-defined dialogue policies that allows for fast design and implementation at the price of some flexibility reduction. Moreover, it allows for minimizing programming effort and domain expert training time, by hiding away many implementation details. We provide a system demonstration screencast video in the following link: <url>https://vimeo.com/329840716</url></abstract>
@@ -9255,8 +9255,8 @@
     </paper>
     <paper id="14">
       <title>A Neural, Interactive-predictive System for Multimodal Sequence to Sequence Tasks</title>
-      <author><first>Álvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>81–86</pages>
       <abstract>We present a demonstration of a neural interactive-predictive system for tackling multimodal sequence to sequence tasks. The system generates text predictions to different sequence to sequence tasks: machine translation, image and video captioning. These predictions are revised by a human agent, who introduces corrections in the form of characters. The system reacts to each correction, providing alternative hypotheses, compelling with the feedback provided by the user. The final objective is to reduce the human effort required during this correction process. This system is implemented following a client-server architecture. For accessing the system, we developed a website, which communicates with the neural model, hosted in a local server. From this website, the different tasks can be tackled following the interactive–predictive framework. We open-source all the code developed for building this system. The demonstration in hosted in <url>http://casmacat.prhlt.upv.es/interactive-seq2seq</url>.</abstract>
       <url hash="9fe6153d">P19-3014</url>
@@ -9315,12 +9315,12 @@
     </paper>
     <paper id="18">
       <title>An Environment for Relational Annotation of Political Debates</title>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nico</first><last>Blokker</last></author>
       <author><first>Sebastian</first><last>Haunss</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>105–110</pages>
       <abstract>This paper describes the MARDY corpus annotation environment developed for a collaboration between political science and computational linguistics. The tool realizes the complete workflow necessary for annotating a large newspaper text collection with rich information about claims (demands) raised by politicians and other actors, including claim and actor spans, relations, and polarities. In addition to the annotation GUI, the tool supports the identification of relevant documents, text pre-processing, user management, integration of external knowledge bases, annotation comparison and merging, statistical analysis, and the incorporation of machine learning models as “pseudo-annotators”.</abstract>
       <url hash="ee87cf86">P19-3018</url>
@@ -9331,7 +9331,7 @@
       <title><fixed-case>GLTR</fixed-case>: Statistical Detection and Visualization of Generated Text</title>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Hendrik</first><last>Strobelt</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>111–116</pages>
       <abstract>The rapid improvement of language models has raised the specter of abuse of text generation systems. This progress motivates the development of simple methods for detecting generated text that can be used by non-experts. In this work, we introduce GLTR, a tool to support humans in detecting whether a text was generated by a model. GLTR applies a suite of baseline statistical methods that can detect generation artifacts across multiple sampling schemes. In a human-subjects study, we show that the annotation scheme provided by GLTR improves the human detection-rate of fake text from 54% to 72% without any prior training. GLTR is open-source and publicly deployed, and has already been widely used to detect generated outputs.</abstract>
       <url hash="09e60d79">P19-3019</url>
@@ -9340,11 +9340,11 @@
     </paper>
     <paper id="20">
       <title><fixed-case>O</fixed-case>pen<fixed-case>K</fixed-case>iwi: An Open Source Framework for Quality Estimation</title>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>Jonay</first><last>Trénous</last></author>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Miguel</first><last>Vera</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>117–122</pages>
       <abstract>We introduce OpenKiwi, a Pytorch-based open source framework for translation quality estimation. OpenKiwi supports training and testing of word-level and sentence-level quality estimation systems, implementing the winning systems of the WMT 2015–18 quality estimation campaigns. We benchmark OpenKiwi on two datasets from WMT 2018 (English-German SMT and NMT), yielding state-of-the-art performance on the word-level tasks and near state-of-the-art in the sentence-level tasks.</abstract>
       <url hash="cd2b919f">P19-3020</url>
@@ -9365,7 +9365,7 @@
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
       <author><first>Tulasi</first><last>Menon</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>123–128</pages>
       <abstract>The Intelligent Conversation Engine: Code and Pre-trained Systems (Microsoft Icecaps) is an upcoming open-source natural language processing repository. Icecaps wraps TensorFlow functionality in a modular component-based architecture, presenting an intuitive and flexible paradigm for constructing sophisticated learning setups. Capabilities include multitask learning between models with shared parameters, upgraded language model decoding features, a range of built-in architectures, and a user-friendly data processing pipeline. The system is targeted toward conversational tasks, exploring diverse response generation, coherence, and knowledge grounding. Icecaps also provides pre-trained conversational models that can be either used directly or loaded for fine-tuning or bootstrapping other models; these models power an online demo of our framework.</abstract>
       <url hash="723560f0">P19-3021</url>
@@ -9390,7 +9390,7 @@
       <author><first>Yunyao</first><last>Li</last></author>
       <author><first>Eser</first><last>Kandogan</last></author>
       <author><first>Yiwei</first><last>Yang</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <pages>135–140</pages>
       <abstract>While the role of humans is increasingly recognized in machine learning community, representation of and interaction with models in current human-in-the-loop machine learning (HITL-ML) approaches are too low-level and far-removed from human’s conceptual models. We demonstrate HEIDL, a prototype HITL-ML system that exposes the machine-learned model through high-level, explainable linguistic expressions formed of predicates representing semantic structure of text. In HEIDL, human’s role is elevated from simply evaluating model predictions to interpreting and even updating the model logic directly by enabling interaction with rule predicates themselves. Raising the currency of interaction to such semantic levels calls for new interaction paradigms between humans and machines that result in improved productivity for text analytics model development process. Moreover, by involving humans in the process, the human-machine co-created models generalize better to unseen data as domain experts are able to instill their expertise by extrapolating from what has been learned by automated algorithms from few labelled data.</abstract>
       <url hash="a3343b77">P19-3023</url>
@@ -9403,7 +9403,7 @@
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
       <author><first>Anastassia</first><last>Loukina</last></author>
       <author><first>Binod</first><last>Gyawali</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
       <author><first>John</first><last>Sabatini</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <pages>141–146</pages>
@@ -9454,7 +9454,7 @@
       <author><first>Xiaodan</first><last>Liang</last></author>
       <author><first>Wanrong</first><last>Zhu</last></author>
       <author><first>Devendra</first><last>Sachan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>159–164</pages>
       <abstract>We introduce Texar, an open-source toolkit aiming to support the broad set of text generation tasks that transform any inputs into natural language, such as machine translation, summarization, dialog, content manipulation, and so forth. With the design goals of modularity, versatility, and extensibility in mind, Texar extracts common patterns underlying the diverse tasks and methodologies, creates a library of highly reusable modules and functionalities, and allows arbitrary model architectures and algorithmic paradigms. In Texar, model architecture, inference, and learning processes are properly decomposed. Modules at a high concept level can be freely assembled or plugged in/swapped out. Texar is thus particularly suitable for researchers and practitioners to do fast prototyping and experimentation. The versatile toolkit also fosters technique sharing across different text generation tasks. Texar supports both TensorFlow and PyTorch, and is released under Apache License 2.0 at <url>https://www.texar.io</url>.</abstract>
       <url hash="705ffb5d">P19-3027</url>
@@ -9500,7 +9500,7 @@
       <author><first>Philipp</first><last>Heidenreich</last></author>
       <author><first>Alexander</first><last>Bondarenko</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>195–200</pages>
       <abstract>We present TARGER, an open source neural argument mining framework for tagging arguments in free input texts and for keyword-based retrieval of arguments from an argument-tagged web-scale corpus. The currently available models are pre-trained on three recent argument mining datasets and enable the use of neural argument mining without any reproducibility effort on the user’s side. The open source code ensures portability to other domains and use cases.</abstract>
@@ -9524,7 +9524,7 @@
       <author><first>Wen-Bin</first><last>Han</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
       <author><first>Chingyu</first><last>Yang</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>207–212</pages>
       <abstract>We introduce a method for generating suggestions on a given sentence for improving the proficiency level. In our approach, the sentence is transformed into a sequence of grammatical elements aimed at providing suggestions of more advanced grammar elements based on originals. The method involves parsing the sentence, identifying grammatical elements, and ranking related elements to recommend a higher level of grammatical element. We present a prototype tutoring system, Level-Up, that applies the method to English learners’ essays in order to assist them in writing and reading. Evaluation on a set of essays shows that our method does assist user in writing.</abstract>
       <url hash="cb9e295c">P19-3033</url>
@@ -9541,7 +9541,7 @@
       <author><first>Kai-Wen</first><last>Tuan</last></author>
       <author><first>Chung-Ting</first><last>Tsai</last></author>
       <author><first>Wen-Bin</first><last>Han</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>213–218</pages>
       <abstract>We introduce a system aimed at improving and expanding second language learners’ English vocabulary. In addition to word definitions, we provide rich lexical information such as collocations and grammar patterns for target words. We present Linggle Booster that takes an article, identifies target vocabulary, provides lexical information, and generates a quiz on target words. Linggle Booster also links named-entity to corresponding Wikipedia pages. Evaluation on a set of target words shows that the method have reasonably good performance in terms of generating useful and information for learning vocabulary.</abstract>
       <url hash="2973f004">P19-3034</url>
@@ -9553,7 +9553,7 @@
     <meta>
       <booktitle>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts</booktitle>
       <url hash="8caff722">P19-4</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
@@ -9567,7 +9567,7 @@
     </frontmatter>
     <paper id="1">
       <title>Latent Structure Models for Natural Language Processing</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Tsvetomila</first><last>Mihaylova</last></author>
       <author><first>Nikita</first><last>Nangia</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
@@ -9582,7 +9582,7 @@
       <title>Graph-Based Meaning Representations: Design and Processing</title>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <pages>6–11</pages>
       <abstract>This tutorial is on representing and processing sentence meaning in the form of labeled directed graphs. The tutorial will (a) briefly review relevant background in formal and linguistic semantics; (b) semi-formally define a unified abstract view on different flavors of semantic graphs and associated terminology; (c) survey common frameworks for graph-based meaning representation and available graph banks; and (d) offer a technical overview of a representative selection of different parsing approaches.</abstract>
       <url hash="db6fb488">P19-4002</url>
@@ -9591,9 +9591,9 @@
     </paper>
     <paper id="3">
       <title>Discourse Analysis and Its Applications</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <author><first>Gabriel</first><last>Murray</last></author>
       <pages>12–17</pages>
       <abstract>Discourse processing is a suite of Natural Language Processing (NLP) tasks to uncover linguistic structures from texts at several levels, which can support many downstream applications. This involves identifying the topic structure, the coherence structure, the coreference structure, and the conversation structure for conversational discourse. Taken together, these structures can inform text summarization, machine translation, essay scoring, sentiment analysis, information extraction, question answering, and thread recovery. The tutorial starts with an overview of basic concepts in discourse analysis – monologue vs. conversation, synchronous vs. asynchronous conversation, and key linguistic structures in discourse analysis. We also give an overview of linguistic structures and corresponding discourse analysis tasks that discourse researchers are generally interested in, as well as key applications on which these discourse structures have an impact.</abstract>
@@ -9605,7 +9605,7 @@
       <title>Computational Analysis of Political Texts: Bridging Research Efforts Across Communities</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>18–23</pages>
       <abstract>In the last twenty years, political scientists started adopting and developing natural language processing (NLP) methods more actively in order to exploit text as an additional source of data in their analyses. Over the last decade the usage of computational methods for analysis of political texts has drastically expanded in scope, allowing for a sustained growth of the text-as-data community in political science. In political science, NLP methods have been extensively used for a number of analyses types and tasks, including inferring policy position of actors from textual evidence, detecting topics in political texts, and analyzing stylistic aspects of political texts (e.g., assessing the role of language ambiguity in framing the political agenda). Just like in numerous other domains, much of the work on computational analysis of political texts has been enabled and facilitated by the development of resources such as, the topically coded electoral programmes (e.g., the Manifesto Corpus) or topically coded legislative texts (e.g., the Comparative Agenda Project). Political scientists created resources and used available NLP methods to process textual data largely in isolation from the NLP community. At the same time, NLP researchers addressed closely related tasks such as election prediction, ideology classification, and stance detection. In other words, these two communities have been largely agnostic of one another, with NLP researchers mostly unaware of interesting applications in political science and political scientists not applying cutting-edge NLP methodology to their problems. The main goal of this tutorial is to systematize and analyze the body of research work on political texts from both communities. We aim to provide a gentle, all-round introduction to methods and tasks related to computational analysis of political texts. Our vision is to bring the two research communities closer to each other and contribute to faster and more significant developments in this interdisciplinary research area.</abstract>
       <url hash="fc3c89ff">P19-4004</url>
@@ -9614,7 +9614,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>W</fixed-case>ikipedia as a Resource for Text Analysis and Retrieval</title>
-      <author><first>Marius</first><last>Pasca</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
       <pages>24</pages>
       <abstract>This tutorial examines the role of Wikipedia in tasks related to text analysis and retrieval. Text analysis tasks, which take advantage of Wikipedia, include coreference resolution, word sense and entity disambiguation and information extraction. In information retrieval, a better understanding of the structure and meaning of queries helps in matching queries against documents, clustering search results, answer and entity retrieval and retrieving knowledge panels for queries asking about popular entities.</abstract>
       <url hash="3fbb1180">P19-4005</url>
@@ -9623,7 +9623,7 @@
     </paper>
     <paper id="6">
       <title>Deep <fixed-case>B</fixed-case>ayesian Natural Language Processing</title>
-      <author><first>Jen-Tzung</first><last>Chien</last></author>
+      <author id="jen-tzung-chien"><first>Jen-Tzung</first><last>Chien</last></author>
       <pages>25–30</pages>
       <abstract>This introductory tutorial addresses the advances in deep Bayesian learning for natural language with ubiquitous applications ranging from speech recognition to document summarization, text classification, text segmentation, information extraction, image caption generation, sentence generation, dialogue control, sentiment classification, recommendation system, question answering and machine translation, to name a few. Traditionally, “deep learning” is taken to be a learning process where the inference or optimization is based on the real-valued deterministic model. The “semantic structure” in words, sentences, entities, actions and documents drawn from a large vocabulary may not be well expressed or correctly optimized in mathematical logic or computer programs. The “distribution function” in discrete or continuous latent variable model for natural language may not be properly decomposed or estimated. This tutorial addresses the fundamentals of statistical models and neural networks, and focus on a series of advanced Bayesian models and deep models including hierarchical Dirichlet process, Chinese restaurant process, hierarchical Pitman-Yor process, Indian buffet process, recurrent neural network, long short-term memory, sequence-to-sequence model, variational auto-encoder, generative adversarial network, attention mechanism, memory-augmented neural network, skip neural network, stochastic neural network, predictive state neural network and policy neural network. We present how these models are connected and why they work for a variety of applications on symbolic and complex patterns in natural language. The variational inference and sampling method are formulated to tackle the optimization for complicated models. The word and sentence embeddings, clustering and co-clustering are merged with linguistic and semantic constraints. A series of case studies and domain applications are presented to tackle different issues in deep Bayesian processing, learning and understanding. At last, we will point out a number of directions and outlooks for future studies.</abstract>
       <url hash="5c4f9604">P19-4006</url>
@@ -9633,7 +9633,7 @@
     <paper id="7">
       <title>Unsupervised Cross-Lingual Representation Learning</title>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
       <pages>31–38</pages>
       <abstract>In this tutorial, we provide a comprehensive survey of the exciting recent work on cutting-edge weakly-supervised and unsupervised cross-lingual word representations. After providing a brief history of supervised cross-lingual word representations, we focus on: 1) how to induce weakly-supervised and unsupervised cross-lingual word representations in truly resource-poor settings where bilingual supervision cannot be guaranteed; 2) critical examinations of different training conditions and requirements under which unsupervised algorithms can and cannot work effectively; 3) more robust methods for distant language pairs that can mitigate instability issues and low performance for distant language pairs; 4) how to comprehensively evaluate such representations; and 5) diverse applications that benefit from cross-lingual word representations (e.g., MT, dialogue, cross-lingual sequence labeling and structured prediction applications, cross-lingual IR).</abstract>
@@ -9644,7 +9644,7 @@
     <paper id="8">
       <title>Advances in Argument Mining</title>
       <author><first>Katarzyna</first><last>Budzynska</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>39–42</pages>
       <abstract>This course aims to introduce students to an exciting and dynamic area that has witnessed remarkable growth over the past 36 months. Argument mining builds on opinion mining, sentiment analysis and related to tasks to automatically extract not just *what* people think, but *why* they hold the opinions they do. From being largely beyond the state of the art barely five years ago, there are now many hundreds of papers on the topic, millions of dollars of commercial and research investment, and the 6th ACL workshop on the topic will be in Florence in 2019. The tutors have delivered tutorials on argument mining at ACL 2016, at IJCAI 2016 and at ESSLLI 2017; for ACL 2019, we have developed a tutorial that provides a synthesis of the major advances in the area over the past three years.</abstract>
       <url hash="5b83e062">P19-4008</url>
diff --git a/data/xml/P79.xml b/data/xml/P79.xml
index 9b8533ef75..b6658aef5a 100644
--- a/data/xml/P79.xml
+++ b/data/xml/P79.xml
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Towards a Self-Extending Parser</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <doi>10.3115/982163.982166</doi>
       <pages>3–7</pages>
       <url hash="7af5e8c7">P79-1002</url>
@@ -47,7 +47,7 @@
     </paper>
     <paper id="5">
       <title>Toward a Computational Theory of Speech Perception</title>
-      <author><first>Jonathan</first><last>All</last></author>
+      <author id="jonathan-allen"><first>Jonathan</first><last>All</last></author>
       <doi>10.3115/982163.982169</doi>
       <pages>17–17</pages>
       <url hash="362d44de">P79-1005</url>
@@ -55,8 +55,8 @@
     </paper>
     <paper id="6">
       <title>Ungrammaticality and Extra-Grammaticality in Natural Language Understanding Systems</title>
-      <author><first>Stan C.</first><last>Kwasny</last></author>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="stan-c-kwasny"><first>Stan C.</first><last>Kwasny</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <doi>10.3115/982163.982170</doi>
       <pages>19–23</pages>
       <url hash="dae87e77">P79-1006</url>
@@ -64,7 +64,7 @@
     </paper>
     <paper id="7">
       <title>Generalized Augmented Transitiom Network Grammars for Generation From Semantic Networks</title>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <doi>10.3115/982163.982171</doi>
       <pages>25–29</pages>
       <url hash="a635d68e">P79-1007</url>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="8">
       <title>Knowledge Organization and Application: Brief Comments on Papers in the Session</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/982163.982173</doi>
       <pages>31–31</pages>
       <url hash="12302a79">P79-1008</url>
@@ -105,7 +105,7 @@
     <paper id="12">
       <title>A Snapshot of <fixed-case>KDS</fixed-case>: A Knowledge Delivery System</title>
       <author><first>James A.</first><last>Moore</last></author>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/982163.982177</doi>
       <pages>51–52</pages>
       <url hash="f002c179">P79-1012</url>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="15">
       <title>Discourse: Codes and Clues in Contexts</title>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <doi>10.3115/982163.982181</doi>
       <pages>65–65</pages>
       <url hash="5237d2b3">P79-1015</url>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="16">
       <title>Paraphrasing Using Given and New Information in a Question-Answer System</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/982163.982182</doi>
       <pages>67–72</pages>
       <url hash="c1bc81f0">P79-1016</url>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="18">
       <title>The Role Of Focussing in Interpretation of Pronouns</title>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <doi>10.3115/982163.982184</doi>
       <pages>77–78</pages>
       <url hash="7aabef01">P79-1018</url>
@@ -170,7 +170,7 @@
     </paper>
     <paper id="20">
       <title>Design for Dialogue Comprehension</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/982163.982186</doi>
       <pages>83–84</pages>
       <url hash="366390e7">P79-1020</url>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="21">
       <title>Plans, Inference, and Indirect Speech Acts</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/982163.982187</doi>
       <pages>85–87</pages>
       <url hash="23366bac">P79-1021</url>
@@ -203,7 +203,7 @@
     <paper id="24">
       <title>An Application of Automated Language Understanding Techniques to the Generation of Data Base Elements</title>
       <author><first>Georgette</first><last>Silva</last></author>
-      <author><first>Christine</first><last>Montgomery</last></author>
+      <author id="christine-a-montgomery"><first>Christine</first><last>Montgomery</last></author>
       <author><first>Don</first><last>Dwiggins</last></author>
       <doi>10.3115/982163.982191</doi>
       <pages>95–97</pages>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="25">
       <title>Response Generation in Question - Answering Systems</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/982163.982192</doi>
       <pages>99–101</pages>
       <url hash="9e76384e">P79-1025</url>
@@ -228,7 +228,7 @@
     <paper id="27">
       <title>Prospects for Computer-Assisted Dialect Adaption</title>
       <author><first>David J.</first><last>Weber</last></author>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/982163.982194</doi>
       <pages>109–110</pages>
       <url hash="2980ccfa">P79-1027</url>
diff --git a/data/xml/P80.xml b/data/xml/P80.xml
index 8c6cdbd4e3..cfae10ef5d 100644
--- a/data/xml/P80.xml
+++ b/data/xml/P80.xml
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Understanding Scene Descriptions as Event Simulations</title>
-      <author><first>David L.</first><last>Waltz</last></author>
+      <author id="david-l-waltz"><first>David L.</first><last>Waltz</last></author>
       <doi>10.3115/981436.981439</doi>
       <pages>7–11</pages>
       <url hash="267f84a5">P80-1002</url>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Metaphor - A Key to Extensible Semantic Analysis</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <doi>10.3115/981436.981441</doi>
       <pages>17–21</pages>
       <url hash="c6324f45">P80-1004</url>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="6">
       <title>Interactive Discourse: Influence of Problem Context Panel Chair’s Introduction</title>
-      <author><first>Barbara</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara</first><last>Grosz</last></author>
       <doi>10.3115/981436.981444</doi>
       <pages>25–25</pages>
       <url hash="17b3be88">P80-1006</url>
@@ -71,7 +71,7 @@
     </paper>
     <paper id="8">
       <title>Signalling the Interpretation of Indirect Speech Acts</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <doi>10.3115/981436.981446</doi>
       <pages>29–30</pages>
       <url hash="ec6a3db4">P80-1008</url>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="9">
       <title>Parasession on Topics in Interactive Discourse Influence of the Problem Context</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981436.981447</doi>
       <pages>31–33</pages>
       <url hash="2695382f">P80-1009</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="12">
       <title>Phrase Structure Trees Bear More Fruit Than You Would Have Thought</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Leon S.</first><last>Levy</last></author>
       <doi>10.3115/981436.981451</doi>
       <pages>41–42</pages>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="14">
       <title>Computational Analogues of Constraints on Grammars: A Model of Syntactic Acquisition</title>
-      <author><first>Robert Cregar</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert Cregar</first><last>Berwick</last></author>
       <doi>10.3115/981436.981453</doi>
       <pages>49–53</pages>
       <url hash="d61c63e3">P80-1014</url>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="15">
       <title>A Linear-time Model of Language Production: some psychological implications (extended abstract)</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/981436.981454</doi>
       <pages>55–57</pages>
       <url hash="f10f1412">P80-1015</url>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="16">
       <title>Problem Solving Applied to Language Generation</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <doi>10.3115/981436.981455</doi>
       <pages>59–63</pages>
       <url hash="2fc12819">P80-1016</url>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="17">
       <title>Interactive Discourse: Influence of the Social Context: Panel Chair’s Introduction</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.3115/981436.981457</doi>
       <pages>65–66</pages>
       <url hash="5762f186">P80-1017</url>
@@ -187,7 +187,7 @@
     </paper>
     <paper id="22">
       <title>The Computer as an Active Communication Medium</title>
-      <author><first>John C.</first><last>Thomas</last></author>
+      <author id="john-c-thomas"><first>John C.</first><last>Thomas</last></author>
       <doi>10.3115/981436.981462</doi>
       <pages>83–86</pages>
       <url hash="e46890d6">P80-1022</url>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="25">
       <title>If The Parser Fails</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <author><first>John E.</first><last>Black</last></author>
       <doi>10.3115/981436.981466</doi>
       <pages>95–95</pages>
@@ -238,7 +238,7 @@
     </paper>
     <paper id="28">
       <title>On Parsing Strategies and Closure</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <doi>10.3115/981436.981469</doi>
       <pages>107–111</pages>
       <url hash="ed25e061">P80-1028</url>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="32">
       <title>Interactive Discourse: Looking to the Future: Panel Chair’s Introduction</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <doi>10.3115/981436.981474</doi>
       <pages>127–127</pages>
       <url hash="54d75824">P80-1032</url>
@@ -289,7 +289,7 @@
     </paper>
     <paper id="34">
       <title>Future Prospects for Computational Linguistics</title>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <doi>10.3115/981436.981476</doi>
       <pages>131–135</pages>
       <url hash="ff7a418e">P80-1034</url>
@@ -367,7 +367,7 @@
     <paper id="43">
       <title>Real Reading Behavior</title>
       <author><first>Robert</first><last>Thibadeau</last></author>
-      <author><first>Marcel</first><last>Just</last></author>
+      <author id="marcel-adam-just"><first>Marcel</first><last>Just</last></author>
       <author><first>Patricia</first><last>Carpenter</last></author>
       <doi>10.3115/981436.981486</doi>
       <pages>159–162</pages>
diff --git a/data/xml/P81.xml b/data/xml/P81.xml
index e927152db6..6834cb1d4c 100644
--- a/data/xml/P81.xml
+++ b/data/xml/P81.xml
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Computational Complexity and <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <doi>10.3115/981923.981926</doi>
       <pages>7–12</pages>
       <url hash="7964cbf3">P81-1002</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Corepresentational Grammar and Parsing <fixed-case>E</fixed-case>nglish Comparatives</title>
-      <author><first>Karen</first><last>Ryan</last></author>
+      <author id="karen-l-ryan"><first>Karen</first><last>Ryan</last></author>
       <doi>10.3115/981923.981927</doi>
       <pages>13–18</pages>
       <url hash="9a8447b6">P81-1003</url>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>PHONY</fixed-case>: A Heuristic Phonological Analyzer</title>
-      <author><first>Lee A.</first><last>Becker</last></author>
+      <author id="lee-becker"><first>Lee A.</first><last>Becker</last></author>
       <doi>10.3115/981923.981929</doi>
       <pages>23–27</pages>
       <url hash="3631ad98">P81-1005</url>
@@ -56,7 +56,7 @@
     </paper>
     <paper id="6">
       <title>Evaluation of Natural Language Interfaces to Database Systems: A Panel Discussion</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <doi>10.3115/981923.981931</doi>
       <pages>29–29</pages>
       <url hash="40f7600a">P81-1006</url>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="8">
       <title>Selective Planning of Interface Evaluations</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/981923.981933</doi>
       <pages>33–34</pages>
       <url hash="2d8be381">P81-1008</url>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="10">
       <title>What Makes Evaluation Hard?</title>
-      <author><first>Harry</first><last>Tennant</last></author>
+      <author id="harry-tennant"><first>Harry</first><last>Tennant</last></author>
       <doi>10.3115/981923.981935</doi>
       <pages>37–38</pages>
       <url hash="40b5eded">P81-1010</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="12">
       <title>Two Discourse Generators</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/981923.981938</doi>
       <pages>43–47</pages>
       <url hash="bb5c1318">P81-1012</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="13">
       <title>A Grammar and a Lexicon for a Text-Production System</title>
-      <author><first>Christian M.I.M.</first><last>Matthiessen</last></author>
+      <author id="christian-m-i-m-matthiessen"><first>Christian M.I.M.</first><last>Matthiessen</last></author>
       <doi>10.3115/981923.981939</doi>
       <pages>49–55</pages>
       <url hash="84fc2a90">P81-1013</url>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="14">
       <title>Language Production: the Source of the Dictionary</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/981923.981940</doi>
       <pages>57–62</pages>
       <url hash="d793eab3">P81-1014</url>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="17">
       <title>What’s Necessary to Hide?: Modeling Action Verbs</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/981923.981944</doi>
       <pages>77–81</pages>
       <url hash="3ae11cde">P81-1017</url>
@@ -152,7 +152,7 @@
     </paper>
     <paper id="18">
       <title>A Rule-based Conversation Participant</title>
-      <author><first>Robert E.</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
       <doi>10.3115/981923.981945</doi>
       <pages>83–87</pages>
       <url hash="5d7fd221">P81-1018</url>
@@ -168,7 +168,7 @@
     </paper>
     <paper id="20">
       <title>Perspectives on Parsing Issues</title>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <doi>10.3115/981923.981948</doi>
       <pages>95–95</pages>
       <url hash="2658762b">P81-1020</url>
@@ -176,8 +176,8 @@
     </paper>
     <paper id="21">
       <title>Some Issues in Parsing and Natural Language Understanding</title>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
-      <author><first>Bonnie L.</first><last>Webber</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="bonnie-webber"><first>Bonnie L.</first><last>Webber</last></author>
       <doi>10.3115/981923.981949</doi>
       <pages>97–99</pages>
       <url hash="0b53ed01">P81-1021</url>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="22">
       <title>Parsing</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/981923.981950</doi>
       <pages>101–101</pages>
       <url hash="2994b94e">P81-1022</url>
@@ -193,7 +193,7 @@
     </paper>
     <paper id="23">
       <title>A View of Parsing</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <doi>10.3115/981923.981951</doi>
       <pages>103–104</pages>
       <url hash="3ab1498c">P81-1023</url>
@@ -209,7 +209,7 @@
     </paper>
     <paper id="25">
       <title>Presupposition and Implicature in Model-Theoretic Pragmatics</title>
-      <author><first>Douglas B.</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas B.</first><last>Moran</last></author>
       <doi>10.3115/981923.981954</doi>
       <pages>107–108</pages>
       <url hash="edb81b43">P81-1025</url>
@@ -225,7 +225,7 @@
     </paper>
     <paper id="27">
       <title>A Situation Semantics Approach to the Analysis of Speech Acts</title>
-      <author><first>David Andreoff</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David Andreoff</first><last>Evans</last></author>
       <doi>10.3115/981923.981956</doi>
       <pages>113–116</pages>
       <url hash="4179e7ca">P81-1027</url>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="28">
       <title>Problems in Logical Form</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <doi>10.3115/981923.981957</doi>
       <pages>117–124</pages>
       <url hash="8b9d7246">P81-1028</url>
@@ -241,7 +241,7 @@
     </paper>
     <paper id="29">
       <title>A Case for Rule-Driven Semantic Processing</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/981923.981958</doi>
       <pages>125–131</pages>
       <url hash="79c8c039">P81-1029</url>
@@ -266,7 +266,7 @@
     </paper>
     <paper id="32">
       <title>Dynamic Strategy Selection in Flexible Parsing</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <author><first>Philip J.</first><last>Hayes</last></author>
       <doi>10.3115/981923.981962</doi>
       <pages>143–147</pages>
@@ -283,8 +283,8 @@
     </paper>
     <paper id="34">
       <title>Controlled Transformational Sentence Generation</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
       <doi>10.3115/981923.981964</doi>
       <pages>153–158</pages>
       <url hash="2b4790f2">P81-1034</url>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="35">
       <title>Transportable Natural-Language Interfaces to Databases</title>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <author><first>William H.</first><last>Lewis</last></author>
       <doi>10.3115/981923.981965</doi>
       <pages>159–165</pages>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="36">
       <title>Chart Parsing and Rule Schemata in <fixed-case>PSG</fixed-case></title>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <doi>10.3115/981923.981966</doi>
       <pages>167–172</pages>
       <url hash="39b13ba4">P81-1036</url>
diff --git a/data/xml/P82.xml b/data/xml/P82.xml
index 0ff858a81c..14f4a6f4de 100644
--- a/data/xml/P82.xml
+++ b/data/xml/P82.xml
@@ -15,8 +15,8 @@
     </frontmatter>
     <paper id="1">
       <title>Translating <fixed-case>E</fixed-case>nglish Into Logical Form</title>
-      <author><first>Stanley J.</first><last>Rosenschein</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stanley-j-rosenschein"><first>Stanley J.</first><last>Rosenschein</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/981251.981253</doi>
       <pages>1–8</pages>
       <url hash="33ae0066">P82-1001</url>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>Linguistic and Computational Semantics</title>
-      <author><first>Brian Cantwell</first><last>Smith</last></author>
+      <author id="brian-cantwell-smith"><first>Brian Cantwell</first><last>Smith</last></author>
       <doi>10.3115/981251.981254</doi>
       <pages>9–15</pages>
       <url hash="3c2ab9ca">P82-1002</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>The Representation of Inconsistent Information in a Dynamic Model-Theoretic Semantics</title>
-      <author><first>Douglas B.</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas B.</first><last>Moran</last></author>
       <doi>10.3115/981251.981255</doi>
       <pages>16–18</pages>
       <url hash="84949fb4">P82-1003</url>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="4">
       <title>What’s in a Semantic Network?</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <author><first>Alan M.</first><last>Frisch</last></author>
       <doi>10.3115/981251.981256</doi>
       <pages>19–27</pages>
@@ -49,7 +49,7 @@
     </paper>
     <paper id="5">
       <title>Dependencies of Discourse Structure on the Modality of Communication: Telephone vs. Teletype</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <author><first>Scott</first><last>Fertig</last></author>
       <author><first>Kathy</first><last>Starr</last></author>
       <doi>10.3115/981251.981258</doi>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="7">
       <title>Natural-Language Access to Databases–Theoretical/Technical Issues</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <doi>10.3115/981251.981261</doi>
       <pages>44–45</pages>
       <url hash="4a9f453a">P82-1007</url>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="8">
       <title>Transportable Natural-Language Interfaces: Problems and Techniques</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <doi>10.3115/981251.981262</doi>
       <pages>46–50</pages>
       <url hash="1fe2aae5">P82-1008</url>
@@ -91,7 +91,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>E</fixed-case>nglish Words and Data Bases: How to Bridge the Gap</title>
-      <author><first>Remko J. H.</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko J. H.</first><last>Scha</last></author>
       <doi>10.3115/981251.981264</doi>
       <pages>57–59</pages>
       <url hash="15425106">P82-1010</url>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="12">
       <title>Issues in Natural Language Access to Databases From a Logic Programming Perspective</title>
-      <author><first>David H. D.</first><last>Warren</last></author>
+      <author id="david-h-d-warren"><first>David H. D.</first><last>Warren</last></author>
       <doi>10.3115/981251.981266</doi>
       <pages>63–66</pages>
       <url hash="48aec762">P82-1012</url>
@@ -124,14 +124,14 @@
     </paper>
     <paper id="14">
       <title>Processing <fixed-case>E</fixed-case>nglish With a Generalized Phrase Structure Grammar</title>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
       <author><first>Jonathan</first><last>King</last></author>
       <author><first>John</first><last>Lamping</last></author>
       <author><first>Egon</first><last>Loebner</last></author>
       <author><first>E. Anne</first><last>Paulson</last></author>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <author><first>Ivan A.</first><last>Sag</last></author>
-      <author><first>Thomas</first><last>Wasow</last></author>
+      <author id="thomas-wasow"><first>Thomas</first><last>Wasow</last></author>
       <doi>10.3115/981251.981269</doi>
       <pages>74–81</pages>
       <url hash="9c46fc2f">P82-1014</url>
@@ -139,7 +139,7 @@
     </paper>
     <paper id="15">
       <title>Experience with an Easily Computed Metric for Ranking Alternative Parses</title>
-      <author><first>George E.</first><last>Heidorn</last></author>
+      <author id="george-e-heidorn"><first>George E.</first><last>Heidorn</last></author>
       <doi>10.3115/981251.981270</doi>
       <pages>82–84</pages>
       <url hash="2fbf7bff">P82-1015</url>
@@ -147,8 +147,8 @@
     </paper>
     <paper id="16">
       <title>An Improved Heuristic for Ellipsis Processing</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <doi>10.3115/981251.981271</doi>
       <pages>85–88</pages>
       <url hash="15d0e932">P82-1016</url>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="23">
       <title>Twenty Years of Reflections</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981251.981279</doi>
       <pages>102–102</pages>
       <url hash="a23de7aa">P82-1023</url>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="25">
       <title>Reflections on Twenty Years of the <fixed-case>ACL</fixed-case></title>
-      <author><first>Jonathan</first><last>Allen</last></author>
+      <author id="jonathan-allen"><first>Jonathan</first><last>Allen</last></author>
       <doi>10.3115/981251.981281</doi>
       <pages>104–106</pages>
       <url hash="127866e6">P82-1025</url>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="26">
       <title>On the Present</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <doi>10.3115/981251.981282</doi>
       <pages>107–107</pages>
       <url hash="7d9432a6">P82-1026</url>
@@ -236,7 +236,7 @@
     </paper>
     <paper id="27">
       <title>Planning Natural Language Referring Expressions</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <doi>10.3115/981251.981284</doi>
       <pages>108–112</pages>
       <url hash="5da1e408">P82-1027</url>
@@ -244,7 +244,7 @@
     </paper>
     <paper id="28">
       <title>The Text System for Natural Language Generation: An Overview</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/981251.981285</doi>
       <pages>113–120</pages>
       <url hash="f6b7fbc7">P82-1028</url>
@@ -252,7 +252,7 @@
     </paper>
     <paper id="29">
       <title>Augmenting a Database Knowledge Representation for Natural Language Generation</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/981251.981286</doi>
       <pages>121–128</pages>
       <url hash="2802bf18">P82-1029</url>
@@ -261,7 +261,7 @@
     <paper id="30">
       <title>Salience: The Key to the Selection Problem in Natural Language Generation</title>
       <author><first>E. Jeffrey</first><last>Conklin</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/981251.981287</doi>
       <pages>129–135</pages>
       <url hash="bb5d7735">P82-1030</url>
@@ -269,8 +269,8 @@
     </paper>
     <paper id="31">
       <title>A Knowledge Engineering Approach to Natural Language Understanding</title>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
-      <author><first>Jeannette G.</first><last>Neal</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="jeannette-g-neal"><first>Jeannette G.</first><last>Neal</last></author>
       <doi>10.3115/981251.981289</doi>
       <pages>136–144</pages>
       <url hash="2ff2c941">P82-1031</url>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="33">
       <title>Building Non-Normative Systems - The Search for Robustness: An Overview</title>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <doi>10.3115/981251.981292</doi>
       <pages>152–152</pages>
       <url hash="d39e9fad">P82-1033</url>
@@ -294,7 +294,7 @@
     </paper>
     <paper id="34">
       <title>Design Dimensions for Non-Normative Understanding Systems</title>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
       <doi>10.3115/981251.981293</doi>
       <pages>153–156</pages>
       <url hash="3f932f8a">P82-1034</url>
@@ -310,8 +310,8 @@
     </paper>
     <paper id="36">
       <title>On the Linguistic Character of Non-Standard Input</title>
-      <author><first>Anthony S.</first><last>Kroch</last></author>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="anthony-kroch"><first>Anthony S.</first><last>Kroch</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <doi>10.3115/981251.981295</doi>
       <pages>161–163</pages>
       <url hash="72583029">P82-1036</url>
@@ -319,7 +319,7 @@
     </paper>
     <paper id="37">
       <title>Ill-Formed and Non-Standard Language Problems</title>
-      <author><first>Stan</first><last>Kwasny</last></author>
+      <author id="stan-c-kwasny"><first>Stan</first><last>Kwasny</last></author>
       <doi>10.3115/981251.981296</doi>
       <pages>164–166</pages>
       <url hash="e705060f">P82-1037</url>
@@ -327,7 +327,7 @@
     </paper>
     <paper id="38">
       <title>“Natural Language Texts Are Not Necessarily Grammatical and Unambiguous or <fixed-case>E</fixed-case>ven Complete.”</title>
-      <author><first>Lance A.</first><last>Miller</last></author>
+      <author id="lance-a-miller"><first>Lance A.</first><last>Miller</last></author>
       <doi>10.3115/981251.981297</doi>
       <pages>167–168</pages>
       <url hash="7ff69f2c">P82-1038</url>
diff --git a/data/xml/P83.xml b/data/xml/P83.xml
index 5326f7b923..0d6ee6cf5f 100644
--- a/data/xml/P83.xml
+++ b/data/xml/P83.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Context-Freeness and the Computer Processing of Human Languages</title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <doi>10.3115/981311.981313</doi>
       <pages>1–6</pages>
       <url hash="11c09a7d">P83-1001</url>
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Factoring Recursion and Dependencies: An Aspect of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars (<fixed-case>TAG</fixed-case>) and a Comparison of Some Formal Properties of <fixed-case>TAG</fixed-case>s, <fixed-case>GPSG</fixed-case>s, <fixed-case>PLG</fixed-case>s, and <fixed-case>LPG</fixed-case>s</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981311.981314</doi>
       <pages>7–15</pages>
       <url hash="349db0d3">P83-1002</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Crossed Serial Dependencies: A low-power parseable extension to <fixed-case>GPSG</fixed-case></title>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <doi>10.3115/981311.981315</doi>
       <pages>16–21</pages>
       <url hash="f60a3bde">P83-1003</url>
@@ -39,10 +39,10 @@
     </paper>
     <paper id="4">
       <title>Formal Constraints on Metarules</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <author><first>Swan U.</first><last>Stucky</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
-      <author><first>Jane J.</first><last>Robinson</last></author>
+      <author id="jane-j-robinson"><first>Jane J.</first><last>Robinson</last></author>
       <doi>10.3115/981311.981316</doi>
       <pages>22–27</pages>
       <url hash="b5e992ce">P83-1004</url>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="5">
       <title>A Prolegomenon to Situation Semantics</title>
-      <author><first>David J.</first><last>Israel</last></author>
+      <author id="david-israel"><first>David J.</first><last>Israel</last></author>
       <doi>10.3115/981311.981318</doi>
       <pages>28–37</pages>
       <url hash="8a7d45ca">P83-1005</url>
@@ -66,8 +66,8 @@
     </paper>
     <paper id="7">
       <title>Providing a Unified Account of Definite Noun Phrases in Discourse</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Scott</first><last>Weinstein</last></author>
       <doi>10.3115/981311.981320</doi>
       <pages>44–50</pages>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="9">
       <title>An Improper Treatment of Quantification in Ordinary <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.3115/981311.981322</doi>
       <pages>57–63</pages>
       <url hash="9c4b2123">P83-1009</url>
@@ -100,7 +100,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>TELEGRAM</fixed-case>: A Grammar Formalism for Language Planning</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <doi>10.3115/981311.981325</doi>
       <pages>74–78</pages>
       <url hash="197d9228">P83-1011</url>
@@ -108,7 +108,7 @@
     </paper>
     <paper id="12">
       <title>An Overview of the <fixed-case>N</fixed-case>igel Text Generation Grammar</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/981311.981326</doi>
       <pages>79–84</pages>
       <url hash="fab7a6b5">P83-1012</url>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="13">
       <title>Automatic Recognition of Intonation Patterns</title>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <doi>10.3115/981311.981328</doi>
       <pages>85–90</pages>
       <url hash="e73d8538">P83-1013</url>
@@ -124,7 +124,7 @@
     </paper>
     <paper id="14">
       <title>A Finite-State Parser for Use in Speech Recognition</title>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <doi>10.3115/981311.981329</doi>
       <pages>91–97</pages>
       <url hash="125f0292">P83-1014</url>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="15">
       <title>On the Mathematical Properties of Linguistic Theories</title>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
       <doi>10.3115/981311.981331</doi>
       <pages>98–105</pages>
       <url hash="7d190ee8">P83-1015</url>
@@ -148,7 +148,7 @@
     </paper>
     <paper id="17">
       <title>Sentence Disambiguation by a Shift-Reduce Parsing Technique</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/981311.981334</doi>
       <pages>113–118</pages>
       <url hash="6f59e405">P83-1017</url>
@@ -156,8 +156,8 @@
     </paper>
     <paper id="18">
       <title>Syntactic Constraints and Efficient Parsability</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
-      <author><first>Amy S.</first><last>Weinberg</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
+      <author id="amy-weinberg"><first>Amy S.</first><last>Weinberg</last></author>
       <doi>10.3115/981311.981335</doi>
       <pages>119–122</pages>
       <url hash="9dbc6a42">P83-1018</url>
@@ -165,7 +165,7 @@
     </paper>
     <paper id="19">
       <title>Deterministic Parsing of Syntactic Non-fluencies</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <doi>10.3115/981311.981336</doi>
       <pages>123–128</pages>
       <url hash="fcd36bcf">P83-1019</url>
@@ -173,8 +173,8 @@
     </paper>
     <paper id="20">
       <title><fixed-case>D</fixed-case>-Theory: Talking about Talking about Trees</title>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <author><first>Margaret M.</first><last>Fleck</last></author>
       <doi>10.3115/981311.981337</doi>
       <pages>129–136</pages>
@@ -183,8 +183,8 @@
     </paper>
     <paper id="21">
       <title>Parsing as Deduction</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
-      <author><first>David H. D.</first><last>Warren</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="david-h-d-warren"><first>David H. D.</first><last>Warren</last></author>
       <doi>10.3115/981311.981338</doi>
       <pages>137–144</pages>
       <url hash="3f1706e4">P83-1021</url>
@@ -200,7 +200,7 @@
     </paper>
     <paper id="23">
       <title>Menu-Based Natural Language Understanding</title>
-      <author><first>Harry R.</first><last>Tennant</last></author>
+      <author id="harry-tennant"><first>Harry R.</first><last>Tennant</last></author>
       <author><first>Kenneth M.</first><last>Ross</last></author>
       <author><first>Richard M.</first><last>Saenz</last></author>
       <author><first>Craig W.</first><last>Thompson</last></author>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="25">
       <title>Discourse Pragmatics and Ellipsis Resolution in Task-Oriented Natural Language Interfaces</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <doi>10.3115/981311.981343</doi>
       <pages>164–168</pages>
       <url hash="0b818131">P83-1025</url>
diff --git a/data/xml/P84.xml b/data/xml/P84.xml
index 1bf256bcc1..3d1e6115f0 100644
--- a/data/xml/P84.xml
+++ b/data/xml/P84.xml
@@ -25,8 +25,8 @@
     <paper id="2">
       <title>Conveying Implicit Content in Narrative Summaries</title>
       <author><first>Malcolm E.</first><last>Cook</last></author>
-      <author><first>Wendy G.</first><last>Lehnert</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="wendy-lehnert"><first>Wendy G.</first><last>Lehnert</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/980491.980493</doi>
       <pages>5–7</pages>
       <url hash="fe8a4f0b">P84-1002</url>
@@ -34,7 +34,7 @@
     </paper>
     <paper id="3">
       <title>Transforming <fixed-case>E</fixed-case>nglish Interfaces to Other Natural Languages: An Experiment With <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Gabriel Pereira</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel Pereira</first><last>Lopes</last></author>
       <doi>10.3115/980491.980494</doi>
       <pages>8–10</pages>
       <url hash="d2feb5d6">P84-1003</url>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="6">
       <title>Bounded Context Parsing and Easy Learnability</title>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <doi>10.3115/980491.980497</doi>
       <pages>20–23</pages>
       <url hash="16a89b21">P84-1006</url>
@@ -66,7 +66,7 @@
     </paper>
     <paper id="7">
       <title>The Representation of Constituent Structures for Finite-State Parsing</title>
-      <author><first>D. Terence</first><last>Langendoen</last></author>
+      <author id="d-terence-langendoen"><first>D. Terence</first><last>Langendoen</last></author>
       <author><first>Yedidyah</first><last>Langsam</last></author>
       <doi>10.3115/980491.980498</doi>
       <pages>24–27</pages>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="12">
       <title>Toward a Redefinition of Yes/No Questions</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <doi>10.3115/980491.980503</doi>
       <pages>48–51</pages>
       <url hash="4c009d63">P84-1012</url>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="13">
       <title>The Syntax and Semantics of User-Defined Modifiers in Transportable Natural Language Processor</title>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <doi>10.3115/980491.980504</doi>
       <pages>52–56</pages>
       <url hash="bf593e60">P84-1013</url>
@@ -142,7 +142,7 @@
     <paper id="16">
       <title>Quasi-Indexical Reference in Propositional Semantic Networks</title>
       <author><first>William J.</first><last>Rapaport</last></author>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <doi>10.3115/980491.980507</doi>
       <pages>65–70</pages>
       <url hash="69ff680e">P84-1016</url>
@@ -199,10 +199,10 @@
     </paper>
     <paper id="23">
       <title>Automated Determination of Sublanguage Syntactic Usage</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Ngo Thanh</first><last>Nhan</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="ngo-thanh-nhan"><first>Ngo Thanh</first><last>Nhan</last></author>
       <author><first>Elaine</first><last>Marsh</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <doi>10.3115/980491.980514</doi>
       <pages>96–100</pages>
       <url hash="af61ad38">P84-1023</url>
@@ -210,9 +210,9 @@
     </paper>
     <paper id="24">
       <title>Semantic Interpretation Using <fixed-case>KL</fixed-case>-<fixed-case>ONE</fixed-case></title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
       <doi>10.3115/980491.980515</doi>
       <pages>101–107</pages>
       <url hash="5404fbfa">P84-1024</url>
@@ -221,7 +221,7 @@
     <paper id="25">
       <title>Two Theories for Computing the Logical Form of Mass Expressions</title>
       <author><first>Francis Jeffry</first><last>Pelletier</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <doi>10.3115/980491.980516</doi>
       <pages>108–111</pages>
       <url hash="5386004b">P84-1025</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="26">
       <title>Syntactic and Semantic Parsability</title>
-      <author><first>Geoffrey K.</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey K.</first><last>Pullum</last></author>
       <doi>10.3115/980491.980517</doi>
       <pages>112–122</pages>
       <url hash="059d95b7">P84-1026</url>
@@ -237,8 +237,8 @@
     </paper>
     <paper id="27">
       <title>The Semantics of Grammar Formalisms Seen as Computer Languages</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/980491.980518</doi>
       <pages>123–129</pages>
       <url hash="c3370e30">P84-1027</url>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="28">
       <title>The Resolution of Quantificational Ambiguity in the <fixed-case>TENDUM</fixed-case> System</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <doi>10.3115/980491.980519</doi>
       <pages>130–133</pages>
       <url hash="807e7cdd">P84-1028</url>
@@ -254,9 +254,9 @@
     </paper>
     <paper id="29">
       <title>Preventing False Inferences</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/980491.980520</doi>
       <pages>134–138</pages>
       <url hash="34e66673">P84-1029</url>
@@ -264,8 +264,8 @@
     </paper>
     <paper id="30">
       <title>Problem Localization Strategies for Pramatics Processing in Natural-Language Front Ends</title>
-      <author><first>Lance A.</first><last>Ramshaw</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="lance-ramshaw"><first>Lance A.</first><last>Ramshaw</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/980491.980521</doi>
       <pages>139–143</pages>
       <url hash="7f063fa9">P84-1030</url>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="31">
       <title>A Connectionist Model of Some Aspects of Anaphor Resolution</title>
-      <author><first>Ronan G.</first><last>Reilly</last></author>
+      <author id="ronan-g-reilly"><first>Ronan G.</first><last>Reilly</last></author>
       <doi>10.3115/980491.980522</doi>
       <pages>144–149</pages>
       <url hash="e228f021">P84-1031</url>
@@ -317,7 +317,7 @@
     </paper>
     <paper id="36">
       <title>Detecting Patterns in a Lexical Data Base</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <doi>10.3115/980491.980527</doi>
       <pages>170–173</pages>
       <url hash="ac87431a">P84-1036</url>
@@ -341,7 +341,7 @@
     </paper>
     <paper id="39">
       <title>Panel: Natural Language and Databases, Again</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <doi>10.3115/980491.980530</doi>
       <pages>182–183</pages>
       <url hash="5db5c44b">P84-1039</url>
@@ -349,7 +349,7 @@
     </paper>
     <paper id="40">
       <title>There Still Is Gold in the Database Mine</title>
-      <author><first>Madeleine</first><last>Bates</last></author>
+      <author id="madeleine-bates"><first>Madeleine</first><last>Bates</last></author>
       <doi>10.3115/980491.980531</doi>
       <pages>184–185</pages>
       <url hash="21a3b3c5">P84-1040</url>
@@ -357,7 +357,7 @@
     </paper>
     <paper id="41">
       <title>Is There Natural Language after Data Bases?</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <doi>10.3115/980491.980532</doi>
       <pages>186–187</pages>
       <url hash="d82261f0">P84-1041</url>
@@ -365,7 +365,7 @@
     </paper>
     <paper id="42">
       <title>Panel on Natural Language and Databases</title>
-      <author><first>Daniel P.</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Daniel P.</first><last>Flickinger</last></author>
       <doi>10.3115/980491.980533</doi>
       <pages>188–189</pages>
       <url hash="65cd64dc">P84-1042</url>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="43">
       <title>Natural Language for Exert Systems: Comparisons with Database Systems</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/980491.980534</doi>
       <pages>190–193</pages>
       <url hash="e6fb3f8c">P84-1043</url>
@@ -389,7 +389,7 @@
     </paper>
     <paper id="45">
       <title>Understanding Pragmatically Ill-Formed Input</title>
-      <author><first>M. Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>M. Sandra</first><last>Carberry</last></author>
       <doi>10.3115/980491.980536</doi>
       <pages>200–206</pages>
       <url hash="1271463c">P84-1045</url>
@@ -397,7 +397,7 @@
     </paper>
     <paper id="46">
       <title>Referring as Requesting</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <doi>10.3115/980491.980537</doi>
       <pages>207–211</pages>
       <url hash="65fccdd3">P84-1046</url>
@@ -413,7 +413,7 @@
     </paper>
     <paper id="48">
       <title>Combining Functionality and Object-Orientedness for Natural Language Processing</title>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <doi>10.3115/980491.980539</doi>
       <pages>218–221</pages>
@@ -423,7 +423,7 @@
     <paper id="49">
       <title>Use of Heuristic Knowledge in <fixed-case>C</fixed-case>hinese Language Analysis</title>
       <author><first>Yiming</first><last>Yang</last></author>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
       <author><first>Shuji</first><last>Doshita</last></author>
       <doi>10.3115/980491.980540</doi>
       <pages>222–225</pages>
@@ -466,7 +466,7 @@
     </paper>
     <paper id="54">
       <title>On Parsing Preferences</title>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <doi>10.3115/980491.980545</doi>
       <pages>247–250</pages>
       <url hash="ca847fdb">P84-1054</url>
@@ -491,9 +491,9 @@
     </paper>
     <paper id="57">
       <title>Analysis Grammar of <fixed-case>J</fixed-case>apanese in the <fixed-case>M</fixed-case>u-project - A Procedural Approach to Analysis Grammar</title>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980491.980548</doi>
       <pages>267–274</pages>
       <url hash="81824a62">P84-1057</url>
@@ -509,7 +509,7 @@
     </paper>
     <paper id="59">
       <title>Building a Large Knowledge Base for a Natural Language System</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.3115/980491.980550</doi>
       <pages>283–286</pages>
       <url hash="c3f51120">P84-1059</url>
@@ -526,7 +526,7 @@
     </paper>
     <paper id="61">
       <title>Inferencing on Linguistically Based Semantic Structures</title>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <author><first>Milena</first><last>Hnátková</last></author>
       <doi>10.3115/980491.980552</doi>
       <pages>291–297</pages>
@@ -543,8 +543,8 @@
     </paper>
     <paper id="63">
       <title>A Plan Recognition Model for Clarification Subdialogues</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/980491.980554</doi>
       <pages>302–311</pages>
       <url hash="f82d3330">P84-1063</url>
@@ -561,7 +561,7 @@
     <paper id="65">
       <title>Using Focus to Generate Complex and Simple Sentences</title>
       <author><first>Marcia A.</first><last>Derr</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/980491.980556</doi>
       <pages>319–326</pages>
       <url hash="7d3d1626">P84-1065</url>
@@ -569,7 +569,7 @@
     </paper>
     <paper id="66">
       <title>A Rational Reconstruction of the <fixed-case>PROTEUS</fixed-case> Sentence Planner</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <doi>10.3115/980491.980557</doi>
       <pages>327–329</pages>
       <url hash="d877501d">P84-1066</url>
@@ -577,7 +577,7 @@
     </paper>
     <paper id="67">
       <title>Software Tools for the Environment of a Computer Aided Translation System</title>
-      <author><first>Daniel</first><last>Bachut</last></author>
+      <author id="daniel-bachut"><first>Daniel</first><last>Bachut</last></author>
       <author><first>Nelson</first><last>Verastegui</last></author>
       <doi>10.3115/980491.980558</doi>
       <pages>330–333</pages>
@@ -587,7 +587,7 @@
     <paper id="68">
       <title>Design of a Machine Translation System for a Sublanguage</title>
       <author><first>Beat</first><last>Buchmann</last></author>
-      <author><first>Susan</first><last>Warwick</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick</last></author>
       <author><first>Patrick</first><last>Shann</last></author>
       <doi>10.3115/980491.980559</doi>
       <pages>334–337</pages>
@@ -596,9 +596,9 @@
     </paper>
     <paper id="69">
       <title>Grammar Writing System (<fixed-case>GRADE</fixed-case>) of <fixed-case>M</fixed-case>u-Machine Translation Project and its Characteristics</title>
-      <author><first>Jun-ichi</first><last>Nakamura</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun-ichi</first><last>Nakamura</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980491.980560</doi>
       <pages>338–343</pages>
       <url hash="5e2fdf60">P84-1069</url>
@@ -630,7 +630,7 @@
     </paper>
     <paper id="73">
       <title><fixed-case>LR</fixed-case> Parsers For Natural Languages</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <doi>10.3115/980491.980564</doi>
       <pages>354–357</pages>
       <url hash="40903d74">P84-1073</url>
@@ -646,7 +646,7 @@
     </paper>
     <paper id="75">
       <title>The Design of a Computer Language for Linguistic Information</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/980491.980566</doi>
       <pages>362–366</pages>
       <url hash="6585f998">P84-1075</url>
@@ -654,7 +654,7 @@
     </paper>
     <paper id="76">
       <title>Discourse Structures for Text Generation</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <doi>10.3115/980491.980567</doi>
       <pages>367–375</pages>
       <url hash="de86dede">P84-1076</url>
@@ -662,7 +662,7 @@
     </paper>
     <paper id="77">
       <title>Semantic Rule Based Text Generation</title>
-      <author><first>Michael L.</first><last>Mauldin</last></author>
+      <author id="michael-l-mauldin"><first>Michael L.</first><last>Mauldin</last></author>
       <doi>10.3115/980491.980568</doi>
       <pages>376–380</pages>
       <url hash="106791d1">P84-1077</url>
@@ -670,7 +670,7 @@
     </paper>
     <paper id="78">
       <title>Controlling Lexical Substitution in Computer Text Generation</title>
-      <author><first>Robert</first><last>Granville</last></author>
+      <author id="robert-granville"><first>Robert</first><last>Granville</last></author>
       <doi>10.3115/980491.980569</doi>
       <pages>381–384</pages>
       <url hash="6a971e18">P84-1078</url>
@@ -688,9 +688,9 @@
     </paper>
     <paper id="80">
       <title>Two-Way Finite Automata and Dependency Grammar: A Parsing Method for Inflectional Free Word Order Languages</title>
-      <author><first>Esa</first><last>Nelimarkka</last></author>
-      <author><first>Harri</first><last>Jappinen</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="esa-nelimarkka"><first>Esa</first><last>Nelimarkka</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jappinen</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <doi>10.3115/980491.980571</doi>
       <pages>389–392</pages>
       <url hash="d8459851">P84-1080</url>
@@ -698,7 +698,7 @@
     </paper>
     <paper id="81">
       <title>Interruptable Transition Networks</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/980491.980572</doi>
       <pages>393–397</pages>
       <url hash="fdbf1676">P84-1081</url>
@@ -706,7 +706,7 @@
     </paper>
     <paper id="82">
       <title>Automatic Construction of Discourse Representation Structures</title>
-      <author><first>Franz</first><last>Guenthner</last></author>
+      <author id="franz-guenthner"><first>Franz</first><last>Guenthner</last></author>
       <doi>10.3115/980491.980573</doi>
       <pages>398–401</pages>
       <url hash="c0c30c77">P84-1082</url>
@@ -731,7 +731,7 @@
     <paper id="85">
       <title>A Syntactic Approach to Discourse Semantics</title>
       <author><first>Livia</first><last>Polanyi</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <doi>10.3115/980491.980576</doi>
       <pages>413–419</pages>
       <url hash="aed22691">P84-1085</url>
@@ -739,9 +739,9 @@
     </paper>
     <paper id="86">
       <title>Dealing With Incompleteness of Linguistic Knowledge in Language Translation – Transfer and Generation Stage of <fixed-case>M</fixed-case>u Machine Translation Project</title>
-      <author><first>Makoto</first><last>Nagao</last></author>
-      <author><first>Toyoaki</first><last>Nishida</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
+      <author id="toyoaki-nishida"><first>Toyoaki</first><last>Nishida</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <doi>10.3115/980491.980577</doi>
       <pages>420–427</pages>
       <url hash="b5e1a107">P84-1086</url>
@@ -767,7 +767,7 @@
     </paper>
     <paper id="89">
       <title>Coping with Extragrammaticality</title>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <author><first>Philip J.</first><last>Hayes</last></author>
       <doi>10.3115/980491.980580</doi>
       <pages>437–443</pages>
@@ -776,7 +776,7 @@
     </paper>
     <paper id="90">
       <title>Correcting Object-Related Misconceptions: How Should The System Respond?</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/980491.980581</doi>
       <pages>444–447</pages>
       <url hash="eb800da7">P84-1090</url>
@@ -792,7 +792,7 @@
     </paper>
     <paper id="92">
       <title>From <fixed-case>HOPE</fixed-case> en <fixed-case>I</fixed-case>’<fixed-case>ESPERANCE</fixed-case> On the Role of Computational Neurolinguistics in Cross-Language Studies</title>
-      <author><first>Helen M.</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen M.</first><last>Gigley</last></author>
       <doi>10.3115/980491.980583</doi>
       <pages>452–456</pages>
       <url hash="f30e3f2a">P84-1092</url>
@@ -816,7 +816,7 @@
     </paper>
     <paper id="95">
       <title>Machine-Readable Dictionaries, Lexical Data Bases and the Lexical System</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <doi>10.3115/980491.980586</doi>
       <pages>460–460</pages>
       <url hash="2358c853">P84-1095</url>
@@ -848,8 +848,8 @@
     </paper>
     <paper id="99">
       <title>Transfer in a Multilingual <fixed-case>MT</fixed-case> System</title>
-      <author><first>Steven</first><last>Krauwer</last></author>
-      <author><first>Louis</first><last>des Tombe</last></author>
+      <author id="steven-krauwer"><first>Steven</first><last>Krauwer</last></author>
+      <author id="louis-des-tombe"><first>Louis</first><last>des Tombe</last></author>
       <doi>10.3115/980491.980590</doi>
       <pages>464–467</pages>
       <url hash="4551cc3f">P84-1099</url>
@@ -857,7 +857,7 @@
     </paper>
     <paper id="100">
       <title>Expert Systems and Other New Techniques in <fixed-case>MT</fixed-case> Systems</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Rene</first><last>Gerber</last></author>
       <doi>10.3115/980491.980591</doi>
       <pages>468–471</pages>
@@ -875,7 +875,7 @@
     </paper>
     <paper id="102">
       <title>Disambiguating Grammatically Ambiguous Sentences By Asking</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <doi>10.3115/980491.980593</doi>
       <pages>475–480</pages>
       <url hash="43f7fc44">P84-1102</url>
@@ -910,8 +910,8 @@
     </paper>
     <paper id="106">
       <title><fixed-case>NA</fixed-case>tural Language driven Image Generation</title>
-      <author><first>Giovanni</first><last>Adorni</last></author>
-      <author><first>Mauro</first><last>Di Manzo</last></author>
+      <author id="giovanni-adorni"><first>Giovanni</first><last>Adorni</last></author>
+      <author id="mauro-di-manzo"><first>Mauro</first><last>Di Manzo</last></author>
       <author><first>Fausto</first><last>Giunchiglia</last></author>
       <doi>10.3115/980491.980597</doi>
       <pages>495–500</pages>
@@ -978,7 +978,7 @@
     </paper>
     <paper id="114">
       <title>Interpreting Syntactically Ill-Formed Sentences</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Pietro</first><last>Torasso</last></author>
       <doi>10.3115/980491.980605</doi>
       <pages>534–539</pages>
diff --git a/data/xml/P85.xml b/data/xml/P85.xml
index ee1821ac7b..d038a39dda 100644
--- a/data/xml/P85.xml
+++ b/data/xml/P85.xml
@@ -65,7 +65,7 @@
     </paper>
     <paper id="7">
       <title>Speech Acts and Rationality</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <author><first>Hector J.</first><last>Levesque</last></author>
       <doi>10.3115/981210.981217</doi>
       <pages>49–60</pages>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="8">
       <title>Ontological Promiscuity</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <doi>10.3115/981210.981218</doi>
       <pages>60–69</pages>
       <url hash="0a073380">P85-1008</url>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="10">
       <title>The Computational Difficulty of <fixed-case>ID/LP</fixed-case> Parsing</title>
-      <author><first>G. Edward</first><last>Barton</last></author>
+      <author id="g-edward-barton"><first>G. Edward</first><last>Barton</last></author>
       <doi>10.3115/981210.981220</doi>
       <pages>76–81</pages>
       <url hash="a6c6c1c3">P85-1010</url>
@@ -98,8 +98,8 @@
     </paper>
     <paper id="11">
       <title>Some Computational Properties of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>K.</first><last>Vijay-Shankar</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shankar</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981210.981221</doi>
       <pages>82–93</pages>
       <url hash="cd26eeb3">P85-1011</url>
@@ -107,8 +107,8 @@
     </paper>
     <paper id="12">
       <title><fixed-case>TAG</fixed-case>’s as a Grammatical Formalism for Generation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
-      <author><first>James D.</first><last>Pustejovsky</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
+      <author id="james-pustejovsky"><first>James D.</first><last>Pustejovsky</last></author>
       <doi>10.3115/981210.981222</doi>
       <pages>94–103</pages>
       <url hash="5ccb0cff">P85-1012</url>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="13">
       <title>Modular Logic Grammars</title>
-      <author><first>Michael C.</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
       <doi>10.3115/981210.981223</doi>
       <pages>104–117</pages>
       <url hash="b1fb6c59">P85-1013</url>
@@ -125,7 +125,7 @@
     <paper id="14">
       <title>New Approaches to Parsing Conjunctions Using <fixed-case>P</fixed-case>rolog</title>
       <author><first>Sandiway</first><last>Fong</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <doi>10.3115/981210.981224</doi>
       <pages>118–126</pages>
       <url hash="92397b47">P85-1014</url>
@@ -149,7 +149,7 @@
     </paper>
     <paper id="17">
       <title>A Structure-Sharing Representation for Unification-Based Grammar Formalisms</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <doi>10.3115/981210.981227</doi>
       <pages>137–144</pages>
       <url hash="ccaf835b">P85-1017</url>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="18">
       <title>Using Restriction to Extend Parsing Algorithms for Complex-Feature-Based Formalisms</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/981210.981228</doi>
       <pages>145–152</pages>
       <url hash="9311592b">P85-1018</url>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="20">
       <title>Movement in Active Production Networks</title>
-      <author><first>Mark A.</first><last>Jones</last></author>
+      <author id="mark-jones"><first>Mark A.</first><last>Jones</last></author>
       <author><first>Alan S.</first><last>Driscoll</last></author>
       <doi>10.3115/981210.981230</doi>
       <pages>161–166</pages>
@@ -185,7 +185,7 @@
     <paper id="21">
       <title>Parsing <fixed-case>H</fixed-case>ead-<fixed-case>D</fixed-case>riven <fixed-case>P</fixed-case>hrase <fixed-case>S</fixed-case>tructure <fixed-case>G</fixed-case>rammar</title>
       <author><first>Derek</first><last>Proudian</last></author>
-      <author><first>Carl</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl</first><last>Pollard</last></author>
       <doi>10.3115/981210.981231</doi>
       <pages>167–171</pages>
       <url hash="335fe7b5">P85-1021</url>
@@ -194,7 +194,7 @@
     <paper id="22">
       <title>A Computational Semantics for Natural Language</title>
       <author><first>Lewis G.</first><last>Creary</last></author>
-      <author><first>Carl J.</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl J.</first><last>Pollard</last></author>
       <doi>10.3115/981210.981232</doi>
       <pages>172–179</pages>
       <url hash="69862c81">P85-1022</url>
@@ -202,7 +202,7 @@
     </paper>
     <paper id="23">
       <title>Analysis of Conjunctions in a Rule-Based Parser</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Pietro</first><last>Torasso</last></author>
       <doi>10.3115/981210.981233</doi>
       <pages>180–187</pages>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="24">
       <title>A Pragmatics-Based Approach to Understanding Intersentential Ellipsis</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981210.981234</doi>
       <pages>188–197</pages>
       <url hash="4d0b21b6">P85-1024</url>
@@ -219,7 +219,7 @@
     </paper>
     <paper id="25">
       <title>Some Pragmatic Issues in the Planning of Definite and Indefinite Noun Phrases</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <doi>10.3115/981210.981235</doi>
       <pages>198–203</pages>
       <url hash="16d998f7">P85-1025</url>
@@ -251,7 +251,7 @@
     </paper>
     <paper id="29">
       <title>Description Strategies for Naive and Expert Users</title>
-      <author><first>Cecile L.</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
       <doi>10.3115/981210.981239</doi>
       <pages>238–245</pages>
       <url hash="1800b777">P85-1029</url>
@@ -259,7 +259,7 @@
     </paper>
     <paper id="30">
       <title>Stress Assignment in Letter to Sound Rules for Speech Synthesis</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <doi>10.3115/981210.981240</doi>
       <pages>246–253</pages>
       <url hash="04314368">P85-1030</url>
@@ -280,9 +280,9 @@
     </paper>
     <paper id="32">
       <title>Structure-Sharing in Lexical Representation</title>
-      <author><first>Daniel</first><last>Flickinger</last></author>
-      <author><first>Carl</first><last>Pollard</last></author>
-      <author><first>Thomas</first><last>Wasow</last></author>
+      <author id="dan-flickinger"><first>Daniel</first><last>Flickinger</last></author>
+      <author id="carl-pollard"><first>Carl</first><last>Pollard</last></author>
+      <author id="thomas-wasow"><first>Thomas</first><last>Wasow</last></author>
       <doi>10.3115/981210.981242</doi>
       <pages>262–267</pages>
       <url hash="5552667e">P85-1032</url>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="33">
       <title>A Tool Kit for Lexicon Building</title>
-      <author><first>Thomas E.</first><last>Ahlswede</last></author>
+      <author id="thomas-ahlswede"><first>Thomas E.</first><last>Ahlswede</last></author>
       <doi>10.3115/981210.981243</doi>
       <pages>268–276</pages>
       <url hash="f63b6e94">P85-1033</url>
@@ -299,7 +299,7 @@
     <paper id="34">
       <title>Using an On-Line Dictionary to Find Rhyming Words and Pronunciations for Unknown Words</title>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Martin S.</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin S.</first><last>Chodorow</last></author>
       <doi>10.3115/981210.981244</doi>
       <pages>277–283</pages>
       <url hash="b0630968">P85-1034</url>
@@ -308,7 +308,7 @@
     <paper id="35">
       <title>Towards a Self-Extending Lexicon</title>
       <author><first>Uri</first><last>Zernik</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <doi>10.3115/981210.981245</doi>
       <pages>284–292</pages>
       <url hash="ba3a6571">P85-1035</url>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="36">
       <title>Grammatical Analysis by Computer of the <fixed-case>L</fixed-case>ancaster-<fixed-case>O</fixed-case>slo/<fixed-case>B</fixed-case>ergen (<fixed-case>LOB</fixed-case>) Corpus of <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish Texts</title>
-      <author><first>Andrew</first><last>David</last></author>
+      <author id="andrew-david-beale"><first>Andrew</first><last>David</last></author>
       <doi>10.3115/981210.981246</doi>
       <pages>293–298</pages>
       <url hash="d6c19f6c">P85-1036</url>
@@ -324,9 +324,9 @@
     </paper>
     <paper id="37">
       <title>Extracting Semantic Hierarchies From a Large On-Line Dictionary</title>
-      <author><first>Martin S.</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin S.</first><last>Chodorow</last></author>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>George E.</first><last>Heidorn</last></author>
+      <author id="george-e-heidorn"><first>George E.</first><last>Heidorn</last></author>
       <doi>10.3115/981210.981247</doi>
       <pages>299–304</pages>
       <url hash="0d412277">P85-1037</url>
@@ -350,7 +350,7 @@
     </paper>
     <paper id="40">
       <title>Grammar Viewed as a Functioning Part of a Cognitive System</title>
-      <author><first>Helen M.</first><last>Gigley</last></author>
+      <author id="helen-m-gigley"><first>Helen M.</first><last>Gigley</last></author>
       <doi>10.3115/981210.981250</doi>
       <pages>324–332</pages>
       <url hash="ea932d68">P85-1040</url>
diff --git a/data/xml/P86.xml b/data/xml/P86.xml
index daf57c3e99..48a8a4bc96 100644
--- a/data/xml/P86.xml
+++ b/data/xml/P86.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Tutorial Abstracts</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/981131.981132</doi>
       <pages>1–1</pages>
       <url hash="c685f1e5">P86-1001</url>
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Bringing Natural Language Processing to the Microcomputer Market: The Story of <fixed-case>Q&amp;A</fixed-case></title>
-      <author><first>Gary G.</first><last>Hendrix</last></author>
+      <author id="gary-g-hendrix"><first>Gary G.</first><last>Hendrix</last></author>
       <doi>10.3115/981131.981133</doi>
       <pages>2–2</pages>
       <url hash="810ff9c1">P86-1002</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Time and Tense in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <doi>10.3115/981131.981134</doi>
       <pages>3–9</pages>
@@ -40,12 +40,12 @@
     </paper>
     <paper id="4">
       <title>Recovering Implicit Information</title>
-      <author><first>Martha S.</first><last>Palmer</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="martha-palmer"><first>Martha S.</first><last>Palmer</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Rebecca J.</first><last>Schiffman</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Marcia</first><last>Linebarger</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia</first><last>Linebarger</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <doi>10.3115/981131.981135</doi>
       <pages>10–19</pages>
       <url hash="2ca0ded2">P86-1004</url>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="5">
       <title>Semantic Acquisition In <fixed-case>TELI</fixed-case>: A Transportable, User-Customized Natural Language Processor</title>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <author><first>Douglas E.</first><last>Stumberger</last></author>
       <doi>10.3115/981131.981136</doi>
       <pages>20–29</pages>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="6">
       <title>Computational Complexity of Current <fixed-case>GPSG</fixed-case> Theory</title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <doi>10.3115/981131.981137</doi>
       <pages>30–39</pages>
       <url hash="7a38d009">P86-1006</url>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="7">
       <title>Defining Natural Language Grammars in <fixed-case>GPSG</fixed-case></title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <doi>10.3115/981131.981138</doi>
       <pages>40–44</pages>
       <url hash="b303fe8e">P86-1007</url>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="8">
       <title>Constraint Propagation in <fixed-case>K</fixed-case>immo Systems</title>
-      <author><first>G. Edward</first><last>Barton</last></author>
+      <author id="g-edward-barton"><first>G. Edward</first><last>Barton</last></author>
       <doi>10.3115/981131.981139</doi>
       <pages>45–52</pages>
       <url hash="23223eba">P86-1008</url>
@@ -86,7 +86,7 @@
     </paper>
     <paper id="9">
       <title>Computational Complexity in Two-Level Morphology</title>
-      <author><first>G. Edward</first><last>Barton</last></author>
+      <author id="g-edward-barton"><first>G. Edward</first><last>Barton</last></author>
       <doi>10.3115/981131.981140</doi>
       <pages>53–59</pages>
       <url hash="85fb232e">P86-1009</url>
@@ -103,7 +103,7 @@
     <paper id="11">
       <title>The Relationship Between <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars And Head Grammars</title>
       <author id="david-weir"><first>D. J.</first><last>Weir</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author id="aravind-joshi"><first>A. K.</first><last>Joshi</last></author>
       <doi>10.3115/981131.981142</doi>
       <pages>67–74</pages>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="14">
       <title>Copying in Natural Languages, Context-Freeness, and Queue Grammars</title>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <doi>10.3115/981131.981145</doi>
       <pages>85–89</pages>
       <url hash="dc24a01a">P86-1014</url>
@@ -138,7 +138,7 @@
     <paper id="15">
       <title>A Model of Revision in Natural Language Generation</title>
       <author><first>Marie M.</first><last>Vaughan</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/981131.981146</doi>
       <pages>90–96</pages>
       <url hash="10c4d0a9">P86-1015</url>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="16">
       <title>The <fixed-case>ROMPER</fixed-case> System: Responding to Object-Related Misconceptions using Perspective</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/981131.981147</doi>
       <pages>97–105</pages>
       <url hash="a85f35d0">P86-1016</url>
@@ -154,7 +154,7 @@
     </paper>
     <paper id="17">
       <title>Encoding and Acquiring Meanings for Figurative Phrases</title>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <author><first>Uri</first><last>Zernik</last></author>
       <doi>10.3115/981131.981148</doi>
       <pages>106–111</pages>
@@ -164,8 +164,8 @@
     <paper id="18">
       <title>Semantically Significant Patterns in Dictionary Definitions</title>
       <author><first>Judith</first><last>Markowitz</last></author>
-      <author><first>Thomas</first><last>Ahlswede</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="thomas-ahlswede"><first>Thomas</first><last>Ahlswede</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.3115/981131.981149</doi>
       <pages>112–119</pages>
       <url hash="9a7bb2d3">P86-1018</url>
@@ -174,7 +174,7 @@
     <paper id="19">
       <title>Computer Methods for Morphological Analysis</title>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <author><first>Mark</first><last>Aronoff</last></author>
       <author><first>Frank</first><last>Anshen</last></author>
       <doi>10.3115/981131.981150</doi>
@@ -192,8 +192,8 @@
     </paper>
     <paper id="21">
       <title>The Intonational Structuring of Discourse</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <doi>10.3115/981131.981152</doi>
       <pages>136–144</pages>
       <url hash="a81f6fd2">P86-1021</url>
@@ -201,8 +201,8 @@
     </paper>
     <paper id="22">
       <title>The Contribution of Parsing to Prosodic Phrasing in an Experimental Text-to-Speech System</title>
-      <author><first>Joan</first><last>Bachenko</last></author>
-      <author><first>Eileen</first><last>Fitzpatrick</last></author>
+      <author id="joan-bachenko"><first>Joan</first><last>Bachenko</last></author>
+      <author id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></author>
       <author><first>C. E.</first><last>Wright</last></author>
       <doi>10.3115/981131.981153</doi>
       <pages>145–155</pages>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="23">
       <title>Morphological Decomposition and Stress Assignment for Speech Synthesis</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <doi>10.3115/981131.981154</doi>
       <pages>156–164</pages>
       <url hash="e281dc8f">P86-1023</url>
@@ -231,7 +231,7 @@
     <paper id="25">
       <title><fixed-case>J</fixed-case>apanese Prosodic Phrasing and Intonation Synthesis</title>
       <author><first>Mary E.</first><last>Beckman</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <doi>10.3115/981131.981156</doi>
       <pages>173–180</pages>
       <url hash="174ce067">P86-1025</url>
@@ -239,7 +239,7 @@
     </paper>
     <paper id="26">
       <title>Questions about Connectionist Models of Natural Language</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <doi>10.3115/981131.981168</doi>
       <pages>181–183</pages>
       <url hash="10df13a6">P86-1026</url>
@@ -255,7 +255,7 @@
     </paper>
     <paper id="28">
       <title>Connectionist Models for Natural Language Processing Program</title>
-      <author><first>David L.</first><last>Waltz</last></author>
+      <author id="david-l-waltz"><first>David L.</first><last>Waltz</last></author>
       <doi>10.3115/981131.981170</doi>
       <pages>185–185</pages>
       <url hash="64975356">P86-1028</url>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="33">
       <title>Linguistic Coherence: A Plan-Based Alternative</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <doi>10.3115/981131.981161</doi>
       <pages>215–223</pages>
       <url hash="6c8b2d0a">P86-1033</url>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="35">
       <title>Commonsense Metaphysics and Lexical Semantics</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>William</first><last>Croft</last></author>
       <author><first>Todd</first><last>Davies</last></author>
       <author><first>Douglas</first><last>Edwards</last></author>
@@ -325,7 +325,7 @@
     </paper>
     <paper id="36">
       <title>A Terminological Simplification Transformation for Natural Language Question-Answering Systems</title>
-      <author><first>David G.</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David G.</first><last>Stallard</last></author>
       <doi>10.3115/981131.981164</doi>
       <pages>241–246</pages>
       <url hash="1cd0dfa0">P86-1036</url>
@@ -333,7 +333,7 @@
     </paper>
     <paper id="37">
       <title>Some Uses of Higher-Order Logic in Computational Linguistics</title>
-      <author><first>Dale A.</first><last>Miller</last></author>
+      <author id="dale-a-miller"><first>Dale A.</first><last>Miller</last></author>
       <author><first>Gopalan</first><last>Nadathur</last></author>
       <doi>10.3115/981131.981165</doi>
       <pages>247–256</pages>
@@ -342,7 +342,7 @@
     </paper>
     <paper id="38">
       <title>A Logical Semantics for Feature Structures</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <doi>10.3115/981131.981166</doi>
       <pages>257–266</pages>
       <url hash="14f48e21">P86-1038</url>
@@ -350,7 +350,7 @@
     </paper>
     <paper id="39">
       <title>What Should Machine Translation Be?</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <doi>10.3115/981131.981172</doi>
       <pages>267–267</pages>
       <url hash="0aa3c309">P86-1039</url>
diff --git a/data/xml/P87.xml b/data/xml/P87.xml
index 99df74f2ca..9816d4cb99 100644
--- a/data/xml/P87.xml
+++ b/data/xml/P87.xml
@@ -16,7 +16,7 @@
     <paper id="1">
       <title>Temporal Ontology in Natural Language</title>
       <author><first>Marc</first><last>Moens</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/981175.981176</doi>
       <pages>1–7</pages>
       <url hash="95541478">P87-1001</url>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>A Compositional Semantics of Temporal Expressions in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <doi>10.3115/981175.981177</doi>
       <pages>8–15</pages>
       <url hash="b463a281">P87-1002</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>Situations and Intervals</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/981175.981178</doi>
       <pages>16–24</pages>
       <url hash="eeddb3b9">P87-1003</url>
@@ -48,9 +48,9 @@
     </paper>
     <paper id="5">
       <title>An Environment for Acquiring Semantic Information</title>
-      <author><first>Damaris M.</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris M.</first><last>Ayuso</last></author>
       <author><first>Varda</first><last>Shaked</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/981175.981180</doi>
       <pages>32–40</pages>
       <url hash="8903bb95">P87-1005</url>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="10">
       <title>Constituent-Based Morphological Parsing: A New Approach to the Problem of Word-Recognition.</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Barbara</first><last>Brunson</last></author>
       <doi>10.3115/981175.981185</doi>
       <pages>65–72</pages>
@@ -112,7 +112,7 @@
     <paper id="12">
       <title>A Lazy way to Chart-Parse with Categorial Grammars</title>
       <author><first>Remo</first><last>Pareschi</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/981175.981187</doi>
       <pages>81–88</pages>
       <url hash="7db95f9c">P87-1012</url>
@@ -121,7 +121,7 @@
     <paper id="13">
       <title>A Logical Version of Functional Grammar</title>
       <author><first>William C.</first><last>Rounds</last></author>
-      <author><first>Alexis</first><last>Manaster-Ramer</last></author>
+      <author id="alexis-manaster-ramer"><first>Alexis</first><last>Manaster-Ramer</last></author>
       <doi>10.3115/981175.981188</doi>
       <pages>89–96</pages>
       <url hash="a12ad3a3">P87-1013</url>
@@ -129,8 +129,8 @@
     </paper>
     <paper id="14">
       <title>Functional Unification Grammar Revisited</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
-      <author><first>Cecile L.</first><last>Paris</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
       <doi>10.3115/981175.981189</doi>
       <pages>97–103</pages>
       <url hash="144fb7e0">P87-1014</url>
@@ -138,9 +138,9 @@
     </paper>
     <paper id="15">
       <title>Characterizing Structural Descriptions Produced by Various Grammatical Formalisms</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David J.</first><last>Weir</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981175.981190</doi>
       <pages>104–111</pages>
       <url hash="9005d618">P87-1015</url>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="17">
       <title>Context-Freeness of the Language Accepted by <fixed-case>M</fixed-case>arcus’ Parser</title>
-      <author><first>R</first><last>Nozohoor-Farshi</last></author>
+      <author id="r-nozohoor-farshi"><first>R</first><last>Nozohoor-Farshi</last></author>
       <doi>10.3115/981175.981192</doi>
       <pages>117–122</pages>
       <url hash="b4a767b8">P87-1017</url>
@@ -175,9 +175,9 @@
     </paper>
     <paper id="19">
       <title>Nominalizations in <fixed-case>PUNDIT</fixed-case></title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Martha S.</first><last>Palmer</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="martha-palmer"><first>Martha S.</first><last>Palmer</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/981175.981194</doi>
       <pages>131–139</pages>
       <url hash="4029c254">P87-1019</url>
@@ -185,8 +185,8 @@
     </paper>
     <paper id="20">
       <title>Toward Treating <fixed-case>E</fixed-case>nglish Nominals Correctly</title>
-      <author><first>Richard W.</first><last>Sproat</last></author>
-      <author><first>Mark Y.</first><last>Liberman</last></author>
+      <author id="richard-sproat"><first>Richard W.</first><last>Sproat</last></author>
+      <author id="mark-liberman"><first>Mark Y.</first><last>Liberman</last></author>
       <doi>10.3115/981175.981195</doi>
       <pages>140–146</pages>
       <url hash="835effd4">P87-1020</url>
@@ -194,7 +194,7 @@
     </paper>
     <paper id="21">
       <title>The Interpretation of Tense in Discourse</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <doi>10.3115/981175.981196</doi>
       <pages>147–154</pages>
       <url hash="ed3929ba">P87-1021</url>
@@ -202,9 +202,9 @@
     </paper>
     <paper id="22">
       <title>A Centering Approach to Pronouns</title>
-      <author><first>Susan E.</first><last>Brennan</last></author>
+      <author id="susan-e-brennan"><first>Susan E.</first><last>Brennan</last></author>
       <author><first>Marilyn W.</first><last>Friedman</last></author>
-      <author><first>Carl J.</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl J.</first><last>Pollard</last></author>
       <doi>10.3115/981175.981197</doi>
       <pages>155–162</pages>
       <url hash="cd60925a">P87-1022</url>
@@ -212,8 +212,8 @@
     </paper>
     <paper id="23">
       <title>Now Let’s Talk About Now; Identifying Cue Phrases Intonationally</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <doi>10.3115/981175.981198</doi>
       <pages>163–171</pages>
       <url hash="d73d12c0">P87-1023</url>
@@ -221,7 +221,7 @@
     </paper>
     <paper id="24">
       <title>On the Acquisition of Lexical Entries: The Perceptual Origin of Thematic Relations</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <doi>10.3115/981175.981199</doi>
       <pages>172–178</pages>
       <url hash="fb18b402">P87-1024</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="25">
       <title>The Logical Analysis of Lexical Ambiguity</title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/981175.981200</doi>
       <pages>179–185</pages>
       <url hash="fbb10931">P87-1025</url>
@@ -238,7 +238,7 @@
     <paper id="26">
       <title><fixed-case>FLUSH</fixed-case>: A Flexible Lexicon Design</title>
       <author><first>David J.</first><last>Besemer</last></author>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
       <doi>10.3115/981175.981201</doi>
       <pages>186–192</pages>
       <url hash="43606086">P87-1026</url>
@@ -246,10 +246,10 @@
     </paper>
     <paper id="27">
       <title>The Derivation of a Grammatically Indexed Lexicon from the Longman Dictionary of Contemporary <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Bran</first><last>Boguraev</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="branimir-boguraev"><first>Bran</first><last>Boguraev</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <doi>10.3115/981175.981202</doi>
       <pages>193–200</pages>
@@ -258,8 +258,8 @@
     </paper>
     <paper id="28">
       <title>Lexical Selection in the Process of Language Generation</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/981175.981203</doi>
       <pages>201–206</pages>
       <url hash="1897b842">P87-1028</url>
@@ -269,7 +269,7 @@
       <title>Constraints on the Generation of Adjunct Clauses</title>
       <author><first>Alison K.</first><last>Huettner</last></author>
       <author><first>Marie M.</first><last>Vaughan</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <doi>10.3115/981175.981204</doi>
       <pages>207–214</pages>
       <url hash="5628b06b">P87-1029</url>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="33">
       <title>A Unification Method for Disjunctive Feature Descriptions</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <doi>10.3115/981175.981208</doi>
       <pages>235–242</pages>
       <url hash="ae3e4d68">P87-1033</url>
@@ -309,7 +309,7 @@
     </paper>
     <paper id="34">
       <title>Revised Generalized Phrase Structure Grammar</title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <doi>10.3115/981175.981209</doi>
       <pages>243–250</pages>
       <url hash="12ccbc15">P87-1034</url>
diff --git a/data/xml/P88.xml b/data/xml/P88.xml
index aacaa706fc..9f74a23ed5 100644
--- a/data/xml/P88.xml
+++ b/data/xml/P88.xml
@@ -16,7 +16,7 @@
     <paper id="1">
       <title>Adapting an <fixed-case>E</fixed-case>nglish Morphological Analyzer for <fixed-case>F</fixed-case>rench</title>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <doi>10.3115/982023.982024</doi>
       <pages>1–6</pages>
       <url hash="61b7f916">P88-1001</url>
@@ -24,10 +24,10 @@
     </paper>
     <paper id="2">
       <title>Sentence Fragments Regular Structures</title>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/982023.982025</doi>
       <pages>7–16</pages>
       <url hash="43b55852">P88-1002</url>
@@ -35,8 +35,8 @@
     </paper>
     <paper id="3">
       <title>Multi-Level Plurals and Distributivity</title>
-      <author><first>Remko</first><last>Scha</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/982023.982026</doi>
       <pages>17–24</pages>
       <url hash="4c1beba1">P88-1003</url>
@@ -45,7 +45,7 @@
     <paper id="4">
       <title>The Interpretation of Relational Nouns</title>
       <author><first>Jos</first><last>de Bruin</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <doi>10.3115/982023.982027</doi>
       <pages>25–32</pages>
       <url hash="845badbc">P88-1004</url>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="5">
       <title>Quantifier Scoping in the <fixed-case>SRI</fixed-case> Core Language Engine</title>
-      <author><first>Douglas B.</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas B.</first><last>Moran</last></author>
       <doi>10.3115/982023.982028</doi>
       <pages>33–40</pages>
       <url hash="5c1a4bc2">P88-1005</url>
@@ -61,7 +61,7 @@
     </paper>
     <paper id="6">
       <title>A General Computational Treatment of Comparatives for Natural Language Question Answering</title>
-      <author><first>Bruce W.</first><last>Ballard</last></author>
+      <author id="bruce-w-ballard"><first>Bruce W.</first><last>Ballard</last></author>
       <doi>10.3115/982023.982029</doi>
       <pages>41–48</pages>
       <url hash="d16be232">P88-1006</url>
@@ -69,7 +69,7 @@
     </paper>
     <paper id="7">
       <title>Parsing and Interpreting Comparatives</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Amelie</first><last>Banks</last></author>
       <doi>10.3115/982023.982030</doi>
       <pages>49–60</pages>
@@ -95,7 +95,7 @@
     <paper id="10">
       <title>An Integrated Framework for Semantic and Pragmatic Interpretation</title>
       <author><first>Martha E.</first><last>Pollack</last></author>
-      <author><first>Fernando C.N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C.N.</first><last>Pereira</last></author>
       <doi>10.3115/982023.982033</doi>
       <pages>75–86</pages>
       <url hash="981e299a">P88-1010</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="12">
       <title>Interpretation as Abduction</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <author><first>Mark</first><last>Stickel</last></author>
       <author><first>Paul</first><last>Martin</last></author>
       <author><first>Douglas</first><last>Edwards</last></author>
@@ -125,7 +125,7 @@
       <title><fixed-case>P</fixed-case>roject <fixed-case>A</fixed-case>pril --- A Progress Report</title>
       <author><first>Robin</first><last>Haigh</last></author>
       <author><first>Geoffrey</first><last>Sampson</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <doi>10.3115/982023.982036</doi>
       <pages>104–112</pages>
       <url hash="4859a454">P88-1013</url>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="14">
       <title>Discourse Deixis: Reference to Discourse Segments</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <doi>10.3115/982023.982037</doi>
       <pages>113–122</pages>
       <url hash="cc34ba88">P88-1014</url>
@@ -141,7 +141,7 @@
     </paper>
     <paper id="15">
       <title>Cues and control in Expert-Client Dialogues</title>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <author><first>Phil</first><last>Stenton</last></author>
       <doi>10.3115/982023.982038</doi>
       <pages>123–130</pages>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="16">
       <title>A Computational Theory of Perspective and Reference in Narrative</title>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
       <author><first>William J.</first><last>Rapaport</last></author>
       <doi>10.3115/982023.982039</doi>
       <pages>131–138</pages>
@@ -170,7 +170,7 @@
     </paper>
     <paper id="18">
       <title>Aspects of Clause Politeness in <fixed-case>J</fixed-case>apanese: An Extended Inquiry Semantics Treatment</title>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <doi>10.3115/982023.982041</doi>
       <pages>147–154</pages>
       <url hash="d4b571b1">P88-1018</url>
@@ -180,8 +180,8 @@
       <title>Experiences With an On-Line Translating Dialogue System</title>
       <author><first>Seiji</first><last>Miike</last></author>
       <author><first>Koichi</first><last>Hasebe</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
-      <author><first>Shin-ya</first><last>Amano</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
+      <author id="shin-ya-amano"><first>Shin-ya</first><last>Amano</last></author>
       <doi>10.3115/982023.982042</doi>
       <pages>155–162</pages>
       <url hash="cba32790">P88-1019</url>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="20">
       <title>Planning Coherent Multisentential Text</title>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <doi>10.3115/982023.982043</doi>
       <pages>163–169</pages>
       <url hash="ed1019ef">P88-1020</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="21">
       <title>A Practical Nonmonotonic Theory for Reasoning about Speech Acts</title>
-      <author><first>Douglas</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
       <author><first>Kurt</first><last>Konolige</last></author>
       <doi>10.3115/982023.982044</doi>
       <pages>170–178</pages>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="22">
       <title>Two Types of Planning in Language Generation</title>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <doi>10.3115/982023.982045</doi>
       <pages>179–186</pages>
       <url hash="9623a5d3">P88-1022</url>
@@ -214,8 +214,8 @@
     </paper>
     <paper id="23">
       <title>Assigning Intonational Features in Synthesized Spoken Directions</title>
-      <author><first>James Raymond</first><last>Davis</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="james-davis"><first>James Raymond</first><last>Davis</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <doi>10.3115/982023.982046</doi>
       <pages>187–193</pages>
       <url hash="f62eb1d5">P88-1023</url>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="25">
       <title>Syntactic Approaches to Automatic Book Indexing</title>
-      <author><first>Gerard</first><last>Salton</last></author>
+      <author id="gerard-salton"><first>Gerard</first><last>Salton</last></author>
       <doi>10.3115/982023.982048</doi>
       <pages>204–210</pages>
       <url hash="523fb4cd">P88-1025</url>
@@ -239,7 +239,7 @@
     </paper>
     <paper id="26">
       <title>Lexicon and grammar in probabilistic tagging of written <fixed-case>E</fixed-case>nglish.</title>
-      <author><first>Andrew David</first><last>Beale</last></author>
+      <author id="andrew-david-beale"><first>Andrew David</first><last>Beale</last></author>
       <doi>10.3115/982023.982049</doi>
       <pages>211–216</pages>
       <url hash="ada916ce">P88-1026</url>
@@ -247,8 +247,8 @@
     </paper>
     <paper id="27">
       <title>Parsing vs. Text Processing in the Analysis of Dictionary Definitions</title>
-      <author><first>Thomas</first><last>Ahlswede</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="thomas-ahlswede"><first>Thomas</first><last>Ahlswede</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.3115/982023.982050</doi>
       <pages>217–224</pages>
       <url hash="456cf838">P88-1027</url>
@@ -264,7 +264,7 @@
     </paper>
     <paper id="29">
       <title>Conditional Descriptions in Functional Unification Grammar</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <doi>10.3115/982023.982052</doi>
       <pages>233–240</pages>
       <url hash="274ee40e">P88-1029</url>
@@ -280,7 +280,7 @@
     </paper>
     <paper id="31">
       <title>Graph-structured Stack and Natural Language Parsing</title>
-      <author><first>Masaru</first><last>Tomita</last></author>
+      <author id="masaru-tomita"><first>Masaru</first><last>Tomita</last></author>
       <doi>10.3115/982023.982054</doi>
       <pages>249–257</pages>
       <url hash="a12980fb">P88-1031</url>
@@ -289,7 +289,7 @@
     <paper id="32">
       <title>An <fixed-case>E</fixed-case>arley-Type Parsing Algorithm for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/982023.982055</doi>
       <pages>258–269</pages>
       <url hash="0fd96d06">P88-1032</url>
@@ -305,8 +305,8 @@
     </paper>
     <paper id="34">
       <title><fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars: Generative Power and Relationship to Linear Context-Free Rewriting Systems</title>
-      <author><first>David J.</first><last>Weir</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/982023.982057</doi>
       <pages>278–285</pages>
       <url hash="d3a3b11a">P88-1034</url>
@@ -314,8 +314,8 @@
     </paper>
     <paper id="35">
       <title>Unification of Disjunctive Feature Descriptions</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
-      <author><first>Jochen</first><last>Dorre</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dorre</last></author>
       <doi>10.3115/982023.982058</doi>
       <pages>286–294</pages>
       <url hash="3128aa48">P88-1035</url>
diff --git a/data/xml/P89.xml b/data/xml/P89.xml
index 14edcfdff8..73ba46cf9d 100644
--- a/data/xml/P89.xml
+++ b/data/xml/P89.xml
@@ -23,10 +23,10 @@
     </paper>
     <paper id="2">
       <title>A Semantic-Head-Driven Generation Algorithm for Unification-Based Formalisms</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <doi>10.3115/981623.981625</doi>
       <pages>7–17</pages>
       <url hash="13a19cd3">P89-1002</url>
@@ -35,7 +35,7 @@
     <paper id="3">
       <title>A Three-Valued Interpretation of Negation in Feature Structure Descriptions</title>
       <author><first>Anuj</first><last>Dawar</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/981623.981626</doi>
       <pages>18–24</pages>
       <url hash="b2047da4">P89-1003</url>
@@ -43,7 +43,7 @@
     </paper>
     <paper id="4">
       <title>Logical Forms in the Core Language Engine</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Jan</first><last>van Eijck</last></author>
       <doi>10.3115/981623.981627</doi>
       <pages>25–32</pages>
@@ -52,7 +52,7 @@
     </paper>
     <paper id="5">
       <title>Unification-Based Semantic Interpretation</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <doi>10.3115/981623.981628</doi>
       <pages>33–41</pages>
       <url hash="a9ed8223">P89-1005</url>
@@ -61,7 +61,7 @@
     <paper id="6">
       <title>Reference to Locations</title>
       <author><first>Lewis G.</first><last>Creary</last></author>
-      <author><first>J. Mark</first><last>Gawron</last></author>
+      <author id="jean-mark-gawron"><first>J. Mark</first><last>Gawron</last></author>
       <author><first>John</first><last>Nerbonne</last></author>
       <doi>10.3115/981623.981629</doi>
       <pages>42–50</pages>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="7">
       <title>Getting at Discourse Referents</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/981623.981630</doi>
       <pages>51–59</pages>
       <url hash="fc6490cc">P89-1007</url>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="10">
       <title>Word Association Norms, Mutual Information, and Lexicography</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
       <doi>10.3115/981623.981633</doi>
       <pages>76–83</pages>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="11">
       <title>Lexical Access in Connected Speech Recognition</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <doi>10.3115/981623.981634</doi>
       <pages>84–90</pages>
       <url hash="f7e0b362">P89-1011</url>
@@ -111,8 +111,8 @@
     </paper>
     <paper id="12">
       <title>Dictionaries, Dictionary Grammars and Dictionary Entry Parsing</title>
-      <author><first>Mary S.</first><last>Neff</last></author>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
       <doi>10.3115/981623.981635</doi>
       <pages>91–101</pages>
       <url hash="ee3d0162">P89-1012</url>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="13">
       <title>Some Chart-Based Techniques for Parsing Ill-Formed Input</title>
-      <author><first>Chris S.</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris S.</first><last>Mellish</last></author>
       <doi>10.3115/981623.981636</doi>
       <pages>102–109</pages>
       <url hash="19311f04">P89-1013</url>
@@ -128,9 +128,9 @@
     </paper>
     <paper id="14">
       <title>On Representing Governed Prepositions and Handling “Incorrect” and Novel Prepositions</title>
-      <author><first>Hatte R.</first><last>Blejer</last></author>
+      <author id="hatte-blejer"><first>Hatte R.</first><last>Blejer</last></author>
       <author><first>Sharon</first><last>Flank</last></author>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <doi>10.3115/981623.981637</doi>
       <pages>110–117</pages>
       <url hash="e230fae4">P89-1014</url>
@@ -138,7 +138,7 @@
     </paper>
     <paper id="15">
       <title>Acquiring Disambiguation Rules From Text</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <doi>10.3115/981623.981638</doi>
       <pages>118–125</pages>
       <url hash="91ddf932">P89-1015</url>
@@ -146,8 +146,8 @@
     </paper>
     <paper id="16">
       <title>The Effects of Interaction on Spoken Discourse</title>
-      <author><first>Sharon L.</first><last>Oviatt</last></author>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="sharon-oviatt"><first>Sharon L.</first><last>Oviatt</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <doi>10.3115/981623.981639</doi>
       <pages>126–134</pages>
       <url hash="8fcec82e">P89-1016</url>
@@ -172,7 +172,7 @@
     </paper>
     <paper id="19">
       <title>A Calculus for Semantic Composition and Scoping</title>
-      <author><first>Fernando C.N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C.N.</first><last>Pereira</last></author>
       <doi>10.3115/981623.981642</doi>
       <pages>152–160</pages>
       <url hash="5af5be9a">P89-1019</url>
@@ -180,7 +180,7 @@
     </paper>
     <paper id="20">
       <title>A General Computational Treatment Of The Comparative</title>
-      <author><first>Carol</first><last>Friedman</last></author>
+      <author id="carol-friedman"><first>Carol</first><last>Friedman</last></author>
       <doi>10.3115/981623.981643</doi>
       <pages>161–168</pages>
       <url hash="da536cb0">P89-1020</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="21">
       <title>The Lexical Semantics of Comparative Expressions in a Multi-Level Semantic Processor</title>
-      <author><first>Duane E.</first><last>Olawsky</last></author>
+      <author id="duane-e-olawsky"><first>Duane E.</first><last>Olawsky</last></author>
       <doi>10.3115/981623.981644</doi>
       <pages>169–176</pages>
       <url hash="9772dd70">P89-1021</url>
@@ -197,7 +197,7 @@
     <paper id="22">
       <title>Automatic Acquisition of the Lexical Semantics of Verbs From Sentence Frames</title>
       <author><first>Mort</first><last>Webster</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <doi>10.3115/981623.981645</doi>
       <pages>177–184</pages>
       <url hash="61d23225">P89-1022</url>
@@ -205,8 +205,8 @@
     </paper>
     <paper id="23">
       <title>Computer Aided Interpretation of Lexical Cooccurrences</title>
-      <author><first>Paola</first><last>Velardi</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <doi>10.3115/981623.981646</doi>
       <pages>185–192</pages>
       <url hash="30444c1b">P89-1023</url>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="24">
       <title>A Hybrid Approach to Representation in the <fixed-case>J</fixed-case>anus Natural Language Processor</title>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/981623.981647</doi>
       <pages>193–202</pages>
       <url hash="36e5519e">P89-1024</url>
@@ -222,8 +222,8 @@
     </paper>
     <paper id="25">
       <title>Planning Text for Advisory Dialogues</title>
-      <author><first>Johanna D.</first><last>Moore</last></author>
-      <author><first>Cecile L.</first><last>Paris</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
       <doi>10.3115/981623.981648</doi>
       <pages>203–211</pages>
       <url hash="ee7a3ad8">P89-1025</url>
@@ -231,8 +231,8 @@
     </paper>
     <paper id="26">
       <title>Two Constraints on Speech Act Ambiguity</title>
-      <author><first>Elizabeth A.</first><last>Hinkelman</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="elizabeth-a-hinkelman"><first>Elizabeth A.</first><last>Hinkelman</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/981623.981649</doi>
       <pages>212–219</pages>
       <url hash="12b3bab4">P89-1026</url>
@@ -240,8 +240,8 @@
     </paper>
     <paper id="27">
       <title>Treatment of Long Distance Dependencies in <fixed-case>LFG</fixed-case> and <fixed-case>TAG</fixed-case>: Functional Uncertainty in <fixed-case>LFG</fixed-case> Is a Corollary in <fixed-case>TAG</fixed-case></title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/981623.981650</doi>
       <pages>220–227</pages>
       <url hash="2e942fdb">P89-1027</url>
@@ -257,7 +257,7 @@
     </paper>
     <paper id="29">
       <title>A Generalization of the Offline Parsable Grammars</title>
-      <author><first>Andrew</first><last>Haas</last></author>
+      <author id="andrew-haas"><first>Andrew</first><last>Haas</last></author>
       <doi>10.3115/981623.981652</doi>
       <pages>237–242</pages>
       <url hash="42eb6267">P89-1029</url>
@@ -265,7 +265,7 @@
     </paper>
     <paper id="30">
       <title>Discourse Entities in <fixed-case>J</fixed-case>anus</title>
-      <author><first>Damaris M.</first><last>Ayuso</last></author>
+      <author id="damaris-ayuso"><first>Damaris M.</first><last>Ayuso</last></author>
       <doi>10.3115/981623.981653</doi>
       <pages>243–250</pages>
       <url hash="3eebcc83">P89-1030</url>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="31">
       <title>Evaluating Discourse Processing Algorithms</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <doi>10.3115/981623.981654</doi>
       <pages>251–261</pages>
       <url hash="af6d5fb8">P89-1031</url>
@@ -282,7 +282,7 @@
     <paper id="32">
       <title>A Computational Mechanism for Pronominal Reference</title>
       <author><first>Robert J. P.</first><last>Ingria</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/981623.981655</doi>
       <pages>262–271</pages>
       <url hash="6c7e3697">P89-1032</url>
@@ -301,7 +301,7 @@
       <author><first>Claire</first><last>Gardent</last></author>
       <author><first>Gabriel G.</first><last>Bias</last></author>
       <author><first>Pierre-Frangois</first><last>Jurie</last></author>
-      <author><first>Karine</first><last>Baschung</last></author>
+      <author id="karine-baschung"><first>Karine</first><last>Baschung</last></author>
       <doi>10.3115/981623.981657</doi>
       <pages>280–287</pages>
       <url hash="d35a4358">P89-1034</url>
diff --git a/data/xml/P90.xml b/data/xml/P90.xml
index 82e95578ad..862b75215e 100644
--- a/data/xml/P90.xml
+++ b/data/xml/P90.xml
@@ -15,8 +15,8 @@
     </frontmatter>
     <paper id="1">
       <title>Polynomial Time Parsing of <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David J.</first><last>Weir</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
       <doi>10.3115/981823.981824</doi>
       <pages>1–8</pages>
       <url hash="d3823382">P90-1001</url>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>Structure and Intonation in Spoken Language Understanding</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/981823.981825</doi>
       <pages>9–16</pages>
       <url hash="a220cf88">P90-1002</url>
@@ -32,8 +32,8 @@
     </paper>
     <paper id="3">
       <title>Prosody, Syntax and Parsing</title>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <doi>10.3115/981823.981826</doi>
       <pages>17–22</pages>
       <url hash="09ac3d07">P90-1003</url>
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>Empirical Study of Predictive Powers of Simple Attachment Schemes for Post-modifier Prepositional Phrases</title>
       <author><first>Greg</first><last>Whittemore</last></author>
-      <author><first>Kathleen</first><last>Ferrara</last></author>
+      <author id="kathleen-ferraro"><first>Kathleen</first><last>Ferrara</last></author>
       <author><first>Hans</first><last>Brunner</last></author>
       <doi>10.3115/981823.981827</doi>
       <pages>23–30</pages>
@@ -67,9 +67,9 @@
     </paper>
     <paper id="7">
       <title>Transforming Syntactic Graphs Into Semantic Graphs</title>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <author><first>Robert F.</first><last>Simmons</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <doi>10.3115/981823.981830</doi>
       <pages>47–53</pages>
       <url hash="bde695f0">P90-1007</url>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="9">
       <title>Designer Definites in Logical Form</title>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <doi>10.3115/981823.981832</doi>
       <pages>62–69</pages>
       <url hash="674c1909">P90-1009</url>
@@ -93,8 +93,8 @@
     </paper>
     <paper id="10">
       <title>Mixed Initiative in Dialogue: An Investigation into Discourse Segmentation</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <doi>10.3115/981823.981833</doi>
       <pages>70–78</pages>
       <url hash="d14de285">P90-1010</url>
@@ -102,7 +102,7 @@
     </paper>
     <paper id="11">
       <title>Performatives in a Rationally Based Speech Act Theory</title>
-      <author><first>Philip R.</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
       <author><first>Hector J.</first><last>Levesque</last></author>
       <doi>10.3115/981823.981834</doi>
       <pages>79–88</pages>
@@ -111,7 +111,7 @@
     </paper>
     <paper id="12">
       <title>Normal State Implicature</title>
-      <author><first>Nancy L.</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy L.</first><last>Green</last></author>
       <doi>10.3115/981823.981835</doi>
       <pages>89–96</pages>
       <url hash="3986511f">P90-1012</url>
@@ -151,7 +151,7 @@
     </paper>
     <paper id="17">
       <title>Solving Thematic Divergences in Machine Translation</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <doi>10.3115/981823.981840</doi>
       <pages>127–134</pages>
       <url hash="8ebda07c">P90-1017</url>
@@ -160,7 +160,7 @@
     <paper id="18">
       <title>A Syntactic Filter on Pronominal Anaphora for Slot Grammar</title>
       <author><first>Shalom</first><last>Lappin</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <doi>10.3115/981823.981841</doi>
       <pages>135–142</pages>
       <url hash="24e22dd9">P90-1018</url>
@@ -225,9 +225,9 @@
     </paper>
     <paper id="26">
       <title>Asymmetry in Parsing and Generating with Unification Grammars: Case Studies From <fixed-case>ELU</fixed-case></title>
-      <author><first>Graham</first><last>Russell</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Susan</first><last>Warwick</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick</last></author>
       <doi>10.3115/981823.981849</doi>
       <pages>205–211</pages>
       <url hash="38e4683f">P90-1026</url>
@@ -235,7 +235,7 @@
     </paper>
     <paper id="27">
       <title>Automated Inversion of Logic Grammars for Generation</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Ping</first><last>Peng</last></author>
       <doi>10.3115/981823.981850</doi>
       <pages>212–219</pages>
@@ -253,9 +253,9 @@
     </paper>
     <paper id="29">
       <title>Multiple Underlying Systems: Translating User Requests into Programs to Produce Answers</title>
-      <author><first>Robert J.</first><last>Bobrow</last></author>
+      <author id="robert-bobrow"><first>Robert J.</first><last>Bobrow</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Ralph M.</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph M.</first><last>Weischedel</last></author>
       <doi>10.3115/981823.981852</doi>
       <pages>227–234</pages>
       <url hash="51422006">P90-1029</url>
@@ -263,7 +263,7 @@
     </paper>
     <paper id="30">
       <title>Computational structure of generative phonology and its relation to language comprehension.</title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <doi>10.3115/981823.981853</doi>
       <pages>235–242</pages>
       <url hash="e061abc4">P90-1030</url>
@@ -271,7 +271,7 @@
     </paper>
     <paper id="31">
       <title>Parsing the <fixed-case>LOB</fixed-case> Corpus</title>
-      <author><first>Carl G.</first><last>de Marcken</last></author>
+      <author id="carl-de-marcken"><first>Carl G.</first><last>de Marcken</last></author>
       <doi>10.3115/981823.981854</doi>
       <pages>243–251</pages>
       <url hash="eb34cc51">P90-1031</url>
@@ -279,8 +279,8 @@
     </paper>
     <paper id="32">
       <title>Automatically Extracting and Representing Collocations for Language Generation</title>
-      <author><first>Frank A.</first><last>Smadja</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="frank-smadja"><first>Frank A.</first><last>Smadja</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/981823.981855</doi>
       <pages>252–259</pages>
       <url hash="142c77c2">P90-1032</url>
@@ -288,7 +288,7 @@
     </paper>
     <paper id="33">
       <title>Disamibiguating and Interpreting Verb Definitions</title>
-      <author><first>Yael</first><last>Ravin</last></author>
+      <author id="yael-ravin"><first>Yael</first><last>Ravin</last></author>
       <doi>10.3115/981823.981856</doi>
       <pages>260–267</pages>
       <url hash="2ae1cf00">P90-1033</url>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="34">
       <title>Noun Classification From Predicate-Argument Structures</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <doi>10.3115/981823.981857</doi>
       <pages>268–275</pages>
       <url hash="a697b365">P90-1034</url>
@@ -305,7 +305,7 @@
     <paper id="35">
       <title>Deterministic Left to Right Parsing of Tree Adjoining Languages</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/981823.981858</doi>
       <pages>276–283</pages>
       <url hash="d5434d40">P90-1035</url>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="37">
       <title>Lexical and Syntactic Rules in a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Anne</first><last>Abeille</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeille</last></author>
       <doi>10.3115/981823.981860</doi>
       <pages>292–298</pages>
       <url hash="291438d0">P90-1037</url>
diff --git a/data/xml/P91.xml b/data/xml/P91.xml
index 38ed3e3ce4..0f63d5f5f1 100644
--- a/data/xml/P91.xml
+++ b/data/xml/P91.xml
@@ -24,7 +24,7 @@
     <paper id="2">
       <title>Inclusion, Disjointness and Choice: The Logic of Linguistic Classification</title>
       <author><first>Bob</first><last>Carpenter</last></author>
-      <author><first>Carl</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl</first><last>Pollard</last></author>
       <doi>10.3115/981344.981346</doi>
       <pages>9–16</pages>
       <url hash="a2e2a284">P91-1002</url>
@@ -43,7 +43,7 @@
     <paper id="4">
       <title>Toward a Plan-Based Understanding Model for Mixed-Initiative Dialogues</title>
       <author><first>Hiroaki</first><last>Kitano</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <doi>10.3115/981344.981348</doi>
       <pages>25–32</pages>
       <url hash="ada2ecb4">P91-1004</url>
@@ -51,7 +51,7 @@
     </paper>
     <paper id="5">
       <title>An Algorithm for Plan Recognition in Collaborative Discourse</title>
-      <author><first>Karen E.</first><last>Lochbaum</last></author>
+      <author id="karen-e-lochbaum"><first>Karen E.</first><last>Lochbaum</last></author>
       <doi>10.3115/981344.981349</doi>
       <pages>33–38</pages>
       <url hash="7a81e510">P91-1005</url>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="6">
       <title>A Three-Level Model for Plan Exploration</title>
-      <author><first>Lance A.</first><last>Ramshaw</last></author>
+      <author id="lance-ramshaw"><first>Lance A.</first><last>Ramshaw</last></author>
       <doi>10.3115/981344.981350</doi>
       <pages>39–46</pages>
       <url hash="4de806be">P91-1006</url>
@@ -68,7 +68,7 @@
     <paper id="7">
       <title>A Tripartite Plan-Based Model of Dialogue</title>
       <author><first>Lynn</first><last>Lambert</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981344.981351</doi>
       <pages>47–54</pages>
       <url hash="4df0e470">P91-1007</url>
@@ -77,7 +77,7 @@
     <paper id="8">
       <title>Discourse Relations and Defeasible Knowledge</title>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <doi>10.3115/981344.981352</doi>
       <pages>55–62</pages>
       <url hash="9b02d0ce">P91-1008</url>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="9">
       <title>Some Facts About Centers, Indexicals, and Demonstratives</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/981344.981353</doi>
       <pages>63–70</pages>
       <url hash="87350fec">P91-1009</url>
@@ -93,7 +93,7 @@
     </paper>
     <paper id="10">
       <title>Type-Raising and Directionality in Combinatory Grammar</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/981344.981354</doi>
       <pages>71–78</pages>
       <url hash="2c0690af">P91-1010</url>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="12">
       <title>Compose-Reduce Parsing</title>
-      <author><first>Henry S.</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
       <author><first>Mike</first><last>Dixon</last></author>
       <author><first>John</first><last>Lamping</last></author>
       <doi>10.3115/981344.981356</doi>
@@ -135,7 +135,7 @@
     </paper>
     <paper id="15">
       <title>Head Corner Parsing for Discontinuous Constituency</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <doi>10.3115/981344.981359</doi>
       <pages>114–121</pages>
       <url hash="3a1657ac">P91-1015</url>
@@ -171,9 +171,9 @@
     <paper id="19">
       <title>Subject-Dependent Co-Occurrence and Word Sense Disambiguation</title>
       <author><first>Joe A.</first><last>Guthriee</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Homa</first><last>Aidinejad</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/981344.981363</doi>
       <pages>146–152</pages>
       <url hash="233b92fd">P91-1019</url>
@@ -182,7 +182,7 @@
     <paper id="20">
       <title>A System for Translating Locative Prepositions From <fixed-case>E</fixed-case>nglish Into <fixed-case>F</fixed-case>rench</title>
       <author><first>Nathalie</first><last>Japkowicz</last></author>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
       <doi>10.3115/981344.981364</doi>
       <pages>153–160</pages>
       <url hash="ae7e5aa1">P91-1020</url>
@@ -190,10 +190,10 @@
     </paper>
     <paper id="21">
       <title>Translation by Quasi Logical Form Transfer</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>David</first><last>Carter</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <doi>10.3115/981344.981365</doi>
       <pages>161–168</pages>
       <url hash="fb1a6cab">P91-1021</url>
@@ -201,9 +201,9 @@
     </paper>
     <paper id="22">
       <title>Aligning Sentences in Parallel Corpora</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Jennifer C.</first><last>Lai</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="jennifer-c-lai"><first>Jennifer C.</first><last>Lai</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <doi>10.3115/981344.981366</doi>
       <pages>169–176</pages>
       <url hash="fcbec16e">P91-1022</url>
@@ -211,8 +211,8 @@
     </paper>
     <paper id="23">
       <title>A Program for Aligning Sentences in Bilingual Corpora</title>
-      <author><first>William A.</first><last>Gale</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William A.</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <doi>10.3115/981344.981367</doi>
       <pages>177–184</pages>
       <url hash="b8a4e19c">P91-1023</url>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="24">
       <title>Experiments and Prospects of Example-Based Machine Translation</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <doi>10.3115/981344.981368</doi>
       <pages>185–192</pages>
@@ -255,9 +255,9 @@
     </paper>
     <paper id="28">
       <title>Multiple Default Inheritance in a Unification-Based Lexicon</title>
-      <author><first>Graham</first><last>Russell</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Susan</first><last>Warwick-Armstrong</last></author>
+      <author id="graham-russell"><first>Graham</first><last>Russell</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Warwick-Armstrong</last></author>
       <doi>10.3115/981344.981372</doi>
       <pages>215–221</pages>
       <url hash="5fbf6ff8">P91-1028</url>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="30">
       <title>Structural Ambiguity and Lexical Relations</title>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <author><first>Mats</first><last>Rooth</last></author>
       <doi>10.3115/981344.981374</doi>
       <pages>229–236</pages>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="32">
       <title>Finite-State Approximation of Phrase Structure Grammars</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <doi>10.3115/981344.981376</doi>
       <pages>246–255</pages>
       <url hash="5436e0c6">P91-1032</url>
@@ -298,7 +298,7 @@
     </paper>
     <paper id="33">
       <title>Feature Logic With Weak Subsumption Constraints</title>
-      <author><first>Jochen</first><last>Dorre</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dorre</last></author>
       <doi>10.3115/981344.981377</doi>
       <pages>256–263</pages>
       <url hash="200e8876">P91-1033</url>
@@ -306,10 +306,10 @@
     </paper>
     <paper id="34">
       <title>Word-Sense Disambiguation Using Statistical Methods</title>
-      <author><first>Peter F.</first><last>Brown</last></author>
-      <author><first>Stephen A.</first><last>Della Pietra</last></author>
-      <author><first>Vincent J.</first><last>Della Pietra</last></author>
-      <author><first>Robert L.</first><last>Mercer</last></author>
+      <author id="peter-f-brown"><first>Peter F.</first><last>Brown</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen A.</first><last>Della Pietra</last></author>
+      <author id="vincent-j-della-pietra"><first>Vincent J.</first><last>Della Pietra</last></author>
+      <author id="robert-l-mercer"><first>Robert L.</first><last>Mercer</last></author>
       <doi>10.3115/981344.981378</doi>
       <pages>264–270</pages>
       <url hash="f540a326">P91-1034</url>
@@ -317,7 +317,7 @@
     </paper>
     <paper id="35">
       <title>A Stochastic Process for Word Frequency Distributions</title>
-      <author><first>Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>Harald</first><last>Baayen</last></author>
       <doi>10.3115/981344.981379</doi>
       <pages>271–278</pages>
       <url hash="f05a7ffa">P91-1035</url>
@@ -325,7 +325,7 @@
     </paper>
     <paper id="36">
       <title>From N-Grams to Collocations: An Evaluation of <fixed-case>X</fixed-case>tract</title>
-      <author><first>Frank A.</first><last>Smadja</last></author>
+      <author id="frank-smadja"><first>Frank A.</first><last>Smadja</last></author>
       <doi>10.3115/981344.981380</doi>
       <pages>279–284</pages>
       <url hash="ad428894">P91-1036</url>
@@ -333,7 +333,7 @@
     </paper>
     <paper id="37">
       <title>Predicting Intonational Phrasing From Text</title>
-      <author><first>Michelle Q.</first><last>Wang</last></author>
+      <author id="michelle-q-wang"><first>Michelle Q.</first><last>Wang</last></author>
       <doi>10.3115/981344.981381</doi>
       <pages>285–292</pages>
       <url hash="0f7b1942">P91-1037</url>
@@ -342,8 +342,8 @@
     <paper id="38">
       <title>A Preference-first Language Processor: Integrating the Unification Grammar and <fixed-case>M</fixed-case>arkov Language Model for Speech Recognition Applications</title>
       <author><first>Lee-Feng</first><last>Chien</last></author>
-      <author><first>K. J.</first><last>Chen</last></author>
-      <author><first>Lin-Shan</first><last>Lee</last></author>
+      <author id="keh-jiann-chen"><first>K. J.</first><last>Chen</last></author>
+      <author id="lin-shan-lee"><first>Lin-Shan</first><last>Lee</last></author>
       <doi>10.3115/981344.981382</doi>
       <pages>293–298</pages>
       <url hash="0c83b5ad">P91-1038</url>
@@ -351,7 +351,7 @@
     </paper>
     <paper id="39">
       <title>Factorization of Language Constraints in Speech Recognition</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <author><first>Chin-Hui</first><last>Lee</last></author>
       <doi>10.3115/981344.981383</doi>
       <pages>299–306</pages>
@@ -376,7 +376,7 @@
     </paper>
     <paper id="42">
       <title>Unification With Lazy Non-Redundant Copying</title>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <doi>10.3115/981344.981386</doi>
       <pages>323–330</pages>
       <url hash="50de5fba">P91-1042</url>
@@ -441,7 +441,7 @@
     </paper>
     <paper id="50">
       <title>Collaborating on Referring Expressions</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <doi>10.3115/981344.981395</doi>
       <pages>345–346</pages>
       <url hash="17e929db">P91-1050</url>
@@ -465,7 +465,7 @@
     </paper>
     <paper id="53">
       <title>Resolving a Pragmatic Prepositional Phrase Attachment Ambiguity</title>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
       <doi>10.3115/981344.981398</doi>
       <pages>351–352</pages>
       <url hash="749777a5">P91-1053</url>
@@ -489,7 +489,7 @@
     </paper>
     <paper id="56">
       <title>An Incremental Connectionist Phrase Structure Parser</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <doi>10.3115/981344.981401</doi>
       <pages>357–358</pages>
       <url hash="044d9195">P91-1056</url>
diff --git a/data/xml/P92.xml b/data/xml/P92.xml
index 5e24ff35b5..f67111aae6 100644
--- a/data/xml/P92.xml
+++ b/data/xml/P92.xml
@@ -16,8 +16,8 @@
     <paper id="1">
       <title>Inferring Discourse Relations in Context</title>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <doi>10.3115/981967.981968</doi>
       <pages>1–8</pages>
       <url hash="1fcf6e40">P92-1001</url>
@@ -42,7 +42,7 @@
     </paper>
     <paper id="4">
       <title>The Representation of Multimodal User Interface Dialogues Using Discourse Pegs</title>
-      <author><first>Susann</first><last>Luperfoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>Luperfoy</last></author>
       <doi>10.3115/981967.981971</doi>
       <pages>22–31</pages>
       <url hash="688857ca">P92-1004</url>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="5">
       <title>Monotonic Semantic Interpretation</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <doi>10.3115/981967.981972</doi>
       <pages>32–39</pages>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="6">
       <title>Efficiency, Robustness and Accuracy in picky Chart Parsing</title>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <author><first>Carl</first><last>Weir</last></author>
       <doi>10.3115/981967.981973</doi>
       <pages>40–47</pages>
@@ -68,8 +68,8 @@
     </paper>
     <paper id="7">
       <title>A Functional Approach to Generation with <fixed-case>TAG</fixed-case></title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author><first>Gijoo</first><last>Yang</last></author>
       <doi>10.3115/981967.981974</doi>
       <pages>48–55</pages>
@@ -78,9 +78,9 @@
     </paper>
     <paper id="8">
       <title>Integrating Multiple Knowledge Sources for Detection and Correction of Repairs in Human-Computer Dialog</title>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <doi>10.3115/981967.981975</doi>
       <pages>56–63</pages>
       <url hash="6b65a1ba">P92-1008</url>
@@ -88,8 +88,8 @@
     </paper>
     <paper id="9">
       <title>Conversational Implicatures in Indirect Replies</title>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981967.981976</doi>
       <pages>64–71</pages>
       <url hash="bfb58e56">P92-1009</url>
@@ -98,7 +98,7 @@
     <paper id="10">
       <title>Reasoning with Descriptions of Trees</title>
       <author><first>James</first><last>Rogers</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/981967.981977</doi>
       <pages>72–80</pages>
       <url hash="82e87c41">P92-1010</url>
@@ -106,8 +106,8 @@
     </paper>
     <paper id="11">
       <title>Comparing Two Grammar-Based Generation Algorithms: A Case Study</title>
-      <author><first>Miroslav</first><last>Martinovic</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="miroslav-martinovic"><first>Miroslav</first><last>Martinovic</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <doi>10.3115/981967.981978</doi>
       <pages>81–88</pages>
       <url hash="294c18de">P92-1011</url>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="13">
       <title>Accommodating Context Change</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <author><first>Breck</first><last>Baldwin</last></author>
       <doi>10.3115/981967.981980</doi>
       <pages>96–103</pages>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="14">
       <title>Information Retrieval Using Robust Natural Language Processing</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Barbara</first><last>Vauthey</last></author>
       <doi>10.3115/981967.981981</doi>
       <pages>104–111</pages>
@@ -167,7 +167,7 @@
     </paper>
     <paper id="18">
       <title>Linear Context-Free Rewriting Systems and Deterministic Tree-Walking Transducers</title>
-      <author><first>David J.</first><last>Weir</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
       <doi>10.3115/981967.981985</doi>
       <pages>136–143</pages>
       <url hash="3b3c58a5">P92-1018</url>
@@ -175,7 +175,7 @@
     </paper>
     <paper id="19">
       <title>A Connectionist Parser for Structure Unification Grammar</title>
-      <author><first>James B.</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James B.</first><last>Henderson</last></author>
       <doi>10.3115/981967.981986</doi>
       <pages>144–151</pages>
       <url hash="43c541ab">P92-1019</url>
@@ -184,7 +184,7 @@
     <paper id="20">
       <title>Would <fixed-case>I</fixed-case> Lie to You? Modelling Misrepresentation and Context in Dialogue</title>
       <author><first>Carl</first><last>Gutwin</last></author>
-      <author><first>Gordon</first><last>McCalla</last></author>
+      <author id="gordon-i-mccalla"><first>Gordon</first><last>McCalla</last></author>
       <doi>10.3115/981967.981987</doi>
       <pages>152–158</pages>
       <url hash="9d7c51cf">P92-1020</url>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="21">
       <title>Lattice-Based Word Identification in <fixed-case>CLARE</fixed-case></title>
-      <author><first>David M.</first><last>Carter</last></author>
+      <author id="david-carter"><first>David M.</first><last>Carter</last></author>
       <doi>10.3115/981967.981988</doi>
       <pages>159–166</pages>
       <url hash="9730e80e">P92-1021</url>
@@ -201,7 +201,7 @@
     <paper id="22">
       <title>An Alternative Conception of Tree-Adjoining Derivation</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <doi>10.3115/981967.981989</doi>
       <pages>167–176</pages>
       <url hash="6f402e24">P92-1022</url>
@@ -219,9 +219,9 @@
     </paper>
     <paper id="24">
       <title>Development and Evaluation of a Broad-Coverage Probabilistic Grammar of <fixed-case>E</fixed-case>nglish-Language Computer Manuals</title>
-      <author><first>Ezra</first><last>Black</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/981967.981991</doi>
       <pages>185–192</pages>
       <url hash="fd4cec9e">P92-1024</url>
@@ -230,7 +230,7 @@
     <paper id="25">
       <title>Modeling Negotiation Subdialogues</title>
       <author><first>Lynn</first><last>Lambert</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981967.981992</doi>
       <pages>193–200</pages>
       <url hash="1ef0b3f5">P92-1025</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="27">
       <title>A Unification-Based Semantic Interpretation for Coordinate Constructs</title>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <doi>10.3115/981967.981994</doi>
       <pages>209–215</pages>
       <url hash="5837cf51">P92-1027</url>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="28">
       <title>Corpus-Based Acquisition of Relative Pronoun Disambiguation Heuristics</title>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.3115/981967.981995</doi>
       <pages>216–223</pages>
       <url hash="340dc505">P92-1028</url>
@@ -266,7 +266,7 @@
       <title>Association-based Natural Language Processing with Neural Networks</title>
       <author><last>Kimura</last><first>Kazuhiro</first></author>
       <author><last>Uzuoka</last><first>Takashi</first></author>
-      <author><last>Amano</last><first>Sin-ya</first></author>
+      <author id="shin-ya-amano"><first>Sin-ya</first><last>Amano</last></author>
       <doi>10.3115/981967.981996</doi>
       <pages>224–231</pages>
       <url hash="2ad178b4">P92-1029</url>
@@ -275,7 +275,7 @@
     <paper id="30">
       <title>Tense Trees as the “Fine Structure” of Discourse</title>
       <author><first>Chung Hee</first><last>Hwang</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <doi>10.3115/981967.981997</doi>
       <pages>232–240</pages>
       <url hash="9475688c">P92-1030</url>
@@ -291,8 +291,8 @@
     </paper>
     <paper id="32">
       <title>Estimating Upper and Lower Bounds on the Performance of Word-Sense Disambiguation Programs</title>
-      <author><first>William</first><last>Gale</last></author>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <doi>10.3115/981967.981999</doi>
       <pages>249–256</pages>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="33">
       <title>A Parameterized Approach to Integrating Aspect With Lexical-Semantics for Machine Translation</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <doi>10.3115/981967.982000</doi>
       <pages>257–264</pages>
       <url hash="a799d518">P92-1033</url>
@@ -310,7 +310,7 @@
     <paper id="34">
       <title>Using Classification to Generate Text</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <doi>10.3115/981967.982001</doi>
       <pages>265–272</pages>
       <url hash="dd80c6ef">P92-1034</url>
@@ -398,7 +398,7 @@
     </paper>
     <paper id="45">
       <title>Information States as First Class Citizens</title>
-      <author><first>Jorgen</first><last>Villadsen</last></author>
+      <author id="jorgen-villadsen"><first>Jorgen</first><last>Villadsen</last></author>
       <doi>10.3115/981967.982013</doi>
       <pages>303–305</pages>
       <url hash="83a35b82">P92-1045</url>
@@ -414,8 +414,8 @@
     </paper>
     <paper id="47">
       <title>Metonymy: Reassessment, Survey of Acceptability, and Its Treatment in a Machine Translation System</title>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <doi>10.3115/981967.982015</doi>
       <pages>309–311</pages>
       <url hash="0d6db465">P92-1047</url>
@@ -423,7 +423,7 @@
     </paper>
     <paper id="48">
       <title>A Basis for a Formalization of Linguistic Style</title>
-      <author><first>Stephen J.</first><last>Green</last></author>
+      <author id="stephen-j-green"><first>Stephen J.</first><last>Green</last></author>
       <doi>10.3115/981967.982016</doi>
       <pages>312–314</pages>
       <url hash="e524d7d9">P92-1048</url>
@@ -431,7 +431,7 @@
     </paper>
     <paper id="49">
       <title>Elaboration in Object Descriptions Through Examples</title>
-      <author><first>Vibhu O.</first><last>Mittal</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu O.</first><last>Mittal</last></author>
       <doi>10.3115/981967.982017</doi>
       <pages>315–317</pages>
       <url hash="effe7519">P92-1049</url>
@@ -447,7 +447,7 @@
     </paper>
     <paper id="51">
       <title>Generating a Specific Class of Metaphors</title>
-      <author><first>Mark Alan</first><last>Jones</last></author>
+      <author id="mark-jones"><first>Mark Alan</first><last>Jones</last></author>
       <doi>10.3115/981967.982019</doi>
       <pages>321–323</pages>
       <url hash="e5c44d3d">P92-1051</url>
diff --git a/data/xml/P93.xml b/data/xml/P93.xml
index 0421d1769a..e0c4815796 100644
--- a/data/xml/P93.xml
+++ b/data/xml/P93.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Char_align: A Program for Aligning Parallel Texts at the Character Level</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <doi>10.3115/981574.981575</doi>
       <pages>1–8</pages>
       <url hash="270e4e55">P93-1001</url>
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Aligning Sentences in Bilingual Corpora Using Lexical Information</title>
-      <author><first>Stanley F.</first><last>Chen</last></author>
+      <author id="stanley-f-chen"><first>Stanley F.</first><last>Chen</last></author>
       <doi>10.3115/981574.981576</doi>
       <pages>9–16</pages>
       <url hash="9cfa51b7">P93-1002</url>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Structural Matching of Parallel Texts</title>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Hiroyuki</first><last>Ishimoto</last></author>
       <doi>10.3115/981574.981578</doi>
@@ -49,12 +49,12 @@
     </paper>
     <paper id="5">
       <title>Towards History-based Grammars: Using Richer Models for Probabilistic Parsing</title>
-      <author><first>Ezra</first><last>Black</last></author>
-      <author><first>Fred</first><last>Jelinek</last></author>
-      <author><first>John</first><last>Lafrerty</last></author>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
+      <author id="frederick-jelinek"><first>Fred</first><last>Jelinek</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafrerty</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <author id="robert-l-mercer"><first>Robert</first><last>Mercer</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/981574.981579</doi>
       <pages>31–37</pages>
       <url hash="85b63614">P93-1005</url>
@@ -63,7 +63,7 @@
     <paper id="6">
       <title>Using Bracketed Parses to Evaluate a Grammar Checking Application</title>
       <author><first>Richard H.</first><last>Wojcik</last></author>
-      <author><first>Philip</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Philip</first><last>Harrison</last></author>
       <author><first>John</first><last>Bremer</last></author>
       <doi>10.3115/981574.981580</doi>
       <pages>38–45</pages>
@@ -72,8 +72,8 @@
     </paper>
     <paper id="7">
       <title>A Speech-First Model for Repair Detection and Correction</title>
-      <author><first>Christine</first><last>Nakatani</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="christine-h-nakatani"><first>Christine</first><last>Nakatani</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <doi>10.3115/981574.981581</doi>
       <pages>46–53</pages>
       <url hash="ebcf0fac">P93-1007</url>
@@ -81,13 +81,13 @@
     </paper>
     <paper id="8">
       <title><fixed-case>GEMINI</fixed-case>: A Natural Language System for Spoken-Language Understanding</title>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
-      <author><first>Doug</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="douglas-appelt"><first>Doug</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Lynn</first><last>Cherny</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
-      <author><first>Douglas</first><last>Moran</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
+      <author id="douglas-b-moran"><first>Douglas</first><last>Moran</last></author>
       <doi>10.3115/981574.981582</doi>
       <pages>54–61</pages>
       <url hash="959e3131">P93-1008</url>
@@ -95,7 +95,7 @@
     </paper>
     <paper id="9">
       <title>The Effect of Establishing Coherence in Ellipsis and Anaphora Resolution</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <doi>10.3115/981574.981583</doi>
       <pages>62–69</pages>
       <url hash="f7e3ad6f">P93-1009</url>
@@ -104,8 +104,8 @@
     <paper id="10">
       <title>Temporal Centering</title>
       <author><first>Megumi</first><last>Kameyama</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.3115/981574.981584</doi>
       <pages>70–77</pages>
       <url hash="5d6f3c8a">P93-1010</url>
@@ -113,7 +113,7 @@
     </paper>
     <paper id="11">
       <title>Assigning a Semantic Scope to Operators</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.3115/981574.981585</doi>
       <pages>78–86</pages>
       <url hash="fbb9fb4c">P93-1011</url>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="12">
       <title>Two Kinds of Metonymy</title>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <doi>10.3115/981574.981586</doi>
       <pages>87–94</pages>
       <url hash="d278845a">P93-1012</url>
@@ -129,7 +129,7 @@
     </paper>
     <paper id="13">
       <title>Planning Multimodal Discourse</title>
-      <author><first>Wolfgang</first><last>Wahlster</last></author>
+      <author id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></author>
       <doi>10.3115/981574.981587</doi>
       <pages>95–96</pages>
       <url hash="f77faa95">P93-1013</url>
@@ -137,9 +137,9 @@
     </paper>
     <paper id="14">
       <title>A Unification-Based Parser for Relational Grammar</title>
-      <author><first>David E.</first><last>Johnson</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Lawrence S.</first><last>Moss</last></author>
+      <author id="david-e-johnson"><first>David E.</first><last>Johnson</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></author>
       <doi>10.3115/981574.981588</doi>
       <pages>97–104</pages>
       <url hash="1efc9c3c">P93-1014</url>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="15">
       <title>Parsing Free Word Order Languages in the <fixed-case>P</fixed-case>aninian Framework</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <doi>10.3115/981574.981589</doi>
       <pages>105–111</pages>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="19">
       <title>Feature-Based Allomorphy</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Hannes</first><last>Pirker</last></author>
       <doi>10.3115/981574.981593</doi>
       <pages>140–147</pages>
@@ -194,8 +194,8 @@
     </paper>
     <paper id="20">
       <title>Intention-Based Segmentation: Human Reliability and Correlation With Linguistic Cues</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <doi>10.3115/981574.981594</doi>
       <pages>148–155</pages>
       <url hash="8de448ae">P93-1020</url>
@@ -204,7 +204,7 @@
     <paper id="21">
       <title>A Language-Independent Anaphora Resolution System for Understanding Multilingual Texts</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Douglas</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Douglas</first><last>McKee</last></author>
       <doi>10.3115/981574.981595</doi>
       <pages>156–163</pages>
       <url hash="e8856e93">P93-1021</url>
@@ -223,7 +223,7 @@
     <paper id="23">
       <title>Towards the Automatic Identification of Adjectival Scales: Clustering Adjectives According to Meaning</title>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/981574.981597</doi>
       <pages>172–182</pages>
       <url hash="08f2e322">P93-1023</url>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="30">
       <title>Quantificational Domains and Recursive Contexts</title>
-      <author><first>Barbara</first><last>Partee</last></author>
+      <author id="barbara-h-partee"><first>Barbara</first><last>Partee</last></author>
       <doi>10.3115/981574.981604</doi>
       <pages>224–225</pages>
       <url hash="c45239da">P93-1030</url>
@@ -291,7 +291,7 @@
     </paper>
     <paper id="31">
       <title>Tailoring Lexical Choice to the User’s Vocabulary in Multimedia Explanation Generation</title>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <author><first>Jacques</first><last>Robin</last></author>
       <author><first>Michael</first><last>Tanenblatt</last></author>
       <doi>10.3115/981574.981605</doi>
@@ -301,7 +301,7 @@
     </paper>
     <paper id="32">
       <title>Automatic Acquisition of a Large Sub Categorization Dictionary From Corpora</title>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/981574.981606</doi>
       <pages>235–242</pages>
       <url hash="91aeef51">P93-1032</url>
@@ -310,7 +310,7 @@
     <paper id="33">
       <title>An Empirical Study on Thematic Knowledge Acquisition Based on Syntactic Clues and Heuristics</title>
       <author><first>Rey-Long</first><last>Liu</last></author>
-      <author><first>Von-wun</first><last>Soo</last></author>
+      <author id="von-wun-soo"><first>Von-wun</first><last>Soo</last></author>
       <doi>10.3115/981574.981607</doi>
       <pages>243–250</pages>
       <url hash="f8005a80">P93-1033</url>
@@ -318,7 +318,7 @@
     </paper>
     <paper id="34">
       <title>Part-of-Speech Induction From Scratch</title>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.3115/981574.981608</doi>
       <pages>251–258</pages>
       <url hash="62f9b3fa">P93-1034</url>
@@ -360,7 +360,7 @@
     </paper>
     <paper id="39">
       <title>Responding to User Queries in a Collaborative Environment</title>
-      <author><first>Jennifer</first><last>Chu</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu</last></author>
       <doi>10.3115/981574.981614</doi>
       <pages>280–282</pages>
       <url hash="56d3c489">P93-1039</url>
@@ -368,7 +368,7 @@
     </paper>
     <paper id="40">
       <title>The Imperfective Paradox and Trajectory-of-Motion Events</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <doi>10.3115/981574.981615</doi>
       <pages>283–285</pages>
       <url hash="27c5ffae">P93-1040</url>
@@ -416,7 +416,7 @@
     </paper>
     <paper id="46">
       <title>Integrating Word Boundary Identification With Sentence Understanding</title>
-      <author><first>Kok Wee</first><last>Gan</last></author>
+      <author id="kok-wee-gan"><first>Kok Wee</first><last>Gan</last></author>
       <doi>10.3115/981574.981621</doi>
       <pages>301–303</pages>
       <url hash="a7b04146">P93-1046</url>
diff --git a/data/xml/P94.xml b/data/xml/P94.xml
index de23e0b009..2c6a758090 100644
--- a/data/xml/P94.xml
+++ b/data/xml/P94.xml
@@ -15,8 +15,8 @@
     </frontmatter>
     <paper id="1">
       <title>Discourse Obligations in Dialogue Processing</title>
-      <author><first>David R.</first><last>Traum</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="david-traum"><first>David R.</first><last>Traum</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/981732.981733</doi>
       <pages>1–8</pages>
       <url hash="71eeb539">P94-1001</url>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>Multi-Paragraph Segmentation Expository Text</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <doi>10.3115/981732.981734</doi>
       <pages>9–16</pages>
       <url hash="c257b7d1">P94-1002</url>
@@ -44,9 +44,9 @@
     <paper id="4">
       <title>Hidden Understanding Models of Natural Language</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Robert</first><last>Ingria</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="robert-ingria"><first>Robert</first><last>Ingria</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <doi>10.3115/981732.981736</doi>
       <pages>25–32</pages>
       <url hash="6e4928e8">P94-1004</url>
@@ -54,7 +54,7 @@
     </paper>
     <paper id="5">
       <title>From Strings to Trees to Strings to Trees ... (Abstract)</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981732.981737</doi>
       <pages>33–33</pages>
       <url hash="e758903e">P94-1005</url>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="6">
       <title>Intentions and Information in Discourse</title>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <doi>10.3115/981732.981738</doi>
       <pages>34–41</pages>
@@ -79,7 +79,7 @@
     </paper>
     <paper id="8">
       <title>Common Topics and Coherent Situations: Interpreting Ellipsis in the Context of Discourse Inference</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <doi>10.3115/981732.981740</doi>
       <pages>50–57</pages>
       <url hash="c81f276a">P94-1008</url>
@@ -87,8 +87,8 @@
     </paper>
     <paper id="9">
       <title>A Hybrid Reasoning Model for Indirect Answers</title>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981732.981741</doi>
       <pages>58–65</pages>
       <url hash="9d883a32">P94-1009</url>
@@ -96,9 +96,9 @@
     </paper>
     <paper id="10">
       <title>A Stochastic Finite-State Word-Segmentation Algorithm for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Chilin</first><last>Shih</last></author>
-      <author><first>William</first><last>Gale</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
       <author><first>Nancy</first><last>Chang</last></author>
       <doi>10.3115/981732.981742</doi>
       <pages>66–73</pages>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="11">
       <title>Precise N-Gram Probabilities From Stochastic Context-Free Grammars</title>
-      <author><first>Andreas</first><last>Stolcke</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
       <author><first>Jonathan</first><last>Segal</last></author>
       <doi>10.3115/981732.981743</doi>
       <pages>74–79</pages>
@@ -150,10 +150,10 @@
     </paper>
     <paper id="16">
       <title>Interleaving Syntax and Semantics in an Efficient Bottom-Up Parser</title>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <author><first>Francois</first><last>Andry</last></author>
-      <author><first>Douglas</first><last>Moran</last></author>
+      <author id="douglas-b-moran"><first>Douglas</first><last>Moran</last></author>
       <doi>10.3115/981732.981748</doi>
       <pages>110–116</pages>
       <url hash="b06e77e7">P94-1016</url>
@@ -178,7 +178,7 @@
     <paper id="19">
       <title>Verb Semantics and Lexical Selection</title>
       <author><first>Zhibiao</first><last>Wu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/981732.981751</doi>
       <pages>133–138</pages>
       <url hash="8836c9c7">P94-1019</url>
@@ -186,8 +186,8 @@
     </paper>
     <paper id="20">
       <title>Word-Sense Disambiguation Using Decomposable Models</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <doi>10.3115/981732.981752</doi>
       <pages>139–146</pages>
       <url hash="278d899e">P94-1020</url>
@@ -196,7 +196,7 @@
     <paper id="21">
       <title>Constraint-Based Categorial Grammar</title>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <doi>10.3115/981732.981753</doi>
       <pages>147–154</pages>
       <url hash="05fd3488">P94-1021</url>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="23">
       <title>On Determining the Consistency of Partial Descriptions of Trees</title>
-      <author><first>Thomas L.</first><last>Cornell</last></author>
+      <author id="thomas-l-cornell"><first>Thomas L.</first><last>Cornell</last></author>
       <doi>10.3115/981732.981755</doi>
       <pages>163–170</pages>
       <url hash="5597a968">P94-1023</url>
@@ -221,7 +221,7 @@
     <paper id="24">
       <title>A <fixed-case>M</fixed-case>arkov Language Learning Model for Finite Parameter Spaces</title>
       <author><first>Partha</first><last>Niyogi</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <doi>10.3115/981732.981756</doi>
       <pages>171–180</pages>
       <url hash="a425339f">P94-1024</url>
@@ -229,7 +229,7 @@
     </paper>
     <paper id="25">
       <title>Part-of-Speech Tagging Using a Variable Memory <fixed-case>M</fixed-case>arkov Model</title>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <author><first>Yoram</first><last>Singer</last></author>
       <doi>10.3115/981732.981757</doi>
       <pages>181–187</pages>
@@ -287,7 +287,7 @@
     </paper>
     <paper id="32">
       <title>Extracting Noun Phrases from Large-Scale Texts: A Hybrid Approach and Its Automatic Evaluation</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <doi>10.3115/981732.981764</doi>
       <pages>234–241</pages>
@@ -324,7 +324,7 @@
     </paper>
     <paper id="36">
       <title>Multiset-Valued Linear Index Grammars: Imposing Dominance Constraints on Derivations</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/981732.981768</doi>
       <pages>263–270</pages>
       <url hash="601a9f9f">P94-1036</url>
@@ -359,7 +359,7 @@
     </paper>
     <paper id="40">
       <title>Relating Complexity to Practical Performance in Parsing With Wide-Coverage Unification Grammars</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <doi>10.3115/981732.981772</doi>
       <pages>287–294</pages>
       <url hash="86deec73">P94-1040</url>
@@ -367,8 +367,8 @@
     </paper>
     <paper id="41">
       <title>Detecting and Correcting Speech Repairs</title>
-      <author><first>Peter</first><last>Heeman</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <doi>10.3115/981732.981773</doi>
       <pages>295–302</pages>
       <url hash="48bad4a6">P94-1041</url>
@@ -377,7 +377,7 @@
     <paper id="42">
       <title>A Computational View of the Cognitive Semantics of Spatial Prepositions</title>
       <author><first>Patrick</first><last>Olivier</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <doi>10.3115/981732.981774</doi>
       <pages>303–309</pages>
       <url hash="90c05bb6">P94-1042</url>
@@ -385,7 +385,7 @@
     </paper>
     <paper id="43">
       <title>Reaping the Benefits of Interactive Syntax and Semantics</title>
-      <author><first>Kavi</first><last>Mahesh</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
       <doi>10.3115/981732.981776</doi>
       <pages>310–312</pages>
       <url hash="df34c6ed">P94-1043</url>
@@ -401,7 +401,7 @@
     </paper>
     <paper id="45">
       <title>An Integrated Heuristic Scheme for Partial Parse Evaluation</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <doi>10.3115/981732.981778</doi>
       <pages>316–318</pages>
       <url hash="52a70839">P94-1045</url>
@@ -409,7 +409,7 @@
     </paper>
     <paper id="46">
       <title>Temporal Relations: Reference or Discourse Coherence?</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <doi>10.3115/981732.981779</doi>
       <pages>319–321</pages>
       <url hash="85863f29">P94-1046</url>
@@ -449,9 +449,9 @@
     </paper>
     <paper id="51">
       <title>Automatic Alignment in Parallel Corpora</title>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
-      <author><first>Lambros</first><last>Cranias</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="lambros-cranias"><first>Lambros</first><last>Cranias</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <doi>10.3115/981732.981784</doi>
       <pages>334–336</pages>
       <url hash="1e1c3938">P94-1051</url>
diff --git a/data/xml/P95.xml b/data/xml/P95.xml
index 7c81015203..cad76d2aff 100644
--- a/data/xml/P95.xml
+++ b/data/xml/P95.xml
@@ -16,8 +16,8 @@
     <paper id="1">
       <title>Learning Phonological Rule Probabilities from Speech Corpora with Exploratory Computational Phonology</title>
       <author><first>Gary</first><last>Tajchman</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Eric</first><last>Fosler</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler</last></author>
       <doi>10.3115/981658.981659</doi>
       <pages>1–8</pages>
       <url hash="760fe32a">P95-1001</url>
@@ -26,7 +26,7 @@
     <paper id="2">
       <title>Automatic Induction of Finite State Transducers for Simple Phonological Rules</title>
       <author><first>Daniel</first><last>Gildea</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <doi>10.3115/981658.981660</doi>
       <pages>9–15</pages>
       <url hash="1d5cc247">P95-1002</url>
@@ -43,7 +43,7 @@
     <paper id="4">
       <title>A Morphographemic Model for Error Correction in Nonconcatenative Strings</title>
       <author><first>Tanya</first><last>Bowden</last></author>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <doi>10.3115/981658.981662</doi>
       <pages>24–30</pages>
       <url hash="03b01ddf">P95-1004</url>
@@ -51,10 +51,10 @@
     </paper>
     <paper id="5">
       <title>Discourse Processing of Dialogues with Multiple Threads</title>
-      <author><first>Carolyn Penstein</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Lori S.</first><last>Levin</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="lori-levin"><first>Lori S.</first><last>Levin</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <doi>10.3115/981658.981663</doi>
       <pages>31–38</pages>
       <url hash="8e907d23">P95-1005</url>
@@ -86,7 +86,7 @@
     </paper>
     <paper id="9">
       <title>User-Defined Nonmonotonicity in Unification-Based Formalisms</title>
-      <author><first>Lena</first><last>Stromback</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Stromback</last></author>
       <doi>10.3115/981658.981667</doi>
       <pages>63–69</pages>
       <url hash="f09899cf">P95-1009</url>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="10">
       <title>Features and Agreement</title>
-      <author><first>Sam</first><last>Bayer</last></author>
+      <author id="samuel-bayer"><first>Sam</first><last>Bayer</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.3115/981658.981668</doi>
       <pages>70–76</pages>
@@ -103,9 +103,9 @@
     </paper>
     <paper id="11">
       <title>Encoding <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars with a Nonmonotonic Inheritance Hierarchy</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Gerald</first><last>Gazdar</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <doi>10.3115/981658.981669</doi>
       <pages>77–84</pages>
       <url hash="deb62383">P95-1011</url>
@@ -113,8 +113,8 @@
     </paper>
     <paper id="12">
       <title>Compiling <fixed-case>HPSG</fixed-case> type constraints into definite clause programs</title>
-      <author><first>Thilo</first><last>Gotz</last></author>
-      <author><first>Walt Detmar</first><last>Meurers</last></author>
+      <author id="thilo-gotz"><first>Thilo</first><last>Gotz</last></author>
+      <author id="detmar-meurers"><first>Walt Detmar</first><last>Meurers</last></author>
       <doi>10.3115/981658.981670</doi>
       <pages>85–91</pages>
       <url hash="136f9195">P95-1012</url>
@@ -122,10 +122,10 @@
     </paper>
     <paper id="13">
       <title>Compilation of <fixed-case>HPSG</fixed-case> to <fixed-case>TAG</fixed-case></title>
-      <author><first>Robert</first><last>Kasper</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
+      <author id="robert-t-kasper"><first>Robert</first><last>Kasper</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
       <author><first>Klaus</first><last>Netter</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/981658.981671</doi>
       <pages>92–99</pages>
       <url hash="23260eb1">P95-1013</url>
@@ -134,7 +134,7 @@
     <paper id="14">
       <title>Memoization of Coroutined Constraints</title>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Jochen</first><last>Dorre</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dorre</last></author>
       <doi>10.3115/981658.981672</doi>
       <pages>100–107</pages>
       <url hash="cd1dfb86">P95-1014</url>
@@ -142,8 +142,8 @@
     </paper>
     <paper id="15">
       <title>Combining Multiple Knowledge Sources for Discourse Segmentation</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <doi>10.3115/981658.981673</doi>
       <pages>108–115</pages>
       <url hash="72085374">P95-1015</url>
@@ -152,7 +152,7 @@
     <paper id="16">
       <title>Utilizing Statistical Dialogue Act Processing in Verbrnobil</title>
       <author><first>Norbert</first><last>Reithinger</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <doi>10.3115/981658.981674</doi>
       <pages>116–121</pages>
       <url hash="655987eb">P95-1016</url>
@@ -170,7 +170,7 @@
     <paper id="18">
       <title>Investigating Cue Selection and Placement in Tutorial Discourse</title>
       <author><first>Megan</first><last>Moser</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <doi>10.3115/981658.981676</doi>
       <pages>130–135</pages>
       <url hash="52c7e2c2">P95-1018</url>
@@ -178,8 +178,8 @@
     </paper>
     <paper id="19">
       <title>Response Generation in Collaborative Negotiation</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/981658.981677</doi>
       <pages>136–143</pages>
       <url hash="a3bd80a6">P95-1019</url>
@@ -196,9 +196,9 @@
     </paper>
     <paper id="21">
       <title><fixed-case>D</fixed-case>-Tree Grammars</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <doi>10.3115/981658.981679</doi>
       <pages>151–158</pages>
       <url hash="fd633c53">P95-1021</url>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="22">
       <title>The intersection of Finite State Automata and Definite Clause Grammars</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <doi>10.3115/981658.981680</doi>
       <pages>159–165</pages>
       <url hash="675f0dab">P95-1022</url>
@@ -224,7 +224,7 @@
     <paper id="24">
       <title>Extraposition via Complex Domain Formation</title>
       <author><first>Andreas</first><last>Kathol</last></author>
-      <author><first>Carl</first><last>Pollard</last></author>
+      <author id="carl-pollard"><first>Carl</first><last>Pollard</last></author>
       <doi>10.3115/981658.981682</doi>
       <pages>174–180</pages>
       <url hash="8358437c">P95-1024</url>
@@ -250,7 +250,7 @@
     <paper id="27">
       <title>A Quantitative Evaluation of Linguistic Tests for the Automatic Prediction of Semantic Markedness</title>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.3115/981658.981685</doi>
       <pages>197–204</pages>
       <url hash="93d4e023">P95-1027</url>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="28">
       <title>Quantifier Scope and Constituency</title>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <doi>10.3115/981658.981686</doi>
       <pages>205–212</pages>
       <url hash="f4fc6147">P95-1028</url>
@@ -274,7 +274,7 @@
     </paper>
     <paper id="30">
       <title>New Techniques for Context Modeling</title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <author><first>Robert G.</first><last>Thomas</last></author>
       <doi>10.3115/981658.981688</doi>
       <pages>220–227</pages>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="31">
       <title><fixed-case>B</fixed-case>ayesian Grammar Induction for Language Modeling</title>
-      <author><first>Stanley F.</first><last>Chen</last></author>
+      <author id="stanley-f-chen"><first>Stanley F.</first><last>Chen</last></author>
       <doi>10.3115/981658.981689</doi>
       <pages>228–235</pages>
       <url hash="cc61cfe3">P95-1031</url>
@@ -318,7 +318,7 @@
       <title>An Efficient Generation Algorithm for Lexicalist <fixed-case>MT</fixed-case></title>
       <author><first>Victor</first><last>Poznanski</last></author>
       <author><first>John L.</first><last>Beaven</last></author>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <doi>10.3115/981658.981693</doi>
       <pages>261–267</pages>
       <url hash="a111ddfb">P95-1035</url>
@@ -326,8 +326,8 @@
     </paper>
     <paper id="36">
       <title>Some Novel Applications of Explanation-Based Learning to Parsing <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>B.</first><last>Srinivas</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <doi>10.3115/981658.981694</doi>
       <pages>268–275</pages>
       <url hash="3b0e16fe">P95-1036</url>
@@ -335,7 +335,7 @@
     </paper>
     <paper id="37">
       <title>Statistical Decision-Tree Models for Parsing</title>
-      <author><first>David M.</first><last>Magerman</last></author>
+      <author id="david-m-magerman"><first>David M.</first><last>Magerman</last></author>
       <doi>10.3115/981658.981695</doi>
       <pages>276–283</pages>
       <url hash="489d337b">P95-1037</url>
@@ -407,7 +407,7 @@
     </paper>
     <paper id="46">
       <title>Knowledge-based Automatic Topic Identification</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <doi>10.3115/981658.981705</doi>
       <pages>308–310</pages>
       <url hash="c3c46975">P95-1046</url>
@@ -415,7 +415,7 @@
     </paper>
     <paper id="47">
       <title>Acquiring a Lexicon from Unsegmented Speech</title>
-      <author><first>Carl</first><last>de Marcken</last></author>
+      <author id="carl-de-marcken"><first>Carl</first><last>de Marcken</last></author>
       <doi>10.3115/981658.981706</doi>
       <pages>311–313</pages>
       <url hash="d8aae875">P95-1047</url>
@@ -432,7 +432,7 @@
     </paper>
     <paper id="49">
       <title>Mapping Scrambled <fixed-case>K</fixed-case>orean Sentences into <fixed-case>E</fixed-case>nglish Using Synchronous <fixed-case>TAG</fixed-case>s</title>
-      <author><first>Hyun S.</first><last>Park</last></author>
+      <author id="hyun-seok-park"><first>Hyun S.</first><last>Park</last></author>
       <doi>10.3115/981658.981708</doi>
       <pages>317–319</pages>
       <url hash="6156b7d5">P95-1049</url>
diff --git a/data/xml/P96.xml b/data/xml/P96.xml
index a145fe2373..c504f9e6c6 100644
--- a/data/xml/P96.xml
+++ b/data/xml/P96.xml
@@ -32,8 +32,8 @@
     </paper>
     <paper id="3">
       <title>Noun Phrase Analysis in Large Unrestricted Text for Information Retrieval</title>
-      <author><first>David A.</first><last>Evans</last></author>
-      <author><first>Chengxiang</first><last>Zhai</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
+      <author id="chengxiang-zhai"><first>Chengxiang</first><last>Zhai</last></author>
       <doi>10.3115/981863.981866</doi>
       <pages>17–24</pages>
       <url hash="ae6c1ccc">P96-1003</url>
@@ -50,9 +50,9 @@
     <paper id="5">
       <title>From Submit to Submitted via Submission: On Lexical Rules in Large-Scale Lexicon Acquisition</title>
       <author><first>Evelyne</first><last>Viegas</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/981863.981868</doi>
       <pages>32–39</pages>
       <url hash="d1ae9b4b">P96-1005</url>
@@ -78,9 +78,9 @@
     <paper id="8">
       <title>A Fully Statistical Approach to Natural Language Interfaces</title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>David</first><last>Stallard</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <doi>10.3115/981863.981871</doi>
       <pages>55–61</pages>
       <url hash="db448bc7">P96-1008</url>
@@ -88,9 +88,9 @@
     </paper>
     <paper id="9">
       <title>A Robust System for Natural Spoken Dialogue</title>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <author><first>Bradford W.</first><last>Miller</last></author>
-      <author><first>Eric K.</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric K.</first><last>Ringger</last></author>
       <author><first>Teresa</first><last>Sikorski</last></author>
       <doi>10.3115/981863.981872</doi>
       <pages>62–70</pages>
@@ -108,7 +108,7 @@
     </paper>
     <paper id="11">
       <title>Efficient Normal-Form Parsing for <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/981863.981874</doi>
       <pages>79–86</pages>
       <url hash="103ec5fb">P96-1011</url>
@@ -124,7 +124,7 @@
     </paper>
     <paper id="13">
       <title>Parsing with Semidirectional <fixed-case>L</fixed-case>ambek Grammar is <fixed-case>NP</fixed-case>-Complete</title>
-      <author><first>Jochen</first><last>Dörre</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dörre</last></author>
       <doi>10.3115/981863.981876</doi>
       <pages>95–100</pages>
       <url hash="7efcca68">P96-1013</url>
@@ -148,7 +148,7 @@
     </paper>
     <paper id="16">
       <title>Synchronous Models of Language</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <doi>10.3115/981863.981879</doi>
       <pages>116–123</pages>
@@ -158,7 +158,7 @@
     <paper id="17">
       <title>Coordination as a Direct Process</title>
       <author><first>Augusta</first><last>Mela</last></author>
-      <author><first>Christophe</first><last>Fouqueré</last></author>
+      <author id="christophe-fouquere"><first>Christophe</first><last>Fouqueré</last></author>
       <doi>10.3115/981863.981880</doi>
       <pages>124–130</pages>
       <url hash="42c3da74">P96-1017</url>
@@ -175,8 +175,8 @@
     </paper>
     <paper id="19">
       <title>An Iterative Algorithm to Build <fixed-case>C</fixed-case>hinese Language Models</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/981863.981882</doi>
       <pages>139–143</pages>
       <url hash="99733d9b">P96-1019</url>
@@ -200,7 +200,7 @@
     </paper>
     <paper id="22">
       <title><fixed-case>SEMHE</fixed-case>: A Generalised Two-level System</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <doi>10.3115/981863.981885</doi>
       <pages>159–166</pages>
       <url hash="ab7ad402">P96-1022</url>
@@ -208,7 +208,7 @@
     </paper>
     <paper id="23">
       <title>Head Automata and Bilingual Tiling: Translation with Minimal Representations (Invited Talk)</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <doi>10.3115/981863.981886</doi>
       <pages>167–176</pages>
       <url hash="cba6bf26">P96-1023</url>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="24">
       <title>Parsing Algorithms and Metrics</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <doi>10.3115/981863.981887</doi>
       <pages>177–183</pages>
       <url hash="4133c6a2">P96-1024</url>
@@ -224,7 +224,7 @@
     </paper>
     <paper id="25">
       <title>A New Statistical Parser Based on Bigram Lexical Dependencies</title>
-      <author><first>Michael John</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael John</first><last>Collins</last></author>
       <doi>10.3115/981863.981888</doi>
       <pages>184–191</pages>
       <url hash="90133f34">P96-1025</url>
@@ -232,8 +232,8 @@
     </paper>
     <paper id="26">
       <title>Two Sources of Control Over the Generation of Software Instructions</title>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <doi>10.3115/981863.981889</doi>
       <pages>192–199</pages>
       <url hash="ad9c2de0">P96-1026</url>
@@ -257,8 +257,8 @@
     </paper>
     <paper id="29">
       <title>Compilation of Weighted Finite-State Transducers from Decision Trees</title>
-      <author><first>Richard</first><last>Sproat</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <doi>10.3115/981863.981892</doi>
       <pages>215–222</pages>
       <url hash="15c2119e">P96-1029</url>
@@ -266,8 +266,8 @@
     </paper>
     <paper id="30">
       <title>Fast Parsing Using Pruning and Grammar Specialization</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <doi>10.3115/981863.981893</doi>
       <pages>223–230</pages>
       <url hash="21cdb407">P96-1030</url>
@@ -276,7 +276,7 @@
     <paper id="31">
       <title>An Efficient Compiler for Weighted Rewrite Rules</title>
       <author><first>Mehryar</first><last>Mohri</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <doi>10.3115/981863.981894</doi>
       <pages>231–238</pages>
       <url hash="0a1b199c">P96-1031</url>
@@ -310,7 +310,7 @@
     </paper>
     <paper id="35">
       <title>Resolving Anaphors in Embedded Sentences</title>
-      <author><first>Saliha</first><last>Azzam</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
       <doi>10.3115/981863.981898</doi>
       <pages>263–268</pages>
       <url hash="0e0aeb58">P96-1035</url>
@@ -327,7 +327,7 @@
     </paper>
     <paper id="37">
       <title>Mechanisms for Mixed-Initiative Human-Computer Collaborative Discourse</title>
-      <author><first>Curry I.</first><last>Guinn</last></author>
+      <author id="curry-i-guinn"><first>Curry I.</first><last>Guinn</last></author>
       <doi>10.3115/981863.981900</doi>
       <pages>278–285</pages>
       <url hash="4d5d6fe9">P96-1037</url>
@@ -335,8 +335,8 @@
     </paper>
     <paper id="38">
       <title>A Prosodic Analysis of Discourse Segments in Direction-Giving Monologues</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
       <doi>10.3115/981863.981901</doi>
       <pages>286–293</pages>
       <url hash="e63a9cef">P96-1038</url>
@@ -360,8 +360,8 @@
     </paper>
     <paper id="41">
       <title>An Empirical Study of Smoothing Techniques for Language Modeling</title>
-      <author><first>Stanley F.</first><last>Chen</last></author>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="stanley-f-chen"><first>Stanley F.</first><last>Chen</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <doi>10.3115/981863.981904</doi>
       <pages>310–318</pages>
       <url hash="57a13578">P96-1041</url>
@@ -386,7 +386,7 @@
     </paper>
     <paper id="44">
       <title>Linguistic Structure as Composition and Perturbation</title>
-      <author><first>Carl</first><last>de Marcken</last></author>
+      <author id="carl-de-marcken"><first>Carl</first><last>de Marcken</last></author>
       <doi>10.3115/981863.981907</doi>
       <pages>335–341</pages>
       <url hash="4e9341b7">P96-1044</url>
@@ -394,7 +394,7 @@
     </paper>
     <paper id="45">
       <title>Generating an <fixed-case>LTAG</fixed-case> out of a Principle-based Hierarchical Representation</title>
-      <author><first>Marie-Helene</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Helene</first><last>Candito</last></author>
       <doi>10.3115/981863.981909</doi>
       <pages>342–344</pages>
       <url hash="bb5e024e">P96-1045</url>
@@ -402,7 +402,7 @@
     </paper>
     <paper id="46">
       <title>Using Parsed Corpora for Structural Disambiguation in the <fixed-case>TRAINS</fixed-case> Domain</title>
-      <author><first>Mark</first><last>Core</last></author>
+      <author id="mark-g-core"><first>Mark</first><last>Core</last></author>
       <doi>10.3115/981863.981910</doi>
       <pages>345–347</pages>
       <url hash="4c72a8f9">P96-1046</url>
@@ -418,7 +418,7 @@
     </paper>
     <paper id="48">
       <title>Using Textual Clues to Improve Metaphor Processing</title>
-      <author><first>Stephane</first><last>Ferrari</last></author>
+      <author id="stephane-ferrari"><first>Stephane</first><last>Ferrari</last></author>
       <doi>10.3115/981863.981912</doi>
       <pages>351–353</pages>
       <url hash="d5f1cccc">P96-1048</url>
@@ -426,7 +426,7 @@
     </paper>
     <paper id="49">
       <title>On Reversing the Generation Process in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>J. Eric</first><last>Fosler</last></author>
+      <author id="eric-fosler-lussier"><first>J. Eric</first><last>Fosler</last></author>
       <doi>10.3115/981863.981913</doi>
       <pages>354–356</pages>
       <url hash="78f29a80">P96-1049</url>
@@ -442,7 +442,7 @@
     </paper>
     <paper id="51">
       <title>An Application of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Prepositional Attachment</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <doi>10.3115/981863.981915</doi>
       <pages>360–362</pages>
       <url hash="55f52f4b">P96-1051</url>
@@ -458,7 +458,7 @@
     </paper>
     <paper id="53">
       <title>Using Terminological Knowledge Representation Languages to Manage Linguistic Resources</title>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
       <doi>10.3115/981863.981917</doi>
       <pages>366–368</pages>
       <url hash="3ec975a3">P96-1053</url>
diff --git a/data/xml/P97.xml b/data/xml/P97.xml
index e2dce1ae6e..107f6c9c5f 100644
--- a/data/xml/P97.xml
+++ b/data/xml/P97.xml
@@ -16,8 +16,8 @@
     </frontmatter>
     <paper id="1">
       <title>Interleaving Universal Principles and Relational Constraints over Typed Feature Logic</title>
-      <author><first>Thilo</first><last>Gotz</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="thilo-gotz"><first>Thilo</first><last>Gotz</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <doi>10.3115/976909.979618</doi>
       <pages>1–8</pages>
       <url hash="06bb54a8">P97-1001</url>
@@ -34,7 +34,7 @@
     <paper id="3">
       <title>Three Generative, Lexicalised Models for Statistical Parsing</title>
       <award>ACL 2022 25-Year Test of Time</award>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.3115/976909.979620</doi>
       <pages>16–23</pages>
       <url hash="76b2f017">P97-1003</url>
@@ -43,8 +43,8 @@
     <paper id="4">
       <title>Expansion of Multi-Word Terms for Indexing and Retrieval Using Morphology and Syntax</title>
       <author><first>Christian</first><last>Jacquemin</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <doi>10.3115/976909.979621</doi>
       <pages>24–31</pages>
       <url hash="65618d62">P97-1004</url>
@@ -54,7 +54,7 @@
       <title>Automatic Detection of Text Genre</title>
       <author><first>Brett</first><last>Kessler</last></author>
       <author><first>Geoffrey</first><last>Nunberg</last></author>
-      <author><first>Hinrich</first><last>Schutze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schutze</last></author>
       <doi>10.3115/976909.979622</doi>
       <pages>32–38</pages>
       <url hash="03478417">P97-1005</url>
@@ -71,9 +71,9 @@
     </paper>
     <paper id="7">
       <title>Combining Unsupervised Lexical Knowledge Methods for Word Sense Disambiguation</title>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <doi>10.3115/976909.979624</doi>
       <pages>48–55</pages>
       <url hash="1d65bade">P97-1007</url>
@@ -108,7 +108,7 @@
     <paper id="11">
       <title>Learning Features that Predict Cue Usage</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Massimo</first><last>Paolucci</last></author>
       <doi>10.3115/976909.979628</doi>
       <pages>80–87</pages>
@@ -118,7 +118,7 @@
     <paper id="12">
       <title>Expectations in Incremental Discourse Processing</title>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <doi>10.3115/976909.979629</doi>
       <pages>88–95</pages>
       <url hash="22b993c0">P97-1012</url>
@@ -152,8 +152,8 @@
     <paper id="16">
       <title>Ambiguity Resolution for Machine Translation of Telegraphic Messages</title>
       <author><first>Young-Suk</first><last>Lee</last></author>
-      <author><first>Clifford</first><last>Weinstein</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="clifford-j-weinstein"><first>Clifford</first><last>Weinstein</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Dinesh</first><last>Tummala</last></author>
       <doi>10.3115/976909.979633</doi>
       <pages>120–127</pages>
@@ -189,8 +189,8 @@
     </paper>
     <paper id="20">
       <title>Deriving Verbal and Compositonal Lexical Aspect for <fixed-case>NLP</fixed-case> Applications</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
       <doi>10.3115/976909.979637</doi>
       <pages>151–158</pages>
       <url hash="ebd9ce7d">P97-1020</url>
@@ -200,7 +200,7 @@
       <title>A <fixed-case>DOP</fixed-case> Model for Semantic Interpretation</title>
       <author><first>Remko</first><last>Bonnema</last></author>
       <author><first>Rens</first><last>Bod</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <doi>10.3115/976909.979638</doi>
       <pages>159–167</pages>
       <url hash="9a1e139e">P97-1021</url>
@@ -208,9 +208,9 @@
     </paper>
     <paper id="22">
       <title>Fertility Models for Statistical Natural Language Understanding</title>
-      <author><first>Stephen</first><last>Della Pietra</last></author>
-      <author><first>Mark</first><last>Epstein</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="stephen-a-della-pietra"><first>Stephen</first><last>Della Pietra</last></author>
+      <author id="mark-epstein"><first>Mark</first><last>Epstein</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <author><first>Todd</first><last>Ward</last></author>
       <doi>10.3115/976909.979639</doi>
       <pages>168–173</pages>
@@ -220,7 +220,7 @@
     <paper id="23">
       <title>Predicting the Semantic Orientation of Adjectives</title>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <doi>10.3115/976909.979640</doi>
       <pages>174–181</pages>
       <url hash="9d75ef56">P97-1023</url>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="24">
       <title>Independence Assumptions Considered Harmful</title>
-      <author><first>Alexander</first><last>Franz</last></author>
+      <author id="alexander-franz"><first>Alexander</first><last>Franz</last></author>
       <doi>10.3115/976909.979641</doi>
       <pages>182–189</pages>
       <url hash="be6b2256">P97-1024</url>
@@ -245,7 +245,7 @@
     <paper id="26">
       <title>Sentence Planning as Description Using <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <doi>10.3115/976909.979643</doi>
       <pages>198–205</pages>
       <url hash="c81bb4da">P97-1026</url>
@@ -261,7 +261,7 @@
     </paper>
     <paper id="28">
       <title>Applying Explanation-based Learning to Control and Speeding-up Natural Language Generation</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <doi>10.3115/976909.979645</doi>
       <pages>214–221</pages>
       <url hash="831b905d">P97-1028</url>
@@ -270,7 +270,7 @@
     <paper id="29">
       <title>Morphological Disambiguation by Voting Constraints</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <doi>10.3115/976909.979646</doi>
       <pages>222–229</pages>
       <url hash="6b868f60">P97-1029</url>
@@ -279,7 +279,7 @@
     <paper id="30">
       <title>Mistake-Driven Mixture of Hierarchical Tag Context Trees</title>
       <author><first>Masahiko</first><last>Haruno</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/976909.979647</doi>
       <pages>230–237</pages>
       <url hash="3f7809c7">P97-1030</url>
@@ -287,8 +287,8 @@
     </paper>
     <paper id="31">
       <title>A Flexible <fixed-case>POS</fixed-case> Tagger Using an Automatically Acquired Language Model</title>
-      <author><first>Lluis</first><last>Marquez</last></author>
-      <author><first>Lluis</first><last>Padro</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padro</last></author>
       <doi>10.3115/976909.979648</doi>
       <pages>238–245</pages>
       <url hash="6ad19a23">P97-1031</url>
@@ -305,8 +305,8 @@
     </paper>
     <paper id="33">
       <title>Intonational Boundaries, Speech Repairs, and Discourse Markers: Modeling Spoken Dialog</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <doi>10.3115/976909.979650</doi>
       <pages>254–261</pages>
       <url hash="017fa8c1">P97-1033</url>
@@ -314,8 +314,8 @@
     </paper>
     <paper id="34">
       <title>Tracking Initiative in Collaborative Dialogue Interactions</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
-      <author><first>Michael K.</first><last>Brown</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="michael-k-brown"><first>Michael K.</first><last>Brown</last></author>
       <doi>10.3115/976909.979651</doi>
       <pages>262–270</pages>
       <url hash="b4550766">P97-1034</url>
@@ -323,9 +323,9 @@
     </paper>
     <paper id="35">
       <title><fixed-case>PARADISE</fixed-case>: A Framework for Evaluating Spoken Dialogue Agents</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Candace A.</first><last>Kamm</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="candace-a-kamm"><first>Candace A.</first><last>Kamm</last></author>
       <author><first>Alicia</first><last>Abella</last></author>
       <doi>10.3115/976909.979652</doi>
       <pages>271–280</pages>
@@ -334,10 +334,10 @@
     </paper>
     <paper id="36">
       <title>Unification-based Multimodal Integration</title>
-      <author><first>Michael</first><last>Johnston</last></author>
-      <author><first>Philip R.</first><last>Cohen</last></author>
-      <author><first>David</first><last>McGee</last></author>
-      <author><first>Sharon L.</first><last>Oviatt</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
+      <author id="philip-r-cohen"><first>Philip R.</first><last>Cohen</last></author>
+      <author id="david-mcgee"><first>David</first><last>McGee</last></author>
+      <author id="sharon-oviatt"><first>Sharon L.</first><last>Oviatt</last></author>
       <author><first>James A.</first><last>Pittman</last></author>
       <author><first>Ira</first><last>Smith</last></author>
       <doi>10.3115/976909.979653</doi>
@@ -347,9 +347,9 @@
     </paper>
     <paper id="37">
       <title>A <fixed-case>DP</fixed-case>-based Search Using Monotone Alignments in Statistical Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Alex</first><last>Zubiaga</last></author>
       <doi>10.3115/976909.979654</doi>
       <pages>289–296</pages>
@@ -358,7 +358,7 @@
     </paper>
     <paper id="38">
       <title>An Alignment Method for Noisy Parallel Corpora based on Image Processing Techniques</title>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <author><first>Mathis H.</first><last>Chen</last></author>
       <doi>10.3115/976909.979655</doi>
       <pages>297–304</pages>
@@ -375,7 +375,7 @@
     </paper>
     <paper id="40">
       <title>Efficient Generation in Primitive <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/976909.979657</doi>
       <pages>313–320</pages>
       <url hash="3177c271">P97-1040</url>
@@ -383,7 +383,7 @@
     </paper>
     <paper id="41">
       <title>A Trainable Rule-Based Algorithm for Word Segmentation</title>
-      <author><first>David D.</first><last>Palmer</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
       <doi>10.3115/976909.979658</doi>
       <pages>321–328</pages>
       <url hash="34f42593">P97-1041</url>
@@ -391,7 +391,7 @@
     </paper>
     <paper id="42">
       <title>Compiling Regular Formalisms with Rule Features into Finite-State Automata</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <doi>10.3115/976909.979659</doi>
       <pages>329–336</pages>
       <url hash="b259a232">P97-1042</url>
@@ -417,7 +417,7 @@
     <paper id="45">
       <title>Automatic Extraction of Aspectual Information from a Monolingual Corpus</title>
       <author><first>Akira</first><last>Oishi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/976909.979662</doi>
       <pages>352–359</pages>
       <url hash="4f4a21c7">P97-1045</url>
@@ -425,7 +425,7 @@
     </paper>
     <paper id="46">
       <title>A Comparison of Head Transducers and Transfer for a Limited Domain Translation Application</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Adam L.</first><last>Buchsbaum</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <doi>10.3115/976909.979663</doi>
@@ -436,7 +436,7 @@
     <paper id="47">
       <title>Decoding Algorithm in Statistical Machine Translation</title>
       <author><first>Ye-Yi</first><last>Wang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/976909.979664</doi>
       <pages>366–372</pages>
       <url hash="5b4ff51f">P97-1047</url>
@@ -445,8 +445,8 @@
     <paper id="48">
       <title>A Model of Lexical Attraction and Repulsion</title>
       <author><first>Doug</first><last>Beeferman</last></author>
-      <author><first>Adam</first><last>Berger</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
+      <author id="adam-berger"><first>Adam</first><last>Berger</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
       <doi>10.3115/976909.979665</doi>
       <pages>373–380</pages>
       <url hash="a47b1d03">P97-1048</url>
@@ -454,7 +454,7 @@
     </paper>
     <paper id="49">
       <title>Hierarchical Non-Emitting <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Eric Sven</first><last>Ristad</last></author>
+      <author id="eric-sven-ristad"><first>Eric Sven</first><last>Ristad</last></author>
       <author><first>Robert G.</first><last>Thomas</last></author>
       <doi>10.3115/976909.979666</doi>
       <pages>381–385</pages>
@@ -463,7 +463,7 @@
     </paper>
     <paper id="50">
       <title>Efficient Construction of Underspecified Semantics under Massive Ambiguity</title>
-      <author><first>Jochen</first><last>Dörre</last></author>
+      <author id="jochen-dorre"><first>Jochen</first><last>Dörre</last></author>
       <doi>10.3115/976909.979667</doi>
       <pages>386–393</pages>
       <url hash="62eb4949">P97-1050</url>
@@ -471,8 +471,8 @@
     </paper>
     <paper id="51">
       <title>A Theory of Parallelism and the Case of <fixed-case>VP</fixed-case> Ellipsis</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <doi>10.3115/976909.979668</doi>
       <pages>394–401</pages>
       <url hash="15713d77">P97-1051</url>
@@ -480,7 +480,7 @@
     </paper>
     <paper id="52">
       <title>On Interpreting <fixed-case>F</fixed-case>-Structures as <fixed-case>UDRS</fixed-case>s</title>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <doi>10.3115/976909.979669</doi>
       <pages>402–409</pages>
@@ -499,7 +499,7 @@
     </paper>
     <paper id="54">
       <title>Co-Evolution of Language and of the Language Acquisition Device</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <doi>10.3115/976909.979671</doi>
       <pages>418–427</pages>
       <url hash="b57098e3">P97-1054</url>
@@ -516,7 +516,7 @@
     <paper id="56">
       <title>Memory-Based Learning: Using Similarity for Smoothing</title>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <doi>10.3115/976909.979673</doi>
       <pages>436–443</pages>
       <url hash="56ee8214">P97-1056</url>
@@ -525,7 +525,7 @@
     <paper id="57">
       <title>String Transformation Learning</title>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <doi>10.3115/976909.979674</doi>
       <pages>444–451</pages>
       <url hash="29dab906">P97-1057</url>
@@ -533,7 +533,7 @@
     </paper>
     <paper id="58">
       <title>Approximating Context-Free Grammars with a Finite-State Calculus</title>
-      <author><first>Edmund Grimley</first><last>Evans</last></author>
+      <author id="edmund-grimley-evans"><first>Edmund Grimley</first><last>Evans</last></author>
       <doi>10.3115/976909.979675</doi>
       <pages>452–459</pages>
       <url hash="89f68d14">P97-1058</url>
@@ -541,7 +541,7 @@
     </paper>
     <paper id="59">
       <title>Finite State Transducers Approximating Hidden <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Andre</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>Andre</first><last>Kempe</last></author>
       <doi>10.3115/976909.979676</doi>
       <pages>460–467</pages>
       <url hash="ad6d12af">P97-1059</url>
@@ -569,7 +569,7 @@
     <paper id="62">
       <title>Learning Parse and Translation Decisions from Examples with Rich Context</title>
       <author><first>Ulf</first><last>Hermjakob</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <doi>10.3115/976909.979679</doi>
       <pages>482–489</pages>
       <url hash="68bdf070">P97-1062</url>
@@ -609,7 +609,7 @@
     </paper>
     <paper id="67">
       <title>Choosing the Word Most Typical in Context Using a Lexical Co-occurrence Network</title>
-      <author><first>Philip</first><last>Edmonds</last></author>
+      <author id="philip-edmonds"><first>Philip</first><last>Edmonds</last></author>
       <doi>10.3115/976909.979684</doi>
       <pages>507–509</pages>
       <url hash="d6693b20">P97-1067</url>
@@ -617,7 +617,7 @@
     </paper>
     <paper id="68">
       <title>Improving Translation through Contextual Information</title>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <doi>10.3115/976909.979685</doi>
       <pages>510–512</pages>
       <url hash="c4a7c5ae">P97-1068</url>
@@ -641,7 +641,7 @@
     </paper>
     <paper id="71">
       <title>Contrastive accent in a data-to-speech system</title>
-      <author><first>Mariet</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariet</first><last>Theune</last></author>
       <doi>10.3115/976909.979688</doi>
       <pages>519–521</pages>
       <url hash="a3c2bee5">P97-1071</url>
@@ -649,7 +649,7 @@
     </paper>
     <paper id="72">
       <title>Towards resolution of bridging descriptions</title>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <doi>10.3115/976909.979689</doi>
       <pages>522–524</pages>
diff --git a/data/xml/P98.xml b/data/xml/P98.xml
index a24bec262f..f174811ab8 100644
--- a/data/xml/P98.xml
+++ b/data/xml/P98.xml
@@ -16,7 +16,7 @@
     <paper id="1">
       <title>A Quasi-Dependency Model for the Structural Analysis of <fixed-case>C</fixed-case>hinese <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case>s</title>
       <author><last>Zhao</last><first>Jun</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/980845.980847</doi>
       <pages>1–7</pages>
       <url hash="b07bb624">P98-1001</url>
@@ -32,9 +32,9 @@
     </paper>
     <paper id="3">
       <title>Towards a Single Proposal in Spelling Correction</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <author><first>Atro</first><last>Voutilainen</last></author>
       <doi>10.3115/980845.980850</doi>
       <pages>22–28</pages>
@@ -53,7 +53,7 @@
     </paper>
     <paper id="5">
       <title>Parole et Prduction automatique: le module de reconnaissance <fixed-case>RAPHAEL</fixed-case></title>
-      <author><first>Mohammad</first><last>Akbar</last></author>
+      <author id="mohammad-akbar"><first>Mohammad</first><last>Akbar</last></author>
       <author><first>Jean</first><last>Caelen</last></author>
       <doi>10.3115/980845.980852</doi>
       <pages>36–40</pages>
@@ -62,8 +62,8 @@
     </paper>
     <paper id="6">
       <title>Automatic Acquisition of Hierarchical Transduction Models for Machine Translation</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Shona</first><last>Douglas</last></author>
       <doi>10.3115/980845.980853</doi>
       <pages>41–47</pages>
@@ -110,9 +110,9 @@
     </paper>
     <paper id="11">
       <title>Evaluating a Focus-Based Approach to Anaphora Resolution</title>
-      <author><first>Saliha</first><last>Azzam</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <doi>10.3115/980845.980858</doi>
       <pages>74–78</pages>
       <url hash="000b3039">P98-1011</url>
@@ -129,9 +129,9 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>B</fixed-case>erkeley <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Project</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
-      <author><first>John B.</first><last>Lowe</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
       <doi>10.3115/980845.980860</doi>
       <pages>86–90</pages>
       <url hash="bca049c1">P98-1013</url>
@@ -149,7 +149,7 @@
     <paper id="15">
       <title>Semi-Automatic Recognition of Noun Modifier Relationships</title>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <doi>10.3115/980845.980862</doi>
       <pages>96–102</pages>
       <url hash="b1568bee">P98-1015</url>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>R</fixed-case>edundancy: Helping Semantic Disambiguation</title>
-      <author><first>Caroline</first><last>Barriere</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barriere</last></author>
       <doi>10.3115/980845.980863</doi>
       <pages>103–109</pages>
       <url hash="2e9a6f10">P98-1016</url>
@@ -192,9 +192,9 @@
     </paper>
     <paper id="20">
       <title>Trigger-Pair Predictors in Parsing and Tagging</title>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <doi>10.3115/980845.980867</doi>
       <pages>131–137</pages>
       <url hash="ff99d86c">P98-1020</url>
@@ -211,7 +211,7 @@
     <paper id="22">
       <title>A Probabilistic Corpus-Driven Model for Lexical-Functional Analysis</title>
       <author><first>Rens</first><last>Bod</last></author>
-      <author><first>Ronald</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald</first><last>Kaplan</last></author>
       <doi>10.3115/980845.980869</doi>
       <pages>145–151</pages>
       <url hash="d09894de">P98-1022</url>
@@ -221,7 +221,7 @@
       <title>Anchoring Floating Quantifiers in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Daniela</first><last>Kurz</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <doi>10.3115/980845.980870</doi>
       <pages>152–159</pages>
       <url hash="bf7e22f8">P98-1023</url>
@@ -230,8 +230,8 @@
     <paper id="24">
       <title>Managing Information at Linguistic Interfaces</title>
       <author><first>Johan</first><last>Bos</last></author>
-      <author><first>C.J.</first><last>Rupp</last></author>
-      <author><first>Bianka</first><last>Buschbeck-Wolf</last></author>
+      <author id="c-j-rupp"><first>C.J.</first><last>Rupp</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck-Wolf</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <doi>10.3115/980845.980871</doi>
       <pages>160–166</pages>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="25">
       <title>Deriving the Predicate-Argument Structure for a Free Word Order Language</title>
-      <author><first>Cem</first><last>Bozsahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
       <doi>10.3115/980845.980872</doi>
       <pages>167–173</pages>
       <url hash="dcb3f8a5">P98-1025</url>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="27">
       <title>The Logical Structure of Binding</title>
-      <author><first>Antonio</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
       <doi>10.3115/980845.980874</doi>
       <pages>181–185</pages>
       <url hash="b9efc064">P98-1027</url>
@@ -265,9 +265,9 @@
     <paper id="28">
       <title>Beyond N -Grams: Can Linguistic Sophistication Improve Language Modeling?</title>
       <author><first>Eric</first><last>Brill</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
-      <author><first>Lidia</first><last>Mangu</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
+      <author id="lidia-mangu"><first>Lidia</first><last>Mangu</last></author>
       <doi>10.3115/980845.980875</doi>
       <pages>186–190</pages>
       <url hash="fabd48d2">P98-1028</url>
@@ -292,9 +292,9 @@
     </paper>
     <paper id="31">
       <title>Named Entity Scoring for Speech Input</title>
-      <author><first>John D.</first><last>Burger</last></author>
-      <author><first>David</first><last>Palmer</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
+      <author id="david-d-palmer"><first>David</first><last>Palmer</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <doi>10.3115/980845.980878</doi>
       <pages>201–205</pages>
       <url hash="261d5931">P98-1031</url>
@@ -306,8 +306,8 @@
       <author><first>Karen</first><last>Kukich</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Chi</first><last>Lu</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Lisa</first><last>Braden-Harder</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="lisa-braden-harder"><first>Lisa</first><last>Braden-Harder</last></author>
       <author><first>Mary Dee</first><last>Harris</last></author>
       <doi>10.3115/980845.980879</doi>
       <pages>206–210</pages>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="33">
       <title>Building Parallel <fixed-case>LTAG</fixed-case> for <fixed-case>F</fixed-case>rench and <fixed-case>I</fixed-case>talian</title>
-      <author><first>Marie-Helene</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Helene</first><last>Candito</last></author>
       <doi>10.3115/980845.980880</doi>
       <pages>211–217</pages>
       <url hash="a879068d">P98-1033</url>
@@ -324,8 +324,8 @@
     </paper>
     <paper id="34">
       <title>Error-Driven Pruning of Treebank Grammars for Base Noun Phrase Identification</title>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>David</first><last>Pierce</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
       <doi>10.3115/980845.980881</doi>
       <pages>218–224</pages>
       <url hash="027d7f02">P98-1034</url>
@@ -334,7 +334,7 @@
     <paper id="35">
       <title>Exploiting Syntactic Structure for Language Modeling</title>
       <author><first>Ciprian</first><last>Chelba</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <doi>10.3115/980845.980882</doi>
       <pages>225–231</pages>
       <url hash="26a5aa88">P98-1035</url>
@@ -353,8 +353,8 @@
     </paper>
     <paper id="37">
       <title>A Concept-based Adaptive Approach to Word Sense Disambiguation</title>
-      <author><first>Jen Nan</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jen-nan-chen"><first>Jen Nan</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <doi>10.3115/980845.980884</doi>
       <pages>237–243</pages>
       <url hash="8e06b95b">P98-1037</url>
@@ -362,7 +362,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>PAT</fixed-case>-Trees with the Deletion Function as the Learning Device for Linguistic Patterns</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Wen</first><last>Tsuei</last></author>
       <author><first>Lee-Feng</first><last>Chien</last></author>
       <doi>10.3115/980845.980885</doi>
@@ -373,12 +373,12 @@
     <paper id="39">
       <title>Hybrid Approaches to Improvement of Translation Quality in Web-based <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation</title>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
-      <author><first>Han-Min</first><last>Jung</last></author>
+      <author id="han-min-jung"><first>Han-Min</first><last>Jung</last></author>
       <author><first>Chul-Min</first><last>Sim</last></author>
       <author><first>Taewan</first><last>Kim</last></author>
       <author><first>Dong-In</first><last>Park</last></author>
       <author><first>Jun-Sik</first><last>Park</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/980845.980886</doi>
       <pages>251–255</pages>
       <url hash="f07e94e3">P98-1039</url>
@@ -386,7 +386,7 @@
     </paper>
     <paper id="40">
       <title>Dialogue Management in Vector-Based Call Routing</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <doi>10.3115/980845.980887</doi>
       <pages>256–262</pages>
@@ -396,7 +396,7 @@
     <paper id="41">
       <title>Machine Translation vs. Dictionary Term Translation - a Comparison for <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese News Article Alignment</title>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <author><first>Akira</first><last>Kumano</last></author>
       <doi>10.3115/980845.980888</doi>
       <pages>263–267</pages>
@@ -407,7 +407,7 @@
       <title>An Experiment in Hybrid Dictionary and Statistical Sentence Alignment</title>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Kenji</first><last>Ono</last></author>
-      <author><first>Hideki</first><last>Hirakawa</last></author>
+      <author id="hideki-hirakawa"><first>Hideki</first><last>Hirakawa</last></author>
       <doi>10.3115/980845.980889</doi>
       <pages>268–274</pages>
       <url hash="2794494d">P98-1042</url>
@@ -424,8 +424,8 @@
     <paper id="44">
       <title>Veins Theory: A Model of Global Discourse Cohesion and Coherence</title>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <doi>10.3115/980845.980891</doi>
       <pages>281–285</pages>
       <url hash="0aa1631c">P98-1044</url>
@@ -435,7 +435,7 @@
       <title>Automatic Semantic Tagging of Unknown Proper Names</title>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Danilo</first><last>Luzi</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <doi>10.3115/980845.980892</doi>
       <pages>286–292</pages>
       <url hash="fc96dd7d">P98-1045</url>
@@ -443,9 +443,9 @@
     </paper>
     <paper id="46">
       <title>Investigating Regular Sense Extensions based on Intersective <fixed-case>L</fixed-case>evin Classes</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <doi>10.3115/980845.980893</doi>
       <pages>293–299</pages>
@@ -484,11 +484,11 @@
     <paper id="50">
       <title><fixed-case>M</fixed-case>ultext-<fixed-case>E</fixed-case>ast: Parallel and Comparable Corpora and Lexicons for Six <fixed-case>C</fixed-case>entral and <fixed-case>E</fixed-case>astern <fixed-case>E</fixed-case>uropean Languages</title>
       <author><first>Ludmila</first><last>Dimitrova</last></author>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Heiki Jaan</first><last>Kaalep</last></author>
-      <author><first>Vladimir</first><last>Petkevic</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki Jaan</first><last>Kaalep</last></author>
+      <author id="vladimir-petkevic"><first>Vladimir</first><last>Petkevic</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <doi>10.3115/980845.980897</doi>
       <pages>315–319</pages>
       <url hash="f6f28d42">P98-1050</url>
@@ -496,9 +496,9 @@
     </paper>
     <paper id="51">
       <title>Error Driven Word Sense Disambiguation</title>
-      <author><first>Luca</first><last>Dini</last></author>
-      <author><first>Vittorio</first><last>Di Tomaso</last></author>
-      <author><first>Frederique</first><last>Segond</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
+      <author id="vittorio-di-tomaso"><first>Vittorio</first><last>Di Tomaso</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
       <doi>10.3115/980845.980898</doi>
       <pages>320–324</pages>
       <url hash="d5d02241">P98-1051</url>
@@ -507,9 +507,9 @@
     <paper id="52">
       <title>An Empirical Investigation of Proposals in Collaborative Dialogues</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
-      <author><first>Richmond H.</first><last>Thomason</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
+      <author id="richmond-h-thomason"><first>Richmond H.</first><last>Thomason</last></author>
       <doi>10.3115/980845.980899</doi>
       <pages>325–329</pages>
       <url hash="0402a443">P98-1052</url>
@@ -533,8 +533,8 @@
     </paper>
     <paper id="55">
       <title>A Text Input Front-end Processor as an Information Access Platform</title>
-      <author><first>Shinichi</first><last>Doi</last></author>
-      <author><first>Shin-ichiro</first><last>Kamei</last></author>
+      <author id="shinichi-doi"><first>Shinichi</first><last>Doi</last></author>
+      <author id="shin-ichiro-kamei"><first>Shin-ichiro</first><last>Kamei</last></author>
       <author><first>Kiyoshi</first><last>Yamabana</last></author>
       <doi>10.3115/980845.980902</doi>
       <pages>336–340</pages>
@@ -545,8 +545,8 @@
       <title>Syntactic and Semantic Transfer with <fixed-case>F</fixed-case>-Structures</title>
       <author><first>Michael</first><last>Dorna</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <doi>10.3115/980845.980903</doi>
       <pages>341–347</pages>
       <url hash="44044a20">P98-1056</url>
@@ -574,7 +574,7 @@
     <paper id="59">
       <title>Spelling Correction using Context</title>
       <author><first>Mohammad Ali</first><last>Elmi</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.3115/980845.980906</doi>
       <pages>360–364</pages>
       <url hash="4532b5e3">P98-1059</url>
@@ -582,7 +582,7 @@
     </paper>
     <paper id="60">
       <title>Ambiguity Preserving Machine Translation using Packed Representations</title>
-      <author><first>Martin C.</first><last>Emele</last></author>
+      <author id="martin-c-emele"><first>Martin C.</first><last>Emele</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
       <doi>10.3115/980845.980907</doi>
       <pages>365–371</pages>
@@ -591,8 +591,8 @@
     </paper>
     <paper id="61">
       <title>A Structure-sharing Parser for Lexicalized Grammars</title>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <doi>10.3115/980845.980908</doi>
       <pages>372–378</pages>
       <url hash="7629c4a5">P98-1061</url>
@@ -624,9 +624,9 @@
     </paper>
     <paper id="64">
       <title>Anaphor Resolution In Unrestricted Texts With Partial Parsing</title>
-      <author><first>Antonio</first><last>Ferrandez</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Lidia</first><last>Moreno</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrandez</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="lidia-moreno"><first>Lidia</first><last>Moreno</last></author>
       <doi>10.3115/980845.980911</doi>
       <pages>385–391</pages>
       <url hash="d948778d">P98-1064</url>
@@ -652,7 +652,7 @@
     </paper>
     <paper id="67">
       <title>Toward General-Purpose Learning for Information Extraction</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <doi>10.3115/980845.980914</doi>
       <pages>404–408</pages>
       <url hash="1faa208f">P98-1067</url>
@@ -696,7 +696,7 @@
     </paper>
     <paper id="72">
       <title>Semantic-Head Based Resolution of Scopal Ambiguities</title>
-      <author><first>Bjorn</first><last>Gamback</last></author>
+      <author id="bjorn-gamback"><first>Bjorn</first><last>Gamback</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <doi>10.3115/980845.980919</doi>
       <pages>433–437</pages>
@@ -707,7 +707,7 @@
       <title>Vers l’utilisation des méthodes formelles pour le développement de linguiciels</title>
       <author><first>Bilel</first><last>Gargouri</last></author>
       <author><first>Mohamed</first><last>Jmaiel</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <doi>10.3115/980845.980920</doi>
       <pages>438–443</pages>
       <url hash="348a10a0">P98-1073</url>
@@ -715,7 +715,7 @@
     </paper>
     <paper id="74">
       <title>Flow Network Models for Word Alignment and Terminology Extraction from Bilingual Corpora</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <doi>10.3115/980845.980921</doi>
       <pages>444–450</pages>
       <url hash="77ed23bf">P98-1074</url>
@@ -723,8 +723,8 @@
     </paper>
     <paper id="75">
       <title>Growing Semantic Grammars</title>
-      <author><first>Marsal</first><last>Gavalda</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavalda</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/980845.980922</doi>
       <pages>451–456</pages>
       <url hash="cc0218fb">P98-1075</url>
@@ -740,7 +740,7 @@
     </paper>
     <paper id="77">
       <title>Efficient Linear Logic Meaning Assembly</title>
-      <author><first>Vineet</first><last>Gupta</last></author>
+      <author id="vineet-gupta"><first>Vineet</first><last>Gupta</last></author>
       <author><first>John</first><last>Lamping</last></author>
       <doi>10.3115/980845.980924</doi>
       <pages>464–470</pages>
@@ -767,8 +767,8 @@
     </paper>
     <paper id="80">
       <title>Tagging Inflective Languages: Prediction of Morphological Categories for a Rich Structured Tagset</title>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <doi>10.3115/980845.980927</doi>
       <pages>483–490</pages>
       <url hash="ba943096">P98-1080</url>
@@ -776,9 +776,9 @@
     </paper>
     <paper id="81">
       <title>Improving Data Driven Wordclass Tagging by System Combination</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <doi>10.3115/980845.980928</doi>
       <pages>491–497</pages>
       <url hash="829c7943">P98-1081</url>
@@ -786,8 +786,8 @@
     </paper>
     <paper id="82">
       <title>A Step towards the Detection of Semantic Variants of Terms in Technical Documents</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Cecile</first><last>Gros</last></author>
       <doi>10.3115/980845.980929</doi>
       <pages>498–504</pages>
@@ -797,7 +797,7 @@
     <paper id="83">
       <title>Using Decision Trees to Construct a Practical Parser</title>
       <author><first>Masahiko</first><last>Haruno</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Yoshifumi</first><last>Ooyama</last></author>
       <doi>10.3115/980845.980930</doi>
       <pages>505–511</pages>
@@ -807,7 +807,7 @@
     <paper id="84">
       <title>Integrating Text Plans for Conciseness and Coherence</title>
       <author><first>Terrence</first><last>Harvey</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <doi>10.3115/980845.980931</doi>
       <pages>512–518</pages>
       <url hash="0430da7b">P98-1084</url>
@@ -834,7 +834,7 @@
     </paper>
     <paper id="87">
       <title>A Connectionist Architecture for Learning to Parse</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Peter</first><last>Lane</last></author>
       <doi>10.3115/980845.980934</doi>
       <pages>531–537</pages>
@@ -859,8 +859,8 @@
     </paper>
     <paper id="90">
       <title>Long Distance Pronominalisation and Global Focus</title>
-      <author><first>Janet</first><last>Hitzeman</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.3115/980845.980937</doi>
       <pages>550–556</pages>
       <url hash="549916cf">P98-1090</url>
@@ -876,7 +876,7 @@
     </paper>
     <paper id="92">
       <title>Terminological Variation, a Means of Identifying Research Topics from Texts</title>
-      <author><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
+      <author id="fidelia-ibekwe-sanjuan"><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
       <doi>10.3115/980845.980939</doi>
       <pages>564–570</pages>
       <url hash="650a3285">P98-1092</url>
@@ -903,7 +903,7 @@
     </paper>
     <paper id="95">
       <title>Exploring the Characteristics of Multi-party Dialogues</title>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <author><first>Tsuneaki</first><last>Kato</last></author>
       <doi>10.3115/980845.980942</doi>
       <pages>583–589</pages>
@@ -912,8 +912,8 @@
     </paper>
     <paper id="96">
       <title>Robust Interaction through Partial Interpretation and Dialogue Management</title>
-      <author><first>Arne</first><last>Jönsson</last></author>
-      <author><first>Lena</first><last>Strömbäck</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Strömbäck</last></author>
       <doi>10.3115/980845.980943</doi>
       <pages>590–594</pages>
       <url hash="cff8e1e6">P98-1096</url>
@@ -929,9 +929,9 @@
     </paper>
     <paper id="98">
       <title>Combining a <fixed-case>C</fixed-case>hinese Thesaurus with a <fixed-case>C</fixed-case>hinese Dictionary</title>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Junping</first><last>Gong</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="junping-gong"><first>Junping</first><last>Gong</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/980845.980945</doi>
       <pages>600–606</pages>
       <url hash="ea0fa4d1">P98-1098</url>
@@ -939,8 +939,8 @@
     </paper>
     <paper id="99">
       <title>Combining Multiple, Large-Scale Resources in a Reusable Lexicon for Natural Language Generation</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.3115/980845.980946</doi>
       <pages>607–613</pages>
       <url hash="c4abbf4d">P98-1099</url>
@@ -948,8 +948,8 @@
     </paper>
     <paper id="100">
       <title>Text Segmentation Using Reiteration and Collocation</title>
-      <author><first>Amanda C.</first><last>Jobbins</last></author>
-      <author><first>Lindsay J.</first><last>Evett</last></author>
+      <author id="amanda-c-jobbins"><first>Amanda C.</first><last>Jobbins</last></author>
+      <author id="lindsay-j-evett"><first>Lindsay J.</first><last>Evett</last></author>
       <doi>10.3115/980845.980947</doi>
       <pages>614–618</pages>
       <url hash="fa3dea98">P98-1100</url>
@@ -965,7 +965,7 @@
     </paper>
     <paper id="102">
       <title>Unification-based Multimodal Parsing</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <doi>10.3115/980845.980949</doi>
       <pages>624–630</pages>
       <url hash="05acff78">P98-1102</url>
@@ -973,7 +973,7 @@
     </paper>
     <paper id="103">
       <title>Context Management with Topics for Spoken Dialogue Systems</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Akio</first><last>Yokoo</last></author>
       <doi>10.3115/980845.980950</doi>
@@ -1001,7 +1001,7 @@
       <title>Pseudo-Projectivity, A Polynomially Parsable Non-Projective Dependency Grammar</title>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/980845.980953</doi>
       <pages>646–652</pages>
       <url hash="4122b4c8">P98-1106</url>
@@ -1010,7 +1010,7 @@
     <paper id="107">
       <title>A Method for Correcting Errors in Speech Recognition using the Statistical Features of Character Co-occurrence</title>
       <author><first>Satoshi</first><last>Kaki</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Hitoshi</first><last>Iida</last></author>
       <doi>10.3115/980845.980954</doi>
       <pages>653–657</pages>
@@ -1019,11 +1019,11 @@
     </paper>
     <paper id="108">
       <title>Use of Mutual Information Based Character Clusters in Dictionary-less Morphological Analysis of <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Yasuhiro</first><last>Kawata</last></author>
       <author><first>Yumiko</first><last>Kinjo</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Ezra W.</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra W.</first><last>Black</last></author>
       <doi>10.3115/980845.980955</doi>
       <pages>658–662</pages>
       <url hash="e14a1ab8">P98-1108</url>
@@ -1031,7 +1031,7 @@
     </paper>
     <paper id="109">
       <title>Know When to Hold '<fixed-case>E</fixed-case>m: Shuffling Deterministically in a Parser for Nonconcatenative Grammars</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <author><first>Mike</first><last>Calcagno</last></author>
       <author><first>Paul C.</first><last>Davis</last></author>
       <doi>10.3115/980845.980956</doi>
@@ -1051,7 +1051,7 @@
       <title>Unlimited Vocabulary Grapheme to Phoneme Conversion for <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case></title>
       <author><first>Byeongchang</first><last>Kim</last></author>
       <author><first>WonIl</first><last>Lee</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <doi>10.3115/980845.980958</doi>
       <pages>675–679</pages>
@@ -1060,7 +1060,7 @@
     </paper>
     <paper id="112">
       <title>Role of Verbs in Document Analysis</title>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <doi>10.3115/980845.980959</doi>
       <pages>680–686</pages>
@@ -1069,7 +1069,7 @@
     </paper>
     <paper id="113">
       <title>A Flexible Example-Based Parser Based on the <fixed-case>SSTC</fixed-case></title>
-      <author><first>Mosleh Hmoud</first><last>Al-Adhaileh</last></author>
+      <author id="mosleh-hmoud-al-adhaileh"><first>Mosleh Hmoud</first><last>Al-Adhaileh</last></author>
       <author><last>Tang</last><first>Enya Kong</first></author>
       <doi>10.3115/980845.980960</doi>
       <pages>687–693</pages>
@@ -1091,8 +1091,8 @@
       <title>Compacting the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Grammar</title>
       <author><first>Alexander</first><last>Krotov</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/980845.980962</doi>
       <pages>699–703</pages>
       <url hash="599af611">P98-1115</url>
@@ -1100,7 +1100,7 @@
     </paper>
     <paper id="116">
       <title>Generation that Exploits Corpus-Based Statistical Knowledge</title>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <doi>10.3115/980845.980963</doi>
       <pages>704–710</pages>
@@ -1110,9 +1110,9 @@
     </paper>
     <paper id="117">
       <title>Methods and Practical Issues in Evaluating Alignment Techniques</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Jean</first><last>Veronis</last></author>
+      <author id="jean-veronis"><first>Jean</first><last>Veronis</last></author>
       <doi>10.3115/980845.980964</doi>
       <pages>711–717</pages>
       <url hash="050ca3c3">P98-1117</url>
@@ -1120,8 +1120,8 @@
     </paper>
     <paper id="118">
       <title>A Framework for Customizable Generation of Hypertext Presentations</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <doi>10.3115/980845.980965</doi>
       <pages>718–722</pages>
       <url hash="70c9ec64">P98-1118</url>
@@ -1130,7 +1130,7 @@
     <paper id="119">
       <title>Automatic Acquisition of Language Model based on Head-Dependent Relation between Words</title>
       <author><first>Seungmi</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/980845.980966</doi>
       <pages>723–727</pages>
       <url hash="f6b3adac">P98-1119</url>
@@ -1154,7 +1154,7 @@
     </paper>
     <paper id="122">
       <title>Characterizing and Recognizing Spoken Corrections in Human-Computer Dialogue</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <doi>10.3115/980845.980969</doi>
       <pages>736–742</pages>
       <url hash="68e454b4">P98-1122</url>
@@ -1195,9 +1195,9 @@
     </paper>
     <paper id="125">
       <title>Identifying Syntactic Role of Antecedent in <fixed-case>K</fixed-case>orean Relative Clause using Corpus and Thesaurus Informationes</title>
-      <author><first>Hui-Feng</first><last>Li</last></author>
+      <author id="huifeng-li"><first>Hui-Feng</first><last>Li</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <doi>10.3115/980691.980694</doi>
       <pages>756–762</pages>
       <url hash="74ec505a">P98-2125</url>
@@ -1206,9 +1206,9 @@
     <paper id="126">
       <title>A Test Environment for Natural Language Understanding Systems</title>
       <author><first>Li</first><last>Li</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Marcia C.</first><last>Linebarger</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="marcia-c-linebarger"><first>Marcia C.</first><last>Linebarger</last></author>
       <author><first>Dongdong</first><last>Chen</last></author>
       <doi>10.3115/980691.980695</doi>
       <pages>763–767</pages>
@@ -1234,9 +1234,9 @@
     </paper>
     <paper id="129">
       <title>Evaluating Response Strategies in a Web-Based Spoken Dialogue Agent</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <doi>10.3115/980691.980698</doi>
       <pages>780–786</pages>
       <url hash="229ddeac">P98-2129</url>
@@ -1245,7 +1245,7 @@
     <paper id="130">
       <title>Formal Aspects and Parsing Issues of Dependency Theory</title>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <doi>10.3115/980691.980699</doi>
       <pages>787–793</pages>
       <url hash="f9feec67">P98-2130</url>
@@ -1253,11 +1253,11 @@
     </paper>
     <paper id="131">
       <title>A Multi-Neuro Tagger Using Variable Lengths of Contexts</title>
-      <author><first>Susann</first><last>LuperFoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>LuperFoy</last></author>
       <author><first>Dan</first><last>Loehr</last></author>
       <author><first>David</first><last>Duff</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
       <author><first>Lisa</first><last>Harper</last></author>
       <author><first>Qing</first><last>Ma</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
@@ -1271,7 +1271,7 @@
       <author><first>Takaki</first><last>Makino</last></author>
       <author><first>Minoru</first><last>Yoshida</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/980691.980702</doi>
       <pages>807–811</pages>
       <url hash="110c5210">P98-2132</url>
@@ -1279,9 +1279,9 @@
     </paper>
     <paper id="134">
       <title>Bitext Correspondences through Rich Mark-up</title>
-      <author><first>Raquel</first><last>Martinez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martinez</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <doi>10.3115/980691.980703</doi>
       <pages>812–818</pages>
       <url hash="449e8781">P98-2134</url>
@@ -1289,7 +1289,7 @@
     </paper>
     <paper id="135">
       <title>Discourse Cues for Broadcast News Segmentation</title>
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <doi>10.3115/980691.980704</doi>
       <pages>819–822</pages>
       <url hash="4e9fe176">P98-2135</url>
@@ -1297,9 +1297,9 @@
     </paper>
     <paper id="136">
       <title>Confirmation in Multimodal Systems</title>
-      <author><first>David R.</first><last>McGee</last></author>
-      <author><first>Phil R.</first><last>Cohen</last></author>
-      <author><first>Sharon</first><last>Oviatt</last></author>
+      <author id="david-mcgee"><first>David R.</first><last>McGee</last></author>
+      <author id="philip-r-cohen"><first>Phil R.</first><last>Cohen</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
       <doi>10.3115/980691.980705</doi>
       <pages>823–829</pages>
       <url hash="3754f047">P98-2136</url>
@@ -1326,11 +1326,11 @@
     </paper>
     <paper id="139">
       <title>Deriving Transfer Rules from Dominance-Preserving Alignments</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
       <doi>10.3115/980691.980708</doi>
       <pages>843–847</pages>
       <url hash="83cb9f97">P98-2139</url>
@@ -1365,7 +1365,7 @@
     </paper>
     <paper id="143">
       <title>Robust Pronoun Resolution with Limited Knowledge</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <doi>10.3115/980691.980712</doi>
       <pages>869–875</pages>
       <url hash="c33577b6">P98-2143</url>
@@ -1375,7 +1375,7 @@
       <title><fixed-case>HPSG</fixed-case>-Style Underspecified <fixed-case>J</fixed-case>apanese Grammar with Wide Coverage</title>
       <author><first>Yutaka</first><last>Mitsuishi</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/980691.980713</doi>
       <pages>876–880</pages>
       <url hash="a1532c3f">P98-2144</url>
@@ -1385,7 +1385,7 @@
       <title>Text Segmentation with Multiple Surface Linguistic Cues</title>
       <author><first>Hajime</first><last>Mochizuki</last></author>
       <author><first>Takeo</first><last>Honda</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <doi>10.3115/980691.980714</doi>
       <pages>881–885</pages>
       <url hash="2b81e142">P98-2145</url>
@@ -1402,7 +1402,7 @@
     <paper id="147">
       <title>Dynamic Compilation of Weighted Context-Free Grammars</title>
       <author><first>Mehryar</first><last>Mohri</last></author>
-      <author><first>Fernando C.N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C.N.</first><last>Pereira</last></author>
       <doi>10.3115/980691.980716</doi>
       <pages>891–897</pages>
       <url hash="db4552c3">P98-2147</url>
@@ -1411,7 +1411,7 @@
     <paper id="148">
       <title>A Stochastic Language Model using Dependency and its Improvement by Word Clustering</title>
       <author><first>Shinsuke</first><last>Mori</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980691.980717</doi>
       <pages>898–904</pages>
       <url hash="d3aa6fc2">P98-2148</url>
@@ -1429,7 +1429,7 @@
     <paper id="150">
       <title>An Estimate of Referent of Noun Phrases in <fixed-case>J</fixed-case>apanese Sentences</title>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980691.980719</doi>
       <pages>912–916</pages>
       <url hash="a33943f1">P98-2150</url>
@@ -1438,7 +1438,7 @@
     <paper id="151">
       <title>Automatic Text Summarization Based on the Global Document Annotation</title>
       <author><first>Katashi</first><last>Nagao</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <doi>10.3115/980691.980720</doi>
       <pages>917–921</pages>
       <url hash="6a33d1e3">P98-2151</url>
@@ -1474,7 +1474,7 @@
     </paper>
     <paper id="155">
       <title>Constituent-based Accent Prediction</title>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
       <doi>10.3115/980691.980724</doi>
       <pages>939–945</pages>
       <url hash="5ea79a1a">P98-2155</url>
@@ -1513,7 +1513,7 @@
       <title>An Efficient Parallel Substrate for Typed Feature Structures on Shared Memory Parallel Machines</title>
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/980691.980728</doi>
       <pages>968–974</pages>
       <url hash="ba194b17">P98-2159</url>
@@ -1521,7 +1521,7 @@
     </paper>
     <paper id="160">
       <title>Universal Grammar and Lexis for Quick Ramp-Up of <fixed-case>MT</fixed-case> Systems</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Victor</first><last>Raskin</last></author>
       <doi>10.3115/980691.980729</doi>
       <pages>975–979</pages>
@@ -1530,8 +1530,8 @@
     </paper>
     <paper id="161">
       <title>Integration of Large-Scale Linguistic Resources in a Natural Language Understanding System</title>
-      <author><first>Lewis M.</first><last>Norton</last></author>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="lewis-m-norton"><first>Lewis M.</first><last>Norton</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <author><first>Li</first><last>Li</last></author>
       <author><first>Katharine P.</first><last>Beals</last></author>
       <doi>10.3115/980691.980730</doi>
@@ -1541,7 +1541,7 @@
     </paper>
     <paper id="162">
       <title>Improving Statistical Natural Language Translation with Categories and Rules</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Hans</first><last>Weber</last></author>
       <doi>10.3115/980691.980731</doi>
       <pages>985–989</pages>
@@ -1551,7 +1551,7 @@
     <paper id="163">
       <title>Recognition of the Coherence Relation between Te-linked Clauses</title>
       <author><first>Akira</first><last>Oishi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/980691.980732</doi>
       <pages>990–996</pages>
       <url hash="9c5113e8">P98-2163</url>
@@ -1559,8 +1559,8 @@
     </paper>
     <paper id="164">
       <title>On the Evaluation and Comparison of Taggers: the Effect of Noise in Testing Corpora</title>
-      <author><first>Lluis</first><last>Padro</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
+      <author id="lluis-padro"><first>Lluis</first><last>Padro</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
       <doi>10.3115/980691.980733</doi>
       <pages>997–1002</pages>
       <url hash="c26cf729">P98-2164</url>
@@ -1569,7 +1569,7 @@
     <paper id="165">
       <title>Learning Intonation Rules for Concept to Speech Generation</title>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.3115/980691.980734</doi>
       <pages>1003–1009</pages>
       <url hash="f81a1b71">P98-2165</url>
@@ -1577,8 +1577,8 @@
     </paper>
     <paper id="166">
       <title>Possessive Pronominal Anaphor Resolution in <fixed-case>P</fixed-case>ortuguese Written Texts</title>
-      <author><first>Ivandre</first><last>Paraboni</last></author>
-      <author><first>Vera Lucia Strube</first><last>de Lima</last></author>
+      <author id="ivandre-paraboni"><first>Ivandre</first><last>Paraboni</last></author>
+      <author id="vera-lucia-strube-de-lima"><first>Vera Lucia Strube</first><last>de Lima</last></author>
       <doi>10.3115/980691.980735</doi>
       <pages>1010–1014</pages>
       <url hash="0da76696">P98-2166</url>
@@ -1589,7 +1589,7 @@
       <author><first>Junsik</first><last>Park</last></author>
       <author><first>Jung-Goo</first><last>Kang</last></author>
       <author><first>Wook</first><last>Hur</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <doi>10.3115/980691.980736</doi>
       <pages>1015–1019</pages>
       <url hash="06caf277">P98-2167</url>
@@ -1626,7 +1626,7 @@
       <author><first>Hannes</first><last>Pirker</last></author>
       <author><first>Georg</first><last>Niklfeld</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <doi>10.3115/980691.980740</doi>
       <pages>1041–1045</pages>
       <url hash="16fdaba4">P98-2171</url>
@@ -1634,7 +1634,7 @@
     </paper>
     <paper id="172">
       <title>Reference Resolution beyond Coreference: a Conceptual Frame and its Application</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <author><first>Gerard</first><last>Sabah</last></author>
       <doi>10.3115/980691.980741</doi>
@@ -1645,7 +1645,7 @@
     <paper id="173">
       <title>Multilingual Authoring using Feedback Texts</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <doi>10.3115/980691.980742</doi>
       <pages>1053–1059</pages>
       <url hash="6f099d74">P98-2173</url>
@@ -1654,7 +1654,7 @@
     <paper id="174">
       <title>Practical Glossing by Prioritised Tiling</title>
       <author><first>Victor</first><last>Poznanski</last></author>
-      <author><first>Pete</first><last>Whitelock</last></author>
+      <author id="pete-whitelock"><first>Pete</first><last>Whitelock</last></author>
       <author><first>Jan</first><last>IJdens</last></author>
       <author><first>Steffan</first><last>Corley</last></author>
       <doi>10.3115/980691.980743</doi>
@@ -1664,7 +1664,7 @@
     </paper>
     <paper id="175">
       <title>An Intelligent Multi-Dictionary Environment</title>
-      <author><first>Gabor</first><last>Prbszeky</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Prbszeky</last></author>
       <doi>10.3115/980691.980744</doi>
       <pages>1067–1071</pages>
       <url hash="f7e20e45">P98-2175</url>
@@ -1672,7 +1672,7 @@
     </paper>
     <paper id="176">
       <title>Learning Correlations between Linguistic Indicators and Semantic Constraints: Reuse of Context-Dependent Descriptions of Entities</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <doi>10.3115/980691.980745</doi>
       <pages>1072–1078</pages>
       <url hash="b426785e">P98-2176</url>
@@ -1680,7 +1680,7 @@
     </paper>
     <paper id="177">
       <title>Statistical Models for Unsupervised Prepositional Phrase Attachment</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <doi>10.3115/980691.980746</doi>
       <pages>1079–1085</pages>
       <url hash="36bbe11b">P98-2177</url>
@@ -1697,8 +1697,8 @@
     </paper>
     <paper id="179">
       <title>Generating the Structure of Argument</title>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Derek</first><last>Long</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="derek-long"><first>Derek</first><last>Long</last></author>
       <doi>10.3115/980691.980748</doi>
       <pages>1091–1097</pages>
       <url hash="bd5c5a99">P98-2179</url>
@@ -1707,7 +1707,7 @@
     <paper id="180">
       <title><fixed-case>M</fixed-case>ind<fixed-case>N</fixed-case>et: Acquiring and Structuring Semantic Information from Text</title>
       <author><first>Stephen D.</first><last>Richardson</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <doi>10.3115/980691.980749</doi>
       <pages>1098–1102</pages>
@@ -1716,9 +1716,9 @@
     </paper>
     <paper id="181">
       <title>Building Accurate Semantic Taxonomies from Monolingual <fixed-case>MRD</fixed-case>s</title>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <doi>10.3115/980691.980750</doi>
       <pages>1103–1109</pages>
       <url hash="a381b3fc">P98-2181</url>
@@ -1744,7 +1744,7 @@
     <paper id="184">
       <title>How Verb Subcategorization Frequencies are Affected by Corpus Choice</title>
       <author><first>Douglas</first><last>Roland</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <doi>10.3115/980691.980753</doi>
       <pages>1122–1128</pages>
       <url hash="7b1640bc">P98-2184</url>
@@ -1752,8 +1752,8 @@
     </paper>
     <paper id="185">
       <title>An Interactive Domain Independent Approach to Robust Dialogue Interpretation</title>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
-      <author><first>Lori S.</first><last>Levin</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="lori-levin"><first>Lori S.</first><last>Levin</last></author>
       <doi>10.3115/980691.980754</doi>
       <pages>1129–1135</pages>
       <url hash="b1ddfc6a">P98-2185</url>
@@ -1770,7 +1770,7 @@
     </paper>
     <paper id="187">
       <title>A <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Perspective for Adjectival Modification</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <doi>10.3115/980691.980756</doi>
       <pages>1143–1149</pages>
       <url hash="c189db7c">P98-2187</url>
@@ -1778,9 +1778,9 @@
     </paper>
     <paper id="188">
       <title>Dialogue Act Tagging with Transformation-Based Learning</title>
-      <author><first>Ken</first><last>Samuel</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="ken-samuel"><first>Ken</first><last>Samuel</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/980691.980757</doi>
       <pages>1150–1156</pages>
       <url hash="76177444">P98-2188</url>
@@ -1854,7 +1854,7 @@
     <paper id="196">
       <title>Recognizing Syntactic Errors in the Writing of Second Language Learners</title>
       <author><first>David</first><last>Schneider</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/980691.980765</doi>
       <pages>1198–1204</pages>
       <url hash="f22913ae">P98-2196</url>
@@ -1863,8 +1863,8 @@
     <paper id="197">
       <title>Transforming Lattices into Non-deterministic Automata with Optional Null Arcs</title>
       <author><first>Mark</first><last>Seligman</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Boubaker</first><last>Meddeb-Hamrouni</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="boubaker-meddeb-hamrouni"><first>Boubaker</first><last>Meddeb-Hamrouni</last></author>
       <doi>10.3115/980691.980766</doi>
       <pages>1205–1211</pages>
       <url hash="5d7e7463">P98-2197</url>
@@ -1888,7 +1888,7 @@
     </paper>
     <paper id="200">
       <title>Similarity Metrics for Aligning Children’s Articulation Data</title>
-      <author><first>Harold L.</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold L.</first><last>Somers</last></author>
       <doi>10.3115/980691.980769</doi>
       <pages>1227–1232</pages>
       <url hash="3902a57a">P98-2200</url>
@@ -1897,7 +1897,7 @@
     <paper id="201">
       <title>A Connectionist Approach to Prepositional Phrase Attachment for Real World Texts</title>
       <author><first>Josep M.</first><last>Sopena</last></author>
-      <author><first>Agusti</first><last>LLoberas</last></author>
+      <author id="agusti-lloberas"><first>Agusti</first><last>LLoberas</last></author>
       <author><first>Joan L.</first><last>Moliner</last></author>
       <doi>10.3115/980691.980770</doi>
       <pages>1233–1237</pages>
@@ -1932,9 +1932,9 @@
     </paper>
     <paper id="205">
       <title>Summarization-based Query Expansion in Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Jin</first><last>Wang</last></author>
-      <author><first>Bowden</first><last>Wise</last></author>
+      <author id="g-bowden-wise"><first>Bowden</first><last>Wise</last></author>
       <doi>10.3115/980691.980774</doi>
       <pages>1258–1264</pages>
       <url hash="434dd446">P98-2205</url>
@@ -1944,7 +1944,7 @@
       <title><fixed-case>C</fixed-case>hinese Word Segmentation without Using Lexicon and Hand-crafted Training Data</title>
       <author><first>Maosong</first><last>Sun</last></author>
       <author><first>Dayang</first><last>Shen</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <doi>10.3115/980691.980775</doi>
       <pages>1265–1271</pages>
       <url hash="bdf60c43">P98-2206</url>
@@ -1971,8 +1971,8 @@
     </paper>
     <paper id="209">
       <title>Reactive Content Selection in the Generation of Real-time Soccer Commentary</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Itsuki</first><last>Noda</last></author>
       <doi>10.3115/980691.980778</doi>
       <pages>1282–1288</pages>
@@ -1983,7 +1983,7 @@
       <title>Idiomatic Object Usage and Support Verbs</title>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <author><first>Jussi</first><last>Piitulainen</last></author>
-      <author><first>Timo</first><last>Jarvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Jarvinen</last></author>
       <doi>10.3115/980691.980779</doi>
       <pages>1289–1293</pages>
       <url hash="21456a51">P98-2210</url>
@@ -2019,7 +2019,7 @@
       <title>General-to-Specific Model Selection for Subcategorization Preference</title>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Takashi</first><last>Miyata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/980691.980783</doi>
       <pages>1314–1320</pages>
       <url hash="53e93247">P98-2214</url>
@@ -2037,7 +2037,7 @@
       <title>The Computational Lexical Semantics of Syntagmatic Expressions</title>
       <author><first>Evelyne</first><last>Viegas</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <doi>10.3115/980691.980785</doi>
       <pages>1328–1332</pages>
       <url hash="e9fa97e3">P98-2216</url>
@@ -2045,8 +2045,8 @@
     </paper>
     <paper id="217">
       <title>A tabular interpretation of a class of 2-Stack Automata</title>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Miguel</first><last>Alonso Pardo</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel</first><last>Alonso Pardo</last></author>
       <doi>10.3115/980691.980786</doi>
       <pages>1333–1339</pages>
       <url hash="9ac7862f">P98-2217</url>
@@ -2054,7 +2054,7 @@
     </paper>
     <paper id="218">
       <title>Project for production of closed-caption <fixed-case>TV</fixed-case> programs for the hearing impaired</title>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <doi>10.3115/980691.980787</doi>
       <pages>1340–1344</pages>
       <url hash="7b4c72f7">P98-2218</url>
@@ -2062,7 +2062,7 @@
     </paper>
     <paper id="219">
       <title>Learning Optimal Dialogue Strategies: A Case Study of a Spoken Dialogue Agent for Email</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
       <doi>10.3115/980691.980788</doi>
       <pages>1345–1351</pages>
       <url hash="8f033d7f">P98-2219</url>
@@ -2071,7 +2071,7 @@
     <paper id="220">
       <title>Automatic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese name transliteration for development of multilingual resources</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cornelia Maria</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Cornelia Maria</first><last>Verspoor</last></author>
       <doi>10.3115/980691.980789</doi>
       <pages>1352–1356</pages>
       <url hash="c45154e5">P98-2220</url>
@@ -2080,7 +2080,7 @@
     <paper id="221">
       <title>Modeling with Structures in Statistical Machine translation</title>
       <author><first>Ye-Yi</first><last>Wang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/980691.980790</doi>
       <pages>1357–1363</pages>
       <url hash="d699368b">P98-2221</url>
@@ -2106,7 +2106,7 @@
     <paper id="224">
       <title>Diagram Understanding Using Integration of Layout Information and Textual Information</title>
       <author><first>Yasuhiko</first><last>Watanabe</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980691.980793</doi>
       <pages>1374–1380</pages>
       <url hash="b00b4353">P98-2224</url>
@@ -2117,7 +2117,7 @@
       <author><first>Yasuhiko</first><last>Watanabe</last></author>
       <author><first>Yoshihiro</first><last>Okada</last></author>
       <author><first>Kengo</first><last>Kaneji</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <doi>10.3115/980691.980794</doi>
       <pages>1381–1387</pages>
       <url hash="65711826">P98-2225</url>
@@ -2125,7 +2125,7 @@
     </paper>
     <paper id="226">
       <title>Translating Idioms</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <doi>10.3115/980691.980795</doi>
       <pages>1388–1392</pages>
       <url hash="70a00f0a">P98-2226</url>
@@ -2133,7 +2133,7 @@
     </paper>
     <paper id="227">
       <title>Head-Driven Generation with <fixed-case>HPSG</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <doi>10.3115/980691.980796</doi>
       <pages>1393–1397</pages>
       <url hash="51b53b96">P98-2227</url>
@@ -2141,7 +2141,7 @@
     </paper>
     <paper id="228">
       <title>Word Sense Disambiguation using Optimised Combinations of Knowledge Sources</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <doi>10.3115/980691.980797</doi>
       <pages>1398–1402</pages>
@@ -2150,7 +2150,7 @@
     </paper>
     <paper id="229">
       <title>A Model for Robust Processing of Spontaneous Speech by Integrating Viable Fragments</title>
-      <author><first>Karsten L.</first><last>Worm</last></author>
+      <author id="karsten-l-worm"><first>Karsten L.</first><last>Worm</last></author>
       <doi>10.3115/980691.980798</doi>
       <pages>1403–1407</pages>
       <url hash="09014e6e">P98-2229</url>
@@ -2190,7 +2190,7 @@
     <paper id="233">
       <title>Feasibility Study for Ellipsis Resolution in Dialogues by Machine-Learning Technique</title>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <doi>10.3115/980691.980802</doi>
       <pages>1428–1435</pages>
       <url hash="52ee1976">P98-2233</url>
@@ -2198,8 +2198,8 @@
     </paper>
     <paper id="234">
       <title>Some Properties of Preposition and Subordinate Conjunction Attachments</title>
-      <author><first>Alexander S.</first><last>Yeh</last></author>
-      <author><first>Marc B.</first><last>Vilain</last></author>
+      <author id="alexander-yeh"><first>Alexander S.</first><last>Yeh</last></author>
+      <author id="marc-vilain"><first>Marc B.</first><last>Vilain</last></author>
       <doi>10.3115/980691.980803</doi>
       <pages>1436–1442</pages>
       <url hash="ef8641a0">P98-2234</url>
@@ -2207,7 +2207,7 @@
     </paper>
     <paper id="235">
       <title>Evaluation of Importance of Sentences based on Connectivity to Title</title>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Toshiyuki</first><last>Okunishi</last></author>
       <author><first>Takahiro</first><last>Yamaji</last></author>
       <author><first>Yoji</first><last>Fukumochi</last></author>
@@ -2227,7 +2227,7 @@
     <paper id="237">
       <title>Using Chunk Based Partial Parsing of Spontaneous Speech in Unrestricted Domains for Reducing Word Error Rate in Speech Recognition</title>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/980691.980806</doi>
       <pages>1453–1459</pages>
       <url hash="f04feba1">P98-2237</url>
@@ -2243,8 +2243,8 @@
     </paper>
     <paper id="239">
       <title>Word Association and <fixed-case>MI-T</fixed-case>rigger-based Language Modeling</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>KimTeng</first><last>Lua</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="kim-teng-lua"><first>KimTeng</first><last>Lua</last></author>
       <doi>10.3115/980691.980808</doi>
       <pages>1465–1471</pages>
       <url hash="a9d81858">P98-2239</url>
@@ -2252,7 +2252,7 @@
     </paper>
     <paper id="240">
       <title>Discovering Phonotactic Finite-State Automata by Genetic Search</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <doi>10.3115/980691.980810</doi>
       <pages>1472–1474</pages>
       <url hash="33c3a97f">P98-2240</url>
@@ -2260,8 +2260,8 @@
     </paper>
     <paper id="241">
       <title>A Preliminary Model of Centering in Dialog</title>
-      <author><first>Donna</first><last>Byron</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <doi>10.3115/980691.980811</doi>
       <pages>1475–1477</pages>
       <url hash="a91e2788">P98-2241</url>
@@ -2293,7 +2293,7 @@
     </paper>
     <paper id="245">
       <title>Bridging the Gap between Dictionary and Thesaurus</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <doi>10.3115/980691.980815</doi>
       <pages>1487–1489</pages>
       <url hash="05148de8">P98-2245</url>
@@ -2309,7 +2309,7 @@
     </paper>
     <paper id="247">
       <title>Detecting Verbal Participation in Diathesis Alternations</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <doi>10.3115/980691.980817</doi>
       <pages>1493–1495</pages>
diff --git a/data/xml/P99.xml b/data/xml/P99.xml
index 44afe77ac1..b54045ca07 100644
--- a/data/xml/P99.xml
+++ b/data/xml/P99.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Untangling Text Data Mining</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <doi>10.3115/1034678.1034679</doi>
       <pages>3–10</pages>
       <url hash="75afe883">P99-1001</url>
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>Automatic Speech Recognition and Its Application to Information Extraction</title>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <doi>10.3115/1034678.1034680</doi>
       <pages>11–20</pages>
       <url hash="b8121b7b">P99-1002</url>
@@ -55,10 +55,10 @@
     </paper>
     <paper id="6">
       <title>Discourse Relations: A Structural and Presuppositional Account Using Lexicalised <fixed-case>TAG</fixed-case></title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <doi>10.3115/1034678.1034695</doi>
       <pages>41–48</pages>
       <url hash="9f5e25d0">P99-1006</url>
@@ -152,7 +152,7 @@
     </paper>
     <paper id="17">
       <title>Using aggregation for selecting content when generating referring expressions</title>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <doi>10.3115/1034678.1034706</doi>
       <pages>127–134</pages>
       <url hash="18e27e6d">P99-1017</url>
@@ -169,7 +169,7 @@
     </paper>
     <paper id="19">
       <title>Bilingual <fixed-case>H</fixed-case>ebrew-<fixed-case>E</fixed-case>nglish Generation of Possessives and Partitives: Raising the Input Abstraction Level</title>
-      <author><first>Yael Dahan</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael Dahan</first><last>Netzer</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
       <doi>10.3115/1034678.1034708</doi>
       <pages>144–151</pages>
@@ -178,8 +178,8 @@
     </paper>
     <paper id="20">
       <title>A Method for Word Sense Disambiguation of Unrestricted Text</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Dan I.</first><last>Moldovan</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="dan-moldovan"><first>Dan I.</first><last>Moldovan</last></author>
       <doi>10.3115/1034678.1034709</doi>
       <pages>152–158</pages>
       <url hash="f6fe8aee">P99-1020</url>
@@ -195,7 +195,7 @@
     </paper>
     <paper id="22">
       <title>Dynamic Nonlocal Language Modeling via Hierarchical Topic-Based Adaptation</title>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <doi>10.3115/1034678.1034711</doi>
       <pages>167–174</pages>
@@ -205,7 +205,7 @@
     <paper id="23">
       <title>A Second-Order Hidden <fixed-case>M</fixed-case>arkov Model for Part-of-Speech Tagging</title>
       <author><first>Scott M.</first><last>Thede</last></author>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <doi>10.3115/1034678.1034712</doi>
       <pages>175–182</pages>
       <url hash="bf450bbd">P99-1023</url>
@@ -213,11 +213,11 @@
     </paper>
     <paper id="24">
       <title>The <fixed-case>C</fixed-case>ommand<fixed-case>T</fixed-case>alk Spoken Dialogue System</title>
-      <author><first>Amanda</first><last>Stent</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
       <author><first>Elizabeth Owen</first><last>Bratt</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <doi>10.3115/1034678.1034713</doi>
       <pages>183–190</pages>
       <url hash="0d190a03">P99-1024</url>
@@ -226,7 +226,7 @@
     <paper id="25">
       <title>Construct Algebra: Analytical Dialog Management</title>
       <author><first>Alicia</first><last>Abella</last></author>
-      <author><first>Allen L.</first><last>Gorin</last></author>
+      <author id="allen-l-gorin"><first>Allen L.</first><last>Gorin</last></author>
       <doi>10.3115/1034678.1034714</doi>
       <pages>191–199</pages>
       <url hash="b9a1d251">P99-1025</url>
@@ -246,7 +246,7 @@
     </paper>
     <paper id="27">
       <title>Should we Translate the Documents or the Queries in Cross-language Information Retrieval?</title>
-      <author><first>J. Scott</first><last>McCarley</last></author>
+      <author id="j-scott-mccarley"><first>J. Scott</first><last>McCarley</last></author>
       <doi>10.3115/1034678.1034716</doi>
       <pages>208–214</pages>
       <url hash="88ad58b0">P99-1027</url>
@@ -264,9 +264,9 @@
     </paper>
     <paper id="29">
       <title>Using Mutual Information to Resolve Query Translation Ambiguities and Query Term Weighting</title>
-      <author><first>Myung-Gil</first><last>Jang</last></author>
-      <author><first>Sung Hyon</first><last>Myaeng</last></author>
-      <author><first>Se Young</first><last>Park</last></author>
+      <author id="myung-gil-jang"><first>Myung-Gil</first><last>Jang</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung Hyon</first><last>Myaeng</last></author>
+      <author id="se-young-park"><first>Se Young</first><last>Park</last></author>
       <doi>10.3115/1034678.1034718</doi>
       <pages>223–229</pages>
       <url hash="47d33a19">P99-1029</url>
@@ -276,7 +276,7 @@
       <title>Analysis System of Speech Acts and Discourse Structures Using Maximum Entropy Model</title>
       <author><first>Won Seug</first><last>Choi</last></author>
       <author><first>Jeong-Mi</first><last>Cho</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <doi>10.3115/1034678.1034719</doi>
       <pages>230–237</pages>
       <url hash="a781d258">P99-1030</url>
@@ -284,7 +284,7 @@
     </paper>
     <paper id="31">
       <title>Measuring Conformity to Discourse Routines in Decision-Making Interactions</title>
-      <author><first>Sherri L.</first><last>Condon</last></author>
+      <author id="sherri-condon"><first>Sherri L.</first><last>Condon</last></author>
       <author><first>Claude G.</first><last>Cech</last></author>
       <author><first>William R.</first><last>Edwards</last></author>
       <doi>10.3115/1034678.1034720</doi>
@@ -294,9 +294,9 @@
     </paper>
     <paper id="32">
       <title>Development and Use of a Gold-Standard Data Set for Subjectivity Classifications</title>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
-      <author><first>Rebecca F.</first><last>Bruce</last></author>
-      <author><first>Thomas P.</first><last>O’Hara</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca F.</first><last>Bruce</last></author>
+      <author id="thomas-p-ohara"><first>Thomas P.</first><last>O’Hara</last></author>
       <doi>10.3115/1034678.1034721</doi>
       <pages>246–253</pages>
       <url hash="868bc470">P99-1032</url>
@@ -312,8 +312,8 @@
     </paper>
     <paper id="34">
       <title>A Unification-based Approach to Morpho-syntactic Parsing of Agglutinative and Other (Highly) Inflectional Languages</title>
-      <author><first>Gabor</first><last>Proszeky</last></author>
-      <author><first>Balazs</first><last>Kis</last></author>
+      <author id="gabor-proszeky"><first>Gabor</first><last>Proszeky</last></author>
+      <author id="balazs-kis"><first>Balazs</first><last>Kis</last></author>
       <doi>10.3115/1034678.1034723</doi>
       <pages>261–268</pages>
       <url hash="ba7da2ae">P99-1034</url>
@@ -341,8 +341,8 @@
     </paper>
     <paper id="37">
       <title>Memory-Based Morphological Analysis</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <doi>10.3115/1034678.1034726</doi>
       <pages>285–292</pages>
       <url hash="75adcfe5">P99-1037</url>
@@ -359,7 +359,7 @@
     </paper>
     <paper id="39">
       <title>Alternating Quantifier Scope in <fixed-case>CCG</fixed-case></title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.3115/1034678.1034728</doi>
       <pages>301–308</pages>
       <url hash="7bb4cc7f">P99-1039</url>
@@ -367,9 +367,9 @@
     </paper>
     <paper id="40">
       <title>Automatic Detection of Poor Speech Recognition at the Dialogue Level</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Michael S.</first><last>Kearns</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="michael-s-kearns"><first>Michael S.</first><last>Kearns</last></author>
       <doi>10.3115/1034678.1034729</doi>
       <pages>309–316</pages>
       <url hash="a54676db">P99-1040</url>
@@ -385,10 +385,10 @@
     </paper>
     <paper id="42">
       <title>Deep Read: A Reading Comprehension System</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>Marc</first><last>Light</last></author>
-      <author><first>Eric</first><last>Breck</last></author>
-      <author><first>John D.</first><last>Burger</last></author>
+      <author id="eric-breck"><first>Eric</first><last>Breck</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
       <doi>10.3115/1034678.1034731</doi>
       <pages>325–332</pages>
       <url hash="749233c8">P99-1042</url>
@@ -398,7 +398,7 @@
       <title>Mixed Language Query Disambiguation</title>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><last>Liu</last><first>Xiaohu</first></author>
-      <author><last>Cheung</last><first>Chi Shun</first></author>
+      <author id="chi-shun-cheung"><first>Chi Shun</first><last>Cheung</last></author>
       <doi>10.3115/1034678.1034732</doi>
       <pages>333–340</pages>
       <url hash="bffce5c2">P99-1043</url>
@@ -414,8 +414,8 @@
     </paper>
     <paper id="45">
       <title>Less is more: Eliminating index terms from subordinate clauses</title>
-      <author><first>Simon H.</first><last>Corston-Oliver</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="simon-corston-oliver"><first>Simon H.</first><last>Corston-Oliver</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <doi>10.3115/1034678.1034734</doi>
       <pages>349–356</pages>
       <url hash="fcc41bc9">P99-1045</url>
@@ -439,8 +439,8 @@
     </paper>
     <paper id="48">
       <title>Corpus-Based Identification of Non-Anaphoric Noun Phrases</title>
-      <author><first>David L.</first><last>Bean</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="david-l-bean"><first>David L.</first><last>Bean</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <doi>10.3115/1034678.1034737</doi>
       <pages>373–380</pages>
       <url hash="10792591">P99-1048</url>
@@ -476,7 +476,7 @@
       <author id="walter-kasper"><first>W.</first><last>Kasper</last></author>
       <author id="bernd-kiefer"><first>B.</first><last>Kiefer</last></author>
       <author><first>H.-U.</first><last>Krieger</last></author>
-      <author><first>C. J.</first><last>Rupp</last></author>
+      <author id="c-j-rupp"><first>C. J.</first><last>Rupp</last></author>
       <author id="karsten-l-worm"><first>K. L.</first><last>Worm</last></author>
       <doi>10.3115/1034678.1034741</doi>
       <pages>405–412</pages>
@@ -485,8 +485,8 @@
     </paper>
     <paper id="53">
       <title>A Syntactic Framework for Speech Repairs and Other Disruptions</title>
-      <author><first>Mark G.</first><last>Core</last></author>
-      <author><first>Lenhart K.</first><last>Schubert</last></author>
+      <author id="mark-g-core"><first>Mark G.</first><last>Core</last></author>
+      <author id="lenhart-schubert"><first>Lenhart K.</first><last>Schubert</last></author>
       <doi>10.3115/1034678.1034742</doi>
       <pages>413–420</pages>
       <url hash="d21fdc8d">P99-1053</url>
@@ -503,7 +503,7 @@
     </paper>
     <paper id="55">
       <title>A Selectionist Theory of Language Acquisition</title>
-      <author><first>Charles D.</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles D.</first><last>Yang</last></author>
       <doi>10.3115/1034678.1034744</doi>
       <pages>429–435</pages>
       <url hash="d153fcd5">P99-1055</url>
@@ -521,7 +521,7 @@
     <paper id="57">
       <title>Learning to Recognize Tables in Free Text</title>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Chung Yong</first><last>Lim</last></author>
+      <author id="chung-yong-lim"><first>Chung Yong</first><last>Lim</last></author>
       <author><first>Jessica Li Teng</first><last>Koo</last></author>
       <doi>10.3115/1034678.1034746</doi>
       <pages>443–450</pages>
@@ -539,7 +539,7 @@
     </paper>
     <paper id="59">
       <title>Efficient Parsing for Bilexical Context-Free Grammars and Head Automaton Grammars</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <doi>10.3115/1034678.1034748</doi>
       <pages>457–464</pages>
@@ -556,10 +556,10 @@
     </paper>
     <paper id="61">
       <title>A Bag of Useful Techniques for Efficient and Robust Parsing</title>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Rob</first><last>Malouf</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="robert-malouf"><first>Rob</first><last>Malouf</last></author>
       <doi>10.3115/1034678.1034750</doi>
       <pages>473–480</pages>
       <url hash="175befa7">P99-1061</url>
@@ -593,10 +593,10 @@
     </paper>
     <paper id="65">
       <title>A Statistical Parser for <fixed-case>C</fixed-case>zech</title>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <doi>10.3115/1034678.1034754</doi>
       <pages>505–512</pages>
       <url hash="520220ba">P99-1065</url>
@@ -641,7 +641,7 @@
     </paper>
     <paper id="70">
       <title>Relating Probabilistic Grammars and Automata</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>David</first><last>McAllester</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <doi>10.3115/1034678.1034759</doi>
@@ -652,7 +652,7 @@
     <paper id="71">
       <title>Information Fusion in the Context of Multi-Document Summarization</title>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
       <doi>10.3115/1034678.1034760</doi>
       <pages>550–557</pages>
@@ -719,7 +719,7 @@
     </paper>
     <paper id="79">
       <title>Analysis of Syntax-Based Pronoun Resolution Methods</title>
-      <author><first>Joel R.</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel R.</first><last>Tetreault</last></author>
       <doi>10.3115/1034678.1034688</doi>
       <pages>602–605</pages>
       <url hash="70c6d8ff">P99-1079</url>
@@ -751,7 +751,7 @@
     </paper>
     <paper id="83">
       <title>Modeling Filled Pauses in Medical Dictations</title>
-      <author><first>Sergey V.</first><last>Pakhomov</last></author>
+      <author id="sergey-v-pakhomov"><first>Sergey V.</first><last>Pakhomov</last></author>
       <doi>10.3115/1034678.1034692</doi>
       <pages>619–624</pages>
       <url hash="f6bff6c1">P99-1083</url>
diff --git a/data/xml/Q13.xml b/data/xml/Q13.xml
index 1239c280e3..d3ba13b428 100644
--- a/data/xml/Q13.xml
+++ b/data/xml/Q13.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Transactions of the Association for Computational Linguistics, Volume 1</booktitle>
       <editor><last>Lin</last><first>Dekang</first></editor>
-      <editor><last>Collins</last><first>Michael</first></editor>
+      <editor id="michael-collins"><first>Michael</first><last>Collins</last></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
       <year>2013</year>
@@ -31,7 +31,7 @@
       <title>Finding Optimal 1-Endpoint-Crossing Trees</title>
       <author><first>Emily</first><last>Pitler</last></author>
       <author><first>Sampath</first><last>Kannan</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <doi>10.1162/tacl_a_00206</doi>
       <abstract>Dependency parsing algorithms capable of producing the types of crossing dependencies seen in natural language sentences have traditionally been orders of magnitude slower than algorithms for projective trees. For 95.8–99.8% of dependency parses in various natural language treebanks, whenever an edge is crossed, the edges that cross it all have a common vertex. The optimal dependency tree that satisfies this 1-Endpoint-Crossing property can be found with an O(n4) parsing algorithm that recursively combines forests over intervals with one exterior point. 1-Endpoint-Crossing trees also have natural connections to linguistics and another class of graphs that has been studied in NLP.</abstract>
       <pages>13–24</pages>
@@ -66,7 +66,7 @@
     <paper id="5">
       <title>Weakly Supervised Learning of Semantic Parsers for Mapping Instructions to Actions</title>
       <author><first>Yoav</first><last>Artzi</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <doi>10.1162/tacl_a_00209</doi>
       <abstract>The context in which language is used provides a strong signal for learning to recover its meaning. In this paper, we show it can be used within a grounded CCG semantic parsing approach that learns a joint model of meaning and context for interpreting and executing natural language instructions, using various types of weak supervision. The joint nature provides crucial benefits by allowing situated cues, such as the set of visible objects, to directly influence learning. It also enables algorithms that learn while executing instructions, for example by trying to replicate human actions. Experiments on a benchmark navigational dataset demonstrate strong performance under differing forms of supervision, including correctly executing 60% more instruction sets relative to the previous state of the art.</abstract>
       <pages>49–62</pages>
@@ -77,8 +77,8 @@
     </paper>
     <paper id="6">
       <title>Unsupervised Dependency Parsing with Acoustic Cues</title>
-      <author><first>John K</first><last>Pate</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="john-k-pate"><first>John K</first><last>Pate</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <doi>10.1162/tacl_a_00210</doi>
       <abstract>Unsupervised parsing is a difficult task that infants readily perform. Progress has been made on this task using text-based models, but few computational approaches have considered how infants might benefit from acoustic cues. This paper explores the hypothesis that word duration can help with learning syntax. We describe how duration information can be incorporated into an unsupervised Bayesian dependency parser whose only other source of information is the words themselves (without punctuation or parts of speech). Our results, evaluated on both adult-directed and child-directed utterances, show that using word duration can improve parse quality relative to words-only baselines. These results support the idea that acoustic cues provide useful evidence about syntactic structure for language-learning infants, and motivate the use of word duration cues in NLP tasks with speech.</abstract>
       <pages>63–74</pages>
@@ -142,7 +142,7 @@
       <title>Efficient Stacked Dependency Parsing by Forest Reranking</title>
       <author><first>Katsuhiko</first><last>Hayashi</last></author>
       <author><first>Shuhei</first><last>Kondo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.1162/tacl_a_00216</doi>
       <abstract>This paper proposes a discriminative forest reranking algorithm for dependency parsing that can be seen as a form of efficient stacked parsing. A dynamic programming shift-reduce parser produces a packed derivation forest which is then scored by a discriminative reranker, using the 1-best tree output by the shift-reduce parser as guide features in addition to third-order graph-based features. To improve efficiency and accuracy, this paper also proposes a novel shift-reduce parser that eliminates the spurious ambiguity of arc-standard transition systems. Testing on the English Penn Treebank data, forest reranking gave a state-of-the-art unlabeled dependency accuracy of 93.12.</abstract>
       <pages>139–150</pages>
@@ -171,7 +171,7 @@
       <author><first>Leah</first><last>Hanson</last></author>
       <author><first>Beenish</first><last>Jamil</last></author>
       <author><first>Matthias</first><last>Lee</last></author>
-      <author><first>Ya-Ting</first><last>Lin</last></author>
+      <author id="ya-ting-lin"><first>Ya-Ting</first><last>Lin</last></author>
       <author><first>Henry</first><last>Pao</last></author>
       <author><first>Fatima</first><last>Rivera</last></author>
       <author><first>Leili</first><last>Shahriyari</last></author>
@@ -191,7 +191,7 @@
     <paper id="15">
       <title>Combined Distributional and Logical Semantics</title>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/tacl_a_00219</doi>
       <abstract>We introduce a new approach to semantics which combines the benefits of distributional and formal logical semantics. Distributional models have been successful in modelling the meanings of content words, but logical semantics is necessary to adequately represent many function words. We follow formal semantics in mapping language to logical representations, but differ in that the relational constants used are induced by offline distributional clustering at the level of predicate-argument structure. Our clustering algorithm is highly scalable, allowing us to run on corpora the size of Gigaword. Different senses of a word are disambiguated based on their induced types. We outperform a variety of existing approaches on a wide-coverage question answering task, and demonstrate the ability to make complex multi-sentence inferences involving quantifiers on the FraCaS suite.</abstract>
       <pages>179–192</pages>
@@ -211,7 +211,7 @@
     <paper id="17">
       <title>Dual Coordinate Descent Algorithms for Efficient Large Margin Structured Prediction</title>
       <author><first>Ming-Wei</first><last>Chang</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <doi>10.1162/tacl_a_00221</doi>
       <abstract>Due to the nature of complex NLP problems, structured prediction algorithms have been important modeling tools for a wide range of tasks. While there exists evidence showing that linear Structural Support Vector Machine (SSVM) algorithm performs better than structured Perceptron, the SSVM algorithm is still less frequently chosen in the NLP community because of its relatively slow training speed. In this paper, we propose a fast and easy-to-implement dual coordinate descent algorithm for SSVMs. Unlike algorithms such as Perceptron and stochastic gradient descent, our method keeps track of dual variables and updates the weight vector more aggressively. As a result, this training process is as efficient as existing online learning methods, and yet derives consistently better models, as evaluated on four benchmark NLP datasets for part-of-speech tagging, named-entity recognition and dependency parsing.</abstract>
       <pages>207–218</pages>
@@ -222,7 +222,7 @@
       <title>Joint Arc-factored Parsing of Syntactic and Semantic Dependencies</title>
       <author><first>Xavier</first><last>Lluís</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <doi>10.1162/tacl_a_00222</doi>
       <abstract>In this paper we introduce a joint arc-factored model for syntactic and semantic dependency parsing. The semantic role labeler predicts the full syntactic paths that connect predicates with their arguments. This process is framed as a linear assignment task, which allows to control some well-formedness constraints. For the syntactic part, we define a standard arc-factored dependency model that predicts the full syntactic tree. Finally, we employ dual decomposition techniques to produce consistent syntactic and predicate-argument structures while searching over a large space of syntactic configurations. In experiments on the CoNLL-2009 English benchmark we observe very competitive results.</abstract>
       <pages>219–230</pages>
@@ -245,7 +245,7 @@
       <author><first>Feifei</first><last>Zhai</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <doi>10.1162/tacl_a_00224</doi>
       <abstract>In current research, most tree-based translation models are built directly from parse trees. In this study, we go in another direction and build a translation model with an unsupervised tree structure derived from a novel non-parametric Bayesian model. In the model, we utilize synchronous tree substitution grammars (STSG) to capture the bilingual mapping between language pairs. To train the model efficiently, we develop a Gibbs sampler with three novel Gibbs operators. The sampler is capable of exploring the infinite space of tree structures by performing local changes on the tree nodes. Experimental results show that the string-to-tree translation system using our Bayesian tree structures significantly outperforms the strong baseline string-to-tree system using parse trees.</abstract>
       <pages>243–254</pages>
@@ -255,7 +255,7 @@
     <paper id="21">
       <title>Minimally-Supervised Morphological Segmentation using <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars</title>
       <author><first>Kairit</first><last>Sirts</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <doi>10.1162/tacl_a_00225</doi>
       <abstract>This paper explores the use of Adaptor Grammars, a nonparametric Bayesian modelling framework, for minimally supervised morphological segmentation. We compare three training methods: unsupervised training, semi-supervised training, and a novel model selection method. In the model selection method, we train unsupervised Adaptor Grammars using an over-articulated metagrammar, then use a small labelled data set to select which potential morph boundaries identified by the metagrammar should be returned in the final output. We evaluate on five languages and show that semi-supervised training provides a boost over unsupervised training, while the model selection method yields the best average results over all languages and is competitive with state-of-the-art semi-supervised systems. Moreover, this method provides the potential to tune performance according to different evaluation metrics or downstream tasks.</abstract>
       <pages>255–266</pages>
@@ -285,7 +285,7 @@
     <paper id="24">
       <title>Large-scale Word Alignment Using Soft Dependency Cohesion Constraints</title>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <doi>10.1162/tacl_a_00228</doi>
       <abstract>Dependency cohesion refers to the observation that phrases dominated by disjoint dependency subtrees in the source language generally do not overlap in the target language. It has been verified to be a useful constraint for word alignment. However, previous work either treats this as a hard constraint or uses it as a feature in discriminative models, which is ineffective for large-scale tasks. In this paper, we take dependency cohesion as a soft constraint, and integrate it into a generative model for large-scale word alignment experiments. We also propose an approximate EM algorithm and a Gibbs sampling algorithm to estimate model parameters in an unsupervised manner. Experiments on large-scale Chinese-English translation tasks demonstrate that our model achieves improvements in both alignment quality and translation quality.</abstract>
       <pages>291–300</pages>
@@ -294,7 +294,7 @@
     </paper>
     <paper id="25">
       <title>Data-driven, <fixed-case>PCFG</fixed-case>-based and Pseudo-<fixed-case>PCFG</fixed-case>-based Models for <fixed-case>C</fixed-case>hinese Dependency Parsing</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <doi>10.1162/tacl_a_00229</doi>
       <abstract>We present a comparative study of transition-, graph- and PCFG-based models aimed at illuminating more precisely the likely contribution of CFGs in improving Chinese dependency parsing accuracy, especially by combining heterogeneous models. Inspired by the impact of a constituency grammar on dependency parsing, we propose several strategies to acquire pseudo CFGs only from dependency annotations. Compared to linguistic grammars learned from rich phrase-structure treebanks, well designed pseudo grammars achieve similar parsing accuracy and have equivalent contributions to parser ensemble. Moreover, pseudo grammars increase the diversity of base models; therefore, together with all other models, further improve system combination. Based on automatic POS tagging, our final model achieves a UAS of 87.23%, resulting in a significant improvement of the state of the art.</abstract>
@@ -304,8 +304,8 @@
     </paper>
     <paper id="26">
       <title>Parsing entire discourses as very long strings: Capturing topic continuity in grounded language learning</title>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
-      <author><first>Michael C.</first><last>Frank</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.1162/tacl_a_00230</doi>
       <abstract>Grounded language learning, the task of mapping from natural language to a representation of meaning, has attracted more and more interest in recent years. In most work on this topic, however, utterances in a conversation are treated independently and discourse structure information is largely ignored. In the context of language acquisition, this independence assumption discards cues that are important to the learner, e.g., the fact that consecutive utterances are likely to share the same referent (Frank et al., 2013). The current paper describes an approach to the problem of simultaneously modeling grounded language at the sentence and discourse levels. We combine ideas from parsing and grammar induction to produce a parser that can handle long input strings with thousands of tokens, creating parse trees that represent full discourses. By casting grounded language learning as a grammatical inference task, we use our parser to extend the work of Johnson et al. (2012), investigating the importance of discourse continuity in children’s language acquisition and its interaction with social cues. Our model boosts performance in a language acquisition task and yields good discourse segmentations compared with human annotators.</abstract>
@@ -335,7 +335,7 @@
     </paper>
     <paper id="29">
       <title>Distributional Semantics Beyond Words: Supervised Learning of Analogy and Paraphrase</title>
-      <author><first>Peter D.</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter D.</first><last>Turney</last></author>
       <doi>10.1162/tacl_a_00233</doi>
       <abstract>There have been several efforts to extend distributional semantics beyond individual words, to measure the similarity of word pairs, phrases, and sentences (briefly, tuples; ordered sets of words, contiguous or noncontiguous). One way to extend beyond words is to compare two tuples using a function that combines pairwise similarities between the component words in the tuples. A strength of this approach is that it works with both relational similarity (analogy) and compositional similarity (paraphrase). However, past work required hand-coding the combination function for different tasks. The main contribution of this paper is that combination functions are generated by supervised learning. We achieve state-of-the-art results in measuring relational similarity between word pairs (SAT analogies and SemEval 2012 Task 2) and measuring compositional similarity between noun-modifier phrases and unigrams (multiple-choice paraphrase questions).</abstract>
       <pages>353–366</pages>
@@ -345,7 +345,7 @@
     <paper id="30">
       <title>Modeling Missing Data in Distant Supervision for Information Extraction</title>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <author><first/><last>Mausam</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <doi>10.1162/tacl_a_00234</doi>
@@ -357,7 +357,7 @@
     <paper id="31">
       <title>Data-Driven Metaphor Recognition and Explanation</title>
       <author><first>Hongsong</first><last>Li</last></author>
-      <author><first>Kenny Q.</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny Q.</first><last>Zhu</last></author>
       <author><first>Haixun</first><last>Wang</last></author>
       <doi>10.1162/tacl_a_00235</doi>
       <abstract>Recognizing metaphors and identifying the source-target mappings is an important task as metaphorical text poses a big challenge for machine reading. To address this problem, we automatically acquire a metaphor knowledge base and an isA knowledge base from billions of web pages. Using the knowledge bases, we develop an inference mechanism to recognize and explain the metaphors in the text. To our knowledge, this is the first purely data-driven approach of probabilistic metaphor acquisition, recognition, and explanation. Our results shows that it significantly outperforms other state-of-the-art methods in recognizing and explaining metaphors.</abstract>
@@ -390,10 +390,10 @@
       <title>Joint Morphological and Syntactic Analysis for Richly Inflected Languages</title>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <doi>10.1162/tacl_a_00238</doi>
       <abstract>Joint morphological and syntactic analysis has been proposed as a way of improving parsing accuracy for richly inflected languages. Starting from a transition-based model for joint part-of-speech tagging and dependency parsing, we explore different ways of integrating morphological features into the model. We also investigate the use of rule-based morphological analyzers to provide hard or soft lexical constraints and the use of word clusters to tackle the sparsity of lexical features. Evaluation on five morphologically rich languages (Czech, Finnish, German, Hungarian, and Russian) shows consistent improvements in both morphological and syntactic accuracy for joint prediction over a pipeline model, with further improvements thanks to lexical constraints and word clusters. The final results improve the state of the art in dependency parsing for all languages.</abstract>
       <pages>415–428</pages>
@@ -405,8 +405,8 @@
       <author><first>Ann</first><last>Irvine</last></author>
       <author><first>John</first><last>Morgan</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
-      <author><first>Dragos</first><last>Munteanu</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos</first><last>Munteanu</last></author>
       <doi>10.1162/tacl_a_00239</doi>
       <abstract>We develop two techniques for analyzing the effect of porting a machine translation system to a new domain. One is a macro-level analysis that measures how domain shift affects corpus-level evaluation; the second is a micro-level analysis for word-level errors. We apply these methods to understand what happens when a Parliament-trained phrase-based machine translation system is applied in four very different domains: news, medical texts, scientific articles and movie subtitles. We present quantitative and qualitative experiments that highlight opportunities for future research in domain adaptation for machine translation.</abstract>
       <pages>429–440</pages>
diff --git a/data/xml/Q14.xml b/data/xml/Q14.xml
index 41daf276db..f77ae8f20a 100644
--- a/data/xml/Q14.xml
+++ b/data/xml/Q14.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Transactions of the Association for Computational Linguistics, Volume 2</booktitle>
       <editor><last>Lin</last><first>Dekang</first></editor>
-      <editor><last>Collins</last><first>Michael</first></editor>
+      <editor id="michael-collins"><first>Michael</first><last>Collins</last></editor>
       <editor><last>Lee</last><first>Lillian</first></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
@@ -19,7 +19,7 @@
       <title>Heterogeneous Networks and Their Applications: Scientometrics, Name Disambiguation, and Topic Modeling</title>
       <author><first>Ben</first><last>King</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <doi>10.1162/tacl_a_00161</doi>
       <abstract>We present heterogeneous networks as a way to unify lexical networks with relational data. We build a unified ACL Anthology network, tying together the citation, author collaboration, and term-cooccurence networks with affiliation and venue relations. This representation proves to be convenient and allows problems such as name disambiguation, topic modeling, and the measurement of scientific impact to be easily solved using only this network and off-the-shelf graph algorithms.</abstract>
       <pages>1–14</pages>
@@ -29,7 +29,7 @@
     <paper id="2">
       <title><fixed-case>FLORS</fixed-case>: Fast and Simple Domain Adaptation for Part-of-Speech Tagging</title>
       <author><first>Tobias</first><last>Schnabel</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00162</doi>
       <abstract>We present FLORS, a new part-of-speech tagger for domain adaptation. FLORS uses robust representations that work especially well for unknown words and for known words with unseen tags. FLORS is simpler and faster than previous domain adaptation methods, yet it has significantly better accuracy than several baselines.</abstract>
       <pages>15–26</pages>
@@ -41,7 +41,7 @@
       <title>Automatic Detection and Language Identification of Multilingual Documents</title>
       <author><first>Marco</first><last>Lui</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.1162/tacl_a_00163</doi>
       <abstract>Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web.</abstract>
       <pages>27–40</pages>
@@ -61,7 +61,7 @@
     <paper id="5">
       <title>Cross-lingual Projected Expectation Regularization for Weakly Supervised Learning</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.1162/tacl_a_00165</doi>
       <abstract>We consider a multilingual weakly supervised learning scenario where knowledge from annotated corpora in a resource-rich language is transferred via bitext to guide the learning in other languages. Past approaches project labels across bitext and use them as features or gold labels for training. We propose a new method that projects model expectations rather than labels, which facilities transfer of model uncertainty across language boundaries. We encode expectations as constraints and train a discriminative CRF model using Generalized Expectation Criteria (Mann and McCallum, 2010). Evaluated on standard Chinese-English and German-English NER datasets, our method demonstrates F1 scores of 64% and 60% when no labeled data is used. Attaining the same accuracy with supervised CRFs requires 12k and 1.5k labeled sentences. Furthermore, when combined with labeled examples, our method yields significant improvements over state-of-the-art supervised methods, achieving best reported numbers to date on Chinese OntoNotes and German CoNLL-03 datasets.</abstract>
       <pages>55–66</pages>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="8">
       <title>Exploring the Role of Stress in <fixed-case>B</fixed-case>ayesian Word Segmentation using <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <doi>10.1162/tacl_a_00168</doi>
       <abstract>Stress has long been established as a major cue in word segmentation for English infants. We show that enabling a current state-of-the-art Bayesian word segmentation model to take advantage of stress cues noticeably improves its performance. We find that the improvements range from 10 to 4%, depending on both the use of phonotactic cues and, to a lesser extent, the amount of evidence available to the learner. We also find that in particular early on, stress cues are much more useful for our model than phonotactic cues by themselves, consistent with the finding that children do seem to use stress cues before they use phonotactic cues. Finally, we study how the model’s knowledge about stress patterns evolves over time. We not only find that our model correctly acquires the most frequent patterns relatively quickly but also that the Unique Stress Constraint that is at the heart of a previously proposed model does not need to be built in but can be acquired jointly with word segmentation.</abstract>
@@ -146,14 +146,14 @@
       <author><first>William F.</first><last>Styler IV</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Sean</first><last>Finan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Piet C</first><last>de Groen</last></author>
       <author><first>Brad</first><last>Erickson</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Chen</first><last>Lin</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <doi>10.1162/tacl_a_00172</doi>
       <abstract>This article discusses the requirements of a formal specification for the annotation of temporal information in clinical narratives. We discuss the implementation and extension of ISO-TimeML for annotating a corpus of clinical notes, known as the THYME corpus. To reflect the information task and the heavily inference-based reasoning demands in the domain, a new annotation guideline has been developed, “the THYME Guidelines to ISO-TimeML (THYME-TimeML)”. To clarify what relations merit annotation, we distinguish between linguistically-derived and inferentially-derived temporal orderings in the text. We also apply a top performing TempEval 2013 system against this new resource to measure the difficulty of adapting systems to the clinical domain. The corpus is available to the community and has been proposed for use in a SemEval 2015 task.</abstract>
       <pages>143–154</pages>
@@ -182,7 +182,7 @@
       <author><first>Mirjam</first><last>Simantzik</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.1162/tacl_a_00174</doi>
       <abstract>In this paper, we study the problem of manually correcting automatic annotations of natural language in as efficient a manner as possible. We introduce a method for automatically segmenting a corpus into chunks such that many uncertain labels are grouped into the same chunk, while human supervision can be omitted altogether for other segments. A tradeoff must be found for segment sizes. Choosing short segments allows us to reduce the number of highly confident labels that are supervised by the annotator, which is useful because these labels are often already correct and supervising correct labels is a waste of effort. In contrast, long segments reduce the cognitive effort due to context switches. Our method helps find the segmentation that optimizes supervision efficiency by defining user models to predict the cost and utility of supervising each segment and solving a constrained optimization problem balancing these contradictory objectives. A user study demonstrates noticeable gains over pre-segmented, confidence-ordered baselines on two natural language processing tasks: speech transcription and word segmentation.</abstract>
       <pages>169–180</pages>
@@ -194,9 +194,9 @@
       <title>Dynamic Language Models for Streaming Text</title>
       <author><first>Dani</first><last>Yogatama</last></author>
       <author><first>Chong</first><last>Wang</last></author>
-      <author><first>Bryan R.</first><last>Routledge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
+      <author id="bryan-r-routledge"><first>Bryan R.</first><last>Routledge</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
       <doi>10.1162/tacl_a_00175</doi>
       <abstract>We present a probabilistic language model that captures temporal dynamics and conditions on arbitrary non-linguistic context features. These context features serve as important indicators of language changes that are otherwise difficult to capture using text data by itself. We learn our model in an efficient online fashion that is scalable for large, streaming data. With five streaming datasets from two different genres—economics news articles and social media—we evaluate our model on the task of sequential language modeling. Our model consistently outperforms competing models.</abstract>
       <pages>181–192</pages>
@@ -207,8 +207,8 @@
       <title>Discriminative Lexical Semantic Segmentation with Gaps: Running the <fixed-case>MWE</fixed-case> Gamut</title>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Emily</first><last>Danchik</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00176</doi>
       <abstract>We present a novel representation, evaluation measure, and supervised models for the task of identifying the multiword expressions (MWEs) in a sentence, resulting in a lexical semantic segmentation. Our approach generalizes a standard chunking representation to encode MWEs containing gaps, thereby enabling efficient sequence tagging algorithms for feature-rich discriminative models. Experiments on a new dataset of English web text offer the first linguistically-driven evaluation of MWE identification with truly heterogeneous expression types. Our statistical sequence model greatly outperforms a lookup-based segmentation procedure, achieving nearly 60% F1 for MWE identification.</abstract>
       <pages>193–206</pages>
@@ -219,9 +219,9 @@
       <title>Grounded Compositional Semantics for Finding and Describing Images with Sentences</title>
       <author><first>Richard</first><last>Socher</last></author>
       <author><first>Andrej</first><last>Karpathy</last></author>
-      <author><first>Quoc V.</first><last>Le</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="quoc-le"><first>Quoc V.</first><last>Le</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <doi>10.1162/tacl_a_00177</doi>
       <abstract>Previous work on Recursive Neural Networks (RNNs) shows that these models can produce compositional feature vectors for accurately representing and classifying sentences or images. However, the sentence vectors of previous models cannot accurately represent visually grounded meaning. We introduce the DT-RNN model which uses dependency trees to embed sentences into a vector space in order to retrieve images that are described by those sentences. Unlike previous RNN-based models which use constituency trees, DT-RNNs naturally focus on the action and agents in a sentence. They are better able to abstract from the details of word order and syntactic expression. DT-RNNs outperform other recursive and recurrent neural networks, kernelized CCA and a bag-of-words baseline on the tasks of finding an image that fits a sentence description and vice versa. They also give more similar representations to sentences that describe the same image.</abstract>
       <pages>207–218</pages>
@@ -230,7 +230,7 @@
     </paper>
     <paper id="18">
       <title>Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <doi>10.1162/tacl_a_00178</doi>
@@ -254,7 +254,7 @@
     <paper id="20">
       <title>Crosslingual and Multilingual Construction of Syntax-Based Vector Space Models</title>
       <author><first>Jason</first><last>Utt</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <doi>10.1162/tacl_a_00180</doi>
       <abstract>Syntax-based distributional models of lexical semantics provide a flexible and linguistically adequate representation of co-occurrence information. However, their construction requires large, accurately parsed corpora, which are unavailable for most languages. In this paper, we develop a number of methods to overcome this obstacle. We describe (a) a crosslingual approach that constructs a syntax-based model for a new language requiring only an English resource and a translation lexicon; and (b) multilingual approaches that combine crosslingual with monolingual information, subject to availability. We evaluate on two lexical semantic benchmarks in German and Croatian. We find that the models exhibit complementary profiles: crosslingual models yield higher accuracies while monolingual models provide better coverage. In addition, we show that simple multilingual models can successfully combine their strengths.</abstract>
       <pages>245–258</pages>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="22">
       <title>Dense Event Ordering with a Multi-Pass Architecture</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Bill</first><last>McDowell</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
@@ -308,7 +308,7 @@
     </paper>
     <paper id="25">
       <title>The Benefits of a Model of Annotation</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <doi>10.1162/tacl_a_00185</doi>
       <abstract>Standard agreement measures for interannotator reliability are neither necessary nor sufficient to ensure a high quality corpus. In a case study of word sense annotation, conventional methods for evaluating labels from trained annotators are contrasted with a probabilistic annotation model applied to crowdsourced data. The annotation model provides far more information, including a certainty measure for each gold standard label; the crowdsourced data was collected at less than half the cost of the conventional approach.</abstract>
@@ -320,7 +320,7 @@
     <paper id="26">
       <title>Improved <fixed-case>CCG</fixed-case> Parsing with Semi-supervised Supertagging</title>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/tacl_a_00186</doi>
       <abstract>Current supervised parsers are limited by the size of their labelled training data, making improving them with unlabelled data an important goal. We show how a state-of-the-art CCG parser can be enhanced, by predicting lexical categories using unsupervised vector-space embeddings of words. The use of word embeddings enables our model to better generalize from the labelled data, and allows us to accurately assign lexical categories without depending on a POS-tagger. Our approach leads to substantial improvements in dependency parsing results over the standard supervised CCG parser when evaluated on Wall Street Journal (0.8%), Wikipedia (1.8%) and biomedical (3.4%) text. We compare the performance of two recently proposed approaches for classification using a wide variety of word embeddings. We also give a detailed error analysis demonstrating where using embeddings outperforms traditional feature sets, and showing how including POS features can decrease accuracy.</abstract>
       <pages>327–338</pages>
@@ -341,7 +341,7 @@
       <title><fixed-case>T</fixed-case>ree<fixed-case>T</fixed-case>alk: Composition and Compression of Trees for Image Descriptions</title>
       <author><first>Polina</first><last>Kuznetsova</last></author>
       <author><first>Vicente</first><last>Ordonez</last></author>
-      <author><first>Tamara L.</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara L.</first><last>Berg</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <doi>10.1162/tacl_a_00188</doi>
       <abstract>We present a new tree based approach to composing expressive image descriptions that makes use of naturally occuring web images with captions. We investigate two related tasks: image caption generalization and generation, where the former is an optional subtask of the latter. The high-level idea of our approach is to harvest expressive phrases (as tree fragments) from existing image descriptions, then to compose a new description by selectively combining the extracted (and optionally pruned) tree fragments. Key algorithmic components are tree composition and compression, both integrating tree structure with sequence structure. Our proposed system attains significantly better performance than previous approaches for both image caption generalization and generation. In addition, our work is the first to show the empirical benefit of automatically generalized captions for composing natural image descriptions.</abstract>
@@ -352,7 +352,7 @@
     <paper id="29">
       <title>Unsupervised Discovery of Biographical Structure from Text</title>
       <author><first>David</first><last>Bamman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00189</doi>
       <abstract>We present a method for discovering abstract event classes in biographies, based on a probabilistic latent-variable model. Taking as input timestamped text, we exploit latent correlations among events to learn a set of event classes (such as Born, Graduates High School, and Becomes Citizen), along with the typical times in a person’s life when those events occur. In a quantitative evaluation at the task of predicting a person’s age for a given event, we find that our generative model outperforms a strong linear regression baseline, along with simpler variants of the model that ablate some features. The abstract event classes that we learn allow us to perform a large-scale analysis of 242,970 Wikipedia biographies. Though it is known that women are greatly underrepresented on Wikipedia—not only as editors (Wikipedia, 2011) but also as subjects of articles (Reagle and Rhue, 2011)—we find that there is a bias in their characterization as well, with biographies of women containing significantly more emphasis on events of marriage and divorce than biographies of men.</abstract>
       <pages>363–376</pages>
@@ -363,7 +363,7 @@
       <title>Large-scale Semantic Parsing without Question-Answer Pairs</title>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/tacl_a_00190</doi>
       <abstract>In this paper we introduce a novel semantic parsing approach to query Freebase in natural language without requiring manual annotations or question-answer pairs. Our key insight is to represent natural language via semantic graphs whose topology shares many commonalities with Freebase. Given this representation, we conceptualize semantic parsing as a graph matching problem. Our model converts sentences to semantic graphs using CCG and subsequently grounds them to Freebase guided by denotations as a form of weak supervision. Evaluation experiments on a subset of the Free917 and WebQuestions benchmark datasets show our semantic parser improves over the state of the art.</abstract>
       <pages>377–392</pages>
@@ -372,9 +372,9 @@
     </paper>
     <paper id="31">
       <title>Locally Non-Linear Learning for Statistical Machine Translation via Discretization and Structured Regularization</title>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <doi>10.1162/tacl_a_00191</doi>
       <abstract>Linear models, which support efficient learning and inference, are the workhorses of statistical machine translation; however, linear decision rules are less attractive from a modeling perspective. In this work, we introduce a technique for learning arbitrary, rule-local, non-linear feature transforms that improve model expressivity, but do not sacrifice the efficient inference and learning associated with linear models. To demonstrate the value of our technique, we discard the customary log transform of lexical probabilities and drop the phrasal translation probability in favor of raw counts. We observe that our algorithm learns a variation of a log transform that leads to better translation quality compared to the explicit log transform. We conclude that non-linear responses play an important role in SMT, an observation that we hope will inform the efforts of feature engineers.</abstract>
       <pages>393–404</pages>
@@ -406,7 +406,7 @@
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Yangfeng</first><last>Ji</last></author>
       <doi>10.1162/tacl_a_00194</doi>
       <abstract>We present MultiP (Multi-instance Learning Paraphrase Model), a new model suited to identify paraphrases within the short messages on Twitter. We jointly model paraphrase relations between word and sentence pairs and assume only sentence-level annotations during learning. Using this principled latent variable model alone, we achieve the performance competitive with a state-of-the-art method which combines a latent space model with a feature-based supervised classifier. Our model also captures lexically divergent paraphrases that differ from yet complement previous methods; combining our model with previous work significantly outperforms the state-of-the-art. In addition, we present a novel annotation methodology that has allowed us to crowdsource a paraphrase corpus from Twitter. We make this new dataset available to the research community.</abstract>
@@ -429,7 +429,7 @@
       <title>Online <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars with Hybrid Inference</title>
       <author><first>Ke</first><last>Zhai</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00196</doi>
       <abstract>Adaptor grammars are a flexible, powerful formalism for defining nonparametric, unsupervised models of grammar productions. This flexibility comes at the cost of expensive inference. We address the difficulty of inference through an online algorithm which uses a hybrid of Markov chain Monte Carlo and variational inference. We show that this inference strategy improves scalability without sacrificing performance on unsupervised word segmentation and topic modeling tasks.</abstract>
       <pages>465–476</pages>
@@ -460,7 +460,7 @@
     <paper id="39">
       <title>Joint Modeling of Opinion Expression Extraction and Attribute Classification</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.1162/tacl_a_00199</doi>
       <abstract>In this paper, we study the problems of opinion expression extraction and expression-level polarity and intensity classification. Traditional fine-grained opinion analysis systems address these problems in isolation and thus cannot capture interactions among the textual spans of opinion expressions and their opinion-related properties. We present two types of joint approaches that can account for such interactions during 1) both learning and inference or 2) only during inference. Extensive experiments on a standard dataset demonstrate that our approaches provide substantial improvements over previously published results. By analyzing the results, we gain some insight into the advantages of different joint models.</abstract>
       <pages>505–516</pages>
diff --git a/data/xml/Q15.xml b/data/xml/Q15.xml
index d891b5446d..6267f79918 100644
--- a/data/xml/Q15.xml
+++ b/data/xml/Q15.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="journal">
     <meta>
       <booktitle>Transactions of the Association for Computational Linguistics, Volume 3</booktitle>
-      <editor><last>Collins</last><first>Michael</first></editor>
+      <editor id="michael-collins"><first>Michael</first><last>Collins</last></editor>
       <editor><last>Lee</last><first>Lillian</first></editor>
       <publisher>MIT Press</publisher>
       <address>Cambridge, MA</address>
@@ -48,7 +48,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>S</fixed-case>prite: Generalizing Topic Models with Structured Priors</title>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <doi>10.1162/tacl_a_00121</doi>
       <abstract>We introduce Sprite, a family of topic models that incorporates structure into model priors as a function of underlying components. The structured priors can be constrained to model topic hierarchies, factorizations, correlations, and supervision, allowing Sprite to be tailored to particular settings. We demonstrate this flexibility by constructing a Sprite-based model to jointly infer topic hierarchies and author perspective, which we apply to corpora of political debates and online reviews. We show that the model learns intuitive topics, outperforming several other topic models at predictive tasks.</abstract>
@@ -62,7 +62,7 @@
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Brian D.</first><last>Ziebart</last></author>
-      <author><first>Clement T.</first><last>Yu</last></author>
+      <author id="clement-t-yu"><first>Clement T.</first><last>Yu</last></author>
       <doi>10.1162/tacl_a_00122</doi>
       <abstract>Word sense induction (WSI) seeks to automatically discover the senses of a word in a corpus via unsupervised methods. We propose a sense-topic model for WSI, which treats sense and topic as two separate latent variables to be inferred jointly. Topics are informed by the entire document, while senses are informed by the local context surrounding the ambiguous word. We also discuss unsupervised ways of enriching the original corpus in order to improve model performance, including using neural word embeddings and external corpora to expand the context of each data instance. We demonstrate significant improvements over the previous state-of-the-art, achieving the best results reported to date on the SemEval-2013 WSI task.</abstract>
       <pages>59–71</pages>
@@ -104,8 +104,8 @@
     <paper id="9">
       <title>Exploiting Parallel News Streams for Unsupervised Event Extraction</title>
       <author><first>Congle</first><last>Zhang</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <doi>10.1162/tacl_a_00127</doi>
       <abstract>Most approaches to relation extraction, the task of extracting ground facts from natural language text, are based on machine learning and thus starved by scarce training data. Manual annotation is too expensive to scale to a comprehensive set of relations. Distant supervision, which automatically creates training data, only works with relations that already populate a knowledge base (KB). Unfortunately, KBs such as FreeBase rarely cover event relations (e.g. “person travels to location”). Thus, the problem of extracting a wide range of events — e.g., from news streams — is an important, open challenge. This paper introduces NewsSpike-RE, a novel, unsupervised algorithm that discovers event relations and then learns to extract them. NewsSpike-RE uses a novel probabilistic graphical model to cluster sentences describing similar events from parallel news streams. These clusters then comprise training data for the extractor. Our evaluation shows that NewsSpike-RE generates high quality training sentences and learns extractors that perform much better than rival approaches, more than doubling the area under a precision-recall curve compared to Universal Schemas.</abstract>
       <pages>117–129</pages>
@@ -157,9 +157,9 @@
     <paper id="14">
       <title>From Visual Attributes to Adjectives through Decompositional Distributional Semantics</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Adam</first><last>Liska</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="adam-liska"><first>Adam</first><last>Liska</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <doi>10.1162/tacl_a_00132</doi>
       <abstract>As automated image analysis progresses, there is increasing interest in richer linguistic annotation of pictures, with attributes of objects (e.g., furry, brown…) attracting most attention. By building on the recent “zero-shot learning” approach, and paying attention to the linguistic nature of attributes as noun modifiers, and specifically adjectives, we show that it is possible to tag images with attribute-denoting adjectives even when no training data containing the relevant annotation are available. Our approach relies on two key observations. First, objects can be seen as bundles of attributes, typically expressed as adjectival modifiers (a dog is something furry, brown, etc.), and thus a function trained to map visual representations of objects to nominal labels can implicitly learn to map attributes to adjectives. Second, objects and attributes come together in pictures (the same thing is a dog and it is brown). We can thus achieve better attribute (and object) label retrieval by treating images as “visual phrases”, and decomposing their linguistic representation into an attribute-denoting adjective and an object-denoting noun. Our approach performs comparably to a method exploiting manual attribute annotation, it out-performs various competitive alternatives in both attribute and object annotation, and it automatically constructs attribute-centric representations that significantly improve performance in supervised object recognition.</abstract>
       <pages>183–196</pages>
@@ -169,8 +169,8 @@
     <paper id="15">
       <title>Higher-order Lexical Semantic Models for Non-factoid Answer Reranking</title>
       <author><first>Daniel</first><last>Fried</last></author>
-      <author><first>Peter</first><last>Jansen</last></author>
-      <author><first>Gustave</first><last>Hahn-Powell</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
+      <author id="gus-hahn-powell"><first>Gustave</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <doi>10.1162/tacl_a_00133</doi>
@@ -203,8 +203,8 @@
     <paper id="18">
       <title>Combining Minimally-supervised Methods for <fixed-case>A</fixed-case>rabic Named Entity Recognition</title>
       <author><first>Maha</first><last>Althobaiti</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.1162/tacl_a_00136</doi>
       <abstract>Supervised methods can achieve high performance on NLP tasks, such as Named Entity Recognition (NER), but new annotations are required for every new domain and/or genre change. This has motivated research in minimally supervised methods such as semi-supervised learning and distant learning, but neither technique has yet achieved performance levels comparable to those of supervised methods. Semi-supervised methods tend to have very high precision but comparatively low recall, whereas distant learning tends to achieve higher recall but lower precision. This complementarity suggests that better results may be obtained by combining the two types of minimally supervised methods. In this paper we present a novel approach to Arabic NER using a combination of semi-supervised and distant learning techniques. We trained a semi-supervised NER classifier and another one using distant learning techniques, and then combined them using a variety of classifier combination schemes, including the Bayesian Classifier Combination (BCC) procedure recently proposed for sentiment analysis. According to our results, the BCC model leads to an increase in performance of 8 percentage points over the best base classifiers.</abstract>
       <pages>243–255</pages>
@@ -214,7 +214,7 @@
     <paper id="19">
       <title>Learning a Compositional Semantics for <fixed-case>F</fixed-case>reebase with an Open Predicate Vocabulary</title>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <doi>10.1162/tacl_a_00137</doi>
       <abstract>We present an approach to learning a model-theoretic semantics for natural language tied to Freebase. Crucially, our approach uses an open predicate vocabulary, enabling it to produce denotations for phrases such as “Republican front-runner from Texas” whose semantics cannot be represented using the Freebase schema. Our approach directly converts a sentence’s syntactic CCG parse into a logical form containing predicates derived from the words in the sentence, assigning each word a consistent semantics across sentences. This logical form is evaluated against a learned probabilistic database that defines a distribution over denotations for each textual predicate. A training phase produces this probabilistic database using a corpus of entity-linked text and probabilistic matrix factorization with a novel ranking objective function. We evaluate our approach on a compositional question answering task where it outperforms several competitive baselines. We also compare our approach against manually annotated Freebase queries, finding that our open predicate vocabulary enables us to answer many questions that Freebase cannot.</abstract>
       <pages>257–270</pages>
@@ -225,7 +225,7 @@
       <title>Domain Adaptation for Syntactic and Semantic Dependency Parsing Using Deep Belief Networks</title>
       <author><first>Haitong</first><last>Yang</last></author>
       <author><first>Tao</first><last>Zhuang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <doi>10.1162/tacl_a_00138</doi>
       <abstract>In current systems for syntactic and semantic dependency parsing, people usually define a very high-dimensional feature space to achieve good performance. But these systems often suffer severe performance drops on out-of-domain test data due to the diversity of features of different domains. This paper focuses on how to relieve this domain adaptation problem with the help of unlabeled target domain data. We propose a deep learning method to adapt both syntactic and semantic parsers. With additional unlabeled target domain data, our method can learn a latent feature representation (LFR) that is beneficial to both domains. Experiments on English data in the CoNLL 2009 shared task show that our method largely reduced the performance drop on out-of-domain test data. Moreover, we get a Macro F1 score that is 2.32 points higher than the best system in the CoNLL 2009 shared task in out-of-domain tests.</abstract>
       <pages>271–282</pages>
@@ -264,7 +264,7 @@
       <title>Design Challenges for Entity Linking</title>
       <author><first>Xiao</first><last>Ling</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Daniel S.</first><last>Weld</last></author>
+      <author id="daniel-s-weld"><first>Daniel S.</first><last>Weld</last></author>
       <doi>10.1162/tacl_a_00141</doi>
       <abstract>Recent research on entity linking (EL) has introduced a plethora of promising techniques, ranging from deep neural networks to joint inference. But despite numerous papers there is surprisingly little understanding of the state of the art in EL. We attack this confusion by analyzing differences between several versions of the EL problem and presenting a simple yet effective, modular, unsupervised system, called Vinculum, for entity linking. We conduct an extensive evaluation on nine data sets, comparing Vinculum with two state-of-the-art systems, and elucidate key aspects of the system that include mention extraction, candidate generation, entity type prediction, entity coreference, and coherence.</abstract>
       <pages>315–328</pages>
@@ -301,7 +301,7 @@
     <paper id="26">
       <title>A Graph-based Lattice Dependency Parser for Joint Morphological Segmentation and Syntactic Analysis</title>
       <author><first>Wolfgang</first><last>Seeker</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <doi>10.1162/tacl_a_00144</doi>
       <abstract>Space-delimited words in Turkish and Hebrew text can be further segmented into meaningful units, but syntactic and semantic context is necessary to predict segmentation. At the same time, predicting correct syntactic structures relies on correct segmentation. We present a graph-based lattice dependency parser that operates on morphological lattices to represent different segmentations and morphological analyses for a given input sentence. The lattice parser predicts a dependency tree over a path in the lattice and thus solves the joint task of segmentation, morphological analysis, and syntactic parsing. We conduct experiments on the Turkish and the Hebrew treebank and show that the joint model outperforms three state-of-the-art pipeline systems on both data sets. Our work corroborates findings from constituency lattice parsing for Hebrew and presents the first results for full lattice parsing on Turkish.</abstract>
       <pages>359–373</pages>
@@ -310,9 +310,9 @@
     </paper>
     <paper id="27">
       <title>Deriving <fixed-case>B</fixed-case>oolean structures from distributional vectors</title>
-      <author><first>German</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <doi>10.1162/tacl_a_00145</doi>
       <abstract>Corpus-based distributional semantic models capture degrees of semantic relatedness among the words of very large vocabularies, but have problems with logical phenomena such as entailment, that are instead elegantly handled by model-theoretic approaches, which, in turn, do not scale up. We combine the advantages of the two views by inducing a mapping from distributional vectors of words (or sentences) into a Boolean structure of the kind in which natural language terms are assumed to denote. We evaluate this Boolean Distributional Semantic Model (BDSM) on recognizing entailment between words and sentences. The method achieves results comparable to a state-of-the-art SVM, degrades more gracefully when less training data are available and displays interesting qualitative properties.</abstract>
       <pages>375–388</pages>
@@ -322,9 +322,9 @@
     </paper>
     <paper id="28">
       <title>Unsupervised Lexicon Discovery from Acoustic Input</title>
-      <author><first>Chia-ying</first><last>Lee</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="chia-ying-lee"><first>Chia-ying</first><last>Lee</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <doi>10.1162/tacl_a_00146</doi>
       <abstract>We present a model of unsupervised phonological lexicon discovery—the problem of simultaneously learning phoneme-like and word-like units from acoustic input. Our model builds on earlier models of unsupervised phone-like unit discovery from acoustic data (Lee and Glass, 2012), and unsupervised symbolic lexicon discovery using the Adaptor Grammar framework (Johnson et al., 2006), integrating these earlier approaches using a probabilistic model of phonological variation. We show that the model is competitive with state-of-the-art spoken term discovery systems, and present analyses exploring the model’s behavior and the kinds of linguistic structures it learns.</abstract>
       <pages>389–403</pages>
@@ -357,7 +357,7 @@
       <title>Modeling Word Forms Using Latent Underlying Morphs and Phonology</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Nanyun</first><last>Peng</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00149</doi>
       <abstract>The observed pronunciations or spellings of words are often explained as arising from the “underlying forms” of their morphemes. These forms are latent strings that linguists try to reconstruct by hand. We propose to reconstruct them automatically at scale, enabling generalization to new words. Given some surface word types of a concatenative language along with the abstract morpheme sequences that they express, we show how to recover consistent underlying forms for these morphemes, together with the (stochastic) phonology that maps each concatenation of underlying forms to a surface form. Our technique involves loopy belief propagation in a natural directed graphical model whose variables are unknown strings and whose conditional distributions are encoded as finite-state machines with trainable weights. We define training and evaluation paradigms for the task of surface word prediction, and report results on subsets of 7 languages.</abstract>
       <pages>433–447</pages>
@@ -376,8 +376,8 @@
     </paper>
     <paper id="33">
       <title>Learning Structural Kernels for Natural Language Processing</title>
-      <author><first>Daniel</first><last>Beck</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <doi>10.1162/tacl_a_00151</doi>
@@ -404,9 +404,9 @@
     </paper>
     <paper id="35">
       <title>Approximation-Aware Dependency Parsing by Belief Propagation</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00153</doi>
       <abstract>We show how to train the fast dependency parser of Smith and Eisner (2008) for improved accuracy. This parser can consider higher-order interactions among edges while retaining O(n3) runtime. It outputs the parse with maximum expected recall—but for speed, this expectation is taken under a posterior distribution that is constructed only approximately, using loopy belief propagation through structured factors. We show how to adjust the model parameters to compensate for the errors introduced by this approximation, by following the gradient of the actual loss on training data. We find this gradient by back-propagation. That is, we treat the entire parser (approximations and all) as a differentiable circuit, as others have done for loopy CRFs (Domke, 2010; Stoyanov et al., 2011; Domke, 2011; Stoyanov and Eisner, 2012). The resulting parser obtains higher accuracy with fewer iterations of belief propagation than one trained by conditional log-likelihood.</abstract>
       <pages>489–501</pages>
@@ -417,7 +417,7 @@
     <paper id="36">
       <title><fixed-case>P</fixed-case>lato: A Selective Context Model for Entity Resolution</title>
       <author><first>Nevena</first><last>Lazic</last></author>
-      <author><first>Amarnag</first><last>Subramanya</last></author>
+      <author id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <doi>10.1162/tacl_a_00154</doi>
@@ -429,7 +429,7 @@
     <paper id="37">
       <title>A Hierarchical Distance-dependent <fixed-case>B</fixed-case>ayesian Model for Event Coreference Resolution</title>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Peter</first><last>Frazier</last></author>
       <doi>10.1162/tacl_a_00155</doi>
       <abstract>We present a novel hierarchical distance-dependent Bayesian model for event coreference resolution. While existing generative models for event coreference resolution are completely unsupervised, our model allows for the incorporation of pairwise distances between event mentions — information that is widely used in supervised coreference models to guide the generative clustering processing for better event clustering both within and across documents. We model the distances between event mentions using a feature-rich learnable distance function and encode them as Bayesian priors for nonparametric clustering. Experiments on the ECB+ corpus show that our model outperforms state-of-the-art methods for both within- and cross-document event coreference resolution.</abstract>
@@ -473,7 +473,7 @@
       <author><first>Philip</first><last>Arthur</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <doi>10.1162/tacl_a_00159</doi>
       <abstract>We propose a new method for semantic parsing of ambiguous and ungrammatical input, such as search queries. We do so by building on an existing semantic parsing framework that uses synchronous context free grammars (SCFG) to jointly model the input sentence and output meaning representation. We generalize this SCFG framework to allow not one, but multiple outputs. Using this formalism, we construct a grammar that takes an ambiguous input string and jointly maps it into both a meaning representation and a natural language paraphrase that is less ambiguous than the original input. This paraphrase can be used to disambiguate the meaning representation via verification using a language model that calculates the probability of each paraphrase.</abstract>
@@ -483,7 +483,7 @@
     </paper>
     <paper id="42">
       <title>Parsing Algebraic Word Problems into Equations</title>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <author><first>Ashish</first><last>Sabharwal</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
diff --git a/data/xml/Q16.xml b/data/xml/Q16.xml
index a54bbd369b..187dab1e18 100644
--- a/data/xml/Q16.xml
+++ b/data/xml/Q16.xml
@@ -64,7 +64,7 @@
     <paper id="5">
       <title>An Empirical Analysis of Formality in Online Communication</title>
       <author><first>Ellie</first><last>Pavlick</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <doi>10.1162/tacl_a_00083</doi>
       <abstract>This paper presents an empirical study of linguistic formality. We perform an analysis of humans’ perceptions of formality in four different genres. These findings are used to develop a statistical model for predicting formality, which is evaluated under different feature settings and genres. We apply our model to an investigation of formality in online discussion forums, and present findings consistent with theories of formality and linguistic coordination.</abstract>
       <pages>61–74</pages>
@@ -96,7 +96,7 @@
     <paper id="8">
       <title>Adapting to All Domains at Once: Rewarding Domain Invariance in <fixed-case>SMT</fixed-case></title>
       <author><first>Hoang</first><last>Cuong</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <doi>10.1162/tacl_a_00086</doi>
       <abstract>Existing work on domain adaptation for statistical machine translation has consistently assumed access to a small sample from the test distribution (target domain) at training time. In practice, however, the target domain may not be known at training time or it may change to match user needs. In such situations, it is natural to push the system to make safer choices, giving higher preference to domain-invariant translations, which work well across domains, over risky domain-specific alternatives. We encode this intuition by (1) inducing latent subdomains from the training data only; (2) introducing features which measure how specialized phrases are to individual induced sub-domains; (3) estimating feature weights on out-of-domain data (rather than on the target domain). We conduct experiments on three language pairs and a number of different domains. We observe consistent improvements over a baseline which does not explicitly reward domain invariance.</abstract>
@@ -106,9 +106,9 @@
     </paper>
     <paper id="9">
       <title>A Joint Model for Answer Sentence Ranking and Answer Extraction</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Vittorio</first><last>Castelli</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <doi>10.1162/tacl_a_00087</doi>
       <abstract>Answer sentence ranking and answer extraction are two key challenges in question answering that have traditionally been treated in isolation, i.e., as independent tasks. In this article, we (1) explain how both tasks are related at their core by a common quantity, and (2) propose a simple and intuitive joint probabilistic model that addresses both via joint computation but task-specific application of that quantity. In our experiments with two TREC datasets, our joint model substantially outperforms state-of-the-art systems in both tasks.</abstract>
       <pages>113–125</pages>
@@ -119,10 +119,10 @@
       <title>Transforming Dependency Structures to Logical Forms for Semantic Parsing</title>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Oscar</first><last>Täckström</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <doi>10.1162/tacl_a_00088</doi>
       <abstract>The strongly typed syntax of grammar formalisms such as CCG, TAG, LFG and HPSG offers a synchronous framework for deriving syntactic structures and semantic logical forms. In contrast—partly due to the lack of a strong type system—dependency structures are easy to annotate and have become a widely used form of syntactic analysis for many languages. However, the lack of a type system makes a formal mechanism for deriving logical forms from dependency structures challenging. We address this by introducing a robust system based on the lambda calculus for deriving neo-Davidsonian logical forms from dependency trees. These logical forms are then used for semantic parsing of natural language to Freebase. Experiments on the Free917 and Web-Questions datasets show that our representation is superior to the original dependency trees and that it outperforms a CCG-based representation on this task. Compared to prior work, we obtain the strongest result to date on Free917 and competitive results on WebQuestions.</abstract>
@@ -155,7 +155,7 @@
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Courtney</first><last>Napoles</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <doi>10.1162/tacl_a_00091</doi>
       <abstract>The field of grammatical error correction (GEC) has grown substantially in recent years, with research directed at both evaluation metrics and improved system performance against those metrics. One unvisited assumption, however, is the reliance of GEC evaluation on error-coded corpora, which contain specific labeled corrections. We examine current practices and show that GEC’s reliance on such corpora unnaturally constrains annotation and automatic evaluation, resulting in (a) sentences that do not sound acceptable to native speakers and (b) system rankings that do not correlate with human judgments. In light of this, we propose an alternate approach that jettisons costly error coding in favor of unannotated, whole-sentence rewrites. We compare the performance of existing metrics over different gold-standard annotations, and show that automatic evaluation with our new annotation scheme has very strong correlation with expert rankings (ρ = 0.82). As a result, we advocate for a fundamental and necessary shift in the goal of GEC, from correcting small, labeled error types, to producing text that has native fluency.</abstract>
       <pages>169–182</pages>
@@ -207,7 +207,7 @@
     <paper id="18">
       <title>Unsupervised Part-Of-Speech Tagging with Anchor Hidden <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Daniel</first><last>Hsu</last></author>
       <doi>10.1162/tacl_a_00096</doi>
       <abstract>We tackle unsupervised part-of-speech (POS) tagging by learning hidden Markov models (HMMs) that are particularly well-suited for the problem. These HMMs, which we call anchor HMMs, assume that each tag is associated with at least one word that can have no other tag, which is a relatively benign condition for POS tagging (e.g., “the” is a word that appears only under the determiner tag). We exploit this assumption and extend the non-negative matrix factorization framework of Arora et al. (2013) to design a consistent estimator for anchor HMMs. In experiments, our algorithm is competitive with strong baselines such as the clustering method of Brown et al. (1992) and the log-linear model of Berg-Kirkpatrick et al. (2010). Furthermore, it produces an interpretable model in which hidden states are automatically lexicalized by words.</abstract>
@@ -218,7 +218,7 @@
     <paper id="19">
       <title><fixed-case>ABCNN</fixed-case>: Attention-Based Convolutional Neural Network for Modeling Sentence Pairs</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <doi>10.1162/tacl_a_00097</doi>
@@ -230,7 +230,7 @@
     </paper>
     <paper id="20">
       <title>Word Embeddings as Metric Recovery in Semantic Spaces</title>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <author><first>David</first><last>Alvarez-Melis</last></author>
       <author><first>Tommi S.</first><last>Jaakkola</last></author>
       <doi>10.1162/tacl_a_00098</doi>
@@ -251,12 +251,12 @@
     </paper>
     <paper id="22">
       <title>Multilingual Projection for Parsing Truly Low-Resource Languages</title>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <doi>10.1162/tacl_a_00100</doi>
       <abstract>We propose a novel approach to cross-lingual part-of-speech tagging and dependency parsing for truly low-resource languages. Our annotation projection-based approach yields tagging and parsing models for over 100 languages. All that is needed are freely available parallel texts, and taggers and parsers for resource-rich languages. The empirical evaluation across 30 test languages shows that our method consistently provides top-level accuracies, close to established upper bounds, and outperforms several competitive baselines.</abstract>
       <pages>301–312</pages>
@@ -349,7 +349,7 @@
       <title>Encoding Prior Knowledge with Eigenword Embeddings</title>
       <author><first>Dominique</first><last>Osborne</last></author>
       <author><first>Shashi</first><last>Narayan</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00108</doi>
       <abstract>Canonical correlation analysis (CCA) is a method for reducing the dimension of data represented using two views. It has been previously used to derive word embeddings, where one view indicates a word, and the other view indicates its context. We describe a way to incorporate prior knowledge into CCA, give a theoretical justification for it, and test it by deriving word embeddings and evaluating them on a myriad of datasets.</abstract>
       <pages>417–430</pages>
@@ -361,8 +361,8 @@
       <author><first>Waleed</first><last>Ammar</last></author>
       <author><first>George</first><last>Mulcaire</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00109</doi>
       <abstract>We train one multilingual model for dependency parsing and use it to parse sentences in several languages. The parsing model uses (i) multilingual word clusters and embeddings; (ii) token-level language information; and (iii) language-specific features (fine-grained POS tags). This input representation enables the parser not only to parse effectively in multiple languages, but also to generalize across languages based on linguistic universals and typological similarities, making it more effective to learn from limited annotations. Our parser’s performance compares favorably to strong baselines in a range of data scenarios, including when the target language has a large treebank, a small treebank, or no treebank for training.</abstract>
       <pages>431–444</pages>
@@ -396,8 +396,8 @@
       <title>Fast, Small and Exact: Infinite-order Language Modelling with Compressed Suffix Trees</title>
       <author><first>Ehsan</first><last>Shareghi</last></author>
       <author><first>Matthias</first><last>Petri</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <doi>10.1162/tacl_a_00112</doi>
       <abstract>Efficient methods for storing and querying are critical for scaling high-order m-gram language models to large corpora. We propose a language model based on compressed suffix trees, a representation that is highly compact and can be easily held in memory, while supporting queries needed in computing language model probabilities on-the-fly. We present several optimisations which improve query runtimes up to 2500×, despite only incurring a modest increase in construction time and memory usage. For large corpora and high Markov orders, our method is highly competitive with the state-of-the-art KenLM package. It imposes much lower memory requirements, often by orders of magnitude, and has runtimes that are either similar (for training) or comparable (for querying).</abstract>
       <pages>477–490</pages>
@@ -408,7 +408,7 @@
     <paper id="35">
       <title>The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages</title>
       <author><first>Dingquan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00113</doi>
       <abstract>We release Galactic Dependencies 1.0—a large set of synthetic languages not found on Earth, but annotated in Universal Dependencies format. This new resource aims to provide training and development data for NLP methods that aim to adapt to unfamiliar languages. Each synthetic treebank is produced from a real treebank by stochastically permuting the dependents of nouns and/or verbs to match the word order of other real languages. We discuss the usefulness, realism, parsability, perplexity, and diversity of the synthetic languages. As a simple demonstration of the use of Galactic Dependencies, we consider single-source transfer, which attempts to parse a real target language using a parser trained on a “nearby” source language. We find that including synthetic source languages somewhat increases the diversity of the source pool, which significantly improves results for most target languages.</abstract>
       <pages>491–505</pages>
@@ -419,7 +419,7 @@
     <paper id="36">
       <title>Minimally Supervised Number Normalization</title>
       <author><first>Kyle</first><last>Gorman</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <doi>10.1162/tacl_a_00114</doi>
       <abstract>We propose two models for verbalizing numbers, a key component in speech recognition and synthesis systems. The first model uses an end-to-end recurrent neural network. The second model, drawing inspiration from the linguistics literature, uses finite-state transducers constructed with a minimal amount of training data. While both models achieve near-perfect performance, the latter model can be trained using several orders of magnitude less data than the former, making it particularly useful for low-resource languages.</abstract>
       <pages>507–519</pages>
@@ -451,9 +451,9 @@
     </paper>
     <paper id="39">
       <title>Utilizing Temporal Information for Taxonomy Construction</title>
-      <author><first>Luu Anh</first><last>Tuan</last></author>
+      <author id="luu-anh-tuan"><first>Luu Anh</first><last>Tuan</last></author>
       <author><first>Siu Cheung</first><last>Hui</last></author>
-      <author><first>See Kiong</first><last>Ng</last></author>
+      <author id="see-kiong-ng"><first>See Kiong</first><last>Ng</last></author>
       <doi>10.1162/tacl_a_00117</doi>
       <abstract>Taxonomies play an important role in many applications by organizing domain knowledge into a hierarchy of ‘is-a’ relations between terms. Previous work on automatic construction of taxonomies from text documents either ignored temporal information or used fixed time periods to discretize the time series of documents. In this paper, we propose a time-aware method to automatically construct and effectively maintain a taxonomy from a given series of documents preclustered for a domain of interest. The method extracts temporal information from the documents and uses a timestamp contribution function to score the temporal relevance of the evidence from source texts when identifying the taxonomic relations for constructing the taxonomy. Experimental results show that our proposed method outperforms the state-of-the-art methods by increasing F-measure up to 7%–20%. Furthermore, the proposed method can incrementally update the taxonomy by adding fresh relations from new data and removing outdated relations using an information decay function. It thus avoids rebuilding the whole taxonomy from scratch for every update and keeps the taxonomy effectively up-to-date in order to track the latest information trends in the rapidly evolving domain.</abstract>
       <pages>551–564</pages>
diff --git a/data/xml/Q17.xml b/data/xml/Q17.xml
index 6db3ae34f0..3417f659fd 100644
--- a/data/xml/Q17.xml
+++ b/data/xml/Q17.xml
@@ -32,10 +32,10 @@
     </paper>
     <paper id="2">
       <title>Visually Grounded and Textual Semantic Models Differentially Decode Brain Activity Associated with Concrete and Abstract Nouns</title>
-      <author><first>Andrew J.</first><last>Anderson</last></author>
+      <author id="andrew-j-anderson"><first>Andrew J.</first><last>Anderson</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.1162/tacl_a_00043</doi>
       <abstract>Important advances have recently been made using computational semantic models to decode brain activity patterns associated with concepts; however, this work has almost exclusively focused on concrete nouns. How well these models extend to decoding abstract nouns is largely unknown. We address this question by applying state-of-the-art computational models to decode functional Magnetic Resonance Imaging (fMRI) activity patterns, elicited by participants reading and imagining a diverse set of both concrete and abstract nouns. One of the models we use is linguistic, exploiting the recent word2vec skipgram approach trained on Wikipedia. The second is visually grounded, using deep convolutional neural networks trained on Google Images. Dual coding theory considers concrete concepts to be encoded in the brain both linguistically and visually, and abstract concepts only linguistically. Splitting the fMRI data according to human concreteness ratings, we indeed observe that both models significantly decode the most concrete nouns; however, accuracy is significantly greater using the text-based models for the most abstract nouns. More generally this confirms that current computational models are sufficiently advanced to assist in investigating the representational structure of abstract concepts in the brain.</abstract>
       <pages>17–30</pages>
@@ -48,7 +48,7 @@
       <author><first>Ashutosh</first><last>Modi</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Manfred</first><last>Pinkal</last></author>
       <doi>10.1162/tacl_a_00044</doi>
       <abstract>Recent research in psycholinguistics has provided increasing evidence that humans predict upcoming content. Prediction also affects perception and might be a key to robustness in human language processing. In this paper, we investigate the factors that affect human prediction by building a computational model that can predict upcoming discourse referents based on linguistic knowledge alone vs. linguistic knowledge jointly with common-sense knowledge in the form of scripts. We find that script knowledge significantly improves model estimates of human predictions. In a second study, we test the highly controversial hypothesis that predictability influences referring expression type but do not find evidence for such an effect.</abstract>
@@ -70,7 +70,7 @@
     <paper id="5">
       <title>A Polynomial-Time Dynamic Programming Algorithm for Phrase-Based Decoding with a Fixed Distortion Limit</title>
       <author><first>Yin-Wen</first><last>Chang</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00046</doi>
       <abstract>Decoding of phrase-based translation models in the general case is known to be NP-complete, by a reduction from the traveling salesman problem (Knight, 1999). In practice, phrase-based systems often impose a hard distortion limit that limits the movement of phrases during translation. However, the impact on complexity after imposing such a constraint is not well studied. In this paper, we describe a dynamic programming algorithm for phrase-based decoding with a fixed distortion limit. The runtime of the algorithm is O(nd!lhd+1) where n is the sentence length, d is the distortion limit, l is a bound on the number of phrases starting at any position in the sentence, and h is related to the maximum number of target language translations for any source word. The algorithm makes use of a novel representation that gives a new perspective on decoding of phrase-based models.</abstract>
       <pages>59–71</pages>
@@ -84,7 +84,7 @@
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Adam</first><last>Albright</last></author>
       <author><first>Peter</first><last>Graff</last></author>
-      <author><first>Timothy J.</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Timothy J.</first><last>O’Donnell</last></author>
       <doi>10.1162/tacl_a_00047</doi>
       <abstract>We present a probabilistic model of phonotactics, the set of well-formed phoneme sequences in a language. Unlike most computational models of phonotactics (Hayes and Wilson, 2008; Goldsmith and Riggle, 2012), we take a fully generative approach, modeling a process where forms are built up out of subparts by phonologically-informed structure building operations. We learn an inventory of subparts by applying stochastic memoization (Johnson et al., 2007; Goodman et al., 2008) to a generative process for phonemes structured as an and-or graph, based on concepts of feature hierarchy from generative phonology (Clements, 1985; Dresher, 2009). Subparts are combined in a way that allows tier-based feature interactions. We evaluate our models’ ability to capture phonotactic distributions in the lexicons of 14 languages drawn from the WOLEX corpus (Graff, 2012). Our full model robustly assigns higher probabilities to held-out forms than a sophisticated N-gram model for all languages. We also present novel analyses that probe model behavior in more detail.</abstract>
       <pages>73–86</pages>
@@ -112,7 +112,7 @@
       <author><first>Hoifung</first><last>Poon</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <doi>10.1162/tacl_a_00049</doi>
       <abstract>Past work in relation extraction has focused on binary relations in single sentences. Recent NLP inroads in high-value domains have sparked interest in the more general setting of extracting n-ary relations that span multiple sentences. In this paper, we explore a general relation extraction framework based on graph long short-term memory networks (graph LSTMs) that can be easily extended to cross-sentence n-ary relation extraction. The graph formulation provides a unified way of exploring different LSTM approaches and incorporating various intra-sentential and inter-sentential dependencies, such as sequential, syntactic, and discourse relations. A robust contextual representation is learned for the entities, which serves as input to the relation classifier. This simplifies handling of relations with arbitrary arity, and enables multi-task learning with related relations. We evaluate this framework in two important precision medicine settings, demonstrating its effectiveness with both conventional supervised learning and distant supervision. Cross-sentence extraction produced larger knowledge bases. and multi-task learning significantly improved extraction accuracy. A thorough analysis of various LSTM approaches yielded useful insight the impact of linguistic analysis on extraction accuracy.</abstract>
       <pages>101–115</pages>
@@ -123,8 +123,8 @@
     <paper id="9">
       <title>Automatically Tagging Constructions of Causation and Their Slot-Fillers</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <doi>10.1162/tacl_a_00050</doi>
       <abstract>This paper explores extending shallow semantic parsing beyond lexical-unit triggers, using causal relations as a test case. Semantic parsing becomes difficult in the face of the wide variety of linguistic realizations that causation can take on. We therefore base our approach on the concept of constructions from the linguistic paradigm known as Construction Grammar (CxG). In CxG, a construction is a form/function pairing that can rely on arbitrary linguistic and semantic features. Rather than codifying all aspects of each construction’s form, as some attempts to employ CxG in NLP have done, we propose methods that offload that problem to machine learning. We describe two supervised approaches for tagging causal constructions and their arguments. Both approaches combine automatically induced pattern-matching rules with statistical classifiers that learn the subtler parameters of the constructions. Our results show that these approaches are promising: they significantly outperform naïve baselines for both construction recognition and cause and effect head matches.</abstract>
       <pages>117–133</pages>
@@ -134,9 +134,9 @@
     <paper id="10">
       <title>Enriching Word Vectors with Subword Information</title>
       <author><first>Piotr</first><last>Bojanowski</last></author>
-      <author><first>Edouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Edouard</first><last>Grave</last></author>
       <author><first>Armand</first><last>Joulin</last></author>
-      <author><first>Tomas</first><last>Mikolov</last></author>
+      <author id="tomas-mikolov"><first>Tomas</first><last>Mikolov</last></author>
       <doi>10.1162/tacl_a_00051</doi>
       <abstract>Continuous word representations, trained on large unlabeled corpora are useful for many natural language processing tasks. Popular models that learn such representations ignore the morphology of words, by assigning a distinct vector to each word. This is a limitation, especially for languages with large vocabularies and many rare words. In this paper, we propose a new approach based on the skipgram model, where each word is represented as a bag of character n-grams. A vector representation is associated to each character n-gram; words being represented as the sum of these representations. Our method is fast, allowing to train models on large corpora quickly and allows us to compute word representations for words that did not appear in the training data. We evaluate our word representations on nine different languages, both on word similarity and analogy tasks. By comparing to recently proposed morphological word representations, we show that our vectors achieve state-of-the-art performance on these tasks.</abstract>
       <pages>135–146</pages>
@@ -147,7 +147,7 @@
     <paper id="11">
       <title>Fine-Grained Prediction of Syntactic Typology: Discovering Latent Structure with Supervised Learning</title>
       <author><first>Dingquan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00052</doi>
       <abstract>We show how to predict the basic word-order facts of a novel language given only a corpus of part-of-speech (POS) sequences. We predict how often direct objects follow their verbs, how often adjectives follow their nouns, and in general the directionalities of all dependency relations. Such typological properties could be helpful in grammar induction. While such a problem is usually regarded as unsupervised learning, our innovation is to treat it as supervised learning, using a large collection of realistic synthetic languages as training data. The supervised learner must identify surface features of a language’s POS sequence (hand-engineered or neural features) that correlate with the language’s deeper structure (latent trees). In the experiment, we show: 1) Given a small set of real languages, it helps to add many synthetic languages to the training data. 2) Our system is robust even when the POS sequences include noise. 3) Our system on this task outperforms a grammar induction baseline by a large margin.</abstract>
       <pages>147–161</pages>
@@ -182,7 +182,7 @@
       <title>Joint Modeling of Topics, Citations, and Topical Authority in Academic Corpora</title>
       <author><first>Jooyeon</first><last>Kim</last></author>
       <author><first>Dongwoo</first><last>Kim</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <doi>10.1162/tacl_a_00055</doi>
       <abstract>Much of scientific progress stems from previously published findings, but searching through the vast sea of scientific publications is difficult. We often rely on metrics of scholarly authority to find the prominent authors but these authority indices do not differentiate authority based on research topics. We present Latent Topical-Authority Indexing (LTAI) for jointly modeling the topics, citations, and topical authority in a corpus of academic papers. Compared to previous models, LTAI differs in two main aspects. First, it explicitly models the generative process of the citations, rather than treating the citations as given. Second, it models each author’s influence on citations of a paper based on the topics of the cited papers, as well as the citing papers. We fit LTAI into four academic corpora: CORA, Arxiv Physics, PNAS, and Citeseer. We compare the performance of LTAI against various baselines, starting with the latent Dirichlet allocation, to the more advanced models including author-link topic model and dynamic author citation topic model. The results show that LTAI achieves improved accuracy over other similar models when predicting words, citations and authors of publications.</abstract>
       <pages>191–204</pages>
@@ -192,11 +192,11 @@
     </paper>
     <paper id="15">
       <title>Pushing the Limits of Translation Quality Estimation</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Marcin</first><last>Junczys-Dowmunt</last></author>
-      <author><first>Fabio N.</first><last>Kepler</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="fabio-kepler"><first>Fabio N.</first><last>Kepler</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
       <doi>10.1162/tacl_a_00056</doi>
       <abstract>Translation quality estimation is a task of growing importance in NLP, due to its potential to reduce post-editing human effort in disruptive ways. However, this potential is currently limited by the relatively low accuracy of existing systems. In this paper, we achieve remarkable improvements by exploiting synergies between the related tasks of word-level quality estimation and automatic post-editing. First, we stack a new, carefully engineered, neural model into a rich feature-based word-level quality estimation system. Then, we use the output of an automatic post-editing system as an extra feature, obtaining striking results on WMT16: a word-level FMULT1 score of 57.47% (an absolute gain of +7.95% over the current state of the art), and a Pearson correlation score of 65.56% for sentence-level HTER prediction (an absolute gain of +13.36%).</abstract>
@@ -220,7 +220,7 @@
     </paper>
     <paper id="17">
       <title>Domain-Targeted, High Precision Knowledge Extraction</title>
-      <author><first>Bhavana</first><last>Dalvi Mishra</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi Mishra</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <doi>10.1162/tacl_a_00058</doi>
@@ -243,7 +243,7 @@
     <paper id="19">
       <title>Learning to Prune: Exploring the Frontier of Fast and Accurate Parsing</title>
       <author><first>Tim</first><last>Vieira</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00060</doi>
       <abstract>Pruning hypotheses during dynamic programming is commonly used to speed up inference in settings such as parsing. Unlike prior work, we train a pruning policy under an objective that measures end-to-end performance: we search for a fast and accurate policy. This poses a difficult machine learning problem, which we tackle with the lols algorithm. lols training must continually compute the effects of changing pruning decisions: we show how to make this efficient in the constituency parsing setting, via dynamic programming and change propagation algorithms. We find that optimizing end-to-end performance in this way leads to a better Pareto frontier—i.e., parsers which are more accurate for a given runtime.</abstract>
       <pages>263–278</pages>
@@ -254,7 +254,7 @@
     <paper id="20">
       <title>Cross-Lingual Syntactic Transfer with Limited Resources</title>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.1162/tacl_a_00061</doi>
       <abstract>We describe a simple but effective method for cross-lingual syntactic transfer of dependency parsers, in the scenario where a large amount of translation data is not available. This method makes use of three steps: 1) a method for deriving cross-lingual word clusters, which can then be used in a multilingual parser; 2) a method for transferring lexical information from a target language to source language treebanks; 3) a method for integrating these steps with the density-driven annotation projection method of Rasooli and Collins (2015). Experiments show improvements over the state-of-the-art in several languages used in previous work, in a setting where the only source of translation data is the Bible, a considerably smaller corpus than the Europarl corpus used in previous work. Results using the Europarl corpus as a source of translation data show additional improvements over the results of Rasooli and Collins (2015). We conclude with results on 38 datasets from the Universal Dependencies corpora.</abstract>
       <pages>279–293</pages>
@@ -280,9 +280,9 @@
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
       <author><first>Ira</first><last>Leviant</last></author>
       <author><first>Roi</first><last>Reichart</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <doi>10.1162/tacl_a_00063</doi>
       <abstract>We present Attract-Repel, an algorithm for improving the semantic quality of word vectors by injecting constraints extracted from lexical resources. Attract-Repel facilitates the use of constraints from mono- and cross-lingual resources, yielding semantically specialized cross-lingual vector spaces. Our evaluation shows that the method can make use of existing cross-lingual lexicons to construct high-quality vector spaces for a plethora of different languages, facilitating semantic transfer from high- to lower-resource ones. The effectiveness of our approach is demonstrated with state-of-the-art results on semantic similarity datasets in six languages. We next show that Attract-Repel-specialized vectors boost performance in the downstream task of dialogue state tracking (DST) across multiple languages. Finally, we show that cross-lingual vector spaces produced by our algorithm facilitate the training of multilingual DST models, which brings further performance improvements.</abstract>
       <pages>309–324</pages>
@@ -293,7 +293,7 @@
       <title>Colors in Context: A Pragmatic Neural Model for Grounded Language Understanding</title>
       <author><first>Will</first><last>Monroe</last></author>
       <author><first>Robert X.D.</first><last>Hawkins</last></author>
-      <author><first>Noah D.</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah D.</first><last>Goodman</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <doi>10.1162/tacl_a_00064</doi>
       <abstract>We present a model of pragmatic referring expression interpretation in a grounded communication task (identifying colors from descriptions) that draws upon predictions from two recurrent neural network classifiers, a speaker and a listener, unified by a recursive pragmatic reasoning framework. Experiments show that this combined pragmatic model interprets color descriptions more accurately than the classifiers from which it is built, and that much of this improvement results from combining the speaker and listener perspectives. We observe that pragmatic reasoning helps primarily in the hardest cases: when the model must distinguish very similar colors, or when few utterances adequately express the target color. Our findings make use of a newly-collected corpus of human utterances in color reference games, which exhibit a variety of pragmatic behaviors. We also show that the embedded speaker model reproduces many of these pragmatic behaviors.</abstract>
@@ -306,7 +306,7 @@
       <title><fixed-case>G</fixed-case>oogle’s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation</title>
       <author><first>Melvin</first><last>Johnson</last></author>
       <author><first>Mike</first><last>Schuster</last></author>
-      <author><first>Quoc V.</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc V.</first><last>Le</last></author>
       <author><first>Maxim</first><last>Krikun</last></author>
       <author><first>Yonghui</first><last>Wu</last></author>
       <author><first>Zhifeng</first><last>Chen</last></author>
@@ -384,7 +384,7 @@
     <paper id="30">
       <title>Evaluating Low-Level Speech Features Against Human Perceptual Data</title>
       <author><first>Caitlin</first><last>Richter</last></author>
-      <author><first>Naomi H.</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi H.</first><last>Feldman</last></author>
       <author><first>Harini</first><last>Salgado</last></author>
       <author><first>Aren</first><last>Jansen</last></author>
       <doi>10.1162/tacl_a_00071</doi>
@@ -408,7 +408,7 @@
       <title>Unsupervised Acquisition of Comprehensive Multiword Lexicons using Competition in an n-gram Lattice</title>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.1162/tacl_a_00073</doi>
       <abstract>We present a new model for acquiring comprehensive multiword lexicons from large corpora based on competition among n-gram candidates. In contrast to the standard approach of simple ranking by association measure, in our model n-grams are arranged in a lattice structure based on subsumption and overlap relationships, with nodes inhibiting other nodes in their vicinity when they are selected as a lexical item. We show how the configuration of such a lattice can be optimized tractably, and demonstrate using annotations of sampled n-grams that our method consistently outperforms alternatives by at least 0.05 F-score across several corpora and languages.</abstract>
       <pages>455–470</pages>
diff --git a/data/xml/Q18.xml b/data/xml/Q18.xml
index 2e363410f8..5056929662 100644
--- a/data/xml/Q18.xml
+++ b/data/xml/Q18.xml
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Whodunnit? Crime Drama as a Case for Natural Language Understanding</title>
       <author><first>Lea</first><last>Frermann</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <doi>10.1162/tacl_a_00001</doi>
       <abstract>In this paper we argue that crime drama exemplified in television programs such as CSI: Crime Scene Investigation is an ideal testbed for approximating real-world natural language understanding and the complex inferences associated with it. We propose to treat crime drama as a new inference task, capitalizing on the fact that each episode poses the same basic question (i.e., who committed the crime) and naturally provides the answer when the perpetrator is revealed. We develop a new dataset based on CSI episodes, formalize perpetrator identification as a sequence labeling problem, and develop an LSTM-based model which learns from multi-modal data. Experimental results show that an incremental inference strategy is key to making accurate guesses as well as learning from representations fusing textual, visual, and acoustic input.</abstract>
@@ -42,7 +42,7 @@
     <paper id="3">
       <title>Joint Semantic Synthesis and Morphological Analysis of the Derived Word</title>
       <author><first>Ryan</first><last>Cotterell</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00003</doi>
       <abstract>Much like sentences are composed of words, words themselves are composed of smaller units. For example, the English word questionably can be analyzed as question+able+ly. However, this structural decomposition of the word does not directly give us a semantic representation of the word’s meaning. Since morphology obeys the principle of compositionality, the semantics of the word can be systematically derived from the meaning of its parts. In this work, we propose a novel probabilistic model of word formation that captures both the analysis of a word w into its constituent segments and the synthesis of the meaning of w from the meanings of those segments. Our model jointly learns to segment words into morphemes and compose distributional semantic vectors of those morphemes. We experiment with the model on English CELEX data and German DErivBase (Zeller et al., 2013) data. We show that jointly modeling semantics increases both segmentation accuracy and morpheme F1 by between 3% and 5%. Additionally, we investigate different models of vector composition, showing that recurrent neural networks yield an improvement over simple additive models. Finally, we study the degree to which the representations correspond to a linguist’s notion of morphological productivity.</abstract>
       <pages>33–48</pages>
@@ -88,7 +88,7 @@
       <title>Towards Evaluating Narrative Quality In Student Writing</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Michael</first><last>Flor</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Hillary</first><last>Molloy</last></author>
       <author><first>Binod</first><last>Gyawali</last></author>
       <author><first>Laura</first><last>McCulla</last></author>
@@ -113,7 +113,7 @@
     <paper id="9">
       <title>Conversation Modeling on <fixed-case>R</fixed-case>eddit Using a Graph-Structured <fixed-case>LSTM</fixed-case></title>
       <author><first>Victoria</first><last>Zayats</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <doi>10.1162/tacl_a_00009</doi>
       <abstract>This paper presents a novel approach for modeling threaded discussions on social media using a graph-structured bidirectional LSTM (long-short term memory) which represents both hierarchical and temporal conversation structure. In experiments with a task of predicting popularity of comments in Reddit discussions, the proposed model outperforms a node-independent architecture for different sets of input features. Analyses show a benefit to the model over the full course of the discussion, improving detection in both early and late stages. Further, the use of language cues with the bidirectional tree state updates helps with identifying controversial comments.</abstract>
       <pages>121–132</pages>
@@ -124,7 +124,7 @@
     <paper id="10">
       <title>Learning Representations Specialized in Spatial Knowledge: Leveraging Language and Vision</title>
       <author><first>Guillem</first><last>Collell</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <doi>10.1162/tacl_a_00010</doi>
       <abstract>Spatial understanding is crucial in many real-world problems, yet little progress has been made towards building representations that capture spatial knowledge. Here, we move one step forward in this direction and learn such representations by leveraging a task consisting in predicting continuous 2D spatial arrangements of objects given object-relationship-object instances (e.g., “cat under chair”) and a simple neural network model that learns the task from annotated images. We show that the model succeeds in this task and, furthermore, that it is capable of predicting correct spatial arrangements for unseen objects if either CNN features or word embeddings of the objects are provided. The differences between visual and linguistic features are discussed. Next, to evaluate the spatial representations learned in the previous task, we introduce a task and a dataset consisting in a set of crowdsourced human ratings of spatial similarity for object pairs. We find that both CNN (convolutional neural network) features and word embeddings predict human judgments of similarity well and that these vectors can be further specialized in spatial knowledge if we update them when training the model that predicts spatial arrangements of objects. Overall, this paper paves the way towards building distributed spatial representations, contributing to the understanding of spatial expressions in language.</abstract>
       <pages>133–144</pages>
@@ -137,8 +137,8 @@
       <author><first>Hao</first><last>Zhou</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Lili</first><last>Mou</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
       <doi>10.1162/tacl_a_00011</doi>
       <abstract>Existing neural machine translation systems do not explicitly model what has been translated and what has not during the decoding phase. To address this problem, we propose a novel mechanism that separates the source information into two parts: translated Past contents and untranslated Future contents, which are modeled by two additional recurrent layers. The Past and Future contents are fed to both the attention model and the decoder states, which provides Neural Machine Translation (NMT) systems with the knowledge of translated and untranslated contents. Experimental results show that the proposed approach significantly improves the performance in Chinese-English, German-English, and English-German translation tasks. Specifically, the proposed model outperforms the conventional coverage model in terms of both the translation quality and the alignment error rate.</abstract>
@@ -172,7 +172,7 @@
       <title>Unsupervised Word Mapping Using Structural Similarities in Monolingual Embeddings</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
       <author><first>Mahesh</first><last>Mohan</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <doi>10.1162/tacl_a_00014</doi>
       <abstract>Most existing methods for automatic bilingual dictionary induction rely on prior alignments between the source and target languages, such as parallel corpora or seed dictionaries. For many language pairs, such supervised alignments are not readily available. We propose an unsupervised approach for learning a bilingual dictionary for a pair of languages given their independently-learned monolingual word embeddings. The proposed method exploits local and global structures in monolingual vector spaces to align them such that similar words are mapped to each other. We show empirically that the performance of bilingual correspondents that are learned using our proposed unsupervised method is comparable to that of using supervised bilingual correspondents from a seed dictionary.</abstract>
       <pages>185–196</pages>
@@ -194,7 +194,7 @@
       <title>Unsupervised Grammar Induction with Depth-bounded <fixed-case>PCFG</fixed-case></title>
       <author><first>Lifeng</first><last>Jin</last></author>
       <author><first>Finale</first><last>Doshi-Velez</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <doi>10.1162/tacl_a_00016</doi>
@@ -228,7 +228,7 @@
       <title>Do latent tree learning models identify meaningful structure in sentences?</title>
       <author><first>Adina</first><last>Williams</last></author>
       <author><first>Andrew</first><last>Drozdov</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <doi>10.1162/tacl_a_00019</doi>
       <abstract>Recent work on the problem of latent tree learning has made it possible to train neural networks that learn to both parse a sentence and use the resulting parse to interpret the sentence, all without exposure to ground-truth parse trees at training time. Surprisingly, these models often perform better at sentence understanding tasks than models that use parse trees from conventional parsers. This paper aims to investigate what these latent tree learning models learn. We replicate two such models in a shared codebase and find that (i) only one of these models outperforms conventional tree-structured models on sentence classification, (ii) its parsing strategies are not especially consistent across random restarts, (iii) the parses it produces tend to be shallower than standard Penn Treebank (PTB) parses, and (iv) they do not resemble those of PTB or any other semantic or syntactic formalism that the authors are aware of.</abstract>
       <pages>253–267</pages>
@@ -261,9 +261,9 @@
     <paper id="22">
       <title>Leveraging Orthographic Similarity for Multilingual Neural Transliteration</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Gurneet</first><last>Singh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <doi>10.1162/tacl_a_00022</doi>
       <abstract>We address the task of joint training of transliteration models for multiple language pairs (multilingual transliteration). This is an instance of multitask learning, where individual tasks (language pairs) benefit from sharing knowledge with related tasks. We focus on transliteration involving related tasks i.e., languages sharing writing systems and phonetic properties (orthographically similar languages). We propose a modified neural encoder-decoder model that maximizes parameter sharing across language pairs in order to effectively leverage orthographic similarity. We show that multilingual transliteration significantly outperforms bilingual transliteration in different scenarios (average increase of 58% across a variety of languages we experimented with). We also show that multilingual transliteration models can generalize well to languages/language pairs not encountered during training and hence perform well on the zeroshot transliteration task. We show that further improvements can be achieved by using phonetic feature input.</abstract>
       <pages>303–316</pages>
@@ -274,11 +274,11 @@
       <title>The <fixed-case>N</fixed-case>arrative<fixed-case>QA</fixed-case> Reading Comprehension Challenge</title>
       <author><first>Tomáš</first><last>Kočiský</last></author>
       <author><first>Jonathan</first><last>Schwarz</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
       <author><first>Gábor</first><last>Melis</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <doi>10.1162/tacl_a_00023</doi>
       <abstract>Reading comprehension (RC)—in contrast to information retrieval—requires integrating information and reasoning about events, entities, and their relations across a full document. Question answering is conventionally used to assess RC ability, in both artificial agents and children learning to read. However, existing RC datasets and tasks are dominated by questions that can be solved by selecting answers using superficial information (e.g., local context similarity or global term frequency); they thus fail to test for the essential integrative aspect of RC. To encourage progress on deeper comprehension of language, we present a new dataset and set of tasks in which the reader must answer questions about stories by reading entire books or movie scripts. These tasks are designed so that successfully answering their questions requires understanding the underlying narrative rather than relying on shallow pattern matching or salience. We show that although humans solve the tasks easily, standard RC models struggle on the tasks presented here. We provide an analysis of the dataset and the challenges it presents.</abstract>
       <pages>317–328</pages>
@@ -335,8 +335,8 @@
       <author><first>David</first><last>Jurgens</last></author>
       <author><first>Srijan</first><last>Kumar</last></author>
       <author><first>Raine</first><last>Hoover</last></author>
-      <author><first>Dan</first><last>McFarland</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-mcfarland"><first>Dan</first><last>McFarland</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <doi>10.1162/tacl_a_00028</doi>
       <abstract>Citations have long been used to characterize the state of a scientific field and to identify influential works. However, writers use citations for different purposes, and this varied purpose influences uptake by future scholars. Unfortunately, our understanding of how scholars use and frame citations has been limited to small-scale manual citation analysis of individual papers. We perform the largest behavioral study of citations to date, analyzing how scientific works frame their contributions through different types of citations and how this framing affects the field as a whole. We introduce a new dataset of nearly 2,000 citations annotated for their function, and use it to develop a state-of-the-art classifier and label the papers of an entire field: Natural Language Processing. We then show how differences in framing affect scientific uptake and reveal the evolution of the publication venues and the field as a whole. We demonstrate that authors are sensitive to discourse structure and publication venue when citing, and that how a paper frames its work through citations is predictive of the citation count it will receive. Finally, we use changes in citation framing to show that the field of NLP is undergoing a significant increase in consensus.</abstract>
       <pages>391–406</pages>
@@ -372,7 +372,7 @@
     <paper id="31">
       <title>Generating Sentences by Editing Prototypes</title>
       <author><first>Kelvin</first><last>Guu</last></author>
-      <author><first>Tatsunori B.</first><last>Hashimoto</last></author>
+      <author id="tatsunori-b-hashimoto"><first>Tatsunori B.</first><last>Hashimoto</last></author>
       <author><first>Yonatan</first><last>Oren</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <doi>10.1162/tacl_a_00030</doi>
@@ -404,7 +404,7 @@
       <author><first>Prateek</first><last>Verma</last></author>
       <author><first>Nelson</first><last>Morgan</last></author>
       <author><first>Jennifer L.</first><last>Eberhardt</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <doi>10.1162/tacl_a_00031</doi>
       <abstract>We apply computational dialog methods to police body-worn camera footage to model conversations between police officers and community members in traffic stops. Relying on the theory of institutional talk, we develop a labeling scheme for police speech during traffic stops, and a tagger to detect institutional dialog acts (Reasons, Searches, Offering Help) from transcribed text at the turn (78% F-score) and stop (89% F-score) level. We then develop speech recognition and segmentation algorithms to detect these acts at the stop level from raw camera audio (81% F-score, with even higher accuracy for crucial acts like conveying the reason for the stop). We demonstrate that the dialog structures produced by our tagger could reveal whether officers follow law enforcement norms like introducing themselves, explaining the reason for the stop, and asking permission for searches. This work may therefore inform and aid efforts to ensure the procedural justice of police-community interactions.</abstract>
       <pages>467–481</pages>
@@ -428,7 +428,7 @@
     <paper id="35">
       <title>Low-Rank <fixed-case>RNN</fixed-case> Adaptation for Context-Aware Language Modeling</title>
       <author><first>Aaron</first><last>Jaech</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <doi>10.1162/tacl_a_00035</doi>
       <abstract>A context-aware language model uses location, user and/or domain metadata (context) to adapt its predictions. In neural language models, context information is typically represented as an embedding and it is given to the RNN as an additional input, which has been shown to be useful in many applications. We introduce a more powerful mechanism for using context to adapt an RNN by letting the context vector control a low-rank transformation of the recurrent layer weight matrix. Experiments show that allowing a greater fraction of the model parameters to be adjusted has benefits in terms of perplexity and classification for several different types of context.</abstract>
       <pages>497–510</pages>
@@ -448,7 +448,7 @@
     <paper id="37">
       <title>Planning, Inference and Pragmatics in Sequential Language Games</title>
       <author><first>Fereshte</first><last>Khani</last></author>
-      <author><first>Noah D.</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah D.</first><last>Goodman</last></author>
       <author><first>Percy</first><last>Liang</last></author>
       <doi>10.1162/tacl_a_00037</doi>
       <abstract>We study sequential language games in which two players, each with private information, communicate to achieve a common goal. In such games, a successful player must (i) infer the partner’s private information from the partner’s messages, (ii) generate messages that are most likely to help with the goal, and (iii) reason pragmatically about the partner’s strategy. We propose a model that captures all three characteristics and demonstrate their importance in capturing human behavior on a new goal-oriented dataset we collected using crowdsourcing.</abstract>
@@ -459,7 +459,7 @@
     <paper id="38">
       <title>Probabilistic Verb Selection for Data-to-Text Generation</title>
       <author><first>Dell</first><last>Zhang</last></author>
-      <author id="jiahao-yuan"><first>Jiahao</first><last>Yuan</last></author>
+      <author><first>Jiahao</first><last>Yuan</last></author>
       <author><first>Xiaoling</first><last>Wang</last></author>
       <author><first>Adam</first><last>Foster</last></author>
       <doi>10.1162/tacl_a_00038</doi>
@@ -474,7 +474,7 @@
       <author><first>Xilun</first><last>Chen</last></author>
       <author><first>Yu</first><last>Sun</last></author>
       <author><first>Ben</first><last>Athiwaratkun</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Kilian</first><last>Weinberger</last></author>
       <doi>10.1162/tacl_a_00039</doi>
       <abstract>In recent years great success has been achieved in sentiment classification for English, thanks in part to the availability of copious annotated resources. Unfortunately, most languages do not enjoy such an abundance of labeled data. To tackle the sentiment classification problem in low-resource languages without adequate annotated data, we propose an Adversarial Deep Averaging Network (ADAN1) to transfer the knowledge learned from labeled data on a resource-rich source language to low-resource languages where only unlabeled data exist. ADAN has two discriminative branches: a sentiment classifier and an adversarial language discriminator. Both branches take input from a shared feature extractor to learn hidden representations that are simultaneously indicative for the classification task and invariant across languages. Experiments on Chinese and Arabic sentiment classification demonstrate that ADAN significantly outperforms state-of-the-art systems.</abstract>
@@ -489,8 +489,8 @@
       <author><first>Bob</first><last>Carpenter</last></author>
       <author><first>Jon</first><last>Chamberlain</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <doi>10.1162/tacl_a_00040</doi>
       <abstract>The analysis of crowdsourced annotations in natural language processing is concerned with identifying (1) gold standard labels, (2) annotator accuracies and biases, and (3) item difficulties and error patterns. Traditionally, majority voting was used for 1, and coefficients of agreement for 2 and 3. Lately, model-based analysis of corpus annotations have proven better at all three tasks. But there has been relatively little work comparing them on the same datasets. This paper aims to fill this gap by analyzing six models of annotation, covering different approaches to annotator ability, item difficulty, and parameter pooling (tying) across annotators and items. We evaluate these models along four aspects: comparison to gold labels, predictive accuracy for new annotations, annotator characterization, and item difficulty, using four datasets with varying degrees of noise in the form of random (spammy) annotators. We conclude with guidelines for model selection, application, and implementation.</abstract>
       <pages>571–585</pages>
@@ -500,7 +500,7 @@
     </paper>
     <paper id="41">
       <title>Data Statements for Natural Language Processing: Toward Mitigating System Bias and Enabling Better Science</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Batya</first><last>Friedman</last></author>
       <doi>10.1162/tacl_a_00041</doi>
       <abstract>In this paper, we propose data statements as a design solution and professional practice for natural language processing technologists, in both research and development. Through the adoption and widespread use of data statements, the field can begin to address critical scientific and ethical issues that result from the use of data from certain populations in the development of technology for other populations. We present a form that data statements can take and explore the implications of adopting them as part of regular practice. We argue that data statements will help alleviate issues related to exclusion and bias in language technology, lead to better precision in claims about how natural language processing research can generalize and thus better engineering results, protect companies from public embarrassment, and ultimately lead to language technology that meets its users in their own preferred linguistic style and furthermore does not misrepresent them to others.</abstract>
@@ -536,8 +536,8 @@
       <title>Integrating Weakly Supervised Word Sense Disambiguation into Neural Machine Translation</title>
       <author><first>Xiao</first><last>Pu</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>James</first><last>Henderson</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <doi>10.1162/tacl_a_00242</doi>
       <abstract>This paper demonstrates that word sense disambiguation (WSD) can improve neural machine translation (NMT) by widening the source context considered when modeling the senses of potentially ambiguous words. We first introduce three adaptive clustering algorithms for WSD, based on k-means, Chinese restaurant processes, and random walks, which are then applied to large word contexts represented in a low-rank space and evaluated on SemEval shared-task data. We then learn word vectors jointly with sense vectors defined by our best WSD method, within a state-of-the-art NMT system. We show that the concatenation of these vectors, and the use of a sense selection mechanism based on the weighted average of sense vectors, outperforms several baselines including sense-aware ones. This is demonstrated by translation on five language pairs. The improvements are more than 1 BLEU point over strong NMT baselines, +4% accuracy over all ambiguous nouns and verbs, or +20% when scored manually over several challenging words.</abstract>
       <pages>635–649</pages>
@@ -559,7 +559,7 @@
     <paper id="46">
       <title>Surface Statistics of an Unknown Language Indicate How to Parse It</title>
       <author><first>Dingquan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00248</doi>
       <abstract>We introduce a novel framework for delexicalized dependency parsing in a new language. We show that useful features of the target language can be extracted automatically from an unparsed corpus, which consists only of gold part-of-speech (POS) sequences. Providing these features to our neural parser enables it to parse sequences like those in the corpus. Strikingly, our system has no supervision in the target language. Rather, it is a multilingual system that is trained end-to-end on a variety of other languages, so it learns a feature extractor that works well. We show experimentally across multiple languages: (1) Features computed from the unparsed corpus improve parsing accuracy. (2) Including thousands of synthetic languages in the training yields further improvement. (3) Despite being computed from unparsed corpora, our learned task-specific features beat previous work’s interpretable typological features that require parsed corpora or expert categorization of the language. Our best method improved attachment scores on held-out test languages by an average of 5.6 percentage points over past work that does not inspect the unparsed data (McDonald et al., 2011), and by 20.7 points over past “grammar induction” work that does not use training languages (Naseem et al., 2010).</abstract>
       <pages>667–685</pages>
@@ -569,7 +569,7 @@
     <paper id="47">
       <title>Attentive Convolution: Equipping <fixed-case>CNN</fixed-case>s with <fixed-case>RNN</fixed-case>-style Attention Mechanisms</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <doi>10.1162/tacl_a_00249</doi>
       <abstract>In NLP, convolutional neural networks (CNNs) have benefited less than recurrent neural networks (RNNs) from attention mechanisms. We hypothesize that this is because the attention in CNNs has been mainly implemented as attentive pooling (i.e., it is applied to pooling) rather than as attentive convolution (i.e., it is integrated into convolution). Convolution is the differentiator of CNNs in that it can powerfully model the higher-level representation of a word by taking into account its local fixed-size context in the input text tx. In this work, we propose an attentive convolution network, ATTCONV. It extends the context scope of the convolution operation, deriving higher-level features for a word not only from local context, but also from information extracted from nonlocal context by the attention mechanism commonly used in RNNs. This nonlocal context can come (i) from parts of the input text tx that are distant or (ii) from extra (i.e., external) contexts ty. Experiments on sentence modeling with zero-context (sentiment analysis), single-context (textual entailment) and multiple-context (claim verification) demonstrate the effectiveness of ATTCONV in sentence representation learning with the incorporation of context. In particular, attentive convolution outperforms attentive pooling and is a strong competitor to popular attentive RNNs.1</abstract>
       <pages>687–702</pages>
@@ -579,12 +579,12 @@
     <paper id="48">
       <title>Learning Typed Entailment Graphs with Global Soft Constraints</title>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Xavier R.</first><last>Holt</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <doi>10.1162/tacl_a_00250</doi>
       <abstract>This paper presents a new method for learning typed entailment graphs from text. We extract predicate-argument structures from multiple-source news corpora, and compute local distributional similarity scores to learn entailments between predicates with typed arguments (e.g., person contracted disease). Previous work has used transitivity constraints to improve local decisions, but these constraints are intractable on large graphs. We instead propose a scalable method that learns globally consistent similarity scores based on new soft constraints that consider both the structures across typed entailment graphs and inside each graph. Learning takes only a few hours to run over 100K predicates and our results show large improvements over local similarity scores on two entailment data sets. We further show improvements over paraphrases and entailments from the Paraphrase Database, and prior state-of-the-art entailment graphs. We show that the entailment graphs improve performance in a downstream task.</abstract>
       <pages>703–717</pages>
diff --git a/data/xml/Q19.xml b/data/xml/Q19.xml
index 9beb2729d4..d2b703a620 100644
--- a/data/xml/Q19.xml
+++ b/data/xml/Q19.xml
@@ -55,7 +55,7 @@
     <paper id="4">
       <title>Analysis Methods in Neural Language Processing: A Survey</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <doi>10.1162/tacl_a_00254</doi>
       <abstract>The field of natural language processing has seen impressive progress in recent years, with neural network models replacing many of the traditional systems. A plethora of new models have been proposed, many of which are thought to be opaque compared to their feature-rich counterparts. This has led researchers to analyze, interpret, and evaluate neural networks in novel and more fine-grained ways. In this survey paper, we review analysis methods in neural language processing, categorize them according to prominent research trends, highlight existing limitations, and point to potential directions for future work.</abstract>
       <pages>49–72</pages>
@@ -65,8 +65,8 @@
     <paper id="5">
       <title>Unlexicalized Transition-based Discontinuous Constituency Parsing</title>
       <author><first>Maximin</first><last>Coavoux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <doi>10.1162/tacl_a_00255</doi>
       <abstract>Lexicalized parsing models are based on the assumptions that (i) constituents are organized around a lexical head and (ii) bilexical statistics are crucial to solve ambiguities. In this paper, we introduce an unlexicalized transition-based parser for discontinuous constituency structures, based on a structure-label transition system and a bi-LSTM scoring system. We compare it with lexicalized parsing models in order to address the question of lexicalization in the context of discontinuous constituency parsing. Our experiments show that unlexicalized models systematically achieve higher results than lexicalized models, and provide additional empirical evidence that lexicalization is not necessary to achieve strong parsing results. Our best unlexicalized model sets a new state of the art on English and German discontinuous constituency treebanks. We further provide a per-phenomenon analysis of its errors on discontinuous constituents.</abstract>
       <pages>73–89</pages>
@@ -77,7 +77,7 @@
       <title>Synchronous Bidirectional Neural Machine Translation</title>
       <author><first>Long</first><last>Zhou</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <doi>10.1162/tacl_a_00256</doi>
       <abstract>Existing approaches to neural machine translation (NMT) generate the target language sequence token-by-token from left to right. However, this kind of unidirectional decoding framework cannot make full use of the target-side future contexts which can be produced in a right-to-left decoding direction, and thus suffers from the issue of unbalanced outputs. In this paper, we introduce a synchronous bidirectional–neural machine translation (SB-NMT) that predicts its outputs using left-to-right and right-to-left decoding simultaneously and interactively, in order to leverage both of the history and future information at the same time. Specifically, we first propose a new algorithm that enables synchronous bidirectional decoding in a single model. Then, we present an interactive decoding model in which left-to-right (right-to-left) generation does not only depend on its previously generated outputs, but also relies on future contexts predicted by right-to-left (left-to-right) decoding. We extensively evaluate the proposed SB-NMT model on large-scale NIST Chinese–English, WMT14 English–German, and WMT18 Russian–English translation tasks. Experimental results demonstrate that our model achieves significant improvements over the strong Transformer model by 3.92, 1.49, and 1.04 BLEU points, respectively, and obtains the state-of-the-art performance on Chinese–English and English–German translation tasks.</abstract>
       <pages>91–105</pages>
@@ -102,7 +102,7 @@
       <title>Rotational Unit of Memory: A Novel Representation Unit for <fixed-case>RNN</fixed-case>s with Scalable Applications</title>
       <author><first>Rumen</first><last>Dangovski</last></author>
       <author><first>Li</first><last>Jing</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Mićo</first><last>Tatalović</last></author>
       <author><first>Marin</first><last>Soljačić</last></author>
       <doi>10.1162/tacl_a_00258</doi>
@@ -114,7 +114,7 @@
     <paper id="9">
       <title><fixed-case>GILE</fixed-case>: A Generalized Input-Label Embedding for Text Classification</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <doi>10.1162/tacl_a_00259</doi>
       <abstract>Neural text classification models typically treat output labels as categorical variables that lack description and semantics. This forces their parametrization to be dependent on the label set size, and, hence, they are unable to scale to large label sets and generalize to unseen ones. Existing joint input-label text models overcome these issues by exploiting label descriptions, but they are unable to capture complex label relationships, have rigid parametrization, and their gains on unseen labels happen often at the expense of weak performance on the labels seen during training. In this paper, we propose a new input-label model that generalizes over previous such models, addresses their limitations, and does not compromise performance on seen labels. The model consists of a joint nonlinear input-label embedding with controllable capacity and a joint-space-dependent classification unit that is trained with cross-entropy loss to optimize classification performance. We evaluate models on full-resource and low- or zero-resource text classification of multilingual news and biomedical text with a large label set. Our model outperforms monolingual and multilingual models that do not leverage label semantics and previous joint input-label space models in both scenarios.</abstract>
       <pages>139–155</pages>
@@ -162,7 +162,7 @@
     <paper id="13">
       <title>Categorical Metadata Representation for Customized Text Classification</title>
       <author><first>Jihyeok</first><last>Kim</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Kyungjae</first><last>Lee</last></author>
       <author><first>Sua</first><last>Sung</last></author>
       <author><first>Minji</first><last>Seo</last></author>
@@ -181,7 +181,7 @@
       <author><first>Jianshu</first><last>Chen</last></author>
       <author><first>Dong</first><last>Yu</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.1162/tacl_a_00264</doi>
       <abstract>We present DREAM, the first dialogue-based multiple-choice reading comprehension data set. Collected from English as a Foreign Language examinations designed by human experts to evaluate the comprehension level of Chinese learners of English, our data set contains 10,197 multiple-choice questions for 6,444 dialogues. In contrast to existing reading comprehension data sets, DREAM is the first to focus on in-depth multi-turn multi-party dialogue understanding. DREAM is likely to present significant challenges for existing reading comprehension systems: 84% of answers are non-extractive, 85% of questions require reasoning beyond a single sentence, and 34% of questions also involve commonsense knowledge. We apply several popular neural reading comprehension models that primarily exploit surface information within the text and find them to, at best, just barely outperform a rule-based approach. We next investigate the effects of incorporating dialogue structure and different kinds of general world knowledge into both rule-based and (neural and non-neural) machine learning-based reading comprehension models. Experimental results on the DREAM data set show the effectiveness of dialogue structure and general world knowledge. DREAM is available at <url>https://dataset.org/dream/</url>.</abstract>
       <pages>217–231</pages>
@@ -204,7 +204,7 @@
       <title><fixed-case>C</fixed-case>o<fixed-case>QA</fixed-case>: A Conversational Question Answering Challenge</title>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Danqi</first><last>Chen</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.1162/tacl_a_00266</doi>
       <abstract>Humans gather information through conversations involving a series of interconnected questions and answers. For machines to assist in information gathering, it is therefore essential to enable them to answer conversational questions. We introduce CoQA, a novel dataset for building Conversational Question Answering systems. Our dataset contains 127k questions with answers, obtained from 8k conversations about text passages from seven diverse domains. The questions are conversational, and the answers are free-form text with their corresponding evidence highlighted in the passage. We analyze CoQA in depth and show that conversational questions have challenging phenomena not present in existing reading comprehension datasets (e.g., coreference and pragmatic reasoning). We evaluate strong dialogue and reading comprehension models on CoQA. The best system obtains an F1 score of 65.4%, which is 23.4 points behind human performance (88.8%), indicating that there is ample room for improvement. We present CoQA as a challenge to the community at <url>https://stanfordnlp.github.io/coqa</url>.</abstract>
       <pages>249–266</pages>
@@ -253,7 +253,7 @@
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.1162/tacl_a_00270</doi>
       <abstract>Speech translation has traditionally been approached through cascaded models consisting of a speech recognizer trained on a corpus of transcribed speech, and a machine translation system trained on parallel texts. Several recent works have shown the feasibility of collapsing the cascade into a single, direct model that can be trained in an end-to-end fashion on a corpus of translated speech. However, experiments are inconclusive on whether the cascade or the direct model is stronger, and have only been conducted under the unrealistic assumption that both are trained on equal amounts of data, ignoring other available speech recognition and machine translation corpora. In this paper, we demonstrate that direct speech translation models require more data to perform well than cascaded models, and although they allow including auxiliary data through multi-task training, they are poor at exploiting such data, putting them at a severe disadvantage. As a remedy, we propose the use of end- to-end trainable models with two attention mechanisms, the first establishing source speech to source text alignments, the second modeling source to target text alignment. We show that such models naturally decompose into multi-task–trainable recognition and translation tasks and propose an attention-passing technique that alleviates error propagation issues in a previous formulation of a model with two attention stages. Our proposed model outperforms all examined baselines and is able to exploit auxiliary training data much more effectively than direct attentional models.</abstract>
       <pages>313–325</pages>
@@ -266,7 +266,7 @@
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00271</doi>
       <abstract>We quantify the linguistic complexity of different languages’ morphological systems. We verify that there is a statistically significant empirical trade-off between paradigm size and irregularity: A language’s inflectional paradigms may be either large in size or highly irregular, but never both. We define a new measure of paradigm irregularity based on the conditional entropy of the surface realization of a paradigm— how hard it is to jointly predict all the word forms in a paradigm from the lemma. We estimate irregularity by training a predictive model. Our measurements are taken on large morphological paradigms from 36 typologically diverse languages.</abstract>
       <pages>327–342</pages>
@@ -288,7 +288,7 @@
       <title>A Generative Model for Punctuation in Dependency Trees</title>
       <author><first>Xiang Lisa</first><last>Li</last></author>
       <author><first>Dingquan</first><last>Wang</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.1162/tacl_a_00273</doi>
       <abstract>Treebanks traditionally treat punctuation marks as ordinary words, but linguists have suggested that a tree’s “true” punctuation marks are not observed (Nunberg, 1990). These latent “underlying” marks serve to delimit or separate constituents in the syntax tree. When the tree’s yield is rendered as a written sentence, a string rewriting mechanism transduces the underlying marks into “surface” marks, which are part of the observed (surface) string but should not be regarded as part of the tree. We formalize this idea in a generative model of punctuation that admits efficient dynamic programming. We train it without observing the underlying marks, by locally maximizing the incomplete data likelihood (similarly to the EM algorithm). When we use the trained model to reconstruct the tree’s underlying punctuation, the results appear plausible across 5 languages, and in particular are consistent with Nunberg’s analysis of English. We show that our generative model can be used to beat baselines on punctuation restoration. Also, our reconstruction of a sentence’s underlying punctuation lets us appropriately render the surface punctuation (via our trained underlying-to-surface mechanism) when we syntactically transform the sentence.</abstract>
       <pages>357–373</pages>
@@ -345,7 +345,7 @@
       <author><first>Corina</first><last>Dima</last></author>
       <author><first>Daniël</first><last>de Kok</last></author>
       <author><first>Neele</first><last>Witte</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <doi>10.1162/tacl_a_00275</doi>
       <abstract>Composition models of distributional semantics are used to construct phrase representations from the representations of their words. Composition models are typically situated on two ends of a spectrum. They either have a small number of parameters but compose all phrases in the same way, or they perform word-specific compositions at the cost of a far larger number of parameters. In this paper we propose transformation weighting (TransWeight), a composition model that consistently outperforms existing models on nominal compounds, adjective-noun phrases, and adverb-adjective phrases in English, German, and Dutch. TransWeight drastically reduces the number of parameters needed compared with the best model in the literature by composing similar words in the same way.</abstract>
       <pages>437–451</pages>
@@ -358,8 +358,8 @@
       <author><first>Tom</first><last>Kwiatkowski</last></author>
       <author><first>Jennimaria</first><last>Palomaki</last></author>
       <author><first>Olivia</first><last>Redfield</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Chris</first><last>Alberti</last></author>
       <author><first>Danielle</first><last>Epstein</last></author>
       <author><first>Illia</first><last>Polosukhin</last></author>
@@ -371,7 +371,7 @@
       <author><first>Ming-Wei</first><last>Chang</last></author>
       <author><first>Andrew M.</first><last>Dai</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Slav</first><last>Petrov</last></author>
       <doi>10.1162/tacl_a_00276</doi>
       <abstract>We present the Natural Questions corpus, a question answering data set. Questions consist of real anonymized, aggregated queries issued to the Google search engine. An annotator is presented with a question along with a Wikipedia page from the top 5 search results, and annotates a long answer (typically a paragraph) and a short answer (one or more entities) if present on the page, or marks null if no long/short answer is present. The public release consists of 307,373 training examples with single annotations; 7,830 examples with 5-way annotations for development data; and a further 7,842 examples with 5-way annotated sequestered as test data. We present experiments validating quality of the data. We also describe analysis of 25-way annotations on 302 examples, giving insights into human variability on the annotation task. We introduce robust metrics for the purposes of evaluating question answering systems; demonstrate high human upper bounds on these metrics; and establish baseline results using competitive methods drawn from related literature.</abstract>
@@ -383,7 +383,7 @@
     <paper id="33">
       <title>Tabula Nearly Rasa: Probing the Linguistic Knowledge of Character-level Neural Language Models Trained on Unsegmented Text</title>
       <author><first>Michael</first><last>Hahn</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <doi>10.1162/tacl_a_00283</doi>
       <abstract>Recurrent neural networks (RNNs) have reached striking performance in many natural language processing tasks. This has renewed interest in whether these generic sequence processing devices are inducing genuine linguistic knowledge. Nearly all current analytical studies, however, initialize the RNNs with a vocabulary of known words, and feed them tokenized input during training. We present a multi-lingual study of the linguistic knowledge encoded in RNNs trained as character-level language models, on input data with word boundaries removed. These networks face a tougher and more cognitively realistic task, having to discover any useful linguistic unit from scratch based on input statistics. The results show that our “near tabula rasa” RNNs are mostly able to solve morphological, syntactic and semantic tasks that intuitively presuppose word-level knowledge, and indeed they learned, to some extent, to track word boundaries. Our study opens the door to speculations about the necessity of an explicit, rigid word lexicon in language learning and usage.</abstract>
       <pages>467–484</pages>
@@ -393,7 +393,7 @@
     <paper id="34">
       <title>Graph Convolutional Network with Sequential Attention for Goal-Oriented Dialogue Systems</title>
       <author><first>Suman</first><last>Banerjee</last></author>
-      <author><first>Mitesh M.</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></author>
       <doi>10.1162/tacl_a_00284</doi>
       <abstract>Domain-specific goal-oriented dialogue systems typically require modeling three types of inputs, namely, (i) the knowledge-base associated with the domain, (ii) the history of the conversation, which is a sequence of utterances, and (iii) the current utterance for which the response needs to be generated. While modeling these inputs, current state-of-the-art models such as Mem2Seq typically ignore the rich structure inherent in the knowledge graph and the sentences in the conversation context. Inspired by the recent success of structure-aware Graph Convolutional Networks (GCNs) for various NLP tasks such as machine translation, semantic role labeling, and document dating, we propose a memory-augmented GCN for goal-oriented dialogues. Our model exploits (i) the entity relation graph in a knowledge-base and (ii) the dependency graph associated with an utterance to compute richer representations for words and entities. Further, we take cognizance of the fact that in certain situations, such as when the conversation is in a code-mixed language, dependency parsers may not be available. We show that in such situations we could use the global word co-occurrence graph to enrich the representations of utterances. We experiment with four datasets: (i) the modified DSTC2 dataset, (ii) recently released code-mixed versions of DSTC2 dataset in four languages, (iii) Wizard-of-Oz style CAM676 dataset, and (iv) Wizard-of-Oz style MultiWOZ dataset. On all four datasets our method outperforms existing methods, on a wide range of evaluation metrics.</abstract>
       <pages>485–500</pages>
@@ -425,7 +425,7 @@
       <title>Measuring Online Debaters’ Persuasive Skill from Text over Time</title>
       <author><first>Kelvin</first><last>Luu</last></author>
       <author><first>Chenhao</first><last>Tan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.1162/tacl_a_00281</doi>
       <abstract>Online debates allow people to express their persuasive abilities and provide exciting opportunities for understanding persuasion. Prior studies have focused on studying persuasion in debate content, but without accounting for each debater’s history or exploring the progression of a debater’s persuasive ability. We study debater skill by modeling how participants progress over time in a collection of debates from Debate.org. We build on a widely used model of skill in two-player games and augment it with linguistic features of a debater’s content. We show that online debaters’ skill levels do tend to improve over time. Incorporating linguistic profiles leads to more robust skill estimation than winning records alone. Notably, we find that an interaction feature combining uncertainty cues (hedging) with terms strongly associated with either side of a particular debate (fightin’ words) is more predictive than either feature on its own, indicating the importance of fine- grained linguistic features.</abstract>
       <pages>537–550</pages>
@@ -435,8 +435,8 @@
     <paper id="32">
       <title>Enabling Robust Grammatical Error Correction in New Domains: Data Sets, Metrics, and Analyses</title>
       <author><first>Courtney</first><last>Napoles</last></author>
-      <author><first>Maria</first><last>Nădejde</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <doi>10.1162/tacl_a_00282</doi>
       <abstract>Until now, grammatical error correction (GEC) has been primarily evaluated on text written by non-native English speakers, with a focus on student essays. This paper enables GEC development on text written by native speakers by providing a new data set and metric. We present a multiple-reference test corpus for GEC that includes 4,000 sentences in two new domains (formal and informal writing by native English speakers) and 2,000 sentences from a diverse set of non-native student writing. We also collect human judgments of several GEC systems on this new test set and perform a meta-evaluation, assessing how reliable automatic metrics are across these domains. We find that commonly used GEC metrics have inconsistent performance across domains, and therefore we propose a new ensemble metric that is robust on all three domains of text.</abstract>
       <pages>551–566</pages>
@@ -490,7 +490,7 @@
       <title>Neural Network Acceptability Judgments</title>
       <author><first>Alex</first><last>Warstadt</last></author>
       <author><first>Amanpreet</first><last>Singh</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <doi>10.1162/tacl_a_00290</doi>
       <abstract>This paper investigates the ability of artificial neural networks to judge the grammatical acceptability of a sentence, with the goal of testing their linguistic competence. We introduce the Corpus of Linguistic Acceptability (CoLA), a set of 10,657 English sentences labeled as grammatical or ungrammatical from published linguistics literature. As baselines, we train several recurrent neural network models on acceptability classification, and find that our models outperform unsupervised models by Lau et al. (2016) on CoLA. Error-analysis on specific grammatical phenomena reveals that both Lau et al.’s models and ours learn systematic generalizations like subject-verb-object order. However, all models we test perform far below human level on a wide range of grammatical constructions.</abstract>
       <pages>625–641</pages>
diff --git a/data/xml/R09.xml b/data/xml/R09.xml
index 64f20bf275..e894ca6fd4 100644
--- a/data/xml/R09.xml
+++ b/data/xml/R09.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the International Conference <fixed-case>RANLP</fixed-case>-2009</booktitle>
       <url hash="2365410e">R09-1</url>
       <editor><first>Galia</first><last>Angelova</last></editor>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Borovets, Bulgaria</address>
       <month>September</month>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Summary Generation for Toponym-referenced Images using Object Type Language Models</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>6–11</pages>
       <url hash="e7b3d360">R09-1002</url>
       <bibkey>aker-gaizauskas-2009-summary</bibkey>
@@ -35,17 +35,17 @@
     <paper id="3">
       <title>Prepositional Phrase Attachment in Shallow Parsing</title>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>12–17</pages>
       <url hash="3224abf9">R09-1003</url>
       <bibkey>van-asch-daelemans-2009-prepositional</bibkey>
     </paper>
     <paper id="4">
       <title>A Comparative Study of Open Domain and Opinion Question Answering Systems for Factual and Opinionated Queries</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>18–22</pages>
       <url hash="747176ce">R09-1004</url>
       <bibkey>balahur-etal-2009-comparative</bibkey>
@@ -60,7 +60,7 @@
     <paper id="6">
       <title>Unsupervised Knowledge Extraction for Taxonomies of Concepts from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Eduard</first><last>Barbu</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>28–32</pages>
       <url hash="c1a8cb57">R09-1006</url>
       <bibkey>barbu-poesio-2009-unsupervised</bibkey>
@@ -68,7 +68,7 @@
     <paper id="7">
       <title>Exploring Treebank Transformations in Dependency Parsing</title>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>33–38</pages>
       <url hash="34701fef">R09-1007</url>
       <bibkey>bengoetxea-gojenola-2009-exploring</bibkey>
@@ -92,7 +92,7 @@
       <title>Cross-Linguistic Sentiment Analysis: From <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish</title>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Milan</first><last>Tofiloski</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>50–54</pages>
       <url hash="b0faca10">R09-1010</url>
       <bibkey>brooke-etal-2009-cross</bibkey>
@@ -108,16 +108,16 @@
     <paper id="12">
       <title>Combining Finite State and Corpus-based Techniques for Unknown Word Prediction</title>
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>60–64</pages>
       <url hash="ff539e33">R09-1012</url>
       <bibkey>cholakov-van-noord-2009-combining</bibkey>
     </paper>
     <paper id="13">
       <title>Prototype-based Active Learning for Lemmatization</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Hendrik J.</first><last>Groenewald</last></author>
-      <author><first>Gerhard B.</first><last>van Huyssteen</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="hendrik-johannes-groenewald"><first>Hendrik J.</first><last>Groenewald</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard B.</first><last>van Huyssteen</last></author>
       <pages>65–70</pages>
       <url hash="2be9df57">R09-1013</url>
       <bibkey>daelemans-etal-2009-prototype</bibkey>
@@ -142,7 +142,7 @@
     <paper id="16">
       <title>Singular Value Decomposition for Feature Selection in Taxonomy Learning</title>
       <author><first>Francesca</first><last>Fallucchi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>82–87</pages>
       <url hash="7cf0a19c">R09-1016</url>
       <bibkey>fallucchi-zanzotto-2009-singular</bibkey>
@@ -164,7 +164,7 @@
     </paper>
     <paper id="19">
       <title>Exploiting the Use of Prior Probabilities for Passage Retrieval in Question Answering</title>
-      <author><first>Surya</first><last>Ganesh</last></author>
+      <author id="surya-ganesh"><first>Surya</first><last>Ganesh</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>99–102</pages>
       <url hash="c3b7c7b6">R09-1019</url>
@@ -172,7 +172,7 @@
     </paper>
     <paper id="20">
       <title>Exploiting Structure and Content of <fixed-case>W</fixed-case>ikipedia for Query Expansion in the Context</title>
-      <author><first>Surya</first><last>Ganesh</last></author>
+      <author id="surya-ganesh"><first>Surya</first><last>Ganesh</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>103–106</pages>
       <url hash="5a1a7437">R09-1020</url>
@@ -189,10 +189,10 @@
     <paper id="22">
       <title>Feature-Rich Named Entity Recognition for <fixed-case>B</fixed-case>ulgarian Using Conditional Random Fields</title>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>113–117</pages>
       <url hash="517275c7">R09-1022</url>
       <bibkey>georgiev-etal-2009-feature</bibkey>
@@ -207,15 +207,15 @@
     <paper id="24">
       <title>Learning to Identify Educational Materials</title>
       <author><first>Samer</first><last>Hassan</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>123–127</pages>
       <url hash="1bf8ff8f">R09-1024</url>
       <bibkey>hassan-mihalcea-2009-learning</bibkey>
     </paper>
     <paper id="25">
       <title>Lexicalized Semi-incremental Dependency Parsing</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>128–134</pages>
       <url hash="4db74068">R09-1025</url>
@@ -246,7 +246,7 @@
     <paper id="29">
       <title>Detection of Opinions and Facts. A Cognitive Approach</title>
       <author><first>Yann Vigile</first><last>Hoareau</last></author>
-      <author><first>Adil</first><last>El-Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El-Ghali</last></author>
       <author><first>Charles</first><last>Tijus</last></author>
       <pages>150–154</pages>
       <url hash="a45a49ce">R09-1029</url>
@@ -254,9 +254,9 @@
     </paper>
     <paper id="30">
       <title>Evaluating the Impact of Morphosyntactic Ambiguity in Grammatical Error Detection</title>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>155–160</pages>
       <url hash="e8b50d57">R09-1030</url>
       <bibkey>diaz-de-ilarraza-etal-2009-evaluating</bibkey>
@@ -271,7 +271,7 @@
     <paper id="32">
       <title>Cross-document Event Extraction and Tracking: Task, Evaluation, Techniques and Challenges</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Zheng</first><last>Chen</last></author>
       <author><first>Prashant</first><last>Gupta</last></author>
       <pages>166–172</pages>
@@ -281,7 +281,7 @@
     <paper id="33">
       <title>Co-Parsing with Competitive Models</title>
       <author><first>Lidia</first><last>Khmylko</last></author>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>173–179</pages>
       <url hash="f3e6ea9f">R09-1033</url>
@@ -314,7 +314,7 @@
     </paper>
     <paper id="37">
       <title>Semi-Supervised Learning for Word Sense Disambiguation: Quality vs. Quantity</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Desislava</first><last>Zhekova</last></author>
       <pages>197–202</pages>
       <url hash="f02d782a">R09-1037</url>
@@ -322,7 +322,7 @@
     </paper>
     <paper id="38">
       <title>Treelex Meets Adjectival Tables</title>
-      <author><first>Anna</first><last>Kupść</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
       <pages>203–207</pages>
       <url hash="bd0cbe87">R09-1038</url>
       <bibkey>kupsc-2009-treelex</bibkey>
@@ -330,7 +330,7 @@
     <paper id="39">
       <title>Integrating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et using a Knowledge-based Word Sense Disambiguation Algorithm</title>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>208–213</pages>
       <url hash="6cbd08db">R09-1039</url>
       <bibkey>laparra-rigau-2009-integrating</bibkey>
@@ -345,9 +345,9 @@
     </paper>
     <paper id="41">
       <title>Using Semantic Networks to Identify Temporal Expressions from Semantic Roles</title>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Borja</first><last>Navarro</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <pages>219–224</pages>
       <url hash="57bb366c">R09-1041</url>
       <bibkey>llorens-etal-2009-using</bibkey>
@@ -355,7 +355,7 @@
     <paper id="42">
       <title>The Design of an Experiment in Anaphora Resolution for Referring Expressions Generation</title>
       <author><first>Diego Jesus</first><last>de Lucena</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>225–229</pages>
       <url hash="675a0eaa">R09-1042</url>
       <bibkey>de-lucena-paraboni-2009-design</bibkey>
@@ -396,7 +396,7 @@
     <paper id="47">
       <title>Diacritization for Real-World <fixed-case>A</fixed-case>rabic Texts</title>
       <author><first>Emad</first><last>Mohamed</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>251–257</pages>
       <url hash="024586e6">R09-1047</url>
       <bibkey>mohamed-kubler-2009-diacritization</bibkey>
@@ -404,7 +404,7 @@
     <paper id="48">
       <title>Multi-entity Sentiment Scoring</title>
       <author><first>Karo</first><last>Moilanen</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>258–263</pages>
       <url hash="65a15073">R09-1048</url>
       <bibkey>moilanen-pulman-2009-multi</bibkey>
@@ -412,7 +412,7 @@
     <paper id="49">
       <title>A Morphological and Syntactic Wide-coverage Lexicon for <fixed-case>S</fixed-case>panish: The Leffe</title>
       <author><first>Miguel A.</first><last>Molinero</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Lionel</first><last>Nicolas</last></author>
       <pages>264–269</pages>
       <url hash="c2721b1b">R09-1049</url>
@@ -429,7 +429,7 @@
       <title>Dependency Parsing and Semantic Role Labeling as a Single Task</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>275–280</pages>
       <url hash="d0738207">R09-1051</url>
       <bibkey>morante-etal-2009-dependency</bibkey>
@@ -438,7 +438,7 @@
       <title>Structured Output Learning with Polynomial Kernel</title>
       <author><first>Hajime</first><last>Morita</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>281–286</pages>
       <url hash="be93b396">R09-1052</url>
       <bibkey>morita-etal-2009-structured</bibkey>
@@ -456,15 +456,15 @@
     <paper id="54">
       <title>Unsupervised Extraction of False <fixed-case>F</fixed-case>riends from Parallel Bi-Texts Using the Web as a Corpus</title>
       <author><first>Svetlin</first><last>Nakov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <pages>292–298</pages>
       <url hash="da1e2604">R09-1054</url>
       <bibkey>nakov-etal-2009-unsupervised</bibkey>
     </paper>
     <paper id="55">
       <title>Evaluating Term Extraction</title>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Haïfa</first><last>Zargayouna</last></author>
       <pages>299–304</pages>
       <url hash="0134c6a8">R09-1055</url>
@@ -472,7 +472,7 @@
     </paper>
     <paper id="56">
       <title>Question Answering over Structured Data: an Entailment-Based Approach to Question Analysis</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <pages>305–311</pages>
       <url hash="b11bf98b">R09-1056</url>
@@ -480,8 +480,8 @@
     </paper>
     <paper id="57">
       <title>A Semi-supervised Approach for Generating a Table-of-Contents</title>
-      <author><first>Viet Cuong</first><last>Nguyen</last></author>
-      <author><first>Le Minh</first><last>Nguyen</last></author>
+      <author id="viet-cuong-nguyen"><first>Viet Cuong</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le Minh</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>312–317</pages>
       <url hash="575757c1">R09-1057</url>
@@ -491,10 +491,10 @@
       <title>Towards Efficient Production of Linguistic Resources: the <fixed-case>V</fixed-case>ictoria Project</title>
       <author><first>Lionel</first><last>Nicolas</last></author>
       <author><first>Miguel A.</first><last>Molinero</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Elena</first><last>Trigo</last></author>
-      <author><first>Éric</first><last>de La Clergerie</last></author>
-      <author><first>Miguel</first><last>Alonso Pardo</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de La Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel</first><last>Alonso Pardo</last></author>
       <author><first>Jacques</first><last>Farré</last></author>
       <author><first>Joan Miquel</first><last>Vergés</last></author>
       <pages>318–323</pages>
@@ -506,24 +506,24 @@
       <author><first>Rafael</first><last>Oliveira</last></author>
       <author><first>Eder</first><last>Novais</last></author>
       <author><first>Roberto</first><last>Araujo</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>324–329</pages>
       <url hash="4d2b6feb">R09-1059</url>
       <bibkey>oliveira-etal-2009-classification</bibkey>
     </paper>
     <paper id="60">
       <title>Interactive Machine Translation Based on Partial Statistical Phrase-based Alignments</title>
-      <author><first>Daniel</first><last>Ortiz-Martínez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz-Martínez</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>330–336</pages>
       <url hash="01ccb0b1">R09-1060</url>
       <bibkey>ortiz-martinez-etal-2009-interactive</bibkey>
     </paper>
     <paper id="61">
       <title>Topic Modeling of Research Fields: An Interdisciplinary Perspective</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>337–342</pages>
       <url hash="392500d8">R09-1061</url>
       <bibkey>paul-girju-2009-topic</bibkey>
@@ -538,7 +538,7 @@
     <paper id="63">
       <title>Comparing Statistical Similarity Measures for Stylistic Multivariate Analysis</title>
       <author><first>Marius</first><last>Popescu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>349–354</pages>
       <url hash="e9d6f2c6">R09-1063</url>
       <bibkey>popescu-dinu-2009-comparing</bibkey>
@@ -546,7 +546,7 @@
     <paper id="64">
       <title>From Bag of Languages to Family Trees From Noisy Corpus</title>
       <author><first>Taraka</first><last>Rama</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>355–359</pages>
       <url hash="47db3d36">R09-1064</url>
       <bibkey>rama-singh-2009-bag</bibkey>
@@ -554,7 +554,7 @@
     <paper id="65">
       <title>Language-Independent Sentiment Analysis Using Subjectivity and Positional Information</title>
       <author><first>Veselin</first><last>Raychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>360–364</pages>
       <url hash="99e2d98d">R09-1065</url>
       <bibkey>raychev-nakov-2009-language</bibkey>
@@ -591,7 +591,7 @@
     <paper id="69">
       <title>Identifying Semantic Relations in Context: Near-misses and Overlaps</title>
       <author><first>Alla</first><last>Rozovskaya</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>381–387</pages>
       <url hash="295dd009">R09-1069</url>
       <bibkey>rozovskaya-girju-2009-identifying</bibkey>
@@ -599,8 +599,8 @@
     <paper id="70">
       <title>Statistical Confidence Measures for Probabilistic Parsing</title>
       <author><first>Ricardo</first><last>Sánchez-Sáez</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
-      <author><first>José-Miguel</first><last>Benedí Ruíz</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí Ruíz</last></author>
       <pages>388–392</pages>
       <url hash="ff31b886">R09-1070</url>
       <bibkey>sanchez-saez-etal-2009-statistical</bibkey>
@@ -624,7 +624,7 @@
     <paper id="73">
       <title>Combining Lexical Resources for Contextual Synonym Expansion</title>
       <author><first>Ravi</first><last>Sinha</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>404–410</pages>
       <url hash="f2408f67">R09-1073</url>
       <bibkey>sinha-mihalcea-2009-combining</bibkey>
@@ -632,7 +632,7 @@
     <paper id="74">
       <title>String Distance-Based Stemming of the Highly Inflected <fixed-case>C</fixed-case>roatian Language</title>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <pages>411–415</pages>
       <url hash="419158a0">R09-1074</url>
       <bibkey>snajder-dalbelo-basic-2009-string</bibkey>
@@ -655,7 +655,7 @@
     </paper>
     <paper id="77">
       <title><fixed-case>A</fixed-case>mharic Part-of-Speech Tagger for Factored Language Modeling</title>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>428–433</pages>
       <url hash="3408789d">R09-1077</url>
@@ -664,8 +664,8 @@
     <paper id="78">
       <title>Improving Unsegmented Statistical Dialogue Act Labelling</title>
       <author><first>Vicent</first><last>Tamarit</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
-      <author><first>José Miguel</first><last>Benedí Ruíz</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedí Ruíz</last></author>
       <pages>434–440</pages>
       <url hash="600150ea">R09-1078</url>
       <bibkey>tamarit-etal-2009-improving</bibkey>
@@ -673,7 +673,7 @@
     <paper id="79">
       <title>Three Issues in Cross-Language Frame Information Transfer</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>441–448</pages>
       <url hash="81ac74ec">R09-1079</url>
       <bibkey>tonelli-pianta-2009-three</bibkey>
@@ -681,9 +681,9 @@
     <paper id="80">
       <title>A Study on Linking <fixed-case>W</fixed-case>ikipedia Categories to <fixed-case>W</fixed-case>ordnet Synsets using Text Similarity</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Óscar</first><last>Ferrández</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="oscar-ferrandez"><first>Óscar</first><last>Ferrández</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>449–454</pages>
       <url hash="9df99928">R09-1080</url>
       <bibkey>toral-etal-2009-study</bibkey>
@@ -700,16 +700,16 @@
       <title>A Method to Restrict the Blow-up of Hypotheses of a Non-disambiguated</title>
       <author><first>Jernej</first><last>Vičič</last></author>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <pages>460–464</pages>
       <url hash="06814889">R09-1082</url>
       <bibkey>vicic-etal-2009-method</bibkey>
     </paper>
     <paper id="83">
       <title>Sources of Performance in <fixed-case>CRF</fixed-case> Transfer Training: a Business Name-tagging Case Study</title>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <author><first>Jonathan</first><last>Huggins</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <pages>465–470</pages>
       <url hash="f53b8f6d">R09-1083</url>
       <bibkey>vilain-etal-2009-sources</bibkey>
@@ -725,7 +725,7 @@
     <paper id="85">
       <title>Instance Sampling Methods for Pronoun Resolution</title>
       <author><first>Holger</first><last>Wunsch</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Rachael</first><last>Cantrell</last></author>
       <pages>478–483</pages>
       <url hash="477e79c1">R09-1085</url>
@@ -743,8 +743,8 @@
       <title>Too Many Mammals: Improving the Diversity of Automatically Recognized Terms</title>
       <author><first>Ziqi</first><last>Zhang</last></author>
       <author><first>Lei</first><last>Xia</last></author>
-      <author><first>Mark A.</first><last>Greenwood</last></author>
-      <author><first>José</first><last>Iria</last></author>
+      <author id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <pages>490–495</pages>
       <url hash="60f3f597">R09-1087</url>
       <bibkey>zhang-etal-2009-many</bibkey>
@@ -770,7 +770,7 @@
     <paper id="1">
       <title>Effect of Minimal Semantics on Dependency Parsing</title>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
-      <author><first>Pujitha</first><last>Gade</last></author>
+      <author id="raghu-pujitha-gade"><first>Pujitha</first><last>Gade</last></author>
       <author><first>Chaitanya</first><last>GSK</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <pages>1–5</pages>
@@ -786,7 +786,7 @@
     </paper>
     <paper id="3">
       <title>A Study of Machine Learning Algorithms for Recognizing Textual Entailment</title>
-      <author><first>Julio Javier</first><last>Castillo</last></author>
+      <author id="julio-castillo"><first>Julio Javier</first><last>Castillo</last></author>
       <pages>12–17</pages>
       <url hash="c53e83c9">R09-2003</url>
       <bibkey>castillo-2009-study</bibkey>
@@ -856,7 +856,7 @@
     </paper>
     <paper id="12">
       <title>Exploring Context Variation and Lexicon Coverage in Projection-based Approach for Term Translation</title>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <pages>66–70</pages>
       <url hash="be7d1e54">R09-2012</url>
       <bibkey>rubino-2009-exploring</bibkey>
diff --git a/data/xml/R11.xml b/data/xml/R11.xml
index 45a75b63a6..62444f12b8 100644
--- a/data/xml/R11.xml
+++ b/data/xml/R11.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the International Conference Recent Advances in Natural Language Processing 2011</booktitle>
       <url hash="2f185176">R11-1</url>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Extracting <fixed-case>STRIPS</fixed-case> Representations of Actions and Events</title>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Alexander</first><last>Yates</last></author>
       <pages>1–8</pages>
       <url hash="2b5c5e98">R11-1001</url>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Acquiring Topic Features to improve Event Extraction: in Pre-selected and Balanced Collections</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>9–16</pages>
       <url hash="ca634de0">R11-1002</url>
       <bibkey>liao-grishman-2011-acquiring</bibkey>
@@ -52,14 +52,14 @@
     <paper id="5">
       <title>Knowledge-Poor Approach to Shallow Parsing: Contribution of Unsupervised Part-of-Speech Induction</title>
       <author><first>Marie</first><last>Guégan</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>33–40</pages>
       <url hash="29a79de5">R11-1005</url>
       <bibkey>guegan-de-loupy-2011-knowledge</bibkey>
     </paper>
     <paper id="6">
       <title>Fast Domain Adaptation for Part of Speech Tagging for Dialogues</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Eric</first><last>Baucom</last></author>
       <pages>41–48</pages>
       <url hash="67a2cb6c">R11-1006</url>
@@ -69,14 +69,14 @@
       <title>Using a Morphological Database to Increase the Accuracy in <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Hrafn</first><last>Loftsson</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>49–55</pages>
       <url hash="46e97dc4">R11-1007</url>
       <bibkey>loftsson-etal-2011-using</bibkey>
     </paper>
     <paper id="8">
       <title>Actions Speak Louder than Words: Evaluating Parsers in the Context of Natural Language Understanding Systems for Human-Robot Interaction</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Rachael</first><last>Cantrell</last></author>
       <author><first>Matthias</first><last>Scheutz</last></author>
       <pages>56–62</pages>
@@ -118,7 +118,7 @@
     </paper>
     <paper id="13">
       <title>Enriching a statistical machine translation system trained on small parallel corpora with rule-based bilingual phrases</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>90–96</pages>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="14">
       <title>Assessing the Post-Editing Effort for Automatic and Semi-Automatic Translations of <fixed-case>DVD</fixed-case> Subtitles</title>
-      <author><first>Sheila</first><last>C. M. de Sousa</last></author>
+      <author id="sheila-c-m-de-sousa"><first>Sheila</first><last>C. M. de Sousa</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>97–103</pages>
@@ -138,9 +138,9 @@
       <title><fixed-case>JRC</fixed-case>-<fixed-case>NAMES</fixed-case>: A Freely Available, Highly Multilingual Named Entity Resource</title>
       <author><first>Ralf</first><last>Steinberger</last></author>
       <author><first>Bruno</first><last>Pouliquen</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Jenya</first><last>Belyaeva</last></author>
-      <author><first>Erik</first><last>van der Goot</last></author>
+      <author id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></author>
       <pages>104–110</pages>
       <url hash="3682e1a2">R11-1015</url>
       <bibkey>steinberger-etal-2011-jrc</bibkey>
@@ -211,7 +211,7 @@
     </paper>
     <paper id="23">
       <title>Noun Compound and Named Entity Recognition and their Usability in Keyphrase Extraction</title>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <pages>162–169</pages>
@@ -235,8 +235,8 @@
     </paper>
     <paper id="26">
       <title>Cross-Domain <fixed-case>D</fixed-case>utch Coreference Resolution</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <pages>186–193</pages>
       <url hash="ef52b8cd">R11-1026</url>
@@ -245,7 +245,7 @@
     <paper id="27">
       <title>Finding the Best Approach for Multi-lingual Text Summarisation: A Comparative Analysis</title>
       <author><first>Elena</first><last>Lloret</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>194–201</pages>
       <url hash="19996d77">R11-1027</url>
       <bibkey>lloret-palomar-2011-finding</bibkey>
@@ -253,7 +253,7 @@
     <paper id="28">
       <title>Automatically Creating General-Purpose Opinion Summaries from Text</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>202–209</pages>
       <url hash="c72b119c">R11-1028</url>
       <bibkey>stoyanov-cardie-2011-automatically</bibkey>
@@ -270,7 +270,7 @@
     <paper id="30">
       <title>Temporal Relation Extraction Using Expectation Maximization</title>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last></author>
       <pages>218–225</pages>
       <url hash="33549241">R11-1030</url>
       <bibkey>mirroshandel-ghassem-sani-2011-temporal</bibkey>
@@ -286,9 +286,9 @@
     </paper>
     <paper id="32">
       <title>Improving <fixed-case>WSD</fixed-case> using <fixed-case>ISR</fixed-case>-<fixed-case>WN</fixed-case> with Relevant Semantic Trees and <fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or Senses Frequency</title>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>233–239</pages>
       <url hash="7bbf7ba2">R11-1032</url>
       <bibkey>gutierrez-etal-2011-improving</bibkey>
@@ -296,8 +296,8 @@
     <paper id="33">
       <title>Investigating Advanced Techniques for Document Content Similarity Applied to External Plagiarism Analysis</title>
       <author><first>Daniel</first><last>Micol</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Óscar</first><last>Ferrández</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="oscar-ferrandez"><first>Óscar</first><last>Ferrández</last></author>
       <pages>240–246</pages>
       <url hash="326351d4">R11-1033</url>
       <bibkey>micol-etal-2011-investigating</bibkey>
@@ -305,7 +305,7 @@
     <paper id="34">
       <title>Using Cognates in a <fixed-case>F</fixed-case>rench-<fixed-case>R</fixed-case>omanian Lexical Alignment System: A Comparative Study</title>
       <author><first>Mirabela</first><last>Navlea</last></author>
-      <author><first>Amalia</first><last>Todiraşcu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todiraşcu</last></author>
       <pages>247–253</pages>
       <url hash="bdc82721">R11-1034</url>
       <bibkey>navlea-todirascu-2011-using</bibkey>
@@ -315,19 +315,19 @@
       <author><first>Josef</first><last>Steinberger</last></author>
       <author><first>Jenya</first><last>Belyaeva</last></author>
       <author><first>Jonathan</first><last>Crawley</last></author>
-      <author><first>Leonida</first><last>Della-Rocca</last></author>
+      <author id="leonida-della-rocca"><first>Leonida</first><last>Della-Rocca</last></author>
       <author><first>Mohamed</first><last>Ebrahim</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Erik</first><last>van der Goot</last></author>
+      <author id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></author>
       <pages>254–260</pages>
       <url hash="9a6b3f98">R11-1035</url>
       <bibkey>steinberger-etal-2011-highly</bibkey>
     </paper>
     <paper id="36">
       <title>Singletons and Coreference Resolution Evaluation</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Desislava</first><last>Zhekova</last></author>
       <pages>261–267</pages>
       <url hash="852d78c2">R11-1036</url>
@@ -335,7 +335,7 @@
     </paper>
     <paper id="37">
       <title>Modelling Entity Instantiations</title>
-      <author><first>Andrew</first><last>McKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>McKinlay</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <pages>268–274</pages>
       <url hash="80fa89e6">R11-1037</url>
@@ -360,7 +360,7 @@
     <paper id="40">
       <title>Multiword Expressions and Named Entities in the Wiki50 Corpus</title>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
       <pages>289–295</pages>
       <url hash="a7e475e9">R11-1040</url>
@@ -368,9 +368,9 @@
     </paper>
     <paper id="41">
       <title>Towards the Automatic Merging of Lexical Resources: Automatic Mapping</title>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Silvia</first><last>Necsulescu</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="silvia-necsulescu"><first>Silvia</first><last>Necsulescu</last></author>
       <pages>296–301</pages>
       <url hash="e502498c">R11-1041</url>
       <bibkey>padro-etal-2011-towards</bibkey>
@@ -394,14 +394,14 @@
     <paper id="44">
       <title>Cultural Configuration of <fixed-case>W</fixed-case>ikipedia: measuring Autoreferentiality in Different Languages</title>
       <author><first>Marc</first><last>Miquel Ribé</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <pages>316–322</pages>
       <url hash="114c3bf9">R11-1044</url>
       <bibkey>miquel-ribe-rodriguez-2011-cultural</bibkey>
     </paper>
     <paper id="45">
       <title>Combining Relational and Attributional Similarity for Semantic Relation Classification</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
       <pages>323–330</pages>
       <url hash="ec892727">R11-1045</url>
@@ -410,7 +410,7 @@
     <paper id="46">
       <title>In Search of Missing Arguments: A Linguistic Approach</title>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
-      <author><first>Philip</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip</first><last>Gorinski</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <pages>331–338</pages>
       <url hash="15ddebe9">R11-1046</url>
@@ -418,8 +418,8 @@
     </paper>
     <paper id="47">
       <title>Enlarging Monolingual Dictionaries for Machine Translation with Active Learning and Non-Expert Users</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>339–346</pages>
       <url hash="3effaafc">R11-1047</url>
@@ -436,7 +436,7 @@
     <paper id="49">
       <title>Adaptability of Lexical Acquisition for Large-scale Grammars</title>
       <author><first>Kostadin</first><last>Cholakov</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <pages>355–362</pages>
@@ -446,8 +446,8 @@
     <paper id="50">
       <title>Integration of Data from a Syntactic Lexicon into Generative and Discriminative Probabilistic Parsers</title>
       <author><first>Anthony</first><last>Sigogne</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Éric</first><last>Laporte</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
       <pages>363–370</pages>
       <url hash="4c69a630">R11-1050</url>
       <bibkey>sigogne-etal-2011-integration</bibkey>
@@ -456,7 +456,7 @@
       <title>Pattern Learning for Event Extraction using Monolingual Statistical Machine Translation</title>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>371–377</pages>
       <url hash="baaa6a03">R11-1051</url>
       <bibkey>turchi-etal-2011-pattern</bibkey>
@@ -504,7 +504,7 @@
     <paper id="57">
       <title>A Semi-Automatic, Iterative Method for Creating a Domain-Specific Treebank</title>
       <author><first>Corina</first><last>Dima</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>413–419</pages>
       <url hash="3acfb286">R11-1057</url>
       <bibkey>dima-hinrichs-2011-semi</bibkey>
@@ -512,7 +512,7 @@
     <paper id="58">
       <title>Determining Immediate Constituents of Compounds in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et</title>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>420–426</pages>
       <url hash="56e3e267">R11-1058</url>
       <bibkey>henrich-hinrichs-2011-determining</bibkey>
@@ -530,7 +530,7 @@
     <paper id="60">
       <title>A Contextual Classification Strategy for Polarity Analysis of Direct Quotations from Financial News</title>
       <author><first>Brett</first><last>Drury</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Luís</first><last>Torgo</last></author>
       <pages>434–440</pages>
       <url hash="c3d870af">R11-1060</url>
@@ -572,7 +572,7 @@
     </paper>
     <paper id="65">
       <title>Towards <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics over <fixed-case>B</fixed-case>ulgarian Dependency Parsing</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>471–478</pages>
       <url hash="d9964310">R11-1065</url>
@@ -632,8 +632,8 @@
       <title>Evaluating the Robustness of <fixed-case>E</fixed-case>moti<fixed-case>B</fixed-case>log for Sentiment Analysis and Opinion Mining</title>
       <author><first>Ester</first><last>Boldrini</last></author>
       <author><first>Javi</first><last>Fernández</last></author>
-      <author><first>José Manuel</first><last>Gómez</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="jose-m-gomez"><first>José Manuel</first><last>Gómez</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>521–526</pages>
       <url hash="090ba200">R11-1072</url>
       <bibkey>boldrini-etal-2011-evaluating</bibkey>
@@ -641,8 +641,8 @@
     <paper id="73">
       <title>Hybrid System For Plagiarism Detection</title>
       <author><first>Javier R.</first><last>Bru</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>527–532</pages>
       <url hash="c73080bc">R11-1073</url>
       <bibkey>bru-etal-2011-hybrid</bibkey>
@@ -650,19 +650,19 @@
     <paper id="74">
       <title>Data-Driven Approach Using Semantics for Recognizing and Classifying <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Events in <fixed-case>I</fixed-case>talian</title>
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Borja</first><last>Navarro-Colorado</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro-Colorado</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <pages>533–538</pages>
       <url hash="59644044">R11-1074</url>
       <bibkey>caselli-etal-2011-data</bibkey>
     </paper>
     <paper id="75">
       <title>Can Alternations Be Learned? A Machine Learning Approach To <fixed-case>R</fixed-case>omanian Verb Conjugation</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Emil</first><last>Ionescu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <pages>539–544</pages>
       <url hash="8c4e90ab">R11-1075</url>
       <bibkey>dinu-etal-2011-alternations</bibkey>
@@ -671,7 +671,7 @@
       <title>A New Representation Model for the Automatic Recognition and Translation of <fixed-case>A</fixed-case>rabic Named Entities with <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case></title>
       <author><first>Héla</first><last>Fehri</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Abdelmajid</first><last>Ben hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben hamadou</last></author>
       <pages>545–550</pages>
       <url hash="2db40770">R11-1076</url>
       <bibkey>fehri-etal-2011-new</bibkey>
@@ -702,7 +702,7 @@
     <paper id="80">
       <title>An algorithm of Identifying Semantic Arguments of a Verb From Structured Data</title>
       <author><first>Minhua</first><last>Huang</last></author>
-      <author><first>Robert M.</first><last>Haralick</last></author>
+      <author id="robert-m-haralick"><first>Robert M.</first><last>Haralick</last></author>
       <pages>568–573</pages>
       <url hash="97a3f50b">R11-1080</url>
       <bibkey>huang-haralick-2011-algorithm</bibkey>
@@ -734,9 +734,9 @@
     </paper>
     <paper id="84">
       <title>A Hybrid Approach for Event Extraction and Event Actor Identification</title>
-      <author><first>Anup Kumar</first><last>Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup Kumar</first><last>Kolya</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>592–597</pages>
       <url hash="f24593df">R11-1084</url>
       <bibkey>kolya-etal-2011-hybrid</bibkey>
@@ -778,7 +778,7 @@
     </paper>
     <paper id="89">
       <title>Domain-Dependent Identification of Multiword Expressions</title>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
       <pages>622–627</pages>
@@ -812,7 +812,7 @@
     <paper id="93">
       <title>Finding Negative Key Phrases for <fixed-case>I</fixed-case>nternet Advertising Campaigns using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Martin</first><last>Scaiano</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>648–653</pages>
       <url hash="0462ab59">R11-1093</url>
       <bibkey>scaiano-inkpen-2011-finding</bibkey>
@@ -844,7 +844,7 @@
     </paper>
     <paper id="97">
       <title><fixed-case>W</fixed-case>ordnets: State of the Art and Perspectives. Case Study: the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <pages>672–677</pages>
       <url hash="bc77eba6">R11-1097</url>
       <bibkey>barbu-mititelu-2011-wordnets</bibkey>
@@ -879,10 +879,10 @@
     </paper>
     <paper id="101">
       <title>The <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish Treebank On-line Interface</title>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
-      <author><first>Luis-Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis-Adrián</first><last>Cabrera-Diego</last></author>
       <author><first>Brenda-Gabriela</first><last>Castro-Rolón</last></author>
       <author><first>Juan-Miguel</first><last>Rolland Bartilotti</last></author>
       <pages>698–703</pages>
@@ -899,8 +899,8 @@
     </paper>
     <paper id="103">
       <title>Multiple Evidence for Term Extraction in Broad Domains</title>
-      <author><first>Boris</first><last>Dobrov</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>710–715</pages>
       <url hash="b1670ffc">R11-1103</url>
       <bibkey>dobrov-loukachevitch-2011-multiple</bibkey>
@@ -908,8 +908,8 @@
     <paper id="104">
       <title>Language Modeling for Document Selection in Question Answering</title>
       <author><first>Nicolas</first><last>Foucault</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>716–720</pages>
       <url hash="c811f085">R11-1104</url>
       <bibkey>foucault-etal-2011-language</bibkey>
@@ -941,9 +941,9 @@
     <paper id="108">
       <title>Bilingual Experiments with an <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Corpus for Opinion Mining</title>
       <author><first>Mohammed</first><last>Rushdi-Saleh</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
-      <author><first>José M.</first><last>Perea-Ortega</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="jose-manuel-perea-ortega"><first>José M.</first><last>Perea-Ortega</last></author>
       <pages>740–745</pages>
       <url hash="a858c564">R11-1108</url>
       <bibkey>rushdi-saleh-etal-2011-bilingual</bibkey>
@@ -954,7 +954,7 @@
       <author><first>Walter</first><last>Koza</last></author>
       <author><first>Josuka</first><last>Díaz-Labrador</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Solange</first><last>Oliveira Rezende</last></author>
+      <author id="solange-oliveira-rezende"><first>Solange</first><last>Oliveira Rezende</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <author><first>Zulema</first><last>Solana</last></author>
       <pages>746–751</pages>
@@ -980,10 +980,10 @@
     <paper id="112">
       <title>Recognition and Classification of Numerical Entities in <fixed-case>B</fixed-case>asque</title>
       <author><first>Ander</first><last>Soraluze</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Olatz</first><last>Ansa</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="olatz-ansa"><first>Olatz</first><last>Ansa</last></author>
       <author><first>Olatz</first><last>Arregi</last></author>
-      <author><first>Xabier</first><last>Arregi</last></author>
+      <author id="xabier-arregi"><first>Xabier</first><last>Arregi</last></author>
       <pages>764–769</pages>
       <url hash="43dc0e15">R11-1112</url>
       <bibkey>soraluze-etal-2011-recognition</bibkey>
@@ -992,9 +992,9 @@
       <title>Multilingual Entity-Centered Sentiment Analysis Evaluated by Parallel Corpora</title>
       <author><first>Josef</first><last>Steinberger</last></author>
       <author><first>Polina</first><last>Lenkova</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Erik</first><last>van der Goot</last></author>
+      <author id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></author>
       <pages>770–775</pages>
       <url hash="4da6dcbe">R11-1113</url>
       <bibkey>steinberger-etal-2011-multilingual</bibkey>
@@ -1038,7 +1038,7 @@
     </frontmatter>
     <paper id="1">
       <title>Domain-Dependent Detection of Light Verb Constructions</title>
-      <author><first>István T.</first><last>Nagy</last></author>
+      <author id="istvan-nagy-t"><first>István T.</first><last>Nagy</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
       <author><first>György</first><last>Móra</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
@@ -1055,7 +1055,7 @@
     </paper>
     <paper id="3">
       <title>Towards a Better Exploitation of the Brown ‘Family’ Corpora in Diachronic Studies of <fixed-case>B</fixed-case>ritish and <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish Language Varieties</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <pages>17–24</pages>
       <url hash="7c33e617">R11-2003</url>
       <bibkey>stajner-2011-towards</bibkey>
@@ -1081,7 +1081,7 @@
     <paper id="6">
       <title>Inter-domain Opinion Phrase Extraction Based on Feature Augmentation</title>
       <author><first>Gábor</first><last>Berend</last></author>
-      <author><first>István T.</first><last>Nagy</last></author>
+      <author id="istvan-nagy-t"><first>István T.</first><last>Nagy</last></author>
       <author><first>György</first><last>Móra</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <pages>41–47</pages>
@@ -1133,7 +1133,7 @@
     </paper>
     <paper id="13">
       <title>Initial Experiments with Multilingual Extraction of Rhetoric Figures by means of <fixed-case>PERL</fixed-case>-compatible Regular Expressions</title>
-      <author><first>Daniel Devatman</first><last>Hromada</last></author>
+      <author id="daniel-hromada"><first>Daniel Devatman</first><last>Hromada</last></author>
       <pages>85–90</pages>
       <url hash="4e39306e">R11-2013</url>
       <bibkey>hromada-2011-initial</bibkey>
@@ -1149,7 +1149,7 @@
     </paper>
     <paper id="15">
       <title>Heterogeneous Natural Language Processing Tools via Language Processing Chains</title>
-      <author><first>Diman</first><last>Karagiozov</last></author>
+      <author id="diman-karagyozov"><first>Diman</first><last>Karagiozov</last></author>
       <pages>97–102</pages>
       <url hash="06fee5d8">R11-2015</url>
       <bibkey>karagiozov-2011-heterogeneous</bibkey>
@@ -1179,7 +1179,7 @@
     </paper>
     <paper id="19">
       <title>Automatic Acquisition of Possible Contexts for Low-Frequent Words</title>
-      <author><first>Silvia</first><last>Necsulescu</last></author>
+      <author id="silvia-necsulescu"><first>Silvia</first><last>Necsulescu</last></author>
       <pages>121–126</pages>
       <url hash="c52ee011">R11-2019</url>
       <bibkey>necsulescu-2011-automatic</bibkey>
@@ -1193,7 +1193,7 @@
     </paper>
     <paper id="21">
       <title>Towards Cross-Language Word Sense Disambiguation for <fixed-case>Q</fixed-case>uechua</title>
-      <author><first>Alex</first><last>Rudnick</last></author>
+      <author id="alex-rudnick"><first>Alex</first><last>Rudnick</last></author>
       <pages>133–138</pages>
       <url hash="bb2eb1a5">R11-2021</url>
       <bibkey>rudnick-2011-towards</bibkey>
@@ -1201,7 +1201,7 @@
     <paper id="22">
       <title>Annotating Negation and Speculation: the Case of the Review Domain</title>
       <author><first>Natalia</first><last>Konstantinova</last></author>
-      <author><first>Sheila</first><last>C. M. de Sousa</last></author>
+      <author id="sheila-c-m-de-sousa"><first>Sheila</first><last>C. M. de Sousa</last></author>
       <pages>139–144</pages>
       <url hash="f8036bc2">R11-2022</url>
       <bibkey>konstantinova-c-m-de-sousa-2011-annotating</bibkey>
diff --git a/data/xml/R13.xml b/data/xml/R13.xml
index 6dd39520bc..d641d549ee 100644
--- a/data/xml/R13.xml
+++ b/data/xml/R13.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the International Conference Recent Advances in Natural Language Processing <fixed-case>RANLP</fixed-case> 2013</booktitle>
       <url hash="1d184be2">R13-1</url>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -20,8 +20,8 @@
     <paper id="1">
       <title><fixed-case>ASMA</fixed-case>: A System for Automatic Segmentation and Morpho-Syntactic Disambiguation of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>1–8</pages>
       <url hash="42f90823">R13-1001</url>
       <bibkey>abdul-mageed-etal-2013-asma</bibkey>
@@ -39,7 +39,7 @@
       <author><first>Tanveer</first><last>Ali</last></author>
       <author><first>Marina</first><last>Sokolova</last></author>
       <author><first>David</first><last>Schramm</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>18–24</pages>
       <url hash="ccf674dc">R13-1003</url>
       <bibkey>ali-etal-2013-opinion</bibkey>
@@ -56,8 +56,8 @@
     <paper id="5">
       <title>A Semi-supervised Learning Approach to <fixed-case>A</fixed-case>rabic Named Entity Recognition</title>
       <author><first>Maha</first><last>Althobaiti</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>32–40</pages>
       <url hash="6ad00d46">R13-1005</url>
       <bibkey>althobaiti-etal-2013-semi</bibkey>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="7">
       <title>Improving Sentiment Analysis in <fixed-case>T</fixed-case>witter Using Multilingual Machine Translated Data</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>49–55</pages>
       <url hash="85d5bee2">R13-1007</url>
@@ -86,7 +86,7 @@
       <title>Domain Adaptation for Parsing</title>
       <author><first>Eric</first><last>Baucom</last></author>
       <author><first>Levi</first><last>King</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>56–64</pages>
       <url hash="236b496f">R13-1008</url>
       <bibkey>baucom-etal-2013-domain</bibkey>
@@ -112,9 +112,9 @@
     <paper id="11">
       <title><fixed-case>T</fixed-case>wit<fixed-case>IE</fixed-case>: An Open-Source Information Extraction Pipeline for Microblog Text</title>
       <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Adam</first><last>Funk</last></author>
-      <author><first>Mark</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark</first><last>Greenwood</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Niraj</first><last>Aswani</last></author>
       <pages>83–90</pages>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="12">
       <title>A unified lexical processing framework based on the Margin Infused Relaxed Algorithm. A case study on the <fixed-case>R</fixed-case>omanian Language</title>
-      <author><first>Tiberiu</first><last>Boroș</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boroș</last></author>
       <pages>91–97</pages>
       <url hash="d29f1f61">R13-1012</url>
       <bibkey>boros-2013-unified</bibkey>
@@ -149,8 +149,8 @@
     <paper id="15">
       <title>Recognising and Interpreting Named Temporal Expressions</title>
       <author><first>Matteo</first><last>Brucato</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Hector</first><last>Llorens</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
       <author><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Christian S.</first><last>Jensen</last></author>
       <pages>113–121</pages>
@@ -175,9 +175,9 @@
     </paper>
     <paper id="18">
       <title>Temporal Text Classification for <fixed-case>R</fixed-case>omanian Novels set in the Past</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
       <pages>136–140</pages>
@@ -186,8 +186,8 @@
     </paper>
     <paper id="19">
       <title>A Dictionary-Based Approach for Evaluating Orthographic Methods in Cognates Identification</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu Petrisor</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu Petrisor</first><last>Dinu</last></author>
       <pages>141–147</pages>
       <url hash="52d7d60d">R13-1019</url>
       <bibkey>ciobanu-dinu-2013-dictionary</bibkey>
@@ -202,7 +202,7 @@
     </paper>
     <paper id="21">
       <title>Semantic Relations between Events and their Time, Locations and Participants for Event Coreference Resolution</title>
-      <author><first>Agata</first><last>Cybulska</last></author>
+      <author id="agata-cybulska"><first>Agata</first><last>Cybulska</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>156–163</pages>
       <url hash="0fc8e843">R13-1021</url>
@@ -211,9 +211,9 @@
     <paper id="22">
       <title>Sense Clustering Using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Bharath</first><last>Dandala</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>164–171</pages>
       <url hash="95da7e12">R13-1022</url>
       <bibkey>dandala-etal-2013-sense</bibkey>
@@ -228,11 +228,11 @@
     </paper>
     <paper id="24">
       <title>Normalization of <fixed-case>D</fixed-case>utch User-Generated Content</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Sarah</first><last>Schulz</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>179–188</pages>
       <url hash="cff40aed">R13-1024</url>
       <bibkey>de-clercq-etal-2013-normalization</bibkey>
@@ -240,7 +240,7 @@
     <paper id="25">
       <title>Linguistic Profiling of Texts Across Textual Genres and Readability Levels. An Exploratory Study on <fixed-case>I</fixed-case>talian Fictional Prose</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <pages>189–197</pages>
       <url hash="3a308568">R13-1025</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>T</fixed-case>witter Part-of-Speech Tagging for All: Overcoming Sparse and Noisy Data</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Sam</first><last>Clark</last></author>
       <author><first>Kalina</first><last>Bontcheva</last></author>
@@ -268,8 +268,8 @@
     </paper>
     <paper id="28">
       <title>Sequence Tagging for Verb Conjugation in <fixed-case>R</fixed-case>omanian</title>
-      <author><first>Liviu</first><last>Dinu</last></author>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
       <pages>215–220</pages>
       <url hash="75bbc1e8">R13-1028</url>
@@ -278,17 +278,17 @@
     <paper id="29">
       <title>A Tagging Approach to Identify Complex Constituents for Text Simplification</title>
       <author><first>Iustin</first><last>Dornescu</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>221–229</pages>
       <url hash="f282d19a">R13-1029</url>
       <bibkey>dornescu-etal-2013-tagging</bibkey>
     </paper>
     <paper id="30">
       <title>Automatic Evaluation Metric for Machine Translation that is Independent of Sentence Length</title>
-      <author><first>Hiroshi</first><last>Echizen’ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen’ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>230–236</pages>
       <url hash="97c3e995">R13-1030</url>
       <bibkey>echizenya-etal-2013-automatic</bibkey>
@@ -296,18 +296,18 @@
     <paper id="31">
       <title>Acronym recognition and processing in 22 languages</title>
       <author><first>Maud</first><last>Ehrmann</last></author>
-      <author><first>Leonida</first><last>Della Rocca</last></author>
+      <author id="leonida-della-rocca"><first>Leonida</first><last>Della Rocca</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Hristo</first><last>Tannev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tannev</last></author>
       <pages>237–244</pages>
       <url hash="16300b6a">R13-1031</url>
       <bibkey>ehrmann-etal-2013-acronym</bibkey>
     </paper>
     <paper id="32">
       <title>An Evaluation Summary Method Based on a Combination of Content and Linguistic Metrics</title>
-      <author><first>Samira</first><last>Ellouze</last></author>
+      <author id="samira-ellouze"><first>Samira</first><last>Ellouze</last></author>
       <author><first>Maher</first><last>Jaoua</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>245–251</pages>
       <url hash="42bf178e">R13-1032</url>
       <bibkey>ellouze-etal-2013-evaluation-summary</bibkey>
@@ -323,8 +323,8 @@
     </paper>
     <paper id="34">
       <title>Temporal Relation Classification in <fixed-case>P</fixed-case>ersian and <fixed-case>E</fixed-case>nglish contexts</title>
-      <author><first>Mahbaneh</first><last>Eshaghzadeh Torbati</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-sani</last></author>
+      <author id="mahbaneh-eshaghzadeh-torbati"><first>Mahbaneh</first><last>Eshaghzadeh Torbati</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-sani</last></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
       <author><first>Negin</first><last>Karimi Hosseini</last></author>
@@ -334,7 +334,7 @@
     </paper>
     <paper id="35">
       <title>The Extended Lexicon: Language Processing as Lexical Description</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>270–276</pages>
       <url hash="af1a9649">R13-1035</url>
       <bibkey>evans-2013-extended</bibkey>
@@ -342,7 +342,7 @@
     <paper id="36">
       <title>Did <fixed-case>I</fixed-case> really mean that? Applying automatic summarisation techniques to formative feedback</title>
       <author><first>Debora</first><last>Field</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <author><first>Nicolas</first><last>Van Labeke</last></author>
       <author><first>Denise</first><last>Whitelock</last></author>
       <author><first>John</first><last>Richardson</last></author>
@@ -354,7 +354,7 @@
       <title>Matching sets of parse trees for answering multi-sentence questions</title>
       <author><first>Boris</first><last>Galitsky</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
-      <author><first>Sergei O.</first><last>Kuznetsov</last></author>
+      <author id="sergey-o-kuznetsov"><first>Sergei O.</first><last>Kuznetsov</last></author>
       <author><first>Fedor</first><last>Strok</last></author>
       <pages>285–293</pages>
       <url hash="8f1cb1fa">R13-1037</url>
@@ -419,7 +419,7 @@
     <paper id="45">
       <title>Unsupervised Induction of <fixed-case>A</fixed-case>rabic Root and Pattern Lexicons using Machine Learning</title>
       <author><first>Bilal</first><last>Khaliq</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>350–356</pages>
       <url hash="0d0b501f">R13-1045</url>
       <bibkey>khaliq-carroll-2013-unsupervised</bibkey>
@@ -428,7 +428,7 @@
       <title>Towards Domain Adaptation for Parsing Web Data</title>
       <author><first>Mohammad</first><last>Khan</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>357–364</pages>
       <url hash="b52e197d">R13-1046</url>
       <bibkey>khan-etal-2013-towards</bibkey>
@@ -436,7 +436,7 @@
     <paper id="47">
       <title>Capturing Anomalies in the Choice of Content Words in Compositional Distributional Semantic Space</title>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>365–372</pages>
       <url hash="1e029f92">R13-1047</url>
       <bibkey>kochmar-briscoe-2013-capturing</bibkey>
@@ -468,7 +468,7 @@
     <paper id="51">
       <title>Confidence Estimation for Knowledge Base Population</title>
       <author><first>Xiang</first><last>Li</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>396–401</pages>
       <url hash="b653bb95">R13-1051</url>
       <bibkey>li-grishman-2013-confidence</bibkey>
@@ -477,8 +477,8 @@
       <title>Towards Fine-grained Citation Function Classification</title>
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>402–407</pages>
       <url hash="57db5507">R13-1052</url>
       <bibkey>li-etal-2013-towards</bibkey>
@@ -534,7 +534,7 @@
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>443–452</pages>
       <url hash="b4e58a4b">R13-1058</url>
       <bibkey>maziarz-etal-2013-beyond</bibkey>
@@ -542,14 +542,14 @@
     <paper id="59">
       <title>History Based Unsupervised Data Oriented Parsing</title>
       <author><first>Mohsen</first><last>Mesgar</last></author>
-      <author><first>Gholamreza</first><last>Ghasem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghasem-Sani</last></author>
       <pages>453–459</pages>
       <url hash="dd96269a">R13-1059</url>
       <bibkey>mesgar-ghasem-sani-2013-history</bibkey>
     </paper>
     <paper id="60">
       <title>Contrasting and Corroborating Citations in Journal Articles</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <pages>460–466</pages>
       <url hash="37b63550">R13-1060</url>
       <bibkey>meyers-2013-contrasting</bibkey>
@@ -557,7 +557,7 @@
     <paper id="61">
       <title><fixed-case>CCG</fixed-case> Categories for Distributional Semantic Models</title>
       <author><first>Paramita</first><last>Mirza</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>467–474</pages>
       <url hash="c7e7edbd">R13-1061</url>
       <bibkey>mirza-bernardi-2013-ccg</bibkey>
@@ -584,26 +584,26 @@
     <paper id="64">
       <title>Improving Web 2.0 Opinion Mining Systems Using Text Normalisation Techniques</title>
       <author><first>Alejandro</first><last>Mosquera</last></author>
-      <author><first>Paloma</first><last>Moreda Pozo</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda Pozo</last></author>
       <pages>491–495</pages>
       <url hash="642a1997">R13-1064</url>
       <bibkey>mosquera-moreda-pozo-2013-improving</bibkey>
     </paper>
     <paper id="65">
       <title>Identifying Social and Expressive Factors in Request Texts Using Transaction/Sequence Model</title>
-      <author><first>Daša</first><last>Munková</last></author>
+      <author id="dasa-munkova"><first>Daša</first><last>Munková</last></author>
       <author><first>Michal</first><last>Munk</last></author>
-      <author><first>Zuzana</first><last>Fráterová</last></author>
+      <author id="zuzana-fraterova"><first>Zuzana</first><last>Fráterová</last></author>
       <pages>496–503</pages>
       <url hash="0f69cf61">R13-1065</url>
       <bibkey>munkova-etal-2013-identifying</bibkey>
     </paper>
     <paper id="66">
       <title>Parameter Optimization for Statistical Machine Translation: It Pays to Learn from Hard Examples</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Fahad</first><last>Al Obaidli</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>504–510</pages>
       <url hash="7a6fdc31">R13-1066</url>
       <bibkey>nakov-etal-2013-parameter</bibkey>
@@ -620,7 +620,7 @@
     <paper id="68">
       <title>High-Accuracy Phrase Translation Acquisition Through Battle-Royale Selection</title>
       <author><first>Lionel</first><last>Nicolas</last></author>
-      <author><first>Egon W.</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon W.</first><last>Stemle</last></author>
       <author><first>Klara</first><last>Kranebitter</last></author>
       <author><first>Verena</first><last>Lyding</last></author>
       <pages>516–524</pages>
@@ -639,7 +639,7 @@
     <paper id="70">
       <title>A clustering approach for translationese identification</title>
       <author><first>Sergiu</first><last>Nisioi</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>532–538</pages>
       <url hash="ab35ec49">R13-1070</url>
       <bibkey>nisioi-dinu-2013-clustering</bibkey>
@@ -663,7 +663,7 @@
     <paper id="73">
       <title>Information Spreading in Expanding <fixed-case>W</fixed-case>ordnet Hypernymy Structure</title>
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Radosław</first><last>Ramocki</last></author>
+      <author id="radoslaw-ramocki"><first>Radosław</first><last>Ramocki</last></author>
       <author><first>Michał</first><last>Kaliński</last></author>
       <pages>553–561</pages>
       <url hash="be1780c7">R13-1073</url>
@@ -671,7 +671,7 @@
     </paper>
     <paper id="74">
       <title>Context Independent Term Mapper for <fixed-case>E</fixed-case>uropean Languages</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>562–570</pages>
       <url hash="87d442b5">R13-1074</url>
       <bibkey>pinnis-2013-context</bibkey>
@@ -704,7 +704,7 @@
       <title>A Combined Pattern-based and Distributional Approach for Automatic Hypernym Detection in <fixed-case>D</fixed-case>utch.</title>
       <author><first>Gwendolijn</first><last>Schropp</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>593–600</pages>
       <url hash="b81a491f">R13-1078</url>
       <bibkey>schropp-etal-2013-combined</bibkey>
@@ -730,7 +730,7 @@
       <title>A New Approach to the <fixed-case>POS</fixed-case> Tagging Problem Using Evolutionary Computation</title>
       <author><first>Ana Paula</first><last>Silva</last></author>
       <author><first>Arlindo</first><last>Silva</last></author>
-      <author><first>Irene</first><last>Rodrigues</last></author>
+      <author id="irene-rodrigues"><first>Irene</first><last>Rodrigues</last></author>
       <pages>619–625</pages>
       <url hash="91972196">R13-1081</url>
       <bibkey>silva-etal-2013-new</bibkey>
@@ -758,7 +758,7 @@
       <author><first>Giovanni</first><last>Stilo</last></author>
       <author><first>Moreno</first><last>De Vincenzi</last></author>
       <author><first>Alberto E.</first><last>Tozzi</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>640–648</pages>
       <url hash="ee714fc9">R13-1084</url>
       <bibkey>stilo-etal-2013-automated</bibkey>
@@ -774,9 +774,9 @@
     <paper id="86">
       <title>Measuring Closure Properties of Patent Sublanguages</title>
       <author><first>Irina</first><last>Temnikova</last></author>
-      <author><first>Negacy</first><last>Hailu</last></author>
+      <author id="negacy-hailu"><first>Negacy</first><last>Hailu</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <pages>659–666</pages>
       <url hash="bf5a1217">R13-1086</url>
       <bibkey>temnikova-etal-2013-measuring</bibkey>
@@ -785,17 +785,17 @@
       <title>Closure Properties of <fixed-case>B</fixed-case>ulgarian Clinical Text</title>
       <author><first>Irina</first><last>Temnikova</last></author>
       <author><first>Ivelina</first><last>Nikolova</last></author>
-      <author><first>William A.</first><last>Baumgartner</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <pages>667–675</pages>
       <url hash="9682d2f6">R13-1087</url>
       <bibkey>temnikova-etal-2013-closure</bibkey>
     </paper>
     <paper id="88">
       <title>Analyzing the Use of Character-Level Translation with Sparse and Noisy Datasets</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>676–684</pages>
       <url hash="1da0c534">R13-1088</url>
       <bibkey>tiedemann-nakov-2013-analyzing</bibkey>
@@ -812,20 +812,20 @@
     </paper>
     <paper id="90">
       <title>Introducing a Corpus of Human-Authored Dialogue Summaries in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Norton</first><last>Trevisan Roman</last></author>
+      <author id="norton-trevisan-roman"><first>Norton</first><last>Trevisan Roman</last></author>
       <author><first>Paul</first><last>Piwek</last></author>
-      <author><first>Ariadne</first><last>M. B. Rizzoni Carvalho</last></author>
-      <author><first>Alexandre</first><last>Rossi Alvares</last></author>
+      <author id="ariadne-m-b-rizzoni-carvalho"><first>Ariadne</first><last>M. B. Rizzoni Carvalho</last></author>
+      <author id="alexandre-rossi-alvares"><first>Alexandre</first><last>Rossi Alvares</last></author>
       <pages>692–701</pages>
       <url hash="361b1f2a">R13-1090</url>
       <bibkey>trevisan-roman-etal-2013-introducing</bibkey>
     </paper>
     <paper id="91">
       <title><fixed-case>W</fixed-case>ikipedia as an <fixed-case>SMT</fixed-case> Training Corpus</title>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Ștefan</first><last>Dumitrescu</last></author>
-      <author><first>Dan</first><last>Ștefănescu</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Ștefan</first><last>Dumitrescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ștefănescu</last></author>
       <pages>702–709</pages>
       <url hash="858ac8f7">R13-1091</url>
       <bibkey>tufis-etal-2013-wikipedia</bibkey>
@@ -833,7 +833,7 @@
     <paper id="92">
       <title><fixed-case>D</fixed-case>utch<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or: in quest of the ideal sense-tagged corpus</title>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Rubén</first><last>Izquierdo</last></author>
+      <author id="ruben-izquierdo"><first>Rubén</first><last>Izquierdo</last></author>
       <author><first>Attila</first><last>Görög</last></author>
       <pages>710–718</pages>
       <url hash="693e0f04">R13-1092</url>
@@ -843,7 +843,7 @@
       <title>Towards detecting anomalies in the content of standardized <fixed-case>LMF</fixed-case> dictionaries</title>
       <author><first>Wafa</first><last>Wali</last></author>
       <author><first>Bilel</first><last>Gargouri</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>719–726</pages>
       <url hash="918349ee">R13-1093</url>
       <bibkey>wali-etal-2013-towards</bibkey>
@@ -882,7 +882,7 @@
     <paper id="97">
       <title>Machine Learning for Mention Head Detection in Multilingual Coreference Resolution</title>
       <author><first>Desislava</first><last>Zhekova</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>747–754</pages>
       <url hash="c26bdf6c">R13-1097</url>
       <bibkey>zhekova-kubler-2013-machine</bibkey>
@@ -891,7 +891,7 @@
       <title>Combining <fixed-case>POS</fixed-case> Tagging, Dependency Parsing and Coreferential Resolution for <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Valentin</first><last>Zhikov</last></author>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>755–762</pages>
       <url hash="e799fa0f">R13-1098</url>
@@ -901,7 +901,7 @@
       <title>magyarlanc: A Tool for Morphological and Dependency Parsing of <fixed-case>H</fixed-case>ungarian</title>
       <author><first>János</first><last>Zsibrita</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>763–771</pages>
       <url hash="3756b5c9">R13-1099</url>
       <bibkey>zsibrita-etal-2013-magyarlanc</bibkey>
@@ -969,7 +969,7 @@
     </paper>
     <paper id="7">
       <title>Detecting Negated and Uncertain Information in Biomedical and Review Texts</title>
-      <author><first>Noa P.</first><last>Cruz Díaz</last></author>
+      <author id="noa-p-cruz-diaz"><first>Noa P.</first><last>Cruz Díaz</last></author>
       <pages>45–50</pages>
       <url hash="28c5ad03">R13-2007</url>
       <bibkey>cruz-diaz-2013-detecting</bibkey>
@@ -990,7 +990,7 @@
     </paper>
     <paper id="10">
       <title>Towards Definition Extraction Using Conditional Random Fields</title>
-      <author><first>Luis</first><last>Espinosa Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa Anke</last></author>
       <pages>63–70</pages>
       <url hash="226d80f7">R13-2010</url>
       <bibkey>espinosa-anke-2013-towards</bibkey>
@@ -998,14 +998,14 @@
     <paper id="11">
       <title>Event-Centered Simplification of News Stories</title>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <pages>71–78</pages>
       <url hash="f5f3fa6b">R13-2011</url>
       <bibkey>glavas-stajner-2013-event</bibkey>
     </paper>
     <paper id="12">
       <title>Random Projection and Geometrization of String Distance Metrics</title>
-      <author><first>Daniel</first><last>Hromada</last></author>
+      <author id="daniel-hromada"><first>Daniel</first><last>Hromada</last></author>
       <pages>79–85</pages>
       <url hash="fd468821">R13-2012</url>
       <bibkey>hromada-2013-random</bibkey>
diff --git a/data/xml/R15.xml b/data/xml/R15.xml
index 15dee4540d..b9a6aae7b9 100644
--- a/data/xml/R15.xml
+++ b/data/xml/R15.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the International Conference Recent Advances in Natural Language Processing</booktitle>
       <url hash="690bbedc">R15-1</url>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -38,7 +38,7 @@
       <title>Automatic Construction of a <fixed-case>TMF</fixed-case> Terminological Database using a Transducer Cascade</title>
       <author><first>Chihebeddine</first><last>Ammar</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>17–23</pages>
       <url hash="12c98288">R15-1003</url>
       <bibkey>ammar-etal-2015-automatic</bibkey>
@@ -55,7 +55,7 @@
     <paper id="5">
       <title>Predicting the quality of questions on <fixed-case>S</fixed-case>tackoverflow</title>
       <author><first>Antoaneta</first><last>Baltadzhieva</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>32–40</pages>
       <url hash="1f9ec96a">R15-1005</url>
       <bibkey>baltadzhieva-chrupala-2015-predicting</bibkey>
@@ -81,7 +81,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>D</fixed-case>an<fixed-case>P</fixed-case>roof: Pedagogical Spell and Grammar Checking for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>55–62</pages>
       <url hash="b7f938ba">R15-1008</url>
       <bibkey>bick-2015-danproof</bibkey>
@@ -98,7 +98,7 @@
       <author><first>Kai</first><last>Cao</last></author>
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Miao</first><last>Fan</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>72–77</pages>
       <url hash="8479410f">R15-1010</url>
       <bibkey>cao-etal-2015-improving</bibkey>
@@ -107,7 +107,7 @@
       <title>Improving Event Detection with Dependency Regularization</title>
       <author><first>Kai</first><last>Cao</last></author>
       <author><first>Xiang</first><last>Li</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>78–83</pages>
       <url hash="0cbf1ad0">R15-1011</url>
       <bibkey>cao-etal-2015-improving-event</bibkey>
@@ -117,7 +117,7 @@
       <author><first>Daniel</first><last>Castro Castro</last></author>
       <author><first>Yaritza</first><last>Adame Arcia</last></author>
       <author><first>María</first><last>Pelaez Brioso</last></author>
-      <author><first>Rafael</first><last>Muñoz Guillena</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz Guillena</last></author>
       <pages>84–90</pages>
       <url hash="188f7fc7">R15-1012</url>
       <bibkey>castro-castro-etal-2015-authorship</bibkey>
@@ -125,15 +125,15 @@
     <paper id="13">
       <title>Coreference Resolution to Support <fixed-case>IE</fixed-case> from <fixed-case>I</fixed-case>ndian Classical Music Forums</title>
       <author><first>Joe</first><last>Cheri</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>91–96</pages>
       <url hash="ff38c20b">R15-1013</url>
       <bibkey>cheri-bhattacharyya-2015-coreference</bibkey>
     </paper>
     <paper id="14">
       <title>Readability Assessment of Translated Texts</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Flaviu</first><last>Pepelea</last></author>
       <pages>97–103</pages>
       <url hash="fee87511">R15-1014</url>
@@ -149,24 +149,24 @@
     </paper>
     <paper id="16">
       <title>Tune Your Brown Clustering, Please</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Sean</first><last>Chester</last></author>
-      <author><first>Kenneth</first><last>Bøgh</last></author>
+      <author id="kenneth-s-bogh"><first>Kenneth</first><last>Bøgh</last></author>
       <pages>110–117</pages>
       <url hash="9589be77">R15-1016</url>
       <bibkey>derczynski-etal-2015-tune</bibkey>
     </paper>
     <paper id="17">
       <title>Temporal Relation Classification using a Model of Tense and Aspect</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>118–122</pages>
       <url hash="230f7102">R15-1017</url>
       <bibkey>derczynski-gaizauskas-2015-temporal</bibkey>
     </paper>
     <paper id="18">
       <title>Efficient Named Entity Annotation through Pre-empting</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Kalina</first><last>Bontcheva</last></author>
       <pages>123–130</pages>
       <url hash="0a9c7b1e">R15-1018</url>
@@ -191,7 +191,7 @@
     <paper id="21">
       <title>Cross-lingual Synonymy Overlap</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <pages>147–152</pages>
       <url hash="c47efb04">R15-1021</url>
@@ -199,9 +199,9 @@
     </paper>
     <paper id="22">
       <title>Barbecued Opakapaka: Using Semantic Preferences for Ontology Population</title>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <author><first>Georgiana</first><last>Marsic</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>153–159</pages>
       <url hash="f5ec2537">R15-1022</url>
       <bibkey>el-maarouf-etal-2015-barbecued</bibkey>
@@ -220,14 +220,14 @@
       <title>Using the Textual Content of the <fixed-case>LMF</fixed-case>-Normalized Dictionaries for Identifying and Linking the Syntactic Behaviors to the Meanings</title>
       <author><first>Imen</first><last>Elleuch</last></author>
       <author><first>Bilel</first><last>Gargouri</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>168–175</pages>
       <url hash="535c8c34">R15-1024</url>
       <bibkey>elleuch-etal-2015-using</bibkey>
     </paper>
     <paper id="25">
       <title>Weakly Supervised Definition Extraction</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
       <pages>176–185</pages>
@@ -239,7 +239,7 @@
       <author><first>Miao</first><last>Fan</last></author>
       <author><first>Kai</first><last>Cao</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>186–191</pages>
       <url hash="a04aa712">R15-1026</url>
       <bibkey>fan-etal-2015-jointly</bibkey>
@@ -248,7 +248,7 @@
       <title>Distributional Semantics for Resolving Bridging Mentions</title>
       <author><first>Tim</first><last>Feuerbach</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>192–199</pages>
       <url hash="597e42de">R15-1027</url>
       <bibkey>feuerbach-etal-2015-distributional</bibkey>
@@ -257,7 +257,7 @@
       <title>Text Classification into Abstract Classes Based on Discourse Structure</title>
       <author><first>Boris</first><last>Galitsky</last></author>
       <author><first>Dmitry</first><last>Ilvovsky</last></author>
-      <author><first>Sergey O.</first><last>Kuznetsov</last></author>
+      <author id="sergey-o-kuznetsov"><first>Sergey O.</first><last>Kuznetsov</last></author>
       <pages>200–207</pages>
       <url hash="fb110d9d">R15-1028</url>
       <bibkey>galitsky-etal-2015-text</bibkey>
@@ -296,7 +296,7 @@
     <paper id="33">
       <title>Part-of-Speech Tagging for Code-Mixed <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>T</fixed-case>witter and <fixed-case>F</fixed-case>acebook Chat Messages</title>
       <author><first>Anupam</first><last>Jamatia</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Amitava</first><last>Das</last></author>
       <pages>239–248</pages>
       <url hash="5a64c07a">R15-1033</url>
@@ -306,7 +306,7 @@
       <title>Sentiment Analysis in <fixed-case>T</fixed-case>witter for <fixed-case>M</fixed-case>acedonian</title>
       <author><first>Dame</first><last>Jovanoski</last></author>
       <author><first>Veno</first><last>Pachovski</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>249–257</pages>
       <url hash="dc03e619">R15-1034</url>
       <bibkey>jovanoski-etal-2015-sentiment</bibkey>
@@ -322,7 +322,7 @@
     <paper id="36">
       <title>Fine-Grained Sentiment Analysis for Movie Reviews in <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Borislav</first><last>Kapukaranov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>266–274</pages>
       <url hash="2eefbb8d">R15-1036</url>
       <bibkey>kapukaranov-nakov-2015-fine</bibkey>
@@ -367,7 +367,7 @@
     <paper id="41">
       <title>Learning Agglutinative Morphology of <fixed-case>I</fixed-case>ndian Languages with Linguistically Motivated <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars</title>
       <author><first>Arun</first><last>Kumar</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Antoni</first><last>Oliver</last></author>
       <pages>307–312</pages>
       <url hash="e1c95d51">R15-1041</url>
@@ -385,7 +385,7 @@
     <paper id="43">
       <title>Automatically Identifying Periodic Social Events from <fixed-case>T</fixed-case>witter</title>
       <author><first>Florian</first><last>Kunneman</last></author>
-      <author><first>Antal</first><last>Van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>Van den Bosch</last></author>
       <pages>320–328</pages>
       <url hash="b30a36ea">R15-1043</url>
       <bibkey>kunneman-van-den-bosch-2015-automatically</bibkey>
@@ -394,7 +394,7 @@
       <title>Collecting and Evaluating Lexical Polarity with A Game With a Purpose</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
       <author><first>Alain</first><last>Joubert</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <pages>329–337</pages>
       <url hash="4dbc0e8d">R15-1044</url>
       <bibkey>lafourcade-etal-2015-collecting</bibkey>
@@ -437,7 +437,7 @@
       <title>Predicting the Level of Text Standardness in User-generated Content</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Jaka</first><last>Čibej</last></author>
       <author><first>Dafne</first><last>Marko</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
@@ -449,9 +449,9 @@
     <paper id="50">
       <title>Predicting Inflectional Paradigms and Lemmata of Unknown Words for Semi-automatic Expansion of Morphological Lexicons</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Nives</first><last>Mikelić Preradović</last></author>
+      <author id="nives-mikelic-preradovic"><first>Nives</first><last>Mikelić Preradović</last></author>
       <pages>379–387</pages>
       <url hash="637ed592">R15-1050</url>
       <bibkey>ljubesic-etal-2015-predicting-inflectional</bibkey>
@@ -459,7 +459,7 @@
     <paper id="51">
       <title>Predicting Correlations Between Lexical Alignments and Semantic Inferences</title>
       <author><first>Simone</first><last>Magnolini</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>388–397</pages>
       <url hash="319dd7e2">R15-1051</url>
       <bibkey>magnolini-magnini-2015-predicting</bibkey>
@@ -475,7 +475,7 @@
     </paper>
     <paper id="53">
       <title><fixed-case>N</fixed-case>orwegian Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
       <pages>404–412</pages>
@@ -492,8 +492,8 @@
     <paper id="55">
       <title>Pattern Construction for Extracting Domain Terminology</title>
       <author><first>Yusney</first><last>Marrero García</last></author>
-      <author><first>Paloma</first><last>Moreda Pozo</last></author>
-      <author><first>Rafael</first><last>Muñoz-Guillena</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda Pozo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz-Guillena</last></author>
       <pages>420–426</pages>
       <url hash="a40348b8">R15-1055</url>
       <bibkey>marrero-garcia-etal-2015-pattern</bibkey>
@@ -501,7 +501,7 @@
     <paper id="56">
       <title>A Procedural Definition of Multi-word Lexical Units</title>
       <author><first>Marek</first><last>Maziarz</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <pages>427–435</pages>
       <url hash="cc86613c">R15-1056</url>
@@ -509,7 +509,7 @@
     </paper>
     <paper id="57">
       <title>Semi-Supervised Never-Ending Learning in Rhetorical Relation Identification</title>
-      <author><first>Erick Galani</first><last>Maziero</last></author>
+      <author id="erick-galani-maziero"><first>Erick Galani</first><last>Maziero</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
       <pages>436–442</pages>
@@ -521,7 +521,7 @@
       <author><first>Todor</first><last>Mihaylov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>443–450</pages>
       <url hash="49c4c805">R15-1058</url>
       <bibkey>mihaylov-etal-2015-exposing</bibkey>
@@ -539,7 +539,7 @@
       <title>Statistical Machine Translation Improvement based on Phrase Selection</title>
       <author><first>Cyrine</first><last>Nasri</last></author>
       <author><first>Chiraz</first><last>Latiri</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>458–464</pages>
       <url hash="15696778">R15-1060</url>
       <bibkey>nasri-etal-2015-statistical</bibkey>
@@ -571,7 +571,7 @@
     </paper>
     <paper id="64">
       <title>A Comparative Study of Different Sentiment Lexica for Sentiment Analysis of Tweets</title>
-      <author><first>Canberk</first><last>Ozdemir</last></author>
+      <author id="canberk-ozdemir"><first>Canberk</first><last>Ozdemir</last></author>
       <author><first>Sabine</first><last>Bergler</last></author>
       <pages>488–496</pages>
       <url hash="828b9e88">R15-1064</url>
@@ -608,7 +608,7 @@
     <paper id="68">
       <title>A New Approach to Automated Text Readability Classification based on Concept Indexing with Integrated Part-of-Speech n-gram Features</title>
       <author><first>Abigail</first><last>Razon</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <pages>521–528</pages>
       <url hash="58d21add">R15-1068</url>
       <bibkey>razon-barnden-2015-new</bibkey>
@@ -671,23 +671,23 @@
       <title>Evaluating the Impact of Using a Domain-specific Bilingual Lexicon on the Performance of a Hybrid Machine Translation Approach</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <author><first>Othman</first><last>Zennaki</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
       <pages>579–587</pages>
       <url hash="c53d6305">R15-1075</url>
       <bibkey>semmar-etal-2015-evaluating</bibkey>
     </paper>
     <paper id="76">
       <title>Hierarchical Topic Structuring: From Dense Segmentation to Topically Focused Fragments via Burst Analysis</title>
-      <author><first>Anca-Roxana</first><last>Simon</last></author>
+      <author id="anca-roxana-simon"><first>Anca-Roxana</first><last>Simon</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <pages>588–595</pages>
       <url hash="399263a3">R15-1076</url>
       <bibkey>simon-etal-2015-hierarchical</bibkey>
     </paper>
     <paper id="77">
       <title>Improving Word Sense Disambiguation with Linguistic Knowledge from a Sense Annotated Treebank</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>596–603</pages>
@@ -704,7 +704,7 @@
     </paper>
     <paper id="79">
       <title>Translating from Original to Simplified Sentences using <fixed-case>M</fixed-case>oses: When does it Actually Work?</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>611–617</pages>
       <url hash="8dffe7e7">R15-1079</url>
@@ -712,7 +712,7 @@
     </paper>
     <paper id="80">
       <title>Automatic Text Simplification for <fixed-case>S</fixed-case>panish: Comparative Evaluation of Various Simplification Strategies</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>618–626</pages>
@@ -722,7 +722,7 @@
     <paper id="81">
       <title>Towards Multilingual Event Extraction Evaluation: A Case Study for the <fixed-case>C</fixed-case>zech Language</title>
       <author><first>Josef</first><last>Steinberger</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>627–635</pages>
       <url hash="3ba51fcc">R15-1081</url>
       <bibkey>steinberger-tanev-2015-towards</bibkey>
@@ -747,7 +747,7 @@
     <paper id="84">
       <title>Training Automatic Transliteration Models on <fixed-case>DBP</fixed-case>edia Data</title>
       <author><first>Velislava</first><last>Todorova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>654–662</pages>
       <url hash="d1088778">R15-1084</url>
       <bibkey>todorova-simov-2015-training</bibkey>
@@ -756,7 +756,7 @@
       <title><fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Semantic Word Class Alignment to Improve Statistical Machine Translation</title>
       <author><first>Ines</first><last>Turki Khemakhem</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>663–671</pages>
       <url hash="b62678a4">R15-1085</url>
       <bibkey>turki-khemakhem-etal-2015-arabic</bibkey>
@@ -769,8 +769,8 @@
       <author><first>Julie</first><last>Mennes</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>672–680</pages>
       <url hash="ea19ee76">R15-1086</url>
       <bibkey>van-hee-etal-2015-detection</bibkey>
@@ -794,7 +794,7 @@
     <paper id="89">
       <title>Six Good Predictors of Autistic Text Comprehension</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <pages>697–706</pages>
       <url hash="9b193e92">R15-1089</url>
       <bibkey>yaneva-evans-2015-six</bibkey>
@@ -818,7 +818,7 @@
       <title>A Large <fixed-case>W</fixed-case>ordnet-based Sentiment Lexicon for <fixed-case>P</fixed-case>olish</title>
       <author><first>Monika</first><last>Zaśko-Zielińska</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>721–730</pages>
       <url hash="2dc96df2">R15-1092</url>
       <bibkey>zasko-zielinska-etal-2015-large</bibkey>
@@ -826,7 +826,7 @@
     <paper id="93">
       <title>Named Entity Recognition of Persons’ Names in <fixed-case>A</fixed-case>rabic Tweets</title>
       <author><first>Omnia</first><last>Zayed</last></author>
-      <author><first>Samhaa</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa</first><last>El-Beltagy</last></author>
       <pages>731–738</pages>
       <url hash="9725e495">R15-1093</url>
       <bibkey>zayed-el-beltagy-2015-named</bibkey>
@@ -834,14 +834,14 @@
     <paper id="94">
       <title>One Tree is not Enough: Cross-lingual Accumulative Structure Transfer for Semantic Indeterminacy</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>739–746</pages>
       <url hash="d0d25937">R15-1094</url>
       <bibkey>ziering-van-der-plas-2015-one</bibkey>
     </paper>
     <paper id="95">
       <title>Lost in Discussion? Tracking Opinion Groups in Complex Political Discussions by the Example of the <fixed-case>FOMC</fixed-case> Meeting Transcriptions</title>
-      <author><first>Cäcilia</first><last>Zirn</last></author>
+      <author id="cacilia-zirn"><first>Cäcilia</first><last>Zirn</last></author>
       <author><first>Robert</first><last>Meusel</last></author>
       <author><first>Heiner</first><last>Stuckenschmidt</last></author>
       <pages>747–753</pages>
diff --git a/data/xml/R17.xml b/data/xml/R17.xml
index 250160589a..c34259ff70 100644
--- a/data/xml/R17.xml
+++ b/data/xml/R17.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the International Conference Recent Advances in Natural Language Processing, <fixed-case>RANLP</fixed-case> 2017</booktitle>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd.</publisher>
       <address>Varna, Bulgaria</address>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title>What Sentence are you Referring to and Why? Identifying Cited Sentences in Scientific Literature</title>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Luis</first><last>Chiruzzo</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>9–17</pages>
@@ -40,7 +40,7 @@
     <paper id="3">
       <title>A Comparison of Feature-Based and Neural Scansion of Poetry</title>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>18–23</pages>
       <doi>10.26615/978-954-452-049-6_003</doi>
@@ -52,7 +52,7 @@
       <title><fixed-case>P</fixed-case>ersian-<fixed-case>S</fixed-case>panish Low-Resource Statistical Machine Translation Through <fixed-case>E</fixed-case>nglish as Pivot Language</title>
       <author><first>Benyamin</first><last>Ahmadnia</last></author>
       <author><first>Javier</first><last>Serrano</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>24–30</pages>
       <doi>10.26615/978-954-452-049-6_004</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_004</url>
@@ -62,8 +62,8 @@
     <paper id="5">
       <title>Simple Open Stance Classification for Rumour Analysis</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>31–39</pages>
       <doi>10.26615/978-954-452-049-6_005</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_005</url>
@@ -176,8 +176,8 @@
     </paper>
     <paper id="16">
       <title>Fast and Accurate Decision Trees for Natural Language Processing Tasks</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Sonia</first><last>Pipa</last></author>
       <pages>103–110</pages>
       <doi>10.26615/978-954-452-049-6_016</doi>
@@ -198,7 +198,7 @@
     <paper id="18">
       <title>Building Chatbots from Forum Data: Model Selection Using Question Answering Metrics</title>
       <author><first>Martin</first><last>Boyanov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
@@ -233,8 +233,8 @@
       <title>Role-based model for Named Entity Recognition</title>
       <author><first>Pablo</first><last>Calleja</last></author>
       <author><first>Raúl</first><last>García-Castro</last></author>
-      <author><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
-      <author><first>Asunción</first><last>Gómez-Pérez</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción</first><last>Gómez-Pérez</last></author>
       <pages>149–156</pages>
       <doi>10.26615/978-954-452-049-6_021</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_021</url>
@@ -244,9 +244,9 @@
     <paper id="22">
       <title>Towards the Improvement of Automatic Emotion Pre-annotation with Polarity and Subjective Information</title>
       <author><first>Lea</first><last>Canales</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>157–163</pages>
       <doi>10.26615/978-954-452-049-6_022</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_022</url>
@@ -256,7 +256,7 @@
     <paper id="23">
       <title>Underspecification in Natural Language Understanding for Dialog Automation</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>164–170</pages>
       <doi>10.26615/978-954-452-049-6_023</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_023</url>
@@ -289,7 +289,7 @@
     <paper id="26">
       <title>Towards Replicability in Parsing</title>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>185–194</pages>
       <doi>10.26615/978-954-452-049-6_026</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_026</url>
@@ -309,7 +309,7 @@
     <paper id="28">
       <title>On the stylistic evolution from communism to democracy: <fixed-case>S</fixed-case>olomon <fixed-case>M</fixed-case>arcus study case</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Bogdan</first><last>Dumitru</last></author>
       <pages>201–207</pages>
       <doi>10.26615/978-954-452-049-6_028</doi>
@@ -356,9 +356,9 @@
     <paper id="32">
       <title>Opinion Mining in Social Networks versus Electoral Polls</title>
       <author><first>Javi</first><last>Fernández</last></author>
-      <author><first>Fernando</first><last>Llopis</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="fernando-llopis"><first>Fernando</first><last>Llopis</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <author><first>Álvaro</first><last>Díez</last></author>
       <pages>231–237</pages>
       <doi>10.26615/978-954-452-049-6_032</doi>
@@ -370,7 +370,7 @@
       <title>Corpus Creation and Initial <fixed-case>SMT</fixed-case> Experiments between <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>hipibo-konibo</title>
       <author><first>Ana-Paula</first><last>Galarreta</last></author>
       <author><first>Andrés</first><last>Melgar</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>238–244</pages>
       <doi>10.26615/978-954-452-049-6_033</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_033</url>
@@ -411,8 +411,8 @@
     <paper id="37">
       <title>A Context-Aware Approach for Detecting Worth-Checking Claims in Political Debates</title>
       <author><first>Pepa</first><last>Gencheva</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <pages>267–276</pages>
@@ -434,8 +434,8 @@
     <paper id="39">
       <title>Natural Language Processing Technologies for Document Profiling</title>
       <author><first>Antonio</first><last>Guillén</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>284–290</pages>
       <doi>10.26615/978-954-452-049-6_039</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_039</url>
@@ -477,7 +477,7 @@
       <title>Non-Deterministic Segmentation for <fixed-case>C</fixed-case>hinese Lattice Parsing</title>
       <author><first>Hai</first><last>Hu</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>316–324</pages>
       <doi>10.26615/978-954-452-049-6_043</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_043</url>
@@ -498,7 +498,7 @@
       <title>We Built a Fake News / Click Bait Filter: What Happened Next Will Blow Your Mind!</title>
       <author><first>Georgi</first><last>Karadzhov</last></author>
       <author><first>Pepa</first><last>Gencheva</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <pages>334–343</pages>
       <doi>10.26615/978-954-452-049-6_045</doi>
@@ -509,8 +509,8 @@
     <paper id="46">
       <title>Fully Automated Fact Checking Using External Sources</title>
       <author><first>Georgi</first><last>Karadzhov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <pages>344–353</pages>
@@ -522,7 +522,7 @@
     <paper id="47">
       <title>Making Travel Smarter: Extracting Travel Information From Email Itineraries Using Named Entity Recognition</title>
       <author><first>Divyansh</first><last>Kaushik</last></author>
-      <author id="shashank-gupta"><first>Shashank</first><last>Gupta</last></author>
+      <author><first>Shashank</first><last>Gupta</last></author>
       <author><first>Chakradhar</first><last>Raju</last></author>
       <author><first>Reuben</first><last>Dias</last></author>
       <author><first>Sanjib</first><last>Ghosh</last></author>
@@ -546,7 +546,7 @@
     <paper id="49">
       <title>Domain Control for Neural Machine Translation</title>
       <author><first>Catherine</first><last>Kobus</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>372–378</pages>
       <doi>10.26615/978-954-452-049-6_049</doi>
@@ -557,7 +557,7 @@
     <paper id="50">
       <title>Curriculum Learning and Minibatch Bucketing in Neural Machine Translation</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>379–386</pages>
       <doi>10.26615/978-954-452-049-6_050</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_050</url>
@@ -607,7 +607,7 @@
     <paper id="55">
       <title>Extracting semantic relations via the combination of inferences, schemas and cooccurrences</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <pages>417–423</pages>
       <doi>10.26615/978-954-452-049-6_055</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_055</url>
@@ -618,7 +618,7 @@
       <title>If mice were reptiles, then reptiles could be mammals or How to detect errors in the <fixed-case>J</fixed-case>eux<fixed-case>D</fixed-case>e<fixed-case>M</fixed-case>ots lexical network?</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
       <author><first>Alain</first><last>Joubert</last></author>
-      <author><first>Nathalie</first><last>Le Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie</first><last>Le Brun</last></author>
       <pages>424–430</pages>
       <doi>10.26615/978-954-452-049-6_056</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_056</url>
@@ -658,7 +658,7 @@
     <paper id="60">
       <title>Summarizing World Speak : A Preliminary Graph Based Approach</title>
       <author><first>Nikhil</first><last>Londhe</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>452–458</pages>
       <doi>10.26615/978-954-452-049-6_060</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_060</url>
@@ -667,7 +667,7 @@
     </paper>
     <paper id="61">
       <title>Human Associations Help to Detect Conventionalized Multiword Expressions</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Anastasia</first><last>Gerasimova</last></author>
       <pages>459–466</pages>
       <doi>10.26615/978-954-452-049-6_061</doi>
@@ -677,7 +677,7 @@
     </paper>
     <paper id="62">
       <title>Detecting Hate Speech in Social Media</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>467–472</pages>
       <doi>10.26615/978-954-452-049-6_062</doi>
@@ -733,7 +733,7 @@
       <title>A Domain and Language Independent Named Entity Classification Approach Based on Profiles and Local Information</title>
       <author><first>Isabel</first><last>Moreno</last></author>
       <author><first>María Teresa</first><last>Romá-Ferri</last></author>
-      <author><first>Paloma</first><last>Moreda Pozo</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda Pozo</last></author>
       <pages>510–518</pages>
       <doi>10.26615/978-954-452-049-6_067</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_067</url>
@@ -743,7 +743,7 @@
     <paper id="68">
       <title>Similarity Based Genre Identification for <fixed-case>POS</fixed-case> Tagging Experts &amp; Dependency Parsing</title>
       <author><first>Atreyee</first><last>Mukherjee</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>519–526</pages>
       <doi>10.26615/978-954-452-049-6_068</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_068</url>
@@ -772,8 +772,8 @@
     </paper>
     <paper id="71">
       <title>Robust Tuning Datasets for Statistical Machine Translation</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>543–550</pages>
       <doi>10.26615/978-954-452-049-6_071</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_071</url>
@@ -782,9 +782,9 @@
     </paper>
     <paper id="72">
       <title>Do Not Trust the Trolls: Predicting Credibility in Community Question Answering Forums</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Tsvetomila</first><last>Mihaylova</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Yashkumar</first><last>Shiroya</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
       <pages>551–560</pages>
@@ -796,7 +796,7 @@
     <paper id="73">
       <title><fixed-case>B</fixed-case>ulgarian-<fixed-case>E</fixed-case>nglish and <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>ulgarian Machine Translation: System Design and Evaluation</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>561–568</pages>
       <doi>10.26615/978-954-452-049-6_073</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_073</url>
@@ -902,9 +902,9 @@
     </paper>
     <paper id="83">
       <title>Idiom Type Identification with Smoothed Lexical Features and a Maximum Margin Classifier</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>642–651</pages>
       <doi>10.26615/978-954-452-049-6_083</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_083</url>
@@ -914,7 +914,7 @@
     <paper id="84">
       <title>A Calibration Method for Evaluation of Sentiment Analysis</title>
       <author><first>F. Sharmila</first><last>Satthar</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Gulden</first><last>Uchyigit</last></author>
       <pages>652–660</pages>
       <doi>10.26615/978-954-452-049-6_084</doi>
@@ -925,7 +925,7 @@
     <paper id="85">
       <title>Building Multiword Expressions Bilingual Lexicons for Domain Adaptation of an Example-Based Machine Translation System</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Mariama</first><last>Laib</last></author>
+      <author id="meriama-laib"><first>Mariama</first><last>Laib</last></author>
       <pages>661–670</pages>
       <doi>10.26615/978-954-452-049-6_085</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_085</url>
@@ -946,7 +946,7 @@
     </paper>
     <paper id="87">
       <title>Towards Lexical Chains for Knowledge-Graph-based Word Embeddings</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Svetla</first><last>Boytcheva</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>679–685</pages>
@@ -992,8 +992,8 @@
       <title>Large-scale news entity sentiment analysis</title>
       <author><first>Ralf</first><last>Steinberger</last></author>
       <author><first>Stefanie</first><last>Hegele</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
-      <author><first>Leonida</first><last>Della Rocca</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
+      <author id="leonida-della-rocca"><first>Leonida</first><last>Della Rocca</last></author>
       <pages>707–715</pages>
       <doi>10.26615/978-954-452-049-6_091</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_091</url>
@@ -1002,10 +1002,10 @@
     </paper>
     <paper id="92">
       <title>Predicting the Law Area and Decisions of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Cases</title>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>716–722</pages>
       <doi>10.26615/978-954-452-049-6_092</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_092</url>
@@ -1024,7 +1024,7 @@
     <paper id="94">
       <title>Multi-entity sentiment analysis using entity-level feature extraction and word embeddings approach</title>
       <author><first>Colm</first><last>Sweeney</last></author>
-      <author><first>Deepak</first><last>Padmanabhan</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>Padmanabhan</last></author>
       <pages>733–740</pages>
       <doi>10.26615/978-954-452-049-6_094</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_094</url>
@@ -1137,9 +1137,9 @@
     <paper id="104">
       <title>Multilingual and Cross-Lingual Complex Word Identification</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>813–822</pages>
       <doi>10.26615/978-954-452-049-6_104</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_104</url>
@@ -1168,7 +1168,7 @@
       <title>Using <fixed-case>NLP</fixed-case> for Enhancing Second Language Acquisition</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>839–846</pages>
       <doi>10.26615/978-954-452-049-6_107</doi>
       <url>https://doi.org/10.26615/978-954-452-049-6_107</url>
diff --git a/data/xml/R19.xml b/data/xml/R19.xml
index 3aecde9e69..f3ba5be13a 100644
--- a/data/xml/R19.xml
+++ b/data/xml/R19.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</booktitle>
       <url hash="ff24b8c4">R19-1</url>
-      <editor><first>Ruslan</first><last>Mitkov</last></editor>
+      <editor id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd.</publisher>
       <address>Varna, Bulgaria</address>
@@ -41,7 +41,7 @@
     <paper id="3">
       <title>Bilingual Low-Resource Neural Machine Translation with Round-Tripping: The Case of <fixed-case>P</fixed-case>ersian-<fixed-case>S</fixed-case>panish</title>
       <author><first>Benyamin</first><last>Ahmadnia</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>18–24</pages>
       <abstract>The quality of Neural Machine Translation (NMT), as a data-driven approach, massively depends on quantity, quality, and relevance of the training dataset. Such approaches have achieved promising results for bilingually high-resource scenarios but are inadequate for low-resource conditions. This paper describes a round-trip training approach to bilingual low-resource NMT that takes advantage of monolingual datasets to address training data scarcity, thus augmenting translation quality. We conduct detailed experiments on Persian-Spanish as a bilingually low-resource scenario. Experimental results demonstrate that this competitive approach outperforms the baselines.</abstract>
       <url hash="7f7b0b45">R19-1003</url>
@@ -51,7 +51,7 @@
     <paper id="4">
       <title>Enhancing Phrase-Based Statistical Machine Translation by Learning Phrase Representations Using Long Short-Term Memory Network</title>
       <author><first>Benyamin</first><last>Ahmadnia</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>25–32</pages>
       <abstract>Phrases play a key role in Machine Translation (MT). In this paper, we apply a Long Short-Term Memory (LSTM) model over conventional Phrase-Based Statistical MT (PBSMT). The core idea is to use an LSTM encoder-decoder to score the phrase table generated by the PBSMT decoder. Given a source sequence, the encoder and decoder are jointly trained in order to maximize the conditional probability of a target sequence. Analytically, the performance of a PBSMT system is enhanced by using the conditional probabilities of phrase pairs computed by an LSTM encoder-decoder as an additional feature in the existing log-linear model. We compare the performance of the phrase tables in the PBSMT to the performance of the proposed LSTM and observe its positive impact on translation quality. We construct a PBSMT model using the Moses decoder and enrich the Language Model (LM) utilizing an external dataset. We then rank the phrase tables using an LSTM-based encoder-decoder. This method produces a gain of up to 3.14 BLEU score on the test set.</abstract>
       <url hash="6173e682">R19-1004</url>
@@ -82,7 +82,7 @@
     <paper id="7">
       <title>Supervised Morphological Segmentation Using Rich Annotated Lexicon</title>
       <author><first>Ebrahim</first><last>Ansari</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Mohammad</first><last>Mahmoudi</last></author>
       <author><first>Hamid</first><last>Haghdoost</last></author>
       <author><first>Jonáš</first><last>Vidra</last></author>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="8">
       <title>Combining Lexical Substitutes in Neural Word Sense Induction</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Boris</first><last>Sheludko</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>62–70</pages>
@@ -107,7 +107,7 @@
       <title>Detecting Clitics Related Orthographic Errors in <fixed-case>T</fixed-case>urkish</title>
       <author><first>Ugurcan</first><last>Arikan</last></author>
       <author><first>Onur</first><last>Gungor</last></author>
-      <author><first>Suzan</first><last>Uskudarli</last></author>
+      <author id="suzan-uskudarli"><first>Suzan</first><last>Uskudarli</last></author>
       <pages>71–76</pages>
       <abstract>For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the “de/da” clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model’s performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67% on a synthetically constructed dataset. We also compared the model’s performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71% accuracy compared to the second-best (Google Docs) with and accuracy of 34%.</abstract>
       <url hash="71d9b4b1">R19-1009</url>
@@ -129,8 +129,8 @@
       <title>Diachronic Analysis of Entities by Exploiting <fixed-case>W</fixed-case>ikipedia Page revisions</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Annalina</first><last>Caputo</last></author>
-      <author><first>Seamus</first><last>Lawless</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="seamus-lawless"><first>Seamus</first><last>Lawless</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>84–91</pages>
       <abstract>In the last few years, the increasing availability of large corpora spanning several time periods has opened new opportunities for the diachronic analysis of language. This type of analysis can bring to the light not only linguistic phenomena related to the shift of word meanings over time, but it can also be used to study the impact that societal and cultural trends have on this language change. This paper introduces a new resource for performing the diachronic analysis of named entities built upon Wikipedia page revisions. This resource enables the analysis over time of changes in the relations between entities (concepts), surface forms (words), and the contexts surrounding entities and surface forms, by analysing the whole history of Wikipedia internal links. We provide some useful use cases that prove the impact of this resource on diachronic studies and delineate some possible future usage.</abstract>
       <url hash="9a1416ed">R19-1011</url>
@@ -153,7 +153,7 @@
       <author><first>Meriem</first><last>Beloucif</last></author>
       <author><first>Ana Valeria</first><last>Gonzalez</last></author>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>102–111</pages>
       <abstract>Neural machine translation models have little inductive bias, which can be a disadvantage in low-resource scenarios. Neural models have to be trained on large amounts of data and have been shown to perform poorly when only limited data is available. We show that using naive regularization methods, based on sentence length, punctuation and word frequencies, to penalize translations that are very different from the input sentences, consistently improves the translation quality across multiple low-resource languages. We experiment with 12 language pairs, varying the training data size between 17k to 230k sentence pairs. Our best regularizer achieves an average increase of 1.5 BLEU score and 1.0 TER score across all the language pairs. For example, we achieve a BLEU score of 26.70 on the IWSLT15 English–Vietnamese translation task simply by using relative differences in punctuation as a regularizer.</abstract>
       <url hash="8b1d0353">R19-1013</url>
@@ -183,7 +183,7 @@
       <title>Evaluating the Consistency of Word Embeddings from Small Data</title>
       <author><first>Jelke</first><last>Bloem</last></author>
       <author><first>Antske</first><last>Fokkens</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>132–141</pages>
       <abstract>In this work, we address the evaluation of distributional semantic models trained on smaller, domain-specific texts, specifically, philosophical text. Specifically, we inspect the behaviour of models using a pre-trained background space in learning. We propose a measure of consistency which can be used as an evaluation metric when no in-domain gold-standard data is available. This measure simply computes the ability of a model to learn similar embeddings from different parts of some homogeneous data. We show that in spite of being a simple evaluation, consistency actually depends on various combinations of factors, including the nature of the data itself, the model used to train the semantic space, and the frequency of the learnt terms, both in the background space and in the in-domain data of interest.</abstract>
       <url hash="a9cbe167">R19-1016</url>
@@ -237,7 +237,7 @@
     <paper id="21">
       <title>Classifying Author Intention for Writer Feedback in Related Work</title>
       <author><first>Arlene</first><last>Casey</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Dorota</first><last>Glowacka</last></author>
       <pages>178–187</pages>
       <abstract>The ability to produce high-quality publishable material is critical to academic success but many Post-Graduate students struggle to learn to do so. While recent years have seen an increase in tools designed to provide feedback on aspects of writing, one aspect that has so far been neglected is the Related Work section of academic research papers. To address this, we have trained a supervised classifier on a corpus of 94 Related Work sections and evaluated it against a manually annotated gold standard. The classifier uses novel features pertaining to citation types and co-reference, along with patterns found from studying Related Works. We show that these novel features contribute to classifier performance with performance being favourable compared to other similar works that classify author intentions and consider feedback for academic writing.</abstract>
@@ -271,7 +271,7 @@
     <paper id="24">
       <title>Personality-dependent Neural Text Summarization</title>
       <author><first>Pablo</first><last>Costa</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>205–212</pages>
       <abstract>In Natural Language Generation systems, personalization strategies - i.e, the use of information about a target author to generate text that (more) closely resembles human-produced language - have long been applied to improve results. The present work addresses one such strategy - namely, the use of Big Five personality information about the target author - applied to the case of abstractive text summarization using neural sequence-to-sequence models. Initial results suggest that having access to personality information does lead to more accurate (or human-like) text summaries, and paves the way for more robust systems of this kind.</abstract>
       <url hash="07a9836c">R19-1024</url>
@@ -324,7 +324,7 @@
       <title>Detecting Toxicity in News Articles: Application to <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Yoan</first><last>Dinkov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>247–258</pages>
       <abstract>Online media aim for reaching ever bigger audience and for attracting ever longer attention span. This competition creates an environment that rewards sensational, fake, and toxic news. To help limit their spread and impact, we propose and develop a news toxicity detector that can recognize various types of toxic content. While previous research primarily focused on English, here we target Bulgarian. We created a new dataset by crawling a website that for five years has been collecting Bulgarian news articles that were manually categorized into eight toxicity groups. Then we trained a multi-class classifier with nine categories: eight toxic and one non-toxic. We experimented with different representations based on ElMo, BERT, and XLM, as well as with a variety of domain-specific features. Due to the small size of our dataset, we created a separate model for each feature type, and we ultimately combined these models into a meta-classifier. The evaluation results show an accuracy of 59.0% and a macro-F1 score of 39.7%, which represent sizable improvements over the majority-class baseline (Acc=30.3%, macro-F1=5.2%).</abstract>
       <url hash="a06b5392">R19-1029</url>
@@ -355,11 +355,11 @@
     <paper id="32">
       <title>Demo Application for <fixed-case>LETO</fixed-case>: Learning Engine Through Ontologies</title>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <author><first>Yudivian</first><last>Almeida-Cruz</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>276–284</pages>
       <abstract>The massive amount of multi-formatted information available on the Web necessitates the design of software systems that leverage this information to obtain knowledge that is valid and useful. The main challenge is to discover relevant information and continuously update, enrich and integrate knowledge from various sources of structured and unstructured data. This paper presents the Learning Engine Through Ontologies(LETO) framework, an architecture for the continuous and incremental discovery of knowledge from multiple sources of unstructured and structured data. We justify the main design decision behind LETO’s architecture and evaluate the framework’s feasibility using the Internet Movie Data Base(IMDB) and Twitter as a practical application.</abstract>
       <url hash="2aea80b4">R19-1032</url>
@@ -368,8 +368,8 @@
     </paper>
     <paper id="33">
       <title>Sentence Simplification for Semantic Role Labelling and Information Extraction</title>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>285–294</pages>
       <abstract>In this paper, we report on the extrinsic evaluation of an automatic sentence simplification method with respect to two NLP tasks: semantic role labelling (SRL) and information extraction (IE). The paper begins with our observation of challenges in the intrinsic evaluation of sentence simplification systems, which motivates the use of extrinsic evaluation of these systems with respect to other NLP tasks. We describe the two NLP systems and the test data used in the extrinsic evaluation, and present arguments and evidence motivating the integration of a sentence simplification step as a means of improving the accuracy of these systems. Our evaluation reveals that their performance is improved by the simplification step: the SRL system is better able to assign semantic roles to the majority of the arguments of verbs and the IE system is better able to identify fillers for all IE template slots.</abstract>
       <url hash="8704ae51">R19-1033</url>
@@ -379,7 +379,7 @@
     <paper id="34">
       <title><fixed-case>O</fixed-case>llo<fixed-case>B</fixed-case>ot - Towards A Text-Based <fixed-case>A</fixed-case>rabic Health Conversational Agent: Evaluation and Results</title>
       <author><first>Ahmed</first><last>Fadhil</last></author>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <pages>295–303</pages>
       <abstract>We introduce OlloBot, an Arabic conversational agent that assists physicians and supports patients with the care process. It doesn’t replace the physicians, instead provides health tracking and support and assists physicians with the care delivery through a conversation medium. The current model comprises healthy diet, physical activity, mental health, in addition to food logging. Not only OlloBot tracks user daily food, it also offers useful tips for healthier living. We will discuss the design, development and testing of OlloBot, and highlight the findings and limitations arose from the testing.</abstract>
       <url hash="4f533371">R19-1034</url>
@@ -445,7 +445,7 @@
       <author><first>Laura</first><last>Franzoi</last></author>
       <author><first>Andrea</first><last>Sgarro</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>345–352</pages>
       <abstract>In this paper, we present new methods for language classification which put to good use both syntax and fuzzy tools, and are capable of dealing with irrelevant linguistic features (i.e. features which should not contribute to the classification) and even inconsistent features (which do not make sense for specific languages). We introduce a metric distance, based on the generalized Steinhaus transform, which allows one to deal jointly with irrelevance and inconsistency. To evaluate our methods, we test them on a syntactic data set, due to the linguist G. Longobardi and his school. We obtain phylogenetic trees which sometimes outperform the ones obtained by Atkinson and Gray.</abstract>
       <url hash="5026a46f">R19-1040</url>
@@ -579,7 +579,7 @@
       <title>Beyond <fixed-case>E</fixed-case>nglish-Only Reading Comprehension: Experiments in Zero-shot Multilingual Transfer for <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Momchil</first><last>Hardalov</last></author>
       <author><first>Ivan</first><last>Koychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>447–459</pages>
       <abstract>Recently, reading comprehension models achieved near-human performance on large-scale datasets such as SQuAD, CoQA, MS Macro, RACE, etc. This is largely due to the release of pre-trained contextualized representations such as BERT and ELMo, which can be fine-tuned for the target task. Despite those advances and the creation of more challenging datasets, most of the work is still done for English. Here, we study the effectiveness of multilingual BERT fine-tuned on large-scale English datasets for reading comprehension (e.g., for RACE), and we apply it to Bulgarian multiple-choice reading comprehension. We propose a new dataset containing 2,221 questions from matriculation exams for twelfth grade in various subjects —history, biology, geography and philosophy—, and 412 additional questions from online quizzes in history. While the quiz authors gave no relevant context, we incorporate knowledge from Wikipedia, retrieving documents matching the combination of question + each answer option. Moreover, we experiment with different indexing and pre-training strategies. The evaluation results show accuracy of 42.23%, which is well above the baseline of 24.89%.</abstract>
       <url hash="21539b59">R19-1053</url>
@@ -642,7 +642,7 @@
       <author><first>Tatsuya</first><last>Ishigaki</last></author>
       <author><first>Hidetaka</first><last>Kamigaito</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>497–506</pages>
       <abstract>Discourse relations between sentences are often represented as a tree, and the tree structure provides important information for summarizers to create a short and coherent summary. However, current neural network-based summarizers treat the source document as just a sequence of sentences and ignore the tree-like discourse structure inherent in the document. To incorporate the information of a discourse tree structure into the neural network-based summarizers, we propose a discourse-aware neural extractive summarizer which can explicitly take into account the discourse dependency tree structure of the source document. Our discourse-aware summarizer can jointly learn the discourse structure and the salience score of a sentence by using novel hierarchical attention modules, which can be trained on automatically parsed discourse dependency trees. Experimental results showed that our model achieved competitive or better performances against state-of-the-art models in terms of ROUGE scores on the DailyMail dataset. We further conducted manual evaluations. The results showed that our approach also gained the coherence of the output summaries.</abstract>
       <url hash="4a8868e8">R19-1059</url>
@@ -682,7 +682,7 @@
       <title>Using Syntax to Resolve <fixed-case>NPE</fixed-case> in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Payal</first><last>Khullar</last></author>
       <author><first>Allen</first><last>Antony</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>534–540</pages>
       <abstract>This paper describes a novel, syntax-based system for automatic detection and resolution of Noun Phrase Ellipsis (NPE) in English. The system takes in free input English text, detects the site of nominal elision, and if present, selects potential antecedent candidates. The rules are built using the syntactic information on ellipsis and its antecedent discussed in previous theoretical linguistics literature on NPE. Additionally, we prepare a curated dataset of 337 sentences from well-known, reliable sources, containing positive and negative samples of NPE. We split this dataset into two parts, and use one part to refine our rules and the other to test the performance of our final system. We get an F1-score of 76.47% for detection and 70.27% for NPE resolution on the testset. To the best of our knowledge, ours is the first system that detects and resolves NPE in English. The curated dataset used for this task, albeit small, covers a wide variety of NPE cases and will be made public for future work.</abstract>
       <url hash="8894d46e">R19-1063</url>
@@ -722,7 +722,7 @@
     <paper id="67">
       <title>A Qualitative Evaluation Framework for Paraphrase Identification</title>
       <author><first>Venelin</first><last>Kovatchev</last></author>
-      <author><first>M. Antonia</first><last>Marti</last></author>
+      <author id="m-antonia-marti"><first>M. Antonia</first><last>Marti</last></author>
       <author><first>Maria</first><last>Salamo</last></author>
       <author><first>Javier</first><last>Beltran</last></author>
       <pages>568–577</pages>
@@ -757,8 +757,8 @@
       <title>Question Similarity in Community Question Answering: A Systematic Exploration of Preprocessing Methods and Models</title>
       <author><first>Florian</first><last>Kunneman</last></author>
       <author><first>Thiago Castro</first><last>Ferreira</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>593–601</pages>
       <abstract>Community Question Answering forums are popular among Internet users, and a basic problem they encounter is trying to find out if their question has already been posed before. To address this issue, NLP researchers have developed methods to automatically detect question-similarity, which was one of the shared tasks in SemEval. The best performing systems for this task made use of Syntactic Tree Kernels or the SoftCosine metric. However, it remains unclear why these methods seem to work, whether their performance can be improved by better preprocessing methods and what kinds of errors they (and other methods) make. In this paper, we therefore systematically combine and compare these two approaches with the more traditional BM25 and translation-based models. Moreover, we analyze the impact of preprocessing steps (lowercasing, suppression of punctuation and stop words removal) and word meaning similarity based on different distributions (word translation probability, Word2Vec, fastText and ELMo) on the performance of the task. We conduct an error analysis to gain insight into the differences in performance between the system set-ups. The implementation is made publicly available from <url>https://github.com/fkunneman/DiscoSumo/tree/master/ranlp</url>.</abstract>
       <url hash="85b47cba">R19-1070</url>
@@ -777,7 +777,7 @@
     </paper>
     <paper id="72">
       <title>Resolving Pronouns for a Resource-Poor Language, <fixed-case>M</fixed-case>alayalam Using Resource-Rich Language, <fixed-case>T</fixed-case>amil.</title>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>611–618</pages>
       <abstract>In this paper we give in detail how a resource rich language can be used for resolving pronouns for a less resource language. The source language, which is resource rich language in this study, is Tamil and the resource poor language is Malayalam, both belonging to the same language family, Dravidian. The Pronominal resolution developed for Tamil uses CRFs. Our approach is to leverage the Tamil language model to test Malayalam data and the processing required for Malayalam data is detailed. The similarity at the syntactic level between the languages is exploited in identifying the features for developing the Tamil language model. The word form or the lexical item is not considered as a feature for training the CRFs. Evaluation on Malayalam Wikipedia data shows that our approach is correct and the results, though not as good as Tamil, but comparable.</abstract>
       <url hash="a192769d">R19-1072</url>
@@ -831,8 +831,8 @@
       <author><first>Pilar</first><last>López Úbeda</last></author>
       <author><first>Flor Miriam</first><last>Plaza del Arco</last></author>
       <author><first>Manuel Carlos</first><last>Díaz Galiano</last></author>
-      <author><first>L. Alfonso</first><last>Urena Lopez</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena Lopez</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
       <pages>655–663</pages>
       <abstract>Mental health is one of the main concerns of today’s society. Early detection of symptoms can greatly help people with mental disorders. People are using social networks more and more to express emotions, sentiments and mental states. Thus, the treatment of this information using NLP technologies can be applied to the automatic detection of mental problems such as eating disorders. However, the first step to solving the problem should be to provide a corpus in order to evaluate our systems. In this paper, we specifically focus on detecting anorexia messages on Twitter. Firstly, we have generated a new corpus of tweets extracted from different accounts including anorexia and non-anorexia messages in Spanish. The corpus is called SAD: Spanish Anorexia Detection corpus. In order to validate the effectiveness of the SAD corpus, we also propose several machine learning approaches for automatically detecting anorexia symptoms in the corpus. The good results obtained show that the application of textual classification methods is a promising option for developing this kind of system demonstrating that these tools could be used by professionals to help in the early detection of mental problems.</abstract>
       <url hash="48173a51">R19-1077</url>
@@ -869,8 +869,8 @@
       <author><first>Suraj</first><last>Maharjan</last></author>
       <author><first>Deepthi</first><last>Mave</last></author>
       <author><first>Prasha</first><last>Shrestha</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
       <pages>684–692</pages>
       <abstract>An author’s way of presenting a story through his/her writing style has a great impact on whether the story will be liked by readers or not. In this paper, we learn representations for authors of literary texts together with representations for character n-grams annotated with their functional roles. We train a neural character n-gram based language model using an external corpus of literary texts and transfer learned representations for use in downstream tasks. We show that augmenting the knowledge from external works of authors produces results competitive with other style-based methods for book likability prediction, genre classification, and authorship attribution.</abstract>
@@ -917,8 +917,8 @@
       <title>Semantic Language Model for <fixed-case>T</fixed-case>unisian Dialect</title>
       <author><first>Abir</first><last>Masmoudi</last></author>
       <author><first>Rim</first><last>Laatar</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>720–729</pages>
       <abstract>In this paper, we describe the process of creating a statistical Language Model (LM) for the Tunisian Dialect. Indeed, this work is part of the realization of Automatic Speech Recognition (ASR) system for the Tunisian Railway Transport Network. Since our eld of work has been limited, there are several words with similar behaviors (semantic for example) but they do not have the same appearance probability; their class groupings will therefore be possible. For these reasons, we propose to build an n-class LM that is based mainly on the integration of purely semantic data. Indeed, each class represents an abstraction of similar labels. In order to improve the sequence labeling task, we proposed to use a discriminative algorithm based on the Conditional Random Field (CRF) model. To better judge our choice of creating an n-class word model, we compared the created model with the 3-gram type model on the same test corpus of evaluation. Additionally, to assess the impact of using the CRF model to perform the semantic labelling task in order to construct semantic classes, we compared the n-class created model with using the CRF in the semantic labelling task and the n- class model without using the CRF in the semantic labelling task. The drawn comparison of the predictive power of the n-class model obtained by applying the CRF model in the semantic labelling is that it is better than the other two models presenting the highest value of its perplexity.</abstract>
       <url hash="57640f3f">R19-1084</url>
@@ -928,7 +928,7 @@
     <paper id="85">
       <title>Automatic diacritization of <fixed-case>T</fixed-case>unisian dialect text using Recurrent Neural Network</title>
       <author><first>Abir</first><last>Masmoudi</last></author>
-      <author><first>Mariem</first><last>Ellouze</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze</last></author>
       <author><first>Lamia</first><last>Hadrich belguith</last></author>
       <pages>730–739</pages>
       <abstract>The absence of diacritical marks in the Arabic texts generally leads to morphological, syntactic and semantic ambiguities. This can be more blatant when one deals with under-resourced languages, such as the Tunisian dialect, which suffers from unavailability of basic tools and linguistic resources, like sufficient amount of corpora, multilingual dictionaries, morphological and syntactic analyzers. Thus, this language processing faces greater challenges due to the lack of these resources. The automatic diacritization of MSA text is one of the various complex problems that can be solved by deep neural networks today. Since the Tunisian dialect is an under-resourced language of MSA and as there are a lot of resemblance between both languages, we suggest to investigate a recurrent neural network (RNN) for this dialect diacritization problem. This model will be compared to our previous models models CRF and SMT (CITATION) based on the same dialect corpus. We can experimentally show that our model can achieve better outcomes (DER of 10.72%), as compared to the two models CRF (DER of 20.25%) and SMT (DER of 33.15%).</abstract>
@@ -939,8 +939,8 @@
     <paper id="86">
       <title>Comparing <fixed-case>MT</fixed-case> Approaches for Text Normalization</title>
       <author><first>Claudia</first><last>Matos Veliz</last></author>
-      <author><first>Orphee</first><last>De Clercq</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>740–749</pages>
       <abstract>One of the main characteristics of social media data is the use of non-standard language. Since NLP tools have been trained on traditional text material their performance drops when applied to social media data. One way to overcome this is to first perform text normalization. In this work, we apply text normalization to noisy English and Dutch text coming from different social media genres: text messages, message board posts and tweets. We consider the normalization task as a Machine Translation problem and test the two leading paradigms: statistical and neural machine translation. For SMT we explore the added value of varying background corpora for training the language model. For NMT we have a look at data augmentation since the parallel datasets we are working with are limited in size. Our results reveal that when relying on SMT to perform the normalization it is beneficial to use a background corpus that is close to the genre you are normalizing. Regarding NMT, we find that the translations - or normalizations - coming out of this model are far from perfect and that for a low-resource language like Dutch adding additional training data works better than artificially augmenting the data.</abstract>
       <url hash="8ba8d875">R19-1086</url>
@@ -951,7 +951,7 @@
       <title>Sentiment and Emotion Based Representations for Fake Reviews Detection</title>
       <author><first>Alimuddin</first><last>Melleng</last></author>
       <author><first>Anna</first><last>Jurek-Loughrey</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>750–757</pages>
       <abstract>Fake reviews are increasingly prevalent across the Internet. They can be unethical as well as harmful. They can affect businesses and mislead individual customers. As the opinions on the Web are increasingly used the detection of fake reviews has become more and more critical. In this study, we explore the effectiveness of sentiment and emotions based representations for the task of building machine learning models for fake review detection. We perform empirical studies over three real world datasets and demonstrate that improved data representation can be achieved by combining sentiment and emotion extraction methods, as well as by performing sentiment and emotion analysis on a part-by-part basis by segmenting the reviews.</abstract>
       <url hash="c93eab66">R19-1087</url>
@@ -972,9 +972,9 @@
       <title>Community Perspective on Replicability in Natural Language Processing</title>
       <author><first>Margot</first><last>Mieskes</last></author>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Kevin</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></author>
       <pages>768–775</pages>
       <abstract>With recent efforts in drawing attention to the task of replicating and/or reproducing results, for example in the context of COLING 2018 and various LREC workshops, the question arises how the NLP community views the topic of replicability in general. Using a survey, in which we involve members of the NLP community, we investigate how our community perceives this topic, its relevance and options for improvement. Based on over two hundred participants, the survey results confirm earlier observations, that successful reproducibility requires more than having access to code and data. Additionally, the results show that the topic has to be tackled from the authors’, reviewers’ and community’s side.</abstract>
       <url hash="d29cfb94">R19-1089</url>
@@ -1060,7 +1060,7 @@
     </paper>
     <paper id="97">
       <title>Summary Refinement through Denoising</title>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Alessandro</first><last>Calmanovici</last></author>
       <author><first>Richard</first><last>Hahnloser</last></author>
       <pages>837–843</pages>
@@ -1071,7 +1071,7 @@
     </paper>
     <paper id="98">
       <title>Large-Scale Hierarchical Alignment for Data-driven Text Rewriting</title>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Richard</first><last>Hahnloser</last></author>
       <pages>844–853</pages>
       <abstract>We propose a simple unsupervised method for extracting pseudo-parallel monolingual sentence pairs from comparable corpora representative of two different text styles, such as news articles and scientific papers. Our approach does not require a seed parallel corpus, but instead relies solely on hierarchical search over pre-trained embeddings of documents and sentences. We demonstrate the effectiveness of our method through automatic and extrinsic evaluation on text simplification from the normal to the Simple Wikipedia. We show that pseudo-parallel sentences extracted with our method not only supplement existing parallel data, but can even lead to competitive performance on their own.</abstract>
@@ -1093,7 +1093,7 @@
     <paper id="100">
       <title>From Image to Text in Sentiment Analysis via Regression and Deep Learning</title>
       <author><first>Daniela</first><last>Onita</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Adriana</first><last>Birlutiu</last></author>
       <pages>862–868</pages>
       <abstract>Images and text represent types of content which are used together for conveying user emotions in online social networks. These contents are usually associated with a sentiment category. In this paper, we investigate an approach for mapping images to text for three types of sentiment categories: positive, neutral and negative. The mapping from images to text is performed using a Kernel Ridge Regression model. We considered two types of image features: i) RGB pixel-values features, and ii) features extracted with a deep learning approach. The experimental evaluation was performed on a Twitter data set containing both text and images and the sentiment associated with these. The experimental results show a difference in performance for different sentiment categories, in particular the mapping that we propose performs better for the positive sentiment category in comparison with the neutral and negative ones. Furthermore, the experimental results show that the more complex deep learning features perform better than the RGB pixel-value features for all sentiment categories and for larger training sets.</abstract>
@@ -1104,7 +1104,7 @@
     <paper id="101">
       <title>Building a Morphological Analyser for <fixed-case>L</fixed-case>az</title>
       <author><first>Esra</first><last>Onal</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>869–877</pages>
       <abstract>This study is an attempt to contribute to documentation and revitalization efforts of endangered Laz language, a member of South Caucasian language family mainly spoken on northeastern coastline of Turkey. It constitutes the first steps to create a general computational model for word form recognition and production for Laz by building a rule-based morphological analyser using Helsinki Finite-State Toolkit (HFST). The evaluation results show that the analyser has a 64.9% coverage over a corpus collected for this study with 111,365 tokens. We have also performed an error analysis on randomly selected 100 tokens from the corpus which are not covered by the analyser, and these results show that the errors mostly result from Turkish words in the corpus and missing stems in our lexicon.</abstract>
       <url hash="b0663f1f">R19-1101</url>
@@ -1115,7 +1115,7 @@
       <title>Term Based Semantic Clusters for Very Short Text Classification</title>
       <author><first>Jasper</first><last>Paalman</last></author>
       <author><first>Shantanu</first><last>Mullick</last></author>
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
       <author><first>Yingqian</first><last>Zhang</last></author>
       <pages>878–887</pages>
       <abstract>Very short texts, such as tweets and invoices, present challenges in classification. Although term occurrences are strong indicators of content, in very short texts, the sparsity of these texts makes it difficult to capture important semantic relationships. A solution calls for a method that not only considers term occurrence, but also handles sparseness well. In this work, we introduce such an approach, the Term Based Semantic Clusters (TBSeC) that employs terms to create distinctive semantic concept clusters. These clusters are ranked using a semantic similarity function which in turn defines a semantic feature space that can be used for text classification. Our method is evaluated in an invoice classification task. Compared to well-known content representation methods the proposed method performs competitively.</abstract>
@@ -1126,7 +1126,7 @@
     <paper id="103">
       <title>Quotation Detection and Classification with a Corpus-Agnostic Model</title>
       <author><first>Sean</first><last>Papay</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>888–894</pages>
       <abstract>The detection of quotations (i.e., reported speech, thought, and writing) has established itself as an NLP analysis task. However, state-of-the-art models have been developed on the basis of specific corpora and incorpo- rate a high degree of corpus-specific assumptions and knowledge, which leads to fragmentation. In the spirit of task-agnostic modeling, we present a corpus-agnostic neural model for quotation detection and evaluate it on three corpora that vary in language, text genre, and structural assumptions. The model (a) approaches the state-of-the-art on the corpora when using established feature sets and (b) shows reasonable performance even when us- ing solely word forms, which makes it applicable for non-standard (i.e., historical) corpora.</abstract>
       <url hash="208966a6">R19-1103</url>
@@ -1149,11 +1149,11 @@
     <paper id="105">
       <title>A Neural Network Component for Knowledge-Based Semantic Representations of Text</title>
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Yudivian</first><last>Almeida-Cruz</last></author>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>904–911</pages>
       <abstract>This paper presents Semantic Neural Networks (SNNs), a knowledge-aware component based on deep learning. SNNs can be trained to encode explicit semantic knowledge from an arbitrary knowledge base, and can subsequently be combined with other deep learning architectures. At prediction time, SNNs provide a semantic encoding extracted from the input data, which can be exploited by other neural network components to build extended representation models that can face alternative problems. The SNN architecture is defined in terms of the concepts and relations present in a knowledge base. Based on this architecture, a training procedure is developed. Finally, an experimental setup is presented to illustrate the behaviour and performance of a SNN for a specific NLP problem, in this case, opinion mining for the classification of movie reviews.</abstract>
       <url hash="be4f8802">R19-1105</url>
@@ -1164,7 +1164,7 @@
       <title>Toponym Detection in the Bio-Medical Domain: A Hybrid Approach with Deep Learning</title>
       <author><first>Alistair</first><last>Plum</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>912–921</pages>
       <abstract>This paper compares how different machine learning classifiers can be used together with simple string matching and named entity recognition to detect locations in texts. We compare five different state-of-the-art machine learning classifiers in order to predict whether a sentence contains a location or not. Following this classification task, we use a string matching algorithm with a gazetteer to identify the exact index of a toponym within the sentence. We evaluate different approaches in terms of machine learning classifiers, text pre-processing and location extraction on the SemEval-2019 Task 12 dataset, compiled for toponym resolution in the bio-medical domain. Finally, we compare the results with our system that was previously submitted to the SemEval-2019 task evaluation.</abstract>
       <url hash="5a059526">R19-1106</url>
@@ -1174,7 +1174,7 @@
     <paper id="107">
       <title>Combining <fixed-case>PBSMT</fixed-case> and <fixed-case>NMT</fixed-case> Back-translated Data for Efficient <fixed-case>NMT</fixed-case></title>
       <author><first>Alberto</first><last>Poncelas</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Dimitar</first><last>Shterionov</last></author>
       <author><first>Gideon</first><last>Maillette de Buy Wenniger</last></author>
       <author><first>Andy</first><last>Way</last></author>
@@ -1209,7 +1209,7 @@
     <paper id="110">
       <title>Know Your Graph. State-of-the-Art Knowledge-Based <fixed-case>WSD</fixed-case></title>
       <author><first>Alexander</first><last>Popov</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>949–958</pages>
       <abstract>This paper introduces several improvements over the current state of the art in knowledge-based word sense disambiguation. Those innovations are the result of modifying and enriching a knowledge base created originally on the basis of WordNet. They reflect several separate but connected strategies: manipulating the shape and the content of the knowledge base, assigning weights over the relations in the knowledge base, and the addition of new relations to it. The main contribution of the paper is to demonstrate that the previously proposed knowledge bases organize linguistic and world knowledge suboptimally for the task of word sense disambiguation. In doing so, the paper also establishes a new state of the art for knowledge-based approaches. Its best models are competitive in the broader context of supervised systems as well.</abstract>
@@ -1219,7 +1219,7 @@
     </paper>
     <paper id="111">
       <title>Are ambiguous conjunctions problematic for machine translation?</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <pages>959–966</pages>
       <abstract>The translation of ambiguous words still poses challenges for machine translation. In this work, we carry out a systematic quantitative analysis regarding the ability of different machine translation systems to disambiguate the source language conjunctions “but” and “and”. We evaluate specialised test sets focused on the translation of these two conjunctions. The test sets contain source languages that do not distinguish different variants of the given conjunction, whereas the target languages do. In total, we evaluate the conjunction “but” on 20 translation outputs, and the conjunction “and” on 10. All machine translation systems almost perfectly recognise one variant of the target conjunction, especially for the source conjunction “but”. The other target variant, however, represents a challenge for machine translation systems, with accuracy varying from 50% to 95% for “but” and from 20% to 57% for “and”. The major error for all systems is replacing the correct target variant with the opposite one.</abstract>
@@ -1265,7 +1265,7 @@
     <paper id="115">
       <title>Enhancing Unsupervised Sentence Similarity Methods with Deep Contextualised Word Representations</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Ruslan</first><last>Mitkov</last></author>
       <pages>994–1003</pages>
       <abstract>Calculating Semantic Textual Similarity (STS) plays a significant role in many applications such as question answering, document summarisation, information retrieval and information extraction. All modern state of the art STS methods rely on word embeddings one way or another. The recently introduced contextualised word embeddings have proved more effective than standard word embeddings in many natural language processing tasks. This paper evaluates the impact of several contextualised word embeddings on unsupervised STS methods and compares it with the existing supervised/unsupervised STS methods for different datasets in different languages and different domains</abstract>
@@ -1276,7 +1276,7 @@
     <paper id="116">
       <title>Semantic Textual Similarity with <fixed-case>S</fixed-case>iamese Neural Networks</title>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Ruslan</first><last>Mitkov</last></author>
       <pages>1004–1011</pages>
       <abstract>Calculating the Semantic Textual Similarity (STS) is an important research area in natural language processing which plays a significant role in many applications such as question answering, document summarisation, information retrieval and information extraction. This paper evaluates Siamese recurrent architectures, a special type of neural networks, which are used here to measure STS. Several variants of the architecture are compared with existing methods</abstract>
@@ -1288,7 +1288,7 @@
       <title>Analysing the Impact of Supervised Machine Learning on Automatic Term Extraction: <fixed-case>HAMLET</fixed-case> vs <fixed-case>T</fixed-case>ermo<fixed-case>S</fixed-case>tat</title>
       <author><first>Ayla</first><last>Rigouts Terryn</last></author>
       <author><first>Patrick</first><last>Drouin</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>1012–1021</pages>
       <abstract>Traditional approaches to automatic term extraction do not rely on machine learning (ML) and select the top n ranked candidate terms or candidate terms above a certain predefined cut-off point, based on a limited number of linguistic and statistical clues. However, supervised ML approaches are gaining interest. Relatively little is known about the impact of these supervised methodologies; evaluations are often limited to precision, and sometimes recall and f1-scores, without information about the nature of the extracted candidate terms. Therefore, the current paper presents a detailed and elaborate analysis and comparison of a traditional, state-of-the-art system (TermoStat) and a new, supervised ML approach (HAMLET), using the results obtained for the same, manually annotated, Dutch corpus about dressage.</abstract>
@@ -1299,7 +1299,7 @@
     <paper id="118">
       <title>Distant Supervision for Sentiment Attitude Extraction</title>
       <author><first>Nicolay</first><last>Rusnachenko</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Elena</first><last>Tutubalina</last></author>
       <pages>1022–1030</pages>
       <abstract>News articles often convey attitudes between the mentioned subjects, which is essential for understanding the described situation. In this paper, we describe a new approach to distant supervision for extracting sentiment attitudes between named entities mentioned in texts. Two factors (pair-based and frame-based) were used to automatically label an extensive news collection, dubbed as RuAttitudes. The latter became a basis for adaptation and training convolutional architectures, including piecewise max pooling and full use of information across different sentences. The results show that models, trained with RuAttitudes, outperform ones that were trained with only supervised learning approach and achieve 13.4% increase in F1-score on RuSentRel collection.</abstract>
@@ -1311,7 +1311,7 @@
       <title>Self-Attentional Models Application in Task-Oriented Dialogue Generation Systems</title>
       <author><first>Mansour</first><last>Saffar Mehrjardi</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar R.</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar R.</first><last>Zaiane</last></author>
       <pages>1031–1040</pages>
       <abstract>Self-attentional models are a new paradigm for sequence modelling tasks which differ from common sequence modelling methods, such as recurrence-based and convolution-based sequence learning, in the way that their architecture is only based on the attention mechanism. Self-attentional models have been used in the creation of the state-of-the-art models in many NLP task such as neural machine translation, but their usage has not been explored for the task of training end-to-end task-oriented dialogue generation systems yet. In this study, we apply these models on the DSTC2 dataset for training task-oriented chatbots. Our finding shows that self-attentional models can be exploited to create end-to-end task-oriented chatbots which not only achieve higher evaluation scores compared to recurrence-based models, but also do so more efficiently.</abstract>
       <url hash="fdfbdb79">R19-1119</url>
@@ -1321,9 +1321,9 @@
     <paper id="120">
       <title>Whom to Learn From? Graph- vs. Text-based Word Embeddings</title>
       <author><first>Małgorzata</first><last>Salawa</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
       <pages>1041–1051</pages>
       <abstract>Vectorial representations of meaning can be supported by empirical data from diverse sources and obtained with diverse embedding approaches. This paper aims at screening this experimental space and reports on an assessment of word embeddings supported (i) by data in raw texts vs. in lexical graphs, (ii) by lexical information encoded in association- vs. inference-based graphs, and obtained (iii) by edge reconstruction- vs. matrix factorisation vs. random walk-based graph embedding methods. The results observed with these experiments indicate that the best solutions with graph-based word embeddings are very competitive, consistently outperforming mainstream text-based ones.</abstract>
@@ -1333,8 +1333,8 @@
     </paper>
     <paper id="121">
       <title>Persistence pays off: Paying Attention to What the <fixed-case>LSTM</fixed-case> Gating Mechanism Persists</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>1052–1059</pages>
       <abstract>Recurrent Neural Network Language Models composed of LSTM units, especially those augmented with an external memory, have achieved state-of-the-art results in Language Modeling. However, these models still struggle to process long sequences which are more likely to contain long-distance dependencies because of information fading. In this paper we demonstrate an effective mechanism for retrieving information in a memory augmented LSTM LM based on attending to information in memory in proportion to the number of timesteps the LSTM gating mechanism persisted the information.</abstract>
       <url hash="44a99322">R19-1121</url>
@@ -1355,7 +1355,7 @@
     <paper id="123">
       <title>Moral Stance Recognition and Polarity Classification from <fixed-case>T</fixed-case>witter and Elicited Text</title>
       <author><first>Wesley</first><last>Santos</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>1069–1075</pages>
       <abstract>We introduce a labelled corpus of stances about moral issues for the Brazilian Portuguese language, and present reference results for both the stance recognition and polarity classification tasks. The corpus is built from Twitter and further expanded with data elicited through crowd sourcing and labelled by their own authors. Put together, the corpus and reference results are expected to be taken as a baseline for further studies in the field of stance recognition and polarity classification from text.</abstract>
       <url hash="7ed1ff6b">R19-1123</url>
@@ -1396,9 +1396,9 @@
     <paper id="127">
       <title>A Morpho-Syntactically Informed <fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model for Named Entity Recognition</title>
       <author><first>Lilia</first><last>Simeonova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1104–1113</pages>
       <abstract>We propose a morphologically informed model for named entity recognition, which is based on LSTM-CRF architecture and combines word embeddings, Bi-LSTM character embeddings, part-of-speech (POS) tags, and morphological information. While previous work has focused on learning from raw word input, using word and character embeddings only, we show that for morphologically rich languages, such as Bulgarian, access to POS information contributes more to the performance gains than the detailed morphological information. Thus, we show that named entity recognition needs only coarse-grained POS tags, but at the same time it can benefit from simultaneously using some POS information of different granularity. Our evaluation results over a standard dataset show sizeable improvements over the state-of-the-art for Bulgarian NER.</abstract>
       <url hash="74886e28">R19-1127</url>
@@ -1408,7 +1408,7 @@
     <paper id="128">
       <title>Named Entity Recognition in Information Security Domain for <fixed-case>R</fixed-case>ussian</title>
       <author><first>Anastasiia</first><last>Sirotina</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>1114–1120</pages>
       <abstract>In this paper we discuss the named entity recognition task for Russian texts related to cybersecurity. First of all, we describe the problems that arise in course of labeling unstructured texts from information security domain. We introduce guidelines for human annotators, according to which a corpus has been marked up. Then, a CRF-based system and different neural architectures have been implemented and applied to the corpus. The named entity recognition systems have been evaluated and compared to determine the most efficient one.</abstract>
       <url hash="64853053">R19-1128</url>
@@ -1437,8 +1437,8 @@
     </paper>
     <paper id="131">
       <title>Automated Text Simplification as a Preprocessing Step for Machine Translation into an Under-resourced Language</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>1141–1150</pages>
       <abstract>In this work, we investigate the possibility of using fully automatic text simplification system on the English source in machine translation (MT) for improving its translation into an under-resourced language. We use the state-of-the-art automatic text simplification (ATS) system for lexically and syntactically simplifying source sentences, which are then translated with two state-of-the-art English-to-Serbian MT systems, the phrase-based MT (PBMT) and the neural MT (NMT). We explore three different scenarios for using the ATS in MT: (1) using the raw output of the ATS; (2) automatically filtering out the sentences with low grammaticality and meaning preservation scores; and (3) performing a minimal manual correction of the ATS output. Our results show improvement in fluency of the translation regardless of the chosen scenario, and difference in success of the three scenarios depending on the MT approach used (PBMT or NMT) with regards to improving translation fluency and post-editing effort.</abstract>
       <url hash="39f85c13">R19-1131</url>
@@ -1450,7 +1450,7 @@
       <author><first>Kenneth</first><last>Steimel</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
       <author><first>Yue</first><last>Chen</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>1151–1160</pages>
       <abstract>Abusive language detection has received much attention in the last years, and recent approaches perform the task in a number of different languages. We investigate which factors have an effect on multilingual settings, focusing on the compatibility of data and annotations. In the current paper, we focus on English and German. Our findings show large differences in performance between the two languages. We find that the best performance is achieved by different classification algorithms. Sampling to address class imbalance issues is detrimental for German and beneficial for English. The only similarity that we find is that neither data set shows clear topics when we compare the results of topic modeling to the gold standard. Based on our findings, we can conclude that a multilingual optimization of classifiers is not possible even in settings where comparable data sets are used.</abstract>
       <url hash="26796c6e">R19-1132</url>
@@ -1481,7 +1481,7 @@
     </paper>
     <paper id="135">
       <title>A Quantum-Like Approach to Word Sense Disambiguation</title>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>1176–1185</pages>
       <abstract>This paper presents a novel algorithm for Word Sense Disambiguation (WSD) based on Quantum Probability Theory. The Quantum WSD algorithm requires concepts representations as vectors in the complex domain and thus we have developed a technique for computing complex word and sentence embeddings based on the Paragraph Vectors algorithm. Despite the proposed method is quite simple and that it does not require long training phases, when it is evaluated on a standardized benchmark for this task it exhibits state-of-the-art (SOTA) performances.</abstract>
       <url hash="2a2f026d">R19-1135</url>
@@ -1503,7 +1503,7 @@
       <title>Text-Based Joint Prediction of Numeric and Categorical Attributes of Entities in Knowledge Bases</title>
       <author><first>V</first><last>Thejas</last></author>
       <author><first>Abhijeet</first><last>Gupta</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>1194–1202</pages>
       <abstract>Collaboratively constructed knowledge bases play an important role in information systems, but are essentially always incomplete. Thus, a large number of models has been developed for Knowledge Base Completion, the task of predicting new attributes of entities given partial descriptions of these entities. Virtually all of these models either concentrate on numeric attributes (&lt;Italy,GDP,2T$&gt;) or they concentrate on categorical attributes (&lt;Tim Cook,chairman,Apple&gt;). In this paper, we propose a simple feed-forward neural architecture to jointly predict numeric and categorical attributes based on embeddings learned from textual occurrences of the entities in question. Following insights from multi-task learning, our hypothesis is that due to the correlations among attributes of different kinds, joint prediction improves over separate prediction. Our experiments on seven FreeBase domains show that this hypothesis is true of the two attribute types: we find substantial improvements for numeric attributes in the joint model, while performance remains largely unchanged for categorical attributes. Our analysis indicates that this is the case because categorical attributes, many of which describe membership in various classes, provide useful ‘background knowledge’ for numeric prediction, while this is true to a lesser degree in the inverse direction.</abstract>
       <url hash="1e788182">R19-1137</url>
@@ -1541,7 +1541,7 @@
       <title>Cross-Lingual Word Embeddings for Morphologically Rich Languages</title>
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1222–1228</pages>
       <abstract>Cross-lingual word embedding models learn a shared vector space for two or more languages so that words with similar meaning are represented by similar vectors regardless of their language. Although the existing models achieve high performance on pairs of morphologically simple languages, they perform very poorly on morphologically rich languages such as Turkish and Finnish. In this paper, we propose a morpheme-based model in order to increase the performance of cross-lingual word embeddings on morphologically rich languages. Our model includes a simple extension which enables us to exploit morphemes for cross-lingual mapping. We applied our model for the Turkish-Finnish language pair on the bilingual word translation task. Results show that our model outperforms the baseline models by 2% in the nearest neighbour ranking.</abstract>
       <url hash="a6ad34bc">R19-1140</url>
@@ -1552,9 +1552,9 @@
       <title>It Takes Nine to Smell a Rat: Neural Multi-Task Learning for Check-Worthiness Prediction</title>
       <author><first>Slavena</first><last>Vasileva</last></author>
       <author><first>Pepa</first><last>Atanasova</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1229–1239</pages>
       <abstract>We propose a multi-task deep-learning approach for estimating the check-worthiness of claims in political debates. Given a political debate, such as the 2016 US Presidential and Vice-Presidential ones, the task is to predict which statements in the debate should be prioritized for fact-checking. While different fact-checking organizations would naturally make different choices when analyzing the same debate, we show that it pays to learn from multiple sources simultaneously (PolitiFact, FactCheck, ABC, CNN, NPR, NYT, Chicago Tribune, The Guardian, and Washington Post) in a multi-task learning setup, even when a particular source is chosen as a target to imitate. Our evaluation shows state-of-the-art results on a standard dataset for the task of check-worthiness prediction.</abstract>
       <url hash="24bbadff">R19-1141</url>
@@ -1653,7 +1653,7 @@
       <title>Bigger versus Similar: Selecting a Background Corpus for First Story Detection Based on Distributional Similarity</title>
       <author><first>Fei</first><last>Wang</last></author>
       <author><first>Robert J.</first><last>Ross</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>1312–1320</pages>
       <abstract>The current state of the art for First Story Detection (FSD) are nearest neighbour-based models with traditional term vector representations; however, one challenge faced by FSD models is that the document representation is usually defined by the vocabulary and term frequency from a background corpus. Consequently, the ideal background corpus should arguably be both large-scale to ensure adequate term coverage, and similar to the target domain in terms of the language distribution. However, given these two factors cannot always be mutually satisfied, in this paper we examine whether the distributional similarity of common terms is more important than the scale of common terms for FSD. As a basis for our analysis we propose a set of metrics to quantitatively measure the scale of common terms and the distributional similarity between corpora. Using these metrics we rank different background corpora relative to a target corpus. We also apply models based on different background corpora to the FSD task. Our results show that term distributional similarity is more predictive of good FSD performance than the scale of common terms; and, thus we demonstrate that a smaller recent domain-related corpus will be more suitable than a very large-scale general corpus for FSD.</abstract>
       <url hash="6dbbf3d3">R19-1150</url>
@@ -1706,7 +1706,7 @@
     <paper id="155">
       <title>A Survey of the Perceived Text Adaptation Needs of Adults with Autism</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Le An</first><last>Ha</last></author>
       <author><first>Natalia</first><last>Ponomareva</last></author>
       <pages>1356–1363</pages>
@@ -1741,7 +1741,7 @@
       <title><fixed-case>T</fixed-case>urkish Tweet Classification with Transformer Encoder</title>
       <author><first>Atıf Emre</first><last>Yüksel</last></author>
       <author><first>Yaşar Alim</first><last>Türkmen</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Berna</first><last>Altınel</last></author>
       <pages>1380–1387</pages>
       <abstract>Short-text classification is a challenging task, due to the sparsity and high dimensionality of the feature space. In this study, we aim to analyze and classify Turkish tweets based on their topics. Social media jargon and the agglutinative structure of the Turkish language makes this classification task even harder. As far as we know, this is the first study that uses a Transformer Encoder for short text classification in Turkish. The model is trained in a weakly supervised manner, where the training data set has been labeled automatically. Our results on the test set, which has been manually labeled, show that performing morphological analysis improves the classification performance of the traditional machine learning algorithms Random Forest, Naive Bayes, and Support Vector Machines. Still, the proposed approach achieves an F-score of 89.3 % outperforming those algorithms by at least 5 points.</abstract>
diff --git a/data/xml/S01.xml b/data/xml/S01.xml
index fd4db78542..c169af9363 100644
--- a/data/xml/S01.xml
+++ b/data/xml/S01.xml
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>SENSEVAL</fixed-case>-2: Overview</title>
-      <author><first>Philip</first><last>Edmonds</last></author>
+      <author id="philip-edmonds"><first>Philip</first><last>Edmonds</last></author>
       <author><first>Scott</first><last>Cotton</last></author>
       <pages>1–5</pages>
       <url hash="d8b68887">S01-1001</url>
@@ -26,10 +26,10 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>B</fixed-case>asque Task: Did Systems Perform in the Upperbound?</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Elena</first><last>Garcia</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Eli</first><last>Pociello</last></author>
       <pages>9–12</pages>
       <url hash="0942a14d">S01-1002</url>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title><fixed-case>D</fixed-case>utch Word Sense Disambiguation: Data and Preliminary Results</title>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>13–16</pages>
       <url hash="9b6f9c37">S01-1003</url>
       <bibkey>hendrickx-van-den-bosch-2001-dutch</bibkey>
@@ -52,11 +52,11 @@
     </paper>
     <paper id="5">
       <title><fixed-case>E</fixed-case>nglish Tasks: All-Words and Verb Lexical Sample</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <author><first>Scott</first><last>Cotton</last></author>
       <author><first>Lauren</first><last>Delfs</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>21–24</pages>
       <url hash="55a09bad">S01-1005</url>
       <bibkey>palmer-etal-2001-english</bibkey>
@@ -65,7 +65,7 @@
       <title>Sensiting Inflectionality: <fixed-case>E</fixed-case>stonian Task for <fixed-case>SENSEVAL</fixed-case>-2</title>
       <author><first>Neeme</first><last>Kahusk</last></author>
       <author><first>Heili</first><last>Orav</last></author>
-      <author><first>Haldur</first><last>Õim</last></author>
+      <author id="haldur-oim"><first>Haldur</first><last>Õim</last></author>
       <pages>25–28</pages>
       <url hash="520a8e4a">S01-1006</url>
       <bibkey>kahusk-etal-2001-sensiting</bibkey>
@@ -74,7 +74,7 @@
       <title>The <fixed-case>I</fixed-case>talian Lexical Sample Task</title>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>29–32</pages>
       <url hash="2dd09af3">S01-1007</url>
       <bibkey>bertagna-etal-2001-italian</bibkey>
@@ -95,9 +95,9 @@
     </paper>
     <paper id="10">
       <title>Framework and Results for the <fixed-case>S</fixed-case>panish <fixed-case>SENSEVAL</fixed-case></title>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Ana</first><last>Fernandez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="ana-fernandez"><first>Ana</first><last>Fernandez</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <pages>41–44</pages>
       <url hash="3e4eb5f4">S01-1010</url>
@@ -121,7 +121,7 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>J</fixed-case>apanese Translation Task: Lexical and Structural Perspectives</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Atsushi</first><last>Okazaki</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
@@ -141,7 +141,7 @@
     <paper id="15">
       <title>Probabilistic Network Models for Word Sense Disambiguation</title>
       <author><first>Gerald</first><last>Chao</last></author>
-      <author><first>Michael G.</first><last>Dyer</last></author>
+      <author id="michael-g-dyer"><first>Michael G.</first><last>Dyer</last></author>
       <pages>63–66</pages>
       <url hash="d5a87249">S01-1015</url>
       <bibkey>chao-dyer-2001-probabilistic</bibkey>
@@ -149,8 +149,8 @@
     <paper id="16">
       <title>Improving <fixed-case>WSD</fixed-case> with Multi-Level View of Context Monitored by Similarity Measure</title>
       <author><first>Eric</first><last>Crestan</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
-      <author><first>Claude</first><last>de Loupy</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
+      <author id="claude-de-loupy"><first>Claude</first><last>de Loupy</last></author>
       <pages>67–70</pages>
       <url hash="11f7b7ba">S01-1016</url>
       <bibkey>crestan-etal-2001-improving</bibkey>
@@ -158,15 +158,15 @@
     <paper id="17">
       <title>Using <fixed-case>L</fixed-case>azy<fixed-case>B</fixed-case>oosting for Word Sense Disambiguation</title>
       <author><first>Gerard</first><last>Escudero</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>71–74</pages>
       <url hash="fa7516e7">S01-1017</url>
       <bibkey>escudero-etal-2001-using</bibkey>
     </paper>
     <paper id="18">
       <title>The <fixed-case>UNED</fixed-case> Systems at <fixed-case>SENSEVAL</fixed-case>-2</title>
-      <author><first>David</first><last>Fernández-Amorós</last></author>
+      <author id="david-fernandez-amoros"><first>David</first><last>Fernández-Amorós</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>75–78</pages>
@@ -182,9 +182,9 @@
     </paper>
     <paper id="20">
       <title>Classifier Optimization and Combination in the <fixed-case>E</fixed-case>nglish All Words Task</title>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Anne</first><last>Kool</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>83–86</pages>
       <url hash="752b639c">S01-1020</url>
       <bibkey>hoste-etal-2001-classifier</bibkey>
@@ -194,7 +194,7 @@
       <author><first>H. Tolga</first><last>Ilhan</last></author>
       <author><first>Sepandar D.</first><last>Kamvar</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <pages>87–90</pages>
       <url hash="4a8471a3">S01-1021</url>
@@ -210,7 +210,7 @@
     <paper id="23">
       <title><fixed-case>ATR</fixed-case>-<fixed-case>SLT</fixed-case> System for <fixed-case>SENSEVAL</fixed-case>-2 <fixed-case>J</fixed-case>apanese Translation Task</title>
       <author><first>Tadashi</first><last>Kumano</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>95–98</pages>
       <url hash="9bc55d17">S01-1023</url>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="24">
       <title>Sense and Deduction: The Power of Peewees Applied to the <fixed-case>SENSEVAL</fixed-case>-2 <fixed-case>S</fixed-case>wedish Lexical Sample Task</title>
-      <author><first>Torbjörn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjörn</first><last>Lager</last></author>
       <author><first>Natalia</first><last>Zinovjeva</last></author>
       <pages>99–102</pages>
       <url hash="e020e12e">S01-1024</url>
@@ -227,9 +227,9 @@
     <paper id="25">
       <title>Primitive-Based Word Sense Disambiguation for <fixed-case>SENSEVAL</fixed-case>-2</title>
       <author><last>Lim</last><first>Beng Tat</first></author>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
-      <author><last>Tang</last><first>Enya Kong</first></author>
-      <author><last>Guo</last><first>Cheng Ming</first></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
+      <author><first>Enya Kong</first><last>Tang</last></author>
+      <author id="cheng-ming-guo"><first>Cheng Ming</first><last>Guo</last></author>
       <pages>103–106</pages>
       <url hash="b6651c7a">S01-1025</url>
       <bibkey>lim-etal-2001-primitive</bibkey>
@@ -243,26 +243,26 @@
     </paper>
     <paper id="27">
       <title>Using Domain Information for Word Sense Disambiguation</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Giovanni</first><last>Pezzulo</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <pages>111–114</pages>
       <url hash="52276399">S01-1027</url>
       <bibkey>magnini-etal-2001-using</bibkey>
     </paper>
     <paper id="28">
       <title>Decision Lists for <fixed-case>E</fixed-case>nglish and <fixed-case>B</fixed-case>asque</title>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>115–118</pages>
       <url hash="2e5b6e6d">S01-1028</url>
       <bibkey>martinez-agirre-2001-decision</bibkey>
     </paper>
     <paper id="29">
       <title>Disambiguating Noun and Verb Senses Using Automatically Acquired Selectional Preferences</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Judita</first><last>Preiss</last></author>
       <pages>119–122</pages>
       <url hash="04cec89b">S01-1029</url>
@@ -278,16 +278,16 @@
     </paper>
     <paper id="31">
       <title>Pattern Learning and Active Feature Selection for Word Sense Disambiguation</title>
-      <author><first>Rada F.</first><last>Mihalcea</last></author>
-      <author><first>Dan I.</first><last>Moldovan</last></author>
+      <author id="rada-mihalcea"><first>Rada F.</first><last>Mihalcea</last></author>
+      <author id="dan-moldovan"><first>Dan I.</first><last>Moldovan</last></author>
       <pages>127–130</pages>
       <url hash="0506f8ba">S01-1031</url>
       <bibkey>mihalcea-moldovan-2001-pattern</bibkey>
     </paper>
     <paper id="32">
       <title><fixed-case>T</fixed-case>he <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>licante Word Sense Disambiguation System</title>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
       <pages>131–134</pages>
       <url hash="e063fe26">S01-1032</url>
       <bibkey>montoyo-suarez-2001-university</bibkey>
@@ -321,7 +321,7 @@
       <title><fixed-case>KUNLP</fixed-case> system using Classification Information Model at <fixed-case>SENSEVAL</fixed-case>-2</title>
       <author><first>Hee-Cheol</first><last>Seo</last></author>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <author><first>Ho</first><last>Lee</last></author>
       <pages>147–150</pages>
       <url hash="5bfd7614">S01-1036</url>
@@ -356,8 +356,8 @@
     <paper id="40">
       <title><fixed-case>T</fixed-case>he <fixed-case>J</fixed-case>ohn <fixed-case>H</fixed-case>opkins <fixed-case>SENSEVAL</fixed-case>-2 System Descriptions</title>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Charles</first><last>Schafer</last></author>
       <author><first>Richard</first><last>Wicentowski</last></author>
       <pages>163–166</pages>
diff --git a/data/xml/S07.xml b/data/xml/S07.xml
index db4eaa5f3d..562c8e0e46 100644
--- a/data/xml/S07.xml
+++ b/data/xml/S07.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Fourth International Workshop on Semantic Evaluations (<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007)</booktitle>
       <url hash="94d2c8de">S07-1</url>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <editor><first>Richard</first><last>Wicentowski</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -20,10 +20,10 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 01: Evaluating <fixed-case>WSD</fixed-case> on Cross-Language Information Retrieval</title>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Arantxa</first><last>Otegi</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>1–6</pages>
       <url hash="26335382">S07-1001</url>
@@ -32,18 +32,18 @@
     <paper id="2">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 02: Evaluating Word Sense Induction and Discrimination Systems</title>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>7–12</pages>
       <url hash="843a1e61">S07-1002</url>
       <bibkey>agirre-soroa-2007-semeval</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 04: Classification of Semantic Relations between Nominals</title>
-      <author><first>Roxana</first><last>Girju</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>13–18</pages>
       <url hash="13a2d0ca">S07-1003</url>
@@ -86,16 +86,16 @@
     <paper id="8">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 09: Multilevel Semantic Annotation of <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish</title>
       <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Luis</first><last>Villarejo</last></author>
+      <author id="luis-villarejo"><first>Luis</first><last>Villarejo</last></author>
       <author id="m-antonia-marti"><first>M. A.</first><last>Martí</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <pages>42–47</pages>
       <url hash="99472d3f">S07-1008</url>
       <bibkey>marquez-etal-2007-semeval</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 10: <fixed-case>E</fixed-case>nglish Lexical Substitution Task</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>48–53</pages>
       <url hash="5afe42c3">S07-1009</url>
@@ -111,7 +111,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 12: <fixed-case>T</fixed-case>urkish Lexical Sample Task</title>
-      <author><first>Zeynep</first><last>Orhan</last></author>
+      <author id="zeynep-orhan"><first>Zeynep</first><last>Orhan</last></author>
       <author><first>Emine</first><last>Çelik</last></author>
       <author><first>Demirgüç</first><last>Neslihan</last></author>
       <pages>59–63</pages>
@@ -130,7 +130,7 @@
     <paper id="13">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 14: Affective Text</title>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>70–74</pages>
       <url hash="3715a8ed">S07-1013</url>
       <bibkey>strapparava-mihalcea-2007-semeval</bibkey>
@@ -138,11 +138,11 @@
     <paper id="14">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 15: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Temporal Relation Identification</title>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>75–80</pages>
       <url hash="68702a8d">S07-1014</url>
       <bibkey>verhagen-etal-2007-semeval</bibkey>
@@ -150,37 +150,37 @@
     <paper id="15">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 16: Evaluation of Wide Coverage Knowledge Resources</title>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>81–86</pages>
       <url hash="12e1de01">S07-1015</url>
       <bibkey>cuadros-rigau-2007-semeval</bibkey>
     </paper>
     <paper id="16">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task-17: <fixed-case>E</fixed-case>nglish Lexical Sample, <fixed-case>SRL</fixed-case> and All Words</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Edward</first><last>Loper</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>87–92</pages>
       <url hash="5c519222">S07-1016</url>
       <bibkey>pradhan-etal-2007-semeval</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 18: <fixed-case>A</fixed-case>rabic Semantic Labeling</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Musa</first><last>Alkhalifa</last></author>
       <author><first>Sabry</first><last>ElKateb</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <author><first>Aous</first><last>Mansouri</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>93–98</pages>
       <url hash="e09695c2">S07-1017</url>
       <bibkey>diab-etal-2007-semeval</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 19: Frame Semantic Structure Extraction</title>
-      <author><first>Collin</first><last>Baker</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
       <pages>99–104</pages>
       <url hash="1f3ac062">S07-1018</url>
@@ -189,7 +189,7 @@
     <paper id="19">
       <title><fixed-case>AUG</fixed-case>: A combined classification and clustering approach for web people disambiguation</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Timur</first><last>Fayruzov</last></author>
       <pages>105–108</pages>
       <url hash="6b8c1911">S07-1019</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>CITYU</fixed-case>-<fixed-case>HIF</fixed-case>: <fixed-case>WSD</fixed-case> with Human-Informed Feature Preference</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>109–112</pages>
       <url hash="cea77731">S07-1020</url>
       <bibkey>kwong-2007-cityu</bibkey>
@@ -228,7 +228,7 @@
     <paper id="24">
       <title><fixed-case>CU</fixed-case>-<fixed-case>COMSEM</fixed-case>: Exploring Rich Features for Unsupervised Web Personal Name Disambiguation</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>125–128</pages>
       <url hash="dad274f2">S07-1024</url>
       <bibkey>chen-martin-2007-cu</bibkey>
@@ -236,14 +236,14 @@
     <paper id="25">
       <title><fixed-case>CU</fixed-case>-<fixed-case>TMP</fixed-case>: Temporal Relation Classification Using Syntactic and Semantic Features</title>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>129–132</pages>
       <url hash="b229f99b">S07-1025</url>
       <bibkey>bethard-martin-2007-cu</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>CUNIT</fixed-case>: A Semantic Role Labeling System for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
       <pages>133–136</pages>
@@ -253,25 +253,25 @@
     <paper id="27">
       <title><fixed-case>DFKI</fixed-case>2: An Information Extraction Based Approach to People Disambiguation</title>
       <author><first>Andrea</first><last>Heyl</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>137–140</pages>
       <url hash="3c66e437">S07-1027</url>
       <bibkey>heyl-neumann-2007-dfki2</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>FBK</fixed-case>-<fixed-case>IRST</fixed-case>: Kernel Methods for Semantic Relation Extraction</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
       <pages>141–144</pages>
       <url hash="9037866f">S07-1028</url>
       <bibkey>giuliano-etal-2007-fbk</bibkey>
     </paper>
     <paper id="29">
       <title><fixed-case>FBK</fixed-case>-irst: Lexical Substitution Task Exploiting Domain and Syntagmatic Coherence</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>145–148</pages>
       <url hash="85e3913b">S07-1029</url>
@@ -294,19 +294,19 @@
     </paper>
     <paper id="32">
       <title><fixed-case>GPLSI</fixed-case>: Word Coarse-grained Disambiguation aided by Basic Level Concepts</title>
-      <author><first>Rubén</first><last>Izquierdo</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="ruben-izquierdo"><first>Rubén</first><last>Izquierdo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>157–160</pages>
       <url hash="21589348">S07-1032</url>
       <bibkey>izquierdo-etal-2007-gplsi</bibkey>
     </paper>
     <paper id="33">
       <title><fixed-case>GYDER</fixed-case>: Maxent Metonymy Resolution</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Eszter</first><last>Simon</last></author>
       <author><first>György</first><last>Szarvas</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <pages>161–164</pages>
       <url hash="72197350">S07-1033</url>
       <bibkey>farkas-etal-2007-gyder</bibkey>
@@ -324,9 +324,9 @@
     </paper>
     <paper id="35">
       <title><fixed-case>HIT</fixed-case>-<fixed-case>WSD</fixed-case>: Using Search Engine for Multilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Lexical Sample Task</title>
-      <author><first>PengYuan</first><last>Liu</last></author>
-      <author><first>TieJun</first><last>Zhao</last></author>
-      <author><first>MuYun</first><last>Yang</last></author>
+      <author id="pengyuan-liu"><first>PengYuan</first><last>Liu</last></author>
+      <author id="tiejun-zhao"><first>TieJun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>MuYun</first><last>Yang</last></author>
       <pages>169–172</pages>
       <url hash="2f1a10bf">S07-1035</url>
       <bibkey>liu-etal-2007-hit</bibkey>
@@ -344,9 +344,9 @@
     </paper>
     <paper id="37">
       <title><fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>: Three Systems for Word Sense Discrimination, <fixed-case>C</fixed-case>hinese Word Sense Disambiguation, and <fixed-case>E</fixed-case>nglish Word Sense Disambiguation</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>177–182</pages>
       <url hash="c08608b3">S07-1037</url>
       <bibkey>niu-etal-2007-i2r</bibkey>
@@ -364,7 +364,7 @@
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>187–190</pages>
       <url hash="c73daadf">S07-1039</url>
       <bibkey>hendrickx-etal-2007-ilk</bibkey>
@@ -373,7 +373,7 @@
       <title><fixed-case>IRST</fixed-case>-<fixed-case>BP</fixed-case>: Preposition Disambiguation based on Chain Clarifying Relationships Contexts</title>
       <author><first>Octavian</first><last>Popescu</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>191–194</pages>
       <url hash="e75c62f7">S07-1040</url>
       <bibkey>popescu-etal-2007-irst</bibkey>
@@ -381,7 +381,7 @@
     <paper id="41">
       <title><fixed-case>IRST</fixed-case>-<fixed-case>BP</fixed-case>: Web People Search Using Name Entities</title>
       <author><first>Octavian</first><last>Popescu</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>195–198</pages>
       <url hash="080fcd9f">S07-1041</url>
       <bibkey>popescu-magnini-2007-irst</bibkey>
@@ -397,8 +397,8 @@
     </paper>
     <paper id="43">
       <title><fixed-case>JU</fixed-case>-<fixed-case>SKNSB</fixed-case>: Extended <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Based <fixed-case>WSD</fixed-case> on the <fixed-case>E</fixed-case>nglish All-Words Task at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-1</title>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>203–206</pages>
       <url hash="9f3d96c6">S07-1043</url>
       <bibkey>naskar-bandyopadhyay-2007-ju</bibkey>
@@ -412,8 +412,8 @@
     </paper>
     <paper id="45">
       <title><fixed-case>LCC</fixed-case>-<fixed-case>SRN</fixed-case>: <fixed-case>LCC</fixed-case>’s <fixed-case>SRN</fixed-case> System for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2007 Task 4</title>
-      <author><first>Adriana</first><last>Badulescu</last></author>
-      <author><first>Munirathnam</first><last>Srikanth</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Badulescu</last></author>
+      <author id="munirathnam-srikanth"><first>Munirathnam</first><last>Srikanth</last></author>
       <pages>215–218</pages>
       <url hash="d3211f5a">S07-1045</url>
       <bibkey>badulescu-srikanth-2007-lcc</bibkey>
@@ -421,7 +421,7 @@
     <paper id="46">
       <title><fixed-case>LCC</fixed-case>-<fixed-case>TE</fixed-case>: A Hybrid Approach to Temporal Relation Identification in News Text</title>
       <author><first>Congmin</first><last>Min</last></author>
-      <author><first>Munirathnam</first><last>Srikanth</last></author>
+      <author id="munirathnam-srikanth"><first>Munirathnam</first><last>Srikanth</last></author>
       <author><first>Abraham</first><last>Fowler</last></author>
       <pages>219–222</pages>
       <url hash="fc7a1c1a">S07-1046</url>
@@ -430,8 +430,8 @@
     <paper id="47">
       <title><fixed-case>LCC</fixed-case>-<fixed-case>WSD</fixed-case>: System Description for <fixed-case>E</fixed-case>nglish Coarse Grained All Words Task at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2007</title>
       <author><first>Adrian</first><last>Novischi</last></author>
-      <author><first>Muirathnam</first><last>Srikanth</last></author>
-      <author><first>Andrew</first><last>Bennett</last></author>
+      <author id="munirathnam-srikanth"><first>Muirathnam</first><last>Srikanth</last></author>
+      <author id="andrew-bennett"><first>Andrew</first><last>Bennett</last></author>
       <pages>223–226</pages>
       <url hash="305ba929">S07-1047</url>
       <bibkey>novischi-etal-2007-lcc</bibkey>
@@ -447,16 +447,16 @@
     <paper id="49">
       <title><fixed-case>MELB</fixed-case>-<fixed-case>KB</fixed-case>: Nominal Classification as Noun Compound Interpretation</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>231–236</pages>
       <url hash="464847d1">S07-1049</url>
       <bibkey>kim-baldwin-2007-melb</bibkey>
     </paper>
     <paper id="50">
       <title><fixed-case>MELB</fixed-case>-<fixed-case>MKB</fixed-case>: Lexical Substitution system based on Relatives in Context</title>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>237–240</pages>
       <url hash="af1441f8">S07-1050</url>
       <bibkey>martinez-etal-2007-melb</bibkey>
@@ -464,7 +464,7 @@
     <paper id="51">
       <title><fixed-case>MELB</fixed-case>-<fixed-case>YB</fixed-case>: Preposition Sense Disambiguation Using Rich Semantic Features</title>
       <author><first>Patrick</first><last>Ye</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>241–244</pages>
       <url hash="e33dc4aa">S07-1051</url>
       <bibkey>ye-baldwin-2007-melb</bibkey>
@@ -473,14 +473,14 @@
       <title><fixed-case>NAIST</fixed-case>.<fixed-case>J</fixed-case>apan: Temporal Relation Identification Using Dependency Parsed Tree</title>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>245–248</pages>
       <url hash="c6994ffe">S07-1052</url>
       <bibkey>cheng-etal-2007-naist</bibkey>
     </paper>
     <paper id="53">
       <title><fixed-case>NUS</fixed-case>-<fixed-case>ML</fixed-case>:Improving Word Sense Disambiguation Using Topic Features</title>
-      <author><first>Jun Fu</first><last>Cai</last></author>
+      <author id="jun-fu-cai"><first>Jun Fu</first><last>Cai</last></author>
       <author><first>Wee Sun</first><last>Lee</last></author>
       <author><first>Yee Whye</first><last>Teh</last></author>
       <pages>249–252</pages>
@@ -517,7 +517,7 @@
       <title><fixed-case>PNNL</fixed-case>: A Supervised Maximum Entropy Approach to Word Sense Disambiguation</title>
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>Antonio</first><last>Sanfilippo</last></author>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <author><first>Alan</first><last>Chappell</last></author>
       <author><first>Christian</first><last>Posse</last></author>
       <author><first>Paul</first><last>Whitney</last></author>
@@ -540,7 +540,7 @@
       <title><fixed-case>PU</fixed-case>-<fixed-case>BCD</fixed-case>: Exponential Family Models for the Coarse- and Fine-Grained All-Words Tasks</title>
       <author><first>Jonathan</first><last>Chang</last></author>
       <author><first>Miroslav</first><last>Dudík</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <pages>272–276</pages>
       <url hash="51a228a7">S07-1059</url>
       <bibkey>chang-etal-2007-pu</bibkey>
@@ -548,7 +548,7 @@
     <paper id="60">
       <title><fixed-case>PUTOP</fixed-case>: Turning Predominant Senses into a Topic Model for Word Sense Disambiguation</title>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>David</first><last>Blei</last></author>
+      <author id="david-blei"><first>David</first><last>Blei</last></author>
       <pages>277–281</pages>
       <url hash="ceba9d06">S07-1060</url>
       <bibkey>boyd-graber-blei-2007-putop</bibkey>
@@ -556,7 +556,7 @@
     <paper id="61">
       <title><fixed-case>RACAI</fixed-case>: Meaning Affinity Models</title>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <pages>282–287</pages>
       <url hash="fecc68d7">S07-1061</url>
       <bibkey>ion-tufis-2007-racai</bibkey>
@@ -565,7 +565,7 @@
       <title><fixed-case>RTV</fixed-case>: Tree Kernels for Thematic Role Classification</title>
       <author><first>Daniele</first><last>Pighin</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>288–291</pages>
       <url hash="4395ca3b">S07-1062</url>
       <bibkey>pighin-etal-2007-rtv</bibkey>
@@ -614,7 +614,7 @@
     <paper id="68">
       <title><fixed-case>S</fixed-case>ussx: <fixed-case>WSD</fixed-case> using Automatically Acquired Predominant Senses</title>
       <author><first>Rob</first><last>Koeling</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>314–317</pages>
       <url hash="8e8485a2">S07-1068</url>
       <bibkey>koeling-mccarthy-2007-sussx</bibkey>
@@ -622,7 +622,7 @@
     <paper id="69">
       <title><fixed-case>TITPI</fixed-case>: Web People Search Task Using Semi-Supervised Clustering Approach</title>
       <author><first>Kazunari</first><last>Sugiyama</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>318–321</pages>
       <url hash="a5897fb7">S07-1069</url>
       <bibkey>sugiyama-okumura-2007-titpi</bibkey>
@@ -638,7 +638,7 @@
     </paper>
     <paper id="71">
       <title>Tor, <fixed-case>T</fixed-case>or<fixed-case>M</fixed-case>d: Distributional Profiles of Concepts for Unsupervised Word Sense Disambiguation</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>326–333</pages>
@@ -648,9 +648,9 @@
     <paper id="72">
       <title><fixed-case>UA</fixed-case>-<fixed-case>ZBSA</fixed-case>: A Headline Emotion Classification through Web Information</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Borja</first><last>Navarro</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>334–337</pages>
       <url hash="9d3400db">S07-1072</url>
       <bibkey>kozareva-etal-2007-ua</bibkey>
@@ -658,8 +658,8 @@
     <paper id="73">
       <title><fixed-case>UA</fixed-case>-<fixed-case>ZSA</fixed-case>: Web Page Clustering on the basis of Name Disambiguation</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Sonia</first><last>Vazquez</last></author>
-      <author><first>Andres</first><last>Montoyo</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vazquez</last></author>
+      <author id="andres-montoyo"><first>Andres</first><last>Montoyo</last></author>
       <pages>338–341</pages>
       <url hash="873cd2a7">S07-1073</url>
       <bibkey>kozareva-etal-2007-ua-zsa</bibkey>
@@ -667,7 +667,7 @@
     <paper id="74">
       <title><fixed-case>UBC</fixed-case>-<fixed-case>ALM</fixed-case>: Combining k-<fixed-case>NN</fixed-case> with <fixed-case>SVD</fixed-case> for <fixed-case>WSD</fixed-case></title>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <pages>342–345</pages>
       <url hash="148bcc02">S07-1074</url>
       <bibkey>agirre-lopez-de-lacalle-2007-ubc</bibkey>
@@ -675,17 +675,17 @@
     <paper id="75">
       <title><fixed-case>UBC</fixed-case>-<fixed-case>AS</fixed-case>: A Graph Based Unsupervised System for Induction and Classification</title>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>346–349</pages>
       <url hash="1e1429bb">S07-1075</url>
       <bibkey>agirre-soroa-2007-ubc</bibkey>
     </paper>
     <paper id="76">
       <title><fixed-case>UBC</fixed-case>-<fixed-case>UMB</fixed-case>: Combining unsupervised and supervised systems for all-words <fixed-case>WSD</fixed-case></title>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <pages>350–353</pages>
       <url hash="48f22752">S07-1076</url>
       <bibkey>martinez-etal-2007-ubc</bibkey>
@@ -719,15 +719,15 @@
     </paper>
     <paper id="80">
       <title><fixed-case>UCB</fixed-case>: System Description for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task #4</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>366–369</pages>
       <url hash="1b92a957">S07-1080</url>
       <bibkey>nakov-hearst-2007-ucb</bibkey>
     </paper>
     <paper id="81">
       <title><fixed-case>UCD</fixed-case>-<fixed-case>FC</fixed-case>: Deducing semantic relations using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et senses that occur frequently in a database of noun-noun compounds</title>
-      <author><first>Fintan J.</first><last>Costello</last></author>
+      <author id="fintan-j-costello"><first>Fintan J.</first><last>Costello</last></author>
       <pages>370–373</pages>
       <url hash="74fbbbba">S07-1081</url>
       <bibkey>costello-2007-ucd</bibkey>
@@ -749,7 +749,7 @@
     </paper>
     <paper id="84">
       <title><fixed-case>UCM</fixed-case>3: Classification of Semantic Relations between Nominals using Sequential Minimal Optimization</title>
-      <author><first>Isabel</first><last>Segura Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura Bedmar</last></author>
       <author><first>Doaa</first><last>Samy</last></author>
       <author><first>Jose L.</first><last>Martinez</last></author>
       <pages>382–385</pages>
@@ -763,7 +763,7 @@
       <author><first>Brant</first><last>Chee</last></author>
       <author><first>Andrew</first><last>Fister</last></author>
       <author><first>Alla</first><last>Rozovskaya</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>386–389</pages>
       <url hash="3a692d86">S07-1085</url>
       <bibkey>beamer-etal-2007-uiuc</bibkey>
@@ -790,7 +790,7 @@
       <author><first>Marco</first><last>de Gemmis</last></author>
       <author><first>Anna Lisa</first><last>Gentile</last></author>
       <author><first>Pasquale</first><last>Lops</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>398–401</pages>
       <url hash="b3e80d1a">S07-1088</url>
       <bibkey>basile-etal-2007-uniba</bibkey>
@@ -805,8 +805,8 @@
     </paper>
     <paper id="90">
       <title><fixed-case>UNT</fixed-case>-Yahoo: <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Combining <fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner with <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense and other Coarse Semantic Features</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Andras</first><last>Csomai</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="andras-csomai"><first>Andras</first><last>Csomai</last></author>
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <pages>406–409</pages>
       <url hash="db074948">S07-1090</url>
@@ -815,17 +815,17 @@
     <paper id="91">
       <title><fixed-case>UNT</fixed-case>: <fixed-case>S</fixed-case>ub<fixed-case>F</fixed-case>inder: Combining Knowledge Sources for Automatic Lexical Substitution</title>
       <author><first>Samer</first><last>Hassan</last></author>
-      <author><first>Andras</first><last>Csomai</last></author>
+      <author id="andras-csomai"><first>Andras</first><last>Csomai</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Ravi</first><last>Sinha</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>410–413</pages>
       <url hash="f6240285">S07-1091</url>
       <bibkey>hassan-etal-2007-unt</bibkey>
     </paper>
     <paper id="92">
       <title><fixed-case>UOY</fixed-case>: A Hypergraph Model For Word Sense Induction &amp; Disambiguation</title>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>414–417</pages>
       <url hash="d2dc5d14">S07-1092</url>
@@ -848,16 +848,16 @@
     <paper id="95">
       <title><fixed-case>UPC</fixed-case>: Experiments with Joint Learning within <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 9</title>
       <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Luis</first><last>Villarejo</last></author>
+      <author id="luis-villarejo"><first>Luis</first><last>Villarejo</last></author>
       <pages>426–429</pages>
       <url hash="6ea435a5">S07-1095</url>
       <bibkey>marquez-etal-2007-upc</bibkey>
     </paper>
     <paper id="96">
       <title><fixed-case>UPV</fixed-case>-<fixed-case>SI</fixed-case>: Word Sense Induction using Self Term Expansion</title>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Héctor</first><last>Jiménez-Salazar</last></author>
       <pages>430–433</pages>
@@ -875,8 +875,8 @@
     <paper id="98">
       <title><fixed-case>USFD</fixed-case>: Preliminary Exploration of Features and Classifiers for the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2007 Task</title>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>438–441</pages>
       <url hash="1050f6ae">S07-1098</url>
       <bibkey>hepple-etal-2007-usfd</bibkey>
@@ -884,7 +884,7 @@
     <paper id="99">
       <title><fixed-case>USP</fixed-case>-<fixed-case>IBM</fixed-case>-1 and <fixed-case>USP</fixed-case>-<fixed-case>IBM</fixed-case>-2: The <fixed-case>ILP</fixed-case>-based Systems for Lexical Sample <fixed-case>WSD</fixed-case> in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Maria</first><last>das Graças</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças</last></author>
       <author><first>Volpe</first><last>Nunes</last></author>
       <author><first>Ashwin</first><last>Srinivasan</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
@@ -903,14 +903,14 @@
       <title><fixed-case>UTD</fixed-case>-<fixed-case>HLT</fixed-case>-<fixed-case>CG</fixed-case>: Semantic Architecture for Metonymy Resolution and Classification of Nominal Relations</title>
       <author><first>Cristina</first><last>Nicolae</last></author>
       <author><first>Gabriel</first><last>Nicolae</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>454–459</pages>
       <url hash="cba6c8dc">S07-1101</url>
       <bibkey>nicolae-etal-2007-utd</bibkey>
     </paper>
     <paper id="102">
       <title><fixed-case>UTD</fixed-case>-<fixed-case>SRL</fixed-case>: A Pipeline Architecture for Extracting Frame Semantic Structures</title>
-      <author><first>Cosmin Adrian</first><last>Bejan</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin Adrian</first><last>Bejan</last></author>
       <author><first>Chris</first><last>Hathaway</last></author>
       <pages>460–463</pages>
       <url hash="4681b0cb">S07-1102</url>
@@ -930,14 +930,14 @@
       <title><fixed-case>UVA</fixed-case>: Language Modeling Techniques for Web People Search</title>
       <author><first>Krisztian</first><last>Balog</last></author>
       <author><first>Leif</first><last>Azzopardi</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>468–471</pages>
       <url hash="e8c71fe8">S07-1104</url>
       <bibkey>balog-etal-2007-uva</bibkey>
     </paper>
     <paper id="105">
       <title><fixed-case>UVAVU</fixed-case>: <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Similarity and Lexical Patterns for Semantic Relation Classification</title>
-      <author><first>Willem Robert</first><last>van Hage</last></author>
+      <author id="willem-robert-van-hage"><first>Willem Robert</first><last>van Hage</last></author>
       <author><first>Sophia</first><last>Katrenko</last></author>
       <pages>472–475</pages>
       <url hash="4a0c98c1">S07-1105</url>
@@ -946,14 +946,14 @@
     <paper id="106">
       <title><fixed-case>U</fixed-case>of<fixed-case>L</fixed-case>: Word Sense Disambiguation Using Lexical Cohesion</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Shafiq R.</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq R.</first><last>Joty</last></author>
       <pages>476–479</pages>
       <url hash="e2edeb0e">S07-1106</url>
       <bibkey>chali-joty-2007-uofl</bibkey>
     </paper>
     <paper id="107">
       <title><fixed-case>WIT</fixed-case>: Web People Search Disambiguation using Random Walks</title>
-      <author><first>José</first><last>Iria</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <author><first>Lei</first><last>Xia</last></author>
       <author><first>Ziqi</first><last>Zhang</last></author>
       <pages>480–483</pages>
diff --git a/data/xml/S10.xml b/data/xml/S10.xml
index d648768067..c224ad0a40 100644
--- a/data/xml/S10.xml
+++ b/data/xml/S10.xml
@@ -19,12 +19,12 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 1: Coreference Resolution in Multiple Languages</title>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Emili</first><last>Sapena</last></author>
-      <author><first>M. Antònia</first><last>Martí</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="m-antonia-marti"><first>M. Antònia</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <pages>1–8</pages>
       <url hash="a430f79d">S10-1001</url>
@@ -32,9 +32,9 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 2: Cross-Lingual Lexical Substitution</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ravi</first><last>Sinha</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>9–14</pages>
       <url hash="2da81694">S10-1002</url>
       <bibkey>mihalcea-etal-2010-semeval</bibkey>
@@ -42,7 +42,7 @@
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 3: Cross-Lingual Word Sense Disambiguation</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>15–20</pages>
       <url hash="8489d63e">S10-1003</url>
       <bibkey>lefever-hoste-2010-semeval</bibkey>
@@ -52,14 +52,14 @@
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Olena</first><last>Medelyan</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>21–26</pages>
       <url hash="dba6e4c0">S10-1004</url>
       <bibkey>kim-etal-2010-semeval</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 7: Argument Selection and Coercion</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <author><first>Alex</first><last>Plotnick</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
@@ -74,12 +74,12 @@
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>33–38</pages>
       <url hash="5481969a">S10-1006</url>
       <bibkey>hendrickx-etal-2010-semeval</bibkey>
@@ -88,9 +88,9 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 Task 9: The Interpretation of Noun Compounds Using Paraphrasing Verbs and Prepositions</title>
       <author><first>Cristina</first><last>Butnariu</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Tony</first><last>Veale</last></author>
       <pages>39–44</pages>
       <url hash="7c3d5dc4">S10-1007</url>
@@ -101,8 +101,8 @@
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>45–50</pages>
       <url hash="0f44d3c6">S10-1008</url>
       <bibkey>ruppenhofer-etal-2010-semeval</bibkey>
@@ -119,9 +119,9 @@
     <paper id="10">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 13: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2</title>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>57–62</pages>
       <url hash="2d84aa78">S10-1010</url>
       <bibkey>verhagen-etal-2010-semeval</bibkey>
@@ -129,16 +129,16 @@
     <paper id="11">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 14: Word Sense Induction &amp;Disambiguation</title>
       <author><first>Suresh</first><last>Manandhar</last></author>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <pages>63–68</pages>
       <url hash="7ad1cbcf">S10-1011</url>
       <bibkey>manandhar-etal-2010-semeval</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task: <fixed-case>J</fixed-case>apanese <fixed-case>WSD</fixed-case></title>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Kiyoaki</first><last>Shirai</last></author>
       <author><first>Kanako</first><last>Komiya</last></author>
       <author><first>Hikaru</first><last>Yokono</last></author>
@@ -148,14 +148,14 @@
     </paper>
     <paper id="13">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 17: All-Words Word Sense Disambiguation on a Specific Domain</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
-      <author><first>Maurizio</first><last>Tesconi</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tesconi</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Roxanne</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxanne</first><last>Segers</last></author>
       <pages>75–80</pages>
       <url hash="9c97b145">S10-1013</url>
       <bibkey>agirre-etal-2010-semeval</bibkey>
@@ -186,8 +186,8 @@
     <paper id="17">
       <title><fixed-case>R</fixed-case>elax<fixed-case>C</fixed-case>or: A Global Relaxation Labeling Approach to Coreference Resolution</title>
       <author><first>Emili</first><last>Sapena</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>88–91</pages>
       <url hash="e13b3583">S10-1017</url>
       <bibkey>sapena-etal-2010-relaxcor</bibkey>
@@ -195,7 +195,7 @@
     <paper id="18">
       <title><fixed-case>SUCRE</fixed-case>: A Modular System for Coreference Resolution</title>
       <author><first>Hamidreza</first><last>Kobdani</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>92–95</pages>
       <url hash="d20740d0">S10-1018</url>
       <bibkey>kobdani-schutze-2010-sucre</bibkey>
@@ -203,7 +203,7 @@
     <paper id="19">
       <title><fixed-case>UBIU</fixed-case>: A Language-Independent System for Coreference Resolution</title>
       <author><first>Desislava</first><last>Zhekova</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>96–99</pages>
       <url hash="55e184e0">S10-1019</url>
       <bibkey>zhekova-kubler-2010-ubiu</bibkey>
@@ -218,10 +218,10 @@
     <paper id="21">
       <title><fixed-case>BART</fixed-case>: A Multilingual Anaphora Resolution System</title>
       <author><first>Samuel</first><last>Broscheit</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Kepa Joseba</first><last>Rodriguez</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa Joseba</first><last>Rodriguez</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <author><first>Roberto</first><last>Zanoli</last></author>
@@ -233,18 +233,18 @@
       <title><fixed-case>TANL</fixed-case>-1: Coreference Resolution by Parse Analysis and Similarity Clustering</title>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <author><first>Maria</first><last>Simi</last></author>
-      <author><first>Stefano</first><last>Dei Rossi</last></author>
+      <author id="stefano-dei-rossi"><first>Stefano</first><last>Dei Rossi</last></author>
       <pages>108–111</pages>
       <url hash="48758da0">S10-1022</url>
       <bibkey>attardi-etal-2010-tanl</bibkey>
     </paper>
     <paper id="23">
       <title><fixed-case>FCC</fixed-case>: Modeling Probabilities with <fixed-case>GIZA</fixed-case>++ for Task 2 and 3 of <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2</title>
-      <author><first>Darnes</first><last>Vilariño Ayala</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño Ayala</last></author>
       <author><first>Carlos</first><last>Balderas Posada</last></author>
-      <author><first>David Eduardo</first><last>Pinto Avendaño</last></author>
-      <author><first>Miguel</first><last>Rodríguez Hernández</last></author>
-      <author><first>Saul</first><last>León Silverio</last></author>
+      <author id="david-pinto"><first>David Eduardo</first><last>Pinto Avendaño</last></author>
+      <author id="miguel-rodriguez-hernandez"><first>Miguel</first><last>Rodríguez Hernández</last></author>
+      <author id="saul-leon"><first>Saul</first><last>León Silverio</last></author>
       <pages>112–116</pages>
       <url hash="c9fc6c79">S10-1023</url>
       <bibkey>vilarino-ayala-etal-2010-fcc</bibkey>
@@ -269,7 +269,7 @@
     <paper id="26">
       <title><fixed-case>COLEPL</fixed-case> and <fixed-case>COLSLM</fixed-case>: An Unsupervised <fixed-case>WSD</fixed-case> Approach to Multilingual Lexical Substitution, Tasks 2 and 3 <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2010</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>129–133</pages>
       <url hash="f35ef802">S10-1026</url>
       <bibkey>guo-diab-2010-colepl</bibkey>
@@ -277,7 +277,7 @@
     <paper id="27">
       <title><fixed-case>UHD</fixed-case>: Cross-Lingual Word Sense Disambiguation Using Multilingual Co-Occurrence Graphs</title>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>134–137</pages>
       <url hash="b6a0b8fd">S10-1027</url>
       <bibkey>silberer-ponzetto-2010-uhd</bibkey>
@@ -286,8 +286,8 @@
       <title><fixed-case>OWNS</fixed-case>: Cross-lingual Word Sense Disambiguation Using Weighted Overlap Counts and <fixed-case>W</fixed-case>ordnet Based Similarity Measures</title>
       <author><first>Lipta</first><last>Mahapatra</last></author>
       <author><first>Meera</first><last>Mohan</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>138–141</pages>
       <url hash="21b03293">S10-1028</url>
       <bibkey>mahapatra-etal-2010-owns</bibkey>
@@ -312,7 +312,7 @@
     <paper id="31">
       <title><fixed-case>DFKI</fixed-case> <fixed-case>K</fixed-case>ey<fixed-case>WE</fixed-case>: Ranking Keyphrases Extracted from Scientific Articles</title>
       <author><first>Kathrin</first><last>Eichler</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>150–153</pages>
       <url hash="a295d5ab">S10-1031</url>
       <bibkey>eichler-neumann-2010-dfki</bibkey>
@@ -343,14 +343,14 @@
     <paper id="35">
       <title><fixed-case>WINGNUS</fixed-case>: Keyphrase Extraction Utilizing Document Logical Structure</title>
       <author><first>Thuy Dung</first><last>Nguyen</last></author>
-      <author><first>Minh-Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Minh-Thang</first><last>Luong</last></author>
       <pages>166–169</pages>
       <url hash="579369b3">S10-1035</url>
       <bibkey>nguyen-luong-2010-wingnus</bibkey>
     </paper>
     <paper id="36">
       <title><fixed-case>KX</fixed-case>: A Flexible System for Keyphrase e<fixed-case>X</fixed-case>traction</title>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <pages>170–173</pages>
       <url hash="e1625ec1">S10-1036</url>
@@ -359,7 +359,7 @@
     <paper id="37">
       <title><fixed-case>BUAP</fixed-case>: An Unsupervised Approach to Automatic Keyphrase Extraction from Scientific Articles</title>
       <author><first>Roberto</first><last>Ortiz</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Mireya</first><last>Tovar</last></author>
       <author><first>Héctor</first><last>Jiménez-Salazar</last></author>
       <pages>174–177</pages>
@@ -370,7 +370,7 @@
       <title><fixed-case>UNPMC</fixed-case>: Naive Approach to Extract Keyphrases from Scientific Articles</title>
       <author><first>Jungyeul</first><last>Park</last></author>
       <author><first>Jong Gun</first><last>Lee</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>178–181</pages>
       <url hash="0ee4f190">S10-1038</url>
       <bibkey>park-etal-2010-unpmc</bibkey>
@@ -388,14 +388,14 @@
     <paper id="40">
       <title><fixed-case>SZTERGAK</fixed-case> : Feature Engineering for Keyphrase Extraction</title>
       <author><first>Gábor</first><last>Berend</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>186–189</pages>
       <url hash="911114e1">S10-1040</url>
       <bibkey>berend-farkas-2010-sztergak</bibkey>
     </paper>
     <paper id="41">
       <title><fixed-case>KP</fixed-case>-Miner: Participation in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2</title>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <author><first>Ahmed</first><last>Rafea</last></author>
       <pages>190–193</pages>
       <url hash="85602442">S10-1041</url>
@@ -403,7 +403,7 @@
     </paper>
     <paper id="42">
       <title><fixed-case>U</fixed-case>v<fixed-case>T</fixed-case>: The <fixed-case>U</fixed-case>v<fixed-case>T</fixed-case> Term Extraction System in the Keyphrase Extraction Task</title>
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
       <pages>194–197</pages>
       <url hash="86e31ef6">S10-1042</url>
       <bibkey>zervanou-2010-uvt</bibkey>
@@ -417,7 +417,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>FBK</fixed-case>_<fixed-case>NK</fixed-case>: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based System for Multi-Way Classification of Semantic Relations</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <pages>202–205</pages>
       <url hash="964cebc5">S10-1044</url>
@@ -428,7 +428,7 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>206–209</pages>
       <url hash="4384e9ac">S10-1045</url>
       <bibkey>pal-etal-2010-ju</bibkey>
@@ -444,7 +444,7 @@
     <paper id="47">
       <title><fixed-case>FBK</fixed-case>-<fixed-case>IRST</fixed-case>: Semantic Relation Extraction Using <fixed-case>C</fixed-case>yc</title>
       <author><first>Kateryna</first><last>Tymoshenko</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <pages>214–217</pages>
       <url hash="b441dc3e">S10-1047</url>
       <bibkey>tymoshenko-giuliano-2010-fbk</bibkey>
@@ -461,7 +461,7 @@
     <paper id="49">
       <title><fixed-case>ISI</fixed-case>: Automatic Classification of Relations Between Nominals Using a Maximum Entropy Classifier</title>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>222–225</pages>
       <url hash="e79ed150">S10-1049</url>
       <bibkey>tratz-hovy-2010-isi</bibkey>
@@ -469,9 +469,9 @@
     <paper id="50">
       <title><fixed-case>ECNU</fixed-case>: Effective Semantic Relations Classification without Complicated Features or Multiple External Corpora</title>
       <author><first>Yuan</first><last>Chen</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Zhi Min</first><last>Zhou</last></author>
+      <author id="zhi-min-zhou"><first>Zhi Min</first><last>Zhou</last></author>
       <author><first>Yu</first><last>Xu</last></author>
       <pages>226–229</pages>
       <url hash="cd17712e">S10-1050</url>
@@ -489,7 +489,7 @@
     <paper id="52">
       <title><fixed-case>UCD</fixed-case>-<fixed-case>PN</fixed-case>: Selecting General Paraphrases Using Conditional Probability</title>
       <author><first>Paul</first><last>Nulty</last></author>
-      <author><first>Fintan</first><last>Costello</last></author>
+      <author id="fintan-j-costello"><first>Fintan</first><last>Costello</last></author>
       <pages>234–237</pages>
       <url hash="73eb0f2d">S10-1052</url>
       <bibkey>nulty-costello-2010-ucd</bibkey>
@@ -504,7 +504,7 @@
     <paper id="54">
       <title><fixed-case>UBA</fixed-case>: Using Automatic Translation and <fixed-case>W</fixed-case>ikipedia for Cross-Lingual Lexical Substitution</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>242–247</pages>
       <url hash="e172648f">S10-1054</url>
       <bibkey>basile-semeraro-2010-uba</bibkey>
@@ -512,7 +512,7 @@
     <paper id="55">
       <title><fixed-case>HUMB</fixed-case>: Automatic Key Term Extraction from Scientific Articles in <fixed-case>GROBID</fixed-case></title>
       <author><first>Patrice</first><last>Lopez</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>248–251</pages>
       <url hash="424fc6c7">S10-1055</url>
       <bibkey>lopez-romary-2010-humb</bibkey>
@@ -520,7 +520,7 @@
     <paper id="56">
       <title><fixed-case>UTDM</fixed-case>et: Combining <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Corpus Data for Argument Coercion Detection</title>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>252–255</pages>
       <url hash="cbdabdb9">S10-1056</url>
       <bibkey>roberts-harabagiu-2010-utdmet</bibkey>
@@ -528,7 +528,7 @@
     <paper id="57">
       <title><fixed-case>UTD</fixed-case>: Classifying Semantic Relations by Combining Lexical and Semantic Resources</title>
       <author><first>Bryan</first><last>Rink</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>256–259</pages>
       <url hash="859727d8">S10-1057</url>
       <bibkey>rink-harabagiu-2010-utd</bibkey>
@@ -545,7 +545,7 @@
       <author><first>Desai</first><last>Chen</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>264–267</pages>
       <url hash="5951b7de">S10-1059</url>
       <bibkey>chen-etal-2010-semafor</bibkey>
@@ -569,16 +569,16 @@
     <paper id="62">
       <title><fixed-case>TRIPS</fixed-case> and <fixed-case>TRIOS</fixed-case> System for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2: Extracting Temporal Information from Text</title>
       <author><first>Naushad</first><last>UzZaman</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>276–283</pages>
       <url hash="5f895537">S10-1062</url>
       <bibkey>uzzaman-allen-2010-trips</bibkey>
     </paper>
     <paper id="63">
       <title><fixed-case>TIPS</fixed-case>em (<fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish): Evaluating <fixed-case>CRF</fixed-case>s and Semantic Roles in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2</title>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
-      <author><first>Borja</first><last>Navarro</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
       <pages>284–291</pages>
       <url hash="78450737">S10-1063</url>
       <bibkey>llorens-etal-2010-tipsem</bibkey>
@@ -586,7 +586,7 @@
     <paper id="64">
       <title><fixed-case>C</fixed-case>ity<fixed-case>U</fixed-case>-<fixed-case>DAC</fixed-case>: Disambiguating Sentiment-Ambiguous Adjectives within Context</title>
       <author><first>Bin</first><last>Lu</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>292–295</pages>
       <url hash="7d522333">S10-1064</url>
       <bibkey>lu-tsou-2010-cityu</bibkey>
@@ -594,7 +594,7 @@
     <paper id="65">
       <title><fixed-case>VENSES</fixed-case>++: Adapting a deep semantic processing system to the identification of null instantiations</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>296–299</pages>
       <url hash="2eaad053">S10-1065</url>
       <bibkey>tonelli-delmonte-2010-venses</bibkey>
@@ -609,9 +609,9 @@
     <paper id="67">
       <title><fixed-case>PKU</fixed-case>_<fixed-case>HIT</fixed-case>: An Event Detection System Based on Instances Expansion and Rich Syntactic Features</title>
       <author><first>Shiqi</first><last>Li</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Hanjing</first><last>Li</last></author>
       <pages>304–307</pages>
       <url hash="746a5e49">S10-1067</url>
@@ -620,7 +620,7 @@
     <paper id="68">
       <title>372:Comparing the Benefit of Different Dependency Parsers for Textual Entailment Using Syntactic Constraints Only</title>
       <author><first>Alexander</first><last>Volokh</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>308–312</pages>
       <url hash="b154003a">S10-1068</url>
       <bibkey>volokh-neumann-2010-372</bibkey>
@@ -630,14 +630,14 @@
       <author><first>Dominick</first><last>Ng</last></author>
       <author><first>James W.D.</first><last>Constable</last></author>
       <author><first>Matthew</first><last>Honnibal</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>313–316</pages>
       <url hash="92f11c1c">S10-1069</url>
       <bibkey>ng-etal-2010-schwa</bibkey>
     </paper>
     <paper id="70">
       <title><fixed-case>ID 392:TERSEO + T2T3</fixed-case> Transducer. A systems for Recognizing and Normalizing <fixed-case>TIMEX3</fixed-case></title>
-      <author><first>Estela</first><last>Saquete Boro</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete Boro</last></author>
       <pages>317–320</pages>
       <url hash="e504f846">S10-1070</url>
       <bibkey>saquete-boro-2010-id</bibkey>
@@ -653,7 +653,7 @@
     <paper id="72">
       <title><fixed-case>KUL</fixed-case>: Recognition and Normalization of Temporal Expressions</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>325–328</pages>
       <url hash="9eaaf453">S10-1072</url>
       <bibkey>kolomiyets-moens-2010-kul</bibkey>
@@ -661,7 +661,7 @@
     <paper id="73">
       <title><fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case> System: Determining the Extent, Type and Value of Time Expressions in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2</title>
       <author><first>María Teresa</first><last>Vicente-Díez</last></author>
-      <author><first>Julián</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julián</first><last>Moreno Schneider</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>329–332</pages>
       <url hash="8d2eab83">S10-1073</url>
@@ -671,7 +671,7 @@
       <title><fixed-case>E</fixed-case>dinburgh-<fixed-case>LTG</fixed-case>: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 System Description</title>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Richard</first><last>Tobin</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Kate</first><last>Byrne</last></author>
       <pages>333–336</pages>
       <url hash="e74ff360">S10-1074</url>
@@ -679,27 +679,27 @@
     </paper>
     <paper id="75">
       <title><fixed-case>USFD</fixed-case>2: Annotating Temporal Expresions and <fixed-case>TLINK</fixed-case>s for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>337–340</pages>
       <url hash="de0aa62b">S10-1075</url>
       <bibkey>derczynski-gaizauskas-2010-usfd2</bibkey>
     </paper>
     <paper id="76">
       <title><fixed-case>NCSU</fixed-case>: Modeling Temporal Relations with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic and Lexical Ontology</title>
-      <author><first>Eun</first><last>Ha</last></author>
+      <author id="eun-young-ha"><first>Eun</first><last>Ha</last></author>
       <author><first>Alok</first><last>Baikadi</last></author>
       <author><first>Carlyle</first><last>Licata</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>341–344</pages>
       <url hash="097141b4">S10-1076</url>
       <bibkey>ha-etal-2010-ncsu</bibkey>
     </paper>
     <paper id="77">
       <title><fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>TEMP</fixed-case>: A First Step towards Evaluating Events, Time Expressions and Temporal Relations</title>
-      <author><first>Anup</first><last>Kumar Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup</first><last>Kumar Kolya</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>345–350</pages>
       <url hash="3fd185d6">S10-1077</url>
       <bibkey>kumar-kolya-etal-2010-ju</bibkey>
@@ -747,10 +747,10 @@
     </paper>
     <paper id="83">
       <title><fixed-case>P</fixed-case>eng<fixed-case>Y</fixed-case>uan@<fixed-case>PKU</fixed-case>: Extracting Infrequent Sense Instance with the Same N-Gram Pattern for the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 15</title>
-      <author><first>Peng-Yuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Peng-Yuan</first><last>Liu</last></author>
       <author><first>Shi-Wen</first><last>Yu</last></author>
       <author><first>Shui</first><last>Liu</last></author>
-      <author><first>Tie-Jun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tie-Jun</first><last>Zhao</last></author>
       <pages>371–374</pages>
       <url hash="f89eda8c">S10-1083</url>
       <bibkey>liu-etal-2010-pengyuan</bibkey>
@@ -759,7 +759,7 @@
       <title><fixed-case>RALI</fixed-case>: Automatic Weighting of Text Window Distances</title>
       <author><first>Bernard</first><last>Brosseau-Villeneuve</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>375–378</pages>
       <url hash="72cf7484">S10-1084</url>
       <bibkey>brosseau-villeneuve-etal-2010-rali</bibkey>
@@ -787,7 +787,7 @@
       <title><fixed-case>IIITH</fixed-case>: Domain Specific Word Sense Disambiguation</title>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Abhilash</first><last>Inumella</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>387–391</pages>
       <url hash="5f879be2">S10-1087</url>
@@ -795,7 +795,7 @@
     </paper>
     <paper id="88">
       <title><fixed-case>UCF</fixed-case>-<fixed-case>WS</fixed-case>: Domain Word Sense Disambiguation Using Web Selectors</title>
-      <author><first>Hansen A.</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen A.</first><last>Schwartz</last></author>
       <author><first>Fernando</first><last>Gomez</last></author>
       <pages>392–395</pages>
       <url hash="38476a88">S10-1088</url>
@@ -815,9 +815,9 @@
     </paper>
     <paper id="90">
       <title><fixed-case>GPLSI</fixed-case>-<fixed-case>IXA</fixed-case>: Using Semantic Classes to Acquire Monosemous Training Examples from Domain Texts</title>
-      <author><first>Rubén</first><last>Izquierdo</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="ruben-izquierdo"><first>Rubén</first><last>Izquierdo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>402–406</pages>
       <url hash="f2ea8fb8">S10-1090</url>
       <bibkey>izquierdo-etal-2010-gplsi</bibkey>
@@ -836,21 +836,21 @@
     <paper id="92">
       <title><fixed-case>RACAI</fixed-case>: Unsupervised <fixed-case>WSD</fixed-case> Experiments @ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2, Task 17</title>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <pages>411–416</pages>
       <url hash="5716fbee">S10-1092</url>
       <bibkey>ion-stefanescu-2010-racai</bibkey>
     </paper>
     <paper id="93">
       <title><fixed-case>K</fixed-case>yoto: An Integrated System for Specific Domain <fixed-case>WSD</fixed-case></title>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Wauter</first><last>Bosma</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Jessie</first><last>Lo</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>417–420</pages>
       <url hash="444e9778">S10-1093</url>
       <bibkey>soroa-etal-2010-kyoto</bibkey>
@@ -858,19 +858,19 @@
     <paper id="94">
       <title><fixed-case>CFILT</fixed-case>: Resource Conscious Approaches for All-Words Domain Specific <fixed-case>WSD</fixed-case></title>
       <author><first>Anup</first><last>Kulkarni</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
       <author><first>Saurabh</first><last>Sohoney</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>421–426</pages>
       <url hash="90d569e0">S10-1094</url>
       <bibkey>kulkarni-etal-2010-cfilt</bibkey>
     </paper>
     <paper id="95">
       <title><fixed-case>UMCC</fixed-case>-<fixed-case>DLSI</fixed-case>: Integrative Resource for Disambiguation Task</title>
-      <author><first>Yoan</first><last>Gutiérrez Vázquez</last></author>
-      <author><first>Antonio</first><last>Fernandez Orquín</last></author>
-      <author><first>Andrés</first><last>Montoyo Guijarro</last></author>
-      <author><first>Sonia</first><last>Vázquez Pérez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez Vázquez</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernandez Orquín</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo Guijarro</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez Pérez</last></author>
       <pages>427–432</pages>
       <url hash="3e09d939">S10-1095</url>
       <bibkey>gutierrez-vazquez-etal-2010-umcc</bibkey>
@@ -885,7 +885,7 @@
     <paper id="97">
       <title><fixed-case>T</fixed-case>witter Based System: Using <fixed-case>T</fixed-case>witter for Disambiguating Sentiment Ambiguous Adjectives</title>
       <author><first>Alexander</first><last>Pak</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>436–439</pages>
       <url hash="e4836198">S10-1097</url>
       <bibkey>pak-paroubek-2010-twitter-based</bibkey>
@@ -900,8 +900,8 @@
     </paper>
     <paper id="99">
       <title><fixed-case>O</fixed-case>p<fixed-case>AL</fixed-case>: Applying Opinion Mining Techniques for the Disambiguation of Sentiment Ambiguous Adjectives in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 Task 18</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>444–447</pages>
       <url hash="d1096ac6">S10-1099</url>
       <bibkey>balahur-montoyo-2010-opal</bibkey>
@@ -910,7 +910,7 @@
       <title><fixed-case>HITSZ</fixed-case>_<fixed-case>CITYU</fixed-case>: Combine Collocation, Context Words and Neighboring Sentence Sentiment in Sentiment Adjectives Disambiguation</title>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>448–451</pages>
       <url hash="de15e2fa">S10-1100</url>
       <bibkey>xu-etal-2010-hitsz</bibkey>
diff --git a/data/xml/S12.xml b/data/xml/S12.xml
index 01c3cc7623..5cfab83dfe 100644
--- a/data/xml/S12.xml
+++ b/data/xml/S12.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>*<fixed-case>SEM</fixed-case> 2012: The First Joint Conference on Lexical and Computational Semantics – Volume 1: Proceedings of the main conference and the shared task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation (<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2012)</booktitle>
       <url hash="683fe37c">S12-1</url>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Suresh</first><last>Manandhar</last></editor>
       <editor><first>Yuval</first><last>Marton</last></editor>
       <editor><first>Deniz</first><last>Yuret</last></editor>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="2">
       <title>Adaptive Clustering for Coreference Resolution with Deterministic Rules and Web-Based Language Models</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>11–19</pages>
       <url hash="3fb8680a">S12-1002</url>
       <bibkey>bunescu-2012-adaptive</bibkey>
@@ -40,7 +40,7 @@
       <title>Measuring Semantic Relatedness using Multilingual Representations</title>
       <author><first>Samer</first><last>Hassan</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>20–29</pages>
       <url hash="645139cc">S12-1003</url>
       <bibkey>hassan-etal-2012-measuring</bibkey>
@@ -48,8 +48,8 @@
     <paper id="4">
       <title>Towards Building a Multilingual Semantic Network: Identifying Interlingual Links in <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Bharath</first><last>Dandala</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <pages>30–37</pages>
       <url hash="5a6ea3c5">S12-1004</url>
       <bibkey>dandala-etal-2012-towards</bibkey>
@@ -66,7 +66,7 @@
     </paper>
     <paper id="6">
       <title>The Use of Granularity in Rhetorical Relation Prediction</title>
-      <author><first>Blake</first><last>Howald</last></author>
+      <author id="blake-howald"><first>Blake</first><last>Howald</last></author>
       <author><first>Martha</first><last>Abramson</last></author>
       <pages>44–48</pages>
       <url hash="12653ca4">S12-1006</url>
@@ -83,7 +83,7 @@
       <title>Detecting Text Reuse with Modified and Weighted N-grams</title>
       <author><first>Rao Muhammad Adeel</first><last>Nawab</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <pages>54–58</pages>
       <url hash="51b716ea">S12-1008</url>
       <bibkey>nawab-etal-2012-detecting</bibkey>
@@ -108,9 +108,9 @@
     <paper id="11">
       <title>Learning Semantics and Selectional Preference of Adjective-Noun Pairs</title>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>70–74</pages>
       <url hash="002ec2a7">S12-1011</url>
       <bibkey>hermann-etal-2012-learning</bibkey>
@@ -126,7 +126,7 @@
     <paper id="13">
       <title>Towards a Flexible Semantics: Colour Terms in Collaborative Reference Tasks</title>
       <author><first>Bert</first><last>Baumgaertner</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Matthew</first><last>Stone</last></author>
       <pages>80–84</pages>
       <url hash="9708c740">S12-1013</url>
@@ -160,16 +160,16 @@
     <paper id="17">
       <title>Combining resources for <fixed-case>MWE</fixed-case>-token classification</title>
       <author><first>Richard</first><last>Fothergill</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>100–104</pages>
       <url hash="cf192a22">S12-1017</url>
       <bibkey>fothergill-baldwin-2012-combining</bibkey>
     </paper>
     <paper id="18">
       <title>Annotating Preferences in Negotiation Dialogues</title>
-      <author><first>Anaïs</first><last>Cadilhac</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anaïs</first><last>Cadilhac</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>105–113</pages>
       <url hash="bd81f261">S12-1018</url>
       <erratum id="1" hash="db49614d">S12-1018e1</erratum>
@@ -178,8 +178,8 @@
     <paper id="19">
       <title>Selecting Corpus-Semantic Models for Neurolinguistic Decoding</title>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>114–123</pages>
       <url hash="05ee3e04">S12-1019</url>
       <bibkey>murphy-etal-2012-selecting</bibkey>
@@ -194,8 +194,8 @@
     <paper id="21">
       <title>An Unsupervised Ranking Model for Noun-Noun Compositionality</title>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>132–141</pages>
       <url hash="0a2d2d4a">S12-1021</url>
       <bibkey>hermann-etal-2012-unsupervised</bibkey>
@@ -203,15 +203,15 @@
     <paper id="22">
       <title>Expanding the Range of Tractable Scope-Underspecified Semantic Representations</title>
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>142–150</pages>
       <url hash="a5462d27">S12-1022</url>
       <bibkey>manshadi-allen-2012-expanding</bibkey>
     </paper>
     <paper id="23">
       <title>Regular polysemy: A distributional model</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jason</first><last>Utt</last></author>
       <pages>151–160</pages>
       <url hash="d6403291">S12-1023</url>
@@ -237,7 +237,7 @@
     <paper id="26">
       <title>Unsupervised Induction of a Syntax-Semantics Lexicon Using Iterative Refinement</title>
       <author><first>Hagen</first><last>Fürstenau</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>180–188</pages>
       <url hash="b9c31be1">S12-1026</url>
       <bibkey>furstenau-rambow-2012-unsupervised</bibkey>
@@ -252,7 +252,7 @@
     <paper id="28">
       <title>Ensemble-based Semantic Lexicon Induction for Semantic Tagging</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>199–208</pages>
       <url hash="cf3882b8">S12-1028</url>
       <bibkey>qadir-riloff-2012-ensemble</bibkey>
@@ -260,8 +260,8 @@
     <paper id="29">
       <title>An Exact Dual Decomposition Algorithm for Shallow Semantic Parsing with Constraints</title>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>209–217</pages>
       <url hash="262996a2">S12-1029</url>
       <bibkey>das-etal-2012-exact</bibkey>
@@ -276,10 +276,10 @@
     </paper>
     <paper id="31">
       <title>The Effects of Semantic Annotations on Precision Parse Ranking</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>228–236</pages>
       <url hash="42992c5b">S12-1031</url>
       <bibkey>mackinlay-etal-2012-effects</bibkey>
@@ -295,7 +295,7 @@
     </paper>
     <paper id="33">
       <title>#Emotional Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>246–255</pages>
       <url hash="447b7325">S12-1033</url>
       <bibkey>mohammad-2012-emotional</bibkey>
@@ -327,7 +327,7 @@
     </paper>
     <paper id="37">
       <title><fixed-case>UCM</fixed-case>-<fixed-case>I</fixed-case>: A Rule-based Syntactic Approach for Resolving the Scope of Negation</title>
-      <author><first>Jorge</first><last>Carrillo de Albornoz</last></author>
+      <author id="jorge-carrillo-de-albornoz"><first>Jorge</first><last>Carrillo de Albornoz</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
       <author><first>Alberto</first><last>Díaz</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
@@ -340,8 +340,8 @@
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Alberto</first><last>Díaz</last></author>
       <author><first>Virginia</first><last>Francisco</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
-      <author><first>Jorge</first><last>Carrillo de Albornoz</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
+      <author id="jorge-carrillo-de-albornoz"><first>Jorge</first><last>Carrillo de Albornoz</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
       <pages>288–293</pages>
       <url hash="49233e8d">S12-1038</url>
@@ -360,7 +360,7 @@
       <author><first>Valerio</first><last>Basile</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Kilian</first><last>Evang</last></author>
-      <author><first>Noortje</first><last>Venhuizen</last></author>
+      <author id="noortje-venhuizen"><first>Noortje</first><last>Venhuizen</last></author>
       <pages>301–309</pages>
       <url hash="22fdced0">S12-1040</url>
       <bibkey>basile-etal-2012-ugroningen</bibkey>
@@ -369,7 +369,7 @@
       <title><fixed-case>U</fixed-case>i<fixed-case>O</fixed-case>1: Constituent-Based Discriminative Ranking for Negation Resolution</title>
       <author><first>Jonathon</first><last>Read</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <pages>310–318</pages>
       <url hash="3a905b5d">S12-1041</url>
@@ -379,7 +379,7 @@
       <title><fixed-case>U</fixed-case>i<fixed-case>O</fixed-case> 2: Sequence-labeling Negation Using Dependency Features</title>
       <author><first>Emanuele</first><last>Lapponi</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jonathon</first><last>Read</last></author>
       <pages>319–327</pages>
       <url hash="ee5a7425">S12-1042</url>
@@ -388,14 +388,14 @@
     <paper id="43">
       <title><fixed-case>UM</fixed-case>ichigan: A Conditional Random Field Model for Resolving the Scope of Negation</title>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>328–334</pages>
       <url hash="510e024d">S12-1043</url>
       <bibkey>abu-jbara-radev-2012-umichigan</bibkey>
     </paper>
     <paper id="44">
       <title><fixed-case>UW</fixed-case>ashington: Negation Resolution using Machine Learning Methods</title>
-      <author><first>James Paul</first><last>White</last></author>
+      <author id="james-paul-white"><first>James Paul</first><last>White</last></author>
       <pages>335–339</pages>
       <url hash="d7a17bfe">S12-1044</url>
       <bibkey>white-2012-uwashington</bibkey>
@@ -411,7 +411,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 1: <fixed-case>E</fixed-case>nglish Lexical Simplification</title>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>347–355</pages>
       <url hash="4813e8ed">S12-1046</url>
       <bibkey>specia-etal-2012-semeval</bibkey>
@@ -419,8 +419,8 @@
     <paper id="47">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 2: Measuring Degrees of Relational Similarity</title>
       <author><first>David</first><last>Jurgens</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <author><first>Keith</first><last>Holyoak</last></author>
       <pages>356–364</pages>
       <url hash="c3ebfa0a">S12-1047</url>
@@ -430,7 +430,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 3: Spatial Role Labeling</title>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>365–373</pages>
       <url hash="87febc52">S12-1048</url>
       <bibkey>kordjamshidi-etal-2012-semeval</bibkey>
@@ -458,14 +458,14 @@
       <author><first>Eneko</first><last>Agirre</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
       <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <pages>385–393</pages>
       <url hash="f821fc8c">S12-1051</url>
       <bibkey>agirre-etal-2012-semeval</bibkey>
     </paper>
     <paper id="52">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 7: Choice of Plausible Alternatives: An Evaluation of Commonsense Causal Reasoning</title>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
       <author><first>Melissa</first><last>Roemmele</last></author>
       <pages>394–398</pages>
@@ -474,7 +474,7 @@
     </paper>
     <paper id="53">
       <title><fixed-case>S</fixed-case>emeval-2012 Task 8: Cross-lingual Textual Entailment for Content Synchronization</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Alessandro</first><last>Marchetti</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
@@ -485,10 +485,10 @@
     </paper>
     <paper id="54">
       <title><fixed-case>EMNLP</fixed-case>@<fixed-case>CPH</fixed-case>: Is frequency all there is to simplicity?</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Héctor</first><last>Martínez</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez</last></author>
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>408–412</pages>
       <url hash="cc22dfe1">S12-1054</url>
       <bibkey>johannsen-etal-2012-emnlp</bibkey>
@@ -496,7 +496,7 @@
     <paper id="55">
       <title><fixed-case>UTD</fixed-case>: Determining Relational Similarity Using Lexical Patterns</title>
       <author><first>Bryan</first><last>Rink</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>413–418</pages>
       <url hash="ab963bc1">S12-1055</url>
       <bibkey>rink-harabagiu-2012-utd</bibkey>
@@ -504,7 +504,7 @@
     <paper id="56">
       <title><fixed-case>UTD</fixed-case>-<fixed-case>S</fixed-case>p<fixed-case>RL</fixed-case>: A Joint Approach to Spatial Role Labeling</title>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>419–424</pages>
       <url hash="34a8f99f">S12-1056</url>
       <bibkey>roberts-harabagiu-2012-utd</bibkey>
@@ -513,8 +513,8 @@
       <title><fixed-case>MIXCD</fixed-case>: System Description for Evaluating <fixed-case>C</fixed-case>hinese Word Similarity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012</title>
       <author><first>Yingjie</first><last>Zhang</last></author>
       <author><first>Bin</first><last>Li</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>425–429</pages>
       <url hash="b17b634e">S12-1057</url>
       <bibkey>zhang-etal-2012-mixcd</bibkey>
@@ -531,7 +531,7 @@
     <paper id="59">
       <title><fixed-case>UKP</fixed-case>: Computing Semantic Textual Similarity by Combining Multiple Content Similarity Measures</title>
       <author><first>Daniel</first><last>Bär</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
       <pages>435–440</pages>
@@ -542,9 +542,9 @@
       <title><fixed-case>T</fixed-case>ake<fixed-case>L</fixed-case>ab: Systems for Measuring Semantic Text Similarity</title>
       <author><first>Frane</first><last>Šarić</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <pages>441–448</pages>
       <url hash="63976eb9">S12-1060</url>
       <bibkey>saric-etal-2012-takelab</bibkey>
@@ -553,15 +553,15 @@
       <title>Soft Cardinality: A Parameterized Similarity Function for Text Comparison</title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>449–453</pages>
       <url hash="4a1ce421">S12-1061</url>
       <bibkey>jimenez-etal-2012-soft</bibkey>
     </paper>
     <paper id="62">
       <title><fixed-case>UNED</fixed-case>: Improving Text Similarity Measures without Human Assessments</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>454–460</pages>
@@ -573,14 +573,14 @@
       <author><first>Travis</first><last>Goodwin</last></author>
       <author><first>Bryan</first><last>Rink</last></author>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>461–466</pages>
       <url hash="504a1214">S12-1063</url>
       <bibkey>goodwin-etal-2012-utdhlt</bibkey>
     </paper>
     <paper id="64">
       <title><fixed-case>HDU</fixed-case>: Cross-lingual Textual Entailment with <fixed-case>SMT</fixed-case> Features</title>
-      <author><first>Katharina</first><last>Wäschle</last></author>
+      <author id="katharina-waschle"><first>Katharina</first><last>Wäschle</last></author>
       <author><first>Sascha</first><last>Fendrich</last></author>
       <pages>467–471</pages>
       <url hash="c07182ff">S12-1064</url>
@@ -588,9 +588,9 @@
     </paper>
     <paper id="65">
       <title><fixed-case>UA</fixed-case>lacant: Using Online Machine Translation for Cross-Lingual Textual Entailment</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>472–476</pages>
       <url hash="1b89560a">S12-1065</url>
       <bibkey>espla-gomis-etal-2012-ualacant</bibkey>
@@ -639,10 +639,10 @@
       <title><fixed-case>BUAP</fixed-case>: A First Approximation to Relational Similarity Measuring</title>
       <author><first>Mireya</first><last>Tovar</last></author>
       <author><first>J. Alejandro</first><last>Reyes</last></author>
-      <author><first>Azucena</first><last>Montes</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
-      <author><first>Saul</first><last>León</last></author>
+      <author id="azucena-montes-rendon"><first>Azucena</first><last>Montes</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
+      <author id="saul-leon"><first>Saul</first><last>León</last></author>
       <pages>502–505</pages>
       <url hash="a66d506a">S12-1071</url>
       <bibkey>tovar-etal-2012-buap</bibkey>
@@ -651,9 +651,9 @@
       <title><fixed-case>Z</fixed-case>hou qiaoli: A divide-and-conquer strategy for semantic dependency parsing</title>
       <author><last>Zhou</last><first>Qiaoli</first></author>
       <author><last>Zhang</last><first>Ling</first></author>
-      <author id="fei-liu"><last>Liu</last><first>Fei</first></author>
-      <author><last>Cai</last><first>Dongfeng</first></author>
-      <author><last>Zhang</last><first>Guiping</first></author>
+      <author><last>Liu</last><first>Fei</first></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <pages>506–513</pages>
       <url hash="b4e717f1">S12-1072</url>
       <bibkey>zhou-etal-2012-zhou</bibkey>
@@ -671,8 +671,8 @@
       <author><first>Guangchao</first><last>Tang</last></author>
       <author><first>Bin</first><last>Li</last></author>
       <author><first>Shuaishuai</first><last>Xu</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>519–523</pages>
       <url hash="0effb701">S12-1074</url>
       <bibkey>tang-etal-2012-nju</bibkey>
@@ -680,7 +680,7 @@
     <paper id="75">
       <title><fixed-case>P</fixed-case>oly<fixed-case>UCOMP</fixed-case>: Combining Semantic Vectors with Skip bigrams for Semantic Textual Similarity</title>
       <author><first>Jian</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Zhengzhong</first><last>Liu</last></author>
       <pages>524–528</pages>
       <url hash="136253fa">S12-1075</url>
@@ -696,7 +696,7 @@
     </paper>
     <paper id="77">
       <title><fixed-case>S</fixed-case>bdlrhmn: A Rule-based Human Interpretation System for Semantic Textual Similarity Task</title>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
       <author><first>Catherine</first><last>Blake</last></author>
       <pages>536–542</pages>
       <url hash="a097bc74">S12-1077</url>
@@ -704,7 +704,7 @@
     </paper>
     <paper id="78">
       <title><fixed-case>LIMSI</fixed-case>: Learning Semantic Similarity by Selecting Random Word Subsets</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <pages>543–546</pages>
       <url hash="39c2a226">S12-1078</url>
       <bibkey>sokolov-2012-limsi</bibkey>
@@ -728,7 +728,7 @@
     </paper>
     <paper id="81">
       <title><fixed-case>DSS</fixed-case>: Text Similarity Using Lexical Alignments of Form, Distributional Semantics and Grammatical Relations</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <pages>557–564</pages>
@@ -750,8 +750,8 @@
       <title><fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>NLP</fixed-case>: Multi-grade Classification of Semantic Similarity between Text Pairs</title>
       <author><first>Snehasis</first><last>Neogi</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>571–574</pages>
       <url hash="0b558c96">S12-1083</url>
       <bibkey>neogi-etal-2012-ju</bibkey>
@@ -759,7 +759,7 @@
     <paper id="84">
       <title><fixed-case>T</fixed-case>iantianzhu7:System Description of Semantic Textual Similarity (<fixed-case>STS</fixed-case>) in the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 (Task 6)</title>
       <author><last>Zhu</last><first>Tiantian</first></author>
-      <author><last>Lan</last><first>Man</first></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>575–578</pages>
       <url hash="a64c0bb8">S12-1084</url>
       <bibkey>zhu-lan-2012-tiantianzhu7</bibkey>
@@ -785,7 +785,7 @@
       <title><fixed-case>UNIBA</fixed-case>: Distributional Semantics for Textual Similarity</title>
       <author><first>Annalina</first><last>Caputo</last></author>
       <author><first>Pierpaolo</first><last>Basile</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>591–596</pages>
       <url hash="ea734a0b">S12-1087</url>
       <bibkey>caputo-etal-2012-uniba</bibkey>
@@ -795,14 +795,14 @@
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Paolo</first><last>Annesi</last></author>
       <author><first>Valerio</first><last>Storch</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>597–602</pages>
       <url hash="83cfa58c">S12-1088</url>
       <bibkey>croce-etal-2012-unitor</bibkey>
     </paper>
     <paper id="89">
       <title><fixed-case>S</fixed-case>aarland: Vector-based models of semantic textual similarity</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Stefan</first><last>Thater</last></author>
       <pages>603–607</pages>
       <url hash="27b230ac">S12-1089</url>
@@ -810,16 +810,16 @@
     </paper>
     <paper id="90">
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Multidimensional Lexical-Semantic Textual Similarity</title>
-      <author><first>Antonio</first><last>Fernández</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Héctor</first><last>Dávila</last></author>
       <author><first>Alexander</first><last>Chávez</last></author>
       <author><first>Andy</first><last>González</last></author>
       <author><first>Rainel</first><last>Estrada</last></author>
       <author><first>Yenier</first><last>Castañeda</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>608–616</pages>
       <url hash="6ff06ea1">S12-1090</url>
       <bibkey>fernandez-etal-2012-umcc</bibkey>
@@ -834,8 +834,8 @@
     </paper>
     <paper id="92">
       <title><fixed-case>FBK</fixed-case>: Machine Translation Evaluation and Word Similarity metrics for Semantic Textual Similarity</title>
-      <author><first>José Guilherme</first><last>Camargo de Souza</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="jose-g-c-de-souza"><first>José Guilherme</first><last>Camargo de Souza</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <pages>624–630</pages>
       <url hash="5e685176">S12-1092</url>
@@ -844,10 +844,10 @@
     <paper id="93">
       <title><fixed-case>FCC</fixed-case>: Three Approaches for Semantic Textual Similarity</title>
       <author><first>Maya</first><last>Carrillo</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Mireya</first><last>Tovar</last></author>
-      <author><first>Saul</first><last>León</last></author>
+      <author id="saul-leon"><first>Saul</first><last>León</last></author>
       <author><first>Esteban</first><last>Castillo</last></author>
       <pages>631–634</pages>
       <url hash="9cea2073">S12-1093</url>
@@ -858,7 +858,7 @@
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Samer</first><last>Hassan</last></author>
       <author><first>Michael</first><last>Mohler</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>635–642</pages>
       <url hash="82a2c467">S12-1094</url>
       <bibkey>banea-etal-2012-unt</bibkey>
@@ -896,14 +896,14 @@
       <title>janardhan: Semantic Textual Similarity using Universal Networking Language graph matching</title>
       <author><first>Janardhan</first><last>Singh</last></author>
       <author><first>Arindam</first><last>Bhattacharya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>662–666</pages>
       <url hash="be3ae1f1">S12-1098</url>
       <bibkey>singh-etal-2012-janardhan</bibkey>
     </paper>
     <paper id="99">
       <title><fixed-case>SAGAN</fixed-case>: An approach to Semantic Textual Similarity based on Textual Entailment</title>
-      <author><first>Julio</first><last>Castillo</last></author>
+      <author id="julio-castillo"><first>Julio</first><last>Castillo</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <pages>667–672</pages>
       <url hash="6a99cc1b">S12-1099</url>
@@ -921,8 +921,8 @@
     <paper id="101">
       <title><fixed-case>P</fixed-case>enn: Using Word Similarities to better Estimate Sentence Similarity</title>
       <author><first>Sneha</first><last>Jha</last></author>
-      <author><first>Hansen A.</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen A.</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>679–683</pages>
       <url hash="5e24b237">S12-1101</url>
       <bibkey>jha-etal-2012-penn</bibkey>
@@ -931,7 +931,7 @@
       <title>Soft Cardinality + <fixed-case>ML</fixed-case>: Learning Adaptive Similarity Functions for Cross-lingual Textual Entailment</title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>684–688</pages>
       <url hash="4244dc9a">S12-1102</url>
       <bibkey>jimenez-etal-2012-soft-cardinality</bibkey>
@@ -940,8 +940,8 @@
       <title><fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>NLP</fixed-case>: Language Independent Cross-lingual Textual Entailment System</title>
       <author><first>Snehasis</first><last>Neogi</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>689–695</pages>
       <url hash="cca9c172">S12-1103</url>
       <bibkey>neogi-etal-2012-ju-cse</bibkey>
@@ -949,7 +949,7 @@
     <paper id="104">
       <title><fixed-case>CELI</fixed-case>: An Experiment with Cross Language Textual Entailment</title>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Alessio</first><last>Bosca</last></author>
       <author><first>Marco</first><last>Trevisan</last></author>
       <pages>696–700</pages>
@@ -959,18 +959,18 @@
     <paper id="105">
       <title><fixed-case>FBK</fixed-case>: Cross-Lingual Textual Entailment Without Translation</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
-      <author><first>José Guilherme</first><last>C. de Souza</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
+      <author id="jose-g-c-de-souza"><first>José Guilherme</first><last>C. de Souza</last></author>
       <pages>701–705</pages>
       <url hash="e0a37917">S12-1105</url>
       <bibkey>mehdad-etal-2012-fbk</bibkey>
     </paper>
     <paper id="106">
       <title><fixed-case>BUAP</fixed-case>: Lexical and Semantic Similarity for Cross-lingual Textual Entailment</title>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Mireya</first><last>Tovar</last></author>
-      <author><first>Saul</first><last>León</last></author>
+      <author id="saul-leon"><first>Saul</first><last>León</last></author>
       <author><first>Esteban</first><last>Castillo</last></author>
       <pages>706–709</pages>
       <url hash="9b41929a">S12-1106</url>
@@ -994,7 +994,7 @@
     </paper>
     <paper id="109">
       <title><fixed-case>SAGAN</fixed-case>: A Machine Translation Approach for Cross-Lingual Textual Entailment</title>
-      <author><first>Julio</first><last>Castillo</last></author>
+      <author id="julio-castillo"><first>Julio</first><last>Castillo</last></author>
       <author><first>Marina</first><last>Cardenas</last></author>
       <pages>721–726</pages>
       <url hash="dd62bdbe">S12-1109</url>
diff --git a/data/xml/S13.xml b/data/xml/S13.xml
index 233fea72b0..565a93a539 100644
--- a/data/xml/S13.xml
+++ b/data/xml/S13.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Second Joint Conference on Lexical and Computational Semantics (*<fixed-case>SEM</fixed-case>), Volume 1: Proceedings of the Main Conference and the Shared Task: Semantic Textual Similarity</booktitle>
       <url hash="82a00a51">S13-1</url>
-      <editor><first>Mona</first><last>Diab</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
-      <editor><first>Marco</first><last>Baroni</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="marco-baroni"><first>Marco</first><last>Baroni</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia, USA</address>
       <month>June</month>
@@ -19,19 +19,19 @@
     </frontmatter>
     <paper id="1">
       <title>Towards a Formal Distributional Semantics: Simulating Logical Calculi with Tensors</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <pages>1–10</pages>
       <url hash="5da3cdc8">S13-1001</url>
       <bibkey>grefenstette-2013-towards</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>M</fixed-case>ontague Meets <fixed-case>M</fixed-case>arkov: Deep Semantics with Probabilistic Logical Form</title>
-      <author><first>Islam</first><last>Beltagy</last></author>
+      <author id="islam-beltagy"><first>Islam</first><last>Beltagy</last></author>
       <author><first>Cuong</first><last>Chau</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Dan</first><last>Garrette</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>11–21</pages>
       <url hash="16a36178">S13-1002</url>
       <bibkey>beltagy-etal-2013-montague</bibkey>
@@ -39,18 +39,18 @@
     <paper id="3">
       <title>Coarse to Fine Grained Sense Disambiguation in <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Hui</first><last>Shen</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>22–31</pages>
       <url hash="092ce4f9">S13-1003</url>
       <bibkey>shen-etal-2013-coarse</bibkey>
     </paper>
     <paper id="4">
       <title>*<fixed-case>SEM</fixed-case> 2013 shared task: Semantic Textual Similarity</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
       <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
       <pages>32–43</pages>
       <url hash="86b914cd">S13-1004</url>
@@ -59,8 +59,8 @@
     <paper id="5">
       <title><fixed-case>UMBC</fixed-case>_<fixed-case>EBIQUITY</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Textual Similarity Systems</title>
       <author><first>Lushan</first><last>Han</last></author>
-      <author><first>Abhay</first><last>L. Kashyap</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="abhay-l-kashyap"><first>Abhay</first><last>L. Kashyap</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Jonathan</first><last>Weese</last></author>
       <pages>44–52</pages>
@@ -80,7 +80,7 @@
       <title><fixed-case>UNITOR</fixed-case>-<fixed-case>CORE</fixed-case>_<fixed-case>TYPED</fixed-case>: Combining Text Similarity and Semantic Filters through <fixed-case>SV</fixed-case> Regression</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Valerio</first><last>Storch</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>59–65</pages>
       <url hash="a8e31e6f">S13-1007</url>
       <bibkey>croce-etal-2013-unitor</bibkey>
@@ -91,7 +91,7 @@
       <author><first>Hans</first><last>Moen</last></author>
       <author><first>Lars</first><last>Bungum</last></author>
       <author><first>Gleb</first><last>Sizov</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>André</first><last>Lynum</last></author>
       <pages>66–73</pages>
       <url hash="973bd41e">S13-1008</url>
@@ -128,7 +128,7 @@
     <paper id="12">
       <title><fixed-case>P</fixed-case>oly<fixed-case>UCOMP</fixed-case>-<fixed-case>CORE</fixed-case>_<fixed-case>TYPED</fixed-case>: Computing Semantic Textual Similarity using Overlapped Senses</title>
       <author><first>Jian</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>90–95</pages>
       <url hash="642243fa">S13-1012</url>
       <bibkey>xu-lu-2013-polyucomp</bibkey>
@@ -147,7 +147,7 @@
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Vassiliki</first><last>Prokopi</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>103–108</pages>
       <url hash="581e6eb4">S13-1014</url>
       <bibkey>malandrakis-etal-2013-deeppurple</bibkey>
@@ -156,12 +156,12 @@
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Textual Similarity based on Lexical-Semantic features</title>
       <author><first>Alexander</first><last>Chávez</last></author>
       <author><first>Héctor</first><last>Dávila</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Armando</first><last>Collazo</last></author>
-      <author><first>José I.</first><last>Abreu</last></author>
-      <author><first>Antonio</first><last>Fernández Orquín</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="jose-i-abreu"><first>José I.</first><last>Abreu</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández Orquín</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>109–118</pages>
       <url hash="22c053bb">S13-1015</url>
       <bibkey>chavez-etal-2013-umcc</bibkey>
@@ -169,7 +169,7 @@
     <paper id="16">
       <title><fixed-case>BUT</fixed-case>-<fixed-case>TYPED</fixed-case>: Using domain knowledge for computing typed similarity</title>
       <author><first>Lubomir</first><last>Otrusina</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <pages>119–123</pages>
       <url hash="d697319a">S13-1016</url>
       <bibkey>otrusina-smrz-2013-typed</bibkey>
@@ -177,17 +177,17 @@
     <paper id="17">
       <title><fixed-case>ECNUCS</fixed-case>: Measuring Short Text Semantic Equivalence Using Multiple Similarity Measurements</title>
       <author><last>Zhu</last><first>Tiantian</first></author>
-      <author><last>Man</last><first>Lan</first></author>
+      <author id="man-lan"><first>Lan</first><last>Man</last></author>
       <pages>124–131</pages>
       <url hash="baea30d6">S13-1017</url>
       <bibkey>zhu-man-2013-ecnucs</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>UBC</fixed-case>_<fixed-case>UOS</fixed-case>-<fixed-case>TYPED</fixed-case>: Regression for typed-similarity</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Nikolaos</first><last>Aletras</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>132–137</pages>
       <url hash="a30a48cb">S13-1018</url>
@@ -204,10 +204,10 @@
     <paper id="20">
       <title><fixed-case>UPC</fixed-case>-<fixed-case>CORE</fixed-case>: What Can Machine Translation Evaluation Metrics and <fixed-case>W</fixed-case>ikipedia Do for Estimating Semantic Textual Similarity?</title>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Maria</first><last>Fuentes</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="maria-fuentes"><first>Maria</first><last>Fuentes</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>143–147</pages>
       <url hash="1e44d66f">S13-1020</url>
       <bibkey>barron-cedeno-etal-2013-upc</bibkey>
@@ -232,8 +232,8 @@
     <paper id="23">
       <title><fixed-case>LIPN</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Text Similarity using n-grams, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et, Syntactic Analysis, <fixed-case>ESA</fixed-case> and Information Retrieval based Features</title>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Jorge J.</first><last>García Flores</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="jorge-garcia-flores"><first>Jorge J.</first><last>García Flores</last></author>
       <author><first>Adrian</first><last>Popescu</last></author>
       <pages>162–168</pages>
       <url hash="0f92ac5c">S13-1023</url>
@@ -243,14 +243,14 @@
       <title><fixed-case>UNIBA</fixed-case>-<fixed-case>CORE</fixed-case>: Combining Strategies for Semantic Textual Similarity</title>
       <author><first>Annalina</first><last>Caputo</last></author>
       <author><first>Pierpaolo</first><last>Basile</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>169–175</pages>
       <url hash="2161b8b1">S13-1024</url>
       <bibkey>caputo-etal-2013-uniba</bibkey>
     </paper>
     <paper id="25">
       <title><fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>-<fixed-case>CORE</fixed-case>: A Simple Machine Learning Model of Semantic Textual Similarity</title>
-      <author><first>Md.</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md.</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>176–180</pages>
@@ -278,7 +278,7 @@
       <title><fixed-case>SOFTCARDINALITY</fixed-case>-<fixed-case>CORE</fixed-case>: Improving Text Overlap with Distributional Measures for Semantic Textual Similarity</title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>194–201</pages>
       <url hash="437e6b1b">S13-1028</url>
       <bibkey>jimenez-etal-2013-softcardinality</bibkey>
@@ -306,7 +306,7 @@
     <paper id="31">
       <title><fixed-case>CFILT</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Textual Similarity using Universal Networking Language</title>
       <author><first>Avishek</first><last>Dan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>216–220</pages>
       <url hash="d15466cb">S13-1031</url>
       <bibkey>dan-bhattacharyya-2013-cfilt</bibkey>
@@ -319,9 +319,9 @@
       <author><first>Samer</first><last>Hassan</last></author>
       <author><first>Michael</first><last>Mohler</last></author>
       <author><first>Bishan</first><last>Yang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Jan</first><last>Wiebe</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Jan</first><last>Wiebe</last></author>
       <pages>221–228</pages>
       <url hash="9a98efdc">S13-1032</url>
       <bibkey>banea-etal-2013-cpn</bibkey>
@@ -329,17 +329,17 @@
     <paper id="33">
       <title><fixed-case>INAOE</fixed-case>_<fixed-case>UPV</fixed-case>-<fixed-case>CORE</fixed-case>: Extracting Word Associations from Document Corpora to estimate Semantic Textual Similarity</title>
       <author><first>Fernando</first><last>Sánchez-Vega</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Luis</first><last>Villaseñor-Pineda</last></author>
+      <author id="luis-villasenor-pineda"><first>Luis</first><last>Villaseñor-Pineda</last></author>
       <pages>229–233</pages>
       <url hash="a0512032">S13-1033</url>
       <bibkey>sanchez-vega-etal-2013-inaoe</bibkey>
     </paper>
     <paper id="34">
       <title><fixed-case>CNGL</fixed-case>-<fixed-case>CORE</fixed-case>: Referential Translation Machines for Measuring Semantic Similarity</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>234–240</pages>
       <url hash="2399834d">S13-1034</url>
       <bibkey>bicici-van-genabith-2013-cngl</bibkey>
@@ -363,15 +363,15 @@
     </paper>
     <paper id="37">
       <title>More Words and Bigger Pictures</title>
-      <author><first>David</first><last>Forsyth</last></author>
+      <author id="david-forsyth"><first>David</first><last>Forsyth</last></author>
       <pages>254</pages>
       <url hash="b230cb6e">S13-1037</url>
       <bibkey>forsyth-2013-words</bibkey>
     </paper>
     <paper id="38">
       <title>Exploring Vector Space Models to Predict the Compositionality of <fixed-case>G</fixed-case>erman Noun-Noun Compounds</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Stefan</first><last>Müller</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
       <author><first>Stefan</first><last>Roller</last></author>
       <pages>255–265</pages>
       <url hash="2ea566f3">S13-1038</url>
@@ -403,14 +403,14 @@
     </paper>
     <paper id="42">
       <title>Choosing the Right Words: Characterizing and Reducing Error of the Word Count Approach</title>
-      <author><first>Hansen Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen Andrew</first><last>Schwartz</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
       <author><first>Eduardo</first><last>Blanco</last></author>
       <author><first>Lukasz</first><last>Dziurzynski</last></author>
-      <author><first>Margaret L.</first><last>Kern</last></author>
+      <author id="margaret-kern"><first>Margaret L.</first><last>Kern</last></author>
       <author><first>Stephanie</first><last>Ramones</last></author>
       <author><first>Martin</first><last>Seligman</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>296–305</pages>
       <url hash="6948c7f5">S13-1042</url>
       <bibkey>schwartz-etal-2013-choosing</bibkey>
@@ -433,7 +433,7 @@
     </paper>
     <paper id="45">
       <title>Semantic Parsing <fixed-case>F</fixed-case>reebase: Towards Open-domain Semantic Parsing</title>
-      <author><first>Qingqing</first><last>Cai</last></author>
+      <author id="qingqing-cai"><first>Qingqing</first><last>Cai</last></author>
       <author><first>Alexander</first><last>Yates</last></author>
       <pages>328–338</pages>
       <url hash="fa6ccbd1">S13-1045</url>
@@ -459,11 +459,11 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 1: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3: Evaluating Time Expressions, Events, and Temporal Relations</title>
       <author><first>Naushad</first><last>UzZaman</last></author>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1–9</pages>
       <url hash="0dfa44e6">S13-2001</url>
       <bibkey>uzzaman-etal-2013-semeval</bibkey>
@@ -487,14 +487,14 @@
     <paper id="4">
       <title><fixed-case>ATT</fixed-case>1: Temporal Annotation Using Big Windows and Rich Syntactic and Semantic Features</title>
       <author><first>Hyuckchul</first><last>Jung</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>20–24</pages>
       <url hash="0e212aa7">S13-2004</url>
       <bibkey>jung-stent-2013-att1</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>emeval-2013 Task 8: Cross-lingual Textual Entailment for Content Synchronization</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Alessandro</first><last>Marchetti</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
@@ -507,7 +507,7 @@
       <title><fixed-case>SOFTCARDINALITY</fixed-case>: Learning to Identify Directional Cross-Lingual Entailment from Cardinalities and <fixed-case>SMT</fixed-case></title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>34–38</pages>
       <url hash="38787b28">S13-2006</url>
       <bibkey>jimenez-etal-2013-softcardinality-learning</bibkey>
@@ -516,8 +516,8 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 5: Evaluating Phrasal Semantics</title>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>39–47</pages>
       <url hash="e1aa9a14">S13-2007</url>
       <bibkey>korkontzelos-etal-2013-semeval</bibkey>
@@ -533,7 +533,7 @@
       <title><fixed-case>M</fixed-case>an<fixed-case>TIME</fixed-case>: Temporal expression identification and normalization in the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3 challenge</title>
       <author><first>Michele</first><last>Filannino</last></author>
       <author><first>Gavin</first><last>Brown</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>53–57</pages>
       <url hash="3269b409">S13-2009</url>
       <bibkey>filannino-etal-2013-mantime</bibkey>
@@ -541,33 +541,33 @@
     <paper id="10">
       <title><fixed-case>FSS</fixed-case>-<fixed-case>T</fixed-case>im<fixed-case>E</fixed-case>x for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3: Extracting Temporal Information from Text</title>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <pages>58–63</pages>
       <url hash="510332a5">S13-2010</url>
       <bibkey>zavarella-tanev-2013-fss</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>: A <fixed-case>CRF</fixed-case> Based Approach to Annotation of Temporal Expression, Event and Temporal Relations</title>
-      <author><first>Anup Kumar</first><last>Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup Kumar</first><last>Kolya</last></author>
       <author><first>Amitava</first><last>Kundu</last></author>
       <author><first>Rajdeep</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>64–72</pages>
       <url hash="a623c3fe">S13-2011</url>
       <bibkey>kolya-etal-2013-ju</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>N</fixed-case>avy<fixed-case>T</fixed-case>ime: Event and Time Ordering from Raw Text</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>73–77</pages>
       <url hash="3a88af12">S13-2012</url>
       <bibkey>chambers-2013-navytime</bibkey>
     </paper>
     <paper id="13">
       <title><fixed-case>SUT</fixed-case>ime: Evaluation in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3</title>
-      <author><first>Angel</first><last>Chang</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>78–82</pages>
       <url hash="66344a42">S13-2013</url>
       <bibkey>chang-manning-2013-sutime</bibkey>
@@ -575,7 +575,7 @@
     <paper id="14">
       <title><fixed-case>KUL</fixed-case>: Data-driven Approach to Temporal Parsing of Newswire Articles</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>83–87</pages>
       <url hash="249c1f5c">S13-2014</url>
       <bibkey>kolomiyets-moens-2013-kul</bibkey>
@@ -593,20 +593,20 @@
     <paper id="16">
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>-(<fixed-case>EPS</fixed-case>): Paraphrases Detection Based on Semantic Distance</title>
       <author><first>Héctor</first><last>Dávila</last></author>
-      <author><first>Antonio</first><last>Fernández Orquín</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández Orquín</last></author>
       <author><first>Alexander</first><last>Chávez</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Armando</first><last>Collazo</last></author>
-      <author><first>José I.</first><last>Abreu</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="jose-i-abreu"><first>José I.</first><last>Abreu</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>93–97</pages>
       <url hash="5b81a02e">S13-2016</url>
       <bibkey>davila-etal-2013-umcc</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>MELODI</fixed-case>: Semantic Similarity of Words and Compositional Phrases using Latent Vector Weighting</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>98–102</pages>
@@ -634,7 +634,7 @@
       <title><fixed-case>UNAL</fixed-case>: Discriminating between Literal and Figurative Phrasal Usage Using Distributional Statistics and <fixed-case>POS</fixed-case> tags</title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>114–117</pages>
       <url hash="91765cbd">S13-2020</url>
       <bibkey>jimenez-etal-2013-unal</bibkey>
@@ -642,19 +642,19 @@
     <paper id="21">
       <title><fixed-case>ECNUCS</fixed-case>: Recognizing Cross-lingual Textual Entailment Using Multiple Text Similarity and Text Difference Measures</title>
       <author><first>Jiang</first><last>Zhao</last></author>
-      <author><first>Man</first><last>Lan</last></author>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
       <pages>118–123</pages>
       <url hash="ae04776d">S13-2021</url>
       <bibkey>zhao-etal-2013-ecnucs</bibkey>
     </paper>
     <paper id="22">
       <title><fixed-case>BUAP</fixed-case>: N-gram based Feature Evaluation for the Cross-Lingual Textual Entailment Task</title>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
-      <author><first>Saúl</first><last>León</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
+      <author id="saul-leon"><first>Saúl</first><last>León</last></author>
       <author><first>Yuridiana</first><last>Alemán</last></author>
-      <author><first>Helena</first><last>Gómez</last></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gómez</last></author>
       <pages>124–127</pages>
       <url hash="dd7287df">S13-2022</url>
       <bibkey>vilarino-etal-2013-buap</bibkey>
@@ -662,7 +662,7 @@
     <paper id="23">
       <title><fixed-case>ALTN</fixed-case>: Word Alignment Features for Cross-lingual Textual Entailment</title>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>128–132</pages>
       <url hash="13cea748">S13-2023</url>
       <bibkey>turchi-negri-2013-altn</bibkey>
@@ -671,7 +671,7 @@
       <title><fixed-case>U</fixed-case>melb: Cross-lingual Textual Entailment with Word Alignment and String Similarity Features</title>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>133–137</pages>
       <url hash="af727aa5">S13-2024</url>
       <bibkey>graham-etal-2013-umelb</bibkey>
@@ -680,9 +680,9 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 4: Free Paraphrases of Noun Compounds</title>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Tony</first><last>Veale</last></author>
       <pages>138–143</pages>
       <url hash="526fdacf">S13-2025</url>
@@ -690,7 +690,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>MELODI</fixed-case>: A Supervised Distributional Approach for Free Paraphrasing of Noun Compounds</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>144–147</pages>
@@ -717,7 +717,7 @@
     <paper id="29">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 10: Cross-lingual Word Sense Disambiguation</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>158–166</pages>
       <url hash="2435d81e">S13-2029</url>
       <bibkey>lefever-hoste-2013-semeval</bibkey>
@@ -732,7 +732,7 @@
     </paper>
     <paper id="31">
       <title><fixed-case>HLTDI</fixed-case>: <fixed-case>CL</fixed-case>-<fixed-case>WSD</fixed-case> Using <fixed-case>M</fixed-case>arkov Random Fields for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 10</title>
-      <author><first>Alex</first><last>Rudnick</last></author>
+      <author id="alex-rudnick"><first>Alex</first><last>Rudnick</last></author>
       <author><first>Can</first><last>Liu</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
       <pages>171–177</pages>
@@ -749,7 +749,7 @@
     <paper id="33">
       <title><fixed-case>WSD</fixed-case>2: Parameter optimisation for Memory-based Cross-Lingual Word-Sense Disambiguation</title>
       <author><first>Maarten</first><last>van Gompel</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>183–187</pages>
       <url hash="94a7c8bf">S13-2033</url>
       <bibkey>van-gompel-van-den-bosch-2013-wsd2</bibkey>
@@ -798,7 +798,7 @@
       <title>unimelb: Topic Modelling-based Word Sense Induction for Web Snippet Clustering</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>217–221</pages>
       <url hash="7505dfa4">S13-2039</url>
       <bibkey>lau-etal-2013-unimelb</bibkey>
@@ -816,26 +816,26 @@
       <title><fixed-case>GETALP</fixed-case> System : Propagation of a <fixed-case>L</fixed-case>esk Measure through an Ant Colony Algorithm</title>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Mohammad</first><last>Nasiruddin</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>232–240</pages>
       <url hash="c0ab44f2">S13-2041</url>
       <bibkey>schwab-etal-2013-getalp</bibkey>
     </paper>
     <paper id="42">
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Reinforcing a Ranking Algorithm with Sense Frequencies and Multidimensional Semantic Resources to solve Multilingual Word Sense Disambiguation</title>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Yenier</first><last>Castañeda</last></author>
       <author><first>Andy</first><last>González</last></author>
       <author><first>Rainel</first><last>Estrada</last></author>
       <author><first>Dennys D.</first><last>Piug</last></author>
-      <author><first>Jose I.</first><last>Abreu</last></author>
+      <author id="jose-i-abreu"><first>Jose I.</first><last>Abreu</last></author>
       <author><first>Roger</first><last>Pérez</last></author>
-      <author><first>Antonio</first><last>Fernández Orquín</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández Orquín</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Franc</first><last>Camara</last></author>
       <pages>241–249</pages>
       <url hash="dc9154db">S13-2042</url>
@@ -853,7 +853,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 3: Spatial Role Labeling</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>255–262</pages>
       <url hash="d47b3615">S13-2044</url>
@@ -861,15 +861,15 @@
     </paper>
     <paper id="45">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 7: The Joint Student Response Analysis and 8th Recognizing Textual Entailment Challenge</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <author><first>Chris</first><last>Brew</last></author>
       <author><first>Claudia</first><last>Leacock</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>263–274</pages>
       <url hash="6bb68e21">S13-2045</url>
       <bibkey>dzikovska-etal-2013-semeval</bibkey>
@@ -886,7 +886,7 @@
       <title><fixed-case>SOFTCARDINALITY</fixed-case>: Hierarchical Text Overlap for Student Response Analysis</title>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Claudia</first><last>Becerra</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>280–284</pages>
       <url hash="fba82cba">S13-2047</url>
       <bibkey>jimenez-etal-2013-softcardinality-hierarchical</bibkey>
@@ -904,7 +904,7 @@
     <paper id="49">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 13: Word Sense Induction for Graded and Non-Graded Senses</title>
       <author><first>David</first><last>Jurgens</last></author>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
       <pages>290–299</pages>
       <url hash="b852d5f2">S13-2049</url>
       <bibkey>jurgens-klapaftis-2013-semeval</bibkey>
@@ -923,14 +923,14 @@
       <title>unimelb: Topic Modelling-based Word Sense Induction</title>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>307–311</pages>
       <url hash="532966da">S13-2051</url>
       <bibkey>lau-etal-2013-unimelb-topic</bibkey>
     </paper>
     <paper id="52">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 2: Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
@@ -942,7 +942,7 @@
     </paper>
     <paper id="53">
       <title><fixed-case>NRC</fixed-case>-<fixed-case>C</fixed-case>anada: Building the State-of-the-Art in Sentiment Analysis of Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <pages>321–327</pages>
@@ -959,7 +959,7 @@
     </paper>
     <paper id="55">
       <title><fixed-case>AVAYA</fixed-case>: Sentiment Analysis on <fixed-case>T</fixed-case>witter with Self-Training and Polarity Lexicon Expansion</title>
-      <author><first>Lee</first><last>Becker</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
       <author><first>George</first><last>Erhart</last></author>
       <author><first>David</first><last>Skiba</last></author>
       <author><first>Valentine</first><last>Matula</last></author>
@@ -969,7 +969,7 @@
     </paper>
     <paper id="56">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 9 : Extraction of Drug-Drug Interactions from Biomedical Texts (<fixed-case>DDIE</fixed-case>xtraction 2013)</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <author><first>María</first><last>Herrero-Zazo</last></author>
       <pages>341–350</pages>
@@ -979,14 +979,14 @@
     <paper id="57">
       <title><fixed-case>FBK</fixed-case>-irst : A Multi-Phase Kernel Based Approach for Drug-Drug Interaction Detection and Classification that Exploits Linguistic Information</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>351–355</pages>
       <url hash="47868b85">S13-2057</url>
       <bibkey>chowdhury-lavelli-2013-fbk</bibkey>
     </paper>
     <paper id="58">
       <title><fixed-case>WBI</fixed-case>-<fixed-case>NER</fixed-case>: The impact of domain-specific features on the performance of identifying and classifying mentions of drugs</title>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Torsten</first><last>Huber</last></author>
       <author><first>Michael</first><last>Weidlich</last></author>
       <author><first>Ulf</first><last>Leser</last></author>
@@ -1009,7 +1009,7 @@
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>369–374</pages>
       <url hash="f1e01a43">S13-2060</url>
       <bibkey>castellucci-etal-2013-unitor</bibkey>
@@ -1026,7 +1026,7 @@
       <title>u<fixed-case>O</fixed-case>ttawa: System description for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2013 Task 2 Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Hamid</first><last>Poursepanj</last></author>
       <author><first>Josh</first><last>Weissbock</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>380–383</pages>
       <url hash="2e586f18">S13-2062</url>
       <bibkey>poursepanj-etal-2013-uottawa</bibkey>
@@ -1045,7 +1045,7 @@
       <title><fixed-case>USNA</fixed-case>: A Dual-Classifier Approach to Contextual Sentiment Analysis</title>
       <author><first>Ganesh</first><last>Harihara</last></author>
       <author><first>Eugene</first><last>Yang</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>390–394</pages>
       <url hash="a76da415">S13-2064</url>
       <bibkey>harihara-etal-2013-usna</bibkey>
@@ -1062,10 +1062,10 @@
     </paper>
     <paper id="66">
       <title><fixed-case>SINAI</fixed-case>: Machine Learning and Emotion of the Crowd for Sentiment Analysis in Microblogs</title>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>402–407</pages>
       <url hash="8413bee5">S13-2066</url>
       <bibkey>martinez-camara-etal-2013-sinai</bibkey>
@@ -1074,7 +1074,7 @@
       <title><fixed-case>ECNUCS</fixed-case>: A Surface Information Based System Description of Sentiment Analysis in <fixed-case>T</fixed-case>witter in the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 (Task 2)</title>
       <author><last>Zhu</last><first>Tiantian</first></author>
       <author><last>Zhang</last><first>Fangxi</first></author>
-      <author><last>Man</last><first>Lan</first></author>
+      <author id="man-lan"><first>Lan</first><last>Man</last></author>
       <pages>408–413</pages>
       <url hash="1babc728">S13-2067</url>
       <bibkey>zhu-etal-2013-ecnucs</bibkey>
@@ -1089,7 +1089,7 @@
     <paper id="69">
       <title>[<fixed-case>LVIC</fixed-case>-<fixed-case>LIMSI</fixed-case>]: Using Syntactic Features and Multi-polarity Words for Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Morgane</first><last>Marchand</last></author>
-      <author><first>Alexandru</first><last>Ginsca</last></author>
+      <author id="alexandru-lucian-ginsca"><first>Alexandru</first><last>Ginsca</last></author>
       <author><first>Romaric</first><last>Besançon</last></author>
       <author><first>Olivier</first><last>Mesnard</last></author>
       <pages>418–424</pages>
@@ -1108,7 +1108,7 @@
       <title><fixed-case>NTNU</fixed-case>: Domain Semi-Independent Short Message Sentiment Classification</title>
       <author><first>Øyvind</first><last>Selmer</last></author>
       <author><first>Mikael</first><last>Brevik</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Lars</first><last>Bungum</last></author>
       <pages>430–437</pages>
       <url hash="5e1fa414">S13-2071</url>
@@ -1119,21 +1119,21 @@
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
       <author><first>Abe</first><last>Kazemzadeh</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>438–442</pages>
       <url hash="f1302e8f">S13-2072</url>
       <bibkey>malandrakis-etal-2013-sail</bibkey>
     </paper>
     <paper id="73">
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>-(<fixed-case>SA</fixed-case>): Using a ranking algorithm and informal features to solve Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Andy</first><last>González</last></author>
       <author><first>Roger</first><last>Pérez</last></author>
-      <author><first>José I.</first><last>Abreu</last></author>
-      <author><first>Antonio</first><last>Fernández Orquín</last></author>
+      <author id="jose-i-abreu"><first>José I.</first><last>Abreu</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández Orquín</last></author>
       <author><first>Alejandro</first><last>Mosquera</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Franc</first><last>Camara</last></author>
       <pages>443–449</pages>
       <url hash="4609c35f">S13-2073</url>
@@ -1149,15 +1149,15 @@
     <paper id="75">
       <title>Experiments with <fixed-case>DB</fixed-case>pedia, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et as resources for sentiment analysis in micro-blogging</title>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Frederic</first><last>Béchet</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Béchet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>455–459</pages>
       <url hash="94039c93">S13-2075</url>
       <bibkey>hamdan-etal-2013-experiments</bibkey>
     </paper>
     <paper id="76">
       <title><fixed-case>OPTWIMA</fixed-case>: Comparing Knowledge-rich and Knowledge-poor Approaches for Sentiment Analysis in Short Informal Texts</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <pages>460–465</pages>
       <url hash="c80966e9">S13-2076</url>
       <bibkey>balahur-2013-optwima</bibkey>
@@ -1167,7 +1167,7 @@
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>466–470</pages>
       <url hash="98ce3bf6">S13-2077</url>
       <bibkey>chowdhury-etal-2013-fbk</bibkey>
@@ -1178,7 +1178,7 @@
       <author><first>Rahim</first><last>Dehkharghani</last></author>
       <author><first>Berrin</first><last>Yanikoglu</last></author>
       <author><first>Dilek</first><last>Tapucu</last></author>
-      <author><first>Yucel</first><last>Saygin</last></author>
+      <author id="yucel-saygin"><first>Yucel</first><last>Saygin</last></author>
       <pages>471–477</pages>
       <url hash="b784a846">S13-2078</url>
       <bibkey>gezici-etal-2013-su</bibkey>
@@ -1186,20 +1186,20 @@
     <paper id="79">
       <title><fixed-case>C</fixed-case>olumbia <fixed-case>NLP</fixed-case>: Sentiment Detection of Subjective Phrases in Social Media</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>478–482</pages>
       <url hash="9822fa03">S13-2079</url>
       <bibkey>rosenthal-mckeown-2013-columbia</bibkey>
     </paper>
     <paper id="80">
       <title><fixed-case>FBM</fixed-case>: Combining lexicon-based <fixed-case>ML</fixed-case> and heuristics for Social Media Polarities</title>
-      <author><first>Carlos</first><last>Rodríguez-Penagos</last></author>
-      <author><first>Jordi</first><last>Atserias Batalla</last></author>
-      <author><first>Joan</first><last>Codina-Filbà</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez-Penagos</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias Batalla</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filbà</last></author>
       <author><first>David</first><last>García-Narbona</last></author>
       <author><first>Jens</first><last>Grivolla</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <pages>483–489</pages>
       <url hash="457cad06">S13-2080</url>
       <bibkey>rodriguez-penagos-etal-2013-fbm</bibkey>
@@ -1209,8 +1209,8 @@
       <author><first>Silvio</first><last>Moreira</last></author>
       <author><first>João</first><last>Filgueiras</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Francisco</first><last>Couto</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="francisco-m-couto"><first>Francisco</first><last>Couto</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <pages>490–494</pages>
       <url hash="2239b5fd">S13-2081</url>
       <bibkey>moreira-etal-2013-reaction</bibkey>
@@ -1219,7 +1219,7 @@
       <title><fixed-case>IITB</fixed-case>-Sentiment-Analysts: Participation in Sentiment Analysis in <fixed-case>T</fixed-case>witter <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2013 Task</title>
       <author><first>Karan</first><last>Chawla</last></author>
       <author><first>Ankit</first><last>Ramteke</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>495–500</pages>
       <url hash="3a9294c5">S13-2082</url>
       <bibkey>chawla-etal-2013-iitb</bibkey>
@@ -1228,8 +1228,8 @@
       <title><fixed-case>SSA</fixed-case>-<fixed-case>UO</fixed-case>: Unsupervised Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Reynier</first><last>Ortega Bueno</last></author>
       <author><first>Adrian</first><last>Fonseca Bruzón</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>501–507</pages>
       <url hash="405f37a4">S13-2083</url>
       <bibkey>ortega-bueno-etal-2013-ssa</bibkey>
@@ -1259,7 +1259,7 @@
       <title><fixed-case>C</fixed-case>ode<fixed-case>X</fixed-case>: Combining an <fixed-case>SVM</fixed-case> Classifier and Character N-gram Language Models for Sentiment Analysis on <fixed-case>T</fixed-case>witter Text</title>
       <author><first>Qi</first><last>Han</last></author>
       <author><first>Junfei</first><last>Guo</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>520–524</pages>
       <url hash="533eab23">S13-2086</url>
       <bibkey>han-etal-2013-codex</bibkey>
@@ -1284,7 +1284,7 @@
     <paper id="89">
       <title><fixed-case>U</fixed-case>o<fixed-case>M</fixed-case>: Using Explicit Semantic Analysis for Classifying Sentiments</title>
       <author><first>Sapna</first><last>Negi</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <pages>535–538</pages>
       <url hash="0114cbdb">S13-2089</url>
       <bibkey>negi-rosner-2013-uom</bibkey>
@@ -1309,7 +1309,7 @@
       <title><fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Sentiment Detection on <fixed-case>T</fixed-case>witter Messages</title>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>549–553</pages>
       <url hash="397d1b3f">S13-2092</url>
       <bibkey>hangya-etal-2013-szte</bibkey>
@@ -1318,8 +1318,8 @@
       <title><fixed-case>BOUNCE</fixed-case>: Sentiment Classification in <fixed-case>T</fixed-case>witter using Rich Feature Sets</title>
       <author><first>Nadin</first><last>Kökciyan</last></author>
       <author><first>Arda</first><last>Çelebi</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
-      <author><first>Suzan</first><last>Üsküdarlı</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
+      <author id="suzan-uskudarli"><first>Suzan</first><last>Üsküdarlı</last></author>
       <pages>554–561</pages>
       <url hash="388b60c1">S13-2093</url>
       <bibkey>kokciyan-etal-2013-bounce</bibkey>
@@ -1327,7 +1327,7 @@
     <paper id="94">
       <title>nlp.cs.aueb.gr: Two Stage Sentiment Analysis</title>
       <author><first>Prodromos</first><last>Malakasiotis</last></author>
-      <author><first>Rafael Michael</first><last>Karampatsis</last></author>
+      <author id="rafael-michael-karampatsis"><first>Rafael Michael</first><last>Karampatsis</last></author>
       <author><first>Konstantina</first><last>Makrynioti</last></author>
       <author><first>John</first><last>Pavlopoulos</last></author>
       <pages>562–567</pages>
@@ -1336,7 +1336,7 @@
     </paper>
     <paper id="95">
       <title><fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: A Hybrid System for Sentiment Analysis in <fixed-case>T</fixed-case>witter Messages</title>
-      <author><first>Pedro</first><last>Balage Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Balage Filho</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>568–572</pages>
       <url hash="96aa1fde">S13-2095</url>
@@ -1346,7 +1346,7 @@
       <title><fixed-case>UNITOR</fixed-case>-<fixed-case>HMM</fixed-case>-<fixed-case>TK</fixed-case>: Structured Kernel-based learning for Spatial Role Labeling</title>
       <author><first>Emanuele</first><last>Bastianelli</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Daniele</first><last>Nardi</last></author>
       <pages>573–579</pages>
       <url hash="1a72449e">S13-2096</url>
@@ -1355,16 +1355,16 @@
     <paper id="97">
       <title><fixed-case>EHU</fixed-case>-<fixed-case>ALM</fixed-case>: Similarity-Feature Based Approach for Student Response Analysis</title>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <pages>580–584</pages>
       <url hash="da13a9b7">S13-2097</url>
       <bibkey>aldabe-etal-2013-ehu</bibkey>
     </paper>
     <paper id="98">
       <title><fixed-case>CNGL</fixed-case>: Grading Student Answers by Acts of Translation</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>585–591</pages>
       <url hash="96e18143">S13-2098</url>
       <bibkey>bicici-van-genabith-2013-cngl-grading</bibkey>
@@ -1372,7 +1372,7 @@
     <paper id="99">
       <title><fixed-case>C</fixed-case>eli: <fixed-case>EDITS</fixed-case> and Generic Text Pair Classification</title>
       <author><first>Milen</first><last>Kouylekov</last></author>
-      <author><first>Luca</first><last>Dini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
       <author><first>Alessio</first><last>Bosca</last></author>
       <author><first>Marco</first><last>Trevisan</last></author>
       <pages>592–597</pages>
@@ -1401,21 +1401,21 @@
       <author><first>Niels</first><last>Ott</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Michael</first><last>Hahn</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>608–616</pages>
       <url hash="711f3d6b">S13-2102</url>
       <bibkey>ott-etal-2013-comet</bibkey>
     </paper>
     <paper id="103">
       <title><fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>: A kernel-based approach to identify and classify <fixed-case>DDI</fixed-case>s in bio-medical texts.</title>
-      <author><first>Daniel</first><last>Sanchez-Cisneros</last></author>
+      <author id="daniel-sanchez-cisneros"><first>Daniel</first><last>Sanchez-Cisneros</last></author>
       <pages>617–621</pages>
       <url hash="980ae6ae">S13-2103</url>
       <bibkey>sanchez-cisneros-2013-uc3m</bibkey>
     </paper>
     <paper id="104">
       <title><fixed-case>UEM</fixed-case>-<fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>: An Ontology-based named entity recognition system for biomedical texts.</title>
-      <author><first>Daniel</first><last>Sanchez-Cisneros</last></author>
+      <author id="daniel-sanchez-cisneros"><first>Daniel</first><last>Sanchez-Cisneros</last></author>
       <author><first>Fernando</first><last>Aparicio Gali</last></author>
       <pages>622–627</pages>
       <url hash="acbd3f65">S13-2104</url>
@@ -1425,7 +1425,7 @@
       <title><fixed-case>WBI</fixed-case>-<fixed-case>DDI</fixed-case>: Drug-Drug Interaction Extraction using Majority Voting</title>
       <author><first>Philippe</first><last>Thomas</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Ulf</first><last>Leser</last></author>
       <pages>628–635</pages>
       <url hash="23945ed0">S13-2105</url>
@@ -1436,12 +1436,12 @@
       <author><first>Armando</first><last>Collazo</last></author>
       <author><first>Alberto</first><last>Ceballo</last></author>
       <author><first>Dennys D.</first><last>Puig</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>José I.</first><last>Abreu</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="jose-i-abreu"><first>José I.</first><last>Abreu</last></author>
       <author><first>Roger</first><last>Pérez</last></author>
-      <author><first>Antonio</first><last>Fernández Orquín</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández Orquín</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <author><first>Franc</first><last>Camara</last></author>
       <pages>636–643</pages>
       <url hash="6b655f7b">S13-2106</url>
@@ -1468,7 +1468,7 @@
       <title><fixed-case>LASIGE</fixed-case>: using Conditional Random Fields and <fixed-case>C</fixed-case>h<fixed-case>EBI</fixed-case> ontology</title>
       <author><first>Tiago</first><last>Grego</last></author>
       <author><first>Francisco</first><last>Pinto</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>660–666</pages>
       <url hash="a21113c9">S13-2109</url>
       <bibkey>grego-etal-2013-lasige</bibkey>
@@ -1476,7 +1476,7 @@
     <paper id="110">
       <title><fixed-case>UWM</fixed-case>-<fixed-case>TRIADS</fixed-case>: Classifying Drug-Drug Interactions with Two-Stage <fixed-case>SVM</fixed-case> and Post-Processing</title>
       <author><first>Majid</first><last>Rastegar-Mojarad</last></author>
-      <author><first>Richard D.</first><last>Boyce</last></author>
+      <author id="richard-d-boyce"><first>Richard D.</first><last>Boyce</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <pages>667–674</pages>
       <url hash="aac4dcf7">S13-2110</url>
@@ -1484,18 +1484,18 @@
     </paper>
     <paper id="111">
       <title><fixed-case>SCAI</fixed-case>: Extracting drug-drug interactions using a rich feature vector</title>
-      <author><first>Tamara</first><last>Bobić</last></author>
+      <author id="tamara-bobic"><first>Tamara</first><last>Bobić</last></author>
       <author><first>Juliane</first><last>Fluck</last></author>
-      <author><first>Martin</first><last>Hofmann-Apitius</last></author>
+      <author id="martin-hofmann-apitius"><first>Martin</first><last>Hofmann-Apitius</last></author>
       <pages>675–683</pages>
       <url hash="a1e016a6">S13-2111</url>
       <bibkey>bobic-etal-2013-scai</bibkey>
     </paper>
     <paper id="112">
       <title><fixed-case>UC</fixed-case>olorado_<fixed-case>SOM</fixed-case>: Extraction of Drug-Drug Interactions from Biomedical Text using Knowledge-rich and Knowledge-poor Features</title>
-      <author><first>Negacy</first><last>Hailu</last></author>
-      <author><first>Lawrence E.</first><last>Hunter</last></author>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="negacy-hailu"><first>Negacy</first><last>Hailu</last></author>
+      <author id="lawrence-hunter"><first>Lawrence E.</first><last>Hunter</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <pages>684–688</pages>
       <url hash="40dee419">S13-2112</url>
       <bibkey>hailu-etal-2013-ucolorado</bibkey>
diff --git a/data/xml/S14.xml b/data/xml/S14.xml
index d2a50d657e..8273c26364 100644
--- a/data/xml/S14.xml
+++ b/data/xml/S14.xml
@@ -20,11 +20,11 @@
     </frontmatter>
     <paper id="1">
       <title>More or less supervised supersense tagging of <fixed-case>T</fixed-case>witter</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1–11</pages>
       <url hash="0e5d1fed">S14-1001</url>
       <doi>10.3115/v1/S14-1001</doi>
@@ -34,7 +34,7 @@
       <title>Generating a Word-Emotion Lexicon from #Emotional Tweets</title>
       <author><first>Anil</first><last>Bandhakavi</last></author>
       <author><first>Nirmalie</first><last>Wiratunga</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <author><first>Stewart</first><last>Massie</last></author>
       <pages>12–21</pages>
       <url hash="491c968c">S14-1002</url>
@@ -101,7 +101,7 @@
       <title>An analysis of textual inference in <fixed-case>G</fixed-case>erman customer emails</title>
       <author><first>Kathrin</first><last>Eichler</last></author>
       <author><first>Aleksandra</first><last>Gabryszak</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>69–74</pages>
       <url hash="ed475b68">S14-1009</url>
       <doi>10.3115/v1/S14-1009</doi>
@@ -140,9 +140,9 @@
     </paper>
     <paper id="13">
       <title>Compositional Distributional Semantics Models in Chunk-based Smoothed Tree Kernels</title>
-      <author><first>Nghia The</first><last>Pham</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
       <author><first>Lorenzo</first><last>Ferrone</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>93–98</pages>
       <url hash="5a660311">S14-1013</url>
       <doi>10.3115/v1/S14-1013</doi>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="14">
       <title>Generating Simulations of Motion Events from Verbal Descriptions</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <pages>99–109</pages>
       <url hash="38211bec">S14-1014</url>
@@ -162,7 +162,7 @@
       <author><first>Mark</first><last>Yatskar</last></author>
       <author><first>Michel</first><last>Galley</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>110–120</pages>
       <url hash="8b605a59">S14-1015</url>
       <doi>10.3115/v1/S14-1015</doi>
@@ -201,7 +201,7 @@
     </paper>
     <paper id="19">
       <title>Vagueness and Learning: A Type-Theoretic Approach</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Staffan</first><last>Larsson</last></author>
       <pages>151–159</pages>
       <url hash="e87a571a">S14-1019</url>
@@ -212,7 +212,7 @@
       <title>Contrasting Syntagmatic and Paradigmatic Relations: Insights from Distributional Semantic Models</title>
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <author><first>Stefan</first><last>Evert</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>160–170</pages>
       <url hash="fb00e76b">S14-1020</url>
       <doi>10.3115/v1/S14-1020</doi>
@@ -220,8 +220,8 @@
     </paper>
     <paper id="21">
       <title>Dead parrots make bad pets: Exploring modifier effects in noun phrases</title>
-      <author><first>Germán</first><last>Kruszewski</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>171–181</pages>
       <url hash="d003ec40">S14-1021</url>
       <doi>10.3115/v1/S14-1021</doi>
@@ -230,7 +230,7 @@
     <paper id="22">
       <title>Syntactic Transfer Patterns of <fixed-case>G</fixed-case>erman Particle Verbs and their Impact on Lexical Semantics</title>
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>182–192</pages>
       <url hash="60c417df">S14-1022</url>
       <doi>10.3115/v1/S14-1022</doi>
@@ -241,7 +241,7 @@
     <meta>
       <booktitle>Proceedings of the 8th International Workshop on Semantic Evaluation (<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2014)</booktitle>
       <url hash="14dae627">S14-2</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Torsten</first><last>Zesch</last></editor>
       <doi>10.3115/v1/S14-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -258,8 +258,8 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 1: Evaluation of Compositional Distributional Semantic Models on Full Sentences through Semantic Relatedness and Textual Entailment</title>
       <author><first>Marco</first><last>Marelli</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Stefano</first><last>Menini</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
       <pages>1–8</pages>
@@ -291,9 +291,9 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 4: Aspect Based Sentiment Analysis</title>
       <author><first>Maria</first><last>Pontiki</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
       <author><first>John</first><last>Pavlopoulos</last></author>
-      <author><first>Harris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Harris</first><last>Papageorgiou</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <pages>27–35</pages>
@@ -305,9 +305,9 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2014 Task 5 - <fixed-case>L</fixed-case>2 Writing Assistant</title>
       <author><first>Maarten</first><last>van Gompel</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>36–44</pages>
       <url hash="ac23a34a">S14-2005</url>
       <doi>10.3115/v1/S14-2005</doi>
@@ -323,9 +323,9 @@
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 7: Analysis of Clinical Text</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>54–62</pages>
@@ -338,9 +338,9 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Marco</first><last>Kuhlmann</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Angelina</first><last>Ivanova</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <pages>63–72</pages>
@@ -361,16 +361,16 @@
     </paper>
     <paper id="10">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 10: Multilingual Semantic Textual Similarity</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>81–91</pages>
       <url hash="684c01b9">S14-2010</url>
       <doi>10.3115/v1/S14-2010</doi>
@@ -387,8 +387,8 @@
     <paper id="12">
       <title><fixed-case>A</fixed-case>lpage: Transition-based Semantic Graph Parsing with Syntactic Features</title>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>97–103</pages>
       <url hash="e144c6c7">S14-2012</url>
       <doi>10.3115/v1/S14-2012</doi>
@@ -410,7 +410,7 @@
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
       <author><first>Hyuckchul</first><last>Jung</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>109–113</pages>
       <url hash="b2e8bef2">S14-2014</url>
       <doi>10.3115/v1/S14-2014</doi>
@@ -418,7 +418,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>AUEB</fixed-case>: Two Stage Sentiment Analysis of Social Network Messages</title>
-      <author><first>Rafael Michael</first><last>Karampatsis</last></author>
+      <author id="rafael-michael-karampatsis"><first>Rafael Michael</first><last>Karampatsis</last></author>
       <author><first>John</first><last>Pavlopoulos</last></author>
       <author><first>Prodromos</first><last>Malakasiotis</last></author>
       <pages>114–118</pages>
@@ -428,8 +428,8 @@
     </paper>
     <paper id="16">
       <title>Bielefeld <fixed-case>SC</fixed-case>: Orthonormal Topic Modelling for Grammar Induction</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>119–122</pages>
       <url hash="86f43c56">S14-2016</url>
       <doi>10.3115/v1/S14-2016</doi>
@@ -459,7 +459,7 @@
       <title><fixed-case>B</fixed-case>ioinformatics<fixed-case>UA</fixed-case>: Concept Recognition in Clinical Narratives Using a Modular and Highly Efficient Text Processing Framework</title>
       <author><first>Sérgio</first><last>Matos</last></author>
       <author><first>Tiago</first><last>Nunes</last></author>
-      <author><first>José Luís</first><last>Oliveira</last></author>
+      <author id="jose-luis-oliveira"><first>José Luís</first><last>Oliveira</last></author>
       <pages>135–139</pages>
       <url hash="17ef2ae4">S14-2019</url>
       <doi>10.3115/v1/S14-2019</doi>
@@ -476,9 +476,9 @@
     </paper>
     <paper id="21">
       <title><fixed-case>BUAP</fixed-case>: Evaluating Compositional Distributional Semantic Models on Full Sentences through Semantic Relatedness and Textual Entailment</title>
-      <author><first>Saúl</first><last>León</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="saul-leon"><first>Saúl</first><last>León</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Mireya</first><last>Tovar</last></author>
       <author><first>Beatriz</first><last>Beltrán</last></author>
       <pages>145–148</pages>
@@ -488,9 +488,9 @@
     </paper>
     <paper id="22">
       <title><fixed-case>BUAP</fixed-case>: Evaluating Features for Multilingual and Cross-Level Semantic Textual Similarity</title>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
-      <author><first>Saúl</first><last>León</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
+      <author id="saul-leon"><first>Saúl</first><last>León</last></author>
       <author><first>Mireya</first><last>Tovar</last></author>
       <author><first>Beatriz</first><last>Beltrán</last></author>
       <pages>149–153</pages>
@@ -500,9 +500,9 @@
     </paper>
     <paper id="23">
       <title><fixed-case>BUAP</fixed-case>: Polarity Classification of Short Texts</title>
-      <author><first>David</first><last>Pinto</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>Saul</first><last>León</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="saul-leon"><first>Saul</first><last>León</last></author>
       <author><first>Miguel</first><last>Jasso</last></author>
       <author><first>Cupertino</first><last>Lucero</last></author>
       <pages>154–159</pages>
@@ -523,7 +523,7 @@
       <author><first>João</first><last>Leal</last></author>
       <author><first>Sara</first><last>Pinto</last></author>
       <author><first>Ana</first><last>Bento</last></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Paulo</first><last>Gomes</last></author>
       <pages>166–170</pages>
       <url hash="fbd96116">S14-2025</url>
@@ -548,8 +548,8 @@
       <author><first>Jesse</first><last>Dodge</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>176–180</pages>
       <url hash="5df65a28">S14-2027</url>
       <doi>10.3115/v1/S14-2027</doi>
@@ -581,7 +581,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>CNRC</fixed-case>-<fixed-case>TMT</fixed-case>: Second Language Writing Assistant System Description</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>192–197</pages>
@@ -592,7 +592,7 @@
     <paper id="31">
       <title><fixed-case>C</fixed-case>olumbia <fixed-case>NLP</fixed-case>: Sentiment Detection of Sentences and Subjective Phrases in Social Media</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <pages>198–202</pages>
       <url hash="3c7b348b">S14-2031</url>
@@ -624,12 +624,12 @@
     <paper id="34">
       <title>Copenhagen-Malmö: Tree Approximations of Semantic Parsing Problems</title>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Jakob</first><last>Elming</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Johanssen</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johanssen</last></author>
       <author><first>Sigrid</first><last>Klerke</last></author>
       <pages>213–217</pages>
       <url hash="1e9bc896">S14-2034</url>
@@ -650,7 +650,7 @@
       <title><fixed-case>DCU</fixed-case>: Aspect-based Polarity Classification for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 4</title>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Piyush</first><last>Arora</last></author>
-      <author><first>Santiago</first><last>Cortes</last></author>
+      <author id="santiago-cortes-vaillo"><first>Santiago</first><last>Cortes</last></author>
       <author><first>Utsab</first><last>Barman</last></author>
       <author><first>Dasha</first><last>Bogdanova</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
@@ -663,7 +663,7 @@
     <paper id="37">
       <title><fixed-case>DIT</fixed-case>: Summarisation and Semantic Expansion in Evaluating Semantic Similarity</title>
       <author><first>Magdalena</first><last>Kacmajor</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>230–234</pages>
       <url hash="333fa56a">S14-2037</url>
       <doi>10.3115/v1/S14-2037</doi>
@@ -680,7 +680,7 @@
     </paper>
     <paper id="39">
       <title><fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>: Sentence Similarity from Word Alignment</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>241–246</pages>
@@ -700,7 +700,7 @@
       <title><fixed-case>ECNU</fixed-case>: A Combination Method and Multiple Features for Aspect Extraction and Sentiment Polarity Classification</title>
       <author><first>Fangxi</first><last>Zhang</last></author>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>252–258</pages>
       <url hash="78997313">S14-2041</url>
       <doi>10.3115/v1/S14-2041</doi>
@@ -709,7 +709,7 @@
     <paper id="42">
       <title><fixed-case>ECNU</fixed-case>: Expression- and Message-level Sentiment Orientation Classification in <fixed-case>T</fixed-case>witter Using Multiple Effective Features</title>
       <author><first>Jiang</first><last>Zhao</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Tiantian</first><last>Zhu</last></author>
       <pages>259–264</pages>
       <url hash="aca37fa8">S14-2042</url>
@@ -719,7 +719,7 @@
     <paper id="43">
       <title><fixed-case>ECNU</fixed-case>: Leveraging on Ensemble of Heterogeneous Features and Information Enrichment for Cross Level Semantic Similarity Estimation</title>
       <author><first>Tiantian</first><last>Zhu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>265–270</pages>
       <url hash="5a12b805">S14-2043</url>
       <doi>10.3115/v1/S14-2043</doi>
@@ -729,7 +729,7 @@
       <title><fixed-case>ECNU</fixed-case>: One Stone Two Birds: Ensemble of Heterogenous Measures for Semantic Relatedness and Textual Entailment</title>
       <author><first>Jiang</first><last>Zhao</last></author>
       <author><first>Tiantian</first><last>Zhu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>271–277</pages>
       <url hash="826d8ce4">S14-2044</url>
       <doi>10.3115/v1/S14-2044</doi>
@@ -771,9 +771,9 @@
     <paper id="48">
       <title><fixed-case>GPLSI</fixed-case>: Supervised Sentiment Analysis in <fixed-case>T</fixed-case>witter using Skipgrams</title>
       <author><first>Javi</first><last>Fernández</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Jose Manuel</first><last>Gómez</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="jose-m-gomez"><first>Jose Manuel</first><last>Gómez</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>294–299</pages>
       <url hash="8478a52d">S14-2048</url>
       <doi>10.3115/v1/S14-2048</doi>
@@ -782,7 +782,7 @@
     <paper id="49">
       <title>ha<fixed-case>LF</fixed-case>: Comparing a Pure <fixed-case>CDSM</fixed-case> Approach with a Standard Machine Learning System for <fixed-case>RTE</fixed-case></title>
       <author><first>Lorenzo</first><last>Ferrone</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>300–304</pages>
       <url hash="c9a2f598">S14-2049</url>
       <doi>10.3115/v1/S14-2049</doi>
@@ -790,10 +790,10 @@
     </paper>
     <paper id="50">
       <title><fixed-case>H</fixed-case>ul<fixed-case>T</fixed-case>ech: A General Purpose System for Cross-Level Semantic Similarity based on Anchor Web Counts</title>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Rumen</first><last>Moraliyski</last></author>
       <author><first>Asma</first><last>Berrezoug</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>305–308</pages>
       <url hash="74b751d0">S14-2050</url>
       <doi>10.3115/v1/S14-2050</doi>
@@ -809,7 +809,7 @@
     </paper>
     <paper id="52">
       <title><fixed-case>IITP</fixed-case>: A Supervised Approach for Disorder Mention Detection and Disambiguation</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
       <pages>314–318</pages>
@@ -819,7 +819,7 @@
     </paper>
     <paper id="53">
       <title><fixed-case>IITP</fixed-case>: Supervised Machine Learning for Aspect based Sentiment Analysis</title>
-      <author><first>Deepak Kumar</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak Kumar</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>319–323</pages>
       <url hash="ca74896a">S14-2053</url>
@@ -848,7 +848,7 @@
       <title>In-House: An Ensemble of Pre-Existing Off-the-Shelf Parsers</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>335–340</pages>
       <url hash="40845ce8">S14-2056</url>
       <doi>10.3115/v1/S14-2056</doi>
@@ -857,7 +857,7 @@
     <paper id="57">
       <title><fixed-case>I</fixed-case>ndian Institute of Technology-Patna: Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Vikram</first><last>Singh</last></author>
-      <author><first>Arif Md.</first><last>Khan</last></author>
+      <author id="mohammed-arif-khan"><first>Arif Md.</first><last>Khan</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>341–345</pages>
       <url hash="68904f16">S14-2057</url>
@@ -897,11 +897,11 @@
     </paper>
     <paper id="60">
       <title><fixed-case>IUCL</fixed-case>: Combining Information Sources for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 5</title>
-      <author><first>Alex</first><last>Rudnick</last></author>
+      <author id="alex-rudnick"><first>Alex</first><last>Rudnick</last></author>
       <author><first>Levi</first><last>King</last></author>
       <author><first>Can</first><last>Liu</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>356–360</pages>
       <url hash="0d283aab">S14-2060</url>
       <doi>10.3115/v1/S14-2060</doi>
@@ -909,10 +909,10 @@
     </paper>
     <paper id="61">
       <title><fixed-case>I</fixed-case>xa<fixed-case>M</fixed-case>ed: Applying Freeling and a Perceptron Sequential Tagger at the Shared Task on Analyzing Clinical Texts</title>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <pages>361–365</pages>
       <url hash="3efedeac">S14-2061</url>
       <doi>10.3115/v1/S14-2061</doi>
@@ -931,9 +931,9 @@
     <paper id="63">
       <title><fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>: A Conditional Random Field (<fixed-case>CRF</fixed-case>) Based Approach to Aspect Based Sentiment Analysis</title>
       <author><first>Braja Gopal</first><last>Patra</last></author>
-      <author><first>Soumik</first><last>Mandal</last></author>
+      <author id="soumil-mandal"><first>Soumik</first><last>Mandal</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>370–374</pages>
       <url hash="4fb81bf4">S14-2063</url>
       <doi>10.3115/v1/S14-2063</doi>
@@ -943,7 +943,7 @@
       <title><fixed-case>JU</fixed-case>-Evora: A Graph Based Cross-Level Semantic Similarity Analysis using Discourse Information</title>
       <author><first>Swarnendu</first><last>Ghosh</last></author>
       <author><first>Nibaran</first><last>Das</last></author>
-      <author><first>Teresa</first><last>Gonçalves</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Gonçalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <pages>375–379</pages>
       <url hash="e963e789">S14-2064</url>
@@ -988,10 +988,10 @@
     <paper id="69">
       <title><fixed-case>LIPN</fixed-case>: Introducing a new Geographical Context Similarity Measure and a Statistical Similarity Measure based on the Bhattacharyya coefficient</title>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Jorge</first><last>García Flores</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="jorge-garcia-flores"><first>Jorge</first><last>García Flores</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
-      <author><first>Belém</first><last>Priego Sanchez</last></author>
+      <author id="belem-priego-sanchez"><first>Belém</first><last>Priego Sanchez</last></author>
       <pages>400–405</pages>
       <url hash="14495e5e">S14-2069</url>
       <doi>10.3115/v1/S14-2069</doi>
@@ -1001,9 +1001,9 @@
       <title><fixed-case>LT</fixed-case>3: Sentiment Classification in User-Generated Content Using a Rich Feature Set</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Marjan</first><last>Van de Kauter</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>406–410</pages>
       <url hash="af1fd04e">S14-2070</url>
       <doi>10.3115/v1/S14-2070</doi>
@@ -1013,9 +1013,9 @@
       <title><fixed-case>L</fixed-case>y<fixed-case>S</fixed-case>: Porting a <fixed-case>T</fixed-case>witter Sentiment Analysis Approach from <fixed-case>S</fixed-case>panish to <fixed-case>E</fixed-case>nglish</title>
       <author><first>David</first><last>Vilares</last></author>
       <author><first>Miguel</first><last>Hermo</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>Yerai</first><last>Doval</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval</last></author>
       <pages>411–415</pages>
       <url hash="7be38332">S14-2071</url>
       <doi>10.3115/v1/S14-2071</doi>
@@ -1023,13 +1023,13 @@
     </paper>
     <paper id="72">
       <title>Meerkat Mafia: Multilingual and Cross-Level Semantic Textual Similarity Systems</title>
-      <author><first>Abhay</first><last>Kashyap</last></author>
+      <author id="abhay-l-kashyap"><first>Abhay</first><last>Kashyap</last></author>
       <author><first>Lushan</first><last>Han</last></author>
       <author><first>Roberto</first><last>Yus</last></author>
       <author><first>Jennifer</first><last>Sleeman</last></author>
       <author><first>Taneeya</first><last>Satyapanich</last></author>
       <author><first>Sunil</first><last>Gandhi</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>416–423</pages>
       <url hash="d4f5b2ec">S14-2072</url>
       <doi>10.3115/v1/S14-2072</doi>
@@ -1039,7 +1039,7 @@
       <title><fixed-case>M</fixed-case>ind<fixed-case>L</fixed-case>ab-<fixed-case>UNAL</fixed-case>: Comparing Metamap and <fixed-case>T</fixed-case>-mapper for Medical Concept Extraction in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2014 Task 7</title>
       <author><first>Alejandro</first><last>Riveros</last></author>
       <author><first>Maria</first><last>De-Arteaga</last></author>
-      <author><first>Fabio</first><last>González</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio</first><last>González</last></author>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>Henning</first><last>Müller</last></author>
       <pages>424–427</pages>
@@ -1049,10 +1049,10 @@
     </paper>
     <paper id="74">
       <title><fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: An Improved Hybrid System for Sentiment Analysis in <fixed-case>T</fixed-case>witter Messages</title>
-      <author><first>Pedro</first><last>Balage Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Balage Filho</last></author>
       <author><first>Lucas</first><last>Avanço</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <pages>428–432</pages>
       <url hash="1c99f8e1">S14-2074</url>
       <doi>10.3115/v1/S14-2074</doi>
@@ -1060,7 +1060,7 @@
     </paper>
     <paper id="75">
       <title><fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: Aspect Extraction using Semantic Labels</title>
-      <author><first>Pedro</first><last>Balage Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Balage Filho</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>433–436</pages>
       <url hash="06691f6b">S14-2075</url>
@@ -1072,7 +1072,7 @@
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Colin</first><last>Cherry</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>437–442</pages>
       <url hash="5ac4af50">S14-2076</url>
       <doi>10.3115/v1/S14-2076</doi>
@@ -1082,7 +1082,7 @@
       <title><fixed-case>NRC</fixed-case>-<fixed-case>C</fixed-case>anada-2014: Recent Improvements in the Sentiment Analysis of Tweets</title>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>443–447</pages>
       <url hash="435a74c0">S14-2077</url>
       <doi>10.3115/v1/S14-2077</doi>
@@ -1092,7 +1092,7 @@
       <title><fixed-case>NTNU</fixed-case>: Measuring Semantic Similarity with Sublexical Feature Representations and Soft Cardinality</title>
       <author><first>André</first><last>Lynum</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Sergio</first><last>Jimenez</last></author>
       <pages>448–453</pages>
       <url hash="9b0080ea">S14-2078</url>
@@ -1101,7 +1101,7 @@
     </paper>
     <paper id="79">
       <title><fixed-case>OPI</fixed-case>: <fixed-case>S</fixed-case>emeval-2014 Task 3 System Description</title>
-      <author><first>Marek</first><last>Kozlowski</last></author>
+      <author id="marek-kozlowski"><first>Marek</first><last>Kozlowski</last></author>
       <pages>454–458</pages>
       <url hash="fecfbc69">S14-2079</url>
       <doi>10.3115/v1/S14-2079</doi>
@@ -1111,7 +1111,7 @@
       <title><fixed-case>P</fixed-case>eking: Profiling Syntactic Tree Parsing Techniques for Semantic Graph Parsing</title>
       <author><first>Yantao</first><last>Du</last></author>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>459–464</pages>
       <url hash="68f1eeee">S14-2080</url>
@@ -1120,7 +1120,7 @@
     </paper>
     <paper id="81">
       <title><fixed-case>P</fixed-case>otsdam: Semantic Dependency Parsing by Bidirectional Graph-Tree Transformations and Syntactic Parsing</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <pages>465–470</pages>
       <url hash="23d4a9e5">S14-2081</url>
@@ -1129,7 +1129,7 @@
     </paper>
     <paper id="82">
       <title><fixed-case>P</fixed-case>riberam: A Turbo Semantic Parser with Second Order Features</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
       <pages>471–476</pages>
       <url hash="65d18e03">S14-2082</url>
@@ -1138,8 +1138,8 @@
     </paper>
     <paper id="83">
       <title><fixed-case>R</fixed-case>el<fixed-case>A</fixed-case>gent: Entity Detection and Normalization for Diseases in Clinical Records: a Linguistically Driven Approach</title>
-      <author><first>Sv</first><last>Ramanan</last></author>
-      <author><first>Senthil</first><last>Nathan</last></author>
+      <author id="sv-ramanan"><first>Sv</first><last>Ramanan</last></author>
+      <author id="p-senthil-nathan"><first>Senthil</first><last>Nathan</last></author>
       <pages>477–481</pages>
       <url hash="25ed0344">S14-2083</url>
       <doi>10.3115/v1/S14-2083</doi>
@@ -1156,7 +1156,7 @@
     </paper>
     <paper id="85">
       <title><fixed-case>RTM</fixed-case>-<fixed-case>DCU</fixed-case>: Referential Translation Machines for Semantic Similarity</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>487–496</pages>
       <url hash="f26edca7">S14-2085</url>
@@ -1186,9 +1186,9 @@
     </paper>
     <paper id="88">
       <title><fixed-case>SAIL</fixed-case>-<fixed-case>GRS</fixed-case>: Grammar Induction for Spoken Dialogue Systems using <fixed-case>CF</fixed-case>-<fixed-case>IRF</fixed-case> Rule Similarity</title>
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>508–511</pages>
       <url hash="72ea1a72">S14-2088</url>
       <doi>10.3115/v1/S14-2088</doi>
@@ -1201,7 +1201,7 @@
       <author><first>Colin</first><last>Vaz</last></author>
       <author><first>Jesse James</first><last>Bisogni</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>512–516</pages>
       <url hash="51a6d3c0">S14-2089</url>
       <doi>10.3115/v1/S14-2089</doi>
@@ -1212,7 +1212,7 @@
       <author><first>Naveen</first><last>Nandan</last></author>
       <author><first>Daniel</first><last>Dahlmeier</last></author>
       <author><first>Akriti</first><last>Vij</last></author>
-      <author><first>Nishtha</first><last>Malhotra</last></author>
+      <author id="nishtha-malhotra"><first>Nishtha</first><last>Malhotra</last></author>
       <pages>517–521</pages>
       <url hash="d9c6caba">S14-2090</url>
       <doi>10.3115/v1/S14-2090</doi>
@@ -1221,7 +1221,7 @@
     <paper id="91">
       <title><fixed-case>SAP</fixed-case>-<fixed-case>RI</fixed-case>: <fixed-case>T</fixed-case>witter Sentiment Analysis in Two Days</title>
       <author><first>Akriti</first><last>Vij</last></author>
-      <author><first>Nishta</first><last>Malhotra</last></author>
+      <author id="nishtha-malhotra"><first>Nishta</first><last>Malhotra</last></author>
       <author><first>Naveen</first><last>Nandan</last></author>
       <author><first>Daniel</first><last>Dahlmeier</last></author>
       <pages>522–526</pages>
@@ -1232,7 +1232,7 @@
     <paper id="92">
       <title><fixed-case>S</fixed-case>eem<fixed-case>G</fixed-case>o: Conditional Random Fields Labeling and Maximum Entropy Classification for Aspect Based Sentiment Analysis</title>
       <author><first>Pengfei</first><last>Liu</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>527–531</pages>
       <url hash="01fc0c93">S14-2092</url>
       <doi>10.3115/v1/S14-2092</doi>
@@ -1253,7 +1253,7 @@
       <title><fixed-case>S</fixed-case>ensible: <fixed-case>L</fixed-case>2 Translation Assistance by Emulating the Manual Post-Editing Process</title>
       <author><first>Liling</first><last>Tan</last></author>
       <author><first>Anne-Kathrin</first><last>Schumann</last></author>
-      <author><first>Jose M.M.</first><last>Martinez</last></author>
+      <author id="jose-manuel-martinez"><first>Jose M.M.</first><last>Martinez</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>541–545</pages>
       <url hash="0a92b772">S14-2094</url>
@@ -1281,7 +1281,7 @@
     </paper>
     <paper id="97">
       <title><fixed-case>S</fixed-case>hrd<fixed-case>L</fixed-case>ite: Semantic Parsing Using a Handmade Grammar</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>556–559</pages>
       <url hash="8bc1db14">S14-2097</url>
       <doi>10.3115/v1/S14-2097</doi>
@@ -1291,9 +1291,9 @@
       <title><fixed-case>S</fixed-case>im<fixed-case>C</fixed-case>ompass: Using Deep Learning Word Embeddings to Assess Cross-level Similarity</title>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Di</first><last>Chen</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>560–565</pages>
       <url hash="46a4823b">S14-2098</url>
       <doi>10.3115/v1/S14-2098</doi>
@@ -1301,10 +1301,10 @@
     </paper>
     <paper id="99">
       <title><fixed-case>SINAI</fixed-case>: Voting System for Aspect Based Sentiment Analysis</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>566–571</pages>
       <url hash="db2bced7">S14-2099</url>
       <doi>10.3115/v1/S14-2099</doi>
@@ -1312,10 +1312,10 @@
     </paper>
     <paper id="100">
       <title><fixed-case>SINAI</fixed-case>: Voting System for <fixed-case>T</fixed-case>witter Sentiment Analysis</title>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>572–577</pages>
       <url hash="269c5f1e">S14-2100</url>
       <doi>10.3115/v1/S14-2100</doi>
@@ -1342,7 +1342,7 @@
     <paper id="102">
       <title><fixed-case>SSMT</fixed-case>:A Machine Translation Evaluation View To Paragraph-to-Sentence Semantic Similarity</title>
       <author><first>Pingping</first><last>Huang</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>585–589</pages>
       <url hash="e8a112f3">S14-2102</url>
       <doi>10.3115/v1/S14-2102</doi>
@@ -1367,8 +1367,8 @@
     <paper id="104">
       <title>Supervised Methods for Aspect-Based Sentiment Analysis</title>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Frederic</first><last>Béchet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Béchet</last></author>
       <pages>596–600</pages>
       <url hash="fda6b6a1">S14-2104</url>
       <doi>10.3115/v1/S14-2104</doi>
@@ -1386,7 +1386,7 @@
     </paper>
     <paper id="106">
       <title>Synalp-Empathic: A Valence Shifting Hybrid System for Sentiment Analysis</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <author><first>Samuel</first><last>Cruz-Lara</last></author>
       <author><first>Nadia</first><last>Bellalem</last></author>
       <author><first>Lotfi</first><last>Bellalem</last></author>
@@ -1399,8 +1399,8 @@
       <title><fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Aspect level opinion mining exploiting syntactic cues</title>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
-      <author><first>István</first><last>Varga</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="istvan-varga"><first>István</first><last>Varga</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>610–614</pages>
       <url hash="08c4da67">S14-2107</url>
       <doi>10.3115/v1/S14-2107</doi>
@@ -1409,7 +1409,7 @@
     <paper id="108">
       <title><fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Clinical Text Analysis with Named Entity Recognition</title>
       <author><first>Melinda</first><last>Katona</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>615–618</pages>
       <url hash="e753bdfc">S14-2108</url>
       <doi>10.3115/v1/S14-2108</doi>
@@ -1417,9 +1417,9 @@
     </paper>
     <paper id="109">
       <title><fixed-case>TCDSCSS</fixed-case>: Dimensionality Reduction to Evaluate Texts of Varying Lengths - an <fixed-case>IR</fixed-case> Approach</title>
-      <author><first>Arun Kumar</first><last>Jayapal</last></author>
+      <author id="arun-kumar-jayapal"><first>Arun Kumar</first><last>Jayapal</last></author>
       <author><first>Martin</first><last>Emms</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>619–623</pages>
       <url hash="31d7d322">S14-2109</url>
       <doi>10.3115/v1/S14-2109</doi>
@@ -1455,8 +1455,8 @@
     <paper id="113">
       <title>The Impact of Z_score on <fixed-case>T</fixed-case>witter Sentiment Analysis</title>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Frederic</first><last>Béchet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Béchet</last></author>
       <pages>636–641</pages>
       <url hash="c3fbb94a">S14-2113</url>
       <doi>10.3115/v1/S14-2113</doi>
@@ -1475,7 +1475,7 @@
     </paper>
     <paper id="115">
       <title>Think Positive: Towards <fixed-case>T</fixed-case>witter Sentiment Analysis from Scratch</title>
-      <author><first>Cícero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
       <pages>647–651</pages>
       <url hash="ec538d07">S14-2115</url>
       <doi>10.3115/v1/S14-2115</doi>
@@ -1483,7 +1483,7 @@
     </paper>
     <paper id="116">
       <title><fixed-case>T</fixed-case>hink<fixed-case>M</fixed-case>iners: Disorder Recognition using Conditional Random Fields and Distributional Semantics</title>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <author><first>Avinesh</first><last>PVS</last></author>
       <author><first>Joy</first><last>Mustafi</last></author>
       <author><first>Lalit</first><last>Agarwalla</last></author>
@@ -1529,10 +1529,10 @@
     <paper id="120">
       <title><fixed-case>TUGAS</fixed-case>: Exploiting unlabelled data for <fixed-case>T</fixed-case>witter sentiment analysis</title>
       <author><first>Silvio</first><last>Amir</last></author>
-      <author><first>Miguel B.</first><last>Almeida</last></author>
+      <author id="miguel-b-almeida"><first>Miguel B.</first><last>Almeida</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
       <author><first>João</first><last>Filgueiras</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <pages>673–677</pages>
       <url hash="baf8f9eb">S14-2120</url>
       <doi>10.3115/v1/S14-2120</doi>
@@ -1599,7 +1599,7 @@
       <author><first>André</first><last>Leal</last></author>
       <author><first>Diogo</first><last>Gonçalves</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>711–715</pages>
       <url hash="8775112c">S14-2127</url>
       <doi>10.3115/v1/S14-2127</doi>
@@ -1609,10 +1609,10 @@
       <title><fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>_<fixed-case>S</fixed-case>em<fixed-case>S</fixed-case>im: Multilingual System for Measuring Semantic Textual Similarity</title>
       <author><first>Alexander</first><last>Chávez</last></author>
       <author><first>Héctor</first><last>Dávila</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Antonio</first><last>Fernández-Orquín</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández-Orquín</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>716–721</pages>
       <url hash="9d8978e4">S14-2128</url>
       <doi>10.3115/v1/S14-2128</doi>
@@ -1624,10 +1624,10 @@
       <author><first>Armando</first><last>Collazo</last></author>
       <author><first>Elvis</first><last>Crego</last></author>
       <author><first>Jorge L.</first><last>Garcia</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>David</first><last>Tomás</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="david-tomas"><first>David</first><last>Tomás</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>722–726</pages>
       <url hash="a47ef0e9">S14-2129</url>
       <doi>10.3115/v1/S14-2129</doi>
@@ -1638,9 +1638,9 @@
       <author><first>Pedro Aniel</first><last>Sánchez-Mirabal</last></author>
       <author><first>Yarelis</first><last>Ruano Torres</last></author>
       <author><first>Suilen</first><last>Hernández Alvarado</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>727–731</pages>
       <url hash="288a0f6e">S14-2130</url>
       <doi>10.3115/v1/S14-2130</doi>
@@ -1651,7 +1651,7 @@
       <author><first>Sergio</first><last>Jimenez</last></author>
       <author><first>George</first><last>Dueñas</last></author>
       <author><first>Julia</first><last>Baquero</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>732–742</pages>
       <url hash="d35d2e61">S14-2131</url>
       <doi>10.3115/v1/S14-2131</doi>
@@ -1671,7 +1671,7 @@
       <title><fixed-case>UNIBA</fixed-case>: Combining Distributional Semantic Models and Word Sense Disambiguation for Textual Similarity</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Annalina</first><last>Caputo</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>748–753</pages>
       <url hash="c24622f0">S14-2133</url>
       <doi>10.3115/v1/S14-2133</doi>
@@ -1692,7 +1692,7 @@
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>761–767</pages>
       <url hash="1474359e">S14-2135</url>
       <doi>10.3115/v1/S14-2135</doi>
@@ -1715,8 +1715,8 @@
       <author><first>Reynier</first><last>Ortega Bueno</last></author>
       <author><first>Adrian</first><last>Fonseca Bruzón</last></author>
       <author><first>Carlos</first><last>Muñiz Cuza</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>773–778</pages>
       <url hash="92af7ad8">S14-2137</url>
       <doi>10.3115/v1/S14-2137</doi>
@@ -1733,9 +1733,9 @@
     <paper id="139">
       <title><fixed-case>U</fixed-case>o<fixed-case>W</fixed-case>: <fixed-case>NLP</fixed-case> techniques developed at the <fixed-case>U</fixed-case>niversity of <fixed-case>W</fixed-case>olverhampton for Semantic Similarity and Textual Entailment</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Hanna</first><last>Béchara</last></author>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Béchara</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>785–789</pages>
       <url hash="0533ce00">S14-2139</url>
       <doi>10.3115/v1/S14-2139</doi>
@@ -1751,11 +1751,11 @@
     </paper>
     <paper id="141">
       <title><fixed-case>UT</fixed-case>exas: Natural Language Semantics using Distributional Semantics and Probabilistic Logic</title>
-      <author><first>Islam</first><last>Beltagy</last></author>
+      <author id="islam-beltagy"><first>Islam</first><last>Beltagy</last></author>
       <author><first>Stephen</first><last>Roller</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>796–801</pages>
       <url hash="4220067d">S14-2141</url>
       <doi>10.3115/v1/S14-2141</doi>
@@ -1805,7 +1805,7 @@
     </paper>
     <paper id="146">
       <title><fixed-case>UWM</fixed-case>: Applying an Existing Trainable Semantic Parser to Parse Robotic Spatial Commands</title>
-      <author><first>Rohit</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
       <pages>823–827</pages>
       <url hash="3bcb00d4">S14-2146</url>
       <doi>10.3115/v1/S14-2146</doi>
@@ -1814,7 +1814,7 @@
     <paper id="147">
       <title><fixed-case>UWM</fixed-case>: Disorder Mention Extraction from Clinical Text Using <fixed-case>CRF</fixed-case>s and Normalization Using Learned Edit Distance Patterns</title>
       <author><first>Omid</first><last>Ghiasvand</last></author>
-      <author><first>Rohit</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
       <pages>828–832</pages>
       <url hash="09b56743">S14-2147</url>
       <doi>10.3115/v1/S14-2147</doi>
@@ -1824,7 +1824,7 @@
       <title><fixed-case>V</fixed-case>3: Unsupervised Generation of Domain Aspect Terms for Aspect Based Sentiment Analysis</title>
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>833–837</pages>
       <url hash="716e9f82">S14-2148</url>
       <doi>10.3115/v1/S14-2148</doi>
@@ -1833,7 +1833,7 @@
     <paper id="149">
       <title><fixed-case>XRCE</fixed-case>: Hybrid Classification for Aspect-based Sentiment Analysis</title>
       <author><first>Caroline</first><last>Brun</last></author>
-      <author><first>Diana Nicoleta</first><last>Popa</last></author>
+      <author id="diana-nicoleta-popa"><first>Diana Nicoleta</first><last>Popa</last></author>
       <author><first>Claude</first><last>Roux</last></author>
       <pages>838–842</pages>
       <url hash="267da451">S14-2149</url>
diff --git a/data/xml/S15.xml b/data/xml/S15.xml
index f5edc6e07f..a95b8db703 100644
--- a/data/xml/S15.xml
+++ b/data/xml/S15.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Fourth Joint Conference on Lexical and Computational Semantics</booktitle>
       <url hash="a2bca2be">S15-1</url>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>Gemma</first><last>Boleda</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="gemma-boleda"><first>Gemma</first><last>Boleda</last></editor>
       <editor><first>Paolo</first><last>Rosso</last></editor>
       <doi>10.18653/v1/S15-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -59,7 +59,7 @@
     <paper id="5">
       <title>Combining Seemingly Incompatible Corpora for Implicit Semantic Role Labeling</title>
       <author><first>Parvin Sadat</first><last>Feizabadi</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>40–50</pages>
       <url hash="23fa7846">S15-1005</url>
       <doi>10.18653/v1/S15-1005</doi>
@@ -76,8 +76,8 @@
     </paper>
     <paper id="7">
       <title>A Methodology for Word Sense Disambiguation at 90% based on large-scale <fixed-case>C</fixed-case>rowd<fixed-case>S</fixed-case>ourcing</title>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>61–70</pages>
       <url hash="10160052">S15-1007</url>
       <doi>10.18653/v1/S15-1007</doi>
@@ -86,7 +86,7 @@
     <paper id="8">
       <title>Learning Structures of Negations from Flat Annotations</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <pages>71–81</pages>
       <url hash="40039215">S15-1008</url>
       <doi>10.18653/v1/S15-1008</doi>
@@ -96,22 +96,22 @@
       <title>A New Dataset and Evaluation for Belief/Factuality</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Tomas</first><last>By</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Jennifer</first><last>Tracey</last></author>
       <author><first>Michael</first><last>Arrigo</last></author>
       <author><first>Rupayan</first><last>Basu</last></author>
       <author><first>Micah</first><last>Clark</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>Anna</first><last>Prokofieva</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Gregory</first><last>Werner</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>82–91</pages>
       <url hash="650d861b">S15-1009</url>
       <doi>10.18653/v1/S15-1009</doi>
@@ -131,8 +131,8 @@
     <paper id="11">
       <title>Combining Mention Context and Hyperlinks from <fixed-case>W</fixed-case>ikipedia for Named Entity Disambiguation</title>
       <author><first>Ander</first><last>Barrena</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>101–105</pages>
       <url hash="e36c98a8">S15-1011</url>
       <doi>10.18653/v1/S15-1011</doi>
@@ -144,7 +144,7 @@
       <title>Collective Document Classification with Implicit Inter-document Semantic Relationships</title>
       <author><first>Clint</first><last>Burford</last></author>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>106–116</pages>
       <url hash="92c1eec8">S15-1012</url>
       <doi>10.18653/v1/S15-1012</doi>
@@ -154,7 +154,7 @@
       <title><fixed-case>SGR</fixed-case>ank: Combining Statistical and Graphical Methods to Improve the State of the Art in Unsupervised Keyphrase Extraction</title>
       <author><first>Soheil</first><last>Danesh</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>117–126</pages>
       <url hash="b0344954">S15-1013</url>
       <doi>10.18653/v1/S15-1013</doi>
@@ -173,7 +173,7 @@
     <paper id="15">
       <title>Ideological Perspective Detection Using Semantic Features</title>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>137–146</pages>
       <url hash="70751a66">S15-1015</url>
@@ -182,8 +182,8 @@
     </paper>
     <paper id="16">
       <title>Mapping Different Rhetorical Relation Annotations: A Proposal</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>147–152</pages>
       <url hash="ca100d3c">S15-1016</url>
       <doi>10.18653/v1/S15-1016</doi>
@@ -193,7 +193,7 @@
       <title>Dissecting the Practical Lexical Function Model for Compositional Distributional Semantics</title>
       <author><first>Abhijeet</first><last>Gupta</last></author>
       <author><first>Jason</first><last>Utt</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>153–158</pages>
       <url hash="6fa09cef">S15-1017</url>
       <doi>10.18653/v1/S15-1017</doi>
@@ -213,7 +213,7 @@
       <author><first>András</first><last>Kornai</last></author>
       <author><first>Judit</first><last>Ács</last></author>
       <author><first>Márton</first><last>Makrai</last></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
       <author><first>Katalin</first><last>Pajkossy</last></author>
       <author><first>Gábor</first><last>Recski</last></author>
       <pages>165–175</pages>
@@ -223,12 +223,12 @@
     </paper>
     <paper id="20">
       <title>Extending a Single-Document Summarizer to Multi-Document: a Hierarchical Approach</title>
-      <author><first>Luís</first><last>Marujo</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
-      <author><first>João</first><last>Neto</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
+      <author id="joao-p-neto"><first>João</first><last>Neto</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>176–181</pages>
       <url hash="5e612efa">S15-1020</url>
       <doi>10.18653/v1/S15-1020</doi>
@@ -236,10 +236,10 @@
     </paper>
     <paper id="21">
       <title>Reading Between the Lines: Overcoming Data Sparsity for Accurate Classification of Lexical Relationships</title>
-      <author><first>Silvia</first><last>Necşulescu</last></author>
+      <author id="silvia-necsulescu"><first>Silvia</first><last>Necşulescu</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
       <author><first>David</first><last>Jurgens</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>182–192</pages>
       <url hash="c5b6afed">S15-1021</url>
@@ -249,10 +249,10 @@
     <paper id="22">
       <title>Multi-Level Alignments As An Extensible Representation Basis for Textual Entailment Algorithms</title>
       <author><first>Tae-Gil</first><last>Noh</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Vered</first><last>Shwartz</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Kathrin</first><last>Eichler</last></author>
       <author><first>Lili</first><last>Kotlerman</last></author>
       <author><first>Meni</first><last>Adler</last></author>
@@ -266,9 +266,9 @@
       <author><first>Samuel</first><last>Ritter</last></author>
       <author><first>Cotie</first><last>Long</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Matthew</first><last>Botvinick</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <pages>199–204</pages>
       <url hash="9ff30a96">S15-1023</url>
       <doi>10.18653/v1/S15-1023</doi>
@@ -288,7 +288,7 @@
     </paper>
     <paper id="25">
       <title>Combining Open Source Annotators for Entity Linking through Weighted Voting</title>
-      <author><first>Pablo</first><last>Ruiz</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>211–215</pages>
       <url hash="1450245f">S15-1025</url>
@@ -297,8 +297,8 @@
     </paper>
     <paper id="26">
       <title>Automatic Generation of a Lexical Resource to support Semantic Role Labeling in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Magali</first><last>Sanches Duran</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>216–221</pages>
       <url hash="03e71dea">S15-1026</url>
       <doi>10.18653/v1/S15-1026</doi>
@@ -356,7 +356,7 @@
     </paper>
     <paper id="32">
       <title>Incremental Semantic Construction Using Normal Form <fixed-case>CCG</fixed-case> Derivation</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>269–278</pages>
       <url hash="6bb03a3a">S15-1032</url>
@@ -366,7 +366,7 @@
     <paper id="33">
       <title>Dependency-Based Semantic Role Labeling using Convolutional Neural Networks</title>
       <author><first>William</first><last>Foland</last></author>
-      <author><first>James</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James</first><last>Martin</last></author>
       <pages>279–288</pages>
       <url hash="2a1a9dfc">S15-1033</url>
       <doi>10.18653/v1/S15-1033</doi>
@@ -385,7 +385,7 @@
       <title>Resolving Discourse-Deictic Pronouns: A Two-Stage Approach to Do It</title>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
       <author><first>Raul</first><last>Guerra</last></author>
-      <author><first>Edgar</first><last>Gonzàlez Pellicer</last></author>
+      <author id="edgar-gonzalez-pellicer"><first>Edgar</first><last>Gonzàlez Pellicer</last></author>
       <author><first>Marta</first><last>Recasens</last></author>
       <pages>299–308</pages>
       <url hash="dddb9b03">S15-1035</url>
@@ -406,7 +406,7 @@
     <meta>
       <booktitle>Proceedings of the 9th International Workshop on Semantic Evaluation (<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2015)</booktitle>
       <url hash="bef744ae">S15-2</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Torsten</first><last>Zesch</last></editor>
       <editor><first>Daniel</first><last>Cer</last></editor>
       <editor><first>David</first><last>Jurgens</last></editor>
@@ -425,7 +425,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 1: Paraphrase and Semantic Similarity in <fixed-case>T</fixed-case>witter (<fixed-case>PIT</fixed-case>)</title>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Bill</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>Bill</first><last>Dolan</last></author>
       <pages>1–11</pages>
       <url hash="312c9936">S15-2001</url>
       <doi>10.18653/v1/S15-2001</doi>
@@ -434,8 +434,8 @@
     <paper id="2">
       <title><fixed-case>MITRE</fixed-case>: Seven Systems for Semantic Similarity in Tweets</title>
       <author><first>Guido</first><last>Zarrella</last></author>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Elizabeth M.</first><last>Merkhofer</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="elizabeth-merkhofer"><first>Elizabeth M.</first><last>Merkhofer</last></author>
       <author><first>Laura</first><last>Strickhart</last></author>
       <pages>12–17</pages>
       <url hash="92172388">S15-2002</url>
@@ -444,9 +444,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>CICBUAP</fixed-case>nlp: Graph-Based Approach for Answer Selection in Community Question Answering Task</title>
-      <author><first>Helena</first><last>Gomez</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gomez</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
       <pages>18–22</pages>
       <url hash="79370d85">S15-2003</url>
@@ -475,7 +475,7 @@
     <paper id="6">
       <title><fixed-case>ECNU</fixed-case>: Leveraging Word Embeddings to Boost Performance for Paraphrase in <fixed-case>T</fixed-case>witter</title>
       <author><first>Jiang</first><last>Zhao</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>34–39</pages>
       <url hash="b6b5111f">S15-2006</url>
       <doi>10.18653/v1/S15-2006</doi>
@@ -484,7 +484,7 @@
     <paper id="7">
       <title><fixed-case>ROB</fixed-case>: Using Semantic Meaning to Recognize Paraphrases</title>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>40–44</pages>
       <url hash="4a4d2711">S15-2007</url>
       <doi>10.18653/v1/S15-2007</doi>
@@ -493,7 +493,7 @@
     <paper id="8">
       <title><fixed-case>AMRITA</fixed-case>_<fixed-case>CEN</fixed-case>@<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015: Paraphrase Detection for <fixed-case>T</fixed-case>witter using Unsupervised Feature Learning with Recursive Autoencoders</title>
       <author><first>Mahalakshmi</first><last>Shanumuga Sundaram</last></author>
-      <author><first>Anand Kumar</first><last>Madasamy</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>Madasamy</last></author>
       <author><first>Soman</first><last>Kotti Padannayil</last></author>
       <pages>45–50</pages>
       <url hash="289ca161">S15-2008</url>
@@ -504,7 +504,7 @@
       <title><fixed-case>E</fixed-case>biquity: Paraphrase and Semantic Similarity in <fixed-case>T</fixed-case>witter using Skipgrams</title>
       <author><first>Taneeya</first><last>Satyapanich</last></author>
       <author><first>Hang</first><last>Gao</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>51–55</pages>
       <url hash="41c9f305">S15-2009</url>
       <doi>10.18653/v1/S15-2009</doi>
@@ -512,7 +512,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>RTM</fixed-case>-<fixed-case>DCU</fixed-case>: Predicting Semantic Similarity with Referential Translation Machines</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>56–63</pages>
       <url hash="b00127a7">S15-2010</url>
       <doi>10.18653/v1/S15-2010</doi>
@@ -529,12 +529,12 @@
     </paper>
     <paper id="12">
       <title><fixed-case>TKLBLIIR</fixed-case>: Detecting <fixed-case>T</fixed-case>witter Paraphrases with <fixed-case>T</fixed-case>weeting<fixed-case>J</fixed-case>ay</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>70–74</pages>
       <url hash="a9c6766f">S15-2012</url>
       <doi>10.18653/v1/S15-2012</doi>
@@ -542,7 +542,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>CDTDS</fixed-case>: Predicting Paraphrases in <fixed-case>T</fixed-case>witter via Support Vector Regression</title>
-      <author><first>Rafael Michael</first><last>Karampatsis</last></author>
+      <author id="rafael-michael-karampatsis"><first>Rafael Michael</first><last>Karampatsis</last></author>
       <pages>75–79</pages>
       <url hash="24f2c18c">S15-2013</url>
       <doi>10.18653/v1/S15-2013</doi>
@@ -550,8 +550,8 @@
     </paper>
     <paper id="14">
       <title>yi<fixed-case>G</fixed-case>ou: A Semantic Text Similarity Computing System Based on <fixed-case>SVM</fixed-case></title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Lei</first><last>Lin</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <pages>80–84</pages>
@@ -562,9 +562,9 @@
     <paper id="15">
       <title><fixed-case>USAAR</fixed-case>-<fixed-case>SHEFFIELD</fixed-case>: Semantic Textual Similarity with Deep Regression and Machine Translation Evaluation Metrics</title>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>85–89</pages>
       <url hash="0b017976">S15-2015</url>
       <doi>10.18653/v1/S15-2015</doi>
@@ -573,7 +573,7 @@
     <paper id="16">
       <title><fixed-case>T</fixed-case>r<fixed-case>WP</fixed-case>: Text Relatedness using Word and Phrase Relatedness</title>
       <author><first>Md Rashadul Hasan</first><last>Rakib</last></author>
-      <author><first>Aminul</first><last>Islam</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
       <pages>90–95</pages>
       <url hash="4dc6aa41">S15-2016</url>
@@ -582,13 +582,13 @@
     </paper>
     <paper id="17">
       <title><fixed-case>M</fixed-case>ini<fixed-case>E</fixed-case>xperts: An <fixed-case>SVM</fixed-case> Approach for Measuring Semantic Textual Similarity</title>
-      <author><first>Hanna</first><last>Béchara</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Béchara</last></author>
       <author><first>Hernani</first><last>Costa</last></author>
       <author><first>Shiva</first><last>Taslimipoor</last></author>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>96–101</pages>
       <url hash="837920bd">S15-2017</url>
       <doi>10.18653/v1/S15-2017</doi>
@@ -628,8 +628,8 @@
     <paper id="21">
       <title><fixed-case>ECNU</fixed-case>: Using Traditional Similarity Measurements and Word Embedding for Semantic Textual Similarity Estimation</title>
       <author><first>Jiang</first><last>Zhao</last></author>
-      <author><first>Man</first><last>Lan</last></author>
-      <author><first>Jun Feng</first><last>Tian</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
+      <author id="junfeng-tian"><first>Jun Feng</first><last>Tian</last></author>
       <pages>117–122</pages>
       <url hash="1c5cd3e9">S15-2021</url>
       <doi>10.18653/v1/S15-2021</doi>
@@ -658,8 +658,8 @@
     <paper id="24">
       <title><fixed-case>SOPA</fixed-case>: Random Forests Regression for the Semantic Textual Similarity task</title>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Jorge</first><last>García Flores</last></author>
-      <author><first>Ivan V.</first><last>Meza</last></author>
+      <author id="jorge-garcia-flores"><first>Jorge</first><last>García Flores</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan V.</first><last>Meza</last></author>
       <author><first>Isaac</first><last>Rodríguez</last></author>
       <pages>132–137</pages>
       <url hash="e85ab505">S15-2024</url>
@@ -678,7 +678,7 @@
     <paper id="26">
       <title><fixed-case>DCU</fixed-case>: Using Distributional Semantics and Domain Adaptation for the Semantic Textual Similarity <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 2</title>
       <author><first>Piyush</first><last>Arora</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Gareth</first><last>Jones</last></author>
       <pages>143–147</pages>
@@ -688,7 +688,7 @@
     </paper>
     <paper id="27">
       <title><fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>: Sentence Similarity from Word Alignment and Semantic Vector Composition</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>148–153</pages>
@@ -699,7 +699,7 @@
     <paper id="28">
       <title><fixed-case>FCICU</fixed-case>: The Integration between Sense-Based Kernel and Surface-Based Methods to Measure Semantic Textual Similarity</title>
       <author><first>Basma</first><last>Hassan</last></author>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
       <author><first>Reem</first><last>Bahgat</last></author>
       <pages>154–158</pages>
       <url hash="01101a6d">S15-2028</url>
@@ -710,8 +710,8 @@
       <title><fixed-case>AZMAT</fixed-case>: Sentence Similarity Using Associative Matrices</title>
       <author><first>Evan</first><last>Jaffe</last></author>
       <author><first>Lifeng</first><last>Jin</last></author>
-      <author><first>David</first><last>King</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <pages>159–163</pages>
       <url hash="16094f3b">S15-2029</url>
       <doi>10.18653/v1/S15-2029</doi>
@@ -720,10 +720,10 @@
     <paper id="30">
       <title><fixed-case>N</fixed-case>e<fixed-case>R</fixed-case>o<fixed-case>S</fixed-case>im: A System for Measuring and Interpreting Semantic Textual Similarity</title>
       <author><first>Rajendra</first><last>Banjade</last></author>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
       <author><first>Nabin</first><last>Maharjan</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
-      <author><first>Dan</first><last>Stefanescu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Stefanescu</last></author>
       <author><first>Mihai</first><last>Lintean</last></author>
       <author><first>Dipesh</first><last>Gautam</last></author>
       <pages>164–171</pages>
@@ -736,7 +736,7 @@
       <author><first>Lushan</first><last>Han</last></author>
       <author><first>Justin</first><last>Martineau</last></author>
       <author><first>Doreen</first><last>Cheng</last></author>
-      <author id="christopher-thomas"><first>Christopher</first><last>Thomas</last></author>
+      <author><first>Christopher</first><last>Thomas</last></author>
       <pages>172–177</pages>
       <url hash="f79ac3ea">S15-2031</url>
       <doi>10.18653/v1/S15-2031</doi>
@@ -744,11 +744,11 @@
     </paper>
     <paper id="32">
       <title><fixed-case>UBC</fixed-case>: Cubes for <fixed-case>E</fixed-case>nglish Semantic Textual Similarity and Supervised Approaches for Interpretable <fixed-case>STS</fixed-case></title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
       <pages>178–183</pages>
       <url hash="e6443163">S15-2032</url>
@@ -759,7 +759,7 @@
       <title><fixed-case>ASAP</fixed-case>-<fixed-case>II</fixed-case>: From the Alignment of Phrases to Textual Similarity</title>
       <author><first>Ana</first><last>Alves</last></author>
       <author><first>David</first><last>Simões</last></author>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Adriana</first><last>Ferrugento</last></author>
       <pages>184–189</pages>
       <url hash="9e09cd41">S15-2033</url>
@@ -768,8 +768,8 @@
     </paper>
     <paper id="34">
       <title><fixed-case>TATO</fixed-case>: Leveraging on Multiple Strategies for Semantic Textual Similarity</title>
-      <author><first>Tu Thanh</first><last>Vu</last></author>
-      <author><first>Quan Hung</first><last>Tran</last></author>
+      <author id="tu-vu"><first>Tu Thanh</first><last>Vu</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
       <author><first>Son Bao</first><last>Pham</last></author>
       <pages>190–195</pages>
       <url hash="87081623">S15-2034</url>
@@ -801,8 +801,8 @@
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Walid</first><last>Magdy</last></author>
       <pages>203–209</pages>
       <url hash="35ff7776">S15-2036</url>
@@ -823,10 +823,10 @@
     </paper>
     <paper id="38">
       <title><fixed-case>JAIST</fixed-case>: Combining multiple features for Answer Selection in Community Question Answering</title>
-      <author><first>Quan Hung</first><last>Tran</last></author>
-      <author><first>Vu Duc</first><last>Tran</last></author>
-      <author><first>Tu Thanh</first><last>Vu</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="quan-hung-tran"><first>Quan Hung</first><last>Tran</last></author>
+      <author id="duc-vu-tran"><first>Vu Duc</first><last>Tran</last></author>
+      <author id="tu-vu"><first>Tu Thanh</first><last>Vu</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Son Bao</first><last>Pham</last></author>
       <pages>215–219</pages>
       <url hash="3fad65c4">S15-2038</url>
@@ -849,7 +849,7 @@
       <author><first>Reham</first><last>Mohamed</last></author>
       <author><first>Maha</first><last>Ragab</last></author>
       <author><first>Heba</first><last>Abdelnasser</last></author>
-      <author><first>Nagwa</first><last>M. El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>M. El-Makky</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
       <pages>226–230</pages>
       <url hash="0293b3a5">S15-2040</url>
@@ -869,8 +869,8 @@
     <paper id="42">
       <title><fixed-case>ECNU</fixed-case>: Using Multiple Sources of <fixed-case>CQA</fixed-case>-based Information for Answers Selection and <fixed-case>YES</fixed-case>/<fixed-case>NO</fixed-case> Response Inference</title>
       <author><first>Liang</first><last>Yi</last></author>
-      <author><first>JianXiang</first><last>Wang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="jianxiang-wang"><first>JianXiang</first><last>Wang</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>236–241</pages>
       <url hash="d281cd54">S15-2042</url>
       <doi>10.18653/v1/S15-2042</doi>
@@ -891,7 +891,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>C</fixed-case>o<fixed-case>M</fixed-case>i<fixed-case>C</fixed-case>: Adapting a Short Answer Assessment System for Answer Selection</title>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <pages>247–251</pages>
       <url hash="bc30d475">S15-2044</url>
@@ -900,19 +900,19 @@
     </paper>
     <paper id="45">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 2: Semantic Textual Similarity, <fixed-case>E</fixed-case>nglish, <fixed-case>S</fixed-case>panish and Pilot on Interpretability</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>252–263</pages>
       <url hash="8f3c1ecb">S15-2045</url>
       <doi>10.18653/v1/S15-2045</doi>
@@ -920,7 +920,7 @@
     </paper>
     <paper id="46">
       <title><fixed-case>E</fixed-case>x<fixed-case>B</fixed-case> Themis: Extensive Feature Extraction from Word Alignments for Semantic Textual Similarity</title>
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <author><first>Robert</first><last>Remus</last></author>
       <author><first>Xose</first><last>De La Puente</last></author>
       <pages>264–268</pages>
@@ -931,7 +931,7 @@
     <paper id="47">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 3: Answer Selection in Community Question Answering</title>
       <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Walid</first><last>Magdy</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Jim</first><last>Glass</last></author>
@@ -945,8 +945,8 @@
       <title><fixed-case>V</fixed-case>ector<fixed-case>SLU</fixed-case>: A Continuous Word Vector Approach to Answer Selection in Community Question Answering Systems</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>Scott</first><last>Cyphers</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="scott-cyphers"><first>Scott</first><last>Cyphers</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>282–287</pages>
       <url hash="95e73f22">S15-2048</url>
       <doi>10.18653/v1/S15-2048</doi>
@@ -972,11 +972,11 @@
     </paper>
     <paper id="51">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 14: Analysis of Clinical Text</title>
-      <author><first>Noémie</first><last>Elhadad</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Sharon</first><last>Gorman</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>303–310</pages>
       <url hash="8f462c1c">S15-2051</url>
@@ -1002,7 +1002,7 @@
       <author><first>Vít</first><last>Baisa</last></author>
       <author><first>Jane</first><last>Bradbury</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Ismaïl</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismaïl</first><last>El Maarouf</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>Octavian</first><last>Popescu</last></author>
       <pages>315–324</pages>
@@ -1042,7 +1042,7 @@
     <paper id="57">
       <title><fixed-case>EBL</fixed-case>-Hope: Multilingual Word Sense Disambiguation Using a Hybrid Knowledge-Based Technique</title>
       <author><first>Eniafe Festus</first><last>Ayetiran</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
       <pages>340–344</pages>
       <url hash="5292db03">S15-2057</url>
       <doi>10.18653/v1/S15-2057</doi>
@@ -1051,7 +1051,7 @@
     <paper id="58">
       <title><fixed-case>VUA</fixed-case>-background : When to Use Background Information to Perform Word Sense Disambiguation</title>
       <author><first>Marten</first><last>Postma</last></author>
-      <author><first>Ruben</first><last>Izquierdo</last></author>
+      <author id="ruben-izquierdo"><first>Ruben</first><last>Izquierdo</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>345–349</pages>
       <url hash="8ea563ae">S15-2058</url>
@@ -1062,7 +1062,7 @@
       <title><fixed-case>T</fixed-case>eam<fixed-case>UFAL</fixed-case>: <fixed-case>WSD</fixed-case>+<fixed-case>EL</fixed-case> as Document Retrieval</title>
       <author><first>Petr</first><last>Fanta</last></author>
       <author><first>Roman</first><last>Sudarikov</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>350–354</pages>
       <url hash="d737e4c6">S15-2059</url>
       <doi>10.18653/v1/S15-2059</doi>
@@ -1070,7 +1070,7 @@
     </paper>
     <paper id="60">
       <title><fixed-case>EL</fixed-case>92: Entity Linking Combining Open Source Annotators via Weighted Voting</title>
-      <author><first>Pablo</first><last>Ruiz</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>355–359</pages>
       <url hash="71a0d941">S15-2060</url>
@@ -1081,7 +1081,7 @@
       <title><fixed-case>UNIBA</fixed-case>: Combining Distributional Semantic Models and Sense Distribution for Multilingual All-Words Sense Disambiguation and Entity Linking</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Annalina</first><last>Caputo</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>360–364</pages>
       <url hash="45a2c8ea">S15-2061</url>
       <doi>10.18653/v1/S15-2061</doi>
@@ -1098,7 +1098,7 @@
     <paper id="63">
       <title><fixed-case>T</fixed-case>eam<fixed-case>HCMUS</fixed-case>: Analysis of Clinical Text</title>
       <author><first>Nghia</first><last>Huynh</last></author>
-      <author><first>Quoc</first><last>Ho</last></author>
+      <author id="bao-quoc-ho"><first>Quoc</first><last>Ho</last></author>
       <pages>370–374</pages>
       <url hash="e0c6a225">S15-2063</url>
       <doi>10.18653/v1/S15-2063</doi>
@@ -1124,7 +1124,7 @@
     <paper id="66">
       <title><fixed-case>UWM</fixed-case>: A Simple Baseline Method for Identifying Attributes of Disease and Disorder Mentions in Clinical Text</title>
       <author><first>Omid</first><last>Ghiasvand</last></author>
-      <author><first>Rohit</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
       <pages>385–388</pages>
       <url hash="e9247f62">S15-2066</url>
       <doi>10.18653/v1/S15-2066</doi>
@@ -1165,7 +1165,7 @@
       <title><fixed-case>UL</fixed-case>isboa: Recognition and Normalization of Medical Concepts</title>
       <author><first>André</first><last>Leal</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Francisco</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco</first><last>Couto</last></author>
       <pages>406–411</pages>
       <url hash="aed3e0fa">S15-2070</url>
       <doi>10.18653/v1/S15-2070</doi>
@@ -1199,7 +1199,7 @@
       <title><fixed-case>B</fixed-case>ioinformatics<fixed-case>UA</fixed-case>: Machine Learning and Rule-Based Recognition of Disorders and Clinical Attributes from Patient Notes</title>
       <author><first>Sérgio</first><last>Matos</last></author>
       <author><first>José</first><last>Sequeira</last></author>
-      <author><first>José Luís</first><last>Oliveira</last></author>
+      <author id="jose-luis-oliveira"><first>José Luís</first><last>Oliveira</last></author>
       <pages>422–426</pages>
       <url hash="9243b1d3">S15-2073</url>
       <doi>10.18653/v1/S15-2073</doi>
@@ -1219,7 +1219,7 @@
     <paper id="75">
       <title><fixed-case>CMILLS</fixed-case>: Adapting Semantic Role Labeling Features to Dependency Parsing</title>
       <author><first>Chad</first><last>Mills</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>433–437</pages>
       <url hash="5fbeb3a5">S15-2075</url>
       <doi>10.18653/v1/S15-2075</doi>
@@ -1248,7 +1248,7 @@
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Preslav</first><last>Nakov</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <pages>451–463</pages>
@@ -1272,7 +1272,7 @@
       <author><first>Tony</first><last>Veale</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <author><first>Antonio</first><last>Reyes</last></author>
       <pages>470–478</pages>
       <url hash="040fca80">S15-2080</url>
@@ -1281,7 +1281,7 @@
     </paper>
     <paper id="81">
       <title><fixed-case>CL</fixed-case>a<fixed-case>C</fixed-case>-<fixed-case>S</fixed-case>enti<fixed-case>P</fixed-case>ipe: <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2015 Subtasks 10 <fixed-case>B</fixed-case>,<fixed-case>E</fixed-case>, and Task 11</title>
-      <author><first>Canberk</first><last>Özdemir</last></author>
+      <author id="canberk-ozdemir"><first>Canberk</first><last>Özdemir</last></author>
       <author><first>Sabine</first><last>Bergler</last></author>
       <pages>479–485</pages>
       <url hash="8b35d063">S15-2081</url>
@@ -1291,8 +1291,8 @@
     <paper id="82">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 12: Aspect Based Sentiment Analysis</title>
       <author><first>Maria</first><last>Pontiki</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>486–495</pages>
@@ -1321,8 +1321,8 @@
       <title><fixed-case>DIEGOL</fixed-case>ab: An Approach for Message-level Sentiment Classification in <fixed-case>T</fixed-case>witter</title>
       <author><first>Abeed</first><last>Sarker</last></author>
       <author><first>Azadeh</first><last>Nikfarjam</last></author>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>510–514</pages>
       <url hash="07543b2f">S15-2085</url>
       <doi>10.18653/v1/S15-2085</doi>
@@ -1354,7 +1354,7 @@
       <title><fixed-case>CIS</fixed-case>-positive: A Combination of Convolutional Neural Networks and Support Vector Machines for Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Sebastian</first><last>Ebert</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>527–532</pages>
       <url hash="1ebb6f13">S15-2088</url>
       <doi>10.18653/v1/S15-2088</doi>
@@ -1366,7 +1366,7 @@
       <author><first>Tamara</first><last>Álvarez-López</last></author>
       <author><first>Jonathan</first><last>Juncal-Martínez</last></author>
       <author><first>Enrique</first><last>Costa-Montenegro</last></author>
-      <author><first>Francisco Javier</first><last>González-Castaño</last></author>
+      <author id="francisco-javier-gonzalez-castano"><first>Francisco Javier</first><last>González-Castaño</last></author>
       <pages>533–538</pages>
       <url hash="3938e3ce">S15-2089</url>
       <doi>10.18653/v1/S15-2089</doi>
@@ -1395,9 +1395,9 @@
     </paper>
     <paper id="92">
       <title><fixed-case>R</fixed-case>ose<fixed-case>M</fixed-case>erry: A Baseline Message-level Sentiment Classification System</title>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <author><first>Richard</first><last>Fothergill</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>551–555</pages>
       <url hash="a0b70fe0">S15-2092</url>
       <doi>10.18653/v1/S15-2092</doi>
@@ -1407,7 +1407,7 @@
       <title><fixed-case>UDLAP</fixed-case>: Sentiment Analysis Using a Graph-Based Representation</title>
       <author><first>Esteban</first><last>Castillo</last></author>
       <author><first>Ofelia</first><last>Cervantes</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
       <author><first>David</first><last>Báez</last></author>
       <author><first>Alfredo</first><last>Sánchez</last></author>
       <pages>556–560</pages>
@@ -1419,7 +1419,7 @@
       <title><fixed-case>ECNU</fixed-case>: Multi-level Sentiment Analysis on <fixed-case>T</fixed-case>witter Using Traditional Linguistic Features and Word Embedding Features</title>
       <author><first>Zhihua</first><last>Zhang</last></author>
       <author><first>Guoshun</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>561–567</pages>
       <url hash="aa751e3d">S15-2094</url>
       <doi>10.18653/v1/S15-2094</doi>
@@ -1428,8 +1428,8 @@
     <paper id="95">
       <title><fixed-case>L</fixed-case>sislif: Feature Extraction and Label Weighting for Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>568–573</pages>
       <url hash="59481a45">S15-2095</url>
       <doi>10.18653/v1/S15-2095</doi>
@@ -1439,7 +1439,7 @@
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>: A <fixed-case>SVM</fixed-case> Approach for <fixed-case>SA</fixed-case> tasks in <fixed-case>T</fixed-case>witter at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015</title>
       <author><first>Mayte</first><last>Giménez</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <pages>574–581</pages>
       <url hash="ad521c19">S15-2096</url>
       <doi>10.18653/v1/S15-2096</doi>
@@ -1477,7 +1477,7 @@
     </paper>
     <paper id="100">
       <title><fixed-case>IITPS</fixed-case>em<fixed-case>E</fixed-case>val: Sentiment Discovery from 140 Characters</title>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Vamsi</first><last>Krishna</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>601–607</pages>
@@ -1491,7 +1491,7 @@
       <author><first>Martin</first><last>Jaggi</last></author>
       <author><first>Dominic</first><last>Egger</last></author>
       <author><first>Pascal</first><last>Julmy</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>608–612</pages>
       <url hash="4ad6f459">S15-2101</url>
@@ -1501,10 +1501,10 @@
     <paper id="102">
       <title><fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case>: A Regression Model for Large Scale <fixed-case>T</fixed-case>witter Sentiment Lexicon Induction</title>
       <author><first>Silvio</first><last>Amir</last></author>
-      <author><first>Ramon</first><last>F. Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramon</first><last>F. Astudillo</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Mario J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mario J.</first><last>Silva</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>613–618</pages>
       <url hash="0dfac86e">S15-2102</url>
@@ -1574,11 +1574,11 @@
     </paper>
     <paper id="109">
       <title><fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case>: Sentiment Analysis without Hand-Coded Features or Linguistic Resources using Embedding Subspaces</title>
-      <author><first>Ramon</first><last>F. Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramon</first><last>F. Astudillo</last></author>
       <author><first>Silvio</first><last>Amir</last></author>
       <author><first>Wang</first><last>Ling</last></author>
       <author><first>Bruno</first><last>Martins</last></author>
-      <author><first>Mario J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mario J.</first><last>Silva</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>652–656</pages>
       <url hash="cd7c3608">S15-2109</url>
@@ -1608,7 +1608,7 @@
       <author><first>Yuxiao</first><last>Zhang</last></author>
       <author><first>Gaoyan</first><last>Ou</last></author>
       <author><first>Tengjiao</first><last>Wang</last></author>
-      <author><first>Kam-fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-fai</first><last>Wong</last></author>
       <pages>664–668</pages>
       <url hash="44125f1e">S15-2111</url>
       <doi>10.18653/v1/S15-2111</doi>
@@ -1651,7 +1651,7 @@
       <title><fixed-case>LT</fixed-case>3: Sentiment Analysis of Figurative Tweets: piece of cake #<fixed-case>N</fixed-case>ot<fixed-case>R</fixed-case>eally</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>684–688</pages>
       <url hash="c0d4a376">S15-2115</url>
       <doi>10.18653/v1/S15-2115</doi>
@@ -1681,8 +1681,8 @@
     <paper id="118">
       <title><fixed-case>CPH</fixed-case>: Sentiment analysis of Figurative Language on <fixed-case>T</fixed-case>witter #easypeasy #not</title>
       <author><first>Sarah</first><last>McGillion</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>699–703</pages>
       <url hash="95abb7df">S15-2118</url>
       <doi>10.18653/v1/S15-2118</doi>
@@ -1712,7 +1712,7 @@
       <title><fixed-case>V</fixed-case>3: Unsupervised Aspect Based Sentiment Analysis for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2015 Task 12</title>
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>714–718</pages>
       <url hash="e413e627">S15-2121</url>
       <doi>10.18653/v1/S15-2121</doi>
@@ -1720,10 +1720,10 @@
     </paper>
     <paper id="122">
       <title><fixed-case>LT</fixed-case>3: Applying Hybrid Terminology Extraction to Aspect-Based Sentiment Analysis</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Marjan</first><last>Van de Kauter</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>719–724</pages>
       <url hash="45523ef8">S15-2122</url>
       <doi>10.18653/v1/S15-2122</doi>
@@ -1740,10 +1740,10 @@
     </paper>
     <paper id="124">
       <title><fixed-case>SINAI</fixed-case>: Syntactic Approach for Aspect-Based Sentiment Analysis</title>
-      <author><first>Salud M.</first><last>Jiménez-Zafra</last></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud M.</first><last>Jiménez-Zafra</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>730–735</pages>
       <url hash="02a51296">S15-2124</url>
       <doi>10.18653/v1/S15-2124</doi>
@@ -1752,7 +1752,7 @@
     <paper id="125">
       <title><fixed-case>ECNU</fixed-case>: Extracting Effective Features from Multiple Sequential Sentences for Target-dependent Sentiment Analysis in Reviews</title>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>736–741</pages>
       <url hash="1c343f76">S15-2125</url>
       <doi>10.18653/v1/S15-2125</doi>
@@ -1772,8 +1772,8 @@
     <paper id="127">
       <title><fixed-case>E</fixed-case>li<fixed-case>X</fixed-case>a: A Modular and Flexible <fixed-case>ABSA</fixed-case> Platform</title>
       <author><first>Iñaki</first><last>San Vicente</last></author>
-      <author><first>Xabier</first><last>Saralegi</last></author>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="xabier-saralegi"><first>Xabier</first><last>Saralegi</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>748–752</pages>
       <url hash="d15581d0">S15-2127</url>
       <doi>10.18653/v1/S15-2127</doi>
@@ -1782,8 +1782,8 @@
     <paper id="128">
       <title><fixed-case>L</fixed-case>sislif: <fixed-case>CRF</fixed-case> and Logistic Regression for Opinion Target Extraction and Sentiment Polarity Analysis</title>
       <author><first>Hussam</first><last>Hamdan</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>753–758</pages>
       <url hash="c257e53c">S15-2128</url>
       <doi>10.18653/v1/S15-2128</doi>
@@ -1810,7 +1810,7 @@
     <paper id="131">
       <title><fixed-case>TJU</fixed-case>de<fixed-case>M</fixed-case>: A Combination Classifier for Aspect Category Detection and Sentiment Polarity Classification</title>
       <author><first>Zhifei</first><last>Zhang</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Hongling</first><last>Wang</last></author>
       <pages>772–777</pages>
       <url hash="26d84d4f">S15-2131</url>
@@ -1820,13 +1820,13 @@
     <paper id="132">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 4: <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ine: Cross-Document Event Ordering</title>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Rubén</first><last>Urizar</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="ruben-urizar"><first>Rubén</first><last>Urizar</last></author>
       <pages>778–786</pages>
       <url hash="fb1a27a6">S15-2132</url>
       <doi>10.18653/v1/S15-2132</doi>
@@ -1845,12 +1845,12 @@
     </paper>
     <paper id="134">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 5: <fixed-case>QA</fixed-case> <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val - Evaluating Temporal Information Understanding with Question Answering</title>
-      <author><first>Hector</first><last>Llorens</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="hector-llorens"><first>Hector</first><last>Llorens</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Naushad</first><last>UzZaman</last></author>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>792–800</pages>
       <url hash="de4758f9">S15-2134</url>
       <doi>10.18653/v1/S15-2134</doi>
@@ -1868,9 +1868,9 @@
     <paper id="136">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 6: Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val</title>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>806–814</pages>
       <url hash="71a7220a">S15-2136</url>
@@ -1880,10 +1880,10 @@
     <paper id="137">
       <title><fixed-case>B</fixed-case>lu<fixed-case>L</fixed-case>ab: Temporal Information Extraction for the 2015 Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Challenge</title>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Danielle L</first><last>Mowery</last></author>
-      <author><first>Samir</first><last>Abdelrahman</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L</first><last>Mowery</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>Abdelrahman</last></author>
       <author><first>Lee</first><last>Christensen</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>815–819</pages>
       <url hash="a4a3e98d">S15-2137</url>
       <doi>10.18653/v1/S15-2137</doi>
@@ -1891,8 +1891,8 @@
     </paper>
     <paper id="138">
       <title><fixed-case>GPLSIUA</fixed-case>: Combining Temporal Information and Topic Modeling for Cross-Document Event Ordering</title>
-      <author><first>Borja</first><last>Navarro</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <pages>820–824</pages>
       <url hash="10e7535f">S15-2138</url>
       <doi>10.18653/v1/S15-2138</doi>
@@ -1923,9 +1923,9 @@
     <paper id="141">
       <title><fixed-case>UFPRS</fixed-case>heffield: Contrasting Rule-based and Support Vector Machine Approaches to Time Expression Identification in Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val</title>
       <author><first>Hegler</first><last>Tissot</last></author>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Marcos Didonet</first><last>Del Fabro</last></author>
       <pages>835–839</pages>
       <url hash="81934f01">S15-2141</url>
@@ -1955,9 +1955,9 @@
     <paper id="144">
       <title><fixed-case>AMBRA</fixed-case>: A Ranking Approach to Temporal Text Classification</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>851–855</pages>
       <url hash="a9ad2300">S15-2144</url>
       <doi>10.18653/v1/S15-2144</doi>
@@ -2002,9 +2002,9 @@
     </paper>
     <paper id="149">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 8: <fixed-case>S</fixed-case>pace<fixed-case>E</fixed-case>val</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Parisa</first><last>Kordjamshidi</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Aaron</first><last>Levine</last></author>
       <author><first>Seth</first><last>Dworman</last></author>
       <author><first>Zachary</first><last>Yocum</last></author>
@@ -2046,11 +2046,11 @@
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Marco</first><last>Kuhlmann</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>915–926</pages>
       <url hash="3f4aeaca">S15-2153</url>
       <doi>10.18653/v1/S15-2153</doi>
@@ -2061,7 +2061,7 @@
       <author><first>Yantao</first><last>Du</last></author>
       <author><first>Fan</first><last>Zhang</last></author>
       <author><first>Xun</first><last>Zhang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <pages>927–931</pages>
       <url hash="b87f1aba">S15-2154</url>
@@ -2072,7 +2072,7 @@
       <title><fixed-case>USAAR</fixed-case>-<fixed-case>WLV</fixed-case>: Hypernym Generation with Deep Neural Nets</title>
       <author><first>Liling</first><last>Tan</last></author>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>932–937</pages>
       <url hash="e5ebbfd7">S15-2155</url>
       <doi>10.18653/v1/S15-2155</doi>
@@ -2081,7 +2081,7 @@
     <paper id="156">
       <title><fixed-case>NTNU</fixed-case>: An Unsupervised Knowledge Approach for Taxonomy Extraction</title>
       <author><first>Bamfa</first><last>Ceesay</last></author>
-      <author><first>Wen</first><last>Juan Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen</first><last>Juan Hou</last></author>
       <pages>938–943</pages>
       <url hash="45a1bf47">S15-2156</url>
       <doi>10.18653/v1/S15-2156</doi>
@@ -2097,7 +2097,7 @@
     </paper>
     <paper id="158">
       <title><fixed-case>TALN</fixed-case>-<fixed-case>UPF</fixed-case>: Taxonomy Learning Exploiting <fixed-case>CRF</fixed-case>-Based Hypernym Extraction on Encyclopedic Definitions</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
       <pages>949–954</pages>
@@ -2109,7 +2109,7 @@
       <title><fixed-case>QASSIT</fixed-case>: A Pretopological Framework for the Automatic Construction of Lexical Taxonomies from Raw Texts</title>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Gael</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gael</first><last>Dias</last></author>
       <author><first>Vincent</first><last>Levorato</last></author>
       <author><first>Christine</first><last>Largeron</last></author>
       <pages>955–959</pages>
@@ -2119,9 +2119,9 @@
     </paper>
     <paper id="160">
       <title><fixed-case>R</fixed-case>iga: from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et to Semantic Frames with C6.0 Rules</title>
-      <author><first>Guntis</first><last>Barzdins</last></author>
-      <author><first>Peteris</first><last>Paikens</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
+      <author id="peteris-paikens"><first>Peteris</first><last>Paikens</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
       <pages>960–964</pages>
       <url hash="0663b829">S15-2160</url>
       <doi>10.18653/v1/S15-2160</doi>
@@ -2140,7 +2140,7 @@
     <paper id="162">
       <title><fixed-case>L</fixed-case>isbon: Evaluating <fixed-case>T</fixed-case>urbo<fixed-case>S</fixed-case>emantic<fixed-case>P</fixed-case>arser on Multiple Languages and Out-of-Domain Data</title>
       <author><first>Mariana S. C.</first><last>Almeida</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>970–973</pages>
       <url hash="64f1378a">S15-2162</url>
       <doi>10.18653/v1/S15-2162</doi>
diff --git a/data/xml/S16.xml b/data/xml/S16.xml
index 3c992c1e81..5faf65f0f1 100644
--- a/data/xml/S16.xml
+++ b/data/xml/S16.xml
@@ -7,7 +7,7 @@
       <editor><first>Marine</first><last>Carpuat</last></editor>
       <editor><first>Daniel</first><last>Cer</last></editor>
       <editor><first>David</first><last>Jurgens</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Torsten</first><last>Zesch</last></editor>
       <doi>10.18653/v1/S16-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -35,23 +35,23 @@
     <paper id="2">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Aspect Based Sentiment Analysis</title>
       <author><first>Maria</first><last>Pontiki</last></author>
-      <author><first>Dimitris</first><last>Galanis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="dimitrios-galanis"><first>Dimitris</first><last>Galanis</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <author><first>Mohammad</first><last>AL-Smadi</last></author>
       <author><first>Mahmoud</first><last>Al-Ayyoub</last></author>
       <author><first>Yanyan</first><last>Zhao</last></author>
       <author><first>Bing</first><last>Qin</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Evgeniy</first><last>Kotelnikov</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>19–30</pages>
       <url hash="020df56a">S16-1002</url>
       <doi>10.18653/v1/S16-1002</doi>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Detecting Stance in Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <author><first>Parinaz</first><last>Sobhani</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
@@ -72,7 +72,7 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Determining Sentiment Intensity of <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Phrases</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Mohammad</first><last>Salameh</last></author>
       <pages>42–51</pages>
       <url hash="2097ee84">S16-1004</url>
@@ -103,7 +103,7 @@
       <title><fixed-case>S</fixed-case>te<fixed-case>M</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Applying Active Learning to Improve Sentiment Classification</title>
       <author><first>Stefan</first><last>Räbiger</last></author>
       <author><first>Mishal</first><last>Kazmi</last></author>
-      <author><first>Yücel</first><last>Saygın</last></author>
+      <author id="yucel-saygin"><first>Yücel</first><last>Saygın</last></author>
       <author><first>Peter</first><last>Schüller</last></author>
       <author><first>Myra</first><last>Spiliopoulou</last></author>
       <pages>64–70</pages>
@@ -114,7 +114,7 @@
     <paper id="8">
       <title><fixed-case>I</fixed-case>2<fixed-case>RNTU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Classifier Fusion for Polarity Classification in <fixed-case>T</fixed-case>witter</title>
       <author><first>Zhengchen</first><last>Zhang</last></author>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
       <author><first>Fuxiang</first><last>Wu</last></author>
       <author><first>Dong-Yan</first><last>Huang</last></author>
       <author><first>Weisi</first><last>Lin</last></author>
@@ -127,8 +127,8 @@
     <paper id="9">
       <title><fixed-case>L</fixed-case>y<fixed-case>S</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Exploiting Neural Activation Values for <fixed-case>T</fixed-case>witter Sentiment Classification and Quantification</title>
       <author><first>David</first><last>Vilares</last></author>
-      <author><first>Yerai</first><last>Doval</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>79–84</pages>
       <url hash="20fbd433">S16-1009</url>
@@ -138,7 +138,7 @@
     <paper id="10">
       <title><fixed-case>T</fixed-case>wi<fixed-case>SE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: <fixed-case>T</fixed-case>witter Sentiment Classification</title>
       <author><first>Georgios</first><last>Balikas</last></author>
-      <author><first>Massih-Reza</first><last>Amini</last></author>
+      <author id="massih-r-amini"><first>Massih-Reza</first><last>Amini</last></author>
       <pages>85–91</pages>
       <url hash="981f0210">S16-1010</url>
       <doi>10.18653/v1/S16-1010</doi>
@@ -174,9 +174,9 @@
     </paper>
     <paper id="14">
       <title><fixed-case>NTNUS</fixed-case>ent<fixed-case>E</fixed-case>val at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Combining General Classifiers for Fast <fixed-case>T</fixed-case>witter Sentiment Analysis</title>
-      <author><first>Brage Ekroll</first><last>Jahren</last></author>
+      <author id="brage-ekroll-jahren"><first>Brage Ekroll</first><last>Jahren</last></author>
       <author><first>Valerij</first><last>Fredriksen</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Lars</first><last>Bungum</last></author>
       <pages>103–108</pages>
       <url hash="7990d58a">S16-1014</url>
@@ -187,7 +187,7 @@
       <title><fixed-case>UDLAP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Quantification Using a Graph Based Representation</title>
       <author><first>Esteban</first><last>Castillo</last></author>
       <author><first>Ofelia</first><last>Cervantes</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
       <author><first>David</first><last>Báez</last></author>
       <pages>109–114</pages>
       <url hash="f4de348c">S16-1015</url>
@@ -200,7 +200,7 @@
       <author><first>Tamara</first><last>Álvarez-López</last></author>
       <author><first>Milagros</first><last>Fernández-Gavilanes</last></author>
       <author><first>Enrique</first><last>Costa-Montenegro</last></author>
-      <author><first>Francisco Javier</first><last>González-Castaño</last></author>
+      <author id="francisco-javier-gonzalez-castano"><first>Francisco Javier</first><last>González-Castaño</last></author>
       <pages>115–119</pages>
       <url hash="228aa15c">S16-1016</url>
       <doi>10.18653/v1/S16-1016</doi>
@@ -245,10 +245,10 @@
     </paper>
     <paper id="21">
       <title><fixed-case>CICBUAP</fixed-case>nlp at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4-A: Discovering <fixed-case>T</fixed-case>witter Polarity using Enhanced Embeddings</title>
-      <author><first>Helena</first><last>Gomez</last></author>
-      <author><first>Darnes</first><last>Vilariño</last></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gomez</last></author>
+      <author id="darnes-vilarino"><first>Darnes</first><last>Vilariño</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>David</first><last>Pinto Avendaño</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto Avendaño</last></author>
       <pages>145–148</pages>
       <url hash="8cc36a01">S16-1021</url>
       <doi>10.18653/v1/S16-1021</doi>
@@ -273,8 +273,8 @@
       <author><first>Filippos</first><last>Kokkinos</last></author>
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <pages>155–163</pages>
       <url hash="db5f57fe">S16-1023</url>
@@ -313,8 +313,8 @@
     <paper id="27">
       <title><fixed-case>UNIMELB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Tasks 4<fixed-case>A</fixed-case> and 4<fixed-case>B</fixed-case>: An Ensemble of Neural Networks and a <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec Based Model for Sentiment Classification</title>
       <author><first>Steven</first><last>Xu</last></author>
-      <author><first>HuiZhi</first><last>Liang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="huizhi-liang"><first>HuiZhi</first><last>Liang</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>183–189</pages>
       <url hash="87ffb2d9">S16-1027</url>
       <doi>10.18653/v1/S16-1027</doi>
@@ -331,7 +331,7 @@
     <paper id="29">
       <title><fixed-case>DSIC</fixed-case>-<fixed-case>ELIRF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Message Polarity Classification in <fixed-case>T</fixed-case>witter using a Support Vector Machine Approach</title>
       <author><first>Víctor</first><last>Martinez Morant</last></author>
-      <author><first>LLuís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>LLuís-F.</first><last>Hurtado</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <pages>198–201</pages>
       <url hash="326c2a13">S16-1029</url>
@@ -341,7 +341,7 @@
     <paper id="30">
       <title><fixed-case>SENSEI</fixed-case>-<fixed-case>LIF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Polarity embedding fusion for robust sentiment analysis</title>
       <author><first>Mickael</first><last>Rouvier</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>202–208</pages>
       <url hash="b005f28b">S16-1030</url>
       <doi>10.18653/v1/S16-1030</doi>
@@ -350,7 +350,7 @@
     <paper id="31">
       <title><fixed-case>D</fixed-case>iego<fixed-case>L</fixed-case>ab16 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter using Centroids, Clusters, and Sentiment Lexicons</title>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>209–214</pages>
       <url hash="f9a9b78a">S16-1031</url>
       <doi>10.18653/v1/S16-1031</doi>
@@ -360,7 +360,7 @@
       <title><fixed-case>VCU</fixed-case>-<fixed-case>TSA</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Gerard</first><last>Briones</last></author>
       <author><first>Kasun</first><last>Amarasinghe</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>215–219</pages>
       <url hash="1a719bfd">S16-1032</url>
       <doi>10.18653/v1/S16-1032</doi>
@@ -394,9 +394,9 @@
     <paper id="36">
       <title><fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4-A: Reducing the Problem of Out-of-Embedding Words</title>
       <author><first>Silvio</first><last>Amir</last></author>
-      <author><first>Ramon</first><last>F. Astudillo</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramon</first><last>F. Astudillo</last></author>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Mário J.</first><last>Silva</last></author>
+      <author id="mario-j-silva"><first>Mário J.</first><last>Silva</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>238–242</pages>
       <url hash="4797edab">S16-1036</url>
@@ -410,7 +410,7 @@
       <author><first>Eduard</first><last>Apostol</last></author>
       <author><first>Octavian</first><last>Ciobanu</last></author>
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Diana</first><last>Trandabăţ</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăţ</last></author>
       <pages>243–246</pages>
       <url hash="fa774887">S16-1037</url>
       <doi>10.18653/v1/S16-1037</doi>
@@ -422,7 +422,7 @@
       <author><first>Marius-Valentin</first><last>Hrişca</last></author>
       <author><first>Mihail</first><last>Gliga</last></author>
       <author><first>Diana</first><last>Darabană</last></author>
-      <author><first>Diana</first><last>Trandabăţ</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăţ</last></author>
       <author><first>Adrian</first><last>Iftene</last></author>
       <pages>247–250</pages>
       <url hash="f6404561">S16-1038</url>
@@ -432,10 +432,10 @@
     <paper id="39">
       <title><fixed-case>YZU</fixed-case>-<fixed-case>NLP</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Ordinal Sentiment Classification Using a Recurrent Convolutional Network</title>
       <author><first>Yunchao</first><last>He</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Chin-Sheng</first><last>Yang</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
-      <author><first>Weiyi</first><last>Liu</last></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Liu</last></author>
       <pages>251–255</pages>
       <url hash="e92dcb74">S16-1039</url>
       <doi>10.18653/v1/S16-1039</doi>
@@ -445,7 +445,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: An Empirical Investigation of Traditional <fixed-case>NLP</fixed-case> Features and Word Embedding Features for Sentence-level and Topic-level Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Yunxiao</first><last>Zhou</last></author>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>256–261</pages>
       <url hash="edbd0f63">S16-1040</url>
       <doi>10.18653/v1/S16-1040</doi>
@@ -453,7 +453,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>OPAL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: the Challenge of Porting a Sentiment Analysis System to the “Real” World</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <pages>262–265</pages>
       <url hash="726b865b">S16-1041</url>
       <doi>10.18653/v1/S16-1041</doi>
@@ -472,7 +472,7 @@
     <paper id="43">
       <title><fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Deep Convolutional Neural Networks for Aspect Category and Sentiment Extraction</title>
       <author><first>Talaat</first><last>Khalil</last></author>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <pages>271–276</pages>
       <url hash="f5aa97a6">S16-1043</url>
       <doi>10.18653/v1/S16-1043</doi>
@@ -531,7 +531,7 @@
       <author><first>Jonathan</first><last>Juncal-Martínez</last></author>
       <author><first>Milagros</first><last>Fernández-Gavilanes</last></author>
       <author><first>Enrique</first><last>Costa-Montenegro</last></author>
-      <author><first>Francisco Javier</first><last>González-Castaño</last></author>
+      <author id="francisco-javier-gonzalez-castano"><first>Francisco Javier</first><last>González-Castaño</last></author>
       <pages>306–311</pages>
       <url hash="170fbca0">S16-1049</url>
       <doi>10.18653/v1/S16-1049</doi>
@@ -552,7 +552,7 @@
     <paper id="51">
       <title><fixed-case>AKTSKI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Aspect Based Sentiment Analysis for Consumer Reviews</title>
       <author><first>Shubham</first><last>Pateria</last></author>
-      <author><first>Prafulla</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla</first><last>Choubey</last></author>
       <pages>318–324</pages>
       <url hash="bc266c7b">S16-1051</url>
       <doi>10.18653/v1/S16-1051</doi>
@@ -582,7 +582,7 @@
       <author><first>Fatih Samet</first><last>Çetin</last></author>
       <author><first>Ezgi</first><last>Yıldırım</last></author>
       <author><first>Can</first><last>Özbey</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>337–341</pages>
       <url hash="a65ab615">S16-1054</url>
       <doi>10.18653/v1/S16-1054</doi>
@@ -620,7 +620,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Extracting Effective Features from Relevant Fragments in Sentence for Aspect-Based Sentiment Analysis in Reviews</title>
       <author><first>Mengxiao</first><last>Jiang</last></author>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>361–366</pages>
       <url hash="3a95c6be">S16-1058</url>
       <doi>10.18653/v1/S16-1058</doi>
@@ -669,7 +669,7 @@
       <title><fixed-case>USFD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Any-Target Stance Detection on <fixed-case>T</fixed-case>witter with Autoencoders</title>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>389–393</pages>
       <url hash="ae56e1da">S16-1063</url>
       <doi>10.18653/v1/S16-1063</doi>
@@ -688,7 +688,7 @@
       <author><first>Andrew</first><last>Lamont</last></author>
       <author><first>Manan</first><last>Pancholi</last></author>
       <author><first>Kenneth</first><last>Steimel</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>394–400</pages>
       <url hash="2859eb7a">S16-1064</url>
       <doi>10.18653/v1/S16-1064</doi>
@@ -699,7 +699,7 @@
       <author><first>Yuki</first><last>Igarashi</last></author>
       <author><first>Hiroya</first><last>Komatsu</last></author>
       <author><first>Sosuke</first><last>Kobayashi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>401–407</pages>
       <url hash="ca9db53b">S16-1065</url>
@@ -720,7 +720,7 @@
       <author><first>Prashanth</first><last>Vijayaraghavan</last></author>
       <author><first>Ivan</first><last>Sysoev</last></author>
       <author><first>Soroush</first><last>Vosoughi</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>413–419</pages>
       <url hash="d190ddd0">S16-1067</url>
       <doi>10.18653/v1/S16-1067</doi>
@@ -732,7 +732,7 @@
       <author><first>Brian</first><last>Ecker</last></author>
       <author><first>Theodore</first><last>Handleman</last></author>
       <author><first>Nicolas</first><last>Hahn</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>420–427</pages>
       <url hash="918a4e2f">S16-1068</url>
       <doi>10.18653/v1/S16-1068</doi>
@@ -750,7 +750,7 @@
     <paper id="70">
       <title><fixed-case>CU</fixed-case>-<fixed-case>GWU</fixed-case> Perspective at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Ideological Stance Detection in Informal Text</title>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>434–439</pages>
       <url hash="2c85baac">S16-1070</url>
       <doi>10.18653/v1/S16-1070</doi>
@@ -760,7 +760,7 @@
       <title><fixed-case>JU</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Detecting Stance in Tweets using Support Vector Machines</title>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>440–444</pages>
       <url hash="7b686f2a">S16-1071</url>
       <doi>10.18653/v1/S16-1071</doi>
@@ -771,7 +771,7 @@
       <author><first>Henrik</first><last>Bøhler</last></author>
       <author><first>Petter</first><last>Asla</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <pages>445–450</pages>
       <url hash="739d7d19">S16-1072</url>
       <doi>10.18653/v1/S16-1072</doi>
@@ -780,7 +780,7 @@
     <paper id="73">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 6: Relevant or Not? Supportive or Not? A Two-step Learning System for Automatic Detecting Stance in Tweets</title>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>451–457</pages>
       <url hash="e99cbe41">S16-1073</url>
       <doi>10.18653/v1/S16-1073</doi>
@@ -803,7 +803,7 @@
       <author><first>Ivan</first><last>Paljak</last></author>
       <author><first>Filip</first><last>Čulinović</last></author>
       <author><first>Filip</first><last>Boltužić</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Domagoj</first><last>Alagić</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>464–468</pages>
@@ -814,8 +814,8 @@
     <paper id="76">
       <title><fixed-case>LSIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Using Web Search Engines for <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Unsupervised Sentiment Intensity Prediction</title>
       <author><first>Amal</first><last>Htait</last></author>
-      <author><first>Sebastien</first><last>Fournier</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="sebastien-fournier"><first>Sebastien</first><last>Fournier</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>469–473</pages>
       <url hash="5adb2e93">S16-1076</url>
       <doi>10.18653/v1/S16-1076</doi>
@@ -842,7 +842,7 @@
     </paper>
     <paper id="79">
       <title><fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Deriving Prior Polarities for <fixed-case>A</fixed-case>rabic Sentiment Terms</title>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <pages>486–490</pages>
       <url hash="b162cc0c">S16-1079</url>
       <doi>10.18653/v1/S16-1079</doi>
@@ -852,7 +852,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: An Enhanced Supervised Learning Method for Lexicon Sentiment Intensity Ranking</title>
       <author><first>Feixiang</first><last>Wang</last></author>
       <author><first>Zhihua</first><last>Zhang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>491–496</pages>
       <url hash="e72dbb8f">S16-1080</url>
       <doi>10.18653/v1/S16-1080</doi>
@@ -860,14 +860,14 @@
     </paper>
     <paper id="81">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Textual Similarity, Monolingual and Cross-Lingual Evaluation</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Carmen</first><last>Banea</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>497–511</pages>
       <url hash="fad72759">S16-1081</url>
       <doi>10.18653/v1/S16-1081</doi>
@@ -875,11 +875,11 @@
     </paper>
     <paper id="82">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Interpretable Semantic Textual Similarity</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
       <pages>512–524</pages>
       <url hash="d90b3d97">S16-1082</url>
@@ -889,7 +889,7 @@
     <paper id="83">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Community Question Answering</title>
       <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Walid</first><last>Magdy</last></author>
       <author><first>Hamdy</first><last>Mubarak</last></author>
@@ -905,7 +905,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Detecting Minimal Semantic Units and their Meanings (<fixed-case>D</fixed-case>i<fixed-case>MSUM</fixed-case>)</title>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>546–559</pages>
       <url hash="707d0db2">S16-1084</url>
@@ -914,7 +914,7 @@
     </paper>
     <paper id="85">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 11: Complex Word Identification</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>560–569</pages>
       <url hash="b0fa42a5">S16-1085</url>
@@ -924,9 +924,9 @@
     <paper id="86">
       <title><fixed-case>FBK</fixed-case> <fixed-case>HLT</fixed-case>-<fixed-case>MT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Cross-lingual Semantic Similarity Measurement Using Quality Estimation Features and Compositional Bilingual Word Embeddings</title>
       <author><first>Duygu</first><last>Ataman</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>570–576</pages>
       <url hash="b48fe82f">S16-1086</url>
       <doi>10.18653/v1/S16-1086</doi>
@@ -985,12 +985,12 @@
     <paper id="92">
       <title><fixed-case>USFD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Putting different State-of-the-Arts into a Box</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Frederic</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
       <author><first>Andres</first><last>Duque</last></author>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Jurica</first><last>Seva</last></author>
+      <author id="jurica-seva"><first>Jurica</first><last>Seva</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <pages>609–613</pages>
       <url hash="1a3b945d">S16-1092</url>
       <doi>10.18653/v1/S16-1092</doi>
@@ -1010,8 +1010,8 @@
     </paper>
     <paper id="94">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Leveraging Word Embedding From Macro and Micro Views to Boost Performance for Semantic Textual Similarity</title>
-      <author><first>Junfeng</first><last>Tian</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>621–627</pages>
       <url hash="062aff1c">S16-1094</url>
       <doi>10.18653/v1/S16-1094</doi>
@@ -1020,9 +1020,9 @@
     <paper id="95">
       <title><fixed-case>SAARSHEFF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Textual Similarity with Machine Translation Evaluation Metrics and (e<fixed-case>X</fixed-case>treme) Boosted Tree Ensembles</title>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>628–633</pages>
       <url hash="0940c5c4">S16-1095</url>
       <doi>10.18653/v1/S16-1095</doi>
@@ -1030,12 +1030,12 @@
     </paper>
     <paper id="96">
       <title><fixed-case>WOLVESAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity</title>
-      <author><first>Hannah</first><last>Bechara</last></author>
+      <author id="hannah-bechara"><first>Hannah</first><last>Bechara</last></author>
       <author><first>Rohit</first><last>Gupta</last></author>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>634–639</pages>
       <url hash="bc2aec7f">S16-1096</url>
       <doi>10.18653/v1/S16-1096</doi>
@@ -1065,7 +1065,7 @@
     </paper>
     <paper id="99">
       <title><fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Supervised Models of Sentence Similarity</title>
-      <author><first>Md Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md Arafat</first><last>Sultan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>650–655</pages>
@@ -1075,7 +1075,7 @@
     </paper>
     <paper id="100">
       <title><fixed-case>DCU</fixed-case>-<fixed-case>SEM</fixed-case>aniacs at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Synthetic Paragram Embeddings for Semantic Textual Similarity</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Piyush</first><last>Arora</last></author>
       <pages>656–662</pages>
       <url hash="dcb4b945">S16-1100</url>
@@ -1085,7 +1085,7 @@
     <paper id="101">
       <title><fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Shared Task 1: Matrix Factorization for Crosslingual <fixed-case>STS</fixed-case></title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>663–667</pages>
       <url hash="e9bfa53f">S16-1101</url>
       <doi>10.18653/v1/S16-1101</doi>
@@ -1094,7 +1094,7 @@
     <paper id="102">
       <title><fixed-case>CNRC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Experiments in Crosslingual Semantic Textual Similarity</title>
       <author><first>Chi-kiu</first><last>Lo</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Michel</first><last>Simard</last></author>
       <pages>668–673</pages>
       <url hash="f682c8d6">S16-1102</url>
@@ -1115,7 +1115,7 @@
       <title><fixed-case>U</fixed-case>o<fixed-case>B</fixed-case>-<fixed-case>UK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: A Flexible and Extendable System for Semantic Text Similarity using Types, Surprise and Phrase Linking</title>
       <author><first>Harish</first><last>Tayyar Madabushi</last></author>
       <author><first>Mark</first><last>Buhagiar</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>680–685</pages>
       <url hash="990237bd">S16-1104</url>
       <doi>10.18653/v1/S16-1104</doi>
@@ -1124,7 +1124,7 @@
     <paper id="105">
       <title><fixed-case>BIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Sentence Similarity Based on Alignments and Vector with the Weight of Information Content</title>
       <author><first>Hao</first><last>Wu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Wenpeng</first><last>Lu</last></author>
       <pages>686–690</pages>
       <url hash="2b0a5b6d">S16-1105</url>
@@ -1153,7 +1153,7 @@
       <author><first>Sandip</first><last>Sarkar</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>702–705</pages>
       <url hash="7c31d6d8">S16-1108</url>
       <doi>10.18653/v1/S16-1108</doi>
@@ -1161,8 +1161,8 @@
     </paper>
     <paper id="109">
       <title><fixed-case>A</fixed-case>mrita_<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Relation from Word Embeddings in Higher Dimension</title>
-      <author><first>Barathi</first><last>Ganesh HB</last></author>
-      <author><first>Anand Kumar</first><last>M</last></author>
+      <author id="barathi-ganesh-h-b"><first>Barathi</first><last>Ganesh HB</last></author>
+      <author id="anand-kumar-m"><first>Anand Kumar</first><last>M</last></author>
       <author><first>Soman</first><last>KP</last></author>
       <pages>706–711</pages>
       <url hash="1c72f801">S16-1109</url>
@@ -1171,7 +1171,7 @@
     </paper>
     <paper id="110">
       <title><fixed-case>NUIG</fixed-case>-<fixed-case>UNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Soft Alignment and Deep Learning for Semantic Textual Similarity</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Kartik</first><last>Asooja</last></author>
       <author><first>Nitish</first><last>Aggarwal</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
@@ -1183,8 +1183,8 @@
     <paper id="111">
       <title><fixed-case>NORMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: <fixed-case>SEMSIM</fixed-case>: A Multi-Feature Approach to Semantic Text Similarity</title>
       <author><first>Kolawole</first><last>Adebayo</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
-      <author><first>Guido</first><last>Boella</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
       <pages>718–725</pages>
       <url hash="2497e80b">S16-1111</url>
       <doi>10.18653/v1/S16-1111</doi>
@@ -1193,9 +1193,9 @@
     <paper id="112">
       <title><fixed-case>LIPN</fixed-case>-<fixed-case>IIMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Random Forest Regression Experiments on Align-and-Differentiate and Word Embeddings penalizing strategies</title>
       <author><first>Oscar William</first><last>Lightgow Serrano</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <author><first>Albert Manuel</first><last>Orozco Camacho</last></author>
-      <author><first>Jorge</first><last>Garcia Flores</last></author>
+      <author id="jorge-garcia-flores"><first>Jorge</first><last>Garcia Flores</last></author>
       <author><first>Davide</first><last>Buscaldi</last></author>
       <pages>726–731</pages>
       <url hash="3956f454">S16-1112</url>
@@ -1244,7 +1244,7 @@
     </paper>
     <paper id="117">
       <title><fixed-case>RTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Predicting Semantic Similarity with Referential Translation Machines and Related Statistics</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>758–764</pages>
       <url hash="7ea05318">S16-1117</url>
       <doi>10.18653/v1/S16-1117</doi>
@@ -1255,7 +1255,7 @@
     <paper id="118">
       <title><fixed-case>D</fixed-case>al<fixed-case>GTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Importance-Aware Compositional Approach to Short Text Similarity</title>
       <author><first>Jie</first><last>Mei</last></author>
-      <author><first>Aminul</first><last>Islam</last></author>
+      <author id="aminul-islam"><first>Aminul</first><last>Islam</last></author>
       <author><first>Evangelos</first><last>Milios</last></author>
       <pages>765–770</pages>
       <url hash="83b64717">S16-1118</url>
@@ -1265,8 +1265,8 @@
     <paper id="119">
       <title>i<fixed-case>UBC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: <fixed-case>RNN</fixed-case>s and <fixed-case>LSTM</fixed-case>s for interpretable <fixed-case>STS</fixed-case></title>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
       <pages>771–776</pages>
       <url hash="fdb5ee2c">S16-1119</url>
       <doi>10.18653/v1/S16-1119</doi>
@@ -1275,8 +1275,8 @@
     <paper id="120">
       <title>Rev at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Aligning Chunks by Lexical, Part of Speech and Semantic Equivalence</title>
       <author><first>Ping</first><last>Tan</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>777–782</pages>
       <url hash="1d404781">S16-1120</url>
       <doi>10.18653/v1/S16-1120</doi>
@@ -1286,7 +1286,7 @@
       <title><fixed-case>FBK</fixed-case>-<fixed-case>HLT</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: A Multitask, Deep Learning Approach for Interpretable Semantic Textual Similarity</title>
       <author><first>Simone</first><last>Magnolini</last></author>
       <author><first>Anna</first><last>Feltracco</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>783–789</pages>
       <url hash="1aacd8ef">S16-1121</url>
       <doi>10.18653/v1/S16-1121</doi>
@@ -1303,7 +1303,7 @@
     </paper>
     <paper id="123">
       <title><fixed-case>VENSESEVAL</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 2 i<fixed-case>STS</fixed-case> - with a full-fledged rule-based approach</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>796–802</pages>
       <url hash="18f0a500">S16-1123</url>
       <doi>10.18653/v1/S16-1123</doi>
@@ -1324,7 +1324,7 @@
       <title><fixed-case>DTS</fixed-case>im at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Interpreting Similarity of Texts Based on Automated Chunking, Chunk Alignment and Semantic Relation Prediction</title>
       <author><first>Rajendra</first><last>Banjade</last></author>
       <author><first>Nabin</first><last>Maharjan</last></author>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <pages>809–813</pages>
       <url hash="302bd293">S16-1125</url>
@@ -1348,8 +1348,8 @@
       <author><first>Amr</first><last>Gomaa</last></author>
       <author><first>Ashraf</first><last>Mahgoub</last></author>
       <author><first>Hany</first><last>Ahmed</last></author>
-      <author><first>Mohsen</first><last>Rashwan</last></author>
-      <author><first>Hazem</first><last>Raafat</last></author>
+      <author id="mohsen-rashwan"><first>Mohsen</first><last>Rashwan</last></author>
+      <author id="hazem-raafat"><first>Hazem</first><last>Raafat</last></author>
       <author><first>Eslam</first><last>Kamal</last></author>
       <author><first>Ahmad</first><last>Al Sallab</last></author>
       <pages>822–827</pages>
@@ -1365,7 +1365,7 @@
       <author><first>Yu</first><last>Zhang</last></author>
       <author><first>Tao</first><last>Lei</last></author>
       <author><first>Kfir</first><last>Bar</last></author>
-      <author><first>Scott</first><last>Cyphers</last></author>
+      <author id="scott-cyphers"><first>Scott</first><last>Cyphers</last></author>
       <author><first>Jim</first><last>Glass</last></author>
       <pages>828–835</pages>
       <url hash="b10ce80e">S16-1128</url>
@@ -1404,12 +1404,12 @@
     </paper>
     <paper id="131">
       <title><fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Identifying Similar Questions by combining a <fixed-case>CNN</fixed-case> with String Similarity Measures</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Huizhi</first><last>Liang</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="huizhi-liang"><first>Huizhi</first><last>Liang</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Doris</first><last>Hoogeveen</last></author>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <pages>851–856</pages>
       <url hash="70ae96b0">S16-1131</url>
       <doi>10.18653/v1/S16-1131</doi>
@@ -1446,7 +1446,7 @@
     <paper id="135">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Exploring Traditional Method and Deep Learning Method for Question Retrieval and Answer Ranking in Community Question Answering</title>
       <author><first>Guoshun</first><last>Wu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <pages>872–878</pages>
       <url hash="14e5fcc3">S16-1135</url>
       <doi>10.18653/v1/S16-1135</doi>
@@ -1463,9 +1463,9 @@
     </paper>
     <paper id="137">
       <title><fixed-case>MTE</fixed-case>-<fixed-case>NN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Can Machine Translation Evaluation Help Community Question Answering?</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>887–895</pages>
       <url hash="7ac27399">S16-1137</url>
       <doi>10.18653/v1/S16-1137</doi>
@@ -1476,7 +1476,7 @@
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Daniele</first><last>Bonadiman</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Fahad</first><last>Al Obaidli</last></author>
       <author><first>Salvatore</first><last>Romeo</last></author>
@@ -1497,7 +1497,7 @@
     </paper>
     <paper id="140">
       <title><fixed-case>UFRGS</fixed-case>&amp;<fixed-case>LIF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Rule-Based <fixed-case>MWE</fixed-case> Identification and Predominant-Supersense Tagging</title>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>910–917</pages>
@@ -1509,7 +1509,7 @@
       <title><fixed-case>WHUN</fixed-case>lp at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task <fixed-case>D</fixed-case>i<fixed-case>MSUM</fixed-case>: A Pilot Study in Detecting Minimal Semantic Units and their Meanings using Supervised Models</title>
       <author><first>Xin</first><last>Tang</last></author>
       <author><first>Fei</first><last>Li</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>918–924</pages>
       <url hash="4f53c658">S16-1141</url>
       <doi>10.18653/v1/S16-1141</doi>
@@ -1527,7 +1527,7 @@
     <paper id="143">
       <title><fixed-case>UW</fixed-case>-<fixed-case>CSE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Detecting Multiword Expressions and Supersenses using Double-Chained Conditional Random Fields</title>
       <author><first>Mohammad Javad</first><last>Hosseini</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <author><first>Su-In</first><last>Lee</last></author>
       <pages>931–936</pages>
       <url hash="07dd9fa9">S16-1143</url>
@@ -1549,7 +1549,7 @@
       <author><first>Andreas</first><last>Scherbakov</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>946–952</pages>
       <url hash="b7d3ce59">S16-1145</url>
       <doi>10.18653/v1/S16-1145</doi>
@@ -1565,7 +1565,7 @@
     </paper>
     <paper id="147">
       <title><fixed-case>USAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification with Sense Entropy and Sentence Perplexity</title>
-      <author><first>José Manuel</first><last>Martínez Martínez</last></author>
+      <author id="jose-manuel-martinez"><first>José Manuel</first><last>Martínez Martínez</last></author>
       <author><first>Liling</first><last>Tan</last></author>
       <pages>958–962</pages>
       <url hash="b9aff86e">S16-1147</url>
@@ -1582,7 +1582,7 @@
     </paper>
     <paper id="149">
       <title><fixed-case>SV</fixed-case>000gg at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Heavy Gauge Complex Word Identification with System Voting</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>969–974</pages>
       <url hash="923500be">S16-1149</url>
@@ -1592,8 +1592,8 @@
     <paper id="150">
       <title><fixed-case>M</fixed-case>elbourne at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 11: Classifying Type-level Word Complexity using Random Forests with Corpus and Word List Features</title>
       <author><first>Julian</first><last>Brooke</last></author>
-      <author><first>Alexandra</first><last>Uitdenbogerd</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra</first><last>Uitdenbogerd</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>975–981</pages>
       <url hash="1f8e37de">S16-1150</url>
       <doi>10.18653/v1/S16-1150</doi>
@@ -1613,7 +1613,7 @@
       <author><first>Niloy</first><last>Mukherjee</last></author>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>986–990</pages>
       <url hash="8479a9f5">S16-1152</url>
       <doi>10.18653/v1/S16-1152</doi>
@@ -1621,7 +1621,7 @@
     </paper>
     <paper id="153">
       <title><fixed-case>MAZA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Detecting Lexical Complexity Using a Decision Stump Meta-Classifier</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>991–995</pages>
       <url hash="2ffb7a99">S16-1153</url>
@@ -1630,7 +1630,7 @@
     </paper>
     <paper id="154">
       <title><fixed-case>LTG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification with Classifier Ensembles</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>996–1000</pages>
@@ -1642,7 +1642,7 @@
       <title><fixed-case>M</fixed-case>ac<fixed-case>S</fixed-case>aar at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: <fixed-case>Z</fixed-case>ipfian and Character Features for <fixed-case>C</fixed-case>omplex<fixed-case>W</fixed-case>ord Identification</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1001–1005</pages>
       <url hash="48ba896d">S16-1155</url>
       <doi>10.18653/v1/S16-1155</doi>
@@ -1650,7 +1650,7 @@
     </paper>
     <paper id="156">
       <title>Garuda &amp; <fixed-case>B</fixed-case>hasha at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification Using Aggregated Learning Models</title>
-      <author><first>Prafulla</first><last>Choubey</last></author>
+      <author id="prafulla-kumar-choubey"><first>Prafulla</first><last>Choubey</last></author>
       <author><first>Shubham</first><last>Pateria</last></author>
       <pages>1006–1010</pages>
       <url hash="e21413f5">S16-1156</url>
@@ -1660,8 +1660,8 @@
     <paper id="157">
       <title><fixed-case>TALN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Modelling Complex Words by Contextual, Lexical and Semantic Features</title>
       <author><first>Francesco</first><last>Ronzano</last></author>
-      <author><first>Ahmed</first><last>Abura’ed</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>Abura’ed</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>1011–1016</pages>
       <url hash="6d56cdd6">S16-1157</url>
@@ -1680,7 +1680,7 @@
     <paper id="159">
       <title><fixed-case>A</fixed-case>mrita<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification using Word Embedding</title>
       <author><first>Sanjay</first><last>S.P</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K P</last></author>
       <pages>1022–1027</pages>
       <url hash="d18378f7">S16-1159</url>
@@ -1691,7 +1691,7 @@
       <title><fixed-case>C</fixed-case>oastal<fixed-case>CPH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: The importance of designing your Neural Networks right</title>
       <author><first>Joachim</first><last>Bingel</last></author>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <pages>1028–1033</pages>
       <url hash="90ba8409">S16-1160</url>
       <doi>10.18653/v1/S16-1160</doi>
@@ -1735,8 +1735,8 @@
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <author><first>Wei-Te</first><last>Chen</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>1052–1062</pages>
       <url hash="f40dedd1">S16-1165</url>
@@ -1809,7 +1809,7 @@
       <author><first>Simone</first><last>Filice</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>1116–1123</pages>
       <url hash="fdb82900">S16-1172</url>
       <doi>10.18653/v1/S16-1172</doi>
@@ -1817,7 +1817,7 @@
     </paper>
     <paper id="173">
       <title><fixed-case>S</fixed-case>wiss<fixed-case>C</fixed-case>heese at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Classification Using an Ensemble of Convolutional Neural Networks with Distant Supervision</title>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Maurice</first><last>Gonzenbach</last></author>
       <author><first>Fatih</first><last>Uzdilli</last></author>
       <author><first>Aurelien</first><last>Lucchi</last></author>
@@ -1830,11 +1830,11 @@
     </paper>
     <paper id="174">
       <title><fixed-case>IIT</fixed-case>-<fixed-case>TUDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Beyond Sentiment Lexicon: Combining Domain Dependency and Distributional Semantics Features for Aspect Based Sentiment Analysis</title>
-      <author><first>Ayush</first><last>Kumar</last></author>
+      <author id="ayush-kumar"><first>Ayush</first><last>Kumar</last></author>
       <author><first>Sarah</first><last>Kohail</last></author>
       <author><first>Amit</first><last>Kumar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1129–1135</pages>
       <url hash="9425afb4">S16-1174</url>
       <doi>10.18653/v1/S16-1174</doi>
@@ -1844,7 +1844,7 @@
       <title><fixed-case>LIMSI</fixed-case>-<fixed-case>COT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Temporal relation identification using a pipeline of classifiers</title>
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>1136–1142</pages>
       <url hash="fb0c8f7d">S16-1175</url>
@@ -1853,8 +1853,8 @@
     </paper>
     <paper id="176">
       <title><fixed-case>RIGA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Impact of <fixed-case>S</fixed-case>match Extensions and Character-Level Neural Translation on <fixed-case>AMR</fixed-case> Parsing Accuracy</title>
-      <author><first>Guntis</first><last>Barzdins</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
       <pages>1143–1147</pages>
       <url hash="1fca8859">S16-1176</url>
       <doi>10.18653/v1/S16-1176</doi>
@@ -1902,7 +1902,7 @@
     <paper id="181">
       <title><fixed-case>CAMR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: An Extended Transition-based <fixed-case>AMR</fixed-case> Parser</title>
       <author><first>Chuan</first><last>Wang</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Xiaoman</first><last>Pan</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
@@ -1934,7 +1934,7 @@
       <title><fixed-case>CLIP</fixed-case>@<fixed-case>UMD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Parser for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation using Learning to Search</title>
       <author><first>Sudha</first><last>Rao</last></author>
       <author><first>Yogarshi</first><last>Vyas</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>1190–1196</pages>
       <url hash="00eef60c">S16-1184</url>
@@ -1944,7 +1944,7 @@
     <paper id="185">
       <title><fixed-case>CU</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: <fixed-case>AMR</fixed-case> Parsing using <fixed-case>LSTM</fixed-case>-based Recurrent Neural Networks</title>
       <author><first>William</first><last>Foland</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>1197–1201</pages>
       <url hash="d9344a0d">S16-1185</url>
       <doi>10.18653/v1/S16-1185</doi>
@@ -1953,9 +1953,9 @@
     <paper id="186">
       <title><fixed-case>CMU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Graph-based <fixed-case>AMR</fixed-case> Parsing with Infinite Ramp Loss</title>
       <author><first>Jeffrey</first><last>Flanigan</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>1202–1206</pages>
       <url hash="76875160">S16-1186</url>
       <doi>10.18653/v1/S16-1186</doi>
@@ -1994,7 +1994,7 @@
     <paper id="190">
       <title><fixed-case>LIMSI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: machine-learning and temporal information to identify clinical events and time expressions</title>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <pages>1225–1230</pages>
       <url hash="e2fcc54e">S16-1190</url>
       <doi>10.18653/v1/S16-1190</doi>
@@ -2041,7 +2041,7 @@
       <title><fixed-case>U</fixed-case>tah<fixed-case>BMI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Extracting Temporal Information from Clinical Text</title>
       <author><first>Abdulrahman</first><last>Khalifa</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Stephane</first><last>Meystre</last></author>
+      <author id="stephane-meystre"><first>Stephane</first><last>Meystre</last></author>
       <pages>1256–1262</pages>
       <url hash="3ac7c1ae">S16-1195</url>
       <doi>10.18653/v1/S16-1195</doi>
@@ -2050,12 +2050,12 @@
     <paper id="196">
       <title><fixed-case>ULISBOA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Extraction of temporal expressions, clinical events and relations using <fixed-case>IBE</fixed-case>nt</title>
       <author><first>Marcia</first><last>Barros</last></author>
-      <author><first>Andre</first><last>Lamurias</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
       <author><first>Gonçalo</first><last>Figueiro</last></author>
       <author><first>Marta</first><last>Antunes</last></author>
       <author><first>Joana</first><last>Teixeira</last></author>
       <author><first>Alexandre</first><last>Pinheiro</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>1263–1267</pages>
       <url hash="2c886f74">S16-1196</url>
       <doi>10.18653/v1/S16-1196</doi>
@@ -2081,7 +2081,7 @@
     <paper id="199">
       <title><fixed-case>KUL</fixed-case>euven-<fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 12: Detecting Narrative Containment in Clinical Records</title>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1280–1285</pages>
       <url hash="4196b830">S16-1199</url>
       <doi>10.18653/v1/S16-1199</doi>
@@ -2093,7 +2093,7 @@
       <author><first>Damien</first><last>De Meyere</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <author><first>André</first><last>Bittar</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>1286–1291</pages>
       <url hash="826c6159">S16-1200</url>
       <doi>10.18653/v1/S16-1200</doi>
@@ -2105,7 +2105,7 @@
       <author><first>Hua</first><last>Xu</last></author>
       <author><first>Jingqi</first><last>Wang</last></author>
       <author><first>Yaoyun</first><last>Zhang</last></author>
-      <author><first>Sungrim</first><last>Moon</last></author>
+      <author id="sungrim-moon"><first>Sungrim</first><last>Moon</last></author>
       <author><first>Jun</first><last>Xu</last></author>
       <author><first>Yonghui</first><last>Wu</last></author>
       <pages>1292–1297</pages>
@@ -2125,7 +2125,7 @@
       <title><fixed-case>USAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 13: Hyponym Endocentricity</title>
       <author><first>Liling</first><last>Tan</last></author>
       <author><first>Francis</first><last>Bond</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>1303–1309</pages>
       <url hash="fa849893">S16-1203</url>
       <doi>10.18653/v1/S16-1203</doi>
@@ -2143,7 +2143,7 @@
     <paper id="205">
       <title><fixed-case>QASSIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 13: On the integration of Semantic Vectors in Pretopological Spaces for Lexical Taxonomy Acquisition</title>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <pages>1315–1319</pages>
       <url hash="35f98127">S16-1205</url>
       <doi>10.18653/v1/S16-1205</doi>
@@ -2156,9 +2156,9 @@
       <author><first>Eugen</first><last>Ruppert</last></author>
       <author><first>Steffen</first><last>Remus</last></author>
       <author><first>Hubert</first><last>Naets</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1320–1327</pages>
       <url hash="d93dfea4">S16-1206</url>
       <doi>10.18653/v1/S16-1206</doi>
@@ -2174,7 +2174,7 @@
     </paper>
     <paper id="208">
       <title><fixed-case>TALN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Semantic Taxonomy Enrichment Via Sense-Based Embeddings</title>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>1332–1336</pages>
@@ -2184,8 +2184,8 @@
     </paper>
     <paper id="209">
       <title><fixed-case>MS</fixed-case>ejr<fixed-case>K</fixed-case>u at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Taxonomy Enrichment by Evidence Ranking</title>
-      <author><first>Michael</first><last>Schlichtkrull</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="michael-schlichtkrull"><first>Michael</first><last>Schlichtkrull</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <pages>1337–1341</pages>
       <url hash="243481dd">S16-1209</url>
       <doi>10.18653/v1/S16-1209</doi>
@@ -2193,7 +2193,7 @@
     </paper>
     <paper id="210">
       <title>Deftor at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Taxonomy enrichment using definition vectors</title>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Agata</first><last>Rotondi</last></author>
       <pages>1342–1345</pages>
       <url hash="b4c9a7bc">S16-1210</url>
@@ -2211,7 +2211,7 @@
     </paper>
     <paper id="212">
       <title><fixed-case>VCU</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 14: Evaluating definitional-based similarity measure for semantic taxonomy enrichment</title>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>1351–1355</pages>
       <url hash="b78cf8af">S16-1212</url>
       <doi>10.18653/v1/S16-1212</doi>
@@ -2223,7 +2223,7 @@
       <booktitle>Proceedings of the Fifth Joint Conference on Lexical and Computational Semantics</booktitle>
       <url hash="569ac29f">S16-2</url>
       <editor><first>Claire</first><last>Gardent</last></editor>
-      <editor><first>Raffaella</first><last>Bernardi</last></editor>
+      <editor id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></editor>
       <editor><first>Ivan</first><last>Titov</last></editor>
       <doi>10.18653/v1/S16-2</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2257,9 +2257,9 @@
     </paper>
     <paper id="3">
       <title>Metaphor as a Medium for Emotion: An Empirical Study</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <pages>23–33</pages>
       <url hash="a1071a88">S16-2003</url>
       <doi>10.18653/v1/S16-2003</doi>
@@ -2268,7 +2268,7 @@
     <paper id="4">
       <title>High-Fidelity Lexical Axiom Construction from Verb Glosses</title>
       <author><first>Gene</first><last>Kim</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>34–44</pages>
       <url hash="a2cb2c5e">S16-2004</url>
       <doi>10.18653/v1/S16-2004</doi>
@@ -2324,9 +2324,9 @@
     <paper id="10">
       <title>Improving Zero-Shot-Learning for <fixed-case>G</fixed-case>erman Particle Verbs by using Training-Space Restrictions and Local Scaling</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>91–96</pages>
       <url hash="84bf31d5">S16-2010</url>
       <doi>10.18653/v1/S16-2010</doi>
@@ -2345,9 +2345,9 @@
     </paper>
     <paper id="12">
       <title>Leveraging <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et to build Corpus-Specific Verb Clusters</title>
-      <author><first>Daniel</first><last>Peterson</last></author>
+      <author id="daniel-peterson"><first>Daniel</first><last>Peterson</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <pages>102–107</pages>
       <url hash="38d63f74">S16-2012</url>
@@ -2376,7 +2376,7 @@
       <title>Linguistic Style Accommodation in Disagreements</title>
       <author><first>Elise</first><last>van der Pol</last></author>
       <author><first>Sharon</first><last>Gieske</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>120–124</pages>
       <url hash="904241dc">S16-2015</url>
       <doi>10.18653/v1/S16-2015</doi>
@@ -2386,7 +2386,7 @@
       <title>Unsupervised Text Segmentation Using Semantic Relatedness Graphs</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>125–130</pages>
       <url hash="c5fa4e30">S16-2016</url>
       <doi>10.18653/v1/S16-2016</doi>
@@ -2398,7 +2398,7 @@
       <author><first>Gilles</first><last>Jacobs</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>131–135</pages>
       <url hash="72e7c549">S16-2017</url>
       <doi>10.18653/v1/S16-2017</doi>
@@ -2426,7 +2426,7 @@
     </paper>
     <paper id="20">
       <title>The Role of Modifier and Head Properties in Predicting the Compositionality of <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Noun-Noun Compounds: A Vector-Space Perspective</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
       <pages>148–158</pages>
@@ -2437,7 +2437,7 @@
     <paper id="21">
       <title>Detecting Stance in Tweets And Analyzing its Interaction with Sentiment</title>
       <author><first>Parinaz</first><last>Sobhani</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>159–169</pages>
       <url hash="65bf997d">S16-2021</url>
@@ -2457,8 +2457,8 @@
     </paper>
     <paper id="23">
       <title>You and me... in a vector space: modelling individual speakers with distributional semantics</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <pages>179–188</pages>
       <url hash="9fe4f62a">S16-2023</url>
       <doi>10.18653/v1/S16-2023</doi>
@@ -2466,7 +2466,7 @@
     </paper>
     <paper id="24">
       <title>Random Positive-Only Projections: <fixed-case>PPMI</fixed-case>-Enabled Incremental Semantic Space Construction</title>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <pages>189–198</pages>
       <url hash="a5c9602c">S16-2024</url>
@@ -2475,8 +2475,8 @@
     </paper>
     <paper id="25">
       <title>A Compositional-Distributional Semantic Model for Searching Complex Entity Categories</title>
-      <author><first>Juliano Efson</first><last>Sales</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="juliano-efson-sales"><first>Juliano Efson</first><last>Sales</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>199–208</pages>
@@ -2488,7 +2488,7 @@
       <title>Approximating Givenness in Content Assessment through Distributional Semantics</title>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>209–218</pages>
       <url hash="453579cd">S16-2026</url>
       <doi>10.18653/v1/S16-2026</doi>
diff --git a/data/xml/S17.xml b/data/xml/S17.xml
index b9ab9e2c06..0e3b6d0bb5 100644
--- a/data/xml/S17.xml
+++ b/data/xml/S17.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the 6th Joint Conference on Lexical and Computational Semantics (*<fixed-case>SEM</fixed-case> 2017)</booktitle>
       <url hash="ab2df2f7">S17-1</url>
-      <editor><first>Nancy</first><last>Ide</last></editor>
-      <editor><first>Aurélie</first><last>Herbelot</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
+      <editor id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <doi>10.18653/v1/S17-1</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada</address>
@@ -20,9 +20,9 @@
     </frontmatter>
     <paper id="1">
       <title>What Analogies Reveal about Word Vectors and their Compositionality</title>
-      <author><first>Gregory</first><last>Finley</last></author>
+      <author id="gregory-finley"><first>Gregory</first><last>Finley</last></author>
       <author><first>Stephanie</first><last>Farmer</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>1–11</pages>
       <url hash="8007b00b">S17-1001</url>
       <doi>10.18653/v1/S17-1001</doi>
@@ -75,7 +75,7 @@
     <paper id="6">
       <title>Deep Learning Models For Multiword Expression Identification</title>
       <author><first>Waseem</first><last>Gharbieh</last></author>
-      <author><first>Virendrakumar</first><last>Bhavsar</last></author>
+      <author id="virendrakumar-bhavsar"><first>Virendrakumar</first><last>Bhavsar</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>54–64</pages>
       <url hash="676391ca">S17-1006</url>
@@ -85,7 +85,7 @@
     </paper>
     <paper id="7">
       <title>Emotion Intensities in Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Felipe</first><last>Bravo-Marquez</last></author>
       <pages>65–77</pages>
       <url hash="2b4c4649">S17-1007</url>
@@ -143,8 +143,8 @@
     <paper id="12">
       <title>Distributed Prediction of Relations for Entities: The Easy, The Difficult, and The Impossible</title>
       <author><first>Abhijeet</first><last>Gupta</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>104–109</pages>
       <url hash="50efb725">S17-1012</url>
       <doi>10.18653/v1/S17-1012</doi>
@@ -156,7 +156,7 @@
       <author><first>Angel</first><last>Maredia</last></author>
       <author><first>Kara</first><last>Schechtman</last></author>
       <author><first>Sarah Ita</first><last>Levitan</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>110–114</pages>
       <url hash="69cccfb3">S17-1013</url>
       <doi>10.18653/v1/S17-1013</doi>
@@ -167,7 +167,7 @@
       <title>Does Free Word Order Hurt? Assessing the Practical Lexical Function Model for <fixed-case>C</fixed-case>roatian</title>
       <author><first>Zoran</first><last>Medić</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>115–120</pages>
       <url hash="3e24b3e2">S17-1014</url>
       <attachment type="poster" hash="b783686a">S17-1014.Poster.pdf</attachment>
@@ -298,7 +298,7 @@
     <paper id="25">
       <title>Embedded Semantic Lexicon Induction with Joint Global and Local Optimization</title>
       <author><first>Sujay Kumar</first><last>Jauhar</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>209–219</pages>
       <url hash="b4cf09bc">S17-1025</url>
       <doi>10.18653/v1/S17-1025</doi>
@@ -321,7 +321,7 @@
       <title>Classifying Semantic Clause Types: Modeling Context and Genre Characteristics with Recurrent Neural Networks and Attention</title>
       <author><first>Maria</first><last>Becker</last></author>
       <author><first>Michael</first><last>Staniek</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>230–240</pages>
@@ -332,8 +332,8 @@
     </paper>
     <paper id="28">
       <title>Predictive Linguistic Features of Schizophrenia</title>
-      <author><first>Efsun</first><last>Sarioglu Kayi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="efsun-sarioglu-kayi"><first>Efsun</first><last>Sarioglu Kayi</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Luca</first><last>Pauselli</last></author>
       <author><first>Michael</first><last>Compton</last></author>
       <author><first>Glen</first><last>Coppersmith</last></author>
@@ -346,7 +346,7 @@
     <paper id="29">
       <title>Learning to Solve Geometry Problems from Natural Language Demonstrations in Textbooks</title>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>251–261</pages>
       <url hash="cc51c956">S17-1029</url>
       <doi>10.18653/v1/S17-1029</doi>
@@ -355,11 +355,11 @@
     </paper>
     <paper id="30">
       <title>Ways of Asking and Replying in Duplicate Question Detection</title>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
       <author><first>Vladislav</first><last>Maraev</last></author>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>262–270</pages>
       <url hash="ff31f4dd">S17-1030</url>
       <doi>10.18653/v1/S17-1030</doi>
@@ -374,7 +374,7 @@
       <editor><first>Steven</first><last>Bethard</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
-      <editor><first>Saif M.</first><last>Mohammad</last></editor>
+      <editor id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></editor>
       <editor><first>Daniel</first><last>Cer</last></editor>
       <editor><first>David</first><last>Jurgens</last></editor>
       <doi>10.18653/v1/S17-2</doi>
@@ -391,8 +391,8 @@
     <paper id="1">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation</title>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Iñigo</first><last>Lopez-Gazpio</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>1–14</pages>
@@ -409,7 +409,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 2: Multilingual and Cross-lingual Semantic Word Similarity</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
@@ -421,13 +421,13 @@
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Community Question Answering</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Doris</first><last>Hoogeveen</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Hamdy</first><last>Mubarak</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>27–48</pages>
       <url hash="e72e2e5e">S17-2003</url>
       <doi>10.18653/v1/S17-2003</doi>
@@ -448,7 +448,7 @@
     <paper id="5">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Detection and Interpretation of <fixed-case>E</fixed-case>nglish Puns</title>
       <author><first>Tristan</first><last>Miller</last></author>
-      <author><first>Christian</first><last>Hempelmann</last></author>
+      <author id="christian-f-hempelmann"><first>Christian</first><last>Hempelmann</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <pages>58–68</pages>
       <url hash="0f60999a">S17-2005</url>
@@ -458,8 +458,8 @@
     </paper>
     <paper id="6">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: <fixed-case>R</fixed-case>umour<fixed-case>E</fixed-case>val: Determining rumour veracity and support for rumours</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
       <author><first>Rob</first><last>Procter</last></author>
       <author><first>Geraldine</first><last>Wong Sak Hoi</last></author>
@@ -473,7 +473,7 @@
     <paper id="7">
       <title><fixed-case>BIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Using Semantic Information Space to Evaluate Semantic Textual Similarity</title>
       <author><first>Hao</first><last>Wu</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Ping</first><last>Jian</last></author>
       <author><first>Yuhang</first><last>Guo</last></author>
       <author><first>Chao</first><last>Su</last></author>
@@ -498,12 +498,12 @@
     <paper id="9">
       <title><fixed-case>IIT</fixed-case>-<fixed-case>UHH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Exploring Multiple Features for Community Question Answering and Implicit Dialogue Identification</title>
       <author><first>Titas</first><last>Nandi</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Sarah</first><last>Kohail</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>90–97</pages>
       <url hash="e5637e75">S17-2009</url>
       <doi>10.18653/v1/S17-2009</doi>
@@ -536,7 +536,7 @@
     <paper id="12">
       <title><fixed-case>C</fixed-case>ompi<fixed-case>LIG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Cross-Language Plagiarism Detection Methods for Semantic Textual Similarity</title>
       <author><first>Jérémy</first><last>Ferrero</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Frédéric</first><last>Agnès</last></author>
       <pages>109–114</pages>
@@ -574,7 +574,7 @@
     <paper id="15">
       <title><fixed-case>FCICU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Sense-Based Language Independent Semantic Textual Similarity Approach</title>
       <author><first>Basma</first><last>Hassan</last></author>
-      <author><first>Samir</first><last>AbdelRahman</last></author>
+      <author id="samir-abdelrahman"><first>Samir</first><last>AbdelRahman</last></author>
       <author><first>Reem</first><last>Bahgat</last></author>
       <author><first>Ibrahim</first><last>Farag</last></author>
       <pages>125–129</pages>
@@ -651,9 +651,9 @@
     <paper id="22">
       <title><fixed-case>ITNLP</fixed-case>-<fixed-case>A</fixed-case>i<fixed-case>KF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Rich Features Based <fixed-case>SVR</fixed-case> for Semantic Textual Similarity Computing</title>
       <author><first>Wenjie</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Lei</first><last>Lin</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>159–163</pages>
       <url hash="e6b1561b">S17-2022</url>
       <doi>10.18653/v1/S17-2022</doi>
@@ -684,7 +684,7 @@
       <title><fixed-case>STS</fixed-case>-<fixed-case>UHH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Scoring Semantic Textual Similarity Using Supervised and Unsupervised Ensemble</title>
       <author><first>Sarah</first><last>Kohail</last></author>
       <author><first>Amr Rekaby</first><last>Salama</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>175–179</pages>
       <url hash="a69fa5ee">S17-2025</url>
       <doi>10.18653/v1/S17-2025</doi>
@@ -703,8 +703,8 @@
     </paper>
     <paper id="27">
       <title><fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Simple Semantic Similarity</title>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Elizabeth</first><last>Merkhofer</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="elizabeth-merkhofer"><first>Elizabeth</first><last>Merkhofer</last></author>
       <author><first>Laura</first><last>Strickhart</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
       <pages>185–190</pages>
@@ -715,9 +715,9 @@
     </paper>
     <paper id="28">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Leverage Kernel-based Traditional <fixed-case>NLP</fixed-case> features and Neural Networks to Build a Universal Model for Multilingual and Cross-lingual Semantic Textual Similarity</title>
-      <author><first>Junfeng</first><last>Tian</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
       <author><first>Zhiheng</first><last>Zhou</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>191–197</pages>
       <url hash="3f2d4dc5">S17-2028</url>
@@ -731,9 +731,9 @@
       <author><first>Mahak</first><last>Goindani</last></author>
       <author><first>Chang</first><last>Li</last></author>
       <author><first>Di</first><last>Jin</last></author>
-      <author><first>Kristen Marie</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen Marie</first><last>Johnson</last></author>
       <author><first>Xiao</first><last>Zhang</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>198–202</pages>
       <url hash="5238c1f0">S17-2029</url>
@@ -743,7 +743,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>RTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Referential Translation Machines for Predicting Semantic Similarity</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>203–207</pages>
       <url hash="0d9940d5">S17-2030</url>
       <doi>10.18653/v1/S17-2030</doi>
@@ -753,7 +753,7 @@
     <paper id="31">
       <title><fixed-case>LIPN</fixed-case>-<fixed-case>IIMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Subword Embeddings, Attention Recurrent Neural Networks and Cross Word Alignment for Semantic Textual Similarity</title>
       <author><first>Ignacio</first><last>Arroyo-Fernández</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
       <pages>208–212</pages>
       <url hash="311b7a5f">S17-2031</url>
       <doi>10.18653/v1/S17-2031</doi>
@@ -764,7 +764,7 @@
       <title><fixed-case>L</fixed-case>2<fixed-case>F</fixed-case>/<fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Tasks 1 and 2: Lexical and semantic features in word and textual similarity</title>
       <author><first>Pedro</first><last>Fialho</last></author>
       <author><first>Hugo</first><last>Patinho Rodrigues</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <pages>213–219</pages>
       <url hash="f72880b9">S17-2032</url>
@@ -810,7 +810,7 @@
       <author><first>Yuteng</first><last>Zhang</last></author>
       <author><first>Ping</first><last>Jian</last></author>
       <author><first>Shumin</first><last>Shi</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>235–238</pages>
       <url hash="a2b36efd">S17-2036</url>
       <doi>10.18653/v1/S17-2036</doi>
@@ -842,7 +842,7 @@
     </paper>
     <paper id="39">
       <title><fixed-case>HHU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 2: Fast Hash-Based Embeddings for Semantic Word Similarity Assessment</title>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <pages>250–255</pages>
       <url hash="e970d8ba">S17-2039</url>
@@ -878,11 +878,11 @@
       <author><first>Răzvan-Gabriel</first><last>Rotari</last></author>
       <author><first>Ionuț</first><last>Hulub</last></author>
       <author><first>Ștefan</first><last>Oprea</last></author>
-      <author><first>Mihaela</first><last>Plămadă-Onofrei</last></author>
-      <author><first>Alina Beatrice</first><last>Lorenţ</last></author>
+      <author id="mihaela-plamada-onofrei"><first>Mihaela</first><last>Plămadă-Onofrei</last></author>
+      <author id="alina-beatrice-lorent"><first>Alina Beatrice</first><last>Lorenţ</last></author>
       <author><first>Raluca</first><last>Preisler</last></author>
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <pages>267–270</pages>
       <url hash="626ba4c6">S17-2042</url>
       <doi>10.18653/v1/S17-2042</doi>
@@ -904,9 +904,9 @@
       <title><fixed-case>UPC</fixed-case>-<fixed-case>USMBA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Combining multiple approaches for <fixed-case>CQA</fixed-case> for <fixed-case>A</fixed-case>rabic</title>
       <author><first>Yassine</first><last>El Adlouni</last></author>
       <author><first>Imane</first><last>Lahbari</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <author><first>Mohammed</first><last>Meknassi</last></author>
-      <author><first>Said Ouatik</first><last>El Alaoui</last></author>
+      <author id="said-ouatik-el-alaoui"><first>Said Ouatik</first><last>El Alaoui</last></author>
       <author><first>Noureddine</first><last>Ennahnahi</last></author>
       <pages>275–279</pages>
       <url hash="495163a8">S17-2044</url>
@@ -930,7 +930,7 @@
     <paper id="46">
       <title><fixed-case>M</fixed-case>o<fixed-case>RS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Easy to use <fixed-case>SVM</fixed-case> in Ranking Tasks</title>
       <author><first>Miguel J.</first><last>Rodrigues</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>287–291</pages>
       <url hash="f41a1fd8">S17-2046</url>
       <doi>10.18653/v1/S17-2046</doi>
@@ -986,7 +986,7 @@
     <paper id="51">
       <title><fixed-case>S</fixed-case>im<fixed-case>B</fixed-case>ow at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Soft-Cosine Semantic Similarity between Questions for Community Question Answering</title>
       <author><first>Delphine</first><last>Charlet</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>315–319</pages>
       <url hash="1243c0bd">S17-2051</url>
       <doi>10.18653/v1/S17-2051</doi>
@@ -1020,7 +1020,7 @@
     </paper>
     <paper id="54">
       <title><fixed-case>S</fixed-case>wiss<fixed-case>A</fixed-case>lps at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Attention-based Convolutional Neural Network for Community Question Answering</title>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>334–338</pages>
       <url hash="f96125b2">S17-2054</url>
@@ -1033,7 +1033,7 @@
       <author><first>Filip</first><last>Šaina</last></author>
       <author><first>Toni</first><last>Kukurin</last></author>
       <author><first>Lukrecija</first><last>Puljić</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>339–343</pages>
       <url hash="2f259ea9">S17-2055</url>
@@ -1044,7 +1044,7 @@
     <paper id="56">
       <title><fixed-case>GW</fixed-case>_<fixed-case>QA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Question Answer Re-ranking on <fixed-case>A</fixed-case>rabic Fora</title>
       <author><first>Nada</first><last>Almarwani</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>344–348</pages>
       <url hash="1fada65a">S17-2056</url>
       <doi>10.18653/v1/S17-2056</doi>
@@ -1091,7 +1091,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Using Traditional and Deep Learning Methods to Address Community Question Answering Task</title>
       <author><first>Guoshun</first><last>Wu</last></author>
       <author><first>Yixuan</first><last>Sheng</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>365–369</pages>
       <url hash="3d02fa97">S17-2060</url>
@@ -1177,14 +1177,14 @@
     </paper>
     <paper id="68">
       <title>#<fixed-case>W</fixed-case>ar<fixed-case>T</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 6: Using Neural Networks for Discovering Humorous Tweets</title>
-      <author><first>Iuliana Alexandra</first><last>Fleșcan-Lovin-Arseni</last></author>
-      <author><first>Ramona Andreea</first><last>Turcu</last></author>
+      <author id="iuliana-alexandra-flescan-lovin-arseni"><first>Iuliana Alexandra</first><last>Fleșcan-Lovin-Arseni</last></author>
+      <author id="ramona-andreea-turcu"><first>Ramona Andreea</first><last>Turcu</last></author>
       <author><first>Cristina</first><last>Sîrbu</last></author>
       <author><first>Larisa</first><last>Alexa</last></author>
       <author><first>Sandra Maria</first><last>Amarandei</last></author>
       <author><first>Nichita</first><last>Herciu</last></author>
       <author><first>Constantin</first><last>Scutaru</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <author><first>Adrian</first><last>Iftene</last></author>
       <pages>407–410</pages>
       <url hash="f7e9e878">S17-2068</url>
@@ -1253,7 +1253,7 @@
     </paper>
     <paper id="75">
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Pun Detection and Interpretation</title>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <author><first>Pascual</first><last>Carrasco</last></author>
@@ -1286,7 +1286,7 @@
     <paper id="78">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Using Supervised and Unsupervised Methods to Detect and Locate <fixed-case>E</fixed-case>nglish Puns</title>
       <author><first>Yuhuan</first><last>Xiu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>453–456</pages>
       <url hash="96a48049">S17-2078</url>
@@ -1328,7 +1328,7 @@
     <paper id="82">
       <title><fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Determining Rumour and Veracity Support for Rumours on <fixed-case>T</fixed-case>witter.</title>
       <author><first>Omar</first><last>Enayet</last></author>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <pages>470–474</pages>
       <url hash="dade7f3a">S17-2082</url>
       <doi>10.18653/v1/S17-2082</doi>
@@ -1360,9 +1360,9 @@
     </paper>
     <paper id="85">
       <title><fixed-case>DFKI</fixed-case>-<fixed-case>DKT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Rumour Detection and Classification using Cascading Heuristics</title>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Julian</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno Schneider</last></author>
       <pages>486–490</pages>
       <url hash="1cfe9c40">S17-2085</url>
       <doi>10.18653/v1/S17-2085</doi>
@@ -1372,7 +1372,7 @@
     <paper id="86">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Rumour Evaluation Using Effective Features and Supervised Ensemble Models</title>
       <author><first>Feixiang</first><last>Wang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>491–496</pages>
       <url hash="8d3d67ed">S17-2086</url>
@@ -1386,7 +1386,7 @@
       <author><first>Sunny</first><last>Narayan</last></author>
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>497–501</pages>
       <url hash="5948af08">S17-2087</url>
       <doi>10.18653/v1/S17-2087</doi>
@@ -1397,7 +1397,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Noura</first><last>Farra</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>502–518</pages>
       <url hash="8bb3710d">S17-2088</url>
       <doi>10.18653/v1/S17-2088</doi>
@@ -1407,9 +1407,9 @@
     <paper id="89">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: Fine-Grained Sentiment Analysis on Financial Microblogs and News</title>
       <author><first>Keith</first><last>Cortis</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Tobias</first><last>Daudert</last></author>
-      <author><first>Manuela</first><last>Huerlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Huerlimann</last></author>
       <author><first>Manel</first><last>Zarrouk</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <author><first>Brian</first><last>Davis</last></author>
@@ -1444,9 +1444,9 @@
     </paper>
     <paper id="92">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 11: End-User Development using Natural Language</title>
-      <author><first>Juliano</first><last>Sales</last></author>
+      <author id="juliano-efson-sales"><first>Juliano</first><last>Sales</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>556–564</pages>
       <url hash="9faed52d">S17-2092</url>
       <doi>10.18653/v1/S17-2092</doi>
@@ -1457,8 +1457,8 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val</title>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>565–572</pages>
       <url hash="be47f47c">S17-2093</url>
       <doi>10.18653/v1/S17-2093</doi>
@@ -1499,7 +1499,7 @@
     <paper id="97">
       <title>The <fixed-case>AI</fixed-case>2 system at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10 (<fixed-case>S</fixed-case>cience<fixed-case>IE</fixed-case>): semi-supervised end-to-end entity and relation extraction</title>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Russell</first><last>Power</last></author>
       <pages>592–596</pages>
@@ -1513,7 +1513,7 @@
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>597–602</pages>
       <url hash="eb2ff9c5">S17-2098</url>
       <doi>10.18653/v1/S17-2098</doi>
@@ -1531,8 +1531,8 @@
       <author><first>Ahmad</first><last>Al Sallab</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <pages>603–610</pages>
       <url hash="53516a59">S17-2099</url>
       <doi>10.18653/v1/S17-2099</doi>
@@ -1584,10 +1584,10 @@
     </paper>
     <paper id="104">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: User based classification</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>634–639</pages>
       <url hash="7338f5c4">S17-2104</url>
       <doi>10.18653/v1/S17-2104</doi>
@@ -1597,7 +1597,7 @@
     <paper id="105">
       <title><fixed-case>HLP</fixed-case>@<fixed-case>UP</fixed-case>enn at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4<fixed-case>A</fixed-case>: A simple, self-optimizing text classification system combining dense and sparse vectors</title>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>640–643</pages>
       <url hash="31a2d64c">S17-2105</url>
       <doi>10.18653/v1/S17-2105</doi>
@@ -1616,7 +1616,7 @@
     </paper>
     <paper id="107">
       <title><fixed-case>S</fixed-case>enti<fixed-case>ME</fixed-case>++ at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Stacking State-of-the-Art Classifiers to Enhance Sentiment Classification</title>
-      <author><first>Raphaël</first><last>Troncy</last></author>
+      <author id="raphael-troncy"><first>Raphaël</first><last>Troncy</last></author>
       <author><first>Enrico</first><last>Palumbo</last></author>
       <author><first>Efstratios</first><last>Sygkounas</last></author>
       <author><first>Giuseppe</first><last>Rizzo</last></author>
@@ -1638,7 +1638,7 @@
     </paper>
     <paper id="109">
       <title><fixed-case>TWINA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: <fixed-case>T</fixed-case>witter Sentiment Analysis with Ensemble Gradient Boost Tree Classifier</title>
-      <author><first>Naveen Kumar</first><last>Laskari</last></author>
+      <author id="naveen-kumar-laskari"><first>Naveen Kumar</first><last>Laskari</last></author>
       <author><first>Suresh Kumar</first><last>Sanampudi</last></author>
       <pages>659–663</pages>
       <url hash="7274d312">S17-2109</url>
@@ -1651,7 +1651,7 @@
       <author><first>Hala</first><last>Mulki</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
       <author><first>Mourad</first><last>Gridach</last></author>
-      <author><first>Ismail</first><last>Babaoglu</last></author>
+      <author id="ismail-babaoglu"><first>Ismail</first><last>Babaoglu</last></author>
       <pages>664–669</pages>
       <url hash="653fe424">S17-2110</url>
       <doi>10.18653/v1/S17-2110</doi>
@@ -1677,8 +1677,8 @@
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Nikolaos</first><last>Malandrakis</last></author>
       <author><first>Elisavet</first><last>Palogiannidi</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <pages>675–682</pages>
       <url hash="7a8e6837">S17-2112</url>
@@ -1741,7 +1741,7 @@
       <title><fixed-case>SSN</fixed-case>_<fixed-case>MLRG</fixed-case>1 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter Using Multi-Kernel <fixed-case>G</fixed-case>aussian Process Classifier</title>
       <author><first>Angel Deborah</first><last>S</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>T T</first><last>Mirnalinee</last></author>
+      <author id="t-t-mirnalinee"><first>T T</first><last>Mirnalinee</last></author>
       <pages>709–712</pages>
       <url hash="d5fe7f2f">S17-2118</url>
       <doi>10.18653/v1/S17-2118</doi>
@@ -1763,8 +1763,8 @@
     <paper id="120">
       <title><fixed-case>LSIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Using Adapted Sentiment Similarity Seed Words For <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Tweet Polarity Classification</title>
       <author><first>Amal</first><last>Htait</last></author>
-      <author><first>Sébastien</first><last>Fournier</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="sebastien-fournier"><first>Sébastien</first><last>Fournier</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>718–722</pages>
       <url hash="75b44678">S17-2120</url>
       <doi>10.18653/v1/S17-2120</doi>
@@ -1775,7 +1775,7 @@
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis using Deep Learning</title>
       <author><first>José-Ángel</first><last>González</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <pages>723–727</pages>
       <url hash="b4c38016">S17-2121</url>
       <doi>10.18653/v1/S17-2121</doi>
@@ -1863,7 +1863,7 @@
       <title><fixed-case>T</fixed-case>opic<fixed-case>T</fixed-case>hunder at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Classification Using a Convolutional Neural Network with Distant Supervision</title>
       <author><first>Simon</first><last>Müller</last></author>
       <author><first>Tobias</first><last>Huonder</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>766–770</pages>
       <url hash="f0ccdaf7">S17-2129</url>
@@ -1875,7 +1875,7 @@
       <title><fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2017 Task 4: A <fixed-case>B</fixed-case>4<fixed-case>MSA</fixed-case> Ensemble based on Genetic Programming for <fixed-case>T</fixed-case>witter Sentiment Analysis</title>
       <author><first>Sabino</first><last>Miranda-Jiménez</last></author>
       <author><first>Mario</first><last>Graff</last></author>
-      <author><first>Eric Sadit</first><last>Tellez</last></author>
+      <author id="eric-sadit-tellez"><first>Eric Sadit</first><last>Tellez</last></author>
       <author><first>Daniela</first><last>Moctezuma</last></author>
       <pages>771–776</pages>
       <url hash="4e4367e5">S17-2130</url>
@@ -1886,8 +1886,8 @@
     <paper id="131">
       <title><fixed-case>BUSEM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4<fixed-case>A</fixed-case> Sentiment Analysis with Word Embedding and Long Short Term Memory <fixed-case>RNN</fixed-case> Approaches</title>
       <author><first>Deger</first><last>Ayata</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
-      <author><first>Arzucan</first><last>Ozgur</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Ozgur</last></author>
       <pages>777–783</pages>
       <url hash="e5871d56">S17-2131</url>
       <doi>10.18653/v1/S17-2131</doi>
@@ -1909,7 +1909,7 @@
     </paper>
     <paper id="133">
       <title><fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: <fixed-case>A</fixed-case>rabic Sentiment Analysis</title>
-      <author><first>Samhaa R.</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa R.</first><last>El-Beltagy</last></author>
       <author><first>Mona</first><last>El Kalamawy</last></author>
       <author><first>Abu Bakr</first><last>Soliman</last></author>
       <pages>790–795</pages>
@@ -1942,11 +1942,11 @@
     </paper>
     <paper id="136">
       <title><fixed-case>UCSC</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sense n-grams for Sentiment Analysis in <fixed-case>T</fixed-case>witter</title>
-      <author><first>José</first><last>Abreu</last></author>
+      <author id="jose-i-abreu"><first>José</first><last>Abreu</last></author>
       <author><first>Iván</first><last>Castro</last></author>
       <author><first>Claudia</first><last>Martínez</last></author>
       <author><first>Sebastián</first><last>Oliva</last></author>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <pages>807–811</pages>
       <url hash="4a268340">S17-2136</url>
       <doi>10.18653/v1/S17-2136</doi>
@@ -1958,7 +1958,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="137">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Evaluating Effective Features on Machine Learning Methods for <fixed-case>T</fixed-case>witter Message Polarity Classification</title>
       <author><first>Yunxiao</first><last>Zhou</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>812–816</pages>
       <url hash="79dbabf5">S17-2137</url>
@@ -1984,7 +1984,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
       <title><fixed-case>SSN</fixed-case>_<fixed-case>MLRG</fixed-case>1 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: Fine-Grained Sentiment Analysis Using Multiple Kernel <fixed-case>G</fixed-case>aussian Process Regression Model</title>
       <author><first>Angel Deborah</first><last>S</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>T T</first><last>Mirnalinee</last></author>
+      <author id="t-t-mirnalinee"><first>T T</first><last>Mirnalinee</last></author>
       <pages>823–826</pages>
       <url hash="aeb9940a">S17-2139</url>
       <doi>10.18653/v1/S17-2139</doi>
@@ -2127,7 +2127,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="152">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: An Ensemble of Regression Algorithms with Effective Features for Fine-Grained Sentiment Analysis in Financial Domain</title>
       <author><first>Mengxiao</first><last>Jiang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>888–893</pages>
       <url hash="ea185a8b">S17-2152</url>
@@ -2141,8 +2141,8 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
       <author><first>Abhishek</first><last>Sethi</last></author>
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>894–898</pages>
       <url hash="41de481e">S17-2153</url>
       <doi>10.18653/v1/S17-2153</doi>
@@ -2155,7 +2155,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
       <author><first>Shobhit</first><last>Bhatnagar</last></author>
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>899–903</pages>
       <url hash="fc9e6ea2">S17-2154</url>
       <doi>10.18653/v1/S17-2154</doi>
@@ -2187,7 +2187,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="157">
       <title><fixed-case>O</fixed-case>xford at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 9: Neural <fixed-case>AMR</fixed-case> Parsing with Pointer-Augmented Attention</title>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>914–919</pages>
       <url hash="f742db82">S17-2157</url>
       <doi>10.18653/v1/S17-2157</doi>
@@ -2208,9 +2208,9 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     </paper>
     <paper id="159">
       <title><fixed-case>RIGOTRIO</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 9: Combining Machine Learning and Grammar Engineering for <fixed-case>AMR</fixed-case> Parsing and Generation</title>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <pages>924–928</pages>
       <url hash="3112dd59">S17-2159</url>
       <doi>10.18653/v1/S17-2159</doi>
@@ -2240,10 +2240,10 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="162">
       <title><fixed-case>NTNU</fixed-case>-1@<fixed-case>S</fixed-case>cience<fixed-case>IE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Identifying and Labelling Keyphrases with Conditional Random Fields</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Cristina</first><last>Marco</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Marco</last></author>
       <author><first>Biswanath</first><last>Barik</last></author>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <pages>938–941</pages>
       <url hash="d9873bf8">S17-2162</url>
       <doi>10.18653/v1/S17-2162</doi>
@@ -2265,7 +2265,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     </paper>
     <paper id="164">
       <title><fixed-case>LABDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Extracting Keyphrases from Scientific Publications by combining the <fixed-case>BANNER</fixed-case> tool and the <fixed-case>UMLS</fixed-case> Semantic Network</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Cristóbal</first><last>Colón-Ruiz</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>947–950</pages>
@@ -2278,7 +2278,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
       <title>The <fixed-case>NTNU</fixed-case> System at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Extracting Keyphrases and Relations from Scientific Publications Using Multiple Conditional Random Fields</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Kuei-Ching</first><last>Lee</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>951–955</pages>
       <url hash="17363b85">S17-2165</url>
       <doi>10.18653/v1/S17-2165</doi>
@@ -2321,7 +2321,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="169">
       <title><fixed-case>LABDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Relation Classification between keyphrases via Convolutional Neural Network</title>
       <author><first>Víctor</first><last>Suárez-Paniagua</last></author>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>969–972</pages>
       <url hash="22eb314b">S17-2169</url>
@@ -2432,11 +2432,11 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     </paper>
     <paper id="179">
       <title><fixed-case>ULISBOA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Extraction and classification of temporal expressions and events</title>
-      <author><first>Andre</first><last>Lamurias</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
       <author><first>Diana</first><last>Sousa</last></author>
       <author><first>Sofia</first><last>Pereira</last></author>
-      <author><first>Luka</first><last>Clarke</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="luka-a-clarke"><first>Luka</first><last>Clarke</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>1019–1023</pages>
       <url hash="fc6eaffe">S17-2179</url>
       <doi>10.18653/v1/S17-2179</doi>
@@ -2457,7 +2457,7 @@ of average. Among 39 submissions to this task, we ranked 10th.</abstract>
     <paper id="181">
       <title><fixed-case>KUL</fixed-case>euven-<fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Cross-Domain Temporal Information Extraction from Clinical Records</title>
       <author><first>Artuur</first><last>Leeuwenberg</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>1030–1034</pages>
       <url hash="fa5ec7e5">S17-2181</url>
       <doi>10.18653/v1/S17-2181</doi>
diff --git a/data/xml/S18.xml b/data/xml/S18.xml
index 1b326f2ee9..af318d7271 100644
--- a/data/xml/S18.xml
+++ b/data/xml/S18.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 12th International Workshop on Semantic Evaluation</booktitle>
       <url hash="aca7e30c">S18-1</url>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
-      <editor><first>Saif M.</first><last>Mohammad</last></editor>
+      <editor id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></editor>
       <editor><first>Jonathan</first><last>May</last></editor>
       <editor><first>Ekaterina</first><last>Shutova</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
@@ -48,9 +48,9 @@
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2018 Task 2: Multilingual Emoji Prediction</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
       <author><first>Viviana</first><last>Patti</last></author>
@@ -75,7 +75,7 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Irony Detection in <fixed-case>E</fixed-case>nglish Tweets</title>
       <author><first>Cynthia</first><last>Van Hee</last></author>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>39–50</pages>
       <abstract>This paper presents the first shared task on irony detection: given a tweet, automatic natural language processing systems should determine whether the tweet is ironic (Task A) and which type of irony (if any) is expressed (Task B). The ironic tweets were collected using irony-related hashtags (i.e. #irony, #sarcasm, #not) and were subsequently manually annotated to minimise the amount of noise in the corpus. Prior to distributing the data, hashtags that were used to collect the tweets were removed from the corpus. For both tasks, a training corpus of 3,834 tweets was provided, as well as a test set containing 784 tweets. Our shared tasks received submissions from 43 teams for the binary classification Task A and from 31 teams for the multiclass Task B. The highest classification scores obtained for both subtasks are respectively F1= 0.71 and F1= 0.51 and demonstrate that fine-grained irony classification is much more challenging than binary irony detection.</abstract>
       <url hash="922bb73b">S18-1005</url>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2018 Task 4: Character Identification on Multiparty Dialogues</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Henry Y.</first><last>Chen</last></author>
       <pages>57–64</pages>
       <abstract>Character identification is a task of entity linking that finds the global entity of each personal mention in multiparty dialogue. For this task, the first two seasons of the popular TV show Friends are annotated, comprising a total of 448 dialogues, 15,709 mentions, and 401 entities. The personal mentions are detected from nominals referring to certain characters in the show, and the entities are collected from the list of all characters in those two seasons of the show. This task is challenging because it requires the identification of characters that are mentioned but may not be active during the conversation. Among 90+ participants, four of them submitted their system outputs and showed strengths in different aspects about the task. Thorough analyses of the distributed datasets, system outputs, and comparative studies are also provided. To facilitate the momentum, we create an open-source project for this task and publicly release a larger and cleaner dataset, hoping to support researchers for more enhanced modeling.</abstract>
@@ -110,9 +110,9 @@
       <title><fixed-case>AMORE</fixed-case>-<fixed-case>UPF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 4: <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Entity Library</title>
       <author><first>Laura</first><last>Aina</last></author>
       <author><first>Carina</first><last>Silberer</last></author>
-      <author><first>Ionut-Teodor</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut-Teodor</first><last>Sorodoc</last></author>
       <author><first>Matthijs</first><last>Westera</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>65–69</pages>
       <abstract>This paper describes our winning contribution to SemEval 2018 Task 4: Character Identification on Multiparty Dialogues. It is a simple, standard model with one key innovation, an entity library. Our results show that this innovation greatly facilitates the identification of infrequent characters. Because of the generic nature of our model, this finding is potentially relevant to any task that requires the effective learning from sparse or imbalanced data.</abstract>
       <url hash="4ffebdd6">S18-1008</url>
@@ -147,7 +147,7 @@
       <author><first>Dongfang</first><last>Xu</last></author>
       <author><first>Ahmed</first><last>Elsayed</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>88–96</pages>
       <abstract>This paper presents the outcomes of the Parsing Time Normalization shared task held within SemEval-2018. The aim of the task is to parse time expressions into the compositional semantic graphs of the Semantically Compositional Annotation of Time Expressions (SCATE) schema, which allows the representation of a wider variety of time expressions than previous approaches. Two tracks were included, one to evaluate the parsing of individual components of the produced graphs, in a classic information extraction way, and another one to evaluate the quality of the time intervals resulting from the interpretation of those graphs. Though 40 participants registered for the task, only one team submitted output, achieving 0.55 F1 in Track 1 (parsing) and 0.70 F1 in Track 2 (intervals).</abstract>
       <url hash="ba5bb9e2">S18-1011</url>
@@ -159,7 +159,7 @@
       <author><first>Amy</first><last>Olex</last></author>
       <author><first>Luke</first><last>Maffey</last></author>
       <author><first>Nicholas</first><last>Morgan</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>97–101</pages>
       <abstract>Temporal information extraction is a challenging task. Here we describe Chrono, a hybrid rule-based and machine learning system that identifies temporal expressions in text and normalizes them into the SCATE schema. After minor parsing logic adjustments, Chrono has emerged as the top performing system for SemEval 2018 Task 6: Parsing Time Normalizations.</abstract>
       <url hash="0d97aaeb">S18-1012</url>
@@ -201,8 +201,8 @@
     <paper id="16">
       <title><fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: A classifier chain to detect emotions in tweets</title>
       <author><first>Luna</first><last>De Bruyne</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>123–127</pages>
       <abstract>This paper presents an emotion classification system for English tweets, submitted for the SemEval shared task on Affect in Tweets, subtask 5: Detecting Emotions. The system combines lexicon, n-gram, style, syntactic and semantic features. For this multi-class multi-label problem, we created a classifier chain. This is an ensemble of eleven binary classifiers, one for each possible emotion category, where each model gets the predictions of the preceding models as additional features. The predicted labels are combined to get a multi-label representation of the predictions. Our system was ranked eleventh among thirty five participating teams, with a Jaccard accuracy of 52.0% and macro- and micro-average F1-scores of 49.3% and 64.0%, respectively.</abstract>
       <url hash="bd71f42a">S18-1016</url>
@@ -212,9 +212,9 @@
     <paper id="17">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotion Recognition in Tweets</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>128–132</pages>
       <abstract>Emotion classification is a new task that combines several disciplines including Artificial Intelligence and Psychology, although Natural Language Processing is perhaps the most challenging area. In this paper, we describe our participation in SemEval-2018 Task1: Affect in Tweets. In particular, we have participated in EI-oc, EI-reg and E-c subtasks for English and Spanish languages.</abstract>
       <url hash="4aaffdc7">S18-1017</url>
@@ -247,7 +247,7 @@
       <title><fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: <fixed-case>E</fixed-case>vo<fixed-case>MSA</fixed-case> and μ<fixed-case>TC</fixed-case> for Sentiment Analysis</title>
       <author><first>Mario</first><last>Graff</last></author>
       <author><first>Sabino</first><last>Miranda-Jiménez</last></author>
-      <author><first>Eric S.</first><last>Tellez</last></author>
+      <author id="eric-sadit-tellez"><first>Eric S.</first><last>Tellez</last></author>
       <author><first>Daniela</first><last>Moctezuma</last></author>
       <pages>146–150</pages>
       <abstract>This paper describes our participation in Affective Tweets task for emotional intensity and sentiment intensity subtasks for English, Spanish, and Arabic languages. We used two approaches, μTC and EvoMSA. The first one is a generic text categorization and regression system; and the second one, a two-stage architecture for Sentiment Analysis. Both approaches are multilingual and domain independent.</abstract>
@@ -258,7 +258,7 @@
     <paper id="21">
       <title>Epita at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Sentiment Analysis Using Transfer Learning Approach</title>
       <author><first>Guillaume</first><last>Daval-Frerot</last></author>
-      <author><first>Abdesselam</first><last>Bouchekif</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdesselam</first><last>Bouchekif</last></author>
       <author><first>Anatole</first><last>Moreau</last></author>
       <pages>151–155</pages>
       <abstract>In this paper we present our system for detecting valence task. The major issue was to apply a state-of-the-art system despite the small dataset provided: the system would quickly overfit. The main idea of our proposal is to use transfer learning, which allows to avoid learning from scratch. Indeed, we start to train a first model to predict if a tweet is positive, negative or neutral. For this we use an external dataset which is larger and similar to the target dataset. Then, the pre-trained model is re-used as the starting point to train a new model that classifies a tweet into one of the seven various levels of sentiment intensity. Our system, trained using transfer learning, achieves 0.776 and 0.763 respectively for Pearson correlation coefficient and weighted quadratic kappa metrics on the subtask evaluation dataset.</abstract>
@@ -289,9 +289,9 @@
     <paper id="24">
       <title>Tw-<fixed-case>S</fixed-case>t<fixed-case>AR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Preprocessing Impact on Multi-label Emotion Classification</title>
       <author><first>Hala</first><last>Mulki</last></author>
-      <author><first>Chedi</first><last>Bechikh Ali</last></author>
+      <author id="chedi-bechikh-ali"><first>Chedi</first><last>Bechikh Ali</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
-      <author><first>Ismail</first><last>Babaoğlu</last></author>
+      <author id="ismail-babaoglu"><first>Ismail</first><last>Babaoğlu</last></author>
       <pages>167–171</pages>
       <abstract>In this paper, we describe our contribution in SemEval-2018 contest. We tackled task 1 “Affect in Tweets”, subtask E-c “Detecting Emotions (multi-label classification)”. A multilabel classification system Tw-StAR was developed to recognize the emotions embedded in Arabic, English and Spanish tweets. To handle the multi-label classification problem via traditional classifiers, we employed the binary relevance transformation strategy while a TF-IDF scheme was used to generate the tweets’ features. We investigated using single and combinations of several preprocessing tasks to further improve the performance. The results showed that specific combinations of preprocessing tasks could significantly improve the evaluation measures. This has been later emphasized by the official results as our system ranked 3rd for both Arabic and Spanish datasets and 14th for the English dataset.</abstract>
       <url hash="d276e67c">S18-1024</url>
@@ -310,11 +310,11 @@
     </paper>
     <paper id="26">
       <title><fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>ntens Tracker at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotional Intensity Levels in #Tweets</title>
-      <author><first>Ramona-Andreea</first><last>Turcu</last></author>
+      <author id="ramona-andreea-turcu"><first>Ramona-Andreea</first><last>Turcu</last></author>
       <author><first>Sandra Maria</first><last>Amarandei</last></author>
-      <author><first>Iuliana-Alexandra</first><last>Flescan-Lovin-Arseni</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
-      <author><first>Diana</first><last>Trandabat</last></author>
+      <author id="iuliana-alexandra-flescan-lovin-arseni"><first>Iuliana-Alexandra</first><last>Flescan-Lovin-Arseni</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last></author>
       <pages>177–180</pages>
       <abstract>The „Affect in Tweets” task is centered on emotions categorization and evaluation matrix using multi-language tweets (English and Spanish). In this research, SemEval Affect dataset was preprocessed, categorized, and evaluated accordingly (precision, recall, and accuracy). The system described in this paper is based on the implementation of supervised machine learning (Naive Bayes, KNN and SVM), deep learning (NN Tensor Flow model), and decision trees algorithms.</abstract>
       <url hash="6e41d05b">S18-1026</url>
@@ -325,7 +325,7 @@
       <title>u<fixed-case>O</fixed-case>ttawa at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Self-Attentive Hybrid <fixed-case>GRU</fixed-case>-Based Network</title>
       <author><first>Ahmed</first><last>Husseini Orabi</last></author>
       <author><first>Mahmoud</first><last>Husseini Orabi</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>David</first><last>Van Bruwaene</last></author>
       <pages>181–185</pages>
       <abstract>We propose a novel attentive hybrid GRU-based network (SAHGN), which we used at SemEval-2018 Task 1: Affect in Tweets. Our network has two main characteristics, 1) has the ability to internally optimize its feature representation using attention mechanisms, and 2) provides a hybrid representation using a character level Convolutional Neural Network (CNN), as well as a self-attentive word-level encoder. The key advantage of our model is its ability to signify the relevant and important information that enables self-optimization. Results are reported on the valence intensity regression task.</abstract>
@@ -412,7 +412,7 @@
     <paper id="35">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotion Intensity Prediction Using Effective Features and Machine Learning Models</title>
       <author><first>Huimin</first><last>Xu</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>231–235</pages>
       <abstract>This paper describes our submissions to SemEval 2018 task 1. The task is affect intensity prediction in tweets, including five subtasks. We participated in all subtasks of English tweets. We extracted several traditional NLP, sentiment lexicon, emotion lexicon and domain specific features from tweets, adopted supervised machine learning algorithms to perform emotion intensity prediction.</abstract>
@@ -428,7 +428,7 @@
       <author><first>Alaa</first><last>Maarouf</last></author>
       <author><first>Raslan</first><last>Kain</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <pages>236–244</pages>
       <abstract>While significant progress has been achieved for Opinion Mining in Arabic (OMA), very limited efforts have been put towards the task of Emotion mining in Arabic. In fact, businesses are interested in learning a fine-grained representation of how users are feeling towards their products or services. In this work, we describe the methods used by the team Emotion Mining in Arabic (EMA), as part of the SemEval-2018 Task 1 for Affect Mining for Arabic tweets. EMA participated in all 5 subtasks. For the five tasks, several preprocessing steps were evaluated and eventually the best system included diacritics removal, elongation adjustment, replacement of emojis by the corresponding Arabic word, character normalization and light stemming. Moreover, several features were evaluated along with different classification and regression techniques. For the 5 subtasks, word embeddings feature turned out to perform best along with Ensemble technique. EMA achieved the 1st place in subtask 5, and 3rd place in subtasks 1 and 3.</abstract>
       <url hash="1d331e2c">S18-1036</url>
@@ -443,7 +443,7 @@
       <author><first>Athanasia</first><last>Kolovou</last></author>
       <author><first>Georgios</first><last>Paraskevopoulos</last></author>
       <author><first>Nikolaos</first><last>Ellinas</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <pages>245–255</pages>
       <abstract>In this paper we present deep-learning models that submitted to the SemEval-2018 Task 1 competition: “Affect in Tweets”. We participated in all subtasks for English tweets. We propose a Bi-LSTM architecture equipped with a multi-layer self attention mechanism. The attention mechanism improves the model performance and allows us to identify salient words in tweets, as well as gain insight into the models making them more interpretable. Our model utilizes a set of word2vec word embeddings trained on a large collection of 550 million Twitter messages, augmented by a set of word affective features. Due to the limited amount of task-specific training data, we opted for a transfer learning approach by pretraining the Bi-LSTMs on the dataset of Semeval 2017, Task 4A. The proposed approach ranked 1st in Subtask E “Multi-Label Emotion Classification”, 2nd in Subtask A “Emotion Intensity Regression” and achieved competitive results in other subtasks.</abstract>
@@ -547,7 +547,7 @@
     <paper id="46">
       <title>Zewen at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: An Ensemble Model for Affect Prediction in Tweets</title>
       <author><first>Zewen</first><last>Chi</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Jiangui</first><last>Chen</last></author>
       <author><first>Hao</first><last>Wu</last></author>
       <author><first>Ran</first><last>Wei</last></author>
@@ -561,8 +561,8 @@
       <title>Amrita_student at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Distributed Representation of Social Media Text for Affects in Tweets</title>
       <author><first>Nidhin A</first><last>Unnithan</last></author>
       <author><first>Shalini</first><last>K.</last></author>
-      <author><first>Barathi</first><last>Ganesh H. B.</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="barathi-ganesh-h-b"><first>Barathi</first><last>Ganesh H. B.</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K. P.</last></author>
       <pages>319–323</pages>
       <abstract>In this paper we did an analysis of “Affects in Tweets” which was one of the task conducted by semeval 2018. Task was to build a model which is able to do regression and classification of different emotions from the given tweets data set. We developed a base model for all the subtasks using distributed representation (Doc2Vec) and applied machine learning techniques for classification and regression. Distributed representation is an unsupervised algorithm which is capable of learning fixed length feature representation from variable length texts. Machine learning techniques used for regression is ’Linear Regression’ while ’Random Forest Tree’ is used for classification purpose. Empirical results obtained for all the subtasks by our model are shown in this paper.</abstract>
@@ -575,7 +575,7 @@
       <author><first>Angel Deborah</first><last>S</last></author>
       <author><first>Rajalakshmi</first><last>S</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>324–328</pages>
       <abstract>The system developed by the SSN MLRG1 team for Semeval-2018 task 1 on affect in tweets uses rule based feature selection and one-hot encoding to generate the input feature vector. Multilayer Perceptron was used to build the model for emotion intensity ordinal classification, sentiment analysis ordinal classification and emotion classfication subtasks. Support Vector Machine was used to build the model for emotion intensity regression and sentiment intensity regression subtasks.</abstract>
       <url hash="eac06470">S18-1048</url>
@@ -585,8 +585,8 @@
     <paper id="49">
       <title><fixed-case>CENNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Constrained Vector Space Model in Affects in Tweets</title>
       <author><first>Naveen</first><last>J R</last></author>
-      <author><first>Barathi</first><last>Ganesh H. B.</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="barathi-ganesh-h-b"><first>Barathi</first><last>Ganesh H. B.</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K P</last></author>
       <pages>329–333</pages>
       <abstract>This paper discusses on task 1, “Affect in Tweets” sharedtask, conducted in SemEval-2018. This task comprises of various subtasks, which required participants to analyse over different emotions and sentiments based on the provided tweet data and also measure the intensity of these emotions for subsequent subtasks. Our approach in these task was to come up with a model on count based representation and use machine learning techniques for regression and classification related tasks. In this work, we use a simple bag of words technique for supervised text classification model as to compare, that even with some advance distributed representation models we can still achieve significant accuracy. Further, fine tuning on various parameters for the bag of word, representation model we acquired better scores over various other baseline models (Vinayan et al.) participated in the sharedtask.</abstract>
@@ -597,8 +597,8 @@
     <paper id="50">
       <title><fixed-case>T</fixed-case>eam<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Global Vectors Representation in Emotion Detection</title>
       <author><first>Anon</first><last>George</last></author>
-      <author><first>Barathi</first><last>Ganesh H. B.</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="barathi-ganesh-h-b"><first>Barathi</first><last>Ganesh H. B.</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K P</last></author>
       <pages>334–338</pages>
       <abstract>Emotions are a way of expressing human sentiments. In the modern era, social media is a platform where we convey our emotions. These emotions can be joy, anger, sadness and fear. Understanding the emotions from the written sentences is an interesting part in knowing about the writer. In the amount of digital language shared through social media, a considerable amount of data reflects the sentiment or emotion towards some product, person and organization. Since these texts are from users with diverse social aspects, these texts can be used to enrich the application related to the business intelligence. More than the sentiment, identification of intensity of the sentiment will enrich the performance of the end application. In this paper we experimented the intensity prediction as a text classification problem that evaluates the distributed representation text using aggregated sum and dimensionality reduction of the glove vectors of the words present in the respective texts .</abstract>
@@ -620,7 +620,7 @@
     <paper id="52">
       <title>Mutux at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Exploring Impacts of Context Information On Emotion Detection</title>
       <author><first>Pan</first><last>Du</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <pages>345–349</pages>
       <abstract>This paper describes MuTuX, our system that is designed for task 1-5a, emotion classification analysis of tweets on SemEval2018. The system aims at exploring the potential of context information of terms for emotion analysis. A Recurrent Neural Network is adopted to capture the context information of terms in tweets. Only term features and the sequential relations are used in our system. The results submitted ranks 16th out of 35 systems on the task of emotion detection in English-language tweets.</abstract>
       <url hash="cea9d710">S18-1052</url>
@@ -669,7 +669,7 @@
     <paper id="57">
       <title><fixed-case>UIUC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Recognizing Affect with Ensemble Models</title>
       <author><first>Abhishek Avinash</first><last>Narwekar</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>377–384</pages>
       <abstract>Our submission to the SemEval-2018 Task1: Affect in Tweets shared task competition is a supervised learning model relying on standard lexicon features coupled with word embedding features. We used an ensemble of diverse models, including random forests, gradient boosted trees, and linear models, corrected for training-development set mismatch. We submitted the system’s output for subtasks 1 (emotion intensity prediction), 2 (emotion ordinal classification), 3 (valence intensity regression) and 4 (valence ordinal classification), for English tweets. We placed 25th, 19th, 24th and 15th in the four subtasks respectively. The baseline considered was an SVM (Support Vector Machines) model with linear kernel on the lexicon and embedding based features. Our system’s final performance measured in Pearson correlation scores outperformed the baseline by a margin of 2.2% to 14.6% across all tasks.</abstract>
       <url hash="2dc5f5ad">S18-1057</url>
@@ -720,9 +720,9 @@
     <paper id="62">
       <title>The Dabblers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Multilingual Emoji Prediction</title>
       <author><first>Larisa</first><last>Alexa</last></author>
-      <author><first>Alina</first><last>Lorenț</last></author>
-      <author><first>Daniela</first><last>Gîfu</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="alina-beatrice-lorent"><first>Alina</first><last>Lorenț</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <pages>405–409</pages>
       <abstract>The “Multilingual Emoji Prediction” task focuses on the ability of predicting the correspondent emoji for a certain tweet. In this paper, we investigate the relation between words and emojis. In order to do that, we used supervised machine learning (Naive Bayes) and deep learning (Recursive Neural Network).</abstract>
       <url hash="f740a040">S18-1062</url>
@@ -788,7 +788,7 @@
     <paper id="67">
       <title>Peperomia at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Vector Similarity Based Approach for Emoji Prediction</title>
       <author><first>Jing</first><last>Chen</last></author>
-      <author><first>Dechuan</first><last>Yang</last></author>
+      <author id="dechuan-yang"><first>Dechuan</first><last>Yang</last></author>
       <author><first>Xilian</first><last>Li</last></author>
       <author><first>Wei</first><last>Chen</last></author>
       <author><first>Tengjiao</first><last>Wang</last></author>
@@ -801,8 +801,8 @@
     <paper id="68">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Leverage Traditional <fixed-case>NLP</fixed-case> Features and Neural Networks Methods to Address <fixed-case>T</fixed-case>witter Emoji Prediction Task</title>
       <author><first>Xingwu</first><last>Lu</last></author>
-      <author><first>Xin</first><last>Mao</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="xinnian-mao"><first>Xin</first><last>Mao</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>433–437</pages>
       <abstract>This paper describes our submissions to Task 2 in SemEval 2018, i.e., Multilingual Emoji Prediction. We first investigate several traditional Natural Language Processing (NLP) features, and then design several deep learning models. For subtask 1: Emoji Prediction in English, we combine two different methods to represent tweet, i.e., supervised model using traditional features and deep learning model. For subtask 2: Emoji Prediction in Spanish, we only use deep learning model.</abstract>
@@ -896,7 +896,7 @@
     </paper>
     <paper id="76">
       <title><fixed-case>S</fixed-case>ynt<fixed-case>NN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: is Syntax Useful for Emoji Prediction? Embedding Syntactic Trees in Multi Layer Perceptrons</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Andrea</first><last>Santilli</last></author>
       <pages>477–481</pages>
       <abstract>In this paper, we present SyntNN as a way to include traditional syntactic models in multilayer neural networks used in the task of Semeval Task 2 of emoji prediction. The model builds on the distributed tree embedder also known as distributed tree kernel. Initial results are extremely encouraging but additional analysis is needed to overcome the problem of overfitting.</abstract>
@@ -918,8 +918,8 @@
       <title><fixed-case>CENNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Enhanced Distributed Representation of Text using Target Classes for Emoji Prediction Representation</title>
       <author><first>Naveen</first><last>J R</last></author>
       <author><first>Hariharan</first><last>V</last></author>
-      <author><first>Barathi</first><last>Ganesh H. B.</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="barathi-ganesh-h-b"><first>Barathi</first><last>Ganesh H. B.</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K P</last></author>
       <pages>486–490</pages>
       <abstract>Emoji is one of the “fastest growing language ” in pop-culture, especially in social media and it is very unlikely for its usage to decrease. These are generally used to bring an extra level of meaning to the texts, posted on social media platforms. Providing such an added info, gives more insights to the plain text, arising to hidden interpretation within the text. This paper explains our analysis on Task 2, ” Multilingual Emoji Prediction” sharedtask conducted by Semeval-2018. In the task, a predicted emoji based on a piece of Twitter text are labelled under 20 different classes (most commonly used emojis) where these classes are learnt and further predicted are made for unseen Twitter text. In this work, we have experimented and analysed emojis predicted based on Twitter text, as a classification problem where the entailing emoji is considered as a label for every individual text data. We have implemented this using distributed representation of text through fastText. Also, we have made an effort to demonstrate how fastText framework can be useful in case of emoji prediction. This task is divide into two subtask, they are based on dataset presented in two different languages English and Spanish.</abstract>
@@ -956,7 +956,7 @@
       <title><fixed-case>LIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Mixing Word Embeddings and Bag of Features for Multilingual Emoji Prediction</title>
       <author><first>Gaël</first><last>Guibon</last></author>
       <author><first>Magalie</first><last>Ochs</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <pages>502–506</pages>
       <abstract>In this paper we present the system submitted to the SemEval2018 task2 : Multilingual Emoji Prediction. Our system approaches both languages as being equal by first; considering word embeddings associated to automatically computed features of different types, then by applying bagging algorithm RandomForest to predict the emoji of a tweet.</abstract>
       <url hash="1598ca93">S18-1081</url>
@@ -1021,7 +1021,7 @@
     <paper id="87">
       <title><fixed-case>IIIDYT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Irony detection in <fixed-case>E</fixed-case>nglish tweets</title>
       <author><first>Edison</first><last>Marrese-Taylor</last></author>
-      <author><first>Suzana</first><last>Ilic</last></author>
+      <author id="suzana-ilic"><first>Suzana</first><last>Ilic</last></author>
       <author><first>Jorge</first><last>Balazs</last></author>
       <author><first>Helmut</first><last>Prendinger</last></author>
       <author><first>Yutaka</first><last>Matsuo</last></author>
@@ -1059,8 +1059,8 @@
       <title><fixed-case>WLV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Dissecting Tweets in Search of Irony</title>
       <author><first>Omid</first><last>Rohanian</last></author>
       <author><first>Shiva</first><last>Taslimipoor</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>553–559</pages>
       <abstract>This paper describes the systems submitted to SemEval 2018 Task 3 “Irony detection in English tweets” for both subtasks A and B. The first system leveraging a combination of sentiment, distributional semantic, and text surface features is ranked third among 44 teams according to the official leaderboard of the subtask A. The second system with slightly different representation of the features ranked ninth in subtask B. We present a method that entails decomposing tweets into separate parts. Searching for contrast within the constituents of a tweet is an integral part of our system. We embrace an extensive definition of contrast which leads to a vast coverage in detecting ironic content.</abstract>
       <url hash="13a80ed7">S18-1090</url>
@@ -1079,7 +1079,7 @@
     <paper id="92">
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Tasks 1 and 3: Affect and Irony Detection in Tweets</title>
       <author><first>José-Ángel</first><last>González</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <pages>565–569</pages>
       <abstract>This paper describes the participation of ELiRF-UPV team at tasks 1 and 3 of Semeval-2018. We present a deep learning based system that assembles Convolutional Neural Networks and Long Short-Term Memory neural networks. This system has been used with slight modifications for the two tasks addressed both for English and Spanish. Finally, the results obtained in the competition are reported and discussed.</abstract>
@@ -1134,7 +1134,7 @@
       <title><fixed-case>INAOE</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: An Ensemble Approach for Irony Detection in <fixed-case>T</fixed-case>witter</title>
       <author><first>Delia Irazú</first><last>Hernández Farías</last></author>
       <author><first>Fernando</first><last>Sánchez-Vega</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>594–599</pages>
       <abstract>This paper describes an ensemble approach to the SemEval-2018 Task 3. The proposed method is composed of two renowned methods in text classification together with a novel approach for capturing ironic content by exploiting a tailored lexicon for irony detection. We experimented with different ensemble settings. The obtained results show that our method has a good performance for detecting the presence of ironic content in Twitter.</abstract>
@@ -1146,7 +1146,7 @@
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Exploration on Irony Detection from Tweets via Machine Learning and Deep Learning Methods</title>
       <author><first>Zhenghang</first><last>Yin</last></author>
       <author><first>Feixiang</first><last>Wang</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Wenting</first><last>Wang</last></author>
       <pages>600–606</pages>
       <abstract>The paper describes our submissions to task 3 in SemEval-2018. There are two subtasks: Subtask A is a binary classification task to determine whether a tweet is ironic, and Subtask B is a fine-grained classification task including four classes. To address them, we explored supervised machine learning method alone and in combination with neural networks.</abstract>
@@ -1204,7 +1204,7 @@
       <author><first>Rajalakshmi</first><last>S</last></author>
       <author><first>Angel Deborah</first><last>S</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>633–637</pages>
       <abstract>Sentiment analysis plays an important role in E-commerce. Identifying ironic and sarcastic content in text plays a vital role in inferring the actual intention of the user, and is necessary to increase the accuracy of sentiment analysis. This paper describes the work on identifying the irony level in twitter texts. The system developed by the SSN MLRG1 team in SemEval-2018 for task 3 (irony detection) uses rule based approach for feature selection and MultiLayer Perceptron (MLP) technique to build the model for multiclass irony classification subtask, which classifies the given text into one of the four class labels.</abstract>
       <url hash="f5c1bebf">S18-1103</url>
@@ -1215,7 +1215,7 @@
       <title><fixed-case>NLPRL</fixed-case>-<fixed-case>IITBHU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Combining Linguistic Features and Emoji pre-trained <fixed-case>CNN</fixed-case> for Irony Detection in Tweets</title>
       <author><first>Harsh</first><last>Rangwani</last></author>
       <author><first>Devang</first><last>Kulshreshtha</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>638–642</pages>
       <abstract>This paper describes our participation in SemEval 2018 Task 3 on Irony Detection in Tweets. We combine linguistic features with pre-trained activations of a neural network. The CNN is trained on the emoji prediction task. We combine the two feature sets and feed them into an XGBoost Classifier for classification. Subtask-A involves classification of tweets into ironic and non-ironic instances whereas Subtask-B involves classification of the tweet into - non-ironic, verbal irony, situational irony or other verbal irony. It is observed that combining features from these two different feature spaces improves our system results. We leverage the SMOTE algorithm to handle the problem of class imbalance in Subtask-B. Our final model achieves an F1-score of 0.65 and 0.47 on Subtask-A and Subtask-B respectively. Our system ranks 4th on both tasks respectively, outperforming the baseline by 6% on Subtask-A and 14% on Subtask-B.</abstract>
       <url hash="b9a3c42f">S18-1104</url>
@@ -1289,7 +1289,7 @@
       <author><first>Kata</first><last>Gábor</last></author>
       <author><first>Davide</first><last>Buscaldi</last></author>
       <author><first>Anne-Kathrin</first><last>Schumann</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Haïfa</first><last>Zargayouna</last></author>
       <author><first>Thierry</first><last>Charnois</last></author>
       <pages>679–688</pages>
@@ -1336,9 +1336,9 @@
     </paper>
     <paper id="115">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Hypernym Discovery</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Claudio</first><last>Delli Bovi</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Sergio</first><last>Oramas</last></author>
       <author><first>Tommaso</first><last>Pasini</last></author>
       <author><first>Enrico</first><last>Santus</last></author>
@@ -1354,7 +1354,7 @@
     <paper id="116">
       <title><fixed-case>CRIM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: A Hybrid Approach to Hypernym Discovery</title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>725–731</pages>
       <abstract>This report describes the system developed by the CRIM team for the hypernym discovery task at SemEval 2018. This system exploits a combination of supervised projection learning and unsupervised pattern-based hypernym discovery. It was ranked first on the 3 sub-tasks for which we submitted results.</abstract>
       <url hash="b54f7033">S18-1116</url>
@@ -1434,7 +1434,7 @@
     <paper id="123">
       <title><fixed-case>L</fixed-case>ight<fixed-case>R</fixed-case>el at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Lightweight and Fast Relation Classification</title>
       <author><first>Tyler</first><last>Renslow</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>778–782</pages>
       <abstract>We present LightRel, a lightweight and fast relation classifier. Our goal is to develop a high baseline for different relation extraction tasks. By defining only very few data-internal, word-level features and external knowledge sources in the form of word clusters and word embeddings, we train a fast and simple linear classifier</abstract>
       <url hash="4e9b5b4a">S18-1123</url>
@@ -1453,7 +1453,7 @@
     <paper id="125">
       <title>The <fixed-case>UWNLP</fixed-case> system at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Neural Relation Extraction Model with Selectively Incorporated Concept Embeddings</title>
       <author><first>Yi</first><last>Luan</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
       <pages>788–792</pages>
       <abstract>This paper describes our submission for SemEval 2018 Task 7 shared task on semantic relation extraction and classification in scientific papers. Our model is based on the end-to-end relation extraction model of (Miwa and Bansal, 2016) with several enhancements such as character-level encoding attention mechanism on selecting pretrained concept candidate embeddings. Our official submission ranked the second in relation classification task (Subtask 1.1 and Subtask 2 Senerio 2), and the first in the relation extraction task (Subtask 2 Scenario 1).</abstract>
@@ -1464,8 +1464,8 @@
     <paper id="126">
       <title><fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>-<fixed-case>NII</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Semantic Relation Classification in Scientific Papers via Convolutional Neural Network</title>
       <author><first>Víctor</first><last>Suárez-Paniagua</last></author>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>793–797</pages>
       <abstract>This paper reports our participation for SemEval-2018 Task 7 on extraction and classification of relationships between entities in scientific papers. Our approach is based on the use of a Convolutional Neural Network (CNN) trained on350 abstract with manually annotated entities and relations. Our hypothesis is that this deep learning model can be applied to extract and classify relations between entities for scientific papers at the same time. We use the Part-of-Speech and the distances to the target entities as part of the embedding for each word and we blind all the entities by marker names. In addition, we use sampling techniques to overcome the imbalance issues of this dataset. Our architecture obtained an F1-score of 35.4% for the relation extraction task and 18.5% for the relation classification task with a basic configuration of the one step CNN.</abstract>
       <url hash="aac62a4a">S18-1126</url>
@@ -1488,7 +1488,7 @@
     <paper id="128">
       <title><fixed-case>SIRIUS</fixed-case>-<fixed-case>LTG</fixed-case>-<fixed-case>U</fixed-case>i<fixed-case>O</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Convolutional Neural Networks with Shortest Dependency Paths for Semantic Relation Extraction and Classification in Scientific Papers</title>
       <author><first>Farhad</first><last>Nooralahzadeh</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Jan Tore</first><last>Lønning</last></author>
       <pages>805–810</pages>
       <abstract>This article presents the SIRIUS-LTG-UiO system for the SemEval 2018 Task 7 on Semantic Relation Extraction and Classification in Scientific Papers. First we extract the shortest dependency path (sdp) between two entities, then we introduce a convolutional neural network (CNN) which takes the shortest dependency path embeddings as input and performs relation classification with differing objectives for each subtask of the shared task. This approach achieved overall F1 scores of 76.7 and 83.2 for relation classification on clean and noisy data, respectively. Furthermore, for combined relation extraction and classification on clean data, it obtained F1 scores of 37.4 and 33.6 for each phase. Our system ranks 3rd in all three sub-tasks of the shared task.</abstract>
@@ -1541,7 +1541,7 @@
       <author><first>Yuan</first><last>Xu</last></author>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Anne</first><last>Lauscher</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>826–830</pages>
       <abstract>Large repositories of scientific literature call for the development of robust methods to extract information from scholarly papers. This problem is addressed by the SemEval 2018 Task 7 on extracting and classifying relations found within scientific publications. In this paper, we present a feature-based and a deep learning-based approach to the task and discuss the results of the system runs that we submitted for evaluation.</abstract>
       <url hash="235910cb">S18-1132</url>
@@ -1598,7 +1598,7 @@
       <title><fixed-case>S</fixed-case>ci<fixed-case>REL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: A System for Semantic Relation Extraction and Classification</title>
       <author><first>Darshini</first><last>Mahendran</last></author>
       <author><first>Chathurika</first><last>Brahmana</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>853–857</pages>
       <abstract>This paper describes our system, SciREL (Scientific abstract RELation extraction system), developed for the SemEval 2018 Task 7: Semantic Relation Extraction and Classification in Scientific Papers. We present a feature-vector based system to extract explicit semantic relation and classify them. Our system is trained in the ACL corpus (BIrd et al., 2008) that contains annotated abstracts given by the task organizers. When an abstract with annotated entities is given as the input into our system, it extracts the semantic relations through a set of defined features and classifies them into one of the given six categories of relations through feature engineering and a learned model. For the best combination of features, our system SciREL obtained an F-measure of 20.03 on the official test corpus which includes 150 abstracts in the relation classification Subtask 1.1. In this paper, we provide an in-depth error analysis of our results to prevent duplication of research efforts in the development of future systems</abstract>
       <url hash="a98e3056">S18-1137</url>
@@ -1608,8 +1608,8 @@
     <paper id="138">
       <title><fixed-case>NTNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Classifier Ensembling for Semantic Relation Identification and Classification in Scientific Papers</title>
       <author><first>Biswanath</first><last>Barik</last></author>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>858–862</pages>
       <abstract>The paper presents NTNU’s contribution to SemEval-2018 Task 7 on relation identification and classification. The class weights and parameters of five alternative supervised classifiers were optimized through grid search and cross-validation. The outputs of the classifiers were combined through voting for the final prediction. A wide variety of features were explored, with the most informative identified by feature selection. The best setting achieved F1 scores of 47.4% and 66.0% in the relation classification subtasks 1.1 and 1.2. For relation identification and classification in subtask 2, it achieved F1 scores of 33.9% and 17.0%,</abstract>
       <url hash="414c121d">S18-1138</url>
@@ -1659,7 +1659,7 @@
       <author><first>Shimei</first><last>Pan</last></author>
       <author><first>Youngja</first><last>Park</last></author>
       <author><first>Anupam</first><last>Joshi</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>878–884</pages>
       <abstract>We describe the systems developed by the UMBC team for 2018 SemEval Task 8, SecureNLP (Semantic Extraction from CybersecUrity REports using Natural Language Processing). We participated in three of the sub-tasks: (1) classifying sentences as being relevant or irrelevant to malware, (2) predicting token labels for sentences, and (4) predicting attribute labels from the Malware Attribute Enumeration and Characterization vocabulary for defining malware characteristics. We achieve F1 score of 50.34/18.0 (dev/test), 22.23 (test-data), and 31.98 (test-data) for Task1, Task2 and Task2 respectively. We also make our cybersecurity embeddings publicly available at <url>http://bit.ly/cyber2vec</url>.</abstract>
       <url hash="d5cba936">S18-1142</url>
@@ -1680,9 +1680,9 @@
     </paper>
     <paper id="144">
       <title><fixed-case>F</fixed-case>lytxt_<fixed-case>NTNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 8: Identifying and Classifying Malware Text Using Conditional Random Fields and Naïve <fixed-case>B</fixed-case>ayes Classifiers</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Biswanath</first><last>Barik</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>890–893</pages>
       <abstract>Cybersecurity risks such as malware threaten the personal safety of users, but to identify malware text is a major challenge. The paper proposes a supervised learning approach to identifying malware sentences given a document (subTask1 of SemEval 2018, Task 8), as well as to classifying malware tokens in the sentences (subTask2). The approach achieved good results, ranking second of twelve participants for both subtasks, with F-scores of 57% for subTask1 and 28% for subTask2.</abstract>
       <url hash="4a4befe0">S18-1144</url>
@@ -1700,10 +1700,10 @@
     </paper>
     <paper id="146">
       <title><fixed-case>A</fixed-case>pollo at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Detecting Hypernymy Relations Using Syntactic Dependencies</title>
-      <author><first>Mihaela</first><last>Onofrei</last></author>
+      <author id="mihaela-plamada-onofrei"><first>Mihaela</first><last>Onofrei</last></author>
       <author><first>Ionuț</first><last>Hulub</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
-      <author><first>Daniela</first><last>Gîfu</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gîfu</last></author>
       <pages>898–902</pages>
       <abstract>This paper presents the participation of Apollo’s team in the SemEval-2018 Task 9 “Hypernym Discovery”, Subtask 1: “General-Purpose Hypernym Discovery”, which tries to produce a ranked list of hypernyms for a specific term. We propose a novel approach for automatic extraction of hypernymy relations from a corpus by using dependency patterns. We estimated that the application of these patterns leads to a higher score than using the traditional lexical patterns.</abstract>
       <url hash="872fd7cc">S18-1146</url>
@@ -1760,7 +1760,7 @@
     </paper>
     <paper id="151">
       <title><fixed-case>ADAPT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Skip-Gram Word Embeddings for Unsupervised Hypernym Discovery in Specialised Corpora</title>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
       <pages>924–927</pages>
       <abstract>This paper describes a simple but competitive unsupervised system for hypernym discovery. The system uses skip-gram word embeddings with negative sampling, trained on specialised corpora. Candidate hypernyms for an input word are predicted based based on cosine similarity scores. Two sets of word embedding models were trained separately on two specialised corpora: a medical corpus and a music industry corpus. Our system scored highest in the medical domain among the competing unsupervised systems but performed poorly on the music industry domain. Our system does not depend on any external data other than raw specialised corpora.</abstract>
@@ -1839,8 +1839,8 @@
     <paper id="158">
       <title><fixed-case>ALB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: A System for Capturing Discriminative Attributes</title>
       <author><first>Bogdan</first><last>Dumitru</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>963–967</pages>
       <abstract>Semantic difference detection attempts to capture whether a word is a discriminative attribute between two other words. For example, the discriminative feature red characterizes the first word from the (apple, banana) pair, but not the second. Modeling semantic difference is essential for language understanding systems, as it provides useful information for identifying particular aspects of word senses. This paper describes our system implementation (the ALB system of the NLP@Unibuc team) for the 10th task of the SemEval 2018 workshop, “Capturing Discriminative Attributes”. We propose a method for semantic difference detection that uses an SVM classifier with features based on co-occurrence counts and shallow semantic parsing, achieving 0.63 F1 score in the competition.</abstract>
       <url hash="c70ae7bc">S18-1158</url>
@@ -1850,7 +1850,7 @@
     <paper id="159">
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Capturing Discriminative Attributes with Knowledge Graphs and <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>José-Ángel</first><last>González</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <pages>968–971</pages>
@@ -1864,8 +1864,8 @@
       <author><first>Shiva</first><last>Taslimipoor</last></author>
       <author><first>Omid</first><last>Rohanian</last></author>
       <author><first>Le An</first><last>Ha</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>972–976</pages>
       <abstract>This paper describes the system submitted to SemEval 2018 shared task 10 ‘Capturing Dicriminative Attributes’. We use a combination of knowledge-based and co-occurrence features to capture the semantic difference between two words in relation to an attribute. We define scores based on association measures, ngram counts, word similarity, and ConceptNet relations. The system is ranked 4th (joint) on the official leaderboard of the task.</abstract>
       <url hash="f9067449">S18-1160</url>
@@ -1875,7 +1875,7 @@
     <paper id="161">
       <title><fixed-case>UNAM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Unsupervised Semantic Discriminative Attribute Identification in Neural Word Embedding Cones</title>
       <author><first>Ignacio</first><last>Arroyo-Fernández</last></author>
-      <author><first>Ivan</first><last>Meza</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza</last></author>
       <author><first>Carlos-Francisco</first><last>Méndez-Cruz</last></author>
       <pages>977–984</pages>
       <abstract>In this paper we report an unsupervised method aimed to identify whether an attribute is discriminative for two words (which are treated as concepts, in our particular case). To this end, we use geometrically inspired vector operations underlying unsupervised decision functions. These decision functions operate on state-of-the-art neural word embeddings of the attribute and the concepts. The main idea can be described as follows: if attribute <tex-math>q</tex-math> discriminates concept <tex-math>a</tex-math> from concept <tex-math>b</tex-math>, then <tex-math>q</tex-math> is excluded from the feature set shared by these two concepts: the intersection. That is, the membership <tex-math>q\in (a\cap b)</tex-math> does not hold. As <tex-math>a,b,q</tex-math> are represented with neural word embeddings, we tested vector operations allowing us to measure membership, i.e. fuzzy set operations (t-norm, for fuzzy intersection, and t-conorm, for fuzzy union) and the similarity between <tex-math>q</tex-math> and the convex cone described by <tex-math>a</tex-math> and <tex-math>b</tex-math>.</abstract>
@@ -1898,7 +1898,7 @@
     <paper id="163">
       <title><fixed-case>B</fixed-case>om<fixed-case>J</fixed-case>i at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Combining Vector-, Pattern- and Graph-based Information to Identify Discriminative Attributes</title>
       <author><first>Enrico</first><last>Santus</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Emmanuele</first><last>Chersoni</last></author>
       <pages>990–994</pages>
       <abstract>This paper describes BomJi, a supervised system for capturing discriminative attributes in word pairs (e.g. yellow as discriminative for banana over watermelon). The system relies on an XGB classifier trained on carefully engineered graph-, pattern- and word embedding-based features. It participated in the SemEval-2018 Task 10 on Capturing Discriminative Attributes, achieving an F1 score of 0.73 and ranking 2nd out of 26 participant systems.</abstract>
@@ -1918,7 +1918,7 @@
     <paper id="165">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Evaluating Simple but Effective Features on Machine Learning Methods for Semantic Difference Detection</title>
       <author><first>Yunxiao</first><last>Zhou</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>999–1002</pages>
       <abstract>This paper describes the system we submitted to Task 10 (Capturing Discriminative Attributes) in SemEval 2018. Given a triple (word1, word2, attribute), this task is to predict whether it exemplifies a semantic difference or not. We design and investigate several word embedding features, PMI features and WordNet features together with supervised machine learning methods to address this task. Officially released results show that our system ranks above average.</abstract>
@@ -1929,7 +1929,7 @@
     <paper id="166">
       <title><fixed-case>A</fixed-case>mrita<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Capturing discriminative attributes using convolution neural network over global vector representation.</title>
       <author><first>Vivek</first><last>Vinayan</last></author>
-      <author><first>Anand</first><last>Kumar M</last></author>
+      <author id="anand-kumar-m"><first>Anand</first><last>Kumar M</last></author>
       <author><first>Soman</first><last>K P</last></author>
       <pages>1003–1007</pages>
       <abstract>The “Capturing Discriminative Attributes” sharedtask is the tenth task, conjoint with SemEval2018. The task is to predict if a word can capture distinguishing attributes of one word from another. We use GloVe word embedding, pre-trained on openly sourced corpus for this task. A base representation is initially established over varied dimensions. These representations are evaluated based on validation scores over two models, first on an SVM based classifier and second on a one dimension CNN model. The scores are used to further develop the representation with vector combinations, by considering various distance measures. These measures correspond to offset vectors which are concatenated as features, mainly to improve upon the F1score, with the best accuracy. The features are then further tuned on the validation scores, to achieve highest F1score. Our evaluation narrowed down to two representations, classified on CNN models, having a total dimension length of 1204 &amp; 1203 for the final submissions. Of the two, the latter feature representation delivered our best F1score of 0.658024 (as per result).</abstract>
@@ -1996,7 +1996,7 @@
     <paper id="172">
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Machine Comprehension using Commonsense Knowledge</title>
       <author><first>José-Ángel</first><last>González</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Encarna</first><last>Segarra</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <pages>1034–1037</pages>
@@ -2030,7 +2030,7 @@
     <paper id="175">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Using Deep Learning Method to Address Machine Comprehension Task</title>
       <author><first>Yixuan</first><last>Sheng</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>1048–1052</pages>
       <abstract>This paper describes the system we submitted to the Task 11 in SemEval 2018, i.e., Machine Comprehension using Commonsense Knowledge. Given a passage and some questions that each have two candidate answers, this task requires the participate system to select out one answer meet the meaning of original text or commonsense knowledge from the candidate answers. For this task, we use a deep learning method to obtain final predict answer by calculating relevance of choices representations and question-aware document representation.</abstract>
@@ -2040,7 +2040,7 @@
     </paper>
     <paper id="176">
       <title><fixed-case>CSR</fixed-case>eader at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Multiple Choice Question Answering as Textual Entailment</title>
-      <author><first>Zhengping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zhengping</first><last>Jiang</last></author>
       <author><first>Qi</first><last>Sun</last></author>
       <pages>1053–1057</pages>
       <abstract>In this document we present an end-to-end machine reading comprehension system that solves multiple choice questions with a textual entailment perspective. Since some of the knowledge required is not explicitly mentioned in the text, we try to exploit commonsense knowledge by using pretrained word embeddings during contextual embeddings and by dynamically generating a weighted representation of related script knowledge. In the model two kinds of prediction structure are ensembled, and the final accuracy of our system is 10 percent higher than the naiive baseline.</abstract>
@@ -2071,7 +2071,7 @@
     <paper id="179">
       <title><fixed-case>IUCM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Similar-Topic Texts as a Comprehension Knowledge Source</title>
       <author><first>Sofia</first><last>Reznikova</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>1068–1072</pages>
       <abstract>This paper describes the IUCM entry at SemEval-2018 Task 11, on machine comprehension using commonsense knowledge. First, clustering and topic modeling are used to divide given texts into topics. Then, during the answering phase, other texts of the same topic are retrieved and used as commonsense knowledge. Finally, the answer is selected. While clustering itself shows good results, finding an answer proves to be more challenging. This paper reports the results of system evaluation and suggests potential improvements.</abstract>
       <url hash="905f1f2f">S18-1179</url>
@@ -2090,8 +2090,8 @@
     </paper>
     <paper id="181">
       <title><fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Commonsense Reasoning without Commonsense Knowledge</title>
-      <author><first>Elizabeth</first><last>Merkhofer</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="elizabeth-merkhofer"><first>Elizabeth</first><last>Merkhofer</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <author><first>David</first><last>Bloom</last></author>
       <author><first>Laura</first><last>Strickhart</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
@@ -2115,9 +2115,9 @@
     <paper id="183">
       <title><fixed-case>ITNLP</fixed-case>-<fixed-case>ARC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: Argument Reasoning Comprehension with Attention</title>
       <author><first>Wenjie</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Lei</first><last>Lin</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <pages>1089–1093</pages>
       <abstract>Reasoning is a very important topic and has many important applications in the field of natural language processing. Semantic Evaluation (SemEval) 2018 Task 12 “The Argument Reasoning Comprehension” committed to research natural language reasoning. In this task, we proposed a novel argument reasoning comprehension system, ITNLP-ARC, which use Neural Networks technology to solve this problem. In our system, the LSTM model is involved to encode both the premise sentences and the warrant sentences. The attention model is used to merge the two premise sentence vectors. Through comparing the similarity between the attention vector and each of the two warrant vectors, we choose the one with higher similarity as our system’s final answer.</abstract>
       <url hash="6b4476fe">S18-1183</url>
@@ -2126,8 +2126,8 @@
     </paper>
     <paper id="184">
       <title><fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: An End-to-End Attention-based Neural Network for the Argument Reasoning Comprehension Task</title>
-      <author><first>Junfeng</first><last>Tian</last></author>
-      <author><first>Man</first><last>Lan</last></author>
+      <author id="junfeng-tian"><first>Junfeng</first><last>Tian</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
       <author><first>Yuanbin</first><last>Wu</last></author>
       <pages>1094–1098</pages>
       <abstract>This paper presents our submissions to SemEval 2018 Task 12: the Argument Reasoning Comprehension Task. We investigate an end-to-end attention-based neural network to represent the two lexically close candidate warrants. On the one hand, we extract their different parts as attention vectors to obtain distinguishable representations. On the other hand, we use their surrounds (i.e., claim, reason, debate context) as another attention vectors to get contextual representations, which work as final clues to select the correct warrant. Our model achieves 60.4% accuracy and ranks 3rd among 22 participating systems.</abstract>
@@ -2194,9 +2194,9 @@
     <paper id="190">
       <title><fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: Generative Implication using <fixed-case>LSTM</fixed-case>s, <fixed-case>S</fixed-case>iamese Networks and Semantic Representations with Synonym Fuzzing</title>
       <author><first>Anirudh</first><last>Joshi</last></author>
-      <author><first>Tim</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></author>
       <author><first>Richard O.</first><last>Sinnott</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>1124–1128</pages>
       <abstract>This paper describes a warrant classification system for SemEval 2018 Task 12, that attempts to learn semantic representations of reasons, claims and warrants. The system consists of 3 stacked LSTMs: one for the reason, one for the claim, and one shared Siamese Network for the 2 candidate warrants. Our main contribution is to force the embeddings into a shared feature space using vector operations, semantic similarity classification, Siamese networks, and multi-task learning. In doing so, we learn a form of generative implication, in encoding implication interrelationships between reasons, claims, and the associated correct and incorrect warrants. We augment the limited data in the task further by utilizing WordNet synonym “fuzzing”. When applied to SemEval 2018 Task 12, our system performs well on the development data, and officially ranked 8th among 21 teams.</abstract>
       <url hash="a382f263">S18-1190</url>
@@ -2206,7 +2206,7 @@
     <paper id="191">
       <title>Joker at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: The Argument Reasoning Comprehension with Neural Attention</title>
       <author><last>Sui</last> <first>Guobin</first></author>
-      <author><last>Chao</last> <first>Wenhan</first></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <author><last>Luo</last> <first>Zhunchen</first></author>
       <pages>1129–1132</pages>
       <abstract>This paper describes a classification system that participated in the SemEval-2018 Task 12: The Argument Reasoning Comprehension Task. Briefly the task can be described as that a natural language “argument” is what we have, with reason, claim, and correct and incorrect warrants, and we need to choose the correct warrant. In order to make fully understand of the semantic information of the sentences, we proposed a neural network architecture with attention mechanism to achieve this goal. Besides we try to introduce keywords into the model to improve accuracy. Finally the proposed system achieved 5th place among 22 participating systems</abstract>
@@ -2269,7 +2269,7 @@
     <paper id="1">
       <title>Resolving Event Coreference with Supervised Representation Learning and Clustering-Oriented Regularization</title>
       <author><first>Kian</first><last>Kenyon-Dean</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Doina</first><last>Precup</last></author>
       <pages>1–10</pages>
       <abstract>We present an approach to event coreference resolution by developing a general framework for clustering that uses supervised representation learning. We propose a neural network architecture with novel Clustering-Oriented Regularization (CORE) terms in the objective function. These terms encourage the model to create embeddings of event mentions that are amenable to clustering. We then use agglomerative clustering on these embeddings to build event coreference chains. For both within- and cross-document coreference on the ECB+ corpus, our model obtains better results than models that require significantly more pre-annotated information. This work provides insight and motivating results for a new general approach to solving coreference and clustering problems with representation learning.</abstract>
@@ -2280,7 +2280,7 @@
     <paper id="2">
       <title>Learning distributed event representations with a multi-task approach</title>
       <author><first>Xudong</first><last>Hong</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>11–21</pages>
       <abstract>Human world knowledge contains information about prototypical events and their participants and locations. In this paper, we train the first models using multi-task learning that can both predict missing event participants and also perform semantic role classification based on semantic plausibility. Our best-performing model is an improvement over the previous state-of-the-art on thematic fit modelling tasks. The event embeddings learned by the model can additionally be used effectively in an event similarity task, also outperforming the state-of-the-art.</abstract>
@@ -2290,7 +2290,7 @@
     </paper>
     <paper id="3">
       <title>Assessing Meaning Components in <fixed-case>G</fixed-case>erman Complex Verbs: A Collection of Source-Target Domains and Directionality</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
       <author><first>Sylvia</first><last>Springorum</last></author>
       <pages>22–32</pages>
@@ -2325,7 +2325,7 @@
     <paper id="5">
       <title>Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>43–53</pages>
       <abstract>Automatic machine learning systems can inadvertently accentuate and perpetuate inappropriate human biases. Past work on examining inappropriate biases has largely focused on just individual systems. Further, there is no benchmark dataset for examining inappropriate biases in systems. Here for the first time, we present the Equity Evaluation Corpus (EEC), which consists of 8,640 English sentences carefully chosen to tease out biases towards certain races and genders. We use the dataset to examine 219 automatic sentiment analysis systems that took part in a recent shared task, SemEval-2018 Task 1 ‘Affect in Tweets’. We find that several of the systems show statistically significant bias; that is, they consistently provide slightly higher sentiment intensity predictions for one race or one gender. We make the EEC freely available.</abstract>
       <url hash="42b4d248">S18-2005</url>
@@ -2356,7 +2356,7 @@
       <title>Quantitative Semantic Variation in the Contexts of Concrete and Abstract Words</title>
       <author><first>Daniela</first><last>Naumann</last></author>
       <author><first>Diego</first><last>Frassinelli</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>76–85</pages>
       <abstract>Across disciplines, researchers are eager to gain insight into empirical features of abstract vs. concrete concepts. In this work, we provide a detailed characterisation of the distributional nature of abstract and concrete words across 16,620 English nouns, verbs and adjectives. Specifically, we investigate the following questions: (1) What is the distribution of concreteness in the contexts of concrete and abstract target words? (2) What are the differences between concrete and abstract words in terms of contextual semantic diversity? (3) How does the entropy of concrete and abstract word contexts differ? Overall, our studies show consistent differences in the distributional representation of concrete and abstract words, thus challenging existing theories of cognition and providing a more fine-grained description of their nature.</abstract>
       <url hash="907c91aa">S18-2008</url>
@@ -2368,7 +2368,7 @@
       <author><first>Gilbert</first><last>Badaro</last></author>
       <author><first>Hussein</first><last>Jundi</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <pages>86–93</pages>
       <abstract>Nowadays, social media have become a platform where people can easily express their opinions and emotions about any topic such as politics, movies, music, electronic products and many others. On the other hand, politicians, companies, and businesses are interested in analyzing automatically people’s opinions and emotions. In the last decade, a lot of efforts has been put into extracting sentiment polarity from texts. Recently, the focus has expanded to also cover emotion recognition from texts. In this work, we expand an existing emotion lexicon, DepecheMood, by leveraging semantic knowledge from English WordNet (EWN). We create an expanded lexicon, EmoWordNet, consisting of 67K terms aligned with EWN, almost 1.8 times the size of DepecheMood. We also evaluate EmoWordNet in an emotion recognition task using SemEval 2007 news headlines dataset and we achieve an improvement compared to the use of DepecheMood. EmoWordNet is publicly available to speed up research in the field on <url>http://oma-project.com</url>.</abstract>
       <url hash="5f1c7beb">S18-2009</url>
@@ -2389,7 +2389,7 @@
     <paper id="11">
       <title>How Gender and Skin Tone Modifiers Affect Emoji Semantics in <fixed-case>T</fixed-case>witter</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>101–106</pages>
       <abstract>In this paper we analyze the use of emojis in social media with respect to gender and skin tone. By gathering a dataset of over twenty two million tweets from United States some findings are clearly highlighted after performing a simple frequency-based analysis. Moreover, we carry out a semantic analysis on the usage of emojis and their modifiers (e.g. gender and skin tone) by embedding all words, emojis and modifiers into the same vector space. Our analyses reveal that some stereotypes related to the skin color and gender seem to be reflected on the use of these modifiers. For example, emojis representing hand gestures are more widely utilized with lighter skin tones, and the usage across skin tones differs significantly. At the same time, the vector corresponding to the male modifier tends to be semantically close to emojis related to business or technology, whereas their female counterparts appear closer to emojis about love or makeup.</abstract>
       <url hash="327e0759">S18-2011</url>
@@ -2420,7 +2420,7 @@
     <paper id="14">
       <title>Learning Patient Representations from Text</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>119–123</pages>
       <abstract>Mining electronic health records for patients who satisfy a set of predefined criteria is known in medical informatics as phenotyping. Phenotyping has numerous applications such as outcome prediction, clinical trial recruitment, and retrospective studies. Supervised machine learning for phenotyping typically relies on sparse patient representations such as bag-of-words. We consider an alternative that involves learning patient representations. We develop a neural network model for learning patient representations and show that the learned representations are general enough to obtain state-of-the-art performance on a standard comorbidity detection task.</abstract>
       <url hash="2d079f55">S18-2014</url>
@@ -2440,8 +2440,8 @@
     <paper id="16">
       <title>Coarse Lexical Frame Acquisition at the Syntax–Semantics Interface Using a Latent-Variable <fixed-case>PCFG</fixed-case> Model</title>
       <author><first>Laura</first><last>Kallmeyer</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>130–141</pages>
       <abstract>We present a method for unsupervised lexical frame acquisition at the syntax–semantics interface. Given a set of input strings derived from dependency parses, our method generates a set of clusters that resemble lexical frame structures. Our work is motivated not only by its practical applications (e.g., to build, or expand the coverage of lexical frame databases), but also to gain linguistic insight into frame structures with respect to lexical distributions in relation to grammatical structures. We model our task using a hierarchical Bayesian network and employ tools and methods from latent variable probabilistic context free grammars (L-PCFGs) for statistical inference and parameter fitting, for which we propose a new split and merge procedure. We show that our model outperforms several baselines on a portion of the Wall Street Journal sentences that we have newly annotated for evaluation purposes.</abstract>
       <url hash="88de7c23">S18-2016</url>
@@ -2489,7 +2489,7 @@
     </paper>
     <paper id="20">
       <title>Integrating Multiplicative Features into Supervised Distributional Methods for Lexical Entailment</title>
-      <author><first>Tu</first><last>Vu</last></author>
+      <author id="tu-vu"><first>Tu</first><last>Vu</last></author>
       <author><first>Vered</first><last>Shwartz</last></author>
       <pages>160–166</pages>
       <abstract>Supervised distributional methods are applied successfully in lexical entailment, but recent work questioned whether these methods actually learn a relation between two words. Specifically, Levy et al. (2015) claimed that linear classifiers learn only separate properties of each word. We suggest a cheap and easy way to boost the performance of these methods by integrating multiplicative features into commonly used representations. We provide an extensive evaluation with different classifiers and evaluation setups, and suggest a suitable evaluation setup for the task, eliminating biases existing in previous ones.</abstract>
@@ -2558,7 +2558,7 @@
       <title>Agree or Disagree: Predicting Judgments on Nuanced Assertions</title>
       <author><first>Michael</first><last>Wojatzki</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
       <pages>214–224</pages>
       <abstract>Being able to predict whether people agree or disagree with an assertion (i.e. an explicit, self-contained statement) has several applications ranging from predicting how many people will like or dislike a social media post to classifying posts based on whether they are in accordance with a particular point of view. We formalize this as two NLP tasks: predicting judgments of (i) individuals and (ii) groups based on the text of the assertion and previous judgments. We evaluate a wide range of approaches on a crowdsourced data set containing over 100,000 judgments on over 2,000 assertions. We find that predicting individual judgments is a hard task with our best results only slightly exceeding a majority baseline, but that judgments of groups can be more reliably predicted using a Siamese neural network, which outperforms all other approaches by a wide margin.</abstract>
@@ -2580,7 +2580,7 @@
     </paper>
     <paper id="28">
       <title>Putting Semantics into Semantic Roles</title>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <pages>235–244</pages>
       <abstract>While there have been many proposals for theories of semantic roles over the years, these models are mostly justified by intuition and the only evaluation methods have been inter-annotator agreement. We explore three different ideas for providing more rigorous theories of semantic roles. These ideas give rise to more objective criteria for designing role sets, and lend themselves to some experimental evaluation. We illustrate the discussion by examining the semantic roles in TRIPS.</abstract>
@@ -2591,7 +2591,7 @@
     <paper id="29">
       <title>Measuring Frame Instance Relatedness</title>
       <author><first>Valerio</first><last>Basile</last></author>
-      <author><first>Roque</first><last>Lopez Condori</last></author>
+      <author id="roque-lopez-condori"><first>Roque</first><last>Lopez Condori</last></author>
       <author><first>Elena</first><last>Cabrio</last></author>
       <pages>245–254</pages>
       <abstract>Frame semantics is a well-established framework to represent the meaning of natural language in computational terms. In this work, we aim to propose a quantitative measure of relatedness between pairs of frame instances. We test our method on a dataset of sentence pairs, highlighting the correlation between our metric and human judgments of semantic similarity. Furthermore, we propose an application of our measure for clustering frame instances to extract prototypical knowledge from natural language.</abstract>
@@ -2624,7 +2624,7 @@
     </paper>
     <paper id="32">
       <title>Multiplicative Tree-Structured Long Short-Term Memory Networks for Semantic Representations</title>
-      <author><first>Nam Khanh</first><last>Tran</last></author>
+      <author id="nam-khanh-tran"><first>Nam Khanh</first><last>Tran</last></author>
       <author><first>Weiwei</first><last>Cheng</last></author>
       <pages>276–286</pages>
       <abstract>Tree-structured LSTMs have shown advantages in learning semantic representations by exploiting syntactic information. Most existing methods model tree structures by bottom-up combinations of constituent nodes using the same shared compositional function and often making use of input word information only. The inability to capture the richness of compositionality makes these models lack expressive power. In this paper, we propose multiplicative tree-structured LSTMs to tackle this problem. Our model makes use of not only word information but also relation information between words. It is more expressive, as different combination functions can be used for each child node. In addition to syntactic trees, we also investigate the use of Abstract Meaning Representation in tree-structured models, in order to incorporate both syntactic and semantic information from the sentence. Experimental results on common NLP tasks show the proposed models lead to better sentence representation and AMR brings benefits in complex tasks.</abstract>
diff --git a/data/xml/S19.xml b/data/xml/S19.xml
index a1961b1730..baafe7e447 100644
--- a/data/xml/S19.xml
+++ b/data/xml/S19.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*<fixed-case>SEM</fixed-case> 2019)</booktitle>
       <url hash="da651103">S19-1</url>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <editor><first>Ekaterina</first><last>Shutova</last></editor>
       <editor><first>Lun-Wei</first><last>Ku</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
@@ -23,7 +23,7 @@
       <title><fixed-case>SUR</fixed-case>el: A Gold Standard for Incorporating Meaning Shifts into Term Extraction</title>
       <author><first>Anna</first><last>Hätty</last></author>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>1–8</pages>
       <abstract>We introduce SURel, a novel dataset with human-annotated meaning shifts between general-language and domain-specific contexts. We show that meaning shifts of term candidates cause errors in term extraction, and demonstrate that the SURel annotation reflects these errors. Furthermore, we illustrate that SURel enables us to assess optimisations of term extraction techniques when incorporating meaning shifts.</abstract>
       <url hash="cd5e172d">S19-1001</url>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="2">
       <title>Word Usage Similarity Estimation with Sentence Representations and Automatic Substitutes</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <pages>9–21</pages>
@@ -54,7 +54,7 @@
     <paper id="4">
       <title>Composition of Sentence Embeddings: Lessons from Statistical Relational Learning</title>
       <author><first>Damien</first><last>Sileo</last></author>
-      <author><first>Tim</first><last>Van De Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van De Cruys</last></author>
       <author><first>Camille</first><last>Pradel</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <pages>33–43</pages>
@@ -78,7 +78,7 @@
     <paper id="6">
       <title>Scalable Cross-Lingual Transfer of Neural Sentence Embeddings</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>51–60</pages>
       <abstract>We develop and investigate several cross-lingual alignment approaches for neural sentence embedding models, such as the supervised inference classifier, InferSent, and sequential encoder-decoder models. We evaluate three alignment frameworks applied to these models: joint modeling, representation transfer learning, and sentence mapping, using parallel text to guide the alignment. Our results support representation transfer as a scalable approach for modular cross-lingual alignment of neural sentence embeddings, where we observe better performance compared to joint models in intrinsic and extrinsic evaluations, particularly with smaller sets of parallel data.</abstract>
       <url hash="affb7ac2">S19-1006</url>
@@ -88,7 +88,7 @@
     <paper id="7">
       <title>Second-order contexts from lexical substitutes for few-shot learning of word representations</title>
       <author><first>Qianchu</first><last>Liu</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>61–67</pages>
       <abstract>There is a growing awareness of the need to handle rare and unseen words in word representation modelling. In this paper, we focus on few-shot learning of emerging concepts that fully exploits only a few available contexts. We introduce a substitute-based context representation technique that can be applied on an existing word embedding space. Previous context-based approaches to modelling unseen words only consider bag-of-word first-order contexts, whereas our method aggregates contexts as second-order substitutes that are produced by a sequence-aware sentence completion model. We experimented with three tasks that aim to test the modelling of emerging concepts. We found that these tasks show different emphasis on first and second order contexts, and our substitute-based method achieves superior performance on naturally-occurring contexts from corpora.</abstract>
@@ -156,7 +156,7 @@
       <title>Deconstructing multimodality: visual properties and visual context in human semantic processing</title>
       <author><first>Christopher</first><last>Davis</last></author>
       <author><first>Luana</first><last>Bulat</last></author>
-      <author><first>Anita Lilla</first><last>Vero</last></author>
+      <author id="anita-lilla-vero"><first>Anita Lilla</first><last>Vero</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
       <pages>118–124</pages>
       <abstract>Multimodal semantic models that extend linguistic representations with additional perceptual input have proved successful in a range of natural language processing (NLP) tasks. Recent research has successfully used neural methods to automatically create visual representations for words. However, these works have extracted visual features from complete images, and have not examined how different kinds of visual information impact performance. In contrast, we construct multimodal models that differentiate between internal visual properties of the objects and their external visual context. We evaluate the models on the task of decoding brain activity associated with the meanings of nouns, demonstrating their advantage over those based on complete images.</abstract>
@@ -169,7 +169,7 @@
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Mohammad</first><last>Dorgham</last></author>
       <author><first>Oleksiy</first><last>Oliynyk</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>125–135</pages>
       <abstract>We present path2vec, a new approach for learning graph embeddings that relies on structural measures of pairwise node similarities. The model learns representations for nodes in a dense space that approximate a given user-defined graph distance measure, such as e.g. the shortest path distance or distance measures that take information beyond the graph structure into account. Evaluation of the proposed model on semantic similarity and word sense disambiguation tasks, using various WordNet-based similarity measures, show that our approach yields competitive results, outperforming strong graph embedding baselines. The model is computationally efficient, being orders of magnitude faster than the direct computation of graph-based distances.</abstract>
@@ -180,7 +180,7 @@
     <paper id="15">
       <title>Neural User Factor Adaptation for Text Classification: Learning to Generalize Across Author Demographics</title>
       <author><first>Xiaolei</first><last>Huang</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>136–146</pages>
       <abstract>Language use varies across different demographic factors, such as gender, age, and geographic location. However, most existing document classification methods ignore demographic variability. In this study, we examine empirically how text data can vary across four demographic factors: gender, age, country, and region. We propose a multitask neural model to account for demographic variations via adversarial training. In experiments on four English-language social media datasets, we find that classification performance improves when adapting for user factors.</abstract>
       <url hash="4eb7c418">S19-1015</url>
@@ -189,7 +189,7 @@
     </paper>
     <paper id="16">
       <title>Abstract Graphs and Abstract Paths for Knowledge Graph Completion</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Bhushan</first><last>Kotnis</last></author>
       <pages>147–157</pages>
       <abstract>Knowledge graphs, which provide numerous facts in a machine-friendly format, are incomplete. Information that we induce from such graphs – e.g. entity embeddings, relation representations or patterns – will be affected by the imbalance in the information captured in the graph – by biasing representations, or causing us to miss potential patterns. To partially compensate for this situation we describe a method for representing knowledge graphs that capture an intensional representation of the original extensional information. This representation is very compact, and it abstracts away from individual links, allowing us to find better path candidates, as shown by the results of link prediction using this information.</abstract>
@@ -252,7 +252,7 @@
     <paper id="22">
       <title>Improving Human Needs Categorization of Events with Semantic Classification</title>
       <author><first>Haibo</first><last>Ding</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Zhe</first><last>Feng</last></author>
       <pages>198–204</pages>
       <abstract>Human Needs categories have been used to characterize the reason why an affective event is positive or negative. For example, “I got the flu” and “I got fired” are both negative (undesirable) events, but getting the flu is a Health problem while getting fired is a Financial problem. Previous work created learning models to assign events to Human Needs categories based on their words and contexts. In this paper, we introduce an intermediate step that assigns words to relevant semantic concepts. We create lightly supervised models that learn to label words with respect to 10 semantic concepts associated with Human Needs categories, and incorporate these labels as features for event categorization. Our results show that recognizing relevant semantic concepts improves both the recall and precision of Human Needs categorization for events.</abstract>
@@ -299,12 +299,12 @@
       <author><first>Adam</first><last>Poliak</last></author>
       <author><first>Alex</first><last>Wang</last></author>
       <author><first>Patrick</first><last>Xia</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Ian</first><last>Tenney</last></author>
       <author><first>Alexis</first><last>Ross</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
       <pages>235–249</pages>
       <abstract>We introduce a set of nine challenge tasks that test for the understanding of function words. These tasks are created by structurally mutating sentences from existing datasets to target the comprehension of specific types of function words (e.g., prepositions, wh-words). Using these probing tasks, we explore the effects of various pretraining objectives for sentence encoders (e.g., language modeling, CCG supertagging and natural language inference (NLI)) on the learned representations. Our results show that pretraining on CCG—our most syntactic objective—performs the best on average across our probing tasks, suggesting that syntactic knowledge helps function word comprehension. Language modeling also shows strong performance, supporting its widespread use for pretraining state-of-the-art NLP models. Overall, no pretraining objective dominates across the board, and our function word probing tasks highlight several intuitive differences between pretraining objectives, e.g., that NLI helps the comprehension of negation.</abstract>
@@ -333,9 +333,9 @@
       <title>On Adversarial Removal of Hypothesis-only Bias in Natural Language Inference</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
       <author><first>Adam</first><last>Poliak</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>256–262</pages>
       <abstract>Popular Natural Language Inference (NLI) datasets have been shown to be tainted by hypothesis-only biases. Adversarial learning may help models ignore sensitive biases and spurious correlations in data. We evaluate whether adversarial learning can be used in NLI to encourage models to learn representations free of hypothesis-only biases. Our analyses indicate that the representations learned via adversarial learning may be less biased, with only small drops in NLI accuracy.</abstract>
       <url hash="ffb6c4fc">S19-1028</url>
@@ -357,9 +357,9 @@
     </paper>
     <paper id="30">
       <title>Target Based Speech Act Classification in Political Campaign Text</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>273–282</pages>
       <abstract>We study pragmatics in political campaign text, through analysis of speech acts and the target of each utterance. We propose a new annotation schema incorporating domain-specific speech acts, such as commissive-action, and present a novel annotated corpus of media releases and speech transcripts from the 2016 Australian election cycle. We show how speech acts and target referents can be modeled as sequential classification, and evaluate several techniques, exploiting contextualized word representations, semi-supervised learning, task dependencies and speaker meta-data.</abstract>
       <url hash="a8dd8298">S19-1030</url>
@@ -401,10 +401,10 @@
       <url hash="c6884df0">S19-2</url>
       <editor><first>Jonathan</first><last>May</last></editor>
       <editor><first>Ekaterina</first><last>Shutova</last></editor>
-      <editor><first>Aurelie</first><last>Herbelot</last></editor>
+      <editor id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></editor>
       <editor><first>Xiaodan</first><last>Zhu</last></editor>
       <editor><first>Marianna</first><last>Apidianaki</last></editor>
-      <editor><first>Saif M.</first><last>Mohammad</last></editor>
+      <editor id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, Minnesota, USA</address>
       <month>June</month>
@@ -446,11 +446,11 @@
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Lexical Frame Induction</title>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <author><first>Regina</first><last>Stodden</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>16–30</pages>
       <abstract>This paper presents Unsupervised Lexical Frame Induction, Task 2 of the International Workshop on Semantic Evaluation in 2019. Given a set of prespecified syntactic forms in context, the task requires that verbs and their arguments be clustered to resemble semantic frame structures. Results are useful in identifying polysemous words, i.e., those whose frame structures are not easily distinguished, as well as discerning semantic relations of the arguments. Evaluation of unsupervised frame induction methods fell into two tracks: Task A) Verb Clustering based on FrameNet 1.7; and B) Argument Clustering, with B.1) based on FrameNet’s core frame elements, and B.2) on VerbNet 3.2 semantic roles. The shared task attracted nine teams, of whom three reported promising results. This paper describes the task and its data, reports on methods and resources that these systems used, and offers a comparison to human annotation.</abstract>
       <url hash="3a330801">S19-2003</url>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="4">
       <title>Neural <fixed-case>GRANN</fixed-case>y at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: A combined approach for better modeling of semantic relationships in semantic frame induction</title>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
       <author><first>Boris</first><last>Sheludko</last></author>
       <author><first>Adis</first><last>Davletov</last></author>
       <author><first>Dmitry</first><last>Kharchev</last></author>
@@ -487,7 +487,7 @@
       <title><fixed-case>ANA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Contextual Emotion detection in Conversations through hierarchical <fixed-case>LSTM</fixed-case>s and <fixed-case>BERT</fixed-case></title>
       <author><first>Chenyang</first><last>Huang</last></author>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaïane</last></author>
       <pages>49–53</pages>
       <abstract>This paper describes the system submitted by ANA Team for the SemEval-2019 Task 3: EmoContext. We propose a novel Hierarchi- cal LSTMs for Contextual Emotion Detection (HRLCE) model. It classifies the emotion of an utterance given its conversational con- text. The results show that, in this task, our HRCLE outperforms the most recent state-of- the-art text classification framework: BERT. We combine the results generated by BERT and HRCLE to achieve an overall score of 0.7709 which ranked 5th on the final leader board of the competition among 165 Teams.</abstract>
       <url hash="8c794dd4">S19-2006</url>
@@ -524,7 +524,7 @@
       <title><fixed-case>FERMI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Using Sentence embeddings to Identify Hate Speech Against Immigrants and Women in <fixed-case>T</fixed-case>witter</title>
       <author><first>Vijayasaradhi</first><last>Indurthi</last></author>
       <author><first>Bakhtiyar</first><last>Syed</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Nikhil</first><last>Chakravartula</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
@@ -537,8 +537,8 @@
     <paper id="10">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying and Categorizing Offensive Language in Social Media (<fixed-case>O</fixed-case>ffens<fixed-case>E</fixed-case>val)</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Noura</first><last>Farra</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
@@ -565,7 +565,7 @@
       <author><first>Sheng</first><last>Huang</last></author>
       <author><first>Abdul Rafae</first><last>Khan</last></author>
       <author><first>Shengqiang</first><last>Zhang</last></author>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Jia</first><last>Xu</last></author>
       <pages>92–96</pages>
       <abstract>This paper describes the systems of the CUNY-PKU team in SemEval 2019 Task 1: Cross-lingual Semantic Parsing with UCCA. We introduce a novel model by applying a cascaded MLP and BiLSTM model. Then, we ensemble multiple system-outputs by reparsing. In particular, we introduce a new decoding algorithm for building the UCCA representation. Our system won the first place in one track (French-20K-Open), second places in four tracks (English-Wiki-Open, English-20K-Open, German-20K-Open, and German-20K-Closed), and third place in one track (English-20K-Closed), among all seven tracks.</abstract>
@@ -600,7 +600,7 @@
       <title><fixed-case>M</fixed-case>ask<fixed-case>P</fixed-case>arse@Deskin at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 1: Cross-lingual <fixed-case>UCCA</fixed-case> Semantic Parsing using Recursive Masked Sequence Tagging</title>
       <author><first>Gabriel</first><last>Marzinotto</last></author>
       <author><first>Johannes</first><last>Heinecke</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>107–112</pages>
       <abstract>This paper describes our recursive system for SemEval-2019 Task 1: Cross-lingual Semantic Parsing with UCCA. Each recursive step consists of two parts. We first perform semantic parsing using a sequence tagger to estimate the probabilities of the UCCA categories in the sentence. Then, we apply a decoding policy which interprets these probabilities and builds the graph nodes. Parsing is done recursively, we perform a first inference on the sentence to extract the main scenes and links and then we recursively apply our model on the sentence using a masking features that reflects the decisions made in previous steps. Process continues until the terminal nodes are reached. We chose a standard neural tagger and we focus on our recursive parsing strategy and on the cross lingual transfer problem to develop a robust model for the French language, using only few training samples</abstract>
       <url hash="14b9d317">S19-2015</url>
@@ -632,9 +632,9 @@
       <title><fixed-case>HHMM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Frame Induction using Contextualized Word Embeddings</title>
       <author><first>Saba</first><last>Anwar</last></author>
       <author><first>Dmitry</first><last>Ustalov</last></author>
-      <author><first>Nikolay</first><last>Arefyev</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefyev</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>125–129</pages>
       <abstract>We present our system for semantic frame induction that showed the best performance in Subtask B.1 and finished as the runner-up in Subtask A of the SemEval 2019 Task 2 on unsupervised semantic frame induction (Qasem-iZadeh et al., 2019). Our approach separates this task into two independent steps: verb clustering using word and their context embeddings and role labeling by combining these embeddings with syntactical features. A simple combination of these steps shows very competitive results and can be extended to process other datasets and languages.</abstract>
@@ -646,11 +646,11 @@
       <title><fixed-case>L</fixed-case>2<fixed-case>F</fixed-case>/<fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Lexical Semantic Frame Induction using Contextualized Word Representations</title>
       <author><first>Eugénio</first><last>Ribeiro</last></author>
       <author><first>Vânia</first><last>Mendonça</last></author>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
       <author><first>Alberto</first><last>Sardinha</last></author>
       <author><first>Ana Lúcia</first><last>Santos</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>130–136</pages>
       <abstract>Building large datasets annotated with semantic information, such as FrameNet, is an expensive process. Consequently, such resources are unavailable for many languages and specific domains. This problem can be alleviated by using unsupervised approaches to induce the frames evoked by a collection of documents. That is the objective of the second task of SemEval 2019, which comprises three subtasks: clustering of verbs that evoke the same frame and clustering of arguments into both frame-specific slots and semantic roles. We approach all the subtasks by applying a graph clustering algorithm on contextualized embedding representations of the verbs and arguments. Using such representations is appropriate in the context of this task, since they provide cues for word-sense disambiguation. Thus, they can be used to identify different frames evoked by the same words. Using this approach we were able to outperform all of the baselines reported for the task on the test set in terms of Purity F1, as well as in terms of BCubed F1 in most cases.</abstract>
       <url hash="7a9c26fe">S19-2019</url>
@@ -668,7 +668,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>CA</fixed-case>i<fixed-case>RE</fixed-case>_<fixed-case>HKUST</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Hierarchical Attention for Dialogue Emotion Classification</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Jamin</first><last>Shin</last></author>
@@ -728,7 +728,7 @@
       <author><first>Ana Valeria</first><last>González</last></author>
       <author><first>Victor</first><last>Petrén Bach Hansen</last></author>
       <author><first>Joachim</first><last>Bingel</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>169–174</pages>
       <abstract>This work describes the system presented by the CoAStaL Natural Language Processing group at University of Copenhagen. The main system we present uses the same attention mechanism presented in (Yang et al., 2016). Our overall model architecture is also inspired by their hierarchical classification model and adapted to deal with classification in dialogue by encoding information at the turn level. We use different encodings for each turn to create a more expressive representation of dialogue context which is then fed into our classifier. We also define a custom preprocessing step in order to deal with language commonly used in interactions across many social media outlets. Our proposed system achieves a micro F1 score of 0.7340 on the test set and shows significant gains in performance compared to a system using dialogue level encoding.</abstract>
       <url hash="a92b0b56">S19-2026</url>
@@ -778,7 +778,7 @@
     <paper id="31">
       <title><fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Snapshot Ensemble of Hierarchical Convolutional Neural Networks for Contextual Emotion Detection</title>
       <author><first>José-Ángel</first><last>González</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Ferran</first><last>Pla</last></author>
       <pages>195–199</pages>
       <abstract>This paper describes the approach developed by the ELiRF-UPV team at SemEval 2019 Task 3: Contextual Emotion Detection in Text. We have developed a Snapshot Ensemble of 1D Hierarchical Convolutional Neural Networks to extract features from 3-turn conversations in order to perform contextual emotion detection in text. This Snapshot Ensemble is obtained by averaging the models selected by a Genetic Algorithm that optimizes the evaluation measure. The proposed ensemble obtains better results than a single model and it obtains competitive and promising results on Contextual Emotion Detection in Text.</abstract>
@@ -818,7 +818,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>EPITA</fixed-case>-<fixed-case>ADAPT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Detecting emotions in textual conversations using deep learning models combination</title>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
       <author><first>Praveen</first><last>Joshi</last></author>
       <author><first>Latifa</first><last>Bouchekif</last></author>
       <author><first>Haithem</first><last>Afli</last></author>
@@ -849,7 +849,7 @@
     <paper id="38">
       <title><fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> Lab at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3 : <fixed-case>E</fixed-case>mo<fixed-case>C</fixed-case>ontext: Effectiveness of<fixed-case>C</fixed-case>ontextual Information in Models for Emotion Detection in<fixed-case>S</fixed-case>entence-level at Multi-genre Corpus</title>
       <author><first>Shabnam</first><last>Tafreshi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>230–235</pages>
       <abstract>In this paper we present an emotion classifier models that submitted to the SemEval-2019 Task 3 : <i>EmoContext</i>. Our approach is a Gated Recurrent Neural Network (GRU) model with attention layer is bootstrapped with contextual information and trained with a multigenre corpus, which is combination of several popular emotional data sets. We utilize different word embeddings to empirically select the most suited embedding to represent our features. Our aim is to build a robust emotion classifier that can generalize emotion detection, which is to learn emotion cues in a noisy training environment. To fulfill this aim we train our model with a multigenre emotion corpus, this way we leverage from having more training set. We achieved overall %56.05 f1-score and placed 144. Given our aim and noisy training environment, the results are anticipated.</abstract>
       <url hash="f842f76c">S19-2038</url>
@@ -861,7 +861,7 @@
       <author><first>Arik</first><last>Pamnani</last></author>
       <author><first>Rajat</first><last>Goel</last></author>
       <author><first>Jayesh</first><last>Choudhari</last></author>
-      <author id="mayank-singh"><first>Mayank</first><last>Singh</last></author>
+      <author><first>Mayank</first><last>Singh</last></author>
       <pages>236–240</pages>
       <abstract>Recent advancements in Internet and Mobile infrastructure have resulted in the development of faster and efficient platforms of communication. These platforms include speech, facial and text-based conversational mediums. Majority of these are text-based messaging platforms. Development of Chatbots that automatically understand latent emotions in the textual message is a challenging task. In this paper, we present an automatic emotion detection system that aims to detect the emotion of a person textually conversing with a chatbot. We explore deep learning techniques such as CNN and LSTM based neural networks and outperformed the baseline score by 14%. The trained model and code are kept in public domain.</abstract>
       <url hash="6710cbb0">S19-2039</url>
@@ -1012,9 +1012,9 @@
     <paper id="53">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Using affective features for emotion classification in textual conversations</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>M. Dolores</first><last>Molina-González</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-González</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>307–311</pages>
       <abstract>Detecting emotions in textual conversation is a challenging problem in absence of nonverbal cues typically associated with emotion, like fa- cial expression or voice modulations. How- ever, more and more users are using message platforms such as WhatsApp or Telegram. For this reason, it is important to develop systems capable of understanding human emotions in textual conversations. In this paper, we carried out different systems to analyze the emotions of textual dialogue from SemEval-2019 Task 3: EmoContext for English language. Our main contribution is the integration of emotional and sentimental features in the classification using the SVM algorithm.</abstract>
       <url hash="57c05cce">S19-2053</url>
@@ -1048,7 +1048,7 @@
       <title><fixed-case>SWAP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Emotion detection in conversations through Tweets, <fixed-case>CNN</fixed-case> and <fixed-case>LSTM</fixed-case> deep neural networks</title>
       <author><first>Marco</first><last>Polignano</last></author>
       <author><first>Marco</first><last>de Gemmis</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>324–329</pages>
       <abstract>Emotion detection from user-generated contents is growing in importance in the area of natural language processing. The approach we proposed for the EmoContext task is based on the combination of a CNN and an LSTM using a concatenation of word embeddings. A stack of convolutional neural networks (CNN) is used for capturing the hierarchical hidden relations among embedding features. Meanwhile, a long short-term memory network (LSTM) is used for capturing information shared among words of the sentence. Each conversation has been formalized as a list of word embeddings, in particular during experimental runs pre-trained Glove and Google word embeddings have been evaluated. Surface lexical features have been also considered, but they have been demonstrated to be not usefully for the classification in this specific task. The final system configuration achieved a micro F1 score of 0.7089. The python code of the system is fully available at <url>https://github.com/marcopoli/EmoContext2019</url></abstract>
       <url hash="dc0c2807">S19-2056</url>
@@ -1061,7 +1061,7 @@
       <author><first>Angelo</first><last>Basile</last></author>
       <author><first>Marc</first><last>Franco-Salvador</last></author>
       <author><first>Neha</first><last>Pawar</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Mara</first><last>Chinea Rios</last></author>
       <author><first>Yassine</first><last>Benajiba</last></author>
       <pages>330–334</pages>
@@ -1106,7 +1106,7 @@
       <title><fixed-case>T</fixed-case>okyo<fixed-case>T</fixed-case>ech_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Emotion-related Symbols in Emotion Detection</title>
       <author><first>Zhishen</first><last>Yang</last></author>
       <author><first>Sam</first><last>Vijlbrief</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>350–354</pages>
       <abstract>This paper presents our contextual emotion detection system in approaching the SemEval2019 shared task 3: EmoContext: Contextual Emotion Detection in Text. This system cooperates with an emotion detection neural network method (Poria et al., 2017), emoji2vec (Eisner et al., 2016) embedding, word2vec embedding (Mikolov et al., 2013), and our proposed emoticon and emoji preprocessing method. The experimental results demonstrate the usefulness of our emoticon and emoji prepossessing method, and representations of emoticons and emoji contribute model’s emotion detection.</abstract>
       <url hash="4c3af2ba">S19-2061</url>
@@ -1175,7 +1175,7 @@
       <author><first>Iqra</first><last>Ameer</last></author>
       <author><first>Muhammad Hammad Fahim</first><last>Siddiqui</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>382–386</pages>
       <abstract>In recent years, the use of social media has in-creased incredibly. Social media permits Inter-net users a friendly platform to express their views and opinions. Along with these nice and distinct communication chances, it also allows bad things like usage of hate speech. Online automatic hate speech detection in various aspects is a significant scientific problem. This paper presents the Instituto Politécnico Nacional (Mexico) approach for the Semeval 2019 Task-5 [Hateval 2019] (Basile et al., 2019) competition for Multilingual Detection of Hate Speech on Twitter. The goal of this paper is to detect (A) Hate speech against immigrants and women, (B) Aggressive behavior and target classification, both for English and Spanish. In the proposed approach, we used a bag of words model with preprocessing (stem-ming and stop words removal). We submitted two different systems with names: (i) CIC-1 and (ii) CIC-2 for Hateval 2019 shared task. We used TF values in the first system and TF-IDF for the second system. The first system, CIC-1 got 2nd rank in subtask B for both English and Spanish languages with EMR score of 0.568 for English and 0.675 for Spanish. The second system, CIC-2 was ranked 4th in sub-task A and 1st in subtask B for Spanish language with a macro-F1 score of 0.727 and EMR score of 0.705 respectively.</abstract>
       <url hash="ebf50cc8">S19-2067</url>
@@ -1209,7 +1209,7 @@
       <title><fixed-case>GSI</fixed-case>-<fixed-case>UPM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Semantic Similarity and Word Embeddings for Multilingual Detection of Hate Speech Against Immigrants and Women on <fixed-case>T</fixed-case>witter</title>
       <author><first>Diego</first><last>Benito</last></author>
       <author><first>Oscar</first><last>Araque</last></author>
-      <author><first>Carlos A.</first><last>Iglesias</last></author>
+      <author id="carlos-a-iglesias"><first>Carlos A.</first><last>Iglesias</last></author>
       <pages>396–403</pages>
       <abstract>This paper describes the GSI-UPM system for SemEval-2019 Task 5, which tackles multilingual detection of hate speech on Twitter. The main contribution of the paper is the use of a method based on word embeddings and semantic similarity combined with traditional paradigms, such as n-grams, TF-IDF and POS. This combination of several features is fine-tuned through ablation tests, demonstrating the usefulness of different features. While our approach outperforms baseline classifiers on different sub-tasks, the best of our submitted runs reached the 5th position on the Spanish sub-task A.</abstract>
       <url hash="0fbc1756">S19-2070</url>
@@ -1255,7 +1255,7 @@
     </paper>
     <paper id="75">
       <title><fixed-case>JCTDHS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Detection of Hate Speech in Tweets using Deep Learning Methods, Character N-gram Features, and Preprocessing Methods</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Elyashiv</first><last>Shayovitz</last></author>
       <author><first>Shalom</first><last>Rochman</last></author>
       <author><first>Eli</first><last>Cahn</last></author>
@@ -1281,7 +1281,7 @@
       <title><fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Multilingual Detection of Hate Speech Against Immigrants and Women in <fixed-case>T</fixed-case>witter (hat<fixed-case>E</fixed-case>val)</title>
       <author><first>Nina</first><last>Bauwelinck</last></author>
       <author><first>Gilles</first><last>Jacobs</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <pages>436–440</pages>
       <abstract>This paper describes our contribution to the SemEval-2019 Task 5 on the detection of hate speech against immigrants and women in Twitter (hatEval). We considered a supervised classification-based approach to detect hate speech in English tweets, which combines a variety of standard lexical and syntactic features with specific features for capturing offensive language. Our experimental results show good classification performance on the training data, but a considerable drop in recall on the held-out test set.</abstract>
@@ -1306,7 +1306,7 @@
       <author><first>Luis Enrique</first><last>Argota Vega</last></author>
       <author><first>Jorge Carlos</first><last>Reyes-Magaña</last></author>
       <author><first>Helena</first><last>Gómez-Adorno</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <pages>447–452</pages>
       <abstract>This paper presents our approach to the Task 5 of Semeval-2019, which aims at detecting hate speech against immigrants and women in Twitter. The task consists of two sub-tasks, in Spanish and English: (A) detection of hate speech and (B) classification of hateful tweets as aggressive or not, and identification of the target harassed as individual or group. We used linguistically motivated features and several types of n-grams (words, characters, functional words, punctuation symbols, POS, among others). For task A, we trained a Support Vector Machine using a combinatorial framework, whereas for task B we followed a multi-labeled approach using the Random Forest classifier. Our approach achieved the highest F1-score in sub-task A for the Spanish language.</abstract>
       <url hash="c9718937">S19-2079</url>
@@ -1315,11 +1315,11 @@
     </paper>
     <paper id="80">
       <title><fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Transfer Learning for Multilingual Hate Speech Detection</title>
-      <author><first>Abigail</first><last>Gertner</last></author>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>Elizabeth</first><last>Merkhofer</last></author>
+      <author id="abigail-s-gertner"><first>Abigail</first><last>Gertner</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="elizabeth-merkhofer"><first>Elizabeth</first><last>Merkhofer</last></author>
       <author><first>Amy</first><last>Marsh</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
       <pages>453–459</pages>
       <abstract>This paper describes MITRE’s participation in SemEval-2019 Task 5, HatEval: Multilingual detection of hate speech against immigrants and women in Twitter. The techniques explored range from simple bag-of-ngrams classifiers to neural architectures with varied attention mechanisms. We describe several styles of transfer learning from auxiliary tasks, including a novel method for adapting pre-trained BERT models to Twitter data. Logistic regression ties the systems together into an ensemble submitted for evaluation. The resulting system was used to produce predictions for all four HatEval subtasks, achieving the best mean rank of all teams that participated in all four conditions.</abstract>
@@ -1360,9 +1360,9 @@
     <paper id="84">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Ensemble learning to detect hate speech against inmigrants and women in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish tweets</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>M. Dolores</first><last>Molina-González</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-González</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>476–479</pages>
       <abstract>Misogyny and xenophobia are some of the most important social problems. With the in- crease in the use of social media, this feeling ofhatred towards women and immigrants can be more easily expressed, therefore it can cause harmful effects on social media users. For this reason, it is important to develop systems ca- pable of detecting hateful comments automatically. In this paper, we describe our system to analyze the hate speech in English and Spanish tweets against Immigrants and Women as part of our participation in SemEval-2019 Task 5: hatEval. Our main contribution is the integration of three individual algorithms of predic- tion in a model based on Vote ensemble classifier.</abstract>
       <url hash="4f5fa328">S19-2084</url>
@@ -1372,7 +1372,7 @@
     <paper id="85">
       <title><fixed-case>SINAI</fixed-case>-<fixed-case>DL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Recurrent networks and data augmentation by paraphrasing</title>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Miguel A.</first><last>García-Cumbreras</last></author>
       <author><first>Manuel Carlos</first><last>Díaz-Galiano</last></author>
       <pages>480–483</pages>
@@ -1396,8 +1396,8 @@
     <paper id="87">
       <title>The binary trio at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Multitarget Hate Speech Detection in Tweets</title>
       <author><first>Patricia</first><last>Chiril</last></author>
-      <author><first>Farah</first><last>Benamara Zitoune</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara Zitoune</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <author><first>Abhishek</first><last>Kumar</last></author>
       <pages>489–493</pages>
       <abstract>The massive growth of user-generated web content through blogs, online forums and most notably, social media networks, led to a large spreading of hatred or abusive messages which have to be moderated. This paper proposes a supervised approach to hate speech detection towards immigrants and women in English tweets. Several models have been developed ranging from feature-engineering approaches to neural ones.</abstract>
@@ -1430,9 +1430,9 @@
     <paper id="90">
       <title>Tw-<fixed-case>S</fixed-case>t<fixed-case>AR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: N-gram embeddings for Hate Speech Detection in Multilingual Tweets</title>
       <author><first>Hala</first><last>Mulki</last></author>
-      <author><first>Chedi</first><last>Bechikh Ali</last></author>
+      <author id="chedi-bechikh-ali"><first>Chedi</first><last>Bechikh Ali</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
-      <author><first>Ismail</first><last>Babaoğlu</last></author>
+      <author id="ismail-babaoglu"><first>Ismail</first><last>Babaoğlu</last></author>
       <pages>503–507</pages>
       <abstract>In this paper, we describe our contribution in SemEval-2019: subtask A of task 5 “Multilingual detection of hate speech against immigrants and women in Twitter (HatEval)”. We developed two hate speech detection model variants through Tw-StAR framework. While the first model adopted one-hot encoding ngrams to train an NB classifier, the second generated and learned n-gram embeddings within a feedforward neural network. For both models, specific terms, selected via MWT patterns, were tagged in the input data. With two feature types employed, we could investigate the ability of n-gram embeddings to rival one-hot n-grams. Our results showed that in English, n-gram embeddings outperformed one-hot ngrams. However, representing Spanish tweets by one-hot n-grams yielded a slightly better performance compared to that of n-gram embeddings. The official ranking indicated that Tw-StAR ranked 9th for English and 20th for Spanish.</abstract>
       <url hash="92985795">S19-2090</url>
@@ -1442,10 +1442,10 @@
     <paper id="91">
       <title><fixed-case>UA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Setting A Strong Linear Baseline for Hate Speech Detection</title>
       <author><first>Carlos</first><last>Perelló</last></author>
-      <author><first>David</first><last>Tomás</last></author>
+      <author id="david-tomas"><first>David</first><last>Tomás</last></author>
       <author><first>Alberto</first><last>Garcia-Garcia</last></author>
       <author><first>Jose</first><last>Garcia-Rodriguez</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <pages>508–513</pages>
       <abstract>This paper describes the system developed at the University of Alicante (UA) for the SemEval 2019 Task 5: Shared Task on Multilingual Detection of Hate. The purpose of this work is to build a strong baseline for hate speech detection, using a traditional machine learning approach with standard textual features, which could serve in a near future as a reference to compare with deep learning systems. We participated in both task A (Hate Speech Detection against Immigrants and Women) and task B (Aggressive behavior and Target Classification). Despite its simplicity, our system obtained a remarkable F1-score of 72.5 (sixth highest) and an accuracy of 73.6 (second highest) in Spanish (task A), outperforming more complex neural models from a total of 40 participant systems.</abstract>
       <url hash="6564c845">S19-2091</url>
@@ -1465,9 +1465,9 @@
     </paper>
     <paper id="93">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Hate Speech Identification with Recurrent Neural Networks</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>519–523</pages>
       <abstract>In this paper we revisit the problem of automatically identifying hate speech in posts from social media. We approach the task using a system based on minimalistic compositional Recurrent Neural Networks (RNN). We tested our approach on the SemEval-2019 Task 5: Multilingual Detection of Hate Speech Against Immigrants and Women in Twitter (HatEval) shared task dataset. The dataset made available by the HatEval organizers contained English and Spanish posts retrieved from Twitter annotated with respect to the presence of hateful content and its target. In this paper we present the results obtained by our system in comparison to the other entries in the shared task. Our system achieved competitive performance ranking 7th in sub-task A out of 62 systems in the English track.</abstract>
       <url hash="a5998468">S19-2093</url>
@@ -1477,7 +1477,7 @@
     <paper id="94">
       <title>Vista.ue at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Single Multilingual Hate Speech Detection Model</title>
       <author><first>Kashyap</first><last>Raiyani</last></author>
-      <author><first>Teresa</first><last>Gonçalves</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Gonçalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <author><first>Vitor</first><last>Nogueira</last></author>
       <pages>524–528</pages>
@@ -1545,7 +1545,7 @@
     <paper id="100">
       <title><fixed-case>CAM</fixed-case>sterdam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Neural and graph-based feature extraction for the identification of offensive tweets</title>
       <author><first>Guy</first><last>Aglionby</last></author>
-      <author><first>Chris</first><last>Davis</last></author>
+      <author id="chris-irwin-davis"><first>Chris</first><last>Davis</last></author>
       <author><first>Pushkar</first><last>Mishra</last></author>
       <author><first>Andrew</first><last>Caines</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
@@ -1562,7 +1562,7 @@
       <title><fixed-case>CN</fixed-case>-<fixed-case>HIT</fixed-case>-<fixed-case>MI</fixed-case>.<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Offensive Language Identification Based on <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Double Attention</title>
       <author><first>Yaojie</first><last>Zhang</last></author>
       <author><first>Bing</first><last>Xu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>564–570</pages>
       <abstract>Offensive language has become pervasive in social media. In Offensive Language Identification tasks, it may be difficult to predict accurately only according to the surface words. So we try to dig deeper semantic information of text. This paper presents use an attention-based two layers bidirectional longshort memory neural network (BiLSTM) for semantic feature extraction. Additionally, a residual connection mechanism is used to synthesize two different deep features, and an emoji attention mechanism is used to extract semantic information of emojis in text. We participated in three sub-tasks of SemEval 2019 Task 6 as CN-HIT-MI.T team. Our macro-averaged F1-score in sub-task A is 0.768, ranking 28/103. We got 0.638 in sub-task B, ranking 30/75. In sub-task C, we got 0.549, ranking 22/65. We also tried some other methods of not submitting results.</abstract>
       <url hash="eb9ee7db">S19-2101</url>
@@ -1650,7 +1650,7 @@
       <title>Fermi at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying and Categorizing Offensive Language in Social Media using Sentence Embeddings</title>
       <author><first>Vijayasaradhi</first><last>Indurthi</last></author>
       <author><first>Bakhtiyar</first><last>Syed</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>611–616</pages>
@@ -1662,7 +1662,7 @@
     <paper id="110">
       <title>Ghmerti at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: A Deep Word- and Character-based Approach to Offensive Language Identification</title>
       <author><first>Ehsan</first><last>Doostmohammadi</last></author>
-      <author><first>Hossein</first><last>Sameti</last></author>
+      <author id="hossein-sameti"><first>Hossein</first><last>Sameti</last></author>
       <author><first>Ali</first><last>Saffar</last></author>
       <pages>617–621</pages>
       <abstract>This paper presents the models submitted by Ghmerti team for subtasks A and B of the OffensEval shared task at SemEval 2019. OffensEval addresses the problem of identifying and categorizing offensive language in social media in three subtasks; whether or not a content is offensive (subtask A), whether it is targeted (subtask B) towards an individual, a group, or other entities (subtask C). The proposed approach includes character-level Convolutional Neural Network, word-level Recurrent Neural Network, and some preprocessing. The performance achieved by the proposed model is 77.93% macro-averaged F1-score.</abstract>
@@ -1697,8 +1697,8 @@
       <title>Hope at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Mining social media language to discover offensive language</title>
       <author><first>Gabriel Florentin</first><last>Patras</last></author>
       <author><first>Diana Florina</first><last>Lungu</last></author>
-      <author><first>Daniela</first><last>Gifu</last></author>
-      <author><first>Diana</first><last>Trandabat</last></author>
+      <author id="daniela-gifu"><first>Daniela</first><last>Gifu</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabat</last></author>
       <pages>635–638</pages>
       <abstract>User’s content share through social media has reached huge proportions nowadays. However, along with the free expression of thoughts on social media, people risk getting exposed to various aggressive statements. In this paper, we present a system able to identify and classify offensive user-generated content.</abstract>
       <url hash="effcb26b">S19-2113</url>
@@ -1719,7 +1719,7 @@
     </paper>
     <paper id="115">
       <title><fixed-case>JCTICOL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Classifying Offensive Language in Social Media using Deep Learning Methods, Word/Character N-gram Features, and Preprocessing Methods</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Ziv</first><last>Ben-David</last></author>
       <author><first>Gal</first><last>Didi</last></author>
       <author><first>Eli</first><last>Cahn</last></author>
@@ -1757,7 +1757,7 @@
       <author><first>Preeti</first><last>Mukherjee</last></author>
       <author><first>Mainak</first><last>Pal</last></author>
       <author><first>Somnath</first><last>Banerjee</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>662–667</pages>
       <abstract>This paper describes our system submissions as part of our participation (team name: JU_ETCE_17_21) in the SemEval 2019 shared task 6: “OffensEval: Identifying and Catego- rizing Offensive Language in Social Media”. We participated in all the three sub-tasks: i) Sub-task A: offensive language identification, ii) Sub-task B: automatic categorization of of- fense types, and iii) Sub-task C: offense target identification. We employed machine learn- ing as well as deep learning approaches for the sub-tasks. We employed Convolutional Neural Network (CNN) and Recursive Neu- ral Network (RNN) Long Short-Term Memory (LSTM) with pre-trained word embeddings. We used both word2vec and Glove pre-trained word embeddings. We obtained the best F1- score using CNN based model for sub-task A, LSTM based model for sub-task B and Lo- gistic Regression based model for sub-task C. Our best submissions achieved 0.7844, 0.5459 and 0.48 F1-scores for sub-task A, sub-task B and sub-task C respectively.</abstract>
       <url hash="deed58ed">S19-2118</url>
@@ -1767,7 +1767,7 @@
     <paper id="119">
       <title><fixed-case>KMI</fixed-case>-Coling at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Exploring N-grams for Offensive Language detection</title>
       <author><first>Priya</first><last>Rani</last></author>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <pages>668–671</pages>
       <abstract>In this paper, we present the system description of Offensive language detection tool which is developed by the KMI_Coling under the OffensEval Shared task. The OffensEval Shared Task was conducted in SemEval 2019 workshop. To develop the system, we have explored n-grams up to 8-gram and trained three different namely A, B and C systems for three different subtasks within the OffensEval task which achieves 79.76%, 87.91% and 44.37% accuracy respectively. The task was completed using the dataset provided to us by the OffensEval organisers was the part of OLID dataset. It consists of 13,240 tweets extracted from twitter and were annotated at three levels using crowdsourcing.</abstract>
       <url hash="7074d3f7">S19-2119</url>
@@ -1803,7 +1803,7 @@
       <author><first>Haimin</first><last>Zhang</last></author>
       <author><first>Karan</first><last>Uppal</last></author>
       <author><first>Yaman</first><last>Kumar</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <author><first>Simra</first><last>Shahid</last></author>
       <author><first>Laiba</first><last>Mehnaz</last></author>
       <author><first>Sarthak</first><last>Anand</last></author>
@@ -1827,7 +1827,7 @@
       <title><fixed-case>NIT</fixed-case>_<fixed-case>A</fixed-case>gartala_<fixed-case>NLP</fixed-case>_<fixed-case>T</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: An Ensemble Approach to Identifying and Categorizing Offensive Language in <fixed-case>T</fixed-case>witter Social Media Corpora</title>
       <author><first>Steve Durairaj</first><last>Swamy</last></author>
       <author><first>Anupam</first><last>Jamatia</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Amitava</first><last>Das</last></author>
       <pages>696–703</pages>
       <abstract>The paper describes the systems submitted to OffensEval (SemEval 2019, Task 6) on ‘Identifying and Categorizing Offensive Language in Social Media’ by the ‘NIT_Agartala_NLP_Team’. A Twitter annotated dataset of 13,240 English tweets was provided by the task organizers to train the individual models, with the best results obtained using an ensemble model composed of six different classifiers. The ensemble model produced macro-averaged F1-scores of 0.7434, 0.7078 and 0.4853 on Subtasks A, B, and C, respectively. The paper highlights the overall low predictive nature of various linguistic features and surface level count features, as well as the limitations of a traditional machine learning approach when compared to a Deep Learning counterpart.</abstract>
@@ -1884,9 +1884,9 @@
     <paper id="129">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Incorporating lexicon knowledge into <fixed-case>SVM</fixed-case> learning to identify and categorize offensive language in social media</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>M. Dolores</first><last>Molina-González</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-González</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>735–738</pages>
       <abstract>Offensive language has an impact across society. The use of social media has aggravated this issue among online users, causing suicides in the worst cases. For this reason, it is important to develop systems capable of identifying and detecting offensive language in text automatically. In this paper, we developed a system to classify offensive tweets as part of our participation in SemEval-2019 Task 6: OffensEval. Our main contribution is the integration of lexical features in the classification using the SVM algorithm.</abstract>
       <url hash="4929df78">S19-2129</url>
@@ -1908,7 +1908,7 @@
     <paper id="131">
       <title>Stop <fixed-case>P</fixed-case>ropag<fixed-case>H</fixed-case>ate at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Tasks 5 and 6: Are abusive language classification results reproducible?</title>
       <author><first>Paula</first><last>Fortuna</last></author>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Sérgio</first><last>Nunes</last></author>
       <pages>745–752</pages>
       <abstract>This paper summarizes the participation of Stop PropagHate team at SemEval 2019. Our approach is based on replicating one of the most relevant works on the literature, using word embeddings and LSTM. After circumventing some of the problems of the original code, we found poor results when applying it to the HatEval contest (F1=0.45). We think this is due mainly to inconsistencies in the data of this contest. Finally, for the OffensEval the classifier performed well (F1=0.74), proving to have a better performance for offense detection than for hate speech.</abstract>
@@ -1925,7 +1925,7 @@
       <author><first>Geetika</first><last>B</last></author>
       <author><first>Dyaneswaran</first><last>S</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>753–758</pages>
       <abstract>Task 6 of SemEval 2019 involves identifying and categorizing offensive language in social media. The systems developed by TECHSSN team uses multi-level classification techniques. We have developed two systems. In the first system, the first level of classification is done by a multi-branch 2D CNN classifier with Google’s pre-trained Word2Vec embedding and the second level of classification by string matching technique supported by offensive and bad words dictionary. The second system uses a multi-branch 1D CNN classifier with Glove pre-trained embedding layer for the first level of classification and string matching for the second level of classification. Input data with a probability of less than 0.70 in the first level are passed on to the second level. The misclassified examples are classified correctly in the second level.</abstract>
       <url hash="52900fbd">S19-2132</url>
@@ -1978,7 +1978,7 @@
       <title><fixed-case>UHH</fixed-case>-<fixed-case>LT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Supervised vs. Unsupervised Transfer Learning for Offensive Language Detection</title>
       <author><first>Gregor</first><last>Wiedemann</last></author>
       <author><first>Eugen</first><last>Ruppert</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>782–787</pages>
       <abstract>We present a neural network based approach of transfer learning for offensive language detection. For our system, we compare two types of knowledge transfer: supervised and unsupervised pre-training. Supervised pre-training of our bidirectional GRU-3-CNN architecture is performed as multi-task learning of parallel training of five different tasks. The selected tasks are supervised classification problems from public NLP resources with some overlap to offensive language such as sentiment detection, emoji classification, and aggressive language classification. Unsupervised transfer learning is performed with a thematic clustering of 40M unlabeled tweets via LDA. Based on this dataset, pre-training is performed by predicting the main topic of a tweet. Results indicate that unsupervised transfer from large datasets performs slightly better than supervised training on small ‘near target category’ datasets. In the SemEval Task, our system ranks 14 out of 103 participants.</abstract>
       <url hash="b137e040">S19-2137</url>
@@ -1989,7 +1989,7 @@
       <title><fixed-case>UM</fixed-case>-<fixed-case>IU</fixed-case>@<fixed-case>LING</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying Offensive Tweets Using <fixed-case>BERT</fixed-case> and <fixed-case>SVM</fixed-case>s</title>
       <author><first>Jian</first><last>Zhu</last></author>
       <author><first>Zuoyu</first><last>Tian</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>788–795</pages>
       <abstract>This paper describes the UM-IU@LING’s system for the SemEval 2019 Task 6: Offens-Eval. We take a mixed approach to identify and categorize hate speech in social media. In subtask A, we fine-tuned a BERT based classifier to detect abusive content in tweets, achieving a macro F1 score of 0.8136 on the test data, thus reaching the 3rd rank out of 103 submissions. In subtasks B and C, we used a linear SVM with selected character n-gram features. For subtask C, our system could identify the target of abuse with a macro F1 score of 0.5243, ranking it 27th out of 65 submissions.</abstract>
       <url hash="361e7717">S19-2138</url>
@@ -2008,7 +2008,7 @@
     </paper>
     <paper id="140">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Relying on Compositionality to Find Offense</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <pages>801–805</pages>
       <abstract>We present the UTFPR system for the OffensEval shared task of SemEval 2019: A character-to-word-to-sentence compositional RNN model trained exclusively over the training data provided by the organizers. We find that, although not very competitive for the task at hand, it offers a robust solution to the orthographic irregularity inherent to tweets.</abstract>
       <url hash="e83ad5b4">S19-2140</url>
@@ -2078,7 +2078,7 @@
       <author><first>Ye</first><last>Jiang</last></author>
       <author><first>Johann</first><last>Petrak</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <pages>840–844</pages>
       <abstract>This paper describes the participation of team “bertha-von-suttner” in the SemEval2019 task 4 Hyperpartisan News Detection task. Our system uses sentence representations from averaged word embeddings generated from the pre-trained ELMo model with Convolutional Neural Networks and Batch Normalization for predicting hyperpartisan news. The final predictions were generated from the averaged predictions of an ensemble of models. With this architecture, our system ranked in first place, based on accuracy, the official scoring metric.</abstract>
@@ -2088,13 +2088,13 @@
     </paper>
     <paper id="147">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: <fixed-case>R</fixed-case>umour<fixed-case>E</fixed-case>val, Determining Rumour Veracity and Support for Rumours</title>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <author><first>Elena</first><last>Kochkina</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>845–854</pages>
       <abstract>Since the first RumourEval shared task in 2017, interest in automated claim validation has greatly increased, as the danger of “fake news” has become a mainstream concern. However automated support for rumour verification remains in its infancy. It is therefore important that a shared task in this area continues to provide a focus for effort, which is likely to increase. Rumour verification is characterised by the need to consider evolving conversations and news updates to reach a verdict on a rumour’s veracity. As in RumourEval 2017 we provided a dataset of dubious posts and ensuing conversations in social media, annotated both for stance and veracity. The social media rumours stem from a variety of breaking news stories and the dataset is expanded to include Reddit as well as new Twitter posts. There were two concrete tasks; rumour stance prediction and rumour verification, which we present in detail along with results achieved by participants. We received 22 system submissions (a 70% increase from RumourEval 2017) many of which used state-of-the-art methodology to tackle the challenges involved.</abstract>
       <url hash="40ecd9c8">S19-2147</url>
@@ -2119,7 +2119,7 @@
       <author><first>Pepa</first><last>Atanasova</last></author>
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>860–869</pages>
       <abstract>We present SemEval-2019 Task 8 on Fact Checking in Community Question Answering Forums, which features two subtasks. Subtask A is about deciding whether a question asks for factual information vs. an opinion/advice vs. just socializing. Subtask B asks to predict whether an answer to a factual question is true, false or not a proper answer. We received 17 official submissions for subtask A and 11 official submissions for Subtask B. For subtask A, all systems improved over the majority class baseline. For Subtask B, all systems were below a majority class baseline, but several systems were very close to it. The leaderboard and the data from the competition can be found at <url>http://competitions.codalab.org/competitions/20022</url>.</abstract>
       <url hash="8dd6041d">S19-2149</url>
@@ -2170,7 +2170,7 @@
       <author><first>Cristian</first><last>Petrescu-Prahova</last></author>
       <author><first>Gabriel</first><last>Stanovsky</last></author>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Rik</first><last>Koncel-Kedziorski</last></author>
+      <author id="rik-koncel-kedziorski"><first>Rik</first><last>Koncel-Kedziorski</last></author>
       <pages>893–899</pages>
       <abstract>We report on the SemEval 2019 task on math question answering. We provided a question set derived from Math SAT practice exams, including 2778 training questions and 1082 test questions. For a significant subset of these questions, we also provided SMT-LIB logical form annotations and an interpreter that could solve these logical forms. Systems were evaluated based on the percentage of correctly answered questions. The top system correctly answered 45% of the test questions, a considerable improvement over the 17% random guessing baseline.</abstract>
       <url hash="22765a46">S19-2153</url>
@@ -2190,11 +2190,11 @@
     </paper>
     <paper id="155">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 12: Toponym Resolution in Scientific Papers</title>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Arjun</first><last>Magge</last></author>
       <author><first>Karen</first><last>O’Connor</last></author>
       <author><first>Matthew</first><last>Scotch</last></author>
-      <author><first>Graciela</first><last>Gonzalez-Hernandez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></author>
       <pages>907–916</pages>
       <abstract>We present the SemEval-2019 Task 12 which focuses on toponym resolution in scientific articles. Given an article from PubMed, the task consists of detecting mentions of names of places, or toponyms, and mapping the mentions to their corresponding entries in GeoNames.org, a database of geospatial locations. We proposed three subtasks. In Subtask 1, we asked participants to detect all toponyms in an article. In Subtask 2, given toponym mentions as input, we asked participants to disambiguate them by linking them to entries in GeoNames. In Subtask 3, we asked participants to perform both the detection and the disambiguation steps for all toponyms. A total of 29 teams registered, and 8 teams submitted a system run. We summarize the corpus and the tools created for the challenge. They are freely available at <url>https://competitions.codalab.org/competitions/19948</url>. We also analyze the methods, the results and the errors made by the competing systems with a focus on toponym disambiguation.</abstract>
       <url hash="2aac10ac">S19-2155</url>
@@ -2232,7 +2232,7 @@
     <paper id="158">
       <title><fixed-case>C</fixed-case>ardiff <fixed-case>U</fixed-case>niversity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Linguistic Features for Hyperpartisan News Detection</title>
       <author><first>Carla</first><last>Pérez-Almendros</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Steven</first><last>Schockaert</last></author>
       <pages>929–933</pages>
       <abstract>This paper summarizes our contribution to the Hyperpartisan News Detection task in SemEval 2019. We experiment with two different approaches: 1) an SVM classifier based on word vector averages and hand-crafted linguistic features, and 2) a BiLSTM-based neural text classifier trained on a filtered training set. Surprisingly, despite their different nature, both approaches achieve an accuracy of 0.74. The main focus of this paper is to further analyze the remarkable fact that a simple feature-based approach can perform on par with modern neural classifiers. We also highlight the effectiveness of our filtering strategy for training the neural network on a large but noisy training set.</abstract>
@@ -2264,7 +2264,7 @@
     </paper>
     <paper id="161">
       <title>Doris <fixed-case>M</fixed-case>artin at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Hyperpartisan News Detection with Generic Semi-supervised Features</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
       <pages>944–948</pages>
       <abstract>In this paper we describe our participation to the Hyperpartisan News Detection shared task at SemEval 2019. Motivated by the late arrival of Doris Martin, we test a previously developed document classification system which consists of a combination of clustering features implemented on top of some simple shallow local features. We show how leveraging distributional features obtained from large in-domain unlabeled data helps to easily and quickly develop a reasonably good performing system for detecting hyperpartisan news. The system and models generated for this task are publicly available.</abstract>
       <url hash="4a55107b">S19-2161</url>
@@ -2350,7 +2350,7 @@
     </paper>
     <paper id="169">
       <title>Rouletabille at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Neural Network Baseline for Identification of Hyperpartisan Publishers</title>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Yoann</first><last>Pitarch</last></author>
       <author><first>Karen</first><last>Pinel-Sauvagnat</last></author>
       <author><first>Gilles</first><last>Hubert</last></author>
@@ -2435,7 +2435,7 @@
       <author><first>Daniel</first><last>Shaprin</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1012–1015</pages>
       <abstract>We describe the system submitted by the Jack Ryder team to SemEval-2019 Task 4 on Hyperpartisan News Detection. The task asked participants to predict whether a given article is hyperpartisan, i.e., extreme-left or extreme-right. We proposed an approach based on BERT with fine-tuning, which was ranked 7th out 28 teams on the distantly supervised dataset, where all articles from a hyperpartisan/non-hyperpartisan news outlet are considered to be hyperpartisan/non-hyperpartisan. On a manually annotated test dataset, where human annotators double-checked the labels, we were ranked 29th out of 42 teams.</abstract>
       <url hash="4146e431">S19-2176</url>
@@ -2501,8 +2501,8 @@
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Giovanni</first><last>Da San Martino</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>1041–1046</pages>
       <abstract>We describe our submission to SemEval-2019 Task 4 on Hyperpartisan News Detection. We rely on a variety of engineered features originally used to detect propaganda. This is based on the assumption that biased messages are propagandistic and promote a particular political cause or viewpoint. In particular, we trained a logistic regression model with features ranging from simple bag of words to vocabulary richness and text readability. Our system achieved 72.9% accuracy on the manually annotated testset, and 60.8% on the test data that was obtained with distant supervision. Additional experiments showed that significant performance gains can be achieved with better feature pre-processing.</abstract>
       <url hash="c4ce3a88">S19-2182</url>
@@ -2616,7 +2616,7 @@
     <paper id="192">
       <title><fixed-case>BUT</fixed-case>-<fixed-case>FIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Determining the Rumour Stance with Pre-Trained Deep Bidirectional Transformers</title>
       <author><first>Martin</first><last>Fajcik</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <author><first>Lukas</first><last>Burget</last></author>
       <pages>1097–1104</pages>
       <abstract>This paper describes our system submitted to SemEval 2019 Task 7: RumourEval 2019: Determining Rumour Veracity and Support for Rumours, Subtask A (Gorrell et al., 2019). The challenge focused on classifying whether posts from Twitter and Reddit support, deny, query, or comment a hidden rumour, truthfulness of which is the topic of an underlying discussion thread. We formulate the problem as a stance classification, determining the rumour stance of a post with respect to the previous thread post and the source thread post. The recent BERT architecture was employed to build an end-to-end system which has reached the F1 score of 61.67 % on the provided test data. Without any hand-crafted feature, the system finished at the 2nd place in the competition, only 0.2 % behind the winner.</abstract>
@@ -2650,7 +2650,7 @@
     <paper id="195">
       <title><fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Hybrid Pipeline for Rumour Veracity and Stance Classification on Social Media</title>
       <author><first>Sardar</first><last>Hamidian</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1115–1119</pages>
       <abstract>Social media plays a crucial role as the main resource news for information seekers online. However, the unmoderated feature of social media platforms lead to the emergence and spread of untrustworthy contents which harm individuals or even societies. Most of the current automated approaches for automatically determining the veracity of a rumor are not generalizable for novel emerging topics. This paper describes our hybrid system comprising rules and a machine learning model which makes use of replied tweets to identify the veracity of the source tweet. The proposed system in this paper achieved 0.435 F-Macro in stance classification, and 0.262 F-macro and 0.801 RMSE in rumor verification tasks in Task7 of SemEval 2019.</abstract>
       <url hash="bf5545ce">S19-2195</url>
@@ -2660,10 +2660,10 @@
     <paper id="196">
       <title><fixed-case>SINAI</fixed-case>-<fixed-case>DL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Data Augmentation and Temporal Expressions</title>
       <author><first>Miguel A.</first><last>García-Cumbreras</last></author>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
       <author><first>Manuel Carlos</first><last>Díaz-Galiano</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
       <pages>1120–1124</pages>
       <abstract>This paper describes the participation of the SINAI-DL team at RumourEval (Task 7 in SemEval 2019, subtask A: SDQC). SDQC addresses the challenge of rumour stance classification as an indirect way of identifying potential rumours. Given a tweet with several replies, our system classifies each reply into either supporting, denying, questioning or commenting on the underlying rumours. We have applied data augmentation, temporal expressions labelling and transfer learning with a four-layer neural classifier. We achieve an accuracy of 0.715 with the official run over reply tweets.</abstract>
       <url hash="c2dad255">S19-2196</url>
@@ -2720,7 +2720,7 @@
       <title><fixed-case>DOMLIN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 8: Automated Fact Checking exploiting Ratings in Community Question Answering Forums</title>
       <author><first>Dominik</first><last>Stammbach</last></author>
       <author><first>Stalin</first><last>Varanasi</last></author>
-      <author><first>Guenter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Guenter</first><last>Neumann</last></author>
       <pages>1149–1154</pages>
       <abstract>In the following, we describe our system developed for the Semeval2019 Task 8. We fine-tuned a BERT checkpoint on the qatar living forum dump and used this checkpoint to train a number of models. Our hand-in for subtask A consists of a fine-tuned classifier from this BERT checkpoint. For subtask B, we first have a classifier deciding whether a comment is factual or non-factual. If it is factual, we retrieve intra-forum evidence and using this evidence, have a classifier deciding the comment’s veracity. We trained this classifier on ratings which we crawled from qatarliving.com</abstract>
       <url hash="84843a14">S19-2201</url>
@@ -2742,7 +2742,7 @@
       <title>Fermi at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 8: An elementary but effective approach to Question Discernment in Community <fixed-case>QA</fixed-case> Forums</title>
       <author><first>Bakhtiyar</first><last>Syed</last></author>
       <author><first>Vijayasaradhi</first><last>Indurthi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Manish</first><last>Gupta</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>1160–1164</pages>
@@ -2832,7 +2832,7 @@
     <paper id="211">
       <title><fixed-case>INRIA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: Suggestion Mining Using <fixed-case>SVM</fixed-case> with Handcrafted Features</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <pages>1204–1207</pages>
       <abstract>We present the INRIA approach to the suggestion mining task at SemEval 2019. The task consists of two subtasks: suggestion mining under single-domain (Subtask A) and cross-domain (Subtask B) settings. We used the Support Vector Machines algorithm trained on handcrafted features, function words, sentiment features, digits, and verbs for Subtask A, and handcrafted features for Subtask B. Our best run archived a F1-score of 51.18% on Subtask A, and ranked in the top ten of the submissions for Subtask B with 73.30% F1-score.</abstract>
       <url hash="2445c35d">S19-2211</url>
@@ -2857,7 +2857,7 @@
       <author><first>Simra</first><last>Shahid</last></author>
       <author><first>Haimin</first><last>Zhang</last></author>
       <author><first>Yaman</first><last>Kumar</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <author><first>Karan</first><last>Uppal</last></author>
       <pages>1213–1217</pages>
       <abstract>In this paper we present our approach to tackle the Suggestion Mining from Online Reviews and Forums Sub-Task A. Given a review, we are asked to predict whether the review consists of a suggestion or not. Our model is based on Universal Language Model Fine-tuning for Text Classification. We apply various pre-processing techniques before training the language and the classification model. We further provide analysis of the model. Our team ranked 10th out of 34 participants, achieving an F1 score of 0.7011.</abstract>
@@ -2903,7 +2903,7 @@
       <author><first>Rajalakshmi</first><last>S</last></author>
       <author><first>Angel</first><last>Suseelan</last></author>
       <author><first>S Milton</first><last>Rajendram</last></author>
-      <author><first>Mirnalinee</first><last>T T</last></author>
+      <author id="t-t-mirnalinee"><first>Mirnalinee</first><last>T T</last></author>
       <pages>1237–1241</pages>
       <abstract>This paper describes the work on mining the suggestions from online reviews and forums. Opinion mining detects whether the comments are positive, negative or neutral, while suggestion mining explores the review content for the possible tips or advice. The system developed by SSN-SPARKS team in SemEval-2019 for task 9 (suggestion mining) uses a rule-based approach for feature selection, SMOTE technique for data augmentation and deep learning technique (Convolutional Neural Network) for classification. We have compared the results with Random Forest classifier (RF) and MultiLayer Perceptron (MLP) model. Results show that the CNN model performs better than other models for both the subtasks.</abstract>
       <url hash="892abf9f">S19-2217</url>
@@ -2925,7 +2925,7 @@
     <paper id="219">
       <title>Team <fixed-case>T</fixed-case>aurus at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: Expert-informed pattern recognition for suggestion mining</title>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <pages>1247–1253</pages>
       <abstract>This paper presents our submissions to SemEval-2019 Task9, Suggestion Mining. Our system is one in a series of systems in which we compare an approach using expert-defined rules with a comparable one using machine learning. We target tasks with a syntactic or semantic component that might be better described by a human understanding the task than by a machine learner only able to count features. For Semeval-2019 Task 9, the expert rules clearly outperformed our machine learning model when training and testing on equally balanced testsets.</abstract>
       <url hash="f7a90d9a">S19-2219</url>
@@ -2936,10 +2936,10 @@
       <title><fixed-case>T</fixed-case>his<fixed-case>I</fixed-case>s<fixed-case>C</fixed-case>ompetition at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: <fixed-case>BERT</fixed-case> is unstable for out-of-domain samples</title>
       <author><first>Cheoneum</first><last>Park</last></author>
       <author><first>Juae</first><last>Kim</last></author>
-      <author><first>Hyeon-gu</first><last>Lee</last></author>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="hyeon-gu-lee"><first>Hyeon-gu</first><last>Lee</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Harksoo</first><last>Kim</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <author><first>Changki</first><last>Lee</last></author>
       <pages>1254–1261</pages>
       <abstract>This paper describes our system, Joint Encoders for Stable Suggestion Inference (JESSI), for the SemEval 2019 Task 9: Suggestion Mining from Online Reviews and Forums. JESSI is a combination of two sentence encoders: (a) one using multiple pre-trained word embeddings learned from log-bilinear regression (GloVe) and translation (CoVe) models, and (b) one on top of word encodings from a pre-trained deep bidirectional transformer (BERT). We include a domain adversarial training module when training for out-of-domain samples. Our experiments show that while BERT performs exceptionally well for in-domain samples, several runs of the model show that it is unstable for out-of-domain samples. The problem is mitigated tremendously by (1) combining BERT with a non-BERT encoder, and (2) using an RNN-based classifier on top of BERT. Our final models obtained second place with 77.78% F-Score on Subtask A (i.e. in-domain) and achieved an F-Score of 79.59% on Subtask B (i.e. out-of-domain), even without using any additional external data.</abstract>
@@ -3028,8 +3028,8 @@
       <author><first>Alistair</first><last>Plum</last></author>
       <author><first>Tharindu</first><last>Ranasinghe</last></author>
       <author><first>Pablo</first><last>Calleja</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>1297–1301</pages>
       <abstract>This article describes the system submitted by the RGCL-WLV team to the SemEval 2019 Task 12: Toponym resolution in scientific papers. The system detects toponyms using a bootstrapped machine learning (ML) approach which classifies names identified using gazetteers extracted from the GeoNames geographical database. The paper evaluates the performance of several ML classifiers, as well as how the gazetteers influence the accuracy of the system. Several runs were submitted. The highest precision achieved for one of the submissions was 89%, albeit it at a relatively low recall of 49%.</abstract>
       <url hash="fb896867">S19-2228</url>
@@ -3063,7 +3063,7 @@
       <title><fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 12: Multi-model combination for toponym resolution</title>
       <author><first>Haonan</first><last>Li</last></author>
       <author><first>Minghan</first><last>Wang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Martin</first><last>Tomko</last></author>
       <author><first>Maria</first><last>Vasardani</last></author>
       <pages>1313–1318</pages>
diff --git a/data/xml/S98.xml b/data/xml/S98.xml
index 104ecc46d3..11d060e8ca 100644
--- a/data/xml/S98.xml
+++ b/data/xml/S98.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Pilot <fixed-case>SENSEVAL</fixed-case></booktitle>
       <url hash="176a2129">S98-1</url>
       <editor><first>Adam</first><last>Kilgarriff</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hermonceux Castle, Sussex, UK</address>
       <month>September</month>
diff --git a/data/xml/T75.xml b/data/xml/T75.xml
index 5498ddf69d..3ad799163a 100644
--- a/data/xml/T75.xml
+++ b/data/xml/T75.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Theoretical Issues in Natural Language Processing: Supplement</booktitle>
       <editor id="bonnie-webber"><first>B.L.</first><last>Nash-Webber</last></editor>
-      <editor><first>Roger</first><last>Schank</last></editor>
+      <editor id="roger-c-schank"><first>Roger</first><last>Schank</last></editor>
       <venue>tinlap</venue>
       <year>1975</year>
     </meta>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="5">
       <title>What Makes <fixed-case>S</fixed-case>am Run? Script Based Techniques for Question Answering</title>
-      <author><first>Wendy</first><last>Lehnert</last></author>
+      <author id="wendy-lehnert"><first>Wendy</first><last>Lehnert</last></author>
       <url hash="50e97149">T75-1005</url>
       <bibkey>lehnert-1975-makes</bibkey>
     </paper>
@@ -51,7 +51,7 @@
     <meta>
       <booktitle>Theoretical Issues in Natural Language Processing</booktitle>
       <editor id="bonnie-webber"><first>B.L.</first><last>Nash-Webber</last></editor>
-      <editor><first>Roger</first><last>Schank</last></editor>
+      <editor id="roger-c-schank"><first>Roger</first><last>Schank</last></editor>
       <venue>tinlap</venue>
       <year>1975</year>
     </meta>
@@ -61,13 +61,13 @@
     </frontmatter>
     <paper id="1">
       <title>Augmented Phrase Structure Grammars</title>
-      <author><first>George E.</first><last>Heidorn</last></author>
+      <author id="george-e-heidorn"><first>George E.</first><last>Heidorn</last></author>
       <url hash="01b18daa">T75-2001</url>
       <bibkey>heidorn-1975-augmented</bibkey>
     </paper>
     <paper id="2">
       <title>Diagnosis as a Notion of Grammar</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="0e5fc28d">T75-2002</url>
       <bibkey>marcus-1975-diagnosis</bibkey>
     </paper>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="9">
       <title>Primitives and Words</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="a19fee3f">T75-2009</url>
       <bibkey>wilks-1975-primitives</bibkey>
     </paper>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="15">
       <title>Speaking With Manytongues: Some Problems in Modeling Speakers of Actual Discourse</title>
-      <author><first>John H.</first><last>Clippinger, Jr.</last></author>
+      <author id="john-h-clippinger-jr"><first>John H.</first><last>Clippinger, Jr.</last></author>
       <url hash="d3639581">T75-2015</url>
       <bibkey>clippinger-jr-1975-speaking</bibkey>
     </paper>
@@ -157,8 +157,8 @@
     </paper>
     <paper id="17">
       <title>A Formalism for Relating Lexical and Pragmatic Information: Its Relevance to Recognition and Generation</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>Stanley J.</first><last>Rosenschein</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="stanley-j-rosenschein"><first>Stanley J.</first><last>Rosenschein</last></author>
       <url hash="42d07b16">T75-2017</url>
       <bibkey>joshi-rosenschein-1975-formalism</bibkey>
     </paper>
@@ -199,19 +199,19 @@
     </paper>
     <paper id="24">
       <title>Considerations for Computational Theories of Speaking: Seven Things Speakers Do</title>
-      <author><first>John H.</first><last>Clippinger, Jr.</last></author>
+      <author id="john-h-clippinger-jr"><first>John H.</first><last>Clippinger, Jr.</last></author>
       <url hash="261d14d2">T75-2024</url>
       <bibkey>clippinger-jr-1975-considerations</bibkey>
     </paper>
     <paper id="25">
       <title><fixed-case>IMPROVING</fixed-case> <fixed-case>METHODOLOGY</fixed-case> in Natural Language Processing</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <url hash="b6b37bdb">T75-2025</url>
       <bibkey>mann-1975-improving</bibkey>
     </paper>
     <paper id="26">
       <title>Methodology in <fixed-case>AI</fixed-case> and Natural Language Understanding</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="9a642790">T75-2026</url>
       <bibkey>wilks-1975-methodology</bibkey>
     </paper>
diff --git a/data/xml/T78.xml b/data/xml/T78.xml
index 79bdf8ea05..0a24068c52 100644
--- a/data/xml/T78.xml
+++ b/data/xml/T78.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Theoretical Issues in Natural Language Processing-2</booktitle>
-      <editor><first>David L.</first><last>Waltz</last></editor>
+      <editor id="david-l-waltz"><first>David L.</first><last>Waltz</last></editor>
       <venue>tinlap</venue>
       <year>1978</year>
     </meta>
@@ -19,7 +19,7 @@
     </paper>
     <paper id="2">
       <title>What Makes Something “Ad Hoc”</title>
-      <author><first>Roger C.</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger C.</first><last>Schank</last></author>
       <url hash="636853f9">T78-1002</url>
       <bibkey>schank-1978-makes-something</bibkey>
     </paper>
@@ -37,13 +37,13 @@
     </paper>
     <paper id="5">
       <title>Taxonomic Lattice Structures for Situation Recognition</title>
-      <author><first>William A.</first><last>Woods</last></author>
+      <author id="william-a-woods"><first>William A.</first><last>Woods</last></author>
       <url hash="92957ddf">T78-1005</url>
       <bibkey>woods-1978-taxonomic-lattice</bibkey>
     </paper>
     <paper id="6">
       <title>Description Formation and Discourse Model Synthesis</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <url hash="ffded1d2">T78-1006</url>
       <bibkey>webber-1978-description-formation</bibkey>
     </paper>
@@ -62,7 +62,7 @@
     </paper>
     <paper id="9">
       <title>Subsequent Reference: Syntactic and Rhetorical Constraints</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="2d424559">T78-1009</url>
       <bibkey>mcdonald-1978-subsequent-reference</bibkey>
     </paper>
@@ -74,19 +74,19 @@
     </paper>
     <paper id="11">
       <title>Bound Variables and Other Anaphors</title>
-      <author><first>Barbara H.</first><last>Partee</last></author>
+      <author id="barbara-h-partee"><first>Barbara H.</first><last>Partee</last></author>
       <url hash="e8e6a510">T78-1011</url>
       <bibkey>partee-1978-bound-variables</bibkey>
     </paper>
     <paper id="12">
       <title>The Use of Focus as a Tool for Disambiguation of Definite Noun Phrases</title>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <url hash="7b96d2b7">T78-1012</url>
       <bibkey>sidner-1978-use-focus</bibkey>
     </paper>
     <paper id="13">
       <title>Focusing in Dialog</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <url hash="8c8d4c2e">T78-1013</url>
       <bibkey>grosz-1978-focusing-dialog</bibkey>
     </paper>
@@ -110,8 +110,8 @@
     </paper>
     <paper id="17">
       <title>Speech Acts as a Basis for Understanding Dialogue Coherence</title>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
-      <author><first>James F.</first><last>Allen</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
       <url hash="d111402b">T78-1017</url>
       <bibkey>perrault-allen-1978-speech</bibkey>
     </paper>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="19">
       <title>Intentlonallty and Human Conversations</title>
-      <author><first>Jaime G.</first><last>Carbonell Jr</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell Jr</last></author>
       <url hash="c50fc619">T78-1019</url>
       <bibkey>carbonell-jr-1978-intentlonallty-human</bibkey>
     </paper>
@@ -160,13 +160,13 @@
     </paper>
     <paper id="25">
       <title>Semantic Primitives in Language and Vision</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="b4e13896">T78-1025</url>
       <bibkey>wilks-1978-semantic-primitives</bibkey>
     </paper>
     <paper id="26">
       <title>A Note on Partial Match of Descriptions. Can One Simultaneously Question (Retrieve) and Inform (Update)?</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="74969243">T78-1026</url>
       <bibkey>joshi-1978-note-partial</bibkey>
     </paper>
@@ -196,7 +196,7 @@
     </paper>
     <paper id="31">
       <title>Path-Based and Node-Based Inference in Semantic Networks</title>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <url hash="2ddc5535">T78-1031</url>
       <bibkey>shapiro-1978-path-based</bibkey>
     </paper>
@@ -216,13 +216,13 @@
     </paper>
     <paper id="34">
       <title>A Computational Account of Some Constraints on Language</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="c9724c43">T78-1034</url>
       <bibkey>marcus-1978-computational-account</bibkey>
     </paper>
     <paper id="35">
       <title>Remarks on Processing, Constraints, and the Lexicon</title>
-      <author><first>Thomas</first><last>Wasow</last></author>
+      <author id="thomas-wasow"><first>Thomas</first><last>Wasow</last></author>
       <url hash="8beed275">T78-1035</url>
       <bibkey>wasow-1978-remarks-processing</bibkey>
     </paper>
diff --git a/data/xml/T87.xml b/data/xml/T87.xml
index 09a5845b8a..bd59cba3b9 100644
--- a/data/xml/T87.xml
+++ b/data/xml/T87.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Theoretical Issues in Natural Language Processing 3</booktitle>
-      <editor><first>Yorick</first><last>Wilks</last></editor>
+      <editor id="yorick-wilks"><first>Yorick</first><last>Wilks</last></editor>
       <venue>tinlap</venue>
       <year>1987</year>
     </meta>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="4">
       <title>The Definitional Power of Words</title>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
       <url hash="fd21b4c1">T87-1004</url>
       <bibkey>boguraev-1987-definitional</bibkey>
     </paper>
@@ -40,19 +40,19 @@
     </paper>
     <paper id="6">
       <title>World Knowledge and Word Meaning</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <url hash="ed2f7f20">T87-1006</url>
       <bibkey>hobbs-1987-world</bibkey>
     </paper>
     <paper id="7">
       <title>The Boundary Between Word Knowledge and World Knowledge</title>
-      <author><first>Judy</first><last>Kegl</last></author>
+      <author id="judy-anne-kegl"><first>Judy</first><last>Kegl</last></author>
       <url hash="f5575745">T87-1007</url>
       <bibkey>kegl-1987-boundary</bibkey>
     </paper>
     <paper id="8">
       <title>Information, Unification and Locality</title>
-      <author><first>Fernando C. N.</first><last>Pereira</last></author>
+      <author id="fernando-c-n-pereira"><first>Fernando C. N.</first><last>Pereira</last></author>
       <url hash="451e82f5">T87-1008</url>
       <bibkey>pereira-1987-information</bibkey>
     </paper>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="11">
       <title>Unification and Some New Grammatical Formalisms</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="985256d7">T87-1011</url>
       <bibkey>joshi-1987-unification</bibkey>
     </paper>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="13">
       <title>Connectionist Models: Not Just a Notational Variant Not a Panacea</title>
-      <author><first>David L.</first><last>Waltz</last></author>
+      <author id="david-l-waltz"><first>David L.</first><last>Waltz</last></author>
       <url hash="4d833879">T87-1013</url>
       <bibkey>waltz-1987-connectionist</bibkey>
     </paper>
@@ -106,25 +106,25 @@
     </paper>
     <paper id="17">
       <title>Possible Implications of Connectionism</title>
-      <author><first>Wendy G.</first><last>Lehnert</last></author>
+      <author id="wendy-lehnert"><first>Wendy G.</first><last>Lehnert</last></author>
       <url hash="d008e109">T87-1017</url>
       <bibkey>lehnert-1987-possible</bibkey>
     </paper>
     <paper id="18">
       <title>Whither Discourse and Speech Acts?</title>
-      <author><first>Barbara J.</first><last>Grosz</last></author>
+      <author id="barbara-j-grosz"><first>Barbara J.</first><last>Grosz</last></author>
       <url hash="6e499c2d">T87-1018</url>
       <bibkey>grosz-1987-whither</bibkey>
     </paper>
     <paper id="19">
       <title>NO TITLE</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <url hash="86e6003e">T87-1019</url>
       <bibkey>hirschberg-1987-title</bibkey>
     </paper>
     <paper id="20">
       <title>Towards a Semantic Theory of Discourse</title>
-      <author><first>C. Raymond</first><last>Perrault</last></author>
+      <author id="c-raymond-perrault"><first>C. Raymond</first><last>Perrault</last></author>
       <url hash="816d5f85">T87-1020</url>
       <bibkey>perrault-1987-towards</bibkey>
     </paper>
@@ -142,14 +142,14 @@
     </paper>
     <paper id="23">
       <title>Natural Language Processing: What’s Really Involved?</title>
-      <author><first>Roger</first><last>Schank</last></author>
+      <author id="roger-c-schank"><first>Roger</first><last>Schank</last></author>
       <author><first>Alex</first><last>Kass</last></author>
       <url hash="4e612eea">T87-1023</url>
       <bibkey>schank-kass-1987-natural</bibkey>
     </paper>
     <paper id="24">
       <title>The Rate of Progress in Natural Language Processing</title>
-      <author><first>Norman K.</first><last>Sondheimer</last></author>
+      <author id="norman-k-sondheimer"><first>Norman K.</first><last>Sondheimer</last></author>
       <url hash="badcefb8">T87-1024</url>
       <bibkey>sondheimer-1987-rate</bibkey>
     </paper>
@@ -173,13 +173,13 @@
     </paper>
     <paper id="28">
       <title>On Formal Versus Commonsense Semantics</title>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <url hash="5e6fc96d">T87-1028</url>
       <bibkey>israel-1987-formal</bibkey>
     </paper>
     <paper id="29">
       <title>They say it’s a new sort of engine: but the <fixed-case>SUMP</fixed-case>’s still there</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="1a428294">T87-1029</url>
       <bibkey>sparck-jones-1987-say</bibkey>
     </paper>
@@ -191,19 +191,19 @@
     </paper>
     <paper id="31">
       <title>Reference and Pragmatic Identification</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <url hash="dc36c0aa">T87-1031</url>
       <bibkey>appelt-1987-reference</bibkey>
     </paper>
     <paper id="32">
       <title>Determiners, Entities, and Contexts</title>
-      <author><first>Deborah A.</first><last>Dahl</last></author>
+      <author id="deborah-a-dahl"><first>Deborah A.</first><last>Dahl</last></author>
       <url hash="f686ea0c">T87-1032</url>
       <bibkey>dahl-1987-determiners</bibkey>
     </paper>
     <paper id="33">
       <title>Position Paper: Event Reference</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <url hash="e1393b5f">T87-1033</url>
       <bibkey>webber-1987-position</bibkey>
     </paper>
@@ -254,37 +254,37 @@
     </paper>
     <paper id="41">
       <title>Generation - A New Frontier of Natural Language Processing?</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="790d55b0">T87-1041</url>
       <bibkey>joshi-1987-generation</bibkey>
     </paper>
     <paper id="42">
       <title>Bidirectional Grammars and the Design of Natural Language Generation Systems</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
       <url hash="57158c2e">T87-1042</url>
       <bibkey>appelt-1987-bidirectional</bibkey>
     </paper>
     <paper id="43">
       <title>Limits on the human sentence generator</title>
-      <author><first>Anthony S.</first><last>Kroch</last></author>
+      <author id="anthony-kroch"><first>Anthony S.</first><last>Kroch</last></author>
       <url hash="a9bec19d">T87-1043</url>
       <bibkey>kroch-1987-limits</bibkey>
     </paper>
     <paper id="44">
       <title>“No Better, but no Worse, than People”</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="3b2e0a4f">T87-1044</url>
       <bibkey>mcdonald-1987-better</bibkey>
     </paper>
     <paper id="45">
       <title>What is Special About Natural Language Generation Research?</title>
-      <author><first>William C.</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William C.</first><last>Mann</last></author>
       <url hash="7aa953d7">T87-1045</url>
       <bibkey>mann-1987-special</bibkey>
     </paper>
     <paper id="46">
       <title>Generation Systems Should Choose Their Words</title>
-      <author><first>Mitchell</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
       <url hash="29787e0a">T87-1046</url>
       <bibkey>marcus-1987-generation</bibkey>
     </paper>
diff --git a/data/xml/U03.xml b/data/xml/U03.xml
index 05a2ddb123..95ec9e8aa1 100644
--- a/data/xml/U03.xml
+++ b/data/xml/U03.xml
@@ -33,7 +33,7 @@
     <paper id="3">
       <title>Application of search algorithms to natural language processing</title>
       <author><first>Takeshi</first><last>Matsumoto</last></author>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <author><first>Geoff</first><last>Jarrad</last></author>
       <pages>22–29</pages>
       <url hash="491d8c3b">U03-1003</url>
@@ -65,7 +65,7 @@
     </paper>
     <paper id="7">
       <title>The Ins and Outs of <fixed-case>D</fixed-case>utch noun countability classification</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Leonoor</first><last>van der Beek</last></author>
       <pages>53–60</pages>
       <url hash="067fea1c">U03-1007</url>
@@ -98,7 +98,7 @@
     </paper>
     <paper id="11">
       <title>Performance metrics for word sense disambiguation</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>86–93</pages>
       <url hash="799b1e35">U03-1011</url>
       <bibkey>cohn-2003-performance</bibkey>
@@ -107,7 +107,7 @@
       <title>Straight to the point: Discovering themes for summary generation</title>
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>94–101</pages>
       <url hash="8ca13a95">U03-1012</url>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="14">
       <title>Towards semantic-based overlap measures for question-answering</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>110–117</pages>
       <url hash="848d83f7">U03-1014</url>
       <bibkey>molla-2003-towards</bibkey>
@@ -147,7 +147,7 @@
     <paper id="17">
       <title>Document classification in structured military messages</title>
       <author><first>Oliver</first><last>Carr</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <pages>134–142</pages>
       <url hash="a0a61a6d">U03-1017</url>
       <bibkey>carr-estival-2003-document</bibkey>
diff --git a/data/xml/U04.xml b/data/xml/U04.xml
index 65e5643c41..ca37b51382 100644
--- a/data/xml/U04.xml
+++ b/data/xml/U04.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Australasian Language Technology Workshop 2004</booktitle>
       <url hash="3ea4943e">U04-1</url>
       <editor><first>Ash</first><last>Asudeh</last></editor>
-      <editor><first>Cecile</first><last>Paris</last></editor>
+      <editor id="cecile-paris"><first>Cecile</first><last>Paris</last></editor>
       <editor><first>Stephen</first><last>Wan</last></editor>
       <address>Sydney, Australia</address>
       <month>December</month>
@@ -19,14 +19,14 @@
     <paper id="1">
       <title>Complex, Corpus-Driven, Syntactic Features for Word Sense Disambiguation</title>
       <author><first>Ari</first><last>Chanen</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>1-8</pages>
       <url hash="f2ab7111">U04-1001</url>
       <bibkey>chanen-patrick-2004-complex</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>A</fixed-case>nswerfinder: Question Answering by Combining Lexical, Syntactic and Semantic Information</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Mary</first><last>Gardiner</last></author>
       <pages>9-16</pages>
       <url hash="15421e0d">U04-1002</url>
@@ -35,14 +35,14 @@
     <paper id="3">
       <title>Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains In A Supervised Learning Word Sense Disambiguation System</title>
       <author><first>David</first><last>Bell</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>17-24</pages>
       <url hash="1e206982">U04-1003</url>
       <bibkey>bell-patrick-2004-using</bibkey>
     </paper>
     <paper id="4">
       <title>Using a Trie-based Structure for Question Analysis</title>
-      <author><first>Luiz Augusto Sangoi</first><last>Pizzato</last></author>
+      <author id="luiz-augusto-pizzato"><first>Luiz Augusto Sangoi</first><last>Pizzato</last></author>
       <pages>25-31</pages>
       <url hash="fd18711f">U04-1004</url>
       <bibkey>pizzato-2004-using</bibkey>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="6">
       <title>Thin Parsing: A Balance between Wide Scale Parsing and Chunking</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Pham Hong</first><last>Nguyen</last></author>
       <pages>39-46</pages>
       <url hash="3b61c7c2">U04-1006</url>
@@ -108,7 +108,7 @@
       <title>Intelligent Multi Media Presentation of information in a semi-immersive Command and Control environment</title>
       <author><first>Cecile</first><last>Paris</last></author>
       <author><first>Nathalie</first><last>Colineau</last></author>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <pages>85-92</pages>
       <url hash="cbc56ec7">U04-1012</url>
       <bibkey>paris-etal-2004-intelligent</bibkey>
@@ -116,7 +116,7 @@
     <paper id="13">
       <title>Selecting Systemic Features for Text Classification</title>
       <author><first>Casey</first><last>Whitelaw</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>93-100</pages>
       <url hash="76c50a89">U04-1013</url>
       <bibkey>whitelaw-patrick-2004-selecting</bibkey>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="15">
       <title>Maximum Entropy <fixed-case>M</fixed-case>arkov Models for Semantic Role Labelling</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>109-116</pages>
       <url hash="d4bd5974">U04-1015</url>
       <bibkey>blunsom-2004-maximum</bibkey>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="22">
       <title>Differentiating Types of Verb Particle Constructions</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Jeremy</first><last>Fletcher</last></author>
       <pages>163-170</pages>
       <url hash="2eed13aa">U04-1022</url>
diff --git a/data/xml/U05.xml b/data/xml/U05.xml
index 2973fba0dc..6d3311b371 100644
--- a/data/xml/U05.xml
+++ b/data/xml/U05.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Australasian Language Technology Workshop 2005</booktitle>
       <url hash="5f198cfa">U05-1</url>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
-      <editor><first>James</first><last>Curran</last></editor>
-      <editor><first>Menno</first><last>van Zaanen</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="james-r-curran"><first>James</first><last>Curran</last></editor>
+      <editor id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></editor>
       <address>Sydney, Australia</address>
       <month>December</month>
       <year>2005</year>
@@ -18,14 +18,14 @@
     </frontmatter>
     <paper id="1">
       <title>Dimensions of Deep Grammar Validation</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <pages>1–3</pages>
       <url hash="e0ec77de">U05-1001</url>
       <bibkey>flickinger-2005-dimensions</bibkey>
     </paper>
     <paper id="2">
       <title>Text Summarization: News and Beyond</title>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>4</pages>
       <url hash="0dea1694">U05-1002</url>
       <bibkey>mckeown-2005-text</bibkey>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Disambiguating Conjunctions in Named Entities</title>
-      <author><first>Pawel</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Pawel</first><last>Mazur</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>7–14</pages>
       <url hash="0c3d5cd1">U05-1004</url>
@@ -47,7 +47,7 @@
     </paper>
     <paper id="5">
       <title>Learning of Graph Rules for Question Answering</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Menno</first><last>van Zaanen</last></author>
       <pages>15–23</pages>
       <url hash="9c722aff">U05-1005</url>
@@ -71,7 +71,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>POS</fixed-case> Tagging with a More Informative Tagset</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Timothy</first><last>Baldwin</last></author>
       <pages>40–48</pages>
       <url hash="efabc90f">U05-1008</url>
@@ -95,14 +95,14 @@
     </paper>
     <paper id="11">
       <title>Using Diverse Information Sources to Retrieve Samples of Low Density Languages</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <pages>64–70</pages>
       <url hash="c20a5319">U05-1011</url>
       <bibkey>mackinlay-2005-using</bibkey>
     </paper>
     <paper id="12">
       <title>Faking it: Synthetic Text-to-speech Synthesis for Under-resourced Languages – Experimental Design</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>71–77</pages>
       <url hash="4d7ce737">U05-1012</url>
       <bibkey>somers-2005-faking</bibkey>
@@ -133,8 +133,8 @@
     </paper>
     <paper id="16">
       <title>Extracting Exact Answers using a Meta Question Answering System</title>
-      <author><first>Luiz Augusto</first><last>Pizzato</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="luiz-augusto-pizzato"><first>Luiz Augusto</first><last>Pizzato</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>105–112</pages>
       <url hash="0350539b">U05-1016</url>
       <bibkey>pizzato-molla-2005-extracting</bibkey>
@@ -157,7 +157,7 @@
     </paper>
     <paper id="19">
       <title>Round-trip Translation: What Is It Good For?</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>127–133</pages>
       <url hash="e8d70475">U05-1019</url>
       <bibkey>somers-2005-round</bibkey>
@@ -165,7 +165,7 @@
     <paper id="20">
       <title>Evaluating the Utility of Appraisal Hierarchies as a Method for Sentiment Classification</title>
       <author><first>Jeremy</first><last>Fletcher</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>134–142</pages>
       <url hash="742ec88f">U05-1020</url>
       <bibkey>fletcher-patrick-2005-evaluating</bibkey>
@@ -189,7 +189,7 @@
     <paper id="23">
       <title>Paraphrase Identification by Text Canonicalization</title>
       <author><first>Yitao</first><last>Zhang</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>160–166</pages>
       <url hash="4029604d">U05-1023</url>
       <bibkey>zhang-patrick-2005-paraphrase</bibkey>
@@ -204,10 +204,10 @@
     </paper>
     <paper id="25">
       <title>Automatic Induction of a <fixed-case>POS</fixed-case> Tagset for <fixed-case>I</fixed-case>talian</title>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Andrea</first><last>Bolognesi</last></author>
-      <author><first>Corrado</first><last>Seidenari</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="andrea-bolognesi"><first>Andrea</first><last>Bolognesi</last></author>
+      <author id="corrado-seidenari"><first>Corrado</first><last>Seidenari</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>176–183</pages>
       <url hash="6fa8d3c1">U05-1025</url>
       <bibkey>bernardi-etal-2005-automatic</bibkey>
@@ -259,7 +259,7 @@
       <title>Design and Development of a Speech-driven Control for a In-car Personal Navigation System</title>
       <author><first>Ying</first><last>Su</last></author>
       <author><first>Tao</first><last>Bai</last></author>
-      <author><first>Catherine I.</first><last>Watson</last></author>
+      <author id="catherine-i-watson"><first>Catherine I.</first><last>Watson</last></author>
       <pages>224–232</pages>
       <url hash="487d14d5">U05-1031</url>
       <bibkey>su-etal-2005-design</bibkey>
diff --git a/data/xml/U06.xml b/data/xml/U06.xml
index 15b00f8d43..c014e0d218 100644
--- a/data/xml/U06.xml
+++ b/data/xml/U06.xml
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Robust multimodal understanding for interactive systems</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <pages>1</pages>
       <url hash="8d03f7d8">U06-1001</url>
       <bibkey>johnston-2006-robust</bibkey>
@@ -32,7 +32,7 @@
     <paper id="3">
       <title>Efficient <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Parsing</title>
       <author><first>Bojan</first><last>Djordjevic</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>3-10</pages>
       <url hash="a7a8ce87">U06-1003</url>
       <bibkey>djordjevic-curran-2006-efficient</bibkey>
@@ -65,15 +65,15 @@
       <title>Classifying Speech Acts using Verbal Response Modes</title>
       <author><first>Andrew</first><last>Lampert</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>34-41</pages>
       <url hash="8f299115">U06-1007</url>
       <bibkey>lampert-etal-2006-classifying</bibkey>
     </paper>
     <paper id="8">
       <title>Word Relatives in Context for Word Sense Disambiguation</title>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Xinglong</first><last>Wang</last></author>
       <pages>42-50</pages>
       <url hash="4d8a3864">U06-1008</url>
@@ -81,8 +81,8 @@
     </paper>
     <paper id="9">
       <title>Named Entity Recognition for Question Answering</title>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Daniel</first><last>Smith</last></author>
       <pages>51-58</pages>
       <url hash="3b2d2685">U06-1009</url>
@@ -92,7 +92,7 @@
       <title>Named Entity Recognition for Astronomy Literature</title>
       <author><first>Tara</first><last>Murphy</last></author>
       <author><first>Tara</first><last>McIntosh</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>59-66</pages>
       <url hash="e2bdd5b1">U06-1010</url>
       <bibkey>murphy-etal-2006-named</bibkey>
@@ -100,15 +100,15 @@
     <paper id="11">
       <title>Die Morphologie (f): Targeted Lexical Acquisition for Languages other than <fixed-case>E</fixed-case>nglish</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>67-74</pages>
       <url hash="b0f501c1">U06-1011</url>
       <bibkey>nicholson-etal-2006-die</bibkey>
     </paper>
     <paper id="12">
       <title>Automatic Mapping Clinical Notes to Medical Terminologies</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Yefeng</first><last>Wang</last></author>
       <author><first>Peter</first><last>Budd</last></author>
       <pages>75-82</pages>
@@ -117,9 +117,9 @@
     </paper>
     <paper id="13">
       <title>Pseudo Relevance Feedback Using Named Entities for Question Answering</title>
-      <author><first>Luiz Augusto</first><last>Pizzato</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="luiz-augusto-pizzato"><first>Luiz Augusto</first><last>Pizzato</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>83-90</pages>
       <url hash="c33ad45b">U06-1013</url>
       <bibkey>pizzato-etal-2006-pseudo</bibkey>
@@ -133,7 +133,7 @@
     </paper>
     <paper id="15">
       <title>Web Readability and Computer-Assisted Language Learning</title>
-      <author><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
       <pages>99-106</pages>
       <url hash="7cfbe950">U06-1015</url>
       <bibkey>uitdenbogerd-2006-web</bibkey>
@@ -167,7 +167,7 @@
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>131-138</pages>
       <url hash="1652a564">U06-1019</url>
       <bibkey>wan-etal-2006-using</bibkey>
@@ -175,7 +175,7 @@
     <paper id="20">
       <title>Verb Sense Disambiguation Using Selectional Preferences Extracted with a State-of-the-art Semantic Role Labeler</title>
       <author><first>Patrick</first><last>Ye</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>139-148</pages>
       <url hash="e60990c4">U06-1020</url>
       <bibkey>ye-baldwin-2006-verb</bibkey>
@@ -191,7 +191,7 @@
     <paper id="22">
       <title>Analysis and Prediction of User Behaviour in a Museum Environment</title>
       <author><first>Karl</first><last>Grieser</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <pages>157-158</pages>
       <url hash="049bded4">U06-1022</url>
@@ -206,7 +206,7 @@
     </paper>
     <paper id="24">
       <title>Probabilities improve stress-prediction in a <fixed-case>CFG</fixed-case> of <fixed-case>H</fixed-case>awaiian phonology</title>
-      <author><first>‘Ōiwi</first><last>Parker Jones</last></author>
+      <author id="oiwi-parker-jones"><first>‘Ōiwi</first><last>Parker Jones</last></author>
       <pages>161-162</pages>
       <url hash="924ffd2a">U06-1024</url>
       <bibkey>parker-jones-2006-probabilities</bibkey>
@@ -215,7 +215,7 @@
       <title>Towards Cognitive Optimisation of a Search Engine Interface</title>
       <author><first>Kenneth</first><last>Treharne</last></author>
       <author><first>Darius</first><last>Pfitzner</last></author>
-      <author><first>David M W</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M W</first><last>Powers</last></author>
       <pages>163-164</pages>
       <url hash="5fdd796e">U06-1025</url>
       <bibkey>treharne-etal-2006-towards</bibkey>
@@ -233,7 +233,7 @@
     <paper id="27">
       <title>Extracting Patient Clinical Profiles from Case Reports</title>
       <author><first>Yitao</first><last>Zhang</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>167-168</pages>
       <url hash="9b929ba7">U06-1027</url>
       <bibkey>zhang-patrick-2006-extracting</bibkey>
diff --git a/data/xml/U07.xml b/data/xml/U07.xml
index 010ff7d4ce..f1361cff5b 100644
--- a/data/xml/U07.xml
+++ b/data/xml/U07.xml
@@ -47,14 +47,14 @@
     <paper id="5">
       <title>Measuring Correlation Between Linguist’s Judgments and <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation Topics</title>
       <author><first>Ari</first><last>Chanen</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>13–20</pages>
       <url hash="cd54f3b0">U07-1005</url>
       <bibkey>chanen-patrick-2007-measuring</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>TAT</fixed-case>: An Author Profiling Tool with Application to <fixed-case>A</fixed-case>rabic Emails</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Tanja</first><last>Gaustad</last></author>
       <author><first>Son Bao</first><last>Pham</last></author>
       <author><first>Will</first><last>Radford</last></author>
@@ -75,7 +75,7 @@
       <title>Practical Queries of a Massive n-gram Database</title>
       <author><first>Tobias</first><last>Hawker</last></author>
       <author><first>Mary</first><last>Gardiner</last></author>
-      <author><first>Andrew</first><last>Bennetts</last></author>
+      <author id="andrew-bennetts"><first>Andrew</first><last>Bennetts</last></author>
       <pages>40–48</pages>
       <url hash="2d819850">U07-1008</url>
       <bibkey>hawker-etal-2007-practical</bibkey>
@@ -84,15 +84,15 @@
       <title>Extending Sense Collocations in Interpreting Noun Compounds</title>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Meladel</first><last>Mistica</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>49–56</pages>
       <url hash="e9ca7a54">U07-1009</url>
       <bibkey>kim-etal-2007-extending</bibkey>
     </paper>
     <paper id="10">
       <title>Named Entity Recognition in Question Answering of Speech Data</title>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
       <pages>57–65</pages>
       <url hash="0c5cbb8e">U07-1010</url>
@@ -101,7 +101,7 @@
     <paper id="11">
       <title>Experiments in Mutual Exclusion Bootstrapping</title>
       <author><first>Tara</first><last>Murphy</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>66–74</pages>
       <url hash="0550d6ed">U07-1011</url>
       <bibkey>murphy-curran-2007-experiments</bibkey>
@@ -125,8 +125,8 @@
     </paper>
     <paper id="14">
       <title>Question Prediction Language Model</title>
-      <author><first>Luiz Augusto</first><last>Pizzato</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="luiz-augusto-pizzato"><first>Luiz Augusto</first><last>Pizzato</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>92–99</pages>
       <url hash="f929da8e">U07-1014</url>
       <bibkey>pizzato-molla-2007-question</bibkey>
@@ -144,7 +144,7 @@
     <paper id="16">
       <title>Parsing Internal Noun Phrase Structure with Collins’ Models</title>
       <author><first>David</first><last>Vadas</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>109–116</pages>
       <url hash="46e176a4">U07-1016</url>
       <bibkey>vadas-curran-2007-parsing</bibkey>
@@ -160,7 +160,7 @@
     <paper id="18">
       <title>Dictionary Alignment for Context-sensitive Word Glossing</title>
       <author><first>Willy</first><last>Yap</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>125–133</pages>
       <url hash="f22a9a8e">U07-1018</url>
       <bibkey>yap-baldwin-2007-dictionary</bibkey>
@@ -177,7 +177,7 @@
       <title>Exploring Extensions to Machine-learning based Gene Normalisation</title>
       <author><first>Benjamin</first><last>Goudey</last></author>
       <author><first>Nicola</first><last>Stokes</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>143–145</pages>
       <url hash="6088e372">U07-1020</url>
       <bibkey>goudey-etal-2007-exploring</bibkey>
@@ -185,7 +185,7 @@
     <paper id="21">
       <title>Distributional Similarity of Multi-Word Expressions</title>
       <author><first>Laura</first><last>Ingram</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>146–148</pages>
       <url hash="ce036004">U07-1021</url>
       <bibkey>ingram-curran-2007-distributional</bibkey>
@@ -193,7 +193,7 @@
     <paper id="22">
       <title>Extending <fixed-case>CCG</fixed-case>bank with Quotes and Multi-modal <fixed-case>CCG</fixed-case></title>
       <author><first>Daniel</first><last>Tse</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>149–151</pages>
       <url hash="981e8f73">U07-1022</url>
       <bibkey>tse-curran-2007-extending</bibkey>
diff --git a/data/xml/U08.xml b/data/xml/U08.xml
index f2870628b7..6299d337f9 100644
--- a/data/xml/U08.xml
+++ b/data/xml/U08.xml
@@ -25,14 +25,14 @@
     <paper id="2">
       <title>Answer Attenuation in Question Answering</title>
       <author><first>Katie</first><last>Bell</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>2–10</pages>
       <url hash="3fdefe0c">U08-1002</url>
       <bibkey>bell-curran-2008-answer</bibkey>
     </paper>
     <paper id="3">
       <title>Using Multiple Sources of Agreement Information for Sentiment Classification of Political Transcripts</title>
-      <author><first>Clint</first><last>Burfoot</last></author>
+      <author id="clint-burfoot"><first>Clint</first><last>Burfoot</last></author>
       <pages>11–18</pages>
       <url hash="dc4c43a1">U08-1003</url>
       <bibkey>burfoot-2008-using</bibkey>
@@ -40,7 +40,7 @@
     <paper id="4">
       <title>All-Topology, Semi-Abstract Syntactic Features for Text Categorization</title>
       <author><first>Ari</first><last>Chanen</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>19–27</pages>
       <url hash="0b24f539">U08-1004</url>
       <bibkey>chanen-patrick-2008-topology</bibkey>
@@ -57,7 +57,7 @@
     <paper id="6">
       <title>Automatic Acquisition of Training Data for Statistical Parsers</title>
       <author><first>Susan</first><last>Howlett</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>37–45</pages>
       <url hash="6bacd471">U08-1006</url>
       <bibkey>howlett-curran-2008-automatic</bibkey>
@@ -73,7 +73,7 @@
     <paper id="8">
       <title>Classification of Verb Particle Constructions with the <fixed-case>G</fixed-case>oogle <fixed-case>W</fixed-case>eb1<fixed-case>T</fixed-case> Corpus</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>55–63</pages>
       <url hash="b66a2fb4">U08-1008</url>
       <bibkey>kummerfeld-curran-2008-classification</bibkey>
@@ -82,7 +82,7 @@
       <title>Requests and Commitments in Email are More Complex Than You Think: Eight Reasons to be Cautious</title>
       <author><first>Andrew</first><last>Lampert</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>64–72</pages>
       <url hash="ae0f40ec">U08-1009</url>
       <bibkey>lampert-etal-2008-requests</bibkey>
@@ -99,7 +99,7 @@
     <paper id="11">
       <title>Automatic Event Reference Identification</title>
       <author><first>Olivia</first><last>March</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>79–87</pages>
       <url hash="49d117de">U08-1011</url>
       <bibkey>march-baldwin-2008-automatic</bibkey>
@@ -107,7 +107,7 @@
     <paper id="12">
       <title>Comparing the Value of Latent Semantic Analysis on two <fixed-case>E</fixed-case>nglish-to-<fixed-case>I</fixed-case>ndonesian lexical mapping tasks</title>
       <author><first>Eliza</first><last>Margaretha</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>88–96</pages>
       <url hash="d0313e84">U08-1012</url>
       <bibkey>margaretha-manurung-2008-comparing</bibkey>
@@ -115,7 +115,7 @@
     <paper id="13">
       <title>Weighted Mutual Exclusion Bootstrapping for Domain Independent Lexicon and Template Acquisition</title>
       <author><first>Tara</first><last>McIntosh</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>97–105</pages>
       <url hash="06f5cc69">U08-1013</url>
       <bibkey>mcintosh-curran-2008-weighted</bibkey>
@@ -125,7 +125,7 @@
       <author><first>Dominick</first><last>Ng</last></author>
       <author><first>David J.</first><last>Kedziora</last></author>
       <author><first>Terry T. W.</first><last>Miu</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>106–114</pages>
       <url hash="b4fc1628">U08-1014</url>
       <bibkey>ng-etal-2008-investigating</bibkey>
@@ -133,7 +133,7 @@
     <paper id="15">
       <title>Learning Count Classifier Preferences of <fixed-case>M</fixed-case>alay Nouns</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>115–123</pages>
       <url hash="8c22047a">U08-1015</url>
       <bibkey>nicholson-baldwin-2008-learning</bibkey>
@@ -141,7 +141,7 @@
     <paper id="16">
       <title>Transforming <fixed-case>W</fixed-case>ikipedia into Named Entity Training Data</title>
       <author><first>Joel</first><last>Nothman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
       <pages>124–132</pages>
       <url hash="5a51cc6a">U08-1016</url>
@@ -149,10 +149,10 @@
     </paper>
     <paper id="17">
       <title>Fit it in but say it well!</title>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Nathalie</first><last>Colineau</last></author>
       <author><first>Andrew</first><last>Lampert</last></author>
-      <author><first>Joan Giralt</first><last>Duran</last></author>
+      <author id="joan-giralt-duran"><first>Joan Giralt</first><last>Duran</last></author>
       <pages>133–141</pages>
       <url hash="3a8e8991">U08-1017</url>
       <bibkey>paris-etal-2008-fit</bibkey>
@@ -161,7 +161,7 @@
       <title>A Two-Level Morphological Analyser for the <fixed-case>I</fixed-case>ndonesian Language</title>
       <author><first>Femphy</first><last>Pisceldo</last></author>
       <author><first>Rahmad</first><last>Mahendra</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <author><first>I Wayan</first><last>Arka</last></author>
       <pages>142–150</pages>
       <url hash="235c4045">U08-1018</url>
@@ -170,7 +170,7 @@
     <paper id="19">
       <title>Punctuation Normalisation for Cleaner Treebanks and Parsers</title>
       <author><first>Daniel</first><last>Tse</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>151–159</pages>
       <url hash="d87030d2">U08-1019</url>
       <bibkey>tse-curran-2008-punctuation</bibkey>
diff --git a/data/xml/U09.xml b/data/xml/U09.xml
index 36e19553a8..55e9b3bd26 100644
--- a/data/xml/U09.xml
+++ b/data/xml/U09.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2009</booktitle>
       <url hash="09ebaa34">U09-1</url>
-      <editor><first>Luiz Augusto</first><last>Pizzato</last></editor>
+      <editor id="luiz-augusto-pizzato"><first>Luiz Augusto</first><last>Pizzato</last></editor>
       <editor><first>Rolf</first><last>Schwitter</last></editor>
       <address>Sydney, Australia</address>
       <month>December</month>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>HCSN</fixed-case>et Plenary Talk: Spoken Dialogue Models for Virtual Humans</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>1–1</pages>
       <url hash="1c9babd0">U09-1001</url>
       <bibkey>traum-2009-hcsnet</bibkey>
@@ -34,7 +34,7 @@
       <title>Tracking Information Flow in Financial Text</title>
       <author><first>Will</first><last>Radford</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Maria</first><last>Milosavljevic</last></author>
       <pages>11–19</pages>
       <url hash="4758006a">U09-1003</url>
@@ -45,7 +45,7 @@
       <author><first>Nicky</first><last>Ringland</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>20–28</pages>
       <url hash="0eab2cfe">U09-1004</url>
       <bibkey>ringland-etal-2009-classifying</bibkey>
@@ -60,7 +60,7 @@
     <paper id="6">
       <title>Corpus-based Extraction of <fixed-case>J</fixed-case>apanese Compound Verbs</title>
       <author><first>James</first><last>Breen</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>35–43</pages>
       <url hash="6c51b043">U09-1006</url>
       <bibkey>breen-baldwin-2009-corpus</bibkey>
@@ -69,7 +69,7 @@
       <title>Double Double, Morphology and Trouble: Looking into Reduplication in <fixed-case>I</fixed-case>ndonesian</title>
       <author><first>Meladel</first><last>Mistica</last></author>
       <author><first>I Wayan</first><last>Arka</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Avery</first><last>Andrews</last></author>
       <pages>44–52</pages>
       <url hash="ef239ea0">U09-1007</url>
@@ -87,7 +87,7 @@
       <title>Faster parsing and supertagging model estimation</title>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Jessika</first><last>Roesner</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>62–70</pages>
       <url hash="9b2a26e0">U09-1009</url>
       <bibkey>kummerfeld-etal-2009-faster</bibkey>
@@ -95,7 +95,7 @@
     <paper id="10">
       <title><fixed-case>CCG</fixed-case> parsing with one syntactic structure per n-gram</title>
       <author><first>Tim</first><last>Dawborn</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>71–79</pages>
       <url hash="b59d6f6d">U09-1010</url>
       <bibkey>dawborn-curran-2009-ccg</bibkey>
@@ -112,7 +112,7 @@
       <title>A Sentiment Detection Engine for <fixed-case>I</fixed-case>nternet Stock Message Boards</title>
       <author><first>Christopher</first><last>Chua</last></author>
       <author><first>Maria</first><last>Milosavljevic</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>89–93</pages>
       <url hash="61667f26">U09-1012</url>
       <bibkey>chua-etal-2009-sentiment</bibkey>
@@ -120,7 +120,7 @@
     <paper id="13">
       <title>Extracting Domain-Specific Words - A Statistical Approach</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>94–98</pages>
       <url hash="1898803b">U09-1013</url>
@@ -128,7 +128,7 @@
     </paper>
     <paper id="14">
       <title>A Cascade Approach to Extracting Medication Events</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Min</first><last>Li</last></author>
       <pages>99–103</pages>
       <url hash="6d1d0b35">U09-1014</url>
@@ -137,7 +137,7 @@
     <paper id="15">
       <title>Improved Text Categorisation for <fixed-case>W</fixed-case>ikipedia Named Entities</title>
       <author><first>Sam</first><last>Tardif</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
       <pages>104–108</pages>
       <url hash="ec311a24">U09-1015</url>
@@ -146,7 +146,7 @@
     <paper id="16">
       <title>Towards a flexible platform for voice accent and expression selection on a Healthcare Robot</title>
       <author><first>Aleksandar</first><last>Igic</last></author>
-      <author><first>Catherine</first><last>Watson</last></author>
+      <author id="catherine-i-watson"><first>Catherine</first><last>Watson</last></author>
       <author><first>Jonathan</first><last>Teutenberg</last></author>
       <author><first>Elizabeth</first><last>Broadbent</last></author>
       <author><first>Rie</first><last>Tamagawa</last></author>
@@ -158,7 +158,7 @@
     <paper id="17">
       <title>Integrating Verb-Particle Constructions into <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>James</first><last>Constable</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>114–118</pages>
       <url hash="3783c190">U09-1017</url>
       <bibkey>constable-curran-2009-integrating</bibkey>
diff --git a/data/xml/U10.xml b/data/xml/U10.xml
index ab73240546..2d4b961c16 100644
--- a/data/xml/U10.xml
+++ b/data/xml/U10.xml
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Opinion Mining, Subjectivity and Factuality</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>2–2</pages>
       <url hash="71b1c6c4">U10-1001</url>
       <bibkey>delmonte-2010-opinion</bibkey>
@@ -32,7 +32,7 @@
     </paper>
     <paper id="3">
       <title>Multilingual Language Identification: <fixed-case>ALTW</fixed-case> 2010 Shared Task Data</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Marco</first><last>Lui</last></author>
       <pages>4–7</pages>
       <url hash="e7777d6d">U10-1003</url>
@@ -57,7 +57,7 @@
       <title>Thread-level Analysis over Technical User Forum Data</title>
       <author><first>Li</first><last>Wang</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>27–31</pages>
       <url hash="a2343984">U10-1006</url>
       <bibkey>wang-etal-2010-thread</bibkey>
@@ -73,8 +73,8 @@
     </paper>
     <paper id="8">
       <title>Information Extraction of Multiple Categories from Pathology Reports</title>
-      <author id="yue-li"><first>Yue</first><last>Li</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author><first>Yue</first><last>Li</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>41–48</pages>
       <url hash="c8b7a99f">U10-1008</url>
       <bibkey>li-martinez-2010-information</bibkey>
@@ -82,14 +82,14 @@
     <paper id="9">
       <title>Classifying User Forum Participants: Separating the Gurus from the Hacks, and Other Tales of the <fixed-case>I</fixed-case>nternet</title>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>49–57</pages>
       <url hash="1dd6bf43">U10-1009</url>
       <bibkey>lui-baldwin-2010-classifying</bibkey>
     </paper>
     <paper id="10">
       <title>Fun with Filtering <fixed-case>F</fixed-case>rench</title>
-      <author><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
       <pages>58–66</pages>
       <url hash="f218e3ec">U10-1010</url>
       <bibkey>uitdenbogerd-2010-fun</bibkey>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="12">
       <title>A Corpus for Evidence Based Medicine Summarisation</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>76–80</pages>
       <url hash="a7083c99">U10-1012</url>
       <bibkey>molla-2010-corpus</bibkey>
@@ -121,7 +121,7 @@
       <title>Reranking a wide-coverage ccg parser</title>
       <author><first>Dominick</first><last>Ng</last></author>
       <author><first>Matthew</first><last>Honnibal</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>90–98</pages>
       <url hash="8babd56d">U10-1014</url>
       <bibkey>ng-etal-2010-reranking</bibkey>
diff --git a/data/xml/U11.xml b/data/xml/U11.xml
index 7b932c3761..e94ada9a9c 100644
--- a/data/xml/U11.xml
+++ b/data/xml/U11.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2011</booktitle>
       <url hash="31667fe7">U11-1</url>
-      <editor><first>Diego</first><last>Molla</last></editor>
-      <editor><first>David</first><last>Martinez</last></editor>
+      <editor id="diego-molla"><first>Diego</first><last>Molla</last></editor>
+      <editor id="david-martinez"><first>David</first><last>Martinez</last></editor>
       <address>Canberra, Australia</address>
       <month>December</month>
       <year>2011</year>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>O</fixed-case>z<fixed-case>CLO</fixed-case>: The <fixed-case>A</fixed-case>ustralian Computational Linguistic Olympiad</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <pages>3–3</pages>
       <url hash="3e781c0b">U11-1002</url>
       <bibkey>estival-2011-ozclo</bibkey>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>A Particle Filter algorithm for <fixed-case>B</fixed-case>ayesian Wordsegmentation</title>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>10–18</pages>
       <url hash="687210b4">U11-1004</url>
@@ -47,9 +47,9 @@
     </paper>
     <paper id="5">
       <title>Formalizing Semantic Parsing with Tree Transducers</title>
-      <author><first>Bevan</first><last>Jones</last></author>
+      <author id="bevan-jones"><first>Bevan</first><last>Jones</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>19–28</pages>
       <url hash="b6af6bd4">U11-1005</url>
       <bibkey>jones-etal-2011-formalizing</bibkey>
@@ -89,7 +89,7 @@
     <paper id="10">
       <title>Frontier Pruning for Shift-Reduce <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Stephen</first><last>Merity</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>66–75</pages>
       <url hash="fbec9702">U11-1010</url>
       <bibkey>merity-curran-2011-frontier</bibkey>
@@ -97,8 +97,8 @@
     <paper id="11">
       <title>Predicting Thread Linking Structure by Lexical Chaining</title>
       <author><first>Li</first><last>Wang</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>76–85</pages>
       <url hash="204639fd">U11-1011</url>
       <bibkey>wang-etal-2011-predicting-thread</bibkey>
@@ -115,7 +115,7 @@
       <title>Collocations in Multilingual Natural Language Generation: Lexical Functions meet <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
       <author><first>François</first><last>Lareau</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>95–104</pages>
       <url hash="aeff3f85">U11-1013</url>
@@ -125,7 +125,7 @@
       <title>Outcome Polarity Identification of Medical Papers</title>
       <author><first>Abeed</first><last>Sarker</last></author>
       <author><first>Diego</first><last>Molla</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>105–114</pages>
       <url hash="f3842db5">U11-1014</url>
       <bibkey>sarker-etal-2011-outcome</bibkey>
diff --git a/data/xml/U12.xml b/data/xml/U12.xml
index e8285f8c53..7b0135678d 100644
--- a/data/xml/U12.xml
+++ b/data/xml/U12.xml
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Using a large annotated historical corpus to study word-specific effects in sound change</title>
-      <author><first>Jennifer</first><last>Hay</last></author>
+      <author id="jennifer-hay"><first>Jennifer</first><last>Hay</last></author>
       <pages>2–2</pages>
       <url hash="a981bad7">U12-1001</url>
       <bibkey>hay-2012-using</bibkey>
@@ -54,7 +54,7 @@
       <author><first>Teresa</first><last>Lynn</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
       <pages>23–32</pages>
       <url hash="459b5025">U12-1005</url>
       <bibkey>lynn-etal-2012-active</bibkey>
@@ -62,8 +62,8 @@
     <paper id="6">
       <title>Unsupervised Estimation of Word Usage Similarity</title>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>33–41</pages>
       <url hash="ae7dc370">U12-1006</url>
       <bibkey>lui-etal-2012-unsupervised</bibkey>
@@ -87,7 +87,7 @@
     <paper id="9">
       <title>Segmentation and Translation of <fixed-case>J</fixed-case>apanese Multi-word Loanwords</title>
       <author><first>James</first><last>Breen</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>61–69</pages>
       <url hash="1fb823bf">U12-1009</url>
@@ -98,7 +98,7 @@
     <paper id="10">
       <title>Measurement of Progress in Machine Translation</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Aaron</first><last>Harwood</last></author>
       <author><first>Alistair</first><last>Moffat</last></author>
       <author><first>Justin</first><last>Zobel</last></author>
@@ -109,8 +109,8 @@
     <paper id="11">
       <title>Towards Two-step Multi-document Summarisation for Evidence Based Medicine: A Quantitative Analysis</title>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Diego</first><last>Mollá-Aliod</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá-Aliod</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>79–87</pages>
       <url hash="044f8307">U12-1011</url>
       <bibkey>sarker-etal-2012-towards</bibkey>
@@ -119,7 +119,7 @@
       <title>In Your Eyes: Identifying Clichés in Song Lyrics</title>
       <author><first>Alex G.</first><last>Smith</last></author>
       <author><first>Christopher X. S.</first><last>Zee</last></author>
-      <author><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra L.</first><last>Uitdenbogerd</last></author>
       <pages>88–96</pages>
       <url hash="b81c19fb">U12-1012</url>
       <bibkey>smith-etal-2012-eyes</bibkey>
@@ -144,7 +144,7 @@
     <paper id="15">
       <title><fixed-case>L</fixed-case>a<fixed-case>BB</fixed-case>-<fixed-case>CAT</fixed-case>: an Annotation Store</title>
       <author><first>Robert</first><last>Fromont</last></author>
-      <author><first>Jennifer</first><last>Hay</last></author>
+      <author id="jennifer-hay"><first>Jennifer</first><last>Hay</last></author>
       <pages>113–117</pages>
       <url hash="3c187367">U12-1015</url>
       <bibkey>fromont-hay-2012-labb</bibkey>
@@ -152,9 +152,9 @@
     <paper id="16">
       <title>Classification of Study Region in Environmental Science Abstracts</title>
       <author><first>Jared</first><last>Willett</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Angus</first><last>Webb</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="j-angus-webb"><first>Angus</first><last>Webb</last></author>
       <pages>118–122</pages>
       <url hash="e69903d8">U12-1016</url>
       <bibkey>willett-etal-2012-classification</bibkey>
@@ -162,8 +162,8 @@
     <paper id="17">
       <title>Overview of the <fixed-case>ALTA</fixed-case> 2012 Shared Task</title>
       <author><first>Iman</first><last>Amini</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>124–129</pages>
       <url hash="cea484c6">U12-1017</url>
       <bibkey>amini-etal-2012-overview</bibkey>
@@ -171,7 +171,7 @@
     <paper id="18">
       <title>Automatic sentence classifier using sentence ordering features for Event Based Medicine: Shared task system description</title>
       <author><first>Spandana</first><last>Gella</last></author>
-      <author><last>Duong Thanh</last><first>Long</first></author>
+      <author id="long-duong"><first>Long</first><last>Duong Thanh</last></author>
       <pages>130–133</pages>
       <url hash="4fc31cb4">U12-1018</url>
       <bibkey>gella-duong-thanh-2012-automatic</bibkey>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="20">
       <title>Experiments with Clustering-based Features for Sentence Classification in Medical Publications: <fixed-case>M</fixed-case>acquarie Test’s participation in the <fixed-case>ALTA</fixed-case> 2012 shared task.</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>139–142</pages>
       <url hash="e48043c0">U12-1020</url>
       <bibkey>molla-2012-experiments</bibkey>
diff --git a/data/xml/U13.xml b/data/xml/U13.xml
index f35125e49a..cb4995ac5a 100644
--- a/data/xml/U13.xml
+++ b/data/xml/U13.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2013 (<fixed-case>ALTA</fixed-case> 2013)</booktitle>
       <url hash="4965ad50">U13-1</url>
       <editor><first>Sarvnaz</first><last>Karimi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <address>Brisbane, Australia</address>
       <month>December</month>
       <year>2013</year>
@@ -17,14 +17,14 @@
     </frontmatter>
     <paper id="1">
       <title>Robust Computational Semantics</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>2–2</pages>
       <url hash="b6a1bda5">U13-1001</url>
       <bibkey>steedman-2013-robust</bibkey>
     </paper>
     <paper id="2">
       <title>Concurrent Discourse Relations</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>3–3</pages>
       <url hash="0c6c7ad7">U13-1002</url>
       <bibkey>webber-2013-concurrent</bibkey>
@@ -40,7 +40,7 @@
     <paper id="4">
       <title>Crowd-Sourcing of Human Judgments of Machine Translation Fluency</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Alistair</first><last>Moffat</last></author>
       <author><first>Justin</first><last>Zobel</last></author>
       <pages>16–24</pages>
@@ -64,9 +64,9 @@
     </paper>
     <paper id="7">
       <title>Examining the Impact of Coreference Resolution on Quote Attribution</title>
-      <author><first>Tim</first><last>O’Keefe</last></author>
+      <author id="tim-okeefe"><first>Tim</first><last>O’Keefe</last></author>
       <author><first>Kellie</first><last>Webster</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Irena</first><last>Koprinska</last></author>
       <pages>43–52</pages>
       <url hash="1633bd4e">U13-1007</url>
@@ -76,8 +76,8 @@
       <title>Multi-Objective Optimization for Clustering of Medical Publications</title>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>K</first><last>Ravikumar</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="ravikumar-komandur"><first>K</first><last>Ravikumar</last></author>
       <pages>53–61</pages>
       <url hash="e089c188">U13-1008</url>
       <bibkey>ekbal-etal-2013-multi</bibkey>
@@ -85,7 +85,7 @@
     <paper id="9">
       <title>A Study: From Electronic Laboratory Notebooks to Generated Queries for Literature Recommendation</title>
       <author><first>Oldooz</first><last>Dianat</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
       <pages>62–70</pages>
       <url hash="e7a19d90">U13-1009</url>
@@ -111,7 +111,7 @@
       <title>Impact of Corpus Diversity and Complexity on <fixed-case>NER</fixed-case> Performance</title>
       <author><first>Tatyana</first><last>Shmanina</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Lawrence</first><last>Cavedon</last></author>
       <author><first>Karin</first><last>Verspoor</last></author>
       <pages>91–95</pages>
@@ -144,7 +144,7 @@
     </paper>
     <paper id="16">
       <title>Rhythm, Metrics, and the Link to Phonology</title>
-      <author><first>Jason</first><last>Brown</last></author>
+      <author id="jason-katz-brown"><first>Jason</first><last>Brown</last></author>
       <author><first>Sam</first><last>Mandal</last></author>
       <pages>112–117</pages>
       <url hash="2bff497e">U13-1016</url>
@@ -160,16 +160,16 @@
     <paper id="18">
       <title>Automatic Climate Classification of Environmental Science Literature</title>
       <author><first>Jared</first><last>Willett</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>J. Angus</first><last>Webb</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="j-angus-webb"><first>J. Angus</first><last>Webb</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>123–130</pages>
       <url hash="091cdd10">U13-1018</url>
       <bibkey>willett-etal-2013-automatic</bibkey>
     </paper>
     <paper id="19">
       <title>Overview of the 2013 <fixed-case>ALTA</fixed-case> Shared Task</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <pages>132–136</pages>
       <url hash="ad754a1c">U13-1019</url>
       <bibkey>molla-2013-overview</bibkey>
diff --git a/data/xml/U14.xml b/data/xml/U14.xml
index d1b57a8d80..ada517ebbb 100644
--- a/data/xml/U14.xml
+++ b/data/xml/U14.xml
@@ -17,15 +17,15 @@
     </frontmatter>
     <paper id="1">
       <title>Deep <fixed-case>QA</fixed-case>: Moving beyond the hype to examine the challenges in creating a cognitive assistant for humans</title>
-      <author><first>Jennifer</first><last>Lai</last></author>
+      <author id="jennifer-c-lai"><first>Jennifer</first><last>Lai</last></author>
       <pages>2–2</pages>
       <url hash="76731fc8">U14-1001</url>
       <bibkey>lai-2014-deep</bibkey>
     </paper>
     <paper id="2">
       <title>The Effect of Dependency Representation Scheme on Syntactic Language Modelling</title>
-      <author><first>Sunghwan</first><last>Kim</last></author>
-      <author><first>John</first><last>Pate</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan</first><last>Kim</last></author>
+      <author id="john-k-pate"><first>John</first><last>Pate</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>4–13</pages>
       <url hash="c96567b2">U14-1002</url>
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>Automated Generation of Test Suites for Error Analysis of Concept Recognition Systems</title>
       <author><first>Tudor</first><last>Groza</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>23–31</pages>
       <url hash="719c175c">U14-1004</url>
       <bibkey>groza-verspoor-2014-automated</bibkey>
@@ -51,7 +51,7 @@
       <title>Trading accuracy for faster named entity linking</title>
       <author><first>Kristy</first><last>Hughes</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>32–40</pages>
       <url hash="0928c8e5">U14-1005</url>
       <bibkey>hughes-etal-2014-trading</bibkey>
@@ -60,7 +60,7 @@
       <title>Unsupervised Biographical Event Extraction Using <fixed-case>W</fixed-case>ikipedia Traffic</title>
       <author><first>Alexander</first><last>Hogue</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>41–49</pages>
       <url hash="3f2b6027">U14-1006</url>
       <bibkey>hogue-etal-2014-unsupervised</bibkey>
@@ -94,8 +94,8 @@
     </paper>
     <paper id="10">
       <title>Impact of Citing Papers for Summarisation of Clinical Documents</title>
-      <author><first>Diego</first><last>Mollá</last></author>
-      <author><first>Christopher</first><last>Jones</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
+      <author id="christopher-jones"><first>Christopher</first><last>Jones</last></author>
       <author><first>Abeed</first><last>Sarker</last></author>
       <pages>79–87</pages>
       <url hash="74426db7">U14-1010</url>
@@ -123,7 +123,7 @@
     </paper>
     <paper id="13">
       <title>Alveo, a Human Communication Science Virtual Laboratory</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
       <pages>104–107</pages>
       <url hash="911b7166">U14-1013</url>
@@ -140,7 +140,7 @@
     <paper id="15">
       <title>Exploring Temporal Patterns in Emergency Department Triage Notes with Topic Models</title>
       <author><first>Simon</first><last>Kocbek</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
       <pages>113–117</pages>
       <url hash="ff783b21">U14-1015</url>
@@ -157,8 +157,8 @@
     </paper>
     <paper id="17">
       <title>Deep Belief Networks and Biomedical Text Categorisation</title>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Justin</first><last>Bedo</last></author>
       <author><first>Rahil</first><last>Garvani</last></author>
       <author><first>Qiang</first><last>Chen</last></author>
@@ -178,7 +178,7 @@
     <paper id="19">
       <title>Analysis of Coreference Relations in the Biomedical Literature</title>
       <author><first>Miji</first><last>Choi</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Justin</first><last>Zobel</last></author>
       <pages>134–138</pages>
       <url hash="cc22de53">U14-1019</url>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>F</fixed-case>innish Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>139–144</pages>
       <url hash="16a8ba87">U14-1020</url>
@@ -194,14 +194,14 @@
     </paper>
     <paper id="21">
       <title>A Data-driven Approach to Studying Given Names and their Gender and Ethnicity Associations</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>145–149</pages>
       <url hash="c463ae10">U14-1021</url>
       <bibkey>malmasi-2014-data</bibkey>
     </paper>
     <paper id="22">
       <title>Overview of the 2014 <fixed-case>ALTA</fixed-case> Shared Task: Identifying Expressions of Locations in Tweets</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <pages>151–156</pages>
       <url hash="6ece1b3c">U14-1022</url>
@@ -210,8 +210,8 @@
     <paper id="23">
       <title>Identifying <fixed-case>T</fixed-case>witter Location Mentions</title>
       <author><first>Bo</first><last>Han</last></author>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Qiang</first><last>Chen</last></author>
       <pages>157–162</pages>
       <url hash="ca9deb3b">U14-1023</url>
@@ -234,7 +234,7 @@
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Miji</first><last>Choi</last></author>
       <author><first>Ping</first><last>Tan</last></author>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <pages>171–176</pages>
       <url hash="3a18b926">U14-1025</url>
       <bibkey>liu-etal-2014-automatic</bibkey>
diff --git a/data/xml/U15.xml b/data/xml/U15.xml
index 8fe00eb946..a0bd4e5949 100644
--- a/data/xml/U15.xml
+++ b/data/xml/U15.xml
@@ -19,7 +19,7 @@
       <title>Query-Based Single Document Summarization Using an Ensemble Noisy Auto-Encoder</title>
       <author><first>Mahmood Yousefi</first><last>Azar</last></author>
       <author><first>Kairit</first><last>Sirts</last></author>
-      <author><first>Diego</first><last>Mollá Aliod</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá Aliod</last></author>
       <author><first>Len</first><last>Hamey</last></author>
       <pages>2–10</pages>
       <url hash="4288b15d">U15-1001</url>
@@ -64,7 +64,7 @@
     <paper id="6">
       <title>Similarity Metrics for Clustering <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed Abstracts for Evidence Based Medicine</title>
       <author><first>Hamed</first><last>Hassanzadeh</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Tudor</first><last>Groza</last></author>
       <author><first>Anthony</first><last>Nguyen</last></author>
       <author><first>Jane</first><last>Hunter</last></author>
@@ -74,7 +74,7 @@
     </paper>
     <paper id="7">
       <title>Finding Names in Trove: Named Entity Recognition for <fixed-case>A</fixed-case>ustralian Historical Newspapers</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
       <pages>57–65</pages>
       <url hash="1a8335cd">U15-1007</url>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="8">
       <title>Clinical Information Extraction Using Word Representations</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Hamed</first><last>Hassanzadeh</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>66–74</pages>
@@ -101,8 +101,8 @@
     <paper id="10">
       <title>Domain Adaption of Named Entity Recognition to Support Credit Risk Assessment</title>
       <author><first>Julio Cesar</first><last>Salinas Alvarado</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>84–90</pages>
       <url hash="5edfde0b">U15-1010</url>
       <bibkey>salinas-alvarado-etal-2015-domain</bibkey>
@@ -118,8 +118,8 @@
     <paper id="12">
       <title>Structural Alignment as the Basis to Improve Significant Change Detection in Versioned Sentences</title>
       <author><first>Ping Ping</first><last>Tan</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <pages>101–109</pages>
       <url hash="a1f31014">U15-1012</url>
       <bibkey>tan-etal-2015-structural</bibkey>
@@ -147,7 +147,7 @@
       <author><first>Atif</first><last>Ahmad</last></author>
       <author><first>Christoph</first><last>Breidbach</last></author>
       <author><first>David</first><last>Malet</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>122–127</pages>
       <url hash="4f238c83">U15-1015</url>
       <bibkey>nothman-etal-2015-understanding</bibkey>
@@ -163,7 +163,7 @@
     <paper id="17">
       <title>Overview of the 2015 <fixed-case>ALTA</fixed-case> Shared Task: Identifying <fixed-case>F</fixed-case>rench Cognates in <fixed-case>E</fixed-case>nglish Text</title>
       <author><first>Laurianne</first><last>Sitbon</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Haoxing</first><last>Wang</last></author>
       <pages>134–137</pages>
       <url hash="8b04a85b">U15-1017</url>
@@ -171,7 +171,7 @@
     </paper>
     <paper id="18">
       <title>Cognate Identification using Machine Translation</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>138–141</pages>
       <url hash="36d04605">U15-1018</url>
@@ -179,7 +179,7 @@
     </paper>
     <paper id="19">
       <title>Word Transformation Heuristics Agains Lexicons for Cognate Detection</title>
-      <author><first>Alexandra</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra</first><last>Uitdenbogerd</last></author>
       <pages>142–144</pages>
       <url hash="6ff056b3">U15-1019</url>
       <bibkey>uitdenbogerd-2015-word</bibkey>
diff --git a/data/xml/U16.xml b/data/xml/U16.xml
index 38ce38d04e..722fc7697c 100644
--- a/data/xml/U16.xml
+++ b/data/xml/U16.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2016</booktitle>
       <url hash="c636c524">U16-1</url>
-      <editor><first>Trevor</first><last>Cohn</last></editor>
+      <editor id="trevor-cohn"><first>Trevor</first><last>Cohn</last></editor>
       <address>Melbourne, Australia</address>
       <month>December</month>
       <year>2016</year>
@@ -17,7 +17,7 @@
     <paper id="1">
       <title>Improving Neural Translation Models with Linguistic Factors</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Trevor</first><last>Cohn</last></author>
       <pages>7–14</pages>
       <url hash="36b3b56d">U16-1001</url>
@@ -46,8 +46,8 @@
     <paper id="4">
       <title>Syndromic Surveillance using Generic Medical Entities on <fixed-case>T</fixed-case>witter</title>
       <author><first>Pin</first><last>Huang</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
       <pages>35–44</pages>
       <url hash="2073a71f">U16-1004</url>
       <bibkey>huang-etal-2016-syndromic</bibkey>
@@ -56,7 +56,7 @@
       <title>Syndromic Surveillance through Measuring Lexical Shift in Emergency Department Chief Complaint Texts</title>
       <author><first>Hafsah</first><last>Aamer</last></author>
       <author><first>Bahadorreza</first><last>Ofoghi</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>45–53</pages>
       <url hash="5b85a4aa">U16-1005</url>
       <bibkey>aamer-etal-2016-syndromic</bibkey>
@@ -73,8 +73,8 @@
     </paper>
     <paper id="7">
       <title><fixed-case>ASM</fixed-case> Kernel: Graph Kernel using Approximate Subgraph Matching for Relation Extraction</title>
-      <author><first>Nagesh C.</first><last>Panyam</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="nagesh-c-panyam"><first>Nagesh C.</first><last>Panyam</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Trevor</first><last>Cohn</last></author>
       <author><first>Rao</first><last>Kotagiri</last></author>
       <pages>65–73</pages>
@@ -103,7 +103,7 @@
       <title>The Role of Features and Context on Suicide Ideation Detection</title>
       <author><first>Yufei</first><last>Wang</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>94–102</pages>
       <url hash="503bcf12">U16-1010</url>
       <bibkey>wang-etal-2016-role</bibkey>
@@ -111,7 +111,7 @@
     <paper id="11">
       <title>Featureless Domain-Specific Term Extraction with Minimal Labelled Data</title>
       <author><first>Rui</first><last>Wang</last></author>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <author><first>Chris</first><last>McDonald</last></author>
       <pages>103–112</pages>
       <url hash="02af7c2c">U16-1011</url>
@@ -128,8 +128,8 @@
       <title>How Challenging is Sarcasm versus Irony Classification?: A Study With a Dataset from <fixed-case>E</fixed-case>nglish Literature</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Vaibhav</first><last>Tripathi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <author><first>Meghna</first><last>Singh</last></author>
       <author><first>Jaya</first><last>Saraswati</last></author>
       <author><first>Rajita</first><last>Shukla</last></author>
@@ -140,7 +140,7 @@
     <paper id="14">
       <title>Learning cascaded latent variable models for biomedical text classification</title>
       <author><first>Ming</first><last>Liu</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Wray</first><last>Buntine</last></author>
       <pages>128–132</pages>
       <url hash="c4cabd89">U16-1014</url>
@@ -149,8 +149,8 @@
     <paper id="15">
       <title>Temporal Modelling of Geospatial Words in <fixed-case>T</fixed-case>witter</title>
       <author><first>Bo</first><last>Han</last></author>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Lianhua</first><last>Chi</last></author>
       <pages>133–137</pages>
       <url hash="cfe3ba5c">U16-1015</url>
@@ -158,8 +158,8 @@
     </paper>
     <paper id="16">
       <title><fixed-case>NER</fixed-case> for Medical Entities in <fixed-case>T</fixed-case>witter using Sequence to Sequence Neural Networks</title>
-      <author><first>Antonio Jimeno</first><last>Yepes</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <pages>138–142</pages>
       <url hash="66270c4d">U16-1016</url>
       <bibkey>yepes-mackinlay-2016-ner</bibkey>
@@ -196,7 +196,7 @@
       <title>Overview of the 2016 <fixed-case>ALTA</fixed-case> Shared Task: Cross-<fixed-case>KB</fixed-case> Coreference</title>
       <author><first>Andrew</first><last>Chisholm</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>161–164</pages>
       <url hash="75918551">U16-1020</url>
       <bibkey>chisholm-etal-2016-overview</bibkey>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="22">
       <title>Filter and Match Approach to Pair-wise Web <fixed-case>URI</fixed-case> Linking</title>
-      <author><first>S.</first><last>Shivashankar</last></author>
+      <author id="shivashankar-subramanian"><first>S.</first><last>Shivashankar</last></author>
       <author><first>Yitong</first><last>Li</last></author>
       <author><first>Afshin</first><last>Rahimi</last></author>
       <pages>170–174</pages>
diff --git a/data/xml/U17.xml b/data/xml/U17.xml
index c07f01eec1..2ac05bb512 100644
--- a/data/xml/U17.xml
+++ b/data/xml/U17.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2017</booktitle>
       <url hash="46e88c72">U17-1</url>
       <editor><first>Jojo Sze-Meng</first><last>Wong</last></editor>
-      <editor><first>Gholamreza</first><last>Haffari</last></editor>
+      <editor id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></editor>
       <address>Brisbane, Australia</address>
       <month>December</month>
       <year>2017</year>
@@ -26,17 +26,17 @@
     <paper id="2">
       <title>Improving End-to-End Memory Networks with Unified Weight Tying</title>
       <author id="fei-liu-unimelb"><first>Fei</first><last>Liu</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>16–24</pages>
       <url hash="3c25392b">U17-1002</url>
       <bibkey>liu-etal-2017-improving</bibkey>
     </paper>
     <paper id="3">
       <title>Joint Sentence-Document Model for Manifesto Text Analysis</title>
-      <author><first>Shivashankar</first><last>Subramanian</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="shivashankar-subramanian"><first>Shivashankar</first><last>Subramanian</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
       <pages>25–33</pages>
       <url hash="306cbd2b">U17-1003</url>
@@ -56,7 +56,7 @@
       <title>A Hybrid Model for Quality Assessment of <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>43–52</pages>
       <url hash="3009c002">U17-1005</url>
       <bibkey>shen-etal-2017-hybrid</bibkey>
@@ -64,7 +64,7 @@
     <paper id="6">
       <title>Phonemic Transcription of Low-Resource Tonal Languages</title>
       <author><first>Oliver</first><last>Adams</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Alexis</first><last>Michaud</last></author>
       <pages>53–60</pages>
@@ -81,17 +81,17 @@
     <paper id="8">
       <title>Automatic Negation and Speculation Detection in Veterinary Clinical Text</title>
       <author><first>Katherine</first><last>Cheng</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>70–78</pages>
       <url hash="40bcae26">U17-1008</url>
       <bibkey>cheng-etal-2017-automatic</bibkey>
     </paper>
     <paper id="9">
       <title>Medication and Adverse Event Extraction from Noisy Text</title>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>79–87</pages>
       <url hash="c981c1b9">U17-1009</url>
       <bibkey>dai-etal-2017-medication</bibkey>
@@ -106,7 +106,7 @@
     <paper id="11">
       <title>On Extending Neural Networks with Loss Ensembles for Text Classification</title>
       <author><first>Hamideh</first><last>Hajiabadi</last></author>
-      <author><first>Diego</first><last>Molla-Aliod</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla-Aliod</last></author>
       <author><first>Reza</first><last>Monsefi</last></author>
       <pages>98–102</pages>
       <url hash="9aceab9c">U17-1011</url>
@@ -114,7 +114,7 @@
     </paper>
     <paper id="12">
       <title>Towards the Use of Deep Reinforcement Learning with Global Policy for Query-based Extractive Summarisation</title>
-      <author><first>Diego</first><last>Mollá-Aliod</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá-Aliod</last></author>
       <pages>103–107</pages>
       <url hash="6897344a">U17-1012</url>
       <bibkey>molla-aliod-2017-towards</bibkey>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="14">
       <title>Overview of the 2017 <fixed-case>ALTA</fixed-case> Shared Task: Correcting <fixed-case>OCR</fixed-case> Errors</title>
-      <author><first>Diego</first><last>Mollá-Aliod</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá-Aliod</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
       <pages>115–118</pages>
       <url hash="f385b744">U17-1014</url>
diff --git a/data/xml/U18.xml b/data/xml/U18.xml
index 5fb7aca9be..9d09c78348 100644
--- a/data/xml/U18.xml
+++ b/data/xml/U18.xml
@@ -4,8 +4,8 @@
     <meta>
       <booktitle>Proceedings of the Australasian Language Technology Association Workshop 2018</booktitle>
       <url hash="d16bf8c3">U18-1</url>
-      <editor><first>Sunghwan Mac</first><last>Kim</last></editor>
-      <editor><first>Xiuzhen (Jenny)</first><last>Zhang</last></editor>
+      <editor id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></editor>
+      <editor id="xiuzhen-jenny-zhang"><first>Xiuzhen (Jenny)</first><last>Zhang</last></editor>
       <address>Dunedin, New Zealand</address>
       <month>December</month>
       <year>2018</year>
@@ -18,8 +18,8 @@
     <paper id="1">
       <title>Improved Neural Machine Translation using Side Information</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>6–16</pages>
       <url hash="46593160">U18-1001</url>
       <abstract>In this work, we investigate whether side information is helpful in neural machine translation (NMT). We study various kinds of side information, including topical information, personal trait, then propose different ways of incorporating them into the existing NMT models. Our experimental results show the benefits of side information in improving the NMT models.</abstract>
@@ -75,11 +75,11 @@
     <paper id="7">
       <title>Exploring Textual and Speech information in Dialogue Act Classification with Speaker Domain Adaptation</title>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Quan</first><last>Tran</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
       <author><first>William</first><last>Havard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>61–65</pages>
       <url hash="ffff6bd6">U18-1007</url>
       <abstract>In spite of the recent success of Dialogue Act (DA) classification, the majority of prior works focus on text-based classification with oracle transcriptions, i.e. human transcriptions, instead of Automatic Speech Recognition (ASR)’s transcriptions. In spoken dialog systems, however, the agent would only have access to noisy ASR transcriptions, which may further suffer performance degradation due to domain shift. In this paper, we explore the effectiveness of using both acoustic and textual signals, either oracle or ASR transcriptions, and investigate speaker domain adaptation for DA classification. Our multimodal model proves to be superior to the unimodal models, particularly when the oracle transcriptions are not available. We also propose an effective method for speaker domain adaptation, which achieves competitive results.</abstract>
@@ -98,7 +98,7 @@
       <title>A Comparative Study of Embedding Models in Predicting the Compositionality of Multiword Expressions</title>
       <author><first>Navnita</first><last>Nandakumar</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>71–76</pages>
       <url hash="3d7aad37">U18-1009</url>
       <abstract>In this paper, we perform a comparative evaluation of off-the-shelf embedding models over the task of compositionality prediction of multiword expressions("MWEs"). Our experimental results suggest that character- and document-level models capture knowledge of MWE compositionality and are effective in modelling varying levels of compositionality, with the advantage over word-level models that they do not require token-level identification of MWEs in the training corpus.</abstract>
@@ -107,8 +107,8 @@
     <paper id="10">
       <title>Towards Efficient Machine Translation Evaluation by Modelling Annotators</title>
       <author><first>Nitika</first><last>Mathur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>77–82</pages>
       <url hash="618332c5">U18-1010</url>
       <abstract>Accurate evaluation of translation has long been a difficult, yet important problem. Current evaluations use direct assessment (DA), based on crowd sourcing judgements from a large pool of workers, along with quality control checks, and a robust method for combining redundant judgements. In this paper we show that the quality control mechanism is overly conservative, which increases the time and expense of the evaluation. We propose a model that does not rely on a pre-processing step to filter workers and takes into account varying annotator reliabilities. Our model effectively weights each worker's scores based on the inferred precision of the worker, and is much more reliable than the mean of either the raw scores or the standardised scores. We also show that DA does not deliver on the promise of longitudinal evaluation, and propose redesigning the structure of the annotation tasks that can solve this problem.</abstract>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="11">
       <title>Overview of the 2018 <fixed-case>ALTA</fixed-case> Shared Task: Classifying Patent Applications</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Dilesha</first><last>Seneviratne</last></author>
       <pages>84–88</pages>
       <url hash="b16206b8">U18-1011</url>
@@ -126,7 +126,7 @@
     <paper id="12">
       <title>Classifying Patent Applications with Ensemble Methods</title>
       <author><first>Fernando</first><last>Benites</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>89–92</pages>
       <url hash="fe24ec3a">U18-1012</url>
diff --git a/data/xml/U19.xml b/data/xml/U19.xml
index 014fde1cce..0e1321592e 100644
--- a/data/xml/U19.xml
+++ b/data/xml/U19.xml
@@ -6,7 +6,7 @@
       <url hash="f09ad8ed">U19-1</url>
       <editor><first>Meladel</first><last>Mistica</last></editor>
       <editor><first>Massimo</first><last>Piccardi</last></editor>
-      <editor><first>Andrew</first><last>MacKinlay</last></editor>
+      <editor id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></editor>
       <publisher>Australasian Language Technology Association</publisher>
       <address>Sydney, Australia</address>
       <month>4--6 December</month>
@@ -30,7 +30,7 @@
       <title>From Shakespeare to <fixed-case>L</fixed-case>i-<fixed-case>B</fixed-case>ai: Adapting a Sonnet Model to <fixed-case>C</fixed-case>hinese Poetry</title>
       <author><first>Zhuohan</first><last>Xie</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>10–18</pages>
       <abstract>In this paper, we adapt Deep-speare, a joint neural network model for English sonnets, to Chinese poetry. We illustrate characteristics of Chinese quatrain and explain our architecture as well as training and generation procedure, which differs from Shakespeare sonnets in several aspects. We analyse the generated poetry and find that model works well for Chinese poetry, as it can: (1) generate coherent 4-line quatrains of different topics; and (2) capture rhyme automatically (to a certain extent).</abstract>
       <url hash="cf4c181e">U19-1002</url>
@@ -39,7 +39,7 @@
     <paper id="3">
       <title>Readability of <fixed-case>T</fixed-case>witter Tweets for Second Language Learners</title>
       <author><first>Patrick</first><last>Jacob</last></author>
-      <author><first>Alexandra</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra</first><last>Uitdenbogerd</last></author>
       <pages>19–27</pages>
       <abstract>Optimal language acquisition via reading requires the learners to read slightly above their current language skill level. Identifying material at the right level is the essential role of automatic readability measurement. Short message platforms such as Twitter offer the opportunity for language practice while reading about current topics and engaging in conversation in small doses, and can be filtered according to linguistic criteria to suit the learner. In this research, we explore how readable tweets are for English language learners and which factors contribute to their readability. With participants from six language groups, we collected 14,659 data points, each representing a tweet from a pool of 4100 tweets, and a judgement of perceived readability. Traditional readability measures and features failed on the data-set, but demographic data showed that judgements were largely genuine and reflected reported language skill, which is consistent with other recent studies. We report on the properties of the data set and implications for future research.</abstract>
       <url hash="f153dffd">U19-1003</url>
@@ -57,9 +57,9 @@
       <title>Modelling <fixed-case>T</fixed-case>ibetan Verbal Morphology</title>
       <author><first>Qianji</first><last>Di</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
-      <author><first>Tim</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></author>
       <pages>35–40</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="d94410e4">U19-1005</url>
       <bibkey>di-etal-2019-modelling</bibkey>
     </paper>
@@ -77,9 +77,9 @@
       <author><first>Aili</first><last>Shen</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Jianzhong</first><last>Qi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>47–51</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="5f7d60a3">U19-1007</url>
       <bibkey>shen-etal-2019-feature</bibkey>
     </paper>
@@ -87,7 +87,7 @@
       <title>Red-faced <fixed-case>ROUGE</fixed-case>: Examining the Suitability of <fixed-case>ROUGE</fixed-case> for Opinion Summary Evaluation</title>
       <author><first>Wenyi</first><last>Tay</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Xiuzhen</first><last>Zhang</last></author>
+      <author id="xiuzhen-jenny-zhang"><first>Xiuzhen</first><last>Zhang</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
       <pages>52–60</pages>
@@ -109,7 +109,7 @@
       <title>Improved Document Modelling with a Neural Discourse Parser</title>
       <author><first>Fajri</first><last>Koto</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>67–76</pages>
       <abstract>Despite the success of attention-based neural models for natural language generation and classification tasks, they are unable to capture the discourse structure of larger documents. We hypothesize that explicit discourse representations have utility for NLP tasks over longer documents or document sequences, which sequence-to-sequence models are unable to capture. For abstractive summarization, for instance, conventional neural models simply match source documents and the summary in a latent space without explicit representation of text structure or relations. In this paper, we propose to use neural discourse representations obtained from a rhetorical structure theory (RST) parser to enhance document representations. Specifically, document representations are generated for discourse spans, known as the elementary discourse units (EDUs). We empirically investigate the benefit of the proposed approach on two different tasks: abstractive summarization and popularity prediction of online petitions. We find that the proposed approach leads to substantial improvements in all cases.</abstract>
       <url hash="1e04d100">U19-1010</url>
@@ -119,7 +119,7 @@
       <title>Does an <fixed-case>LSTM</fixed-case> forget more than a <fixed-case>CNN</fixed-case>? An empirical study of catastrophic forgetting in <fixed-case>NLP</fixed-case></title>
       <author><first>Gaurav</first><last>Arora</last></author>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>77–86</pages>
       <abstract>Catastrophic forgetting — whereby a model trained on one task is fine-tuned on a second, and in doing so, suffers a “catastrophic” drop in performance over the first task — is a hurdle in the development of better transfer learning techniques. Despite impressive progress in reducing catastrophic forgetting, we have limited understanding of how different architectures and hyper-parameters affect forgetting in a network. With this study, we aim to understand factors which cause forgetting during sequential training. Our primary finding is that CNNs forget less than LSTMs. We show that max-pooling is the underlying operation which helps CNNs alleviate forgetting compared to LSTMs. We also found that curriculum learning, placing a hard task towards the end of task sequence, reduces forgetting. We analysed the effect of fine-tuning contextual embeddings on catastrophic forgetting and found that using embeddings as feature extractor is preferable to fine-tuning in continual learning setup.</abstract>
       <url hash="5d63513f">U19-1011</url>
@@ -138,10 +138,10 @@
     <paper id="13">
       <title>A Pointer Network Architecture for Context-Dependent Semantic Parsing</title>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Quan</first><last>Tran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="quan-hung-tran"><first>Quan</first><last>Tran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>94–99</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="aa4b0473">U19-1013</url>
       <bibkey>he-etal-2019-pointer</bibkey>
     </paper>
@@ -153,8 +153,8 @@
       <author><first>Christian</first><last>Druckenbrodt</last></author>
       <author><first>Camilo</first><last>Thorne</last></author>
       <author><first>Saber A.</first><last>Akhondi</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>100–110</pages>
       <abstract>Extracting chemical reactions from patents is a crucial task for chemists working on chemical exploration. In this paper we introduce the novel task of detecting the textual spans that describe or refer to chemical reactions within patents. We formulate this task as a paragraph-level sequence tagging problem, where the system is required to return a sequence of paragraphs which contain a description of a reaction. To address this new task, we construct an annotated dataset from an existing proprietary database of chemical reactions manually extracted from patents. We introduce several baseline methods for the task and evaluate them over our dataset. Through error analysis, we discuss what makes the task complex and challenging, and suggest possible directions for future research.</abstract>
       <url hash="748e8b9c">U19-1014</url>
@@ -195,7 +195,7 @@
     <paper id="18">
       <title>Measuring <fixed-case>E</fixed-case>nglish Readability for <fixed-case>V</fixed-case>ietnamese Speakers</title>
       <author><first>Phuoc</first><last>Nguyen</last></author>
-      <author><first>Alexandra</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra</first><last>Uitdenbogerd</last></author>
       <pages>136–145</pages>
       <abstract>Reading is important for any language learner, but the difficulty level of the text needs to match a reader’s level to enable efficient learning of new vocabulary. Many widely used traditional readability measures are not effective for those who speak English as a second or additional language. This study examines English readability for Vietnamese native speakers (VL1). A collection of text difficulty judgements of nearly 100 English text passages was obtained from 12 VL1 participants, using a 5-point Likert scale. Using the same basic features found in traditional English readability measures we found that SVMs and Dale-Chall features were slightly better than linear models using either Flesch or Dale-Chall. VL1 participants’ text judgements were strongly correlated with their past IELTS test scores. This study introduces a first approximation to readability of English text for VL1, with suggestions for further improvements.</abstract>
       <url hash="1af0cef3">U19-1018</url>
@@ -216,7 +216,7 @@
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>C Raina</first><last>MacIntyre</last></author>
       <pages>151–158</pages>
       <abstract>Multi-Task Learning (MTL) has been an attractive approach to deal with limited labeled datasets or leverage related tasks, for a variety of NLP problems. We examine the benefit of MTL for three specific pairs of health informatics tasks that deal with: (a) overlapping symptoms for the same classification problem (personal health mention classification for influenza and for a set of symptoms); (b) overlapping medical concepts for related classification problems (vaccine usage and drug usage detection); and, (c) related classification problems (vaccination intent and vaccination relevance detection). We experiment with a simple neural architecture: a shared layer followed by task-specific dense layers. The novelty of this work is that it compares alternatives for shared layers for these pairs of tasks. While our observations agree with the promise of MTL as compared to single-task learning, for health informatics, we show that the benefit also comes with caveats in terms of the choice of shared layers and the relatedness between the participating tasks.</abstract>
@@ -226,10 +226,10 @@
     <paper id="21">
       <title>Difficulty-aware Distractor Generation for Gap-Fill Items</title>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Benjamin</first><last>Tsou</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin</first><last>Tsou</last></author>
       <pages>159–164</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="23ae24e6">U19-1021</url>
       <bibkey>yeung-etal-2019-difficulty</bibkey>
     </paper>
@@ -247,9 +247,9 @@
       <title>Neural Versus Non-Neural Text Simplification: A Case Study</title>
       <author><first>Islam</first><last>Nassar</last></author>
       <author><first>Michelle</first><last>Ananda-Rajah</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>172–177</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="2ffd4389">U19-1023</url>
       <bibkey>nassar-etal-2019-neural</bibkey>
     </paper>
@@ -276,7 +276,7 @@
     </paper>
     <paper id="26">
       <title>Overview of the 2019 <fixed-case>ALTA</fixed-case> Shared Task: Sarcasm Target Identification</title>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
       <pages>192–196</pages>
       <abstract>We present an overview of the 2019 ALTA shared task. This is the 10th of the series of shared tasks organised by ALTA since 2010. The task was to detect the target of sarcastic comments posted on social media. We intro- duce the task, describe the data and present the results of baselines and participants. This year’s shared task was particularly challenging and no participating systems improved the re- sults of our baseline.</abstract>
diff --git a/data/xml/W00.xml b/data/xml/W00.xml
index 66d563f280..b88ca91098 100644
--- a/data/xml/W00.xml
+++ b/data/xml/W00.xml
@@ -37,7 +37,7 @@
     </paper>
     <paper id="5">
       <title>Dependency of context-based Word Sense Disambiguation from representation and domain complexity</title>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Roma</first><last>Velardi</last></author>
       <url hash="10e826f0">W00-0105</url>
       <bibkey>velardi-velardi-2000-dependency</bibkey>
@@ -51,7 +51,7 @@
     <paper id="7">
       <title>A Measure of Semantic Complexity for Natural Language Systems</title>
       <author><first>Shannon</first><last>Pollard</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <url hash="771ad074">W00-0107</url>
       <bibkey>pollard-biermann-2000-measure</bibkey>
     </paper>
@@ -63,7 +63,7 @@
     </paper>
     <paper id="9">
       <title>Partially Saturated Referents as a Source of Complexity in Semantic Interpretation</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="ca9aa960">W00-0109</url>
       <bibkey>mcdonald-2000-partially</bibkey>
     </paper>
@@ -88,23 +88,23 @@
     </frontmatter>
     <paper id="1">
       <title>An Interlingual-based Approach to Reference Resolution</title>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <url hash="08b22f25">W00-0201</url>
       <bibkey>farwell-2000-interlingual</bibkey>
     </paper>
     <paper id="2">
       <title>Representations of Actions as an Interlingua</title>
-      <author><first>Karin Christine</first><last>Kipper</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="karin-kipper"><first>Karin Christine</first><last>Kipper</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="7211c18f">W00-0202</url>
       <bibkey>kipper-palmer-2000-representations</bibkey>
     </paper>
     <paper id="3">
       <title>Evaluation of a Practical Interlingua for Task-Oriented Dialogue</title>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
       <author><first>Taro</first><last>Watanabe</last></author>
       <url hash="cab431d0">W00-0203</url>
@@ -112,34 +112,34 @@
     </paper>
     <paper id="4">
       <title>An interlingua aiming at communication on the Web: How language-independent can it be?</title>
-      <author><first>Ronaldo Teixeira</first><last>Martins</last></author>
+      <author id="ronaldo-teixeira-martins"><first>Ronaldo Teixeira</first><last>Martins</last></author>
       <author><first>Lucia Helena Machado</first><last>Rino</last></author>
-      <author><first>Maria</first><last>das Gracas Volpe Nunes</last></author>
-      <author><first>Gisele</first><last>Montilha</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Gracas Volpe Nunes</last></author>
+      <author id="gisele-montilha-pinheiro"><first>Gisele</first><last>Montilha</last></author>
       <author><first>Osvaldo Novais</first><last>de Oliveira</last></author>
       <url hash="eeb4d2fb">W00-0204</url>
       <bibkey>martins-etal-2000-interlingua</bibkey>
     </paper>
     <paper id="5">
       <title>Telicity as a Cue to Temporaland Discourse Structure in <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation</title>
-      <author><first>Mari</first><last>Olsen</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="mari-broman-olsen"><first>Mari</first><last>Olsen</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <author><first>Ron</first><last>Dolan</last></author>
       <url hash="b9c6deea">W00-0205</url>
       <bibkey>olsen-etal-2000-telicity</bibkey>
     </paper>
     <paper id="6">
       <title>An Application of the Interlingua System <fixed-case>ISS</fixed-case> for <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Pronominal Anaphora Generation</title>
-      <author><first>Jesus</first><last>Peral</last></author>
-      <author><first>Antonio</first><last>Ferrandez</last></author>
+      <author id="jesus-peral"><first>Jesus</first><last>Peral</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrandez</last></author>
       <url hash="a4a43a4e">W00-0206</url>
       <bibkey>peral-ferrandez-2000-application</bibkey>
     </paper>
     <paper id="7">
       <title>Generation from Lexical Conceptual Structures</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <url hash="8c35dab4">W00-0207</url>
       <bibkey>traum-habash-2000-generation</bibkey>
@@ -157,7 +157,7 @@
     </frontmatter>
     <paper id="1">
       <title>Lessons Learned in Building Spoken Language Collaborative Interface Agents</title>
-      <author><first>Candace L.</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace L.</first><last>Sidner</last></author>
       <author><first>Carolyn</first><last>Boettner</last></author>
       <author><first>Charles</first><last>Rich</last></author>
       <url hash="ed571983">W00-0301</url>
@@ -166,7 +166,7 @@
     <paper id="2">
       <title><fixed-case>G</fixed-case>o<fixed-case>D</fixed-case>i<fixed-case>S</fixed-case>- An Accommodating Dialogue System</title>
       <author><first>Staffan</first><last>Larsson</last></author>
-      <author><first>Peter</first><last>Ljunglof</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglof</last></author>
       <author><first>Robin</first><last>Cooper</last></author>
       <author><first>Elisabet</first><last>Engdahl</last></author>
       <author><first>Stina</first><last>Ericsson</last></author>
@@ -175,17 +175,17 @@
     </paper>
     <paper id="3">
       <title>Dialogue Management in the Mercury Flight Reservation System</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Joseph</first><last>Polifroni</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="joseph-polifroni"><first>Joseph</first><last>Polifroni</last></author>
       <url hash="3ee14a7d">W00-0303</url>
       <bibkey>seneff-polifroni-2000-dialogue</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>NJF</fixed-case>un- A Reinforcement Learning Spoken Dialogue System</title>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
-      <author><first>Michael</first><last>Kearns</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="michael-s-kearns"><first>Michael</first><last>Kearns</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <url hash="d4cc5ad7">W00-0304</url>
       <bibkey>litman-etal-2000-njfun</bibkey>
     </paper>
@@ -197,57 +197,57 @@
     </paper>
     <paper id="6">
       <title>Stochastic Language Generation for Spoken Dialogue Systems</title>
-      <author><first>Alice H.</first><last>Oh</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alice-oh"><first>Alice H.</first><last>Oh</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="216d5910">W00-0306</url>
       <bibkey>oh-rudnicky-2000-stochastic</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>TRIPS</fixed-case>- 911 System Demonstration</title>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <author><first>Dave</first><last>Costello</last></author>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
       <author><first>George</first><last>Ferguson</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <url hash="9e54ca49">W00-0307</url>
       <bibkey>allen-etal-2000-trips</bibkey>
     </paper>
     <paper id="8">
       <title>Epiphenomenal Grammar Acquisition with <fixed-case>GSG</fixed-case></title>
-      <author><first>Marsal</first><last>Gavalda</last></author>
+      <author id="marsal-gavalda"><first>Marsal</first><last>Gavalda</last></author>
       <url hash="8aedebb8">W00-0308</url>
       <bibkey>gavalda-2000-epiphenomenal</bibkey>
     </paper>
     <paper id="9">
       <title>Task-based dialog management using an agenda</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <url hash="3a1617d4">W00-0309</url>
       <bibkey>xu-rudnicky-2000-task</bibkey>
     </paper>
     <paper id="10">
       <title>Using Dialogue Representations for Concept-to-Speech Generation</title>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <url hash="104658dd">W00-0310</url>
       <bibkey>nakatani-chu-carroll-2000-using</bibkey>
     </paper>
     <paper id="11">
       <title>A Compact Architecture for Dialogue Management Based on Scripts and Meta-Outputs</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Frankie</first><last>James</last></author>
       <url hash="bbf7eec5">W00-0311</url>
       <bibkey>rayner-etal-2000-compact-architecture</bibkey>
     </paper>
     <paper id="12">
       <title>Building a Robust Dialogue System with Limited Data</title>
-      <author><first>Sharon J.</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon J.</first><last>Goldwater</last></author>
       <author><first>Elizabeth Owen</first><last>Bratt</last></author>
-      <author><first>Jean Mark</first><last>Gawron</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="jean-mark-gawron"><first>Jean Mark</first><last>Gawron</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <url hash="98446ba7">W00-0312</url>
       <bibkey>goldwater-etal-2000-building</bibkey>
     </paper>
@@ -271,17 +271,17 @@
     </paper>
     <paper id="2">
       <title>Mining Discourse Markers for <fixed-case>C</fixed-case>hinese Textual Summarization</title>
-      <author><first>Samuel W. K.</first><last>Chan</last></author>
-      <author><first>Tom B. Y.</first><last>Lai</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W. K.</first><last>Chan</last></author>
+      <author id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></author>
       <author><first>W. J.</first><last>Gao</last></author>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
       <url hash="4fb6e79c">W00-0402</url>
       <bibkey>chan-etal-2000-mining</bibkey>
     </paper>
     <paper id="3">
       <title>Centroid-based summarization of multiple documents: sentence extraction, utility-based evaluation, and user studies</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <author><first>Malgorzata</first><last>Budzikowska</last></author>
       <revision id="1" href="W00-0403v1" hash="f70b94cd"/>
       <revision id="2" href="W00-0403v2" hash="2435b1cb">No description of the changes were recorded.</revision>
@@ -297,9 +297,9 @@
     </paper>
     <paper id="5">
       <title>Multi-Document Summarization By Sentence Extraction</title>
-      <author><first>Jade</first><last>Goldstein</last></author>
-      <author><first>Vibhu</first><last>Mittal</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jade-goldstein"><first>Jade</first><last>Goldstein</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu</first><last>Mittal</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Mark</first><last>Kantrowitz</last></author>
       <url hash="0ab5efe2">W00-0405</url>
       <bibkey>goldstein-etal-2000-multi</bibkey>
@@ -318,7 +318,7 @@
     <paper id="7">
       <title>Evaluation of Phrase-Representation Summarization based on Information Retrieval Task</title>
       <author><first>Mamiko</first><last>Oka</last></author>
-      <author><first>Yoshihiro</first><last>Ueda</last></author>
+      <author id="yoshihiro-ueda"><first>Yoshihiro</first><last>Ueda</last></author>
       <url hash="12611b1a">W00-0407</url>
       <bibkey>oka-ueda-2000-evaluation</bibkey>
     </paper>
@@ -332,17 +332,17 @@
     </paper>
     <paper id="9">
       <title>Multi-document Summarization by Visualizing Topical Content</title>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Mary S.</first><last>Neff</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
       <url hash="6d651ab6">W00-0409</url>
       <bibkey>ando-etal-2000-multi</bibkey>
     </paper>
     <paper id="10">
       <title>Using Summarization for Automatic Briefing Generation</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Kristian</first><last>Concepcion</last></author>
+      <author id="kristian-concepcion"><first>Kristian</first><last>Concepcion</last></author>
       <author><first>Linda</first><last>Van Guilder</last></author>
       <url hash="6bffbb30">W00-0410</url>
       <bibkey>mani-etal-2000-using</bibkey>
@@ -360,43 +360,43 @@
     </frontmatter>
     <paper id="1">
       <title>When is an Embedded <fixed-case>MT</fixed-case> System “Good Enough” for Filtering?</title>
-      <author><first>Clare R.</first><last>Voss</last></author>
-      <author><first>Carol</first><last>Van Ess-Dykema</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>Van Ess-Dykema</last></author>
       <url hash="86fbd93a">W00-0501</url>
       <bibkey>voss-van-ess-dykema-2000-embedded</bibkey>
     </paper>
     <paper id="2">
       <title>Task Tolerance of <fixed-case>MT</fixed-case> Output in Integrated Text Processes</title>
-      <author><first>John S.</first><last>White</last></author>
-      <author><first>Jennifer B.</first><last>Doyon</last></author>
-      <author><first>Susan W.</first><last>Talbott</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
+      <author id="jennifer-doyon"><first>Jennifer B.</first><last>Doyon</last></author>
+      <author id="susan-w-talbott"><first>Susan W.</first><last>Talbott</last></author>
       <url hash="b22f9afb">W00-0502</url>
       <bibkey>white-etal-2000-task</bibkey>
     </paper>
     <paper id="3">
       <title>At Your Service: Embedded <fixed-case>MT</fixed-case> As a Service</title>
-      <author><first>Florence M.</first><last>Reeder</last></author>
+      <author id="florence-reeder"><first>Florence M.</first><last>Reeder</last></author>
       <url hash="02e5a87e">W00-0503</url>
       <bibkey>reeder-2000-service</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish Information (<fixed-case>MEI</fixed-case>): Investigating Translingual Speech Retrieval</title>
-      <author><first>Helen</first><last>Meng</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
-      <author><first>Gina</first><last>Levow</last></author>
-      <author><first>Douglas W.</first><last>Oard</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="gina-anne-levow"><first>Gina</first><last>Levow</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <url hash="e0a00ee9">W00-0504</url>
       <bibkey>meng-etal-2000-mandarin</bibkey>
     </paper>
     <paper id="5">
       <title>Towards Translingual Information Access using Portable Information Extraction</title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Nari</first><last>Kim</last></author>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Owen</first><last>Rainbow</last></author>
       <author><first>Juntae</first><last>Yoon</last></author>
       <url hash="65288f75">W00-0505</url>
@@ -414,7 +414,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype: a Computer-Aided Translation Typing System</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>George</first><last>Foster</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <url hash="699cfef4">W00-0507</url>
@@ -422,7 +422,7 @@
     </paper>
     <paper id="8">
       <title>Stochastic Finite-State models for Spoken Language Machine Translation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <url hash="feb6cf0d">W00-0508</url>
       <bibkey>bangalore-riccardi-2000-stochastic</bibkey>
@@ -458,13 +458,13 @@
     </paper>
     <paper id="2">
       <title>Some Challenges of Developing Fully-Automated Systems for Taking Audio Comprehension Exams</title>
-      <author><first>David D.</first><last>Palmer</last></author>
+      <author id="david-d-palmer"><first>David D.</first><last>Palmer</last></author>
       <url hash="914cc41c">W00-0602</url>
       <bibkey>palmer-2000-challenges</bibkey>
     </paper>
     <paper id="3">
       <title>A Rule-based Question Answering System for Reading Comprehension Tests</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Michael</first><last>Thelen</last></author>
       <url hash="bc6c6b4e">W00-0603</url>
       <bibkey>riloff-thelen-2000-rule</bibkey>
@@ -472,7 +472,7 @@
     <paper id="4">
       <title>Answer Extraction Towards better Evaluations of <fixed-case>NLP</fixed-case> Systems</title>
       <author><first>Rolf</first><last>Schwitter</last></author>
-      <author><first>Diego</first><last>Molla</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Molla</last></author>
       <author><first>Rachel</first><last>Fournier</last></author>
       <author><first>Michael</first><last>Hess</last></author>
       <url hash="72269415">W00-0604</url>
@@ -523,14 +523,14 @@
     <paper id="4">
       <title>The Role of Algorithm Bias vs Information Source in Learning Algorithms for Morphosyntactic Disambiguation</title>
       <author><first>Guy</first><last>De Pauw</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="a4eeb18d">W00-0704</url>
       <bibkey>de-pauw-daelemans-2000-role</bibkey>
     </paper>
     <paper id="5">
       <title>Increasing our Ignorance’ of Language: Identifying Language Structure in an Unknown ‘Signal’</title>
-      <author><first>John</first><last>Elliot</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="john-elliott"><first>John</first><last>Elliot</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Bill</first><last>Whyte</last></author>
       <url hash="897d1389">W00-0705</url>
       <bibkey>elliot-etal-2000-increasing</bibkey>
@@ -538,8 +538,8 @@
     <paper id="6">
       <title>A Comparison between Supervised Learning Algorithms for Word Sense Disambiguation</title>
       <author><first>Gerard</first><last>Escudero</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="c7dfad4c">W00-0706</url>
       <bibkey>escudero-etal-2000-comparison</bibkey>
     </paper>
@@ -579,13 +579,13 @@
     <paper id="12">
       <title>Knowledge-Free Induction of Morphology Using Latent Semantic Analysis</title>
       <author><first>Patrick</first><last>Schone</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="1dcdf692">W00-0712</url>
       <bibkey>schone-jurafsky-2000-knowledge</bibkey>
     </paper>
     <paper id="13">
       <title>Using Induced Rules as Complex Features in Memory-Based Language Learning</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <url hash="e2e7ccf0">W00-0713</url>
       <bibkey>van-den-bosch-2000-using</bibkey>
     </paper>
@@ -617,14 +617,14 @@
     </paper>
     <paper id="18">
       <title><fixed-case>ALL</fixed-case>i<fixed-case>S</fixed-case>: a Symbolic Learning System for Natural Language Learning</title>
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <url hash="2d08eee4">W00-0718</url>
       <bibkey>dejean-2000-allis</bibkey>
     </paper>
     <paper id="19">
       <title>Combining Text and Heuristics for Cost-Sensitive Spam Filtering</title>
-      <author><first>José M. Gómez</first><last>Hidalgo</last></author>
-      <author><first>Manual Maña</first><last>López</last></author>
+      <author id="jose-maria-gomez-hidalgo"><first>José M. Gómez</first><last>Hidalgo</last></author>
+      <author id="manuel-j-mana-lopez"><first>Manual Maña</first><last>López</last></author>
       <author><first>Enrique Puertas</first><last>Sanz</last></author>
       <url hash="dcf5b752">W00-0719</url>
       <bibkey>hidalgo-etal-2000-combining</bibkey>
@@ -632,7 +632,7 @@
     <paper id="20">
       <title>Genetic Algorithms for Feature Relevance Assignment in Memory-Based Language Processing</title>
       <author><first>Anne</first><last>Kool</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
       <url hash="1b009af1">W00-0720</url>
       <bibkey>kool-etal-2000-genetic</bibkey>
@@ -648,7 +648,7 @@
       <title>Minimal Commitment and Full Lexical Disambiguation: Balancing Rules and Hidden <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Patrick</first><last>Ruch</last></author>
       <author><first>Robert</first><last>Baud</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Gilbert</first><last>Robert</last></author>
       <url hash="22f22f05">W00-0722</url>
       <bibkey>ruch-etal-2000-minimal</bibkey>
@@ -656,13 +656,13 @@
     <paper id="23">
       <title>Learning <fixed-case>IE</fixed-case> Rules for a Set of Related Concepts</title>
       <author id="jordi-turmo"><first>J.</first><last>Turmo</last></author>
-      <author><first>H.</first><last>Rodriguez</last></author>
+      <author id="h-rodriguez-hontoria"><first>H.</first><last>Rodriguez</last></author>
       <url hash="29d7a8af">W00-0723</url>
       <bibkey>turmo-rodriguez-2000-learning</bibkey>
     </paper>
     <paper id="24">
       <title>A Default First Order Family Weight Determination Procedure for <fixed-case>WPDV</fixed-case> Models</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <url hash="b1585089">W00-0724</url>
       <bibkey>van-halteren-2000-default</bibkey>
     </paper>
@@ -676,14 +676,14 @@
     </paper>
     <paper id="26">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2000 Shared Task Chunking</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <author><first>Sabine</first><last>Buchholz</last></author>
       <url hash="3dfe6e55">W00-0726</url>
       <bibkey>tjong-kim-sang-buchholz-2000-introduction</bibkey>
     </paper>
     <paper id="27">
       <title>Learning Syntactic Structures with <fixed-case>XML</fixed-case></title>
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <url hash="d1e5f914">W00-0727</url>
       <bibkey>dejean-2000-learning</bibkey>
     </paper>
@@ -701,8 +701,8 @@
     </paper>
     <paper id="30">
       <title>Use of Support Vector Learning for Chunk Identification</title>
-      <author><first>Taku</first><last>Kudoh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudoh</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="f5dd1c3b">W00-0730</url>
       <bibkey>kudoh-matsumoto-2000-use</bibkey>
     </paper>
@@ -722,33 +722,33 @@
     </paper>
     <paper id="33">
       <title>Text Chunking by System Combination</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="ab455e8f">W00-0733</url>
       <bibkey>tjong-kim-sang-2000-text</bibkey>
     </paper>
     <paper id="34">
       <title>Chunking with <fixed-case>WPDV</fixed-case> Models</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <url hash="4dc162db">W00-0734</url>
       <bibkey>van-halteren-2000-chunking</bibkey>
     </paper>
     <paper id="35">
       <title>Single-Classifier Memory-Based Phrase Chunking</title>
       <author><first>Jorn</first><last>Veenstra</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <url hash="49a11dde">W00-0735</url>
       <bibkey>veenstra-van-den-bosch-2000-single</bibkey>
     </paper>
     <paper id="36">
       <title>Phrase Parsing with Rule Sequence Processors: an Application to the Shared <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> Task</title>
-      <author><first>Marc</first><last>Vilain</last></author>
-      <author><first>David</first><last>Day</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
       <url hash="e511a3ad">W00-0736</url>
       <bibkey>vilain-day-2000-phrase</bibkey>
     </paper>
     <paper id="37">
       <title>Hybrid Text Chunking</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <author><first>TongGuan</first><last>Tey</last></author>
       <url hash="52519424">W00-0737</url>
@@ -772,7 +772,7 @@
     <paper id="40">
       <title>Incorporating Linguistics Constraints into Inductive Logic Programming</title>
       <author><first>James</first><last>Cussens</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <url hash="62ba31b3">W00-0740</url>
       <bibkey>cussens-pulman-2000-incorporating</bibkey>
     </paper>
@@ -788,8 +788,8 @@
     <paper id="42">
       <title>Inductive Logic Programming for Corpus-Based Acquisition of Semantic Lexicons</title>
       <author><first>Pascale</first><last>Sébillot</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Cecile</first><last>Fabre</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="cecile-fabre"><first>Cecile</first><last>Fabre</last></author>
       <url hash="33c90428">W00-0742</url>
       <bibkey>sebillot-etal-2000-inductive</bibkey>
     </paper>
@@ -802,7 +802,7 @@
     <paper id="44">
       <title>Recognition and Tagging of Compound Verb Groups in <fixed-case>C</fixed-case>zech</title>
       <author><first>Eva</first><last>Zácková</last></author>
-      <author><first>Luboš</first><last>Popelínský</last></author>
+      <author id="lubos-popelinsky"><first>Luboš</first><last>Popelínský</last></author>
       <author><first>Miloš</first><last>Nepil</last></author>
       <url hash="23d842b3">W00-0744</url>
       <bibkey>zackova-etal-2000-recognition</bibkey>
@@ -823,7 +823,7 @@
     </frontmatter>
     <paper id="1">
       <title>An Unsupervised Method for Multilingual Word Sense Tagging Using Parallel Corpora</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <doi>10.3115/1117724.1117725</doi>
       <pages>1–9</pages>
       <url hash="f3ec75cc">W00-0801</url>
@@ -841,8 +841,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>hinese-<fixed-case>J</fixed-case>apanese Cross Language Information Retrieval: A <fixed-case>H</fixed-case>an Character Based Approach</title>
-      <author><first>Maruf</first><last>Hasan</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="md-maruf-hasan"><first>Maruf</first><last>Hasan</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1117724.1117727</doi>
       <pages>19–26</pages>
       <url hash="2e60b7f0">W00-0803</url>
@@ -850,7 +850,7 @@
     </paper>
     <paper id="4">
       <title>Experiments in Word Domain Disambiguation for Parallel Texts</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <doi>10.3115/1117724.1117728</doi>
       <pages>27–33</pages>
@@ -901,7 +901,7 @@
       <title>Comparison between Tagged Corpora for the Named Entity Task</title>
       <author><first>Chikashi</first><last>Nobata</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1117729.1117733</doi>
       <pages>20–27</pages>
       <url hash="1f861732">W00-0904</url>
@@ -910,7 +910,7 @@
     <paper id="5">
       <title>Verb Subcategorization Frequency Differences between Business- News and Balanced Corpora: The Role of Verb Sense</title>
       <author><first>Douglas</first><last>Roland</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <author><first>Lise</first><last>Menn</last></author>
       <author><first>Susanne</first><last>Gahl</last></author>
       <author><first>Elezabeth</first><last>Elder</last></author>
@@ -923,11 +923,11 @@
     <paper id="6">
       <title>Discriminating the registers and styles in the <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek language</title>
       <author><first>George</first><last>Tambouratzis</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Nikolaos</first><last>Hairetakis</last></author>
       <author><first>Marina</first><last>Vassiliou</last></author>
       <author><first>Dimitrios</first><last>Tambouratzis</last></author>
-      <author><first>George</first><last>Carayannis</last></author>
+      <author id="george-carayannis"><first>George</first><last>Carayannis</last></author>
       <doi>10.3115/1117729.1117735</doi>
       <pages>35–42</pages>
       <url hash="170fbe18">W00-0906</url>
@@ -954,7 +954,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>J</fixed-case>apanese Dialogue Corpus of Multi-Level Annotation</title>
-      <author><first>Shu</first><last>Nakazato</last></author>
+      <author id="shu-nakazato"><first>Shu</first><last>Nakazato</last></author>
       <doi>10.3115/1117736.1117737</doi>
       <pages>1–8</pages>
       <url hash="e8929386">W00-1001</url>
@@ -972,8 +972,8 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>MATE</fixed-case> Markup Framework</title>
-      <author><first>Laila</first><last>Dybkjaer</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjaer</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <doi>10.3115/1117736.1117739</doi>
       <pages>19–28</pages>
       <url hash="452f1827">W00-1003</url>
@@ -981,7 +981,7 @@
     </paper>
     <paper id="4">
       <title>Issues in the Transcription of <fixed-case>E</fixed-case>nglish Conversational Grunts</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <doi>10.3115/1117736.1117740</doi>
       <pages>29–35</pages>
       <url hash="3a2c62c0">W00-1004</url>
@@ -1016,7 +1016,7 @@
     </paper>
     <paper id="8">
       <title>Using decision trees to select the grammatical relation of a noun phrase</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <doi>10.3115/1117736.1117744</doi>
       <pages>66–73</pages>
       <url hash="c6a60b39">W00-1008</url>
@@ -1024,7 +1024,7 @@
     </paper>
     <paper id="9">
       <title>A Common Theory of Information Fusion from Multiple Text Sources Step One: Cross-Document Structure</title>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <doi>10.3115/1117736.1117745</doi>
       <pages>74–83</pages>
       <url hash="a52245a3">W00-1009</url>
@@ -1032,9 +1032,9 @@
     </paper>
     <paper id="10">
       <title>Social Goals in Conversational Cooperation</title>
-      <author><first>Guido</first><last>Boella</last></author>
+      <author id="guido-boella"><first>Guido</first><last>Boella</last></author>
       <author><first>Rossana</first><last>Damiano</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <doi>10.3115/1117736.1117746</doi>
       <pages>84–93</pages>
       <url hash="4a36213d">W00-1010</url>
@@ -1042,8 +1042,8 @@
     </paper>
     <paper id="11">
       <title>Dynamic User Level and Utility Measurement for Adaptive Dialog in a Help-Desk System</title>
-      <author><first>Preetam</first><last>Maloor</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="preetam-maloor"><first>Preetam</first><last>Maloor</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <doi>10.3115/1117736.1117747</doi>
       <pages>94–101</pages>
       <url hash="c1818f5b">W00-1011</url>
@@ -1051,8 +1051,8 @@
     </paper>
     <paper id="12">
       <title>Dialogue Management in the Agreement Negotiation Process: A Model that Involves Natural Reasoning</title>
-      <author><first>Mare</first><last>Koit</last></author>
-      <author><first>Haldur</first><last>Oim</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
+      <author id="haldur-oim"><first>Haldur</first><last>Oim</last></author>
       <doi>10.3115/1117736.1117748</doi>
       <pages>102–111</pages>
       <url hash="2b4fc0fc">W00-1012</url>
@@ -1070,7 +1070,7 @@
     <paper id="14">
       <title>Dialogue and Domain Knowledge Management in Dialogue Systems</title>
       <author><first>Annika</first><last>Flycht-Eriksson</last></author>
-      <author><first>Arne</first><last>Jonsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jonsson</last></author>
       <doi>10.3115/1117736.1117750</doi>
       <pages>121–130</pages>
       <url hash="b8538be4">W00-1014</url>
@@ -1098,7 +1098,7 @@
       <title><fixed-case>WIT</fixed-case>: A Toolkit for Building Robust and Real-Time Spoken Dialogu Systems</title>
       <author><first>Mikio</first><last>Nakano</last></author>
       <author><first>Noboru</first><last>Miyazaki</last></author>
-      <author><first>Norihito</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihito</first><last>Yasuda</last></author>
       <author><first>Akira</first><last>Sugiyama</last></author>
       <author><first>Jun-ichi</first><last>Hirasawa</last></author>
       <author><first>Kohji</first><last>Dohsaka</last></author>
@@ -1146,7 +1146,7 @@
     </paper>
     <paper id="2">
       <title>Exploiting Lexical Expansions and <fixed-case>B</fixed-case>oolean Compositions for Web Querying</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Roberto</first><last>Prevete</last></author>
       <doi>10.3115/1117755.1117758</doi>
       <pages>13–21</pages>
@@ -1156,7 +1156,7 @@
     <paper id="3">
       <title>Use of Dependency Tree Structures for the Microcontext Extraction</title>
       <author><first>Martin</first><last>Holub</last></author>
-      <author><first>Alena</first><last>Bohmova</last></author>
+      <author id="alena-bohmova"><first>Alena</first><last>Bohmova</last></author>
       <doi>10.3115/1117755.1117759</doi>
       <pages>23–33</pages>
       <url hash="992fb471">W00-1103</url>
@@ -1164,8 +1164,8 @@
     </paper>
     <paper id="4">
       <title>Semantic Indexing using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <doi>10.3115/1117755.1117760</doi>
       <pages>35–45</pages>
       <url hash="942a7ff3">W00-1104</url>
@@ -1183,8 +1183,8 @@
       <title>Corpus-Based Learning of Compound Noun Indexing</title>
       <author><first>Byung-Kwan</first><last>Kwak</last></author>
       <author><first>Jee-Hyub</first><last>Kim</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
-      <author><first>Jung Yun</first><last>Seo</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
+      <author id="jungyun-seo"><first>Jung Yun</first><last>Seo</last></author>
       <doi>10.3115/1117755.1117763</doi>
       <pages>57–66</pages>
       <url hash="e7d0397b">W00-1106</url>
@@ -1201,8 +1201,8 @@
     </paper>
     <paper id="8">
       <title>A Text Categorization Based on a Summarization Extraction</title>
-      <author><first>Sue J.</first><last>Ker</last></author>
-      <author><first>Jen-Nan</first><last>Chen</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
+      <author id="jen-nan-chen"><first>Jen-Nan</first><last>Chen</last></author>
       <doi>10.3115/1117755.1117766</doi>
       <pages>79–83</pages>
       <url hash="638f172d">W00-1108</url>
@@ -1219,7 +1219,7 @@
     </paper>
     <paper id="10">
       <title>Automatic summarization of search engine hit lists</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Weiguo</first><last>Fan</last></author>
       <doi>10.3115/1117755.1117768</doi>
       <pages>99–109</pages>
@@ -1247,7 +1247,7 @@
     </frontmatter>
     <paper id="1">
       <title>Two Statistical Parsing Models Applied to the <fixed-case>C</fixed-case>hinese Treebank</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <author><first>David</first><last>Chiang</last></author>
       <doi>10.3115/1117769.1117771</doi>
       <pages>1–6</pages>
@@ -1265,7 +1265,7 @@
     </paper>
     <paper id="3">
       <title>Knowledge Extraction for Identification of <fixed-case>C</fixed-case>hinese Organization Names</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chao-jan</first><last>Chert</last></author>
       <doi>10.3115/1117769.1117773</doi>
       <pages>15–21</pages>
@@ -1274,7 +1274,7 @@
     </paper>
     <paper id="4">
       <title>Using Co-occurrence Statistics as an Information Source for Partial Parsing of <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Elliott Franco</first><last>Drabek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott Franco</first><last>Drabek</last></author>
       <author><first>Qjang</first><last>Zhou</last></author>
       <doi>10.3115/1117769.1117774</doi>
       <pages>22–28</pages>
@@ -1284,9 +1284,9 @@
     <paper id="5">
       <title><fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank: Design Criteria, Annotation Guidelines, and On-line Interface</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Feng-Yi</first><last>Chen</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
-      <author><first>Zhao-ming</first><last>Gao</last></author>
+      <author id="feng-yi-chen"><first>Feng-Yi</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-ming</first><last>Gao</last></author>
       <author><first>Kuang-Yu</first><last>Chen</last></author>
       <doi>10.3115/1117769.1117775</doi>
       <pages>29–37</pages>
@@ -1295,9 +1295,9 @@
     </paper>
     <paper id="6">
       <title>Enhancement of a <fixed-case>C</fixed-case>hinese Discourse Marker Tagger with <fixed-case>C</fixed-case>4.5</title>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
-      <author><first>Tom B.Y</first><last>Lai</last></author>
-      <author><first>Samuel W.K.</first><last>Chan</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y</first><last>Lai</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W.K.</first><last>Chan</last></author>
       <author><first>Weijun</first><last>Gao</last></author>
       <author><first>Xuegang</first><last>Zhan</last></author>
       <doi>10.3115/1117769.1117776</doi>
@@ -1317,9 +1317,9 @@
     <paper id="8">
       <title>Comparing Lexicalized Treebank Grammars Extracted from <fixed-case>C</fixed-case>hinese, <fixed-case>K</fixed-case>orean, and <fixed-case>E</fixed-case>nglish Corpora</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Chunghye</first><last>Han</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="chung-hye-han"><first>Chunghye</first><last>Han</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <doi>10.3115/1117769.1117778</doi>
       <pages>52–59</pages>
       <url hash="c54e7e71">W00-1208</url>
@@ -1338,7 +1338,7 @@
     <paper id="10">
       <title>A Trainable Method for Extracting <fixed-case>C</fixed-case>hinese Entity Names and Their Relations</title>
       <author><first>Yimin</first><last>Zhang</last></author>
-      <author><first>Joe F.</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe F.</first><last>Zhou</last></author>
       <doi>10.3115/1117769.1117780</doi>
       <pages>66–72</pages>
       <url hash="bfebff60">W00-1210</url>
@@ -1346,10 +1346,10 @@
     </paper>
     <paper id="11">
       <title>Statistics Based Hybrid Approach to <fixed-case>C</fixed-case>hinese Base Phrase Identification</title>
-      <author><first>Tie-jun</first><last>Zhao</last></author>
-      <author><first>Mu-yun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tie-jun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Mu-yun</first><last>Yang</last></author>
       <author><first>Fang</first><last>Liu</last></author>
-      <author><first>Jian-min</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jian-min</first><last>Yao</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <doi>10.3115/1117769.1117781</doi>
       <pages>73–77</pages>
@@ -1366,8 +1366,8 @@
     </paper>
     <paper id="13">
       <title>Annotating Information Structures in <fixed-case>C</fixed-case>hinese Texts Using <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
-      <author><first>Kok Wee</first><last>Gan</last></author>
-      <author><first>Ping Wai</first><last>Wong</last></author>
+      <author id="kok-wee-gan"><first>Kok Wee</first><last>Gan</last></author>
+      <author id="ping-wai-wong"><first>Ping Wai</first><last>Wong</last></author>
       <doi>10.3115/1117769.1117784</doi>
       <pages>85–92</pages>
       <url hash="7b418aff">W00-1213</url>
@@ -1377,7 +1377,7 @@
       <title>Machine Learning Methods for <fixed-case>C</fixed-case>hinese Web page Categorization</title>
       <author><first>Ji</first><last>He</last></author>
       <author><first>Ah-Hwee</first><last>Tan</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <doi>10.3115/1117769.1117785</doi>
       <pages>93–100</pages>
       <url hash="114dcfb6">W00-1214</url>
@@ -1385,7 +1385,7 @@
     </paper>
     <paper id="15">
       <title>Semantic Annotation of <fixed-case>C</fixed-case>hinese Phrases Using Recursive Graph</title>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <doi>10.3115/1117769.1117786</doi>
       <pages>101–108</pages>
       <url hash="d6df8ee8">W00-1215</url>
@@ -1401,7 +1401,7 @@
     </paper>
     <paper id="17">
       <title>How Should a Large Corpus Be Built?-A Comparative Study of Closure in Annotated Newspaper Corpora from Two <fixed-case>C</fixed-case>hinese Sources, Towards Building a Larger Representative Corpus Merged from Representative Sublanguage Collections</title>
-      <author><first>John J.</first><last>Kovarik</last></author>
+      <author id="john-j-kovarik"><first>John J.</first><last>Kovarik</last></author>
       <doi>10.3115/1117769.1117788</doi>
       <pages>116–123</pages>
       <url hash="7d3177c3">W00-1217</url>
@@ -1411,7 +1411,7 @@
       <title>A Clustering Algorithm for <fixed-case>C</fixed-case>hinese Adjectives and Nouns</title>
       <author><first>Yang</first><last>Wen</last></author>
       <author><first>Chunfa</first><last>Yuan</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <doi>10.3115/1117769.1117789</doi>
       <pages>124–131</pages>
       <url hash="5debf062">W00-1218</url>
@@ -1480,8 +1480,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>J</fixed-case>apanese Dependency Structure Analysis Based on Support Vector Machines</title>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1117794.1117797</doi>
       <pages>18–25</pages>
       <url hash="be9d28b9">W00-1303</url>
@@ -1489,8 +1489,8 @@
     </paper>
     <paper id="4">
       <title>Coaxing Confidences from an Old Freind: Probabilistic Classifications from Transformation Rule Lists</title>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <author><first>Grace</first><last>Ngai</last></author>
       <doi>10.3115/1117794.1117798</doi>
       <pages>26–34</pages>
@@ -1517,8 +1517,8 @@
     <paper id="7">
       <title>A Uniform Method of Grammar Extraction and Its Applications</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <doi>10.3115/1117794.1117801</doi>
       <pages>53–62</pages>
       <url hash="da59bc84">W00-1307</url>
@@ -1527,7 +1527,7 @@
     <paper id="8">
       <title>Enriching the Knowledge Sources Used in a Maximum Entropy Part-of-Speech Tagger</title>
       <author><first>Kristina</first><last>Toutanvoa</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1117794.1117802</doi>
       <pages>63–70</pages>
       <url hash="9360aec1">W00-1308</url>
@@ -1535,7 +1535,7 @@
     </paper>
     <paper id="9">
       <title>Error-driven <fixed-case>HMM</fixed-case>-based Chunk Tagger with Context-dependent Lexicon</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <doi>10.3115/1117794.1117803</doi>
       <pages>71–79</pages>
@@ -1566,7 +1566,7 @@
     <paper id="12">
       <title>Cross-lingual Information Retrieval Using Hidden <fixed-case>M</fixed-case>arkov Models</title>
       <author><first>Jinxi</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.3115/1117794.1117806</doi>
       <pages>95–103</pages>
       <url hash="b130fcb7">W00-1312</url>
@@ -1577,7 +1577,7 @@
       <author><first>Yibo</first><last>Zhang</last></author>
       <author><first>Le</first><last>Sun</last></author>
       <author><first>Lin</first><last>Du</last></author>
-      <author><first>Yufang</first><last>Sun</last></author>
+      <author id="yufang-sun"><first>Yufang</first><last>Sun</last></author>
       <doi>10.3115/1117794.1117807</doi>
       <pages>104–109</pages>
       <url hash="b3f44c87">W00-1313</url>
@@ -1588,7 +1588,7 @@
       <author><first>Le</first><last>Sun</last></author>
       <author><first>Youbing</first><last>Jin</last></author>
       <author><first>Lin</first><last>Du</last></author>
-      <author><first>Yufang</first><last>Sun</last></author>
+      <author id="yufang-sun"><first>Yufang</first><last>Sun</last></author>
       <doi>10.3115/1117794.1117808</doi>
       <pages>110–116</pages>
       <url hash="d61c20d3">W00-1314</url>
@@ -1597,7 +1597,7 @@
     <paper id="15">
       <title>Empirical Term Weighting and Expansion Frequency</title>
       <author><first>Kyoji</first><last>Umemura</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <doi>10.3115/1117794.1117809</doi>
       <pages>117–123</pages>
       <url hash="b8e79c63">W00-1315</url>
@@ -1616,7 +1616,7 @@
     <paper id="17">
       <title>Automated Construction of Database Interfaces: Intergrating Statistical and Relational Learning for Semantic Parsing</title>
       <author><first>Lappoon R.</first><last>Tang</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <doi>10.3115/1117794.1117811</doi>
       <pages>133–141</pages>
       <url hash="4a28ed84">W00-1317</url>
@@ -1624,7 +1624,7 @@
     </paper>
     <paper id="18">
       <title>Automatic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Mapping Using Word Sense Disambiguation</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <doi>10.3115/1117794.1117812</doi>
       <pages>142–147</pages>
       <url hash="2c40a721">W00-1318</url>
@@ -1632,7 +1632,7 @@
     </paper>
     <paper id="19">
       <title>A Real-time Integration Of Concept-based Search and Summarization of <fixed-case>C</fixed-case>hinese Websites</title>
-      <author><first>Joe F.</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe F.</first><last>Zhou</last></author>
       <author><first>Weiquan</first><last>Liu</last></author>
       <doi>10.3115/1117794.1117813</doi>
       <pages>148–154</pages>
@@ -1641,7 +1641,7 @@
     </paper>
     <paper id="20">
       <title>A Statistical Model for Parsing and Word-Sense Disambiguation</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <doi>10.3115/1117794.1117814</doi>
       <pages>155–163</pages>
       <url hash="27143a25">W00-1320</url>
@@ -1649,9 +1649,9 @@
     </paper>
     <paper id="21">
       <title>Reducing Parsing Complexity by Intra-Sentence Segmentation based on Maximum Entropy Model</title>
-      <author><first>Sung Dong</first><last>Kim</last></author>
+      <author id="sung-dong-kim"><first>Sung Dong</first><last>Kim</last></author>
       <author><first>Byoung-Tak</first><last>Zhang</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <doi>10.3115/1117794.1117815</doi>
       <pages>164–171</pages>
       <url hash="431f01e0">W00-1321</url>
@@ -1660,8 +1660,8 @@
     <paper id="22">
       <title>An Empirical Study of the Domain Dependence of Supervised Word Disambiguation Systems</title>
       <author><first>Gerard</first><last>Escudero</last></author>
-      <author><first>Lluis</first><last>Marquez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <doi>10.3115/1117794.1117816</doi>
       <pages>172–180</pages>
       <url hash="0d590697">W00-1322</url>
@@ -1687,8 +1687,8 @@
     <paper id="25">
       <title>Statistical Filtering and Subcategorization Frame Acquisition</title>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <doi>10.3115/1117794.1117819</doi>
       <pages>199–206</pages>
       <url hash="0ca751b4">W00-1325</url>
@@ -1696,8 +1696,8 @@
     </paper>
     <paper id="26">
       <title>One Sense per Collocation and Genre/Topic Variations</title>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <doi>10.3115/1117794.1117820</doi>
       <pages>207–215</pages>
       <url hash="103fcdd0">W00-1326</url>
@@ -1733,9 +1733,9 @@
     </frontmatter>
     <paper id="1">
       <title>Evaluation Metrics for Generation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <doi>10.3115/1118253.1118255</doi>
       <pages>1–8</pages>
       <url hash="5ac58877">W00-1401</url>
@@ -1763,7 +1763,7 @@
       <title>Document structure and multilingual authoring</title>
       <author><first>Caroline</first><last>Brun</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Veronika</first><last>Lux</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux</last></author>
       <doi>10.3115/1118253.1118258</doi>
       <pages>24–31</pages>
       <url hash="44859d59">W00-1404</url>
@@ -1771,9 +1771,9 @@
     </paper>
     <paper id="5">
       <title><fixed-case>DTD</fixed-case>-driven bilingual document generation</title>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Raquel</first><last>Martínez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martínez</last></author>
       <doi>10.3115/1118253.1118259</doi>
       <pages>32–38</pages>
       <url hash="936e311d">W00-1405</url>
@@ -1793,7 +1793,7 @@
     <paper id="7">
       <title>A strategy for generating evaluative arguments</title>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <doi>10.3115/1118253.1118261</doi>
       <pages>47–54</pages>
       <url hash="d1eeda83">W00-1407</url>
@@ -1822,14 +1822,14 @@
     </paper>
     <paper id="10">
       <title>Reinterpretation of an Existing <fixed-case>NLG</fixed-case> System in a Generic Generation Architecture</title>
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <author><first>Christy</first><last>Doran</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Daniel</first><last>Paiva</last></author>
-      <author><first>Mike</first><last>Reape</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
-      <author><first>Neil</first><last>Tipper</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="daniel-paiva"><first>Daniel</first><last>Paiva</last></author>
+      <author id="mike-reape"><first>Mike</first><last>Reape</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
+      <author id="neil-tipper"><first>Neil</first><last>Tipper</last></author>
       <doi>10.3115/1118253.1118264</doi>
       <pages>69–76</pages>
       <url hash="922a297a">W00-1410</url>
@@ -1837,7 +1837,7 @@
     </paper>
     <paper id="11">
       <title>An integrated framework for text planning and pronominalisation</title>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <doi>10.3115/1118253.1118265</doi>
       <pages>77–84</pages>
@@ -1847,7 +1847,7 @@
     <paper id="12">
       <title>Incremental Event Conceptualization and Natural Language Generation in Monitoring Enviroments</title>
       <author><first>Markus</first><last>Guhe</last></author>
-      <author><first>Christopher</first><last>Habel</last></author>
+      <author id="christopher-habel"><first>Christopher</first><last>Habel</last></author>
       <author><first>Heike</first><last>Tappe</last></author>
       <doi>10.3115/1118253.1118266</doi>
       <pages>85–92</pages>
@@ -1865,7 +1865,7 @@
     <paper id="14">
       <title>Generating Referring Quantified Expressions</title>
       <author><first>James</first><last>Shaw</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <doi>10.3115/1118253.1118268</doi>
       <pages>100–107</pages>
       <url hash="db3f0671">W00-1414</url>
@@ -1874,7 +1874,7 @@
     <paper id="15">
       <title>An Empirical Analysis of Constructing Non-restrictive <fixed-case>NP</fixed-case> Modifiers to Express Semantic Relations</title>
       <author><first>Hua</first><last>Cheng</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <doi>10.3115/1118253.1118269</doi>
       <pages>108–115</pages>
       <url hash="3c6cf6d4">W00-1415</url>
@@ -1901,8 +1901,8 @@
       <title>Optimising text quality in generation from relational databases</title>
       <author><first>Michael</first><last>O’Donnell</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <doi>10.3115/1118253.1118272</doi>
       <pages>133–140</pages>
       <url hash="d57d25d8">W00-1418</url>
@@ -1912,7 +1912,7 @@
       <title>Generating a controlled language</title>
       <author><first>Laurence</first><last>Danlos</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
-      <author><first>Veronika</first><last>Lux</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux</last></author>
       <doi>10.3115/1118253.1118273</doi>
       <pages>141–147</pages>
       <url hash="b9b376d8">W00-1419</url>
@@ -1941,7 +1941,7 @@
     <paper id="22">
       <title>Enriching partially-specified representations for text realization using an attribute grammar</title>
       <author><first>Songsak</first><last>Channarukul</last></author>
-      <author><first>Susan W.</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
       <author><first>Syed S.</first><last>Ali</last></author>
       <doi>10.3115/1118253.1118276</doi>
       <pages>163–170</pages>
@@ -1960,7 +1960,7 @@
     </paper>
     <paper id="24">
       <title>Generating Vague Descriptions</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <doi>10.3115/1118253.1118278</doi>
       <pages>179–185</pages>
       <url hash="4d9f787d">W00-1424</url>
@@ -1969,7 +1969,7 @@
     <paper id="25">
       <title>Capturing the Interaction between Aggregation and Text Planning in Two Generation Systems</title>
       <author><first>Hua</first><last>Cheng</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <doi>10.3115/1118253.1118279</doi>
       <pages>186–193</pages>
       <url hash="2fe1f3fb">W00-1425</url>
@@ -1979,7 +1979,7 @@
       <title>Can text structure be incompatible with rhetorical structure?</title>
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <doi>10.3115/1118253.1118280</doi>
       <pages>194–200</pages>
       <url hash="5752c2e3">W00-1426</url>
@@ -1988,7 +1988,7 @@
     <paper id="27">
       <title>Robust, applied morphological generation</title>
       <author><first>Guido</first><last>Minnen</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Darren</first><last>Pearce</last></author>
       <doi>10.3115/1118253.1118281</doi>
       <pages>201–208</pages>
@@ -1997,10 +1997,10 @@
     </paper>
     <paper id="28">
       <title>Integrating a Large-Scale, Reusable Lexicon with a Natural Language Generator</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Yael</first><last>Dahan</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Dahan</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <doi>10.3115/1118253.1118282</doi>
       <pages>209–216</pages>
       <url hash="668721f7">W00-1428</url>
@@ -2042,7 +2042,7 @@
     </paper>
     <paper id="33">
       <title>Rhetorical Structure in Dialog</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <doi>10.3115/1118253.1118288</doi>
       <pages>247–252</pages>
       <url hash="f16f1c0d">W00-1433</url>
@@ -2060,8 +2060,8 @@
       <title>Demonstration of <fixed-case>ILEX</fixed-case> 3.0</title>
       <author><first>Michael</first><last>O’Donnell</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <doi>10.3115/1118253.1118291</doi>
       <pages>257–259</pages>
       <url hash="5331fcc3">W00-1435</url>
@@ -2079,7 +2079,7 @@
     </paper>
     <paper id="37">
       <title><fixed-case>YAG</fixed-case>: A Template-Based Generator for Real-Time Systems</title>
-      <author><first>Susan W.</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan W.</first><last>McRoy</last></author>
       <author><first>Songsak</first><last>Channarukul</last></author>
       <author><first>Syed S.</first><last>Ali</last></author>
       <doi>10.3115/1118253.1118293</doi>
@@ -2090,7 +2090,7 @@
     <paper id="38">
       <title>An Efficient Text Summarizer using Lexical Chains</title>
       <author><first>H. Gregory</first><last>Silber</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <doi>10.3115/1118253.1118294</doi>
       <pages>268–271</pages>
       <url hash="8e369754">W00-1438</url>
@@ -2118,7 +2118,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case>-2000 Workshop on Using Toolsets and Architectures To Build <fixed-case>NLP</fixed-case> Systems</booktitle>
       <url hash="8dce192f">W00-15</url>
-      <editor><first>Rémi</first><last>Zajac</last></editor>
+      <editor id="remi-zajac"><first>Rémi</first><last>Zajac</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
       <address>Centre Universitaire, Luxembourg</address>
       <month>August</month>
@@ -2131,11 +2131,11 @@
     </frontmatter>
     <paper id="1">
       <title>Experience using <fixed-case>GATE</fixed-case> for <fixed-case>NLP</fixed-case> <fixed-case>R</fixed-case>&amp;<fixed-case>D</fixed-case></title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>1–8</pages>
       <url hash="be694c17">W00-1501</url>
       <bibkey>cunningham-etal-2000-experience</bibkey>
@@ -2143,18 +2143,18 @@
     <paper id="2">
       <title>Composing a General-Purpose Toolbox for <fixed-case>S</fixed-case>wedish</title>
       <author><first>Fredrik</first><last>Olsson</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>9–18</pages>
       <url hash="2b1e6775">W00-1502</url>
       <bibkey>olsson-gamback-2000-composing</bibkey>
     </paper>
     <paper id="3">
       <title>An Experiment in Unifying Audio-Visual and Textual Infrastructures for Language Processing Research and Development</title>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Albert</first><last>Russel</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="albert-russel"><first>Albert</first><last>Russel</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <pages>19–25</pages>
       <url hash="9e8536c0">W00-1503</url>
       <bibkey>bontcheva-etal-2000-experiment</bibkey>
@@ -2176,7 +2176,7 @@
     </paper>
     <paper id="6">
       <title>The <fixed-case>XML</fixed-case> Framework and Its Implications for the Development of Natural Language Processing Tools</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>38–43</pages>
       <url hash="53cf6465">W00-1506</url>
       <bibkey>ide-2000-xml</bibkey>
@@ -2215,8 +2215,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case>-2000 Workshop on Efficiency In Large-Scale Parsing Systems</booktitle>
       <url hash="8aee9a90">W00-16</url>
-      <editor><first>John</first><last>Carroll</last></editor>
-      <editor><first>Robert C.</first><last>Moore</last></editor>
+      <editor id="john-a-carroll"><first>John</first><last>Carroll</last></editor>
+      <editor id="robert-c-moore"><first>Robert C.</first><last>Moore</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
       <address>Centre Universitaire, Luxembourg</address>
@@ -2263,14 +2263,14 @@
       <title>Some Experiments on Indicators of Parsing Complexity for Lexicalized Grammars</title>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>37–42</pages>
       <url hash="5036a91f">W00-1605</url>
       <bibkey>sarkar-etal-2000-experiments</bibkey>
     </paper>
     <paper id="6">
       <title>Large Scale Parsing of <fixed-case>C</fixed-case>zech</title>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <author><first>Aleš</first><last>Horák</last></author>
       <pages>43–50</pages>
       <url hash="e771927b">W00-1606</url>
@@ -2298,7 +2298,7 @@
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case>-2000 Workshop on Semantic Annotation and Intelligent Content</booktitle>
       <url hash="6768e3b0">W00-17</url>
       <editor><first>Paul</first><last>Buitelaar</last></editor>
-      <editor><first>Kôiti</first><last>Hasida</last></editor>
+      <editor id="koiti-hasida"><first>Kôiti</first><last>Hasida</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
       <address>Centre Universitaire, Luxembourg</address>
       <month>August</month>
@@ -2319,8 +2319,8 @@
     </paper>
     <paper id="2">
       <title>Exploring Automatic Word Sense Disambiguation with Decision Lists and the Web</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>11–19</pages>
       <url hash="e6b2a0a5">W00-1702</url>
       <bibkey>agirre-martinez-2000-exploring</bibkey>
@@ -2329,7 +2329,7 @@
       <title>Improving Natural Language Processing by Linguistic Document Annotation</title>
       <author><first>Hideo</first><last>Watanabe</last></author>
       <author><first>Katashi</first><last>Nagao</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <author><first>Arendse</first><last>Bernth</last></author>
       <pages>20–27</pages>
       <url hash="6c04b371">W00-1703</url>
@@ -2337,18 +2337,18 @@
     </paper>
     <paper id="4">
       <title>Building an Annotated Corpus in the Molecular-Biology Domain</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Chikashi</first><last>Nobata</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <pages>28–34</pages>
       <url hash="779c82dd">W00-1704</url>
       <bibkey>tateisi-etal-2000-building</bibkey>
     </paper>
     <paper id="5">
       <title>Semantic Annotation for Generation: Issues in Annotating a Corpus to Develop and Evaluate Discourse Entity Realization Algorithms</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>37–43</pages>
       <url hash="dba3463b">W00-1705</url>
       <bibkey>poesio-2000-semantic</bibkey>
@@ -2403,7 +2403,7 @@
     <meta>
       <booktitle>Proceedings of the Fifth Workshop of the <fixed-case>ACL</fixed-case> Special Interest Group in Computational Phonology</booktitle>
       <url hash="b5620707">W00-18</url>
-      <editor><first>Jason</first><last>Eisner</last></editor>
+      <editor id="jason-eisner"><first>Jason</first><last>Eisner</last></editor>
       <editor><first>Lauri</first><last>Karttunen</last></editor>
       <editor><first>Alain</first><last>Thèriault</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
@@ -2441,14 +2441,14 @@
     <paper id="4">
       <title>Approximation and Exactness in Finite State <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
       <author><first>Dale</first><last>Gerdemann</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>34–45</pages>
       <url hash="3c7e80a2">W00-1804</url>
       <bibkey>gerdemann-van-noord-2000-approximation</bibkey>
     </paper>
     <paper id="5">
       <title>Multi-Syllable Phonotactic Modelling</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>46–56</pages>
       <url hash="80c77b08">W00-1805</url>
       <bibkey>belz-2000-multi</bibkey>
@@ -2465,7 +2465,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case>-2000 Workshop on Linguistically Interpreted Corpora</booktitle>
       <url hash="522772c8">W00-19</url>
-      <editor><first>Anne</first><last>Abeille</last></editor>
+      <editor id="anne-abeille"><first>Anne</first><last>Abeille</last></editor>
       <editor><first>Thorsten</first><last>Brants</last></editor>
       <editor><first>Hans</first><last>Uszkoreit</last></editor>
       <publisher>International Committee on Computational Linguistics</publisher>
@@ -2480,8 +2480,8 @@
     </frontmatter>
     <paper id="1">
       <title>Comparing Linguistic Interpretation Schemes for <fixed-case>E</fixed-case>nglish Corpora</title>
-      <author><first>Eric</first><last>Atwell</last></author>
-      <author><first>George</first><last>Demetriou</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
+      <author id="george-demetriou"><first>George</first><last>Demetriou</last></author>
       <author><first>John</first><last>Hughes</last></author>
       <author><first>Amanda</first><last>Schiffrin</last></author>
       <author><first>Clive</first><last>Souter</last></author>
@@ -2492,8 +2492,8 @@
     </paper>
     <paper id="2">
       <title>Dependency-based Syntactic Annotation of a Chiense Corpus</title>
-      <author><first>Tom B. Y.</first><last>Lai</last></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>11–17</pages>
       <url hash="d46eb5dd">W00-1902</url>
       <bibkey>lai-huang-2000-dependency-based</bibkey>
@@ -2544,7 +2544,7 @@
     </paper>
     <paper id="7">
       <title>The Detection of Inconsistency in Manually Tagged Text</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <pages>48–55</pages>
       <url hash="d66475e5">W00-1907</url>
       <bibkey>van-halteren-2000-detection</bibkey>
@@ -2558,16 +2558,16 @@
     </paper>
     <paper id="9">
       <title>Automatic Procedures in Tectogrammatical Tagging</title>
-      <author><first>Alena</first><last>Böhmová</last></author>
-      <author><first>Petr</first><last>Sgall</last></author>
+      <author id="alena-bohmova"><first>Alena</first><last>Böhmová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
       <pages>65–70</pages>
       <url hash="a0c82af6">W00-1909</url>
       <bibkey>bohmova-sgall-2000-automatic</bibkey>
     </paper>
     <paper id="10">
       <title>Considering Automatic Aids to Corpus Annotation</title>
-      <author><first>David</first><last>Day</last></author>
-      <author><first>Benjamin</first><last>Wellner</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
+      <author id="ben-wellner"><first>Benjamin</first><last>Wellner</last></author>
       <pages>71–79</pages>
       <url hash="86497b18">W00-1910</url>
       <bibkey>day-wellner-2000-considering</bibkey>
@@ -2588,8 +2588,8 @@
     </frontmatter>
     <paper id="1">
       <title>The current status of <fixed-case>FTAG</fixed-case></title>
-      <author><first>Anne</first><last>Abeillé</last></author>
-      <author><first>Marie-Hélène</first><last>Candito</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
+      <author id="marie-candito"><first>Marie-Hélène</first><last>Candito</last></author>
       <author><first>Alexandra</first><last>Kinyon</last></author>
       <pages>11–18</pages>
       <url hash="2f6dc8c9">W00-2001</url>
@@ -2597,8 +2597,8 @@
     </paper>
     <paper id="2">
       <title>A redefinition of Embedded Push-Down Automata</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <author><first>Manuel</first><last>Vilares</last></author>
       <pages>19–26</pages>
       <url hash="92313fae">W00-2002</url>
@@ -2606,17 +2606,17 @@
     </paper>
     <paper id="3">
       <title>Practical aspects in compiling tabular <fixed-case>TAG</fixed-case> parsers</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>27–32</pages>
       <url hash="b026eae6">W00-2003</url>
       <bibkey>alonso-etal-2000-practical</bibkey>
     </paper>
     <paper id="4">
       <title>Using <fixed-case>TAG</fixed-case>s, a Tree Model, and a Language Model for Generation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>33–40</pages>
       <url hash="350108e4">W00-2004</url>
       <bibkey>bangalore-rambow-2000-using</bibkey>
@@ -2640,11 +2640,11 @@
     </paper>
     <paper id="7">
       <title>Engineering a Wide-Coverage Lexicalized Grammar</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <author><first>Olga</first><last>Shaumyan</last></author>
       <author><first>Martine</first><last>Smets</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>55–60</pages>
       <url hash="2c02e720">W00-2007</url>
       <bibkey>carroll-etal-2000-engineering</bibkey>
@@ -2660,8 +2660,8 @@
     </paper>
     <paper id="9">
       <title>Bidirectional parsing of <fixed-case>TAG</fixed-case> without heads</title>
-      <author><first>Víctor J.</first><last>Díaz</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="victor-j-diaz"><first>Víctor J.</first><last>Díaz</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Vicente</first><last>Carrillo</last></author>
       <pages>67–72</pages>
       <url hash="0f733a08">W00-2009</url>
@@ -2669,14 +2669,14 @@
     </paper>
     <paper id="10">
       <title>Punctuation in a Lexicalized Grammar</title>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <pages>73–78</pages>
       <url hash="153591ad">W00-2010</url>
       <bibkey>doran-2000-punctuation</bibkey>
     </paper>
     <paper id="11">
       <title>A faster parsing algorithm for <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>79–84</pages>
       <url hash="e410d84c">W00-2011</url>
@@ -2691,8 +2691,8 @@
     </paper>
     <paper id="13">
       <title>The <fixed-case>S</fixed-case>ino-<fixed-case>K</fixed-case>orean light verb construction and lexical argument structure</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>93–100</pages>
       <url hash="ae5f7819">W00-2013</url>
       <bibkey>han-rambow-2000-sino</bibkey>
@@ -2707,7 +2707,7 @@
     </paper>
     <paper id="15">
       <title>Relationship between strong and weak generative power of formal systems</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>107–114</pages>
       <url hash="a9c519ba">W00-2015</url>
       <bibkey>joshi-2000-relationship</bibkey>
@@ -2715,7 +2715,7 @@
     <paper id="16">
       <title>An alternative description of extractions in <fixed-case>TAG</fixed-case></title>
       <author><first>Sylvain</first><last>Kahane</last></author>
-      <author><first>Marie-Hélène</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Hélène</first><last>Candito</last></author>
       <author><first>Yannick</first><last>de Kercadio</last></author>
       <pages>115–122</pages>
       <url hash="58045c1d">W00-2016</url>
@@ -2751,10 +2751,10 @@
     </paper>
     <paper id="21">
       <title>Building a class-based verb lexicon using <fixed-case>TAG</fixed-case>s</title>
-      <author><first>Karin</first><last>Kipper</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>147–154</pages>
       <url hash="b82d902a">W00-2021</url>
       <bibkey>kipper-etal-2000-building</bibkey>
@@ -2791,8 +2791,8 @@
     </paper>
     <paper id="26">
       <title>A comparison of the <fixed-case>XTAG</fixed-case> and <fixed-case>CLE</fixed-case> Grammars for <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Frankie</first><last>James</last></author>
       <pages>185–192</pages>
       <url hash="4711793f">W00-2026</url>
@@ -2809,8 +2809,8 @@
       <title>Lexicalized grammar and the description of motion events</title>
       <author><first>Matthew</first><last>Stone</last></author>
       <author><first>Tonia</first><last>Bleam</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>199–206</pages>
       <url hash="42c2b6d8">W00-2028</url>
       <bibkey>stone-etal-2000-lexicalized</bibkey>
@@ -2833,7 +2833,7 @@
     <paper id="31">
       <title>Customizing the <fixed-case>XTAG</fixed-case> system for efficient grammar development for <fixed-case>K</fixed-case>orean</title>
       <author><first>Juntae</first><last>Yoon</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Nari</first><last>Kim</last></author>
       <author><first>Meesook</first><last>Kim</last></author>
       <pages>221–226</pages>
@@ -2842,7 +2842,7 @@
     </paper>
     <paper id="32">
       <title>Deriving polarity effects</title>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>229–232</pages>
       <url hash="d8654ba0">W00-2032</url>
       <bibkey>bernardi-2000-deriving</bibkey>
@@ -2856,9 +2856,9 @@
     </paper>
     <paper id="34">
       <title>Elementary trees for syntactic and statistical disambiguation</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Luminita</first><last>Chiran</last></author>
-      <author><first>Ciprian</first><last>Bacalu</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="luminita-chiran"><first>Luminita</first><last>Chiran</last></author>
+      <author id="ciprian-bacalu"><first>Ciprian</first><last>Bacalu</last></author>
       <pages>237–240</pages>
       <url hash="c002a3d7">W00-2034</url>
       <bibkey>delmonte-etal-2000-elementary</bibkey>
@@ -2912,7 +2912,7 @@
     <paper id="41">
       <title>Comparing and integrating <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>265–268</pages>
       <url hash="dd58583c">W00-2041</url>
       <bibkey>xia-palmer-2000-comparing</bibkey>
diff --git a/data/xml/W01.xml b/data/xml/W01.xml
index dd6e343908..ecf29b50e5 100644
--- a/data/xml/W01.xml
+++ b/data/xml/W01.xml
@@ -51,8 +51,8 @@
     </frontmatter>
     <paper id="1">
       <title>Limitations of Co-Training for Natural Language Learning from Large Datasets</title>
-      <author><first>David</first><last>Pierce</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <url hash="d35f4977">W01-0501</url>
       <bibkey>pierce-cardie-2001-limitations</bibkey>
     </paper>
@@ -67,7 +67,7 @@
       <title>Learning Within-Sentence Semantic Coherence</title>
       <author><first>Elena</first><last>Eneva</last></author>
       <author><first>Rose</first><last>Hoberman</last></author>
-      <author><first>Lucian</first><last>Lita</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian</first><last>Lita</last></author>
       <url hash="8a5cb582">W01-0503</url>
       <bibkey>eneva-etal-2001-learning</bibkey>
     </paper>
@@ -92,7 +92,7 @@
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <author><first>Georgios</first><last>Paliouras</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Constantine D.</first><last>Spyropoulos</last></author>
+      <author id="constantine-d-spyropoulos"><first>Constantine D.</first><last>Spyropoulos</last></author>
       <author><first>Panagiotis</first><last>Stamatopoulos</last></author>
       <url hash="bb3bab02">W01-0506</url>
       <bibkey>sakkis-etal-2001-stacking</bibkey>
@@ -100,14 +100,14 @@
     <paper id="7">
       <title>Feature Space Restructuring for <fixed-case>SVM</fixed-case>s with Application to Text Categorization</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="c7bcf4cf">W01-0507</url>
       <bibkey>takamura-matsumoto-2001-feature</bibkey>
     </paper>
     <paper id="8">
       <title>Using Bins to Empirically Estimate Term Weights for Text Categorization</title>
       <author><first>Carl</first><last>Sable</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <url hash="0dbba488">W01-0508</url>
       <bibkey>sable-church-2001-using</bibkey>
     </paper>
@@ -129,7 +129,7 @@
     <paper id="11">
       <title>Classifying the Semantic Relations in Noun Compounds via a Domain-Specific Lexical Hierarchy</title>
       <author><first>Barbara</first><last>Rosario</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <url hash="8dee90ab">W01-0511</url>
       <bibkey>rosario-hearst-2001-classifying</bibkey>
     </paper>
@@ -144,7 +144,7 @@
     <paper id="13">
       <title>Is Knowledge-Free Induction of Multiword Unit Dictionary Headwords a Solved Problem?</title>
       <author><first>Patrick</first><last>Schone</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <url hash="c79384ec">W01-0513</url>
       <bibkey>schone-jurafsky-2001-knowledge-free</bibkey>
     </paper>
@@ -152,7 +152,7 @@
       <title>Latent Semantic Analysis for Text Segmentation</title>
       <author><first>Freddy Y. Y.</first><last>Choi</last></author>
       <author><first>Peter</first><last>Wiemer-Hastings</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <url hash="130b18c6">W01-0514</url>
       <bibkey>choi-etal-2001-latent</bibkey>
     </paper>
@@ -173,9 +173,9 @@
     </paper>
     <paper id="17">
       <title>Automatic Corpus-based Tone Prediction using K-<fixed-case>T</fixed-case>o<fixed-case>BI</fixed-case> Representation</title>
-      <author><first>Jin-Seok</first><last>Lee</last></author>
+      <author id="jin-seok-lee"><first>Jin-Seok</first><last>Lee</last></author>
       <author><first>Byeongchang</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <url hash="be296b03">W01-0517</url>
       <bibkey>lee-etal-2001-automatic</bibkey>
     </paper>
@@ -187,15 +187,15 @@
     </paper>
     <paper id="19">
       <title>Comparing Data-Driven Learning Algorithms for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging of <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <url hash="cf8ab7d0">W01-0519</url>
       <bibkey>megyesi-2001-comparing</bibkey>
     </paper>
     <paper id="20">
       <title>Impact of Quality and Quantity of Corpora on Stochastic Generation</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="b961bdeb">W01-0520</url>
       <bibkey>bangalore-etal-2001-impact</bibkey>
     </paper>
@@ -228,7 +228,7 @@
     </frontmatter>
     <paper id="1">
       <title>Multidimensional transformation-based learning</title>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Grace</first><last>Ngai</last></author>
       <url hash="d2b4db9d">W01-0701</url>
       <bibkey>florian-ngai-2001-multidimensional</bibkey>
@@ -236,22 +236,22 @@
     <paper id="2">
       <title>Combining a self-organising map with memory-based learning</title>
       <author><first>James</first><last>Hammerton</last></author>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="8a21cb21">W01-0702</url>
       <bibkey>hammerton-tjong-kim-sang-2001-combining</bibkey>
     </paper>
     <paper id="3">
       <title>Learning class-to-class selectional preferences</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <url hash="d9520558">W01-0703</url>
       <bibkey>agirre-martinez-2001-learning</bibkey>
     </paper>
     <paper id="4">
       <title>Semantic pattern learning through maximum entropy-based <fixed-case>WSD</fixed-case> technique</title>
-      <author><first>Maximiliano</first><last>Saiz-Noeda</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="maximiliano-saiz-noeda"><first>Maximiliano</first><last>Saiz-Noeda</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <url hash="7ec648fc">W01-0704</url>
       <bibkey>saiz-noeda-etal-2001-semantic</bibkey>
     </paper>
@@ -271,15 +271,15 @@
     </paper>
     <paper id="7">
       <title>Probabilistic models for <fixed-case>PP</fixed-case>-attachment resolution and <fixed-case>NP</fixed-case> analysis</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Nicola</first><last>Cancedda</last></author>
       <url hash="11f81532">W01-0707</url>
       <bibkey>gaussier-cancedda-2001-probabilistic</bibkey>
     </paper>
     <paper id="8">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2001 shared task: clause identification</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <url hash="868cea19">W01-0708</url>
       <bibkey>tjong-kim-sang-dejean-2001-introduction</bibkey>
     </paper>
@@ -298,22 +298,22 @@
     </paper>
     <paper id="11">
       <title><fixed-case>M</fixed-case>orpholog: Constrained and Supervised Learning of Morphology</title>
-      <author><first>Rémi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Rémi</first><last>Zajac</last></author>
       <url hash="7fdc029e">W01-0711</url>
       <bibkey>zajac-2001-morpholog</bibkey>
     </paper>
     <paper id="12">
       <title>Learning Computational Grammars</title>
       <author><first>John</first><last>Nerbonne</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Nicola</first><last>Cancedda</last></author>
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <author><first>James</first><last>Hammerton</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Stasinos</first><last>Konstantopoulos</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <author><first>Franck</first><last>Thollard</last></author>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="f37dba0d">W01-0712</url>
       <bibkey>nerbonne-etal-2001-learning</bibkey>
     </paper>
@@ -326,7 +326,7 @@
     <paper id="14">
       <title>Distributional phrase structure induction</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <url hash="64fe3e16">W01-0714</url>
       <bibkey>klein-manning-2001-distributional</bibkey>
     </paper>
@@ -339,8 +339,8 @@
     </paper>
     <paper id="16">
       <title>Learning to identify animate references</title>
-      <author><first>Constantin</first><last>Orasan</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <url hash="c7eef0f6">W01-0716</url>
       <bibkey>orasan-evans-2001-learning</bibkey>
     </paper>
@@ -359,8 +359,8 @@
     <paper id="19">
       <title>Combining linguistic and machine learning techniques for email summarization</title>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <url hash="15a90744">W01-0719</url>
       <bibkey>muresan-etal-2001-combining</bibkey>
     </paper>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="21">
       <title>Boosted decision graphs for <fixed-case>NLP</fixed-case> learning tasks</title>
-      <author><first>Jon D.</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon D.</first><last>Patrick</last></author>
       <author><first>Ishaan</first><last>Goyal</last></author>
       <url hash="2948cdc8">W01-0721</url>
       <bibkey>patrick-goyal-2001-boosted</bibkey>
@@ -386,13 +386,13 @@
     </paper>
     <paper id="23">
       <title>Using <fixed-case>ALL</fixed-case>i<fixed-case>S</fixed-case> for clausing</title>
-      <author><first>Hervé</first><last>Déjean</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
       <url hash="1d6ca127">W01-0723</url>
       <bibkey>dejean-2001-using</bibkey>
     </paper>
     <paper id="24">
       <title>Memory-based clause identification</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="210ea360">W01-0724</url>
       <bibkey>tjong-kim-sang-2001-memory</bibkey>
     </paper>
@@ -406,7 +406,7 @@
     <paper id="26">
       <title>Boosting trees for clause splitting</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="9c8d756b">W01-0726</url>
       <bibkey>carreras-marquez-2001-boosting</bibkey>
     </paper>
@@ -417,7 +417,7 @@
       <publisher>Association for Computational Linguistics</publisher>
       <address>Toulouse, France</address>
       <editor><first>Helmut</first><last>Horacek</last></editor>
-      <editor><first>Nicolas</first><last>Nicolov</last></editor>
+      <editor id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
       <year>2001</year>
       <venue>enlg</venue>
@@ -428,13 +428,13 @@
     </frontmatter>
     <paper id="1">
       <title>Corpus-Based Methods in Natural Language Generation: <fixed-case>F</fixed-case>riends or Foe? (invited talk)</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="3ae3b793">W01-0801</url>
       <bibkey>rambow-2001-corpus</bibkey>
     </paper>
     <paper id="2">
       <title>A Two-Staged Model For Content Determination</title>
-      <author><first>Somayajula G.</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajula G.</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Jim</first><last>Hunter</last></author>
       <author><first>Jin</first><last>Yu</last></author>
@@ -451,14 +451,14 @@
     </paper>
     <paper id="4">
       <title>Logical Form Equivalence: the Case of Referring Expressions Generation</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Magnús M.</first><last>Halldórsson</last></author>
       <url hash="57be637b">W01-0804</url>
       <bibkey>van-deemter-halldorsson-2001-logical</bibkey>
     </paper>
     <paper id="5">
       <title>A Meta-Algorithm for the Generation of Referring Expressions</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sebastiaan</first><last>van Erk</last></author>
       <author><first>André</first><last>Verleg</last></author>
       <url hash="0805a651">W01-0805</url>
@@ -496,9 +496,9 @@
     </paper>
     <paper id="10">
       <title>Linear Order as Higher-Level Decision: Information Structure in Strategic and Tactical Generation</title>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayovà</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayovà</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Elke</first><last>Teich</last></author>
       <url hash="114b9aac">W01-0810</url>
       <bibkey>kruijff-etal-2001-linear</bibkey>
@@ -511,7 +511,7 @@
     </paper>
     <paper id="12">
       <title>Reusing a Statistical Language Model for Generation</title>
-      <author><first>Kevin</first><last>Humphreys</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
       <author><first>Mike</first><last>Calcagno</last></author>
       <author><first>David</first><last>Weise</last></author>
       <url hash="c5ed14a1">W01-0812</url>
@@ -520,8 +520,8 @@
     <paper id="13">
       <title>Applying Natural Language Generation to Indicative Summarization</title>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <url hash="9c642d83">W01-0813</url>
       <bibkey>kan-etal-2001-applying</bibkey>
     </paper>
@@ -534,8 +534,8 @@
     </paper>
     <paper id="15">
       <title>Evaluating Text Quality: Judging Output Texts Without a Clear Source</title>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="ed520142">W01-0815</url>
       <bibkey>hartley-scott-2001-evaluating</bibkey>
     </paper>
@@ -552,7 +552,7 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <url hash="7d5d4735">W01-0901</url>
       <bibkey>paroubek-2001-introduction</bibkey>
     </paper>
@@ -564,8 +564,8 @@
     </paper>
     <paper id="3">
       <title>Usability Evaluation in Spoken Language Dialogue Systems</title>
-      <author><first>Laila</first><last>Dybkjær</last></author>
-      <author><first>Niels O.</first><last>Bernsen</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjær</last></author>
+      <author id="niels-ole-bernsen"><first>Niels O.</first><last>Bernsen</last></author>
       <url hash="f0da0d8e">W01-0903</url>
       <bibkey>dybkjaer-bernsen-2001-usability</bibkey>
     </paper>
@@ -587,14 +587,14 @@
     <paper id="6">
       <title>Verification and validation of language processing systems: Is it evaluation?</title>
       <author><first>Valerie</first><last>Barr</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <url hash="dfe2ca1a">W01-0906</url>
       <bibkey>barr-klavans-2001-verification</bibkey>
     </paper>
     <paper id="7">
       <title>The <fixed-case>ARC</fixed-case> A3 Project: Terminology Acquisition Tools: Evaluation Method and Task</title>
-      <author><first>Widad Mustafa El</first><last>Hadi</last></author>
-      <author><first>Ismail</first><last>Timimi</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad Mustafa El</first><last>Hadi</last></author>
+      <author id="ismail-timimi"><first>Ismail</first><last>Timimi</last></author>
       <author><first>Annette</first><last>Beguin</last></author>
       <author><first>Marcilio</first><last>de Brito</last></author>
       <url hash="199033df">W01-0907</url>
@@ -608,7 +608,7 @@
     </paper>
     <paper id="9">
       <title>A Cross-Comparison of Two Clustering Methods</title>
-      <author><first>Michele</first><last>Jardino</last></author>
+      <author id="michele-jardino"><first>Michele</first><last>Jardino</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
       <url hash="ae9fe8e6">W01-0909</url>
@@ -633,7 +633,7 @@
     </paper>
     <paper id="2">
       <title>Human Language Technologies for Knowledge Management</title>
-      <author><first>Mark</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark</first><last>Maybury</last></author>
       <url hash="a17d0ba4">W01-1002</url>
       <bibkey>maybury-2001-human</bibkey>
     </paper>
@@ -645,21 +645,21 @@
     </paper>
     <paper id="4">
       <title>Using <fixed-case>HLT</fixed-case> for Acquiring, Retrieving and Publishing Knowledge in <fixed-case>AKT</fixed-case></title>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Christopher</first><last>Brewster</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="bd5aceae">W01-1004</url>
       <bibkey>bontcheva-etal-2001-using</bibkey>
     </paper>
     <paper id="5">
       <title>Identification of Relevant Terms to Support the Construction of Domain Ontologies</title>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <author><first>Michele</first><last>Missikoff</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <url hash="a1469ac2">W01-1005</url>
       <bibkey>velardi-etal-2001-identification</bibkey>
     </paper>
@@ -673,7 +673,7 @@
     <paper id="7">
       <title>The Form is the Substance: Classification of Genres in Text</title>
       <author><first>Nigel</first><last>Dewdney</last></author>
-      <author><first>Carol</first><last>VanEss-Dykema</last></author>
+      <author id="carol-van-ess-dykema"><first>Carol</first><last>VanEss-Dykema</last></author>
       <author><first>Richard</first><last>MacMillan</last></author>
       <url hash="8e9359d9">W01-1007</url>
       <bibkey>dewdney-etal-2001-form</bibkey>
@@ -700,44 +700,44 @@
     </paper>
     <paper id="11">
       <title><fixed-case>GIST</fixed-case>-<fixed-case>IT</fixed-case>: Combining Linguistic and Machine Learning Techniques for Email Summarization</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <url hash="df1b48d0">W01-1011</url>
       <bibkey>tzoukermann-etal-2001-gist</bibkey>
     </paper>
     <paper id="12">
       <title>What are the points? What are the stances? Decanting for Question-driven Retrieval and Executive Summarization</title>
-      <author><first>Jean-François</first><last>Delannoy</last></author>
+      <author id="jean-francois-delannoy"><first>Jean-François</first><last>Delannoy</last></author>
       <url hash="688da94c">W01-1012</url>
       <bibkey>delannoy-2001-points</bibkey>
     </paper>
     <paper id="13">
       <title>Multilingual Authoring: the <fixed-case>NAMIC</fixed-case> Approach</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
-      <author><first>Roberta</first><last>Catizone</last></author>
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <url hash="03b6fd05">W01-1013</url>
       <bibkey>basili-etal-2001-multilingual</bibkey>
     </paper>
     <paper id="14">
       <title>Automatic Augmentation of Translation Dictionary with Database Terminologies In Multilingual Query Interpretation</title>
       <author><first>Hodong</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <url hash="81cf29e8">W01-1014</url>
       <bibkey>lee-park-2001-automatic</bibkey>
     </paper>
     <paper id="15">
       <title>Adapting and Extending Lexical Resources in a Dialogue System</title>
-      <author><first>Ana</first><last>García-Serrano</last></author>
+      <author id="ana-garcia-serrano"><first>Ana</first><last>García-Serrano</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
-      <author><first>Luis</first><last>Rodrigo</last></author>
+      <author id="luis-rodrigo-aguado"><first>Luis</first><last>Rodrigo</last></author>
       <url hash="dc0241f6">W01-1015</url>
       <bibkey>garcia-serrano-etal-2001-adapting</bibkey>
     </paper>
@@ -750,14 +750,14 @@
     <paper id="17">
       <title>The Automatic Generation of Formal Annotations in a Multimedia Indexing and Searching Environment</title>
       <author><first>Thierry</first><last>Declerck</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <url hash="126cd5a4">W01-1017</url>
       <bibkey>declerck-etal-2001-automatic</bibkey>
     </paper>
     <paper id="18">
       <title>Human Language Technology and Knowledge Management - Final Roadmap Session</title>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <url hash="72afc50e">W01-1018</url>
       <bibkey>bernsen-2001-human</bibkey>
     </paper>
@@ -784,12 +784,12 @@
     </frontmatter>
     <paper id="1">
       <title>Looking Under the Hood: Tools for Diagnosing Your Question Answering Engine</title>
-      <author><first>Eric</first><last>Breck</last></author>
+      <author id="eric-breck"><first>Eric</first><last>Breck</last></author>
       <author><first>Marc</first><last>Light</last></author>
-      <author><first>Gideon</first><last>Mann</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Brianne</first><last>Brown</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <url hash="648af782">W01-1201</url>
       <bibkey>breck-etal-2001-looking</bibkey>
     </paper>
@@ -797,8 +797,8 @@
       <title><fixed-case>MAYA</fixed-case>: A Fast Question-answering System Based on a Predictive Answer Indexer</title>
       <author><first>Harksoo</first><last>Kim</last></author>
       <author><first>Kyungsun</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="8d35a61c">W01-1202</url>
       <bibkey>kim-etal-2001-maya</bibkey>
     </paper>
@@ -810,20 +810,20 @@
     </paper>
     <paper id="4">
       <title>A Statistical Method for Short Answer Extraction</title>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <url hash="99fda4c4">W01-1204</url>
       <bibkey>mann-2001-statistical</bibkey>
     </paper>
     <paper id="5">
       <title>Towards Ontological Question Answering</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="9b178632">W01-1205</url>
       <bibkey>zajac-2001-towards</bibkey>
     </paper>
     <paper id="6">
       <title>Answer Mining from On-Line Documents</title>
-      <author><first>Marius</first><last>Pasca</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="marius-pasca"><first>Marius</first><last>Pasca</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <url hash="72944bd5">W01-1206</url>
       <bibkey>pasca-harabagiu-2001-answer</bibkey>
     </paper>
@@ -872,9 +872,9 @@
     </paper>
     <paper id="3">
       <title>Towards Invariant Meanings Of Spatial Prepositions and Preverbs</title>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <author><first>Ewa</first><last>Gwiazdecka</last></author>
-      <author><first>Azucena</first><last>Montes-Rendon</last></author>
+      <author id="azucena-montes-rendon"><first>Azucena</first><last>Montes-Rendon</last></author>
       <url hash="a8368403">W01-1303</url>
       <bibkey>descles-etal-2001-towards</bibkey>
     </paper>
@@ -887,7 +887,7 @@
     <paper id="5">
       <title>A Model For Processing Temporal References In <fixed-case>C</fixed-case>hinese</title>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Chunfa</first><last>Yuan</last></author>
       <url hash="345fdcb5">W01-1305</url>
       <bibkey>li-etal-2001-model</bibkey>
@@ -915,20 +915,20 @@
     <paper id="9">
       <title>From Temporal Expressions To Temporal Information: Semantic Tagging Of News Messages</title>
       <author><first>Frank</first><last>Schilder</last></author>
-      <author><first>Christopher</first><last>Habel</last></author>
+      <author id="christopher-habel"><first>Christopher</first><last>Habel</last></author>
       <url hash="43618401">W01-1309</url>
       <bibkey>schilder-habel-2001-temporal</bibkey>
     </paper>
     <paper id="10">
       <title>Some Facts About Times, Events and Subjects - Invited Talk</title>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <url hash="5e1033dc">W01-1310</url>
       <bibkey>pianesi-2001-facts</bibkey>
     </paper>
     <paper id="11">
       <title>A Pilot Study On Annotating Temporal Relations In Text</title>
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <url hash="952323b2">W01-1311</url>
       <bibkey>setzer-gaizauskas-2001-pilot</bibkey>
     </paper>
@@ -936,7 +936,7 @@
       <title>A Multilingual Approach To Annotating And Extracting Temporal Information</title>
       <author><first>George</first><last>Wilson</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <author><first>Lisa</first><last>Ferro</last></author>
       <url hash="f47fddc8">W01-1312</url>
       <bibkey>wilson-etal-2001-multilingual</bibkey>
@@ -944,7 +944,7 @@
     <paper id="13">
       <title>Assigning Time-Stamps To Event-Clauses</title>
       <author><first>Elena</first><last>Filatova</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="7a866bc8">W01-1313</url>
       <bibkey>filatova-hovy-2001-assigning</bibkey>
     </paper>
@@ -956,7 +956,7 @@
     </paper>
     <paper id="15">
       <title>The Annotation Of Temporal Information In Natural Language Sentences</title>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <author><first>Fabrizio</first><last>Arosio</last></author>
       <url hash="8e799efb">W01-1315</url>
       <bibkey>katz-arosio-2001-annotation</bibkey>
@@ -974,14 +974,14 @@
     </frontmatter>
     <paper id="1">
       <title>Example-based machine translation using <fixed-case>DP</fixed-case>-matching between work sequences</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="7b2b687c">W01-1401</url>
       <bibkey>sumita-2001-example</bibkey>
     </paper>
     <paper id="2">
       <title>Overcoming the customization bottleneck using example-based <fixed-case>MT</fixed-case></title>
       <author><first>Stephen D.</first><last>Richardson</last></author>
-      <author><first>William B.</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William B.</first><last>Dolan</last></author>
       <author><first>Arul</first><last>Menezes</last></author>
       <author><first>Monica</first><last>Corston-Oliver</last></author>
       <url hash="ae6a9b12">W01-1402</url>
@@ -989,8 +989,8 @@
     </paper>
     <paper id="3">
       <title>Inducing Lexico-Structural Transfer Rules from Parsed Bi-texts</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
       <url hash="4f1d9cc3">W01-1403</url>
       <bibkey>lavoie-etal-2001-inducing</bibkey>
@@ -1003,7 +1003,7 @@
     </paper>
     <paper id="5">
       <title>Stochastic Modelling: From Pattern Classification to Language Translation</title>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="58b74c78">W01-1405</url>
       <bibkey>ney-2001-stochastic</bibkey>
     </paper>
@@ -1016,16 +1016,16 @@
     </paper>
     <paper id="7">
       <title>Toward hierarchical models for statistical machine translation of inflected languages</title>
-      <author><first>Sonja</first><last>Niessen</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="sonja-niessen"><first>Sonja</first><last>Niessen</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="bc261e9f">W01-1407</url>
       <bibkey>niessen-ney-2001-toward</bibkey>
     </paper>
     <paper id="8">
       <title>An Efficient <fixed-case>A</fixed-case>* Search Algorithm for Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="8477242d">W01-1408</url>
       <bibkey>och-etal-2001-efficient</bibkey>
     </paper>
@@ -1037,20 +1037,20 @@
     </paper>
     <paper id="10">
       <title>Machine Translation with Grammar Association: Some Improvements and the <fixed-case>L</fixed-case>oco_<fixed-case>C</fixed-case> Model</title>
-      <author><first>Federico</first><last>Prat</last></author>
+      <author id="federico-prat"><first>Federico</first><last>Prat</last></author>
       <url hash="02f60f72">W01-1410</url>
       <bibkey>prat-2001-machine</bibkey>
     </paper>
     <paper id="11">
       <title>Towards a Simple and Accurate Statistical Approach to Learning Translation Relationships among Words</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <url hash="1b064210">W01-1411</url>
       <bibkey>moore-2001-towards</bibkey>
     </paper>
     <paper id="12">
       <title>A Comparative Study on Translation Units for Bilingual Lexicon Extraction</title>
       <author><first>Kaoru</first><last>Yamamoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Mihoko</first><last>Kitamura</last></author>
       <url hash="7d72b662">W01-1412</url>
       <bibkey>yamamoto-etal-2001-comparative</bibkey>
@@ -1092,7 +1092,7 @@
     </frontmatter>
     <paper id="1">
       <title>Preface</title>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <url hash="8f137f68">W01-1501</url>
       <bibkey>rosner-2001-preface</bibkey>
     </paper>
@@ -1104,8 +1104,8 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>TELRI</fixed-case> tool catalogue: structure and prospects</title>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <url hash="243f30b1">W01-1503</url>
       <bibkey>erjavec-varadi-2001-telri</bibkey>
     </paper>
@@ -1117,7 +1117,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>i<fixed-case>SSA</fixed-case> - An Infrastructure for <fixed-case>NLP</fixed-case> Application Development</title>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author id="fabio-pianesi"><first>F.</first><last>Pianesi</last></author>
       <author><first>E.</first><last>Maci</last></author>
       <author id="irina-prodanof"><first>I.</first><last>Prodanof</last></author>
@@ -1135,26 +1135,26 @@
     </paper>
     <paper id="7">
       <title>International Standards for Multilingual Resource Sharing: The <fixed-case>ISLE</fixed-case> Computational Lexicon Working Group</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="7b4ff8de">W01-1507</url>
       <bibkey>calzolari-etal-2001-international</bibkey>
     </paper>
     <paper id="8">
       <title>Multimedia Language Resources</title>
-      <author><first>Daan</first><last>Broeder</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <url hash="cd66350a">W01-1508</url>
       <bibkey>broeder-wittenburg-2001-multimedia</bibkey>
     </paper>
     <paper id="9">
       <title>Tools and resources for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <author><first>Pierre</first><last>Bouiller</last></author>
       <author><first>Philippe</first><last>Deschamp</last></author>
       <author><first>Linda</first><last>Kaouane</last></author>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <url hash="b598f7b2">W01-1509</url>
       <bibkey>barthelemy-etal-2001-tools</bibkey>
     </paper>
@@ -1163,14 +1163,14 @@
       <author><first>Naoki</first><last>Yoshinaga</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <url hash="0feaf586">W01-1510</url>
       <bibkey>yoshinaga-etal-2001-resource</bibkey>
     </paper>
     <paper id="11">
       <title>Covering Treebanks with <fixed-case>GLARF</fixed-case></title>
       <author id="adam-meyers"><first>A.</first><last>Meyers</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
       <author><first>Shubin</first><last>Zhao</last></author>
       <url hash="c9bed067">W01-1511</url>
@@ -1179,9 +1179,9 @@
     <paper id="12">
       <title>Using an Open-Source Unification-Based System for <fixed-case>CL</fixed-case>/<fixed-case>NLP</fixed-case> Teaching</title>
       <author><first>Anne</first><last>Copestake</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <url hash="8d95d8ae">W01-1512</url>
       <bibkey>copestake-etal-2001-using</bibkey>
@@ -1196,7 +1196,7 @@
     </paper>
     <paper id="14">
       <title>Annotation Graphs and Servers and Multi-Modal Resources: Infrastructure for Interdisciplinary Education, Research and Development</title>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <url hash="988e25c6">W01-1514</url>
       <bibkey>cieri-bird-2001-annotation</bibkey>
@@ -1223,22 +1223,22 @@
     <paper id="1">
       <title>Annotations and Tools for an Activity Based Spoken Language Corpus</title>
       <author><first>Jens</first><last>Allwood</last></author>
-      <author><first>Leif</first><last>Groenqvist</last></author>
-      <author><first>Elisabeth</first><last>Ahlsen</last></author>
+      <author id="leif-groenqvist"><first>Leif</first><last>Groenqvist</last></author>
+      <author id="elisabeth-ahlsen"><first>Elisabeth</first><last>Ahlsen</last></author>
       <author><first>Magnus</first><last>Gunnarsson</last></author>
       <url hash="35b9888d">W01-1601</url>
       <bibkey>allwood-etal-2001-annotations</bibkey>
     </paper>
     <paper id="2">
       <title>Variant Transduction: A Method for Rapid Development of Interactive Spoken Interfaces</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Shona</first><last>Douglas</last></author>
       <url hash="61b5c2de">W01-1602</url>
       <bibkey>alshawi-douglas-2001-variant</bibkey>
     </paper>
     <paper id="3">
       <title>Development of a Machine Learnable Discourse Tagging Tool</title>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Yukihiko</first><last>Kimura</last></author>
       <author><first>Takuya</first><last>Nishimoto</last></author>
       <author><first>Yasuhisa</first><last>Niimi</last></author>
@@ -1267,10 +1267,10 @@
     </paper>
     <paper id="7">
       <title>Comparing Several Aspects of Human-Computer and Human-Human Dialogues</title>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
-      <author><first>Laurie</first><last>Damianos</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="laurie-damianos"><first>Laurie</first><last>Damianos</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <url hash="e65de2f6">W01-1607</url>
       <bibkey>doran-etal-2001-comparing</bibkey>
     </paper>
@@ -1279,7 +1279,7 @@
       <author><first>Christian</first><last>Ebert</last></author>
       <author><first>Shalom</first><last>Lappin</last></author>
       <author><first>Howard</first><last>Gregory</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <url hash="5a9f89ec">W01-1608</url>
       <bibkey>ebert-etal-2001-generating</bibkey>
     </paper>
@@ -1295,22 +1295,22 @@
     </paper>
     <paper id="10">
       <title>Labeling Corrections and Aware Sites in Spoken Dialogue Systems</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Marc</first><last>Swerts</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <url hash="9bacbb9b">W01-1610</url>
       <bibkey>hirschberg-etal-2001-labeling</bibkey>
     </paper>
     <paper id="11">
       <title>Confidence-Based Adaptivity in Response Generation for a Spoken Dialogue System</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <url hash="71813d6d">W01-1611</url>
       <bibkey>jokinen-wilcock-2001-confidence</bibkey>
     </paper>
     <paper id="12">
       <title>Annotating Anaphoric and Bridging Relations with <fixed-case>MMAX</fixed-case></title>
-      <author><first>Christoph</first><last>Mueller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Mueller</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <url hash="2c0f7f43">W01-1612</url>
       <bibkey>mueller-strube-2001-annotating</bibkey>
@@ -1329,8 +1329,8 @@
     </paper>
     <paper id="15">
       <title>Integration of Referential Scope Limitations into <fixed-case>J</fixed-case>apanese Pronoun Resolution</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="d718a120">W01-1615</url>
       <bibkey>paul-sumita-2001-integration</bibkey>
     </paper>
@@ -1338,43 +1338,43 @@
       <title>On the Means for Clarification in Dialogue</title>
       <author><first>Matthew</first><last>Purver</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
-      <author><first>Patrick</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick</first><last>Healey</last></author>
       <url hash="5a6edb21">W01-1616</url>
       <bibkey>purver-etal-2001-means</bibkey>
     </paper>
     <paper id="17">
       <title>Plug and Play Speech Understanding</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Ian</first><last>Lewin</last></author>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <url hash="4ae2d35d">W01-1617</url>
       <bibkey>rayner-etal-2001-plug</bibkey>
     </paper>
     <paper id="18">
       <title>Designing Confirmation Mechanisms and Error Recover Techniques in a Railway Information System for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Ruben</first><last>San-Segundo</last></author>
-      <author><first>Juan Manuel</first><last>Montero</last></author>
-      <author><first>Jose Manuel</first><last>Pardo</last></author>
+      <author id="ruben-san-segundo"><first>Ruben</first><last>San-Segundo</last></author>
+      <author id="juan-m-montero"><first>Juan Manuel</first><last>Montero</last></author>
+      <author id="jose-manuel-pardo"><first>Jose Manuel</first><last>Pardo</last></author>
       <url hash="95834bfa">W01-1618</url>
       <bibkey>san-segundo-etal-2001-designing</bibkey>
     </paper>
     <paper id="19">
       <title>A Telephone-Based Railway Information System for <fixed-case>S</fixed-case>panish: Development of a Methodology for Spoken Dialogue Design</title>
-      <author><first>Ruben</first><last>San-Segundo</last></author>
-      <author><first>Juan M.</first><last>Montero</last></author>
+      <author id="ruben-san-segundo"><first>Ruben</first><last>San-Segundo</last></author>
+      <author id="juan-m-montero"><first>Juan M.</first><last>Montero</last></author>
       <author><first>Juana M.</first><last>Guitierrez</last></author>
-      <author><first>Ascension</first><last>Gallardo</last></author>
+      <author id="ascension-gallardo-antolin"><first>Ascension</first><last>Gallardo</last></author>
       <author><first>Jose D.</first><last>Romeral</last></author>
-      <author><first>Jose M.</first><last>Pardo</last></author>
+      <author id="jose-manuel-pardo"><first>Jose M.</first><last>Pardo</last></author>
       <url hash="36c1d538">W01-1619</url>
       <bibkey>san-segundo-etal-2001-telephone</bibkey>
     </paper>
     <paper id="20">
       <title>A Hybrid Approach to the Development of Dialogue Systems directed by Semantics</title>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <author><first>Isabel</first><last>Galiano</last></author>
-      <author><first>Fernando</first><last>Garcia</last></author>
+      <author id="fernando-garcia"><first>Fernando</first><last>Garcia</last></author>
       <author><first>Antonio</first><last>Cano</last></author>
       <url hash="93b51b7e">W01-1620</url>
       <bibkey>sanchis-etal-2001-hybrid</bibkey>
@@ -1382,13 +1382,13 @@
     <paper id="21">
       <title>Reconciling Initiative and Discourse Structure</title>
       <author><first>Susan E.</first><last>Strayer</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <url hash="f54b760d">W01-1621</url>
       <bibkey>strayer-heeman-2001-reconciling</bibkey>
     </paper>
     <paper id="22">
       <title>Adding Extra Input/Output Modalities to a Spoken Dialogue System</title>
-      <author><first>Janienke</first><last>Sturm</last></author>
+      <author id="janienke-sturm"><first>Janienke</first><last>Sturm</last></author>
       <author><first>Fusi</first><last>Wang</last></author>
       <author><first>Bert</first><last>Cranen</last></author>
       <url hash="1a977d56">W01-1622</url>
@@ -1415,17 +1415,17 @@
     </paper>
     <paper id="26">
       <title>A Corpus Study of Evaluative and Speculative Language</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <author><first>Matthew</first><last>Bell</last></author>
-      <author><first>Melanie</first><last>Martin</last></author>
+      <author id="melanie-martin"><first>Melanie</first><last>Martin</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <url hash="9d04267e">W01-1626</url>
       <bibkey>wiebe-etal-2001-corpus</bibkey>
     </paper>
     <paper id="27">
       <title>Dialogue Tagsets in Oncology</title>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <url hash="8f51e678">W01-1627</url>
       <bibkey>wood-2001-dialogue</bibkey>
     </paper>
@@ -1437,7 +1437,7 @@
     </paper>
     <paper id="29">
       <title>Spoken Dialogue Control Based on a Turn-minimization Criterion Depending on the Speech Recognition Accuracy</title>
-      <author><first>Norihi</first><last>Yasuda</last></author>
+      <author id="norihito-yasuda"><first>Norihi</first><last>Yasuda</last></author>
       <author><first>Kohji</first><last>Dohsaka</last></author>
       <author><first>Kiyoaki</first><last>Aikawa</last></author>
       <url hash="d7b1ba56">W01-1629</url>
@@ -1447,7 +1447,7 @@
   <volume id="17" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 13th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 2001)</booktitle>
-      <editor><first>Anna Sågvall</first><last>Hein</last></editor>
+      <editor id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></editor>
       <publisher>Department of Linguistics, Uppsala University, Sweden</publisher>
       <address>Uppsala, Sweden</address>
       <month>May</month>
@@ -1466,7 +1466,7 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>VISL</fixed-case> System: Research and applicative aspects of <fixed-case>IT</fixed-case>-based learning</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <url hash="c8884b02">W01-1702</url>
       <bibkey>bick-2001-visl</bibkey>
     </paper>
@@ -1481,14 +1481,14 @@
     </paper>
     <paper id="4">
       <title>The interaction between local focusing structure and global intentions in spoken discourse</title>
-      <author><first>Sofia</first><last>Gustafson-Capková</last></author>
+      <author id="sofia-gustafson-capkova"><first>Sofia</first><last>Gustafson-Capková</last></author>
       <url hash="cc005f1c">W01-1704</url>
       <bibkey>gustafson-capkova-2001-interaction</bibkey>
     </paper>
     <paper id="5">
       <title>Some problems related to the development of a grammar checker</title>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Pia</first><last>Lane</last></author>
       <url hash="b766c127">W01-1705</url>
       <bibkey>hagen-etal-2001-problems</bibkey>
@@ -1501,9 +1501,9 @@
     </paper>
     <paper id="7">
       <title>On Ambiguity in <fixed-case>I</fixed-case>nternet Searches</title>
-      <author><first>Gordana Ilic</first><last>Holen</last></author>
+      <author id="gordana-ilic-holen"><first>Gordana Ilic</first><last>Holen</last></author>
       <author><first>Janne</first><last>von Koss Torkildsen</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <url hash="156c71fe">W01-1707</url>
       <bibkey>holen-etal-2001-ambiguity</bibkey>
     </paper>
@@ -1515,7 +1515,7 @@
     </paper>
     <paper id="9">
       <title>En automatisk navnegjenkjenner for norsk, svensk og dansk</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <url hash="f31645c7">W01-1709</url>
       <bibkey>johannessen-2001-en</bibkey>
     </paper>
@@ -1528,7 +1528,7 @@
     <paper id="11">
       <title>Clustering dialogue knowledge with self-organizing maps</title>
       <author><first>Mauri</first><last>Kaipainen</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Timo</first><last>Koskenniemi</last></author>
       <author><first>Antti</first><last>Kerminen</last></author>
       <author><first>Kari</first><last>Kanto</last></author>
@@ -1546,20 +1546,20 @@
     <paper id="13">
       <title>Corpus-Based Extension of Semantic Lexicons in Large Scale</title>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Maria</first><last>Toporowska Gronostaj</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria</first><last>Toporowska Gronostaj</last></author>
       <author><first>Karin</first><last>Warmenius</last></author>
       <url hash="62f5338e">W01-1713</url>
       <bibkey>kokkinakis-etal-2001-corpus</bibkey>
     </paper>
     <paper id="14">
       <title>Transformation-Based Learning of Rules for Constraint Grammar Tagging</title>
-      <author><first>Torbjörn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjörn</first><last>Lager</last></author>
       <url hash="78e69184">W01-1714</url>
       <bibkey>lager-2001-transformation</bibkey>
     </paper>
     <paper id="15">
       <title>Data-Driven Methods for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging and Chunking of <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <url hash="578c52ad">W01-1715</url>
       <bibkey>megyesi-2001-data</bibkey>
     </paper>
@@ -1572,7 +1572,7 @@
     <paper id="17">
       <title>Towards multimodal public information systems</title>
       <author><first>Magnus</first><last>Merkel</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <url hash="6a046ea2">W01-1717</url>
       <bibkey>merkel-jonsson-2001-towards</bibkey>
     </paper>
@@ -1603,13 +1603,13 @@
     </paper>
     <paper id="22">
       <title>Detecting Grammar Errors in Children’s Writing: A Finite State Approach</title>
-      <author><first>Sylvana</first><last>Sofkova Hashemi</last></author>
+      <author id="sylvana-sofkova-hashemi"><first>Sylvana</first><last>Sofkova Hashemi</last></author>
       <url hash="7fb7e487">W01-1722</url>
       <bibkey>sofkova-hashemi-2001-detecting</bibkey>
     </paper>
     <paper id="23">
       <title><fixed-case>U</fixed-case>plug<fixed-case>W</fixed-case>eb–Corpus Tools on the Web</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="e2831fe2">W01-1723</url>
       <bibkey>tiedemann-2001-uplugweb</bibkey>
     </paper>
@@ -1621,7 +1621,7 @@
     </paper>
     <paper id="25">
       <title>Towards a Discourse-Oriented Representation of Information Structure in <fixed-case>HPSG</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <url hash="af55051c">W01-1725</url>
       <bibkey>wilcock-2001-towards</bibkey>
     </paper>
@@ -1641,21 +1641,21 @@
     </frontmatter>
     <paper id="1">
       <title>Issues in Extracting Information from the Web</title>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <url hash="24c9b0e6">W01-1801</url>
       <pages>3</pages>
       <bibkey>cohen-2001-issues</bibkey>
     </paper>
     <paper id="2">
       <title>Parameter Estimation for Statistical Parsing Models: Theory and Practice of</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <url hash="99e5a62e">W01-1802</url>
       <pages>4–15</pages>
       <bibkey>collins-2001-parameter</bibkey>
     </paper>
     <paper id="3">
       <title>The <fixed-case>XTAG</fixed-case> Project at <fixed-case>P</fixed-case>enn</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="903425c0">W01-1803</url>
       <pages>16–27</pages>
       <bibkey>joshi-2001-xtag</bibkey>
@@ -1663,15 +1663,15 @@
     <paper id="4">
       <title>Probabilistic Modelling of Island-Driven Parsing</title>
       <author><first>Alicia</first><last>Ageno</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <url hash="b7e146a7">W01-1804</url>
       <pages>31–41</pages>
       <bibkey>ageno-rodriguez-2001-probabilistic</bibkey>
     </paper>
     <paper id="5">
       <title>Bidirectional Automata for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
-      <author><first>Víctor J.</first><last>Díaz</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="victor-j-diaz"><first>Víctor J.</first><last>Díaz</last></author>
       <author><first>Manuel</first><last>Vilares</last></author>
       <url hash="ffc11039">W01-1805</url>
       <pages>42–53</pages>
@@ -1695,7 +1695,7 @@
     <paper id="8">
       <title>High Precision Extraction of Grammatical Relations</title>
       <author><first>John</first><last>Carrol</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="3690fa27">W01-1808</url>
       <pages>78–89</pages>
       <bibkey>carrol-briscoe-2001-high</bibkey>
@@ -1719,7 +1719,7 @@
       <title>Grammar Induction by <fixed-case>MDL</fixed-case>-Based Distributional Classification</title>
       <author><first>Yikun</first><last>Guo</last></author>
       <author><first>Fuliang</first><last>Weng</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <url hash="63e1c5b8">W01-1811</url>
       <pages>112–122</pages>
       <bibkey>guo-etal-2001-grammar</bibkey>
@@ -1727,7 +1727,7 @@
     <paper id="12">
       <title>Parsing and Hypergraphs</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <url hash="36f95f15">W01-1812</url>
       <pages>123–134</pages>
       <bibkey>klein-manning-2001-parsing</bibkey>
@@ -1735,9 +1735,9 @@
     <paper id="13">
       <title>Automatic Detection of Prosody Phrase Boundaries for Text-to-Speech System</title>
       <author><first>Xin</first><last>Lv</last></author>
-      <author><first>Tie-jun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tie-jun</first><last>Zhao</last></author>
       <author><first>Zhan-yi</first><last>Liu</last></author>
-      <author><first>Mu-yun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Mu-yun</first><last>Yang</last></author>
       <url hash="f2d0aff4">W01-1813</url>
       <pages>135–141</pages>
       <bibkey>lv-etal-2001-automatic</bibkey>
@@ -1752,7 +1752,7 @@
     <paper id="15">
       <title>Unsupervised <fixed-case>POS</fixed-case>-Tagging Improves Parsing Accuracy and Parsing Efficiency</title>
       <author><first>Robbert</first><last>Prins</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="9912e7cc">W01-1815</url>
       <pages>154–165</pages>
       <bibkey>prins-van-noord-2001-unsupervised</bibkey>
@@ -1760,7 +1760,7 @@
     <paper id="16">
       <title>Parsing the <fixed-case>CHILDES</fixed-case> Database: Methodology and Lessons Learned</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
       <url hash="f46258ec">W01-1816</url>
       <pages>166–176</pages>
@@ -1768,7 +1768,7 @@
     </paper>
     <paper id="17">
       <title>Robust Data Oriented Parsing of Speech Utterances</title>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <url hash="7a971891">W01-1817</url>
       <pages>177–188</pages>
       <bibkey>simaan-2001-robust</bibkey>
@@ -1785,7 +1785,7 @@
     </paper>
     <paper id="19">
       <title>A Multi-Input Dependency Parser</title>
-      <author><first>Salah</first><last>Aït-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Aït-Mokhtar</last></author>
       <author><first>Jean-Pierre</first><last>Chanod</last></author>
       <author><first>Claude</first><last>Roux</last></author>
       <url hash="9ac39c50">W01-1819</url>
@@ -1809,8 +1809,8 @@
     </paper>
     <paper id="22">
       <title>An Approach to Parsing <fixed-case>V</fixed-case>ietnamese Noun Compounds</title>
-      <author><first>Dinh</first><last>Dien</last></author>
-      <author><first>Hoang</first><last>Kiem</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
+      <author id="hoang-kiem"><first>Hoang</first><last>Kiem</last></author>
       <url hash="af84748a">W01-1822</url>
       <pages>213–216</pages>
       <bibkey>dien-kiem-2001-approach</bibkey>
@@ -1818,7 +1818,7 @@
     <paper id="23">
       <title>The Implementation Process of a Statistical Parser for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Andréia</first><last>Gentil Bonfante</last></author>
-      <author><first>Maria</first><last>das Graças Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria</first><last>das Graças Volpe Nunes</last></author>
       <url hash="239ba6ec">W01-1823</url>
       <pages>217–220</pages>
       <bibkey>gentil-bonfante-das-gracas-volpe-nunes-2001-implementation</bibkey>
@@ -1826,14 +1826,14 @@
     <paper id="24">
       <title>Efficient Sentence Parsing with Language Specific Features: A Case Study of <fixed-case>C</fixed-case>zech</title>
       <author><first>Aleš</first><last>Horák</last></author>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <url hash="3c67f08b">W01-1824</url>
       <pages>221–224</pages>
       <bibkey>horak-smrz-2001-efficient</bibkey>
     </paper>
     <paper id="25">
       <title>Efficient Incremental Dependency Parsing</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <author><first>Katsuhiko</first><last>Toyama</last></author>
       <author><first>Yasuyoshi</first><last>Inagaki</last></author>
@@ -1845,7 +1845,7 @@
       <title>Automatic Grammar Partitioning for Syntactic Parsing</title>
       <author><first>Po Chui</first><last>Luk</last></author>
       <author><first>Fuliang</first><last>Weng</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <url hash="c8be356c">W01-1826</url>
       <pages>229–232</pages>
       <bibkey>luk-etal-2001-automatic</bibkey>
@@ -1862,10 +1862,10 @@
     </paper>
     <paper id="28">
       <title>Word-Order Relaxations &amp; Restrictions within a Dependency Grammar</title>
-      <author><first>Martin</first><last>Plátek</last></author>
-      <author><first>Tomáš</first><last>Holan</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
-      <author><first>Karel</first><last>Oliva</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Plátek</last></author>
+      <author id="tomas-holan"><first>Tomáš</first><last>Holan</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
       <url hash="4921cef3">W01-1828</url>
       <pages>237–240</pages>
       <bibkey>platek-etal-2001-word</bibkey>
@@ -1894,7 +1894,7 @@
     </paper>
     <paper id="32">
       <title>How Much Will a <fixed-case>RE</fixed-case>-Based Preprocessor Help a Statistical Parser?</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <url hash="4db24b18">W01-1832</url>
       <pages>253–256</pages>
       <bibkey>zeman-2001-much</bibkey>
diff --git a/data/xml/W02.xml b/data/xml/W02.xml
index 3510b4dd05..663a203f82 100644
--- a/data/xml/W02.xml
+++ b/data/xml/W02.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title>Teaching <fixed-case>NLP</fixed-case>/<fixed-case>CL</fixed-case> through Games: the Case of Parsing</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <doi>10.3115/1118108.1118109</doi>
       <pages>1–9</pages>
       <url hash="16a16d2e">W02-0101</url>
@@ -23,7 +23,7 @@
     </paper>
     <paper id="2">
       <title>An Interactive Spreadsheet for Teaching the Forward-Backward Algorithm</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1118108.1118110</doi>
       <pages>10–18</pages>
       <url hash="c0ab0c3a">W02-0102</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>A Web-based Instructional Platform for Contraint-Based Grammar Formalisms and Parsing</title>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <author><first>Gerald</first><last>Penn</last></author>
       <author><first>Frank</first><last>Richter</last></author>
       <doi>10.3115/1118108.1118111</doi>
@@ -42,7 +42,7 @@
     <paper id="4">
       <title>Evangelising Language Technology: A Practically-Focussed Undergraduate Program</title>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Diego</first><last>Mollá Aliod</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá Aliod</last></author>
       <author><first>Rolf</first><last>Schwitter</last></author>
       <doi>10.3115/1118108.1118112</doi>
       <pages>27–32</pages>
@@ -59,10 +59,10 @@
     </paper>
     <paper id="6">
       <title>Design and Evolution of a Language Technologies Curriculum</title>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Eric H.</first><last>Nyberg</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="eric-nyberg"><first>Eric H.</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <doi>10.3115/1118108.1118114</doi>
       <pages>39–45</pages>
       <url hash="2e117686">W02-0106</url>
@@ -78,8 +78,8 @@
     </paper>
     <paper id="8">
       <title>Using <fixed-case>GATE</fixed-case> as an Environment for Teaching <fixed-case>NLP</fixed-case></title>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Oana</first><last>Hamza</last></author>
@@ -115,9 +115,9 @@
     </paper>
     <paper id="12">
       <title>Teaching Computational Linguistics at the <fixed-case>U</fixed-case>niversity of <fixed-case>T</fixed-case>artu: Experience, Perspectives and Challenges</title>
-      <author><first>Mare</first><last>Koit</last></author>
-      <author><first>Tiit</first><last>Roosmaa</last></author>
-      <author><first>Haldur</first><last>Õim</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
+      <author id="tiit-roosmaa"><first>Tiit</first><last>Roosmaa</last></author>
+      <author id="haldur-oim"><first>Haldur</first><last>Õim</last></author>
       <doi>10.3115/1118108.1118120</doi>
       <pages>85–90</pages>
       <url hash="e55538df">W02-0112</url>
@@ -139,8 +139,8 @@
     </frontmatter>
     <paper id="1">
       <title>Synchronization in an Asynchronous Agent-based architecture for Dialogue Systems</title>
-      <author><first>Nate</first><last>Blaylock</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>George</first><last>Ferguson</last></author>
       <doi>10.3115/1118121.1118122</doi>
       <pages>1–02</pages>
@@ -158,7 +158,7 @@
     </paper>
     <paper id="3">
       <title>Non-Sentential Utterances in Dialogue: A: Corpus-Based Study</title>
-      <author><first>Raquel</first><last>Fernandez</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <doi>10.3115/1118121.1118124</doi>
       <pages>15–26</pages>
@@ -167,8 +167,8 @@
     </paper>
     <paper id="4">
       <title>A Semantic Account of Adverbials as Discourse Connectives</title>
-      <author><first>Kate</first><last>Forbes</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <doi>10.3115/1118121.1118125</doi>
       <pages>27–36</pages>
       <url hash="8a7f09e2">W02-0204</url>
@@ -176,7 +176,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>MUP</fixed-case> - The <fixed-case>UIC</fixed-case> Standoff Markup Tool</title>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
       <doi>10.3115/1118121.1118126</doi>
       <pages>37–41</pages>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="6">
       <title>An Experiment to evaluate the effectiveness of cross-media cues in computer media</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <doi>10.3115/1118121.1118127</doi>
       <pages>42–45</pages>
       <url hash="92605370">W02-0206</url>
@@ -203,7 +203,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>D</fixed-case>ialogue<fixed-case>V</fixed-case>iew - An Annotation Tool for Dialogue</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <author><first>Fan</first><last>Yang</last></author>
       <author><first>Susan E.</first><last>Strayer</last></author>
       <doi>10.3115/1118121.1118129</doi>
@@ -214,8 +214,8 @@
     <paper id="9">
       <title>A Flexible Framework for Developing Mixed-Initiative Dialog Systems</title>
       <author><first>Judith</first><last>Hochberg</last></author>
-      <author><first>Nanda</first><last>Kambhatla</last></author>
-      <author><first>Salim</first><last>Roukos</last></author>
+      <author id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></author>
+      <author id="salim-roukos"><first>Salim</first><last>Roukos</last></author>
       <doi>10.3115/1118121.1118130</doi>
       <pages>60–63</pages>
       <url hash="2b1d01f0">W02-0209</url>
@@ -223,11 +223,11 @@
     </paper>
     <paper id="10">
       <title>Adaptive Dialogue Systems - Interaction with Interact</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Antti</first><last>Kerminen</last></author>
       <author><first>Tommi</first><last>Lagus</last></author>
       <author><first>Jukka</first><last>Kuusisto</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <author><first>Markku</first><last>Turunen</last></author>
       <author><first>Jaakko</first><last>Hakulinen</last></author>
       <author><first>Krista</first><last>Jauhiainen</last></author>
@@ -238,7 +238,7 @@
     </paper>
     <paper id="11">
       <title>Discourse Processing for Explanatory Essays in Tutorial Applications</title>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
       <author><first>Kurt</first><last>VanLehn</last></author>
       <doi>10.3115/1118121.1118132</doi>
       <pages>74–83</pages>
@@ -248,7 +248,7 @@
     <paper id="12">
       <title>Conditional responses in information-seeking dialogues</title>
       <author><first>Elena</first><last>Karagjosova</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <doi>10.3115/1118121.1118133</doi>
       <pages>84–87</pages>
       <url hash="52527292">W02-0212</url>
@@ -304,7 +304,7 @@
     </paper>
     <paper id="18">
       <title>Dialogue Macrogame Theory</title>
-      <author><first>William</first><last>Mann</last></author>
+      <author id="william-c-mann"><first>William</first><last>Mann</last></author>
       <doi>10.3115/1118121.1118139</doi>
       <pages>129–141</pages>
       <url hash="ee72d723">W02-0218</url>
@@ -330,7 +330,7 @@
     <paper id="21">
       <title>Training a Dialogue Act Tagger for Human-human and Human-computer Travel dialogues</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <doi>10.3115/1118121.1118142</doi>
       <pages>162–173</pages>
       <url hash="b46f8775">W02-0221</url>
@@ -346,7 +346,7 @@
     </paper>
     <paper id="23">
       <title>A Dialog Architecture for Military Story Capture</title>
-      <author><first>Ronnie</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie</first><last>Smith</last></author>
       <author><first>Brian</first><last>Manning</last></author>
       <author><first>Jon</first><last>Rogers</last></author>
       <author><first>Brian</first><last>Adams</last></author>
@@ -368,7 +368,7 @@
     </paper>
     <paper id="25">
       <title>Rare Dialogue Acts in Oncology Consultations</title>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <author><first>Richard</first><last>Craggs</last></author>
       <author><first>Ian</first><last>Fletcher</last></author>
       <author><first>Peter</first><last>Maguire</last></author>
@@ -413,10 +413,10 @@
     </frontmatter>
     <paper id="1">
       <title>Tuning support vector machines for biomedical named entity recognition</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Takaki</first><last>Makino</last></author>
       <author><first>Yoshihiro</first><last>Ohta</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1118149.1118150</doi>
       <pages>1–8</pages>
       <url hash="72ca95e9">W02-0301</url>
@@ -433,10 +433,10 @@
     </paper>
     <paper id="3">
       <title>Contrast and variability in gene names</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <author><first>Andrew</first><last>Dolbey</last></author>
       <author><first>George</first><last>Acquaah-Mensah</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <doi>10.3115/1118149.1118152</doi>
       <pages>14–20</pages>
       <url hash="1c06ad82">W02-0303</url>
@@ -444,7 +444,7 @@
     </paper>
     <paper id="4">
       <title>Accenting unknown words in a specialized language</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <doi>10.3115/1118149.1118153</doi>
       <pages>21–28</pages>
@@ -473,7 +473,7 @@
     <paper id="7">
       <title>Enhanced natural language access to anatomically-indexed data</title>
       <author><first>Gail</first><last>Sinclair</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Duncan</first><last>Davidson</last></author>
       <doi>10.3115/1118149.1118156</doi>
       <pages>45–52</pages>
@@ -483,7 +483,7 @@
     <paper id="8">
       <title>Unsupervised,corpus-based method for extending a biomedical terminology</title>
       <author><first>Olivier</first><last>Bodenreider</last></author>
-      <author><first>Thomas</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas</first><last>Rindflesch</last></author>
       <author><first>Anita</first><last>Burgun</last></author>
       <doi>10.3115/1118149.1118157</doi>
       <pages>53–60</pages>
@@ -492,7 +492,7 @@
     </paper>
     <paper id="9">
       <title>Biomedical text retrieval in languages with a complex morphology</title>
-      <author><first>Stefan</first><last>Schultz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schultz</last></author>
       <author><first>Martin</first><last>Honeck</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <doi>10.3115/1118149.1118158</doi>
@@ -502,7 +502,7 @@
     </paper>
     <paper id="10">
       <title>Analyzing the Semantics of patient data to rank records of literature retrieval</title>
-      <author><first>Eneida</first><last>Mendonca</last></author>
+      <author id="eneida-a-mendonca"><first>Eneida</first><last>Mendonca</last></author>
       <author><first>Stephen</first><last>Johnson</last></author>
       <author><first>Yoon-ho</first><last>Seol</last></author>
       <author><first>James</first><last>Cimino</last></author>
@@ -513,8 +513,8 @@
     </paper>
     <paper id="11">
       <title>Utilizing text mining results: The Pasta Web System</title>
-      <author><first>George</first><last>Demetriou</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="george-demetriou"><first>George</first><last>Demetriou</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <doi>10.3115/1118149.1118160</doi>
       <pages>77–84</pages>
       <url hash="ea127dfa">W02-0311</url>
@@ -522,9 +522,9 @@
     </paper>
     <paper id="12">
       <title><fixed-case>M</fixed-case>edstract: creating large-scale information servers from biomedical texts</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>José</first><last>Castaño</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jose-castano"><first>José</first><last>Castaño</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <author><first>Jason</first><last>Zhang</last></author>
       <author><first>Wei</first><last>Luo</last></author>
       <doi>10.3115/1118149.1118161</doi>
@@ -556,8 +556,8 @@
     </paper>
     <paper id="2">
       <title>Selecting sentences for multidocument summaries using randomized local search</title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.3115/1118162.1118164</doi>
       <pages>9–18</pages>
       <url hash="9febe41b">W02-0402</url>
@@ -566,9 +566,9 @@
     <paper id="3">
       <title>Using a text engineering framework to build an extendable and portable <fixed-case>IE</fixed-case>-based summarisation system</title>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Oana</first><last>Hamza</last></author>
       <doi>10.3115/1118162.1118165</doi>
       <pages>19–26</pages>
@@ -577,8 +577,8 @@
     </paper>
     <paper id="4">
       <title>Revisions that improve cohesion in multi-document summaries: a preliminary study</title>
-      <author><first>Jahna C.</first><last>Otterbacher</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="jahna-otterbacher"><first>Jahna C.</first><last>Otterbacher</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <author><first>Airong</first><last>Luo</last></author>
       <doi>10.3115/1118162.1118166</doi>
       <pages>27–44</pages>
@@ -595,8 +595,8 @@
     </paper>
     <paper id="6">
       <title>Manual and automatic evaluation of summaries</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <doi>10.3115/1118162.1118168</doi>
       <pages>45–51</pages>
       <url hash="9995c3e7">W02-0406</url>
@@ -635,7 +635,7 @@
       <title>Acquisition System for <fixed-case>A</fixed-case>rabic Noun Morphology</title>
       <author><first>Saleem</first><last>Abuleil</last></author>
       <author><first>Khalid</first><last>Alsamara</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.3115/1118637.1118640</doi>
       <url hash="2ae9cc8b">W02-0503</url>
       <bibkey>abuleil-etal-2002-acquisition</bibkey>
@@ -649,7 +649,7 @@
     </paper>
     <paper id="5">
       <title>Machine Transliteration of Names in <fixed-case>A</fixed-case>rabic Texts</title>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <doi>10.3115/1118637.1118642</doi>
       <url hash="0e79d6f4">W02-0505</url>
@@ -666,8 +666,8 @@
       <title><fixed-case>QARAB</fixed-case>: A: Question Answering System to Support the <fixed-case>A</fixed-case>rabic Language</title>
       <author><first>Bassam</first><last>Hammo</last></author>
       <author><first>Hani</first><last>Abu-Salem</last></author>
-      <author><first>Steven</first><last>Lytinen</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="steven-l-lytinen"><first>Steven</first><last>Lytinen</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <doi>10.3115/1118637.1118644</doi>
       <url hash="6621951a">W02-0507</url>
       <bibkey>hammo-etal-2002-qarab</bibkey>
@@ -704,7 +704,7 @@
     <paper id="1">
       <title>Unsupervised Learning of Morphology for Building Lexicon for a Highly Inflectional Language</title>
       <author><first>Utpal</first><last>Sharma</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <author><first>Rajib</first><last>Das</last></author>
       <doi>10.3115/1118647.1118648</doi>
       <pages>1–10</pages>
@@ -713,8 +713,8 @@
     </paper>
     <paper id="2">
       <title>Unsupervised Learning of Morphology Using a Novel Directed Search Algorithm: Taking the First Step</title>
-      <author><first>Matthew G.</first><last>Snover</last></author>
-      <author><first>Gaja E.</first><last>Jarosz</last></author>
+      <author id="matthew-snover"><first>Matthew G.</first><last>Snover</last></author>
+      <author id="gaja-jarosz"><first>Gaja E.</first><last>Jarosz</last></author>
       <author><first>Michael R.</first><last>Brent</last></author>
       <doi>10.3115/1118647.1118649</doi>
       <pages>11–20</pages>
@@ -733,7 +733,7 @@
     <paper id="4">
       <title>Unsupervised Learning of Morphology Without Morphemes</title>
       <author><first>Sylvain</first><last>Neuvel</last></author>
-      <author><first>Sean A.</first><last>Fulop</last></author>
+      <author id="sean-a-fulop"><first>Sean A.</first><last>Fulop</last></author>
       <doi>10.3115/1118647.1118651</doi>
       <pages>31–40</pages>
       <url hash="ef5a9bb7">W02-0604</url>
@@ -750,9 +750,9 @@
     </paper>
     <paper id="6">
       <title>Unsupervised discovery of morphologically related words based on orthographic and semantic similarity</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <doi>10.3115/1118647.1118653</doi>
       <pages>48–57</pages>
       <url hash="1c735b67">W02-0606</url>
@@ -790,7 +790,7 @@
     </frontmatter>
     <paper id="1">
       <title>Corpus-Centered Computation</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <doi>10.3115/1118656.1118657</doi>
       <pages>1–8</pages>
       <url hash="9b3e62c7">W02-0701</url>
@@ -800,7 +800,7 @@
       <title>Topic Detection Based on Dialogue History</title>
       <author><first>Takayuki</first><last>Nakata</last></author>
       <author><first>Takahiro</first><last>Ikeda</last></author>
-      <author><first>Shinichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Shinichi</first><last>Ando</last></author>
       <author><first>Akitoshi</first><last>Okumura</last></author>
       <doi>10.3115/1118656.1118658</doi>
       <pages>9–14</pages>
@@ -810,10 +810,10 @@
     <paper id="3">
       <title>Spoken Language Parsing Using Phrase-Level Grammars and Trainable Classifiers</title>
       <author><first>Chad</first><last>Langley</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <doi>10.3115/1118656.1118659</doi>
       <pages>15–22</pages>
@@ -823,7 +823,7 @@
     <paper id="4">
       <title>Finding Translation Pairs from <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Untokenized Aligned Corpora</title>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <doi>10.3115/1118656.1118660</doi>
       <pages>23–30</pages>
       <url hash="84ab2622">W02-0704</url>
@@ -831,7 +831,7 @@
     </paper>
     <paper id="5">
       <title>Speech Translation Performance of Statistical Dependency Transduction and Semantic Similarity Transduction</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <author><first>Shona</first><last>Douglas</last></author>
       <doi>10.3115/1118656.1118661</doi>
       <pages>31–38</pages>
@@ -840,9 +840,9 @@
     </paper>
     <paper id="6">
       <title>Architectures for Speech-to-Speech Translation Using Finite-state Models</title>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
-      <author><first>Juan Miguel</first><last>Vilar</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
+      <author id="juan-miguel-vilar"><first>Juan Miguel</first><last>Vilar</last></author>
       <doi>10.3115/1118656.1118662</doi>
       <pages>39–44</pages>
       <url hash="e207c693">W02-0706</url>
@@ -852,7 +852,7 @@
       <title>Evaluation of Direct Speech Translation Method Using Inductive Learning for Conversations in the Travel Domain</title>
       <author><first>Koji</first><last>Murakami</last></author>
       <author><first>Makoto</first><last>Hiroshige</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <doi>10.3115/1118656.1118663</doi>
       <pages>45–52</pages>
@@ -861,14 +861,14 @@
     </paper>
     <paper id="8">
       <title>Balancing Expressiveness and Simplicity in an Interlingua for Task Based Dialogue</title>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Dorcas</first><last>Pianta</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
-      <author><first>Nadia</first><last>Mana</last></author>
+      <author id="nadia-mana"><first>Nadia</first><last>Mana</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <doi>10.3115/1118656.1118664</doi>
       <pages>53–60</pages>
       <url hash="8be51c9b">W02-0708</url>
@@ -876,7 +876,7 @@
     </paper>
     <paper id="9">
       <title>Interactive <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Speech Translation Based on Dialogue Management</title>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Bo</first><last>Xu</last></author>
       <author><first>Taiyi</first><last>Huang</last></author>
       <doi>10.3115/1118656.1118665</doi>
@@ -886,8 +886,8 @@
     </paper>
     <paper id="10">
       <title>A Flexible Speech to Speech Phrasebook Translator</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <doi>10.3115/1118656.1118666</doi>
       <pages>69–76</pages>
       <url hash="f7824c8c">W02-0710</url>
@@ -895,10 +895,10 @@
     </paper>
     <paper id="11">
       <title>Speech Translation on a Tight Budget without Enough Data</title>
-      <author><first>Robert E.</first><last>Frederking</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="robert-frederking"><first>Robert E.</first><last>Frederking</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <author><first>John</first><last>Moody</last></author>
       <author><first>Eric</first><last>Steinbrecher</last></author>
       <doi>10.3115/1118656.1118667</doi>
@@ -909,7 +909,7 @@
     <paper id="12">
       <title>Automatic Interpretation System Integrating Free-Style Sentence Translation and Parallel Text Based Translation</title>
       <author><first>Takahiro</first><last>Ikeda</last></author>
-      <author><first>Shinichi</first><last>Ando</last></author>
+      <author id="shinichi-ando"><first>Shinichi</first><last>Ando</last></author>
       <author><first>Kenji</first><last>Satoh</last></author>
       <author><first>Akitoshi</first><last>Okumura</last></author>
       <doi>10.3115/1118656.1118668</doi>
@@ -919,8 +919,8 @@
     </paper>
     <paper id="13">
       <title>Sharing Problems and Solutions for Machine Translation of Spoken and Written Interaction</title>
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
       <doi>10.3115/1118656.1118669</doi>
       <pages>93–070</pages>
       <url hash="30f99787">W02-0713</url>
@@ -932,7 +932,7 @@
       <author><first>Qin</first><last>Jin</last></author>
       <author><first>Kornel</first><last>Laskowski</last></author>
       <author><first>Alicia</first><last>Tribble</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/1118656.1118670</doi>
       <pages>101–078</pages>
       <url hash="37a0cb59">W02-0714</url>
@@ -959,7 +959,7 @@
     </paper>
     <paper id="17">
       <title>A Multi-Perspective Evaluation of the <fixed-case>NESPOLE</fixed-case>! Speech-to-Speech Translation System</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Florian</first><last>Metze</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Erica</first><last>Costantini</last></author>
@@ -991,9 +991,9 @@
     </frontmatter>
     <paper id="1">
       <title>A Multilingual Approach to Disambiguate Prepositions and Case Suffixes</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <doi>10.3115/1118675.1118676</doi>
       <pages>1–8</pages>
       <url hash="e0d72cf4">W02-0801</url>
@@ -1018,7 +1018,7 @@
     <paper id="4">
       <title>Defining and Representing Preposition Senses: a preliminary analysis</title>
       <author><first>Emmanuelle</first><last>Cannesson</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <doi>10.3115/1118675.1118679</doi>
       <pages>25–31</pages>
       <url hash="52f8fb80">W02-0804</url>
@@ -1052,9 +1052,9 @@
     </paper>
     <paper id="8">
       <title>Sense Discrimination with Parallel Corpora</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <doi>10.3115/1118675.1118683</doi>
       <pages>61–66</pages>
       <url hash="1289ca76">W02-0808</url>
@@ -1062,10 +1062,10 @@
     </paper>
     <paper id="9">
       <title><fixed-case>D</fixed-case>utch Word Sense Disambiguation: Optimizing the Localness of Context</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <doi>10.3115/1118675.1118684</doi>
       <pages>61–66</pages>
       <url hash="56a70ccc">W02-0809</url>
@@ -1073,7 +1073,7 @@
     </paper>
     <paper id="10">
       <title>Unsupervised <fixed-case>I</fixed-case>talian Word Sense Disambiguation using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets and Unlabeled Corpora</title>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>Richard</first><last>Wicentowski</last></author>
       <doi>10.3115/1118675.1118685</doi>
       <pages>67–73</pages>
@@ -1086,7 +1086,7 @@
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>H. Tolga</first><last>Ilhan</last></author>
       <author><first>Sepandar D.</first><last>Kamvar</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1118675.1118686</doi>
       <pages>74–80</pages>
       <url hash="274ed7a5">W02-0811</url>
@@ -1102,8 +1102,8 @@
     </paper>
     <paper id="13">
       <title>Combining Contextual Features for Word Sense Disambiguation</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/1118675.1118688</doi>
       <pages>88–94</pages>
       <url hash="28b575d2">W02-0813</url>
@@ -1111,10 +1111,10 @@
     </paper>
     <paper id="14">
       <title>Evaluating the results of a memory-based word-expert approach to unrestricted word sense disambiguation</title>
-      <author><first>Veronique</first><last>Hoste</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <doi>10.3115/1118675.1118689</doi>
       <pages>95–081</pages>
       <url hash="2442f4a5">W02-0814</url>
@@ -1131,7 +1131,7 @@
     </paper>
     <paper id="16">
       <title>Lexical Substitution as a Task for <fixed-case>WSD</fixed-case> Evaluation</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <doi>10.3115/1118675.1118691</doi>
       <pages>089–115</pages>
       <url hash="0fb3b121">W02-0816</url>
@@ -1139,8 +1139,8 @@
     </paper>
     <paper id="17">
       <title>Building a Sense Tagged Corpus with Open Mind Word Expert</title>
-      <author><first>Timothy</first><last>Chklovski</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <doi>10.3115/1118675.1118692</doi>
       <pages>116–122</pages>
       <url hash="db8916d8">W02-0817</url>
@@ -1197,7 +1197,7 @@
       <title>Using Co-Composition for Acquiring Syntactic and Semantic Subcategorisation</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
       <author><first>Alexandre</first><last>Agustini</last></author>
-      <author><first>Gabriel P.</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel P.</first><last>Lopes</last></author>
       <doi>10.3115/1118627.1118632</doi>
       <pages>34–41</pages>
       <url hash="ab36c774">W02-0905</url>
@@ -1205,10 +1205,10 @@
     </paper>
     <paper id="6">
       <title>Learning Argument/Adjunct Dictinction for <fixed-case>B</fixed-case>asque</title>
-      <author><first>Izaskun</first><last>Aldezabal</last></author>
+      <author id="izaskun-aldezabal"><first>Izaskun</first><last>Aldezabal</last></author>
       <author><first>Maxux</first><last>Aranzabe</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <author><first>Aitziber</first><last>Atutxa</last></author>
       <doi>10.3115/1118627.1118633</doi>
       <pages>42–50</pages>
@@ -1225,7 +1225,7 @@
     </paper>
     <paper id="8">
       <title>Improvements in Automatic Thesaurus Extraction</title>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Marc</first><last>Moens</last></author>
       <doi>10.3115/1118627.1118635</doi>
       <pages>59–66</pages>
@@ -1234,7 +1234,7 @@
     </paper>
     <paper id="9">
       <title>Acquiring Collocations for Lexical Choice between Near-Synonyms</title>
-      <author><first>Diana Zaiu</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana Zaiu</first><last>Inkpen</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <doi>10.3115/1118627.1118636</doi>
       <pages>67–76</pages>
@@ -1256,7 +1256,7 @@
     </frontmatter>
     <paper id="1">
       <title>Discriminative Training Methods for Hidden <fixed-case>M</fixed-case>arkov Models: Theory and Experiments with Perceptron Algorithms</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <doi>10.3115/1118693.1118694</doi>
       <pages>1–8</pages>
       <award>Best Paper</award>
@@ -1267,7 +1267,7 @@
     <paper id="2">
       <title>Conditional Structure versus Conditional Estimation in <fixed-case>NLP</fixed-case> Models</title>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <doi>10.3115/1118693.1118695</doi>
       <pages>9–16</pages>
       <url hash="1b669b46">W02-1002</url>
@@ -1275,7 +1275,7 @@
     </paper>
     <paper id="3">
       <title>An Incremental Decision List Learner</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <doi>10.3115/1118693.1118696</doi>
       <pages>17–24</pages>
       <url hash="06cf0d00">W02-1003</url>
@@ -1283,7 +1283,7 @@
     </paper>
     <paper id="4">
       <title>Modeling Consensus: Classifier Combination for Word Sense Disambiguation</title>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <doi>10.3115/1118693.1118697</doi>
       <pages>25–32</pages>
@@ -1292,7 +1292,7 @@
     </paper>
     <paper id="5">
       <title>Augmented Mixture Models for Lexical Disambiguation</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <doi>10.3115/1118693.1118698</doi>
       <pages>33–40</pages>
@@ -1301,7 +1301,7 @@
     </paper>
     <paper id="6">
       <title>An Empirical Evaluation of Knowledge Sources and Learning Algorithms for Word Sense Disambiguation</title>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <doi>10.3115/1118693.1118699</doi>
       <pages>41–48</pages>
@@ -1321,7 +1321,7 @@
     <paper id="8">
       <title>Combining Sample Selection and Error-Driven Pruning for Machine Learning of Coreference Rules</title>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <doi>10.3115/1118693.1118701</doi>
       <pages>55–62</pages>
       <url hash="3b05525d">W02-1008</url>
@@ -1329,7 +1329,7 @@
     </paper>
     <paper id="9">
       <title>Transformational Priors Over Grammars</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <doi>10.3115/1118693.1118702</doi>
       <pages>63–70</pages>
       <url hash="48aa070d">W02-1009</url>
@@ -1360,7 +1360,7 @@
       <title>Extensions to <fixed-case>HMM</fixed-case>-based Statistical Word Alignment Models</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>H. Tolga</first><last>Ilhan</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <doi>10.3115/1118693.1118705</doi>
       <pages>87–94</pages>
       <url hash="ec0baaa6">W02-1012</url>
@@ -1368,7 +1368,7 @@
     </paper>
     <paper id="13">
       <title>From Words to Corpora: Recognizing Translation</title>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <doi>10.3115/1118693.1118706</doi>
       <pages>95–102</pages>
       <url hash="3175eda0">W02-1013</url>
@@ -1376,7 +1376,7 @@
     </paper>
     <paper id="14">
       <title>Fast <fixed-case>LR</fixed-case> parsing Using Rich (Tree Adjoining) Grammars</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <doi>10.3115/1118693.1118707</doi>
       <pages>103–110</pages>
       <url hash="66a05edb">W02-1014</url>
@@ -1393,7 +1393,7 @@
     <paper id="16">
       <title>Spectral Clustering for <fixed-case>G</fixed-case>erman Verbs</title>
       <author><first>Chris</first><last>Brew</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <doi>10.3115/1118693.1118709</doi>
       <pages>117–124</pages>
       <url hash="e5c6e55e">W02-1016</url>
@@ -1402,7 +1402,7 @@
     <paper id="17">
       <title>Exploiting Strong Syntactic Heuristics and Co-Training to Learn Semantic Lexicons</title>
       <author><first>William</first><last>Phillips</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <doi>10.3115/1118693.1118710</doi>
       <pages>125–132</pages>
       <url hash="90e29887">W02-1017</url>
@@ -1429,7 +1429,7 @@
     <paper id="20">
       <title>User-Friendly Text Prediction For Translators</title>
       <author><first>George</first><last>Foster</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
       <doi>10.3115/1118693.1118713</doi>
       <pages>148–155</pages>
@@ -1439,8 +1439,8 @@
     <paper id="21">
       <title>Generation of Word Graphs in Statistical Machine Translation</title>
       <author><first>Nicola</first><last>Ueffing</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <doi>10.3115/1118693.1118714</doi>
       <pages>156–163</pages>
       <url hash="93dbbf71">W02-1021</url>
@@ -1458,8 +1458,8 @@
     <paper id="23">
       <title><fixed-case>NLP</fixed-case> Found Helpful (at least for one Text Categorization Task)</title>
       <author><first>Carl</first><last>Sable</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <doi>10.3115/1118693.1118716</doi>
       <pages>172–179</pages>
       <url hash="04bb7d25">W02-1023</url>
@@ -1467,9 +1467,9 @@
     </paper>
     <paper id="24">
       <title>A Hybrid Approach to Natural Language Web Search</title>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <author><first>John</first><last>Prager</last></author>
-      <author><first>Yael</first><last>Ravin</last></author>
+      <author id="yael-ravin"><first>Yael</first><last>Ravin</last></author>
       <author><first>Christian</first><last>Cesar</last></author>
       <doi>10.3115/1118693.1118717</doi>
       <pages>180–187</pages>
@@ -1507,7 +1507,7 @@
     <paper id="28">
       <title>A Bootstrapping Method for Learning Semantic Lexicons using Extraction Pattern Contexts</title>
       <author><first>Michael</first><last>Thelen</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <doi>10.3115/1118693.1118721</doi>
       <pages>214–221</pages>
       <url hash="efa1429f">W02-1028</url>
@@ -1515,7 +1515,7 @@
     </paper>
     <paper id="29">
       <title>Ensemble Methods for Automatic Thesaurus Extraction</title>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <doi>10.3115/1118693.1118722</doi>
       <pages>222–229</pages>
       <url hash="c4bea920">W02-1029</url>
@@ -1534,8 +1534,8 @@
     </paper>
     <paper id="31">
       <title>The <fixed-case>S</fixed-case>uper<fixed-case>ARV</fixed-case> Language Model: Investigating the Effectiveness of Tightly Integrating Multiple Knowledge Sources</title>
-      <author><first>Wen</first><last>Wang</last></author>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <doi>10.3115/1118693.1118724</doi>
       <pages>238–247</pages>
       <url hash="5311ad4c">W02-1031</url>
@@ -1565,7 +1565,7 @@
       <title>A Machine-Learning Approach to Introspection in a Question Answering System</title>
       <author><first>Krzysztof</first><last>Czuba</last></author>
       <author><first>John</first><last>Prager</last></author>
-      <author><first>Jennifer</first><last>Chu-Carroll</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu-Carroll</last></author>
       <doi>10.3115/1118693.1118727</doi>
       <pages>265–272</pages>
       <url hash="a96c8b09">W02-1034</url>
@@ -1574,7 +1574,7 @@
     <paper id="35">
       <title>Extracting Clauses for Spoken Language Understanding in Conversational Systems</title>
       <author><first>Narendra</first><last>Gupta</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <doi>10.3115/1118693.1118728</doi>
       <pages>273–280</pages>
       <url hash="fa206254">W02-1035</url>
@@ -1592,7 +1592,7 @@
     </paper>
     <paper id="37">
       <title>Processing Comparable Corpora With Bilingual Suffix Trees</title>
-      <author><first>Dragos Stefan</first><last>Munteanu</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos Stefan</first><last>Munteanu</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <doi>10.3115/1118693.1118730</doi>
       <pages>289–295</pages>
@@ -1611,7 +1611,7 @@
     </paper>
     <paper id="39">
       <title>Phrasal Cohesion and Statistical Machine Translation</title>
-      <author><first>Heidi</first><last>Fox</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
       <doi>10.3115/1118693.1118732</doi>
       <pages>304–3111</pages>
       <url hash="bea8f874">W02-1039</url>
@@ -1621,7 +1621,7 @@
       <title>The Influence of Minimum Edit Distance on Reference Resolution</title>
       <author><first>Michael</first><last>Strube</last></author>
       <author><first>Stefan</first><last>Rapp</last></author>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <doi>10.3115/1118693.1118733</doi>
       <pages>312–319</pages>
       <url hash="37d72393">W02-1040</url>
@@ -1630,7 +1630,7 @@
     <paper id="41">
       <title>Information Extraction from Voicemail Transcripts</title>
       <author><first>Martin</first><last>Jansche</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <doi>10.3115/1118693.1118734</doi>
       <pages>320–327</pages>
       <url hash="eb90c753">W02-1041</url>
@@ -1664,15 +1664,15 @@
       <title>Induction of Classification from Lexicon Expansion: Assigning Domain Tags to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Entries</title>
       <author><first>Echa</first><last>Chang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Sue-Jin</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sue-Jin</first><last>Ker</last></author>
       <author><first>Chang-Hua</first><last>Yang</last></author>
       <url hash="525a5d31">W02-1102</url>
       <bibkey>chang-etal-2002-induction</bibkey>
     </paper>
     <paper id="3">
       <title>Semiautomatic Creation of Taxonomies</title>
-      <author><first>Javier</first><last>Farreres</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="javier-farreres"><first>Javier</first><last>Farreres</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <author><first>Karina</first><last>Gibert</last></author>
       <url hash="79b06d78">W02-1103</url>
       <bibkey>farreres-etal-2002-semiautomatic</bibkey>
@@ -1685,7 +1685,7 @@
     </paper>
     <paper id="5">
       <title>Building Semantic/Ontological Knowledge by Text Mining</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="00e7eaab">W02-1105</url>
       <bibkey>hovy-2002-building</bibkey>
     </paper>
@@ -1714,24 +1714,24 @@
     </paper>
     <paper id="9">
       <title>A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based Approach to Named Entites Recognition</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Roberto</first><last>Prevete</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <url hash="5db07849">W02-1109</url>
       <bibkey>magnini-etal-2002-wordnet</bibkey>
     </paper>
     <paper id="10">
       <title>Frameworks, Implementation and Open Problems for the Collaborative Building of a Multilingual Lexical Database</title>
-      <author><first>Mathieu</first><last>Mangeot-Lerebours</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot-Lerebours</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <author><first>Frédéric</first><last>Andrès</last></author>
       <url hash="f20994bd">W02-1110</url>
       <bibkey>mangeot-lerebours-etal-2002-frameworks</bibkey>
     </paper>
     <paper id="11">
       <title>Fine-Grained Proper Noun Ontologies for Question Answering</title>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <url hash="7cd0d57f">W02-1111</url>
       <bibkey>mann-2002-fine</bibkey>
     </paper>
@@ -1764,7 +1764,7 @@
     </paper>
     <paper id="16">
       <title>A Maximum Entropy Approach to <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et-Based <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
-      <author><first>Ping Wai</first><last>Wong</last></author>
+      <author id="ping-wai-wong"><first>Ping Wai</first><last>Wong</last></author>
       <author><first>Yongsheng</first><last>Yang</last></author>
       <url hash="1cb7a4cf">W02-1116</url>
       <bibkey>wong-yang-2002-maximum</bibkey>
@@ -1802,11 +1802,11 @@
     </paper>
     <paper id="2">
       <title><fixed-case>A</fixed-case>nn<fixed-case>C</fixed-case>orra: Building Tree-banks in <fixed-case>I</fixed-case>ndian Languages</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <author><first>Vineet</first><last>Chaitanya</last></author>
       <author><first>Amba</first><last>Kulkarni</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>K.V.</first><last>Ramakrishnamacharyulu</last></author>
       <url hash="36a89f4c">W02-1202</url>
       <bibkey>bharati-etal-2002-anncorra</bibkey>
@@ -1814,16 +1814,16 @@
     <paper id="3">
       <title><fixed-case>U</fixed-case>rdu and the Parallel Grammar Project</title>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <url hash="93b56405">W02-1203</url>
       <bibkey>butt-king-2002-urdu</bibkey>
     </paper>
     <paper id="4">
       <title>Broadening the Scope of the <fixed-case>EAGLES</fixed-case>/<fixed-case>ISLE</fixed-case> Lexical Standardization Initiative</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
-      <author><first>Antonio</first><last>Zampolli</last></author>
+      <author id="antonio-zampolli"><first>Antonio</first><last>Zampolli</last></author>
       <url hash="2f1be7e5">W02-1204</url>
       <bibkey>calzolari-etal-2002-broadening</bibkey>
     </paper>
@@ -1853,14 +1853,14 @@
       <title>Automatic Word Spacing Using Hidden <fixed-case>M</fixed-case>arkov Model for Refining <fixed-case>K</fixed-case>orean Text Corpora</title>
       <author><first>Do-Gil</first><last>Lee</last></author>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
-      <author><first>Heui-Seok</first><last>Lim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="heui-seok-lim"><first>Heui-Seok</first><last>Lim</last></author>
       <url hash="a948a6b6">W02-1208</url>
       <bibkey>lee-etal-2002-automatic</bibkey>
     </paper>
     <paper id="9">
       <title>Decomposition for <fixed-case>ISO</fixed-case>/<fixed-case>IEC</fixed-case> 10646 Ideographic Characters</title>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Shiu Tong</first><last>Chan</last></author>
       <author><first>Yin</first><last>Li</last></author>
       <author><first>Ngai Ling</first><last>Li</last></author>
@@ -1870,7 +1870,7 @@
     <paper id="10">
       <title>Efficient Deep Processing of <fixed-case>J</fixed-case>apanese</title>
       <author><first>Melanie</first><last>Siegel</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <url hash="dad0691f">W02-1210</url>
       <bibkey>siegel-bender-2002-efficient</bibkey>
     </paper>
@@ -1880,7 +1880,7 @@
       <author><first>Song</first><last>Xue</last></author>
       <author><first>Weimin</first><last>Qu</last></author>
       <author><first>Xiaofeng</first><last>Wang</last></author>
-      <author><first>Yufang</first><last>Sun</last></author>
+      <author id="yufang-sun"><first>Yufang</first><last>Sun</last></author>
       <url hash="46c66075">W02-1211</url>
       <bibkey>sun-etal-2002-constructing</bibkey>
     </paper>
@@ -1897,13 +1897,13 @@
     </frontmatter>
     <paper id="1">
       <title>Speech-Related Technologies - Where Will the Field Go in 10 Years?</title>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
       <url hash="27c57e86">W02-1301</url>
       <bibkey>bernsen-2002-speech</bibkey>
     </paper>
     <paper id="2">
       <title>Towards a Road Map on Human Language Technology: Natural Language Processing</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Dorothea</first><last>Ziegler-Eisele</last></author>
       <url hash="0cdc550d">W02-1302</url>
       <bibkey>eisele-ziegler-eisele-2002-towards</bibkey>
@@ -1911,20 +1911,20 @@
     <paper id="3">
       <title>Why <fixed-case>NLP</fixed-case> Should Move into <fixed-case>IAS</fixed-case></title>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Mikhail J.</first><last>Atallah</last></author>
-      <author><first>Christian F.</first><last>Hempelmann</last></author>
+      <author id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></author>
       <author><first>Katrina E.</first><last>Triezenberg</last></author>
       <url hash="7cc7829d">W02-1303</url>
       <bibkey>raskin-etal-2002-nlp</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>MEANING</fixed-case>: a Roadmap to Knowledge Technologies</title>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <url hash="f28624d7">W02-1304</url>
       <bibkey>rigau-etal-2002-meaning</bibkey>
     </paper>
@@ -1942,38 +1942,38 @@
     <paper id="1">
       <title>Disambiguating Noun Compounds with Latent Semantic Indexing</title>
       <author><first>Alan M.</first><last>Buckeridge</last></author>
-      <author><first>Richard F. E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F. E.</first><last>Sutcliffe</last></author>
       <url hash="2fb195e5">W02-1401</url>
       <bibkey>buckeridge-sutcliffe-2002-disambiguating</bibkey>
     </paper>
     <paper id="2">
       <title>An Intelligent Terminology Database as a Pre-processor for Statistical Machine Translation</title>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="9c7374c1">W02-1402</url>
       <bibkey>carl-langlais-2002-intelligent</bibkey>
     </paper>
     <paper id="3">
       <title>Lexically-Based Terminology Structuring: Some Inherent Limits</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <url hash="9d271ce8">W02-1403</url>
       <bibkey>grabar-zweigenbaum-2002-lexically</bibkey>
     </paper>
     <paper id="4">
       <title>Alignment and Extraction of Bilingual Legal Terminology from Context Profiles</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><first>Robert W.P.</first><last>Luk</last></author>
-      <author><first>Lawrence Y.L.</first><last>Cheung</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="robert-w-p-luk"><first>Robert W.P.</first><last>Luk</last></author>
+      <author id="lawrence-y-l-cheung"><first>Lawrence Y.L.</first><last>Cheung</last></author>
       <author><first>Francis C.Y.</first><last>Chik</last></author>
       <url hash="9c365321">W02-1404</url>
       <bibkey>kwong-etal-2002-alignment</bibkey>
     </paper>
     <paper id="5">
       <title>Improving a general-purpose Statistical Translation Engine by Terminological lexicons</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <url hash="806c76df">W02-1405</url>
       <bibkey>langlais-2002-improving</bibkey>
     </paper>
@@ -1993,8 +1993,8 @@
     </paper>
     <paper id="8">
       <title>Automatic Discovery of Term Similarities Using Pattern Mining</title>
-      <author><first>Goran</first><last>Nenadić</last></author>
-      <author><first>Irena</first><last>Spasić</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <url hash="ce801b4b">W02-1408</url>
       <bibkey>nenadic-etal-2002-automatic-discovery</bibkey>
@@ -2034,15 +2034,15 @@
       <title>Grammar and Lexicon in the Robust Parsing of <fixed-case>I</fixed-case>talian towards a Non-Naïve Interplay</title>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <url hash="90341ea0">W02-1501</url>
       <bibkey>bartolini-etal-2002-grammar</bibkey>
     </paper>
     <paper id="2">
       <title>The Grammar Matrix: An Open-Source Starter-Kit for the Rapid Development of Cross-linguistically Consistent Broad-Coverage Precision Grammars</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <url hash="e5bf1079">W02-1502</url>
       <bibkey>bender-etal-2002-grammar</bibkey>
@@ -2051,8 +2051,8 @@
       <title>The Parallel Grammar Project</title>
       <author><first>Miriam</first><last>Butt</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Christian</first><last>Rohrer</last></author>
       <url hash="fc646358">W02-1503</url>
       <bibkey>butt-etal-2002-parallel</bibkey>
@@ -2069,38 +2069,38 @@
     <paper id="5">
       <title>Encoding and Reusing Linguistic Information Expressed by Linguistic Properties</title>
       <author><first>Caroline</first><last>Hagège</last></author>
-      <author><first>Gabriel G.</first><last>Bès</last></author>
+      <author id="gabriel-g-bes"><first>Gabriel G.</first><last>Bès</last></author>
       <url hash="0d09d69a">W02-1505</url>
       <bibkey>hagege-bes-2002-encoding</bibkey>
     </paper>
     <paper id="6">
       <title>Adapting Existing Grammars: The <fixed-case>XLE</fixed-case> Experience</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <url hash="620b3678">W02-1506</url>
       <bibkey>kaplan-etal-2002-adapting</bibkey>
     </paper>
     <paper id="7">
       <title>A Classification of Grammar Development Strategies</title>
       <author><first>Alexandra</first><last>Kinyon</last></author>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <url hash="41dc95bb">W02-1507</url>
       <bibkey>kinyon-prolo-2002-classification</bibkey>
     </paper>
     <paper id="8">
       <title>Parallel Distributed Grammar Engineering for Practical Applications</title>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Uli</first><last>Callmeier</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Melanie</first><last>Siegel</last></author>
       <url hash="d48b15d8">W02-1508</url>
       <bibkey>oepen-etal-2002-parallel</bibkey>
     </paper>
     <paper id="9">
       <title>Coping with Problems in Grammars Automatically Extracted from Treebanks</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <url hash="37d65a7c">W02-1509</url>
       <bibkey>prolo-2002-coping</bibkey>
     </paper>
@@ -2123,15 +2123,15 @@
     </frontmatter>
     <paper id="1">
       <title>A Synchronization Structure of <fixed-case>SSTC</fixed-case> and Its Applications in Machine Translation</title>
-      <author><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
-      <author><last>Tang</last><first>Enya Kong</first></author>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="mosleh-hmoud-al-adhaileh"><first>Mosleh H.</first><last>Al-Adhaileh</last></author>
+      <author><first>Enya Kong</first><last>Tang</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <url hash="ece738ba">W02-1601</url>
       <bibkey>al-adhaileh-etal-2002-synchronization</bibkey>
     </paper>
     <paper id="2">
       <title>Coedition to Share Text Revision across Languages and Improve <fixed-case>MT</fixed-case> a Posteriori</title>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Wang-Ju</first><last>Tsai</last></author>
       <url hash="85824775">W02-1602</url>
       <bibkey>boitet-tsai-2002-coedition</bibkey>
@@ -2166,14 +2166,14 @@
       <title>Word Sense Disambiguation in a <fixed-case>K</fixed-case>orean-to-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case> System Using Neural Networks</title>
       <author><first>You-Jin</first><last>Chung</last></author>
       <author><first>Sin-Jae</first><last>Kang</last></author>
-      <author><first>Kyong-Hi</first><last>Moon</last></author>
+      <author id="kyong-hi-moon"><first>Kyong-Hi</first><last>Moon</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="ef31856e">W02-1606</url>
       <bibkey>chung-etal-2002-word</bibkey>
     </paper>
     <paper id="7">
       <title>Building a Training Corpus for Word Sense Disambiguation in <fixed-case>E</fixed-case>nglish-to-<fixed-case>V</fixed-case>ietnamese Machine Translation</title>
-      <author><first>Dien</first><last>Dinh</last></author>
+      <author id="dinh-dien"><first>Dien</first><last>Dinh</last></author>
       <url hash="ec3da1e1">W02-1607</url>
       <bibkey>dinh-2002-building</bibkey>
     </paper>
@@ -2192,16 +2192,16 @@
     </paper>
     <paper id="10">
       <title>Learning Domain-Specific Transfer Rules: An Experiment with <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Translation</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
       <url hash="354b5ba5">W02-1610</url>
       <bibkey>lavoie-etal-2002-learning</bibkey>
     </paper>
     <paper id="11">
       <title>Identifying Synonymous Expressions from a Bilingual Corpus for Example-Based Machine Translation</title>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="cf48b532">W02-1611</url>
       <bibkey>shimohata-sumita-2002-identifying</bibkey>
     </paper>
@@ -2215,9 +2215,9 @@
     </paper>
     <paper id="13">
       <title>Automatic Information Transfer between <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Hao</first><last>Yu</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Xiaohong</first><last>Li</last></author>
       <url hash="c300386f">W02-1613</url>
       <bibkey>yao-etal-2002-automatic-information</bibkey>
@@ -2235,9 +2235,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>RDF</fixed-case>(<fixed-case>S</fixed-case>)/<fixed-case>XML</fixed-case> Linguistic Annotation of Semantic Web Pages</title>
-      <author><first>Guadalupe Aguado</first><last>de Cea</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe Aguado</first><last>de Cea</last></author>
       <author><first>Inmaculada</first><last>Álvarez-de-Mon</last></author>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <author><first>Rosario</first><last>Plaza-Arteche</last></author>
       <url hash="3d1549c8">W02-1701</url>
       <bibkey>de-cea-etal-2002-rdf</bibkey>
@@ -2252,8 +2252,8 @@
     </paper>
     <paper id="3">
       <title>A Brief Introduction to the <fixed-case>G</fixed-case>e<fixed-case>M</fixed-case> Annotation Schema for Complex Document Layout</title>
-      <author><first>John</first><last>Bateman</last></author>
-      <author><first>Renate</first><last>Henschel</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
       <author><first>Judy</first><last>Delin</last></author>
       <url hash="eb666c40">W02-1703</url>
       <bibkey>bateman-etal-2002-brief</bibkey>
@@ -2268,9 +2268,9 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>PAPILLON</fixed-case> Project: Cooperatively Building a Multilingual Lexical Data-base to Derive Open Source Dictionaries &amp; Lexicons</title>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <url hash="c3c47b8b">W02-1705</url>
       <bibkey>boitet-etal-2002-papillon</bibkey>
     </paper>
@@ -2318,7 +2318,7 @@
     </paper>
     <paper id="12">
       <title>Cascaded Regular Grammars over <fixed-case>XML</fixed-case> Documents</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <author><first>Alexander</first><last>Simov</last></author>
       <url hash="7d0e675f">W02-1712</url>
@@ -2334,7 +2334,7 @@
       <title><fixed-case>X</fixed-case>i<fixed-case>STS</fixed-case> - <fixed-case>XML</fixed-case> in Speech Technology Systems</title>
       <author><first>Michael</first><last>Walsh</last></author>
       <author><first>Stephen</first><last>Wilson</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <url hash="2d73ee67">W02-1714</url>
       <bibkey>walsh-etal-2002-xists</bibkey>
     </paper>
@@ -2357,7 +2357,7 @@
     </frontmatter>
     <paper id="1">
       <title>Extraction of Translation Unit from <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpora</title>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Pernilla</first><last>Danielsson</last></author>
       <author><first>Wolfgang</first><last>Teubert</last></author>
       <url hash="e38c583d">W02-1801</url>
@@ -2368,15 +2368,15 @@
       <author><first>Lawrence</first><last>Cheung</last></author>
       <author><first>Tom</first><last>Lai</last></author>
       <author><first>Robert</first><last>Luk</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>King Kui</first><last>Sin</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="king-kui-sin"><first>King Kui</first><last>Sin</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <url hash="6c13ef9a">W02-1802</url>
       <bibkey>cheung-etal-2002-considerations</bibkey>
     </paper>
     <paper id="3">
       <title>Developing Guidelines for the Annotation of Anaphors in the <fixed-case>C</fixed-case>hinese Treebank</title>
-      <author><first>Susan</first><last>Converse</last></author>
+      <author id="susan-p-converse"><first>Susan</first><last>Converse</last></author>
       <url hash="47fe3d45">W02-1803</url>
       <bibkey>converse-2002-developing</bibkey>
     </paper>
@@ -2385,7 +2385,7 @@
       <author><first>Hongzhao</first><last>He</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Pilian</first><last>He</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="4e373034">W02-1804</url>
       <bibkey>he-etal-2002-finding</bibkey>
     </paper>
@@ -2407,7 +2407,7 @@
     </paper>
     <paper id="7">
       <title>A Knowledge Based Approach to Identification of Serial Verb Construction in <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean Machine Translation System</title>
-      <author><first>Dong-il</first><last>Kim</last></author>
+      <author id="dong-il-kim"><first>Dong-il</first><last>Kim</last></author>
       <author><first>Zheng</first><last>Cui</last></author>
       <author><first>Jinji</first><last>Li</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
@@ -2416,7 +2416,7 @@
     </paper>
     <paper id="8">
       <title>Learning Case-based Knowledge for Disambiguating <fixed-case>C</fixed-case>hinese Word Segmentation: A Preliminary Study</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Haihua</first><last>Pan</last></author>
       <author><first>Hongbiao</first><last>Chen</last></author>
       <url hash="78bd4d95">W02-1808</url>
@@ -2424,8 +2424,8 @@
     </paper>
     <paper id="9">
       <title>Corpus-Based <fixed-case>P</fixed-case>inyin Name Resolution</title>
-      <author><first>Kui-Lam</first><last>Kwok</last></author>
-      <author><first>Peter</first><last>Deng</last></author>
+      <author id="kui-lam-kwok"><first>Kui-Lam</first><last>Kwok</last></author>
+      <author id="peter-deng"><first>Peter</first><last>Deng</last></author>
       <url hash="bf357571">W02-1809</url>
       <bibkey>kwok-deng-2002-corpus</bibkey>
     </paper>
@@ -2440,15 +2440,15 @@
     </paper>
     <paper id="11">
       <title>Design of <fixed-case>C</fixed-case>hinese Morphological Analyzer</title>
-      <author><first>Huihsin</first><last>Tseng</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <url hash="ce806a49">W02-1811</url>
       <bibkey>tseng-chen-2002-design</bibkey>
     </paper>
     <paper id="12">
       <title>A Word Segmentation Method with Dynamic Adapting to Text Using Inductive Learning</title>
       <author><first>Zhongjian</first><last>Wang</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <url hash="6eea686a">W02-1812</url>
       <bibkey>wang-etal-2002-word</bibkey>
@@ -2466,14 +2466,14 @@
       <title>Extracting Pronunciation-translated Names from <fixed-case>C</fixed-case>hinese Texts using Bootstrapping Approach</title>
       <author><first>Jing</first><last>Xiao</last></author>
       <author><first>Jimin</first><last>Liu</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <url hash="4443a672">W02-1814</url>
       <bibkey>xiao-etal-2002-extracting</bibkey>
     </paper>
     <paper id="15">
       <title>Combining Classifiers for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Susan P.</first><last>Converse</last></author>
+      <author id="susan-p-converse"><first>Susan P.</first><last>Converse</last></author>
       <url hash="48cdd32d">W02-1815</url>
       <bibkey>xue-converse-2002-combining</bibkey>
     </paper>
@@ -2488,7 +2488,7 @@
       <author><first>Kevin</first><last>Zhang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Hao</first><last>Zhang</last></author>
-      <author><first>Xue-Qi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xue-Qi</first><last>Cheng</last></author>
       <url hash="465a621e">W02-1817</url>
       <bibkey>zhang-etal-2002-automatic</bibkey>
     </paper>
@@ -2535,15 +2535,15 @@
     </paper>
     <paper id="2">
       <title>Multilingual Question Answering with High Portability on Relational Databases</title>
-      <author><first>Hanmin</first><last>Jung</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="han-min-jung"><first>Hanmin</first><last>Jung</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <url hash="58afa969">W02-1902</url>
       <bibkey>jung-lee-2002-multilingual</bibkey>
     </paper>
     <paper id="3">
       <title>A Reliable Indexing Method for a Practical <fixed-case>QA</fixed-case> System</title>
       <author><first>Harksoo</first><last>Kim</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="24eb1c99">W02-1903</url>
       <bibkey>kim-seo-2002-reliable</bibkey>
     </paper>
@@ -2558,7 +2558,7 @@
     <paper id="5">
       <title>Extracting Exact Answers to Questions Based on Structural Links</title>
       <author><first>Wei</first><last>Li</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <author><first>Xiaoge</first><last>Li</last></author>
       <author id="munirathnam-srikanth"><first>M.</first><last>Srikanth</last></author>
       <author><first>Xiuhong</first><last>Zhang</last></author>
@@ -2568,9 +2568,9 @@
     </paper>
     <paper id="6">
       <title>Passage Selection to Improve Question Answering</title>
-      <author><first>Fernando</first><last>LLopis</last></author>
-      <author><first>José Luis</first><last>Vicedo</last></author>
-      <author><first>Antonio</first><last>Ferrández</last></author>
+      <author id="fernando-llopis"><first>Fernando</first><last>LLopis</last></author>
+      <author id="jose-luis-vicedo"><first>José Luis</first><last>Vicedo</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrández</last></author>
       <url hash="75f184f3">W02-1906</url>
       <bibkey>llopis-etal-2002-passage</bibkey>
     </paper>
@@ -2594,31 +2594,31 @@
     </frontmatter>
     <paper id="1">
       <title>Extracting the Unextractable: A Case Study on Verb-particles</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <url hash="fa33ec0a">W02-2001</url>
       <bibkey>baldwin-villavicencio-2002-extracting</bibkey>
     </paper>
     <paper id="2">
       <title>Language Independent Named Entity Classification by modified Transformation-based Learning and by Decision Tree Induction</title>
-      <author><first>William J.</first><last>Black</last></author>
+      <author id="william-j-black"><first>William J.</first><last>Black</last></author>
       <author><first>Argyrios</first><last>Vasilakopoulos</last></author>
       <url hash="499cb927">W02-2002</url>
       <bibkey>black-vasilakopoulos-2002-language</bibkey>
     </paper>
     <paper id="3">
       <title>Statistical Named Entity Recognizer Adaptation</title>
-      <author><first>John D.</first><last>Burger</last></author>
-      <author><first>John C.</first><last>Henderson</last></author>
-      <author><first>William T.</first><last>Morgan</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
+      <author id="william-morgan"><first>William T.</first><last>Morgan</last></author>
       <url hash="b723311a">W02-2003</url>
       <bibkey>burger-etal-2002-statistical</bibkey>
     </paper>
     <paper id="4">
       <title>Named Entity Extraction using <fixed-case>A</fixed-case>da<fixed-case>B</fixed-case>oost</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <url hash="8ad4dd58">W02-2004</url>
       <bibkey>carreras-etal-2002-named</bibkey>
     </paper>
@@ -2630,21 +2630,21 @@
     </paper>
     <paper id="6">
       <title>Bootstrapping a Multilingual Part-of-speech Tagger in One Person-day</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="36e0d281">W02-2006</url>
       <bibkey>cucerzan-yarowsky-2002-bootstrapping</bibkey>
     </paper>
     <paper id="7">
       <title>Language Independent <fixed-case>NER</fixed-case> using a Unified Model of Internal and Contextual Evidence</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="42266797">W02-2007</url>
       <bibkey>cucerzan-yarowsky-2002-language</bibkey>
     </paper>
     <paper id="8">
       <title>A Very Very Large Corpus Doesn’t Always Yield Reliable Estimates</title>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <url hash="29483c75">W02-2008</url>
       <bibkey>curran-osborne-2002-large</bibkey>
@@ -2659,15 +2659,15 @@
     </paper>
     <paper id="10">
       <title>Named Entity Recognition as a House of Cards: Classifier Stacking</title>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <url hash="db80b3d9">W02-2010</url>
       <bibkey>florian-2002-named</bibkey>
     </paper>
     <paper id="11">
       <title>Combining Labelled and Unlabelled Data: A Case Study on Fisher Kernels and Transductive Inference for Biological Entity Recognition</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
-      <author><first>Hervé</first><last>Déjean</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
+      <author id="herve-dejean"><first>Hervé</first><last>Déjean</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <author><first>Nicola</first><last>Cancedda</last></author>
       <author><first>Jean-Michel</first><last>Renders</last></author>
       <url hash="afe4bd3b">W02-2011</url>
@@ -2675,7 +2675,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>G</fixed-case>ra<fixed-case>S</fixed-case>p: Grammar Learning from Unlabelled Speech Corpora</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <url hash="5af242bf">W02-2012</url>
       <bibkey>henrichsen-2002-grasp</bibkey>
     </paper>
@@ -2700,8 +2700,8 @@
     </paper>
     <paper id="16">
       <title><fixed-case>J</fixed-case>apanese Dependency Analysis using Cascaded Chunking</title>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="c4d33f8b">W02-2016</url>
       <bibkey>kudo-matsumoto-2002-japanese</bibkey>
     </paper>
@@ -2714,13 +2714,13 @@
     </paper>
     <paper id="18">
       <title>A Comparison of Algorithms for Maximum Entropy Parameter Estimation</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <url hash="e15a31a7">W02-2018</url>
       <bibkey>malouf-2002-comparison</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>M</fixed-case>arkov Models for Language-independent Named Entity Recognition</title>
-      <author><first>Robert</first><last>Malouf</last></author>
+      <author id="robert-malouf"><first>Robert</first><last>Malouf</last></author>
       <url hash="c59efba2">W02-2019</url>
       <bibkey>malouf-2002-markov</bibkey>
     </paper>
@@ -2733,14 +2733,14 @@
     </paper>
     <paper id="21">
       <title>Letter Level Learning for Language Independent Diacritics Restoration</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <url hash="97ad8aff">W02-2021</url>
       <bibkey>mihalcea-nastase-2002-letter</bibkey>
     </paper>
     <paper id="22">
       <title><fixed-case>SLINERC</fixed-case>: The <fixed-case>S</fixed-case>ydney Language-Independent Named Entity Recogniser and Classifier</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Casey</first><last>Whitelaw</last></author>
       <author><first>Robert</first><last>Munro</last></author>
       <url hash="3abfa63f">W02-2022</url>
@@ -2749,20 +2749,20 @@
     <paper id="23">
       <title>Named Entity Learning and Verification: Expectation Maximization in Large Corpora</title>
       <author><first>Uwe</first><last>Quasthoff</last></author>
-      <author><first>Christian</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Christian</first><last>Biemann</last></author>
       <author><first>Christian</first><last>Wolff</last></author>
       <url hash="34040df1">W02-2023</url>
       <bibkey>quasthoff-etal-2002-named</bibkey>
     </paper>
     <paper id="24">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2002 Shared Task: Language-Independent Named Entity Recognition</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="717fd506">W02-2024</url>
       <bibkey>tjong-kim-sang-2002-introduction</bibkey>
     </paper>
     <paper id="25">
       <title>Memory-Based Named Entity Recognition</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <url hash="81c08dfa">W02-2025</url>
       <bibkey>tjong-kim-sang-2002-memory</bibkey>
     </paper>
@@ -2782,7 +2782,7 @@
     <paper id="28">
       <title>Two-dimensional Clustering for Text Categorization</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="edfded54">W02-2028</url>
       <bibkey>takamura-matsumoto-2002-two</bibkey>
     </paper>
@@ -2796,7 +2796,7 @@
     <paper id="30">
       <title>Feature Selection for a Rich <fixed-case>HPSG</fixed-case> Grammar Using Decision Trees</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <url hash="57c58d31">W02-2030</url>
       <bibkey>toutanova-manning-2002-feature</bibkey>
     </paper>
@@ -2811,7 +2811,7 @@
     <paper id="32">
       <title>Topological Field Chunking for <fixed-case>G</fixed-case>erman</title>
       <author><first>Jorn</first><last>Veenstra</last></author>
-      <author><first>Frank Henrik</first><last>Müller</last></author>
+      <author id="frank-henrik-muller"><first>Frank Henrik</first><last>Müller</last></author>
       <author><first>Tylman</first><last>Ule</last></author>
       <url hash="789168e6">W02-2032</url>
       <bibkey>veenstra-etal-2002-topological</bibkey>
@@ -2824,7 +2824,7 @@
     </paper>
     <paper id="34">
       <title>Learning to Disambiguate Potentially Subjective Expressions</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <url hash="ef9b2fb0">W02-2034</url>
       <bibkey>wiebe-wilson-2002-learning</bibkey>
@@ -2843,7 +2843,7 @@
   <volume id="21" type="proceedings">
     <meta>
       <booktitle>Proceedings of the International Natural Language Generation Conference</booktitle>
-      <editor><first>Kathleen</first><last>McKeown</last></editor>
+      <editor id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Harriman, New York, USA</address>
       <month>July</month>
@@ -2863,9 +2863,9 @@
     </paper>
     <paper id="2">
       <title>The Importance of Lexicalized Syntax Models for Natural Language Generation Tasks</title>
-      <author><first>Hal</first><last>Daume III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume III</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Irene</first><last>Langkilde-Geary</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde-Geary</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
       <pages>9–16</pages>
@@ -2874,7 +2874,7 @@
     </paper>
     <paper id="3">
       <title>An Empirical Verification of Coverage and Correctness for a General-Purpose Sentence Generator</title>
-      <author><first>Irene</first><last>Langkilde-Geary</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde-Geary</last></author>
       <pages>17–24</pages>
       <url hash="88e7fdb1">W02-2103</url>
       <bibkey>langkilde-geary-2002-empirical</bibkey>
@@ -2890,17 +2890,17 @@
     </paper>
     <paper id="5">
       <title>An Overview of Amalgam: A Machine-learned Generation Module</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>33–40</pages>
       <url hash="c1256efb">W02-2105</url>
       <bibkey>corston-oliver-etal-2002-overview</bibkey>
     </paper>
     <paper id="6">
       <title>A Complete, Efficient Sentence-Realization Algorithm for Unification Grammar</title>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <pages>41–48</pages>
       <url hash="34f8fa13">W02-2106</url>
       <bibkey>moore-2002-complete</bibkey>
@@ -2916,7 +2916,7 @@
     <paper id="8">
       <title>Towards Emotional Variation in Speech-Based Natural Language Processing</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>57–64</pages>
       <url hash="0bffe4d6">W02-2108</url>
       <bibkey>fleischman-hovy-2002-towards</bibkey>
@@ -2953,7 +2953,7 @@
     </paper>
     <paper id="12">
       <title>Content Planner Construction via Evolutionary Algorithms and a Corpus-based Fitness Function</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <author><first>Kathleen</first><last>McKeown</last></author>
       <pages>89–96</pages>
       <url hash="3b8d6d26">W02-2112</url>
@@ -2962,7 +2962,7 @@
     <paper id="13">
       <title>Should Corpora Texts Be Gold Standards for <fixed-case>NLG</fixed-case>?</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>97–104</pages>
       <url hash="1adcbaff">W02-2113</url>
       <bibkey>reiter-sripada-2002-corpora</bibkey>
@@ -2976,8 +2976,8 @@
     </paper>
     <paper id="15">
       <title>Generating Easy References: the Case of Document Deixis</title>
-      <author><first>Ivandre</first><last>Paraboni</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="ivandre-paraboni"><first>Ivandre</first><last>Paraboni</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>113–119</pages>
       <url hash="ea5205b5">W02-2115</url>
       <attachment hash="21c111ad">W02-2115.Attachment.pdf</attachment>
@@ -2986,7 +2986,7 @@
     <paper id="16">
       <title>The <fixed-case>DIAG</fixed-case> experiments: Natural Language Generation for Intelligent Tutoring Systems</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Michael</first><last>Trolio</last></author>
       <pages>120–127</pages>
       <url hash="9a2908cd">W02-2116</url>
@@ -2995,7 +2995,7 @@
     <paper id="17">
       <title>An Evaluation of Procedural Instructional Text</title>
       <author><first>Nathalie</first><last>Colineau</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>Keith</first><last>Vander Linden</last></author>
       <pages>128–135</pages>
       <url hash="2bd066c3">W02-2117</url>
@@ -3004,16 +3004,16 @@
     <paper id="18">
       <title>A Constraint-Based Approach for Cooperative Information-Seeking Dialogue</title>
       <author><first>Yan</first><last>Qu</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <pages>136–143</pages>
       <url hash="63ca3c79">W02-2118</url>
       <bibkey>qu-green-2002-constraint</bibkey>
     </paper>
     <paper id="19">
       <title>What is <fixed-case>NLG</fixed-case>?</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Paul</first><last>Piwek</last></author>
-      <author><first>Lynne</first><last>Cahill</last></author>
+      <author id="lynne-cahill"><first>Lynne</first><last>Cahill</last></author>
       <pages>144–151</pages>
       <url hash="1325bce5">W02-2119</url>
       <bibkey>evans-etal-2002-nlg</bibkey>
@@ -3048,7 +3048,7 @@
     </paper>
     <paper id="24">
       <title>Use of Description Logic and <fixed-case>SDRT</fixed-case> in an <fixed-case>NLG</fixed-case> System</title>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <pages>179–184</pages>
       <url hash="d81f2ec7">W02-2124</url>
       <bibkey>el-ghali-2002-use</bibkey>
@@ -3078,7 +3078,7 @@
     </frontmatter>
     <paper id="1">
       <title>Compositional Semantics for Relative Clauses in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <pages>1–10</pages>
       <url hash="bb1ed301">W02-2201</url>
       <bibkey>han-2002-compositional</bibkey>
@@ -3123,7 +3123,7 @@
     <paper id="7">
       <title>Statistical Morphological Tagging and Parsing of <fixed-case>K</fixed-case>orean with an <fixed-case>LTAG</fixed-case> Grammar</title>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Chung-Hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-Hye</first><last>Han</last></author>
       <pages>48–56</pages>
       <url hash="93183fc4">W02-2207</url>
       <bibkey>sarkar-han-2002-statistical</bibkey>
@@ -3140,7 +3140,7 @@
       <author><first>Gregory M.</first><last>Kobele</last></author>
       <author><first>Travis</first><last>Collier</last></author>
       <author><first>Charles</first><last>Taylor</last></author>
-      <author><first>Edward P.</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler</last></author>
       <pages>66–73</pages>
       <url hash="3f1ae4cd">W02-2209</url>
       <bibkey>kobele-etal-2002-learning</bibkey>
@@ -3164,16 +3164,16 @@
     </paper>
     <paper id="12">
       <title>Relative Clause Attachment and Anaphora: A Case for Short Binding</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>84–89</pages>
       <url hash="d81ac0c5">W02-2212</url>
       <bibkey>delmonte-2002-relative</bibkey>
     </paper>
     <paper id="13">
       <title>A Left Corner Parser for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Victor J.</first><last>Díaz</last></author>
+      <author id="victor-j-diaz"><first>Victor J.</first><last>Díaz</last></author>
       <author><first>Vicente</first><last>Carrillo</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <pages>90–95</pages>
       <url hash="3b7b5221">W02-2213</url>
       <bibkey>diaz-etal-2002-left</bibkey>
@@ -3181,9 +3181,9 @@
     <paper id="14">
       <title>Context-Free Parsing of a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Using Finite-State Machines</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>96–101</pages>
       <url hash="f7a57d0e">W02-2214</url>
       <bibkey>nasr-etal-2002-context</bibkey>
@@ -3239,7 +3239,7 @@
     </paper>
     <paper id="22">
       <title>A Note on the Complexity of Associative-Commutative <fixed-case>L</fixed-case>ambek Calculus</title>
-      <author><first>Christophe</first><last>Costa Florêncio</last></author>
+      <author id="christophe-costa-florencio"><first>Christophe</first><last>Costa Florêncio</last></author>
       <pages>159–162</pages>
       <url hash="b468a47d">W02-2222</url>
       <bibkey>costa-florencio-2002-note</bibkey>
@@ -3253,25 +3253,25 @@
     </paper>
     <paper id="24">
       <title>On the Affinity of <fixed-case>TAG</fixed-case> with Projective, Bilexical Dependency Grammar</title>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
-      <author><first>Robert W.P.</first><last>Luk</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
+      <author id="robert-w-p-luk"><first>Robert W.P.</first><last>Luk</last></author>
       <pages>169–174</pages>
       <url hash="5fbcfc53">W02-2224</url>
       <bibkey>lai-etal-2002-affinity</bibkey>
     </paper>
     <paper id="25">
       <title>The Theory of Control Applied to the <fixed-case>P</fixed-case>rague Dependency Treebank (<fixed-case>PDT</fixed-case>)</title>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <author><first>Veronika</first><last>Řezníčková</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>175–180</pages>
       <url hash="31606c3f">W02-2225</url>
       <bibkey>panevova-etal-2002-theory</bibkey>
     </paper>
     <paper id="26">
       <title>Systematic Grammar Development in the <fixed-case>XTAG</fixed-case> Project</title>
-      <author><first>Carlos</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos</first><last>Prolo</last></author>
       <pages>181–186</pages>
       <url hash="0f938914">W02-2226</url>
       <bibkey>prolo-2002-systematic</bibkey>
@@ -3280,14 +3280,14 @@
       <title>A Formal Proof of Strong Equivalence for a Grammar Conversion from <fixed-case>LTAG</fixed-case> to <fixed-case>HPSG</fixed-case>-style</title>
       <author><first>Naoki</first><last>Yoshinaga</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>187–192</pages>
       <url hash="49a3a557">W02-2227</url>
       <bibkey>yoshinaga-etal-2002-formal</bibkey>
     </paper>
     <paper id="28">
       <title>Parsing <fixed-case>MCS</fixed-case> languages with Thread Automata</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>193–200</pages>
       <url hash="971141f8">W02-2228</url>
       <bibkey>villemonte-de-la-clergerie-2002-parsing-mcs</bibkey>
@@ -3295,8 +3295,8 @@
     <paper id="29">
       <title>Evaluation of <fixed-case>LTAG</fixed-case> Parsing with Supertag Compaction</title>
       <author><first>Olga</first><last>Shaumyan</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>201–205</pages>
       <url hash="0d42b561">W02-2229</url>
       <bibkey>shaumyan-etal-2002-evaluation</bibkey>
@@ -3304,14 +3304,14 @@
     <paper id="30">
       <title><fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> and <fixed-case>S</fixed-case>-<fixed-case>TAG</fixed-case></title>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <pages>206–215</pages>
       <url hash="acdd0522">W02-2230</url>
       <bibkey>dras-han-2002-korean</bibkey>
     </paper>
     <paper id="31">
       <title>Tectogrammatical representation: towards a minimal transfer in machine translation</title>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>216–226</pages>
       <url hash="fa2eaf8a">W02-2231</url>
       <bibkey>hajic-2002-tectogrammatical</bibkey>
@@ -3320,7 +3320,7 @@
       <title>Clustering for obtaining syntactic classes of words from automatically extracted <fixed-case>LTAG</fixed-case> grammars</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>227–233</pages>
       <url hash="bd669abc">W02-2232</url>
       <bibkey>hara-etal-2002-clustering</bibkey>
@@ -3328,7 +3328,7 @@
     <paper id="33">
       <title>A new metagrammar compiler</title>
       <author><first>Bertrand</first><last>Gaiffe</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <pages>234–241</pages>
       <url hash="9dd2fe35">W02-2233</url>
@@ -3344,7 +3344,7 @@
     <paper id="35">
       <title>Cross-serial dependencies in <fixed-case>T</fixed-case>agalog</title>
       <author><first>Anna</first><last>Maclachlan</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>252–258</pages>
       <url hash="2e80eb71">W02-2235</url>
       <bibkey>maclachlan-rambow-2002-cross</bibkey>
@@ -3352,9 +3352,9 @@
     <paper id="36">
       <title>Reranking an n-gram supertagger</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>259–268</pages>
       <url hash="ffd5e5dc">W02-2236</url>
       <bibkey>chen-etal-2002-reranking</bibkey>
diff --git a/data/xml/W03.xml b/data/xml/W03.xml
index e1d50afcf5..d4d491cb04 100644
--- a/data/xml/W03.xml
+++ b/data/xml/W03.xml
@@ -13,11 +13,11 @@
     <paper id="1">
       <title>Experiments with geographic knowledge for information extraction</title>
       <author><first>Dimitar</first><last>Manov</last></author>
-      <author><first>Atanas</first><last>Kiryakov</last></author>
+      <author id="atanas-kiryakov"><first>Atanas</first><last>Kiryakov</last></author>
       <author><first>Borislav</first><last>Popov</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <pages>1–9</pages>
       <url hash="f34d620c">W03-0101</url>
       <bibkey>manov-etal-2003-experiments</bibkey>
@@ -40,7 +40,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>G</fixed-case>eo<fixed-case>N</fixed-case>ame: a system for back-transliterating pinyin place names</title>
-      <author><first>Kui Lam</first><last>Kwok</last></author>
+      <author id="kui-lam-kwok"><first>Kui Lam</first><last>Kwok</last></author>
       <author><first>Qiang</first><last>Deng</last></author>
       <pages>26–30</pages>
       <url hash="16145afe">W03-0104</url>
@@ -48,17 +48,17 @@
     </paper>
     <paper id="5">
       <title>Grounding spatial named entities for information extraction and question answering</title>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <author><first>Gail</first><last>Sinclair</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>31–38</pages>
       <url hash="c87a65fa">W03-0105</url>
       <bibkey>leidner-etal-2003-grounding</bibkey>
     </paper>
     <paper id="6">
       <title><fixed-case>I</fixed-case>nfo<fixed-case>X</fixed-case>tract location normalization: a hybrid approach to geographic references in information extraction</title>
-      <author><first>Huifeng</first><last>Li</last></author>
-      <author><first>K. Rohini</first><last>Srihari</last></author>
+      <author id="huifeng-li"><first>Huifeng</first><last>Li</last></author>
+      <author id="rohini-k-srihari"><first>K. Rohini</first><last>Srihari</last></author>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <pages>39–44</pages>
@@ -67,8 +67,8 @@
     </paper>
     <paper id="7">
       <title>Bootstrapping toponym classifiers</title>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Gideon S.</first><last>Mann</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="gideon-mann"><first>Gideon S.</first><last>Mann</last></author>
       <pages>45–49</pages>
       <url hash="15bd96ef">W03-0107</url>
       <bibkey>smith-mann-2003-bootstrapping</bibkey>
@@ -127,12 +127,12 @@
     </frontmatter>
     <paper id="1">
       <title>Utterance Classification in <fixed-case>A</fixed-case>uto<fixed-case>T</fixed-case>utor</title>
-      <author><first>Andrew</first><last>Olney</last></author>
+      <author id="andrew-olney"><first>Andrew</first><last>Olney</last></author>
       <author><first>Max</first><last>Louwerse</last></author>
       <author><first>Eric</first><last>Matthews</last></author>
       <author><first>Johanna</first><last>Marineau</last></author>
       <author><first>Heather</first><last>Hite-Mitchell</last></author>
-      <author><first>Arthur</first><last>Graesser</last></author>
+      <author id="arthur-c-graesser"><first>Arthur</first><last>Graesser</last></author>
       <pages>1–8</pages>
       <url hash="da406804">W03-0201</url>
       <bibkey>olney-etal-2003-utterance</bibkey>
@@ -147,7 +147,7 @@
     </paper>
     <paper id="3">
       <title>Computer-Aided Generation of Multiple-Choice Tests</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Le An</first><last>Ha</last></author>
       <pages>17–22</pages>
       <url hash="28ae3610">W03-0203</url>
@@ -171,10 +171,10 @@
     </paper>
     <paper id="5">
       <title>A Comparison of Tutor and Student Behavior in Speech Versus Text Based Tutoring</title>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Dumisizwe</first><last>Bhembe</last></author>
-      <author><first>Kate</first><last>Forbes</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes</last></author>
       <author><first>Scott</first><last>Silliman</last></author>
       <author><first>Ramesh</first><last>Srivastava</last></author>
       <author><first>Kurt</first><last>VanLehn</last></author>
@@ -186,14 +186,14 @@
       <title>Transforming Grammar Checking Technology into a Learning Environment for Second Language Writing</title>
       <author><first>Ola</first><last>Knutsson</last></author>
       <author><first>Teresa</first><last>Cerrato Pargman</last></author>
-      <author><first>Kerstin</first><last>Severinson Eklundh</last></author>
+      <author id="kerstin-severinson-eklundh"><first>Kerstin</first><last>Severinson Eklundh</last></author>
       <pages>38–45</pages>
       <url hash="4e8c1149">W03-0206</url>
       <bibkey>knutsson-etal-2003-transforming</bibkey>
     </paper>
     <paper id="7">
       <title>Pasteur’s Quadrant: Computational Linguistics, <fixed-case>LSA</fixed-case>, and Education</title>
-      <author><first>Thomas</first><last>Landauer</last></author>
+      <author id="thomas-landauer"><first>Thomas</first><last>Landauer</last></author>
       <pages>46–52</pages>
       <url hash="dc9cde2b">W03-0207</url>
       <bibkey>landauer-2003-pasteurs</bibkey>
@@ -217,7 +217,7 @@
     </paper>
     <paper id="10">
       <title>A Hybrid Text Classification Approach for Analysis of Student Essays</title>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <author><first>Antonio</first><last>Roque</last></author>
       <author><first>Dumisizwe</first><last>Bhembe</last></author>
       <author><first>Kurt</first><last>VanLehn</last></author>
@@ -238,7 +238,7 @@
     </frontmatter>
     <paper id="1">
       <title>An Evaluation Exercise for Word Alignment</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <pages>1–10</pages>
       <url hash="5b57f118">W03-0301</url>
@@ -255,7 +255,7 @@
     <paper id="3">
       <title>Word Alignment Based on Bilingual Bracketing</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>15–18</pages>
       <url hash="30b82721">W03-0303</url>
       <bibkey>zhao-vogel-2003-word</bibkey>
@@ -263,16 +263,16 @@
     <paper id="4">
       <title>Statistical Translation Alignment with Compositionality Constraints</title>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>19–22</pages>
       <url hash="172ce136">W03-0304</url>
       <bibkey>simard-langlais-2003-statistical</bibkey>
     </paper>
     <paper id="5">
       <title>Reducing Parameter Space for Word Alignment</title>
-      <author><first>Herve</first><last>Dejean</last></author>
-      <author><first>Eric</first><last>Gaussier</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="herve-dejean"><first>Herve</first><last>Dejean</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Kenji</first><last>Yamada</last></author>
       <pages>23–26</pages>
       <url hash="b231bd22">W03-0305</url>
@@ -280,7 +280,7 @@
     </paper>
     <paper id="6">
       <title>Word Alignment Baselines</title>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <pages>27–30</pages>
       <url hash="c879b33c">W03-0306</url>
       <bibkey>henderson-2003-word</bibkey>
@@ -288,14 +288,14 @@
     <paper id="7">
       <title>Phrase-based Evaluation of Word-to-Word Alignments</title>
       <author><first>Michael</first><last>Carl</last></author>
-      <author><first>Sisay</first><last>Fissaha</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay</first><last>Fissaha</last></author>
       <pages>31–35</pages>
       <url hash="0f56bef1">W03-0307</url>
       <bibkey>carl-fissaha-2003-phrase</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>TREQ</fixed-case>-<fixed-case>AL</fixed-case>: A word alignment system with limited language resources</title>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Ana-Maria</first><last>Barbu</last></author>
       <author><first>Radu</first><last>Ion</last></author>
       <pages>36–39</pages>
@@ -304,7 +304,7 @@
     </paper>
     <paper id="9">
       <title>The <fixed-case>D</fixed-case>uluth Word Alignment System</title>
-      <author><first>Bridget Thomson</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget Thomson</first><last>McInnes</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <pages>40–43</pages>
       <url hash="8c67ecd7">W03-0309</url>
@@ -320,9 +320,9 @@
     </paper>
     <paper id="11">
       <title>Retrieving Meaning-equivalent Sentences for Example-based Rough Translation</title>
-      <author><first>Mitsuo</first><last>Shimohata</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="mitsuo-shimohata"><first>Mitsuo</first><last>Shimohata</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>50–56</pages>
       <url hash="7fa21f2c">W03-0311</url>
       <bibkey>shimohata-etal-2003-retrieving</bibkey>
@@ -331,7 +331,7 @@
       <title>Word Selection for <fixed-case>EBMT</fixed-case> based on Monolingual Similarity and Translation Confidence</title>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <pages>57–64</pages>
       <url hash="98b21631">W03-0312</url>
@@ -347,9 +347,9 @@
     <paper id="14">
       <title>Learning Sequence-to-Sequence Correspondences from Parallel Corpora via Sequential Pattern Mining</title>
       <author><first>Kaoru</first><last>Yamamoto</last></author>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Yuta</first><last>Tsuboi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>73–80</pages>
       <url hash="bae8090d">W03-0314</url>
       <bibkey>yamamoto-etal-2003-learning</bibkey>
@@ -358,24 +358,24 @@
       <title>Efficient Optimization for Bilingual Sentence Alignment Based on Linear Regression</title>
       <author><first>Bing</first><last>Zhao</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
-      <author><first>Stephen</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephen</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>81–87</pages>
       <url hash="79136e2a">W03-0315</url>
       <bibkey>zhao-etal-2003-efficient</bibkey>
     </paper>
     <paper id="16">
       <title><fixed-case>POS</fixed-case>-Tagger for <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese Bilingual Corpus</title>
-      <author><first>Dinh</first><last>Dien</last></author>
-      <author><first>Hoang</first><last>Kiem</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
+      <author id="hoang-kiem"><first>Hoang</first><last>Kiem</last></author>
       <pages>88–95</pages>
       <url hash="b39cbd3b">W03-0316</url>
       <bibkey>dien-kiem-2003-pos</bibkey>
     </paper>
     <paper id="17">
       <title>Acquisition of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliterated Word Pairs from Parallel-Aligned Texts using a Statistical Machine Transliteration Model</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>96–103</pages>
       <url hash="a16e4e35">W03-0317</url>
       <bibkey>lee-chang-2003-acquisition</bibkey>
@@ -383,7 +383,7 @@
     <paper id="18">
       <title>Input Sentence Splitting and Translating</title>
       <author><first>Takao</first><last>Doi</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>104–110</pages>
       <url hash="dd6b9720">W03-0318</url>
       <bibkey>doi-sumita-2003-input</bibkey>
@@ -427,7 +427,7 @@
     <paper id="1">
       <title>A model of syntactic disambiguation based on lexicalized grammars</title>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>1–8</pages>
       <url hash="4b194576">W03-0401</url>
       <bibkey>miyao-tsujii-2003-model</bibkey>
@@ -435,7 +435,7 @@
     <paper id="2">
       <title>An <fixed-case>SVM</fixed-case>-based voting algorithm with application to parse reranking</title>
       <author><first>Libin</first><last>Shen</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>9–16</pages>
       <url hash="f9e26239">W03-0402</url>
       <bibkey>shen-joshi-2003-svm</bibkey>
@@ -450,8 +450,8 @@
     </paper>
     <paper id="4">
       <title>Learning subjective nouns using extraction pattern bootstrapping</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <pages>25–32</pages>
       <url hash="adb581ac">W03-0404</url>
@@ -459,7 +459,7 @@
     </paper>
     <paper id="5">
       <title>Unsupervised Personal Name Disambiguation</title>
-      <author><first>Gideon</first><last>Mann</last></author>
+      <author id="gideon-mann"><first>Gideon</first><last>Mann</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>33–40</pages>
       <url hash="870b5f86">W03-0405</url>
@@ -476,7 +476,7 @@
     <paper id="7">
       <title>Bootstrapping <fixed-case>POS</fixed-case>-taggers using unlabelled data</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>49–55</pages>
       <url hash="1b9bdc6f">W03-0407</url>
@@ -485,8 +485,8 @@
     <paper id="8">
       <title>Updating an <fixed-case>NLP</fixed-case> system to fit new domains: an empirical study on the sentence segmentation problem</title>
       <author><first>Tong</first><last>Zhang</last></author>
-      <author><first>Fred</first><last>Damerau</last></author>
-      <author><first>David</first><last>Johnson</last></author>
+      <author id="fred-damerau"><first>Fred</first><last>Damerau</last></author>
+      <author id="david-e-johnson"><first>David</first><last>Johnson</last></author>
       <pages>56–62</pages>
       <url hash="50744417">W03-0408</url>
       <bibkey>zhang-etal-2003-updating</bibkey>
@@ -494,7 +494,7 @@
     <paper id="9">
       <title>Exceptionality and Natural Language Learning</title>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>63–70</pages>
       <url hash="151bf300">W03-0409</url>
       <bibkey>rotaru-litman-2003-exceptionality</bibkey>
@@ -510,7 +510,7 @@
     <paper id="11">
       <title>Preposition Semantic Classification via Treebank and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>79–86</pages>
       <url hash="1f1f20b7">W03-0411</url>
       <bibkey>ohara-wiebe-2003-preposition</bibkey>
@@ -551,7 +551,7 @@
       <title>An efficient clustering algorithm for class-based language models</title>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>119–126</pages>
       <url hash="9a124100">W03-0416</url>
       <bibkey>matsuzaki-etal-2003-efficient</bibkey>
@@ -559,14 +559,14 @@
     <paper id="17">
       <title>Training a Naive <fixed-case>B</fixed-case>ayes Classifier via the <fixed-case>EM</fixed-case> Algorithm with a Class Distribution Constraint</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>127–134</pages>
       <url hash="d34ad244">W03-0417</url>
       <bibkey>tsuruoka-tsujii-2003-training</bibkey>
     </paper>
     <paper id="18">
       <title>Identifying Events using Similarity and Context</title>
-      <author><first>Dominic R.</first><last>Jones</last></author>
+      <author id="dominic-r-jones"><first>Dominic R.</first><last>Jones</last></author>
       <author><first>Cynthia A.</first><last>Thompson</last></author>
       <pages>135–141</pages>
       <url hash="1d69a4cb">W03-0418</url>
@@ -574,7 +574,7 @@
     </paper>
     <paper id="19">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2003 Shared Task: Language-Independent Named Entity Recognition</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <author><first>Fien</first><last>De Meulder</last></author>
       <pages>142–147</pages>
       <url hash="fa77a52b">W03-0419</url>
@@ -583,8 +583,8 @@
     <paper id="20">
       <title>Maximum Entropy Models for Named Entity Recognition</title>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>148–151</pages>
       <url hash="f8c32201">W03-0420</url>
       <bibkey>bender-etal-2003-maximum</bibkey>
@@ -592,8 +592,8 @@
     <paper id="21">
       <title>A Simple Named Entity Extractor using <fixed-case>A</fixed-case>da<fixed-case>B</fixed-case>oost</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>152–155</pages>
       <url hash="963c7139">W03-0421</url>
       <bibkey>carreras-etal-2003-simple</bibkey>
@@ -601,8 +601,8 @@
     <paper id="22">
       <title>Learning a Perceptron-Based Named Entity Chunker via Online Recognition Feedback</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>156–159</pages>
       <url hash="4cc21d63">W03-0422</url>
       <bibkey>carreras-etal-2003-learning</bibkey>
@@ -617,7 +617,7 @@
     </paper>
     <paper id="24">
       <title>Language Independent <fixed-case>NER</fixed-case> using a Maximum Entropy Tagger</title>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>164–167</pages>
       <url hash="90d81f5e">W03-0424</url>
@@ -625,9 +625,9 @@
     </paper>
     <paper id="25">
       <title>Named Entity Recognition through Classifier Combination</title>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <pages>168–171</pages>
       <url hash="6d5f652c">W03-0425</url>
@@ -643,7 +643,7 @@
     <paper id="27">
       <title>Memory-based one-step named-entity recognition: Effects of seed list features, classifier stacking, and unannotated data</title>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>176–179</pages>
       <url hash="0a44d34f">W03-0427</url>
       <bibkey>hendrickx-van-den-bosch-2003-memory</bibkey>
@@ -653,7 +653,7 @@
       <author><first>Dan</first><last>Klein</last></author>
       <author><first>Joseph</first><last>Smarr</last></author>
       <author id="huy-nguyen-stanford"><first>Huy</first><last>Nguyen</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>180–183</pages>
       <url hash="06af4376">W03-0428</url>
       <bibkey>klein-etal-2003-named</bibkey>
@@ -662,7 +662,7 @@
       <title>Named Entity Recognition using Hundreds of Thousands of Features</title>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
-      <author><first>Christine</first><last>Piatko</last></author>
+      <author id="christine-piatko"><first>Christine</first><last>Piatko</last></author>
       <pages>184–187</pages>
       <url hash="f6fd37b1">W03-0429</url>
       <bibkey>mayfield-etal-2003-named</bibkey>
@@ -679,7 +679,7 @@
       <title>Meta-Learning Orthographic and Contextual Models for Language Independent Named Entity Recognition</title>
       <author><first>Robert</first><last>Munro</last></author>
       <author><first>Daren</first><last>Ler</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>192–195</pages>
       <url hash="24a65a2a">W03-0431</url>
       <bibkey>munro-etal-2003-meta</bibkey>
@@ -687,7 +687,7 @@
     <paper id="32">
       <title>Named Entity Recognition Using a Character-based Probabilistic Approach</title>
       <author><first>Casey</first><last>Whitelaw</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>196–199</pages>
       <url hash="53d11712">W03-0432</url>
       <bibkey>whitelaw-patrick-2003-named</bibkey>
@@ -704,7 +704,7 @@
     <paper id="34">
       <title>A Robust Risk Minimization based Named Entity Recognition System</title>
       <author><first>Tong</first><last>Zhang</last></author>
-      <author><first>David</first><last>Johnson</last></author>
+      <author id="david-e-johnson"><first>David</first><last>Johnson</last></author>
       <pages>204–207</pages>
       <url hash="d0ec9a47">W03-0434</url>
       <bibkey>zhang-johnson-2003-robust</bibkey>
@@ -712,7 +712,7 @@
     <paper id="35">
       <title>Memory-Based Named Entity Recognition using Unannotated Data</title>
       <author><first>Fien</first><last>De Meulder</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>208–211</pages>
       <url hash="cf7285b1">W03-0435</url>
       <bibkey>de-meulder-daelemans-2003-memory</bibkey>
@@ -730,9 +730,9 @@
     </frontmatter>
     <paper id="1">
       <title>Hedge Trimmer: A Parse-and-Trim Approach to Headline Generation</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>David</first><last>Zajic</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>1–8</pages>
       <url hash="9c4363a4">W03-0501</url>
       <bibkey>dorr-etal-2003-hedge</bibkey>
@@ -740,7 +740,7 @@
     <paper id="2">
       <title>Sub-event based multi-document summarization</title>
       <author><first>Naomi</first><last>Daniel</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Timothy</first><last>Allison</last></author>
       <pages>9–16</pages>
       <url hash="020ab03b">W03-0502</url>
@@ -751,14 +751,14 @@
       <author><first>Amardeep</first><last>Grewal</last></author>
       <author><first>Timothy</first><last>Allison</last></author>
       <author><first>Stanko</first><last>Dimitrov</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>17–24</pages>
       <url hash="592fcc8c">W03-0503</url>
       <bibkey>grewal-etal-2003-multi</bibkey>
     </paper>
     <paper id="4">
       <title>Summarization of Noisy Documents: A Pilot Study</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <author><first>Daniel</first><last>Lopresti</last></author>
       <author><first>Chilin</first><last>Shih</last></author>
       <pages>25–32</pages>
@@ -778,7 +778,7 @@
       <title>A Study for Document Summarization Based on Personal Annotation</title>
       <author><first>Haiqin</first><last>Zhang</last></author>
       <author><first>Zheng</first><last>Chen</last></author>
-      <author><first>Wei-ying</first><last>Ma</last></author>
+      <author id="wei-ying-ma"><first>Wei-ying</first><last>Ma</last></author>
       <author><first>Qingsheng</first><last>Cai</last></author>
       <pages>41–48</pages>
       <url hash="70f72be4">W03-0506</url>
@@ -786,7 +786,7 @@
     </paper>
     <paper id="7">
       <title>Text Summarization Challenge 2 - Text summarization evaluation at <fixed-case>NTCIR</fixed-case> Workshop 3</title>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Takahiro</first><last>Fukusima</last></author>
       <author><first>Hidetsugu</first><last>Nanba</last></author>
       <pages>49–56</pages>
@@ -795,7 +795,7 @@
     </paper>
     <paper id="8">
       <title>Examining the consensus between human summaries: initial experiments with factoid analysis</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <pages>57–64</pages>
       <url hash="ee8e14f7">W03-0508</url>
@@ -811,8 +811,8 @@
     </paper>
     <paper id="10">
       <title>The Potential and Limitations of Automatic Sentence Extraction for Summarization</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>73–80</pages>
       <url hash="d511255f">W03-0510</url>
       <bibkey>lin-hovy-2003-potential</bibkey>
@@ -832,7 +832,7 @@
       <title>Word Sense Disambiguation with Pictures</title>
       <author><first>Kobus</first><last>Barnard</last></author>
       <author><first>Matthew</first><last>Johnson</last></author>
-      <author><first>David</first><last>Forsyth</last></author>
+      <author id="david-forsyth"><first>David</first><last>Forsyth</last></author>
       <pages>1–5</pages>
       <url hash="9814c805">W03-0601</url>
       <bibkey>barnard-etal-2003-word</bibkey>
@@ -840,8 +840,8 @@
     <paper id="2">
       <title>Words and Pictures in the News</title>
       <author><first>Jaety</first><last>Edwards</last></author>
-      <author><first>Ryan</first><last>White</last></author>
-      <author><first>David</first><last>Forsyth</last></author>
+      <author id="ryen-white"><first>Ryan</first><last>White</last></author>
+      <author id="david-forsyth"><first>David</first><last>Forsyth</last></author>
       <pages>6–13</pages>
       <url hash="ec524ebc">W03-0602</url>
       <bibkey>edwards-etal-2003-words</bibkey>
@@ -849,7 +849,7 @@
     <paper id="3">
       <title>Understanding Complex Visually Referring Utterances</title>
       <author><first>Peter</first><last>Gorniak</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>14–21</pages>
       <url hash="d0ea715d">W03-0603</url>
       <bibkey>gorniak-roy-2003-understanding</bibkey>
@@ -907,7 +907,7 @@
     </paper>
     <paper id="10">
       <title>Conversational Robots: Building Blocks for Grounding Word Meaning</title>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <author><first>Kai-Yuh</first><last>Hsiao</last></author>
       <author><first>Nikolaos</first><last>Mavridis</last></author>
       <pages>70–77</pages>
@@ -917,7 +917,7 @@
     <paper id="11">
       <title>Learning the Meaning and Usage of Time Phrases from a Parallel Text-Data Corpus</title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>78–85</pages>
       <url hash="6209d92d">W03-0611</url>
       <bibkey>reiter-sripada-2003-learning</bibkey>
@@ -933,7 +933,7 @@
     <paper id="13">
       <title>Learning Word Meanings and Descriptive Parameter Spaces from Music</title>
       <author><first>Brian</first><last>Whitman</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <author><first>Barry</first><last>Vercoe</last></author>
       <pages>92–99</pages>
       <url hash="e62e6b47">W03-0613</url>
@@ -952,7 +952,7 @@
     </frontmatter>
     <paper id="1">
       <title>Combining Semantic and Temporal Constraints for Multimodal Integration in Conversation Systems</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <author><first>Pengyu</first><last>Hong</last></author>
       <author><first>Michelle X.</first><last>Zhou</last></author>
       <pages>1–3</pages>
@@ -961,8 +961,8 @@
     </paper>
     <paper id="2">
       <title>Conceptual Language Models for Dialog Systems</title>
-      <author><first>Renato</first><last>De Mori</last></author>
-      <author><first>Frederic</first><last>Béchet</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Béchet</last></author>
       <pages>4–6</pages>
       <url hash="fe0c88f8">W03-0702</url>
       <bibkey>de-mori-bechet-2003-conceptual</bibkey>
@@ -970,7 +970,7 @@
     <paper id="3">
       <title>Directions For Multi-Party Human-Computer Interaction Research</title>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>7–9</pages>
       <url hash="ead01711">W03-0703</url>
       <bibkey>kirchhoff-ostendorf-2003-directions</bibkey>
@@ -978,7 +978,7 @@
     <paper id="4">
       <title>Dialogue Management for an Automated Multilingual Call Center</title>
       <author><first>Hilda</first><last>Hardy</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Min</first><last>Wu</last></author>
       <pages>10–12</pages>
       <url hash="fef418f9">W03-0704</url>
@@ -988,7 +988,7 @@
       <title>Dialogue complexity with portability? Research directions for the Information State approach</title>
       <author><first>Carl</first><last>Burke</last></author>
       <author><first>Christy</first><last>Doran</last></author>
-      <author><first>Abigail</first><last>Gertner</last></author>
+      <author id="abigail-s-gertner"><first>Abigail</first><last>Gertner</last></author>
       <author><first>Andy</first><last>Gregorowicz</last></author>
       <author><first>Lisa</first><last>Harper</last></author>
       <author><first>Joel</first><last>Korb</last></author>
@@ -1000,15 +1000,15 @@
     <paper id="6">
       <title>The Pragmatics of Taking a Spoken Language System Out of the Laboratory</title>
       <author><first>Jody J.</first><last>Daniels</last></author>
-      <author><first>Helen Wright</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen Wright</first><last>Hastie</last></author>
       <pages>16–18</pages>
       <url hash="7aa5aa5b">W03-0706</url>
       <bibkey>daniels-hastie-2003-pragmatics</bibkey>
     </paper>
     <paper id="7">
       <title>Flexible and Personalizable Mixed-Initiative Dialogue Systems</title>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>19–21</pages>
       <url hash="e3402071">W03-0707</url>
       <bibkey>glass-seneff-2003-flexible</bibkey>
@@ -1026,16 +1026,16 @@
     </frontmatter>
     <paper id="1">
       <title>The Talent System: <fixed-case>TEXTRACT</fixed-case> Architecture and Data Model</title>
-      <author><first>Mary S.</first><last>Neff</last></author>
+      <author id="mary-s-neff"><first>Mary S.</first><last>Neff</last></author>
       <author><first>Roy J.</first><last>Byrd</last></author>
-      <author><first>Branimir K.</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir K.</first><last>Boguraev</last></author>
       <pages>1–8</pages>
       <url hash="43a8c7b6">W03-0801</url>
       <bibkey>neff-etal-2003-talent</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>WHAT</fixed-case>: An <fixed-case>XSLT</fixed-case>-based Infrastructure for the Integration of Natural Language Processing Components</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <pages>9–16</pages>
       <url hash="1714c33d">W03-0802</url>
       <bibkey>schafer-2003-xslt</bibkey>
@@ -1043,18 +1043,18 @@
     <paper id="3">
       <title><fixed-case>OLLIE</fixed-case>: On-Line Learning for Information Extraction</title>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <author><first>Diana</first><last>Maynard</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <pages>17–24</pages>
       <url hash="c84b8217">W03-0803</url>
       <bibkey>tablan-etal-2003-ollie</bibkey>
     </paper>
     <paper id="4">
       <title>International Standard for a Linguistic Annotation Framework</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
       <pages>25–30</pages>
       <url hash="0ed61d13">W03-0804</url>
       <bibkey>ide-etal-2003-international</bibkey>
@@ -1069,24 +1069,24 @@
     </paper>
     <paper id="6">
       <title>Blueprint for a High Performance <fixed-case>NLP</fixed-case> Infrastructure</title>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>39–44</pages>
       <url hash="25a85e16">W03-0806</url>
       <bibkey>curran-2003-blueprint</bibkey>
     </paper>
     <paper id="7">
       <title>Current Issues in Software Engineering for Natural Language Processing</title>
-      <author><first>Jochen</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen</first><last>Leidner</last></author>
       <pages>45–50</pages>
       <url hash="d3c3a125">W03-0807</url>
       <bibkey>leidner-2003-current</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>I</fixed-case>nfo<fixed-case>X</fixed-case>tract: A Customizable Intermediate Level Information Extraction Engine</title>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <author><first>Wei</first><last>Li</last></author>
       <author><first>Cheng</first><last>Niu</last></author>
-      <author><first>Thomas</first><last>Cornell</last></author>
+      <author id="thomas-l-cornell"><first>Thomas</first><last>Cornell</last></author>
       <pages>51–58</pages>
       <url hash="36132d02">W03-0808</url>
       <bibkey>srihari-etal-2003-infoxtract</bibkey>
@@ -1121,7 +1121,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>SDL</fixed-case>—<fixed-case>A</fixed-case> Description Language for Building <fixed-case>NLP</fixed-case> Systems</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>83–90</pages>
       <url hash="1556e19b">W03-0812</url>
       <bibkey>krieger-2003-sdl</bibkey>
@@ -1140,7 +1140,7 @@
     <paper id="1">
       <title>A knowledge-driven approach to text meaning processing</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <author><first>John</first><last>Thompson</last></author>
       <pages>1–6</pages>
       <url hash="64dabeae">W03-0901</url>
@@ -1148,7 +1148,7 @@
     </paper>
     <paper id="2">
       <title>Extracting and evaluating general world knowledge from the Brown Corpus</title>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <author><first>Matthew</first><last>Tong</last></author>
       <pages>7–13</pages>
       <url hash="8daa3021">W03-0902</url>
@@ -1168,7 +1168,7 @@
     </paper>
     <paper id="4">
       <title>Operative strategies in ontological semantics</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <pages>22–29</pages>
@@ -1178,8 +1178,8 @@
     <paper id="5">
       <title>The genesis of a script for bankruptcy in ontological semantics</title>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Christian F.</first><last>Hempelmann</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></author>
       <author><first>Inna</first><last>Nirenburg</last></author>
       <author><first>Katrina E.</first><last>Triezenberg</last></author>
       <pages>30–37</pages>
@@ -1190,9 +1190,9 @@
       <title>Entailment, intensionality and text understanding</title>
       <author><first>Cleo</first><last>Condoravdi</last></author>
       <author><first>Dick</first><last>Crouch</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Reinhard</first><last>Stolle</last></author>
-      <author><first>Daniel G.</first><last>Bobrow</last></author>
+      <author id="daniel-bobrow"><first>Daniel G.</first><last>Bobrow</last></author>
       <pages>38–45</pages>
       <url hash="915261f0">W03-0906</url>
       <bibkey>condoravdi-etal-2003-entailment</bibkey>
@@ -1208,8 +1208,8 @@
       <title>Towards light semantic processing for question answering</title>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Yifen</first><last>Huang</last></author>
-      <author><first>Anna</first><last>Kupść</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>54–61</pages>
       <url hash="91b3b7c9">W03-0908</url>
       <bibkey>van-durme-etal-2003-towards</bibkey>
@@ -1224,7 +1224,7 @@
     <paper id="10">
       <title>Deriving verb-meaning clusters from syntactic structure</title>
       <author><first>Paul</first><last>Kingsbury</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <pages>70–77</pages>
       <url hash="e6302df0">W03-0910</url>
       <bibkey>kingsbury-kipper-2003-deriving</bibkey>
@@ -1242,7 +1242,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Projection Extension Algorithm for Statistical Machine Translation</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>1–8</pages>
       <url hash="e5bb1430">W03-1001</url>
       <bibkey>tillmann-2003-projection</bibkey>
@@ -1258,7 +1258,7 @@
     <paper id="3">
       <title>Cross-Lingual Lexical Triggers in Statistical Language Modeling</title>
       <author><first>Woosung</first><last>Kim</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>17–24</pages>
       <url hash="ae58804a">W03-1003</url>
       <bibkey>kim-khudanpur-2003-cross</bibkey>
@@ -1266,7 +1266,7 @@
     <paper id="4">
       <title>Sentence Alignment for Monolingual Comparable Corpora</title>
       <author><first>Regina</first><last>Barzilay</last></author>
-      <author><first>Noemie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
       <pages>25–32</pages>
       <url hash="496894a7">W03-1004</url>
       <bibkey>barzilay-elhadad-2003-sentence</bibkey>
@@ -1282,7 +1282,7 @@
     <paper id="6">
       <title>Use of Deep Linguistic Features for the Recognition and Labeling of Semantic Arguments</title>
       <author><first>John</first><last>Chen</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>41–48</pages>
       <url hash="469a7a5a">W03-1006</url>
       <bibkey>chen-rambow-2003-use</bibkey>
@@ -1291,7 +1291,7 @@
       <title>Maximum Entropy Models for <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Classification</title>
       <author><first>Michael</first><last>Fleischman</last></author>
       <author><first>Namhee</first><last>Kwon</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>49–56</pages>
       <url hash="c2cb00c8">W03-1007</url>
       <bibkey>fleischman-etal-2003-maximum</bibkey>
@@ -1314,7 +1314,7 @@
     </paper>
     <paper id="10">
       <title>A Plethora of Methods for Learning <fixed-case>E</fixed-case>nglish Countability</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>73–80</pages>
       <url hash="a2bc7fb0">W03-1010</url>
@@ -1323,7 +1323,7 @@
     <paper id="11">
       <title>A General Framework for Distributional Similarity</title>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>81–88</pages>
       <url hash="8c446628">W03-1011</url>
       <bibkey>weeds-weir-2003-general</bibkey>
@@ -1332,7 +1332,7 @@
       <title>Using <fixed-case>LTAG</fixed-case> Based Features in Parse Reranking</title>
       <author><first>Libin</first><last>Shen</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>89–96</pages>
       <url hash="f131deb7">W03-1012</url>
       <bibkey>shen-etal-2003-using</bibkey>
@@ -1340,15 +1340,15 @@
     <paper id="13">
       <title>Log-Linear Models for Wide-Coverage <fixed-case>CCG</fixed-case> Parsing</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>97–104</pages>
       <url hash="5fcaa5af">W03-1013</url>
       <bibkey>clark-curran-2003-log</bibkey>
     </paper>
     <paper id="14">
       <title>Learning Extraction Patterns for Subjective Expressions</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>105–112</pages>
       <url hash="44be7b51">W03-1014</url>
       <bibkey>riloff-wiebe-2003-learning</bibkey>
@@ -1356,15 +1356,15 @@
     <paper id="15">
       <title>Bootstrapping Coreference Classifiers with Multiple Machine Learning Algorithms</title>
       <author><first>Vincent</first><last>Ng</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>113–120</pages>
       <url hash="7874eb73">W03-1015</url>
       <bibkey>ng-cardie-2003-bootstrapping</bibkey>
     </paper>
     <paper id="16">
       <title>Statistical Acquisition of Content Selection Rules for Natural Language Generation</title>
-      <author><first>Pablo Ariel</first><last>Duboue</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="pablo-duboue"><first>Pablo Ariel</first><last>Duboue</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>121–128</pages>
       <url hash="c8e7718a">W03-1016</url>
       <bibkey>duboue-mckeown-2003-statistical</bibkey>
@@ -1379,8 +1379,8 @@
     </paper>
     <paper id="18">
       <title>Evaluation and Extension of Maximum Entropy Models with Inequality Constraints</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>137–144</pages>
       <url hash="3f2ece41">W03-1018</url>
       <bibkey>kazama-tsujii-2003-evaluation</bibkey>
@@ -1398,7 +1398,7 @@
       <title>A Fast Algorithm for Feature Selection in Conditional Maximum Entropy Modeling</title>
       <author><first>Yaqian</first><last>Zhou</last></author>
       <author><first>Fuliang</first><last>Weng</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <author><first>Hauke</first><last>Schmidt</last></author>
       <pages>153–159</pages>
       <url hash="4609bd00">W03-1020</url>
@@ -1408,7 +1408,7 @@
       <title>Training Connectionist Models for the <fixed-case>S</fixed-case>tructured <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel</title>
       <author><first>Peng</first><last>Xu</last></author>
       <author><first>Ahmad</first><last>Emami</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <pages>160–167</pages>
       <url hash="8906cca2">W03-1021</url>
       <bibkey>xu-etal-2003-training</bibkey>
@@ -1423,7 +1423,7 @@
     </paper>
     <paper id="23">
       <title>Using the Web in Machine Learning for Other-Anaphora Resolution</title>
-      <author><first>Natalia N.</first><last>Modjeska</last></author>
+      <author id="natalia-n-modjeska"><first>Natalia N.</first><last>Modjeska</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
       <pages>176–183</pages>
@@ -1440,16 +1440,16 @@
     </paper>
     <paper id="25">
       <title>A Maximum Entropy <fixed-case>C</fixed-case>hinese Character-Based Parser</title>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <pages>192–199</pages>
       <url hash="aefd7ab2">W03-1025</url>
       <bibkey>luo-2003-maximum</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>H</fixed-case>owtogeta<fixed-case>C</fixed-case>hinese<fixed-case>N</fixed-case>ame(<fixed-case>E</fixed-case>ntity): Segmentation and Combination Issues</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
       <author><first>Tong</first><last>Zhang</last></author>
       <author><first>Abraham</first><last>Ittycheriah</last></author>
       <pages>200–207</pages>
@@ -1486,7 +1486,7 @@
     </frontmatter>
     <paper id="1">
       <title>Improving Summarization Performance by Sentence Compression — A Pilot Study</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <doi>10.3115/1118935.1118936</doi>
       <pages>1–8</pages>
       <url hash="a80ca98e">W03-1101</url>
@@ -1524,7 +1524,7 @@
       <title><fixed-case>P</fixed-case>oisson Naive <fixed-case>B</fixed-case>ayes for Text Classification with Feature Weighting</title>
       <author><first>Sang-Bum</first><last>Kim</last></author>
       <author><first>Hee-Cheol</first><last>Seo</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <doi>10.3115/1118935.1118940</doi>
       <pages>33–40</pages>
       <url hash="7cf428f4">W03-1105</url>
@@ -1533,7 +1533,7 @@
     <paper id="6">
       <title>Text Classification in <fixed-case>A</fixed-case>sian Languages without Word Segmentation</title>
       <author><first>Fuchun</first><last>Peng</last></author>
-      <author><first>Xiangji</first><last>Huang</last></author>
+      <author id="xiangji-huang"><first>Xiangji</first><last>Huang</last></author>
       <author><first>Dale</first><last>Schuurmans</last></author>
       <author><first>Shaojun</first><last>Wang</last></author>
       <doi>10.3115/1118935.1118941</doi>
@@ -1545,7 +1545,7 @@
       <title>Feature Selection in Categorizing Procedural Expressions</title>
       <author><first>Mineki</first><last>Takechi</last></author>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <doi>10.3115/1118935.1118942</doi>
       <pages>49–56</pages>
@@ -1556,7 +1556,7 @@
       <title>Learning Bilingual Translations from Comparable Corpora to Cross-Language Information Retrieval: Hybrid Statistics-based and Linguistics-based Approach</title>
       <author><first>Fatiha</first><last>Sadat</last></author>
       <author><first>Masatoshi</first><last>Yoshikawa</last></author>
-      <author><first>Shunsuke</first><last>Uemura</last></author>
+      <author id="shunsuke-uemura"><first>Shunsuke</first><last>Uemura</last></author>
       <doi>10.3115/1118935.1118943</doi>
       <pages>57–64</pages>
       <url hash="fd8ab304">W03-1108</url>
@@ -1576,7 +1576,7 @@
     </paper>
     <paper id="10">
       <title>Issues in Pre- and Post-translation Document Expansion: Untranslatable Cognates and Missegmented Words</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <doi>10.3115/1118935.1118945</doi>
       <pages>77–83</pages>
       <url hash="02eba071">W03-1110</url>
@@ -1594,7 +1594,7 @@
     <paper id="12">
       <title><fixed-case>A</fixed-case>ny<fixed-case>Q</fixed-case>: Answer Set based Information Retrieval System</title>
       <author><first>Hyo-Jung</first><last>Oh</last></author>
-      <author><first>Myung-Gil</first><last>Jang</last></author>
+      <author id="myung-gil-jang"><first>Myung-Gil</first><last>Jang</last></author>
       <author><first>Moon-Soo</first><last>Chang</last></author>
       <doi>10.3115/1118935.1118947</doi>
       <pages>92–99</pages>
@@ -1615,7 +1615,7 @@
     </paper>
     <paper id="14">
       <title>Improving Document Clustering by Utilizing Meta-Data</title>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Nam-Kiu</first><last>Chan</last></author>
       <author><first>Kam-Lai</first><last>Wong</last></author>
       <doi>10.3115/1118935.1118949</doi>
@@ -1637,7 +1637,7 @@
       <title>Extraction of User Preferences from a Few Positive Documents</title>
       <author><first>Byeong Man</first><last>Kim</last></author>
       <author><first>Qing</first><last>Li</last></author>
-      <author><first>Jong Wan</first><last>Kim</last></author>
+      <author id="bong-wan-kim"><first>Jong Wan</first><last>Kim</last></author>
       <doi>10.3115/1118935.1118951</doi>
       <pages>124–131</pages>
       <url hash="66b5b6f5">W03-1116</url>
@@ -1654,8 +1654,8 @@
     <paper id="18">
       <title>Text Categorization Using Automatically Acquired Domain Ontology</title>
       <author><first>Shih-Hung</first><last>Wu</last></author>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <doi>10.3115/1118935.1118953</doi>
       <pages>138–145</pages>
       <url hash="50f4bba6">W03-1118</url>
@@ -1663,7 +1663,7 @@
     </paper>
     <paper id="19">
       <title>A Sentence Reduction using Syntax Control</title>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Susumu</first><last>Horiguchi</last></author>
       <doi>10.3115/1118935.1118954</doi>
       <pages>146–152</pages>
@@ -1675,7 +1675,7 @@
       <author><first>Fuminori</first><last>Kimura</last></author>
       <author><first>Akira</first><last>Maeda</last></author>
       <author><first>Masatoshi</first><last>Yoshikawa</last></author>
-      <author><first>Shunsuke</first><last>Uemura</last></author>
+      <author id="shunsuke-uemura"><first>Shunsuke</first><last>Uemura</last></author>
       <doi>10.3115/1118935.1118955</doi>
       <pages>153–160</pages>
       <url hash="85262898">W03-1120</url>
@@ -1685,7 +1685,7 @@
       <title><fixed-case>K</fixed-case>orean Named Entity Recognition using <fixed-case>HMM</fixed-case> and <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>raining Model</title>
       <author><first>Euisok</first><last>Chung</last></author>
       <author><first>Yi-Gyu</first><last>Hwang</last></author>
-      <author><first>Myung-Gil</first><last>Jang</last></author>
+      <author id="myung-gil-jang"><first>Myung-Gil</first><last>Jang</last></author>
       <doi>10.3115/1118935.1118956</doi>
       <pages>161–167</pages>
       <url hash="cb03658f">W03-1121</url>
@@ -1693,7 +1693,7 @@
     </paper>
     <paper id="22">
       <title>Question-Answering Based on Virtually Integrated Lexical Knowledge Base</title>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Jae-Ho</first><last>Kim</last></author>
       <author><first>Masaru</first><last>Miyazaki</last></author>
       <author><first>Jun</first><last>Goto</last></author>
@@ -1723,7 +1723,7 @@
       <author><first>Apurva</first><last>Jadhav</last></author>
       <author><first>Ashutosh</first><last>Joshi</last></author>
       <author><first>Soumen</first><last>Chakrabarti</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <doi>10.3115/1119312.1119313</doi>
       <pages>1–10</pages>
       <url hash="b1045f3d">W03-1201</url>
@@ -1733,7 +1733,7 @@
       <title>Using Thematic Information in Statistical Headline Generation</title>
       <author><first>Stephen</first><last>Wan</last></author>
       <author><first>Mark</first><last>Dras</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <doi>10.3115/1119312.1119314</doi>
       <pages>11–20</pages>
@@ -1744,7 +1744,7 @@
       <title>Combining Optimal Clustering and Hidden <fixed-case>M</fixed-case>arkov Models for Extractive Summarization</title>
       <author><first>Pascale</first><last>Fung</last></author>
       <author><first>Grace</first><last>Ngai</last></author>
-      <author><first>Chi-Shun</first><last>Cheung</last></author>
+      <author id="chi-shun-cheung"><first>Chi-Shun</first><last>Cheung</last></author>
       <doi>10.3115/1119312.1119315</doi>
       <pages>21–28</pages>
       <url hash="34265602">W03-1203</url>
@@ -1762,7 +1762,7 @@
     </paper>
     <paper id="5">
       <title>An Evolutionary Approach for Improving the Quality of Automatic Summaries</title>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <doi>10.3115/1119312.1119317</doi>
       <pages>37–45</pages>
       <url hash="c8db6eb2">W03-1205</url>
@@ -1770,10 +1770,10 @@
     </paper>
     <paper id="6">
       <title><fixed-case>HITIQA</fixed-case>: An Interactive Question Answering System: A Preliminary Report</title>
-      <author><first>Sharon</first><last>Small</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Nobuyuki</first><last>Shimizu</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <doi>10.3115/1119312.1119318</doi>
       <pages>46–53</pages>
       <url hash="f27dc848">W03-1206</url>
@@ -1781,9 +1781,9 @@
     </paper>
     <paper id="7">
       <title>Discovery of Manner Relations and Their Applicability to Question Answering</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Manju</first><last>Putcha</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <doi>10.3115/1119312.1119319</doi>
       <pages>54–60</pages>
       <url hash="481159ec">W03-1207</url>
@@ -1803,8 +1803,8 @@
     <paper id="9">
       <title>Statistical <fixed-case>QA</fixed-case> - Classifier vs. Re-ranker: What’s the difference?</title>
       <author><first>Deepak</first><last>Ravichandran</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Franz Josef</first><last>Och</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
       <doi>10.3115/1119312.1119321</doi>
       <pages>69–75</pages>
       <url hash="ff376a9b">W03-1209</url>
@@ -1812,7 +1812,7 @@
     </paper>
     <paper id="10">
       <title>Automatic Detection of Causal Relations for Question Answering</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <doi>10.3115/1119312.1119322</doi>
       <pages>76–83</pages>
       <url hash="1a6c5aa6">W03-1210</url>
@@ -1821,7 +1821,7 @@
     <paper id="11">
       <title>Question Answering on a Case Insensitive Corpus</title>
       <author><first>Wei</first><last>Li</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Xiaoge</first><last>Li</last></author>
       <doi>10.3115/1119312.1119323</doi>
@@ -1846,8 +1846,8 @@
     <paper id="1">
       <title>Gene Name Extraction Using <fixed-case>F</fixed-case>ly<fixed-case>B</fixed-case>ase Resources</title>
       <author><first>Alex</first><last>Morgan</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <author><first>Marc</first><last>Colosimo</last></author>
       <doi>10.3115/1118958.1118959</doi>
       <pages>1–8</pages>
@@ -1869,8 +1869,8 @@
     </paper>
     <paper id="3">
       <title>Using Domain-Specific Verbs for Term Classification</title>
-      <author><first>Irena</first><last>Spasic</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <doi>10.3115/1118958.1118961</doi>
       <pages>17–24</pages>
@@ -1879,7 +1879,7 @@
     </paper>
     <paper id="4">
       <title>Enhancing Performance of Protein Name Recognizers Using Collocation</title>
-      <author><first>Wen-Juan</first><last>Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen-Juan</first><last>Hou</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <doi>10.3115/1118958.1118962</doi>
       <pages>25–32</pages>
@@ -1890,7 +1890,7 @@
       <title>Two-Phase Biomedical <fixed-case>NE</fixed-case> Recognition based on <fixed-case>SVM</fixed-case>s</title>
       <author><first>Ki-Joong</first><last>Lee</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <doi>10.3115/1118958.1118963</doi>
       <pages>33–40</pages>
       <url hash="5ba8fe80">W03-1305</url>
@@ -1899,7 +1899,7 @@
     <paper id="6">
       <title>Boosting Precision and Recall of Dictionary-Based Protein Name Recognition</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1118958.1118964</doi>
       <pages>41–48</pages>
       <url hash="1afa3b1e">W03-1306</url>
@@ -1909,9 +1909,9 @@
       <title>Effective Adaptation of Hidden <fixed-case>M</fixed-case>arkov Model-based Named Entity Recognizer for Biomedical Domain</title>
       <author><first>Dan</first><last>Shen</last></author>
       <author><first>Jie</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <doi>10.3115/1118958.1118965</doi>
       <pages>49–56</pages>
       <url hash="7bd64cf8">W03-1307</url>
@@ -1929,9 +1929,9 @@
     <paper id="9">
       <title>Protein Name Tagging for Biomedical Annotation in Text</title>
       <author><first>Kaoru</first><last>Yamamoto</last></author>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Akihiko</first><last>Konagaya</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1118958.1118967</doi>
       <pages>65–72</pages>
       <url hash="abbaf077">W03-1309</url>
@@ -1950,11 +1950,11 @@
     </paper>
     <paper id="11">
       <title>Extracting Information on Pneumonia in Infants Using Natural Language Processing of Radiology Reports</title>
-      <author><first>Eneida A.</first><last>Mendonca</last></author>
+      <author id="eneida-a-mendonca"><first>Eneida A.</first><last>Mendonca</last></author>
       <author><first>Janet</first><last>Haas</last></author>
       <author><first>Lyudmila</first><last>Shagina</last></author>
       <author><first>Elaine</first><last>Larson</last></author>
-      <author><first>Carol</first><last>Friedman</last></author>
+      <author id="carol-friedman"><first>Carol</first><last>Friedman</last></author>
       <doi>10.3115/1118958.1118969</doi>
       <pages>81–88</pages>
       <url hash="16ebebfb">W03-1311</url>
@@ -1962,9 +1962,9 @@
     </paper>
     <paper id="12">
       <title>Identification of Patients with Congestive Heart Failure using a Binary Classifier: A Case Study.</title>
-      <author><first>Serguei V.</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei V.</first><last>Pakhomov</last></author>
       <author><first>James</first><last>Buntrock</last></author>
-      <author><first>Christopher G.</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher G.</first><last>Chute</last></author>
       <doi>10.3115/1118958.1118970</doi>
       <pages>89–96</pages>
       <url hash="6016aac6">W03-1312</url>
@@ -1972,11 +1972,11 @@
     </paper>
     <paper id="13">
       <title>Encoding Biomedical Resources in <fixed-case>TEI</fixed-case>: The Case of the <fixed-case>GENIA</fixed-case> Corpus</title>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <doi>10.3115/1118958.1118971</doi>
       <pages>97–104</pages>
       <url hash="d7fa81de">W03-1313</url>
@@ -1985,7 +1985,7 @@
     <paper id="14">
       <title>Exploring Adjectival Modification in Biomedical Discourse Across Two Genres</title>
       <author><first>Olivier</first><last>Bodenreider</last></author>
-      <author><first>Serguei V.</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei V.</first><last>Pakhomov</last></author>
       <doi>10.3115/1118958.1118972</doi>
       <pages>105–112</pages>
       <url hash="829ced7c">W03-1314</url>
@@ -1995,7 +1995,7 @@
       <title>An Investigation of Various Information Sources for Classifying Biological names</title>
       <author><first>Manabu</first><last>Torii</last></author>
       <author><first>Sachin</first><last>Kamboj</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/1118958.1118973</doi>
       <pages>113–120</pages>
       <url hash="2fd0fb54">W03-1315</url>
@@ -2003,9 +2003,9 @@
     </paper>
     <paper id="16">
       <title>Selecting Text Features for Gene Name Classification: from Documents to Terms</title>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Simon</first><last>Rice</last></author>
-      <author><first>Irena</first><last>Spasic</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasic</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <author><first>Benjamin</first><last>Stapley</last></author>
       <doi>10.3115/1118958.1118974</doi>
@@ -2029,7 +2029,7 @@
     </frontmatter>
     <paper id="1">
       <title>Metonymy as a Cross-lingual Phenomenon</title>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <doi>10.3115/1118975.1118976</doi>
       <pages>1–9</pages>
       <url hash="fe173dd6">W03-1401</url>
@@ -2046,7 +2046,7 @@
     </paper>
     <paper id="3">
       <title>Is There a Way to Represent Metaphors in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets? Insights from the <fixed-case>H</fixed-case>amburg Metaphor Database</title>
-      <author><first>Birte</first><last>Lönneker</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker</last></author>
       <doi>10.3115/1118975.1118978</doi>
       <pages>18–27</pages>
       <url hash="af9cdabe">W03-1403</url>
@@ -2063,7 +2063,7 @@
     <paper id="5">
       <title>Conceptual Metaphors: Ontology-based Representation and Corpora Driven Mapping Principles</title>
       <author><first>Kathleen</first><last>Ahrens</last></author>
-      <author><first>Siaw Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw Fong</first><last>Chung</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <doi>10.3115/1118975.1118980</doi>
       <pages>36–42</pages>
@@ -2072,7 +2072,7 @@
     </paper>
     <paper id="6">
       <title>Let’s Paint the Town Red for a Few Hours: Composition of Aspect in Idioms</title>
-      <author><first>Sheila R.</first><last>Glasbey</last></author>
+      <author id="sheila-r-glasbey"><first>Sheila R.</first><last>Glasbey</last></author>
       <doi>10.3115/1118975.1118981</doi>
       <pages>43–49</pages>
       <url hash="dae4ea1d">W03-1406</url>
@@ -2121,8 +2121,8 @@
     <paper id="2">
       <title>Automatic Extraction of Named Entity Translingual Equivalence Based on Multi-Feature Cost Minimization</title>
       <author><first>Fei</first><last>Huang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <doi>10.3115/1119384.1119386</doi>
       <pages>9–16</pages>
       <url hash="9abe17a5">W03-1502</url>
@@ -2131,7 +2131,7 @@
     <paper id="3">
       <title>Construction and Analysis of <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Broadcast News Corpus with Named Entity Tags</title>
       <author><first>Tadashi</first><last>Kumano</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Takahiro</first><last>Fukusima</last></author>
       <doi>10.3115/1119384.1119387</doi>
@@ -2141,10 +2141,10 @@
     </paper>
     <paper id="4">
       <title>Low-cost Named Entity Classification for <fixed-case>C</fixed-case>atalan: Exploiting Multilingual Resources and Unlabeled Data</title>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <doi>10.3115/1119384.1119388</doi>
       <pages>25–32</pages>
       <url hash="a3cb72a9">W03-1504</url>
@@ -2154,7 +2154,7 @@
       <title><fixed-case>NE</fixed-case> Recognition Without Training Data on a Language You Don’t Speak</title>
       <author><first>Diana</first><last>Maynard</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <doi>10.3115/1119384.1119389</doi>
       <pages>33–40</pages>
       <url hash="cdbe3780">W03-1505</url>
@@ -2171,7 +2171,7 @@
     </paper>
     <paper id="7">
       <title>Multilingual Resources for Entity Extraction</title>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Alexis</first><last>Mitchell</last></author>
       <doi>10.3115/1119384.1119391</doi>
       <pages>49–56</pages>
@@ -2181,7 +2181,7 @@
     <paper id="8">
       <title>Transliteration of Proper Names in Cross-Lingual Information Retrieval</title>
       <author><first>Paola</first><last>Virga</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <doi>10.3115/1119384.1119392</doi>
       <pages>57–64</pages>
       <url hash="3af40eb4">W03-1508</url>
@@ -2214,8 +2214,8 @@
     <paper id="1">
       <title>Generation of Single-sentence Paraphrases from Predicate/Argument Structure using Lexico-grammatical Resources</title>
       <author><first>Raymond</first><last>Kozlowski</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <doi>10.3115/1118984.1118985</doi>
       <pages>1–8</pages>
       <url hash="5647206e">W03-1601</url>
@@ -2249,7 +2249,7 @@
       <author><first>James</first><last>Dowdall</last></author>
       <author><first>Kaarel</first><last>Kaljurand</last></author>
       <author><first>Michael</first><last>Hess</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <doi>10.3115/1118984.1118988</doi>
       <pages>25–32</pages>
       <url hash="e11b5533">W03-1604</url>
@@ -2356,7 +2356,7 @@
       <title>Unsupervised Training for Overlapping Ambiguity Resolution in <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Jianfeng</first><last>Li</last></author>
       <doi>10.3115/1119250.1119251</doi>
       <pages>1–7</pages>
@@ -2366,7 +2366,7 @@
     <paper id="2">
       <title>Class Based Sense Definition Model for Word Sense Tagging and Disambiguation</title>
       <author><first>Tracy</first><last>Lin</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <doi>10.3115/1119250.1119252</doi>
       <pages>8–15</pages>
       <url hash="eb2d481f">W03-1702</url>
@@ -2375,7 +2375,7 @@
     <paper id="3">
       <title>Utterance Segmentation Using Combined Approach Based on Bi-directional N-gram and Maximum Entropy</title>
       <author><first>Ding</first><last>Liu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <doi>10.3115/1119250.1119253</doi>
       <pages>16–23</pages>
       <url hash="6e1ab232">W03-1703</url>
@@ -2392,8 +2392,8 @@
     </paper>
     <paper id="5">
       <title>A Bottom-up Merging Algorithm for <fixed-case>C</fixed-case>hinese Unknown Word Extraction</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <doi>10.3115/1119250.1119255</doi>
       <pages>31–38</pages>
       <url hash="ab497fc6">W03-1705</url>
@@ -2402,7 +2402,7 @@
     <paper id="6">
       <title>The Effect of Rhythm on Structural Disambiguation in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Honglin</first><last>Sun</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <doi>10.3115/1119250.1119256</doi>
       <pages>39–46</pages>
       <url hash="1bc77ef2">W03-1706</url>
@@ -2411,7 +2411,7 @@
     <paper id="7">
       <title>Annotating the Propositions in the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank</title>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <doi>10.3115/1119250.1119257</doi>
       <pages>47–54</pages>
       <url hash="72d647ab">W03-1707</url>
@@ -2431,7 +2431,7 @@
       <title><fixed-case>C</fixed-case>hinese Lexical Analysis Using Hierarchical Hidden <fixed-case>M</fixed-case>arkov Model</title>
       <author><first>Hua-Ping</first><last>Zhang</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Xue-Qi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xue-Qi</first><last>Cheng</last></author>
       <author><first>Hao</first><last>Zhang</last></author>
       <author><first>Hong-Kui</first><last>Yu</last></author>
       <doi>10.3115/1119250.1119259</doi>
@@ -2441,7 +2441,7 @@
     </paper>
     <paper id="10">
       <title>Modeling of Long Distance Context Dependency in <fixed-case>C</fixed-case>hinese</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <doi>10.3115/1119250.1119260</doi>
       <pages>71–77</pages>
       <url hash="ea656a00">W03-1710</url>
@@ -2449,7 +2449,7 @@
     </paper>
     <paper id="11">
       <title>A <fixed-case>C</fixed-case>hinese Efficient Analyser Integrating Word Segmentation, Part-Of-Speech Tagging, Partial Parsing and Full Parsing</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <doi>10.3115/1119250.1119261</doi>
       <pages>78–83</pages>
@@ -2520,7 +2520,7 @@
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <doi>10.3115/1119250.1119268</doi>
       <pages>125–132</pages>
       <url hash="57adde07">W03-1718</url>
@@ -2528,7 +2528,7 @@
     </paper>
     <paper id="19">
       <title>The First International <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Thomas</first><last>Emerson</last></author>
       <doi>10.3115/1119250.1119269</doi>
       <pages>133–143</pages>
@@ -2538,9 +2538,9 @@
     <paper id="20">
       <title>Combining Segmenter and Chunker for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Chooi Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi Ling</first><last>Goh</last></author>
       <author><first>Xiaojie</first><last>Wang</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <doi>10.3115/1119250.1119270</doi>
       <pages>144–147</pages>
       <url hash="3c864fbd">W03-1720</url>
@@ -2558,7 +2558,7 @@
       <title><fixed-case>C</fixed-case>hinese Word Segmentation at Peking University</title>
       <author><last>Duan</last><first>Huiming</first></author>
       <author><last>Bai</last><first>Xiaojing</first></author>
-      <author><last>Chang</last><first>Baobao</first></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><last>Yu</last><first>Shiwen</first></author>
       <doi>10.3115/1119250.1119272</doi>
       <pages>152–155</pages>
@@ -2576,9 +2576,9 @@
     </paper>
     <paper id="24">
       <title>Integrating Ngram Model and Case-based Learning for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Zhiming</first><last>Xu</last></author>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="zhiming-xu"><first>Zhiming</first><last>Xu</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <doi>10.3115/1119250.1119274</doi>
       <pages>160–163</pages>
       <url hash="d39630e1">W03-1724</url>
@@ -2599,8 +2599,8 @@
     </paper>
     <paper id="26">
       <title>Introduction to <fixed-case>CKIP</fixed-case> <fixed-case>C</fixed-case>hinese Word Segmentation System for the First International <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff</title>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <doi>10.3115/1119250.1119276</doi>
       <pages>168–171</pages>
       <url hash="52a33389">W03-1726</url>
@@ -2627,7 +2627,7 @@
       <title><fixed-case>SYSTRAN</fixed-case>’s <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Jin</first><last>Yang</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <doi>10.3115/1119250.1119279</doi>
       <pages>180–183</pages>
       <url hash="fec300ce">W03-1729</url>
@@ -2637,7 +2637,7 @@
       <title><fixed-case>HHMM</fixed-case>-based <fixed-case>C</fixed-case>hinese Lexical Analyzer <fixed-case>ICTCLAS</fixed-case></title>
       <author><first>Hua-Ping</first><last>Zhang</last></author>
       <author><first>Hong-Kui</first><last>Yu</last></author>
-      <author><first>De-Yi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>De-Yi</first><last>Xiong</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <doi>10.3115/1119250.1119280</doi>
       <pages>184–187</pages>
@@ -2646,7 +2646,7 @@
     </paper>
     <paper id="31">
       <title>Chunking-based <fixed-case>C</fixed-case>hinese Word Tokenization</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <doi>10.3115/1119250.1119281</doi>
       <pages>188–191</pages>
       <url hash="ca2be69c">W03-1731</url>
@@ -2670,7 +2670,7 @@
       <title>Complex Structuring of Term Variants for Question Answering</title>
       <author><first>James</first><last>Dowdall</last></author>
       <author><first>Fabio</first><last>Rinaldi</last></author>
-      <author><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
+      <author id="fidelia-ibekwe-sanjuan"><first>Fidelia</first><last>Ibekwe-SanJuan</last></author>
       <author><first>Eric</first><last>SanJuan</last></author>
       <doi>10.3115/1119282.1119283</doi>
       <pages>1–8</pages>
@@ -2679,7 +2679,7 @@
     </paper>
     <paper id="2">
       <title>Conceptual Structuring through Term Variations</title>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <doi>10.3115/1119282.1119284</doi>
       <pages>9–16</pages>
       <url hash="a0b3bc00">W03-1802</url>
@@ -2688,7 +2688,7 @@
     <paper id="3">
       <title>Noun-Noun Compound Machine Translation A Feasibility Study on Shallow Processing</title>
       <author><first>Takaaki</first><last>Tanaka</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <doi>10.3115/1119282.1119285</doi>
       <pages>17–24</pages>
       <url hash="5dbbbeb9">W03-1803</url>
@@ -2697,7 +2697,7 @@
     <paper id="4">
       <title>Using Masks, Suffix Array-based Data Structures and Multidimensional Arrays to Compute Positional Ngram Statistics from Corpora</title>
       <author><first>Alexandre</first><last>Gil</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <doi>10.3115/1119282.1119286</doi>
       <pages>25–32</pages>
       <url hash="863a51ce">W03-1804</url>
@@ -2714,7 +2714,7 @@
     </paper>
     <paper id="6">
       <title>Multiword Unit Hybrid Extraction</title>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <doi>10.3115/1119282.1119288</doi>
       <pages>41–48</pages>
       <url hash="b87cfb5f">W03-1806</url>
@@ -2722,10 +2722,10 @@
     </paper>
     <paper id="7">
       <title>Extracting Multiword Expressions with A Semantic Tagger</title>
-      <author><first>Scott S. L.</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott S. L.</first><last>Piao</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Dawn</first><last>Archer</last></author>
-      <author><first>Andrew</first><last>Wilson</last></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last></author>
       <author><first>Tony</first><last>McEnery</last></author>
       <doi>10.3115/1119282.1119289</doi>
       <pages>49–56</pages>
@@ -2743,7 +2743,7 @@
     <paper id="9">
       <title>A Statistical Approach to the Semantics of Verb-Particles</title>
       <author><first>Colin</first><last>Bannard</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <doi>10.3115/1119282.1119291</doi>
       <pages>65–72</pages>
@@ -2752,9 +2752,9 @@
     </paper>
     <paper id="10">
       <title>Detecting a Continuum of Compositionality in Phrasal Verbs</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Bill</first><last>Keller</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <doi>10.3115/1119282.1119292</doi>
       <pages>73–80</pages>
       <url hash="32505bd0">W03-1810</url>
@@ -2771,7 +2771,7 @@
     </paper>
     <paper id="12">
       <title>An Empirical Model of Multiword Expression Decomposability</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Colin</first><last>Bannard</last></author>
       <author><first>Takaaki</first><last>Tanaka</last></author>
       <author><first>Dominic</first><last>Widdows</last></author>
@@ -2782,7 +2782,7 @@
     </paper>
     <paper id="13">
       <title>Licensing Complex Prepositions via Lexical Constraints</title>
-      <author><first>Beata</first><last>Trawinski</last></author>
+      <author id="beata-trawinski"><first>Beata</first><last>Trawinski</last></author>
       <doi>10.3115/1119282.1119295</doi>
       <pages>97–104</pages>
       <url hash="4be2ef69">W03-1813</url>
@@ -2804,8 +2804,8 @@
     </frontmatter>
     <paper id="1">
       <title>Outline of the International Standard Linguistic Annotation Framework</title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <doi>10.3115/1119296.1119297</doi>
       <pages>1–5</pages>
       <url hash="bc371e2e">W03-1901</url>
@@ -2813,7 +2813,7 @@
     </paper>
     <paper id="2">
       <title>From Concrete to Virtual Annotation Mark-up Language: The Case of <fixed-case>COMMO</fixed-case>n-<fixed-case>REF</fixed-case>s</title>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
       <author><first>Rodrigo</first><last>Goulart</last></author>
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
@@ -2824,7 +2824,7 @@
     </paper>
     <paper id="3">
       <title>Ontology-based Linguistic Annotation</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <doi>10.3115/1119296.1119299</doi>
       <pages>14–21</pages>
@@ -2833,10 +2833,10 @@
     </paper>
     <paper id="4">
       <title>Putting <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Data into the <fixed-case>ISO</fixed-case> Linguistic Annotation Framework</title>
-      <author><first>Srinivas</first><last>Narayanan</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="srini-narayanan"><first>Srinivas</first><last>Narayanan</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
       <doi>10.3115/1119296.1119300</doi>
       <pages>22–29</pages>
       <url hash="27ee6fd1">W03-1904</url>
@@ -2844,9 +2844,9 @@
     </paper>
     <paper id="5">
       <title><fixed-case>RDF</fixed-case> Instantiation of <fixed-case>ISLE</fixed-case>/<fixed-case>MILE</fixed-case> Lexical Entries</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <doi>10.3115/1119296.1119301</doi>
       <pages>30–37</pages>
       <url hash="dcbf4fcb">W03-1905</url>
@@ -2858,7 +2858,7 @@
       <author><first>James</first><last>Dowdall</last></author>
       <author><first>Michael</first><last>Hess</last></author>
       <author><first>Kaarel</first><last>Kaljurand</last></author>
-      <author><first>Andreas</first><last>Persidis</last></author>
+      <author id="andreas-persidis"><first>Andreas</first><last>Persidis</last></author>
       <doi>10.3115/1119296.1119302</doi>
       <pages>38–46</pages>
       <url hash="a8183947">W03-1906</url>
@@ -2893,7 +2893,7 @@
       <author><first>Jean-Charles</first><last>Lamirel</last></author>
       <author><first>Shadi Al</first><last>Shehabi</last></author>
       <author><first>Martial</first><last>Hoffmann</last></author>
-      <author><first>Claire</first><last>Francois</last></author>
+      <author id="claire-francois"><first>Claire</first><last>Francois</last></author>
       <doi>10.3115/1119303.1119305</doi>
       <pages>7–23</pages>
       <url hash="9b514299">W03-2002</url>
@@ -2940,7 +2940,7 @@
     <paper id="7">
       <title>Patent Claim Processing for Readability - Structure Analysis and Term Explanation</title>
       <author><first>Akihiro</first><last>Shinmori</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Yuzo</first><last>Marukawa</last></author>
       <author><first>Makoto</first><last>Iwayama</last></author>
       <doi>10.3115/1119303.1119310</doi>
@@ -2969,10 +2969,10 @@
     </frontmatter>
     <paper id="1">
       <title>Understanding Information Graphics: A Discourse-Level Problem</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <author><first>Stephanie</first><last>Elzer</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Daniel</first><last>Chester</last></author>
       <pages>1–12</pages>
       <url hash="4fe74938">W03-2101</url>
@@ -2981,7 +2981,7 @@
     <paper id="2">
       <title>Annotating Opinions in the World Press</title>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>13–22</pages>
       <url hash="d2c0702c">W03-2102</url>
       <bibkey>wilson-wiebe-2003-annotating</bibkey>
@@ -2989,7 +2989,7 @@
     <paper id="3">
       <title>Answering Clarification Questions</title>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Patrick G.T.</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick G.T.</first><last>Healey</last></author>
       <author><first>James</first><last>King</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <author><first>Greg J.</first><last>Mills</last></author>
@@ -3026,7 +3026,7 @@
       <author><first>Fumihiro</first><last>Adachi</last></author>
       <author><first>Shinichi</first><last>Ueno</last></author>
       <author><first>Tatsuya</first><last>Kawahara</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>87–96</pages>
       <url hash="f4281ca4">W03-2107</url>
       <bibkey>komatani-etal-2003-flexible</bibkey>
@@ -3035,7 +3035,7 @@
       <title>Building a New <fixed-case>I</fixed-case>nternet Chat System for Sharing Timing Information</title>
       <author><first>Kanayo</first><last>Ogura</last></author>
       <author><first>Takeshi</first><last>Masuda</last></author>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <pages>97–104</pages>
       <url hash="eebbd18e">W03-2108</url>
       <bibkey>ogura-etal-2003-building</bibkey>
@@ -3044,14 +3044,14 @@
       <title>Interpreter for Highly Portable Spoken Dialogue System</title>
       <author><first>Masamitsu</first><last>Umeda</last></author>
       <author><first>Satoru</first><last>Kogure</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>105–114</pages>
       <url hash="ca990ad7">W03-2109</url>
       <bibkey>umeda-etal-2003-interpreter</bibkey>
     </paper>
     <paper id="10">
       <title>Spoken Dialogue for Virtual Advisers in a semi-immersive Command and Control environment</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Michael</first><last>Broughton</last></author>
       <author><first>Andrew</first><last>Zschorn</last></author>
       <author><first>Elizabeth</first><last>Pronger</last></author>
@@ -3061,8 +3061,8 @@
     </paper>
     <paper id="11">
       <title>Using <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z simulations to bootstrap Reinforcement - Learning based dialog management systems</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>135–139</pages>
       <url hash="f228916e">W03-2111</url>
       <bibkey>williams-young-2003-using</bibkey>
@@ -3081,7 +3081,7 @@
     <paper id="13">
       <title>Some empirical findings on dialogue management and domain ontologies in dialogue systems - Implications from an evaluation of <fixed-case>B</fixed-case>ird<fixed-case>Q</fixed-case>uest</title>
       <author><first>Annika</first><last>Flycht-Eriksson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>158–167</pages>
       <url hash="1fdc306d">W03-2113</url>
       <bibkey>flycht-eriksson-jonsson-2003-empirical</bibkey>
@@ -3099,7 +3099,7 @@
       <title>Ontology-based Contextual Coherence Scoring</title>
       <author><first>Robert</first><last>Porzel</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
-      <author><first>Christof E.</first><last>Müller</last></author>
+      <author id="christof-muller"><first>Christof E.</first><last>Müller</last></author>
       <pages>178–186</pages>
       <url hash="b4ec2952">W03-2115</url>
       <bibkey>porzel-etal-2003-ontology</bibkey>
@@ -3114,7 +3114,7 @@
     </paper>
     <paper id="17">
       <title>Multi-Level Annotation in <fixed-case>MMAX</fixed-case></title>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>198–207</pages>
       <url hash="e1eaaf03">W03-2117</url>
@@ -3122,10 +3122,10 @@
     </paper>
     <paper id="18">
       <title>Domain Specific Speech Acts for Spoken Language Translation</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Chad</first><last>Langley</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Dorcas</first><last>Wallace</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
       <pages>208–217</pages>
@@ -3143,7 +3143,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>PAL</fixed-case>ink<fixed-case>A</fixed-case>: A highly customisable tool for discourse annotation</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>39–43</pages>
       <url hash="23fb4ccc">W03-2120</url>
       <bibkey>orasan-2003-palinka</bibkey>
@@ -3176,18 +3176,18 @@
     </paper>
     <paper id="24">
       <title>Learning to Speak to a Spoken Language System: Vocabulary Convergence in Novice Users</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>149–153</pages>
       <url hash="e9ef0d6f">W03-2124</url>
       <bibkey>levow-2003-learning</bibkey>
     </paper>
     <paper id="25">
       <title>A procedure assistant for astronauts in a functional programming architecture, with step previewing and spoken correction of dialogue moves</title>
-      <author><first>Gregory</first><last>Aist</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>John</first><last>Dowding</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>Susana</first><last>Early</last></author>
+      <author id="gregory-aist"><first>Gregory</first><last>Aist</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="susana-early"><first>Susana</first><last>Early</last></author>
       <author><first>Jim</first><last>Hieronymus</last></author>
       <pages>154–157</pages>
       <url hash="794f8278">W03-2125</url>
@@ -3195,8 +3195,8 @@
     </paper>
     <paper id="26">
       <title>Dialog Input Ranking in a Multi-Domain Environment Using Transferable Belief Model</title>
-      <author><first>Hong-I</first><last>Ng</last></author>
-      <author><first>Kim-Teng</first><last>Lua</last></author>
+      <author id="hong-i-ng"><first>Hong-I</first><last>Ng</last></author>
+      <author id="kim-teng-lua"><first>Kim-Teng</first><last>Lua</last></author>
       <pages>187–191</pages>
       <url hash="5c17de37">W03-2126</url>
       <bibkey>ng-lua-2003-dialog</bibkey>
@@ -3204,7 +3204,7 @@
     <paper id="27">
       <title>Annotating emotion in dialogue</title>
       <author><first>Richard</first><last>Craggs</last></author>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <pages>218–225</pages>
       <url hash="31d7a97d">W03-2127</url>
       <bibkey>craggs-wood-2003-annotating</bibkey>
@@ -3212,7 +3212,7 @@
     <paper id="28">
       <title>Developing a Typology of Dialogue Acts: Some Boundary Problems</title>
       <author><first>Tiit</first><last>Hennoste</last></author>
-      <author><first>Mare</first><last>Koit</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
       <author><first>Andriela</first><last>Rääbis</last></author>
       <author><first>Krista</first><last>Strandson</last></author>
       <author><first>Maret</first><last>Valdisoo</last></author>
@@ -3236,7 +3236,7 @@
     <paper id="1">
       <title>Improving Machine Translation Quality with Automatic Named Entity Recognition</title>
       <author><first>Bogdan</first><last>Babych</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="bfa7b2d2">W03-2201</url>
       <bibkey>babych-hartley-2003-improving</bibkey>
     </paper>
@@ -3245,13 +3245,13 @@
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <author><first>David</first><last>Tugwell</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <url hash="eeec6830">W03-2202</url>
       <bibkey>koeling-etal-2003-evaluation</bibkey>
     </paper>
     <paper id="3">
       <title>Two Approaches to Aspect Assignment in an <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>olish Machine Translation System</title>
-      <author><first>Anna</first><last>Kupsc</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupsc</last></author>
       <url hash="3e9462e5">W03-2203</url>
       <bibkey>kupsc-2003-two</bibkey>
     </paper>
@@ -3264,14 +3264,14 @@
     <paper id="5">
       <title>Parallel Corpora Segmentation Using Anchor Words</title>
       <author><first>Francisco</first><last>Nevado</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="4bdfa9d1">W03-2205</url>
       <bibkey>nevado-etal-2003-parallel</bibkey>
     </paper>
     <paper id="6">
       <title>Computer-based Support for Patients with Limited <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <author><first>Hermione</first><last>Lovel</last></author>
       <url hash="70507408">W03-2206</url>
       <bibkey>somers-lovel-2003-computer</bibkey>
@@ -3289,7 +3289,7 @@
       <address>Budapest, Hungary</address>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <editor><first>Helmut</first><last>Horacek</last></editor>
-      <editor><first>Kees</first><last>van Deemter</last></editor>
+      <editor id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></editor>
       <month>April</month>
       <year>2003</year>
       <url hash="a96c09f8">W03-23</url>
@@ -3301,8 +3301,8 @@
     </frontmatter>
     <paper id="1">
       <title>Dynamic Generation of Cooperative Natural Language Responses in <fixed-case>WEBCOOP</fixed-case></title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Patrick</first><last>Saint Dizier</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint Dizier</last></author>
       <url hash="ddbdd997">W03-2301</url>
       <bibkey>benamara-saint-dizier-2003-dynamic</bibkey>
     </paper>
@@ -3314,7 +3314,7 @@
     </paper>
     <paper id="3">
       <title>Multilingual Revision</title>
-      <author><first>Charles</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
       <url hash="0c2bbf42">W03-2303</url>
       <bibkey>callaway-2003-multilingual</bibkey>
     </paper>
@@ -3339,7 +3339,7 @@
     </paper>
     <paper id="7">
       <title>A New Model for Generating Multimodal Referring Expressions</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Ielka</first><last>van der Sluis</last></author>
       <url hash="3f71aa48">W03-2307</url>
       <bibkey>krahmer-van-der-sluis-2003-new</bibkey>
@@ -3354,14 +3354,14 @@
     </paper>
     <paper id="9">
       <title>Phrasal Generator for Describing Relational Database Queries</title>
-      <author><first>Michael J.</first><last>Minock</last></author>
+      <author id="michael-minock"><first>Michael J.</first><last>Minock</last></author>
       <url hash="52cb833f">W03-2309</url>
       <bibkey>minock-2003-phrasal</bibkey>
     </paper>
     <paper id="10">
       <title>Porting to an <fixed-case>I</fixed-case>talian Surface Realizer: A Case Study</title>
       <author><first>Alessandra</first><last>Novello</last></author>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <url hash="5c433700">W03-2310</url>
       <bibkey>novello-callaway-2003-porting</bibkey>
     </paper>
@@ -3375,7 +3375,7 @@
     <paper id="12">
       <title>Acquiring and Using Limited User Models in <fixed-case>NLG</fixed-case></title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Sandra</first><last>Williams</last></author>
       <url hash="bda081f5">W03-2312</url>
       <bibkey>reiter-etal-2003-acquiring</bibkey>
@@ -3403,7 +3403,7 @@
     </paper>
     <paper id="16">
       <title>Adapting Chart Realization to <fixed-case>CCG</fixed-case></title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <url hash="623ad747">W03-2316</url>
       <bibkey>white-baldridge-2003-adapting</bibkey>
@@ -3434,19 +3434,19 @@
     </frontmatter>
     <paper id="1">
       <title>The <fixed-case>PARC</fixed-case> 700 Dependency Bank</title>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <author><first>Mary</first><last>Dalrymple</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="5f34000e">W03-2401</url>
       <bibkey>king-etal-2003-parc</bibkey>
     </paper>
     <paper id="2">
       <title>Issues in the Syntactic Annotation of <fixed-case>C</fixed-case>ast3<fixed-case>LB</fixed-case></title>
-      <author><first>Montserrat</first><last>Civit</last></author>
+      <author id="montserrat-civit"><first>Montserrat</first><last>Civit</last></author>
       <author><first>Ma. Antònia</first><last>Martí</last></author>
-      <author><first>Borja</first><last>Navarro</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
       <author><first>Núria</first><last>Bufí</last></author>
       <author><first>Belén</first><last>Fernández</last></author>
       <author><first>Raquel</first><last>Marcos</last></author>
@@ -3455,7 +3455,7 @@
     </paper>
     <paper id="3">
       <title>Practical Annotation Scheme for an <fixed-case>HPSG</fixed-case> Treebank of <fixed-case>B</fixed-case>ulgarian</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <url hash="5fe0fce2">W03-2403</url>
       <bibkey>simov-osenova-2003-practical</bibkey>
@@ -3476,9 +3476,9 @@
     </paper>
     <paper id="6">
       <title>Automatic Multi-Layer Corpus Annotation for Evaluation Question Answering Methods: <fixed-case>CBC</fixed-case>4<fixed-case>K</fixed-case>ids</title>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <author><first>Tiphaine</first><last>Dalmas</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <url hash="a84c6183">W03-2406</url>
@@ -3493,8 +3493,8 @@
     </paper>
     <paper id="8">
       <title>Open Mind Word Expert: Creating Large Annotated Data Collections with Web Users’ Help</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Timothy</first><last>Chklovski</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
       <url hash="385a4c98">W03-2408</url>
       <bibkey>mihalcea-chklovski-2003-open</bibkey>
     </paper>
@@ -3529,7 +3529,7 @@
     <paper id="13">
       <title>The Spoken <fixed-case>D</fixed-case>utch Corpus and its Exploitation Environment</title>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <url hash="989f98da">W03-2413</url>
       <bibkey>oostdijk-broeder-2003-spoken</bibkey>
     </paper>
@@ -3551,18 +3551,18 @@
     </paper>
     <paper id="16">
       <title>Stretching <fixed-case>TEI</fixed-case>: Converting the <fixed-case>G</fixed-case>enia Corpus</title>
-      <author><first>Tomaz</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaz</first><last>Erjavec</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun-ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></author>
       <url hash="9767e6c1">W03-2416</url>
       <bibkey>erjavec-etal-2003-stretching</bibkey>
     </paper>
     <paper id="17">
       <title>The <fixed-case>M</fixed-case>eta<fixed-case>G</fixed-case>rammar: a cross-framework and cross-language test-suite generation tool</title>
       <author><first>Alexandra</first><last>Kinyon</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="59a438e9">W03-2417</url>
       <bibkey>kinyon-rambow-2003-metagrammar</bibkey>
     </paper>
@@ -3584,7 +3584,7 @@
     <paper id="1">
       <title>Exploiting Long Distance Collocational Relations in Predictive Typing</title>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <url hash="bb5dda68">W03-2501</url>
       <bibkey>matiasek-baroni-2003-exploiting</bibkey>
     </paper>
@@ -3597,7 +3597,7 @@
     </paper>
     <paper id="3">
       <title>Language-Models for Questions</title>
-      <author><first>Ed</first><last>Schofield</last></author>
+      <author id="edward-schofield"><first>Ed</first><last>Schofield</last></author>
       <url hash="28c42649">W03-2503</url>
       <bibkey>schofield-2003-language</bibkey>
     </paper>
@@ -3605,7 +3605,7 @@
       <title>Automatic Acquisition of Word Interaction Patterns from Corpora</title>
       <author><first>Veska</first><last>Noncheva</last></author>
       <author><first>Joaqium Ferreira</first><last>da Silva</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <url hash="a7e44d4a">W03-2504</url>
       <bibkey>noncheva-etal-2003-automatic</bibkey>
     </paper>
@@ -3627,7 +3627,7 @@
     <paper id="7">
       <title>Word N-Grams for Cluster Keyboards</title>
       <author><first>Nils</first><last>Klarlund</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <url hash="ba92ec98">W03-2507</url>
       <bibkey>klarlund-riley-2003-word</bibkey>
     </paper>
@@ -3644,7 +3644,7 @@
     <paper id="9">
       <title>Domain-Specific Disambiguation for Typing with Ambiguous Keyboards</title>
       <author><first>Karin</first><last>Harbusch</last></author>
-      <author><first>Sasa</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Sasa</first><last>Hasan</last></author>
       <author><first>Hajo</first><last>Hoffmann</last></author>
       <author><first>Michael</first><last>Kühn</last></author>
       <author><first>Bernhard</first><last>Schüler</last></author>
@@ -3670,7 +3670,7 @@
     <paper id="1">
       <title>Intermediate Parsing for Anaphora Resolution? Implementing the Lappin and Leass non-coreference filters</title>
       <author><first>Judita</first><last>Preiss</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="11c9a11c">W03-2601</url>
       <bibkey>preiss-briscoe-2003-intermediate</bibkey>
     </paper>
@@ -3691,13 +3691,13 @@
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="70c8668e">W03-2604</url>
       <bibkey>iida-etal-2003-incorporating</bibkey>
     </paper>
     <paper id="5">
       <title>Associative Descriptions and Salience: A Preliminary Investigation</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <url hash="574f2a9d">W03-2605</url>
       <bibkey>poesio-2003-associative</bibkey>
     </paper>
@@ -3705,13 +3705,13 @@
       <title>Using the Web for Nominal Anaphora Resolution</title>
       <author><first>Katja</first><last>Markert</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Natalia</first><last>Modjeska</last></author>
+      <author id="natalia-n-modjeska"><first>Natalia</first><last>Modjeska</last></author>
       <url hash="497d27c4">W03-2606</url>
       <bibkey>markert-etal-2003-using</bibkey>
     </paper>
     <paper id="7">
       <title>Associative Anaphora Resolution: A Web-Based Approach</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
       <url hash="b007ce78">W03-2607</url>
       <bibkey>bunescu-2003-associative</bibkey>
     </paper>
@@ -3719,9 +3719,9 @@
       <title>Anaphoric arguments of discourse connectives: Semantic properties of antecedents versus non-antecedents</title>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
       <author><first>Cassandre</first><last>Creswell</last></author>
-      <author><first>Katherine</first><last>Forbes</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="kate-forbes-riley"><first>Katherine</first><last>Forbes</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="fb965393">W03-2608</url>
       <bibkey>miltsakaki-etal-2003-anaphoric</bibkey>
     </paper>
@@ -3751,17 +3751,17 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>I</fixed-case>ntroduction: Dialogue Systems: Interaction, Adaptation and Styles of Management</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Björn</first><last>Gämback</last></author>
-      <author><first>William</first><last>Black</last></author>
-      <author><first>Roberta</first><last>Catizone</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gämback</last></author>
+      <author id="william-j-black"><first>William</first><last>Black</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="68abfbea">W03-2701</url>
       <bibkey>jokinen-etal-2003-introduction</bibkey>
     </paper>
     <paper id="2">
       <title>Why a Static Interpretation Is Not Sufficient in Spatial Communication</title>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <author><first>Kerstin</first><last>Fischer</last></author>
       <author><first>Thora</first><last>Tenbrink</last></author>
       <url hash="10ff0295">W03-2702</url>
@@ -3769,7 +3769,7 @@
     </paper>
     <paper id="3">
       <title>Learning to Classify Utterances in a Task-Oriented Dialogue</title>
-      <author><first>William</first><last>Black</last></author>
+      <author id="william-j-black"><first>William</first><last>Black</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
       <author><first>Adam</first><last>Funk</last></author>
       <author><first>Andrew</first><last>Conroy</last></author>
@@ -3778,7 +3778,7 @@
     </paper>
     <paper id="4">
       <title>Flexibility and Efficiency through Personalisation? Experiments with a conversational Program Guide Information System</title>
-      <author><first>Péter</first><last>Boda</last></author>
+      <author id="peter-pal-boda"><first>Péter</first><last>Boda</last></author>
       <author><first>Suresh</first><last>Chande</last></author>
       <author><first>Elviira</first><last>Hartikainen</last></author>
       <author><first>Nidhi</first><last>Gupta</last></author>
@@ -3788,9 +3788,9 @@
     </paper>
     <paper id="5">
       <title>Multimodal Dialogue Management in the <fixed-case>COMIC</fixed-case> Project</title>
-      <author><first>Roberta</first><last>Catizone</last></author>
-      <author><first>Andrea</first><last>Setzer</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="roberta-catizone"><first>Roberta</first><last>Catizone</last></author>
+      <author id="andrea-setzer"><first>Andrea</first><last>Setzer</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="cba206ee">W03-2705</url>
       <bibkey>catizone-etal-2003-multimodal</bibkey>
     </paper>
@@ -3810,7 +3810,7 @@
     <paper id="8">
       <title>Distributed Dialogue Management in a Blackboard Architecture</title>
       <author><first>Antti</first><last>Kerminen</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <url hash="6c155371">W03-2708</url>
       <bibkey>kerminen-jokinen-2003-distributed</bibkey>
     </paper>
@@ -3824,8 +3824,8 @@
     <paper id="10">
       <title>Machine Learning for Shallow Interpretation of User Utterances in Spoken Dialogue Systems</title>
       <author><first>Piroska</first><last>Lendvai</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <url hash="22ba43fb">W03-2710</url>
       <bibkey>lendvai-etal-2003-machine</bibkey>
     </paper>
@@ -3886,17 +3886,17 @@
     </frontmatter>
     <paper id="1">
       <title>Reuse and Challenges in Evaluating Language Generation Systems: Position Paper</title>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>3-10</pages>
       <url hash="ee055015">W03-2801</url>
       <bibkey>bontcheva-2003-reuse</bibkey>
     </paper>
     <paper id="2">
       <title>The <fixed-case>PEACE</fixed-case> <fixed-case>SLDS</fixed-case> understanding evaluation paradigm of the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> campaign</title>
-      <author><first>Laurence</first><last>Devillers</last></author>
-      <author><first>Hélène</first><last>Maynard</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="laurence-devillers"><first>Laurence</first><last>Devillers</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Maynard</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>11-18</pages>
       <url hash="f5d843ed">W03-2802</url>
       <bibkey>devillers-etal-2003-peace</bibkey>
@@ -3913,7 +3913,7 @@
       <title>A Quantitative Method for Machine Translation Evaluation</title>
       <author><first>Jesús</first><last>Tomás</last></author>
       <author><first>Josep Àngel</first><last>Mas</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>27-34</pages>
       <url hash="f3cc1fb0">W03-2804</url>
       <bibkey>tomas-etal-2003-quantitative</bibkey>
@@ -3928,7 +3928,7 @@
     </paper>
     <paper id="6">
       <title>Intrinsic versus Extrinsic Evaluations of Parsing Systems</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Ben</first><last>Hutchinson</last></author>
       <pages>43-50</pages>
       <url hash="abc52adc">W03-2806</url>
@@ -3957,8 +3957,8 @@
     </paper>
     <paper id="10">
       <title>Setting up an Evaluation Infrastructure for Human Language Technologies in <fixed-case>E</fixed-case>urope</title>
-      <author><first>Kevin</first><last>McTait</last></author>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="kevin-mctait"><first>Kevin</first><last>McTait</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <pages>7377</pages>
       <url hash="36bc79aa">W03-2810</url>
       <bibkey>mctait-choukri-2003-setting</bibkey>
@@ -3988,10 +3988,10 @@
     </paper>
     <paper id="2">
       <title>A Large-scale Inheritance-based Morphological Lexicon for <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
       <author><first>Dunstan</first><last>Brown</last></author>
-      <author><first>Greville C.</first><last>Corbett</last></author>
+      <author id="greville-c-corbett"><first>Greville C.</first><last>Corbett</last></author>
       <url hash="8543c565">W03-2902</url>
       <pages>9–16</pages>
       <bibkey>evans-etal-2003-large</bibkey>
@@ -3999,18 +3999,18 @@
     <paper id="3">
       <title>Automatic Lexical Acquisition from Raw Corpora: An Application to <fixed-case>R</fixed-case>ussian</title>
       <author><first>Antoni</first><last>Oliver</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="39a42b56">W03-2903</url>
       <pages>17–24</pages>
       <bibkey>oliver-etal-2003-automatic</bibkey>
     </paper>
     <paper id="4">
       <title>The <fixed-case>MULTEXT</fixed-case>-East Morphosyntactic Specification for <fixed-case>S</fixed-case>lavic Languages</title>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Vladimír</first><last>Petkevič</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="vladimir-petkevic"><first>Vladimír</first><last>Petkevič</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Marko</first><last>Tadić</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
       <url hash="9b4af139">W03-2904</url>
@@ -4064,8 +4064,8 @@
     </paper>
     <paper id="11">
       <title>Morpho-syntactic Clues for Terminological Processing in <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Goran</first><last>Nenadić</last></author>
-      <author><first>Irena</first><last>Spasić</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <url hash="8edc7c79">W03-2911</url>
       <pages>79–86</pages>
@@ -4073,7 +4073,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>R</fixed-case>ussian Morphology: Ressources and <fixed-case>J</fixed-case>ava Software Application</title>
-      <author><first>Serge</first><last>Yablonsky</last></author>
+      <author id="serge-a-yablonsky"><first>Serge</first><last>Yablonsky</last></author>
       <url hash="7551a2a9">W03-2912</url>
       <pages>87–94</pages>
       <bibkey>yablonsky-2003-russian</bibkey>
@@ -4092,8 +4092,8 @@
     </frontmatter>
     <paper id="1">
       <title>Parsing <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars and Tree Insertion Grammars with Simultaneous Adjunctions</title>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
-      <author><first>Víctor J.</first><last>Díaz</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="victor-j-diaz"><first>Víctor J.</first><last>Díaz</last></author>
       <abstract>A large part of wide coverage Tree Adjoining Grammars (TAG) is formed by trees that satisfy the restrictions imposed by Tree Insertion Grammars (TIG). This characteristic can be used to reduce the practical complexity of TAG parsing, applying the standard adjunction operation only in those cases in which the simpler cubic-time TIG adjunction cannot be applied. In this paper, we describe a parsing algorithm managing simultaneous adjunctions in TAG and TIG.</abstract>
       <pages>19–30</pages>
       <url hash="9c962f07">W03-3001</url>
@@ -4102,7 +4102,7 @@
     <paper id="2">
       <title>Implémentation du système <fixed-case>MASPAR</fixed-case> selon une approche multi-agent</title>
       <author><first>Chafik</first><last>Aloulou</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <author><first>Ahmed</first><last>Hadj Kacem</last></author>
       <author><first>Souha</first><last>Hammami Mezghani</last></author>
       <abstract>Le traitement automatique du langage naturel est un axe de recherche qui connaît chaque jour de nouvelles théories et approches. Les systèmes d’analyse automatique qui sont fondés sur une approche séquentielle présentent plusieurs inconvénients. Afin de pallier ces limites, nous nous sommes intéressés à la réalisation d’un système d’analyse syntaxique de textes arabes basé sur l’approche multi-agent : MASPAR « Multi-Agent System for Parsing ARabic ».</abstract>
@@ -4158,7 +4158,7 @@
     </paper>
     <paper id="9">
       <title>Subtree Parsing to Speed up Deep Analysis</title>
-      <author><first>Kilian</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <abstract>Within a grammar formalism that treats syntax analysis as a global optimization problem, methods are investigated to improve parsing performance by recombining the solutions of smaller and easier subproblems. The robust nature of the formalism allows the application of this technique with little change to the original grammar.</abstract>
       <pages>91–102</pages>
@@ -4175,7 +4175,7 @@
     </paper>
     <paper id="11">
       <title>Generative versus Discriminative Models for Statistical Left-Corner Parsing</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <abstract>We propose two statistical left-corner parsers and investigate their accuracy at varying speeds. The parser based on a generative probability model achieves state-of-the-art accuracy when sufficient time is available, but when high speed is required the parser based on a discriminative probability model performs better. Neural network probability estimation is used to handle conditioning on both the unbounded parse histories and the unbounded lookahead strings.</abstract>
       <pages>115–126</pages>
       <url hash="4c521f44">W03-3011</url>
@@ -4183,8 +4183,8 @@
     </paper>
     <paper id="12">
       <title><fixed-case>PACE</fixed-case> — Parser Comparison and Evaluation</title>
-      <author><first>Vladimir</first><last>Kadlec</last></author>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="vladimir-kadlec"><first>Vladimir</first><last>Kadlec</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <abstract>The paper introduces PACE — a parser comparison and evaluation system for the syntactic processing of natural languages. The analysis is based on context free grammar with contextual extensions (constraints). The system is able to manage very large and extremely ambiguous CF grammars. It is independent of the parsing algorithm used. The tool can solve the contextual constraints on the resulting CF structure, select the best parsing trees according to their probabilities, or combine them. We discuss the advantages and disadvantages of our modular design as well as how efficiently it processes the standard evaluation grammars.</abstract>
       <pages>211–212</pages>
       <url hash="6052aaf4">W03-3012</url>
@@ -4194,7 +4194,7 @@
       <title><fixed-case>GLR</fixed-case> Parser with Conditional Action Model using Surface Phrasal Types for <fixed-case>K</fixed-case>orean</title>
       <author><first>Yong-Jae</first><last>Kwak</last></author>
       <author><first>So-Young</first><last>Park</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <abstract>In this paper, we propose a new probabilistic GLR parsing method that can solve the problems of conventional methods. Our proposed Conditional Action Model uses Surface Phrasal Types (SPTs) encoding the functional word sequences of the sub-trees for describing structural characteristics of the partial parse. And, the proposed GLR model outperforms the previous methods by about 6~8%.</abstract>
       <pages>213–214</pages>
       <url hash="7b8fe78f">W03-3013</url>
@@ -4203,7 +4203,7 @@
     <paper id="14">
       <title>Parsing Domain Actions with Phrase-Level Grammars and Memory-Based Learners</title>
       <author><first>Chad</first><last>Langley</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <abstract>In this paper, we describe an approach to analysis for spoken language translation that combines phrase-level grammar-based parsing and automatic domain action classification. The job of the analyzer is to transform utterances into a shallow semantic task-oriented interlingua representation. The goal of our hybrid approach is to provide accurate real-time analyses and to improve robustness and portability to new domains and languages.</abstract>
       <pages>127–136</pages>
       <url hash="4412416f">W03-3014</url>
@@ -4236,7 +4236,7 @@
     </paper>
     <paper id="18">
       <title>Dependency parsing using dependency graph for storing alternative structures</title>
-      <author><first>Tomasz</first><last>Obrebski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrebski</last></author>
       <abstract>In this paper an efficient algorithm for dependency parsing is described in which ambiguous dependency structure of a sentence is represented in the form of a graph. The idea of the algorithm is shortly outlined and some issues as to its time complexity are discussed.</abstract>
       <url hash="36a93ea0">W03-3018</url>
       <bibkey>obrebski-2003-dependency</bibkey>
@@ -4244,7 +4244,7 @@
     <paper id="19">
       <title>Combining Rule-based and Data-driven Techniques for Grammatical Relation Extraction in Spoken Language</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <abstract>We investigate an aspect of the relationship between parsing and corpus-based methods in NLP that has received relatively little attention: coverage augmentation in rule-based parsers. In the specific task of determining grammatical relations (such as subjects and objects) in transcribed spoken language, we show that a combination of rule-based and corpus-based approaches, where a rule-based system is used as the teacher (or an automatic data annotator) to a corpus-based system, outperforms either system in isolation.</abstract>
       <url hash="e103fb6f">W03-3019</url>
       <bibkey>sagae-lavie-2003-combining</bibkey>
@@ -4253,7 +4253,7 @@
       <title>Partially Ordered Multiset Context-free Grammars and Free-word-order Parsing</title>
       <author><first>Mark-Jan</first><last>Nederhof</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <abstract>We present a new formalism, partially ordered multiset context-free grammars (poms-CFG), along with an Earley-style parsing algorithm. The formalism, which can be thought of as a generalization of context-free grammars with partially ordered right-hand sides, is of interest in its own right, and also as infrastructure for obtaining tighter complexity bounds for more expressive context-free formalisms intended to express free or multiple word-order, such as ID/LP grammars. We reduce ID/LP grammars to poms-grammars, thereby getting finer-grained bounds on the parsing complexity of ID/LP grammars. We argue that in practice, the width of attested ID/LP grammars is small, yielding effectively polynomial time complexity for ID/LP grammar parsing.</abstract>
       <pages>171–182</pages>
       <url hash="88a73627">W03-3020</url>
@@ -4261,7 +4261,7 @@
     </paper>
     <paper id="21">
       <title>On maximizing metrics for syntactic disambiguation</title>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <abstract>Given a probabilistic parsing model and an evaluation metric for scoring the match between parse-trees, e.g., PARSEVAL [Black et al., 1991], this paper addresses the problem of how to select the on average best scoring parse-tree for an input sentence. Common wisdom dictates that it is optimal to select the parse with the highest probability, regardless of the evaluation metric. In contrast, the Maximizing Metrics (MM) method [Goodman, 1998, Stolcke et al., 1997] proposes that an algorithm that optimizes the evaluation metric itself constitutes the optimal choice. We study the MM method within parsing. We observe that the MM does not always hold for tree-bank models, and that optimizing weak metrics is not interesting for semantic processing. Subsequently, we state an alternative proposition: the optimal algorithm must maximize the metric that scores parse-trees according to linguistically relevant features. We present new algorithms that optimize metrics that take into account increasingly more linguistic features, and exhibit experiments in support of our claim.</abstract>
       <pages>183–194</pages>
       <url hash="c27caafc">W03-3021</url>
@@ -4273,13 +4273,13 @@
       <author><first>Yong-Jae</first><last>Kwak</last></author>
       <author><first>Hoo-Jung</first><last>Chung</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <bibkey>park-etal-2003-automatic</bibkey>
     </paper>
     <paper id="23">
       <title>Statistical Dependency Analysis with Support Vector Machines</title>
       <author><first>Hiroyasu</first><last>Yamada</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <abstract>In this paper, we propose a method for analyzing word-word dependencies using deterministic bottom-up manner using Support Vector machines. We experimented with dependency trees converted from Penn treebank data, and achieved over 90% accuracy of word-word dependency. Though the result is little worse than the most up-to-date phrase structure based parsers, it looks satisfactorily accurate considering that our parser uses no information from phrase structures.</abstract>
       <pages>195–206</pages>
       <url hash="1f1ee1c7">W03-3023</url>
diff --git a/data/xml/W04.xml b/data/xml/W04.xml
index a5ec08a978..fb2fb5b62e 100644
--- a/data/xml/W04.xml
+++ b/data/xml/W04.xml
@@ -41,7 +41,7 @@
     </paper>
     <paper id="4">
       <title>Automatic Acquisition of Feature-Based Phonotactic Resources</title>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <author><first>Robert</first><last>Kelly</last></author>
       <author><first>Moritz</first><last>Neugebauer</last></author>
       <pages>27–34</pages>
@@ -50,7 +50,7 @@
     </paper>
     <paper id="5">
       <title>Priors in <fixed-case>B</fixed-case>ayesian Learning of Phonological Rules</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>35–42</pages>
       <url hash="5a300117">W04-0105</url>
@@ -67,9 +67,9 @@
     <paper id="7">
       <title>Unsupervised Induction of Natural Language Morphology Inflection Classes</title>
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>52–61</pages>
       <url hash="b09ee4b8">W04-0107</url>
       <bibkey>monson-etal-2004-unsupervised</bibkey>
@@ -78,8 +78,8 @@
       <title>A Comparison of Two Different Approaches to Morphological Analysis of <fixed-case>D</fixed-case>utch</title>
       <author><first>Guy</first><last>De Pauw</last></author>
       <author><first>Tom</first><last>Laureys</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>62–69</pages>
       <url hash="c1003340">W04-0108</url>
       <bibkey>de-pauw-etal-2004-comparison</bibkey>
@@ -122,7 +122,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Framework for Feature based Description of Low level Discourse</title>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Ezequiel Andujar</first><last>Hinojosa</last></author>
       <author><first>Robert Sola</first><last>Salvatierra</last></author>
       <pages>1–8</pages>
@@ -131,9 +131,9 @@
     </paper>
     <paper id="2">
       <title><fixed-case>COOPML</fixed-case>: Towards Annotating Cooperative Discourse</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Veronique</first><last>Moriceau</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Veronique</first><last>Moriceau</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>9–16</pages>
       <url hash="dd101fa7">W04-0202</url>
       <bibkey>benamara-etal-2004-coopml</bibkey>
@@ -163,8 +163,8 @@
     </paper>
     <paper id="6">
       <title>Discourse-level Annotation for Investigating Information Structure</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <pages>41–48</pages>
       <url hash="1038af28">W04-0206</url>
       <bibkey>kruijff-korbayova-kruijff-2004-discourse</bibkey>
@@ -172,7 +172,7 @@
     <paper id="7">
       <title>Text Type Structure and Logical Document Structure</title>
       <author><first>Hagen</first><last>Langer</last></author>
-      <author><first>Harald</first><last>Lungen</last></author>
+      <author id="harald-lungen"><first>Harald</first><last>Lungen</last></author>
       <author><first>Petra Saskia</first><last>Bayerl</last></author>
       <pages>49–56</pages>
       <url hash="3fb70a77">W04-0207</url>
@@ -181,23 +181,23 @@
     <paper id="8">
       <title>Temporal Discourse Models for Narrative Structure</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>57–64</pages>
       <url hash="3ecaf78b">W04-0208</url>
       <bibkey>mani-pustejovsky-2004-temporal</bibkey>
     </paper>
     <paper id="9">
       <title>Exploiting Semantic Information for Manual Anaphoric Annotation in <fixed-case>C</fixed-case>ast3<fixed-case>LB</fixed-case> Corpus</title>
-      <author><first>Borja</first><last>Navarro</last></author>
-      <author><first>Ruben</first><last>Izquierdo</last></author>
-      <author><first>Maximiliano</first><last>Saiz-Noeda</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
+      <author id="ruben-izquierdo"><first>Ruben</first><last>Izquierdo</last></author>
+      <author id="maximiliano-saiz-noeda"><first>Maximiliano</first><last>Saiz-Noeda</last></author>
       <pages>65–71</pages>
       <url hash="99fbfc05">W04-0209</url>
       <bibkey>navarro-etal-2004-exploiting</bibkey>
     </paper>
     <paper id="10">
       <title>Discourse Annotation and Semantic Annotation in the <fixed-case>GNOME</fixed-case> corpus</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>72–79</pages>
       <url hash="85a4bd53">W04-0210</url>
       <bibkey>poesio-2004-discourse</bibkey>
@@ -205,7 +205,7 @@
     <paper id="11">
       <title>Sentential Structure and Discourse Parsing</title>
       <author><first>Livia</first><last>Polanyi</last></author>
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <author><first>Martin</first><last>van den Berg</last></author>
       <author><first>Gian Lorenzo</first><last>Thione</last></author>
       <author><first>David</first><last>Ahn</last></author>
@@ -217,8 +217,8 @@
       <title>Annotation and Data Mining of the <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>88–95</pages>
       <url hash="f1204b3d">W04-0212</url>
       <bibkey>prasad-etal-2004-annotation</bibkey>
@@ -232,11 +232,11 @@
     </paper>
     <paper id="14">
       <title>Discourse Annotation in the Monroe Corpus</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <author><first>Preethum</first><last>Prithviraj</last></author>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>103–109</pages>
       <url hash="4eb4cc36">W04-0214</url>
       <bibkey>tetreault-etal-2004-discourse</bibkey>
@@ -245,7 +245,7 @@
       <title><fixed-case>L</fixed-case>ive<fixed-case>T</fixed-case>ree: An Integrated Workbench for Discourse Processing</title>
       <author><first>Gian Lorenzo</first><last>Thione</last></author>
       <author><first>Martin</first><last>van den Berg</last></author>
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <author><first>Livia</first><last>Polanyi</last></author>
       <pages>110–117</pages>
       <url hash="aa0adc1e">W04-0215</url>
@@ -260,7 +260,7 @@
       <author><first>Andrew</first><last>Koontz-Garboden</last></author>
       <author><first>Tatiana</first><last>Nikitina</last></author>
       <author><first>M. Catherine</first><last>O’Connor</last></author>
-      <author><first>Tom</first><last>Wasow</last></author>
+      <author id="thomas-wasow"><first>Tom</first><last>Wasow</last></author>
       <pages>118–125</pages>
       <url hash="28262d6b">W04-0216</url>
       <bibkey>zaenen-etal-2004-animacy</bibkey>
@@ -282,7 +282,7 @@
     <paper id="1">
       <title>Competence and Performance Grammar in Incremental Processing</title>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Patrick</first><last>Sturt</last></author>
       <pages>1–8</pages>
       <url hash="a7141e35">W04-0301</url>
@@ -290,7 +290,7 @@
     </paper>
     <paper id="2">
       <title>Stochastically Evaluating the Validity of Partial Parse Trees in Incremental Parsing</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <author><first>Yasuyoshi</first><last>Inagaki</last></author>
       <pages>9–15</pages>
@@ -306,16 +306,16 @@
     </paper>
     <paper id="4">
       <title>Incremental Parsing with Reference Interaction</title>
-      <author><first>Scott C.</first><last>Stoness</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="scott-c-stoness"><first>Scott C.</first><last>Stoness</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>18–25</pages>
       <url hash="c6dda52e">W04-0304</url>
       <bibkey>stoness-etal-2004-incremental</bibkey>
     </paper>
     <paper id="5">
       <title>Lookahead in Deterministic Left-Corner Parsing</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>26–33</pages>
       <url hash="3b74b3d3">W04-0305</url>
       <bibkey>henderson-2004-lookahead</bibkey>
@@ -330,8 +330,8 @@
     </paper>
     <paper id="7">
       <title>A Statistical Constraint Dependency Grammar (<fixed-case>CDG</fixed-case>) Parser</title>
-      <author><first>Wen</first><last>Wang</last></author>
-      <author><first>Mary P.</first><last>Harper</last></author>
+      <author id="wen-wang"><first>Wen</first><last>Wang</last></author>
+      <author id="mary-harper"><first>Mary P.</first><last>Harper</last></author>
       <pages>42–49</pages>
       <url hash="147c9948">W04-0307</url>
       <bibkey>wang-harper-2004-statistical</bibkey>
@@ -345,7 +345,7 @@
     </paper>
     <paper id="9">
       <title>The Information-Processing Difficulty of Incremental Parsing</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>58–65</pages>
       <url hash="414c2334">W04-0309</url>
       <bibkey>hale-2004-information</bibkey>
@@ -375,7 +375,7 @@
     <paper id="13">
       <title>Modeling Sentence Processing in <fixed-case>ACT</fixed-case>-<fixed-case>R</fixed-case></title>
       <author><first>Shravan</first><last>Vasishth</last></author>
-      <author><first>Richard L.</first><last>Lewis</last></author>
+      <author id="richard-l-lewis"><first>Richard L.</first><last>Lewis</last></author>
       <pages>82–87</pages>
       <url hash="2bb846d4">W04-0313</url>
       <bibkey>vasishth-lewis-2004-modeling</bibkey>
@@ -408,7 +408,7 @@
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Kentaro</first><last>Furihata</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Koichi</first><last>Takeuchi</last></author>
       <pages>9–16</pages>
       <url hash="ccc9af29">W04-0402</url>
@@ -423,7 +423,7 @@
     </paper>
     <paper id="4">
       <title>Translation by Machine of Complex Nominals: Getting it Right</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Takaaki</first><last>Tanaka</last></author>
       <pages>24–31</pages>
       <url hash="24733618">W04-0404</url>
@@ -449,12 +449,12 @@
     </paper>
     <paper id="7">
       <title>Representation and Treatment of Multiword Expressions in <fixed-case>B</fixed-case>asque</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Olatz</first><last>Ansa</last></author>
-      <author><first>Xabier</first><last>Artola</last></author>
-      <author><first>Nerea</first><last>Ezeiza</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Ruben</first><last>Urizar</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="olatz-ansa"><first>Olatz</first><last>Ansa</last></author>
+      <author id="xabier-artola"><first>Xabier</first><last>Artola</last></author>
+      <author id="nerea-ezeiza"><first>Nerea</first><last>Ezeiza</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="ruben-urizar"><first>Ruben</first><last>Urizar</last></author>
       <pages>48–55</pages>
       <url hash="709e56f3">W04-0407</url>
       <bibkey>alegria-etal-2004-representation</bibkey>
@@ -469,7 +469,7 @@
     <paper id="9">
       <title>Integrating Morphology with Multi-word Expression Processing in <fixed-case>T</fixed-case>urkish</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Bilge</first><last>Say</last></author>
       <pages>64–71</pages>
       <url hash="5f9a627a">W04-0409</url>
@@ -504,9 +504,9 @@
     </paper>
     <paper id="13">
       <title><fixed-case>NP</fixed-case>-External Arguments: A Study of Argument Sharing in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Ruth</first><last>Reeves</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <pages>96–103</pages>
       <url hash="2ac78f11">W04-0413</url>
       <bibkey>meyers-etal-2004-np</bibkey>
@@ -536,7 +536,7 @@
       <title>Evaluation of Restricted Domain Question-Answering Systems</title>
       <author><first>Anne R.</first><last>Diekema</last></author>
       <author><first>Ozgur</first><last>Yilmazel</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <pages>2–7</pages>
       <url hash="a0f27089">W04-0502</url>
       <bibkey>diekema-etal-2004-evaluation</bibkey>
@@ -552,7 +552,7 @@
     <paper id="4">
       <title>A Qualitative Comparison of Scientific and Journalistic Texts from the Perspective of Extracting Definitions</title>
       <author><first>Igal</first><last>Gabbay</last></author>
-      <author><first>Richard F.E.</first><last>Sutcliffe</last></author>
+      <author id="richard-f-e-sutcliffe"><first>Richard F.E.</first><last>Sutcliffe</last></author>
       <pages>16–22</pages>
       <url hash="3fba3759">W04-0504</url>
       <bibkey>gabbay-sutcliffe-2004-qualitative</bibkey>
@@ -560,27 +560,27 @@
     <paper id="5">
       <title><fixed-case>B</fixed-case>io<fixed-case>G</fixed-case>rapher: Biography Questions as a Restricted Domain Question Answering Task</title>
       <author><first>Oren</first><last>Tsur</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>23–30</pages>
       <url hash="f037070b">W04-0505</url>
       <bibkey>tsur-etal-2004-biographer</bibkey>
     </paper>
     <paper id="6">
       <title>Cooperative Question Answering in Restricted Domains: the <fixed-case>WEBCOOP</fixed-case> Experiment</title>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>31–38</pages>
       <url hash="5831db25">W04-0506</url>
       <bibkey>benamara-2004-cooperative</bibkey>
     </paper>
     <paper id="7">
       <title>A Practical <fixed-case>QA</fixed-case> System in Restricted Domains</title>
-      <author><first>Hoojung</first><last>Chung</last></author>
+      <author id="hoojung-chung"><first>Hoojung</first><last>Chung</last></author>
       <author><first>Young-In</first><last>Song</last></author>
       <author><first>Kyoung-Soo</first><last>Han</last></author>
       <author><first>Do-Sang</first><last>Yoon</last></author>
-      <author><first>Joo-Young</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="joo-young-lee"><first>Joo-Young</first><last>Lee</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <author><first>Soo-Hong</first><last>Kim</last></author>
       <pages>39–45</pages>
       <url hash="708d683a">W04-0507</url>
@@ -591,7 +591,7 @@
       <author><first>Fabio</first><last>Rinaldi</last></author>
       <author><first>James</first><last>Dowdall</last></author>
       <author><first>Gerold</first><last>Schneider</last></author>
-      <author><first>Andreas</first><last>Persidis</last></author>
+      <author id="andreas-persidis"><first>Andreas</first><last>Persidis</last></author>
       <pages>46–53</pages>
       <url hash="c11a82fe">W04-0508</url>
       <bibkey>rinaldi-etal-2004-answering</bibkey>
@@ -620,16 +620,16 @@
     </frontmatter>
     <paper id="1">
       <title>Techniques for Text Planning with <fixed-case>XSLT</fixed-case></title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>1–8</pages>
       <url hash="1c3e113d">W04-0601</url>
       <bibkey>foster-white-2004-techniques</bibkey>
     </paper>
     <paper id="2">
       <title>Towards Metadata Interoperability</title>
-      <author><first>Peter</first><last>Wittenburg</last></author>
-      <author><first>Daan</first><last>Broeder</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
+      <author id="daan-broeder"><first>Daan</first><last>Broeder</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>9–16</pages>
       <url hash="c5c47c73">W04-0602</url>
@@ -639,7 +639,7 @@
       <title>A Web Application using <fixed-case>RDF</fixed-case>/<fixed-case>RDFS</fixed-case> for Metadata Navigation</title>
       <author><first>Xi</first><last>Guo</last></author>
       <author><first>Mark</first><last>Chaudhary</last></author>
-      <author><first>Christopher</first><last>Dozier</last></author>
+      <author id="christopher-dozier"><first>Christopher</first><last>Dozier</last></author>
       <author><first>Yogi</first><last>Arumainayagam</last></author>
       <author><first>Venkatesan</first><last>Subramanian</last></author>
       <pages>17–24</pages>
@@ -649,9 +649,9 @@
     <paper id="4">
       <title>The Semantics of Markup: Mapping Legacy Markup Schemas to a Common Semantics</title>
       <author><first>Gary</first><last>Simons</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Scott</first><last>Farrar</last></author>
-      <author><first>Terence</first><last>Langendoen</last></author>
+      <author id="d-terence-langendoen"><first>Terence</first><last>Langendoen</last></author>
       <author><first>Brian</first><last>Fitzsimons</last></author>
       <author><first>Hector</first><last>Gonzalez</last></author>
       <pages>25–32</pages>
@@ -687,14 +687,14 @@
       <title>An Extensible Framework for Efficient Document Management using <fixed-case>RDF</fixed-case> and <fixed-case>OWL</fixed-case></title>
       <author><first>Erica</first><last>Meena</last></author>
       <author><first>Ashwani</first><last>Kumar</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>51–58</pages>
       <url hash="3d2795c3">W04-0608</url>
       <bibkey>meena-etal-2004-extensible</bibkey>
     </paper>
     <paper id="9">
       <title>Towards Ontology-based Natural Language Processing</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Chris</first><last>Nowak</last></author>
       <author><first>Andrew</first><last>Zschorn</last></author>
       <pages>59–66</pages>
@@ -718,14 +718,14 @@
     <paper id="1">
       <title>Multi-Document Person Name Resolution</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1–8</pages>
       <url hash="71348dea">W04-0701</url>
       <bibkey>fleischman-hovy-2004-multi</bibkey>
     </paper>
     <paper id="2">
       <title>Cross Document Co-Reference Resolution Applications for People in the Legal Domain</title>
-      <author><first>Christopher</first><last>Dozier</last></author>
+      <author id="christopher-dozier"><first>Christopher</first><last>Dozier</last></author>
       <author><first>Thomas</first><last>Zielund</last></author>
       <pages>9–16</pages>
       <url hash="9f03e068">W04-0702</url>
@@ -751,7 +751,7 @@
     <paper id="5">
       <title>Applying Coreference to Improve Name Recognition</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>32–39</pages>
       <url hash="cded4684">W04-0705</url>
       <bibkey>ji-grishman-2004-applying</bibkey>
@@ -759,17 +759,17 @@
     <paper id="6">
       <title>Using Word Similarity Lists for Resolving Indirect Anaphora</title>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>40–46</pages>
       <url hash="68368a62">W04-0706</url>
       <bibkey>gasperin-vieira-2004-using</bibkey>
     </paper>
     <paper id="7">
       <title>Discourse-New Detectors for Definite Description Resolution: A Survey and a Preliminary Proposal</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Olga</first><last>Uryupina</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
-      <author><first>Mijail</first><last>Alexandrov-Kabadjov</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Alexandrov-Kabadjov</last></author>
       <author><first>Rodrigo</first><last>Goulart</last></author>
       <pages>47–54</pages>
       <url hash="6c3d411c">W04-0707</url>
@@ -791,7 +791,7 @@
     </paper>
     <paper id="10">
       <title>Reference Resolution over a Restricted Domain: References to Documents</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Denis</first><last>Lalanne</last></author>
       <pages>71–78</pages>
       <url hash="fce842a5">W04-0710</url>
@@ -799,8 +799,8 @@
     </paper>
     <paper id="11">
       <title><fixed-case>B</fixed-case>io<fixed-case>AR</fixed-case>: Anaphora Resolution for Relating Protein Names to Proteome Database Entries</title>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>79–86</pages>
       <url hash="7afadb58">W04-0711</url>
       <bibkey>kim-park-2004-bioar</bibkey>
@@ -808,7 +808,7 @@
     <paper id="12">
       <title>Ellipsis Resolution by Controlled Default Unification for Multi-modal and Speech Dialog Systems</title>
       <author><first>Michael</first><last>Streit</last></author>
-      <author><first>HansUlrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>HansUlrich</first><last>Krieger</last></author>
       <pages>87–94</pages>
       <url hash="84033af1">W04-0712</url>
       <bibkey>streit-krieger-2004-ellipsis</bibkey>
@@ -823,7 +823,7 @@
     <paper id="14">
       <title>Topic Identification in <fixed-case>C</fixed-case>hinese Based on Centering Model</title>
       <author><first>Ching-Long</first><last>Yeh</last></author>
-      <author><first>Yi-Chun</first><last>Chen</last></author>
+      <author id="yichun-chen"><first>Yi-Chun</first><last>Chen</last></author>
       <pages>103–109</pages>
       <url hash="150297e7">W04-0714</url>
       <bibkey>yeh-chen-2004-topic</bibkey>
@@ -844,10 +844,10 @@
     </frontmatter>
     <paper id="1">
       <title>The <fixed-case>B</fixed-case>asque lexical-sample task</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
-      <author><first>David</first><last>Martínez</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
       <author><first>Eli</first><last>Pociello</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
       <pages>1–4</pages>
@@ -856,8 +856,8 @@
     </paper>
     <paper id="2">
       <title>The Senseval-3 Multilingual <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi lexical sample task</title>
-      <author><first>Timothy</first><last>Chklovski</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <author><first>Amruta</first><last>Purandare</last></author>
       <pages>5–8</pages>
@@ -880,7 +880,7 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>I</fixed-case>talian lexical sample task at Senseval-3</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Danilo</first><last>Giampiccolo</last></author>
       <author><first>Alessandro</first><last>Vallin</last></author>
       <pages>17–20</pages>
@@ -889,21 +889,21 @@
     </paper>
     <paper id="6">
       <title>Senseval-3: The <fixed-case>S</fixed-case>panish lexical sample task</title>
-      <author><first>Lluis</first><last>Màrquez</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Antonia</first><last>Martí</last></author>
-      <author><first>Núria</first><last>Artigas</last></author>
-      <author><first>Mar</first><last>García</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Màrquez</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>Antonia</first><last>Martí</last></author>
+      <author id="nuria-artigas"><first>Núria</first><last>Artigas</last></author>
+      <author id="mar-garcia"><first>Mar</first><last>García</last></author>
       <author><first>Francis</first><last>Real</last></author>
-      <author><first>Dani</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Dani</first><last>Ferrés</last></author>
       <pages>21–24</pages>
       <url hash="b6c246c2">W04-0806</url>
       <bibkey>marquez-etal-2004-senseval</bibkey>
     </paper>
     <paper id="7">
       <title>The Senseval-3 <fixed-case>E</fixed-case>nglish lexical sample task</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Timothy</first><last>Chklovski</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <pages>25–28</pages>
       <url hash="82c7c70f">W04-0807</url>
@@ -911,11 +911,11 @@
     </paper>
     <paper id="8">
       <title>An evaluation exercise for <fixed-case>R</fixed-case>omanian Word Sense Disambiguation</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Vivi</first><last>Năstase</last></author>
-      <author><first>Timothy</first><last>Chklovski</last></author>
-      <author><first>Doina</first><last>Tătar</last></author>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Năstase</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
+      <author id="doina-tatar"><first>Doina</first><last>Tătar</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Florentina</first><last>Hristea</last></author>
       <pages>29–32</pages>
       <url hash="046a84d2">W04-0808</url>
@@ -939,7 +939,7 @@
     <paper id="11">
       <title>The <fixed-case>E</fixed-case>nglish all-words task</title>
       <author><first>Benjamin</first><last>Snyder</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>41–43</pages>
       <url hash="f5aa3165">W04-0811</url>
       <bibkey>snyder-palmer-2004-english</bibkey>
@@ -949,14 +949,14 @@
       <author><first>Marisa</first><last>Ulivieri</last></author>
       <author><first>Elisabetta</first><last>Guazzini</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <url hash="b4be2fab">W04-0812</url>
       <bibkey>ulivieri-etal-2004-senseval</bibkey>
     </paper>
     <paper id="13">
       <title>The <fixed-case>B</fixed-case>asque Country University system: <fixed-case>E</fixed-case>nglish and <fixed-case>B</fixed-case>asque tasks</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martínez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
       <pages>44–48</pages>
       <url hash="643c32a8">W04-0813</url>
       <bibkey>agirre-martinez-2004-basque</bibkey>
@@ -964,9 +964,9 @@
     <paper id="14">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>msterdam at Senseval-3: Semantic roles and Logic forms</title>
       <author><first>David</first><last>Ahn</last></author>
-      <author><first>Sisay</first><last>Fissaha</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay</first><last>Fissaha</last></author>
       <author><first>Valentin</first><last>Jijkoun</last></author>
-      <author><first>Maarten</first><last>De Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>De Rijke</last></author>
       <pages>49–53</pages>
       <url hash="5b05e3af">W04-0814</url>
       <bibkey>ahn-etal-2004-university</bibkey>
@@ -974,7 +974,7 @@
     <paper id="15">
       <title>Dependency based logical form transformations</title>
       <author><first>Stephen</first><last>Anthony</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>54–57</pages>
       <url hash="0e594aa9">W04-0815</url>
       <bibkey>anthony-patrick-2004-dependency</bibkey>
@@ -982,7 +982,7 @@
     <paper id="16">
       <title>Word Sense Disambiguation based on term to term similarity in a context space</title>
       <author><first>Javier</first><last>Artiles</last></author>
-      <author><first>Anselmo</first><last>Penas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Penas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>58–63</pages>
       <url hash="aecfae16">W04-0816</url>
@@ -992,7 +992,7 @@
       <title>Semantic role labelling with similarity-based generalization using <fixed-case>EM</fixed-case>-based clustering</title>
       <author><first>Ulrike</first><last>Baldewein</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Detlef</first><last>Prescher</last></author>
       <pages>64–68</pages>
       <url hash="32a447fe">W04-0817</url>
@@ -1000,21 +1000,21 @@
     </paper>
     <paper id="18">
       <title>The <fixed-case>MITRE</fixed-case> logical form generation system</title>
-      <author><first>Samuel</first><last>Bayer</last></author>
+      <author id="samuel-bayer"><first>Samuel</first><last>Bayer</last></author>
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
       <author><first>John</first><last>Greiff</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <pages>69–72</pages>
       <url hash="80fec1ff">W04-0818</url>
       <bibkey>bayer-etal-2004-mitre</bibkey>
     </paper>
     <paper id="19">
       <title>Semantic parsing based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Cosmin Adrian</first><last>Bejan</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin Adrian</first><last>Bejan</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Paul</first><last>Morărescu</last></author>
+      <author id="paul-morarescu"><first>Paul</first><last>Morărescu</last></author>
       <author><first>Gabriel</first><last>Nicolae</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>73–76</pages>
       <url hash="b445b9f4">W04-0819</url>
       <bibkey>bejan-etal-2004-semantic</bibkey>
@@ -1051,7 +1051,7 @@
       <author><first>Mauro</first><last>Castillo</last></author>
       <author><first>Francis</first><last>Real</last></author>
       <author><first>Jordi</first><last>Asterias</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>93–96</pages>
       <url hash="c789630c">W04-0823</url>
       <bibkey>castillo-etal-2004-talp</bibkey>
@@ -1073,7 +1073,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>UBBNBC</fixed-case> <fixed-case>WSD</fixed-case> system description</title>
-      <author><first>András</first><last>Csomai</last></author>
+      <author id="andras-csomai"><first>András</first><last>Csomai</last></author>
       <pages>105–107</pages>
       <url hash="a4731862">W04-0826</url>
       <bibkey>csomai-2004-ubbnbc</bibkey>
@@ -1081,9 +1081,9 @@
     <paper id="27">
       <title><fixed-case>GAMBL</fixed-case>, genetic algorithm optimization of memory-based <fixed-case>WSD</fixed-case></title>
       <author><first>Bart</first><last>Decadt</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>108–112</pages>
       <url hash="156417cf">W04-0827</url>
       <bibkey>decadt-etal-2004-gambl</bibkey>
@@ -1091,25 +1091,25 @@
     <paper id="28">
       <title><fixed-case>TALP</fixed-case> system for the <fixed-case>E</fixed-case>nglish lexical sample task</title>
       <author><first>Gerard</first><last>Escudero</last></author>
-      <author><first>Lluis</first><last>Màrquez</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Màrquez</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>113–116</pages>
       <url hash="72f0e618">W04-0828</url>
       <bibkey>escudero-etal-2004-talp</bibkey>
     </paper>
     <paper id="29">
       <title><fixed-case>WSD</fixed-case> based on mutual information and syntactic patterns</title>
-      <author><first>David</first><last>Férnandez-Amorós</last></author>
+      <author id="david-fernandez-amoros"><first>David</first><last>Férnandez-Amorós</last></author>
       <pages>117–120</pages>
       <url hash="bad12022">W04-0829</url>
       <bibkey>fernandez-amoros-2004-wsd</bibkey>
     </paper>
     <paper id="30">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>J</fixed-case>aén Word Sense Disambiguation system</title>
-      <author><first>Manuel</first><last>García-Vega</last></author>
-      <author><first>Miguel</first><last>García-Cumbreras</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Urena-López</last></author>
+      <author id="manuel-garcia-vega"><first>Manuel</first><last>García-Vega</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel</first><last>García-Cumbreras</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena-López</last></author>
       <pages>121–124</pages>
       <url hash="9e3efc61">W04-0830</url>
       <bibkey>garcia-vega-etal-2004-university</bibkey>
@@ -1125,7 +1125,7 @@
       <title>Senseval automatic labeling of semantic roles using Maximum Entropy models</title>
       <author><first>Namhee</first><last>Kwon</last></author>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>129–132</pages>
       <url hash="01f830f3">W04-0832</url>
       <bibkey>kwon-etal-2004-senseval</bibkey>
@@ -1141,7 +1141,7 @@
     </paper>
     <paper id="34">
       <title>Supervised Word Sense Disambiguation with Support Vector Machines and multiple knowledge sources</title>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <author><first>Tee Kiah</first><last>Chia</last></author>
       <pages>137–140</pages>
@@ -1157,29 +1157,29 @@
     </paper>
     <paper id="36">
       <title>Senseval-3: The <fixed-case>C</fixed-case>atalan lexical sample task</title>
-      <author><first>Lluis</first><last>Màrquez</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
-      <author><first>Antonia</first><last>Martí</last></author>
-      <author><first>Mar</first><last>García</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Màrquez</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>Antonia</first><last>Martí</last></author>
+      <author id="mar-garcia"><first>Mar</first><last>García</last></author>
       <author><first>Francis</first><last>Real</last></author>
-      <author><first>Dani</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Dani</first><last>Ferrés</last></author>
       <pages>147–150</pages>
       <url hash="b1c01a08">W04-0836</url>
       <bibkey>marquez-etal-2004-senseval-3</bibkey>
     </paper>
     <paper id="37">
       <title>Using automatically acquired predominant senses for Word Sense Disambiguation</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>151–154</pages>
       <url hash="3d459616">W04-0837</url>
       <bibkey>mccarthy-etal-2004-using</bibkey>
     </paper>
     <paper id="38">
       <title><fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Minimally supervised Word Sense Disambiguation for all words in open text</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Ehsanul</first><last>Faruque</last></author>
       <pages>155–158</pages>
       <url hash="8dd65f74">W04-0838</url>
@@ -1187,7 +1187,7 @@
     </paper>
     <paper id="39">
       <title>Complementarity of lexical and simple syntactic features: The <fixed-case>S</fixed-case>ynta<fixed-case>L</fixed-case>ex approach to Senseval-3</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <pages>159–162</pages>
       <url hash="a9df2c9f">W04-0839</url>
@@ -1196,7 +1196,7 @@
     <paper id="40">
       <title>Senseval-3 logic forms: A system and possible improvements</title>
       <author><first>Altaf</first><last>Mohammed</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Paul</first><last>Parker</last></author>
       <pages>163–166</pages>
       <url hash="cdbc53e5">W04-0840</url>
@@ -1204,8 +1204,8 @@
     </paper>
     <paper id="41">
       <title><fixed-case>SVM</fixed-case> classification of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et semantic roles</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Roxana</first><last>Gîrju</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Gîrju</last></author>
       <author><first>Marian</first><last>Olteanu</last></author>
       <author><first>Ovidiu</first><last>Fortu</last></author>
       <pages>167–170</pages>
@@ -1223,10 +1223,10 @@
     </paper>
     <paper id="43">
       <title>Using a Word Sense Disambiguation system for translation disambiguation: the <fixed-case>LIA</fixed-case>-<fixed-case>LIDILEM</fixed-case> team experiment</title>
-      <author><first>Grégoire</first><last>Moreau de Montcheuil</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="gregoire-moreau-de-montcheuil"><first>Grégoire</first><last>Moreau de Montcheuil</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>175–178</pages>
       <url hash="d0ae07cc">W04-0843</url>
       <bibkey>moreau-de-montcheuil-etal-2004-using</bibkey>
@@ -1234,7 +1234,7 @@
     <paper id="44">
       <title>Structural semantic interconnection: a knowledge-based approach to Word Sense Disambiguation</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>179–182</pages>
       <url hash="67258207">W04-0844</url>
       <bibkey>navigli-velardi-2004-structural</bibkey>
@@ -1244,7 +1244,7 @@
       <author><first>Grace</first><last>Ngai</last></author>
       <author><first>Dekai</first><last>Wu</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Chi-Shing</first><last>Wang</last></author>
+      <author id="chi-shing-wang"><first>Chi-Shing</first><last>Wang</last></author>
       <author><first>Chi-Yung</first><last>Wang</last></author>
       <pages>183–186</pages>
       <url hash="332dc338">W04-0845</url>
@@ -1254,8 +1254,8 @@
       <title>Context clustering for Word Sense Disambiguation based on modeling pairwise context similarities</title>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
-      <author><first>Huifeng</first><last>Li</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="huifeng-li"><first>Huifeng</first><last>Li</last></author>
       <author><first>Laurie</first><last>Crist</last></author>
       <pages>187–190</pages>
       <url hash="039a681f">W04-0846</url>
@@ -1263,9 +1263,9 @@
     </paper>
     <paper id="47">
       <title>Optimizing feature set for <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew-Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew-Lim</first><last>Tan</last></author>
       <pages>191–194</pages>
       <url hash="f51b3c70">W04-0847</url>
       <bibkey>niu-etal-2004-optimizing</bibkey>
@@ -1273,9 +1273,9 @@
     <paper id="48">
       <title><fixed-case>LCC</fixed-case>’s <fixed-case>WSD</fixed-case> systems for Senseval-3</title>
       <author><first>Adrian</first><last>Novischi</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <author><first>Paul</first><last>Parker</last></author>
-      <author><first>Adriana</first><last>Bădulescu</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Bădulescu</last></author>
       <author><first>Bob</first><last>Hauser</last></author>
       <pages>195–198</pages>
       <url hash="eff412ab">W04-0848</url>
@@ -1284,9 +1284,9 @@
     <paper id="49">
       <title>Class-based collocations for Word Sense Disambiguation</title>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <author><first>Jeff</first><last>Donner</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>199–202</pages>
       <url hash="b03d2742">W04-0849</url>
       <bibkey>ohara-etal-2004-class</bibkey>
@@ -1316,7 +1316,7 @@
       <title>A gloss-centered algorithm for disambiguation</title>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <author><first>B.</first><last>Prithviraj</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>217–221</pages>
       <url hash="4af4482e">W04-0853</url>
       <bibkey>ramakrishnan-etal-2004-gloss</bibkey>
@@ -1324,7 +1324,7 @@
     <paper id="54">
       <title><fixed-case>KUNLP</fixed-case> system in Senseval-3</title>
       <author><first>Hee-Cheol</first><last>Seo</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <author><first>Soo-Hong</first><last>Kim</last></author>
       <pages>222–225</pages>
       <url hash="bb3a2ac1">W04-0854</url>
@@ -1332,8 +1332,8 @@
     </paper>
     <paper id="55">
       <title><fixed-case>UBB</fixed-case> system at Senseval-3</title>
-      <author><first>Gabriela</first><last>Şerban</last></author>
-      <author><first>Doina</first><last>Tătar</last></author>
+      <author id="gabriela-serban"><first>Gabriela</first><last>Şerban</last></author>
+      <author id="doina-tatar"><first>Doina</first><last>Tătar</last></author>
       <pages>226–228</pages>
       <url hash="913cfc25">W04-0855</url>
       <bibkey>serban-tatar-2004-ubb</bibkey>
@@ -1341,8 +1341,8 @@
     <paper id="56">
       <title>Pattern abstraction and term similarity for Word Sense Disambiguation: <fixed-case>IRST</fixed-case> at Senseval-3</title>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <pages>229–234</pages>
       <url hash="bada371d">W04-0856</url>
       <bibkey>strapparava-etal-2004-pattern</bibkey>
@@ -1358,48 +1358,48 @@
     </paper>
     <paper id="58">
       <title>Word Sense Disambiguation by Web mining for word co-occurrence probabilities</title>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <pages>239–242</pages>
       <url hash="f9dc0f10">W04-0858</url>
       <bibkey>turney-2004-word</bibkey>
     </paper>
     <paper id="59">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>licante systems at Senseval-3</title>
-      <author><first>Sonia</first><last>Vázquez</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
       <author><first>Rafael</first><last>Romero</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <author><first>Iulia</first><last>Nica</last></author>
-      <author><first>Antonia</first><last>Martí</last></author>
+      <author id="m-antonia-marti"><first>Antonia</first><last>Martí</last></author>
       <pages>243–247</pages>
       <url hash="3cfc79d2">W04-0859</url>
       <bibkey>vazquez-etal-2004-university</bibkey>
     </paper>
     <paper id="60">
       <title>The <fixed-case>R</fixed-case>2<fixed-case>D</fixed-case>2 team at Senseval-3</title>
-      <author><first>Sonia</first><last>Vázquez</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
       <author><first>Rafael</first><last>Romero</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Manuel</first><last>García</last></author>
-      <author><first>M. Teresa</first><last>Martín</last></author>
-      <author><first>M. Ángel</first><last>García</last></author>
-      <author><first>L. Alfonso</first><last>Urena</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="manuel-garcia-vega"><first>Manuel</first><last>García</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>M. Ángel</first><last>García</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena</last></author>
       <pages>248–252</pages>
       <url hash="1b27df0f">W04-0860</url>
       <bibkey>vazquez-etal-2004-r2d2</bibkey>
     </paper>
     <paper id="61">
       <title>The “Meaning” system on the <fixed-case>E</fixed-case>nglish all-words task</title>
-      <author><first>Luís</first><last>Villarejo</last></author>
-      <author><first>Lluis</first><last>Màrquez</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martínez</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="luis-villarejo"><first>Luís</first><last>Villarejo</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Màrquez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Armando</first><last>Suárez</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="armando-suarez"><first>Armando</first><last>Suárez</last></author>
       <pages>253–256</pages>
       <url hash="a3eb651b">W04-0861</url>
       <bibkey>villarejo-etal-2004-meaning</bibkey>
@@ -1457,8 +1457,8 @@
       <title>Solving logic puzzles: From robust processing to precise semantics</title>
       <author><first>Iddo</first><last>Lev</last></author>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>9–16</pages>
       <url hash="0e13233e">W04-0902</url>
       <bibkey>lev-etal-2004-solving</bibkey>
@@ -1474,7 +1474,7 @@
       <title><fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em and <fixed-case>SIMPLE</fixed-case>: Two multi-lingual world views</title>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Margalit</first><last>Zabludowski</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <pages>25–32</pages>
       <url hash="82658067">W04-0904</url>
@@ -1482,7 +1482,7 @@
     </paper>
     <paper id="5">
       <title>Evaluating the performance of the <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em semantic analyzer</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <pages>33–40</pages>
@@ -1492,9 +1492,9 @@
     <paper id="6">
       <title>Question answering using ontological semantics</title>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Benoit</first><last>Lavoie</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
       <pages>41–48</pages>
       <url hash="527dd7bd">W04-0906</url>
@@ -1502,7 +1502,7 @@
     </paper>
     <paper id="7">
       <title>Making sense of <fixed-case>J</fixed-case>apanese relative clause constructions</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>49–56</pages>
       <url hash="850096da">W04-0907</url>
       <bibkey>baldwin-2004-making</bibkey>
@@ -1520,7 +1520,7 @@
     <paper id="9">
       <title>Inducing a semantic frame lexicon from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et data</title>
       <author><first>Rebecca</first><last>Green</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>65–72</pages>
       <url hash="7ba00c8f">W04-0909</url>
       <bibkey>green-dorr-2004-inducing</bibkey>
@@ -1537,7 +1537,7 @@
     <paper id="11">
       <title>Lexical-semantic interpretation of language input in mathematical dialogs</title>
       <author><first>Magdalena</first><last>Wolska</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Helmut</first><last>Horacek</last></author>
       <pages>81–88</pages>
       <url hash="38c51c67">W04-0911</url>
@@ -1552,7 +1552,7 @@
     </paper>
     <paper id="13">
       <title>Text Understanding with <fixed-case>GETARUNS</fixed-case> for <fixed-case>Q</fixed-case>/A and Summarization</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>97–104</pages>
       <url hash="1f61e990">W04-0913</url>
       <bibkey>delmonte-2004-text</bibkey>
@@ -1560,7 +1560,7 @@
     <paper id="14">
       <title>Semantic forensics: An application of ontological semantics to information assurance</title>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Christian F.</first><last>Hempelmann</last></author>
+      <author id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></author>
       <author><first>Katrina E.</first><last>Triezenberg</last></author>
       <pages>105–112</pages>
       <url hash="5175d3a4">W04-0914</url>
@@ -1596,10 +1596,10 @@
     </paper>
     <paper id="2">
       <title>Extending Document Summarization to Information Graphics</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <author><first>Stephanie</first><last>Elzer</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Daniel</first><last>Chester</last></author>
       <pages>3–9</pages>
       <url hash="e354069c">W04-1002</url>
@@ -1624,7 +1624,7 @@
     <paper id="5">
       <title>Vocabulary Usage in Newswire Summaries</title>
       <author><first>Terry</first><last>Copeck</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>19–26</pages>
       <url hash="0cfe92ec">W04-1005</url>
       <bibkey>copeck-szpakowicz-2004-vocabulary</bibkey>
@@ -1647,8 +1647,8 @@
     </paper>
     <paper id="8">
       <title>Task-Focused Summarization of Email</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Michael</first><last>Gamon</last></author>
       <author><first>Richard</first><last>Campbell</last></author>
       <pages>43–50</pages>
@@ -1660,7 +1660,7 @@
       <author><first>Gian Lorenzo</first><last>Thione</last></author>
       <author><first>Martin</first><last>van den Berg</last></author>
       <author><first>Livia</first><last>Polanyi</last></author>
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <pages>51–55</pages>
       <url hash="645e383e">W04-1009</url>
       <bibkey>thione-etal-2004-hybrid</bibkey>
@@ -1668,14 +1668,14 @@
     <paper id="10">
       <title>Template-Filtered Headline Summarization</title>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>56–60</pages>
       <url hash="3ed0c6b4">W04-1010</url>
       <bibkey>zhou-hovy-2004-template</bibkey>
     </paper>
     <paper id="11">
       <title>Handling Figures in Document Summarization</title>
-      <author><first>Robert P.</first><last>Futrelle</last></author>
+      <author id="robert-p-futrelle"><first>Robert P.</first><last>Futrelle</last></author>
       <pages>61–65</pages>
       <url hash="0996f546">W04-1011</url>
       <bibkey>futrelle-2004-handling</bibkey>
@@ -1691,7 +1691,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>ROUGE</fixed-case>: A Package for Automatic Evaluation of Summaries</title>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>74–81</pages>
       <url hash="9b536b10">W04-1013</url>
       <bibkey>lin-2004-rouge</bibkey>
@@ -1715,7 +1715,7 @@
     </paper>
     <paper id="16">
       <title>Generic Sentence Fusion is an Ill-Defined Summarization Task</title>
-      <author><first>Hal</first><last>Daume III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>96–103</pages>
       <url hash="2a9fe7c4">W04-1016</url>
@@ -1733,7 +1733,7 @@
       <title><fixed-case>C</fixed-case>hinese Text Summarization Based on Thematic Area Detection</title>
       <author><first>Po</first><last>Hu</last></author>
       <author><first>Tingting</first><last>He</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>112–119</pages>
       <url hash="40d24ad3">W04-1018</url>
       <bibkey>hu-etal-2004-chinese</bibkey>
@@ -1772,8 +1772,8 @@
     </paper>
     <paper id="3">
       <title>Document Re-ranking based on Global and Local Terms</title>
-      <author><first>Lingpeng</first><last>Yang</last></author>
-      <author><first>DongHong</first><last>Ji</last></author>
+      <author id="lingpeng-yang"><first>Lingpeng</first><last>Yang</last></author>
+      <author id="donghong-ji"><first>DongHong</first><last>Ji</last></author>
       <author><first>Li</first><last>Tang</last></author>
       <pages>17–23</pages>
       <url hash="ffd3959d">W04-1103</url>
@@ -1781,7 +1781,7 @@
     </paper>
     <paper id="4">
       <title>Adaptive Compression-based Approach for <fixed-case>C</fixed-case>hinese <fixed-case>P</fixed-case>inyin Input</title>
-      <author><first>JinHu</first><last>Huang</last></author>
+      <author id="jin-hu-huang"><first>JinHu</first><last>Huang</last></author>
       <author><first>David</first><last>Powers</last></author>
       <pages>24–27</pages>
       <url hash="511a7dc4">W04-1104</url>
@@ -1799,7 +1799,7 @@
     </paper>
     <paper id="6">
       <title>Character-Sense Association and Compounding Template Similarity: Automatic Semantic Classification of <fixed-case>C</fixed-case>hinese Compounds</title>
-      <author><first>Chao-Jan</first><last>Chen</last></author>
+      <author id="chao-jan-chen"><first>Chao-Jan</first><last>Chen</last></author>
       <pages>33–40</pages>
       <url hash="8ceae28d">W04-1106</url>
       <bibkey>chen-2004-character</bibkey>
@@ -1807,7 +1807,7 @@
     <paper id="7">
       <title><fixed-case>C</fixed-case>hinese Chunking with Another Type of Spec</title>
       <author><first>Hongqiao</first><last>Li</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <author><first>Jianfeng</first><last>Gao</last></author>
       <author><first>Xiaozhong</first><last>Fan</last></author>
       <pages>41–48</pages>
@@ -1825,9 +1825,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation by Classification of Characters</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>57–64</pages>
       <url hash="ba5b9858">W04-1109</url>
       <bibkey>goh-etal-2004-chinese</bibkey>
@@ -1837,7 +1837,7 @@
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <author><first>Chung-Hsien</first><last>Wu</last></author>
       <author><first>Ming-Jun</first><last>Chen</last></author>
-      <author><first>Liang-chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-chih</first><last>Yu</last></author>
       <pages>65–71</pages>
       <url hash="86bc8a24">W04-1110</url>
       <bibkey>yeh-etal-2004-automated-alignment</bibkey>
@@ -1846,7 +1846,7 @@
       <title>A Statistical Model for Hangeul-Hanja Conversion in Terminology Domain</title>
       <author><first>Jin-Xia</first><last>Huang</last></author>
       <author><first>Sun-Mee</first><last>Bae</last></author>
-      <author><first>Key-sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-sun</first><last>Choi</last></author>
       <pages>72–78</pages>
       <url hash="42f5a5b4">W04-1111</url>
       <bibkey>huang-etal-2004-statistical</bibkey>
@@ -1863,7 +1863,7 @@
     <paper id="13">
       <title>Using Synonym Relations in <fixed-case>C</fixed-case>hinese Collocation Extraction</title>
       <author><first>Wanyin</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <pages>86–93</pages>
       <url hash="7d549f88">W04-1113</url>
@@ -1872,7 +1872,7 @@
     <paper id="14">
       <title>The Construction of A <fixed-case>C</fixed-case>hinese Shallow Treebank</title>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yin</first><last>Li</last></author>
       <author><first>Wanyin</first><last>Li</last></author>
       <pages>94–101</pages>
@@ -1881,7 +1881,7 @@
     </paper>
     <paper id="15">
       <title>Combining Prosodic and Text Features for Segmentation of <fixed-case>M</fixed-case>andarin Broadcast News</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>102–108</pages>
       <url hash="75ad8946">W04-1115</url>
       <bibkey>levow-2004-combining</bibkey>
@@ -1889,7 +1889,7 @@
     <paper id="16">
       <title>Automatic Semantic Role Assignment for a Tree Structure</title>
       <author><first>Jia-Ming</first><last>You</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>109–115</pages>
       <url hash="03cd2c77">W04-1116</url>
       <bibkey>you-chen-2004-automatic</bibkey>
@@ -1897,8 +1897,8 @@
     <paper id="17">
       <title>A Large-Scale Semantic Structure for <fixed-case>C</fixed-case>hinese Sentences</title>
       <author><first>Li</first><last>Tang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Lingpeng</first><last>Yang</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="lingpeng-yang"><first>Lingpeng</first><last>Yang</last></author>
       <pages>116–121</pages>
       <url hash="6e54d729">W04-1117</url>
       <bibkey>tang-etal-2004-large</bibkey>
@@ -1907,7 +1907,7 @@
       <title>Do We Need <fixed-case>C</fixed-case>hinese Word Segmentation for Statistical Machine Translation?</title>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>122–128</pages>
       <url hash="e079e220">W04-1118</url>
       <bibkey>xu-etal-2004-need</bibkey>
@@ -1932,7 +1932,7 @@
     </paper>
     <paper id="21">
       <title>Aligning Bilingual Corpora Using Sentences Location Information</title>
-      <author><first>Weigang</first><last>Li</last></author>
+      <author id="weigang-li"><first>Weigang</first><last>Li</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Zhen</first><last>Wang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
@@ -1954,7 +1954,7 @@
       <booktitle>Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and its Applications (<fixed-case>NLPBA</fixed-case>/<fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>)</booktitle>
       <editor><first>Nigel</first><last>Collier</last></editor>
       <editor><first>Patrick</first><last>Ruch</last></editor>
-      <editor><first>Adeline</first><last>Nazarenko</last></editor>
+      <editor id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></editor>
       <publisher>COLING</publisher>
       <address>Geneva, Switzerland</address>
       <month>August 28th and 29th</month>
@@ -1967,7 +1967,7 @@
     </frontmatter>
     <paper id="1">
       <title>Recognizing Names in Biomedical Texts using Hidden <fixed-case>M</fixed-case>arkov Model and <fixed-case>SVM</fixed-case> plus Sigmoid</title>
-      <author><first>GuoDong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <pages>1–7</pages>
       <url hash="b7b9629a">W04-1201</url>
       <bibkey>zhou-2004-recognizing</bibkey>
@@ -2026,18 +2026,18 @@
     </paper>
     <paper id="7">
       <title>Event-Based Information Extraction for the Biomedical Domain: the Caderige Project</title>
-      <author><first>Erick</first><last>Alphonse</last></author>
+      <author id="erick-alphonse"><first>Erick</first><last>Alphonse</last></author>
       <author><first>Sophie</first><last>Aubin</last></author>
       <author><first>Philippe</first><last>Bessières</last></author>
       <author><first>Gilles</first><last>Bisson</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Sandrine</first><last>Lagarrigue</last></author>
       <author><first>Adeline</first><last>Nazarenko</last></author>
       <author><first>Alain-Pierre</first><last>Manine</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <author><first>Mohamed Ould Abdel</first><last>Vetah</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <pages>43–49</pages>
       <url hash="3e4000e0">W04-1207</url>
       <bibkey>alphonse-etal-2004-event</bibkey>
@@ -2045,7 +2045,7 @@
     <paper id="8">
       <title>Distributed Modules for Text Annotation and <fixed-case>IE</fixed-case> Applied to the Biomedical Domain</title>
       <author><first>Harald</first><last>Kirsch</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>50–56</pages>
       <url hash="81ec08bc">W04-1208</url>
       <bibkey>kirsch-rebholz-schuhmann-2004-distributed</bibkey>
@@ -2053,7 +2053,7 @@
     <paper id="9">
       <title>Support Vector Machine Approach to Extracting Gene References into Function from Biological Documents</title>
       <author><first>Chih</first><last>Lee</last></author>
-      <author><first>Wen-Juan</first><last>Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen-Juan</first><last>Hou</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>57–60</pages>
       <url hash="4ebfdee0">W04-1209</url>
@@ -2070,9 +2070,9 @@
     </paper>
     <paper id="11">
       <title>Creating a Test Corpus of Clinical Notes Manually Tagged for Part-of-Speech Information</title>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <author><first>Anni</first><last>Coden</last></author>
-      <author><first>Christopher</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher</first><last>Chute</last></author>
       <pages>65–68</pages>
       <url hash="35f3da6f">W04-1211</url>
       <bibkey>pakhomov-etal-2004-creating</bibkey>
@@ -2080,7 +2080,7 @@
     <paper id="12">
       <title>Classification from Full Text: A Comparison of Canonical Sections of Scientific Papers</title>
       <author><first>Gail</first><last>Sinclair</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>69–72</pages>
       <url hash="8dae2f8d">W04-1212</url>
       <bibkey>sinclair-webber-2004-classification</bibkey>
@@ -2090,7 +2090,7 @@
       <author><first>Nigel</first><last>Collier</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <pages>73–78</pages>
       <url hash="90440c2b">W04-1213</url>
@@ -2102,7 +2102,7 @@
       <author><first>Seon-Ho</first><last>Kim</last></author>
       <author><first>Ki-Joong</first><last>Lee</last></author>
       <author><first>Do-Gil</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>79–82</pages>
       <url hash="2b2aa23f">W04-1214</url>
       <bibkey>park-etal-2004-incorporating</bibkey>
@@ -2110,7 +2110,7 @@
     <paper id="15">
       <title>Annotating Multiple Types of Biomedical Entities: A Single Word Classification Approach</title>
       <author><first>Chih</first><last>Lee</last></author>
-      <author><first>Wen-Juan</first><last>Hou</last></author>
+      <author id="wen-juan-hou"><first>Wen-Juan</first><last>Hou</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>83–86</pages>
       <url hash="0f96000c">W04-1215</url>
@@ -2125,11 +2125,11 @@
     </paper>
     <paper id="17">
       <title>Exploiting Context for Biomedical Entity Recognition: From Syntax to the Web</title>
-      <author><first>Jenny</first><last>Finkel</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny</first><last>Finkel</last></author>
       <author><first>Shipra</first><last>Dingare</last></author>
       <author id="huy-nguyen-stanford"><first>Huy</first><last>Nguyen</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <author><first>Gail</first><last>Sinclair</last></author>
       <pages>91–94</pages>
       <url hash="351dc48f">W04-1217</url>
@@ -2144,7 +2144,7 @@
     </paper>
     <paper id="19">
       <title>Exploring Deep Knowledge Resources in Biomedical Name Recognition</title>
-      <author><last>Zhou</last><first>GuoDong</first></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
       <author><last>Su</last><first>Jian</first></author>
       <pages>99–102</pages>
       <url hash="e7162f4f">W04-1219</url>
@@ -2154,7 +2154,7 @@
       <title><fixed-case>POSBIOTM</fixed-case>-<fixed-case>NER</fixed-case> in the Shared Task of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>/<fixed-case>NLPBA</fixed-case>2004</title>
       <author><first>Yu</first><last>Song</last></author>
       <author><first>Eunju</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <author><first>Byoung-kee</first><last>Yi</last></author>
       <pages>103–106</pages>
       <url hash="fbf8442a">W04-1220</url>
@@ -2191,7 +2191,7 @@
     </paper>
     <paper id="2">
       <title>On Statistical Parameter Setting</title>
-      <author><first>Damir</first><last>Ćavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Ćavar</last></author>
       <author><first>Joshua</first><last>Herring</last></author>
       <author><first>Toshikazu</first><last>Ikuta</last></author>
       <author><first>Paul</first><last>Rodrigues</last></author>
@@ -2232,7 +2232,7 @@
     <paper id="7">
       <title>Statistics Learning and Universal Grammar: Modeling Word Segmentation</title>
       <author><first>Timothy</first><last>Gambell</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>51–54</pages>
       <url hash="fa2e43db">W04-1307</url>
       <bibkey>gambell-yang-2004-statistics</bibkey>
@@ -2249,8 +2249,8 @@
     <paper id="9">
       <title>A Computational Model of Emergent Simple Syntax: Supporting the Natural Transition from the One-Word Stage to the Two-Word Stage</title>
       <author><first>Kris</first><last>Jack</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Annalu</first><last>Waller</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="annalu-waller"><first>Annalu</first><last>Waller</last></author>
       <pages>63–70</pages>
       <url hash="a7c8b079">W04-1309</url>
       <bibkey>jack-etal-2004-computational</bibkey>
@@ -2265,7 +2265,7 @@
     </paper>
     <paper id="11">
       <title>Some Tests of an Unsupervised Model of Language Acquisition</title>
-      <author><first>Bo</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bo</first><last>Pedersen</last></author>
       <author><first>Shimon</first><last>Edelman</last></author>
       <author><first>Zach</first><last>Solan</last></author>
       <author><first>David</first><last>Horn</last></author>
@@ -2311,20 +2311,20 @@
     </paper>
     <paper id="2">
       <title><fixed-case>CESTA</fixed-case>: Machine Translation Evaluation Campaign [Work-in-Progress Project Report]</title>
-      <author><first>Widad</first><last>Mustafa El Hadi</last></author>
-      <author><first>Marianne</first><last>Dabbadie</last></author>
-      <author><first>Ismaïl</first><last>Timimi</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="widad-mustafa-el-hadi"><first>Widad</first><last>Mustafa El Hadi</last></author>
+      <author id="marianne-dabbadie"><first>Marianne</first><last>Dabbadie</last></author>
+      <author id="ismail-timimi"><first>Ismaïl</first><last>Timimi</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Antony</first><last>Hartley</last></author>
-      <author><first>Andrei</first><last>Popescu Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu Belis</last></author>
       <pages>8–17</pages>
       <url hash="ae70e6e6">W04-1402</url>
       <bibkey>mustafa-el-hadi-etal-2004-cesta</bibkey>
     </paper>
     <paper id="3">
       <title>Language Resources and Localisation</title>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <pages>18–25</pages>
       <url hash="eb57fc39">W04-1403</url>
       <bibkey>schaler-2004-language</bibkey>
@@ -2401,7 +2401,7 @@
     <paper id="3">
       <title>A Simple String-Rewriting Formalism for Dependency Grammar</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>17–24</pages>
       <url hash="d504dde8">W04-1503</url>
       <bibkey>nasr-rambow-2004-simple</bibkey>
@@ -2443,8 +2443,8 @@
     </paper>
     <paper id="8">
       <title>Some Notes on Generative Capacity of Dependency Grammar</title>
-      <author><first>Tomasz</first><last>Obrebski</last></author>
-      <author><first>Filip</first><last>Gralinski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrebski</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Gralinski</last></author>
       <pages>57–63</pages>
       <url hash="97d9db18">W04-1508</url>
       <bibkey>obrebski-gralinski-2004-notes</bibkey>
@@ -2460,16 +2460,16 @@
       <title>Extensible Dependency Grammar: A New Methodology</title>
       <author><first>Ralph</first><last>Debusmann</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <pages>70–76</pages>
       <url hash="e303140b">W04-1510</url>
       <bibkey>debusmann-etal-2004-extensible</bibkey>
     </paper>
     <paper id="11">
       <title>From a Surface Analysis to a Dependency Structure</title>
-      <author><first>Luisa</first><last>Coheur</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
-      <author><first>Gabriel G.</first><last>Bes</last></author>
+      <author id="luisa-coheur"><first>Luisa</first><last>Coheur</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
+      <author id="gabriel-g-bes"><first>Gabriel G.</first><last>Bes</last></author>
       <pages>77–81</pages>
       <url hash="9efd2837">W04-1511</url>
       <bibkey>coheur-etal-2004-surface</bibkey>
@@ -2484,7 +2484,7 @@
     <paper id="13">
       <title>Synchronous Dependency Insertion Grammars: A Grammar Formalism for Syntax Based Statistical <fixed-case>MT</fixed-case></title>
       <author><first>Yuan</first><last>Ding</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>90–97</pages>
       <url hash="e9d0e79f">W04-1513</url>
       <bibkey>ding-palmer-2004-synchronous</bibkey>
@@ -2512,7 +2512,7 @@
     </paper>
     <paper id="2">
       <title>Developing an <fixed-case>A</fixed-case>rabic Treebank: Methods, Guidelines, Procedures, and Tools</title>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <pages>2–9</pages>
       <url hash="6b56a65b">W04-1602</url>
@@ -2527,7 +2527,7 @@
     </paper>
     <paper id="4">
       <title>The Architecture of a <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Lexical Database. Some Figures, Ratios and Categories from the <fixed-case>DIINAR</fixed-case>.1 Source Program</title>
-      <author><first>Ramzi</first><last>Abbès</last></author>
+      <author id="ramzi-abbes"><first>Ramzi</first><last>Abbès</last></author>
       <author><first>Joseph</first><last>Dichy</last></author>
       <author><first>Mohamed</first><last>Hassoun</last></author>
       <pages>15–22</pages>
@@ -2565,7 +2565,7 @@
     </paper>
     <paper id="9">
       <title>An Unsupervised Approach for Bootstrapping <fixed-case>A</fixed-case>rabic Sense Tagging</title>
-      <author><first>Mona T.</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona T.</first><last>Diab</last></author>
       <pages>43–50</pages>
       <url hash="de4a9eac">W04-1609</url>
       <bibkey>diab-2004-unsupervised</bibkey>
@@ -2582,8 +2582,8 @@
     <paper id="11">
       <title>A Transcription Scheme for Languages Employing the <fixed-case>A</fixed-case>rabic Script Motivated by Speech Processing Applications</title>
       <author><first>Shadi</first><last>Ganjavi</last></author>
-      <author><first>Panayiotis G.</first><last>Georgiou</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis G.</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>59–65</pages>
       <url hash="481a0438">W04-1611</url>
       <bibkey>ganjavi-etal-2004-transcription</bibkey>
@@ -2628,7 +2628,7 @@
     <paper id="17">
       <title>Language Weaver <fixed-case>A</fixed-case>rabic-&gt;<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case></title>
       <author><first>Daniel</first><last>Marcu</last></author>
-      <author><first>Alex</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alex</first><last>Fraser</last></author>
       <author><first>William</first><last>Wong</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <pages>89–89</pages>
@@ -2651,14 +2651,14 @@
     </frontmatter>
     <paper id="1">
       <title>Integrating Natural Language Processing into <fixed-case>E</fixed-case>-Learning - A Case of <fixed-case>C</fixed-case>zech</title>
-      <author><first>Pavel</first><last>Smrž</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrž</last></author>
       <pages>1–10</pages>
       <url hash="44523ffe">W04-1701</url>
       <bibkey>smrz-2004-integrating</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>NLP</fixed-case> serving the cause of language learning</title>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Thibault</first><last>Parmentier</last></author>
       <pages>11–17</pages>
       <url hash="a6750989">W04-1702</url>
@@ -2686,7 +2686,7 @@
     </paper>
     <paper id="5">
       <title>Indexing Student Essays Paragraphs Using <fixed-case>LSA</fixed-case> Over an Integrated Ontological Space</title>
-      <author><first>Gaston G.</first><last>Burek</last></author>
+      <author id="gaston-burek"><first>Gaston G.</first><last>Burek</last></author>
       <author><first>Maria</first><last>Vargas-Vera</last></author>
       <author><first>Emanuela</first><last>Moreale</last></author>
       <pages>32–37</pages>
@@ -2697,7 +2697,7 @@
       <title><fixed-case>E</fixed-case>-Assessment using Latent Semantic Analysis in the Computer Science Domain: A Pilot Study</title>
       <author><first>Pete</first><last>Thomas</last></author>
       <author><first>Debra</first><last>Haley</last></author>
-      <author><first>Anne</first><last>deRoeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>deRoeck</last></author>
       <author><first>Marian</first><last>Petre</last></author>
       <pages>38–44</pages>
       <url hash="00a87ebd">W04-1706</url>
@@ -2711,7 +2711,7 @@
       <author><first>Milena</first><last>Yankova</last></author>
       <author><first>Svetla</first><last>Boytcheva</last></author>
       <author><first>Irena</first><last>Vitanova</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>45–52</pages>
       <url hash="225235c6">W04-1707</url>
       <bibkey>angelova-etal-2004-towards</bibkey>
@@ -2720,7 +2720,7 @@
       <title>Automatic Measuring of <fixed-case>E</fixed-case>nglish Language Proficiency using <fixed-case>MT</fixed-case> Evaluation Technology</title>
       <author><first>Keiji</first><last>Yasuda</last></author>
       <author><first>Fumiaki</first><last>Sugaya</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Toshiyuki</first><last>Takezawa</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
@@ -2748,8 +2748,8 @@
       <author><first>Katrina</first><last>Keogh</last></author>
       <author><first>Thomas</first><last>Koller</last></author>
       <author><first>Monica</first><last>Ward</last></author>
-      <author><first>Elaine</first><last>Uí Dhonnchadha</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="elaine-ui-dhonnchadha"><first>Elaine</first><last>Uí Dhonnchadha</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>79–85</pages>
       <url hash="c509b04c">W04-1711</url>
       <bibkey>keogh-etal-2004-cl</bibkey>
@@ -2789,14 +2789,14 @@
     </frontmatter>
     <paper id="1">
       <title>A Lexico-semantic Approach to the Structuring of Terminology</title>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <pages>7–14</pages>
       <url hash="71382f05">W04-1801</url>
       <bibkey>lhomme-2004-lexico</bibkey>
     </paper>
     <paper id="2">
       <title>Metalinguistic Information Extraction for Terminology</title>
-      <author><first>Carlos</first><last>Rodríguez Penagos</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez Penagos</last></author>
       <pages>15–22</pages>
       <url hash="9cbb51ad">W04-1802</url>
       <bibkey>rodriguez-penagos-2004-metalinguistic</bibkey>
@@ -2821,7 +2821,7 @@
     <paper id="5">
       <title>Discovering Specific Semantic Relationships between Nouns and Verbs in a Specialized <fixed-case>F</fixed-case>rench Corpus</title>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Marie-Claude</first><last>L’Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’Homme</last></author>
       <pages>39–46</pages>
       <url hash="4f70007a">W04-1805</url>
       <bibkey>claveau-lhomme-2004-discovering</bibkey>
@@ -2829,8 +2829,8 @@
     <paper id="6">
       <title>Automatically Inducing Ontologies from Corpora</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Ken</first><last>Samuel</last></author>
-      <author><first>Kris</first><last>Concepcion</last></author>
+      <author id="ken-samuel"><first>Ken</first><last>Samuel</last></author>
+      <author id="kristian-concepcion"><first>Kris</first><last>Concepcion</last></author>
       <author><first>David</first><last>Vogel</last></author>
       <pages>47–54</pages>
       <url hash="715da976">W04-1806</url>
@@ -2839,7 +2839,7 @@
     <paper id="7">
       <title>Detecting Semantic Relations between Terms in Definitions</title>
       <author><first>Véronique</first><last>Malaisé</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Bruno</first><last>Bachimont</last></author>
       <pages>55–62</pages>
       <url hash="8dec8023">W04-1807</url>
@@ -2847,7 +2847,7 @@
     </paper>
     <paper id="8">
       <title>Discovering Synonyms and Other Related Words</title>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Jussi</first><last>Piitulainen</last></author>
       <pages>63–70</pages>
       <url hash="8bd158e3">W04-1808</url>
@@ -2889,7 +2889,7 @@
     <paper id="13">
       <title>Determining the Specificity of Terms based on Information Theoretic Measures</title>
       <author><first>Pum-Mo</first><last>Ryu</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>87–90</pages>
       <url hash="cb06e920">W04-1813</url>
       <bibkey>ryu-choi-2004-determining</bibkey>
@@ -2898,8 +2898,8 @@
       <title>Construction of Grammar Based Term Extraction Model for <fixed-case>J</fixed-case>apanese</title>
       <author><first>Koichi</first><last>Takeuchi</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <pages>91–94</pages>
       <url hash="e09ddd88">W04-1814</url>
       <bibkey>takeuchi-etal-2004-construction</bibkey>
@@ -2908,7 +2908,7 @@
   <volume id="19" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 5th International Workshop on Linguistically Interpreted Corpora</booktitle>
-      <editor><first>Silvia</first><last>Hansen-Schirra</last></editor>
+      <editor id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <editor><first>Hans</first><last>Uszkoreit</last></editor>
       <publisher>COLING</publisher>
@@ -2940,7 +2940,7 @@
       <title>Inflectional Syncretism and Corpora</title>
       <author><first>Dunstan</first><last>Brown</last></author>
       <author><first>Carole</first><last>Tiberius</last></author>
-      <author><first>Greville G.</first><last>Corbett</last></author>
+      <author id="greville-c-corbett"><first>Greville G.</first><last>Corbett</last></author>
       <pages>11–18</pages>
       <url hash="73e2b61f">W04-1902</url>
       <bibkey>brown-etal-2004-inflectional</bibkey>
@@ -2949,7 +2949,7 @@
       <title>The <fixed-case>S</fixed-case>zeged Corpus. A <fixed-case>POS</fixed-case> Tagged and Syntactically Annotated <fixed-case>H</fixed-case>ungarian Natural Language Corpus</title>
       <author><first>Dóra</first><last>Csendes</last></author>
       <author><first>János</first><last>Csirik</last></author>
-      <author><first>Tibor</first><last>Gyimóthy</last></author>
+      <author id="tibor-gyimothy"><first>Tibor</first><last>Gyimóthy</last></author>
       <pages>19–22</pages>
       <url hash="c5a6dab4">W04-1903</url>
       <bibkey>csendes-etal-2004-szeged</bibkey>
@@ -2966,7 +2966,7 @@
     <paper id="5">
       <title>Towards a Dependency-Based Gold Standard for <fixed-case>G</fixed-case>erman Parsers. The <fixed-case>TIGER</fixed-case> Dependency Bank</title>
       <author><first>Martin</first><last>Forst</last></author>
-      <author><first>Núria</first><last>Bertomeu</last></author>
+      <author id="nuria-bertomeu"><first>Núria</first><last>Bertomeu</last></author>
       <author><first>Berthold</first><last>Crysmann</last></author>
       <author><first>Frederik</first><last>Fouvry</last></author>
       <author><first>Silvia</first><last>Hansen-Schirra</last></author>
@@ -2978,7 +2978,7 @@
     <paper id="6">
       <title>Corpus-based Induction of an <fixed-case>LFG</fixed-case> Syntax-Semantics Interface for Frame Semantic Processing</title>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Jirí</first><last>Semecky</last></author>
+      <author id="jiri-semecky"><first>Jirí</first><last>Semecky</last></author>
       <pages>39–46</pages>
       <url hash="f36b5ce1">W04-1906</url>
       <bibkey>frank-semecky-2004-corpus</bibkey>
@@ -2994,7 +2994,7 @@
     </paper>
     <paper id="8">
       <title>Automated Induction of Sense in Context</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <pages>55–58</pages>
@@ -3004,7 +3004,7 @@
     <paper id="9">
       <title>Mining Linguistically Interpreted Texts</title>
       <author><first>Cassiana Fagundes</first><last>da Silva</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Fernando Santos</first><last>Osório</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <pages>59–62</pages>
@@ -3044,7 +3044,7 @@
     <paper id="1">
       <title>Introduction to <fixed-case>ROMAND</fixed-case> 2004</title>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
-      <author><first>Amalia</first><last>Todirascu</last></author>
+      <author id="amalia-todirascu"><first>Amalia</first><last>Todirascu</last></author>
       <pages>7–10</pages>
       <url hash="df8aa38e">W04-2001</url>
       <bibkey>pallotta-todirascu-2004-introduction</bibkey>
@@ -3074,23 +3074,23 @@
     </paper>
     <paper id="5">
       <title>Evaluating <fixed-case>GETARUNS</fixed-case> parser with <fixed-case>GREVAL</fixed-case> test suite</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>32–41</pages>
       <url hash="0b553b9b">W04-2005</url>
       <bibkey>delmonte-2004-evaluating</bibkey>
     </paper>
     <paper id="6">
       <title>A step towards incremental generation of logical forms</title>
-      <author><first>Luísa</first><last>Coheur</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
-      <author><first>Gabriel</first><last>Bès</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
+      <author id="gabriel-g-bes"><first>Gabriel</first><last>Bès</last></author>
       <pages>42–50</pages>
       <url hash="15608018">W04-2006</url>
       <bibkey>coheur-etal-2004-step</bibkey>
     </paper>
     <paper id="7">
       <title>Using an incremental robust parser to automatically generate semantic <fixed-case>UNL</fixed-case> graphs</title>
-      <author><first>Nuria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Nuria</first><last>Gala</last></author>
       <pages>51–58</pages>
       <url hash="c95aa876">W04-2007</url>
       <bibkey>gala-2004-using</bibkey>
@@ -3098,7 +3098,7 @@
     <paper id="8">
       <title>An algorithm for open text semantic parsing</title>
       <author><first>Lei</first><last>Shi</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>59–67</pages>
       <url hash="81bd249e">W04-2008</url>
       <bibkey>shi-mihalcea-2004-algorithm</bibkey>
@@ -3112,8 +3112,8 @@
     </paper>
     <paper id="10">
       <title>Robust ending guessing rules with application to slavonic languages</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <pages>76–85</pages>
       <url hash="f4812458">W04-2010</url>
       <bibkey>nakov-paskaleva-2004-robust</bibkey>
@@ -3132,7 +3132,7 @@
       <title>Answer validation by keyword association</title>
       <author><first>Masatsugu</first><last>Tonoike</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>95–103</pages>
       <url hash="3445b3d5">W04-2012</url>
       <bibkey>tonoike-etal-2004-answer</bibkey>
@@ -3182,7 +3182,7 @@
     </paper>
     <paper id="4">
       <title>Standards going concrete : from <fixed-case>LMF</fixed-case> to Morphalou</title>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <author><first>Susanne</first><last>Salmon-Alt</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <pages>22–28</pages>
@@ -3240,8 +3240,8 @@
     </paper>
     <paper id="10">
       <title>A Very Large Dictionary with Paradigmatic, Syntagmatic, and Paronymic</title>
-      <author><first>Igor</first><last>Bolshakov</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="igor-a-bolshakov"><first>Igor</first><last>Bolshakov</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>53–56</pages>
       <url hash="4981ea0c">W04-2110</url>
       <bibkey>bolshakov-gelbukh-2004-large</bibkey>
@@ -3255,7 +3255,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>R</fixed-case>{j}ecnik.com : <fixed-case>E</fixed-case>nglish - <fixed-case>S</fixed-case>erbo-<fixed-case>C</fixed-case>roatian Electronic Dictionary</title>
-      <author><first>Vlado</first><last>Kešelj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Kešelj</last></author>
       <author><first>Tanja</first><last>Kešelj</last></author>
       <author><first>Larisa</first><last>Zlatić</last></author>
       <pages>61–64</pages>
@@ -3289,7 +3289,7 @@
     <paper id="16">
       <title>Empirical Acquisition of Differentiating Relations from Definitions</title>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>77–80</pages>
       <url hash="e7f29331">W04-2116</url>
       <bibkey>ohara-wiebe-2004-empirical</bibkey>
@@ -3303,7 +3303,7 @@
     </paper>
     <paper id="18">
       <title>Identification, Quantitative Description, and Preliminary Distributional Analysis of <fixed-case>G</fixed-case>erman Particle Verbs</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>85–88</pages>
       <url hash="7d112c97">W04-2118</url>
       <bibkey>schulte-im-walde-2004-identification</bibkey>
@@ -3331,8 +3331,8 @@
     </frontmatter>
     <paper id="1">
       <title>Multilinguality in <fixed-case>ETAP</fixed-case>-3: Reuse of Lexical Resources</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <author><first>Victor</first><last>Sizov</last></author>
       <pages>1–8</pages>
       <url hash="24d4ea16">W04-2201</url>
@@ -3356,7 +3356,7 @@
     <paper id="4">
       <title>Automatic Construction of a Transfer Dictionary Considering Directionality</title>
       <author><first>Kyonghee</first><last>Paik</last></author>
-      <author><first>Satoshi</first><last>Shirai</last></author>
+      <author id="satoshi-shirai"><first>Satoshi</first><last>Shirai</last></author>
       <author><first>Hiromi</first><last>Nakaiwa</last></author>
       <pages>25–32</pages>
       <url hash="21b25b32">W04-2204</url>
@@ -3405,7 +3405,7 @@
     </paper>
     <paper id="10">
       <title>A Generic Collaborative Platform for Multilingual Lexical Database Development</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>73–79</pages>
       <url hash="0caea709">W04-2210</url>
       <bibkey>serasset-2004-generic</bibkey>
@@ -3413,8 +3413,8 @@
     <paper id="11">
       <title>Semi-Automatic Construction of <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Verb Patterns Based on Translation Equivalency</title>
       <author><first>Munpyo</first><last>Hong</last></author>
-      <author><first>Young-Kil</first><last>Kim</last></author>
-      <author><first>Sang-Kyu</first><last>Park</last></author>
+      <author id="young-gil-kim"><first>Young-Kil</first><last>Kim</last></author>
+      <author id="sang-kyu-park"><first>Sang-Kyu</first><last>Park</last></author>
       <author><first>Young-Jik</first><last>Lee</last></author>
       <pages>80–85</pages>
       <url hash="c7aaf198">W04-2211</url>
@@ -3445,8 +3445,8 @@
       <title>Revising the <fixed-case>W</fixed-case>ordnet Domains Hierarchy: semantics, coverage and balancing</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Pamela</first><last>Forner</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>94–101</pages>
       <url hash="60caf850">W04-2214</url>
       <bibkey>bentivogli-etal-2004-revising</bibkey>
@@ -3454,7 +3454,7 @@
     <paper id="15">
       <title><fixed-case>P</fixed-case>olyphra<fixed-case>Z</fixed-case>: a Tool for the Management of Parallel Corpora</title>
       <author><first>Najeh</first><last>Hajlaoui</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>102–109</pages>
       <url hash="67d01865">W04-2215</url>
       <bibkey>hajlaoui-boitet-2004-polyphraz</bibkey>
@@ -3482,7 +3482,7 @@
     </frontmatter>
     <paper id="1">
       <title>Usability and Acceptability Studies of Conversational Virtual Human Technology</title>
-      <author><first>Curry</first><last>Guinn</last></author>
+      <author id="curry-i-guinn"><first>Curry</first><last>Guinn</last></author>
       <author><first>Robert</first><last>Hubal</last></author>
       <author><first>Geoffrey</first><last>Frank</last></author>
       <author><first>Henry</first><last>Schwetzke</last></author>
@@ -3502,8 +3502,8 @@
     </paper>
     <paper id="2">
       <title>Stochastic Language Generation in a Dialogue System: Toward a Domain Independent Generator</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>9–18</pages>
       <url hash="f12c7fe3">W04-2302</url>
       <bibkey>chambers-allen-2004-stochastic</bibkey>
@@ -3525,7 +3525,7 @@
       <author><first>Linda</first><last>Bell</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <author><first>Anders</first><last>Lindström</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>23–26</pages>
       <url hash="f02b80bc">W04-2304</url>
       <bibkey>gustafson-etal-2004-nice</bibkey>
@@ -3542,9 +3542,9 @@
       <author><first>Stefan</first><last>Hamerich</last></author>
       <author><first>Volker</first><last>Schubert</last></author>
       <author><first>Volker</first><last>Schless</last></author>
-      <author><first>Ricardo</first><last>de Córdoba</last></author>
-      <author><first>José M.</first><last>Pardo</last></author>
-      <author><first>Luis F.</first><last>d’Haro</last></author>
+      <author id="ricardo-de-cordoba"><first>Ricardo</first><last>de Córdoba</last></author>
+      <author id="jose-manuel-pardo"><first>José M.</first><last>Pardo</last></author>
+      <author id="luis-fernando-dharo"><first>Luis F.</first><last>d’Haro</last></author>
       <author><first>Basilis</first><last>Kladis</last></author>
       <author><first>Otilia</first><last>Kocsis</last></author>
       <author><first>Stefan</first><last>Igel</last></author>
@@ -3564,7 +3564,7 @@
       <title>Other-Initiated Self-Repairs in <fixed-case>E</fixed-case>stonian Information Dialogues: Solving Communication Problems in Cooperation</title>
       <author><first>Olga</first><last>Gerassimenko</last></author>
       <author><first>Tiit</first><last>Hennoste</last></author>
-      <author><first>Mare</first><last>Koit</last></author>
+      <author id="mare-koit"><first>Mare</first><last>Koit</last></author>
       <author><first>Andriela</first><last>Rääbis</last></author>
       <pages>39–42</pages>
       <url hash="02f5478b">W04-2308</url>
@@ -3583,7 +3583,7 @@
       <author><first>Manav Ratan</first><last>Mital</last></author>
       <author><first>Sumit</first><last>Kumar</last></author>
       <author><first>Amitabha</first><last>Mukerjee</last></author>
-      <author><first>Achla M.</first><last>Raina</last></author>
+      <author id="achla-m-raina"><first>Achla M.</first><last>Raina</last></author>
       <pages>47–50</pages>
       <url hash="68be9ce1">W04-2310</url>
       <bibkey>jain-etal-2004-anaphora</bibkey>
@@ -3607,7 +3607,7 @@
     <paper id="13">
       <title>Towards Automatic Identification of Discourse Markers in Dialogs: The Case of Like</title>
       <author><first>Sandrine</first><last>Zufferey</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>63–71</pages>
       <url hash="50eaa87c">W04-2313</url>
       <bibkey>zufferey-popescu-belis-2004-towards</bibkey>
@@ -3615,8 +3615,8 @@
     <paper id="14">
       <title>Bootstrapping Spoken Dialog Systems with Data Reuse</title>
       <author><first>Guiseppe</first><last>Di Fabbrizio</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <pages>72–80</pages>
       <url hash="29d617de">W04-2314</url>
       <bibkey>di-fabbrizio-etal-2004-bootstrapping</bibkey>
@@ -3648,14 +3648,14 @@
     </paper>
     <paper id="18">
       <title>Prosodic Cues to Discourse Segment Boundaries in Human-Computer Dialogue</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>93–96</pages>
       <url hash="586a7bc5">W04-2318</url>
       <bibkey>levow-2004-prosodic</bibkey>
     </paper>
     <paper id="19">
       <title>The <fixed-case>ICSI</fixed-case> Meeting Recorder Dialog Act (<fixed-case>MRDA</fixed-case>) Corpus</title>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <author><first>Raj</first><last>Dhillon</last></author>
       <author><first>Sonali</first><last>Bhagat</last></author>
       <author><first>Jeremy</first><last>Ang</last></author>
@@ -3674,9 +3674,9 @@
     <paper id="21">
       <title>On the Use of Confidence for Statistical Decision in Dialogue Strategies</title>
       <author><first>Christian</first><last>Raymond</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
-      <author><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
       <pages>102–107</pages>
       <url hash="f8594b9e">W04-2321</url>
       <bibkey>raymond-etal-2004-use</bibkey>
@@ -3684,7 +3684,7 @@
     <paper id="22">
       <title>A Rule Based Approach to Discourse Parsing</title>
       <author><first>Livia</first><last>Polanyi</last></author>
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <author><first>Martin</first><last>van den Berg</last></author>
       <author><first>Gian Lorenzo</first><last>Thione</last></author>
       <author><first>David</first><last>Ahn</last></author>
@@ -3696,8 +3696,8 @@
       <title>Unifying Annotated Discourse Hierarchies to Create a Gold Standard</title>
       <author><first>Marco</first><last>Carbone</last></author>
       <author><first>Ya’akov</first><last>Gal</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
-      <author><first>Barbara</first><last>Grosz</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
+      <author id="barbara-j-grosz"><first>Barbara</first><last>Grosz</last></author>
       <pages>118–126</pages>
       <url hash="9db0d373">W04-2323</url>
       <bibkey>carbone-etal-2004-unifying</bibkey>
@@ -3718,15 +3718,15 @@
     </paper>
     <paper id="26">
       <title>Annotating Student Emotional States in Spoken Tutoring Dialogues</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
       <pages>144–153</pages>
       <url hash="8ddfb7be">W04-2326</url>
       <bibkey>litman-forbes-riley-2004-annotating</bibkey>
     </paper>
     <paper id="27">
       <title>The <fixed-case>MATE</fixed-case>/<fixed-case>GNOME</fixed-case> Proposals for Anaphoric Annotation, Revisited</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>154–162</pages>
       <url hash="af9483ed">W04-2327</url>
       <bibkey>poesio-2004-mate</bibkey>
@@ -3734,7 +3734,7 @@
     <paper id="28">
       <title>Multi-level Dialogue Act Tags</title>
       <author><first>Alexander</first><last>Clark</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>163–170</pages>
       <url hash="e90853bc">W04-2328</url>
       <bibkey>clark-popescu-belis-2004-multi</bibkey>
@@ -3756,14 +3756,14 @@
     <paper id="1">
       <title>A Linear Programming Formulation for Global Inference in Natural Language Tasks</title>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>1–8</pages>
       <url hash="9cc26766">W04-2401</url>
       <bibkey>roth-yih-2004-linear</bibkey>
     </paper>
     <paper id="2">
       <title>Semantic Lexicon Construction: Learning from Unlabeled Data via Spectral Analysis</title>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
       <pages>9–16</pages>
       <url hash="70bf198d">W04-2402</url>
       <bibkey>ando-2004-semantic</bibkey>
@@ -3771,14 +3771,14 @@
     <paper id="3">
       <title>A Semantic Kernel for Predicate Argument Classification</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Cosmin Adrian</first><last>Bejan</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin Adrian</first><last>Bejan</last></author>
       <pages>17–24</pages>
       <url hash="4a845992">W04-2403</url>
       <bibkey>moschitti-bejan-2004-semantic</bibkey>
     </paper>
     <paper id="4">
       <title>Combining Lexical and Syntactic Features for Supervised Word Sense Disambiguation</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <pages>25–32</pages>
       <url hash="14c9cf05">W04-2404</url>
@@ -3786,7 +3786,7 @@
     </paper>
     <paper id="5">
       <title>Co-training and Self-training for Word Sense Disambiguation</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>33–40</pages>
       <url hash="11c9fe9c">W04-2405</url>
       <bibkey>mihalcea-2004-co</bibkey>
@@ -3811,7 +3811,7 @@
     <paper id="8">
       <title>Modeling Category Structures with a Kernel Function</title>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Hiroyasu</first><last>Yamada</last></author>
       <pages>57–64</pages>
       <url hash="31665113">W04-2408</url>
@@ -3843,7 +3843,7 @@
     <paper id="12">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2004 Shared Task: Semantic Role Labeling</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>89–97</pages>
       <url hash="4fe750c2">W04-2412</url>
       <bibkey>carreras-marquez-2004-introduction</bibkey>
@@ -3852,7 +3852,7 @@
       <title>Semantic Role Labelling With Chunk Sequences</title>
       <author><first>Ulrike</first><last>Baldewein</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Detlef</first><last>Prescher</last></author>
       <pages>98–101</pages>
       <url hash="64da9968">W04-2413</url>
@@ -3860,11 +3860,11 @@
     </paper>
     <paper id="14">
       <title>Memory-based semantic role labeling: Optimizing features, algorithm, and output</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Sander</first><last>Canisius</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>102–105</pages>
       <url hash="68d81f54">W04-2414</url>
       <bibkey>van-den-bosch-etal-2004-memory</bibkey>
@@ -3872,19 +3872,19 @@
     <paper id="15">
       <title>Hierarchical Recognition of Propositional Arguments with Perceptrons</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>106–109</pages>
       <url hash="5d80bfdf">W04-2415</url>
       <bibkey>carreras-etal-2004-hierarchical</bibkey>
     </paper>
     <paper id="16">
       <title>Semantic Role Labeling by Tagging Syntactic Chunks</title>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>110–113</pages>
       <url hash="b9057468">W04-2416</url>
       <bibkey>hacioglu-etal-2004-semantic</bibkey>
@@ -3908,7 +3908,7 @@
       <author><first>Joon-Ho</first><last>Lim</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
       <author><first>So-Young</first><last>Park</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>122–125</pages>
       <url hash="8da2e98d">W04-2419</url>
       <bibkey>lim-etal-2004-semantic</bibkey>
@@ -3917,7 +3917,7 @@
       <title>Two-Phase Semantic Role Labeling based on Support Vector Machines</title>
       <author><first>Kyung-Mi</first><last>Park</last></author>
       <author><first>Young-Sook</first><last>Hwang</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>126–129</pages>
       <url hash="c7ebfb26">W04-2420</url>
       <bibkey>park-etal-2004-two</bibkey>
@@ -3926,7 +3926,7 @@
       <title>Semantic Role Labeling Via Generalized Inference Over Classifiers</title>
       <author><first>Vasin</first><last>Punyakanok</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Dav</first><last>Zimak</last></author>
       <author><first>Yuancheng</first><last>Tu</last></author>
       <pages>130–133</pages>
@@ -3936,7 +3936,7 @@
     <paper id="22">
       <title>Learning Transformation Rules for Semantic Role Labeling</title>
       <author><first>Ken</first><last>Williams</last></author>
-      <author><first>Christopher</first><last>Dozier</last></author>
+      <author id="christopher-dozier"><first>Christopher</first><last>Dozier</last></author>
       <author><first>Andrew</first><last>McCulloh</last></author>
       <pages>134–137</pages>
       <url hash="e26fd2f5">W04-2422</url>
@@ -3944,14 +3944,14 @@
     </paper>
     <paper id="23">
       <title>Language Learning: Beyond Thunderdome</title>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>138–138</pages>
       <url hash="ff351e31">W04-2423</url>
       <bibkey>manning-2004-language</bibkey>
     </paper>
     <paper id="24">
       <title>Putting Meaning into Your Trees</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>139–139</pages>
       <url hash="a52519cc">W04-2424</url>
       <bibkey>palmer-2004-putting</bibkey>
@@ -3972,16 +3972,16 @@
     </frontmatter>
     <paper id="1">
       <title>Strategies for Advanced Question Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Finley</first><last>Lacatusu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="finley-lacatusu"><first>Finley</first><last>Lacatusu</last></author>
       <pages>1–9</pages>
       <url hash="505a9a46">W04-2501</url>
       <bibkey>harabagiu-lacatusu-2004-strategies</bibkey>
     </paper>
     <paper id="2">
       <title>Answering Questions Using Advanced Semantics and Probabilistic Inference</title>
-      <author><first>Srini</first><last>Narayanan</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>10–16</pages>
       <url hash="3aacb68d">W04-2502</url>
       <bibkey>narayanan-harabagiu-2004-answering</bibkey>
@@ -3997,7 +3997,7 @@
     </paper>
     <paper id="4">
       <title>Discourse Structure for Context Question Answering</title>
-      <author><first>Joyce Y.</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></author>
       <author><first>Rong</first><last>Jin</last></author>
       <pages>23–30</pages>
       <url hash="3bae6977">W04-2504</url>
@@ -4005,10 +4005,10 @@
     </paper>
     <paper id="5">
       <title>Intentions, Implicatures and Processing of Complex Questions</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Cosmin</first><last>Bejan</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin</first><last>Bejan</last></author>
       <pages>31–42</pages>
       <url hash="baf142f9">W04-2505</url>
       <bibkey>harabagiu-etal-2004-intentions</bibkey>
@@ -4016,20 +4016,20 @@
     <paper id="6">
       <title>A Novel Approach to Focus Identification in Question/Answering Systems</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>43–51</pages>
       <url hash="e4bd3c1e">W04-2506</url>
       <bibkey>moschitti-harabagiu-2004-novel</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>HITIQA</fixed-case>: Scenario Based Question Answering</title>
-      <author><first>Sharon</first><last>Small</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sharon-small"><first>Sharon</first><last>Small</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Sean</first><last>Ryan</last></author>
       <author><first>Robert</first><last>Salkin</last></author>
       <author><first>Nobuyuki</first><last>Shimizu</last></author>
-      <author><first>Paul</first><last>Kantor</last></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last></author>
       <author><first>Diane</first><last>Kelly</last></author>
       <author><first>Robert</first><last>Rittman</last></author>
       <author><first>Nina</first><last>Wacholder</last></author>
@@ -4043,7 +4043,7 @@
       <author><first>Andrew</first><last>Hickl</last></author>
       <author><first>John</first><last>Lehmann</last></author>
       <author><first>John</first><last>Williams</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>60–69</pages>
       <url hash="b30c5327">W04-2508</url>
       <bibkey>hickl-etal-2004-experiments</bibkey>
@@ -4051,8 +4051,8 @@
     <paper id="9">
       <title>Handling Information Access Dialogue through <fixed-case>QA</fixed-case> Technologies - A novel challenge for open-domain question answering</title>
       <author><first>Tsuneaki</first><last>Kato</last></author>
-      <author><first>Jun’ichi</first><last>Fukumoto</last></author>
-      <author><first>Fumito</first><last>Masui</last></author>
+      <author id="junichi-fukumoto"><first>Jun’ichi</first><last>Fukumoto</last></author>
+      <author id="fumito-masui"><first>Fumito</first><last>Masui</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
       <pages>70–77</pages>
       <url hash="b2bf3b65">W04-2509</url>
@@ -4060,11 +4060,11 @@
     </paper>
     <paper id="10">
       <title>Ontological resources and question answering</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Dorte H.</first><last>Hansen</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="dorte-haltrup-hansen"><first>Dorte H.</first><last>Hansen</last></author>
       <author><first>Patrizia</first><last>Paggio</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>78–84</pages>
       <url hash="345b4a60">W04-2510</url>
       <bibkey>basili-etal-2004-ontological</bibkey>
@@ -4087,7 +4087,7 @@
       <title><fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em Methods for Processing Semantic Ellipsis</title>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>1–8</pages>
       <url hash="c54992d9">W04-2601</url>
       <bibkey>mcshane-etal-2004-ontosem-methods</bibkey>
@@ -4095,7 +4095,7 @@
     <paper id="2">
       <title>Towards Full Automation of Lexicon Construction</title>
       <author><first>Richard</first><last>Rohwer</last></author>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <pages>9–16</pages>
       <url hash="5744a986">W04-2602</url>
       <bibkey>rohwer-freitag-2004-towards</bibkey>
@@ -4112,9 +4112,9 @@
     </paper>
     <paper id="4">
       <title>Using prepositions to extend a verb lexicon</title>
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <author><first>Benjamin</first><last>Snyder</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>23–29</pages>
       <url hash="924a4f6f">W04-2604</url>
       <bibkey>kipper-etal-2004-using</bibkey>
@@ -4130,7 +4130,7 @@
     <paper id="6">
       <title>Extended Lexical-Semantic Classification of <fixed-case>E</fixed-case>nglish Verbs</title>
       <author><first>Anna</first><last>Korhonen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>38–45</pages>
       <url hash="c8c8d661">W04-2606</url>
       <bibkey>korhonen-briscoe-2004-extended</bibkey>
@@ -4152,23 +4152,23 @@
     </paper>
     <paper id="9">
       <title>Models for the Semantic Classification of Noun Phrases</title>
-      <author><first>Dan</first><last>Moldovan</last></author>
-      <author><first>Adriana</first><last>Badulescu</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
+      <author id="adriana-badulescu"><first>Adriana</first><last>Badulescu</last></author>
       <author><first>Marta</first><last>Tatu</last></author>
       <author><first>Daniel</first><last>Antohe</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>60–67</pages>
       <url hash="bcbbed4b">W04-2609</url>
       <bibkey>moldovan-etal-2004-models</bibkey>
     </paper>
     <paper id="10">
       <title>Support Vector Machines Applied to the Classification of Semantic Relations in Nominalized Noun Phrases</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Ana-Maria</first><last>Giuglea</last></author>
       <author><first>Marian</first><last>Olteanu</last></author>
       <author><first>Ovidiu</first><last>Fortu</last></author>
       <author><first>Orest</first><last>Bolohan</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>68–75</pages>
       <url hash="9961abc9">W04-2610</url>
       <bibkey>girju-etal-2004-support</bibkey>
@@ -4176,7 +4176,7 @@
     <paper id="11">
       <title>Abstraction Summarization for Managing the Biomedical Research Literature</title>
       <author><first>Marcelo</first><last>Fiszman</last></author>
-      <author><first>Thomas C.</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas C.</first><last>Rindflesch</last></author>
       <author><first>Halil</first><last>Kilicoglu</last></author>
       <pages>76–83</pages>
       <url hash="9ee8c752">W04-2611</url>
@@ -4184,7 +4184,7 @@
     </paper>
     <paper id="12">
       <title>Comparing, Integrating Lexical Definitional Knowledge From Multiple Sources</title>
-      <author><first>Lucja M.</first><last>Iwanska</last></author>
+      <author id="lucja-iwanska"><first>Lucja M.</first><last>Iwanska</last></author>
       <pages>84–91</pages>
       <url hash="133ed55f">W04-2612</url>
       <bibkey>iwanska-2004-comparing</bibkey>
@@ -4220,7 +4220,7 @@
     </frontmatter>
     <paper id="1">
       <title>Introduction to Frontiers in Corpus Annotation</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <pages>1–2</pages>
       <url hash="dd2678b4">W04-2701</url>
       <bibkey>meyers-2004-introduction</bibkey>
@@ -4236,9 +4236,9 @@
     <paper id="3">
       <title>Annotating Discourse Connectives and Their Arguments</title>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>9–16</pages>
       <url hash="e96ba9c8">W04-2703</url>
       <bibkey>miltsakaki-etal-2004-annotating</bibkey>
@@ -4246,9 +4246,9 @@
     <paper id="4">
       <title><fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank <fixed-case>II</fixed-case>: Delving Deeper</title>
       <author><first>Olga</first><last>Babko-Malaya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <pages>17–23</pages>
       <url hash="4d4962fe">W04-2704</url>
@@ -4256,22 +4256,22 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank Project: An Interim Report</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Ruth</first><last>Reeves</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>Rachel</first><last>Szekely</last></author>
       <author><first>Veronika</first><last>Zielinska</last></author>
       <author><first>Brian</first><last>Young</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>24–31</pages>
       <url hash="ef9b7183">W04-2705</url>
       <bibkey>meyers-etal-2004-nombank</bibkey>
     </paper>
     <paper id="6">
       <title>Deep Syntactic Annotation: Tectogrammatical Representation and Beyond</title>
-      <author><first>Petr</first><last>Sgall</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="petr-sgall"><first>Petr</first><last>Sgall</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>32–38</pages>
       <url hash="a6b18bf7">W04-2706</url>
       <bibkey>sgall-etal-2004-deep</bibkey>
@@ -4280,9 +4280,9 @@
       <title>Multi-dimensional annotation of linguistic corpora for investigating information structure</title>
       <author><first>Stefan</first><last>Baumann</last></author>
       <author><first>Caren</first><last>Brinckmann</last></author>
-      <author><first>Silvia</first><last>Hansen-Schirra</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Stella</first><last>Neumann</last></author>
       <author><first>Elke</first><last>Teich</last></author>
       <pages>39–46</pages>
@@ -4291,9 +4291,9 @@
     </paper>
     <paper id="8">
       <title><fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank: Any Hopes for a Common Annotation Scheme?</title>
-      <author><first>Martin</first><last>Čmejrek</last></author>
-      <author><first>Jan</first><last>Cuřín</last></author>
-      <author><first>Jiří</first><last>Havelka</last></author>
+      <author id="martin-cmejrek"><first>Martin</first><last>Čmejrek</last></author>
+      <author id="jan-curin"><first>Jan</first><last>Cuřín</last></author>
+      <author id="jiri-havelka"><first>Jiří</first><last>Havelka</last></author>
       <pages>47–54</pages>
       <url hash="d068ccd9">W04-2708</url>
       <bibkey>cmejrek-etal-2004-prague-czech</bibkey>
@@ -4301,15 +4301,15 @@
     <paper id="9">
       <title>Interlingual Annotation of Multilingual Text Corpora</title>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>David</first><last>Farwell</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Florence</first><last>Reeder</last></author>
-      <author><first>Keith</first><last>Miller</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="florence-reeder"><first>Florence</first><last>Reeder</last></author>
+      <author id="keith-j-miller"><first>Keith</first><last>Miller</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <pages>55–62</pages>
       <url hash="02b6fcea">W04-2709</url>
@@ -4326,8 +4326,8 @@
     </paper>
     <paper id="11">
       <title>Valency Frames of <fixed-case>C</fixed-case>zech Verbs in <fixed-case>VALLEX</fixed-case> 1.0</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <pages>70–77</pages>
       <url hash="6da80fbe">W04-2711</url>
       <bibkey>zabokrtsky-lopatkova-2004-valency</bibkey>
@@ -4348,8 +4348,8 @@
     </frontmatter>
     <paper id="1">
       <title>Robustness versus Fidelity in Natural Language Understanding</title>
-      <author><first>Mark G.</first><last>Core</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="mark-g-core"><first>Mark G.</first><last>Core</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>1–8</pages>
       <url hash="e83877c9">W04-2801</url>
       <bibkey>core-moore-2004-robustness</bibkey>
@@ -4364,7 +4364,7 @@
     </paper>
     <paper id="3">
       <title>A Little Goes a Long Way: Quick Authoring of Semantic Knowledge Sources for Interpretation</title>
-      <author><first>Carolyn Penstein</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></author>
       <author><first>Brian S.</first><last>Hall</last></author>
       <pages>17–24</pages>
       <url hash="72cba88c">W04-2803</url>
@@ -4402,9 +4402,9 @@
     </paper>
     <paper id="7">
       <title>Different Sense Granularities for Different Applications</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Olga</first><last>Babko-Malaya</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>49–56</pages>
       <url hash="ee2fa070">W04-2807</url>
       <bibkey>palmer-etal-2004-different</bibkey>
@@ -4443,7 +4443,7 @@
       <title>A System for Searching and Browsing Spoken Communications</title>
       <author><first>Lee</first><last>Begeja</last></author>
       <author><first>Bernard</first><last>Renger</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <author><first>David</first><last>Gibbon</last></author>
       <author><first>Zhu</first><last>Liu</last></author>
       <author><first>Behzad</first><last>Shahraray</last></author>
@@ -4453,8 +4453,8 @@
     </paper>
     <paper id="2">
       <title>Analysis and Processing of Lecture Audio Data: Preliminary Investigations</title>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Timothy J.</first><last>Hazen</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="timothy-j-hazen"><first>Timothy J.</first><last>Hazen</last></author>
       <author><first>Lee</first><last>Hetherington</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <pages>9–12</pages>
@@ -4467,17 +4467,17 @@
       <author><first>Fred</first><last>Goodman</last></author>
       <author><first>Stanley</first><last>Boykin</last></author>
       <author><first>Randy</first><last>Fish</last></author>
-      <author><first>Warren</first><last>Greiff</last></author>
+      <author id="warren-greiff"><first>Warren</first><last>Greiff</last></author>
       <pages>13–17</pages>
       <url hash="adbdac8b">W04-2903</url>
       <bibkey>hu-etal-2004-audio</bibkey>
     </paper>
     <paper id="4">
       <title>Scoring Algorithms for Wordspotting Systems</title>
-      <author><first>Robert W.</first><last>Morris</last></author>
+      <author id="robert-w-morris"><first>Robert W.</first><last>Morris</last></author>
       <author><first>Jon A.</first><last>Arrowood</last></author>
       <author><first>Peter S.</first><last>Cardillo</last></author>
-      <author><first>Mark A.</first><last>Clements</last></author>
+      <author id="mark-a-clements"><first>Mark A.</first><last>Clements</last></author>
       <pages>18–21</pages>
       <url hash="e2eb5654">W04-2904</url>
       <bibkey>morris-etal-2004-scoring</bibkey>
@@ -4485,14 +4485,14 @@
     <paper id="5">
       <title>Using Soundex Codes for Indexing Names in <fixed-case>ASR</fixed-case> Documents</title>
       <author><first>Hema</first><last>Raghavan</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>22–27</pages>
       <url hash="1b776c43">W04-2905</url>
       <bibkey>raghavan-allan-2004-using</bibkey>
     </paper>
     <paper id="6">
       <title>Assessing Prosodic and Text Features for Segmentation of <fixed-case>M</fixed-case>andarin Broadcast News</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>28–32</pages>
       <url hash="4cb0133f">W04-2906</url>
       <bibkey>levow-2004-assessing</bibkey>
@@ -4501,7 +4501,7 @@
       <title>General Indexation of Weighted Automata - Application to Spoken Utterance Retrieval</title>
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author><first>Mehryar</first><last>Mohri</last></author>
-      <author><first>Murat</first><last>Saraclar</last></author>
+      <author id="murat-saraclar"><first>Murat</first><last>Saraclar</last></author>
       <pages>33–40</pages>
       <url hash="bc22439e">W04-2907</url>
       <bibkey>allauzen-etal-2004-general</bibkey>
@@ -4522,7 +4522,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: Sentence Interpretation using Stochastic Finite State Transducers</title>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>1–1</pages>
       <url hash="a0de4926">W04-3001</url>
       <bibkey>de-mori-2004-invited</bibkey>
@@ -4530,7 +4530,7 @@
     <paper id="2">
       <title>Hybrid Statistical and Structural Semantic Modeling for <fixed-case>T</fixed-case>hai Multi-Stage Spoken Language Understanding</title>
       <author><first>Chai</first><last>Wutiwiwatchai</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>2–9</pages>
       <url hash="a9a878d4">W04-3002</url>
       <bibkey>wutiwiwatchai-furui-2004-hybrid</bibkey>
@@ -4548,7 +4548,7 @@
     </paper>
     <paper id="4">
       <title>Virtual Modality: a Framework for Testing and Building Multimodal Applications</title>
-      <author><first>Péter Pál</first><last>Boda</last></author>
+      <author id="peter-pal-boda"><first>Péter Pál</first><last>Boda</last></author>
       <author><first>Edward</first><last>Filisko</last></author>
       <pages>17–24</pages>
       <url hash="4dfce0dc">W04-3004</url>
@@ -4565,7 +4565,7 @@
     <paper id="6">
       <title>Error Detection and Recovery in Spoken Dialogue Systems</title>
       <author><first>Edward</first><last>Filisko</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>31–38</pages>
       <url hash="7b3ffd85">W04-3006</url>
       <bibkey>filisko-seneff-2004-error</bibkey>
@@ -4573,14 +4573,14 @@
     <paper id="7">
       <title>Robustness Issues in a Data-Driven Spoken Language Understanding System</title>
       <author><first>Yulan</first><last>He</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>39–46</pages>
       <url hash="b29d6c29">W04-3007</url>
       <bibkey>he-young-2004-robustness</bibkey>
     </paper>
     <paper id="8">
       <title>Invited Talk: Spoken Language Understanding: The Research/Industry Chasm</title>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <pages>47–47</pages>
       <url hash="abd31915">W04-3008</url>
       <bibkey>pieraccini-2004-invited</bibkey>
@@ -4589,7 +4589,7 @@
       <title>Using Higher-level Linguistic Knowledge for Speech Recognition Error Correction in a Spoken <fixed-case>Q</fixed-case>/A Dialog</title>
       <author><first>Minwoo</first><last>Jeong</last></author>
       <author><first>Byeongchang</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>48–55</pages>
       <url hash="bee71a9d">W04-3009</url>
       <bibkey>jeong-etal-2004-using</bibkey>
@@ -4613,7 +4613,7 @@
     <paper id="11">
       <title>Modeling Prosodic Consistency for Automatic Speech Recognition: Preliminary Investigations</title>
       <author><first>Ernest&gt;</first><last>Pusateri</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>64–69</pages>
       <url hash="6047004d">W04-3011</url>
       <bibkey>pusateri-glass-2004-modeling</bibkey>
@@ -4650,10 +4650,10 @@
     </frontmatter>
     <paper id="1">
       <title>A Resource for Constructing Customized Test Suites for Molecular Biology Entity Identification Systems</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <author><first>Lorraine</first><last>Tanabe</last></author>
       <author><first>Shuhei</first><last>Kinoshita</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>1–8</pages>
       <url hash="195f1fb0">W04-3101</url>
       <bibkey>cohen-etal-2004-resource</bibkey>
@@ -4669,7 +4669,7 @@
     <paper id="3">
       <title>The Language of Bioscience: Facts, Speculations, and Statements In Between</title>
       <author><first>Marc</first><last>Light</last></author>
-      <author><first>Xin Ying</first><last>Qiu</last></author>
+      <author id="xin-ying-qiu"><first>Xin Ying</first><last>Qiu</last></author>
       <author><first>Padmini</first><last>Srinivasan</last></author>
       <pages>17–24</pages>
       <url hash="3d35479d">W04-3103</url>
@@ -4724,7 +4724,7 @@
     <paper id="10">
       <title>A Large Scale Terminology Resource for Biomedical Text Processing</title>
       <author><first>Henk</first><last>Harkema</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <author><first>Ian</first><last>Roberts</last></author>
@@ -4738,14 +4738,14 @@
       <title>Integrated Annotation for Biomedical Information Extraction</title>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Mark</first><last>Mandel</last></author>
       <author><first>Ryan</first><last>McDonald</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Andrew</first><last>Schein</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Scott</first><last>Winters</last></author>
-      <author><first>Pete</first><last>White</last></author>
+      <author id="peter-white"><first>Pete</first><last>White</last></author>
       <pages>61–68</pages>
       <url hash="8d6d2c2d">W04-3111</url>
       <bibkey>kulick-etal-2004-integrated</bibkey>
@@ -4754,17 +4754,17 @@
       <title>Using Natural Language Processing, <fixed-case>L</fixed-case>ocus<fixed-case>L</fixed-case>ink and the Gene Ontology to Compare <fixed-case>OMIM</fixed-case> to <fixed-case>MEDLINE</fixed-case></title>
       <author><first>Bisharah</first><last>Libbus</last></author>
       <author><first>Halil</first><last>Kilicoglu</last></author>
-      <author><first>Thomas C.</first><last>Rindflesch</last></author>
-      <author><first>James G.</first><last>Mork</last></author>
-      <author><first>Alan R.</first><last>Aronson</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas C.</first><last>Rindflesch</last></author>
+      <author id="james-g-mork"><first>James G.</first><last>Mork</last></author>
+      <author id="alan-r-aronson"><first>Alan R.</first><last>Aronson</last></author>
       <pages>69–76</pages>
       <url hash="ef6e0296">W04-3112</url>
       <bibkey>libbus-etal-2004-using</bibkey>
     </paper>
     <paper id="13">
       <title>A Design Methodology for a Biomedical Literature Indexing Tool Using the Rhetoric of Science</title>
-      <author><first>Robert E.</first><last>Mercer</last></author>
-      <author><first>Chrysanne</first><last>Di Marco</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>Di Marco</last></author>
       <pages>77–84</pages>
       <url hash="d9f56105">W04-3113</url>
       <bibkey>mercer-di-marco-2004-design</bibkey>
@@ -4789,9 +4789,9 @@
       <title>Max-Margin Parsing</title>
       <author><first>Ben</first><last>Taskar</last></author>
       <author><first>Dan</first><last>Klein</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Daphne</first><last>Koller</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1–8</pages>
       <url hash="24b6e1a5">W04-3201</url>
       <bibkey>taskar-etal-2004-max</bibkey>
@@ -4813,15 +4813,15 @@
     </paper>
     <paper id="4">
       <title>Unsupervised <fixed-case>WSD</fixed-case> based on Automatically Retrieved Examples: The Importance of Bias</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>25–32</pages>
       <url hash="04396ea2">W04-3204</url>
       <bibkey>agirre-martinez-2004-unsupervised</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>V</fixed-case>erb<fixed-case>O</fixed-case>cean: Mining the Web for Fine-Grained Semantic Verb Relations</title>
-      <author><first>Timothy</first><last>Chklovski</last></author>
+      <author id="timothy-chklovski"><first>Timothy</first><last>Chklovski</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
       <pages>33–40</pages>
       <url hash="eb4ff533">W04-3205</url>
@@ -4830,7 +4830,7 @@
     <paper id="6">
       <title>Scaling Web-based Acquisition of Entailment Relations</title>
       <author><first>Idan</first><last>Szpektor</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
       <pages>41–48</pages>
@@ -4839,8 +4839,8 @@
     </paper>
     <paper id="7">
       <title>Bilingual Parsing with Factored Estimation: Using <fixed-case>E</fixed-case>nglish to Parse <fixed-case>K</fixed-case>orean</title>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>49–56</pages>
       <url hash="0b0aa129">W04-3207</url>
       <bibkey>smith-smith-2004-bilingual</bibkey>
@@ -4856,9 +4856,9 @@
     <paper id="9">
       <title>Comparing and Combining Generative and Posterior Probability Models: Some Advances in Sentence Boundary Detection in Speech</title>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Andreas</first><last>Stolcke</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="andreas-stolcke"><first>Andreas</first><last>Stolcke</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <pages>64–71</pages>
       <url hash="8f09790b">W04-3209</url>
       <bibkey>liu-etal-2004-comparing</bibkey>
@@ -4873,8 +4873,8 @@
     </paper>
     <paper id="11">
       <title>Mixing Weak Learners in Semantic Parsin</title>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <pages>80–87</pages>
       <url hash="155372e2">W04-3211</url>
       <bibkey>nielsen-pradhan-2004-mixing</bibkey>
@@ -4882,14 +4882,14 @@
     <paper id="12">
       <title>Calibrating Features for Semantic Role Labeling</title>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>88–94</pages>
       <url hash="1797edd4">W04-3212</url>
       <bibkey>xue-palmer-2004-calibrating</bibkey>
     </paper>
     <paper id="13">
       <title>Unsupervised Semantic Role Labellin</title>
-      <author><first>Robert S.</first><last>Swier</last></author>
+      <author id="robert-s-swier"><first>Robert S.</first><last>Swier</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <pages>95–102</pages>
       <url hash="17d3dd0c">W04-3213</url>
@@ -4897,8 +4897,8 @@
     </paper>
     <paper id="14">
       <title>The Influence of Argument Structure on Semantic Role Assignment</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>103–110</pages>
       <url hash="f60a23ec">W04-3214</url>
       <bibkey>pado-boleda-2004-influence</bibkey>
@@ -4906,15 +4906,15 @@
     <paper id="15">
       <title>Object-Extraction and Question-Parsing using <fixed-case>CCG</fixed-case></title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>111–118</pages>
       <url hash="893f8213">W04-3215</url>
       <bibkey>clark-etal-2004-object</bibkey>
     </paper>
     <paper id="16">
       <title>A Phrase-Based <fixed-case>HMM</fixed-case> Approach to Document/Abstract Alignment</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>119–126</pages>
       <url hash="45e16090">W04-3216</url>
@@ -4923,7 +4923,7 @@
     <paper id="17">
       <title>Automatic Analysis of Plot for Story Rewriting</title>
       <author><first>Harry</first><last>Halpin</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Judy</first><last>Robertson</last></author>
       <pages>127–133</pages>
       <url hash="a7f72a5d">W04-3217</url>
@@ -4931,9 +4931,9 @@
     </paper>
     <paper id="18">
       <title>Mining Spoken Dialogue Corpora for System Evaluation and Modelin</title>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>134–141</pages>
       <url hash="28a95543">W04-3218</url>
       <bibkey>bechet-etal-2004-mining</bibkey>
@@ -4942,7 +4942,7 @@
       <title>Monolingual Machine Translation for Paraphrase Generation</title>
       <author><first>Chris</first><last>Quirk</last></author>
       <author><first>Chris</first><last>Brockett</last></author>
-      <author><first>William</first><last>Dolan</last></author>
+      <author id="william-b-dolan"><first>William</first><last>Dolan</last></author>
       <pages>142–149</pages>
       <url hash="86024666">W04-3219</url>
       <bibkey>quirk-etal-2004-monolingual</bibkey>
@@ -4951,7 +4951,7 @@
       <title>Verb Sense and Subcategorization: Using Joint Inference to Improve Performance on Complementary Task</title>
       <author><first>Galen</first><last>Andrew</last></author>
       <author><first>Trond</first><last>Grenager</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>150–157</pages>
       <url hash="cb46492a">W04-3220</url>
       <bibkey>andrew-etal-2004-verb</bibkey>
@@ -4959,7 +4959,7 @@
     <paper id="21">
       <title>Attribute-Based and Value-Based Clustering: An Evaluation</title>
       <author><first>Abdulrahman</first><last>Almuhareb</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>158–165</pages>
       <url hash="23a800bf">W04-3221</url>
       <bibkey>almuhareb-poesio-2004-attribute</bibkey>
@@ -4968,7 +4968,7 @@
       <title>The Leaf Path Projection View of Parse Trees: Exploring String Kernels for <fixed-case>HPSG</fixed-case> Parse Selection</title>
       <author><first>Kristina</first><last>Toutanova</last></author>
       <author><first>Penka</first><last>Markova</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>166–173</pages>
       <url hash="9d3f8f0d">W04-3222</url>
       <bibkey>toutanova-etal-2004-leaf</bibkey>
@@ -4976,14 +4976,14 @@
     <paper id="23">
       <title>Incremental Feature Selection and l1 Regularization for Relaxed Maximum-Entropy Modeling</title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>Alexander</first><last>Vasserman</last></author>
+      <author id="alexander-vasserman"><first>Alexander</first><last>Vasserman</last></author>
       <pages>174–181</pages>
       <url hash="89b33aeb">W04-3223</url>
       <bibkey>riezler-vasserman-2004-incremental</bibkey>
     </paper>
     <paper id="24">
       <title>A Distributional Analysis of a Lexicalized Statistical Parsing Model</title>
-      <author><first>Daniel M.</first><last>Bikel</last></author>
+      <author id="daniel-m-bikel"><first>Daniel M.</first><last>Bikel</last></author>
       <pages>182–189</pages>
       <url hash="6d3bb6d8">W04-3224</url>
       <bibkey>bikel-2004-distributional</bibkey>
@@ -4992,7 +4992,7 @@
       <title>Adaptive Language and Translation Models for Interactive Machine Translation</title>
       <author><first>Laurent</first><last>Nepveu</last></author>
       <author><first>Guy</first><last>Lapalme</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>George</first><last>Foster</last></author>
       <pages>190–197</pages>
       <url hash="6b20882f">W04-3225</url>
@@ -5009,9 +5009,9 @@
     <paper id="27">
       <title>Phrase Pair Rescoring with Term Weighting for Statistical Machine Translation</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>206–213</pages>
       <url hash="ab1466ab">W04-3227</url>
       <bibkey>zhao-etal-2004-phrase</bibkey>
@@ -5034,9 +5034,9 @@
     </paper>
     <paper id="30">
       <title>Applying Conditional Random Fields to <fixed-case>J</fixed-case>apanese Morphological Analysis</title>
-      <author><first>Taku</first><last>Kudo</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
       <author><first>Kaoru</first><last>Yamamoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>230–237</pages>
       <url hash="c8b955a9">W04-3230</url>
       <bibkey>kudo-etal-2004-applying</bibkey>
@@ -5044,7 +5044,7 @@
     <paper id="31">
       <title>A Hybrid Model for Morpho-Syntactic Annotation of <fixed-case>G</fixed-case>erman with a Large Tagset</title>
       <author><first>Julia</first><last>Trushkina</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>238–245</pages>
       <url hash="1196757a">W04-3231</url>
       <bibkey>trushkina-hinrichs-2004-hybrid</bibkey>
@@ -5052,8 +5052,8 @@
     <paper id="32">
       <title>Identifying Broken Plurals in Unvowelised <fixed-case>A</fixed-case>rabic Tex</title>
       <author><first>Abduelbaset</first><last>Goweder</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <author><first>Jeff</first><last>Reynolds</last></author>
       <pages>246–253</pages>
       <url hash="2f2549e9">W04-3232</url>
@@ -5061,7 +5061,7 @@
     </paper>
     <paper id="33">
       <title><fixed-case>NP</fixed-case> Bracketing by Maximum Entropy Tagging and <fixed-case>SVM</fixed-case> Reranking</title>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>254–261</pages>
       <url hash="33e64314">W04-3233</url>
@@ -5069,15 +5069,15 @@
     </paper>
     <paper id="34">
       <title>Trained Named Entity Recognition using Distributional Clusters</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <pages>262–269</pages>
       <url hash="636e1fe2">W04-3234</url>
       <bibkey>freitag-2004-trained</bibkey>
     </paper>
     <paper id="35">
       <title>Error Measures and <fixed-case>B</fixed-case>ayes Decision Rules Revisited with Applications to <fixed-case>POS</fixed-case> Tagging</title>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>David</first><last>Sündermann</last></author>
       <pages>270–276</pages>
       <url hash="04943976">W04-3235</url>
@@ -5101,7 +5101,7 @@
     </paper>
     <paper id="38">
       <title>Spelling Correction as an Iterative Process that Exploits the Collective Knowledge of Web Users</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>Eric</first><last>Brill</last></author>
       <pages>293–300</pages>
       <url hash="d3020fa8">W04-3238</url>
@@ -5109,17 +5109,17 @@
     </paper>
     <paper id="39">
       <title>A Boosting Algorithm for Classification of Semi-Structured Text</title>
-      <author><first>Taku</first><last>Kudo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="taku-kudo"><first>Taku</first><last>Kudo</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>301–308</pages>
       <url hash="32837495">W04-3239</url>
       <bibkey>kudo-matsumoto-2004-boosting</bibkey>
     </paper>
     <paper id="40">
       <title>Learning to Classify Email into “Speech Acts”</title>
-      <author><first>William W.</first><last>Cohen</last></author>
-      <author><first>Vitor R.</first><last>Carvalho</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
+      <author id="vitor-carvalho"><first>Vitor R.</first><last>Carvalho</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>309–316</pages>
       <url hash="ad862e3d">W04-3240</url>
       <bibkey>cohen-etal-2004-learning</bibkey>
@@ -5134,14 +5134,14 @@
     <paper id="42">
       <title>Random Forests in Language Modelin</title>
       <author><first>Peng</first><last>Xu</last></author>
-      <author><first>Frederick</first><last>Jelinek</last></author>
+      <author id="frederick-jelinek"><first>Frederick</first><last>Jelinek</last></author>
       <pages>325–332</pages>
       <url hash="552eeebb">W04-3242</url>
       <bibkey>xu-jelinek-2004-random</bibkey>
     </paper>
     <paper id="43">
       <title>On Log-Likelihood-Ratios and the Significance of Rare Events</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>333–340</pages>
       <url hash="2ffcbb1a">W04-3243</url>
       <bibkey>moore-2004-log</bibkey>
@@ -5159,13 +5159,13 @@
       <title>From Machine Translation to Computer Assisted Translation using Finite-State Models</title>
       <author><first>Jorge</first><last>Civera</last></author>
       <author><first>Elsa</first><last>Cubel</last></author>
-      <author><first>Antonio L.</first><last>Lagarda</last></author>
-      <author><first>David</first><last>Picó</last></author>
+      <author id="antonio-l-lagarda"><first>Antonio L.</first><last>Lagarda</last></author>
+      <author id="david-pico"><first>David</first><last>Picó</last></author>
       <author><first>Jorge</first><last>González</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
-      <author><first>Juan M.</first><last>Vilar</last></author>
-      <author><first>Sergio</first><last>Barrachina</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="juan-miguel-vilar"><first>Juan M.</first><last>Vilar</last></author>
+      <author id="sergio-barrachina"><first>Sergio</first><last>Barrachina</last></author>
       <pages>349–356</pages>
       <url hash="13d48a9b">W04-3245</url>
       <bibkey>civera-etal-2004-machine</bibkey>
@@ -5181,8 +5181,8 @@
     </paper>
     <paper id="47">
       <title><fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank: Prestige in Multi-Document Text Summarization</title>
-      <author><first>Güneş</first><last>Erkan</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="gunes-erkan"><first>Güneş</first><last>Erkan</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <pages>365–371</pages>
       <url hash="f7c650fa">W04-3247</url>
       <bibkey>erkan-radev-2004-lexpagerank</bibkey>
@@ -5190,7 +5190,7 @@
     <paper id="48">
       <title>A New Approach for <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Named Entity Alignment</title>
       <author><first>Donghui</first><last>Feng</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
       <pages>372–379</pages>
       <url hash="4c68088a">W04-3248</url>
@@ -5198,8 +5198,8 @@
     </paper>
     <paper id="49">
       <title>Unsupervised Domain Relevance Estimation for Word Sense Disambiguation</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>380–387</pages>
       <url hash="7873c82d">W04-3249</url>
@@ -5214,15 +5214,15 @@
     </paper>
     <paper id="51">
       <title>Instance-Based Question Answering: A Data-Driven Approach</title>
-      <author><first>Lucian Vlad</first><last>Lita</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lucian-vlad-lita"><first>Lucian Vlad</first><last>Lita</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>396–403</pages>
       <url hash="1a3e7568">W04-3251</url>
       <bibkey>lita-carbonell-2004-instance</bibkey>
     </paper>
     <paper id="52">
       <title><fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank: Bringing Order into Text</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Paul</first><last>Tarau</last></author>
       <pages>404–411</pages>
       <url hash="774484f9">W04-3252</url>
@@ -5239,7 +5239,7 @@
     <paper id="54">
       <title>Evaluating Information Content by Factoid Analysis: Human annotation and stability</title>
       <author><first>Simone</first><last>Teufel</last></author>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <pages>419–426</pages>
       <url hash="e07e31e8">W04-3254</url>
       <bibkey>teufel-van-halteren-2004-evaluating</bibkey>
@@ -5256,7 +5256,7 @@
       <title>Multi-Document Biography Summarization</title>
       <author><first>Liang</first><last>Zhou</last></author>
       <author><first>Miruna</first><last>Ticrea</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>434–441</pages>
       <url hash="af6f30de">W04-3256</url>
       <bibkey>zhou-etal-2004-multi</bibkey>
@@ -5271,7 +5271,7 @@
     <meta>
       <booktitle>Proceedings of the 7th International Workshop on Tree Adjoining Grammar and Related Formalisms</booktitle>
       <url hash="169c8369">W04-33</url>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <editor><first>Matthew</first><last>Stone</last></editor>
       <publisher>Simon Fraser University</publisher>
       <address>Vancouver, Canada</address>
@@ -5306,7 +5306,7 @@
     </paper>
     <paper id="4">
       <title>N-Best Hidden <fixed-case>M</fixed-case>arkov Model Supertagging to Improve Typing on an Ambiguous Keyboard</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Karin</first><last>Harbusch</last></author>
       <pages>24–31</pages>
       <url hash="92a8fe24">W04-3304</url>
@@ -5369,7 +5369,7 @@
     </paper>
     <paper id="12">
       <title>Synchronous Grammars as Tree Transducers</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>88–95</pages>
       <url hash="18741900">W04-3312</url>
       <bibkey>shieber-2004-synchronous</bibkey>
@@ -5384,7 +5384,7 @@
     <paper id="14">
       <title>Generalizing Subcategorization Frames Acquired from Corpora Using Lexicalized Grammars</title>
       <author><first>Naoki</first><last>Yoshinaga</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>104–110</pages>
       <url hash="d2a3800e">W04-3314</url>
       <bibkey>yoshinaga-tsujii-2004-generalizing</bibkey>
@@ -5456,7 +5456,7 @@
       <title>Context-free Approximation of <fixed-case>LTAG</fixed-case> towards <fixed-case>CFG</fixed-case> Filtering</title>
       <author><first>Kenta</first><last>Oouchida</last></author>
       <author><first>Naoki</first><last>Yoshinaga</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>171–177</pages>
       <url hash="32f74b51">W04-3323</url>
       <bibkey>oouchida-etal-2004-context</bibkey>
@@ -5480,7 +5480,7 @@
     <paper id="26">
       <title>Assigning <fixed-case>XTAG</fixed-case> Trees to <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et</title>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Karin</first><last>Kipper</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper</last></author>
       <pages>194–198</pages>
       <url hash="890f3856">W04-3326</url>
       <bibkey>ryant-kipper-2004-assigning</bibkey>
diff --git a/data/xml/W05.xml b/data/xml/W05.xml
index 107b35091c..62398c669c 100644
--- a/data/xml/W05.xml
+++ b/data/xml/W05.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Second <fixed-case>ACL</fixed-case> Workshop on Effective Tools and Methodologies for Teaching <fixed-case>NLP</fixed-case> and <fixed-case>CL</fixed-case></booktitle>
       <url hash="4c585a1e">W05-01</url>
       <editor><first>Chris</first><last>Brew</last></editor>
-      <editor><first>Dragomir</first><last>Radev</last></editor>
+      <editor id="dragomir-radev"><first>Dragomir</first><last>Radev</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
       <month>June</month>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Teaching Applied Natural Language Processing: Triumphs and Tribulations</title>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>1–8</pages>
       <url hash="28e555d6">W05-0101</url>
       <bibkey>hearst-2005-teaching</bibkey>
@@ -35,7 +35,7 @@
       <title>“Language and <fixed-case>C</fixed-case>omputers”: Creating an Introduction for a General Undergraduate Audience</title>
       <author><first>Chris</first><last>Brew</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>W. Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>W. Detmar</first><last>Meurers</last></author>
       <pages>15–22</pages>
       <url hash="178a6aec">W05-0103</url>
       <bibkey>brew-etal-2005-language</bibkey>
@@ -75,8 +75,8 @@
       <title>Language Technology from a <fixed-case>E</fixed-case>uropean Perspective</title>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
-      <author><first>Vladislav</first><last>Kubon</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <author><first>Sabine</first><last>Kirchmeier-Andersen</last></author>
       <pages>43–48</pages>
       <url hash="81a46319">W05-0108</url>
@@ -84,7 +84,7 @@
     </paper>
     <paper id="9">
       <title>Natural Language Processing at the <fixed-case>S</fixed-case>chool of <fixed-case>I</fixed-case>nformation <fixed-case>S</fixed-case>tudies for <fixed-case>A</fixed-case>frica</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Gunnar</first><last>Eriksson</last></author>
       <author><first>Athanassia</first><last>Fourla</last></author>
       <pages>49–56</pages>
@@ -94,7 +94,7 @@
     <paper id="10">
       <title>Teaching Language Technology at the <fixed-case>N</fixed-case>orth-<fixed-case>W</fixed-case>est <fixed-case>U</fixed-case>niversity</title>
       <author><first>Suléne</first><last>Pilon</last></author>
-      <author><first>Gerhard B</first><last>van Huyssteen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard B</first><last>van Huyssteen</last></author>
       <author><first>Bertus</first><last>van Rooy</last></author>
       <pages>57–61</pages>
       <url hash="0179088c">W05-0110</url>
@@ -102,8 +102,8 @@
     </paper>
     <paper id="11">
       <title>Hands-On <fixed-case>NLP</fixed-case> for an Interdisciplinary Audience</title>
-      <author><first>Elizabeth</first><last>Liddy</last></author>
-      <author><first>Nancy</first><last>McCracken</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth</first><last>Liddy</last></author>
+      <author id="nancy-mccracken"><first>Nancy</first><last>McCracken</last></author>
       <pages>62–68</pages>
       <url hash="18e33130">W05-0111</url>
       <bibkey>liddy-mccracken-2005-hands</bibkey>
@@ -129,7 +129,7 @@
       <title>Applications of Lexical Information for Algorithmically Composing Multiple-Choice Cloze Items</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Chun-Hung</first><last>Wang</last></author>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
       <author><first>Shang-Ming</first><last>Huang</last></author>
       <pages>1–8</pages>
       <url hash="1fde8be9">W05-0201</url>
@@ -137,8 +137,8 @@
     </paper>
     <paper id="2">
       <title>Automatic Short Answer Marking</title>
-      <author><first>Stephen G.</first><last>Pulman</last></author>
-      <author><first>Jana Z.</first><last>Sukkarieh</last></author>
+      <author id="stephen-pulman"><first>Stephen G.</first><last>Pulman</last></author>
+      <author id="jana-sukkarieh"><first>Jana Z.</first><last>Sukkarieh</last></author>
       <pages>9–16</pages>
       <url hash="b4667313">W05-0202</url>
       <bibkey>pulman-sukkarieh-2005-automatic</bibkey>
@@ -154,7 +154,7 @@
     <paper id="4">
       <title>Predicting Learning in Tutoring with the Landscape Model of Memory</title>
       <author><first>Arthur</first><last>Ward</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>21–24</pages>
       <url hash="51ab7db3">W05-0204</url>
       <bibkey>ward-litman-2005-predicting</bibkey>
@@ -179,7 +179,7 @@
     </paper>
     <paper id="7">
       <title>Using Syntactic Information to Identify Plagiarism</title>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Boris</first><last>Katz</last></author>
       <author><first>Thade</first><last>Nahnsen</last></author>
       <pages>37–44</pages>
@@ -193,7 +193,7 @@
       <author><first>Carol</first><last>Pai</last></author>
       <author><first>Regan</first><last>Carey</last></author>
       <author><first>Zachary</first><last>Zaiss</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>45–52</pages>
       <url hash="0df8a362">W05-0208</url>
       <bibkey>gweon-etal-2005-towards</bibkey>
@@ -213,7 +213,7 @@
     </paper>
     <paper id="10">
       <title>Measuring Non-native Speakers’ Proficiency of <fixed-case>E</fixed-case>nglish by Using a Test with Automatically-Generated Fill-in-the-Blank Questions</title>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Fumiaki</first><last>Sugaya</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
       <pages>61–68</pages>
@@ -222,10 +222,10 @@
     </paper>
     <paper id="11">
       <title>Evaluating State-of-the-Art <fixed-case>T</fixed-case>reebank-style Parsers for <fixed-case>C</fixed-case>oh-<fixed-case>M</fixed-case>etrix and Other Learning Technology Environments</title>
-      <author><first>Christian F.</first><last>Hempelmann</last></author>
+      <author id="christian-f-hempelmann"><first>Christian F.</first><last>Hempelmann</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
-      <author><first>Arthur C.</first><last>Graesser</last></author>
-      <author><first>Danielle S.</first><last>McNamara</last></author>
+      <author id="arthur-c-graesser"><first>Arthur C.</first><last>Graesser</last></author>
+      <author id="danielle-s-mcnamara"><first>Danielle S.</first><last>McNamara</last></author>
       <pages>69–76</pages>
       <url hash="305e2ef4">W05-0211</url>
       <bibkey>hempelmann-etal-2005-evaluating</bibkey>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="13">
       <title>Situational Language Training for Hotel Receptionists</title>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Thibault</first><last>Parmentier</last></author>
       <author><first>Roberta</first><last>Stock</last></author>
       <author><first>Ran</first><last>Rosner</last></author>
@@ -254,7 +254,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Frontiers in Corpus Annotations <fixed-case>II</fixed-case>: Pie in the Sky</booktitle>
       <url hash="368139d3">W05-03</url>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
       <month>June</month>
@@ -274,18 +274,18 @@
     </paper>
     <paper id="2">
       <title>Merging <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank, <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank, <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>reebank and Coreference</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>5–12</pages>
       <url hash="061d3355">W05-0302</url>
       <bibkey>pustejovsky-etal-2005-merging</bibkey>
     </paper>
     <paper id="3">
       <title>A Unified Representation for Morphological, Syntactic, Semantic, and Referential Annotations</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Karin</first><last>Naumann</last></author>
       <pages>13–20</pages>
       <url hash="fff0001e">W05-0303</url>
@@ -306,8 +306,8 @@
       <author><first>Alan</first><last>Lee</last></author>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>29–36</pages>
       <url hash="88a2b91b">W05-0305</url>
       <bibkey>dinesh-etal-2005-attribution</bibkey>
@@ -315,7 +315,7 @@
     <paper id="6">
       <title>Investigating the Characteristics of Causal Relations in <fixed-case>J</fixed-case>apanese Text</title>
       <author><first>Takashi</first><last>Inui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>37–44</pages>
       <url hash="46b88259">W05-0306</url>
       <bibkey>inui-okumura-2005-investigating</bibkey>
@@ -324,8 +324,8 @@
       <title>A Framework for Annotating Information Structure in Discourse</title>
       <author><first>Sasha</first><last>Calhoun</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Jason</first><last>Brenier</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="jason-brenier"><first>Jason</first><last>Brenier</last></author>
       <pages>45–52</pages>
       <url hash="8d42bc06">W05-0307</url>
       <bibkey>calhoun-etal-2005-framework</bibkey>
@@ -333,14 +333,14 @@
     <paper id="8">
       <title>Annotating Attributions and Private States</title>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>53–60</pages>
       <url hash="e8d6906e">W05-0308</url>
       <bibkey>wilson-wiebe-2005-annotating</bibkey>
     </paper>
     <paper id="9">
       <title>A Parallel <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank <fixed-case>II</fixed-case> for <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Olga</first><last>Babko-Malaya</last></author>
       <author><first>Jinying</first><last>Chen</last></author>
@@ -352,16 +352,16 @@
     <paper id="10">
       <title>Semantically Rich Human-Aided Machine Annotation</title>
       <author><first>Marjorie</first><last>McShane</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Thomas</first><last>O’Hara</last></author>
+      <author id="thomas-p-ohara"><first>Thomas</first><last>O’Hara</last></author>
       <pages>68–75</pages>
       <url hash="81336e76">W05-0310</url>
       <bibkey>mcshane-etal-2005-semantically</bibkey>
     </paper>
     <paper id="11">
       <title>The Reliability of Anaphoric Annotation, Reconsidered: Taking Ambiguity into Account</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <pages>76–83</pages>
       <url hash="9250632e">W05-0311</url>
@@ -379,7 +379,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> Workshop on Feature Engineering for Machine Learning in Natural Language Processing</booktitle>
       <url hash="14fd57be">W05-04</url>
-      <editor><first>Eric</first><last>Ringger</last></editor>
+      <editor id="eric-ringger"><first>Eric</first><last>Ringger</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
       <month>June</month>
@@ -400,8 +400,8 @@
     </paper>
     <paper id="2">
       <title>Feature Engineering and Post-Processing for Temporal Expression Recognition Using Conditional Random Fields</title>
-      <author><first>Sisay</first><last>Fissaha Adafre</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay</first><last>Fissaha Adafre</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>9–16</pages>
       <url hash="b78f068d">W05-0402</url>
       <bibkey>fissaha-adafre-de-rijke-2005-feature</bibkey>
@@ -416,8 +416,8 @@
     </paper>
     <paper id="4">
       <title>Using Semantic and Syntactic Graphs for Call Classification</title>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <author><first>Ananlada</first><last>Chotimongkol</last></author>
       <pages>24–31</pages>
       <url hash="befa0fe9">W05-0404</url>
@@ -426,7 +426,7 @@
     <paper id="5">
       <title>Feature-Based Segmentation of Narrative Documents</title>
       <author><first>David</first><last>Kauchak</last></author>
-      <author><first>Francine</first><last>Chen</last></author>
+      <author id="francine-chen"><first>Francine</first><last>Chen</last></author>
       <pages>32–39</pages>
       <url hash="3163f4af">W05-0405</url>
       <bibkey>kauchak-chen-2005-feature</bibkey>
@@ -435,7 +435,7 @@
       <title>Identifying Non-Referential it: A Machine Learning Approach Incorporating Linguistically Motivated Patterns</title>
       <author><first>Adriane</first><last>Boyd</last></author>
       <author><first>Whitney</first><last>Gegg-Harrison</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <pages>40–47</pages>
       <url hash="66c864ab">W05-0406</url>
       <bibkey>boyd-etal-2005-identifying</bibkey>
@@ -445,7 +445,7 @@
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>48–56</pages>
       <url hash="e7e652bd">W05-0407</url>
       <bibkey>moschitti-etal-2005-engineering</bibkey>
@@ -461,7 +461,7 @@
     <paper id="9">
       <title>Studying Feature Generation from Various Data Representations for Answer Extraction</title>
       <author><first>Dan</first><last>Shen</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>65–72</pages>
       <url hash="43d66eef">W05-0409</url>
@@ -504,7 +504,7 @@
     <paper id="3">
       <title>Using Morphology and Syntax Together in Unsupervised Learning</title>
       <author><first>Yu</first><last>Hu</last></author>
-      <author><first>Irina</first><last>Matveeva</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
       <author><first>John</first><last>Goldsmith</last></author>
       <author><first>Colin</first><last>Sprague</last></author>
       <pages>20–27</pages>
@@ -514,7 +514,7 @@
     <paper id="4">
       <title>Refining the <fixed-case>SED</fixed-case> Heuristic for Morpheme Discovery: Another Look at <fixed-case>S</fixed-case>wahili</title>
       <author><first>Yu</first><last>Hu</last></author>
-      <author><first>Irina</first><last>Matveeva</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
       <author><first>John</first><last>Goldsmith</last></author>
       <author><first>Colin</first><last>Sprague</last></author>
       <pages>28–35</pages>
@@ -524,7 +524,7 @@
     <paper id="5">
       <title>A Connectionist Model of Language-Scene Interaction</title>
       <author><first>Marshall R.</first><last>Mayberry</last></author>
-      <author><first>Matthew W.</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew W.</first><last>Crocker</last></author>
       <author><first>Pia</first><last>Knoeferle</last></author>
       <pages>36–44</pages>
       <url hash="de2cbbbd">W05-0505</url>
@@ -547,7 +547,7 @@
     </paper>
     <paper id="8">
       <title>Statistics vs. <fixed-case>UG</fixed-case> in Language Acquisition: Does a Bigram Analysis Predict Auxiliary Inversion?</title>
-      <author><first>Xuân-Nga Cao</first><last>Kam</last></author>
+      <author id="xuan-nga-cao"><first>Xuân-Nga Cao</first><last>Kam</last></author>
       <author><first>Iglika</first><last>Stoyneshka</last></author>
       <author><first>Lidiya</first><last>Tornyova</last></author>
       <author><first>William Gregory</first><last>Sakas</last></author>
@@ -560,7 +560,7 @@
       <title>Climbing the Path to Grammar: A Maximum Entropy Model of Subject/Object Learning</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <pages>72–81</pages>
       <url hash="a16eeca6">W05-0509</url>
@@ -600,7 +600,7 @@
     </frontmatter>
     <paper id="1">
       <title>Effective use of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Semantics via Kernel-Based Learning</title>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Marco</first><last>Cammisa</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>1–8</pages>
@@ -610,22 +610,22 @@
     <paper id="2">
       <title>A Statistical Semantic Parser that Integrates Syntax and Semantics</title>
       <author><first>Ruifang</first><last>Ge</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>9–16</pages>
       <url hash="d91cafdb">W05-0602</url>
       <bibkey>ge-mooney-2005-statistical</bibkey>
     </paper>
     <paper id="3">
       <title>Search Engine Statistics Beyond the n-Gram: Application to Noun Compound Bracketing</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>17–24</pages>
       <url hash="0117a9ad">W05-0603</url>
       <bibkey>nakov-hearst-2005-search</bibkey>
     </paper>
     <paper id="4">
       <title>New Experiments in Distributional Representations of Synonymy</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Matthias</first><last>Blume</last></author>
       <author><first>John</first><last>Byrnes</last></author>
       <author><first>Edmond</first><last>Chow</last></author>
@@ -640,8 +640,8 @@
       <title>Word Independent Context Pair Classification Model for Word Sense Disambiguation</title>
       <author><first>Cheng</first><last>Niu</last></author>
       <author><first>Wei</first><last>Li</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
-      <author><first>Huifeng</first><last>Li</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="huifeng-li"><first>Huifeng</first><last>Li</last></author>
       <pages>33–39</pages>
       <url hash="86b10c9a">W05-0605</url>
       <bibkey>niu-etal-2005-word</bibkey>
@@ -657,15 +657,15 @@
     <paper id="7">
       <title>A <fixed-case>B</fixed-case>ayesian Mixture Model for Term Re-occurrence and Burstiness</title>
       <author><first>Avik</first><last>Sarkar</last></author>
-      <author><first>Paul H</first><last>Garthwaite</last></author>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="paul-h-garthwaite"><first>Paul H</first><last>Garthwaite</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <pages>48–55</pages>
       <url hash="a26accfd">W05-0607</url>
       <bibkey>sarkar-etal-2005-bayesian</bibkey>
     </paper>
     <paper id="8">
       <title>Domain Kernels for Text Categorization</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>56–63</pages>
       <url hash="1304b4fa">W05-0608</url>
@@ -682,16 +682,16 @@
     <paper id="10">
       <title>Using Uneven Margins <fixed-case>SVM</fixed-case> and Perceptron for Information Extraction</title>
       <author><first>Yaoyong</first><last>Li</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <pages>72–79</pages>
       <url hash="238e79a6">W05-0610</url>
       <bibkey>li-etal-2005-using</bibkey>
     </paper>
     <paper id="11">
       <title>Improving Sequence Segmentation Learning by Predicting Trigrams</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>80–87</pages>
       <url hash="893bd7a9">W05-0611</url>
       <bibkey>van-den-bosch-daelemans-2005-improving</bibkey>
@@ -715,14 +715,14 @@
     <paper id="14">
       <title>Intentional Context in Situated Natural Language Learning</title>
       <author><first>Michael</first><last>Fleischman</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>104–111</pages>
       <url hash="a6871783">W05-0614</url>
       <bibkey>fleischman-roy-2005-intentional</bibkey>
     </paper>
     <paper id="15">
       <title>Representational Bias in Unsupervised Learning of Syllable Structure</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>112–119</pages>
       <url hash="fae880dc">W05-0615</url>
@@ -738,7 +738,7 @@
     </paper>
     <paper id="17">
       <title>Morphology Induction from Term Clusters</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <pages>128–135</pages>
       <url hash="6538e3f2">W05-0617</url>
       <bibkey>freitag-2005-morphology</bibkey>
@@ -754,7 +754,7 @@
     <paper id="19">
       <title>Investigating the Effects of Selective Sampling on the Annotation Task</title>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Markus</first><last>Becker</last></author>
       <pages>144–151</pages>
       <url hash="b9021ec0">W05-0619</url>
@@ -763,24 +763,24 @@
     <paper id="20">
       <title>Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2005 Shared Task: Semantic Role Labeling</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>152–164</pages>
       <url hash="77f9cf0e">W05-0620</url>
       <bibkey>carreras-marquez-2005-introduction</bibkey>
     </paper>
     <paper id="21">
       <title>Inferring Semantic Roles Using Sub-Categorization Frames and Maximum Entropy Model</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Prashanth</first><last>Reddy</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Reddy</last></author>
       <pages>165–168</pages>
       <url hash="ccfff118">W05-0621</url>
       <bibkey>bharati-etal-2005-inferring</bibkey>
     </paper>
     <paper id="22">
       <title>Semantic Role Labelling with Tree Conditional Random Fields</title>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Philip</first><last>Blunsom</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="phil-blunsom"><first>Philip</first><last>Blunsom</last></author>
       <pages>169–172</pages>
       <url hash="66e6c765">W05-0622</url>
       <bibkey>cohn-blunsom-2005-semantic</bibkey>
@@ -789,7 +789,7 @@
       <title>A Joint Model for Semantic Role Labeling</title>
       <author><first>Aria</first><last>Haghighi</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>173–176</pages>
       <url hash="27a524d6">W05-0623</url>
       <bibkey>haghighi-etal-2005-joint</bibkey>
@@ -807,14 +807,14 @@
       <author><first>Peter</first><last>Koomen</last></author>
       <author><first>Vasin</first><last>Punyakanok</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <pages>181–184</pages>
       <url hash="73f80251">W05-0625</url>
       <bibkey>koomen-etal-2005-generalized</bibkey>
     </paper>
     <paper id="26">
       <title>Semantic Role Labeling via Consensus in Pattern-Matching</title>
-      <author><first>Chi-San</first><last>Lin</last></author>
+      <author id="chi-san-althon-lin"><first>Chi-San</first><last>Lin</last></author>
       <author><first>Tony C.</first><last>Smith</last></author>
       <pages>185–188</pages>
       <url hash="9b45b4b8">W05-0626</url>
@@ -833,9 +833,9 @@
     </paper>
     <paper id="28">
       <title>Semantic Role Labeling as Sequential Tagging</title>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Pere</first><last>Comas</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="pere-comas"><first>Pere</first><last>Comas</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
       <author><first>Neus</first><last>Català</last></author>
       <pages>193–196</pages>
       <url hash="4c81796d">W05-0628</url>
@@ -857,7 +857,7 @@
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Ana-Maria</first><last>Giuglea</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>201–204</pages>
       <url hash="d8f78327">W05-0630</url>
       <bibkey>moschitti-etal-2005-hierarchical</bibkey>
@@ -865,7 +865,7 @@
     <paper id="31">
       <title>Semantic Role Labeling Using lib<fixed-case>SVM</fixed-case></title>
       <author><first>Necati Ercan</first><last>Ozgencil</last></author>
-      <author><first>Nancy</first><last>McCracken</last></author>
+      <author id="nancy-mccracken"><first>Nancy</first><last>McCracken</last></author>
       <pages>205–208</pages>
       <url hash="0ebaa86b">W05-0631</url>
       <bibkey>ozgencil-mccracken-2005-semantic</bibkey>
@@ -873,14 +873,14 @@
     <paper id="32">
       <title>Maximum Entropy Based Semantic Role Labeling</title>
       <author><first>Kyung-Mi</first><last>Park</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>209–212</pages>
       <url hash="d35ce57b">W05-0632</url>
       <bibkey>park-rim-2005-maximum</bibkey>
     </paper>
     <paper id="33">
       <title>Semantic Role Labeling Using Lexical Statistical Information</title>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>213–216</pages>
       <url hash="fda0e0c2">W05-0633</url>
@@ -888,11 +888,11 @@
     </paper>
     <paper id="34">
       <title>Semantic Role Chunking Combining Complementary Syntactic Views</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Kadri</first><last>Hacioglu</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="kadri-hacioglu"><first>Kadri</first><last>Hacioglu</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>217–220</pages>
       <url hash="43ae6f2e">W05-0634</url>
       <bibkey>pradhan-etal-2005-semantic</bibkey>
@@ -900,7 +900,7 @@
     <paper id="35">
       <title>Semantic Role Labeling Using Complete Syntactic Analysis</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>221–224</pages>
       <url hash="99c46194">W05-0635</url>
       <bibkey>surdeanu-turmo-2005-semantic</bibkey>
@@ -915,9 +915,9 @@
     </paper>
     <paper id="37">
       <title>Applying Spelling Error Correction Techniques for Improving Semantic Role Labelling</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <author><first>Sander</first><last>Canisius</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Toine</first><last>Bogers</last></author>
       <pages>229–232</pages>
       <url hash="5576bc95">W05-0637</url>
@@ -925,18 +925,18 @@
     </paper>
     <paper id="38">
       <title>Exploiting Full Parsing Information to Label Semantic Roles Using an Ensemble of <fixed-case>ME</fixed-case> and <fixed-case>SVM</fixed-case> via Integer Linear Programming</title>
-      <author><first>Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han</first><last>Tsai</last></author>
       <author><first>Chia-Wei</first><last>Wu</last></author>
       <author><first>Yu-Chun</first><last>Lin</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>233–236</pages>
       <url hash="f5fe6d51">W05-0638</url>
       <bibkey>tsai-etal-2005-exploiting</bibkey>
     </paper>
     <paper id="39">
       <title>The Integration of Syntactic Parsing and Semantic Role Labeling</title>
-      <author><first>Szu-ting</first><last>Yi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="szu-ting-yi"><first>Szu-ting</first><last>Yi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>237–240</pages>
       <url hash="291f00ce">W05-0639</url>
       <bibkey>yi-palmer-2005-integration</bibkey>
@@ -947,7 +947,7 @@
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> Workshop on Computational Approaches to <fixed-case>S</fixed-case>emitic Languages</booktitle>
       <url hash="90b0f266">W05-07</url>
       <editor><first>Kareem</first><last>Darwish</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Nizar</first><last>Habash</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
@@ -962,7 +962,7 @@
     <paper id="1">
       <title>Memory-Based Morphological Analysis Generation and Part-of-Speech Tagging of <fixed-case>A</fixed-case>rabic</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Abdelhadi</first><last>Soudi</last></author>
       <pages>1–8</pages>
       <url hash="2bc77ce6">W05-0701</url>
@@ -979,8 +979,8 @@
     <paper id="3">
       <title>Morphological Analysis and Generation for <fixed-case>A</fixed-case>rabic Dialects</title>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>George</first><last>Kiraz</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="george-anton-kiraz"><first>George</first><last>Kiraz</last></author>
       <pages>17–24</pages>
       <url hash="938d2123">W05-0703</url>
       <bibkey>habash-etal-2005-morphological</bibkey>
@@ -988,7 +988,7 @@
     <paper id="4">
       <title>Examining the Effect of Improved Context Sensitive Morphology on <fixed-case>A</fixed-case>rabic Information Retrieval</title>
       <author><first>Kareem</first><last>Darwish</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
       <pages>25–30</pages>
       <url hash="bcd89162">W05-0704</url>
@@ -998,7 +998,7 @@
       <title>Modifying a Natural Language Processing System for <fixed-case>E</fixed-case>uropean Languages to Treat <fixed-case>A</fixed-case>rabic in Information Processing and Information Retrieval Applications</title>
       <author><first>Gregory</first><last>Grefenstette</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Faïza</first><last>Elkateb-Gara</last></author>
+      <author id="faiza-elkateb-gara"><first>Faïza</first><last>Elkateb-Gara</last></author>
       <pages>31–38</pages>
       <url hash="22ee897f">W05-0705</url>
       <bibkey>grefenstette-etal-2005-modifying</bibkey>
@@ -1006,7 +1006,7 @@
     <paper id="6">
       <title>Choosing an Optimal Architecture for Segmentation and <fixed-case>POS</fixed-case>-Tagging of <fixed-case>M</fixed-case>odern <fixed-case>H</fixed-case>ebrew</title>
       <author><first>Roy</first><last>Bar-Haim</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Yoad</first><last>Winter</last></author>
       <pages>39–46</pages>
       <url hash="7758c6b5">W05-0706</url>
@@ -1014,7 +1014,7 @@
     </paper>
     <paper id="7">
       <title>Part of Speech Tagging for <fixed-case>A</fixed-case>mharic using Conditional Random Fields</title>
-      <author><first>Sisay</first><last>Fissaha Adafre</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay</first><last>Fissaha Adafre</last></author>
       <pages>47–54</pages>
       <url hash="8afbdd39">W05-0707</url>
       <bibkey>fissaha-adafre-2005-part</bibkey>
@@ -1030,9 +1030,9 @@
     <paper id="9">
       <title>The Impact of Morphological Stemming on <fixed-case>A</fixed-case>rabic Mention Detection and Coreference Resolution</title>
       <author><first>Imed</first><last>Zitouni</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
-      <author><first>Xiaoqiang</first><last>Luo</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="xiaoqiang-luo"><first>Xiaoqiang</first><last>Luo</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>63–70</pages>
       <url hash="67fac2ad">W05-0709</url>
       <bibkey>zitouni-etal-2005-impact</bibkey>
@@ -1040,7 +1040,7 @@
     <paper id="10">
       <title>Classifying <fixed-case>A</fixed-case>mharic News Text Using Self-Organizing Maps</title>
       <author><first>Samuel</first><last>Eyassu</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>71–78</pages>
       <url hash="afd71e9e">W05-0710</url>
       <bibkey>eyassu-gamback-2005-classifying</bibkey>
@@ -1048,15 +1048,15 @@
     <paper id="11">
       <title><fixed-case>A</fixed-case>rabic Diacritization Using Weighted Finite-State Transducers</title>
       <author><first>Rani</first><last>Nelken</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>79–86</pages>
       <url hash="7004106a">W05-0711</url>
       <bibkey>nelken-shieber-2005-arabic</bibkey>
     </paper>
     <paper id="12">
       <title>An Integrated Approach for <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Named Entity Translation</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Jeffrey</first><last>Sorensen</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="jeffrey-sorensen"><first>Jeffrey</first><last>Sorensen</last></author>
       <pages>87–93</pages>
       <url hash="26e1fe33">W05-0712</url>
       <bibkey>hassan-sorensen-2005-integrated</bibkey>
@@ -1068,7 +1068,7 @@
       <url hash="6c069b1a">W05-08</url>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Joel</first><last>Martin</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Ted</first><last>Pedersen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1083,14 +1083,14 @@
     </frontmatter>
     <paper id="1">
       <title>Association-Based Bilingual Word Alignment</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>1–8</pages>
       <url hash="9c7e7af3">W05-0801</url>
       <bibkey>moore-2005-association</bibkey>
     </paper>
     <paper id="2">
       <title>Cross Language Text Categorization by Acquiring Multilingual Domain Models from Comparable Corpora</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>9–16</pages>
       <url hash="3e59526d">W05-0802</url>
@@ -1106,8 +1106,8 @@
     <paper id="4">
       <title>Bilingual Word Spectral Clustering for Statistical Machine Translation</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Eric P.</first><last>Xing</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="eric-xing"><first>Eric P.</first><last>Xing</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>25–32</pages>
       <url hash="4f844cbc">W05-0804</url>
       <bibkey>zhao-etal-2005-bilingual</bibkey>
@@ -1121,9 +1121,9 @@
     </paper>
     <paper id="6">
       <title>Augmenting a Small Parallel Text with Morpho-Syntactic Language</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Slobodan</first><last>Jovičić</last></author>
       <author><first>Zoran</first><last>Šarić</last></author>
       <pages>41–48</pages>
@@ -1132,7 +1132,7 @@
     </paper>
     <paper id="7">
       <title>Induction of Fine-Grained Part-of-Speech Taggers via Classifier Combination and Crosslingual Projection</title>
-      <author><first>Elliott</first><last>Drábek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott</first><last>Drábek</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <pages>49–56</pages>
       <url hash="e6561a49">W05-0807</url>
@@ -1141,7 +1141,7 @@
     <paper id="8">
       <title>A Hybrid Approach to Align Sentences and Words in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpora</title>
       <author><first>Niraj</first><last>Aswani</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>57–64</pages>
       <url hash="c2de62d5">W05-0808</url>
       <bibkey>aswani-gaizauskas-2005-hybrid</bibkey>
@@ -1157,7 +1157,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>NUKTI</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>nuktitut Word Alignment System Description</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <pages>75–78</pages>
@@ -1167,7 +1167,7 @@
     <paper id="11">
       <title>Models for <fixed-case>I</fixed-case>nuktitut-<fixed-case>E</fixed-case>nglish Word Alignment</title>
       <author><first>Charles</first><last>Schafer</last></author>
-      <author><first>Elliott</first><last>Drábek</last></author>
+      <author id="elliott-franco-drabek"><first>Elliott</first><last>Drábek</last></author>
       <pages>79–82</pages>
       <url hash="1efc1492">W05-0811</url>
       <bibkey>schafer-drabek-2005-models</bibkey>
@@ -1182,17 +1182,17 @@
     </paper>
     <paper id="13">
       <title>Symmetric Probabilistic Alignment</title>
-      <author><first>Ralf D.</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
       <author><first>Jae Dong</first><last>Kim</last></author>
-      <author><first>Peter J.</first><last>Jansen</last></author>
-      <author><first>Jaime G.</first><last>Carbonell</last></author>
+      <author id="peter-jansen"><first>Peter J.</first><last>Jansen</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime G.</first><last>Carbonell</last></author>
       <pages>87–90</pages>
       <url hash="443ea8c5">W05-0813</url>
       <bibkey>brown-etal-2005-symmetric</bibkey>
     </paper>
     <paper id="14">
       <title><fixed-case>ISI</fixed-case>‘s Participation in the <fixed-case>R</fixed-case>omanian-<fixed-case>E</fixed-case>nglish Alignment Task</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <pages>91–94</pages>
       <url hash="c89f2bc2">W05-0814</url>
@@ -1200,14 +1200,14 @@
     </paper>
     <paper id="15">
       <title>Experiments Using <fixed-case>MAR</fixed-case> for Aligning Corpora</title>
-      <author><first>Juan Miguel</first><last>Vilar</last></author>
+      <author id="juan-miguel-vilar"><first>Juan Miguel</first><last>Vilar</last></author>
       <pages>95–98</pages>
       <url hash="2c1459f6">W05-0815</url>
       <bibkey>vilar-2005-experiments</bibkey>
     </paper>
     <paper id="16">
       <title>Comparison, Selection and Use of Sentence Alignment Algorithms for New Language Pairs</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <pages>99–106</pages>
       <url hash="5a622b26">W05-0816</url>
@@ -1215,10 +1215,10 @@
     </paper>
     <paper id="17">
       <title>Combined Word Alignments</title>
-      <author><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <pages>107–110</pages>
       <url hash="2f91cd4c">W05-0817</url>
       <bibkey>tufis-etal-2005-combined</bibkey>
@@ -1227,7 +1227,7 @@
       <title><fixed-case>LIHLA</fixed-case>: Shared Task System Description</title>
       <author><first>Helena M.</first><last>Caseli</last></author>
       <author><first>Maria G. V.</first><last>Nunes</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>111–114</pages>
       <url hash="98303b5c">W05-0818</url>
       <bibkey>caseli-etal-2005-lihla</bibkey>
@@ -1235,7 +1235,7 @@
     <paper id="19">
       <title>Aligning Words in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpora</title>
       <author><first>Niraj</first><last>Aswani</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>115–118</pages>
       <url hash="207f18e1">W05-0819</url>
       <bibkey>aswani-gaizauskas-2005-aligning</bibkey>
@@ -1271,18 +1271,18 @@
     </paper>
     <paper id="23">
       <title>Statistical Machine Translation of <fixed-case>E</fixed-case>uparl Data by using Bilingual N-grams</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <pages>133–136</pages>
       <url hash="dbd5bc1c">W05-0823</url>
       <bibkey>banchs-etal-2005-statistical</bibkey>
     </paper>
     <paper id="24">
       <title><fixed-case>RALI</fixed-case>: <fixed-case>SMT</fixed-case> Shared Task System Description</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <pages>137–140</pages>
@@ -1292,38 +1292,38 @@
     <paper id="25">
       <title>A Generalized Alignment-Free Phrase Extraction</title>
       <author><first>Bing</first><last>Zhao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>141–144</pages>
       <url hash="144224b3">W05-0825</url>
       <bibkey>zhao-vogel-2005-generalized</bibkey>
     </paper>
     <paper id="26">
       <title>Combining Linguistic Data Views for Phrase-based <fixed-case>SMT</fixed-case></title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>145–148</pages>
       <url hash="8a8e5939">W05-0826</url>
       <bibkey>gimenez-marquez-2005-combining</bibkey>
     </paper>
     <paper id="27">
       <title>Improving Phrase-Based Statistical Translation by Modifying Phrase Extraction and Including Several Features</title>
-      <author><first>Marta</first><last>Ruiz Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>Ruiz Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>149–154</pages>
       <url hash="d7b47f3b">W05-0827</url>
       <bibkey>ruiz-costa-jussa-fonollosa-2005-improving</bibkey>
     </paper>
     <paper id="28">
       <title>First Steps towards Multi-Engine Machine Translation</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <pages>155–158</pages>
       <url hash="2e57fdb0">W05-0828</url>
       <bibkey>eisele-2005-first</bibkey>
     </paper>
     <paper id="29">
       <title>Competitive Grouping in Integrated Phrase Segmentation and Alignment Model</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>159–162</pages>
       <url hash="37cd95a9">W05-0829</url>
       <bibkey>zhang-vogel-2005-competitive</bibkey>
@@ -1342,15 +1342,15 @@
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>167–174</pages>
       <url hash="3551207a">W05-0831</url>
       <bibkey>kanthak-etal-2005-novel</bibkey>
     </paper>
     <paper id="32">
       <title>Gaming Fluency: Evaluating the Bounds and Expectations of Segment-based Translation Memory</title>
-      <author><first>John</first><last>Henderson</last></author>
-      <author><first>William</first><last>Morgan</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
+      <author id="william-morgan"><first>William</first><last>Morgan</last></author>
       <pages>175–182</pages>
       <url hash="89be5bf6">W05-0832</url>
       <bibkey>henderson-morgan-2005-gaming</bibkey>
@@ -1366,15 +1366,15 @@
     <paper id="34">
       <title>Word Graphs for Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>191–198</pages>
       <url hash="38b807d9">W05-0834</url>
       <bibkey>zens-ney-2005-word</bibkey>
     </paper>
     <paper id="35">
       <title>A Recursive Statistical Translation Model</title>
-      <author><first>Juan Miguel</first><last>Vilar</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="juan-miguel-vilar"><first>Juan Miguel</first><last>Vilar</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <pages>199–207</pages>
       <url hash="2412d1c1">W05-0835</url>
       <bibkey>vilar-vidal-2005-recursive</bibkey>
@@ -1383,7 +1383,7 @@
       <title>Training and Evaluating Error Minimization Decision Rules for Statistical Machine Translation</title>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>208–215</pages>
       <url hash="0d30a62b">W05-0836</url>
       <bibkey>venugopal-etal-2005-training</bibkey>
@@ -1393,10 +1393,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and/or Summarization</booktitle>
       <url hash="ae8a65cc">W05-09</url>
-      <editor><first>Jade</first><last>Goldstein</last></editor>
-      <editor><first>Alon</first><last>Lavie</last></editor>
-      <editor><first>Chin-Yew</first><last>Lin</last></editor>
-      <editor><first>Clare</first><last>Voss</last></editor>
+      <editor id="jade-goldstein"><first>Jade</first><last>Goldstein</last></editor>
+      <editor id="alon-lavie"><first>Alon</first><last>Lavie</last></editor>
+      <editor id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></editor>
+      <editor id="clare-voss"><first>Clare</first><last>Voss</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
       <month>June</month>
@@ -1409,10 +1409,10 @@
     </frontmatter>
     <paper id="1">
       <title>A Methodology for Extrinsic Evaluation of Text Summarization: Does <fixed-case>ROUGE</fixed-case> Correlate?</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <author><first>Stacy</first><last>President</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>David</first><last>Zajic</last></author>
       <pages>1–8</pages>
       <url hash="d08a0c2c">W05-0901</url>
@@ -1431,7 +1431,7 @@
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Nicola</first><last>Ueffing</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>17–24</pages>
       <url hash="97f13860">W05-0903</url>
       <bibkey>leusch-etal-2005-preprocessing</bibkey>
@@ -1449,7 +1449,7 @@
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <author><first>Jean</first><last>Carletta</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>33–40</pages>
       <url hash="dcc225f8">W05-0905</url>
       <bibkey>murray-etal-2005-evaluating</bibkey>
@@ -1464,9 +1464,9 @@
     </paper>
     <paper id="7">
       <title>Evaluating <fixed-case>DUC</fixed-case> 2004 Tasks with the <fixed-case>QARLA</fixed-case> Framework</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>49–56</pages>
       <url hash="1e9d751d">W05-0907</url>
@@ -1475,7 +1475,7 @@
     <paper id="8">
       <title>On Some Pitfalls in Automatic Evaluation and Significance Testing for <fixed-case>MT</fixed-case></title>
       <author><first>Stefan</first><last>Riezler</last></author>
-      <author><first>John T.</first><last>Maxwell</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell</last></author>
       <pages>57–64</pages>
       <url hash="38461b6e">W05-0908</url>
       <bibkey>riezler-maxwell-2005-pitfalls</bibkey>
@@ -1493,7 +1493,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case>-<fixed-case>SIGLEX</fixed-case> Workshop on Deep Lexical Acquisition</booktitle>
       <url hash="758cf3af">W05-10</url>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1508,8 +1508,8 @@
     </frontmatter>
     <paper id="1">
       <title>Data Homogeneity and Semantic Role Tagging in <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>1–9</pages>
       <url hash="4550bd87">W05-1001</url>
       <bibkey>kwong-tsou-2005-data</bibkey>
@@ -1517,14 +1517,14 @@
     <paper id="2">
       <title>Verb Subcategorization Kernels for Automatic Semantic Labeling</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>10–17</pages>
       <url hash="a854adcb">W05-1002</url>
       <bibkey>moschitti-basili-2005-verb</bibkey>
     </paper>
     <paper id="3">
       <title>Identifying Concept Attributes Using a Classifier</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Abdulrahman</first><last>Almuhareb</last></author>
       <pages>18–27</pages>
       <url hash="0375c075">W05-1003</url>
@@ -1532,7 +1532,7 @@
     </paper>
     <paper id="4">
       <title>Automatically Learning Qualia Structures from the Web</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Johanna</first><last>Wenderoth</last></author>
       <pages>28–37</pages>
       <url hash="cc94b10f">W05-1004</url>
@@ -1558,7 +1558,7 @@
     <paper id="7">
       <title>Frame Semantic Enhancement of Lexical-Semantic Resources</title>
       <author><first>Rebecca</first><last>Green</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>57–66</pages>
       <url hash="e96bc395">W05-1007</url>
       <bibkey>green-dorr-2005-frame</bibkey>
@@ -1572,17 +1572,17 @@
     </paper>
     <paper id="9">
       <title>Morphology vs. Syntax in Adjective Class Acquisition</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Toni</first><last>Badia</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>77–86</pages>
       <url hash="11af80c7">W05-1009</url>
       <bibkey>boleda-etal-2005-morphology</bibkey>
     </paper>
     <paper id="10">
       <title>Automatic Acquisition of Bilingual Rules for Extraction of Bilingual Word Pairs from Parallel Corpora</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <pages>87–96</pages>
       <url hash="aeeb1829">W05-1010</url>
@@ -1591,7 +1591,7 @@
     <paper id="11">
       <title>Approximate Searching for Distributional Similarity</title>
       <author><first>James</first><last>Gorman</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>97–104</pages>
       <url hash="caf8e29a">W05-1011</url>
       <bibkey>gorman-curran-2005-approximate</bibkey>
@@ -1613,7 +1613,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>T</fixed-case>ext<fixed-case>T</fixed-case>ree Construction for Parser and Treebank Development</title>
-      <author><first>Paula S.</first><last>Newman</last></author>
+      <author id="paula-newman"><first>Paula S.</first><last>Newman</last></author>
       <pages>1–13</pages>
       <url hash="9ed143bf">W05-1101</url>
       <bibkey>newman-2005-texttree</bibkey>
@@ -1628,14 +1628,14 @@
     </paper>
     <paper id="3">
       <title>Interleaved Preparation and Output in the <fixed-case>COMIC</fixed-case> Fission Module</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <pages>34–46</pages>
       <url hash="0ec70159">W05-1103</url>
       <bibkey>foster-2005-interleaved</bibkey>
     </paper>
     <paper id="4">
       <title>Designing an Extensible <fixed-case>API</fixed-case> for Integrating Language Modeling and Realization</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>47-64</pages>
       <url hash="7d10c62f">W05-1104</url>
       <bibkey>white-2005-designing</bibkey>
@@ -1652,10 +1652,10 @@
       <title><fixed-case>H</fixed-case>unmorph: Open Source Word Analysis</title>
       <author><first>Viktor</first><last>Trón</last></author>
       <author><first>Gyögy</first><last>Gyepesi</last></author>
-      <author><first>Péter</first><last>Halácsky</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsky</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <author><first>László</first><last>Németh</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <pages>77–85</pages>
       <url hash="36aa9b11">W05-1106</url>
       <bibkey>tron-etal-2005-hunmorph</bibkey>
@@ -1669,7 +1669,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>XFST</fixed-case>2<fixed-case>FSA</fixed-case>: Comparing Two Finite-State Toolboxes</title>
-      <author><first>Yael</first><last>Cohen-Sygal</last></author>
+      <author id="yael-cohen-sygal"><first>Yael</first><last>Cohen-Sygal</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>100–117</pages>
       <url hash="f7a40eda">W05-1108</url>
@@ -1685,7 +1685,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> Workshop on Empirical Modeling of Semantic Equivalence and Entailment</booktitle>
       <url hash="dcb3ca9e">W05-12</url>
-      <editor><first>Bill</first><last>Dolan</last></editor>
+      <editor id="william-b-dolan"><first>Bill</first><last>Dolan</last></editor>
       <editor><first>Ido</first><last>Dagan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
@@ -1700,7 +1700,7 @@
     <paper id="1">
       <title>Classification of Semantic Relations by Humans and Machines</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>1–6</pages>
       <url hash="de4f714f">W05-1201</url>
       <bibkey>marsi-krahmer-2005-classification</bibkey>
@@ -1708,7 +1708,7 @@
     <paper id="2">
       <title>The Distributional Similarity of Sub-Parses</title>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Bill</first><last>Keller</last></author>
       <pages>7–12</pages>
       <url hash="26cfe31f">W05-1202</url>
@@ -1716,15 +1716,15 @@
     </paper>
     <paper id="3">
       <title>Measuring the Semantic Similarity of Texts</title>
-      <author><first>Courtney</first><last>Corley</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="courtney-d-corley"><first>Courtney</first><last>Corley</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>13–18</pages>
       <url hash="9f3e8f3f">W05-1203</url>
       <bibkey>corley-mihalcea-2005-measuring</bibkey>
     </paper>
     <paper id="4">
       <title>Training Data Modification for <fixed-case>SMT</fixed-case> Considering Groups of Synonymous Sentences</title>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <pages>19–24</pages>
       <url hash="57f80bc9">W05-1204</url>
       <bibkey>kashioka-2005-training</bibkey>
@@ -1747,8 +1747,8 @@
     </paper>
     <paper id="7">
       <title>Discovering Entailment Relations Using “Textual Entailment Patterns”</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <pages>37–42</pages>
       <url hash="064efc65">W05-1207</url>
@@ -1784,10 +1784,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case>-<fixed-case>ISMB</fixed-case> Workshop on Linking Biological Literature, Ontologies and Databases: Mining Biological Semantics</booktitle>
       <url hash="156516e5">W05-13</url>
-      <editor><first>K. Bretonnel</first><last>Cohen</last></editor>
-      <editor><first>Lynette</first><last>Hirschman</last></editor>
+      <editor id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></editor>
+      <editor id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></editor>
       <editor><first>Hagit</first><last>Shatkay</last></editor>
-      <editor><first>Christian</first><last>Blaschke</last></editor>
+      <editor id="christian-blaschke"><first>Christian</first><last>Blaschke</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Detroit</address>
       <month>June</month>
@@ -1800,16 +1800,16 @@
     </frontmatter>
     <paper id="1">
       <title>Weakly Supervised Learning Methods for Improving the Quality of Gene Name Normalization Data</title>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <pages>1–8</pages>
       <url hash="0a5caf32">W05-1301</url>
       <bibkey>wellner-2005-weakly</bibkey>
     </paper>
     <paper id="2">
       <title>Adaptive String Similarity Metrics for Biomedical Reference Resolution</title>
-      <author><first>Ben</first><last>Wellner</last></author>
-      <author><first>José</first><last>Castaño</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
+      <author id="jose-castano"><first>José</first><last>Castaño</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>9–16</pages>
       <url hash="f2546dc1">W05-1302</url>
       <bibkey>wellner-etal-2005-adaptive</bibkey>
@@ -1825,7 +1825,7 @@
       <title>A Machine Learning Approach to Acronym Generation</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>25–31</pages>
       <url hash="5164116b">W05-1304</url>
       <bibkey>tsuruoka-etal-2005-machine</bibkey>
@@ -1834,7 +1834,7 @@
       <title><fixed-case>M</fixed-case>ed<fixed-case>T</fixed-case>ag: A Collection of Biomedical Annotations</title>
       <author><first>Lawrence H.</first><last>Smith</last></author>
       <author><first>Lorraine</first><last>Tanabe</last></author>
-      <author><first>Thomas</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas</first><last>Rindflesch</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <pages>32–37</pages>
       <url hash="ef81644b">W05-1305</url>
@@ -1844,8 +1844,8 @@
       <title>Corpus Design for Biomedical Natural Language Processing</title>
       <author><first>K. Bretonnel</first><last>Cohen</last></author>
       <author><first>Lynne</first><last>Fox</last></author>
-      <author><first>Philip V.</first><last>Ogren</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="philip-ogren"><first>Philip V.</first><last>Ogren</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>38–45</pages>
       <url hash="06a9615f">W05-1306</url>
       <bibkey>cohen-etal-2005-corpus</bibkey>
@@ -1853,8 +1853,8 @@
     <paper id="7">
       <title>Using Biomedical Literature Mining to Consolidate the Set of Known Human Protein-Protein Interactions</title>
       <author><first>Arun</first><last>Ramani</last></author>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Edward</first><last>Marcotte</last></author>
       <pages>46–53</pages>
       <url hash="2002ced7">W05-1307</url>
@@ -1875,8 +1875,8 @@
     <meta>
       <booktitle>Proceedings of the Ninth International Workshop on Parsing Technology</booktitle>
       <url hash="d25ce8c8">W05-15</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
-      <editor><first>Robert</first><last>Malouf</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
+      <editor id="robert-malouf"><first>Robert</first><last>Malouf</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, British Columbia</address>
       <month>October</month>
@@ -1890,7 +1890,7 @@
     <paper id="1">
       <title>Efficient and Robust <fixed-case>LFG</fixed-case> Parsing: <fixed-case>S</fixed-case>x<fixed-case>LFG</fixed-case></title>
       <author><first>Pierre</first><last>Boullier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>1–10</pages>
       <url hash="0ee215dd">W05-1501</url>
       <bibkey>boullier-sagot-2005-efficient</bibkey>
@@ -1898,7 +1898,7 @@
     <paper id="2">
       <title>Parsing Linear Context-Free Rewriting Systems</title>
       <author><first>Håkan</first><last>Burden</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>11–17</pages>
       <url hash="c8c509d1">W05-1502</url>
       <bibkey>burden-ljunglof-2005-parsing</bibkey>
@@ -1913,15 +1913,15 @@
     </paper>
     <paper id="4">
       <title>Parsing with Soft and Hard Constraints on Dependency Length</title>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>30–41</pages>
       <url hash="db7527c2">W05-1504</url>
       <bibkey>eisner-smith-2005-parsing</bibkey>
     </paper>
     <paper id="5">
       <title>Corrective Modeling for Non-Projective Dependency Parsing</title>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Václav</first><last>Novák</last></author>
       <pages>42–52</pages>
       <url hash="c7269725">W05-1505</url>
@@ -1955,7 +1955,7 @@
     </paper>
     <paper id="9">
       <title>Lexical and Structural Biases for Function Parsing</title>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>83–92</pages>
       <url hash="f8d25014">W05-1509</url>
@@ -1965,7 +1965,7 @@
       <title>Probabilistic Models for Disambiguation of an <fixed-case>HPSG</fixed-case>-Based Chart Generator</title>
       <author><first>Hiroko</first><last>Nakanishi</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>93–102</pages>
       <url hash="1c7d21cb">W05-1510</url>
       <bibkey>nakanishi-etal-2005-probabilistic</bibkey>
@@ -1975,7 +1975,7 @@
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>103–114</pages>
       <url hash="22637208">W05-1511</url>
       <bibkey>ninomiya-etal-2005-efficacy</bibkey>
@@ -1990,7 +1990,7 @@
     <paper id="13">
       <title>A Classifier-Based Parser with Linear Run-Time Complexity</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>125–132</pages>
       <url hash="061bf90f">W05-1513</url>
       <bibkey>sagae-lavie-2005-classifier</bibkey>
@@ -1998,14 +1998,14 @@
     <paper id="14">
       <title>Chunk Parsing Revisited</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>133–140</pages>
       <url hash="9a203296">W05-1514</url>
       <bibkey>tsuruoka-tsujii-2005-chunk</bibkey>
     </paper>
     <paper id="15">
       <title>Constituent Parsing by Classification</title>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <author><first>I. Dan</first><last>Melamed</last></author>
       <pages>141–151</pages>
       <url hash="5b848123">W05-1515</url>
@@ -2023,16 +2023,16 @@
     <paper id="17">
       <title>Efficient Extraction of Grammatical Relations</title>
       <author><first>Rebecca</first><last>Watson</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>160–170</pages>
       <url hash="9e61038a">W05-1517</url>
       <bibkey>watson-etal-2005-efficient</bibkey>
     </paper>
     <paper id="18">
       <title>Improving Parsing Accuracy by Combining Diverse Dependency Parsers</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>171–178</pages>
       <url hash="d9efcbb7">W05-1518</url>
       <bibkey>zeman-zabokrtsky-2005-improving</bibkey>
@@ -2049,7 +2049,7 @@
       <title>Statistical Shallow Semantic Parsing despite Little Training Data</title>
       <author><first>Rahul</first><last>Bhagat</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>186–187</pages>
       <url hash="a03b221a">W05-1520</url>
       <bibkey>bhagat-etal-2005-statistical</bibkey>
@@ -2063,7 +2063,7 @@
     </paper>
     <paper id="22">
       <title>From metagrammars to factorized <fixed-case>TAG</fixed-case>/<fixed-case>TIG</fixed-case> parsers</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>190–191</pages>
       <url hash="bdc9316f">W05-1522</url>
       <bibkey>villemonte-de-la-clergerie-2005-metagrammars</bibkey>
@@ -2077,18 +2077,18 @@
     </paper>
     <paper id="24">
       <title><fixed-case>TFLEX</fixed-case>: Speeding Up Deep Parsing with Strategic Pruning</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Carolyn P.</first><last>Rose</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rose</last></author>
       <pages>194–195</pages>
       <url hash="b6423a77">W05-1524</url>
       <bibkey>dzikovska-rose-2005-tflex</bibkey>
     </paper>
     <paper id="25">
       <title>Generic Parsing for Multi-Domain Semantic Interpretation</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>William</first><last>de Beaumont</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="william-de-beaumont"><first>William</first><last>de Beaumont</last></author>
       <pages>196–197</pages>
       <url hash="df20026a">W05-1525</url>
       <bibkey>dzikovska-etal-2005-generic</bibkey>
@@ -2096,8 +2096,8 @@
     <paper id="26">
       <title>Online Statistics for a Unification-Based Dialogue Parser</title>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Daniel</first><last>Gildea</last></author>
       <pages>198–199</pages>
       <url hash="86552051">W05-1526</url>
@@ -2105,11 +2105,11 @@
     </paper>
     <paper id="27">
       <title><fixed-case>SUPPLE</fixed-case>: A Practical Parser for Natural Language Engineering Applications</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Mark A.</first><last>Greenwood</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
+      <author id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
       <pages>200–201</pages>
       <url hash="cfa82cac">W05-1527</url>
       <bibkey>gaizauskas-etal-2005-supple</bibkey>
@@ -2125,7 +2125,7 @@
       <title>Robust Extraction of Subcategorization Data from Spoken Language</title>
       <author><first>Jianguo</first><last>Li</last></author>
       <author><first>Chris</first><last>Brew</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>204–205</pages>
       <url hash="74516f4b">W05-1529</url>
       <bibkey>li-etal-2005-robust</bibkey>
@@ -2134,9 +2134,9 @@
   <volume id="16" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Tenth <fixed-case>E</fixed-case>uropean Workshop on Natural Language Generation (<fixed-case>ENLG</fixed-case>-05)</booktitle>
-      <editor><first>Graham</first><last>Wilcock</last></editor>
-      <editor><first>Kristiina</first><last>Jokinen</last></editor>
-      <editor><first>Chris</first><last>Mellish</last></editor>
+      <editor id="graham-wilcock"><first>Graham</first><last>Wilcock</last></editor>
+      <editor id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></editor>
+      <editor id="chris-mellish"><first>Chris</first><last>Mellish</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Aberdeen, Scotland</address>
@@ -2150,7 +2150,7 @@
     </frontmatter>
     <paper id="1">
       <title>Statistical Generation: Three Methods Compared and Evaluated</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <url hash="46ba00a0">W05-1601</url>
       <bibkey>belz-2005-statistical</bibkey>
     </paper>
@@ -2158,7 +2158,7 @@
       <title>Interactive Authoring of Logical Forms for Multilingual Generation</title>
       <author><first>Ofer</first><last>Biller</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
-      <author><first>Yael</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Netzer</last></author>
       <url hash="5d37fe9a">W05-1602</url>
       <bibkey>biller-etal-2005-interactive</bibkey>
     </paper>
@@ -2170,7 +2170,7 @@
     </paper>
     <paper id="4">
       <title>Real-Time Stochastic Language Generation for Dialogue Systems</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <url hash="d65ce8cf">W05-1604</url>
       <bibkey>chambers-2005-real</bibkey>
     </paper>
@@ -2189,8 +2189,8 @@
     </paper>
     <paper id="7">
       <title>A Context-dependent Algorithm for Generating Locative Expressions in Physically Situated Environments</title>
-      <author><first>John</first><last>Kelleher</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <url hash="e92dbf93">W05-1607</url>
       <bibkey>kelleher-kruijff-2005-context</bibkey>
     </paper>
@@ -2203,13 +2203,13 @@
     </paper>
     <paper id="9">
       <title>Context-sensitive Utterance Planning for <fixed-case>CCG</fixed-case></title>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <url hash="3003020a">W05-1609</url>
       <bibkey>kruijff-2005-context</bibkey>
     </paper>
     <paper id="10">
       <title>Narratological Knowledge for Natural Language Generation</title>
-      <author><first>Birte</first><last>Lönneker</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker</last></author>
       <url hash="7c7b8544">W05-1610</url>
       <bibkey>lonneker-2005-narratological</bibkey>
     </paper>
@@ -2223,7 +2223,7 @@
     <paper id="12">
       <title>Explorations in Sentence Fusion</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <url hash="2c709b95">W05-1612</url>
       <bibkey>marsi-krahmer-2005-explorations</bibkey>
     </paper>
@@ -2236,13 +2236,13 @@
     </paper>
     <paper id="14">
       <title>Computational Mechanisms for Pun Generation</title>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <url hash="540d77e6">W05-1614</url>
       <bibkey>ritchie-2005-computational</bibkey>
     </paper>
     <paper id="15">
       <title>Evaluation of an <fixed-case>NLG</fixed-case> System using Post-Edit Data: Lessons Learnt</title>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Lezan</first><last>Hawizy</last></author>
       <url hash="84976704">W05-1615</url>
@@ -2266,20 +2266,20 @@
     <paper id="18">
       <title>Towards Generating Procedural Texts: An Exploration of their Rhetorical and Argumentative Structure</title>
       <author><first>Farida</first><last>Aouladomar</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="e5ba7d95">W05-1618</url>
       <bibkey>aouladomar-saint-dizier-2005-towards</bibkey>
     </paper>
     <paper id="19">
       <title>The Types and Distributions of Errors in a Wide Coverage Surface Realizer Evaluation</title>
-      <author><first>Charles</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
       <url hash="2c824e05">W05-1619</url>
       <bibkey>callaway-2005-types</bibkey>
     </paper>
     <paper id="20">
       <title>An Evolutionary Approach to Referring Expression Generation and Aggregation</title>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <url hash="bd7175ba">W05-1620</url>
       <bibkey>hervas-gervas-2005-evolutionary</bibkey>
     </paper>
@@ -2304,12 +2304,12 @@
     </paper>
     <paper id="24">
       <title>An Experiment Setup for Collecting Data for Adaptive Output Planning in a Multimodal Dialogue System</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
-      <author><first>Nate</first><last>Blaylock</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
       <author><first>Ciprian</first><last>Gerstenberger</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Tilman</first><last>Becker</last></author>
-      <author><first>Michael</first><last>Kaisser</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
       <author><first>Peter</first><last>Poller</last></author>
       <author><first>Jan</first><last>Schehl</last></author>
       <url hash="f61af010">W05-1624</url>
@@ -2317,7 +2317,7 @@
     </paper>
     <paper id="25">
       <title>Answer Generation with Temporal Data Integration</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <url hash="92fece6a">W05-1625</url>
       <bibkey>moriceau-2005-answer</bibkey>
     </paper>
@@ -2360,8 +2360,8 @@
     <paper id="1">
       <title>Robust stochastic parsing: Comparing and combining two approaches for processing extra-grammatical sentences</title>
       <author><first>Marita</first><last>Ailomaa</last></author>
-      <author><first>Vladimír</first><last>Kadlec</last></author>
-      <author><first>Martin</first><last>Rajman</last></author>
+      <author id="vladimir-kadlec"><first>Vladimír</first><last>Kadlec</last></author>
+      <author id="martin-rajman"><first>Martin</first><last>Rajman</last></author>
       <author><first>Jean-Cédric</first><last>Chappelier</last></author>
       <pages>1–7</pages>
       <url hash="3a8dc69f">W05-1701</url>
@@ -2386,7 +2386,7 @@
     </paper>
     <paper id="4">
       <title>Dictionary acquisition using parallel text and co-occurrence statistics</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <pages>22–29</pages>
       <url hash="0d4c7f75">W05-1704</url>
@@ -2487,7 +2487,7 @@
     <paper id="17">
       <title>Synthetic regional <fixed-case>D</fixed-case>anish</title>
       <author><first>Bodil</first><last>Kyst</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>116–123</pages>
       <url hash="929d1143">W05-1717</url>
       <bibkey>kyst-henrichsen-2006-synthetic</bibkey>
@@ -2509,7 +2509,7 @@
     </paper>
     <paper id="20">
       <title><fixed-case>SU</fixed-case>i<fixed-case>S</fixed-case>–cross-language ontology-driven information retrieval in a restricted domain</title>
-      <author><first>Kristina</first><last>Nilsson</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson</last></author>
       <author><first>Hans</first><last>Hjelm</last></author>
       <author><first>Henrik</first><last>Oxhammar</last></author>
       <pages>139–145</pages>
@@ -2518,7 +2518,7 @@
     </paper>
     <paper id="21">
       <title>Towards automatic recognition of product names: an exploratory study of brand names in economic texts</title>
-      <author><first>Kristina</first><last>Nilsson</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson</last></author>
       <author><first>Aisha</first><last>Malmgren</last></author>
       <pages>146–155</pages>
       <url hash="962d5c79">W05-1721</url>
@@ -2556,15 +2556,15 @@
     </paper>
     <paper id="26">
       <title><fixed-case>D</fixed-case>an<fixed-case>PO</fixed-case>–a transcription-based dictionary for <fixed-case>D</fixed-case>anish speech technology</title>
-      <author><first>Peter</first><last>Rossen Skadhauge</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-rossen-skadhauge"><first>Peter</first><last>Rossen Skadhauge</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>186–192</pages>
       <url hash="07859c79">W05-1726</url>
       <bibkey>rossen-skadhauge-henrichsen-2006-danpo</bibkey>
     </paper>
     <paper id="27">
       <title>Functionality in grammar design</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Petter</first><last>Haugereid</last></author>
       <pages>193–202</pages>
       <url hash="1c4ccb9c">W05-1727</url>
@@ -2580,7 +2580,7 @@
     <paper id="29">
       <title>Rigorous dimensionality reduction through linguistically motivated feature selection for text categorization</title>
       <author><first>Hans Friedrich</first><last>Witschel</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>210–217</pages>
       <url hash="1746a2a2">W05-1729</url>
       <bibkey>witschel-biemann-2006-rigorous</bibkey>
diff --git a/data/xml/W06.xml b/data/xml/W06.xml
index 9f44b300be..fab204d231 100644
--- a/data/xml/W06.xml
+++ b/data/xml/W06.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the Fifth <fixed-case>SIGHAN</fixed-case> Workshop on <fixed-case>C</fixed-case>hinese Language Processing</booktitle>
       <url hash="ade77d3a">W06-01</url>
       <editor><first>Hwee Tou</first><last>Ng</last></editor>
-      <editor><first>Olivia O.Y.</first><last>Kwong</last></editor>
+      <editor id="olivia-o-y-kwong"><first>Olivia O.Y.</first><last>Kwong</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
       <month>July</month>
@@ -19,7 +19,7 @@
     <paper id="1">
       <title>Improving Context Vector Models by Feature Clustering for Automatic Thesaurus Construction</title>
       <author><first>Jia-Ming</first><last>You</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>1–8</pages>
       <url hash="9ff491d4">W06-0101</url>
       <bibkey>you-chen-2006-improving</bibkey>
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Regional Variation of Domain-Specific Lexical Items: Toward a Pan-<fixed-case>C</fixed-case>hinese Lexical Resource</title>
       <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>9–16</pages>
       <url hash="e3e67bf5">W06-0102</url>
       <bibkey>kwong-tsou-2006-regional</bibkey>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="6">
       <title>A Clustering Approach for Unsupervised <fixed-case>C</fixed-case>hinese Coreference Resolution</title>
-      <author><first>Chi-shing</first><last>Wang</last></author>
+      <author id="chi-shing-wang"><first>Chi-shing</first><last>Wang</last></author>
       <author><first>Grace</first><last>Ngai</last></author>
       <pages>40–47</pages>
       <url hash="d8d391d4">W06-0106</url>
@@ -66,8 +66,8 @@
     <paper id="7">
       <title>Latent Features in Automatic Tense Translation between <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Yang</first><last>Ye</last></author>
-      <author><first>Victoria Li</first><last>Fossum</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="victoria-fossum"><first>Victoria Li</first><last>Fossum</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>48–55</pages>
       <url hash="1b745004">W06-0107</url>
       <bibkey>ye-etal-2006-latent</bibkey>
@@ -92,7 +92,7 @@
       <title>Hybrid Models for <fixed-case>C</fixed-case>hinese Named Entity Recognition</title>
       <author><first>Lishuang</first><last>Li</last></author>
       <author><first>Tingting</first><last>Mao</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Yuansheng</first><last>Yang</last></author>
       <pages>72–78</pages>
       <url hash="70aa370a">W06-0110</url>
@@ -110,7 +110,7 @@
     <paper id="12">
       <title>A Hybrid Approach to <fixed-case>C</fixed-case>hinese Base Noun Phrase Chunking</title>
       <author><first>Fang</first><last>Xu</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Jun</first><last>Zhao</last></author>
       <pages>87–93</pages>
       <url hash="b64cb8d3">W06-0112</url>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="15">
       <title>The Third International <fixed-case>C</fixed-case>hinese Language Processing Bakeoff: Word Segmentation and Named Entity Recognition</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>108–117</pages>
       <url hash="561143cb">W06-0115</url>
       <bibkey>levow-2006-third</bibkey>
@@ -180,11 +180,11 @@
     </paper>
     <paper id="20">
       <title>On Closed Task of <fixed-case>C</fixed-case>hinese Word Segmentation: An Improved <fixed-case>CRF</fixed-case> Model Coupled with Character Clustering and Automatically Generated Template Matching</title>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <author><first>Hsieh-Chuan</first><last>Hung</last></author>
       <author><first>Cheng-Lung</first><last>Sung</last></author>
       <author><first>Hong-Jie</first><last>Dai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>134–137</pages>
       <url hash="e210c271">W06-0120</url>
       <bibkey>tsai-etal-2006-closed</bibkey>
@@ -192,7 +192,7 @@
     <paper id="21">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation with Maximum Entropy and N-gram Language Model</title>
       <author><first>Xinhao</first><last>Wang</last></author>
-      <author><first>Xiaojun</first><last>Lin</last></author>
+      <author id="xiaojun-lin"><first>Xiaojun</first><last>Lin</last></author>
       <author><first>Dianhai</first><last>Yu</last></author>
       <author><first>Hao</first><last>Tian</last></author>
       <author><first>Xihong</first><last>Wu</last></author>
@@ -204,8 +204,8 @@
       <title>On Using Ensemble Methods for <fixed-case>C</fixed-case>hinese Named Entity Recognition</title>
       <author><first>Chia-Wei</first><last>Wu</last></author>
       <author><first>Shyh-Yi</first><last>Jan</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>142–145</pages>
       <url hash="1efe895e">W06-0122</url>
       <bibkey>wu-etal-2006-using</bibkey>
@@ -232,9 +232,9 @@
     <paper id="25">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation and Named Entity Recognition Based on a Context-Dependent Mutual Information Independence Model</title>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>GuoDong</first><last>Zhou</last></author>
-      <author><first>LingPeng</first><last>Yang</last></author>
-      <author><first>DongHong</first><last>Ji</last></author>
+      <author id="guodong-zhou"><first>GuoDong</first><last>Zhou</last></author>
+      <author id="lingpeng-yang"><first>LingPeng</first><last>Yang</last></author>
+      <author id="donghong-ji"><first>DongHong</first><last>Ji</last></author>
       <pages>154–157</pages>
       <url hash="ed9c76dd">W06-0125</url>
       <bibkey>zhang-etal-2006-chinese</bibkey>
@@ -243,7 +243,7 @@
       <title>Word Segmentation and Named Entity Recognition for <fixed-case>SIGHAN</fixed-case> Bakeoff3</title>
       <author><first>Suxiang</first><last>Zhang</last></author>
       <author><first>Ying</first><last>Qin</last></author>
-      <author><first>Juan</first><last>Wen</last></author>
+      <author id="wen-juan-hou"><first>Juan</first><last>Wen</last></author>
       <author><first>Xiaojie</first><last>Wang</last></author>
       <pages>158–161</pages>
       <url hash="339dd718">W06-0126</url>
@@ -252,7 +252,7 @@
     <paper id="27">
       <title>An Improved <fixed-case>C</fixed-case>hinese Word Segmentation System with Conditional Random Field</title>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Mu</first><last>Li</last></author>
       <pages>162–165</pages>
       <url hash="5a7919dc">W06-0127</url>
@@ -313,7 +313,7 @@
       <title>A Pragmatic <fixed-case>C</fixed-case>hinese Word Segmentation System</title>
       <author><first>Wei</first><last>Jiang</last></author>
       <author><first>Yi</first><last>Guan</last></author>
-      <author><first>Xiao-Long</first><last>Wang</last></author>
+      <author id="xiao-long-wang"><first>Xiao-Long</first><last>Wang</last></author>
       <pages>189–192</pages>
       <url hash="412c0de5">W06-0134</url>
       <bibkey>jiang-etal-2006-pragmatic-chinese</bibkey>
@@ -339,7 +339,7 @@
       <title><fixed-case>C</fixed-case>hinese Word Segmentation Based on an Approach of Maximum Entropy Modeling</title>
       <author><first>Yan</first><last>Song</last></author>
       <author><first>Jiaqing</first><last>Guo</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <pages>201–204</pages>
       <url hash="802a7085">W06-0137</url>
       <bibkey>song-etal-2006-chinese</bibkey>
@@ -365,8 +365,8 @@
       <title><fixed-case>C</fixed-case>hinese Named Entity Recognition with a Multi-Phase Model</title>
       <author><first>Junsheng</first><last>Zhou</last></author>
       <author><first>Liang</first><last>He</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>213–216</pages>
       <url hash="e05f53b7">W06-0140</url>
       <bibkey>zhou-etal-2006-chinese</bibkey>
@@ -387,8 +387,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Information Extraction Beyond The Document</booktitle>
       <url hash="f970c373">W06-02</url>
-      <editor><first>Mary Elaine</first><last>Califf</last></editor>
-      <editor><first>Mark A.</first><last>Greenwood</last></editor>
+      <editor id="mary-elaine-califf"><first>Mary Elaine</first><last>Califf</last></editor>
+      <editor id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></editor>
       <editor><first>Mark</first><last>Stevenson</last></editor>
       <editor><first>Roman</first><last>Yangarber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -440,7 +440,7 @@
     </paper>
     <paper id="5">
       <title>Automatic Knowledge Representation using a Graph-based Algorithm for Language-Independent Lexical Chaining</title>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Cláudia</first><last>Santos</last></author>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
       <pages>36–47</pages>
@@ -450,7 +450,7 @@
     <paper id="6">
       <title>Data Selection in Semi-supervised Learning for Name Tagging</title>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>48–55</pages>
       <url hash="d61e2fcc">W06-0206</url>
       <bibkey>ji-grishman-2006-data</bibkey>
@@ -466,7 +466,7 @@
     <paper id="8">
       <title>Learning Domain-Specific Information Extraction Patterns from the Web</title>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>66–73</pages>
       <url hash="dbffb2d7">W06-0208</url>
       <bibkey>patwardhan-riloff-2006-learning</bibkey>
@@ -491,7 +491,7 @@
     <paper id="1">
       <title>Extracting Opinions, Opinion Holders, and Topics Expressed in Online News Media Text</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1–8</pages>
       <url hash="de138499">W06-0301</url>
       <bibkey>kim-hovy-2006-extracting</bibkey>
@@ -499,7 +499,7 @@
     <paper id="2">
       <title>Toward Opinion Summarization: Linking the Sources</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>9–14</pages>
       <url hash="a99c356c">W06-0302</url>
       <bibkey>stoyanov-cardie-2006-toward</bibkey>
@@ -514,8 +514,8 @@
     </paper>
     <paper id="4">
       <title>User-directed Sentiment Analysis: Visualizing the Affective Content of Documents</title>
-      <author><first>Michelle L.</first><last>Gregory</last></author>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="michelle-gregory"><first>Michelle L.</first><last>Gregory</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <author><first>Paul</first><last>Whitney</last></author>
       <author><first>Richard</first><last>Carter</last></author>
       <author><first>Elizabeth</first><last>Hetzler</last></author>
@@ -529,8 +529,8 @@
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Nikhil</first><last>Dinesh</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>31–38</pages>
       <url hash="bc0b9e00">W06-0305</url>
       <bibkey>prasad-etal-2006-annotating</bibkey>
@@ -548,9 +548,9 @@
     <paper id="7">
       <title>Exploitation in Affect Detection in Open-Ended Improvisational Text</title>
       <author id="li-zhang-birmingham"><first>Li</first><last>Zhang</last></author>
-      <author><first>John A.</first><last>Barnden</last></author>
-      <author><first>Robert J.</first><last>Hendley</last></author>
-      <author><first>Alan M.</first><last>Wallington</last></author>
+      <author id="john-barnden"><first>John A.</first><last>Barnden</last></author>
+      <author id="robert-j-hendley"><first>Robert J.</first><last>Hendley</last></author>
+      <author id="alan-wallington"><first>Alan M.</first><last>Wallington</last></author>
       <pages>47–54</pages>
       <url hash="0c79030a">W06-0307</url>
       <bibkey>zhang-etal-2006-exploitation</bibkey>
@@ -558,7 +558,7 @@
     <paper id="8">
       <title>Towards a validated model for affective classification of texts</title>
       <author><first>Michel</first><last>Généreux</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>55–62</pages>
       <url hash="65bf8d63">W06-0308</url>
       <bibkey>genereux-evans-2006-towards</bibkey>
@@ -624,7 +624,7 @@
     <paper id="6">
       <title>Capturing Disjunction in Lexicalization with Extensible Dependency Grammar</title>
       <author><first>Jorge</first><last>Marques Pelizzoni</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <pages>41–50</pages>
       <url hash="efc6ec80">W06-0406</url>
       <bibkey>marques-pelizzoni-volpe-nunes-2006-capturing</bibkey>
@@ -635,7 +635,7 @@
       <booktitle>Proceedings of the 2nd Workshop on Ontology Learning and Population: Bridging the Gap between Text and Knowledge</booktitle>
       <url hash="b0d5e209">W06-05</url>
       <editor><first>Paul</first><last>Buitelaar</last></editor>
-      <editor><first>Philipp</first><last>Cimiano</last></editor>
+      <editor id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></editor>
       <editor><first>Berenike</first><last>Loos</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
@@ -650,7 +650,7 @@
     <paper id="1">
       <title>Enriching a Formal Ontology with a Thesaurus: an Application in the Cultural Heritage Domain</title>
       <author><first>Roberto</first><last>Navigli</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>1–9</pages>
       <url hash="6f7c9dbe">W06-0501</url>
       <bibkey>navigli-velardi-2006-enriching</bibkey>
@@ -661,14 +661,14 @@
       <author><first>Francis</first><last>Bond</last></author>
       <author><first>Takaaki</first><last>Tanaka</last></author>
       <author><first>Sanae</first><last>Fujita</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <pages>10–17</pages>
       <url hash="0dd23cad">W06-0502</url>
       <bibkey>nichols-etal-2006-multilingual</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>LEILA</fixed-case>: Learning to Extract Information by Linguistic Analysis</title>
-      <author><first>Fabian M.</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian M.</first><last>Suchanek</last></author>
       <author><first>Georgiana</first><last>Ifrim</last></author>
       <author><first>Gerhard</first><last>Weikum</last></author>
       <pages>18–25</pages>
@@ -677,10 +677,10 @@
     </paper>
     <paper id="4">
       <title>Ontology Population from Textual Mentions: Task Definition and Benchmark</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Octavian</first><last>Popescu</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <pages>26–32</pages>
       <url hash="b2740efa">W06-0504</url>
       <bibkey>magnini-etal-2006-ontology</bibkey>
@@ -688,7 +688,7 @@
     <paper id="5">
       <title>Efficient Hierarchical Entity Classifier Using Conditional Random Fields</title>
       <author><first>Koen</first><last>Deschacht</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>33–40</pages>
       <url hash="48bda8a2">W06-0505</url>
       <bibkey>deschacht-moens-2006-efficient</bibkey>
@@ -696,7 +696,7 @@
     <paper id="6">
       <title>Taxonomy Learning using Term Specificity and Similarity</title>
       <author><first>Pum-Mo</first><last>Ryu</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>41–48</pages>
       <url hash="f03fdb9c">W06-0506</url>
       <bibkey>ryu-choi-2006-taxonomy</bibkey>
@@ -704,8 +704,8 @@
     <paper id="7">
       <title>Towards Large-scale Non-taxonomic Relation Extraction: Estimating the Precision of Rote Extractors</title>
       <author><first>Enrique</first><last>Alfonseca</last></author>
-      <author><first>Maria</first><last>Ruiz-Casado</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="maria-ruiz-casado"><first>Maria</first><last>Ruiz-Casado</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <author><first>Pablo</first><last>Castells</last></author>
       <pages>49–56</pages>
       <url hash="635917c0">W06-0507</url>
@@ -724,9 +724,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006</booktitle>
       <url hash="288136e7">W06-06</url>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
       <editor><first>Francis</first><last>Bond</last></editor>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <editor><first>Shigeko</first><last>Nariyama</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
@@ -740,9 +740,9 @@
     </frontmatter>
     <paper id="1">
       <title>Challenges for Annotating Images for Sense Disambiguation</title>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Nicolas</first><last>Loeff</last></author>
-      <author><first>David A.</first><last>Forsyth</last></author>
+      <author id="david-forsyth"><first>David A.</first><last>Forsyth</last></author>
       <pages>1–4</pages>
       <url hash="aa8d6c57">W06-0601</url>
       <bibkey>alm-etal-2006-challenges</bibkey>
@@ -750,11 +750,11 @@
     <paper id="2">
       <title>A Semi-Automatic Method for Annotating a Biomedical <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank</title>
       <author><first>Wen-Chi</first><last>Chou</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <author><first>Ying-Shan</first><last>Su</last></author>
       <author><first>Wei</first><last>Ku</last></author>
       <author><first>Ting-Yi</first><last>Sung</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>5–12</pages>
       <url hash="a9045d57">W06-0602</url>
       <bibkey>chou-etal-2006-semi</bibkey>
@@ -762,7 +762,7 @@
     <paper id="3">
       <title>How and Where do People Fail with Time: Temporal Reference Mapping Annotation by <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish Bilinguals</title>
       <author><first>Yang</first><last>Ye</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>13–20</pages>
       <url hash="1ddcedfb">W06-0603</url>
       <bibkey>ye-abney-2006-people</bibkey>
@@ -771,7 +771,7 @@
       <title>Probing the Space of Grammatical Variation: Induction of Cross-Lingual Grammatical Constraints from Treebanks</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <pages>21–28</pages>
       <url hash="13032588">W06-0604</url>
@@ -795,9 +795,9 @@
     <paper id="7">
       <title>Manual Annotation of Opinion Categories in Meetings</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Paul</first><last>Hoffmann</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>54–61</pages>
       <url hash="58f52acf">W06-0607</url>
       <bibkey>somasundaran-etal-2006-manual</bibkey>
@@ -816,9 +816,9 @@
       <author><first>Olga</first><last>Babko-Malaya</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Ann</first><last>Taylor</last></author>
-      <author><first>Szuting</first><last>Yi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="szu-ting-yi"><first>Szuting</first><last>Yi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Libin</first><last>Shen</last></author>
       <pages>70–77</pages>
@@ -835,7 +835,7 @@
     <paper id="11">
       <title>Corpus Annotation by Generation</title>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <author><first>Richard</first><last>Eckart</last></author>
       <pages>86–93</pages>
       <url hash="6809bc03">W06-0611</url>
@@ -843,7 +843,7 @@
     </paper>
     <paper id="12">
       <title>Constructing an <fixed-case>E</fixed-case>nglish Valency Lexicon</title>
-      <author><first>Jiří</first><last>Semecký</last></author>
+      <author id="jiri-semecky"><first>Jiří</first><last>Semecký</last></author>
       <author><first>Silvie</first><last>Cinková</last></author>
       <pages>94–97</pages>
       <url hash="9446e23d">W06-0612</url>
@@ -854,8 +854,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Task-Focused Summarization and Question Answering</booktitle>
       <url hash="53bcf0b1">W06-07</url>
-      <editor><first>Tat-Seng</first><last>Chua</last></editor>
-      <editor><first>Jade</first><last>Goldstein</last></editor>
+      <editor id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></editor>
+      <editor id="jade-goldstein"><first>Jade</first><last>Goldstein</last></editor>
       <editor><first>Simone</first><last>Teufel</last></editor>
       <editor><first>Lucy</first><last>Vanderwende</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -880,7 +880,7 @@
     <paper id="2">
       <title>Challenges in Evaluating Summaries of Short Stories</title>
       <author><first>Anna</first><last>Kazantseva</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>8–15</pages>
       <url hash="24dc9b96">W06-0702</url>
       <bibkey>kazantseva-szpakowicz-2006-challenges</bibkey>
@@ -903,7 +903,7 @@
     </paper>
     <paper id="5">
       <title>Using Scenario Knowledge in Automatic Question Answering</title>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <author><first>Andrew</first><last>Hickl</last></author>
       <pages>32–39</pages>
       <url hash="32478566">W06-0705</url>
@@ -919,7 +919,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>DUC</fixed-case> 2005: Evaluation of Question-Focused Summarization Systems</title>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>48–55</pages>
       <url hash="5058723c">W06-0707</url>
       <bibkey>dang-2006-duc</bibkey>
@@ -929,7 +929,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on How Can Computational Linguistics Improve Information Retrieval?</booktitle>
       <url hash="5a8ee141">W06-08</url>
-      <editor><first>John</first><last>Tait</last></editor>
+      <editor id="john-tait"><first>John</first><last>Tait</last></editor>
       <editor><first>Michael</first><last>Oakes</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
@@ -945,14 +945,14 @@
       <title><fixed-case>I</fixed-case>ndonesian-<fixed-case>J</fixed-case>apanese <fixed-case>CLIR</fixed-case> Using Only Limited Resource</title>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>1–8</pages>
       <url hash="8946d8dd">W06-0801</url>
       <bibkey>purwarianti-etal-2006-indonesian</bibkey>
     </paper>
     <paper id="2">
       <title>Hybrid Systems for Information Extraction and Question Answering</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>9–16</pages>
       <url hash="e2432ad4">W06-0802</url>
       <bibkey>delmonte-2006-hybrid</bibkey>
@@ -980,7 +980,7 @@
       <author><first>Hua</first><last>Cheng</last></author>
       <author><first>Yan</first><last>Qu</last></author>
       <author><first>Jesse</first><last>Montgomery</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <pages>33–40</pages>
       <url hash="27374ed2">W06-0805</url>
       <bibkey>cheng-etal-2006-exploring</bibkey>
@@ -990,9 +990,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Annotating and Reasoning about Time and Events</booktitle>
       <url hash="f97cf503">W06-09</url>
-      <editor><first>Branimir</first><last>Boguraev</last></editor>
-      <editor><first>Rafael</first><last>Muñoz</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></editor>
+      <editor id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
       <month>July</month>
@@ -1013,7 +1013,7 @@
     <paper id="2">
       <title>Local Semantics in the Interpretation of Temporal Expressions</title>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Paweł</first><last>Mazur</last></author>
+      <author id="pawel-mazur"><first>Paweł</first><last>Mazur</last></author>
       <pages>9–16</pages>
       <url hash="9c53f25c">W06-0902</url>
       <bibkey>dale-mazur-2006-local</bibkey>
@@ -1021,7 +1021,7 @@
     <paper id="3">
       <title>Automatic Dating of Documents and Temporal Text Classification</title>
       <author><first>Angelo</first><last>Dalli</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>17–22</pages>
       <url hash="ad2a128a">W06-0903</url>
       <bibkey>dalli-wilks-2006-automatic</bibkey>
@@ -1029,16 +1029,16 @@
     <paper id="4">
       <title>A Pilot Study on Acquiring Metric Temporal Constraints for Events</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <pages>23–29</pages>
       <url hash="1ab92bcf">W06-0904</url>
       <bibkey>mani-wellner-2006-pilot</bibkey>
     </paper>
     <paper id="5">
       <title>Evaluating Knowledge-based Approaches to the Multilingual Extension of a Temporal Expression Normalizer</title>
-      <author><first>Matteo</first><last>Negri</last></author>
-      <author><first>Estela</first><last>Saquete</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
+      <author id="estela-saquete"><first>Estela</first><last>Saquete</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <author><first>Rafael</first><last>Muñoz</last></author>
       <pages>30–37</pages>
       <url hash="23876f0e">W06-0905</url>
@@ -1047,8 +1047,8 @@
     <paper id="6">
       <title>Extending <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> with Typical Durations of Events</title>
       <author><first>Feng</first><last>Pan</last></author>
-      <author><first>Rutu</first><last>Mulkar</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <pages>38–45</pages>
       <url hash="83bea092">W06-0906</url>
       <bibkey>pan-etal-2006-extending</bibkey>
@@ -1056,7 +1056,7 @@
     <paper id="7">
       <title>Marking Time in Developmental Biology: Annotating Developmental Events and their Links with Molecular Events</title>
       <author><first>Gail</first><last>Sinclair</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Duncan</first><last>Davidson</last></author>
       <pages>46–53</pages>
       <url hash="0dbc8c52">W06-0907</url>
@@ -1068,8 +1068,8 @@
       <booktitle>Proceedings of the Workshop on Multilingual Language Resources and Interoperability</booktitle>
       <url hash="19fb4af7">W06-10</url>
       <editor><first>Andreas</first><last>Witt</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
-      <editor><first>Susan</first><last>Armstrong</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="susan-armstrong"><first>Susan</first><last>Armstrong</last></editor>
       <editor><first>Jim</first><last>Breen</last></editor>
       <editor><first>Ulrich</first><last>Heid</last></editor>
       <editor><first>Felix</first><last>Sasaki</last></editor>
@@ -1086,9 +1086,9 @@
     <paper id="1">
       <title>Lexical Markup Framework (<fixed-case>LMF</fixed-case>) for <fixed-case>NLP</fixed-case> Multilingual Resources</title>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Nuria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Nuria</first><last>Bel</last></author>
       <author><first>Monte</first><last>George</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Mandy</first><last>Pet</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
@@ -1106,12 +1106,12 @@
     <paper id="3">
       <title>Towards Agent-based Cross-Lingual Interoperability of Distributed Lexical Resources</title>
       <author><first>Claudia</first><last>Soria</last></author>
-      <author><first>Maurizio</first><last>Tesconi</last></author>
+      <author id="maurizio-tesconi"><first>Maurizio</first><last>Tesconi</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
       <author><first>Francesca</first><last>Bertagna</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <pages>17–24</pages>
       <url hash="38ca04fc">W06-1003</url>
       <bibkey>soria-etal-2006-towards</bibkey>
@@ -1138,14 +1138,14 @@
     <paper id="6">
       <title>Multilingual Collocation Extraction: Issues and Solutions</title>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>40–49</pages>
       <url hash="5dae2385">W06-1006</url>
       <bibkey>seretan-wehrli-2006-multilingual</bibkey>
     </paper>
     <paper id="7">
       <title>Structural Properties of Lexical Systems: Monolingual and Multilingual Perspectives</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>50–59</pages>
       <url hash="64f95989">W06-1007</url>
       <bibkey>polguere-2006-structural</bibkey>
@@ -1161,7 +1161,7 @@
     </paper>
     <paper id="9">
       <title>Evaluation of the <fixed-case>B</fixed-case>ible as a Resource for Cross-Language Information Retrieval</title>
-      <author><first>Peter A.</first><last>Chew</last></author>
+      <author id="peter-a-chew"><first>Peter A.</first><last>Chew</last></author>
       <author><first>Steve J.</first><last>Verzi</last></author>
       <author><first>Travis L.</first><last>Bauer</last></author>
       <author><first>Jonathan T.</first><last>McClain</last></author>
@@ -1175,7 +1175,7 @@
       <booktitle>Proceedings of the Workshop on Linguistic Distances</booktitle>
       <url hash="a938312c">W06-11</url>
       <editor><first>John</first><last>Nerbonne</last></editor>
-      <editor><first>Erhard</first><last>Hinrichs</last></editor>
+      <editor id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
       <month>July</month>
@@ -1204,7 +1204,7 @@
     <paper id="3">
       <title>Similarity Judgments: Philosophical, Psychological and Mathematical Investigations</title>
       <author><first>Claude</first><last>St-Jacques</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>8–15</pages>
       <url hash="0d55861d">W06-1103</url>
       <bibkey>st-jacques-barriere-2006-similarity</bibkey>
@@ -1252,14 +1252,14 @@
     </paper>
     <paper id="9">
       <title>Study of Some Distance Measures for Language and Encoding Identification</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>63–72</pages>
       <url hash="34ac7d37">W06-1109</url>
       <bibkey>singh-2006-study</bibkey>
     </paper>
     <paper id="10">
       <title>Towards Case-Based Parsing: Are Chunks Reliable Indicators for Syntax Trees?</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>73–81</pages>
       <url hash="5563e308">W06-1110</url>
       <bibkey>kubler-2006-towards</bibkey>
@@ -1275,7 +1275,7 @@
     <paper id="12">
       <title>A Structural Similarity Measure</title>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <pages>91–99</pages>
       <url hash="02273a01">W06-1112</url>
       <bibkey>homola-kubon-2006-structural</bibkey>
@@ -1290,7 +1290,7 @@
     <paper id="14">
       <title>Total Rank Distance and Scaled Total Rank Distance: Two Alternative Metrics in Computational Linguistics</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>109–116</pages>
       <url hash="d20c9d44">W06-1114</url>
       <bibkey>dinu-dinu-2006-total</bibkey>
@@ -1300,9 +1300,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Multiword Expressions: Identifying and Exploiting Underlying Properties</booktitle>
       <url hash="6c160881">W06-12</url>
-      <editor><first>Begoña Villada</first><last>Moirón</last></editor>
+      <editor id="begona-villada-moiron"><first>Begoña Villada</first><last>Moirón</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
-      <editor><first>Diana</first><last>McCarthy</last></editor>
+      <editor id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></editor>
       <editor><first>Stefan</first><last>Evert</last></editor>
       <editor><first>Suzanne</first><last>Stevenson</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1317,17 +1317,17 @@
     </frontmatter>
     <paper id="1">
       <title>Compositionality and Multiword Expressions: Six of One, Half a Dozen of the Other?</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1</pages>
       <url hash="0ab5036b">W06-1201</url>
       <bibkey>baldwin-2006-compositionality</bibkey>
     </paper>
     <paper id="2">
       <title>Measuring <fixed-case>MWE</fixed-case> Compositionality Using Semantic Annotation</title>
-      <author><first>Scott S.L.</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott S.L.</first><last>Piao</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Olga</first><last>Mudraya</last></author>
-      <author><first>Andrew</first><last>Wilson</last></author>
+      <author id="andrew-wilson"><first>Andrew</first><last>Wilson</last></author>
       <author><first>Roger</first><last>Garside</last></author>
       <pages>2–11</pages>
       <url hash="1f306c84">W06-1202</url>
@@ -1335,7 +1335,7 @@
     </paper>
     <paper id="3">
       <title>Automatic Identification of Non-Compositional Multi-Word Expressions using Latent Semantic Analysis</title>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <author><first>Eugenie</first><last>Giesbrecht</last></author>
       <pages>12–19</pages>
       <url hash="c885a32d">W06-1203</url>
@@ -1344,7 +1344,7 @@
     <paper id="4">
       <title>Using Information about Multi-word Expressions for the Word-Alignment Task</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>20–27</pages>
       <url hash="465e6fcd">W06-1204</url>
       <bibkey>venkatapathy-joshi-2006-using</bibkey>
@@ -1353,7 +1353,7 @@
       <title>Detecting Complex Predicates in <fixed-case>H</fixed-case>indi using <fixed-case>POS</fixed-case> Projection across Parallel Corpora</title>
       <author><first>Amitabha</first><last>Mukerjee</last></author>
       <author><first>Ankit</first><last>Soni</last></author>
-      <author><first>Achla M</first><last>Raina</last></author>
+      <author id="achla-m-raina"><first>Achla M</first><last>Raina</last></author>
       <pages>28–35</pages>
       <url hash="f4dad07f">W06-1205</url>
       <bibkey>mukerjee-etal-2006-detecting</bibkey>
@@ -1379,7 +1379,7 @@
     <paper id="8">
       <title>Interpretation of Compound Nominalisations using Corpus and Web Statistics</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>54–61</pages>
       <url hash="ef3295d5">W06-1208</url>
       <bibkey>nicholson-baldwin-2006-interpretation</bibkey>
@@ -1418,7 +1418,7 @@
       <author><first>Kazuhiro</first><last>Nakadai</last></author>
       <author><first>Hiroshi</first><last>Tsujino</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>9–17</pages>
       <url hash="e5e743b9">W06-1302</url>
       <bibkey>komatani-etal-2006-multi</bibkey>
@@ -1427,7 +1427,7 @@
       <title>Building Effective Question Answering Characters</title>
       <author><first>Anton</first><last>Leuski</last></author>
       <author><first>Ronakkumar</first><last>Patel</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Brandon</first><last>Kennedy</last></author>
       <pages>18–27</pages>
       <url hash="0dbbe136">W06-1303</url>
@@ -1452,7 +1452,7 @@
     <paper id="6">
       <title>Multidimensional Dialogue Management</title>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>37–45</pages>
       <url hash="7a352986">W06-1306</url>
       <bibkey>keizer-bunt-2006-multidimensional</bibkey>
@@ -1466,9 +1466,9 @@
     </paper>
     <paper id="8">
       <title>Resolution of Referents Groupings in Practical Dialogues</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <author><first>Guillaume</first><last>Pitel</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <pages>54–59</pages>
       <url hash="bfd002cc">W06-1308</url>
       <bibkey>denis-etal-2006-resolution</bibkey>
@@ -1512,17 +1512,17 @@
     <paper id="13">
       <title>An Information State-Based Dialogue Manager for Call for Fire Dialogues</title>
       <author><first>Antonio</first><last>Roque</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>88–95</pages>
       <url hash="d14e64b2">W06-1313</url>
       <bibkey>roque-traum-2006-information</bibkey>
     </paper>
     <paper id="14">
       <title>Automatically Detecting Action Items in Audio Meeting Recordings</title>
-      <author><first>William</first><last>Morgan</last></author>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="william-morgan"><first>William</first><last>Morgan</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
       <author><first>Surabhi</first><last>Gupta</last></author>
-      <author><first>Jason M.</first><last>Brenier</last></author>
+      <author id="jason-brenier"><first>Jason M.</first><last>Brenier</last></author>
       <pages>96–103</pages>
       <url hash="1c376280">W06-1314</url>
       <bibkey>morgan-etal-2006-automatically</bibkey>
@@ -1538,7 +1538,7 @@
     </paper>
     <paper id="16">
       <title>Multimodal Dialog Description Language for Rapid System Development</title>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Kenji</first><last>Tachibana</last></author>
       <pages>109–116</pages>
       <url hash="52541789">W06-1316</url>
@@ -1546,11 +1546,11 @@
     </paper>
     <paper id="17">
       <title>Classification of Discourse Coherence Relations: An Exploratory Study using Multiple Knowledge Sources</title>
-      <author><first>Ben</first><last>Wellner</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Catherine</first><last>Havasi</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
-      <author><first>Roser</first><last>Saurí</last></author>
+      <author id="roser-sauri"><first>Roser</first><last>Saurí</last></author>
       <pages>117–125</pages>
       <url hash="de0cbe3f">W06-1317</url>
       <bibkey>wellner-etal-2006-classification</bibkey>
@@ -1558,7 +1558,7 @@
     <paper id="18">
       <title>Measuring annotator agreement in a complex hierarchical dialogue act annotation scheme</title>
       <author><first>Jeroen</first><last>Geertzen</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>126–133</pages>
       <url hash="3578a063">W06-1318</url>
       <bibkey>geertzen-bunt-2006-measuring</bibkey>
@@ -1576,14 +1576,14 @@
       <title>An Analysis of Quantitative Aspects in the Evaluation of Thematic Segmentation Algorithms</title>
       <author><first>Maria</first><last>Georgescul</last></author>
       <author><first>Alexander</first><last>Clark</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <pages>144–151</pages>
       <url hash="7794cd98">W06-1320</url>
       <bibkey>georgescul-etal-2006-analysis</bibkey>
     </paper>
     <paper id="21">
       <title>Discourse and Dialogue Processing in Spoken Intelligent Tutoring Systems</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>152</pages>
       <url hash="4ecbde48">W06-1321</url>
       <bibkey>litman-2006-discourse</bibkey>
@@ -1611,7 +1611,7 @@
       <booktitle>Proceedings of the Fourth International Natural Language Generation Conference</booktitle>
       <url hash="57887282">W06-14</url>
       <editor><first>Nathalie</first><last>Colineau</last></editor>
-      <editor><first>Cécile</first><last>Paris</last></editor>
+      <editor id="cecile-paris"><first>Cécile</first><last>Paris</last></editor>
       <editor><first>Stephen</first><last>Wan</last></editor>
       <editor><first>Robert</first><last>Dale</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1626,7 +1626,7 @@
     </frontmatter>
     <paper id="1">
       <title>Lessons Learned from Large Scale Evaluation of Systems that Produce Text: Nightmares and Pleasant Surprises</title>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <pages>3–5</pages>
       <url hash="44a36aed">W06-1401</url>
       <bibkey>mckeown-2006-lessons</bibkey>
@@ -1643,7 +1643,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>CCG</fixed-case> Chart Realization from Disjunctive Inputs</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>12–19</pages>
       <url hash="06c7ae52">W06-1403</url>
       <bibkey>white-2006-ccg</bibkey>
@@ -1659,7 +1659,7 @@
       <title>Individuality and Alignment in Generated Dialogues</title>
       <author><first>Amy</first><last>Isard</last></author>
       <author><first>Carsten</first><last>Brockmann</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>25–32</pages>
       <url hash="ee392bd9">W06-1405</url>
       <bibkey>isard-etal-2006-individuality</bibkey>
@@ -1675,7 +1675,7 @@
       <title>Adjective-to-Verb Paraphrasing in <fixed-case>J</fixed-case>apanese Based on Lexical Constraints of Verbs</title>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Naruaki</first><last>Masuno</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Takehito</first><last>Utsuro</last></author>
       <pages>41–43</pages>
       <url hash="fb1a94d8">W06-1407</url>
@@ -1690,9 +1690,9 @@
     </paper>
     <paper id="9">
       <title>Overspecified Reference in Hierarchical Domains: Measuring the Benefits for Readers</title>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <author><first>Judith</first><last>Masthoff</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>55–62</pages>
       <url hash="c5648041">W06-1409</url>
       <bibkey>paraboni-etal-2006-overspecified</bibkey>
@@ -1717,9 +1717,9 @@
     <paper id="12">
       <title>Noun Phrase Generation for Situated Dialogs</title>
       <author><first>Laura</first><last>Stoia</last></author>
-      <author><first>Darla Magdalene</first><last>Shockley</last></author>
-      <author><first>Donna K.</first><last>Byron</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="darla-magdalene-shockley"><first>Darla Magdalene</first><last>Shockley</last></author>
+      <author id="donna-byron"><first>Donna K.</first><last>Byron</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>81–88</pages>
       <url hash="2ee41580">W06-1412</url>
       <bibkey>stoia-etal-2006-noun</bibkey>
@@ -1727,8 +1727,8 @@
     <paper id="13">
       <title>The Clarity-Brevity Trade-off in Generating Referring Expressions</title>
       <author><first>Imtiaz Hussain</first><last>Khan</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>89–91</pages>
       <url hash="14dbaaa8">W06-1413</url>
       <bibkey>khan-etal-2006-clarity</bibkey>
@@ -1742,7 +1742,7 @@
     </paper>
     <paper id="15">
       <title>Generating Intelligent Numerical Answers in a Question-Answering System</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <pages>103–110</pages>
       <url hash="48be4b48">W06-1415</url>
       <bibkey>moriceau-2006-generating</bibkey>
@@ -1751,21 +1751,21 @@
       <title>Generating Multiple-Choice Test Items from Medical Text: A Pilot Study</title>
       <author><first>Nikiforos</first><last>Karamanis</last></author>
       <author><first>Le An</first><last>Ha</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>111–113</pages>
       <url hash="82218d63">W06-1416</url>
       <bibkey>karamanis-etal-2006-generating</bibkey>
     </paper>
     <paper id="17">
       <title>Generation of Biomedical Arguments for Lay Readers</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <pages>114–121</pages>
       <url hash="3152a6ec">W06-1417</url>
       <bibkey>green-2006-generation</bibkey>
     </paper>
     <paper id="18">
       <title>Introduction to the <fixed-case>INLG</fixed-case>’06 Special Session on Sharing Data and Comparative Evaluation</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>125–126</pages>
       <url hash="f0f903eb">W06-1418</url>
@@ -1782,7 +1782,7 @@
     </paper>
     <paper id="20">
       <title>Building a Semantically Transparent Corpus for the Generation of Referring Expressions.</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Ielka</first><last>van der Sluis</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>130–132</pages>
@@ -1791,7 +1791,7 @@
     </paper>
     <paper id="21">
       <title>Shared-Task Evaluations in <fixed-case>HLT</fixed-case>: Lessons for <fixed-case>NLG</fixed-case></title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
       <pages>133–135</pages>
       <url hash="f73a2508">W06-1421</url>
@@ -1800,7 +1800,7 @@
     <paper id="22">
       <title><fixed-case>GENEVAL</fixed-case>: A Proposal for Shared-task Evaluation in <fixed-case>NLG</fixed-case></title>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>136–138</pages>
       <url hash="ba1df382">W06-1422</url>
       <bibkey>reiter-belz-2006-geneval</bibkey>
@@ -1825,15 +1825,15 @@
     <paper id="1">
       <title>The Hidden <fixed-case>TAG</fixed-case> Model: Synchronous Grammars for Parsing Resource-Poor Languages</title>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1–8</pages>
       <url hash="78d80d42">W06-1501</url>
       <bibkey>chiang-rambow-2006-hidden</bibkey>
     </paper>
     <paper id="2">
       <title>A Constraint Driven Metagrammar</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <pages>9–16</pages>
       <url hash="710336ba">W06-1502</url>
@@ -1842,10 +1842,10 @@
     <paper id="3">
       <title>The Metagrammar Goes Multilingual: A Cross-Linguistic Look at the V2-Phenomenon</title>
       <author><first>Alexandra</first><last>Kinyon</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Tatjana</first><last>Scheffler</last></author>
       <author><first>SinWon</first><last>Yoon</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>17–24</pages>
       <url hash="802e5a4c">W06-1503</url>
       <bibkey>kinyon-etal-2006-metagrammar</bibkey>
@@ -1859,7 +1859,7 @@
     </paper>
     <paper id="5">
       <title>A <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Analysis of the Syntax and Semantics of <i>It</i>-Clefts</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Nancy</first><last>Hedberg</last></author>
       <pages>33–40</pages>
       <url hash="22e4a1b6">W06-1505</url>
@@ -1867,14 +1867,14 @@
     </paper>
     <paper id="6">
       <title>Pied-Piping in Relative Clauses: Syntax and Compositional Semantics Based on <fixed-case>S</fixed-case>ynchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <pages>41–48</pages>
       <url hash="956a3104">W06-1506</url>
       <bibkey>han-2006-pied</bibkey>
     </paper>
     <paper id="7">
       <title>Negative Concord and Restructuring in Palestinian <fixed-case>A</fixed-case>rabic: A Comparison of <fixed-case>TAG</fixed-case> and <fixed-case>CCG</fixed-case> Analyses</title>
-      <author><first>Frederick M.</first><last>Hoyt</last></author>
+      <author id="frederick-m-hoyt"><first>Frederick M.</first><last>Hoyt</last></author>
       <pages>49–56</pages>
       <url hash="c308e506">W06-1507</url>
       <bibkey>hoyt-2006-negative</bibkey>
@@ -1930,7 +1930,7 @@
     <paper id="14">
       <title>Generating <fixed-case>XTAG</fixed-case> Parsers from Algebraic Specifications</title>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Manuel</first><last>Vilares</last></author>
       <pages>103–108</pages>
       <url hash="f143963e">W06-1514</url>
@@ -1978,7 +1978,7 @@
     </paper>
     <paper id="20">
       <title>Handling Unlike Coordinated Phrases in <fixed-case>TAG</fixed-case> by Mixing Syntactic Category and Grammatical Function</title>
-      <author><first>Carlos A.</first><last>Prolo</last></author>
+      <author id="carlos-a-prolo"><first>Carlos A.</first><last>Prolo</last></author>
       <pages>137–140</pages>
       <url hash="af9edba1">W06-1520</url>
       <bibkey>prolo-2006-handling</bibkey>
@@ -1992,8 +1992,8 @@
     </paper>
     <paper id="22">
       <title>Modeling and Analysis of Elliptic Coordination by Dynamic Exploitation of Derivation Forests in <fixed-case>LTAG</fixed-case> Parsing</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>147–152</pages>
       <url hash="35809edb">W06-1522</url>
       <bibkey>seddah-sagot-2006-modeling</bibkey>
@@ -2007,7 +2007,7 @@
     </paper>
     <paper id="24">
       <title>Reconsidering Raising and Experiencers in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <pages>159–164</pages>
       <url hash="6a6252a3">W06-1524</url>
       <bibkey>storoshenko-2006-reconsidering</bibkey>
@@ -2017,8 +2017,8 @@
     <meta>
       <booktitle>Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing</booktitle>
       <url hash="48d0612b">W06-16</url>
-      <editor><first>Dan</first><last>Jurafsky</last></editor>
-      <editor><first>Eric</first><last>Gaussier</last></editor>
+      <editor id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></editor>
+      <editor id="eric-gaussier"><first>Eric</first><last>Gaussier</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sydney, Australia</address>
       <month>July</month>
@@ -2032,7 +2032,7 @@
     <paper id="1">
       <title>Unsupervised Discovery of a Statistical Verb Lexicon</title>
       <author><first>Trond</first><last>Grenager</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>1–8</pages>
       <url hash="68057114">W06-1601</url>
       <bibkey>grenager-manning-2006-unsupervised</bibkey>
@@ -2049,7 +2049,7 @@
       <title>Paraphrase Recognition via Dissimilarity Significance Classification</title>
       <author><first>Long</first><last>Qiu</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Tat-Seng</first><last>Chua</last></author>
+      <author id="tat-seng-chua"><first>Tat-Seng</first><last>Chua</last></author>
       <pages>18–26</pages>
       <url hash="83ab277a">W06-1603</url>
       <bibkey>qiu-etal-2006-paraphrase</bibkey>
@@ -2065,7 +2065,7 @@
     </paper>
     <paper id="5">
       <title>Distributional measures of concept-distance: A task-oriented evaluation</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>35–43</pages>
       <url hash="95606eaf">W06-1605</url>
@@ -2093,15 +2093,15 @@
     <paper id="8">
       <title>The impact of parse quality on syntactically-informed statistical machine translation</title>
       <author><first>Chris</first><last>Quirk</last></author>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <pages>62–69</pages>
       <url hash="8c7cf780">W06-1608</url>
       <bibkey>quirk-corston-oliver-2006-impact</bibkey>
     </paper>
     <paper id="9">
       <title>Statistical Machine Reordering</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>70–76</pages>
       <url hash="ad79dc37">W06-1609</url>
       <bibkey>costa-jussa-fonollosa-2006-statistical</bibkey>
@@ -2109,8 +2109,8 @@
     <paper id="10">
       <title>Re-evaluating Machine Translation Results with Paraphrase Support</title>
       <author><first>Liang</first><last>Zhou</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>77–84</pages>
       <url hash="aadd8776">W06-1610</url>
       <bibkey>zhou-etal-2006-evaluating</bibkey>
@@ -2118,7 +2118,7 @@
     <paper id="11">
       <title>Exploiting Discourse Structure for Spoken Dialogue Performance Analysis</title>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
       <pages>85–93</pages>
       <url hash="5e2fdaf8">W06-1611</url>
       <bibkey>rotaru-litman-2006-exploiting</bibkey>
@@ -2141,8 +2141,8 @@
     </paper>
     <paper id="14">
       <title>Is it Really that Difficult to Parse <fixed-case>G</fixed-case>erman?</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <author><first>Wolfgang</first><last>Maier</last></author>
       <pages>111–119</pages>
       <url hash="4a2af7bb">W06-1614</url>
@@ -2167,7 +2167,7 @@
     </paper>
     <paper id="17">
       <title>Semantic Role Labeling of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank: A Maximum Entropy Approach</title>
-      <author><first>Zheng Ping</first><last>Jiang</last></author>
+      <author id="zheng-ping-jiang"><first>Zheng Ping</first><last>Jiang</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>138–145</pages>
       <url hash="7273162a">W06-1617</url>
@@ -2176,7 +2176,7 @@
     <paper id="18">
       <title>Identification of Event Mentions and their Semantic Class</title>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>146–154</pages>
       <url hash="93b9297c">W06-1618</url>
       <bibkey>bethard-martin-2006-identification</bibkey>
@@ -2187,15 +2187,15 @@
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>155–163</pages>
       <url hash="e37d7fda">W06-1619</url>
       <bibkey>ninomiya-etal-2006-extremely</bibkey>
     </paper>
     <paper id="20">
       <title>Multilingual Deep Lexical Acquisition for <fixed-case>HPSG</fixed-case>s via Supertagging</title>
-      <author><first>Phil</first><last>Blunsom</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>164–171</pages>
       <url hash="e1c2ded9">W06-1620</url>
       <bibkey>blunsom-baldwin-2006-multilingual</bibkey>
@@ -2211,7 +2211,7 @@
     </paper>
     <paper id="22">
       <title>Semantic Role Labeling via Instance-Based Learning</title>
-      <author><first>Chi-San Althon</first><last>Lin</last></author>
+      <author id="chi-san-althon-lin"><first>Chi-San Althon</first><last>Lin</last></author>
       <author><first>Tony C.</first><last>Smith</last></author>
       <pages>180–188</pages>
       <url hash="a0bfdbbf">W06-1622</url>
@@ -2221,7 +2221,7 @@
       <title>Inducing Temporal Graphs</title>
       <author><first>Philip</first><last>Bramsen</last></author>
       <author><first>Pawan</first><last>Deshpande</last></author>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>189–198</pages>
       <url hash="79c0acce">W06-1623</url>
@@ -2231,7 +2231,7 @@
       <title>A Weakly Supervised Learning Approach for Spoken Language Understanding</title>
       <author><first>Wei-Lin</first><last>Wu</last></author>
       <author><first>Ru-Zhan</first><last>Lu</last></author>
-      <author><first>Jian-Yong</first><last>Duan</last></author>
+      <author id="jianyong-duan"><first>Jian-Yong</first><last>Duan</last></author>
       <author><first>Hui</first><last>Liu</last></author>
       <author><first>Feng</first><last>Gao</last></author>
       <author><first>Yu-Quan</first><last>Chen</last></author>
@@ -2242,16 +2242,16 @@
     <paper id="25">
       <title><fixed-case>H</fixed-case>umor: Prosody Analysis and Automatic Recognition for <fixed-case>F</fixed-case>*<fixed-case>R</fixed-case>*<fixed-case>I</fixed-case>*<fixed-case>E</fixed-case>*<fixed-case>N</fixed-case>*<fixed-case>D</fixed-case>*<fixed-case>S</fixed-case>*</title>
       <author><first>Amruta</first><last>Purandare</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>208–215</pages>
       <url hash="59865edd">W06-1625</url>
       <bibkey>purandare-litman-2006-humor</bibkey>
     </paper>
     <paper id="26">
       <title>Distributed Language Modeling for <tex-math>N</tex-math>-best List Re-ranking</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>216–223</pages>
       <url hash="c2b2c684">W06-1626</url>
       <bibkey>zhang-etal-2006-distributed</bibkey>
@@ -2267,8 +2267,8 @@
     <paper id="28">
       <title>A Discriminative Model for Tree-to-Tree Translation</title>
       <author><first>Brooke</first><last>Cowan</last></author>
-      <author><first>Ivona</first><last>Kuc̆erová</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="ivona-kucerova"><first>Ivona</first><last>Kuc̆erová</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>232–241</pages>
       <url hash="c28773de">W06-1628</url>
       <bibkey>cowan-etal-2006-discriminative</bibkey>
@@ -2285,10 +2285,10 @@
     <paper id="30">
       <title>Unsupervised Named Entity Transliteration Using Temporal and Phonetic Correlation</title>
       <author><first>Tao</first><last>Tao</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Andrew</first><last>Fister</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
-      <author><first>ChengXiang</first><last>Zhai</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
+      <author id="chengxiang-zhai"><first>ChengXiang</first><last>Zhai</last></author>
       <pages>250–257</pages>
       <url hash="7e01f312">W06-1630</url>
       <bibkey>tao-etal-2006-unsupervised</bibkey>
@@ -2323,8 +2323,8 @@
       <author><first>Akane</first><last>Yakushiji</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>284–292</pages>
       <url hash="bfbd948f">W06-1634</url>
       <bibkey>yakushiji-etal-2006-automatic</bibkey>
@@ -2332,7 +2332,7 @@
     <paper id="35">
       <title>Protein folding and chart parsing</title>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Ken A.</first><last>Dill</last></author>
       <pages>293–300</pages>
       <url hash="96216b30">W06-1635</url>
@@ -2359,7 +2359,7 @@
     <paper id="38">
       <title>Better Informed Training of Latent Syntactic Features</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>317–326</pages>
       <url hash="8ad93bf5">W06-1638</url>
       <bibkey>dreyer-eisner-2006-better</bibkey>
@@ -2376,7 +2376,7 @@
     <paper id="40">
       <title>Partially Supervised Coreference Resolution for Opinion Summarization through Structured Rule Learning</title>
       <author><first>Veselin</first><last>Stoyanov</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>336–344</pages>
       <url hash="a8ee114e">W06-1640</url>
       <bibkey>stoyanov-cardie-2006-partially</bibkey>
@@ -2406,8 +2406,8 @@
     </paper>
     <paper id="44">
       <title>Style &amp; Topic Language Model Adaptation Using <fixed-case>HMM</fixed-case>-<fixed-case>LDA</fixed-case></title>
-      <author><first>Bo-June Paul</first><last>Hsu</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="bo-june-paul-hsu"><first>Bo-June Paul</first><last>Hsu</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>373–381</pages>
       <url hash="1f17b984">W06-1644</url>
       <bibkey>hsu-glass-2006-style</bibkey>
@@ -2415,8 +2415,8 @@
     <paper id="45">
       <title>Text data acquisition for domain-specific language models</title>
       <author><first>Abhinav</first><last>Sethy</last></author>
-      <author><first>Panayiotis G.</first><last>Georgiou</last></author>
-      <author><first>Shrikanth</first><last>Narayanan</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis G.</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth</first><last>Narayanan</last></author>
       <pages>382–389</pages>
       <url hash="95ed53e9">W06-1645</url>
       <bibkey>sethy-etal-2006-text</bibkey>
@@ -2424,7 +2424,7 @@
     <paper id="46">
       <title>Corrective Models for Speech Recognition of Inflected Languages</title>
       <author><first>Izhak</first><last>Shafran</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>390–398</pages>
       <url hash="9026a942">W06-1646</url>
       <bibkey>shafran-hall-2006-corrective</bibkey>
@@ -2447,9 +2447,9 @@
     </paper>
     <paper id="49">
       <title>Partially Supervised Sense Disambiguation by Learning Sense Number from Tagged and Untagged Corpora</title>
-      <author><first>Zheng-Yu</first><last>Niu</last></author>
-      <author><first>Dong-Hong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng-Yu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Dong-Hong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>415–422</pages>
       <url hash="bf95e182">W06-1649</url>
       <bibkey>niu-etal-2006-partially</bibkey>
@@ -2458,7 +2458,7 @@
       <title>Automatically Assessing Review Helpfulness</title>
       <author><first>Soo-Min</first><last>Kim</last></author>
       <author><first>Patrick</first><last>Pantel</last></author>
-      <author><first>Tim</first><last>Chklovski</last></author>
+      <author id="timothy-chklovski"><first>Tim</first><last>Chklovski</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <pages>423–430</pages>
       <url hash="1b8ab1ba">W06-1650</url>
@@ -2467,17 +2467,17 @@
     <paper id="51">
       <title>Joint Extraction of Entities and Relations for Opinion Recognition</title>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Eric</first><last>Breck</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="eric-breck"><first>Eric</first><last>Breck</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>431–439</pages>
       <url hash="882c8108">W06-1651</url>
       <bibkey>choi-etal-2006-joint</bibkey>
     </paper>
     <paper id="52">
       <title>Feature Subsumption for Opinion Analysis</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>440–448</pages>
       <url hash="17dc9d74">W06-1652</url>
       <bibkey>riloff-etal-2006-feature</bibkey>
@@ -2493,7 +2493,7 @@
     <paper id="54">
       <title>Random Indexing using Statistical Weight Functions</title>
       <author><first>James</first><last>Gorman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>457–464</pages>
       <url hash="82f009f9">W06-1654</url>
       <bibkey>gorman-curran-2006-random</bibkey>
@@ -2525,15 +2525,15 @@
       <title>Entity Annotation based on Inverse Index Operations</title>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <author><first>Sreeram</first><last>Balakrishnan</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>492–500</pages>
       <url hash="adf2a1ba">W06-1658</url>
       <bibkey>ramakrishnan-etal-2006-entity</bibkey>
     </paper>
     <paper id="59">
       <title>Unsupervised Information Extraction Approach Using Graph Mutual Reinforcement</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
       <pages>501–508</pages>
       <url hash="d1ae9c38">W06-1659</url>
@@ -2558,8 +2558,8 @@
     </paper>
     <paper id="62">
       <title>Sentence ordering with manifold-based classification in multi-document summarization</title>
-      <author><first>Paul D</first><last>Ji</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="paul-d-ji"><first>Paul D</first><last>Ji</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>526–533</pages>
       <url hash="3571be76">W06-1662</url>
       <bibkey>ji-pulman-2006-sentence</bibkey>
@@ -2567,7 +2567,7 @@
     <paper id="63">
       <title>Quality Assessment of Large Scale Knowledge Resources</title>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>534–541</pages>
       <url hash="62089d57">W06-1663</url>
       <bibkey>cuadros-rigau-2006-quality</bibkey>
@@ -2585,7 +2585,7 @@
     <paper id="65">
       <title>Context-Dependent Term Relations for Information Retrieval</title>
       <author><first>Jing</first><last>Bai</last></author>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Guihong</first><last>Cao</last></author>
       <pages>551–559</pages>
       <url hash="17e0fdb1">W06-1665</url>
@@ -2594,7 +2594,7 @@
     <paper id="66">
       <title>Loss Minimization in Parse Reranking</title>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>560–567</pages>
       <url hash="0a8e7b29">W06-1666</url>
       <bibkey>titov-henderson-2006-loss</bibkey>
@@ -2602,9 +2602,9 @@
     <paper id="67">
       <title>Unsupervised Relation Disambiguation with Order Identification Capabilities</title>
       <author><first>Jinxiu</first><last>Chen</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
-      <author><first>Zhengyu</first><last>Niu</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
+      <author id="zheng-yu-niu"><first>Zhengyu</first><last>Niu</last></author>
       <pages>568–575</pages>
       <url hash="d05bc27b">W06-1667</url>
       <bibkey>chen-etal-2006-unsupervised-relation</bibkey>
@@ -2618,10 +2618,10 @@
     </paper>
     <paper id="69">
       <title>Two graph-based algorithms for state-of-the-art <fixed-case>WSD</fixed-case></title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martínez</last></author>
-      <author><first>Oier</first><last>López de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>López de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>585–593</pages>
       <url hash="1e7b76af">W06-1669</url>
       <bibkey>agirre-etal-2006-two</bibkey>
@@ -2653,9 +2653,9 @@
     </paper>
     <paper id="73">
       <title>Solving the Problem of Cascading Errors: Approximate <fixed-case>B</fixed-case>ayesian Inference for Linguistic Annotation Pipelines</title>
-      <author><first>Jenny Rose</first><last>Finkel</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Andrew Y.</first><last>Ng</last></author>
+      <author id="jenny-rose-finkel"><first>Jenny Rose</first><last>Finkel</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="andrew-y-ng"><first>Andrew Y.</first><last>Ng</last></author>
       <pages>618–626</pages>
       <url hash="7f7526d9">W06-1673</url>
       <bibkey>finkel-etal-2006-solving</bibkey>
@@ -2674,11 +2674,11 @@
     <paper id="1">
       <title>Web-based frequency dictionaries for medium density languages</title>
       <author><first>András</first><last>Kornai</last></author>
-      <author><first>Péter</first><last>Halácsy</last></author>
+      <author id="peter-halacsy"><first>Péter</first><last>Halácsy</last></author>
       <author><first>Viktor</first><last>Nagy</last></author>
       <author><first>Csaba</first><last>Oravecz</last></author>
       <author><first>Viktor</first><last>Trón</last></author>
-      <author><first>Dániel</first><last>Varga</last></author>
+      <author id="daniel-varga"><first>Dániel</first><last>Varga</last></author>
       <url hash="211cd4f0">W06-1701</url>
       <bibkey>kornai-etal-2006-web</bibkey>
     </paper>
@@ -2702,7 +2702,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>CUCW</fixed-case>eb: A <fixed-case>C</fixed-case>atalan corpus built from the Web</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Rodrigo</first><last>Meza</last></author>
       <author><first>Carlos</first><last>Castillo</last></author>
@@ -2729,7 +2729,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>C</fixed-case>orporator: A tool for creating <fixed-case>RSS</fixed-case>-based specialized corpora</title>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <url hash="20ed5f7c">W06-1707</url>
       <bibkey>fairon-2006-corporator</bibkey>
     </paper>
@@ -2771,7 +2771,7 @@
     </frontmatter>
     <paper id="1">
       <title>Language and Reasoning for Question Answering: State of the Artand Future Directions</title>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <url hash="a05de60a">W06-1801</url>
       <bibkey>benamara-2006-language</bibkey>
     </paper>
@@ -2783,8 +2783,8 @@
     </paper>
     <paper id="3">
       <title>Interpretation and Generation in a Knowledge-Based <fixed-case>T</fixed-case>utorial<fixed-case>S</fixed-case>ystem</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
       <url hash="51afe967">W06-1803</url>
       <bibkey>dzikovska-etal-2006-interpretation</bibkey>
@@ -2820,7 +2820,7 @@
     </paper>
     <paper id="8">
       <title>Numerical Data Integration for Cooperative Question-Answering</title>
-      <author><first>Véronique</first><last>Moriceau</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
       <url hash="4bb97ce4">W06-1808</url>
       <bibkey>moriceau-2006-numerical</bibkey>
     </paper>
@@ -2860,7 +2860,7 @@
     <paper id="3">
       <title>Cross-Cutting Aspects of Cross-Language Question Answering Systems</title>
       <author><first>Bogdan</first><last>Sacaleanu</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <url hash="ad1c8b5a">W06-1903</url>
       <bibkey>sacaleanu-neumann-2006-cross</bibkey>
     </paper>
@@ -2884,9 +2884,9 @@
     </paper>
     <paper id="6">
       <title><fixed-case>BRUJA</fixed-case>: Question Classification for <fixed-case>S</fixed-case>panish. Using Machine Translationand an <fixed-case>E</fixed-case>nglish Classifier</title>
-      <author><first>Miguel Á.</first><last>García Cumbreras</last></author>
-      <author><first>L. Alfonso</first><last>Ureña López</last></author>
-      <author><first>Fernando</first><last>Martínez Santiago</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel Á.</first><last>García Cumbreras</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña López</last></author>
+      <author id="fernando-martinez-santiago"><first>Fernando</first><last>Martínez Santiago</last></author>
       <url hash="31e311f6">W06-1906</url>
       <bibkey>a-garcia-cumbreras-etal-2006-bruja</bibkey>
     </paper>
@@ -2904,7 +2904,7 @@
       <title>Dialogue based Question Answering System in <fixed-case>T</fixed-case>elugu</title>
       <author><first>Rami</first><last>Reddy</last></author>
       <author><first>Nandi</first><last>Reddy</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="5f66efc5">W06-1908</url>
       <bibkey>reddy-etal-2006-dialogue</bibkey>
     </paper>
@@ -2916,8 +2916,8 @@
     </paper>
     <paper id="10">
       <title>Experiments Adapting an Open-Domain Question Answering System to the Geographical Domain Using Scope-Based Resources</title>
-      <author><first>Daniel</first><last>Ferrés</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <url hash="ed277b9f">W06-1910</url>
       <bibkey>ferres-rodriguez-2006-experiments</bibkey>
     </paper>
@@ -2986,7 +2986,7 @@
     <paper id="7">
       <title>Word Sense Disambiguation Using Automatically Translated Sense Examples</title>
       <author><first>Xinglong</first><last>Wang</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <url hash="cb05ba7a">W06-2007</url>
       <bibkey>wang-martinez-2006-word</bibkey>
     </paper>
@@ -3009,8 +3009,8 @@
     </frontmatter>
     <paper id="1">
       <title>Spatial Prepositions in Context: The Semantics of near in the Presence of Distractor Objects</title>
-      <author><first>Fintan J.</first><last>Costello</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="fintan-j-costello"><first>Fintan J.</first><last>Costello</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <url hash="0790d207">W06-2101</url>
       <bibkey>costello-kelleher-2006-spatial</bibkey>
     </paper>
@@ -3022,7 +3022,7 @@
     </paper>
     <paper id="3">
       <title>A Quantitative Approach to Preposition-Pronoun Contraction in <fixed-case>P</fixed-case>olish</title>
-      <author><first>Beata</first><last>Trawiński</last></author>
+      <author id="beata-trawinski"><first>Beata</first><last>Trawiński</last></author>
       <url hash="c58e8077">W06-2103</url>
       <bibkey>trawinski-2006-quantitative</bibkey>
     </paper>
@@ -3057,34 +3057,34 @@
       <title>A Conceptual Analysis of the Notion of Instrumentality via a Multilingual Analysis</title>
       <author><first>Asanee</first><last>Kawtrakul</last></author>
       <author><first>Mukda</first><last>Suktarachan</last></author>
-      <author><first>Bali</first><last>Ranaivo-Malancon</last></author>
+      <author id="bali-ranaivo-malancon"><first>Bali</first><last>Ranaivo-Malancon</last></author>
       <author><first>Pek</first><last>Kuan</last></author>
-      <author><first>Achla</first><last>Raina</last></author>
+      <author id="achla-m-raina"><first>Achla</first><last>Raina</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Alda</first><last>Mari</last></author>
-      <author><first>Sina</first><last>Zarriess</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarriess</last></author>
       <author><first>Elixabete</first><last>Murguia</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="d4d299d6">W06-2108</url>
       <bibkey>kawtrakul-etal-2006-conceptual</bibkey>
     </paper>
     <paper id="9">
       <title><fixed-case>G</fixed-case>erman Particle Verbs and Pleonastic Prepositions</title>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="d94a82f5">W06-2109</url>
       <bibkey>rehbein-van-genabith-2006-german</bibkey>
     </paper>
     <paper id="10">
       <title>Automatic Identification of <fixed-case>E</fixed-case>nglish Verb Particle Constructions using Linguistic Features</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <url hash="081dfecf">W06-2110</url>
       <bibkey>kim-baldwin-2006-automatic</bibkey>
     </paper>
     <paper id="11">
       <title>On the Prepositions which Introduce an Adjunct of Duration</title>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <url hash="69061baa">W06-2111</url>
       <bibkey>van-eynde-2006-prepositions</bibkey>
     </paper>
@@ -3096,8 +3096,8 @@
     </paper>
     <paper id="13">
       <title>Handling of Prepositions in <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>engali Machine Translation</title>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="cbcde40f">W06-2113</url>
       <bibkey>naskar-bandyopadhyay-2006-handling</bibkey>
     </paper>
@@ -3115,30 +3115,30 @@
     <paper id="1">
       <title>Learning Effective Surface Text Patterns for Information Extraction</title>
       <author><first>Gijs</first><last>Geleijnse</last></author>
-      <author><first>Jan</first><last>Korst</last></author>
+      <author id="jan-kors"><first>Jan</first><last>Korst</last></author>
       <url hash="45edceb6">W06-2201</url>
       <bibkey>geleijnse-korst-2006-learning</bibkey>
     </paper>
     <paper id="2">
       <title>Simple Information Extraction (<fixed-case>SIE</fixed-case>): A Portable and Effective <fixed-case>IE</fixed-case> System</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
       <url hash="af149f32">W06-2202</url>
       <bibkey>giuliano-etal-2006-simple</bibkey>
     </paper>
     <paper id="3">
       <title>An Experimental Study on Boundary Classification Algorithms for Information Extraction using <fixed-case>SVM</fixed-case></title>
-      <author><first>Jose</first><last>Iria</last></author>
+      <author id="jose-iria"><first>Jose</first><last>Iria</last></author>
       <author><first>Neil</first><last>Ireson</last></author>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
       <url hash="5b76a95f">W06-2203</url>
       <bibkey>iria-etal-2006-experimental</bibkey>
     </paper>
     <paper id="4">
       <title>Transductive Pattern Learning for Information Extraction</title>
       <author><first>Brian</first><last>McLernon</last></author>
-      <author><first>Nicholas</first><last>Kushmerick</last></author>
+      <author id="nicholas-kushmerick"><first>Nicholas</first><last>Kushmerick</last></author>
       <url hash="95d4d15b">W06-2204</url>
       <bibkey>mclernon-kushmerick-2006-transductive</bibkey>
     </paper>
@@ -3153,14 +3153,14 @@
       <author><first>Caroline</first><last>Sporleder</last></author>
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Tijn</first><last>Porcelijn</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <url hash="a6edc8e8">W06-2206</url>
       <bibkey>sporleder-etal-2006-spotting</bibkey>
     </paper>
     <paper id="7">
       <title>A Hybrid Approach for the Acquisition of Information Extraction Patterns</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <author><first>Alicia</first><last>Ageno</last></author>
       <url hash="f2fa6765">W06-2207</url>
       <bibkey>surdeanu-etal-2006-hybrid</bibkey>
@@ -3168,7 +3168,7 @@
     <paper id="8">
       <title>Expanding the Recall of Relation Extraction by Bootstrapping</title>
       <author><first>Junji</first><last>Tomita</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <url hash="769e6d27">W06-2208</url>
       <bibkey>tomita-etal-2006-expanding</bibkey>
@@ -3192,22 +3192,22 @@
     </frontmatter>
     <paper id="1">
       <title>Robust Parsing, Error Mining, Automated Lexical Acquisition, and Evaluation</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="c1c35835">W06-2301</url>
       <bibkey>van-noord-2006-robust</bibkey>
     </paper>
     <paper id="2">
       <title>Another Evaluation of Anaphora Resolution Algorithms and a Comparison with <fixed-case>GETARUNS</fixed-case>’ Knowledge Rich Approach</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Antonella</first><last>Bristot</last></author>
-      <author><first>Marco Aldo Piccolino</first><last>Boniforti</last></author>
+      <author id="marco-aldo-piccolino-boniforti"><first>Marco Aldo Piccolino</first><last>Boniforti</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <url hash="ed073f91">W06-2302</url>
       <bibkey>delmonte-etal-2006-another</bibkey>
     </paper>
     <paper id="3">
       <title>Robust Parsing of the <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank</title>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <url hash="7fcac93a">W06-2303</url>
       <bibkey>musillo-merlo-2006-robust</bibkey>
@@ -3220,7 +3220,7 @@
     </paper>
     <paper id="5">
       <title>Robust Parsing: More with Less</title>
-      <author><first>Kilian</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <url hash="089bc3bb">W06-2305</url>
       <bibkey>foth-menzel-2006-robust</bibkey>
@@ -3238,8 +3238,8 @@
     </frontmatter>
     <paper id="1">
       <title>Named Entities Translation Based on Comparable Corpora</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>Nerea</first><last>Ezeiza</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="nerea-ezeiza"><first>Nerea</first><last>Ezeiza</last></author>
       <author><first>Izaskun</first><last>Fernandez</last></author>
       <url hash="c1968c90">W06-2401</url>
       <bibkey>alegria-etal-2006-named</bibkey>
@@ -3247,13 +3247,13 @@
     <paper id="2">
       <title>Grouping Multi-word Expressions According to Part-Of-Speech in Statistical Machine Translation</title>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <url hash="047f5c82">W06-2402</url>
       <bibkey>lambert-banchs-2006-grouping</bibkey>
     </paper>
     <paper id="3">
       <title>Automatic Extraction of <fixed-case>C</fixed-case>hinese Multiword Expressions with a Statistical Tool</title>
-      <author><first>Scott S.L.</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott S.L.</first><last>Piao</last></author>
       <author><first>Guangfan</first><last>Sun</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Qi</first><last>Yuan</last></author>
@@ -3268,15 +3268,15 @@
       <author><first>Takehito</first><last>Utsuro</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <url hash="1e74f57a">W06-2404</url>
       <bibkey>tsuchiya-etal-2006-chunking</bibkey>
     </paper>
     <paper id="5">
       <title>Identifying idiomatic expressions using automatic word-alignment</title>
-      <author><first>Begoña Villada</first><last>Moirón</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="begona-villada-moiron"><first>Begoña Villada</first><last>Moirón</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <url hash="ac096793">W06-2405</url>
       <bibkey>moiron-tiedemann-2006-identifying</bibkey>
     </paper>
@@ -3296,7 +3296,7 @@
     </paper>
     <paper id="8">
       <title>Multi-word verbs in a flective language: the case of <fixed-case>E</fixed-case>stonian</title>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <author><first>Kadri</first><last>Muischnek</last></author>
       <url hash="13100318">W06-2408</url>
       <bibkey>kaalep-muischnek-2006-multi</bibkey>
@@ -3310,7 +3310,7 @@
     </paper>
     <paper id="10">
       <title>Multiword Units in an <fixed-case>MT</fixed-case> Lexicon</title>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <url hash="6aaff61b">W06-2410</url>
       <bibkey>varadi-2006-multiword</bibkey>
     </paper>
@@ -3336,13 +3336,13 @@
       <title>Cluster Stopping Rules for Word Sense Discrimination</title>
       <author><first>Guergana</first><last>Savova</last></author>
       <author><first>Terry</first><last>Therneau</last></author>
-      <author><first>Christopher</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher</first><last>Chute</last></author>
       <url hash="556e0330">W06-2502</url>
       <bibkey>savova-etal-2006-cluster</bibkey>
     </paper>
     <paper id="3">
       <title>Relating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Word Sense Disambiguation</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <url hash="c8edb6d5">W06-2503</url>
       <bibkey>mccarthy-2006-relating</bibkey>
     </paper>
@@ -3355,7 +3355,7 @@
     <paper id="5">
       <title>Multilingual versus Monolingual <fixed-case>WSD</fixed-case></title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <author><first>Gabriela Castelo Branco</first><last>Ribeiro</last></author>
       <url hash="e4d0839a">W06-2505</url>
@@ -3364,7 +3364,7 @@
     <paper id="6">
       <title>Characterizing Response Types and Revealing Noun Ambiguity in <fixed-case>G</fixed-case>erman Association Norms</title>
       <author><first>Alissa</first><last>Melinger</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Andrea</first><last>Weber</last></author>
       <url hash="edb28110">W06-2506</url>
       <bibkey>melinger-etal-2006-characterizing</bibkey>
@@ -3391,8 +3391,8 @@
     <paper id="2">
       <title>Constraint Satisfaction Inference: Non-probabilistic Global Inference for Sequence Labelling</title>
       <author><first>Sander</first><last>Canisius</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="2e2e68ee">W06-2602</url>
       <bibkey>canisius-etal-2006-constraint</bibkey>
     </paper>
@@ -3409,7 +3409,7 @@
     <paper id="4">
       <title>A Multiclassifier based Document Categorization System: profiting from the Singular Value Decomposition Dimensionality Reduction Technique</title>
       <author><first>Ana</first><last>Zelaia</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Olatz</first><last>Arregi</last></author>
       <author><first>Basilio</first><last>Sierra</last></author>
       <url hash="0c6fb140">W06-2604</url>
@@ -3425,9 +3425,9 @@
     </paper>
     <paper id="6">
       <title>Reranking Translation Hypotheses Using Structural Properties</title>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="ba3cc9bc">W06-2606</url>
       <bibkey>hasan-etal-2006-reranking</bibkey>
     </paper>
@@ -3435,14 +3435,14 @@
       <title>Tree Kernel Engineering in Semantic Role Labeling Systems</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <url hash="b239d931">W06-2607</url>
       <bibkey>moschitti-etal-2006-tree</bibkey>
     </paper>
     <paper id="8">
       <title>Syntagmatic Kernels: a Word Sense Disambiguation Case Study</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <url hash="3225c224">W06-2608</url>
       <bibkey>giuliano-etal-2006-syntagmatic</bibkey>
@@ -3490,7 +3490,7 @@
       <author><first>Wouter</first><last>Alink</last></author>
       <author><first>Valentin</first><last>Jijkoun</last></author>
       <author><first>David</first><last>Ahn</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Peter</first><last>Boncz</last></author>
       <author><first>Arjen</first><last>de Vries</last></author>
       <url hash="1938cc91">W06-2701</url>
@@ -3498,11 +3498,11 @@
     </paper>
     <paper id="2">
       <title>Annotation and Disambiguation of Semantic Types in Biomedical Text: A Cascaded Approach to Named Entity Recognition</title>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <author><first>Harald</first><last>Kirsch</last></author>
       <author><first>Sylvain</first><last>Gaudan</last></author>
       <author><first>Miguel</first><last>Arregui</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <url hash="3107294a">W06-2702</url>
       <bibkey>rebholz-schuhmann-etal-2006-annotation</bibkey>
     </paper>
@@ -3524,7 +3524,7 @@
     </paper>
     <paper id="5">
       <title>Multi-dimensional Annotation and Alignment in an <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Translation Corpus</title>
-      <author><first>Silvia</first><last>Hansen-Schirra</last></author>
+      <author id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></author>
       <author><first>Stella</first><last>Neumann</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
       <url hash="27eb5b6d">W06-2705</url>
@@ -3547,8 +3547,8 @@
     </paper>
     <paper id="8">
       <title>Tools for hierarchical annotation of typed dialogue</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
-      <author><first>Charles</first><last>Callaway</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
       <url hash="8bba12a4">W06-2708</url>
       <bibkey>dzikovska-etal-2006-tools</bibkey>
@@ -3569,7 +3569,7 @@
     </paper>
     <paper id="11">
       <title>The <fixed-case>SAMMIE</fixed-case> Multimodal Dialogue Corpus Meets the Nite <fixed-case>XML</fixed-case> Toolkit</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Ciprian</first><last>Gerstenberger</last></author>
       <author><first>Jan</first><last>Schehl</last></author>
@@ -3579,22 +3579,22 @@
     </paper>
     <paper id="12">
       <title>Representing and Accessing Multi-Level Annotations in <fixed-case>MMAX</fixed-case>2</title>
-      <author><first>Christoph</first><last>Müller</last></author>
+      <author id="christoph-muller"><first>Christoph</first><last>Müller</last></author>
       <url hash="08223247">W06-2712</url>
       <bibkey>muller-2006-representing</bibkey>
     </paper>
     <paper id="13">
       <title>Representing and Accessing Multilevel Linguistic Annotation using the <fixed-case>MEANING</fixed-case> Format</title>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Luisa</first><last>Bentivogli</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <url hash="b81d17b2">W06-2713</url>
       <bibkey>pianta-etal-2006-representing</bibkey>
     </paper>
     <paper id="14">
       <title>Middleware for Creating and Combining Multi-dimensional <fixed-case>NLP</fixed-case> Markup</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <url hash="2c0ab42e">W06-2714</url>
       <bibkey>schafer-2006-middleware</bibkey>
     </paper>
@@ -3603,7 +3603,7 @@
       <author><first>Maik</first><last>Stührenberg</last></author>
       <author><first>Andreas</first><last>Witt</last></author>
       <author><first>Daniela</first><last>Goecke</last></author>
-      <author><first>Dieter</first><last>Metzing</last></author>
+      <author id="dieter-metzing"><first>Dieter</first><last>Metzing</last></author>
       <author><first>Oliver</first><last>Schonefeld</last></author>
       <url hash="f13ce723">W06-2715</url>
       <bibkey>stuhrenberg-etal-2006-multidimensional</bibkey>
@@ -3611,14 +3611,14 @@
     <paper id="16">
       <title>Layering and Merging Linguistic Annotations</title>
       <author><first>Keith</first><last>Suderman</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="0d941533">W06-2716</url>
       <bibkey>suderman-ide-2006-layering</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>XML</fixed-case>-based Phrase Alignment in Parallel Treebanks</title>
       <author><first>Martin</first><last>Volk</last></author>
-      <author><first>Sofia</first><last>Gustafson-Capková</last></author>
+      <author id="sofia-gustafson-capkova"><first>Sofia</first><last>Gustafson-Capková</last></author>
       <author><first>Joakim</first><last>Lundborg</last></author>
       <author><first>Torsten</first><last>Marek</last></author>
       <author><first>Yvonne</first><last>Samuelsson</last></author>
@@ -3671,7 +3671,7 @@
     <paper id="5">
       <title>Learning to Recognize Blogs: A Preliminary Exploration</title>
       <author><first>Erik</first><last>Elgersma</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <url hash="01f01470">W06-2805</url>
       <bibkey>elgersma-de-rijke-2006-learning</bibkey>
     </paper>
@@ -3692,27 +3692,27 @@
     <paper id="8">
       <title>Anomaly Detecting within Dynamic <fixed-case>C</fixed-case>hinese Chat Text</title>
       <author><first>Yunqing</first><last>Xia</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <url hash="efd2193e">W06-2808</url>
       <bibkey>xia-wong-2006-anomaly</bibkey>
     </paper>
     <paper id="9">
       <title>A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <url hash="0f194764">W06-2809</url>
       <bibkey>toral-munoz-2006-proposal</bibkey>
     </paper>
     <paper id="10">
       <title>Finding Similar Sentences across Multiple Languages in <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Sisay Fissaha</first><last>Adafre</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="sisay-fissaha-adafre"><first>Sisay Fissaha</first><last>Adafre</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <url hash="7ab96ccb">W06-2810</url>
       <bibkey>adafre-de-rijke-2006-finding</bibkey>
     </paper>
     <paper id="11">
       <title>Multilingual interactive experiments with <fixed-case>F</fixed-case>lickr</title>
-      <author><first>Paul D.</first><last>Clough</last></author>
+      <author id="paul-clough"><first>Paul D.</first><last>Clough</last></author>
       <author><first>Julio</first><last>Gonzales</last></author>
       <author><first>Jussi</first><last>Karlgren</last></author>
       <url hash="503a4f2c">W06-2811</url>
@@ -3723,7 +3723,7 @@
     <meta>
       <booktitle>Proceedings of the Tenth Conference on Computational Natural Language Learning (<fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-X)</booktitle>
       <url hash="bc7c4b37">W06-29</url>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <editor><first>Dan</first><last>Klein</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City</address>
@@ -3737,7 +3737,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Mission for Computational Natural Language Learning</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>1–5</pages>
       <url hash="5a151d4e">W06-2901</url>
       <bibkey>daelemans-2006-mission</bibkey>
@@ -3745,7 +3745,7 @@
     <paper id="2">
       <title>Porting Statistical Parsers with Data-Defined Kernels</title>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>6–13</pages>
       <url hash="913303bd">W06-2902</url>
       <bibkey>titov-henderson-2006-porting</bibkey>
@@ -3788,7 +3788,7 @@
       <title>Investigating Lexical Substitution Scoring for Subtitle Generation</title>
       <author><first>Oren</first><last>Glickman</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Mikaela</first><last>Keller</last></author>
       <author><first>Samy</first><last>Bengio</last></author>
       <pages>45–52</pages>
@@ -3797,7 +3797,7 @@
     </paper>
     <paper id="8">
       <title>Semantic Role Recognition Using Kernels on Weighted Marked Ordered Labeled Trees</title>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>53–60</pages>
       <url hash="829d4298">W06-2908</url>
@@ -3807,21 +3807,21 @@
       <title>Semantic Role Labeling via Tree Kernel Joint Inference</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>61–68</pages>
       <url hash="2a8d3b0e">W06-2909</url>
       <bibkey>moschitti-etal-2006-semantic</bibkey>
     </paper>
     <paper id="10">
       <title>Can Human Verb Associations Help Identify Salient Features for Semantic Verb Classification?</title>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>69–76</pages>
       <url hash="4c1488d0">W06-2910</url>
       <bibkey>schulte-im-walde-2006-human-verb</bibkey>
     </paper>
     <paper id="11">
       <title>Applying Alternating Structure Optimization to Word Sense Disambiguation</title>
-      <author><first>Rie Kubota</first><last>Ando</last></author>
+      <author id="rie-johnson"><first>Rie Kubota</first><last>Ando</last></author>
       <pages>77–84</pages>
       <url hash="84dd0eaa">W06-2911</url>
       <bibkey>ando-2006-applying</bibkey>
@@ -3836,7 +3836,7 @@
     <paper id="13">
       <title>A Lattice-Based Framework for Enhancing Statistical Parsers with Information from Unlabeled Corpora</title>
       <author><first>Michaela</first><last>Atterer</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>93–100</pages>
       <url hash="3d714354">W06-2913</url>
       <bibkey>atterer-schutze-2006-lattice</bibkey>
@@ -3845,7 +3845,7 @@
       <title>Word Distributions for Thematic Segmentation in a <fixed-case>S</fixed-case>upport <fixed-case>V</fixed-case>ector <fixed-case>M</fixed-case>achine Approach</title>
       <author><first>Maria</first><last>Georgescul</last></author>
       <author><first>Alexander</first><last>Clark</last></author>
-      <author><first>Susan</first><last>Armstrong</last></author>
+      <author id="susan-armstrong"><first>Susan</first><last>Armstrong</last></author>
       <pages>101–108</pages>
       <url hash="aa17c34a">W06-2914</url>
       <bibkey>georgescul-etal-2006-word</bibkey>
@@ -3854,8 +3854,8 @@
       <title>Which Side are You on? Identifying Perspectives at the Document and Sentence Levels</title>
       <author><first>Wei-Hao</first><last>Lin</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Alexander</first><last>Hauptmann</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="alexander-g-hauptmann"><first>Alexander</first><last>Hauptmann</last></author>
       <pages>109–116</pages>
       <url hash="71e14482">W06-2915</url>
       <bibkey>lin-etal-2006-side</bibkey>
@@ -3877,7 +3877,7 @@
     </paper>
     <paper id="18">
       <title>Using Gazetteers in Discriminative Information Extraction</title>
-      <author><first>Andrew</first><last>Smith</last></author>
+      <author id="andrew-smith"><first>Andrew</first><last>Smith</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>133–140</pages>
       <url hash="4d507256">W06-2918</url>
@@ -3885,9 +3885,9 @@
     </paper>
     <paper id="19">
       <title>A Context Pattern Induction Method for Named Entity Extraction</title>
-      <author><first>Partha Pratim</first><last>Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></author>
       <author><first>Thorsten</first><last>Brants</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <pages>141–148</pages>
       <url hash="a60870de">W06-2919</url>
@@ -3917,7 +3917,7 @@
     </paper>
     <paper id="23">
       <title><fixed-case>L</fixed-case>ing<fixed-case>P</fixed-case>ars, a Linguistically Inspired, Language-Independent Machine Learner for Dependency Treebanks</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>171–175</pages>
       <url hash="2948e14c">W06-2923</url>
       <bibkey>bick-2006-lingpars</bibkey>
@@ -3926,9 +3926,9 @@
       <title>Dependency Parsing by Inference over High-recall Dependency Predictions</title>
       <author><first>Sander</first><last>Canisius</last></author>
       <author><first>Toine</first><last>Bogers</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Jeroen</first><last>Geertzen</last></author>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>176–180</pages>
       <url hash="65a918fb">W06-2924</url>
       <bibkey>canisius-etal-2006-dependency</bibkey>
@@ -3955,14 +3955,14 @@
       <title>Multi-lingual Dependency Parsing at <fixed-case>NAIST</fixed-case></title>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>191–195</pages>
       <url hash="54ed248a">W06-2927</url>
       <bibkey>cheng-etal-2006-multi</bibkey>
     </paper>
     <paper id="28">
       <title>Dependency Parsing with Reference to <fixed-case>S</fixed-case>lovene, <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Simon</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon</first><last>Corston-Oliver</last></author>
       <author><first>Anthony</first><last>Aue</last></author>
       <pages>196–200</pages>
       <url hash="7803c9a4">W06-2928</url>
@@ -3971,8 +3971,8 @@
     <paper id="29">
       <title>Vine Parsing and Minimum Risk Reranking for Speed and Precision</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>201–205</pages>
       <url hash="177e8f83">W06-2929</url>
       <bibkey>dreyer-etal-2006-vine</bibkey>
@@ -4009,7 +4009,7 @@
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Johan</first><last>Hall</last></author>
       <author><first>Jens</first><last>Nilsson</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <author><first>Svetoslav</first><last>Marinov</last></author>
       <pages>221–225</pages>
       <url hash="e89beb08">W06-2933</url>
@@ -4018,8 +4018,8 @@
     <paper id="34">
       <title>Multi-lingual Dependency Parsing with Incremental Integer Linear Programming</title>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Ruket</first><last>Çakıcı</last></author>
-      <author><first>Ivan</first><last>Meza-Ruiz</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Çakıcı</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza-Ruiz</last></author>
       <pages>226–230</pages>
       <url hash="ca48d1c2">W06-2934</url>
       <bibkey>riedel-etal-2006-multi</bibkey>
@@ -4060,7 +4060,7 @@
     <meta>
       <booktitle>Proceedings of the Interactive Question Answering Workshop at <fixed-case>HLT</fixed-case>-<fixed-case>NAACL</fixed-case> 2006</booktitle>
       <url hash="499a854b">W06-30</url>
-      <editor><first>Nick</first><last>Webb</last></editor>
+      <editor id="nick-webb"><first>Nick</first><last>Webb</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York, NY, USA</address>
       <month>June</month>
@@ -4073,11 +4073,11 @@
     </frontmatter>
     <paper id="1">
       <title>Contextual phenomena and thematic relations in database <fixed-case>QA</fixed-case> dialogues: results from a <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Experiment</title>
-      <author><first>Núria</first><last>Bertomeu</last></author>
+      <author id="nuria-bertomeu"><first>Núria</first><last>Bertomeu</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
-      <author><first>Brigitte</first><last>Jörg</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="brigitte-jorg"><first>Brigitte</first><last>Jörg</last></author>
       <pages>1–8</pages>
       <url hash="46ba004f">W06-3001</url>
       <bibkey>bertomeu-etal-2006-contextual</bibkey>
@@ -4085,8 +4085,8 @@
     <paper id="2">
       <title><fixed-case>W</fixed-case>o<fixed-case>Z</fixed-case> Simulation of Interactive Question Answering</title>
       <author><first>Tsuneaki</first><last>Kato</last></author>
-      <author><first>Jun’ichi</first><last>Fukumoto</last></author>
-      <author><first>Fumito</first><last>Masui</last></author>
+      <author id="junichi-fukumoto"><first>Jun’ichi</first><last>Fukumoto</last></author>
+      <author id="fumito-masui"><first>Fumito</first><last>Masui</last></author>
       <author><first>Noriko</first><last>Kando</last></author>
       <pages>9–16</pages>
       <url hash="4c978edd">W06-3002</url>
@@ -4094,13 +4094,13 @@
     </paper>
     <paper id="3">
       <title>Modeling Reference Interviews as a Basis for Improving Automatic <fixed-case>QA</fixed-case> Systems</title>
-      <author><first>Nancy J.</first><last>McCracken</last></author>
+      <author id="nancy-mccracken"><first>Nancy J.</first><last>McCracken</last></author>
       <author><first>Anne R.</first><last>Diekema</last></author>
       <author><first>Grant</first><last>Ingersoll</last></author>
       <author><first>Sarah C.</first><last>Harwell</last></author>
       <author><first>Eileen E.</first><last>Allen</last></author>
       <author><first>Ozgur</first><last>Yilmazel</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <pages>17–24</pages>
       <url hash="2a014e5a">W06-3003</url>
       <bibkey>mccracken-etal-2006-modeling</bibkey>
@@ -4108,7 +4108,7 @@
     <paper id="4">
       <title>Enhanced Interactive Question-Answering with Conditional Random Fields</title>
       <author><first>Andrew</first><last>Hickl</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>25–32</pages>
       <url hash="4e56d465">W06-3004</url>
       <bibkey>hickl-harabagiu-2006-enhanced</bibkey>
@@ -4117,14 +4117,14 @@
       <title>A Data Driven Approach to Relevancy Recognition for Contextual Question Answering</title>
       <author><first>Fan</first><last>Yang</last></author>
       <author><first>Junlan</first><last>Feng</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <pages>33–40</pages>
       <url hash="06a762dc">W06-3005</url>
       <bibkey>yang-etal-2006-data</bibkey>
     </paper>
     <paper id="6">
       <title>Answering questions of Information Access Dialogue (<fixed-case>IAD</fixed-case>) task using ellipsis handling of follow-up questions</title>
-      <author><first>Junichi</first><last>Fukumoto</last></author>
+      <author id="junichi-fukumoto"><first>Junichi</first><last>Fukumoto</last></author>
       <pages>41–48</pages>
       <url hash="fb1a8267">W06-3006</url>
       <bibkey>fukumoto-2006-answering</bibkey>
@@ -4132,7 +4132,7 @@
     <paper id="7">
       <title>User-Centered Evaluation of Interactive Question Answering Systems</title>
       <author><first>Diane</first><last>Kelly</last></author>
-      <author><first>Paul</first><last>Kantor</last></author>
+      <author id="paul-kantor"><first>Paul</first><last>Kantor</last></author>
       <author><first>Emile</first><last>Morse</last></author>
       <author><first>Jean</first><last>Scholtz</last></author>
       <author><first>Ying</first><last>Sun</last></author>
@@ -4159,21 +4159,21 @@
     </frontmatter>
     <paper id="1">
       <title>Morpho-syntactic Information for Automatic Error Analysis of Statistical Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
-      <author><first>Deepa</first><last>Gupta</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
+      <author id="deepak-gupta"><first>Deepa</first><last>Gupta</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <pages>1–6</pages>
       <url hash="3da80607">W06-3101</url>
       <bibkey>popovic-etal-2006-morpho</bibkey>
     </paper>
     <paper id="2">
       <title>Initial Explorations in <fixed-case>E</fixed-case>nglish to <fixed-case>T</fixed-case>urkish Statistical Machine Translation</title>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>7–14</pages>
       <url hash="87336f96">W06-3102</url>
@@ -4181,18 +4181,18 @@
     </paper>
     <paper id="3">
       <title>Morpho-syntactic <fixed-case>A</fixed-case>rabic Preprocessing for <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish Statistical Machine Translation</title>
-      <author><first>Anas</first><last>El Isbihani</last></author>
+      <author id="anas-el-isbihani"><first>Anas</first><last>El Isbihani</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>15–22</pages>
       <url hash="24533fb3">W06-3103</url>
       <bibkey>el-isbihani-etal-2006-morpho</bibkey>
     </paper>
     <paper id="4">
       <title>Quasi-Synchronous Grammars: Alignment by Soft Projection of Syntactic Dependencies</title>
-      <author><first>David</first><last>Smith</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>23–30</pages>
       <url hash="8e77e9a9">W06-3104</url>
       <bibkey>smith-eisner-2006-quasi</bibkey>
@@ -4200,7 +4200,7 @@
     <paper id="5">
       <title>Why Generative Phrase Models Underperform Surface Heuristics</title>
       <author><first>John</first><last>DeNero</last></author>
-      <author><first>Dan</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
       <author><first>James</first><last>Zhang</last></author>
       <author><first>Dan</first><last>Klein</last></author>
       <pages>31–38</pages>
@@ -4209,7 +4209,7 @@
     </paper>
     <paper id="6">
       <title>Phrase-Based <fixed-case>SMT</fixed-case> with Shallow Tree-Phrases</title>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
       <pages>39–46</pages>
       <url hash="8112e7a7">W06-3106</url>
@@ -4218,7 +4218,7 @@
     <paper id="7">
       <title>Searching for alignments in <fixed-case>SMT</fixed-case>. A novel approach based on an Estimation of Distribution Algorithm</title>
       <author><first>Luis</first><last>Rodríguez</last></author>
-      <author><first>Ismael</first><last>García-Varea</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García-Varea</last></author>
       <author><first>José A.</first><last>Gámez</last></author>
       <pages>47–54</pages>
       <url hash="05aad8d9">W06-3107</url>
@@ -4227,16 +4227,16 @@
     <paper id="8">
       <title>Discriminative Reordering Models for Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>55–63</pages>
       <url hash="631c1f0e">W06-3108</url>
       <bibkey>zens-ney-2006-discriminative</bibkey>
     </paper>
     <paper id="9">
       <title>Generalized Stack Decoding Algorithms for Statistical Machine Translation</title>
-      <author><first>Daniel</first><last>Ortiz Martínez</last></author>
-      <author><first>Ismael</first><last>García Varea</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="daniel-ortiz-martinez"><first>Daniel</first><last>Ortiz Martínez</last></author>
+      <author id="ismael-garcia-varea"><first>Ismael</first><last>García Varea</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>64–71</pages>
       <url hash="8c28dc14">W06-3109</url>
       <bibkey>ortiz-martinez-etal-2006-generalized</bibkey>
@@ -4244,7 +4244,7 @@
     <paper id="10">
       <title>N-Gram Posterior Probabilities for Statistical Machine Translation</title>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>72–77</pages>
       <url hash="5f951197">W06-3110</url>
       <bibkey>zens-ney-2006-n</bibkey>
@@ -4253,7 +4253,7 @@
       <title>Partitioning Parallel Documents Using Binary Segmentation</title>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>78–85</pages>
       <url hash="0bac3e1a">W06-3111</url>
       <bibkey>xu-etal-2006-partitioning</bibkey>
@@ -4262,7 +4262,7 @@
       <title>Contextual Bitext-Derived Paraphrases in Automatic <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
       <author><first>Declan</first><last>Groves</last></author>
-      <author><first>Josef</first><last>Van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>Van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>86–93</pages>
       <url hash="085c9541">W06-3112</url>
@@ -4297,15 +4297,15 @@
       <title>Mood at work: Ramses versus Pharaoh</title>
       <author><first>Alexandre</first><last>Patry</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>126–129</pages>
       <url hash="b9cb9da7">W06-3116</url>
       <bibkey>patry-etal-2006-mood-work</bibkey>
     </paper>
     <paper id="17">
       <title>Stochastic Inversion Transduction Grammars for Obtaining Word Phrases for Phrase-based Statistical Machine Translation</title>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
-      <author><first>José Miguel</first><last>Benedí</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José Miguel</first><last>Benedí</last></author>
       <pages>130–133</pages>
       <url hash="1a4dcfff">W06-3117</url>
       <bibkey>sanchez-benedi-2006-stochastic</bibkey>
@@ -4333,14 +4333,14 @@
     </paper>
     <paper id="20">
       <title><fixed-case>TALP</fixed-case> Phrase-based statistical translation system for <fixed-case>E</fixed-case>uropean language pairs</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <pages>142–145</pages>
       <url hash="55921a6b">W06-3120</url>
       <bibkey>costa-jussa-etal-2006-talp</bibkey>
@@ -4348,9 +4348,9 @@
     <paper id="21">
       <title>Phramer - An Open Source Statistical Phrase-Based Translator</title>
       <author><first>Marian</first><last>Olteanu</last></author>
-      <author><first>Chris</first><last>Davis</last></author>
+      <author id="chris-irwin-davis"><first>Chris</first><last>Davis</last></author>
       <author><first>Ionut</first><last>Volosen</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>146–149</pages>
       <url hash="aa69c937">W06-3121</url>
       <bibkey>olteanu-etal-2006-phramer</bibkey>
@@ -4359,7 +4359,7 @@
       <title>Language Models and Reranking for Machine Translation</title>
       <author><first>Marian</first><last>Olteanu</last></author>
       <author><first>Pasin</first><last>Suriyentrakorn</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>150–153</pages>
       <url hash="770588e6">W06-3122</url>
       <bibkey>olteanu-etal-2006-language</bibkey>
@@ -4385,22 +4385,22 @@
     </paper>
     <paper id="25">
       <title>N-gram-based <fixed-case>SMT</fixed-case> System Enhanced with Reordering Patterns</title>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>162–165</pages>
       <url hash="fee9091d">W06-3125</url>
       <bibkey>crego-etal-2006-n</bibkey>
     </paper>
     <paper id="26">
       <title>The <fixed-case>LDV</fixed-case>-<fixed-case>COMBO</fixed-case> system for <fixed-case>SMT</fixed-case></title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>166–169</pages>
       <url hash="e534a846">W06-3126</url>
       <bibkey>gimenez-marquez-2006-ldv</bibkey>
@@ -4425,7 +4425,7 @@
     <paper id="1">
       <title>A Combined Phonetic-Phonological Approach to Estimating Cross-Language Phoneme Similarity in an <fixed-case>ASR</fixed-case> Environment</title>
       <author><first>Lynette</first><last>Melnar</last></author>
-      <author id="chen-liu"><first>Chen</first><last>Liu</last></author>
+      <author><first>Chen</first><last>Liu</last></author>
       <pages>1–10</pages>
       <url hash="dfc5b5cf">W06-3201</url>
       <bibkey>melnar-liu-2006-combined</bibkey>
@@ -4462,7 +4462,7 @@
     </paper>
     <paper id="6">
       <title>Improved morpho-phonological sequence processing with constraint satisfaction inference</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Sander</first><last>Canisius</last></author>
       <pages>41–49</pages>
       <url hash="ce6a5717">W06-3206</url>
@@ -4470,7 +4470,7 @@
     </paper>
     <paper id="7">
       <title>Richness of the Base and Probabilistic Unsupervised Learning in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory</title>
-      <author><first>Gaja</first><last>Jarosz</last></author>
+      <author id="gaja-jarosz"><first>Gaja</first><last>Jarosz</last></author>
       <pages>50–59</pages>
       <url hash="6443dfb1">W06-3207</url>
       <bibkey>jarosz-2006-richness</bibkey>
@@ -4479,7 +4479,7 @@
       <title>Morphology Induction from Limited Noisy Data Using Approximate String Matching</title>
       <author><first>Burcu</first><last>Karagol-Ayan</last></author>
       <author><first>David</first><last>Doermann</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>60–68</pages>
       <url hash="cda20401">W06-3208</url>
       <bibkey>karagol-ayan-etal-2006-morphology</bibkey>
@@ -4503,8 +4503,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>HLT</fixed-case>-<fixed-case>NAACL</fixed-case> <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Workshop on Linking Natural Language and Biology</booktitle>
       <url hash="8dc887e9">W06-33</url>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Ben</first><last>Goertzel</last></editor>
       <editor><first>Inderjeet</first><last>Mani</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4549,7 +4549,7 @@
       <author><first>Christian</first><last>Posse</last></author>
       <author><first>Banu</first><last>Gopalan</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <pages>25–32</pages>
       <url hash="14b22b95">W06-3304</url>
       <bibkey>sanfilippo-etal-2006-integrating</bibkey>
@@ -4568,15 +4568,15 @@
       <author><first>Kevin</first><last>Murphy</last></author>
       <author><first>Yang</first><last>Jin</last></author>
       <author><first>Jessica</first><last>Kim</last></author>
-      <author><first>Peter</first><last>White</last></author>
+      <author id="peter-white"><first>Peter</first><last>White</last></author>
       <pages>41–48</pages>
       <url hash="c86e7058">W06-3306</url>
       <bibkey>fang-etal-2006-human</bibkey>
     </paper>
     <paper id="7">
       <title>Integrating Co-occurrence Statistics with Information Extraction for Robust Retrieval of Protein Interactions from <fixed-case>M</fixed-case>edline</title>
-      <author><first>Razvan</first><last>Bunescu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="razvan-bunescu"><first>Razvan</first><last>Bunescu</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Arun</first><last>Ramani</last></author>
       <author><first>Edward</first><last>Marcotte</last></author>
       <pages>49–56</pages>
@@ -4585,14 +4585,14 @@
     </paper>
     <paper id="8">
       <title><fixed-case>BIOSMILE</fixed-case>: Adapting Semantic Role Labeling for Biomedical Verbs:</title>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <author><first>Wen-Chi</first><last>Chou</last></author>
       <author><first>Yu-Chun</first><last>Lin</last></author>
       <author><first>Cheng-Lung</first><last>Sung</last></author>
       <author><first>Wei</first><last>Ku</last></author>
       <author><first>Ying-Shan</first><last>Su</last></author>
       <author><first>Ting-Yi</first><last>Sung</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>57–64</pages>
       <url hash="c26c92f4">W06-3308</url>
       <bibkey>tsai-etal-2006-biosmile</bibkey>
@@ -4602,7 +4602,7 @@
       <author><first>Jimmy</first><last>Lin</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>65–72</pages>
       <url hash="1a4fd0c3">W06-3309</url>
       <bibkey>lin-etal-2006-generative</bibkey>
@@ -4611,7 +4611,7 @@
       <title>Exploring Text and Image Features to Classify Images in Bioscience Literature</title>
       <author><first>Barry</first><last>Rafkind</last></author>
       <author><first>Minsuk</first><last>Lee</last></author>
-      <author><first>Shih-Fu</first><last>Chang</last></author>
+      <author id="shih-fu-chang"><first>Shih-Fu</first><last>Chang</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>73–80</pages>
       <url hash="fd079021">W06-3310</url>
@@ -4644,7 +4644,7 @@
     </paper>
     <paper id="15">
       <title>A Graph-Search Framework for <fixed-case>G</fixed-case>ene<fixed-case>I</fixed-case>d Ranking</title>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>93–95</pages>
       <url hash="b5aba6ae">W06-3315</url>
       <bibkey>cohen-2006-graph</bibkey>
@@ -4686,21 +4686,21 @@
     </paper>
     <paper id="20">
       <title>Refactoring Corpora</title>
-      <author><first>Helen L.</first><last>Johnson</last></author>
-      <author><first>William A.</first><last>Baumgartner Jr.</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="helen-l-johnson"><first>Helen L.</first><last>Johnson</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner Jr.</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>116–117</pages>
       <url hash="e8da40f4">W06-3320</url>
       <bibkey>johnson-etal-2006-refactoring</bibkey>
     </paper>
     <paper id="21">
       <title>Rapid Adaptation of <fixed-case>POS</fixed-case> Tagging for Domain Specific Uses</title>
-      <author><first>John E.</first><last>Miller</last></author>
+      <author id="john-miller"><first>John E.</first><last>Miller</last></author>
       <author><first>Michael</first><last>Bloodgood</last></author>
       <author><first>Manabu</first><last>Torii</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>118–119</pages>
       <url hash="acc46f64">W06-3321</url>
       <bibkey>miller-etal-2006-rapid</bibkey>
@@ -4739,17 +4739,17 @@
     </paper>
     <paper id="26">
       <title>Summarizing Key Concepts using Citation Sentences</title>
-      <author><first>Ariel S.</first><last>Schwartz</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="ariel-schwartz"><first>Ariel S.</first><last>Schwartz</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>134–135</pages>
       <url hash="1d7c75a6">W06-3326</url>
       <bibkey>schwartz-hearst-2006-summarizing</bibkey>
     </paper>
     <paper id="27">
       <title>Subdomain adaptation of a <fixed-case>POS</fixed-case> tagger with a small corpus</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>136–137</pages>
       <url hash="116f79b9">W06-3327</url>
       <bibkey>tateisi-etal-2006-subdomain</bibkey>
@@ -4767,7 +4767,7 @@
     <meta>
       <booktitle>Proceedings of the Analyzing Conversations in Text and Speech</booktitle>
       <url hash="d29e521a">W06-34</url>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Klaus</first><last>Zechner</last></editor>
       <editor><first>Liang</first><last>Zhou</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4783,7 +4783,7 @@
     <paper id="1">
       <title>Prosodic Correlates of Rhetorical Relations</title>
       <author><first>Gabriel</first><last>Murray</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Steve</first><last>Renals</last></author>
       <pages>1–7</pages>
       <url hash="2e50429d">W06-3401</url>
@@ -4812,7 +4812,7 @@
     <paper id="4">
       <title>You Are What You Say: Using Meeting Participants’ Speech to Detect their Roles and Expertise</title>
       <author><first>Satanjeev</first><last>Banerjee</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>23–30</pages>
       <url hash="1751eb08">W06-3404</url>
       <bibkey>banerjee-rudnicky-2006-say</bibkey>
@@ -4828,8 +4828,8 @@
     </paper>
     <paper id="6">
       <title>Improving “Email Speech Acts” Analysis via N-gram Selection</title>
-      <author><first>Vitor</first><last>Carvalho</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>35–41</pages>
       <url hash="54d6562a">W06-3406</url>
       <bibkey>carvalho-cohen-2006-improving</bibkey>
@@ -4837,14 +4837,14 @@
     <paper id="7">
       <title>Topic-Segmentation of Dialogue</title>
       <author><first>Jaime</first><last>Arguello</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>42–49</pages>
       <url hash="cf88d7fb">W06-3407</url>
       <bibkey>arguello-rose-2006-topic</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>C</fixed-case>h<fixed-case>AT</fixed-case>: A Time-Linked System for Conversational Analysis</title>
-      <author><first>Michelle L.</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle L.</first><last>Gregory</last></author>
       <author><first>Douglas</first><last>Love</last></author>
       <author><first>Stuart</first><last>Rose</last></author>
       <author><first>Anne</first><last>Schur</last></author>
@@ -4873,7 +4873,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Scalable Natural Language Understanding</booktitle>
       <url hash="1ac1dfc7">W06-35</url>
-      <editor><first>James</first><last>Allen</last></editor>
+      <editor id="james-allen"><first>James</first><last>Allen</last></editor>
       <editor><first>Jan</first><last>Alexandersson</last></editor>
       <editor><first>Jerome</first><last>Feldman</last></editor>
       <editor><first>Robert</first><last>Porzel</last></editor>
@@ -4896,15 +4896,15 @@
     </paper>
     <paper id="2">
       <title>Backbone Extraction and Pruning for Speeding Up a Deep Parser for Dialogue Systems</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>9–16</pages>
       <url hash="7cad808e">W06-3502</url>
       <bibkey>dzikovska-rose-2006-backbone</bibkey>
     </paper>
     <paper id="3">
       <title>Understanding Complex Natural Language Explanations in Tutorial Applications</title>
-      <author><first>Pamela W.</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela W.</first><last>Jordan</last></author>
       <author><first>Maxim</first><last>Makatchev</last></author>
       <author><first>Umarani</first><last>Pappuswamy</last></author>
       <pages>17–24</pages>
@@ -4913,10 +4913,10 @@
     </paper>
     <paper id="4">
       <title>Increasing the coverage of a domain independent dialogue lexicon with <fixed-case>VERBNET</fixed-case></title>
-      <author><first>Benoit</first><last>Crabbé</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
-      <author><first>William</first><last>de Beaumont</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="william-de-beaumont"><first>William</first><last>de Beaumont</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <pages>25–32</pages>
       <url hash="0b526988">W06-3504</url>
       <bibkey>crabbe-etal-2006-increasing</bibkey>
@@ -4932,7 +4932,7 @@
       <title>Catching Metaphors</title>
       <author><first>Matt</first><last>Gedigian</last></author>
       <author><first>John</first><last>Bryant</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <author><first>Branimir</first><last>Ciric</last></author>
       <pages>41–48</pages>
       <url hash="36267ad9">W06-3506</url>
@@ -4954,7 +4954,7 @@
     </paper>
     <paper id="9">
       <title>Embodied construction grammar as layered modal languages</title>
-      <author><first>Anders</first><last>Sogaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Sogaard</last></author>
       <pages>65–72</pages>
       <url hash="81acee76">W06-3509</url>
       <bibkey>sogaard-2006-embodied</bibkey>
@@ -4974,10 +4974,10 @@
       <url hash="88475af4">W06-36</url>
       <editor><first>Ryan</first><last>McDonald</last></editor>
       <editor><first>Charles</first><last>Sutton</last></editor>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <editor><first>Andrew</first><last>McCallum</last></editor>
       <editor><first>Fernando</first><last>Pereira</last></editor>
-      <editor><first>Jeff</first><last>Bilmes</last></editor>
+      <editor id="jeff-bilmes"><first>Jeff</first><last>Bilmes</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City, New York</address>
       <month>June</month>
@@ -4992,21 +4992,21 @@
       <title>A Syntax-Directed Translator with Extended Domain of Locality</title>
       <author><first>Liang</first><last>Huang</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>1–8</pages>
       <url hash="429c714b">W06-3601</url>
       <bibkey>huang-etal-2006-syntax</bibkey>
     </paper>
     <paper id="2">
       <title>Efficient Dynamic Programming Search Algorithms for Phrase-Based <fixed-case>SMT</fixed-case></title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>9–16</pages>
       <url hash="c1f0944b">W06-3602</url>
       <bibkey>tillmann-2006-efficient</bibkey>
     </paper>
     <paper id="3">
       <title>Computational Challenges in Parsing by Classification</title>
-      <author><first>Joseph</first><last>Turian</last></author>
+      <author id="joseph-turian"><first>Joseph</first><last>Turian</last></author>
       <author><first>I. Dan</first><last>Melamed</last></author>
       <pages>17–24</pages>
       <url hash="7c038187">W06-3603</url>
@@ -5014,7 +5014,7 @@
     </paper>
     <paper id="4">
       <title>All-word Prediction as the Ultimate Confusible Disambiguation</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>25–32</pages>
       <url hash="c9af617d">W06-3604</url>
       <bibkey>van-den-bosch-2006-word</bibkey>
@@ -5040,7 +5040,7 @@
       <title>Re-Ranking Algorithms for Name Tagging</title>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Cynthia</first><last>Rudin</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>49–56</pages>
       <url hash="f06c7ee4">W06-3607</url>
       <bibkey>ji-etal-2006-ranking</bibkey>
@@ -5050,10 +5050,10 @@
     <meta>
       <booktitle>Proceedings of the First International Workshop on Medical Speech Translation</booktitle>
       <url hash="909bec25">W06-37</url>
-      <editor><first>Pierrette</first><last>Bouillon</last></editor>
+      <editor id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></editor>
       <editor><first>Farzad</first><last>Ehsani</last></editor>
-      <editor><first>Robert</first><last>Frederking</last></editor>
-      <editor><first>Manny</first><last>Rayner</last></editor>
+      <editor id="robert-frederking"><first>Robert</first><last>Frederking</last></editor>
+      <editor id="manny-rayner"><first>Manny</first><last>Rayner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York, New York</address>
       <month>June</month>
@@ -5079,7 +5079,7 @@
       <author><first>Manny</first><last>Rayner</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <pages>5–12</pages>
       <url hash="9c6549ec">W06-3702</url>
       <bibkey>chatzichrisafis-etal-2006-evaluating</bibkey>
@@ -5087,8 +5087,8 @@
     <paper id="3">
       <title>Speech to Speech Translation for Medical Triage in <fixed-case>K</fixed-case>orean</title>
       <author><first>Farzad</first><last>Ehsani</last></author>
-      <author><first>Jim</first><last>Kinzey</last></author>
-      <author><first>Demetrios</first><last>Master</last></author>
+      <author id="jim-kinzey"><first>Jim</first><last>Kinzey</last></author>
+      <author id="demetrios-master"><first>Demetrios</first><last>Master</last></author>
       <author><first>Karen</first><last>Lesea</last></author>
       <author><first>Hunil</first><last>Park</last></author>
       <pages>13–19</pages>
@@ -5106,7 +5106,7 @@
     </paper>
     <paper id="5">
       <title>Language Engineering and the Pathway to Healthcare: A User-Oriented View</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>28–35</pages>
       <url hash="cc492c60">W06-3705</url>
       <bibkey>somers-2006-language</bibkey>
@@ -5126,7 +5126,7 @@
       <author><first>Nikos</first><last>Chatzichrisafis</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
@@ -5137,8 +5137,8 @@
     <paper id="8">
       <title><fixed-case>S</fixed-case>-<fixed-case>MINDS</fixed-case> 2-Way Speech-to-Speech Translation System</title>
       <author><first>Farzad</first><last>Ehsani</last></author>
-      <author><first>Jim</first><last>Kinzey</last></author>
-      <author><first>Demetrios</first><last>Master</last></author>
+      <author id="jim-kinzey"><first>Jim</first><last>Kinzey</last></author>
+      <author id="demetrios-master"><first>Demetrios</first><last>Master</last></author>
       <author><first>Karen</first><last>Sudre</last></author>
       <author><first>David</first><last>Domingo</last></author>
       <author><first>Hunil</first><last>Park</last></author>
@@ -5166,16 +5166,16 @@
     </paper>
     <paper id="11">
       <title><fixed-case>IBM</fixed-case> <fixed-case>MASTOR</fixed-case> SYSTEM: Multilingual Automatic Speech-to-Speech Translator</title>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
-      <author><first>Ruhi</first><last>Sarikaya</last></author>
+      <author id="ruhi-sarikaya"><first>Ruhi</first><last>Sarikaya</last></author>
       <author><first>Mohamed</first><last>Afify</last></author>
       <author><first>Hong-Kwang</first><last>Kuo</last></author>
       <author><first>Wei-zhong</first><last>Zhu</last></author>
       <author><first>Yonggang</first><last>Deng</last></author>
       <author><first>Charles</first><last>Prosser</last></author>
       <author><first>Wei</first><last>Zhang</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>53–56</pages>
       <url hash="fed37956">W06-3711</url>
       <bibkey>gao-etal-2006-ibm</bibkey>
@@ -5185,8 +5185,8 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs: the First Workshop on Graph Based Methods for Natural Language Processing</booktitle>
       <url hash="b14e0835">W06-38</url>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
-      <editor><first>Dragomir</first><last>Radev</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="dragomir-radev"><first>Dragomir</first><last>Radev</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New York City</address>
       <month>June</month>
@@ -5200,16 +5200,16 @@
     <paper id="1">
       <title>A Graphical Framework for Contextual Search and Name Disambiguation in Email</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>William</first><last>Cohen</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <pages>1–8</pages>
       <url hash="028dab02">W06-3801</url>
       <bibkey>minkov-etal-2006-graphical</bibkey>
     </paper>
     <paper id="2">
       <title>Graph Based Semi-Supervised Approach for Information Extraction</title>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Sara</first><last>Noeman</last></author>
       <pages>9–16</pages>
       <url hash="14657885">W06-3802</url>
@@ -5224,7 +5224,7 @@
     </paper>
     <paper id="4">
       <title>Measuring Aboutness of an Entity in a Text</title>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <author><first>Patrick</first><last>Jeuniaux</last></author>
       <author><first>Roxana</first><last>Angheluta</last></author>
       <author><first>Rudradeb</first><last>Mitra</last></author>
@@ -5234,15 +5234,15 @@
     </paper>
     <paper id="5">
       <title>A Study of Two Graph Algorithms in Topic-driven Summarization</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>29–32</pages>
       <url hash="83e7595f">W06-3805</url>
       <bibkey>nastase-szpakowicz-2006-study</bibkey>
     </paper>
     <paper id="6">
       <title>Similarity between Pairs of Co-indexed Trees for Textual Entailment Recognition</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>33–36</pages>
       <url hash="903460eb">W06-3806</url>
@@ -5250,15 +5250,15 @@
     </paper>
     <paper id="7">
       <title>Learning of Graph-based Question Answering Rules</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>37–44</pages>
       <url hash="99133ccd">W06-3807</url>
       <bibkey>molla-2006-learning</bibkey>
     </paper>
     <paper id="8">
       <title>Seeing stars when there aren’t many stars: Graph-based semi-supervised learning for sentiment categorization</title>
-      <author><first>Andrew</first><last>Goldberg</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew</first><last>Goldberg</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>45–52</pages>
       <url hash="7b025f3f">W06-3808</url>
       <bibkey>goldberg-zhu-2006-seeing</bibkey>
@@ -5273,8 +5273,8 @@
     </paper>
     <paper id="10">
       <title>Graph-based Generalized Latent Semantic Analysis for Document Representation</title>
-      <author><first>Irina</first><last>Matveeva</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="irina-matveeva"><first>Irina</first><last>Matveeva</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>61–64</pages>
       <url hash="0d9694d2">W06-3810</url>
       <bibkey>matveeva-levow-2006-graph</bibkey>
@@ -5290,25 +5290,25 @@
     </paper>
     <paper id="12">
       <title><fixed-case>C</fixed-case>hinese Whispers - an Efficient Graph Clustering Algorithm and its Application to Natural Language Processing Problems</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>73–80</pages>
       <url hash="904faa12">W06-3812</url>
       <bibkey>biemann-2006-chinese</bibkey>
     </paper>
     <paper id="13">
       <title>Matching syntactic-semantic graphs for semantic relation assignment</title>
-      <author><first>Vivi</first><last>Nastase</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>81–88</pages>
       <url hash="470482e7">W06-3813</url>
       <bibkey>nastase-szpakowicz-2006-matching</bibkey>
     </paper>
     <paper id="14">
       <title>Evaluating and optimizing the parameters of an unsupervised graph-based <fixed-case>WSD</fixed-case> algorithm</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>David</first><last>Martínez</last></author>
-      <author><first>Oier</first><last>López de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="david-martinez"><first>David</first><last>Martínez</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>López de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>89–96</pages>
       <url hash="300b9681">W06-3814</url>
       <bibkey>agirre-etal-2006-evaluating</bibkey>
@@ -5344,9 +5344,9 @@
     <paper id="2">
       <title>Extracting formal specifications from natural language regulatory documents</title>
       <author><first>Nikhil</first><last>Dinesh</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Insup</first><last>Lee</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="3c729d6c">W06-3902</url>
       <bibkey>dinesh-etal-2006-extracting</bibkey>
     </paper>
@@ -5366,7 +5366,7 @@
     </paper>
     <paper id="5">
       <title>Towards a redundancy elimination algorithm for underspecified descriptions</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Livio</first><last>Robaldo</last></author>
       <author><first>Jelle</first><last>Gerbrandy</last></author>
       <url hash="a40a9162">W06-3905</url>
@@ -5442,7 +5442,7 @@
     </paper>
     <paper id="15">
       <title>Ingredients of a first-order account of bridging</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <url hash="a7353e28">W06-3915</url>
       <bibkey>cimiano-2006-ingredients</bibkey>
     </paper>
diff --git a/data/xml/W07.xml b/data/xml/W07.xml
index 111528f2db..0e560d51b5 100644
--- a/data/xml/W07.xml
+++ b/data/xml/W07.xml
@@ -27,7 +27,7 @@
     <paper id="2">
       <title>Corpus-driven Metaphor Harvesting</title>
       <author><first>Astrid</first><last>Reining</last></author>
-      <author><first>Birte</first><last>Lönneker-Rodman</last></author>
+      <author id="birte-lonneker"><first>Birte</first><last>Lönneker-Rodman</last></author>
       <pages>5–12</pages>
       <url hash="807202a1">W07-0102</url>
       <bibkey>reining-lonneker-rodman-2007-corpus</bibkey>
@@ -35,7 +35,7 @@
     <paper id="3">
       <title>Hunting Elusive Metaphors Using Lexical Resources.</title>
       <author><first>Saisuresh</first><last>Krishnakumaran</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>13–20</pages>
       <url hash="150538e3">W07-0103</url>
       <bibkey>krishnakumaran-zhu-2007-hunting</bibkey>
@@ -53,10 +53,10 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs: Graph-Based Algorithms for Natural Language Processing</booktitle>
       <url hash="46d5d08b">W07-02</url>
-      <editor><first>Chris</first><last>Biemann</last></editor>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
-      <editor><first>Dragomir</first><last>Radev</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="dragomir-radev"><first>Dragomir</first><last>Radev</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, NY, USA</address>
       <year>2007</year>
@@ -86,7 +86,7 @@
       <author><first>Daniel S.</first><last>Leite</last></author>
       <author><first>Lucia H. M.</first><last>Rino</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
-      <author><first>Maria das Graças V.</first><last>Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças V.</first><last>Nunes</last></author>
       <pages>17–24</pages>
       <url hash="9dbe7f8f">W07-0203</url>
       <bibkey>leite-etal-2007-extractive</bibkey>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="7">
       <title>Latent Semantic Grammar Induction: Context, Projectivity, and Prior Distributions</title>
-      <author><first>Andrew M.</first><last>Olney</last></author>
+      <author id="andrew-olney"><first>Andrew M.</first><last>Olney</last></author>
       <pages>45–52</pages>
       <url hash="9bd1e5de">W07-0207</url>
       <bibkey>olney-2007-latent</bibkey>
@@ -125,15 +125,15 @@
     <paper id="8">
       <title>Learning to Transform Linguistic Graphs</title>
       <author><first>Valentin</first><last>Jijkoun</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>53–60</pages>
       <url hash="4ab59bdc">W07-0208</url>
       <bibkey>jijkoun-de-rijke-2007-learning</bibkey>
     </paper>
     <paper id="9">
       <title>Semi-supervised Algorithm for Human-Computer Dialogue Mining</title>
-      <author><first>Calkin S.</first><last>Montero</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="calkin-s-montero"><first>Calkin S.</first><last>Montero</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>61–64</pages>
       <url hash="08098d77">W07-0209</url>
       <bibkey>montero-araki-2007-semi</bibkey>
@@ -151,9 +151,9 @@
     <paper id="11">
       <title><fixed-case>DLSITE</fixed-case>-2: Semantic Similarity Based on Syntactic Dependency Trees Applied to Textual Entailment</title>
       <author><first>Daniel</first><last>Micol</last></author>
-      <author><first>Óscar</first><last>Ferrández</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="oscar-ferrandez"><first>Óscar</first><last>Ferrández</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>73–80</pages>
       <url hash="fcbd281d">W07-0211</url>
       <bibkey>micol-etal-2007-dlsite</bibkey>
@@ -184,7 +184,7 @@
       <url hash="aa536d34">W07-03</url>
       <editor><first>Fuliang</first><last>Weng</last></editor>
       <editor><first>Ye-Yi</first><last>Wang</last></editor>
-      <editor><first>Gokhan</first><last>Tur</last></editor>
+      <editor id="gokhan-tur"><first>Gokhan</first><last>Tur</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Rochester, NY</address>
       <month>April</month>
@@ -197,7 +197,7 @@
     </frontmatter>
     <paper id="1">
       <title>Applying <fixed-case>POMDP</fixed-case>s to Dialog Systems in the Troubleshooting Domain</title>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>1–8</pages>
       <url hash="2485603d">W07-0301</url>
       <bibkey>williams-2007-applying</bibkey>
@@ -208,14 +208,14 @@
       <author><first>Jost</first><last>Schatzmann</last></author>
       <author><first>Karl</first><last>Weilhammer</last></author>
       <author><first>Hui</first><last>Ye</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>9–16</pages>
       <url hash="d9a710b8">W07-0302</url>
       <bibkey>thomson-etal-2007-training</bibkey>
     </paper>
     <paper id="3">
       <title>The Multimodal Presentation Dashboard</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>David</first><last>Gibbon</last></author>
       <author><first>Zhu</first><last>Liu</last></author>
@@ -231,18 +231,18 @@
       <author><first>Phillip</first><last>Hunter</last></author>
       <author><first>Peter</first><last>Krogh</last></author>
       <author><first>Esther</first><last>Levin</last></author>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <pages>25–31</pages>
       <url hash="7d67eefe">W07-0304</url>
       <bibkey>acomb-etal-2007-technical</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>O</fixed-case>lympus: an open-source framework for conversational spoken language interface research</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Antoine</first><last>Raux</last></author>
       <author><first>Thomas</first><last>Harris</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>32–39</pages>
       <url hash="338464e1">W07-0305</url>
       <bibkey>bohus-etal-2007-olympus</bibkey>
@@ -256,16 +256,16 @@
     </paper>
     <paper id="7">
       <title>Experiments on the <fixed-case>F</fixed-case>rance Telecom 3000 Voice Agency corpus: academic research on an industrial spoken dialog system</title>
-      <author><first>Géraldine</first><last>Damnati</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
-      <author><first>Renato</first><last>De Mori</last></author>
+      <author id="geraldine-damnati"><first>Géraldine</first><last>Damnati</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
+      <author id="renato-de-mori"><first>Renato</first><last>De Mori</last></author>
       <pages>48–55</pages>
       <url hash="9cdd2d39">W07-0307</url>
       <bibkey>damnati-etal-2007-experiments</bibkey>
     </paper>
     <paper id="8">
       <title>Experiences of an In-Service <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Data Collection for the Deployment of a Call-Routing Application</title>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <author><first>Robert</first><last>Eklund</last></author>
       <pages>56–63</pages>
       <url hash="3dc54be0">W07-0308</url>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>A</fixed-case>da<fixed-case>RTE</fixed-case>: An Extensible and Adaptable Architecture for Dialog Systems</title>
-      <author><first>Lina</first><last>Rojas</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas</last></author>
       <author><first>Toni</first><last>Giorgino</last></author>
       <pages>64–67</pages>
       <url hash="308e8fc5">W07-0309</url>
@@ -282,7 +282,7 @@
     <paper id="10">
       <title>Multi-slot semantics for natural-language call routing systems</title>
       <author><first>Johan</first><last>Boye</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>68–75</pages>
       <url hash="fc1e5c5b">W07-0310</url>
       <bibkey>boye-wiren-2007-multi</bibkey>
@@ -296,7 +296,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>WIRE</fixed-case>: A Wearable Spoken Language Understanding System for the Military</title>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Patrick</first><last>Craven</last></author>
       <author><first>Michael</first><last>Orr</last></author>
       <pages>84–88</pages>
@@ -305,8 +305,8 @@
     </paper>
     <paper id="13">
       <title>Different measurement metrics to evaluate a chatbot system</title>
-      <author><first>Bayan</first><last>Abu Shawar</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="bayan-abu-shawar"><first>Bayan</first><last>Abu Shawar</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>89–96</pages>
       <url hash="facb5a6e">W07-0313</url>
       <bibkey>abu-shawar-atwell-2007-different</bibkey>
@@ -332,7 +332,7 @@
       <title>Chunk-Level Reordering of Source Language Sentences with Automatically Learned Rules for Statistical Machine Translation</title>
       <author><first>Yuqi</first><last>Zhang</last></author>
       <author><first>Richard</first><last>Zens</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1–8</pages>
       <url hash="5e90b61d">W07-0401</url>
       <bibkey>zhang-etal-2007-chunk</bibkey>
@@ -340,7 +340,7 @@
     <paper id="2">
       <title>Extraction Phenomena in Synchronous <fixed-case>TAG</fixed-case> Syntax and Semantics</title>
       <author><first>Rebecca</first><last>Nesson</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>9–16</pages>
       <url hash="078f96aa">W07-0402</url>
       <bibkey>nesson-shieber-2007-extraction</bibkey>
@@ -379,14 +379,14 @@
     <paper id="7">
       <title>Discriminative word alignment by learning the alignment structure and syntactic divergence between a language pair</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>49–56</pages>
       <url hash="d010203b">W07-0407</url>
       <bibkey>venkatapathy-joshi-2007-discriminative</bibkey>
     </paper>
     <paper id="8">
       <title>Generation in Machine Translation from Deep Syntactic Trees</title>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <author><first>Petr</first><last>Němec</last></author>
       <pages>57–64</pages>
       <url hash="5dc5d3c0">W07-0408</url>
@@ -394,9 +394,9 @@
     </paper>
     <paper id="9">
       <title>Combining Morphosyntactic Enriched Representation with n-best Reranking in Statistical Translation</title>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Daniel</first><last>Déchelotte</last></author>
+      <author id="daniel-dechelotte"><first>Daniel</first><last>Déchelotte</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <pages>65–71</pages>
       <url hash="74e6d0fa">W07-0409</url>
@@ -404,8 +404,8 @@
     </paper>
     <paper id="10">
       <title>A Walk on the Other Side: Using <fixed-case>SMT</fixed-case> Components in a Transfer-Based Translation System</title>
-      <author><first>Ariadna</first><last>Font Llitjós</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="ariadna-font-llitjos"><first>Ariadna</first><last>Font Llitjós</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>72–79</pages>
       <url hash="0a4c2b73">W07-0410</url>
       <bibkey>font-llitjos-vogel-2007-walk</bibkey>
@@ -413,7 +413,7 @@
     <paper id="11">
       <title>Dependency-Based Automatic Evaluation for Machine Translation</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>80–87</pages>
       <url hash="8d311c1e">W07-0411</url>
@@ -421,7 +421,7 @@
     </paper>
     <paper id="12">
       <title>Probabilistic Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars for Machine Translation: The Argument from Bilingual Dictionaries</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>88–95</pages>
       <url hash="412e6a89">W07-0412</url>
       <bibkey>shieber-2007-probabilistic</bibkey>
@@ -429,7 +429,7 @@
     <paper id="13">
       <title>Three models for discriminative machine translation using Global Lexical Selection and Sentence Reconstruction</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>96–102</pages>
       <url hash="9aa03b20">W07-0413</url>
       <bibkey>venkatapathy-bangalore-2007-three</bibkey>
@@ -437,8 +437,8 @@
     <paper id="14">
       <title>Comparing Reordering Constraints for <fixed-case>SMT</fixed-case> Using Efficient <fixed-case>BLEU</fixed-case> Oracle Computation</title>
       <author><first>Markus</first><last>Dreyer</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>103–110</pages>
       <url hash="53edfe22">W07-0414</url>
       <bibkey>dreyer-etal-2007-comparing</bibkey>
@@ -491,7 +491,7 @@
       <title>High-accuracy Annotation and Parsing of <fixed-case>CHILDES</fixed-case> Transcripts</title>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Eric</first><last>Davis</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Brian</first><last>MacWhinney</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>25–32</pages>
@@ -516,7 +516,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>ISA</fixed-case> meets <fixed-case>L</fixed-case>ara: An incremental word space model for cognitively plausible simulations of semantic learning</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Luca</first><last>Onnis</last></author>
       <pages>49–56</pages>
@@ -542,14 +542,14 @@
     <paper id="10">
       <title>The Topology of Synonymy and Homonymy Networks</title>
       <author><first>James</first><last>Gorman</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>73–80</pages>
       <url hash="38f07788">W07-0610</url>
       <bibkey>gorman-curran-2007-topology</bibkey>
     </paper>
     <paper id="11">
       <title>The Benefits of Errors: Learning an <fixed-case>OT</fixed-case> Grammar with a Structured Candidate Set</title>
-      <author><first>Tamás</first><last>Biró</last></author>
+      <author id="tamas-biro"><first>Tamás</first><last>Biró</last></author>
       <pages>81–88</pages>
       <url hash="429f26a8">W07-0611</url>
       <bibkey>biro-2007-benefits</bibkey>
@@ -557,7 +557,7 @@
     <paper id="12">
       <title>Learning to interpret novel noun-noun compounds: evidence from a category learning experiment</title>
       <author><first>Barry</first><last>Devereux</last></author>
-      <author><first>Fintan</first><last>Costello</last></author>
+      <author id="fintan-j-costello"><first>Fintan</first><last>Costello</last></author>
       <pages>89–96</pages>
       <url hash="3f32db88">W07-0612</url>
       <bibkey>devereux-costello-2007-learning</bibkey>
@@ -569,7 +569,7 @@
       <url hash="05980242">W07-07</url>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>Cameron Shaw</first><last>Fordyce</last></editor>
+      <editor id="cameron-shaw-fordyce"><first>Cameron Shaw</first><last>Fordyce</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -612,7 +612,7 @@
     <paper id="4">
       <title>Exploring Different Representational Units in <fixed-case>E</fixed-case>nglish-to-<fixed-case>T</fixed-case>urkish Statistical Machine Translation</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <pages>25–32</pages>
       <url hash="f084e4d3">W07-0704</url>
       <bibkey>oflazer-durgar-el-kahlout-2007-exploring</bibkey>
@@ -621,14 +621,14 @@
       <title>Can We Translate Letters?</title>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>33–39</pages>
       <url hash="13db2eec">W07-0705</url>
       <bibkey>vilar-etal-2007-translate</bibkey>
     </paper>
     <paper id="6">
       <title>A Dependency Treelet String Correspondence Model for Statistical Machine Translation</title>
-      <author><first>Deyi</first><last>Xiong</last></author>
+      <author id="deyi-xiong"><first>Deyi</first><last>Xiong</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Shouxun</first><last>Lin</last></author>
       <pages>40–47</pages>
@@ -637,8 +637,8 @@
     </paper>
     <paper id="7">
       <title>Word Error Rates: Decomposition over <fixed-case>POS</fixed-case> classes and Applications for Error Analysis</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>48–55</pages>
       <url hash="5cce8265">W07-0707</url>
       <bibkey>popovic-ney-2007-word</bibkey>
@@ -647,8 +647,8 @@
       <title>Speech-Input Multi-Target Machine Translation</title>
       <author><first>Alicia</first><last>Pérez</last></author>
       <author><first>M. Teresa</first><last>González</last></author>
-      <author><first>M. Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>56–63</pages>
       <url hash="495f374f">W07-0708</url>
       <bibkey>perez-etal-2007-speech</bibkey>
@@ -656,7 +656,7 @@
     <paper id="9">
       <title>Meta-Structure Transformation Model for Statistical Machine Translation</title>
       <author><first>Jiadong</first><last>Sun</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Huashen</first><last>Liang</last></author>
       <pages>64–71</pages>
       <url hash="9def833f">W07-0709</url>
@@ -690,8 +690,8 @@
       <title>Human Evaluation of Machine Translation Through Binary System Comparisons</title>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>96–103</pages>
       <url hash="45918198">W07-0713</url>
       <bibkey>vilar-etal-2007-human</bibkey>
@@ -699,7 +699,7 @@
     <paper id="14">
       <title>Labelled Dependencies in Machine Translation Evaluation</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>104–111</pages>
       <url hash="ba2a3045">W07-0714</url>
@@ -707,7 +707,7 @@
     </paper>
     <paper id="15">
       <title>An Iteratively-Trained Segmentation-Free Phrase Translation Model for Statistical Machine Translation</title>
-      <author><first>Robert</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>112–119</pages>
       <url hash="95167c8b">W07-0715</url>
@@ -716,9 +716,9 @@
     <paper id="16">
       <title>Using Paraphrases for Parameter Tuning in Statistical Machine Translation</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Necip</first><last>Fazil Ayan</last></author>
+      <author id="necip-fazil-ayan"><first>Necip</first><last>Fazil Ayan</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <pages>120–127</pages>
       <url hash="9e5080d1">W07-0716</url>
       <bibkey>madnani-etal-2007-using</bibkey>
@@ -744,29 +744,29 @@
     </paper>
     <paper id="19">
       <title>Context-aware Discriminative Phrase Selection for Statistical Machine Translation</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>159–166</pages>
       <url hash="7d75bc69">W07-0719</url>
       <bibkey>gimenez-marquez-2007-context</bibkey>
     </paper>
     <paper id="20">
       <title>Ngram-Based Statistical Machine Translation Enhanced with Multiple Weighted Reordering Hypotheses</title>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A.</first><last>R. Fonollosa</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A.</first><last>R. Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>167–170</pages>
       <url hash="20852ff7">W07-0720</url>
       <bibkey>r-costa-jussa-etal-2007-ngram</bibkey>
     </paper>
     <paper id="21">
       <title>Analysis of Statistical and Morphological Classes to Generate Weigthed Reordering Hypotheses on a Statistical Machine Translation System</title>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>José A.</first><last>R. Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A.</first><last>R. Fonollosa</last></author>
       <pages>171–176</pages>
       <url hash="0bca02fd">W07-0721</url>
       <bibkey>r-costa-jussa-r-fonollosa-2007-analysis</bibkey>
@@ -774,7 +774,7 @@
     <paper id="22">
       <title>Domain Adaptation in Statistical Machine Translation with Mixture Modelling</title>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>177–180</pages>
       <url hash="9d0bea98">W07-0722</url>
       <bibkey>civera-juan-2007-domain</bibkey>
@@ -808,7 +808,7 @@
     <paper id="26">
       <title>Multi-Engine Machine Translation with an Open-Source <fixed-case>SMT</fixed-case> Decoder</title>
       <author><first>Yu</first><last>Chen</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
@@ -822,8 +822,8 @@
       <author><first>Matthias</first><last>Paulik</last></author>
       <author><first>Kay</first><last>Rottmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="almut-silja-hildebrand"><first>Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>197–202</pages>
       <url hash="4cb2c733">W07-0727</url>
       <bibkey>paulik-etal-2007-isl</bibkey>
@@ -840,15 +840,15 @@
     </paper>
     <paper id="29">
       <title>The “Noisier Channel”: Translation from Morphologically Complex Languages</title>
-      <author><first>Christopher J.</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Christopher J.</first><last>Dyer</last></author>
       <pages>207–211</pages>
       <url hash="8c6c7153">W07-0729</url>
       <bibkey>dyer-2007-noisier</bibkey>
     </paper>
     <paper id="30">
       <title><fixed-case>UCB</fixed-case> System Description for the <fixed-case>WMT</fixed-case> 2007 Shared Task</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>212–215</pages>
       <url hash="fb0d051f">W07-0730</url>
       <bibkey>nakov-hearst-2007-ucb-system</bibkey>
@@ -858,14 +858,14 @@
       <author><first>Andreas</first><last>Zollmann</last></author>
       <author><first>Ashish</first><last>Venugopal</last></author>
       <author><first>Matthias</first><last>Paulik</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>216–219</pages>
       <url hash="aaf70bc3">W07-0731</url>
       <bibkey>zollmann-etal-2007-syntax</bibkey>
     </paper>
     <paper id="32">
       <title>Statistical Post-Editing on <fixed-case>SYSTRAN</fixed-case>‘s Rule-Based Translation System</title>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>220–223</pages>
@@ -882,7 +882,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>METEOR</fixed-case>: An Automatic Metric for <fixed-case>MT</fixed-case> Evaluation with High Levels of Correlation with Human Judgments</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Abhaya</first><last>Agarwal</last></author>
       <pages>228–231</pages>
       <url hash="e9354851">W07-0734</url>
@@ -890,7 +890,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>zech Factored Machine Translation</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>232–239</pages>
       <url hash="6c1b1b4b">W07-0735</url>
       <bibkey>bojar-2007-english</bibkey>
@@ -899,7 +899,7 @@
       <title>Sentence Level Machine Translation Evaluation as a Ranking</title>
       <author><first>Yang</first><last>Ye</last></author>
       <author><first>Ming</first><last>Zhou</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>240–247</pages>
       <url hash="e03b8bd5">W07-0736</url>
       <bibkey>ye-etal-2007-sentence</bibkey>
@@ -914,8 +914,8 @@
     </paper>
     <paper id="38">
       <title>Linguistic Features for Automatic Evaluation of Heterogenous <fixed-case>MT</fixed-case> Systems</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>256–264</pages>
       <url hash="50370725">W07-0738</url>
       <bibkey>gimenez-marquez-2007-linguistic</bibkey>
@@ -939,7 +939,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>E</fixed-case>lixir<fixed-case>FM</fixed-case> – Implementation of Functional <fixed-case>A</fixed-case>rabic Morphology</title>
-      <author><first>Otakar</first><last>Smrž</last></author>
+      <author id="otakar-smrz"><first>Otakar</first><last>Smrž</last></author>
       <pages>1–8</pages>
       <url hash="d9852ab5">W07-0801</url>
       <bibkey>smrz-2007-elixirfm</bibkey>
@@ -964,14 +964,14 @@
       <author><first>Walid</first><last>Magdy</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>25–32</pages>
       <url hash="b551a2ca">W07-0804</url>
       <bibkey>magdy-etal-2007-arabic</bibkey>
     </paper>
     <paper id="5">
       <title>Syllable-Based Speech Recognition for <fixed-case>A</fixed-case>mharic</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>33–40</pages>
       <url hash="7465711e">W07-0805</url>
@@ -979,10 +979,10 @@
     </paper>
     <paper id="6">
       <title>Adapting a Medical speech to speech translation system (<fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case>) to <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sonia</first><last>Halimi</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <pages>41–48</pages>
       <url hash="70202f8e">W07-0806</url>
       <bibkey>bouillon-etal-2007-adapting</bibkey>
@@ -998,7 +998,7 @@
     </paper>
     <paper id="8">
       <title>Can You Tag the Modal? You Should.</title>
-      <author><first>Yael</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Netzer</last></author>
       <author><first>Meni</first><last>Adler</last></author>
       <author><first>David</first><last>Gabay</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
@@ -1016,14 +1016,14 @@
     <paper id="10">
       <title><fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench Sentence Alignment: Exploration of A Cross-language Information Retrieval Approach</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Christian</first><last>Fluhr</last></author>
+      <author id="christian-fluhr"><first>Christian</first><last>Fluhr</last></author>
       <pages>73–80</pages>
       <url hash="c248a112">W07-0810</url>
       <bibkey>semmar-fluhr-2007-arabic</bibkey>
     </paper>
     <paper id="11">
       <title>An <fixed-case>A</fixed-case>rabic Slot Grammar Parser</title>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
       <pages>81–88</pages>
       <url hash="c04ff43f">W07-0811</url>
@@ -1031,7 +1031,7 @@
     </paper>
     <paper id="12">
       <title>Improved <fixed-case>A</fixed-case>rabic Base Phrase Chunking with a new enriched <fixed-case>POS</fixed-case> tag set</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>89–96</pages>
       <url hash="e2b70cb9">W07-0812</url>
       <bibkey>diab-2007-improved</bibkey>
@@ -1039,7 +1039,7 @@
     <paper id="13">
       <title>Smoothing a Lexicon-based <fixed-case>POS</fixed-case> Tagger for <fixed-case>A</fixed-case>rabic and <fixed-case>H</fixed-case>ebrew</title>
       <author><first>Saib</first><last>Manour</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Yoad</first><last>Winter</last></author>
       <pages>97–103</pages>
       <url hash="e01e05bf">W07-0813</url>
@@ -1059,7 +1059,7 @@
       <booktitle>Proceedings of the Workshop on Language Technology for Cultural Heritage Data (<fixed-case>L</fixed-case>a<fixed-case>T</fixed-case>e<fixed-case>CH</fixed-case> 2007).</booktitle>
       <url hash="9a6a9015">W07-09</url>
       <editor><first>Caroline</first><last>Sporleder</last></editor>
-      <editor><first>Antal</first><last>van den Bosch</last></editor>
+      <editor id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></editor>
       <editor><first>Claire</first><last>Grover</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -1082,9 +1082,9 @@
     </paper>
     <paper id="2">
       <title><fixed-case>V</fixed-case>iterbi Based Alignment between Text Images and their Transcripts</title>
-      <author><first>Alejandro H.</first><last>Toselli</last></author>
+      <author id="alejandro-h-toselli"><first>Alejandro H.</first><last>Toselli</last></author>
       <author><first>Verónica</first><last>Romero</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <pages>9–16</pages>
       <url hash="cda30677">W07-0902</url>
       <bibkey>toselli-etal-2007-viterbi</bibkey>
@@ -1099,7 +1099,7 @@
     <paper id="4">
       <title>Concept Disambiguation for Improved Subject Access Using Multiple Knowledge Sources</title>
       <author><first>Tandeep</first><last>Sidhu</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>25–32</pages>
       <url hash="ec4e55cb">W07-0904</url>
@@ -1108,7 +1108,7 @@
     <paper id="5">
       <title>The <fixed-case>L</fixed-case>atin Dependency Treebank in a Cultural Heritage Digital Library</title>
       <author><first>David</first><last>Bamman</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <pages>33–40</pages>
       <url hash="1067bddd">W07-0905</url>
       <bibkey>bamman-crane-2007-latin</bibkey>
@@ -1123,7 +1123,7 @@
     <paper id="7">
       <title>Dynamic Path Prediction and Recommendation in a Museum Environment</title>
       <author><first>Karl</first><last>Grieser</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <pages>49–56</pages>
       <url hash="a7fa325b">W07-0907</url>
@@ -1134,7 +1134,7 @@
       <author><first>Véronique</first><last>Malaisé</last></author>
       <author><first>Antoine</first><last>Isaac</last></author>
       <author><first>Luit</first><last>Gazendam</last></author>
-      <author><first>Hennie</first><last>Brugman</last></author>
+      <author id="hennie-brugman"><first>Hennie</first><last>Brugman</last></author>
       <pages>57–64</pages>
       <url hash="a842b2da">W07-0908</url>
       <bibkey>malaise-etal-2007-anchoring</bibkey>
@@ -1143,7 +1143,7 @@
       <title>Cross Lingual and Semantic Retrieval for Cultural Heritage Appreciation</title>
       <author><first>Idan</first><last>Szpektor</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Danny</first><last>Shacham</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
       <pages>65–72</pages>
@@ -1162,8 +1162,8 @@
     </paper>
     <paper id="11">
       <title>Multilingual Search for Cultural Heritage Archives via Combining Multiple Translation Resources</title>
-      <author><first>Gareth J. F.</first><last>Jones</last></author>
-      <author><first>Ying</first><last>Zhang</last></author>
+      <author id="gareth-j-f-jones"><first>Gareth J. F.</first><last>Jones</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
       <author><first>Eamonn</first><last>Newman</last></author>
       <author><first>Fabio</first><last>Fantino</last></author>
       <author><first>Franca</first><last>Debole</last></author>
@@ -1173,7 +1173,7 @@
     </paper>
     <paper id="12">
       <title>Invited Talk: Lessons from the <fixed-case>MALACH</fixed-case> Project: Applying New Technologies to Improve Intellectual Access to Large Oral History Collections</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <pages>89</pages>
       <url hash="b3f49c6d">W07-0912</url>
       <bibkey>oard-2007-invited</bibkey>
@@ -1183,11 +1183,11 @@
     <meta>
       <booktitle>Biological, translational, and clinical language processing</booktitle>
       <url hash="225dfc47">W07-10</url>
-      <editor><first>K. Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Carol</first><last>Friedman</last></editor>
-      <editor><first>Lynette</first><last>Hirschman</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
+      <editor id="carol-friedman"><first>Carol</first><last>Friedman</last></editor>
+      <editor id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -1209,9 +1209,9 @@
     </paper>
     <paper id="2">
       <title>Determining the Syntactic Structure of Medical Terms in Clinical Notes</title>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>9–16</pages>
       <url hash="62acc930">W07-1002</url>
       <bibkey>mcinnes-etal-2007-determining</bibkey>
@@ -1241,7 +1241,7 @@
       <title>An Unsupervised Method for Extracting Domain-specific Affixes in Biological Literature</title>
       <author><first>Haibin</first><last>Liu</last></author>
       <author><first>Christian</first><last>Blouin</last></author>
-      <author><first>Vlado</first><last>Kešelj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Kešelj</last></author>
       <pages>33–40</pages>
       <url hash="7f6fe772">W07-1005</url>
       <bibkey>liu-etal-2007-unsupervised</bibkey>
@@ -1257,7 +1257,7 @@
     </paper>
     <paper id="7">
       <title>Mining a Lexicon of Technical Terms and Lay Equivalents</title>
-      <author><first>Noemie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
       <author><first>Komal</first><last>Sutaria</last></author>
       <pages>49–56</pages>
       <url hash="4abc4c27">W07-1007</url>
@@ -1265,8 +1265,8 @@
     </paper>
     <paper id="8">
       <title>Annotation of Chemical Named Entities</title>
-      <author><first>Peter</first><last>Corbett</last></author>
-      <author><first>Colin</first><last>Batchelor</last></author>
+      <author id="peter-corbett"><first>Peter</first><last>Corbett</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <pages>57–64</pages>
       <url hash="f9028e33">W07-1008</url>
@@ -1274,7 +1274,7 @@
     </paper>
     <paper id="9">
       <title>Recognising Nested Named Entities in Biomedical Text</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <pages>65–72</pages>
@@ -1283,7 +1283,7 @@
     </paper>
     <paper id="10">
       <title>Exploring the Efficacy of Caption Search for Bioscience Journal Search Interfaces</title>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <author><first>Anna</first><last>Divoli</last></author>
       <author><first>Ye</first><last>Jerry</last></author>
       <author><first>Michael</first><last>Wooldridge</last></author>
@@ -1293,7 +1293,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>C</fixed-case>on<fixed-case>T</fixed-case>ext: An Algorithm for Identifying Contextual Features from Clinical Text</title>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <author><first>John</first><last>Dowling</last></author>
       <author><first>David</first><last>Chu</last></author>
       <pages>81–88</pages>
@@ -1304,8 +1304,8 @@
       <title><fixed-case>B</fixed-case>io<fixed-case>N</fixed-case>oculars: Extracting Protein-Protein Interactions from Biomedical Text</title>
       <author><first>Amgad</first><last>Madkour</last></author>
       <author><first>Kareem</first><last>Darwish</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
-      <author><first>Ahmed</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
+      <author id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Ossama</first><last>Emam</last></author>
       <pages>89–96</pages>
       <url hash="a04b6ad7">W07-1012</url>
@@ -1326,13 +1326,13 @@
     </paper>
     <paper id="14">
       <title>From indexing the biomedical literature to coding clinical text: experience with <fixed-case>MTI</fixed-case> and machine learning approaches</title>
-      <author><first>Alan R.</first><last>Aronson</last></author>
+      <author id="alan-r-aronson"><first>Alan R.</first><last>Aronson</last></author>
       <author><first>Olivier</first><last>Bodenreider</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
       <author><first>Kin Wah</first><last>Fung</last></author>
-      <author><first>Vivian K.</first><last>Lee</last></author>
-      <author><first>James G.</first><last>Mork</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="vivian-k-lee"><first>Vivian K.</first><last>Lee</last></author>
+      <author id="james-g-mork"><first>James G.</first><last>Mork</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Lee</first><last>Peters</last></author>
       <author><first>Willie J.</first><last>Rogers</last></author>
       <pages>105–112</pages>
@@ -1349,7 +1349,7 @@
     </paper>
     <paper id="16">
       <title>A Study of Structured Clinical Abstracts and the Semantic Classification of Sentences</title>
-      <author><first>Grace</first><last>Chung</last></author>
+      <author id="grace-chung"><first>Grace</first><last>Chung</last></author>
       <author><first>Enrico</first><last>Coiera</last></author>
       <pages>121–128</pages>
       <url hash="8eff1592">W07-1016</url>
@@ -1360,7 +1360,7 @@
       <author><first>Koby</first><last>Crammer</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
-      <author><first>Partha</first><last>Pratim Talukdar</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Pratim Talukdar</last></author>
       <author><first>Steven</first><last>Carroll</last></author>
       <pages>129–136</pages>
       <url hash="05038c6a">W07-1017</url>
@@ -1370,9 +1370,9 @@
       <title>Interpreting comparative constructions in biomedical text</title>
       <author><first>Marcelo</first><last>Fiszman</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
-      <author><first>Francois M.</first><last>Lang</last></author>
+      <author id="francois-michel-lang"><first>Francois M.</first><last>Lang</last></author>
       <author><first>Philip</first><last>Goetz</last></author>
-      <author><first>Thomas C.</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas C.</first><last>Rindflesch</last></author>
       <pages>137–144</pages>
       <url hash="c70228c3">W07-1018</url>
       <bibkey>fiszman-etal-2007-interpreting</bibkey>
@@ -1410,16 +1410,16 @@
     <paper id="23">
       <title>Challenges for extracting biomedical knowledge from full text</title>
       <author><first>Tara</first><last>McIntosh</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>171–178</pages>
       <url hash="5ec5c25d">W07-1023</url>
       <bibkey>mcintosh-curran-2007-challenges</bibkey>
     </paper>
     <paper id="24">
       <title>Adaptation of <fixed-case>POS</fixed-case> Tagging for Multiple <fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>edical Domains</title>
-      <author><first>John E.</first><last>Miller</last></author>
+      <author id="john-miller"><first>John E.</first><last>Miller</last></author>
       <author><first>Manabu</first><last>Torii</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>179–180</pages>
       <url hash="38be91a0">W07-1024</url>
       <bibkey>miller-etal-2007-adaptation</bibkey>
@@ -1427,23 +1427,23 @@
     <paper id="25">
       <title>Information Extraction from Patients’ Free Form Documentation</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <pages>181–182</pages>
       <url hash="0e1241cd">W07-1025</url>
       <bibkey>mykowiecka-marciniak-2007-information</bibkey>
     </paper>
     <paper id="26">
       <title>Automatic Indexing of Specialized Documents: Using Generic vs. Domain-Specific Document Representations</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>James G.</first><last>Mork</last></author>
-      <author><first>Alan R.</first><last>Aronson</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="james-g-mork"><first>James G.</first><last>Mork</last></author>
+      <author id="alan-r-aronson"><first>Alan R.</first><last>Aronson</last></author>
       <pages>183–190</pages>
       <url hash="7399c2aa">W07-1026</url>
       <bibkey>neveol-etal-2007-automatic</bibkey>
     </paper>
     <paper id="27">
       <title>Developing Feature Types for Classifying Clinical Notes</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Yitao</first><last>Zhang</last></author>
       <author><first>Yefeng</first><last>Wang</last></author>
       <pages>191–192</pages>
@@ -1460,8 +1460,8 @@
     </paper>
     <paper id="29">
       <title>Discovering contradicting protein-protein interactions in text</title>
-      <author><first>Olivia</first><last>Sanchez</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="olivia-sanchez-graillet"><first>Olivia</first><last>Sanchez</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>195–196</pages>
       <url hash="cfa563c0">W07-1029</url>
       <bibkey>sanchez-poesio-2007-discovering</bibkey>
@@ -1469,7 +1469,7 @@
     <paper id="30">
       <title>Marking time in developmental biology</title>
       <author><first>Gail</first><last>Sinclair</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>197–198</pages>
       <url hash="2a2ec12b">W07-1030</url>
       <bibkey>sinclair-webber-2007-marking</bibkey>
@@ -1491,7 +1491,7 @@
     <paper id="33">
       <title>Reranking for Biomedical Named-Entity Recognition</title>
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>209–216</pages>
       <url hash="d346a0cf">W07-1033</url>
       <bibkey>yoshida-tsujii-2007-reranking</bibkey>
@@ -1501,7 +1501,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on A Broader Perspective on Multiword Expressions</booktitle>
       <url hash="ffe9b371">W07-11</url>
-      <editor><first>Nicole</first><last>Gregoire</last></editor>
+      <editor id="nicole-gregoire"><first>Nicole</first><last>Gregoire</last></editor>
       <editor><first>Stefan</first><last>Evert</last></editor>
       <editor><first>Su Nam</first><last>Kim</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1538,15 +1538,15 @@
     </paper>
     <paper id="4">
       <title>Semantics-based Multiword Expression Extraction</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
-      <author><first>Begoña</first><last>Villada Moirón</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="begona-villada-moiron"><first>Begoña</first><last>Villada Moirón</last></author>
       <pages>25–32</pages>
       <url hash="c84a15a4">W07-1104</url>
       <bibkey>van-de-cruys-villada-moiron-2007-semantics</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>S</fixed-case>panish Adverbial Frozen Expressions</title>
-      <author><first>Dolors</first><last>Català</last></author>
+      <author id="dolors-catala"><first>Dolors</first><last>Català</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>33–40</pages>
       <url hash="79628c4b">W07-1105</url>
@@ -1583,7 +1583,7 @@
       <author><first>Takao</first><last>Shime</last></author>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>65–72</pages>
       <url hash="7fa52d89">W07-1109</url>
       <bibkey>utsuro-etal-2007-learning</bibkey>
@@ -1602,11 +1602,11 @@
     <meta>
       <booktitle><fixed-case>ACL</fixed-case> 2007 Workshop on Deep Linguistic Processing</booktitle>
       <url hash="d4368edc">W07-12</url>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
       <editor><first>Mark</first><last>Dras</last></editor>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Tracy Holloway</first><last>King</last></editor>
-      <editor><first>Gertjan</first><last>van Noord</last></editor>
+      <editor id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></editor>
+      <editor id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -1620,7 +1620,7 @@
     <paper id="1">
       <title>Multi-Component <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars, Dependency Graph Models, and Linguistic Analyses</title>
       <author><first>Joan</first><last>Chen-Main</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>1–8</pages>
       <url hash="5a583196">W07-1201</url>
       <bibkey>chen-main-joshi-2007-multi</bibkey>
@@ -1628,7 +1628,7 @@
     <paper id="2">
       <title>Perceptron Training for a Wide-Coverage Lexicalized-Grammar Parser</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <pages>9–16</pages>
       <url hash="d58b3537">W07-1202</url>
       <bibkey>clark-curran-2007-perceptron</bibkey>
@@ -1660,25 +1660,25 @@
     </paper>
     <paper id="6">
       <title>Question Answering based on Semantic Roles</title>
-      <author><first>Michael</first><last>Kaisser</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="michael-kaisser"><first>Michael</first><last>Kaisser</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>41–48</pages>
       <url hash="9f712601">W07-1206</url>
       <bibkey>kaisser-webber-2007-question</bibkey>
     </paper>
     <paper id="7">
       <title>Deep Linguistic Processing for Spoken Dialogue Systems</title>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <pages>49–56</pages>
       <url hash="59e12184">W07-1207</url>
       <bibkey>allen-etal-2007-deep</bibkey>
     </paper>
     <paper id="8">
       <title>Self- or Pre-Tuning? Deep Linguistic Processing of Language Variants</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
       <pages>57–64</pages>
       <url hash="040e6ef6">W07-1208</url>
@@ -1688,7 +1688,7 @@
       <title>Pruning the Search Space of a Hand-Crafted Parsing System with a Probabilistic Parser</title>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Tracy Holloway</first><last>King</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <pages>65–72</pages>
       <url hash="cfaf3b78">W07-1209</url>
       <bibkey>cahill-etal-2007-pruning</bibkey>
@@ -1702,7 +1702,7 @@
     </paper>
     <paper id="11">
       <title>A Task-based Comparison of Information Extraction Pattern Models</title>
-      <author><first>Mark</first><last>Greenwood</last></author>
+      <author id="mark-a-greenwood"><first>Mark</first><last>Greenwood</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <pages>81–88</pages>
       <url hash="080bd3dc">W07-1211</url>
@@ -1711,7 +1711,7 @@
     <paper id="12">
       <title>Creating a Systemic Functional Grammar Corpus from the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>Matthew</first><last>Honnibal</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>89–96</pages>
       <url hash="8ee486f2">W07-1212</url>
       <bibkey>honnibal-curran-2007-creating</bibkey>
@@ -1728,8 +1728,8 @@
     </paper>
     <paper id="14">
       <title>The <fixed-case>S</fixed-case>panish Resource Grammar: Pre-processing Strategy and Lexical Acquisition</title>
-      <author><first>Montserrat</first><last>Marimon</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Sergio</first><last>Espeja</last></author>
       <author><first>Natalia</first><last>Seghezzi</last></author>
       <pages>105–111</pages>
@@ -1739,14 +1739,14 @@
     <paper id="15">
       <title>Extracting a Verb Lexicon for Deep Parsing from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Mark</first><last>McConville</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <pages>112–119</pages>
       <url hash="4b26d3a4">W07-1215</url>
       <bibkey>mcconville-dzikovska-2007-extracting</bibkey>
     </paper>
     <paper id="16">
       <title>Fips, A “Deep” Linguistic Multilingual Parser</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>120–127</pages>
       <url hash="8135667e">W07-1216</url>
       <bibkey>wehrli-2007-fips</bibkey>
@@ -1762,7 +1762,7 @@
     </paper>
     <paper id="18">
       <title>Validation and Regression Testing for a Cross-linguistic Grammar Resource</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Laurie</first><last>Poulson</last></author>
       <author><first>Scott</first><last>Drellishak</last></author>
       <author><first>Chris</first><last>Evans</last></author>
@@ -1792,7 +1792,7 @@
       <booktitle>Proceedings of Ninth Meeting of the <fixed-case>ACL</fixed-case> Special Interest Group in Computational Morphology and Phonology</booktitle>
       <url hash="efc4ab76">W07-13</url>
       <editor><first>John</first><last>Nerbonne</last></editor>
-      <editor><first>T. Mark</first><last>Ellison</last></editor>
+      <editor id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></editor>
       <editor><first>Grzegorz</first><last>Kondrak</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -1846,7 +1846,7 @@
     </paper>
     <paper id="6">
       <title>Can Corpus Based Measures be Used for Comparative Study of Languages?</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Harshit</first><last>Surana</last></author>
       <pages>40–47</pages>
       <url hash="d7e12b7b">W07-1306</url>
@@ -1922,9 +1922,9 @@
     <paper id="15">
       <title><fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>or: Minimally Supervised Induction of Paradigm Structure and Morphological Analysis</title>
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>117–125</pages>
       <url hash="03db837a">W07-1315</url>
       <bibkey>monson-etal-2007-paramor</bibkey>
@@ -1954,9 +1954,9 @@
       <editor><first>Satoshi</first><last>Sekine</last></editor>
       <editor><first>Kentaro</first><last>Inui</last></editor>
       <editor><first>Ido</first><last>Dagan</last></editor>
-      <editor><first>Bill</first><last>Dolan</last></editor>
+      <editor id="william-b-dolan"><first>Bill</first><last>Dolan</last></editor>
       <editor><first>Danilo</first><last>Giampiccolo</last></editor>
-      <editor><first>Bernardo</first><last>Magnini</last></editor>
+      <editor id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague</address>
       <month>June</month>
@@ -1989,13 +1989,13 @@
     </paper>
     <paper id="3">
       <title>Precision-focused Textual Inference</title>
-      <author><first>Daniel</first><last>Bobrow</last></author>
+      <author id="daniel-bobrow"><first>Daniel</first><last>Bobrow</last></author>
       <author><first>Dick</first><last>Crouch</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
       <author><first>Lauri</first><last>Karttunen</last></author>
       <author><first>Rowan</first><last>Nairn</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <pages>16–21</pages>
       <url hash="75fb28d4">W07-1403</url>
@@ -2004,15 +2004,15 @@
     <paper id="4">
       <title><fixed-case>COGEX</fixed-case> at <fixed-case>RTE</fixed-case> 3</title>
       <author><first>Marta</first><last>Tatu</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>22–27</pages>
       <url hash="1f605170">W07-1404</url>
       <bibkey>tatu-moldovan-2007-cogex</bibkey>
     </paper>
     <paper id="5">
       <title>A Corpus of Fine-Grained Entailment Relations</title>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <pages>28–35</pages>
       <url hash="a2349697">W07-1405</url>
       <bibkey>nielsen-ward-2007-corpus</bibkey>
@@ -2020,7 +2020,7 @@
     <paper id="6">
       <title>Recognizing Textual Entailment Using Sentence Similarity based on Dependency Tree Skeletons</title>
       <author><first>Rui</first><last>Wang</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>36–41</pages>
       <url hash="aac895a6">W07-1406</url>
       <bibkey>wang-neumann-2007-recognizing</bibkey>
@@ -2035,9 +2035,9 @@
     </paper>
     <paper id="8">
       <title>Entailment and Anaphora Resolution in <fixed-case>RTE</fixed-case>3</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Antonella</first><last>Bristot</last></author>
-      <author><first>Marco Aldo</first><last>Piccolino Boniforti</last></author>
+      <author id="marco-aldo-piccolino-boniforti"><first>Marco Aldo</first><last>Piccolino Boniforti</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <pages>48–53</pages>
       <url hash="3e8103d5">W07-1408</url>
@@ -2046,10 +2046,10 @@
     <paper id="9">
       <title>On the Role of Lexical and World Knowledge in <fixed-case>RTE</fixed-case>3</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <author><first>John</first><last>Thompson</last></author>
-      <author><first>William</first><last>Murray</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="william-r-murray"><first>William</first><last>Murray</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <pages>54–59</pages>
       <url hash="b9bebe6c">W07-1409</url>
@@ -2057,25 +2057,25 @@
     </paper>
     <paper id="10">
       <title>Machine Learning with Semantic-Based Distances Between Sentences for Textual Entailment</title>
-      <author><first>Daniel</first><last>Ferrés</last></author>
-      <author><first>Horacio</first><last>Rodríguez</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodríguez</last></author>
       <pages>60–65</pages>
       <url hash="4edcd0d1">W07-1410</url>
       <bibkey>ferres-rodriguez-2007-machine</bibkey>
     </paper>
     <paper id="11">
       <title>A Perspective-Based Approach for Solving Textual Entailment Recognition</title>
-      <author><first>Óscar</first><last>Ferrández</last></author>
+      <author id="oscar-ferrandez"><first>Óscar</first><last>Ferrández</last></author>
       <author><first>Daniel</first><last>Micol</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>66–71</pages>
       <url hash="5774cc36">W07-1411</url>
       <bibkey>ferrandez-etal-2007-perspective</bibkey>
     </paper>
     <paper id="12">
       <title>Shallow Semantic in Fast Textual Entailment Rule Learners</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>72–77</pages>
@@ -2085,11 +2085,11 @@
     <paper id="13">
       <title>Combining Lexical-Syntactic Information with Machine Learning for Recognizing Textual Entailment</title>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>Jose Manuel</first><last>Perea</last></author>
-      <author><first>Fernando</first><last>Martínez-Santiago</last></author>
-      <author><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
-      <author><first>Maite</first><last>Martín-Valdivia</last></author>
-      <author><first>Alfonso</first><last>Ureña-López</last></author>
+      <author id="jose-manuel-perea-ortega"><first>Jose Manuel</first><last>Perea</last></author>
+      <author id="fernando-martinez-santiago"><first>Fernando</first><last>Martínez-Santiago</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>Alfonso</first><last>Ureña-López</last></author>
       <pages>78–82</pages>
       <url hash="551154db">W07-1413</url>
       <bibkey>montejo-raez-etal-2007-combining</bibkey>
@@ -2097,7 +2097,7 @@
     <paper id="14">
       <title>Dependency-based paraphrasing for recognizing textual entailment</title>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Wauter</first><last>Bosma</last></author>
       <pages>83–88</pages>
       <url hash="c8741a25">W07-1414</url>
@@ -2105,8 +2105,8 @@
     </paper>
     <paper id="15">
       <title>Experiments of <fixed-case>UNED</fixed-case> at the Third Recognising Textual Entailment Challenge</title>
-      <author><first>Álvaro</first><last>Rodrigo</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="alvaro-rodrigo"><first>Álvaro</first><last>Rodrigo</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Jesús</first><last>Herrera</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>89–94</pages>
@@ -2137,9 +2137,9 @@
     </paper>
     <paper id="19">
       <title><fixed-case>SVO</fixed-case> triple based Latent Semantic Analysis for recognising textual entailment</title>
-      <author><first>Gaston</first><last>Burek</last></author>
+      <author id="gaston-burek"><first>Gaston</first><last>Burek</last></author>
       <author><first>Christian</first><last>Pietsch</last></author>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <pages>113–118</pages>
       <url hash="23da81ed">W07-1419</url>
       <bibkey>burek-etal-2007-svo</bibkey>
@@ -2149,7 +2149,7 @@
       <author><first>Rod</first><last>Adams</last></author>
       <author><first>Gabriel</first><last>Nicolae</last></author>
       <author><first>Cristina</first><last>Nicolae</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>119–124</pages>
       <url hash="5dfe6f6e">W07-1420</url>
       <bibkey>adams-etal-2007-textual</bibkey>
@@ -2157,7 +2157,7 @@
     <paper id="21">
       <title>Hypothesis Transformation and Semantic Variability Rules Used in Recognizing Textual Entailment</title>
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Alexandra</first><last>Balahur-Dobrescu</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur-Dobrescu</last></author>
       <pages>125–130</pages>
       <url hash="710a72e5">W07-1421</url>
       <bibkey>iftene-balahur-dobrescu-2007-hypothesis</bibkey>
@@ -2182,7 +2182,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>M</fixed-case>utaphrase: Paraphrasing with <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Michael</first><last>Ellsworth</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
       <author><first>Adam</first><last>Janin</last></author>
       <pages>143–150</pages>
       <url hash="a4cdab65">W07-1424</url>
@@ -2193,7 +2193,7 @@
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Shuhei</first><last>Kato</last></author>
       <author><first>Naoki</first><last>Kato</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>151–158</pages>
       <url hash="5a2ababe">W07-1425</url>
       <bibkey>fujita-etal-2007-compositional</bibkey>
@@ -2210,16 +2210,16 @@
     </paper>
     <paper id="27">
       <title>Learning Alignments and Leveraging Natural Logic</title>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
       <author><first>Trond</first><last>Grenager</last></author>
       <author><first>David</first><last>Hall</last></author>
-      <author><first>Chloe</first><last>Kiddon</last></author>
+      <author id="chloe-kiddon"><first>Chloe</first><last>Kiddon</last></author>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Daniel</first><last>Ramage</last></author>
       <author><first>Eric</first><last>Yeh</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>165–170</pages>
       <url hash="71a84c12">W07-1427</url>
       <bibkey>chambers-etal-2007-learning</bibkey>
@@ -2234,8 +2234,8 @@
     </paper>
     <paper id="29">
       <title>Biology Based Alignments of Paraphrases for Sentence Compression</title>
-      <author><first>João</first><last>Cordeiro</last></author>
-      <author><first>Gäel</first><last>Dias</last></author>
+      <author id="joao-paulo-cordeiro"><first>João</first><last>Cordeiro</last></author>
+      <author id="gael-dias"><first>Gäel</first><last>Dias</last></author>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
       <pages>177–184</pages>
       <url hash="52f1d7fd">W07-1429</url>
@@ -2252,7 +2252,7 @@
     <paper id="31">
       <title>Natural Logic for Textual Inference</title>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>193–200</pages>
       <url hash="79708d94">W07-1431</url>
       <bibkey>maccartney-manning-2007-natural</bibkey>
@@ -2262,13 +2262,13 @@
     <meta>
       <booktitle>Proceedings of the Linguistic Annotation Workshop</booktitle>
       <url hash="2dd225be">W07-15</url>
-      <editor><first>Branimir</first><last>Boguraev</last></editor>
-      <editor><first>Nancy</first><last>Ide</last></editor>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <editor><first>Shigeko</first><last>Nariyama</last></editor>
       <editor><first>Manfred</first><last>Stede</last></editor>
-      <editor><first>Janyce</first><last>Wiebe</last></editor>
-      <editor><first>Graham</first><last>Wilcock</last></editor>
+      <editor id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></editor>
+      <editor id="graham-wilcock"><first>Graham</first><last>Wilcock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -2306,7 +2306,7 @@
     </paper>
     <paper id="4">
       <title>Assocating Facial Displays with Syntactic Constituents for Generation</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <pages>25–32</pages>
       <url hash="8634c222">W07-1504</url>
       <bibkey>foster-2007-assocating</bibkey>
@@ -2316,8 +2316,8 @@
       <author><first>Udo</first><last>Hahn</last></author>
       <author><first>Ekaterina</first><last>Buyko</last></author>
       <author><first>Katrin</first><last>Tomanek</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>33–40</pages>
@@ -2334,7 +2334,7 @@
     <paper id="7">
       <title>Usage of <fixed-case>XSL</fixed-case> Stylesheets for the Annotation of the <fixed-case>S</fixed-case>ámi Language Corpora.</title>
       <author><first>Saara</first><last>Huhmarniemi</last></author>
-      <author><first>Sjur N.</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur N.</first><last>Moshagen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <pages>45–48</pages>
       <url hash="363038b9">W07-1507</url>
@@ -2344,11 +2344,11 @@
       <title>Criteria for the Manual Grouping of Verb Senses</title>
       <author><first>Cecily Jill</first><last>Duffield</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Sarah E.</first><last>Vieweg</last></author>
+      <author id="sarah-vieweg"><first>Sarah E.</first><last>Vieweg</last></author>
       <author><first>Jenny</first><last>Davis</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>49–52</pages>
       <url hash="1c8b368d">W07-1508</url>
       <bibkey>duffield-etal-2007-criteria</bibkey>
@@ -2359,7 +2359,7 @@
       <author><first>Fernando</first><last>Pereira</last></author>
       <author><first>Mark</first><last>Mandel</last></author>
       <author><first>Steven</first><last>Carroll</last></author>
-      <author><first>Peter</first><last>White</last></author>
+      <author id="peter-white"><first>Peter</first><last>White</last></author>
       <pages>53–56</pages>
       <url hash="92384b0d">W07-1509</url>
       <bibkey>ganchev-etal-2007-semi</bibkey>
@@ -2374,8 +2374,8 @@
     <paper id="11">
       <title>Annotating <fixed-case>C</fixed-case>hinese Collocations with Multi Information</title>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <pages>61–68</pages>
       <url hash="57a730c2">W07-1511</url>
@@ -2410,14 +2410,14 @@
       <title>Annotating Expressions of Appraisal in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Jonathon</first><last>Read</last></author>
       <author><first>David</first><last>Hope</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>93–100</pages>
       <url hash="198b1e5e">W07-1515</url>
       <bibkey>read-etal-2007-annotating</bibkey>
     </paper>
     <paper id="16">
       <title>Active Learning for Part-of-Speech Tagging: Accelerating Corpus Annotation</title>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Peter</first><last>McClanahan</last></author>
       <author><first>Robbie</first><last>Haertel</last></author>
       <author><first>George</first><last>Busby</last></author>
@@ -2433,7 +2433,7 @@
       <title>Combining Independent Syntactic and Semantic Annotation Schemes</title>
       <author><first>Marc</first><last>Verhagen</last></author>
       <author><first>Amber</first><last>Stubbs</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>109–112</pages>
       <url hash="72ed4bff">W07-1517</url>
       <bibkey>verhagen-etal-2007-combining</bibkey>
@@ -2447,7 +2447,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>ITU</fixed-case> Treebank Annotation Tool</title>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <pages>117–120</pages>
       <url hash="792c2dba">W07-1519</url>
       <bibkey>eryigit-2007-itu</bibkey>
@@ -2474,7 +2474,7 @@
       <author><first>Ryu</first><last>Iida</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>132–139</pages>
       <url hash="f32fd31d">W07-1522</url>
       <bibkey>iida-etal-2007-annotating</bibkey>
@@ -2492,10 +2492,10 @@
     </paper>
     <paper id="24">
       <title>Standoff Coordination for Multi-Tool Annotation in a Dialogue Corpus</title>
-      <author><first>Kepa Joseba</first><last>Rodríguez</last></author>
+      <author id="kepa-joseba-rodriguez"><first>Kepa Joseba</first><last>Rodríguez</last></author>
       <author><first>Stefanie</first><last>Dipper</last></author>
       <author><first>Michael</first><last>Götze</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <author><first>Christian</first><last>Raymond</last></author>
       <author><first>Joanna</first><last>Rabiega-Wiśniewska</last></author>
@@ -2520,7 +2520,7 @@
     </paper>
     <paper id="27">
       <title>Experiments with an Annotation Scheme for a Knowledge-rich Noun Phrase Interpretation System</title>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>168–175</pages>
       <url hash="b01fb42d">W07-1527</url>
       <bibkey>girju-2007-experiments</bibkey>
@@ -2547,10 +2547,10 @@
       <title>Discourse Annotation Working Group Report</title>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <author><first>Brian</first><last>Reese</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Theresa</first><last>Wilson</last></author>
       <pages>191–196</pages>
       <url hash="30fd30f9">W07-1530</url>
@@ -2561,8 +2561,8 @@
     <meta>
       <booktitle>Proceedings of the Fourth <fixed-case>ACL</fixed-case>-<fixed-case>SIGSEM</fixed-case> Workshop on Prepositions</booktitle>
       <url hash="3e99f1ee">W07-16</url>
-      <editor><first>Fintan</first><last>Costello</last></editor>
-      <editor><first>John</first><last>Kelleher</last></editor>
+      <editor id="fintan-j-costello"><first>Fintan</first><last>Costello</last></editor>
+      <editor id="john-kelleher"><first>John</first><last>Kelleher</last></editor>
       <editor><first>Martin</first><last>Volk</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -2584,7 +2584,7 @@
     <paper id="2">
       <title>Landmark Classification for Route Directions</title>
       <author><first>Aidan</first><last>Furlan</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Alex</first><last>Klippel</last></author>
       <pages>9–16</pages>
       <url hash="3b019cc4">W07-1602</url>
@@ -2600,8 +2600,8 @@
     </paper>
     <paper id="4">
       <title>Detection of Grammatical Errors Involving Prepositions</title>
-      <author><first>Martin</first><last>Chodorow</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <pages>25–30</pages>
       <url hash="5968e88c">W07-1604</url>
@@ -2628,7 +2628,7 @@
     <paper id="7">
       <title>Automatically Acquiring Models of Preposition Use</title>
       <author><first>Rachele</first><last>De Felice</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>45–50</pages>
       <url hash="a9a49fd5">W07-1607</url>
       <bibkey>de-felice-pulman-2007-automatically</bibkey>
@@ -2636,7 +2636,7 @@
     <paper id="8">
       <title>Simple Preposition Correspondence: A Problem in <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndian Language Machine Translation</title>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Manohar</first><last>Reddy</last></author>
       <pages>51–58</pages>
       <url hash="b67ef1c0">W07-1608</url>
@@ -2648,7 +2648,7 @@
       <booktitle>Proceedings of the Workshop on <fixed-case>B</fixed-case>alto-<fixed-case>S</fixed-case>lavonic Natural Language Processing</booktitle>
       <url hash="93b76c26">W07-17</url>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -2677,8 +2677,8 @@
     <paper id="3">
       <title>A Language Independent Approach for Name Categorization and Discrimination</title>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>19–26</pages>
       <url hash="8d82f462">W07-1703</url>
       <bibkey>kozareva-etal-2007-language</bibkey>
@@ -2687,14 +2687,14 @@
       <title>Lemmatization of <fixed-case>P</fixed-case>olish Person Names</title>
       <author><first>Jakub</first><last>Piskorski</last></author>
       <author><first>Marcin</first><last>Sydow</last></author>
-      <author><first>Anna</first><last>Kupść</last></author>
+      <author id="anna-kupsc"><first>Anna</first><last>Kupść</last></author>
       <pages>27–34</pages>
       <url hash="b3b297b8">W07-1704</url>
       <bibkey>piskorski-etal-2007-lemmatization</bibkey>
     </paper>
     <paper id="5">
       <title>Automatic Processing of Diabetic Patients’ Hospital Documentation</title>
-      <author><first>Małgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Małgorzata</first><last>Marciniak</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <pages>35–42</pages>
       <url hash="9907319a">W07-1705</url>
@@ -2705,10 +2705,10 @@
       <author><first>Adam</first><last>Przepiórkowski</last></author>
       <author><first>Łukasz</first><last>Degórski</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Lothar</first><last>Lemnitzer</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <author><first>Beata</first><last>Wójtowicz</last></author>
       <pages>43–50</pages>
       <url hash="1eb0bb2f">W07-1706</url>
@@ -2732,11 +2732,11 @@
     </paper>
     <paper id="9">
       <title>The Best of Two Worlds: Cooperation of Statistical and Rule-Based Taggers for <fixed-case>C</fixed-case>zech</title>
-      <author><first>Drahomíra “johanka”</first><last>Spoustová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Drahomíra “johanka”</first><last>Spoustová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jan</first><last>Votrubec</last></author>
       <author><first>Pavel</first><last>Krbec</last></author>
-      <author><first>Pavel</first><last>Květoň</last></author>
+      <author id="pavel-kveton"><first>Pavel</first><last>Květoň</last></author>
       <pages>67–74</pages>
       <url hash="fc1e0c58">W07-1709</url>
       <bibkey>spoustova-etal-2007-best</bibkey>
@@ -2752,7 +2752,7 @@
     <paper id="11">
       <title>Multilingual Word Sense Discrimination: A Comparative Cross-Linguistic Study</title>
       <author><first>Alla</first><last>Rozovskaya</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>82–87</pages>
       <url hash="955c9587">W07-1711</url>
       <bibkey>rozovskaya-sproat-2007-multilingual</bibkey>
@@ -2768,7 +2768,7 @@
     <paper id="13">
       <title>Morphological Annotation of the <fixed-case>L</fixed-case>ithuanian Corpus</title>
       <author><first>Erika</first><last>Rimkutė</last></author>
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <author><first>Andrius</first><last>Utka</last></author>
       <pages>94–99</pages>
       <url hash="b3fbf945">W07-1713</url>
@@ -2779,8 +2779,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Grammar-Based Approaches to Spoken Language Processing</booktitle>
       <url hash="85b7bf9b">W07-18</url>
-      <editor><first>Pierrette</first><last>Bouillon</last></editor>
-      <editor><first>Manny</first><last>Rayner</last></editor>
+      <editor id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></editor>
+      <editor id="manny-rayner"><first>Manny</first><last>Rayner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
       <month>June</month>
@@ -2800,7 +2800,7 @@
     </paper>
     <paper id="2">
       <title>Converting Grammatical Framework to Regulus</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>9–16</pages>
       <url hash="d17f1022">W07-1802</url>
       <bibkey>ljunglof-2007-converting</bibkey>
@@ -2825,7 +2825,7 @@
       <author><first>Tim</first><last>Paek</last></author>
       <author><first>Sudeep</first><last>Gandhe</last></author>
       <author><first>Max</first><last>Chickering</last></author>
-      <author><first>Yun Cheng</first><last>Ju</last></author>
+      <author id="yun-cheng-ju"><first>Yun Cheng</first><last>Ju</last></author>
       <pages>33–40</pages>
       <url hash="e066bab6">W07-1805</url>
       <bibkey>paek-etal-2007-handling</bibkey>
@@ -2839,7 +2839,7 @@
       <author><first>Marianne</first><last>Santaholma</last></author>
       <author><first>Nikos</first><last>Tsourakis</last></author>
       <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <pages>41–48</pages>
       <url hash="22717605">W07-1806</url>
       <bibkey>bouillon-etal-2007-bidirectional</bibkey>
@@ -2873,7 +2873,7 @@
     </frontmatter>
     <paper id="1">
       <title>Comparing Rule-Based and Data-Driven Selection of Facial Displays</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <pages>1–8</pages>
       <url hash="417e5063">W07-1901</url>
       <bibkey>foster-2007-comparing</bibkey>
@@ -2896,7 +2896,7 @@
     <paper id="4">
       <title>Which Way to Turn? Guide Orientation in Virtual Way Finding</title>
       <author><first>Mark</first><last>Evers</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Joyce</first><last>Karreman</last></author>
       <pages>25–32</pages>
       <url hash="1f812b95">W07-1904</url>
@@ -2908,7 +2908,7 @@
       <author><first>Beatriz</first><last>López</last></author>
       <author><first>David</first><last>Díaz</last></author>
       <author><first>Rubén</first><last>Fernández</last></author>
-      <author><first>Luis</first><last>Hernández</last></author>
+      <author id="luis-hernandez"><first>Luis</first><last>Hernández</last></author>
       <author><first>Javier</first><last>Caminero</last></author>
       <pages>33–40</pages>
       <url hash="cd9889cc">W07-1905</url>
@@ -2936,7 +2936,7 @@
     <paper id="8">
       <title>Dynamic Movement and Positioning of Embodied Agents in Multiparty Conversations</title>
       <author><first>Dušan</first><last>Jan</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>59–66</pages>
       <url hash="bcf43988">W07-1908</url>
       <bibkey>jan-traum-2007-dynamic</bibkey>
@@ -2947,7 +2947,7 @@
       <author><first>Álvaro</first><last>Hernández</last></author>
       <author><first>David</first><last>Díaz</last></author>
       <author><first>Rubén</first><last>Fernández</last></author>
-      <author><first>Luis</first><last>Hernández</last></author>
+      <author id="luis-hernandez"><first>Luis</first><last>Hernández</last></author>
       <author><first>Doroteo</first><last>Torre</last></author>
       <pages>67–74</pages>
       <url hash="4343c0ae">W07-1909</url>
@@ -2958,7 +2958,7 @@
     <meta>
       <booktitle>Proceedings of the Tenth International Conference on Parsing Technologies</booktitle>
       <url hash="1f3c4c8d">W07-22</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <editor><first>Paola</first><last>Merlo</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -2972,7 +2972,7 @@
     </frontmatter>
     <paper id="1">
       <title>Using Self-Trained Bilexical Preferences to Improve Disambiguation Accuracy</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>1–10</pages>
       <url hash="fa919e47">W07-2201</url>
       <bibkey>van-noord-2007-using</bibkey>
@@ -2981,7 +2981,7 @@
       <title>Evaluating Impact of Re-training a Lexical Disambiguation Model on Domain Adaptation of an <fixed-case>HPSG</fixed-case> Parser</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>11–22</pages>
       <url hash="7e6e043c">W07-2202</url>
       <bibkey>hara-etal-2007-evaluating</bibkey>
@@ -2989,8 +2989,8 @@
     <paper id="3">
       <title>Semi-supervised Training of a Statistical Parser from Unlabeled Partially-bracketed Data</title>
       <author><first>Rebecca</first><last>Watson</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>23–32</pages>
       <url hash="e1dfde6a">W07-2203</url>
       <bibkey>watson-etal-2007-semi</bibkey>
@@ -2999,19 +2999,19 @@
       <title>Adapting <fixed-case>WSJ</fixed-case>-Trained Parsers to the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus using In-Domain Self-Training</title>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>33–35</pages>
       <url hash="6b9e7675">W07-2204</url>
       <bibkey>foster-etal-2007-adapting</bibkey>
     </paper>
     <paper id="5">
       <title>The Impact of Deep Linguistic Processing on Parsing Technology</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>36–38</pages>
       <url hash="5700e1f0">W07-2205</url>
       <bibkey>baldwin-etal-2007-impact</bibkey>
@@ -3019,7 +3019,7 @@
     <paper id="6">
       <title>Improving the Efficiency of a Wide-Coverage <fixed-case>CCG</fixed-case> Parser</title>
       <author><first>Bojan</first><last>Djordjevic</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>39–47</pages>
       <url hash="873914c0">W07-2206</url>
@@ -3029,7 +3029,7 @@
       <title>Efficiency in Unification-Based N-Best Parsing</title>
       <author><first>Yi</first><last>Zhang</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <pages>48–59</pages>
       <url hash="b85c909e">W07-2207</url>
       <bibkey>zhang-etal-2007-efficiency</bibkey>
@@ -3039,14 +3039,14 @@
       <author><first>Takashi</first><last>Ninomiya</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>60–68</pages>
       <url hash="a879382b">W07-2208</url>
       <bibkey>ninomiya-etal-2007-log</bibkey>
     </paper>
     <paper id="9">
       <title>Ambiguity Resolution by Reordering Rules in Text Containing Errors</title>
-      <author><first>Sylvana</first><last>Sofkova Hashemi</last></author>
+      <author id="sylvana-sofkova-hashemi"><first>Sylvana</first><last>Sofkova Hashemi</last></author>
       <pages>69–79</pages>
       <url hash="1985599e">W07-2209</url>
       <bibkey>sofkova-hashemi-2007-ambiguity</bibkey>
@@ -3060,14 +3060,14 @@
     </paper>
     <paper id="11">
       <title>Symbolic Preference Using Simple Scoring</title>
-      <author><first>Paula</first><last>Newman</last></author>
+      <author id="paula-newman"><first>Paula</first><last>Newman</last></author>
       <pages>83–92</pages>
       <url hash="b24b13f3">W07-2211</url>
       <bibkey>newman-2007-symbolic</bibkey>
     </paper>
     <paper id="12">
       <title>Synchronous Grammars and Transducers: Good News and Bad News</title>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>93</pages>
       <url hash="57d3e514">W07-2212</url>
       <bibkey>shieber-2007-synchronous</bibkey>
@@ -3075,14 +3075,14 @@
     <paper id="13">
       <title>Are Very Large Context-Free Grammars Tractable?</title>
       <author><first>Pierre</first><last>Boullier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>94–105</pages>
       <url hash="e62e25c9">W07-2213</url>
       <bibkey>boullier-sagot-2007-large</bibkey>
     </paper>
     <paper id="14">
       <title>Pomset mcfgs</title>
-      <author><first>Michael</first><last>Pan</last></author>
+      <author id="michael-j-pan"><first>Michael</first><last>Pan</last></author>
       <pages>106–108</pages>
       <url hash="b0617bd6">W07-2214</url>
       <bibkey>pan-2007-pomset</bibkey>
@@ -3115,7 +3115,7 @@
     <paper id="18">
       <title>A Latent Variable Model for Generative Dependency Parsing</title>
       <author><first>Ivan</first><last>Titov</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>144–155</pages>
       <url hash="d73056d7">W07-2218</url>
       <bibkey>titov-henderson-2007-latent</bibkey>
@@ -3123,7 +3123,7 @@
     <paper id="19">
       <title>Three-Dimensional Parametrization for Parsing Morphologically Rich Languages</title>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>156–167</pages>
       <url hash="5bab80e9">W07-2219</url>
       <bibkey>tsarfaty-simaan-2007-three</bibkey>
@@ -3153,14 +3153,14 @@
     </frontmatter>
     <paper id="1">
       <title>Quality of Service and Communicative Competence in <fixed-case>NLG</fixed-case> Evaluation</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>3–6</pages>
       <url hash="05c811e9">W07-2301</url>
       <bibkey>jokinen-2007-quality</bibkey>
     </paper>
     <paper id="2">
       <title>Generation of repeated references to discourse entities</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Sebastian</first><last>Varges</last></author>
       <pages>9–16</pages>
       <url hash="1270890f">W07-2302</url>
@@ -3177,19 +3177,19 @@
     </paper>
     <paper id="4">
       <title>Modelling control in generation</title>
-      <author><first>Roger</first><last>Evans</last></author>
-      <author><first>David</first><last>Weir</last></author>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Daniel</first><last>Paiva</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="daniel-paiva"><first>Daniel</first><last>Paiva</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>25–32</pages>
       <url hash="f42663e2">W07-2304</url>
       <bibkey>evans-etal-2007-modelling</bibkey>
     </paper>
     <paper id="5">
       <title>Avoiding Repetition in Generated Text</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>33–40</pages>
       <url hash="da2f4a4a">W07-2305</url>
       <bibkey>foster-white-2007-avoiding</bibkey>
@@ -3206,7 +3206,7 @@
       <title>Evaluating algorithms for the Generation of Referring Expressions using a balanced corpus</title>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Ielka</first><last>van der Sluis</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>49–56</pages>
       <url hash="e3ad84fe">W07-2307</url>
       <bibkey>gatt-etal-2007-evaluating</bibkey>
@@ -3214,8 +3214,8 @@
     <paper id="8">
       <title>Generating Politeness in Task Based Interaction: An Evaluation of the Effect of Linguistic Form and Culture</title>
       <author><first>Swati</first><last>Gupta</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Daniela</first><last>Romano</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="daniela-m-romano"><first>Daniela</first><last>Romano</last></author>
       <pages>57–64</pages>
       <url hash="f172d9ab">W07-2308</url>
       <bibkey>gupta-etal-2007-generating</bibkey>
@@ -3233,7 +3233,7 @@
     <paper id="10">
       <title>Using <fixed-case>WYSIWYM</fixed-case> to Create an Open-ended Interface for the Semantic Grid</title>
       <author><first>Feikje</first><last>Hielkema</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Peter</first><last>Edwards</last></author>
       <pages>69–72</pages>
       <url hash="2761a878">W07-2310</url>
@@ -3249,13 +3249,13 @@
     <paper id="12">
       <title>Measuring Variability in Sentence Ordering for News Summarization</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Necip Fazil</first><last>Ayan</last></author>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
-      <author><first>Dianne</first><last>O’Leary</last></author>
-      <author><first>Judith</first><last>Schlesinger</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="necip-fazil-ayan"><first>Necip Fazil</first><last>Ayan</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
+      <author id="dianne-p-oleary"><first>Dianne</first><last>O’Leary</last></author>
+      <author id="judith-d-schlesinger"><first>Judith</first><last>Schlesinger</last></author>
       <pages>81–88</pages>
       <url hash="6dcaaeda">W07-2312</url>
       <bibkey>madnani-etal-2007-measuring</bibkey>
@@ -3264,7 +3264,7 @@
       <title>Visualising Discourse Structure in Interactive Documents</title>
       <author><first>Clara</first><last>Mancini</last></author>
       <author><first>Christian</first><last>Pietsch</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <pages>89–92</pages>
       <url hash="988c4be8">W07-2313</url>
       <bibkey>mancini-etal-2007-visualising</bibkey>
@@ -3286,14 +3286,14 @@
     <paper id="16">
       <title>An Experiment on “Free Generation” from Single <fixed-case>RDF</fixed-case> Triples</title>
       <author><first>Xiantang</first><last>Sun</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>105–108</pages>
       <url hash="3a987e45">W07-2316</url>
       <bibkey>sun-mellish-2007-experiment</bibkey>
     </paper>
     <paper id="17">
       <title>The Narrator: <fixed-case>NLG</fixed-case> for digital storytelling</title>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Nanda</first><last>Slabbers</last></author>
       <author><first>Feikje</first><last>Hielkema</last></author>
       <pages>109–112</pages>
@@ -3310,8 +3310,8 @@
     </paper>
     <paper id="19">
       <title>Determining tutorial remediation strategies from a corpus of human-human tutoring dialogues</title>
-      <author><first>Charles</first><last>Callaway</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>123–130</pages>
       <url hash="a785057b">W07-2319</url>
       <bibkey>callaway-moore-2007-determining</bibkey>
@@ -3334,7 +3334,7 @@
     </paper>
     <paper id="22">
       <title>Generating Multilingual Descriptions from Linguistically Annotated <fixed-case>OWL</fixed-case> Ontologies: the <fixed-case>N</fixed-case>atural<fixed-case>OWL</fixed-case> System</title>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>143–146</pages>
       <url hash="e3419442">W07-2322</url>
@@ -3360,7 +3360,7 @@
       <title>A Comparison of Hedged and Non-hedged <fixed-case>NLG</fixed-case> Texts</title>
       <author><first>Saad</first><last>Mahamood</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>155–158</pages>
       <url hash="be8e4f72">W07-2325</url>
       <bibkey>mahamood-etal-2007-comparison</bibkey>
@@ -3368,7 +3368,7 @@
     <paper id="26">
       <title>Cueing the Virtual Storyteller: Analysis of cue phrase usage in fairy tales</title>
       <author><first>Manon</first><last>Penning</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>159–162</pages>
       <url hash="80733e99">W07-2326</url>
       <bibkey>penning-theune-2007-cueing</bibkey>
@@ -3376,7 +3376,7 @@
     <paper id="27">
       <title><fixed-case>A</fixed-case>tlas.txt: Linking Geo-referenced Data to Text for <fixed-case>NLG</fixed-case></title>
       <author><first>Kavita</first><last>Thomas</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>163–166</pages>
       <url hash="df460217">W07-2327</url>
       <bibkey>thomas-sripada-2007-atlas</bibkey>
@@ -3396,9 +3396,9 @@
       <booktitle>Proceedings of the 16th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 2007)</booktitle>
       <url hash="3974d324">W07-24</url>
       <editor><first>Joakim</first><last>Nivre</last></editor>
-      <editor><first>Heiki-Jaan</first><last>Kaalep</last></editor>
+      <editor id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></editor>
       <editor><first>Kadri</first><last>Muischnek</last></editor>
-      <editor><first>Mare</first><last>Koit</last></editor>
+      <editor id="mare-koit"><first>Mare</first><last>Koit</last></editor>
       <publisher>University of Tartu, Estonia</publisher>
       <address>Tartu, Estonia</address>
       <month>May</month>
@@ -3411,14 +3411,14 @@
     </frontmatter>
     <paper id="1">
       <title>Invited talk: Evaluating Automatic Approaches for Word Meaning Discovery and Disambiguation using Lexical Substitution</title>
-      <author><first>Diana F.</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana F.</first><last>McCarthy</last></author>
       <pages>2–2</pages>
       <url hash="5432bd01">W07-2401</url>
       <bibkey>mccarthy-2007-invited</bibkey>
     </paper>
     <paper id="2">
       <title>Invited talk: Text Analysis and Machine Learning for Stylometrics and Stylogenetics</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>3–3</pages>
       <url hash="639ee702">W07-2402</url>
       <bibkey>daelemans-2007-invited</bibkey>
@@ -3432,17 +3432,17 @@
     </paper>
     <paper id="4">
       <title>Dependency-Based Hybrid Model of Syntactic Analysis for the Languages with a Rather Free Word Order</title>
-      <author><first>Guntis</first><last>Bārzdiņš</last></author>
-      <author><first>Normunds</first><last>Grūzītis</last></author>
-      <author><first>Gunta</first><last>Nešpore</last></author>
-      <author><first>Baiba</first><last>Saulīte</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Bārzdiņš</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Grūzītis</last></author>
+      <author id="gunta-nespore"><first>Gunta</first><last>Nešpore</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulīte</last></author>
       <pages>13–20</pages>
       <url hash="6aec0709">W07-2404</url>
       <bibkey>barzdins-etal-2007-dependency</bibkey>
     </paper>
     <paper id="5">
       <title>Using <fixed-case>D</fixed-case>anish as a <fixed-case>CG</fixed-case> Interlingua: A Wide-Coverage <fixed-case>N</fixed-case>orwegian-<fixed-case>E</fixed-case>nglish Machine Translation System</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <author><first>Lars</first><last>Nygaard</last></author>
       <pages>21–28</pages>
       <url hash="71f184e5">W07-2405</url>
@@ -3450,9 +3450,9 @@
     </paper>
     <paper id="6">
       <title>An Advanced Speech Corpus for <fixed-case>N</fixed-case>orwegian</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Joel James</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel James</first><last>Priestley</last></author>
       <author><first>Lars</first><last>Nygaard</last></author>
       <pages>29–36</pages>
       <url hash="f619b927">W07-2406</url>
@@ -3493,7 +3493,7 @@
     <paper id="11">
       <title>Development of Text-To-Speech system for <fixed-case>L</fixed-case>atvian</title>
       <author><first>Kārlis</first><last>Goba</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <pages>67–72</pages>
       <url hash="37d58d67">W07-2411</url>
       <bibkey>goba-vasiljevs-2007-development</bibkey>
@@ -3548,7 +3548,7 @@
     <paper id="18">
       <title>The Extraction of Trajectories from Real Texts Based on Linear Classification</title>
       <author><first>Hanjing</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <author><first>Jiyuan</first><last>Zhao</last></author>
       <pages>121–127</pages>
@@ -3558,14 +3558,14 @@
     <paper id="19">
       <title><fixed-case>I</fixed-case>ce<fixed-case>P</fixed-case>arser: An Incremental Finite-State Parser for <fixed-case>I</fixed-case>celandic</title>
       <author><first>Hrafn</first><last>Loftsson</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>128–135</pages>
       <url hash="a7b87e71">W07-2419</url>
       <bibkey>loftsson-rognvaldsson-2007-iceparser</bibkey>
     </paper>
     <paper id="20">
       <title>The <fixed-case>S</fixed-case>wedish-<fixed-case>T</fixed-case>urkish Parallel Corpus and Tools for its Creation</title>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <author><first>Bengt</first><last>Dahlqvist</last></author>
       <pages>136–143</pages>
       <url hash="e24f711d">W07-2420</url>
@@ -3573,8 +3573,8 @@
     </paper>
     <paper id="21">
       <title>Multivariate Cepstral Feature Compensation on Band-limited Data for Robust Speech Recognition</title>
-      <author><first>Nicolas</first><last>Morales</last></author>
-      <author><first>Doroteo T.</first><last>Toledano</last></author>
+      <author id="nicolas-morales"><first>Nicolas</first><last>Morales</last></author>
+      <author id="doroteo-t-toledano"><first>Doroteo T.</first><last>Toledano</last></author>
       <author><first>John H. L.</first><last>Hansen</last></author>
       <author><first>Javier</first><last>Garrido</last></author>
       <pages>144–151</pages>
@@ -3584,7 +3584,7 @@
     <paper id="22">
       <title>Theoretically Motivated Treebank Coverage</title>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Koenraad</first><last>de Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>de Smedt</last></author>
       <pages>152–159</pages>
       <url hash="cc856009">W07-2422</url>
       <bibkey>rosen-de-smedt-2007-theoretically</bibkey>
@@ -3602,8 +3602,8 @@
     </paper>
     <paper id="24">
       <title>Comprehension Assistant for Languages of <fixed-case>B</fixed-case>altic States</title>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Daiga</first><last>Deksne</last></author>
       <author><first>Raivis</first><last>Skadiņš</last></author>
       <author><first>Linda</first><last>Goldberga</last></author>
@@ -3614,7 +3614,7 @@
     <paper id="25">
       <title>Combining Contexts in Lexicon Learning for Semantic Parsing</title>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Rainer</first><last>Osswald</last></author>
       <pages>175–182</pages>
       <url hash="563b3560">W07-2425</url>
@@ -3622,7 +3622,7 @@
     </paper>
     <paper id="26">
       <title>Polynomial Charts For Totally Unordered Languages</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>183–190</pages>
       <url hash="cdfdc507">W07-2426</url>
       <bibkey>sogaard-2007-polynomial</bibkey>
@@ -3639,7 +3639,7 @@
       <title>Interview and Delivery: Dialogue Strategies for Conversational Recommender Systems</title>
       <author><first>Pontus</first><last>Wärnestål</last></author>
       <author><first>Lars</first><last>Degerstedt</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>199–205</pages>
       <url hash="153e4f9d">W07-2428</url>
       <bibkey>warnestal-etal-2007-interview</bibkey>
@@ -3668,7 +3668,7 @@
     </paper>
     <paper id="32">
       <title>Decomposing <fixed-case>S</fixed-case>wedish Compounds Using Memory-Based Learning</title>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <pages>224–230</pages>
       <url hash="6497a366">W07-2432</url>
       <bibkey>friberg-heppin-2007-decomposing</bibkey>
@@ -3739,8 +3739,8 @@
     </paper>
     <paper id="42">
       <title>Posterior Probability Based Confidence Measures Applied to a Children’s Speech Reading Tracking System</title>
-      <author><first>Daniel</first><last>Bolanos</last></author>
-      <author><first>Wayne H.</first><last>Ward</last></author>
+      <author id="daniel-bolanos"><first>Daniel</first><last>Bolanos</last></author>
+      <author id="wayne-ward"><first>Wayne H.</first><last>Ward</last></author>
       <pages>274–277</pages>
       <url hash="ae379f19">W07-2442</url>
       <bibkey>bolanos-ward-2007-posterior</bibkey>
@@ -3767,7 +3767,7 @@
       <title>Íslenskur Orðasjóður – Building a Large <fixed-case>I</fixed-case>celandic Corpus</title>
       <author><first>Erla</first><last>Hallsteinsdóttir</last></author>
       <author><first>Thomas</first><last>Eckart</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Uwe</first><last>Quasthoff</last></author>
       <author><first>Matthias</first><last>Richter</last></author>
       <pages>288–291</pages>
@@ -3793,7 +3793,7 @@
     </paper>
     <paper id="48">
       <title>A <fixed-case>N</fixed-case>orwegian Letter-to-Sound Engine with <fixed-case>D</fixed-case>anish as a Catalyst</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>305–309</pages>
       <url hash="27992369">W07-2448</url>
       <bibkey>henrichsen-2007-norwegian</bibkey>
@@ -3832,7 +3832,7 @@
     <paper id="53">
       <title>Lexical Parameters, Based on Corpus Analysis of <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>wedish Cancer Data, of Relevance for <fixed-case>NLG</fixed-case></title>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Maria</first><last>Toporowska Gronostaj</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria</first><last>Toporowska Gronostaj</last></author>
       <author><first>Catalina</first><last>Hallett</last></author>
       <author><first>David</first><last>Hardcastle</last></author>
       <pages>333–336</pages>
@@ -3889,7 +3889,7 @@
     <paper id="60">
       <title>Evaluating Evaluation Measures</title>
       <author><first>Ines</first><last>Rehbein</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>372–379</pages>
       <url hash="193beac9">W07-2460</url>
       <bibkey>rehbein-van-genabith-2007-evaluating</bibkey>
@@ -3915,7 +3915,7 @@
     <paper id="63">
       <title>Recreating Humorous Split Compound Errors in <fixed-case>S</fixed-case>wedish by Using Grammaticality</title>
       <author><first>Jonas</first><last>Sjöbergh</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>389–393</pages>
       <url hash="ea086453">W07-2463</url>
       <bibkey>sjobergh-araki-2007-recreating</bibkey>
@@ -3930,7 +3930,7 @@
     <paper id="65">
       <title>Interpretation of Yes/No Questions as Metaphor Recognition</title>
       <author><first>Tarmo</first><last>Truu</last></author>
-      <author><first>Haldur</first><last>Õim</last></author>
+      <author id="haldur-oim"><first>Haldur</first><last>Õim</last></author>
       <author><first>Mare</first><last>Koit</last></author>
       <pages>398–401</pages>
       <url hash="357c9073">W07-2465</url>
diff --git a/data/xml/W08.xml b/data/xml/W08.xml
index bdb0037d16..b779be3d60 100644
--- a/data/xml/W08.xml
+++ b/data/xml/W08.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 9th <fixed-case>SIG</fixed-case>dial Workshop on Discourse and Dialogue</booktitle>
       <url hash="ae485fe6">W08-01</url>
       <editor><first>David</first><last>Schlangen</last></editor>
-      <editor><first>Beth Ann</first><last>Hockey</last></editor>
+      <editor id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
       <month>June</month>
@@ -33,10 +33,10 @@
     </paper>
     <paper id="3">
       <title>Learning N-Best Correction Models from Implicit User Feedback in a Multi-Modal Local Search Application</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Xiao</first><last>Li</last></author>
       <author><first>Patrick</first><last>Nguyen</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>21–28</pages>
       <url hash="cc0c8088">W08-0103</url>
       <bibkey>bohus-etal-2008-learning</bibkey>
@@ -44,7 +44,7 @@
     <paper id="4">
       <title>Agreement and Disputes in Dialogue</title>
       <author><first>Alex</first><last>Lascarides</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>29–36</pages>
       <url hash="9d624e0c">W08-0104</url>
       <bibkey>lascarides-asher-2008-agreement</bibkey>
@@ -61,7 +61,7 @@
     <paper id="6">
       <title>Semantic negotiation in dialogue: the mechanisms of alignment</title>
       <author><first>Gregory</first><last>Mills</last></author>
-      <author><first>Pat</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Pat</first><last>Healey</last></author>
       <pages>46–53</pages>
       <url hash="9ce6dcac">W08-0106</url>
       <bibkey>mills-healey-2008-semantic</bibkey>
@@ -69,7 +69,7 @@
     <paper id="7">
       <title>Degrees of Grounding Based on Evidence of Understanding</title>
       <author><first>Antonio</first><last>Roque</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>54–63</pages>
       <url hash="e6945e78">W08-0107</url>
       <bibkey>roque-traum-2008-degrees</bibkey>
@@ -143,7 +143,7 @@
       <title>Quantifying Ellipsis in Dialogue: an index of mutual understanding</title>
       <author><first>Marcus</first><last>Colman</last></author>
       <author><first>Arash</first><last>Eshghi</last></author>
-      <author><first>Pat</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Pat</first><last>Healey</last></author>
       <pages>96–99</pages>
       <url hash="8fe9b24d">W08-0116</url>
       <bibkey>colman-etal-2008-quantifying</bibkey>
@@ -154,7 +154,7 @@
       <author><first>Yosuke</first><last>Matsusaka</last></author>
       <author><first>Yasuharu</first><last>Den</last></author>
       <author><first>Mika</first><last>Enomoto</last></author>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <author><first>Katsuya</first><last>Takanashi</last></author>
       <pages>100–103</pages>
       <url hash="fcc715c0">W08-0117</url>
@@ -164,20 +164,20 @@
       <title>Optimal Dialog in Consumer-Rating Systems using <fixed-case>POMDP</fixed-case> Framework</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Patrick</first><last>Nguyen</last></author>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
       <pages>104–111</pages>
       <url hash="39877e87">W08-0118</url>
       <bibkey>li-etal-2008-optimal</bibkey>
     </paper>
     <paper id="19">
       <title>Training and Evaluation of the <fixed-case>HIS</fixed-case> <fixed-case>POMDP</fixed-case> Dialogue System in Noise</title>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Francois</first><last>Mairesse</last></author>
+      <author id="francois-mairesse"><first>Francois</first><last>Mairesse</last></author>
       <author><first>Jost</first><last>Schatzmann</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>112–119</pages>
       <url hash="eb69c759">W08-0119</url>
       <bibkey>gasic-etal-2008-training</bibkey>
@@ -186,15 +186,15 @@
       <title>A Frame-Based Probabilistic Framework for Spoken Dialog Management Using Dialog Examples</title>
       <author><first>Kyungduk</first><last>Kim</last></author>
       <author><first>Cheongjae</first><last>Lee</last></author>
-      <author><first>Sangkeun</first><last>Jung</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>120–127</pages>
       <url hash="67859284">W08-0120</url>
       <bibkey>kim-etal-2008-frame</bibkey>
     </paper>
     <paper id="21">
       <title>Speaking More Like You: Lexical, Acoustic/Prosodic, and Discourse Entrainment in Spoken Dialogue Systems</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>128</pages>
       <url hash="600348d2">W08-0121</url>
       <bibkey>hirschberg-2008-speaking</bibkey>
@@ -203,7 +203,7 @@
       <title>Discourse Level Opinion Relations: An Annotation Study</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>129–137</pages>
       <url hash="15cc9e09">W08-0122</url>
       <bibkey>somasundaran-etal-2008-discourse-level</bibkey>
@@ -220,7 +220,7 @@
     <paper id="24">
       <title>Modeling Vocal Interaction for Text-Independent Participant Characterization in Multi-Party Conversation</title>
       <author><first>Kornel</first><last>Laskowski</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <pages>148–155</pages>
       <url hash="fa315b40">W08-0124</url>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="25">
       <title>Modelling and Detecting Decisions in Multi-party Dialogue</title>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Matthew</first><last>Frampton</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
@@ -248,7 +248,7 @@
     <paper id="27">
       <title>Evaluation Understudy for Dialogue Coherence Models</title>
       <author><first>Sudeep</first><last>Gandhe</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>172–181</pages>
       <url hash="10be45f7">W08-0127</url>
       <bibkey>gandhe-traum-2008-evaluation</bibkey>
@@ -256,14 +256,14 @@
     <paper id="28">
       <title>A Framework for Model-based Evaluation of Spoken Dialog Systems</title>
       <author><first>Sebastian</first><last>Möller</last></author>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <pages>182–189</pages>
       <url hash="9199b8b6">W08-0128</url>
       <bibkey>moller-ward-2008-framework</bibkey>
     </paper>
     <paper id="29">
       <title>The Effect of Dialogue System Output Style Variation on Users’ Evaluation Judgments and Input Style</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Olga</first><last>Kukina</last></author>
       <pages>190–197</pages>
       <url hash="7029bbfe">W08-0129</url>
@@ -272,7 +272,7 @@
     <paper id="30">
       <title>Making Grammar-Based Generation Easier to Deploy in Dialogue Systems</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <pages>198–207</pages>
       <url hash="b4bcca8c">W08-0130</url>
@@ -283,7 +283,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Issues in Teaching Computational Linguistics</booktitle>
       <url hash="c348795b">W08-02</url>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <editor><first>Chris</first><last>Brew</last></editor>
       <editor><first>Fei</first><last>Xia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -306,7 +306,7 @@
     </paper>
     <paper id="2">
       <title>Building a Flexible, Collaborative, Intensive Master’s Program in Computational Linguistics</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Erik</first><last>Bansleben</last></author>
       <pages>10–18</pages>
@@ -329,7 +329,7 @@
     </paper>
     <paper id="5">
       <title>Strategies for Teaching “Mixed” Computational Linguistics Classes</title>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>36–44</pages>
       <url hash="23ac118e">W08-0205</url>
       <bibkey>fosler-lussier-2008-strategies</bibkey>
@@ -361,14 +361,14 @@
     <paper id="9">
       <title>Combining Open-Source with Research to Re-engineer a Hands-on Introductory <fixed-case>NLP</fixed-case> Course</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <pages>71–79</pages>
       <url hash="b08b7394">W08-0209</url>
       <bibkey>madnani-dorr-2008-combining</bibkey>
     </paper>
     <paper id="10">
       <title>Zero to Spoken Dialogue System in One Quarter: Teaching Computational Linguistics to Linguists Using Regulus</title>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Gwen</first><last>Christian</last></author>
       <pages>80–86</pages>
       <url hash="e801d0c8">W08-0210</url>
@@ -376,8 +376,8 @@
     </paper>
     <paper id="11">
       <title>The <fixed-case>N</fixed-case>orth <fixed-case>A</fixed-case>merican Computational Linguistics Olympiad (<fixed-case>NACLO</fixed-case>)</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Thomas E.</first><last>Payne</last></author>
       <pages>87–96</pages>
       <url hash="2d21d6a1">W08-0211</url>
@@ -385,15 +385,15 @@
     </paper>
     <paper id="12">
       <title>Competitive Grammar Writing</title>
-      <author><first>Jason</first><last>Eisner</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>97–105</pages>
       <url hash="845e0690">W08-0212</url>
       <bibkey>eisner-smith-2008-competitive</bibkey>
     </paper>
     <paper id="13">
       <title>Studying Discourse and Dialogue with <fixed-case>SIDG</fixed-case>rid</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>106–113</pages>
       <url hash="ee180c53">W08-0213</url>
       <bibkey>levow-2008-studying</bibkey>
@@ -428,7 +428,7 @@
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Josh</first><last>Schroeder</last></editor>
-      <editor><first>Cameron Shaw</first><last>Fordyce</last></editor>
+      <editor id="cameron-shaw-fordyce"><first>Cameron Shaw</first><last>Fordyce</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
       <month>June</month>
@@ -453,7 +453,7 @@
     <paper id="2">
       <title>Rich Source-Side Context for Statistical Machine Translation</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>9–17</pages>
       <url hash="639f6258">W08-0302</url>
       <bibkey>gimpel-smith-2008-rich</bibkey>
@@ -461,7 +461,7 @@
     <paper id="3">
       <title>Discriminative Word Alignment via Alignment Matrix Modeling</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>18–25</pages>
       <url hash="0f9c9124">W08-0303</url>
       <bibkey>niehues-vogel-2008-discriminative</bibkey>
@@ -469,8 +469,8 @@
     <paper id="4">
       <title>Regularization and Search for Minimum Error Rate Training</title>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>26–34</pages>
       <url hash="5f329f81">W08-0304</url>
       <bibkey>cer-etal-2008-regularization</bibkey>
@@ -486,16 +486,16 @@
     </paper>
     <paper id="6">
       <title>Using Syntax to Improve Word Alignment Precision for Syntax-Based Machine Translation</title>
-      <author><first>Victoria</first><last>Fossum</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>44–52</pages>
       <url hash="810f1dc7">W08-0306</url>
       <bibkey>fossum-etal-2008-using</bibkey>
     </paper>
     <paper id="7">
       <title>Using Shallow Syntax Information to Improve Word Alignment and Reordering for <fixed-case>SMT</fixed-case></title>
-      <author><first>Josep M.</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>53–61</pages>
       <url hash="ccce0c63">W08-0307</url>
@@ -522,13 +522,13 @@
     </paper>
     <paper id="10">
       <title>Limsi’s Statistical Translation Systems for <fixed-case>WMT</fixed-case>‘08</title>
-      <author><first>Daniel</first><last>Déchelotte</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
+      <author id="daniel-dechelotte"><first>Daniel</first><last>Déchelotte</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Jean-Luc</first><last>Gauvain</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>107–110</pages>
       <url hash="23e5395b">W08-0310</url>
@@ -537,8 +537,8 @@
     <paper id="11">
       <title>The <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho Translation System</title>
       <author><first>Attila</first><last>Novák</last></author>
-      <author><first>László</first><last>Tihanyi</last></author>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="laszlo-tihanyi"><first>László</first><last>Tihanyi</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <pages>111–114</pages>
       <url hash="326a1cbb">W08-0311</url>
       <bibkey>novak-etal-2008-metamorpho</bibkey>
@@ -546,7 +546,7 @@
     <paper id="12">
       <title>Meteor, <fixed-case>M</fixed-case>-<fixed-case>BLEU</fixed-case> and <fixed-case>M</fixed-case>-<fixed-case>TER</fixed-case>: Evaluation Metrics for High-Correlation with Human Rankings of Machine Translation Output</title>
       <author><first>Abhaya</first><last>Agarwal</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>115–118</pages>
       <url hash="f5db91b5">W08-0312</url>
       <bibkey>agarwal-lavie-2008-meteor</bibkey>
@@ -573,14 +573,14 @@
     <paper id="15">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> <fixed-case>N</fixed-case>gram-Based Statistical Machine Translation System for <fixed-case>ACL</fixed-case>-<fixed-case>WMT</fixed-case> 2008</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Adolfo</first><last>Hernández H.</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>Carlos A.</first><last>Henríquez Q.</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández H.</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="carlos-henriquez"><first>Carlos A.</first><last>Henríquez Q.</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>127–130</pages>
       <url hash="646d3d62">W08-0315</url>
       <bibkey>khalilov-etal-2008-talp</bibkey>
@@ -588,7 +588,7 @@
     <paper id="16">
       <title><fixed-case>E</fixed-case>uropean Language Translation with Weighted Finite State Transducers: The <fixed-case>CUED</fixed-case> <fixed-case>MT</fixed-case> System for the 2008 <fixed-case>ACL</fixed-case> Workshop on <fixed-case>SMT</fixed-case></title>
       <author><first>Graeme</first><last>Blackwood</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Jamie</first><last>Brunning</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>131–134</pages>
@@ -615,15 +615,15 @@
     </paper>
     <paper id="19">
       <title>Phrase-Based and Deep Syntactic <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>zech Statistical Machine Translation</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>143–146</pages>
       <url hash="11814c96">W08-0319</url>
       <bibkey>bojar-hajic-2008-phrase</bibkey>
     </paper>
     <paper id="20">
       <title>Improving <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Statistical Machine Translation: Experiments in Domain Adaptation, Sentence Paraphrasing, Tokenization, and Recasing</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>147–150</pages>
       <url hash="451fd6bd">W08-0320</url>
       <bibkey>nakov-2008-improving</bibkey>
@@ -632,7 +632,7 @@
       <title>Improving Word Alignment with Language Model Based Confidence Scores</title>
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>151–154</pages>
       <url hash="61c087be">W08-0321</url>
       <bibkey>bach-etal-2008-improving</bibkey>
@@ -661,15 +661,15 @@
       <author><first>Vamshi</first><last>Ambati</last></author>
       <author><first>Alok</first><last>Parlikar</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>163–166</pages>
       <url hash="5e42b22d">W08-0324</url>
       <bibkey>hanneman-etal-2008-statistical</bibkey>
     </paper>
     <paper id="25">
       <title><fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case>: Highly Modular <fixed-case>MT</fixed-case> System with Tectogrammatics Used as Transfer Layer</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
-      <author><first>Jan</first><last>Ptáček</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="jan-ptacek"><first>Jan</first><last>Ptáček</last></author>
       <author><first>Petr</first><last>Pajas</last></author>
       <pages>167–170</pages>
       <url hash="58b5840f">W08-0325</url>
@@ -687,7 +687,7 @@
     </paper>
     <paper id="27">
       <title>Can we Relearn an <fixed-case>RBMT</fixed-case> System?</title>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>175–178</pages>
@@ -696,9 +696,9 @@
     </paper>
     <paper id="28">
       <title>Using <fixed-case>M</fixed-case>oses to Integrate Multiple Rule-Based Machine Translation Engines into a Hybrid System</title>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>Hervé</first><last>Saint-Amand</last></author>
+      <author id="herve-saint-amand"><first>Hervé</first><last>Saint-Amand</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Yu</first><last>Chen</last></author>
@@ -708,10 +708,10 @@
     </paper>
     <paper id="29">
       <title>Incremental Hypothesis Alignment for Building Confusion Networks with Application to Machine Translation System Combination</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>183–186</pages>
       <url hash="9dd0312a">W08-0329</url>
       <bibkey>rosti-etal-2008-incremental</bibkey>
@@ -733,15 +733,15 @@
     </paper>
     <paper id="32">
       <title>A Smorgasbord of Features for Automatic <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>195–198</pages>
       <url hash="65ab33c2">W08-0332</url>
       <bibkey>gimenez-marquez-2008-smorgasbord</bibkey>
     </paper>
     <paper id="33">
       <title>Fast, Easy, and Cheap: Construction of Statistical Machine Translation Models with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Aaron</first><last>Cordova</last></author>
       <author><first>Alex</first><last>Mont</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
@@ -752,7 +752,7 @@
     <paper id="34">
       <title>Dynamic Model Interpolation for Statistical Machine Translation</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>208–215</pages>
       <url hash="6f75a044">W08-0334</url>
       <bibkey>finch-sumita-2008-dynamic</bibkey>
@@ -761,16 +761,16 @@
       <title>Improved Statistical Machine Translation by Multiple <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <author><first>Keiji</first><last>Yasuda</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>216–223</pages>
       <url hash="1a330f85">W08-0335</url>
       <bibkey>zhang-etal-2008-improved</bibkey>
     </paper>
     <paper id="36">
       <title>Optimizing <fixed-case>C</fixed-case>hinese Word Segmentation for Machine Translation Performance</title>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>224–232</pages>
       <url hash="b5d52af6">W08-0336</url>
       <bibkey>chang-etal-2008-optimizing</bibkey>
@@ -794,9 +794,9 @@
     </frontmatter>
     <paper id="1">
       <title>Imposing Constraints from the Source Tree on <fixed-case>ITG</fixed-case> Constraints for <fixed-case>SMT</fixed-case></title>
-      <author><first>Hirofumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirofumi</first><last>Yamamoto</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1–9</pages>
       <url hash="017d54ad">W08-0401</url>
       <bibkey>yamamoto-etal-2008-imposing</bibkey>
@@ -804,7 +804,7 @@
     <paper id="2">
       <title>A Scalable Decoder for Parsing-Based Machine Translation with Equivalent Language Model State Maintenance</title>
       <author><first>Zhifei</first><last>Li</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <pages>10–18</pages>
       <url hash="74ab495f">W08-0402</url>
       <bibkey>li-khudanpur-2008-scalable</bibkey>
@@ -814,7 +814,7 @@
       <author><first>Bowen</first><last>Zhou</last></author>
       <author><first>Bing</first><last>Xiang</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
-      <author><first>Yuqing</first><last>Gao</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Gao</last></author>
       <pages>19–27</pages>
       <url hash="83e40fc0">W08-0403</url>
       <bibkey>zhou-etal-2008-prior</bibkey>
@@ -828,7 +828,7 @@
     </paper>
     <paper id="5">
       <title>A Rule-Driven Dynamic Programming Decoder for Statistical <fixed-case>MT</fixed-case></title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <pages>37–45</pages>
       <url hash="ab1d5d50">W08-0405</url>
       <bibkey>tillmann-2008-rule</bibkey>
@@ -851,7 +851,7 @@
     <paper id="8">
       <title>Multiple Reorderings in Phrase-Based Machine Translation</title>
       <author><first>Niyu</first><last>Ge</last></author>
-      <author><first>Abe</first><last>Ittycheriah</last></author>
+      <author id="abe-ittycheriah"><first>Abe</first><last>Ittycheriah</last></author>
       <author><first>Kishore</first><last>Papineni</last></author>
       <pages>61–68</pages>
       <url hash="dc29037c">W08-0408</url>
@@ -869,16 +869,16 @@
     </paper>
     <paper id="10">
       <title>Inductive Detection of Language Features via Clustering Minimal Pairs: Toward Feature-Rich Grammars in Machine Translation</title>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>78–86</pages>
       <url hash="56a0b70a">W08-0410</url>
       <bibkey>clark-etal-2008-inductive</bibkey>
     </paper>
     <paper id="11">
       <title>Syntax-Driven Learning of Sub-Sentential Translation Equivalents and Translation Rules from Parsed Parallel Corpora</title>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Alok</first><last>Parlikar</last></author>
       <author><first>Vamshi</first><last>Ambati</last></author>
       <pages>87–95</pages>
@@ -890,7 +890,7 @@
     <meta>
       <booktitle>Software Engineering, Testing, and Quality Assurance for Natural Language Processing</booktitle>
       <url hash="6ace7814">W08-05</url>
-      <editor><first>K. Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Bob</first><last>Carpenter</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
@@ -913,14 +913,14 @@
     <paper id="2">
       <title>Type-checking in Formally Non-typed Systems</title>
       <author><first>Dick</first><last>Crouch</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <pages>3–4</pages>
       <url hash="35bebe43">W08-0502</url>
       <bibkey>crouch-king-2008-type</bibkey>
     </paper>
     <paper id="3">
       <title>zymake: A Computational Workflow System for Machine Learning and Natural Language Processing</title>
-      <author><first>Eric</first><last>Breck</last></author>
+      <author id="eric-breck"><first>Eric</first><last>Breck</last></author>
       <pages>5–13</pages>
       <url hash="b44bf809">W08-0503</url>
       <bibkey>breck-2008-zymake</bibkey>
@@ -929,8 +929,8 @@
       <title>Evaluating the Effects of Treebank Size in a Practical Application for Parsing</title>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Rune</first><last>Saetre</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Saetre</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>14–20</pages>
       <url hash="7c149743">W08-0504</url>
       <bibkey>sagae-etal-2008-evaluating</bibkey>
@@ -945,8 +945,8 @@
     <paper id="6">
       <title>Software Testing and the Naturally Occurring Data Assumption in Natural Language Processing</title>
       <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>William A.</first><last>Baumgartner Jr.</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="william-a-baumgartner-jr"><first>William A.</first><last>Baumgartner Jr.</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>23–30</pages>
       <url hash="5d389371">W08-0506</url>
       <bibkey>cohen-etal-2008-software</bibkey>
@@ -971,7 +971,7 @@
     <paper id="9">
       <title>Parallel Implementations of Word Alignment Tool</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>49–57</pages>
       <url hash="5b8e6ff8">W08-0509</url>
       <bibkey>gao-vogel-2008-parallel</bibkey>
@@ -987,7 +987,7 @@
     <paper id="11">
       <title><fixed-case>B</fixed-case>uckwalter-based Lookup Tool as Language Resource for <fixed-case>A</fixed-case>rabic Language Learners</title>
       <author><first>Jeffrey</first><last>Micher</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>66–67</pages>
       <url hash="5df4eefd">W08-0511</url>
       <bibkey>micher-voss-2008-buckwalter</bibkey>
@@ -995,12 +995,12 @@
     <paper id="12">
       <title>Reengineering a Domain-Independent Framework for Spoken Dialogue Systems</title>
       <author><first>Filipe M.</first><last>Martins</last></author>
-      <author><first>Ana</first><last>Mendes</last></author>
+      <author id="ana-cristina-mendes"><first>Ana</first><last>Mendes</last></author>
       <author><first>Mácio Freitas</first><last>Viveiros</last></author>
       <author><first>Joana Paulo</first><last>Pardal</last></author>
       <author><first>Pedro</first><last>Arez</last></author>
-      <author><first>Nuno J.</first><last>Mamede</last></author>
-      <author><first>João Paulo</first><last>Neto</last></author>
+      <author id="nuno-mamede"><first>Nuno J.</first><last>Mamede</last></author>
+      <author id="joao-p-neto"><first>João Paulo</first><last>Neto</last></author>
       <pages>68–76</pages>
       <url hash="7470d9ce">W08-0512</url>
       <bibkey>martins-etal-2008-reengineering</bibkey>
@@ -1012,10 +1012,10 @@
       <url hash="71e0f3d0">W08-06</url>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
       <month>June</month>
@@ -1041,7 +1041,7 @@
     <paper id="2">
       <title>Extracting Clinical Relationships from Patient Narratives</title>
       <author><first>Angus</first><last>Roberts</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
       <pages>10–18</pages>
       <url hash="15362a7c">W08-0602</url>
@@ -1056,8 +1056,8 @@
     </paper>
     <paper id="4">
       <title>Mining the Biomedical Literature for Genic Information</title>
-      <author><first>Catalina O.</first><last>Tudor</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="catalina-oana-tudor"><first>Catalina O.</first><last>Tudor</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author><first>Carl J.</first><last>Schmidt</last></author>
       <pages>28–29</pages>
       <url hash="e6a5d409">W08-0604</url>
@@ -1076,7 +1076,7 @@
       <title>The <fixed-case>B</fixed-case>io<fixed-case>S</fixed-case>cope corpus: annotation for negation, uncertainty and their scope in biomedical texts</title>
       <author><first>György</first><last>Szarvas</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>János</first><last>Csirik</last></author>
       <pages>38–45</pages>
       <url hash="b1a2c937">W08-0606</url>
@@ -1092,7 +1092,7 @@
     </paper>
     <paper id="8">
       <title>Cascaded Classifiers for Confidence-Based Chemical Named Entity Recognition</title>
-      <author><first>Peter</first><last>Corbett</last></author>
+      <author id="peter-corbett"><first>Peter</first><last>Corbett</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <pages>54–62</pages>
       <url hash="3a1a7440">W08-0608</url>
@@ -1102,7 +1102,7 @@
       <title>How to Make the Most of <fixed-case>NE</fixed-case> Dictionaries in Statistical <fixed-case>NER</fixed-case></title>
       <author><first>Yutaka</first><last>Sasaki</last></author>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>63–70</pages>
       <url hash="135b1491">W08-0609</url>
@@ -1120,15 +1120,15 @@
       <title>Knowledge Sources for Word Sense Disambiguation of Biomedical Text</title>
       <author><first>Mark</first><last>Stevenson</last></author>
       <author><first>Yinkun</first><last>Guo</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <pages>80–87</pages>
       <url hash="f4174aff">W08-0611</url>
       <bibkey>stevenson-etal-2008-knowledge</bibkey>
     </paper>
     <paper id="12">
       <title>Automatic inference of indexing rules for <fixed-case>MEDLINE</fixed-case></title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Sonya</first><last>Shooshan</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
       <pages>88–89</pages>
@@ -1155,10 +1155,10 @@
       <title>A Pilot Annotation to Investigate Discourse Connectivity in Biomedical Text</title>
       <author><first>Hong</first><last>Yu</last></author>
       <author><first>Nadya</first><last>Frid</last></author>
-      <author><first>Susan</first><last>McRoy</last></author>
+      <author id="susan-w-mcroy"><first>Susan</first><last>McRoy</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>92–93</pages>
       <url hash="c0b766ee">W08-0614</url>
       <bibkey>yu-etal-2008-pilot</bibkey>
@@ -1167,7 +1167,7 @@
       <title>Conditional Random Fields and Support Vector Machines for Disorder Named Entity Recognition in Clinical Texts</title>
       <author><first>Dingcheng</first><last>Li</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
-      <author><first>Karin</first><last>Kipper-Schuler</last></author>
+      <author id="karin-kipper"><first>Karin</first><last>Kipper-Schuler</last></author>
       <pages>94–95</pages>
       <url hash="d9e69cf1">W08-0615</url>
       <bibkey>li-etal-2008-conditional</bibkey>
@@ -1194,7 +1194,7 @@
     </paper>
     <paper id="18">
       <title>A preliminary approach to extract drugs by combining <fixed-case>UMLS</fixed-case> resources and <fixed-case>USAN</fixed-case> naming conventions</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <author><first>Doaa</first><last>Samy</last></author>
       <pages>100–101</pages>
@@ -1204,7 +1204,7 @@
     <paper id="19">
       <title>Mapping Clinical Notes to Medical Terminologies at Point of Care</title>
       <author><first>Yefeng</first><last>Wang</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>102–103</pages>
       <url hash="8f601230">W08-0619</url>
       <bibkey>wang-patrick-2008-mapping</bibkey>
@@ -1212,16 +1212,16 @@
     <paper id="20">
       <title>An Approach to Reducing Annotation Costs for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case></title>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>104–105</pages>
       <url hash="9a27c1d3">W08-0620</url>
       <bibkey>bloodgood-vijay-shanker-2008-approach</bibkey>
     </paper>
     <paper id="21">
       <title>Temporal Annotation of Clinical Text</title>
-      <author><first>Danielle</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle</first><last>Mowery</last></author>
       <author><first>Henk</first><last>Harkema</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>106–107</pages>
       <url hash="37b00cde">W08-0621</url>
       <bibkey>mowery-etal-2008-temporal</bibkey>
@@ -1230,7 +1230,7 @@
       <title><fixed-case>CBR</fixed-case>-Tagger: a case-based reasoning approach to the gene/protein mention problem</title>
       <author><first>Mariana</first><last>Neves</last></author>
       <author><first>Monica</first><last>Chagoyen</last></author>
-      <author><first>José María</first><last>Carazo</last></author>
+      <author id="jose-maria-carazo"><first>José María</first><last>Carazo</last></author>
       <author><first>Alberto</first><last>Pascual-Montano</last></author>
       <pages>108–109</pages>
       <url hash="f3955ab9">W08-0622</url>
@@ -1239,7 +1239,7 @@
     <paper id="23">
       <title>Textual Information for Predicting Functional Properties of the Genes</title>
       <author><first>Oana</first><last>Frunza</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>110–111</pages>
       <url hash="46e7c3c6">W08-0623</url>
       <bibkey>frunza-inkpen-2008-textual</bibkey>
@@ -1249,7 +1249,7 @@
       <author><first>Pieter</first><last>van der Horn</last></author>
       <author><first>Bart</first><last>Bakker</last></author>
       <author><first>Gijs</first><last>Geleijnse</last></author>
-      <author><first>Jan</first><last>Korst</last></author>
+      <author id="jan-kors"><first>Jan</first><last>Korst</last></author>
       <author><first>Sergei</first><last>Kurkin</last></author>
       <pages>112–113</pages>
       <url hash="0fec0a4e">W08-0624</url>
@@ -1258,7 +1258,7 @@
     <paper id="25">
       <title>Statistical Term Profiling for Query Pattern Mining</title>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Pinar</first><last>Oezden Wennerberg</last></author>
+      <author id="pinar-oezden-wennerberg"><first>Pinar</first><last>Oezden Wennerberg</last></author>
       <author><first>Sonja</first><last>Zillner</last></author>
       <pages>114–115</pages>
       <url hash="0b7419c7">W08-0625</url>
@@ -1277,7 +1277,7 @@
       <author><first>Yue</first><last>Wang</last></author>
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
-      <author><first>Rune</first><last>Saetre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Saetre</last></author>
       <author><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>118–119</pages>
       <url hash="dd6319f3">W08-0627</url>
@@ -1287,7 +1287,7 @@
       <title>Adaptive Information Extraction for Complex Biomedical Tasks</title>
       <author><first>Donghui</first><last>Feng</last></author>
       <author><first>Gully</first><last>Burns</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>120–121</pages>
       <url hash="e6502e76">W08-0628</url>
       <bibkey>feng-etal-2008-adaptive</bibkey>
@@ -1297,7 +1297,7 @@
     <meta>
       <booktitle>Proceedings of the Tenth Meeting of <fixed-case>ACL</fixed-case> Special Interest Group on Computational Morphology and Phonology</booktitle>
       <url hash="b4dcbe29">W08-07</url>
-      <editor><first>Jason</first><last>Eisner</last></editor>
+      <editor id="jason-eisner"><first>Jason</first><last>Eisner</last></editor>
       <editor><first>Jeffrey</first><last>Heinz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
@@ -1325,7 +1325,7 @@
     </paper>
     <paper id="3">
       <title>A <fixed-case>B</fixed-case>ayesian Model of Natural Language Phonology: Generating Alternations from Underlying Forms</title>
-      <author><first>David</first><last>Ellis</last></author>
+      <author id="david-ellis"><first>David</first><last>Ellis</last></author>
       <pages>12–19</pages>
       <url hash="a7e1e3a3">W08-0703</url>
       <bibkey>ellis-2008-bayesian</bibkey>
@@ -1354,7 +1354,7 @@
     </paper>
     <paper id="7">
       <title>Phonotactic Probability and the <fixed-case>M</fixed-case>aori Passive: A Computational Approach</title>
-      <author><first>‘Ōiwi</first><last>Parker Jones</last></author>
+      <author id="oiwi-parker-jones"><first>‘Ōiwi</first><last>Parker Jones</last></author>
       <pages>39–48</pages>
       <url hash="63d91221">W08-0707</url>
       <bibkey>parker-jones-2008-phonotactic</bibkey>
@@ -1362,9 +1362,9 @@
     <paper id="8">
       <title>Evaluating an Agglutinative Segmentation Model for <fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>or</title>
       <author><first>Christian</first><last>Monson</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>49–58</pages>
       <url hash="a7ffdb5b">W08-0708</url>
       <bibkey>monson-etal-2008-evaluating</bibkey>
@@ -1389,11 +1389,11 @@
     <paper id="1">
       <title>A Multimodal Home Entertainment Interface via a Mobile Device</title>
       <author><first>Alexander</first><last>Gruenstein</last></author>
-      <author><first>Bo-June Paul</first><last>Hsu</last></author>
-      <author><first>James</first><last>Glass</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="bo-june-paul-hsu"><first>Bo-June Paul</first><last>Hsu</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Lee</first><last>Hetherington</last></author>
-      <author><first>Scott</first><last>Cyphers</last></author>
+      <author id="scott-cyphers"><first>Scott</first><last>Cyphers</last></author>
       <author><first>Ibrahim</first><last>Badr</last></author>
       <author><first>Chao</first><last>Wang</last></author>
       <author><first>Sean</first><last>Liu</last></author>
@@ -1406,9 +1406,9 @@
       <author><first>Kriste</first><last>Krstovski</last></author>
       <author><first>Michael</first><last>Decerbo</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>David</first><last>Stallard</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
       <author><first>Shirin</first><last>Saleem</last></author>
-      <author><first>Premkumar</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Premkumar</first><last>Natarajan</last></author>
       <pages>10–12</pages>
       <url hash="69587f90">W08-0802</url>
       <bibkey>krstovski-etal-2008-wearable</bibkey>
@@ -1417,7 +1417,7 @@
       <title>Information extraction using finite state automata and syllable n-grams in a mobile environment</title>
       <author><first>Choong-Nyoung</first><last>Seon</last></author>
       <author><first>Harksoo</first><last>Kim</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>13–18</pages>
       <url hash="a9d3f7ac">W08-0803</url>
       <bibkey>seon-etal-2008-information</bibkey>
@@ -1433,7 +1433,7 @@
     <paper id="5">
       <title>Mixture Pruning and Roughening for Scalable Acoustic Models</title>
       <author><first>David</first><last>Huggins-Daines</last></author>
-      <author><first>Alexander I.</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander I.</first><last>Rudnicky</last></author>
       <pages>21–24</pages>
       <url hash="af665ad3">W08-0805</url>
       <bibkey>huggins-daines-rudnicky-2008-mixture</bibkey>
@@ -1457,7 +1457,7 @@
   <volume id="9" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Third Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Rachele</first><last>De Felice</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1480,16 +1480,16 @@
     </paper>
     <paper id="2">
       <title>Classification Errors in a Domain-Independent Assessment System</title>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>10–18</pages>
       <url hash="be9cde8d">W08-0902</url>
       <bibkey>nielsen-etal-2008-classification</bibkey>
     </paper>
     <paper id="3">
       <title>King Alfred: A Translation Environment for Learners of <fixed-case>A</fixed-case>nglo-<fixed-case>S</fixed-case>axon <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Lisa N.</first><last>Michaud</last></author>
+      <author id="lisa-n-michaud"><first>Lisa N.</first><last>Michaud</last></author>
       <pages>19–26</pages>
       <url hash="df0bd130">W08-0903</url>
       <bibkey>michaud-2008-king</bibkey>
@@ -1497,7 +1497,7 @@
     <paper id="4">
       <title>Recognizing Noisy <fixed-case>R</fixed-case>omanized <fixed-case>J</fixed-case>apanese Words in Learner <fixed-case>E</fixed-case>nglish</title>
       <author><first>Ryo</first><last>Nagata</last></author>
-      <author><first>Jun-ichi</first><last>Kakegawa</last></author>
+      <author id="junichi-kakegawa"><first>Jun-ichi</first><last>Kakegawa</last></author>
       <author><first>Hiromi</first><last>Sugimoto</last></author>
       <author><first>Yukiko</first><last>Yabuta</last></author>
       <pages>27–35</pages>
@@ -1506,7 +1506,7 @@
     </paper>
     <paper id="5">
       <title>An Annotated Corpus Outside Its Original Context: A Corpus-Based Exercise Book</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Ondřej</first><last>Kučera</last></author>
       <pages>36–43</pages>
       <url hash="c21c6dd0">W08-0905</url>
@@ -1522,11 +1522,11 @@
     </paper>
     <paper id="7">
       <title>Learner Characteristics and Feedback in Tutorial Dialogue</title>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
       <author><first>Michael</first><last>Wallis</last></author>
       <author><first>Mladen</first><last>Vouk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>53–61</pages>
       <url hash="65d74683">W08-0907</url>
       <bibkey>boyer-etal-2008-learner</bibkey>
@@ -1577,7 +1577,7 @@
     <paper id="13">
       <title>Diagnosing Meaning Errors in Short Answers to Reading Comprehension Questions</title>
       <author><first>Stacey</first><last>Bailey</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>107–115</pages>
       <url hash="89613bc5">W08-0913</url>
       <bibkey>bailey-meurers-2008-diagnosing</bibkey>
@@ -1587,7 +1587,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Parsing <fixed-case>G</fixed-case>erman</booktitle>
       <url hash="4ce95a1c">W08-10</url>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
       <editor><first>Gerald</first><last>Penn</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Columbus, Ohio</address>
@@ -1623,7 +1623,7 @@
     <paper id="4">
       <title>Revisiting the Impact of Different Annotation Schemes on <fixed-case>PCFG</fixed-case> Parsing: A Grammatical Dependency Evaluation</title>
       <author><first>Adriane</first><last>Boyd</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>24–32</pages>
       <url hash="0ca6fc64">W08-1004</url>
       <bibkey>boyd-meurers-2008-revisiting</bibkey>
@@ -1638,8 +1638,8 @@
     </paper>
     <paper id="6">
       <title>Parsing Three <fixed-case>G</fixed-case>erman Treebanks: Lexicalized and Unlexicalized Baselines</title>
-      <author><first>Anna</first><last>Rafferty</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="anna-n-rafferty"><first>Anna</first><last>Rafferty</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>40–46</pages>
       <url hash="9975d09b">W08-1006</url>
       <bibkey>rafferty-manning-2008-parsing</bibkey>
@@ -1663,7 +1663,7 @@
   <volume id="11" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth International Natural Language Generation Conference</booktitle>
-      <editor><first>Michael</first><last>White</last></editor>
+      <editor id="michael-white"><first>Michael</first><last>White</last></editor>
       <editor><first>Crystal</first><last>Nakatsu</last></editor>
       <editor id="david-d-mcdonald"><first>David</first><last>McDonald</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1694,8 +1694,8 @@
     <paper id="3">
       <title>Generating Textual Summaries of Bar Charts</title>
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>7–15</pages>
       <url hash="6088c1e6">W08-1103</url>
       <bibkey>demir-etal-2008-generating</bibkey>
@@ -1703,9 +1703,9 @@
     <paper id="4">
       <title>Using Spatial Reference Frames to Generate Grounded Textual Summaries of Georeferenced Data</title>
       <author><first>Ross</first><last>Turner</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Ian</first><last>Davy</last></author>
+      <author id="ian-p-davy"><first>Ian</first><last>Davy</last></author>
       <pages>16–24</pages>
       <url hash="7bcd7f0e">W08-1104</url>
       <bibkey>turner-etal-2008-using</bibkey>
@@ -1721,7 +1721,7 @@
     <paper id="6">
       <title>Extractive vs. <fixed-case>NLG</fixed-case>-based Abstractive Summarization of Evaluative Text: The Effect of Corpus Controversiality</title>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Jackie C. K.</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie C. K.</first><last>Cheung</last></author>
       <pages>33–41</pages>
       <url hash="7a886a78">W08-1106</url>
       <bibkey>carenini-cheung-2008-extractive</bibkey>
@@ -1738,7 +1738,7 @@
     <paper id="8">
       <title>Attribute Selection for Referring Expression Generation: New Algorithms and Evaluation Methods</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>50–58</pages>
       <url hash="13b71133">W08-1108</url>
       <bibkey>gatt-belz-2008-attribute</bibkey>
@@ -1754,7 +1754,7 @@
     <paper id="10">
       <title>Using Tactical <fixed-case>NLG</fixed-case> to Induce Affective States: Empirical Investigations</title>
       <author><first>Ielka</first><last>van der Sluis</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>68–76</pages>
       <url hash="0cd4a952">W08-1110</url>
       <bibkey>van-der-sluis-mellish-2008-using</bibkey>
@@ -1762,7 +1762,7 @@
     <paper id="11">
       <title>Practical Grammar-Based <fixed-case>NLG</fixed-case> from Examples</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <pages>77–85</pages>
       <url hash="384ebe8c">W08-1111</url>
@@ -1770,16 +1770,16 @@
     </paper>
     <paper id="12">
       <title>Accurate and Robust <fixed-case>LFG</fixed-case>-Based Generation for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Yuqing</first><last>Guo</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
       <author><first>Haifeng</first><last>Wang</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>86–94</pages>
       <url hash="06e294c9">W08-1112</url>
       <bibkey>guo-etal-2008-accurate</bibkey>
     </paper>
     <paper id="13">
       <title>Automated Metrics That Agree With Human Judgements On Generated Output for an Embodied Conversational Agent</title>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <pages>95–103</pages>
       <url hash="a44fa34b">W08-1113</url>
       <bibkey>foster-2008-automated</bibkey>
@@ -1811,7 +1811,7 @@
     </paper>
     <paper id="17">
       <title>The Effect of Dialogue System Output Style Variation on Users’ Evaluation Judgments and Input Style</title>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <author><first>Ciprian</first><last>Gerstenberger</last></author>
       <author><first>Olga</first><last>Kukina</last></author>
       <author><first>Jan</first><last>Schehl</last></author>
@@ -1822,7 +1822,7 @@
     <paper id="18">
       <title>Evaluating an Ontology-Driven <fixed-case>WYSIWYM</fixed-case> Interface</title>
       <author><first>Feikje</first><last>Hielkema</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Peter</first><last>Edwards</last></author>
       <pages>138–146</pages>
       <url hash="ffea5f92">W08-1118</url>
@@ -1848,7 +1848,7 @@
     <paper id="21">
       <title>Degree of Abstraction in Referring Expression Generation and its Relation with the Construction of the Contrast Set</title>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>161–164</pages>
       <url hash="b46e966d">W08-1121</url>
       <bibkey>hervas-gervas-2008-degree</bibkey>
@@ -1858,7 +1858,7 @@
       <author><first>Deirdre</first><last>Hogan</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>165–168</pages>
       <url hash="815eefc8">W08-1122</url>
       <bibkey>hogan-etal-2008-parser</bibkey>
@@ -1867,14 +1867,14 @@
       <title>Creation of a New Domain and Evaluation of Comparison Generation in a Natural Language Generation System</title>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Amy</first><last>Isard</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>169–172</pages>
       <url hash="1b89d84d">W08-1123</url>
       <bibkey>marge-etal-2008-creation</bibkey>
     </paper>
     <paper id="24">
       <title>Generating Baseball Summaries from Multiple Perspectives by Reordering Content</title>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <author><first>Howard</first><last>Shrobe</last></author>
       <pages>173–176</pages>
       <url hash="1c3db1c3">W08-1124</url>
@@ -1889,7 +1889,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>REG</fixed-case> Challenge Preface</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>181–182</pages>
       <url hash="d0c11064">W08-1126</url>
@@ -1897,7 +1897,7 @@
     </paper>
     <paper id="27">
       <title>The <fixed-case>GREC</fixed-case> Challenge 2008: Overview and Evaluation Results</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <author><first>Jette</first><last>Viethen</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
@@ -1915,7 +1915,7 @@
     <paper id="29">
       <title><fixed-case>CNTS</fixed-case>: Memory-Based Learning of Generating Repeated References</title>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Kim</first><last>Luyckx</last></author>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
@@ -1926,7 +1926,7 @@
     <paper id="30">
       <title><fixed-case>OSU</fixed-case>-2: Generating Referring Expressions with a Maximum Entropy Classifier</title>
       <author><first>Emily</first><last>Jamison</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <pages>196–197</pages>
       <url hash="a5a36c5b">W08-1130</url>
       <bibkey>jamison-mehay-2008-osu</bibkey>
@@ -1934,7 +1934,7 @@
     <paper id="31">
       <title>The <fixed-case>TUNA</fixed-case> Challenge 2008: Overview and Evaluation Results</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <pages>198–206</pages>
       <url hash="43bc907e">W08-1131</url>
@@ -1949,16 +1949,16 @@
     </paper>
     <paper id="33">
       <title>Referring Expression Generation Using Speaker-based Attribute Selection and Trainable Realization (<fixed-case>ATTR</fixed-case>)</title>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
-      <author><first>Amanda J.</first><last>Stent</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="amanda-stent"><first>Amanda J.</first><last>Stent</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>211–214</pages>
       <url hash="4d6f0bf2">W08-1133</url>
       <bibkey>di-fabbrizio-etal-2008-referring</bibkey>
     </paper>
     <paper id="34">
       <title><fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>: Most-Frequent-Value-First Attribute Selection and Best-Scoring-Choice Realization</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
       <author><first>Carlos</first><last>León</last></author>
       <pages>215–218</pages>
@@ -1968,14 +1968,14 @@
     <paper id="35">
       <title><fixed-case>USP</fixed-case>-<fixed-case>EACH</fixed-case> Frequency-based Greedy Attribute Selection for Referring Expressions Generation</title>
       <author><first>Diego Jesus</first><last>de Lucena</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>219–220</pages>
       <url hash="3969a5ed">W08-1135</url>
       <bibkey>de-lucena-paraboni-2008-usp</bibkey>
     </paper>
     <paper id="36">
       <title>Referring Expression Generation Challenge 2008 <fixed-case>DIT</fixed-case> System Descriptions (<fixed-case>DIT</fixed-case>-<fixed-case>FBI</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>TVAS</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>CBSR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>RBR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>FBI</fixed-case>-<fixed-case>CBSR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>TVAS</fixed-case>-<fixed-case>RBR</fixed-case>)</title>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <author><first>Brian</first><last>Mac Namee</last></author>
       <pages>221–224</pages>
       <url hash="28eb0b8d">W08-1136</url>
@@ -1990,8 +1990,8 @@
     </paper>
     <paper id="38">
       <title><fixed-case>GRAPH</fixed-case>: The Costs of Redundancy in Referring Expressions</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Jette</first><last>Viethen</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <pages>227–229</pages>
@@ -2001,15 +2001,15 @@
     <paper id="39">
       <title><fixed-case>JU</fixed-case>-<fixed-case>PTBSGRE</fixed-case>: <fixed-case>GRE</fixed-case> Using Prefix Tree Based Structure</title>
       <author><first>Sibabrata</first><last>Paladhi</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>230–231</pages>
       <url hash="624b1873">W08-1139</url>
       <bibkey>paladhi-bandyopadhyay-2008-ju</bibkey>
     </paper>
     <paper id="40">
       <title>From <fixed-case>TUNA</fixed-case> Attribute Sets to <fixed-case>P</fixed-case>ortuguese Text: a First Report</title>
-      <author><first>Daniel Bastos</first><last>Pereira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="daniel-bastos-pereira"><first>Daniel Bastos</first><last>Pereira</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>232–233</pages>
       <url hash="6dbdd3e4">W08-1140</url>
       <bibkey>pereira-paraboni-2008-tuna</bibkey>
@@ -2026,9 +2026,9 @@
       <booktitle>Coling 2008: Proceedings of the workshop on Human Judgements in Computational Linguistics</booktitle>
       <url hash="fa3dd3b0">W08-12</url>
       <editor><first>Ron</first><last>Artstein</last></editor>
-      <editor><first>Gemma</first><last>Boleda</last></editor>
+      <editor id="gemma-boleda"><first>Gemma</first><last>Boleda</last></editor>
       <editor><first>Frank</first><last>Keller</last></editor>
-      <editor><first>Sabine</first><last>Schulte im Walde</last></editor>
+      <editor id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -2041,8 +2041,8 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: The Relevance of a Cognitive Model of the Mental Lexicon to Automatic Word Sense Disambiguation</title>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Susan</first><last>Brown</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
       <pages>1</pages>
       <url hash="ea90e49a">W08-1201</url>
       <bibkey>palmer-brown-2008-invited</bibkey>
@@ -2069,15 +2069,15 @@
       <author><first>Jean-Baptiste</first><last>Berthelin</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Martine</first><last>Hurault-Plantet</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>17–23</pages>
       <url hash="ff1c6562">W08-1204</url>
       <bibkey>berthelin-etal-2008-human</bibkey>
     </paper>
     <paper id="5">
       <title>Native Judgments of Non-Native Usage: Experiments in Preposition Error Detection</title>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>24–32</pages>
       <url hash="eb4279e3">W08-1205</url>
       <bibkey>tetreault-chodorow-2008-native</bibkey>
@@ -2122,21 +2122,21 @@
       <booktitle>Coling 2008: Proceedings of the workshop on Cross-Framework and Cross-Domain Parser Evaluation</booktitle>
       <url hash="2b84d193">W08-13</url>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Edward</first><last>Briscoe</last></editor>
+      <editor id="ted-briscoe"><first>Edward</first><last>Briscoe</last></editor>
       <editor><first>Aoife</first><last>Cahill</last></editor>
-      <editor><first>John</first><last>Carroll</last></editor>
+      <editor id="john-a-carroll"><first>John</first><last>Carroll</last></editor>
       <editor><first>Stephen</first><last>Clark</last></editor>
       <editor><first>Ann</first><last>Copestake</last></editor>
-      <editor><first>Dan</first><last>Flickinger</last></editor>
-      <editor><first>Josef</first><last>van Genabith</last></editor>
+      <editor id="dan-flickinger"><first>Dan</first><last>Flickinger</last></editor>
+      <editor id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></editor>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Aravind</first><last>Joshi</last></editor>
-      <editor><first>Ronald</first><last>Kaplan</last></editor>
-      <editor><first>Tracy Holloway</first><last>King</last></editor>
-      <editor><first>Sandra</first><last>Kuebler</last></editor>
+      <editor id="aravind-joshi"><first>Aravind</first><last>Joshi</last></editor>
+      <editor id="ronald-m-kaplan"><first>Ronald</first><last>Kaplan</last></editor>
+      <editor id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></editor>
       <editor><first>Dekang</first><last>Lin</last></editor>
       <editor><first>Jan Tore</first><last>Lønning</last></editor>
-      <editor><first>Christopher</first><last>Manning</last></editor>
+      <editor id="christopher-d-manning"><first>Christopher</first><last>Manning</last></editor>
       <editor><first>Yusuke</first><last>Miyao</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
@@ -2155,7 +2155,7 @@
     </frontmatter>
     <paper id="1">
       <title>The <fixed-case>S</fixed-case>tanford Typed Dependencies Representation</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Christopher D.</first><last>Manning</last></author>
       <pages>1–8</pages>
       <url hash="61ce9bc1">W08-1301</url>
@@ -2163,15 +2163,15 @@
     </paper>
     <paper id="2">
       <title>Exploring an Auxiliary Distribution Based Approach to Domain Adaptation of a Syntactic Disambiguation Model</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>9–16</pages>
       <url hash="be5880d1">W08-1302</url>
       <bibkey>plank-van-noord-2008-exploring</bibkey>
     </paper>
     <paper id="3">
       <title>Toward an Underspecifiable Corpus Annotation Scheme</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <pages>17–23</pages>
       <url hash="4e223c58">W08-1303</url>
       <bibkey>tateisi-2008-toward</bibkey>
@@ -2185,10 +2185,10 @@
     </paper>
     <paper id="5">
       <title>Parser Evaluation Across Frameworks without Format Conversion</title>
-      <author><first>Wai Lok</first><last>Tam</last></author>
+      <author id="wai-lok-tam"><first>Wai Lok</first><last>Tam</last></author>
       <author><first>Yo</first><last>Sato</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>29–35</pages>
       <url hash="c68c154e">W08-1305</url>
       <bibkey>tam-etal-2008-parser</bibkey>
@@ -2197,10 +2197,10 @@
       <title>Large Scale Production of Syntactic Annotations to Move Forward</title>
       <author><first>Anne</first><last>Vilnat</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Sylvain</first><last>Loiseau</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <pages>36–43</pages>
       <url hash="e2e2e0c9">W08-1306</url>
       <bibkey>vilnat-etal-2008-large</bibkey>
@@ -2216,7 +2216,7 @@
     <paper id="8">
       <title>‘Deep’ Grammatical Relations for Semantic Interpretation</title>
       <author><first>Mark</first><last>McConville</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <pages>51–58</pages>
       <url hash="fc8b8940">W08-1308</url>
       <bibkey>mcconville-dzikovska-2008-deep</bibkey>
@@ -2226,7 +2226,7 @@
     <meta>
       <booktitle>Coling 2008: Proceedings of the workshop Multi-source Multilingual Information Extraction and Summarization</booktitle>
       <url hash="1140c84b">W08-14</url>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <editor><first>Horacio</first><last>Saggion</last></editor>
       <editor><first>Roman</first><last>Yangarber</last></editor>
@@ -2242,7 +2242,7 @@
     </frontmatter>
     <paper id="1">
       <title>Generating Image Captions using Topic Focused Multi-document Summarization</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>1</pages>
       <url hash="0057bb1d">W08-1401</url>
       <bibkey>gaizauskas-2008-generating</bibkey>
@@ -2250,8 +2250,8 @@
     <paper id="2">
       <title>Learning to Match Names Across Languages</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Alex</first><last>Yeh</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
+      <author id="alexander-yeh"><first>Alex</first><last>Yeh</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
       <pages>2–9</pages>
       <url hash="82f123cb">W08-1402</url>
       <bibkey>mani-etal-2008-learning</bibkey>
@@ -2274,7 +2274,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>um: Query-Based Multi-Document Summarization</title>
-      <author><first>Mike</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
       <author><first>Carl</first><last>Camilleri</last></author>
       <pages>25–32</pages>
       <url hash="30903bd0">W08-1405</url>
@@ -2282,8 +2282,8 @@
     </paper>
     <paper id="6">
       <title>Mixed-Source Multi-Document Speech-to-Text Summarization</title>
-      <author><first>Ricardo</first><last>Ribeiro</last></author>
-      <author><first>David Martins</first><last>de Matos</last></author>
+      <author id="ricardo-ribeiro"><first>Ricardo</first><last>Ribeiro</last></author>
+      <author id="david-martins-de-matos"><first>David Martins</first><last>de Matos</last></author>
       <pages>33–40</pages>
       <url hash="910624fb">W08-1406</url>
       <bibkey>ribeiro-de-matos-2008-mixed</bibkey>
@@ -2291,7 +2291,7 @@
     <paper id="7">
       <title>Evaluating automatically generated user-focused multi-document summaries for geo-referenced images</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>41–48</pages>
       <url hash="da1ea93b">W08-1407</url>
       <bibkey>aker-gaizauskas-2008-evaluating</bibkey>
@@ -2317,11 +2317,11 @@
     <meta>
       <booktitle>Coling 2008: Proceedings of the workshop on Speech Processing for Safety Critical Translation and Pervasive Applications</booktitle>
       <url hash="b46e8dd2">W08-15</url>
-      <editor><first>Pierrette</first><last>Bouillon</last></editor>
+      <editor id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></editor>
       <editor><first>Farzad</first><last>Ehsani</last></editor>
-      <editor><first>Robert</first><last>Frederking</last></editor>
-      <editor><first>Michael</first><last>McTear</last></editor>
-      <editor><first>Manny</first><last>Rayner</last></editor>
+      <editor id="robert-frederking"><first>Robert</first><last>Frederking</last></editor>
+      <editor id="michael-f-mctear"><first>Michael</first><last>McTear</last></editor>
+      <editor id="manny-rayner"><first>Manny</first><last>Rayner</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -2335,8 +2335,8 @@
     <paper id="1">
       <title>Mitigation of Data Sparsity in Classifier-Based Translation</title>
       <author><first>Emil</first><last>Ettelaie</last></author>
-      <author><first>Panayiotis G.</first><last>Georgiou</last></author>
-      <author><first>Shrikanth S.</first><last>Narayanan</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis G.</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shrikanth S.</first><last>Narayanan</last></author>
       <pages>1–4</pages>
       <url hash="c9561dc0">W08-1501</url>
       <bibkey>ettelaie-etal-2008-mitigation</bibkey>
@@ -2350,10 +2350,10 @@
     </paper>
     <paper id="3">
       <title>An Integrated Dialog Simulation Technique for Evaluating Spoken Dialog Systems</title>
-      <author><first>Sangkeun</first><last>Jung</last></author>
+      <author id="sangkeun-jung"><first>Sangkeun</first><last>Jung</last></author>
       <author><first>Cheongjae</first><last>Lee</last></author>
       <author><first>Kyungduk</first><last>Kim</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>9–16</pages>
       <url hash="81a8bf31">W08-1503</url>
       <bibkey>jung-etal-2008-integrated</bibkey>
@@ -2381,7 +2381,7 @@
       <author><first>Jane</first><last>Brotanek</last></author>
       <author><first>Glenn</first><last>Flores</last></author>
       <author><first>Sonia</first><last>Halimi</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Elisabeth</first><last>Kron</last></author>
@@ -2395,7 +2395,7 @@
     </paper>
     <paper id="7">
       <title>Language Understanding in <fixed-case>M</fixed-case>aryland Virtual Patient</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Bruce</first><last>Jarrell</last></author>
@@ -2415,7 +2415,7 @@
     <paper id="9">
       <title>Speech Translation for Triage of Emergency Phonecalls in Minority Languages</title>
       <author><first>Udhyakumar</first><last>Nallasamy</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Tanja</first><last>Schultz</last></author>
       <author><first>Robert</first><last>Frederking</last></author>
       <author><first>Jerry</first><last>Weltman</last></author>
@@ -2426,9 +2426,9 @@
     <paper id="10">
       <title>Speech to Speech Translation for Nurse Patient Interaction</title>
       <author><first>Farzad</first><last>Ehsani</last></author>
-      <author><first>Jim</first><last>Kimzey</last></author>
+      <author id="jim-kinzey"><first>Jim</first><last>Kimzey</last></author>
       <author><first>Elaine</first><last>Zuber</last></author>
-      <author><first>Demitrios</first><last>Master</last></author>
+      <author id="demetrios-master"><first>Demitrios</first><last>Master</last></author>
       <author><first>Karen</first><last>Sudre</last></author>
       <pages>54–59</pages>
       <url hash="e34e4bc9">W08-1510</url>
@@ -2441,7 +2441,7 @@
       <author><first>Glenn</first><last>Flores</last></author>
       <author><first>Farzad</first><last>Ehsani</last></author>
       <author><first>Marianne</first><last>Starlander</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Jane</first><last>Brotanek</last></author>
       <author><first>Lukas</first><last>Biewald</last></author>
       <pages>60–63</pages>
@@ -2453,8 +2453,8 @@
     <meta>
       <booktitle>Coling 2008: Proceedings of the workshop on Knowledge and Reasoning for Answering Questions</booktitle>
       <url hash="8825cf20">W08-16</url>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
-      <editor><first>Patrick</first><last>Saint-Dizier</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -2467,7 +2467,7 @@
     </frontmatter>
     <paper id="1">
       <title>Semantic Chunk Annotation for complex questions using Conditional Random Field</title>
-      <author><first>Shixi</first><last>Fan</last></author>
+      <author id="shixi-fan"><first>Shixi</first><last>Fan</last></author>
       <author><first>Yaoyun</first><last>Zhang</last></author>
       <author><first>Wing W. Y.</first><last>Ng</last></author>
       <author><first>Xuan</first><last>Wang</last></author>
@@ -2479,8 +2479,8 @@
     <paper id="2">
       <title>Context Inducing Nouns</title>
       <author><first>Charlotte</first><last>Price</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <pages>9–16</pages>
       <url hash="01fd17c1">W08-1602</url>
       <bibkey>price-etal-2008-context</bibkey>
@@ -2496,7 +2496,7 @@
     </paper>
     <paper id="4">
       <title>Context Modelling for <fixed-case>IQA</fixed-case>: the Role of Tasks and Entities</title>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Manuel</first><last>Kirschner</last></author>
       <pages>25–32</pages>
       <url hash="95b73270">W08-1604</url>
@@ -2523,7 +2523,7 @@
       <booktitle>Coling 2008: Proceedings of the workshop on Grammar Engineering Across Frameworks</booktitle>
       <url hash="a8c7a723">W08-17</url>
       <editor><first>Stephen</first><last>Clark</last></editor>
-      <editor><first>Tracy Holloway</first><last>King</last></editor>
+      <editor id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, England</address>
       <month>August</month>
@@ -2549,9 +2549,9 @@
     <paper id="2">
       <title>Making Speech Look Like Text in the Regulus Development Environment</title>
       <author><first>Elisabeth</first><last>Kron</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Marianne</first><last>Santaholma</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Agnes</first><last>Lisowska</last></author>
       <pages>9–16</pages>
       <url hash="169c458c">W08-1702</url>
@@ -2559,8 +2559,8 @@
     </paper>
     <paper id="3">
       <title>A More Precise Analysis of Punctuation for Broad-Coverage Surface Realization with <fixed-case>CCG</fixed-case></title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>17–24</pages>
       <url hash="515d7781">W08-1703</url>
       <bibkey>white-rajkumar-2008-precise</bibkey>
@@ -2575,7 +2575,7 @@
     <paper id="5">
       <title>Speeding up <fixed-case>LFG</fixed-case> Parsing Using <fixed-case>C</fixed-case>-Structure Pruning</title>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>John T.</first><last>Maxwell III</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Christian</first><last>Rohrer</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
@@ -2592,7 +2592,7 @@
     </paper>
     <paper id="7">
       <title>Designing Testsuites for Grammar-based Systems in Applications</title>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Tracy Holloway</first><last>King</last></author>
       <pages>49–56</pages>
       <url hash="d84eb7bb">W08-1707</url>
@@ -2612,7 +2612,7 @@
     <meta>
       <booktitle>Coling 2008: Proceedings of the 2nd workshop on Information Retrieval for Question Answering</booktitle>
       <url hash="0f3bdd41">W08-18</url>
-      <editor><first>Mark A.</first><last>Greenwood</last></editor>
+      <editor id="mark-a-greenwood"><first>Mark A.</first><last>Greenwood</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -2625,8 +2625,8 @@
     </frontmatter>
     <paper id="1">
       <title>Improving Text Retrieval Precision and Answer Accuracy in Question Answering Systems</title>
-      <author><first>Matthew</first><last>Bilotti</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="matthew-w-bilotti"><first>Matthew</first><last>Bilotti</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>1–8</pages>
       <url hash="4676b218">W08-1801</url>
       <bibkey>bilotti-nyberg-2008-improving</bibkey>
@@ -2634,7 +2634,7 @@
     <paper id="2">
       <title>Exact Phrases in Information Retrieval for Question Answering</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Young Chol</first><last>Song</last></author>
+      <author id="young-chol-song"><first>Young Chol</first><last>Song</last></author>
       <author><first>William</first><last>Lahti</last></author>
       <pages>9–16</pages>
       <url hash="818c260b">W08-1802</url>
@@ -2642,7 +2642,7 @@
     </paper>
     <paper id="3">
       <title>Simple is Best: Experiments with Different Document Segmentation Strategies for Passage Retrieval</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Jori</first><last>Mur</last></author>
       <pages>17–25</pages>
       <url hash="d234b871">W08-1803</url>
@@ -2658,9 +2658,9 @@
     </paper>
     <paper id="5">
       <title>A Data Driven Approach to Query Expansion in Question Answering</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Jun</first><last>Wang</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark A.</first><last>Greenwood</last></author>
       <pages>34–41</pages>
       <url hash="e2ebd329">W08-1805</url>
@@ -2677,8 +2677,8 @@
     </paper>
     <paper id="7">
       <title>Using Lexico-Semantic Information for Query Expansion in Passage Retrieval for Question Answering</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>50–57</pages>
       <url hash="234f330a">W08-1807</url>
       <bibkey>van-der-plas-tiedemann-2008-using</bibkey>
@@ -2687,7 +2687,7 @@
       <title>Evaluation of Automatically Reformulated Questions in Question Series</title>
       <author><first>Richard</first><last>Shaw</last></author>
       <author><first>Ben</first><last>Solway</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Mark A.</first><last>Greenwood</last></author>
       <pages>58–65</pages>
       <url hash="279ccfda">W08-1808</url>
@@ -2696,15 +2696,15 @@
     <paper id="9">
       <title>Topic Indexing and Retrieval for Factoid <fixed-case>QA</fixed-case></title>
       <author><first>Kisuh</first><last>Ahn</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>66–73</pages>
       <url hash="7581643c">W08-1809</url>
       <bibkey>ahn-webber-2008-topic</bibkey>
     </paper>
     <paper id="10">
       <title>Indexing on Semantic Roles for Question Answering</title>
-      <author><first>Luiz Augusto</first><last>Pizzato</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="luiz-augusto-pizzato"><first>Luiz Augusto</first><last>Pizzato</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>74–81</pages>
       <url hash="75b961f4">W08-1810</url>
       <bibkey>pizzato-molla-2008-indexing</bibkey>
@@ -2754,14 +2754,14 @@
     <paper id="4">
       <title><fixed-case>P</fixed-case>ro<fixed-case>POSEL</fixed-case>: a human-oriented prosody and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> <fixed-case>E</fixed-case>nglish lexicon for machine-learning and <fixed-case>NLP</fixed-case></title>
       <author><first>Claire</first><last>Brierley</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <pages>25–31</pages>
       <url hash="4a126728">W08-1904</url>
       <bibkey>brierley-atwell-2008-proposel</bibkey>
     </paper>
     <paper id="5">
       <title>Natural Language Searching in Onomasiological Dictionaries</title>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <pages>32–38</pages>
       <url hash="b6b16630">W08-1905</url>
       <bibkey>sierra-2008-natural</bibkey>
@@ -2788,8 +2788,8 @@
     <paper id="8">
       <title>Extracting Sense Trees from the <fixed-case>R</fixed-case>omanian Thesaurus by Sense Segmentation &amp; Dependency Parsing</title>
       <author><first>Neculai</first><last>Curteanu</last></author>
-      <author><first>Alex</first><last>Moruz</last></author>
-      <author><first>Diana</first><last>Trandabăţ</last></author>
+      <author id="alex-moruz"><first>Alex</first><last>Moruz</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăţ</last></author>
       <pages>55–63</pages>
       <url hash="ec0c9410">W08-1908</url>
       <bibkey>curteanu-etal-2008-extracting</bibkey>
@@ -2822,7 +2822,7 @@
       <title>Toward a cognitive organization for electronic dictionaries, the case for semantic proxemy</title>
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Karine</first><last>Duvignau</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
       <pages>86–93</pages>
       <url hash="7ea8042c">W08-1912</url>
@@ -2832,7 +2832,7 @@
       <title>Cognitively Salient Relations for Multilingual Lexicography</title>
       <author><first>Gerhard</first><last>Kremer</last></author>
       <author><first>Andrea</first><last>Abel</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>94–101</pages>
       <url hash="47af04ab">W08-1913</url>
       <bibkey>kremer-etal-2008-cognitively</bibkey>
@@ -2849,10 +2849,10 @@
     <meta>
       <booktitle>Coling 2008: Proceedings of the 3rd Textgraphs workshop on Graph-based Algorithms for Natural Language Processing</booktitle>
       <url hash="362871dd">W08-20</url>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
-      <editor><first>Chris</first><last>Biemann</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
       <editor><first>Monojit</first><last>Choudhury</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <publisher>Coling 2008 Organizing Committee</publisher>
       <address>Manchester, UK</address>
       <month>August</month>
@@ -2892,7 +2892,7 @@
     <paper id="4">
       <title>Encoding Tree Pair-Based Graphs in Learning Algorithms: The Textual Entailment Recognition Case</title>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>25–32</pages>
       <url hash="40a51887">W08-2004</url>
       <bibkey>moschitti-zanzotto-2008-encoding</bibkey>
@@ -2927,14 +2927,14 @@
       <title>Concept-Graph Based Biomedical Automatic Summarization Using Ontologies</title>
       <author><first>Laura</first><last>Plaza</last></author>
       <author><first>Alberto</first><last>Díaz</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>53–56</pages>
       <url hash="216f9a32">W08-2008</url>
       <bibkey>plaza-etal-2008-concept</bibkey>
     </paper>
     <paper id="9">
       <title>Random Graph Model Simulations of Semantic Networks for Associative Concept Dictionaries</title>
-      <author><first>Hiroyuki</first><last>Akama</last></author>
+      <author id="hiroyuki-akama"><first>Hiroyuki</first><last>Akama</last></author>
       <author><first>Jaeyoung</first><last>Jung</last></author>
       <author><first>Terry</first><last>Joyce</last></author>
       <author><first>Maki</first><last>Miyake</last></author>
@@ -2962,7 +2962,7 @@
     <paper id="1">
       <title>Semantic Parsing for High-Precision Semantic Role Labelling</title>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <pages>1–8</pages>
       <url hash="619f8458">W08-2101</url>
       <bibkey>merlo-musillo-2008-semantic</bibkey>
@@ -2970,7 +2970,7 @@
     <paper id="2">
       <title><fixed-case>TAG</fixed-case>, Dynamic Programming, and the Perceptron for Efficient, Feature-Rich Parsing</title>
       <author><first>Xavier</first><last>Carreras</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Terry</first><last>Koo</last></author>
       <pages>9–16</pages>
       <url hash="9e33c393">W08-2102</url>
@@ -2986,14 +2986,14 @@
     </paper>
     <paper id="4">
       <title>Linguistic features in data-driven dependency parsing</title>
-      <author><first>Lilja</first><last>Ovrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Ovrelid</last></author>
       <pages>25–32</pages>
       <url hash="ce01e164">W08-2104</url>
       <bibkey>ovrelid-2008-linguistic</bibkey>
     </paper>
     <paper id="5">
       <title>Transforming Meaning Representation Grammars to Improve Semantic Parsing</title>
-      <author><first>Rohit</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
       <pages>33–40</pages>
       <url hash="455a0459">W08-2105</url>
       <bibkey>kate-2008-transforming</bibkey>
@@ -3065,7 +3065,7 @@
     </paper>
     <paper id="13">
       <title>Fully Unsupervised Graph-Based Discovery of General-Specific Noun Relationships from Web Corpora Frequency Counts</title>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Raycho</first><last>Mukelov</last></author>
       <author><first>Guillaume</first><last>Cleuziou</last></author>
       <pages>97–104</pages>
@@ -3074,7 +3074,7 @@
     </paper>
     <paper id="14">
       <title>Acquiring Knowledge from the Web to be used as Selectors for Noun Sense Disambiguation</title>
-      <author><first>Hansen A.</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen A.</first><last>Schwartz</last></author>
       <author><first>Fernando</first><last>Gomez</last></author>
       <pages>105–112</pages>
       <url hash="2cb41020">W08-2114</url>
@@ -3083,7 +3083,7 @@
     <paper id="15">
       <title>Automatic <fixed-case>C</fixed-case>hinese Catchword Extraction Based on Time Series Analysis</title>
       <author><first>Han</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Jing</first><last>Wan</last></author>
       <author><first>Lei</first><last>Han</last></author>
       <pages>113–118</pages>
@@ -3092,8 +3092,8 @@
     </paper>
     <paper id="16">
       <title>Easy as <fixed-case>ABC</fixed-case>? Facilitating Pictorial Communication via Semantically Enhanced Layout</title>
-      <author><first>Andrew B.</first><last>Goldberg</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew B.</first><last>Goldberg</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <author><first>Charles R.</first><last>Dyer</last></author>
       <author><first>Mohamed</first><last>Eldawy</last></author>
       <author><first>Lijie</first><last>Heng</last></author>
@@ -3103,15 +3103,15 @@
     </paper>
     <paper id="17">
       <title>A Nearest-Neighbor Approach to the Automatic Analysis of <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek Morphology</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>127–134</pages>
       <url hash="476fa579">W08-2117</url>
       <bibkey>lee-2008-nearest</bibkey>
     </paper>
     <paper id="18">
       <title>Context-based <fixed-case>A</fixed-case>rabic Morphological Analysis for Machine Translation</title>
-      <author><first>ThuyLinh</first><last>Nguyen</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="thuylinh-nguyen"><first>ThuyLinh</first><last>Nguyen</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>135–142</pages>
       <url hash="b2e6d45a">W08-2118</url>
       <bibkey>nguyen-vogel-2008-context</bibkey>
@@ -3120,18 +3120,18 @@
       <title>A Tree-to-String Phrase-based Model for Statistical Machine Translation</title>
       <author><first>Thai Phuong</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
-      <author><first>Tu-Bao</first><last>Ho</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
-      <author><first>Vinh Van</first><last>Nguyen</last></author>
+      <author id="tu-bao-ho"><first>Tu-Bao</first><last>Ho</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="vinh-van-nguyen"><first>Vinh Van</first><last>Nguyen</last></author>
       <pages>143–150</pages>
       <url hash="f15e30fc">W08-2119</url>
       <bibkey>nguyen-etal-2008-tree</bibkey>
     </paper>
     <paper id="20">
       <title>Trainable Speaker-Based Referring Expression Generation</title>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>151–158</pages>
       <url hash="5a7a92f6">W08-2120</url>
       <bibkey>di-fabbrizio-etal-2008-trainable</bibkey>
@@ -3140,8 +3140,8 @@
       <title>The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2008 Shared Task on Joint Parsing of Syntactic and Semantic Dependencies</title>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Richard</first><last>Johansson</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>159–177</pages>
       <url hash="982dbe3a">W08-2121</url>
@@ -3149,9 +3149,9 @@
     </paper>
     <paper id="22">
       <title>A Latent Variable Model of Synchronous Parsing for Syntactic and Semantic Dependencies</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Gabriele</first><last>Musillo</last></author>
+      <author id="gabriele-musillo"><first>Gabriele</first><last>Musillo</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>178–182</pages>
       <url hash="b6556e1d">W08-2122</url>
@@ -3168,7 +3168,7 @@
     <paper id="24">
       <title>A Joint Model for Parsing Syntactic and Semantic Dependencies</title>
       <author><first>Xavier</first><last>Lluís</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>188–192</pages>
       <url hash="e217d3cd">W08-2124</url>
       <bibkey>lluis-marquez-2008-joint</bibkey>
@@ -3176,7 +3176,7 @@
     <paper id="25">
       <title>Collective Semantic Role Labelling with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
       <author><first>Sebastian</first><last>Riedel</last></author>
-      <author><first>Ivan</first><last>Meza-Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza-Ruiz</last></author>
       <pages>193–197</pages>
       <url hash="40e98687">W08-2125</url>
       <bibkey>riedel-meza-ruiz-2008-collective</bibkey>
@@ -3193,7 +3193,7 @@
     <paper id="27">
       <title>Parsing Syntactic and Semantic Dependencies with Two Single-Stage Maximum Entropy Models</title>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>203–207</pages>
       <url hash="5f9699f3">W08-2127</url>
       <bibkey>zhao-kit-2008-parsing</bibkey>
@@ -3201,7 +3201,7 @@
     <paper id="28">
       <title>A Combined Memory-Based Semantic Role Labeler of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
       <pages>208–212</pages>
       <url hash="f19236ab">W08-2128</url>
@@ -3210,7 +3210,7 @@
     <paper id="29">
       <title>A Puristic Approach for Joint Dependency Parsing and Semantic Role Labeling</title>
       <author><first>Alexander</first><last>Volokh</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>213–217</pages>
       <url hash="79913496">W08-2129</url>
       <bibkey>volokh-neumann-2008-puristic</bibkey>
@@ -3218,7 +3218,7 @@
     <paper id="30">
       <title>Discriminative Learning of Syntactic and Semantic Dependencies</title>
       <author><first>Lu</first><last>Li</last></author>
-      <author><first>Shixi</first><last>Fan</last></author>
+      <author id="shixi-fan"><first>Shixi</first><last>Fan</last></author>
       <author><first>Xuan</first><last>Wang</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <pages>218–222</pages>
@@ -3239,16 +3239,16 @@
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Masakazu</first><last>Iwatate</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>228–232</pages>
       <url hash="53448d9d">W08-2132</url>
       <bibkey>watanabe-etal-2008-pipeline</bibkey>
     </paper>
     <paper id="33">
       <title>Semantic Dependency Parsing using N-best Semantic Role Sequences and Roleset Information</title>
-      <author><first>Joo-Young</first><last>Lee</last></author>
+      <author id="joo-young-lee"><first>Joo-Young</first><last>Lee</last></author>
       <author><first>Han-Cheol</first><last>Cho</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>233–237</pages>
       <url hash="cd4e446c">W08-2133</url>
       <bibkey>lee-etal-2008-semantic</bibkey>
@@ -3268,7 +3268,7 @@
     </paper>
     <paper id="35">
       <title>The Integration of Dependency Relation Classification and Semantic Role Labeling Using Bilayer Maximum Entropy <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Hongzhan</first><last>Li</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>243–247</pages>
@@ -3291,8 +3291,8 @@
       <title>Dependency Tree-based <fixed-case>SRL</fixed-case> with Proper Pruning and Extensive Feature Engineering</title>
       <author><first>Hongling</first><last>Wang</last></author>
       <author><first>Honglin</first><last>Wang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Qiaoming</first><last>Zhu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="qiaoming-zhu"><first>Qiaoming</first><last>Zhu</last></author>
       <pages>253–257</pages>
       <url hash="dd00978b">W08-2137</url>
       <bibkey>wang-etal-2008-dependency</bibkey>
@@ -3329,7 +3329,7 @@
     <meta>
       <booktitle>Semantics in Text Processing. <fixed-case>STEP</fixed-case> 2008 Conference Proceedings</booktitle>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Rodolfo</first><last>Delmonte</last></editor>
+      <editor id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></editor>
       <publisher>College Publications</publisher>
       <year>2008</year>
       <venue>step</venue>
@@ -3340,7 +3340,7 @@
     </frontmatter>
     <paper id="1">
       <title>A New Life for Semantic Annotations?</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>1–2</pages>
       <url hash="9f2bc677">W08-2201</url>
       <bibkey>bunt-2008-new</bibkey>
@@ -3350,14 +3350,14 @@
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Marco</first><last>de Gemmis</last></author>
       <author><first>Pasquale</first><last>Lops</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>5–16</pages>
       <url hash="e48fe9f6">W08-2202</url>
       <bibkey>basile-etal-2008-combining</bibkey>
     </paper>
     <paper id="3">
       <title>Semantic Representations of Syntactically Marked Discourse Status in Crosslinguistic Perspective</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>David</first><last>Goss-Grubbs</last></author>
       <pages>17–29</pages>
       <url hash="820bf9cd">W08-2203</url>
@@ -3365,7 +3365,7 @@
     </paper>
     <paper id="4">
       <title>High Precision Analysis of <fixed-case>NP</fixed-case>s with a Deep Processing Grammar</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
       <pages>31–43</pages>
       <url hash="22938739">W08-2204</url>
@@ -3375,9 +3375,9 @@
       <title>Augmenting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Deep Understanding of Text</title>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
-      <author><first>William R.</first><last>Murray</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
+      <author id="william-r-murray"><first>William R.</first><last>Murray</last></author>
       <author><first>John</first><last>Thompson</last></author>
       <pages>45–57</pages>
       <url hash="11860038">W08-2205</url>
@@ -3393,7 +3393,7 @@
     <paper id="7">
       <title><fixed-case>K</fixed-case>now<fixed-case>N</fixed-case>et: A Proposal for Building Highly Connected and Dense Knowledge Bases from the <fixed-case>W</fixed-case>eb</title>
       <author><first>Montse</first><last>Cuadros</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>71–84</pages>
       <url hash="501e862b">W08-2207</url>
       <bibkey>cuadros-rigau-2008-knownet-proposal</bibkey>
@@ -3403,7 +3403,7 @@
       <author><first>Diego</first><last>De Cao</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>85–101</pages>
       <url hash="6f424a06">W08-2208</url>
       <bibkey>de-cao-etal-2008-combining</bibkey>
@@ -3411,7 +3411,7 @@
     <paper id="9">
       <title>Answering Why-Questions in Closed Domains from a Discourse Model</title>
       <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>103–114</pages>
       <url hash="8d40fc25">W08-2209</url>
       <bibkey>delmonte-pianta-2008-answering</bibkey>
@@ -3419,7 +3419,7 @@
     <paper id="10">
       <title>Analyzing the Explanation Structure of Procedural Texts: Dealing with Advice and Warnings</title>
       <author><first>Lionel</first><last>Fontan</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>115–127</pages>
       <url hash="4b554bfd">W08-2210</url>
       <bibkey>fontan-saint-dizier-2008-analyzing</bibkey>
@@ -3427,7 +3427,7 @@
     <paper id="11">
       <title>From Predicting Predominant Senses to Local Context for Word Sense Disambiguation</title>
       <author><first>Rob</first><last>Koeling</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>129–138</pages>
       <url hash="bf747512">W08-2211</url>
       <bibkey>koeling-mccarthy-2008-predicting</bibkey>
@@ -3435,7 +3435,7 @@
     <paper id="12">
       <title>Automatic Fine-Grained Semantic Classification for Domain Adaptation</title>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>139–153</pages>
       <url hash="b93cb829">W08-2212</url>
       <bibkey>liakata-pulman-2008-automatic</bibkey>
@@ -3452,14 +3452,14 @@
     <paper id="14">
       <title>The Idiom–Reference Connection</title>
       <author><first>Marjorie</first><last>McShane</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>165–177</pages>
       <url hash="b1e92254">W08-2214</url>
       <bibkey>mcshane-nirenburg-2008-idiom</bibkey>
     </paper>
     <paper id="15">
       <title>Resolving Paraphrases to Support Modeling Language Perception in an Intelligent Agent</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <pages>179–192</pages>
@@ -3478,7 +3478,7 @@
       <title>Refining the Meaning of Sense Labels in <fixed-case>PDTB</fixed-case>: “Concession”</title>
       <author><first>Livio</first><last>Robaldo</last></author>
       <author><first>Eleni</first><last>Miltsakaki</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <pages>207–219</pages>
       <url hash="ac6f2fe2">W08-2217</url>
       <bibkey>robaldo-etal-2008-refining</bibkey>
@@ -3493,7 +3493,7 @@
     <paper id="19">
       <title>Open Knowledge Extraction through Compositional Language Processing</title>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>239–254</pages>
       <url hash="1d0ccd42">W08-2219</url>
       <bibkey>van-durme-schubert-2008-open</bibkey>
@@ -3508,7 +3508,7 @@
     <paper id="21">
       <title><fixed-case>B</fixed-case>oeing’s <fixed-case>NLP</fixed-case> System and the Challenges of Semantic Representation</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <pages>263–276</pages>
       <url hash="0288c9a4">W08-2221</url>
       <bibkey>clark-harrison-2008-boeings</bibkey>
@@ -3529,7 +3529,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>LXG</fixed-case>ram in the Shared Task “<fixed-case>C</fixed-case>omparing <fixed-case>S</fixed-case>emantic <fixed-case>R</fixed-case>epresentations” of <fixed-case>STEP</fixed-case> 2008</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <author><first>Francisco</first><last>Costa</last></author>
       <pages>299–314</pages>
       <url hash="ad3682f0">W08-2224</url>
@@ -3537,7 +3537,7 @@
     </paper>
     <paper id="25">
       <title>Baseline Evaluation of <fixed-case>WSD</fixed-case> and Semantic Dependency in <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Stephen</first><last>Beale</last></author>
       <author><first>Marjorie</first><last>McShane</last></author>
       <pages>315–326</pages>
@@ -3546,26 +3546,26 @@
     </paper>
     <paper id="26">
       <title>The <fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>ap Semantic Interpreter</title>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <pages>327–342</pages>
       <url hash="bac1a5a3">W08-2226</url>
       <bibkey>callaway-2008-textcap</bibkey>
     </paper>
     <paper id="27">
       <title>Deep Semantic Analysis of Text</title>
-      <author><first>James F.</first><last>Allen</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
-      <author><first>Will</first><last>de Beaumont</last></author>
+      <author id="james-allen"><first>James F.</first><last>Allen</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
+      <author id="william-de-beaumont"><first>Will</first><last>de Beaumont</last></author>
       <pages>343–354</pages>
       <url hash="f694a3ac">W08-2227</url>
       <bibkey>allen-etal-2008-deep</bibkey>
     </paper>
     <paper id="28">
       <title>Textual Entailment as an Evaluation Framework for Metaphor Resolution: A Proposal</title>
-      <author><first>Rodrigo</first><last>Agerri</last></author>
-      <author><first>John</first><last>Barnden</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
-      <author><first>Alan</first><last>Wallington</last></author>
+      <author id="rodrigo-agerri"><first>Rodrigo</first><last>Agerri</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
+      <author id="alan-wallington"><first>Alan</first><last>Wallington</last></author>
       <pages>357–363</pages>
       <url hash="1236595b">W08-2228</url>
       <bibkey>agerri-etal-2008-textual</bibkey>
@@ -3583,8 +3583,8 @@
     <paper id="30">
       <title>Addressing the Resource Bottleneck to Create Large-Scale Annotated Texts</title>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
       <pages>375–380</pages>
       <url hash="edff7c4f">W08-2230</url>
       <bibkey>chamberlain-etal-2008-addressing</bibkey>
@@ -3592,7 +3592,7 @@
     <paper id="31">
       <title>A Resource-Poor Approach for Linking Ontology Classes to <fixed-case>W</fixed-case>ikipedia Articles</title>
       <author><first>Nils</first><last>Reiter</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>381–387</pages>
       <url hash="2438ec52">W08-2231</url>
@@ -3600,9 +3600,9 @@
     </paper>
     <paper id="32">
       <title>Top-Down Cohesion Segmentation in Summarization</title>
-      <author><first>Doina</first><last>Tatar</last></author>
+      <author id="doina-tatar"><first>Doina</first><last>Tatar</last></author>
       <author><first>Andreea Diana</first><last>Mihis</last></author>
-      <author><first>Gabriela</first><last>Serban</last></author>
+      <author id="gabriela-serban"><first>Gabriela</first><last>Serban</last></author>
       <pages>389–397</pages>
       <url hash="275cf1e3">W08-2232</url>
       <bibkey>tatar-etal-2008-top</bibkey>
@@ -3634,7 +3634,7 @@
     <paper id="2">
       <title>Flexible Composition, Multiple Adjoining and Word Order Variation</title>
       <author><first>Joan</first><last>Chen-Main</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>9–16</pages>
       <url hash="f5bd931b">W08-2302</url>
       <bibkey>chen-main-joshi-2008-flexible</bibkey>
@@ -3657,9 +3657,9 @@
     </paper>
     <paper id="5">
       <title>Compositional Semantics of Coordination using Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>David</first><last>Potter</last></author>
-      <author><first>Dennis R.</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis R.</first><last>Storoshenko</last></author>
       <pages>33–40</pages>
       <url hash="77dbe8ec">W08-2305</url>
       <bibkey>han-etal-2008-compositional</bibkey>
@@ -3697,14 +3697,14 @@
     <paper id="10">
       <title>Synchronous Vector <fixed-case>TAG</fixed-case> for Syntax and Semantics: Control Verbs, Relative Clauses, and Inverse Linking</title>
       <author><first>Rebecca</first><last>Nesson</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>73–80</pages>
       <url hash="a6ca8ee3">W08-2310</url>
       <bibkey>nesson-shieber-2008-synchronous</bibkey>
     </paper>
     <paper id="11">
       <title>The use of <fixed-case>MCTAG</fixed-case> to Process Elliptic Coordination</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>81–88</pages>
       <url hash="f5cf3681">W08-2311</url>
       <bibkey>seddah-2008-use</bibkey>
@@ -3751,8 +3751,8 @@
     </paper>
     <paper id="17">
       <title>A Metagrammar for <fixed-case>V</fixed-case>ietnamese <fixed-case>LTAG</fixed-case></title>
-      <author><first>Phương</first><last>Lê Hồng</last></author>
-      <author><first>Thị Minh Huyền</first><last>Nguyễn</last></author>
+      <author id="phuong-le-hong"><first>Phương</first><last>Lê Hồng</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thị Minh Huyền</first><last>Nguyễn</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <pages>129–132</pages>
       <url hash="47e8c37e">W08-2317</url>
@@ -3761,7 +3761,7 @@
     <paper id="18">
       <title>Is Coordination Quantification?</title>
       <author><first>Kevin</first><last>Lerman</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>133–140</pages>
       <url hash="4042f507">W08-2318</url>
       <bibkey>lerman-rambow-2008-coordination</bibkey>
@@ -3769,15 +3769,15 @@
     <paper id="19">
       <title>Feature Unification in <fixed-case>TAG</fixed-case> Derivation Trees</title>
       <author><first>Sylvain</first><last>Schmitz</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>141–148</pages>
       <url hash="c838f819">W08-2319</url>
       <bibkey>schmitz-le-roux-2008-feature</bibkey>
     </paper>
     <paper id="20">
       <title>Reflexivity in <fixed-case>E</fixed-case>nglish: an <fixed-case>STAG</fixed-case> analysis</title>
-      <author><first>Dennis R.</first><last>Storoshenko</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis R.</first><last>Storoshenko</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>David</first><last>Potter</last></author>
       <pages>149–156</pages>
       <url hash="1d019774">W08-2320</url>
diff --git a/data/xml/W09.xml b/data/xml/W09.xml
index 10eb054316..5f0a991e0c 100644
--- a/data/xml/W09.xml
+++ b/data/xml/W09.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>EACL</fixed-case> 2009 Workshop on the Interaction between Linguistics and Computational Linguistics: Virtuous, Vicious or Vacuous?</booktitle>
       <url hash="bb6a3df0">W09-01</url>
-      <editor><first>Timothy</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></editor>
       <editor><first>Valia</first><last>Kordoni</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
@@ -18,14 +18,14 @@
     </frontmatter>
     <paper id="1">
       <title>Machine Translation and its Philosophical Accounts</title>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>1</pages>
       <url hash="d7249f9d">W09-0101</url>
       <bibkey>piperidis-2009-machine</bibkey>
     </paper>
     <paper id="2">
       <title>The Annotation Conundrum</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>2</pages>
       <url hash="97c68956">W09-0102</url>
       <bibkey>liberman-2009-annotation</bibkey>
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Computational Linguistics and Generative Linguistics: The Triumph of Hope over Experience</title>
-      <author><first>Geoffrey</first><last>Pullum</last></author>
+      <author id="geoffrey-k-pullum"><first>Geoffrey</first><last>Pullum</last></author>
       <pages>12–21</pages>
       <url hash="b931a285">W09-0104</url>
       <bibkey>pullum-2009-computational</bibkey>
@@ -53,14 +53,14 @@
     </paper>
     <paper id="6">
       <title>Linguistically Naïve != Language Independent: Why <fixed-case>NLP</fixed-case> Needs Linguistic Typology</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>26–32</pages>
       <url hash="358ddbf8">W09-0106</url>
       <bibkey>bender-2009-linguistically</bibkey>
     </paper>
     <paper id="7">
       <title>Parsed Corpora for Linguistics</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <pages>33–39</pages>
       <url hash="50765cc7">W09-0107</url>
@@ -75,7 +75,7 @@
     </paper>
     <paper id="9">
       <title>What Do Computational Linguists Need to Know about Linguistics?</title>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <pages>41–42</pages>
       <url hash="94562fdf">W09-0109</url>
       <bibkey>moore-2009-computational</bibkey>
@@ -92,7 +92,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Geometrical Models of Natural Language Semantics</booktitle>
       <url hash="3d3e1b25">W09-02</url>
-      <editor><first>Roberto</first><last>Basili</last></editor>
+      <editor id="roberto-basili"><first>Roberto</first><last>Basili</last></editor>
       <editor><first>Marco</first><last>Pennacchiotti</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
@@ -106,7 +106,7 @@
     </frontmatter>
     <paper id="1">
       <title>One Distributional Memory, Many Semantic Spaces</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>1–8</pages>
       <url hash="05a8fe5d">W09-0201</url>
@@ -123,7 +123,7 @@
     <paper id="3">
       <title>Unsupervised Classification with Dependency Based Word Spaces</title>
       <author><first>Klaus</first><last>Rothenhäusler</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>17–24</pages>
       <url hash="3bf36ef8">W09-0203</url>
       <bibkey>rothenhausler-schutze-2009-unsupervised</bibkey>
@@ -131,15 +131,15 @@
     <paper id="4">
       <title>A Study of Convolution Tree Kernel with Local Alignment</title>
       <author><first>Lidan</first><last>Zhang</last></author>
-      <author><first>Kwok-Ping</first><last>Chan</last></author>
+      <author id="kwok-ping-chan"><first>Kwok-Ping</first><last>Chan</last></author>
       <pages>25–32</pages>
       <url hash="6ec4f806">W09-0204</url>
       <bibkey>zhang-chan-2009-study</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>B</fixed-case>ag<fixed-case>P</fixed-case>ack: A General Framework to Represent Semantic Relations</title>
-      <author><first>Amaç</first><last>Herdağdelen</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="amac-herdagdelen"><first>Amaç</first><last>Herdağdelen</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>33–40</pages>
       <url hash="97d6b1c7">W09-0205</url>
       <bibkey>herdagdelen-baroni-2009-bagpack</bibkey>
@@ -148,14 +148,14 @@
       <title>Positioning for Conceptual Development using Latent Semantic Analysis</title>
       <author><first>Fridolin</first><last>Wild</last></author>
       <author><first>Bernhard</first><last>Hoisl</last></author>
-      <author><first>Gaston</first><last>Burek</last></author>
+      <author id="gaston-burek"><first>Gaston</first><last>Burek</last></author>
       <pages>41–48</pages>
       <url hash="5c2ef3a7">W09-0206</url>
       <bibkey>wild-etal-2009-positioning</bibkey>
     </paper>
     <paper id="7">
       <title>Semantic Similarity of Distractors in Multiple-Choice Tests: Extrinsic Evaluation</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Le An</first><last>Ha</last></author>
       <author><first>Andrea</first><last>Varga</last></author>
       <author><first>Luz</first><last>Rello</last></author>
@@ -166,7 +166,7 @@
     <paper id="8">
       <title>Paraphrase Assessment in Structured Vector Space: Exploring Parameters and Datasets</title>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>57–65</pages>
       <url hash="cc3ef6bc">W09-0208</url>
       <bibkey>erk-pado-2009-paraphrase</bibkey>
@@ -174,7 +174,7 @@
     <paper id="9">
       <title><fixed-case>SVD</fixed-case> Feature Selection for Probabilistic Taxonomy Learning</title>
       <author><first>Francesca</first><last>Fallucchi</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>66–73</pages>
       <url hash="2309c05c">W09-0209</url>
       <bibkey>fallucchi-zanzotto-2009-svd</bibkey>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="11">
       <title>A Non-negative Tensor Factorization Model for Selectional Preference Induction</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>83–90</pages>
       <url hash="c1181b7b">W09-0211</url>
       <bibkey>van-de-cruys-2009-non</bibkey>
@@ -208,7 +208,7 @@
     </paper>
     <paper id="13">
       <title>Handling Sparsity for Verb Noun <fixed-case>MWE</fixed-case> Token Classification</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Madhav</first><last>Krishna</last></author>
       <pages>96–103</pages>
       <url hash="63e9cae5">W09-0213</url>
@@ -249,7 +249,7 @@
     </frontmatter>
     <paper id="1">
       <title>Content Analysis of Museum Documentation in a Transdisciplinary Perspective</title>
-      <author><first>Guenther</first><last>Goerz</last></author>
+      <author id="guenther-goerz"><first>Guenther</first><last>Goerz</last></author>
       <author><first>Martin</first><last>Scholz</last></author>
       <pages>1–9</pages>
       <url hash="0ac372a8">W09-0301</url>
@@ -259,7 +259,7 @@
       <title>An Intelligent Authoring Environment for Abstract Semantic Representations of Cultural Object Descriptions</title>
       <author><first>Stasinos</first><last>Konstantopoulos</last></author>
       <author><first>Vangelis</first><last>Karkaletsis</last></author>
-      <author><first>Dimitris</first><last>Bilidas</last></author>
+      <author id="dimitrios-bilidas"><first>Dimitris</first><last>Bilidas</last></author>
       <pages>10–17</pages>
       <url hash="640dad9f">W09-0302</url>
       <bibkey>konstantopoulos-etal-2009-intelligent</bibkey>
@@ -284,9 +284,9 @@
     </paper>
     <paper id="5">
       <title>A Web-Enabled and Speech-Enhanced Parallel Corpus of <fixed-case>G</fixed-case>reek-<fixed-case>B</fixed-case>ulgarian Cultural Texts</title>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Nikos</first><last>Glaros</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>35–42</pages>
       <url hash="373f908c">W09-0305</url>
@@ -303,7 +303,7 @@
     <paper id="7">
       <title>Applying <fixed-case>NLP</fixed-case> Technologies to the Collection and Enrichment of Language Data on the Web to Aid Linguistic Research</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>51–59</pages>
       <url hash="1a37b8e8">W09-0307</url>
       <bibkey>xia-lewis-2009-applying</bibkey>
@@ -311,7 +311,7 @@
     <paper id="8">
       <title>Instance-Driven Discovery of Ontological Relation Labels</title>
       <author><first>Marieke</first><last>van Erp</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <author><first>Steve</first><last>Hunt</last></author>
       <pages>60–68</pages>
@@ -357,8 +357,8 @@
     </paper>
     <paper id="2">
       <title>Syntax-Oriented Evaluation Measures for Machine Translation Output</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>29–32</pages>
       <url hash="9828fc13">W09-0402</url>
       <bibkey>popovic-ney-2009-syntax</bibkey>
@@ -366,7 +366,7 @@
     <paper id="3">
       <title>A Simple Automatic <fixed-case>MT</fixed-case> Evaluation Metric</title>
       <author><first>Petr</first><last>Homola</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <pages>33–36</pages>
       <url hash="d7aacc8f">W09-0403</url>
@@ -374,10 +374,10 @@
     </paper>
     <paper id="4">
       <title>Machine Translation Evaluation with Textual Entailment Features</title>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Michel</first><last>Galley</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>37–41</pages>
       <url hash="a93a4af3">W09-0404</url>
       <bibkey>pado-etal-2009-machine</bibkey>
@@ -386,7 +386,7 @@
       <title>Combining Multi-Engine Translations with <fixed-case>M</fixed-case>oses</title>
       <author><first>Yu</first><last>Chen</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <author><first>Silke</first><last>Theison</last></author>
@@ -398,8 +398,8 @@
     </paper>
     <paper id="6">
       <title><fixed-case>CMU</fixed-case> System Combination for <fixed-case>WMT</fixed-case>‘09</title>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>47–50</pages>
       <url hash="7c16332e">W09-0406</url>
       <bibkey>hildebrand-vogel-2009-cmu</bibkey>
@@ -408,7 +408,7 @@
       <title>The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2009</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>51–55</pages>
       <url hash="52f1e3be">W09-0407</url>
       <bibkey>leusch-etal-2009-rwth</bibkey>
@@ -417,28 +417,28 @@
       <title>Machine Translation System Combination with Flexible Word Ordering</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>56–60</pages>
       <url hash="e159e782">W09-0408</url>
       <bibkey>heafield-etal-2009-machine</bibkey>
     </paper>
     <paper id="9">
       <title>Incremental Hypothesis Alignment with Flexible Matching for Building Confusion Networks: <fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>09 System Combination Task</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>61–65</pages>
       <url hash="86d2b795">W09-0409</url>
       <bibkey>rosti-etal-2009-incremental</bibkey>
     </paper>
     <paper id="10">
       <title>The <fixed-case>RWTH</fixed-case> Machine Translation System for <fixed-case>WMT</fixed-case> 2009</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>66–69</pages>
       <url hash="05615fdb">W09-0410</url>
       <bibkey>popovic-etal-2009-rwth</bibkey>
@@ -447,7 +447,7 @@
       <title>Translation Combination using Factored Word Substitution</title>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Silke</first><last>Theison</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <author><first>Yu</first><last>Chen</last></author>
       <author><first>Michael</first><last>Jellinghaus</last></author>
@@ -458,7 +458,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>NUS</fixed-case> at <fixed-case>WMT</fixed-case>09: Domain Adaptation Experiments for <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Machine Translation of News Commentary Text</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
       <pages>75–79</pages>
       <url hash="b660d8e0">W09-0412</url>
@@ -469,27 +469,27 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Muntsin</first><last>Kolss</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>80–84</pages>
       <url hash="4e2c7664">W09-0413</url>
       <bibkey>niehues-etal-2009-universitat</bibkey>
     </paper>
     <paper id="14">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Phrase-Based Translation System for <fixed-case>EACL</fixed-case>-<fixed-case>WMT</fixed-case> 2009</title>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>Carlos A.</first><last>Henríquez Q.</last></author>
-      <author><first>Adolfo</first><last>Hernández H.</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="carlos-henriquez"><first>Carlos A.</first><last>Henríquez Q.</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández H.</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>85–89</pages>
       <url hash="9a8eaee3">W09-0414</url>
       <bibkey>fonollosa-etal-2009-talp</bibkey>
     </paper>
     <paper id="15">
       <title>Deep Linguistic Multilingual Translation and Bilingual Dictionaries</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
       <pages>90–94</pages>
@@ -509,7 +509,7 @@
     <paper id="17">
       <title><fixed-case>LIMSI</fixed-case>‘s Statistical Translation Systems for <fixed-case>WMT</fixed-case>‘09</title>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>100–104</pages>
@@ -518,16 +518,16 @@
     </paper>
     <paper id="18">
       <title><fixed-case>NICT</fixed-case>@<fixed-case>WMT</fixed-case>09: Model Adaptation and Transliteration for <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case></title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>105–109</pages>
       <url hash="a6e4a074">W09-0418</url>
       <bibkey>paul-etal-2009-nict</bibkey>
     </paper>
     <paper id="19">
       <title>Statistical Post Editing and Dictionary Extraction: <fixed-case>S</fixed-case>ystran/<fixed-case>E</fixed-case>dinburgh Submissions for <fixed-case>ACL</fixed-case>-<fixed-case>WMT</fixed-case>2009</title>
-      <author><first>Loic</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loic</first><last>Dugast</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>110–114</pages>
@@ -536,7 +536,7 @@
     </paper>
     <paper id="20">
       <title>Experiments in Morphosyntactic Processing for Translating to and from <fixed-case>G</fixed-case>erman</title>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>115–119</pages>
       <url hash="36e5c34f">W09-0420</url>
       <bibkey>fraser-2009-experiments</bibkey>
@@ -553,13 +553,13 @@
     </paper>
     <paper id="22">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech <fixed-case>MT</fixed-case> in 2008</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Václav</first><last>Novák</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Jan</first><last>Ptáček</last></author>
+      <author id="jan-ptacek"><first>Jan</first><last>Ptáček</last></author>
       <author><first>Jan</first><last>Rouš</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>125–129</pages>
       <url hash="13cb0a1c">W09-0422</url>
       <bibkey>bojar-etal-2009-english</bibkey>
@@ -578,12 +578,12 @@
       <title><fixed-case>J</fixed-case>oshua: An Open Source Toolkit for Parsing-Based Machine Translation</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>Wren</first><last>Thornton</last></author>
       <author><first>Jonathan</first><last>Weese</last></author>
-      <author><first>Omar</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar</first><last>Zaidan</last></author>
       <pages>135–139</pages>
       <url hash="34f90f33">W09-0424</url>
       <bibkey>li-etal-2009-joshua</bibkey>
@@ -592,16 +592,16 @@
       <title>An Improved Statistical Transfer System for <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Greg</first><last>Hanneman</last></author>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
       <author><first>Alok</first><last>Parlikar</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>140–144</pages>
       <url hash="de620feb">W09-0425</url>
       <bibkey>hanneman-etal-2009-improved</bibkey>
     </paper>
     <paper id="26">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland Statistical Machine Translation System for the <fixed-case>F</fixed-case>ourth <fixed-case>W</fixed-case>orkshop on <fixed-case>M</fixed-case>achine <fixed-case>T</fixed-case>ranslation</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Hendra</first><last>Setiawan</last></author>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
@@ -636,8 +636,8 @@
       <author><first>Thi-Ngoc-Diep</first><last>Do</last></author>
       <author><first>Viet-Bac</first><last>Le</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Eric</first><last>Castelli</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="eric-castelli"><first>Eric</first><last>Castelli</last></author>
       <pages>165–172</pages>
       <url hash="998e03a6">W09-0430</url>
       <bibkey>do-etal-2009-mining</bibkey>
@@ -662,7 +662,7 @@
       <title><fixed-case>C</fixed-case>hinese Syntactic Reordering for Adequate Generation of <fixed-case>K</fixed-case>orean Verbal Phrases in <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean <fixed-case>SMT</fixed-case></title>
       <author><first>Jin-Ji</first><last>Li</last></author>
       <author><first>Jungi</first><last>Kim</last></author>
-      <author><first>Dong-Il</first><last>Kim</last></author>
+      <author id="dong-il-kim"><first>Dong-Il</first><last>Kim</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <pages>190–196</pages>
       <url hash="1191dfba">W09-0433</url>
@@ -671,7 +671,7 @@
     <paper id="34">
       <title>A Quantitative Analysis of Reordering Phenomena</title>
       <author><first>Alexandra</first><last>Birch</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <pages>197–205</pages>
       <url hash="d42ae0ee">W09-0434</url>
@@ -687,9 +687,9 @@
     </paper>
     <paper id="36">
       <title>Disambiguating “<fixed-case>DE</fixed-case>” for <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation</title>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>215–223</pages>
       <url hash="fc0f8ef0">W09-0436</url>
       <bibkey>chang-etal-2009-disambiguating</bibkey>
@@ -707,9 +707,9 @@
     <paper id="38">
       <title>A Deep Learning Approach to Machine Transliteration</title>
       <author><first>Thomas</first><last>Deselaers</last></author>
-      <author><first>Saša</first><last>Hasan</last></author>
+      <author id="sasa-hasan"><first>Saša</first><last>Hasan</last></author>
       <author><first>Oliver</first><last>Bender</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>233–241</pages>
       <url hash="d19ecdaa">W09-0438</url>
       <bibkey>deselaers-etal-2009-deep</bibkey>
@@ -724,18 +724,18 @@
     </paper>
     <paper id="40">
       <title>On the Robustness of Syntactic and Semantic Features for Automatic <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>250–258</pages>
       <url hash="c3b2fc88">W09-0440</url>
       <bibkey>gimenez-marquez-2009-robustness</bibkey>
     </paper>
     <paper id="41">
       <title>Fluency, Adequacy, or <fixed-case>HTER</fixed-case>? <fixed-case>E</fixed-case>xploring Different Human Judgments with a Tunable <fixed-case>MT</fixed-case> Metric</title>
-      <author><first>Matthew</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>259–268</pages>
       <url hash="74a2f770">W09-0441</url>
       <bibkey>snover-etal-2009-fluency</bibkey>
@@ -776,7 +776,7 @@
       <author><first>Johannes</first><last>Matiasek</last></author>
       <author><first>Jeremy</first><last>Jancsary</last></author>
       <author><first>Alexandra</first><last>Klein</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <pages>19–25</pages>
       <url hash="b3fce2bb">W09-0503</url>
       <bibkey>matiasek-etal-2009-identifying</bibkey>
@@ -802,14 +802,14 @@
     <paper id="6">
       <title>Predicting Concept Types in User Corrections in Dialog</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>42–49</pages>
       <url hash="80f76be5">W09-0506</url>
       <bibkey>stoyanchev-stent-2009-predicting</bibkey>
     </paper>
     <paper id="7">
       <title>Deeper Spoken Language Understanding for Man-Machine Dialogue on Broader Application Domains: A Logical Alternative to Concept Spotting</title>
-      <author><first>Jeanne</first><last>Villaneau</last></author>
+      <author id="jeanne-villaneau"><first>Jeanne</first><last>Villaneau</last></author>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
       <pages>50–57</pages>
       <url hash="64ba97b9">W09-0507</url>
@@ -818,7 +818,7 @@
     <paper id="8">
       <title>An Integrated Approach to Robust Processing of Situated Spoken Dialogue</title>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
       <pages>58–65</pages>
       <url hash="b1d1efe2">W09-0508</url>
       <bibkey>lison-kruijff-2009-integrated</bibkey>
@@ -845,8 +845,8 @@
     <meta>
       <booktitle>Proceedings of the 12th <fixed-case>E</fixed-case>uropean Workshop on Natural Language Generation (<fixed-case>ENLG</fixed-case> 2009)</booktitle>
       <url hash="56a357f5">W09-06</url>
-      <editor><first>Emiel</first><last>Krahmer</last></editor>
-      <editor><first>Mariët</first><last>Theune</last></editor>
+      <editor id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></editor>
+      <editor id="mariet-theune"><first>Mariët</first><last>Theune</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
       <month>March</month>
@@ -864,7 +864,7 @@
       <author><first>Norman</first><last>Alm</last></author>
       <author><first>Rolf</first><last>Black</last></author>
       <author><first>Martin</first><last>Dempster</last></author>
-      <author><first>Annalu</first><last>Waller</last></author>
+      <author id="annalu-waller"><first>Annalu</first><last>Waller</last></author>
       <pages>1–8</pages>
       <url hash="c7d7e79c">W09-0601</url>
       <bibkey>reiter-etal-2009-using</bibkey>
@@ -878,7 +878,7 @@
     </paper>
     <paper id="3">
       <title>System Building Cost vs. Output Quality in Data-to-Text Generation</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <pages>16–24</pages>
       <url hash="4b8356b9">W09-0603</url>
@@ -889,7 +889,7 @@
       <author><first>Erwin</first><last>Marsi</last></author>
       <author><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>25–32</pages>
       <url hash="e5190efd">W09-0604</url>
       <bibkey>marsi-etal-2009-sentence</bibkey>
@@ -938,7 +938,7 @@
       <author><first>Daniel</first><last>Dionne</last></author>
       <author><first>Salvador</first><last>de la Puente</last></author>
       <author><first>Carlos</first><last>León</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
       <pages>66–73</pages>
       <url hash="f78d0f76">W09-0610</url>
@@ -946,7 +946,7 @@
     </paper>
     <paper id="11">
       <title>Learning Lexical Alignment Policies for Generating Referring Expressions for Spoken Dialogue Systems</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>74–81</pages>
       <url hash="6da4acef">W09-0611</url>
@@ -971,7 +971,7 @@
     </paper>
     <paper id="14">
       <title>A <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Environment to Study Referring Expression Generation in a Situated Spoken Dialogue Task</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>94–97</pages>
       <url hash="6ae465d2">W09-0614</url>
@@ -980,8 +980,8 @@
     <paper id="15">
       <title>A Hearer-Oriented Evaluation of Referring Expression Generation</title>
       <author><first>Imtiaz Hussain</first><last>Khan</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Alexandra A.</first><last>Cleland</last></author>
       <pages>98–101</pages>
@@ -1016,7 +1016,7 @@
       <title>The Effect of Linguistic Devices in Information Presentation Messages on Recall and Comprehension</title>
       <author><first>Martin I.</first><last>Tietze</last></author>
       <author><first>Andi</first><last>Winterboer</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>114–117</pages>
       <url hash="9408194a">W09-0619</url>
       <bibkey>tietze-etal-2009-effect</bibkey>
@@ -1032,7 +1032,7 @@
     <paper id="21">
       <title>Clustering and Matching Headlines for Automatic Paraphrase Acquisition</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
       <pages>122–125</pages>
@@ -1042,8 +1042,8 @@
     <paper id="22">
       <title>A Situated Context Model for Resolution and Generation of Referring Expressions</title>
       <author><first>Hendrik</first><last>Zender</last></author>
-      <author><first>Geert-Jan M.</first><last>Kruijff</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayová</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan M.</first><last>Kruijff</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayová</last></author>
       <pages>126–129</pages>
       <url hash="82062332">W09-0622</url>
       <bibkey>zender-etal-2009-situated</bibkey>
@@ -1068,21 +1068,21 @@
     <paper id="25">
       <title>Towards Empirical Evaluation of Affective Tactical <fixed-case>NLG</fixed-case></title>
       <author><first>Ielka</first><last>van der Sluis</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>146–153</pages>
       <url hash="20fe65af">W09-0625</url>
       <bibkey>van-der-sluis-mellish-2009-towards</bibkey>
     </paper>
     <paper id="26">
       <title>What Game Theory Can Do for <fixed-case>NLG</fixed-case>: The Case of Vague Language (Invited Talk)</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>154–161</pages>
       <url hash="cca8b479">W09-0626</url>
       <bibkey>van-deemter-2009-game</bibkey>
     </paper>
     <paper id="27">
       <title>Generation <fixed-case>C</fixed-case>hallenges 2009: Preface</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>162–164</pages>
       <url hash="7c9d1a90">W09-0627</url>
@@ -1090,13 +1090,13 @@
     </paper>
     <paper id="28">
       <title>Report on the <fixed-case>F</fixed-case>irst <fixed-case>NLG</fixed-case> <fixed-case>C</fixed-case>hallenge on <fixed-case>G</fixed-case>enerating <fixed-case>I</fixed-case>nstructions in <fixed-case>V</fixed-case>irtual <fixed-case>E</fixed-case>nvironments (<fixed-case>GIVE</fixed-case>)</title>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>165–173</pages>
       <url hash="53a25072">W09-0628</url>
       <bibkey>byron-etal-2009-report</bibkey>
@@ -1104,7 +1104,7 @@
     <paper id="29">
       <title>The <fixed-case>TUNA</fixed-case>-<fixed-case>REG</fixed-case> <fixed-case>C</fixed-case>hallenge 2009: Overview and Evaluation Results</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <pages>174–182</pages>
       <url hash="0bb5bed1">W09-0629</url>
@@ -1130,7 +1130,7 @@
     <paper id="32">
       <title>Evolutionary and Case-Based Approaches to <fixed-case>REG</fixed-case>: <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>E</fixed-case>vo<fixed-case>TAP</fixed-case>, <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>V</fixed-case>alues<fixed-case>CBR</fixed-case> and <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>E</fixed-case>vo<fixed-case>CBR</fixed-case></title>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>187–188</pages>
       <url hash="e640b7ec">W09-0632</url>
       <bibkey>hervas-gervas-2009-evolutionary</bibkey>
@@ -1138,7 +1138,7 @@
     <paper id="33">
       <title><fixed-case>USP</fixed-case>-<fixed-case>EACH</fixed-case>: Improved Frequency-based Greedy Attribute Selection</title>
       <author><first>Diego Jesus</first><last>de Lucena</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>189–190</pages>
       <url hash="00ee9d9f">W09-0633</url>
       <bibkey>de-lucena-paraboni-2009-usp</bibkey>
@@ -1158,12 +1158,12 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Language Technologies for <fixed-case>A</fixed-case>frican Languages</booktitle>
       <url hash="e5792678">W09-07</url>
-      <editor><first>Lori</first><last>Levin</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
       <editor><first>John</first><last>Kiango</last></editor>
-      <editor><first>Judith</first><last>Klavans</last></editor>
+      <editor id="judith-l-klavans"><first>Judith</first><last>Klavans</last></editor>
       <editor><first>Guy</first><last>De Pauw</last></editor>
       <editor><first>Gilles-Maurice</first><last>de Schryver</last></editor>
-      <editor><first>Peter Waiganjo</first><last>Wagacha</last></editor>
+      <editor id="peter-waiganjo-wagacha"><first>Peter Waiganjo</first><last>Wagacha</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
       <month>March</month>
@@ -1218,7 +1218,7 @@
     </paper>
     <paper id="5">
       <title>Using Technology Transfer to Advance Automatic Lemmatisation for <fixed-case>S</fixed-case>etswana</title>
-      <author><first>Hendrik Johannes</first><last>Groenewald</last></author>
+      <author id="hendrik-johannes-groenewald"><first>Hendrik Johannes</first><last>Groenewald</last></author>
       <pages>32–37</pages>
       <url hash="2f4e6629">W09-0705</url>
       <bibkey>groenewald-2009-using</bibkey>
@@ -1228,7 +1228,7 @@
       <author><first>Gertrud</first><last>Faaß</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
       <author><first>Elsabé</first><last>Taljard</last></author>
-      <author><first>Danie</first><last>Prinsloo</last></author>
+      <author id="danie-j-prinsloo"><first>Danie</first><last>Prinsloo</last></author>
       <pages>38–45</pages>
       <url hash="6fa027f4">W09-0706</url>
       <bibkey>faass-etal-2009-part</bibkey>
@@ -1276,7 +1276,7 @@
     </paper>
     <paper id="12">
       <title>Towards an Electronic Dictionary of <fixed-case>T</fixed-case>amajaq Language in <fixed-case>N</fixed-case>iger</title>
-      <author><first>Chantal</first><last>Enguehard</last></author>
+      <author id="chantal-enguehard"><first>Chantal</first><last>Enguehard</last></author>
       <author><first>Issouf</first><last>Modi</last></author>
       <pages>81–88</pages>
       <url hash="4b2debda">W09-0712</url>
@@ -1293,14 +1293,14 @@
     <paper id="14">
       <title>Exploiting Cross-Linguistic Similarities in <fixed-case>Z</fixed-case>ulu and <fixed-case>X</fixed-case>hosa Computational Morphology</title>
       <author><first>Laurette</first><last>Pretorius</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <pages>96–103</pages>
       <url hash="26340115">W09-0714</url>
       <bibkey>pretorius-bosch-2009-exploiting</bibkey>
     </paper>
     <paper id="15">
       <title>Methods for <fixed-case>A</fixed-case>mharic Part-of-Speech Tagging</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Fredrik</first><last>Olsson</last></author>
       <author><first>Atelach</first><last>Alemu Argaw</last></author>
       <author><first>Lars</first><last>Asker</last></author>
@@ -1310,7 +1310,7 @@
     </paper>
     <paper id="16">
       <title>An Ontology for Accessing Transcription Systems (<fixed-case>OATS</fixed-case>)</title>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>112–120</pages>
       <url hash="a214994a">W09-0716</url>
       <bibkey>moran-2009-ontology</bibkey>
@@ -1320,7 +1320,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>EACL</fixed-case> 2009 Workshop on Computational Approaches to <fixed-case>S</fixed-case>emitic Languages</booktitle>
       <url hash="131319af">W09-08</url>
-      <editor><first>Mike</first><last>Rosner</last></editor>
+      <editor id="michael-rosner"><first>Mike</first><last>Rosner</last></editor>
       <editor><first>Shuly</first><last>Wintner</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
@@ -1341,7 +1341,7 @@
     </paper>
     <paper id="2">
       <title>The <fixed-case>K</fixed-case>aramel System and <fixed-case>S</fixed-case>emitic Languages: Structured Multi-Tiered Morphology</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <pages>10–18</pages>
       <url hash="a7dc06ff">W09-0802</url>
       <bibkey>barthelemy-2009-karamel</bibkey>
@@ -1375,7 +1375,7 @@
       <title>Automatic Treebank-Based Acquisition of <fixed-case>A</fixed-case>rabic <fixed-case>LFG</fixed-case> Dependency Structures</title>
       <author><first>Lamia</first><last>Tounsi</last></author>
       <author><first>Mohammed</first><last>Attia</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>45–52</pages>
       <url hash="91f93f91">W09-0806</url>
       <bibkey>tounsi-etal-2009-automatic</bibkey>
@@ -1383,7 +1383,7 @@
     <paper id="7">
       <title>Spoken <fixed-case>A</fixed-case>rabic Dialect Identification Using Phonotactic Modeling</title>
       <author><first>Fadi</first><last>Biadsy</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>53–61</pages>
       <url hash="9eb0c56d">W09-0807</url>
@@ -1486,7 +1486,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>EACL</fixed-case> 2009 Workshop on Computational Linguistic Aspects of Grammatical Inference</booktitle>
       <url hash="310f0281">W09-10</url>
-      <editor><first>Menno</first><last>van Zaanen</last></editor>
+      <editor id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></editor>
       <editor><first>Colin</first><last>de la Higuera</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Athens, Greece</address>
@@ -1508,7 +1508,7 @@
     </paper>
     <paper id="2">
       <title>On Bootstrapping of Linguistic Features for Bootstrapping Grammars</title>
-      <author><first>Damir</first><last>Ćavar</last></author>
+      <author id="damir-cavar"><first>Damir</first><last>Ćavar</last></author>
       <pages>5–6</pages>
       <url hash="af837564">W09-1002</url>
       <bibkey>cavar-2009-bootstrapping</bibkey>
@@ -1531,7 +1531,7 @@
     <paper id="5">
       <title><fixed-case>GREAT</fixed-case>: A Finite-State Machine Translation Toolkit Implementing a Grammatical Inference Approach for Transducer Inference (<fixed-case>GIATI</fixed-case>)</title>
       <author><first>Jorge</first><last>González</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>24–32</pages>
       <url hash="e8c804ba">W09-1005</url>
       <bibkey>gonzalez-casacuberta-2009-great</bibkey>
@@ -1555,9 +1555,9 @@
     </paper>
     <paper id="8">
       <title>On Statistical Parsing of <fixed-case>F</fixed-case>rench with Supervised and Semi-Supervised Strategies</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoit</first><last>Crabbé</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoit</first><last>Crabbé</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>49–57</pages>
       <bibkey>candito-etal-2009-statistical</bibkey>
     </paper>
@@ -1602,7 +1602,7 @@
     </paper>
     <paper id="2">
       <title>Modeling Word Learning As Communicative Inference</title>
-      <author><first>Michael C.</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael C.</first><last>Frank</last></author>
       <pages>2</pages>
       <url hash="a622c11a">W09-1102</url>
       <bibkey>frank-2009-modeling</bibkey>
@@ -1626,7 +1626,7 @@
     <paper id="5">
       <title>A Metalearning Approach to Processing the Scope of Negation</title>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>21–29</pages>
       <url hash="b7fa3ae5">W09-1105</url>
       <bibkey>morante-daelemans-2009-metalearning</bibkey>
@@ -1642,7 +1642,7 @@
     <paper id="7">
       <title>A Method for Stopping Active Learning Based on Stabilizing Predictions and the Need for User-Adjustable Stopping</title>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>39–47</pages>
       <url hash="932343be">W09-1107</url>
       <bibkey>bloodgood-vijay-shanker-2009-method</bibkey>
@@ -1673,8 +1673,8 @@
     </paper>
     <paper id="11">
       <title>Mining the Web for Reciprocal Relationships</title>
-      <author><first>Michael</first><last>Paul</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Chen</first><last>Li</last></author>
       <pages>75–83</pages>
       <url hash="04befa59">W09-1111</url>
@@ -1701,9 +1701,9 @@
     <paper id="14">
       <title><fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo inference and maximization for phrase-based translation</title>
       <author><first>Abhishek</first><last>Arun</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>102–110</pages>
@@ -1713,7 +1713,7 @@
     <paper id="15">
       <title>Investigating Automatic Alignment Methods for Slide Generation from Academic Papers</title>
       <author><first>Brandon</first><last>Beamer</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>111–119</pages>
       <url hash="ee4443b5">W09-1115</url>
       <bibkey>beamer-girju-2009-investigating</bibkey>
@@ -1746,7 +1746,7 @@
     </paper>
     <paper id="19">
       <title>Design Challenges and Misconceptions in Named Entity Recognition</title>
-      <author><first>Lev</first><last>Ratinov</last></author>
+      <author id="lev-ratinov"><first>Lev</first><last>Ratinov</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>147–155</pages>
       <url hash="21c1a87a">W09-1119</url>
@@ -1770,7 +1770,7 @@
     </paper>
     <paper id="22">
       <title>Lexical Patterns or Dependency Patterns: Which Is Better for Hypernym Extraction?</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <author><first>Katja</first><last>Hofmann</last></author>
       <pages>174–182</pages>
       <url hash="cdcfd89d">W09-1122</url>
@@ -1780,23 +1780,23 @@
       <title>Improving Text Classification by a Sense Spectrum Approach to Term Expansion</title>
       <author><first>Peter</first><last>Wittek</last></author>
       <author><first>Sándor</first><last>Darányi</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>183–191</pages>
       <url hash="77d895f6">W09-1123</url>
       <bibkey>wittek-etal-2009-improving</bibkey>
     </paper>
     <paper id="24">
       <title>A simple feature-copying approach for long-distance dependencies</title>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <author><first>Jonathan</first><last>Huggins</last></author>
-      <author><first>Ben</first><last>Wellner</last></author>
+      <author id="ben-wellner"><first>Ben</first><last>Wellner</last></author>
       <pages>192–200</pages>
       <url hash="70d52230">W09-1124</url>
       <bibkey>vilain-etal-2009-simple</bibkey>
     </paper>
     <paper id="25">
       <title>Fine-Grained Classification of Named Entities Exploiting Latent Semantic Kernels</title>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <pages>201–209</pages>
       <url hash="8155a346">W09-1125</url>
       <bibkey>giuliano-2009-fine</bibkey>
@@ -1804,7 +1804,7 @@
     <paper id="26">
       <title>Using Encyclopedic Knowledge for Automatic Topic Identification</title>
       <author><first>Kino</first><last>Coursey</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>William</first><last>Moen</last></author>
       <pages>210–218</pages>
       <url hash="d72762b7">W09-1126</url>
@@ -1823,7 +1823,7 @@
     <meta>
       <booktitle>Proceedings of the Thirteenth Conference on Computational Natural Language Learning (<fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2009): Shared Task</booktitle>
       <url hash="92e6daed">W09-12</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
       <month>June</month>
@@ -1840,11 +1840,11 @@
       <author><first>Massimiliano</first><last>Ciaramita</last></author>
       <author><first>Richard</first><last>Johansson</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
-      <author><first>Maria Antònia</first><last>Martí</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="m-antonia-marti"><first>Maria Antònia</first><last>Martí</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jan</first><last>Štěpánek</last></author>
       <author><first>Pavel</first><last>Straňák</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
@@ -1867,7 +1867,7 @@
       <title>Joint Memory-Based Learning of Syntactic and Semantic Dependencies in Multiple Languages</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>25–30</pages>
       <url hash="84c25ce2">W09-1203</url>
       <bibkey>morante-etal-2009-joint</bibkey>
@@ -1884,7 +1884,7 @@
     <paper id="5">
       <title>A Latent Variable Model of Synchronous Syntactic-Semantic Parsing for Multiple Languages</title>
       <author><first>Andrea</first><last>Gesmundo</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <author><first>Ivan</first><last>Titov</last></author>
       <pages>37–42</pages>
@@ -1916,8 +1916,8 @@
       <title>Multilingual Dependency Learning: A Huge Feature Engineering Method to Semantic Dependency Parsing</title>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>55–60</pages>
       <url hash="44bcd46a">W09-1208</url>
       <bibkey>zhao-etal-2009-multilingual</bibkey>
@@ -1926,7 +1926,7 @@
       <title>Multilingual Dependency Learning: Exploiting Rich Features for Tagging Syntactic and Semantic Dependencies</title>
       <author><first>Hai</first><last>Zhao</last></author>
       <author><first>Wenliang</first><last>Chen</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>61–66</pages>
@@ -1954,14 +1954,14 @@
       <title>A Second-Order Joint Eisner Model for Syntactic and Semantic Dependency Parsing</title>
       <author><first>Xavier</first><last>Lluís</last></author>
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>79–84</pages>
       <url hash="0f4b7668">W09-1212</url>
       <bibkey>lluis-etal-2009-second</bibkey>
     </paper>
     <paper id="13">
       <title>Multilingual Semantic Role Labelling with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
-      <author><first>Ivan</first><last>Meza-Ruiz</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan</first><last>Meza-Ruiz</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>85–90</pages>
       <url hash="9278d7e5">W09-1213</url>
@@ -1978,7 +1978,7 @@
     <paper id="15">
       <title>Parsing Syntactic and Semantic Dependencies for Multiple Languages with A Pipeline Approach</title>
       <author><first>Han</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Jing</first><last>Wan</last></author>
       <author><first>Mingyao</first><last>Zhang</last></author>
       <pages>97–102</pages>
@@ -2007,14 +2007,14 @@
       <title>Multilingual Syntactic-Semantic Dependency Parsing with Three-Stage Approximate Max-Margin Linear Models</title>
       <author><first>Yotaro</first><last>Watanabe</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>114–119</pages>
       <url hash="2757cef4">W09-1218</url>
       <bibkey>watanabe-etal-2009-multilingual</bibkey>
     </paper>
     <paper id="19">
       <title>A Simple Generative Pipeline Approach to Dependency Parsing and Semantic Role Labeling</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>120–125</pages>
       <url hash="ae1ea14e">W09-1219</url>
       <bibkey>zeman-2009-simple</bibkey>
@@ -2024,12 +2024,12 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2009 Workshop</booktitle>
       <url hash="ec4bcd1f">W09-13</url>
-      <editor><first>K. Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
       <month>June</month>
@@ -2052,11 +2052,11 @@
     </paper>
     <paper id="2">
       <title>Distinguishing Historical from Current Problems in Clinical Reports – Which Textual Features Help?</title>
-      <author><first>Danielle</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle</first><last>Mowery</last></author>
       <author><first>Henk</first><last>Harkema</last></author>
       <author><first>John</first><last>Dowling</last></author>
       <author><first>Jonathan</first><last>Lustgarten</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>10–18</pages>
       <url hash="8f988897">W09-1302</url>
       <bibkey>mowery-etal-2009-distinguishing</bibkey>
@@ -2067,7 +2067,7 @@
       <author><first>Henk</first><last>Harkema</last></author>
       <author><first>Peter</first><last>Haug</last></author>
       <author><first>Jeannie</first><last>Irwin</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>19–27</pages>
       <url hash="424c3ee9">W09-1303</url>
       <bibkey>christensen-etal-2009-onyx</bibkey>
@@ -2075,7 +2075,7 @@
     <paper id="4">
       <title>Learning the Scope of Hedge Cues in Biomedical Texts</title>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>28–36</pages>
       <url hash="f9c30e47">W09-1304</url>
       <bibkey>morante-daelemans-2009-learning</bibkey>
@@ -2085,8 +2085,8 @@
       <author><first>Udo</first><last>Hahn</last></author>
       <author><first>Katrin</first><last>Tomanek</last></author>
       <author><first>Ekaterina</first><last>Buyko</last></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>37–45</pages>
       <url hash="0330d337">W09-1305</url>
       <bibkey>hahn-etal-2009-feasible</bibkey>
@@ -2094,7 +2094,7 @@
     <paper id="6">
       <title>Extraction of Named Entities from Tables in Gene Mutation Literature</title>
       <author><first>Wern</first><last>Wong</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Lawrence</first><last>Cavedon</last></author>
       <pages>46–54</pages>
       <url hash="698daf96">W09-1306</url>
@@ -2123,7 +2123,7 @@
       <author><first>Mark</first><last>Stevenson</last></author>
       <author><first>Yikun</first><last>Guo</last></author>
       <author><first>Abdulaziz</first><last>Alamri</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>71–79</pages>
       <url hash="02b871b3">W09-1309</url>
       <bibkey>stevenson-etal-2009-disambiguation</bibkey>
@@ -2139,7 +2139,7 @@
     </paper>
     <paper id="11">
       <title>Exploring Graph Structure for Detection of Reliability Zones within Synonym Resources: Experiment with the Gene Ontology</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>89–96</pages>
       <url hash="e54ce421">W09-1311</url>
@@ -2196,7 +2196,7 @@
       <title>Identifying Interaction Sentences from Biological Literature Using Automatically Extracted Patterns</title>
       <author><first>Haibin</first><last>Liu</last></author>
       <author><first>Christian</first><last>Blouin</last></author>
-      <author><first>Vlado</first><last>Kešelj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Kešelj</last></author>
       <pages>133–141</pages>
       <url hash="8dc729b0">W09-1317</url>
       <bibkey>liu-etal-2009-identifying</bibkey>
@@ -2212,7 +2212,7 @@
     </paper>
     <paper id="19">
       <title>Exploring Two Biomedical Text Genres for Disease Recognition</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Won</first><last>Kim</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
@@ -2261,7 +2261,7 @@
       <author><first>Yasuhide</first><last>Miura</last></author>
       <author><first>Masatsugu</first><last>Tonoike</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
-      <author><first>Hiroshi</first><last>Mashuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Mashuichi</last></author>
       <author><first>Kazuhiko</first><last>Ohe</last></author>
       <pages>185–192</pages>
       <url hash="ad6bc330">W09-1324</url>
@@ -2281,7 +2281,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2009 Workshop Companion Volume for Shared Task</booktitle>
       <url hash="18460517">W09-14</url>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
       <month>June</month>
@@ -2339,7 +2339,7 @@
       <author><first>Andreas</first><last>Vlachos</last></author>
       <author><first>Paula</first><last>Buttery</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>37–40</pages>
       <url hash="91d1d329">W09-1405</url>
       <bibkey>vlachos-etal-2009-biomedical</bibkey>
@@ -2356,14 +2356,14 @@
     </paper>
     <paper id="7">
       <title>High-precision biological event extraction with a concept recognizer</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Helen</first><last>Johnson</last></author>
-      <author><first>Chris</first><last>Roeder</last></author>
-      <author><first>Philip</first><last>Ogren</last></author>
-      <author><first>William</first><last>Baumgartner</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="helen-l-johnson"><first>Helen</first><last>Johnson</last></author>
+      <author id="christophe-roeder"><first>Chris</first><last>Roeder</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
+      <author id="william-a-baumgartner-jr"><first>William</first><last>Baumgartner</last></author>
       <author><first>Elizabeth</first><last>White</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>50–58</pages>
       <url hash="9aafb279">W09-1407</url>
       <bibkey>cohen-etal-2009-high</bibkey>
@@ -2372,7 +2372,7 @@
       <title>A memory-based learning approach to event extraction in biomedical texts</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>59–67</pages>
       <url hash="28cab458">W09-1408</url>
       <bibkey>morante-etal-2009-memory</bibkey>
@@ -2380,7 +2380,7 @@
     <paper id="9">
       <title>Extraction of biomedical events using case-based reasoning</title>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>José-María</first><last>Carazo</last></author>
+      <author id="jose-maria-carazo"><first>José-María</first><last>Carazo</last></author>
       <author><first>Alberto</first><last>Pascual-Montano</last></author>
       <pages>68–76</pages>
       <url hash="7ef2ccc3">W09-1409</url>
@@ -2388,9 +2388,9 @@
     </paper>
     <paper id="10">
       <title>Biomedical Event Annotation with <fixed-case>CRF</fixed-case>s and Precision Grammars</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>77–85</pages>
       <url hash="8ff827a9">W09-1410</url>
       <bibkey>mackinlay-etal-2009-biomedical</bibkey>
@@ -2403,7 +2403,7 @@
       <author><first>Luis</first><last>Tari</last></author>
       <author><first>Astrid</first><last>Rheinländer</last></author>
       <author><first>Nguyen</first><last>Quang Long</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <author><first>Ulf</first><last>Leser</last></author>
       <pages>86–94</pages>
       <url hash="79f43568">W09-1411</url>
@@ -2415,7 +2415,7 @@
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Vassil</first><last>Momchev</last></author>
       <author><first>Deyan</first><last>Peychev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <pages>95–98</pages>
       <url hash="a59283b4">W09-1412</url>
@@ -2433,7 +2433,7 @@
     </paper>
     <paper id="14">
       <title>From Protein-Protein Interaction to Molecular Event Extraction</title>
-      <author><first>Rune</first><last>Sætre</last></author>
+      <author id="rune-saetre"><first>Rune</first><last>Sætre</last></author>
       <author><first>Makoto</first><last>Miwa</last></author>
       <author><first>Kazuhiro</first><last>Yoshida</last></author>
       <author><first>Jun’ichi</first><last>Tsujii</last></author>
@@ -2446,17 +2446,17 @@
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
       <author><first>Han-Cheol</first><last>Cho</last></author>
       <author><first>Min-Jeong</first><last>Kim</last></author>
-      <author><first>Joo-Young</first><last>Lee</last></author>
+      <author id="joo-young-lee"><first>Joo-Young</first><last>Lee</last></author>
       <author><first>Gumwon</first><last>Hong</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>107–110</pages>
       <url hash="f84d6598">W09-1415</url>
       <bibkey>lee-etal-2009-multi</bibkey>
     </paper>
     <paper id="16">
       <title>Supervised Classification for Extracting Biomedical Events</title>
-      <author><first>Arzucan</first><last>Özgür</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>111–114</pages>
       <url hash="a853dbac">W09-1416</url>
       <bibkey>ozgur-radev-2009-supervised</bibkey>
@@ -2468,7 +2468,7 @@
       <author><first>Reza</first><last>Mohammadi</last></author>
       <author><first>Jonathan</first><last>Dickerson</last></author>
       <author><first>David</first><last>Robertson</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>115–118</pages>
       <url hash="d14127bc">W09-1417</url>
       <bibkey>sarafraz-etal-2009-biomedical</bibkey>
@@ -2494,7 +2494,7 @@
     <paper id="20">
       <title>Exploring ways beyond the simple supervised learning approach for biological event extraction</title>
       <author><first>György</first><last>Móra</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>György</first><last>Szarvas</last></author>
       <author><first>Zsolt</first><last>Molnár</last></author>
       <pages>137–140</pages>
@@ -2506,7 +2506,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Software Engineering, Testing, and Quality Assurance for Natural Language Processing (<fixed-case>SETQA</fixed-case>-<fixed-case>NLP</fixed-case> 2009)</booktitle>
       <url hash="851a3821">W09-15</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Marc</first><last>Light</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -2520,7 +2520,7 @@
     </frontmatter>
     <paper id="1">
       <title>Building Test Suites for <fixed-case>UIMA</fixed-case> Components</title>
-      <author><first>Philip</first><last>Ogren</last></author>
+      <author id="philip-ogren"><first>Philip</first><last>Ogren</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <pages>1–4</pages>
       <url hash="108c9289">W09-1501</url>
@@ -2529,15 +2529,15 @@
     <paper id="2">
       <title>Context-Dependent Regression Testing for Natural Language Processing</title>
       <author><first>Elaine</first><last>Farrow</last></author>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <pages>5–13</pages>
       <url hash="69853fa8">W09-1502</url>
       <bibkey>farrow-dzikovska-2009-context</bibkey>
     </paper>
     <paper id="3">
       <title>Using Paraphrases of Deep Semantic Representions to Support Regression Testing in Spoken Dialogue Systems</title>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <pages>14–21</pages>
       <url hash="c174e12e">W09-1503</url>
       <bibkey>hockey-rayner-2009-using</bibkey>
@@ -2547,7 +2547,7 @@
       <author><first>Yoshinobu</first><last>Kano</last></author>
       <author><first>Luke</first><last>McCrohon</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>22–30</pages>
       <url hash="9b6bda46">W09-1504</url>
       <bibkey>kano-etal-2009-integrated</bibkey>
@@ -2563,7 +2563,7 @@
     </paper>
     <paper id="6">
       <title>Scaling up a <fixed-case>NLU</fixed-case> system from text to dialogue understanding</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Antonella</first><last>Bristot</last></author>
       <author><first>Gloria</first><last>Voltolina</last></author>
       <author><first>Vincenzo</first><last>Pallotta</last></author>
@@ -2573,7 +2573,7 @@
     </paper>
     <paper id="7">
       <title>Towards Agile and Test-Driven Development in <fixed-case>NLP</fixed-case> Applications</title>
-      <author><first>Jana</first><last>Sukkarieh</last></author>
+      <author id="jana-sukkarieh"><first>Jana</first><last>Sukkarieh</last></author>
       <author><first>Jyoti</first><last>Kamal</last></author>
       <pages>42–44</pages>
       <url hash="f236dfb2">W09-1507</url>
@@ -2582,21 +2582,21 @@
     <paper id="8">
       <title>Grammar Engineering for <fixed-case>CCG</fixed-case> using Ant and <fixed-case>XSLT</fixed-case></title>
       <author><first>Scott</first><last>Martin</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>45–46</pages>
       <url hash="8ae7c94f">W09-1508</url>
       <bibkey>martin-etal-2009-grammar</bibkey>
     </paper>
     <paper id="9">
       <title>Web Service Integration for Next Generation Localisation</title>
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <author><first>Stephen</first><last>Curran</last></author>
       <author><first>Kevin</first><last>Feeney</last></author>
       <author><first>Zohar</first><last>Etzioni</last></author>
       <author><first>John</first><last>Keeney</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <pages>47–55</pages>
       <url hash="3cc7dc37">W09-1509</url>
       <bibkey>lewis-etal-2009-web</bibkey>
@@ -2630,12 +2630,12 @@
     <meta>
       <booktitle>Proceedings of the Third International Workshop on Cross Lingual Information Access: Addressing the Information Need of Multilingual Societies (<fixed-case>CLIAWS</fixed-case>3)</booktitle>
       <url hash="ae447f21">W09-16</url>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <editor><first>Vasudeva</first><last>Varma</last></editor>
       <editor><first>Sudeshna</first><last>Sarkar</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
-      <editor><first>Raghavendra</first><last>Udupa</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
+      <editor id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
       <month>June</month>
@@ -2648,7 +2648,7 @@
     </frontmatter>
     <paper id="1">
       <title>Cross-Language Information Access: Looking Backward, Looking Forward</title>
-      <author><first>Douglas W.</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas W.</first><last>Oard</last></author>
       <pages>1–2</pages>
       <url hash="28e69359">W09-1601</url>
       <bibkey>oard-2009-cross</bibkey>
@@ -2656,7 +2656,7 @@
     <paper id="2">
       <title>Speech Retrieval in Unknown Languages: a Pilot Study</title>
       <author><first>Xiaodan</first><last>Zhuang</last></author>
-      <author><first>Jui Ting</first><last>Huang</last></author>
+      <author id="jui-ting-huang"><first>Jui Ting</first><last>Huang</last></author>
       <author><first>Mark</first><last>Hasegawa-Johnson</last></author>
       <pages>3–11</pages>
       <url hash="b6998473">W09-1602</url>
@@ -2674,7 +2674,7 @@
       <title>Cross-lingual Alignment and Completion of <fixed-case>W</fixed-case>ikipedia Templates</title>
       <author><first>Gosse</first><last>Bouma</last></author>
       <author><first>Sergio</first><last>Duarte</last></author>
-      <author><first>Zahurul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
       <pages>21–29</pages>
       <url hash="4a1c9db1">W09-1604</url>
       <bibkey>bouma-etal-2009-cross</bibkey>
@@ -2705,8 +2705,8 @@
     </paper>
     <paper id="8">
       <title>An Approach to Text Summarization.</title>
-      <author><first>Sankar</first><last>K</last></author>
-      <author><first>Sobha</first><last>L</last></author>
+      <author id="sankar-kuppan"><first>Sankar</first><last>K</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
       <pages>53–60</pages>
       <url hash="bbf560b8">W09-1608</url>
       <bibkey>k-l-2009-approach</bibkey>
@@ -2714,7 +2714,7 @@
     <paper id="9">
       <title><fixed-case>NE</fixed-case> Tagging for <fixed-case>U</fixed-case>rdu based on Bootstrap <fixed-case>POS</fixed-case> Learning</title>
       <author><first>Smruthi</first><last>Mukund</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>61–69</pages>
       <url hash="0364efc8">W09-1609</url>
       <bibkey>mukund-srihari-2009-ne</bibkey>
@@ -2725,7 +2725,7 @@
       <booktitle>Proceedings of the Workshop on Unsupervised and Minimally Supervised Learning of Lexical Semantics</booktitle>
       <url hash="92d886ce">W09-17</url>
       <editor><first>Suresh</first><last>Manandhar</last></editor>
-      <editor><first>Ioannis P.</first><last>Klapaftis</last></editor>
+      <editor id="ioannis-klapaftis"><first>Ioannis P.</first><last>Klapaftis</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado, USA</address>
       <month>June</month>
@@ -2738,7 +2738,7 @@
     </frontmatter>
     <paper id="1">
       <title>Acquiring Applicable Common Sense Knowledge from the Web</title>
-      <author><first>Hansen A.</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen A.</first><last>Schwartz</last></author>
       <author><first>Fernando</first><last>Gomez</last></author>
       <pages>1–9</pages>
       <url hash="a224c0ed">W09-1701</url>
@@ -2755,7 +2755,7 @@
     <paper id="3">
       <title>Corpus-based Semantic Lexicon Induction with Web-based Corroboration</title>
       <author><first>Sean</first><last>Igo</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>18–26</pages>
       <url hash="fdb98d57">W09-1703</url>
       <bibkey>igo-riloff-2009-corpus</bibkey>
@@ -2778,15 +2778,15 @@
     </paper>
     <paper id="6">
       <title>Combining Syntactic Co-occurrences and Nearest Neighbours in Distributional Methods to Remedy Data Sparseness.</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>45–53</pages>
       <url hash="b6b16cff">W09-1706</url>
       <bibkey>van-der-plas-2009-combining</bibkey>
     </paper>
     <paper id="7">
       <title>Using <fixed-case>DEDICOM</fixed-case> for Completely Unsupervised Part-of-Speech Tagging</title>
-      <author><first>Peter</first><last>Chew</last></author>
-      <author><first>Brett</first><last>Bader</last></author>
+      <author id="peter-a-chew"><first>Peter</first><last>Chew</last></author>
+      <author id="brett-w-bader"><first>Brett</first><last>Bader</last></author>
       <author><first>Alla</first><last>Rozovskaya</last></author>
       <pages>54–62</pages>
       <url hash="64693b0d">W09-1707</url>
@@ -2811,16 +2811,16 @@
     </frontmatter>
     <paper id="1">
       <title>Summarization with a Joint Model for Sentence Extraction and Compression</title>
-      <author><first>André</first><last>Martins</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>1–9</pages>
       <url hash="5ac13775">W09-1801</url>
       <bibkey>martins-smith-2009-summarization</bibkey>
     </paper>
     <paper id="2">
       <title>A Scalable Global Model for Summarization</title>
-      <author><first>Dan</first><last>Gillick</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>10–18</pages>
       <url hash="c0fbfe6d">W09-1802</url>
       <bibkey>gillick-favre-2009-scalable</bibkey>
@@ -2844,7 +2844,7 @@
     </paper>
     <paper id="5">
       <title>A Constraint Programming Approach to Probabilistic Syntactic Processing</title>
-      <author><first>Irene</first><last>Langkilde-Geary</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde-Geary</last></author>
       <pages>36–37</pages>
       <url hash="a5eab2a9">W09-1805</url>
       <bibkey>langkilde-geary-2009-constraint</bibkey>
@@ -2854,7 +2854,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2009 Workshop on Active Learning for Natural Language Processing</booktitle>
       <url hash="66f7a667">W09-19</url>
-      <editor><first>Eric</first><last>Ringger</last></editor>
+      <editor id="eric-ringger"><first>Eric</first><last>Ringger</last></editor>
       <editor><first>Robbie</first><last>Haertel</last></editor>
       <editor><first>Katrin</first><last>Tomanek</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2879,7 +2879,7 @@
       <author><first>Katrin</first><last>Tomanek</last></author>
       <author><first>Florian</first><last>Laws</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>9–17</pages>
       <url hash="d01dcef1">W09-1902</url>
       <bibkey>tomanek-etal-2009-proper</bibkey>
@@ -2887,8 +2887,8 @@
     <paper id="3">
       <title>Estimating Annotation Cost for Active Learning in a Multi-Annotator Environment</title>
       <author><first>Shilpa</first><last>Arora</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>18–26</pages>
       <url hash="8ce9701d">W09-1903</url>
       <bibkey>arora-etal-2009-estimating</bibkey>
@@ -2930,7 +2930,7 @@
     <paper id="8">
       <title>Proactive Learning for Building Machine Translation Systems for Minority Languages</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>58–61</pages>
       <url hash="561e2ffd">W09-1908</url>
       <bibkey>ambati-carbonell-2009-proactive</bibkey>
@@ -2941,7 +2941,7 @@
       <booktitle>Proceedings of the Workshop on Computational Approaches to Linguistic Creativity</booktitle>
       <url hash="ac520c24">W09-20</url>
       <editor><first>Anna</first><last>Feldman</last></editor>
-      <editor><first>Birte</first><last>Loenneker-Rodman</last></editor>
+      <editor id="birte-lonneker"><first>Birte</first><last>Loenneker-Rodman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
       <month>June</month>
@@ -2965,7 +2965,7 @@
       <title>Topic Model Analysis of Metaphor Frequency for Psycholinguistic Stimuli</title>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Vicky Tzuyin</first><last>Lai</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <pages>9–16</pages>
       <url hash="7f191936">W09-2002</url>
       <bibkey>bethard-etal-2009-topic</bibkey>
@@ -2987,7 +2987,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>G</fixed-case>aiku : Generating Haiku with Word Associations Norms</title>
-      <author><first>Yael</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael</first><last>Netzer</last></author>
       <author><first>David</first><last>Gabay</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
@@ -2997,9 +2997,9 @@
     </paper>
     <paper id="6">
       <title>Automatic Generation of <fixed-case>T</fixed-case>amil Lyrics for Melodies</title>
-      <author><first>Ananth</first><last>Ramakrishnan A</last></author>
-      <author><first>Sankar</first><last>Kuppan</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="ananth-ramakrishnan-a"><first>Ananth</first><last>Ramakrishnan A</last></author>
+      <author id="sankar-kuppan"><first>Sankar</first><last>Kuppan</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>40–46</pages>
       <url hash="3d0fa261">W09-2006</url>
       <bibkey>ramakrishnan-a-etal-2009-automatic</bibkey>
@@ -3044,7 +3044,7 @@
     </paper>
     <paper id="12">
       <title>How Creative is Your Writing?</title>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <author><first>Zhiting</first><last>Xu</last></author>
       <author><first>Tushar</first><last>Khot</last></author>
       <pages>87–93</pages>
@@ -3064,7 +3064,7 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="771085be">W09-21</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3081,7 +3081,7 @@
       <title>Automated Assessment of Spoken <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic</title>
       <author><first>Jian</first><last>Cheng</last></author>
       <author><first>Jared</first><last>Bernstein</last></author>
-      <author><first>Ulrike</first><last>Pado</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Pado</last></author>
       <author><first>Masanori</first><last>Suzuki</last></author>
       <pages>1–9</pages>
       <url hash="5a07a3ea">W09-2101</url>
@@ -3098,19 +3098,19 @@
     </paper>
     <paper id="3">
       <title>Inferring Tutorial Dialogue Structure with Hidden <fixed-case>M</fixed-case>arkov Modeling</title>
-      <author><first>Kristy Elizabeth</first><last>Boyer</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
+      <author id="kristy-boyer"><first>Kristy Elizabeth</first><last>Boyer</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
       <author><first>Michael</first><last>Wallis</last></author>
       <author><first>Mladen</first><last>Vouk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>19–26</pages>
       <url hash="7f213dd3">W09-2103</url>
       <bibkey>boyer-etal-2009-inferring</bibkey>
     </paper>
     <paper id="4">
       <title>A New Yardstick and Tool for Personalized Vocabulary Building</title>
-      <author><first>Thomas</first><last>Landauer</last></author>
+      <author id="thomas-landauer"><first>Thomas</first><last>Landauer</last></author>
       <author><first>Kirill</first><last>Kireyev</last></author>
       <author><first>Charles</first><last>Panaccione</last></author>
       <pages>27–33</pages>
@@ -3119,12 +3119,12 @@
     </paper>
     <paper id="5">
       <title>Supporting the Adaptation of Texts for Poor Literacy Readers: a Text Simplification Editor for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Arnaldo</first><last>Candido</last></author>
-      <author><first>Erick</first><last>Maziero</last></author>
+      <author id="arnaldo-candido-jr"><first>Arnaldo</first><last>Candido</last></author>
+      <author id="erick-galani-maziero"><first>Erick</first><last>Maziero</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
-      <author><first>Sandra</first><last>Aluisio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluisio</last></author>
       <pages>34–42</pages>
       <url hash="ae02faa3">W09-2105</url>
       <bibkey>candido-etal-2009-supporting</bibkey>
@@ -3158,7 +3158,7 @@
       <title><fixed-case>KSC</fixed-case>-<fixed-case>P</fixed-case>a<fixed-case>L</fixed-case>: A Peer Learning Agent that Encourages Students to take the Initiative</title>
       <author><first>Cynthia</first><last>Kersey</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <author><first>Sandra</first><last>Katz</last></author>
       <pages>55–63</pages>
       <url hash="e1fd6aaa">W09-2109</url>
@@ -3212,7 +3212,7 @@
       <author><first>Andrew</first><last>Carlson</last></author>
       <author><first>Justin</first><last>Betteridge</last></author>
       <author><first>Estevam Rafael</first><last>Hruschka Junior</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>1–9</pages>
       <url hash="6d0c64cf">W09-2201</url>
       <bibkey>carlson-etal-2009-coupling</bibkey>
@@ -3220,15 +3220,15 @@
     <paper id="2">
       <title>Surrogate Learning - From Feature Independence to Semi-Supervised Classification</title>
       <author><first>Sriharsha</first><last>Veeramachaneni</last></author>
-      <author><first>Ravi Kumar</first><last>Kondadadi</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravi Kumar</first><last>Kondadadi</last></author>
       <pages>10–18</pages>
       <url hash="d470efc5">W09-2202</url>
       <bibkey>veeramachaneni-kondadadi-2009-surrogate</bibkey>
     </paper>
     <paper id="3">
       <title>Keepin’ It Real: Semi-Supervised Learning with Realistic Tuning</title>
-      <author><first>Andrew B.</first><last>Goldberg</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="andrew-b-goldberg"><first>Andrew B.</first><last>Goldberg</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>19–27</pages>
       <url hash="5985e461">W09-2203</url>
       <bibkey>goldberg-zhu-2009-keepin</bibkey>
@@ -3237,14 +3237,14 @@
       <title>Is Unlabeled Data Suitable for Multiclass <fixed-case>SVM</fixed-case>-based Web Page Classification?</title>
       <author><first>Arkaitz</first><last>Zubiaga</last></author>
       <author><first>Víctor</first><last>Fresno</last></author>
-      <author><first>Raquel</first><last>Martínez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martínez</last></author>
       <pages>28–36</pages>
       <url hash="6765cb61">W09-2204</url>
       <bibkey>zubiaga-etal-2009-unlabeled</bibkey>
     </paper>
     <paper id="5">
       <title>A Comparison of Structural Correspondence Learning and Self-training for Discriminative Parse Selection</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>37–42</pages>
       <url hash="70024dff">W09-2205</url>
       <bibkey>plank-2009-comparison</bibkey>
@@ -3252,7 +3252,7 @@
     <paper id="6">
       <title><fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation with Topic-in-Set Knowledge</title>
       <author><first>David</first><last>Andrzejewski</last></author>
-      <author><first>Xiaojin</first><last>Zhu</last></author>
+      <author id="xiaojin-zhu"><first>Xiaojin</first><last>Zhu</last></author>
       <pages>43–48</pages>
       <url hash="dd8f3aab">W09-2206</url>
       <bibkey>andrzejewski-zhu-2009-latent</bibkey>
@@ -3261,7 +3261,7 @@
       <title>An Analysis of Bootstrapping for the Recognition of Temporal Expressions</title>
       <author><first>Jordi</first><last>Poveda</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>49–57</pages>
       <url hash="f7aca7ab">W09-2207</url>
       <bibkey>poveda-etal-2009-analysis</bibkey>
@@ -3284,7 +3284,7 @@
     </paper>
     <paper id="10">
       <title>On Semi-Supervised Learning of <fixed-case>G</fixed-case>aussian Mixture Models for Phonetic Classification</title>
-      <author><first>Jui-Ting</first><last>Huang</last></author>
+      <author id="jui-ting-huang"><first>Jui-Ting</first><last>Huang</last></author>
       <author><first>Mark</first><last>Hasegawa-Johnson</last></author>
       <pages>75–83</pages>
       <url hash="d009ee77">W09-2210</url>
@@ -3318,7 +3318,7 @@
     <paper id="1">
       <title>Decoding with Syntactic and Non-Syntactic Phrases in a Syntax-Based Machine Translation System</title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>1–9</pages>
       <url hash="db3131ac">W09-2301</url>
       <bibkey>hanneman-lavie-2009-decoding</bibkey>
@@ -3333,7 +3333,7 @@
     </paper>
     <paper id="3">
       <title>Empirical Lower Bounds on Aligment Error Rates in Syntax-Based Machine Translation</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>19–27</pages>
       <url hash="154ae94f">W09-2303</url>
@@ -3350,8 +3350,8 @@
     <paper id="5">
       <title>References Extension for the Automatic Evaluation of <fixed-case>MT</fixed-case> by Syntactic Hybridization</title>
       <author><first>Bo</first><last>Wang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <pages>37–44</pages>
       <url hash="b72f9c4d">W09-2305</url>
@@ -3361,25 +3361,25 @@
       <title>A Study of Translation Rule Classification for Syntax-based Statistical Machine Translation</title>
       <author><first>Hongfei</first><last>Jiang</last></author>
       <author><first>Sheng</first><last>Li</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>45–50</pages>
       <url hash="70e15e62">W09-2306</url>
       <bibkey>jiang-etal-2009-study</bibkey>
     </paper>
     <paper id="7">
       <title>Discriminative Reordering with <fixed-case>C</fixed-case>hinese Grammatical Relations Features</title>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
-      <author><first>Huihsin</first><last>Tseng</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="huihsin-tseng"><first>Huihsin</first><last>Tseng</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>51–59</pages>
       <url hash="22bc37e4">W09-2307</url>
       <bibkey>chang-etal-2009-discriminative</bibkey>
     </paper>
     <paper id="8">
       <title>On the Complexity of Alignment Problems in Two Synchronous Grammar Formalisms</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>60–68</pages>
       <url hash="ef75f5fb">W09-2308</url>
       <bibkey>sogaard-2009-complexity</bibkey>
@@ -3387,9 +3387,9 @@
     <paper id="9">
       <title>Reordering Model Using Syntactic Information of a Source Tree for Statistical Machine Translation</title>
       <author><first>Kei</first><last>Hashimoto</last></author>
-      <author><first>Hirohumi</first><last>Yamamoto</last></author>
+      <author id="hirofumi-yamamoto"><first>Hirohumi</first><last>Yamamoto</last></author>
       <author><first>Hideo</first><last>Okuma</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Keiichi</first><last>Tokuda</last></author>
       <pages>69–77</pages>
       <url hash="a0969824">W09-2309</url>
@@ -3398,7 +3398,7 @@
     <paper id="10">
       <title>Coupling Hierarchical Word Reordering and Decoding in Phrase-Based Statistical Machine Translation</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>78–86</pages>
       <url hash="076c9f1f">W09-2310</url>
@@ -3409,8 +3409,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Semantic Evaluations: Recent Achievements and Future Directions (<fixed-case>SEW</fixed-case>-2009)</booktitle>
       <url hash="faec8933">W09-24</url>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
       <editor><first>Richard</first><last>Wicentowski</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Boulder, Colorado</address>
@@ -3424,16 +3424,16 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: Alternative Annotations of Word Usage</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>1</pages>
       <url hash="32c059fc">W09-2401</url>
       <bibkey>mccarthy-2009-invited</bibkey>
     </paper>
     <paper id="2">
       <title>Making Sense of Word Sense Variation</title>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Ansaf</first><last>Salleb-Aouissi</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="ansaf-salleb-aouissi"><first>Ansaf</first><last>Salleb-Aouissi</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>2–9</pages>
       <url hash="cd1aed11">W09-2402</url>
       <bibkey>passonneau-etal-2009-making</bibkey>
@@ -3443,7 +3443,7 @@
       <author><first>Judita</first><last>Preiss</last></author>
       <author><first>Jon</first><last>Dehdari</last></author>
       <author><first>Josh</first><last>King</last></author>
-      <author><first>Dennis</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis</first><last>Mehay</last></author>
       <pages>10–18</pages>
       <url hash="0b466c2d">W09-2403</url>
       <bibkey>preiss-etal-2009-refining</bibkey>
@@ -3457,7 +3457,7 @@
     </paper>
     <paper id="5">
       <title>Using Web Selectors for the Disambiguation of All Words</title>
-      <author><first>Hansen A.</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>Hansen A.</first><last>Schwartz</last></author>
       <author><first>Fernando</first><last>Gomez</last></author>
       <pages>28–36</pages>
       <url hash="f205ebb7">W09-2405</url>
@@ -3467,7 +3467,7 @@
       <title>Large-scale Semantic Networks: Annotation and Evaluation</title>
       <author><first>Václav</first><last>Novák</last></author>
       <author><first>Sven</first><last>Hartrumpf</last></author>
-      <author><first>Keith</first><last>Hall</last></author>
+      <author id="keith-hall"><first>Keith</first><last>Hall</last></author>
       <pages>37–45</pages>
       <url hash="02960ec2">W09-2406</url>
       <bibkey>novak-etal-2009-large</bibkey>
@@ -3475,7 +3475,7 @@
     <paper id="7">
       <title>Making Semantic Topicality Robust Through Term Abstraction</title>
       <author><first>Paul M.</first><last>Heider</last></author>
-      <author><first>Rohini K.</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini K.</first><last>Srihari</last></author>
       <pages>46–51</pages>
       <url hash="a8cddc6d">W09-2407</url>
       <bibkey>heider-srihari-2009-making</bibkey>
@@ -3483,7 +3483,7 @@
     <paper id="8">
       <title>Meeting <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2: Shallow Approach for Temporal Tagger</title>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>52–57</pages>
       <url hash="502a7cbe">W09-2408</url>
       <bibkey>kolomiyets-moens-2009-meeting</bibkey>
@@ -3491,7 +3491,7 @@
     <paper id="9">
       <title>Using Lexical Patterns in the <fixed-case>G</fixed-case>oogle Web 1<fixed-case>T</fixed-case> Corpus to Deduce Semantic Relations Between Nouns</title>
       <author><first>Paul</first><last>Nulty</last></author>
-      <author><first>Fintan</first><last>Costello</last></author>
+      <author id="fintan-j-costello"><first>Fintan</first><last>Costello</last></author>
       <pages>58–63</pages>
       <url hash="267ffd55">W09-2409</url>
       <bibkey>nulty-costello-2009-using</bibkey>
@@ -3499,7 +3499,7 @@
     <paper id="10">
       <title>Improvements To Monolingual <fixed-case>E</fixed-case>nglish Word Sense Disambiguation</title>
       <author><first>Weiwei</first><last>Guo</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>64–69</pages>
       <url hash="65884462">W09-2410</url>
       <bibkey>guo-diab-2009-improvements</bibkey>
@@ -3507,8 +3507,8 @@
     <paper id="11">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 1: Coreference Resolution in Multiple Languages</title>
       <author><first>Marta</first><last>Recasens</last></author>
-      <author><first>Toni</first><last>Martí</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="m-antonia-marti"><first>Toni</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <author><first>Lluís</first><last>Màrquez</last></author>
       <author><first>Emili</first><last>Sapena</last></author>
       <pages>70–75</pages>
@@ -3518,8 +3518,8 @@
     <paper id="12">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 2: Cross-Lingual Lexical Substitution</title>
       <author><first>Ravi</first><last>Sinha</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>76–81</pages>
       <url hash="72f00436">W09-2412</url>
       <bibkey>sinha-etal-2009-semeval</bibkey>
@@ -3527,14 +3527,14 @@
     <paper id="13">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 3: Cross-lingual Word Sense Disambiguation</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <pages>82–87</pages>
       <url hash="7f671b11">W09-2413</url>
       <bibkey>lefever-hoste-2009-semeval</bibkey>
     </paper>
     <paper id="14">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 7: Argument Selection and Coercion</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <pages>88–93</pages>
       <url hash="f0a527c4">W09-2414</url>
@@ -3545,12 +3545,12 @@
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Zornitsa</first><last>Kozareva</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
-      <author><first>Lorenza</first><last>Romano</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="lorenza-romano"><first>Lorenza</first><last>Romano</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>94–99</pages>
       <url hash="d3715524">W09-2415</url>
       <bibkey>hendrickx-etal-2009-semeval</bibkey>
@@ -3559,9 +3559,9 @@
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 9: The Interpretation of Noun Compounds Using Paraphrasing Verbs and Prepositions</title>
       <author><first>Cristina</first><last>Butnariu</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Diarmuid</first><last>Ó Séaghdha</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Tony</first><last>Veale</last></author>
       <pages>100–105</pages>
       <url hash="f7bacdcd">W09-2416</url>
@@ -3572,15 +3572,15 @@
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Collin</first><last>Baker</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="collin-f-baker"><first>Collin</first><last>Baker</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>106–111</pages>
       <url hash="3c1b7826">W09-2417</url>
       <bibkey>ruppenhofer-etal-2009-semeval</bibkey>
     </paper>
     <paper id="18">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 13: Evaluating Events, Time Expressions, and Temporal Relations (<fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2)</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>112–116</pages>
       <url hash="f2ff8d6b">W09-2418</url>
@@ -3589,7 +3589,7 @@
     <paper id="19">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 14: Evaluation Setting for Word Sense Induction &amp; Disambiguation Systems</title>
       <author><first>Suresh</first><last>Manandhar</last></author>
-      <author><first>Ioannis</first><last>Klapaftis</last></author>
+      <author id="ioannis-klapaftis"><first>Ioannis</first><last>Klapaftis</last></author>
       <pages>117–122</pages>
       <url hash="896c54bc">W09-2419</url>
       <bibkey>manandhar-klapaftis-2009-semeval</bibkey>
@@ -3597,7 +3597,7 @@
     <paper id="20">
       <title><fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 17: All-words Word Sense Disambiguation on a Specific Domain</title>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <author><first>Andrea</first><last>Marchetti</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
@@ -3608,10 +3608,10 @@
     </paper>
     <paper id="21">
       <title>Relation detection between named entities: report of a shared task</title>
-      <author><first>Cláudia</first><last>Freitas</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
       <author><first>Diana</first><last>Santos</last></author>
       <author><first>Cristina</first><last>Mota</last></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Paula</first><last>Carvalho</last></author>
       <pages>129–137</pages>
       <url hash="81a35f7a">W09-2421</url>
@@ -3619,15 +3619,15 @@
     </paper>
     <paper id="22">
       <title>Error Analysis of the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Temporal Relation Identification Task</title>
-      <author><first>Chong Min</first><last>Lee</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <pages>138–145</pages>
       <url hash="8535ec72">W09-2422</url>
       <bibkey>lee-katz-2009-error</bibkey>
     </paper>
     <paper id="23">
       <title>Automatic Recognition of Logical Relations for <fixed-case>E</fixed-case>nglish, <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese in the <fixed-case>GLARF</fixed-case> Framework</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Heng</first><last>Ji</last></author>
@@ -3645,9 +3645,9 @@
       <url hash="1a8e76b6">W09-25</url>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <editor><first>Ido</first><last>Dagan</last></editor>
-      <editor><first>Christopher</first><last>Manning</last></editor>
+      <editor id="christopher-d-manning"><first>Christopher</first><last>Manning</last></editor>
       <editor><first>Marco</first><last>Pennacchiotti</last></editor>
-      <editor><first>Fabio Massimo</first><last>Zanzotto</last></editor>
+      <editor id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -3660,8 +3660,8 @@
     </frontmatter>
     <paper id="1">
       <title>Multi-word expressions in textual inference: Much ado about nothing?</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Christopher D.</first><last>Manning</last></author>
       <pages>1–9</pages>
       <url hash="8e744425">W09-2501</url>
@@ -3692,7 +3692,7 @@
     <paper id="5">
       <title>Optimizing Textual Entailment Recognition Using Particle Swarm Optimization</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>36–43</pages>
       <url hash="97173b73">W09-2505</url>
       <bibkey>mehdad-magnini-2009-optimizing</bibkey>
@@ -3700,7 +3700,7 @@
     <paper id="6">
       <title>Ranking Paraphrases in Context</title>
       <author><first>Stefan</first><last>Thater</last></author>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Manfred</first><last>Pinkal</last></author>
       <pages>44–47</pages>
       <url hash="5d7587e8">W09-2506</url>
@@ -3723,7 +3723,7 @@
     </paper>
     <paper id="9">
       <title>Automating Model Building in c-rater</title>
-      <author><first>Jana</first><last>Sukkarieh</last></author>
+      <author id="jana-sukkarieh"><first>Jana</first><last>Sukkarieh</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
       <pages>61–69</pages>
       <url hash="dea75c68">W09-2509</url>
@@ -3742,7 +3742,7 @@
     <meta>
       <booktitle>Proceedings of the 2009 Workshop on Grammar Engineering Across Frameworks (<fixed-case>GEAF</fixed-case> 2009)</booktitle>
       <url hash="5ac72be6">W09-26</url>
-      <editor><first>Tracy Holloway</first><last>King</last></editor>
+      <editor id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></editor>
       <editor><first>Marianne</first><last>Santaholma</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -3764,7 +3764,7 @@
     </paper>
     <paper id="2">
       <title>Developing <fixed-case>G</fixed-case>erman Semantics on the basis of Parallel <fixed-case>LFG</fixed-case> Grammars</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>10–18</pages>
       <url hash="fe00040b">W09-2602</url>
       <bibkey>zarriess-2009-developing</bibkey>
@@ -3801,10 +3801,10 @@
     </paper>
     <paper id="7">
       <title>Using Artificially Generated Data to Evaluate Statistical Machine Translation</title>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
-      <author><first>Beth Ann</first><last>Hockey</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="beth-ann-hockey"><first>Beth Ann</first><last>Hockey</last></author>
       <author><first>Yukie</first><last>Nakao</last></author>
       <pages>54–62</pages>
       <url hash="48397d5c">W09-2607</url>
@@ -3822,7 +3822,7 @@
       <title>A generalized method for iterative error mining in parsing results</title>
       <author><first>Daniël</first><last>de Kok</last></author>
       <author><first>Jianqiang</first><last>Ma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>71–79</pages>
       <url hash="38b6a8e4">W09-2609</url>
       <bibkey>de-kok-etal-2009-generalized</bibkey>
@@ -3832,8 +3832,8 @@
     <meta>
       <booktitle>Proceedings of the 2009 Workshop on Knowledge and Reasoning for Answering Questions (<fixed-case>KRAQ</fixed-case> 2009)</booktitle>
       <url hash="97b8ab21">W09-27</url>
-      <editor><first>Patrick</first><last>Saint-Dizier</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -3846,7 +3846,7 @@
     </frontmatter>
     <paper id="1">
       <title>Knowledge and Reasoning for Medical Question-Answering</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>1–2</pages>
       <url hash="9f0314e0">W09-2701</url>
       <bibkey>zweigenbaum-2009-knowledge</bibkey>
@@ -3871,9 +3871,9 @@
     </paper>
     <paper id="4">
       <title>Some Challenges in the Design of Comparative and Evaluative Question Answering Systems</title>
-      <author><first>Nathalie</first><last>Lim</last></author>
+      <author id="nathalie-rose-lim"><first>Nathalie</first><last>Lim</last></author>
       <author><first>Patrick</first><last>Saint-Dizier</last></author>
-      <author><first>Rachel</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel</first><last>Roxas</last></author>
       <pages>15–18</pages>
       <url hash="07d776dd">W09-2704</url>
       <bibkey>lim-etal-2009-challenges</bibkey>
@@ -3891,8 +3891,8 @@
     <meta>
       <booktitle>Proceedings of the 2009 Workshop on Language Generation and Summarisation (<fixed-case>UCNLG</fixed-case>+<fixed-case>S</fixed-case>um 2009)</booktitle>
       <url hash="74902f91">W09-28</url>
-      <editor><first>Anja</first><last>Belz</last></editor>
-      <editor><first>Roger</first><last>Evans</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
+      <editor id="roger-evans"><first>Roger</first><last>Evans</last></editor>
       <editor><first>Sebastian</first><last>Varges</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -3906,24 +3906,24 @@
     </frontmatter>
     <paper id="2">
       <title>Query-focused Summarization Using Text-to-Text Generation: When Information Comes from Multilingual Sources</title>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>3</pages>
       <url hash="69e394ca">W09-2802</url>
       <bibkey>mckeown-2009-query</bibkey>
     </paper>
     <paper id="4">
       <title>Optimization-based Content Selection for Opinion Summarization</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <pages>7–14</pages>
       <url hash="174db57d">W09-2804</url>
       <bibkey>cheung-etal-2009-optimization</bibkey>
     </paper>
     <paper id="5">
       <title>Unsupervised Induction of Sentence Compression Rules</title>
-      <author><first>João</first><last>Cordeiro</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="joao-paulo-cordeiro"><first>João</first><last>Cordeiro</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <author><first>Pavel</first><last>Brazdil</last></author>
       <pages>15–22</pages>
       <url hash="f19c606d">W09-2805</url>
@@ -3932,7 +3932,7 @@
     <paper id="6">
       <title>Evaluation of Automatic Summaries: Metrics under Varying Data Conditions</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <pages>23–30</pages>
       <url hash="e398589d">W09-2806</url>
       <bibkey>owczarzak-dang-2009-evaluation</bibkey>
@@ -3950,7 +3950,7 @@
       <author><first>Akinori</first><last>Kinoshita</last></author>
       <author><first>Takeshi</first><last>Kobayakawa</last></author>
       <author><first>Tadashi</first><last>Kumano</last></author>
-      <author><first>Naoto</first><last>Katoh</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Katoh</last></author>
       <pages>39–47</pages>
       <url hash="138bcc04">W09-2808</url>
       <bibkey>tanaka-etal-2009-syntax</bibkey>
@@ -3958,7 +3958,7 @@
     <paper id="9">
       <title>A Parse-and-Trim Approach with Information Significance for <fixed-case>C</fixed-case>hinese Sentence Compression</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>48–55</pages>
       <url hash="7ed02b9d">W09-2809</url>
       <bibkey>xu-grishman-2009-parse</bibkey>
@@ -3966,7 +3966,7 @@
     <paper id="11">
       <title>Visual Development Process for Automatic Generation of Digital Games Narrative Content</title>
       <author><first>Maria Fernanda</first><last>Caropreso</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Shahzad</first><last>Khan</last></author>
       <author><first>Fazel</first><last>Keshtkar</last></author>
       <pages>59–62</pages>
@@ -3976,9 +3976,9 @@
     <paper id="12">
       <title>Reducing Redundancy in Multi-document Summarization Using Lexical Semantic Similarity</title>
       <author><first>Iris</first><last>Hendrickx</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>63–66</pages>
       <url hash="a7f11cbc">W09-2812</url>
       <bibkey>hendrickx-etal-2009-reducing</bibkey>
@@ -3988,7 +3988,7 @@
       <author><first>Mohit</first><last>Kumar</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
       <author><first>Sachin</first><last>Agarwal</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>67–71</pages>
       <url hash="283420ab">W09-2813</url>
       <bibkey>kumar-etal-2009-non</bibkey>
@@ -4024,7 +4024,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>ICSI</fixed-case>-<fixed-case>CRF</fixed-case>: The Generation of References to the Main Subject and Named Entities Using Conditional Random Fields</title>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>99–100</pages>
       <url hash="c4985b2b">W09-2818</url>
@@ -4033,7 +4033,7 @@
     <paper id="19">
       <title><fixed-case>UD</fixed-case>el: Generating Referring Expressions Guided by Psycholinguistc Findings</title>
       <author><first>Charles</first><last>Greenbacker</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>101–102</pages>
       <url hash="7273a8f0">W09-2819</url>
       <bibkey>greenbacker-mccoy-2009-udel</bibkey>
@@ -4041,7 +4041,7 @@
     <paper id="20">
       <title><fixed-case>JUNLG</fixed-case>-<fixed-case>MSR</fixed-case>: A Machine Learning Approach of Main Subject Reference Selection with Rule Based Improvement</title>
       <author><first>Samir</first><last>Gupta</last></author>
-      <author><first>Sivaji</first><last>Bandopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandopadhyay</last></author>
       <pages>103–104</pages>
       <url hash="d7939b94">W09-2820</url>
       <bibkey>gupta-bandopadhyay-2009-junlg</bibkey>
@@ -4049,14 +4049,14 @@
     <paper id="21">
       <title><fixed-case>UD</fixed-case>el: Extending Reference Generation to Multiple Entities</title>
       <author><first>Charles</first><last>Greenbacker</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>105–106</pages>
       <url hash="cab2b5ef">W09-2821</url>
       <bibkey>greenbacker-mccoy-2009-udel-extending</bibkey>
     </paper>
     <paper id="22">
       <title><fixed-case>WLV</fixed-case>: A Confidence-based Machine Learning Method for the <fixed-case>GREC</fixed-case>-<fixed-case>NEG</fixed-case>’09 Task</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Iustin</first><last>Dornescu</last></author>
       <pages>107–108</pages>
       <url hash="c04e5a3d">W09-2822</url>
@@ -4069,7 +4069,7 @@
       <url hash="44f31bef">W09-29</url>
       <editor><first>Dimitra</first><last>Anastasiou</last></editor>
       <editor><first>Chikara</first><last>Hashimoto</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Su Nam</first><last>Kim</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
@@ -4086,7 +4086,7 @@
       <author><first>Helena</first><last>Caseli</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>André</first><last>Machado</last></author>
-      <author><first>Maria José</first><last>Finatto</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José</first><last>Finatto</last></author>
       <pages>1–8</pages>
       <url hash="0fe66d12">W09-2901</url>
       <bibkey>caseli-etal-2009-statistically</bibkey>
@@ -4101,7 +4101,7 @@
     </paper>
     <paper id="3">
       <title>Verb Noun Construction <fixed-case>MWE</fixed-case> Token Classification</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Pravin</first><last>Bhutada</last></author>
       <pages>17–22</pages>
       <url hash="8e9a5420">W09-2903</url>
@@ -4109,7 +4109,7 @@
     </paper>
     <paper id="4">
       <title>Exploiting Translational Correspondences for Pattern-Independent <fixed-case>MWE</fixed-case> Identification</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>23–30</pages>
       <url hash="4ab9775b">W09-2904</url>
@@ -4126,7 +4126,7 @@
     </paper>
     <paper id="6">
       <title>Mining Complex Predicates In <fixed-case>H</fixed-case>indi Using A Parallel <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Corpus</title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <pages>40–46</pages>
       <url hash="b0f6518b">W09-2906</url>
       <bibkey>sinha-2009-mining</bibkey>
@@ -4134,7 +4134,7 @@
     <paper id="7">
       <title>Improving Statistical Machine Translation Using Domain Bilingual Multiword Expressions</title>
       <author><first>Zhixiang</first><last>Ren</last></author>
-      <author><first>Yajuan</first><last>Lü</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lü</last></author>
       <author><first>Jie</first><last>Cao</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Yun</first><last>Huang</last></author>
@@ -4169,8 +4169,8 @@
       <url hash="0bc52f71">W09-30</url>
       <editor><first>Manfred</first><last>Stede</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
-      <editor><first>Nancy</first><last>Ide</last></editor>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -4184,7 +4184,7 @@
     <paper id="1">
       <title>A Cognitive-based Annotation System for Emotion Computing</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Sophia Y. M.</first><last>Lee</last></author>
+      <author id="sophia-y-m-lee"><first>Sophia Y. M.</first><last>Lee</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>1–9</pages>
       <url hash="c1f5b3ba">W09-3001</url>
@@ -4192,7 +4192,7 @@
     </paper>
     <paper id="2">
       <title>Complex Linguistic Annotation – No Easy Way Out! A Case from <fixed-case>B</fixed-case>angla and <fixed-case>H</fixed-case>indi <fixed-case>POS</fixed-case> Labeling Tasks</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Priyanka</first><last>Biswas</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <author><first>Kalika</first><last>Bali</last></author>
@@ -4229,14 +4229,14 @@
     <paper id="6">
       <title>Annotating Subordinators in the <fixed-case>T</fixed-case>urkish Discourse Bank</title>
       <author><first>Deniz</first><last>Zeyrek</last></author>
-      <author><first>Umit Deniz</first><last>Turan</last></author>
-      <author><first>Cem</first><last>Bozsahin</last></author>
-      <author><first>Ruket</first><last>Cakici</last></author>
-      <author><first>Ayisigi B.</first><last>Sevdik-Calli</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
-      <author><first>Berfin</first><last>Aktas</last></author>
-      <author><first>İhsan</first><last>Yalcinkaya</last></author>
-      <author><first>Hale</first><last>Ogel</last></author>
+      <author id="umit-deniz-turan"><first>Umit Deniz</first><last>Turan</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
+      <author id="ayisigi-b-sevdik-calli"><first>Ayisigi B.</first><last>Sevdik-Calli</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktas</last></author>
+      <author id="ihsan-yalcinkaya"><first>İhsan</first><last>Yalcinkaya</last></author>
+      <author id="hale-ogel-balaban"><first>Hale</first><last>Ogel</last></author>
       <pages>44–47</pages>
       <url hash="b4fa1fd9">W09-3006</url>
       <bibkey>zeyrek-etal-2009-annotating</bibkey>
@@ -4250,7 +4250,7 @@
     </paper>
     <paper id="8">
       <title>Designing a Language Game for Collecting Coreference Annotation</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Pavel</first><last>Schlesinger</last></author>
       <pages>52–55</pages>
@@ -4259,17 +4259,17 @@
     </paper>
     <paper id="9">
       <title>Explorations in Automatic Image Annotation using Textual Features</title>
-      <author><first>Chee Wee</first><last>Leong</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>56–59</pages>
       <url hash="94237f08">W09-3009</url>
       <bibkey>leong-mihalcea-2009-explorations</bibkey>
     </paper>
     <paper id="10">
       <title>Human Evaluation of Article and Noun Number Usage: Influences of Context and Construction Variability</title>
-      <author><first>John</first><last>Lee</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>60–63</pages>
       <url hash="29a69e2b">W09-3010</url>
       <bibkey>lee-etal-2009-human</bibkey>
@@ -4284,10 +4284,10 @@
     </paper>
     <paper id="12">
       <title>Committed Belief Annotation and Tagging</title>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Weiwei</first><last>Guo</last></author>
       <pages>68–73</pages>
@@ -4296,8 +4296,8 @@
     </paper>
     <paper id="13">
       <title>Annotation of Sentence Structure; Capturing the Relationship among Clauses in <fixed-case>C</fixed-case>zech Sentences</title>
-      <author><first>Markéta</first><last>Lopatková</last></author>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Petr</first><last>Homola</last></author>
       <pages>74–81</pages>
       <url hash="b648af91">W09-3013</url>
@@ -4314,8 +4314,8 @@
     </paper>
     <paper id="15">
       <title>Syntactic annotation of spoken utterances: A case study on the <fixed-case>C</fixed-case>zech Academic Corpus</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>90–98</pages>
       <url hash="d3d5c873">W09-3015</url>
       <bibkey>hladka-uresova-2009-syntactic</bibkey>
@@ -4323,7 +4323,7 @@
     <paper id="16">
       <title>High-Performance High-Volume Layered Corpora Annotation</title>
       <author><first>Tiago</first><last>Luís</last></author>
-      <author><first>David</first><last>Martins de Matos</last></author>
+      <author id="david-martins-de-matos"><first>David</first><last>Martins de Matos</last></author>
       <pages>99–107</pages>
       <url hash="5116817b">W09-3016</url>
       <bibkey>luis-martins-de-matos-2009-high</bibkey>
@@ -4351,7 +4351,7 @@
       <author><first>Michiko</first><last>Kosaka</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Ang</first><last>Sun</last></author>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Shasha</first><last>Liao</last></author>
@@ -4361,8 +4361,8 @@
     </paper>
     <paper id="20">
       <title>Using Parallel Propbanks to enhance Word-alignments</title>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>121–124</pages>
       <url hash="28415790">W09-3020</url>
@@ -4370,7 +4370,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et as Complementary Resources for Annotation</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <pages>125–129</pages>
       <url hash="3a90dd6a">W09-3021</url>
@@ -4382,7 +4382,7 @@
       <author><first>Laurie</first><last>Buscail</last></author>
       <author><first>Marie</first><last>Garnier</last></author>
       <author><first>Arnaud</first><last>Rykner</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>130–133</pages>
       <url hash="14a20d59">W09-3022</url>
       <bibkey>albert-etal-2009-annotating</bibkey>
@@ -4410,7 +4410,7 @@
       <title>Towards a Methodology for Named Entities Annotation</title>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
-      <author><first>Adeline</first><last>Nazarenko</last></author>
+      <author id="adeline-nazarenko"><first>Adeline</first><last>Nazarenko</last></author>
       <pages>142–145</pages>
       <url hash="285449f0">W09-3025</url>
       <bibkey>fort-etal-2009-towards</bibkey>
@@ -4430,7 +4430,7 @@
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
       <author><first>Eric</first><last>Nichols</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>150–153</pages>
       <url hash="4181c980">W09-3027</url>
       <bibkey>murakami-etal-2009-annotating</bibkey>
@@ -4448,19 +4448,19 @@
       <author><first>Umangi</first><last>Oza</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Sudheer</first><last>Kolachina</last></author>
-      <author><first>Dipti</first><last>Misra Sharma</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra Sharma</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>158–161</pages>
       <url hash="be455ee6">W09-3029</url>
       <bibkey>oza-etal-2009-hindi</bibkey>
     </paper>
     <paper id="30">
       <title>Simple Parser for <fixed-case>I</fixed-case>ndian Languages in a Dependency Framework</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Mridul</first><last>Gupta</last></author>
       <author><first>Vineet</first><last>Yadav</last></author>
       <author><first>Karthik</first><last>Gali</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>162–165</pages>
       <url hash="6242fa15">W09-3030</url>
       <bibkey>bharati-etal-2009-simple</bibkey>
@@ -4491,8 +4491,8 @@
     <paper id="34">
       <title>The <fixed-case>SILT</fixed-case> and <fixed-case>F</fixed-case>la<fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>et International Collaboration for Interoperability</title>
       <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <pages>178–181</pages>
       <url hash="118caa37">W09-3034</url>
@@ -4500,22 +4500,22 @@
     </paper>
     <paper id="35">
       <title>Building a Large Syntactically-Annotated Corpus of <fixed-case>V</fixed-case>ietnamese</title>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
-      <author><first>Xuan-Luong</first><last>Vu</last></author>
-      <author><first>Thi-Minh-Huyen</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="xuan-luong-vu"><first>Xuan-Luong</first><last>Vu</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi-Minh-Huyen</first><last>Nguyen</last></author>
       <author><first>Van-Hiep</first><last>Nguyen</last></author>
-      <author><first>Hong-Phuong</first><last>Le</last></author>
+      <author id="phuong-le-hong"><first>Hong-Phuong</first><last>Le</last></author>
       <pages>182–185</pages>
       <url hash="6e6ccb5c">W09-3035</url>
       <bibkey>nguyen-etal-2009-building</bibkey>
     </paper>
     <paper id="36">
       <title>A Multi-Representational and Multi-Layered Treebank for <fixed-case>H</fixed-case>indi/<fixed-case>U</fixed-case>rdu</title>
-      <author><first>Rajesh</first><last>Bhatt</last></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last></author>
       <author><first>Bhuvana</first><last>Narasimhan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>186–189</pages>
       <url hash="3938e1eb">W09-3036</url>
@@ -4527,7 +4527,7 @@
       <booktitle>Proceedings of the 2nd Workshop on Building and Using Comparable Corpora: from Parallel to Non-parallel Corpora (<fixed-case>BUCC</fixed-case>)</booktitle>
       <url hash="5a10082c">W09-31</url>
       <editor><first>Pascale</first><last>Fung</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Singapore</address>
@@ -4541,7 +4541,7 @@
     </frontmatter>
     <paper id="1">
       <title>Repetition and Language Models and Comparable Corpora</title>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <pages>1</pages>
       <url hash="05b0b1fb">W09-3101</url>
       <bibkey>church-2009-repetition</bibkey>
@@ -4566,7 +4566,7 @@
     <paper id="4">
       <title>An Analysis of the Calque Phenomena Based on Comparable Corpora</title>
       <author><first>Marie</first><last>Garnier</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>19–22</pages>
       <url hash="3de8185c">W09-3104</url>
       <bibkey>garnier-saint-dizier-2009-analysis</bibkey>
@@ -4583,7 +4583,7 @@
       <title>Train the Machine with What It Can <fixed-case>L</fixed-case>earn—<fixed-case>C</fixed-case>orpus Selection for <fixed-case>SMT</fixed-case></title>
       <author><first>Xiwu</first><last>Han</last></author>
       <author><first>Hanzhang</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>27–33</pages>
       <url hash="c44090a8">W09-3106</url>
       <bibkey>han-etal-2009-train</bibkey>
@@ -4615,15 +4615,15 @@
       <title>Compilation of Specialized Comparable Corpora in <fixed-case>F</fixed-case>rench and <fixed-case>J</fixed-case>apanese</title>
       <author><first>Lorraine</first><last>Goeuriot</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>55–63</pages>
       <url hash="9669d3c7">W09-3110</url>
       <bibkey>goeuriot-etal-2009-compilation</bibkey>
     </paper>
     <paper id="11">
       <title>Toward Categorization of Sign Language Corpora</title>
-      <author><first>Jérémie</first><last>Segouat</last></author>
-      <author><first>Annelies</first><last>Braffort</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
+      <author id="annelies-braffort"><first>Annelies</first><last>Braffort</last></author>
       <pages>64–67</pages>
       <url hash="24eb67da">W09-3111</url>
       <bibkey>segouat-braffort-2009-toward</bibkey>
@@ -4649,7 +4649,7 @@
     </frontmatter>
     <paper id="1">
       <title>Social (distributed) language modeling, clustering and dialectometry</title>
-      <author><first>David</first><last>Ellis</last></author>
+      <author id="david-ellis"><first>David</first><last>Ellis</last></author>
       <pages>1–4</pages>
       <url hash="44b74c9e">W09-3201</url>
       <bibkey>ellis-2009-social</bibkey>
@@ -4659,7 +4659,7 @@
       <author><first>Sitabhra</first><last>Sinha</last></author>
       <author><first>Raj Kumar</first><last>Pan</last></author>
       <author><first>Nisha</first><last>Yadav</last></author>
-      <author><first>Mayank</first><last>Vahia</last></author>
+      <author id="mayank-n-vahia"><first>Mayank</first><last>Vahia</last></author>
       <author><first>Iravatham</first><last>Mahadevan</last></author>
       <pages>5–13</pages>
       <url hash="15187326">W09-3202</url>
@@ -4676,8 +4676,8 @@
     <paper id="4">
       <title>Random Walks for Text Semantic Similarity</title>
       <author><first>Daniel</first><last>Ramage</last></author>
-      <author><first>Anna N.</first><last>Rafferty</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="anna-n-rafferty"><first>Anna N.</first><last>Rafferty</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>23–31</pages>
       <url hash="c04476e6">W09-3204</url>
       <bibkey>ramage-etal-2009-random</bibkey>
@@ -4694,18 +4694,18 @@
       <title><fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>alk: Random walks on <fixed-case>W</fixed-case>ikipedia for Semantic Relatedness</title>
       <author><first>Eric</first><last>Yeh</last></author>
       <author><first>Daniel</first><last>Ramage</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>41–49</pages>
       <url hash="7dffc4c9">W09-3206</url>
       <bibkey>yeh-etal-2009-wikiwalk</bibkey>
     </paper>
     <paper id="7">
       <title>Measuring semantic relatedness with vector space models and random walks</title>
-      <author><first>Amaç</first><last>Herdağdelen</last></author>
+      <author id="amac-herdagdelen"><first>Amaç</first><last>Herdağdelen</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>50–53</pages>
       <url hash="70d1624c">W09-3207</url>
       <bibkey>herdagdelen-etal-2009-measuring</bibkey>
@@ -4731,7 +4731,7 @@
       <author><first>Swapna</first><last>Somasundaran</last></author>
       <author><first>Galileo</first><last>Namata</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>66–74</pages>
       <url hash="32ca6c42">W09-3210</url>
       <bibkey>somasundaran-etal-2009-opinion</bibkey>
@@ -4771,7 +4771,7 @@
     <paper id="1">
       <title>A Novel Approach to Automatic Gazetteer Generation using <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Ziqi</first><last>Zhang</last></author>
-      <author><first>José</first><last>Iria</last></author>
+      <author id="jose-iria"><first>José</first><last>Iria</last></author>
       <pages>1–9</pages>
       <url hash="9403f656">W09-3301</url>
       <bibkey>zhang-iria-2009-novel</bibkey>
@@ -4782,7 +4782,7 @@
       <author><first>Nicky</first><last>Ringland</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>10–18</pages>
       <url hash="2b0f85ec">W09-3302</url>
       <bibkey>balasuriya-etal-2009-named</bibkey>
@@ -4792,8 +4792,8 @@
       <author><first>Emmanuel</first><last>Navarro</last></author>
       <author><first>Franck</first><last>Sajous</last></author>
       <author><first>Bruno</first><last>Gaume</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
-      <author><first>ShuKai</first><last>Hsieh</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
+      <author id="shu-kai-hsieh"><first>ShuKai</first><last>Hsieh</last></author>
       <author><first>Ivy</first><last>Kuo</last></author>
       <author><first>Pierre</first><last>Magistry</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
@@ -4805,7 +4805,7 @@
       <title>Using the <fixed-case>W</fixed-case>iktionary Graph Structure for Synonym Detection</title>
       <author><first>Timothy</first><last>Weale</last></author>
       <author><first>Chris</first><last>Brew</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>28–31</pages>
       <url hash="dd8619c8">W09-3304</url>
       <bibkey>weale-etal-2009-using</bibkey>
@@ -4822,7 +4822,7 @@
       <title>Evaluating a Statistical <fixed-case>CCG</fixed-case> Parser on <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Matthew</first><last>Honnibal</last></author>
       <author><first>Joel</first><last>Nothman</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>38–41</pages>
       <url hash="469cf71d">W09-3306</url>
       <bibkey>honnibal-etal-2009-evaluating</bibkey>
@@ -4839,7 +4839,7 @@
       <title>Acquiring High Quality Non-Expert Knowledge from On-Demand Workforce</title>
       <author><first>Donghui</first><last>Feng</last></author>
       <author><first>Sveva</first><last>Besana</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <pages>51–56</pages>
       <url hash="5fccdfc9">W09-3308</url>
       <bibkey>feng-etal-2009-acquiring</bibkey>
@@ -4847,8 +4847,8 @@
     <paper id="9">
       <title>Constructing an Anaphorically Annotated Corpus with Non-Experts: Assessing the Quality of Collaborative Annotations</title>
       <author><first>Jon</first><last>Chamberlain</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>57–62</pages>
       <url hash="540a39d8">W09-3309</url>
       <bibkey>chamberlain-etal-2009-constructing</bibkey>
@@ -4858,7 +4858,7 @@
     <meta>
       <booktitle>Proceedings of the 7th Workshop on <fixed-case>A</fixed-case>sian Language Resources (<fixed-case>ALR</fixed-case>7)</booktitle>
       <url hash="04251ddc">W09-34</url>
-      <editor><first>Hammam</first><last>Riza</last></editor>
+      <editor id="hammam-riza"><first>Hammam</first><last>Riza</last></editor>
       <editor><first>Virach</first><last>Sornlertlamvanich</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
@@ -4884,10 +4884,10 @@
     </paper>
     <paper id="2">
       <title>An Empirical Study of <fixed-case>V</fixed-case>ietnamese Noun Phrase Chunking with Discriminative Sequence Models</title>
-      <author><first>Le Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le Minh</first><last>Nguyen</last></author>
       <author><first>Huong Thao</first><last>Nguyen</last></author>
-      <author><first>Phuong Thai</first><last>Nguyen</last></author>
-      <author><first>Tu Bao</first><last>Ho</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong Thai</first><last>Nguyen</last></author>
+      <author id="tu-bao-ho"><first>Tu Bao</first><last>Ho</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>9–16</pages>
       <url hash="fcf8c8f0">W09-3402</url>
@@ -4916,7 +4916,7 @@
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <author><first>Teruhisa</first><last>Misu</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>32–39</pages>
       <url hash="2a7d1974">W09-3405</url>
@@ -4933,7 +4933,7 @@
     </paper>
     <paper id="7">
       <title>Automated Mining Of Names Using Parallel <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Corpus</title>
-      <author><first>R. Mahesh K.</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R. Mahesh K.</first><last>Sinha</last></author>
       <pages>48–54</pages>
       <url hash="3cf50b41">W09-3407</url>
       <bibkey>sinha-2009-automated</bibkey>
@@ -4942,15 +4942,15 @@
       <title>Basic Language Resources for Diverse <fixed-case>A</fixed-case>sian Languages: A Streamlined Approach for Resource Creation</title>
       <author><first>Heather</first><last>Simpson</last></author>
       <author><first>Kazuaki</first><last>Maeda</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <pages>55–62</pages>
       <url hash="4cd4b641">W09-3408</url>
       <bibkey>simpson-etal-2009-basic</bibkey>
     </paper>
     <paper id="9">
       <title>Finite-State Description of <fixed-case>V</fixed-case>ietnamese Reduplication</title>
-      <author><last>Le Hong</last><first>Phuong</first></author>
-      <author><last>Nguyen</last><first>Thi Minh Huyen</first></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le Hong</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
       <author><last>Roussanaly</last><first>Azim</first></author>
       <pages>63–69</pages>
       <url hash="aa176e56">W09-3409</url>
@@ -4969,7 +4969,7 @@
       <title><fixed-case>B</fixed-case>engali Verb Subcategorization Frame Acquisition - A Baseline Model</title>
       <author><first>Somnath</first><last>Banerjee</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>76–83</pages>
       <url hash="41b6d74e">W09-3411</url>
       <bibkey>banerjee-etal-2009-bengali</bibkey>
@@ -4978,7 +4978,7 @@
       <title>Phonological and Logographic Influences on Errors in Written <fixed-case>C</fixed-case>hinese Words</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Kan-Wen</first><last>Tien</last></author>
-      <author><first>Min-Hua</first><last>Lai</last></author>
+      <author id="min-hua-lai"><first>Min-Hua</first><last>Lai</last></author>
       <author><first>Yi-Hsuan</first><last>Chuang</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <pages>84–91</pages>
@@ -4997,7 +4997,7 @@
     <paper id="14">
       <title>A Syntactic Resource for <fixed-case>T</fixed-case>hai: <fixed-case>CG</fixed-case> Treebank</title>
       <author><first>Taneth</first><last>Ruangrajitpakorn</last></author>
-      <author><first>Kanokorn</first><last>Trakultaweekoon</last></author>
+      <author id="kanokorn-trakultaweekoon"><first>Kanokorn</first><last>Trakultaweekoon</last></author>
       <author><first>Thepchai</first><last>Supnithi</last></author>
       <pages>96–102</pages>
       <url hash="4f198cf8">W09-3414</url>
@@ -5006,7 +5006,7 @@
     <paper id="15">
       <title>Part of Speech Tagging for <fixed-case>M</fixed-case>ongolian Corpus</title>
       <author><first>Purev</first><last>Jaimai</last></author>
-      <author><first>Odbayar</first><last>Chimeddorj</last></author>
+      <author id="odbayar-chimeddorj"><first>Odbayar</first><last>Chimeddorj</last></author>
       <pages>103–106</pages>
       <url hash="f746bb98">W09-3415</url>
       <bibkey>jaimai-chimeddorj-2009-part</bibkey>
@@ -5033,7 +5033,7 @@
     <paper id="18">
       <title><fixed-case>CWN</fixed-case>-<fixed-case>LMF</fixed-case>: <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in the <fixed-case>L</fixed-case>exical <fixed-case>M</fixed-case>arkup <fixed-case>F</fixed-case>ramework</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>123–130</pages>
       <url hash="fddf7bbf">W09-3418</url>
@@ -5041,9 +5041,9 @@
     </paper>
     <paper id="19">
       <title><fixed-case>P</fixed-case>hilippine Language Resources: Trends and Directions</title>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <author><first>Charibeth</first><last>Cheng</last></author>
-      <author><first>Nathalie Rose</first><last>Lim</last></author>
+      <author id="nathalie-rose-lim"><first>Nathalie Rose</first><last>Lim</last></author>
       <pages>131–138</pages>
       <url hash="9f4d72b6">W09-3419</url>
       <bibkey>roxas-etal-2009-philippine</bibkey>
@@ -5065,14 +5065,14 @@
       <title>Query Expansion using <fixed-case>LMF</fixed-case>-Compliant Lexical Resources</title>
       <author><first>Takenobu</first><last>Tokunaga</last></author>
       <author><first>Dain</first><last>Kaplan</last></author>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <author><first>Virach</first><last>Sornlertlamvanich</last></author>
       <author><first>Thatsanee</first><last>Charoenporn</last></author>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Kiyoaki</first><last>Shirai</last></author>
       <pages>145–152</pages>
       <url hash="a5605c04">W09-3421</url>
@@ -5089,7 +5089,7 @@
     </paper>
     <paper id="23">
       <title>The <fixed-case>FL</fixed-case>a<fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>et Thematic Network: A Global Forum for Cooperation</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <pages>161–164</pages>
       <url hash="c7db9fd6">W09-3423</url>
@@ -5111,7 +5111,7 @@
     </paper>
     <paper id="26">
       <title>Word Segmentation Standard in <fixed-case>C</fixed-case>hinese, <fixed-case>J</fixed-case>apanese and <fixed-case>K</fixed-case>orean</title>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
       <author><first>Kyoko</first><last>Kanzaki</last></author>
       <author><first>Hansaem</first><last>Kim</last></author>
@@ -5127,7 +5127,7 @@
       <booktitle>Proceedings of the 2009 Named Entities Workshop: Shared Task on Transliteration (<fixed-case>NEWS</fixed-case> 2009)</booktitle>
       <url hash="f959b2e9">W09-35</url>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Suntec, Singapore</address>
       <month>August</month>
@@ -5179,14 +5179,14 @@
     <paper id="5">
       <title>Named Entity Transcription with Pair n-Gram Models</title>
       <author><first>Martin</first><last>Jansche</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>32–35</pages>
       <url hash="fb4b9447">W09-3505</url>
       <bibkey>jansche-sproat-2009-named</bibkey>
     </paper>
     <paper id="6">
       <title>Machine Transliteration using Target-Language Grapheme and Phoneme: Multi-engine Transliteration Approach</title>
-      <author><first>Jong-Hoon</first><last>Oh</last></author>
+      <author id="jong-hoon-oh"><first>Jong-Hoon</first><last>Oh</last></author>
       <author><first>Kiyotaka</first><last>Uchimoto</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>36–39</pages>
@@ -5195,8 +5195,8 @@
     </paper>
     <paper id="7">
       <title>A Language-Independent Transliteration Schema Using Character Aligned Models at <fixed-case>NEWS</fixed-case> 2009</title>
-      <author><first>Praneeth</first><last>Shishtla</last></author>
-      <author><first>Surya Ganesh</first><last>V</last></author>
+      <author id="praneeth-m-shishtla"><first>Praneeth</first><last>Shishtla</last></author>
+      <author id="surya-ganesh"><first>Surya Ganesh</first><last>V</last></author>
       <author><first>Sethuramalingam</first><last>Subramaniam</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>40–43</pages>
@@ -5205,8 +5205,8 @@
     </paper>
     <paper id="8">
       <title>Experiences with <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi, <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil and <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>annada Transliteration Tasks at <fixed-case>NEWS</fixed-case> 2009</title>
-      <author><first>Manoj Kumar</first><last>Chinnakotla</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj Kumar</first><last>Chinnakotla</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
       <pages>44–47</pages>
       <url hash="790ca263">W09-3508</url>
       <bibkey>chinnakotla-damani-2009-experiences</bibkey>
@@ -5221,7 +5221,7 @@
     <paper id="10">
       <title>Transliteration by Bidirectional Statistical Machine Translation</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>52–56</pages>
       <url hash="29caf2b4">W09-3510</url>
       <bibkey>finch-sumita-2009-transliteration</bibkey>
@@ -5229,7 +5229,7 @@
     <paper id="11">
       <title>Transliteration of Name Entity via Improved Statistical Translation on Character Sequences</title>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Xiao</first><last>Chen</last></author>
       <pages>57–60</pages>
       <url hash="1c54c4a5">W09-3511</url>
@@ -5261,19 +5261,19 @@
     </paper>
     <paper id="15">
       <title>Combining a Two-step Conditional Random Field Model and a Joint Source Channel Model for Machine Transliteration</title>
-      <author><first>Dong</first><last>Yang</last></author>
-      <author><first>Paul</first><last>Dixon</last></author>
-      <author><first>Yi-Cheng</first><last>Pan</last></author>
+      <author id="dong-yang"><first>Dong</first><last>Yang</last></author>
+      <author id="paul-dixon"><first>Paul</first><last>Dixon</last></author>
+      <author id="yi-cheng-pan"><first>Yi-Cheng</first><last>Pan</last></author>
       <author><first>Tasuku</first><last>Oonishi</last></author>
       <author><first>Masanobu</first><last>Nakamura</last></author>
-      <author><first>Sadaoki</first><last>Furui</last></author>
+      <author id="sadaoki-furui"><first>Sadaoki</first><last>Furui</last></author>
       <pages>72–75</pages>
       <url hash="c0966631">W09-3515</url>
       <bibkey>yang-etal-2009-combining</bibkey>
     </paper>
     <paper id="16">
       <title>Phonological Context Approximation and Homophone Treatment for <fixed-case>NEWS</fixed-case> 2009 <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration Shared Task</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>76–79</pages>
       <url hash="889ed330">W09-3516</url>
       <bibkey>kwong-2009-phonological</bibkey>
@@ -5283,15 +5283,15 @@
       <author><first>Amitava</first><last>Das</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>80–83</pages>
       <url hash="ef265e46">W09-3517</url>
       <bibkey>das-etal-2009-english</bibkey>
     </paper>
     <paper id="18">
       <title>Improving Transliteration Accuracy Using Word-Origin Detection and Lexicon Lookup</title>
-      <author><first>Mitesh</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>84–87</pages>
       <url hash="28b21f49">W09-3518</url>
       <bibkey>khapra-bhattacharyya-2009-improving</bibkey>
@@ -5332,9 +5332,9 @@
     <paper id="23">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Transliteration Using Context-Informed <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case>: the <fixed-case>DCU</fixed-case> System for <fixed-case>NEWS</fixed-case> 2009</title>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
-      <author><first>Ankit Kumar</first><last>Srivastava</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="ankit-srivastava"><first>Ankit Kumar</first><last>Srivastava</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>104–107</pages>
       <url hash="584ee40b">W09-3523</url>
@@ -5345,7 +5345,7 @@
       <author><first>Gumwon</first><last>Hong</last></author>
       <author><first>Min-Jeong</first><last>Kim</last></author>
       <author><first>Do-Gil</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>108–111</pages>
       <url hash="76841998">W09-3524</url>
       <bibkey>hong-etal-2009-hybrid</bibkey>
@@ -5389,7 +5389,7 @@
     </paper>
     <paper id="30">
       <title>Name Transliteration with Bidirectional Perceptron Edit Models</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Zhiqiang</first><last>Wang</last></author>
       <pages>132–135</pages>
       <url hash="56ddf673">W09-3530</url>
@@ -5398,7 +5398,7 @@
     <paper id="31">
       <title>Bridging Languages by <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense Entity Tagging</title>
       <author><first>Davide</first><last>Picca</last></author>
-      <author><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio Massimiliano</first><last>Gliozzo</last></author>
       <author><first>Simone</first><last>Campora</last></author>
       <pages>136–142</pages>
       <url hash="12575bc6">W09-3531</url>
@@ -5416,10 +5416,10 @@
     </paper>
     <paper id="33">
       <title>Name Matching between <fixed-case>R</fixed-case>oman and <fixed-case>C</fixed-case>hinese Scripts: Machine Complements Human</title>
-      <author><first>Ken</first><last>Samuel</last></author>
+      <author id="ken-samuel"><first>Ken</first><last>Samuel</last></author>
       <author><first>Alan</first><last>Rubenstein</last></author>
-      <author><first>Sherri</first><last>Condon</last></author>
-      <author><first>Alex</first><last>Yeh</last></author>
+      <author id="sherri-condon"><first>Sherri</first><last>Condon</last></author>
+      <author id="alexander-yeh"><first>Alex</first><last>Yeh</last></author>
       <pages>152–160</pages>
       <url hash="0931da3a">W09-3533</url>
       <bibkey>samuel-etal-2009-name</bibkey>
@@ -5428,7 +5428,7 @@
       <title>Analysis and Robust Extraction of Changing Named Entities</title>
       <author><first>Masatoshi</first><last>Tsuchiya</last></author>
       <author><first>Shoko</first><last>Endo</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <pages>161–167</pages>
       <url hash="3884408e">W09-3534</url>
       <bibkey>tsuchiya-etal-2009-analysis</bibkey>
@@ -5444,24 +5444,24 @@
     <paper id="36">
       <title>A Hybrid Model for <fixed-case>U</fixed-case>rdu <fixed-case>H</fixed-case>indi Transliteration</title>
       <author><first>Abbas</first><last>Malik</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>177–185</pages>
       <url hash="7a24b60b">W09-3536</url>
       <bibkey>malik-etal-2009-hybrid</bibkey>
     </paper>
     <paper id="37">
       <title>Graphemic Approximation of Phonological Context for <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>186–193</pages>
       <url hash="49d5a57e">W09-3537</url>
       <bibkey>kwong-2009-graphemic</bibkey>
     </paper>
     <paper id="38">
       <title><fixed-case>C</fixed-case>zech Named Entity Corpus and <fixed-case>SVM</fixed-case>-based Recognizer</title>
-      <author><first>Jana</first><last>Kravalová</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="jana-kravalova"><first>Jana</first><last>Kravalová</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>194–201</pages>
       <url hash="1aa82f28">W09-3538</url>
       <bibkey>kravalova-zabokrtsky-2009-czech</bibkey>
@@ -5469,7 +5469,7 @@
     <paper id="39">
       <title>Voted <fixed-case>NER</fixed-case> System using Appropriate Unlabeled Data</title>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>202–210</pages>
       <url hash="27770438">W09-3539</url>
       <bibkey>ekbal-bandyopadhyay-2009-voted</bibkey>
@@ -5493,8 +5493,8 @@
     </frontmatter>
     <paper id="1">
       <title>Researcher affiliation extraction from homepages</title>
-      <author><first>István</first><last>Nagy</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Márk</first><last>Jelasity</last></author>
       <pages>1–9</pages>
       <url hash="15f9d789">W09-3601</url>
@@ -5515,7 +5515,7 @@
       <title>Accurate Argumentative Zoning with Maximum Entropy models</title>
       <author><first>Stephen</first><last>Merity</last></author>
       <author><first>Tara</first><last>Murphy</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>19–26</pages>
       <url hash="f5fc83b0">W09-3603</url>
       <bibkey>merity-etal-2009-accurate</bibkey>
@@ -5530,7 +5530,7 @@
     </paper>
     <paper id="5">
       <title>Detecting key sentences for automatic assistance in peer reviewing research articles in educational sciences</title>
-      <author><first>Ágnes</first><last>Sándor</last></author>
+      <author id="agnes-sandor"><first>Ágnes</first><last>Sándor</last></author>
       <author><first>Angela</first><last>Vorndran</last></author>
       <pages>36–44</pages>
       <url hash="80fc11c8">W09-3605</url>
@@ -5539,7 +5539,7 @@
     <paper id="6">
       <title>Designing a Citation-Sensitive Research Tool: An Initial Study of Browsing-Specific Information Needs</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Michael</first><last>Muthukrishna</last></author>
       <author><first>Robert</first><last>Dale</last></author>
       <pages>45–53</pages>
@@ -5548,8 +5548,8 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Network Corpus</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
-      <author><first>Pradeep</first><last>Muthukrishnan</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="pradeep-muthukrishnan"><first>Pradeep</first><last>Muthukrishnan</last></author>
       <author><first>Vahed</first><last>Qazvinian</last></author>
       <pages>54–61</pages>
       <url hash="b54bbe68">W09-3607</url>
@@ -5557,7 +5557,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>NLP</fixed-case> Support for Faceted Navigation in Scholarly Collection</title>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <author><first>Emilia</first><last>Stoica</last></author>
       <pages>62–70</pages>
       <url hash="d5d3a967">W09-3608</url>
@@ -5576,7 +5576,7 @@
       <title>Citations in the Digital Library of Classics: Extracting Canonical References by Using Conditional Random Fields</title>
       <author><first>Matteo</first><last>Romanello</last></author>
       <author><first>Federico</first><last>Boschetti</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <pages>80–87</pages>
       <url hash="ae47e1c6">W09-3610</url>
       <bibkey>romanello-etal-2009-citations</bibkey>
@@ -5594,7 +5594,7 @@
   <volume id="37" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Eight International Conference on Computational Semantics</booktitle>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tilburg, The Netherlands</address>
       <month>January</month>
@@ -5614,14 +5614,14 @@
     </paper>
     <paper id="2">
       <title>Knowing a word (sense) by its company</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>2</pages>
       <url hash="73a6c49a">W09-3702</url>
       <bibkey>palmer-2009-knowing</bibkey>
     </paper>
     <paper id="3">
       <title>Play your way to an annotated corpus: Games with a purpose and anaphoric annotation</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>3</pages>
       <url hash="d8f9d31f">W09-3703</url>
       <bibkey>poesio-2009-play</bibkey>
@@ -5629,7 +5629,7 @@
     <paper id="4">
       <title>A computational account of comparative implicatures for a spoken dialogue agent</title>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>4–17</pages>
       <url hash="b76708b9">W09-3704</url>
       <bibkey>benotti-traum-2009-computational</bibkey>
@@ -5666,14 +5666,14 @@
     <paper id="9">
       <title>A Formal Model for Procedural Texts and its Use in Textual Integration</title>
       <author><first>Isabelle</first><last>Dautriche</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>73–89</pages>
       <url hash="410ef6f1">W09-3709</url>
       <bibkey>dautriche-saint-dizier-2009-formal</bibkey>
     </paper>
     <paper id="10">
       <title>Inference Rules for Recognizing Textual Entailment</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <pages>90–103</pages>
       <url hash="3120d25f">W09-3710</url>
@@ -5708,7 +5708,7 @@
     <paper id="14">
       <title>An extended model of natural logic</title>
       <author><first>Bill</first><last>MacCartney</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>140–156</pages>
       <url hash="dd12915d">W09-3714</url>
       <bibkey>maccartney-manning-2009-extended</bibkey>
@@ -5723,8 +5723,8 @@
     </paper>
     <paper id="16">
       <title><fixed-case>GLML</fixed-case>: Annotating Argument Selection and Coercion</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
-      <author><first>Jessica</first><last>Moszkowicz</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
+      <author id="jessica-moszkowicz"><first>Jessica</first><last>Moszkowicz</last></author>
       <author><first>Olga</first><last>Batiukova</last></author>
       <author><first>Anna</first><last>Rumshisky</last></author>
       <pages>169–180</pages>
@@ -5768,7 +5768,7 @@
       <title>An Ordering of Terms Based on Semantic Relatedness</title>
       <author><first>Peter</first><last>Wittek</last></author>
       <author><first>Sándor</first><last>Darányi</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>235–247</pages>
       <url hash="489153fc">W09-3721</url>
       <bibkey>wittek-etal-2009-ordering</bibkey>
@@ -5810,15 +5810,15 @@
     </paper>
     <paper id="26">
       <title>Flexible Semantic Composition with <fixed-case>DUDES</fixed-case> (short paper)</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>272–276</pages>
       <url hash="3f86590c">W09-3726</url>
       <bibkey>cimiano-2009-flexible</bibkey>
     </paper>
     <paper id="27">
       <title>Computing Implicit Entities and Events with Getaruns (short paper)</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>277–281</pages>
       <url hash="e43b14cf">W09-3727</url>
       <bibkey>delmonte-pianta-2009-computing</bibkey>
@@ -5827,7 +5827,7 @@
       <title>Comparing Alternative Data-Driven Ontological Vistas of Natural History (short paper)</title>
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Piroska</first><last>Lendvai</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>282–285</pages>
       <url hash="ea31ed30">W09-3728</url>
       <bibkey>van-erp-etal-2009-comparing</bibkey>
@@ -5855,7 +5855,7 @@
     </paper>
     <paper id="32">
       <title>A Study of a Segmentation Technique for Dialogue Act Assignation (short paper)</title>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>299–304</pages>
       <url hash="7d2aaa8e">W09-3732</url>
       <bibkey>martinez-hinarejos-2009-study</bibkey>
@@ -5863,7 +5863,7 @@
     <paper id="33">
       <title>Application of Cognitive Strategies to <fixed-case>C</fixed-case>hinese Noun Classifier <fixed-case>E</fixed-case>-learning (short paper)</title>
       <author><first>Wei</first><last>Ni</last></author>
-      <author><first>Helena Hong</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena Hong</first><last>Gao</last></author>
       <author><first>Shixiao</first><last>Ouyang</last></author>
       <pages>305–309</pages>
       <url hash="ab858d5e">W09-3733</url>
@@ -5879,7 +5879,7 @@
     <paper id="35">
       <title>Developing a Computer-facilitated Tool for Acquiring Near-synonyms in <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish (short paper)</title>
       <author><first>Shixiao</first><last>Ouyang</last></author>
-      <author><first>Helena Hong</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena Hong</first><last>Gao</last></author>
       <author><first>Soo Ngee</first><last>Koh</last></author>
       <pages>316–319</pages>
       <url hash="edbad94c">W09-3735</url>
@@ -5899,7 +5899,7 @@
       <author><first>Jaime</first><last>Snyder</last></author>
       <author><first>Michael A.</first><last>D’Eredita</last></author>
       <author><first>Ozgur</first><last>Yilmazel</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <pages>326–332</pages>
       <url hash="e74a7c8c">W09-3737</url>
       <bibkey>snyder-etal-2009-towards</bibkey>
@@ -5914,7 +5914,7 @@
     <paper id="39">
       <title>An Application of Lexical Semantics Annotation to Question-Answering in e-Farming</title>
       <author><first>Mukda</first><last>Suktarachan</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>338–341</pages>
       <url hash="73cf7b92">W09-3739</url>
       <bibkey>suktarachan-saint-dizier-2009-application</bibkey>
@@ -5922,7 +5922,7 @@
     <paper id="40">
       <title>A novel approach to mapping <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et lexical units to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et synsets (short paper)</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>342–345</pages>
       <url hash="4fc50eed">W09-3740</url>
       <bibkey>tonelli-pianta-2009-novel</bibkey>
@@ -5947,14 +5947,14 @@
     <paper id="43">
       <title>A semantic relatedness metric based on free link structure (short paper)</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>355–358</pages>
       <url hash="42fce6ea">W09-3743</url>
       <bibkey>wubben-van-den-bosch-2009-semantic</bibkey>
     </paper>
     <paper id="44">
       <title>Semantic Normalisation : a Framework and an Experiment</title>
-      <author><first>Paul</first><last>Bedaride</last></author>
+      <author id="paul-bedaride"><first>Paul</first><last>Bedaride</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>359–370</pages>
       <url hash="7b2313e2">W09-3744</url>
@@ -5965,8 +5965,8 @@
     <meta>
       <booktitle>Proceedings of the 11th International Conference on Parsing Technologies (<fixed-case>IWPT</fixed-case>’09)</booktitle>
       <url hash="7809b191">W09-38</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
-      <editor><first>Éric</first><last>Villemonte de la Clergerie</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
+      <editor id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Paris, France</address>
       <month>October</month>
@@ -6011,7 +6011,7 @@
     </paper>
     <paper id="5">
       <title>Empirical lower bounds on translation unit error rate for the full class of inversion transduction grammars</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Dekai</first><last>Wu</last></author>
       <pages>33–36</pages>
       <url hash="31efbece">W09-3805</url>
@@ -6046,7 +6046,7 @@
     </paper>
     <paper id="9">
       <title>Deductive Parsing in Interaction Grammars</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>65–68</pages>
       <url hash="5ffb8ca4">W09-3809</url>
       <bibkey>le-roux-2009-deductive</bibkey>
@@ -6071,9 +6071,9 @@
     </paper>
     <paper id="12">
       <title>Two stage constraint based hybrid approach to free word order language dependency parsing</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti</first><last>Misra</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>77–80</pages>
       <url hash="ecb483e6">W09-3812</url>
@@ -6089,7 +6089,7 @@
     <paper id="14">
       <title>Evaluating Contribution of Deep Syntactic Information to Shallow Semantic Analysis</title>
       <author><first>Sumire</first><last>Uematsu</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>85–88</pages>
       <url hash="e29fc33d">W09-3814</url>
       <bibkey>uematsu-tsujii-2009-evaluating</bibkey>
@@ -6105,7 +6105,7 @@
     <paper id="16">
       <title>Co-Parsing with Competitive Models</title>
       <author><first>Lidia</first><last>Khmylko</last></author>
-      <author><first>Kilian A.</first><last>Foth</last></author>
+      <author id="kilian-a-foth"><first>Kilian A.</first><last>Foth</last></author>
       <author><first>Wolfgang</first><last>Menzel</last></author>
       <pages>99–107</pages>
       <url hash="30c3f7d6">W09-3816</url>
@@ -6123,7 +6123,7 @@
       <title>Constructing parse forests that include exactly the n-best <fixed-case>PCFG</fixed-case> trees</title>
       <author><first>Pierre</first><last>Boullier</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>117–128</pages>
       <url hash="11b0d19f">W09-3818</url>
       <bibkey>boullier-etal-2009-constructing</bibkey>
@@ -6146,8 +6146,8 @@
     </paper>
     <paper id="21">
       <title>Improving generative statistical parsing with semi-supervised word clustering</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>138–141</pages>
       <url hash="4b5387c1">W09-3821</url>
       <bibkey>candito-crabbe-2009-improving</bibkey>
@@ -6155,7 +6155,7 @@
     <paper id="22">
       <title>Application of feature propagation to dependency parsing</title>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>142–145</pages>
       <url hash="33a31a6a">W09-3822</url>
       <bibkey>bengoetxea-gojenola-2009-application</bibkey>
@@ -6163,7 +6163,7 @@
     <paper id="23">
       <title>Guessing the Grammatical Function of a Non-Root <fixed-case>F</fixed-case>-Structure in <fixed-case>LFG</fixed-case></title>
       <author><first>Anton</first><last>Bryl</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
       <pages>146–149</pages>
       <url hash="1960eab8">W09-3823</url>
@@ -6171,9 +6171,9 @@
     </paper>
     <paper id="24">
       <title>Cross parser evaluation : a <fixed-case>F</fixed-case>rench Treebanks study</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>150–161</pages>
       <url hash="1c2165e3">W09-3824</url>
       <bibkey>seddah-etal-2009-cross</bibkey>
@@ -6205,7 +6205,7 @@
       <title>Effective Analysis of Causes and Inter-dependencies of Parsing Errors</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>180–191</pages>
       <url hash="9aee967e">W09-3828</url>
       <bibkey>hara-etal-2009-effective</bibkey>
@@ -6213,7 +6213,7 @@
     <paper id="29">
       <title>Clustering Words by Syntactic Similarity improves Dependency Parsing of Predicate-argument Structures</title>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>Andrew S.</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew S.</first><last>Gordon</last></author>
       <pages>192–201</pages>
       <url hash="5fbba57d">W09-3829</url>
       <bibkey>sagae-gordon-2009-clustering</bibkey>
@@ -6227,7 +6227,7 @@
     </paper>
     <paper id="31">
       <title>Using a maximum entropy-based tagger to improve a very fast vine parser</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>206–209</pages>
       <url hash="6b6a6feb">W09-3831</url>
@@ -6235,9 +6235,9 @@
     </paper>
     <paper id="32">
       <title><fixed-case>HPSG</fixed-case> Supertagging: A Sequence Labeling View</title>
-      <author><first>Yao-zhong</first><last>Zhang</last></author>
+      <author id="yao-zhong-zhang"><first>Yao-zhong</first><last>Zhang</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>210–213</pages>
       <url hash="751f1f73">W09-3832</url>
       <bibkey>zhang-etal-2009-hpsg</bibkey>
@@ -6246,7 +6246,7 @@
       <title>Smoothing fine-grained <fixed-case>PCFG</fixed-case> lexicons</title>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <author><first>Mats</first><last>Rooth</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>214–217</pages>
       <url hash="7d824ff2">W09-3833</url>
       <bibkey>deoskar-etal-2009-smoothing</bibkey>
@@ -6261,8 +6261,8 @@
     <paper id="35">
       <title>Interactive Predictive Parsing</title>
       <author><first>Ricardo</first><last>Sánchez-Sáez</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
-      <author><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
       <pages>222–225</pages>
       <url hash="6b4db9b4">W09-3835</url>
       <bibkey>sanchez-saez-etal-2009-interactive</bibkey>
@@ -6278,7 +6278,7 @@
     </paper>
     <paper id="37">
       <title>Heuristic search in a cognitive model of human parsing</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>230–233</pages>
       <url hash="a86d9c7a">W09-3837</url>
       <bibkey>hale-2009-heuristic</bibkey>
@@ -6286,7 +6286,7 @@
     <paper id="38">
       <title>Dependency Parsing with Energy-based Reinforcement Learning</title>
       <author><first>Lidan</first><last>Zhang</last></author>
-      <author><first>Kwok Ping</first><last>Chan</last></author>
+      <author id="kwok-ping-chan"><first>Kwok Ping</first><last>Chan</last></author>
       <pages>234–237</pages>
       <url hash="d16761bc">W09-3838</url>
       <bibkey>zhang-chan-2009-dependency</bibkey>
@@ -6302,7 +6302,7 @@
     </paper>
     <paper id="40">
       <title>Dependency Constraints for Lexical Disambiguation</title>
-      <author><first>Guillaume</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillaume</first><last>Bonfante</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <pages>242–253</pages>
@@ -6312,7 +6312,7 @@
     <paper id="41">
       <title>Parsing Directed Acyclic Graphs with Range Concatenation Grammars</title>
       <author><first>Pierre</first><last>Boullier</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>254–265</pages>
       <url hash="5f728bda">W09-3841</url>
       <bibkey>boullier-sagot-2009-parsing</bibkey>
@@ -6322,10 +6322,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>SIGDIAL</fixed-case> 2009 Conference</booktitle>
       <url hash="016c4da0">W09-39</url>
-      <editor><first>Patrick</first><last>Healey</last></editor>
-      <editor><first>Roberto</first><last>Pieraccini</last></editor>
-      <editor><first>Donna</first><last>Byron</last></editor>
-      <editor><first>Steve</first><last>Young</last></editor>
+      <editor id="patrick-healey"><first>Patrick</first><last>Healey</last></editor>
+      <editor id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></editor>
+      <editor id="donna-byron"><first>Donna</first><last>Byron</last></editor>
+      <editor id="steve-young"><first>Steve</first><last>Young</last></editor>
       <editor><first>Matthew</first><last>Purver</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>London, UK</address>
@@ -6341,7 +6341,7 @@
       <title>Evaluating the Effectiveness of Information Presentation in a Full End-To-End Dialogue System</title>
       <author><first>Taghi</first><last>Paksima</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>1–10</pages>
       <url hash="d17ccf7c">W09-3901</url>
       <bibkey>paksima-etal-2009-evaluating</bibkey>
@@ -6350,7 +6350,7 @@
       <title>Can <fixed-case>I</fixed-case> Finish? Learning When to Respond to Incremental Interpretation Results in Interactive Dialogue</title>
       <author><first>David</first><last>DeVault</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>11–20</pages>
       <url hash="a6ccc430">W09-3902</url>
       <bibkey>devault-etal-2009-finish</bibkey>
@@ -6381,10 +6381,10 @@
     </paper>
     <paper id="6">
       <title>Dealing with Interpretation Errors in Tutorial Dialogue</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
-      <author><first>Charles</first><last>Callaway</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="charles-b-callaway"><first>Charles</first><last>Callaway</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <author><first>Natalie</first><last>Steinhauser</last></author>
       <author><first>Gwendolyn</first><last>Campbell</last></author>
       <pages>38–45</pages>
@@ -6404,14 +6404,14 @@
     <paper id="8">
       <title>Participant Subjectivity and Involvement as a Basis for Discourse Segmentation</title>
       <author><first>John</first><last>Niekrasz</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>54–61</pages>
       <url hash="91d6e888">W09-3908</url>
       <bibkey>niekrasz-moore-2009-participant</bibkey>
     </paper>
     <paper id="9">
       <title>Genre-Based Paragraph Classification for Sentiment Analysis</title>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <pages>62–70</pages>
@@ -6421,7 +6421,7 @@
     <paper id="10">
       <title>Detecting the Noteworthiness of Utterances in Human Meetings</title>
       <author><first>Satanjeev</first><last>Banerjee</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>71–78</pages>
       <url hash="b466f3ab">W09-3910</url>
       <bibkey>banerjee-rudnicky-2009-detecting</bibkey>
@@ -6438,7 +6438,7 @@
     <paper id="12">
       <title>Interactive Gesture in Dialogue: a <fixed-case>PTT</fixed-case> Model</title>
       <author><first>Hannes</first><last>Rieser</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>87–96</pages>
       <url hash="aab8a4d1">W09-3912</url>
       <bibkey>rieser-poesio-2009-interactive</bibkey>
@@ -6469,7 +6469,7 @@
     </paper>
     <paper id="16">
       <title>A Two-Tier User Simulation Model for Reinforcement Learning of Adaptive Referring Expression Generation Policies</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>120–123</pages>
       <url hash="6c1dd0b1">W09-3916</url>
@@ -6497,7 +6497,7 @@
     </paper>
     <paper id="19">
       <title>Estimating Probability of Correctness for <fixed-case>ASR</fixed-case> <fixed-case>N</fixed-case>-<fixed-case>B</fixed-case>est Lists</title>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <author><first>Suhrid</first><last>Balakrishnan</last></author>
       <pages>132–135</pages>
       <url hash="81a9e014">W09-3919</url>
@@ -6505,7 +6505,7 @@
     </paper>
     <paper id="20">
       <title>Not a Simple Yes or No: Uncertainty in Indirect Answers</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Scott</first><last>Grimm</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>136–143</pages>
@@ -6515,14 +6515,14 @@
     <paper id="21">
       <title>Concept Form Adaptation in Human-Computer Dialog</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>144–147</pages>
       <url hash="0b13c0ac">W09-3921</url>
       <bibkey>stoyanchev-stent-2009-concept</bibkey>
     </paper>
     <paper id="22">
       <title>Automatic Generation of Information State Update Dialogue Systems that Dynamically Create Voice <fixed-case>XML</fixed-case>, as Demonstrated on the i<fixed-case>P</fixed-case>hone</title>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>148–151</pages>
@@ -6544,7 +6544,7 @@
       <author><first>Sebastian</first><last>Varges</last></author>
       <author><first>Silvia</first><last>Quarteroni</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Alexei</first><last>Ivanov</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei</first><last>Ivanov</last></author>
       <author><first>Pierluigi</first><last>Roberti</last></author>
       <pages>156–159</pages>
       <url hash="08db86a1">W09-3924</url>
@@ -6552,11 +6552,11 @@
     </paper>
     <paper id="25">
       <title>Speeding Up the Design of Dialogue Applications by Using Database Contents and Structure Information</title>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
-      <author><first>Ricardo</first><last>de Cordoba</last></author>
-      <author><first>Juan Manuel</first><last>Lucas</last></author>
-      <author><first>Roberto</first><last>Barra-Chicote</last></author>
-      <author><first>Ruben</first><last>San-Segundo</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="ricardo-de-cordoba"><first>Ricardo</first><last>de Cordoba</last></author>
+      <author id="juan-manuel-lucas-cuesta"><first>Juan Manuel</first><last>Lucas</last></author>
+      <author id="roberto-barra-chicote"><first>Roberto</first><last>Barra-Chicote</last></author>
+      <author id="ruben-san-segundo"><first>Ruben</first><last>San-Segundo</last></author>
       <pages>160–169</pages>
       <url hash="8ff2d1a7">W09-3925</url>
       <bibkey>dharo-etal-2009-speeding</bibkey>
@@ -6575,7 +6575,7 @@
     <paper id="27">
       <title>Discourse Structure and Performance Analysis: Beyond the Correlation</title>
       <author><first>Mihai</first><last>Rotaru</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>178–187</pages>
       <url hash="3275a104">W09-3927</url>
       <bibkey>rotaru-litman-2009-discourse</bibkey>
@@ -6583,7 +6583,7 @@
     <paper id="28">
       <title>The Role of Interactivity in Human-Machine Conversation for Automatic Word Acquisition</title>
       <author><first>Shaolin</first><last>Qu</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>188–195</pages>
       <url hash="c7b71d66">W09-3928</url>
       <bibkey>qu-chai-2009-role</bibkey>
@@ -6597,15 +6597,15 @@
     </paper>
     <paper id="30">
       <title>What do We Know about Conversation Participants: Experiments on Conversation Entailment</title>
-      <author id="chen-zhang"><first>Chen</first><last>Zhang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author><first>Chen</first><last>Zhang</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>206–215</pages>
       <url hash="3ff156fb">W09-3930</url>
       <bibkey>zhang-chai-2009-know</bibkey>
     </paper>
     <paper id="31">
       <title>Artificial <fixed-case>C</fixed-case>ompanions as Dialogue Agents</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>216</pages>
       <url hash="61aa8817">W09-3931</url>
       <bibkey>wilks-2009-artificial</bibkey>
@@ -6623,7 +6623,7 @@
     </paper>
     <paper id="33">
       <title>Models for Multiparty Engagement in Open-World Dialog</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <pages>225–234</pages>
       <url hash="af0589f7">W09-3933</url>
@@ -6631,9 +6631,9 @@
     </paper>
     <paper id="34">
       <title>Extracting Decisions from Multi-Party Dialogue Using Directed Graphical Models and Semantic Similarity</title>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Matthew</first><last>Frampton</last></author>
-      <author><first>John</first><last>Dowding</last></author>
+      <author id="john-dowding"><first>John</first><last>Dowding</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>235–243</pages>
       <url hash="3d176f34">W09-3934</url>
@@ -6641,7 +6641,7 @@
     </paper>
     <paper id="35">
       <title>Learning to Predict Engagement with a Spoken Dialog System in Open-World Settings</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <pages>244–252</pages>
       <url hash="d12a1266">W09-3935</url>
@@ -6649,8 +6649,8 @@
     </paper>
     <paper id="36">
       <title>Turn-Yielding Cues in Task-Oriented Dialogue</title>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>253–261</pages>
       <url hash="1ccf46b0">W09-3936</url>
       <bibkey>gravano-hirschberg-2009-turn</bibkey>
@@ -6667,11 +6667,11 @@
     </paper>
     <paper id="38">
       <title>k-<fixed-case>N</fixed-case>earest Neighbor <fixed-case>M</fixed-case>onte-<fixed-case>C</fixed-case>arlo Control Algorithm for <fixed-case>POMDP</fixed-case>-Based Dialogue Systems</title>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>François</first><last>Mairesse</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
       <author><first>Steve</first><last>Young</last></author>
@@ -6683,22 +6683,22 @@
       <title>Comparison of Classification and Ranking Approaches to Pronominal Anaphora Resolution in <fixed-case>C</fixed-case>zech</title>
       <author><first>Giang Linh</first><last>Ngụy</last></author>
       <author><first>Václav</first><last>Novák</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>276–285</pages>
       <url hash="a2a56e24">W09-3939</url>
       <bibkey>nguy-etal-2009-comparison</bibkey>
     </paper>
     <paper id="40">
       <title>Spoken Tutorial Dialogue and the Feeling of Another’s Knowing</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
       <pages>286–289</pages>
       <url hash="45a98e2d">W09-3940</url>
       <bibkey>litman-forbes-riley-2009-spoken</bibkey>
     </paper>
     <paper id="41">
       <title>Evaluating Automatic Extraction of Rules for Sentence Plan Construction</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <author><first>Martin</first><last>Molina</last></author>
       <pages>290–297</pages>
       <url hash="c6193990">W09-3941</url>
@@ -6724,7 +6724,7 @@
     <paper id="44">
       <title>Cascaded Lexicalised Classifiers for Second-Person Reference Resolution</title>
       <author><first>Matthew</first><last>Purver</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Matthew</first><last>Frampton</last></author>
       <author><first>Stanley</first><last>Peters</last></author>
       <pages>306–309</pages>
@@ -6745,7 +6745,7 @@
       <author><first>Satoshi</first><last>Ikeda</last></author>
       <author><first>Yuichiro</first><last>Fukubayashi</last></author>
       <author><first>Tetsuya</first><last>Ogata</last></author>
-      <author><first>Hiroshi</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi</first><last>Okuno</last></author>
       <pages>314–321</pages>
       <url hash="4b32dc3b">W09-3946</url>
       <bibkey>komatani-etal-2009-ranking</bibkey>
@@ -6761,23 +6761,23 @@
       <title>A Comparison between Dialog Corpora Acquired with Real and Simulated Users</title>
       <author><first>David</first><last>Griol</last></author>
       <author><first>Zoraida</first><last>Callejas</last></author>
-      <author><first>Ramón</first><last>López-Cózar</last></author>
+      <author id="ramon-lopez-cozar"><first>Ramón</first><last>López-Cózar</last></author>
       <pages>326–332</pages>
       <url hash="52c847ae">W09-3948</url>
       <bibkey>griol-etal-2009-comparison</bibkey>
     </paper>
     <paper id="49">
       <title>Simultaneous Dialogue Act Segmentation and Labelling using Lexical and Syntactic Features</title>
-      <author><first>Ramon</first><last>Granell</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="ramon-granell"><first>Ramon</first><last>Granell</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <pages>333–336</pages>
       <url hash="8ad84673">W09-3949</url>
       <bibkey>granell-etal-2009-simultaneous</bibkey>
     </paper>
     <paper id="50">
       <title>The Spoken Dialogue Challenge</title>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>337–340</pages>
       <url hash="c8690ccc">W09-3950</url>
@@ -6786,15 +6786,15 @@
     <paper id="51">
       <title>Unsupervised Classification of Dialogue Acts using a <fixed-case>D</fixed-case>irichlet Process Mixture Model</title>
       <author><first>Nigel</first><last>Crook</last></author>
-      <author><first>Ramon</first><last>Granell</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="ramon-granell"><first>Ramon</first><last>Granell</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>341–348</pages>
       <url hash="16a54fd2">W09-3951</url>
       <bibkey>crook-etal-2009-unsupervised</bibkey>
     </paper>
     <paper id="52">
       <title>A Handsome Set of Metrics to Measure Utterance Classification Performance in Spoken Dialog Systems</title>
-      <author><first>David</first><last>Suendermann</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann</last></author>
       <author><first>Jackson</first><last>Liscombe</last></author>
       <author><first>Krishna</first><last>Dayanidhi</last></author>
       <author><first>Roberto</first><last>Pieraccini</last></author>
@@ -6805,8 +6805,8 @@
     <paper id="53">
       <title>Contrasting the Interaction Structure of an Email and a Telephone Corpus: A Machine Learning Approach to Annotation of Dialogue Function Units</title>
       <author><first>Jun</first><last>Hu</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>357–366</pages>
       <url hash="76b2e50f">W09-3953</url>
       <bibkey>hu-etal-2009-contrasting</bibkey>
@@ -6816,8 +6816,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop Multilingual resources, technologies and evaluation for central and Eastern <fixed-case>E</fixed-case>uropean languages</booktitle>
       <url hash="3210dd85">W09-40</url>
-      <editor><first>Elena</first><last>Paskaleva</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <editor><first>Milena</first><last>Slavcheva</last></editor>
       <editor><first>Cristina</first><last>Vertan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6843,7 +6843,7 @@
     <paper id="2">
       <title>On the behavior of <fixed-case>R</fixed-case>omanian syllables related to minimum effort laws</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>9–13</pages>
       <url hash="126b2650">W09-4002</url>
       <bibkey>dinu-dinu-2009-behavior</bibkey>
@@ -6858,7 +6858,7 @@
     <paper id="4">
       <title><fixed-case>E</fixed-case>-Connecting <fixed-case>B</fixed-case>alkan Languages</title>
       <author><first>Cvetana</first><last>Krstev</last></author>
-      <author><first>Ranka</first><last>Stanković</last></author>
+      <author id="ranka-stankovic"><first>Ranka</first><last>Stanković</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
       <author><first>Svetla</first><last>Koeva</last></author>
       <pages>19–25</pages>
@@ -6868,7 +6868,7 @@
     <paper id="5">
       <title>Converting <fixed-case>R</fixed-case>ussian Treebank <fixed-case>S</fixed-case>yn<fixed-case>T</fixed-case>ag<fixed-case>R</fixed-case>us into Praguian <fixed-case>PDT</fixed-case> Style</title>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Natalia</first><last>Kljueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Kljueva</last></author>
       <pages>26–31</pages>
       <url hash="38bc46b1">W09-4005</url>
       <bibkey>marecek-kljueva-2009-converting</bibkey>
@@ -6893,10 +6893,10 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Adaptation of Language Resources and Technology to New Domains</booktitle>
       <url hash="1f645151">W09-41</url>
-      <editor><first>Núria</first><last>Bel</last></editor>
-      <editor><first>Erhard</first><last>Hinrichs</last></editor>
+      <editor id="nuria-bel"><first>Núria</first><last>Bel</last></editor>
+      <editor id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Kiril</first><last>Simov</last></editor>
+      <editor id="kiril-simov"><first>Kiril</first><last>Simov</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Borovets, Bulgaria</address>
       <month>September</month>
@@ -6917,7 +6917,7 @@
     </paper>
     <paper id="2">
       <title>Maximal Phrases Based Analysis for Prototyping Online Discussion Forums Postings</title>
-      <author><first>Gaston</first><last>Burek</last></author>
+      <author id="gaston-burek"><first>Gaston</first><last>Burek</last></author>
       <author><first>Dale</first><last>Gerdemann</last></author>
       <pages>12–18</pages>
       <url hash="14e7d7d6">W09-4102</url>
@@ -6926,15 +6926,15 @@
     <paper id="3">
       <title><fixed-case>LEXIE</fixed-case> – an Experiment in Lexical Information Extraction</title>
       <author><first>John J.</first><last>Camilleri</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <pages>19–26</pages>
       <url hash="ac700779">W09-4103</url>
       <bibkey>camilleri-rosner-2009-lexie</bibkey>
     </paper>
     <paper id="4">
       <title>Adapting <fixed-case>NLP</fixed-case> and Corpus Analysis Techniques to Structured Imagery Analysis in Classical <fixed-case>C</fixed-case>hinese Poetry</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
-      <author><first>Fengju</first><last>Lo</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="feng-ju-lo"><first>Fengju</first><last>Lo</last></author>
       <author><first>Cheuk Kit</first><last>Chinn</last></author>
       <pages>27–34</pages>
       <url hash="2ffae637">W09-4104</url>
@@ -6943,7 +6943,7 @@
     <paper id="5">
       <title>Cross-lingual Adaptation as a Baseline: Adapting Maximum Entropy Models to <fixed-case>B</fixed-case>ulgarian</title>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Kiril</first><last>Simov</last></author>
       <pages>35–38</pages>
@@ -6967,7 +6967,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>QALL</fixed-case>-<fixed-case>ME</fixed-case> needs <fixed-case>AIR</fixed-case>: a portability study</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Iustin</first><last>Dornescu</last></author>
       <author><first>Natalia</first><last>Ponomareva</last></author>
       <pages>50–57</pages>
@@ -7007,7 +7007,7 @@
     </frontmatter>
     <paper id="1">
       <title>Finding Domain Specific Collocations and Concordances on the Web</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>1–8</pages>
       <url hash="d2757e91">W09-4201</url>
       <bibkey>barriere-2009-finding</bibkey>
@@ -7039,14 +7039,14 @@
     </paper>
     <paper id="5">
       <title>Evidence-Based Word Alignment</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>28–32</pages>
       <url hash="0e073709">W09-4205</url>
       <bibkey>tiedemann-2009-evidence</bibkey>
     </paper>
     <paper id="6">
       <title>A Discriminative Approach to Tree Alignment</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Gideon</first><last>Kotzé</last></author>
       <pages>33–39</pages>
       <url hash="eb8eed2b">W09-4206</url>
@@ -7057,9 +7057,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Events in Emerging Text Types</booktitle>
       <url hash="cae93442">W09-43</url>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
       <editor><first>Laura</first><last>Hasler</last></editor>
-      <editor><first>Corina</first><last>Forăscu</last></editor>
+      <editor id="corina-forascu"><first>Corina</first><last>Forăscu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Borovets, Bulgaria</address>
       <month>September</month>
@@ -7090,19 +7090,19 @@
       <title>A Pairwise Event Coreference Model, Feature Impact and Evaluation for Event Coreference Resolution</title>
       <author><first>Zheng</first><last>Chen</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Robert</first><last>Haralick</last></author>
+      <author id="robert-m-haralick"><first>Robert</first><last>Haralick</last></author>
       <pages>17–22</pages>
       <url hash="81f9cf44">W09-4303</url>
       <bibkey>chen-etal-2009-pairwise</bibkey>
     </paper>
     <paper id="4">
       <title>Summarizing Threads in Blogs Using Opinion Polarity</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Elena</first><last>Lloret</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>23–31</pages>
       <url hash="9b4a665f">W09-4304</url>
       <bibkey>balahur-etal-2009-summarizing</bibkey>
@@ -7127,9 +7127,9 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Definition Extraction</booktitle>
       <url hash="fa287e03">W09-44</url>
-      <editor><first>Gerardo</first><last>Sierra</last></editor>
-      <editor><first>Mara</first><last>Pozzi</last></editor>
-      <editor><first>Juan-Manuel</first><last>Torres</last></editor>
+      <editor id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></editor>
+      <editor id="maria-pozzi"><first>Mara</first><last>Pozzi</last></editor>
+      <editor id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Borovets, Bulgaria</address>
       <month>September</month>
@@ -7160,7 +7160,7 @@
     <paper id="3">
       <title>Enriching a Lexicographic Tool with Domain Definitions: Problems and Solutions</title>
       <author><first>María A.</first><last>Barrios</last></author>
-      <author><first>Guadalupe</first><last>Aguado de Cea</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe</first><last>Aguado de Cea</last></author>
       <author><first>José Ángel</first><last>Ramos</last></author>
       <pages>14–20</pages>
       <url hash="92f61ca5">W09-4403</url>
@@ -7170,7 +7170,7 @@
       <title>Extraction of Author’s Definitions Using Indexed Reference Identification</title>
       <author><first>Marc</first><last>Bertin</last></author>
       <author><first>Iana</first><last>Atanassova</last></author>
-      <author><first>Jean-Pierre</first><last>Descles</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Descles</last></author>
       <pages>21–25</pages>
       <url hash="53da6e50">W09-4404</url>
       <bibkey>bertin-etal-2009-extraction</bibkey>
@@ -7178,16 +7178,16 @@
     <paper id="5">
       <title>Evolutionary Algorithms for Definition Extraction</title>
       <author><first>Claudia</first><last>Borg</last></author>
-      <author><first>Mike</first><last>Rosner</last></author>
-      <author><first>Gordon</first><last>Pace</last></author>
+      <author id="michael-rosner"><first>Mike</first><last>Rosner</last></author>
+      <author id="gordon-pace"><first>Gordon</first><last>Pace</last></author>
       <pages>26–32</pages>
       <url hash="10213ec2">W09-4405</url>
       <bibkey>borg-etal-2009-evolutionary</bibkey>
     </paper>
     <paper id="6">
       <title>Language Independent System for Definition Extraction: First Results Using Learning Algorithms</title>
-      <author><first>Rosa</first><last>Del Gaudio</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="rosa-del-gaudio"><first>Rosa</first><last>Del Gaudio</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>33–39</pages>
       <url hash="5bcf38b0">W09-4406</url>
       <bibkey>del-gaudio-branco-2009-language</bibkey>
@@ -7244,7 +7244,7 @@
       <title>Extraction and Exploration of Correlations in Patient Status Data</title>
       <author><first>Svetla</first><last>Boytcheva</last></author>
       <author><first>Ivelina</first><last>Nikolova</last></author>
-      <author><first>Elena</first><last>Paskaleva</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
       <author><first>Dimitar</first><last>Tcharaktchiev</last></author>
       <author><first>Nadya</first><last>Dimitrova</last></author>
@@ -7256,7 +7256,7 @@
       <title>Semantic Portals in Biomedicine: Case Study</title>
       <author><first>Irina</first><last>Efimenko</last></author>
       <author><first>Sergey</first><last>Minor</last></author>
-      <author><first>Anatoli</first><last>Starostin</last></author>
+      <author id="anatoli-starostin"><first>Anatoli</first><last>Starostin</last></author>
       <author><first>Vladimir</first><last>Khoroshevsky</last></author>
       <pages>8–13</pages>
       <url hash="dcf277cf">W09-4502</url>
@@ -7265,7 +7265,7 @@
     <paper id="3">
       <title>A Joint Model for Normalizing Gene and Organism Mentions in Text</title>
       <author><first>Georgi</first><last>Georgiev</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Kuzman</first><last>Ganchev</last></author>
       <author><first>Deyan</first><last>Peychev</last></author>
       <author><first>Vassil</first><last>Momchev</last></author>
@@ -7294,12 +7294,12 @@
       <title>Natural Language Processing to Detect Risk Patterns Related to Hospital Acquired Infections</title>
       <author><first>Denys</first><last>Proux</last></author>
       <author><first>Pierre</first><last>Marchal</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
       <author><first>Ivan</first><last>Kergourlay</last></author>
-      <author><first>Stéfan</first><last>Darmoni</last></author>
+      <author id="stefan-darmoni"><first>Stéfan</first><last>Darmoni</last></author>
       <author><first>Suzanne</first><last>Pereira</last></author>
       <author><first>Quentin</first><last>Gicquel</last></author>
-      <author><first>Marie-Hélène</first><last>Metzger</last></author>
+      <author id="marie-helene-metzger"><first>Marie-Hélène</first><last>Metzger</last></author>
       <pages>35–41</pages>
       <url hash="4d4568da">W09-4506</url>
       <bibkey>proux-etal-2009-natural</bibkey>
@@ -7307,14 +7307,14 @@
     <paper id="7">
       <title>Cascading Classifiers for Named Entity Recognition in Clinical Notes</title>
       <author><first>Yefeng</first><last>Wang</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <pages>42–49</pages>
       <url hash="6334c026">W09-4507</url>
       <bibkey>wang-patrick-2009-cascading</bibkey>
     </paper>
     <paper id="8">
       <title>Deriving Clinical Query Patterns from Medical Corpora Using Domain Ontologies</title>
-      <author><first>Pinar Oezden</first><last>Wennerberg</last></author>
+      <author id="pinar-oezden-wennerberg"><first>Pinar Oezden</first><last>Wennerberg</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <author><first>Sonja</first><last>Zillner</last></author>
       <pages>50–56</pages>
@@ -7326,8 +7326,8 @@
     <meta>
       <booktitle>Proceedings of the 17th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 2009)</booktitle>
       <url hash="ae04feee">W09-46</url>
-      <editor><first>Kristiina</first><last>Jokinen</last></editor>
-      <editor><first>Eckhard</first><last>Bick</last></editor>
+      <editor id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></editor>
+      <editor id="eckhard-bick"><first>Eckhard</first><last>Bick</last></editor>
       <publisher>Northern European Association for Language Technology (NEALT)</publisher>
       <address>Odense, Denmark</address>
       <month>May</month>
@@ -7354,7 +7354,7 @@
     </paper>
     <paper id="3">
       <title>Text Annotation with <fixed-case>O</fixed-case>pen<fixed-case>NLP</fixed-case> and <fixed-case>UIMA</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>7–8</pages>
       <url hash="f1ff488e">W09-4603</url>
       <bibkey>wilcock-2009-text</bibkey>
@@ -7398,7 +7398,7 @@
     <paper id="8">
       <title>Pattern-based <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Toponym Translation</title>
       <author><first>Tatiana</first><last>Gornostay</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <pages>41–47</pages>
       <url hash="a3c81c82">W09-4608</url>
       <bibkey>gornostay-skadina-2009-pattern</bibkey>
@@ -7408,7 +7408,7 @@
       <author><first>Nathan</first><last>Green</last></author>
       <author><first>Paul</first><last>Breimyer</last></author>
       <author><first>Vinay</first><last>Kumar</last></author>
-      <author><first>Nagiza F.</first><last>Samatova</last></author>
+      <author id="nagiza-samatova"><first>Nagiza F.</first><last>Samatova</last></author>
       <pages>48–56</pages>
       <url hash="7a2f523f">W09-4609</url>
       <bibkey>green-etal-2009-webbanc</bibkey>
@@ -7433,8 +7433,8 @@
     </paper>
     <paper id="12">
       <title>The <fixed-case>N</fixed-case>ordic Dialect Corpus–an advanced research tool</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
-      <author><first>Joel James</first><last>Priestley</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="joel-priestley"><first>Joel James</first><last>Priestley</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
       <author><first>Tor Anders</first><last>Åfarli</last></author>
       <author><first>Øystein Alexander</first><last>Vangsnes</last></author>
@@ -7451,15 +7451,15 @@
     </paper>
     <paper id="14">
       <title>Weighted Finite-State Morphological Analysis of <fixed-case>F</fixed-case>innish Compounding with <fixed-case>HFST</fixed-case>-<fixed-case>LEXC</fixed-case></title>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <pages>89–95</pages>
       <url hash="1403004e">W09-4614</url>
       <bibkey>linden-pirinen-2009-weighted</bibkey>
     </paper>
     <paper id="15">
       <title>Corpus-based Paradigm Selection for Morphological Entries</title>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Jussi</first><last>Tuovila</last></author>
       <pages>96–102</pages>
       <url hash="8a1a1938">W09-4615</url>
@@ -7470,14 +7470,14 @@
       <author><first>Hrafn</first><last>Loftsson</last></author>
       <author><first>Ida</first><last>Kramarczyk</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>103–110</pages>
       <url hash="afcf063c">W09-4616</url>
       <bibkey>loftsson-etal-2009-improving</bibkey>
     </paper>
     <paper id="17">
       <title>Disambiguation of Taxonomy Markers in Context: <fixed-case>R</fixed-case>ussian Nouns</title>
-      <author><first>Olga</first><last>Lashevskaja</last></author>
+      <author id="olga-n-lashevskaja"><first>Olga</first><last>Lashevskaja</last></author>
       <author><first>Olga</first><last>Mitrofanova</last></author>
       <pages>111–117</pages>
       <url hash="7f79d583">W09-4617</url>
@@ -7486,7 +7486,7 @@
     <paper id="18">
       <title>Towards automatic acquisition of linguistic features</title>
       <author><first>Yves</first><last>Lepage</last></author>
-      <author><first>Chooi Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi Ling</first><last>Goh</last></author>
       <pages>118–125</pages>
       <url hash="7b8d05f6">W09-4618</url>
       <bibkey>lepage-goh-2009-towards</bibkey>
@@ -7494,7 +7494,7 @@
     <paper id="19">
       <title>Building a morphological and syntactic lexicon by merging various linguistic resources</title>
       <author><first>Miguel A.</first><last>Molinero</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Lionel</first><last>Nicolas</last></author>
       <pages>126–133</pages>
       <url hash="3e02061c">W09-4619</url>
@@ -7502,7 +7502,7 @@
     </paper>
     <paper id="20">
       <title>Using Semantic Features Derived from Word-Space Models for <fixed-case>S</fixed-case>wedish Coreference Resolution</title>
-      <author><first>Kristina</first><last>Nilsson</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson</last></author>
       <author><first>Hans</first><last>Hjelm</last></author>
       <pages>134–141</pages>
       <url hash="86459fc0">W09-4620</url>
@@ -7526,7 +7526,7 @@
     </paper>
     <paper id="23">
       <title>What do we need to know about humans? A view into the <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et database</title>
-      <author><first>Bolette</first><last>Sandford Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Sandford Pedersen</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
       <pages>158–165</pages>
       <url hash="dab6a54a">W09-4623</url>
@@ -7535,29 +7535,29 @@
     <paper id="24">
       <title>Dependency Parsing Resources for <fixed-case>F</fixed-case>rench: Converting Acquired <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar <fixed-case>F</fixed-case>-Structure Annotations and Parsing <fixed-case>F</fixed-case>-Structures Directly</title>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>166–173</pages>
       <url hash="0ba107fa">W09-4624</url>
       <bibkey>schluter-van-genabith-2009-dependency</bibkey>
     </paper>
     <paper id="25">
       <title>Conflict Resolution Using Weighted Rules in <fixed-case>HFST</fixed-case>-<fixed-case>TWOLC</fixed-case></title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>174–181</pages>
       <url hash="ba6457b9">W09-4625</url>
       <bibkey>silfverberg-linden-2009-conflict</bibkey>
     </paper>
     <paper id="26">
       <title>A linear time extension of deterministic pushdown automata</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>182–189</pages>
       <url hash="020e7c86">W09-4626</url>
       <bibkey>sogaard-2009-linear</bibkey>
     </paper>
     <paper id="27">
       <title>Verifying context-sensitive treebanks and heuristic parses in polynomial time</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>190–197</pages>
       <url hash="1dd32c29">W09-4627</url>
       <bibkey>sogaard-2009-verifying</bibkey>
@@ -7580,7 +7580,7 @@
     <paper id="30">
       <title>Automatic Semantic Role Annotation for <fixed-case>S</fixed-case>panish</title>
       <author><first>Eckhard</first><last>Bick</last></author>
-      <author><first>M. Pilar</first><last>Valverde Ibáñez</last></author>
+      <author id="m-pilar-valverde-ibanez"><first>M. Pilar</first><last>Valverde Ibáñez</last></author>
       <pages>215–218</pages>
       <url hash="b68284cc">W09-4630</url>
       <bibkey>bick-valverde-ibanez-2009-automatic</bibkey>
@@ -7595,7 +7595,7 @@
     </paper>
     <paper id="32">
       <title><fixed-case>M</fixed-case>ed<fixed-case>E</fixed-case>val–six test collections in one</title>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <pages>223–226</pages>
       <url hash="178c553c">W09-4632</url>
       <bibkey>friberg-heppin-2009-medeval</bibkey>
@@ -7603,17 +7603,17 @@
     <paper id="33">
       <title>Active Learning in Example-Based Machine Translation</title>
       <author><first>Rashmi</first><last>Gangadharaiah</last></author>
-      <author><first>Ralf D.</first><last>Brown</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="ralf-d-brown"><first>Ralf D.</first><last>Brown</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>227–230</pages>
       <url hash="663efece">W09-4633</url>
       <bibkey>gangadharaiah-etal-2009-active</bibkey>
     </paper>
     <paper id="34">
       <title>Context-Sensitive Spelling Correction and Rich Morphology</title>
-      <author><first>Anton K.</first><last>Ingason</last></author>
+      <author id="anton-karl-ingason"><first>Anton K.</first><last>Ingason</last></author>
       <author><first>Skúli B.</first><last>Jóhannsson</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <author><first>Hrafn</first><last>Loftsson</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
       <pages>231–234</pages>
@@ -7631,14 +7631,14 @@
     </paper>
     <paper id="36">
       <title>The Open Source Tagger <fixed-case>H</fixed-case>un<fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> for <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <pages>239–241</pages>
       <url hash="95dda58b">W09-4636</url>
       <bibkey>megyesi-2009-open</bibkey>
     </paper>
     <paper id="37">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian <fixed-case>SMT</fixed-case>: knowledge or data?</title>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Edgars</first><last>Brālītis</last></author>
       <pages>242–245</pages>
       <url hash="5a211497">W09-4637</url>
@@ -7646,7 +7646,7 @@
     </paper>
     <paper id="38">
       <title>Cross-lingual porting of distributional semantic classification</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>246–249</pages>
       <url hash="475c7896">W09-4638</url>
       <bibkey>ovrelid-2009-cross</bibkey>
@@ -7683,7 +7683,7 @@
     <paper id="43">
       <title><fixed-case>S</fixed-case>ub<fixed-case>TTS</fixed-case>: Light-weight automatic reading of subtitles</title>
       <author><first>Sandra</first><last>Derbring</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Maria</first><last>Olsson</last></author>
       <pages>272–274</pages>
       <url hash="47d032b8">W09-4643</url>
@@ -7691,9 +7691,9 @@
     </paper>
     <paper id="44">
       <title><fixed-case>TRIK</fixed-case>: A Talking and Drawing Robot for Children with Communication Disabilities</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Staffan</first><last>Larsson</last></author>
-      <author><first>Katarina</first><last>Heimann Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Heimann Mühlenbock</last></author>
       <author><first>Gunilla</first><last>Thunberg</last></author>
       <pages>275–278</pages>
       <url hash="8c8239f7">W09-4644</url>
@@ -7701,8 +7701,8 @@
     </paper>
     <paper id="45">
       <title><fixed-case>CAOS</fixed-case>–A Tool for the Construction of Terminological Ontologies</title>
-      <author><first>Bodil</first><last>Nistrup Madsen</last></author>
-      <author><first>Hanne</first><last>Erdman Thomsen</last></author>
+      <author id="bodil-nistrup-madsen"><first>Bodil</first><last>Nistrup Madsen</last></author>
+      <author id="hanne-erdman-thomsen"><first>Hanne</first><last>Erdman Thomsen</last></author>
       <pages>279–282</pages>
       <url hash="13cbe8e9">W09-4645</url>
       <bibkey>nistrup-madsen-erdman-thomsen-2009-caos</bibkey>
@@ -7711,7 +7711,7 @@
       <title>The <fixed-case>N</fixed-case>ordic Dialect Database: Mapping Microsyntactic Variation in the <fixed-case>S</fixed-case>candinavian Languages</title>
       <author><first>Arne Martinus</first><last>Lindstad</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Øystein Alexander</first><last>Vangsnes</last></author>
       <pages>283–286</pages>
       <url hash="2ea53c59">W09-4646</url>
diff --git a/data/xml/W10.xml b/data/xml/W10.xml
index 55a4e3abfa..4ebed2ba59 100644
--- a/data/xml/W10.xml
+++ b/data/xml/W10.xml
@@ -27,8 +27,8 @@
     <paper id="2">
       <title>Active Semi-Supervised Learning for Improving Word Alignment</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>10–17</pages>
       <url hash="598d84b2">W10-0102</url>
       <bibkey>ambati-etal-2010-active-semi</bibkey>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title><fixed-case>D</fixed-case>-Confidence: An Active Learning Strategy which Efficiently Identifies Small Classes</title>
       <author><first>Nuno</first><last>Escudeiro</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <pages>18–26</pages>
       <url hash="84935b43">W10-0103</url>
       <bibkey>escudeiro-jorge-2010-confidence</bibkey>
@@ -45,7 +45,7 @@
       <title>Domain Adaptation meets Active Learning</title>
       <author><first>Piyush</first><last>Rai</last></author>
       <author><first>Avishek</first><last>Saha</last></author>
-      <author><first>Hal</first><last>Daumé</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé</last></author>
       <author><first>Suresh</first><last>Venkatasubramanian</last></author>
       <pages>27–32</pages>
       <url hash="b32002d1">W10-0104</url>
@@ -55,7 +55,7 @@
       <title>Parallel Active Learning: Eliminating Wait Time with Minimal Staleness</title>
       <author><first>Robbie</first><last>Haertel</last></author>
       <author><first>Paul</first><last>Felt</last></author>
-      <author><first>Eric K.</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric K.</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <pages>33–41</pages>
       <url hash="29f642fb">W10-0105</url>
@@ -66,7 +66,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text</booktitle>
       <url hash="6573cef5">W10-02</url>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
       <editor><first>Carlo</first><last>Strapparava</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, CA</address>
@@ -89,7 +89,7 @@
       <title>Emotion Detection in Email Customer Care</title>
       <author><first>Narendra</first><last>Gupta</last></author>
       <author><first>Mazin</first><last>Gilbert</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <pages>10–16</pages>
       <url hash="88b753c6">W10-0202</url>
       <bibkey>gupta-etal-2010-emotion</bibkey>
@@ -97,8 +97,8 @@
     <paper id="3">
       <title>Toward Plot Units: Automatic Affect State Analysis</title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Hal</first><last>Daume III</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daume III</last></author>
       <author><first>Nathan</first><last>Gilbert</last></author>
       <pages>17–25</pages>
       <url hash="2df89ee9">W10-0203</url>
@@ -106,8 +106,8 @@
     </paper>
     <paper id="4">
       <title>Emotions Evoked by Common Words and Phrases: Using <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk to Create an Emotion Lexicon</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
-      <author><first>Peter</first><last>Turney</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
+      <author id="peter-turney"><first>Peter</first><last>Turney</last></author>
       <pages>26–34</pages>
       <url hash="c25e3db8">W10-0204</url>
       <bibkey>mohammad-turney-2010-emotions</bibkey>
@@ -122,7 +122,7 @@
     </paper>
     <paper id="6">
       <title>A Text-driven Rule-based System for Emotion Cause Detection</title>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Ying</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>45–53</pages>
@@ -140,7 +140,7 @@
     </paper>
     <paper id="8">
       <title>Evaluation of Unsupervised Emotion Models to Textual Affect Recognition</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Alessandro</first><last>Valitutti</last></author>
       <author><first>Rafael A.</first><last>Calvo</last></author>
       <pages>62–70</pages>
@@ -176,7 +176,7 @@
       <title>Emotional Perception of Fairy Tales: Achieving Agreement in Emotion Annotation of Text</title>
       <author><first>Ekaterina P.</first><last>Volkova</last></author>
       <author><first>Betty</first><last>Mohler</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Dale</first><last>Gerdemann</last></author>
       <author><first>Heinrich H.</first><last>Bülthoff</last></author>
       <pages>98–106</pages>
@@ -187,7 +187,7 @@
       <title>Experiments on Summary-based Opinion Classification</title>
       <author><first>Elena</first><last>Lloret</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>107–115</pages>
       <url hash="545dc5a2">W10-0213</url>
       <bibkey>lloret-etal-2010-experiments</bibkey>
@@ -195,7 +195,7 @@
     <paper id="14">
       <title>Recognizing Stances in Ideological On-Line Debates</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>116–124</pages>
       <url hash="1a9a68bd">W10-0214</url>
       <bibkey>somasundaran-wiebe-2010-recognizing</bibkey>
@@ -214,8 +214,8 @@
       <title>Sentiment Classification using Automatically Extracted Subgraph Features</title>
       <author><first>Shilpa</first><last>Arora</last></author>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Carolyn</first><last>Penstein-Rosé</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein-Rosé</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>131–139</pages>
       <url hash="7cb03ae0">W10-0216</url>
       <bibkey>arora-etal-2010-sentiment</bibkey>
@@ -224,7 +224,7 @@
       <title>Hierarchical versus Flat Classification of Emotions in Text</title>
       <author><first>Diman</first><last>Ghazi</last></author>
       <author><first>Diana</first><last>Inkpen</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>140–146</pages>
       <url hash="28c2f4e4">W10-0217</url>
       <bibkey>ghazi-etal-2010-hierarchical</bibkey>
@@ -265,7 +265,7 @@
     <paper id="3">
       <title>Comparing Semantic Role Labeling with Typed Dependency Parsing in Computational Metaphor Identification</title>
       <author><first>Eric P. S.</first><last>Baumer</last></author>
-      <author><first>James P.</first><last>White</last></author>
+      <author id="james-paul-white"><first>James P.</first><last>White</last></author>
       <author><first>Bill</first><last>Tomlinson</last></author>
       <pages>14–22</pages>
       <url hash="e2ca0f24">W10-0303</url>
@@ -273,15 +273,15 @@
     </paper>
     <paper id="4">
       <title>Engineering Linguistic Creativity: Bird Flight and Jet Planes</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>23–30</pages>
       <url hash="ed70a245">W10-0304</url>
       <bibkey>gervas-2010-engineering</bibkey>
     </paper>
     <paper id="5">
       <title>An alternate approach towards meaningful lyric generation in <fixed-case>T</fixed-case>amil</title>
-      <author><first>Ananth</first><last>Ramakrishnan A</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="ananth-ramakrishnan-a"><first>Ananth</first><last>Ramakrishnan A</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>31–39</pages>
       <url hash="61f55603">W10-0305</url>
       <bibkey>ramakrishnan-a-lalitha-devi-2010-alternate</bibkey>
@@ -289,7 +289,7 @@
     <paper id="6">
       <title>Representing Story Plans in <fixed-case>SUMO</fixed-case></title>
       <author><first>Jeffrey</first><last>Cua</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <author><first>Ethel</first><last>Ong</last></author>
       <author><first>Adam</first><last>Pease</last></author>
       <pages>40–48</pages>
@@ -330,7 +330,7 @@
     </paper>
     <paper id="2">
       <title>Scientific Authoring Support: A Tool to Navigate in Typed Citation Graphs</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Uwe</first><last>Kasterka</last></author>
       <pages>7–14</pages>
       <url hash="821bbe8f">W10-0402</url>
@@ -380,10 +380,10 @@
       <title>Exploring Individual Differences in Student Writing with a Narrative Composition Support Environment</title>
       <author><first>Julius</first><last>Goth</last></author>
       <author><first>Alok</first><last>Baikadi</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
       <author><first>Jonathan</first><last>Rowe</last></author>
       <author><first>Bradford</first><last>Mott</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>56–64</pages>
       <url hash="e11bd3e8">W10-0408</url>
       <bibkey>goth-etal-2010-exploring</bibkey>
@@ -430,7 +430,7 @@
     </paper>
     <paper id="3">
       <title>Detecting Word Misuse in <fixed-case>C</fixed-case>hinese</title>
-      <author id="wei-liu"><first>Wei</first><last>Liu</last></author>
+      <author><first>Wei</first><last>Liu</last></author>
       <pages>5–6</pages>
       <url hash="8b4c3656">W10-0503</url>
       <bibkey>liu-2010-detecting</bibkey>
@@ -469,13 +469,13 @@
     </paper>
     <paper id="8">
       <title>Intelligent Linux Information Access by Data Mining: the <fixed-case>ILIAD</fixed-case> Project</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>David</first><last>Martinez</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
       <author><first>Richard</first><last>Penman</last></author>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Marco</first><last>Lui</last></author>
       <author><first>Li</first><last>Wang</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <pages>15–16</pages>
       <url hash="e3442e18">W10-0508</url>
       <bibkey>baldwin-etal-2010-intelligent</bibkey>
@@ -484,7 +484,7 @@
       <title>Mining User Experiences from Online Forums: An Exploration</title>
       <author><first>Valentin</first><last>Jijkoun</last></author>
       <author><first>Wouter</first><last>Weerkamp</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <author><first>Paul</first><last>Ackermans</last></author>
       <author><first>Gijs</first><last>Geleijnse</last></author>
       <pages>17–18</pages>
@@ -495,8 +495,8 @@
       <title>Social Links from Latent Topics in Microblogs</title>
       <author><first>Kriti</first><last>Puniyani</last></author>
       <author><first>Jacob</first><last>Eisenstein</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>19–20</pages>
       <url hash="b257ffd2">W10-0510</url>
       <bibkey>puniyani-etal-2010-social</bibkey>
@@ -511,17 +511,17 @@
     </paper>
     <paper id="12">
       <title><fixed-case>T</fixed-case>witter in Mass Emergency: What <fixed-case>NLP</fixed-case> Can Contribute</title>
-      <author><first>William J.</first><last>Corvey</last></author>
-      <author><first>Sarah</first><last>Vieweg</last></author>
+      <author id="william-j-corvey"><first>William J.</first><last>Corvey</last></author>
+      <author id="sarah-vieweg"><first>Sarah</first><last>Vieweg</last></author>
       <author><first>Travis</first><last>Rood</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>23–24</pages>
       <url hash="45037e33">W10-0512</url>
       <bibkey>corvey-etal-2010-twitter</bibkey>
     </paper>
     <paper id="13">
       <title>The <fixed-case>E</fixed-case>dinburgh <fixed-case>T</fixed-case>witter Corpus</title>
-      <author><first>Saša</first><last>Petrović</last></author>
+      <author id="sasa-petrovic"><first>Saša</first><last>Petrović</last></author>
       <author><first>Miles</first><last>Osborne</last></author>
       <author><first>Victor</first><last>Lavrenko</last></author>
       <pages>25–26</pages>
@@ -530,7 +530,7 @@
     </paper>
     <paper id="14">
       <title>Labelling and Spatio-Temporal Grounding of News Events</title>
-      <author><first>Bea</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Bea</first><last>Alex</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <pages>27–28</pages>
       <url hash="3f3201d5">W10-0514</url>
@@ -540,7 +540,7 @@
       <title>Tracking Information Flow between Primary and Secondary News Sources</title>
       <author><first>Will</first><last>Radford</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
-      <author><first>James</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James</first><last>Curran</last></author>
       <author><first>Maria</first><last>Milosavljevic</last></author>
       <pages>29–30</pages>
       <url hash="e8740918">W10-0515</url>
@@ -560,7 +560,7 @@
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 First Workshop on Computational Neurolinguistics</booktitle>
       <url hash="14634684">W10-06</url>
       <editor><first>Brian</first><last>Murphy</last></editor>
-      <editor><first>Kai-min Kevin</first><last>Chang</last></editor>
+      <editor id="kai-min-kevin-chang"><first>Kai-min Kevin</first><last>Chang</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, USA</address>
@@ -602,7 +602,7 @@
       <title>Network Analysis of <fixed-case>K</fixed-case>orean Word Associations</title>
       <author><first>Jaeyoung</first><last>Jung</last></author>
       <author><first>Na</first><last>Li</last></author>
-      <author><first>Hiroyuki</first><last>Akama</last></author>
+      <author id="hiroyuki-akama"><first>Hiroyuki</first><last>Akama</last></author>
       <pages>27–35</pages>
       <url hash="fdf8a636">W10-0604</url>
       <bibkey>jung-etal-2010-network</bibkey>
@@ -610,7 +610,7 @@
     <paper id="5">
       <title>Detecting Semantic Category in Simultaneous <fixed-case>EEG</fixed-case>/<fixed-case>MEG</fixed-case> Recordings</title>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>36–44</pages>
       <url hash="dfda995d">W10-0605</url>
       <bibkey>murphy-poesio-2010-detecting</bibkey>
@@ -618,7 +618,7 @@
     <paper id="6">
       <title>Hemispheric processing of <fixed-case>C</fixed-case>hinese polysemy in the disyllabic verb/ noun compounds: an event-related potential study</title>
       <author><first>Chih-ying</first><last>Huang</last></author>
-      <author><first>Chia-ying</first><last>Lee</last></author>
+      <author id="chia-ying-lee"><first>Chia-ying</first><last>Lee</last></author>
       <pages>45–51</pages>
       <url hash="71183d8c">W10-0606</url>
       <bibkey>huang-lee-2010-hemispheric</bibkey>
@@ -681,7 +681,7 @@
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>13–20</pages>
       <url hash="ab54a111">W10-0702</url>
       <bibkey>jha-etal-2010-corpus</bibkey>
@@ -697,7 +697,7 @@
     <paper id="4">
       <title>Semi-supervised Word Alignment with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>30–34</pages>
       <url hash="1d114835">W10-0704</url>
       <bibkey>gao-vogel-2010-semi</bibkey>
@@ -705,7 +705,7 @@
     <paper id="5">
       <title>Rating Computer-Generated Questions with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Michael</first><last>Heilman</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>35–40</pages>
       <url hash="031fa8e0">W10-0705</url>
       <bibkey>heilman-smith-2010-rating</bibkey>
@@ -720,10 +720,10 @@
     </paper>
     <paper id="7">
       <title>Document Image Collection Using <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
-      <author><first>Audrey</first><last>Le</last></author>
+      <author id="audrey-le"><first>Audrey</first><last>Le</last></author>
       <author><first>Jerome</first><last>Ajot</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>45–52</pages>
       <bibkey>le-etal-2010-document</bibkey>
     </paper>
@@ -739,7 +739,7 @@
     <paper id="9">
       <title>Exploring Normalization Techniques for Human Judgments of Machine Translation Adequacy Collected Using <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>57–61</pages>
       <url hash="59e9fe25">W10-0709</url>
       <bibkey>denkowski-lavie-2010-exploring</bibkey>
@@ -747,7 +747,7 @@
     <paper id="10">
       <title>Can Crowds Build parallel corpora for Machine Translation Systems?</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>62–65</pages>
       <url hash="de02af05">W10-0710</url>
       <bibkey>ambati-vogel-2010-crowds</bibkey>
@@ -756,7 +756,7 @@
       <title>Turker-Assisted Paraphrasing for <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Machine Translation</title>
       <author><first>Michael</first><last>Denkowski</last></author>
       <author><first>Hassan</first><last>Al-Haj</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>66–70</pages>
       <url hash="a5c0ff56">W10-0711</url>
       <bibkey>denkowski-etal-2010-turker</bibkey>
@@ -766,14 +766,14 @@
       <author><first>Nolan</first><last>Lawson</last></author>
       <author><first>Kevin</first><last>Eustice</last></author>
       <author><first>Mike</first><last>Perkowitz</last></author>
-      <author><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
       <pages>71–79</pages>
       <url hash="88dd8b64">W10-0712</url>
       <bibkey>lawson-etal-2010-annotating</bibkey>
     </paper>
     <paper id="13">
       <title>Annotating Named Entities in <fixed-case>T</fixed-case>witter Data with Crowdsourcing</title>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <author><first>William</first><last>Murnane</last></author>
       <author><first>Anand</first><last>Karandikar</last></author>
       <author><first>Nicholas</first><last>Keller</last></author>
@@ -794,7 +794,7 @@
     </paper>
     <paper id="15">
       <title>An Enriched <fixed-case>MT</fixed-case> Grammar for Under $100</title>
-      <author><first>Omar F.</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
       <pages>93–98</pages>
       <url hash="9787090f">W10-0715</url>
@@ -804,7 +804,7 @@
       <title>Using the <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk to Transcribe and Annotate Meeting Speech for Extractive Summarization</title>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Satanjeev</first><last>Banerjee</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>99–107</pages>
       <url hash="0791bcc5">W10-0716</url>
       <bibkey>marge-etal-2010-using</bibkey>
@@ -822,9 +822,9 @@
       <author><first>Bart</first><last>Mellebeek</last></author>
       <author><first>Francesc</first><last>Benavent</last></author>
       <author><first>Jens</first><last>Grivolla</last></author>
-      <author><first>Joan</first><last>Codina</last></author>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <pages>114–121</pages>
       <url hash="d1fe0d46">W10-0718</url>
       <bibkey>mellebeek-etal-2010-opinion</bibkey>
@@ -838,7 +838,7 @@
       <author><first>Robin</first><last>Melnick</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <author><first>Tyler</first><last>Schnoebelen</last></author>
-      <author><first>Harry</first><last>Tily</last></author>
+      <author id="harry-j-tily"><first>Harry</first><last>Tily</last></author>
       <pages>122–130</pages>
       <url hash="a6fb6d45">W10-0719</url>
       <bibkey>munro-etal-2010-crowdsourcing</bibkey>
@@ -862,7 +862,7 @@
     </paper>
     <paper id="22">
       <title>Non-Expert Evaluation of Summarization Systems is Risky</title>
-      <author><first>Dan</first><last>Gillick</last></author>
+      <author id="dan-gillick"><first>Dan</first><last>Gillick</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <pages>148–151</pages>
       <url hash="75131e32">W10-0722</url>
@@ -872,7 +872,7 @@
       <title>Shedding (a Thousand Points of) Light on Biased Language</title>
       <author><first>Tae</first><last>Yano</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>152–158</pages>
       <url hash="1fcef66c">W10-0723</url>
       <bibkey>yano-etal-2010-shedding</bibkey>
@@ -881,7 +881,7 @@
       <title>Evaluation of Commonsense Knowledge with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Jonathan</first><last>Gordon</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>159–162</pages>
       <url hash="ae74490a">W10-0724</url>
       <bibkey>gordon-etal-2010-evaluation</bibkey>
@@ -896,8 +896,8 @@
     </paper>
     <paper id="26">
       <title>The Wisdom of the Crowd’s Ear: Speech Accent Rating and Annotation with <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
-      <author><first>Stephen</first><last>Kunath</last></author>
-      <author><first>Steven</first><last>Weinberger</last></author>
+      <author id="stephen-kunath"><first>Stephen</first><last>Kunath</last></author>
+      <author id="steven-h-weinberger"><first>Steven</first><last>Weinberger</last></author>
       <pages>168–171</pages>
       <url hash="a3c2905d">W10-0726</url>
       <bibkey>kunath-weinberger-2010-wisdom</bibkey>
@@ -912,7 +912,7 @@
     </paper>
     <paper id="28">
       <title>Preliminary Experiments with <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for Annotating Medical Named Entities</title>
-      <author><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
       <author><first>Imre</first><last>Solti</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Scott</first><last>Halgrim</last></author>
@@ -925,7 +925,7 @@
       <author><first>Ian</first><last>Lane</last></author>
       <author><first>Matthias</first><last>Eck</last></author>
       <author><first>Kay</first><last>Rottmann</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>184–187</pages>
       <url hash="dd6a6a5c">W10-0729</url>
       <bibkey>lane-etal-2010-tools</bibkey>
@@ -943,17 +943,17 @@
       <title><fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for Subjectivity Word Sense Disambiguation</title>
       <author><first>Cem</first><last>Akkaya</last></author>
       <author><first>Alexander</first><last>Conrad</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>195–203</pages>
       <url hash="acd7e2ee">W10-0731</url>
       <bibkey>akkaya-etal-2010-amazon</bibkey>
     </paper>
     <paper id="32">
       <title>Non-Expert Correction of Automatically Generated Relation Annotations</title>
-      <author><first>Matthew R.</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew R.</first><last>Gormley</last></author>
       <author><first>Adam</first><last>Gerber</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
       <pages>204–207</pages>
       <url hash="8b8fa73b">W10-0732</url>
@@ -969,7 +969,7 @@
     </paper>
     <paper id="34">
       <title>Creating a Bi-lingual Entailment Corpus through Translations with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk: $100 for a 10-day Rush</title>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <pages>212–216</pages>
       <url hash="5e29b87b">W10-0734</url>
@@ -1003,8 +1003,8 @@
     <paper id="1">
       <title>Towards a Domain Independent Semantics: Enhancing Semantic Representation with Construction Grammar</title>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Rodney D.</first><last>Nielsen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="rodney-nielsen"><first>Rodney D.</first><last>Nielsen</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1–8</pages>
       <url hash="58972a00">W10-0801</url>
       <bibkey>hwang-etal-2010-towards</bibkey>
@@ -1045,7 +1045,7 @@
     </paper>
     <paper id="6">
       <title>Automatic Extraction of Constructional Schemas</title>
-      <author><first>Gerhard</first><last>van Huyssteen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard</first><last>van Huyssteen</last></author>
       <author><first>Marelie</first><last>Davel</last></author>
       <pages>39–46</pages>
       <url hash="4db0ba1c">W10-0806</url>
@@ -1056,12 +1056,12 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 First International Workshop on Formalisms and Methodology for Learning by Reading</booktitle>
       <url hash="dc0b3526">W10-09</url>
-      <editor><first>Rutu</first><last>Mulkar-Mehta</last></editor>
-      <editor><first>James</first><last>Allen</last></editor>
-      <editor><first>Jerry</first><last>Hobbs</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
-      <editor><first>Bernardo</first><last>Magnini</last></editor>
-      <editor><first>Chris</first><last>Manning</last></editor>
+      <editor id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar-Mehta</last></editor>
+      <editor id="james-allen"><first>James</first><last>Allen</last></editor>
+      <editor id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></editor>
+      <editor id="christopher-d-manning"><first>Chris</first><last>Manning</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
       <month>June</month>
@@ -1075,7 +1075,7 @@
     <paper id="1">
       <title>Machine Reading as a Process of Partial Question-Answering</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <pages>1–9</pages>
       <url hash="1671e515">W10-0901</url>
       <bibkey>clark-harrison-2010-machine</bibkey>
@@ -1084,14 +1084,14 @@
       <title>Building an end-to-end text reading system based on a packed representation</title>
       <author><first>Doo Soon</first><last>Kim</last></author>
       <author><first>Ken</first><last>Barker</last></author>
-      <author><first>Bruce</first><last>Porter</last></author>
+      <author id="bruce-porter"><first>Bruce</first><last>Porter</last></author>
       <pages>10–14</pages>
       <url hash="063e338e">W10-0902</url>
       <bibkey>kim-etal-2010-building</bibkey>
     </paper>
     <paper id="3">
       <title>Semantic Enrichment of Text with Background Knowledge</title>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <author><first>Eduard</first><last>Hovy</last></author>
       <pages>15–23</pages>
       <url hash="fd406643">W10-0903</url>
@@ -1099,7 +1099,7 @@
     </paper>
     <paper id="4">
       <title>Large Scale Relation Detection</title>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <author><first>James</first><last>Fan</last></author>
       <author><first>David</first><last>Gondek</last></author>
       <author><first>Andrew</first><last>Schlaikjer</last></author>
@@ -1117,8 +1117,8 @@
     </paper>
     <paper id="6">
       <title>Open-domain Commonsense Reasoning Using Discourse Relations from a Corpus of Weblog Stories</title>
-      <author><first>Matthew</first><last>Gerber</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Gerber</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
       <pages>43–51</pages>
       <url hash="6cefdf45">W10-0906</url>
@@ -1128,7 +1128,7 @@
       <title>Semantic Role Labeling for Open Information Extraction</title>
       <author><first>Janara</first><last>Christensen</last></author>
       <author><last>Mausam</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>52–60</pages>
       <url hash="0060ef51">W10-0907</url>
@@ -1139,7 +1139,7 @@
       <author><first>Marjorie</first><last>Freedman</last></author>
       <author><first>Edward</first><last>Loper</last></author>
       <author><first>Elizabeth</first><last>Boschee</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>61–69</pages>
       <url hash="e560e93d">W10-0908</url>
       <bibkey>freedman-etal-2010-empirical</bibkey>
@@ -1159,7 +1159,7 @@
     <paper id="10">
       <title>Unsupervised techniques for discovering ontology elements from <fixed-case>W</fixed-case>ikipedia article links</title>
       <author><first>Zareen</first><last>Syed</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>78–86</pages>
       <url hash="d76bb487">W10-0910</url>
       <bibkey>syed-finin-2010-unsupervised</bibkey>
@@ -1170,15 +1170,15 @@
       <author><first>Janara</first><last>Christensen</last></author>
       <author><first>Pedro</first><last>Domingos</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
-      <author><first>Raphael</first><last>Hoffmann</last></author>
-      <author><first>Chloe</first><last>Kiddon</last></author>
+      <author id="raphael-hoffmann"><first>Raphael</first><last>Hoffmann</last></author>
+      <author id="chloe-kiddon"><first>Chloe</first><last>Kiddon</last></author>
       <author><first>Thomas</first><last>Lin</last></author>
       <author><first>Xiao</first><last>Ling</last></author>
-      <author><last>Mausam</last></author>
+      <author><first/><last>Mausam</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Stefan</first><last>Schoenmackers</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
-      <author><first>Dan</first><last>Weld</last></author>
+      <author><last>Schoenmackers</last><first>Stefan</first></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
+      <author id="daniel-s-weld"><first>Dan</first><last>Weld</last></author>
       <author><first>Fei</first><last>Wu</last></author>
       <author><first>Congle</first><last>Zhang</last></author>
       <pages>87–95</pages>
@@ -1188,7 +1188,7 @@
     <paper id="12">
       <title>Analogical Dialogue Acts: Supporting Learning by Reading Analogies</title>
       <author><first>David</first><last>Barbella</last></author>
-      <author><first>Kenneth</first><last>Forbus</last></author>
+      <author id="kenneth-forbus"><first>Kenneth</first><last>Forbus</last></author>
       <pages>96–104</pages>
       <url hash="22685963">W10-0912</url>
       <bibkey>barbella-forbus-2010-analogical</bibkey>
@@ -1205,8 +1205,8 @@
       <title>Supporting rule-based representations with corpus-derived lexical information.</title>
       <author><first>Annie</first><last>Zaenen</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
-      <author><first>Daniel</first><last>Bobrow</last></author>
-      <author><first>Raphael</first><last>Hoffmann</last></author>
+      <author id="daniel-bobrow"><first>Daniel</first><last>Bobrow</last></author>
+      <author id="raphael-hoffmann"><first>Raphael</first><last>Hoffmann</last></author>
       <pages>114–121</pages>
       <url hash="0fa3ce26">W10-0914</url>
       <bibkey>zaenen-etal-2010-supporting</bibkey>
@@ -1226,7 +1226,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 Fifth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="875cadac">W10-10</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1241,17 +1241,17 @@
     </frontmatter>
     <paper id="1">
       <title>Readability Assessment for Text Simplification</title>
-      <author><first>Sandra</first><last>Aluisio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluisio</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <pages>1–9</pages>
       <url hash="7f061d2c">W10-1001</url>
       <bibkey>aluisio-etal-2010-readability</bibkey>
     </paper>
     <paper id="2">
       <title>Enhancing Authentic Web Pages for Language Learners</title>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Luiz</first><last>Amaral</last></author>
       <author><first>Adriane</first><last>Boyd</last></author>
@@ -1292,7 +1292,7 @@
       <title>Rethinking Grammatical Error Annotation and Evaluation with the <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk</title>
       <author><first>Joel</first><last>Tetreault</last></author>
       <author><first>Elena</first><last>Filatova</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>45–48</pages>
       <url hash="0eb050cb">W10-1006</url>
       <bibkey>tetreault-etal-2010-rethinking</bibkey>
@@ -1315,12 +1315,12 @@
     </paper>
     <paper id="9">
       <title>Leveraging Hidden Dialogue State to Select Tutorial Moves</title>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>Rob</first><last>Phillips</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="robert-phillips"><first>Rob</first><last>Phillips</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
       <author><first>Michael</first><last>Wallis</last></author>
       <author><first>Mladen</first><last>Vouk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>66–73</pages>
       <url hash="7c95a6d1">W10-1009</url>
       <bibkey>boyer-etal-2010-leveraging</bibkey>
@@ -1379,7 +1379,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>M</fixed-case>ed<fixed-case>E</fixed-case>val- A <fixed-case>S</fixed-case>wedish Medical Test Collection with Doctors and Patients User Groups</title>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <pages>1–7</pages>
       <url hash="b6860809">W10-1101</url>
       <bibkey>friberg-heppin-2010-medeval</bibkey>
@@ -1408,7 +1408,7 @@
       <author><first>Stephanie</first><last>Schreitter</last></author>
       <author><first>Alexandra</first><last>Klein</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <pages>22–28</pages>
       <url hash="960f40a0">W10-1104</url>
       <bibkey>schreitter-etal-2010-using</bibkey>
@@ -1426,7 +1426,7 @@
     </paper>
     <paper id="6">
       <title>Reliability and Type of Consumer Health Documents on the World Wide Web: an Annotation Study</title>
-      <author><first>Melanie</first><last>Martin</last></author>
+      <author id="melanie-martin"><first>Melanie</first><last>Martin</last></author>
       <pages>38–45</pages>
       <url hash="29e6870e">W10-1106</url>
       <bibkey>martin-2010-reliability</bibkey>
@@ -1434,9 +1434,9 @@
     <paper id="7">
       <title>Automated Identification of Synonyms in Biomedical Acronym Sense Inventories</title>
       <author><first>Genevieve B.</first><last>Melton</last></author>
-      <author><first>SungRim</first><last>Moon</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
-      <author><first>Serguei</first><last>Pakhomov</last></author>
+      <author id="sungrim-moon"><first>SungRim</first><last>Moon</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
+      <author id="serguei-pakhomov"><first>Serguei</first><last>Pakhomov</last></author>
       <pages>46–52</pages>
       <url hash="79f4b466">W10-1107</url>
       <bibkey>melton-etal-2010-automated</bibkey>
@@ -1447,7 +1447,7 @@
       <author><first>Elin</first><last>Carlsson</last></author>
       <author><first>Hercules</first><last>Dalianis</last></author>
       <author><first>Riitta</first><last>Danielsson-Ojala</last></author>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
       <author><first>Martin</first><last>Hassel</last></author>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
       <author><first>Heljä</first><last>Lundgren-Laine</last></author>
@@ -1467,7 +1467,7 @@
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Imre</first><last>Solti</last></author>
       <author><first>Eithon</first><last>Cadag</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>61–67</pages>
       <url hash="47ee2173">W10-1109</url>
       <bibkey>halgrim-etal-2010-extracting</bibkey>
@@ -1475,14 +1475,14 @@
     <paper id="10">
       <title>Linking <fixed-case>S</fixed-case>we<fixed-case>FN</fixed-case>++ with Medical Resources, towards a <fixed-case>M</fixed-case>ed<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et for <fixed-case>S</fixed-case>wedish</title>
       <author><first>Dimitrios</first><last>Kokkinakis</last></author>
-      <author><first>Maria</first><last>Toporowska Gronostaj</last></author>
+      <author id="maria-toporowska-gronostaj"><first>Maria</first><last>Toporowska Gronostaj</last></author>
       <pages>68–71</pages>
       <url hash="36ae998e">W10-1110</url>
       <bibkey>kokkinakis-toporowska-gronostaj-2010-linking</bibkey>
     </paper>
     <paper id="11">
       <title>Measuring Risk and Information Preservation: Toward New Metrics for De-identification of Clinical Texts</title>
-      <author><first>Lynette</first><last>Hirschman</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
       <author><first>John</first><last>Aberdeen</last></author>
       <pages>72–75</pages>
       <url hash="529b319c">W10-1111</url>
@@ -1500,7 +1500,7 @@
     <paper id="13">
       <title>Machine learning and features selection for semi-automatic <fixed-case>ICD</fixed-case>-9-<fixed-case>CM</fixed-case> encoding</title>
       <author><first>Julia</first><last>Medori</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>84–89</pages>
       <url hash="4dab538a">W10-1113</url>
       <bibkey>medori-fairon-2010-machine</bibkey>
@@ -1508,7 +1508,7 @@
     <paper id="14">
       <title>Extracting Formulaic and Free Text Clinical Research Articles Metadata using Conditional Random Fields</title>
       <author><first>Sein</first><last>Lin</last></author>
-      <author><first>Jun-Ping</first><last>Ng</last></author>
+      <author id="jun-ping-ng"><first>Jun-Ping</first><last>Ng</last></author>
       <author><first>Shreyasee</first><last>Pradhan</last></author>
       <author><first>Jatin</first><last>Shah</last></author>
       <author><first>Ricardo</first><last>Pietrobon</last></author>
@@ -1524,8 +1524,8 @@
       <url hash="faed387d">W10-12</url>
       <editor><first>Donghui</first><last>Feng</last></editor>
       <editor><first>Jamie</first><last>Callan</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
-      <editor><first>Marius</first><last>Pasca</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="marius-pasca"><first>Marius</first><last>Pasca</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
       <month>June</month>
@@ -1539,8 +1539,8 @@
     <paper id="1">
       <title><fixed-case>LDA</fixed-case> Based Similarity Modeling for Question Answering</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <pages>1–9</pages>
       <url hash="9dc38bd1">W10-1201</url>
       <bibkey>celikyilmaz-etal-2010-lda</bibkey>
@@ -1567,7 +1567,7 @@
     <paper id="4">
       <title>A Graph-Based Semi-Supervised Learning for Question Semantic Labeling</title>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>27–35</pages>
       <url hash="81853e2c">W10-1204</url>
       <bibkey>celikyilmaz-hakkani-tur-2010-graph</bibkey>
@@ -1575,7 +1575,7 @@
     <paper id="5">
       <title>Capturing the Stars: Predicting Ratings for Service and Product Reviews</title>
       <author><first>Narendra</first><last>Gupta</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <author><first>Patrick</first><last>Haffner</last></author>
       <pages>36–43</pages>
       <url hash="6a01ff29">W10-1205</url>
@@ -1584,7 +1584,7 @@
     <paper id="6">
       <title>Object Search: Supporting Structured Queries in Web Search Engines</title>
       <author><first>Kim</first><last>Pham</last></author>
-      <author><first>Nicholas</first><last>Rizzolo</last></author>
+      <author id="nick-rizzolo"><first>Nicholas</first><last>Rizzolo</last></author>
       <author><first>Kevin</first><last>Small</last></author>
       <author><first>Kevin Chen-Chuan</first><last>Chang</last></author>
       <author><first>Dan</first><last>Roth</last></author>
@@ -1598,7 +1598,7 @@
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 Workshop on Speech and Language Processing for Assistive Technologies</booktitle>
       <url hash="01acfe5f">W10-13</url>
       <editor><first>Melanie</first><last>Fried-Oken</last></editor>
-      <editor><first>Kathleen F.</first><last>McCoy</last></editor>
+      <editor id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Los Angeles, California</address>
@@ -1616,7 +1616,7 @@
       <author><first>Joseph</first><last>Reddington</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Nava</first><last>Tintarev</last></author>
-      <author><first>Annalu</first><last>Waller</last></author>
+      <author id="annalu-waller"><first>Annalu</first><last>Waller</last></author>
       <pages>1–9</pages>
       <url hash="151f613d">W10-1301</url>
       <bibkey>black-etal-2010-using</bibkey>
@@ -1670,7 +1670,7 @@
     <paper id="7">
       <title>Using Reinforcement Learning to Create Communication Channel Management Strategies for Diverse Users</title>
       <author><first>Rebecca</first><last>Lunsford</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <pages>53–61</pages>
       <url hash="cf73acb2">W10-1307</url>
       <bibkey>lunsford-heeman-2010-using</bibkey>
@@ -1693,7 +1693,7 @@
     </paper>
     <paper id="10">
       <title>State-Transition Interpolation and <fixed-case>MAP</fixed-case> Adaptation for <fixed-case>HMM</fixed-case>-based Dysarthric Speech Recognition</title>
-      <author><first>Harsh Vardhan</first><last>Sharma</last></author>
+      <author id="harsh-vardhan-sharma"><first>Harsh Vardhan</first><last>Sharma</last></author>
       <author><first>Mark</first><last>Hasegawa-Johnson</last></author>
       <pages>72–79</pages>
       <url hash="efbe5689">W10-1310</url>
@@ -1727,7 +1727,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case> <fixed-case>HLT</fixed-case> 2010 First Workshop on Statistical Parsing of Morphologically-Rich Languages</booktitle>
       <url hash="10ad226e">W10-14</url>
-      <editor><first>Djame</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djame</first><last>Seddah</last></editor>
       <editor><first>Sandra</first><last>Koebler</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1745,9 +1745,9 @@
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <author><first>Djamé</first><last>Seddah</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Ines</first><last>Rehbein</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
@@ -1759,7 +1759,7 @@
       <title>Improving <fixed-case>A</fixed-case>rabic Dependency Parsing with Lexical and Inflectional Morphological Features</title>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>13–21</pages>
       <url hash="927c2ce7">W10-1402</url>
       <bibkey>marton-etal-2010-improving</bibkey>
@@ -1769,7 +1769,7 @@
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>22–30</pages>
       <url hash="580ff9db">W10-1403</url>
@@ -1778,7 +1778,7 @@
     <paper id="4">
       <title>Application of Different Techniques to Dependency Parsing of <fixed-case>B</fixed-case>asque</title>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>31–39</pages>
       <url hash="5a05ba6c">W10-1404</url>
       <bibkey>bengoetxea-gojenola-2010-application</bibkey>
@@ -1786,7 +1786,7 @@
     <paper id="5">
       <title>Modeling Morphosyntactic Agreement in Constituency-Based Parsing of <fixed-case>M</fixed-case>odern <fixed-case>H</fixed-case>ebrew</title>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>40–48</pages>
       <url hash="c53e8dfc">W10-1405</url>
       <bibkey>tsarfaty-simaan-2010-modeling</bibkey>
@@ -1812,16 +1812,16 @@
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>67–75</pages>
       <url hash="439c797b">W10-1408</url>
       <bibkey>attia-etal-2010-handling</bibkey>
     </paper>
     <paper id="9">
       <title>Parsing Word Clusters</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Djamé</first><last>Seddah</last></author>
       <pages>76–84</pages>
       <url hash="b2ffa76c">W10-1409</url>
@@ -1830,10 +1830,10 @@
     <paper id="10">
       <title>Lemmatization and Lexicalized Statistical Parsing of Morphologically-Rich Languages: the Case of <fixed-case>F</fixed-case>rench</title>
       <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>85–93</pages>
       <url hash="b8f408b9">W10-1410</url>
       <bibkey>seddah-etal-2010-lemmatization</bibkey>
@@ -1875,7 +1875,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>N</fixed-case>o<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>: a large web-based corpus for <fixed-case>N</fixed-case>orwegian</title>
-      <author><first>Emiliano Raul</first><last>Guevara</last></author>
+      <author id="emiliano-raul-guevara"><first>Emiliano Raul</first><last>Guevara</last></author>
       <pages>1–7</pages>
       <url hash="b9005206">W10-1501</url>
       <bibkey>guevara-2010-nowac</bibkey>
@@ -1893,7 +1893,7 @@
       <title>Sketching Techniques for Large Scale <fixed-case>NLP</fixed-case></title>
       <author><first>Amit</first><last>Goyal</last></author>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Suresh</first><last>Venkatasubramanian</last></author>
       <pages>17–25</pages>
       <url hash="0d2e6fb7">W10-1503</url>
@@ -1934,15 +1934,15 @@
       <title>Computational Linguistics in <fixed-case>B</fixed-case>razil: An Overview</title>
       <author><first>Thiago</first><last>Pardo</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
-      <author><first>Helena</first><last>de Medeiros Caseli</last></author>
-      <author><first>Maria das Graças</first><last>Nunes</last></author>
+      <author id="helena-de-medeiros-caseli"><first>Helena</first><last>de Medeiros Caseli</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Nunes</last></author>
       <pages>1–7</pages>
       <url hash="d36e9ae3">W10-1601</url>
       <bibkey>pardo-etal-2010-computational</bibkey>
     </paper>
     <paper id="2">
       <title>Data-driven computational linguistics at <fixed-case>F</fixed-case>a<fixed-case>MAF</fixed-case>-<fixed-case>UNC</fixed-case>, <fixed-case>A</fixed-case>rgentina</title>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <author><first>Gabriel</first><last>Infante-Lopez</last></author>
       <pages>8–14</pages>
       <url hash="3153a18e">W10-1602</url>
@@ -1950,7 +1950,7 @@
     </paper>
     <paper id="3">
       <title>Variable-Length <fixed-case>M</fixed-case>arkov Models and Ambiguous Words in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Fabio Natanael</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio Natanael</first><last>Kepler</last></author>
       <author><first>Marcelo</first><last>Finger</last></author>
       <pages>15–23</pages>
       <url hash="a3495143">W10-1603</url>
@@ -1958,7 +1958,7 @@
     </paper>
     <paper id="4">
       <title>Using Common Sense to generate culturally contextualized Machine Translation</title>
-      <author><first>Helena</first><last>de Medeiros Caseli</last></author>
+      <author id="helena-de-medeiros-caseli"><first>Helena</first><last>de Medeiros Caseli</last></author>
       <author><first>Bruno Akio</first><last>Sugiyama</last></author>
       <author><first>Junia Coutinho</first><last>Anacleto</last></author>
       <pages>24–31</pages>
@@ -1984,7 +1984,7 @@
     </paper>
     <paper id="7">
       <title>Fostering Digital Inclusion and Accessibility: The <fixed-case>P</fixed-case>or<fixed-case>S</fixed-case>imples project for Simplification of <fixed-case>P</fixed-case>ortuguese Texts</title>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <author><first>Caroline</first><last>Gasperin</last></author>
       <pages>46–53</pages>
       <url hash="a05a672c">W10-1607</url>
@@ -2001,7 +2001,7 @@
     </paper>
     <paper id="9">
       <title>A Machine Learning Approach for Recognizing Textual Entailment in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Julio</first><last>Castillo</last></author>
+      <author id="julio-castillo"><first>Julio</first><last>Castillo</last></author>
       <pages>62–67</pages>
       <url hash="5c435317">W10-1609</url>
       <bibkey>castillo-2010-machine</bibkey>
@@ -2009,31 +2009,31 @@
     <paper id="10">
       <title>The emergence of the modern concept of introspection: a quantitative linguistic analysis</title>
       <author><first>Iván</first><last>Raskovsky</last></author>
-      <author><first>Diego</first><last>Fernández Slezak</last></author>
+      <author id="diego-fernandez-slezak"><first>Diego</first><last>Fernández Slezak</last></author>
       <author><first>Carlos</first><last>Diuk</last></author>
-      <author><first>Guillermo A.</first><last>Cecchi</last></author>
+      <author id="guillermo-a-cecchi"><first>Guillermo A.</first><last>Cecchi</last></author>
       <pages>68–75</pages>
       <url hash="01ab779d">W10-1610</url>
       <bibkey>raskovsky-etal-2010-emergence</bibkey>
     </paper>
     <paper id="11">
       <title>Combining <fixed-case>CBIR</fixed-case> and <fixed-case>NLP</fixed-case> for Multilingual Terminology Alignment and Cross-Language Image Indexing</title>
-      <author><first>Diego</first><last>Burgos</last></author>
+      <author id="diego-a-burgos"><first>Diego</first><last>Burgos</last></author>
       <pages>76–83</pages>
       <url hash="745b42a8">W10-1611</url>
       <bibkey>burgos-2010-combining</bibkey>
     </paper>
     <paper id="12">
       <title><fixed-case>IRAS</fixed-case>ubcat, a highly parametrizable, language independent tool for the acquisition of verbal subcategorization information from corpus</title>
-      <author><first>Ivana Romina</first><last>Altamirano</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="romina-altamirano"><first>Ivana Romina</first><last>Altamirano</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <pages>84–91</pages>
       <url hash="1fb4a157">W10-1612</url>
       <bibkey>altamirano-alonso-alemany-2010-irasubcat</bibkey>
     </paper>
     <paper id="13">
       <title>The <fixed-case>T</fixed-case>ermi<fixed-case>N</fixed-case>et Project: an Overview</title>
-      <author><first>Ariani</first><last>Di Felippo</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di Felippo</last></author>
       <pages>92–99</pages>
       <url hash="5baf7c08">W10-1613</url>
       <bibkey>di-felippo-2010-terminet</bibkey>
@@ -2050,14 +2050,14 @@
       <title>Recognition and extraction of definitional contexts in <fixed-case>S</fixed-case>panish for sketching a lexical network</title>
       <author><first>César</first><last>Aguilar</last></author>
       <author><first>Olga</first><last>Acosta</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <pages>109–116</pages>
       <url hash="ab4162dc">W10-1615</url>
       <bibkey>aguilar-etal-2010-recognition</bibkey>
     </paper>
     <paper id="16">
       <title>Computational Linguistics for helping Requirements Elicitation: a dream about Automated Software Development</title>
-      <author><first>Carlos Mario</first><last>Zapata Jaramillo</last></author>
+      <author id="carlos-mario-zapata-jaramillo"><first>Carlos Mario</first><last>Zapata Jaramillo</last></author>
       <pages>117–124</pages>
       <url hash="b30dcac2">W10-1616</url>
       <bibkey>zapata-jaramillo-2010-computational</bibkey>
@@ -2065,8 +2065,8 @@
     <paper id="17">
       <title>Text Generation for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: the Surface Realization Task</title>
       <author><first>Eder</first><last>Novais</last></author>
-      <author><first>Thiago</first><last>Tadeu</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="thiago-d-tadeu"><first>Thiago</first><last>Tadeu</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>125–131</pages>
       <url hash="8f437412">W10-1617</url>
       <bibkey>novais-etal-2010-text</bibkey>
@@ -2089,7 +2089,7 @@
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Kay</first><last>Peterson</last></editor>
-      <editor><first>Omar</first><last>Zaidan</last></editor>
+      <editor id="omar-zaidan"><first>Omar</first><last>Zaidan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -2104,7 +2104,7 @@
       <title>A Semi-Supervised Word Alignment Algorithm with Partial Manual Alignments</title>
       <author><first>Qin</first><last>Gao</last></author>
       <author><first>Nguyen</first><last>Bach</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>1–10</pages>
       <url hash="a0974f20">W10-1701</url>
       <bibkey>gao-etal-2010-semi</bibkey>
@@ -2124,7 +2124,7 @@
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <author><first>Kay</first><last>Peterson</last></author>
-      <author><first>Mark</first><last>Przybocki</last></author>
+      <author id="mark-przybocki"><first>Mark</first><last>Przybocki</last></author>
       <author><first>Omar</first><last>Zaidan</last></author>
       <pages>17–53</pages>
       <url hash="7c268b95">W10-1703</url>
@@ -2133,8 +2133,8 @@
     <paper id="4">
       <title><fixed-case>LIMSI</fixed-case>’s Statistical Translation Systems for <fixed-case>WMT</fixed-case>’10</title>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Josep M.</first><last>Crego</last></author>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="josep-m-crego"><first>Josep M.</first><last>Crego</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>54–59</pages>
       <url hash="1eed1d60">W10-1704</url>
@@ -2142,7 +2142,7 @@
     </paper>
     <paper id="5">
       <title>2010 Failures in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Phrase-Based <fixed-case>MT</fixed-case></title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Kamil</first><last>Kos</last></author>
       <pages>60–66</pages>
       <url hash="d714f2d9">W10-1705</url>
@@ -2152,7 +2152,7 @@
       <title>An Empirical Study on Development Set Selection Strategy for Machine Translation Learning</title>
       <author><first>Cong</first><last>Hui</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <author><first>Yan</first><last>Song</last></author>
       <pages>67–71</pages>
       <url hash="963c42f2">W10-1706</url>
@@ -2161,7 +2161,7 @@
     <paper id="7">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland Statistical Machine Translation System for the Fifth Workshop on Machine Translation</title>
       <author><first>Vladimir</first><last>Eidelman</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>72–76</pages>
       <url hash="93a8b76f">W10-1707</url>
@@ -2170,7 +2170,7 @@
     <paper id="8">
       <title>Further Experiments with Shallow Hybrid <fixed-case>MT</fixed-case> Systems</title>
       <author><first>Christian</first><last>Federmann</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <author><first>Yu</first><last>Chen</last></author>
       <author><first>Sabine</first><last>Hunsicker</last></author>
       <author><first>Jia</first><last>Xu</last></author>
@@ -2182,8 +2182,8 @@
     <paper id="9">
       <title>Improved Features and Grammar Selection for Syntax-Based <fixed-case>MT</fixed-case></title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Jonathan</first><last>Clark</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan</first><last>Clark</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>82–87</pages>
       <url hash="3c649f5b">W10-1709</url>
       <bibkey>hanneman-etal-2010-improved</bibkey>
@@ -2205,18 +2205,18 @@
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>93–97</pages>
       <url hash="5b0df812">W10-1711</url>
       <bibkey>heger-etal-2010-rwth</bibkey>
     </paper>
     <paper id="12">
       <title>Using Collocation Segmentation to Augment the Phrase Table</title>
-      <author><first>Carlos A.</first><last>Henríquez Q.</last></author>
-      <author><first>Marta</first><last>Ruiz Costa-jussà</last></author>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="carlos-henriquez"><first>Carlos A.</first><last>Henríquez Q.</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>Ruiz Costa-jussà</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <pages>98–102</pages>
       <url hash="3555a76d">W10-1712</url>
       <bibkey>henriquez-q-etal-2010-using</bibkey>
@@ -2226,7 +2226,7 @@
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Julien</first><last>Bourdaillet</last></author>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>103–109</pages>
       <url hash="ac69864e">W10-1713</url>
       <bibkey>huet-etal-2010-rali</bibkey>
@@ -2235,7 +2235,7 @@
       <title>Exodus - Exploring <fixed-case>SMT</fixed-case> for <fixed-case>EU</fixed-case> Institutions</title>
       <author><first>Michael</first><last>Jellinghaus</last></author>
       <author><first>Alexandros</first><last>Poulis</last></author>
-      <author><first>David</first><last>Kolovratník</last></author>
+      <author id="david-kolovratnik"><first>David</first><last>Kolovratník</last></author>
       <pages>110–114</pages>
       <url hash="cd12b28c">W10-1714</url>
       <bibkey>jellinghaus-etal-2010-exodus</bibkey>
@@ -2276,10 +2276,10 @@
       <title><fixed-case>J</fixed-case>oshua 2.0: A Toolkit for Parsing-Based Machine Translation with Syntax, Semirings, Discriminative Training and Other Goodies</title>
       <author><first>Zhifei</first><last>Li</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Juri</first><last>Ganitkevitch</last></author>
       <author><first>Ann</first><last>Irvine</last></author>
-      <author><first>Sanjeev</first><last>Khudanpur</last></author>
+      <author id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <author><first>Wren</first><last>Thornton</last></author>
       <author><first>Ziyuan</first><last>Wang</last></author>
@@ -2294,7 +2294,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>138–142</pages>
       <url hash="b198cd2c">W10-1719</url>
       <bibkey>niehues-etal-2010-karlsruhe</bibkey>
@@ -2303,13 +2303,13 @@
       <title><fixed-case>MATREX</fixed-case>: The <fixed-case>DCU</fixed-case> <fixed-case>MT</fixed-case> System for <fixed-case>WMT</fixed-case> 2010</title>
       <author><first>Sergio</first><last>Penkale</last></author>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Pratyush</first><last>Banerjee</last></author>
-      <author><first>Ankit K.</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit K.</first><last>Srivastava</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>143–148</pages>
       <url hash="bd296ca4">W10-1720</url>
@@ -2326,7 +2326,7 @@
       <title>The <fixed-case>CUED</fixed-case> <fixed-case>H</fixed-case>i<fixed-case>FST</fixed-case> System for the <fixed-case>WMT</fixed-case>10 Translation Shared Task</title>
       <author><first>Juan</first><last>Pino</last></author>
       <author><first>Gonzalo</first><last>Iglesias</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Graeme</first><last>Blackwood</last></author>
       <author><first>Jamie</first><last>Brunning</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
@@ -2337,8 +2337,8 @@
     <paper id="23">
       <title>The <fixed-case>LIG</fixed-case> Machine Translation System for <fixed-case>WMT</fixed-case> 2010</title>
       <author><first>Marion</first><last>Potet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
       <pages>161–166</pages>
       <url hash="7e19cbcd">W10-1723</url>
       <bibkey>potet-etal-2010-lig</bibkey>
@@ -2354,14 +2354,14 @@
     </paper>
     <paper id="25">
       <title><fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish System for <fixed-case>WMT</fixed-case>10</title>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
       <author><first>Guillem</first><last>Gascó</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
-      <author><first>Martha-Alicia</first><last>Rocha</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="martha-alicia-rocha"><first>Martha-Alicia</first><last>Rocha</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>172–176</pages>
       <url hash="30c5a2a2">W10-1725</url>
       <bibkey>sanchis-trilles-etal-2010-upv</bibkey>
@@ -2386,7 +2386,7 @@
     </paper>
     <paper id="28">
       <title>To Cache or Not To Cache? Experiments with Adaptive Models in Statistical Machine Translation</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>189–194</pages>
       <url hash="8ce22fbb">W10-1728</url>
       <bibkey>tiedemann-2010-cache</bibkey>
@@ -2394,8 +2394,8 @@
     <paper id="29">
       <title>Applying Morphological Decompositions to Statistical Machine Translation</title>
       <author><first>Sami</first><last>Virpioja</last></author>
-      <author><first>Jaakko</first><last>Väyrynen</last></author>
-      <author><first>André</first><last>Mansikkaniemi</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko</first><last>Väyrynen</last></author>
+      <author id="andre-mansikkaniemi"><first>André</first><last>Mansikkaniemi</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <pages>195–200</pages>
       <url hash="2f88d6a4">W10-1729</url>
@@ -2403,7 +2403,7 @@
     </paper>
     <paper id="30">
       <title>Maximum Entropy Translation Model in Dependency-Based <fixed-case>MT</fixed-case> Framework</title>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <pages>201–206</pages>
@@ -2413,21 +2413,21 @@
     <paper id="31">
       <title><fixed-case>UCH</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish System for <fixed-case>WMT</fixed-case>10</title>
       <author><first>Francisco</first><last>Zamora-Martínez</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
       <pages>207–211</pages>
       <url hash="94788450">W10-1731</url>
       <bibkey>zamora-martinez-sanchis-trilles-2010-uch</bibkey>
     </paper>
     <paper id="32">
       <title>Hierarchical Phrase-Based <fixed-case>MT</fixed-case> at the <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity for the <fixed-case>WMT</fixed-case> 2010 Shared Task</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>212–215</pages>
       <url hash="a075a505">W10-1732</url>
       <bibkey>zeman-2010-hierarchical</bibkey>
     </paper>
     <paper id="33">
       <title>Incremental Decoding for Phrase-Based Statistical Machine Translation</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Ajeet</first><last>Grewal</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>216–223</pages>
@@ -2437,7 +2437,7 @@
     <paper id="34">
       <title>How to Avoid Burning Ducks: Combining Linguistic Analysis and Corpus Statistics for <fixed-case>G</fixed-case>erman Compound Processing</title>
       <author><first>Fabienne</first><last>Fritzinger</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>224–234</pages>
       <url hash="e6785813">W10-1734</url>
       <bibkey>fritzinger-fraser-2010-avoid</bibkey>
@@ -2473,7 +2473,7 @@
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>262–270</pages>
       <url hash="17678394">W10-1738</url>
       <bibkey>vilar-etal-2010-jane</bibkey>
@@ -2487,7 +2487,7 @@
     </paper>
     <paper id="40">
       <title>Adaptive Model Weighting and Transductive Regression for Predicting Best System Combinations</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>S. Serdar</first><last>Kozat</last></author>
       <pages>276–281</pages>
       <url hash="82a7e900">W10-1740</url>
@@ -2495,7 +2495,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>L</fixed-case>1 Regularized Regression for Reranking and System Combination in Machine Translation</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>282–289</pages>
       <url hash="43f120a8">W10-1741</url>
@@ -2512,14 +2512,14 @@
     </paper>
     <paper id="43">
       <title>The <fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> Combination System for <fixed-case>WMT</fixed-case> 2010</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Joan-Andreu</first><last>Sánchez</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan-Andreu</first><last>Sánchez</last></author>
       <author><first>Jesús</first><last>Andrés-Ferrer</last></author>
       <author><first>Guillem</first><last>Gascó</last></author>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
-      <author><first>Martha-Alicia</first><last>Rocha</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="martha-alicia-rocha"><first>Martha-Alicia</first><last>Rocha</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>296–300</pages>
       <url hash="6e72b483">W10-1743</url>
       <bibkey>gonzalez-rubio-etal-2010-upv</bibkey>
@@ -2527,15 +2527,15 @@
     <paper id="44">
       <title><fixed-case>CMU</fixed-case> Multi-Engine Machine Translation for <fixed-case>WMT</fixed-case> 2010</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>301–306</pages>
       <url hash="34a9cb1c">W10-1744</url>
       <bibkey>heafield-lavie-2010-cmu</bibkey>
     </paper>
     <paper id="45">
       <title><fixed-case>CMU</fixed-case> System Combination via Hypothesis Selection for <fixed-case>WMT</fixed-case>’10</title>
-      <author><first>Almut Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>307–310</pages>
       <url hash="1301cfa1">W10-1745</url>
       <bibkey>hildebrand-vogel-2010-cmu</bibkey>
@@ -2550,17 +2550,17 @@
     <paper id="47">
       <title>The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2010</title>
       <author><first>Gregor</first><last>Leusch</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>315–320</pages>
       <url hash="03771fb8">W10-1747</url>
       <bibkey>leusch-ney-2010-rwth</bibkey>
     </paper>
     <paper id="48">
       <title><fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>10 System Combination Task</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>321–326</pages>
       <url hash="62955155">W10-1748</url>
       <bibkey>rosti-etal-2010-bbn</bibkey>
@@ -2575,10 +2575,10 @@
     </paper>
     <paper id="50">
       <title>Document-Level Automatic <fixed-case>MT</fixed-case> Evaluation based on Discourse Representations</title>
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>Jesús</first><last>Giménez</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="jesus-gimenez"><first>Jesús</first><last>Giménez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <pages>333–338</pages>
       <url hash="39bf8467">W10-1750</url>
@@ -2587,7 +2587,7 @@
     <paper id="51">
       <title><fixed-case>METEOR</fixed-case>-<fixed-case>NEXT</fixed-case> and the <fixed-case>METEOR</fixed-case> Paraphrase Tables: Improved Evaluation Support for Five Target Languages</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>339–342</pages>
       <url hash="1b28762e">W10-1751</url>
       <bibkey>denkowski-lavie-2010-meteor</bibkey>
@@ -2596,7 +2596,7 @@
       <title>Normalized Compression Distance Based Measures for <fixed-case>M</fixed-case>etrics<fixed-case>MATR</fixed-case> 2010</title>
       <author><first>Marcus</first><last>Dobrinkat</last></author>
       <author><first>Tero</first><last>Tapiovaara</last></author>
-      <author><first>Jaakko</first><last>Väyrynen</last></author>
+      <author id="jaakko-vayrynen"><first>Jaakko</first><last>Väyrynen</last></author>
       <author><first>Kimmo</first><last>Kettunen</last></author>
       <pages>343–348</pages>
       <url hash="55244286">W10-1752</url>
@@ -2607,7 +2607,7 @@
       <author><first>Yifan</first><last>He</last></author>
       <author><first>Jinhua</first><last>Du</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>349–353</pages>
       <url hash="a292fd3c">W10-1753</url>
       <bibkey>he-etal-2010-dcu</bibkey>
@@ -2624,7 +2624,7 @@
     <paper id="55">
       <title>The Parameter-Optimized <fixed-case>ATEC</fixed-case> Metric for <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Billy</first><last>Wong</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>360–364</pages>
       <url hash="0e8e6d5a">W10-1755</url>
       <bibkey>wong-kit-2010-parameter</bibkey>
@@ -2651,7 +2651,7 @@
     </paper>
     <paper id="58">
       <title>Taming Structured Perceptrons on Wild Feature Vectors</title>
-      <author><first>Ralf</first><last>Brown</last></author>
+      <author id="ralf-d-brown"><first>Ralf</first><last>Brown</last></author>
       <pages>384–391</pages>
       <url hash="37cea40d">W10-1758</url>
       <bibkey>brown-2010-taming</bibkey>
@@ -2667,9 +2667,9 @@
     </paper>
     <paper id="60">
       <title>Integration of Multiple Bilingually-Learned Segmentation Schemes into Statistical Machine Translation</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>400–408</pages>
       <url hash="cd9a5db5">W10-1760</url>
       <bibkey>paul-etal-2010-integration</bibkey>
@@ -2697,8 +2697,8 @@
       <title>Decision Trees for Lexical Smoothing in Statistical Machine Translation</title>
       <author><first>Rabih</first><last>Zbib</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
-      <author><first>John</first><last>Makhoul</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
+      <author id="john-makhoul"><first>John</first><last>Makhoul</last></author>
       <pages>428–437</pages>
       <url hash="b0c50d0d">W10-1763</url>
       <bibkey>zbib-etal-2010-decision</bibkey>
@@ -2709,7 +2709,7 @@
       <booktitle>Proceedings of the Fourth Linguistic Annotation Workshop</booktitle>
       <url hash="2e8d35bb">W10-18</url>
       <editor><first>Nianwen</first><last>Xue</last></editor>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -2723,9 +2723,9 @@
     <paper id="1">
       <title><fixed-case>E</fixed-case>moti<fixed-case>B</fixed-case>log: A Finer-Grained and More Precise Learning of Subjectivity Expression Models</title>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>1–10</pages>
       <url hash="15e4d6b6">W10-1801</url>
       <bibkey>boldrini-etal-2010-emotiblog</bibkey>
@@ -2743,18 +2743,18 @@
     <paper id="3">
       <title>Annotation Scheme for Social Network Extraction from Text</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
-      <author><first>Owen C.</first><last>Rambow</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="owen-rambow"><first>Owen C.</first><last>Rambow</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>20–28</pages>
       <url hash="6bdcbdc3">W10-1803</url>
       <bibkey>agarwal-etal-2010-annotation</bibkey>
     </paper>
     <paper id="4">
       <title>Agile Corpus Annotation in Practice: An Overview of Manual and Automatic Annotation of <fixed-case>CV</fixed-case>s</title>
-      <author><first>Bea</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Bea</first><last>Alex</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Rongzhou</first><last>Shen</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <pages>29–37</pages>
       <url hash="74f90ad0">W10-1804</url>
       <bibkey>alex-etal-2010-agile</bibkey>
@@ -2770,9 +2770,9 @@
     <paper id="6">
       <title><fixed-case>A</fixed-case>nveshan: A Framework for Analysis of Multiple Annotators’ Labeling Behavior</title>
       <author><first>Vikas</first><last>Bhardwaj</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Ansaf</first><last>Salleb-Aouissi</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="ansaf-salleb-aouissi"><first>Ansaf</first><last>Salleb-Aouissi</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>47–55</pages>
       <url hash="2b341131">W10-1806</url>
       <bibkey>bhardwaj-etal-2010-anveshan</bibkey>
@@ -2780,7 +2780,7 @@
     <paper id="7">
       <title>Influence of Pre-Annotation on <fixed-case>POS</fixed-case>-Tagged Corpus Development</title>
       <author><first>Karën</first><last>Fort</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>56–63</pages>
       <url hash="9f39d258">W10-1807</url>
       <bibkey>fort-sagot-2010-influence</bibkey>
@@ -2788,15 +2788,15 @@
     <paper id="8">
       <title>To Annotate More Accurately or to Annotate More</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>64–72</pages>
       <url hash="c5059248">W10-1808</url>
       <bibkey>dligach-etal-2010-annotate</bibkey>
     </paper>
     <paper id="9">
       <title>Annotating Underquantification</title>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <pages>73–81</pages>
       <url hash="3d499c53">W10-1809</url>
@@ -2806,19 +2806,19 @@
       <title><fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Annotation of Multilingual Light Verb Constructions</title>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Aous</first><last>Mansouri</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>82–90</pages>
       <url hash="8f0e775d">W10-1810</url>
       <bibkey>hwang-etal-2010-propbank</bibkey>
     </paper>
     <paper id="11">
       <title>Retrieving Correct Semantic Boundaries in Dependency Structure</title>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>91–99</pages>
       <url hash="9f4dd744">W10-1811</url>
       <bibkey>choi-palmer-2010-retrieving</bibkey>
@@ -2843,8 +2843,8 @@
     </paper>
     <paper id="14">
       <title>Cross-Lingual Validity of <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank in the Manual Annotation of <fixed-case>F</fixed-case>rench</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>113–117</pages>
       <url hash="d16c06fb">W10-1814</url>
@@ -2852,7 +2852,7 @@
     </paper>
     <paper id="15">
       <title>Characteristics of High Agreement Affect Annotation in Text</title>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <pages>118–122</pages>
       <url hash="c0d1a84e">W10-1815</url>
       <bibkey>alm-2010-characteristics</bibkey>
@@ -2863,7 +2863,7 @@
       <author><first>Xiangli</first><last>Wang</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>123–126</pages>
       <url hash="28239985">W10-1816</url>
       <bibkey>yu-etal-2010-deep</bibkey>
@@ -2903,9 +2903,9 @@
     </paper>
     <paper id="21">
       <title>Chunking <fixed-case>G</fixed-case>erman: An Unsolved Problem</title>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Kathrin</first><last>Beck</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Heike</first><last>Telljohann</last></author>
       <pages>147–151</pages>
       <url hash="22353a93">W10-1821</url>
@@ -2922,8 +2922,8 @@
     </paper>
     <paper id="23">
       <title>A Feature Type Classification for Therapeutic Purposes: A Preliminary Evaluation with Non-Expert Speakers</title>
-      <author><first>Gianluca E.</first><last>Lebani</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca E.</first><last>Lebani</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>157–161</pages>
       <url hash="69318d02">W10-1823</url>
       <bibkey>lebani-pianta-2010-feature</bibkey>
@@ -2982,12 +2982,12 @@
       <author><first>Emmanuel</first><last>Bruno</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
       <author><first>Robert</first><last>Espesser</last></author>
-      <author><first>Gaelle</first><last>Ferré</last></author>
+      <author id="gaelle-ferre"><first>Gaelle</first><last>Ferré</last></author>
       <author><first>Mathilde</first><last>Guardiola</last></author>
       <author><first>Daniel</first><last>Hirst</last></author>
       <author><first>Ning</first><last>Tan</last></author>
       <author><first>Edlira</first><last>Cela</last></author>
-      <author><first>Jean-Claude</first><last>Martin</last></author>
+      <author id="jean-claude-martin"><first>Jean-Claude</first><last>Martin</last></author>
       <author><first>Stéphane</first><last>Rauzy</last></author>
       <author><first>Mary-Annick</first><last>Morel</last></author>
       <author><first>Elisabeth</first><last>Murisasco</last></author>
@@ -2999,7 +2999,7 @@
     <paper id="30">
       <title>Combining Parallel Treebanks and Geo-Tagging</title>
       <author><first>Martin</first><last>Volk</last></author>
-      <author><first>Anne</first><last>Goehring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Goehring</last></author>
       <author><first>Torsten</first><last>Marek</last></author>
       <pages>192–196</pages>
       <url hash="66e8d9bb">W10-1830</url>
@@ -3015,8 +3015,8 @@
     </paper>
     <paper id="32">
       <title>Discourse Relation Configurations in <fixed-case>T</fixed-case>urkish and an Annotation Environment</title>
-      <author><first>Berfin</first><last>Aktaş</last></author>
-      <author><first>Cem</first><last>Bozsahin</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozsahin</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <pages>202–206</pages>
       <url hash="83078199">W10-1832</url>
@@ -3026,8 +3026,8 @@
       <title>An Overview of the <fixed-case>CRAFT</fixed-case> Concept Annotation Guidelines</title>
       <author><first>Michael</first><last>Bada</last></author>
       <author><first>Miriam</first><last>Eckert</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>207–211</pages>
       <url hash="1d8cec66">W10-1833</url>
       <bibkey>bada-etal-2010-overview</bibkey>
@@ -3042,8 +3042,8 @@
     <paper id="35">
       <title>An Integrated Tool for Annotating Historical Corpora</title>
       <author><first>Pablo Picasso Feliciano</first><last>de Faria</last></author>
-      <author><first>Fabio Natanael</first><last>Kepler</last></author>
-      <author><first>Maria Clara</first><last>Paixão de Sousa</last></author>
+      <author id="fabio-kepler"><first>Fabio Natanael</first><last>Kepler</last></author>
+      <author id="maria-clara-paixao-de-sousa"><first>Maria Clara</first><last>Paixão de Sousa</last></author>
       <pages>217–221</pages>
       <url hash="b0ea095e">W10-1835</url>
       <bibkey>de-faria-etal-2010-integrated</bibkey>
@@ -3051,10 +3051,10 @@
     <paper id="36">
       <title>The Revised <fixed-case>A</fixed-case>rabic <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank</title>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Aous</first><last>Mansouri</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>222–226</pages>
       <url hash="b4be7adb">W10-1836</url>
       <bibkey>zaghouani-etal-2010-revised</bibkey>
@@ -3064,7 +3064,7 @@
       <author><first>Nathan</first><last>Green</last></author>
       <author><first>Paul</first><last>Breimyer</last></author>
       <author><first>Vinay</first><last>Kumar</last></author>
-      <author><first>Nagiza</first><last>Samatova</last></author>
+      <author id="nagiza-samatova"><first>Nagiza</first><last>Samatova</last></author>
       <pages>227–234</pages>
       <url hash="7319baeb">W10-1837</url>
       <bibkey>green-etal-2010-packplay</bibkey>
@@ -3091,8 +3091,8 @@
     </paper>
     <paper id="40">
       <title>Anatomy of Annotation Schemes: Mapping to <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case></title>
-      <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>247–255</pages>
       <url hash="08e6d843">W10-1840</url>
       <bibkey>ide-bunt-2010-anatomy</bibkey>
@@ -3100,14 +3100,14 @@
     <paper id="41">
       <title>Annotating Participant Reference in <fixed-case>E</fixed-case>nglish Spoken Conversation</title>
       <author><first>John</first><last>Niekrasz</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>256–264</pages>
       <url hash="20c9112e">W10-1841</url>
       <bibkey>niekrasz-moore-2010-annotating</bibkey>
     </paper>
     <paper id="42">
       <title>Design and Evaluation of Shared Prosodic Annotation for Spontaneous <fixed-case>F</fixed-case>rench Speech: From Expert Knowledge to Non-Expert Annotation</title>
-      <author><first>Anne</first><last>Lacheret-Dujour</last></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret-Dujour</last></author>
       <author><first>Nicolas</first><last>Obin</last></author>
       <author><first>Mathieu</first><last>Avanzi</last></author>
       <pages>265–273</pages>
@@ -3116,11 +3116,11 @@
     </paper>
     <paper id="43">
       <title>Depends on What the <fixed-case>F</fixed-case>rench Say - Spoken Corpus Annotation with and beyond Syntactic Functions</title>
-      <author><first>José</first><last>Deulofeu</last></author>
+      <author id="jose-deulofeu"><first>José</first><last>Deulofeu</last></author>
       <author><first>Lucie</first><last>Duffort</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
-      <author><first>Paola</first><last>Pietrandrea</last></author>
+      <author id="paola-pietrandrea"><first>Paola</first><last>Pietrandrea</last></author>
       <pages>274–281</pages>
       <url hash="4236a83c">W10-1843</url>
       <bibkey>deulofeu-etal-2010-depends</bibkey>
@@ -3128,11 +3128,11 @@
     <paper id="44">
       <title>The Annotation Scheme of the <fixed-case>T</fixed-case>urkish Discourse Bank and an Evaluation of Inconsistent Annotations</title>
       <author><first>Deniz</first><last>Zeyrek</last></author>
-      <author><first>Işin</first><last>Demirşahin</last></author>
-      <author><first>Ayişiği</first><last>Sevdik-Çalli</last></author>
-      <author><first>Hale</first><last>Ögel Balaban</last></author>
-      <author><first>İhsan</first><last>Yalçinkaya</last></author>
-      <author><first>Ümit Deniz</first><last>Turan</last></author>
+      <author id="isin-demirsahin"><first>Işin</first><last>Demirşahin</last></author>
+      <author id="ayisigi-b-sevdik-calli"><first>Ayişiği</first><last>Sevdik-Çalli</last></author>
+      <author id="hale-ogel-balaban"><first>Hale</first><last>Ögel Balaban</last></author>
+      <author id="ihsan-yalcinkaya"><first>İhsan</first><last>Yalçinkaya</last></author>
+      <author id="umit-deniz-turan"><first>Ümit Deniz</first><last>Turan</last></author>
       <pages>282–289</pages>
       <url hash="d2bfd7a8">W10-1844</url>
       <bibkey>zeyrek-etal-2010-annotation</bibkey>
@@ -3142,12 +3142,12 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on Biomedical Natural Language Processing</booktitle>
       <url hash="0696cb9b">W10-19</url>
-      <editor><first>K. Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -3212,7 +3212,7 @@
       <author><first>Marcelo</first><last>Fiszman</last></author>
       <author><first>Graciela</first><last>Rosemblat</last></author>
       <author><first>Sean</first><last>Marimpietri</last></author>
-      <author><first>Thomas</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas</first><last>Rindflesch</last></author>
       <pages>46–54</pages>
       <url hash="9ca00dd1">W10-1906</url>
       <bibkey>kilicoglu-etal-2010-arguments</bibkey>
@@ -3229,7 +3229,7 @@
     <paper id="8">
       <title>Cancer Stage Prediction Based on Patient Online Discourse</title>
       <author><first>Mukund</first><last>Jha</last></author>
-      <author><first>Noémie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noémie</first><last>Elhadad</last></author>
       <pages>64–71</pages>
       <url hash="049012c0">W10-1908</url>
       <bibkey>jha-elhadad-2010-cancer</bibkey>
@@ -3237,7 +3237,7 @@
     <paper id="9">
       <title>An Exploration of Mining Gene Expression Mentions and Their Anatomical Locations from Biomedical Text</title>
       <author><first>Martin</first><last>Gerner</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <author><first>Casey M.</first><last>Bergman</last></author>
       <pages>72–80</pages>
       <url hash="4fb12eee">W10-1909</url>
@@ -3254,7 +3254,7 @@
     <paper id="11">
       <title>Disease Mention Recognition with Specific Features</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>83–90</pages>
       <url hash="6c47e566">W10-1911</url>
       <bibkey>chowdhury-lavelli-2010-disease</bibkey>
@@ -3262,7 +3262,7 @@
     <paper id="12">
       <title>Extraction of Disease-Treatment Semantic Relations from Biomedical Sentences</title>
       <author><first>Oana</first><last>Frunza</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>91–98</pages>
       <url hash="05fa50e7">W10-1912</url>
       <bibkey>frunza-inkpen-2010-extraction</bibkey>
@@ -3295,7 +3295,7 @@
       <author><first>Ryan</first><last>Sullivan</last></author>
       <author><first>Annie</first><last>Skariah</last></author>
       <author><first>Jian</first><last>Yang</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>117–125</pages>
       <url hash="5a249436">W10-1915</url>
       <bibkey>leaman-etal-2010-towards</bibkey>
@@ -3360,7 +3360,7 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <url hash="d98da33f">W10-20</url>
-      <editor><first>John T.</first><last>Hale</last></editor>
+      <editor id="john-hale"><first>John T.</first><last>Hale</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -3374,7 +3374,7 @@
     <paper id="1">
       <title>Using Sentence Type Information for Syntactic Category Acquisition</title>
       <author><first>Stella</first><last>Frank</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Frank</first><last>Keller</last></author>
       <pages>1–8</pages>
       <url hash="424be016">W10-2001</url>
@@ -3390,9 +3390,9 @@
     </paper>
     <paper id="3">
       <title>Syntactic Adaptation in Language Comprehension</title>
-      <author><first>Alex</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex</first><last>Fine</last></author>
       <author><first>Ting</first><last>Qian</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <author><first>Robert</first><last>Jacobs</last></author>
       <pages>18–26</pages>
       <url hash="3042931f">W10-2003</url>
@@ -3400,7 +3400,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>HHMM</fixed-case> Parsing with Limited Parallelism</title>
-      <author><first>Tim</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Tim</first><last>Miller</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>27–35</pages>
       <url hash="dc8e452c">W10-2004</url>
@@ -3416,7 +3416,7 @@
     <paper id="6">
       <title>Close = Relevant? The Role of Context in Efficient Language Production</title>
       <author><first>Ting</first><last>Qian</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <pages>45–53</pages>
       <url hash="3eeffff6">W10-2006</url>
       <bibkey>qian-jaeger-2010-close</bibkey>
@@ -3424,7 +3424,7 @@
     <paper id="7">
       <title>Predicting Cognitively Salient Modifiers of the Constitutive Parts of Concepts</title>
       <author><first>Gerhard</first><last>Kremer</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>54–62</pages>
       <url hash="f138d842">W10-2007</url>
       <bibkey>kremer-baroni-2010-predicting</bibkey>
@@ -3448,7 +3448,7 @@
     </paper>
     <paper id="10">
       <title>Uncertainty Reduction as a Measure of Cognitive Processing Effort</title>
-      <author><first>Stefan</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan</first><last>Frank</last></author>
       <pages>81–89</pages>
       <url hash="ec174493">W10-2010</url>
       <bibkey>frank-2010-uncertainty</bibkey>
@@ -3459,8 +3459,8 @@
       <booktitle>Proceedings of the 2010 Workshop on <fixed-case>NLP</fixed-case> and Linguistics: Finding the Common Ground</booktitle>
       <url hash="5e6142f5">W10-21</url>
       <editor><first>Fei</first><last>Xia</last></editor>
-      <editor><first>William</first><last>Lewis</last></editor>
-      <editor><first>Lori</first><last>Levin</last></editor>
+      <editor id="william-lewis"><first>William</first><last>Lewis</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -3483,14 +3483,14 @@
       <title>Evidentiality for Text Trustworthiness Detection</title>
       <author><first>Qi</first><last>Su</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Kai-yun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Kai-yun</first><last>Chen</last></author>
       <pages>10–17</pages>
       <url hash="278770cf">W10-2102</url>
       <bibkey>su-etal-2010-evidentiality</bibkey>
     </paper>
     <paper id="3">
       <title>On the Role of <fixed-case>NLP</fixed-case> in Linguistics</title>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>18–21</pages>
       <url hash="b402cf66">W10-2103</url>
       <bibkey>sharma-2010-role</bibkey>
@@ -3504,15 +3504,15 @@
     </paper>
     <paper id="5">
       <title>Grammar-Driven versus Data-Driven: Which Parsing System Is More Affected by Domain Shifts?</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>25–33</pages>
       <url hash="85e57209">W10-2105</url>
       <bibkey>plank-van-noord-2010-grammar</bibkey>
     </paper>
     <paper id="6">
       <title>A Cross-Lingual Induction Technique for <fixed-case>G</fixed-case>erman Adverbial Participles</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <author><first>Christian</first><last>Rohrer</last></author>
@@ -3530,7 +3530,7 @@
     </paper>
     <paper id="8">
       <title>Cross-Lingual Variation of Light Verb Constructions: Using Parallel Corpora and Automatic Alignment for Linguistic Research</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>52–60</pages>
       <url hash="6a62652d">W10-2108</url>
@@ -3551,14 +3551,14 @@
       <author><first>Frans</first><last>Plank</last></author>
       <author><first>Peter</first><last>Bak</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Daniel A.</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel A.</first><last>Keim</last></author>
       <pages>70–78</pages>
       <url hash="a2671a60">W10-2110</url>
       <bibkey>mayer-etal-2010-consonant</bibkey>
     </paper>
     <paper id="11">
       <title>Injecting Linguistics into <fixed-case>NLP</fixed-case> through Annotation</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>79</pages>
       <url hash="c541cf00">W10-2111</url>
       <bibkey>hovy-2010-injecting</bibkey>
@@ -3569,7 +3569,7 @@
       <booktitle>Proceedings of the 11th Meeting of the <fixed-case>ACL</fixed-case> Special Interest Group on Computational Morphology and Phonology</booktitle>
       <url hash="90cd64a3">W10-22</url>
       <editor><first>Jeffrey</first><last>Heinz</last></editor>
-      <editor><first>Lynne</first><last>Cahill</last></editor>
+      <editor id="lynne-cahill"><first>Lynne</first><last>Cahill</last></editor>
       <editor><first>Richard</first><last>Wicentowski</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
@@ -3583,7 +3583,7 @@
     </frontmatter>
     <paper id="1">
       <title>Instance-Based Acquisition of Vowel Harmony</title>
-      <author><first>Frédéric</first><last>Mailhot</last></author>
+      <author id="frederic-mailhot"><first>Frédéric</first><last>Mailhot</last></author>
       <pages>1–8</pages>
       <url hash="ca8f568e">W10-2201</url>
       <bibkey>mailhot-2010-instance</bibkey>
@@ -3613,7 +3613,7 @@
     <paper id="5">
       <title>A Method for Compiling Two-Level Rules with Multiple Contexts</title>
       <author><first>Kimmo</first><last>Koskenniemi</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>38–45</pages>
       <url hash="2d11ef11">W10-2205</url>
       <bibkey>koskenniemi-silfverberg-2010-method</bibkey>
@@ -3621,7 +3621,7 @@
     <paper id="6">
       <title>Exploring Dialect Phonetic Variation Using <fixed-case>PARAFAC</fixed-case></title>
       <author><first>Jelena</first><last>Prokić</last></author>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>46–53</pages>
       <url hash="2ffbd55d">W10-2206</url>
       <bibkey>prokic-van-de-cruys-2010-exploring</bibkey>
@@ -3675,7 +3675,7 @@
       <editor><first>Carmen</first><last>Banea</last></editor>
       <editor><first>Alessandro</first><last>Moschitti</last></editor>
       <editor><first>Swapna</first><last>Somasundaran</last></editor>
-      <editor><first>Fabio Massimo</first><last>Zanzotto</last></editor>
+      <editor id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -3696,7 +3696,7 @@
     </paper>
     <paper id="2">
       <title>Towards the Automatic Creation of a <fixed-case>W</fixed-case>ordnet from a Term-Based Lexical Network</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Paulo</first><last>Gomes</last></author>
       <pages>10–18</pages>
       <url hash="4463622e">W10-2302</url>
@@ -3714,7 +3714,7 @@
     <paper id="4">
       <title>Robust and Efficient Page Rank for Word Sense Disambiguation</title>
       <author><first>Diego</first><last>De Cao</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Matteo</first><last>Luciani</last></author>
       <author><first>Francesco</first><last>Mesiano</last></author>
       <author><first>Riccardo</first><last>Rossi</last></author>
@@ -3754,7 +3754,7 @@
     </paper>
     <paper id="9">
       <title>Co-Occurrence Cluster Features for Lexical Substitutions in Context</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>55–59</pages>
       <url hash="6fc32786">W10-2309</url>
       <bibkey>biemann-2010-co</bibkey>
@@ -3776,7 +3776,7 @@
     </paper>
     <paper id="12">
       <title>Experiments with <fixed-case>CST</fixed-case>-Based Multidocument Summarization</title>
-      <author><first>Maria Lucía</first><last>Castro Jorge</last></author>
+      <author id="maria-lucia-castro-jorge"><first>Maria Lucía</first><last>Castro Jorge</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>74–82</pages>
       <url hash="5599c827">W10-2312</url>
@@ -3784,10 +3784,10 @@
     </paper>
     <paper id="13">
       <title>Distinguishing between Positive and Negative Opinions with Complex Network Features</title>
-      <author><first>Diego Raphael</first><last>Amancio</last></author>
+      <author id="diego-raphael-amancio"><first>Diego Raphael</first><last>Amancio</last></author>
       <author><first>Renato</first><last>Fabbri</last></author>
-      <author><first>Osvaldo Novais</first><last>Oliveira Jr.</last></author>
-      <author><first>Maria das Graças Volpe</first><last>Nunes</last></author>
+      <author id="osvaldo-novais-oliveira-jr"><first>Osvaldo Novais</first><last>Oliveira Jr.</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças Volpe</first><last>Nunes</last></author>
       <author><first>Luciano da Fontoura</first><last>Costa</last></author>
       <pages>83–87</pages>
       <url hash="a0d31839">W10-2313</url>
@@ -3837,7 +3837,7 @@
     <meta>
       <booktitle>Proceedings of the 2010 Named Entities Workshop</booktitle>
       <url hash="67bacf29">W10-24</url>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
@@ -3872,7 +3872,7 @@
     <paper id="3">
       <title>Report of <fixed-case>NEWS</fixed-case> 2010 Transliteration Mining Shared Task</title>
       <author><first>A</first><last>Kumaran</last></author>
-      <author><first>Mitesh</first><last>M. Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>M. Khapra</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>21–28</pages>
       <url hash="17615dbe">W10-2403</url>
@@ -3881,7 +3881,7 @@
     <paper id="4">
       <title>Whitepaper of <fixed-case>NEWS</fixed-case> 2010 Shared Task on Transliteration Mining</title>
       <author><first>A</first><last>Kumaran</last></author>
-      <author><first>Mitesh</first><last>M. Khapra</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>M. Khapra</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>29–38</pages>
       <url hash="01a9840c">W10-2404</url>
@@ -3903,7 +3903,7 @@
     <paper id="6">
       <title>Transliteration Using a Phrase-Based Statistical Machine Translation System to Re-Score the Output of a Joint Multigram Model</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>48–52</pages>
       <url hash="1ae955d2">W10-2406</url>
       <bibkey>finch-sumita-2010-transliteration</bibkey>
@@ -3926,7 +3926,7 @@
     <paper id="9">
       <title>Reranking with Multiple Features for Better Transliteration</title>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>62–65</pages>
       <url hash="aeb3b3a1">W10-2409</url>
@@ -3946,7 +3946,7 @@
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>71–75</pages>
       <url hash="80c6f1af">W10-2411</url>
       <bibkey>das-etal-2010-english</bibkey>
@@ -3961,7 +3961,7 @@
     <paper id="13">
       <title>Phrase-Based Transliteration with Simple Heuristics</title>
       <author><first>Avinesh</first><last>PVS</last></author>
-      <author><first>Ankur</first><last>Parikh</last></author>
+      <author id="ankur-parikh"><first>Ankur</first><last>Parikh</last></author>
       <pages>81–84</pages>
       <url hash="6cfdf1a2">W10-2413</url>
       <bibkey>pvs-parikh-2010-phrase</bibkey>
@@ -3980,7 +3980,7 @@
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Eva</first><last>Sourjikova</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>93–101</pages>
       <url hash="febd0713">W10-2415</url>
       <bibkey>ekbal-etal-2010-assessing</bibkey>
@@ -3991,7 +3991,7 @@
       <author><first>You</first><last>Ouyang</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Dequan</first><last>Zheng</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>102–109</pages>
       <url hash="adde5106">W10-2416</url>
       <bibkey>chen-etal-2010-using</bibkey>
@@ -4007,7 +4007,7 @@
     <paper id="18">
       <title>Think Globally, Apply Locally: Using Distributional Characteristics for <fixed-case>H</fixed-case>indi Named Entity Identification</title>
       <author><first>Shalini</first><last>Gupta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>116–125</pages>
       <url hash="55307a54">W10-2418</url>
       <bibkey>gupta-bhattacharyya-2010-think</bibkey>
@@ -4023,7 +4023,7 @@
       <title><fixed-case>CONE</fixed-case>: Metrics for Automatic Evaluation of Named Entity Co-Reference Resolution</title>
       <author><first>Bo</first><last>Lin</last></author>
       <author><first>Rushin</first><last>Shah</last></author>
-      <author><first>Robert</first><last>Frederking</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
       <author><first>Anatole</first><last>Gershman</last></author>
       <pages>136–144</pages>
       <url hash="0e703664">W10-2420</url>
@@ -4102,11 +4102,11 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on Domain Adaptation for Natural Language Processing</booktitle>
       <url hash="e69564de">W10-26</url>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <editor><first>Tejaswini</first><last>Deoskar</last></editor>
       <editor><first>David</first><last>McClosky</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
       <month>July</month>
@@ -4137,7 +4137,7 @@
       <author><first>Oana</first><last>Sandu</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <author><first>Gabriel</first><last>Murray</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>16–22</pages>
       <url hash="5cc3ae43">W10-2603</url>
       <bibkey>sandu-etal-2010-domain</bibkey>
@@ -4153,7 +4153,7 @@
     <paper id="5">
       <title>Using Domain Similarity for Performance Estimation</title>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>31–36</pages>
       <url hash="fd30cc42">W10-2605</url>
       <bibkey>van-asch-daelemans-2010-using</bibkey>
@@ -4169,7 +4169,7 @@
       <title>Domain Adaptation with Unlabeled Data for Dialog Act Tagging</title>
       <author><first>Anna</first><last>Margolis</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>45–52</pages>
       <url hash="8ed8ffe8">W10-2607</url>
       <bibkey>margolis-etal-2010-domain</bibkey>
@@ -4188,8 +4188,8 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on Companionable Dialogue Systems</booktitle>
       <url hash="1bcd4859">W10-27</url>
-      <editor><first>Yorick</first><last>Wilks</last></editor>
-      <editor><first>Björn</first><last>Gambäck</last></editor>
+      <editor id="yorick-wilks"><first>Yorick</first><last>Wilks</last></editor>
+      <editor id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></editor>
       <editor><first>Morena</first><last>Danieli</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
@@ -4211,10 +4211,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>MANA</fixed-case> for the Ageing</title>
-      <author><first>David M W</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M W</first><last>Powers</last></author>
       <author><first>Martin H</first><last>Luerssen</last></author>
       <author><first>Trent W</first><last>Lewis</last></author>
-      <author><first>Richard E</first><last>Leibbrandt</last></author>
+      <author id="richard-e-leibbrandt"><first>Richard E</first><last>Leibbrandt</last></author>
       <author><first>Marissa</first><last>Milne</last></author>
       <author><first>John</first><last>Pashalis</last></author>
       <author><first>Kenneth</first><last>Treharne</last></author>
@@ -4254,7 +4254,7 @@
     </paper>
     <paper id="7">
       <title>How Was Your Day?</title>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <author><first>Marc</first><last>Cavazza</last></author>
       <author><first>Cameron</first><last>Smith</last></author>
@@ -4266,9 +4266,9 @@
     <paper id="8">
       <title><fixed-case>VCA</fixed-case>: An Experiment with a Multiparty Virtual Chat Agent</title>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <pages>43–48</pages>
       <url hash="8371ff60">W10-2708</url>
       <bibkey>shaikh-etal-2010-vca</bibkey>
@@ -4278,7 +4278,7 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on <fixed-case>GE</fixed-case>ometrical Models of Natural Language Semantics</booktitle>
       <url hash="e9d84e97">W10-28</url>
-      <editor><first>Roberto</first><last>Basili</last></editor>
+      <editor id="roberto-basili"><first>Roberto</first><last>Basili</last></editor>
       <editor><first>Marco</first><last>Pennacchiotti</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Uppsala, Sweden</address>
@@ -4315,15 +4315,15 @@
     </paper>
     <paper id="4">
       <title>Relatedness Curves for Acquiring Paraphrases</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>27–32</pages>
       <url hash="c9eee559">W10-2804</url>
       <bibkey>dinu-chrupala-2010-relatedness</bibkey>
     </paper>
     <paper id="5">
       <title>A Regression Model of Adjective-Noun Compositionality in Distributional Semantics</title>
-      <author><first>Emiliano</first><last>Guevara</last></author>
+      <author id="emiliano-raul-guevara"><first>Emiliano</first><last>Guevara</last></author>
       <pages>33–37</pages>
       <url hash="80b9fd0e">W10-2805</url>
       <bibkey>guevara-2010-regression</bibkey>
@@ -4332,7 +4332,7 @@
       <title>Semantic Composition with Quotient Algebras</title>
       <author><first>Daoud</first><last>Clarke</last></author>
       <author><first>Rudi</first><last>Lutz</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>38–44</pages>
       <url hash="b9a3347f">W10-2806</url>
       <bibkey>clarke-etal-2010-semantic</bibkey>
@@ -4348,7 +4348,7 @@
       <title>Sketch Techniques for Scaling Distributional Similarity to the Web</title>
       <author><first>Amit</first><last>Goyal</last></author>
       <author><first>Jagadeesh</first><last>Jagarlamudi</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Suresh</first><last>Venkatasubramanian</last></author>
       <pages>51–56</pages>
       <url hash="9bde9a92">W10-2808</url>
@@ -4358,7 +4358,7 @@
       <title>Active Learning for Constrained <fixed-case>D</fixed-case>irichlet Process Mixture Models</title>
       <author><first>Andreas</first><last>Vlachos</last></author>
       <author><first>Zoubin</first><last>Ghahramani</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>57–61</pages>
       <url hash="65c181cc">W10-2809</url>
       <bibkey>vlachos-etal-2010-active</bibkey>
@@ -4382,17 +4382,17 @@
     </frontmatter>
     <paper id="1">
       <title>Improvements in Unsupervised Co-Occurrence Based Parsing</title>
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <pages>1–8</pages>
       <url hash="9e509964">W10-2901</url>
       <bibkey>hanig-2010-improvements</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>V</fixed-case>iterbi Training Improves Unsupervised Dependency Parsing</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>9–17</pages>
       <url hash="a05f9683">W10-2902</url>
       <bibkey>spitkovsky-etal-2010-viterbi</bibkey>
@@ -4475,7 +4475,7 @@
     <paper id="12">
       <title>Recession Segmentation: Simpler Online Word Segmentation Using Limited Resources</title>
       <author><first>Constantine</first><last>Lignos</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>88–97</pages>
       <url hash="39a71b12">W10-2912</url>
       <bibkey>lignos-yang-2010-recession</bibkey>
@@ -4499,7 +4499,7 @@
     <paper id="15">
       <title>Learning Probabilistic Synchronous <fixed-case>CFG</fixed-case>s for Phrase-Based Translation</title>
       <author><first>Markos</first><last>Mylonakis</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>117–125</pages>
       <url hash="d8cbc3d8">W10-2915</url>
       <bibkey>mylonakis-simaan-2010-learning</bibkey>
@@ -4508,8 +4508,8 @@
       <title>A Semi-Supervised Batch-Mode Active Learning Strategy for Improved Statistical Machine Translation</title>
       <author><first>Sankaranarayanan</first><last>Ananthakrishnan</last></author>
       <author><first>Rohit</first><last>Prasad</last></author>
-      <author><first>David</first><last>Stallard</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="david-stallard"><first>David</first><last>Stallard</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <pages>126–134</pages>
       <url hash="acd2efd1">W10-2916</url>
       <bibkey>ananthakrishnan-etal-2010-semi</bibkey>
@@ -4518,8 +4518,8 @@
       <title>Improving Word Alignment by Semi-Supervised Ensemble</title>
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Kangxi</first><last>Li</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>135–143</pages>
       <url hash="57721c0e">W10-2917</url>
       <bibkey>huang-etal-2010-improving</bibkey>
@@ -4535,9 +4535,9 @@
     </paper>
     <paper id="19">
       <title>A Hybrid Approach to Emotional Sentence Polarity and Intensity Classification</title>
-      <author><first>Jorge</first><last>Carrillo de Albornoz</last></author>
+      <author id="jorge-carrillo-de-albornoz"><first>Jorge</first><last>Carrillo de Albornoz</last></author>
       <author><first>Laura</first><last>Plaza</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>153–161</pages>
       <url hash="d4302141">W10-2919</url>
       <bibkey>carrillo-de-albornoz-etal-2010-hybrid</bibkey>
@@ -4563,7 +4563,7 @@
     </paper>
     <paper id="22">
       <title>Online Entropy-Based Model of Lexical Category Acquisition</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>182–191</pages>
       <url hash="3f3d44e9">W10-2922</url>
@@ -4573,15 +4573,15 @@
       <title>Tagging and Linking Web Forum Posts</title>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Li</first><last>Wang</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>192–202</pages>
       <url hash="7f6b09f0">W10-2923</url>
       <bibkey>kim-etal-2010-tagging</bibkey>
     </paper>
     <paper id="24">
       <title>Joint Entity and Relation Extraction Using Card-Pyramid Parsing</title>
-      <author><first>Rohit J.</first><last>Kate</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="rohit-kate"><first>Rohit J.</first><last>Kate</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>203–212</pages>
       <url hash="688952fe">W10-2924</url>
       <bibkey>kate-mooney-2010-joint</bibkey>
@@ -4590,7 +4590,7 @@
       <title>Distributed Asynchronous Online Learning for Natural Language Processing</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Dipanjan</first><last>Das</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>213–222</pages>
       <url hash="18412408">W10-2925</url>
       <bibkey>gimpel-etal-2010-distributed</bibkey>
@@ -4616,7 +4616,7 @@
     <meta>
       <booktitle>Proceedings of the Fourteenth Conference on Computational Natural Language Learning – Shared Task</booktitle>
       <url hash="d0966fb3">W10-30</url>
-      <editor><first>Richárd</first><last>Farkas</last></editor>
+      <editor id="richard-farkas"><first>Richárd</first><last>Farkas</last></editor>
       <editor><first>Veronika</first><last>Vincze</last></editor>
       <editor><first>György</first><last>Szarvas</last></editor>
       <editor><first>György</first><last>Móra</last></editor>
@@ -4648,7 +4648,7 @@
       <author><first>Xiaolong</first><last>Wang</last></author>
       <author><first>Xuan</first><last>Wang</last></author>
       <author><first>Bo</first><last>Yuan</last></author>
-      <author><first>Shixi</first><last>Fan</last></author>
+      <author id="shixi-fan"><first>Shixi</first><last>Fan</last></author>
       <pages>13–17</pages>
       <url hash="c492ebed">W10-3002</url>
       <bibkey>tang-etal-2010-cascade</bibkey>
@@ -4672,7 +4672,7 @@
       <title>Detecting Hedge Cues and their Scopes with Average Perceptron</title>
       <author><first>Feng</first><last>Ji</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>32–39</pages>
       <url hash="617a35d5">W10-3005</url>
       <bibkey>ji-etal-2010-detecting</bibkey>
@@ -4681,7 +4681,7 @@
       <title>Memory-Based Resolution of In-Sentence Scopes of Hedge Cues</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Vincent</first><last>Van Asch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>40–47</pages>
       <url hash="088d52fd">W10-3006</url>
       <bibkey>morante-etal-2010-memory</bibkey>
@@ -4689,7 +4689,7 @@
     <paper id="7">
       <title>Resolving Speculation: <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt Cue Classification and Dependency-Based Scope Rules</title>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <pages>48–55</pages>
       <url hash="435a0eab">W10-3007</url>
@@ -4698,16 +4698,16 @@
     <paper id="8">
       <title>Combining Manual Rules and Supervised Learning for Hedge Cue and Scope Detection</title>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>56–63</pages>
       <url hash="944bd6eb">W10-3008</url>
       <bibkey>rei-briscoe-2010-combining</bibkey>
     </paper>
     <paper id="9">
       <title>Hedge Detection Using the <fixed-case>R</fixed-case>el<fixed-case>H</fixed-case>unter Approach</title>
-      <author><first>Eraldo</first><last>Fernandes</last></author>
+      <author id="eraldo-fernandes"><first>Eraldo</first><last>Fernandes</last></author>
       <author><first>Carlos</first><last>Crestana</last></author>
-      <author><first>Ruy</first><last>Milidiú</last></author>
+      <author id="ruy-luiz-milidiu"><first>Ruy</first><last>Milidiú</last></author>
       <pages>64–69</pages>
       <url hash="7748076b">W10-3009</url>
       <bibkey>fernandes-etal-2010-hedge</bibkey>
@@ -4746,8 +4746,8 @@
       <title>Hedge Detection and Scope Finding by Sequence Labeling with Procedural Feature Selection</title>
       <author><first>Shaodian</first><last>Zhang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>92–99</pages>
       <url hash="10512bbe">W10-3013</url>
       <bibkey>zhang-etal-2010-hedge</bibkey>
@@ -4755,8 +4755,8 @@
     <paper id="14">
       <title>Learning to Detect Hedges and their Scope Using <fixed-case>CRF</fixed-case></title>
       <author><first>Qi</first><last>Zhao</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Yong</first><last>Cheng</last></author>
       <pages>100–105</pages>
       <url hash="d2cc4eaa">W10-3014</url>
@@ -4764,9 +4764,9 @@
     </paper>
     <paper id="15">
       <title>Exploiting Multi-Features to Detect Hedges and their Scope in Biomedical Texts</title>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Xiaoyan</first><last>Li</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Zezhong</first><last>Li</last></author>
       <author><first>Yuansheng</first><last>Yang</last></author>
       <pages>106–113</pages>
@@ -4790,7 +4790,7 @@
     </paper>
     <paper id="18">
       <title>Exploiting <fixed-case>CCG</fixed-case> Structures with Tree Kernels for Speculation Detection</title>
-      <author><first>Liliana</first><last>Mamani Sánchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sánchez</last></author>
       <author><first>Baoli</first><last>Li</last></author>
       <author><first>Carl</first><last>Vogel</last></author>
       <pages>126–131</pages>
@@ -4823,7 +4823,7 @@
     </paper>
     <paper id="22">
       <title>A Baseline Approach for Detecting Sentences Containing Uncertainty</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>148–150</pages>
       <url hash="4fd2a162">W10-3022</url>
       <bibkey>tjong-kim-sang-2010-baseline</bibkey>
@@ -4880,7 +4880,7 @@
     <paper id="4">
       <title>Does negation really matter?</title>
       <author><first>Ira</first><last>Goldstein</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <pages>23–27</pages>
       <url hash="f150d47c">W10-3104</url>
       <bibkey>goldstein-uzuner-2010-negation</bibkey>
@@ -4911,14 +4911,14 @@
     </paper>
     <paper id="8">
       <title>Importance of negations and experimental qualifiers in biomedical literature</title>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <pages>46–49</pages>
       <url hash="a3321881">W10-3108</url>
       <bibkey>krallinger-2010-importance</bibkey>
     </paper>
     <paper id="9">
       <title>Negation and modality in distributional semantics</title>
-      <author><first>Ed</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Ed</first><last>Hovy</last></author>
       <pages>50</pages>
       <url hash="6b5eec3c">W10-3109</url>
       <bibkey>hovy-2010-negation</bibkey>
@@ -4935,10 +4935,10 @@
     <paper id="11">
       <title>A survey on the role of negation in sentiment analysis</title>
       <author><first>Michael</first><last>Wiegand</last></author>
-      <author><first>Alexandra</first><last>Balahur</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
       <author><first>Benjamin</first><last>Roth</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>60–68</pages>
       <url hash="b3b264e1">W10-3111</url>
       <bibkey>wiegand-etal-2010-survey</bibkey>
@@ -4955,14 +4955,14 @@
     <paper id="13">
       <title>Using <fixed-case>SVM</fixed-case>s with the Command Relation features to identify negated events in biomedical literature</title>
       <author><first>Farzaneh</first><last>Sarafraz</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>78–85</pages>
       <url hash="38796b83">W10-3113</url>
       <bibkey>sarafraz-nenadic-2010-using</bibkey>
     </paper>
     <paper id="14">
       <title>Contradiction-focused qualitative evaluation of textual entailment</title>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Elena</first><last>Cabrio</last></author>
       <pages>86–94</pages>
       <url hash="79a1fe1d">W10-3114</url>
@@ -4975,7 +4975,7 @@
       <url hash="353a8682">W10-32</url>
       <editor><first>Sarmad</first><last>Hussain</last></editor>
       <editor><first>Virach</first><last>Sornlertlamvanich</last></editor>
-      <editor><first>Hammam</first><last>Riza</last></editor>
+      <editor id="hammam-riza"><first>Hammam</first><last>Riza</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
       <address>Beijing, China</address>
       <month>August</month>
@@ -5008,7 +5008,7 @@
     <paper id="3">
       <title>Considerations on Automatic Mapping Large-Scale Heterogeneous Language Resources: <fixed-case>S</fixed-case>ejong Semantic Classes and <fixed-case>K</fixed-case>or<fixed-case>L</fixed-case>ex</title>
       <author><first>Heum</first><last>Park</last></author>
-      <author><first>Ae sun</first><last>Yoon</last></author>
+      <author id="aesun-yoon"><first>Ae sun</first><last>Yoon</last></author>
       <author><first>Woo Chul</first><last>Park</last></author>
       <author><first>Hyuk-Chul</first><last>Kwon</last></author>
       <pages>14–21</pages>
@@ -5039,7 +5039,7 @@
       <author><first>Masaaki</first><last>Yasuhara</last></author>
       <author><first>Asuka</first><last>Terai</last></author>
       <author><first>David</first><last>Morris</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>38–46</pages>
       <url hash="96c6c570">W10-3206</url>
       <bibkey>tokunaga-etal-2010-construction</bibkey>
@@ -5047,7 +5047,7 @@
     <paper id="7">
       <title>Labeling Emotion in <fixed-case>B</fixed-case>engali Blog Corpus – A Fine Grained Tagging at Sentence Level</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>47–55</pages>
       <url hash="ae087dcc">W10-3207</url>
       <bibkey>das-bandyopadhyay-2010-labeling</bibkey>
@@ -5055,7 +5055,7 @@
     <paper id="8">
       <title><fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>56–63</pages>
       <url hash="1d2bfc70">W10-3208</url>
       <bibkey>das-bandyopadhyay-2010-sentiwordnet</bibkey>
@@ -5127,7 +5127,7 @@
     <paper id="16">
       <title>A Preliminary Work on <fixed-case>H</fixed-case>indi Causatives</title>
       <author><first>Rafiya</first><last>Begum</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>120–128</pages>
       <url hash="0f076bcb">W10-3216</url>
       <bibkey>begum-sharma-2010-preliminary</bibkey>
@@ -5138,7 +5138,7 @@
       <author><first>Chanon</first><last>Onman</last></author>
       <author><first>Peerachet</first><last>Porkaew</last></author>
       <author><first>Taneth</first><last>Ruangrajitpakorn</last></author>
-      <author><first>Kanokorn</first><last>Trakultaweekool</last></author>
+      <author id="kanokorn-trakultaweekoon"><first>Kanokorn</first><last>Trakultaweekool</last></author>
       <author><first>Asanee</first><last>Kawtrakul</last></author>
       <pages>129–136</pages>
       <url hash="bd2d6e36">W10-3217</url>
@@ -5196,7 +5196,7 @@
       <url hash="56c2a663">W10-33</url>
       <editor><first>Alessandro</first><last>Oltramari</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Qin</first><last>Lu</last></editor>
+      <editor id="qin-lu"><first>Qin</first><last>Lu</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
       <address>Beijing, China</address>
       <month>August</month>
@@ -5210,9 +5210,9 @@
     <paper id="1">
       <title><fixed-case>KYOTO</fixed-case>: an open platform for mining facts</title>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>German</first><last>Rigau</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Roberto</first><last>Bartolini</last></author>
       <pages>1–10</pages>
@@ -5232,17 +5232,17 @@
       <title>Multilingual Lexical Network from the Archives of the Digital Silk Road</title>
       <author><first>Hans-Mohammad</first><last>Daoud</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <author><first>Asanobu</first><last>Kitamoto</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <pages>19–27</pages>
       <url hash="9372d6c0">W10-3303</url>
       <bibkey>daoud-etal-2010-multilingual</bibkey>
     </paper>
     <paper id="4">
       <title>Finding Medical Term Variations using Parallel Corpora and Distributional Similarity</title>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>28–37</pages>
       <url hash="c96eb71a">W10-3304</url>
       <bibkey>van-der-plas-tiedemann-2010-finding</bibkey>
@@ -5257,9 +5257,9 @@
     <paper id="6">
       <title>Intrinsic Property-based Taxonomic Relation Extraction from Category Structure</title>
       <author><first>DongHyun</first><last>Choi</last></author>
-      <author><first>Eun-Kyung</first><last>Kim</last></author>
+      <author id="eun-kyung-kim"><first>Eun-Kyung</first><last>Kim</last></author>
       <author><first>Sang-Ah</first><last>Shim</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>48–57</pages>
       <url hash="23121af4">W10-3306</url>
       <bibkey>choi-etal-2010-intrinsic</bibkey>
@@ -5268,15 +5268,15 @@
       <title>Developing a Biosurveillance Application Ontology for Influenza-Like-Illness</title>
       <author><first>Mike</first><last>Conway</last></author>
       <author><first>John</first><last>Dowling</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>58–66</pages>
       <url hash="17bd0eb2">W10-3307</url>
       <bibkey>conway-etal-2010-developing</bibkey>
     </paper>
     <paper id="8">
       <title>Interfacing the Lexicon and the Ontology in a Semantic Analyzer</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <author><first>Victor</first><last>Sizov</last></author>
       <author><first>Svetlana</first><last>Timoshenko</last></author>
       <pages>67–76</pages>
@@ -5285,8 +5285,8 @@
     </paper>
     <paper id="9">
       <title>Ontolexical resources for feature-based opinion mining: a case-study</title>
-      <author><first>Anaïs</first><last>Cadilhac</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anaïs</first><last>Cadilhac</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
       <pages>77–86</pages>
       <url hash="bbf1d41b">W10-3309</url>
@@ -5311,7 +5311,7 @@
     </frontmatter>
     <paper id="1">
       <title>Distributional Semantics and the Lexicon</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1</pages>
       <url hash="6d4517d4">W10-3401</url>
       <bibkey>hovy-2010-distributional</bibkey>
@@ -5319,15 +5319,15 @@
     <paper id="2">
       <title><fixed-case>S</fixed-case>emantic<fixed-case>N</fixed-case>et-Perception of Human Pragmatics</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>2–11</pages>
       <url hash="dbe6a120">W10-3402</url>
       <bibkey>das-bandyopadhyay-2010-semanticnet</bibkey>
     </paper>
     <paper id="3">
       <title>Exploiting Lexical Resources for Therapeutic Purposes: the Case of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>ST</fixed-case>a<fixed-case>RS</fixed-case>.sys</title>
-      <author><first>Gianluca E.</first><last>Lebani</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca E.</first><last>Lebani</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>12–17</pages>
       <url hash="12d4929a">W10-3403</url>
       <bibkey>lebani-pianta-2010-exploiting</bibkey>
@@ -5344,7 +5344,7 @@
     <paper id="5">
       <title>The Color of Emotions in Texts</title>
       <author><first>Carlo</first><last>Strapparava</last></author>
-      <author><first>Gozde</first><last>Ozbal</last></author>
+      <author id="gozde-ozbal"><first>Gozde</first><last>Ozbal</last></author>
       <pages>28–32</pages>
       <url hash="c8eb3c1a">W10-3405</url>
       <bibkey>strapparava-ozbal-2010-color</bibkey>
@@ -5360,8 +5360,8 @@
     <paper id="7">
       <title>An Optimal and Portable Parsing Method for <fixed-case>R</fixed-case>omanian, <fixed-case>F</fixed-case>rench, and <fixed-case>G</fixed-case>erman Large Dictionaries</title>
       <author><first>Neculai</first><last>Curteanu</last></author>
-      <author><first>Alex</first><last>Moruz</last></author>
-      <author><first>Diana</first><last>Trandabăţ</last></author>
+      <author id="alex-moruz"><first>Alex</first><last>Moruz</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăţ</last></author>
       <pages>38–47</pages>
       <url hash="177b59d5">W10-3407</url>
       <bibkey>curteanu-etal-2010-optimal</bibkey>
@@ -5376,14 +5376,14 @@
     </paper>
     <paper id="9">
       <title>Computational Lexicography: A Feature-based Approach in Designing an <fixed-case>E</fixed-case>-dictionary of <fixed-case>C</fixed-case>hinese Classifiers</title>
-      <author><first>Helena</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena</first><last>Gao</last></author>
       <pages>56–65</pages>
       <url hash="545e75c3">W10-3409</url>
       <bibkey>gao-2010-computational</bibkey>
     </paper>
     <paper id="10">
       <title>In Search of the ’Right’ Word</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Aggeliki</first><last>Fotopoulou</last></author>
       <author><first>Maria</first><last>Alexopoulou</last></author>
       <author><first>Marianna</first><last>Mini</last></author>
@@ -5438,9 +5438,9 @@
       <title>Extending <fixed-case>E</fixed-case>nglish <fixed-case>ACE</fixed-case> 2005 Corpus Annotation with Ground-truth Links to <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Luisa</first><last>Bentivogli</last></author>
       <author><first>Pamela</first><last>Forner</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <author><first>Alessandro</first><last>Marchetti</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Kateryna</first><last>Tymoshenko</last></author>
       <pages>19–27</pages>
       <url hash="1144cecd">W10-3503</url>
@@ -5448,7 +5448,7 @@
     </paper>
     <paper id="4">
       <title>Expanding textual entailment corpora from<fixed-case>W</fixed-case>ikipedia using co-training</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Marco</first><last>Pennacchiotti</last></author>
       <pages>28–36</pages>
       <url hash="60134803">W10-3504</url>
@@ -5478,7 +5478,7 @@
       <author><first>Arjumand</first><last>Younus</last></author>
       <author><first>Muhammad</first><last>Saeed</last></author>
       <author><first>Nasir</first><last>Touheed</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <author><first>Kateryna</first><last>Tymoshenko</last></author>
       <pages>55–62</pages>
       <url hash="70681f43">W10-3507</url>
@@ -5488,7 +5488,7 @@
       <title>Helping Volunteer Translators, Fostering Language Resources</title>
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Takeshi</first><last>Abekawa</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <pages>63–66</pages>
       <url hash="48ac5fc7">W10-3508</url>
@@ -5512,8 +5512,8 @@
     </frontmatter>
     <paper id="1">
       <title>Boosting N-gram Coverage for Unsegmented Languages Using Multiple Text Segmentation Approach</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Sopheap</first><last>Seng</last></author>
       <pages>1–7</pages>
       <url hash="a3f37686">W10-3601</url>
@@ -5522,7 +5522,7 @@
     <paper id="2">
       <title><fixed-case>T</fixed-case>hai Sentence-Breaking for Large-Scale <fixed-case>SMT</fixed-case></title>
       <author><first>Glenn</first><last>Slayden</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <author><first>Lee</first><last>Schwartz</last></author>
       <pages>8–16</pages>
       <url hash="3029e42a">W10-3602</url>
@@ -5532,7 +5532,7 @@
       <title>Clause Identification and Classification in <fixed-case>B</fixed-case>engali</title>
       <author><first>Aniruddha</first><last>Ghosh</last></author>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>17–25</pages>
       <url hash="f358f7ad">W10-3603</url>
       <bibkey>ghosh-etal-2010-clause</bibkey>
@@ -5541,31 +5541,31 @@
       <title>A Paradigm-Based Finite State Morphological Analyzer for <fixed-case>M</fixed-case>arathi</title>
       <author><first>Mugdha</first><last>Bapat</last></author>
       <author><first>Harshada</first><last>Gune</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>26–34</pages>
       <url hash="7fa28811">W10-3604</url>
       <bibkey>bapat-etal-2010-paradigm</bibkey>
     </paper>
     <paper id="5">
       <title>Web Based <fixed-case>M</fixed-case>anipuri Corpus for Multiword <fixed-case>NER</fixed-case> and Reduplicated <fixed-case>MWE</fixed-case>s Identification using <fixed-case>SVM</fixed-case></title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>35–42</pages>
       <url hash="6c853776">W10-3605</url>
       <bibkey>singh-bandyopadhyay-2010-web</bibkey>
     </paper>
     <paper id="6">
       <title>A Word Segmentation System for Handling Space Omission Problem in <fixed-case>U</fixed-case>rdu Script</title>
-      <author><first>Gurpreet</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet</first><last>Lehal</last></author>
       <pages>43–50</pages>
       <url hash="21b4aaa5">W10-3606</url>
       <bibkey>lehal-2010-word</bibkey>
     </paper>
     <paper id="7">
       <title>Hybrid Stemmer for <fixed-case>G</fixed-case>ujarati</title>
-      <author><first>Pratikkumar</first><last>Patel</last></author>
+      <author id="pratikkumar-patel"><first>Pratikkumar</first><last>Patel</last></author>
       <author><first>Kashyap</first><last>Popat</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>51–55</pages>
       <url hash="94a73046">W10-3607</url>
       <bibkey>patel-etal-2010-hybrid</bibkey>
@@ -5575,8 +5575,8 @@
     <meta>
       <booktitle>Proceedings of the 2010 Workshop on Multiword Expressions: from Theory to Applications</booktitle>
       <url hash="576d7099">W10-37</url>
-      <editor><first>Éric</first><last>Laporte</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="eric-laporte"><first>Éric</first><last>Laporte</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Carlos</first><last>Ramisch</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
@@ -5598,7 +5598,7 @@
     </paper>
     <paper id="2">
       <title>Computational Lexicography of Multi-Word Units. How Efficient Can It Be?</title>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Monika</first><last>Czerepowicka</last></author>
       <author><first>Filip</first><last>Makowiecki</last></author>
@@ -5620,14 +5620,14 @@
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>19–27</pages>
       <url hash="72063099">W10-3704</url>
       <bibkey>attia-etal-2010-automatic</bibkey>
     </paper>
     <paper id="5">
       <title>Sentence Analysis and Collocation Identification</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Violeta</first><last>Seretan</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
       <pages>28–36</pages>
@@ -5640,7 +5640,7 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>37–45</pages>
       <url hash="21f9bd01">W10-3706</url>
       <bibkey>das-etal-2010-automatic</bibkey>
@@ -5648,9 +5648,9 @@
     <paper id="7">
       <title>Handling Named Entities and Compound Verbs in Phrase-Based Statistical Machine Translation</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>46–54</pages>
       <url hash="01a3bbf5">W10-3707</url>
@@ -5671,7 +5671,7 @@
       <author><first>Tomoko</first><last>Izumi</last></author>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Genichiro</first><last>Kikui</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>64–72</pages>
       <url hash="13ed7eba">W10-3709</url>
       <bibkey>izumi-etal-2010-standardizing</bibkey>
@@ -5679,7 +5679,7 @@
     <paper id="10">
       <title>Identification of Reduplication in <fixed-case>B</fixed-case>engali Corpus and their Semantic Analysis: A Rule Based Approach</title>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>73–76</pages>
       <url hash="68f0cfc8">W10-3710</url>
       <bibkey>chakraborty-bandyopadhyay-2010-identification</bibkey>
@@ -5689,7 +5689,7 @@
       <author><first>Francesca</first><last>Bonin</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>77–80</pages>
       <url hash="def6881a">W10-3711</url>
       <bibkey>bonin-etal-2010-contrastive-filtering</bibkey>
@@ -5712,7 +5712,7 @@
     </paper>
     <paper id="14">
       <title>Multiword Expressions as Discourse Relation Markers (<fixed-case>DRM</fixed-case>s)</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>89</pages>
       <url hash="1b57f02b">W10-3714</url>
       <bibkey>joshi-2010-multiword</bibkey>
@@ -5763,7 +5763,7 @@
       <title>Syntactic Constraints on Phrase Extraction for Phrase-Based Machine Translation</title>
       <author><first>Hailong</first><last>Cao</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>28–33</pages>
       <url hash="54c218d5">W10-3804</url>
       <bibkey>cao-etal-2010-syntactic</bibkey>
@@ -5772,7 +5772,7 @@
       <title>Phrase Based Decoding using a Discriminative Model</title>
       <author><first>Prasanth</first><last>Kolachina</last></author>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <author><first>Sudheer</first><last>Kolachina</last></author>
       <author><first>Avinesh</first><last>PVS</last></author>
       <pages>34–42</pages>
@@ -5782,7 +5782,7 @@
     <paper id="6">
       <title>Seeding Statistical Machine Translation with Translation Memory Output through Tree-Based Structural Alignment</title>
       <author><first>Ventsislav</first><last>Zhechev</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>43–51</pages>
       <url hash="ad105ffa">W10-3806</url>
       <bibkey>zhechev-van-genabith-2010-seeding</bibkey>
@@ -5799,7 +5799,7 @@
       <title><fixed-case>A</fixed-case>rabic morpho-syntactic feature disambiguation in a translation context</title>
       <author><first>Ines</first><last>Turki Khemakhem</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>61–65</pages>
       <url hash="71c6d162">W10-3808</url>
       <bibkey>turki-khemakhem-etal-2010-arabic</bibkey>
@@ -5808,7 +5808,7 @@
       <title>A Discriminative Approach for Dependency Based Statistical Machine Translation</title>
       <author><first>Sriram</first><last>Venkatapathy</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Karthik</first><last>Gali</last></author>
       <pages>66–74</pages>
       <url hash="5071ac4f">W10-3809</url>
@@ -5825,8 +5825,8 @@
     </paper>
     <paper id="11">
       <title><fixed-case>M</fixed-case>anipuri-<fixed-case>E</fixed-case>nglish Bidirectional Statistical Machine Translation Systems using Morphology and Dependency Relations</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>83–91</pages>
       <url hash="8ca06730">W10-3811</url>
       <bibkey>singh-bandyopadhyay-2010-manipuri</bibkey>
@@ -5834,7 +5834,7 @@
     <paper id="12">
       <title>A Discriminative Syntactic Model for Source Permutation via Tree Transduction</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>92–100</pages>
       <url hash="89f3b3d8">W10-3812</url>
       <bibkey>khalilov-simaan-2010-discriminative</bibkey>
@@ -5850,7 +5850,7 @@
     <paper id="14">
       <title>New Parameterizations and Features for <fixed-case>PSCFG</fixed-case>-Based Machine Translation</title>
       <author><first>Andreas</first><last>Zollmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>110–117</pages>
       <url hash="d3262a78">W10-3814</url>
       <bibkey>zollmann-vogel-2010-new</bibkey>
@@ -5858,7 +5858,7 @@
     <paper id="15">
       <title>Deep Syntax Language Models and Statistical Machine Translation</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>118–126</pages>
       <url hash="a72eb1ff">W10-3815</url>
       <bibkey>graham-van-genabith-2010-deep</bibkey>
@@ -5913,7 +5913,7 @@
       <author><first>Megumi</first><last>Ohki</last></author>
       <author><first>Suguru</first><last>Matsuyoshi</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>21–30</pages>
       <url hash="2ff15157">W10-3904</url>
       <bibkey>murakami-etal-2010-automatic</bibkey>
@@ -5936,7 +5936,7 @@
     <paper id="7">
       <title>A Look inside the Distributionally Similar Terms</title>
       <author><first>Kow</first><last>Kuroda</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <pages>40–49</pages>
       <url hash="ae25f632">W10-3907</url>
@@ -5953,7 +5953,7 @@
     <paper id="9">
       <title>Large Corpus-based Semantic Feature Extraction for Pronoun Coreference</title>
       <author><first>Shasha</first><last>Liao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>60–68</pages>
       <url hash="d52e1580">W10-3909</url>
       <bibkey>liao-grishman-2010-large</bibkey>
@@ -5963,7 +5963,7 @@
       <author><first>Minh</first><last>Nghiem Quoc</last></author>
       <author><first>Keisuke</first><last>Yokoi</last></author>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>69–74</pages>
       <url hash="fd91f07c">W10-3910</url>
       <bibkey>nghiem-quoc-etal-2010-mining</bibkey>
@@ -5975,7 +5975,7 @@
       <author><first>Tomoko</first><last>Ohkuma</last></author>
       <author><first>Masatsugu</first><last>Tonoike</last></author>
       <author><first>Daigo</first><last>Sugihara</last></author>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Kazuhiko</first><last>Ohe</last></author>
       <pages>75–83</pages>
       <url hash="3ad81dfd">W10-3911</url>
@@ -5989,7 +5989,7 @@
       <editor><first>Sudeshna</first><last>Sarkar</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Adam</first><last>Lopez</last></editor>
-      <editor><first>Raghavendra</first><last>Udupa</last></editor>
+      <editor id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></editor>
       <publisher>Coling 2010 Organizing Committee</publisher>
       <address>Beijing, China</address>
       <month>August</month>
@@ -6002,7 +6002,7 @@
     </frontmatter>
     <paper id="1">
       <title>Word Sense Disambiguation and <fixed-case>IR</fixed-case></title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1</pages>
       <url hash="05aeeb1f">W10-4001</url>
       <bibkey>bhattacharyya-2010-word</bibkey>
@@ -6026,8 +6026,8 @@
     </paper>
     <paper id="4">
       <title>How to Get the Same News from Different Language News Papers</title>
-      <author><first>T. Pattabhi</first><last>R. K Rao</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="pattabhi-rk-rao"><first>T. Pattabhi</first><last>R. K Rao</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>11–15</pages>
       <url hash="936de90d">W10-4004</url>
       <bibkey>r-k-rao-lalitha-devi-2010-get</bibkey>
@@ -6045,7 +6045,7 @@
     <paper id="6">
       <title>Multi-Word Expression-Sensitive Word Alignment</title>
       <author><first>Tsuyoshi</first><last>Okita</last></author>
-      <author><first>Alfredo</first><last>Maldonado Guerra</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado Guerra</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>26–34</pages>
@@ -6076,8 +6076,8 @@
       <author><first>Achille</first><last>Falaise</last></author>
       <author><first>David</first><last>Rouquet</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>52–60</pages>
       <url hash="6e07faf7">W10-4009</url>
       <bibkey>falaise-etal-2010-ontology</bibkey>
@@ -6087,7 +6087,7 @@
       <author><first>Marina</first><last>Litvak</last></author>
       <author><first>Mark</first><last>Last</last></author>
       <author><first>Slava</first><last>Kisilevich</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <author><first>Hagay</first><last>Lipman</last></author>
       <author><first>Assaf</first><last>Ben Gur</last></author>
       <pages>61–69</pages>
@@ -6097,9 +6097,9 @@
     <paper id="11">
       <title>More Languages, More <fixed-case>MAP</fixed-case>?: A Study of Multiple Assisting Languages in Multilingual <fixed-case>PRF</fixed-case></title>
       <author><first>Vishal</first><last>Vachhani</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
-      <author><first>Mitesh</first><last>Khapra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="mitesh-m-khapra"><first>Mitesh</first><last>Khapra</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>70–78</pages>
       <url hash="a76c9ba5">W10-4011</url>
       <bibkey>vachhani-etal-2010-languages</bibkey>
@@ -6107,7 +6107,7 @@
     <paper id="12">
       <title>Multilinguization and Personalization of <fixed-case>NL</fixed-case>-based Systems</title>
       <author><first>Najeh</first><last>Hajlaoui</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>79–87</pages>
       <url hash="d0cfd41b">W10-4012</url>
       <bibkey>hajlaoui-boitet-2010-multilinguization</bibkey>
@@ -6136,7 +6136,7 @@
       <title>Textual Emotion Processing From Event Analysis</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <url hash="99c36dbb">W10-4102</url>
       <bibkey>huang-etal-2010-textual</bibkey>
     </paper>
@@ -6175,7 +6175,7 @@
       <title>Reducing the False Alarm Rate of <fixed-case>C</fixed-case>hinese Character Error Detection and Correction</title>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Yong-Zhi</first><last>Chen</last></author>
-      <author><first>Ping-che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-che</first><last>Yang</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <url hash="a8a7cae8">W10-4107</url>
@@ -6191,25 +6191,25 @@
     <paper id="9">
       <title>Bigram <fixed-case>HMM</fixed-case> with Context Distribution Clustering for Unsupervised <fixed-case>C</fixed-case>hinese Part-of-Speech tagging</title>
       <author><first>Lidan</first><last>Zhang</last></author>
-      <author><first>Kwok-Ping</first><last>Chan</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="kwok-ping-chan"><first>Kwok-Ping</first><last>Chan</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="b37bf5b5">W10-4109</url>
       <bibkey>zhang-etal-2010-bigram</bibkey>
     </paper>
     <paper id="10">
       <title>Mining Large-scale Parallel Corpora from Multilingual Patents: An <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese example and its application to <fixed-case>SMT</fixed-case></title>
       <author><first>Bin</first><last>Lu</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Tao</first><last>Jiang</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <url hash="f39f66e6">W10-4110</url>
       <bibkey>lu-etal-2010-mining</bibkey>
     </paper>
     <paper id="11">
       <title>Studies on Automatic Recognition of Common <fixed-case>C</fixed-case>hinese Adverb’s usages Based on Statistics Methods</title>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Junhui</first><last>Zhang</last></author>
       <author><first>Xuefeng</first><last>Zhu</last></author>
       <author><first>Shiwen</first><last>Yu</last></author>
@@ -6220,8 +6220,8 @@
       <title>Automatic Identification of Predicate Heads in <fixed-case>C</fixed-case>hinese Sentences</title>
       <author><first>Xiaona</first><last>Ren</last></author>
       <author><first>Qiaoli</first><last>Zhou</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="0ec1e561">W10-4112</url>
       <bibkey>ren-etal-2010-automatic</bibkey>
     </paper>
@@ -6240,7 +6240,7 @@
       <author><first>Zhen</first><last>Hai</last></author>
       <author><first>Kuiyu</first><last>Chang</last></author>
       <author><first>Qinbao</first><last>Song</last></author>
-      <author><first>Jung-jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></author>
       <url hash="12f6cf98">W10-4114</url>
       <bibkey>hai-etal-2010-statistical</bibkey>
     </paper>
@@ -6250,7 +6250,7 @@
       <author><first>Wenjie</first><last>Li</last></author>
       <author><first>Yan</first><last>Liu</last></author>
       <author><first>Dequan</first><last>Zheng</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <url hash="1c3422e0">W10-4115</url>
       <bibkey>chen-etal-2010-exploring</bibkey>
     </paper>
@@ -6265,7 +6265,7 @@
     <paper id="17">
       <title>Exploiting Social <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case> Collection in Answering Complex Questions</title>
       <author><first>Youzheng</first><last>Wu</last></author>
-      <author><first>Kawai</first><last>Hisashi</last></author>
+      <author id="hisashi-kawai"><first>Kawai</first><last>Hisashi</last></author>
       <url hash="e6e902fb">W10-4117</url>
       <bibkey>wu-hisashi-2010-exploiting</bibkey>
     </paper>
@@ -6293,8 +6293,8 @@
       <title>Active Learning Based Corpus Annotation</title>
       <author><first>Hongyan</first><last>Song</last></author>
       <author><first>Tianfang</first><last>Yao</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="f3c1fa01">W10-4121</url>
       <bibkey>song-etal-2010-active</bibkey>
     </paper>
@@ -6310,7 +6310,7 @@
       <title><fixed-case>CMDMC</fixed-case>: A Diachronic Digital Museum of <fixed-case>C</fixed-case>hinese <fixed-case>M</fixed-case>andarin</title>
       <author><first>Min</first><last>Hou</last></author>
       <author><first>Yu</first><last>Zou</last></author>
-      <author><first>Yonglin</first><last>Teng</last></author>
+      <author id="yonglin-teng"><first>Yonglin</first><last>Teng</last></author>
       <author><first>Wei</first><last>He</last></author>
       <author><first>Yan</first><last>Wang</last></author>
       <author><first>Jun</first><last>Liu</last></author>
@@ -6321,7 +6321,7 @@
     <paper id="24">
       <title><fixed-case>K</fixed-case>azakh Segmentation System of Inflectional Affixes</title>
       <author><first>Gulila</first><last>Altenbek</last></author>
-      <author><last>Wang</last><first>Xiao-long</first></author>
+      <author id="xiao-long-wang"><first>Xiao-long</first><last>Wang</last></author>
       <url hash="7b6a8f34">W10-4124</url>
       <bibkey>altenbek-wang-2010-kazakh</bibkey>
     </paper>
@@ -6343,13 +6343,13 @@
     <paper id="27">
       <title>A Multi-layer <fixed-case>C</fixed-case>hinese Word Segmentation System Optimized for Out-of-domain Tasks</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <url hash="13517981">W10-4127</url>
       <bibkey>gao-vogel-2010-multi</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>HMM</fixed-case> Revises Low Marginal Probability by <fixed-case>CRF</fixed-case> for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Deqin</first><last>Tong</last></author>
       <author><first>Yanyan</first><last>Luo</last></author>
       <url hash="a871b71c">W10-4128</url>
@@ -6384,14 +6384,14 @@
       <title>Adaptive <fixed-case>C</fixed-case>hinese Word Segmentation with Online Passive-Aggressive Algorithm</title>
       <author><first>Wenjun</first><last>Gao</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <url hash="8a367fed">W10-4132</url>
       <bibkey>gao-etal-2010-adaptive</bibkey>
     </paper>
     <paper id="33">
       <title>A Character-Based Joint Model for <fixed-case>CIPS</fixed-case>-<fixed-case>SIGHAN</fixed-case> Word Segmentation Bakeoff 2010</title>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="f52cf2c0">W10-4133</url>
       <bibkey>wang-etal-2010-character</bibkey>
@@ -6401,7 +6401,7 @@
       <author><first>Hua-Ping</first><last>Zhang</last></author>
       <author><first>Jian</first><last>Gao</last></author>
       <author><first>Qian</first><last>Mo</last></author>
-      <author><first>He-Yan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>He-Yan</first><last>Huang</last></author>
       <url hash="bd24f055">W10-4134</url>
       <bibkey>zhang-etal-2010-incorporating</bibkey>
     </paper>
@@ -6416,8 +6416,8 @@
     </paper>
     <paper id="36">
       <title><fixed-case>C</fixed-case>hinese word segmentation model using bootstrapping</title>
-      <author><first>Baobao</first><last>Chang</last></author>
-      <author><first>Mansur</first><last>Mairgup</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
+      <author id="mairgup-mansur"><first>Mansur</first><last>Mairgup</last></author>
       <url hash="a663b3f9">W10-4136</url>
       <bibkey>chang-mairgup-2010-chinese</bibkey>
     </paper>
@@ -6433,10 +6433,10 @@
     </paper>
     <paper id="38">
       <title>Term Contributed Boundary Tagging by Conditional Random Fields for <fixed-case>SIGHAN</fixed-case> 2010 <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff</title>
-      <author><first>Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Tian-Jian</first><last>Jiang</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
       <author><first>Cheng-Lung</first><last>Sung</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <url hash="8f3f5fb9">W10-4138</url>
       <bibkey>jiang-etal-2010-term-contributed</bibkey>
     </paper>
@@ -6481,7 +6481,7 @@
     </paper>
     <paper id="44">
       <title>Discriminative Parse Reranking for <fixed-case>C</fixed-case>hinese with Homogeneous and Heterogeneous Annotations</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
       <url hash="c12cf47f">W10-4144</url>
@@ -6493,7 +6493,7 @@
       <author><first>Wenjing</first><last>Lang</last></author>
       <author><first>Yingying</first><last>Wang</last></author>
       <author><first>Yan</first><last>Wang</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <url hash="d0f66796">W10-4145</url>
       <bibkey>zhou-etal-2010-sau</bibkey>
     </paper>
@@ -6502,7 +6502,7 @@
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Xiaotian</first><last>Zhang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <url hash="c721c1bd">W10-4146</url>
       <bibkey>ma-etal-2010-dependency</bibkey>
     </paper>
@@ -6515,8 +6515,8 @@
     <paper id="48">
       <title><fixed-case>CRF</fixed-case> tagging for head recognition based on <fixed-case>S</fixed-case>tanford parser</title>
       <author><first>Yong</first><last>Cheng</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
-      <author><first>Bingquan</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
+      <author id="bingquan-liu"><first>Bingquan</first><last>Liu</last></author>
       <author><first>Lei</first><last>Lin</last></author>
       <url hash="ca3957ad">W10-4148</url>
       <bibkey>cheng-etal-2010-crf</bibkey>
@@ -6524,7 +6524,7 @@
     <paper id="49">
       <title>Treebank Conversion based Self-training Strategy for Parsing</title>
       <author><first>Zhiguo</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <url hash="90d3ead7">W10-4149</url>
       <bibkey>wang-zong-2010-treebank</bibkey>
     </paper>
@@ -6570,8 +6570,8 @@
       <title>Combine Person Name and Person Identity Recognition and Document Clustering for <fixed-case>C</fixed-case>hinese Person Name Disambiguation</title>
       <author><first>Ruifeng</first><last>Xu</last></author>
       <author><first>Jun</first><last>Xu</last></author>
-      <author><first>Xiangying</first><last>Dai</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="xiang-dai"><first>Xiangying</first><last>Dai</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <url hash="a974c1bc">W10-4154</url>
       <bibkey>xu-etal-2010-combine</bibkey>
     </paper>
@@ -6589,7 +6589,7 @@
       <author><first>Xiang</first><last>Zhu</last></author>
       <author><first>Xiaodong</first><last>Shi</last></author>
       <author><first>Ningfeng</first><last>Liu</last></author>
-      <author><first>YingMei</first><last>Guo</last></author>
+      <author id="ying-mei-guo"><first>YingMei</first><last>Guo</last></author>
       <author><first>Yidong</first><last>Chen</last></author>
       <url hash="f0f46c42">W10-4156</url>
       <bibkey>zhu-etal-2010-chinese</bibkey>
@@ -6599,7 +6599,7 @@
       <author><first>Hua-Ping</first><last>Zhang</last></author>
       <author><first>Zhi-Hua</first><last>Liu</last></author>
       <author><first>Qian</first><last>Mo</last></author>
-      <author><first>He-Yan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>He-Yan</first><last>Huang</last></author>
       <url hash="273fcc1f">W10-4157</url>
       <bibkey>zhang-etal-2010-chinese-personal</bibkey>
     </paper>
@@ -6608,8 +6608,8 @@
       <author><first>Yu</first><last>Hong</last></author>
       <author><first>Fei</first><last>Pei</last></author>
       <author><first>Yue-hui</first><last>Yang</last></author>
-      <author><first>Jian-min</first><last>Yao</last></author>
-      <author><first>Qiao-ming</first><last>Zhu</last></author>
+      <author id="jianmin-yao"><first>Jian-min</first><last>Yao</last></author>
+      <author id="qiaoming-zhu"><first>Qiao-ming</first><last>Zhu</last></author>
       <url hash="0dfd4d79">W10-4158</url>
       <bibkey>hong-etal-2010-jumping</bibkey>
     </paper>
@@ -6623,7 +6623,7 @@
     <paper id="60">
       <title><fixed-case>DLUT</fixed-case>: <fixed-case>C</fixed-case>hinese Personal Name Disambiguation with Rich Features</title>
       <author><first>Dongliang</first><last>Wang</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <url hash="bcf9b093">W10-4160</url>
       <bibkey>wang-huang-2010-dlut</bibkey>
     </paper>
@@ -6666,7 +6666,7 @@
       <title>Triplet-Based <fixed-case>C</fixed-case>hinese Word Sense Induction</title>
       <author><first>Zhao</first><last>Liu</last></author>
       <author><first>Xipeng</first><last>Qiu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <url hash="a8b577ee">W10-4165</url>
       <bibkey>liu-etal-2010-triplet</bibkey>
     </paper>
@@ -6715,8 +6715,8 @@
       <title>Soochow University: Description and Analysis of the <fixed-case>C</fixed-case>hinese Word Sense Induction System for <fixed-case>CLP</fixed-case>2010</title>
       <author><first>Hua</first><last>Xu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <url hash="58f3da9f">W10-4171</url>
       <bibkey>xu-etal-2010-soochow</bibkey>
     </paper>
@@ -6741,7 +6741,7 @@
   <volume id="42" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 6th International Natural Language Generation Conference</booktitle>
-      <editor><first>John</first><last>Kelleher</last></editor>
+      <editor id="john-kelleher"><first>John</first><last>Kelleher</last></editor>
       <editor><first>Brian Mac</first><last>Namee</last></editor>
       <editor><first>Ielka van der</first><last>Sluis</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6756,7 +6756,7 @@
     </frontmatter>
     <paper id="1">
       <title>Comparing Rating Scales and Preference Judgements in Language Evaluation</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <url hash="8e10d899">W10-4201</url>
       <bibkey>belz-kow-2010-comparing</bibkey>
@@ -6764,14 +6764,14 @@
     <paper id="2">
       <title>A Discourse-Aware Graph-Based Content-Selection Framework</title>
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="8fef1779">W10-4202</url>
       <bibkey>demir-etal-2010-discourse</bibkey>
     </paper>
     <paper id="3">
       <title>Generating Referring Expressions with Reference Domain Theory</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <url hash="6835706a">W10-4203</url>
       <bibkey>denis-2010-generating</bibkey>
     </paper>
@@ -6784,8 +6784,8 @@
     </paper>
     <paper id="5">
       <title>Tense and Aspect Assignment in Narrative Discourse</title>
-      <author><first>David</first><last>Elson</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="david-elson"><first>David</first><last>Elson</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="c66b5ce5">W10-4205</url>
       <bibkey>elson-mckeown-2010-tense</bibkey>
     </paper>
@@ -6799,30 +6799,30 @@
     <paper id="7">
       <title>Situated Reference in a Hybrid Human-Robot Interaction System</title>
       <author><first>Manuel</first><last>Giuliani</last></author>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <author><first>Amy</first><last>Isard</last></author>
       <author><first>Colin</first><last>Matheson</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Alois</first><last>Knoll</last></author>
       <url hash="cf37e259">W10-4207</url>
       <bibkey>giuliani-etal-2010-situated</bibkey>
     </paper>
     <paper id="8">
       <title>Towards a Programmable Instrumented Generator</title>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="bb5d4025">W10-4208</url>
       <bibkey>mellish-2010-towards</bibkey>
     </paper>
     <paper id="9">
       <title>Using Semantic Web Technology to Support <fixed-case>NLG</fixed-case>. Case Study: <fixed-case>OWL</fixed-case> finds <fixed-case>RAGS</fixed-case></title>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="954eca21">W10-4209</url>
       <bibkey>mellish-2010-using</bibkey>
     </paper>
     <paper id="10">
       <title>Natural Reference to Objects in a Visual Domain</title>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <url hash="ee53c7c4">W10-4210</url>
       <bibkey>mitchell-etal-2010-natural</bibkey>
@@ -6831,14 +6831,14 @@
       <title>Generating and Validating Abstracts of Meeting Conversations: a User Study</title>
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <url hash="cd2d425f">W10-4211</url>
       <bibkey>murray-etal-2010-generating</bibkey>
     </paper>
     <paper id="12">
       <title>Charting the Potential of Description Logic for the Generation of Referring Expressions</title>
       <author><first>Yuan</first><last>Ren</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Jeff Z.</first><last>Pan</last></author>
       <url hash="aabd288c">W10-4212</url>
       <bibkey>ren-etal-2010-charting</bibkey>
@@ -6874,7 +6874,7 @@
     </paper>
     <paper id="17">
       <title>Extracting Parallel Fragments from Comparable Corpora for Data-to-text Generation</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <url hash="1e75606b">W10-4217</url>
       <bibkey>belz-kow-2010-extracting</bibkey>
@@ -6894,16 +6894,16 @@
     </paper>
     <paper id="20">
       <title>‘If you’ve heard it, you can say it’ - Towards an Account of Expressibility</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <author><first>Charlie</first><last>Greenbacker</last></author>
       <url hash="197c598b">W10-4220</url>
       <bibkey>mcdonald-greenbacker-2010-youve</bibkey>
     </paper>
     <paper id="21">
       <title>Cross-linguistic Attribute Selection for <fixed-case>REG</fixed-case>: Comparing <fixed-case>D</fixed-case>utch and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Ruud</first><last>Koolen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <url hash="b5f62366">W10-4221</url>
       <bibkey>theune-etal-2010-cross</bibkey>
     </paper>
@@ -6917,8 +6917,8 @@
     <paper id="23">
       <title>Paraphrase Generation as Monolingual Translation: Data and Evaluation</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <url hash="fb891480">W10-4223</url>
       <bibkey>wubben-etal-2010-paraphrase</bibkey>
     </paper>
@@ -6927,13 +6927,13 @@
       <author><first>Hendrik</first><last>Zender</last></author>
       <author><first>Christopher</first><last>Koppermann</last></author>
       <author><first>Fai</first><last>Greeve</last></author>
-      <author><first>Geert-Jan</first><last>Kruijff</last></author>
+      <author id="geert-jan-m-kruijff"><first>Geert-Jan</first><last>Kruijff</last></author>
       <url hash="8a500ea6">W10-4224</url>
       <bibkey>zender-etal-2010-anchor</bibkey>
     </paper>
     <paper id="25">
       <title>Generation Challenges 2010 Preface</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <url hash="ef99e7bc">W10-4225</url>
@@ -6941,7 +6941,7 @@
     </paper>
     <paper id="26">
       <title>The <fixed-case>GREC</fixed-case> Challenges 2010: Overview and Evaluation Results</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <url hash="b2a16b0f">W10-4226</url>
       <bibkey>belz-kow-2010-grec</bibkey>
@@ -6954,9 +6954,9 @@
     </paper>
     <paper id="28">
       <title>Poly-co: An Unsupervised Co-reference Detection System</title>
-      <author><first>Éric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Éric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
-      <author><first>Benoit</first><last>Ozell</last></author>
+      <author id="benoit-ozell"><first>Benoit</first><last>Ozell</last></author>
       <url hash="d6bb96b7">W10-4228</url>
       <bibkey>charton-etal-2010-poly</bibkey>
     </paper>
@@ -6965,13 +6965,13 @@
       <author><first>Amitava</first><last>Das</last></author>
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <url hash="75571c65">W10-4229</url>
       <bibkey>das-etal-2010-ju</bibkey>
     </paper>
     <paper id="30">
       <title>The <fixed-case>UMUS</fixed-case> System for Named Entity Generation at <fixed-case>GREC</fixed-case> 2010</title>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <url hash="6ab1a5fd">W10-4230</url>
       <bibkey>favre-bohnet-2010-umus</bibkey>
@@ -6980,7 +6980,7 @@
       <title><fixed-case>UD</fixed-case>el: Refining a Method of Named Entity Generation</title>
       <author><first>Charles</first><last>Greenbacker</last></author>
       <author><first>Nicole</first><last>Sparks</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Che-Yu</first><last>Kuo</last></author>
       <url hash="194dc868">W10-4231</url>
       <bibkey>greenbacker-etal-2010-udel</bibkey>
@@ -6989,7 +6989,7 @@
       <title><fixed-case>UD</fixed-case>el: Named Entity Recognition and Reference Regeneration from Surface Text</title>
       <author><first>Nicole</first><last>Sparks</last></author>
       <author><first>Charles</first><last>Greenbacker</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Che-Yu</first><last>Kuo</last></author>
       <url hash="ea8201ef">W10-4232</url>
       <bibkey>sparks-etal-2010-udel</bibkey>
@@ -6999,11 +6999,11 @@
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
       <author><first>Andrew</first><last>Gargett</last></author>
-      <author><first>Donna</first><last>Byron</last></author>
+      <author id="donna-byron"><first>Donna</first><last>Byron</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <author><first>Robert</first><last>Dale</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <url hash="abf9cb17">W10-4233</url>
       <bibkey>koller-etal-2010-report</bibkey>
     </paper>
@@ -7014,14 +7014,14 @@
       <author><first>Paul</first><last>Piwek</last></author>
       <author><first>Mihai</first><last>Lintean</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Christian</first><last>Moldovan</last></author>
+      <author id="christian-moldovan"><first>Christian</first><last>Moldovan</last></author>
       <url hash="2d2ca3b2">W10-4234</url>
       <bibkey>rus-etal-2010-first</bibkey>
     </paper>
     <paper id="35">
       <title>Generation Under Uncertainty</title>
       <author><first>Oliver</first><last>Lemon</last></author>
-      <author><first>Srini</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srini</first><last>Janarthanam</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <url hash="6e561a05">W10-4235</url>
       <bibkey>lemon-etal-2010-generation</bibkey>
@@ -7035,11 +7035,11 @@
     </paper>
     <paper id="37">
       <title>Finding Common Ground: Towards a Surface Realisation Shared Task</title>
-      <author><first>Anja</first><last>Belz</last></author>
-      <author><first>Mike</first><last>White</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
+      <author id="michael-white"><first>Mike</first><last>White</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <url hash="51a7a4e0">W10-4237</url>
       <bibkey>belz-etal-2010-finding</bibkey>
     </paper>
@@ -7084,7 +7084,7 @@
     </paper>
     <paper id="3">
       <title>Dynamic Adaptation in Dialog Systems</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>17</pages>
       <url hash="4d3d378b">W10-4303</url>
       <bibkey>walker-2010-dynamic</bibkey>
@@ -7153,7 +7153,7 @@
     <paper id="10">
       <title>Using entity features to classify implicit discourse relations</title>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>59–62</pages>
@@ -7186,18 +7186,18 @@
     </paper>
     <paper id="14">
       <title>Exploring the Effectiveness of Lexical Ontologies for Modeling Temporal Relations with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
-      <author><first>Eun Y.</first><last>Ha</last></author>
+      <author id="eun-young-ha"><first>Eun Y.</first><last>Ha</last></author>
       <author><first>Alok</first><last>Baikadi</last></author>
       <author><first>Carlyle</first><last>Licata</last></author>
       <author><first>Bradford</first><last>Mott</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>75–78</pages>
       <url hash="92d55e44">W10-4314</url>
       <bibkey>ha-etal-2010-exploring</bibkey>
     </paper>
     <paper id="15">
       <title>Reference reversibility with Reference Domain Theory</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <pages>79–82</pages>
       <url hash="a41d70e5">W10-4315</url>
       <bibkey>denis-2010-reference</bibkey>
@@ -7205,8 +7205,8 @@
     <paper id="16">
       <title>Utilizing Review Summarization in a Spoken Recommendation System</title>
       <author><first>Jingjing</first><last>Liu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
-      <author><first>Victor</first><last>Zue</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
+      <author id="victor-zue"><first>Victor</first><last>Zue</last></author>
       <pages>83–86</pages>
       <url hash="f15132d2">W10-4316</url>
       <bibkey>liu-etal-2010-utilizing</bibkey>
@@ -7214,7 +7214,7 @@
     <paper id="17">
       <title>Dialogue Management Based on Entities and Constraints</title>
       <author><first>Yushi</first><last>Xu</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>87–90</pages>
       <url hash="e859831d">W10-4317</url>
       <bibkey>xu-seneff-2010-dialogue</bibkey>
@@ -7223,8 +7223,8 @@
       <title>Towards Improving the Naturalness of Social Conversations with Dialogue Systems</title>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>João</first><last>Miranda</last></author>
-      <author><first>Alan</first><last>Black</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>91–94</pages>
       <url hash="e1bd4c72">W10-4318</url>
       <bibkey>marge-etal-2010-towards</bibkey>
@@ -7251,7 +7251,7 @@
       <title>Learning Dialogue Strategies from Older and Younger Simulated Users</title>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Maria</first><last>Wolters</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>103–106</pages>
       <url hash="90f502aa">W10-4321</url>
       <bibkey>georgila-etal-2010-learning</bibkey>
@@ -7268,19 +7268,19 @@
     <paper id="23">
       <title>Parameter estimation for agenda-based user simulation</title>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
-      <author><first>François</first><last>Mairesse</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
+      <author id="francois-mairesse"><first>François</first><last>Mairesse</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>116–123</pages>
       <url hash="d8384ffc">W10-4323</url>
       <bibkey>keizer-etal-2010-parameter</bibkey>
     </paper>
     <paper id="24">
       <title>Adaptive Referring Expression Generation in Spoken Dialogue Systems: Evaluation with Real Users</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>124–131</pages>
       <url hash="f499f762">W10-4324</url>
@@ -7295,9 +7295,9 @@
     </paper>
     <paper id="26">
       <title>The Effects of Discourse Connectives Prediction on Implicit Discourse Relation Recognition</title>
-      <author><first>Zhi Min</first><last>Zhou</last></author>
-      <author><first>Man</first><last>Lan</last></author>
-      <author><first>Zheng Yu</first><last>Niu</last></author>
+      <author id="zhi-min-zhou"><first>Zhi Min</first><last>Zhou</last></author>
+      <author id="man-lan"><first>Man</first><last>Lan</last></author>
+      <author id="zheng-yu-niu"><first>Zheng Yu</first><last>Niu</last></author>
       <author><first>Yu</first><last>Xu</last></author>
       <author><first>Jian</first><last>Su</last></author>
       <pages>139–146</pages>
@@ -7307,7 +7307,7 @@
     <paper id="27">
       <title>Discourse indicators for content selection in summarization</title>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>147–156</pages>
       <url hash="8ecb2b9e">W10-4327</url>
@@ -7316,7 +7316,7 @@
     <paper id="28">
       <title>Comparing Spoken Language Route Instructions for Robots across Environment Representations</title>
       <author><first>Matthew</first><last>Marge</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>157–164</pages>
       <url hash="173debb1">W10-4328</url>
       <bibkey>marge-rudnicky-2010-comparing</bibkey>
@@ -7360,20 +7360,20 @@
       <title>Don’t tell anyone! Two Experiments on Gossip Conversations</title>
       <author><first>Jenny</first><last>Brusk</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>193–200</pages>
       <url hash="226ea1ca">W10-4333</url>
       <bibkey>brusk-etal-2010-dont</bibkey>
     </paper>
     <paper id="34">
       <title><fixed-case>G</fixed-case>aussian Processes for Fast Policy Optimisation of <fixed-case>POMDP</fixed-case>-based Dialogue Managers</title>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Francois</first><last>Mairesse</last></author>
+      <author id="francois-mairesse"><first>Francois</first><last>Mairesse</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>201–204</pages>
       <url hash="ceaf6147">W10-4334</url>
       <bibkey>gasic-etal-2010-gaussian</bibkey>
@@ -7389,7 +7389,7 @@
     </paper>
     <paper id="36">
       <title>Representing Uncertainty about Complex User Goals in Statistical Dialogue Systems</title>
-      <author><first>Paul A.</first><last>Crook</last></author>
+      <author id="paul-a-crook"><first>Paul A.</first><last>Crook</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>209–212</pages>
       <url hash="54471d1e">W10-4336</url>
@@ -7400,14 +7400,14 @@
       <author><first>Sebastian</first><last>Varges</last></author>
       <author><first>Silvia</first><last>Quarteroni</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Alexei</first><last>Ivanov</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei</first><last>Ivanov</last></author>
       <pages>213–216</pages>
       <url hash="d4d9f169">W10-4337</url>
       <bibkey>varges-etal-2010-investigating</bibkey>
     </paper>
     <paper id="38">
       <title>Cooperative User Models in Statistical Dialog Simulators</title>
-      <author><first>Meritxell</first><last>González</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>González</last></author>
       <author><first>Silvia</first><last>Quarteroni</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <author><first>Sebastian</first><last>Varges</last></author>
@@ -7421,8 +7421,8 @@
       <author><first>Komei</first><last>Sugiura</last></author>
       <author><first>Kiyonori</first><last>Ohtake</last></author>
       <author><first>Chiori</first><last>Hori</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>Hisashi</first><last>Kawai</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="hisashi-kawai"><first>Hisashi</first><last>Kawai</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>221–224</pages>
       <url hash="652b126e">W10-4339</url>
@@ -7473,17 +7473,17 @@
     <paper id="45">
       <title><fixed-case>I</fixed-case>’ve said it before, and <fixed-case>I</fixed-case>’ll say it again: An empirical investigation of the upper bound of the selection approach to dialogue</title>
       <author><first>Sudeep</first><last>Gandhe</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>245–248</pages>
       <url hash="d5a9b324">W10-4345</url>
       <bibkey>gandhe-traum-2010-ive</bibkey>
     </paper>
     <paper id="46">
       <title>Autism and Interactional Aspects of Dialogue</title>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <author><first>Rebecca</first><last>Lunsford</last></author>
-      <author><first>Ethan</first><last>Selfridge</last></author>
-      <author><first>Lois</first><last>Black</last></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></author>
+      <author id="lois-m-black"><first>Lois</first><last>Black</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
       <pages>249–252</pages>
       <url hash="9f8e9d1f">W10-4346</url>
@@ -7501,9 +7501,9 @@
     </paper>
     <paper id="48">
       <title>How to Drink from a Fire Hose: One Person Can Annoscribe One Million Utterances in One Month</title>
-      <author><first>David</first><last>Suendermann</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann</last></author>
       <author><first>Jackson</first><last>Liscombe</last></author>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <pages>257–260</pages>
       <url hash="61f945b7">W10-4348</url>
       <bibkey>suendermann-etal-2010-drink</bibkey>
@@ -7529,7 +7529,7 @@
       <title>Statistical Dialog Management Methodologies for Real Applications</title>
       <author><first>David</first><last>Griol</last></author>
       <author><first>Zoraida</first><last>Callejas</last></author>
-      <author><first>Ramón</first><last>López-Cózar</last></author>
+      <author id="ramon-lopez-cozar"><first>Ramón</first><last>López-Cózar</last></author>
       <pages>269–272</pages>
       <url hash="7cd3763b">W10-4351</url>
       <bibkey>griol-etal-2010-statistical</bibkey>
@@ -7537,7 +7537,7 @@
     <paper id="52">
       <title><fixed-case>Y</fixed-case>ou<fixed-case>B</fixed-case>ot: A Simple Framework for Building Virtual Networking Agents</title>
       <author><first>Seiji</first><last>Takegata</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <pages>273–276</pages>
       <url hash="25735b57">W10-4352</url>
       <bibkey>takegata-tanaka-ishii-2010-youbot</bibkey>
@@ -7547,7 +7547,7 @@
       <author><first>Marc</first><last>Cavazza</last></author>
       <author><first>Raúl</first><last>Santos de la Cámara</last></author>
       <author><first>Markku</first><last>Turunen</last></author>
-      <author><first>José</first><last>Relaño Gil</last></author>
+      <author id="jose-relano-gil"><first>José</first><last>Relaño Gil</last></author>
       <author><first>Jaakko</first><last>Hakulinen</last></author>
       <author><first>Nigel</first><last>Crook</last></author>
       <author><first>Debora</first><last>Field</last></author>
@@ -7557,7 +7557,7 @@
     </paper>
     <paper id="54">
       <title><tex-math>F^2</tex-math> - New Technique for Recognition of User Emotional States in Spoken Dialogue Systems</title>
-      <author><first>Ramón</first><last>López-Cózar</last></author>
+      <author id="ramon-lopez-cozar"><first>Ramón</first><last>López-Cózar</last></author>
       <author><first>Jan</first><last>Silovsky</last></author>
       <author><first>David</first><last>Griol</last></author>
       <pages>281–288</pages>
@@ -7567,19 +7567,19 @@
     <paper id="55">
       <title>Online Error Detection of Barge-In Utterances by Using Individual Users’ Utterance Histories in Spoken Dialogue System</title>
       <author><first>Kazunori</first><last>Komatani</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>289–296</pages>
       <url hash="e50bf284">W10-4355</url>
       <bibkey>komatani-okuno-2010-online</bibkey>
     </paper>
     <paper id="56">
       <title>Dialogue Act Modeling in a Complex Task-Oriented Domain</title>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>Eun Y.</first><last>Ha</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="eun-young-ha"><first>Eun Y.</first><last>Ha</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
       <author><first>Michael</first><last>Wallis</last></author>
       <author><first>Mladen</first><last>Vouk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>297–305</pages>
       <url hash="349c3e39">W10-4356</url>
       <bibkey>boyer-etal-2010-dialogue</bibkey>
@@ -7587,7 +7587,7 @@
     <paper id="57">
       <title>Hand Gestures in Disambiguating Types of You Expressions in Multiparty Meetings</title>
       <author><first>Tyler</first><last>Baldwin</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Katrin</first><last>Kirchhoff</last></author>
       <pages>306–313</pages>
       <url hash="292f3eff">W10-4357</url>
@@ -7607,7 +7607,7 @@
     <paper id="59">
       <title>Towards an Empirically Motivated Typology of Follow-Up Questions: The Role of Dialogue Context</title>
       <author><first>Manuel</first><last>Kirschner</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>322–331</pages>
       <url hash="864b5498">W10-4359</url>
       <bibkey>kirschner-bernardi-2010-towards</bibkey>
@@ -7625,7 +7625,7 @@
     <meta>
       <booktitle>Proceedings of the 10th International Workshop on Tree Adjoining Grammar and Related Frameworks (<fixed-case>TAG</fixed-case>+10)</booktitle>
       <url hash="83d02e08">W10-44</url>
-      <editor><first>Srinivas</first><last>Bangalore</last></editor>
+      <editor id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></editor>
       <editor><first>Robert</first><last>Frank</last></editor>
       <editor><first>Maribel</first><last>Romero</last></editor>
       <publisher>Linguistic Department, Yale University</publisher>
@@ -7647,9 +7647,9 @@
     </paper>
     <paper id="2">
       <title>Non-local Right Node Raising: an Analysis using Delayed Tree-Local <fixed-case>MC</fixed-case>-<fixed-case>TAG</fixed-case></title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>David</first><last>Potter</last></author>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <pages>9–16</pages>
       <url hash="0d3d75d5">W10-4402</url>
       <bibkey>han-etal-2010-non</bibkey>
@@ -7686,7 +7686,7 @@
     <paper id="7">
       <title>Unavoidable Ill-nestedness in Natural Language and the Adequacy of Tree Local-<fixed-case>MCTAG</fixed-case> Induced Dependency Structures</title>
       <author><first>Joan</first><last>Chen-Main</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>53–60</pages>
       <url hash="dc5f7d53">W10-4407</url>
       <bibkey>chen-main-joshi-2010-unavoidable</bibkey>
@@ -7695,7 +7695,7 @@
       <title>Generating <fixed-case>LTAG</fixed-case> grammars from a lexicon/ontology interface</title>
       <author><first>Christina</first><last>Unger</last></author>
       <author><first>Felix</first><last>Hieber</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>61–68</pages>
       <url hash="8aa9f85c">W10-4408</url>
       <bibkey>unger-etal-2010-generating</bibkey>
@@ -7732,8 +7732,8 @@
     </paper>
     <paper id="13">
       <title>Control Verb, Argument Cluster Coordination and Multi Component <fixed-case>TAG</fixed-case></title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoit</first><last>Sagot</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoit</first><last>Sagot</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <pages>101–110</pages>
       <url hash="36f26b30">W10-4413</url>
@@ -7741,7 +7741,7 @@
     </paper>
     <paper id="14">
       <title>Building factorized <fixed-case>TAG</fixed-case>s with meta-grammars</title>
-      <author><first>Éric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>Villemonte de la Clergerie</last></author>
       <pages>111–118</pages>
       <url hash="6da4ace8">W10-4414</url>
       <bibkey>villemonte-de-la-clergerie-2010-building</bibkey>
@@ -7771,8 +7771,8 @@
     </paper>
     <paper id="18">
       <title>Binding Variables in <fixed-case>E</fixed-case>nglish: An Analysis Using Delayed Tree Locality</title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <pages>143–150</pages>
       <url hash="b8d1a48f">W10-4418</url>
       <bibkey>storoshenko-han-2010-binding</bibkey>
@@ -7794,9 +7794,9 @@
     </paper>
     <paper id="21">
       <title>Automated Extraction of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars from a Treebank for <fixed-case>V</fixed-case>ietnamese</title>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
-      <author><first>Thi Minh Huyen</first><last>Nguyen</last></author>
-      <author><first>Phuong Thai</first><last>Nguyen</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyen</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong Thai</first><last>Nguyen</last></author>
       <author><first>Azim</first><last>Roussanaly</last></author>
       <pages>165–174</pages>
       <url hash="d2597b6b">W10-4421</url>
diff --git a/data/xml/W11.xml b/data/xml/W11.xml
index da033dfa1d..dc8175501a 100644
--- a/data/xml/W11.xml
+++ b/data/xml/W11.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the Ninth International Conference on Computational Semantics (<fixed-case>IWCS</fixed-case> 2011)</booktitle>
       <editor><first>Johan</first><last>Bos</last></editor>
-      <editor><first>Stephen</first><last>Pulman</last></editor>
+      <editor id="stephen-pulman"><first>Stephen</first><last>Pulman</last></editor>
       <venue>iwcs</venue>
       <year>2011</year>
     </meta>
@@ -14,20 +14,20 @@
     </frontmatter>
     <paper id="1">
       <title>The Semantics of Dialogue Acts</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="d7a7b217">W11-0101</url>
       <bibkey>bunt-2011-semantics</bibkey>
     </paper>
     <paper id="2">
       <title>A New Semantics: Merging Propositional and Distributional Information</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="a90e22c6">W11-0102</url>
       <bibkey>hovy-2011-new</bibkey>
     </paper>
     <paper id="3">
       <title>Deterministic Statistical Mapping of Sentences to Underspecified Semantics</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Pi-Chuan</first><last>Chang</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="pi-chuan-chang"><first>Pi-Chuan</first><last>Chang</last></author>
       <author><first>Michael</first><last>Ringgaard</last></author>
       <url hash="d6902fd8">W11-0103</url>
       <bibkey>alshawi-etal-2011-deterministic</bibkey>
@@ -35,7 +35,7 @@
     <paper id="4">
       <title>Word Sense Disambiguation with Multilingual Features</title>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <url hash="69965221">W11-0104</url>
       <bibkey>banea-mihalcea-2011-word</bibkey>
     </paper>
@@ -51,23 +51,23 @@
     <paper id="6">
       <title>A Model for Composing Semantic Relations</title>
       <author><first>Eduardo</first><last>Blanco</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <url hash="c2e93e3c">W11-0106</url>
       <bibkey>blanco-moldovan-2011-model</bibkey>
     </paper>
     <paper id="7">
       <title>Implementing Weighted Abduction in <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
       <author><first>James</first><last>Blythe</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <author><first>Pedro</first><last>Domingos</last></author>
-      <author><first>Rohit</first><last>Kate</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="rohit-kate"><first>Rohit</first><last>Kate</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <url hash="103e5f4c">W11-0107</url>
       <bibkey>blythe-etal-2011-implementing</bibkey>
     </paper>
     <paper id="8">
       <title>Modular Graph Rewriting to Compute Semantics</title>
-      <author><first>Guillaume</first><last>Bonfante</last></author>
+      <author id="guillaume-bonfante"><first>Guillaume</first><last>Bonfante</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Mathieu</first><last>Morey</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
@@ -84,16 +84,16 @@
     </paper>
     <paper id="10">
       <title><fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Class Assignment as a <fixed-case>WSD</fixed-case> Task</title>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="4d9d066a">W11-0110</url>
       <bibkey>brown-etal-2011-verbnet</bibkey>
     </paper>
     <paper id="11">
       <title>Acquiring entailment pairs across languages and domains: A Data Analysis</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <url hash="ab209157">W11-0111</url>
       <bibkey>faruqui-pado-2011-acquiring</bibkey>
     </paper>
@@ -101,7 +101,7 @@
       <title>Integrating Logical Representations with Probabilistic Information using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
       <author><first>Dan</first><last>Garrette</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <url hash="2c170d93">W11-0112</url>
       <bibkey>garrette-etal-2011-integrating</bibkey>
     </paper>
@@ -113,8 +113,8 @@
     </paper>
     <paper id="14">
       <title>Concrete Sentence Spaces for Compositional Distributional Models of Meaning</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Bob</first><last>Coecke</last></author>
       <author><first>Stephen</first><last>Pulman</last></author>
@@ -123,52 +123,52 @@
     </paper>
     <paper id="15">
       <title>Computing Semantic Compositionality in Distributional Semantics</title>
-      <author><first>Emiliano Raul</first><last>Guevara</last></author>
+      <author id="emiliano-raul-guevara"><first>Emiliano Raul</first><last>Guevara</last></author>
       <url hash="014232f9">W11-0115</url>
       <bibkey>guevara-2011-computing</bibkey>
     </paper>
     <paper id="16">
       <title>Using Query Patterns to Learn the Duration of Events</title>
       <author><first>Andrey</first><last>Gusev</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Divye Raj</first><last>Khilnani</last></author>
       <author><first>Pranav</first><last>Khaitan</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <url hash="a48237df">W11-0116</url>
       <bibkey>gusev-etal-2011-using</bibkey>
     </paper>
     <paper id="17">
       <title>A Representation Framework for Cross-lingual/Interlingual Lexical Semantic Correspondences</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <url hash="0fa7e49b">W11-0117</url>
       <bibkey>hayashi-2011-representation</bibkey>
     </paper>
     <paper id="18">
       <title>Formalising and specifying underquantification</title>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <url hash="36c4afd8">W11-0118</url>
       <bibkey>herbelot-copestake-2011-formalising</bibkey>
     </paper>
     <paper id="19">
       <title>The Exploitation of Spatial Information in Narrative Discourse</title>
-      <author><first>Blake Stephen</first><last>Howald</last></author>
-      <author><first>E. Graham</first><last>Katz</last></author>
+      <author id="blake-howald"><first>Blake Stephen</first><last>Howald</last></author>
+      <author id="graham-katz"><first>E. Graham</first><last>Katz</last></author>
       <url hash="f1a12ff0">W11-0119</url>
       <bibkey>howald-katz-2011-exploitation</bibkey>
     </paper>
     <paper id="20">
       <title>Measuring the semantic relatedness between words and images</title>
-      <author><first>Chee Wee</first><last>Leong</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <url hash="e55e63eb">W11-0120</url>
       <bibkey>leong-mihalcea-2011-measuring</bibkey>
     </paper>
     <paper id="21">
       <title>Elaborating a Knowledge Base for Deep Lexical Semantics</title>
       <author><first>Niloofar</first><last>Montazeri</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <url hash="4c79e83b">W11-0121</url>
       <bibkey>montazeri-hobbs-2011-elaborating</bibkey>
     </paper>
@@ -188,7 +188,7 @@
       <author><first>Junta</first><last>Mizuno</last></author>
       <author><first>Shouko</first><last>Masuda</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="c47f4356">W11-0123</url>
       <bibkey>ohki-etal-2011-recognizing</bibkey>
     </paper>
@@ -197,16 +197,16 @@
       <author><first>Ekaterina</first><last>Ovchinnikova</last></author>
       <author><first>Niloofar</first><last>Montazeri</last></author>
       <author><first>Theodore</first><last>Alexandrov</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
-      <author><first>Michael C.</first><last>McCord</last></author>
-      <author><first>Rutu</first><last>Mulkar-Mehta</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
+      <author id="michael-c-mccord"><first>Michael C.</first><last>McCord</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar-Mehta</last></author>
       <url hash="afef16f7">W11-0124</url>
       <bibkey>ovchinnikova-etal-2011-abductive</bibkey>
     </paper>
     <paper id="25">
       <title>Incremental dialogue act understanding</title>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="11687245">W11-0125</url>
       <bibkey>petukhova-bunt-2011-incremental</bibkey>
     </paper>
@@ -214,7 +214,7 @@
       <title>Extracting aspects of determiner meaning from dialogue in a virtual world environment</title>
       <author><first>Hilke</first><last>Reckman</last></author>
       <author><first>Jeff</first><last>Orkin</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <url hash="cab4b988">W11-0126</url>
       <bibkey>reckman-etal-2011-extracting</bibkey>
     </paper>
@@ -227,7 +227,7 @@
     <paper id="28">
       <title>Ontology-based Distinction between Polysemy and Homonymy</title>
       <author><first>Jason</first><last>Utt</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <url hash="ee8afe8a">W11-0128</url>
       <bibkey>utt-pado-2011-ontology</bibkey>
     </paper>
@@ -253,17 +253,17 @@
     </paper>
     <paper id="32">
       <title>Discovering Semantic Classes for <fixed-case>U</fixed-case>rdu N-<fixed-case>V</fixed-case> Complex Predicates</title>
-      <author><first>Tafseer</first><last>Ahmed</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <url hash="408b90c8">W11-0132</url>
       <bibkey>ahmed-butt-2011-discovering</bibkey>
     </paper>
     <paper id="33">
       <title><fixed-case>DISCUSS</fixed-case>: A dialogue move taxonomy layered over semantic representations</title>
-      <author><first>Lee</first><last>Becker</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <author><first>Sarel</first><last>van Vuuren</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="daafc8c1">W11-0133</url>
       <bibkey>becker-etal-2011-discuss</bibkey>
     </paper>
@@ -277,14 +277,14 @@
     <paper id="35">
       <title>Towards Component-Based Textual Entailment</title>
       <author><first>Elena</first><last>Cabrio</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <url hash="fe082a8f">W11-0135</url>
       <bibkey>cabrio-magnini-2011-towards</bibkey>
     </paper>
     <paper id="36">
       <title>Algebraic Approaches to Compositional Distributional Semantics</title>
       <author><first>Daoud</first><last>Clarke</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Rudi</first><last>Lutz</last></author>
       <url hash="2759a6e5">W11-0136</url>
       <bibkey>clarke-etal-2011-algebraic</bibkey>
@@ -297,8 +297,8 @@
     </paper>
     <paper id="38">
       <title>Towards a More Natural Multilingual Controlled Language Interface to <fixed-case>OWL</fixed-case></title>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <url hash="9ff415a1">W11-0138</url>
       <bibkey>gruzitis-barzdins-2011-towards</bibkey>
     </paper>
@@ -312,8 +312,8 @@
     </paper>
     <paper id="40">
       <title>An Ontology Based Architecture for Translation</title>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Daniele P.</first><last>Radicioni</last></author>
       <url hash="2d89a9b7">W11-0140</url>
       <bibkey>lesmo-etal-2011-ontology</bibkey>
@@ -322,7 +322,7 @@
       <title>Corpus-based approaches to processing the scope of negation cues: an evaluation of the state of the art</title>
       <author><first>Roser</first><last>Morante</last></author>
       <author><first>Sarah</first><last>Schrauwen</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="7ac5efb6">W11-0141</url>
       <bibkey>morante-etal-2011-corpus</bibkey>
     </paper>
@@ -334,9 +334,9 @@
     </paper>
     <paper id="43">
       <title>Granularity in Natural Language Discourse</title>
-      <author><first>Rutu</first><last>Mulkar-Mehta</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="rutu-mulkar-mehta"><first>Rutu</first><last>Mulkar-Mehta</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="67c6f752">W11-0143</url>
       <bibkey>mulkar-mehta-etal-2011-granularity</bibkey>
     </paper>
@@ -351,16 +351,16 @@
     <paper id="45">
       <title>Extracting Contextual Evaluativity</title>
       <author><first>Kevin</first><last>Reschke</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <url hash="20d1159a">W11-0145</url>
       <bibkey>reschke-anand-2011-extracting</bibkey>
     </paper>
     <paper id="46">
       <title>Using <fixed-case>MMIL</fixed-case> for the High Level Semantic Annotation of the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> Dialogue Corpus</title>
-      <author><first>Lina Maria</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina Maria</first><last>Rojas-Barahona</last></author>
       <author><first>Thierry</first><last>Bazillon</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
-      <author><first>Fabrice</first><last>Lefevre</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefevre</last></author>
       <url hash="76842c4f">W11-0146</url>
       <bibkey>rojas-barahona-etal-2011-using</bibkey>
     </paper>
@@ -368,7 +368,7 @@
       <title>Collecting Semantic Data from <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for a Lexical Knowledge Resource in a Text to Picture Generating System</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
       <author><first>Margit</first><last>Bowler</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Bob</first><last>Coyne</last></author>
       <url hash="70d1c49d">W11-0147</url>
       <bibkey>rouhizadeh-etal-2011-collecting</bibkey>
@@ -399,12 +399,12 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2011 Workshop</booktitle>
       <url hash="14df7c45">W11-02</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -429,14 +429,14 @@
     <paper id="2">
       <title>Unsupervised Entailment Detection between Dependency Graph Fragments</title>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>10–18</pages>
       <url hash="b0d007e5">W11-0202</url>
       <bibkey>rei-briscoe-2011-unsupervised</bibkey>
     </paper>
     <paper id="3">
       <title>Learning Phenotype Mapping for Integrating Large Genetic Data</title>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <author><first>Cheng-Ju</first><last>Kuo</last></author>
       <author><first>Congxing</first><last>Cai</last></author>
       <author><first>Sarah</first><last>Pendergrass</last></author>
@@ -459,10 +459,10 @@
     <paper id="5">
       <title>Fast and simple semantic class assignment for biomedical text</title>
       <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>Thomas</first><last>Christiansen</last></author>
-      <author><first>William</first><last>Baumgartner Jr.</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
+      <author id="thomas-ulrich-christiansen"><first>Thomas</first><last>Christiansen</last></author>
+      <author id="william-a-baumgartner-jr"><first>William</first><last>Baumgartner Jr.</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
       <pages>38–45</pages>
       <url hash="df4f1b49">W11-0205</url>
       <bibkey>cohen-etal-2011-fast</bibkey>
@@ -471,8 +471,8 @@
       <title>The Role of Information Extraction in the Design of a Document Triage Application for Biocuration</title>
       <author><first>Sandeep</first><last>Pokkunuri</last></author>
       <author><first>Cartic</first><last>Ramakrishnan</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Gully</first><last>Burns</last></author>
       <pages>46–55</pages>
       <url hash="c3215589">W11-0206</url>
@@ -481,7 +481,7 @@
     <paper id="7">
       <title>Medical Entity Recognition: A Comparaison of Semantic and Statistical Methods</title>
       <author><first>Asma</first><last>Ben Abacha</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>56–64</pages>
       <url hash="de405627">W11-0207</url>
       <bibkey>ben-abacha-zweigenbaum-2011-medical</bibkey>
@@ -490,7 +490,7 @@
       <title>Automatic Acquisition of Huge Training Data for Bio-Medical Named Entity Recognition</title>
       <author><first>Yu</first><last>Usami</last></author>
       <author><first>Han-Cheol</first><last>Cho</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>65–73</pages>
       <url hash="0d3fe621">W11-0208</url>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="10">
       <title>Building a Coreference-Annotated Corpus from the Domain of Biochemistry</title>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>83–91</pages>
       <url hash="9d404c43">W11-0210</url>
@@ -515,7 +515,7 @@
     </paper>
     <paper id="11">
       <title>Towards Morphologically Annotated Corpus of Hospital Discharge Reports in <fixed-case>P</fixed-case>olish</title>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <pages>92–100</pages>
       <url hash="8636c099">W11-0211</url>
@@ -531,7 +531,7 @@
     </paper>
     <paper id="13">
       <title>Automatic extraction of data deposition statements: where do the research results go?</title>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>103–104</pages>
@@ -560,7 +560,7 @@
     <paper id="16">
       <title>A Study on Dependency Tree Kernels for Automatic Extraction of Protein-Protein Interaction</title>
       <author><first>Faisal Md.</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <pages>124–133</pages>
       <url hash="1ba81419">W11-0216</url>
@@ -587,11 +587,11 @@
     <paper id="19">
       <title>Building Timelines from Narrative Clinical Records: Initial Results Based-on Deep Natural Language Understanding</title>
       <author><first>Hyuckchul</first><last>Jung</last></author>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Nate</first><last>Blaylock</last></author>
-      <author><first>William</first><last>de Beaumont</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="nate-blaylock"><first>Nate</first><last>Blaylock</last></author>
+      <author id="william-de-beaumont"><first>William</first><last>de Beaumont</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <pages>146–154</pages>
       <url hash="76fa63d8">W11-0219</url>
       <bibkey>jung-etal-2011-building</bibkey>
@@ -599,7 +599,7 @@
     <paper id="20">
       <title>Text Mining Techniques for Leveraging Positively Labeled Data</title>
       <author><first>Lana</first><last>Yeganova</last></author>
-      <author><first>Donald C.</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Donald C.</first><last>Comeau</last></author>
       <author><first>Won</first><last>Kim</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <pages>155–163</pages>
@@ -609,7 +609,7 @@
     <paper id="21">
       <title>Parsing Natural Language Queries for Life Science Knowledge</title>
       <author><first>Tadayoshi</first><last>Hara</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <pages>164–173</pages>
@@ -619,7 +619,7 @@
     <paper id="22">
       <title>Unlocking Medical Ontologies for Non-Ontology Experts</title>
       <author><first>Shao Fen</first><last>Liang</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Robert</first><last>Stevens</last></author>
       <author><first>Alan</first><last>Rector</last></author>
       <pages>174–181</pages>
@@ -628,8 +628,8 @@
     </paper>
     <paper id="23">
       <title>Self-training and co-training in biomedical word sense disambiguation</title>
-      <author><first>Antonio</first><last>Jimeno-Yepes</last></author>
-      <author><first>Alan</first><last>Aronson</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno-Yepes</last></author>
+      <author id="alan-r-aronson"><first>Alan</first><last>Aronson</last></author>
       <pages>182–183</pages>
       <url hash="cc50427e">W11-0223</url>
       <bibkey>jimeno-yepes-aronson-2011-self</bibkey>
@@ -637,7 +637,7 @@
     <paper id="24">
       <title>Medstract - The Next Generation</title>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>184–185</pages>
       <url hash="a8b649eb">W11-0224</url>
       <bibkey>verhagen-pustejovsky-2011-medstract</bibkey>
@@ -657,8 +657,8 @@
     <meta>
       <booktitle>Proceedings of the Fifteenth Conference on Computational Natural Language Learning</booktitle>
       <url hash="281f18f9">W11-03</url>
-      <editor><first>Sharon</first><last>Goldwater</last></editor>
-      <editor><first>Christopher</first><last>Manning</last></editor>
+      <editor id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></editor>
+      <editor id="christopher-d-manning"><first>Christopher</first><last>Manning</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -671,7 +671,7 @@
     </frontmatter>
     <paper id="1">
       <title>Modeling Syntactic Context Improves Morphological Segmentation</title>
-      <author><first>Yoong Keok</first><last>Lee</last></author>
+      <author id="yoong-keok-lee"><first>Yoong Keok</first><last>Lee</last></author>
       <author><first>Aria</first><last>Haghighi</last></author>
       <author><first>Regina</first><last>Barzilay</last></author>
       <pages>1–9</pages>
@@ -687,9 +687,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>P</fixed-case>unctuation: Making a Point in Unsupervised Dependency Parsing</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>19–28</pages>
       <url hash="330e90fa">W11-0303</url>
       <bibkey>spitkovsky-etal-2011-punctuation</bibkey>
@@ -728,7 +728,7 @@
       <title>Using Sequence Kernels to identify Opinion Entities in <fixed-case>U</fixed-case>rdu</title>
       <author><first>Smruthi</first><last>Mukund</last></author>
       <author><first>Debanjan</first><last>Ghosh</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>58–67</pages>
       <url hash="96a4fd41">W11-0308</url>
       <bibkey>mukund-etal-2011-using</bibkey>
@@ -752,9 +752,9 @@
     <paper id="11">
       <title>Improving the Impact of Subjectivity Word Sense Disambiguation on Contextual Opinion Analysis</title>
       <author><first>Cem</first><last>Akkaya</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Alexander</first><last>Conrad</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>87–96</pages>
       <url hash="a59938db">W11-0311</url>
       <bibkey>akkaya-etal-2011-improving</bibkey>
@@ -770,7 +770,7 @@
     <paper id="13">
       <title>Assessing Benefit from Feature Feedback in Active Learning for Text Classification</title>
       <author><first>Shilpa</first><last>Arora</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>106–114</pages>
       <url hash="6018315d">W11-0313</url>
       <bibkey>arora-nyberg-2011-assessing</bibkey>
@@ -779,7 +779,7 @@
       <title><fixed-case>ULISSE</fixed-case>: an Unsupervised Algorithm for Detecting Reliable Dependency Parses</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>115–124</pages>
       <url hash="b38d10fa">W11-0314</url>
       <bibkey>dellorletta-etal-2011-ulisse</bibkey>
@@ -806,10 +806,10 @@
     </paper>
     <paper id="17">
       <title>Using Second-order Vectors in a Knowledge-based Method for Acronym Disambiguation</title>
-      <author><first>Bridget T.</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget T.</first><last>McInnes</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <author><first>Ying</first><last>Liu</last></author>
-      <author><first>Serguei V.</first><last>Pakhomov</last></author>
+      <author id="serguei-pakhomov"><first>Serguei V.</first><last>Pakhomov</last></author>
       <author><first>Genevieve B.</first><last>Melton</last></author>
       <pages>145–153</pages>
       <url hash="b7e28ef6">W11-0317</url>
@@ -820,7 +820,7 @@
       <author><first>Kohei</first><last>Ozaki</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>154–162</pages>
       <url hash="50a40fb3">W11-0318</url>
       <bibkey>ozaki-etal-2011-using</bibkey>
@@ -860,7 +860,7 @@
     <paper id="23">
       <title>Filling the Gap: Semi-Supervised Learning for Opinion Detection Across Domains</title>
       <author><first>Ning</first><last>Yu</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>200–209</pages>
       <url hash="8a12c9fa">W11-0323</url>
       <bibkey>yu-kubler-2011-filling</bibkey>
@@ -883,8 +883,8 @@
       <title>Composing Simple Image Descriptions using Web-scale N-grams</title>
       <author><first>Siming</first><last>Li</last></author>
       <author><first>Girish</first><last>Kulkarni</last></author>
-      <author><first>Tamara L</first><last>Berg</last></author>
-      <author><first>Alexander C</first><last>Berg</last></author>
+      <author id="tamara-berg"><first>Tamara L</first><last>Berg</last></author>
+      <author id="alexander-berg"><first>Alexander C</first><last>Berg</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
       <pages>220–228</pages>
       <url hash="3dc74bde">W11-0326</url>
@@ -902,17 +902,17 @@
       <title>Learning with Lookahead: Can History-Based Models Rival Globally Optimized Models?</title>
       <author><first>Yoshimasa</first><last>Tsuruoka</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <pages>238–246</pages>
       <url hash="04caddaf">W11-0328</url>
       <bibkey>tsuruoka-etal-2011-learning</bibkey>
     </paper>
     <paper id="29">
       <title>Learning Discriminative Projections for Text Similarity Measures</title>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Kristina</first><last>Toutanova</last></author>
-      <author><first>John C.</first><last>Platt</last></author>
-      <author><first>Christopher</first><last>Meek</last></author>
+      <author id="john-c-platt"><first>John C.</first><last>Platt</last></author>
+      <author id="christopher-meek"><first>Christopher</first><last>Meek</last></author>
       <pages>247–256</pages>
       <url hash="303c9ea5">W11-0329</url>
       <attachment type="presentation" hash="06256a7f">W11-0329.Presentation.pptx</attachment>
@@ -923,9 +923,9 @@
     <meta>
       <booktitle>Proceedings of the 5th Linguistic Annotation Workshop</booktitle>
       <url hash="d54590d0">W11-04</url>
-      <editor><first>Nancy</first><last>Ide</last></editor>
-      <editor><first>Adam</first><last>Meyers</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Katrin</first><last>Tomanek</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
@@ -939,9 +939,9 @@
     </frontmatter>
     <paper id="1">
       <title>On the Development of the <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish Treebank</title>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <pages>1–10</pages>
       <url hash="8aeaa14d">W11-0401</url>
       <bibkey>da-cunha-etal-2011-development</bibkey>
@@ -949,7 +949,7 @@
     <paper id="2">
       <title><fixed-case>OWL</fixed-case>/<fixed-case>DL</fixed-case> formalization of the <fixed-case>MULTEXT</fixed-case>-East morphosyntactic specifications</title>
       <author><first>Christian</first><last>Chiarcos</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>11–20</pages>
       <url hash="820b35ca">W11-0402</url>
       <bibkey>chiarcos-erjavec-2011-owl</bibkey>
@@ -957,8 +957,8 @@
     <paper id="3">
       <title>Analysis of the <fixed-case>H</fixed-case>indi <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank using Dependency Structure</title>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Jinho</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Bhuvana</first><last>Narasimhan</last></author>
       <pages>21–29</pages>
       <url hash="1f1bdada">W11-0403</url>
@@ -967,7 +967,7 @@
     <paper id="4">
       <title>How Good is the Crowd at “real” <fixed-case>WSD</fixed-case>?</title>
       <author><first>Jisup</first><last>Hong</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <pages>30–37</pages>
       <url hash="f515113e">W11-0404</url>
       <bibkey>hong-baker-2011-good</bibkey>
@@ -991,11 +991,11 @@
     </paper>
     <paper id="7">
       <title>A Collaborative Annotation between Human Annotators and a Statistical Parser</title>
-      <author><first>Shun’ya</first><last>Iwasawa</last></author>
+      <author id="shunya-iwasawa"><first>Shun’ya</first><last>Iwasawa</last></author>
       <author><first>Hiroki</first><last>Hanaoka</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>56–64</pages>
       <url hash="dd054feb">W11-0407</url>
       <bibkey>iwasawa-etal-2011-collaborative</bibkey>
@@ -1003,7 +1003,7 @@
     <paper id="8">
       <title>Reducing the Need for Double Annotation</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>65–73</pages>
       <url hash="d7b83062">W11-0408</url>
       <bibkey>dligach-palmer-2011-reducing</bibkey>
@@ -1017,9 +1017,9 @@
     </paper>
     <paper id="10">
       <title>A scaleable automated quality assurance technique for semantic representations and proposition banks</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
-      <author><first>Lawrence</first><last>Hunter</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="lawrence-hunter"><first>Lawrence</first><last>Hunter</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>82–91</pages>
       <url hash="2bbe656f">W11-0410</url>
       <bibkey>cohen-etal-2011-scaleable</bibkey>
@@ -1027,8 +1027,8 @@
     <paper id="11">
       <title>Proposal for an Extension of Traditional Named Entities: From Guidelines to Evaluation, an Overview</title>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Ludovic</first><last>Quintard</last></author>
@@ -1039,7 +1039,7 @@
     <paper id="12">
       <title>Assessing the practical usability of an automatically annotated corpus</title>
       <author><first>Md. Faisal Mahbub</first><last>Chowdhury</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <pages>101–109</pages>
       <url hash="9c061804">W11-0412</url>
       <bibkey>chowdhury-lavelli-2011-assessing</bibkey>
@@ -1047,7 +1047,7 @@
     <paper id="13">
       <title>Subjectivity and Sentiment Annotation of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Newswire</title>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>110–118</pages>
       <url hash="52628222">W11-0413</url>
       <bibkey>abdul-mageed-diab-2011-subjectivity</bibkey>
@@ -1055,7 +1055,7 @@
     <paper id="14">
       <title>Creating an Annotated <fixed-case>T</fixed-case>amil Corpus as a Discourse Resource</title>
       <author><first>Ravi Teja</first><last>Rachakonda</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>119–123</pages>
       <url hash="54ff17b8">W11-0414</url>
       <bibkey>rachakonda-sharma-2011-creating</bibkey>
@@ -1065,7 +1065,7 @@
       <author><first>Silke</first><last>Scheible</last></author>
       <author><first>Richard J.</first><last>Whitt</last></author>
       <author><first>Martin</first><last>Durrell</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>124–128</pages>
       <url hash="1139337c">W11-0415</url>
       <bibkey>scheible-etal-2011-gold</bibkey>
@@ -1081,7 +1081,7 @@
       <title>Empty Categories in <fixed-case>H</fixed-case>indi Dependency Treebank: Analysis and Recovery</title>
       <author><first>Chaitanya</first><last>GSK</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>134–142</pages>
       <url hash="a292ac52">W11-0417</url>
       <bibkey>gsk-etal-2011-empty</bibkey>
@@ -1089,17 +1089,17 @@
     <paper id="18">
       <title>Annotating Events, Temporal Expressions and Relations in <fixed-case>I</fixed-case>talian: the It-Timeml Experience for the Ita-<fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank</title>
       <author><first>Tommaso</first><last>Caselli</last></author>
-      <author><first>Valentina</first><last>Bartalesi Lenzi</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
-      <author><first>Irina</first><last>Prodanof</last></author>
+      <author id="valentina-bartalesi-lenzi"><first>Valentina</first><last>Bartalesi Lenzi</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
+      <author id="irina-prodanof"><first>Irina</first><last>Prodanof</last></author>
       <pages>143–151</pages>
       <url hash="0e90d027">W11-0418</url>
       <bibkey>caselli-etal-2011-annotating</bibkey>
     </paper>
     <paper id="19">
       <title>Increasing Informativeness in Temporal Annotation</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Amber</first><last>Stubbs</last></author>
       <pages>152–160</pages>
       <url hash="c956365d">W11-0419</url>
@@ -1119,7 +1119,7 @@
       <booktitle>Proceedings of the Workshop on Automatic Summarization for Different Genres, Media, and Languages</booktitle>
       <url hash="00b82daf">W11-05</url>
       <editor><first>Ani</first><last>Nenkova</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <editor id="yang-liu-icsi"><first>Yang</first><last>Liu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon</address>
@@ -1133,8 +1133,8 @@
     </frontmatter>
     <paper id="1">
       <title>Plans Toward Automated Chat Summarization</title>
-      <author><first>David C.</first><last>Uthus</last></author>
-      <author><first>David W.</first><last>Aha</last></author>
+      <author id="david-c-uthus"><first>David C.</first><last>Uthus</last></author>
+      <author id="david-w-aha"><first>David W.</first><last>Aha</last></author>
       <pages>1–7</pages>
       <url hash="41e52dbc">W11-0501</url>
       <bibkey>uthus-aha-2011-plans</bibkey>
@@ -1143,8 +1143,8 @@
       <title>Towards Multi-Document Summarization of Scientific Articles:Making Interesting Comparisons with <fixed-case>S</fixed-case>ci<fixed-case>S</fixed-case>umm</title>
       <author><first>Nitin</first><last>Agarwal</last></author>
       <author><first>Ravi Shankar</first><last>Reddy</last></author>
-      <author><first>Kiran</first><last>Gvr</last></author>
-      <author><first>Carolyn Penstein</first><last>Rosé</last></author>
+      <author id="kiran-gvr"><first>Kiran</first><last>Gvr</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rosé</last></author>
       <pages>8–15</pages>
       <url hash="72e32613">W11-0502</url>
       <bibkey>agarwal-etal-2011-towards</bibkey>
@@ -1152,7 +1152,7 @@
     <paper id="3">
       <title>Summarizing Decisions in Spoken Meetings</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>16–24</pages>
       <url hash="96cddfe3">W11-0503</url>
       <bibkey>wang-cardie-2011-summarizing</bibkey>
@@ -1160,14 +1160,14 @@
     <paper id="4">
       <title>Who wrote What Where: Analyzing the content of human and automatic summaries</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
-      <author><first>Hoa</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa</first><last>Dang</last></author>
       <pages>25–32</pages>
       <url hash="d0ac488b">W11-0504</url>
       <bibkey>owczarzak-dang-2011-wrote</bibkey>
     </paper>
     <paper id="5">
       <title><fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>opics: What is Popular on <fixed-case>W</fixed-case>ikipedia and Why</title>
-      <author><first>Byung Gyu</first><last>Ahn</last></author>
+      <author id="byung-gyu-ahn"><first>Byung Gyu</first><last>Ahn</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>33–40</pages>
@@ -1178,8 +1178,8 @@
       <title>Abstractive Summarization of Line Graphs from Popular Media</title>
       <author><first>Charles</first><last>Greenbacker</last></author>
       <author><first>Peng</first><last>Wu</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <author><first>Stephanie</first><last>Elzer</last></author>
       <pages>41–48</pages>
       <url hash="9338d37b">W11-0506</url>
@@ -1214,7 +1214,7 @@
       <title>Testing the Robustness of Online Word Segmentation: Effects of Linguistic Diversity and Phonetic Variation</title>
       <author><first>Luc</first><last>Boruta</last></author>
       <author><first>Sharon</first><last>Peperkamp</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>1–9</pages>
       <url hash="6f780829">W11-0601</url>
@@ -1223,37 +1223,37 @@
     <paper id="2">
       <title>A <fixed-case>B</fixed-case>ayesian Belief Updating Model of Phonetic Recalibration and Selective Adaptation</title>
       <author><first>Dave</first><last>Kleinschmidt</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <pages>10–19</pages>
       <url hash="9a2ed5fc">W11-0602</url>
       <bibkey>kleinschmidt-jaeger-2011-bayesian</bibkey>
     </paper>
     <paper id="3">
       <title>Unsupervised Syntactic Chunking with Acoustic Cues: Computational Models for Prosodic Bootstrapping</title>
-      <author><first>John</first><last>Pate</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="john-k-pate"><first>John</first><last>Pate</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>20–29</pages>
       <url hash="ec1cf908">W11-0603</url>
       <bibkey>pate-goldwater-2011-unsupervised</bibkey>
     </paper>
     <paper id="4">
       <title>A Statistical Test for Grammar</title>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>30–38</pages>
       <url hash="62446a11">W11-0604</url>
       <bibkey>yang-2011-statistical</bibkey>
     </paper>
     <paper id="5">
       <title>Top-Down Recognizers for <fixed-case>MCFG</fixed-case>s and <fixed-case>MG</fixed-case>s</title>
-      <author><first>Edward</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward</first><last>Stabler</last></author>
       <pages>39–48</pages>
       <url hash="0206f313">W11-0605</url>
       <bibkey>stabler-2011-top</bibkey>
     </paper>
     <paper id="6">
       <title>Exploring the Relationship Between Learnability and Linguistic Universals</title>
-      <author><first>Anna N.</first><last>Rafferty</last></author>
-      <author><first>Thomas L.</first><last>Griffiths</last></author>
+      <author id="anna-n-rafferty"><first>Anna N.</first><last>Rafferty</last></author>
+      <author id="thomas-l-griffiths"><first>Thomas L.</first><last>Griffiths</last></author>
       <author><first>Marc</first><last>Ettlinger</last></author>
       <pages>49–57</pages>
       <url hash="4b52e324">W11-0606</url>
@@ -1285,9 +1285,9 @@
     </paper>
     <paper id="10">
       <title>Classification of Atypical Language in Autism</title>
-      <author><first>Emily T.</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily T.</first><last>Prud’hommeaux</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Lois M.</first><last>Black</last></author>
+      <author id="lois-m-black"><first>Lois M.</first><last>Black</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
       <pages>88–96</pages>
       <url hash="fd01ae89">W11-0610</url>
@@ -1295,7 +1295,7 @@
     </paper>
     <paper id="11">
       <title>Colourful Language: Measuring Word-Colour Associations</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>97–106</pages>
       <url hash="64947114">W11-0611</url>
       <bibkey>mohammad-2011-colourful</bibkey>
@@ -1313,7 +1313,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Language in Social Media (<fixed-case>LSM</fixed-case> 2011)</booktitle>
       <url hash="6cb156b8">W11-07</url>
-      <editor><first>Meenakshi</first><last>Nagarajan</last></editor>
+      <editor id="meenakshi-nagarajan"><first>Meenakshi</first><last>Nagarajan</last></editor>
       <editor><first>Michael</first><last>Gamon</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon</address>
@@ -1335,9 +1335,9 @@
     <paper id="2">
       <title>How can you say such things?!?: Recognizing Disagreement in Informal Political Argument</title>
       <author><first>Rob</first><last>Abbott</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Jean E.</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="jean-e-fox-tree"><first>Jean E.</first><last>Fox Tree</last></author>
       <author><first>Robeson</first><last>Bowmani</last></author>
       <author><first>Joseph</first><last>King</last></author>
       <pages>2–11</pages>
@@ -1347,7 +1347,7 @@
     <paper id="3">
       <title>What pushes their buttons? Predicting comment polarity from the content of political blog posts</title>
       <author><first>Ramnath</first><last>Balasubramanyan</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <author><first>Doug</first><last>Pierce</last></author>
       <author><first>David P.</first><last>Redlawsk</last></author>
       <pages>12–19</pages>
@@ -1359,7 +1359,7 @@
       <author><first>Stephan</first><last>Gouws</last></author>
       <author><first>Donald</first><last>Metzler</last></author>
       <author><first>Congxing</first><last>Cai</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>20–29</pages>
       <url hash="91c4fa55">W11-0704</url>
       <bibkey>gouws-etal-2011-contextual</bibkey>
@@ -1369,8 +1369,8 @@
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Boyi</first><last>Xie</last></author>
       <author><first>Ilia</first><last>Vovsha</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
       <pages>30–38</pages>
       <url hash="4272be60">W11-0705</url>
       <bibkey>agarwal-etal-2011-sentiment</bibkey>
@@ -1379,21 +1379,21 @@
       <title>Detecting Forum Authority Claims in Online Discussions</title>
       <author><first>Alex</first><last>Marin</last></author>
       <author><first>Bin</first><last>Zhang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>39–47</pages>
       <url hash="5debe6fe">W11-0706</url>
       <bibkey>marin-etal-2011-detecting</bibkey>
     </paper>
     <paper id="7">
       <title>Annotating Social Acts: Authority Claims and Alignment Moves in <fixed-case>W</fixed-case>ikipedia Talk Pages</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Jonathan T.</first><last>Morgan</last></author>
       <author><first>Meghan</first><last>Oxley</last></author>
       <author><first>Mark</first><last>Zachry</last></author>
       <author><first>Brian</first><last>Hutchinson</last></author>
       <author><first>Alex</first><last>Marin</last></author>
       <author><first>Bin</first><last>Zhang</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>48–57</pages>
       <url hash="5e65d5a6">W11-0707</url>
       <bibkey>bender-etal-2011-annotating</bibkey>
@@ -1422,7 +1422,7 @@
     <paper id="10">
       <title>Language use as a reflection of socialization in online communities</title>
       <author><first>Dong</first><last>Nguyen</last></author>
-      <author><first>Carolyn</first><last>P. Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>P. Rosé</last></author>
       <pages>76–85</pages>
       <url hash="11426f76">W11-0710</url>
       <bibkey>nguyen-p-rose-2011-language</bibkey>
@@ -1456,15 +1456,15 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>MWE</fixed-case>s and Topic Modelling: Enhancing Machine Learning with Linguistics</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>1</pages>
       <url hash="7c7f015d">W11-0801</url>
       <bibkey>baldwin-2011-mwes</bibkey>
     </paper>
     <paper id="2">
       <title>Automatic Extraction of <fixed-case>NV</fixed-case> Expressions in <fixed-case>B</fixed-case>asque: Basic Issues on Cooccurrence Techniques</title>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>2–7</pages>
       <url hash="1860d8b8">W11-0802</url>
       <bibkey>gurrutxaga-alegria-2011-automatic</bibkey>
@@ -1473,7 +1473,7 @@
       <title>Semantic Clustering: an Attempt to Identify Multiword Expressions in <fixed-case>B</fixed-case>engali</title>
       <author><first>Tanmoy</first><last>Chakraborty</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>8–13</pages>
       <url hash="8a3e2777">W11-0803</url>
       <bibkey>chakraborty-etal-2011-semantic</bibkey>
@@ -1482,14 +1482,14 @@
       <title>Decreasing Lexical Data Sparsity in Statistical Syntactic Parsing - Experiments with Named Entities</title>
       <author><first>Deirdre</first><last>Hogan</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>14–19</pages>
       <url hash="74c5d97f">W11-0804</url>
       <bibkey>hogan-etal-2011-decreasing</bibkey>
     </paper>
     <paper id="5">
       <title>Detecting Multi-Word Expressions Improves Word Sense Disambiguation</title>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <author><first>Nidhi</first><last>Kulkarni</last></author>
       <pages>20–24</pages>
       <url hash="156fb4d6">W11-0805</url>
@@ -1498,7 +1498,7 @@
     <paper id="6">
       <title>Tree-Rewriting Models of Multi-Word Expressions</title>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>25–30</pages>
       <url hash="e03bc9dd">W11-0806</url>
       <bibkey>schuler-joshi-2011-tree</bibkey>
@@ -1520,7 +1520,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>MWU</fixed-case>-Aware Part-of-Speech Tagging with a <fixed-case>CRF</fixed-case> Model and Lexical Resources</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Anthony</first><last>Sigogne</last></author>
       <pages>49–56</pages>
       <url hash="685e95f3">W11-0809</url>
@@ -1544,9 +1544,9 @@
     </paper>
     <paper id="12">
       <title>Identifying and Analyzing <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Complex Predicates</title>
-      <author><first>Magali</first><last>Sanches Duran</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>74–82</pages>
       <url hash="e1a3f7a3">W11-0812</url>
@@ -1589,7 +1589,7 @@
     <paper id="17">
       <title>Detecting Noun Compounds and Light Verb Constructions: a Contrastive Study</title>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Gábor</first><last>Berend</last></author>
       <pages>116–121</pages>
       <url hash="365ca7fc">W11-0817</url>
@@ -1598,7 +1598,7 @@
     <paper id="18">
       <title>j<fixed-case>MWE</fixed-case>: A <fixed-case>J</fixed-case>ava Toolkit for Detecting Multi-Word Expressions</title>
       <author><first>Nidhi</first><last>Kulkarni</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>122–124</pages>
       <url hash="3258491c">W11-0818</url>
       <bibkey>kulkarni-finlayson-2011-jmwe</bibkey>
@@ -1606,7 +1606,7 @@
     <paper id="19">
       <title><fixed-case>F</fixed-case>ips<fixed-case>C</fixed-case>o<fixed-case>V</fixed-case>iew: On-line Visualisation of Collocations Extracted from Multilingual Parallel Corpora</title>
       <author><first>Violeta</first><last>Seretan</last></author>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <pages>125–127</pages>
       <url hash="b82aeaf5">W11-0819</url>
       <bibkey>seretan-wehrli-2011-fipscoview</bibkey>
@@ -1623,7 +1623,7 @@
       <title>The Ngram Statistics Package (Text::<fixed-case>NSP</fixed-case>) : A Flexible Tool for Identifying Ngrams, Collocations, and Word Associations</title>
       <author><first>Ted</first><last>Pedersen</last></author>
       <author><first>Satanjeev</first><last>Banerjee</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <author><first>Saiyam</first><last>Kohli</last></author>
       <author><first>Mahesh</first><last>Joshi</last></author>
       <author><first>Ying</first><last>Liu</last></author>
@@ -1633,7 +1633,7 @@
     </paper>
     <paper id="22">
       <title>Fast and Flexible <fixed-case>MWE</fixed-case> Candidate Generation with the mwetoolkit</title>
-      <author><first>Vitor</first><last>De Araujo</last></author>
+      <author id="vitor-de-araujo"><first>Vitor</first><last>De Araujo</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>134–136</pages>
@@ -1642,7 +1642,7 @@
     </paper>
     <paper id="23">
       <title>How Many Multiword Expressions do People Know?</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>137–144</pages>
       <url hash="7cd5b032">W11-0823</url>
       <bibkey>church-2011-many</bibkey>
@@ -1654,10 +1654,10 @@
       <url hash="a90830cf">W11-09</url>
       <editor><first>Su Nam</first><last>Kim</last></editor>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Diarmuid</first><last>Ó Séaghdha</last></editor>
-      <editor><first>Sebastian</first><last>Padó</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="sebastian-pado"><first>Sebastian</first><last>Padó</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -1670,7 +1670,7 @@
     </frontmatter>
     <paper id="1">
       <title>Going Beyond Shallow Semantics</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1</pages>
       <url hash="ef7e8d2f">W11-0901</url>
       <bibkey>palmer-2011-going</bibkey>
@@ -1681,7 +1681,7 @@
       <author><first>David</first><last>McClosky</last></author>
       <author><first>Mason</first><last>Smith</last></author>
       <author><first>Andrey</first><last>Gusev</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>2–10</pages>
       <url hash="bd31f3e6">W11-0902</url>
       <bibkey>surdeanu-etal-2011-customizing</bibkey>
@@ -1689,7 +1689,7 @@
     <paper id="3">
       <title>Extraction of Semantic Word Relations in <fixed-case>T</fixed-case>urkish from Dictionary Definitions</title>
       <author><first>Şerbetçi</first><last>Ayşe</last></author>
-      <author><first>Orhan</first><last>Zeynep</last></author>
+      <author id="zeynep-orhan"><first>Orhan</first><last>Zeynep</last></author>
       <author><first>Pehlivan</first><last>İlknur</last></author>
       <pages>11–18</pages>
       <url hash="4e02f026">W11-0903</url>
@@ -1697,10 +1697,10 @@
     </paper>
     <paper id="4">
       <title>Identifying Event-Sentiment Association using Lexical Equivalence and Co-reference Approaches</title>
-      <author><first>Anup</first><last>Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup</first><last>Kolya</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>19–27</pages>
       <url hash="f583780e">W11-0904</url>
       <bibkey>kolya-etal-2011-identifying</bibkey>
@@ -1709,15 +1709,15 @@
       <title><fixed-case>V</fixed-case>ig<fixed-case>N</fixed-case>et: Grounding Language in Graphics using Frame Semantics</title>
       <author><first>Bob</first><last>Coyne</last></author>
       <author><first>Daniel</first><last>Bauer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>28–36</pages>
       <url hash="c866ee82">W11-0905</url>
       <bibkey>coyne-etal-2011-vignet</bibkey>
     </paper>
     <paper id="6">
       <title>Transition-based Semantic Role Labeling Using Predicate Argument Clustering</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>37–45</pages>
       <url hash="e9731732">W11-0906</url>
       <bibkey>choi-palmer-2011-transition</bibkey>
@@ -1732,15 +1732,15 @@
     <paper id="8">
       <title>Desperately Seeking Implicit Arguments in Text</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>54–62</pages>
       <url hash="ec9021ec">W11-0908</url>
       <bibkey>tonelli-delmonte-2011-desperately</bibkey>
     </paper>
     <paper id="9">
       <title>A Joint Model of Implicit Arguments for Nominal Predicates</title>
-      <author><first>Matthew</first><last>Gerber</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="matthew-gerber"><first>Matthew</first><last>Gerber</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Robert</first><last>Bart</last></author>
       <pages>63–71</pages>
       <url hash="95da2991">W11-0909</url>
@@ -1748,11 +1748,11 @@
     </paper>
     <paper id="10">
       <title>Incorporating Coercive Constructions into a Verb Lexicon</title>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Christopher</first><last>Parisien</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <pages>72–80</pages>
       <url hash="2192d92b">W11-0910</url>
@@ -1780,7 +1780,7 @@
     <paper id="1">
       <title>Automatic Projection of Semantic Structures: an Application to Pairwise Translation Ranking</title>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>1–9</pages>
       <url hash="26602590">W11-1001</url>
       <bibkey>pighin-marquez-2011-automatic</bibkey>
@@ -1796,7 +1796,7 @@
     <paper id="3">
       <title>Semantic Mapping Using Automatic Word Alignment and Semantic Role Labeling</title>
       <author><first>Shumin</first><last>Wu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>21–30</pages>
       <url hash="c4c959dc">W11-1003</url>
       <bibkey>wu-palmer-2011-semantic</bibkey>
@@ -1821,7 +1821,7 @@
     <paper id="6">
       <title>An Evaluation and Possible Improvement Path for Current <fixed-case>SMT</fixed-case> Behavior on Ambiguous Nouns</title>
       <author><first>Els</first><last>Lefever</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>52–60</pages>
       <url hash="95e1d781">W11-1006</url>
       <bibkey>lefever-hoste-2011-evaluation</bibkey>
@@ -1847,14 +1847,14 @@
       <title>A Dependency Based Statistical Translation Model</title>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <author><first>Atanas</first><last>Chanev</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <pages>79–87</pages>
       <url hash="aa500143">W11-1009</url>
       <bibkey>attardi-etal-2011-dependency</bibkey>
     </paper>
     <paper id="10">
       <title>Improving <fixed-case>MT</fixed-case> Word Alignment Using Aligned Multi-Stage Parses</title>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Michiko</first><last>Kosaka</last></author>
       <author><first>Shasha</first><last>Liao</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
@@ -1865,7 +1865,7 @@
     <paper id="11">
       <title>Automatic Category Label Coarsening for Syntax-Based Machine Translation</title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>98–106</pages>
       <url hash="e9ab0a34">W11-1011</url>
       <bibkey>hanneman-lavie-2011-automatic</bibkey>
@@ -1873,26 +1873,26 @@
     <paper id="12">
       <title>Utilizing Target-Side Semantic Role Labels to Assist Hierarchical Phrase-based Machine Translation</title>
       <author><first>Qin</first><last>Gao</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>107–115</pages>
       <url hash="dc298de2">W11-1012</url>
       <bibkey>gao-vogel-2011-utilizing</bibkey>
     </paper>
     <paper id="13">
       <title>Combining statistical and semantic approaches to the translation of ontologies and taxonomies</title>
-      <author><first>John</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John</first><last>McCrae</last></author>
       <author><first>Mauricio</first><last>Espinoza</last></author>
       <author><first>Elena</first><last>Montiel-Ponsoda</last></author>
-      <author><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="guadalupe-aguado-de-cea"><first>Guadalupe</first><last>Aguado-de-Cea</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>116–125</pages>
       <url hash="0bc157e1">W11-1013</url>
       <bibkey>mccrae-etal-2011-combining</bibkey>
     </paper>
     <paper id="14">
       <title>A Semantic Feature for Statistical Machine Translation</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>126–134</pages>
       <url hash="8d1eaf56">W11-1014</url>
       <bibkey>banchs-costa-jussa-2011-semantic</bibkey>
@@ -1901,7 +1901,7 @@
       <title>A General-Purpose Rule Extractor for <fixed-case>SCFG</fixed-case>-Based Machine Translation</title>
       <author><first>Greg</first><last>Hanneman</last></author>
       <author><first>Michelle</first><last>Burroughs</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>135–144</pages>
       <url hash="b381eb22">W11-1015</url>
       <bibkey>hanneman-etal-2011-general</bibkey>
@@ -1911,10 +1911,10 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-6: Graph-based Methods for Natural Language Processing</booktitle>
       <url hash="342ec1a7">W11-11</url>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
       <editor><first>Alessandro</first><last>Moschitti</last></editor>
-      <editor><first>Lluís</first><last>Màrquez</last></editor>
-      <editor><first>Fabio</first><last>Massimo Zanzotto</last></editor>
+      <editor id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></editor>
+      <editor id="fabio-massimo-zanzotto"><first>Fabio</first><last>Massimo Zanzotto</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon</address>
       <month>June</month>
@@ -1944,7 +1944,7 @@
     </paper>
     <paper id="3">
       <title>Invariants and Variability of Synonymy Networks: Self Mediated Agreement by Confluence</title>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Emmanuel</first><last>Navarro</last></author>
       <pages>15–23</pages>
@@ -1961,7 +1961,7 @@
     <paper id="5">
       <title>Using a <fixed-case>W</fixed-case>ikipedia-based Semantic Relatedness Measure for Document Clustering</title>
       <author><first>Majid</first><last>Yazdani</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>29–36</pages>
       <url hash="a6fcd217">W11-1105</url>
       <bibkey>yazdani-popescu-belis-2011-using</bibkey>
@@ -1970,15 +1970,15 @@
       <title><fixed-case>G</fixed-case>rawl<fixed-case>TCQ</fixed-case>: Terminology and Corpora Building by Ranking Simultaneously Terms, Queries and Documents using Graph Random Walks</title>
       <author><first>Xavier</first><last>Tannier</last></author>
       <author><first>Javier</first><last>Couto</last></author>
-      <author><first>Clément</first><last>de Groc</last></author>
+      <author id="clement-de-groc"><first>Clément</first><last>de Groc</last></author>
       <pages>37–41</pages>
       <url hash="b63291f4">W11-1106</url>
       <bibkey>tannier-etal-2011-grawltcq</bibkey>
     </paper>
     <paper id="7">
       <title>Simultaneous Similarity Learning and Feature-Weight Learning for Document Clustering</title>
-      <author><first>Pradeep</first><last>Muthukrishnan</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="pradeep-muthukrishnan"><first>Pradeep</first><last>Muthukrishnan</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Qiaozhu</first><last>Mei</last></author>
       <pages>42–50</pages>
       <url hash="6a3317f4">W11-1107</url>
@@ -1987,14 +1987,14 @@
     <paper id="8">
       <title>Unrestricted Quantifier Scope Disambiguation</title>
       <author><first>Mehdi</first><last>Manshadi</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>51–59</pages>
       <url hash="9501f73f">W11-1108</url>
       <bibkey>manshadi-allen-2011-unrestricted</bibkey>
     </paper>
     <paper id="9">
       <title>From ranked words to dependency trees: two-stage unsupervised non-projective dependency parsing</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>60–68</pages>
       <url hash="c5a2126c">W11-1109</url>
       <bibkey>sogaard-2011-ranked</bibkey>
@@ -2004,7 +2004,7 @@
     <meta>
       <booktitle>Proceedings of the 4th Workshop on Building and Using Comparable Corpora: Comparable Corpora and the Web</booktitle>
       <url hash="8703e7f0">W11-12</url>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2027,7 +2027,7 @@
     <paper id="2">
       <title>The Copiale Cipher</title>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Christiane</first><last>Schaefer</last></author>
       <pages>2–9</pages>
       <url hash="21d4f575">W11-1202</url>
@@ -2037,7 +2037,7 @@
       <title>Learning the Optimal Use of Dependency-parsing Information for Finding Translations with Comparable Corpora</title>
       <author><first>Daniel</first><last>Andrade</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>10–18</pages>
       <url hash="eba63584">W11-1203</url>
       <bibkey>andrade-etal-2011-learning</bibkey>
@@ -2046,7 +2046,7 @@
       <title>Building and Using Comparable Corpora for Domain-Specific Bilingual Lexicon Extraction</title>
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Senja</first><last>Pollak</last></author>
       <pages>19–26</pages>
       <url hash="8f8a88d8">W11-1204</url>
@@ -2064,7 +2064,7 @@
       <title>Bilingual Lexicon Extraction from Comparable Corpora as Metasearch</title>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
-      <author><first>Sebastian</first><last>Peña Saldarriaga</last></author>
+      <author id="sebastian-pena-saldarriaga"><first>Sebastian</first><last>Peña Saldarriaga</last></author>
       <pages>35–43</pages>
       <url hash="6917eaa3">W11-1206</url>
       <bibkey>hazem-etal-2011-bilingual</bibkey>
@@ -2072,7 +2072,7 @@
     <paper id="7">
       <title>Two Ways to Use a Noisy Parallel News Corpus for Improving Statistical Machine Translation</title>
       <author><first>Souhir</first><last>Gahbiche-Braham</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>44–51</pages>
       <url hash="b04c743d">W11-1207</url>
@@ -2089,7 +2089,7 @@
     <paper id="9">
       <title>Extracting Parallel Phrases from Comparable Data</title>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>61–68</pages>
       <url hash="4c6c5524">W11-1209</url>
       <bibkey>hewavitharana-vogel-2011-extracting</bibkey>
@@ -2098,8 +2098,8 @@
       <title>Active Learning with Multiple Annotations for Comparable Data Classification Task</title>
       <author><first>Vamshi</first><last>Ambati</last></author>
       <author><first>Sanjika</first><last>Hewavitharana</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>69–77</pages>
       <url hash="c320e9f8">W11-1210</url>
       <bibkey>ambati-etal-2011-active</bibkey>
@@ -2109,7 +2109,7 @@
       <author><first>Bruno</first><last>Cartoni</last></author>
       <author><first>Sandrine</first><last>Zufferey</last></author>
       <author><first>Thomas</first><last>Meyer</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>78–86</pages>
       <url hash="023be966">W11-1211</url>
       <bibkey>cartoni-etal-2011-comparable</bibkey>
@@ -2117,14 +2117,14 @@
     <paper id="12">
       <title>Identifying Parallel Documents from a Large Bilingual Collection of Texts: Application to Parallel Article Extraction in <fixed-case>W</fixed-case>ikipedia.</title>
       <author><first>Alexandre</first><last>Patry</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>87–95</pages>
       <url hash="5aab7cd3">W11-1212</url>
       <bibkey>patry-langlais-2011-identifying</bibkey>
     </paper>
     <paper id="13">
       <title>Comparable Fora</title>
-      <author><first>Johanka</first><last>Spoustová</last></author>
+      <author id="drahomira-johanka-spoustova"><first>Johanka</first><last>Spoustová</last></author>
       <author><first>Miroslav</first><last>Spousta</last></author>
       <pages>96–101</pages>
       <url hash="dba0a47f">W11-1213</url>
@@ -2132,7 +2132,7 @@
     </paper>
     <paper id="14">
       <title>Unsupervised Alignment of Comparable Data and Text Resources</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <pages>102–109</pages>
       <url hash="00c65328">W11-1214</url>
@@ -2140,14 +2140,14 @@
     </paper>
     <paper id="15">
       <title>Cross-lingual Slot Filling from Comparable Corpora</title>
-      <author><first>Matthew</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Wen-Pin</first><last>Lin</last></author>
       <author><first>Zheng</first><last>Chen</last></author>
       <author><first>Suzanne</first><last>Tamang</last></author>
       <author><first>Mingmin</first><last>Ge</last></author>
       <author><first>Adam</first><last>Lee</last></author>
-      <author id="qi-li"><first>Qi</first><last>Li</last></author>
+      <author><first>Qi</first><last>Li</last></author>
       <author><first>Hao</first><last>Li</last></author>
       <author><first>Sam</first><last>Anzaroot</last></author>
       <author><first>Heng</first><last>Ji</last></author>
@@ -2157,7 +2157,7 @@
     </paper>
     <paper id="16">
       <title>Towards a Data Model for the Universal Corpus</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>Steven</first><last>Bird</last></author>
       <pages>120–127</pages>
       <url hash="dd4723ca">W11-1216</url>
@@ -2166,7 +2166,7 @@
     <paper id="17">
       <title>An Expectation Maximization Algorithm for Textual Unit Alignment</title>
       <author><first>Radu</first><last>Ion</last></author>
-      <author><first>Alexandru</first><last>Ceauşu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceauşu</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <pages>128–135</pages>
       <url hash="28913d8d">W11-1217</url>
@@ -2193,7 +2193,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Distributional Semantics and Compositionality</booktitle>
       <url hash="e93fa474">W11-13</url>
-      <editor><first>Chris</first><last>Biemann</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
       <editor><first>Eugenie</first><last>Giesbrecht</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
@@ -2207,8 +2207,8 @@
     </frontmatter>
     <paper id="1">
       <title>(Linear) Maps of the Impossible: Capturing Semantic Anomalies in Distributional Space</title>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Roberto</first><last>Zamparelli</last></author>
       <pages>1–9</pages>
       <url hash="007dec83">W11-1301</url>
@@ -2216,7 +2216,7 @@
     </paper>
     <paper id="2">
       <title>Distributed Structures and Distributional Meaning</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Lorenzo</first><last>Dell’Arciprete</last></author>
       <pages>10–15</pages>
       <url hash="8f5ae2c5">W11-1302</url>
@@ -2224,7 +2224,7 @@
     </paper>
     <paper id="3">
       <title>Two Multivariate Generalizations of Pointwise Mutual Information</title>
-      <author><first>Tim</first><last>Van de Cruys</last></author>
+      <author id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></author>
       <pages>16–20</pages>
       <url hash="af062465">W11-1303</url>
       <bibkey>van-de-cruys-2011-two</bibkey>
@@ -2239,10 +2239,10 @@
     </paper>
     <paper id="5">
       <title>Shared Task System Description: Frustratingly Hard Compositionality Prediction</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Hector</first><last>Martinez</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Hector</first><last>Martinez</last></author>
       <author><first>Christian</first><last>Rishøj</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>29–32</pages>
       <url hash="c6572490">W11-1305</url>
       <bibkey>johannsen-etal-2011-shared</bibkey>
@@ -2260,7 +2260,7 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Tapabrata</first><last>Mondal</last></author>
       <author><first>Tanik</first><last>Saikh</last></author>
-      <author><first>Sivaju</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaju</first><last>Bandyopadhyay</last></author>
       <pages>38–42</pages>
       <url hash="dadb37b5">W11-1307</url>
       <bibkey>chakraborty-etal-2011-shared</bibkey>
@@ -2268,14 +2268,14 @@
     <paper id="8">
       <title>Detecting Compositionality Using Semantic Vector Space Models Based on Syntactic Context. Shared Task System Description</title>
       <author><first>Guillermo</first><last>Garrido</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <pages>43–47</pages>
       <url hash="6ec73aa9">W11-1308</url>
       <bibkey>garrido-penas-2011-detecting</bibkey>
     </paper>
     <paper id="9">
       <title>Measuring the Compositionality of Collocations via Word Co-occurrence Vectors: Shared Task System Description</title>
-      <author><first>Alfredo</first><last>Maldonado-Guerra</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado-Guerra</last></author>
       <author><first>Martin</first><last>Emms</last></author>
       <pages>48–53</pages>
       <url hash="cc32bc36">W11-1309</url>
@@ -2284,7 +2284,7 @@
     <paper id="10">
       <title>Exemplar-Based Word-Space Model for Compositionality Detection: Shared Task System Description</title>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <author><first>Suresh</first><last>Manandhar</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
       <pages>54–60</pages>
@@ -2296,7 +2296,7 @@
     <meta>
       <booktitle>Proceedings of the Sixth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="d334993f">W11-14</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2313,7 +2313,7 @@
       <title>Automatic Question Generation using Discourse Cues</title>
       <author><first>Manish</first><last>Agarwal</last></author>
       <author><first>Rakshit</first><last>Shah</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>1–9</pages>
       <url hash="5755d68f">W11-1401</url>
       <bibkey>agarwal-etal-2011-automatic</bibkey>
@@ -2321,14 +2321,14 @@
     <paper id="2">
       <title>Understanding Differences in Perceived Peer-Review Helpfulness using Natural Language Processing</title>
       <author><first>Wenting</first><last>Xiong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>10–19</pages>
       <url hash="b3ba8f1b">W11-1402</url>
       <bibkey>xiong-litman-2011-understanding</bibkey>
     </paper>
     <paper id="3">
       <title>Generating Varied Narrative Probability Exercises</title>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <author><first>Roan</first><last>Boer Rookhuiszen</last></author>
       <author><first>Rieks</first><last>op den Akker</last></author>
       <author><first>Hanneke</first><last>Geerlings</last></author>
@@ -2348,7 +2348,7 @@
     <paper id="5">
       <title>Detecting Structural Events for Assessing Non-Native Speech</title>
       <author><first>Lei</first><last>Chen</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <pages>38–45</pages>
       <url hash="39ce9d60">W11-1405</url>
       <bibkey>chen-yoon-2011-detecting</bibkey>
@@ -2366,7 +2366,7 @@
     <paper id="7">
       <title>Automatic Gap-fill Question Generation from Text Books</title>
       <author><first>Manish</first><last>Agarwal</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>56–64</pages>
       <url hash="109d7558">W11-1407</url>
       <bibkey>agarwal-mannem-2011-automatic</bibkey>
@@ -2409,11 +2409,11 @@
     </paper>
     <paper id="12">
       <title><fixed-case>GRASP</fixed-case>: Grammar- and Syntax-based Pattern-Finder in <fixed-case>CALL</fixed-case></title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
       <author><first>Hsien-Chin</first><last>Liou</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>96–104</pages>
       <url hash="8d94217c">W11-1412</url>
       <bibkey>huang-etal-2011-grasp</bibkey>
@@ -2428,8 +2428,8 @@
     </paper>
     <paper id="14">
       <title>Generating Concept Map Exercises from Textbooks</title>
-      <author><first>Andrew</first><last>Olney</last></author>
-      <author><first>Whitney</first><last>Cade</last></author>
+      <author id="andrew-olney"><first>Andrew</first><last>Olney</last></author>
+      <author id="whitney-l-cade"><first>Whitney</first><last>Cade</last></author>
       <author><first>Claire</first><last>Williams</last></author>
       <pages>111–119</pages>
       <url hash="6b8d6a80">W11-1414</url>
@@ -2438,7 +2438,7 @@
     <paper id="15">
       <title>Readability Annotation: Replacing the Expert by the Crowd</title>
       <author><first>Philip</first><last>van Oosten</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>120–129</pages>
       <url hash="abb63243">W11-1415</url>
       <bibkey>van-oosten-hoste-2011-readability</bibkey>
@@ -2454,7 +2454,7 @@
     <paper id="17">
       <title>Predicting Change in Student Motivation by Measuring Cohesion between Tutor and Student</title>
       <author><first>Arthur</first><last>Ward</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>136–141</pages>
       <url hash="9787958a">W11-1417</url>
@@ -2470,7 +2470,7 @@
     </paper>
     <paper id="19">
       <title>Non-scorable Response Detection for Automated Speaking Proficiency Assessment</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Keelan</first><last>Evanini</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
       <pages>152–160</pages>
@@ -2479,7 +2479,7 @@
     </paper>
     <paper id="20">
       <title>Non-<fixed-case>E</fixed-case>nglish Response Detection Method for Automated Proficiency Scoring System</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Derrick</first><last>Higgins</last></author>
       <pages>161–169</pages>
       <url hash="1db585ff">W11-1420</url>
@@ -2488,8 +2488,8 @@
     <paper id="21">
       <title>Bilingual Random Walk Models for Automated Grammar Correction of <fixed-case>ESL</fixed-case> Author-Produced Text</title>
       <author><first>Randy</first><last>West</last></author>
-      <author><first>Y. Albert</first><last>Park</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="y-albert-park"><first>Y. Albert</first><last>Park</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>170–179</pages>
       <url hash="39595844">W11-1421</url>
       <bibkey>west-etal-2011-bilingual</bibkey>
@@ -2506,7 +2506,7 @@
     <meta>
       <booktitle>Proceedings of the 5th <fixed-case>ACL</fixed-case>-<fixed-case>HLT</fixed-case> Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities</booktitle>
       <url hash="84f47ce0">W11-15</url>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
       <editor><first>Piroska</first><last>Lendvai</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, OR, USA</address>
@@ -2520,9 +2520,9 @@
     </frontmatter>
     <paper id="1">
       <title>Extending the tool, or how to annotate historical language varieties</title>
-      <author><first>Cristina</first><last>Sánchez-Marco</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="cristina-sanchez-marco"><first>Cristina</first><last>Sánchez-Marco</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>1–9</pages>
       <url hash="e3e056c2">W11-1501</url>
       <bibkey>sanchez-marco-etal-2011-extending</bibkey>
@@ -2541,7 +2541,7 @@
       <author><first>Silke</first><last>Scheible</last></author>
       <author><first>Richard J.</first><last>Whitt</last></author>
       <author><first>Martin</first><last>Durrell</last></author>
-      <author><first>Paul</first><last>Bennett</last></author>
+      <author id="paul-bennett"><first>Paul</first><last>Bennett</last></author>
       <pages>19–23</pages>
       <url hash="2e94ab93">W11-1503</url>
       <bibkey>scheible-etal-2011-evaluating</bibkey>
@@ -2556,14 +2556,14 @@
     </paper>
     <paper id="5">
       <title>Automatic linguistic annotation of historical language: <fixed-case>T</fixed-case>o<fixed-case>T</fixed-case>r<fixed-case>T</fixed-case>a<fixed-case>L</fixed-case>e and <fixed-case>XIX</fixed-case> century <fixed-case>S</fixed-case>lovene</title>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>33–38</pages>
       <url hash="1f34482b">W11-1505</url>
       <bibkey>erjavec-2011-automatic</bibkey>
     </paper>
     <paper id="6">
       <title>Historical Event Extraction from Text</title>
-      <author><first>Agata Katarzyna</first><last>Cybulska</last></author>
+      <author id="agata-cybulska"><first>Agata Katarzyna</first><last>Cybulska</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>39–43</pages>
       <url hash="18c2dec7">W11-1506</url>
@@ -2573,7 +2573,7 @@
       <title>Enrichment and Structuring of Archival Description Metadata</title>
       <author><first>Kalliopi</first><last>Zervanou</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>44–53</pages>
       <url hash="4c5d3b79">W11-1507</url>
@@ -2581,10 +2581,10 @@
     </paper>
     <paper id="8">
       <title>Structure-Preserving Pipelines for Digital Libraries</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Eduard</first><last>Barbu</last></author>
-      <author><first>Egon</first><last>Stemle</last></author>
-      <author><first>Christian</first><last>Girardi</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
+      <author id="christian-girardi"><first>Christian</first><last>Girardi</last></author>
       <pages>54–62</pages>
       <url hash="026b5f46">W11-1508</url>
       <bibkey>poesio-etal-2011-structure</bibkey>
@@ -2602,12 +2602,12 @@
     </paper>
     <paper id="10">
       <title>Crowdsourcing syntactic relatedness judgements for opinion mining in the study of information technology adoption</title>
-      <author><first>Asad B.</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad B.</first><last>Sayeed</last></author>
       <author><first>Bryan</first><last>Rusk</last></author>
       <author><first>Martin</first><last>Petrov</last></author>
       <author><first>Hieu C.</first><last>Nguyen</last></author>
       <author><first>Timothy J.</first><last>Meyer</last></author>
-      <author><first>Amy</first><last>Weinberg</last></author>
+      <author id="amy-weinberg"><first>Amy</first><last>Weinberg</last></author>
       <pages>69–77</pages>
       <url hash="1c87b78d">W11-1510</url>
       <bibkey>sayeed-etal-2011-crowdsourcing</bibkey>
@@ -2632,14 +2632,14 @@
       <title>Topic Modeling on Historical Newspapers</title>
       <author><first>Tze-I</first><last>Yang</last></author>
       <author><first>Andrew</first><last>Torget</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>96–104</pages>
       <url hash="ce14c781">W11-1513</url>
       <bibkey>yang-etal-2011-topic</bibkey>
     </paper>
     <paper id="14">
       <title>From Once Upon a Time to Happily Ever After: Tracking Emotions in Novels and Fairy Tales</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>105–114</pages>
       <url hash="414ab8b1">W11-1514</url>
       <bibkey>mohammad-2011-upon</bibkey>
@@ -2647,8 +2647,8 @@
     <paper id="15">
       <title>Author Age Prediction from Text using Linear Regression</title>
       <author><first>Dong</first><last>Nguyen</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>115–123</pages>
       <url hash="43b72fd5">W11-1515</url>
       <bibkey>nguyen-etal-2011-author</bibkey>
@@ -2657,8 +2657,8 @@
       <title>A Study of Academic Collaborations in Computational Linguistics using a Latent Mixture of Authors Model</title>
       <author><first>Nikhil</first><last>Johri</last></author>
       <author><first>Daniel</first><last>Ramage</last></author>
-      <author><first>Daniel</first><last>McFarland</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="dan-mcfarland"><first>Daniel</first><last>McFarland</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>124–132</pages>
       <url hash="8a744557">W11-1516</url>
       <bibkey>johri-etal-2011-study</bibkey>
@@ -2682,7 +2682,7 @@
     </frontmatter>
     <paper id="1">
       <title>Learning to Simplify Sentences Using <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Will</first><last>Coster</last></author>
+      <author id="william-coster"><first>Will</first><last>Coster</last></author>
       <author><first>David</first><last>Kauchak</last></author>
       <pages>1–9</pages>
       <url hash="4cf7d26b">W11-1601</url>
@@ -2710,8 +2710,8 @@
       <title>Comparing Phrase-based and Syntax-based Paraphrase Generation</title>
       <author><first>Sander</first><last>Wubben</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>27–33</pages>
       <url hash="f2d883ef">W11-1604</url>
       <bibkey>wubben-etal-2011-comparing</bibkey>
@@ -2727,7 +2727,7 @@
     <paper id="6">
       <title>Towards Strict Sentence Intersection: Decoding and Evaluation Strategies</title>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>43–53</pages>
       <url hash="1673dcbc">W11-1606</url>
       <bibkey>thadani-mckeown-2011-towards</bibkey>
@@ -2751,7 +2751,7 @@
     <paper id="9">
       <title>Creating Disjunctive Logical Forms from Aligned Sentences for Grammar-Based Paraphrase Generation</title>
       <author><first>Scott</first><last>Martin</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>74–83</pages>
       <url hash="af853490">W11-1609</url>
       <bibkey>martin-white-2011-creating</bibkey>
@@ -2780,10 +2780,10 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Computational Approaches to Subjectivity and Sentiment Analysis (<fixed-case>WASSA</fixed-case> 2.011)</booktitle>
       <url hash="0c234040">W11-17</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
       <editor><first>Ester</first><last>Boldrini</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
-      <editor><first>Patricio</first><last>Martinez-Barco</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="patricio-martinez-barco"><first>Patricio</first><last>Martinez-Barco</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon</address>
       <month>June</month>
@@ -2796,10 +2796,10 @@
     </frontmatter>
     <paper id="1">
       <title>Cats Rule and Dogs Drool!: Classifying Stance in Online Debate</title>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <author><first>Rob</first><last>Abbott</last></author>
-      <author><first>Jean E.</first><last>Fox Tree</last></author>
+      <author id="jean-e-fox-tree"><first>Jean E.</first><last>Fox Tree</last></author>
       <author><first>Robeson</first><last>Bowmani</last></author>
       <author><first>Michael</first><last>Minor</last></author>
       <pages>1–9</pages>
@@ -2816,8 +2816,8 @@
     </paper>
     <paper id="3">
       <title>Experiments with a Differential Semantics Annotation for <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0</title>
-      <author><first>Dan</first><last>Tufiş</last></author>
-      <author><first>Dan</first><last>Ştefănescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiş</last></author>
+      <author id="dan-stefanescu"><first>Dan</first><last>Ştefănescu</last></author>
       <pages>19–27</pages>
       <url hash="44325454">W11-1703</url>
       <bibkey>tufis-stefanescu-2011-experiments</bibkey>
@@ -2828,12 +2828,12 @@
       <author><first>Polina</first><last>Lenkova</last></author>
       <author><first>Mohamed</first><last>Ebrahim</last></author>
       <author><first>Maud</first><last>Ehrmann</last></author>
-      <author><first>Ali</first><last>Hurriyetoglu</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hurriyetoglu</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
-      <author><first>Silvia</first><last>Vázquez</last></author>
+      <author id="silvia-vazquez"><first>Silvia</first><last>Vázquez</last></author>
       <pages>28–36</pages>
       <url hash="8782f153">W11-1704</url>
       <bibkey>steinberger-etal-2011-creating</bibkey>
@@ -2841,7 +2841,7 @@
     <paper id="5">
       <title>Generating Semantic Orientation Lexicon using Large Data and Thesaurus</title>
       <author><first>Amit</first><last>Goyal</last></author>
-      <author><first>Hal</first><last>Daumé</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé</last></author>
       <pages>37–43</pages>
       <url hash="f31ef840">W11-1705</url>
       <bibkey>goyal-daume-2011-generating</bibkey>
@@ -2867,14 +2867,14 @@
     <paper id="8">
       <title>A Link to the Past: Constructing Historical Social Networks</title>
       <author><first>Matje</first><last>van de Camp</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>61–69</pages>
       <url hash="73392ee8">W11-1708</url>
       <bibkey>van-de-camp-van-den-bosch-2011-link</bibkey>
     </paper>
     <paper id="9">
       <title>Tracking Sentiment in Mail: How Genders Differ on Emotional Axes</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Tony</first><last>Yang</last></author>
       <pages>70–79</pages>
       <url hash="27c14f33">W11-1709</url>
@@ -2884,8 +2884,8 @@
       <title>Developing <fixed-case>J</fixed-case>apanese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Affect for Analyzing Emotions</title>
       <author><first>Yoshimitsu</first><last>Torii</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>80–86</pages>
       <url hash="c3176208">W11-1710</url>
       <bibkey>torii-etal-2011-developing</bibkey>
@@ -2910,7 +2910,7 @@
     <paper id="13">
       <title>Automatic Emotion Classification for Interpersonal Communication</title>
       <author><first>Frederik</first><last>Vaassen</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>104–110</pages>
       <url hash="9723c789">W11-1713</url>
       <bibkey>vaassen-daelemans-2011-automatic</bibkey>
@@ -2919,7 +2919,7 @@
       <title>Automatic Sentiment Classification of Product Reviews Using Maximal Phrases Based Analysis</title>
       <author><first>Maria</first><last>Tchalakova</last></author>
       <author><first>Dale</first><last>Gerdemann</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>111–117</pages>
       <url hash="b63f3234">W11-1714</url>
       <bibkey>tchalakova-etal-2011-automatic</bibkey>
@@ -2944,17 +2944,17 @@
     </paper>
     <paper id="17">
       <title>Robust Sense-based Sentiment Classification</title>
-      <author><first>Balamurali</first><last>AR</last></author>
+      <author id="balamurali-ar"><first>Balamurali</first><last>AR</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>132–138</pages>
       <url hash="e951b11e">W11-1717</url>
       <bibkey>ar-etal-2011-robust</bibkey>
     </paper>
     <paper id="18">
       <title>Sentiment Classification Using Semantic Features Extracted from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Resources</title>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
-      <author><first>Sonia</first><last>Vázquez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="sonia-vazquez"><first>Sonia</first><last>Vázquez</last></author>
       <author><first>Andrés</first><last>Montoyo</last></author>
       <pages>139–145</pages>
       <url hash="301fd636">W11-1718</url>
@@ -2963,7 +2963,7 @@
     <paper id="19">
       <title>On the Difficulty of Clustering Microblog Texts for Online Reputation Management</title>
       <author><first>Fernando</first><last>Perez-Tellez</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <author><first>John</first><last>Cardiff</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>146–152</pages>
@@ -2993,7 +2993,7 @@
       <title>Towards a Unified Approach for Opinion Question Answering and Summarization</title>
       <author><first>Elena</first><last>Lloret</last></author>
       <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <author><first>Andrés</first><last>Montoyo</last></author>
       <pages>168–174</pages>
       <url hash="2b5c489f">W11-1722</url>
@@ -3018,13 +3018,13 @@
     </paper>
     <paper id="25">
       <title>Sentimatrix – Multilingual Sentiment Analysis Service</title>
-      <author><first>Alexandru-Lucian</first><last>Gînscă</last></author>
-      <author><first>Emanuela</first><last>Boroș</last></author>
+      <author id="alexandru-lucian-ginsca"><first>Alexandru-Lucian</first><last>Gînscă</last></author>
+      <author id="emanuela-boros"><first>Emanuela</first><last>Boroș</last></author>
       <author><first>Adrian</first><last>Iftene</last></author>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <author><first>Mihai</first><last>Toader</last></author>
       <author><first>Marius</first><last>Corîci</last></author>
-      <author><first>Cenel-Augusto</first><last>Perez</last></author>
+      <author id="cenel-augusto-perez"><first>Cenel-Augusto</first><last>Perez</last></author>
       <author><first>Dan</first><last>Cristea</last></author>
       <pages>189–195</pages>
       <url hash="5afca35c">W11-1725</url>
@@ -3035,7 +3035,7 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Shared Task 2011 Workshop</booktitle>
       <url hash="c1dab716">W11-18</url>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
       <editor><first>Jin-Dong</first><last>Kim</last></editor>
       <editor><first>Sampo</first><last>Pyysalo</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3106,7 +3106,7 @@
       <title>Event Extraction as Dependency Parsing for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2011</title>
       <author><first>David</first><last>McClosky</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>41–45</pages>
       <url hash="b447b179">W11-1806</url>
       <bibkey>mcclosky-etal-2011-event-extraction</bibkey>
@@ -3125,7 +3125,7 @@
       <author><first>David</first><last>McClosky</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>51–55</pages>
       <url hash="546e6179">W11-1808</url>
       <bibkey>riedel-etal-2011-model</bibkey>
@@ -3136,7 +3136,7 @@
       <author><first>Julien</first><last>Jourde</last></author>
       <author><first>Philippe</first><last>Bessières</last></author>
       <author><first>Maarten</first><last>van de Guchte</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>56–64</pages>
       <url hash="8e3f30e6">W11-1809</url>
       <bibkey>bossy-etal-2011-bionlp</bibkey>
@@ -3148,7 +3148,7 @@
       <author><first>Philippe</first><last>Veber</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Robert</first><last>Bossy</last></author>
-      <author><first>Erick</first><last>Alphonse</last></author>
+      <author id="erick-alphonse"><first>Erick</first><last>Alphonse</last></author>
       <author><first>Philippe</first><last>Bessières</last></author>
       <pages>65–73</pages>
       <url hash="f56f21d2">W11-1810</url>
@@ -3175,7 +3175,7 @@
     <paper id="13">
       <title>The Taming of Reconcile as a Biomedical Coreference Resolver</title>
       <author><first>Youngjun</first><last>Kim</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Nathan</first><last>Gilbert</last></author>
       <pages>89–93</pages>
       <url hash="19635602">W11-1813</url>
@@ -3195,7 +3195,7 @@
       <author><first>Wiktoria</first><last>Golik</last></author>
       <author><first>Pierre</first><last>Warnier</last></author>
       <author><first>Philippe</first><last>Veber</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>102–111</pages>
       <url hash="aa9a65b1">W11-1815</url>
       <bibkey>ratkovic-etal-2011-bionlp</bibkey>
@@ -3215,7 +3215,7 @@
     <paper id="17">
       <title>Sentence Filtering for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>: Searching for Renaming Acts</title>
       <author><first>Pierre</first><last>Warnier</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>121–129</pages>
       <url hash="6a67cf34">W11-1817</url>
       <bibkey>warnier-nedellec-2011-sentence</bibkey>
@@ -3224,7 +3224,7 @@
       <title>Complex Biological Event Extraction from Full Text using Signatures of Linguistic and Semantic Features</title>
       <author><first>Liam R.</first><last>McGrath</last></author>
       <author><first>Kelly</first><last>Domico</last></author>
-      <author><first>Courtney D.</first><last>Corley</last></author>
+      <author id="courtney-d-corley"><first>Courtney D.</first><last>Corley</last></author>
       <author><first>Bobbie-Jo</first><last>Webb-Robertson</last></author>
       <pages>130–137</pages>
       <url hash="a6ff0cf0">W11-1818</url>
@@ -3232,11 +3232,11 @@
     </paper>
     <paper id="19">
       <title>Using Kybots for Extracting Events in Biomedical Texts</title>
-      <author><first>Arantza</first><last>Casillas</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>138–142</pages>
       <url hash="29e5a70c">W11-1819</url>
       <bibkey>casillas-etal-2011-using</bibkey>
@@ -3261,9 +3261,9 @@
     </paper>
     <paper id="22">
       <title>A Pattern Approach for Biomedical Event Annotation</title>
-      <author><first>Quang</first><last>Le Minh</last></author>
+      <author id="quang-le-minh"><first>Quang</first><last>Le Minh</last></author>
       <author><first>Son</first><last>Nguyen Truong</last></author>
-      <author><first>Quoc</first><last>Ho Bao</last></author>
+      <author id="bao-quoc-ho"><first>Quoc</first><last>Ho Bao</last></author>
       <pages>149–150</pages>
       <url hash="306b614b">W11-1822</url>
       <bibkey>le-minh-etal-2011-pattern</bibkey>
@@ -3283,7 +3283,7 @@
       <title>Double Layered Learning for Biological Event Extraction from Text</title>
       <author><first>Ehsan</first><last>Emadzadeh</last></author>
       <author><first>Azadeh</first><last>Nikfarjam</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>153–154</pages>
       <url hash="3bfbb9a8">W11-1824</url>
       <bibkey>emadzadeh-etal-2011-double</bibkey>
@@ -3301,8 +3301,8 @@
     <paper id="26">
       <title>From Graphs to Events: A Subgraph Matching Approach for Information Extraction from Biomedical Text</title>
       <author><first>Haibin</first><last>Liu</last></author>
-      <author><first>Ravikumar</first><last>Komandur</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="ravikumar-komandur"><first>Ravikumar</first><last>Komandur</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>164–172</pages>
       <url hash="4c343d41">W11-1826</url>
       <bibkey>liu-etal-2011-graphs</bibkey>
@@ -3328,7 +3328,7 @@
     <meta>
       <booktitle>Proceedings of the Fifteenth Conference on Computational Natural Language Learning: Shared Task</booktitle>
       <url hash="fe80ec8e">W11-19</url>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon, USA</address>
       <month>June</month>
@@ -3342,10 +3342,10 @@
     <paper id="1">
       <title><fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2011 Shared Task: Modeling Unrestricted Coreference in <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes</title>
       <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Mitchell</first><last>Marcus</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="mitch-marcus"><first>Mitchell</first><last>Marcus</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>1–27</pages>
       <url hash="21c42cb9">W11-1901</url>
@@ -3355,10 +3355,10 @@
       <title><fixed-case>S</fixed-case>tanford’s Multi-Pass Sieve Coreference Resolution System at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2011 Shared Task</title>
       <author><first>Heeyoung</first><last>Lee</last></author>
       <author><first>Yves</first><last>Peirsman</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>28–34</pages>
       <url hash="12557f26">W11-1902</url>
       <bibkey>lee-etal-2011-stanfords</bibkey>
@@ -3366,8 +3366,8 @@
     <paper id="3">
       <title><fixed-case>R</fixed-case>elax<fixed-case>C</fixed-case>or Participation in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> Shared Task on Coreference Resolution</title>
       <author><first>Emili</first><last>Sapena</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
-      <author><first>Jordi</first><last>Turmo</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
+      <author id="jordi-turmo"><first>Jordi</first><last>Turmo</last></author>
       <pages>35–39</pages>
       <url hash="31b7db12">W11-1903</url>
       <bibkey>sapena-etal-2011-relaxcor</bibkey>
@@ -3377,7 +3377,7 @@
       <author><first>Kai-Wei</first><last>Chang</last></author>
       <author><first>Rajhans</first><last>Samdani</last></author>
       <author><first>Alla</first><last>Rozovskaya</last></author>
-      <author><first>Nick</first><last>Rizzolo</last></author>
+      <author id="nick-rizzolo"><first>Nick</first><last>Rizzolo</last></author>
       <author><first>Mark</first><last>Sammons</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>40–44</pages>
@@ -3394,7 +3394,7 @@
     </paper>
     <paper id="6">
       <title>Rule and Tree Ensembles for Unrestricted Coreference Resolution</title>
-      <author><first>Cicero</first><last>Nogueira dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cicero</first><last>Nogueira dos Santos</last></author>
       <author><first>Davi</first><last>Lopes Carvalho</last></author>
       <pages>51–55</pages>
       <url hash="cc694ba9">W11-1906</url>
@@ -3414,16 +3414,16 @@
       <author><first>Olga</first><last>Uryupina</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>61–65</pages>
       <url hash="b8fbf449">W11-1908</url>
       <bibkey>uryupina-etal-2011-multi</bibkey>
     </paper>
     <paper id="9">
       <title>Combining Syntactic and Semantic Features by <fixed-case>SVM</fixed-case> for Unrestricted Coreference Resolution</title>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Yao</first><last>Li</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Yan</first><last>Zhang</last></author>
       <author><first>Chunlong</first><last>Wu</last></author>
       <author><first>Yuansheng</first><last>Yang</last></author>
@@ -3434,7 +3434,7 @@
     <paper id="10">
       <title>Supervised Coreference Resolution with <fixed-case>SUCRE</fixed-case></title>
       <author><first>Hamidreza</first><last>Kobdani</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <pages>71–75</pages>
       <url hash="105a5fa5">W11-1910</url>
       <bibkey>kobdani-schuetze-2011-supervised</bibkey>
@@ -3446,7 +3446,7 @@
       <author><first>Fandong</first><last>Meng</last></author>
       <author id="yang-liu-ict"><first>Yang</first><last>Liu</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Yajuan</first><last>Lv</last></author>
+      <author id="yajuan-lu"><first>Yajuan</first><last>Lv</last></author>
       <pages>76–80</pages>
       <url hash="5f111d8b">W11-1911</url>
       <bibkey>xiong-etal-2011-ets</bibkey>
@@ -3463,25 +3463,25 @@
       <title>Narrative Schema as World Knowledge for Coreference Resolution</title>
       <author><first>Joseph</first><last>Irwin</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>86–92</pages>
       <url hash="b35eba09">W11-1913</url>
       <bibkey>irwin-etal-2011-narrative</bibkey>
     </paper>
     <paper id="14">
       <title>Hybrid Approach for Coreference Resolution</title>
-      <author><last>Sobha</last><first>Lalitha Devi</first></author>
-      <author><first>Pattabhi RK</first><last>Rao</last></author>
-      <author><first>R. Vijay Sundar</first><last>Ram</last></author>
-      <author><first>CS.</first><last>Malarkodi</last></author>
-      <author><first>A.</first><last>Akilandeswari</last></author>
+      <author id="sobha-lalitha-devi"><first>Lalitha Devi</first><last>Sobha</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi RK</first><last>Rao</last></author>
+      <author id="vijay-sundar-ram"><first>R. Vijay Sundar</first><last>Ram</last></author>
+      <author id="malarkodi-c-s"><first>CS.</first><last>Malarkodi</last></author>
+      <author id="a-akilandeswari"><first>A.</first><last>Akilandeswari</last></author>
       <pages>93–96</pages>
       <url hash="49eb8a7c">W11-1914</url>
       <bibkey>sobha-etal-2011-hybrid</bibkey>
     </paper>
     <paper id="15">
       <title>Poly-co: a multilayer perceptron approach for coreference detection</title>
-      <author><first>Eric</first><last>Charton</last></author>
+      <author id="eric-charton"><first>Eric</first><last>Charton</last></author>
       <author><first>Michel</first><last>Gagnon</last></author>
       <pages>97–101</pages>
       <url hash="61ae997c">W11-1915</url>
@@ -3509,7 +3509,7 @@
     <paper id="18">
       <title><fixed-case>UBIU</fixed-case>: A Robust System for Resolving Unrestricted Coreference</title>
       <author><first>Desislava</first><last>Zhekova</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>112–116</pages>
       <url hash="22c657ce">W11-1918</url>
       <bibkey>zhekova-kubler-2011-ubiu</bibkey>
@@ -3518,7 +3518,7 @@
       <title>A Machine Learning-Based Coreference Detection System for <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes</title>
       <author><first>Yaqin</first><last>Yang</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <pages>117–121</pages>
       <url hash="697079a5">W11-1919</url>
       <bibkey>yang-etal-2011-machine</bibkey>
@@ -3528,7 +3528,7 @@
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <author><first>Uday</first><last>Babbar</last></author>
       <author><first>Pracheer</first><last>Gupta</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>122–126</pages>
       <url hash="95012a63">W11-1920</url>
       <bibkey>stoyanov-etal-2011-reconciling</bibkey>
@@ -3556,10 +3556,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>SIGDIAL</fixed-case> 2011 Conference</booktitle>
       <url hash="737302f3">W11-20</url>
-      <editor><first>Joyce Y.</first><last>Chai</last></editor>
-      <editor><first>Johanna D.</first><last>Moore</last></editor>
-      <editor><first>Rebecca J.</first><last>Passonneau</last></editor>
-      <editor><first>David R.</first><last>Traum</last></editor>
+      <editor id="joyce-chai"><first>Joyce Y.</first><last>Chai</last></editor>
+      <editor id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></editor>
+      <editor id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></editor>
+      <editor id="david-traum"><first>David R.</first><last>Traum</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Portland, Oregon</address>
       <month>June</month>
@@ -3579,19 +3579,19 @@
     </paper>
     <paper id="2">
       <title>Spoken Dialog Challenge 2010: Comparison of Live and Control Test Results</title>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Susanne</first><last>Burger</last></author>
       <author><first>Alistair</first><last>Conkie</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Simon</first><last>Keizer</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Nicolas</first><last>Merigaud</last></author>
       <author><first>Gabriel</first><last>Parent</last></author>
       <author><first>Gabriel</first><last>Schubiner</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <author><first>Kai</first><last>Yu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>2–7</pages>
       <url hash="b4d0a7e1">W11-2002</url>
@@ -3612,7 +3612,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Kyoko</first><last>Matsuyama</last></author>
       <author><first>Kotaro</first><last>Funakoshi</last></author>
-      <author><first>Hiroshi G.</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi G.</first><last>Okuno</last></author>
       <pages>18–29</pages>
       <url hash="4fad4566">W11-2004</url>
       <bibkey>nakano-etal-2011-two</bibkey>
@@ -3636,10 +3636,10 @@
     </paper>
     <paper id="7">
       <title>The Impact of Task-Oriented Feature Sets on <fixed-case>HMM</fixed-case>s for Dialogue Modeling</title>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>Eun Young</first><last>Ha</last></author>
-      <author><first>Robert</first><last>Phillips</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
+      <author id="robert-phillips"><first>Robert</first><last>Phillips</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>49–58</pages>
       <url hash="e157ce58">W11-2007</url>
       <bibkey>boyer-etal-2011-impact</bibkey>
@@ -3663,7 +3663,7 @@
     <paper id="10">
       <title>Giving instructions in virtual environments by corpus based selection</title>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <pages>68–77</pages>
       <url hash="f2647032">W11-2010</url>
       <bibkey>benotti-denis-2011-giving</bibkey>
@@ -3688,7 +3688,7 @@
     </paper>
     <paper id="13">
       <title>Multiparty Turn Taking in Situated Dialog: Study, Lessons, and Directions</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <pages>98–109</pages>
       <url hash="668796dc">W11-2013</url>
@@ -3696,10 +3696,10 @@
     </paper>
     <paper id="14">
       <title>Stability and Accuracy in Incremental Speech Recognition</title>
-      <author><first>Ethan</first><last>Selfridge</last></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></author>
       <author><first>Iker</first><last>Arizmendi</last></author>
-      <author><first>Peter</first><last>Heeman</last></author>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>110–119</pages>
       <url hash="e9883fda">W11-2014</url>
       <bibkey>selfridge-etal-2011-stability</bibkey>
@@ -3714,15 +3714,15 @@
     </paper>
     <paper id="16">
       <title>An Empirical Evaluation of a Statistical Dialog System in Public Use</title>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>130–141</pages>
       <url hash="94e38d8e">W11-2016</url>
       <bibkey>williams-2011-empirical</bibkey>
     </paper>
     <paper id="17">
       <title>“The day after the day after tomorrow?” A machine learning approach to adaptive temporal expression generation: training and evaluation with real users</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <pages>142–151</pages>
@@ -3732,14 +3732,14 @@
     <paper id="18">
       <title>Detecting Levels of Interest from Spoken Dialog with Multistream Prediction Feedback and Similarity Based Hierarchical Fusion Learning</title>
       <author><first>William Yang</first><last>Wang</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>152–161</pages>
       <url hash="b73401da">W11-2018</url>
       <bibkey>wang-hirschberg-2011-detecting</bibkey>
     </paper>
     <paper id="19">
       <title>Exploring User Satisfaction in a Tutorial Dialogue System</title>
-      <author><first>Myroslava O.</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava O.</first><last>Dzikovska</last></author>
       <author><first>Johanna D.</first><last>Moore</last></author>
       <author><first>Natalie</first><last>Steinhauser</last></author>
       <author><first>Gwendolyn</first><last>Campbell</last></author>
@@ -3759,7 +3759,7 @@
     <paper id="21">
       <title>Topics as Contextual Indicators for Word Choice in <fixed-case>SMS</fixed-case> Conversations</title>
       <author><first>Ute</first><last>Winter</last></author>
-      <author><first>Roni</first><last>Ben-Aharon</last></author>
+      <author id="roni-ben-aharon"><first>Roni</first><last>Ben-Aharon</last></author>
       <author><first>Daniel</first><last>Chernobrov</last></author>
       <author><first>Ron</first><last>Hecht</last></author>
       <pages>185–193</pages>
@@ -3769,7 +3769,7 @@
     <paper id="22">
       <title>Multilingual Annotation and Disambiguation of Discourse Connectives for Machine Translation</title>
       <author><first>Thomas</first><last>Meyer</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Sandrine</first><last>Zufferey</last></author>
       <author><first>Bruno</first><last>Cartoni</last></author>
       <pages>194–203</pages>
@@ -3778,9 +3778,9 @@
     </paper>
     <paper id="23">
       <title>Commitments to Preferences in Dialogue</title>
-      <author><first>Anais</first><last>Cadilhac</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anais</first><last>Cadilhac</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <pages>204–215</pages>
       <url hash="7363600f">W11-2023</url>
@@ -3789,8 +3789,8 @@
     </paper>
     <paper id="24">
       <title>Using Performance Trajectories to Analyze the Immediate Impact of User State Misclassification in an Adaptive Spoken Dialogue System</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>216–226</pages>
       <url hash="a80957f1">W11-2024</url>
       <bibkey>forbes-riley-litman-2011-using</bibkey>
@@ -3798,7 +3798,7 @@
     <paper id="25">
       <title>Comparing Triggering Policies for Social Behaviors</title>
       <author><first>Rohit</first><last>Kumar</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>227–238</pages>
       <url hash="854285bb">W11-2025</url>
       <bibkey>kumar-rose-2011-comparing</bibkey>
@@ -3816,7 +3816,7 @@
       <author><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Susan L.</first><last>Epstein</last></author>
       <author><first>Tiziana</first><last>Ligorio</last></author>
-      <author><first>Joshua</first><last>Gordon</last></author>
+      <author id="joshua-b-gordon"><first>Joshua</first><last>Gordon</last></author>
       <pages>248–258</pages>
       <url hash="1feffc8e">W11-2027</url>
       <bibkey>passonneau-etal-2011-embedded</bibkey>
@@ -3827,7 +3827,7 @@
       <author><first>Etsuo</first><last>Mizukami</last></author>
       <author><first>Yoshinori</first><last>Shiga</last></author>
       <author><first>Shinichi</first><last>Kawamoto</last></author>
-      <author><first>Hisashi</first><last>Kawai</last></author>
+      <author id="hisashi-kawai"><first>Hisashi</first><last>Kawai</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>259–265</pages>
       <url hash="7b8c06d6">W11-2028</url>
@@ -3835,7 +3835,7 @@
     </paper>
     <paper id="29">
       <title>Learning to Balance Grounding Rationales for Dialogue Systems</title>
-      <author><first>Joshua</first><last>Gordon</last></author>
+      <author id="joshua-b-gordon"><first>Joshua</first><last>Gordon</last></author>
       <author><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Susan L.</first><last>Epstein</last></author>
       <pages>266–271</pages>
@@ -3901,7 +3901,7 @@
     <paper id="36">
       <title>Examining the Impacts of Dialogue Content and System Automation on Affect Models in a Spoken Tutorial Dialogue System</title>
       <author><first>Joanna</first><last>Drummond</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>312–318</pages>
       <url hash="63430c3f">W11-2036</url>
       <bibkey>drummond-litman-2011-examining</bibkey>
@@ -3925,8 +3925,8 @@
     </paper>
     <paper id="39">
       <title>An Incremental Architecture for the Semantic Annotation of Dialogue Corpora with High-Level Structures. A case of study for the <fixed-case>MEDIA</fixed-case> corpus.</title>
-      <author><first>Lina Maria</first><last>Rojas-Barahona</last></author>
-      <author><first>Matthieu</first><last>Quignard</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina Maria</first><last>Rojas-Barahona</last></author>
+      <author id="matthieu-quignard"><first>Matthieu</first><last>Quignard</last></author>
       <pages>332–334</pages>
       <url hash="7f53230f">W11-2039</url>
       <bibkey>rojas-barahona-quignard-2011-incremental</bibkey>
@@ -3941,7 +3941,7 @@
     </paper>
     <paper id="41">
       <title>Beetle <fixed-case>II</fixed-case>: an adaptable tutorial dialogue system</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
       <author><first>Amy</first><last>Isard</last></author>
       <author><first>Peter</first><last>Bell</last></author>
       <author><first>Johanna</first><last>Moore</last></author>
@@ -3968,8 +3968,8 @@
       <title><fixed-case>POMY</fixed-case>: A Conversational Virtual Environment for Language Learning in <fixed-case>POSTECH</fixed-case></title>
       <author><first>Hyungjong</first><last>Noh</last></author>
       <author><first>Kyusong</first><last>Lee</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>344–346</pages>
       <url hash="aad2557c">W11-2043</url>
       <bibkey>noh-etal-2011-pomy</bibkey>
@@ -3986,7 +3986,7 @@
     </paper>
     <paper id="45">
       <title>A Just-in-Time Document Retrieval System for Dialogues or Monologues</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Majid</first><last>Yazdani</last></author>
       <author><first>Alexandre</first><last>Nanchen</last></author>
       <author><first>Philip N.</first><last>Garner</last></author>
@@ -4002,7 +4002,7 @@
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Omar F.</first><last>Zaidan</last></editor>
+      <editor id="omar-zaidan"><first>Omar F.</first><last>Zaidan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Edinburgh, Scotland</address>
       <month>July</month>
@@ -4015,7 +4015,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Grain of Salt for the <fixed-case>WMT</fixed-case> Manual Evaluation</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Miloš</first><last>Ercegovčević</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Omar</first><last>Zaidan</last></author>
@@ -4028,9 +4028,9 @@
       <author><first>David</first><last>Talbot</last></author>
       <author><first>Hideto</first><last>Kazawa</last></author>
       <author><first>Hiroshi</first><last>Ichikawa</last></author>
-      <author><first>Jason</first><last>Katz-Brown</last></author>
+      <author id="jason-katz-brown"><first>Jason</first><last>Katz-Brown</last></author>
       <author><first>Masakazu</first><last>Seno</last></author>
-      <author><first>Franz</first><last>Och</last></author>
+      <author id="franz-josef-och"><first>Franz</first><last>Och</last></author>
       <pages>12–21</pages>
       <url hash="564cff51">W11-2102</url>
       <bibkey>talbot-etal-2011-lightweight</bibkey>
@@ -4048,7 +4048,7 @@
     <paper id="4">
       <title>Evaluate with Confidence Estimation: Machine ranking of translation outputs using grammatical features</title>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <pages>65–70</pages>
@@ -4075,7 +4075,7 @@
     <paper id="7">
       <title>Meteor 1.3: Automatic Metric for Reliable Optimization and Evaluation of Machine Translation Systems</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>85–91</pages>
       <url hash="d42d3220">W11-2107</url>
       <bibkey>denkowski-lavie-2011-meteor</bibkey>
@@ -4083,14 +4083,14 @@
     <paper id="8">
       <title>Approximating a Deep-Syntactic Metric for <fixed-case>MT</fixed-case> Evaluation and Tuning</title>
       <author><first>Matouš</first><last>Macháček</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>92–98</pages>
       <url hash="7e2df500">W11-2108</url>
       <bibkey>machacek-bojar-2011-approximating</bibkey>
     </paper>
     <paper id="9">
       <title>Evaluation without references: <fixed-case>IBM</fixed-case>1 scores as evaluation metrics</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
@@ -4100,7 +4100,7 @@
     </paper>
     <paper id="10">
       <title>Morphemes and <fixed-case>POS</fixed-case> tags for n-gram based evaluation metrics</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>104–107</pages>
       <url hash="d0105223">W11-2110</url>
       <bibkey>popovic-2011-morphemes</bibkey>
@@ -4108,9 +4108,9 @@
     <paper id="11">
       <title><fixed-case>E</fixed-case>-rating Machine Translation</title>
       <author><first>Kristen</first><last>Parton</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>108–115</pages>
       <url hash="16311ee6">W11-2111</url>
       <bibkey>parton-etal-2011-e</bibkey>
@@ -4127,7 +4127,7 @@
     <paper id="13">
       <title>Regression and Ranking based Optimisation for Sentence Level <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Xingyi</first><last>Song</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>123–129</pages>
       <url hash="a359bfcc">W11-2113</url>
       <bibkey>song-cohn-2011-regression</bibkey>
@@ -4148,8 +4148,8 @@
     </paper>
     <paper id="16">
       <title>The <fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> combination system for <fixed-case>WMT</fixed-case> 2011</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>140–144</pages>
       <url hash="27f29645">W11-2116</url>
       <bibkey>gonzalez-rubio-casacuberta-2011-upv</bibkey>
@@ -4157,7 +4157,7 @@
     <paper id="17">
       <title><fixed-case>CMU</fixed-case> System Combination in <fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>145–151</pages>
       <url hash="fcd730d9">W11-2117</url>
       <bibkey>heafield-lavie-2011-cmu</bibkey>
@@ -4166,17 +4166,17 @@
       <title>The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>152–158</pages>
       <url hash="ca1be9c7">W11-2118</url>
       <bibkey>leusch-etal-2011-rwth</bibkey>
     </paper>
     <paper id="19">
       <title>Expected <fixed-case>BLEU</fixed-case> Training for Graphs: <fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>11 System Combination Task</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <pages>159–165</pages>
       <url hash="c50cee8f">W11-2119</url>
       <bibkey>rosti-etal-2011-expected</bibkey>
@@ -4217,8 +4217,8 @@
       <title>Wider Context by Using Bilingual Language Models in Machine Translation</title>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>198–206</pages>
       <url hash="6b214e3d">W11-2124</url>
       <bibkey>niehues-etal-2011-wider</bibkey>
@@ -4226,7 +4226,7 @@
     <paper id="25">
       <title>A Minimally Supervised Approach for Detecting and Ranking Document Translation Pairs</title>
       <author><first>Kriste</first><last>Krstovski</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>207–216</pages>
       <url hash="7ea03f60">W11-2125</url>
       <bibkey>krstovski-smith-2011-minimally</bibkey>
@@ -4243,7 +4243,7 @@
       <title>Fuzzy Syntactic Reordering for Phrase-based Statistical Machine Translation</title>
       <author><first>Jacob</first><last>Andreas</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>227–236</pages>
       <url hash="edd9635e">W11-2127</url>
       <bibkey>andreas-etal-2011-fuzzy</bibkey>
@@ -4276,7 +4276,7 @@
     </paper>
     <paper id="31">
       <title>Instance Selection for Machine Translation using Feature Decay Algorithms</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>272–283</pages>
       <url hash="64058183">W11-2131</url>
@@ -4286,7 +4286,7 @@
       <title>Investigations on Translation Model Adaptation Using Monolingual Data</title>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Sadaf</first><last>Abdul-Rauf</last></author>
       <pages>284–293</pages>
       <url hash="18e5b5bc">W11-2132</url>
@@ -4311,16 +4311,16 @@
     <paper id="35">
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>11</title>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Hélène</first><last>Bonneau-Maynard</last></author>
-      <author><first>Hai-Son</first><last>Le</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Bonneau-Maynard</last></author>
+      <author id="hai-son-le"><first>Hai-Son</first><last>Le</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Adrien</first><last>Lardilleux</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <pages>309–315</pages>
       <url hash="d74a953c">W11-2135</url>
       <bibkey>allauzen-etal-2011-limsi</bibkey>
@@ -4336,7 +4336,7 @@
     </paper>
     <paper id="37">
       <title><fixed-case>R</fixed-case>eg<fixed-case>MT</fixed-case> System for Machine Translation, System Combination, and Evaluation</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Deniz</first><last>Yuret</last></author>
       <pages>323–329</pages>
       <url hash="031c3e70">W11-2137</url>
@@ -4344,7 +4344,7 @@
     </paper>
     <paper id="38">
       <title>Improving Translation Model by Monolingual Data</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <pages>330–336</pages>
       <url hash="966f0d51">W11-2138</url>
@@ -4352,10 +4352,10 @@
     </paper>
     <paper id="39">
       <title>The <fixed-case>CMU</fixed-case>-<fixed-case>ARK</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Translation System</title>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Jonathan H.</first><last>Clark</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="jonathan-h-clark"><first>Jonathan H.</first><last>Clark</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>337–343</pages>
       <url hash="211b9ead">W11-2139</url>
       <bibkey>dyer-etal-2011-cmu</bibkey>
@@ -4383,14 +4383,14 @@
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Tonio</first><last>Wandmacher</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>358–364</pages>
@@ -4400,7 +4400,7 @@
     <paper id="43">
       <title><fixed-case>CMU</fixed-case> Syntax-Based Machine Translation at <fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>365–371</pages>
       <url hash="a9844778">W11-2143</url>
       <bibkey>hanneman-lavie-2011-cmu</bibkey>
@@ -4408,7 +4408,7 @@
     <paper id="44">
       <title>The <fixed-case>U</fixed-case>ppsala-<fixed-case>FBK</fixed-case> systems at <fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Markus</first><last>Saers</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><last>Mathur</last><first>Prashant</first></author>
@@ -4421,7 +4421,7 @@
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>379–385</pages>
       <url hash="f7f17f30">W11-2145</url>
       <bibkey>herrmann-etal-2011-karlsruhe</bibkey>
@@ -4432,7 +4432,7 @@
       <author><first>Nguyen</first><last>Bach</last></author>
       <author><first>Qin</first><last>Gao</last></author>
       <author><first>Vamshi</first><last>Ambati</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>386–392</pages>
       <url hash="7f968e61">W11-2146</url>
       <bibkey>hewavitharana-etal-2011-cmu</bibkey>
@@ -4469,7 +4469,7 @@
       <author><first>Arnaud</first><last>Dagnelies</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>405–412</pages>
       <url hash="59e6ad61">W11-2149</url>
       <bibkey>huck-etal-2011-rwth</bibkey>
@@ -4477,7 +4477,7 @@
     <paper id="50">
       <title><fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> translation system for <fixed-case>EMNLP</fixed-case>-<fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Maxim</first><last>Khalilov</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>413–419</pages>
       <url hash="802f753e">W11-2150</url>
       <bibkey>khalilov-simaan-2011-illc</bibkey>
@@ -4485,7 +4485,7 @@
     <paper id="51">
       <title><fixed-case>UPM</fixed-case> system for the translation task</title>
       <author><first>Verónica</first><last>López-Ludeña</last></author>
-      <author><first>Rubén</first><last>San-Segundo</last></author>
+      <author id="ruben-san-segundo"><first>Rubén</first><last>San-Segundo</last></author>
       <pages>420–425</pages>
       <url hash="70cba4fb">W11-2151</url>
       <bibkey>lopez-ludena-san-segundo-2011-upm</bibkey>
@@ -4495,7 +4495,7 @@
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <author><first>Petra</first><last>Galuščáková</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>426–432</pages>
       <url hash="79c937a4">W11-2152</url>
       <bibkey>marecek-etal-2011-two</bibkey>
@@ -4505,7 +4505,7 @@
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>433–439</pages>
       <url hash="27ec3f05">W11-2153</url>
       <bibkey>popel-etal-2011-influence</bibkey>
@@ -4513,12 +4513,12 @@
     <paper id="54">
       <title>The <fixed-case>LIGA</fixed-case> (<fixed-case>LIG</fixed-case>/<fixed-case>LIA</fixed-case>) Machine Translation System for <fixed-case>WMT</fixed-case> 2011</title>
       <author><first>Marion</first><last>Potet</last></author>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
-      <author><first>Hervé</first><last>Blanchon</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
+      <author id="herve-blanchon"><first>Hervé</first><last>Blanchon</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>440–446</pages>
       <url hash="51990cbb">W11-2154</url>
       <bibkey>potet-etal-2011-liga</bibkey>
@@ -4526,22 +4526,22 @@
     <paper id="55">
       <title>Factored Translation with Unsupervised Word Clusters</title>
       <author><first>Christian</first><last>Rishøj</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>447–451</pages>
       <url hash="39dc7ca1">W11-2155</url>
       <bibkey>rishoj-sogaard-2011-factored</bibkey>
     </paper>
     <paper id="56">
       <title>The <fixed-case>BM</fixed-case>-<fixed-case>I</fixed-case>2<fixed-case>R</fixed-case> <fixed-case>H</fixed-case>aitian-Créole-to-<fixed-case>E</fixed-case>nglish translation system description for the <fixed-case>WMT</fixed-case> 2011 evaluation campaign</title>
-      <author><first>Marta</first><last>R. Costa-jussà</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta</first><last>R. Costa-jussà</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>452–456</pages>
       <url hash="347083ed">W11-2156</url>
       <bibkey>r-costa-jussa-banchs-2011-bm</bibkey>
     </paper>
     <paper id="57">
       <title>The <fixed-case>U</fixed-case>niversitat d’Alacant hybrid machine translation system for <fixed-case>WMT</fixed-case> 2011</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <pages>457–463</pages>
@@ -4553,7 +4553,7 @@
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Sadaf</first><last>Abdul-Rauf</last></author>
       <author><first>Haithem</first><last>Afli</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
@@ -4583,7 +4583,7 @@
       <title><fixed-case>DFKI</fixed-case> Hybrid Machine Translation System for <fixed-case>WMT</fixed-case> 2011 - On the Integration of <fixed-case>SMT</fixed-case> and <fixed-case>RBMT</fixed-case></title>
       <author><first>Jia</first><last>Xu</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Xiaojun</first><last>Zhang</last></author>
       <pages>485–489</pages>
@@ -4593,23 +4593,23 @@
     <paper id="62">
       <title><fixed-case>CEU</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish system for <fixed-case>WMT</fixed-case>11</title>
       <author><first>Francisco</first><last>Zamora-Martínez</last></author>
-      <author><first>Maria Jose</first><last>Castro-Bleda</last></author>
+      <author id="maria-jose-castro-bleda"><first>Maria Jose</first><last>Castro-Bleda</last></author>
       <pages>490–495</pages>
       <url hash="bac53093">W11-2162</url>
       <bibkey>zamora-martinez-castro-bleda-2011-ceu</bibkey>
     </paper>
     <paper id="63">
       <title>Hierarchical Phrase-Based <fixed-case>MT</fixed-case> at the <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity for the <fixed-case>WMT</fixed-case> 2011 Shared Task</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>496–500</pages>
       <url hash="157f90e4">W11-2163</url>
       <bibkey>zeman-2011-hierarchical</bibkey>
     </paper>
     <paper id="64">
       <title>Crisis <fixed-case>MT</fixed-case>: Developing A Cookbook for <fixed-case>MT</fixed-case> in Crisis Situations</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Robert</first><last>Munro</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>501–511</pages>
       <url hash="47ca5a0e">W11-2164</url>
       <bibkey>lewis-etal-2011-crisis</bibkey>
@@ -4617,7 +4617,7 @@
     <paper id="65">
       <title>Generative Models of Monolingual and Bilingual Gappy Patterns</title>
       <author><first>Kevin</first><last>Gimpel</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>512–522</pages>
       <url hash="6eac27b5">W11-2165</url>
       <bibkey>gimpel-smith-2011-generative</bibkey>
@@ -4633,8 +4633,8 @@
     </paper>
     <paper id="67">
       <title><fixed-case>B</fixed-case>ayesian Extraction of Minimal <fixed-case>SCFG</fixed-case> Rules for Hierarchical Phrase-based Translation</title>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>533–541</pages>
       <url hash="e78599c1">W11-2167</url>
@@ -4644,7 +4644,7 @@
       <title>From n-gram-based to <fixed-case>CRF</fixed-case>-based Translation Models</title>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>542–553</pages>
       <url hash="5e72ff66">W11-2168</url>
@@ -4671,7 +4671,7 @@
     </frontmatter>
     <paper id="1">
       <title>Unsupervised <fixed-case>NLP</fixed-case> and Human Language Acquisition: Making Connections to Make Progress</title>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1</pages>
       <url hash="e44fbbc6">W11-2201</url>
       <bibkey>goldwater-2011-unsupervised</bibkey>
@@ -4680,9 +4680,9 @@
       <title>Structured Databases of Named Entities from <fixed-case>B</fixed-case>ayesian Nonparametrics</title>
       <author><first>Jacob</first><last>Eisenstein</last></author>
       <author><first>Tae</first><last>Yano</last></author>
-      <author><first>William</first><last>Cohen</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>2–12</pages>
       <url hash="e10850ed">W11-2202</url>
       <bibkey>eisenstein-etal-2011-structured</bibkey>
@@ -4711,7 +4711,7 @@
     <paper id="6">
       <title>Unsupervised Language-Independent Name Translation Mining from <fixed-case>W</fixed-case>ikipedia Infoboxes</title>
       <author><first>Wen-Pin</first><last>Lin</last></author>
-      <author><first>Matthew</first><last>Snover</last></author>
+      <author id="matthew-snover"><first>Matthew</first><last>Snover</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>43–52</pages>
       <url hash="ee304461">W11-2206</url>
@@ -4730,9 +4730,9 @@
     <paper id="8">
       <title>Unsupervised Bilingual <fixed-case>POS</fixed-case> Tagging with <fixed-case>M</fixed-case>arkov Random Fields</title>
       <author><first>Desai</first><last>Chen</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Shay</first><last>Cohen</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <pages>64–71</pages>
       <url hash="3f554501">W11-2208</url>
       <bibkey>chen-etal-2011-unsupervised</bibkey>
@@ -4743,7 +4743,7 @@
       <author><first>Boris</first><last>Detienne</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Dominique</first><last>Quadri</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>72–81</pages>
       <url hash="9c93939b">W11-2209</url>
       <bibkey>camelin-etal-2011-unsupervised</bibkey>
@@ -4762,7 +4762,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Daniel</first><last>Stein</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>91–96</pages>
       <url hash="03e8d1db">W11-2211</url>
       <bibkey>huck-etal-2011-lightly</bibkey>
@@ -4770,7 +4770,7 @@
     <paper id="12">
       <title>Unsupervised Alignment for Segmental-based Language Understanding</title>
       <author><first>Stéphane</first><last>Huet</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>97–104</pages>
       <url hash="f3fda0b3">W11-2212</url>
       <bibkey>huet-lefevre-2011-unsupervised</bibkey>
@@ -4831,7 +4831,7 @@
       <title>Towards technology-assisted co-construction with communication partners</title>
       <author><first>Brian</first><last>Roark</last></author>
       <author><first>Andrew</first><last>Fowler</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Christopher</first><last>Gibbons</last></author>
       <author><first>Melanie</first><last>Fried-Oken</last></author>
       <pages>22–31</pages>
@@ -4848,7 +4848,7 @@
     </paper>
     <paper id="5">
       <title>Asynchronous fixed-grid scanning with dynamic codes</title>
-      <author><first>Russ</first><last>Beckley</last></author>
+      <author id="russell-beckley"><first>Russ</first><last>Beckley</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>43–51</pages>
       <url hash="0200b789">W11-2305</url>
@@ -4858,10 +4858,10 @@
       <title>Improving the Accessibility of Line Graphs in Multimodal Documents</title>
       <author><first>Charles F.</first><last>Greenbacker</last></author>
       <author><first>Peng</first><last>Wu</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <author><first>Stephanie</first><last>Elzer</last></author>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <author><first>Daniel</first><last>Chester</last></author>
       <author><first>Seniz</first><last>Demir</last></author>
       <pages>52–62</pages>
@@ -4874,11 +4874,11 @@
       <author><first>Badri</first><last>Narayan</last></author>
       <author><first>Nagarajan</first><last>Madasamy</last></author>
       <author><first>Ashwin</first><last>Bellur</last></author>
-      <author><first>Raghava</first><last>Krishnan</last></author>
+      <author id="raghava-krishnan"><first>Raghava</first><last>Krishnan</last></author>
       <author><first>Kasthuri</first><last>G.</last></author>
       <author><first>Vinodh M.</first><last>Vishwanath</last></author>
       <author><first>Kishore</first><last>Prahallad</last></author>
-      <author><first>Hema A.</first><last>Murthy</last></author>
+      <author id="hema-a-murthy"><first>Hema A.</first><last>Murthy</last></author>
       <pages>63–72</pages>
       <url hash="5ed88338">W11-2307</url>
       <bibkey>kurian-etal-2011-indian</bibkey>
@@ -4886,7 +4886,7 @@
     <paper id="8">
       <title><fixed-case>READ</fixed-case>–<fixed-case>IT</fixed-case>: Assessing Readability of <fixed-case>I</fixed-case>talian Texts with a View to Text Simplification</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <pages>73–83</pages>
       <url hash="edc9ddc6">W11-2308</url>
@@ -4895,9 +4895,9 @@
     <paper id="9">
       <title>Source Language Categorization for improving a Speech into Sign Language Translation System</title>
       <author><first>Verónica</first><last>López-Ludeña</last></author>
-      <author><first>Rubén</first><last>San-Segundo</last></author>
+      <author id="ruben-san-segundo"><first>Rubén</first><last>San-Segundo</last></author>
       <author><first>Syaheerah</first><last>Lufti</last></author>
-      <author><first>Juan Manuel</first><last>Lucas-Cuesta</last></author>
+      <author id="juan-manuel-lucas-cuesta"><first>Juan Manuel</first><last>Lucas-Cuesta</last></author>
       <author><first>Julián David</first><last>Echevarry</last></author>
       <author><first>Beatriz</first><last>Martínez-González</last></author>
       <pages>84–93</pages>
@@ -4923,7 +4923,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>L</fixed-case>ekbot: A talking and playing robot for children with disabilities</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <author><first>Britt</first><last>Claesson</last></author>
       <author><first>Ingrid</first><last>Mattsson Müller</last></author>
       <author><first>Stina</first><last>Ericsson</last></author>
@@ -4936,7 +4936,7 @@
     </paper>
     <paper id="13">
       <title>Using lexical and corpus resources for augmenting the <fixed-case>AAC</fixed-case>-lexicon</title>
-      <author><first>Katarina</first><last>Heimann Mühlenbock</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Heimann Mühlenbock</last></author>
       <author><first>Mats</first><last>Lundälv</last></author>
       <pages>120–127</pages>
       <url hash="382e150c">W11-2313</url>
@@ -4946,7 +4946,7 @@
       <title>Experimental Identification of the Use of Hedges in the Simplification of Numerical Expressions</title>
       <author><first>Susana</first><last>Bautista</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <author><first>Sandra</first><last>Williams</last></author>
       <pages>128–136</pages>
@@ -4955,17 +4955,17 @@
     </paper>
     <paper id="15">
       <title>Towards an on-demand Simple <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Arnaldo</first><last>Candido Jr</last></author>
+      <author id="arnaldo-candido-jr"><first>Arnaldo</first><last>Candido Jr</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>137–147</pages>
       <url hash="fac2ef01">W11-2315</url>
       <bibkey>candido-jr-etal-2011-towards</bibkey>
     </paper>
     <paper id="16">
       <title><fixed-case>SLPAT</fixed-case> Demo Session</title>
-      <editor><first>Annalu</first><last>Waller</last></editor>
+      <editor id="annalu-waller"><first>Annalu</first><last>Waller</last></editor>
       <pages>148–149</pages>
       <url hash="f730b520">W11-2316</url>
       <bibkey>waller-2011-slpat</bibkey>
@@ -4975,7 +4975,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>T</fixed-case>ext<fixed-case>I</fixed-case>nfer 2011 Workshop on Textual Entailment</booktitle>
       <url hash="b4806fdb">W11-24</url>
-      <editor><first>Sebastian</first><last>Padó</last></editor>
+      <editor id="sebastian-pado"><first>Sebastian</first><last>Padó</last></editor>
       <editor><first>Stefan</first><last>Thater</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Edinburgh, Scottland, UK</address>
@@ -4989,7 +4989,7 @@
     </frontmatter>
     <paper id="1">
       <title>Evaluating Answers to Reading Comprehension Questions in Context: Results for <fixed-case>G</fixed-case>erman and the Role of Information Structure</title>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Niels</first><last>Ott</last></author>
       <author><first>Janina</first><last>Kopp</last></author>
@@ -5020,7 +5020,7 @@
       <title>Is it Worth Submitting this Run? Assess your <fixed-case>RTE</fixed-case> System with a Good Sparring Partner</title>
       <author><first>Milen</first><last>Kouylekov</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>30–34</pages>
       <url hash="c22ea48a">W11-2404</url>
       <bibkey>kouylekov-etal-2011-worth</bibkey>
@@ -5036,7 +5036,7 @@
     <paper id="6">
       <title>Representing and resolving ambiguities in ontology-based question answering</title>
       <author><first>Christina</first><last>Unger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>40–49</pages>
       <url hash="383ed79b">W11-2406</url>
       <bibkey>unger-cimiano-2011-representing</bibkey>
@@ -5051,7 +5051,7 @@
     <paper id="8">
       <title>Discovering Commonsense Entailment Rules Implicit in Sentences</title>
       <author><first>Jonathan</first><last>Gordon</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>59–63</pages>
       <url hash="4841b38a">W11-2408</url>
       <bibkey>gordon-schubert-2011-discovering</bibkey>
@@ -5061,7 +5061,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>GEMS</fixed-case> 2011 Workshop on <fixed-case>GE</fixed-case>ometrical Models of Natural Language Semantics</booktitle>
       <url hash="6cf6f380">W11-25</url>
-      <editor><first>Sebastian</first><last>Pado</last></editor>
+      <editor id="sebastian-pado"><first>Sebastian</first><last>Pado</last></editor>
       <editor><first>Yves</first><last>Peirsman</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Edinburgh, UK</address>
@@ -5075,7 +5075,7 @@
     </frontmatter>
     <paper id="1">
       <title>How we <fixed-case>BLESS</fixed-case>ed distributional semantic evaluation</title>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>1–10</pages>
       <url hash="a447c369">W11-2501</url>
@@ -5091,8 +5091,8 @@
     <paper id="3">
       <title>Distributional semantics from text and images</title>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Giang Binh</first><last>Tran</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="giang-binh-tran"><first>Giang Binh</first><last>Tran</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>22–32</pages>
       <url hash="cfce6ae8">W11-2503</url>
       <bibkey>bruni-etal-2011-distributional</bibkey>
@@ -5110,14 +5110,14 @@
       <title>Encoding syntactic dependencies by vector permutation</title>
       <author><first>Pierpaolo</first><last>Basile</last></author>
       <author><first>Annalina</first><last>Caputo</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>43–51</pages>
       <url hash="ce46e6e2">W11-2505</url>
       <bibkey>basile-etal-2011-encoding</bibkey>
     </paper>
     <paper id="6">
       <title>Assessing Interpretable, Attribute-related Meaning Representations for Adjective-Noun Phrases in a Similarity Prediction Task</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <pages>52–61</pages>
       <url hash="5ff91fb9">W11-2506</url>
@@ -5125,8 +5125,8 @@
     </paper>
     <paper id="7">
       <title>Experimenting with transitive verbs in a <fixed-case>D</fixed-case>is<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>at</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>62–66</pages>
       <url hash="d67249c4">W11-2507</url>
       <bibkey>grefenstette-sadrzadeh-2011-experimenting</bibkey>
@@ -5134,7 +5134,7 @@
     <paper id="8">
       <title>A distributional similarity approach to the detection of semantic change in the <fixed-case>G</fixed-case>oogle <fixed-case>B</fixed-case>ooks Ngram corpus.</title>
       <author><first>Kristina</first><last>Gulordava</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>67–71</pages>
       <url hash="784a2300">W11-2508</url>
       <bibkey>gulordava-baroni-2011-distributional</bibkey>
@@ -5146,7 +5146,7 @@
       <url hash="20e75283">W11-26</url>
       <editor><first>Jeremy</first><last>Jancsary</last></editor>
       <editor><first>Friedrich</first><last>Neubarth</last></editor>
-      <editor><first>Harald</first><last>Trost</last></editor>
+      <editor id="harald-trost"><first>Harald</first><last>Trost</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Edinburgh, Scotland</address>
       <month>July</month>
@@ -5159,10 +5159,10 @@
     </frontmatter>
     <paper id="1">
       <title>Dialect Translation: Integrating <fixed-case>B</fixed-case>ayesian Co-segmentation Models with Pivot-based <fixed-case>SMT</fixed-case></title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Paul R.</first><last>Dixon</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="paul-dixon"><first>Paul R.</first><last>Dixon</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1–9</pages>
       <url hash="0c0a3dd3">W11-2601</url>
       <bibkey>paul-etal-2011-dialect</bibkey>
@@ -5178,7 +5178,7 @@
     <paper id="3">
       <title><fixed-case>P</fixed-case>addy<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>: A Minimally-Supervised Web-Corpus of Hiberno-<fixed-case>E</fixed-case>nglish</title>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Egon W.</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon W.</first><last>Stemle</last></author>
       <pages>22–29</pages>
       <url hash="3584c253">W11-2603</url>
       <bibkey>murphy-stemle-2011-paddywac</bibkey>
@@ -5193,9 +5193,9 @@
     <paper id="5">
       <title>Learning word-level dialectal variation as phonological replacement rules using a limited parallel corpus</title>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Izaskun</first><last>Etxeberria</last></author>
-      <author><first>Montse</first><last>Maritxalar</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
       <pages>39–48</pages>
       <url hash="c07655f4">W11-2605</url>
       <bibkey>hulden-etal-2011-learning</bibkey>
@@ -5204,18 +5204,18 @@
       <title>Modeling of Stylistic Variation in Social Media with Stretchy Patterns</title>
       <author><first>Philip</first><last>Gianfortoni</last></author>
       <author><first>David</first><last>Adamson</last></author>
-      <author><first>Carolyn P.</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn P.</first><last>Rosé</last></author>
       <pages>49–59</pages>
       <url hash="6e1e0f15">W11-2606</url>
       <bibkey>gianfortoni-etal-2011-modeling</bibkey>
     </paper>
     <paper id="7">
       <title>Adapting <fixed-case>S</fixed-case>lovak <fixed-case>ASR</fixed-case> for native Germans speaking <fixed-case>S</fixed-case>lovak</title>
-      <author><first>Štefan</first><last>Beňuš</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
       <author><first>Miloš</first><last>Cerňak</last></author>
       <author><first>Sakhia</first><last>Darjaa</last></author>
       <author><first>Milan</first><last>Rusko</last></author>
-      <author><first>Marián</first><last>Trnka</last></author>
+      <author id="marian-trnka"><first>Marián</first><last>Trnka</last></author>
       <pages>60–64</pages>
       <url hash="51e9b94a">W11-2607</url>
       <bibkey>benus-etal-2011-adapting</bibkey>
@@ -5243,8 +5243,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>UCNLG</fixed-case>+<fixed-case>E</fixed-case>val: Language Generation and Evaluation Workshop</booktitle>
       <url hash="10b2ef18">W11-27</url>
-      <editor><first>Anja</first><last>Belz</last></editor>
-      <editor><first>Roger</first><last>Evans</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
+      <editor id="roger-evans"><first>Roger</first><last>Evans</last></editor>
       <editor><first>Albert</first><last>Gatt</last></editor>
       <editor><first>Kristina</first><last>Striegnitz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -5259,7 +5259,7 @@
     </frontmatter>
     <paper id="1">
       <title>A New Sentence Compression Dataset and Its Use in an Abstractive Generate-and-Rank Sentence Compressor</title>
-      <author><first>Dimitrios</first><last>Galanis</last></author>
+      <author id="dimitrios-galanis"><first>Dimitrios</first><last>Galanis</last></author>
       <author><first>Ion</first><last>Androutsopoulos</last></author>
       <pages>1–11</pages>
       <url hash="83e8da47">W11-2701</url>
@@ -5276,8 +5276,8 @@
     <paper id="3">
       <title>A Corpus of Human-written Summaries of Line Graphs</title>
       <author><first>Charles</first><last>Greenbacker</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>23–27</pages>
       <url hash="b37d9cd7">W11-2703</url>
       <bibkey>greenbacker-etal-2011-corpus</bibkey>
@@ -5292,16 +5292,16 @@
     <paper id="5">
       <title>Exploring linguistically-rich patterns for question generation</title>
       <author><first>Sérgio</first><last>Curto</last></author>
-      <author><first>Ana Cristina</first><last>Mendes</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="ana-cristina-mendes"><first>Ana Cristina</first><last>Mendes</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <pages>33–38</pages>
       <url hash="1fffd551">W11-2705</url>
       <bibkey>curto-etal-2011-exploring</bibkey>
     </paper>
     <paper id="6">
       <title>Linguistically Motivated Complementizer Choice in Surface Realization</title>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>39–44</pages>
       <url hash="0dd9d892">W11-2706</url>
       <bibkey>rajkumar-white-2011-linguistically</bibkey>
@@ -5309,7 +5309,7 @@
     <paper id="7">
       <title>Exciting and interesting: issues in the generation of binomials</title>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>45–53</pages>
       <url hash="96297082">W11-2707</url>
       <bibkey>copestake-herbelot-2011-exciting</bibkey>
@@ -5400,7 +5400,7 @@
     <paper id="8">
       <title>Two Approaches for Generating Size Modifiers</title>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>63–70</pages>
       <url hash="80f0b0bc">W11-2808</url>
@@ -5465,7 +5465,7 @@
     </paper>
     <paper id="16">
       <title>Language Generation for Spoken Dialogue Systems [Invited Talk]</title>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <pages>132</pages>
       <url hash="ca184b1d">W11-2816</url>
       <bibkey>moore-2011-language</bibkey>
@@ -5481,7 +5481,7 @@
     <paper id="18">
       <title><fixed-case>E</fixed-case>asy<fixed-case>T</fixed-case>ext: an Operational <fixed-case>NLG</fixed-case> System</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Frédéric</first><last>Meunier</last></author>
+      <author id="frederic-meunier"><first>Frédéric</first><last>Meunier</last></author>
       <author><first>Vanessa</first><last>Combet</last></author>
       <pages>139–144</pages>
       <url hash="cee23032">W11-2818</url>
@@ -5499,7 +5499,7 @@
       <title>A Policy-Based Approach to Context Dependent Natural Language Generation</title>
       <author><first>Thomas</first><last>Bouttaz</last></author>
       <author><first>Edoardo</first><last>Pignotti</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Peter</first><last>Edwards</last></author>
       <pages>151–157</pages>
       <url hash="15b91693">W11-2820</url>
@@ -5516,14 +5516,14 @@
     </paper>
     <paper id="22">
       <title>Using semantic roles to improve summaries</title>
-      <author><first>Diana</first><last>Trandabăț</last></author>
+      <author id="diana-trandabat"><first>Diana</first><last>Trandabăț</last></author>
       <pages>164–169</pages>
       <url hash="ed20e7b0">W11-2822</url>
       <bibkey>trandabat-2011-using</bibkey>
     </paper>
     <paper id="23">
       <title>Building a Generator for <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>170–175</pages>
       <url hash="127b0263">W11-2823</url>
       <bibkey>mazzei-2011-building</bibkey>
@@ -5548,14 +5548,14 @@
     <paper id="26">
       <title>Generation of Formal and Informal Sentences</title>
       <author><first>Fadi</first><last>Abu Sheikha</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>187–193</pages>
       <url hash="8db90b0f">W11-2826</url>
       <bibkey>abu-sheikha-inkpen-2011-generation</bibkey>
     </paper>
     <paper id="27">
       <title>Glue Rules for Robust Chart Realization</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>194–199</pages>
       <url hash="cacc2ab0">W11-2827</url>
       <bibkey>white-2011-glue</bibkey>
@@ -5571,7 +5571,7 @@
     </paper>
     <paper id="29">
       <title>Generation Challenges 2011 Preface</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
@@ -5581,7 +5581,7 @@
     </paper>
     <paper id="30">
       <title>The <fixed-case>GRUVE</fixed-case> Challenge: Generating Routes under Uncertainty in Virtual Environments</title>
-      <author><first>Srini</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srini</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>208–211</pages>
       <url hash="2dce52fb">W11-2830</url>
@@ -5589,7 +5589,7 @@
     </paper>
     <paper id="31">
       <title>A Proposal for a <fixed-case>S</fixed-case>panish Surface Realization Shared Task</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
       <pages>212–216</pages>
       <url hash="ca81a615">W11-2831</url>
@@ -5597,28 +5597,28 @@
     </paper>
     <paper id="32">
       <title>The First Surface Realisation Shared Task: Overview and Evaluation Results</title>
-      <author><first>Anja</first><last>Belz</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Dominic</first><last>Espinosa</last></author>
       <author><first>Eric</first><last>Kow</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>217–226</pages>
       <url hash="0c73a760">W11-2832</url>
       <bibkey>belz-etal-2011-first</bibkey>
     </paper>
     <paper id="33">
       <title><fixed-case>DCU</fixed-case> at Generation Challenges 2011 Surface Realisation Track</title>
-      <author><first>Yuqing</first><last>Guo</last></author>
+      <author id="yuqing-guo"><first>Yuqing</first><last>Guo</last></author>
       <author><first>Deirdre</first><last>Hogan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>227–229</pages>
       <url hash="648345ec">W11-2833</url>
       <bibkey>guo-etal-2011-dcu</bibkey>
     </paper>
     <paper id="34">
       <title><fixed-case>ATT</fixed-case>-0: Submission to Generation Challenges 2011 Surface Realization Shared Task</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>230–231</pages>
       <url hash="fac4708e">W11-2834</url>
       <bibkey>stent-2011-att</bibkey>
@@ -5627,7 +5627,7 @@
       <title>&lt;<fixed-case>S</fixed-case>tu<fixed-case>M</fixed-case>a<fixed-case>B</fixed-case>a&gt;: From Deep Representation to Surface</title>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Simon</first><last>Mille</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>232–235</pages>
       <url hash="19a06daa">W11-2835</url>
@@ -5635,16 +5635,16 @@
     </paper>
     <paper id="36">
       <title>The <fixed-case>OSU</fixed-case> System for Surface Realization at Generation Challenges 2011</title>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Dominic</first><last>Espinosa</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>236–238</pages>
       <url hash="df0ea5b0">W11-2836</url>
       <bibkey>rajkumar-etal-2011-osu</bibkey>
     </paper>
     <paper id="37">
       <title><fixed-case>UCM</fixed-case> Submission to the Surface Realization Challenge</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>239–241</pages>
       <url hash="487f23d9">W11-2837</url>
       <bibkey>gervas-2011-ucm</bibkey>
@@ -5662,7 +5662,7 @@
       <author><first>Pinaki</first><last>Bhaskar</last></author>
       <author><first>Aniruddha</first><last>Ghosh</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>250–253</pages>
       <url hash="137f111b">W11-2839</url>
       <bibkey>bhaskar-etal-2011-may</bibkey>
@@ -5705,7 +5705,7 @@
     <paper id="44">
       <title>Data-Driven Correction of <fixed-case>F</fixed-case>unction<fixed-case>W</fixed-case>ords in Non-Native <fixed-case>E</fixed-case>nglish</title>
       <author><first>Adriane</first><last>Boyd</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>267–269</pages>
       <url hash="c782b9bf">W11-2844</url>
       <bibkey>boyd-meurers-2011-data</bibkey>
@@ -5713,11 +5713,11 @@
     <paper id="45">
       <title>Report on the Second Second Challenge on Generating Instructions in Virtual Environments (<fixed-case>GIVE</fixed-case>-2.5)</title>
       <author><first>Kristina</first><last>Striegnitz</last></author>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <author><first>Andrew</first><last>Gargett</last></author>
       <author><first>Konstantina</first><last>Garoufi</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>270–279</pages>
       <url hash="81aa74ee">W11-2845</url>
       <bibkey>striegnitz-etal-2011-report</bibkey>
@@ -5725,7 +5725,7 @@
     <paper id="46">
       <title>Direction giving: an attempt to increase user engagement</title>
       <author><first>Bob</first><last>Duncan</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>280–283</pages>
       <url hash="7ee7ef42">W11-2846</url>
       <bibkey>duncan-van-deemter-2011-direction</bibkey>
@@ -5739,9 +5739,9 @@
     </paper>
     <paper id="48">
       <title>The <fixed-case>GIVE</fixed-case>-2.5 <fixed-case>C</fixed-case> Generation System</title>
-      <author><first>David Nicolás</first><last>Racca</last></author>
+      <author id="david-nicolas-racca"><first>David Nicolás</first><last>Racca</last></author>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>290–295</pages>
       <url hash="94ac866f">W11-2848</url>
       <bibkey>racca-etal-2011-give</bibkey>
@@ -5749,14 +5749,14 @@
     <paper id="49">
       <title><fixed-case>CL</fixed-case> system: Giving instructions by corpus based selection</title>
       <author><first>Luciana</first><last>Benotti</last></author>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <pages>296–301</pages>
       <url hash="ae33b36b">W11-2849</url>
       <bibkey>benotti-denis-2011-cl</bibkey>
     </paper>
     <paper id="50">
       <title>The Loria Instruction Generation System <fixed-case>L</fixed-case> in <fixed-case>GIVE</fixed-case> 2.5</title>
-      <author><first>Alexandre</first><last>Denis</last></author>
+      <author id="alexandre-denis"><first>Alexandre</first><last>Denis</last></author>
       <pages>302–306</pages>
       <url hash="f122658a">W11-2850</url>
       <bibkey>denis-2011-loria</bibkey>
@@ -5774,7 +5774,7 @@
       <author><first>Saskia</first><last>Akkersdijk</last></author>
       <author><first>Marin</first><last>Langenbach</last></author>
       <author><first>Frieder</first><last>Loch</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>312–317</pages>
       <url hash="5199b952">W11-2852</url>
       <bibkey>akkersdijk-etal-2011-thumbs</bibkey>
@@ -5786,7 +5786,7 @@
       <author><first>Paul</first><last>Piwek</last></author>
       <author><first>Mihai</first><last>Lintean</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Cristian</first><last>Moldovan</last></author>
+      <author id="christian-moldovan"><first>Cristian</first><last>Moldovan</last></author>
       <pages>318–320</pages>
       <url hash="dc9bffa8">W11-2853</url>
       <bibkey>rus-etal-2011-question</bibkey>
@@ -5796,9 +5796,9 @@
     <meta>
       <booktitle>Proceedings of the 12th International Conference on Parsing Technologies</booktitle>
       <url hash="20558676">W11-29</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
-      <editor><first>Özlem</first><last>Çetinoglu</last></editor>
+      <editor id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoglu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Dublin, Ireland</address>
       <month>October</month>
@@ -5811,7 +5811,7 @@
     </frontmatter>
     <paper id="1">
       <title>Computing Scope in a <fixed-case>CCG</fixed-case> Parser</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1</pages>
       <url hash="7923d521">W11-2901</url>
       <bibkey>steedman-2011-computing</bibkey>
@@ -5843,16 +5843,16 @@
     </paper>
     <paper id="5">
       <title>A Word Clustering Approach to Domain Adaptation: Effective Parsing of Biomedical Texts</title>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Enrique</first><last>Henestroza Anguiano</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique</first><last>Henestroza Anguiano</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>37–42</pages>
       <url hash="a0878409">W11-2905</url>
       <bibkey>candito-etal-2011-word</bibkey>
     </paper>
     <paper id="6">
       <title>Sentence-Level Instance-Weighting for Graph-Based and Transition-Based Dependency Parsing</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Martin</first><last>Haulrich</last></author>
       <pages>43–47</pages>
       <url hash="b791d276">W11-2906</url>
@@ -5864,7 +5864,7 @@
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Xiangli</first><last>Wang</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>48–57</pages>
       <url hash="8cb3fa91">W11-2907</url>
       <bibkey>yu-etal-2011-analysis</bibkey>
@@ -5879,7 +5879,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>B</fixed-case>ayesian Network Automata for Modelling Unbounded Structures</title>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>63–74</pages>
       <url hash="252e0974">W11-2909</url>
       <bibkey>henderson-2011-bayesian</bibkey>
@@ -5897,7 +5897,7 @@
       <title>Learning Structural Dependencies of Words in the <fixed-case>Z</fixed-case>ipfian Tail</title>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <author><first>Markos</first><last>Mylonakis</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>80–91</pages>
       <url hash="8d0ebd8f">W11-2911</url>
       <bibkey>deoskar-etal-2011-learning</bibkey>
@@ -5941,7 +5941,7 @@
       <author><first>Gregory F.</first><last>Coppola</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>129–139</pages>
       <url hash="9a990d32">W11-2916</url>
       <bibkey>coppola-etal-2011-simple</bibkey>
@@ -5956,7 +5956,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>L</fixed-case>agrangian Relaxation for Inference in Natural Language Processing</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>150</pages>
       <url hash="87f6f681">W11-2918</url>
       <bibkey>collins-2011-lagrangian</bibkey>
@@ -5998,14 +5998,14 @@
     <paper id="23">
       <title>Large-Scale Corpus-Driven <fixed-case>PCFG</fixed-case> Approximation of an <fixed-case>HPSG</fixed-case></title>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>198–208</pages>
       <url hash="1e2bbc5c">W11-2923</url>
       <bibkey>zhang-krieger-2011-large</bibkey>
     </paper>
     <paper id="24">
       <title>Features for Phrase-Structure Reranking from Dependency Parses</title>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>209–214</pages>
@@ -6017,7 +6017,7 @@
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>215–219</pages>
       <url hash="801527f0">W11-2925</url>
       <bibkey>foster-etal-2011-comparing</bibkey>
@@ -6061,7 +6061,7 @@
       <booktitle>Proceedings of the 2nd Workshop on South Southeast <fixed-case>A</fixed-case>sian Natural Language Processing (<fixed-case>WSSANLP</fixed-case>)</booktitle>
       <url hash="cc9df3e3">W11-30</url>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
-      <editor><first>M. G. Abbas</first><last>Malik</last></editor>
+      <editor id="m-g-abbas-malik"><first>M. G. Abbas</first><last>Malik</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6076,7 +6076,7 @@
       <title>Hybrid Inflectional Stemmer and Rule-based Derivational Stemmer for <fixed-case>G</fixed-case>ujarati</title>
       <author><first>Kartik</first><last>Suba</last></author>
       <author><first>Dipti</first><last>Jiandani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>1–8</pages>
       <url hash="b3334a60">W11-3001</url>
       <bibkey>suba-etal-2011-hybrid</bibkey>
@@ -6113,7 +6113,7 @@
       <title>Towards a <fixed-case>M</fixed-case>alay Derivational Lexicon: Learning Affixes Using Expectation Maximization</title>
       <author><first>Suriani</first><last>Sulaiman</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <pages>30–34</pages>
       <url hash="26666def">W11-3005</url>
       <bibkey>sulaiman-etal-2011-towards</bibkey>
@@ -6121,7 +6121,7 @@
     <paper id="6">
       <title><fixed-case>P</fixed-case>unjabi Language Stemmer for nouns and proper names</title>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>35–39</pages>
       <url hash="90efe032">W11-3006</url>
       <bibkey>gupta-lehal-2011-punjabi</bibkey>
@@ -6129,7 +6129,7 @@
     <paper id="7">
       <title>Challenges in <fixed-case>U</fixed-case>rdu Text Tokenization and Sentence Boundary Disambiguation</title>
       <author><first>Zobia</first><last>Rehman</last></author>
-      <author><first>Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <pages>40–45</pages>
       <url hash="4aee4762">W11-3007</url>
@@ -6138,7 +6138,7 @@
     <paper id="8">
       <title>Challenges in Developing a Rule based <fixed-case>U</fixed-case>rdu Stemmer</title>
       <author><first>Sajjad Ahmad</first><last>Khan</last></author>
-      <author><first>Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <pages>46–51</pages>
       <url hash="2e3c9abc">W11-3008</url>
@@ -6147,7 +6147,7 @@
     <paper id="9">
       <title>Developing a New System for <fixed-case>A</fixed-case>rabic Morphological Analysis and Generation</title>
       <author><first>Mourad</first><last>Gridach</last></author>
-      <author><first>Noureddine</first><last>Chenfour</last></author>
+      <author id="noureddine-chenfour"><first>Noureddine</first><last>Chenfour</last></author>
       <pages>52–57</pages>
       <url hash="91d251dd">W11-3009</url>
       <bibkey>gridach-chenfour-2011-developing</bibkey>
@@ -6157,7 +6157,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>KRAQ</fixed-case>11 workshop</booktitle>
       <url hash="d76594b5">W11-31</url>
-      <editor><first>Patrick</first><last>Saint-Dizier</last></editor>
+      <editor id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai</address>
       <month>November</month>
@@ -6206,7 +6206,7 @@
       <title>A Rule Based Approach for Analysis of Comparative or Evaluative Questions in Tourism Domain</title>
       <author><first>Bidhan Chandra</first><last>Pal</last></author>
       <author><first>Pinaki</first><last>Bhaskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>29–37</pages>
       <url hash="ed679b2b">W11-3105</url>
       <bibkey>pal-etal-2011-rule</bibkey>
@@ -6228,7 +6228,7 @@
       <url hash="75b36ef9">W11-32</url>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6261,8 +6261,8 @@
     <paper id="3">
       <title>Integrating Models Derived from non-Parametric <fixed-case>B</fixed-case>ayesian Co-segmentation into a Statistical Machine Transliteration System</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Paul</first><last>Dixon</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="paul-dixon"><first>Paul</first><last>Dixon</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>23–27</pages>
       <url hash="cfa5bf15">W11-3203</url>
       <bibkey>finch-etal-2011-integrating</bibkey>
@@ -6279,7 +6279,7 @@
     <paper id="5">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Named Entity Transliteration Using Statistical Substring-based and Rule-based Approaches</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>32–35</pages>
       <url hash="62d2b94b">W11-3205</url>
       <bibkey>wang-tsai-2011-english</bibkey>
@@ -6296,7 +6296,7 @@
     </paper>
     <paper id="7">
       <title>Comparative Evaluation of <fixed-case>S</fixed-case>panish Segmentation Strategies for <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Transliteration</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>41–48</pages>
       <url hash="6aa2aaa4">W11-3207</url>
       <bibkey>banchs-2011-comparative</bibkey>
@@ -6306,7 +6306,7 @@
       <author><first>Takaaki</first><last>Fukunishi</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Seiichi</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>49–57</pages>
       <url hash="0a85b635">W11-3208</url>
       <bibkey>fukunishi-etal-2011-using</bibkey>
@@ -6323,7 +6323,7 @@
       <title>Mining Multi-word Named Entity Equivalents from Comparable Corpora</title>
       <author><first>Abhijit</first><last>Bhole</last></author>
       <author><first>Goutham</first><last>Tholpadi</last></author>
-      <author><first>Raghavendra</first><last>Udupa</last></author>
+      <author id="raghavendra-udupa"><first>Raghavendra</first><last>Udupa</last></author>
       <pages>65–72</pages>
       <url hash="e36ac05f">W11-3210</url>
       <bibkey>bhole-etal-2011-mining</bibkey>
@@ -6346,9 +6346,9 @@
     </paper>
     <paper id="13">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Machine Transliteration using Accessor Variety Features of Source Graphemes</title>
-      <author><first>Mike Tian-Jian</first><last>Jiang</last></author>
-      <author><first>Chan-Hung</first><last>Kuo</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="mike-tian-jian-jiang"><first>Mike Tian-Jian</first><last>Jiang</last></author>
+      <author id="chan-hung-kuo"><first>Chan-Hung</first><last>Kuo</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>86–90</pages>
       <url hash="bbeb1de3">W11-3213</url>
       <bibkey>jiang-etal-2011-english</bibkey>
@@ -6364,7 +6364,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Personal Name Transliteration by Syllable-Based Maximum Matching</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>96–100</pages>
       <url hash="1a1a3a60">W11-3215</url>
       <bibkey>kwong-2011-english-chinese</bibkey>
@@ -6381,7 +6381,7 @@
     <paper id="17">
       <title>Named Entity Transliteration Generation Leveraging Statistical Machine Translation Technology</title>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>106–111</pages>
       <url hash="d950b6ed">W11-3217</url>
       <bibkey>dasigi-diab-2011-named</bibkey>
@@ -6391,9 +6391,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Language Resources, Technology and Services in the Sharing Paradigm</booktitle>
       <url hash="c02bfb3a">W11-33</url>
-      <editor><first>Nicoletta</first><last>Calzolari</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></editor>
       <editor><first>Toru</first><last>Ishida</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <editor><first>Virach</first><last>Sornlertlamvanich</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
@@ -6407,16 +6407,16 @@
     </frontmatter>
     <paper id="1">
       <title>Prospects for an Ontology-Grounded Language Service Infrastructure</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>1–7</pages>
       <url hash="a3c7f6d0">W11-3301</url>
       <bibkey>hayashi-2011-prospects</bibkey>
     </paper>
     <paper id="2">
       <title>A Method Towards the Fully Automatic Merging of Lexical Resources</title>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Silvia</first><last>Necsulescu</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="silvia-necsulescu"><first>Silvia</first><last>Necsulescu</last></author>
       <pages>8–15</pages>
       <url hash="fcd5720c">W11-3302</url>
       <bibkey>bel-etal-2011-method</bibkey>
@@ -6441,7 +6441,7 @@
     <paper id="5">
       <title>Interoperability and Technology for a Language Resources Factory</title>
       <author><first>Marc</first><last>Poch</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>32–40</pages>
       <url hash="0a522d65">W11-3305</url>
       <bibkey>poch-bel-2011-interoperability</bibkey>
@@ -6459,10 +6459,10 @@
       <title>Promoting Interoperability of Resources in <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case></title>
       <author><first>Paul</first><last>Thompson</last></author>
       <author><first>Yoshinobu</first><last>Kano</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <author><first>Steve</first><last>Pettifer</last></author>
       <author><first>Teresa</first><last>Attwood</last></author>
-      <author><first>John</first><last>Keane</last></author>
+      <author id="john-keane"><first>John</first><last>Keane</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>50–58</pages>
       <url hash="334d6e88">W11-3307</url>
@@ -6490,9 +6490,9 @@
     </paper>
     <paper id="10">
       <title>Proposal for the International Standard Language Resource Number</title>
-      <author><first>Khalid</first><last>Choukri</last></author>
+      <author id="khalid-choukri"><first>Khalid</first><last>Choukri</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
-      <author><first>Olivier</first><last>Hamon</last></author>
+      <author id="olivier-hamon"><first>Olivier</first><last>Hamon</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
       <pages>75–83</pages>
       <url hash="b4759135">W11-3310</url>
@@ -6500,14 +6500,14 @@
     </paper>
     <paper id="11">
       <title>A Metadata Schema for the Description of Language Resources (<fixed-case>LR</fixed-case>s)</title>
-      <author><first>Maria</first><last>Gavrilidou</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="maria-gavrilidou"><first>Maria</first><last>Gavrilidou</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <author><first>Stelios</first><last>Piperidis</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
       <author><first>Victoria</first><last>Arranz</last></author>
-      <author><first>Valérie</first><last>Mapelli</last></author>
+      <author id="valerie-mapelli"><first>Valérie</first><last>Mapelli</last></author>
       <pages>84–92</pages>
       <url hash="79f09f64">W11-3311</url>
       <bibkey>gavrilidou-etal-2011-metadata</bibkey>
@@ -6515,7 +6515,7 @@
     <paper id="12">
       <title>The Language Library: Many Layers, More Knowledge</title>
       <author><first>Nicoletta</first><last>Calzolari</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Irene</first><last>Russo</last></author>
       <pages>93–97</pages>
@@ -6524,7 +6524,7 @@
     </paper>
     <paper id="13">
       <title>Sharing Resources in <fixed-case>CLARIN</fixed-case>-<fixed-case>NL</fixed-case></title>
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <author><first>Arjan</first><last>van Hessen</last></author>
       <pages>98–106</pages>
       <url hash="32342ef3">W11-3313</url>
@@ -6532,12 +6532,12 @@
     </paper>
     <paper id="14">
       <title><fixed-case>META</fixed-case>-<fixed-case>NORD</fixed-case>: Towards Sharing of Language Resources in <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries</title>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Lars</first><last>Borin</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>107–114</pages>
       <url hash="a4b65bb3">W11-3314</url>
       <bibkey>skadina-etal-2011-meta</bibkey>
@@ -6547,9 +6547,9 @@
     <meta>
       <booktitle>Proceedings of the 9th Workshop on <fixed-case>A</fixed-case>sian Language Resources</booktitle>
       <url hash="c59f4972">W11-34</url>
-      <editor><first>Rachel Edita O.</first><last>Roxas</last></editor>
+      <editor id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></editor>
       <editor><first>Sarmad</first><last>Hussain</last></editor>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6601,7 +6601,7 @@
       <author><first>Rahul</first><last>Agarwal</last></author>
       <author><first>Mridul</first><last>Gupta</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>23–30</pages>
       <url hash="15bef63e">W11-3405</url>
       <bibkey>ambati-etal-2011-error</bibkey>
@@ -6616,7 +6616,7 @@
     </paper>
     <paper id="7">
       <title>Feasibility of Leveraging Crowd Sourcing for the Creation of a Large Scale Annotated Resource for <fixed-case>H</fixed-case>indi <fixed-case>E</fixed-case>nglish Code Switched Data: A Pilot Annotation</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Ankit</first><last>Kamboj</last></author>
       <pages>36–40</pages>
       <url hash="f34bad57">W11-3407</url>
@@ -6640,7 +6640,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>P</fixed-case>hilippine Languages Online Corpora: Status, issues, and prospects</title>
-      <author><first>Shirley</first><last>Dita</last></author>
+      <author id="shirley-dita"><first>Shirley</first><last>Dita</last></author>
       <author><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>59–62</pages>
       <url hash="be987f36">W11-3410</url>
@@ -6657,7 +6657,7 @@
     </paper>
     <paper id="12">
       <title>Towards a Computational Semantic Analyzer for <fixed-case>U</fixed-case>rdu</title>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <pages>71–78</pages>
       <url hash="ac49d9ec">W11-3412</url>
@@ -6666,7 +6666,7 @@
     <paper id="13">
       <title>Word Disambiguation in Shahmukhi to Gurmukhi Transliteration</title>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <pages>79–87</pages>
       <url hash="cd951312">W11-3413</url>
       <bibkey>saini-lehal-2011-word</bibkey>
@@ -6678,7 +6678,7 @@
       <url hash="abd7a889">W11-35</url>
       <editor><first>Hideto</first><last>Kazawa</last></editor>
       <editor><first>Hisami</first><last>Suzuki</last></editor>
-      <editor><first>Taku</first><last>Kudo</last></editor>
+      <editor id="taku-kudo"><first>Taku</first><last>Kudo</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6724,7 +6724,7 @@
       <author><first>Asad</first><last>Habib</last></author>
       <author><first>Masakazu</first><last>Iwatate</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>26–30</pages>
       <url hash="1bf0c118">W11-3504</url>
       <bibkey>habib-etal-2011-different</bibkey>
@@ -6744,7 +6744,7 @@
       <author><first>Seiji</first><last>Kasahara</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>38–42</pages>
       <url hash="a4fab4cc">W11-3506</url>
       <bibkey>kasahara-etal-2011-error</bibkey>
@@ -6767,11 +6767,11 @@
     </paper>
     <paper id="9">
       <title>Robustness Analysis of Adaptive <fixed-case>C</fixed-case>hinese Input Methods</title>
-      <author><first>Mike Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Mike Tian-Jian</first><last>Jiang</last></author>
       <author><first>Cheng-Wei</first><last>Lee</last></author>
       <author><first>Chad</first><last>Liu</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>53–61</pages>
       <url hash="5d2d4c2d">W11-3509</url>
       <bibkey>jiang-etal-2011-robustness</bibkey>
@@ -6782,7 +6782,7 @@
       <booktitle>Proceedings of the Fifth International Workshop On Cross Lingual Information Access</booktitle>
       <url hash="2fb6d946">W11-36</url>
       <editor><first>Asif</first><last>Ekbal</last></editor>
-      <editor><first>Deyi</first><last>Xiong</last></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6803,7 +6803,7 @@
     <paper id="2">
       <title>Using Explicit Semantic Analysis for Cross-Lingual Link Discovery</title>
       <author><first>Petr</first><last>Knoth</last></author>
-      <author><first>Lukas</first><last>Zilka</last></author>
+      <author id="lukas-zilka"><first>Lukas</first><last>Zilka</last></author>
       <author><first>Zdenek</first><last>Zdrahal</last></author>
       <pages>2–10</pages>
       <url hash="573c14c8">W11-3602</url>
@@ -6828,7 +6828,7 @@
       <title>Soundex-based Translation Correction in <fixed-case>U</fixed-case>rdu–<fixed-case>E</fixed-case>nglish Cross-Language Information Retrieval</title>
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Prasenjit</first><last>Majumder</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>25–29</pages>
       <url hash="dd90ae24">W11-3605</url>
       <bibkey>faruqui-etal-2011-soundex</bibkey>
@@ -6839,7 +6839,7 @@
       <author><first>Erik</first><last>Peterson</last></author>
       <author><first>John</first><last>Chen</last></author>
       <author><first>Yana</first><last>Petrova</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>30–34</pages>
       <url hash="125eb334">W11-3606</url>
       <bibkey>yang-etal-2011-unsupervised-russian</bibkey>
@@ -6856,8 +6856,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Sentiment Analysis where <fixed-case>AI</fixed-case> meets Psychology (<fixed-case>SAAIP</fixed-case> 2011)</booktitle>
       <url hash="27a28ddc">W11-37</url>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Manabu</first><last>Okumura</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="manabu-okumura"><first>Manabu</first><last>Okumura</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Chiang Mai, Thailand</address>
       <month>November</month>
@@ -6870,7 +6870,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Keynote: What are Subjectivity, Sentiment, and Affect?</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>1</pages>
       <url hash="68960f9a">W11-3701</url>
       <bibkey>hovy-2011-invited</bibkey>
@@ -6895,7 +6895,7 @@
       <title>Towards automatic detection of antisocial behavior from texts</title>
       <author><first>Myriam</first><last>Munezero</last></author>
       <author><first>Tuomo</first><last>Kakkonen</last></author>
-      <author><first>Calkin</first><last>Montero</last></author>
+      <author id="calkin-s-montero"><first>Calkin</first><last>Montero</last></author>
       <pages>20–27</pages>
       <url hash="5f576070">W11-3704</url>
       <bibkey>munezero-etal-2011-towards</bibkey>
@@ -6905,7 +6905,7 @@
       <author><first>Leila</first><last>Amgoud</last></author>
       <author><first>Florence</first><last>Bannay</last></author>
       <author><first>Charlotte</first><last>Costedoat</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <author><first>Camille</first><last>Albert</last></author>
       <pages>28–34</pages>
       <url hash="c30af72d">W11-3705</url>
@@ -6923,8 +6923,8 @@
     <paper id="7">
       <title>Sense-level Subjectivity in a Multilingual Setting</title>
       <author><first>Carmen</first><last>Banea</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>44–50</pages>
       <url hash="25238a3d">W11-3707</url>
       <bibkey>banea-etal-2011-sense</bibkey>
@@ -7003,7 +7003,7 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Statistical Parsing of Morphologically Rich Languages</booktitle>
       <url hash="ec03450e">W11-38</url>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
       <editor><first>Jennifer</first><last>Foster</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -7018,8 +7018,8 @@
     </frontmatter>
     <paper id="1">
       <title>Statistical Dependency Parsing in <fixed-case>K</fixed-case>orean: From Corpus Generation To Automatic Parsing</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1–11</pages>
       <url hash="199e626f">W11-3801</url>
       <bibkey>choi-palmer-2011-statistical</bibkey>
@@ -7028,7 +7028,7 @@
       <title>Morphological Features for Parsing Morphologically-rich Languages: A Case of <fixed-case>A</fixed-case>rabic</title>
       <author><first>Jon</first><last>Dehdari</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>12–21</pages>
       <url hash="be17fb62">W11-3802</url>
       <bibkey>dehdari-etal-2011-morphological</bibkey>
@@ -7036,8 +7036,8 @@
     <paper id="3">
       <title><fixed-case>F</fixed-case>rench parsing enhanced with a word clustering method based on a syntactic lexicon</title>
       <author><first>Anthony</first><last>Sigogne</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Éric</first><last>Laporte</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="eric-laporte"><first>Éric</first><last>Laporte</last></author>
       <pages>22–27</pages>
       <url hash="8275a68e">W11-3803</url>
       <bibkey>sigogne-etal-2011-french</bibkey>
@@ -7045,8 +7045,8 @@
     <paper id="4">
       <title>Testing the Effect of Morphological Disambiguation in Dependency Parsing of <fixed-case>B</fixed-case>asque</title>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>28–33</pages>
       <url hash="9e372104">W11-3804</url>
       <bibkey>bengoetxea-etal-2011-testing</bibkey>
@@ -7054,7 +7054,7 @@
     <paper id="5">
       <title>Discontinuous Data-Oriented Parsing: A mildly context-sensitive all-fragments grammar</title>
       <author><first>Andreas</first><last>van Cranenburgh</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <author><first>Federico</first><last>Sangati</last></author>
       <pages>34–44</pages>
       <url hash="3d96f6f8">W11-3805</url>
@@ -7062,7 +7062,7 @@
     </paper>
     <paper id="6">
       <title>Multiword Expressions in Statistical Dependency Parsing</title>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <author><first>Tugay</first><last>İlbay</last></author>
       <author><first>Ozan Arkan</first><last>Can</last></author>
       <pages>45–55</pages>
@@ -7072,7 +7072,7 @@
     <paper id="7">
       <title>Linguistically Rich Graph Based Data Driven Parsing For <fixed-case>H</fixed-case>indi</title>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Raghu Pujitha</first><last>Gade</last></author>
+      <author id="raghu-pujitha-gade"><first>Raghu Pujitha</first><last>Gade</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>56–61</pages>
       <url hash="6235c2de">W11-3807</url>
@@ -7090,8 +7090,8 @@
     <meta>
       <booktitle>Proceedings of Workshop on Robust Unsupervised and Semisupervised Methods in Natural Language Processing</booktitle>
       <url hash="c7aeb259">W11-39</url>
-      <editor><first>Chris</first><last>Biemann</last></editor>
-      <editor><first>Anders</first><last>Søgaard</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Søgaard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -7105,7 +7105,7 @@
     <paper id="1">
       <title><fixed-case>G</fixed-case>ibbs Sampling with Treeness Constraint in Unsupervised Dependency Parsing</title>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>1–8</pages>
       <url hash="ada37c51">W11-3901</url>
       <bibkey>marecek-zabokrtsky-2011-gibbs</bibkey>
@@ -7114,7 +7114,7 @@
       <title>Guided Self Training for Sentiment Classification</title>
       <author><first>Brett</first><last>Drury</last></author>
       <author><first>Luís</first><last>Torgo</last></author>
-      <author><first>Jose Joao</first><last>Almeida</last></author>
+      <author id="jose-joao-almeida"><first>Jose Joao</first><last>Almeida</last></author>
       <pages>9–16</pages>
       <url hash="1377edfc">W11-3902</url>
       <bibkey>drury-etal-2011-guided</bibkey>
@@ -7152,7 +7152,7 @@
     </paper>
     <paper id="6">
       <title>Investigation of Co-training Views and Variations for Semantic Role Labeling</title>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
       <author><first>Mohd Sapiyan</first><last>Baba</last></author>
       <pages>41–49</pages>
       <url hash="afa5d007">W11-3906</url>
@@ -7163,10 +7163,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>RANLP</fixed-case> 2011 Workshop on Information Extraction and Knowledge Acquisition</booktitle>
       <url hash="b6dd02eb">W11-40</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
       <editor><first>Kuzman</first><last>Ganchev</last></editor>
-      <editor><first>Jerry</first><last>Hobbs</last></editor>
+      <editor id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -7179,7 +7179,7 @@
     </frontmatter>
     <paper id="1">
       <title>INVITED TALK 1: The Knowledge Base Population Task: Challenges for Information Extraction</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>1</pages>
       <url hash="52a7836f">W11-4001</url>
       <bibkey>grishman-2011-invited</bibkey>
@@ -7187,7 +7187,7 @@
     <paper id="2">
       <title>Fine-grained Entity Set Refinement with User Feedback</title>
       <author><first>Bonan</first><last>Min</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>2–6</pages>
       <url hash="7b3628ef">W11-4002</url>
       <bibkey>min-grishman-2011-fine</bibkey>
@@ -7195,7 +7195,7 @@
     <paper id="3">
       <title>Extraction of Domain-specific Opinion Words for Similar Domains</title>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>7–12</pages>
       <url hash="3854b282">W11-4003</url>
       <bibkey>chetviorkin-loukachevitch-2011-extraction</bibkey>
@@ -7219,14 +7219,14 @@
     <paper id="6">
       <title>How to Distinguish a Kidney Theft from a Death Car? Experiments in Clustering Urban-Legend Texts</title>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <pages>29–36</pages>
       <url hash="8b0d3e77">W11-4006</url>
       <bibkey>grundkiewicz-gralinski-2011-distinguish</bibkey>
     </paper>
     <paper id="7">
       <title>Machine Reading Between the Lines: A Simple Evaluation Framework for Extracted Knowledge Bases</title>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Alexander</first><last>Yates</last></author>
       <pages>37–40</pages>
       <url hash="69aff865">W11-4007</url>
@@ -7235,7 +7235,7 @@
     <paper id="8">
       <title>Temporal Expressions Extraction in <fixed-case>SMS</fixed-case> messages</title>
       <author><first>Stéphanie</first><last>Weiser</last></author>
-      <author><first>Louis-Amélie</first><last>Cougnon</last></author>
+      <author id="louise-amelie-cougnon"><first>Louis-Amélie</first><last>Cougnon</last></author>
       <author><first>Patrick</first><last>Watrin</last></author>
       <pages>41–44</pages>
       <url hash="451f4d9f">W11-4008</url>
@@ -7256,7 +7256,7 @@
       <editor><first>Cristina</first><last>Vertan</last></editor>
       <editor><first>Milena</first><last>Slavcheva</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Stelios</first><last>Piperidis</last></editor>
+      <editor id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -7269,7 +7269,7 @@
     </frontmatter>
     <paper id="1">
       <title>Endangered <fixed-case>U</fixed-case>ralic Languages and Language Technologies</title>
-      <author><first>Gábor</first><last>Prószéky</last></author>
+      <author id="gabor-proszeky"><first>Gábor</first><last>Prószéky</last></author>
       <pages>1–2</pages>
       <url hash="55f70d84">W11-4101</url>
       <bibkey>proszeky-2011-endangered</bibkey>
@@ -7287,7 +7287,7 @@
     <paper id="3">
       <title>Query classification via Topic Models for an art image archive</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <author><first>Ed</first><last>Vald</last></author>
       <pages>11–18</pages>
       <url hash="3a56fd8b">W11-4103</url>
@@ -7331,7 +7331,7 @@
       <title>Language Technology Support for Semantic Annotation of Icono-graphic Descriptions</title>
       <author><first>Kamenka</first><last>Staykova</last></author>
       <author><first>Gennady</first><last>Agre</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>51–56</pages>
       <url hash="d0043790">W11-4108</url>
@@ -7365,8 +7365,8 @@
     </paper>
     <paper id="12">
       <title>Diachronic Stylistic Changes in <fixed-case>B</fixed-case>ritish and <fixed-case>A</fixed-case>merican Varieties of 20th Century Written <fixed-case>E</fixed-case>nglish Language</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>78–85</pages>
       <url hash="e195ef82">W11-4112</url>
       <bibkey>stajner-mitkov-2011-diachronic</bibkey>
@@ -7378,7 +7378,7 @@
       <author><first>Rolf</first><last>Bardeli</last></author>
       <author><first>Oliver</first><last>Schreer</last></author>
       <author><first>Stefano</first><last>Masneri</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Han</first><last>Sloetjes</last></author>
       <author><first>Przemek</first><last>Lenkiewicz</last></author>
       <author><first>Eric</first><last>Auer</last></author>
@@ -7389,10 +7389,10 @@
     <paper id="14">
       <title>Handwritten Text Recognition for Historical Documents</title>
       <author><first>Verónica</first><last>Romero</last></author>
-      <author><first>Nicolás</first><last>Serrano</last></author>
-      <author><first>Alejandro H.</first><last>Toselli</last></author>
-      <author><first>Joan Andreu</first><last>Sánchez</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="nicolas-serrano"><first>Nicolás</first><last>Serrano</last></author>
+      <author id="alejandro-h-toselli"><first>Alejandro H.</first><last>Toselli</last></author>
+      <author id="joan-andreu-sanchez"><first>Joan Andreu</first><last>Sánchez</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <pages>90–96</pages>
       <url hash="5a71eefe">W11-4114</url>
       <bibkey>romero-etal-2011-handwritten</bibkey>
@@ -7411,7 +7411,7 @@
       <booktitle>Proceedings of the Second Workshop on Biomedical Natural Language Processing</booktitle>
       <url hash="0e7e4586">W11-42</url>
       <editor><first>Guergana</first><last>Savova</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
@@ -7457,7 +7457,7 @@
       <author><first>Georgi</first><last>Georgiev</last></author>
       <author><first>Valentin</first><last>Zhikov</last></author>
       <author><first>Borislav</first><last>Popov</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>27–34</pages>
       <url hash="3763a8ea">W11-4205</url>
       <bibkey>georgiev-etal-2011-building</bibkey>
@@ -7475,9 +7475,9 @@
       <author><first>Caroline</first><last>Hagège</last></author>
       <author><first>Quentin</first><last>Gicquel</last></author>
       <author><first>Suzanne</first><last>Pereira</last></author>
-      <author><first>Stefan</first><last>Darmoni</last></author>
-      <author><first>Frédérique</first><last>Segond</last></author>
-      <author><first>Marie-Hélène</first><last>Metzger</last></author>
+      <author id="stefan-darmoni"><first>Stefan</first><last>Darmoni</last></author>
+      <author id="frederique-segond"><first>Frédérique</first><last>Segond</last></author>
+      <author id="marie-helene-metzger"><first>Marie-Hélène</first><last>Metzger</last></author>
       <pages>43–48</pages>
       <url hash="68b1368a">W11-4207</url>
       <bibkey>proux-etal-2011-architecture</bibkey>
@@ -7495,10 +7495,10 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Annotation and Exploitation of Parallel Corpora</booktitle>
       <url hash="1690c4e2">W11-43</url>
-      <editor><first>Kiril</first><last>Simov</last></editor>
+      <editor id="kiril-simov"><first>Kiril</first><last>Simov</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
-      <editor><first>Radovan</first><last>Garabik</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="radovan-garabik"><first>Radovan</first><last>Garabik</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -7511,7 +7511,7 @@
     </frontmatter>
     <paper id="1">
       <title>Reusing Parallel Corpora between Related Languages</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1</pages>
       <url hash="9dc15bb7">W11-4301</url>
       <bibkey>nakov-2011-reusing</bibkey>
@@ -7519,7 +7519,7 @@
     <paper id="2">
       <title>Discontinuous Constituents: a Problematic Case for Parallel Corpora Annotation and Querying</title>
       <author><first>Marilisa</first><last>Amoia</last></author>
-      <author><first>Kerstin</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin</first><last>Kunz</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <pages>2–10</pages>
       <url hash="94c4c438">W11-4302</url>
@@ -7582,7 +7582,7 @@
       <booktitle>Proceedings of the 9th International Workshop on Finite State Methods and Natural Language Processing</booktitle>
       <url hash="795ad3fe">W11-44</url>
       <editor><first>Andreas</first><last>Maletti</last></editor>
-      <editor><first>Matthieu</first><last>Constant</last></editor>
+      <editor id="matthieu-constant"><first>Matthieu</first><last>Constant</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Blois, France</address>
       <month>July</month>
@@ -7616,8 +7616,8 @@
     </paper>
     <paper id="4">
       <title>Supervised and Semi-Supervised Sequence Learning for Recognition of Requisite Part and Effectuation Part in Law Sentences</title>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
-      <author><first>Ngo Xuan</first><last>Bach</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="ngo-xuan-bach"><first>Ngo Xuan</first><last>Bach</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>21–29</pages>
       <url hash="789289c8">W11-4404</url>
@@ -7641,7 +7641,7 @@
       <title><fixed-case>E</fixed-case>-Dictionaries and Finite-State Automata for the Recognition of Named Entities</title>
       <author><first>Cvetana</first><last>Krstev</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
-      <author><first>Ivan</first><last>Obradović</last></author>
+      <author id="ivan-obradovic"><first>Ivan</first><last>Obradović</last></author>
       <author><first>Miloš</first><last>Utvić</last></author>
       <pages>48–56</pages>
       <url hash="87e1702b">W11-4407</url>
@@ -7657,7 +7657,7 @@
     <paper id="9">
       <title>Open Source <fixed-case>WFST</fixed-case> Tools for <fixed-case>LVCSR</fixed-case> Cascade Development</title>
       <author><first>Josef R.</first><last>Novak</last></author>
-      <author><first>Nobuaki</first><last>Minematsu</last></author>
+      <author id="nobuaki-minematsu"><first>Nobuaki</first><last>Minematsu</last></author>
       <author><first>Keikichi</first><last>Hirose</last></author>
       <pages>65–73</pages>
       <url hash="1c62e880">W11-4409</url>
@@ -7665,7 +7665,7 @@
     </paper>
     <paper id="10">
       <title>Intersection of Multitape Transducers vs. Cascade of Binary Transducers: The Example of <fixed-case>E</fixed-case>gyptian Hieroglyphs Transliteration</title>
-      <author><first>François</first><last>Barthélemy</last></author>
+      <author id="francois-barthelemy"><first>François</first><last>Barthélemy</last></author>
       <author><first>Serge</first><last>Rosmorduc</last></author>
       <pages>74–82</pages>
       <url hash="dfe2855a">W11-4410</url>
@@ -7681,7 +7681,7 @@
     <paper id="12">
       <title><fixed-case>FT</fixed-case>race: A Tool for Finite-State Morphology</title>
       <author><first>James</first><last>Kilbury</last></author>
-      <author><first>Katina</first><last>Bontcheva</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
       <author><first>Younes</first><last>Samih</last></author>
       <pages>88–92</pages>
       <url hash="a84aa180">W11-4412</url>
@@ -7699,8 +7699,8 @@
     </paper>
     <paper id="14">
       <title>Stochastic <fixed-case>K</fixed-case>-<fixed-case>TSS</fixed-case> Bi-Languages for Machine Translation</title>
-      <author><first>M. Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>98–106</pages>
       <url hash="7ec776f4">W11-4414</url>
       <bibkey>torres-casacuberta-2011-stochastic</bibkey>
@@ -7709,16 +7709,16 @@
       <title>Measuring the Confusability of Pronunciations in Speech Recognition</title>
       <author><first>Panagiota</first><last>Karanasou</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <pages>107–115</pages>
       <url hash="44151786">W11-4415</url>
       <bibkey>karanasou-etal-2011-measuring</bibkey>
     </paper>
     <paper id="16">
       <title>Fast Yet Rich Morphological Analysis</title>
-      <author><first>Mohamed</first><last>Altantawy</last></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>Altantawy</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>116–124</pages>
       <url hash="7cedc858">W11-4416</url>
       <bibkey>altantawy-etal-2011-fast</bibkey>
@@ -7729,7 +7729,7 @@
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <author><first>Lamia</first><last>Tounsi</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>125–133</pages>
       <url hash="4ad5d024">W11-4417</url>
       <bibkey>attia-etal-2011-open</bibkey>
@@ -7738,7 +7738,7 @@
       <title>Recognition and Translation of <fixed-case>A</fixed-case>rabic Named Entities with <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> Using a New Representation Model</title>
       <author><first>Héla</first><last>Fehri</last></author>
       <author><first>Kais</first><last>Haddar</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>134–142</pages>
       <url hash="dfdd35af">W11-4418</url>
       <bibkey>fehri-etal-2011-recognition</bibkey>
@@ -7756,7 +7756,7 @@
     </frontmatter>
     <paper id="1">
       <title>Multi-Document Discourse Parsing Using Traditional and Hierarchical Machine Learning</title>
-      <author><first>Erick Galani</first><last>Maziero</last></author>
+      <author id="erick-galani-maziero"><first>Erick Galani</first><last>Maziero</last></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
       <url hash="a32ba389">W11-4501</url>
       <bibkey>maziero-pardo-2011-multi</bibkey>
@@ -7770,15 +7770,15 @@
     </paper>
     <paper id="3">
       <title><fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: construção semiautomática de um léxico computacional de verbos para o português do Brasil (<fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: semiautomatic construction of a computational verb lexicon for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Carolina Evaristo</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina Evaristo</first><last>Scarton</last></author>
       <url hash="1df21dcb">W11-4503</url>
       <bibkey>scarton-2011-verbnet</bibkey>
     </paper>
     <paper id="4">
       <title>Comparando Avaliações de Inteligibilidade Textual entre Originais e Traduções de Textos Literários (Comparing Textual Intelligibility Evaluations among Literary Source Texts and their Translations) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Bianca Franco</first><last>Pasqualini</last></author>
-      <author><first>Carolina Evaristo</first><last>Scarton</last></author>
-      <author><first>Maria José B.</first><last>Finatto</last></author>
+      <author id="carolina-scarton"><first>Carolina Evaristo</first><last>Scarton</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José B.</first><last>Finatto</last></author>
       <url hash="e58f69db">W11-4504</url>
       <bibkey>pasqualini-etal-2011-comparando</bibkey>
     </paper>
@@ -7793,17 +7793,17 @@
     </paper>
     <paper id="6">
       <title>Características do jornalismo popular: avaliação da inteligibilidade e auxílio à descrição do gênero (Characteristics of Popular News: the Evaluation of Intelligibility and Support to the Genre Description) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Maria José B.</first><last>Finatto</last></author>
-      <author><first>Carolina Evaristo</first><last>Scarton</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José B.</first><last>Finatto</last></author>
+      <author id="carolina-scarton"><first>Carolina Evaristo</first><last>Scarton</last></author>
       <author><first>Amanda</first><last>Rocha</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <url hash="9e4a0442">W11-4506</url>
       <bibkey>finatto-etal-2011-caracteristicas</bibkey>
     </paper>
     <paper id="7">
       <title>Construction of a <fixed-case>P</fixed-case>ortuguese Opinion Lexicon from multiple resources</title>
       <author><first>Marlo</first><last>Souza</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Débora</first><last>Busetti</last></author>
       <author><first>Rove</first><last>Chishman</last></author>
       <author><first>Isa Mara</first><last>Alves</last></author>
@@ -7813,14 +7813,14 @@
     <paper id="8">
       <title>Using machine learning methods to avoid the pitfall of cognates and false friends in <fixed-case>S</fixed-case>panish-<fixed-case>P</fixed-case>ortuguese word pairs</title>
       <author><first>Lianet</first><last>Sepúlveda Torres</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <url hash="ac6870cc">W11-4508</url>
       <bibkey>sepulveda-torres-aluisio-2011-using</bibkey>
     </paper>
     <paper id="9">
       <title>Análise automática de aspectos relacionados a coerência semântica em resumos acadêmicos (Automatic Analysis of Semantic Coherence Aspects in Academic Abstracts) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Vinícius Mourão Alves</first><last>de Souza</last></author>
-      <author><first>Valéria Delisandra</first><last>Feltrim</last></author>
+      <author id="vinicius-mourao-alves-de-souza"><first>Vinícius Mourão Alves</first><last>de Souza</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria Delisandra</first><last>Feltrim</last></author>
       <url hash="1c9e8bad">W11-4509</url>
       <bibkey>de-souza-feltrim-2011-analise</bibkey>
     </paper>
@@ -7852,7 +7852,7 @@
     <paper id="13">
       <title>Extração de Contextos Definitórios a partir de Textos em Língua Portuguesa (Extraction of Defining Contexts from Texts in <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Igor S.</first><last>Wendt</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="208a5285">W11-4513</url>
       <bibkey>wendt-vieira-2011-extracao</bibkey>
     </paper>
@@ -7869,16 +7869,16 @@
     <paper id="15">
       <title>Minimização do Impacto do Problema de Desvio de Conceito por Meio de Acoplamento em Ambiente de Aprendizado Sem Fim (Minimizing the Impact of the Concept Drift Problem by Using a Framework of Endless Learning) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Maisa Cristina</first><last>Duarte</last></author>
-      <author><first>Estevam R.</first><last>Hruschka Jr.</last></author>
+      <author id="estevam-r-hruschka-jr"><first>Estevam R.</first><last>Hruschka Jr.</last></author>
       <author><first>Maria</first><last>do Carmo Nicoletti</last></author>
       <url hash="4fa94d39">W11-4515</url>
       <bibkey>duarte-etal-2011-minimizacao</bibkey>
     </paper>
     <paper id="16">
       <title>Generating a Pronunciation Dictionary for <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese Using a Joint-Sequence Model with Embedded Stress Assignment</title>
-      <author><first>Arlindo</first><last>Veiga</last></author>
+      <author id="arlindo-veiga"><first>Arlindo</first><last>Veiga</last></author>
       <author><first>Sara</first><last>Candeias</last></author>
-      <author><first>Fernando</first><last>Perdigão</last></author>
+      <author id="fernando-perdigao"><first>Fernando</first><last>Perdigão</last></author>
       <url hash="7e6358de">W11-4516</url>
       <bibkey>veiga-etal-2011-generating</bibkey>
     </paper>
@@ -7892,14 +7892,14 @@
     <paper id="18">
       <title>Part-of-Speech Tagging of <fixed-case>P</fixed-case>ortuguese Using Hidden <fixed-case>M</fixed-case>arkov Models with Character Language Model Emissions</title>
       <author><first>Marcelo Rodrigues</first><last>de Holanda Maia</last></author>
-      <author><first>Geraldo Bonorino</first><last>Xexéo</last></author>
+      <author id="geraldo-bonorino-xexeo"><first>Geraldo Bonorino</first><last>Xexéo</last></author>
       <url hash="fce74449">W11-4518</url>
       <bibkey>de-holanda-maia-xexeo-2011-part</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>P</fixed-case>ropbank-Br: a <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese corpus annotated with semantic role labels</title>
-      <author><first>Magali Sanches</first><last>Duran</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <url hash="533b3427">W11-4519</url>
       <bibkey>duran-aluisio-2011-propbank</bibkey>
     </paper>
@@ -7921,21 +7921,21 @@
     <paper id="22">
       <title><fixed-case>P</fixed-case>or<fixed-case>TA</fixed-case>l: Recursos e Ferramentas de Tradução Automática para o Português do Brasil (<fixed-case>P</fixed-case>or<fixed-case>TA</fixed-case>l: Resources and Tools for Machine Translation of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Thiago Lima</first><last>Vieira</last></author>
-      <author><first>Helena</first><last>de Medeiros Caseli</last></author>
+      <author id="helena-de-medeiros-caseli"><first>Helena</first><last>de Medeiros Caseli</last></author>
       <url hash="1c4a340e">W11-4522</url>
       <bibkey>vieira-de-medeiros-caseli-2011-portal</bibkey>
     </paper>
     <paper id="23">
       <title>The Use of Metrics for Measuring Informality Levels in Web 2.0 Texts</title>
       <author><first>Alejandro</first><last>Mosquera</last></author>
-      <author><first>Paloma</first><last>Moreda</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda</last></author>
       <url hash="5fa1582a">W11-4523</url>
       <bibkey>mosquera-moreda-2011-use</bibkey>
     </paper>
     <paper id="24">
       <title>Uma abordagem de classificação automática para Tipo de Pergunta e Tipo de Resposta (An Automatic Approach for Classification of Question Type and Answer Type) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Patricia Nunes</first><last>Gonçalves</last></author>
-      <author><first>António Horta</first><last>Branco</last></author>
+      <author id="patricia-goncalves"><first>Patricia Nunes</first><last>Gonçalves</last></author>
+      <author id="antonio-branco"><first>António Horta</first><last>Branco</last></author>
       <url hash="29078f81">W11-4524</url>
       <bibkey>goncalves-branco-2011-uma</bibkey>
     </paper>
@@ -7958,7 +7958,7 @@
       <title>Quotation Extraction for <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>William Paulo Ducca</first><last>Fernandes</last></author>
       <author><first>Eduardo</first><last>Motta</last></author>
-      <author><first>Ruy Luiz</first><last>Milidiú</last></author>
+      <author id="ruy-luiz-milidiu"><first>Ruy Luiz</first><last>Milidiú</last></author>
       <url hash="ec454b0a">W11-4527</url>
       <bibkey>fernandes-etal-2011-quotation</bibkey>
     </paper>
@@ -7988,7 +7988,7 @@
     </paper>
     <paper id="31">
       <title>A Generative Approach for Multi-Document Summarization using Semantic-Discursive information</title>
-      <author><first>Maria Lucía Castro</first><last>Jorge</last></author>
+      <author id="maria-lucia-castro-jorge"><first>Maria Lucía Castro</first><last>Jorge</last></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
       <url hash="a05f9c85">W11-4531</url>
       <bibkey>jorge-pardo-2011-generative</bibkey>
@@ -8012,9 +8012,9 @@
     <meta>
       <booktitle>Proceedings of the 18th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 2011)</booktitle>
       <url hash="1e445f7e">W11-46</url>
-      <editor><first>Bolette Sandford</first><last>Pedersen</last></editor>
-      <editor><first>Gunta</first><last>Nešpore</last></editor>
-      <editor><first>Inguna</first><last>Skadiņa</last></editor>
+      <editor id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></editor>
+      <editor id="gunta-nespore"><first>Gunta</first><last>Nešpore</last></editor>
+      <editor id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></editor>
       <publisher>Northern European Association for Language Technology (NEALT)</publisher>
       <address>Riga, Latvia</address>
       <month>May</month>
@@ -8027,7 +8027,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Paper: When <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et meets a Controlled Natural Language</title>
-      <author><first>Guntis</first><last>Bārzdiņš</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Bārzdiņš</last></author>
       <pages>2–5</pages>
       <url hash="bbbb0aba">W11-4601</url>
       <bibkey>barzdins-2011-invited</bibkey>
@@ -8041,15 +8041,15 @@
     </paper>
     <paper id="3">
       <title>Invited Paper: Discourse Structures and Language Technologies</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>12–16</pages>
       <url hash="056f8fc1">W11-4603</url>
       <bibkey>webber-2011-invited</bibkey>
     </paper>
     <paper id="4">
       <title>Identification of sense selection in regular polysemy using shallow features</title>
-      <author><first>Héctor Martínez</first><last>Alonso</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor Martínez</first><last>Alonso</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Bolette</first><last>Sandford Pedersen</last></author>
       <pages>18–25</pages>
       <url hash="55303926">W11-4604</url>
@@ -8066,7 +8066,7 @@
     </paper>
     <paper id="6">
       <title>A <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>34–41</pages>
       <url hash="446349b6">W11-4606</url>
       <bibkey>bick-2011-framenet</bibkey>
@@ -8080,7 +8080,7 @@
     </paper>
     <paper id="8">
       <title>The Formal Patterns of the <fixed-case>L</fixed-case>ithuanian Verb Forms</title>
-      <author><first>Loïc</first><last>Boizou</last></author>
+      <author id="loic-boizou"><first>Loïc</first><last>Boizou</last></author>
       <pages>50–57</pages>
       <url hash="c325c453">W11-4608</url>
       <bibkey>boizou-2011-formal</bibkey>
@@ -8111,17 +8111,17 @@
     </paper>
     <paper id="12">
       <title>Experiments on <fixed-case>L</fixed-case>ithuanian Term Extraction</title>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
       <author><first>Erika</first><last>Rimkutė</last></author>
       <author><first>Andrius</first><last>Utka</last></author>
-      <author><first>Loic</first><last>Boizou</last></author>
+      <author id="loic-boizou"><first>Loic</first><last>Boizou</last></author>
       <pages>82–89</pages>
       <url hash="71739005">W11-4612</url>
       <bibkey>grigonyte-etal-2011-experiments</bibkey>
     </paper>
     <paper id="13">
       <title>Fishing in a Speech Stream – Angling for a Lexicon</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>90–97</pages>
       <url hash="baeb44d5">W11-4613</url>
       <bibkey>henrichsen-2011-fishing</bibkey>
@@ -8155,15 +8155,15 @@
     </paper>
     <paper id="17">
       <title>What kind of corpus is a web corpus?</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
-      <author><first>Emiliano Raul</first><last>Guevara</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="emiliano-raul-guevara"><first>Emiliano Raul</first><last>Guevara</last></author>
       <pages>122–129</pages>
       <url hash="b0fada4d">W11-4617</url>
       <bibkey>johannessen-guevara-2011-kind</bibkey>
     </paper>
     <paper id="18">
       <title>Morphological analysis of a non-standard language variety</title>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <author><first>Kadri</first><last>Muischnek</last></author>
       <pages>130–137</pages>
       <url hash="476b627f">W11-4618</url>
@@ -8171,7 +8171,7 @@
     </paper>
     <paper id="19">
       <title>Editing Syntax Trees on the Surface</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>138–145</pages>
       <url hash="131c63af">W11-4619</url>
       <bibkey>ljunglof-2011-editing</bibkey>
@@ -8179,7 +8179,7 @@
     <paper id="20">
       <title>Do wordnets also improve human performance on <fixed-case>NLP</fixed-case> tasks?</title>
       <author><first>Kristiina</first><last>Muhonen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>146–152</pages>
       <url hash="43c0a27c">W11-4620</url>
       <bibkey>muhonen-linden-2011-wordnets</bibkey>
@@ -8187,9 +8187,9 @@
     <paper id="21">
       <title>Creating Comparable Multimodal Corpora for <fixed-case>N</fixed-case>ordic Languages</title>
       <author><first>Costanza</first><last>Navarretta</last></author>
-      <author><first>Elisabeth</first><last>Ahlsén</last></author>
+      <author id="elisabeth-ahlsen"><first>Elisabeth</first><last>Ahlsén</last></author>
       <author><first>Jens</first><last>Allwood</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Patrizia</first><last>Paggio</last></author>
       <pages>153–160</pages>
       <url hash="9ef8a4a2">W11-4621</url>
@@ -8220,8 +8220,8 @@
     </paper>
     <paper id="25">
       <title>Combining Statistical Models for <fixed-case>POS</fixed-case> Tagging using Finite-State Calculus</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>183–190</pages>
       <url hash="ee387c53">W11-4625</url>
       <bibkey>silfverberg-linden-2011-combining</bibkey>
@@ -8238,21 +8238,21 @@
     <paper id="27">
       <title>Automatic summarization as means of simplifying texts, an evaluation for <fixed-case>S</fixed-case>wedish</title>
       <author><first>Christian</first><last>Smith</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>198–205</pages>
       <url hash="94a7285d">W11-4627</url>
       <bibkey>smith-jonsson-2011-automatic</bibkey>
     </paper>
     <paper id="28">
       <title>Using graphical models for <fixed-case>PP</fixed-case> attachment</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>206–213</pages>
       <url hash="d8796317">W11-4628</url>
       <bibkey>sogaard-2011-using</bibkey>
     </paper>
     <paper id="29">
       <title>Corrective re-synthesis of deviant speech using unit selection</title>
-      <author><first>Sofia</first><last>Strömbergsson</last></author>
+      <author id="sofia-stronbergsson"><first>Sofia</first><last>Strömbergsson</last></author>
       <pages>214–217</pages>
       <url hash="6f5b7ffd">W11-4629</url>
       <bibkey>strombergsson-2011-corrective</bibkey>
@@ -8355,14 +8355,14 @@
     </paper>
     <paper id="42">
       <title>Knowledge-free Verb Detection through Tag Sequence Alignment</title>
-      <author><first>Christian</first><last>Hänig</last></author>
+      <author id="christian-hanig"><first>Christian</first><last>Hänig</last></author>
       <pages>291–294</pages>
       <url hash="b228179d">W11-4642</url>
       <bibkey>hanig-2011-knowledge</bibkey>
     </paper>
     <paper id="43">
       <title>“Andre ord” – a wordnet browser for the <fixed-case>D</fixed-case>anish wordnet, <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et</title>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Bolette</first><last>Sandford Pedersen</last></author>
       <pages>295–298</pages>
       <url hash="47277f10">W11-4643</url>
@@ -8370,17 +8370,17 @@
     </paper>
     <paper id="44">
       <title>Modularisation of <fixed-case>F</fixed-case>innish Finite-State Language Description – Towards Wide Collaboration in Open Source Development of a Morphological Analyser</title>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <pages>299–302</pages>
       <url hash="578ca53b">W11-4644</url>
       <bibkey>pirinen-2011-modularisation</bibkey>
     </paper>
     <paper id="45">
       <title>A <fixed-case>P</fixed-case>rague Markup Language profile for the <fixed-case>S</fixed-case>em<fixed-case>T</fixed-case>i-Kamols grammar model</title>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Gunta</first><last>Nešpore</last></author>
-      <author><first>Kristīne</first><last>Levāne-Petrova</last></author>
-      <author><first>Baiba</first><last>Saulīte</last></author>
+      <author id="kristine-levane-petrova"><first>Kristīne</first><last>Levāne-Petrova</last></author>
+      <author id="baiba-saulite"><first>Baiba</first><last>Saulīte</last></author>
       <pages>303–306</pages>
       <url hash="6263c6bc">W11-4645</url>
       <bibkey>pretkalnina-etal-2011-prague</bibkey>
diff --git a/data/xml/W12.xml b/data/xml/W12.xml
index 7e251073f8..0464f7e04c 100644
--- a/data/xml/W12.xml
+++ b/data/xml/W12.xml
@@ -4,9 +4,9 @@
     <meta>
       <booktitle>Proceedings of the Joint Workshop on Exploiting Synergies between Information Retrieval and Machine Translation (<fixed-case>ESIRMT</fixed-case>) and Hybrid Approaches to Machine Translation (<fixed-case>H</fixed-case>y<fixed-case>T</fixed-case>ra)</booktitle>
       <url hash="d2fd022e">W12-01</url>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Patrik</first><last>Lambert</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Bogdan</first><last>Babych</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -38,7 +38,7 @@
     <paper id="3">
       <title>Full Machine Translation for Factoid Question Answering</title>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Pere R.</first><last>Comas</last></author>
+      <author id="pere-comas"><first>Pere R.</first><last>Comas</last></author>
       <pages>20–29</pages>
       <url hash="a3aa88ac">W12-0103</url>
       <bibkey>espana-bonet-comas-2012-full</bibkey>
@@ -63,10 +63,10 @@
     </paper>
     <paper id="6">
       <title>Combining <fixed-case>EBMT</fixed-case>, <fixed-case>SMT</fixed-case>, <fixed-case>TM</fixed-case> and <fixed-case>IR</fixed-case> Technologies for Quality and Scale</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sara</first><last>Morrissey</last></author>
       <author><first>Andy</first><last>Way</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>48–58</pages>
       <url hash="3f64fad9">W12-0106</url>
       <bibkey>dandapat-etal-2012-combining</bibkey>
@@ -90,7 +90,7 @@
     <paper id="9">
       <title><fixed-case>PLUTO</fixed-case>: Automated Solutions for Patent Translation</title>
       <author><first>John</first><last>Tinsley</last></author>
-      <author><first>Alexandru</first><last>Ceausu</last></author>
+      <author id="alexandru-ceausu"><first>Alexandru</first><last>Ceausu</last></author>
       <author><first>Jian</first><last>Zhang</last></author>
       <pages>69–71</pages>
       <url hash="d0291327">W12-0109</url>
@@ -105,14 +105,14 @@
     </paper>
     <paper id="11">
       <title>Tree-based Hybrid Machine Translation</title>
-      <author><first>Andreas Søeborg</first><last>Kirkedal</last></author>
+      <author id="andreas-soeborg-kirkedal"><first>Andreas Søeborg</first><last>Kirkedal</last></author>
       <pages>77–86</pages>
       <url hash="8ff58e6c">W12-0111</url>
       <bibkey>kirkedal-2012-tree</bibkey>
     </paper>
     <paper id="12">
       <title>Were the clocks striking or surprising? Using <fixed-case>WSD</fixed-case> to improve <fixed-case>MT</fixed-case> performance</title>
-      <author><first>Špela</first><last>Vintar</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <author><first>Aljoša</first><last>Vrščaj</last></author>
       <pages>87–92</pages>
@@ -122,7 +122,7 @@
     <paper id="13">
       <title>Bootstrapping Method for Chunk Alignment in Phrase Based <fixed-case>SMT</fixed-case></title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>93–100</pages>
       <url hash="1f3f72ea">W12-0113</url>
       <bibkey>pal-bandyopadhyay-2012-bootstrapping</bibkey>
@@ -131,9 +131,9 @@
       <title>Design of a hybrid high quality machine translation system</title>
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Kurt</first><last>Eberle</last></author>
-      <author><first>Johanna</first><last>Geiß</last></author>
-      <author><first>Mireia</first><last>Ginestí-Rosell</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="johanna-geiss"><first>Johanna</first><last>Geiß</last></author>
+      <author id="mireia-ginesti-rosell"><first>Mireia</first><last>Ginestí-Rosell</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Reinhard</first><last>Rapp</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Martin</first><last>Thomas</last></author>
@@ -152,7 +152,7 @@
       <title>Linguistically-Augmented <fixed-case>B</fixed-case>ulgarian-to-<fixed-case>E</fixed-case>nglish Statistical Machine Translation Model</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>119–128</pages>
       <url hash="b828c395">W12-0116</url>
       <bibkey>wang-etal-2012-linguistically</bibkey>
@@ -160,7 +160,7 @@
     <paper id="17">
       <title>Using Sense-labeled Discourse Connectives for Statistical Machine Translation</title>
       <author><first>Thomas</first><last>Meyer</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>129–138</pages>
       <url hash="80cbc27d">W12-0117</url>
       <bibkey>meyer-popescu-belis-2012-using</bibkey>
@@ -199,9 +199,9 @@
       <title>Lexical Semantics and Distribution of Suffixes - A Visual Analysis</title>
       <author><first>Christian</first><last>Rohrdantz</last></author>
       <author><first>Andreas</first><last>Niekler</last></author>
-      <author><first>Annette</first><last>Hautli</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Daniel A.</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel A.</first><last>Keim</last></author>
       <pages>7–15</pages>
       <url hash="197e5a69">W12-0202</url>
       <bibkey>rohrdantz-etal-2012-lexical</bibkey>
@@ -237,7 +237,7 @@
     <paper id="6">
       <title>Automating Second Language Acquisition Research: Integrating Information Visualisation and Machine Learning</title>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Theodora</first><last>Alexopoulou</last></author>
       <pages>35–43</pages>
       <url hash="1b057354">W12-0206</url>
@@ -249,7 +249,7 @@
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <author><first>Stefania</first><last>Degaetano-Ortlieb</last></author>
       <author><first>Henrik</first><last>Dittmann</last></author>
-      <author><first>Chris</first><last>Culy</last></author>
+      <author id="chris-culy"><first>Chris</first><last>Culy</last></author>
       <pages>44–48</pages>
       <url hash="919bbb0e">W12-0207</url>
       <bibkey>lyding-etal-2012-visualising</bibkey>
@@ -346,11 +346,11 @@
     </frontmatter>
     <paper id="1">
       <title>From Character to Word Level: Enabling the Linguistic Analyses of Inputlog Process Data</title>
-      <author><first>Mariëlle</first><last>Leijten</last></author>
+      <author id="marielle-leijten"><first>Mariëlle</first><last>Leijten</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Eric</first><last>Van Horenbeeck</last></author>
-      <author><first>Luuk</first><last>Van Waes</last></author>
+      <author id="luuk-van-waes"><first>Luuk</first><last>Van Waes</last></author>
       <pages>1–8</pages>
       <url hash="65bf7407">W12-0301</url>
       <bibkey>leijten-etal-2012-character</bibkey>
@@ -383,7 +383,7 @@
       <author><first>Camille</first><last>Albert</last></author>
       <author><first>Flore</first><last>Barcellini</last></author>
       <author><first>Corinne</first><last>Grosse</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>35–38</pages>
       <url hash="73c105b6">W12-0305</url>
       <bibkey>albert-etal-2012-lelie</bibkey>
@@ -401,8 +401,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Computational Approaches to Deception Detection</booktitle>
       <url hash="98935f21">W12-04</url>
-      <editor><first>Eileen</first><last>Fitzpatrick</last></editor>
-      <editor><first>Joan</first><last>Bachenko</last></editor>
+      <editor id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></editor>
+      <editor id="joan-bachenko"><first>Joan</first><last>Bachenko</last></editor>
       <editor><first>Tommaso</first><last>Fornaciari</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
@@ -447,7 +447,7 @@
       <author><first>Jeff</first><last>Hancock</last></author>
       <author><first>Poornima</first><last>Prabhu</last></author>
       <author><first>Myle</first><last>Ott</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>23–30</pages>
       <url hash="b0849d5b">W12-0404</url>
       <bibkey>gokhman-etal-2012-search</bibkey>
@@ -463,7 +463,7 @@
     <paper id="6">
       <title>On the Use of Homogenous Sets of Subjects in Deceptive Language Analysis</title>
       <author><first>Tommaso</first><last>Fornaciari</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>39–47</pages>
       <url hash="c357af38">W12-0406</url>
       <bibkey>fornaciari-poesio-2012-use</bibkey>
@@ -501,9 +501,9 @@
     </paper>
     <paper id="11">
       <title>Pastiche Detection Based on Stopword Rankings. Exposing Impersonators of a <fixed-case>R</fixed-case>omanian Writer</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Maria-Octavia</first><last>Sulea</last></author>
+      <author id="octavia-maria-sulea"><first>Maria-Octavia</first><last>Sulea</last></author>
       <pages>72–77</pages>
       <url hash="d4eabe64">W12-0411</url>
       <bibkey>dinu-etal-2012-pastiche</bibkey>
@@ -533,7 +533,7 @@
     </paper>
     <paper id="15">
       <title>Identification of Truth and Deception in Text: Application of Vector Space Model to <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory</title>
-      <author><first>Victoria L.</first><last>Rubin</last></author>
+      <author id="victoria-l-rubin"><first>Victoria L.</first><last>Rubin</last></author>
       <author><first>Tatiana</first><last>Vashchilko</last></author>
       <pages>97–106</pages>
       <url hash="f32d41c9">W12-0415</url>
@@ -547,7 +547,7 @@
       <editor><first>Natalia</first><last>Grabar</last></editor>
       <editor><first>Marie</first><last>Dupuch</last></editor>
       <editor><first>Amandine</first><last>Périnet</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -562,7 +562,7 @@
       <title>Experiments on Hybrid Corpus-Based Sentiment Lexicon Acquisition</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <pages>1–9</pages>
       <url hash="c4e371cf">W12-0501</url>
       <bibkey>glavas-etal-2012-experiments</bibkey>
@@ -578,7 +578,7 @@
     <paper id="3">
       <title>Hybrid Combination of Constituency and Dependency Trees into an Ensemble Dependency Parser</title>
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>19–26</pages>
       <url hash="4d24e977">W12-0503</url>
       <bibkey>green-zabokrtsky-2012-hybrid</bibkey>
@@ -594,14 +594,14 @@
     <paper id="5">
       <title>An Unsupervised and Data-Driven Approach for Spell Checking in <fixed-case>V</fixed-case>ietnamese <fixed-case>OCR</fixed-case>-scanned Texts</title>
       <author><first>Cong Duy Vu</first><last>Hoang</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <pages>36–44</pages>
       <url hash="df83c505">W12-0505</url>
       <bibkey>hoang-aw-2012-unsupervised</bibkey>
     </paper>
     <paper id="6">
       <title>Multilingual Natural Language Processing</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>45</pages>
       <url hash="74193a64">W12-0506</url>
       <bibkey>mihalcea-2012-multilingual</bibkey>
@@ -617,16 +617,16 @@
     <paper id="8">
       <title>A Joint Named Entity Recognition and Entity Linking System</title>
       <author><first>Rosa</first><last>Stern</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <pages>52–60</pages>
       <url hash="db559b24">W12-0508</url>
       <bibkey>stern-etal-2012-joint</bibkey>
     </paper>
     <paper id="9">
       <title>Collaborative Annotation of Dialogue Acts: Application of a New <fixed-case>ISO</fixed-case> Standard to the Switchboard Corpus</title>
-      <author><first>Alex C.</first><last>Fang</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="alex-chengyu-fang"><first>Alex C.</first><last>Fang</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Jing</first><last>Cao</last></author>
       <author><first>Xiaoyue</first><last>Liu</last></author>
       <pages>61–68</pages>
@@ -658,7 +658,7 @@
       <title>Methods Combination and <fixed-case>ML</fixed-case>-based Re-ranking of Multiple Hypothesis for Question-Answering Systems</title>
       <author><first>Arnaud</first><last>Grappy</last></author>
       <author><first>Brigitte</first><last>Grau</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>87–96</pages>
       <url hash="7d2a9af8">W12-0512</url>
       <bibkey>grappy-etal-2012-methods</bibkey>
@@ -667,7 +667,7 @@
       <title>A Generalised Hybrid Architecture for <fixed-case>NLP</fixed-case></title>
       <author><first>Alistair</first><last>Willis</last></author>
       <author><first>Hui</first><last>Yang</last></author>
-      <author><first>Anne</first><last>De Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>De Roeck</last></author>
       <pages>97–105</pages>
       <url hash="bfca29c0">W12-0513</url>
       <bibkey>willis-etal-2012-generalised</bibkey>
@@ -696,7 +696,7 @@
       <booktitle>Proceedings of the Workshop on Semantic Analysis in Social Media</booktitle>
       <url hash="1140846e">W12-06</url>
       <editor><first>Atefeh</first><last>Farzindar</last></editor>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -710,7 +710,7 @@
     <paper id="1">
       <title>Unsupervised Part-of-Speech Tagging in Noisy and Esoteric Domains With a Syntactic-Semantic <fixed-case>B</fixed-case>ayesian <fixed-case>HMM</fixed-case></title>
       <author><first>William M.</first><last>Darling</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Fei</first><last>Song</last></author>
       <pages>1–9</pages>
       <url hash="6e012838">W12-0601</url>
@@ -751,16 +751,16 @@
     </paper>
     <paper id="6">
       <title>A Hybrid Framework for Scalable Opinion Mining in Social Media: Detecting Polarities and Attitude Targets</title>
-      <author><first>Carlos</first><last>Rodríguez-Penagos</last></author>
+      <author id="carlos-rodriguez-penagos"><first>Carlos</first><last>Rodríguez-Penagos</last></author>
       <author><first>Jens</first><last>Grivolla</last></author>
-      <author><first>Joan</first><last>Codina-Filba</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filba</last></author>
       <pages>46–52</pages>
       <url hash="42a8840a">W12-0606</url>
       <bibkey>rodriguez-penagos-etal-2012-hybrid</bibkey>
     </paper>
     <paper id="7">
       <title>Predicting the 2011 <fixed-case>D</fixed-case>utch Senate Election Results with <fixed-case>T</fixed-case>witter</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>53–60</pages>
       <url hash="fd5b295b">W12-0607</url>
@@ -780,11 +780,11 @@
       <booktitle>Proceedings of the Joint Workshop on Unsupervised and Semi-Supervised Learning in <fixed-case>NLP</fixed-case></booktitle>
       <url hash="8a4c08fb">W12-07</url>
       <editor><first>Omri</first><last>Abend</last></editor>
-      <editor><first>Chris</first><last>Biemann</last></editor>
+      <editor id="chris-biemann"><first>Chris</first><last>Biemann</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <editor><first>Ari</first><last>Rappoport</last></editor>
       <editor><first>Roi</first><last>Reichart</last></editor>
-      <editor><first>Anders</first><last>Søgaard</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Søgaard</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -829,17 +829,17 @@
     </paper>
     <paper id="5">
       <title>Improving Distantly Supervised Extraction of Drug-Drug and Protein-Protein Interactions</title>
-      <author><first>Tamara</first><last>Bobić</last></author>
+      <author id="tamara-bobic"><first>Tamara</first><last>Bobić</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <author><first>Philippe</first><last>Thomas</last></author>
-      <author><first>Martin</first><last>Hofmann-Apitius</last></author>
+      <author id="martin-hofmann-apitius"><first>Martin</first><last>Hofmann-Apitius</last></author>
       <pages>35–43</pages>
       <url hash="94cadc16">W12-0705</url>
       <bibkey>bobic-etal-2012-improving</bibkey>
     </paper>
     <paper id="6">
       <title>Robust Induction of Parts-of-Speech in Child-Directed Language by Co-Clustering of Words and Contexts</title>
-      <author><first>Richard E.</first><last>Leibbrandt</last></author>
+      <author id="richard-e-leibbrandt"><first>Richard E.</first><last>Leibbrandt</last></author>
       <author><first>David MW</first><last>Powers</last></author>
       <pages>44–54</pages>
       <url hash="cb7cfef7">W12-0706</url>
@@ -847,7 +847,7 @@
     </paper>
     <paper id="7">
       <title>Dependency Parsing Domain Adaptation using Transductive <fixed-case>SVM</fixed-case></title>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <pages>55–59</pages>
       <url hash="0982ae56">W12-0707</url>
@@ -891,7 +891,7 @@
     <paper id="3">
       <title><fixed-case>TTT</fixed-case>: A Tree Transduction Language for Syntactic and Semantic Processing</title>
       <author><first>Adam</first><last>Purtee</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>21–30</pages>
       <url hash="a8890538">W12-0803</url>
       <bibkey>purtee-schubert-2012-ttt</bibkey>
@@ -908,7 +908,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Computational Models of Language Acquisition and Loss</booktitle>
       <url hash="2751d7ce">W12-09</url>
-      <editor><first>Robert</first><last>Berwick</last></editor>
+      <editor id="robert-c-berwick"><first>Robert</first><last>Berwick</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
@@ -924,7 +924,7 @@
     </frontmatter>
     <paper id="1">
       <title>Distinguishing Contact-Induced Change from Language Drift in Genetically Related Languages</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <author><first>Luisa</first><last>Miceli</last></author>
       <pages>1–9</pages>
       <url hash="cc411b42">W12-0901</url>
@@ -940,7 +940,7 @@
     </paper>
     <paper id="3">
       <title>Probabilistic Models of Grammar Acquisition</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>19</pages>
       <url hash="4ad82e4b">W12-0903</url>
       <bibkey>steedman-2012-probabilistic</bibkey>
@@ -975,8 +975,8 @@
     </paper>
     <paper id="7">
       <title>Webservices for <fixed-case>B</fixed-case>ayesian Learning</title>
-      <author><first>Muntsa</first><last>Padró</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>29–31</pages>
       <url hash="f71eaf28">W12-0907</url>
       <bibkey>padro-bel-2012-webservices</bibkey>
@@ -984,7 +984,7 @@
     <paper id="8">
       <title>Unseen features. Collecting semantic data from congenital blind subjects</title>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Giovanna</first><last>Marotta</last></author>
       <pages>32</pages>
       <url hash="eae11cce">W12-0908</url>
@@ -1011,7 +1011,7 @@
       <author><first>Aline</first><last>Villavicencio</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Vítor</first><last>Araújo</last></author>
+      <author id="vitor-de-araujo"><first>Vítor</first><last>Araújo</last></author>
       <author><first>Beracah</first><last>Yankama</last></author>
       <author><first>Robert</first><last>Berwick</last></author>
       <pages>43–50</pages>
@@ -1030,8 +1030,8 @@
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities</booktitle>
       <url hash="4243b006">W12-10</url>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
-      <editor><first>Antal</first><last>van den Bosch</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Avignon, France</address>
       <month>April</month>
@@ -1045,7 +1045,7 @@
     <paper id="1">
       <title>Lexicon Construction and Corpus Annotation of Historical Language with the <fixed-case>C</fixed-case>o<fixed-case>B</fixed-case>a<fixed-case>LT</fixed-case> Editor</title>
       <author><first>Tom</first><last>Kenter</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Maja</first><last>Žorga Dulmin</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <pages>1–6</pages>
@@ -1058,7 +1058,7 @@
       <author><first>Jeremy</first><last>Hammond</last></author>
       <author><first>Herman</first><last>Stehouwer</last></author>
       <author><first>Aarthy</first><last>Somasundaram</last></author>
-      <author><first>Sebastian</first><last>Drude</last></author>
+      <author id="sebastian-drude"><first>Sebastian</first><last>Drude</last></author>
       <pages>7–12</pages>
       <url hash="4838277f">W12-1002</url>
       <bibkey>dingemanse-etal-2012-high</bibkey>
@@ -1066,7 +1066,7 @@
     <paper id="3">
       <title><fixed-case>BAD</fixed-case>: An Assistant tool for making verses in <fixed-case>B</fixed-case>asque</title>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Bertol</first><last>Arrieta</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>13–17</pages>
@@ -1093,7 +1093,7 @@
       <title>Ontology-Based Incremental Annotation of Characters in Folktales</title>
       <author><first>Thierry</first><last>Declerck</last></author>
       <author><first>Nikolina</first><last>Koleva</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>30–34</pages>
       <url hash="0b4d9235">W12-1006</url>
       <bibkey>declerck-etal-2012-ontology</bibkey>
@@ -1109,7 +1109,7 @@
     </paper>
     <paper id="8">
       <title>Distributional techniques for philosophical enquiry</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Eva</first><last>von Redecker</last></author>
       <author><first>Johanna</first><last>Müller</last></author>
       <pages>45–54</pages>
@@ -1128,7 +1128,7 @@
     <paper id="10">
       <title>Parsing the Past - Identification of Verb Constructions in Historical Text</title>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>65–74</pages>
       <url hash="f1bf726c">W12-1010</url>
@@ -1136,7 +1136,7 @@
     </paper>
     <paper id="11">
       <title>A Classical <fixed-case>C</fixed-case>hinese Corpus with Nested Part-of-Speech Tags</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>75–84</pages>
       <url hash="8fd1cc11">W12-1011</url>
       <bibkey>lee-2012-classical</bibkey>
@@ -1151,11 +1151,11 @@
     </paper>
     <paper id="13">
       <title>Enabling the Discovery of Digital Cultural Heritage Objects through <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Mark Michael</first><last>Hall</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa Etxabe</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="mark-hall"><first>Mark Michael</first><last>Hall</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa Etxabe</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>94–100</pages>
       <url hash="1287f84d">W12-1013</url>
       <bibkey>hall-etal-2012-enabling</bibkey>
@@ -1171,8 +1171,8 @@
     <paper id="15">
       <title>Natural Language Inspired Approach for Handwritten Text Line Detection in Legacy Documents</title>
       <author><first>Vicente</first><last>Bosch</last></author>
-      <author><first>Alejandro Héctor</first><last>Toselli</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="alejandro-h-toselli"><first>Alejandro Héctor</first><last>Toselli</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <pages>107–111</pages>
       <url hash="b385c048">W12-1015</url>
       <bibkey>bosch-etal-2012-natural</bibkey>
@@ -1192,7 +1192,7 @@
       <url hash="d4f80995">W12-11</url>
       <editor><first>Cyril</first><last>Grouin</last></editor>
       <editor><first>Dominic</first><last>Forest</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1205,8 +1205,8 @@
     </frontmatter>
     <paper id="1">
       <title>Indexation libre et contrôlée d’articles scientifiques. Présentation et résultats du défi fouille de textes <fixed-case>DEFT</fixed-case>2012 (Controlled and free indexing of scientific papers. Presentation and results of the <fixed-case>DEFT</fixed-case>2012 text-mining challenge) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Dominic</first><last>Forest</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <pages>1–13</pages>
@@ -1217,14 +1217,14 @@
       <title>Key-concept extraction from <fixed-case>F</fixed-case>rench articles with <fixed-case>KX</fixed-case></title>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Elena</first><last>Cabrio</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>15–24</pages>
       <url hash="e7ad9b5c">W12-1102</url>
       <bibkey>tonelli-etal-2012-key</bibkey>
     </paper>
     <paper id="3">
       <title>Acquisition terminologique pour identifier les mots-clés d’articles scientifiques (Terminological acquisition for identifying keywords of scientific articles) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>25–31</pages>
       <url hash="ed1c17ff">W12-1103</url>
       <bibkey>hamon-2012-acquisition</bibkey>
@@ -1233,7 +1233,7 @@
       <title>Indexation à base des syntagmes nominaux (Nominal-chunk based indexing) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Amine</first><last>Amri</last></author>
       <author><first>Maroua</first><last>Mbarek</last></author>
-      <author><first>Chedi</first><last>Bechikh</last></author>
+      <author id="chedi-bechikh-ali"><first>Chedi</first><last>Bechikh</last></author>
       <author><first>Chiraz</first><last>Latiri</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
       <pages>33–39</pages>
@@ -1246,7 +1246,7 @@
       <author><first>Mathieu</first><last>Boucher</last></author>
       <author><first>Romain</first><last>Brixtel</last></author>
       <author><first>Gaël</first><last>Lejeune</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>41–48</pages>
       <url hash="d5d15aeb">W12-1105</url>
       <bibkey>doualan-etal-2012-detection</bibkey>
@@ -1264,7 +1264,7 @@
       <author><first>Florian</first><last>Boudin</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Nicolas</first><last>Hernandez</last></author>
-      <author><first>Prajol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajol</first><last>Shrestha</last></author>
       <pages>61–68</pages>
       <url hash="4ccf8d93">W12-1107</url>
       <bibkey>boudin-etal-2012-participation</bibkey>
@@ -1282,8 +1282,8 @@
     </paper>
     <paper id="9">
       <title>Enrichir et raisonner sur des espaces sémantiques pour l’attribution de mots-clés (Enriching and reasoning on semantic spaces for keyword extraction) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Adil</first><last>El Ghali</last></author>
-      <author><first>Daniel</first><last>Hromada</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
+      <author id="daniel-hromada"><first>Daniel</first><last>Hromada</last></author>
       <author><first>Kaoutar</first><last>El Ghali</last></author>
       <pages>77–90</pages>
       <url hash="ce1f5b98">W12-1109</url>
@@ -1294,9 +1294,9 @@
     <meta>
       <booktitle>JEP-TALN-RECITAL 2012, Workshop DEGELS 2012: Défi GEste Langue des Signes (DEGELS 2012: Gestures and Sign Language Challenge)</booktitle>
       <url hash="9a8c8bcf">W12-12</url>
-      <editor><first>Annelies</first><last>Braffort</last></editor>
+      <editor id="annelies-braffort"><first>Annelies</first><last>Braffort</last></editor>
       <editor><first>Leïla</first><last>Boutora</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1317,7 +1317,7 @@
     </paper>
     <paper id="2">
       <title>Critères de segmentation de la gestualité co-verbale (Segmentation criteria for the annotation of co-speech gestures) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Gaëlle</first><last>Ferré</last></author>
+      <author id="gaelle-ferre"><first>Gaëlle</first><last>Ferré</last></author>
       <pages>9–21</pages>
       <url hash="560be5e2">W12-1202</url>
       <bibkey>ferre-2012-criteres</bibkey>
@@ -1351,7 +1351,7 @@
     <paper id="6">
       <title>Influence de la segmentation temporelle sur la caractérisation de signes (Influence of the temporal segmentation on the sign characterization) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>François</first><last>Lefebvre-Albaret</last></author>
-      <author><first>Jérémie</first><last>Segouat</last></author>
+      <author id="jeremie-segouat"><first>Jérémie</first><last>Segouat</last></author>
       <pages>73–83</pages>
       <url hash="bc3fe917">W12-1206</url>
       <bibkey>lefebvre-albaret-segouat-2012-influence</bibkey>
@@ -1375,9 +1375,9 @@
     <meta>
       <booktitle><fixed-case>JEP</fixed-case>-<fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2012, Workshop <fixed-case>TALA</fixed-case>f 2012: Traitement Automatique des Langues Africaines (<fixed-case>TALA</fixed-case>f 2012: <fixed-case>A</fixed-case>frican Language Processing)</booktitle>
       <url hash="3e1314af">W12-13</url>
-      <editor><first>Chantal</first><last>Enguehard</last></editor>
-      <editor><first>Mathieu</first><last>Mangeot</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="chantal-enguehard"><first>Chantal</first><last>Enguehard</last></editor>
+      <editor id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1392,8 +1392,8 @@
       <title><fixed-case>M</fixed-case>bochi : corpus oral, traitement automatique et exploration phonologique (<fixed-case>M</fixed-case>boshi: oral corpus, automatic processing &amp; phonological mining) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Annie</first><last>Rialland</last></author>
       <author><first>Martial</first><last>Embanga Aborobongui</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Lori</first><last>Lamel</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="lori-lamel"><first>Lori</first><last>Lamel</last></author>
       <pages>1–12</pages>
       <url hash="4f8306ce">W12-1301</url>
       <bibkey>rialland-etal-2012-mbochi</bibkey>
@@ -1432,8 +1432,8 @@
     <paper id="5">
       <title>Analyse des performances de modèles de langage sub-lexicale pour des langues peu-dotées à morphologie riche (Performance analysis of sub-word language modeling for under-resourced languages with rich morphology: case study on <fixed-case>S</fixed-case>wahili and <fixed-case>A</fixed-case>mharic) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Hadrien</first><last>Gelas</last></author>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>François</first><last>Pellegrino</last></author>
       <pages>53–62</pages>
       <url hash="4d97230c">W12-1305</url>
@@ -1466,7 +1466,7 @@
     <paper id="9">
       <title>Décrire la morphologie des verbes en ikota au moyen d’une métagrammaire (Describing the Morphology of Verbs in Ikota using a Metagrammar) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Denys</first><last>Duchier</last></author>
-      <author><first>Brunelle Magnana</first><last>Ekoukou</last></author>
+      <author id="brunelle-magnana-ekoukou"><first>Brunelle Magnana</first><last>Ekoukou</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <author><first>Simon</first><last>Petitjean</last></author>
       <author><first>Emmanuel</first><last>Schang</last></author>
@@ -1478,7 +1478,7 @@
       <title>Extraction de lexiques bilingues à partir de Wikipédia (Bilingual lexicon extraction from <fixed-case>W</fixed-case>ikipedia) [in <fixed-case>F</fixed-case>rench]</title>
       <author><first>Rahma</first><last>Sellami</last></author>
       <author><first>Fatiha</first><last>Sadat</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>107–117</pages>
       <url hash="353f7108">W12-1310</url>
       <bibkey>sellami-etal-2012-extraction</bibkey>
@@ -1490,7 +1490,7 @@
       <url hash="0c14a30a">W12-14</url>
       <editor><first>François</first><last>Portet</last></editor>
       <editor><first>Michel</first><last>Vacher</last></editor>
-      <editor><first>Gilles</first><last>Sérasset</last></editor>
+      <editor id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></editor>
       <publisher>ATALA/AFCP</publisher>
       <address>Grenoble, France</address>
       <month>June</month>
@@ -1561,7 +1561,7 @@
       <booktitle><fixed-case>INLG</fixed-case> 2012 Proceedings of the Seventh International Natural Language Generation Conference</booktitle>
       <url hash="b00c128b">W12-15</url>
       <editor><first>Barbara</first><last>Di Eugenio</last></editor>
-      <editor><first>Susan</first><last>McRoy</last></editor>
+      <editor id="susan-w-mcroy"><first>Susan</first><last>McRoy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Utica, IL</address>
       <month>May</month>
@@ -1574,14 +1574,14 @@
     </frontmatter>
     <paper id="1">
       <title>Natural Language Generation and Assistive Technologies</title>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>1</pages>
       <url hash="23d1d000">W12-1501</url>
       <bibkey>mccoy-2012-natural</bibkey>
     </paper>
     <paper id="2">
       <title>Expressive <fixed-case>NLG</fixed-case> for Next-Generation Learning Environments: Language, Affect, and Narrative</title>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>2</pages>
       <url hash="1242b0b9">W12-1502</url>
       <bibkey>lester-2012-expressive</bibkey>
@@ -1589,8 +1589,8 @@
     <paper id="3">
       <title>Learning Preferences for Referring Expression Generation: Effects of Domain, Language and Algorithm</title>
       <author><last>Koolen</last><first>Ruud</first></author>
-      <author><last>Krahmer</last><first>Emiel</first></author>
-      <author><last>Theune</last><first>Mariët</first></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>3–11</pages>
       <url hash="73da7bb0">W12-1503</url>
       <bibkey>koolen-etal-2012-learning</bibkey>
@@ -1608,10 +1608,10 @@
       <title><fixed-case>M</fixed-case>ink<fixed-case>A</fixed-case>pp: Generating Spatio-temporal Summaries for Nature Conservation Volunteers</title>
       <author><first>Nava</first><last>Tintarev</last></author>
       <author><first>Yolanda</first><last>Melero</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Elizabeth</first><last>Tait</last></author>
-      <author><first>Rene</first><last>Van Der Wal</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="rene-van-der-wal"><first>Rene</first><last>Van Der Wal</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>17–21</pages>
       <url hash="835da2a0">W12-1505</url>
       <bibkey>tintarev-etal-2012-minkapp</bibkey>
@@ -1628,7 +1628,7 @@
     <paper id="7">
       <title>Generation for Grammar Engineering</title>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>German</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
       <pages>31–39</pages>
       <url hash="5b784c82">W12-1507</url>
       <bibkey>gardent-kruszewski-2012-generation</bibkey>
@@ -1637,7 +1637,7 @@
       <title>Perceptions of Alignment and Personality in Generated Dialogue</title>
       <author><first>Alastair</first><last>Gill</last></author>
       <author><first>Carsten</first><last>Brockmann</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <pages>40–48</pages>
       <url hash="cc273f37">W12-1508</url>
       <bibkey>gill-etal-2012-perceptions</bibkey>
@@ -1645,7 +1645,7 @@
     <paper id="9">
       <title>Optimising Incremental Generation for Spoken Dialogue Systems: Reducing the Need for Fillers</title>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>49–58</pages>
@@ -1677,7 +1677,7 @@
     </paper>
     <paper id="13">
       <title>Extractive email thread summarization: Can we do better than He Said She Said?</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>85–89</pages>
       <url hash="fc194079">W12-1513</url>
       <bibkey>duboue-2012-extractive</bibkey>
@@ -1692,7 +1692,7 @@
     </paper>
     <paper id="15">
       <title>Reformulating student contributions in tutorial dialogue</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <author><first>Sandra</first><last>Katz</last></author>
       <author><first>Patricia</first><last>Albacete</last></author>
       <author><first>Michael</first><last>Ford</last></author>
@@ -1711,7 +1711,7 @@
     </paper>
     <paper id="17">
       <title>Sign Language Generation with Expert Systems and <fixed-case>CCG</fixed-case></title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>105–109</pages>
       <url hash="8470f318">W12-1517</url>
       <bibkey>mazzei-2012-sign</bibkey>
@@ -1738,10 +1738,10 @@
     <paper id="20">
       <title>Blogging birds: Generating narratives about reintroduced species to promote public engagement</title>
       <author><first>Advaith</first><last>Siddharthan</last></author>
-      <author><first>Matthew</first><last>Green</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>René</first><last>van der Wal</last></author>
+      <author id="matthew-j-green"><first>Matthew</first><last>Green</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="rene-van-der-wal"><first>René</first><last>van der Wal</last></author>
       <pages>120–124</pages>
       <url hash="d0a1aef3">W12-1520</url>
       <bibkey>siddharthan-etal-2012-blogging</bibkey>
@@ -1786,11 +1786,11 @@
     </paper>
     <paper id="25">
       <title>The Surface Realisation Task: Recent Developments and Future Plans</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>136–140</pages>
       <url hash="c67edee9">W12-1525</url>
       <bibkey>belz-etal-2012-surface</bibkey>
@@ -1799,7 +1799,7 @@
       <title><fixed-case>KBG</fixed-case>en – Text Generation from Knowledge Bases as a New Shared Task</title>
       <author><first>Eva</first><last>Banik</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Nikhil</first><last>Dinesh</last></author>
       <author><first>Fennie</first><last>Liang</last></author>
       <pages>141–145</pages>
@@ -1811,14 +1811,14 @@
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <author><first>Gerard</first><last>Casamayor</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>146–149</pages>
       <url hash="6bb71909">W12-1527</url>
       <bibkey>bouayad-agha-etal-2012-content</bibkey>
     </paper>
     <paper id="28">
       <title>Shared Task Proposal: Syntactic Paraphrase Ranking</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>150–153</pages>
       <url hash="5fb201a4">W12-1528</url>
       <bibkey>white-2012-shared</bibkey>
@@ -1828,10 +1828,10 @@
     <meta>
       <booktitle>Proceedings of the 13th Annual Meeting of the Special Interest Group on Discourse and Dialogue</booktitle>
       <url hash="0a2fad0d">W12-16</url>
-      <editor><first>Gary Geunbae</first><last>Lee</last></editor>
+      <editor id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></editor>
       <editor><first>Jonathan</first><last>Ginzburg</last></editor>
       <editor><first>Claire</first><last>Gardent</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seoul, South Korea</address>
       <month>July</month>
@@ -1851,7 +1851,7 @@
     </paper>
     <paper id="2">
       <title>An End-to-End Evaluation of Two Situated Dialog Systems</title>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Alejandra</first><last>Lorenzo</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
       <pages>10–19</pages>
@@ -1863,7 +1863,7 @@
       <author><first>William Yang</first><last>Wang</last></author>
       <author><first>Samantha</first><last>Finkelstein</last></author>
       <author><first>Amy</first><last>Ogan</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <pages>20–29</pages>
       <url hash="addea904">W12-1603</url>
@@ -1874,7 +1874,7 @@
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Konstantina</first><last>Garoufi</last></author>
       <author><first>Maria</first><last>Staudte</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <pages>30–39</pages>
       <url hash="57d24897">W12-1604</url>
       <bibkey>koller-etal-2012-enhancing</bibkey>
@@ -1882,14 +1882,14 @@
     <paper id="5">
       <title>Unsupervised Topic Modeling Approaches to Decision Summarization in Spoken Meetings</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>40–49</pages>
       <url hash="fa382b1f">W12-1605</url>
       <bibkey>wang-cardie-2012-unsupervised</bibkey>
     </paper>
     <paper id="6">
       <title>An Unsupervised Approach to User Simulation: Toward Self-Improving Dialog Systems</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>50–59</pages>
       <url hash="ad42d792">W12-1606</url>
@@ -1899,27 +1899,27 @@
       <title>Hierarchical Conversation Structure Prediction in Multi-Party Chat</title>
       <author><first>Elijah</first><last>Mayfield</last></author>
       <author><first>David</first><last>Adamson</last></author>
-      <author><first>Carolyn</first><last>Penstein Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Penstein Rosé</last></author>
       <pages>60–69</pages>
       <url hash="bd4e034e">W12-1607</url>
       <bibkey>mayfield-etal-2012-hierarchical</bibkey>
     </paper>
     <paper id="8">
       <title>Rapid Development Process of Spoken Dialogue Systems using Collaboratively Constructed Semantic Resources</title>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <pages>70–73</pages>
       <url hash="945d586a">W12-1608</url>
       <bibkey>araki-2012-rapid</bibkey>
     </paper>
     <paper id="9">
       <title>The Effect of Cognitive Load on a Statistical Dialogue System</title>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
       <author><first>Matthew</first><last>Henderson</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
       <author><first>Kai</first><last>Yu</last></author>
       <author><first>Eli</first><last>Tzirkel</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>74–78</pages>
       <url hash="01a26d9f">W12-1609</url>
       <bibkey>gasic-etal-2012-effect</bibkey>
@@ -1929,7 +1929,7 @@
       <author><first>Christine</first><last>Howes</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <author><first>Rose</first><last>McCabe</last></author>
-      <author><first>Patrick G. T.</first><last>Healey</last></author>
+      <author id="patrick-healey"><first>Patrick G. T.</first><last>Healey</last></author>
       <author><first>Mary</first><last>Lavelle</last></author>
       <pages>79–83</pages>
       <url hash="6abf5538">W12-1610</url>
@@ -1940,16 +1940,16 @@
       <author><first>Teruhisa</first><last>Misu</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>84–93</pages>
       <url hash="e13fcd26">W12-1611</url>
       <bibkey>misu-etal-2012-reinforcement</bibkey>
     </paper>
     <paper id="12">
       <title>From Strangers to Partners: Examining Convergence within a Longitudinal Study of Task-Oriented Dialogue</title>
-      <author><first>Christopher M.</first><last>Mitchell</last></author>
-      <author><first>Kristy Elizabeth</first><last>Boyer</last></author>
-      <author><first>James C.</first><last>Lester</last></author>
+      <author id="christopher-mitchell"><first>Christopher M.</first><last>Mitchell</last></author>
+      <author id="kristy-boyer"><first>Kristy Elizabeth</first><last>Boyer</last></author>
+      <author id="james-lester"><first>James C.</first><last>Lester</last></author>
       <pages>94–98</pages>
       <url hash="3fd607d8">W12-1612</url>
       <bibkey>mitchell-etal-2012-strangers</bibkey>
@@ -1957,7 +1957,7 @@
     <paper id="13">
       <title>The Structure and Generality of Spoken Route Instructions</title>
       <author><first>Aasish</first><last>Pappu</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>99–107</pages>
       <url hash="613e06dd">W12-1613</url>
       <bibkey>pappu-rudnicky-2012-structure</bibkey>
@@ -1965,15 +1965,15 @@
     <paper id="14">
       <title>Improving Implicit Discourse Relation Recognition Through Feature Set Optimization</title>
       <author><first>Joonsuk</first><last>Park</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>108–112</pages>
       <url hash="06314a3a">W12-1614</url>
       <bibkey>park-cardie-2012-improving</bibkey>
     </paper>
     <paper id="15">
       <title>A Temporal Simulator for Developing Turn-Taking Methods for Spoken Dialogue Systems</title>
-      <author><first>Ethan O.</first><last>Selfridge</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="ethan-selfridge"><first>Ethan O.</first><last>Selfridge</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <pages>113–117</pages>
       <url hash="c9da0a00">W12-1615</url>
       <bibkey>selfridge-heeman-2012-temporal</bibkey>
@@ -1989,7 +1989,7 @@
     <paper id="17">
       <title>Estimating Adaptation of Dialogue Partners with Different Verbal Intelligence</title>
       <author><first>Kseniya</first><last>Zablotskaya</last></author>
-      <author><first>Fernando</first><last>Fernández-Martínez</last></author>
+      <author id="fernando-fernandez-martinez"><first>Fernando</first><last>Fernández-Martínez</last></author>
       <author><first>Wolfgang</first><last>Minker</last></author>
       <pages>126–130</pages>
       <url hash="91b89a93">W12-1617</url>
@@ -1998,20 +1998,20 @@
     <paper id="18">
       <title>A Demonstration of Incremental Speech Understanding and Confidence Estimation in a Virtual Human Dialogue System</title>
       <author><first>David</first><last>DeVault</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>131–133</pages>
       <url hash="af247259">W12-1618</url>
       <bibkey>devault-traum-2012-demonstration</bibkey>
     </paper>
     <paper id="19">
       <title>Integrating Location, Visibility, and Question-Answering in a Spoken Dialogue System for Pedestrian City Exploration</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Phil</first><last>Bartie</last></author>
       <author><first>William</first><last>Mackaness</last></author>
       <author><first>Tiphaine</first><last>Dalmas</last></author>
-      <author><first>Jana</first><last>Goetze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Goetze</last></author>
       <pages>134–136</pages>
       <url hash="510da523">W12-1619</url>
       <bibkey>janarthanam-etal-2012-integrating</bibkey>
@@ -2022,8 +2022,8 @@
       <author><first>Eric</first><last>Forbell</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Albert</first><last>Rizzo</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="albert-a-rizzo"><first>Albert</first><last>Rizzo</last></author>
       <pages>137–139</pages>
       <url hash="1035c271">W12-1620</url>
       <bibkey>morbini-etal-2012-mixed</bibkey>
@@ -2032,7 +2032,7 @@
       <title>Towards Mediating Shared Perceptual Basis in Situated Dialogue</title>
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Rui</first><last>Fang</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>140–149</pages>
       <url hash="3ecec97b">W12-1621</url>
       <bibkey>liu-etal-2012-towards</bibkey>
@@ -2048,8 +2048,8 @@
     </paper>
     <paper id="23">
       <title>A Reranking Model for Discourse Segmentation using Subtree Features</title>
-      <author><first>Ngo</first><last>Xuan Bach</last></author>
-      <author><first>Nguyen</first><last>Le Minh</last></author>
+      <author id="ngo-xuan-bach"><first>Ngo</first><last>Xuan Bach</last></author>
+      <author id="minh-le-nguyen"><first>Nguyen</first><last>Le Minh</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>160–168</pages>
       <url hash="e6b6b78a">W12-1623</url>
@@ -2074,7 +2074,7 @@
     </paper>
     <paper id="26">
       <title>Exploiting Machine-Transcribed Dialog Corpus to Improve Multiple Dialog States Tracking Methods</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>189–196</pages>
       <url hash="7f9557d0">W12-1626</url>
@@ -2082,14 +2082,14 @@
     </paper>
     <paper id="27">
       <title>Cohesion, Entrainment and Task Success in Educational Dialog</title>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>197</pages>
       <url hash="f0b30ebb">W12-1627</url>
       <bibkey>litman-2012-cohesion</bibkey>
     </paper>
     <paper id="28">
       <title>A Bottom-Up Exploration of the Dimensions of Dialog State in Spoken Interaction</title>
-      <author><first>Nigel G.</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel G.</first><last>Ward</last></author>
       <author><first>Alejandro</first><last>Vega</last></author>
       <pages>198–206</pages>
       <url hash="cf1074ca">W12-1628</url>
@@ -2098,7 +2098,7 @@
     <paper id="29">
       <title>Using Group History to Identify Character-Directed Utterances in Multi-Child Interactions</title>
       <author><first>Hannaneh</first><last>Hajishirzi</last></author>
-      <author><first>Jill F.</first><last>Lehman</last></author>
+      <author id="jill-fain-lehman"><first>Jill F.</first><last>Lehman</last></author>
       <author><first>Jessica K.</first><last>Hodgins</last></author>
       <pages>207–216</pages>
       <url hash="bf66e97c">W12-1629</url>
@@ -2106,8 +2106,8 @@
     </paper>
     <paper id="30">
       <title>Adapting to Multiple Affective States in Spoken Dialogue</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>217–226</pages>
       <url hash="9f47fbf6">W12-1630</url>
       <bibkey>forbes-riley-litman-2012-adapting</bibkey>
@@ -2141,11 +2141,11 @@
     </paper>
     <paper id="34">
       <title>Combining Verbal and Nonverbal Features to Overcome the “Information Gap” in Task-Oriented Dialogue</title>
-      <author><first>Eun Young</first><last>Ha</last></author>
-      <author><first>Joseph F.</first><last>Grafsgaard</last></author>
-      <author><first>Christopher</first><last>Mitchell</last></author>
-      <author><first>Kristy Elizabeth</first><last>Boyer</last></author>
-      <author><first>James C.</first><last>Lester</last></author>
+      <author id="eun-young-ha"><first>Eun Young</first><last>Ha</last></author>
+      <author id="joseph-f-grafsgaard"><first>Joseph F.</first><last>Grafsgaard</last></author>
+      <author id="christopher-mitchell"><first>Christopher</first><last>Mitchell</last></author>
+      <author id="kristy-boyer"><first>Kristy Elizabeth</first><last>Boyer</last></author>
+      <author id="james-lester"><first>James C.</first><last>Lester</last></author>
       <pages>247–256</pages>
       <url hash="0819537e">W12-1634</url>
       <bibkey>ha-etal-2012-combining</bibkey>
@@ -2153,7 +2153,7 @@
     <paper id="35">
       <title>Semantic Specificity in Spoken Dialogue Requests</title>
       <author><first>Ben</first><last>Hixon</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Susan L.</first><last>Epstein</last></author>
       <pages>257–260</pages>
       <url hash="550ff57c">W12-1635</url>
@@ -2177,10 +2177,10 @@
     </paper>
     <paper id="38">
       <title>Integrating Incremental Speech Recognition and <fixed-case>POMDP</fixed-case>-Based Dialogue Systems</title>
-      <author><first>Ethan O.</first><last>Selfridge</last></author>
+      <author id="ethan-selfridge"><first>Ethan O.</first><last>Selfridge</last></author>
       <author><first>Iker</first><last>Arizmendi</last></author>
-      <author><first>Peter A.</first><last>Heeman</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <pages>275–279</pages>
       <url hash="290a6a6f">W12-1638</url>
       <bibkey>selfridge-etal-2012-integrating</bibkey>
@@ -2216,14 +2216,14 @@
     <paper id="42">
       <title>Focused Meeting Summarization via Unsupervised Relation Extraction</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>304–313</pages>
       <url hash="7b66350d">W12-1642</url>
       <bibkey>wang-cardie-2012-focused</bibkey>
     </paper>
     <paper id="43">
       <title><fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks for Situated Incremental Natural Language Understanding</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>314–323</pages>
       <url hash="0501909e">W12-1643</url>
@@ -2235,7 +2235,7 @@
       <booktitle>Proceedings of the 3rd Workshop on Cognitive Modeling and Computational Linguistics (<fixed-case>CMCL</fixed-case> 2012)</booktitle>
       <url hash="b3f94445">W12-17</url>
       <editor><first>David</first><last>Reitter</last></editor>
-      <editor><first>Roger</first><last>Levy</last></editor>
+      <editor id="roger-levy"><first>Roger</first><last>Levy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -2283,7 +2283,7 @@
     </paper>
     <paper id="5">
       <title>Connectionist-Inspired Incremental <fixed-case>PCFG</fixed-case> Parsing</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Andy</first><last>Exley</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>51–60</pages>
@@ -2292,7 +2292,7 @@
     </paper>
     <paper id="6">
       <title>Sequential vs. Hierarchical Syntactic Models of Human Incremental Sentence Processing</title>
-      <author><first>Victoria</first><last>Fossum</last></author>
+      <author id="victoria-fossum"><first>Victoria</first><last>Fossum</last></author>
       <author><first>Roger</first><last>Levy</last></author>
       <pages>61–69</pages>
       <url hash="f62de9d3">W12-1706</url>
@@ -2302,7 +2302,7 @@
       <title>Modeling covert event retrieval in logical metonymy: probabilistic and distributional accounts</title>
       <author><first>Alessandra</first><last>Zarcone</last></author>
       <author><first>Jason</first><last>Utt</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>70–79</pages>
       <url hash="a87c1f77">W12-1707</url>
       <bibkey>zarcone-etal-2012-modeling</bibkey>
@@ -2322,8 +2322,8 @@
       <booktitle><fixed-case>NAACL</fixed-case>-<fixed-case>HLT</fixed-case> Workshop on Future directions and needs in the Spoken Dialog Community: Tools and Data (<fixed-case>SDCTD</fixed-case> 2012)</booktitle>
       <url hash="3bb3989d">W12-18</url>
       <editor><first>Maxine</first><last>Eskenazi</last></editor>
-      <editor><first>Alan</first><last>Black</last></editor>
-      <editor><first>David</first><last>Traum</last></editor>
+      <editor id="alan-w-black"><first>Alan</first><last>Black</last></editor>
+      <editor id="david-traum"><first>David</first><last>Traum</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -2337,7 +2337,7 @@
     <paper id="1">
       <title>Up from Limited Dialog Systems!</title>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <author><first>Christina</first><last>Unger</last></author>
       <pages>1–2</pages>
@@ -2346,7 +2346,7 @@
     </paper>
     <paper id="2">
       <title>Directions for Research on Spoken Dialog Systems, Broadly Defined</title>
-      <author><first>Nigel G.</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel G.</first><last>Ward</last></author>
       <pages>3–4</pages>
       <url hash="a8672276">W12-1802</url>
       <bibkey>ward-2012-directions</bibkey>
@@ -2386,7 +2386,7 @@
     </paper>
     <paper id="7">
       <title>Towards Situated Collaboration</title>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Ece</first><last>Kamar</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <pages>13–14</pages>
@@ -2395,7 +2395,7 @@
     </paper>
     <paper id="8">
       <title>Incremental Spoken Dialogue Systems: Tools and Data</title>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Nina</first><last>Dethlefs</last></author>
       <pages>15–16</pages>
@@ -2404,7 +2404,7 @@
     </paper>
     <paper id="9">
       <title>After Dialog Went Pervasive: Separating Dialog Behavior Modeling and Task Modeling</title>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>17–18</pages>
       <url hash="fe39e6bf">W12-1809</url>
       <bibkey>stent-2012-dialog</bibkey>
@@ -2419,21 +2419,21 @@
     </paper>
     <paper id="11">
       <title>Bridging Gaps for Spoken Dialog System Frameworks in Instructional Settings</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
       <pages>21–22</pages>
       <url hash="036886a2">W12-1811</url>
       <bibkey>levow-2012-bridging</bibkey>
     </paper>
     <paper id="12">
       <title>A belief tracking challenge task for spoken dialog systems</title>
-      <author><first>Jason</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason</first><last>Williams</last></author>
       <pages>23–24</pages>
       <url hash="dd55e967">W12-1812</url>
       <bibkey>williams-2012-belief</bibkey>
     </paper>
     <paper id="13">
       <title>Framework for the Development of Spoken Dialogue System based on Collaboratively Constructed Semantic Resources</title>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <author><first>Daisuke</first><last>Takegoshi</last></author>
       <pages>25–28</pages>
       <url hash="69f095dd">W12-1813</url>
@@ -2457,8 +2457,8 @@
     </paper>
     <paper id="16">
       <title>Mining Search Query Logs for Spoken Language Understanding</title>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
-      <author><first>Gokhan</first><last>Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tür</last></author>
       <author><first>Asli</first><last>Celikyilmaz</last></author>
       <pages>37–40</pages>
       <url hash="8d984b3f">W12-1816</url>
@@ -2479,8 +2479,8 @@
     </paper>
     <paper id="18">
       <title>One Year of Contender: What Have We Learned about Assessing and Tuning Industrial Spoken Dialog Systems?</title>
-      <author><first>David</first><last>Suendermann</last></author>
-      <author><first>Roberto</first><last>Pieraccini</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann</last></author>
+      <author id="roberto-pieraccini"><first>Roberto</first><last>Pieraccini</last></author>
       <pages>45–48</pages>
       <url hash="b3efdc35">W12-1818</url>
       <bibkey>suendermann-pieraccini-2012-one</bibkey>
@@ -2499,9 +2499,9 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case>-<fixed-case>HLT</fixed-case> Workshop on the Induction of Linguistic Structure</booktitle>
       <url hash="126cb838">W12-19</url>
-      <editor><first>Trevor</first><last>Cohn</last></editor>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
-      <editor><first>Joao</first><last>Graca</last></editor>
+      <editor id="trevor-cohn"><first>Trevor</first><last>Cohn</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="joao-graca"><first>Joao</first><last>Graca</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -2535,9 +2535,9 @@
     </paper>
     <paper id="3">
       <title>Capitalization Cues Improve Dependency Grammar Induction</title>
-      <author><first>Valentin I.</first><last>Spitkovsky</last></author>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="valentin-i-spitkovsky"><first>Valentin I.</first><last>Spitkovsky</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
       <pages>16–22</pages>
       <url hash="347d6e6f">W12-1903</url>
       <bibkey>spitkovsky-etal-2012-capitalization</bibkey>
@@ -2554,7 +2554,7 @@
     <paper id="5">
       <title>Exploiting Partial Annotations with <fixed-case>EM</fixed-case> Training</title>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>31–38</pages>
       <url hash="20861d60">W12-1905</url>
       <bibkey>hovy-hovy-2012-exploiting</bibkey>
@@ -2594,7 +2594,7 @@
     </paper>
     <paper id="10">
       <title>Two baselines for unsupervised dependency parsing</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>81–83</pages>
       <url hash="ff55f412">W12-1910</url>
       <bibkey>sogaard-2012-two</bibkey>
@@ -2602,7 +2602,7 @@
     <paper id="11">
       <title>Unsupervised Dependency Parsing using Reducibility and Fertility features</title>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>84–89</pages>
       <url hash="f5a4b519">W12-1911</url>
       <bibkey>marecek-zabokrtsky-2012-unsupervised</bibkey>
@@ -2618,15 +2618,15 @@
     <paper id="13">
       <title>Turning the pipeline into a loop: Iterated unsupervised dependency parsing and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> induction</title>
       <author><first>Christos</first><last>Christodoulopoulos</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>96–99</pages>
       <url hash="be2829cf">W12-1913</url>
       <bibkey>christodoulopoulos-etal-2012-turning</bibkey>
     </paper>
     <paper id="14">
       <title>Hierarchical clustering of word class distributions</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <pages>100–104</pages>
       <url hash="8dad5f66">W12-1914</url>
       <bibkey>chrupala-2012-hierarchical</bibkey>
@@ -2643,7 +2643,7 @@
     <meta>
       <booktitle>Proceedings of the Seventh Workshop on Building Educational Applications Using <fixed-case>NLP</fixed-case></booktitle>
       <url hash="652c8236">W12-20</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2658,10 +2658,10 @@
     </frontmatter>
     <paper id="1">
       <title>Question Ranking and Selection in Tutorial Dialogues</title>
-      <author><first>Lee</first><last>Becker</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="lee-becker"><first>Lee</first><last>Becker</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Sarel</first><last>van Vuuren</last></author>
-      <author><first>Wayne</first><last>Ward</last></author>
+      <author id="wayne-ward"><first>Wayne</first><last>Ward</last></author>
       <pages>1–11</pages>
       <url hash="cec0b1e8">W12-2001</url>
       <bibkey>becker-etal-2012-question</bibkey>
@@ -2670,9 +2670,9 @@
       <title>Identifying science concepts and student misconceptions in an interactive essay writing tutor</title>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Ifeyinwa</first><last>Okoye</last></author>
-      <author><first>Md. Arafat</first><last>Sultan</last></author>
+      <author id="md-arafat-sultan"><first>Md. Arafat</first><last>Sultan</last></author>
       <author><first>Haojie</first><last>Hang</last></author>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <author><first>Tamara</first><last>Sumner</last></author>
       <pages>12–21</pages>
       <url hash="c0db4740">W12-2002</url>
@@ -2680,7 +2680,7 @@
     </paper>
     <paper id="3">
       <title>Automatic Grading of Scientific Inquiry</title>
-      <author><first>Avirup</first><last>Sil</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
       <author><first>Angela</first><last>Shelton</last></author>
       <author><first>Diane Jass</first><last>Ketelhut</last></author>
       <author><first>Alexander</first><last>Yates</last></author>
@@ -2691,7 +2691,7 @@
     <paper id="4">
       <title>Modeling coherence in <fixed-case>ESOL</fixed-case> learner texts</title>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>33–43</pages>
       <url hash="40959c1d">W12-2004</url>
       <bibkey>yannakoudakis-briscoe-2012-modeling</bibkey>
@@ -2700,7 +2700,7 @@
       <title>Exploring Grammatical Error Correction with Not-So-Crummy Machine Translation</title>
       <author><first>Nitin</first><last>Madnani</last></author>
       <author><first>Joel</first><last>Tetreault</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>44–53</pages>
       <url hash="6ad20c83">W12-2005</url>
       <bibkey>madnani-etal-2012-exploring</bibkey>
@@ -2731,11 +2731,11 @@
     </paper>
     <paper id="9">
       <title><fixed-case>PREFER</fixed-case>: Using a Graph-Based Approach to Generate Paraphrases for Language Learning</title>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Shi-Ting</first><last>Huang</last></author>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shi-Ting</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <author><first>Hsien-Chin</first><last>Liou</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>80–85</pages>
       <url hash="f2d2303e">W12-2009</url>
       <bibkey>chen-etal-2012-prefer</bibkey>
@@ -2751,7 +2751,7 @@
     <paper id="11">
       <title>Predicting Learner Levels for Online Exercises of <fixed-case>H</fixed-case>ebrew</title>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Anthony</first><last>Meyer</last></author>
       <pages>95–104</pages>
       <url hash="963b5341">W12-2011</url>
@@ -2778,14 +2778,14 @@
       <title>Scoring Spoken Responses Based on Content Accuracy</title>
       <author><first>Fei</first><last>Huang</last></author>
       <author><first>Lei</first><last>Chen</last></author>
-      <author><first>Jana</first><last>Sukkarieh</last></author>
+      <author id="jana-sukkarieh"><first>Jana</first><last>Sukkarieh</last></author>
       <pages>122–126</pages>
       <url hash="1a1cec0d">W12-2014</url>
       <bibkey>huang-etal-2012-scoring</bibkey>
     </paper>
     <paper id="15">
       <title>Developing <fixed-case>ARET</fixed-case>: An <fixed-case>NLP</fixed-case>-based Educational Tool Set for <fixed-case>A</fixed-case>rabic Reading Enhancement</title>
-      <author><first>Mohammed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohammed</first><last>Maamouri</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
       <author><first>Violetta</first><last>Cavalli-Sforza</last></author>
       <author><first>Dave</first><last>Graff</last></author>
@@ -2806,7 +2806,7 @@
       <title>Generating Grammar Exercises</title>
       <author><first>Laura</first><last>Perez-Beltrachini</last></author>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>German</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>German</first><last>Kruszewski</last></author>
       <pages>147–156</pages>
       <url hash="1f9d227c">W12-2017</url>
       <bibkey>perez-beltrachini-etal-2012-generating</bibkey>
@@ -2822,7 +2822,7 @@
     <paper id="19">
       <title>On Improving the Accuracy of Readability Classification using Insights from Second Language Acquisition</title>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>163–173</pages>
       <url hash="c42b7537">W12-2019</url>
       <bibkey>vajjala-meurers-2012-improving</bibkey>
@@ -2830,7 +2830,7 @@
     <paper id="20">
       <title>An Interactive Analytic Tool for Peer-Review Exploration</title>
       <author><first>Wenting</first><last>Xiong</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Jingtao</first><last>Wang</last></author>
       <author><first>Christian</first><last>Schunn</last></author>
       <pages>174–179</pages>
@@ -2839,7 +2839,7 @@
     </paper>
     <paper id="21">
       <title>Vocabulary Profile as a Measure of Vocabulary Sophistication</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Suma</first><last>Bhat</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
       <pages>180–189</pages>
@@ -2850,7 +2850,7 @@
       <title>Short Answer Assessment: Establishing Links Between Research Strands</title>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Niels</first><last>Ott</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>190–200</pages>
       <url hash="98324e9e">W12-2022</url>
       <bibkey>ziai-etal-2012-short</bibkey>
@@ -2860,7 +2860,7 @@
       <author><first>Pinaki</first><last>Bhaskar</last></author>
       <author><first>Aniruddha</first><last>Ghosh</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>201–207</pages>
       <url hash="c8eaeb2d">W12-2023</url>
       <bibkey>bhaskar-etal-2012-detection</bibkey>
@@ -2869,7 +2869,7 @@
       <title>Informing Determiner and Preposition Error Correction with Hierarchical Word Clustering</title>
       <author><first>Adriane</first><last>Boyd</last></author>
       <author><first>Marion</first><last>Zepf</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>208–215</pages>
       <url hash="8b6015da">W12-2024</url>
       <bibkey>boyd-etal-2012-informing</bibkey>
@@ -2885,7 +2885,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>VTEX</fixed-case> Determiner and Preposition Correction System for the <fixed-case>HOO</fixed-case> 2012 Shared Task</title>
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <pages>225–232</pages>
       <url hash="85c27f6f">W12-2026</url>
       <bibkey>daudaravicius-2012-vtex</bibkey>
@@ -2902,8 +2902,8 @@
     <paper id="28">
       <title><fixed-case>HOO</fixed-case> 2012 Error Recognition and Correction Shared Task: <fixed-case>C</fixed-case>ambridge <fixed-case>U</fixed-case>niversity Submission Report</title>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Øistein</first><last>Andersen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="oistein-e-andersen"><first>Øistein</first><last>Andersen</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>242–250</pages>
       <url hash="4ad73220">W12-2028</url>
       <bibkey>kochmar-etal-2012-hoo</bibkey>
@@ -2912,7 +2912,7 @@
       <title><fixed-case>K</fixed-case>orea <fixed-case>U</fixed-case>niversity System in the <fixed-case>HOO</fixed-case> 2012 Shared Task</title>
       <author><first>Jieun</first><last>Lee</last></author>
       <author><first>Jung-Tae</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>251–256</pages>
       <url hash="5fa3afeb">W12-2029</url>
       <bibkey>lee-etal-2012-korea</bibkey>
@@ -2930,7 +2930,7 @@
       <title><fixed-case>KU</fixed-case> Leuven at <fixed-case>HOO</fixed-case>-2012: A Hybrid Approach to Detection and Correction of Determiner and Preposition Errors in Non-native <fixed-case>E</fixed-case>nglish Text</title>
       <author><first>Li</first><last>Quan</last></author>
       <author><first>Oleksandr</first><last>Kolomiyets</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>263–271</pages>
       <url hash="3abe296b">W12-2031</url>
       <bibkey>quan-etal-2012-ku</bibkey>
@@ -2952,14 +2952,14 @@
       <author><first>Lis</first><last>Kanashiro</last></author>
       <author><first>Tomoya</first><last>Mizumoto</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>281–288</pages>
       <url hash="de8791ee">W12-2033</url>
       <bibkey>sakaguchi-etal-2012-naist</bibkey>
     </paper>
     <paper id="34">
       <title>Memory-based text correction for preposition and determiner errors</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Peter</first><last>Berck</last></author>
       <pages>289–294</pages>
       <url hash="69496a3e">W12-2034</url>
@@ -2967,12 +2967,12 @@
     </paper>
     <paper id="35">
       <title>Helping Our Own: <fixed-case>NTHU</fixed-case> <fixed-case>NLPLAB</fixed-case> System Description</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Joseph</first><last>Chang</last></author>
-      <author><first>Yi-Chun</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="joseph-z-chang"><first>Joseph</first><last>Chang</last></author>
+      <author id="yichun-chen"><first>Yi-Chun</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>295–301</pages>
       <url hash="3531b76d">W12-2035</url>
       <bibkey>wu-etal-2012-helping</bibkey>
@@ -2987,8 +2987,8 @@
     </paper>
     <paper id="37">
       <title>Crowdsourced Comprehension: Predicting Prerequisite Structure in <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Partha</first><last>Talukdar</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>307–315</pages>
       <url hash="6fd91510">W12-2037</url>
       <bibkey>talukdar-cohen-2012-crowdsourced</bibkey>
@@ -3005,7 +3005,7 @@
     <paper id="39">
       <title>Evaluating the Meaning of Answers to Reading Comprehension Questions: A Semantics-Based Approach</title>
       <author><first>Michael</first><last>Hahn</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>326–336</pages>
       <url hash="51aa6e36">W12-2039</url>
       <bibkey>hahn-meurers-2012-evaluating</bibkey>
@@ -3016,7 +3016,7 @@
       <booktitle>Proceedings of the Second Workshop on Language in Social Media</booktitle>
       <url hash="65bc0e28">W12-21</url>
       <editor><first>Sara Owsley</first><last>Sood</last></editor>
-      <editor><first>Meenakshi</first><last>Nagarajan</last></editor>
+      <editor id="meenakshi-nagarajan"><first>Meenakshi</first><last>Nagarajan</last></editor>
       <editor><first>Michael</first><last>Gamon</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
@@ -3031,7 +3031,7 @@
     <paper id="1">
       <title>Analyzing <fixed-case>U</fixed-case>rdu Social Media for Sentiments using Transfer Learning with Controlled Translations</title>
       <author><first>Smruthi</first><last>Mukund</last></author>
-      <author><first>Rohini</first><last>Srihari</last></author>
+      <author id="rohini-k-srihari"><first>Rohini</first><last>Srihari</last></author>
       <pages>1–8</pages>
       <url hash="def89f2d">W12-2101</url>
       <bibkey>mukund-srihari-2012-analyzing</bibkey>
@@ -3039,8 +3039,8 @@
     <paper id="2">
       <title>Detecting Distressed and Non-distressed Affect States in Short Forum Texts</title>
       <author><first>Michael</first><last>Thaul Lehrman</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
-      <author><first>Rubén A.</first><last>Proaño</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="ruben-a-proano"><first>Rubén A.</first><last>Proaño</last></author>
       <pages>9–18</pages>
       <url hash="3a6477a5">W12-2102</url>
       <bibkey>thaul-lehrman-etal-2012-detecting</bibkey>
@@ -3048,7 +3048,7 @@
     <paper id="3">
       <title>Detecting Hate Speech on the World Wide Web</title>
       <author><first>William</first><last>Warner</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>19–26</pages>
       <url hash="3fa782d9">W12-2103</url>
       <bibkey>warner-hirschberg-2012-detecting</bibkey>
@@ -3070,8 +3070,8 @@
       <author><first>Or</first><last>Biran</last></author>
       <author><first>Sara</first><last>Rosenthal</last></author>
       <author><first>Jacob</first><last>Andreas</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>37–45</pages>
       <url hash="f3ece98e">W12-2105</url>
       <bibkey>biran-etal-2012-detecting</bibkey>
@@ -3088,9 +3088,9 @@
     <paper id="7">
       <title>Robust kaomoji detection in <fixed-case>T</fixed-case>witter</title>
       <author><first>Steven</first><last>Bedrick</last></author>
-      <author><first>Russell</first><last>Beckley</last></author>
+      <author id="russell-beckley"><first>Russell</first><last>Beckley</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>56–64</pages>
       <url hash="628be1a6">W12-2107</url>
       <bibkey>bedrick-etal-2012-robust</bibkey>
@@ -3146,7 +3146,7 @@
     </paper>
     <paper id="2">
       <title>Towards Automatic Lexical Simplification in <fixed-case>S</fixed-case>panish: An Empirical Study</title>
-      <author><first>Biljana</first><last>Drndarević</last></author>
+      <author id="biljana-drndarevic"><first>Biljana</first><last>Drndarević</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>8–16</pages>
       <url hash="6eb2429a">W12-2202</url>
@@ -3184,8 +3184,8 @@
     <paper id="6">
       <title>Making Readability Indices Readable</title>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Ke</first><last>Tran Manh</last></author>
-      <author><first>Emanuele</first><last>Pianta</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran Manh</last></author>
+      <author id="emanuele-pianta"><first>Emanuele</first><last>Pianta</last></author>
       <pages>40–48</pages>
       <url hash="00d20893">W12-2206</url>
       <bibkey>tonelli-etal-2012-making</bibkey>
@@ -3202,7 +3202,7 @@
       <title>Comparing human versus automatic feature extraction for fine-grained elementary readability assessment</title>
       <author><first>Yi</first><last>Ma</last></author>
       <author><first>Ritu</first><last>Singh</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Robert</first><last>Lofthus</last></author>
       <pages>58–64</pages>
       <url hash="c88bf94e">W12-2208</url>
@@ -3213,7 +3213,7 @@
     <meta>
       <booktitle>Proceedings of the Twelfth Meeting of the Special Interest Group on Computational Morphology and Phonology</booktitle>
       <url hash="4e186769">W12-23</url>
-      <editor><first>Lynne</first><last>Cahill</last></editor>
+      <editor id="lynne-cahill"><first>Lynne</first><last>Cahill</last></editor>
       <editor><first>Adam</first><last>Albright</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
@@ -3238,7 +3238,7 @@
       <title><fixed-case>H</fixed-case>indi Derivational Morphological Analyzer</title>
       <author><first>Nikhil</first><last>Kanuparthi</last></author>
       <author><first>Abhilash</first><last>Inumella</last></author>
-      <author><first>Dipti</first><last>Misra Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra Sharma</last></author>
       <pages>10–16</pages>
       <url hash="f1066450">W12-2302</url>
       <bibkey>kanuparthi-etal-2012-hindi</bibkey>
@@ -3287,7 +3287,7 @@
       <author><first>Joe</first><last>Pater</last></author>
       <author><first>Robert</first><last>Staubs</last></author>
       <author><first>Karen</first><last>Jesney</last></author>
-      <author><first>Brian</first><last>Smith</last></author>
+      <author id="brian-cantwell-smith"><first>Brian</first><last>Smith</last></author>
       <pages>62–71</pages>
       <url hash="7498833f">W12-2308</url>
       <bibkey>pater-etal-2012-learning</bibkey>
@@ -3304,12 +3304,12 @@
     <meta>
       <booktitle><fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>: Proceedings of the 2012 Workshop on Biomedical Natural Language Processing</booktitle>
       <url hash="d671a79a">W12-24</url>
-      <editor><first>Kevin B.</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin B.</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -3322,7 +3322,7 @@
     </frontmatter>
     <paper id="1">
       <title>Graph-based alignment of narratives for automated neurological assessment</title>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>1–10</pages>
       <url hash="edbce1a3">W12-2401</url>
@@ -3331,7 +3331,7 @@
     <paper id="2">
       <title>Bootstrapping Biomedical Ontologies for Scientific Text using <fixed-case>NELL</fixed-case></title>
       <author><first>Dana</first><last>Movshovitz-Attias</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>11–19</pages>
       <url hash="82fea39f">W12-2402</url>
       <bibkey>movshovitz-attias-cohen-2012-bootstrapping</bibkey>
@@ -3340,7 +3340,7 @@
       <title>Semantic distance and terminology structuring methods for the detection of semantically close terms</title>
       <author><first>Marie</first><last>Dupuch</last></author>
       <author><first>Laëtitia</first><last>Dupuch</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>20–28</pages>
       <url hash="0ff6cf3d">W12-2403</url>
@@ -3349,8 +3349,8 @@
     <paper id="4">
       <title>Temporal Classification of Medical Events</title>
       <author><first>Preethi</first><last>Raghavan</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Albert</first><last>Lai</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert</first><last>Lai</last></author>
       <pages>29–37</pages>
       <url hash="7177afe1">W12-2404</url>
       <bibkey>raghavan-etal-2012-temporal</bibkey>
@@ -3368,33 +3368,33 @@
     <paper id="6">
       <title>Alignment-<fixed-case>HMM</fixed-case>-based Extraction of Abbreviations from Biomedical Text</title>
       <author><first>Dana</first><last>Movshovitz-Attias</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <pages>47–55</pages>
       <url hash="f1b023a3">W12-2406</url>
       <bibkey>movshovitz-attias-cohen-2012-alignment</bibkey>
     </paper>
     <paper id="7">
       <title>Medical diagnosis lost in translation – Analysis of uncertainty and negation expressions in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>wedish clinical texts</title>
-      <author><first>Danielle L</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L</first><last>Mowery</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Wendy W</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy W</first><last>Chapman</last></author>
       <pages>56–64</pages>
       <url hash="c899c1be">W12-2407</url>
       <bibkey>mowery-etal-2012-medical</bibkey>
     </paper>
     <paper id="8">
       <title>A Hybrid Stepwise Approach for De-identifying Person Names in Clinical Documents</title>
-      <author><first>Oscar</first><last>Ferrández</last></author>
+      <author id="oscar-ferrandez"><first>Oscar</first><last>Ferrández</last></author>
       <author><first>Brett</first><last>South</last></author>
       <author><first>Shuying</first><last>Shen</last></author>
-      <author><first>Stéphane</first><last>Meystre</last></author>
+      <author id="stephane-meystre"><first>Stéphane</first><last>Meystre</last></author>
       <pages>65–72</pages>
       <url hash="ebd6b7a1">W12-2408</url>
       <bibkey>ferrandez-etal-2012-hybrid</bibkey>
     </paper>
     <paper id="9">
       <title>Active Learning for Coreference Resolution</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>73–81</pages>
@@ -3417,7 +3417,7 @@
     </paper>
     <paper id="11">
       <title>An improved corpus of disease mentions in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed citations</title>
-      <author><first>Rezarta</first><last>Islamaj Doğan</last></author>
+      <author id="rezarta-islamaj-dogan"><first>Rezarta</first><last>Islamaj Doğan</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>91–99</pages>
       <url hash="d1861df2">W12-2411</url>
@@ -3436,7 +3436,7 @@
     </paper>
     <paper id="13">
       <title>Combining Compositionality and Pagerank for the Identification of Semantic Relations between Biomedical Words</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Christopher</first><last>Engström</last></author>
       <author><first>Mounira</first><last>Manser</last></author>
       <author><first>Zina</first><last>Badji</last></author>
@@ -3462,7 +3462,7 @@
       <author><first>Michael</first><last>Shafir</last></author>
       <author><first>Michael</first><last>Crivaro</last></author>
       <author><first>Bensiin</first><last>Borukhov</last></author>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <pages>122–129</pages>
       <url hash="936410f3">W12-2415</url>
       <bibkey>thamrongrattanarit-etal-2012-nlp</bibkey>
@@ -3474,14 +3474,14 @@
       <author><first>Jianwei</first><last>Leng</last></author>
       <author><first>Tyler</first><last>Forbush</last></author>
       <author><first>Scott</first><last>DuVall</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>130–139</pages>
       <url hash="c385dac0">W12-2416</url>
       <bibkey>south-etal-2012-prototype</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>M</fixed-case>ed<fixed-case>L</fixed-case>ing<fixed-case>M</fixed-case>ap: A growing resource mapping the Bio-Medical <fixed-case>NLP</fixed-case> field</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <author><first>Bensiin</first><last>Borukhov</last></author>
       <author><first>Mike</first><last>Crivaro</last></author>
       <author><first>Michael</first><last>Shafir</last></author>
@@ -3496,7 +3496,7 @@
       <author><first>Leslie</first><last>Lange</last></author>
       <author><first>Jose Luis</first><last>Ambite</last></author>
       <author><first>Yigal</first><last>Arens</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>146–154</pages>
       <url hash="260e563e">W12-2418</url>
       <bibkey>sharma-etal-2012-exploring</bibkey>
@@ -3505,7 +3505,7 @@
       <title>Evaluating Joint Modeling of Yeast Biology Literature and Protein-Protein Interaction Networks</title>
       <author><first>Ramnath</first><last>Balasubramanyan</last></author>
       <author><first>Kathryn</first><last>Rivard</last></author>
-      <author><first>William W.</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William W.</first><last>Cohen</last></author>
       <author><first>Jelena</first><last>Jakovljevic</last></author>
       <author><first>John L.</first><last>Woolford</last></author>
       <pages>155–162</pages>
@@ -3514,8 +3514,8 @@
     </paper>
     <paper id="20">
       <title><fixed-case>R</fixed-case>ank<fixed-case>P</fixed-case>ref: Ranking Sentences Describing Relations between Biomedical Entities with an Application</title>
-      <author><first>Catalina Oana</first><last>Tudor</last></author>
-      <author><first>K</first><last>Vijay-Shanker</last></author>
+      <author id="catalina-oana-tudor"><first>Catalina Oana</first><last>Tudor</last></author>
+      <author id="k-vijay-shanker"><first>K</first><last>Vijay-Shanker</last></author>
       <pages>163–171</pages>
       <url hash="e1e5a6b7">W12-2420</url>
       <bibkey>tudor-vijay-shanker-2012-rankpref</bibkey>
@@ -3525,7 +3525,7 @@
       <author><first>Ying</first><last>Yan</last></author>
       <author><first>Jee-Hyub</first><last>Kim</last></author>
       <author><first>Samuel</first><last>Croset</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>172–175</pages>
       <url hash="c49459c6">W12-2421</url>
       <bibkey>yan-etal-2012-finding</bibkey>
@@ -3543,7 +3543,7 @@
       <title>Classifying Gene Sentences in Biomedical Literature by Combining High-Precision Gene Identifiers</title>
       <author><first>Sun</first><last>Kim</last></author>
       <author><first>Won</first><last>Kim</last></author>
-      <author><first>Don</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Don</first><last>Comeau</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <pages>185–192</pages>
       <url hash="72a6fd05">W12-2423</url>
@@ -3567,7 +3567,7 @@
     </paper>
     <paper id="26">
       <title>Using Natural Language Processing to Extract Drug-Drug Interaction Information from Package Inserts</title>
-      <author><first>Richard</first><last>Boyce</last></author>
+      <author id="richard-d-boyce"><first>Richard</first><last>Boyce</last></author>
       <author><first>Gregory</first><last>Gardner</last></author>
       <author><first>Henk</first><last>Harkema</last></author>
       <pages>206–213</pages>
@@ -3579,7 +3579,7 @@
       <author><first>Nate</first><last>Sutton</last></author>
       <author><first>Laura</first><last>Wojtulewicz</last></author>
       <author><first>Neel</first><last>Mehta</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>214–222</pages>
       <url hash="39ff704f">W12-2427</url>
       <bibkey>sutton-etal-2012-automatic</bibkey>
@@ -3617,10 +3617,10 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case>-<fixed-case>HLT</fixed-case> 2012 Workshop on Computational Linguistics for Literature</booktitle>
       <url hash="d160bcbe">W12-25</url>
-      <editor><first>David</first><last>Elson</last></editor>
+      <editor id="david-elson"><first>David</first><last>Elson</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Rada</first><last>Mihalcea</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -3643,7 +3643,7 @@
     <paper id="2">
       <title>A Computational Analysis of Style, Affect, and Imagery in Contemporary Poetry</title>
       <author><first>Justine</first><last>Kao</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>8–17</pages>
       <url hash="32729a99">W12-2502</url>
       <bibkey>kao-jurafsky-2012-computational</bibkey>
@@ -3651,7 +3651,7 @@
     <paper id="3">
       <title>Towards a Literary Machine Translation: The Role of Referential Cohesion</title>
       <author><first>Rob</first><last>Voigt</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>18–25</pages>
       <url hash="a4bc0e76">W12-2503</url>
       <bibkey>voigt-jurafsky-2012-towards</bibkey>
@@ -3683,7 +3683,7 @@
     </paper>
     <paper id="7">
       <title>Mining wisdom</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>54–58</pages>
       <url hash="0633ee6b">W12-2507</url>
       <bibkey>sogaard-2012-mining</bibkey>
@@ -3707,7 +3707,7 @@
     <paper id="10">
       <title>A Dictionary of Wisdom and Wit: Learning to Extract Quotable Phrases</title>
       <author><first>Michael</first><last>Bendersky</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>69–77</pages>
       <url hash="946e2ebd">W12-2510</url>
       <bibkey>bendersky-smith-2012-dictionary</bibkey>
@@ -3716,7 +3716,7 @@
       <title>A Pilot <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Annotation for <fixed-case>Q</fixed-case>uranic <fixed-case>A</fixed-case>rabic</title>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
       <author><first>Abdelati</first><last>Hawwari</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>78–83</pages>
       <url hash="9cf7c4c6">W12-2511</url>
       <bibkey>zaghouani-etal-2012-pilot</bibkey>
@@ -3733,7 +3733,7 @@
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Augusto</first><last>Corvalan</last></author>
       <author><first>Jacob</first><last>Jensen</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>88–96</pages>
       <url hash="290fb740">W12-2513</url>
       <bibkey>agarwal-etal-2012-social</bibkey>
@@ -3750,8 +3750,8 @@
     <meta>
       <booktitle>Proceedings of Workshop on Evaluation Metrics and System Comparison for Automatic Summarization</booktitle>
       <url hash="d2492fae">W12-26</url>
-      <editor><first>John M.</first><last>Conroy</last></editor>
-      <editor><first>Hoa Trang</first><last>Dang</last></editor>
+      <editor id="john-conroy"><first>John M.</first><last>Conroy</last></editor>
+      <editor id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></editor>
       <editor><first>Ani</first><last>Nenkova</last></editor>
       <editor><first>Karolina</first><last>Owczarzak</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3778,7 +3778,7 @@
       <title>Using the Omega Index for Evaluating Abstractive Community Detection</title>
       <author><first>Gabriel</first><last>Murray</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>10–18</pages>
       <url hash="72219c98">W12-2602</url>
       <bibkey>murray-etal-2012-using</bibkey>
@@ -3803,7 +3803,7 @@
     </paper>
     <paper id="5">
       <title>The Heterogeneity Principle in Evaluation Measures for Automatic Summarization</title>
-      <author><first>Enrique</first><last>Amigó</last></author>
+      <author id="enrique-amigo"><first>Enrique</first><last>Amigó</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <pages>36–43</pages>
@@ -3824,8 +3824,8 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>NAACL</fixed-case>-<fixed-case>HLT</fixed-case> 2012 Workshop: Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for <fixed-case>HLT</fixed-case></booktitle>
       <url hash="50220c38">W12-27</url>
-      <editor><first>Bhuvana</first><last>Ramabhadran</last></editor>
-      <editor><first>Sanjeev</first><last>Khudanpur</last></editor>
+      <editor id="bhuvana-ramabhadran"><first>Bhuvana</first><last>Ramabhadran</last></editor>
+      <editor id="sanjeev-khudanpur"><first>Sanjeev</first><last>Khudanpur</last></editor>
       <editor><first>Ebru</first><last>Arisoy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
@@ -3839,7 +3839,7 @@
     </frontmatter>
     <paper id="1">
       <title>Measuring the Influence of Long Range Dependencies with Neural Network Language Models</title>
-      <author><first>Hai Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai Son</first><last>Le</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>1–10</pages>
@@ -3859,7 +3859,7 @@
       <title>Deep Neural Network Language Models</title>
       <author><first>Ebru</first><last>Arisoy</last></author>
       <author><first>Tara N.</first><last>Sainath</last></author>
-      <author><first>Brian</first><last>Kingsbury</last></author>
+      <author id="brian-kingsbury"><first>Brian</first><last>Kingsbury</last></author>
       <author><first>Bhuvana</first><last>Ramabhadran</last></author>
       <pages>20–28</pages>
       <url hash="d68e42d0">W12-2703</url>
@@ -3867,15 +3867,15 @@
     </paper>
     <paper id="4">
       <title>A Challenge Set for Advancing Language Modeling</title>
-      <author><first>Geoffrey</first><last>Zweig</last></author>
-      <author><first>Chris J.C.</first><last>Burges</last></author>
+      <author id="geoffrey-zweig"><first>Geoffrey</first><last>Zweig</last></author>
+      <author id="christopher-j-c-burges"><first>Chris J.C.</first><last>Burges</last></author>
       <pages>29–36</pages>
       <url hash="51e2c682">W12-2704</url>
       <bibkey>zweig-burges-2012-challenge</bibkey>
     </paper>
     <paper id="5">
       <title>Unsupervised Vocabulary Adaptation for Morph-based Language Models</title>
-      <author><first>André</first><last>Mansikkaniemi</last></author>
+      <author id="andre-mansikkaniemi"><first>André</first><last>Mansikkaniemi</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <pages>37–40</pages>
       <url hash="40a0adff">W12-2705</url>
@@ -3948,10 +3948,10 @@
       <booktitle>Proceedings of the Third Workshop on Speech and Language Processing for Assistive Technologies</booktitle>
       <url hash="401c0a83">W12-29</url>
       <editor><first>Jan</first><last>Alexandersson</last></editor>
-      <editor><first>Peter</first><last>Ljunglöf</last></editor>
-      <editor><first>Kathleen F.</first><last>McCoy</last></editor>
+      <editor id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></editor>
+      <editor id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
-      <editor><first>Annalu</first><last>Waller</last></editor>
+      <editor id="annalu-waller"><first>Annalu</first><last>Waller</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -3975,8 +3975,8 @@
       <title><fixed-case>W</fixed-case>ink<fixed-case>T</fixed-case>alk: a demonstration of a multimodal speech synthesis platform linking facial expressions to expressive synthetic voices</title>
       <author><first>Éva</first><last>Székely</last></author>
       <author><first>Zeeshan</first><last>Ahmed</last></author>
-      <author><first>João P.</first><last>Cabral</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="joao-paulo-cabral"><first>João P.</first><last>Cabral</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <pages>5–8</pages>
       <url hash="073228a3">W12-2902</url>
       <bibkey>szekely-etal-2012-winktalk</bibkey>
@@ -3984,7 +3984,7 @@
     <paper id="3">
       <title>Discourse-Based Modeling for <fixed-case>AAC</fixed-case></title>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>9–18</pages>
       <url hash="c20da61f">W12-2903</url>
       <bibkey>mitchell-sproat-2012-discourse</bibkey>
@@ -4010,9 +4010,9 @@
     </paper>
     <paper id="6">
       <title>Assisting Social Conversation between Persons with <fixed-case>A</fixed-case>lzheimer’s Disease and their Conversational Partners</title>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Curry</first><last>Guinn</last></author>
-      <author><first>Ronnie</first><last>Smith</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="curry-i-guinn"><first>Curry</first><last>Guinn</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie</first><last>Smith</last></author>
       <pages>37–46</pages>
       <url hash="ec5d707a">W12-2906</url>
       <bibkey>green-etal-2012-assisting</bibkey>
@@ -4060,11 +4060,11 @@
       <booktitle>Proceedings of the Joint Workshop on Automatic Knowledge Base Construction and Web-scale Knowledge Extraction (<fixed-case>AKBC</fixed-case>-<fixed-case>WEKEX</fixed-case>)</booktitle>
       <url hash="7e9bd079">W12-30</url>
       <editor><first>James</first><last>Fan</last></editor>
-      <editor><first>Raphael</first><last>Hoffman</last></editor>
+      <editor id="raphael-hoffmann"><first>Raphael</first><last>Hoffman</last></editor>
       <editor><first>Aditya</first><last>Kalyanpur</last></editor>
       <editor><first>Sebastian</first><last>Riedel</last></editor>
-      <editor><first>Fabian</first><last>Suchanek</last></editor>
-      <editor><first>Partha Pratim</first><last>Talukdar</last></editor>
+      <editor id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></editor>
+      <editor id="partha-talukdar"><first>Partha Pratim</first><last>Talukdar</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montréal, Canada</address>
       <month>June</month>
@@ -4086,8 +4086,8 @@
     </paper>
     <paper id="2">
       <title>Collectively Representing Semi-Structured Data from the Web</title>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Jamie</first><last>Callan</last></author>
       <pages>7–12</pages>
       <url hash="b6e46a2c">W12-3002</url>
@@ -4122,7 +4122,7 @@
     <paper id="6">
       <title>Web Based Collection and Comparison of Cognitive Properties in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese</title>
       <author><first>Bin</first><last>Li</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <author><first>Yingjie</first><last>Zhang</last></author>
       <pages>31–34</pages>
       <url hash="e2b2a4b5">W12-3006</url>
@@ -4131,14 +4131,14 @@
     <paper id="7">
       <title>Population of a Knowledge Base for News Metadata from Unstructured Text and Web Data</title>
       <author><first>Rosa</first><last>Stern</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>35–40</pages>
       <url hash="0e65e9ad">W12-3007</url>
       <bibkey>stern-sagot-2012-population</bibkey>
     </paper>
     <paper id="8">
       <title>Real-time Population of Knowledge Bases: Opportunities and Challenges</title>
-      <author><first>Ndapandula</first><last>Nakashole</last></author>
+      <author id="ndapandula-nakashole"><first>Ndapandula</first><last>Nakashole</last></author>
       <author><first>Gerhard</first><last>Weikum</last></author>
       <pages>41–45</pages>
       <url hash="cc867ae8">W12-3008</url>
@@ -4161,7 +4161,7 @@
     </paper>
     <paper id="11">
       <title>Structural Linguistics and Unsupervised Information Extraction</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>57–61</pages>
       <url hash="ae57a956">W12-3011</url>
       <bibkey>grishman-2012-structural</bibkey>
@@ -4171,10 +4171,10 @@
       <author><first>Veselin</first><last>Stoyanov</last></author>
       <author><first>James</first><last>Mayfield</last></author>
       <author><first>Tan</first><last>Xu</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <author><first>Dawn</first><last>Lawrie</last></author>
       <author><first>Tim</first><last>Oates</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>62–67</pages>
       <url hash="29890747">W12-3012</url>
       <bibkey>stoyanov-etal-2012-context</bibkey>
@@ -4182,7 +4182,7 @@
     <paper id="13">
       <title>Evaluating the Quality of a Knowledge Base Populated from Text</title>
       <author><first>James</first><last>Mayfield</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>68–73</pages>
       <url hash="3ed9b11a">W12-3013</url>
       <bibkey>mayfield-finin-2012-evaluating</bibkey>
@@ -4190,7 +4190,7 @@
     <paper id="14">
       <title>Constructing a Textual <fixed-case>KB</fixed-case> from a Biology <fixed-case>T</fixed-case>ext<fixed-case>B</fixed-case>ook</title>
       <author><first>Peter</first><last>Clark</last></author>
-      <author><first>Phil</first><last>Harrison</last></author>
+      <author id="phil-harrison"><first>Phil</first><last>Harrison</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>74–78</pages>
@@ -4199,7 +4199,7 @@
     </paper>
     <paper id="15">
       <title>Knowledge Extraction and Joint Inference Using Tractable <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic</title>
-      <author><first>Chloé</first><last>Kiddon</last></author>
+      <author id="chloe-kiddon"><first>Chloé</first><last>Kiddon</last></author>
       <author><first>Pedro</first><last>Domingos</last></author>
       <pages>79–83</pages>
       <url hash="8002bf0b">W12-3015</url>
@@ -4226,7 +4226,7 @@
     <paper id="18">
       <title>Annotated <fixed-case>G</fixed-case>igaword</title>
       <author><first>Courtney</first><last>Napoles</last></author>
-      <author><first>Matthew</first><last>Gormley</last></author>
+      <author id="matthew-r-gormley"><first>Matthew</first><last>Gormley</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>95–100</pages>
       <url hash="01e31777">W12-3018</url>
@@ -4235,7 +4235,7 @@
     <paper id="19">
       <title>Rel-grams: A Probabilistic Model of Relations in Text</title>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>Stephen</first><last>Soderland</last></author>
+      <author id="stephen-soderland"><first>Stephen</first><last>Soderland</last></author>
       <author><first/><last>Mausam</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <pages>101–105</pages>
@@ -4244,7 +4244,7 @@
     </paper>
     <paper id="20">
       <title>Automatic Knowledge Base Construction using Probabilistic Extraction, Deductive Reasoning, and Human Feedback</title>
-      <author><first>Daisy Zhe</first><last>Wang</last></author>
+      <author id="daisy-zhe-wang"><first>Daisy Zhe</first><last>Wang</last></author>
       <author><first>Yang</first><last>Chen</last></author>
       <author><first>Sean</first><last>Goldberg</last></author>
       <author><first>Christan</first><last>Grant</last></author>
@@ -4274,7 +4274,7 @@
     <paper id="23">
       <title>Using Textual Patterns to Learn Expected Event Frequencies</title>
       <author><first>Jonathan</first><last>Gordon</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>122–127</pages>
       <url hash="4bbea024">W12-3023</url>
       <bibkey>gordon-schubert-2012-using</bibkey>
@@ -4321,7 +4321,7 @@
     </paper>
     <paper id="3">
       <title>Semantic Textual Similarity for <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>Julio</first><last>Castillo</last></author>
+      <author id="julio-castillo"><first>Julio</first><last>Castillo</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <pages>52–58</pages>
       <url hash="3f09e6db">W12-3103</url>
@@ -4340,15 +4340,15 @@
       <title><fixed-case>T</fixed-case>error<fixed-case>C</fixed-case>at: a Translation Error Categorization-based <fixed-case>MT</fixed-case> Quality Metric</title>
       <author><first>Mark</first><last>Fishel</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>64–70</pages>
       <url hash="e18fa297">W12-3105</url>
       <bibkey>fishel-etal-2012-terrorcat</bibkey>
     </paper>
     <paper id="6">
       <title>Class error rates for evaluation of machine translation output</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>71–75</pages>
       <url hash="36479f5a">W12-3106</url>
       <bibkey>popovic-2012-class</bibkey>
@@ -4356,7 +4356,7 @@
     <paper id="7">
       <title><fixed-case>SPEDE</fixed-case>: Probabilistic Edit Distance Metrics for <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Mengqiu</first><last>Wang</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>76–83</pages>
       <url hash="24a70f3b">W12-3107</url>
       <bibkey>wang-manning-2012-spede</bibkey>
@@ -4385,9 +4385,9 @@
     </paper>
     <paper id="11">
       <title><fixed-case>PRHLT</fixed-case> Submission to the <fixed-case>WMT</fixed-case>12 Quality Estimation Task</title>
-      <author><first>Jesús</first><last>González Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González Rubio</last></author>
       <author><first>Alberto</first><last>Sanchis</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>104–108</pages>
       <url hash="b7c86e70">W12-3111</url>
       <bibkey>gonzalez-rubio-etal-2012-prhlt</bibkey>
@@ -4396,7 +4396,7 @@
       <title>Tree Kernels for Machine Translation Quality Estimation</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>109–113</pages>
       <url hash="d9922ee7">W12-3112</url>
       <bibkey>hardmeier-etal-2012-tree</bibkey>
@@ -4405,7 +4405,7 @@
       <title><fixed-case>LORIA</fixed-case> System for the <fixed-case>WMT</fixed-case>12 Quality Estimation Shared Task</title>
       <author><first>David</first><last>Langlois</last></author>
       <author><first>Sylvain</first><last>Raybaud</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>114–119</pages>
       <url hash="13d2ef3d">W12-3113</url>
       <bibkey>langlois-etal-2012-loria</bibkey>
@@ -4421,26 +4421,26 @@
     <paper id="15">
       <title>The <fixed-case>UPC</fixed-case> Submission to the <fixed-case>WMT</fixed-case> 2012 Shared Task on Quality Estimation</title>
       <author><first>Daniele</first><last>Pighin</last></author>
-      <author><first>Meritxell</first><last>González</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>González</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>127–132</pages>
       <url hash="148a81bf">W12-3115</url>
       <bibkey>pighin-etal-2012-upc</bibkey>
     </paper>
     <paper id="16">
       <title>Morpheme- and <fixed-case>POS</fixed-case>-based <fixed-case>IBM</fixed-case>1 and language model scores for translation quality estimation</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>133–137</pages>
       <url hash="1c9c4eb1">W12-3116</url>
       <bibkey>popovic-2012-morpheme</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>DCU</fixed-case>-Symantec Submission for the <fixed-case>WMT</fixed-case> 2012 Quality Estimation Task</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
-      <author><first>Rasul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasul</first><last>Samad Zadeh Kaljahi</last></author>
       <author><first>Fred</first><last>Hollowood</last></author>
       <pages>138–144</pages>
       <url hash="3110370c">W12-3117</url>
@@ -4483,7 +4483,7 @@
     <paper id="22">
       <title>Match without a Referee: Evaluating <fixed-case>MT</fixed-case> Adequacy without Reference Translations</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>171–180</pages>
       <url hash="ac0700ff">W12-3122</url>
@@ -4498,15 +4498,15 @@
     </paper>
     <paper id="24">
       <title>Review of Hypothesis Alignment Algorithms for <fixed-case>MT</fixed-case> System Combination via Confusion Network Decoding</title>
-      <author><first>Antti-Veikko</first><last>Rosti</last></author>
+      <author id="antti-veikko-rosti"><first>Antti-Veikko</first><last>Rosti</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Damianos</first><last>Karakos</last></author>
       <author><first>Gregor</first><last>Leusch</last></author>
       <author><first>Yuan</first><last>Cao</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Spyros</first><last>Matsoukas</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
-      <author><first>Jason</first><last>Smith</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
+      <author id="jason-smith"><first>Jason</first><last>Smith</last></author>
       <author><first>Bing</first><last>Zhang</last></author>
       <pages>191–199</pages>
       <url hash="f21115d0">W12-3124</url>
@@ -4515,7 +4515,7 @@
     <paper id="25">
       <title>On Hierarchical Re-ordering and Permutation Parsing for Phrase-based Decoding</title>
       <author><first>Colin</first><last>Cherry</last></author>
-      <author><first>Robert C.</first><last>Moore</last></author>
+      <author id="robert-c-moore"><first>Robert C.</first><last>Moore</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>200–209</pages>
       <url hash="7b0520be">W12-3125</url>
@@ -4523,7 +4523,7 @@
     </paper>
     <paper id="26">
       <title><fixed-case>CCG</fixed-case> Syntactic Reordering Models for Phrase-based Machine Translation</title>
-      <author><first>Dennis Nolan</first><last>Mehay</last></author>
+      <author id="dennis-mehay"><first>Dennis Nolan</first><last>Mehay</last></author>
       <author><first>Christopher Hardie</first><last>Brew</last></author>
       <pages>210–221</pages>
       <url hash="d64365d7">W12-3126</url>
@@ -4540,10 +4540,10 @@
     </paper>
     <paper id="28">
       <title>Using Syntactic Head Information in Hierarchical Phrase-Based Translation</title>
-      <author><first>Junhui</first><last>Li</last></author>
+      <author id="junhui-li"><first>Junhui</first><last>Li</last></author>
       <author><first>Zhaopeng</first><last>Tu</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>232–242</pages>
       <url hash="2a999bb7">W12-3128</url>
       <bibkey>li-etal-2012-using</bibkey>
@@ -4559,7 +4559,7 @@
     </paper>
     <paper id="30">
       <title>Probes in a Taxonomy of Factored Phrase-Based Models</title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Bushra</first><last>Jawaid</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
       <pages>253–260</pages>
@@ -4570,7 +4570,7 @@
       <title>The <fixed-case>CMU</fixed-case>-Avenue <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Translation System</title>
       <author><first>Michael</first><last>Denkowski</last></author>
       <author><first>Greg</first><last>Hanneman</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>261–266</pages>
       <url hash="9e4e02b6">W12-3131</url>
       <bibkey>denkowski-etal-2012-cmu</bibkey>
@@ -4578,7 +4578,7 @@
     <paper id="32">
       <title>Formemes in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Deep Syntactic <fixed-case>MT</fixed-case></title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Martin</first><last>Majliš</last></author>
       <author><first>Michal</first><last>Novák</last></author>
@@ -4589,12 +4589,12 @@
     </paper>
     <paper id="33">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> phrase-based translation systems for <fixed-case>WMT</fixed-case>12: Morphology simplification and domain adaptation</title>
-      <author><first>Lluís</first><last>Formiga</last></author>
-      <author><first>Carlos A.</first><last>Henríquez Q.</last></author>
-      <author><first>Adolfo</first><last>Hernández</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
+      <author id="carlos-henriquez"><first>Carlos A.</first><last>Henríquez Q.</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
       <author><first>Enric</first><last>Monte</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>275–282</pages>
       <url hash="ebea92c1">W12-3133</url>
       <bibkey>formiga-etal-2012-talp</bibkey>
@@ -4619,10 +4619,10 @@
     </paper>
     <paper id="36">
       <title><fixed-case>QCRI</fixed-case> at <fixed-case>WMT</fixed-case>12: Experiments in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation of News Text</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ahmed</first><last>Thabet</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>298–303</pages>
       <url hash="5bc5984d">W12-3136</url>
       <bibkey>guzman-etal-2012-qcri</bibkey>
@@ -4633,7 +4633,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Malte</first><last>Nuhn</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>304–311</pages>
       <url hash="81b925d6">W12-3137</url>
       <bibkey>huck-etal-2012-rwth</bibkey>
@@ -4660,15 +4660,15 @@
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
-      <author><last>Le</last><first>Hai-son</first></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
+      <author id="hai-son-le"><first>Hai-son</first><last>Le</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
-      <author><first>Josep Maria</first><last>Crego</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="josep-m-crego"><first>Josep Maria</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>322–329</pages>
       <url hash="e52235a1">W12-3140</url>
@@ -4676,13 +4676,13 @@
     </paper>
     <paper id="41">
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>12</title>
-      <author><first>Hai-Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai-Son</first><last>Le</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Li</first><last>Gong</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>330–337</pages>
@@ -4692,8 +4692,8 @@
     <paper id="42">
       <title><fixed-case>UPM</fixed-case> system for <fixed-case>WMT</fixed-case> 2012</title>
       <author><first>Verónica</first><last>López-Ludeña</last></author>
-      <author><first>Rubén</first><last>San-Segundo</last></author>
-      <author><first>Juan M.</first><last>Montero</last></author>
+      <author id="ruben-san-segundo"><first>Rubén</first><last>San-Segundo</last></author>
+      <author id="juan-m-montero"><first>Juan M.</first><last>Montero</last></author>
       <pages>338–344</pages>
       <url hash="bee4641d">W12-3142</url>
       <bibkey>lopez-ludena-etal-2012-upm</bibkey>
@@ -4712,7 +4712,7 @@
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>349–355</pages>
       <url hash="f8e49114">W12-3144</url>
       <bibkey>niehues-etal-2012-karlsruhe</bibkey>
@@ -4720,7 +4720,7 @@
     <paper id="45">
       <title>Kriya - The <fixed-case>SFU</fixed-case> System for Translation Task at <fixed-case>WMT</fixed-case>-12</title>
       <author><first>Majid</first><last>Razmara</last></author>
-      <author><first>Baskaran</first><last>Sankaran</last></author>
+      <author id="baskaran-sankaran"><first>Baskaran</first><last>Sankaran</last></author>
       <author><first>Ann</first><last>Clifton</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>356–361</pages>
@@ -4738,7 +4738,7 @@
     </paper>
     <paper id="47">
       <title><fixed-case>LIUM</fixed-case>’s <fixed-case>SMT</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case> 2012</title>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Patrik</first><last>Lambert</last></author>
       <author><first>Anthony</first><last>Rousseau</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
@@ -4753,7 +4753,7 @@
       <author><first>Petra</first><last>Galuščáková</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>374–381</pages>
       <url hash="d1be1d53">W12-3148</url>
       <bibkey>tamchyna-etal-2012-selecting</bibkey>
@@ -4775,7 +4775,7 @@
     </paper>
     <paper id="51">
       <title>Data Issues of the Multilingual Translation Matrix</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>395–400</pages>
       <url hash="3e7f1841">W12-3151</url>
       <bibkey>zeman-2012-data</bibkey>
@@ -4827,7 +4827,7 @@
     <paper id="57">
       <title>Phrase Model Training for Statistical Machine Translation with Word Lattices of Preprocessing Alternatives</title>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>450–459</pages>
       <url hash="74b3e990">W12-3157</url>
       <bibkey>wuebker-ney-2012-phrase</bibkey>
@@ -4835,7 +4835,7 @@
     <paper id="58">
       <title>Leave-One-Out Phrase Model Training for Large-Scale Deployment</title>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Mei-Yuh</first><last>Hwang</last></author>
+      <author id="mei-yuh-hwang"><first>Mei-Yuh</first><last>Hwang</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>460–467</pages>
       <url hash="f8497718">W12-3158</url>
@@ -4861,7 +4861,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case>-2012 Special Workshop on Rediscovering 50 Years of Discoveries</booktitle>
       <url hash="0ea290e6">W12-32</url>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
       <month>July</month>
@@ -4874,7 +4874,7 @@
     </frontmatter>
     <paper id="1">
       <title>Rediscovering <fixed-case>ACL</fixed-case> Discoveries Through the Lens of <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Network Citing Sentences</title>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <pages>1–12</pages>
       <url hash="ca013c74">W12-3201</url>
@@ -4883,8 +4883,8 @@
     <paper id="2">
       <title>Towards a Computational History of the <fixed-case>ACL</fixed-case>: 1980-2008</title>
       <author><first>Ashton</first><last>Anderson</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Daniel A.</first><last>McFarland</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-mcfarland"><first>Daniel A.</first><last>McFarland</last></author>
       <pages>13–21</pages>
       <url hash="0fe03143">W12-3202</url>
       <bibkey>anderson-etal-2012-towards</bibkey>
@@ -4892,8 +4892,8 @@
     <paper id="3">
       <title>Discovering Factions in the Computational Linguistics Community</title>
       <author><first>Yanchuan</first><last>Sim</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>22–32</pages>
       <url hash="8d3a1182">W12-3203</url>
       <bibkey>sim-etal-2012-discovering</bibkey>
@@ -4901,15 +4901,15 @@
     <paper id="4">
       <title>He Said, She Said: Gender in the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology</title>
       <author><first>Adam</first><last>Vogel</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>33–41</pages>
       <url hash="b5e64384">W12-3204</url>
       <bibkey>vogel-jurafsky-2012-said</bibkey>
     </paper>
     <paper id="5">
       <title>Discourse Structure and Computation: Past, Present and Future</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>42–54</pages>
       <url hash="5b447d9b">W12-3205</url>
       <bibkey>webber-joshi-2012-discourse</bibkey>
@@ -4917,7 +4917,7 @@
     <paper id="6">
       <title>Extracting glossary sentences from scholarly articles: A comparative evaluation of pattern bootstrapping and deep analysis</title>
       <author><first>Melanie</first><last>Reiplinger</last></author>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Magdalena</first><last>Wolska</last></author>
       <pages>55–65</pages>
       <url hash="cf267950">W12-3206</url>
@@ -4925,7 +4925,7 @@
     </paper>
     <paper id="7">
       <title>Applying Collocation Segmentation to the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus</title>
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <pages>66–75</pages>
       <url hash="1a908e76">W12-3207</url>
       <bibkey>daudaravicius-2012-applying</bibkey>
@@ -4948,7 +4948,7 @@
     </paper>
     <paper id="10">
       <title>Towards an <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Corpus with Logical Document Structure. An Overview of the <fixed-case>ACL</fixed-case> 2012 Contributed Task</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Jonathon</first><last>Read</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <pages>88–97</pages>
@@ -4966,7 +4966,7 @@
     </paper>
     <paper id="12">
       <title>Combining <fixed-case>OCR</fixed-case> Outputs for Logical Document Structure Markup. Technical Background to the <fixed-case>ACL</fixed-case> 2012 Contributed Task</title>
-      <author><first>Ulrich</first><last>Schäfer</last></author>
+      <author id="ulrich-schafer"><first>Ulrich</first><last>Schäfer</last></author>
       <author><first>Benjamin</first><last>Weitz</last></author>
       <pages>104–109</pages>
       <url hash="a416ae60">W12-3212</url>
@@ -4985,9 +4985,9 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>ACL</fixed-case> 2012 Student Research Workshop</booktitle>
       <url hash="c8800a2e">W12-33</url>
-      <editor><first>Jackie C. K.</first><last>Cheung</last></editor>
+      <editor id="jackie-chi-kit-cheung"><first>Jackie C. K.</first><last>Cheung</last></editor>
       <editor><first>Jun</first><last>Hatori</last></editor>
-      <editor><first>Carlos</first><last>Henriquez</last></editor>
+      <editor id="carlos-henriquez"><first>Carlos</first><last>Henriquez</last></editor>
       <editor><first>Ann</first><last>Irvine</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
@@ -5002,7 +5002,7 @@
     <paper id="1">
       <title>A Broad Evaluation of Techniques for Automatic Acquisition of Multiword Expressions</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Vitor</first><last>De Araujo</last></author>
+      <author id="vitor-de-araujo"><first>Vitor</first><last>De Araujo</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>1–6</pages>
       <url hash="42e427b6">W12-3301</url>
@@ -5019,8 +5019,8 @@
       <title>Active Learning with Transfer Learning</title>
       <author><first>Chunyong</first><last>Luo</last></author>
       <author><first>Yangsheng</first><last>Ji</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>13–18</pages>
       <url hash="d8adb4b0">W12-3303</url>
       <bibkey>luo-etal-2012-active</bibkey>
@@ -5028,7 +5028,7 @@
     <paper id="4">
       <title>Query classification using topic models and support vector machine</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>19–24</pages>
       <url hash="1107d68d">W12-3304</url>
       <bibkey>le-bernardi-2012-query</bibkey>
@@ -5050,7 +5050,7 @@
     <paper id="7">
       <title><fixed-case>T</fixed-case>opic<fixed-case>T</fixed-case>iling: A Text Segmentation Algorithm based on <fixed-case>LDA</fixed-case></title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>37–42</pages>
       <url hash="613592de">W12-3307</url>
       <bibkey>riedl-biemann-2012-topictiling</bibkey>
@@ -5073,7 +5073,7 @@
     </paper>
     <paper id="10">
       <title>Discourse Structure in Simultaneous Spoken <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Isin</first><last>Demirşahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirşahin</last></author>
       <pages>55–60</pages>
       <url hash="1f99a105">W12-3310</url>
       <bibkey>demirsahin-2012-discourse</bibkey>
@@ -5105,7 +5105,7 @@
       <editor><first>Ido</first><last>Dagan</last></editor>
       <editor><first>Jennifer</first><last>Foster</last></editor>
       <editor><first>Yuval</first><last>Marton</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
       <editor><first>Reut</first><last>Tsarfaty</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Republic of Korea</address>
@@ -5119,8 +5119,8 @@
     </frontmatter>
     <paper id="1">
       <title>Probabilistic Lexical Generalization for <fixed-case>F</fixed-case>rench Dependency Parsing</title>
-      <author><first>Enrique</first><last>Henestroza Anguiano</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="enrique-henestroza-anguiano"><first>Enrique</first><last>Henestroza Anguiano</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>1–11</pages>
       <url hash="ec235bb0">W12-3401</url>
       <bibkey>henestroza-anguiano-candito-2012-probabilistic</bibkey>
@@ -5136,7 +5136,7 @@
       <title>Building an <fixed-case>A</fixed-case>rabic Multiword Expressions Repository</title>
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Kfir</first><last>Bar</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>24–29</pages>
       <url hash="2b68adc8">W12-3403</url>
       <bibkey>hawwari-etal-2012-building</bibkey>
@@ -5169,9 +5169,9 @@
     <paper id="7">
       <title>Combining Rule-Based and Statistical Syntactic Analyzers</title>
       <author><first>Iakes</first><last>Goenaga</last></author>
-      <author><first>Koldobika</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldobika</first><last>Gojenola</last></author>
       <author><first>María Jesús</first><last>Aranzabe</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
       <pages>48–54</pages>
       <url hash="affb09da">W12-3407</url>
@@ -5179,8 +5179,8 @@
     </paper>
     <paper id="8">
       <title>Statistical Parsing of <fixed-case>S</fixed-case>panish and Data Driven Lemmatization</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <author><first>Djamé</first><last>Seddah</last></author>
       <pages>55–61</pages>
       <url hash="76d79802">W12-3408</url>
@@ -5188,8 +5188,8 @@
     </paper>
     <paper id="9">
       <title>Assigning Deep Lexical Types Using Structured Classifier Features for Grammatical Dependencies</title>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>62–71</pages>
       <url hash="115d0fd2">W12-3409</url>
       <bibkey>silva-branco-2012-assigning</bibkey>
@@ -5198,7 +5198,7 @@
       <title>Using an <fixed-case>SVM</fixed-case> Ensemble System for Improved <fixed-case>T</fixed-case>amil Dependency Parsing</title>
       <author><first>Nathan</first><last>Green</last></author>
       <author><first>Loganathan</first><last>Ramasamy</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>72–77</pages>
       <url hash="f92de023">W12-3410</url>
       <bibkey>green-etal-2012-using</bibkey>
@@ -5207,15 +5207,15 @@
       <title><fixed-case>K</fixed-case>orean Treebank Transformation for Parser Training</title>
       <author><first>DongHyun</first><last>Choi</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>78–88</pages>
       <url hash="c8553e14">W12-3411</url>
       <bibkey>choi-etal-2012-korean</bibkey>
     </paper>
     <paper id="12">
       <title>Generative Constituent Parsing and Discriminative Dependency Reranking: Experiments on <fixed-case>E</fixed-case>nglish and <fixed-case>F</fixed-case>rench</title>
-      <author><first>Joseph</first><last>Le Roux</last></author>
-      <author><first>Benoît</first><last>Favre</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
+      <author id="benoit-favre"><first>Benoît</first><last>Favre</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
       <pages>89–99</pages>
@@ -5256,7 +5256,7 @@
       <title>Integration of Multimodal Interaction as Assistance in Virtual Environments</title>
       <author><first>Kiran</first><last>Pala</last></author>
       <author><first>Ram</first><last>Naresh</last></author>
-      <author><first>Sachin</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachin</first><last>Joshi</last></author>
       <author><first>Suryakanth V</first><last>Ganagshetty</last></author>
       <pages>8–12</pages>
       <url hash="531a80a9">W12-3502</url>
@@ -5291,11 +5291,11 @@
     <paper id="6">
       <title>Towards a Self-Learning Assistive Vocal Interface: Vocabulary and Grammar Learning</title>
       <author><first>Janneke</first><last>van de Loo</last></author>
-      <author><first>Jort F.</first><last>Gemmeke</last></author>
+      <author id="jort-florent-gemmeke"><first>Jort F.</first><last>Gemmeke</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
       <author><first>Joris</first><last>Driesen</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>34–42</pages>
       <url hash="c5872c83">W12-3506</url>
       <bibkey>van-de-loo-etal-2012-towards</bibkey>
@@ -5313,7 +5313,7 @@
     <meta>
       <booktitle>Proceedings of the Sixth Linguistic Annotation Workshop</booktitle>
       <url hash="14e889f8">W12-36</url>
-      <editor><first>Nancy</first><last>Ide</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
       <editor><first>Fei</first><last>Xia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Republic of Korea</address>
@@ -5327,7 +5327,7 @@
     </frontmatter>
     <paper id="1">
       <title>The Role of Linguistic Models and Language Annotation in Feature Selection for Machine Learning</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1</pages>
       <url hash="83757985">W12-3601</url>
       <bibkey>pustejovsky-2012-role</bibkey>
@@ -5336,8 +5336,8 @@
       <title>Who Did What to Whom? A Contrastive Study of Syntacto-Semantic Dependencies</title>
       <author><first>Angelina</first><last>Ivanova</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <pages>2–11</pages>
       <url hash="f8955191">W12-3602</url>
       <bibkey>ivanova-etal-2012-contrastive</bibkey>
@@ -5360,8 +5360,8 @@
     </paper>
     <paper id="5">
       <title>Pair Annotation: Adaption of Pair Programming to Corpus Annotation</title>
-      <author><first>Isin</first><last>Demirşahin</last></author>
-      <author><first>İhsan</first><last>Yalcinkaya</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirşahin</last></author>
+      <author id="ihsan-yalcinkaya"><first>İhsan</first><last>Yalcinkaya</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <pages>31–39</pages>
       <url hash="b576c8c4">W12-3605</url>
@@ -5369,12 +5369,12 @@
     </paper>
     <paper id="6">
       <title>Structured Named Entities in two distinct press corpora: Contemporary Broadcast News and Old Newspapers</title>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Juliette</first><last>Kahn</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>40–48</pages>
       <url hash="8f798998">W12-3606</url>
       <bibkey>rosset-etal-2012-structured</bibkey>
@@ -5384,7 +5384,7 @@
       <author><first>Prudhvi</first><last>Kosaraju</last></author>
       <author><first>Bharat Ram</first><last>Ambati</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>49–56</pages>
       <url hash="0f18fa1d">W12-3607</url>
@@ -5400,15 +5400,15 @@
     </paper>
     <paper id="9">
       <title>A <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case>-compliant <fixed-case>I</fixed-case>ndonesian Speech Recognition Web Service on the Language Grid for Transcription Crowdsourcing</title>
-      <author><first>Bayu</first><last>Distiawan</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="bayu-distiawan"><first>Bayu</first><last>Distiawan</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>67–74</pages>
       <url hash="5c3e108b">W12-3609</url>
       <bibkey>distiawan-manurung-2012-graf</bibkey>
     </paper>
     <paper id="10">
       <title>Towards Adaptation of Linguistic Annotations to Scholarly Annotation Formalisms on the Semantic Web</title>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Kevin</first><last>Livingston</last></author>
       <pages>75–84</pages>
       <url hash="dd89c553">W12-3610</url>
@@ -5418,8 +5418,8 @@
       <title>Intonosyntactic Data Structures: The Rhapsodie Treebank of Spoken <fixed-case>F</fixed-case>rench</title>
       <author><first>Kim</first><last>Gerdes</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
-      <author><first>Anne</first><last>Lacheret</last></author>
-      <author><first>Paola</first><last>Pietandrea</last></author>
+      <author id="anne-lacheret"><first>Anne</first><last>Lacheret</last></author>
+      <author id="paola-pietrandrea"><first>Paola</first><last>Pietandrea</last></author>
       <author><first>Arthur</first><last>Truong</last></author>
       <pages>85–94</pages>
       <url hash="7c90b7c0">W12-3611</url>
@@ -5428,12 +5428,12 @@
     <paper id="12">
       <title>Annotation Schemes to Encode Domain Knowledge in Medical Narratives</title>
       <author><first>Wilson</first><last>McCoy</last></author>
-      <author><first>Cecilia Ovesdotter</first><last>Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia Ovesdotter</first><last>Alm</last></author>
       <author><first>Cara</first><last>Calvelli</last></author>
       <author><first>Rui</first><last>Li</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
       <author><first>Pengcheng</first><last>Shi</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>95–103</pages>
       <url hash="4269a827">W12-3612</url>
       <bibkey>mccoy-etal-2012-annotation</bibkey>
@@ -5449,7 +5449,7 @@
       <title>Search Result Diversification Methods to Assist Lexicographers</title>
       <author><first>Lars</first><last>Borin</last></author>
       <author><first>Markus</first><last>Forsberg</last></author>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <author><first>Richard</first><last>Johansson</last></author>
       <author><first>Annika</first><last>Kjellandsson</last></author>
       <pages>113–117</pages>
@@ -5486,24 +5486,24 @@
       <author><first>Francesca</first><last>Bonin</last></author>
       <author><first>Fabio</first><last>Cavulli</last></author>
       <author><first>Aronne</first><last>Noriller</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Egon W.</first><last>Stemle</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="egon-stemle"><first>Egon W.</first><last>Stemle</last></author>
       <pages>134–138</pages>
       <url hash="d80a6307">W12-3618</url>
       <bibkey>bonin-etal-2012-annotating</bibkey>
     </paper>
     <paper id="19">
       <title>Annotating Preferences in Chats for Strategic Games</title>
-      <author><first>Anaïs</first><last>Cadilhac</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="anais-cadilhac"><first>Anaïs</first><last>Cadilhac</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <pages>139–143</pages>
       <url hash="bb01f172">W12-3619</url>
       <bibkey>cadilhac-etal-2012-annotating-preferences</bibkey>
     </paper>
     <paper id="20">
       <title>Morpheme Segmentation in the <fixed-case>METU</fixed-case>-Sabancı <fixed-case>T</fixed-case>urkish Treebank</title>
-      <author><first>Ruket</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
       <pages>144–148</pages>
       <url hash="429f1a3f">W12-3620</url>
       <bibkey>cakici-2012-morpheme</bibkey>
@@ -5512,7 +5512,7 @@
       <title><fixed-case>A</fixed-case>lvis<fixed-case>AE</fixed-case>: a collaborative Web text annotation editor for knowledge acquisition</title>
       <author><first>Frédéric</first><last>Papazian</last></author>
       <author><first>Robert</first><last>Bossy</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>149–152</pages>
       <url hash="25d664d3">W12-3621</url>
       <bibkey>papazian-etal-2012-alvisae</bibkey>
@@ -5527,8 +5527,8 @@
     </paper>
     <paper id="23">
       <title>Dependency Treebank of <fixed-case>U</fixed-case>rdu and its Evaluation</title>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>157–165</pages>
       <url hash="59f789f3">W12-3623</url>
       <bibkey>bhat-sharma-2012-dependency</bibkey>
@@ -5536,8 +5536,8 @@
     <paper id="24">
       <title>Annotating Coordination in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank</title>
       <author><first>Wolfgang</first><last>Maier</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Julia</first><last>Krivanek</last></author>
       <pages>166–174</pages>
       <url hash="24f138a7">W12-3624</url>
@@ -5568,9 +5568,9 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop in Computational Approaches to Subjectivity and Sentiment Analysis</booktitle>
       <url hash="2a25580c">W12-37</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
-      <editor><first>Patricio Martinez</first><last>Barco</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="patricio-martinez-barco"><first>Patricio Martinez</first><last>Barco</last></editor>
       <editor><first>Ester</first><last>Boldrini</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Korea</address>
@@ -5584,14 +5584,14 @@
     </frontmatter>
     <paper id="1">
       <title>Multimodal Sentiment Analysis</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1</pages>
       <url hash="1891660c">W12-3701</url>
       <bibkey>mihalcea-2012-multimodal</bibkey>
     </paper>
     <paper id="2">
       <title>Subjectivity Word Sense Disambiguation</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>2</pages>
       <url hash="04564a95">W12-3702</url>
       <bibkey>wiebe-2012-subjectivity</bibkey>
@@ -5599,9 +5599,9 @@
     <paper id="3">
       <title>Random Walk Weighting over <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Sentiment Polarity Detection on <fixed-case>T</fixed-case>witter</title>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>3–10</pages>
       <url hash="682d5445">W12-3703</url>
       <bibkey>montejo-raez-etal-2012-random</bibkey>
@@ -5621,8 +5621,8 @@
     <paper id="5">
       <title><fixed-case>SAMAR</fixed-case>: A System for Subjectivity and Sentiment Analysis of <fixed-case>A</fixed-case>rabic Social Media</title>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>19–28</pages>
       <url hash="63b7e09c">W12-3705</url>
       <bibkey>abdul-mageed-etal-2012-samar</bibkey>
@@ -5630,8 +5630,8 @@
     <paper id="6">
       <title><fixed-case>O</fixed-case>pinum: statistical sentiment analysis for opinion classification</title>
       <author><first>Boyan</first><last>Bonev</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
-      <author><first>Sergio</first><last>Ortiz Rojas</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz Rojas</last></author>
       <pages>29–37</pages>
       <url hash="103d9a87">W12-3706</url>
       <bibkey>bonev-etal-2012-opinum</bibkey>
@@ -5639,7 +5639,7 @@
     <paper id="7">
       <title><fixed-case>S</fixed-case>entimantics: Conceptual Spaces for Lexical Sentiment Polarity Representation with Contextuality</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><last>Gambäck</last><first>Björn</first></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>38–46</pages>
       <url hash="9df783b3">W12-3707</url>
       <bibkey>das-gamback-2012-sentimantics</bibkey>
@@ -5665,7 +5665,7 @@
       <author><first>Jie</first><last>Yin</last></author>
       <author><first>Nalin</first><last>Narang</last></author>
       <author><first>Paul</first><last>Thomas</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <pages>61–69</pages>
       <url hash="2ac4ff85">W12-3710</url>
       <bibkey>yin-etal-2012-unifying</bibkey>
@@ -5673,8 +5673,8 @@
     <paper id="11">
       <title>Prior versus Contextual Emotion of a Word in a Sentence</title>
       <author><first>Diman</first><last>Ghazi</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>70–78</pages>
       <url hash="db5b2c46">W12-3711</url>
       <bibkey>ghazi-etal-2012-prior</bibkey>
@@ -5682,7 +5682,7 @@
     <paper id="12">
       <title>Cross-discourse Development of Supervised Sentiment Analysis in the Clinical Domain</title>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>79–83</pages>
       <url hash="ff3936ed">W12-3712</url>
       <bibkey>smith-lee-2012-cross</bibkey>
@@ -5690,7 +5690,7 @@
     <paper id="13">
       <title><fixed-case>POLITICAL</fixed-case>-<fixed-case>ADS</fixed-case>: An annotated corpus for modeling event-level evaluativity</title>
       <author><first>Kevin</first><last>Reschke</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <pages>84–88</pages>
       <url hash="154bc8c9">W12-3713</url>
       <bibkey>reschke-anand-2012-political</bibkey>
@@ -5699,7 +5699,7 @@
       <title>Automatically Annotating A Five-Billion-Word Corpus of <fixed-case>J</fixed-case>apanese Blogs for Affect and Sentiment Analysis</title>
       <author><first>Michal</first><last>Ptaszynski</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <pages>89–98</pages>
       <url hash="b21983d4">W12-3714</url>
@@ -5751,22 +5751,22 @@
       <title>Disfluencies as Extra-Propositional Indicators of Cognitive Processing</title>
       <author><first>Kathryn</first><last>Womack</last></author>
       <author><first>Wilson</first><last>McCoy</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Cara</first><last>Calvelli</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
       <author><first>Pengcheng</first><last>Shi</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>1–9</pages>
       <url hash="01ae12dd">W12-3801</url>
       <bibkey>womack-etal-2012-disfluencies</bibkey>
     </paper>
     <paper id="2">
       <title>How do Negation and Modality Impact on Opinions?</title>
-      <author><first>Farah</first><last>Benamara</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
       <author><first>Baptiste</first><last>Chardon</last></author>
-      <author><first>Yannick</first><last>Mathieu</last></author>
+      <author id="yannick-mathieu"><first>Yannick</first><last>Mathieu</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>10–18</pages>
       <url hash="69a3d982">W12-3802</url>
       <bibkey>benamara-etal-2012-negation</bibkey>
@@ -5774,11 +5774,11 @@
     <paper id="3">
       <title>Linking Uncertainty in Physicians’ Narratives to Diagnostic Correctness</title>
       <author><first>Wilson</first><last>McCoy</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Cara</first><last>Calvelli</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
       <author><first>Pengcheng</first><last>Shi</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>19–27</pages>
       <url hash="9c54e24d">W12-3803</url>
       <bibkey>mccoy-etal-2012-linking</bibkey>
@@ -5806,7 +5806,7 @@
       <author><first>Sampo</first><last>Pyysalo</last></author>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>47–56</pages>
       <url hash="0cf5d25d">W12-3806</url>
       <bibkey>stenetorp-etal-2012-bridging</bibkey>
@@ -5815,11 +5815,11 @@
       <title>Statistical Modality Tagging from Rule-based Annotations and Crowdsourcing</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Michael</first><last>Bloodgood</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Christine D.</first><last>Piatko</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="christine-piatko"><first>Christine D.</first><last>Piatko</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
       <pages>57–64</pages>
       <url hash="ba84015c">W12-3807</url>
@@ -5827,7 +5827,7 @@
     </paper>
     <paper id="8">
       <title>Annotating the Focus of Negation in terms of Questions Under Discussion</title>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <author><first>Craig</first><last>Martell</last></author>
       <pages>65–69</pages>
       <url hash="4902dcef">W12-3808</url>
@@ -5847,7 +5847,7 @@
     <paper id="10">
       <title>Recognizing Arguing Subjectivity and Argument Tags</title>
       <author><first>Alexander</first><last>Conrad</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Rebecca</first><last>Hwa</last></author>
       <pages>80–88</pages>
       <url hash="b2ee9b41">W12-3810</url>
@@ -5861,7 +5861,7 @@
       <editor><first>Jagadeesh</first><last>Jagarlamudi</last></editor>
       <editor><first>Sujith</first><last>Ravi</last></editor>
       <editor><first>Xiaojun</first><last>Wan</last></editor>
-      <editor><first>Hal</first><last>Daume III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daume III</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Republic of Korea</address>
       <month>July</month>
@@ -5893,7 +5893,7 @@
     <paper id="3">
       <title>The Study of Effect of Length in Morphological Segmentation of Agglutinative Languages</title>
       <author><first>Loganathan</first><last>Ramasamy</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Sowmya</first><last>Vajjala</last></author>
       <pages>18–24</pages>
       <url hash="46ead906">W12-3903</url>
@@ -5904,8 +5904,8 @@
       <author><first>Roger</first><last>Granada</last></author>
       <author><first>Lucelene</first><last>Lopes</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Cassia</first><last>Trojahn</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="cassia-trojahn"><first>Cassia</first><last>Trojahn</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>25–31</pages>
       <url hash="105974ff">W12-3904</url>
@@ -5917,7 +5917,7 @@
       <booktitle>Proceedings of the 3rd Workshop on the People’s Web Meets <fixed-case>NLP</fixed-case>: Collaboratively Constructed Semantic Resources and their Applications to <fixed-case>NLP</fixed-case></booktitle>
       <url hash="4e32018d">W12-40</url>
       <editor><first>Iryna</first><last>Gurevych</last></editor>
-      <editor><first>Nicoletta Calzolari</first><last>Zamorani</last></editor>
+      <editor id="nicoletta-calzolari"><first>Nicoletta Calzolari</first><last>Zamorani</last></editor>
       <editor><first>Jungi</first><last>Kim</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Republic of Korea</address>
@@ -5931,7 +5931,7 @@
     </frontmatter>
     <paper id="1">
       <title>Sentiment Analysis Using a Novel Human Computation Game</title>
-      <author><first>Claudiu-Cristian</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu-Cristian</first><last>Musat</last></author>
       <author><first>Alireza</first><last>Ghasemi</last></author>
       <author><first>Boi</first><last>Faltings</last></author>
       <pages>1–9</pages>
@@ -5942,7 +5942,7 @@
     </paper>
     <paper id="2">
       <title>A Serious Game for Building a <fixed-case>P</fixed-case>ortuguese Lexical-Semantic Network</title>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <pages>10–14</pages>
       <url hash="7b88a009">W12-4002</url>
@@ -5951,7 +5951,7 @@
     <paper id="3">
       <title>Collaboratively Building Language Resources while Localising the Web</title>
       <author><first>Asanka</first><last>Wasala</last></author>
-      <author><first>Reinhard</first><last>Schäler</last></author>
+      <author id="reinhard-schaler"><first>Reinhard</first><last>Schäler</last></author>
       <author><first>Ruvan</first><last>Weerasinghe</last></author>
       <author><first>Chris</first><last>Exton</last></author>
       <pages>15–19</pages>
@@ -5967,7 +5967,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>EAGER</fixed-case>: Extending Automatically Gazetteers for Entity Recognition</title>
-      <author><first>Omer Farukhan</first><last>Gunes</last></author>
+      <author id="omer-farukhan-gunes"><first>Omer Farukhan</first><last>Gunes</last></author>
       <author><first>Tim</first><last>Furche</last></author>
       <author><first>Christian</first><last>Schallhart</last></author>
       <author><first>Jens</first><last>Lehmann</last></author>
@@ -5979,7 +5979,7 @@
     <paper id="6">
       <title>Extracting Context-Rich Entailment Rules from <fixed-case>W</fixed-case>ikipedia Revision History</title>
       <author><first>Elena</first><last>Cabrio</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Angelina</first><last>Ivanova</last></author>
       <pages>34–43</pages>
       <url hash="2847fae4">W12-4006</url>
@@ -5990,9 +5990,9 @@
     <meta>
       <booktitle>Workshop Proceedings of <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-7: Graph-based Methods for Natural Language Processing</booktitle>
       <url hash="46651f7f">W12-41</url>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
-      <editor><first>Ahmed</first><last>Hassan</last></editor>
-      <editor><first>Gael</first><last>Dias</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="ahmed-hassan"><first>Ahmed</first><last>Hassan</last></editor>
+      <editor id="gael-dias"><first>Gael</first><last>Dias</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Republic of Korea</address>
       <month>July</month>
@@ -6015,7 +6015,7 @@
       <title>Extracting Signed Social Networks from Text</title>
       <author><first>Ahmed</first><last>Hassan</last></author>
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>6–14</pages>
       <url hash="714d9934">W12-4102</url>
       <bibkey>hassan-etal-2012-extracting</bibkey>
@@ -6024,7 +6024,7 @@
       <title>Using Link Analysis to Discover Interesting Messages Spread Across <fixed-case>T</fixed-case>witter</title>
       <author><first>Min-Chul</first><last>Yang</last></author>
       <author><first>Jung-Tae</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>15–19</pages>
       <url hash="e11a6689">W12-4103</url>
       <bibkey>yang-etal-2012-using</bibkey>
@@ -6032,7 +6032,7 @@
     <paper id="4">
       <title>Graph Based Similarity Measures for Synonym Extraction from Parsed Text</title>
       <author><first>Einat</first><last>Minkov</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>20–24</pages>
       <url hash="bc7643ac">W12-4104</url>
       <bibkey>minkov-cohen-2012-graph</bibkey>
@@ -6040,7 +6040,7 @@
     <paper id="5">
       <title>Semantic Relatedness for Biomedical Word Sense Disambiguation</title>
       <author><first>Kiem-Hieu</first><last>Nguyen</last></author>
-      <author><first>Cheol-Young</first><last>Ock</last></author>
+      <author id="cheol-young-ock"><first>Cheol-Young</first><last>Ock</last></author>
       <pages>25–29</pages>
       <url hash="c5269bef">W12-4105</url>
       <bibkey>nguyen-ock-2012-semantic</bibkey>
@@ -6090,7 +6090,7 @@
       <title><fixed-case>WSD</fixed-case> for n-best reranking and local language modeling in <fixed-case>SMT</fixed-case></title>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>1–9</pages>
@@ -6101,7 +6101,7 @@
       <title>Linguistically-Enriched Models for <fixed-case>B</fixed-case>ulgarian-to-<fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>10–19</pages>
       <url hash="8875582f">W12-4202</url>
       <bibkey>wang-etal-2012-linguistically-enriched</bibkey>
@@ -6116,7 +6116,7 @@
     </paper>
     <paper id="4">
       <title>Towards a Predicate-Argument Evaluation for <fixed-case>MT</fixed-case></title>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Dekai</first><last>Wu</last></author>
       <pages>30–38</pages>
       <url hash="021fa329">W12-4204</url>
@@ -6170,7 +6170,7 @@
     </paper>
     <paper id="10">
       <title>Using Domain-specific and Collaborative Resources for Term Translation</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>86–94</pages>
@@ -6180,7 +6180,7 @@
     <paper id="11">
       <title>Improving Statistical Machine Translation through co-joining parts of verbal constructs in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi translation</title>
       <author><first>Karunesh Kumar</first><last>Arora</last></author>
-      <author><first>R Mahesh K</first><last>Sinha</last></author>
+      <author id="r-mahesh-k-sinha"><first>R Mahesh K</first><last>Sinha</last></author>
       <pages>95–101</pages>
       <url hash="005702e7">W12-4211</url>
       <bibkey>arora-sinha-2012-improving</bibkey>
@@ -6214,7 +6214,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Detecting Structure in Scholarly Discourse</booktitle>
       <url hash="115f8c20">W12-43</url>
-      <editor><first>Antal</first><last>Van Den Bosch</last></editor>
+      <editor id="antal-van-den-bosch"><first>Antal</first><last>Van Den Bosch</last></editor>
       <editor><first>Hagit</first><last>Shatkay</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju Island, Korea</address>
@@ -6236,7 +6236,7 @@
     </paper>
     <paper id="2">
       <title>Identifying Claimed Knowledge Updates in Biomedical Research Articles</title>
-      <author><first>Ágnes</first><last>Sándor</last></author>
+      <author id="agnes-sandor"><first>Ágnes</first><last>Sándor</last></author>
       <author><first>Anita</first><last>de Waard</last></author>
       <pages>10–17</pages>
       <url hash="8d7a3157">W12-4302</url>
@@ -6254,7 +6254,7 @@
       <title>Open-domain Anatomical Entity Mention Detection</title>
       <author><first>Tomoko</first><last>Ohta</last></author>
       <author><first>Sampo</first><last>Pyysalo</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>27–36</pages>
       <url hash="302de641">W12-4304</url>
@@ -6287,7 +6287,7 @@
       <url hash="ff323ae3">W12-44</url>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Jeju, Korea</address>
       <month>July</month>
@@ -6321,7 +6321,7 @@
     <paper id="3">
       <title>Accurate Unsupervised Joint Named-Entity Extraction from Unaligned Parallel Text</title>
       <author><first>Robert</first><last>Munro</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>21–29</pages>
       <url hash="3e20cdfa">W12-4403</url>
       <bibkey>munro-manning-2012-accurate</bibkey>
@@ -6337,7 +6337,7 @@
     <paper id="5">
       <title>Automatically generated <fixed-case>NE</fixed-case> tagged corpora for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>ungarian</title>
       <author><first>Eszter</first><last>Simon</last></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
       <pages>38–46</pages>
       <url hash="f66d5636">W12-4405</url>
       <bibkey>simon-nemeskey-2012-automatically</bibkey>
@@ -6345,8 +6345,8 @@
     <paper id="6">
       <title>Rescoring a Phrase-based Machine Transliteration System with Recurrent Neural Network Language Models</title>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Paul</first><last>Dixon</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="paul-dixon"><first>Paul</first><last>Dixon</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>47–51</pages>
       <url hash="1b9460b0">W12-4406</url>
       <bibkey>finch-etal-2012-rescoring</bibkey>
@@ -6355,16 +6355,16 @@
       <title>Syllable-based Machine Transliteration with Extra Phrase Features</title>
       <author><first>Chunyue</first><last>Zhang</last></author>
       <author><first>Tingting</first><last>Li</last></author>
-      <author><first>Tiejun</first><last>Zhao</last></author>
+      <author id="tiejun-zhao"><first>Tiejun</first><last>Zhao</last></author>
       <pages>52–56</pages>
       <url hash="fd4cfcdc">W12-4407</url>
       <bibkey>zhang-etal-2012-syllable</bibkey>
     </paper>
     <paper id="8">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Named Entity Transliteration Using Substring Alignment and Re-ranking Methods</title>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>57–60</pages>
       <url hash="28fa9b1a">W12-4408</url>
       <bibkey>wu-etal-2012-english</bibkey>
@@ -6379,8 +6379,8 @@
     <paper id="10">
       <title>Transliteration by Sequence Labeling with Lattice Encodings and Reranking</title>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Noah</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah</first><last>Smith</last></author>
       <pages>66–70</pages>
       <url hash="8ab16b2b">W12-4410</url>
       <bibkey>ammar-etal-2012-transliteration</bibkey>
@@ -6396,11 +6396,11 @@
     </paper>
     <paper id="12">
       <title>Cost-benefit Analysis of Two-Stage Conditional Random Fields based <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Machine Transliteration</title>
-      <author><first>Chan-Hung</first><last>Kuo</last></author>
+      <author id="chan-hung-kuo"><first>Chan-Hung</first><last>Kuo</last></author>
       <author><first>Shih-Hung</first><last>Liu</last></author>
-      <author><first>Mike Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Mike Tian-Jian</first><last>Jiang</last></author>
       <author><first>Cheng-Wei</first><last>Lee</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>76–80</pages>
       <url hash="42e55058">W12-4412</url>
       <bibkey>kuo-etal-2012-cost</bibkey>
@@ -6410,7 +6410,7 @@
     <meta>
       <booktitle>Joint Conference on <fixed-case>EMNLP</fixed-case> and <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> - Shared Task</booktitle>
       <url hash="6b3e7bba">W12-45</url>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Alessandro</first><last>Moschitti</last></editor>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6436,9 +6436,9 @@
     </paper>
     <paper id="2">
       <title>Latent Structure Perceptron with Feature Induction for Unrestricted Coreference Resolution</title>
-      <author><first>Eraldo</first><last>Fernandes</last></author>
-      <author><first>Cícero</first><last>dos Santos</last></author>
-      <author><first>Ruy</first><last>Milidiú</last></author>
+      <author id="eraldo-fernandes"><first>Eraldo</first><last>Fernandes</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
+      <author id="ruy-luiz-milidiu"><first>Ruy</first><last>Milidiú</last></author>
       <pages>41–48</pages>
       <url hash="28b24262">W12-4502</url>
       <bibkey>fernandes-etal-2012-latent</bibkey>
@@ -6446,7 +6446,7 @@
     <paper id="3">
       <title>Data-driven Multilingual Coreference Resolution using Resolver Stacking</title>
       <author><first>Anders</first><last>Björkelund</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>49–55</pages>
       <url hash="32e34ebb">W12-4503</url>
       <bibkey>bjorkelund-farkas-2012-data</bibkey>
@@ -6503,7 +6503,7 @@
     <paper id="9">
       <title><fixed-case>UBIU</fixed-case> for Multilingual Coreference Resolution in <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes</title>
       <author><first>Desislava</first><last>Zhekova</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Joshua</first><last>Bonner</last></author>
       <author><first>Marwa</first><last>Ragheb</last></author>
       <author><first>Yu-Yin</first><last>Hsu</last></author>
@@ -6568,7 +6568,7 @@
       <title><fixed-case>BART</fixed-case> goes multilingual: The <fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> / <fixed-case>E</fixed-case>ssex submission to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2012 Shared Task</title>
       <author><first>Olga</first><last>Uryupina</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>122–128</pages>
       <url hash="1bc6a50f">W12-4515</url>
       <bibkey>uryupina-etal-2012-bart</bibkey>
@@ -6586,7 +6586,7 @@
       <booktitle>Proceedings of the 11th International Workshop on Tree Adjoining Grammars and Related Formalisms (<fixed-case>TAG</fixed-case>+11)</booktitle>
       <url hash="8ad49bda">W12-46</url>
       <editor><first>Giorgio</first><last>Satta</last></editor>
-      <editor><first>Chung-Hye</first><last>Han</last></editor>
+      <editor id="chung-hye-han"><first>Chung-Hye</first><last>Han</last></editor>
       <address>Paris, France</address>
       <month>September</month>
       <year>2012</year>
@@ -6600,14 +6600,14 @@
       <title>Delayed Tree-Locality, Set-locality, and Clitic Climbing</title>
       <author><first>Joan</first><last>Chen-Main</last></author>
       <author><first>Tonia</first><last>Bleam</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>1–9</pages>
       <url hash="d1fc786d">W12-4601</url>
       <bibkey>chen-main-etal-2012-delayed</bibkey>
     </paper>
     <paper id="2">
       <title>Deriving syntax-semantics mappings: node linking, type shifting and scope ambiguity</title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <pages>10–18</pages>
       <url hash="25e59693">W12-4602</url>
@@ -6652,7 +6652,7 @@
     </paper>
     <paper id="8">
       <title>Incremental Neo-<fixed-case>D</fixed-case>avidsonian semantic construction for <fixed-case>TAG</fixed-case></title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>64–72</pages>
       <url hash="9c8b6876">W12-4608</url>
@@ -6669,7 +6669,7 @@
       <title>Describing São Tomense Using a Tree-Adjoining Meta-Grammar</title>
       <author><first>Emmanuel</first><last>Schang</last></author>
       <author><first>Denys</first><last>Duchier</last></author>
-      <author><first>Brunelle</first><last>Magnana Ekoukou</last></author>
+      <author id="brunelle-magnana-ekoukou"><first>Brunelle</first><last>Magnana Ekoukou</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <author><first>Simon</first><last>Petitjean</last></author>
       <pages>82–89</pages>
@@ -6725,7 +6725,7 @@
     </paper>
     <paper id="17">
       <title>Practical Parsing of Parallel Multiple Context-Free Grammars</title>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>144–152</pages>
       <url hash="66ceb51a">W12-4617</url>
       <bibkey>ljunglof-2012-practical</bibkey>
@@ -6739,8 +6739,8 @@
     </paper>
     <paper id="19">
       <title>Creating a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar from a Multilayer Treebank</title>
-      <author><first>Rajesh</first><last>Bhatt</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>162–170</pages>
       <url hash="b0fc6216">W12-4619</url>
@@ -6787,8 +6787,8 @@
     <paper id="25">
       <title>A linguistically-motivated 2-stage Tree to Graph Transformation</title>
       <author><first>Corentin</first><last>Ribeyre</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <pages>214–222</pages>
       <url hash="a0f7ff8e">W12-4625</url>
       <bibkey>ribeyre-etal-2012-linguistically</bibkey>
@@ -6803,7 +6803,7 @@
     <paper id="27">
       <title>The Shape of Elementary Trees and Scope Possibilities in <fixed-case>STAG</fixed-case></title>
       <author><first>Robert</first><last>Frank</last></author>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <pages>232–240</pages>
       <url hash="10447235">W12-4627</url>
       <bibkey>frank-storoshenko-2012-shape</bibkey>
@@ -6813,7 +6813,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Advances in Discourse Analysis and its Computational Aspects</booktitle>
       <url hash="da09000e">W12-47</url>
-      <editor><first>Eva</first><last>Hajičová</last></editor>
+      <editor id="eva-hajicova"><first>Eva</first><last>Hajičová</last></editor>
       <editor><first>Lucie</first><last>Poláková</last></editor>
       <editor><first>Jiří</first><last>Mírovský</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
@@ -6845,7 +6845,7 @@
     </paper>
     <paper id="3">
       <title>Measuring the Strength of Linguistic Cues for Discourse Relations</title>
-      <author><first>Fatemeh Torabi</first><last>Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh Torabi</first><last>Asr</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>33–42</pages>
       <url hash="9db192bd">W12-4703</url>
@@ -6906,7 +6906,7 @@
       <title>Multi-objective Optimization for Efficient <fixed-case>B</fixed-case>rahmic Keyboards</title>
       <author><first>Albert</first><last>Brouillette</last></author>
       <author><first>Devraj</first><last>Sarmah</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>29–44</pages>
       <url hash="9cb5d54b">W12-4803</url>
       <bibkey>brouillette-etal-2012-multi</bibkey>
@@ -6931,7 +6931,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>B</fixed-case>angla Phonetic Input Method with Foreign Words Handling</title>
-      <author><first>Khan Md. Anwarus</first><last>Salam</last></author>
+      <author id="md-anwarus-salam-khan"><first>Khan Md. Anwarus</first><last>Salam</last></author>
       <author><first>Setsuo</first><last>Yamada</last></author>
       <author><first>Tetsuro</first><last>Nishino</last></author>
       <pages>73–78</pages>
@@ -6976,7 +6976,7 @@
       <booktitle>Proceedings of the First Workshop on Eye-tracking and Natural Language Processing</booktitle>
       <url hash="1f4d791e">W12-49</url>
       <editor><first>Michael</first><last>Carl</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <editor><first>Kamal Kumar</first><last>Choudhary</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
@@ -6990,7 +6990,7 @@
     </frontmatter>
     <paper id="1">
       <title>Grounding spoken interaction with real-time gaze in dynamic virtual environments</title>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <pages>1–4</pages>
       <url hash="d12a8cca">W12-4901</url>
       <bibkey>crocker-2012-grounding</bibkey>
@@ -7026,7 +7026,7 @@
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Daichi</first><last>Mochihashi</last></author>
       <author><first>Yoshinobu</first><last>Kano</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>55–70</pages>
       <url hash="fbaab6f5">W12-4905</url>
       <bibkey>hara-etal-2012-predicting</bibkey>
@@ -7059,7 +7059,7 @@
     </frontmatter>
     <paper id="1">
       <title>Computational evidence that <fixed-case>H</fixed-case>indi and <fixed-case>U</fixed-case>rdu share a grammar but not the lexicon</title>
-      <author><first>K.V.S</first><last>Prasad</last></author>
+      <author id="k-v-s-prasad"><first>K.V.S</first><last>Prasad</last></author>
       <author><first>Shafqat Mumtaz</first><last>Virk</last></author>
       <pages>1–14</pages>
       <url hash="32f23de7">W12-5001</url>
@@ -7078,7 +7078,7 @@
     <paper id="3">
       <title><fixed-case>B</fixed-case>engali Question Classification: Towards Developing <fixed-case>QA</fixed-case> System</title>
       <author><first>Somnath</first><last>Banerjee</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>25–40</pages>
       <url hash="cd0944f5">W12-5003</url>
       <bibkey>banerjee-bandyopadhyay-2012-bengali</bibkey>
@@ -7088,14 +7088,14 @@
       <author><first>Khumbar</first><last>Debbarma</last></author>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>41–52</pages>
       <url hash="0d4bce3f">W12-5004</url>
       <bibkey>debbarma-etal-2012-morphological</bibkey>
     </paper>
     <paper id="5">
       <title>Comparing Different Criteria for <fixed-case>V</fixed-case>ietnamese Word Segmentation</title>
-      <author><first>Quy T.</first><last>Nguyen</last></author>
+      <author id="quy-nguyen"><first>Quy T.</first><last>Nguyen</last></author>
       <author><first>Ngan L.T.</first><last>Nguyen</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <pages>53–68</pages>
@@ -7105,7 +7105,7 @@
     <paper id="6">
       <title>A Light Weight Stemmer for <fixed-case>U</fixed-case>rdu Language: A Scarce Resourced Language</title>
       <author><first>Sajjad Ahmad</first><last>Khan</last></author>
-      <author><first>Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <author><first>Xuan</first><last>Wang</last></author>
       <pages>69–78</pages>
@@ -7124,7 +7124,7 @@
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
       <author><first>Vidya Raj</first><last>RK</last></author>
       <author><first>Nirmal</first><last>Y</last></author>
-      <author><first>Sivaji</first><last>B</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>B</last></author>
       <pages>95–108</pages>
       <url hash="91e5b621">W12-5008</url>
       <bibkey>nongmeikapam-etal-2012-manipuri</bibkey>
@@ -7149,7 +7149,7 @@
     <paper id="11">
       <title>Tagger Voting for <fixed-case>U</fixed-case>rdu</title>
       <author><first>Bushra</first><last>Jawaid</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>135–144</pages>
       <url hash="f0cf71a8">W12-5011</url>
       <bibkey>jawaid-bojar-2012-tagger</bibkey>
@@ -7157,7 +7157,7 @@
     <paper id="12">
       <title><fixed-case>BIS</fixed-case> Annotation Standards With Reference to <fixed-case>K</fixed-case>onkani Language</title>
       <author><first>Madhavi</first><last>Sardesai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <author><first>Shantaram</first><last>Walawalikar</last></author>
       <author><first>Edna</first><last>Vaz</last></author>
       <pages>145–152</pages>
@@ -7177,7 +7177,7 @@
     <paper id="14">
       <title>Influences of particles on <fixed-case>V</fixed-case>ietnamese tonal Co-articulation</title>
       <author><first>Thị Lan</first><last>Nguyen</last></author>
-      <author><first>Do Dat</first><last>Tran</last></author>
+      <author id="do-dat-tran"><first>Do Dat</first><last>Tran</last></author>
       <pages>163–172</pages>
       <url hash="0600d162">W12-5014</url>
       <bibkey>nguyen-tran-2012-influences</bibkey>
@@ -7193,7 +7193,7 @@
     </paper>
     <paper id="16">
       <title>Bidirectional <fixed-case>B</fixed-case>engali Script and Meetei Mayek Transliteration of Web Based <fixed-case>M</fixed-case>anipuri News Corpus</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>181–190</pages>
       <url hash="2c5c15d8">W12-5016</url>
       <bibkey>singh-2012-bidirectional</bibkey>
@@ -7201,17 +7201,17 @@
     <paper id="17">
       <title>Rule-based Machine Translation between <fixed-case>I</fixed-case>ndonesian and <fixed-case>M</fixed-case>alaysian</title>
       <author><first>Raymond Hendy</first><last>Susanto</last></author>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>191–200</pages>
       <url hash="ff164c7b">W12-5017</url>
       <bibkey>susanto-etal-2012-rule</bibkey>
     </paper>
     <paper id="18">
       <title>Building Multilingual Search Index using open source framework</title>
-      <author><first>Arjun</first><last>Atreya</last></author>
+      <author id="arjun-atreya-v"><first>Arjun</first><last>Atreya</last></author>
       <author><first>Swapnil</first><last>Chaudhari</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <pages>201–210</pages>
       <url hash="df87c096">W12-5018</url>
@@ -7227,8 +7227,8 @@
     <paper id="20">
       <title>Error tracking in search engine development</title>
       <author><first>Swapnil</first><last>Chaudhari</last></author>
-      <author><first>Arjun Atreya</first><last>V</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="arjun-atreya-v"><first>Arjun Atreya</first><last>V</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Ganesh</first><last>Ramakrishnan</last></author>
       <pages>221–228</pages>
       <url hash="d9435950">W12-5020</url>
@@ -7274,7 +7274,7 @@
     </frontmatter>
     <paper id="1">
       <title>Like a Lexicographer Weaving Her Lexical Network</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>1–4</pages>
       <url hash="008a8714">W12-5101</url>
       <bibkey>polguere-2012-like</bibkey>
@@ -7289,10 +7289,10 @@
     </paper>
     <paper id="3">
       <title>On discriminating f<fixed-case>MRI</fixed-case> representations of abstract <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et taxonomic categories</title>
-      <author><first>Andrew</first><last>Anderson</last></author>
+      <author id="andrew-j-anderson"><first>Andrew</first><last>Anderson</last></author>
       <author><first>Tao</first><last>Yuan</last></author>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>21–32</pages>
       <url hash="562b6f21">W12-5103</url>
       <bibkey>anderson-etal-2012-discriminating</bibkey>
@@ -7300,7 +7300,7 @@
     <paper id="4">
       <title>Automatic index creation to support navigation in lexical graphs encoding part_of relations</title>
       <author><first>Michael</first><last>Zock</last></author>
-      <author><first>Debela</first><last>Tesfaye</last></author>
+      <author id="debela-tesfaye-gemechu"><first>Debela</first><last>Tesfaye</last></author>
       <pages>33–52</pages>
       <url hash="84877ef3">W12-5104</url>
       <bibkey>zock-tesfaye-2012-automatic</bibkey>
@@ -7317,7 +7317,7 @@
       <title>Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes</title>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Irene</first><last>De Felice</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Irene</first><last>Russo</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Gloria</first><last>Gagliardi</last></author>
@@ -7331,7 +7331,7 @@
       <author><first>Emmanuel</first><last>Eckard</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>81–94</pages>
       <url hash="f8484ee5">W12-5107</url>
       <bibkey>eckard-etal-2012-dictionary</bibkey>
@@ -7340,7 +7340,7 @@
       <title>Automatic Construction of a <fixed-case>M</fixed-case>ulti<fixed-case>W</fixed-case>ord Expressions Bilingual Lexicon: A Statistical Machine Translation Evaluation Perspective</title>
       <author><first>Dhouha</first><last>Bouamor</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>95–108</pages>
       <url hash="77b3c1b9">W12-5108</url>
       <bibkey>bouamor-etal-2012-automatic</bibkey>
@@ -7348,8 +7348,8 @@
     <paper id="9">
       <title>Hand-Crafting a Lexical Network With a Knowledge-Based Graph Editor</title>
       <author><first>Nabil</first><last>Gader</last></author>
-      <author><first>Veronika</first><last>Lux-Pogodalla</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="veronika-lux"><first>Veronika</first><last>Lux-Pogodalla</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>109–126</pages>
       <url hash="736e36be">W12-5109</url>
       <bibkey>gader-etal-2012-hand</bibkey>
@@ -7357,14 +7357,14 @@
     <paper id="10">
       <title>A Procedural <fixed-case>DTD</fixed-case> Project for Dictionary Entry Parsing Described with Parameterized Grammars</title>
       <author><first>Neculai</first><last>Curteanu</last></author>
-      <author><first>Mihai Alex</first><last>Moruz</last></author>
+      <author id="alex-moruz"><first>Mihai Alex</first><last>Moruz</last></author>
       <pages>127–136</pages>
       <url hash="6d5666c6">W12-5110</url>
       <bibkey>curteanu-moruz-2012-procedural</bibkey>
     </paper>
     <paper id="11">
       <title>Automatic Generation of the Universal Word Explanation from <fixed-case>UNL</fixed-case> Ontology</title>
-      <author><first>Khan Md.</first><last>Anwarus Salam</last></author>
+      <author id="md-anwarus-salam-khan"><first>Khan Md.</first><last>Anwarus Salam</last></author>
       <author><first>Hiroshi</first><last>Uchida</last></author>
       <author><first>Tetsuro</first><last>Nishino</last></author>
       <pages>137–146</pages>
@@ -7404,7 +7404,7 @@
     <paper id="15">
       <title>Where’s the meeting that was cancelled? existential implications of transitive verbs</title>
       <author><first>Patricia</first><last>Amaral</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Cleo</first><last>Condoravdi</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
       <pages>183–194</pages>
@@ -7439,7 +7439,7 @@
       <editor><first>Ruvan</first><last>Weerasinghe</last></editor>
       <editor><first>Sarmad</first><last>Hussain</last></editor>
       <editor><first>Virach</first><last>Sornlertlamvanich</last></editor>
-      <editor><first>Rachel Edita O.</first><last>Roxas</last></editor>
+      <editor id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -7452,19 +7452,19 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>K</fixed-case>orean <fixed-case>NLP</fixed-case>2<fixed-case>RDF</fixed-case> Resources</title>
-      <author><first>YoungGyun</first><last>Hahm</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="younggyun-hahm"><first>YoungGyun</first><last>Hahm</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Jungyeul</first><last>Park</last></author>
       <author><first>Yongun</first><last>Yoon</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>1–10</pages>
       <url hash="ca3712d8">W12-5201</url>
       <bibkey>hahm-etal-2012-korean</bibkey>
     </paper>
     <paper id="2">
       <title>Building Large Scale Text Corpus for <fixed-case>T</fixed-case>ibetan Natural Language Processing by Extracting Text from Web Pages</title>
-      <author><first>Huidan</first><last>Liu</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <author><first>Yeping</first><last>He</last></author>
       <pages>11–20</pages>
@@ -7477,7 +7477,7 @@
       <author><first>Himadri</first><last>Bharali</last></author>
       <author><first>Ambeswar</first><last>Gogoi</last></author>
       <author><first>Ratul</first><last>Deka</last></author>
-      <author><first>Anup Kr.</first><last>Barman</last></author>
+      <author id="anup-barman"><first>Anup Kr.</first><last>Barman</last></author>
       <pages>21–28</pages>
       <url hash="82b5e520">W12-5203</url>
       <bibkey>sarma-etal-2012-structured</bibkey>
@@ -7485,7 +7485,7 @@
     <paper id="4">
       <title>Corpus Building of Literary Lesser Rich Language-<fixed-case>B</fixed-case>odo: Insights and Challenges</title>
       <author><first>Biswajit</first><last>Brahma</last></author>
-      <author><first>Anup Kr.</first><last>Barman</last></author>
+      <author id="anup-barman"><first>Anup Kr.</first><last>Barman</last></author>
       <author id="shikhar-kumar-sarma-gu"><first>Shikhar Kr.</first><last>Sarma</last></author>
       <author><first>Bhatima</first><last>Boro</last></author>
       <pages>29–34</pages>
@@ -7495,7 +7495,7 @@
     <paper id="5">
       <title>Dependency Parsers for <fixed-case>P</fixed-case>ersian</title>
       <author><first>Mojgan</first><last>Seraji</last></author>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>35–44</pages>
       <url hash="0fd316ee">W12-5205</url>
@@ -7511,7 +7511,7 @@
     </paper>
     <paper id="7">
       <title>A Hybrid Dependency Parser for <fixed-case>B</fixed-case>angla</title>
-      <author><first>Arnab</first><last>Dhar</last></author>
+      <author id="arnab-dhar"><first>Arnab</first><last>Dhar</last></author>
       <author><first>Sanjay</first><last>Chatterji</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
@@ -7523,7 +7523,7 @@
       <title>Repairing <fixed-case>B</fixed-case>engali Verb Chunks for Improved <fixed-case>B</fixed-case>engali to <fixed-case>H</fixed-case>indi Machine Translation</title>
       <author><first>Sanjay</first><last>Chatterji</last></author>
       <author><first>Nabanita</first><last>Datta</last></author>
-      <author><first>Arnab</first><last>Dhar</last></author>
+      <author id="arnab-dhar"><first>Arnab</first><last>Dhar</last></author>
       <author><first>Biswanath</first><last>Barik</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
@@ -7534,7 +7534,7 @@
     <paper id="9">
       <title>Domain Specific Ontology Extractor For <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Brijesh</first><last>Bhatt</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>75–84</pages>
       <url hash="19d97e60">W12-5209</url>
       <bibkey>bhatt-bhattacharyya-2012-domain</bibkey>
@@ -7542,7 +7542,7 @@
     <paper id="10">
       <title>Constrained Hidden <fixed-case>M</fixed-case>arkov Model for Bilingual Keyword Pairs Alignment</title>
       <author><first>Denny</first><last>Cahyadi</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>85–94</pages>
       <url hash="30556987">W12-5210</url>
@@ -7551,7 +7551,7 @@
     <paper id="11">
       <title>N-gram and Gazetteer List Based Named Entity Recognition for <fixed-case>U</fixed-case>rdu: A Scarce Resourced Language</title>
       <author><first>Faryal</first><last>Jahangir</last></author>
-      <author><first>Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <author><first>Xuan</first><last>Wang</last></author>
       <pages>95–104</pages>
@@ -7569,7 +7569,7 @@
     </paper>
     <paper id="13">
       <title>Enhancing Lemmatization for <fixed-case>M</fixed-case>ongolian and its Application to Statistical Machine Translation</title>
-      <author><first>Chimeddorj</first><last>Odbayar</last></author>
+      <author id="odbayar-chimeddorj"><first>Chimeddorj</first><last>Odbayar</last></author>
       <author><first>Atsushi</first><last>Fujii</last></author>
       <pages>115–124</pages>
       <url hash="1ade9af0">W12-5213</url>
@@ -7589,8 +7589,8 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Sentiment Analysis where <fixed-case>AI</fixed-case> meets Psychology</booktitle>
       <url hash="94652f61">W12-53</url>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Manabu</first><last>Okumura</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="manabu-okumura"><first>Manabu</first><last>Okumura</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -7611,7 +7611,7 @@
     <paper id="2">
       <title>A <fixed-case>CCG</fixed-case>-based Approach to Fine-Grained Sentiment Analysis</title>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>3–16</pages>
       <url hash="d238b0d6">W12-5302</url>
       <bibkey>smith-lee-2012-ccg</bibkey>
@@ -7669,7 +7669,7 @@
     <paper id="9">
       <title>How Human Analyse Lexical Indicators of Sentiments- A Cognitive Analysis Using Reaction-Time</title>
       <author><first>Marimuthu</first><last>K</last></author>
-      <author><first>Sobha Lalitha</first><last>Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></author>
       <pages>81–90</pages>
       <url hash="e906ef76">W12-5309</url>
       <bibkey>k-devi-2012-human</bibkey>
@@ -7692,7 +7692,7 @@
     </paper>
     <paper id="12">
       <title>Rule-Based Sentiment Analysis in Narrow Domain: Detecting Sentiment in Daily Horoscopes Using Sentiscope</title>
-      <author><first>Zeljko</first><last>Agic</last></author>
+      <author id="zeljko-agic"><first>Zeljko</first><last>Agic</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
       <pages>115–124</pages>
       <url hash="44564ca8">W12-5312</url>
@@ -7733,7 +7733,7 @@
       <title>An Experiment in Integrating Sentiment Features for Tech Stock Prediction in <fixed-case>T</fixed-case>witter</title>
       <author><first>Tien Thanh</first><last>Vu</last></author>
       <author><first>Shu</first><last>Chang</last></author>
-      <author><first>Quang Thuy</first><last>Ha</last></author>
+      <author id="quang-thuy-ha"><first>Quang Thuy</first><last>Ha</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
       <pages>23–38</pages>
       <url hash="989bca09">W12-5503</url>
@@ -7752,10 +7752,10 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Machine Translation and Parsing in <fixed-case>I</fixed-case>ndian Languages</booktitle>
       <url hash="703a7001">W12-56</url>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
-      <editor><first>Prashanth</first><last>Mannem</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></editor>
       <editor><first>Joseph</first><last>vanGenabith</last></editor>
-      <editor><first>Sobha Lalitha</first><last>Devi</last></editor>
+      <editor id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></editor>
       <editor><first>Radhika</first><last>Mamidi</last></editor>
       <editor><first>Ranjani</first><last>Parthasarathi</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
@@ -7779,7 +7779,7 @@
     <paper id="2">
       <title>Semantic Parsing of <fixed-case>T</fixed-case>amil Sentences</title>
       <author><first>Balaji</first><last>Jagan</last></author>
-      <author><first>Geetha</first><last>T V</last></author>
+      <author id="t-v-geetha"><first>Geetha</first><last>T V</last></author>
       <author><first>Ranjani</first><last>Parthasarathi</last></author>
       <pages>15–22</pages>
       <url hash="174dcab3">W12-5602</url>
@@ -7787,8 +7787,8 @@
     </paper>
     <paper id="3">
       <title><fixed-case>T</fixed-case>amil <fixed-case>NER</fixed-case> - Coping with Real Time Challenges</title>
-      <author><first>Malarkodi</first><last>C.S</last></author>
-      <author><first>Pattabhi</first><last>RK Rao</last></author>
+      <author id="malarkodi-c-s"><first>Malarkodi</first><last>C.S</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi</first><last>RK Rao</last></author>
       <author><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>23–38</pages>
       <url hash="5dd2a147">W12-5603</url>
@@ -7796,7 +7796,7 @@
     </paper>
     <paper id="4">
       <title>Sublexical Translations for Low-Resource Language</title>
-      <author><first>Khan Md.</first><last>Anwarus Salam</last></author>
+      <author id="md-anwarus-salam-khan"><first>Khan Md.</first><last>Anwarus Salam</last></author>
       <author><first>Setsuo</first><last>Yamada</last></author>
       <author><first>Tetsuro</first><last>Nishino</last></author>
       <pages>39–52</pages>
@@ -7813,9 +7813,9 @@
     <paper id="6">
       <title>A Diagnostic Evaluation Approach Targeting <fixed-case>MT</fixed-case> Systems for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Renu</first><last>Balyan</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <pages>61–72</pages>
       <url hash="efba8da0">W12-5606</url>
       <bibkey>balyan-etal-2012-diagnostic</bibkey>
@@ -7830,8 +7830,8 @@
     </paper>
     <paper id="8">
       <title>Clause Boundary Identification for <fixed-case>M</fixed-case>alayalam Using <fixed-case>CRF</fixed-case></title>
-      <author><first>Lakshmi</first><last>S.</last></author>
-      <author><first>Vijay Sundar Ram</first><last>R</last></author>
+      <author id="lakshmi-s"><first>Lakshmi</first><last>S.</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay Sundar Ram</first><last>R</last></author>
       <author><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>83–92</pages>
       <url hash="755785d1">W12-5608</url>
@@ -7841,14 +7841,14 @@
       <title>Disambiguation of pre/post positions in <fixed-case>E</fixed-case>nglish - <fixed-case>M</fixed-case>alayalam Text Translation</title>
       <author><first>Jayan</first><last>V</last></author>
       <author><first>Sunil</first><last>R</last></author>
-      <author><first>Bhadran</first><last>V K</last></author>
+      <author id="bhadran-v-k"><first>Bhadran</first><last>V K</last></author>
       <pages>93–102</pages>
       <url hash="b60d017c">W12-5609</url>
       <bibkey>v-etal-2012-disambiguation</bibkey>
     </paper>
     <paper id="10">
       <title>Resolution for Pronouns in <fixed-case>T</fixed-case>amil Using <fixed-case>CRF</fixed-case></title>
-      <author><first>Akilandeswari</first><last>A</last></author>
+      <author id="a-akilandeswari"><first>Akilandeswari</first><last>A</last></author>
       <author><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>103–112</pages>
       <url hash="64b02155">W12-5610</url>
@@ -7857,8 +7857,8 @@
     <paper id="11">
       <title>Morphological Processing for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil Statistical Machine Translation</title>
       <author><first>Loganathan</first><last>Ramasamy</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>113–122</pages>
       <url hash="7ec3b543">W12-5611</url>
       <bibkey>ramasamy-etal-2012-morphological</bibkey>
@@ -7883,7 +7883,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>CUNI</fixed-case>: Feature Selection and Error Analysis of a Transition-Based Parser</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>143–148</pages>
       <url hash="a170bc80">W12-5614</url>
       <bibkey>zeman-2012-cuni</bibkey>
@@ -7891,7 +7891,7 @@
     <paper id="15">
       <title>Parsing <fixed-case>H</fixed-case>indi with <fixed-case>MDP</fixed-case>arser</title>
       <author><first>Alexander</first><last>Volokh</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>149–154</pages>
       <url hash="bcbbf9be">W12-5615</url>
       <bibkey>volokh-neumann-2012-parsing</bibkey>
@@ -7899,7 +7899,7 @@
     <paper id="16">
       <title>A Three Stage Hybrid Parser for <fixed-case>H</fixed-case>indi</title>
       <author><first>Sanjay</first><last>Chatterji</last></author>
-      <author><first>Arnad</first><last>Dhar</last></author>
+      <author id="arnab-dhar"><first>Arnad</first><last>Dhar</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <author><first>Anupam</first><last>Basu</last></author>
       <pages>155–162</pages>
@@ -7927,8 +7927,8 @@
     <paper id="19">
       <title>Ensembling Various Dependency Parsers: Adopting Turbo Parser for <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Puneeth</first><last>Kukkadapu</last></author>
-      <author><first>Deepak</first><last>Malladi</last></author>
-      <author><first>Aswarth</first><last>Dara</last></author>
+      <author id="deepak-kumar-malladi"><first>Deepak</first><last>Malladi</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth</first><last>Dara</last></author>
       <pages>179–184</pages>
       <url hash="12490713">W12-5619</url>
       <bibkey>kukkadapu-etal-2012-ensembling</bibkey>
@@ -7947,11 +7947,11 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Applying Machine Learning Techniques to Optimise the Division of Labour in Hybrid <fixed-case>MT</fixed-case></booktitle>
       <url hash="7af64bb9">W12-57</url>
-      <editor><first>Josef</first><last>van Genabith</last></editor>
+      <editor id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></editor>
       <editor><first>Toni</first><last>Badia</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Maite</first><last>Melero</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Tsuyoshi</first><last>Okita</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
@@ -7966,7 +7966,7 @@
     <paper id="1">
       <title>Hybrid Adaptation of Named Entity Recognition for Statistical Machine Translation</title>
       <author><first>Vassilina</first><last>Nikoulina</last></author>
-      <author><first>Agnes</first><last>Sandor</last></author>
+      <author id="agnes-sandor"><first>Agnes</first><last>Sandor</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
       <pages>1–16</pages>
       <url hash="abd402ee">W12-5701</url>
@@ -7975,7 +7975,7 @@
     <paper id="2">
       <title>Confusion Network Based System Combination for <fixed-case>C</fixed-case>hinese Translation Output: Word-Level or Character-Level?</title>
       <author><first>Maoxi</first><last>Li</last></author>
-      <author><first>MingWen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>MingWen</first><last>Wang</last></author>
       <pages>17–24</pages>
       <url hash="fa7d8a0e">W12-5702</url>
       <bibkey>li-wang-2012-confusion</bibkey>
@@ -7985,7 +7985,7 @@
       <author><first>Kartik</first><last>Asooja</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Nitish</first><last>Aggarwal</last></author>
-      <author><first>Asunción Gómez</first><last>Pérez</last></author>
+      <author id="asuncion-gomez-perez"><first>Asunción Gómez</first><last>Pérez</last></author>
       <pages>25–36</pages>
       <url hash="2286b286">W12-5703</url>
       <bibkey>asooja-etal-2012-using</bibkey>
@@ -8012,7 +8012,7 @@
     <paper id="6">
       <title>Sentence-Level Quality Estimation for <fixed-case>MT</fixed-case> System Combination</title>
       <author><first>Tsuyoshi</first><last>Okita</last></author>
-      <author><first>Raphaël</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphaël</first><last>Rubino</last></author>
       <author><first>Josef</first><last>van Genabith</last></author>
       <pages>55–64</pages>
       <url hash="f85fafb8">W12-5706</url>
@@ -8074,7 +8074,7 @@
     <paper id="2">
       <title>Automatically Assessing Free Texts</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>9–16</pages>
       <url hash="29407a05">W12-5802</url>
       <bibkey>chali-hasan-2012-automatically</bibkey>
@@ -8107,7 +8107,7 @@
       <title>Textbook Construction from Lecture Transcripts</title>
       <author><first>Aliabbas</first><last>Petiwala</last></author>
       <author><first>Kannan</first><last>Moudgalya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>43–50</pages>
       <url hash="eba0a99b">W12-5806</url>
       <bibkey>petiwala-etal-2012-textbook</bibkey>
@@ -8161,7 +8161,7 @@
       <title>Genre-oriented Readability Assessment: a Case Study</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>91–98</pages>
       <url hash="aae5e75d">W12-5812</url>
       <bibkey>dellorletta-etal-2012-genre</bibkey>
@@ -8173,7 +8173,7 @@
       <url hash="5b409be7">W12-59</url>
       <editor><first>Karthik</first><last>Visweswariah</last></editor>
       <editor><first>Ananthakrishnan</first><last>Ramanathan</last></editor>
-      <editor><first>Mitesh M.</first><last>Khapra</last></editor>
+      <editor id="mitesh-m-khapra"><first>Mitesh M.</first><last>Khapra</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
       <month>December</month>
@@ -8205,7 +8205,7 @@
     <paper id="3">
       <title>A Tagging-style Reordering Model for Phrase-based <fixed-case>SMT</fixed-case></title>
       <author><first>Minwei</first><last>Feng</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>17–26</pages>
       <url hash="8368bf78">W12-5903</url>
       <bibkey>feng-ney-2012-tagging</bibkey>
@@ -8222,7 +8222,7 @@
       <title>Learning Improved Reordering Models for <fixed-case>U</fixed-case>rdu, <fixed-case>F</fixed-case>arsi and <fixed-case>I</fixed-case>talian using <fixed-case>SMT</fixed-case></title>
       <author><first>Rohit</first><last>Gupta</last></author>
       <author><first>Raj Nath</first><last>Patel</last></author>
-      <author><first>Ritesh</first><last>Shah</last></author>
+      <author id="ritesh-shah"><first>Ritesh</first><last>Shah</last></author>
       <pages>37–46</pages>
       <url hash="b81bb086">W12-5905</url>
       <bibkey>gupta-etal-2012-learning</bibkey>
@@ -8230,7 +8230,7 @@
     <paper id="6">
       <title>Partially modelling word reordering as a sequence labelling problem</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>47–54</pages>
       <url hash="c7b30911">W12-5906</url>
       <bibkey>kunchukuttan-bhattacharyya-2012-partially</bibkey>
@@ -8240,8 +8240,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Question Answering for Complex Domains</booktitle>
       <url hash="5032b27a">W12-60</url>
-      <editor><first>Nanda</first><last>Kambhatla</last></editor>
-      <editor><first>Sachindra</first><last>Joshi</last></editor>
+      <editor id="nanda-kambhatla"><first>Nanda</first><last>Kambhatla</last></editor>
+      <editor id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></editor>
       <editor><first>Ganesh</first><last>Ramakrishnan</last></editor>
       <editor><first>Kiran</first><last>Kate</last></editor>
       <editor><first>Priyanka</first><last>Agrawal</last></editor>
@@ -8258,7 +8258,7 @@
     <paper id="1">
       <title>Simple or Complex? Classifying Questions by Answering Complexity</title>
       <author><first>Yllias</first><last>Chali</last></author>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <pages>1–10</pages>
       <url hash="1cec2502">W12-6001</url>
       <bibkey>chali-hasan-2012-simple</bibkey>
@@ -8266,7 +8266,7 @@
     <paper id="2">
       <title>Question Classification and Answering from Procedural Text in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Somnath</first><last>Banerjee</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>11–26</pages>
       <url hash="f593669f">W12-6002</url>
       <bibkey>banerjee-bandyopadhyay-2012-question</bibkey>
@@ -8298,7 +8298,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>alk: A Spoken <fixed-case>W</fixed-case>ikipedia-based Open-Domain Knowledge Access System</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>57–70</pages>
       <url hash="dd63ad4b">W12-6006</url>
       <bibkey>wilcock-2012-wikitalk</bibkey>
@@ -8308,11 +8308,11 @@
     <meta>
       <booktitle>Proceedings of the First International Workshop on Optimization Techniques for Human Language Technology</booktitle>
       <url hash="f3eee9c3">W12-61</url>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <editor><first>Asif</first><last>Ekbal</last></editor>
       <editor><first>Sriparna</first><last>Saha</last></editor>
       <editor><first>Mark</first><last>Johnson</last></editor>
-      <editor><first>Diego</first><last>Molla-Aliod</last></editor>
+      <editor id="diego-molla"><first>Diego</first><last>Molla-Aliod</last></editor>
       <editor><first>Mark</first><last>Dras</last></editor>
       <publisher>The COLING 2012 Organizing Committee</publisher>
       <address>Mumbai, India</address>
@@ -8328,16 +8328,16 @@
       <title><fixed-case>B</fixed-case>io<fixed-case>POS</fixed-case>: Biologically Inspired Algorithms for <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Ana Paula</first><last>Silva</last></author>
       <author><first>Arlindo</first><last>Silva</last></author>
-      <author><first>Irene</first><last>Rodrigues</last></author>
+      <author id="irene-rodrigues"><first>Irene</first><last>Rodrigues</last></author>
       <pages>1–16</pages>
       <url hash="d50ed65a">W12-6101</url>
       <bibkey>silva-etal-2012-biopos</bibkey>
     </paper>
     <paper id="2">
       <title>Optimization for Efficient Determination of Chunk in Automatic Evaluation for Machine Translation</title>
-      <author><first>Hiroshi</first><last>Echizen’ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen’ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>17–30</pages>
       <url hash="ffcf8d81">W12-6102</url>
       <bibkey>echizenya-etal-2012-optimization</bibkey>
@@ -8355,7 +8355,7 @@
       <title>Selection of Discriminative Features for Translation Texts</title>
       <author><first>Kuo-Ming</first><last>Tang</last></author>
       <author><first>Chien-Kang</first><last>Huang</last></author>
-      <author><first>Chia-Ming</first><last>Lee</last></author>
+      <author id="chia-ming-lee"><first>Chia-Ming</first><last>Lee</last></author>
       <pages>49–64</pages>
       <url hash="c872edda">W12-6104</url>
       <bibkey>tang-etal-2012-selection</bibkey>
@@ -8379,7 +8379,7 @@
     </paper>
     <paper id="7">
       <title>Iterative <fixed-case>C</fixed-case>hinese Bi-gram Term Extraction Using Machine-learning Classification Approach</title>
-      <author><first>Chia-Ming</first><last>Lee</last></author>
+      <author id="chia-ming-lee"><first>Chia-Ming</first><last>Lee</last></author>
       <author><first>Chien-Kang</first><last>Huang</last></author>
       <author><first>Kuo-Ming</first><last>Tang</last></author>
       <pages>95–108</pages>
@@ -8389,9 +8389,9 @@
     <paper id="8">
       <title>Parameter estimation under uncertainty with Simulated Annealing applied to an ant colony based probabilistic <fixed-case>WSD</fixed-case> algorithm</title>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Jérôme</first><last>Goulian</last></author>
+      <author id="jerome-goulian"><first>Jérôme</first><last>Goulian</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>109–124</pages>
       <url hash="45f13cac">W12-6108</url>
       <bibkey>tchechmedjiev-etal-2012-parameter</bibkey>
@@ -8401,7 +8401,7 @@
     <meta>
       <booktitle>Proceedings of the 10th International Workshop on Finite State Methods and Natural Language Processing</booktitle>
       <url hash="931c75fc">W12-62</url>
-      <editor><first>Iñaki</first><last>Alegria</last></editor>
+      <editor id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></editor>
       <editor><first>Mans</first><last>Hulden</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Donostia–San Sebastián</address>
@@ -8415,7 +8415,7 @@
     </frontmatter>
     <paper id="1">
       <title>Effect of Language and Error Models on Efficiency of Finite-State Spell-Checking and Correction</title>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <author><first>Sam</first><last>Hardwick</last></author>
       <pages>1–9</pages>
       <url hash="5fbcb11d">W12-6201</url>
@@ -8446,7 +8446,7 @@
     </paper>
     <paper id="5">
       <title>Integrating Aspectually Relevant Properties of Verbs into a Morphological Analyzer for <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Katina</first><last>Bontcheva</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
       <pages>30–34</pages>
       <url hash="7c0a6362">W12-6205</url>
       <bibkey>bontcheva-2012-integrating</bibkey>
@@ -8472,7 +8472,7 @@
     <paper id="8">
       <title><fixed-case>WFST</fixed-case>-Based Grapheme-to-Phoneme Conversion: Open Source tools for Alignment, Model-Building and Decoding</title>
       <author><first>Josef R.</first><last>Novak</last></author>
-      <author><first>Nobuaki</first><last>Minematsu</last></author>
+      <author id="nobuaki-minematsu"><first>Nobuaki</first><last>Minematsu</last></author>
       <author><first>Keikichi</first><last>Hirose</last></author>
       <pages>45–49</pages>
       <url hash="fe18edab">W12-6208</url>
@@ -8488,7 +8488,7 @@
     <paper id="10">
       <title>Implementation of Replace Rules Using Preference Operator</title>
       <author><first>Senka</first><last>Drobac</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Anssi</first><last>Yli-Jyrä</last></author>
       <pages>55–59</pages>
       <url hash="a46582d8">W12-6210</url>
@@ -8507,9 +8507,9 @@
     </paper>
     <paper id="12">
       <title>Developing an Open-Source <fixed-case>FST</fixed-case> Grammar for Verb Chain Transfer in a <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>MT</fixed-case> System</title>
-      <author><first>Aingeru</first><last>Mayor</last></author>
+      <author id="aingeru-mayor"><first>Aingeru</first><last>Mayor</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>65–69</pages>
       <url hash="f21b4bfa">W12-6212</url>
       <bibkey>mayor-etal-2012-developing</bibkey>
@@ -8526,8 +8526,8 @@
       <title>A Methodology for Obtaining Concept Graphs from Word Graphs</title>
       <author><first>Marcos</first><last>Calvo</last></author>
       <author><first>Jon Ander</first><last>Gómez</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <pages>75–79</pages>
       <url hash="f722f749">W12-6214</url>
       <bibkey>calvo-etal-2012-methodology</bibkey>
@@ -8549,8 +8549,8 @@
     <paper id="17">
       <title>Finite-State Acoustic and Translation Model Composition in Statistical Speech Translation: Empirical Assessment</title>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>M. Inés</first><last>Torres</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>99–107</pages>
       <url hash="f2eee723">W12-6217</url>
       <bibkey>perez-etal-2012-finite</bibkey>
@@ -8597,14 +8597,14 @@
     </paper>
     <paper id="2">
       <title>Linguistic foundation for <fixed-case>NLP</fixed-case></title>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>2</pages>
       <url hash="e7420301">W12-6302</url>
       <bibkey>zhou-2012-linguistic</bibkey>
     </paper>
     <paper id="3">
       <title>A Language Modeling Approach to Identifying Code-Switched Sentences and Words</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Wei-Cheng</first><last>He</last></author>
       <author><first>Wei-Nan</first><last>Chien</last></author>
       <pages>3–8</pages>
@@ -8614,7 +8614,7 @@
     <paper id="4">
       <title>Semi-automatic Annotation of <fixed-case>C</fixed-case>hinese Word Structure</title>
       <author><first>Jianqiang</first><last>Ma</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <author><first>Dale</first><last>Gerdemann</last></author>
       <pages>9–17</pages>
       <url hash="f1e747d8">W12-6304</url>
@@ -8694,8 +8694,8 @@
       <author><first>Shujian</first><last>Huang</last></author>
       <author><first>Yinggong</first><last>Zhao</last></author>
       <author><first>Hao</first><last>Zhou</last></author>
-      <author><first>Xinyu</first><last>Dai</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="xinyu-dai"><first>Xinyu</first><last>Dai</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>63–68</pages>
       <url hash="c7df938d">W12-6312</url>
       <bibkey>xi-etal-2012-adapting</bibkey>
@@ -8713,7 +8713,7 @@
     <paper id="14">
       <title>Rules-based <fixed-case>C</fixed-case>hinese Word Segmentation on <fixed-case>M</fixed-case>icro<fixed-case>B</fixed-case>log for <fixed-case>CIPS</fixed-case>-<fixed-case>SIGHAN</fixed-case> on <fixed-case>CLP</fixed-case>2012</title>
       <author><first>Jing</first><last>Zhang</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Xia</first><last>Han</last></author>
       <author><first>Wei</first><last>Wang</last></author>
       <pages>74–78</pages>
@@ -8753,7 +8753,7 @@
     <paper id="18">
       <title>A Comparison of <fixed-case>C</fixed-case>hinese Word Segmentation on News and Microblog Corpora with a Lexicon Based Method</title>
       <author><first>Yuxiang</first><last>Jia</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Ming</first><last>Fan</last></author>
       <author><first>Zhimin</first><last>Wang</last></author>
       <pages>95–98</pages>
@@ -8830,7 +8830,7 @@
       <title>Explore <fixed-case>C</fixed-case>hinese Encyclopedic Knowledge to Disambiguate Person Names</title>
       <author><first>Jie</first><last>Liu</last></author>
       <author><first>Ruifeng</first><last>Xu</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Jian</first><last>Xu</last></author>
       <pages>138–145</pages>
       <url hash="e71beb26">W12-6326</url>
@@ -8849,7 +8849,7 @@
     <paper id="28">
       <title><fixed-case>C</fixed-case>hinese Personal Name Disambiguation Based on Vector Space Model</title>
       <author><first>Qing-hu</first><last>Fan</last></author>
-      <author><first>Hong-ying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hong-ying</first><last>Zan</last></author>
       <author><first>Yu-mei</first><last>Chai</last></author>
       <author><first>Yu-xiang</first><last>Jia</last></author>
       <author><first>Gui-ling</first><last>Niu</last></author>
@@ -8908,9 +8908,9 @@
     </paper>
     <paper id="35">
       <title>Traditional <fixed-case>C</fixed-case>hinese Parsing Evaluation at <fixed-case>SIGHAN</fixed-case> Bake-offs 2012</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <pages>199–205</pages>
       <url hash="0c6ff10e">W12-6335</url>
       <bibkey>tseng-etal-2012-traditional</bibkey>
@@ -8939,8 +8939,8 @@
       <title>Improving <fixed-case>PCFG</fixed-case> <fixed-case>C</fixed-case>hinese Parsing with Context-Dependent Probability Re-estimation</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>216–221</pages>
       <url hash="40e1e8a5">W12-6338</url>
       <bibkey>hsieh-etal-2012-improving</bibkey>
@@ -8956,7 +8956,7 @@
     </paper>
     <paper id="40">
       <title>A Conditional Random Field-based Traditional <fixed-case>C</fixed-case>hinese Base Phrase Parser for <fixed-case>SIGHAN</fixed-case> Bake-off 2012 Evaluation</title>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>231–236</pages>
       <url hash="931af113">W12-6340</url>
diff --git a/data/xml/W13.xml b/data/xml/W13.xml
index b3a11bbd51..ccd8c0527f 100644
--- a/data/xml/W13.xml
+++ b/data/xml/W13.xml
@@ -33,11 +33,11 @@
     </paper>
     <paper id="3">
       <title>Automatically Deriving Event Ontologies for a <fixed-case>C</fixed-case>ommon<fixed-case>S</fixed-case>ense Knowledge Base</title>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Will</first><last>de Beaumont</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="william-de-beaumont"><first>Will</first><last>de Beaumont</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <author><first>Jansen</first><last>Orfan</last></author>
-      <author><first>Mary</first><last>Swift</last></author>
+      <author id="mary-swift"><first>Mary</first><last>Swift</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <pages>23–34</pages>
       <url hash="e3e69cdf">W13-0103</url>
@@ -45,8 +45,8 @@
     </paper>
     <paper id="4">
       <title>Intensionality was only alleged: On adjective-noun composition in distributional semantics</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>The Nghia</first><last>Pham</last></author>
       <author><first>Louise</first><last>McNally</last></author>
       <pages>35–46</pages>
@@ -56,10 +56,10 @@
     <paper id="5">
       <title>Sentiment Composition Using a Parabolic Model</title>
       <author><first>Baptiste</first><last>Chardon</last></author>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Yannick</first><last>Mathieu</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="yannick-mathieu"><first>Yannick</first><last>Mathieu</last></author>
       <author><first>Vladimir</first><last>Popescu</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>47–58</pages>
       <url hash="2c03dd11">W13-0105</url>
       <bibkey>chardon-etal-2013-sentiment</bibkey>
@@ -67,15 +67,15 @@
     <paper id="6">
       <title>Temporal Relation Classification Based on Temporal Reasoning</title>
       <author><first>Francisco</first><last>Costa</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>59–70</pages>
       <url hash="1139abfd">W13-0106</url>
       <bibkey>costa-branco-2013-temporal</bibkey>
     </paper>
     <paper id="7">
       <title>Empirical Validation of Reichenbach’s Tense Framework</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>71–82</pages>
       <url hash="f6db2d12">W13-0107</url>
       <bibkey>derczynski-gaizauskas-2013-empirical</bibkey>
@@ -106,7 +106,7 @@
     </paper>
     <paper id="11">
       <title>Towards Weakly Supervised Resolution of Null Instantiations</title>
-      <author><first>Philip</first><last>Gorinski</last></author>
+      <author id="philip-gorinski"><first>Philip</first><last>Gorinski</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <pages>119–130</pages>
@@ -126,8 +126,8 @@
     </paper>
     <paper id="13">
       <title>Domain Adaptable Semantic Clustering in Statistical <fixed-case>NLG</fixed-case></title>
-      <author><first>Blake</first><last>Howald</last></author>
-      <author><first>Ravikumar</first><last>Kondadadi</last></author>
+      <author id="blake-howald"><first>Blake</first><last>Howald</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravikumar</first><last>Kondadadi</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <pages>143–154</pages>
       <url hash="216435e2">W13-0113</url>
@@ -136,14 +136,14 @@
     <paper id="14">
       <title>Sources of Evidence for Implicit Argument Resolution</title>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>155–166</pages>
       <url hash="76af0fd8">W13-0114</url>
       <bibkey>laparra-rigau-2013-sources</bibkey>
     </paper>
     <paper id="15">
       <title>Recognising Sets and Their Elements: Tree Kernels for Entity Instantiation Identification</title>
-      <author><first>Andrew</first><last>McKinlay</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>McKinlay</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <pages>167–178</pages>
       <url hash="bda5b027">W13-0115</url>
@@ -152,7 +152,7 @@
     <paper id="16">
       <title>A corpus study of clause combination</title>
       <author><first>Olga</first><last>Nikitina</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>179–190</pages>
       <url hash="9ae4d64a">W13-0116</url>
       <bibkey>nikitina-pado-2013-corpus</bibkey>
@@ -167,7 +167,7 @@
     <paper id="18">
       <title>The Impact of Selectional Preference Agreement on Semantic Relational Similarity</title>
       <author><first>Bryan</first><last>Rink</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
       <pages>204–215</pages>
       <url hash="aa4f4dba">W13-0118</url>
       <bibkey>rink-harabagiu-2013-impact</bibkey>
@@ -175,8 +175,8 @@
     <paper id="19">
       <title>Recognizing Spatial Containment Relations between Event Mentions</title>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Michael A.</first><last>Skinner</last></author>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="michael-skinner"><first>Michael A.</first><last>Skinner</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <pages>216–227</pages>
       <url hash="34b17184">W13-0119</url>
       <bibkey>roberts-etal-2013-recognizing</bibkey>
@@ -185,7 +185,7 @@
       <title>Regular Meaning Shifts in <fixed-case>G</fixed-case>erman Particle Verbs: A Case Study</title>
       <author><first>Sylvia</first><last>Springorum</last></author>
       <author><first>Jason</first><last>Utt</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>228–239</pages>
       <url hash="f8d0eed0">W13-0120</url>
       <bibkey>springorum-etal-2013-regular</bibkey>
@@ -204,7 +204,7 @@
     </paper>
     <paper id="22">
       <title>Parsimonious Semantic Representations with Projection Pointers</title>
-      <author><first>Noortje J.</first><last>Venhuizen</last></author>
+      <author id="noortje-venhuizen"><first>Noortje J.</first><last>Venhuizen</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <author><first>Harm</first><last>Brouwer</last></author>
       <pages>252–263</pages>
@@ -220,15 +220,15 @@
     </paper>
     <paper id="24">
       <title>What excludes an Alternative in Coherence Relations?</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>276–287</pages>
       <url hash="5b51fb06">W13-0124</url>
       <bibkey>webber-2013-excludes</bibkey>
     </paper>
     <paper id="25">
       <title>A Search Task Dataset for <fixed-case>G</fixed-case>erman Textual Entailment</title>
-      <author><first>Britta D.</first><last>Zeller</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="britta-zeller"><first>Britta D.</first><last>Zeller</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>288–299</pages>
       <url hash="1f86b8f7">W13-0125</url>
       <bibkey>zeller-pado-2013-search</bibkey>
@@ -258,7 +258,7 @@
     </paper>
     <paper id="2">
       <title>The semantic annotation of quantification</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>307–313</pages>
       <url hash="4b02d0b8">W13-0202</url>
       <bibkey>bunt-2013-semantic</bibkey>
@@ -273,7 +273,7 @@
     </paper>
     <paper id="4">
       <title>What is in a text, what isn’t, and what this has to do with lexical semantics</title>
-      <author><first>Aurelie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></author>
       <pages>321–327</pages>
       <url hash="14e5c8ca">W13-0204</url>
       <bibkey>herbelot-2013-text</bibkey>
@@ -283,7 +283,7 @@
       <author><first>Elias</first><last>Iosif</last></author>
       <author><first>Alexandros</first><last>Potamianos</last></author>
       <author><first>Maria</first><last>Giannoudaki</last></author>
-      <author><first>Kalliopi</first><last>Zervanou</last></author>
+      <author id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></author>
       <pages>328–334</pages>
       <url hash="6356c637">W13-0205</url>
       <bibkey>iosif-etal-2013-semantic</bibkey>
@@ -340,13 +340,13 @@
     </paper>
     <paper id="12">
       <title>A Pilot Experiment in Knowledge Authoring as Dialogue</title>
-      <author><first>Artemis</first><last>Parvizi</last></author>
+      <author id="artemis-parvizi"><first>Artemis</first><last>Parvizi</last></author>
       <author><first>Caroline</first><last>Jay</last></author>
       <author><first>Christopher</first><last>Mellish</last></author>
       <author><first>Jeff Z.</first><last>Pan</last></author>
       <author><first>Yuan</first><last>Ren</last></author>
       <author><first>Robert</first><last>Stevens</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>376–382</pages>
       <url hash="43fee60b">W13-0212</url>
       <bibkey>parvizi-etal-2013-pilot</bibkey>
@@ -370,7 +370,7 @@
     </paper>
     <paper id="15">
       <title>Gamification for Word Sense Labeling</title>
-      <author><first>Noortje J.</first><last>Venhuizen</last></author>
+      <author id="noortje-venhuizen"><first>Noortje J.</first><last>Venhuizen</last></author>
       <author><first>Valerio</first><last>Basile</last></author>
       <author><first>Kilian</first><last>Evang</last></author>
       <author><first>Johan</first><last>Bos</last></author>
@@ -382,7 +382,7 @@
       <title>Fitting, Not Clashing! A Distributional Semantic Model of Logical Metonymy</title>
       <author><first>Alessandra</first><last>Zarcone</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Jason</first><last>Utt</last></author>
       <pages>404–410</pages>
       <url hash="687ab02a">W13-0216</url>
@@ -395,7 +395,7 @@
       <url hash="14949f79">W13-03</url>
       <editor><first>Paul</first><last>Portner</last></editor>
       <editor><first>Aynat</first><last>Rubinstein</last></editor>
-      <editor><first>Graham</first><last>Katz</last></editor>
+      <editor id="graham-katz"><first>Graham</first><last>Katz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Potsdam, Germany</address>
       <month>March</month>
@@ -407,7 +407,7 @@
     </frontmatter>
     <paper id="1">
       <title>Challenges in modality annotation in a <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Spontaneous Speech Corpus</title>
-      <author><first>Luciana Beatriz</first><last>Avila</last></author>
+      <author id="luciana-beatriz-avila"><first>Luciana Beatriz</first><last>Avila</last></author>
       <author><first>Heliana</first><last>Mello</last></author>
       <pages>1–6</pages>
       <url hash="43e7a0af">W13-0301</url>
@@ -449,7 +449,7 @@
       <author><first>Aynat</first><last>Rubinstein</last></author>
       <author><first>Hillary</first><last>Harner</last></author>
       <author><first>Elizabeth</first><last>Krawczyk</last></author>
-      <author><first>Daniel</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Daniel</first><last>Simonson</last></author>
       <author><first>Graham</first><last>Katz</last></author>
       <author><first>Paul</first><last>Portner</last></author>
       <pages>38–46</pages>
@@ -472,7 +472,7 @@
       <url hash="3af40f7a">W13-04</url>
       <editor><first>Stephen</first><last>Wu</last></editor>
       <editor><first>Nigam</first><last>Shah</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Potsdam, Germany</address>
       <month>March</month>
@@ -484,7 +484,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Framework to Generate Sets of Terms from Large Scale Medical Vocabularies for Natural Language Processing</title>
-      <author><first>Salah</first><last>Aït-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Aït-Mokhtar</last></author>
       <author><first>Caroline</first><last>Hagège</last></author>
       <author><first>Pajolma</first><last>Rupi</last></author>
       <pages>1–6</pages>
@@ -510,17 +510,17 @@
     <paper id="4">
       <title>Evaluating the Use of Empirically Constructed Lexical Resources for Named Entity Recognition</title>
       <author><first>Siddhartha</first><last>Jonnalagadda</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
       <author><first>Stephen</first><last>Wu</last></author>
       <author><first>Hongfang</first><last>Liu</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>23–33</pages>
       <url hash="4af10481">W13-0404</url>
       <bibkey>jonnalagadda-etal-2013-evaluating</bibkey>
     </paper>
     <paper id="5">
       <title>Towards Converting Clinical Phrases into <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Expressions</title>
-      <author><first>Rohit J.</first><last>Kate</last></author>
+      <author id="rohit-kate"><first>Rohit J.</first><last>Kate</last></author>
       <pages>34–43</pages>
       <url hash="c7d19e13">W13-0405</url>
       <bibkey>kate-2013-towards</bibkey>
@@ -534,7 +534,7 @@
       <author><first>Siddhartha</first><last>Jonnalagadda</last></author>
       <author><first>Kavishwar</first><last>Wagholikar</last></author>
       <author><first>Stephen</first><last>Wu</last></author>
-      <author><first>Christopher</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher</first><last>Chute</last></author>
       <author><first>Hongfang</first><last>Liu</last></author>
       <pages>44–53</pages>
       <url hash="b70f1fae">W13-0406</url>
@@ -544,7 +544,7 @@
       <title>The <fixed-case>VERICLIG</fixed-case> Project: Extraction of Computer Interpretable Guidelines via Syntactic and Semantic Annotation</title>
       <author><first>Camilo</first><last>Thorne</last></author>
       <author><first>Marco</first><last>Montali</last></author>
-      <author><first>Diego</first><last>Calvanese</last></author>
+      <author id="diego-calvanese"><first>Diego</first><last>Calvanese</last></author>
       <author><first>Elena</first><last>Cardillo</last></author>
       <author><first>Claudio</first><last>Eccher</last></author>
       <pages>54–58</pages>
@@ -556,7 +556,7 @@
     <meta>
       <booktitle>Proceedings of the 9th Joint <fixed-case>ISO</fixed-case> - <fixed-case>ACL</fixed-case> <fixed-case>SIGSEM</fixed-case> Workshop on Interoperable Semantic Annotation</booktitle>
       <url hash="0ab8eac2">W13-05</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Potsdam, Germany</address>
       <month>March</month>
@@ -571,7 +571,7 @@
     <paper id="1">
       <title>Cross-linguistic annotation of modality: a data-driven hierarchical model</title>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Paola</first><last>Pietrandrea</last></author>
+      <author id="paola-pietrandrea"><first>Paola</first><last>Pietrandrea</last></author>
       <author><first>Andrea</first><last>Sansò</last></author>
       <author><first>Caterina</first><last>Mauri</last></author>
       <pages>7–14</pages>
@@ -587,7 +587,7 @@
     </paper>
     <paper id="3">
       <title>Capturing Motion in <fixed-case>ISO</fixed-case>-<fixed-case>S</fixed-case>pace<fixed-case>B</fixed-case>ank</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Zachary</first><last>Yocum</last></author>
       <pages>25–34</pages>
       <url hash="42ccfaa8">W13-0503</url>
@@ -611,7 +611,7 @@
     <paper id="6">
       <title>More Than Only Noun-Noun Compounds: Towards an Annotation Scheme for the Semantic Modelling of Other Noun Compound Types</title>
       <author><first>Ben</first><last>Verhoeven</last></author>
-      <author><first>Gerhard B.</first><last>van Huyssteen</last></author>
+      <author id="gerhard-b-van-huyssteen"><first>Gerhard B.</first><last>van Huyssteen</last></author>
       <pages>59–66</pages>
       <url hash="033f1c28">W13-0506</url>
       <bibkey>verhoeven-van-huyssteen-2013-noun</bibkey>
@@ -619,7 +619,7 @@
     <paper id="7">
       <title>Issues in the addition of <fixed-case>ISO</fixed-case> standard annotations to the Switchboard corpus</title>
       <author><first>Harry</first><last>Bunt</last></author>
-      <author><first>Alex C.</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex C.</first><last>Fang</last></author>
       <author><first>Xiaoyue</first><last>Liu</last></author>
       <author><first>Jing</first><last>Cao</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
@@ -638,7 +638,7 @@
     </paper>
     <paper id="9">
       <title>Inference Patterns with Intensional Adjectives</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>85–89</pages>
       <url hash="dac8c11e">W13-0509</url>
       <bibkey>pustejovsky-2013-inference</bibkey>
@@ -648,9 +648,9 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>IWCS</fixed-case> 2013 Workshop Towards a Formal Distributional Semantics</booktitle>
       <url hash="cc98c4b3">W13-06</url>
-      <editor><first>Aurelie</first><last>Herbelot</last></editor>
+      <editor id="aurelie-herbelot"><first>Aurelie</first><last>Herbelot</last></editor>
       <editor><first>Roberto</first><last>Zamparelli</last></editor>
-      <editor><first>Gemma</first><last>Boleda</last></editor>
+      <editor id="gemma-boleda"><first>Gemma</first><last>Boleda</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Potsdam, Germany</address>
       <month>March</month>
@@ -663,7 +663,7 @@
     <paper id="1">
       <title>Semantic transparency: challenges for distributional semantics</title>
       <author><first>Melanie J.</first><last>Bell</last></author>
-      <author><first>Martin</first><last>Schäfer</last></author>
+      <author id="martin-schaler"><first>Martin</first><last>Schäfer</last></author>
       <pages>1–10</pages>
       <url hash="a7879abc">W13-0601</url>
       <bibkey>bell-schafer-2013-semantic</bibkey>
@@ -677,10 +677,10 @@
     </paper>
     <paper id="3">
       <title>Sentence paraphrase detection: When determiners and word order make the difference</title>
-      <author><first>Nghia</first><last>Pham</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Yao Zhong</first><last>Zhang</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="nghia-the-pham"><first>Nghia</first><last>Pham</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="yao-zhong-zhang"><first>Yao Zhong</first><last>Zhang</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>21–29</pages>
       <url hash="bb9299d5">W13-0603</url>
       <bibkey>pham-etal-2013-sentence</bibkey>
@@ -689,7 +689,7 @@
       <title>The Curious Case of Metonymic Verbs: A Distributional Characterization</title>
       <author><first>Jason</first><last>Utt</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Alessandra</first><last>Zarcone</last></author>
       <pages>30–39</pages>
       <url hash="a5b89760">W13-0604</url>
@@ -700,7 +700,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>IWCS</fixed-case> 2013 Workshop on Computational Models of Spatial Language Interpretation and Generation (<fixed-case>C</fixed-case>o<fixed-case>SLI</fixed-case>-3)</booktitle>
       <url hash="ecd19dca">W13-07</url>
-      <editor><first>John</first><last>Kelleher</last></editor>
+      <editor id="john-kelleher"><first>John</first><last>Kelleher</last></editor>
       <editor><first>Robert</first><last>Ross</last></editor>
       <editor><first>Simon</first><last>Dobnik</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -722,7 +722,7 @@
     </paper>
     <paper id="2">
       <title>Deriving Salience Models from Human Route Directions</title>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <pages>7–12</pages>
       <url hash="38609e3c">W13-0702</url>
@@ -746,7 +746,7 @@
     </paper>
     <paper id="5">
       <title>Where Things Happen: On the Semantics of Event Localization</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>29–39</pages>
       <url hash="0a4f189d">W13-0705</url>
       <bibkey>pustejovsky-2013-things</bibkey>
@@ -778,7 +778,7 @@
     </paper>
     <paper id="2">
       <title>Taste of Two Different Flavours: Which <fixed-case>M</fixed-case>anipuri Script works better for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>anipuri Language pair <fixed-case>SMT</fixed-case> Systems?</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>11–18</pages>
       <url hash="82117f93">W13-0802</url>
       <bibkey>singh-2013-taste</bibkey>
@@ -786,7 +786,7 @@
     <paper id="3">
       <title>Hierarchical Alignment Decomposition Labels for <fixed-case>H</fixed-case>iero Grammar Rules</title>
       <author><first>Gideon</first><last>Maillette de Buy Wenniger</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>19–28</pages>
       <url hash="3ce79474">W13-0803</url>
       <bibkey>maillette-de-buy-wenniger-simaan-2013-hierarchical</bibkey>
@@ -796,7 +796,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>29–38</pages>
       <url hash="e7891b73">W13-0804</url>
       <bibkey>huck-etal-2013-performance</bibkey>
@@ -805,7 +805,7 @@
       <title>Combining Word Reordering Methods on different Linguistic Abstraction Levels for Statistical Machine Translation</title>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>39–47</pages>
       <url hash="cb93f3bc">W13-0805</url>
       <bibkey>herrmann-etal-2013-combining</bibkey>
@@ -822,7 +822,7 @@
     <paper id="7">
       <title>A Formal Characterization of Parsing Word Alignments by Synchronous Grammars with Empirical Evidence to the <fixed-case>ITG</fixed-case> Hypothesis.</title>
       <author><first>Gideon</first><last>Maillette de Buy Wenniger</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>58–67</pages>
       <url hash="c9530d59">W13-0807</url>
       <bibkey>maillette-de-buy-wenniger-simaan-2013-formal</bibkey>
@@ -841,7 +841,7 @@
       <url hash="b5293e24">W13-09</url>
       <editor><first>Ekaterina</first><last>Shutova</last></editor>
       <editor><first>Beata</first><last>Beigman Klebanov</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
@@ -890,9 +890,9 @@
     </paper>
     <paper id="5">
       <title>Automatic Metaphor Detection using Large-Scale Lexical Resources and Conventional Metaphor Extraction</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Adam</first><last>Dalton</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <pages>36–44</pages>
       <url hash="cf6a3624">W13-0905</url>
@@ -916,7 +916,7 @@
       <author><first>Kartik</first><last>Goyal</last></author>
       <author><first>Huying</first><last>Li</last></author>
       <author><first>Whitney</first><last>Sanders</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>52–57</pages>
       <url hash="c2f3d108">W13-0907</url>
       <bibkey>hovy-etal-2013-identifying</bibkey>
@@ -929,17 +929,17 @@
       <author><first>Dave</first><last>Barner</last></author>
       <author><first>Donald</first><last>Black</last></author>
       <author><first>Majorie</first><last>Friedman</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <pages>58–66</pages>
       <url hash="f8d2d3b1">W13-0908</url>
       <bibkey>heintz-etal-2013-automatic</bibkey>
     </paper>
     <paper id="9">
       <title>Robust Extraction of Metaphor from Novel Data</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
@@ -981,8 +981,8 @@
     </frontmatter>
     <paper id="1">
       <title>Managing Multiword Expressions in a Lexicon-Based Sentiment Analysis System for <fixed-case>S</fixed-case>panish</title>
-      <author><first>Antonio</first><last>Moreno-Ortiz</last></author>
-      <author><first>Chantal</first><last>Pérez-Hernández</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno-Ortiz</last></author>
+      <author id="chantal-perez-hernandez"><first>Chantal</first><last>Pérez-Hernández</last></author>
       <author><first>Maria</first><last>Del-Olmo</last></author>
       <pages>1–10</pages>
       <url hash="0c2897bd">W13-1001</url>
@@ -999,7 +999,7 @@
     <paper id="3">
       <title>Improving Word Translation Disambiguation by Capturing Multiword Expressions with Dictionaries</title>
       <author><first>Lars</first><last>Bungum</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>André</first><last>Lynum</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
       <pages>21–30</pages>
@@ -1008,7 +1008,7 @@
     </paper>
     <paper id="4">
       <title>Complex Predicates are Multi-Word Expressions</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>31</pages>
       <url hash="345b9e3b">W13-1004</url>
       <bibkey>palmer-2013-complex</bibkey>
@@ -1016,7 +1016,7 @@
     <paper id="5">
       <title>The (Un)expected Effects of Applying Standard Cleansing Models to Human Ratings on Compositionality</title>
       <author><first>Stephen</first><last>Roller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <author><first>Silke</first><last>Scheible</last></author>
       <pages>32–41</pages>
       <url hash="4a168be4">W13-1005</url>
@@ -1048,10 +1048,10 @@
     </paper>
     <paper id="9">
       <title>An Analysis of Annotation of Verb-Noun Idiomatic Combinations in a Parallel Dependency Corpus</title>
-      <author><first>Zdenka</first><last>Uresova</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
-      <author><first>Eva</first><last>Fucikova</last></author>
-      <author><first>Jana</first><last>Sindlerova</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fucikova</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Sindlerova</last></author>
       <pages>58–63</pages>
       <url hash="ccad4d4a">W13-1009</url>
       <bibkey>uresova-etal-2013-analysis</bibkey>
@@ -1077,8 +1077,8 @@
     <paper id="12">
       <title>Exploring <fixed-case>MWE</fixed-case>s for Knowledge Acquisition from Corporate Technical Documents</title>
       <author><first>Bell</first><last>Manrique Losada</last></author>
-      <author><first>Carlos M.</first><last>Zapata Jaramillo</last></author>
-      <author><first>Diego A.</first><last>Burgos</last></author>
+      <author id="carlos-mario-zapata-jaramillo"><first>Carlos M.</first><last>Zapata Jaramillo</last></author>
+      <author id="diego-a-burgos"><first>Diego A.</first><last>Burgos</last></author>
       <pages>82–86</pages>
       <url hash="cdf1ec6b">W13-1012</url>
       <bibkey>manrique-losada-etal-2013-exploring</bibkey>
@@ -1093,9 +1093,9 @@
     </paper>
     <paper id="14">
       <title>Identifying Pronominal Verbs: Towards Automatic Disambiguation of the Clitic ‘se’ in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Magali</first><last>Sanches Duran</last></author>
-      <author><first>Carolina Evaristo</first><last>Scarton</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last></author>
+      <author id="carolina-scarton"><first>Carolina Evaristo</first><last>Scarton</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <pages>93–100</pages>
       <url hash="b0898aec">W13-1014</url>
@@ -1120,8 +1120,8 @@
     </paper>
     <paper id="17">
       <title>Combining Different Features of Idiomaticity for the Automatic Classification of <fixed-case>N</fixed-case>oun+<fixed-case>V</fixed-case>erb Expressions in <fixed-case>B</fixed-case>asque</title>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>116–125</pages>
       <url hash="eac64a98">W13-1017</url>
       <bibkey>gurrutxaga-alegria-2013-combining</bibkey>
@@ -1129,7 +1129,7 @@
     <paper id="18">
       <title>Semantic Roles for Nominal Predicates: Building a Lexical Resource</title>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Bhuvana</first><last>Narasimhan</last></author>
       <pages>126–131</pages>
       <url hash="1f04ec33">W13-1018</url>
@@ -1159,7 +1159,7 @@
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Akifumi</first><last>Yoshimoto</last></author>
       <author><first>Frances</first><last>Yung</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>139–144</pages>
       <url hash="d803fb1f">W13-1021</url>
       <bibkey>shigeto-etal-2013-construction</bibkey>
@@ -1172,8 +1172,8 @@
       <editor><first>Cristian</first><last>Danescu-Niculescu-Mizil</last></editor>
       <editor><first>Atefeh</first><last>Farzindar</last></editor>
       <editor><first>Michael</first><last>Gamon</last></editor>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
-      <editor><first>Meena</first><last>Nagarajan</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="meenakshi-nagarajan"><first>Meena</first><last>Nagarajan</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1188,7 +1188,7 @@
       <title>Does Size Matter? Text and Grammar Revision for Parsing Social Media Data</title>
       <author><first>Mohammad</first><last>Khan</last></author>
       <author><first>Markus</first><last>Dickinson</last></author>
-      <author><first>Sandra</first><last>Kuebler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></author>
       <pages>1–10</pages>
       <url hash="b8cac119">W13-1101</url>
       <bibkey>khan-etal-2013-size</bibkey>
@@ -1203,8 +1203,8 @@
     <paper id="3">
       <title>A Preliminary Study of Tweet Summarization using Information Extraction</title>
       <author><first>Wei</first><last>Xu</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Adam</first><last>Meyers</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
       <pages>20–29</pages>
       <url hash="7bea3804">W13-1103</url>
@@ -1212,19 +1212,19 @@
     </paper>
     <paper id="4">
       <title>Really? Well. Apparently Bootstrapping Improves the Performance of Sarcasm and Nastiness Classifiers for Online Dialogue</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>30–40</pages>
       <url hash="474be315">W13-1104</url>
       <bibkey>lukin-walker-2013-really</bibkey>
     </paper>
     <paper id="5">
       <title>Topical Positioning: A New Method for Predicting Opinion Changes in Conversation</title>
-      <author><first>Ching-Sheng</first><last>Lin</last></author>
+      <author id="ching-sheng-lin"><first>Ching-Sheng</first><last>Lin</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Jennifer</first><last>Stromer-Galley</last></author>
+      <author id="jennifer-stromer-galley"><first>Jennifer</first><last>Stromer-Galley</last></author>
       <author><first>Jennifer</first><last>Crowley</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Veena</first><last>Ravishankar</last></author>
       <pages>41–48</pages>
       <url hash="b01057aa">W13-1105</url>
@@ -1262,7 +1262,7 @@
     <paper id="9">
       <title>Translating Government Agencies’ Tweet Feeds: Specificities, Problems and (a few) Solutions</title>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Atefeh</first><last>Farzindar</last></author>
       <pages>80–89</pages>
       <url hash="7ce683b9">W13-1109</url>
@@ -1273,9 +1273,9 @@
     <meta>
       <booktitle>Workshop on Events: Definition, Detection, Coreference, and Representation</booktitle>
       <url hash="09ceeb64">W13-12</url>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1288,7 +1288,7 @@
     </frontmatter>
     <paper id="1">
       <title>Coping With Implicit Arguments And Events Coreference</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>1–10</pages>
       <url hash="17e61de2">W13-1201</url>
       <bibkey>delmonte-2013-coping</bibkey>
@@ -1299,9 +1299,9 @@
       <author><first>Marieke</first><last>van Erp</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Willem Robert</first><last>van Hage</last></author>
-      <author><first>Luciano</first><last>Serafini</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="willem-robert-van-hage"><first>Willem Robert</first><last>van Hage</last></author>
+      <author id="luciano-serafini"><first>Luciano</first><last>Serafini</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Jesper</first><last>Hoeksema</last></author>
       <pages>11–20</pages>
       <url hash="45084026">W13-1202</url>
@@ -1338,7 +1338,7 @@
       <title>Annotating Change of State for Clinical Events</title>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
       <pages>47–51</pages>
       <url hash="c9d9fbb1">W13-1206</url>
       <bibkey>vanderwende-etal-2013-annotating</bibkey>
@@ -1349,7 +1349,7 @@
       <booktitle>Proceedings of the Workshop on Vision and Natural Language Processing</booktitle>
       <url hash="a22ce0f9">W13-13</url>
       <editor><first>Julia</first><last>Hockenmaier</last></editor>
-      <editor><first>Tamara</first><last>Berg</last></editor>
+      <editor id="tamara-berg"><first>Tamara</first><last>Berg</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1372,7 +1372,7 @@
       <title>Generating Natural-Language Video Descriptions Using Text-Mined Knowledge</title>
       <author><first>Niveda</first><last>Krishnamoorthy</last></author>
       <author><first>Girish</first><last>Malkarnenkar</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <author><first>Kate</first><last>Saenko</last></author>
       <author><first>Sergio</first><last>Guadarrama</last></author>
       <pages>10–19</pages>
@@ -1394,9 +1394,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Computational Linguistics for Literature</booktitle>
       <url hash="1f5b2735">W13-14</url>
-      <editor><first>David</first><last>Elson</last></editor>
+      <editor id="david-elson"><first>David</first><last>Elson</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1429,7 +1429,7 @@
     <paper id="3">
       <title>Tradition and Modernity in 20th Century <fixed-case>C</fixed-case>hinese Poetry</title>
       <author><first>Rob</first><last>Voigt</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>17–22</pages>
       <url hash="364738b9">W13-1403</url>
       <bibkey>voigt-jurafsky-2013-tradition</bibkey>
@@ -1478,7 +1478,7 @@
       <author><first>Janneke</first><last>Rauscher</last></author>
       <author><first>Leonard</first><last>Swiezinski</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>61–71</pages>
       <url hash="9473e480">W13-1409</url>
       <bibkey>rauscher-etal-2013-exploring</bibkey>
@@ -1525,8 +1525,8 @@
     <paper id="2">
       <title>Open Book: a tool for helping <fixed-case>ASD</fixed-case> users’ semantic comprehension</title>
       <author><first>Eduard</first><last>Barbu</last></author>
-      <author><first>Maria Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>Luis Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maria Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="l-alfonso-urena-lopez"><first>Luis Alfonso</first><last>Ureña-López</last></author>
       <pages>11–19</pages>
       <url hash="803aea5f">W13-1502</url>
       <bibkey>barbu-etal-2013-open</bibkey>
@@ -1544,7 +1544,7 @@
       <title>Lexical Tightness and Text Complexity</title>
       <author><first>Michael</first><last>Flor</last></author>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
-      <author><first>Kathleen M.</first><last>Sheehan</last></author>
+      <author id="kathleen-m-sheehan"><first>Kathleen M.</first><last>Sheehan</last></author>
       <pages>29–38</pages>
       <url hash="3c31262a">W13-1504</url>
       <bibkey>flor-etal-2013-lexical</bibkey>
@@ -1553,7 +1553,7 @@
       <title>A System for the Simplification of Numerical Expressions at Different Levels of Understandability</title>
       <author><first>Susana</first><last>Bautista</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <author><first>Sandra</first><last>Williams</last></author>
       <pages>39–48</pages>
@@ -1562,7 +1562,7 @@
     </paper>
     <paper id="6">
       <title>A Two-Stage Approach for Generating Unbiased Estimates of Text Complexity</title>
-      <author><first>Kathleen M.</first><last>Sheehan</last></author>
+      <author id="kathleen-m-sheehan"><first>Kathleen M.</first><last>Sheehan</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <author><first>Diane</first><last>Napolitano</last></author>
       <pages>49–58</pages>
@@ -1574,9 +1574,9 @@
     <meta>
       <booktitle>Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="04c77220">W13-16</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Erik</first><last>van der Goot</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Atlanta, Georgia</address>
       <month>June</month>
@@ -1597,7 +1597,7 @@
     <paper id="2">
       <title>Bootstrapped Learning of Emotion Hashtags #hashtags4you</title>
       <author><first>Ashequl</first><last>Qadir</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>2–11</pages>
       <url hash="c7216953">W13-1602</url>
       <bibkey>qadir-riloff-2013-bootstrapped</bibkey>
@@ -1605,7 +1605,7 @@
     <paper id="3">
       <title>Fine-Grained Emotion Recognition in Olympic Tweets Based on Human Computation</title>
       <author><first>Valentina</first><last>Sintsova</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <author><first>Pearl</first><last>Pu</last></author>
       <pages>12–20</pages>
       <url hash="caabb4e3">W13-1603</url>
@@ -1614,7 +1614,7 @@
     <paper id="4">
       <title><fixed-case>S</fixed-case>panish <fixed-case>DAL</fixed-case>: A <fixed-case>S</fixed-case>panish Dictionary of Affect in Language</title>
       <author><first>Matías</first><last>Dell’ Amerlina Ríos</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
       <pages>21–28</pages>
       <url hash="87f0de7a">W13-1604</url>
       <bibkey>dell-amerlina-rios-gravano-2013-spanish</bibkey>
@@ -1623,7 +1623,7 @@
       <title>The perfect solution for detecting sarcasm in tweets #not</title>
       <author><first>Christine</first><last>Liebrecht</last></author>
       <author><first>Florian</first><last>Kunneman</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>29–37</pages>
       <url hash="c3f0f213">W13-1605</url>
       <bibkey>liebrecht-etal-2013-perfect</bibkey>
@@ -1632,7 +1632,7 @@
       <title>Using <fixed-case>PU</fixed-case>-Learning to Detect Deceptive Opinion Spam</title>
       <author><first>Donato</first><last>Hernández Fusilier</last></author>
       <author><first>Rafael</first><last>Guzmán Cabrera</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <pages>38–45</pages>
       <url hash="4c8990ac">W13-1606</url>
@@ -1643,8 +1643,8 @@
       <author><first>Hugo Jair</first><last>Escalante</last></author>
       <author><first>Esaú</first><last>Villatoro-Tello</last></author>
       <author><first>Antonio</first><last>Juárez</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
-      <author><first>Luis</first><last>Villaseñor</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="luis-villasenor-pineda"><first>Luis</first><last>Villaseñor</last></author>
       <pages>46–54</pages>
       <url hash="c04a6496">W13-1607</url>
       <bibkey>escalante-etal-2013-sexual</bibkey>
@@ -1686,21 +1686,21 @@
     </paper>
     <paper id="12">
       <title>Bilingual Experiments on an Opinion Comparable Corpus</title>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>M. Teresa</first><last>Martín-Valdivia</last></author>
-      <author><first>M. Dolores</first><last>Molina-González</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>M. Teresa</first><last>Martín-Valdivia</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-González</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>87–93</pages>
       <url hash="99398ae9">W13-1612</url>
       <bibkey>martinez-camara-etal-2013-bilingual</bibkey>
     </paper>
     <paper id="13">
       <title><fixed-case>RA</fixed-case>-<fixed-case>SR</fixed-case>: Using a ranking algorithm to automatically building resources for subjectivity analysis over annotated corpora</title>
-      <author><first>Yoan</first><last>Gutiérrez</last></author>
+      <author id="yoan-gutierrez"><first>Yoan</first><last>Gutiérrez</last></author>
       <author><first>Andy</first><last>González</last></author>
-      <author><first>Antonio</first><last>Fernández</last></author>
+      <author id="antonio-fernandez-orquin"><first>Antonio</first><last>Fernández</last></author>
       <author><first>Andrés</first><last>Montoyo</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>94–99</pages>
       <url hash="0c3fdf6b">W13-1613</url>
       <bibkey>gutierrez-etal-2013-ra</bibkey>
@@ -1717,7 +1717,7 @@
       <title>Sentence-Level Subjectivity Detection Using Neuro-Fuzzy Models</title>
       <author><first>Samir</first><last>Rustamov</last></author>
       <author><first>Elshan</first><last>Mustafayev</last></author>
-      <author><first>Mark</first><last>Clements</last></author>
+      <author id="mark-a-clements"><first>Mark</first><last>Clements</last></author>
       <pages>108–114</pages>
       <url hash="ca4d4829">W13-1615</url>
       <bibkey>rustamov-etal-2013-sentence</bibkey>
@@ -1742,7 +1742,7 @@
     <meta>
       <booktitle>Proceedings of the Eighth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="9ed19581">W13-17</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1783,7 +1783,7 @@
     </paper>
     <paper id="4">
       <title>Developing and testing a self-assessment and tutoring system</title>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
       <author><first>Fiona</first><last>Barker</last></author>
       <author><first>Tim</first><last>Parish</last></author>
@@ -1838,21 +1838,21 @@
       <author><first>Amjad</first><last>Abu-Jbara</last></author>
       <author><first>Rahul</first><last>Jha</last></author>
       <author><first>Eric</first><last>Morley</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <pages>82–88</pages>
       <url hash="fe5bd803">W13-1710</url>
       <bibkey>abu-jbara-etal-2013-experimental</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>VTEX</fixed-case> System Description for the <fixed-case>NLI</fixed-case> 2013 Shared Task</title>
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <pages>89–95</pages>
       <url hash="a0cf52dc">W13-1711</url>
       <bibkey>daudaravicius-2013-vtex</bibkey>
     </paper>
     <paper id="12">
       <title>Feature Space Selection and Combination for Native Language Identification</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>96–100</pages>
@@ -1861,10 +1861,10 @@
     </paper>
     <paper id="13">
       <title>Discriminating Non-Native <fixed-case>E</fixed-case>nglish with 350 Words</title>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <author><first>Guido</first><last>Zarrella</last></author>
       <author><first>Craig</first><last>Pfeifer</last></author>
-      <author><first>John D.</first><last>Burger</last></author>
+      <author id="john-d-burger"><first>John D.</first><last>Burger</last></author>
       <pages>101–110</pages>
       <url hash="8afd219e">W13-1713</url>
       <bibkey>henderson-etal-2013-discriminating</bibkey>
@@ -1887,7 +1887,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>NLI</fixed-case> Shared Task 2013: <fixed-case>MQ</fixed-case> Submission</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Sze-Meng Jojo</first><last>Wong</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>124–133</pages>
@@ -1900,7 +1900,7 @@
       <author><first>Yuta</first><last>Hayashibe</last></author>
       <author><first>Keisuke</first><last>Sakaguchi</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>134–139</pages>
       <url hash="e17fb69a">W13-1717</url>
       <bibkey>mizumoto-etal-2013-naist</bibkey>
@@ -1918,7 +1918,7 @@
     </paper>
     <paper id="19">
       <title>Exploring Syntactic Representations for Native Language Identification</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <pages>146–151</pages>
       <url hash="2ca159bf">W13-1719</url>
       <revision id="1" href="W13-1719v1" hash="a00111f8"/>
@@ -1982,7 +1982,7 @@
       <author><first>Serhiy</first><last>Bykh</last></author>
       <author><first>Sowmya</first><last>Vajjala</last></author>
       <author><first>Julia</first><last>Krivanek</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>197–206</pages>
       <url hash="240375a5">W13-1726</url>
       <bibkey>bykh-etal-2013-combining</bibkey>
@@ -1992,16 +1992,16 @@
       <author><first>Andrea</first><last>Cimino</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>207–215</pages>
       <url hash="09bb110c">W13-1727</url>
       <bibkey>cimino-etal-2013-linguistic</bibkey>
     </paper>
     <paper id="28">
       <title>Improving Native Language Identification with <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case> Weighting</title>
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Peter</first><last>Wittenburg</last></author>
+      <author id="peter-wittenburg"><first>Peter</first><last>Wittenburg</last></author>
       <author><first>Tom</first><last>Heskes</last></author>
       <pages>216–223</pages>
       <url hash="e88fdda5">W13-1728</url>
@@ -2018,7 +2018,7 @@
     </paper>
     <paper id="30">
       <title>Feature Engineering in the <fixed-case>NLI</fixed-case> Shared Task 2013: <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity Submission Report</title>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Martin</first><last>Holub</last></author>
       <author><first>Vincent</first><last>Kríž</last></author>
       <pages>232–241</pages>
@@ -2032,7 +2032,7 @@
       <author><first>Kristopher</first><last>Kyle</last></author>
       <author><first>Scott</first><last>Crossley</last></author>
       <author><first>Jianmin</first><last>Dai</last></author>
-      <author><first>Danielle</first><last>McNamara</last></author>
+      <author id="danielle-s-mcnamara"><first>Danielle</first><last>McNamara</last></author>
       <pages>242–250</pages>
       <url hash="113ddfab">W13-1731</url>
       <bibkey>kyle-etal-2013-native</bibkey>
@@ -2040,7 +2040,7 @@
     <paper id="32">
       <title>Using N-gram and Word Network Features for Native Language Identification</title>
       <author><first>Shibamouli</first><last>Lahiri</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>251–259</pages>
       <url hash="e95c760c">W13-1732</url>
       <bibkey>lahiri-mihalcea-2013-using</bibkey>
@@ -2079,7 +2079,7 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Victor</first><last>Chahuneau</last></author>
       <author><first>Shuly</first><last>Wintner</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>279–287</pages>
       <url hash="3e3d15df">W13-1736</url>
       <bibkey>tsvetkov-etal-2013-identifying</bibkey>
@@ -2094,9 +2094,9 @@
     </paper>
     <paper id="38">
       <title>Improving interpretation robustness in a tutorial dialogue system</title>
-      <author><first>Myroslava</first><last>Dzikovska</last></author>
+      <author id="myroslava-o-dzikovska"><first>Myroslava</first><last>Dzikovska</last></author>
       <author><first>Elaine</first><last>Farrow</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>293–299</pages>
       <url hash="b073cc53">W13-1738</url>
       <bibkey>dzikovska-etal-2013-improving</bibkey>
@@ -2104,7 +2104,7 @@
     <paper id="39">
       <title>Detecting Missing Hyphens in Learner Text</title>
       <author><first>Aoife</first><last>Cahill</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Nitin</first><last>Madnani</last></author>
       <pages>300–305</pages>
@@ -2113,7 +2113,7 @@
     </paper>
     <paper id="40">
       <title>Applying Machine Translation Metrics to Student-Written Translations</title>
-      <author><first>Lisa</first><last>Michaud</last></author>
+      <author id="lisa-n-michaud"><first>Lisa</first><last>Michaud</last></author>
       <author><first>Patricia Ann</first><last>McCoy</last></author>
       <pages>306–311</pages>
       <url hash="93479108">W13-1740</url>
@@ -2145,7 +2145,7 @@
     </paper>
     <paper id="2">
       <title>Stochastic Bi-Languages to model Dialogs</title>
-      <author><first>M. Inés</first><last>Torres</last></author>
+      <author id="m-ines-torres"><first>M. Inés</first><last>Torres</last></author>
       <pages>9–17</pages>
       <url hash="324b925b">W13-1802</url>
       <bibkey>torres-2013-stochastic</bibkey>
@@ -2212,8 +2212,8 @@
     </paper>
     <paper id="10">
       <title>Modeling Graph Languages with Grammars Extracted via Tree Decompositions</title>
-      <author><first>Bevan Keeley</first><last>Jones</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="bevan-jones"><first>Bevan Keeley</first><last>Jones</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <pages>54–62</pages>
       <url hash="0e63ee32">W13-1810</url>
@@ -2251,16 +2251,16 @@
     <paper id="15">
       <title>A Finite-State Approach to Translate <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Terms into <fixed-case>B</fixed-case>asque Using Medical Prefixes and Suffixes</title>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Manex</first><last>Agirrezabal</last></author>
-      <author><first>Mikel</first><last>Lersundi</last></author>
+      <author id="mikel-lersundi"><first>Mikel</first><last>Lersundi</last></author>
       <pages>99–103</pages>
       <url hash="959b824a">W13-1815</url>
       <bibkey>perez-de-vinaspre-etal-2013-finite</bibkey>
     </paper>
     <paper id="16">
       <title>Syncretism and How to Deal with it in a Morphological Analyzer: a <fixed-case>G</fixed-case>erman Example</title>
-      <author><first>Katina</first><last>Bontcheva</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
       <pages>104–107</pages>
       <url hash="a9e9a87f">W13-1816</url>
       <bibkey>bontcheva-2013-syncretism</bibkey>
@@ -2278,11 +2278,11 @@
     <meta>
       <booktitle>Proceedings of the 2013 Workshop on Biomedical Natural Language Processing</booktitle>
       <url hash="d577b639">W13-19</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>John</first><last>Pestian</last></editor>
-      <editor><first>Jun’ichi</first><last>Tsujii</last></editor>
+      <editor id="john-pestian"><first>John</first><last>Pestian</last></editor>
+      <editor id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -2300,7 +2300,7 @@
       <author><first>Katherine D.</first><last>Holland</last></author>
       <author><first>Tracy A.</first><last>Glauser</last></author>
       <author><first>Shannon M.</first><last>Standridge</last></author>
-      <author><first>Karin M.</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin M.</first><last>Verspoor</last></author>
       <author><first>John</first><last>Pestian</last></author>
       <pages>1–9</pages>
       <url hash="3dd14eeb">W13-1901</url>
@@ -2308,8 +2308,8 @@
     </paper>
     <paper id="2">
       <title>Identification of Patients with Acute Lung Injury from Free-Text Chest <fixed-case>X</fixed-case>-Ray Reports</title>
-      <author><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
-      <author><first>Cosmin</first><last>Bejan</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen-Yildiz</last></author>
+      <author id="cosmin-adrian-bejan"><first>Cosmin</first><last>Bejan</last></author>
       <author><first>Mark</first><last>Wurfel</last></author>
       <pages>10–17</pages>
       <url hash="6af25f67">W13-1902</url>
@@ -2317,10 +2317,10 @@
     </paper>
     <paper id="3">
       <title>Discovering Temporal Narrative Containers in Clinical Text</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Chen</first><last>Lin</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>18–26</pages>
@@ -2351,7 +2351,7 @@
       <title>Unsupervised Linguistically-Driven Reliable Dependency Parses Detection and Self-Training for Adaptation to the Biomedical Domain</title>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>45–53</pages>
       <url hash="9ef31445">W13-1906</url>
       <bibkey>dellorletta-etal-2013-unsupervised</bibkey>
@@ -2389,9 +2389,9 @@
       <author><first>Alexander</first><last>Klenner</last></author>
       <author><first>Sumit</first><last>Madan</last></author>
       <author><first>Sam</first><last>Ansari</last></author>
-      <author><first>Tamara</first><last>Bobic</last></author>
+      <author id="tamara-bobic"><first>Tamara</first><last>Bobic</last></author>
       <author><first>Julia</first><last>Hoeng</last></author>
-      <author><first>Martin</first><last>Hofmann--Apitius</last></author>
+      <author id="martin-hofmann-apitius"><first>Martin</first><last>Hofmann--Apitius</last></author>
       <author><first>Manuel</first><last>Peitsch</last></author>
       <pages>80–88</pages>
       <url hash="7707cdf4">W13-1910</url>
@@ -2401,7 +2401,7 @@
       <title>Exploring Word Class N-grams to Measure Language Development in Children</title>
       <author><first>Gabriela</first><last>Ramírez de la Rosa</last></author>
       <author><first>Thamar</first><last>Solorio</last></author>
-      <author><first>Manuel</first><last>Montes</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
       <author><first>Lisa</first><last>Bedore</last></author>
       <author><first>Elizabeth</first><last>Peña</last></author>
@@ -2419,9 +2419,9 @@
     </paper>
     <paper id="13">
       <title>Using the Argumentative Structure of Scientific Literature to Improve Information Access</title>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
-      <author><first>James</first><last>Mork</last></author>
-      <author><first>Alan</first><last>Aronson</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="james-g-mork"><first>James</first><last>Mork</last></author>
+      <author id="alan-r-aronson"><first>Alan</first><last>Aronson</last></author>
       <pages>102–110</pages>
       <url hash="f3484c09">W13-1913</url>
       <bibkey>jimeno-yepes-etal-2013-using</bibkey>
@@ -2446,7 +2446,7 @@
     <paper id="16">
       <title>Parallels between Linguistics and Biology</title>
       <author><first>Sutanu</first><last>Chakraborti</last></author>
-      <author><first>Ashish</first><last>Tendulkar</last></author>
+      <author id="ashish-v-tendulkar"><first>Ashish</first><last>Tendulkar</last></author>
       <pages>120–123</pages>
       <url hash="5daddaf0">W13-1916</url>
       <bibkey>chakraborti-tendulkar-2013-parallels</bibkey>
@@ -2456,13 +2456,13 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Shared Task 2013 Workshop</booktitle>
       <url hash="30a280e8">W13-20</url>
-      <editor><first>Claire</first><last>Nédellec</last></editor>
+      <editor id="claire-nedellec"><first>Claire</first><last>Nédellec</last></editor>
       <editor><first>Robert</first><last>Bossy</last></editor>
       <editor><first>Jin-Dong</first><last>Kim</last></editor>
-      <editor><first>Jung-jae</first><last>Kim</last></editor>
+      <editor id="jung-jae-kim"><first>Jung-jae</first><last>Kim</last></editor>
       <editor><first>Tomoko</first><last>Ohta</last></editor>
       <editor><first>Sampo</first><last>Pyysalo</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -2516,12 +2516,12 @@
     </paper>
     <paper id="5">
       <title>Extracting Biomedical Events and Modifications Using Subgraph Matching with Noisy Training Data</title>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
-      <author><first>David</first><last>Martinez</last></author>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="david-martinez"><first>David</first><last>Martinez</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Haibin</first><last>Liu</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>35–44</pages>
       <url hash="e76ca9a8">W13-2005</url>
       <bibkey>mackinlay-etal-2013-extracting</bibkey>
@@ -2539,8 +2539,8 @@
       <title><fixed-case>GRO</fixed-case> Task: Populating the Gene Regulation Ontology with events and relations</title>
       <author><first>Jung-jae</first><last>Kim</last></author>
       <author><first>Xu</first><last>Han</last></author>
-      <author><first>Vivian</first><last>Lee</last></author>
-      <author><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
+      <author id="vivian-k-lee"><first>Vivian</first><last>Lee</last></author>
+      <author id="dietrich-rebholz-schuhmann"><first>Dietrich</first><last>Rebholz-Schuhmann</last></author>
       <pages>50–57</pages>
       <url hash="8f55b79f">W13-2007</url>
       <bibkey>kim-etal-2013-gro</bibkey>
@@ -2564,7 +2564,7 @@
       <author><first>Sung-Jae</first><last>Jung</last></author>
       <author><first>Sung-Pil</first><last>Choi</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>67–75</pages>
       <url hash="29fbcd70">W13-2009</url>
       <bibkey>ohta-etal-2013-overview</bibkey>
@@ -2572,9 +2572,9 @@
     <paper id="10">
       <title>Generalizing an Approximate Subgraph Matching-based System to Extract Events in Molecular Biology and Cancer Genetics</title>
       <author><first>Haibin</first><last>Liu</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Donald C.</first><last>Comeau</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="donald-c-comeau"><first>Donald C.</first><last>Comeau</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <pages>76–85</pages>
       <url hash="59b890b5">W13-2010</url>
@@ -2582,8 +2582,8 @@
     </paper>
     <paper id="11">
       <title>Performance and limitations of the linguistically motivated Cocoa/Peaberry system in a broad biological domain.</title>
-      <author><first>SV</first><last>Ramanan</last></author>
-      <author><first>P. Senthil</first><last>Nathan</last></author>
+      <author id="sv-ramanan"><first>SV</first><last>Ramanan</last></author>
+      <author id="p-senthil-nathan"><first>P. Senthil</first><last>Nathan</last></author>
       <pages>86–93</pages>
       <url hash="be70cda9">W13-2011</url>
       <bibkey>ramanan-nathan-2013-performance</bibkey>
@@ -2600,9 +2600,9 @@
       <title><fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Shared Task 2013: Supporting Resources</title>
       <author><first>Pontus</first><last>Stenetorp</last></author>
       <author><first>Wiktoria</first><last>Golik</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
-      <author><first>Donald C.</first><last>Comeau</last></author>
-      <author><first>Rezarta</first><last>Islamaj Doğan</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
+      <author id="donald-c-comeau"><first>Donald C.</first><last>Comeau</last></author>
+      <author id="rezarta-islamaj-dogan"><first>Rezarta</first><last>Islamaj Doğan</last></author>
       <author><first>Haibin</first><last>Liu</last></author>
       <author><first>W. John</first><last>Wilbur</last></author>
       <pages>99–103</pages>
@@ -2613,8 +2613,8 @@
       <title>A fast rule-based approach for biomedical event extraction</title>
       <author><first>Quoc-Chinh</first><last>Bui</last></author>
       <author><first>David</first><last>Campos</last></author>
-      <author><first>Erik</first><last>van Mulligen</last></author>
-      <author><first>Jan</first><last>Kors</last></author>
+      <author id="erik-van-mulligen"><first>Erik</first><last>van Mulligen</last></author>
+      <author id="jan-kors"><first>Jan</first><last>Kors</last></author>
       <pages>104–108</pages>
       <url hash="42253f13">W13-2014</url>
       <bibkey>bui-etal-2013-fast</bibkey>
@@ -2623,7 +2623,7 @@
       <title>Improving Feature-Based Biomedical Event Extraction System by Integrating Argument Information</title>
       <author><first>Lishuang</first><last>Li</last></author>
       <author><first>Yiwen</first><last>Wang</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <pages>109–115</pages>
       <url hash="e6d06ac1">W13-2015</url>
       <bibkey>li-etal-2013-improving</bibkey>
@@ -2635,7 +2635,7 @@
       <author><first>Tilia</first><last>Ellendorff</last></author>
       <author><first>Don</first><last>Tuggener</last></author>
       <author><first>Fabio</first><last>Rinaldi</last></author>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
       <pages>116–120</pages>
       <url hash="d2637e2a">W13-2016</url>
       <bibkey>schneider-etal-2013-uzh</bibkey>
@@ -2643,8 +2643,8 @@
     <paper id="17">
       <title>A Hybrid approach for biomedical event extraction</title>
       <author><first>Xuan Quang</first><last>Pham</last></author>
-      <author><first>Minh Quang</first><last>Le</last></author>
-      <author><first>Bao Quoc</first><last>Ho</last></author>
+      <author id="quang-le-minh"><first>Minh Quang</first><last>Le</last></author>
+      <author id="bao-quoc-ho"><first>Bao Quoc</first><last>Ho</last></author>
       <pages>121–124</pages>
       <url hash="ab067c73">W13-2017</url>
       <bibkey>pham-etal-2013-hybrid</bibkey>
@@ -2659,9 +2659,9 @@
     </paper>
     <paper id="19">
       <title>Exploring a Probabilistic <fixed-case>E</fixed-case>arley Parser for Event Composition in Biomedical Texts</title>
-      <author><first>Mai-Vu</first><last>Tran</last></author>
+      <author id="mai-vu-tran"><first>Mai-Vu</first><last>Tran</last></author>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Hoang-Quynh</first><last>Le</last></author>
+      <author id="hoang-quynh-le"><first>Hoang-Quynh</first><last>Le</last></author>
       <author><first>Van-Thuy</first><last>Phi</last></author>
       <author><first>Thanh-Binh</first><last>Pham</last></author>
       <pages>130–134</pages>
@@ -2671,7 +2671,7 @@
     <paper id="20">
       <title>Detecting Relations in the Gene Regulation Network</title>
       <author><first>Thomas</first><last>Provoost</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>135–138</pages>
       <url hash="f51ff718">W13-2020</url>
       <bibkey>provoost-moens-2013-detecting</bibkey>
@@ -2715,7 +2715,7 @@
     <paper id="25">
       <title>Bacteria Biotope Detection, Ontology-based Normalization, and Relation Extraction using Syntactic Rules</title>
       <author><first>İlknur</first><last>Karadeniz</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>170–177</pages>
       <url hash="498a55b8">W13-2025</url>
       <bibkey>karadeniz-ozgur-2013-bacteria</bibkey>
@@ -2764,7 +2764,7 @@
     </paper>
     <paper id="2">
       <title>Exploiting Ontology Lexica for Generating Natural Language Texts from <fixed-case>RDF</fixed-case> Data</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Janna</first><last>Lüker</last></author>
       <author><first>David</first><last>Nagel</last></author>
       <author><first>Christina</first><last>Unger</last></author>
@@ -2783,9 +2783,9 @@
     </paper>
     <paper id="4">
       <title>Enhancing the Expression of Contrast in the <fixed-case>SP</fixed-case>a<fixed-case>RK</fixed-case>y Restaurant Corpus</title>
-      <author><first>David</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David</first><last>Howcroft</last></author>
       <author><first>Crystal</first><last>Nakatsu</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>30–39</pages>
       <url hash="249eee5e">W13-2104</url>
       <bibkey>howcroft-etal-2013-enhancing</bibkey>
@@ -2818,7 +2818,7 @@
       <title>Graphs and Spatial Relations in the Generation of Referring Expressions</title>
       <author><first>Jette</first><last>Viethen</last></author>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>72–81</pages>
       <url hash="2df2efca">W13-2108</url>
       <bibkey>viethen-etal-2013-graphs</bibkey>
@@ -2852,14 +2852,14 @@
       <author><first>Nadjet</first><last>Bouayad-Agha</last></author>
       <author><first>Gerard</first><last>Casamayor</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <pages>98–102</pages>
       <url hash="a9f90675">W13-2112</url>
       <bibkey>bouayad-agha-etal-2013-overview</bibkey>
     </paper>
     <paper id="13">
       <title>Narrative Composition: Achieving the Perceived Linearity of Narrative</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>103–104</pages>
       <url hash="c5e41173">W13-2113</url>
       <bibkey>gervas-2013-narrative</bibkey>
@@ -2877,8 +2877,8 @@
     <paper id="15">
       <title>Generating Student Feedback from Time-Series Data Using Reinforcement Learning</title>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>115–124</pages>
       <url hash="a733519c">W13-2115</url>
@@ -2898,7 +2898,7 @@
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <author><first>Frank</first><last>Tompa</last></author>
-      <author><first>Raymond T.</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond T.</first><last>Ng</last></author>
       <pages>136–146</pages>
       <url hash="e44caa0a">W13-2117</url>
       <bibkey>mehdad-etal-2013-abstractive</bibkey>
@@ -2915,7 +2915,7 @@
       <title><fixed-case>MIME</fixed-case> - <fixed-case>NLG</fixed-case> in Pre-Hospital Care</title>
       <author><first>Anne</first><last>Schneider</last></author>
       <author><first>Alasdair</first><last>Mort</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Phil</first><last>Wilson</last></author>
       <author><first>Pierre-Luc</first><last>Vaudry</last></author>
@@ -2926,8 +2926,8 @@
     <paper id="20">
       <title>Generation of Quantified Referring Expressions: Evidence from Experimental Data</title>
       <author><first>Dale</first><last>Barr</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>157–161</pages>
       <url hash="31cd201d">W13-2120</url>
       <bibkey>barr-etal-2013-generation</bibkey>
@@ -2952,7 +2952,7 @@
     </paper>
     <paper id="23">
       <title>On the Feasibility of Automatically Describing n-dimensional Objects</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>172–177</pages>
       <url hash="e9b1fca0">W13-2123</url>
       <bibkey>duboue-2013-feasibility</bibkey>
@@ -2960,8 +2960,8 @@
     <paper id="24">
       <title><fixed-case>G</fixed-case>en<fixed-case>N</fixed-case>ext: A Consolidated Domain Adaptable <fixed-case>NLG</fixed-case> System</title>
       <author><first>Frank</first><last>Schilder</last></author>
-      <author><first>Blake</first><last>Howald</last></author>
-      <author><first>Ravi</first><last>Kondadadi</last></author>
+      <author id="blake-howald"><first>Blake</first><last>Howald</last></author>
+      <author id="ravikumar-kondadadi"><first>Ravi</first><last>Kondadadi</last></author>
       <pages>178–182</pages>
       <url hash="a893ea1b">W13-2124</url>
       <bibkey>schilder-etal-2013-gennext</bibkey>
@@ -2977,7 +2977,7 @@
     <paper id="26">
       <title>A Case Study Towards <fixed-case>T</fixed-case>urkish Paraphrase Alignment</title>
       <author><first>Seniz</first><last>Demir</last></author>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>Erdem</first><last>Unal</last></author>
       <pages>188–192</pages>
       <url hash="94148e64">W13-2126</url>
@@ -2996,7 +2996,7 @@
       <title><fixed-case>MIME</fixed-case>- <fixed-case>NLG</fixed-case> Support for Complex and Unstable Pre-hospital Emergencies</title>
       <author><first>Anne</first><last>Schneider</last></author>
       <author><first>Alasdair</first><last>Mort</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <author><first>Phil</first><last>Wilson</last></author>
       <author><first>Pierre-Luc</first><last>Vaudry</last></author>
@@ -3006,14 +3006,14 @@
     </paper>
     <paper id="29">
       <title><fixed-case>T</fixed-case>houghtland: Natural Language Descriptions for Machine Learning n-dimensional Error Functions</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>200–201</pages>
       <url hash="b9254bb4">W13-2129</url>
       <bibkey>duboue-2013-thoughtland</bibkey>
     </paper>
     <paper id="30">
       <title>An Automatic Method for Building a Data-to-Text Generator</title>
-      <author><first>Sina</first><last>Zarriess</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarriess</last></author>
       <author><first>Kyle</first><last>Richardson</last></author>
       <pages>202–203</pages>
       <url hash="e2167ec8">W13-2130</url>
@@ -3032,7 +3032,7 @@
       <author><first>Keith</first><last>Butler</last></author>
       <author><first>Priscilla</first><last>Moraes</last></author>
       <author><first>Ian</first><last>Tabolt</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <pages>206–207</pages>
       <url hash="8332f308">W13-2132</url>
       <bibkey>butler-etal-2013-team</bibkey>
@@ -3040,8 +3040,8 @@
     <paper id="33">
       <title>Content Selection Challenge - <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>berdeen Entry</title>
       <author><first>Roman</first><last>Kutlak</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>208–209</pages>
       <url hash="c2a7d0a2">W13-2133</url>
       <bibkey>kutlak-etal-2013-content</bibkey>
@@ -3059,14 +3059,14 @@
     <meta>
       <booktitle>Proceedings of the Eighth Workshop on Statistical Machine Translation</booktitle>
       <url hash="2aec40e5">W13-22</url>
-      <editor><first>Ondrej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></editor>
       <editor><first>Christian</first><last>Buck</last></editor>
       <editor><first>Chris</first><last>Callison-Burch</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
-      <editor><first>Herve</first><last>Saint-Amand</last></editor>
+      <editor id="herve-saint-amand"><first>Herve</first><last>Saint-Amand</last></editor>
       <editor><first>Radu</first><last>Soricut</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3110,7 +3110,7 @@
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>52–61</pages>
@@ -3121,11 +3121,11 @@
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>13</title>
       <author><first>Alexander</first><last>Allauzen</last></author>
       <author><first>Nicolas</first><last>Pécheux</last></author>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
       <author><first>Marco</first><last>Dinarelli</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Aurélien</first><last>Max</last></author>
-      <author><first>Hai-Son</first><last>Le</last></author>
+      <author id="hai-son-le"><first>Hai-Son</first><last>Le</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>62–69</pages>
       <url hash="1b33e8b2">W13-2204</url>
@@ -3141,15 +3141,15 @@
       <author><first>Austin</first><last>Matthews</last></author>
       <author><first>Kenton</first><last>Murray</last></author>
       <author><first>Nicola</first><last>Segall</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>70–77</pages>
       <url hash="fb23ecd7">W13-2205</url>
       <bibkey>ammar-etal-2013-cmu</bibkey>
     </paper>
     <paper id="6">
       <title>Feature Decay Algorithms for Fast Deployment of Accurate Statistical Machine Translation Systems</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>78–84</pages>
       <url hash="26eadcab">W13-2206</url>
       <bibkey>bicici-2013-feature</bibkey>
@@ -3157,7 +3157,7 @@
     <paper id="7">
       <title><fixed-case>CU</fixed-case>ni Multilingual Matrix in the <fixed-case>WMT</fixed-case> 2013 Shared Task</title>
       <author><first>Karel</first><last>Bílek</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>85–91</pages>
       <url hash="f66fc1f6">W13-2207</url>
       <bibkey>bilek-zeman-2013-cuni</bibkey>
@@ -3188,14 +3188,14 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Isabel</first><last>Slawik</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>104–108</pages>
       <url hash="6bb1547a">W13-2210</url>
       <bibkey>cho-etal-2013-karlsruhe</bibkey>
     </paper>
     <paper id="11">
       <title><fixed-case>TÜBİTAK</fixed-case>-<fixed-case>BİLGEM</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation Systems for W13</title>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>Coşkun</first><last>Mermer</last></author>
       <pages>109–113</pages>
       <url hash="17ed6c91">W13-2211</url>
@@ -3214,10 +3214,10 @@
     <paper id="13">
       <title><fixed-case>M</fixed-case>unich-<fixed-case>E</fixed-case>dinburgh-<fixed-case>S</fixed-case>tuttgart Submissions of <fixed-case>OSM</fixed-case> Systems at <fixed-case>WMT</fixed-case>13</title>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>122–127</pages>
       <url hash="1f9be9b3">W13-2213</url>
       <bibkey>durrani-etal-2013-munich</bibkey>
@@ -3226,7 +3226,7 @@
       <title>Towards Efficient Large-Scale Feature-Rich Statistical Machine Translation</title>
       <author><first>Vladimir</first><last>Eidelman</last></author>
       <author><first>Ke</first><last>Wu</last></author>
-      <author><first>Ferhan</first><last>Ture</last></author>
+      <author id="ferhan-ture"><first>Ferhan</first><last>Ture</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Jimmy</first><last>Lin</last></author>
       <pages>128–133</pages>
@@ -3235,12 +3235,12 @@
     </paper>
     <paper id="15">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Phrase-Based Translation Systems for <fixed-case>WMT</fixed-case>13: System Combination with Morphology Generation, Domain Adaptation and Corpus Filtering</title>
-      <author><first>Lluís</first><last>Formiga</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José B.</first><last>Mariño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-b-marino"><first>José B.</first><last>Mariño</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>134–140</pages>
       <url hash="e2b50512">W13-2215</url>
       <bibkey>formiga-etal-2013-talp</bibkey>
@@ -3261,10 +3261,10 @@
       <author><first>Kevin</first><last>Reschke</last></author>
       <author><first>Rob</first><last>Voigt</last></author>
       <author><first>John</first><last>Bauer</last></author>
-      <author><first>Sida</first><last>Wang</last></author>
+      <author id="sida-i-wang"><first>Sida</first><last>Wang</last></author>
       <author><first>Natalia</first><last>Silveira</last></author>
       <author><first>Julia</first><last>Neidert</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>148–153</pages>
       <url hash="44a6946a">W13-2217</url>
       <bibkey>green-etal-2013-feature</bibkey>
@@ -3273,7 +3273,7 @@
       <title>Factored Machine Translation Systems for <fixed-case>R</fixed-case>ussian-<fixed-case>E</fixed-case>nglish</title>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Elena</first><last>Manishina</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>154–157</pages>
       <url hash="e7e3a332">W13-2218</url>
       <bibkey>huet-etal-2013-factored</bibkey>
@@ -3288,7 +3288,7 @@
     </paper>
     <paper id="20">
       <title>Pre-Reordering for Machine Translation Using Transition-Based Walks on Dependency Parse Trees</title>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <pages>164–169</pages>
       <url hash="b7fc1a85">W13-2220</url>
@@ -3296,7 +3296,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>E</fixed-case>dinburgh’s Syntax-Based Machine Translation Systems</title>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>170–176</pages>
@@ -3307,7 +3307,7 @@
       <title>Shallow Semantically-Informed <fixed-case>PBSMT</fixed-case> and <fixed-case>HPBSMT</fixed-case></title>
       <author><first>Tsuyoshi</first><last>Okita</last></author>
       <author><first>Qun</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>177–184</pages>
       <url hash="bdd6c539">W13-2222</url>
       <bibkey>okita-etal-2013-shallow</bibkey>
@@ -3318,15 +3318,15 @@
       <author><first>Saab</first><last>Mansour</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Alexander</first><last>Allauzen</last></author>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
-      <author><first>Bianka</first><last>Buschbeck</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="bianka-buschbeck"><first>Bianka</first><last>Buschbeck</last></author>
       <author><first>Tonio</first><last>Wandmacher</last></author>
       <pages>185–192</pages>
       <url hash="fad3ea33">W13-2223</url>
@@ -3341,7 +3341,7 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>193–199</pages>
       <url hash="df549311">W13-2224</url>
       <bibkey>peitz-etal-2013-rwth</bibkey>
@@ -3351,7 +3351,7 @@
       <author><first>Juan</first><last>Pino</last></author>
       <author><first>Aurelien</first><last>Waite</last></author>
       <author><first>Tong</first><last>Xiao</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author><first>Federico</first><last>Flego</last></author>
       <author id="bill-byrne"><first>William</first><last>Byrne</last></author>
       <pages>200–205</pages>
@@ -3372,9 +3372,9 @@
     </paper>
     <paper id="27">
       <title>The <fixed-case>CNGL</fixed-case>-<fixed-case>DCU</fixed-case>-<fixed-case>P</fixed-case>rompsit Translation Systems for <fixed-case>WMT</fixed-case>13</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Santiago</first><last>Cortés Vaíllo</last></author>
+      <author id="santiago-cortes-vaillo"><first>Santiago</first><last>Cortés Vaíllo</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Xiaofeng</first><last>Wu</last></author>
       <author><first>Stephen</first><last>Doherty</last></author>
@@ -3388,7 +3388,7 @@
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Svetlana</first><last>Smekalova</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <pages>219–224</pages>
       <url hash="2bb9ee3c">W13-2228</url>
@@ -3398,7 +3398,7 @@
       <title>Tunable Distortion Limits and Corpus Cleaning for <fixed-case>SMT</fixed-case></title>
       <author><first>Sara</first><last>Stymne</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>225–231</pages>
       <url hash="b3e1aa51">W13-2229</url>
@@ -3409,11 +3409,11 @@
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
       <author><first>Svetlana</first><last>Smekalova</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Helmut</first><last>Schmid</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>232–239</pages>
       <url hash="d1f1b4a4">W13-2230</url>
       <bibkey>weller-etal-2013-munich</bibkey>
@@ -3421,7 +3421,7 @@
     <paper id="31">
       <title>Coping with the Subjectivity of Human Judgements in <fixed-case>MT</fixed-case> Quality Estimation</title>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>240–251</pages>
       <url hash="b7947994">W13-2231</url>
@@ -3430,7 +3430,7 @@
     <paper id="32">
       <title>Online Polylingual Topic Models for Fast Document Translation Detection</title>
       <author><first>Kriste</first><last>Krstovski</last></author>
-      <author><first>David A.</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David A.</first><last>Smith</last></author>
       <pages>252–261</pages>
       <url hash="b9f8ed7c">W13-2232</url>
       <bibkey>krstovski-smith-2013-online</bibkey>
@@ -3446,8 +3446,8 @@
     <paper id="34">
       <title>Generating <fixed-case>E</fixed-case>nglish Determiners in Phrase-Based Translation with Synthetic Translation Options</title>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <pages>271–280</pages>
       <url hash="6f05f604">W13-2234</url>
@@ -3455,7 +3455,7 @@
     </paper>
     <paper id="35">
       <title>Dramatically Reducing Training Data Size Through Vocabulary Saturation</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Sauleh</first><last>Eetemadi</last></author>
       <pages>281–291</pages>
       <url hash="68b0e621">W13-2235</url>
@@ -3481,7 +3481,7 @@
     <paper id="38">
       <title>Length-Incremental Phrase Training for <fixed-case>SMT</fixed-case></title>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>309–319</pages>
       <url hash="77ba2ab8">W13-2238</url>
       <bibkey>wuebker-ney-2013-length</bibkey>
@@ -3489,8 +3489,8 @@
     <paper id="39">
       <title>Positive Diversity Tuning for Machine Translation System Combination</title>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>320–328</pages>
       <url hash="803d5f16">W13-2239</url>
       <bibkey>cer-etal-2013-positive</bibkey>
@@ -3498,16 +3498,16 @@
     <paper id="40">
       <title>Selecting Feature Sets for Comparative and Time-Oriented Quality Estimation of Machine Translation Output</title>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>329–336</pages>
       <url hash="d096b876">W13-2240</url>
       <bibkey>avramidis-popovic-2013-selecting</bibkey>
     </paper>
     <paper id="41">
       <title><fixed-case>SHEF</fixed-case>-<fixed-case>L</fixed-case>ite: When Less is More for Translation Quality Estimation</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>337–342</pages>
       <url hash="f4c63c0b">W13-2241</url>
@@ -3515,28 +3515,28 @@
     </paper>
     <paper id="42">
       <title>Referential Translation Machines for Quality Estimation</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>343–351</pages>
       <url hash="55b36353">W13-2242</url>
       <bibkey>bicici-2013-referential</bibkey>
     </paper>
     <paper id="43">
       <title><fixed-case>FBK</fixed-case>-<fixed-case>UE</fixed-case>din Participation to the <fixed-case>WMT</fixed-case>13 Quality Estimation Shared Task</title>
-      <author><first>José Guilherme</first><last>Camargo de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José Guilherme</first><last>Camargo de Souza</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>352–358</pages>
       <url hash="2805b49f">W13-2243</url>
       <bibkey>camargo-de-souza-etal-2013-fbk</bibkey>
     </paper>
     <paper id="44">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Approach to System Selection: <fixed-case>A</fixed-case>siya Features and Pairwise Classification Using Random Forests</title>
-      <author><first>Lluís</first><last>Formiga</last></author>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="lluis-formiga"><first>Lluís</first><last>Formiga</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>359–364</pages>
       <url hash="19259bfb">W13-2244</url>
       <bibkey>formiga-etal-2013-talp-upc</bibkey>
@@ -3555,8 +3555,8 @@
     </paper>
     <paper id="46">
       <title><fixed-case>MT</fixed-case> Quality Estimation: The <fixed-case>CMU</fixed-case> System for <fixed-case>WMT</fixed-case>‘13</title>
-      <author><first>Silja</first><last>Hildebrand</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="almut-silja-hildebrand"><first>Silja</first><last>Hildebrand</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>373–379</pages>
       <url hash="6530713b">W13-2246</url>
       <bibkey>hildebrand-vogel-2013-mt</bibkey>
@@ -3564,27 +3564,27 @@
     <paper id="47">
       <title><fixed-case>LORIA</fixed-case> System for the <fixed-case>WMT</fixed-case>13 Quality Estimation Shared Task</title>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>380–385</pages>
       <url hash="7bceb6f7">W13-2247</url>
       <bibkey>langlois-smaili-2013-loria</bibkey>
     </paper>
     <paper id="48">
       <title><fixed-case>LIG</fixed-case> System for <fixed-case>WMT</fixed-case>13 <fixed-case>QE</fixed-case> Task: Investigating the Usefulness of Features in Word Confidence Estimation for <fixed-case>MT</fixed-case></title>
-      <author><first>Ngoc-Quang</first><last>Luong</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc-Quang</first><last>Luong</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>386–391</pages>
       <url hash="280348c7">W13-2248</url>
       <bibkey>luong-etal-2013-lig</bibkey>
     </paper>
     <paper id="49">
       <title><fixed-case>DCU</fixed-case>-<fixed-case>S</fixed-case>ymantec at the <fixed-case>WMT</fixed-case> 2013 Quality Estimation Shared Task</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
-      <author><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
+      <author id="rasoul-samad-zadeh-kaljahi"><first>Rasoul</first><last>Samad Zadeh Kaljahi</last></author>
       <author><first>Fred</first><last>Hollowood</last></author>
       <pages>392–397</pages>
       <url hash="bdd66974">W13-2249</url>
@@ -3592,7 +3592,7 @@
     </paper>
     <paper id="50">
       <title><fixed-case>LIMSI</fixed-case> Submission for the <fixed-case>WMT</fixed-case>‘13 Quality Estimation Task: an Experiment with N-Gram Posteriors</title>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <author><first>François</first><last>Yvon</last></author>
       <pages>398–404</pages>
@@ -3637,7 +3637,7 @@
     <paper id="55">
       <title>An Approach Using Style Classification Features for Quality Estimation</title>
       <author><first>Erwan</first><last>Moreau</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>429–434</pages>
       <url hash="eb023299">W13-2255</url>
       <bibkey>moreau-rubino-2013-approach</bibkey>
@@ -3664,7 +3664,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Felix</first><last>Rietig</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>452–463</pages>
       <url hash="93d3500d">W13-2258</url>
       <bibkey>huck-etal-2013-phrase</bibkey>
@@ -3687,7 +3687,7 @@
     </paper>
     <paper id="61">
       <title>Evaluating (and Improving) Sentence Alignment under Noisy Conditions</title>
-      <author><first>Omar</first><last>Zaidan</last></author>
+      <author id="omar-zaidan"><first>Omar</first><last>Zaidan</last></author>
       <author><first>Vishal</first><last>Chowdhary</last></author>
       <pages>484–493</pages>
       <url hash="788f66b1">W13-2261</url>
@@ -3706,7 +3706,7 @@
       <title>Hidden <fixed-case>M</fixed-case>arkov Tree Model for Word Alignment</title>
       <author><first>Shuhei</first><last>Kondo</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>503–511</pages>
       <url hash="be5fc2ca">W13-2263</url>
       <bibkey>kondo-etal-2013-hidden</bibkey>
@@ -3714,7 +3714,7 @@
     <paper id="64">
       <title>An <fixed-case>MT</fixed-case> Error-Driven Discriminative Word Lexicon using Sentence Structure Features</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>512–520</pages>
       <url hash="489f93a4">W13-2264</url>
       <bibkey>niehues-waibel-2013-mt</bibkey>
@@ -3724,7 +3724,7 @@
     <meta>
       <booktitle>Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse</booktitle>
       <url hash="602df61b">W13-23</url>
-      <editor><first>Antonio</first><last>Pareja-Lora</last></editor>
+      <editor id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></editor>
       <editor><first>Maria</first><last>Liakata</last></editor>
       <editor><first>Stefanie</first><last>Dipper</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3743,7 +3743,7 @@
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <pages>1–10</pages>
       <url hash="b76c8267">W13-2301</url>
       <revision id="1" href="W13-2301v1" hash="765c67bf"/>
@@ -3759,7 +3759,7 @@
     </paper>
     <paper id="3">
       <title>Utilizing State-of-the-art Parsers to Diagnose Problems in Treebank Annotation for a Less Resourced Language</title>
-      <author><first>Quy</first><last>Nguyen</last></author>
+      <author id="quy-nguyen"><first>Quy</first><last>Nguyen</last></author>
       <author><first>Ngan</first><last>Nguyen</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <pages>19–27</pages>
@@ -3776,7 +3776,7 @@
     <paper id="5">
       <title>Continuous Measurement Scales in Human Evaluation of Machine Translation</title>
       <author><first>Yvette</first><last>Graham</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Alistair</first><last>Moffat</last></author>
       <author><first>Justin</first><last>Zobel</last></author>
       <pages>33–41</pages>
@@ -3799,8 +3799,8 @@
       <author><first>Naomi</first><last>Saphra</last></author>
       <author><first>David</first><last>Bamman</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <pages>51–60</pages>
       <url hash="827fa242">W13-2307</url>
@@ -3809,7 +3809,7 @@
     <paper id="8">
       <title>Converting <fixed-case>I</fixed-case>talian Treebanks: Towards an <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>tanford Dependency Treebank</title>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <pages>61–69</pages>
       <url hash="a1f6421b">W13-2308</url>
@@ -3820,7 +3820,7 @@
       <author><first>Hen-Hsen</first><last>Huang</last></author>
       <author><first>Chi-Hsin</first><last>Yu</last></author>
       <author><first>Tai-Wei</first><last>Chang</last></author>
-      <author><first>Cong-Kai</first><last>Lin</last></author>
+      <author id="cong-kai-lin"><first>Cong-Kai</first><last>Lin</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>70–78</pages>
       <url hash="25290fca">W13-2309</url>
@@ -3830,7 +3830,7 @@
       <title>Towards a Better Understanding of Discourse: Integrating Multiple Discourse Annotation Perspectives Using <fixed-case>UIMA</fixed-case></title>
       <author><first>Claudiu</first><last>Mihăilă</last></author>
       <author><first>Georgios</first><last>Kontonatsios</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Paul</first><last>Thompson</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
@@ -3849,7 +3849,7 @@
     <paper id="12">
       <title>Importing <fixed-case>MASC</fixed-case> into the <fixed-case>ANNIS</fixed-case> linguistic database: A case study of mapping <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case></title>
       <author><first>Arne</first><last>Neumann</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <pages>98–102</pages>
       <url hash="fab5e88e">W13-2312</url>
@@ -3873,9 +3873,9 @@
     </paper>
     <paper id="15">
       <title>Applicative Structures and Immediate Discourse in the <fixed-case>T</fixed-case>urkish Discourse Bank</title>
-      <author><first>Isin</first><last>Demirşahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirşahin</last></author>
       <author><first>Adnan</first><last>Öztürel</last></author>
-      <author><first>Cem</first><last>Bozşahin</last></author>
+      <author id="cem-bozsahin"><first>Cem</first><last>Bozşahin</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <pages>122–130</pages>
       <url hash="18335488">W13-2315</url>
@@ -3883,11 +3883,11 @@
     </paper>
     <paper id="16">
       <title><fixed-case>TURKSENT</fixed-case>: A Sentiment Annotation Tool for Social Media</title>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <author><first>Fatih Samet</first><last>Çetin</last></author>
       <author><first>Meltem</first><last>Yanık</last></author>
       <author><first>Tanel</first><last>Temel</last></author>
-      <author><first>İlyas</first><last>Çiçekli</last></author>
+      <author id="ilyas-cicekli"><first>İlyas</first><last>Çiçekli</last></author>
       <pages>131–134</pages>
       <url hash="34edd08e">W13-2316</url>
       <bibkey>eryigit-etal-2013-turksent</bibkey>
@@ -3897,17 +3897,17 @@
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>Douglas</first><last>Briesch</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>135–139</pages>
       <url hash="347f4d7c">W13-2317</url>
       <bibkey>tratz-etal-2013-tweet</bibkey>
     </paper>
     <paper id="18">
       <title>Relation Annotation for Understanding Research Papers</title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Yo</first><last>Shidahara</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>140–148</pages>
       <url hash="f13757e7">W13-2318</url>
       <bibkey>tateisi-etal-2013-relation</bibkey>
@@ -3926,22 +3926,22 @@
     <paper id="20">
       <title><fixed-case>A</fixed-case>nimacy Annotation in the <fixed-case>H</fixed-case>indi Treebank</title>
       <author><first>Itisree</first><last>Jena</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>159–167</pages>
       <url hash="b7ad3cfd">W13-2320</url>
       <bibkey>jena-etal-2013-animacy</bibkey>
     </paper>
     <paper id="21">
       <title>Automatic Named Entity Pre-annotation for Out-of-domain Human Annotation</title>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>Mohamed</first><last>Ben Jannet</last></author>
-      <author><first>Jérémy</first><last>Leixa</last></author>
+      <author id="jeremy-leixa"><first>Jérémy</first><last>Leixa</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>168–177</pages>
       <url hash="0b92c180">W13-2321</url>
       <bibkey>rosset-etal-2013-automatic</bibkey>
@@ -3949,14 +3949,14 @@
     <paper id="22">
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Sembanking</title>
       <author><first>Laura</first><last>Banarescu</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Shu</first><last>Cai</last></author>
       <author><first>Madalina</first><last>Georgescu</last></author>
       <author><first>Kira</first><last>Griffitt</last></author>
       <author><first>Ulf</first><last>Hermjakob</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <pages>178–186</pages>
       <url hash="4ea321a7">W13-2322</url>
@@ -3964,7 +3964,7 @@
     </paper>
     <paper id="23">
       <title>The Benefits of a Model of Annotation</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Bob</first><last>Carpenter</last></author>
       <pages>187–195</pages>
       <url hash="17e10e1c">W13-2323</url>
@@ -4008,7 +4008,7 @@
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Agostinho</first><last>Salgueiro</last></author>
-      <author><first>Luciana</first><last>Ávila</last></author>
+      <author id="luciana-beatriz-avila"><first>Luciana</first><last>Ávila</last></author>
       <pages>228–237</pages>
       <url hash="280e94ad">W13-2328</url>
       <bibkey>mendes-etal-2013-annotating</bibkey>
@@ -4020,7 +4020,7 @@
       <url hash="631d417d">W13-24</url>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Lidia</first><last>Pivovarova</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <editor><first>Roman</first><last>Yangarber</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
@@ -4034,14 +4034,14 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: Ontologies and Linked Open Data for Acquisition and Exploitation of Language Resources</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>1</pages>
       <url hash="8a49d136">W13-2401</url>
       <bibkey>simov-2013-invited</bibkey>
     </paper>
     <paper id="2">
       <title>A Comparison of Approaches for Sentiment Classification on <fixed-case>L</fixed-case>ithuanian <fixed-case>I</fixed-case>nternet Comments</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Algis</first><last>Krupavičius</last></author>
       <author><first>Tomas</first><last>Krilavičius</last></author>
       <pages>2–11</pages>
@@ -4051,7 +4051,7 @@
     <paper id="3">
       <title>Evaluating Sentiment Analysis Systems in <fixed-case>R</fixed-case>ussian</title>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>12–17</pages>
       <url hash="a28516b6">W13-2403</url>
       <bibkey>chetviorkin-loukachevitch-2013-evaluating</bibkey>
@@ -4067,7 +4067,7 @@
     </paper>
     <paper id="5">
       <title>Frequently Asked Questions Retrieval for <fixed-case>C</fixed-case>roatian Based on Semantic Textual Similarity</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Lovro</first><last>Žmak</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>24–33</pages>
@@ -4094,7 +4094,7 @@
     </paper>
     <paper id="8">
       <title>Lemmatization and Morphosyntactic Tagging of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
       <pages>48–57</pages>
@@ -4104,7 +4104,7 @@
     <paper id="9">
       <title>Modernizing historical <fixed-case>S</fixed-case>lovene words with character-based <fixed-case>SMT</fixed-case></title>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>58–62</pages>
       <url hash="d5dcb91c">W13-2409</url>
       <bibkey>scherrer-erjavec-2013-modernizing</bibkey>
@@ -4181,7 +4181,7 @@
       <booktitle>Proceedings of the Sixth Workshop on Building and Using Comparable Corpora</booktitle>
       <url hash="2c963ebb">W13-25</url>
       <editor><first>Serge</first><last>Sharoff</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
@@ -4269,7 +4269,7 @@
       <title>Improving <fixed-case>MT</fixed-case> System Using Extracted Parallel Fragments of Text from Comparable Corpora</title>
       <author><first>Rajdeep</first><last>Gupta</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>69–76</pages>
       <url hash="a572c425">W13-2509</url>
       <bibkey>gupta-etal-2013-improving</bibkey>
@@ -4296,7 +4296,7 @@
       <author><first>Georgios</first><last>Kontonatsios</last></author>
       <author><first>Ioannis</first><last>Korkontzelos</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>95–104</pages>
       <url hash="adcd03e5">W13-2512</url>
       <bibkey>kontonatsios-etal-2013-using</bibkey>
@@ -4305,14 +4305,14 @@
       <title>Comparing Multilingual Comparable Articles Based On Opinions</title>
       <author><first>Motaz</first><last>Saad</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaïli</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaïli</last></author>
       <pages>105–111</pages>
       <url hash="92f78f4c">W13-2513</url>
       <bibkey>saad-etal-2013-comparing</bibkey>
     </paper>
     <paper id="14">
       <title>Mining for Domain-specific Parallel Text from <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Magdalena</first><last>Plamadă</last></author>
+      <author id="magdalena-plamada"><first>Magdalena</first><last>Plamadă</last></author>
       <author><first>Martin</first><last>Volk</last></author>
       <pages>112–120</pages>
       <url hash="b15fca06">W13-2514</url>
@@ -4322,7 +4322,7 @@
       <title>Gathering and Generating Paraphrases from <fixed-case>T</fixed-case>witter with Application to Normalization</title>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>121–128</pages>
       <url hash="e7596767">W13-2515</url>
       <bibkey>xu-etal-2013-gathering</bibkey>
@@ -4338,7 +4338,7 @@
       <title>Finding More Bilingual Webpages with High Credibility via Link Analysis</title>
       <author><first>Chengzhi</first><last>Zhang</last></author>
       <author><first>Xuchen</first><last>Yao</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>138–143</pages>
       <url hash="2554fc46">W13-2517</url>
       <bibkey>zhang-etal-2013-finding</bibkey>
@@ -4349,7 +4349,7 @@
       <booktitle>Proceedings of the Fourth Annual Workshop on Cognitive Modeling and Computational Linguistics (<fixed-case>CMCL</fixed-case>)</booktitle>
       <url hash="b448393b">W13-26</url>
       <editor><first>Vera</first><last>Demberg</last></editor>
-      <editor><first>Roger</first><last>Levy</last></editor>
+      <editor id="roger-levy"><first>Roger</first><last>Levy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -4363,7 +4363,7 @@
     <paper id="1">
       <title>Why is <fixed-case>E</fixed-case>nglish so easy to segment?</title>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
-      <author><first>Benjamin</first><last>Börschinger</last></author>
+      <author id="benjamin-borschinger"><first>Benjamin</first><last>Börschinger</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <author><first>Emmanuel</first><last>Dupoux</last></author>
       <pages>1–10</pages>
@@ -4382,7 +4382,7 @@
     <paper id="3">
       <title>Learning non-concatenative morphology</title>
       <author><first>Michelle</first><last>Fullwood</last></author>
-      <author><first>Tim</first><last>O’Donnell</last></author>
+      <author id="timothy-odonnell"><first>Tim</first><last>O’Donnell</last></author>
       <pages>21–27</pages>
       <url hash="e85e1c5a">W13-2603</url>
       <bibkey>fullwood-odonnell-2013-learning</bibkey>
@@ -4398,7 +4398,7 @@
     </paper>
     <paper id="5">
       <title>An Analysis of Memory-based Processing Costs using Incremental Deep Syntactic Dependency Parsing</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Luan</first><last>Nguyen</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>37–46</pages>
@@ -4416,7 +4416,7 @@
     </paper>
     <paper id="7">
       <title>The semantic augmentation of a psycholinguistically-motivated syntactic formalism</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>57–65</pages>
       <url hash="96960a1c">W13-2607</url>
@@ -4441,7 +4441,7 @@
     </paper>
     <paper id="10">
       <title>On the Information Conveyed by Discourse Markers</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>84–93</pages>
       <url hash="2b97dca3">W13-2610</url>
@@ -4462,7 +4462,7 @@
       <booktitle>Proceedings of the 7th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities</booktitle>
       <url hash="a24aea4b">W13-27</url>
       <editor><first>Piroska</first><last>Lendvai</last></editor>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -4477,10 +4477,10 @@
       <title>Generating Paths through Cultural Heritage Collections</title>
       <author><first>Samuel</first><last>Fernando</last></author>
       <author><first>Paula</first><last>Goodale</last></author>
-      <author><first>Paul</first><last>Clough</last></author>
+      <author id="paul-clough"><first>Paul</first><last>Clough</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Mark</first><last>Hall</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="mark-hall"><first>Mark</first><last>Hall</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1–10</pages>
       <url hash="5d32b57f">W13-2701</url>
       <bibkey>fernando-etal-2013-generating</bibkey>
@@ -4488,8 +4488,8 @@
     <paper id="2">
       <title>Using character overlap to improve language transformation</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>11–19</pages>
       <url hash="b11ffd7b">W13-2702</url>
       <bibkey>wubben-etal-2013-using</bibkey>
@@ -4514,7 +4514,7 @@
     <paper id="5">
       <title>Language Technology for Agile Social Media Science</title>
       <author><first>Simon</first><last>Wibberley</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
       <pages>36–42</pages>
       <url hash="12d2d939">W13-2705</url>
@@ -4541,7 +4541,7 @@
     </paper>
     <paper id="8">
       <title>Towards a Tool for Interactive Concept Building for Large Scale Analysis in the Humanities</title>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Jonathan</first><last>Sonntag</last></author>
       <author><first>Fritz</first><last>Kliche</last></author>
       <author><first>Ulrich</first><last>Heid</last></author>
@@ -4555,15 +4555,15 @@
       <title>Learning to Extract Folktale Keywords</title>
       <author><first>Dolf</first><last>Trieschnigg</last></author>
       <author><first>Dong</first><last>Nguyen</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>65–73</pages>
       <url hash="a38f33d7">W13-2709</url>
       <bibkey>trieschnigg-etal-2013-learning</bibkey>
     </paper>
     <paper id="10">
       <title>Towards Creating Precision Grammars from Interlinear Glossed Text: Inferring Large-Scale Typological Properties</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Joshua</first><last>Crowgey</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>74–83</pages>
@@ -4573,7 +4573,7 @@
     <paper id="11">
       <title>Using Comparable Collections of Historical Texts for Building a Diachronic Dictionary for Spelling Normalization</title>
       <author><first>Marilisa</first><last>Amoia</last></author>
-      <author><first>Jose Manuel</first><last>Martinez</last></author>
+      <author id="jose-manuel-martinez"><first>Jose Manuel</first><last>Martinez</last></author>
       <pages>84–89</pages>
       <url hash="a680e781">W13-2711</url>
       <bibkey>amoia-martinez-2013-using</bibkey>
@@ -4595,11 +4595,11 @@
     </paper>
     <paper id="14">
       <title>Temporal classification for historical <fixed-case>R</fixed-case>omanian texts</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <pages>102–106</pages>
       <url hash="688f8ce2">W13-2714</url>
       <bibkey>ciobanu-etal-2013-temporal-classification</bibkey>
@@ -4620,11 +4620,11 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Hybrid Approaches to Translation</booktitle>
       <url hash="200344dc">W13-28</url>
-      <editor><first>Marta Ruiz</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta Ruiz</first><last>Costa-jussà</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Patrik</first><last>Lambert</last></editor>
       <editor><first>Kurt</first><last>Eberle</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Bogdan</first><last>Babych</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
@@ -4650,7 +4650,7 @@
     </paper>
     <paper id="2">
       <title>Statistical <fixed-case>MT</fixed-case> Systems Revisited: How much Hybridity do they have?</title>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>7</pages>
       <url hash="2721b158">W13-2802</url>
       <bibkey>ney-2013-statistical</bibkey>
@@ -4664,8 +4664,8 @@
     </paper>
     <paper id="4">
       <title>Machine Learning Disambiguation of <fixed-case>Q</fixed-case>uechua Verb Morphology</title>
-      <author><first>Annette</first><last>Rios Gonzales</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios Gonzales</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <pages>13–18</pages>
       <url hash="ce6c81b1">W13-2804</url>
       <bibkey>rios-gonzales-gohring-2013-machine</bibkey>
@@ -4673,7 +4673,7 @@
     <paper id="5">
       <title>Improvements to Syntax-based Machine Translation using Ensemble Dependency Parsers</title>
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>19–24</pages>
       <url hash="1c7e5490">W13-2805</url>
       <bibkey>green-zabokrtsky-2013-improvements</bibkey>
@@ -4694,7 +4694,7 @@
       <author><first>Raj Nath</first><last>Patel</last></author>
       <author><first>Rohit</first><last>Gupta</last></author>
       <author><first>Prakash B.</first><last>Pimpale</last></author>
-      <author><first>Sasikumar</first><last>M</last></author>
+      <author id="sasikumar-m"><first>Sasikumar</first><last>M</last></author>
       <pages>34–41</pages>
       <url hash="7569f262">W13-2807</url>
       <bibkey>patel-etal-2013-reordering</bibkey>
@@ -4710,7 +4710,7 @@
     </paper>
     <paper id="9">
       <title>Controlled Ascent: Imbuing Statistical <fixed-case>MT</fixed-case> with Linguistic Knowledge</title>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <author><first>Chris</first><last>Quirk</last></author>
       <pages>51–66</pages>
       <url hash="300d7361">W13-2809</url>
@@ -4729,7 +4729,7 @@
       <title>Integrating morpho-syntactic features in <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic statistical machine translation</title>
       <author><first>Ines</first><last>Turki Khemakhem</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>74–81</pages>
       <url hash="d9ba8bb2">W13-2811</url>
       <bibkey>turki-khemakhem-etal-2013-integrating</bibkey>
@@ -4746,9 +4746,9 @@
     <paper id="13">
       <title>Building bilingual lexicon to create Dialect <fixed-case>T</fixed-case>unisian corpora and adapt language model</title>
       <author><first>Rahma</first><last>Boujelbane</last></author>
-      <author><first>Mariem</first><last>Ellouze khemekhem</last></author>
+      <author id="mariem-ellouze-khemekhem"><first>Mariem</first><last>Ellouze khemekhem</last></author>
       <author><first>Siwar</first><last>BenAyed</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>88–93</pages>
       <url hash="a4f53577">W13-2813</url>
       <bibkey>boujelbane-etal-2013-building</bibkey>
@@ -4756,15 +4756,15 @@
     <paper id="14">
       <title>A Hybrid Word Alignment Model for Phrase-Based Statistical Machine Translation</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>94–101</pages>
       <url hash="67a7ea98">W13-2814</url>
       <bibkey>pal-etal-2013-hybrid</bibkey>
     </paper>
     <paper id="15">
       <title>Lexical Selection for Hybrid <fixed-case>MT</fixed-case> with Sequence Labeling</title>
-      <author><first>Alex</first><last>Rudnick</last></author>
+      <author id="alex-rudnick"><first>Alex</first><last>Rudnick</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
       <pages>102–108</pages>
       <url hash="0638b3f7">W13-2815</url>
@@ -4772,11 +4772,11 @@
     </paper>
     <paper id="16">
       <title>Two Approaches to Correcting Homophone Confusions in a Hybrid Machine Translation System</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <pages>109–116</pages>
       <url hash="cba5497a">W13-2816</url>
       <bibkey>bouillon-etal-2013-two</bibkey>
@@ -4868,7 +4868,7 @@
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Chen</first><last>Chen</last></author>
       <author><first>Yoshinobu</first><last>Kano</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>49–58</pages>
       <url hash="5a22afde">W13-2906</url>
       <bibkey>hara-etal-2013-modeling</bibkey>
@@ -4876,7 +4876,7 @@
     <paper id="7">
       <title>On The Applicability of Readability Models to Web Texts</title>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>59–68</pages>
       <url hash="d5836c34">W13-2907</url>
       <bibkey>vajjala-meurers-2013-applicability</bibkey>
@@ -4917,7 +4917,7 @@
     <paper id="1">
       <title>Distributions on <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammar Derivations</title>
       <author><first>Tim</first><last>Hunter</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>1–11</pages>
       <url hash="3be90c8b">W13-3001</url>
       <bibkey>hunter-dyer-2013-distributions</bibkey>
@@ -4949,7 +4949,7 @@
       <title>The <fixed-case>F</fixed-case>robenius Anatomy of Relative Pronouns</title>
       <author><first>Stephen</first><last>Clark</last></author>
       <author><first>Bob</first><last>Coecke</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>41–51</pages>
       <url hash="ba2f678e">W13-3005</url>
       <bibkey>clark-etal-2013-frobenius</bibkey>
@@ -4990,7 +4990,7 @@
     <paper id="10">
       <title>Investigating Connectivity and Consistency Criteria for Phrase Pair Extraction in Statistical Machine Translation</title>
       <author><first>Spyros</first><last>Martzoukos</last></author>
-      <author><first>Christophe</first><last>Costa Florêncio</last></author>
+      <author id="christophe-costa-florencio"><first>Christophe</first><last>Costa Florêncio</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <pages>93–101</pages>
       <url hash="6e65cc8c">W13-3010</url>
@@ -5021,7 +5021,7 @@
     <paper id="1">
       <title>Multi-document multilingual summarization corpus preparation, Part 1: <fixed-case>A</fixed-case>rabic, <fixed-case>E</fixed-case>nglish, <fixed-case>G</fixed-case>reek, <fixed-case>C</fixed-case>hinese, <fixed-case>R</fixed-case>omanian</title>
       <author><first>Lei</first><last>Li</last></author>
-      <author><first>Corina</first><last>Forascu</last></author>
+      <author id="corina-forascu"><first>Corina</first><last>Forascu</last></author>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>George</first><last>Giannakopoulos</last></author>
       <pages>1–12</pages>
@@ -5048,8 +5048,8 @@
     <paper id="4">
       <title><fixed-case>ACL</fixed-case> 2013 <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing Pilot Overview</title>
       <author><first>Jeff</first><last>Kubina</last></author>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Judith</first><last>Schlesinger</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="judith-d-schlesinger"><first>Judith</first><last>Schlesinger</last></author>
       <pages>29–38</pages>
       <url hash="be5010ba">W13-3104</url>
       <bibkey>kubina-etal-2013-acl</bibkey>
@@ -5082,12 +5082,12 @@
     </paper>
     <paper id="8">
       <title>Multilingual Summarization: Dimensionality Reduction and a Step Towards Optimal Term Coverage</title>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Sashka T.</first><last>Davis</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="sashka-t-davis"><first>Sashka T.</first><last>Davis</last></author>
       <author><first>Jeff</first><last>Kubina</last></author>
       <author><first>Yi-Kai</first><last>Liu</last></author>
-      <author><first>Dianne P.</first><last>O’Leary</last></author>
-      <author><first>Judith D.</first><last>Schlesinger</last></author>
+      <author id="dianne-p-oleary"><first>Dianne P.</first><last>O’Leary</last></author>
+      <author id="judith-d-schlesinger"><first>Judith D.</first><last>Schlesinger</last></author>
       <pages>55–63</pages>
       <url hash="b628ed11">W13-3108</url>
       <bibkey>conroy-etal-2013-multilingual</bibkey>
@@ -5123,7 +5123,7 @@
       <url hash="45dd4038">W13-32</url>
       <editor><first>Alexandre</first><last>Allauzen</last></editor>
       <editor><first>Hugo</first><last>Larochelle</last></editor>
-      <editor><first>Christopher</first><last>Manning</last></editor>
+      <editor id="christopher-d-manning"><first>Christopher</first><last>Manning</last></editor>
       <editor><first>Richard</first><last>Socher</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
@@ -5138,7 +5138,7 @@
     <paper id="1">
       <title>Vector Space Semantic Parsing: A Framework for Compositional Vector Space Models</title>
       <author><first>Jayant</first><last>Krishnamurthy</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>1–10</pages>
       <url hash="5640e26f">W13-3201</url>
       <bibkey>krishnamurthy-mitchell-2013-vector</bibkey>
@@ -5147,7 +5147,7 @@
       <title>Learning from errors: Using vector-based compositional semantics for parse reranking</title>
       <author><first>Phong</first><last>Le</last></author>
       <author><first>Willem</first><last>Zuidema</last></author>
-      <author><first>Remko</first><last>Scha</last></author>
+      <author id="remko-scha"><first>Remko</first><last>Scha</last></author>
       <pages>11–19</pages>
       <url hash="fef208bd">W13-3202</url>
       <bibkey>le-etal-2013-learning</bibkey>
@@ -5159,7 +5159,7 @@
       <author><first>Huiying</first><last>Li</last></author>
       <author><first>Mrinmaya</first><last>Sachan</last></author>
       <author><first>Shashank</first><last>Srivastava</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>20–29</pages>
       <url hash="0ccd1b33">W13-3203</url>
       <bibkey>goyal-etal-2013-structured</bibkey>
@@ -5168,14 +5168,14 @@
       <title>Letter N-Gram-based Input Encoding for Continuous Space Language Models</title>
       <author><first>Henning</first><last>Sperr</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>30–39</pages>
       <url hash="7883701a">W13-3204</url>
       <bibkey>sperr-etal-2013-letter</bibkey>
     </paper>
     <paper id="5">
       <title>Transducing Sentences to Syntactic Feature Vectors: an Alternative Way to “Parse”?</title>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <author><first>Lorenzo</first><last>Dell’Arciprete</last></author>
       <pages>40–49</pages>
       <url hash="8d18a25c">W13-3205</url>
@@ -5183,9 +5183,9 @@
     </paper>
     <paper id="6">
       <title>General estimation and evaluation of compositional distributional semantic models</title>
-      <author><first>Georgiana</first><last>Dinu</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>50–58</pages>
       <url hash="fc15ca67">W13-3206</url>
       <bibkey>dinu-etal-2013-general</bibkey>
@@ -5193,7 +5193,7 @@
     <paper id="7">
       <title>Applicative structure in vector space models</title>
       <author><first>Márton</first><last>Makrai</last></author>
-      <author><first>David Mark</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>David Mark</first><last>Nemeskey</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <pages>59–63</pages>
       <url hash="989274f8">W13-3207</url>
@@ -5211,8 +5211,8 @@
     <paper id="9">
       <title>“Not not bad” is not “bad”: A distributional account of negation</title>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>74–82</pages>
       <url hash="8fa22ff7">W13-3209</url>
       <bibkey>hermann-etal-2013-bad</bibkey>
@@ -5221,7 +5221,7 @@
       <title>Towards Dynamic Word Sense Discrimination with Random Indexing</title>
       <author><first>Hans</first><last>Moen</last></author>
       <author><first>Erwin</first><last>Marsi</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>83–90</pages>
       <url hash="2ce4c851">W13-3210</url>
       <bibkey>moen-etal-2013-towards</bibkey>
@@ -5253,7 +5253,7 @@
     <paper id="14">
       <title>Recurrent Convolutional Neural Networks for Discourse Compositionality</title>
       <author><first>Nal</first><last>Kalchbrenner</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>119–126</pages>
       <url hash="528330a3">W13-3214</url>
       <bibkey>kalchbrenner-blunsom-2013-recurrent-convolutional</bibkey>
@@ -5263,10 +5263,10 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Discourse in Machine Translation</booktitle>
       <url hash="998b5fd1">W13-33</url>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Andrei</first><last>Popescu-Belis</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></editor>
       <editor><first>Katja</first><last>Markert</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -5280,7 +5280,7 @@
     <paper id="1">
       <title>Meaning Unit Segmentation in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese: a New Approach to Discourse Phenomena</title>
       <author><first>Jennifer</first><last>Williams</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>1–9</pages>
       <url hash="e1c22680">W13-3301</url>
@@ -5330,7 +5330,7 @@
       <title>Translation of “It” in a Deep Syntax Framework</title>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>51–59</pages>
       <url hash="90d5110d">W13-3307</url>
       <bibkey>novak-etal-2013-translation</bibkey>
@@ -5351,7 +5351,7 @@
       <booktitle>Proceedings of the Fourth Workshop on Teaching <fixed-case>NLP</fixed-case> and <fixed-case>CL</fixed-case></booktitle>
       <url hash="5e9ad572">W13-34</url>
       <editor><first>Ivan</first><last>Derzhanski</last></editor>
-      <editor><first>Dragomir</first><last>Radev</last></editor>
+      <editor id="dragomir-radev"><first>Dragomir</first><last>Radev</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Sofia, Bulgaria</address>
       <month>August</month>
@@ -5382,8 +5382,8 @@
     <paper id="3">
       <title>Introducing Computational Concepts in a Linguistics Olympiad</title>
       <author><first>Patrick</first><last>Littell</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Dragomir</first><last>Radev</last></author>
       <pages>18–26</pages>
       <url hash="9e4a0e53">W13-3403</url>
@@ -5398,10 +5398,10 @@
     </paper>
     <paper id="5">
       <title>Learning from <fixed-case>O</fixed-case>z<fixed-case>CLO</fixed-case>, the <fixed-case>A</fixed-case>ustralian Computational and Linguistics Olympiad</title>
-      <author><first>Dominique</first><last>Estival</last></author>
-      <author><first>John</first><last>Henderson</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
+      <author id="john-henderson"><first>John</first><last>Henderson</last></author>
       <author><first>Mary</first><last>Laughren</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Cathy</first><last>Bow</last></author>
       <author><first>Rachel</first><last>Nordlinger</last></author>
       <author><first>Verna</first><last>Rieschild</last></author>
@@ -5438,7 +5438,7 @@
     </paper>
     <paper id="9">
       <title>Treebanking for Data-driven Research in the Classroom</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Ying Cheuk</first><last>Hui</last></author>
       <author><first>Yin Hei</first><last>Kong</last></author>
       <pages>56–60</pages>
@@ -5457,7 +5457,7 @@
     <paper id="11">
       <title>A Virtual Manipulative for Learning Log-Linear Models</title>
       <author><first>Francis</first><last>Ferraro</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>66–76</pages>
       <url hash="4ccb1136">W13-3411</url>
       <bibkey>ferraro-eisner-2013-virtual</bibkey>
@@ -5471,10 +5471,10 @@
     </paper>
     <paper id="13">
       <title>Semantic Technologies in <fixed-case>IBM</fixed-case> <fixed-case>W</fixed-case>atson</title>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Or</first><last>Biran</last></author>
       <author><first>Siddharth</first><last>Patwardhan</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>85–92</pages>
       <url hash="ee68c0ff">W13-3413</url>
       <bibkey>gliozzo-etal-2013-semantic</bibkey>
@@ -5514,7 +5514,7 @@
     </paper>
     <paper id="3">
       <title>Improving Pointwise Mutual Information (<fixed-case>PMI</fixed-case>) by Incorporating Significant Co-occurrence</title>
-      <author><first>Om</first><last>Damani</last></author>
+      <author id="om-p-damani"><first>Om</first><last>Damani</last></author>
       <pages>20–28</pages>
       <url hash="91e3c91f">W13-3503</url>
       <bibkey>damani-2013-improving</bibkey>
@@ -5548,9 +5548,9 @@
     <paper id="7">
       <title>Spectral Learning of Refinement <fixed-case>HMM</fixed-case>s</title>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>56–64</pages>
       <url hash="8a2a272c">W13-3507</url>
       <bibkey>stratos-etal-2013-spectral</bibkey>
@@ -5558,7 +5558,7 @@
     <paper id="8">
       <title>Sentence Compression with Joint Structural Inference</title>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>65–74</pages>
       <url hash="256e8b3d">W13-3508</url>
       <bibkey>thadani-mckeown-2013-sentence</bibkey>
@@ -5576,15 +5576,15 @@
       <title>Documents and Dependencies: an Exploration of Vector Space Models for Semantic Composition</title>
       <author><first>Alona</first><last>Fyshe</last></author>
       <author><first>Brian</first><last>Murphy</last></author>
-      <author><first>Partha</first><last>Talukdar</last></author>
-      <author><first>Tom</first><last>Mitchell</last></author>
+      <author id="partha-talukdar"><first>Partha</first><last>Talukdar</last></author>
+      <author id="tom-mitchell"><first>Tom</first><last>Mitchell</last></author>
       <pages>84–93</pages>
       <url hash="10d27763">W13-3510</url>
       <bibkey>fyshe-etal-2013-documents</bibkey>
     </paper>
     <paper id="11">
       <title>Hidden <fixed-case>M</fixed-case>arkov tree models for semantic class induction</title>
-      <author><first>Édouard</first><last>Grave</last></author>
+      <author id="edouard-grave"><first>Édouard</first><last>Grave</last></author>
       <author><first>Guillaume</first><last>Obozinski</last></author>
       <author><first>Francis</first><last>Bach</last></author>
       <pages>94–103</pages>
@@ -5593,9 +5593,9 @@
     </paper>
     <paper id="12">
       <title>Better Word Representations with Recursive Neural Networks for Morphology</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Richard</first><last>Socher</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>104–113</pages>
       <url hash="cca22490">W13-3512</url>
       <bibkey>luong-etal-2013-better</bibkey>
@@ -5603,8 +5603,8 @@
     <paper id="13">
       <title>Separating Disambiguation from Composition in Distributional Semantics</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <pages>114–123</pages>
       <url hash="fae7965d">W13-3513</url>
       <bibkey>kartsaklis-etal-2013-separating</bibkey>
@@ -5620,14 +5620,14 @@
     <paper id="15">
       <title>Philosophers are Mortal: Inferring the Truth of Unseen Facts</title>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>133–142</pages>
       <url hash="0244f16c">W13-3515</url>
       <bibkey>angeli-manning-2013-philosophers</bibkey>
     </paper>
     <paper id="16">
       <title>Towards Robust Linguistic Analysis using <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes</title>
-      <author><first>Sameer</first><last>Pradhan</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
       <author><first>Alessandro</first><last>Moschitti</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
@@ -5644,7 +5644,7 @@
       <author><first>Jiaping</first><last>Zheng</last></author>
       <author><first>Luke</first><last>Vilnis</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <pages>153–162</pages>
       <url hash="72a58368">W13-3517</url>
@@ -5662,7 +5662,7 @@
     <paper id="19">
       <title>Collapsed Variational <fixed-case>B</fixed-case>ayesian Inference for <fixed-case>PCFG</fixed-case>s</title>
       <author><first>Pengyu</first><last>Wang</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>173–182</pages>
       <url hash="a69c9f83">W13-3519</url>
       <bibkey>wang-blunsom-2013-collapsed</bibkey>
@@ -5671,7 +5671,7 @@
       <title><fixed-case>P</fixed-case>olyglot: Distributed Word Representations for Multilingual <fixed-case>NLP</fixed-case></title>
       <author><first>Rami</first><last>Al-Rfou’</last></author>
       <author><first>Bryan</first><last>Perozzi</last></author>
-      <author><first>Steven</first><last>Skiena</last></author>
+      <author id="steven-skiena"><first>Steven</first><last>Skiena</last></author>
       <pages>183–192</pages>
       <url hash="d792538c">W13-3520</url>
       <bibkey>al-rfou-etal-2013-polyglot</bibkey>
@@ -5679,10 +5679,10 @@
     <paper id="21">
       <title>Exploiting multiple hypotheses for Multilingual Spoken Language Understanding</title>
       <author><first>Marcos</first><last>Calvo</last></author>
-      <author><first>Fernando</first><last>García</last></author>
-      <author><first>Lluís-F.</first><last>Hurtado</last></author>
+      <author id="fernando-garcia"><first>Fernando</first><last>García</last></author>
+      <author id="lluis-f-hurtado"><first>Lluís-F.</first><last>Hurtado</last></author>
       <author><first>Santiago</first><last>Jiménez</last></author>
-      <author><first>Emilio</first><last>Sanchis</last></author>
+      <author id="emilio-sanchis"><first>Emilio</first><last>Sanchis</last></author>
       <pages>193–201</pages>
       <url hash="e77cbc2c">W13-3521</url>
       <bibkey>calvo-etal-2013-exploiting</bibkey>
@@ -5699,7 +5699,7 @@
       <title>Topic Models + Word Alignment = A Flexible Framework for Extracting Bilingual Dictionary from Comparable Corpus</title>
       <author><first>Xiaodong</first><last>Liu</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>212–221</pages>
       <url hash="01ba4ba7">W13-3523</url>
       <bibkey>liu-etal-2013-topic</bibkey>
@@ -5727,7 +5727,7 @@
       <booktitle>Proceedings of the Seventeenth Conference on Computational Natural Language Learning: Shared Task</booktitle>
       <url hash="dbd50acc">W13-36</url>
       <editor><first>Hwee Tou</first><last>Ng</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Siew Mei</first><last>Wu</last></editor>
       <editor><first>Yuanbin</first><last>Wu</last></editor>
       <editor><first>Christian</first><last>Hadiwinoto</last></editor>
@@ -5764,13 +5764,13 @@
     </paper>
     <paper id="3">
       <title><fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2013 Shared Task: Grammatical Error Correction <fixed-case>NTHU</fixed-case> System Description</title>
-      <author><first>Ting-Hui</first><last>Kao</last></author>
-      <author><first>Yu-Wei</first><last>Chang</last></author>
-      <author><first>Hsun-Wen</first><last>Chiu</last></author>
+      <author id="ting-hui-kao"><first>Ting-Hui</first><last>Kao</last></author>
+      <author id="yu-wei-chang"><first>Yu-Wei</first><last>Chang</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-Wen</first><last>Chiu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
       <author><first>Joanne</first><last>Boisson</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>20–25</pages>
       <url hash="bc9b368e">W13-3603</url>
       <bibkey>kao-etal-2013-conll</bibkey>
@@ -5784,7 +5784,7 @@
       <author><first>Tomoya</first><last>Mizumoto</last></author>
       <author><first>Yuta</first><last>Hayashibe</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>26–33</pages>
       <url hash="b8e02050">W13-3604</url>
       <bibkey>yoshimoto-etal-2013-naist</bibkey>
@@ -5820,15 +5820,15 @@
       <title><fixed-case>LFG</fixed-case>-based Features for Noun Number and Article Grammatical Errors</title>
       <author><first>Gábor</first><last>Berend</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <pages>62–67</pages>
       <url hash="2445c886">W13-3608</url>
       <bibkey>berend-etal-2013-lfg</bibkey>
     </paper>
     <paper id="9">
       <title>Toward More Precision in Correction of Grammatical Errors</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Jiye</first><last>Yu</last></author>
       <pages>68–73</pages>
       <url hash="50d4a00a">W13-3609</url>
@@ -5846,8 +5846,8 @@
     <paper id="11">
       <title><fixed-case>IITB</fixed-case> System for <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2013 Shared Task: A Hybrid Approach to Grammatical Error Correction</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Ritesh</first><last>Shah</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="ritesh-shah"><first>Ritesh</first><last>Shah</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>82–87</pages>
       <url hash="aa44eac5">W13-3611</url>
       <bibkey>kunchukuttan-etal-2013-iitb</bibkey>
@@ -5865,7 +5865,7 @@
       <author><first>Grigori</first><last>Sidorov</last></author>
       <author><first>Anubhav</first><last>Gupta</last></author>
       <author><first>Martin</first><last>Tozer</last></author>
-      <author><first>Dolors</first><last>Catala</last></author>
+      <author id="dolors-catala"><first>Dolors</first><last>Catala</last></author>
       <author><first>Angels</first><last>Catena</last></author>
       <author><first>Sandrine</first><last>Fuentes</last></author>
       <pages>96–101</pages>
@@ -5874,7 +5874,7 @@
     </paper>
     <paper id="14">
       <title>Memory-based Grammatical Error Correction</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Peter</first><last>Berck</last></author>
       <pages>102–108</pages>
       <url hash="ff1f2ff0">W13-3614</url>
@@ -5903,7 +5903,7 @@
       <title><fixed-case>KUNLP</fixed-case> Grammatical Error Correction System For <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2013 Shared Task</title>
       <author><first>Bong-Jun</first><last>Yi</last></author>
       <author><first>Ho-Chang</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <pages>123–127</pages>
       <url hash="a1f9a5b3">W13-3617</url>
       <bibkey>yi-etal-2013-kunlp</bibkey>
@@ -5913,7 +5913,7 @@
     <meta>
       <booktitle>Proceedings of the Second International Conference on Dependency Linguistics (<fixed-case>D</fixed-case>ep<fixed-case>L</fixed-case>ing 2013)</booktitle>
       <url hash="58f67767">W13-37</url>
-      <editor><first>Eva</first><last>Hajičová</last></editor>
+      <editor id="eva-hajicova"><first>Eva</first><last>Hajičová</last></editor>
       <editor><first>Kim</first><last>Gerdes</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
       <publisher>Charles University in Prague, Matfyzpress, Prague, Czech Republic</publisher>
@@ -5928,14 +5928,14 @@
     </frontmatter>
     <paper id="1">
       <title>Invited talk: Dependency Structure and Cognition</title>
-      <author><first>Richard</first><last>Hudson</last></author>
+      <author id="richard-a-hudson"><first>Richard</first><last>Hudson</last></author>
       <pages>1–11</pages>
       <url hash="d9b1e9a8">W13-3701</url>
       <bibkey>hudson-2013-invited</bibkey>
     </paper>
     <paper id="2">
       <title>Invited talk: Dependency Representations, Grammars, Folded Structures, among Other Things!</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>12</pages>
       <url hash="3df987e7">W13-3702</url>
       <bibkey>joshi-2013-invited</bibkey>
@@ -5951,7 +5951,7 @@
     </paper>
     <paper id="4">
       <title>Towards Joint Morphological Analysis and Dependency Parsing of <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>23–32</pages>
       <url hash="009ad73b">W13-3704</url>
@@ -5959,9 +5959,9 @@
     </paper>
     <paper id="5">
       <title>Divergences in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Dependency Treebanks</title>
-      <author><first>Himani</first><last>Chaudhry</last></author>
+      <author id="himani-chaudhry"><first>Himani</first><last>Chaudhry</last></author>
       <author><first>Himanshu</first><last>Sharma</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>33–40</pages>
       <url hash="6219f044">W13-3705</url>
       <bibkey>chaudhry-etal-2013-divergences</bibkey>
@@ -6022,7 +6022,7 @@
     <paper id="13">
       <title>Towards a Psycholinguistically Motivated Dependency Grammar for <fixed-case>H</fixed-case>indi</title>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Rajesh</first><last>Bhatt</last></author>
+      <author id="rajesh-bhatt"><first>Rajesh</first><last>Bhatt</last></author>
       <author><first>Shravan</first><last>Vasishth</last></author>
       <pages>108–117</pages>
       <url hash="d81443a0">W13-3713</url>
@@ -6054,7 +6054,7 @@
     <paper id="17">
       <title>The Representation of <fixed-case>C</fixed-case>zech Light Verb Constructions in a Valency Lexicon</title>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <pages>147–156</pages>
       <url hash="d6c32ed6">W13-3717</url>
       <bibkey>kettnerova-lopatkova-2013-representation</bibkey>
@@ -6084,19 +6084,19 @@
     </paper>
     <paper id="21">
       <title>More Constructions, More Genres: Extending <fixed-case>S</fixed-case>tanford Dependencies</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Miriam</first><last>Connor</last></author>
       <author><first>Natalia</first><last>Silveira</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Timothy</first><last>Dozat</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>187–196</pages>
       <url hash="ccc644a8">W13-3721</url>
       <bibkey>de-marneffe-etal-2013-constructions</bibkey>
     </paper>
     <paper id="22">
       <title>Why So Many Nodes?</title>
-      <author><first>Dan</first><last>Maxwell</last></author>
+      <author id="daniel-maxwell"><first>Dan</first><last>Maxwell</last></author>
       <pages>197–206</pages>
       <url hash="4623da71">W13-3722</url>
       <bibkey>maxwell-2013-many</bibkey>
@@ -6122,9 +6122,9 @@
       <author><first>Debanka</first><last>Nandi</last></author>
       <author><first>Maaz</first><last>Nomani</last></author>
       <author><first>Himanshu</first><last>Sharma</last></author>
-      <author><first>Himani</first><last>Chaudhary</last></author>
+      <author id="himani-chaudhry"><first>Himani</first><last>Chaudhary</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>227–235</pages>
       <url hash="7363f43f">W13-3725</url>
       <bibkey>nandi-etal-2013-towards</bibkey>
@@ -6174,7 +6174,7 @@
       <title>Dependency and Constituency in Translation Shift Analysis</title>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <pages>282–291</pages>
       <url hash="5c224ae6">W13-3731</url>
       <bibkey>sanguinetti-etal-2013-dependency</bibkey>
@@ -6182,7 +6182,7 @@
     <paper id="32">
       <title>Managing a Multilingual Treebank Project</title>
       <author><first>Milan</first><last>Souček</last></author>
-      <author><first>Timo</first><last>Järvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Järvinen</last></author>
       <author><first>Adam</first><last>LaMontagne</last></author>
       <pages>292–297</pages>
       <url hash="4436d3d1">W13-3732</url>
@@ -6190,7 +6190,7 @@
     </paper>
     <paper id="33">
       <title>An Empirical Study of Differences between Conversion Schemes and Annotation Guidelines</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>298–307</pages>
       <url hash="5cd07bfa">W13-3733</url>
       <bibkey>sogaard-2013-empirical</bibkey>
@@ -6201,7 +6201,7 @@
       <booktitle>Proceedings of the Joint Symposium on Semantic Processing. Textual Inference and Structures in Corpora</booktitle>
       <url hash="99f675d5">W13-38</url>
       <editor><first>Octavian</first><last>Popescu</last></editor>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <address>Trento, Italy</address>
       <month>November</month>
       <year>2013</year>
@@ -6213,7 +6213,7 @@
     </frontmatter>
     <paper id="1">
       <title>Text Understanding using Knowledge-Bases and Random Walks</title>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>1</pages>
       <url hash="515c801d">W13-3801</url>
       <bibkey>agirre-2013-text</bibkey>
@@ -6227,7 +6227,7 @@
     </paper>
     <paper id="3">
       <title>Ontology Lexicalization as a core task in a language-enhanced Semantic Web</title>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>3</pages>
       <url hash="5f135260">W13-3803</url>
       <bibkey>cimiano-2013-ontology</bibkey>
@@ -6242,14 +6242,14 @@
     <paper id="5">
       <title>Entailment graphs for text exploration</title>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>5</pages>
       <url hash="23ca6bc2">W13-3805</url>
       <bibkey>dagan-magnini-2013-entailment</bibkey>
     </paper>
     <paper id="6">
       <title>Semantic Textual Similarity: past present and future</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>6</pages>
       <url hash="b3ae3e5c">W13-3806</url>
       <bibkey>diab-2013-semantic</bibkey>
@@ -6277,8 +6277,8 @@
     </paper>
     <paper id="10">
       <title>Design and Realization of the <fixed-case>EXCITEMENT</fixed-case> Open Platform for Textual Entailment</title>
-      <author><first>Günter</first><last>Neumann</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>11</pages>
       <url hash="3bd98e84">W13-3810</url>
       <bibkey>neumann-pado-2013-design</bibkey>
@@ -6299,7 +6299,7 @@
     </paper>
     <paper id="13">
       <title>Potential and limits of distributional approaches for semantic relatedness</title>
-      <author><first>Sabine</first><last>Schulte in Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte in Walde</last></author>
       <pages>14</pages>
       <url hash="98160528">W13-3813</url>
       <bibkey>schulte-in-walde-2013-potential</bibkey>
@@ -6308,7 +6308,7 @@
       <title>Towards Compositional Tree Kernels</title>
       <author><first>Paolo</first><last>Annesi</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>15-23</pages>
       <url hash="1813ddc8">W13-3814</url>
       <bibkey>annesi-etal-2013-towards</bibkey>
@@ -6334,8 +6334,8 @@
     <paper id="17">
       <title>Abduction for Discourse Interpretation: A Probabilistic Framework</title>
       <author><first>Ekaterina</first><last>Ovchinnikova</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <pages>42-50</pages>
       <url hash="7c205a77">W13-3817</url>
       <bibkey>ovchinnikova-etal-2013-abduction</bibkey>
@@ -6361,7 +6361,7 @@
       <author><first>Emanuele</first><last>Bastianelli</last></author>
       <author><first>Giuseppe</first><last>Castellucci</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>65-69</pages>
       <url hash="6a16b6cc">W13-3820</url>
       <revision id="1" href="W13-3820v1" hash="327af7ef"/>
@@ -6371,7 +6371,7 @@
     <paper id="21">
       <title>An empirical classification of verbs based on Semantic Types: the case of the ‘poison’ verbs.</title>
       <author><first>Jane</first><last>Bradbury</last></author>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <pages>70-74</pages>
       <url hash="6244589b">W13-3821</url>
       <bibkey>bradbury-el-maarouf-2013-empirical</bibkey>
@@ -6386,7 +6386,7 @@
     <paper id="23">
       <title>Alternative measures of word relatedness in distributional semantics</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Alina</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina</first><last>Ciobanu</last></author>
       <pages>80-84</pages>
       <url hash="b0729f8e">W13-3823</url>
       <bibkey>dinu-ciobanu-2013-alternative</bibkey>
@@ -6394,7 +6394,7 @@
     <paper id="24">
       <title>Linear Compositional Distributional Semantics and Structural Kernels</title>
       <author><first>Lorenzo</first><last>Ferrone</last></author>
-      <author><first>Fabio Massimo</first><last>Zanzotto</last></author>
+      <author id="fabio-massimo-zanzotto"><first>Fabio Massimo</first><last>Zanzotto</last></author>
       <pages>85-89</pages>
       <url hash="e92f02f9">W13-3824</url>
       <bibkey>ferrone-zanzotto-2013-linear</bibkey>
@@ -6408,7 +6408,7 @@
     </paper>
     <paper id="26">
       <title>Automatic classification of semantic patterns from the Pattern Dictionary of <fixed-case>E</fixed-case>nglish Verbs</title>
-      <author><first>Ismaïl</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismaïl</first><last>El Maarouf</last></author>
       <author><first>Vít</first><last>Baisa</last></author>
       <pages>95-99</pages>
       <url hash="a2e97ca3">W13-3826</url>
@@ -6451,8 +6451,8 @@
       <booktitle>Proceedings of the Fourth Workshop on Speech and Language Processing for Assistive Technologies</booktitle>
       <url hash="f5eb5fb5">W13-39</url>
       <editor><first>Jan</first><last>Alexandersson</last></editor>
-      <editor><first>Peter</first><last>Ljunglöf</last></editor>
-      <editor><first>Kathleen F.</first><last>McCoy</last></editor>
+      <editor id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></editor>
+      <editor id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></editor>
       <editor><first>François</first><last>Portet</last></editor>
       <editor><first>Brian</first><last>Roark</last></editor>
       <editor><first>Frank</first><last>Rudzicz</last></editor>
@@ -6507,8 +6507,8 @@
       <title>Comparing and combining classifiers for self-taught vocal interfaces</title>
       <author><first>Lize</first><last>Broekx</last></author>
       <author><first>Katrien</first><last>Dreesen</last></author>
-      <author><first>Jort Florent</first><last>Gemmeke</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="jort-florent-gemmeke"><first>Jort Florent</first><last>Gemmeke</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>21–28</pages>
       <url hash="6761e8ae">W13-3905</url>
       <bibkey>broekx-etal-2013-comparing</bibkey>
@@ -6529,7 +6529,7 @@
       <author><first>Hanne</first><last>Deprez</last></author>
       <author><first>Emre</first><last>Yilmaz</last></author>
       <author><first>Stefan</first><last>Lievens</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>35–40</pages>
       <url hash="c5bdd5a7">W13-3907</url>
       <bibkey>deprez-etal-2013-automating</bibkey>
@@ -6540,16 +6540,16 @@
       <author><first>Oscar</first><last>Koller</last></author>
       <author><first>Christian</first><last>Oberdörfer</last></author>
       <author><first>Yannick</first><last>Gweth</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>41–46</pages>
       <url hash="ec3d6ddf">W13-3908</url>
       <bibkey>forster-etal-2013-improving</bibkey>
     </paper>
     <paper id="9">
       <title>Automatic speech recognition in the diagnosis of primary progressive aphasia</title>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
-      <author><first>Naida</first><last>Graham</last></author>
+      <author id="naida-graham"><first>Naida</first><last>Graham</last></author>
       <author><first>Elizabeth</first><last>Rochon</last></author>
       <pages>47–54</pages>
       <url hash="8bd6a46b">W13-3909</url>
@@ -6590,10 +6590,10 @@
       <author><first>Bart</first><last>Ons</last></author>
       <author><first>Netsanet</first><last>Tessema</last></author>
       <author><first>Janneke</first><last>van de Loo</last></author>
-      <author><first>Jort</first><last>Gemmeke</last></author>
+      <author id="jort-florent-gemmeke"><first>Jort</first><last>Gemmeke</last></author>
       <author><first>Guy</first><last>De Pauw</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <pages>73–81</pages>
       <url hash="ee37293a">W13-3913</url>
       <bibkey>ons-etal-2013-self</bibkey>
@@ -6601,7 +6601,7 @@
     <paper id="14">
       <title>The dramatic piece reader for the blind and visually impaired</title>
       <author><first>Milan</first><last>Rusko</last></author>
-      <author><first>Marian</first><last>Trnka</last></author>
+      <author id="marian-trnka"><first>Marian</first><last>Trnka</last></author>
       <author><first>Sakhia</first><last>Darjaa</last></author>
       <author><first>Juraj</first><last>Hamar</last></author>
       <pages>83–91</pages>
@@ -6649,7 +6649,7 @@
       <author><first>Lode</first><last>Vuegen</last></author>
       <author><first>Bert</first><last>Van Den Broeck</last></author>
       <author><first>Peter</first><last>Karsmakers</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Bart</first><last>Vanrumste</last></author>
       <pages>113–118</pages>
       <url hash="a676c2f4">W13-3918</url>
@@ -6659,8 +6659,8 @@
       <title>Word Recognition from Continuous Articulatory Movement Time-series Data using Symbolic Representations</title>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Arvind</first><last>Balasubramanian</last></author>
-      <author><first>Luis</first><last>Mojica de la Vega</last></author>
-      <author><first>Jordan R.</first><last>Green</last></author>
+      <author id="luis-gerardo-mojica-de-la-vega"><first>Luis</first><last>Mojica de la Vega</last></author>
+      <author id="jordan-r-green"><first>Jordan R.</first><last>Green</last></author>
       <author><first>Ashok</first><last>Samal</last></author>
       <author><first>Balakrishnan</first><last>Prabhakaran</last></author>
       <pages>119–127</pages>
@@ -6684,7 +6684,7 @@
       <editor><first>Maxine</first><last>Eskenazi</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
       <editor><first>Barbara</first><last>Di Eugenio</last></editor>
-      <editor><first>Jason D.</first><last>Williams</last></editor>
+      <editor id="jason-d-williams"><first>Jason D.</first><last>Williams</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Metz, France</address>
       <month>August</month>
@@ -6697,7 +6697,7 @@
     </frontmatter>
     <paper id="1">
       <title>Discourse Relations, Discourse Structure, Discourse Semantics</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>1</pages>
       <url hash="9404c544">W13-4001</url>
       <bibkey>webber-2013-discourse</bibkey>
@@ -6705,7 +6705,7 @@
     <paper id="2">
       <title>Expressivity and comparison of models of discourse structure</title>
       <author><first>Antoine</first><last>Venant</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Philippe</first><last>Muller</last></author>
       <author><first>Pascal</first><last>Denis</last></author>
       <author><first>Stergos</first><last>Afantenos</last></author>
@@ -6716,7 +6716,7 @@
     <paper id="3">
       <title>Unsupervised structured semantic inference for spoken dialog reservation tasks</title>
       <author><first>Alejandra</first><last>Lorenzo</last></author>
-      <author><first>Lina</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas-Barahona</last></author>
       <author><first>Christophe</first><last>Cerisara</last></author>
       <pages>12–20</pages>
       <url hash="9c2b2a3e">W13-4003</url>
@@ -6725,7 +6725,7 @@
     <paper id="4">
       <title>Toward a Better Understanding of Causality between Verbal Events: Extraction and Analysis of the Causal Power of Verb-Verb Associations</title>
       <author><first>Mehwish</first><last>Riaz</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>21–30</pages>
       <url hash="9b35e478">W13-4004</url>
       <bibkey>riaz-girju-2013-toward</bibkey>
@@ -6741,7 +6741,7 @@
     <paper id="6">
       <title>Topic Independent Identification of Agreement and Disagreement in Social Media Dialogue</title>
       <author><first>Amita</first><last>Misra</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>41–50</pages>
       <url hash="62cce097">W13-4006</url>
       <bibkey>misra-walker-2013-topic</bibkey>
@@ -6751,7 +6751,7 @@
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>David</first><last>Gerritsen</last></author>
       <author><first>Amy</first><last>Ogan</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <pages>51–60</pages>
       <url hash="85c07154">W13-4007</url>
@@ -6770,7 +6770,7 @@
       <title>Generating More Specific Questions for Acquiring Attributes of Unknown Concepts from Users</title>
       <author><first>Tsugumi</first><last>Otsuka</last></author>
       <author><first>Kazunori</first><last>Komatani</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Mikio</first><last>Nakano</last></author>
       <pages>70–77</pages>
       <url hash="363dd9cf">W13-4009</url>
@@ -6781,14 +6781,14 @@
       <author><first>Changsong</first><last>Liu</last></author>
       <author><first>Rui</first><last>Fang</last></author>
       <author><first>Lanbo</first><last>She</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <pages>78–86</pages>
       <url hash="f2220c13">W13-4010</url>
       <bibkey>liu-etal-2013-modeling</bibkey>
     </paper>
     <paper id="11">
       <title>A quantitative view of feedback lexical markers in conversational <fixed-case>F</fixed-case>rench</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Brigitte</first><last>Bigi</last></author>
       <author><first>Roxane</first><last>Bertrand</last></author>
       <pages>87–91</pages>
@@ -6797,8 +6797,8 @@
     </paper>
     <paper id="12">
       <title>On the contribution of discourse structure to topic segmentation</title>
-      <author><first>Paula</first><last>Cardoso</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="paula-cardoso"><first>Paula</first><last>Cardoso</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>92–96</pages>
       <url hash="481f62a4">W13-4012</url>
@@ -6823,7 +6823,7 @@
     </paper>
     <paper id="15">
       <title>Patterns of Importance Variation in Spoken Dialog</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <author><first>Karen</first><last>Richart-Ruiz</last></author>
       <pages>107–111</pages>
       <url hash="9fa42a87">W13-4015</url>
@@ -6840,9 +6840,9 @@
       <title>Dialogue Act Recognition in Synchronous and Asynchronous Conversations</title>
       <author><first>Maryam</first><last>Tavafi</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>117–121</pages>
       <url hash="699f3399">W13-4017</url>
       <bibkey>tavafi-etal-2013-dialogue</bibkey>
@@ -6868,7 +6868,7 @@
       <title>Exploring Features For Localized Detection of Speech Recognition Errors</title>
       <author><first>Eli</first><last>Pincus</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>132–136</pages>
       <url hash="e765bbac">W13-4020</url>
       <bibkey>pincus-etal-2013-exploring</bibkey>
@@ -6876,8 +6876,8 @@
     <paper id="21">
       <title>Modelling Human Clarification Strategies</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Alex</first><last>Liu</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="alex-liu"><first>Alex</first><last>Liu</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>137–141</pages>
       <url hash="2ada7664">W13-4021</url>
       <bibkey>stoyanchev-etal-2013-modelling</bibkey>
@@ -6895,7 +6895,7 @@
     </paper>
     <paper id="23">
       <title><fixed-case>AIDA</fixed-case>: Artificial Intelligent Dialogue Agent</title>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Ridong</first><last>Jiang</last></author>
       <author><first>Seokhwan</first><last>Kim</last></author>
       <author><first>Arthur</first><last>Niswar</last></author>
@@ -6906,8 +6906,8 @@
     </paper>
     <paper id="24">
       <title>Demonstration of an Always-On Companion for Isolated Older Adults</title>
-      <author><first>Candace</first><last>Sidner</last></author>
-      <author><first>Timothy</first><last>Bickmore</last></author>
+      <author id="candace-l-sidner"><first>Candace</first><last>Sidner</last></author>
+      <author id="timothy-w-bickmore"><first>Timothy</first><last>Bickmore</last></author>
       <author><first>Charles</first><last>Rich</last></author>
       <author><first>Barbara</first><last>Barry</last></author>
       <author><first>Lazlo</first><last>Ring</last></author>
@@ -6919,7 +6919,7 @@
     </paper>
     <paper id="25">
       <title>A Multithreaded Conversational Interface for Pedestrian Navigation and Question Answering</title>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Phil</first><last>Bartie</last></author>
@@ -6931,20 +6931,20 @@
     </paper>
     <paper id="26">
       <title>Demonstration of the <fixed-case>PARLANCE</fixed-case> system: a data-driven incremental, spoken dialogue system for interactive search</title>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Marie-Aude</first><last>Aufaure</last></author>
       <author><first>Panos</first><last>Alexopoulos</last></author>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Peter</first><last>Mika</last></author>
       <author><first>Nesrine</first><last>Ben Mustapha</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
       <author><first>Yves</first><last>Vanrompay</last></author>
       <pages>154–156</pages>
       <url hash="bf453bde">W13-4026</url>
@@ -6980,7 +6980,7 @@
     </paper>
     <paper id="30">
       <title>Interpreting Situated Dialogue Utterances: an Update Model that Uses Speech, Gaze, and Gesture Information</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Spyros</first><last>Kousidis</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>173–182</pages>
@@ -7001,9 +7001,9 @@
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <author><first>Fabrizio</first><last>Morbini</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Stefan</first><last>Scherer</last></author>
-      <author><first>Albert Skip</first><last>Rizzo</last></author>
+      <author id="albert-a-rizzo"><first>Albert Skip</first><last>Rizzo</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <pages>193–202</pages>
       <url hash="70193692">W13-4032</url>
@@ -7018,24 +7018,24 @@
     </paper>
     <paper id="34">
       <title>Learning Dialogue Management Models for Task-Oriented Dialogue with Parallel Dialogue and Task Streams</title>
-      <author><first>Eun</first><last>Ha</last></author>
-      <author><first>Christopher</first><last>Mitchell</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="eun-young-ha"><first>Eun</first><last>Ha</last></author>
+      <author id="christopher-mitchell"><first>Christopher</first><last>Mitchell</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>204–213</pages>
       <url hash="bd3346cb">W13-4034</url>
       <bibkey>ha-etal-2013-learning</bibkey>
     </paper>
     <paper id="35">
       <title><fixed-case>POMDP</fixed-case>-based dialogue manager adaptation to extended domains</title>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Catherine</first><last>Breslin</last></author>
       <author><first>Matthew</first><last>Henderson</last></author>
       <author><first>Dongho</first><last>Kim</last></author>
-      <author><first>Martin</first><last>Szummer</last></author>
+      <author id="marcin-szummer"><first>Martin</first><last>Szummer</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>214–222</pages>
       <url hash="1656c3b8">W13-4035</url>
       <bibkey>gasic-etal-2013-pomdp</bibkey>
@@ -7043,7 +7043,7 @@
     <paper id="36">
       <title>Training and evaluation of an <fixed-case>MDP</fixed-case> model for social multi-user human-robot interaction</title>
       <author><first>Simon</first><last>Keizer</last></author>
-      <author><first>Mary Ellen</first><last>Foster</last></author>
+      <author id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Andre</first><last>Gaschler</last></author>
       <author><first>Manuel</first><last>Giuliani</last></author>
@@ -7058,7 +7058,7 @@
       <author><first>André</first><last>Berton</last></author>
       <author><first>Angela</first><last>Mahr</last></author>
       <author><first>Rafael</first><last>Math</last></author>
-      <author><first>Christian</first><last>Müller</last></author>
+      <author id="christian-mueller"><first>Christian</first><last>Müller</last></author>
       <pages>233–241</pages>
       <url hash="9994f5ef">W13-4037</url>
       <bibkey>hofmann-etal-2013-evaluation</bibkey>
@@ -7066,7 +7066,7 @@
     <paper id="38">
       <title>Predicting Tasks in Goal-Oriented Spoken Dialog Systems using Semantic Knowledge Bases</title>
       <author><first>Aasish</first><last>Pappu</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>242–250</pages>
       <url hash="1ac2b185">W13-4038</url>
       <bibkey>pappu-rudnicky-2013-predicting</bibkey>
@@ -7074,7 +7074,7 @@
     <paper id="39">
       <title>Surface Text based Dialogue Models for Virtual Humans</title>
       <author><first>Sudeep</first><last>Gandhe</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>251–260</pages>
       <url hash="6d391578">W13-4039</url>
       <bibkey>gandhe-traum-2013-surface</bibkey>
@@ -7119,7 +7119,7 @@
     </paper>
     <paper id="44">
       <title>Tacit Social Contracts for Wheelchairs</title>
-      <author><first>Daniel</first><last>Couto Vale</last></author>
+      <author id="daniel-couto-vale"><first>Daniel</first><last>Couto Vale</last></author>
       <author><first>Vivien</first><last>Mast</last></author>
       <pages>294–303</pages>
       <url hash="193a94d5">W13-4044</url>
@@ -7137,7 +7137,7 @@
     </paper>
     <paper id="46">
       <title><fixed-case>IMHO</fixed-case>: An Exploratory Study of Hedging in Web Forums</title>
-      <author><first>Liliana</first><last>Mamani Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sanchez</last></author>
       <author><first>Carl</first><last>Vogel</last></author>
       <pages>309–313</pages>
       <url hash="b5139266">W13-4046</url>
@@ -7147,7 +7147,7 @@
       <title>Impact of <fixed-case>ASR</fixed-case> N-Best Information on <fixed-case>B</fixed-case>ayesian Dialogue Act Recognition</title>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>314–318</pages>
       <url hash="2ad198a2">W13-4047</url>
@@ -7156,7 +7156,7 @@
     <paper id="48">
       <title>Investigating speaker gaze and pointing behaviour in human-computer interaction with the mint.tools collection</title>
       <author><first>Spyros</first><last>Kousidis</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>319–323</pages>
       <url hash="afbcf2b9">W13-4048</url>
@@ -7165,14 +7165,14 @@
     <paper id="49">
       <title>In-Context Evaluation of Unsupervised Dialogue Act Models for Tutorial Dialogue</title>
       <author><first>Aysu</first><last>Ezen-Can</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>324–328</pages>
       <url hash="48b05d0f">W13-4049</url>
       <bibkey>ezen-can-boyer-2013-context</bibkey>
     </paper>
     <paper id="50">
       <title>Spoken Dialog Systems for Automated Survey Interviewing</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>Frederick G.</first><last>Conrad</last></author>
       <author><first>Michael F.</first><last>Schober</last></author>
@@ -7198,9 +7198,9 @@
     </paper>
     <paper id="52">
       <title>Evaluating State Representations for Reinforcement Learning of Turn-Taking Policies in Tutorial Dialogue</title>
-      <author><first>Christopher</first><last>Mitchell</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="christopher-mitchell"><first>Christopher</first><last>Mitchell</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>339–343</pages>
       <url hash="52b54547">W13-4052</url>
       <bibkey>mitchell-etal-2013-evaluating</bibkey>
@@ -7221,7 +7221,7 @@
       <author><first>Sangdo</first><last>Han</last></author>
       <author><first>Kyusong</first><last>Lee</last></author>
       <author><first>Donghyeon</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>349–353</pages>
       <url hash="a57e1403">W13-4054</url>
       <bibkey>han-etal-2013-counseling</bibkey>
@@ -7241,15 +7241,15 @@
       <author><first>Claire</first><last>Gardent</last></author>
       <author><first>Alejandra</first><last>Lorenzo</last></author>
       <author><first>Laura</first><last>Perez-Beltrachini</last></author>
-      <author><first>Lina</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina</first><last>Rojas-Barahona</last></author>
       <pages>357–359</pages>
       <url hash="97a77a72">W13-4056</url>
       <bibkey>gardent-etal-2013-weakly</bibkey>
     </paper>
     <paper id="57">
       <title>Open-Domain Information Access with Talking Robots</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>360–362</pages>
       <url hash="7281e310">W13-4057</url>
       <bibkey>jokinen-wilcock-2013-open</bibkey>
@@ -7257,8 +7257,8 @@
     <paper id="58">
       <title>Demonstration of the <fixed-case>E</fixed-case>mote<fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Interface for Empathic Robotic Tutors</title>
       <author><first>Shweta</first><last>Bhargava</last></author>
-      <author><first>Srinivasan</first><last>Janarthanam</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Amol</first><last>Deshmukh</last></author>
       <author><first>Ruth</first><last>Aylett</last></author>
       <author><first>Lee</first><last>Corrigan</last></author>
@@ -7292,8 +7292,8 @@
       <author><first>Fabrizio</first><last>Morbini</last></author>
       <author><first>Kelly</first><last>Christoffersen</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Albert A.</first><last>Rizzo</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="albert-a-rizzo"><first>Albert A.</first><last>Rizzo</last></author>
       <pages>372–374</pages>
       <url hash="2a1ad9a5">W13-4061</url>
       <bibkey>forbell-etal-2013-roundtable</bibkey>
@@ -7309,9 +7309,9 @@
     </paper>
     <paper id="63">
       <title>Continuously Predicting and Processing Barge-in During a Live Spoken Dialogue Task</title>
-      <author><first>Ethan</first><last>Selfridge</last></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></author>
       <author><first>Iker</first><last>Arizmendi</last></author>
-      <author><first>Peter</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter</first><last>Heeman</last></author>
       <author><first>Jason</first><last>Williams</last></author>
       <pages>384–393</pages>
       <url hash="809f63ec">W13-4063</url>
@@ -7323,11 +7323,11 @@
       <author><first>Kartik</first><last>Audhkhasi</last></author>
       <author><first>Kenji</first><last>Sagae</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>Doğan</first><last>Can</last></author>
-      <author><first>Panayiotis</first><last>Georgiou</last></author>
-      <author><first>Shri</first><last>Narayanan</last></author>
+      <author id="dogan-can"><first>Doğan</first><last>Can</last></author>
+      <author id="panayiotis-georgiou"><first>Panayiotis</first><last>Georgiou</last></author>
+      <author id="shrikanth-narayanan"><first>Shri</first><last>Narayanan</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>394–403</pages>
       <url hash="bc9c9b18">W13-4064</url>
       <bibkey>morbini-etal-2013-asr</bibkey>
@@ -7337,14 +7337,14 @@
       <author><first>Jason</first><last>Williams</last></author>
       <author><first>Antoine</first><last>Raux</last></author>
       <author><first>Deepak</first><last>Ramachandran</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>404–413</pages>
       <url hash="52567147">W13-4065</url>
       <bibkey>williams-etal-2013-dialog</bibkey>
     </paper>
     <paper id="66">
       <title>Recipe For Building Robust Spoken Dialog State Trackers: Dialog State Tracking Challenge System Description</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>414–422</pages>
       <url hash="e53f4144">W13-4066</url>
@@ -7367,17 +7367,17 @@
     </paper>
     <paper id="69">
       <title>Structured Discriminative Model For Dialog State Tracking</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>442–451</pages>
       <url hash="d8c605b2">W13-4069</url>
       <bibkey>lee-2013-structured</bibkey>
     </paper>
     <paper id="70">
       <title>Comparison of <fixed-case>B</fixed-case>ayesian Discriminative and Generative Models for Dialogue State Tracking</title>
-      <author><first>Lukáš</first><last>Žilka</last></author>
+      <author id="lukas-zilka"><first>Lukáš</first><last>Žilka</last></author>
       <author><first>David</first><last>Marek</last></author>
       <author><first>Matěj</first><last>Korvas</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>452–456</pages>
       <url hash="93b3cdd5">W13-4070</url>
       <bibkey>zilka-etal-2013-comparison</bibkey>
@@ -7407,7 +7407,7 @@
       <title>Deep Neural Network Approach for the Dialog State Tracking Challenge</title>
       <author><first>Matthew</first><last>Henderson</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>467–471</pages>
       <url hash="b44b4dd2">W13-4073</url>
       <bibkey>henderson-etal-2013-deep</bibkey>
@@ -7417,8 +7417,8 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Sentiment Analysis where <fixed-case>AI</fixed-case> meets Psychology</booktitle>
       <url hash="77e327b0">W13-41</url>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
-      <editor><first>Manabu</first><last>Okumura</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="manabu-okumura"><first>Manabu</first><last>Okumura</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Nagoya, Japan</address>
       <month>October</month>
@@ -7432,7 +7432,7 @@
     <paper id="1">
       <title>Why Words Alone Are Not Enough: Error Analysis of Lexicon-based Polarity Classifier for <fixed-case>C</fixed-case>zech</title>
       <author><first>Kateřina</first><last>Veselovská</last></author>
-      <author><first>Jan</first><last>Hajič jr.</last></author>
+      <author id="jan-hajic-jr"><first>Jan</first><last>Hajič jr.</last></author>
       <pages>1–5</pages>
       <url hash="3f094d15">W13-4101</url>
       <bibkey>veselovska-hajic-jr-2013-words</bibkey>
@@ -7442,7 +7442,7 @@
       <author><first>Yasuhide</first><last>Miura</last></author>
       <author><first>Keigo</first><last>Hattori</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <pages>6–14</pages>
       <url hash="79fab1ef">W13-4102</url>
       <bibkey>miura-etal-2013-topic</bibkey>
@@ -7471,7 +7471,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>IJCNLP</fixed-case> 2013 Workshop on Natural Language Processing for Social Media (<fixed-case>S</fixed-case>ocial<fixed-case>NLP</fixed-case>)</booktitle>
       <url hash="ebc89530">W13-42</url>
-      <editor><first>Shou-de</first><last>Lin</last></editor>
+      <editor id="shou-de-lin"><first>Shou-de</first><last>Lin</last></editor>
       <editor><first>Lun-Wei</first><last>Ku</last></editor>
       <editor><first>Tsung-Ting</first><last>Kuo</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
@@ -7487,7 +7487,7 @@
     <paper id="1">
       <title>Predicting <fixed-case>TV</fixed-case> Audience Rating with Social Media</title>
       <author><first>Wen-Tai</first><last>Hsieh</last></author>
-      <author><first>Seng-cho T.</first><last>Chou</last></author>
+      <author id="seng-cho-t-chou"><first>Seng-cho T.</first><last>Chou</last></author>
       <author><first>Yu-Hsuan</first><last>Cheng</last></author>
       <author><first>Chen-Ming</first><last>Wu</last></author>
       <pages>1–5</pages>
@@ -7499,14 +7499,14 @@
       <author><first>Choochart</first><last>Haruechaiyasak</last></author>
       <author><first>Alisa</first><last>Kongthon</last></author>
       <author><first>Pornpimon</first><last>Palingoon</last></author>
-      <author><first>Kanokorn</first><last>Trakultaweekoon</last></author>
+      <author id="kanokorn-trakultaweekoon"><first>Kanokorn</first><last>Trakultaweekoon</last></author>
       <pages>6–13</pages>
       <url hash="82d8857a">W13-4202</url>
       <bibkey>haruechaiyasak-etal-2013-sense</bibkey>
     </paper>
     <paper id="3">
       <title>Social Metaphor Detection via Topical Analysis</title>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <pages>14–22</pages>
       <url hash="349956e5">W13-4203</url>
       <bibkey>huang-2013-social</bibkey>
@@ -7540,8 +7540,8 @@
     <meta>
       <booktitle>Proceedings of the 11th Workshop on <fixed-case>A</fixed-case>sian Language Resources</booktitle>
       <url hash="5f807781">W13-43</url>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Nagoya, Japan</address>
       <month>October</month>
@@ -7587,10 +7587,10 @@
     </paper>
     <paper id="5">
       <title>Event and Event Actor Alignment in Phrase Based Statistical Machine Translation</title>
-      <author><first>Anup</first><last>Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup</first><last>Kolya</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>36–44</pages>
       <url hash="a30ac801">W13-4305</url>
       <bibkey>kolya-etal-2013-event</bibkey>
@@ -7608,15 +7608,15 @@
     </paper>
     <paper id="7">
       <title>Annotating Legitimate Disagreement in Corpus Construction</title>
-      <author><first>Billy T.M.</first><last>Wong</last></author>
-      <author><first>Sophia Y.M.</first><last>Lee</last></author>
+      <author id="billy-t-m-wong"><first>Billy T.M.</first><last>Wong</last></author>
+      <author id="sophia-y-m-lee"><first>Sophia Y.M.</first><last>Lee</last></author>
       <pages>51–57</pages>
       <url hash="d1694f2d">W13-4307</url>
       <bibkey>wong-lee-2013-annotating</bibkey>
     </paper>
     <paper id="8">
       <title>A Hybrid Statistical Approach for Named Entity Recognition for <fixed-case>M</fixed-case>alayalam Language</title>
-      <author><first>Jisha</first><last>P Jayan</last></author>
+      <author id="jisha-p-jayan"><first>Jisha</first><last>P Jayan</last></author>
       <author><first>Rajeev</first><last>R R</last></author>
       <author><first>Elizabeth</first><last>Sherly</last></author>
       <pages>58–63</pages>
@@ -7626,7 +7626,7 @@
     <paper id="9">
       <title>Designing a Generic Scheme for Etymological Annotation: a New Type of Language Corpora Annotation</title>
       <author><first>Niladri Sekhar</first><last>Dash</last></author>
-      <author><first>Mazhar Mehdi</first><last>Hussain</last></author>
+      <author id="mazhar-mehdi-hussain"><first>Mazhar Mehdi</first><last>Hussain</last></author>
       <pages>64–71</pages>
       <url hash="d8e6a750">W13-4309</url>
       <bibkey>dash-hussain-2013-designing</bibkey>
@@ -7644,8 +7644,8 @@
     <meta>
       <booktitle>Proceedings of the Seventh <fixed-case>SIGHAN</fixed-case> Workshop on <fixed-case>C</fixed-case>hinese Language Processing</booktitle>
       <url hash="1edb9db5">W13-44</url>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
       <editor><first>Jingbo</first><last>Zhu</last></editor>
       <editor><first>Fuji</first><last>Ren</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
@@ -7660,7 +7660,7 @@
     </frontmatter>
     <paper id="1">
       <title>Keynote Speech: Lexical Semantics of <fixed-case>C</fixed-case>hinese Language</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>1</pages>
       <url hash="7e285930">W13-4401</url>
       <bibkey>chen-2013-keynote</bibkey>
@@ -7668,7 +7668,7 @@
     <paper id="2">
       <title>Can <fixed-case>MDL</fixed-case> Improve Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation?</title>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>2–10</pages>
       <url hash="04000136">W13-4402</url>
       <bibkey>magistry-sagot-2013-mdl</bibkey>
@@ -7679,7 +7679,7 @@
       <author><first>Yi</first><last>Zhang</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
-      <author><first>Junichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></author>
       <pages>11–19</pages>
       <url hash="e917f392">W13-4403</url>
       <bibkey>wang-etal-2013-deep</bibkey>
@@ -7689,7 +7689,7 @@
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Su-Chu</first><last>Lin</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>20–28</pages>
       <url hash="f9d38b20">W13-4404</url>
       <bibkey>huang-etal-2013-lexical</bibkey>
@@ -7724,9 +7724,9 @@
     </paper>
     <paper id="8">
       <title><fixed-case>C</fixed-case>hinese Spelling Checker Based on Statistical Machine Translation</title>
-      <author><first>Hsun-wen</first><last>Chiu</last></author>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-wen</first><last>Chiu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>49–53</pages>
       <url hash="5f8b1af0">W13-4408</url>
       <bibkey>chiu-etal-2013-chinese</bibkey>
@@ -7737,7 +7737,7 @@
       <author><first>Fei</first><last>Cheng</last></author>
       <author><first>Yanyan</first><last>Luo</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>54–58</pages>
       <url hash="3bda2bff">W13-4409</url>
       <bibkey>liu-etal-2013-hybrid</bibkey>
@@ -7746,7 +7746,7 @@
       <title>Introduction to <fixed-case>CKIP</fixed-case> <fixed-case>C</fixed-case>hinese Spelling Check System for <fixed-case>SIGHAN</fixed-case> Bakeoff 2013 Evaluation</title>
       <author><first>Yu-Ming</first><last>Hsieh</last></author>
       <author><first>Ming-Hong</first><last>Bai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>59–63</pages>
       <url hash="2759474b">W13-4410</url>
       <bibkey>hsieh-etal-2013-introduction</bibkey>
@@ -7754,15 +7754,15 @@
     <paper id="11">
       <title>Automatic <fixed-case>C</fixed-case>hinese Confusion Words Extraction Using Conditional Random Fields and the Web</title>
       <author><first>Chun-Hung</first><last>Wang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <pages>64–68</pages>
       <url hash="48fff0a1">W13-4411</url>
       <bibkey>wang-etal-2013-automatic-chinese</bibkey>
     </paper>
     <paper id="12">
       <title>Conditional Random Field-based Parser and Language Model for Tradi-tional <fixed-case>C</fixed-case>hinese Spelling Checker</title>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <author><first>Yeh-Kuang</first><last>Wu</last></author>
       <author><first>Liang-Chun</first><last>Chang</last></author>
@@ -7773,7 +7773,7 @@
     <paper id="13">
       <title>A Maximum Entropy Approach to <fixed-case>C</fixed-case>hinese Spelling Check</title>
       <author><first>Dongxu</first><last>Han</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>74–78</pages>
       <url hash="5ccf4d3d">W13-4413</url>
       <bibkey>han-chang-2013-maximum</bibkey>
@@ -7783,7 +7783,7 @@
       <author><first>Kuan-Yu</first><last>Chen</last></author>
       <author><first>Hung-Shin</first><last>Lee</last></author>
       <author><first>Chung-Han</first><last>Lee</last></author>
-      <author><first>Hsin-Min</first><last>Wang</last></author>
+      <author id="hsin-min-wang"><first>Hsin-Min</first><last>Wang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>79–83</pages>
       <url hash="0b610719">W13-4414</url>
@@ -7808,12 +7808,12 @@
     </paper>
     <paper id="17">
       <title>Sinica-<fixed-case>IASL</fixed-case> <fixed-case>C</fixed-case>hinese spelling check system at Sighan-7</title>
-      <author><first>Ting-Hao</first><last>Yang</last></author>
+      <author id="ting-hao-yang"><first>Ting-Hao</first><last>Yang</last></author>
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Yu-Hsuan</first><last>Chen</last></author>
       <author><first>Michael</first><last>Tsang</last></author>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>93–96</pages>
       <url hash="1cbe66c2">W13-4417</url>
       <bibkey>yang-etal-2013-sinica</bibkey>
@@ -7906,7 +7906,7 @@
     </paper>
     <paper id="5">
       <title>Extracting and Aggregating False Information from Microblogs</title>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Keita</first><last>Nabeshima</last></author>
       <author><first>Kento</first><last>Watanabe</last></author>
       <author><first>Junta</first><last>Mizuno</last></author>
@@ -7954,7 +7954,7 @@
       <title>Incorporating Knowledge Resources to Enhance Medical Information Extraction</title>
       <author><first>Yasuhide</first><last>Miura</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Emiko</first><last>Yamada Shinohara</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Kazuhiko</first><last>Ohe</last></author>
@@ -7966,7 +7966,7 @@
       <title>Clinical Vocabulary and Clinical Finding Concepts in Medical Literature</title>
       <author><first>Takashi</first><last>Okumura</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <pages>7–13</pages>
       <url hash="8bc5d396">W13-4602</url>
       <bibkey>okumura-etal-2013-clinical</bibkey>
@@ -7984,9 +7984,9 @@
       <title>Towards High-Reliability Speech Translation in the Medical Domain</title>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Ryosuke</first><last>Isotani</last></author>
       <author><first>Yukichi</first><last>Ikeda</last></author>
       <pages>22–29</pages>
@@ -8004,8 +8004,8 @@
     </paper>
     <paper id="6">
       <title>Proper and Efficient Treatment of Anaphora and Long-Distance Dependency in Context-Free Grammar: An Experiment with Medical Text</title>
-      <author><first>Wailok</first><last>Tam</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="wai-lok-tam"><first>Wailok</first><last>Tam</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <author><first>Yusuke</first><last>Matsubara</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
       <author><first>Mai</first><last>Miyabe</last></author>
@@ -8039,8 +8039,8 @@
     <meta>
       <booktitle>Proceedings of the 4th Workshop on South and Southeast <fixed-case>A</fixed-case>sian Natural Language Processing</booktitle>
       <url hash="6847daa4">W13-47</url>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
-      <editor><first>M. G. Abbas</first><last>Malik</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="m-g-abbas-malik"><first>M. G. Abbas</first><last>Malik</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Nagoya Congress Center, Nagoya, Japan</address>
       <month>October</month>
@@ -8054,7 +8054,7 @@
     <paper id="1">
       <title>Fast Bootstrapping of Grapheme to Phoneme System for Under-resourced Languages - Application to the <fixed-case>I</fixed-case>ban Language</title>
       <author><first>Sarah</first><last>Samson Juan</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>1–8</pages>
       <url hash="a1b8230e">W13-4701</url>
       <bibkey>samson-juan-besacier-2013-fast</bibkey>
@@ -8086,7 +8086,7 @@
     <paper id="5">
       <title>On Application of Conditional Random Field in Stemming of <fixed-case>B</fixed-case>engali Natural Language Text</title>
       <author><first>Sandipan</first><last>Sarkar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>34–42</pages>
       <url hash="92ac1678">W13-4705</url>
       <bibkey>sarkar-bandyopadhyay-2013-application</bibkey>
@@ -8094,8 +8094,8 @@
     <paper id="6">
       <title><fixed-case>U</fixed-case>rdu <fixed-case>H</fixed-case>indi Machine Transliteration using <fixed-case>SMT</fixed-case></title>
       <author><first>M. G. Abbas</first><last>Malik</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>43–57</pages>
       <url hash="21d2e5d4">W13-4706</url>
@@ -8104,7 +8104,7 @@
     <paper id="7">
       <title><fixed-case>U</fixed-case>rdu Spell Checking: Reverse Edit Distance Approach</title>
       <author><first>Saadat</first><last>Iqbal</last></author>
-      <author><first>Muhammad Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Muhammad Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <author><first>Zobia</first><last>Rehman</last></author>
       <pages>58–65</pages>
@@ -8122,7 +8122,7 @@
     <paper id="9">
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>U</fixed-case>rdu Hierarchical Phrase-based Statistical Machine Translation</title>
       <author><first>Nadeem</first><last>Khan</last></author>
-      <author><first>Muhammad Waqas</first><last>Anwar</last></author>
+      <author id="waqas-anwar"><first>Muhammad Waqas</first><last>Anwar</last></author>
       <author><first>Usama Ijaz</first><last>Bajwa</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
       <pages>72–76</pages>
@@ -8138,8 +8138,8 @@
     </paper>
     <paper id="11">
       <title><fixed-case>M</fixed-case>alayalam Clause Boundary Identifier: Annotation and Evaluation</title>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
-      <author><first>Lakshmi</first><last>S</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="lakshmi-s"><first>Lakshmi</first><last>S</last></author>
       <pages>83–90</pages>
       <url hash="6f809e44">W13-4711</url>
       <bibkey>lalitha-devi-s-2013-malayalam</bibkey>
@@ -8157,15 +8157,15 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>A</fixed-case>gree<fixed-case>C</fixed-case>alc: Uma Ferramenta para Análise da Concordância entre Múltiplos Anotadores (<fixed-case>A</fixed-case>gree<fixed-case>C</fixed-case>alc: A Tool for the Analysis of Agreement Between Multiple Annotators) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Alexandre Rossi</first><last>Alvares</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="alexandre-rossi-alvares"><first>Alexandre Rossi</first><last>Alvares</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
       <url hash="94086ee1">W13-4801</url>
       <bibkey>alvares-roman-2013-agreecalc</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>nn: Um Gerador de Ferramentas para Anotação de Textos (<fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>nn: a Generator of Text Annotation Tools) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Tiago Emanuel Infante</first><last>Missão</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
       <url hash="b8f3d041">W13-4802</url>
       <bibkey>missao-roman-2013-metaann</bibkey>
     </paper>
@@ -8179,7 +8179,7 @@
     <paper id="4">
       <title>Uma Investigação sobre Algoritmos de Diferentes Abordagens de Aprendizado Supervisionado na Classificação de Papéis Retóricos em Resumos Científicos (Investigating Algorithms from Different Approaches of Supervised Learning for the Classification of Rhetorical Roles in Scientific Abstracts) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Vinícius M. A.</first><last>de Souza</last></author>
-      <author><first>Valéria D.</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria D.</first><last>Feltrim</last></author>
       <url hash="10799041">W13-4804</url>
       <bibkey>de-souza-feltrim-2013-uma</bibkey>
     </paper>
@@ -8193,43 +8193,43 @@
     </paper>
     <paper id="6">
       <title>Subtopic Annotation in a Corpus of News Texts: Steps Towards Automatic Subtopic Segmentation</title>
-      <author><first>Paula C. F.</first><last>Cardoso</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="paula-cardoso"><first>Paula C. F.</first><last>Cardoso</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
       <url hash="9dbee401">W13-4806</url>
       <bibkey>cardoso-etal-2013-subtopic</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>O</fixed-case> Reconhecimento de Entidades Nomeadas por meio de Conditional Random Fields para a Língua Portuguesa (Named Entity Recognition with Conditional Random Fields for the <fixed-case>P</fixed-case>ortuguese Language) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Daniela O. F.</first><last>do Amaral</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="daniela-oliveira-f-do-amaral"><first>Daniela O. F.</first><last>do Amaral</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="69f8376b">W13-4807</url>
       <bibkey>do-amaral-vieira-2013-o</bibkey>
     </paper>
     <paper id="8">
       <title>Análise Automática de Coerência Usando o Modelo Grade de Entidades para o Português (Automatic Coherence Analysis Using the Entity-grid Model for <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Alison R. P.</first><last>Freitas</last></author>
-      <author><first>Valéria D.</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria D.</first><last>Feltrim</last></author>
       <url hash="93956eb6">W13-4808</url>
       <bibkey>freitas-feltrim-2013-analise</bibkey>
     </paper>
     <paper id="9">
       <title>Aplicando Pontos de Corte para Listas de Termos Extraídos (Applying Cut-off Points to Lists of Extracted Terms) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Lucelene</first><last>Lopes</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="755dde82">W13-4809</url>
       <bibkey>lopes-vieira-2013-aplicando</bibkey>
     </paper>
     <paper id="10">
       <title>Geração de Expressões de Referência usando Relações Espaciais (Referring Expression Generation Using Spatial Relations) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Diego</first><last>dos Santos Silva</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="18b57cbb">W13-4810</url>
       <bibkey>dos-santos-silva-paraboni-2013-geracao</bibkey>
     </paper>
     <paper id="11">
       <title>Mac-Morpho Revisited: Towards Robust Part-of-Speech Tagging</title>
-      <author><first>Erick Rocha</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick Rocha</first><last>Fonseca</last></author>
       <author><first>João Luís G.</first><last>Rosa</last></author>
       <url hash="e8674a3e">W13-4811</url>
       <bibkey>fonseca-rosa-2013-mac</bibkey>
@@ -8243,7 +8243,7 @@
     </paper>
     <paper id="13">
       <title>Text Simplification as Tree Transduction</title>
-      <author><first>Gustavo H.</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo H.</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="b106ca55">W13-4813</url>
       <bibkey>paetzold-specia-2013-text</bibkey>
@@ -8251,7 +8251,7 @@
     <paper id="14">
       <title>Automatic Disambiguation of Homographic Heterophone Pairs Containing Open and Closed Mid Vowels</title>
       <author><first>Christopher</first><last>Shulby</last></author>
-      <author><first>Gustavo</first><last>Mendonça</last></author>
+      <author id="gustavo-mendonca"><first>Gustavo</first><last>Mendonça</last></author>
       <author><first>Vanessa</first><last>Marquiafável</last></author>
       <url hash="9d2d7d4f">W13-4814</url>
       <bibkey>shulby-etal-2013-automatic</bibkey>
@@ -8267,22 +8267,22 @@
       <title>Realização Superficial baseada em Regras (Rule-based Surface Realisation) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Douglas F. P.</first><last>da Silva Junior</last></author>
       <author><first>Eder M.</first><last>de Novais</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="f2c54171">W13-4816</url>
       <bibkey>da-silva-junior-etal-2013-realizacao</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>JWN</fixed-case>-Br - Uma <fixed-case>API</fixed-case> <fixed-case>J</fixed-case>ava para a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r (<fixed-case>JWN</fixed-case>-Br - an <fixed-case>J</fixed-case>ava <fixed-case>API</fixed-case> for <fixed-case>W</fixed-case>ordnet.<fixed-case>B</fixed-case>r) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Vitor Machado</first><last>Oliveira</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
       <url hash="076cdb69">W13-4817</url>
       <bibkey>oliveira-roman-2013-jwn</bibkey>
     </paper>
     <paper id="18">
       <title>Geração de features para resolução de correferência: Pessoa, Local e Organização (Feature Generation for Coreference Resolution: Person, Location and Organization) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Evandro B.</first><last>Fonseca</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
-      <author><first>Aline A.</first><last>Vanin</last></author>
+      <author id="evandro-b-fonseca"><first>Evandro B.</first><last>Fonseca</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
+      <author id="aline-a-vanin"><first>Aline A.</first><last>Vanin</last></author>
       <url hash="8a5012bf">W13-4818</url>
       <bibkey>fonseca-etal-2013-geracao</bibkey>
     </paper>
@@ -8295,16 +8295,16 @@
     </paper>
     <paper id="20">
       <title>Um repositório de verbos para a anotação de papéis semânticos disponível na web (A Verb Repository for Semantic Role Labeling Available in the Web) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Magali Sanches</first><last>Duran</last></author>
+      <author id="magali-sanches-duran"><first>Magali Sanches</first><last>Duran</last></author>
       <author><first>Jhonata Pereira</first><last>Martins</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <url hash="c2bfae9d">W13-4820</url>
       <bibkey>duran-etal-2013-um</bibkey>
     </paper>
     <paper id="21">
       <title>Entity-centric Sentiment Analysis on <fixed-case>T</fixed-case>witter data for the Potuguese Language</title>
       <author><first>Marlo</first><last>Souza</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="fd3abceb">W13-4821</url>
       <bibkey>souza-vieira-2013-entity</bibkey>
     </paper>
@@ -8312,7 +8312,7 @@
       <title>Approaches for Helping <fixed-case>B</fixed-case>razilian Students Improve their Scientific Writings</title>
       <author><first>Ethel</first><last>Schuster</last></author>
       <author><first>Rick</first><last>Lizotte</last></author>
-      <author><first>Sandra M.</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra M.</first><last>Aluísio</last></author>
       <author><first>Carmen</first><last>Dayrell</last></author>
       <url hash="3f08f87b">W13-4822</url>
       <bibkey>schuster-etal-2013-approaches</bibkey>
@@ -8327,14 +8327,14 @@
       <title>Geração de instruções em mundos virtuais: primeiros passos (Generation of Instructions in Virtual Worlds: First Steps) [in <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Diego Cardozo</first><last>Sandrim</last></author>
       <author><first>Felipe</first><last>Freire</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <url hash="f685c9b3">W13-4824</url>
       <bibkey>sandrim-etal-2013-geracao</bibkey>
     </paper>
     <paper id="25">
       <title>Extração de Vocabulário Multilíngue para Tradução em Domínios Especializados (Multilingual Vocabulary Extraction for Machine Translation in Specialized Domains) [in <fixed-case>P</fixed-case>ortuguese]</title>
-      <author><first>Lucas Welter</first><last>Hilgert</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="lucas-welter-hilgert"><first>Lucas Welter</first><last>Hilgert</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <url hash="6ee861b5">W13-4825</url>
       <bibkey>hilgert-vieira-2013-extracao</bibkey>
     </paper>
@@ -8347,9 +8347,9 @@
     <paper id="27">
       <title>Acoustic, Phonetic and Prosodic Features of <fixed-case>P</fixed-case>arkinson’s disease Speech</title>
       <author><first>Jorge</first><last>Proença</last></author>
-      <author><first>Arlindo</first><last>Veiga</last></author>
+      <author id="arlindo-veiga"><first>Arlindo</first><last>Veiga</last></author>
       <author><first>Sara</first><last>Candeias</last></author>
-      <author><first>Fernando</first><last>Perdigão</last></author>
+      <author id="fernando-perdigao"><first>Fernando</first><last>Perdigão</last></author>
       <url hash="3331c1dd">W13-4827</url>
       <bibkey>proenca-etal-2013-acoustic</bibkey>
     </paper>
@@ -8364,9 +8364,9 @@
     </paper>
     <paper id="29">
       <title>An Evaluation of the <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese <fixed-case>LIWC</fixed-case> Dictionary for Sentiment Analysis</title>
-      <author><first>Pedro P. Balage</first><last>Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro P. Balage</first><last>Filho</last></author>
       <author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
-      <author><first>Sandra M.</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra M.</first><last>Aluísio</last></author>
       <url hash="2452c731">W13-4829</url>
       <bibkey>filho-etal-2013-evaluation</bibkey>
     </paper>
@@ -8416,7 +8416,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>L</fixed-case>ithuanian Dependency Parsing with Rich Morphological Features</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Algis</first><last>Krupavičius</last></author>
       <pages>12–21</pages>
@@ -8425,7 +8425,7 @@
     </paper>
     <paper id="3">
       <title>Parsing <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian by Using <fixed-case>C</fixed-case>roatian Dependency Treebanks</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
       <author><first>Daša</first><last>Berović</last></author>
       <pages>22–33</pages>
@@ -8441,16 +8441,16 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>LIGM</fixed-case>-<fixed-case>A</fixed-case>lpage architecture for the <fixed-case>SPMRL</fixed-case> 2013 Shared Task: Multiword Expression Analysis and Dependency Parsing</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>46–52</pages>
       <url hash="e305f0e6">W13-4905</url>
       <bibkey>constant-etal-2013-ligm</bibkey>
     </paper>
     <paper id="6">
       <title>Exploring beam-based shift-reduce dependency parsing with <fixed-case>D</fixed-case>y<fixed-case>AL</fixed-case>og: Results from the <fixed-case>SPMRL</fixed-case> 2013 shared task</title>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>53–62</pages>
       <url hash="b6155453">W13-4906</url>
       <bibkey>de-la-clergerie-2013-exploring</bibkey>
@@ -8465,8 +8465,8 @@
     <paper id="8">
       <title>Exploiting the Contribution of Morphological Information to Parsing: the <fixed-case>BASQUE</fixed-case> <fixed-case>TEAM</fixed-case> system in the <fixed-case>SPRML</fixed-case>‘2013 Shared Task</title>
       <author><first>Iakes</first><last>Goenaga</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Nerea</first><last>Ezeiza</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="nerea-ezeiza"><first>Nerea</first><last>Ezeiza</last></author>
       <pages>71–77</pages>
       <url hash="e02e49d5">W13-4908</url>
       <bibkey>goenaga-etal-2013-exploiting</bibkey>
@@ -8483,7 +8483,7 @@
       <title><fixed-case>SPMRL</fixed-case>‘13 Shared Task System: The <fixed-case>CADIM</fixed-case> <fixed-case>A</fixed-case>rabic Dependency Parser</title>
       <author><first>Yuval</first><last>Marton</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Sarah</first><last>Alkhulani</last></author>
       <pages>86–90</pages>
       <url hash="a39f139d">W13-4910</url>
@@ -8492,7 +8492,7 @@
     <paper id="11">
       <title>A Statistical Approach to Prediction of Empty Categories in <fixed-case>H</fixed-case>indi Dependency Treebank</title>
       <author><first>Puneeth</first><last>Kukkadapu</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>91–96</pages>
       <url hash="7a773ed3">W13-4911</url>
       <bibkey>kukkadapu-mannem-2013-statistical</bibkey>
@@ -8500,8 +8500,8 @@
     <paper id="12">
       <title>An Empirical Study on the Effect of Morphological and Lexical Features in <fixed-case>P</fixed-case>ersian Dependency Parsing</title>
       <author><first>Mojtaba</first><last>Khallash</last></author>
-      <author><first>Ali</first><last>Hadian</last></author>
-      <author><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
+      <author id="ali-hadian-cefidekhanie"><first>Ali</first><last>Hadian</last></author>
+      <author id="behrouz-minaei-bidgoli"><first>Behrouz</first><last>Minaei-Bidgoli</last></author>
       <pages>97–107</pages>
       <url hash="accd6b5c">W13-4912</url>
       <bibkey>khallash-etal-2013-empirical</bibkey>
@@ -8516,8 +8516,8 @@
     </paper>
     <paper id="14">
       <title>Context Based Statistical Morphological Analyzer and its Effect on <fixed-case>H</fixed-case>indi Dependency Parsing</title>
-      <author><first>Deepak Kumar</first><last>Malladi</last></author>
-      <author><first>Prashanth</first><last>Mannem</last></author>
+      <author id="deepak-kumar-malladi"><first>Deepak Kumar</first><last>Malladi</last></author>
+      <author id="prashanth-mannem"><first>Prashanth</first><last>Mannem</last></author>
       <pages>119–128</pages>
       <url hash="ee3e27ba">W13-4914</url>
       <bibkey>malladi-mannem-2013-context</bibkey>
@@ -8525,7 +8525,7 @@
     <paper id="15">
       <title>Representation of Morphosyntactic Units and Coordination Structures in the <fixed-case>T</fixed-case>urkish Dependency Treebank</title>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>129–134</pages>
       <url hash="c19adedd">W13-4915</url>
       <bibkey>sulubacak-eryigit-2013-representation</bibkey>
@@ -8533,9 +8533,9 @@
     <paper id="16">
       <title>(Re)ranking Meets Morphosyntax: State-of-the-art Results from the <fixed-case>SPMRL</fixed-case> 2013 Shared Task</title>
       <author><first>Anders</first><last>Björkelund</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
-      <author><first>Thomas</first><last>Mueller</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <pages>135–145</pages>
       <url hash="6e8ba719">W13-4916</url>
@@ -8543,15 +8543,15 @@
     </paper>
     <paper id="17">
       <title>Overview of the <fixed-case>SPMRL</fixed-case> 2013 Shared Task: A Cross-Framework Evaluation of Parsing Morphologically Rich Languages</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <author><first>Iakes</first><last>Goenaga</last></author>
-      <author><first>Koldo</first><last>Gojenola Galletebeitia</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola Galletebeitia</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
@@ -8559,13 +8559,13 @@
       <author><first>Wolfgang</first><last>Maier</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Adam</first><last>Przepiórkowski</last></author>
-      <author><first>Ryan</first><last>Roth</last></author>
+      <author id="ryan-roth"><first>Ryan</first><last>Roth</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <author><first>Marcin</first><last>Woliński</last></author>
       <author><first>Alina</first><last>Wróblewska</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <pages>146–182</pages>
       <url hash="3c61f260">W13-4917</url>
       <bibkey>seddah-etal-2013-overview</bibkey>
@@ -8576,9 +8576,9 @@
       <booktitle>Proceedings of <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-8 Graph-based Methods for Natural Language Processing</booktitle>
       <url hash="bb8e8ef0">W13-50</url>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
       <editor><first>Gabor</first><last>Melli</last></editor>
-      <editor><first>Vivi</first><last>Nastase</last></editor>
+      <editor id="vivi-nastase"><first>Vivi</first><last>Nastase</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Seattle, Washington, USA</address>
       <month>October</month>
@@ -8599,10 +8599,10 @@
     </paper>
     <paper id="2">
       <title><fixed-case>J</fixed-case>o<fixed-case>B</fixed-case>im<fixed-case>T</fixed-case>ext Visualizer: A Graph-based Approach to Contextualizing Distributional Similarity</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Bonaventura</first><last>Coppola</last></author>
-      <author><first>Michael R.</first><last>Glass</last></author>
-      <author><first>Alfio</first><last>Gliozzo</last></author>
+      <author id="michael-glass"><first>Michael R.</first><last>Glass</last></author>
+      <author id="alfio-gliozzo"><first>Alfio</first><last>Gliozzo</last></author>
       <author><first>Matthew</first><last>Hatem</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
       <pages>6–10</pages>
@@ -8622,7 +8622,7 @@
       <title>Reconstructing Big Semantic Similarity Networks</title>
       <author><first>Ai</first><last>He</last></author>
       <author><first>Shefali</first><last>Sharma</last></author>
-      <author><first>Chun-Nan</first><last>Hsu</last></author>
+      <author id="chun-nan-hsu"><first>Chun-Nan</first><last>Hsu</last></author>
       <pages>20–28</pages>
       <url hash="e0acf2e6">W13-5004</url>
       <bibkey>he-etal-2013-reconstructing</bibkey>
@@ -8630,7 +8630,7 @@
     <paper id="5">
       <title>Graph-Based Unsupervised Learning of Word Similarities Using Heterogeneous Feature Types</title>
       <author><first>Avneesh</first><last>Saluja</last></author>
-      <author><first>Jiří</first><last>Navrátil</last></author>
+      <author id="jiri-navratil"><first>Jiří</first><last>Navrátil</last></author>
       <pages>29–38</pages>
       <url hash="d8be3302">W13-5005</url>
       <bibkey>saluja-navratil-2013-graph</bibkey>
@@ -8638,7 +8638,7 @@
     <paper id="6">
       <title>From Global to Local Similarities: A Graph-Based Contextualization Method using Distributional Thesauri</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>39–43</pages>
       <url hash="f295d6f0">W13-5006</url>
       <bibkey>riedl-biemann-2013-global</bibkey>
@@ -8699,7 +8699,7 @@
       <booktitle>Proceedings of the Workshop on <fixed-case>NLP</fixed-case> for Medicine and Biology associated with <fixed-case>RANLP</fixed-case> 2013</booktitle>
       <url hash="0096b9fa">W13-51</url>
       <editor><first>Guergana</first><last>Savova</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
@@ -8714,7 +8714,7 @@
     <paper id="1">
       <title>Active Learning for Phenotyping Tasks</title>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>1–8</pages>
       <url hash="b493b734">W13-5101</url>
@@ -8722,10 +8722,10 @@
     </paper>
     <paper id="2">
       <title>Finding Negative Symptoms of Schizophrenia in Patient Records</title>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <author><first>Richard</first><last>Jackson</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <pages>9–17</pages>
       <url hash="092e3a75">W13-5102</url>
       <bibkey>gorrell-etal-2013-finding</bibkey>
@@ -8759,9 +8759,9 @@
       <editor><first>Marieke</first><last>van Erp</last></editor>
       <editor><first>Brian</first><last>Davis</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Kiril</first><last>Simov</last></editor>
+      <editor id="kiril-simov"><first>Kiril</first><last>Simov</last></editor>
       <editor><first>Georgi</first><last>Georgiev</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -8781,7 +8781,7 @@
     </paper>
     <paper id="2">
       <title>Evaluation of <fixed-case>SPARQL</fixed-case> query generation from natural language questions</title>
-      <author><first>K. Bretonnel</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>K. Bretonnel</first><last>Cohen</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <pages>3–7</pages>
       <url hash="80737bf8">W13-5202</url>
@@ -8789,8 +8789,8 @@
     </paper>
     <paper id="3">
       <title>Mining translations from the web of open linked data</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>8–11</pages>
       <url hash="e3a27f53">W13-5203</url>
       <bibkey>mccrae-cimiano-2013-mining</bibkey>
@@ -8823,7 +8823,7 @@
       <author><first>Hui</first><last>Yang</last></author>
       <author><first>Alistair</first><last>Willis</last></author>
       <author><first>David</first><last>Morse</last></author>
-      <author><first>Anne</first><last>de Roeck</last></author>
+      <author id="anne-de-roeck"><first>Anne</first><last>de Roeck</last></author>
       <pages>25–32</pages>
       <url hash="2331429b">W13-5207</url>
       <bibkey>yang-etal-2013-literature</bibkey>
@@ -8876,7 +8876,7 @@
     </frontmatter>
     <paper id="1">
       <title>Combining, Adapting and Reusing Bi-texts between Related Languages: Application to Statistical Machine Translation (invited talk)</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>1</pages>
       <url hash="5448dc30">W13-5301</url>
       <bibkey>nakov-2013-combining</bibkey>
@@ -8884,7 +8884,7 @@
     <paper id="2">
       <title>Language diversity and implications for Language technology in the Multilingual <fixed-case>E</fixed-case>urope</title>
       <author><first>Cristina</first><last>Vertan</last></author>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <pages>2–6</pages>
       <url hash="c3ecd258">W13-5302</url>
       <bibkey>vertan-von-hahn-2013-language</bibkey>
@@ -8892,9 +8892,9 @@
     <paper id="3">
       <title>Corpus development for machine translation between standard and dialectal varieties</title>
       <author><first>Barry</first><last>Haddow</last></author>
-      <author><first>Adolfo</first><last>Hernández</last></author>
+      <author id="adolfo-hernandez-h"><first>Adolfo</first><last>Hernández</last></author>
       <author><first>Friedrich</first><last>Neubarth</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <pages>7–14</pages>
       <url hash="f7d8e118">W13-5303</url>
       <bibkey>haddow-etal-2013-corpus</bibkey>
@@ -8918,7 +8918,7 @@
     <paper id="6">
       <title>Lexicon induction and part-of-speech tagging of non-resourced languages without any bilingual resources</title>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>30–39</pages>
       <url hash="f647f3b9">W13-5306</url>
       <bibkey>scherrer-sagot-2013-lexicon</bibkey>
@@ -8926,7 +8926,7 @@
     <paper id="7">
       <title>The Mysterious Letter <fixed-case>J</fixed-case></title>
       <author><first>Andjelka</first><last>Zečević</last></author>
-      <author><first>Staša</first><last>Vujičić Stanković</last></author>
+      <author id="stasa-vujicic-stankovic"><first>Staša</first><last>Vujičić Stanković</last></author>
       <pages>40–44</pages>
       <url hash="edf9fdc6">W13-5307</url>
       <bibkey>zecevic-vujicic-stankovic-2013-mysterious</bibkey>
@@ -8936,7 +8936,7 @@
     <meta>
       <booktitle>Proceedings of the 6th International Conference on Generative Approaches to the Lexicon (<fixed-case>GL</fixed-case>2013)</booktitle>
       <url hash="f951d179">W13-54</url>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Pisa, Italy</address>
       <month>September</month>
@@ -8966,7 +8966,7 @@
     <paper id="3">
       <title>To Coerce or Not to Coerce: A Corpus-based Exploration of Some Complement Coercion Verbs in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Chan-Chia</first><last>Hsu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>13–20</pages>
       <url hash="ec91f0d1">W13-5403</url>
       <bibkey>hsu-hsieh-2013-coerce</bibkey>
@@ -8975,7 +8975,7 @@
       <title>Towards the automatic classification of complex-type nominals</title>
       <author><first>Lauren</first><last>Romeo</last></author>
       <author><first>Sara</first><last>Mendes</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>21–28</pages>
       <url hash="a831b378">W13-5404</url>
       <bibkey>romeo-etal-2013-towards</bibkey>
@@ -8998,9 +8998,9 @@
     </paper>
     <paper id="7">
       <title>Expanding <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et with <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Orin</first><last>Hargraves</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>44–53</pages>
       <url hash="db4fd210">W13-5407</url>
       <bibkey>bonial-etal-2013-expanding</bibkey>
@@ -9015,9 +9015,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Theory and Linguistic Linked Open Data</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <author><first>Valeria</first><last>Quochi</last></author>
       <pages>62–69</pages>
@@ -9029,7 +9029,7 @@
       <author><first>Irene</first><last>Russo</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <author><first>Irene</first><last>De Felice</last></author>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <pages>70–75</pages>
       <url hash="67adc08f">W13-5410</url>
@@ -9038,8 +9038,8 @@
     <paper id="11">
       <title>Class-based Word Sense Induction for dot-type nominals</title>
       <author><first>Lauren</first><last>Romeo</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>76–83</pages>
       <url hash="933a2cbe">W13-5411</url>
       <bibkey>romeo-etal-2013-class</bibkey>
@@ -9070,7 +9070,7 @@
     <paper id="15">
       <title>Features of Verb Complements in Co-composition: A case study of <fixed-case>C</fixed-case>hinese baking verb using <fixed-case>W</fixed-case>eibo corpus</title>
       <author><first>Yu-Yun</first><last>Chang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>106–114</pages>
       <url hash="47b3830a">W13-5415</url>
       <bibkey>chang-hsieh-2013-features</bibkey>
@@ -9078,7 +9078,7 @@
     <paper id="16">
       <title>A Lexico-Semantic Analysis of <fixed-case>C</fixed-case>hinese Locality Phrases - A Topic Clustering Approach</title>
       <author><first>August F.Y.</first><last>Chao</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>115–124</pages>
       <url hash="8adaae1a">W13-5416</url>
       <bibkey>chao-chung-2013-lexico</bibkey>
@@ -9095,9 +9095,9 @@
       <booktitle>Proceedings of the 2nd Workshop on Linked Data in Linguistics (<fixed-case>LDL</fixed-case>-2013): Representing and linking lexicons, terminologies and other language data</booktitle>
       <url hash="ffdc46f7">W13-55</url>
       <editor><first>Christian</first><last>Chiarcos</last></editor>
-      <editor><first>Philipp</first><last>Cimiano</last></editor>
+      <editor id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
-      <editor><first>John P.</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Pisa, Italy</address>
       <month>September</month>
@@ -9123,9 +9123,9 @@
     <paper id="2">
       <title>Linguistic Linked Data for Sentiment Analysis</title>
       <author><first>Paul</first><last>Buitelaar</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>Carlos</first><last>Iglesias</last></author>
-      <author><first>Fernando</first><last>Sánchez-Rada</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="carlos-a-iglesias"><first>Carlos</first><last>Iglesias</last></author>
+      <author id="j-fernando-sanchez-rada"><first>Fernando</first><last>Sánchez-Rada</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>1 - 8</pages>
       <url hash="8a5688c0">W13-5502</url>
@@ -9133,9 +9133,9 @@
     </paper>
     <paper id="3">
       <title>Renewing and Revising <fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>9 - 17</pages>
       <url hash="0fc94977">W13-5503</url>
       <bibkey>bonial-etal-2013-renewing</bibkey>
@@ -9143,7 +9143,7 @@
     <paper id="4">
       <title><fixed-case>LIME</fixed-case>: Towards a Metadata Module for Ontolex</title>
       <author><first>Manuel</first><last>Fiorelli</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <author><first>Armando</first><last>Stellato</last></author>
       <pages>8 - 27</pages>
       <url hash="ab80e6ac">W13-5504</url>
@@ -9151,7 +9151,7 @@
     </paper>
     <paper id="5">
       <title>Lemon-aid: using Lemon to aid quantitative historical linguistic analysis</title>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <author><first>Martin</first><last>Brümmer</last></author>
       <pages>28 - 33</pages>
       <url hash="2ff2a7bf">W13-5505</url>
@@ -9159,7 +9159,7 @@
     </paper>
     <paper id="6">
       <title>Transforming the Data Transcription and Analysis Tool Metadata and Labels into a Linguistic Linked Open Data Cloud Resource</title>
-      <author><first>Antonio</first><last>Pareja-Lora</last></author>
+      <author id="antonio-pareja-lora"><first>Antonio</first><last>Pareja-Lora</last></author>
       <author><first>María</first><last>Blume</last></author>
       <author><first>Barbara</first><last>Lust</last></author>
       <pages>34 - 43</pages>
@@ -9179,7 +9179,7 @@
       <title>Linguistic Resources Enhanced with Geospatial Information</title>
       <author><first>Richard</first><last>Littauer</last></author>
       <author><first>Boris</first><last>Villazon-Terrazas</last></author>
-      <author><first>Steven</first><last>Moran</last></author>
+      <author id="steven-moran"><first>Steven</first><last>Moran</last></author>
       <pages>53 - 58</pages>
       <url hash="09f0b61e">W13-5508</url>
       <bibkey>littauer-etal-2013-linguistic</bibkey>
@@ -9201,7 +9201,7 @@
     </paper>
     <paper id="11">
       <title>Migrating Psycholinguistic Semantic Feature Norms into Linked Data in Linguistics</title>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>70 - 75</pages>
       <url hash="baee0f2c">W13-5511</url>
       <bibkey>hayashi-2013-migrating</bibkey>
@@ -9209,7 +9209,7 @@
     <paper id="12">
       <title>Towards the establishment of a linguistic linked data network for <fixed-case>I</fixed-case>talian</title>
       <author><first>Roberto</first><last>Bartolini</last></author>
-      <author><first>Riccardo</first><last>Del Gratta</last></author>
+      <author id="riccardo-del-gratta"><first>Riccardo</first><last>Del Gratta</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <pages>76 - 81</pages>
       <url hash="5fe5b662">W13-5512</url>
@@ -9222,7 +9222,7 @@
       <url hash="f96291a6">W13-56</url>
       <editor><first>Stephan</first><last>Oepen</last></editor>
       <editor><first>Kristin</first><last>Hagen</last></editor>
-      <editor><first>Janne Bondi</first><last>Johannessen</last></editor>
+      <editor id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></editor>
       <publisher>Linköping University Electronic Press, Sweden</publisher>
       <address>Oslo, Norway</address>
       <month>May</month>
@@ -9235,7 +9235,7 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Keynote: The Conversational User Interface</title>
-      <author><first>Ron</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ron</first><last>Kaplan</last></author>
       <pages>1–1</pages>
       <url hash="acf1274f">W13-5601</url>
       <bibkey>kaplan-2013-invited</bibkey>
@@ -9249,7 +9249,7 @@
     </paper>
     <paper id="3">
       <title>Invited Keynote: 6,909 Reasons to Mess Up Your Data</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>5–5</pages>
       <url hash="e911cebf">W13-5603</url>
       <bibkey>sogaard-2013-invited</bibkey>
@@ -9270,14 +9270,14 @@
     </paper>
     <paper id="6">
       <title>Experiences in Building the Let’s <fixed-case>MT</fixed-case>! Portal on <fixed-case>A</fixed-case>mazon <fixed-case>EC</fixed-case>2</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>11–11</pages>
       <url hash="90723b0c">W13-5606</url>
       <bibkey>tiedemann-2013-experiences</bibkey>
     </paper>
     <paper id="7">
       <title>Using Constraint Grammar for Chunking</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>13–26</pages>
       <url hash="a2c1e038">W13-5607</url>
       <bibkey>bick-2013-using</bibkey>
@@ -9285,8 +9285,8 @@
     <paper id="8">
       <title>Features Indicating Readability in <fixed-case>S</fixed-case>wedish Text</title>
       <author><first>Johan</first><last>Falkenjack</last></author>
-      <author><first>Katarina</first><last>Heimann Mühlenbock</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Heimann Mühlenbock</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>27–40</pages>
       <url hash="d772c866">W13-5608</url>
       <bibkey>falkenjack-etal-2013-features</bibkey>
@@ -9317,7 +9317,7 @@
     </paper>
     <paper id="11">
       <title>Exploring Features for Named Entity Recognition in <fixed-case>L</fixed-case>ithuanian Text Corpus</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Algis</first><last>Krupavičius</last></author>
@@ -9359,16 +9359,16 @@
     </paper>
     <paper id="16">
       <title><fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Wordnets Aligned and Compared through “<fixed-case>W</fixed-case>ord<fixed-case>T</fixed-case>ies”</title>
-      <author><first>Bolette</first><last>Sandford Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Sandford Pedersen</last></author>
       <author><first>Lars</first><last>Borin</last></author>
       <author><first>Markus</first><last>Forsberg</last></author>
       <author><first>Neeme</first><last>Kahusk</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Jyrki</first><last>Niemi</last></author>
       <author><first>Niklas</first><last>Nisbeth</last></author>
       <author><first>Lars</first><last>Nygaard</last></author>
       <author><first>Heili</first><last>Orav</last></author>
-      <author><first>Eirikur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eirikur</first><last>Rögnvaldsson</last></author>
       <author><first>Mitchell</first><last>Seaton</last></author>
       <author><first>Kadri</first><last>Vider</last></author>
       <author><first>Kaarlo</first><last>Voionmaa</last></author>
@@ -9379,7 +9379,7 @@
     <paper id="17">
       <title>Normalisation of Historical Text Using Context-Sensitive Weighted <fixed-case>L</fixed-case>evenshtein Distance and Compound Splitting</title>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>163–179</pages>
       <url hash="2d5cac7d">W13-5617</url>
@@ -9388,22 +9388,22 @@
     <paper id="18">
       <title>Modeling <fixed-case>OOV</fixed-case> Words With Letter N-Grams in Statistical Taggers: Preliminary Work in Biomedical Entity Recognition</title>
       <author><first>Teemu</first><last>Ruokolainen</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>181–193</pages>
       <url hash="a14d97f9">W13-5618</url>
       <bibkey>ruokolainen-silfverberg-2013-modeling</bibkey>
     </paper>
     <paper id="19">
       <title><fixed-case>B</fixed-case>altic and <fixed-case>N</fixed-case>ordic Parts of the <fixed-case>E</fixed-case>uropean Linguistic Infrastructure</title>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
       <author><first>Lars</first><last>Borin</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
-      <author><first>Gyri</first><last>Losnegaard</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
+      <author id="gyri-smordal-losnegaard"><first>Gyri</first><last>Losnegaard</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
-      <author><first>Bolette</first><last>Sandford Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Sandford Pedersen</last></author>
       <author><first>Roberts</first><last>Rozis</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <pages>195–211</pages>
       <url hash="0d415c17">W13-5619</url>
       <bibkey>skadina-etal-2013-baltic</bibkey>
@@ -9420,7 +9420,7 @@
       <title>Using Factual Density to Measure Informativeness of Web Documents</title>
       <author><first>Christopher</first><last>Horn</last></author>
       <author><first>Alisa</first><last>Zhila</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <author><first>Roman</first><last>Kern</last></author>
       <author><first>Elisabeth</first><last>Lex</last></author>
       <pages>227–238</pages>
@@ -9438,23 +9438,23 @@
     <paper id="23">
       <title>Bootstrapping an Unsupervised Approach for Classifying Agreement and Disagreement</title>
       <author><first>Bernd</first><last>Opitz</last></author>
-      <author><first>Cäcilia</first><last>Zirn</last></author>
+      <author id="cacilia-zirn"><first>Cäcilia</first><last>Zirn</last></author>
       <pages>253–265</pages>
       <url hash="0e2fd12f">W13-5623</url>
       <bibkey>opitz-zirn-2013-bootstrapping</bibkey>
     </paper>
     <paper id="24">
       <title>Morphological Analysis with Limited Resources: <fixed-case>L</fixed-case>atvian Example</title>
-      <author><first>Pēteris</first><last>Paikens</last></author>
+      <author id="peteris-paikens"><first>Pēteris</first><last>Paikens</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <pages>267–277</pages>
       <url hash="1c227c6c">W13-5624</url>
       <bibkey>paikens-etal-2013-morphological</bibkey>
     </paper>
     <paper id="25">
       <title>Statistical Syntactic Parsing for <fixed-case>L</fixed-case>atvian</title>
-      <author><first>Lauma</first><last>Pretkalniņa</last></author>
+      <author id="lauma-pretkalnina"><first>Lauma</first><last>Pretkalniņa</last></author>
       <author><first>Laura</first><last>Rituma</last></author>
       <pages>279–289</pages>
       <url hash="1e3c218d">W13-5625</url>
@@ -9490,7 +9490,7 @@
     </paper>
     <paper id="29">
       <title>Analysis of Phonetic Transcription for <fixed-case>D</fixed-case>anish Automatic Speech Recognition</title>
-      <author><first>Andreas</first><last>Søeborg Kirkedal</last></author>
+      <author id="andreas-soeborg-kirkedal"><first>Andreas</first><last>Søeborg Kirkedal</last></author>
       <pages>321–330</pages>
       <url hash="2bd2b537">W13-5629</url>
       <bibkey>soeborg-kirkedal-2013-analysis</bibkey>
@@ -9507,8 +9507,8 @@
     </paper>
     <paper id="31">
       <title>Building an Open-Source Development Infrastructure for Language Technology Projects</title>
-      <author><first>Sjur N.</first><last>Moshagen</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="sjur-moshagen"><first>Sjur N.</first><last>Moshagen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <pages>343–352</pages>
       <url hash="f5b766e0">W13-5631</url>
@@ -9534,7 +9534,7 @@
     <paper id="34">
       <title>Statistical Machine Translation with Readability Constraints</title>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>375–386</pages>
@@ -9555,7 +9555,7 @@
     </paper>
     <paper id="36">
       <title>Tone Restoration in Transcribed Kammu: Decision-List Word Sense Disambiguation for an Unwritten Language</title>
-      <author><first>Marcus</first><last>Uneson</last></author>
+      <author id="marcus-uneson"><first>Marcus</first><last>Uneson</last></author>
       <pages>399–409</pages>
       <url hash="2d9d5697">W13-5636</url>
       <bibkey>uneson-2013-tone</bibkey>
@@ -9574,7 +9574,7 @@
       <author><first>Liesbeth</first><last>Augustinus</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>423–428</pages>
       <url hash="5f0f7a3f">W13-5638</url>
       <bibkey>augustinus-etal-2013-example</bibkey>
@@ -9606,7 +9606,7 @@
     <paper id="41">
       <title>Finite State Applications with Javascript</title>
       <author><first>Mans</first><last>Hulden</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Jerid</first><last>Francom</last></author>
       <pages>441–446</pages>
       <url hash="74757450">W13-5641</url>
@@ -9627,9 +9627,9 @@
       <author><first>Paul</first><last>Meurer</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
-      <author><first>Gunn Inger</first><last>Lyse</last></author>
-      <author><first>Gyri</first><last>Smørdal Losnegaard</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="gunn-inger-lyse"><first>Gunn Inger</first><last>Lyse</last></author>
+      <author id="gyri-smordal-losnegaard"><first>Gyri</first><last>Smørdal Losnegaard</last></author>
       <author><first>Martha</first><last>Thunes</last></author>
       <pages>453–458</pages>
       <url hash="1f0c224d">W13-5643</url>
@@ -9647,7 +9647,7 @@
     <meta>
       <booktitle>Proceedings of the 13th International Conference on Parsing Technologies (<fixed-case>IWPT</fixed-case> 2013)</booktitle>
       <url hash="9f7fc6f7">W13-57</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <editor><first>Khalil</first><last>Sima'an</last></editor>
       <editor><first>Liang</first><last>Huang</last></editor>
       <publisher>Assocation for Computational Linguistics</publisher>
@@ -9671,8 +9671,8 @@
     </paper>
     <paper id="2">
       <title>An Efficient Typed Feature Structure Index: Theory and Implementation</title>
-      <author><first>Bernd</first><last>Kiefer</last></author>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <pages>17–25</pages>
       <url hash="e310d8a7">W13-5702</url>
       <bibkey>kiefer-krieger-2013-efficient</bibkey>
@@ -9687,7 +9687,7 @@
     </paper>
     <paper id="4">
       <title>Comparative Evaluation of Argument Extraction Algorithms in Discourse Relation Parsing</title>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>36–44</pages>
       <url hash="420a8e14">W13-5704</url>
@@ -9704,7 +9704,7 @@
     </paper>
     <paper id="6">
       <title>Improving a symbolic parser through partially supervised learning</title>
-      <author><first>Éric</first><last>de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de la Clergerie</last></author>
       <pages>54–72</pages>
       <url hash="0fc1ce92">W13-5706</url>
       <bibkey>de-la-clergerie-2013-improving</bibkey>
@@ -9714,8 +9714,8 @@
       <author><first>Angelina</first><last>Ivanova</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Rebecca</first><last>Dridan</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>63–72</pages>
       <url hash="91c52644">W13-5707</url>
       <bibkey>ivanova-etal-2013-different</bibkey>
@@ -9751,7 +9751,7 @@
     <paper id="11">
       <title>Active Learning for Dependency Parsing by A Committee of Parsers</title>
       <author><first>Saeed</first><last>Majidi</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <pages>98–105</pages>
       <url hash="2d7bfa9c">W13-5711</url>
       <bibkey>majidi-crane-2013-active</bibkey>
@@ -9776,7 +9776,7 @@
       <author><first>Jungyeul</first><last>Park</last></author>
       <author><first>Daisuke</first><last>Kawahara</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>120–126</pages>
       <url hash="f62e70de">W13-5714</url>
       <bibkey>park-etal-2013-towards</bibkey>
diff --git a/data/xml/W14.xml b/data/xml/W14.xml
index 9098a5109c..e12039b0b7 100644
--- a/data/xml/W14.xml
+++ b/data/xml/W14.xml
@@ -21,7 +21,7 @@
       <author><first>Purya</first><last>Aliabadi</last></author>
       <author><first>Mohammad Sina</first><last>Ahmadi</last></author>
       <author><first>Shahin</first><last>Salavati</last></author>
-      <author><first>Kyumars Sheykh</first><last>Esmaili</last></author>
+      <author id="kyumars-sheykh-esmaili"><first>Kyumars Sheykh</first><last>Esmaili</last></author>
       <pages>1-6</pages>
       <url hash="83934da3">W14-0101</url>
       <bibkey>aliabadi-etal-2014-towards</bibkey>
@@ -35,7 +35,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>O</fixed-case>nto.<fixed-case>PT</fixed-case>: recent developments of a large public domain <fixed-case>P</fixed-case>ortuguese wordnet</title>
-      <author><first>Hugo Gonçalo</first><last>Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo Gonçalo</first><last>Oliveira</last></author>
       <author><first>Paulo</first><last>Gomes</last></author>
       <pages>16-22</pages>
       <url hash="4239209d">W14-0103</url>
@@ -52,7 +52,7 @@
       <title><fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case>, an improved, expanded and evaluated automatic <fixed-case>F</fixed-case>rench translation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Quentin</first><last>Pradet</last></author>
       <author><first>Gaël</first><last>de Chalendar</last></author>
-      <author><first>Jeanne</first><last>Baguenier Desormeaux</last></author>
+      <author id="jeanne-baguenier-desormeaux"><first>Jeanne</first><last>Baguenier Desormeaux</last></author>
       <pages>32-39</pages>
       <url hash="caf297d9">W14-0105</url>
       <bibkey>pradet-etal-2014-wonef</bibkey>
@@ -69,7 +69,7 @@
     <paper id="7">
       <title>Modeling Prefix and Particle Verbs in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et</title>
       <author><first>Christina</first><last>Hoppermann</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>49-54</pages>
       <url hash="5c721c2a">W14-0107</url>
       <bibkey>hoppermann-hinrichs-2014-modeling</bibkey>
@@ -86,7 +86,7 @@
     <paper id="9">
       <title>Aligning Word Senses in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et and the <fixed-case>DWDS</fixed-case> Dictionary of the <fixed-case>G</fixed-case>erman Language</title>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Reinhild</first><last>Barkey</last></author>
       <pages>63-70</pages>
       <url hash="3ac31ef8">W14-0109</url>
@@ -103,7 +103,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>J</fixed-case>ava Libraries for Accessing the <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ordnet: Comparison and Evaluation</title>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>78-85</pages>
       <url hash="3ef3c86d">W14-0111</url>
       <bibkey>finlayson-2014-java</bibkey>
@@ -114,7 +114,7 @@
       <author><first>Neha</first><last>Prabhugaonkar</last></author>
       <author><first>Venkatesh</first><last>Prabhu</last></author>
       <author><first>Ramdas</first><last>Karmali</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>86-94</pages>
       <url hash="7e4547f6">W14-0112</url>
       <bibkey>nagvenkar-etal-2014-concept</bibkey>
@@ -122,7 +122,7 @@
     <paper id="13">
       <title>Use of Sense Marking for Improving <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Coverage</title>
       <author><first>Neha</first><last>Prabhugaonkar</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
       <pages>95-99</pages>
       <url hash="c8403f2d">W14-0113</url>
       <bibkey>prabhugaonkar-pawar-2014-use</bibkey>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="17">
       <title>Enriching <fixed-case>S</fixed-case>erbian<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Electronic Dictionaries with Terms from the Culinary Domain</title>
-      <author><first>Staša Vujičić</first><last>Stanković</last></author>
+      <author id="stasa-vujicic-stankovic"><first>Staša Vujičić</first><last>Stanković</last></author>
       <author><first>Cvetana</first><last>Krstev</last></author>
       <author><first>Duško</first><last>Vitas</last></author>
       <pages>127-132</pages>
@@ -185,15 +185,15 @@
     <paper id="20">
       <title>Taking stock of the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet project: 5 years of development</title>
       <author><first>Marissa</first><last>Griesel</last></author>
-      <author><first>Sonja</first><last>Bosch</last></author>
+      <author id="sonja-bosch"><first>Sonja</first><last>Bosch</last></author>
       <pages>148-153</pages>
       <url hash="3da308f5">W14-0120</url>
       <bibkey>griesel-bosch-2014-taking</bibkey>
     </paper>
     <paper id="21">
       <title><fixed-case>R</fixed-case>u<fixed-case>T</fixed-case>hes Linguistic Ontology vs. <fixed-case>R</fixed-case>ussian Wordnets</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
-      <author><first>Boris</first><last>Dobrov</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="boris-v-dobrov"><first>Boris</first><last>Dobrov</last></author>
       <pages>154-162</pages>
       <url hash="29bd5fea">W14-0121</url>
       <bibkey>loukachevitch-dobrov-2014-ruthes</bibkey>
@@ -202,7 +202,7 @@
       <title>One Lexicon, Two Structures: So What Gives?</title>
       <author><first>Nabil</first><last>Gader</last></author>
       <author><first>Sandrine</first><last>Ollinger</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>163-171</pages>
       <url hash="d4445ae9">W14-0122</url>
       <bibkey>gader-etal-2014-one</bibkey>
@@ -219,7 +219,7 @@
       <title>Graph Based Algorithm for Automatic Domain Segmentation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Brijesh</first><last>Bhatt</last></author>
       <author><first>Subhash</first><last>Kunnath</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>178-185</pages>
       <url hash="99dd8b39">W14-0124</url>
       <bibkey>bhatt-etal-2014-graph</bibkey>
@@ -227,7 +227,7 @@
     <paper id="25">
       <title>Parse Ranking with Semantic Dependencies and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Xiaocheng</first><last>Yin</last></author>
-      <author><first>Jung-Jae</first><last>Kim</last></author>
+      <author id="jung-jae-kim"><first>Jung-Jae</first><last>Kim</last></author>
       <author><first>Zinaida</first><last>Pozen</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>186-193</pages>
@@ -237,10 +237,10 @@
     <paper id="26">
       <title>Do not do processing, when you can look up: Towards a Discrimination Net for <fixed-case>WSD</fixed-case></title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Siddhartha</first><last>Gunti</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>194-200</pages>
       <url hash="e5017fef">W14-0126</url>
       <bibkey>kanojia-etal-2014-processing</bibkey>
@@ -271,7 +271,7 @@
     </paper>
     <paper id="30">
       <title>Facilitating Multi-Lingual Sense Annotation: Human Mediated Lemmatizer</title>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Ankit</first><last>Bahuguna</last></author>
       <author><first>Lavita</first><last>Talukdar</last></author>
       <author><first>Bornali</first><last>Phukan</last></author>
@@ -299,7 +299,7 @@
       <author id="shikhar-kumar-sarma-gu"><first>Shikhar Kr.</first><last>Sarma</last></author>
       <author><first>Dibyajyoti</first><last>Sarmah</last></author>
       <author><first>Ratul</first><last>Deka</last></author>
-      <author><first>Anup</first><last>Barman</last></author>
+      <author id="anup-barman"><first>Anup</first><last>Barman</last></author>
       <author><first>Jumi</first><last>Sarmah</last></author>
       <author><first>Himadri</first><last>Bharali</last></author>
       <author><first>Mayashree</first><last>Mahanta</last></author>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>A</fixed-case>ssamese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et based Quality Enhancement of Bilingual Machine Translation System</title>
-      <author><first>Anup</first><last>Barman</last></author>
+      <author id="anup-barman"><first>Anup</first><last>Barman</last></author>
       <author><first>Jumi</first><last>Sarmah</last></author>
       <author id="shikhar-kumar-sarma-gu"><first>Shikhar Kumar</first><last>Sarma</last></author>
       <pages>256-261</pages>
@@ -338,9 +338,9 @@
     </paper>
     <paper id="37">
       <title>News about the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Verginica Barbu</first><last>Mititelu</last></author>
-      <author><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
-      <author><first>Dan</first><last>Tufiș</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Ștefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufiș</last></author>
       <pages>268-275</pages>
       <url hash="0f5e1e80">W14-0137</url>
       <bibkey>mititelu-etal-2014-news</bibkey>
@@ -354,7 +354,7 @@
     </paper>
     <paper id="39">
       <title>Leveraging Morpho-semantics for the Discovery of Relations in <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Yu-Yun</first><last>Chang</last></author>
       <pages>283-289</pages>
       <url hash="0ac1f5a1">W14-0139</url>
@@ -373,7 +373,7 @@
     <paper id="41">
       <title>Terminology in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and in pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Marta</first><last>Dobrowolska</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>299-303</pages>
       <url hash="869b2acf">W14-0141</url>
       <bibkey>dobrowolska-szpakowicz-2014-terminology</bibkey>
@@ -383,7 +383,7 @@
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>304-312</pages>
       <url hash="b65823e4">W14-0142</url>
       <bibkey>maziarz-etal-2014-plwordnet</bibkey>
@@ -413,8 +413,8 @@
       <author><first>Sudha</first><last>Bhingardive</last></author>
       <author><first>Tanuja</first><last>Ajotikar</last></author>
       <author><first>Irawati</first><last>Kulkarni</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>324-329</pages>
       <url hash="aa419fc8">W14-0145</url>
       <bibkey>bhingardive-etal-2014-semi</bibkey>
@@ -424,7 +424,7 @@
       <author><first>Marek</first><last>Maziarz</last></author>
       <author><first>Maciej</first><last>Piasecki</last></author>
       <author><first>Ewa</first><last>Rudnicka</last></author>
-      <author><first>Stan</first><last>Szpakowicz</last></author>
+      <author id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></author>
       <pages>330-337</pages>
       <url hash="09e72588">W14-0146</url>
       <bibkey>maziarz-etal-2014-registers</bibkey>
@@ -433,7 +433,7 @@
       <title><fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ordnet Visualizer: A Graphical User Interface for Browsing and Exploring Wordnets of <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Devendra Singh</first><last>Chaplot</last></author>
       <author><first>Sudha</first><last>Bhingardive</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>338-345</pages>
       <url hash="b13eafd4">W14-0147</url>
       <bibkey>chaplot-etal-2014-indowordnet</bibkey>
@@ -457,9 +457,9 @@
     </paper>
     <paper id="50">
       <title>First steps towards a Predicate Matrix</title>
-      <author><first>Maddalen</first><last>López de Lacalle</last></author>
+      <author id="maddalen-lopez-de-lacalle"><first>Maddalen</first><last>López de Lacalle</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>363-371</pages>
       <url hash="033723db">W14-0150</url>
       <bibkey>lopez-de-lacalle-etal-2014-first</bibkey>
@@ -474,7 +474,7 @@
     <paper id="52">
       <title>Embedding <fixed-case>N</fixed-case>om<fixed-case>L</fixed-case>ex-<fixed-case>BR</fixed-case> nominalizations into <fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ordnet-<fixed-case>PT</fixed-case></title>
       <author><first>Alexandre</first><last>Rademaker</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <author><first>Livy Maria Real</first><last>Coelho</last></author>
       <pages>378-382</pages>
@@ -484,10 +484,10 @@
     <paper id="53">
       <title><fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-<fixed-case>PT</fixed-case>: A Project Report</title>
       <author><first>Alexandre</first><last>Rademaker</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Gerard</first><last>de Melo</last></author>
       <author><first>Livy</first><last>Real</last></author>
-      <author><first>Maira</first><last>Gatti</last></author>
+      <author id="maira-gatti"><first>Maira</first><last>Gatti</last></author>
       <pages>383-390</pages>
       <url hash="275d75b4">W14-0153</url>
       <bibkey>rademaker-etal-2014-openwordnet</bibkey>
@@ -514,12 +514,12 @@
       <booktitle>Proceedings of the <fixed-case>EACL</fixed-case> 2014 Workshop on Dialogue in Motion</booktitle>
       <url hash="00b9153d">W14-02</url>
       <editor><first>Tiphaine</first><last>Dalmas</last></editor>
-      <editor><first>Jana</first><last>Götze</last></editor>
+      <editor id="jana-gotze"><first>Jana</first><last>Götze</last></editor>
       <editor><first>Joakim</first><last>Gustafson</last></editor>
-      <editor><first>Srinivasan</first><last>Janarthanam</last></editor>
+      <editor id="srinivasan-janarthanam"><first>Srinivasan</first><last>Janarthanam</last></editor>
       <editor><first>Jan</first><last>Kleindienst</last></editor>
-      <editor><first>Christian</first><last>Mueller</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="christian-mueller"><first>Christian</first><last>Mueller</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <doi>10.3115/v1/W14-02</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -557,7 +557,7 @@
     <paper id="3">
       <title>Click or Type: An Analysis of Wizard’s Interaction for Future Wizard Interface Design</title>
       <author><first>Srinivasan</first><last>Janarthanam</last></author>
-      <author><first>Robin</first><last>Hill</last></author>
+      <author id="robin-l-hill"><first>Robin</first><last>Hill</last></author>
       <author><first>Anna</first><last>Dickinson</last></author>
       <author><first>Morgan</first><last>Fredriksson</last></author>
       <pages>19–27</pages>
@@ -567,11 +567,11 @@
     </paper>
     <paper id="4">
       <title>Recipes for building voice search <fixed-case>UI</fixed-case>s for automotive</title>
-      <author><first>Martin</first><last>Labsky</last></author>
+      <author id="martin-labsky"><first>Martin</first><last>Labsky</last></author>
       <author><first>Ladislav</first><last>Kunc</last></author>
       <author><first>Tomas</first><last>Macek</last></author>
       <author><first>Jan</first><last>Kleindienst</last></author>
-      <author><first>Jan</first><last>Vystrcil</last></author>
+      <author id="jan-vystrcil"><first>Jan</first><last>Vystrcil</last></author>
       <pages>28–32</pages>
       <url hash="2f83eb85">W14-0204</url>
       <doi>10.3115/v1/W14-0204</doi>
@@ -597,7 +597,7 @@
     </paper>
     <paper id="7">
       <title>Collaborative Exploration in Human-Robot Teams: What’s in their Corpora of Dialog, Video, &amp; <fixed-case>LIDAR</fixed-case> Messages?</title>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Taylor</first><last>Cassidy</last></author>
       <author><first>Douglas</first><last>Summers-Stay</last></author>
       <pages>43–47</pages>
@@ -616,10 +616,10 @@
     </paper>
     <paper id="9">
       <title>Mostly Passive Information Delivery – a Prototype</title>
-      <author><first>Jan</first><last>Vystrčil</last></author>
+      <author id="jan-vystrcil"><first>Jan</first><last>Vystrčil</last></author>
       <author><first>Tomas</first><last>Macek</last></author>
       <author><first>David</first><last>Luksch</last></author>
-      <author><first>Martin</first><last>Labský</last></author>
+      <author id="martin-labsky"><first>Martin</first><last>Labský</last></author>
       <author><first>Ladislav</first><last>Kunc</last></author>
       <author><first>Jan</first><last>Kleindienst</last></author>
       <author><first>Tereza</first><last>Kašparová</last></author>
@@ -630,7 +630,7 @@
     </paper>
     <paper id="10">
       <title>Navigation Dialog of Blind People: Recovery from Getting Lost</title>
-      <author><first>Jan</first><last>Vystrcil</last></author>
+      <author id="jan-vystrcil"><first>Jan</first><last>Vystrcil</last></author>
       <author><first>Ivo</first><last>Maly</last></author>
       <author><first>Jan</first><last>Balata</last></author>
       <author><first>Zdenek</first><last>Mikovec</last></author>
@@ -644,7 +644,7 @@
       <author><first>Aasish</first><last>Pappu</last></author>
       <author><first>Ming</first><last>Sun</last></author>
       <author><first>Seshadri</first><last>Sridharan</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>63–67</pages>
       <url hash="aa2a64df">W14-0211</url>
       <doi>10.3115/v1/W14-0211</doi>
@@ -653,7 +653,7 @@
     <paper id="12">
       <title>Situationally Aware In-Car Information Presentation Using Incremental Speech Generation: Safer, and More Effective</title>
       <author><first>Spyros</first><last>Kousidis</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Timo</first><last>Baumann</last></author>
       <author><first>Hendrik</first><last>Buschmeier</last></author>
       <author><first>Stefan</first><last>Kopp</last></author>
@@ -681,9 +681,9 @@
       <editor><first>Ulrich</first><last>Germann</last></editor>
       <editor><first>Michael</first><last>Carl</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>Germán</first><last>Sanchis-Trilles</last></editor>
-      <editor><first>Francisco</first><last>Casacuberta</last></editor>
-      <editor><first>Robin</first><last>Hill</last></editor>
+      <editor id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></editor>
+      <editor id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></editor>
+      <editor id="robin-l-hill"><first>Robin</first><last>Hill</last></editor>
       <editor><first>Sharon</first><last>O’Brien</last></editor>
       <doi>10.3115/v1/W14-03</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -698,8 +698,8 @@
     </frontmatter>
     <paper id="1">
       <title>Word Confidence Estimation for <fixed-case>SMT</fixed-case> N-best List Re-ranking</title>
-      <author><first>Ngoc-Quang</first><last>Luong</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc-Quang</first><last>Luong</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>1–9</pages>
       <url hash="05a26f4c">W14-0301</url>
@@ -708,8 +708,8 @@
     </paper>
     <paper id="2">
       <title>Proofreading Human Translations with an <fixed-case>E</fixed-case>-pen</title>
-      <author><first>Vicent</first><last>Alabau</last></author>
-      <author><first>Luis A.</first><last>Leiva</last></author>
+      <author id="vicent-alabau"><first>Vicent</first><last>Alabau</last></author>
+      <author id="luis-a-leiva"><first>Luis A.</first><last>Leiva</last></author>
       <pages>10–15</pages>
       <url hash="90dabf21">W14-0302</url>
       <doi>10.3115/v1/W14-0302</doi>
@@ -743,7 +743,7 @@
     </paper>
     <paper id="6">
       <title>Measuring the Cognitive Effort of Literal Translation Processes</title>
-      <author><first>Moritz</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz</first><last>Schaeffer</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <pages>29–37</pages>
       <url hash="5cc97634">W14-0306</url>
@@ -773,7 +773,7 @@
       <title>Black-box integration of heterogeneous bilingual resources into an interactive translation system</title>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Daniel</first><last>Torregrosa</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <pages>57–65</pages>
       <url hash="cb5a0cb1">W14-0309</url>
       <doi>10.3115/v1/W14-0309</doi>
@@ -784,7 +784,7 @@
       <author><first>Violeta</first><last>Seretan</last></author>
       <author><first>Johann</first><last>Roturier</last></author>
       <author><first>David</first><last>Silva</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <pages>66–71</pages>
       <url hash="bf9bcc0a">W14-0310</url>
       <doi>10.3115/v1/W14-0310</doi>
@@ -793,9 +793,9 @@
     <paper id="11">
       <title>Real Time Adaptive Machine Translation for Post-Editing with cdec and <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>enter</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Isabel</first><last>Lacruz</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>72–77</pages>
       <url hash="0ba11675">W14-0311</url>
       <doi>10.3115/v1/W14-0311</doi>
@@ -812,7 +812,7 @@
     </paper>
     <paper id="13">
       <title>Online Word Alignment for Online Adaptive Machine Translation</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>84–92</pages>
@@ -883,9 +883,9 @@
     </paper>
     <paper id="4">
       <title>Some Issues on the Normalization of a Corpus of Products Reviews in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Magali</first><last>Sanches Duran</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last></author>
       <author><first>Lucas</first><last>Avanço</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <author><first>Maria da Graça</first><last>Volpe Nunes</last></author>
       <pages>22–28</pages>
@@ -905,7 +905,7 @@
     <paper id="6">
       <title>The <fixed-case>PAISÀ</fixed-case> Corpus of <fixed-case>I</fixed-case>talian Web Texts</title>
       <author><first>Verena</first><last>Lyding</last></author>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <author><first>Claudia</first><last>Borghetti</last></author>
       <author><first>Marco</first><last>Brunello</last></author>
       <author><first>Sara</first><last>Castagnoli</last></author>
@@ -924,7 +924,7 @@
       <booktitle>Proceedings of the 5th Workshop on Cognitive Aspects of Computational Language Learning (<fixed-case>C</fixed-case>og<fixed-case>ACLL</fixed-case>)</booktitle>
       <url hash="3a1d62a3">W14-05</url>
       <editor><first>Alessandro</first><last>Lenci</last></editor>
-      <editor><first>Muntsa</first><last>Padró</last></editor>
+      <editor id="muntsa-padro"><first>Muntsa</first><last>Padró</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
       <editor><first>Aline</first><last>Villavicencio</last></editor>
       <doi>10.3115/v1/W14-05</doi>
@@ -995,7 +995,7 @@
       <author><first>Judith</first><last>Gaspers</last></author>
       <author><first>Maximilian</first><last>Panzner</last></author>
       <author><first>Andre</first><last>Lemme</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Katharina J.</first><last>Rohlfing</last></author>
       <author><first>Sebastian</first><last>Wrede</last></author>
       <pages>30–37</pages>
@@ -1014,7 +1014,7 @@
     </paper>
     <paper id="9">
       <title>How well can a corpus-derived co-occurrence network simulate human associative behavior?</title>
-      <author><first>Gemma</first><last>Bel Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel Enguix</last></author>
       <author><first>Reinhard</first><last>Rapp</last></author>
       <author><first>Michael</first><last>Zock</last></author>
       <pages>43–48</pages>
@@ -1025,7 +1025,7 @@
     <paper id="10">
       <title>Agent-based modeling of language evolution</title>
       <author><first>Torvald</first><last>Lekvam</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Lars</first><last>Bungum</last></author>
       <pages>49–54</pages>
       <url hash="5f234987">W14-0510</url>
@@ -1046,9 +1046,9 @@
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (<fixed-case>L</fixed-case>a<fixed-case>T</fixed-case>e<fixed-case>CH</fixed-case>)</booktitle>
       <url hash="e155bf11">W14-06</url>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
       <editor><first>Cristina</first><last>Vertan</last></editor>
-      <editor><first>Antal</first><last>van den Bosch</last></editor>
+      <editor id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></editor>
       <editor><first>Caroline</first><last>Sporleder</last></editor>
       <doi>10.3115/v1/W14-06</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1067,7 +1067,7 @@
       <author><first>Christoph</first><last>Teichmann</last></author>
       <author><first>Gerhard</first><last>Heyer</last></author>
       <author><first>Monica</first><last>Berti</last></author>
-      <author><first>Gregory</first><last>Crane</last></author>
+      <author id="gregory-crane"><first>Gregory</first><last>Crane</last></author>
       <pages>1–8</pages>
       <url hash="0c55cf43">W14-0601</url>
       <doi>10.3115/v1/W14-0601</doi>
@@ -1085,7 +1085,7 @@
     <paper id="3">
       <title>Bootstrapping a historical commodities lexicon with <fixed-case>SKOS</fixed-case> and <fixed-case>DB</fixed-case>pedia</title>
       <author><first>Ewan</first><last>Klein</last></author>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Jim</first><last>Clifford</last></author>
       <pages>13–21</pages>
       <url hash="fd27bdf7">W14-0603</url>
@@ -1108,7 +1108,7 @@
     <paper id="5">
       <title>A Multilingual Evaluation of Three Spelling Normalisation Methods for Historical Text</title>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>32–41</pages>
       <url hash="51ff0334">W14-0605</url>
@@ -1137,7 +1137,7 @@
     <paper id="8">
       <title>Automated Error Detection in Digitized Cultural Heritage Documents</title>
       <author><first>Kata</first><last>Gábor</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>56–61</pages>
       <url hash="982fc069">W14-0608</url>
       <doi>10.3115/v1/W14-0608</doi>
@@ -1233,7 +1233,7 @@
     <paper id="18">
       <title>Automatic Wayang Ontology Construction using Relation Extraction from Free Text</title>
       <author><first>Hadaiq</first><last>Sanabila</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>128–136</pages>
       <url hash="42d8335e">W14-0618</url>
       <doi>10.3115/v1/W14-0618</doi>
@@ -1245,9 +1245,9 @@
       <booktitle>Proceedings of the <fixed-case>EACL</fixed-case> 2014 Workshop on Computational Approaches to Causality in Language (<fixed-case>CA</fixed-case>to<fixed-case>CL</fixed-case>)</booktitle>
       <url hash="535fc92a">W14-07</url>
       <editor><first>Oleksandr</first><last>Kolomiyets</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <editor><first>Steven</first><last>Bethard</last></editor>
       <doi>10.3115/v1/W14-07</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1272,9 +1272,9 @@
     <paper id="2">
       <title>Annotating Causality in the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3 Corpus</title>
       <author><first>Paramita</first><last>Mirza</last></author>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <pages>10–19</pages>
       <url hash="5294feed">W14-0702</url>
       <doi>10.3115/v1/W14-0702</doi>
@@ -1283,7 +1283,7 @@
     <paper id="3">
       <title>Automatic Detection of Causal Relations in <fixed-case>G</fixed-case>erman Multilogs</title>
       <author><first>Tina</first><last>Bögel</last></author>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
       <author><first>Sebastian</first><last>Sulger</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <pages>20–27</pages>
@@ -1311,7 +1311,7 @@
     </paper>
     <paper id="6">
       <title>Likelihood of External Causation in the Structure of Events</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
       <pages>40–47</pages>
       <url hash="fcb80ba7">W14-0706</url>
@@ -1321,7 +1321,7 @@
     <paper id="7">
       <title>Recognizing Causality in Verb-Noun Pairs via Noun and Verb Semantics</title>
       <author><first>Mehwish</first><last>Riaz</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>48–57</pages>
       <url hash="211e5c0a">W14-0707</url>
       <doi>10.3115/v1/W14-0707</doi>
@@ -1335,7 +1335,7 @@
       <editor><first>Valia</first><last>Kordoni</last></editor>
       <editor><first>Markus</first><last>Egg</last></editor>
       <editor><first>Agata</first><last>Savary</last></editor>
-      <editor><first>Eric</first><last>Wehrli</last></editor>
+      <editor id="eric-wehrli"><first>Eric</first><last>Wehrli</last></editor>
       <editor><first>Stefan</first><last>Evert</last></editor>
       <doi>10.3115/v1/W14-08</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1359,7 +1359,7 @@
     <paper id="2">
       <title>A Supervised Model for Extraction of Multiword Expressions, Based on Statistical Context Features</title>
       <author><first>Meghdad</first><last>Farahmand</last></author>
-      <author><first>Ronaldo</first><last>Martins</last></author>
+      <author id="ronaldo-teixeira-martins"><first>Ronaldo</first><last>Martins</last></author>
       <pages>10–16</pages>
       <url hash="8a58143b">W14-0802</url>
       <doi>10.3115/v1/W14-0802</doi>
@@ -1367,7 +1367,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>VPCT</fixed-case>agger: Detecting Verb-Particle Constructions With Syntax-Based Methods</title>
-      <author><first>István</first><last>Nagy T.</last></author>
+      <author id="istvan-nagy-t"><first>István</first><last>Nagy T.</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <pages>17–25</pages>
       <url hash="f57387f3">W14-0803</url>
@@ -1385,7 +1385,7 @@
     <paper id="5">
       <title>Parsing <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek verb <fixed-case>MWE</fixed-case>s with <fixed-case>LFG</fixed-case>/<fixed-case>XLE</fixed-case> grammars</title>
       <author><first>Niki</first><last>Samaridi</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <pages>33–37</pages>
       <url hash="56f2badc">W14-0805</url>
       <doi>10.3115/v1/W14-0805</doi>
@@ -1393,9 +1393,9 @@
     </paper>
     <paper id="6">
       <title>Evaluation of a Substitution Method for Idiom Transformation in Statistical Machine Translation</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>38–42</pages>
       <url hash="17a19216">W14-0806</url>
       <doi>10.3115/v1/W14-0806</doi>
@@ -1404,8 +1404,8 @@
     <paper id="7">
       <title>Encoding <fixed-case>MWE</fixed-case>s in a conceptual lexicon</title>
       <author><first>Aggeliki</first><last>Fotopoulou</last></author>
-      <author><first>Stella</first><last>Markantonatou</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <pages>43–47</pages>
       <url hash="d1289c7e">W14-0807</url>
       <doi>10.3115/v1/W14-0807</doi>
@@ -1413,9 +1413,9 @@
     </paper>
     <paper id="8">
       <title><fixed-case>G</fixed-case>erman Compounds and Statistical Machine Translation. Can they get along?</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>48–56</pages>
       <url hash="94041c5e">W14-0808</url>
       <doi>10.3115/v1/W14-0808</doi>
@@ -1435,7 +1435,7 @@
       <title>Mickey Mouse is not a Phrase: Improving Relevance in <fixed-case>E</fixed-case>-Commerce with Multiword Expressions</title>
       <author><first>Prathyusha</first><last>Senthil Kumar</last></author>
       <author><first>Vamsi</first><last>Salaka</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Brian</first><last>Johnson</last></author>
       <pages>62–66</pages>
       <url hash="f1c610e8">W14-0810</url>
@@ -1444,8 +1444,8 @@
     </paper>
     <paper id="11">
       <title>Encoding of Compounds in <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
-      <author><first>Miriam R L</first><last>Petruck</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last></author>
       <pages>67–71</pages>
       <url hash="237bc899">W14-0811</url>
       <doi>10.3115/v1/W14-0811</doi>
@@ -1480,7 +1480,7 @@
     <paper id="15">
       <title>Detecting change and emergence for multiword expressions</title>
       <author><first>Martin</first><last>Emms</last></author>
-      <author><first>Arun</first><last>Jayapal</last></author>
+      <author id="arun-kumar-jayapal"><first>Arun</first><last>Jayapal</last></author>
       <pages>89–93</pages>
       <url hash="5c773094">W14-0815</url>
       <doi>10.3115/v1/W14-0815</doi>
@@ -1488,10 +1488,10 @@
     </paper>
     <paper id="16">
       <title>An Approach to Take Multi-Word Expressions</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Meredith</first><last>Green</last></author>
       <author><first>Jenette</first><last>Preciado</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>94–98</pages>
       <url hash="1a17a3f9">W14-0816</url>
       <doi>10.3115/v1/W14-0816</doi>
@@ -1509,7 +1509,7 @@
     <paper id="18">
       <title>Feature Norms of <fixed-case>G</fixed-case>erman Noun Compounds</title>
       <author><first>Stephen</first><last>Roller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>104–108</pages>
       <url hash="ebf98c70">W14-0818</url>
       <doi>10.3115/v1/W14-0818</doi>
@@ -1520,7 +1520,7 @@
       <author><first>Lis</first><last>Pereira</last></author>
       <author><first>Elga</first><last>Strafella</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>109–113</pages>
       <url hash="ba44f68e">W14-0819</url>
       <doi>10.3115/v1/W14-0819</doi>
@@ -1529,7 +1529,7 @@
     <paper id="20">
       <title>Unsupervised Construction of a Lexicon and a Repository of Variation Patterns for <fixed-case>A</fixed-case>rabic Modal Multiword Expressions</title>
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Jana</first><last>Diesner</last></author>
       <pages>114–123</pages>
       <url hash="b6fba21d">W14-0820</url>
@@ -1552,7 +1552,7 @@
       <url hash="82e422b6">W14-09</url>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <doi>10.3115/v1/W14-09</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
@@ -1567,7 +1567,7 @@
     <paper id="1">
       <title>Generating Music from Literature</title>
       <author><first>Hannah</first><last>Davis</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>1–10</pages>
       <url hash="aaa2c920">W14-0901</url>
       <doi>10.3115/v1/W14-0901</doi>
@@ -1576,7 +1576,7 @@
     <paper id="2">
       <title>Computational analysis to explore authors’ depiction of characters</title>
       <author><first>Joseph</first><last>Bullard</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <pages>11–16</pages>
       <url hash="b1c6996e">W14-0902</url>
       <doi>10.3115/v1/W14-0902</doi>
@@ -1602,7 +1602,7 @@
     </paper>
     <paper id="5">
       <title>Structure-based Clustering of Novels</title>
-      <author><first>Mariona</first><last>Coll Ardanuy</last></author>
+      <author id="mariona-coll-ardanuy"><first>Mariona</first><last>Coll Ardanuy</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <pages>31–39</pages>
       <url hash="6f194834">W14-0905</url>
@@ -1642,8 +1642,8 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Hybrid Approaches to Machine Translation (<fixed-case>H</fixed-case>y<fixed-case>T</fixed-case>ra)</booktitle>
       <url hash="1b70f99e">W14-10</url>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <editor><first>Patrik</first><last>Lambert</last></editor>
       <editor><first>Kurt</first><last>Eberle</last></editor>
@@ -1703,7 +1703,7 @@
     </paper>
     <paper id="6">
       <title>Building a <fixed-case>S</fixed-case>panish-<fixed-case>G</fixed-case>erman Dictionary for Hybrid <fixed-case>MT</fixed-case></title>
-      <author><first>Anne</first><last>Göhring</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
       <pages>30–35</pages>
       <url hash="0a3db887">W14-1006</url>
       <doi>10.3115/v1/W14-1006</doi>
@@ -1711,9 +1711,9 @@
     </paper>
     <paper id="7">
       <title>An Empirical Study of the Impact of Idioms on Phrase Based Statistical Machine Translation of <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>razilian-<fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Giancarlo</first><last>Salton</last></author>
+      <author id="giancarlo-salton"><first>Giancarlo</first><last>Salton</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>36–41</pages>
       <url hash="8b2db158">W14-1007</url>
       <doi>10.3115/v1/W14-1007</doi>
@@ -1722,7 +1722,7 @@
     <paper id="8">
       <title>Resumptive Pronoun Detection for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case></title>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Jamal</first><last>Laoudi</last></author>
       <pages>42–47</pages>
       <url hash="3d8b982c">W14-1008</url>
@@ -1733,7 +1733,7 @@
       <title>Automatic Building and Using Parallel Resources for <fixed-case>SMT</fixed-case> from Comparable Corpora</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>48–57</pages>
       <url hash="2fc41081">W14-1009</url>
       <doi>10.3115/v1/W14-1009</doi>
@@ -1776,7 +1776,7 @@
       <title>Deriving de/het gender classification for <fixed-case>D</fixed-case>utch nouns for rule-based <fixed-case>MT</fixed-case> generation tasks</title>
       <author><first>Bogdan</first><last>Babych</last></author>
       <author><first>Jonathan</first><last>Geiger</last></author>
-      <author><first>Mireia</first><last>Ginestí Rosell</last></author>
+      <author id="mireia-ginesti-rosell"><first>Mireia</first><last>Ginestí Rosell</last></author>
       <author><first>Kurt</first><last>Eberle</last></author>
       <pages>75–81</pages>
       <url hash="ae7d1676">W14-1014</url>
@@ -1803,7 +1803,7 @@
     </paper>
     <paper id="17">
       <title>How to overtake <fixed-case>G</fixed-case>oogle in <fixed-case>MT</fixed-case> quality - the <fixed-case>B</fixed-case>altic case</title>
-      <author><first>Andrejs</first><last>Vasiljevs</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiljevs</last></author>
       <pages>96</pages>
       <url hash="dc47c711">W14-1017</url>
       <doi>10.3115/v1/W14-1017</doi>
@@ -1862,7 +1862,7 @@
     <paper id="3">
       <title>The impact of near domain transfer on biomedical named entity recognition</title>
       <author><first>Nigel</first><last>Collier</last></author>
-      <author><first>Mai-vu</first><last>Tran</last></author>
+      <author id="mai-vu-tran"><first>Mai-vu</first><last>Tran</last></author>
       <author><first>Ferdinand</first><last>Paster</last></author>
       <pages>11–20</pages>
       <url hash="e1861984">W14-1103</url>
@@ -1880,7 +1880,7 @@
     </paper>
     <paper id="5">
       <title>Towards Cross-Domain <fixed-case>PDTB</fixed-case>-Style Discourse Parsing</title>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>30–37</pages>
       <url hash="ab9fc40f">W14-1105</url>
@@ -1890,7 +1890,7 @@
     <paper id="6">
       <title>Translating <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Terminology into a Minor Language</title>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <pages>38–45</pages>
       <url hash="07386b0c">W14-1106</url>
       <doi>10.3115/v1/W14-1106</doi>
@@ -1956,10 +1956,10 @@
     <paper id="13">
       <title>Adverse Drug Event prediction combining shallow analysis and machine learning</title>
       <author><first>Sara</first><last>Santiso</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>85–89</pages>
       <url hash="a90acb12">W14-1113</url>
       <doi>10.3115/v1/W14-1113</doi>
@@ -1968,7 +1968,7 @@
     <paper id="14">
       <title>Reducing <fixed-case>VSM</fixed-case> data sparseness by generalizing contexts: application to health text mining</title>
       <author><first>Amandine</first><last>Périnet</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>90–95</pages>
       <url hash="510ba9b0">W14-1114</url>
       <doi>10.3115/v1/W14-1114</doi>
@@ -1977,7 +1977,7 @@
     <paper id="15">
       <title>Disambiguation of Period Characters in Clinical Narratives</title>
       <author><first>Markus</first><last>Kreuzthaler</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <pages>96–100</pages>
       <url hash="df09cf32">W14-1115</url>
       <doi>10.3115/v1/W14-1115</doi>
@@ -1985,7 +1985,7 @@
     </paper>
     <paper id="16">
       <title>Tuning <fixed-case>H</fixed-case>eidel<fixed-case>T</fixed-case>ime for identifying time expressions in clinical texts in <fixed-case>E</fixed-case>nglish and <fixed-case>F</fixed-case>rench</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <pages>101–105</pages>
       <url hash="a341dbf5">W14-1116</url>
@@ -1994,7 +1994,7 @@
     </paper>
     <paper id="17">
       <title>Detecting drugs and adverse events from <fixed-case>S</fixed-case>panish social media streams</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Ricardo</first><last>Revert</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>106–115</pages>
@@ -2036,8 +2036,8 @@
     </frontmatter>
     <paper id="1">
       <title>One Step Closer to Automatic Evaluation of Text Simplification Systems</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>1–10</pages>
       <url hash="dd129ce8">W14-1201</url>
@@ -2047,7 +2047,7 @@
     <paper id="2">
       <title>Automatic diagnosis of understanding of medical words</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Dany</first><last>Amiot</last></author>
       <pages>11–20</pages>
       <url hash="eb287e0b">W14-1202</url>
@@ -2057,7 +2057,7 @@
     <paper id="3">
       <title>Exploring Measures of “Readability” for Spoken Language: Analyzing linguistic features of subtitles to identify age-specific <fixed-case>TV</fixed-case> programs</title>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>21–29</pages>
       <url hash="8c88cab5">W14-1203</url>
       <doi>10.3115/v1/W14-1203</doi>
@@ -2075,7 +2075,7 @@
     </paper>
     <paper id="5">
       <title>An eye-tracking evaluation of some parser complexity metrics</title>
-      <author><first>Matthew J.</first><last>Green</last></author>
+      <author id="matthew-j-green"><first>Matthew J.</first><last>Green</last></author>
       <pages>38–46</pages>
       <url hash="8800a673">W14-1205</url>
       <doi>10.3115/v1/W14-1205</doi>
@@ -2115,10 +2115,10 @@
     </paper>
     <paper id="9">
       <title>Improving Readability of <fixed-case>S</fixed-case>wedish Electronic Health Records through Lexical Simplification: First Results</title>
-      <author><first>Gintarė</first><last>Grigonyte</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonyte</last></author>
       <author><first>Maria</first><last>Kvist</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>74–83</pages>
       <url hash="79d796f1">W14-1209</url>
       <doi>10.3115/v1/W14-1209</doi>
@@ -2136,7 +2136,7 @@
     <paper id="11">
       <title><fixed-case>EACL</fixed-case> - Expansion of Abbreviations in <fixed-case>CL</fixed-case>inical text</title>
       <author><first>Lisa</first><last>Tengstrand</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Aron</first><last>Henriksson</last></author>
       <author><first>Martin</first><last>Duneld</last></author>
       <author><first>Maria</first><last>Kvist</last></author>
@@ -2147,8 +2147,8 @@
     </paper>
     <paper id="12">
       <title>A Quantitative Insight into the Impact of Translation on Readability</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu</first><last>Dinu</last></author>
       <pages>104–113</pages>
       <url hash="9c8137b7">W14-1212</url>
       <doi>10.3115/v1/W14-1212</doi>
@@ -2157,7 +2157,7 @@
     <paper id="13">
       <title>Classifying easy-to-read texts without parsing</title>
       <author><first>Johan</first><last>Falkenjack</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>114–122</pages>
       <url hash="dbd1dbfb">W14-1213</url>
       <doi>10.3115/v1/W14-1213</doi>
@@ -2165,7 +2165,7 @@
     </paper>
     <paper id="14">
       <title>An Analysis of Crowdsourced Text Simplifications</title>
-      <author><first>Marcelo</first><last>Amancio</last></author>
+      <author id="marcelo-adriano-amancio"><first>Marcelo</first><last>Amancio</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>123–130</pages>
       <url hash="ec7f0c14">W14-1214</url>
@@ -2174,8 +2174,8 @@
     </paper>
     <paper id="15">
       <title>An evaluation of syntactic simplification rules for people with autism</title>
-      <author><first>Richard</first><last>Evans</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Iustin</first><last>Dornescu</last></author>
       <pages>131–140</pages>
       <url hash="e9e8bd6f">W14-1215</url>
@@ -2188,9 +2188,9 @@
       <booktitle>Proceedings of the 5th Workshop on Language Analysis for Social Media (<fixed-case>LASM</fixed-case>)</booktitle>
       <url hash="f7908d03">W14-13</url>
       <editor><first>Atefeh</first><last>Farzindar</last></editor>
-      <editor><first>Diana</first><last>Inkpen</last></editor>
+      <editor id="diana-inkpen"><first>Diana</first><last>Inkpen</last></editor>
       <editor><first>Michael</first><last>Gamon</last></editor>
-      <editor><first>Meena</first><last>Nagarajan</last></editor>
+      <editor id="meenakshi-nagarajan"><first>Meena</first><last>Nagarajan</last></editor>
       <doi>10.3115/v1/W14-13</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
@@ -2205,7 +2205,7 @@
     <paper id="1">
       <title>Mining Lexical Variants from Microblogs: An Unsupervised Multilingual Approach</title>
       <author><first>Alejandro</first><last>Mosquera</last></author>
-      <author><first>Paloma</first><last>Moreda Pozo</last></author>
+      <author id="paloma-moreda-pozo"><first>Paloma</first><last>Moreda Pozo</last></author>
       <pages>1–7</pages>
       <url hash="85d06fbd">W14-1301</url>
       <doi>10.3115/v1/W14-1301</doi>
@@ -2213,9 +2213,9 @@
     </paper>
     <paper id="2">
       <title>Estimating Time to Event from Tweets Using Temporal Expressions</title>
-      <author><first>Ali</first><last>Hürriyetoǧlu</last></author>
+      <author id="ali-hurriyetoglu"><first>Ali</first><last>Hürriyetoǧlu</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>8–16</pages>
       <url hash="dc705ec8">W14-1302</url>
       <doi>10.3115/v1/W14-1302</doi>
@@ -2224,7 +2224,7 @@
     <paper id="3">
       <title>Accurate Language Identification of <fixed-case>T</fixed-case>witter Messages</title>
       <author><first>Marco</first><last>Lui</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>17–25</pages>
       <url hash="a6179faf">W14-1303</url>
       <doi>10.3115/v1/W14-1303</doi>
@@ -2234,7 +2234,7 @@
       <title>The (Un)Predictability of Emotional Hashtags in <fixed-case>T</fixed-case>witter</title>
       <author><first>Florian</first><last>Kunneman</last></author>
       <author><first>Christine</first><last>Liebrecht</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>26–34</pages>
       <url hash="bbb1222e">W14-1304</url>
       <doi>10.3115/v1/W14-1304</doi>
@@ -2243,7 +2243,7 @@
     <paper id="5">
       <title>Finding Arguing Expressions of Divergent Viewpoints in Online Debates</title>
       <author><first>Amine</first><last>Trabelsi</last></author>
-      <author><first>Osmar R.</first><last>Zaïane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar R.</first><last>Zaïane</last></author>
       <pages>35–43</pages>
       <url hash="8c264680">W14-1305</url>
       <doi>10.3115/v1/W14-1305</doi>
@@ -2261,7 +2261,7 @@
     <paper id="7">
       <title>Vowel and Diacritic Restoration for Social Media Texts</title>
       <author><first>Kübra</first><last>Adali</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <pages>53–61</pages>
       <url hash="b225a513">W14-1307</url>
       <doi>10.3115/v1/W14-1307</doi>
@@ -2269,8 +2269,8 @@
     </paper>
     <paper id="8">
       <title>A Cascaded Approach for Social Media Text Normalization of <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Dilara</first><last>Torunoǧlu</last></author>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="dilara-torunoglu-selamet"><first>Dilara</first><last>Torunoǧlu</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <pages>62–70</pages>
       <url hash="84b56397">W14-1308</url>
       <doi>10.3115/v1/W14-1308</doi>
@@ -2352,7 +2352,7 @@
       <title>A Type-Driven Tensor-Based Semantics for <fixed-case>CCG</fixed-case></title>
       <author><first>Jean</first><last>Maillard</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>Edward</first><last>Grefenstette</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
       <pages>46–54</pages>
       <url hash="21d5f3bd">W14-1406</url>
       <doi>10.3115/v1/W14-1406</doi>
@@ -2411,11 +2411,11 @@
       <booktitle>Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (<fixed-case>CVSC</fixed-case>)</booktitle>
       <url hash="14046274">W14-15</url>
       <editor><first>Alexandre</first><last>Allauzen</last></editor>
-      <editor><first>Raffaella</first><last>Bernardi</last></editor>
-      <editor><first>Edward</first><last>Grefenstette</last></editor>
+      <editor id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></editor>
       <editor><first>Hugo</first><last>Larochelle</last></editor>
-      <editor><first>Christopher</first><last>Manning</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="christopher-d-manning"><first>Christopher</first><last>Manning</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <doi>10.3115/v1/W14-15</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
@@ -2439,7 +2439,7 @@
     <paper id="2">
       <title>Distributional Composition using Higher-Order Dependency Vectors</title>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
       <pages>11–20</pages>
       <url hash="ef4796e2">W14-1502</url>
@@ -2481,7 +2481,7 @@
       <booktitle>Proceedings of the Eighteenth Conference on Computational Natural Language Learning</booktitle>
       <url hash="141922a6">W14-16</url>
       <editor><first>Roser</first><last>Morante</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <doi>10.3115/v1/W14-16</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
@@ -2495,11 +2495,11 @@
     </frontmatter>
     <paper id="1">
       <title>What’s in a p-value in <fixed-case>NLP</fixed-case>?</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Hector</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Hector</first><last>Martínez Alonso</last></author>
       <pages>1–10</pages>
       <url hash="3e10b79c">W14-1601</url>
       <doi>10.3115/v1/W14-1601</doi>
@@ -2529,7 +2529,7 @@
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>30–38</pages>
       <url hash="1ab2aa66">W14-1604</url>
       <doi>10.3115/v1/W14-1604</doi>
@@ -2566,7 +2566,7 @@
     <paper id="8">
       <title>Looking for Hyponyms in Vector Space</title>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>68–77</pages>
       <url hash="65e70dd1">W14-1608</url>
       <doi>10.3115/v1/W14-1608</doi>
@@ -2595,7 +2595,7 @@
     <paper id="11">
       <title>Improved Pattern Learning for Bootstrapped Entity Extraction</title>
       <author><first>Sonal</first><last>Gupta</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>98–108</pages>
       <url hash="2592aa4d">W14-1611</url>
       <doi>10.3115/v1/W14-1611</doi>
@@ -2603,8 +2603,8 @@
     </paper>
     <paper id="12">
       <title>Towards Temporal Scoping of Relational Facts based on <fixed-case>W</fixed-case>ikipedia Data</title>
-      <author><first>Avirup</first><last>Sil</last></author>
-      <author><first>Silviu-Petru</first><last>Cucerzan</last></author>
+      <author id="avirup-sil"><first>Avirup</first><last>Sil</last></author>
+      <author id="silviu-cucerzan"><first>Silviu-Petru</first><last>Cucerzan</last></author>
       <pages>109–118</pages>
       <url hash="02c76126">W14-1612</url>
       <doi>10.3115/v1/W14-1612</doi>
@@ -2621,8 +2621,8 @@
     </paper>
     <paper id="14">
       <title>Treebank Translation for Cross-Lingual Parser Induction</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>130–140</pages>
       <url hash="0b012e48">W14-1614</url>
@@ -2632,9 +2632,9 @@
     <paper id="15">
       <title>Weakly-Supervised <fixed-case>B</fixed-case>ayesian Learning of a <fixed-case>CCG</fixed-case> Supertagger</title>
       <author><first>Dan</first><last>Garrette</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>141–150</pages>
       <url hash="bb994adb">W14-1615</url>
       <doi>10.3115/v1/W14-1615</doi>
@@ -2643,7 +2643,7 @@
     <paper id="16">
       <title>Factored <fixed-case>M</fixed-case>arkov Translation with Robust Modeling</title>
       <author><first>Yang</first><last>Feng</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Xinkai</first><last>Du</last></author>
       <pages>151–159</pages>
       <url hash="d49d25ac">W14-1616</url>
@@ -2696,7 +2696,7 @@
       <url hash="32f593f2">W14-17</url>
       <editor><first>Hwee Tou</first><last>Ng</last></editor>
       <editor><first>Siew Mei</first><last>Wu</last></editor>
-      <editor><first>Ted</first><last>Briscoe</last></editor>
+      <editor id="ted-briscoe"><first>Ted</first><last>Briscoe</last></editor>
       <editor><first>Christian</first><last>Hadiwinoto</last></editor>
       <editor><first>Raymond Hendy</first><last>Susanto</last></editor>
       <editor><first>Christopher</first><last>Bryant</last></editor>
@@ -2728,7 +2728,7 @@
       <title>Grammatical error correction using hybrid systems and type filtering</title>
       <author><first>Mariano</first><last>Felice</last></author>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last></author>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
       <pages>15–24</pages>
@@ -2759,10 +2759,10 @@
     </paper>
     <paper id="5">
       <title><fixed-case>RACAI</fixed-case> <fixed-case>GEC</fixed-case> – A hybrid approach to Grammatical Error Correction</title>
-      <author><first>Tiberiu</first><last>Boroș</last></author>
-      <author><first>Stefan Daniel</first><last>Dumitrescu</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boroș</last></author>
+      <author id="stefan-daniel-dumitrescu"><first>Stefan Daniel</first><last>Dumitrescu</last></author>
       <author><first>Adrian</first><last>Zafiu</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Ionut Paul</first><last>Văduva</last></author>
       <pages>43–48</pages>
       <url hash="c1ee785b">W14-1705</url>
@@ -2790,7 +2790,7 @@
       <title>Tuning a Grammar Correction System for Increased Precision</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Sriram</first><last>Chaudhury</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>60–64</pages>
       <url hash="5be786b5">W14-1708</url>
       <doi>10.3115/v1/W14-1708</doi>
@@ -2799,7 +2799,7 @@
     <paper id="9">
       <title><fixed-case>POSTECH</fixed-case> Grammatical Error Correction System in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2014 Shared Task</title>
       <author><first>Kyusong</first><last>Lee</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>65–73</pages>
       <url hash="4fd69efa">W14-1709</url>
       <doi>10.3115/v1/W14-1709</doi>
@@ -2830,14 +2830,14 @@
     </paper>
     <paper id="12">
       <title><fixed-case>NTHU</fixed-case> at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2014 Shared Task</title>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
-      <author><first>Jim</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
       <author><first>Guan-Cheng</first><last>Huang</last></author>
       <author><first>Jimmy</first><last>Chang</last></author>
       <author><first>Hsiang-Ling</first><last>Hsu</last></author>
-      <author><first>Yu-Wei</first><last>Chang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="yu-wei-chang"><first>Yu-Wei</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>91–95</pages>
       <url hash="4759458b">W14-1712</url>
       <doi>10.3115/v1/W14-1712</doi>
@@ -2857,7 +2857,7 @@
     <meta>
       <booktitle>Proceedings of the Ninth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="12f7fd89">W14-18</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <doi>10.3115/v1/W14-18</doi>
@@ -2874,7 +2874,7 @@
     <paper id="1">
       <title>Automated Measures of Specific Vocabulary Knowledge from Constructed Responses (‘Use These Words to Write a Sentence Based on this Picture’)</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>1–11</pages>
       <url hash="586cfd63">W14-1801</url>
       <doi>10.3115/v1/W14-1801</doi>
@@ -2905,7 +2905,7 @@
       <author><first>Arti</first><last>Ramesh</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <author><first>Bert</first><last>Huang</last></author>
-      <author><first>Hal</first><last>Daumé</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
       <pages>28–33</pages>
       <url hash="41cc4491">W14-1804</url>
@@ -2924,7 +2924,7 @@
     <paper id="6">
       <title>The pragmatics of margin comments: An empirical study</title>
       <author><first>Debora</first><last>Field</last></author>
-      <author><first>Stephen</first><last>Pulman</last></author>
+      <author id="stephen-pulman"><first>Stephen</first><last>Pulman</last></author>
       <author><first>Denise</first><last>Whitelock</last></author>
       <pages>43–53</pages>
       <url hash="0c9f9128">W14-1806</url>
@@ -2945,7 +2945,7 @@
       <author><first>Samuel</first><last>Leeman-Munk</last></author>
       <author><first>Angela</first><last>Shelton</last></author>
       <author><first>Eric</first><last>Wiebe</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <pages>61–67</pages>
       <url hash="3d025af1">W14-1808</url>
       <doi>10.3115/v1/W14-1808</doi>
@@ -2982,7 +2982,7 @@
     <paper id="12">
       <title>Improving Peer Feedback Prediction: The Sentence Level is Right</title>
       <author id="huy-nguyen-pgh"><first>Huy</first><last>Nguyen</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>99–108</pages>
       <url hash="6f41a576">W14-1812</url>
       <doi>10.3115/v1/W14-1812</doi>
@@ -3004,7 +3004,7 @@
     </paper>
     <paper id="14">
       <title>Similarity-Based Non-Scorable Response Detection for Automated Speech Scoring</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Shasha</first><last>Xie</last></author>
       <pages>116–123</pages>
       <url hash="c095caaf">W14-1814</url>
@@ -3013,7 +3013,7 @@
     </paper>
     <paper id="15">
       <title>Natural Language Generation with Vocabulary Constraints</title>
-      <author><first>Ben</first><last>Swanson</last></author>
+      <author id="ben-swanson"><first>Ben</first><last>Swanson</last></author>
       <author><first>Elif</first><last>Yamangil</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <pages>124–133</pages>
@@ -3025,12 +3025,12 @@
       <title>Automated scoring of speaking items in an assessment for teachers of <fixed-case>E</fixed-case>nglish as a Foreign Language</title>
       <author><first>Klaus</first><last>Zechner</last></author>
       <author><first>Keelan</first><last>Evanini</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Lawrence</first><last>Davis</last></author>
       <author><first>Xinhao</first><last>Wang</last></author>
       <author><first>Lei</first><last>Chen</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
-      <author><first>Chee Wee</first><last>Leong</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
       <pages>134–142</pages>
       <url hash="65b416b1">W14-1816</url>
       <doi>10.3115/v1/W14-1816</doi>
@@ -3048,7 +3048,7 @@
     <paper id="18">
       <title>Sentence-level Rewriting Detection</title>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>149–154</pages>
       <url hash="b57bf9d9">W14-1818</url>
       <doi>10.3115/v1/W14-1818</doi>
@@ -3069,7 +3069,7 @@
       <author><first>Martijn</first><last>Wieling</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <author><first>Andrea</first><last>Cimino</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>163–173</pages>
       <url hash="76e0e128">W14-1820</url>
       <doi>10.3115/v1/W14-1820</doi>
@@ -3096,7 +3096,7 @@
       <editor><first>Ani</first><last>Nenkova</last></editor>
       <editor><first>Rupal</first><last>Patel</last></editor>
       <editor><first>Frank</first><last>Rudzicz</last></editor>
-      <editor><first>Annalu</first><last>Waller</last></editor>
+      <editor id="annalu-waller"><first>Annalu</first><last>Waller</last></editor>
       <editor><first>Desislava</first><last>Zhekova</last></editor>
       <doi>10.3115/v1/W14-19</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3159,7 +3159,7 @@
       <title>Preliminary Test of a Real-Time, Interactive Silent Speech Interface Based on Electromagnetic Articulograph</title>
       <author><first>Jun</first><last>Wang</last></author>
       <author><first>Ashok</first><last>Samal</last></author>
-      <author><first>Jordan</first><last>Green</last></author>
+      <author id="jordan-r-green"><first>Jordan</first><last>Green</last></author>
       <pages>38–45</pages>
       <url hash="e8098130">W14-1906</url>
       <doi>10.3115/v1/W14-1906</doi>
@@ -3171,7 +3171,7 @@
       <booktitle>Proceedings of the Fifth Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <url hash="5d9734b4">W14-20</url>
       <editor><first>Vera</first><last>Demberg</last></editor>
-      <editor><first>Timothy</first><last>O’Donnell</last></editor>
+      <editor id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></editor>
       <doi>10.3115/v1/W14-20</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland, USA</address>
@@ -3186,7 +3186,7 @@
     <paper id="1">
       <title>Computationally Rational Saccadic Control: An Explanation of Spillover Effects Based on Sampling from Noisy Perception and Memory</title>
       <author><first>Michael</first><last>Shvartsman</last></author>
-      <author><first>Richard</first><last>Lewis</last></author>
+      <author id="richard-l-lewis"><first>Richard</first><last>Lewis</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
       <pages>1–9</pages>
       <url hash="23817bf7">W14-2001</url>
@@ -3196,7 +3196,7 @@
     <paper id="2">
       <title>Investigating the role of entropy in sentence processing</title>
       <author><first>Tal</first><last>Linzen</last></author>
-      <author><first>Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>Florian</first><last>Jaeger</last></author>
       <pages>10–18</pages>
       <url hash="52673f40">W14-2002</url>
       <doi>10.3115/v1/W14-2002</doi>
@@ -3256,7 +3256,7 @@
     <paper id="8">
       <title>Quantifying the role of discourse topicality in speakers’ choices of referring expressions</title>
       <author><first>Naho</first><last>Orita</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <author><first>Jordan</first><last>Boyd-Graber</last></author>
       <author><first>Eliana</first><last>Vornov</last></author>
       <pages>63–70</pages>
@@ -3269,11 +3269,11 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Argumentation Mining</booktitle>
       <url hash="989571f4">W14-21</url>
-      <editor><first>Nancy</first><last>Green</last></editor>
-      <editor><first>Kevin</first><last>Ashley</last></editor>
-      <editor><first>Diane</first><last>Litman</last></editor>
-      <editor><first>Chris</first><last>Reed</last></editor>
-      <editor><first>Vern</first><last>Walker</last></editor>
+      <editor id="nancy-green"><first>Nancy</first><last>Green</last></editor>
+      <editor id="kevin-d-ashley"><first>Kevin</first><last>Ashley</last></editor>
+      <editor id="diane-litman"><first>Diane</first><last>Litman</last></editor>
+      <editor id="chris-reed"><first>Chris</first><last>Reed</last></editor>
+      <editor id="vern-walker"><first>Vern</first><last>Walker</last></editor>
       <doi>10.3115/v1/W14-21</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland</address>
@@ -3326,7 +3326,7 @@
     <paper id="5">
       <title>Identifying Appropriate Support for Propositions in Online User Comments</title>
       <author><first>Joonsuk</first><last>Park</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>29–38</pages>
       <url hash="9fe9b13b">W14-2105</url>
       <doi>10.3115/v1/W14-2105</doi>
@@ -3429,8 +3429,8 @@
     </paper>
     <paper id="15">
       <title>Survey in sentiment, polarity and function analysis of citation</title>
-      <author><first>Myriam</first><last>Hernández A</last></author>
-      <author><first>José M.</first><last>Gómez</last></author>
+      <author id="myriam-hernandez"><first>Myriam</first><last>Hernández A</last></author>
+      <author id="jose-m-gomez"><first>José M.</first><last>Gómez</last></author>
       <pages>102–103</pages>
       <url hash="08fe0543">W14-2115</url>
       <doi>10.3115/v1/W14-2115</doi>
@@ -3460,7 +3460,7 @@
     <paper id="18">
       <title>Requirement Mining in Technical Documents</title>
       <author><first>Juyeon</first><last>Kang</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>108–109</pages>
       <url hash="2853c6a8">W14-2118</url>
       <doi>10.3115/v1/W14-2118</doi>
@@ -3472,8 +3472,8 @@
       <booktitle>Proceedings of the 2014 Workshop on the Use of Computational Methods in the Study of Endangered Languages</booktitle>
       <url hash="2a2e43bb">W14-22</url>
       <editor><first>Jeff</first><last>Good</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
-      <editor><first>Owen</first><last>Rambow</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="owen-rambow"><first>Owen</first><last>Rambow</last></editor>
       <doi>10.3115/v1/W14-22</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland, USA</address>
@@ -3535,7 +3535,7 @@
       <author><first>Kaidi</first><last>Lõo</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
       <author><first>Jordan</first><last>Lachler</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <pages>34–42</pages>
       <url hash="f41eb5ba">W14-2205</url>
@@ -3544,9 +3544,9 @@
     </paper>
     <paper id="6">
       <title>Learning Grammar Specifications from <fixed-case>IGT</fixed-case>: A Case Study of Chintang</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Joshua</first><last>Crowgey</last></author>
-      <author><first>Michael Wayne</first><last>Goodman</last></author>
+      <author id="michael-wayne-goodman"><first>Michael Wayne</first><last>Goodman</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <pages>43–53</pages>
       <url hash="b7db8a1d">W14-2206</url>
@@ -3557,7 +3557,7 @@
       <title>Creating Lexical Resources for Endangered Languages</title>
       <author><first>Khang Nhut</first><last>Lam</last></author>
       <author><first>Feras</first><last>Al Tarouti</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>54–62</pages>
       <url hash="80097ba4">W14-2207</url>
       <doi>10.3115/v1/W14-2207</doi>
@@ -3574,7 +3574,7 @@
     <paper id="9">
       <title><fixed-case>I</fixed-case>nterlingua<fixed-case>P</fixed-case>lus Machine Translation Approach for Local Languages: Ekegusii &amp; <fixed-case>S</fixed-case>wahili</title>
       <author><first>Edward</first><last>Ombui</last></author>
-      <author><first>Peter</first><last>Wagacha</last></author>
+      <author id="peter-waiganjo-wagacha"><first>Peter</first><last>Wagacha</last></author>
       <author><first>Wanjiku</first><last>Ng’ang’a</last></author>
       <pages>68–72</pages>
       <url hash="d6f10344">W14-2209</url>
@@ -3650,7 +3650,7 @@
       <author><first>Hyeju</first><last>Jang</last></author>
       <author><first>Mario</first><last>Piergallini</last></author>
       <author><first>Miaomiao</first><last>Wen</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1–10</pages>
       <url hash="940931a1">W14-2301</url>
       <doi>10.3115/v1/W14-2301</doi>
@@ -3670,7 +3670,7 @@
     <paper id="3">
       <title>Metaphor Detection through Term Relevance</title>
       <author><first>Marc</first><last>Schulder</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>18–26</pages>
       <url hash="7eec09d4">W14-2303</url>
       <doi>10.3115/v1/W14-2303</doi>
@@ -3691,7 +3691,7 @@
       <author><first>Suzanne</first><last>Wertheim</last></author>
       <author><first>Vladimir</first><last>Zaytsev</last></author>
       <author><first>Niloofar</first><last>Montazeri</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <pages>33–41</pages>
       <url hash="000f6f95">W14-2305</url>
       <doi>10.3115/v1/W14-2305</doi>
@@ -3699,12 +3699,12 @@
     </paper>
     <paper id="6">
       <title>Computing Affect in Metaphors</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Samira</first><last>Shaikh</last></author>
       <author><first>Kit</first><last>Cho</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
       <author><first>Ting</first><last>Liu</last></author>
       <author><first>Ignacio</first><last>Cases</last></author>
@@ -3744,7 +3744,7 @@
     </frontmatter>
     <paper id="1">
       <title>Learning a Lexicon for Broad-coverage Semantic Parsing</title>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>1–6</pages>
       <url hash="e70b38b8">W14-2401</url>
       <doi>10.3115/v1/W14-2401</doi>
@@ -3752,9 +3752,9 @@
     </paper>
     <paper id="2">
       <title>Semantic Parsing using Distributional Semantics and Probabilistic Logic</title>
-      <author><first>Islam</first><last>Beltagy</last></author>
+      <author id="islam-beltagy"><first>Islam</first><last>Beltagy</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>7–11</pages>
       <url hash="8bc815d5">W14-2402</url>
       <doi>10.3115/v1/W14-2402</doi>
@@ -3772,9 +3772,9 @@
     </paper>
     <paper id="4">
       <title>Semantic Parsing for Text to 3<fixed-case>D</fixed-case> Scene Generation</title>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Manolis</first><last>Savva</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>17–21</pages>
       <url hash="2329a09d">W14-2404</url>
       <doi>10.3115/v1/W14-2404</doi>
@@ -3782,8 +3782,8 @@
     </paper>
     <paper id="5">
       <title>A Deep Architecture for Semantic Parsing</title>
-      <author><first>Edward</first><last>Grefenstette</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <author><first>Nando</first><last>de Freitas</last></author>
       <author><first>Karl Moritz</first><last>Hermann</last></author>
       <pages>22–27</pages>
@@ -3794,7 +3794,7 @@
     <paper id="6">
       <title>Combining Formal and Distributional Models of Temporal and Intensional Semantics</title>
       <author><first>Mike</first><last>Lewis</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>28–32</pages>
       <url hash="14234cec">W14-2406</url>
       <doi>10.3115/v1/W14-2406</doi>
@@ -3813,8 +3813,8 @@
     </paper>
     <paper id="8">
       <title>Representing Caused Motion in Embodied Construction Grammar</title>
-      <author><first>Ellen K.</first><last>Dodge</last></author>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="ellen-k-dodge"><first>Ellen K.</first><last>Dodge</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
       <pages>39–44</pages>
       <url hash="d2a4650e">W14-2408</url>
       <doi>10.3115/v1/W14-2408</doi>
@@ -3822,8 +3822,8 @@
     </paper>
     <paper id="9">
       <title>Low-Dimensional Embeddings of Logic</title>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
-      <author><first>Matko</first><last>Bosnjak</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="matko-bosnjak"><first>Matko</first><last>Bosnjak</last></author>
       <author><first>Sameer</first><last>Singh</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>45–49</pages>
@@ -3844,7 +3844,7 @@
     </paper>
     <paper id="11">
       <title>From Treebank Parses to Episodic Logic and Commonsense Inference</title>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>55–60</pages>
       <url hash="cf6b396a">W14-2411</url>
       <doi>10.3115/v1/W14-2411</doi>
@@ -3881,7 +3881,7 @@
     </paper>
     <paper id="15">
       <title>Towards <fixed-case>README</fixed-case>-<fixed-case>EVAL</fixed-case> : Interpreting <fixed-case>README</fixed-case> File Instructions</title>
-      <author><first>James</first><last>White</last></author>
+      <author id="james-paul-white"><first>James</first><last>White</last></author>
       <pages>76–81</pages>
       <url hash="373f6322">W14-2415</url>
       <doi>10.3115/v1/W14-2415</doi>
@@ -3904,8 +3904,8 @@
       <url hash="b3829e2f">W14-25</url>
       <editor><first>Cristian</first><last>Danescu-Niculescu-Mizil</last></editor>
       <editor><first>Jacob</first><last>Eisenstein</last></editor>
-      <editor><first>Kathleen</first><last>McKeown</last></editor>
-      <editor><first>Noah A.</first><last>Smith</last></editor>
+      <editor id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></editor>
+      <editor id="noah-a-smith"><first>Noah A.</first><last>Smith</last></editor>
       <doi>10.3115/v1/W14-25</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, MD, USA</address>
@@ -3952,9 +3952,9 @@
     <paper id="5">
       <title>Overview of the 2014 <fixed-case>NLP</fixed-case> Unshared Task in <fixed-case>P</fixed-case>oli<fixed-case>I</fixed-case>nformatics</title>
       <author><first>Noah A.</first><last>Smith</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Anne</first><last>Washington</last></author>
-      <author><first>John</first><last>Wilkerson</last></author>
+      <author id="john-wilkerson"><first>John</first><last>Wilkerson</last></author>
       <pages>5–7</pages>
       <url hash="aeb9aca2">W14-2505</url>
       <doi>10.3115/v1/W14-2505</doi>
@@ -3971,10 +3971,10 @@
     <paper id="7">
       <title>Extracting Socioeconomic Patterns from the News: Modelling Text and Outlet Importance Jointly</title>
       <author><first>Vasileios</first><last>Lampos</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Sina</first><last>Samangooei</last></author>
       <author><first>Douwe</first><last>Gelling</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>13–17</pages>
       <url hash="1bd1c5e4">W14-2507</url>
       <doi>10.3115/v1/W14-2507</doi>
@@ -4035,7 +4035,7 @@
     <paper id="13">
       <title>Optimizing Features in Active Machine Learning for Complex Qualitative Content Analysis</title>
       <author><first>Jasy Suet Yan</first><last>Liew</last></author>
-      <author><first>Nancy</first><last>McCracken</last></author>
+      <author id="nancy-mccracken"><first>Nancy</first><last>McCracken</last></author>
       <author><first>Shichun</first><last>Zhou</last></author>
       <author><first>Kevin</first><last>Crowston</last></author>
       <pages>44–48</pages>
@@ -4047,7 +4047,7 @@
       <title>Power of Confidence: How Poll Scores Impact Topic Dynamics in Political Debates</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Ashima</first><last>Arora</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>49</pages>
       <url hash="be9b95a0">W14-2514</url>
       <doi>10.3115/v1/W14-2514</doi>
@@ -4088,9 +4088,9 @@
     </paper>
     <paper id="18">
       <title>Using Simple <fixed-case>NLP</fixed-case> Tools to Trace the Globalization of the Art World</title>
-      <author><first>Mohamed</first><last>AlTantawy</last></author>
+      <author id="mohamed-altantawy"><first>Mohamed</first><last>AlTantawy</last></author>
       <author><first>Alix</first><last>Rule</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Zhongyu</first><last>Wang</last></author>
       <author><first>Rupayan</first><last>Basu</last></author>
       <pages>66–70</pages>
@@ -4100,7 +4100,7 @@
     </paper>
     <paper id="19">
       <title>Issue Framing as a Generalizable Phenomenon</title>
-      <author><first>Amber</first><last>Boydstun</last></author>
+      <author id="amber-boydstun"><first>Amber</first><last>Boydstun</last></author>
       <pages>71</pages>
       <url hash="180550f4">W14-2519</url>
       <doi>10.3115/v1/W14-2519</doi>
@@ -4119,10 +4119,10 @@
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="e4b23f06">W14-26</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Erik</first><last>van der Goot</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></editor>
       <editor><first>Ralf</first><last>Steinberger</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
       <doi>10.3115/v1/W14-26</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland</address>
@@ -4136,7 +4136,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>W</fixed-case>ords: Evaluative, Emotional, Colourful, Musical!</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>1</pages>
       <url hash="7c04eb50">W14-2601</url>
       <doi>10.3115/v1/W14-2601</doi>
@@ -4145,7 +4145,7 @@
     <paper id="2">
       <title>Robust Cross-Domain Sentiment Analysis for Low-Resource Languages</title>
       <author><first>Jakob</first><last>Elming</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>2–7</pages>
       <url hash="49f02a2b">W14-2602</url>
@@ -4155,7 +4155,7 @@
     <paper id="3">
       <title>An Investigation for Implicatures in <fixed-case>C</fixed-case>hinese : Implicatures in <fixed-case>C</fixed-case>hinese and in <fixed-case>E</fixed-case>nglish are similar !</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>8–17</pages>
       <url hash="cb33f14b">W14-2603</url>
       <doi>10.3115/v1/W14-2603</doi>
@@ -4191,7 +4191,7 @@
     </paper>
     <paper id="7">
       <title>Semantic Role Labeling of Emotions in Tweets</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Xiaodan</first><last>Zhu</last></author>
       <author><first>Joel</first><last>Martin</last></author>
       <pages>32–41</pages>
@@ -4202,7 +4202,7 @@
     <paper id="8">
       <title>An Impact Analysis of Features in a Classification Approach to Irony Detection in Product Reviews</title>
       <author><first>Konstantin</first><last>Buschmeier</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>42–49</pages>
       <url hash="1225d80d">W14-2608</url>
@@ -4222,9 +4222,9 @@
     <paper id="10">
       <title>Emotive or Non-emotive: That is The Question</title>
       <author><first>Michal</first><last>Ptaszynski</last></author>
-      <author><first>Fumito</first><last>Masui</last></author>
+      <author id="fumito-masui"><first>Fumito</first><last>Masui</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>59–65</pages>
       <url hash="b071a1c0">W14-2610</url>
       <doi>10.3115/v1/W14-2610</doi>
@@ -4233,7 +4233,7 @@
     <paper id="11">
       <title>Challenges in Creating a Multilingual Sentiment Analysis Application for Social Media Mining</title>
       <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Erik</first><last>van der Goot</last></author>
       <pages>66</pages>
       <url hash="a296aa7f">W14-2611</url>
@@ -4243,7 +4243,7 @@
     <paper id="12">
       <title>Two-Step Model for Sentiment Lexicon Extraction from <fixed-case>T</fixed-case>witter Streams</title>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>67–72</pages>
       <url hash="382f6c9d">W14-2612</url>
       <doi>10.3115/v1/W14-2612</doi>
@@ -4283,7 +4283,7 @@
       <title>Sentiment classification of online political discussions: a comparison of a word-based and dependency-based method</title>
       <author><first>Hugo Lewi</first><last>Hammer</last></author>
       <author><first>Per Erik</first><last>Solberg</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>90–96</pages>
       <url hash="bac5f83f">W14-2616</url>
       <doi>10.3115/v1/W14-2616</doi>
@@ -4292,7 +4292,7 @@
     <paper id="17">
       <title>Improving Agreement and Disagreement Identification in Online Discussions with A Socially-Tuned Sentiment Lexicon</title>
       <author><first>Lu</first><last>Wang</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>97–106</pages>
       <url hash="1a555fa0">W14-2617</url>
       <doi>10.3115/v1/W14-2617</doi>
@@ -4302,7 +4302,7 @@
       <title>Lexical Acquisition for Opinion Inference: A Sense-Level Lexicon of Benefactive and Malefactive Events</title>
       <author><first>Yoonjung</first><last>Choi</last></author>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>107–112</pages>
       <url hash="4b126797">W14-2618</url>
       <doi>10.3115/v1/W14-2618</doi>
@@ -4311,7 +4311,7 @@
     <paper id="19">
       <title>Dive deeper: Deep Semantics for Sentiment Analysis</title>
       <author><first>Nikhilkumar</first><last>Jadhav</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>113–118</pages>
       <url hash="ae88201c">W14-2619</url>
       <doi>10.3115/v1/W14-2619</doi>
@@ -4342,7 +4342,7 @@
       <title>Effect of Using Regression on Class Confidence Scores in Sentiment Analysis of <fixed-case>T</fixed-case>witter Data</title>
       <author><first>Itir</first><last>Onal</last></author>
       <author><first>Ali Mert</first><last>Ertugrul</last></author>
-      <author><first>Ruken</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruken</first><last>Cakici</last></author>
       <pages>136–141</pages>
       <url hash="b29b28fb">W14-2622</url>
       <doi>10.3115/v1/W14-2622</doi>
@@ -4352,7 +4352,7 @@
       <title>A cognitive study of subjectivity extraction in sentiment annotation</title>
       <author><first>Abhijit</first><last>Mishra</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>142–146</pages>
       <url hash="7d987a34">W14-2623</url>
       <doi>10.3115/v1/W14-2623</doi>
@@ -4370,7 +4370,7 @@
     </paper>
     <paper id="25">
       <title>A Conceptual Framework for Inferring Implicatures</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Lingjia</first><last>Deng</last></author>
       <pages>154–159</pages>
       <url hash="c88a2f2f">W14-2625</url>
@@ -4382,7 +4382,7 @@
     <meta>
       <booktitle>Proceedings of the Joint Workshop on Social Dynamics and Personal Attributes in Social Media</booktitle>
       <url hash="b23df726">W14-27</url>
-      <editor><first>Alice</first><last>Oh</last></editor>
+      <editor id="alice-oh"><first>Alice</first><last>Oh</last></editor>
       <editor><first>Benjamin</first><last>Van Durme</last></editor>
       <editor><first>David</first><last>Yarowsky</last></editor>
       <editor><first>Oren</first><last>Tsur</last></editor>
@@ -4410,7 +4410,7 @@
     </paper>
     <paper id="2">
       <title>Using County Demographics to Infer Attributes of <fixed-case>T</fixed-case>witter Users</title>
-      <author><first>Ehsan</first><last>Mohammady</last></author>
+      <author id="ehsan-mohammady-ardehaly"><first>Ehsan</first><last>Mohammady</last></author>
       <author><first>Aron</first><last>Culotta</last></author>
       <pages>7–16</pages>
       <url hash="fe92a387">W14-2702</url>
@@ -4420,7 +4420,7 @@
     <paper id="3">
       <title>The Enrollment Effect: A Study of <fixed-case>A</fixed-case>mazon’s Vine Program</title>
       <author><first>Dinesh</first><last>Puranam</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>17–27</pages>
       <url hash="a8780aaf">W14-2703</url>
       <doi>10.3115/v1/W14-2703</doi>
@@ -4432,7 +4432,7 @@
       <author><first>Heather</first><last>Pon-Barry</last></author>
       <author><first>Subbarao</first><last>Kambhampati</last></author>
       <author><first>Eric</first><last>Hekler</last></author>
-      <author><first>David W.</first><last>McDonald</last></author>
+      <author id="david-w-mcdonald"><first>David W.</first><last>McDonald</last></author>
       <pages>28–32</pages>
       <url hash="dac92e54">W14-2704</url>
       <doi>10.3115/v1/W14-2704</doi>
@@ -4451,7 +4451,7 @@
     <paper id="6">
       <title>Self-disclosure topic model for <fixed-case>T</fixed-case>witter conversations</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <author><first>Alice</first><last>Oh</last></author>
       <pages>42–49</pages>
       <url hash="ec37e964">W14-2706</url>
@@ -4461,7 +4461,7 @@
     <paper id="7">
       <title>Detecting and Evaluating Local Text Reuse in Social Networks</title>
       <author><first>Shaobin</first><last>Xu</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Abigail</first><last>Mullen</last></author>
       <author><first>Ryan</first><last>Cordell</last></author>
       <pages>50–57</pages>
@@ -4472,7 +4472,7 @@
     <paper id="8">
       <title>Generating Subjective Responses to Opinionated Articles in Social Media: An Agenda-Driven Architecture and a <fixed-case>T</fixed-case>uring-Like Test</title>
       <author><first>Tomer</first><last>Cagan</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <pages>58–67</pages>
       <url hash="10226ac8">W14-2708</url>
@@ -4491,7 +4491,7 @@
       <title>Power of Confidence: How Poll Scores Impact Topic Dynamics in Political Debates</title>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
       <author><first>Ashima</first><last>Arora</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>77–82</pages>
       <url hash="52a0961b">W14-2710</url>
       <doi>10.3115/v1/W14-2710</doi>
@@ -4529,7 +4529,7 @@
     <paper id="14">
       <title>User Type Classification of Tweets with Implications for Event Recognition</title>
       <author><first>Lalindra</first><last>De Silva</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>98–108</pages>
       <url hash="0b788011">W14-2714</url>
       <doi>10.3115/v1/W14-2714</doi>
@@ -4539,7 +4539,7 @@
       <title>Collective Stance Classification of Posts in Online Debate Forums</title>
       <author><first>Dhanya</first><last>Sridhar</last></author>
       <author><first>Lise</first><last>Getoor</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>109–117</pages>
       <url hash="e9703c9e">W14-2715</url>
       <doi>10.3115/v1/W14-2715</doi>
@@ -4550,7 +4550,7 @@
     <meta>
       <booktitle>Proceedings of the 2014 Joint Meeting of <fixed-case>SIGMORPHON</fixed-case> and <fixed-case>SIGFSM</fixed-case></booktitle>
       <url hash="1a01a2dd">W14-28</url>
-      <editor><first>Özlem</first><last>Çetinoğlu</last></editor>
+      <editor id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></editor>
       <editor><first>Jeffrey</first><last>Heinz</last></editor>
       <editor><first>Andreas</first><last>Maletti</last></editor>
       <editor><first>Jason</first><last>Riggle</last></editor>
@@ -4584,8 +4584,8 @@
     </paper>
     <paper id="3">
       <title>Comparing Models of Phonotactics for Word Segmentation</title>
-      <author><first>Natalie</first><last>Schrimpf</last></author>
-      <author><first>Gaja</first><last>Jarosz</last></author>
+      <author id="natalie-m-schrimpf"><first>Natalie</first><last>Schrimpf</last></author>
+      <author id="gaja-jarosz"><first>Gaja</first><last>Jarosz</last></author>
       <pages>19–28</pages>
       <url hash="300e81c1">W14-2803</url>
       <doi>10.3115/v1/W14-2803</doi>
@@ -4602,7 +4602,7 @@
     <paper id="5">
       <title>Automatic Conversion of Dialectal <fixed-case>T</fixed-case>amil Text to Standard Written <fixed-case>T</fixed-case>amil Text using <fixed-case>FST</fixed-case>s</title>
       <author><first>Marimuthu</first><last>K</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>37–45</pages>
       <url hash="867d4a97">W14-2805</url>
       <doi>10.3115/v1/W14-2805</doi>
@@ -4611,7 +4611,7 @@
     <paper id="6">
       <title>Rule Based Morphological Analyzer of <fixed-case>K</fixed-case>azakh Language</title>
       <author><first>Gulshat</first><last>Kessikbayeva</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <pages>46–54</pages>
       <url hash="d84c4bac">W14-2806</url>
       <doi>10.3115/v1/W14-2806</doi>
@@ -4621,8 +4621,8 @@
       <title>Rules, Analogy, and Social Factors Codetermine Past-tense Formation Patterns in <fixed-case>E</fixed-case>nglish</title>
       <author><first>Péter</first><last>Rácz</last></author>
       <author><first>Clayton</first><last>Beckner</last></author>
-      <author><first>Jennifer B.</first><last>Hay</last></author>
-      <author><first>Janet B.</first><last>Pierrehumbert</last></author>
+      <author id="jennifer-hay"><first>Jennifer B.</first><last>Hay</last></author>
+      <author id="janet-pierrehumbert"><first>Janet B.</first><last>Pierrehumbert</last></author>
       <pages>55–63</pages>
       <url hash="515f6739">W14-2807</url>
       <doi>10.3115/v1/W14-2807</doi>
@@ -4644,8 +4644,8 @@
       <booktitle>Proceedings of the Second Workshop on <fixed-case>EVENTS</fixed-case>: Definition, Detection, Coreference, and Representation</booktitle>
       <url hash="11b347ad">W14-29</url>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <doi>10.3115/v1/W14-29</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland, USA</address>
@@ -4669,8 +4669,8 @@
     <paper id="2">
       <title>Verbal Valency Frame Detection and Selection in <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>6–11</pages>
       <url hash="aeb0da4d">W14-2902</url>
       <doi>10.3115/v1/W14-2902</doi>
@@ -4710,7 +4710,7 @@
     </paper>
     <paper id="6">
       <title>Conceptual and Practical Steps in Event Coreference Analysis of Large-scale Data</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
       <author><first>Jonathan</first><last>Sonntag</last></author>
       <author><first>Yulia</first><last>Grishina</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
@@ -4725,7 +4725,7 @@
       <author><first>Charley</first><last>Beller</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
       <author><first>Benjamin</first><last>Van Durme</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
       <pages>45–53</pages>
@@ -4766,7 +4766,7 @@
     <meta>
       <booktitle>Proceedings of Frame Semantics in <fixed-case>NLP</fixed-case>: A Workshop in Honor of Chuck <fixed-case>F</fixed-case>illmore (1929-2014)</booktitle>
       <url hash="11713e30">W14-30</url>
-      <editor><first>Miriam R. L.</first><last>Petruck</last></editor>
+      <editor id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></editor>
       <editor><first>Gerard</first><last>de Melo</last></editor>
       <doi>10.3115/v1/W14-30</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4781,7 +4781,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: A Knowledge Base for Natural Language Processing</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <pages>1–5</pages>
       <url hash="630c2925">W14-3001</url>
       <doi>10.3115/v1/W14-3001</doi>
@@ -4789,7 +4789,7 @@
     </paper>
     <paper id="2">
       <title>The Case for Empiricism (With and Without Statistics)</title>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <pages>6–9</pages>
       <url hash="f71220a6">W14-3002</url>
       <doi>10.3115/v1/W14-3002</doi>
@@ -4797,7 +4797,7 @@
     </paper>
     <paper id="3">
       <title>Case, Constructions, <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et, and the Deep Lexicon</title>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <pages>10–12</pages>
       <url hash="4e07e5a9">W14-3003</url>
       <doi>10.3115/v1/W14-3003</doi>
@@ -4805,9 +4805,9 @@
     </paper>
     <paper id="4">
       <title><fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink+: <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and Event Ontologies</title>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <pages>13–17</pages>
       <url hash="d416c75d">W14-3004</url>
       <doi>10.3115/v1/W14-3004</doi>
@@ -4815,7 +4815,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and Linked Data</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>18–21</pages>
       <url hash="4bda2fde">W14-3005</url>
       <doi>10.3115/v1/W14-3005</doi>
@@ -4823,7 +4823,7 @@
     </paper>
     <paper id="6">
       <title>Bridging Text and Knowledge with Frames</title>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <pages>22–25</pages>
       <url hash="946c6984">W14-3006</url>
       <doi>10.3115/v1/W14-3006</doi>
@@ -4841,7 +4841,7 @@
       <title>Using Frame Semantics in Natural Language Processing</title>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Daniel</first><last>Bauer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>30–33</pages>
       <url hash="3a29074b">W14-3008</url>
       <doi>10.3115/v1/W14-3008</doi>
@@ -4871,7 +4871,7 @@
       <url hash="b76b4704">W14-31</url>
       <editor><first>Jason</first><last>Chuang</last></editor>
       <editor><first>Spence</first><last>Green</last></editor>
-      <editor><first>Marti</first><last>Hearst</last></editor>
+      <editor id="marti-a-hearst"><first>Marti</first><last>Hearst</last></editor>
       <editor><first>Jeffrey</first><last>Heer</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <doi>10.3115/v1/W14-31</doi>
@@ -4895,9 +4895,9 @@
     </paper>
     <paper id="2">
       <title>Interactive Learning of Spatial Knowledge for Text to 3<fixed-case>D</fixed-case> Scene Generation</title>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Manolis</first><last>Savva</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>14–21</pages>
       <url hash="c8f1d9b9">W14-3102</url>
       <doi>10.3115/v1/W14-3102</doi>
@@ -4925,10 +4925,10 @@
     </paper>
     <paper id="5">
       <title><fixed-case>GLANCE</fixed-case> Visualizes Lexical Phenomena for Language Learning</title>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Shih-Ting</first><last>Huang</last></author>
-      <author><first>Ting-Hui</first><last>Kao</last></author>
-      <author><first>Hsun-wen</first><last>Chiu</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="shih-ting-huang"><first>Shih-Ting</first><last>Huang</last></author>
+      <author id="ting-hui-kao"><first>Ting-Hui</first><last>Kao</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-wen</first><last>Chiu</last></author>
       <author><first>Tzu-Hsi</first><last>Yen</last></author>
       <pages>34–37</pages>
       <url hash="a958516a">W14-3105</url>
@@ -4938,7 +4938,7 @@
     <paper id="6">
       <title><fixed-case>SPIED</fixed-case>: <fixed-case>S</fixed-case>tanford Pattern based Information Extraction and Diagnostics</title>
       <author><first>Sonal</first><last>Gupta</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>38–44</pages>
       <url hash="a41c2949">W14-3106</url>
       <doi>10.3115/v1/W14-3106</doi>
@@ -4948,7 +4948,7 @@
       <title>Interactive Exploration of Asynchronous Conversations: Applying a User-centered Approach to Design a Visual Text Analytic System</title>
       <author><first>Enamul</first><last>Hoque</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Shafiq</first><last>Joty</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
       <pages>45–52</pages>
       <url hash="49721f6b">W14-3107</url>
       <doi>10.3115/v1/W14-3107</doi>
@@ -4964,7 +4964,7 @@
     </paper>
     <paper id="9">
       <title>Design of an Active Learning System with Human Correction for Content Analysis</title>
-      <author><first>Nancy</first><last>McCracken</last></author>
+      <author id="nancy-mccracken"><first>Nancy</first><last>McCracken</last></author>
       <author><first>Jasy Suet Yan</first><last>Liew</last></author>
       <author><first>Kevin</first><last>Crowston</last></author>
       <pages>59–62</pages>
@@ -5044,9 +5044,9 @@
     </paper>
     <paper id="3">
       <title>Comparison of different feature sets for identification of variants in progressive aphasia</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
-      <author><first>Naida L.</first><last>Graham</last></author>
+      <author id="naida-graham"><first>Naida L.</first><last>Graham</last></author>
       <author><first>Jed A.</first><last>Meltzer</last></author>
       <author><first>Sandra E.</first><last>Black</last></author>
       <author><first>Elizabeth</first><last>Rochon</last></author>
@@ -5081,9 +5081,9 @@
     <paper id="6">
       <title>Detecting linguistic idiosyncratic interests in autism using distributional semantic models</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>46–50</pages>
       <url hash="30afcae3">W14-3206</url>
       <doi>10.3115/v1/W14-3206</doi>
@@ -5102,7 +5102,7 @@
     <paper id="8">
       <title>Applying prosodic speech features in mental health care: An exploratory study in a life-review intervention for depression</title>
       <author><first>Sanne M.A.</first><last>Lamers</last></author>
-      <author><first>Khiet P.</first><last>Truong</last></author>
+      <author id="khiet-p-truong"><first>Khiet P.</first><last>Truong</last></author>
       <author><first>Bas</first><last>Steunenberg</last></author>
       <author><first>Franciska</first><last>de Jong</last></author>
       <author><first>Gerben J.</first><last>Westerhof</last></author>
@@ -5136,7 +5136,7 @@
       <author><first>Hiroki</first><last>Tanaka</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>88–96</pages>
       <url hash="e76c3828">W14-3211</url>
@@ -5163,7 +5163,7 @@
       <author><first>Tong</first><last>Liu</last></author>
       <author><first>Megan</first><last>Lytle</last></author>
       <author><first>Vincent</first><last>Silenzio</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <pages>107–117</pages>
       <url hash="4d2f7ba3">W14-3213</url>
       <doi>10.3115/v1/W14-3213</doi>
@@ -5171,14 +5171,14 @@
     </paper>
     <paper id="14">
       <title>Towards Assessing Changes in Degree of Depression through <fixed-case>F</fixed-case>acebook</title>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Margaret L.</first><last>Kern</last></author>
+      <author id="margaret-kern"><first>Margaret L.</first><last>Kern</last></author>
       <author><first>Gregory</first><last>Park</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>David</first><last>Stillwell</last></author>
       <author><first>Michal</first><last>Kosinski</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>118–125</pages>
       <url hash="df52b63b">W14-3214</url>
       <doi>10.3115/v1/W14-3214</doi>
@@ -5189,7 +5189,7 @@
     <meta>
       <booktitle>Proceedings of the Ninth Workshop on Statistical Machine Translation</booktitle>
       <url hash="687d0abf">W14-33</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Christian</first><last>Buck</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
@@ -5229,7 +5229,7 @@
       <author><first>Christof</first><last>Monz</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Herve</first><last>Saint-Amand</last></author>
+      <author id="herve-saint-amand"><first>Herve</first><last>Saint-Amand</last></author>
       <author><first>Radu</first><last>Soricut</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
@@ -5240,7 +5240,7 @@
     </paper>
     <paper id="3">
       <title>Parallel <fixed-case>FDA</fixed-case>5 for Fast Deployment of Accurate Statistical Machine Translation Systems</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>59–65</pages>
@@ -5262,7 +5262,7 @@
       <author><first>Fabienne</first><last>Cap</last></author>
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Anita</first><last>Ramm</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>71–78</pages>
       <url hash="4e5c0ad8">W14-3305</url>
       <doi>10.3115/v1/W14-3305</doi>
@@ -5270,10 +5270,10 @@
     </paper>
     <paper id="6">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi system description for <fixed-case>WMT</fixed-case> 2014: Deep Source-Context Features for <fixed-case>M</fixed-case>oses</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Parth</first><last>Gupta</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>79–83</pages>
       <url hash="1b9e265f">W14-3306</url>
       <doi>10.3115/v1/W14-3306</doi>
@@ -5281,12 +5281,12 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>KIT</fixed-case>-<fixed-case>LIMSI</fixed-case> Translation System for <fixed-case>WMT</fixed-case> 2014</title>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Alexander</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>84–89</pages>
       <url hash="71ddec22">W14-3307</url>
       <doi>10.3115/v1/W14-3307</doi>
@@ -5295,11 +5295,11 @@
     <paper id="8">
       <title>The <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Translation System at <fixed-case>WMT</fixed-case> 2014</title>
       <author><first>Piyush</first><last>Dungarwal</last></author>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Ritesh</first><last>Shah</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="ritesh-shah"><first>Ritesh</first><last>Shah</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>90–96</pages>
       <url hash="34d743c1">W14-3308</url>
       <doi>10.3115/v1/W14-3308</doi>
@@ -5321,16 +5321,16 @@
       <author><first>Markus</first><last>Freitag</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Nadir</first><last>Durrani</last></author>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>105–113</pages>
       <url hash="5c48e971">W14-3310</url>
       <doi>10.3115/v1/W14-3310</doi>
@@ -5340,7 +5340,7 @@
       <title><fixed-case>P</fixed-case>hrasal: A Toolkit for New Directions in Statistical Machine Translation</title>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>114–121</pages>
       <url hash="4fc4700a">W14-3311</url>
       <doi>10.3115/v1/W14-3311</doi>
@@ -5350,7 +5350,7 @@
       <title>Anaphora Models and Reordering for Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Aaron</first><last>Smith</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>122–129</pages>
@@ -5367,7 +5367,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Isabel</first><last>Slawik</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>130–135</pages>
       <url hash="0a9b4ad7">W14-3313</url>
       <doi>10.3115/v1/W14-3313</doi>
@@ -5377,7 +5377,7 @@
       <title>The <fixed-case>DCU</fixed-case>-<fixed-case>ICTCAS</fixed-case> <fixed-case>MT</fixed-case> system at <fixed-case>WMT</fixed-case> 2014 on <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Translation Task</title>
       <author><first>Liangyou</first><last>Li</last></author>
       <author><first>Xiaofeng</first><last>Wu</last></author>
-      <author><first>Santiago Cortés</first><last>Vaíllo</last></author>
+      <author id="santiago-cortes-vaillo"><first>Santiago Cortés</first><last>Vaíllo</last></author>
       <author><first>Jun</first><last>Xie</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Qun</first><last>Liu</last></author>
@@ -5396,8 +5396,8 @@
       <author><first>Eva</first><last>Schlinger</last></author>
       <author><first>Swabha</first><last>Swayamdipta</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>142–149</pages>
       <url hash="904211f0">W14-3315</url>
       <doi>10.3115/v1/W14-3315</doi>
@@ -5409,7 +5409,7 @@
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>150–156</pages>
       <url hash="169462b7">W14-3316</url>
       <doi>10.3115/v1/W14-3316</doi>
@@ -5420,7 +5420,7 @@
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>157–162</pages>
       <url hash="d38e282e">W14-3317</url>
       <doi>10.3115/v1/W14-3317</doi>
@@ -5437,12 +5437,12 @@
     </paper>
     <paper id="19">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2014 Translation Task: Two-step Data Selection and <fixed-case>RBMT</fixed-case>-Style Synthetic Rules</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Jorge</first><last>Ferrández-Tordera</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>171–177</pages>
@@ -5452,7 +5452,7 @@
     </paper>
     <paper id="20">
       <title>The <fixed-case>UA</fixed-case>-Prompsit hybrid machine translation system for the 2014 Workshop on Statistical Machine Translation</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>178–185</pages>
@@ -5463,7 +5463,7 @@
     <paper id="21">
       <title>Machine Translation and Monolingual Postediting: The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>-14 System</title>
       <author><first>Lane</first><last>Schwartz</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <author><first>Katherine</first><last>Young</last></author>
       <pages>186–194</pages>
@@ -5497,7 +5497,7 @@
       <title><fixed-case>E</fixed-case>dinburgh’s Syntax-Based Systems at <fixed-case>WMT</fixed-case> 2014</title>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Eva</first><last>Hasler</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
@@ -5522,14 +5522,14 @@
     <paper id="26">
       <title>Machine Translation of Medical Texts in the Khresmoi Project</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Jaroslava</first><last>Hlaváčová</last></author>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>221–228</pages>
       <url hash="0cdf11ad">W14-3326</url>
       <doi>10.3115/v1/W14-3326</doi>
@@ -5573,7 +5573,7 @@
       <title><fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>’14 Medical Translation Task</title>
       <author><first>Nicolas</first><last>Pécheux</last></author>
       <author><first>Li</first><last>Gong</last></author>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
       <author><first>Benjamin</first><last>Marie</last></author>
       <author><first>Yulia</first><last>Ivanishcheva</last></author>
       <author><first>Alexander</first><last>Allauzen</last></author>
@@ -5611,7 +5611,7 @@
       <title>Randomized Significance Tests in Machine Translation</title>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Nitika</first><last>Mathur</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>266–274</pages>
       <url hash="c15b7ab0">W14-3333</url>
       <doi>10.3115/v1/W14-3333</doi>
@@ -5620,7 +5620,7 @@
     <paper id="34">
       <title>Estimating Word Alignment Quality for <fixed-case>SMT</fixed-case> Reordering Tasks</title>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>275–286</pages>
       <url hash="4b8f3476">W14-3334</url>
@@ -5656,7 +5656,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>SHEF</fixed-case>-Lite 2.0: Sparse Multi-task <fixed-case>G</fixed-case>aussian Processes for Translation Quality Estimation</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>307–312</pages>
@@ -5666,7 +5666,7 @@
     </paper>
     <paper id="39">
       <title>Referential Translation Machines for Predicting Translation Quality</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>313–321</pages>
       <url hash="4ae5df6f">W14-3339</url>
@@ -5675,11 +5675,11 @@
     </paper>
     <paper id="40">
       <title><fixed-case>FBK</fixed-case>-<fixed-case>UPV</fixed-case>-<fixed-case>UE</fixed-case>din participation in the <fixed-case>WMT</fixed-case>14 Quality Estimation shared-task</title>
-      <author><first>José Guilherme</first><last>Camargo de Souza</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jose-g-c-de-souza"><first>José Guilherme</first><last>Camargo de Souza</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <author><first>Christian</first><last>Buck</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>322–328</pages>
       <url hash="d2323985">W14-3340</url>
       <doi>10.3115/v1/W14-3340</doi>
@@ -5687,7 +5687,7 @@
     </paper>
     <paper id="41">
       <title>Target-Centric Features for Translation Quality Estimation</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
       <author><first>Jian</first><last>Zhang</last></author>
@@ -5698,8 +5698,8 @@
     </paper>
     <paper id="42">
       <title><fixed-case>LIG</fixed-case> System for Word Level <fixed-case>QE</fixed-case> task at <fixed-case>WMT</fixed-case>14</title>
-      <author><first>Ngoc-Quang</first><last>Luong</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc-Quang</first><last>Luong</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>335–341</pages>
       <url hash="050870e6">W14-3342</url>
@@ -5708,7 +5708,7 @@
     </paper>
     <paper id="43">
       <title>Exploring Consensus in Machine Translation for Quality Estimation</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>342–347</pages>
       <url hash="2062f42b">W14-3343</url>
@@ -5728,7 +5728,7 @@
     </paper>
     <paper id="45">
       <title><fixed-case>P</fixed-case>armesan: Meteor without Paraphrases with Paraphrased References</title>
-      <author><first>Petra</first><last>Barančíková</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
       <pages>355–361</pages>
       <url hash="ba266861">W14-3345</url>
       <doi>10.3115/v1/W14-3345</doi>
@@ -5745,8 +5745,8 @@
     </paper>
     <paper id="47">
       <title><fixed-case>VERT</fixed-case>a participation in the <fixed-case>WMT</fixed-case>14 Metrics Task</title>
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <pages>368–375</pages>
       <url hash="a3ff68c3">W14-3347</url>
       <doi>10.3115/v1/W14-3347</doi>
@@ -5755,7 +5755,7 @@
     <paper id="48">
       <title>Meteor Universal: Language Specific Translation Evaluation for Any Target Language</title>
       <author><first>Michael</first><last>Denkowski</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <pages>376–380</pages>
       <url hash="bec38791">W14-3348</url>
       <doi>10.3115/v1/W14-3348</doi>
@@ -5763,9 +5763,9 @@
     </paper>
     <paper id="49">
       <title>Application of Prize based on Sentence Length in Chunk-based Automatic Evaluation of Machine Translation</title>
-      <author><first>Hiroshi</first><last>Echizen’ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen’ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>381–386</pages>
       <url hash="e9b83a47">W14-3349</url>
       <doi>10.3115/v1/W14-3349</doi>
@@ -5774,7 +5774,7 @@
     <paper id="50">
       <title><fixed-case>LAYERED</fixed-case>: Metric for Machine Translation Evaluation</title>
       <author><first>Shubham</first><last>Gautam</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>387–393</pages>
       <url hash="f0d4e8b4">W14-3350</url>
       <doi>10.3115/v1/W14-3350</doi>
@@ -5782,9 +5782,9 @@
     </paper>
     <paper id="51">
       <title><fixed-case>IPA</fixed-case> and <fixed-case>STOUT</fixed-case>: Leveraging Linguistic and Source-based Features for Machine Translation Evaluation</title>
-      <author><first>Meritxell</first><last>Gonzàlez</last></author>
+      <author id="meritxell-gonzalez"><first>Meritxell</first><last>Gonzàlez</last></author>
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>394–401</pages>
       <url hash="234129c0">W14-3351</url>
       <doi>10.3115/v1/W14-3351</doi>
@@ -5792,10 +5792,10 @@
     </paper>
     <paper id="52">
       <title><fixed-case>D</fixed-case>isco<fixed-case>TK</fixed-case>: Using Discourse Structure for Machine Translation Evaluation</title>
-      <author><first>Shafiq</first><last>Joty</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="shafiq-joty"><first>Shafiq</first><last>Joty</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>402–408</pages>
       <url hash="0f24a0aa">W14-3352</url>
       <doi>10.3115/v1/W14-3352</doi>
@@ -5813,7 +5813,7 @@
     <paper id="54">
       <title><fixed-case>BEER</fixed-case>: <fixed-case>BE</fixed-case>tter Evaluation as Ranking</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>414–419</pages>
       <url hash="5c3d02b1">W14-3354</url>
       <doi>10.3115/v1/W14-3354</doi>
@@ -5832,9 +5832,9 @@
     <paper id="56">
       <title>Crowdsourcing High-Quality Parallel Data Extraction from <fixed-case>T</fixed-case>witter</title>
       <author><first>Wang</first><last>Ling</last></author>
-      <author><first>Luís</first><last>Marujo</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="luis-marujo"><first>Luís</first><last>Marujo</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Isabel</first><last>Trancoso</last></author>
       <pages>426–436</pages>
       <url hash="51605abc">W14-3356</url>
@@ -5863,7 +5863,7 @@
     <paper id="59">
       <title>Unsupervised Adaptation for Statistical Machine Translation</title>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>457–465</pages>
       <url hash="06121ae1">W14-3359</url>
       <doi>10.3115/v1/W14-3359</doi>
@@ -5873,7 +5873,7 @@
       <title>An Empirical Comparison of Features and Tuning for Phrase-based Machine Translation</title>
       <author><first>Spence</first><last>Green</last></author>
       <author><first>Daniel</first><last>Cer</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>466–476</pages>
       <url hash="f9193589">W14-3360</url>
       <doi>10.3115/v1/W14-3360</doi>
@@ -5901,7 +5901,7 @@
     <paper id="63">
       <title>Linear Mixture Models for Robust Machine Translation</title>
       <author><first>Marine</first><last>Carpuat</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>George</first><last>Foster</last></author>
       <pages>499–509</pages>
       <url hash="7678a138">W14-3363</url>
@@ -5913,10 +5913,10 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2014</booktitle>
       <url hash="f152f1a4">W14-34</url>
-      <editor><first>Kevin</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Jun-ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></editor>
       <doi>10.3115/v1/W14-34</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Baltimore, Maryland</address>
@@ -5934,10 +5934,10 @@
       <author><first>Robert</first><last>Rivera</last></author>
       <author><first>Rachel</first><last>Beard</last></author>
       <author><first>Rob</first><last>Lauder</last></author>
-      <author><first>Davy</first><last>Weissenbacher</last></author>
+      <author id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Matthew</first><last>Scotch</last></author>
       <author><first>Garrick</first><last>Wallstrom</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>1–9</pages>
       <url hash="2c837b64">W14-3401</url>
       <doi>10.3115/v1/W14-3401</doi>
@@ -5945,7 +5945,7 @@
     </paper>
     <paper id="2">
       <title>Temporal Expression Recognition for Cell Cycle Phase Concepts in Biomedical Literature</title>
-      <author><first>Negacy</first><last>Hailu</last></author>
+      <author id="negacy-hailu"><first>Negacy</first><last>Hailu</last></author>
       <author><first>Natalya</first><last>Panteleyeva</last></author>
       <author><first>Kevin</first><last>Cohen</last></author>
       <pages>10–18</pages>
@@ -5984,7 +5984,7 @@
     </paper>
     <paper id="6">
       <title>Detecting Health Related Discussions in Everyday Telephone Conversations for Studying Medical Events in the Lives of Older Adults</title>
-      <author><first>Golnar</first><last>Sheikhshab</last></author>
+      <author id="golnar-sheikhshab"><first>Golnar</first><last>Sheikhshab</last></author>
       <author><first>Izhak</first><last>Shafran</last></author>
       <author><first>Jeffrey</first><last>Kaye</last></author>
       <pages>38–44</pages>
@@ -6003,12 +6003,12 @@
     </paper>
     <paper id="8">
       <title>Generating Patient Problem Lists from the <fixed-case>S</fixed-case>h<fixed-case>AR</fixed-case>e Corpus using <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case>/<fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> <fixed-case>CORE</fixed-case> Problem List</title>
-      <author><first>Danielle</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle</first><last>Mowery</last></author>
       <author><first>Mindy</first><last>Ross</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Stephane</first><last>Meystre</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="stephane-meystre"><first>Stephane</first><last>Meystre</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <pages>54–58</pages>
       <url hash="4897efb0">W14-3408</url>
       <doi>10.3115/v1/W14-3408</doi>
@@ -6026,8 +6026,8 @@
     <paper id="10">
       <title>Structuring Operative Notes using Active Learning</title>
       <author><first>Kirk</first><last>Roberts</last></author>
-      <author><first>Sanda</first><last>Harabagiu</last></author>
-      <author><first>Michael</first><last>Skinner</last></author>
+      <author id="sanda-harabagiu"><first>Sanda</first><last>Harabagiu</last></author>
+      <author id="michael-skinner"><first>Michael</first><last>Skinner</last></author>
       <pages>68–76</pages>
       <url hash="beca8b9c">W14-3410</url>
       <doi>10.3115/v1/W14-3410</doi>
@@ -6036,7 +6036,7 @@
     <paper id="11">
       <title>Chunking Clinical Text Containing Non-Canonical Language</title>
       <author><first>Aleksandar</first><last>Savkov</last></author>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Jackie</first><last>Cassell</last></author>
       <pages>77–82</pages>
       <url hash="1a154a92">W14-3411</url>
@@ -6046,10 +6046,10 @@
     <paper id="12">
       <title>Decision Style in a Clinical Reasoning Corpus</title>
       <author><first>Limor</first><last>Hochberg</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Esa M.</first><last>Rantanen</last></author>
       <author><first>Caroline M.</first><last>DeLong</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>83–87</pages>
       <url hash="f9bf06c9">W14-3412</url>
       <doi>10.3115/v1/W14-3412</doi>
@@ -6066,7 +6066,7 @@
     <paper id="14">
       <title>A repository of semantic types in the <fixed-case>MIMIC</fixed-case> <fixed-case>II</fixed-case> database clinical notes</title>
       <author><first>Richard</first><last>Osborne</last></author>
-      <author><first>Alan</first><last>Aronson</last></author>
+      <author id="alan-r-aronson"><first>Alan</first><last>Aronson</last></author>
       <author><first>Kevin</first><last>Cohen</last></author>
       <pages>93–97</pages>
       <url hash="ad2996df">W14-3414</url>
@@ -6075,7 +6075,7 @@
     </paper>
     <paper id="15">
       <title>Extracting drug indications and adverse drug reactions from <fixed-case>S</fixed-case>panish health social media</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Santiago</first><last>de la Peña González</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>98–106</pages>
@@ -6103,10 +6103,10 @@
     </paper>
     <paper id="18">
       <title>Towards Gene Recognition from Rare and Ambiguous Abbreviations using a Filtering Approach</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <author><first>Matthias</first><last>Zwick</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>118–127</pages>
       <url hash="9529c5d2">W14-3418</url>
       <doi>10.3115/v1/W14-3418</doi>
@@ -6123,7 +6123,7 @@
     </paper>
     <paper id="20">
       <title>Using statistical parsing to detect agrammatic aphasia</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <author><first>Jed A.</first><last>Meltzer</last></author>
       <author><first>Jennifer E.</first><last>Mack</last></author>
@@ -6154,7 +6154,7 @@
     <paper id="1">
       <title>Improving Collocation Correction by Ranking Suggestions Using Linguistic Knowledge</title>
       <author><first>Roberto</first><last>Carlini</last></author>
-      <author><first>Joan</first><last>Codina-Filba</last></author>
+      <author id="joan-codina-filba"><first>Joan</first><last>Codina-Filba</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>1–12</pages>
       <url hash="64fb893e">W14-3501</url>
@@ -6213,7 +6213,7 @@
       <title>A <fixed-case>VIEW</fixed-case> of <fixed-case>R</fixed-case>ussian: Visual Input Enhancement and Adaptive Feedback</title>
       <author><first>Robert</first><last>Reynolds</last></author>
       <author><first>Eduard</first><last>Schaf</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>98-112</pages>
       <url hash="956dad0b">W14-3508</url>
       <bibkey>reynolds-etal-2014-view</bibkey>
@@ -6242,7 +6242,7 @@
       <booktitle>Proceedings of the <fixed-case>EMNLP</fixed-case> 2014 Workshop on <fixed-case>A</fixed-case>rabic Natural Language Processing (<fixed-case>ANLP</fixed-case>)</booktitle>
       <url hash="88d38008">W14-36</url>
       <editor><first>Nizar</first><last>Habash</last></editor>
-      <editor><first>Stephan</first><last>Vogel</last></editor>
+      <editor id="stephan-vogel"><first>Stephan</first><last>Vogel</last></editor>
       <doi>10.3115/v1/W14-36</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Doha, Qatar</address>
@@ -6266,7 +6266,7 @@
     <paper id="2">
       <title>The International Corpus of <fixed-case>A</fixed-case>rabic: Compilation, Analysis and Evaluation</title>
       <author><first>Sameh</first><last>Alansary</last></author>
-      <author><first>Magdy</first><last>Nagi</last></author>
+      <author id="magdi-nagi"><first>Magdy</first><last>Nagi</last></author>
       <pages>8–17</pages>
       <url hash="87803862">W14-3602</url>
       <doi>10.3115/v1/W14-3602</doi>
@@ -6307,7 +6307,7 @@
       <title>A Framework for the Classification and Annotation of Multiword Expressions in Dialectal <fixed-case>A</fixed-case>rabic</title>
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Mohammed</first><last>Attia</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>48–56</pages>
       <url hash="a4057734">W14-3606</url>
       <doi>10.3115/v1/W14-3606</doi>
@@ -6320,7 +6320,7 @@
       <author><first>Reham</first><last>Mohamed</last></author>
       <author><first>Alaa</first><last>Mohamed</last></author>
       <author><first>Bassant</first><last>Farouk</last></author>
-      <author><first>Nagwa</first><last>El-Makky</last></author>
+      <author id="nagwa-m-el-makky"><first>Nagwa</first><last>El-Makky</last></author>
       <author><first>Marwan</first><last>Torki</last></author>
       <pages>57–64</pages>
       <url hash="bb7b73ba">W14-3607</url>
@@ -6330,8 +6330,8 @@
     <paper id="8">
       <title>Automatic <fixed-case>A</fixed-case>rabic diacritics restoration based on deep nets</title>
       <author><first>Ahmad</first><last>Al Sallab</last></author>
-      <author><first>Mohsen</first><last>Rashwan</last></author>
-      <author><first>Hazem</first><last>M. Raafat</last></author>
+      <author id="mohsen-rashwan"><first>Mohsen</first><last>Rashwan</last></author>
+      <author id="hazem-raafat"><first>Hazem</first><last>M. Raafat</last></author>
       <author><first>Ahmed</first><last>Rafea</last></author>
       <pages>65–72</pages>
       <url hash="3f7338a6">W14-3608</url>
@@ -6350,7 +6350,7 @@
     <paper id="10">
       <title>Named Entity Recognition System for Dialectal <fixed-case>A</fixed-case>rabic</title>
       <author><first>Ayah</first><last>Zirikly</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>78–86</pages>
       <url hash="823daa63">W14-3610</url>
       <doi>10.3115/v1/W14-3610</doi>
@@ -6359,8 +6359,8 @@
     <paper id="11">
       <title>Semantic Query Expansion for <fixed-case>A</fixed-case>rabic Information Retrieval</title>
       <author><first>Ashraf</first><last>Mahgoub</last></author>
-      <author><first>Mohsen</first><last>Rashwan</last></author>
-      <author><first>Hazem</first><last>Raafat</last></author>
+      <author id="mohsen-rashwan"><first>Mohsen</first><last>Rashwan</last></author>
+      <author id="hazem-raafat"><first>Hazem</first><last>Raafat</last></author>
       <author><first>Mohamed</first><last>Zahran</last></author>
       <author><first>Magda</first><last>Fayek</last></author>
       <pages>87–92</pages>
@@ -6372,14 +6372,14 @@
       <title>Transliteration of <fixed-case>A</fixed-case>rabizi into <fixed-case>A</fixed-case>rabic Orthography: Developing a Parallel Annotated <fixed-case>A</fixed-case>rabizi-<fixed-case>A</fixed-case>rabic Script <fixed-case>SMS</fixed-case>/Chat Corpus</title>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
-      <author><first>Mohamed</first><last>Maamouri</last></author>
+      <author id="mohamed-maamouri"><first>Mohamed</first><last>Maamouri</last></author>
       <author><first>Stephen</first><last>Grimes</last></author>
       <author><first>Haejoong</first><last>Lee</last></author>
       <author><first>Jonathan</first><last>Wright</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>93–103</pages>
       <url hash="df315fbf">W14-3612</url>
       <doi>10.3115/v1/W14-3612</doi>
@@ -6400,7 +6400,7 @@
       <author><first>Nadi</first><last>Tomeh</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>114–120</pages>
       <url hash="d45a8b0e">W14-3614</url>
       <doi>10.3115/v1/W14-3614</doi>
@@ -6459,7 +6459,7 @@
       <title><fixed-case>GWU</fixed-case>-<fixed-case>HASP</fixed-case>: Hybrid <fixed-case>A</fixed-case>rabic Spelling and Punctuation Corrector</title>
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>148–154</pages>
       <url hash="fbc630bd">W14-3620</url>
       <doi>10.3115/v1/W14-3620</doi>
@@ -6467,9 +6467,9 @@
     </paper>
     <paper id="21">
       <title><fixed-case>TECHLIMED</fixed-case> system description for the Shared Task on Automatic <fixed-case>A</fixed-case>rabic Error Correction</title>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Omar</first><last>Asbayou</last></author>
-      <author><first>Ramzi</first><last>Abbes</last></author>
+      <author id="ramzi-abbes"><first>Ramzi</first><last>Abbes</last></author>
       <pages>155–159</pages>
       <url hash="d1fe9829">W14-3621</url>
       <doi>10.3115/v1/W14-3621</doi>
@@ -6493,7 +6493,7 @@
       <author><first>Ramy</first><last>Baly</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <pages>165–173</pages>
       <url hash="d9f4ce89">W14-3623</url>
       <doi>10.3115/v1/W14-3623</doi>
@@ -6510,7 +6510,7 @@
     </paper>
     <paper id="25">
       <title><fixed-case>A</fixed-case>rabic Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>180–186</pages>
       <url hash="733485ff">W14-3625</url>
@@ -6532,7 +6532,7 @@
       <author><first>Serena</first><last>Jeblee</last></author>
       <author><first>Weston</first><last>Feely</last></author>
       <author><first>Houda</first><last>Bouamor</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>196–206</pages>
@@ -6546,7 +6546,7 @@
       <author><first>Hassan</first><last>Sajjad</last></author>
       <author><first>Alaa</first><last>Khader</last></author>
       <author><first>Fahad</first><last>Al Obaidli</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Stephan</first><last>Vogel</last></author>
       <pages>207–216</pages>
       <url hash="89711d68">W14-3628</url>
@@ -6567,9 +6567,9 @@
       <booktitle>Proceedings of <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-9: the workshop on Graph-based Methods for Natural Language Processing</booktitle>
       <url hash="a0cc72bb">W14-37</url>
       <editor><first>V.G.Vinod</first><last>Vydiswaran</last></editor>
-      <editor><first>Amarnag</first><last>Subramanya</last></editor>
+      <editor id="amarnag-subramanya"><first>Amarnag</first><last>Subramanya</last></editor>
       <editor><first>Gabor</first><last>Melli</last></editor>
-      <editor><first>Irina</first><last>Matveeva</last></editor>
+      <editor id="irina-matveeva"><first>Irina</first><last>Matveeva</last></editor>
       <doi>10.3115/v1/W14-37</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Doha, Qatar</address>
@@ -6611,7 +6611,7 @@
     </paper>
     <paper id="4">
       <title>A Novel Two-stage Framework for Extracting Opinionated Sentences from News Articles</title>
-      <author><first>Pujari</first><last>Rajkumar</last></author>
+      <author id="rajkumar-pujari"><first>Pujari</first><last>Rajkumar</last></author>
       <author><first>Swara</first><last>Desai</last></author>
       <author><first>Niloy</first><last>Ganguly</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -6631,7 +6631,7 @@
     </paper>
     <paper id="6">
       <title>Semi-supervised Graph-based Genre Classification for Web Pages</title>
-      <author><first>Noushin</first><last>Rezapour Asheghi</last></author>
+      <author id="noushin-rezapour-asheghi"><first>Noushin</first><last>Rezapour Asheghi</last></author>
       <author><first>Katja</first><last>Markert</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <pages>39–47</pages>
@@ -6650,7 +6650,7 @@
     </paper>
     <paper id="8">
       <title>From Visualisation to Hypothesis Construction for Second Language Acquisition</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>56–64</pages>
       <url hash="20304873">W14-3708</url>
@@ -6662,8 +6662,8 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Computational Approaches to Code Switching</booktitle>
       <url hash="657a8401">W14-39</url>
-      <editor><first>Mona</first><last>Diab</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <editor><first>Pascale</first><last>Fung</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
       <doi>10.3115/v1/W14-39</doi>
@@ -6682,7 +6682,7 @@
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>1–12</pages>
       <url hash="fc7e84c4">W14-3901</url>
       <doi>10.3115/v1/W14-3901</doi>
@@ -6770,8 +6770,8 @@
       <title>The <fixed-case>CMU</fixed-case> Submission for the Shared Task on Language Identification in Code-Switched Data</title>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <author><first>Waleed</first><last>Ammar</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>80–86</pages>
       <url hash="8a0394f4">W14-3909</url>
       <doi>10.3115/v1/W14-3909</doi>
@@ -6780,7 +6780,7 @@
     <paper id="10">
       <title>Language Identification in Code-Switching Scenario</title>
       <author><first>Naman</first><last>Jain</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <pages>87–93</pages>
       <url hash="a2e8864f">W14-3910</url>
       <doi>10.3115/v1/W14-3910</doi>
@@ -6801,8 +6801,8 @@
       <author><first>Levi</first><last>King</last></author>
       <author><first>Eric</first><last>Baucom</last></author>
       <author><first>Timur</first><last>Gilmanov</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
-      <author><first>Dan</first><last>Whyatt</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
+      <author id="daniel-whyatt"><first>Dan</first><last>Whyatt</last></author>
       <author><first>Wolfgang</first><last>Maier</last></author>
       <author><first>Paul</first><last>Rodrigues</last></author>
       <pages>102–106</pages>
@@ -6833,7 +6833,7 @@
       <title><fixed-case>DCU</fixed-case>-<fixed-case>UVT</fixed-case>: Word-Level Language Classification with Code-Mixed Data</title>
       <author><first>Utsab</first><last>Barman</last></author>
       <author><first>Joachim</first><last>Wagner</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Jennifer</first><last>Foster</last></author>
       <pages>127–132</pages>
       <url hash="614a7342">W14-3915</url>
@@ -6842,7 +6842,7 @@
     </paper>
     <paper id="16">
       <title>Incremental N-gram Approach for Language Identification in Code-Switched Text</title>
-      <author><first>Prajwol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajwol</first><last>Shrestha</last></author>
       <pages>133–138</pages>
       <url hash="8fdca147">W14-3916</url>
       <doi>10.3115/v1/W14-3916</doi>
@@ -6865,7 +6865,7 @@
       <editor><first>Dekai</first><last>Wu</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
       <editor><first>Xavier</first><last>Carreras</last></editor>
-      <editor><first>Eva Maria</first><last>Vecchi</last></editor>
+      <editor id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></editor>
       <doi>10.3115/v1/W14-40</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Doha, Qatar</address>
@@ -6881,7 +6881,7 @@
       <title>Vector Space Models for Phrase-based Machine Translation</title>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>1–10</pages>
       <url hash="36f26faa">W14-4001</url>
       <doi>10.3115/v1/W14-4001</doi>
@@ -6890,7 +6890,7 @@
     <paper id="2">
       <title>Bilingual <fixed-case>M</fixed-case>arkov Reordering Labels for Hierarchical <fixed-case>SMT</fixed-case></title>
       <author><first>Gideon</first><last>Maillette de Buy Wenniger</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>11–21</pages>
       <url hash="5f9301db">W14-4002</url>
       <doi>10.3115/v1/W14-4002</doi>
@@ -6912,7 +6912,7 @@
       <author><first>Yuto</first><last>Hatakoshi</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>34–42</pages>
       <url hash="55851b84">W14-4004</url>
@@ -6933,8 +6933,8 @@
       <title>Reducing the Impact of Data Sparsity in Statistical Machine Translation</title>
       <author><first>Karan</first><last>Singla</last></author>
       <author><first>Kunal</first><last>Sachdeva</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Diksha</first><last>Yadav</last></author>
       <pages>51–56</pages>
       <url hash="fca6ba36">W14-4006</url>
@@ -7023,10 +7023,10 @@
     </paper>
     <paper id="15">
       <title>Word’s Vector Representations meet Machine Translation</title>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>132–134</pages>
       <url hash="d1989bb2">W14-4015</url>
       <doi>10.3115/v1/W14-4015</doi>
@@ -7034,8 +7034,8 @@
     </paper>
     <paper id="16">
       <title>Context Sense Clustering for Translation</title>
-      <author><first>João</first><last>Casteleiro</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="joao-miguel-casteleiro"><first>João</first><last>Casteleiro</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <author><first>Joaquim</first><last>Silva</last></author>
       <pages>135–137</pages>
       <url hash="f9f17fbc">W14-4016</url>
@@ -7045,7 +7045,7 @@
     <paper id="17">
       <title>Evaluating Word Order Recursively over Permutation-Forests</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>138–147</pages>
       <url hash="d01bfab6">W14-4017</url>
       <doi>10.3115/v1/W14-4017</doi>
@@ -7064,7 +7064,7 @@
     <paper id="19">
       <title>How Synchronous are Adjuncts in Translation Data?</title>
       <author><first>Sophie</first><last>Arnoult</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>157–165</pages>
       <url hash="23f7db74">W14-4019</url>
       <doi>10.3115/v1/W14-4019</doi>
@@ -7075,7 +7075,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>EMNLP</fixed-case> 2014 Workshop on Analysis of Large Scale Social Interaction in <fixed-case>MOOC</fixed-case>s</booktitle>
       <url hash="f42e99c5">W14-41</url>
-      <editor><first>Carolyn</first><last>Rose</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn</first><last>Rose</last></editor>
       <editor><first>George</first><last>Siemens</last></editor>
       <doi>10.3115/v1/W14-41</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -7198,7 +7198,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>EMNLP</fixed-case>’2014 Workshop on Language Technology for Closely Related Languages and Language Variants</booktitle>
       <url hash="cabf6f7b">W14-42</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
       <editor><first>Cristina</first><last>Vertan</last></editor>
       <doi>10.3115/v1/W14-42</doi>
@@ -7233,8 +7233,8 @@
     </paper>
     <paper id="3">
       <title>Cross-lingual Dependency Parsing of Related Languages with Rich Morphosyntactic Tagsets</title>
-      <author><first>Željko</first><last>Agić</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Danijela</first><last>Merkler</last></author>
       <author><first>Simon</first><last>Krek</last></author>
       <author><first>Kaja</first><last>Dobrovoljc</last></author>
@@ -7263,12 +7263,12 @@
     </paper>
     <paper id="6">
       <title>Adapting Predicate Frames for <fixed-case>U</fixed-case>rdu <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>anking</title>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Naman</first><last>Jain</last></author>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Tafseer</first><last>Ahmed Khan</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="tafseer-ahmed"><first>Tafseer</first><last>Ahmed Khan</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>James</first><last>Babani</last></author>
       <pages>47–55</pages>
       <url hash="84514039">W14-4206</url>
@@ -7295,7 +7295,7 @@
     <paper id="9">
       <title>Proper Name Machine Translation from <fixed-case>J</fixed-case>apanese to <fixed-case>J</fixed-case>apanese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Taro</first><last>Miyazaki</last></author>
-      <author><first>Naoto</first><last>Kato</last></author>
+      <author id="naoto-kato"><first>Naoto</first><last>Kato</last></author>
       <author><first>Seiki</first><last>Inoue</last></author>
       <author><first>Shuichi</first><last>Umeda</last></author>
       <author><first>Makiko</first><last>Azuma</last></author>
@@ -7308,7 +7308,7 @@
     </paper>
     <paper id="10">
       <title>Exploring cross-language statistical machine translation for closely related <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>76–84</pages>
       <url hash="c298f293">W14-4210</url>
@@ -7321,8 +7321,8 @@
       <author><first>Anupam</first><last>Singh</last></author>
       <author><first>Nishkarsh</first><last>Shastri</last></author>
       <author><first>Megha</first><last>Jhunjhunwala</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>85–91</pages>
       <url hash="74332ca6">W14-4211</url>
       <doi>10.3115/v1/W14-4211</doi>
@@ -7330,7 +7330,7 @@
     </paper>
     <paper id="12">
       <title>A Comparison of <fixed-case>MT</fixed-case> Methods for Closely Related Languages: a Case Study on <fixed-case>C</fixed-case>zech - <fixed-case>S</fixed-case>lovak Language Pair</title>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <author><first>Jernej</first><last>Vičič</last></author>
       <pages>92–98</pages>
       <url hash="20df413e">W14-4212</url>
@@ -7341,7 +7341,7 @@
       <title>Handling <fixed-case>OOV</fixed-case> Words in Dialectal <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish Machine Translation</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>99–108</pages>
       <url hash="ab7fd03c">W14-4213</url>
       <doi>10.3115/v1/W14-4213</doi>
@@ -7354,7 +7354,7 @@
       <url hash="2ef73786">W14-43</url>
       <editor><first>Kallirroi</first><last>Georgila</last></editor>
       <editor><first>Matthew</first><last>Stone</last></editor>
-      <editor><first>Helen</first><last>Hastie</last></editor>
+      <editor id="helen-hastie"><first>Helen</first><last>Hastie</last></editor>
       <editor><first>Ani</first><last>Nenkova</last></editor>
       <doi>10.3115/v1/W14-43</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -7370,7 +7370,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>K</fixed-case>eynote: Statistical Approaches to Open-domain Spoken Dialogue Systems</title>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>1</pages>
       <url hash="b68cb3e3">W14-4301</url>
       <doi>10.3115/v1/W14-4301</doi>
@@ -7422,7 +7422,7 @@
     <paper id="6">
       <title>Adapting to Personality Over Time: Examining the Effectiveness of Dialogue Policy Progressions in Task-Oriented Interaction</title>
       <author><first>Alexandria</first><last>Vail</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>41–50</pages>
       <url hash="2df6d6fe">W14-4306</url>
       <doi>10.3115/v1/W14-4306</doi>
@@ -7472,8 +7472,8 @@
       <title><fixed-case>A</fixed-case>lex: Bootstrapping a Spoken Dialogue System for a New Domain by Real Users</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Ondřej</first><last>Plátek</last></author>
-      <author><first>Lukáš</first><last>Žilka</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="lukas-zilka"><first>Lukáš</first><last>Žilka</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>79–83</pages>
       <url hash="2fa4c7b7">W14-4311</url>
       <doi>10.3115/v1/W14-4311</doi>
@@ -7481,7 +7481,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>I</fixed-case>npro<fixed-case>TK</fixed-case>s: A Toolkit for Incremental Situated Processing</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Spyros</first><last>Kousidis</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>84–88</pages>
@@ -7495,7 +7495,7 @@
       <author><first>Shaohua</first><last>Yang</last></author>
       <author><first>Yu</first><last>Cheng</last></author>
       <author><first>Yunyi</first><last>Jia</last></author>
-      <author><first>Joyce</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce</first><last>Chai</last></author>
       <author><first>Ning</first><last>Xi</last></author>
       <pages>89–97</pages>
       <url hash="709a06e9">W14-4313</url>
@@ -7506,7 +7506,7 @@
       <title>An easy method to make dialogue systems incremental</title>
       <author><first>Hatim</first><last>Khouzaimi</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefevre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefevre</last></author>
       <pages>98–107</pages>
       <url hash="4c7951b1">W14-4314</url>
       <doi>10.3115/v1/W14-4314</doi>
@@ -7515,7 +7515,7 @@
     <paper id="15">
       <title>Free on-line speech recogniser based on <fixed-case>K</fixed-case>aldi <fixed-case>ASR</fixed-case> toolkit producing word posterior lattices</title>
       <author><first>Ondřej</first><last>Plátek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>108–112</pages>
       <url hash="7d6f9b65">W14-4315</url>
       <doi>10.3115/v1/W14-4315</doi>
@@ -7524,7 +7524,7 @@
     <paper id="16">
       <title>Combining Task and Dialogue Streams in Unsupervised Dialogue Act Models</title>
       <author><first>Aysu</first><last>Ezen-Can</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>113–122</pages>
       <url hash="8e4755f4">W14-4316</url>
       <doi>10.3115/v1/W14-4316</doi>
@@ -7532,7 +7532,7 @@
     </paper>
     <paper id="17">
       <title>Dialogue Act Modeling for Non-Visual Web Access</title>
-      <author><first>Vikas</first><last>Ashok</last></author>
+      <author id="vikas-ganjigunte-ashok"><first>Vikas</first><last>Ashok</last></author>
       <author><first>Yevgen</first><last>Borodin</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
       <author><first>IV</first><last>Ramakrishnan</last></author>
@@ -7569,8 +7569,8 @@
     </paper>
     <paper id="21">
       <title>The Role of Polarity in Inferring Acceptance and Rejection in Dialogue</title>
-      <author><first>Julian</first><last>Schlöder</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="julian-j-schloder"><first>Julian</first><last>Schlöder</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>151–160</pages>
       <url hash="5e821167">W14-4321</url>
       <doi>10.3115/v1/W14-4321</doi>
@@ -7579,7 +7579,7 @@
     <paper id="22">
       <title>In-depth Exploitation of Noun and Verb Semantics to Identify Causation in Verb-Noun Pairs</title>
       <author><first>Mehwish</first><last>Riaz</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>161–170</pages>
       <url hash="68e0bbcf">W14-4322</url>
       <doi>10.3115/v1/W14-4322</doi>
@@ -7590,7 +7590,7 @@
       <author><first>Reid</first><last>Swanson</last></author>
       <author><first>Elahe</first><last>Rahimtoroghi</last></author>
       <author><first>Thomas</first><last>Corcoran</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>171–180</pages>
       <url hash="fa5d8661">W14-4323</url>
       <doi>10.3115/v1/W14-4323</doi>
@@ -7598,8 +7598,8 @@
     </paper>
     <paper id="24">
       <title>Evaluating a Spoken Dialogue System that Detects and Adapts to User Affective States</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Katherine</first><last>Forbes-Riley</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="kate-forbes-riley"><first>Katherine</first><last>Forbes-Riley</last></author>
       <pages>181–185</pages>
       <url hash="8829bf0c">W14-4324</url>
       <doi>10.3115/v1/W14-4324</doi>
@@ -7608,7 +7608,7 @@
     <paper id="25">
       <title>Initiative Taking in Negotiation</title>
       <author><first>Elnaz</first><last>Nouri</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>186–193</pages>
       <url hash="d5be6c11">W14-4325</url>
       <doi>10.3115/v1/W14-4325</doi>
@@ -7617,7 +7617,7 @@
     <paper id="26">
       <title>Knowledge Acquisition Strategies for Goal-Oriented Dialog Systems</title>
       <author><first>Aasish</first><last>Pappu</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>194–198</pages>
       <url hash="b8095a8a">W14-4326</url>
       <doi>10.3115/v1/W14-4326</doi>
@@ -7653,7 +7653,7 @@
     </paper>
     <paper id="30">
       <title>Aspectual Properties of Conversational Activities</title>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <author><first>Boxuan</first><last>Guan</last></author>
       <author><first>Cho Ho</first><last>Yeung</last></author>
       <author><first>Yuan</first><last>Du</last></author>
@@ -7665,12 +7665,12 @@
     </paper>
     <paper id="31">
       <title>Detecting Inappropriate Clarification Requests in Spoken Dialogue Systems</title>
-      <author><first>Alex</first><last>Liu</last></author>
+      <author id="alex-liu"><first>Alex</first><last>Liu</last></author>
       <author><first>Rose</first><last>Sloan</last></author>
       <author><first>Mei-Vern</first><last>Then</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <pages>238–242</pages>
       <url hash="564b1662">W14-4331</url>
       <doi>10.3115/v1/W14-4331</doi>
@@ -7689,7 +7689,7 @@
     <paper id="33">
       <title><fixed-case>SAWDUST</fixed-case>: a Semi-Automated Wizard Dialogue Utterance Selection Tool for domain-independent large-domain dialogue</title>
       <author><first>Sudeep</first><last>Gandhe</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>251–253</pages>
       <url hash="283b0aa2">W14-4333</url>
       <doi>10.3115/v1/W14-4333</doi>
@@ -7701,7 +7701,7 @@
       <author><first>David</first><last>DeVault</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Louis-Philippe</first><last>Morency</last></author>
       <pages>254–256</pages>
       <url hash="bdd5196d">W14-4334</url>
@@ -7710,16 +7710,16 @@
     </paper>
     <paper id="35">
       <title><fixed-case>MVA</fixed-case>: The Multimodal Virtual Assistant</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>John</first><last>Chen</last></author>
       <author><first>Patrick</first><last>Ehlen</last></author>
       <author><first>Hyuckchul</first><last>Jung</last></author>
       <author><first>Jay</first><last>Lieske</last></author>
       <author><first>Aarthi</first><last>Reddy</last></author>
-      <author><first>Ethan</first><last>Selfridge</last></author>
+      <author id="ethan-selfridge"><first>Ethan</first><last>Selfridge</last></author>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
       <author><first>Brant</first><last>Vasilieff</last></author>
-      <author><first>Jay</first><last>Wilpon</last></author>
+      <author id="jay-wilpon"><first>Jay</first><last>Wilpon</last></author>
       <pages>257–259</pages>
       <url hash="e83fb50e">W14-4335</url>
       <doi>10.3115/v1/W14-4335</doi>
@@ -7734,8 +7734,8 @@
       <author><first>Catherine</first><last>Breslin</last></author>
       <author><first>Heriberto</first><last>Cuayáhuitl</last></author>
       <author><first>Nina</first><last>Dethlefs</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Peter</first><last>Mika</last></author>
@@ -7743,11 +7743,11 @@
       <author><first>Tim</first><last>Potter</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Pirros</first><last>Tsiakoulis</last></author>
+      <author id="pirros-tsiakoulis"><first>Pirros</first><last>Tsiakoulis</last></author>
       <author><first>Yves</first><last>Vanrompay</last></author>
       <author><first>Boris</first><last>Villazon-Terrazas</last></author>
       <author><first>Majid</first><last>Yazdani</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Yanchao</first><last>Yu</last></author>
       <pages>260–262</pages>
       <url hash="2822937c">W14-4336</url>
@@ -7758,7 +7758,7 @@
       <title>The Second Dialog State Tracking Challenge</title>
       <author><first>Matthew</first><last>Henderson</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <pages>263–272</pages>
       <url hash="2454730c">W14-4337</url>
       <doi>10.3115/v1/W14-4337</doi>
@@ -7777,7 +7777,7 @@
     </paper>
     <paper id="39">
       <title>Web-style ranking and <fixed-case>SLU</fixed-case> combination for dialog state tracking</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <pages>282–291</pages>
       <url hash="b1070055">W14-4339</url>
       <doi>10.3115/v1/W14-4339</doi>
@@ -7787,7 +7787,7 @@
       <title>Word-Based Dialog State Tracking with Recurrent Neural Networks</title>
       <author><first>Matthew</first><last>Henderson</last></author>
       <author><first>Blaise</first><last>Thomson</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>292–299</pages>
       <url hash="73f3dae4">W14-4340</url>
       <doi>10.3115/v1/W14-4340</doi>
@@ -7795,7 +7795,7 @@
     </paper>
     <paper id="41">
       <title>Comparative Error Analysis of Dialog State Tracking</title>
-      <author><first>Ronnie</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie</first><last>Smith</last></author>
       <pages>300–309</pages>
       <url hash="35991dba">W14-4341</url>
       <doi>10.3115/v1/W14-4341</doi>
@@ -7803,7 +7803,7 @@
     </paper>
     <paper id="42">
       <title>Extrinsic Evaluation of Dialog State Tracking and Predictive Metrics for Dialog Policy Optimization</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>310–317</pages>
       <url hash="ec7e850c">W14-4342</url>
       <doi>10.3115/v1/W14-4342</doi>
@@ -7833,7 +7833,7 @@
     <paper id="45">
       <title>Sequential Labeling for Tracking Dynamic Dialog States</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <pages>332–336</pages>
       <url hash="1dab20bf">W14-4345</url>
       <doi>10.3115/v1/W14-4345</doi>
@@ -7845,7 +7845,7 @@
       <booktitle>Proceedings of the 8th International Natural Language Generation Conference (<fixed-case>INLG</fixed-case>)</booktitle>
       <url hash="3f3784f5">W14-44</url>
       <editor><first>Margaret</first><last>Mitchell</last></editor>
-      <editor><first>Kathleen</first><last>McCoy</last></editor>
+      <editor id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></editor>
       <editor id="david-d-mcdonald"><first>David</first><last>McDonald</last></editor>
       <editor><first>Aoife</first><last>Cahill</last></editor>
       <doi>10.3115/v1/W14-44</doi>
@@ -7862,7 +7862,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Case Study: <fixed-case>NLG</fixed-case> meeting Weather Industry Demand for Quality and Quantity of Textual Weather Forecasts</title>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Neil</first><last>Burnett</last></author>
       <author><first>Ross</first><last>Turner</last></author>
       <author><first>John</first><last>Mastin</last></author>
@@ -7875,10 +7875,10 @@
     <paper id="2">
       <title><fixed-case>P</fixed-case>atient<fixed-case>N</fixed-case>arr: Towards generating patient-centric summaries of hospital stays</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Andrew</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew</first><last>Boyd</last></author>
       <author><first>Camillo</first><last>Lugaresi</last></author>
       <author><first>Abhinaya</first><last>Balasubramanian</last></author>
-      <author><first>Gail</first><last>Keenan</last></author>
+      <author id="gail-m-keenan"><first>Gail</first><last>Keenan</last></author>
       <author><first>Mike</first><last>Burton</last></author>
       <author><first>Tamara</first><last>Goncalves Rezende Macieira</last></author>
       <author><first>Jianrong</first><last>Li</last></author>
@@ -7900,7 +7900,7 @@
     </paper>
     <paper id="4">
       <title>Text simplification using synchronous dependency grammars: Generalising automatically harvested rules</title>
-      <author><first>Mandya</first><last>Angrosh</last></author>
+      <author id="angrosh-mandya"><first>Mandya</first><last>Angrosh</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <pages>16–25</pages>
       <url hash="36b0b0eb">W14-4404</url>
@@ -7931,7 +7931,7 @@
       <author><first>Tatsuro</first><last>Oya</last></author>
       <author><first>Yashar</first><last>Mehdad</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>45–53</pages>
       <url hash="f30d5839">W14-4407</url>
       <doi>10.3115/v1/W14-4407</doi>
@@ -7939,9 +7939,9 @@
     </paper>
     <paper id="8">
       <title>A Hybrid Approach to Multi-document Summarization of Opinions in Reviews</title>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>54–63</pages>
       <url hash="84710e69">W14-4408</url>
       <doi>10.3115/v1/W14-4408</doi>
@@ -7951,7 +7951,7 @@
       <title>Adapting Graph Summaries to the Users’ Reading Levels</title>
       <author><first>Priscilla</first><last>Moraes</last></author>
       <author><first>Kathleen</first><last>McCoy</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>64–73</pages>
       <url hash="800fd325">W14-4409</url>
       <doi>10.3115/v1/W14-4409</doi>
@@ -7959,7 +7959,7 @@
     </paper>
     <paper id="10">
       <title>Experimental Design to Improve Topic Analysis Based Summarization</title>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Kathleen</first><last>McCoy</last></author>
       <pages>74–82</pages>
       <url hash="3bc9a4ac">W14-4410</url>
@@ -7969,7 +7969,7 @@
     <paper id="11">
       <title>Towards a Description of Symbolic Maps</title>
       <author><first>Rumiya</first><last>Izgalieva</last></author>
-      <author><first>Daniel</first><last>Vale</last></author>
+      <author id="daniel-couto-vale"><first>Daniel</first><last>Vale</last></author>
       <author><first>Elisa</first><last>Vales</last></author>
       <pages>83–92</pages>
       <url hash="54b6cb83">W14-4411</url>
@@ -7979,7 +7979,7 @@
     <paper id="12">
       <title>Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese realisation</title>
       <author><first>Rodrigo</first><last>de Oliveira</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>93–94</pages>
       <url hash="4eb2ef2d">W14-4412</url>
       <doi>10.3115/v1/W14-4412</doi>
@@ -7990,7 +7990,7 @@
       <author><first>Priscilla</first><last>Moraes</last></author>
       <author><first>Gabriel</first><last>Sina</last></author>
       <author><first>Kathleen</first><last>McCoy</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>95–98</pages>
       <url hash="29a1b905">W14-4413</url>
       <doi>10.3115/v1/W14-4413</doi>
@@ -7999,7 +7999,7 @@
     <paper id="14">
       <title>Two-Stage Stochastic Email Synthesizer</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>99–102</pages>
       <url hash="92afea76">W14-4414</url>
       <doi>10.3115/v1/W14-4414</doi>
@@ -8009,7 +8009,7 @@
       <title>A Framework for Health Behavior Change using Companionable Robots</title>
       <author><first>Bandita</first><last>Sarma</last></author>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>103–107</pages>
       <url hash="1fa36451">W14-4415</url>
       <doi>10.3115/v1/W14-4415</doi>
@@ -8028,9 +8028,9 @@
     <paper id="17">
       <title>Determining Content for Unknown Users: Lessons from the <fixed-case>M</fixed-case>ink<fixed-case>A</fixed-case>pp Case Study</title>
       <author><first>Gemma</first><last>Webster</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Somayajulu G.</first><last>Sripada</last></author>
-      <author><first>Rene</first><last>Van Der Wal</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu G.</first><last>Sripada</last></author>
+      <author id="rene-van-der-wal"><first>Rene</first><last>Van Der Wal</last></author>
       <author><first>Koen</first><last>Arts</last></author>
       <author><first>Yolanda</first><last>Melero</last></author>
       <author><first>Xavier</first><last>Lambin</last></author>
@@ -8074,7 +8074,7 @@
     <paper id="21">
       <title>Latent User Models for Online River Information Tailoring</title>
       <author><first>Xiwu</first><last>Han</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Kit</first><last>Macleod</last></author>
       <author><first>Antonio</first><last>Ioris</last></author>
       <pages>133–137</pages>
@@ -8085,7 +8085,7 @@
     <paper id="22">
       <title>Multi-adaptive Natural Language Generation using Principal Component Regression</title>
       <author><first>Dimitra</first><last>Gkatzia</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>138–142</pages>
       <url hash="f215865f">W14-4422</url>
@@ -8094,8 +8094,8 @@
     </paper>
     <paper id="23">
       <title><fixed-case>TBI</fixed-case>-Doc: Generating Patient &amp; Clinician Reports from Brain Imaging Data</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <author><first>Chistopher</first><last>Thomas</last></author>
       <author><first>Susan</first><last>Holm</last></author>
       <pages>143–146</pages>
@@ -8105,7 +8105,7 @@
     </paper>
     <paper id="24">
       <title>Towards Surface Realization with <fixed-case>CCG</fixed-case>s Induced from Dependencies</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>147–151</pages>
       <url hash="87c84cb2">W14-4424</url>
       <doi>10.3115/v1/W14-4424</doi>
@@ -8114,7 +8114,7 @@
     <paper id="25">
       <title>Two-Stage Stochastic Natural Language Generation for Email Synthesis by Modeling Sender Style and Topic Structure</title>
       <author><first>Yun-Nung</first><last>Chen</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>152–156</pages>
       <url hash="858effe4">W14-4425</url>
       <doi>10.3115/v1/W14-4425</doi>
@@ -8150,7 +8150,7 @@
     <paper id="2">
       <title>Mining temporal footprints from <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Michele</first><last>Filannino</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>7–13</pages>
       <url hash="4002495a">W14-4502</url>
       <doi>10.3115/v1/W14-4502</doi>
@@ -8327,7 +8327,7 @@
     </paper>
     <paper id="9">
       <title>gdbank: The beginnings of a corpus of dependency structures and type-logical grammar in <fixed-case>S</fixed-case>cottish <fixed-case>G</fixed-case>aelic</title>
-      <author><first>Colin</first><last>Batchelor</last></author>
+      <author id="colin-batchelor"><first>Colin</first><last>Batchelor</last></author>
       <pages>60–65</pages>
       <url hash="395f7b3a">W14-4609</url>
       <doi>10.3115/v1/W14-4609</doi>
@@ -8355,7 +8355,7 @@
     <paper id="12">
       <title>Subsegmental language detection in <fixed-case>C</fixed-case>eltic language text</title>
       <author><first>Akshay</first><last>Minocha</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>76–80</pages>
       <url hash="20333ad4">W14-4612</url>
       <doi>10.3115/v1/W14-4612</doi>
@@ -8401,7 +8401,7 @@
     </paper>
     <paper id="3">
       <title>Deep Learning from Web-Scale Corpora for Better Dictionary Interfaces</title>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <author><first>Lubomir</first><last>Otrusina</last></author>
       <pages>22–30</pages>
       <url hash="9a8abd45">W14-4703</url>
@@ -8411,8 +8411,8 @@
     <paper id="4">
       <title>Exploring the use of word embeddings and random walks on <fixed-case>W</fixed-case>ikipedia for the <fixed-case>C</fixed-case>og<fixed-case>A</fixed-case>lex shared task</title>
       <author><first>Josu</first><last>Goikoetxea</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>31–34</pages>
       <url hash="53bb1615">W14-4704</url>
       <doi>10.3115/v1/W14-4704</doi>
@@ -8452,7 +8452,7 @@
     </paper>
     <paper id="8">
       <title>Retrieving Word Associations with a Simple Neighborhood Algorithm in a Graph-based Resource</title>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <pages>60–63</pages>
       <url hash="1f384459">W14-4708</url>
       <doi>10.3115/v1/W14-4708</doi>
@@ -8469,8 +8469,8 @@
     </paper>
     <paper id="10">
       <title><fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>inder</title>
-      <author><first>Catalin</first><last>Mititelu</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="catalin-mititelu"><first>Catalin</first><last>Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <pages>68–74</pages>
       <url hash="824de0e2">W14-4710</url>
       <doi>10.3115/v1/W14-4710</doi>
@@ -8495,10 +8495,10 @@
     </paper>
     <paper id="13">
       <title>Jibiki-<fixed-case>LINKS</fixed-case>: a tool between traditional dictionaries and lexical networks for modelling lexical resources</title>
-      <author><first>Ying</first><last>Zhang</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="ying-zhang"><first>Ying</first><last>Zhang</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>87–98</pages>
       <url hash="85fb4217">W14-4713</url>
       <doi>10.3115/v1/W14-4713</doi>
@@ -8526,7 +8526,7 @@
     <paper id="16">
       <title>A Computational Approach to Generate a Sensorial Lexicon</title>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>114–125</pages>
       <url hash="cd4eed34">W14-4716</url>
@@ -8535,7 +8535,7 @@
     </paper>
     <paper id="17">
       <title>Database Design of an Online <fixed-case>E</fixed-case>-Learning Tool of <fixed-case>C</fixed-case>hinese Classifiers</title>
-      <author><first>Helena</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena</first><last>Gao</last></author>
       <pages>126–137</pages>
       <url hash="fb747751">W14-4717</url>
       <doi>10.3115/v1/W14-4717</doi>
@@ -8545,7 +8545,7 @@
       <title>Default Physical Measurements in <fixed-case>SUMO</fixed-case></title>
       <author><first>Francesca</first><last>Quattri</last></author>
       <author><first>Adam</first><last>Pease</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>138–143</pages>
       <url hash="e833dec4">W14-4718</url>
       <doi>10.3115/v1/W14-4718</doi>
@@ -8565,7 +8565,7 @@
       <title>A Lexical Network with a Morphological Model in It</title>
       <author><first>Nabil</first><last>Gader</last></author>
       <author><first>Aurore</first><last>Koehl</last></author>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>154–165</pages>
       <url hash="85c798a9">W14-4720</url>
       <doi>10.3115/v1/W14-4720</doi>
@@ -8575,7 +8575,7 @@
       <title>Dimensions of Metaphorical Meaning</title>
       <author><first>Andrew</first><last>Gargett</last></author>
       <author><first>Josef</first><last>Ruppenhofer</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <pages>166–173</pages>
       <url hash="976ebd43">W14-4721</url>
       <doi>10.3115/v1/W14-4721</doi>
@@ -8592,7 +8592,7 @@
     </paper>
     <paper id="23">
       <title>Frames and terminology: representing predicative terms in the field of the environment</title>
-      <author><first>Marie-Claude</first><last>L’ Homme</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’ Homme</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
       <pages>186–197</pages>
       <url hash="e7f755ac">W14-4723</url>
@@ -8601,10 +8601,10 @@
     </paper>
     <paper id="24">
       <title>Modelling the Semantics of Adjectives in the Ontology-Lexicon Interface</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Francesca</first><last>Quattri</last></author>
       <author><first>Christina</first><last>Unger</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>198–209</pages>
       <url hash="25c2604c">W14-4724</url>
       <doi>10.3115/v1/W14-4724</doi>
@@ -8613,14 +8613,14 @@
     <paper id="25">
       <title>Discovering Conceptual Metaphors using Source Domain Spaces</title>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Kit</first><last>Cho</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
-      <author><first>Ching-Sheng</first><last>Lin</last></author>
+      <author id="ching-sheng-lin"><first>Ching-Sheng</first><last>Lin</last></author>
       <author><first>Ning</first><last>Sa</last></author>
       <author><first>Ignacio</first><last>Cases</last></author>
       <author><first>Yuliya</first><last>Peshkova</last></author>
@@ -8645,7 +8645,7 @@
       <url hash="e99add0d">W14-48</url>
       <editor><first>Patrick</first><last>Drouin</last></editor>
       <editor><first>Natalia</first><last>Grabar</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <editor><first>Kyo</first><last>Kageura</last></editor>
       <doi>10.3115/v1/W14-48</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
@@ -8669,9 +8669,9 @@
     </paper>
     <paper id="2">
       <title>Assigning Terms to Domains by Document Classification</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Monica Lestari</first><last>Paramita</last></author>
+      <author id="monica-lestari-paramita"><first>Monica Lestari</first><last>Paramita</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
       <pages>11–21</pages>
       <url hash="dc0eff70">W14-4802</url>
@@ -8680,8 +8680,8 @@
     </paper>
     <paper id="3">
       <title>Identification of Bilingual Terms from Monolingual Documents for Statistical Machine Translation</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>Claudio</first><last>Giuliano</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
+      <author id="claudio-giuliano"><first>Claudio</first><last>Giuliano</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>22–31</pages>
@@ -8691,7 +8691,7 @@
     </paper>
     <paper id="4">
       <title>Terminology Questions in Texts Authored by Patients</title>
-      <author><first>Noemie</first><last>Elhadad</last></author>
+      <author id="noemie-elhadad"><first>Noemie</first><last>Elhadad</last></author>
       <pages>32</pages>
       <url hash="b17cee42">W14-4804</url>
       <doi>10.3115/v1/W14-4804</doi>
@@ -8699,7 +8699,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>NPMI</fixed-case> Driven Recognition of Nested Terms</title>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <pages>33–41</pages>
       <url hash="dd5780f7">W14-4805</url>
@@ -8718,7 +8718,7 @@
     </paper>
     <paper id="7">
       <title>The <fixed-case>ACL</fixed-case> <fixed-case>RD</fixed-case>-<fixed-case>TEC</fixed-case>: A Dataset for Benchmarking Terminology Extraction and Classification in Computational Linguistics</title>
-      <author><first>Behrang</first><last>Q. Zadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>Q. Zadeh</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>52–63</pages>
       <url hash="de9e2eea">W14-4807</url>
@@ -8737,7 +8737,7 @@
     <paper id="9">
       <title>A comparative User Evaluation of Terminology Management Tools for Interpreters</title>
       <author><first>Hernani</first><last>Costa</last></author>
-      <author><first>Gloria</first><last>Corpas Pastor</last></author>
+      <author id="gloria-corpas-pastor"><first>Gloria</first><last>Corpas Pastor</last></author>
       <author><first>Isabel</first><last>Durán Muñoz</last></author>
       <pages>68–76</pages>
       <url hash="5b3873e1">W14-4809</url>
@@ -8797,7 +8797,7 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>LAW</fixed-case> <fixed-case>VIII</fixed-case> - The 8th Linguistic Annotation Workshop</booktitle>
       <url hash="9da17ec7">W14-49</url>
-      <editor><first>Lori</first><last>Levin</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
       <editor><first>Manfred</first><last>Stede</last></editor>
       <doi>10.3115/v1/W14-49</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
@@ -8847,7 +8847,7 @@
     <paper id="5">
       <title>Sentence diagrams: their evaluation and combination</title>
       <author><first>Jirka</first><last>Hana</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Ivana</first><last>Lukšová</last></author>
       <pages>38–47</pages>
       <url hash="c0866900">W14-4905</url>
@@ -8858,7 +8858,7 @@
       <title>Finding your “Inner-Annotator”: An Experiment in Annotator Independence for Rating Discourse Coherence Quality in Essays</title>
       <author><first>Jill</first><last>Burstein</last></author>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>48–53</pages>
       <url hash="2a284859">W14-4906</url>
       <doi>10.3115/v1/W14-4906</doi>
@@ -8868,7 +8868,7 @@
       <title>Optimizing annotation efforts to build reliable annotated corpora for training statistical models</title>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>54–58</pages>
       <url hash="d9b25a7b">W14-4907</url>
       <doi>10.3115/v1/W14-4907</doi>
@@ -8876,7 +8876,7 @@
     </paper>
     <paper id="8">
       <title>A Web-based Geo-resolution Annotation and Evaluation Tool</title>
-      <author><first>Beatrice</first><last>Alex</last></author>
+      <author id="beatrice-alex"><first>Beatrice</first><last>Alex</last></author>
       <author><first>Kate</first><last>Byrne</last></author>
       <author><first>Claire</first><last>Grover</last></author>
       <author><first>Richard</first><last>Tobin</last></author>
@@ -8888,7 +8888,7 @@
     <paper id="9">
       <title>Annotating Uncertainty in <fixed-case>H</fixed-case>ungarian Webtext</title>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>Viktor</first><last>Varga</last></author>
       <pages>64–69</pages>
       <url hash="1005ab4d">W14-4909</url>
@@ -8900,7 +8900,7 @@
       <author><first>Paul</first><last>Reisert</last></author>
       <author><first>Junta</first><last>Mizuno</last></author>
       <author><first>Miwa</first><last>Kanno</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>70–74</pages>
       <url hash="dc861646">W14-4910</url>
@@ -8959,7 +8959,7 @@
     </paper>
     <paper id="16">
       <title>Annotating Discourse Connectives in Spoken <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Isin</first><last>Demirşahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirşahin</last></author>
       <author><first>Deniz</first><last>Zeyrek</last></author>
       <pages>105–109</pages>
       <url hash="ff94964e">W14-4916</url>
@@ -8989,11 +8989,11 @@
     <paper id="19">
       <title>Towards Automatic Annotation of Clinical Decision-Making Style</title>
       <author><first>Limor</first><last>Hochberg</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Esa M.</first><last>Rantanen</last></author>
       <author><first>Qi</first><last>Yu</last></author>
       <author><first>Caroline M.</first><last>DeLong</last></author>
-      <author><first>Anne</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne</first><last>Haake</last></author>
       <pages>129–138</pages>
       <url hash="3e058d61">W14-4919</url>
       <doi>10.3115/v1/W14-4919</doi>
@@ -9002,7 +9002,7 @@
     <paper id="20">
       <title>Interactive Annotation for Event Modality in Modern Standard and <fixed-case>E</fixed-case>gyptian <fixed-case>A</fixed-case>rabic Tweets</title>
       <author><first>Rania</first><last>Al-Sabbagh</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <author><first>Jana</first><last>Diesner</last></author>
       <pages>139–148</pages>
       <url hash="944c8030">W14-4920</url>
@@ -9021,7 +9021,7 @@
     <paper id="22">
       <title>Focus Annotation in Reading Comprehension Data</title>
       <author><first>Ramon</first><last>Ziai</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>159–168</pages>
       <url hash="f47f5dc0">W14-4922</url>
       <doi>10.3115/v1/W14-4922</doi>
@@ -9033,7 +9033,7 @@
       <booktitle>Proceedings of the <fixed-case>INLG</fixed-case> and <fixed-case>SIGDIAL</fixed-case> 2014 Joint Session</booktitle>
       <url hash="6969cfe0">W14-50</url>
       <editor><first>Margaret</first><last>Mitchell</last></editor>
-      <editor><first>Kathleen</first><last>McCoy</last></editor>
+      <editor id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></editor>
       <editor id="david-d-mcdonald"><first>David</first><last>McDonald</last></editor>
       <editor><first>Aoife</first><last>Cahill</last></editor>
       <doi>10.3115/v1/W14-50</doi>
@@ -9069,7 +9069,7 @@
     <paper id="3">
       <title>Crowdsourcing Language Generation Templates for Dialogue Systems</title>
       <author><first>Margaret</first><last>Mitchell</last></author>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Ece</first><last>Kamar</last></author>
       <pages>172–180</pages>
       <url hash="fa3b95a0">W14-5003</url>
@@ -9081,9 +9081,9 @@
     <meta>
       <booktitle>Proceedings of the 11th International Conference on Natural Language Processing</booktitle>
       <url hash="6c77eabf">W14-51</url>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
-      <editor><first>Jyoti D.</first><last>Pawar</last></editor>
+      <editor id="jyoti-pawar"><first>Jyoti D.</first><last>Pawar</last></editor>
       <publisher>NLP Association of India</publisher>
       <address>Goa, India</address>
       <month>December</month>
@@ -9096,7 +9096,7 @@
     </frontmatter>
     <paper id="1">
       <title>Keynote Lecture 1: Complexity of Dependency Representations for Natural Languages</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>1</pages>
       <url hash="b0a5cb12">W14-5101</url>
       <bibkey>joshi-2014-keynote</bibkey>
@@ -9105,7 +9105,7 @@
       <title><fixed-case>SMT</fixed-case> from Agglutinative Languages: Use of Suffix Separation and Word Splitting</title>
       <author><first>Prakash B.</first><last>Pimpale</last></author>
       <author><first>Raj Nath</first><last>Patel</last></author>
-      <author><first>Sasikumar</first><last>M.</last></author>
+      <author id="sasikumar-m"><first>Sasikumar</first><last>M.</last></author>
       <pages>2–10</pages>
       <url hash="ffb0fb5c">W14-5102</url>
       <bibkey>pimpale-etal-2014-smt</bibkey>
@@ -9114,7 +9114,7 @@
       <title>Tackling Close Cousins: Experiences In Developing Statistical Machine Translation Systems For <fixed-case>M</fixed-case>arathi And <fixed-case>H</fixed-case>indi</title>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Jyotesh</first><last>Choudhari</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>11–19</pages>
       <url hash="cc67c16c">W14-5103</url>
       <bibkey>dabre-etal-2014-tackling</bibkey>
@@ -9122,16 +9122,16 @@
     <paper id="4">
       <title>Correlating decoding events with errors in Statistical Machine Translation</title>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>20–29</pages>
       <url hash="70fe05f1">W14-5104</url>
       <bibkey>avramidis-popovic-2014-correlating</bibkey>
     </paper>
     <paper id="5">
       <title>Supertag Based Pre-ordering in Machine Translation</title>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>30–38</pages>
       <url hash="d417ce1b">W14-5105</url>
       <bibkey>chatterjee-etal-2014-supertag</bibkey>
@@ -9140,7 +9140,7 @@
       <title>Duration Modeling by Multi-Models based on Vowel Production characteristics</title>
       <author><first>V Ramu</first><last>Reddy</last></author>
       <author><first>Parakrant</first><last>Sarkar</last></author>
-      <author><first>K Sreenivasa</first><last>Rao</last></author>
+      <author id="k-sreenivasa-rao"><first>K Sreenivasa</first><last>Rao</last></author>
       <pages>39–47</pages>
       <url hash="324b83dc">W14-5106</url>
       <bibkey>reddy-etal-2014-duration</bibkey>
@@ -9174,7 +9174,7 @@
     </paper>
     <paper id="10">
       <title>Keynote Lecture 2: Text Analysis for identifying Entities and their mentions in <fixed-case>I</fixed-case>ndian languages</title>
-      <author><first>Sobha</first><last>L</last></author>
+      <author id="sobha-l"><first>Sobha</first><last>L</last></author>
       <pages>68</pages>
       <url hash="67cec481">W14-5110</url>
       <bibkey>l-2014-keynote</bibkey>
@@ -9183,7 +9183,7 @@
       <title><fixed-case>H</fixed-case>in<fixed-case>MA</fixed-case>: Distributed Morphology based <fixed-case>H</fixed-case>indi Morphological Analyzer</title>
       <author><first>Ankit</first><last>Bahuguna</last></author>
       <author><first>Lavita</first><last>Talukdar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Smriti</first><last>Singh</last></author>
       <pages>69–75</pages>
       <url hash="b6b0abf1">W14-5111</url>
@@ -9191,7 +9191,7 @@
     </paper>
     <paper id="12">
       <title>Roles of Nominals in Construing Meaning at the Level of Discourse</title>
-      <author><first>Soumya Sankar</first><last>Ghosh</last></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya Sankar</first><last>Ghosh</last></author>
       <author><first>Samir</first><last>Karmakar</last></author>
       <pages>76–81</pages>
       <url hash="dfb92daa">W14-5112</url>
@@ -9201,7 +9201,7 @@
       <title>Anou Tradir: Experiences In Building Statistical Machine Translation Systems For Mauritian Languages – Creole, <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench</title>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Aneerav</first><last>Sukhoo</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>82–88</pages>
       <url hash="f5a3d4c5">W14-5113</url>
       <bibkey>dabre-etal-2014-anou</bibkey>
@@ -9211,9 +9211,9 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>89–94</pages>
       <url hash="eb63db46">W14-5114</url>
       <bibkey>pal-etal-2014-sentiment</bibkey>
@@ -9224,8 +9224,8 @@
       <author><first>Jai</first><last>Paranjape</last></author>
       <author><first>Nilesh</first><last>Joshi</last></author>
       <author><first>Irawati</first><last>Kulkarni</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>95–100</pages>
       <url hash="c9f49faf">W14-5115</url>
       <bibkey>redkar-etal-2014-introduction</bibkey>
@@ -9234,16 +9234,16 @@
       <title><fixed-case>LMS</fixed-case>im : Computing Domain-specific Semantic Word Similarities Using a Language Modeling Approach</title>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
-      <author><first>Girish K.</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish K.</first><last>Palshikar</last></author>
       <pages>101–106</pages>
       <url hash="8ea7449b">W14-5116</url>
       <bibkey>pawar-etal-2014-lmsim</bibkey>
     </paper>
     <paper id="17">
       <title>Multiobjective Optimization and Unsupervised Lexical Acquisition for Named Entity Recognition and Classification</title>
-      <author><first/><last>Govind</last></author>
+      <author id="govind-kothari"><first/><last>Govind</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>107–112</pages>
       <url hash="d221f3bd">W14-5117</url>
       <bibkey>govind-etal-2014-multiobjective</bibkey>
@@ -9253,7 +9253,7 @@
       <author><first>Aswathy P</first><last>V</last></author>
       <author><first>Arun</first><last>Gopi</last></author>
       <author><first>Sajini</first><last>T</last></author>
-      <author><first>Bhadran V</first><last>K</last></author>
+      <author id="bhadran-v-k"><first>Bhadran V</first><last>K</last></author>
       <pages>113–118</pages>
       <url hash="f9d3a40f">W14-5118</url>
       <bibkey>v-etal-2014-improving</bibkey>
@@ -9305,15 +9305,15 @@
     <paper id="24">
       <title>A Sentiment Analyzer for <fixed-case>H</fixed-case>indi Using <fixed-case>H</fixed-case>indi Senti Lexicon</title>
       <author><first>Raksha</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>150–155</pages>
       <url hash="2b1a1e35">W14-5124</url>
       <bibkey>sharma-bhattacharyya-2014-sentiment</bibkey>
     </paper>
     <paper id="25">
       <title>A Sandhi Splitter for <fixed-case>M</fixed-case>alayalam</title>
-      <author><first>Devadath V</first><last>V</last></author>
-      <author><first>Litton J</first><last>Kurisinkel</last></author>
+      <author id="devadath-v-v"><first>Devadath V</first><last>V</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton J</first><last>Kurisinkel</last></author>
       <author><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>156–161</pages>
@@ -9323,9 +9323,9 @@
     <paper id="26">
       <title><fixed-case>P</fixed-case>a<fixed-case>CM</fixed-case>an : Parallel Corpus Management Workbench</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>162–166</pages>
       <url hash="475cb8bb">W14-5126</url>
       <bibkey>kanojia-etal-2014-pacman</bibkey>
@@ -9342,15 +9342,15 @@
       <title>A Domain-Restricted, Rule Based, <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Machine Translation System Based on Dependency Parsing</title>
       <author><first>Pratik</first><last>Desai</last></author>
       <author><first>Amit</first><last>Sangodkar</last></author>
-      <author><first>Om P.</first><last>Damani</last></author>
+      <author id="om-p-damani"><first>Om P.</first><last>Damani</last></author>
       <pages>177–185</pages>
       <url hash="39d51762">W14-5128</url>
       <bibkey>desai-etal-2014-domain</bibkey>
     </paper>
     <paper id="29">
       <title>Translation of <fixed-case>TO</fixed-case> infinitives in Anusaaraka Platform: an <fixed-case>E</fixed-case>nglish <fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case> system</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
-      <author><first/><last>Sukhada</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
+      <author id="sukhada"><first/><last>Sukhada</last></author>
       <author><first>Soma</first><last>Paul</last></author>
       <pages>186–195</pages>
       <url hash="bbd67954">W14-5129</url>
@@ -9368,7 +9368,7 @@
       <title>Naturalistic Audio-Visual Emotion Database</title>
       <author><first>Sudarsana Reddy</first><last>Kadiri</last></author>
       <author><first>P.</first><last>Gangamohan</last></author>
-      <author><first>V.K.</first><last>Mittal</last></author>
+      <author id="v-k-mittal"><first>V.K.</first><last>Mittal</last></author>
       <author><first>B.</first><last>Yegnanarayana</last></author>
       <pages>206–213</pages>
       <url hash="2647e705">W14-5131</url>
@@ -9385,7 +9385,7 @@
     </paper>
     <paper id="33">
       <title>Keynote Lecture 3: Modeling <fixed-case>N</fixed-case>on-<fixed-case>P</fixed-case>ropositional Semantics</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>222</pages>
       <url hash="fcc5421a">W14-5133</url>
       <bibkey>levin-2014-keynote</bibkey>
@@ -9401,7 +9401,7 @@
     </paper>
     <paper id="35">
       <title><fixed-case>S</fixed-case>angam: A <fixed-case>P</fixed-case>erso-<fixed-case>A</fixed-case>rabic to <fixed-case>I</fixed-case>ndic Script Machine Transliteration Model</title>
-      <author><first>Gurpreet Singh</first><last>Lehal</last></author>
+      <author id="gurpreet-singh-lehal"><first>Gurpreet Singh</first><last>Lehal</last></author>
       <author><first>Tejinder Singh</first><last>Saini</last></author>
       <pages>232–239</pages>
       <url hash="c5452d4b">W14-5135</url>
@@ -9412,7 +9412,7 @@
       <author><first>Shilpa</first><last>Desai</last></author>
       <author><first>Neenad</first><last>Desai</last></author>
       <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>240–248</pages>
       <url hash="9cb20d5b">W14-5136</url>
       <bibkey>desai-etal-2014-autoparse</bibkey>
@@ -9436,7 +9436,7 @@
     </paper>
     <paper id="39">
       <title>Accurate Identification of the Karta (Subject) Relation in <fixed-case>B</fixed-case>angla</title>
-      <author><first>Arnab</first><last>Dhar</last></author>
+      <author id="arnab-dhar"><first>Arnab</first><last>Dhar</last></author>
       <author><first>Sudeshna</first><last>Sarkar</last></author>
       <pages>267–276</pages>
       <url hash="dce33b99">W14-5139</url>
@@ -9447,7 +9447,7 @@
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
       <author><first>Thiyam Ibungomacha</first><last>Singh</last></author>
       <author><first>Ngariyanbam Mayekleima</first><last>Chanu</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>277–286</pages>
       <url hash="1208bd37">W14-5140</url>
       <bibkey>nongmeikapam-etal-2014-manipuri</bibkey>
@@ -9514,7 +9514,7 @@
       <author><first>Sudha</first><last>Bhingardive</last></author>
       <author><first>Ratish</first><last>Puduppully</last></author>
       <author><first>Dhirendra</first><last>Singh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>344–352</pages>
       <url hash="52e23a80">W14-5148</url>
       <bibkey>bhingardive-etal-2014-merging</bibkey>
@@ -9522,7 +9522,7 @@
     <paper id="49">
       <title>Hierarchical Recursive Tagset for Annotating Cooking Recipes</title>
       <author><first>Sharath Reddy</first><last>Gunamgari</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Monojit</first><last>Choudhury</last></author>
       <pages>353–361</pages>
       <url hash="96ef2b3d">W14-5149</url>
@@ -9548,14 +9548,14 @@
     <paper id="52">
       <title>Identifying Languages at the Word Level in Code-Mixed <fixed-case>I</fixed-case>ndian Social Media Text</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>378–387</pages>
       <url hash="5df1cfa3">W14-5152</url>
       <bibkey>das-gamback-2014-identifying</bibkey>
     </paper>
     <paper id="53">
       <title>Unsupervised Detection and Promotion of Authoritative Domains for Medical Queries in Web Search</title>
-      <author><first>Manoj K.</first><last>Chinnakotla</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj K.</first><last>Chinnakotla</last></author>
       <author><first>Rupesh K.</first><last>Mehta</last></author>
       <author><first>Vipul</first><last>Agrawal</last></author>
       <pages>388–394</pages>
@@ -9577,7 +9577,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Open Infrastructures and Analysis Frameworks for <fixed-case>HLT</fixed-case></booktitle>
       <url hash="d7f95adf">W14-52</url>
-      <editor><first>Nancy</first><last>Ide</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
       <editor><first>Jens</first><last>Grivolla</last></editor>
       <doi>10.3115/v1/W14-52</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
@@ -9601,10 +9601,10 @@
     </paper>
     <paper id="2">
       <title>Integrating <fixed-case>UIMA</fixed-case> with Alveo, a human communication science virtual laboratory</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <author><first>Steve</first><last>Cassidy</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Denis</first><last>Burnham</last></author>
       <pages>12–22</pages>
       <url hash="ceb850a4">W14-5202</url>
@@ -9624,7 +9624,7 @@
     <paper id="4">
       <title>The Language Application Grid Web Service Exchange Vocabulary</title>
       <author><first>Nancy</first><last>Ide</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>34–43</pages>
@@ -9637,7 +9637,7 @@
       <author><first>Tadayoshi</first><last>Hara</last></author>
       <author><first>Goran</first><last>Topić</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>44–52</pages>
       <url hash="b98ada01">W14-5205</url>
       <doi>10.3115/v1/W14-5205</doi>
@@ -9646,7 +9646,7 @@
     <paper id="6">
       <title>A Conceptual Framework of Online Natural Language Processing Pipeline Application</title>
       <author><first>Chunqi</first><last>Shi</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>53–59</pages>
       <url hash="283728ab">W14-5206</url>
@@ -9657,7 +9657,7 @@
       <title>Command-line utilities for managing and exploring annotated corpora</title>
       <author><first>Joel</first><last>Nothman</last></author>
       <author><first>Tim</first><last>Dawborn</last></author>
-      <author><first>James R.</first><last>Curran</last></author>
+      <author id="james-r-curran"><first>James R.</first><last>Curran</last></author>
       <pages>60–65</pages>
       <url hash="3bdaaad9">W14-5207</url>
       <doi>10.3115/v1/W14-5207</doi>
@@ -9665,10 +9665,10 @@
     </paper>
     <paper id="8">
       <title><fixed-case>SSF</fixed-case>: A Common Representation Scheme for Language Analysis for Language Technology Infrastructure Development</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
-      <author><first>Dipti</first><last>Misra Sharma</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra Sharma</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>66–76</pages>
       <url hash="0e56a71a">W14-5208</url>
       <doi>10.3115/v1/W14-5208</doi>
@@ -9676,7 +9676,7 @@
     </paper>
     <paper id="9">
       <title>Quo Vadis <fixed-case>UIMA</fixed-case>?</title>
-      <author><first>Thilo</first><last>Götz</last></author>
+      <author id="thilo-gotz"><first>Thilo</first><last>Götz</last></author>
       <author><first>Jörn</first><last>Kottmann</last></author>
       <author><first>Alexander</first><last>Lang</last></author>
       <pages>77–82</pages>
@@ -9698,8 +9698,8 @@
     </paper>
     <paper id="11">
       <title>Intellectual Property Rights Management with Web Service Grids</title>
-      <author><first>Christopher</first><last>Cieri</last></author>
-      <author><first>Denise</first><last>DiPersio</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
+      <author id="denise-dipersio"><first>Denise</first><last>DiPersio</last></author>
       <pages>93–100</pages>
       <url hash="f4d46753">W14-5211</url>
       <doi>10.3115/v1/W14-5211</doi>
@@ -9729,7 +9729,7 @@
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <editor><first>Liling</first><last>Tan</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <doi>10.3115/v1/W14-53</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
       <address>Dublin, Ireland</address>
@@ -9780,7 +9780,7 @@
     </paper>
     <paper id="5">
       <title>Morphological Disambiguation and Text Normalization for <fixed-case>S</fixed-case>outhern <fixed-case>Q</fixed-case>uechua Varieties</title>
-      <author><first>Annette</first><last>Rios Gonzales</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios Gonzales</last></author>
       <author><first>Richard Alexander</first><last>Castro Mamani</last></author>
       <pages>39–47</pages>
       <url hash="a528298c">W14-5305</url>
@@ -9821,7 +9821,7 @@
       <title>Part-of-Speech Tag Disambiguation by Cross-Linguistic Majority Vote</title>
       <author><first>Noëmi</first><last>Aepli</last></author>
       <author><first>Ruprecht</first><last>von Waldenfels</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <pages>76–84</pages>
       <url hash="738688dc">W14-5309</url>
       <doi>10.3115/v1/W14-5309</doi>
@@ -9839,7 +9839,7 @@
     <paper id="11">
       <title>Automatically building a <fixed-case>T</fixed-case>unisian Lexicon for Deverbal Nouns</title>
       <author><first>Ahmed</first><last>Hamdi</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <pages>95–102</pages>
       <url hash="25b01df8">W14-5311</url>
@@ -9858,9 +9858,9 @@
     </paper>
     <paper id="13">
       <title>Improved Sentence-Level <fixed-case>A</fixed-case>rabic Dialect Classification</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
       <author><first>Saab</first><last>Mansour</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>110–119</pages>
       <url hash="68a7e66f">W14-5313</url>
       <doi>10.3115/v1/W14-5313</doi>
@@ -9868,7 +9868,7 @@
     </paper>
     <paper id="14">
       <title>Using Maximum Entropy Models to Discriminate between Similar Languages and Varieties</title>
-      <author><first>Jordi</first><last>Porta</last></author>
+      <author id="jordi-porta-zamorano"><first>Jordi</first><last>Porta</last></author>
       <author><first>José-Luis</first><last>Sancho</last></author>
       <pages>120–128</pages>
       <url hash="d1112755">W14-5314</url>
@@ -9880,9 +9880,9 @@
       <author><first>Marco</first><last>Lui</last></author>
       <author><first>Ned</first><last>Letcher</last></author>
       <author><first>Oliver</first><last>Adams</last></author>
-      <author><first>Long</first><last>Duong</last></author>
+      <author id="long-duong"><first>Long</first><last>Duong</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>129–138</pages>
       <url hash="1b4336f5">W14-5315</url>
       <doi>10.3115/v1/W14-5315</doi>
@@ -9890,7 +9890,7 @@
     </paper>
     <paper id="16">
       <title>The <fixed-case>NRC</fixed-case> System for Discriminating Similar Languages</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>139–145</pages>
@@ -9901,8 +9901,8 @@
     <paper id="17">
       <title>Experiments in Sentence Language Identification with Groups of Similar Languages</title>
       <author><first>Ben</first><last>King</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>146–154</pages>
       <url hash="14fa2c48">W14-5317</url>
       <doi>10.3115/v1/W14-5317</doi>
@@ -9921,11 +9921,11 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Vision and Language</booktitle>
       <url hash="67677153">W14-54</url>
-      <editor><first>Anja</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
       <editor><first>Darren</first><last>Cosker</last></editor>
       <editor><first>Frank</first><last>Keller</last></editor>
       <editor><first>William</first><last>Smith</last></editor>
-      <editor><first>Kalina</first><last>Bontcheva</last></editor>
+      <editor id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></editor>
       <editor><first>Sien</first><last>Moens</last></editor>
       <editor><first>Alan</first><last>Smeaton</last></editor>
       <doi>10.3115/v1/W14-54</doi>
@@ -9942,7 +9942,7 @@
     <paper id="1">
       <title>The Effect of Sensor Errors in Situated Human-Computer Dialogue</title>
       <author><first>Niels</first><last>Schütte</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <author><first>Brian</first><last>Mac Namee</last></author>
       <pages>1–8</pages>
       <url hash="bdf8b2ee">W14-5401</url>
@@ -9953,7 +9953,7 @@
       <title>Joint Navigation in Commander/Robot Teams: Dialog &amp; Task Performance When Vision is Bandwidth-Limited</title>
       <author><first>Douglas</first><last>Summers-Stay</last></author>
       <author><first>Taylor</first><last>Cassidy</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>9–16</pages>
       <url hash="2743313d">W14-5402</url>
       <doi>10.3115/v1/W14-5402</doi>
@@ -9963,7 +9963,7 @@
       <title><fixed-case>TUHOI</fixed-case>: <fixed-case>T</fixed-case>rento Universal Human Object Interaction Dataset</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
       <author><first>Jasper</first><last>Uijlings</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>17–24</pages>
       <url hash="1c690aa4">W14-5403</url>
       <doi>10.3115/v1/W14-5403</doi>
@@ -9983,7 +9983,7 @@
     <paper id="5">
       <title>Exploration of functional semantics of prepositions from corpora of descriptions of visual scenes</title>
       <author><first>Simon</first><last>Dobnik</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>33–37</pages>
       <url hash="cd4eb369">W14-5405</url>
       <doi>10.3115/v1/W14-5405</doi>
@@ -9994,7 +9994,7 @@
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Fei</first><last>Yan</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>38–45</pages>
       <url hash="3f825cb1">W14-5406</url>
       <doi>10.3115/v1/W14-5406</doi>
@@ -10003,8 +10003,8 @@
     <paper id="7">
       <title>Key Event Detection in Video using <fixed-case>ASR</fixed-case> and Visual Data</title>
       <author><first>Niraj</first><last>Shrestha</last></author>
-      <author><first>Aparna N.</first><last>Venkitasubramanian</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="aparna-nurani-venkitasubramanian"><first>Aparna N.</first><last>Venkitasubramanian</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>46–53</pages>
       <url hash="adcbee8a">W14-5407</url>
       <doi>10.3115/v1/W14-5407</doi>
@@ -10024,7 +10024,7 @@
     </paper>
     <paper id="9">
       <title>Semantic and geometric enrichment of 3<fixed-case>D</fixed-case> geo-spatial models with captioned photos and labelled illustrations</title>
-      <author><first>Chris</first><last>Jones</last></author>
+      <author id="christopher-jones"><first>Chris</first><last>Jones</last></author>
       <author><first>Paul</first><last>Rosin</last></author>
       <author><first>Jonathan</first><last>Slade</last></author>
       <pages>62–67</pages>
@@ -10036,7 +10036,7 @@
       <title>Weakly supervised construction of a repository of iconic images</title>
       <author><first>Lydia</first><last>Weiland</last></author>
       <author><first>Wolfgang</first><last>Effelsberg</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>68–73</pages>
       <url hash="70e0d017">W14-5410</url>
       <doi>10.3115/v1/W14-5410</doi>
@@ -10079,7 +10079,7 @@
       <author><first>Fabrice</first><last>Maurel</last></author>
       <author><first>Jean-Marc</first><last>Routoure</last></author>
       <author><first>Pierre</first><last>Beust</last></author>
-      <author><first>Gaël</first><last>Dias</last></author>
+      <author id="gael-dias"><first>Gaël</first><last>Dias</last></author>
       <pages>95–102</pages>
       <url hash="94c93127">W14-5414</url>
       <doi>10.3115/v1/W14-5414</doi>
@@ -10117,7 +10117,7 @@
       <title>Coloring Objects: Adjective-Noun Visual Semantic Compositionality</title>
       <author><first>Dat</first><last>Tien Nguyen</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>112–114</pages>
       <url hash="c33a2ae8">W14-5418</url>
       <doi>10.3115/v1/W14-5418</doi>
@@ -10144,7 +10144,7 @@
     </paper>
     <paper id="21">
       <title>Keyphrase Extraction using Textual and Visual Features</title>
-      <author><first>Yaakov</first><last>HaCohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>HaCohen-Kerner</last></author>
       <author><first>Stefanos</first><last>Vrochidis</last></author>
       <author><first>Dimitris</first><last>Liparas</last></author>
       <author><first>Anastasia</first><last>Moumtzidou</last></author>
@@ -10156,8 +10156,8 @@
     </paper>
     <paper id="22">
       <title>Towards automatic annotation of communicative gesturing</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>124–125</pages>
       <url hash="a156b247">W14-5422</url>
       <doi>10.3115/v1/W14-5422</doi>
@@ -10168,8 +10168,8 @@
     <meta>
       <booktitle>Proceedings of the Fifth Workshop on South and Southeast <fixed-case>A</fixed-case>sian Natural Language Processing</booktitle>
       <url hash="ec8aed92">W14-55</url>
-      <editor><first>Christian</first><last>Boitet</last></editor>
-      <editor><first>M.G. Abbas</first><last>Malik</last></editor>
+      <editor id="christian-boitet"><first>Christian</first><last>Boitet</last></editor>
+      <editor id="m-g-abbas-malik"><first>M.G. Abbas</first><last>Malik</last></editor>
       <doi>10.3115/v1/W14-55</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
       <address>Dublin, Ireland</address>
@@ -10202,7 +10202,7 @@
       <title>Integrating Dictionaries into an Unsupervised Model for <fixed-case>M</fixed-case>yanmar Word Segmentation</title>
       <author><first>Ye</first><last>Kyaw Thu</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <author><first>Yoshinori</first><last>Sagisaka</last></author>
       <pages>20–27</pages>
       <url hash="69c4af55">W14-5503</url>
@@ -10212,8 +10212,8 @@
     <paper id="4">
       <title>A Framework for Learning Morphology using Suffix Association Matrix</title>
       <author><first>Shilpa</first><last>Desai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>28–36</pages>
       <url hash="b1e14a25">W14-5504</url>
       <doi>10.3115/v1/W14-5504</doi>
@@ -10223,7 +10223,7 @@
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>U</fixed-case>rdu Statistical Machine Translation: Establishing a Baseline</title>
       <author><first>Bushra</first><last>Jawaid</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>37–42</pages>
       <url hash="4c57b5ee">W14-5505</url>
       <doi>10.3115/v1/W14-5505</doi>
@@ -10251,7 +10251,7 @@
     <paper id="8">
       <title>Developing an interlingual translation lexicon using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets and Grammatical Framework</title>
       <author><first>Shafqat Mumtaz</first><last>Virk</last></author>
-      <author><first>K.V.S.</first><last>Prasad</last></author>
+      <author id="k-v-s-prasad"><first>K.V.S.</first><last>Prasad</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
       <author><first>Krasimir</first><last>Angelov</last></author>
       <pages>55–64</pages>
@@ -10281,7 +10281,7 @@
     <paper id="12">
       <title>Building <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese Named Entity Corpus with Aligned Bilingual News Articles</title>
       <author><first>Quoc Hung</first><last>Ngo</last></author>
-      <author><first>Dinh</first><last>Dien</last></author>
+      <author id="dinh-dien"><first>Dinh</first><last>Dien</last></author>
       <author><first>Werner</first><last>Winiwarter</last></author>
       <pages>85–93</pages>
       <url hash="0e5f5b6a">W14-5512</url>
@@ -10313,7 +10313,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Automatic Text Simplification - Methods and Applications in the Multilingual Society (<fixed-case>ATS</fixed-case>-<fixed-case>MA</fixed-case> 2014)</booktitle>
       <url hash="f94b8c7d">W14-56</url>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
       <editor><first>Cristina</first><last>Vertan</last></editor>
       <doi>10.3115/v1/W14-56</doi>
@@ -10330,7 +10330,7 @@
     <paper id="1">
       <title>Relative clause extraction for syntactic simplification</title>
       <author><first>Iustin</first><last>Dornescu</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <author><first>Constantin</first><last>Orăsan</last></author>
       <pages>1–10</pages>
       <url hash="5eb650d6">W14-5601</url>
@@ -10341,7 +10341,7 @@
       <title>Making Biographical Data in <fixed-case>W</fixed-case>ikipedia Readable: A Pattern-based Multilingual Approach</title>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
       <author><first>María Jesús</first><last>Aranzabe</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <pages>11–20</pages>
       <url hash="b1cd42cd">W14-5602</url>
       <doi>10.3115/v1/W14-5602</doi>
@@ -10352,7 +10352,7 @@
       <author><first>Kshitij</first><last>Mishra</last></author>
       <author><first>Ankush</first><last>Soni</last></author>
       <author><first>Rahul</first><last>Sharma</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>21–29</pages>
       <url hash="13f41e62">W14-5603</url>
       <doi>10.3115/v1/W14-5603</doi>
@@ -10360,8 +10360,8 @@
     </paper>
     <paper id="4">
       <title>The Fewer, the Better? A Contrastive Study about Ways to Simplify</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <pages>30–40</pages>
       <url hash="26b2c118">W14-5604</url>
       <doi>10.3115/v1/W14-5604</doi>
@@ -10377,8 +10377,8 @@
     </paper>
     <paper id="6">
       <title>Assessing Conformance of Manually Simplified Corpora with User Requirements: the Case of Autistic Readers</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <author><first>Iustin</first><last>Dornescu</last></author>
       <pages>53–63</pages>
       <url hash="88f8c07f">W14-5606</url>
@@ -10388,7 +10388,7 @@
     <paper id="7">
       <title>Making historical texts accessible to everybody</title>
       <author><first>Cristina</first><last>Vertan</last></author>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <pages>64–68</pages>
       <url hash="3ec91eaa">W14-5607</url>
       <doi>10.3115/v1/W14-5607</doi>
@@ -10400,9 +10400,9 @@
       <booktitle>Proceedings of the First Workshop on Computational Approaches to Compound Analysis (<fixed-case>C</fixed-case>om<fixed-case>AC</fixed-case>om<fixed-case>A</fixed-case> 2014)</booktitle>
       <url hash="c64d12f5">W14-57</url>
       <editor><first>Ben</first><last>Verhoeven</last></editor>
-      <editor><first>Walter</first><last>Daelemans</last></editor>
-      <editor><first>Menno</first><last>van Zaanen</last></editor>
-      <editor><first>Gerhard</first><last>van Huyssteen</last></editor>
+      <editor id="walter-daelemans"><first>Walter</first><last>Daelemans</last></editor>
+      <editor id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></editor>
+      <editor id="gerhard-b-van-huyssteen"><first>Gerhard</first><last>van Huyssteen</last></editor>
       <doi>10.3115/v1/W14-57</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
       <address>Dublin, Ireland</address>
@@ -10417,7 +10417,7 @@
     <paper id="1">
       <title>Modelling Regular Subcategorization Changes in <fixed-case>G</fixed-case>erman Particle Verbs</title>
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>1–10</pages>
       <url hash="81e1d69e">W14-5701</url>
       <doi>10.3115/v1/W14-5701</doi>
@@ -10425,8 +10425,8 @@
     </paper>
     <paper id="2">
       <title>Splitting of Compound Terms in non-Prototypical Compounding Languages</title>
-      <author><first>Elizaveta</first><last>Clouet</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="elizaveta-loginova-clouet"><first>Elizaveta</first><last>Clouet</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <pages>11–19</pages>
       <url hash="b95fadd0">W14-5702</url>
       <doi>10.3115/v1/W14-5702</doi>
@@ -10455,7 +10455,7 @@
     <paper id="5">
       <title>Electrophysiological correlates of noun-noun compound processing by non-native speakers of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Cecile</first><last>De Cat</last></author>
-      <author><first>Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>Harald</first><last>Baayen</last></author>
       <author><first>Ekaterini</first><last>Klepousniotou</last></author>
       <pages>41–52</pages>
       <url hash="dd7c3809">W14-5705</url>
@@ -10475,8 +10475,8 @@
       <title><fixed-case>W</fixed-case>ordsyoudontknow: Evaluation of lexicon-based decompounding with unknown handling</title>
       <author><first>Karolina</first><last>Owczarzak</last></author>
       <author><first>Ferdinand</first><last>de Haan</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Don</first><last>Hindle</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="donald-hindle"><first>Don</first><last>Hindle</last></author>
       <pages>63–71</pages>
       <url hash="525a2a57">W14-5707</url>
       <doi>10.3115/v1/W14-5707</doi>
@@ -10484,7 +10484,7 @@
     </paper>
     <paper id="8">
       <title>Multiword noun compound bracketing using <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <author><first>Pierre André</first><last>Ménard</last></author>
       <pages>72–80</pages>
       <url hash="fb3cd40d">W14-5708</url>
@@ -10495,9 +10495,9 @@
       <title>Distinguishing Degrees of Compositionality in Compound Splitting for Statistical Machine Translation</title>
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Fabienne</first><last>Cap</last></author>
-      <author><first>Stefan</first><last>Müller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>81–90</pages>
       <url hash="53523498">W14-5709</url>
       <doi>10.3115/v1/W14-5709</doi>
@@ -10509,13 +10509,13 @@
       <booktitle>Proceedings of Workshop on Lexical and Grammatical Resources for Language Processing</booktitle>
       <url hash="829f7986">W14-58</url>
       <editor><first>Jorge</first><last>Baptista</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <editor><first>Christiane</first><last>Fellbaum</last></editor>
-      <editor><first>Mikel</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <editor><first>Svetla</first><last>Koeva</last></editor>
       <editor><first>Cvetana</first><last>Krstev</last></editor>
-      <editor><first>Eric</first><last>Laporte</last></editor>
+      <editor id="eric-laporte"><first>Eric</first><last>Laporte</last></editor>
       <doi>10.3115/v1/W14-58</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
       <address>Dublin, Ireland</address>
@@ -10538,7 +10538,7 @@
     <paper id="2">
       <title>Using language technology resources and tools to construct <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Dana</first><last>Dannélls</last></author>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <author><first>Anna</first><last>Ehrlemark</last></author>
       <pages>8–17</pages>
       <url hash="4edda427">W14-5802</url>
@@ -10574,7 +10574,7 @@
     </paper>
     <paper id="6">
       <title>Linguistically motivated Language Resources for Sentiment Analysis</title>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Aggeliki</first><last>Fotopoulou</last></author>
       <pages>39–45</pages>
       <url hash="879cb951">W14-5806</url>
@@ -10583,8 +10583,8 @@
     </paper>
     <paper id="7">
       <title>Using Morphosemantic Information in Construction of a Pilot Lexical Semantic Resource for <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Gözde Gül</first><last>İşgüder</last></author>
-      <author><first>Eşref</first><last>Adalı</last></author>
+      <author id="gozde-gul-sahin"><first>Gözde Gül</first><last>İşgüder</last></author>
+      <author id="esref-adali"><first>Eşref</first><last>Adalı</last></author>
       <pages>46–54</pages>
       <url hash="8a90e926">W14-5807</url>
       <doi>10.3115/v1/W14-5807</doi>
@@ -10592,9 +10592,9 @@
     </paper>
     <paper id="8">
       <title>Comparing <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish <fixed-case>AMR</fixed-case>s</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>55–64</pages>
       <url hash="5403c6cf">W14-5808</url>
       <doi>10.3115/v1/W14-5808</doi>
@@ -10639,8 +10639,8 @@
       <author><first>Amanda</first><last>Rassi</last></author>
       <author><first>Cristina</first><last>Santos-Turati</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
       <pages>92–101</pages>
       <url hash="7b01b831">W14-5812</url>
       <doi>10.3115/v1/W14-5812</doi>
@@ -10650,7 +10650,7 @@
       <title>Collaboratively Constructed Linguistic Resources for Language Variants and their Exploitation in <fixed-case>NLP</fixed-case> Application – the case of <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic and the Social Media</title>
       <author><first>Fatiha</first><last>Sadat</last></author>
       <author><first>Fatma</first><last>Mallek</last></author>
-      <author><first>Mohamed</first><last>Boudabous</last></author>
+      <author id="mohamed-mahdi-boudabous"><first>Mohamed</first><last>Boudabous</last></author>
       <author><first>Rahma</first><last>Sellami</last></author>
       <author><first>Atefeh</first><last>Farzindar</last></author>
       <pages>102–110</pages>
@@ -10661,7 +10661,7 @@
     <paper id="14">
       <title>A Database of Paradigmatic Semantic Relation Pairs for <fixed-case>G</fixed-case>erman Nouns, Verbs, and Adjectives</title>
       <author><first>Silke</first><last>Scheible</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>111–119</pages>
       <url hash="6db8adda">W14-5814</url>
       <doi>10.3115/v1/W14-5814</doi>
@@ -10671,7 +10671,7 @@
       <title>Improving the Precision of Synset Links Between Cornetto and <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Leen</first><last>Sevens</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>120–126</pages>
       <url hash="7d58bbc2">W14-5815</url>
       <doi>10.3115/v1/W14-5815</doi>
@@ -10680,8 +10680,8 @@
     <paper id="16">
       <title>Light verb constructions with ‘do’ and ‘be’ in <fixed-case>H</fixed-case>indi: A <fixed-case>TAG</fixed-case> analysis</title>
       <author><first>Ashwini</first><last>Vaidya</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>127–136</pages>
       <url hash="b97c85d2">W14-5816</url>
       <doi>10.3115/v1/W14-5816</doi>
@@ -10720,7 +10720,7 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Natural Language Processing for Social Media (<fixed-case>S</fixed-case>ocial<fixed-case>NLP</fixed-case>)</booktitle>
       <url hash="2d960d4a">W14-59</url>
-      <editor><first>Shou-de</first><last>Lin</last></editor>
+      <editor id="shou-de-lin"><first>Shou-de</first><last>Lin</last></editor>
       <editor><first>Lun-Wei</first><last>Ku</last></editor>
       <editor><first>Erik</first><last>Cambria</last></editor>
       <editor><first>Tsung-Ting</first><last>Kuo</last></editor>
@@ -10746,7 +10746,7 @@
     <paper id="2">
       <title>Feature Selection for Highly Skewed Sentiment Analysis Tasks</title>
       <author><first>Can</first><last>Liu</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Ning</first><last>Yu</last></author>
       <pages>2–11</pages>
       <url hash="ad5ab19d">W14-5902</url>
@@ -10760,7 +10760,7 @@
       <author><first>Daniel</first><last>Dakota</last></author>
       <author><first>Sridhar</first><last>Rajagopalan</last></author>
       <author><first>Wen</first><last>Li</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Ning</first><last>Yu</last></author>
       <pages>12–21</pages>
       <url hash="7e44b521">W14-5903</url>
@@ -10783,7 +10783,7 @@
       <author><first>Erik</first><last>Cambria</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <author><first>Chen</first><last>Gui</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>28–37</pages>
       <url hash="eaa878ec">W14-5905</url>
       <doi>10.3115/v1/W14-5905</doi>
@@ -10835,9 +10835,9 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>COLING</fixed-case> Workshop on Synchronic and Diachronic Approaches to Analyzing Technical Language</booktitle>
       <url hash="5352bb86">W14-60</url>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <editor><first>Yifan</first><last>He</last></editor>
-      <editor><first>Ralph</first><last>Grishman</last></editor>
+      <editor id="ralph-grishman"><first>Ralph</first><last>Grishman</last></editor>
       <doi>10.3115/v1/W14-60</doi>
       <publisher>Association for Computational Linguistics and Dublin City University</publisher>
       <address>Dublin, Ireland</address>
@@ -10851,7 +10851,7 @@
     </frontmatter>
     <paper id="1">
       <title>Investigating Context Parameters in Technology Term Recognition</title>
-      <author><first>Behrang Q.</first><last>Zadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang Q.</first><last>Zadeh</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>1–10</pages>
       <url hash="c5e77b8b">W14-6001</url>
@@ -10873,7 +10873,7 @@
     </paper>
     <paper id="3">
       <title>Ontology-based Technical Text Annotation</title>
-      <author><first>François</first><last>Lévy</last></author>
+      <author id="francois-levy"><first>François</first><last>Lévy</last></author>
       <author><first>Nadi</first><last>Tomeh</last></author>
       <author><first>Yue</first><last>Ma</last></author>
       <pages>21–30</pages>
@@ -10883,9 +10883,9 @@
     </paper>
     <paper id="4">
       <title>Extracting Aspects and Polarity from Patents</title>
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>31–39</pages>
       <url hash="d952acb2">W14-6004</url>
       <doi>10.3115/v1/W14-6004</doi>
@@ -10921,8 +10921,8 @@
       <editor><first>Yuval</first><last>Marton</last></editor>
       <editor><first>Ines</first><last>Rehbein</last></editor>
       <editor><first>Yannick</first><last>Versley</last></editor>
-      <editor><first>Özlem</first><last>Çetinoğlu</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <publisher>Dublin City University</publisher>
       <address>Dublin, Ireland</address>
       <month>August</month>
@@ -10936,9 +10936,9 @@
     <paper id="1">
       <title>Parsing <fixed-case>G</fixed-case>erman: How Much Morphology Do We Need?</title>
       <author><first>Wolfgang</first><last>Maier</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Daniel</first><last>Dakota</last></author>
-      <author><first>Daniel</first><last>Whyatt</last></author>
+      <author id="daniel-whyatt"><first>Daniel</first><last>Whyatt</last></author>
       <pages>1–14</pages>
       <url hash="31dc72cc">W14-6101</url>
       <bibkey>maier-etal-2014-parsing</bibkey>
@@ -10948,7 +10948,7 @@
       <author><first>Iliana</first><last>Simova</last></author>
       <author><first>Dimitar</first><last>Vasilev</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>15–25</pages>
       <url hash="49442054">W14-6102</url>
@@ -10972,7 +10972,7 @@
       <title>Exploring Options for Fast Domain Adaptation of Dependency Parsers</title>
       <author><first>Viktor</first><last>Pekar</last></author>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Mohab</first><last>El-karef</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>El-karef</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>54–65</pages>
       <url hash="6c75f177">W14-6105</url>
@@ -10997,9 +10997,9 @@
     </paper>
     <paper id="8">
       <title>Initial Explorations in Two-phase <fixed-case>T</fixed-case>urkish Dependency Parsing by Incorporating Constituents</title>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>Ahmet Afşın</first><last>Akın</last></author>
-      <author><first>Ertuǧrul</first><last>Yılmaz</last></author>
+      <author id="ertugrul-yilmaz"><first>Ertuǧrul</first><last>Yılmaz</last></author>
       <pages>82–89</pages>
       <url hash="a677d2e9">W14-6108</url>
       <bibkey>durgar-el-kahlout-etal-2014-initial</bibkey>
@@ -11007,7 +11007,7 @@
     <paper id="9">
       <title>Experiments for Dependency Parsing of <fixed-case>G</fixed-case>reek</title>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <pages>90–96</pages>
       <url hash="4fd516cc">W14-6109</url>
       <bibkey>prokopidis-papageorgiou-2014-experiments</bibkey>
@@ -11016,9 +11016,9 @@
       <title>Introducing the <fixed-case>IMS</fixed-case>-Wrocław-<fixed-case>S</fixed-case>zeged-<fixed-case>CIS</fixed-case> entry at the <fixed-case>SPMRL</fixed-case> 2014 Shared Task: Reranking and Morpho-syntax meet Unlabeled Data</title>
       <author><first>Anders</first><last>Björkelund</last></author>
       <author><first>Özlem</first><last>Çetinoğlu</last></author>
-      <author><first>Agnieszka</first><last>Faleńska</last></author>
-      <author><first>Richárd</first><last>Farkas</last></author>
-      <author><first>Thomas</first><last>Mueller</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Faleńska</last></author>
+      <author id="richard-farkas"><first>Richárd</first><last>Farkas</last></author>
+      <author id="thomas-mueller"><first>Thomas</first><last>Mueller</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <author><first>Zsolt</first><last>Szántó</last></author>
       <pages>97–102</pages>
@@ -11027,8 +11027,8 @@
     </paper>
     <paper id="11">
       <title>Introducing the <fixed-case>SPMRL</fixed-case> 2014 Shared Task on Parsing Morphologically-rich Languages</title>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Reut</first><last>Tsarfaty</last></author>
       <pages>103–109</pages>
       <url hash="f2b4d62e">W14-6111</url>
@@ -11092,8 +11092,8 @@
       <author><first>Tarek</first><last>Kirchhoffer</last></author>
       <author><first>Hans Werner</first><last>Müller</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>25–32</pages>
       <url hash="27fa7f5b">W14-6204</url>
       <doi>10.3115/v1/W14-6204</doi>
@@ -11137,10 +11137,10 @@
     </frontmatter>
     <paper id="1">
       <title>Automatic Analysis of Scientific and Literary Texts. Presentation and Results of the <fixed-case>DEFT</fixed-case>2014 Text Mining Challenge (Analyse automatique de textes littéraires et scientifiques : présentation et résultats du défi fouille de texte <fixed-case>DEFT</fixed-case>2014) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Quentin</first><last>Pleplé</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <pages>1-10</pages>
       <url hash="acb163b1">W14-6301</url>
@@ -11156,7 +11156,7 @@
     </paper>
     <paper id="3">
       <title>Combining semantic spaces along with structure and constraints (Combiner espaces sémantiques, structure et contraintes) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Adil</first><last>El Ghali</last></author>
+      <author id="adil-el-ghali"><first>Adil</first><last>El Ghali</last></author>
       <author><first>Kaoutar</first><last>El Ghali</last></author>
       <pages>20-30</pages>
       <url hash="22225d2f">W14-6303</url>
@@ -11171,21 +11171,21 @@
     </paper>
     <paper id="5">
       <title>Fine-grained semantic categorization of opinion expressions for consensus detection (Catégorisation sémantique fine des expressions d’opinion pour la détection de consensus) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Farah</first><last>Benamara</last></author>
-      <author><first>Véronique</first><last>Moriceau</last></author>
-      <author><first>Yvette Yannick</first><last>Mathieu</last></author>
+      <author id="farah-benamara"><first>Farah</first><last>Benamara</last></author>
+      <author id="veronique-moriceau"><first>Véronique</first><last>Moriceau</last></author>
+      <author id="yannick-mathieu"><first>Yvette Yannick</first><last>Mathieu</last></author>
       <pages>36-44</pages>
       <url hash="11d006a0">W14-6305</url>
       <bibkey>benamara-etal-2014-fine</bibkey>
     </paper>
     <paper id="6">
       <title>Classification and Optimization Algorithms: the <fixed-case>LIA</fixed-case>/<fixed-case>ADOC</fixed-case> participation at <fixed-case>DEFT</fixed-case>’14 (Algorithmes de classification et d’optimisation : participation du <fixed-case>LIA</fixed-case>/<fixed-case>ADOC</fixed-case> à <fixed-case>DEFT</fixed-case>’14) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
+      <author id="luis-adrian-cabrera-diego"><first>Luis Adrián</first><last>Cabrera-Diego</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
       <author><first>Alejandro</first><last>Molina</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
-      <author><first>Marc</first><last>El-Bèze</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Bèze</last></author>
       <author><first>Barthélémy</first><last>Durette</last></author>
       <pages>45-52</pages>
       <url hash="28bab534">W14-6306</url>
@@ -11202,7 +11202,7 @@
     </paper>
     <paper id="8">
       <title>Introductory experiments with evolutionary optimization of reflective semantic</title>
-      <author><first>Daniel</first><last>Devatman Hromada</last></author>
+      <author id="daniel-hromada"><first>Daniel</first><last>Devatman Hromada</last></author>
       <pages>64-68</pages>
       <url hash="140b5a08">W14-6308</url>
       <bibkey>devatman-hromada-2014-introductory</bibkey>
@@ -11212,7 +11212,7 @@
     <meta>
       <booktitle><fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2014 Workshop <fixed-case>F</fixed-case>ondamen<fixed-case>TAL</fixed-case> 2014 : Ressources lexicales et <fixed-case>TAL</fixed-case> - vue d’ensemble sur les dictionnaires électroniques de Jean <fixed-case>D</fixed-case>ubois et Françoise <fixed-case>D</fixed-case>ubois-<fixed-case>C</fixed-case>harlier (<fixed-case>F</fixed-case>ondamen<fixed-case>TAL</fixed-case> 2014 : Lexical Resources and <fixed-case>NLP</fixed-case>)</booktitle>
       <url hash="73a6677f">W14-64</url>
-      <editor><first>Marie-Hélène</first><last>Stéfanini</last></editor>
+      <editor id="marie-helene-stefanini"><first>Marie-Hélène</first><last>Stéfanini</last></editor>
       <editor><first>Denis</first><last>Le Pesant</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -11242,7 +11242,7 @@
     </paper>
     <paper id="3">
       <title>The Electronic Dictionary <fixed-case>DEM</fixed-case> in <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> (Le dictionnaire <fixed-case>DEM</fixed-case> dans <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Max</first><last>Silberztein</last></author>
+      <author id="max-silberztein"><first>Max</first><last>Silberztein</last></author>
       <pages>80-84</pages>
       <url hash="8059dd8f">W14-6403</url>
       <bibkey>silberztein-2014-electronic</bibkey>
@@ -11284,7 +11284,7 @@
     <meta>
       <booktitle><fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2014 Workshop <fixed-case>TALA</fixed-case>f 2014 : Traitement Automatique des Langues Africaines (<fixed-case>TALA</fixed-case>f 2014: <fixed-case>A</fixed-case>frican Language Processing)</booktitle>
       <url hash="1b756fe6">W14-65</url>
-      <editor><first>Mathieu</first><last>Mangeot</last></editor>
+      <editor id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></editor>
       <editor><first>Fatiha</first><last>Sadat</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -11319,7 +11319,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>PFM</fixed-case>: Ikota’s morphology implementation in <fixed-case>XMG</fixed-case> (<fixed-case>PFM</fixed-case> : pour une implémentation de la morphologie de l’ikota dans <fixed-case>XMG</fixed-case>) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Brunelle</first><last>Magnana Ekoukou</last></author>
+      <author id="brunelle-magnana-ekoukou"><first>Brunelle</first><last>Magnana Ekoukou</last></author>
       <pages>134-140</pages>
       <url hash="9b5691a9">W14-6504</url>
       <bibkey>magnana-ekoukou-2014-pfm</bibkey>
@@ -11375,14 +11375,14 @@
     <meta>
       <booktitle><fixed-case>TALN</fixed-case>-<fixed-case>RECITAL</fixed-case> 2014 Workshop <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>is 2014 : Enjeux actuels de la sémantique distributionnelle (<fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>is 2014: Current Challenges in Distributional Semantics)</booktitle>
       <url hash="d389b8b8">W14-66</url>
-      <editor><first>Cécile</first><last>Fabre</last></editor>
+      <editor id="cecile-fabre"><first>Cécile</first><last>Fabre</last></editor>
       <editor><first>Nabil</first><last>Hathout</last></editor>
-      <editor><first>Lydia-Mai</first><last>Ho-Dac</last></editor>
+      <editor id="lydia-mai-ho-dac"><first>Lydia-Mai</first><last>Ho-Dac</last></editor>
       <editor><first>François</first><last>Morlane-Hondère</last></editor>
       <editor><first>Philippe</first><last>Muller</last></editor>
       <editor><first>Franck</first><last>Sajous</last></editor>
       <editor><first>Ludovic</first><last>Tanguy</last></editor>
-      <editor><first>Tim</first><last>Van de Cruys</last></editor>
+      <editor id="tim-van-de-cruys"><first>Tim</first><last>Van de Cruys</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
       <month>July</month>
@@ -11463,7 +11463,7 @@
       <booktitle>TALN-RECITAL 2014 Workshop RLTLN 2014 : Réseaux Lexicaux pour le TAL (RLTLN 2014 : Lexical Networks for NLP)</booktitle>
       <url hash="e02a90c8">W14-67</url>
       <editor><first>Michael</first><last>Zock</last></editor>
-      <editor><first>Gemma</first><last>Bel-Enguix</last></editor>
+      <editor id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <publisher>Association pour le Traitement Automatique des Langues</publisher>
       <address>Marseille, France</address>
@@ -11477,7 +11477,7 @@
     </frontmatter>
     <paper id="1">
       <title>Lexical Networks, Natural Language Processing and Linked Open Data (Réseaux Lexicaux, Traitement des Langues, et Données Liées Ouvertes) [in <fixed-case>F</fixed-case>rench]</title>
-      <author><first>Gilles</first><last>Sérasset</last></author>
+      <author id="gilles-serasset"><first>Gilles</first><last>Sérasset</last></author>
       <pages>280-280</pages>
       <url hash="097f941b">W14-6701</url>
       <bibkey>serasset-2014-lexical</bibkey>
@@ -11496,7 +11496,7 @@
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Emmanuel</first><last>Navarro</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <pages>291-301</pages>
       <url hash="5a7a0e36">W14-6703</url>
       <bibkey>gaume-etal-2014-measuring</bibkey>
@@ -11529,9 +11529,9 @@
       <booktitle>Proceedings of the Third <fixed-case>CIPS</fixed-case>-<fixed-case>SIGHAN</fixed-case> Joint Conference on <fixed-case>C</fixed-case>hinese Language Processing</booktitle>
       <url hash="46549048">W14-68</url>
       <editor><first>Le</first><last>Sun</last></editor>
-      <editor><first>Chengqing</first><last>Zong</last></editor>
+      <editor id="chengqing-zong"><first>Chengqing</first><last>Zong</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
-      <editor><first>Gina-Anne</first><last>Levow</last></editor>
+      <editor id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></editor>
       <doi>10.3115/v1/W14-68</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Wuhan, China</address>
@@ -11545,7 +11545,7 @@
     </frontmatter>
     <paper id="1">
       <title>Research on <fixed-case>C</fixed-case>hinese discourse rhetorical structure representation scheme and corpus annotation</title>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>1</pages>
       <url hash="e3f56dc7">W14-6801</url>
       <doi>10.3115/v1/W14-6801</doi>
@@ -11595,7 +11595,7 @@
       <author><first>Peijie</first><last>Huang</last></author>
       <author><first>Xianmao</first><last>Lin</last></author>
       <author><first>Zeqi</first><last>Lian</last></author>
-      <author><first>De</first><last>Yang</last></author>
+      <author id="dechuan-yang"><first>De</first><last>Yang</last></author>
       <author><first>Xiaoling</first><last>Tang</last></author>
       <author><first>Li</first><last>Huang</last></author>
       <author><first>Qiang</first><last>Huang</last></author>
@@ -11642,7 +11642,7 @@
     <paper id="10">
       <title>Bilingual Product Name Dictionary Construction Using a Two Stage Method</title>
       <author><first>Yatian</first><last>Shen</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>61–69</pages>
       <url hash="59d823e4">W14-6810</url>
       <doi>10.3115/v1/W14-6810</doi>
@@ -11752,9 +11752,9 @@
     </paper>
     <paper id="20">
       <title>Overview of <fixed-case>SIGHAN</fixed-case> 2014 Bake-off for <fixed-case>C</fixed-case>hinese Spelling Check</title>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>126–132</pages>
       <url hash="f3590404">W14-6820</url>
@@ -11768,7 +11768,7 @@
       <author><first>Jianpeng</first><last>Hou</last></author>
       <author><first>Qianbo</first><last>Wang</last></author>
       <author><first>Yuanzhuo</first><last>Wang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>133–138</pages>
       <url hash="16a14dbc">W14-6821</url>
       <doi>10.3115/v1/W14-6821</doi>
@@ -11841,7 +11841,7 @@
       <title>Introduction to <fixed-case>BIT</fixed-case> <fixed-case>C</fixed-case>hinese Spelling Correction System at <fixed-case>CLP</fixed-case> 2014 Bake-off</title>
       <author><first>Min</first><last>Liu</last></author>
       <author><first>Ping</first><last>Jian</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>179–185</pages>
       <url hash="54dad74a">W14-6828</url>
       <doi>10.3115/v1/W14-6828</doi>
@@ -11868,7 +11868,7 @@
       <author><first>Nan-chang</first><last>Cheng</last></author>
       <author><first>Cheng-qing</first><last>Zong</last></author>
       <author><first>Min</first><last>Hou</last></author>
-      <author><first>Yong-lin</first><last>Teng</last></author>
+      <author id="yonglin-teng"><first>Yong-lin</first><last>Teng</last></author>
       <pages>192–201</pages>
       <url hash="ee8e7046">W14-6831</url>
       <doi>10.3115/v1/W14-6831</doi>
@@ -11876,9 +11876,9 @@
     </paper>
     <paper id="32">
       <title><fixed-case>C</fixed-case>hinese Spell Checking Based on Noisy Channel Model</title>
-      <author><first>Hsun-wen</first><last>Chiu</last></author>
-      <author><first>Jian-cheng</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="hsun-wen-chiu"><first>Hsun-wen</first><last>Chiu</last></author>
+      <author id="jian-cheng-wu"><first>Jian-cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>202–209</pages>
       <url hash="e3eb6bc6">W14-6832</url>
       <doi>10.3115/v1/W14-6832</doi>
@@ -11895,7 +11895,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>NCTU</fixed-case> and <fixed-case>NTUT</fixed-case>’s Entry to <fixed-case>CLP</fixed-case>-2014 <fixed-case>C</fixed-case>hinese Spelling Check Evaluation</title>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>216–219</pages>
       <url hash="2baab93e">W14-6834</url>
@@ -11916,7 +11916,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Natural Language Processing in the 5th Information Systems Research Working Days (<fixed-case>JISIC</fixed-case>)</booktitle>
       <url hash="bbfa134e">W14-69</url>
-      <editor><first>Myriam</first><last>Hernandez</last></editor>
+      <editor id="myriam-hernandez"><first>Myriam</first><last>Hernandez</last></editor>
       <editor><first>Josafá</first><last>de Jesus Aguiar Pontes</last></editor>
       <doi>10.3115/v1/W14-69</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -11946,7 +11946,7 @@
     </paper>
     <paper id="3">
       <title>Language Technologies for Suicide Prevention in Social Media</title>
-      <author><first>Jose M.</first><last>Gomez</last></author>
+      <author id="jose-m-gomez"><first>Jose M.</first><last>Gomez</last></author>
       <pages>21-29</pages>
       <url hash="93f380b0">W14-6903</url>
       <doi>10.3115/v1/W14-6903</doi>
@@ -11955,8 +11955,8 @@
     <paper id="4">
       <title>A Supervised Approach for Sentiment Analysis using Skipgrams</title>
       <author><first>Javi</first><last>Fernández</last></author>
-      <author><first>José M.</first><last>Gómez</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="jose-m-gomez"><first>José M.</first><last>Gómez</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>30-36</pages>
       <url hash="6727154b">W14-6904</url>
       <doi>10.3115/v1/W14-6904</doi>
@@ -11965,7 +11965,7 @@
     <paper id="5">
       <title>Emotion Detection from text: A Survey</title>
       <author><first>Lea</first><last>Canales</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>37-43</pages>
       <url hash="0e8e97b9">W14-6905</url>
       <doi>10.3115/v1/W14-6905</doi>
@@ -11979,7 +11979,7 @@
       <editor><first>Hideya</first><last>Mino</last></editor>
       <editor><first>Isao</first><last>Goto</last></editor>
       <editor><first>Sadao</first><last>Kurohashi</last></editor>
-      <editor><first>Eiichiro</first><last>Sumita</last></editor>
+      <editor id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></editor>
       <publisher>Workshop on Asian Translation</publisher>
       <address>Tokyo, Japan</address>
       <month>October</month>
@@ -12037,7 +12037,7 @@
       <author><first>Jingsheng</first><last>Cai</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <author><first>Hua</first><last>Shan</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <url hash="178bd641">W14-7005</url>
       <pages>39–43</pages>
       <bibkey>cai-etal-2014-system</bibkey>
@@ -12066,7 +12066,7 @@
       <author><first>Sho</first><last>Hoshino</last></author>
       <author><first>Hubert</first><last>Soyer</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <url hash="bb7776eb">W14-7008</url>
       <attachment type="poster" hash="8b23413b">W14-7008.Poster.pdf</attachment>
       <pages>55–63</pages>
@@ -12106,7 +12106,7 @@
     <paper id="12">
       <title><fixed-case>K</fixed-case>yoto<fixed-case>EBMT</fixed-case> System Description for the 1st Workshop on <fixed-case>A</fixed-case>sian Translation</title>
       <author><first>John</first><last>Richardson</last></author>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <url hash="1b135bc3">W14-7012</url>
diff --git a/data/xml/W15.xml b/data/xml/W15.xml
index 0c66975ed9..0a2dfc2066 100644
--- a/data/xml/W15.xml
+++ b/data/xml/W15.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 11th International Conference on Computational Semantics</booktitle>
       <url hash="cfb9bffd">W15-01</url>
       <editor><first>Matthew</first><last>Purver</last></editor>
-      <editor><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
+      <editor id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
       <editor><first>Matthew</first><last>Stone</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>London, UK</address>
@@ -37,7 +37,7 @@
     <paper id="3">
       <title>From Adjective Glosses to Attribute Concepts: Learning Different Aspects That an Adjective Can Describe</title>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>23–33</pages>
       <url hash="1efa06fa">W15-0103</url>
       <bibkey>bakhshandeh-allen-2015-adjective</bibkey>
@@ -45,7 +45,7 @@
     <paper id="4">
       <title>Exploiting Fine-grained Syntactic Transfer Features to Predict the Compositionality of <fixed-case>G</fixed-case>erman Particle Verbs</title>
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>34–39</pages>
       <url hash="923a3487">W15-0104</url>
       <bibkey>bott-schulte-im-walde-2015-exploiting</bibkey>
@@ -54,23 +54,23 @@
       <title>Multilingual Reliability and “Semantic” Structure of Continuous Word Spaces</title>
       <author><first>Maximilian</first><last>Köper</last></author>
       <author><first>Christian</first><last>Scheible</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>40–45</pages>
       <url hash="f0816ffb">W15-0105</url>
       <bibkey>koper-etal-2015-multilingual</bibkey>
     </paper>
     <paper id="6">
       <title>Clarifying Intentions in Dialogue: A Corpus Study</title>
-      <author><first>Julian J.</first><last>Schlöder</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="julian-j-schloder"><first>Julian J.</first><last>Schlöder</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>46–51</pages>
       <url hash="eab15096">W15-0106</url>
       <bibkey>schloder-fernandez-2015-clarifying</bibkey>
     </paper>
     <paper id="7">
       <title>From distributional semantics to feature norms: grounding semantic models in human perceptual data</title>
-      <author><first>Luana</first><last>Fagarasan</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="luana-fagarasan"><first>Luana</first><last>Fagarasan</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <author><first>Stephen</first><last>Clark</last></author>
       <pages>52–57</pages>
       <url hash="3e2e2bce">W15-0107</url>
@@ -79,7 +79,7 @@
     <paper id="8">
       <title>Obtaining a Better Understanding of Distributional Models of <fixed-case>G</fixed-case>erman Derivational Morphology</title>
       <author><first>Max</first><last>Kisselew</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>58–63</pages>
@@ -105,10 +105,10 @@
     <paper id="11">
       <title>Alignment of Eye Movements and Spoken Language for Semantic Image Understanding</title>
       <author><first>Preethi</first><last>Vaidyanathan</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
-      <author><first>Cecilia</first><last>O. Alm</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>O. Alm</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
-      <author><first>Anne R.</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne R.</first><last>Haake</last></author>
       <pages>76–81</pages>
       <url hash="b7acee92">W15-0111</url>
       <bibkey>vaidyanathan-etal-2015-alignment</bibkey>
@@ -116,7 +116,7 @@
     <paper id="12">
       <title>From a Distance: Using Cross-lingual Word Alignments for Noun Compound Bracketing</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>82–87</pages>
       <url hash="ea93bfa8">W15-0112</url>
       <bibkey>ziering-van-der-plas-2015-distance</bibkey>
@@ -124,15 +124,15 @@
     <paper id="13">
       <title>Unsupervised Learning of Coherent and General Semantic Classes for Entity Aggregates</title>
       <author><first>Henry</first><last>Anaya-Sánchez</last></author>
-      <author><first>Anselmo</first><last>Peñas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Peñas</last></author>
       <pages>88–93</pages>
       <url hash="54fdcd85">W15-0113</url>
       <bibkey>anaya-sanchez-penas-2015-unsupervised</bibkey>
     </paper>
     <paper id="14">
       <title>Crowdsourced Word Sense Annotations and Difficult Words and Examples</title>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>94–100</pages>
       <url hash="3880498d">W15-0114</url>
       <bibkey>lopez-de-lacalle-agirre-2015-crowdsourced</bibkey>
@@ -156,7 +156,7 @@
     </paper>
     <paper id="17">
       <title>Uniform Surprisal at the Level of Discourse Relations: Negation Markers and Discourse Connective Omission</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>118–128</pages>
       <url hash="7d446df2">W15-0117</url>
@@ -172,7 +172,7 @@
     </paper>
     <paper id="19">
       <title>On the Proper Treatment of Quantifiers in Probabilistic Logic Semantics</title>
-      <author><first>Islam</first><last>Beltagy</last></author>
+      <author id="islam-beltagy"><first>Islam</first><last>Beltagy</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
       <pages>140–150</pages>
       <url hash="6647b01d">W15-0119</url>
@@ -180,7 +180,7 @@
     </paper>
     <paper id="20">
       <title>Mr Darcy and Mr Toad, gentlemen: distributional names and their kinds</title>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>151–161</pages>
       <url hash="3246e309">W15-0120</url>
       <bibkey>herbelot-2015-mr</bibkey>
@@ -196,7 +196,7 @@
     <paper id="22">
       <title>Automatic Noun Compound Interpretation using Deep Neural Networks and Word Embeddings</title>
       <author><first>Corina</first><last>Dima</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>173–183</pages>
       <url hash="813ce53d">W15-0122</url>
       <bibkey>dima-hinrichs-2015-automatic</bibkey>
@@ -204,7 +204,7 @@
     <paper id="23">
       <title>Integrating Non-Linguistic Events into Discourse Structure</title>
       <author><first>Julie</first><last>Hunter</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <author><first>Alex</first><last>Lascarides</last></author>
       <pages>184–194</pages>
       <url hash="4bf6ac4c">W15-0123</url>
@@ -212,7 +212,7 @@
     </paper>
     <paper id="24">
       <title>A Discriminative Model for Perceptually-Grounded Incremental Reference Resolution</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>Livia</first><last>Dia</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>195–205</pages>
@@ -222,7 +222,7 @@
     <paper id="25">
       <title>Incremental Semantics for Dialogue Processing: Requirements, and a Comparison of Two Approaches</title>
       <author><first>Julian</first><last>Hough</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <author><first>Jonathan</first><last>Ginzburg</last></author>
       <pages>206–216</pages>
@@ -231,7 +231,7 @@
     </paper>
     <paper id="26">
       <title>Semantic Dependency Graph Parsing Using Tree Approximations</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Alexander</first><last>Koller</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <pages>217–227</pages>
@@ -247,8 +247,8 @@
     </paper>
     <paper id="28">
       <title>Layers of Interpretation: On Grammar and Compositionality</title>
-      <author><first>Emily M.</first><last>Bender</last></author>
-      <author><first>Dan</first><last>Flickinger</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Woodley</first><last>Packard</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
@@ -258,8 +258,8 @@
     </paper>
     <paper id="29">
       <title>Pragmatic Rejection</title>
-      <author><first>Julian J.</first><last>Schlöder</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="julian-j-schloder"><first>Julian J.</first><last>Schlöder</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>250–260</pages>
       <url hash="d618a661">W15-0129</url>
       <bibkey>schloder-fernandez-2015-pragmatic</bibkey>
@@ -278,7 +278,7 @@
     <paper id="31">
       <title>Dynamics of Public Commitments in Dialogue</title>
       <author><first>Antoine</first><last>Venant</last></author>
-      <author><first>Nicholas</first><last>Asher</last></author>
+      <author id="nicholas-asher"><first>Nicholas</first><last>Asher</last></author>
       <pages>272–282</pages>
       <url hash="7f91f034">W15-0131</url>
       <bibkey>venant-asher-2015-dynamics</bibkey>
@@ -292,8 +292,8 @@
     </paper>
     <paper id="33">
       <title>How hard is this query? Measuring the Semantic Complexity of Schema-agnostic Queries</title>
-      <author><first>André</first><last>Freitas</last></author>
-      <author><first>Juliano</first><last>Efson Sales</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
+      <author id="juliano-efson-sales"><first>Juliano</first><last>Efson Sales</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <author><first>Edward</first><last>Curry</last></author>
       <pages>294–304</pages>
@@ -317,7 +317,7 @@
     </frontmatter>
     <paper id="1">
       <title>On the Principles of Semantic Annotation</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="339016ab">W15-0201</url>
       <bibkey>bunt-2015-principles</bibkey>
     </paper>
@@ -339,7 +339,7 @@
     <paper id="4">
       <title>The Semantics of Image Annotation</title>
       <author><first>Julia</first><last>Bosque-Gil</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="711577d7">W15-0204</url>
       <bibkey>bosque-gil-pustejovsky-2015-semantics</bibkey>
     </paper>
@@ -366,8 +366,8 @@
     <paper id="8">
       <title>A Flexible Tool for Manual Word Sense Annotation</title>
       <author><first>Steven</first><last>Neale</last></author>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <url hash="390676b1">W15-0208</url>
       <bibkey>neale-etal-2015-flexible</bibkey>
     </paper>
@@ -380,7 +380,7 @@
     <paper id="10">
       <title>Semantic Relations in Discourse: The Current State of <fixed-case>ISO</fixed-case> 24617-8</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="b2e4c03a">W15-0210</url>
       <bibkey>prasad-bunt-2015-semantic</bibkey>
     </paper>
@@ -388,17 +388,17 @@
       <title>Analysis of Temporal Expressions Annotated in Clinical Notes</title>
       <author><first>Hegler</first><last>Tissot</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
-      <author><first>Marcus Didonet</first><last>Del Fabro</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="marcos-didonet-del-fabro"><first>Marcus Didonet</first><last>Del Fabro</last></author>
       <url hash="860d1111">W15-0211</url>
       <bibkey>tissot-etal-2015-analysis</bibkey>
     </paper>
     <paper id="12">
       <title>Rapid <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et annotation of spoken conversation transcripts</title>
-      <author><first>Jeremy</first><last>Trione</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="jeremy-trione"><first>Jeremy</first><last>Trione</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <url hash="55dbae29">W15-0212</url>
       <bibkey>trione-etal-2015-rapid</bibkey>
@@ -419,7 +419,7 @@
     </frontmatter>
     <paper id="1">
       <title>Towards a Unified Approach to Modality Annotation in <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Luciana Beatriz</first><last>Ávila</last></author>
+      <author id="luciana-beatriz-avila"><first>Luciana Beatriz</first><last>Ávila</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <url hash="210f5aed">W15-0301</url>
@@ -427,7 +427,7 @@
     </paper>
     <paper id="2">
       <title>A hedging annotation scheme focused on epistemic phrases for informal language</title>
-      <author><first>Liliana Mamani</first><last>Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana Mamani</first><last>Sanchez</last></author>
       <author><first>Carl</first><last>Vogel</last></author>
       <url hash="c8506cb3">W15-0302</url>
       <bibkey>sanchez-vogel-2015-hedging</bibkey>
@@ -456,18 +456,18 @@
     </frontmatter>
     <paper id="1">
       <title>Extending <fixed-case>OWL</fixed-case> Ontologies by <fixed-case>C</fixed-case>artesian Types to Represent N-ary Relations in Natural Language</title>
-      <author><first>Hans-Ulrich</first><last>Krieger</last></author>
+      <author id="hans-ulrich-krieger"><first>Hans-Ulrich</first><last>Krieger</last></author>
       <author><first>Christian</first><last>Willms</last></author>
       <url hash="2b03044a">W15-0401</url>
       <bibkey>krieger-willms-2015-extending</bibkey>
     </paper>
     <paper id="2">
       <title>Ontology Authoring Inspired By Dialogue</title>
-      <author><first>Artemis</first><last>Parvizi</last></author>
+      <author id="artemis-parvizi"><first>Artemis</first><last>Parvizi</last></author>
       <author><first>Yuan</first><last>Ren</last></author>
       <author><first>Markel</first><last>Vigo</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Jeff Z.</first><last>Pan</last></author>
       <author><first>Robert</first><last>Stevens</last></author>
       <author><first>Caroline</first><last>Jay</last></author>
@@ -483,14 +483,14 @@
     </paper>
     <paper id="4">
       <title>Using Ontologies to Model Polysemy in Lexical Resources</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <url hash="39acd1be">W15-0404</url>
       <bibkey>khan-frontini-2015-using</bibkey>
     </paper>
     <paper id="5">
       <title>Modelling time and space in <fixed-case>B</fixed-case>razilian culture</title>
-      <author><first>Daniel</first><last>Couto-Vale</last></author>
+      <author id="daniel-couto-vale"><first>Daniel</first><last>Couto-Vale</last></author>
       <author><first>Rodrigo</first><last>de Oliveira</last></author>
       <url hash="37e6baee">W15-0405</url>
       <bibkey>couto-vale-de-oliveira-2015-modelling</bibkey>
@@ -506,7 +506,7 @@
     <paper id="7">
       <title>When is Lying the Right Choice?</title>
       <author><first>Federico</first><last>Cerutti</last></author>
-      <author><first>Artemis</first><last>Parvizi</last></author>
+      <author id="artemis-parvizi"><first>Artemis</first><last>Parvizi</last></author>
       <author><first>Alice</first><last>Toniolo</last></author>
       <author><first>Dave</first><last>Braines</last></author>
       <author><first>Geeth R.</first><last>de Mel</last></author>
@@ -524,7 +524,7 @@
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Argumentation Mining</booktitle>
       <url hash="0a40efcc">W15-05</url>
-      <editor><first>Claire</first><last>Cardie</last></editor>
+      <editor id="claire-cardie"><first>Claire</first><last>Cardie</last></editor>
       <doi>10.3115/v1/W15-05</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Denver, CO</address>
@@ -548,7 +548,7 @@
     </paper>
     <paper id="2">
       <title>Identifying Argumentation Schemes in Genetics Research Articles</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <pages>12–21</pages>
       <url hash="db6e92f1">W15-0502</url>
       <doi>10.3115/v1/W15-0502</doi>
@@ -557,7 +557,7 @@
     <paper id="3">
       <title>Extracting Argument and Domain Words for Identifying Argument Components in Texts</title>
       <author id="huy-nguyen-pgh"><first>Huy</first><last>Nguyen</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>22–28</pages>
       <url hash="bc5945dd">W15-0503</url>
       <doi>10.3115/v1/W15-0503</doi>
@@ -575,7 +575,7 @@
     <paper id="5">
       <title>A Shared Task on Argumentation Mining in Newspaper Editorials</title>
       <author><first>Johannes</first><last>Kiesel</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>35–38</pages>
@@ -597,7 +597,7 @@
       <title>A Computational Approach for Generating Toulmin Model Argumentation</title>
       <author><first>Paul</first><last>Reisert</last></author>
       <author><first>Naoya</first><last>Inoue</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>45–55</pages>
       <url hash="c65ccb46">W15-0507</url>
@@ -618,7 +618,7 @@
     <paper id="9">
       <title>From Argumentation Mining to Stance Classification</title>
       <author><first>Parinaz</first><last>Sobhani</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Stan</first><last>Matwin</last></author>
       <pages>67–77</pages>
       <url hash="4f99b23a">W15-0509</url>
@@ -628,7 +628,7 @@
     <paper id="10">
       <title>Argument Discovery and Extraction with the Argument Workbench</title>
       <author><first>Adam</first><last>Wyner</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>David</first><last>Price</last></author>
       <pages>78–83</pages>
       <url hash="81e4b26c">W15-0510</url>
@@ -683,9 +683,9 @@
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Ryan</first><last>Compton</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <pages>116–126</pages>
       <url hash="5087b450">W15-0515</url>
       <doi>10.3115/v1/W15-0515</doi>
@@ -694,7 +694,7 @@
     <paper id="16">
       <title>Combining Argument Mining Techniques</title>
       <author><first>John</first><last>Lawrence</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>127–136</pages>
       <url hash="ccd300c7">W15-0516</url>
       <doi>10.3115/v1/W15-0516</doi>
@@ -705,7 +705,7 @@
     <meta>
       <booktitle>Proceedings of the Tenth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="19716273">W15-06</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <doi>10.3115/v1/W15-06</doi>
@@ -743,7 +743,7 @@
     <paper id="3">
       <title>Incorporating Coherence of Topics as a Criterion in Automatic Response-to-Text Assessment of the Organization of Writing</title>
       <author><first>Zahra</first><last>Rahimi</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Elaine</first><last>Wang</last></author>
       <author><first>Richard</first><last>Correnti</last></author>
       <pages>20–30</pages>
@@ -763,8 +763,8 @@
     <paper id="5">
       <title>Automated Scoring of Picture-based Story Narration</title>
       <author><first>Swapna</first><last>Somasundaran</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Xinhao</first><last>Wang</last></author>
       <pages>42–48</pages>
       <url hash="2911d022">W15-0605</url>
@@ -773,7 +773,7 @@
     </paper>
     <paper id="6">
       <title>Measuring Feature Diversity in Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <pages>49–55</pages>
       <url hash="1184f489">W15-0606</url>
@@ -782,7 +782,7 @@
     </paper>
     <paper id="7">
       <title>Automated Evaluation of Scientific Writing: <fixed-case>AESW</fixed-case> Shared Task Proposal</title>
-      <author><first>Vidas</first><last>Daudaravičius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravičius</last></author>
       <pages>56–63</pages>
       <url hash="fee593f1">W15-0607</url>
       <doi>10.3115/v1/W15-0607</doi>
@@ -802,7 +802,7 @@
     </paper>
     <paper id="9">
       <title>Towards Automatic Description of Knowledge Components</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Guillaume</first><last>Durand</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <pages>75–80</pages>
@@ -822,9 +822,9 @@
     <paper id="11">
       <title>Interpreting Questions with a Log-Linear Ranking Model in a Virtual Patient Dialogue System</title>
       <author><first>Evan</first><last>Jaffe</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>William</first><last>Schuler</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Alex</first><last>Rosenfeld</last></author>
       <author><first>Douglas</first><last>Danforth</last></author>
       <pages>86–96</pages>
@@ -836,7 +836,7 @@
       <title>Identifying Patterns For Short Answer Scoring Using Graph-based Lexico-Semantic Text Matching</title>
       <author><first>Lakshmi</first><last>Ramachandran</last></author>
       <author><first>Jian</first><last>Cheng</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
       <pages>97–106</pages>
       <url hash="2c9e2fec">W15-0612</url>
       <doi>10.3115/v1/W15-0612</doi>
@@ -854,7 +854,7 @@
     <paper id="14">
       <title>The Jinan <fixed-case>C</fixed-case>hinese Learner Corpus</title>
       <author><first>Maolin</first><last>Wang</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mingxuan</first><last>Huang</last></author>
       <pages>118–123</pages>
       <url hash="b62161c3">W15-0614</url>
@@ -874,7 +874,7 @@
     <paper id="16">
       <title>Annotation and Classification of Argumentative Writing Revisions</title>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>133–143</pages>
       <url hash="acedbe95">W15-0616</url>
       <doi>10.3115/v1/W15-0616</doi>
@@ -883,7 +883,7 @@
     <paper id="17">
       <title>Embarrassed or Awkward? Ranking Emotion Synonyms for <fixed-case>ESL</fixed-case> Learners’ Appropriate Wording</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
       <author><first>Lun-Wei</first><last>Ku</last></author>
       <pages>144–153</pages>
       <url hash="981381cb">W15-0617</url>
@@ -893,8 +893,8 @@
     <paper id="18">
       <title><fixed-case>R</fixed-case>ev<fixed-case>UP</fixed-case>: Automatic Gap-Fill Question Generation from Educational Texts</title>
       <author><first>Girish</first><last>Kumar</last></author>
-      <author><first>Rafael</first><last>Banchs</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="rafael-e-banchs"><first>Rafael</first><last>Banchs</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <pages>154–161</pages>
       <url hash="29dee565">W15-0618</url>
       <doi>10.3115/v1/W15-0618</doi>
@@ -903,7 +903,7 @@
     <paper id="19">
       <title>Preliminary Experiments on Crowdsourced Evaluation of Feedback Granularity</title>
       <author><first>Nitin</first><last>Madnani</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Melissa</first><last>Lopez</last></author>
       <author><first>Yoko</first><last>Futagi</last></author>
@@ -915,7 +915,7 @@
     </paper>
     <paper id="20">
       <title>Oracle and Human Baselines for Native Language Identification</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Joel</first><last>Tetreault</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>172–178</pages>
@@ -944,7 +944,7 @@
     </paper>
     <paper id="23">
       <title>Judging the Quality of Automatically Generated Gap-fill Question using Active Learning</title>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
       <pages>196–206</pages>
       <url hash="c8bc8211">W15-0623</url>
@@ -954,7 +954,7 @@
     <paper id="24">
       <title>Generating Reference Texts for Short Answer Scoring Using Graph-based Summarization</title>
       <author><first>Lakshmi</first><last>Ramachandran</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
       <pages>207–212</pages>
       <url hash="ea104667">W15-0624</url>
       <doi>10.3115/v1/W15-0624</doi>
@@ -982,7 +982,7 @@
     <paper id="27">
       <title>Using Learner Data to Improve Error Correction in Adjective–Noun Combinations</title>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>233–242</pages>
       <url hash="68da0736">W15-0627</url>
       <doi>10.3115/v1/W15-0627</doi>
@@ -1012,7 +1012,7 @@
       <url hash="a1bee6b3">W15-07</url>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <editor><first>Corina</first><last>Koolen</last></editor>
       <doi>10.3115/v1/W15-07</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1028,7 +1028,7 @@
     <paper id="1">
       <title>Tools for Digital Humanities: Enabling Access to the Old <fixed-case>O</fixed-case>ccitan <fixed-case>R</fixed-case>omance of Flamenca</title>
       <author><first>Olga</first><last>Scrivner</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>1–11</pages>
       <url hash="6a8d2e53">W15-0701</url>
       <doi>10.3115/v1/W15-0701</doi>
@@ -1058,7 +1058,7 @@
       <author><first>Prashant</first><last>Jayannavar</last></author>
       <author><first>Apoorv</first><last>Agarwal</last></author>
       <author><first>Melody</first><last>Ju</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>32–41</pages>
       <url hash="c3725682">W15-0704</url>
       <doi>10.3115/v1/W15-0704</doi>
@@ -1077,7 +1077,7 @@
     <paper id="6">
       <title>A Pilot Experiment on Exploiting Translations for Literary Studies on Kafka’s “Verwandlung”</title>
       <author><first>Fabienne</first><last>Cap</last></author>
-      <author><first>Ina</first><last>Rösiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Rösiger</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>48–57</pages>
       <url hash="c5562bc7">W15-0706</url>
@@ -1095,7 +1095,7 @@
     </paper>
     <paper id="8">
       <title>Visualizing Poetry with <fixed-case>SPARSAR</fixed-case> – Visual Maps from Poetic Content</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>68–78</pages>
       <url hash="2ba24483">W15-0708</url>
       <doi>10.3115/v1/W15-0708</doi>
@@ -1139,7 +1139,7 @@
     </paper>
     <paper id="12">
       <title>A computational linguistic approach to <fixed-case>S</fixed-case>panish Golden Age Sonnets: metrical and semantic aspects</title>
-      <author><first>Borja</first><last>Navarro</last></author>
+      <author id="borja-navarro"><first>Borja</first><last>Navarro</last></author>
       <pages>105–113</pages>
       <url hash="d833e1f6">W15-0712</url>
       <doi>10.3115/v1/W15-0712</doi>
@@ -1147,7 +1147,7 @@
     </paper>
     <paper id="13">
       <title>Automated Translation of a Literary Work: A Pilot Study</title>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Lane</first><last>Schwartz</last></author>
       <pages>114–122</pages>
       <url hash="82deebde">W15-0713</url>
@@ -1168,9 +1168,9 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on <fixed-case>EVENTS</fixed-case>: Definition, Detection, Coreference, and Representation</booktitle>
       <url hash="2ba1e53d">W15-08</url>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <doi>10.3115/v1/W15-08</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Denver, Colorado</address>
@@ -1184,7 +1184,7 @@
     </frontmatter>
     <paper id="1">
       <title>Translating Granularity of Event Slots into Features for Event Coreference Resolution.</title>
-      <author><first>Agata</first><last>Cybulska</last></author>
+      <author id="agata-cybulska"><first>Agata</first><last>Cybulska</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>1–10</pages>
       <url hash="63239e8e">W15-0801</url>
@@ -1193,7 +1193,7 @@
     </paper>
     <paper id="2">
       <title>Word Sense Disambiguation via <fixed-case>P</fixed-case>rop<fixed-case>S</fixed-case>tore and <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes for Event Mention Detection</title>
-      <author><first>Nicolas R.</first><last>Fauceglia</last></author>
+      <author id="nicolas-r-fauceglia"><first>Nicolas R.</first><last>Fauceglia</last></author>
       <author><first>Yiu-Chang</first><last>Lin</last></author>
       <author><first>Xuezhe</first><last>Ma</last></author>
       <author><first>Eduard</first><last>Hovy</last></author>
@@ -1206,7 +1206,7 @@
       <title>Opposition Relations among Verb Frames</title>
       <author><first>Anna</first><last>Feltracco</last></author>
       <author><first>Elisabetta</first><last>Jezek</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>16–24</pages>
       <url hash="3c315d36">W15-0803</url>
       <doi>10.3115/v1/W15-0803</doi>
@@ -1214,8 +1214,8 @@
     </paper>
     <paper id="4">
       <title>Encoding event structure in <fixed-case>U</fixed-case>rdu/<fixed-case>H</fixed-case>indi <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et</title>
-      <author><first>Annette</first><last>Hautli-Janisz</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="annette-hautli"><first>Annette</first><last>Hautli-Janisz</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <author><first>Gilian</first><last>Ramchand</last></author>
       <pages>25–33</pages>
       <url hash="ed40455d">W15-0804</url>
@@ -1235,7 +1235,7 @@
     <paper id="6">
       <title>Detecting Causally Embedded Structures Using an Evolutionary Algorithm</title>
       <author><first>Chen</first><last>Li</last></author>
-      <author><first>Roxana</first><last>Girju</last></author>
+      <author id="roxana-girju"><first>Roxana</first><last>Girju</last></author>
       <pages>43–52</pages>
       <url hash="74540fc9">W15-0806</url>
       <doi>10.3115/v1/W15-0806</doi>
@@ -1254,7 +1254,7 @@
     <paper id="8">
       <title>Event analysis for information extraction from business-based technical documents</title>
       <author><first>Bell</first><last>Manrique Losada</last></author>
-      <author><first>Carlos Mario</first><last>Zapata Jaramillo</last></author>
+      <author id="carlos-mario-zapata-jaramillo"><first>Carlos Mario</first><last>Zapata Jaramillo</last></author>
       <pages>58–65</pages>
       <url hash="d3130007">W15-0808</url>
       <doi>10.3115/v1/W15-0808</doi>
@@ -1268,7 +1268,7 @@
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <pages>66–76</pages>
       <url hash="0ba052ca">W15-0809</url>
       <doi>10.3115/v1/W15-0809</doi>
@@ -1285,7 +1285,7 @@
     <paper id="11">
       <title>Identifying Various Kinds of Event Mentions in K-Parser Output</title>
       <author><first>Arpit</first><last>Sharma</last></author>
-      <author><first>Nguyen</first><last>Vo</last></author>
+      <author id="nguyen-vo"><first>Nguyen</first><last>Vo</last></author>
       <author><first>Somak</first><last>Aditya</last></author>
       <author><first>Chitta</first><last>Baral</last></author>
       <pages>82–88</pages>
@@ -1297,7 +1297,7 @@
       <title>From Light to Rich <fixed-case>ERE</fixed-case>: Annotation of Entities, Relations, and Events</title>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Tom</first><last>Riese</last></author>
       <author><first>Justin</first><last>Mott</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
@@ -1327,7 +1327,7 @@
       <title>Semantic Interoperability for Cross-lingual and cross-document Event Detection</title>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Egoitz</first><last>Laparra</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
       <pages>108–116</pages>
       <url hash="55aa7f36">W15-0814</url>
@@ -1363,7 +1363,7 @@
     <paper id="1">
       <title>A Method of Accounting Bigrams in Topic Models</title>
       <author><first>Michael</first><last>Nokel</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>1–9</pages>
       <url hash="60b36835">W15-0901</url>
       <doi>10.3115/v1/W15-0901</doi>
@@ -1383,7 +1383,7 @@
       <author><first>Fabienne</first><last>Cap</last></author>
       <author><first>Manju</first><last>Nirmal</last></author>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>19–28</pages>
       <url hash="2c6ba6fc">W15-0903</url>
       <doi>10.3115/v1/W15-0903</doi>
@@ -1440,7 +1440,7 @@
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Nitika</first><last>Mathur</last></author>
       <author><first>Paul</first><last>Cook</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>54–59</pages>
       <url hash="3ddcd3b4">W15-0909</url>
       <doi>10.3115/v1/W15-0909</doi>
@@ -1448,8 +1448,8 @@
     </paper>
     <paper id="10">
       <title>The Bare Necessities: Increasing Lexical Coverage for Multi-Word Domain Terms with Less Lexical Data</title>
-      <author><first>Branimir</first><last>Boguraev</last></author>
-      <author><first>Esme</first><last>Manandise</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
+      <author id="esmeralda-manandise"><first>Esme</first><last>Manandise</last></author>
       <author><first>Benjamin</first><last>Segal</last></author>
       <pages>60–64</pages>
       <url hash="11303ae4">W15-0910</url>
@@ -1460,7 +1460,7 @@
       <title>Phrase translation using a bilingual dictionary and n-gram data: A case study from <fixed-case>V</fixed-case>ietnamese to <fixed-case>E</fixed-case>nglish</title>
       <author><first>Khang Nhut</first><last>Lam</last></author>
       <author><first>Feras</first><last>Al Tarouti</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>65–69</pages>
       <url hash="44a1be39">W15-0911</url>
       <doi>10.3115/v1/W15-0911</doi>
@@ -1468,9 +1468,9 @@
     </paper>
     <paper id="12">
       <title>Annotation and Extraction of Multiword Expressions in <fixed-case>T</fixed-case>urkish Treebanks</title>
-      <author><first>Gülşen</first><last>Eryiǧit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiǧit</last></author>
       <author><first>Kübra</first><last>Adali</last></author>
-      <author><first>Dilara</first><last>Torunoğlu-Selamet</last></author>
+      <author id="dilara-torunoglu-selamet"><first>Dilara</first><last>Torunoğlu-Selamet</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
       <author><first>Tuğba</first><last>Pamay</last></author>
       <pages>70–76</pages>
@@ -1516,7 +1516,7 @@
       <url hash="57215f41">W15-10</url>
       <editor><first>Dekai</first><last>Wu</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Nora</first><last>Aranberri</last></editor>
       <doi>10.3115/v1/W15-10</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1541,7 +1541,7 @@
     </paper>
     <paper id="2">
       <title>Non-projective Dependency-based Pre-Reordering with Recurrent Neural Network for Machine Translation</title>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <author><first>Giuseppe</first><last>Attardi</last></author>
       <pages>10–20</pages>
       <url hash="a41fccec">W15-1002</url>
@@ -1551,7 +1551,7 @@
     <paper id="3">
       <title>Translating Negation: Induction, Search And Model Errors</title>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>21–29</pages>
       <url hash="fbbae6fd">W15-1003</url>
       <doi>10.3115/v1/W15-1003</doi>
@@ -1569,7 +1569,7 @@
       <title>Unsupervised False Friend Disambiguation Using Contextual Word Clusters and Parallel Word Alignments</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>39–48</pages>
       <url hash="8c74d635">W15-1005</url>
       <doi>10.3115/v1/W15-1005</doi>
@@ -1588,8 +1588,8 @@
       <title>Analyzing <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Named-Entity enhanced Machine Translation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
       <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Inaki</first><last>Alegria</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="inaki-alegria"><first>Inaki</first><last>Alegria</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>52–54</pages>
       <url hash="b37b2241">W15-1007</url>
       <doi>10.3115/v1/W15-1007</doi>
@@ -1598,8 +1598,8 @@
     <paper id="8">
       <title>Predicting Prepositions for <fixed-case>SMT</fixed-case></title>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>55–56</pages>
       <url hash="c895b7a1">W15-1008</url>
       <doi>10.3115/v1/W15-1008</doi>
@@ -1607,7 +1607,7 @@
     </paper>
     <paper id="9">
       <title>Translation reranking using source phrase dependency features</title>
-      <author><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli-Barone</last></author>
       <pages>57–60</pages>
       <url hash="69c1c400">W15-1009</url>
       <doi>10.3115/v1/W15-1009</doi>
@@ -1635,7 +1635,7 @@
     <paper id="12">
       <title>Improving <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Alignment</title>
       <author><first>Shumin</first><last>Wu</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>74–82</pages>
       <url hash="7584bd4e">W15-1012</url>
       <doi>10.3115/v1/W15-1012</doi>
@@ -1646,8 +1646,8 @@
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <url hash="109f670d">W15-11</url>
-      <editor><first>Tim</first><last>O’Donnell</last></editor>
-      <editor><first>Marten</first><last>van Schijndel</last></editor>
+      <editor id="timothy-odonnell"><first>Tim</first><last>O’Donnell</last></editor>
+      <editor id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></editor>
       <doi>10.3115/v1/W15-11</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Denver, Colorado</address>
@@ -1661,7 +1661,7 @@
     </frontmatter>
     <paper id="1">
       <title>Predictions for self-priming from incremental updating models unifying comprehension and production</title>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
       <pages>1–8</pages>
       <url hash="61c7f7a7">W15-1101</url>
       <doi>10.3115/v1/W15-1101</doi>
@@ -1680,7 +1680,7 @@
     <paper id="3">
       <title>Audience size and contextual effects on information density in <fixed-case>T</fixed-case>witter conversations</title>
       <author><first>Gabriel</first><last>Doyle</last></author>
-      <author><first>Michael</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael</first><last>Frank</last></author>
       <pages>19–28</pages>
       <url hash="90c0929a">W15-1103</url>
       <doi>10.3115/v1/W15-1103</doi>
@@ -1689,7 +1689,7 @@
     <paper id="4">
       <title>Centre Stage: How Social Network Position Shapes Linguistic Coordination</title>
       <author><first>Bill</first><last>Noble</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>29–38</pages>
       <url hash="7f67bc54">W15-1104</url>
       <doi>10.3115/v1/W15-1104</doi>
@@ -1709,7 +1709,7 @@
       <title>Verb polysemy and frequency effects in thematic fit modeling</title>
       <author><first>Clayton</first><last>Greenberg</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>48–57</pages>
       <url hash="245ffe51">W15-1106</url>
       <doi>10.3115/v1/W15-1106</doi>
@@ -1745,7 +1745,7 @@
     </paper>
     <paper id="10">
       <title>Modeling f<fixed-case>MRI</fixed-case> time courses with linguistic structure at various grain sizes</title>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>David</first><last>Lutz</last></author>
       <author><first>Wen-Ming</first><last>Luh</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
@@ -1793,14 +1793,14 @@
     </paper>
     <paper id="3">
       <title>The role of personality, age, and gender in tweeting about mental illness</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
       <author><first>Gregory</first><last>Park</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
       <author><first>Laura</first><last>Smith</last></author>
       <author><first>Victoria</first><last>Tobolsky</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>21–30</pages>
       <url hash="95e5b800">W15-1203</url>
       <doi>10.3115/v1/W15-1203</doi>
@@ -1821,10 +1821,10 @@
     </paper>
     <paper id="5">
       <title>Mental Illness Detection at the World Well-Being Project for the <fixed-case>CLP</fixed-case>sych 2015 Shared Task</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Maarten</first><last>Sap</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>40–45</pages>
       <url hash="9376c46a">W15-1205</url>
       <doi>10.3115/v1/W15-1205</doi>
@@ -1884,7 +1884,7 @@
     </paper>
     <paper id="11">
       <title>Towards Developing an Annotation Scheme for Depressive Disorder Symptoms: A Preliminary Study using <fixed-case>T</fixed-case>witter Data</title>
-      <author><first>Danielle</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle</first><last>Mowery</last></author>
       <author><first>Craig</first><last>Bryan</last></author>
       <author><first>Mike</first><last>Conway</last></author>
       <pages>89–98</pages>
@@ -1924,7 +1924,7 @@
     <paper id="14">
       <title>Similarity Measures for Quantifying Restrictive and Repetitive Behavior in Conversations of Autistic Children</title>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <author><first>Jan</first><last>van Santen</last></author>
       <pages>117–123</pages>
       <url hash="7161fc05">W15-1214</url>
@@ -1934,7 +1934,7 @@
     <paper id="15">
       <title>Practical issues in developing semantic frameworks for the analysis of verbal fluency data: A <fixed-case>N</fixed-case>orwegian data case study</title>
       <author><first>Mark</first><last>Rosenstein</last></author>
-      <author><first>Peter</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter</first><last>Foltz</last></author>
       <author><first>Anja</first><last>Vaskinn</last></author>
       <author><first>Brita</first><last>Elvevåg</last></author>
       <pages>124–133</pages>
@@ -1983,7 +1983,7 @@
     <paper id="1">
       <title>Translating Negation: A Manual Error Analysis</title>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>2–11</pages>
       <url hash="8bf81440">W15-1301</url>
       <doi>10.3115/v1/W15-1301</doi>
@@ -2002,7 +2002,7 @@
       <author><first>Halil</first><last>Kilicoglu</last></author>
       <author><first>Graciela</first><last>Rosemblat</last></author>
       <author><first>Michael</first><last>Cairelli</last></author>
-      <author><first>Thomas</first><last>Rindflesch</last></author>
+      <author id="thomas-c-rindflesch"><first>Thomas</first><last>Rindflesch</last></author>
       <pages>22–31</pages>
       <url hash="fa1d7bcb">W15-1303</url>
       <doi>10.3115/v1/W15-1303</doi>
@@ -2012,8 +2012,8 @@
       <title>Committed Belief Tagging on the Factbank and <fixed-case>LU</fixed-case> Corpora: A Comparative Study</title>
       <author><first>Gregory</first><last>Werner</last></author>
       <author><first>Vinodkumar</first><last>Prabhakaran</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>32–40</pages>
       <url hash="de598afb">W15-1304</url>
       <doi>10.3115/v1/W15-1304</doi>
@@ -2022,9 +2022,9 @@
     <paper id="5">
       <title>Extending <fixed-case>N</fixed-case>eg<fixed-case>E</fixed-case>x with Kernel Methods for Negation Detection in Clinical Text</title>
       <author><first>Chaitanya</first><last>Shivade</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Albert M.</first><last>Lai</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert M.</first><last>Lai</last></author>
       <pages>41–46</pages>
       <url hash="b61c60a2">W15-1305</url>
       <doi>10.3115/v1/W15-1305</doi>
@@ -2053,7 +2053,7 @@
       <title>Effects of Situational Factors on Metaphor Detection in an Online Discussion Forum</title>
       <author><first>Hyeju</first><last>Jang</last></author>
       <author><first>Miaomiao</first><last>Wen</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>1–10</pages>
       <url hash="fbb19d7c">W15-1401</url>
       <doi>10.3115/v1/W15-1401</doi>
@@ -2062,7 +2062,7 @@
     <paper id="2">
       <title>Supervised Word-Level Metaphor Detection: Experiments with Concreteness and Reweighting of Examples</title>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
-      <author><first>Chee Wee</first><last>Leong</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee</first><last>Leong</last></author>
       <author><first>Michael</first><last>Flor</last></author>
       <pages>11–20</pages>
       <url hash="db08d0fd">W15-1402</url>
@@ -2072,7 +2072,7 @@
     <paper id="3">
       <title>Modeling the interaction between sensory and affective meanings for detecting metaphor</title>
       <author><first>Andrew</first><last>Gargett</last></author>
-      <author><first>John</first><last>Barnden</last></author>
+      <author id="john-barnden"><first>John</first><last>Barnden</last></author>
       <pages>21–30</pages>
       <url hash="92538c80">W15-1403</url>
       <doi>10.3115/v1/W15-1403</doi>
@@ -2081,7 +2081,7 @@
     <paper id="4">
       <title>Exploring Sensorial Features for Metaphor Identification</title>
       <author><first>Serra Sinem</first><last>Tekiroğlu</last></author>
-      <author><first>Gözde</first><last>Özbal</last></author>
+      <author id="gozde-ozbal"><first>Gözde</first><last>Özbal</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>31–39</pages>
       <url hash="42087967">W15-1404</url>
@@ -2090,7 +2090,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>M</fixed-case>eta<fixed-case>N</fixed-case>et: Deep semantic automatic metaphor analysis</title>
-      <author><first>Ellen</first><last>Dodge</last></author>
+      <author id="ellen-k-dodge"><first>Ellen</first><last>Dodge</last></author>
       <author><first>Jisup</first><last>Hong</last></author>
       <author><first>Elise</first><last>Stickles</last></author>
       <pages>40–49</pages>
@@ -2101,7 +2101,7 @@
     <paper id="6">
       <title>High-Precision Abductive Mapping of Multilingual Metaphors</title>
       <author><first>Jonathan</first><last>Gordon</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <author><first>Fabrizio</first><last>Morbini</last></author>
       <pages>50–55</pages>
@@ -2112,7 +2112,7 @@
     <paper id="7">
       <title>A Corpus of Rich Metaphor Annotation</title>
       <author><first>Jonathan</first><last>Gordon</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <author><first>Jonathan</first><last>May</last></author>
       <author><first>Michael</first><last>Mohler</last></author>
       <author><first>Fabrizio</first><last>Morbini</last></author>
@@ -2127,12 +2127,12 @@
     <paper id="8">
       <title>Understanding Cultural Conflicts using Metaphors and Sociolinguistic Measures of Influence</title>
       <author><first>Samira</first><last>Shaikh</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
-      <author><first>Sarah</first><last>Taylor</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="sarah-taylor"><first>Sarah</first><last>Taylor</last></author>
       <author><first>John</first><last>Lien</last></author>
       <author><first>Ting</first><last>Liu</last></author>
-      <author><first>George Aaron</first><last>Broadwell</last></author>
-      <author><first>Laurie</first><last>Feldman</last></author>
+      <author id="george-aaron-broadwell"><first>George Aaron</first><last>Broadwell</last></author>
+      <author id="laurie-feldman"><first>Laurie</first><last>Feldman</last></author>
       <author><first>Boris</first><last>Yamrom</last></author>
       <author><first>Kit</first><last>Cho</last></author>
       <author><first>Yuliya</first><last>Peshkova</last></author>
@@ -2165,9 +2165,9 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Vector Space Modeling for Natural Language Processing</booktitle>
       <url hash="7eb64095">W15-15</url>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
-      <editor><first>Shay</first><last>Cohen</last></editor>
-      <editor><first>Paramveer</first><last>Dhillon</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="shay-b-cohen"><first>Shay</first><last>Cohen</last></editor>
+      <editor id="paramveer-s-dhillon"><first>Paramveer</first><last>Dhillon</last></editor>
       <editor><first>Percy</first><last>Liang</last></editor>
       <doi>10.3115/v1/W15-15</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -2233,7 +2233,7 @@
     <paper id="6">
       <title>Relation Extraction: Perspective from Convolutional Neural Networks</title>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>39–48</pages>
       <url hash="dc65fda1">W15-1506</url>
       <doi>10.3115/v1/W15-1506</doi>
@@ -2251,8 +2251,8 @@
     </paper>
     <paper id="8">
       <title>A Deep Architecture for Non-Projective Dependency Parsing</title>
-      <author><first>Erick</first><last>Fonseca</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>56–61</pages>
       <url hash="35c27c3a">W15-1508</url>
       <doi>10.3115/v1/W15-1508</doi>
@@ -2275,7 +2275,7 @@
     <paper id="10">
       <title>A Word-Embedding-based Sense Index for Regular Polysemy Representation</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>70–78</pages>
       <url hash="da9bd61f">W15-1510</url>
       <doi>10.3115/v1/W15-1510</doi>
@@ -2284,7 +2284,7 @@
     <paper id="11">
       <title>Simple Semi-Supervised <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Karl</first><last>Stratos</last></author>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <pages>79–87</pages>
       <url hash="6f36fafe">W15-1511</url>
       <doi>10.3115/v1/W15-1511</doi>
@@ -2293,8 +2293,8 @@
     <paper id="12">
       <title>Learning Distributed Representations for Multilingual Text Sequences</title>
       <author><first>Hieu</first><last>Pham</last></author>
-      <author><first>Thang</first><last>Luong</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>88–94</pages>
       <url hash="a530bd65">W15-1512</url>
       <doi>10.3115/v1/W15-1512</doi>
@@ -2331,7 +2331,7 @@
       <title>A Vector Space Approach for Aspect Based Sentiment Analysis</title>
       <author><first>Abdulaziz</first><last>Alghunaim</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>Scott</first><last>Cyphers</last></author>
+      <author id="scott-cyphers"><first>Scott</first><last>Cyphers</last></author>
       <author><first>Jim</first><last>Glass</last></author>
       <pages>116–122</pages>
       <url hash="cd68d6b9">W15-1516</url>
@@ -2362,7 +2362,7 @@
     <paper id="19">
       <title>Towards Combined Matrix and Tensor Factorization for Universal Schema Relation Extraction</title>
       <author><first>Sameer</first><last>Singh</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>135–142</pages>
       <url hash="4aa4b151">W15-1519</url>
@@ -2372,8 +2372,8 @@
     <paper id="20">
       <title>Neural word embeddings with multiplicative feature interactions for tensor-based compositions</title>
       <author><first>Joo-Kyung</first><last>Kim</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>143–150</pages>
       <url hash="6423103d">W15-1520</url>
       <doi>10.3115/v1/W15-1520</doi>
@@ -2381,9 +2381,9 @@
     </paper>
     <paper id="21">
       <title>Bilingual Word Representations with Monolingual Quality in Mind</title>
-      <author><first>Thang</first><last>Luong</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
       <author><first>Hieu</first><last>Pham</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>151–159</pages>
       <url hash="14f39c2f">W15-1521</url>
       <doi>10.3115/v1/W15-1521</doi>
@@ -2394,7 +2394,7 @@
       <author><first>Mahesh</first><last>Joshi</last></author>
       <author><first>Ethan</first><last>Hart</last></author>
       <author><first>Mirko</first><last>Vogel</last></author>
-      <author><first>Jean-David</first><last>Ruvini</last></author>
+      <author id="jean-david-ruvini"><first>Jean-David</first><last>Ruvini</last></author>
       <pages>160–167</pages>
       <url hash="0c25532a">W15-1522</url>
       <doi>10.3115/v1/W15-1522</doi>
@@ -2412,7 +2412,7 @@
     <paper id="24">
       <title>Named Entity Recognition for <fixed-case>A</fixed-case>rabic Social Media</title>
       <author><first>Ayah</first><last>Zirikly</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>176–185</pages>
       <url hash="fe606de7">W15-1524</url>
       <doi>10.3115/v1/W15-1524</doi>
@@ -2420,8 +2420,8 @@
     </paper>
     <paper id="25">
       <title>Vector Space Models for Scientific Document Summarization</title>
-      <author><first>John</first><last>Conroy</last></author>
-      <author><first>Sashka</first><last>Davis</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
+      <author id="sashka-t-davis"><first>Sashka</first><last>Davis</last></author>
       <pages>186–191</pages>
       <url hash="44e73364">W15-1525</url>
       <doi>10.3115/v1/W15-1525</doi>
@@ -2438,7 +2438,7 @@
     <paper id="27">
       <title>Estimating User Location in Social Media with Stacked Denoising Auto-encoders</title>
       <author><first>Ji</first><last>Liu</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>201–210</pages>
       <url hash="002c0e0a">W15-1527</url>
       <doi>10.3115/v1/W15-1527</doi>
@@ -2449,7 +2449,7 @@
     <meta>
       <booktitle>Proceedings of the 9th Linguistic Annotation Workshop</booktitle>
       <url hash="c72e3e9a">W15-16</url>
-      <editor><first>Adam</first><last>Meyers</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
       <editor><first>Ines</first><last>Rehbein</last></editor>
       <editor><first>Heike</first><last>Zinsmeister</last></editor>
       <doi>10.3115/v1/W15-16</doi>
@@ -2466,9 +2466,9 @@
     <paper id="1">
       <title>Scaling Semantic Frame Annotation</title>
       <author><first>Nancy</first><last>Chang</last></author>
-      <author><first>Praveen</first><last>Paritosh</last></author>
+      <author id="praveen-paritosh"><first>Praveen</first><last>Paritosh</last></author>
       <author><first>David</first><last>Huynh</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <pages>1–10</pages>
       <url hash="6d7b3147">W15-1601</url>
       <doi>10.3115/v1/W15-1601</doi>
@@ -2477,7 +2477,7 @@
     <paper id="2">
       <title>An Analytic and Empirical Evaluation of Return-on-Investment-Based Active Learning</title>
       <author><first>Robbie</first><last>Haertel</last></author>
-      <author><first>Eric</first><last>Ringger</last></author>
+      <author id="eric-ringger"><first>Eric</first><last>Ringger</last></author>
       <author><first>Kevin</first><last>Seppi</last></author>
       <author><first>Paul</first><last>Felt</last></author>
       <pages>11–20</pages>
@@ -2501,7 +2501,7 @@
       <author><first>Dominique</first><last>Brunato</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>31–41</pages>
       <url hash="c7757eea">W15-1604</url>
       <doi>10.3115/v1/W15-1604</doi>
@@ -2531,19 +2531,19 @@
     </paper>
     <paper id="7">
       <title>A Qualitative Analysis of a Corpus of Opinion Summaries based on Aspects</title>
-      <author><first>Roque</first><last>López</last></author>
+      <author id="roque-lopez-condori"><first>Roque</first><last>López</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <author><first>Lucas</first><last>Avanço</last></author>
-      <author><first>Pedro</first><last>Filho</last></author>
+      <author id="pedro-balage-filho"><first>Pedro</first><last>Filho</last></author>
       <author><first>Alessandro</first><last>Bokan</last></author>
-      <author><first>Paula</first><last>Cardoso</last></author>
+      <author id="paula-cardoso"><first>Paula</first><last>Cardoso</last></author>
       <author><first>Márcio</first><last>Dias</last></author>
       <author><first>Fernando</first><last>Nóbrega</last></author>
       <author><first>Marco</first><last>Cabezudo</last></author>
-      <author><first>Jackson</first><last>Souza</last></author>
+      <author id="jackson-souza"><first>Jackson</first><last>Souza</last></author>
       <author><first>Andressa</first><last>Zacarias</last></author>
       <author><first>Eloize</first><last>Seno</last></author>
-      <author><first>Ariani</first><last>Di Felippo</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di Felippo</last></author>
       <pages>62–71</pages>
       <url hash="2123e8f8">W15-1607</url>
       <doi>10.3115/v1/W15-1607</doi>
@@ -2564,7 +2564,7 @@
       <title>Annotating Geographical Entities on Microblog Text</title>
       <author><first>Koji</first><last>Matsuda</last></author>
       <author><first>Akira</first><last>Sasaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>85–94</pages>
       <url hash="fc8c8ec5">W15-1609</url>
@@ -2575,8 +2575,8 @@
       <title>The Annotation Process of the <fixed-case>ITU</fixed-case> Web Treebank</title>
       <author><first>Tuğba</first><last>Pamay</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Dilara</first><last>Torunoğlu-Selamet</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="dilara-torunoglu-selamet"><first>Dilara</first><last>Torunoğlu-Selamet</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>95–101</pages>
       <url hash="ea9bdd6e">W15-1610</url>
       <doi>10.3115/v1/W15-1610</doi>
@@ -2599,7 +2599,7 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>112–123</pages>
       <url hash="d50e2126">W15-1612</url>
       <doi>10.3115/v1/W15-1612</doi>
@@ -2607,11 +2607,11 @@
     </paper>
     <paper id="13">
       <title>Bilingual <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Valency Lexicon Linked to a Parallel Corpus</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Jana</first><last>Šindlerová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
       <pages>124–128</pages>
       <url hash="62b316e7">W15-1613</url>
       <doi>10.3115/v1/W15-1613</doi>
@@ -2649,9 +2649,9 @@
     </paper>
     <paper id="17">
       <title>Non-canonical language is not harder to annotate than canonical language</title>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>148–151</pages>
       <url hash="8492905e">W15-1617</url>
       <doi>10.3115/v1/W15-1617</doi>
@@ -2678,7 +2678,7 @@
       <title>Across Languages and Genres: Creating a Universal Annotation Scheme for Textual Relations</title>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
-      <author><first>Kerstin Anna</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin Anna</first><last>Kunz</last></author>
       <pages>168–177</pages>
       <url hash="f1912690">W15-1620</url>
       <doi>10.3115/v1/W15-1620</doi>
@@ -2686,7 +2686,7 @@
     </paper>
     <paper id="21">
       <title>Annotating the Implicit Content of Sluices</title>
-      <author><first>Pranav</first><last>Anand</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
       <author><first>Jim</first><last>McCloskey</last></author>
       <pages>178–187</pages>
       <url hash="de5d90ea">W15-1621</url>
@@ -2696,8 +2696,8 @@
     <paper id="22">
       <title>Annotating Causal Language Using Corpus Lexicography of Constructions</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>188–196</pages>
       <url hash="e5dd1ab5">W15-1622</url>
       <doi>10.3115/v1/W15-1622</doi>
@@ -2708,7 +2708,7 @@
     <meta>
       <booktitle>Proceedings of the third International Workshop on Natural Language Processing for Social Media</booktitle>
       <url hash="1f3f2f48">W15-17</url>
-      <editor><first>Shou-de</first><last>Lin</last></editor>
+      <editor id="shou-de-lin"><first>Shou-de</first><last>Lin</last></editor>
       <editor><first>Lun-Wei</first><last>Ku</last></editor>
       <editor><first>Cheng-Te</first><last>Li</last></editor>
       <editor><first>Erik</first><last>Cambria</last></editor>
@@ -2778,7 +2778,7 @@
     <meta>
       <booktitle>Proceedings of the 20th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 2015)</booktitle>
       <url hash="ce185298">W15-18</url>
-      <editor><first>Beáta</first><last>Megyesi</last></editor>
+      <editor id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></editor>
       <publisher>Linköping University Electronic Press, Sweden</publisher>
       <address>Vilnius, Lithuania</address>
       <month>May</month>
@@ -2831,21 +2831,21 @@
     </paper>
     <paper id="6">
       <title>Supersense tagging for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Sanni</first><last>Nimb</last></author>
       <author><first>Nicolai</first><last>Hartvig Sørensen</last></author>
       <author><first>Anna</first><last>Braasch</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
-      <author><first>Bolette</first><last>Sandford Pedersen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette</first><last>Sandford Pedersen</last></author>
       <pages>21–29</pages>
       <url hash="040321a6">W15-1806</url>
       <bibkey>martinez-alonso-etal-2015-supersense</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>CG</fixed-case>-3 — Beyond Classical Constraint Grammar</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <author><first>Tino</first><last>Didriksen</last></author>
       <pages>31–39</pages>
       <url hash="7f8bb85f">W15-1807</url>
@@ -2853,7 +2853,7 @@
     </paper>
     <paper id="8">
       <title>Automatic Lemmatisation of <fixed-case>L</fixed-case>ithuanian <fixed-case>MWE</fixed-case>s</title>
-      <author><first>Loïc</first><last>Boizou</last></author>
+      <author id="loic-boizou"><first>Loïc</first><last>Boizou</last></author>
       <author><first>Jolanta</first><last>Kovalevskaitė</last></author>
       <author><first>Erika</first><last>Rimkutė</last></author>
       <pages>41–49</pages>
@@ -2871,7 +2871,7 @@
     </paper>
     <paper id="10">
       <title>Resolving Spatial References using Crowdsourced Geographical Data</title>
-      <author><first>Jana</first><last>Götze</last></author>
+      <author id="jana-gotze"><first>Jana</first><last>Götze</last></author>
       <author><first>Johan</first><last>Boye</last></author>
       <pages>61–68</pages>
       <url hash="6d6913f5">W15-1810</url>
@@ -2887,14 +2887,14 @@
     </paper>
     <paper id="12">
       <title>Talebob - an Interactive Speech Trainer for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>79–86</pages>
       <url hash="36970430">W15-1812</url>
       <bibkey>henrichsen-2015-talebob</bibkey>
     </paper>
     <paper id="13">
       <title>The Effect of Author Set Size in Authorship Attribution for <fixed-case>L</fixed-case>ithuanian</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Ligita</first><last>Šarkutė</last></author>
       <author><first>Andrius</first><last>Utka</last></author>
       <pages>87–96</pages>
@@ -2904,8 +2904,8 @@
     <paper id="14">
       <title>Looking hard: Eye tracking for detecting grammaticality of automatically compressed sentences</title>
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>97–105</pages>
       <url hash="1dc08e08">W15-1814</url>
       <bibkey>klerke-etal-2015-looking</bibkey>
@@ -2926,7 +2926,7 @@
       <title>Improving cross-domain dependency parsing with dependency-derived clusters</title>
       <author><first>Jostein</first><last>Lien</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>117–126</pages>
       <url hash="46b905a2">W15-1816</url>
       <bibkey>lien-etal-2015-improving</bibkey>
@@ -2950,7 +2950,7 @@
     <paper id="19">
       <title>Topic Models: Accounting Component Structure of Bigrams</title>
       <author><first>Michael</first><last>Nokel</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>145–152</pages>
       <url hash="44bd9824">W15-1819</url>
       <bibkey>nokel-loukachevitch-2015-topic</bibkey>
@@ -2977,7 +2977,7 @@
     <paper id="22">
       <title>Automatic word stress annotation of <fixed-case>R</fixed-case>ussian unrestricted text</title>
       <author><first>Robert</first><last>Reynolds</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>173–180</pages>
       <url hash="d8836817">W15-1822</url>
       <bibkey>reynolds-tyers-2015-automatic</bibkey>
@@ -2988,7 +2988,7 @@
       <author><first>Yuki</first><last>Asano</last></author>
       <author><first>Christian</first><last>Rohrdantz</last></author>
       <author><first>Felix</first><last>Hamborg</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <author><first>Bettina</first><last>Braun</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <pages>181–189</pages>
@@ -2997,7 +2997,7 @@
     </paper>
     <paper id="24">
       <title>Improving the Cross-Lingual Projection of Syntactic Dependencies</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>191–199</pages>
       <url hash="d6ec19e5">W15-1824</url>
       <bibkey>tiedemann-2015-improving</bibkey>
@@ -3005,7 +3005,7 @@
     <paper id="25">
       <title>Assessing the Performance of Automatic Speech Recognition Systems When Used by Native and Non-Native Speakers of Three Major Languages in Dictation Workflows</title>
       <author><first>Julián</first><last>Zapata</last></author>
-      <author><first>Andreas</first><last>Søeborg Kirkedal</last></author>
+      <author id="andreas-soeborg-kirkedal"><first>Andreas</first><last>Søeborg Kirkedal</last></author>
       <pages>201–210</pages>
       <url hash="7c16429e">W15-1825</url>
       <bibkey>zapata-soeborg-kirkedal-2015-assessing</bibkey>
@@ -3023,7 +3023,7 @@
     <paper id="27">
       <title>Automatic conversion of colloquial Finnishto standard <fixed-case>F</fixed-case>innish</title>
       <author><first>Inari</first><last>Listenmaa</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>219–223</pages>
       <url hash="e27f0556">W15-1827</url>
       <bibkey>listenmaa-tyers-2015-automatic</bibkey>
@@ -3041,7 +3041,7 @@
     <paper id="29">
       <title>Sentiment analysis on conversational texts</title>
       <author><first>Birgitta</first><last>Ojamaa</last></author>
-      <author><first>Päivi Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Päivi Kristiina</first><last>Jokinen</last></author>
       <author><first>Kadri</first><last>Muischenk</last></author>
       <pages>233–237</pages>
       <url hash="d28c3b31">W15-1829</url>
@@ -3056,10 +3056,10 @@
     </paper>
     <paper id="31">
       <title>Active learning for sense annotation</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>245–249</pages>
       <url hash="00e38526">W15-1831</url>
       <bibkey>martinez-alonso-etal-2015-active</bibkey>
@@ -3074,12 +3074,12 @@
     </paper>
     <paper id="33">
       <title>A multivariate model for classifying texts’ readability</title>
-      <author><first>Katarina</first><last>Heimann Mühlenbock</last></author>
-      <author><first>Sofie</first><last>Johansson Kokkinakis</last></author>
+      <author id="katarina-heimann-muhlenbock"><first>Katarina</first><last>Heimann Mühlenbock</last></author>
+      <author id="sofie-johansson-kokkinakis"><first>Sofie</first><last>Johansson Kokkinakis</last></author>
       <author><first>Caroline</first><last>Liberg</last></author>
       <author><first>Åsa</first><last>af Geijerstam</last></author>
       <author><first>Jenny</first><last>Wiksten Folkeryd</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <author><first>Erik</first><last>Kanebrant</last></author>
       <author><first>Johan</first><last>Falkenjack</last></author>
       <pages>257–261</pages>
@@ -3097,7 +3097,7 @@
     </paper>
     <paper id="35">
       <title>Using Positional Suffix Trees to Perform Agile Tree Kernel Calculation</title>
-      <author><first>Gustavo</first><last>Henrique Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Henrique Paetzold</last></author>
       <pages>269–273</pages>
       <url hash="8bd5d435">W15-1835</url>
       <bibkey>henrique-paetzold-2015-using</bibkey>
@@ -3121,7 +3121,7 @@
       <title>Analysing Inconsistencies and Errors in <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging in two <fixed-case>I</fixed-case>celandic Gold Standards</title>
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>287–291</pages>
       <url hash="3074f22f">W15-1838</url>
       <bibkey>steingrimsson-etal-2015-analysing</bibkey>
@@ -3138,7 +3138,7 @@
     </paper>
     <paper id="40">
       <title>The Corpus of <fixed-case>A</fixed-case>merican <fixed-case>N</fixed-case>orwegian Speech (<fixed-case>CANS</fixed-case>)</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <pages>297–300</pages>
       <url hash="c668e1b9">W15-1840</url>
       <bibkey>johannessen-2015-corpus</bibkey>
@@ -3153,8 +3153,8 @@
     <paper id="42">
       <title>Extracting Semantic Frames using hfst-pmatch</title>
       <author><first>Sam</first><last>Hardwick</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>305–308</pages>
       <url hash="73a058ae">W15-1842</url>
       <bibkey>hardwick-etal-2015-extracting</bibkey>
@@ -3168,7 +3168,7 @@
     </paper>
     <paper id="44">
       <title><fixed-case>O</fixed-case>morfi — Free and open source morphological lexical database for <fixed-case>F</fixed-case>innish</title>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <pages>313–315</pages>
       <url hash="399f19d9">W15-1844</url>
       <bibkey>pirinen-2015-omorfi</bibkey>
@@ -3176,7 +3176,7 @@
     <paper id="45">
       <title>A Tool for Automatic Simplification of <fixed-case>S</fixed-case>wedish Texts</title>
       <author><first>Evelina</first><last>Rennes</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>317–320</pages>
       <url hash="f7796009">W15-1845</url>
       <bibkey>rennes-jonsson-2015-tool</bibkey>
@@ -3209,7 +3209,7 @@
     </paper>
     <paper id="2">
       <title>Taking the <fixed-case>D</fixed-case>anish Speech Trainer from <fixed-case>CALL</fixed-case> to <fixed-case>ICALL</fixed-case></title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>11–20</pages>
       <url hash="6bb95475">W15-1902</url>
       <bibkey>henrichsen-2015-taking</bibkey>
@@ -3233,7 +3233,7 @@
     </paper>
     <paper id="5">
       <title>Short Answer Grading: When Sorting Helps and When it Doesn’t</title>
-      <author><first>Ulrike</first><last>Pado</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Pado</last></author>
       <author><first>Cornelia</first><last>Kiefer</last></author>
       <pages>42–50</pages>
       <url hash="cc235942">W15-1905</url>
@@ -3254,7 +3254,7 @@
     <meta>
       <booktitle>Proceedings of the workshop on Semantic resources and semantic annotation for Natural Language Processing and the Digital Humanities at <fixed-case>NODALIDA</fixed-case> 2015</booktitle>
       <url hash="9d202af9">W15-20</url>
-      <editor><first>Bolette Sandford</first><last>Pedersen</last></editor>
+      <editor id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></editor>
       <editor><first>Sussi</first><last>Olsen</last></editor>
       <editor><first>Lars</first><last>Borin</last></editor>
       <publisher>Northern European Association for Language Technology</publisher>
@@ -3279,7 +3279,7 @@
     </paper>
     <paper id="2">
       <title>Polysemy, underspecification, and aspects – Questions of lumping or splitting in the construction of <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Karin</first><last>Friberg Heppin</last></author>
+      <author id="karin-friberg-heppin"><first>Karin</first><last>Friberg Heppin</last></author>
       <author><first>Dana</first><last>Dannélls</last></author>
       <pages>12–20</pages>
       <url hash="e8118832">W15-2002</url>
@@ -3287,7 +3287,7 @@
     </paper>
     <paper id="3">
       <title>Determining the most frequent senses using <fixed-case>R</fixed-case>ussian linguistic ontology <fixed-case>R</fixed-case>u<fixed-case>T</fixed-case>hes</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Ilia</first><last>Chetviorkin</last></author>
       <pages>21–27</pages>
       <url hash="fc57f2d5">W15-2003</url>
@@ -3305,8 +3305,8 @@
       <title>Coarse-grained sense annotation of <fixed-case>D</fixed-case>anish across textual domains</title>
       <author><first>Sussi</first><last>Olsen</last></author>
       <author><first>Bolette S.</first><last>Pedersen</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
       <pages>36–43</pages>
       <url hash="fbdda484">W15-2005</url>
       <bibkey>olsen-etal-2015-coarse</bibkey>
@@ -3317,7 +3317,7 @@
       <booktitle>Proceedings of the Third International Conference on Dependency Linguistics (Depling 2015)</booktitle>
       <url hash="d2ea6c24">W15-21</url>
       <editor><first>Joakim</first><last>Nivre</last></editor>
-      <editor><first>Eva</first><last>Hajičová</last></editor>
+      <editor id="eva-hajicova"><first>Eva</first><last>Hajičová</last></editor>
       <publisher>Uppsala University, Uppsala, Sweden</publisher>
       <address>Uppsala, Sweden</address>
       <month>August</month>
@@ -3330,14 +3330,14 @@
     </frontmatter>
     <paper id="1">
       <title>Invited Talk: The Case for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1</pages>
       <url hash="219308dc">W15-2101</url>
       <bibkey>manning-2015-invited</bibkey>
     </paper>
     <paper id="2">
       <title>Invited Talk: Lexicon Embedded Syntax</title>
-      <author><first>Alain</first><last>Polguère</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguère</last></author>
       <pages>2–9</pages>
       <url hash="ea6eabbb">W15-2102</url>
       <bibkey>polguere-2015-invited</bibkey>
@@ -3351,7 +3351,7 @@
     </paper>
     <paper id="4">
       <title>Targeted Paraphrasing on Deep Syntactic Layer for <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Petra</first><last>Barančíková</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <pages>20–27</pages>
       <url hash="ca853e42">W15-2104</url>
@@ -3359,7 +3359,7 @@
     </paper>
     <paper id="5">
       <title>Universal and Language-specific Dependency Relations for Analysing <fixed-case>R</fixed-case>omanian</title>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Cătălina</first><last>Mărănduc</last></author>
       <author><first>Elena</first><last>Irimia</last></author>
       <pages>28–37</pages>
@@ -3369,7 +3369,7 @@
     <paper id="6">
       <title>Emotion and Inner State Adverbials in <fixed-case>R</fixed-case>ussian</title>
       <author><first>Olga</first><last>Boguslavskaya</last></author>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <pages>38–47</pages>
       <url hash="c6cdbccf">W15-2106</url>
       <bibkey>boguslavskaya-boguslavsky-2015-emotion</bibkey>
@@ -3387,7 +3387,7 @@
     <paper id="8">
       <title>A <fixed-case>B</fixed-case>ayesian Model for Generative Transition-based Dependency Parsing</title>
       <author><first>Jan</first><last>Buys</last></author>
-      <author><first>Phil</first><last>Blunsom</last></author>
+      <author id="phil-blunsom"><first>Phil</first><last>Blunsom</last></author>
       <pages>58–67</pages>
       <url hash="3f68e224">W15-2108</url>
       <bibkey>buys-blunsom-2015-bayesian</bibkey>
@@ -3413,11 +3413,11 @@
     <paper id="11">
       <title>Using Parallel Texts and Lexicons for Verbal Word Sense Disambiguation</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Jana</first><last>Šindlerová</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>82–90</pages>
       <url hash="97bdedb4">W15-2111</url>
       <bibkey>dusek-etal-2015-using</bibkey>
@@ -3459,7 +3459,7 @@
       <title>Reconstructions of Deletions in a Dependency-based Description of <fixed-case>C</fixed-case>zech: Selected Issues</title>
       <author><first>Eva</first><last>Hajičová</last></author>
       <author><first>Marie</first><last>Mikulová</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <pages>131–140</pages>
       <url hash="20057822">W15-2116</url>
       <bibkey>hajicova-etal-2015-reconstructions</bibkey>
@@ -3489,9 +3489,9 @@
     </paper>
     <paper id="20">
       <title>Towards Cross-language Application of Dependency Grammar</title>
-      <author><first>Timo</first><last>Järvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Järvinen</last></author>
       <author><first>Elisabeth</first><last>Bertol</last></author>
-      <author><first>Septina</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina</first><last>Larasati</last></author>
       <author><first>Monica-Mihaela</first><last>Rizea</last></author>
       <author><first>Maria</first><last>Ruiz Santabalbina</last></author>
       <author><first>Milan</first><last>Souček</last></author>
@@ -3510,7 +3510,7 @@
     <paper id="22">
       <title>At the Lexicon-Grammar Interface: The Case of Complex Predicates in the Functional Generative Description</title>
       <author><first>Václava</first><last>Kettnerová</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <pages>191–200</pages>
       <url hash="679240aa">W15-2122</url>
       <bibkey>kettnerova-lopatkova-2015-lexicon</bibkey>
@@ -3518,8 +3518,8 @@
     <paper id="23">
       <title>Enhancing <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing Rule-Based Dependency Grammars with Subcategorization Frames</title>
       <author><first>Marina</first><last>Lloberes</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>201–210</pages>
       <url hash="774a7973">W15-2123</url>
       <bibkey>lloberes-etal-2015-enhancing</bibkey>
@@ -3552,7 +3552,7 @@
     <paper id="27">
       <title>A Historical Overview of the Status of Function Words in Dependency Grammar</title>
       <author><first>Timothy</first><last>Osborne</last></author>
-      <author><first>Daniel</first><last>Maxwell</last></author>
+      <author id="daniel-maxwell"><first>Daniel</first><last>Maxwell</last></author>
       <pages>241–250</pages>
       <url hash="6b58ece3">W15-2127</url>
       <bibkey>osborne-maxwell-2015-historical</bibkey>
@@ -3607,14 +3607,14 @@
     <paper id="34">
       <title>Does <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies need a parsing representation? An investigation of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Natalia</first><last>Silveira</last></author>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>310–319</pages>
       <url hash="be3fb2df">W15-2134</url>
       <bibkey>silveira-manning-2015-universal</bibkey>
     </paper>
     <paper id="35">
       <title>Catena Operations for Unified Dependency Analysis</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <pages>320–329</pages>
       <url hash="f432cb16">W15-2135</url>
@@ -3624,16 +3624,16 @@
     </paper>
     <paper id="36">
       <title>Zero Alignment of Verb Arguments in a Parallel Treebank</title>
-      <author><first>Jana</first><last>Šindlerová</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="jana-sindlerova"><first>Jana</first><last>Šindlerová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>330–339</pages>
       <url hash="cb9b181c">W15-2136</url>
       <bibkey>sindlerova-etal-2015-zero</bibkey>
     </paper>
     <paper id="37">
       <title>Cross-Lingual Dependency Parsing with <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Predicted <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Labels</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>340–349</pages>
       <url hash="9ab1efb3">W15-2137</url>
       <bibkey>tiedemann-2015-cross</bibkey>
@@ -3665,7 +3665,7 @@
     <paper id="1">
       <title>Domain Adaptation for Dependency Parsing via Self-Training</title>
       <author><first>Juntao</first><last>Yu</last></author>
-      <author><first>Mohab</first><last>Elkaref</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>1–10</pages>
       <url hash="7aba8d6f">W15-2201</url>
@@ -3721,8 +3721,8 @@
     <paper id="7">
       <title>Suitability of <fixed-case>P</fixed-case>ar<fixed-case>T</fixed-case>es Test Suite for Parsing Evaluation</title>
       <author><first>Marina</first><last>Lloberes</last></author>
-      <author><first>Irene</first><last>Castellón</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellón</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <pages>61–65</pages>
       <url hash="5fc741de">W15-2207</url>
       <doi>10.18653/v1/W15-2207</doi>
@@ -3733,7 +3733,7 @@
       <author><first>Akifumi</first><last>Yoshimoto</last></author>
       <author><first>Kazuo</first><last>Hara</last></author>
       <author><first>Masashi</first><last>Shimbo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>66–70</pages>
       <url hash="3327f405">W15-2208</url>
       <doi>10.18653/v1/W15-2208</doi>
@@ -3742,7 +3742,7 @@
     <paper id="9">
       <title><fixed-case>MSTP</fixed-case>arser Model Interpolation for Multi-Source Delexicalized Transfer</title>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>71–75</pages>
       <url hash="fbf869f7">W15-2209</url>
       <doi>10.18653/v1/W15-2209</doi>
@@ -3776,7 +3776,7 @@
     <paper id="13">
       <title><fixed-case>CKY</fixed-case> Parsing with Independence Constraints</title>
       <author><first>Joseph</first><last>Irwin</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>97–106</pages>
       <url hash="0723332a">W15-2213</url>
       <doi>10.18653/v1/W15-2213</doi>
@@ -3792,9 +3792,9 @@
     </paper>
     <paper id="15">
       <title>Stacking or Supertagging for Dependency Parsing – What’s the Difference?</title>
-      <author><first>Agnieszka</first><last>Faleńska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Faleńska</last></author>
       <author><first>Anders</first><last>Björkelund</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Wolfgang</first><last>Seeker</last></author>
       <pages>118–129</pages>
       <url hash="92ed81ba">W15-2215</url>
@@ -3827,7 +3827,7 @@
       <author><first>James</first><last>Monette</last></author>
       <author><first>Gianpaul</first><last>Rachiele</last></author>
       <author><first>Aunika</first><last>Warren</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <pages>1–14</pages>
       <url hash="ce8f2057">W15-2301</url>
       <doi>10.3115/v1/W15-2301</doi>
@@ -3843,7 +3843,7 @@
     </paper>
     <paper id="3">
       <title>Topology of Language Classes</title>
-      <author><first>Sean A.</first><last>Fulop</last></author>
+      <author id="sean-a-fulop"><first>Sean A.</first><last>Fulop</last></author>
       <author><first>David</first><last>Kephart</last></author>
       <pages>26–38</pages>
       <url hash="a6363baa">W15-2303</url>
@@ -3871,7 +3871,7 @@
     <paper id="6">
       <title>A <fixed-case>F</fixed-case>robenius Model of Information Structure in Categorical Compositional Distributional Semantics</title>
       <author><first>Dimitri</first><last>Kartsaklis</last></author>
-      <author><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
+      <author id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></author>
       <pages>62–74</pages>
       <url hash="e95a4526">W15-2306</url>
       <doi>10.3115/v1/W15-2306</doi>
@@ -3943,7 +3943,7 @@
     <meta>
       <booktitle>Proceedings of the Sixth Workshop on Cognitive Aspects of Computational Language Learning</booktitle>
       <url hash="50dd6a1d">W15-24</url>
-      <editor><first>Robert</first><last>Berwick</last></editor>
+      <editor id="robert-c-berwick"><first>Robert</first><last>Berwick</last></editor>
       <editor><first>Anna</first><last>Korhonen</last></editor>
       <editor><first>Alessandro</first><last>Lenci</last></editor>
       <editor><first>Thierry</first><last>Poibeau</last></editor>
@@ -3962,7 +3962,7 @@
     <paper id="1">
       <title>Using reading behavior to predict grammatical functions</title>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1–5</pages>
       <url hash="2948f5a6">W15-2401</url>
       <doi>10.18653/v1/W15-2401</doi>
@@ -3973,7 +3973,7 @@
       <author><first>Sigrid</first><last>Klerke</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <author><first>Maria</first><last>Barrett</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>6–13</pages>
       <url hash="1d3c107c">W15-2402</url>
       <doi>10.18653/v1/W15-2402</doi>
@@ -3981,9 +3981,9 @@
     </paper>
     <paper id="3">
       <title>Evaluating Models of Computation and Storage in Human Sentence Processing</title>
-      <author><first>Thang</first><last>Luong</last></author>
-      <author><first>Timothy</first><last>O’Donnell</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="minh-thang-luong"><first>Thang</first><last>Luong</last></author>
+      <author id="timothy-odonnell"><first>Timothy</first><last>O’Donnell</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>14–21</pages>
       <url hash="6d4b924c">W15-2403</url>
       <doi>10.18653/v1/W15-2403</doi>
@@ -4003,7 +4003,7 @@
       <title>Towards a Model of Prediction-based Syntactic Category Acquisition: First Steps with Word Embeddings</title>
       <author><first>Robert</first><last>Grimm</last></author>
       <author><first>Giovanni</first><last>Cassani</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
       <pages>28–32</pages>
       <url hash="14f0988d">W15-2405</url>
@@ -4014,7 +4014,7 @@
       <title>Which distributional cues help the most? Unsupervised contexts selection for lexical category acquisition</title>
       <author><first>Giovanni</first><last>Cassani</last></author>
       <author><first>Robert</first><last>Grimm</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
       <pages>33–39</pages>
       <url hash="3111cff7">W15-2406</url>
@@ -4084,7 +4084,7 @@
     </paper>
     <paper id="14">
       <title>Modeling dative alternations of individual children</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Joan</first><last>Bresnan</last></author>
       <pages>103–112</pages>
       <url hash="1888088c">W15-2414</url>
@@ -4096,9 +4096,9 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Discourse in Machine Translation</booktitle>
       <url hash="b5f86fca">W15-25</url>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
       <editor><first>Marine</first><last>Carpuat</last></editor>
-      <editor><first>Andrei</first><last>Popescu-Belis</last></editor>
+      <editor id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></editor>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
       <doi>10.18653/v1/W15-25</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4114,9 +4114,9 @@
     <paper id="1">
       <title>Pronoun-Focused <fixed-case>MT</fixed-case> and Cross-Lingual Pronoun Prediction: Findings of the 2015 <fixed-case>D</fixed-case>isco<fixed-case>MT</fixed-case> Shared Task on Pronoun Translation</title>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
       <pages>1–16</pages>
@@ -4128,7 +4128,7 @@
       <title>Comparison of Coreference Resolvers for Deep Syntax Translation</title>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Dieke</first><last>Oele</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>17–23</pages>
       <url hash="2e52889c">W15-2502</url>
       <doi>10.18653/v1/W15-2502</doi>
@@ -4145,9 +4145,9 @@
     </paper>
     <paper id="4">
       <title>Document-Level Machine Translation Evaluation with Gist Consistency and Text Cohesion</title>
-      <author><first>Zhengxian</first><last>Gong</last></author>
+      <author id="zhengxian-gong"><first>Zhengxian</first><last>Gong</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>33–40</pages>
       <url hash="2ada2e5d">W15-2504</url>
       <doi>10.18653/v1/W15-2504</doi>
@@ -4175,7 +4175,7 @@
     </paper>
     <paper id="7">
       <title>A Proposal for a Coherence Corpus in Machine Translation</title>
-      <author><first>Karin</first><last>Sim Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin</first><last>Sim Smith</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>52–58</pages>
@@ -4187,7 +4187,7 @@
       <title>Part-of-Speech Driven Cross-Lingual Pronoun Prediction with Feed-Forward Neural Networks</title>
       <author><first>Jimmy</first><last>Callin</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>59–64</pages>
       <url hash="3df9b63d">W15-2508</url>
       <doi>10.18653/v1/W15-2508</doi>
@@ -4220,7 +4220,7 @@
     <paper id="12">
       <title>Rule-Based Pronominal Anaphora Treatment for Machine Translation</title>
       <author><first>Sharid</first><last>Loáiciga</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>86–93</pages>
       <url hash="4fa4589b">W15-2512</url>
       <doi>10.18653/v1/W15-2512</doi>
@@ -4228,8 +4228,8 @@
     </paper>
     <paper id="13">
       <title>Pronoun Translation and Prediction with or without Coreference Links</title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
-      <author><first>Lesly</first><last>Miculicich Werlen</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich Werlen</last></author>
       <author><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>94–100</pages>
       <url hash="86570e65">W15-2513</url>
@@ -4238,8 +4238,8 @@
     </paper>
     <paper id="14">
       <title>Predicting Pronouns across Languages with Continuous Word Spaces</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>101–107</pages>
       <url hash="0d35243a">W15-2514</url>
       <doi>10.18653/v1/W15-2514</doi>
@@ -4247,7 +4247,7 @@
     </paper>
     <paper id="15">
       <title>Baseline Models for Pronoun Prediction and Pronoun-Aware Translation</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>108–114</pages>
       <url hash="0f52164e">W15-2515</url>
       <doi>10.18653/v1/W15-2515</doi>
@@ -4286,7 +4286,7 @@
       <title>Crosslingual Annotation and Analysis of Implicit Discourse Connectives for Machine Translation</title>
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>142–152</pages>
       <url hash="829b9038">W15-2519</url>
       <doi>10.18653/v1/W15-2519</doi>
@@ -4323,9 +4323,9 @@
       <booktitle>Proceedings of the Sixth International Workshop on Health Text Mining and Information Analysis</booktitle>
       <url hash="f3090bd7">W15-26</url>
       <editor><first>Cyril</first><last>Grouin</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <doi>10.18653/v1/W15-26</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Lisbon, Portugal</address>
@@ -4341,7 +4341,7 @@
       <title>In-depth annotation for patient level liver cancer staging</title>
       <author><first>Wen-wai</first><last>Yim</last></author>
       <author><first>Sharon</first><last>Kwan</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>1–11</pages>
       <url hash="a65a9171">W15-2601</url>
       <doi>10.18653/v1/W15-2601</doi>
@@ -4381,8 +4381,8 @@
     </paper>
     <paper id="5">
       <title>An Analysis of Biomedical Tokenization: Problems and Strategies</title>
-      <author><first>Noa P.</first><last>Cruz Díaz</last></author>
-      <author><first>Manuel</first><last>Maña López</last></author>
+      <author id="noa-p-cruz-diaz"><first>Noa P.</first><last>Cruz Díaz</last></author>
+      <author id="manuel-j-mana-lopez"><first>Manuel</first><last>Maña López</last></author>
       <pages>40–49</pages>
       <url hash="9148d507">W15-2605</url>
       <doi>10.18653/v1/W15-2605</doi>
@@ -4391,7 +4391,7 @@
     </paper>
     <paper id="6">
       <title>Annotation of Clinically Important Follow-up Recommendations in Radiology Reports</title>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <author><first>Prescott</first><last>Klassen</last></author>
       <author><first>Lucas</first><last>McCarthy</last></author>
       <author><first>Elena</first><last>Pellicer</last></author>
@@ -4416,7 +4416,7 @@
     </paper>
     <paper id="8">
       <title>Exploring Word Embedding for Drug Name Recognition</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Víctor</first><last>Suárez-Paniagua</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>64–72</pages>
@@ -4436,7 +4436,7 @@
     <paper id="10">
       <title>Parser Adaptation to the Biomedical Domain without Re-Training</title>
       <author><first>Jeff</first><last>Mitchell</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>79–89</pages>
       <url hash="b13f83d9">W15-2610</url>
       <doi>10.18653/v1/W15-2610</doi>
@@ -4477,7 +4477,7 @@
     <paper id="14">
       <title>Effectively Crowdsourcing Radiology Report Annotations</title>
       <author><first>Anne</first><last>Cocos</last></author>
-      <author><first>Aaron</first><last>Masino</last></author>
+      <author id="aaron-j-masino"><first>Aaron</first><last>Masino</last></author>
       <author><first>Ting</first><last>Qian</last></author>
       <author><first>Ellie</first><last>Pavlick</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
@@ -4498,7 +4498,7 @@
     </paper>
     <paper id="16">
       <title>Information Extraction from Biomedical Texts: Learning Models with Limited Supervision</title>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>120</pages>
       <url hash="14d75fc8">W15-2616</url>
       <doi>10.18653/v1/W15-2616</doi>
@@ -4518,7 +4518,7 @@
       <author><first>Giulia</first><last>Venturi</last></author>
       <author><first>Tommaso</first><last>Bellandi</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <pages>131–141</pages>
       <url hash="188b619f">W15-2618</url>
       <doi>10.18653/v1/W15-2618</doi>
@@ -4526,7 +4526,7 @@
     </paper>
     <paper id="19">
       <title>Mining and Ranking Biomedical Synonym Candidates from <fixed-case>W</fixed-case>ikipedia</title>
-      <author><first>Abhyuday</first><last>Jagannatha</last></author>
+      <author id="abhyuday-jagannatha"><first>Abhyuday</first><last>Jagannatha</last></author>
       <author><first>Jinying</first><last>Chen</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>142–151</pages>
@@ -4549,8 +4549,8 @@
       <url hash="752dc72c">W15-27</url>
       <editor><first>Michael</first><last>Roth</last></editor>
       <editor><first>Annie</first><last>Louis</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <doi>10.18653/v1/W15-27</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Lisbon, Portugal</address>
@@ -4588,7 +4588,7 @@
       <title>Recovering discourse relations: Varying influence of discourse adverbials</title>
       <author><first>Hannah</first><last>Rohde</last></author>
       <author><first>Anna</first><last>Dickinson</last></author>
-      <author><first>Chris</first><last>Clark</last></author>
+      <author id="chris-clark"><first>Chris</first><last>Clark</last></author>
       <author><first>Annie</first><last>Louis</last></author>
       <author><first>Bonnie</first><last>Webber</last></author>
       <pages>22–31</pages>
@@ -4598,7 +4598,7 @@
     </paper>
     <paper id="4">
       <title>Semantics and Discourse Processing for Expressive <fixed-case>TTS</fixed-case></title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <author><first>Rocco</first><last>Tripodi</last></author>
       <pages>32–43</pages>
       <url hash="133824de">W15-2704</url>
@@ -4632,8 +4632,8 @@
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Bonnie</first><last>Webber</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Sameer</first><last>Pradhan</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>64–69</pages>
       <url hash="b3493b10">W15-2707</url>
       <doi>10.18653/v1/W15-2707</doi>
@@ -4643,7 +4643,7 @@
       <title>Lexical Level Distribution of Metadiscourse in Spoken Language</title>
       <author><first>Rui</first><last>Correia</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
       <pages>70–75</pages>
       <url hash="43f5fd84">W15-2708</url>
       <doi>10.18653/v1/W15-2708</doi>
@@ -4653,7 +4653,7 @@
       <title>Idiom Paraphrases: Seventh Heaven vs Cloud Nine</title>
       <author><first>Maria</first><last>Pershina</last></author>
       <author><first>Yifan</first><last>He</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>76–82</pages>
       <url hash="f2e2c871">W15-2709</url>
       <doi>10.18653/v1/W15-2709</doi>
@@ -4672,10 +4672,10 @@
     </paper>
     <paper id="11">
       <title>Predicting word sense annotation agreement</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Oier</first><last>Lopez de Lacalle</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>Lopez de Lacalle</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>89–94</pages>
       <url hash="c86d5d02">W15-2711</url>
       <doi>10.18653/v1/W15-2711</doi>
@@ -4683,9 +4683,9 @@
     </paper>
     <paper id="12">
       <title>Distributional Semantics in Use</title>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
       <pages>95–101</pages>
       <url hash="0de16e41">W15-2712</url>
@@ -4697,10 +4697,10 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Vision and Language</booktitle>
       <url hash="019a0d96">W15-28</url>
-      <editor><first>Anja</first><last>Belz</last></editor>
-      <editor><first>Luisa</first><last>Coheur</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
+      <editor id="luisa-coheur"><first>Luisa</first><last>Coheur</last></editor>
       <editor><first>Vittorio</first><last>Ferrari</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <editor><first>Katerina</first><last>Pastra</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <doi>10.18653/v1/W15-28</doi>
@@ -4725,10 +4725,10 @@
     <paper id="2">
       <title>Computational Integration of Human Vision and Natural Language through Bitext Alignment</title>
       <author><first>Preethi</first><last>Vaidyanathan</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
-      <author><first>Cecilia</first><last>O. Alm</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>O. Alm</last></author>
       <author><first>Jeff B.</first><last>Pelz</last></author>
-      <author><first>Anne R.</first><last>Haake</last></author>
+      <author id="anne-haake"><first>Anne R.</first><last>Haake</last></author>
       <pages>4–5</pages>
       <url hash="2a83eb61">W15-2802</url>
       <doi>10.18653/v1/W15-2802</doi>
@@ -4749,7 +4749,7 @@
     <paper id="4">
       <title>Lingusitic Analysis of Multi-Modal Recurrent Neural Networks</title>
       <author><first>Ákos</first><last>Kádár</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
       <pages>8–9</pages>
       <url hash="f3972a09">W15-2804</url>
@@ -4758,7 +4758,7 @@
     </paper>
     <paper id="5">
       <title>Defining Visually Descriptive Language</title>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Arnau</first><last>Ramisa</last></author>
       <pages>10–17</pages>
@@ -4770,7 +4770,7 @@
       <title>Semantic Tuples for Evaluation of Image to Sentence Generation</title>
       <author><first>Lily D.</first><last>Ellebracht</last></author>
       <author><first>Arnau</first><last>Ramisa</last></author>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Jose</first><last>Cordero-Rama</last></author>
       <author><first>Francesc</first><last>Moreno-Noguer</last></author>
       <author><first>Ariadna</first><last>Quattoni</last></author>
@@ -4793,7 +4793,7 @@
       <title>Image with a Message: Towards Detecting Non-Literal Image Usages by Visual Linking</title>
       <author><first>Lydia</first><last>Weiland</last></author>
       <author><first>Laura</first><last>Dietz</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>40–47</pages>
       <url hash="1c7f354a">W15-2808</url>
       <doi>10.18653/v1/W15-2808</doi>
@@ -4832,9 +4832,9 @@
       <title>Generating Semantically Precise Scene Graphs from Textual Descriptions for Improved Image Retrieval</title>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Ranjay</first><last>Krishna</last></author>
-      <author><first>Angel</first><last>Chang</last></author>
+      <author id="angel-chang"><first>Angel</first><last>Chang</last></author>
       <author><first>Li</first><last>Fei-Fei</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>70–80</pages>
       <url hash="dd67be1a">W15-2812</url>
       <doi>10.18653/v1/W15-2812</doi>
@@ -4844,7 +4844,7 @@
       <title>Do Distributed Semantic Models Dream of Electric Sheep? Visualizing Word Representations through Image Synthesis</title>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
       <author><first>Dat</first><last>Tien Nguyen</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <pages>81–86</pages>
       <url hash="fd4d49d3">W15-2813</url>
       <doi>10.18653/v1/W15-2813</doi>
@@ -4888,10 +4888,10 @@
     <meta>
       <booktitle>Proceedings of the 6th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="111feec2">W15-29</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Erik</first><last>van der Goot</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
       <doi>10.18653/v1/W15-29</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Lisboa, Portugal</address>
@@ -4914,7 +4914,7 @@
     <paper id="2">
       <title>Sentiment Analysis on Monolingual, Multilingual and Code-Switching <fixed-case>T</fixed-case>witter Corpora</title>
       <author><first>David</first><last>Vilares</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>2–8</pages>
       <url hash="4db4afea">W15-2902</url>
@@ -4934,7 +4934,7 @@
       <title>Enhanced <fixed-case>T</fixed-case>witter Sentiment Classification Using Contextual Information</title>
       <author><first>Soroush</first><last>Vosoughi</last></author>
       <author><first>Helen</first><last>Zhou</last></author>
-      <author><first>Deb</first><last>Roy</last></author>
+      <author id="deb-roy"><first>Deb</first><last>Roy</last></author>
       <pages>16–24</pages>
       <url hash="20918cb4">W15-2904</url>
       <doi>10.18653/v1/W15-2904</doi>
@@ -4944,8 +4944,8 @@
       <title>Your Sentiment Precedes You: Using an author’s historical tweets to predict sarcasm</title>
       <author><first>Anupam</first><last>Khattri</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>25–30</pages>
       <url hash="c0c66cc8">W15-2905</url>
       <doi>10.18653/v1/W15-2905</doi>
@@ -4954,7 +4954,7 @@
     <paper id="6">
       <title>Optimising Agile Social Media Analysis</title>
       <author><first>Thomas</first><last>Kober</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>31–40</pages>
       <url hash="ec67730a">W15-2906</url>
       <doi>10.18653/v1/W15-2906</doi>
@@ -5002,7 +5002,7 @@
     <paper id="11">
       <title>Analysing domain suitability of a sentiment lexicon by identifying distributionally bipolar words</title>
       <author><first>Lucie</first><last>Flekova</last></author>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Eugen</first><last>Ruppert</last></author>
       <pages>77–84</pages>
       <url hash="dc490dff">W15-2911</url>
@@ -5011,7 +5011,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>I</fixed-case>magisaurus: An Interactive Visualizer of Valence and Emotion in the <fixed-case>R</fixed-case>oget’s Thesaurus</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>85–91</pages>
       <url hash="f9ecf232">W15-2912</url>
       <doi>10.18653/v1/W15-2912</doi>
@@ -5019,7 +5019,7 @@
     </paper>
     <paper id="13">
       <title>Personality Traits on <fixed-case>T</fixed-case>witter—or—<fixed-case>H</fixed-case>ow to Get 1,500 Personality Tests in a Week</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
       <pages>92–98</pages>
       <url hash="b1353fb5">W15-2913</url>
@@ -5030,7 +5030,7 @@
       <title>Negation Scope Detection for <fixed-case>T</fixed-case>witter Sentiment Analysis</title>
       <author><first>Johan</first><last>Reitan</last></author>
       <author><first>Jørgen</first><last>Faret</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Lars</first><last>Bungum</last></author>
       <pages>99–108</pages>
       <url hash="4117b5d4">W15-2914</url>
@@ -5041,7 +5041,7 @@
       <title>A Linguistically Informed Convolutional Neural Network</title>
       <author><first>Sebastian</first><last>Ebert</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>109–114</pages>
       <url hash="a35728ba">W15-2915</url>
       <doi>10.18653/v1/W15-2915</doi>
@@ -5050,7 +5050,7 @@
     <paper id="16">
       <title>How much does word sense disambiguation help in sentiment analysis of micropost data?</title>
       <author><first>Chiraag</first><last>Sumanth</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>115–121</pages>
       <url hash="17c17252">W15-2916</url>
       <doi>10.18653/v1/W15-2916</doi>
@@ -5068,7 +5068,7 @@
     <paper id="18">
       <title>Beyond Sentiment: Social Psychological Analysis of Political <fixed-case>F</fixed-case>acebook Comments in <fixed-case>H</fixed-case>ungary</title>
       <author><first>Márton</first><last>Miháltz</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>István</first><last>Csertő</last></author>
       <author><first>Éva</first><last>Fülöp</last></author>
       <author><first>Tibor</first><last>Pólya</last></author>
@@ -5139,7 +5139,7 @@
     <paper id="25">
       <title>Sentiment Classification via a Response Recalibration Framework</title>
       <author><first>Phillip</first><last>Smith</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>175–180</pages>
       <url hash="a1db302e">W15-2925</url>
       <doi>10.18653/v1/W15-2925</doi>
@@ -5150,11 +5150,11 @@
     <meta>
       <booktitle>Proceedings of the Tenth Workshop on Statistical Machine Translation</booktitle>
       <url hash="50610350">W15-30</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajan</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajan</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
-      <editor><first>Chris</first><last>Hokamp</last></editor>
+      <editor id="chris-hokamp"><first>Chris</first><last>Hokamp</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
       <editor><first>Varvara</first><last>Logacheva</last></editor>
       <editor><first>Pavel</first><last>Pecina</last></editor>
@@ -5180,9 +5180,9 @@
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Christof</first><last>Monz</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>1–46</pages>
@@ -5205,7 +5205,7 @@
       <author><first>Amittai</first><last>Axelrod</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>58–65</pages>
       <url hash="c7ea99c3">W15-3003</url>
       <doi>10.18653/v1/W15-3003</doi>
@@ -5214,7 +5214,7 @@
     <paper id="4">
       <title><fixed-case>DFKI</fixed-case>’s experimental hybrid <fixed-case>MT</fixed-case> system for <fixed-case>WMT</fixed-case> 2015</title>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <pages>66–73</pages>
       <url hash="37c77f2b">W15-3004</url>
@@ -5223,7 +5223,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>P</fixed-case>ar<fixed-case>FDA</fixed-case> for Fast Deployment of Accurate Statistical Machine Translation Systems, Benchmarks, and Statistics</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>74–78</pages>
@@ -5245,7 +5245,7 @@
       <author><first>Fabienne</first><last>Cap</last></author>
       <author><first>Marion</first><last>Weller</last></author>
       <author><first>Anita</first><last>Ramm</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>84–91</pages>
       <url hash="f2f68a04">W15-3007</url>
       <doi>10.18653/v1/W15-3007</doi>
@@ -5259,7 +5259,7 @@
       <author><first>Teresa</first><last>Herrmann</last></author>
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Yuqi</first><last>Zhang</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>92–97</pages>
       <url hash="2d1f4086">W15-3008</url>
       <doi>10.18653/v1/W15-3008</doi>
@@ -5290,7 +5290,7 @@
     <paper id="11">
       <title>The <fixed-case>AFRL</fixed-case>-<fixed-case>MITLL</fixed-case> <fixed-case>WMT</fixed-case>15 System: There’s More than One Way to Decode It!</title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Katherine</first><last>Young</last></author>
       <author><first>Christina</first><last>May</last></author>
@@ -5305,12 +5305,12 @@
     <paper id="12">
       <title>The <fixed-case>KIT</fixed-case>-<fixed-case>LIMSI</fixed-case> Translation System for <fixed-case>WMT</fixed-case> 2015</title>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>120–125</pages>
       <url hash="ddd1b553">W15-3012</url>
       <doi>10.18653/v1/W15-3012</doi>
@@ -5354,7 +5354,7 @@
       <author><first>Benjamin</first><last>Marie</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>Franck</first><last>Burlot</last></author>
-      <author><first>Quoc-Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc-Khanh</first><last>Do</last></author>
       <author><first>Julia</first><last>Ive</last></author>
       <author><first>Elena</first><last>Knyazeva</last></author>
       <author><first>Matthieu</first><last>Labeau</last></author>
@@ -5370,8 +5370,8 @@
     <paper id="17">
       <title><fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-Sant: <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman Hybrid Machine Translation System</title>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>152–157</pages>
       <url hash="891cedf7">W15-3017</url>
       <doi>10.18653/v1/W15-3017</doi>
@@ -5382,7 +5382,7 @@
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Farzad</first><last>Toutounchi</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>158–163</pages>
       <url hash="bb2714b6">W15-3018</url>
       <doi>10.18653/v1/W15-3018</doi>
@@ -5399,7 +5399,7 @@
     <paper id="20">
       <title><fixed-case>S</fixed-case>heffield Systems for the <fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish <fixed-case>WMT</fixed-case> Translation Task</title>
       <author><first>David</first><last>Steele</last></author>
-      <author><first>Karin</first><last>Sim Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin</first><last>Sim Smith</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>172–176</pages>
       <url hash="6292e07e">W15-3020</url>
@@ -5408,7 +5408,7 @@
     </paper>
     <paper id="21">
       <title>Morphological Segmentation and <fixed-case>OPUS</fixed-case> for <fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish Machine Translation</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
       <pages>177–183</pages>
@@ -5418,11 +5418,11 @@
     </paper>
     <paper id="22">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2015 Translation Task: Morphological Segmentation and Web Crawling</title>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Tommi</first><last>Pirinen</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
@@ -5454,7 +5454,7 @@
       <title><fixed-case>E</fixed-case>dinburgh’s Syntax-Based Systems at <fixed-case>WMT</fixed-case> 2015</title>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Maria</first><last>Nadejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nadejde</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>199–209</pages>
@@ -5466,7 +5466,7 @@
       <title>The <fixed-case>FBK</fixed-case> Participation in the <fixed-case>WMT</fixed-case>15 Automatic Post-editing Shared Task</title>
       <author><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>210–215</pages>
       <url hash="8d712a06">W15-3025</url>
       <doi>10.18653/v1/W15-3025</doi>
@@ -5476,8 +5476,8 @@
       <title><fixed-case>USAAR</fixed-case>-<fixed-case>SAPE</fixed-case>: An <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish Statistical Automatic Post-Editing System</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>216–221</pages>
       <url hash="40d99e4d">W15-3026</url>
       <doi>10.18653/v1/W15-3026</doi>
@@ -5513,9 +5513,9 @@
     <paper id="30">
       <title><fixed-case>L</fixed-case>ist<fixed-case>N</fixed-case>et-based <fixed-case>MT</fixed-case> Rescoring</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Quoc Khanh</first><last>Do</last></author>
+      <author id="quoc-khanh-do"><first>Quoc Khanh</first><last>Do</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>248–255</pages>
       <url hash="ee952b4a">W15-3030</url>
       <doi>10.18653/v1/W15-3030</doi>
@@ -5548,7 +5548,7 @@
       <author><first>Joern</first><last>Wuebker</last></author>
       <author><first>Miguel</first><last>Graça</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>282–293</pages>
       <url hash="e6629463">W15-3033</url>
       <doi>10.18653/v1/W15-3033</doi>
@@ -5558,7 +5558,7 @@
       <title>Investigations on Phrase-based Decoding with Recurrent Neural Network Language and Translation Models</title>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Felix</first><last>Rietig</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>294–303</pages>
       <url hash="c0232479">W15-3034</url>
       <doi>10.18653/v1/W15-3034</doi>
@@ -5566,7 +5566,7 @@
     </paper>
     <paper id="35">
       <title>Referential Translation Machines for Predicting Translation Quality and Related Statistics</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>304–308</pages>
@@ -5576,9 +5576,9 @@
     </paper>
     <paper id="36">
       <title><fixed-case>UA</fixed-case>lacant word-level machine translation quality estimation system at <fixed-case>WMT</fixed-case> 2015</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <pages>309–315</pages>
       <url hash="cba01484">W15-3036</url>
       <doi>10.18653/v1/W15-3036</doi>
@@ -5614,7 +5614,7 @@
     </paper>
     <paper id="40">
       <title><fixed-case>USHEF</fixed-case> and <fixed-case>USAAR</fixed-case>-<fixed-case>USHEF</fixed-case> participation in the <fixed-case>WMT</fixed-case>15 <fixed-case>QE</fixed-case> shared task</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Liling</first><last>Tan</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>336–341</pages>
@@ -5626,9 +5626,9 @@
       <title><fixed-case>SHEF</fixed-case>-<fixed-case>NN</fixed-case>: Translation Quality Estimation with Neural Networks</title>
       <author><first>Kashif</first><last>Shah</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
-      <author><first>Frederic</first><last>Blain</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="frederic-blain"><first>Frederic</first><last>Blain</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>342–347</pages>
@@ -5639,7 +5639,7 @@
     <paper id="42">
       <title>Strategy-Based Technology for Estimating <fixed-case>MT</fixed-case> Quality</title>
       <author><first>Liugang</first><last>Shang</last></author>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <author><first>Duo</first><last>Ji</last></author>
       <pages>348–352</pages>
       <url hash="33e5653a">W15-3042</url>
@@ -5649,7 +5649,7 @@
     <paper id="43">
       <title><fixed-case>UGENT</fixed-case>-<fixed-case>LT</fixed-case>3 <fixed-case>SCATE</fixed-case> System for Machine Translation Quality Estimation</title>
       <author><first>Arda</first><last>Tezcan</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
       <pages>353–360</pages>
@@ -5669,8 +5669,8 @@
     </paper>
     <paper id="45">
       <title><fixed-case>VERT</fixed-case>a: a Linguistically-motivated Metric at the <fixed-case>WMT</fixed-case>15 Metrics Task</title>
-      <author><first>Elisabet</first><last>Comelles</last></author>
-      <author><first>Jordi</first><last>Atserias</last></author>
+      <author id="elisabet-comelles"><first>Elisabet</first><last>Comelles</last></author>
+      <author id="jordi-atserias"><first>Jordi</first><last>Atserias</last></author>
       <pages>366–372</pages>
       <url hash="df464156">W15-3045</url>
       <doi>10.18653/v1/W15-3045</doi>
@@ -5679,8 +5679,8 @@
     <paper id="46">
       <title><fixed-case>UPF</fixed-case>-Cobalt Submission to <fixed-case>WMT</fixed-case>15 Metrics Task</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
       <author><first>Anton</first><last>Malinovskiy</last></author>
       <pages>373–379</pages>
       <url hash="2b806259">W15-3046</url>
@@ -5690,8 +5690,8 @@
     <paper id="47">
       <title>Machine Translation Evaluation using Recurrent Neural Networks</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>380–384</pages>
       <url hash="6118e3c1">W15-3047</url>
       <doi>10.18653/v1/W15-3047</doi>
@@ -5708,7 +5708,7 @@
     </paper>
     <paper id="49">
       <title>chr<fixed-case>F</fixed-case>: character n-gram <fixed-case>F</fixed-case>-score for automatic <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>392–395</pages>
       <url hash="e08319e7">W15-3049</url>
       <doi>10.18653/v1/W15-3049</doi>
@@ -5717,7 +5717,7 @@
     <paper id="50">
       <title><fixed-case>BEER</fixed-case> 1.1: <fixed-case>ILLC</fixed-case> <fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> submission to metrics and tuning task</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>396–401</pages>
       <url hash="d8a533ed">W15-3050</url>
       <doi>10.18653/v1/W15-3050</doi>
@@ -5788,7 +5788,7 @@
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Koichiro</first><last>Yoshino</last></author>
       <author><first>Sakriani</first><last>Sakti</last></author>
-      <author><first>Tomoki</first><last>Toda</last></author>
+      <author id="tomoki-toda"><first>Tomoki</first><last>Toda</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>442–449</pages>
       <url hash="d9d9a95b">W15-3057</url>
@@ -5806,11 +5806,11 @@
     </paper>
     <paper id="59">
       <title>How do Humans Evaluate Machine Translation</title>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Ahmed</first><last>Abdelali</last></author>
       <author><first>Irina</first><last>Temnikova</last></author>
       <author><first>Hassan</first><last>Sajjad</last></author>
-      <author><first>Stephan</first><last>Vogel</last></author>
+      <author id="stephan-vogel"><first>Stephan</first><last>Vogel</last></author>
       <pages>457–466</pages>
       <url hash="21255bf8">W15-3059</url>
       <doi>10.18653/v1/W15-3059</doi>
@@ -5822,7 +5822,7 @@
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Stephan</first><last>Peitz</last></author>
       <author><first>Minwei</first><last>Feng</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>467–476</pages>
       <url hash="aa0a7cfc">W15-3060</url>
       <doi>10.18653/v1/W15-3060</doi>
@@ -5833,7 +5833,7 @@
     <meta>
       <booktitle>Proceedings of the Eighth <fixed-case>SIGHAN</fixed-case> Workshop on <fixed-case>C</fixed-case>hinese Language Processing</booktitle>
       <url hash="49bd5be4">W15-31</url>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
       <editor><first>Zhifang</first><last>Sui</last></editor>
       <editor><first>Yue</first><last>Zhang</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
@@ -5852,7 +5852,7 @@
       <title>Sequential Annotation and Chunking of <fixed-case>C</fixed-case>hinese Discourse Structure</title>
       <author><first>Frances</first><last>Yung</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1–6</pages>
       <url hash="365c7ab8">W15-3101</url>
       <doi>10.18653/v1/W15-3101</doi>
@@ -5888,7 +5888,7 @@
       <author><first>Wei</first><last>Lai</last></author>
       <author><first>Weiping</first><last>Ye</last></author>
       <author><first>Xinru</first><last>Zhao</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>21–25</pages>
       <url hash="6b0df978">W15-3104</url>
       <doi>10.18653/v1/W15-3104</doi>
@@ -5896,7 +5896,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>ACB</fixed-case>i<fixed-case>MA</fixed-case>: Advanced <fixed-case>C</fixed-case>hinese Bi-Character Word Morphological Analyzer</title>
-      <author><first>Ting-Hao</first><last>Huang</last></author>
+      <author id="ting-hao-huang"><first>Ting-Hao</first><last>Huang</last></author>
       <author><first>Yun-Nung</first><last>Chen</last></author>
       <author><first>Lingpeng</first><last>Kong</last></author>
       <pages>26–31</pages>
@@ -5907,9 +5907,9 @@
     </paper>
     <paper id="6">
       <title>Introduction to <fixed-case>SIGHAN</fixed-case> 2015 Bake-off for <fixed-case>C</fixed-case>hinese Spelling Check</title>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
       <pages>32–37</pages>
       <url hash="044731f5">W15-3106</url>
@@ -5923,7 +5923,7 @@
       <author><first>Jinhua</first><last>Xiong</last></author>
       <author><first>Jianpeng</first><last>Hou</last></author>
       <author><first>Qiao</first><last>Zhang</last></author>
-      <author><first>Xueqi</first><last>Cheng</last></author>
+      <author id="xueqi-cheng"><first>Xueqi</first><last>Cheng</last></author>
       <pages>38–45</pages>
       <url hash="fdd1324d">W15-3107</url>
       <doi>10.18653/v1/W15-3107</doi>
@@ -5931,7 +5931,7 @@
     </paper>
     <paper id="8">
       <title>Word Vector/Conditional Random Field-based <fixed-case>C</fixed-case>hinese Spelling Error Detection for <fixed-case>SIGHAN</fixed-case>-2015 Evaluation</title>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
       <pages>46–49</pages>
       <url hash="2be0ecaf">W15-3108</url>
@@ -6091,7 +6091,7 @@
     <paper id="23">
       <title>Rule-Based <fixed-case>W</fixed-case>eibo Messages Sentiment Polarity Classification towards Given Topics</title>
       <author><first>Hongzhao</first><last>Zhou</last></author>
-      <author><first>Yonglin</first><last>Teng</last></author>
+      <author id="yonglin-teng"><first>Yonglin</first><last>Teng</last></author>
       <author><first>Min</first><last>Hou</last></author>
       <author><first>Wei</first><last>He</last></author>
       <author><first>Hongtao</first><last>Zhu</last></author>
@@ -6107,7 +6107,7 @@
       <author><first>Chun</first><last>Liao</last></author>
       <author><first>Chong</first><last>Feng</last></author>
       <author><first>Sen</first><last>Yang</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>158–163</pages>
       <url hash="a9f54998">W15-3124</url>
       <doi>10.18653/v1/W15-3124</doi>
@@ -6167,7 +6167,7 @@
       <booktitle>Proceedings of the Second Workshop on <fixed-case>A</fixed-case>rabic Natural Language Processing</booktitle>
       <url hash="fbd6ea7d">W15-32</url>
       <editor><first>Nizar</first><last>Habash</last></editor>
-      <editor><first>Stephan</first><last>Vogel</last></editor>
+      <editor id="stephan-vogel"><first>Stephan</first><last>Vogel</last></editor>
       <editor><first>Kareem</first><last>Darwish</last></editor>
       <doi>10.18653/v1/W15-32</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6195,8 +6195,8 @@
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Gilbert</first><last>Badaro</last></author>
       <author><first>Ramy</first><last>Baly</last></author>
-      <author><first>Wassim</first><last>El Hajj</last></author>
-      <author><first>Khaled</first><last>Bashir Shaban</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El Hajj</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Bashir Shaban</last></author>
       <pages>9–17</pages>
       <url hash="c99b5171">W15-3202</url>
       <doi>10.18653/v1/W15-3202</doi>
@@ -6211,8 +6211,8 @@
       <author><first>Linda</first><last>Fayad</last></author>
       <author><first>Jeffrey</first><last>Khairallah</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
-      <author><first>Wassim</first><last>El-Hajj</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
+      <author id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></author>
       <pages>18–25</pages>
       <url hash="4065998d">W15-3203</url>
       <doi>10.18653/v1/W15-3203</doi>
@@ -6243,7 +6243,7 @@
     <paper id="6">
       <title><fixed-case>DIWAN</fixed-case>: A Dialectal Word Annotation Tool for <fixed-case>A</fixed-case>rabic</title>
       <author><first>Faisal</first><last>Al-Shargi</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>49–58</pages>
       <url hash="07d582eb">W15-3206</url>
       <doi>10.18653/v1/W15-3206</doi>
@@ -6254,7 +6254,7 @@
       <author><first>Ahmed</first><last>Hamdi</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>59–68</pages>
       <url hash="020442a9">W15-3207</url>
       <doi>10.18653/v1/W15-3207</doi>
@@ -6262,7 +6262,7 @@
     </paper>
     <paper id="8">
       <title>A Conventional Orthography for <fixed-case>A</fixed-case>lgerian <fixed-case>A</fixed-case>rabic</title>
-      <author><first>Houda</first><last>Saadane</last></author>
+      <author id="houda-saadane"><first>Houda</first><last>Saadane</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>69–79</pages>
       <url hash="afd17efc">W15-3208</url>
@@ -6273,7 +6273,7 @@
       <title>A Pilot Study on <fixed-case>A</fixed-case>rabic Multi-Genre Corpus Diacritization</title>
       <author><first>Houda</first><last>Bouamor</last></author>
       <author><first>Wajdi</first><last>Zaghouani</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Ossama</first><last>Obeid</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
@@ -6286,7 +6286,7 @@
     <paper id="10">
       <title>Annotating Targets of Opinions in <fixed-case>A</fixed-case>rabic using Crowdsourcing</title>
       <author><first>Noura</first><last>Farra</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
       <pages>89–98</pages>
       <url hash="fa9535f3">W15-3210</url>
@@ -6306,7 +6306,7 @@
     <paper id="12">
       <title>Joint <fixed-case>A</fixed-case>rabic Segmentation and Part-Of-Speech Tagging</title>
       <author><first>Shabib</first><last>AlGahtani</last></author>
-      <author><first>John</first><last>McNaught</last></author>
+      <author id="john-mcnaught"><first>John</first><last>McNaught</last></author>
       <pages>108–117</pages>
       <url hash="cf07fdb8">W15-3212</url>
       <doi>10.18653/v1/W15-3212</doi>
@@ -6345,7 +6345,7 @@
       <title><fixed-case>GWU</fixed-case>-<fixed-case>HASP</fixed-case>-2015@<fixed-case>QALB</fixed-case>-2015 Shared Task: Priming Spelling Candidates with Probability</title>
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>138–143</pages>
       <url hash="f054e88d">W15-3216</url>
       <doi>10.18653/v1/W15-3216</doi>
@@ -6384,11 +6384,11 @@
     </paper>
     <paper id="20">
       <title><fixed-case>TECHLIMED</fixed-case>@<fixed-case>QALB</fixed-case>-Shared Task 2015: a hybrid <fixed-case>A</fixed-case>rabic Error Correction System</title>
-      <author><first>Djamel</first><last>Mostefa</last></author>
+      <author id="djamel-mostefa"><first>Djamel</first><last>Mostefa</last></author>
       <author><first>Jaber</first><last>Abualasal</last></author>
       <author><first>Omar</first><last>Asbayou</last></author>
       <author><first>Mahmoud</first><last>Gzawi</last></author>
-      <author><first>Ramzi</first><last>Abbes</last></author>
+      <author id="ramzi-abbes"><first>Ramzi</first><last>Abbes</last></author>
       <pages>161–165</pages>
       <url hash="55674931">W15-3220</url>
       <doi>10.18653/v1/W15-3220</doi>
@@ -6406,7 +6406,7 @@
     <paper id="22">
       <title>Robust Part-of-speech Tagging of <fixed-case>A</fixed-case>rabic Text</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>173–182</pages>
       <url hash="77d0d2c8">W15-3222</url>
       <doi>10.18653/v1/W15-3222</doi>
@@ -6437,9 +6437,9 @@
     <meta>
       <booktitle>Proceedings of the Grammar Engineering Across Frameworks (<fixed-case>GEAF</fixed-case>) 2015 Workshop</booktitle>
       <url hash="1a8dd413">W15-33</url>
-      <editor><first>Emily M.</first><last>Bender</last></editor>
-      <editor><first>Lori</first><last>Levin</last></editor>
-      <editor><first>Stefan</first><last>Müller</last></editor>
+      <editor id="emily-m-bender"><first>Emily M.</first><last>Bender</last></editor>
+      <editor id="lori-levin"><first>Lori</first><last>Levin</last></editor>
+      <editor id="stefan-muller"><first>Stefan</first><last>Müller</last></editor>
       <editor><first>Yannick</first><last>Parmentier</last></editor>
       <editor><first>Aarne</first><last>Ranta</last></editor>
       <doi>10.18653/v1/W15-33</doi>
@@ -6512,7 +6512,7 @@
     </paper>
     <paper id="7">
       <title>Formalising the <fixed-case>S</fixed-case>wedish Constructicon in Grammatical Framework</title>
-      <author><first>Normunds</first><last>Gruzitis</last></author>
+      <author id="normunds-gruzitis"><first>Normunds</first><last>Gruzitis</last></author>
       <author><first>Dana</first><last>Dannélls</last></author>
       <author><first>Benjamin</first><last>Lyngfelt</last></author>
       <author><first>Aarne</first><last>Ranta</last></author>
@@ -6543,7 +6543,7 @@
     <meta>
       <booktitle>Proceedings of the Eighth Workshop on Building and Using Comparable Corpora</booktitle>
       <url hash="3f16029f">W15-34</url>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Serge</first><last>Sharoff</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <doi>10.18653/v1/W15-34</doi>
@@ -6559,7 +6559,7 @@
     </frontmatter>
     <paper id="1">
       <title>Augmented Comparative Corpora and Monitoring Corpus in <fixed-case>C</fixed-case>hinese: <fixed-case>LIVAC</fixed-case> and Sketch Search Engine Compared</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>1–2</pages>
       <url hash="e743ebc2">W15-3401</url>
       <doi>10.18653/v1/W15-3401</doi>
@@ -6570,7 +6570,7 @@
       <author><first>Alberto</first><last>Barrón-Cedeño</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <author><first>Josu</first><last>Boldoba</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <pages>3–13</pages>
       <url hash="89d3790b">W15-3402</url>
       <doi>10.18653/v1/W15-3402</doi>
@@ -6589,7 +6589,7 @@
     <paper id="4">
       <title>Projective methods for mining missing translations in <fixed-case>DB</fixed-case>pedia</title>
       <author><first>Laurent</first><last>Jakubina</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>23–31</pages>
       <url hash="5fa2bdda">W15-3404</url>
       <doi>10.18653/v1/W15-3404</doi>
@@ -6598,7 +6598,7 @@
     <paper id="5">
       <title>Attempting to Bypass Alignment from Comparable Corpora via Pivot Language</title>
       <author><first>Alexis</first><last>Linard</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <pages>32–37</pages>
       <url hash="8761f618">W15-3405</url>
@@ -6608,7 +6608,7 @@
     <paper id="6">
       <title>Application of a Corpus to Identify Gaps between <fixed-case>E</fixed-case>nglish Learners and Native Speakers</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>38–42</pages>
       <url hash="4b350fcb">W15-3406</url>
       <doi>10.18653/v1/W15-3406</doi>
@@ -6684,7 +6684,7 @@
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Amir</first><last>Hazem</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Elizaveta</first><last>Loginova-Clouet</last></author>
+      <author id="elizaveta-loginova-clouet"><first>Elizaveta</first><last>Loginova-Clouet</last></author>
       <pages>88–91</pages>
       <url hash="de913772">W15-3413</url>
       <doi>10.18653/v1/W15-3413</doi>
@@ -6696,7 +6696,7 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Semantics-Driven Statistical Machine Translation (<fixed-case>S</fixed-case>2<fixed-case>MT</fixed-case> 2015)</booktitle>
       <url hash="076f9a5c">W15-35</url>
-      <editor><first>Deyi</first><last>Xiong</last></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last></editor>
       <editor><first>Kevin</first><last>Duh</last></editor>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
       <editor><first>Roberto</first><last>Navigli</last></editor>
@@ -6732,7 +6732,7 @@
     </paper>
     <paper id="3">
       <title>Integrating Case Frame into <fixed-case>J</fixed-case>apanese to <fixed-case>C</fixed-case>hinese Hierarchical Phrase-based Translation Model</title>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Jiangming</first><last>Liu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
@@ -6760,7 +6760,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>ACL</fixed-case> 2015 Workshop on Novel Computational Approaches to Keyphrase Extraction</booktitle>
       <url hash="6135e68d">W15-36</url>
-      <editor><first>Sujatha Das</first><last>Gollapalli</last></editor>
+      <editor id="sujatha-das-gollapalli"><first>Sujatha Das</first><last>Gollapalli</last></editor>
       <editor><first>Cornelia</first><last>Caragea</last></editor>
       <editor><first>Xiaoli</first><last>Li</last></editor>
       <editor><first>C. Lee</first><last>Giles</last></editor>
@@ -6787,7 +6787,7 @@
     <paper id="2">
       <title>Technical Term Extraction Using Measures of Neology</title>
       <author><first>Christopher</first><last>Norman</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>2–9</pages>
       <url hash="eaa329a1">W15-3602</url>
       <doi>10.18653/v1/W15-3602</doi>
@@ -6806,7 +6806,7 @@
     </paper>
     <paper id="4">
       <title>The Web as an Implicit Training Set: Application to Noun Compounds Syntax and Semantics</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>18</pages>
       <url hash="4fa5b2cb">W15-3604</url>
       <doi>10.18653/v1/W15-3604</doi>
@@ -6847,9 +6847,9 @@
     <meta>
       <booktitle>Proceedings of the 9th <fixed-case>SIGHUM</fixed-case> Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (<fixed-case>L</fixed-case>a<fixed-case>T</fixed-case>e<fixed-case>CH</fixed-case>)</booktitle>
       <url hash="dfda7367">W15-37</url>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
       <editor><first>Marieke</first><last>van Erp</last></editor>
-      <editor><first>Beatrice</first><last>Alex</last></editor>
+      <editor id="beatrice-alex"><first>Beatrice</first><last>Alex</last></editor>
       <doi>10.18653/v1/W15-37</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing, China</address>
@@ -6874,7 +6874,7 @@
     <paper id="2">
       <title>Five Centuries of Monarchy in <fixed-case>K</fixed-case>orea: Mining the Text of the Annals of the <fixed-case>J</fixed-case>oseon Dynasty</title>
       <author><first>JinYeong</first><last>Bak</last></author>
-      <author><first>Alice</first><last>Oh</last></author>
+      <author id="alice-oh"><first>Alice</first><last>Oh</last></author>
       <pages>10–14</pages>
       <url hash="f8b36b31">W15-3702</url>
       <doi>10.18653/v1/W15-3702</doi>
@@ -6893,7 +6893,7 @@
     <paper id="4">
       <title>Measuring the Structural and Conceptual Similarity of Folktales using Plot Graphs</title>
       <author><first>Victoria Anugrah</first><last>Lestari</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>25–33</pages>
       <url hash="5e87fe74">W15-3704</url>
       <doi>10.18653/v1/W15-3704</doi>
@@ -6910,7 +6910,7 @@
     <paper id="6">
       <title>Ranking Relevant Verb Phrases Extracted from Historical Text</title>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>39–47</pages>
       <url hash="5b5139a4">W15-3706</url>
@@ -6920,7 +6920,7 @@
     <paper id="7">
       <title>Ranking election issues through the lens of social media</title>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>48–52</pages>
       <url hash="83e77a7b">W15-3707</url>
       <doi>10.18653/v1/W15-3707</doi>
@@ -6939,7 +6939,7 @@
       <title>Enriching Interlinear Text using Automatically Constructed Annotators</title>
       <author><first>Ryan</first><last>Georgi</last></author>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>William</first><last>Lewis</last></author>
+      <author id="william-lewis"><first>William</first><last>Lewis</last></author>
       <pages>58–67</pages>
       <url hash="d6958b3a">W15-3709</url>
       <doi>10.18653/v1/W15-3709</doi>
@@ -6947,7 +6947,7 @@
     </paper>
     <paper id="10">
       <title>Automatic interlinear glossing as two-level sequence classification</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Robert</first><last>Schikowski</last></author>
       <author><first>Sabine</first><last>Stoll</last></author>
       <pages>68–72</pages>
@@ -6957,7 +6957,7 @@
     </paper>
     <paper id="11">
       <title>Enriching Digitized Medieval Manuscripts: Linking Image, Text and Lexical Knowledge</title>
-      <author><first>Aitor</first><last>Arronte Álvarez</last></author>
+      <author id="aitor-alvarez"><first>Aitor</first><last>Arronte Álvarez</last></author>
       <pages>73–77</pages>
       <url hash="1467de61">W15-3711</url>
       <doi>10.18653/v1/W15-3711</doi>
@@ -6981,7 +6981,7 @@
       <author><first>Andrea</first><last>Bellandi</last></author>
       <author><first>Davide</first><last>Albanesi</last></author>
       <author><first>Giulia</first><last>Benotto</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <author><first>Gianfranco</first><last>Di Segni</last></author>
       <pages>84–88</pages>
       <url hash="8072efa3">W15-3713</url>
@@ -7025,10 +7025,10 @@
     <meta>
       <booktitle>Proceedings of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 15</booktitle>
       <url hash="614666c5">W15-38</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Jun-ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></editor>
       <doi>10.18653/v1/W15-38</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing, China</address>
@@ -7042,8 +7042,8 @@
     </frontmatter>
     <paper id="1">
       <title>Complex Event Extraction using <fixed-case>DRUM</fixed-case></title>
-      <author><first>James</first><last>Allen</last></author>
-      <author><first>Will</first><last>de Beaumont</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
+      <author id="william-de-beaumont"><first>Will</first><last>de Beaumont</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <pages>1–11</pages>
@@ -7062,10 +7062,10 @@
     </paper>
     <paper id="3">
       <title>An extended dependency graph for relation extraction in biomedical texts</title>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Samir</first><last>Gupta</last></author>
       <author><first>Cathy</first><last>Wu</last></author>
-      <author><first>Vijay</first><last>Shanker</last></author>
+      <author id="k-vijay-shanker"><first>Vijay</first><last>Shanker</last></author>
       <pages>21–30</pages>
       <url hash="25491250">W15-3803</url>
       <doi>10.18653/v1/W15-3803</doi>
@@ -7084,7 +7084,7 @@
       <title>Extracting Biological Pathway Models From <fixed-case>NLP</fixed-case> Event Representations</title>
       <author><first>Michael</first><last>Spranger</last></author>
       <author><first>Sucheendra</first><last>Palaniappan</last></author>
-      <author><first>Samik</first><last>Ghosh</last></author>
+      <author id="samik-ghosh"><first>Samik</first><last>Ghosh</last></author>
       <pages>42–51</pages>
       <url hash="20c8cda1">W15-3805</url>
       <doi>10.18653/v1/W15-3805</doi>
@@ -7093,7 +7093,7 @@
     <paper id="6">
       <title>Shallow Training is cheap but is it good enough? Experiments with Medical Fact Coding</title>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <pages>52–60</pages>
       <url hash="ac0277fa">W15-3806</url>
       <doi>10.18653/v1/W15-3806</doi>
@@ -7102,7 +7102,7 @@
     <paper id="7">
       <title>Stacked Generalization for Medical Concept Extraction from Clinical Notes</title>
       <author><first>Youngjun</first><last>Kim</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>61–70</pages>
       <url hash="7d9a304d">W15-3807</url>
       <doi>10.18653/v1/W15-3807</doi>
@@ -7121,7 +7121,7 @@
     </paper>
     <paper id="9">
       <title>Extracting Time Expressions from Clinical Text</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Chen</first><last>Lin</last></author>
@@ -7159,7 +7159,7 @@
       <author><first>Jin-Woo</first><last>Chung</last></author>
       <author><first>Hee-Jin</first><last>Lee</last></author>
       <author><first>Maria</first><last>Wolters</last></author>
-      <author><first>Jong</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong</first><last>Park</last></author>
       <pages>104–113</pages>
       <url hash="3981d48d">W15-3812</url>
       <doi>10.18653/v1/W15-3812</doi>
@@ -7181,7 +7181,7 @@
       <author><first>Runqing</first><last>Song</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <author><first>Xiangrong</first><last>Zhang</last></author>
       <pages>121–126</pages>
       <url hash="aa3e3fa7">W15-3814</url>
@@ -7247,8 +7247,8 @@
     </paper>
     <paper id="21">
       <title>Investigating Public Health Surveillance using <fixed-case>T</fixed-case>witter</title>
-      <author><first>Antonio</first><last>Jimeno Yepes</last></author>
-      <author><first>Andrew</first><last>MacKinlay</last></author>
+      <author id="antonio-jimeno-yepes"><first>Antonio</first><last>Jimeno Yepes</last></author>
+      <author id="andrew-mackinlay"><first>Andrew</first><last>MacKinlay</last></author>
       <author><first>Bo</first><last>Han</last></author>
       <pages>164–170</pages>
       <url hash="825b6a18">W15-3821</url>
@@ -7272,7 +7272,7 @@
       <author><first>Dingcheng</first><last>Li</last></author>
       <author><first>Yue</first><last>Yu</last></author>
       <author><first>Hongfang</first><last>Liu</last></author>
-      <author><first>Christopher G.</first><last>Chute</last></author>
+      <author id="christopher-chute"><first>Christopher G.</first><last>Chute</last></author>
       <author><first>Guoqian</first><last>Jiang</last></author>
       <pages>177–182</pages>
       <url hash="6e120aca">W15-3823</url>
@@ -7286,10 +7286,10 @@
       <booktitle>Proceedings of the Fifth Named Entity Workshop</booktitle>
       <url hash="b6f5fcfd">W15-39</url>
       <editor><first>Xiangyu</first><last>Duan</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <doi>10.18653/v1/W15-39</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing, China</address>
@@ -7334,7 +7334,7 @@
     </paper>
     <paper id="4">
       <title>Boosting Named Entity Recognition with Neural Character Embeddings</title>
-      <author><first>Cícero</first><last>dos Santos</last></author>
+      <author id="cicero-dos-santos"><first>Cícero</first><last>dos Santos</last></author>
       <author><first>Victor</first><last>Guimarães</last></author>
       <pages>25–33</pages>
       <url hash="c4822963">W15-3904</url>
@@ -7343,7 +7343,7 @@
     </paper>
     <paper id="5">
       <title>Regularity and Flexibility in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Name Transliteration</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>34–42</pages>
       <url hash="589a8851">W15-3905</url>
       <doi>10.18653/v1/W15-3905</doi>
@@ -7361,9 +7361,9 @@
     <paper id="7">
       <title>Semi-supervised Learning for <fixed-case>V</fixed-case>ietnamese Named Entity Recognition using Online Conditional Random Fields</title>
       <author><first>Quang Hong</first><last>Pham</last></author>
-      <author><first>Minh-Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh-Le</first><last>Nguyen</last></author>
       <author><first>Binh Thanh</first><last>Nguyen</last></author>
-      <author><first>Nguyen Viet</first><last>Cuong</last></author>
+      <author id="viet-cuong-nguyen"><first>Nguyen Viet</first><last>Cuong</last></author>
       <pages>50–55</pages>
       <url hash="0ac24896">W15-3907</url>
       <doi>10.18653/v1/W15-3907</doi>
@@ -7372,7 +7372,7 @@
     <paper id="8">
       <title>Boosting <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Transliteration via High Quality Alignment and Multilingual Resources</title>
       <author><first>Yan</first><last>Shao</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <pages>56–60</pages>
       <url hash="5dd21b30">W15-3908</url>
@@ -7384,7 +7384,7 @@
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>61–66</pages>
       <url hash="2a7e2f58">W15-3909</url>
       <doi>10.18653/v1/W15-3909</doi>
@@ -7394,7 +7394,7 @@
       <title>A Hybrid Transliteration Model for <fixed-case>C</fixed-case>hinese/<fixed-case>E</fixed-case>nglish Named Entities —<fixed-case>BJTU</fixed-case>-<fixed-case>NLP</fixed-case> Report for the 5th Named Entities Workshop</title>
       <author><first>Dandan</first><last>Wang</last></author>
       <author><first>Xiaohui</first><last>Yang</last></author>
-      <author><first>Jinan</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>Jinan</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Nan</first><last>Wang</last></author>
       <author><first>Bojia</first><last>Liu</last></author>
@@ -7422,7 +7422,7 @@
     <paper id="12">
       <title>Data representation methods and use of mined corpora for <fixed-case>I</fixed-case>ndian language transliteration</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>78–82</pages>
       <url hash="f5459135">W15-3912</url>
       <doi>10.18653/v1/W15-3912</doi>
@@ -7431,8 +7431,8 @@
     <paper id="13">
       <title><fixed-case>NCU</fixed-case> <fixed-case>IISR</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Named Entity Transliteration Using Different Grapheme Segmentation Approaches</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Chun-Kai</first><last>Wu</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="chun-kai-wu"><first>Chun-Kai</first><last>Wu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>83–87</pages>
       <url hash="8ec362d1">W15-3913</url>
       <doi>10.18653/v1/W15-3913</doi>
@@ -7444,10 +7444,10 @@
       <booktitle>Proceedings of the 3rd Workshop on Continuous Vector Space Models and their Compositionality</booktitle>
       <url hash="b39ee48b">W15-40</url>
       <editor><first>Alexandre</first><last>Allauzen</last></editor>
-      <editor><first>Edward</first><last>Grefenstette</last></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></editor>
       <editor><first>Karl Moritz</first><last>Hermann</last></editor>
       <editor><first>Hugo</first><last>Larochelle</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <doi>10.18653/v1/W15-40</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing, China</address>
@@ -7470,9 +7470,9 @@
     </paper>
     <paper id="2">
       <title>Recursive Neural Networks Can Learn Logical Semantics</title>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>12–21</pages>
       <url hash="602b48a6">W15-4002</url>
       <doi>10.18653/v1/W15-4002</doi>
@@ -7481,7 +7481,7 @@
     </paper>
     <paper id="3">
       <title>Concept Extensions as the Basis for Vector-Space Semantics: Combining Distributional and Ontological Information about Entities</title>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>22–31</pages>
       <url hash="862e2ebf">W15-4003</url>
       <doi>10.18653/v1/W15-4003</doi>
@@ -7501,7 +7501,7 @@
     <paper id="5">
       <title>Exploring the effect of semantic similarity for Phrase-based Machine Translation</title>
       <author><first>Kunal</first><last>Sachdeva</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>41–47</pages>
       <url hash="9290327c">W15-4005</url>
       <doi>10.18653/v1/W15-4005</doi>
@@ -7536,8 +7536,8 @@
       <editor><first>Kurt</first><last>Eberle</last></editor>
       <editor><first>Patrik</first><last>Lambert</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <doi>10.18653/v1/W15-41</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing</address>
@@ -7551,10 +7551,10 @@
     </frontmatter>
     <paper id="1">
       <title>Bootstrapping a hybrid deep <fixed-case>MT</fixed-case> system</title>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>1–5</pages>
       <url hash="bc683e66">W15-4101</url>
       <doi>10.18653/v1/W15-4101</doi>
@@ -7572,7 +7572,7 @@
     <paper id="3">
       <title>What a Transfer-Based System Brings to the Combination with <fixed-case>PBMT</fixed-case></title>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>11–20</pages>
       <url hash="c5c355e9">W15-4103</url>
       <doi>10.18653/v1/W15-4103</doi>
@@ -7590,7 +7590,7 @@
     <paper id="5">
       <title>Passive and Pervasive Use of Bilingual Dictionary in Statistical Machine Translation</title>
       <author><first>Liling</first><last>Tan</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Francis</first><last>Bond</last></author>
       <pages>30–34</pages>
       <url hash="38d26899">W15-4105</url>
@@ -7607,7 +7607,7 @@
     </paper>
     <paper id="7">
       <title>A fuzzier approach to machine translation evaluation: A pilot study on post-editing productivity and automated metrics in commercial settings</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <author><first>Manuel</first><last>Arcedillo</last></author>
       <pages>40–45</pages>
       <url hash="7a22eb32">W15-4107</url>
@@ -7660,10 +7660,10 @@
       <booktitle>Proceedings of the 4th Workshop on Linked Data in Linguistics: Resources and Applications</booktitle>
       <url hash="ce2d7636">W15-42</url>
       <editor><first>Christian</first><last>Chiarcos</last></editor>
-      <editor><first>John Philip</first><last>McCrae</last></editor>
+      <editor id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Philipp</first><last>Cimiano</last></editor>
-      <editor><first>Nancy</first><last>Ide</last></editor>
+      <editor id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
       <doi>10.18653/v1/W15-42</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Beijing, China</address>
@@ -7687,8 +7687,8 @@
     </paper>
     <paper id="2">
       <title>A Linked Data Model for Multimodal Sentiment and Emotion Analysis</title>
-      <author><first>J. Fernando</first><last>Sánchez-Rada</last></author>
-      <author><first>Carlos A.</first><last>Iglesias</last></author>
+      <author id="j-fernando-sanchez-rada"><first>J. Fernando</first><last>Sánchez-Rada</last></author>
+      <author id="carlos-a-iglesias"><first>Carlos A.</first><last>Iglesias</last></author>
       <author><first>Ronald</first><last>Gil</last></author>
       <pages>11–19</pages>
       <url hash="267a208e">W15-4202</url>
@@ -7699,8 +7699,8 @@
       <title>Seeing is Correcting: curating lexical resources using social interfaces</title>
       <author><first>Livy</first><last>Real</last></author>
       <author><first>Fabricio</first><last>Chalub</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
-      <author><first>Claudia</first><last>Freitas</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
+      <author id="claudia-freitas"><first>Claudia</first><last>Freitas</last></author>
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <pages>20–29</pages>
       <url hash="e362c139">W15-4203</url>
@@ -7723,7 +7723,7 @@
       <title>Reconciling Heterogeneous Descriptions of Language Resources</title>
       <author><first>John Philip</first><last>McCrae</last></author>
       <author><first>Philipp</first><last>Cimiano</last></author>
-      <author><first>Victor</first><last>Rodríguez Doncel</last></author>
+      <author id="victor-rodriguez-doncel"><first>Victor</first><last>Rodríguez Doncel</last></author>
       <author><first>Daniel</first><last>Vila-Suero</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Luca</first><last>Matteis</last></author>
@@ -7738,8 +7738,8 @@
     </paper>
     <paper id="6">
       <title><fixed-case>RDF</fixed-case> Representation of Licenses for Language Resources</title>
-      <author><first>Victor</first><last>Rodriguez-Doncel</last></author>
-      <author><first>Penny</first><last>Labropoulou</last></author>
+      <author id="victor-rodriguez-doncel"><first>Victor</first><last>Rodriguez-Doncel</last></author>
+      <author id="penny-labropoulou"><first>Penny</first><last>Labropoulou</last></author>
       <pages>49–58</pages>
       <url hash="89defd47">W15-4206</url>
       <doi>10.18653/v1/W15-4206</doi>
@@ -7768,8 +7768,8 @@
     </paper>
     <paper id="9">
       <title>Linguistic Linked Data in <fixed-case>C</fixed-case>hinese: The Case of <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Chih-Yao</first><last>Lee</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="chi-yao-lee"><first>Chih-Yao</first><last>Lee</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>70–74</pages>
       <url hash="9ae68406">W15-4209</url>
       <doi>10.18653/v1/W15-4209</doi>
@@ -7808,7 +7808,7 @@
       <title>Challenges of studying and processing dialects in social media</title>
       <author><first>Anna</first><last>Jørgensen</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>9–18</pages>
       <url hash="a0ecb407">W15-4302</url>
       <doi>10.18653/v1/W15-4302</doi>
@@ -7820,7 +7820,7 @@
       <author><first>Norisma</first><last>Idris</last></author>
       <author><first>Liyana</first><last>Shuib</last></author>
       <author><first>Ram</first><last>Gopal Raj</last></author>
-      <author><first>AiTi</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>AiTi</first><last>Aw</last></author>
       <pages>19–27</pages>
       <url hash="0c8f76f2">W15-4303</url>
       <doi>10.18653/v1/W15-4303</doi>
@@ -7838,8 +7838,8 @@
     </paper>
     <paper id="5">
       <title>A Normalizer for <fixed-case>UGC</fixed-case> in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Magali</first><last>Sanches Duran</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="magali-sanches-duran"><first>Magali</first><last>Sanches Duran</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <author><first>Lucas</first><last>Avanço</last></author>
       <pages>38–47</pages>
       <url hash="9b3ddcfe">W15-4305</url>
@@ -7848,9 +7848,9 @@
     </paper>
     <paper id="6">
       <title><fixed-case>USFD</fixed-case>: <fixed-case>T</fixed-case>witter <fixed-case>NER</fixed-case> with Drift Compensation and Linked Data</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>48–53</pages>
       <url hash="3a7377a1">W15-4306</url>
       <doi>10.18653/v1/W15-4306</doi>
@@ -7870,7 +7870,7 @@
     <paper id="8">
       <title><fixed-case>IITP</fixed-case>: Multiobjective Differential Evolution based <fixed-case>T</fixed-case>witter Named Entity Recognition</title>
       <author><first>Md Shad</first><last>Akhtar</last></author>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>61–67</pages>
       <url hash="05a6fbe6">W15-4308</url>
@@ -7889,7 +7889,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>H</fixed-case>allym: Named Entity Recognition on <fixed-case>T</fixed-case>witter with Word Representation</title>
-      <author><first>Eun-Suk</first><last>Yang</last></author>
+      <author id="eun-suk-yang"><first>Eun-Suk</first><last>Yang</last></author>
       <author><first>Yu-Seop</first><last>Kim</last></author>
       <pages>72–77</pages>
       <url hash="d0e08df3">W15-4310</url>
@@ -7907,7 +7907,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>B</fixed-case>ekli:A Simple Approach to <fixed-case>T</fixed-case>witter Text Normalization.</title>
-      <author><first>Russell</first><last>Beckley</last></author>
+      <author id="russell-beckley"><first>Russell</first><last>Beckley</last></author>
       <pages>82–86</pages>
       <url hash="8ea58e8f">W15-4312</url>
       <doi>10.18653/v1/W15-4312</doi>
@@ -7933,7 +7933,7 @@
     </paper>
     <paper id="15">
       <title><fixed-case>LYSGROUP</fixed-case>: Adapting a <fixed-case>S</fixed-case>panish microtext normalization system to <fixed-case>E</fixed-case>nglish.</title>
-      <author><first>Yerai</first><last>Doval Mosquera</last></author>
+      <author id="yerai-doval"><first>Yerai</first><last>Doval Mosquera</last></author>
       <author><first>Jesús</first><last>Vilares</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>99–105</pages>
@@ -7944,7 +7944,7 @@
     <paper id="16">
       <title><fixed-case>IITP</fixed-case>: Hybrid Approach for Text Normalization in <fixed-case>T</fixed-case>witter</title>
       <author><first>Md Shad</first><last>Akhtar</last></author>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <pages>106–110</pages>
       <url hash="73057383">W15-4316</url>
@@ -7971,8 +7971,8 @@
     </paper>
     <paper id="19">
       <title>Shared Tasks of the 2015 Workshop on Noisy User-generated Text: <fixed-case>T</fixed-case>witter Lexical Normalization and Named Entity Recognition</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Marie Catherine</first><last>de Marneffe</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie Catherine</first><last>de Marneffe</last></author>
       <author><first>Bo</first><last>Han</last></author>
       <author><first>Young-Bum</first><last>Kim</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
@@ -8016,7 +8016,7 @@
     <paper id="23">
       <title><fixed-case>NCSU</fixed-case>_<fixed-case>SAS</fixed-case>_<fixed-case>SAM</fixed-case>: Deep Encoding and Reconstruction for Normalization of Noisy Text</title>
       <author><first>Samuel</first><last>Leeman-Munk</last></author>
-      <author><first>James</first><last>Lester</last></author>
+      <author id="james-lester"><first>James</first><last>Lester</last></author>
       <author><first>James</first><last>Cox</last></author>
       <pages>154–161</pages>
       <url hash="3149381f">W15-4323</url>
@@ -8026,7 +8026,7 @@
     <paper id="24">
       <title>Learning finite state word representations for unsupervised <fixed-case>T</fixed-case>witter adaptation of <fixed-case>POS</fixed-case> taggers</title>
       <author><first>Julie</first><last>Wulff</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>162–166</pages>
       <url hash="69670596">W15-4324</url>
       <bibkey>wulff-sogaard-2015-learning</bibkey>
@@ -8045,8 +8045,8 @@
       <booktitle>Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications</booktitle>
       <url hash="86a654c1">W15-44</url>
       <editor><first>Hsin-Hsi</first><last>Chen</last></editor>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
-      <editor><first>Yuji</first><last>Matsumoto</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></editor>
       <editor><first>Lung Hsiang</first><last>Wong</last></editor>
       <doi>10.18653/v1/W15-44</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -8062,8 +8062,8 @@
     <paper id="1">
       <title>Overview of the <fixed-case>NLP</fixed-case>-<fixed-case>TEA</fixed-case> 2015 Shared Task for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <pages>1–6</pages>
       <url hash="b1210158">W15-4401</url>
       <doi>10.18653/v1/W15-4401</doi>
@@ -8075,7 +8075,7 @@
       <author><first>Po-Lin</first><last>Chen</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Liang-Pu</first><last>Chen</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <author><first>Ren-Dar</first><last>Yang</last></author>
       <pages>7–14</pages>
       <url hash="17610113">W15-4402</url>
@@ -8104,7 +8104,7 @@
       <title>Semi-automatic Generation of Multiple-Choice Tests from Mentions of Semantic Relations</title>
       <author><first>Renlong</first><last>Ai</last></author>
       <author><first>Sebastian</first><last>Krause</last></author>
-      <author><first>Walter</first><last>Kasper</last></author>
+      <author id="walter-kasper"><first>Walter</first><last>Kasper</last></author>
       <author><first>Feiyu</first><last>Xu</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <pages>26–33</pages>
@@ -8117,7 +8117,7 @@
       <author><first>Tao</first><last>Chen</last></author>
       <author><first>Naijia</first><last>Zheng</last></author>
       <author><first>Yue</first><last>Zhao</last></author>
-      <author><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
+      <author id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>34–42</pages>
       <url hash="77ad609d">W15-4406</url>
@@ -8127,9 +8127,9 @@
     </paper>
     <paper id="7">
       <title>Bilingual Keyword Extraction and its Educational Application</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
-      <author><first>Mei-Hua</first><last>Chen</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-Hua</first><last>Chen</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <pages>43–48</pages>
       <url hash="0283fa8c">W15-4407</url>
       <doi>10.18653/v1/W15-4407</doi>
@@ -8202,7 +8202,7 @@
     <paper id="14">
       <title>Using Finite State Transducers for Helping Foreign Language Learning</title>
       <author><first>Hasan</first><last>Kaya</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>94–98</pages>
       <url hash="87cab837">W15-4414</url>
       <doi>10.18653/v1/W15-4414</doi>
@@ -8224,7 +8224,7 @@
       <author><first>Jui-Feng</first><last>Yeh</last></author>
       <author><first>Chan-Kun</first><last>Yeh</last></author>
       <author><first>Kai-Hsiang</first><last>Yu</last></author>
-      <author><first>Ya-Ting</first><last>Li</last></author>
+      <author id="ya-ting-lin"><first>Ya-Ting</first><last>Li</last></author>
       <author><first>Wan-Ling</first><last>Tsai</last></author>
       <pages>105–110</pages>
       <url hash="4f258951">W15-4416</url>
@@ -8264,10 +8264,10 @@
       <editor><first>Tommaso</first><last>Caselli</last></editor>
       <editor><first>Marieke</first><last>van Erp</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
-      <editor><first>Mark</first><last>Finlayson</last></editor>
+      <editor id="mark-finlayson"><first>Mark</first><last>Finlayson</last></editor>
       <editor><first>Ben</first><last>Miller</last></editor>
-      <editor><first>Jordi</first><last>Atserias</last></editor>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="jordi-atserias"><first>Jordi</first><last>Atserias</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
       <doi>10.18653/v1/W15-45</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -8282,7 +8282,7 @@
     </frontmatter>
     <paper id="1">
       <title>Interactions between Narrative Schemas and Document Categories</title>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <author><first>Anthony</first><last>Davis</last></author>
       <pages>1–10</pages>
       <url hash="24aab2aa">W15-4501</url>
@@ -8294,7 +8294,7 @@
       <author><first>Xiang</first><last>Li</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <author><first>Kai</first><last>Cao</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>11–15</pages>
       <url hash="cad064b7">W15-4502</url>
       <doi>10.18653/v1/W15-4502</doi>
@@ -8355,7 +8355,7 @@
       <title>From <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ines to <fixed-case>S</fixed-case>tory<fixed-case>L</fixed-case>ines: A preliminary proposal for evaluating narratives</title>
       <author><first>Egoitz</first><last>Laparra</last></author>
       <author><first>Itziar</first><last>Aldabe</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <pages>50–55</pages>
       <url hash="0a95f1bd">W15-4508</url>
       <doi>10.18653/v1/W15-4508</doi>
@@ -8379,9 +8379,9 @@
       <url hash="4a386014">W15-46</url>
       <editor><first>Alexander</first><last>Koller</last></editor>
       <editor><first>Gabriel</first><last>Skantze</last></editor>
-      <editor><first>Filip</first><last>Jurcicek</last></editor>
-      <editor><first>Masahiro</first><last>Araki</last></editor>
-      <editor><first>Carolyn Penstein</first><last>Rose</last></editor>
+      <editor id="filip-jurcicek"><first>Filip</first><last>Jurcicek</last></editor>
+      <editor id="masahiro-araki"><first>Masahiro</first><last>Araki</last></editor>
+      <editor id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></editor>
       <doi>10.18653/v1/W15-46</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Prague, Czech Republic</address>
@@ -8427,7 +8427,7 @@
     <paper id="4">
       <title>Miscommunication Recovery in Physically Situated Dialogue</title>
       <author><first>Matthew</first><last>Marge</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>22–31</pages>
       <url hash="c42a63d4">W15-4604</url>
       <doi>10.18653/v1/W15-4604</doi>
@@ -8438,7 +8438,7 @@
       <author><first>Takuya</first><last>Hiraoka</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Elnaz</first><last>Nouri</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>32–41</pages>
       <url hash="dd035738">W15-4605</url>
@@ -8448,7 +8448,7 @@
     <paper id="6">
       <title>An Incremental Turn-Taking Model with Active System Barge-in for Spoken Dialog Systems</title>
       <author><first>Tiancheng</first><last>Zhao</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>42–50</pages>
       <url hash="da171c7b">W15-4606</url>
@@ -8457,7 +8457,7 @@
     </paper>
     <paper id="7">
       <title>Exploring the Effects of Redundancy within a Tutorial Dialogue System: Restating Students’ Responses</title>
-      <author><first>Pamela</first><last>Jordan</last></author>
+      <author id="pamela-jordan"><first>Pamela</first><last>Jordan</last></author>
       <author><first>Patricia</first><last>Albacete</last></author>
       <author><first>Sandra</first><last>Katz</last></author>
       <pages>51–59</pages>
@@ -8477,7 +8477,7 @@
     <paper id="9">
       <title>Belief Tracking with Stacked Relational Trees</title>
       <author><first>Deepak</first><last>Ramachandran</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <pages>68–76</pages>
       <url hash="44ab6aae">W15-4609</url>
       <doi>10.18653/v1/W15-4609</doi>
@@ -8486,7 +8486,7 @@
     <paper id="10">
       <title>“So, which one is it?” The effect of alternative incremental architectures in a high-performance game-playing agent</title>
       <author><first>Maike</first><last>Paetzel</last></author>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <pages>77–86</pages>
       <url hash="26e46dd9">W15-4610</url>
@@ -8510,7 +8510,7 @@
     <paper id="12">
       <title><fixed-case>PDTB</fixed-case> Discourse Parsing as a Tagging Task: The Two Taggers Approach</title>
       <author><first>Or</first><last>Biran</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>96–104</pages>
       <url hash="b0d4fdd1">W15-4612</url>
       <doi>10.18653/v1/W15-4612</doi>
@@ -8520,7 +8520,7 @@
       <title>Which Synthetic Voice Should <fixed-case>I</fixed-case> Choose for an Evocative Task?</title>
       <author><first>Eli</first><last>Pincus</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>105–113</pages>
       <url hash="e4207415">W15-4613</url>
       <doi>10.18653/v1/W15-4613</doi>
@@ -8538,7 +8538,7 @@
     <paper id="15">
       <title>Towards Improving Dialogue Topic Tracking Performances with Wikification of Concept Mentions</title>
       <author><first>Seokhwan</first><last>Kim</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>124–128</pages>
       <url hash="f6eabe34">W15-4615</url>
@@ -8550,7 +8550,7 @@
       <author><first>Sangdo</first><last>Han</last></author>
       <author><first>Jeesoo</first><last>Bang</last></author>
       <author><first>Seonghan</first><last>Ryu</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>129–133</pages>
       <url hash="33d62908">W15-4616</url>
       <doi>10.18653/v1/W15-4616</doi>
@@ -8559,9 +8559,9 @@
     </paper>
     <paper id="17">
       <title>Automated Speech Recognition Technology for Dialogue Interaction with Non-Native Interlocutors</title>
-      <author><first>Alexei V.</first><last>Ivanov</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei V.</first><last>Ivanov</last></author>
       <author><first>Vikram</first><last>Ramanarayanan</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <author><first>Melissa</first><last>Lopez</last></author>
       <author><first>Keelan</first><last>Evanini</last></author>
       <author><first>Jidong</first><last>Tao</last></author>
@@ -8573,10 +8573,10 @@
     <paper id="18">
       <title>Conversational Knowledge Teaching Agent that uses a Knowledge Base</title>
       <author><first>Kyusong</first><last>Lee</last></author>
-      <author><first>Paul Hongsuck</first><last>Seo</last></author>
+      <author id="hongsuck-seo"><first>Paul Hongsuck</first><last>Seo</last></author>
       <author><first>Junhwi</first><last>Choi</last></author>
       <author><first>Sangjun</first><last>Koo</last></author>
-      <author><first>Gary Geunbae</first><last>Lee</last></author>
+      <author id="gary-geunbae-lee"><first>Gary Geunbae</first><last>Lee</last></author>
       <pages>139–143</pages>
       <url hash="8b8c881c">W15-4618</url>
       <doi>10.18653/v1/W15-4618</doi>
@@ -8592,7 +8592,7 @@
     </paper>
     <paper id="20">
       <title>A <fixed-case>SIP</fixed-case> of <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>ee : A Sample of Interesting Productions of Conversational Feedback</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Jan</first><last>Gorisch</last></author>
       <author><first>Roxane</first><last>Bertrand</last></author>
       <author><first>Emilien</first><last>Gorène</last></author>
@@ -8604,7 +8604,7 @@
     </paper>
     <paper id="21">
       <title>Reinforcement Learning of Multi-Issue Negotiation Dialogue Policies</title>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>154–158</pages>
       <url hash="4680d146">W15-4621</url>
@@ -8613,12 +8613,12 @@
     </paper>
     <paper id="22">
       <title>Fast and easy language understanding for dialog systems with <fixed-case>M</fixed-case>icrosoft Language Understanding Intelligent Service (<fixed-case>LUIS</fixed-case>)</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <author><first>Eslam</first><last>Kamal</last></author>
       <author><first>Mokhtar</first><last>Ashour</last></author>
       <author><first>Hani</first><last>Amr</last></author>
       <author><first>Jessica</first><last>Miller</last></author>
-      <author><first>Geoff</first><last>Zweig</last></author>
+      <author id="geoffrey-zweig"><first>Geoff</first><last>Zweig</last></author>
       <pages>159–161</pages>
       <url hash="1a03b185">W15-4622</url>
       <doi>10.18653/v1/W15-4622</doi>
@@ -8626,8 +8626,8 @@
     </paper>
     <paper id="23">
       <title>Multilingual <fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>alk: <fixed-case>W</fixed-case>ikipedia-based talking robots that switch languages.</title>
-      <author><first>Graham</first><last>Wilcock</last></author>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <pages>162–164</pages>
       <url hash="a5c5a2e3">W15-4623</url>
       <doi>10.18653/v1/W15-4623</doi>
@@ -8645,7 +8645,7 @@
     <paper id="25">
       <title><fixed-case>I</fixed-case> Couldn’t Agree More: The Role of Conversational Structure in Agreement and Disagreement Detection in Online Discussions</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>168–177</pages>
       <url hash="fb164682">W15-4625</url>
       <doi>10.18653/v1/W15-4625</doi>
@@ -8662,9 +8662,9 @@
     </paper>
     <paper id="27">
       <title>Generating Sentence Planning Variations for Story Telling</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Lena</first><last>Reed</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>188–197</pages>
       <url hash="30d3af7e">W15-4627</url>
       <doi>10.18653/v1/W15-4627</doi>
@@ -8672,7 +8672,7 @@
     </paper>
     <paper id="28">
       <title><fixed-case>K</fixed-case>eynote: Graph-based Approaches for Spoken Language Understanding</title>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>198</pages>
       <url hash="08b592ff">W15-4628</url>
       <doi>10.18653/v1/W15-4628</doi>
@@ -8680,7 +8680,7 @@
     </paper>
     <paper id="29">
       <title>Evaluating Spoken Dialogue Processing for Time-Offset Interaction</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <author><first>Anton</first><last>Leuski</last></author>
@@ -8692,9 +8692,9 @@
     <paper id="30">
       <title>The Real Challenge 2014: Progress and Prospects</title>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>209–216</pages>
       <url hash="19f7b9e7">W15-4630</url>
       <doi>10.18653/v1/W15-4630</doi>
@@ -8704,7 +8704,7 @@
       <title>Argument Mining: Extracting Arguments from Online Dialogue</title>
       <author><first>Reid</first><last>Swanson</last></author>
       <author><first>Brian</first><last>Ecker</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>217–226</pages>
       <url hash="eb2542c2">W15-4631</url>
       <doi>10.18653/v1/W15-4631</doi>
@@ -8721,10 +8721,10 @@
     </paper>
     <paper id="33">
       <title>Call Centre Conversation Summarization: A Pilot Task at Multiling 2015</title>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
-      <author><first>Jérémy</first><last>Trione</last></author>
-      <author><first>Frédéric</first><last>Béchet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="jeremy-trione"><first>Jérémy</first><last>Trione</last></author>
+      <author id="frederic-bechet"><first>Frédéric</first><last>Béchet</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>232–236</pages>
       <url hash="5e5326d0">W15-4633</url>
@@ -8745,10 +8745,10 @@
     <paper id="35">
       <title>Comment-to-Article Linking in the Online News Domain</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Emina</first><last>Kurtic</last></author>
+      <author id="emina-kurtic"><first>Emina</first><last>Kurtic</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
-      <author><first>Giuseppe</first><last>Di Fabbrizio</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="giuseppe-di-fabbrizio"><first>Giuseppe</first><last>Di Fabbrizio</last></author>
       <pages>245–249</pages>
       <url hash="c4273e6c">W15-4635</url>
       <doi>10.18653/v1/W15-4635</doi>
@@ -8780,12 +8780,12 @@
       <title><fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing 2015: Multilingual Summarization of Single and Multi-Documents, On-line Fora, and Call-center Conversations</title>
       <author><first>George</first><last>Giannakopoulos</last></author>
       <author><first>Jeff</first><last>Kubina</last></author>
-      <author><first>John</first><last>Conroy</last></author>
+      <author id="john-conroy"><first>John</first><last>Conroy</last></author>
       <author><first>Josef</first><last>Steinberger</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
-      <author><first>Udo</first><last>Kruschwitz</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="udo-kruschwitz"><first>Udo</first><last>Kruschwitz</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>270–274</pages>
       <url hash="b150ef99">W15-4638</url>
       <doi>10.18653/v1/W15-4638</doi>
@@ -8794,12 +8794,12 @@
     <paper id="39">
       <title>Stochastic Language Generation in Dialogue using Recurrent Neural Networks with Convolutional Sentence Reranking</title>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Dongho</first><last>Kim</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>275–284</pages>
       <url hash="0562517e">W15-4639</url>
       <doi>10.18653/v1/W15-4639</doi>
@@ -8809,7 +8809,7 @@
       <title>The <fixed-case>U</fixed-case>buntu Dialogue Corpus: A Large Dataset for Research in Unstructured Multi-Turn Dialogue Systems</title>
       <author><first>Ryan</first><last>Lowe</last></author>
       <author><first>Nissan</first><last>Pow</last></author>
-      <author><first>Iulian</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian</first><last>Serban</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
       <pages>285–294</pages>
       <url hash="2c6bddc7">W15-4640</url>
@@ -8841,7 +8841,7 @@
       <title>Optimising Turn-Taking Strategies With Reinforcement Learning</title>
       <author><first>Hatim</first><last>Khouzaimi</last></author>
       <author><first>Romain</first><last>Laroche</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>315–324</pages>
       <url hash="1a1d63bc">W15-4643</url>
       <doi>10.18653/v1/W15-4643</doi>
@@ -8850,9 +8850,9 @@
     <paper id="44">
       <title>Acoustic-prosodic entrainment in <fixed-case>S</fixed-case>lovak, <fixed-case>S</fixed-case>panish, <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese: A cross-linguistic comparison</title>
       <author><first>Rivka</first><last>Levitan</last></author>
-      <author><first>Štefan</first><last>Beňuš</last></author>
-      <author><first>Agustín</first><last>Gravano</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="stefan-benus"><first>Štefan</first><last>Beňuš</last></author>
+      <author id="agustin-gravano"><first>Agustín</first><last>Gravano</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>325–334</pages>
       <url hash="d6584dbd">W15-4644</url>
       <doi>10.18653/v1/W15-4644</doi>
@@ -8861,9 +8861,9 @@
     <paper id="45">
       <title>A statistical approach for Non-Sentential Utterance Resolution for Interactive <fixed-case>QA</fixed-case> System</title>
       <author><first>Dinesh</first><last>Raghu</last></author>
-      <author><first>Sathish</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish</first><last>Indurthi</last></author>
       <author><first>Jitendra</first><last>Ajmera</last></author>
-      <author><first>Sachindra</first><last>Joshi</last></author>
+      <author id="sachindra-joshi"><first>Sachindra</first><last>Joshi</last></author>
       <pages>335–343</pages>
       <url hash="69cc614c">W15-4645</url>
       <doi>10.18653/v1/W15-4645</doi>
@@ -8927,7 +8927,7 @@
       <title>User Adaptive Restoration for Incorrectly-Segmented Utterances in Spoken Dialogue Systems</title>
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Naoki</first><last>Hotta</last></author>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <author><first>Mikio</first><last>Nakano</last></author>
       <pages>393–401</pages>
       <url hash="7cccb669">W15-4651</url>
@@ -8937,7 +8937,7 @@
     <paper id="52">
       <title>Incremental Coordination: Attention-Centric Speech Production in a Physically Situated Conversational Agent</title>
       <author><first>Zhou</first><last>Yu</last></author>
-      <author><first>Dan</first><last>Bohus</last></author>
+      <author id="dan-bohus"><first>Dan</first><last>Bohus</last></author>
       <author><first>Eric</first><last>Horvitz</last></author>
       <pages>402–406</pages>
       <url hash="c6a2b826">W15-4652</url>
@@ -8948,7 +8948,7 @@
       <title>Hyper-parameter Optimisation of <fixed-case>G</fixed-case>aussian Process Reinforcement Learning for Statistical Dialogue Management</title>
       <author><first>Lu</first><last>Chen</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>407–411</pages>
       <url hash="b6d5f9f6">W15-4653</url>
       <doi>10.18653/v1/W15-4653</doi>
@@ -8969,10 +8969,10 @@
       <title>Reward Shaping with Recurrent Neural Networks for Speeding up On-Line Policy Learning in Spoken Dialogue Systems</title>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>David</first><last>Vandyke</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>417–421</pages>
       <url hash="ffd121b2">W15-4655</url>
       <doi>10.18653/v1/W15-4655</doi>
@@ -8994,8 +8994,8 @@
       <author><first>Maria</first><last>Schmidt</last></author>
       <author><first>Markus</first><last>Müller</last></author>
       <author><first>Martin</first><last>Wagner</last></author>
-      <author><first>Sebastian</first><last>Stüker</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="sebastian-stuker"><first>Sebastian</first><last>Stüker</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Hansjörg</first><last>Hofmann</last></author>
       <author><first>Steffen</first><last>Werner</last></author>
       <pages>427–431</pages>
@@ -9006,8 +9006,8 @@
     <paper id="58">
       <title>A distributed cloud-based dialog system for conversational application development</title>
       <author><first>Vikram</first><last>Ramanarayanan</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
-      <author><first>Alexei V.</first><last>Ivanov</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei V.</first><last>Ivanov</last></author>
       <author><first>Keelan</first><last>Evanini</last></author>
       <pages>432–434</pages>
       <url hash="7500e905">W15-4658</url>
@@ -9021,7 +9021,7 @@
       <author><first>Ronald</first><last>Provine</last></author>
       <author><first>Peter</first><last>Yeh</last></author>
       <author><first>William</first><last>Jarrold</last></author>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <author><first>Benjamin</first><last>Douglas</last></author>
       <pages>435–437</pages>
       <url hash="83b4e1dc">W15-4659</url>
@@ -9030,12 +9030,12 @@
     </paper>
     <paper id="60">
       <title>Description of the <fixed-case>P</fixed-case>atient<fixed-case>G</fixed-case>enesys Dialogue System</title>
-      <author><first>Leonardo</first><last>Campillos Llanos</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo</first><last>Campillos Llanos</last></author>
       <author><first>Dhouha</first><last>Bouamor</last></author>
-      <author><first>Éric</first><last>Bilinski</last></author>
+      <author id="eric-bilinski"><first>Éric</first><last>Bilinski</last></author>
       <author><first>Anne-Laure</first><last>Ligozat</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
       <pages>438–440</pages>
       <url hash="223ea98b">W15-4660</url>
       <doi>10.18653/v1/W15-4660</doi>
@@ -9046,10 +9046,10 @@
       <author><first>Tejaswi</first><last>Kasturi</last></author>
       <author><first>Haojian</first><last>Jin</last></author>
       <author><first>Aasish</first><last>Pappu</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Beverley</first><last>Harrison</last></author>
       <author><first>Ramana</first><last>Murthy</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>441–443</pages>
       <url hash="34473687">W15-4661</url>
       <doi>10.18653/v1/W15-4661</doi>
@@ -9060,7 +9060,7 @@
     <meta>
       <booktitle>Proceedings of the 15th <fixed-case>E</fixed-case>uropean Workshop on Natural Language Generation (<fixed-case>ENLG</fixed-case>)</booktitle>
       <url hash="1e34d915">W15-47</url>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Albert</first><last>Gatt</last></editor>
       <editor><first>François</first><last>Portet</last></editor>
       <editor><first>Matthew</first><last>Purver</last></editor>
@@ -9079,7 +9079,7 @@
       <title>A Simple Surface Realization Engine for <fixed-case>T</fixed-case>elugu</title>
       <author><first>Sasi Raja Sekhar</first><last>Dokkara</last></author>
       <author><first>Suresh Verma</first><last>Penumathsa</last></author>
-      <author><first>Somayajulu Gowri</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu Gowri</first><last>Sripada</last></author>
       <pages>1–8</pages>
       <url hash="b6ca7e3b">W15-4701</url>
       <doi>10.18653/v1/W15-4701</doi>
@@ -9106,8 +9106,8 @@
     </paper>
     <paper id="4">
       <title>Inducing Clause-Combining Rules: A Case Study with the <fixed-case>SP</fixed-case>a<fixed-case>RK</fixed-case>y Restaurant Corpus</title>
-      <author><first>Michael</first><last>White</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <pages>28–37</pages>
       <url hash="bfcc6818">W15-4704</url>
       <doi>10.18653/v1/W15-4704</doi>
@@ -9115,7 +9115,7 @@
     </paper>
     <paper id="5">
       <title>Reading Times Predict the Quality of Generated Text Above and Beyond Human Ratings</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Sebastian</first><last>Loth</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>38–47</pages>
@@ -9126,7 +9126,7 @@
     <paper id="6">
       <title>Moving Targets: Human References to Unstable Landmarks</title>
       <author><first>Adriana</first><last>Baltaretu</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Alfons</first><last>Maes</last></author>
       <pages>48–51</pages>
       <url hash="6b729d86">W15-4706</url>
@@ -9136,8 +9136,8 @@
     <paper id="7">
       <title>A Framework for the Generation of Computer System Diagnostics in Natural Language using Finite State Methods</title>
       <author><first>Rachel</first><last>Farrell</last></author>
-      <author><first>Gordon</first><last>Pace</last></author>
-      <author><first>Michael</first><last>Rosner</last></author>
+      <author id="gordon-pace"><first>Gordon</first><last>Pace</last></author>
+      <author id="michael-rosner"><first>Michael</first><last>Rosner</last></author>
       <pages>52–56</pages>
       <url hash="ab6c8fcb">W15-4707</url>
       <doi>10.18653/v1/W15-4707</doi>
@@ -9156,7 +9156,7 @@
       <title><fixed-case>J</fixed-case>apanese Word Reordering Executed Concurrently with Dependency Parsing and Its Evaluation</title>
       <author><first>Tomohiro</first><last>Ohno</last></author>
       <author><first>Kazushi</first><last>Yoshida</last></author>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <pages>61–65</pages>
       <url hash="308ad20f">W15-4709</url>
@@ -9178,7 +9178,7 @@
       <author><first>Leen</first><last>Sevens</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>71–75</pages>
       <url hash="0a5c9f83">W15-4711</url>
       <doi>10.18653/v1/W15-4711</doi>
@@ -9186,7 +9186,7 @@
     </paper>
     <paper id="12">
       <title>Translating <fixed-case>I</fixed-case>talian to <fixed-case>LIS</fixed-case> in the Rail Stations</title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>76–80</pages>
       <url hash="0f0b4794">W15-4712</url>
       <doi>10.18653/v1/W15-4712</doi>
@@ -9282,7 +9282,7 @@
     <paper id="22">
       <title>Generating Image Descriptions with Gold Standard Visual Inputs: Motivation, Evaluation and Baselines</title>
       <author><first>Josiah</first><last>Wang</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>117–126</pages>
       <url hash="28f26c65">W15-4722</url>
       <doi>10.18653/v1/W15-4722</doi>
@@ -9385,8 +9385,8 @@
     <paper id="6">
       <title>Automated Lossless Hyper-Minimization for Morphological Analyzers</title>
       <author><first>Senka</first><last>Drobac</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <url hash="c9e6c64f">W15-4806</url>
       <bibkey>drobac-etal-2015-automated</bibkey>
     </paper>
@@ -9434,10 +9434,10 @@
   <volume id="49" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 18th Annual Conference of the <fixed-case>E</fixed-case>uropean Association for Machine Translation</booktitle>
-      <editor><first>İlknur Durgar</first><last>El-Kahlout</last></editor>
+      <editor id="ilknur-durgar-el-kahlout"><first>İlknur Durgar</first><last>El-Kahlout</last></editor>
       <editor><first>Mehmed</first><last>Özkan</last></editor>
       <editor><first>Felipe</first><last>Sánchez-Martínez</last></editor>
-      <editor><first>Gema</first><last>Ramírez-Sánchez</last></editor>
+      <editor id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></editor>
       <editor><first>Fred</first><last>Hollowood</last></editor>
       <editor><first>Andy</first><last>Way</last></editor>
       <address>Antalya, Turkey</address>
@@ -9453,9 +9453,9 @@
     <paper id="1">
       <title>Exploiting portability to build an <fixed-case>RBMT</fixed-case> prototype for a new source language</title>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="c76dd797">W15-4901</url>
       <pages>3–10</pages>
       <bibkey>aranberri-etal-2015-exploiting-portability</bibkey>
@@ -9463,24 +9463,24 @@
     <paper id="2">
       <title>Building hybrid machine translation systems by using an <fixed-case>EBMT</fixed-case> preprocessor to create partial translations</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <url hash="c4ea1d25">W15-4902</url>
       <pages>11–18</pages>
       <bibkey>artetxe-etal-2015-building-hybrid</bibkey>
     </paper>
     <paper id="3">
       <title>Using on-line available sources of bilingual information for word-level machine translation quality estimation</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="e272913d">W15-4903</url>
       <pages>19–26</pages>
       <bibkey>espla-gomis-etal-2015-using-line</bibkey>
     </paper>
     <paper id="4">
       <title>A general framework for minimizing translation effort: towards a principled combination of translation technologies in computer-aided translation</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <url hash="ef1664e8">W15-4904</url>
       <pages>27–34</pages>
@@ -9489,10 +9489,10 @@
     <paper id="5">
       <title>Can Translation Memories afford not to use paraphrasing?</title>
       <author><first>Rohit</first><last>Gupta</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="2c05ec84">W15-4905</url>
       <pages>35–42</pages>
       <bibkey>gupta-etal-2015-translation-memories</bibkey>
@@ -9518,9 +9518,9 @@
     </paper>
     <paper id="8">
       <title>Document-Level Machine Translation with Word Vector Models</title>
-      <author><first>Eva Martínez</first><last>Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva Martínez</first><last>Garcia</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Lluís</first><last>Màrquez</last></author>
+      <author id="lluis-marquez"><first>Lluís</first><last>Màrquez</last></author>
       <url hash="0b3f68fe">W15-4908</url>
       <pages>59–66</pages>
       <bibkey>garcia-etal-2015-document-level</bibkey>
@@ -9551,23 +9551,23 @@
     </paper>
     <paper id="12">
       <title>Dynamic Terminology Integration Methods in Statistical Machine Translation</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <url hash="cc66d584">W15-4912</url>
       <pages>89–96</pages>
       <bibkey>pinnis-2015-dynamic-terminology</bibkey>
     </paper>
     <paper id="13">
       <title>Identifying main obstacles for statistical machine translation of morphologically rich <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <url hash="b6f6bca2">W15-4913</url>
       <pages>97–104</pages>
       <bibkey>popovic-arcan-2015-identifying-main</bibkey>
     </paper>
     <paper id="14">
       <title>Poor man’s lemmatisation for automatic error classification</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
@@ -9585,10 +9585,10 @@
     </paper>
     <paper id="16">
       <title>Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <url hash="cd9ae888">W15-4916</url>
       <pages>121–128</pages>
@@ -9598,7 +9598,7 @@
       <title>Stripping Adjectives: Integration Techniques for Selective Stemming in <fixed-case>SMT</fixed-case> Systems</title>
       <author><first>Isabel</first><last>Slawik</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="16fc1448">W15-4917</url>
       <pages>129–136</pages>
       <bibkey>slawik-etal-2015-stripping-adjectives</bibkey>
@@ -9606,8 +9606,8 @@
     <paper id="18">
       <title>Evaluating machine translation for assimilation via a gap-filling task</title>
       <author><first>Ekaterina</first><last>Ageeva</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <url hash="c365198d">W15-4918</url>
       <pages>137–144</pages>
@@ -9615,9 +9615,9 @@
     </paper>
     <paper id="19">
       <title>Unsupervised training of maximum-entropy models for lexical selection in rule-based machine translation</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <url hash="75e89861">W15-4919</url>
       <pages>145–152</pages>
       <bibkey>tyers-etal-2015-unsupervised-training</bibkey>
@@ -9633,14 +9633,14 @@
     <paper id="21">
       <title>Re-assessing the <fixed-case>WMT</fixed-case>2013 Human Evaluation with Professional Translators Trainees</title>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="85d5d8ea">W15-4921</url>
       <pages>161–168</pages>
       <bibkey>vela-van-genabith-2015-assessing-wmt2013</bibkey>
     </paper>
     <paper id="22">
       <title>Integrating a Large, Monolingual Corpus as Translation Memory into Statistical Machine Translation</title>
-      <author><first>Katharina</first><last>Wäschle</last></author>
+      <author id="katharina-waschle"><first>Katharina</first><last>Wäschle</last></author>
       <author><first>Stefan</first><last>Riezler</last></author>
       <url hash="5e04d367">W15-4922</url>
       <pages>169–176</pages>
@@ -9649,8 +9649,8 @@
     <paper id="23">
       <title>Target-Side Generation of Prepositions for <fixed-case>SMT</fixed-case></title>
       <author><first>Marion</first><last>Weller</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <url hash="703d9cf0">W15-4923</url>
       <pages>177–184</pages>
       <bibkey>weller-etal-2015-target</bibkey>
@@ -9675,7 +9675,7 @@
     </paper>
     <paper id="26">
       <title>Pre-reordering for Statistical Machine Translation of Non-fictional Subtitles</title>
-      <author><first>Magdalena</first><last>Plamadă</last></author>
+      <author id="magdalena-plamada"><first>Magdalena</first><last>Plamadă</last></author>
       <author><first>Gion</first><last>Linder</last></author>
       <author><first>Phillip</first><last>Ströbel</last></author>
       <author><first>Martin</first><last>Volk</last></author>
@@ -9702,7 +9702,7 @@
     </paper>
     <paper id="29">
       <title><fixed-case>M</fixed-case>ixed<fixed-case>E</fixed-case>motions: Social Semantic Emotion Analysis for Innovative Multilingual Big Data Analytics Markets</title>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <url hash="4d61f7bb">W15-4929</url>
       <pages>211</pages>
@@ -9710,7 +9710,7 @@
     </paper>
     <paper id="30">
       <title>The <fixed-case>ACCEPT</fixed-case> Academic Portal: Bringing Together Pre-editing, <fixed-case>MT</fixed-case> and Post-editing into a Learning Environment</title>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Asheesh</first><last>Gulati</last></author>
       <author><first>Victoria</first><last>Porro</last></author>
@@ -9745,7 +9745,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>H</fixed-case>andy<fixed-case>CAT</fixed-case> - An Open-Source Platform for <fixed-case>CAT</fixed-case> Tool Research</title>
-      <author><first>Christopher</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Christopher</first><last>Hokamp</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <url hash="f81f4b14">W15-4934</url>
       <pages>216</pages>
@@ -9758,15 +9758,15 @@
       <author><first>Markus</first><last>Egg</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Lexi</first><last>Birch</last></author>
-      <author><first>Katia</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia</first><last>Kermanidis</last></author>
       <author><first>Vilelmini</first><last>Sosoni</last></author>
       <author><first>Dimitrios</first><last>Tsoumakos</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Iris</first><last>Hendrickx</last></author>
       <author><first>Michael</first><last>Papadopoulos</last></author>
       <author><first>Panayota</first><last>Georgakopoulou</last></author>
       <author><first>Maria</first><last>Gialama</last></author>
-      <author><first>Menno</first><last>van Zaanen</last></author>
+      <author id="menno-van-zaanen"><first>Menno</first><last>van Zaanen</last></author>
       <author><first>Ioana</first><last>Buliga</last></author>
       <author><first>Mitja</first><last>Jermol</last></author>
       <author><first>Davor</first><last>Orlic</last></author>
@@ -9791,7 +9791,7 @@
     </paper>
     <paper id="38">
       <title><fixed-case>FALCON</fixed-case>: Federated Active Linguistic data <fixed-case>C</fixed-case>urati<fixed-case>ON</fixed-case></title>
-      <author><first>David</first><last>Lewis</last></author>
+      <author id="david-d-lewis"><first>David</first><last>Lewis</last></author>
       <url hash="c5cfa9f5">W15-4938</url>
       <pages>220</pages>
       <bibkey>lewis-2015-falcon-federated</bibkey>
@@ -9807,7 +9807,7 @@
     </paper>
     <paper id="40">
       <title><fixed-case>O</fixed-case>kapi+<fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st: Translation Quality Estimation within Okapi</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Yves</first><last>Savourel</last></author>
       <url hash="d92dcbe4">W15-4940</url>
@@ -9839,13 +9839,13 @@
     <paper id="44">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: Automatic building of Machine Translation</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Tommi A.</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A.</first><last>Pirinen</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
-      <author><first>Sergio Ortiz</first><last>Rojas</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Miquel</first><last>Esplà</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio Ortiz</first><last>Rojas</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
@@ -9858,7 +9858,7 @@
       <author><first>Masao</first><last>Utiyama</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
       <author><first>Martin</first><last>Thomas</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <url hash="98fd1509">W15-4945</url>
       <pages>228</pages>
       <bibkey>utiyama-etal-2015-mnh-tt</bibkey>
@@ -9867,7 +9867,7 @@
       <title>Smart Computer Aided Translation Environment - <fixed-case>SCATE</fixed-case></title>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Tom</first><last>Vanallemeersch</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <author><first>Geert</first><last>Heyman</last></author>
       <author><first>Sien</first><last>Moens</last></author>
       <author><first>Joris</first><last>Pelemans</last></author>
@@ -9875,7 +9875,7 @@
       <author><first>Iulianna</first><last>Van der Lek - Ciudin</last></author>
       <author><first>Arda</first><last>Tezcan</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Eva</first><last>Geurts</last></author>
       <author><first>Mieke</first><last>Haesen</last></author>
       <url hash="bdfa79c2">W15-4946</url>
@@ -9891,7 +9891,7 @@
       <editor><first>Isao</first><last>Goto</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Sadao</first><last>Kurohashi</last></editor>
-      <editor><first>Eiichiro</first><last>Sumita</last></editor>
+      <editor id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></editor>
       <publisher>Workshop on Asian Translation</publisher>
       <address>Kyoto, Japan</address>
       <month>October</month>
@@ -9961,7 +9961,7 @@
       <author><first>John</first><last>Richardson</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
-      <author><first>Fabien</first><last>Cromières</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromières</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <url hash="48f820ec">W15-5006</url>
@@ -9982,7 +9982,7 @@
     <paper id="8">
       <title><fixed-case>NAVER</fixed-case> Machine Translation System for <fixed-case>WAT</fixed-case> 2015</title>
       <author><first>Hyoung-Gyu</first><last>Lee</last></author>
-      <author><first>JaeSong</first><last>Lee</last></author>
+      <author id="jaesong-lee"><first>JaeSong</first><last>Lee</last></author>
       <author><first>Jun-Seok</first><last>Kim</last></author>
       <author><first>Chang-Ki</first><last>Lee</last></author>
       <url hash="c12433de">W15-5008</url>
@@ -9995,7 +9995,7 @@
       <title>An Awkward Disparity between <fixed-case>BLEU</fixed-case> / <fixed-case>RIBES</fixed-case> Scores and Human Judgements in Machine Translation</title>
       <author><first>Liling</first><last>Tan</last></author>
       <author><first>Jon</first><last>Dehdari</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <url hash="b1b20e48">W15-5009</url>
       <attachment type="presentation" hash="bb6f01af">W15-5009.Presentation.pdf</attachment>
       <attachment type="poster" hash="ef5d8683">W15-5009.Poster.pdf</attachment>
@@ -10055,7 +10055,7 @@
       <editor><first>Jan</first><last>Alexandersson</last></editor>
       <editor><first>Ercan</first><last>Altinsoy</last></editor>
       <editor><first>Heidi</first><last>Christensen</last></editor>
-      <editor><first>Peter</first><last>Ljunglöf</last></editor>
+      <editor id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></editor>
       <editor><first>François</first><last>Portet</last></editor>
       <editor><first>Frank</first><last>Rudzicz</last></editor>
       <doi>10.18653/v1/W15-51</doi>
@@ -10218,8 +10218,8 @@
       <author><first>Ka Ho</first><last>Wong</last></author>
       <author><first>Yu Ting</first><last>Yeung</last></author>
       <author><first>Patrick C. M.</first><last>Wong</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>86–90</pages>
       <url hash="718eabd6">W15-5115</url>
       <doi>10.18653/v1/W15-5115</doi>
@@ -10274,7 +10274,7 @@
       <author><first>Leen</first><last>Sevens</last></author>
       <author><first>Vincent</first><last>Vandeghinste</last></author>
       <author><first>Ineke</first><last>Schuurman</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>110–117</pages>
       <url hash="aad9cf8e">W15-5119</url>
       <doi>10.18653/v1/W15-5119</doi>
@@ -10316,7 +10316,7 @@
     <paper id="23">
       <title>Using linguistic features longitudinally to predict clinical scores for <fixed-case>A</fixed-case>lzheimer’s disease and related dementias</title>
       <author><first>Maria</first><last>Yancheva</last></author>
-      <author><first>Kathleen</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen</first><last>Fraser</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
       <pages>134–139</pages>
       <url hash="014036c7">W15-5123</url>
@@ -10326,7 +10326,7 @@
     <paper id="24">
       <title>From <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese to <fixed-case>P</fixed-case>ortuguese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
       <author><first>Inês</first><last>Almeida</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
       <author><first>Sara</first><last>Candeias</last></author>
       <pages>140–143</pages>
       <url hash="da255c88">W15-5124</url>
@@ -10338,7 +10338,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop Natural Language Processing for Translation Memories</booktitle>
       <url hash="8b0815b6">W15-52</url>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
       <editor><first>Rohit</first><last>Gupta</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
@@ -10352,7 +10352,7 @@
     </frontmatter>
     <paper id="1">
       <title>Creation of new <fixed-case>TM</fixed-case> segments: Fulfilling translators’ wishes</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <pages>1–8</pages>
       <url hash="17dbba79">W15-5201</url>
       <bibkey>parra-escartin-2015-creation</bibkey>
@@ -10367,7 +10367,7 @@
     <paper id="3">
       <title>Improving Translation Memory Matching through Clause Splitting</title>
       <author><first>Katerina</first><last>Raisa Timonera</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>17–23</pages>
       <url hash="fc8e1f5f">W15-5203</url>
       <bibkey>raisa-timonera-mitkov-2015-improving</bibkey>
@@ -10390,12 +10390,12 @@
     </paper>
     <paper id="6">
       <title><fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og: New Approaches to <fixed-case>TM</fixed-case> and Post Editing Interfaces</title>
-      <author><first>Tapas</first><last>Nayek</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="tapas-nayak"><first>Tapas</first><last>Nayek</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mihaela</first><last>Vela</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>36–42</pages>
       <url hash="a06ac34d">W15-5206</url>
       <bibkey>nayek-etal-2015-catalog</bibkey>
@@ -10408,7 +10408,7 @@
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Lidia</first><last>Pivovarova</last></editor>
       <editor><first>Jan</first><last>Šnajder</last></editor>
-      <editor><first>Hristo</first><last>Tanev</last></editor>
+      <editor id="hristo-tanev"><first>Hristo</first><last>Tanev</last></editor>
       <editor><first>Roman</first><last>Yangarber</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
@@ -10422,7 +10422,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>C</fixed-case>roatian (that work for <fixed-case>S</fixed-case>erbian, too)</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>1–8</pages>
       <url hash="ea8cce72">W15-5301</url>
@@ -10430,9 +10430,9 @@
     </paper>
     <paper id="2">
       <title>Analytic Morphology – Merging the Paradigmatic and Syntagmatic Perspective in a Treebank</title>
-      <author><first>Vladimír</first><last>Petkevič</last></author>
+      <author id="vladimir-petkevic"><first>Vladimír</first><last>Petkevič</last></author>
       <author><first>Alexandr</first><last>Rosen</last></author>
-      <author><first>Hana</first><last>Skoumalová</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalová</last></author>
       <author><first>Přemysl</first><last>Vítovec</last></author>
       <pages>9–16</pages>
       <url hash="86efe705">W15-5302</url>
@@ -10463,7 +10463,7 @@
     </paper>
     <paper id="6">
       <title>Regional Linguistic Data Initiative (<fixed-case>R</fixed-case>e<fixed-case>LDI</fixed-case>)</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Maja</first><last>Miličević</last></author>
       <pages>40–42</pages>
@@ -10484,7 +10484,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>E</fixed-case>-law Module Supporting Lawyers in the Process of Knowledge Discovery from Legal Documents</title>
-      <author><first>Marek</first><last>Kozłowski</last></author>
+      <author id="marek-kozlowski"><first>Marek</first><last>Kozłowski</last></author>
       <author><first>Maciej</first><last>Kowalski</last></author>
       <author><first>Maciej</first><last>Kazula</last></author>
       <pages>46–48</pages>
@@ -10530,14 +10530,14 @@
     <paper id="13">
       <title>Universalizing <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank: a Linguistic Tale about Glocalization</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>81–89</pages>
       <url hash="fe14b806">W15-5313</url>
       <bibkey>osenova-simov-2015-universalizing</bibkey>
     </paper>
     <paper id="14">
       <title>Types of Aspect Terms in Aspect-Oriented Sentiment Labeling</title>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <author><first>Evgeniy</first><last>Kotelnikov</last></author>
       <author><first>Pavel</first><last>Blinov</last></author>
       <pages>90–95</pages>
@@ -10546,7 +10546,7 @@
     </paper>
     <paper id="15">
       <title>Authorship Attribution and Author Profiling of <fixed-case>L</fixed-case>ithuanian Literary Texts</title>
-      <author><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
+      <author id="jurgita-kapociute-dzikiene"><first>Jurgita</first><last>Kapočiūtė-Dzikienė</last></author>
       <author><first>Andrius</first><last>Utka</last></author>
       <author><first>Ligita</first><last>Šarkutė</last></author>
       <pages>96–105</pages>
@@ -10567,13 +10567,13 @@
     <meta>
       <booktitle>Proceedings of the Joint Workshop on Language Technology for Closely Related Languages, Varieties and Dialects</booktitle>
       <url hash="060e53da">W15-54</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
       <editor><first>Liling</first><last>Tan</last></editor>
       <editor><first>Cristina</first><last>Vertan</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -10597,7 +10597,7 @@
     </paper>
     <paper id="2">
       <title>Handling and Mining Linguistic Variation in <fixed-case>UGC</fixed-case></title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>10</pages>
       <url hash="449d53a7">W15-5402</url>
       <bibkey>derczynski-2015-handling</bibkey>
@@ -10614,7 +10614,7 @@
     <paper id="4">
       <title>Joint <fixed-case>B</fixed-case>ayesian Morphology Learning for <fixed-case>D</fixed-case>ravidian Languages</title>
       <author><first>Arun</first><last>Kumar</last></author>
-      <author><first>Lluís</first><last>Padró</last></author>
+      <author id="lluis-padro"><first>Lluís</first><last>Padró</last></author>
       <author><first>Antoni</first><last>Oliver</last></author>
       <pages>17–23</pages>
       <url hash="7af159e8">W15-5404</url>
@@ -10632,14 +10632,14 @@
     </paper>
     <paper id="6">
       <title><fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>rans: <fixed-case>S</fixed-case>wedish-<fixed-case>D</fixed-case>anish Machine Translation in a Constraint Grammar Framework</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>34</pages>
       <url hash="2b50b87b">W15-5406</url>
       <bibkey>bick-2015-wikitrans</bibkey>
     </paper>
     <paper id="7">
       <title>Language Identification using Classifier Ensembles</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>35–43</pages>
       <url hash="138ccfcd">W15-5407</url>
@@ -10649,7 +10649,7 @@
       <title>Discriminating Similar Languages with Token-Based Backoff</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>44–51</pages>
       <url hash="78575f3b">W15-5408</url>
       <bibkey>jauhiainen-etal-2015-discriminating</bibkey>
@@ -10673,9 +10673,9 @@
     <paper id="11">
       <title>Comparing Approaches to the Identification of Similar Languages</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Binyam Gebrekidan</first><last>Gebre</last></author>
+      <author id="binyam-gebrekidan-gebre"><first>Binyam Gebrekidan</first><last>Gebre</last></author>
       <author><first>Hernani</first><last>Costa</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>66–72</pages>
       <url hash="4b938666">W15-5411</url>
       <bibkey>zampieri-etal-2015-comparing</bibkey>
@@ -10691,7 +10691,7 @@
     </paper>
     <paper id="13">
       <title>Experiments in Discriminating Similar Languages</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <pages>78–84</pages>
       <url hash="9f86d4ff">W15-5413</url>
@@ -10701,7 +10701,7 @@
       <title>Building Monolingual Word Alignment Corpus for the Greater <fixed-case>C</fixed-case>hina Region</title>
       <author><first>Fan</first><last>Xu</last></author>
       <author><first>Xiongfei</first><last>Xu</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <author><first>Maoxi</first><last>Li</last></author>
       <pages>85–94</pages>
       <url hash="ece4b029">W15-5414</url>
@@ -10713,9 +10713,9 @@
       <booktitle>Proceedings of the Second Workshop on Natural Language Processing and Linked Open Data</booktitle>
       <url hash="5c730f47">W15-55</url>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>German</first><last>Rigau</last></editor>
+      <editor id="german-rigau"><first>German</first><last>Rigau</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
-      <editor><first>Kiril</first><last>Simov</last></editor>
+      <editor id="kiril-simov"><first>Kiril</first><last>Simov</last></editor>
       <publisher>INCOMA Ltd. Shoumen, BULGARIA</publisher>
       <address>Hissar, Bulgaria</address>
       <month>September</month>
@@ -10744,8 +10744,8 @@
     <paper id="3">
       <title>Small in Size, Big in Precision: A Case for Using Language-Specific Lexical Resources for Word Sense Disambiguation</title>
       <author><first>Steven</first><last>Neale</last></author>
-      <author><first>João</first><last>Silva</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>6–15</pages>
       <url hash="1133f6dc">W15-5503</url>
       <bibkey>neale-etal-2015-small</bibkey>
@@ -10769,14 +10769,14 @@
     <paper id="6">
       <title>Accessing Linked Open Data via A Common Ontology</title>
       <author><first>Kiril</first><last>Simov</last></author>
-      <author><first>Atanas</first><last>Kiryakov</last></author>
+      <author id="atanas-kiryakov"><first>Atanas</first><last>Kiryakov</last></author>
       <pages>33–41</pages>
       <url hash="f1ae60da">W15-5506</url>
       <bibkey>simov-kiryakov-2015-accessing</bibkey>
     </paper>
     <paper id="7">
       <title>The <fixed-case>G</fixed-case>uan<fixed-case>X</fixed-case>i network: a new multilingual <fixed-case>LLOD</fixed-case> for Language Learning applications</title>
-      <author><first>Ismail</first><last>El Maarouf</last></author>
+      <author id="ismail-el-maarouf"><first>Ismail</first><last>El Maarouf</last></author>
       <author><first>Hatem</first><last>Mousselly-Sergieh</last></author>
       <author><first>Eugene</first><last>Alferov</last></author>
       <author><first>Haofen</first><last>Wang</last></author>
@@ -10790,7 +10790,7 @@
   <volume id="56" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 10th <fixed-case>B</fixed-case>razilian Symposium in Information and Human Language Technology</booktitle>
-      <editor><first>Claudia</first><last>Freitas</last></editor>
+      <editor id="claudia-freitas"><first>Claudia</first><last>Freitas</last></editor>
       <editor><first>Alexandre</first><last>Rademaker</last></editor>
       <publisher>Sociedade Brasileira de Computação</publisher>
       <address>Natal, Brazil</address>
@@ -10825,9 +10825,9 @@
     </paper>
     <paper id="3">
       <title>Comparative Analysis between Notations to Classify Named Entities using Conditional Random Fields</title>
-      <author><first>Daniela Oliveira F.</first><last>do Amaral</last></author>
+      <author id="daniela-oliveira-f-do-amaral"><first>Daniela Oliveira F.</first><last>do Amaral</last></author>
       <author><first>Maiki</first><last>Buffet</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>27-31</pages>
       <url hash="10275626">W15-5603</url>
       <bibkey>do-amaral-etal-2015-comparative</bibkey>
@@ -10852,7 +10852,7 @@
     <paper id="6">
       <title>Análise Automática de Coerência Textual em Resumos Científicos: Avaliando Quebras de Linearidade (Automatic Analysis of Textual Coherence in Scientific Abstracts: Evaluating Linearity Breaks)</title>
       <author><first>Leandro Lago</first><last>da Silva</last></author>
-      <author><first>Valéria Delisandra</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria Delisandra</first><last>Feltrim</last></author>
       <pages>45-49</pages>
       <url hash="ebcea40b">W15-5606</url>
       <bibkey>da-silva-feltrim-2015-analise</bibkey>
@@ -10870,8 +10870,8 @@
       <title>Integrating support verb constructions into a parser</title>
       <author><first>Amanda</first><last>Rassi</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
-      <author><first>Nuno</first><last>Mamede</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
+      <author id="nuno-mamede"><first>Nuno</first><last>Mamede</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
       <pages>57-61</pages>
       <url hash="d20f7bf1">W15-5608</url>
       <bibkey>rassi-etal-2015-integrating</bibkey>
@@ -10879,7 +10879,7 @@
     <paper id="9">
       <title>Extração de Alvos em Comentários de Notícias em Português baseada na Teoria da Centralização (Target Extraction in News Reviews in <fixed-case>P</fixed-case>ortuguese based on Centering Theory)</title>
       <author><first>Frank Willian Cardoso</first><last>de Oliveira</last></author>
-      <author><first>Valéria Delisandra</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria Delisandra</first><last>Feltrim</last></author>
       <pages>63-67</pages>
       <url hash="5c45c1f5">W15-5609</url>
       <bibkey>de-oliveira-feltrim-2015-extracao</bibkey>
@@ -10891,7 +10891,7 @@
       <author><first>Marcel</first><last>Serikawa</last></author>
       <author><first>Matheus Antonio Ribeiro</first><last>Silva</last></author>
       <author><first>Régis</first><last>Zangirolami</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>69-73</pages>
       <url hash="43ce8aa4">W15-5610</url>
       <bibkey>candido-junior-etal-2015-portal</bibkey>
@@ -10899,14 +10899,14 @@
     <paper id="11">
       <title><fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: a Semantic Network for Prepositions</title>
       <author><first>Débora D.</first><last>Garcia</last></author>
-      <author><first>Bento Carlos Dias</first><last>da Silva</last></author>
+      <author id="bento-carlos-dias-da-silva"><first>Bento Carlos Dias</first><last>da Silva</last></author>
       <pages>75-79</pages>
       <url hash="ce46319b">W15-5611</url>
       <bibkey>garcia-da-silva-2015-prepnet</bibkey>
     </paper>
     <paper id="12">
       <title>Joint semantic discourse models for automatic multi-document summarization</title>
-      <author><first>Paula C. Figueira</first><last>Cardoso</last></author>
+      <author id="paula-cardoso"><first>Paula C. Figueira</first><last>Cardoso</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
       <pages>81-90</pages>
       <url hash="0014e22d">W15-5612</url>
@@ -10915,7 +10915,7 @@
     <paper id="13">
       <title>Building and Applying Profiles Through Term Extraction</title>
       <author><first>Lucelene</first><last>Lopes</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>91-100</pages>
       <url hash="885ed527">W15-5613</url>
       <bibkey>lopes-vieira-2015-building</bibkey>
@@ -10923,7 +10923,7 @@
     <paper id="14">
       <title>An Annotated Corpus for Sentiment Analysis in Political News</title>
       <author><first>Gabriel Domingos</first><last>de Arruda</last></author>
-      <author><first>Norton Trevisan</first><last>Roman</last></author>
+      <author id="norton-trevisan-roman"><first>Norton Trevisan</first><last>Roman</last></author>
       <author><first>Ana Maria</first><last>Monteiro</last></author>
       <pages>101-110</pages>
       <url hash="1ebd21e7">W15-5614</url>
@@ -10932,7 +10932,7 @@
     <paper id="15">
       <title>Campos Aleatórios Condicionais Aplicados à Detecção de Estrutura Retórica em Resumos de Textos Acadêmicos em Português (Conditional Random Fields Applied to Rhetorical Structure Detection in Academic Abstracts in <fixed-case>P</fixed-case>ortuguese)</title>
       <author><first>Alexandre C.</first><last>Andreani</last></author>
-      <author><first>Valéria D.</first><last>Feltrim</last></author>
+      <author id="valeria-delisandra-feltrim"><first>Valéria D.</first><last>Feltrim</last></author>
       <pages>111-120</pages>
       <url hash="fe91d4f4">W15-5615</url>
       <bibkey>andreani-feltrim-2015-campos</bibkey>
@@ -10941,7 +10941,7 @@
       <title>Anotando um Corpus de Notícias para a Análise de Sentimentos: um Relato de Experiência (Annotating a corpus of News for Sentiment Analysis: An Experience Report)</title>
       <author><first>Mariza Miola</first><last>Dosciatti</last></author>
       <author><first>Lohann Paterno Coutinho</first><last>Ferreira</last></author>
-      <author><first>Emerson Cabrera</first><last>Paraiso</last></author>
+      <author id="emerson-cabrera-paraiso"><first>Emerson Cabrera</first><last>Paraiso</last></author>
       <pages>121-130</pages>
       <url hash="eac90f3e">W15-5616</url>
       <bibkey>dosciatti-etal-2015-anotando</bibkey>
@@ -10960,7 +10960,7 @@
     <paper id="18">
       <title>On Strategies of Human Multi-Document Summarization</title>
       <author><first>Renata Tironi</first><last>de Camargo</last></author>
-      <author><first>Ariani</first><last>Di Felippo</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di Felippo</last></author>
       <author><first>Thiago A. S.</first><last>Pardo</last></author>
       <pages>141-150</pages>
       <url hash="c521df1a">W15-5618</url>
@@ -10977,7 +10977,7 @@
     <paper id="20">
       <title><fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: um recurso léxico com anotação de papéis semânticos para o português (<fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: a lexical resource annotated with semantic roles for <fixed-case>P</fixed-case>ortuguese)</title>
       <author><first>Leonardo</first><last>Zilio</last></author>
-      <author><first>Maria José Bocorny</first><last>Finatto</last></author>
+      <author id="maria-jose-b-finatto"><first>Maria José Bocorny</first><last>Finatto</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>161-170</pages>
       <url hash="b5b0b3d9">W15-5620</url>
@@ -10985,7 +10985,7 @@
     </paper>
     <paper id="21">
       <title>Novo dicionário de formas flexionadas do Unitex-<fixed-case>PB</fixed-case>: avaliação da flexão verbal (New Dictionary of Inflected forms of <fixed-case>UNITEX</fixed-case>-<fixed-case>PB</fixed-case>: Evaluation of Verbal Inflection)</title>
-      <author><first>Oto A.</first><last>Vale</last></author>
+      <author id="oto-vale"><first>Oto A.</first><last>Vale</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <pages>171-180</pages>
       <url hash="b61e3d90">W15-5621</url>
@@ -11009,8 +11009,8 @@
     </paper>
     <paper id="24">
       <title>Semi-Automatic Construction of a Textual Entailment Dataset: Selecting Candidates with Vector Space Models</title>
-      <author><first>Erick R.</first><last>Fonseca</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="erick-fonseca"><first>Erick R.</first><last>Fonseca</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>201-210</pages>
       <url hash="d2669740">W15-5624</url>
       <bibkey>fonseca-aluisio-2015-semi</bibkey>
@@ -11029,8 +11029,8 @@
     <meta>
       <booktitle>Proceedings of the 1st Deep Machine Translation Workshop</booktitle>
       <url hash="3c2bdeea">W15-57</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>António</first><last>Branco</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="antonio-branco"><first>António</first><last>Branco</last></editor>
       <publisher>ÚFAL MFF UK</publisher>
       <address>Praha, Czechia</address>
       <year>2015</year>
@@ -11043,7 +11043,7 @@
     <paper id="1">
       <title>Modelling the Adjunct/Argument Distinction in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Sophie</first><last>Arnoult</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>2–11</pages>
       <url hash="603d70a0">W15-5701</url>
       <bibkey>arnoult-simaan-2015-modelling</bibkey>
@@ -11052,7 +11052,7 @@
       <title>Towards Deeper <fixed-case>MT</fixed-case> - A Hybrid System for <fixed-case>G</fixed-case>erman</title>
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <pages>12–19</pages>
       <url hash="83b23a77">W15-5702</url>
@@ -11071,14 +11071,14 @@
     <paper id="4">
       <title>Delimiting Morphosyntactic Search Space with Source-Side Reordering Models</title>
       <author><first>Joachim</first><last>Daiber</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>29–38</pages>
       <url hash="7940ab0b">W15-5704</url>
       <bibkey>daiber-simaan-2015-delimiting</bibkey>
     </paper>
     <paper id="5">
       <title>Evaluating a Machine Translation System in a Technical Support Scenario</title>
-      <author><first>Rosa</first><last>Del Gaudio</last></author>
+      <author id="rosa-del-gaudio"><first>Rosa</first><last>Del Gaudio</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
       <pages>39–47</pages>
@@ -11094,12 +11094,12 @@
     </paper>
     <paper id="7">
       <title>Deep-syntax <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> for <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish <fixed-case>MT</fixed-case></title>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Oneka</first><last>Jauregi</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <author><first>Michael</first><last>Ustaszewski</last></author>
       <author><first>Nora</first><last>Aranberri</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <pages>55–63</pages>
       <url hash="362d9e43">W15-5707</url>
       <bibkey>labaka-etal-2015-deep</bibkey>
@@ -11116,7 +11116,7 @@
     <paper id="9">
       <title>Lexical choice in Abstract Dependency Trees</title>
       <author><first>Dieke</first><last>Oele</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>73–80</pages>
       <url hash="26a626f4">W15-5709</url>
       <bibkey>oele-van-noord-2015-lexical</bibkey>
@@ -11141,7 +11141,7 @@
     </paper>
     <paper id="12">
       <title>Factored models for Deep Machine Translation</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Iliana</first><last>Simova</last></author>
       <author><first>Velislava</first><last>Todorova</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
@@ -11151,8 +11151,8 @@
     </paper>
     <paper id="13">
       <title>Machine Translation for Multilingual Troubleshooting in the <fixed-case>IT</fixed-case> Domain: A Comparison of Different Strategies</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <author><first>António</first><last>Branco</last></author>
       <pages>106–115</pages>
@@ -11164,7 +11164,7 @@
     <meta>
       <booktitle>Proceedings of the 12th International Conference on Natural Language Processing</booktitle>
       <url hash="2b99f33d">W15-59</url>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
       <editor><first>Elizabeth</first><last>Sherly</last></editor>
       <publisher>NLP Association of India</publisher>
@@ -11179,7 +11179,7 @@
     </frontmatter>
     <paper id="1">
       <title>Keynote Lecture 1: Scientific Paper Analysis</title>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1</pages>
       <url hash="b11e8311">W15-5901</url>
       <bibkey>matsumoto-2015-keynote</bibkey>
@@ -11187,7 +11187,7 @@
     <paper id="2">
       <title>Addressing Class Imbalance in Grammatical Error Detection with Evaluation Metric Optimization</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>2–10</pages>
       <url hash="77bffe5e">W15-5902</url>
       <bibkey>kunchukuttan-bhattacharyya-2015-addressing</bibkey>
@@ -11213,8 +11213,8 @@
       <title>Noun Phrase Chunking for <fixed-case>M</fixed-case>arathi using Distant Supervision</title>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
-      <author><first>Girish K.</first><last>Palshikar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish K.</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <pages>29–38</pages>
       <url hash="67cb8100">W15-5905</url>
@@ -11223,7 +11223,7 @@
     <paper id="6">
       <title>Self-Organizing Maps for Classification of a Multi-Labeled Corpus</title>
       <author><first>Lars</first><last>Bungum</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>39–48</pages>
       <url hash="8fa3ee19">W15-5906</url>
       <bibkey>bungum-gamback-2015-self</bibkey>
@@ -11242,7 +11242,7 @@
       <author><first>Sudha</first><last>Bhingardive</last></author>
       <author><first>Dhirendra</first><last>Singh</last></author>
       <author><first>Rudramurthy</first><last>V</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>59–64</pages>
       <url hash="d56cc653">W15-5908</url>
       <bibkey>bhingardive-etal-2015-using</bibkey>
@@ -11262,7 +11262,7 @@
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Nilesh</first><last>Joshi</last></author>
       <author><first>Anupam</first><last>Ghosh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>71–78</pages>
       <url hash="079791f0">W15-5910</url>
       <bibkey>redkar-etal-2015-indowordnet</bibkey>
@@ -11270,17 +11270,17 @@
     <paper id="11">
       <title>Let Sense Bags Do Talking: Cross Lingual Word Semantic Similarity for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi</title>
       <author><first>Apurva</first><last>Nagvenkar</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>79–83</pages>
       <url hash="a4e262d7">W15-5911</url>
       <bibkey>nagvenkar-etal-2015-sense</bibkey>
     </paper>
     <paper id="12">
       <title>A temporal expression recognition system for medical documents by</title>
-      <author><first>Naman</first><last>Gupta</last></author>
+      <author id="naman-k-gupta"><first>Naman</first><last>Gupta</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>84–88</pages>
       <url hash="90e5175b">W15-5912</url>
       <bibkey>gupta-etal-2015-temporal</bibkey>
@@ -11288,7 +11288,7 @@
     <paper id="13">
       <title>An unsupervised <fixed-case>EM</fixed-case> method to infer time variation in sense probabilities</title>
       <author><first>Martin</first><last>Emms</last></author>
-      <author><first>Arun</first><last>Jayapal</last></author>
+      <author id="arun-kumar-jayapal"><first>Arun</first><last>Jayapal</last></author>
       <pages>89–94</pages>
       <url hash="f6e02b01">W15-5913</url>
       <bibkey>emms-jayapal-2015-unsupervised</bibkey>
@@ -11297,7 +11297,7 @@
       <title>Solving Data Sparsity by Morphology Injection in Factored <fixed-case>SMT</fixed-case></title>
       <author><first>Sreelekha</first><last>S</last></author>
       <author><first>Piyush</first><last>Dungarwal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Malathi</first><last>D</last></author>
       <pages>95–99</pages>
       <url hash="466b552f">W15-5914</url>
@@ -11317,8 +11317,8 @@
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Shehzaad</first><last>Dhuliawala</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Naman</first><last>Gupta</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="naman-k-gupta"><first>Naman</first><last>Gupta</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>106–111</pages>
       <url hash="4a5b25fc">W15-5916</url>
       <bibkey>kanojia-etal-2015-transchat</bibkey>
@@ -11327,7 +11327,7 @@
       <title>A Database of Infant Cry Sounds to Study the Likely Cause of Cry</title>
       <author><first>Shivam</first><last>Sharma</last></author>
       <author><first>Shubham</first><last>Asthana</last></author>
-      <author><first>V. K.</first><last>Mittal</last></author>
+      <author id="v-k-mittal"><first>V. K.</first><last>Mittal</last></author>
       <pages>112–117</pages>
       <url hash="17d5b032">W15-5917</url>
       <bibkey>sharma-etal-2015-database</bibkey>
@@ -11341,7 +11341,7 @@
     </paper>
     <paper id="19">
       <title>An Empirical Study of Diversity of Word Alignment and its Symmetrization Techniques for System Combination</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <pages>124–129</pages>
       <url hash="6cb80ae8">W15-5919</url>
       <bibkey>singh-2015-empirical</bibkey>
@@ -11349,7 +11349,7 @@
     <paper id="20">
       <title>Domain Sentiment Matters: A Two Stage Sentiment Analyzer</title>
       <author><first>Raksha</first><last>Sharma</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>130–137</pages>
       <url hash="5dc02699">W15-5920</url>
       <bibkey>sharma-bhattacharyya-2015-domain</bibkey>
@@ -11387,9 +11387,9 @@
     <paper id="25">
       <title>Judge a Book by its Cover: Conservative Focused Crawling under Resource Constraints</title>
       <author><first>Shehzaad</first><last>Dhuliawala</last></author>
-      <author><first>Arjun Atreya</first><last>V</last></author>
+      <author id="arjun-atreya-v"><first>Arjun Atreya</first><last>V</last></author>
       <author><first>Ravi Kumar</first><last>Yadav</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>166–171</pages>
       <url hash="5bdc8596">W15-5925</url>
       <bibkey>dhuliawala-etal-2015-judge</bibkey>
@@ -11423,7 +11423,7 @@
     </paper>
     <paper id="29">
       <title>A Study on Divergence in <fixed-case>M</fixed-case>alayalam and <fixed-case>T</fixed-case>amil Language in Machine Translation Perceptive</title>
-      <author><first>Jisha P</first><last>Jayan</last></author>
+      <author id="jisha-p-jayan"><first>Jisha P</first><last>Jayan</last></author>
       <author><first>Elizabeth</first><last>Sherly</last></author>
       <pages>189–196</pages>
       <url hash="244b4e26">W15-5929</url>
@@ -11440,8 +11440,8 @@
     <paper id="31">
       <title>Logistic Regression for Automatic Lexical Level Morphological Paradigm Selection for <fixed-case>K</fixed-case>onkani Nouns</title>
       <author><first>Shilpa</first><last>Desai</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>203–208</pages>
       <url hash="bbfa4910">W15-5931</url>
       <bibkey>desai-etal-2015-logistic</bibkey>
@@ -11496,8 +11496,8 @@
       <title>Automated Analysis of <fixed-case>B</fixed-case>angla Poetry for Classification and Poet Identification</title>
       <author><first>Geetanjali</first><last>Rakshit</last></author>
       <author><first>Anupam</first><last>Ghosh</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>247–253</pages>
       <url hash="690244b3">W15-5937</url>
       <bibkey>rakshit-etal-2015-automated</bibkey>
@@ -11508,7 +11508,7 @@
       <author><first>Anupam</first><last>Jamatia</last></author>
       <author><first>Kunal</first><last>Chakma</last></author>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>254–260</pages>
       <url hash="690383b7">W15-5938</url>
       <bibkey>rudrapal-etal-2015-sentence</bibkey>
@@ -11517,7 +11517,7 @@
       <title>Mood Classification of <fixed-case>H</fixed-case>indi Songs based on Lyrics</title>
       <author><first>Braja Gopal</first><last>Patra</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>261–267</pages>
       <url hash="bb1a91a1">W15-5939</url>
       <bibkey>patra-etal-2015-mood</bibkey>
@@ -11540,9 +11540,9 @@
     </paper>
     <paper id="42">
       <title>Simultaneous Feature Selection and Parameter Optimization Using Multi-objective Optimization for Sentiment Analysis</title>
-      <author><first>Mohammed Arif</first><last>Khan</last></author>
+      <author id="mohammed-arif-khan"><first>Mohammed Arif</first><last>Khan</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Eneldo Loza</first><last>Mencía</last></author>
+      <author id="eneldo-loza-mencia"><first>Eneldo Loza</first><last>Mencía</last></author>
       <pages>285–294</pages>
       <url hash="fdbfb17b">W15-5942</url>
       <bibkey>khan-etal-2015-simultaneous</bibkey>
@@ -11552,7 +11552,7 @@
       <author><first>Dhirendra</first><last>Singh</last></author>
       <author><first>Sudha</first><last>Bhingardive</last></author>
       <author><first>Kevin</first><last>Patel</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>295–302</pages>
       <url hash="ea1800f5">W15-5943</url>
       <bibkey>singh-etal-2015-detection</bibkey>
@@ -11561,7 +11561,7 @@
       <title>Augmenting Pivot based <fixed-case>SMT</fixed-case> with word segmentation</title>
       <author><first>Rohit</first><last>More</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <pages>303–307</pages>
       <url hash="f9fca405">W15-5944</url>
@@ -11571,8 +11571,8 @@
       <title>Using Multilingual Topic Models for Improved Alignment in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case></title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark James</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark James</first><last>Carman</last></author>
       <pages>308–315</pages>
       <url hash="68e8a2ef">W15-5945</url>
       <bibkey>kanojia-etal-2015-using</bibkey>
@@ -11581,22 +11581,22 @@
       <title>Triangulation of Reordering Tables: An Advancement Over Phrase Table Triangulation in Pivot-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Deepak</first><last>Patil</last></author>
       <author><first>Harshad</first><last>Chavan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>316–324</pages>
       <url hash="5a3c388c">W15-5946</url>
       <bibkey>patil-etal-2015-triangulation</bibkey>
     </paper>
     <paper id="47">
       <title>Post-editing a chapter of a specialized textbook into 7 languages: importance of terminological proximity with <fixed-case>E</fixed-case>nglish for productivity</title>
-      <author><first>Ritesh</first><last>Shah</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="ritesh-shah"><first>Ritesh</first><last>Shah</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Mithun</first><last>Padmakumar</last></author>
       <author><first>Leonardo</first><last>Zilio</last></author>
       <author><first>Ruslan</first><last>Kalitvianski</last></author>
       <author><first>Mohammad</first><last>Nasiruddin</last></author>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
-      <author><first>Sandra Castellanos</first><last>Páez</last></author>
+      <author id="sandra-milena-castellanos-paez"><first>Sandra Castellanos</first><last>Páez</last></author>
       <pages>325–332</pages>
       <url hash="1d078825">W15-5947</url>
       <bibkey>shah-etal-2015-post</bibkey>
@@ -11605,7 +11605,7 @@
       <title>Generating Translation Corpora in <fixed-case>I</fixed-case>ndic Languages: Cultivating Bilingual Texts for Cross Lingual Fertilization</title>
       <author><first>Niladri Sekhar</first><last>Dash</last></author>
       <author><first>Arulmozi</first><last>Selvraj</last></author>
-      <author><first>Mazhar</first><last>Hussain</last></author>
+      <author id="mazhar-mehdi-hussain"><first>Mazhar</first><last>Hussain</last></author>
       <pages>333–342</pages>
       <url hash="8ead7e6b">W15-5948</url>
       <bibkey>dash-etal-2015-generating</bibkey>
@@ -11613,7 +11613,7 @@
     <paper id="49">
       <title>Translation Quality and Effort: Options versus Post-editing</title>
       <author><first>Donald</first><last>Sturgeon</last></author>
-      <author><first>John S. Y.</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John S. Y.</first><last>Lee</last></author>
       <pages>343–350</pages>
       <url hash="3900c857">W15-5949</url>
       <bibkey>sturgeon-lee-2015-translation</bibkey>
@@ -11622,15 +11622,15 @@
       <title>Investigating the potential of post-ordering <fixed-case>SMT</fixed-case> output to improve translation quality</title>
       <author><first>Pratik</first><last>Mehta</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>351–356</pages>
       <url hash="3d951fed">W15-5950</url>
       <bibkey>mehta-etal-2015-investigating</bibkey>
     </paper>
     <paper id="51">
       <title>Applying <fixed-case>S</fixed-case>anskrit Concepts for Reordering in <fixed-case>MT</fixed-case></title>
-      <author><first>Akshar</first><last>Bharati</last></author>
-      <author><first/><last>Sukhada</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
+      <author id="sukhada"><first/><last>Sukhada</last></author>
       <author><first>Prajna</first><last>Jha</last></author>
       <author><first>Soma</first><last>Paul</last></author>
       <author><first>Dipti M</first><last>Sharma</last></author>
@@ -11667,7 +11667,7 @@
     </paper>
     <paper id="55">
       <title>Natural Language Processing for Solving Simple Word Problems</title>
-      <author><first>Sowmya S</first><last>Sundaram</last></author>
+      <author id="sowmya-s-sundaram"><first>Sowmya S</first><last>Sundaram</last></author>
       <author><first>Deepak</first><last>Khemani</last></author>
       <pages>394–402</pages>
       <url hash="7b98380b">W15-5955</url>
diff --git a/data/xml/W16.xml b/data/xml/W16.xml
index 41fd606e0d..48c25fe10b 100644
--- a/data/xml/W16.xml
+++ b/data/xml/W16.xml
@@ -6,7 +6,7 @@
       <editor><first>Mohit</first><last>Iyyer</last></editor>
       <editor><first>He</first><last>He</last></editor>
       <editor><first>Jordan</first><last>Boyd-Graber</last></editor>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <doi>10.18653/v1/W16-01</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -21,7 +21,7 @@
     <paper id="1">
       <title><fixed-case>W</fixed-case>atson Discovery Advisor: Question-answering in an industrial setting</title>
       <author><first>Charley</first><last>Beller</last></author>
-      <author><first>Graham</first><last>Katz</last></author>
+      <author id="graham-katz"><first>Graham</first><last>Katz</last></author>
       <author><first>Allen</first><last>Ginsberg</last></author>
       <author><first>Chris</first><last>Phipps</last></author>
       <author><first>Sean</first><last>Bethard</last></author>
@@ -47,7 +47,7 @@
       <title>Attention-Based Convolutional Neural Network for Machine Comprehension</title>
       <author><first>Wenpeng</first><last>Yin</last></author>
       <author><first>Sebastian</first><last>Ebert</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>15–21</pages>
       <url hash="1d025637">W16-0103</url>
       <doi>10.18653/v1/W16-0103</doi>
@@ -56,7 +56,7 @@
     <paper id="4">
       <title>Open-domain Factoid Question Answering via Knowledge Graph Search</title>
       <author><first>Ahmad</first><last>Aghaebrahimian</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>22–28</pages>
       <url hash="15d24a1c">W16-0104</url>
       <doi>10.18653/v1/W16-0104</doi>
@@ -122,7 +122,7 @@
       <booktitle>Proceedings of the Fifth Workshop on Computational Linguistics for Literature</booktitle>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <doi>10.18653/v1/W16-02</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California, USA</address>
@@ -158,7 +158,7 @@
       <author><first>Andrea</first><last>Gagliano</last></author>
       <author><first>Emily</first><last>Paul</last></author>
       <author><first>Kyle</first><last>Booten</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>20–31</pages>
       <url hash="412f8058">W16-0203</url>
       <doi>10.18653/v1/W16-0203</doi>
@@ -197,7 +197,7 @@
     <paper id="7">
       <title>Bilingual Chronological Classification of Hafez’s Poems</title>
       <author><first>Arya</first><last>Rahgozar</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>54–62</pages>
       <url hash="83921997">W16-0207</url>
       <doi>10.18653/v1/W16-0207</doi>
@@ -208,7 +208,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Computational Linguistics and Clinical Psychology</booktitle>
       <editor><first>Kristy</first><last>Hollingshead</last></editor>
-      <editor><first>Lyle</first><last>Ungar</last></editor>
+      <editor id="lyle-ungar"><first>Lyle</first><last>Ungar</last></editor>
       <doi>10.18653/v1/W16-03</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, CA, USA</address>
@@ -222,7 +222,7 @@
     </frontmatter>
     <paper id="1">
       <title>Detecting late-life depression in <fixed-case>A</fixed-case>lzheimer’s disease through analysis of speech and language</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <pages>1–11</pages>
@@ -233,10 +233,10 @@
     <paper id="2">
       <title>Towards Early Dementia Detection: Fusing Linguistic and Non-Linguistic Clinical Data</title>
       <author><first>Joseph</first><last>Bullard</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <author><first>Xumin</first><last>Liu</last></author>
       <author><first>Qi</first><last>Yu</last></author>
-      <author><first>Rubén</first><last>Proaño</last></author>
+      <author id="ruben-a-proano"><first>Rubén</first><last>Proaño</last></author>
       <pages>12–22</pages>
       <url hash="70cd17ba">W16-0302</url>
       <doi>10.18653/v1/W16-0302</doi>
@@ -271,7 +271,7 @@
     <paper id="5">
       <title>Building a Motivational Interviewing Dataset</title>
       <author><first>Verónica</first><last>Pérez-Rosas</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <author><first>Kenneth</first><last>Resnicow</last></author>
       <author><first>Satinder</first><last>Singh</last></author>
       <author><first>Lawrence</first><last>An</last></author>
@@ -306,12 +306,12 @@
     <paper id="8">
       <title>Exploring Autism Spectrum Disorders Using <fixed-case>HLT</fixed-case></title>
       <author><first>Julia</first><last>Parish-Morris</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <author><first>Neville</first><last>Ryant</last></author>
-      <author><first>Christopher</first><last>Cieri</last></author>
+      <author id="christopher-cieri"><first>Christopher</first><last>Cieri</last></author>
       <author><first>Leila</first><last>Bateman</last></author>
       <author><first>Emily</first><last>Ferguson</last></author>
-      <author><first>Robert</first><last>Schultz</last></author>
+      <author id="robert-t-schultz"><first>Robert</first><last>Schultz</last></author>
       <pages>74–84</pages>
       <url hash="d458602d">W16-0308</url>
       <doi>10.18653/v1/W16-0308</doi>
@@ -322,10 +322,10 @@
       <author><first>Mayuresh</first><last>Oak</last></author>
       <author><first>Anil</first><last>Behera</last></author>
       <author><first>Titus</first><last>Thomas</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
-      <author><first>Raymond</first><last>Ptucha</last></author>
+      <author id="raymond-ptucha"><first>Raymond</first><last>Ptucha</last></author>
       <pages>85–94</pages>
       <url hash="a6afaac8">W16-0309</url>
       <doi>10.18653/v1/W16-0309</doi>
@@ -357,7 +357,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>CLP</fixed-case>sych 2016 Shared Task: Triaging content in online peer-support forums</title>
-      <author><first>David N.</first><last>Milne</last></author>
+      <author id="david-n-milne"><first>David N.</first><last>Milne</last></author>
       <author><first>Glen</first><last>Pink</last></author>
       <author><first>Ben</first><last>Hachey</last></author>
       <author><first>Rafael A.</first><last>Calvo</last></author>
@@ -368,10 +368,10 @@
     </paper>
     <paper id="13">
       <title><fixed-case>D</fixed-case>ata61-<fixed-case>CSIRO</fixed-case> systems at the <fixed-case>CLP</fixed-case>sych 2016 Shared Task</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Yufei</first><last>Wang</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>128–132</pages>
       <url hash="91194a9e">W16-0313</url>
       <doi>10.18653/v1/W16-0313</doi>
@@ -379,7 +379,7 @@
     </paper>
     <paper id="14">
       <title>Predicting Post Severity in Mental Health Forums</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Mark</first><last>Dras</last></author>
       <pages>133–137</pages>
@@ -409,7 +409,7 @@
       <title>Mental Distress Detection and Triage in Forum Posts: The <fixed-case>LT</fixed-case>3 <fixed-case>CLP</fixed-case>sych 2016 Shared Task System</title>
       <author><first>Bart</first><last>Desmet</last></author>
       <author><first>Gilles</first><last>Jacobs</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>148–152</pages>
       <url hash="c836252c">W16-0317</url>
       <doi>10.18653/v1/W16-0317</doi>
@@ -429,7 +429,7 @@
       <title>The <fixed-case>UMD</fixed-case> <fixed-case>CLP</fixed-case>sych 2016 Shared Task System: Text Representation for Predicting Triage of Forum Posts about Mental Health</title>
       <author><first>Meir</first><last>Friedenberg</last></author>
       <author><first>Hadi</first><last>Amiri</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>158–161</pages>
       <url hash="a9e075b5">W16-0319</url>
@@ -511,7 +511,7 @@
     <paper id="27">
       <title>Text-based experiments for Predicting mental health emergencies in online web forum posts</title>
       <author><first>Hector-Hugo</first><last>Franco-Penya</last></author>
-      <author><first>Liliana</first><last>Mamani Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sanchez</last></author>
       <pages>193–197</pages>
       <url hash="d059b5ff">W16-0327</url>
       <doi>10.18653/v1/W16-0327</doi>
@@ -521,10 +521,10 @@
   <volume id="4" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Erik</first><last>van der Goot</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Andres</first><last>Montoyo</last></editor>
+      <editor id="andres-montoyo"><first>Andres</first><last>Montoyo</last></editor>
       <doi>10.18653/v1/W16-04</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -555,7 +555,7 @@
     <paper id="3">
       <title>Rumor Identification and Belief Investigation on <fixed-case>T</fixed-case>witter</title>
       <author><first>Sardar</first><last>Hamidian</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>3–8</pages>
       <url hash="401fda19">W16-0403</url>
       <doi>10.18653/v1/W16-0403</doi>
@@ -563,12 +563,12 @@
     </paper>
     <paper id="4">
       <title>Modelling Valence and Arousal in <fixed-case>F</fixed-case>acebook posts</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <author><first>Gregory</first><last>Park</last></author>
       <author><first>Johannes</first><last>Eichstaedt</last></author>
-      <author><first>Margaret</first><last>Kern</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="margaret-kern"><first>Margaret</first><last>Kern</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Elisabeth</first><last>Shulman</last></author>
       <pages>9–15</pages>
       <url hash="68d5b843">W16-0404</url>
@@ -625,7 +625,7 @@
     <paper id="10">
       <title>The Effect of Negators, Modals, and Degree Adverbs on Sentiment Composition</title>
       <author><first>Svetlana</first><last>Kiritchenko</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>43–52</pages>
       <url hash="eab693f9">W16-0410</url>
       <doi>10.18653/v1/W16-0410</doi>
@@ -634,7 +634,7 @@
     <paper id="11">
       <title>How can <fixed-case>NLP</fixed-case> Tasks Mutually Benefit Sentiment Analysis? A Holistic Approach to Sentiment Analysis</title>
       <author><first>Lingjia</first><last>Deng</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>53–59</pages>
       <url hash="f9973a96">W16-0411</url>
       <doi>10.18653/v1/W16-0411</doi>
@@ -654,7 +654,7 @@
     <paper id="13">
       <title>Threat detection in online discussions</title>
       <author><first>Aksel</first><last>Wester</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <author><first>Hugo Lewi</first><last>Hammer</last></author>
       <pages>66–71</pages>
@@ -665,7 +665,7 @@
     <paper id="14">
       <title>Classification of comment helpfulness to improve knowledge sharing among medical practitioners.</title>
       <author><first>Pierre André</first><last>Ménard</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <pages>72–81</pages>
       <url hash="2fa3ced4">W16-0414</url>
       <doi>10.18653/v1/W16-0414</doi>
@@ -674,8 +674,8 @@
     <paper id="15">
       <title>Political Issue Extraction Model: A Novel Hierarchical Topic Model That Uses Tweets By Political And Non-Political Authors</title>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>82–90</pages>
       <url hash="a8509dab">W16-0415</url>
       <doi>10.18653/v1/W16-0415</doi>
@@ -684,8 +684,8 @@
     <paper id="16">
       <title>Early text classification: a Naïve solution</title>
       <author><first>Hugo Jair</first><last>Escalante</last></author>
-      <author><first>Manuel</first><last>Montes y Gomez</last></author>
-      <author><first>Luis</first><last>Villasenor</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes y Gomez</last></author>
+      <author id="luis-villasenor-pineda"><first>Luis</first><last>Villasenor</last></author>
       <author><first>Marcelo Luis</first><last>Errecalde</last></author>
       <pages>91–99</pages>
       <url hash="e76d887e">W16-0416</url>
@@ -746,10 +746,10 @@
     </paper>
     <paper id="22">
       <title>Domain Adaptation of Polarity Lexicon combining Term Frequency and Bootstrapping</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>M. Dolores</first><last>Molina-Gonzalez</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="m-dolores-molina-gonzalez"><first>M. Dolores</first><last>Molina-Gonzalez</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
       <pages>137–146</pages>
       <url hash="ca22333b">W16-0422</url>
       <doi>10.18653/v1/W16-0422</doi>
@@ -757,10 +757,10 @@
     </paper>
     <paper id="23">
       <title>Do Enterprises Have Emotions?</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <author><first>Jan</first><last>Goldenstein</last></author>
-      <author><first>Sebastian G. M.</first><last>Händschke</last></author>
+      <author id="sebastian-g-m-handschke"><first>Sebastian G. M.</first><last>Händschke</last></author>
       <author><first>Peter</first><last>Walgenbach</last></author>
       <pages>147–153</pages>
       <url hash="5ed60783">W16-0423</url>
@@ -797,7 +797,7 @@
     </paper>
     <paper id="27">
       <title>Sentiment Analysis in <fixed-case>T</fixed-case>witter: A <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Perspective</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>171–172</pages>
       <url hash="5ee25c69">W16-0427</url>
       <doi>10.18653/v1/W16-0427</doi>
@@ -813,7 +813,7 @@
     </paper>
     <paper id="29">
       <title>A Practical Guide to Sentiment Annotation: Challenges and Solutions</title>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <pages>174–179</pages>
       <url hash="a2a991ec">W16-0429</url>
       <doi>10.18653/v1/W16-0429</doi>
@@ -831,7 +831,7 @@
   <volume id="5" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 11th Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <editor><first>Helen</first><last>Yannakoudakis</last></editor>
@@ -860,7 +860,7 @@
       <title>Text Readability Assessment for Second Language Learners</title>
       <author><first>Menglin</first><last>Xia</last></author>
       <author><first>Ekaterina</first><last>Kochmar</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>12–22</pages>
       <url hash="308e9d6b">W16-0502</url>
       <doi>10.18653/v1/W16-0502</doi>
@@ -878,7 +878,7 @@
     <paper id="4">
       <title>Automated classification of collaborative problem solving interactions in simulated science tasks</title>
       <author><first>Michael</first><last>Flor</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Jiangang</first><last>Hao</last></author>
       <author><first>Lei</first><last>Liu</last></author>
       <author><first>Alina</first><last>von Davier</last></author>
@@ -898,8 +898,8 @@
     </paper>
     <paper id="6">
       <title>A Report on the Automatic Evaluation of Scientific Writing Shared Task</title>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
-      <author><first>Rafael E.</first><last>Banchs</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></author>
       <author><first>Elena</first><last>Volodina</last></author>
       <author><first>Courtney</first><last>Napoles</last></author>
       <pages>53–62</pages>
@@ -930,7 +930,7 @@
     <paper id="9">
       <title>Characterizing Text Difficulty with Word Frequencies</title>
       <author><first>Xiaobin</first><last>Chen</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>84–94</pages>
       <url hash="d712ff1d">W16-0509</url>
       <doi>10.18653/v1/W16-0509</doi>
@@ -940,7 +940,7 @@
       <title>Unsupervised Modeling of Topical Relevance in <fixed-case>L</fixed-case>2 Learner Text</title>
       <author><first>Ronan</first><last>Cummins</last></author>
       <author><first>Helen</first><last>Yannakoudakis</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>95–104</pages>
       <url hash="1e190a37">W16-0510</url>
       <doi>10.18653/v1/W16-0510</doi>
@@ -948,8 +948,8 @@
     </paper>
     <paper id="11">
       <title><fixed-case>UW</fixed-case>-<fixed-case>S</fixed-case>tanford System Description for <fixed-case>AESW</fixed-case> 2016 Shared Task on Grammatical Error Detection</title>
-      <author><first>Dan</first><last>Flickinger</last></author>
-      <author><first>Michael</first><last>Goodman</last></author>
+      <author id="dan-flickinger"><first>Dan</first><last>Flickinger</last></author>
+      <author id="michael-wayne-goodman"><first>Michael</first><last>Goodman</last></author>
       <author><first>Woodley</first><last>Packard</last></author>
       <pages>105–111</pages>
       <url hash="3b2396c7">W16-0511</url>
@@ -969,8 +969,8 @@
       <title>The <fixed-case>NTNU</fixed-case>-<fixed-case>YZU</fixed-case> System in the <fixed-case>AESW</fixed-case> Shared Task: Automated Evaluation of Scientific Writing Using a Convolutional Neural Network</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Bo-Lin</first><last>Lin</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
-      <author><first>Yuen-Hsien</first><last>Tseng</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></author>
       <pages>122–129</pages>
       <url hash="b81f8193">W16-0513</url>
       <doi>10.18653/v1/W16-0513</doi>
@@ -997,8 +997,8 @@
     </paper>
     <paper id="16">
       <title><fixed-case>P</fixed-case>ictogrammar: an <fixed-case>AAC</fixed-case> device based on a semantic grammar</title>
-      <author><first>Fernando</first><last>Martínez-Santiago</last></author>
-      <author><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
+      <author id="fernando-martinez-santiago"><first>Fernando</first><last>Martínez-Santiago</last></author>
+      <author id="miguel-angel-garcia-cumbreras"><first>Miguel Ángel</first><last>García-Cumbreras</last></author>
       <author><first>Arturo</first><last>Montejo-Ráez</last></author>
       <author><first>Manuel Carlos</first><last>Díaz-Galiano</last></author>
       <pages>142–150</pages>
@@ -1038,7 +1038,7 @@
       <title>Evaluation Dataset (<fixed-case>DT</fixed-case>-Grade) and Word Weighting Approach towards Constructed Short Answers Assessment in Tutorial Dialogue Context</title>
       <author><first>Rajendra</first><last>Banjade</last></author>
       <author><first>Nabin</first><last>Maharjan</last></author>
-      <author><first>Nobal Bikram</first><last>Niraula</last></author>
+      <author id="nobal-bikram-niraula"><first>Nobal Bikram</first><last>Niraula</last></author>
       <author><first>Dipesh</first><last>Gautam</last></author>
       <author><first>Borhan</first><last>Samei</last></author>
       <author><first>Vasile</first><last>Rus</last></author>
@@ -1050,7 +1050,7 @@
     <paper id="21">
       <title>Linguistically Aware Information Retrieval: Providing Input Enrichment for Second Language Learners</title>
       <author><first>Maria</first><last>Chinkina</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>188–198</pages>
       <url hash="f558af2b">W16-0521</url>
       <doi>10.18653/v1/W16-0521</doi>
@@ -1089,7 +1089,7 @@
     </paper>
     <paper id="25">
       <title>Combined Tree Kernel-based classifiers for Assessing Quality of Scientific Text</title>
-      <author><first>Liliana</first><last>Mamani Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sanchez</last></author>
       <author><first>Hector-Hugo</first><last>Franco-Penya</last></author>
       <pages>223–228</pages>
       <url hash="c5f2fd86">W16-0525</url>
@@ -1099,7 +1099,7 @@
     <paper id="26">
       <title>Augmenting Course Material with Open Access Textbooks</title>
       <author><first>Smitha</first><last>Milli</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>229–234</pages>
       <url hash="86c7284e">W16-0526</url>
       <doi>10.18653/v1/W16-0526</doi>
@@ -1107,7 +1107,7 @@
     </paper>
     <paper id="27">
       <title>Exploring the Intersection of Short Answer Assessment, Authorship Attribution, and Plagiarism Detection</title>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <pages>235–241</pages>
       <url hash="f61db8e3">W16-0527</url>
       <doi>10.18653/v1/W16-0527</doi>
@@ -1117,8 +1117,8 @@
       <title>Sentence-Level Grammatical Error Identification as Sequence-to-Sequence Correction</title>
       <author><first>Allen</first><last>Schmaltz</last></author>
       <author><first>Yoon</first><last>Kim</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>242–251</pages>
       <url hash="568c2971">W16-0528</url>
       <doi>10.18653/v1/W16-0528</doi>
@@ -1136,7 +1136,7 @@
     <paper id="30">
       <title>Candidate re-ranking for <fixed-case>SMT</fixed-case>-based grammatical error correction</title>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Mariano</first><last>Felice</last></author>
       <pages>256–266</pages>
       <url hash="ff6b8958">W16-0530</url>
@@ -1145,7 +1145,7 @@
     </paper>
     <paper id="31">
       <title>Spoken Text Difficulty Estimation Using Linguistic Features</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Yeonsuk</first><last>Cho</last></author>
       <author><first>Diane</first><last>Napolitano</last></author>
       <pages>267–276</pages>
@@ -1156,7 +1156,7 @@
     <paper id="32">
       <title>Automatically Extracting Topical Components for a Response-to-Text Writing Assessment</title>
       <author><first>Zahra</first><last>Rahimi</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>277–282</pages>
       <url hash="f88021b8">W16-0532</url>
       <doi>10.18653/v1/W16-0532</doi>
@@ -1192,9 +1192,9 @@
   <volume id="6" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Semantics-Driven Machine Translation (<fixed-case>S</fixed-case>ed<fixed-case>MT</fixed-case> 2016)</booktitle>
-      <editor><first>Deyi</first><last>Xiong</last></editor>
+      <editor id="deyi-xiong"><first>Deyi</first><last>Xiong</last></editor>
       <editor><first>Kevin</first><last>Duh</last></editor>
-      <editor><first>Eneko</first><last>Agirre</last></editor>
+      <editor id="eneko-agirre"><first>Eneko</first><last>Agirre</last></editor>
       <editor><first>Nora</first><last>Aranberri</last></editor>
       <editor><first>Houfeng</first><last>Wang</last></editor>
       <doi>10.18653/v1/W16-06</doi>
@@ -1237,7 +1237,7 @@
     </paper>
     <paper id="4">
       <title>Towards Semantic-based Hybrid Machine Translation between <fixed-case>B</fixed-case>ulgarian and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <author><first>Alexander</first><last>Popov</last></author>
       <pages>22–26</pages>
@@ -1291,9 +1291,9 @@
     </paper>
     <paper id="4">
       <title>Beyond Identity Coreference: Contrasting Indicators of Textual Coherence in <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman</title>
-      <author><first>Kerstin</first><last>Kunz</last></author>
+      <author id="kerstin-kunz"><first>Kerstin</first><last>Kunz</last></author>
       <author><first>Ekaterina</first><last>Lapshinova-Koltunski</last></author>
-      <author><first>José Manuel</first><last>Martínez</last></author>
+      <author id="jose-manuel-martinez"><first>José Manuel</first><last>Martínez</last></author>
       <pages>23–31</pages>
       <url hash="6bb2d15d">W16-0704</url>
       <doi>10.18653/v1/W16-0704</doi>
@@ -1302,8 +1302,8 @@
     <paper id="5">
       <title>Exploring the steps of Verb Phrase Ellipsis</title>
       <author><first>Zhengzhong</first><last>Liu</last></author>
-      <author><first>Edgar</first><last>Gonzàlez Pellicer</last></author>
-      <author><first>Daniel</first><last>Gillick</last></author>
+      <author id="edgar-gonzalez-pellicer"><first>Edgar</first><last>Gonzàlez Pellicer</last></author>
+      <author id="dan-gillick"><first>Daniel</first><last>Gillick</last></author>
       <pages>32–40</pages>
       <url hash="89fce31c">W16-0705</url>
       <doi>10.18653/v1/W16-0705</doi>
@@ -1330,8 +1330,8 @@
     <paper id="8">
       <title>Antecedent Prediction Without a Pipeline</title>
       <author><first>Sam</first><last>Wiseman</last></author>
-      <author><first>Alexander M.</first><last>Rush</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>53–58</pages>
       <url hash="8844df61">W16-0708</url>
       <doi>10.18653/v1/W16-0708</doi>
@@ -1350,10 +1350,10 @@
       <title>Coreference Resolution for the <fixed-case>B</fixed-case>asque Language with <fixed-case>BART</fixed-case></title>
       <author><first>Ander</first><last>Soraluze</last></author>
       <author><first>Olatz</first><last>Arregi</last></author>
-      <author><first>Xabier</first><last>Arregi</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="xabier-arregi"><first>Xabier</first><last>Arregi</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <pages>67–73</pages>
       <url hash="32a732ea">W16-0710</url>
       <doi>10.18653/v1/W16-0710</doi>
@@ -1373,8 +1373,8 @@
     </paper>
     <paper id="12">
       <title>How to Handle Split Antecedents in <fixed-case>T</fixed-case>amil?</title>
-      <author><first>Vijay</first><last>Sundar Ram</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay</first><last>Sundar Ram</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>84–91</pages>
       <url hash="acec4f7f">W16-0712</url>
       <doi>10.18653/v1/W16-0712</doi>
@@ -1394,8 +1394,8 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Computational Approaches to Deception Detection</booktitle>
       <editor><first>Tommaso</first><last>Fornaciari</last></editor>
-      <editor><first>Eileen</first><last>Fitzpatrick</last></editor>
-      <editor><first>Joan</first><last>Bachenko</last></editor>
+      <editor id="eileen-fitzpatrick"><first>Eileen</first><last>Fitzpatrick</last></editor>
+      <editor id="joan-bachenko"><first>Joan</first><last>Bachenko</last></editor>
       <doi>10.18653/v1/W16-08</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -1418,7 +1418,7 @@
     </paper>
     <paper id="2">
       <title>Fake News or Truth? Using Satirical Cues to Detect Potentially Misleading News</title>
-      <author><first>Victoria</first><last>Rubin</last></author>
+      <author id="victoria-l-rubin"><first>Victoria</first><last>Rubin</last></author>
       <author><first>Niall</first><last>Conroy</last></author>
       <author><first>Yimin</first><last>Chen</last></author>
       <author><first>Sarah</first><last>Cornwell</last></author>
@@ -1448,7 +1448,7 @@
     </paper>
     <paper id="5">
       <title>The Use of Second Life for Deception Detection Research</title>
-      <author><first>Stephen</first><last>Kunath</last></author>
+      <author id="stephen-kunath"><first>Stephen</first><last>Kunath</last></author>
       <author><first>Kevin</first><last>McCabe</last></author>
       <pages>32–39</pages>
       <url hash="ffea7b5c">W16-0805</url>
@@ -1463,7 +1463,7 @@
       <author><first>Michelle</first><last>Levine</last></author>
       <author><first>Rivka</first><last>Levitan</last></author>
       <author><first>Andrew</first><last>Rosenberg</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>40–44</pages>
       <url hash="efd8dc5b">W16-0806</url>
       <doi>10.18653/v1/W16-0806</doi>
@@ -1483,8 +1483,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Discontinuous Structures in Natural Language Processing</booktitle>
       <editor><first>Wolfgang</first><last>Maier</last></editor>
-      <editor><first>Sandra</first><last>Kübler</last></editor>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kübler</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
       <doi>10.18653/v1/W16-09</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>San Diego, California</address>
@@ -1506,9 +1506,9 @@
     </paper>
     <paper id="2">
       <title>Non-projectivity and valency</title>
-      <author><first>Zdenka</first><last>Uresova</last></author>
-      <author><first>Eva</first><last>Fucikova</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="zdenka-uresova"><first>Zdenka</first><last>Uresova</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fucikova</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>12–21</pages>
       <url hash="ef278552">W16-0902</url>
       <doi>10.18653/v1/W16-0902</doi>
@@ -1560,8 +1560,8 @@
   <volume id="10" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on Events</booktitle>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>Ed</first><last>Hovy</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="eduard-hovy"><first>Ed</first><last>Hovy</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
       <editor><first>Tim</first><last>O’Gorman</last></editor>
       <doi>10.18653/v1/W16-10</doi>
@@ -1597,9 +1597,9 @@
     </paper>
     <paper id="3">
       <title>Multimodal Use of an Upper-Level Event Ontology</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>David</first><last>Tahmoush</last></author>
-      <author><first>Susan</first><last>Windisch Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Windisch Brown</last></author>
       <author><first>Martha</first><last>Palmer</last></author>
       <pages>18–26</pages>
       <url hash="8941e154">W16-1003</url>
@@ -1613,7 +1613,7 @@
       <author><first>Jeremy</first><last>Getman</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
       <author><first>Justin</first><last>Mott</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Martha</first><last>Palmer</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>Marjorie</first><last>Freedman</last></author>
@@ -1628,10 +1628,10 @@
       <title>Event Nugget and Event Coreference Annotation</title>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Ann</first><last>Bies</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Joe</first><last>Ellis</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Hoa Trang</first><last>Dang</last></author>
+      <author id="hoa-trang-dang"><first>Hoa Trang</first><last>Dang</last></author>
       <author><first>Yukari</first><last>Yamakawa</last></author>
       <author><first>Sue</first><last>Holm</last></author>
       <pages>37–45</pages>
@@ -1652,8 +1652,8 @@
       <title><fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>e<fixed-case>RS</fixed-case>: Causal and Temporal Relation Scheme for Semantic Annotation of Event Structures</title>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
       <author><first>Alyson</first><last>Grealish</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
       <pages>51–61</pages>
       <url hash="5e78df06">W16-1007</url>
@@ -1727,7 +1727,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Multilingual and Cross-lingual Methods in <fixed-case>NLP</fixed-case></booktitle>
       <editor><first>Dipanjan</first><last>Das</last></editor>
-      <editor><first>Chris</first><last>Dyer</last></editor>
+      <editor id="chris-dyer"><first>Chris</first><last>Dyer</last></editor>
       <editor><first>Manaal</first><last>Faruqui</last></editor>
       <editor><first>Yulia</first><last>Tsvetkov</last></editor>
       <doi>10.18653/v1/W16-12</doi>
@@ -1744,7 +1744,7 @@
     <paper id="1">
       <title>Learning Cross-lingual Representations with Matrix Factorization</title>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>1–9</pages>
       <url hash="ae2a0c24">W16-1201</url>
       <doi>10.18653/v1/W16-1201</doi>
@@ -1773,7 +1773,7 @@
     <paper id="4">
       <title>Enhancing Automatic <fixed-case>W</fixed-case>ordnet Construction Using Word Embeddings</title>
       <author><first>Feras</first><last>Al Tarouti</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>30–34</pages>
       <url hash="a4e20fd3">W16-1204</url>
       <doi>10.18653/v1/W16-1204</doi>
@@ -1820,7 +1820,7 @@
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Automated Knowledge Base Construction</booktitle>
       <editor><first>Jay</first><last>Pujara</last></editor>
-      <editor><first>Tim</first><last>Rocktaschel</last></editor>
+      <editor id="tim-rocktaschel"><first>Tim</first><last>Rocktaschel</last></editor>
       <editor><first>Danqi</first><last>Chen</last></editor>
       <editor><first>Sameer</first><last>Singh</last></editor>
       <doi>10.18653/v1/W16-13</doi>
@@ -1837,9 +1837,9 @@
     <paper id="1">
       <title>Using Graphs of Classifiers to Impose Constraints on Semi-supervised Relation Extraction</title>
       <author><first>Lidong</first><last>Bing</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
-      <author><first>Richard</first><last>Wang</last></author>
+      <author id="richard-c-wang"><first>Richard</first><last>Wang</last></author>
       <pages>1–6</pages>
       <url hash="d761f9de">W16-1301</url>
       <doi>10.18653/v1/W16-1301</doi>
@@ -1857,9 +1857,9 @@
     </paper>
     <paper id="3">
       <title><fixed-case>IKE</fixed-case> - An Interactive Tool for Knowledge Extraction</title>
-      <author><first>Bhavana</first><last>Dalvi</last></author>
+      <author id="bhavana-dalvi"><first>Bhavana</first><last>Dalvi</last></author>
       <author><first>Sumithra</first><last>Bhakthavatsalam</last></author>
-      <author><first>Chris</first><last>Clark</last></author>
+      <author id="chris-clark"><first>Chris</first><last>Clark</last></author>
       <author><first>Peter</first><last>Clark</last></author>
       <author><first>Oren</first><last>Etzioni</last></author>
       <author><first>Anthony</first><last>Fader</last></author>
@@ -1884,7 +1884,7 @@
       <title>Knowledge Base Population for Organization Mentions in Email</title>
       <author><first>Ning</first><last>Gao</last></author>
       <author><first>Mark</first><last>Dredze</last></author>
-      <author><first>Douglas</first><last>Oard</last></author>
+      <author id="douglas-w-oard"><first>Douglas</first><last>Oard</last></author>
       <pages>24–28</pages>
       <url hash="c81335e5">W16-1305</url>
       <doi>10.18653/v1/W16-1305</doi>
@@ -1911,7 +1911,7 @@
     <paper id="8">
       <title>But What Do We Actually Know?</title>
       <author><first>Simon</first><last>Razniewski</last></author>
-      <author><first>Fabian</first><last>Suchanek</last></author>
+      <author id="fabian-suchanek"><first>Fabian</first><last>Suchanek</last></author>
       <author><first>Werner</first><last>Nutt</last></author>
       <pages>40–44</pages>
       <url hash="4f1514e7">W16-1308</url>
@@ -1984,7 +1984,7 @@
       <author><first>Sujit</first><last>Pal</last></author>
       <author><first>Darin</first><last>McBeath</last></author>
       <author><first>Brad</first><last>Allen</last></author>
-      <author><first>Ron</first><last>Daniel</last></author>
+      <author id="ron-daniel-jr"><first>Ron</first><last>Daniel</last></author>
       <pages>81–85</pages>
       <url hash="f702fcf0">W16-1315</url>
       <doi>10.18653/v1/W16-1315</doi>
@@ -2003,7 +2003,7 @@
     </paper>
     <paper id="17">
       <title>Call for Discussion: Building a New Standard Dataset for Relation Extraction Tasks</title>
-      <author><first>Teresa</first><last>Martin</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Teresa</first><last>Martin</last></author>
       <author><first>Fiete</first><last>Botschen</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
@@ -2112,7 +2112,7 @@
     <meta>
       <booktitle>Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval and Natural Language Processing for Digital Libraries (<fixed-case>BIRNDL</fixed-case>)</booktitle>
       <editor><first>Guillaume</first><last>Cabanac</last></editor>
-      <editor><first>Muthu Kumar</first><last>Chandrasekaran</last></editor>
+      <editor id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></editor>
       <editor><first>Ingo</first><last>Frommholz</last></editor>
       <editor><first>Kokil</first><last>Jaidka</last></editor>
       <editor><first>Min-Yen</first><last>Kan</last></editor>
@@ -2203,9 +2203,9 @@
     </paper>
     <paper id="9">
       <title>A Study of Reuse and Plagiarism in Speech and Natural Language Processing papers</title>
-      <author><first>Joseph</first><last>Mariani</last></author>
+      <author id="joseph-mariani"><first>Joseph</first><last>Mariani</last></author>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>72–83</pages>
       <url hash="d84fe0ae">W16-1509</url>
       <attachment type="presentation" hash="10373ec0">W16-1509.Presentation.pdf</attachment>
@@ -2311,7 +2311,7 @@
     <paper id="20">
       <title>Trainable Citation-enhanced Summarization of Scientific Articles</title>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Francesco</first><last>Ronzano</last></author>
       <pages>175–186</pages>
       <url hash="51f6e65c">W16-1520</url>
@@ -2322,14 +2322,14 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Representation Learning for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="f1eff05e">W16-16</url>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
       <editor><first>Kyunghyun</first><last>Cho</last></editor>
-      <editor><first>Shay</first><last>Cohen</last></editor>
-      <editor><first>Edward</first><last>Grefenstette</last></editor>
+      <editor id="shay-b-cohen"><first>Shay</first><last>Cohen</last></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></editor>
       <editor><first>Karl Moritz</first><last>Hermann</last></editor>
       <editor><first>Laura</first><last>Rimell</last></editor>
       <editor><first>Jason</first><last>Weston</last></editor>
-      <editor><first>Scott Wen-tau</first><last>Yih</last></editor>
+      <editor id="wen-tau-yih"><first>Scott Wen-tau</first><last>Yih</last></editor>
       <doi>10.18653/v1/W16-16</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -2402,8 +2402,8 @@
     <paper id="7">
       <title>Adjusting Word Embeddings with Semantic Intensity Orders</title>
       <author><first>Joo-Kyung</first><last>Kim</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>62–69</pages>
       <url hash="6e22fe3f">W16-1607</url>
       <doi>10.18653/v1/W16-1607</doi>
@@ -2422,7 +2422,7 @@
     <paper id="9">
       <title>An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation</title>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>78–86</pages>
       <url hash="14461e55">W16-1609</url>
       <doi>10.18653/v1/W16-1609</doi>
@@ -2449,7 +2449,7 @@
     </paper>
     <paper id="12">
       <title>Mapping Unseen Words to Task-Trained Embedding Spaces</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Mohit</first><last>Bansal</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>Karen</first><last>Livescu</last></author>
@@ -2469,7 +2469,7 @@
     </paper>
     <paper id="14">
       <title>Towards cross-lingual distributed representations without parallel text trained with adversarial autoencoders</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <pages>121–126</pages>
       <url hash="0f6204b8">W16-1614</url>
       <doi>10.18653/v1/W16-1614</doi>
@@ -2487,7 +2487,7 @@
       <title>Learning Semantic Relatedness in Community Question Answering Using Neural Models</title>
       <author><first>Henry</first><last>Nassif</last></author>
       <author><first>Mitra</first><last>Mohtarami</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>137–147</pages>
       <url hash="ac4d34d3">W16-1616</url>
       <doi>10.18653/v1/W16-1616</doi>
@@ -2508,7 +2508,7 @@
       <author><first>Thien Huu</first><last>Nguyen</last></author>
       <author><first>Lisheng</first><last>Fu</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>158–165</pages>
       <url hash="fa1f7704">W16-1618</url>
       <doi>10.18653/v1/W16-1618</doi>
@@ -2527,8 +2527,8 @@
     <paper id="20">
       <title>Making Sense of Word Embeddings</title>
       <author><first>Maria</first><last>Pelevina</last></author>
-      <author><first>Nikolay</first><last>Arefiev</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="nikolay-arefyev"><first>Nikolay</first><last>Arefiev</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <pages>174–183</pages>
       <url hash="ecb9da00">W16-1620</url>
@@ -2589,7 +2589,7 @@
       <author><first>Imran</first><last>Sheikh</last></author>
       <author><first>Irina</first><last>Illina</last></author>
       <author><first>Dominique</first><last>Fohr</last></author>
-      <author><first>Georges</first><last>Linarès</last></author>
+      <author id="georges-linares"><first>Georges</first><last>Linarès</last></author>
       <pages>222–229</pages>
       <url hash="312d2390">W16-1626</url>
       <doi>10.18653/v1/W16-1626</doi>
@@ -2606,7 +2606,7 @@
     <paper id="28">
       <title>Towards Generalizable Sentence Embeddings</title>
       <author><first>Eleni</first><last>Triantafillou</last></author>
-      <author><first>Jamie Ryan</first><last>Kiros</last></author>
+      <author id="jamie-kiros"><first>Jamie Ryan</first><last>Kiros</last></author>
       <author><first>Raquel</first><last>Urtasun</last></author>
       <author><first>Richard</first><last>Zemel</last></author>
       <pages>239–248</pages>
@@ -2657,7 +2657,7 @@
       <author><first>Tim</first><last>O’Gorman</last></author>
       <author><first>Sharone</first><last>Horowit-Hendler</last></author>
       <author><first>Heng</first><last>Ji</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>1–6</pages>
       <url hash="94b92129">W16-1701</url>
       <doi>10.18653/v1/W16-1701</doi>
@@ -2687,10 +2687,10 @@
     </paper>
     <paper id="4">
       <title>A Discourse-Annotated Corpus of Conjoined <fixed-case>VP</fixed-case>s</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Alan</first><last>Lee</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>22–31</pages>
       <url hash="7e020982">W16-1704</url>
       <doi>10.18653/v1/W16-1704</doi>
@@ -2709,9 +2709,9 @@
     </paper>
     <paper id="6">
       <title>Supersense tagging with inter-annotator disagreement</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Anders</first><last>Johannsen</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="anders-johannsen"><first>Anders</first><last>Johannsen</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>43–48</pages>
       <url hash="67700dd3">W16-1706</url>
       <doi>10.18653/v1/W16-1706</doi>
@@ -2724,7 +2724,7 @@
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Christopher N. L.</first><last>Clark</last></author>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>49–58</pages>
       <url hash="18317d7b">W16-1707</url>
       <doi>10.18653/v1/W16-1707</doi>
@@ -2745,7 +2745,7 @@
     <paper id="9">
       <title>Different Flavors of <fixed-case>GUM</fixed-case>: Evaluating Genre and Sentence Type Effects on Multilayer Corpus Annotation Quality</title>
       <author><first>Amir</first><last>Zeldes</last></author>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <pages>68–78</pages>
       <url hash="7cf47140">W16-1709</url>
       <doi>10.18653/v1/W16-1709</doi>
@@ -2754,7 +2754,7 @@
     <paper id="10">
       <title>Addressing Annotation Complexity: The Case of Annotating Ideological Perspective in <fixed-case>E</fixed-case>gyptian Social Media</title>
       <author><first>Heba</first><last>Elfardy</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>79–88</pages>
       <url hash="21ae85ee">W16-1710</url>
       <doi>10.18653/v1/W16-1710</doi>
@@ -2779,7 +2779,7 @@
       <author><first>Abhijit</first><last>Suresh</last></author>
       <author><first>Kathryn</first><last>Conger</last></author>
       <author><first>Tim</first><last>O’Gorman</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>99–109</pages>
       <url hash="72cc45b5">W16-1712</url>
       <doi>10.18653/v1/W16-1712</doi>
@@ -2789,7 +2789,7 @@
       <title>Focus Annotation of Task-based Data: Establishing the Quality of Crowd Annotation</title>
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>110–119</pages>
       <url hash="2eb7764a">W16-1713</url>
       <doi>10.18653/v1/W16-1713</doi>
@@ -2797,7 +2797,7 @@
     </paper>
     <paper id="14">
       <title>Part of Speech Annotation of a <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Code-Switching Corpus</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <pages>120–130</pages>
       <url hash="5838816a">W16-1714</url>
@@ -2816,9 +2816,9 @@
     <paper id="16">
       <title>Conversion from <fixed-case>P</fixed-case>aninian Karakas to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>H</fixed-case>indi <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank</title>
       <author><first>Juhi</first><last>Tandon</last></author>
-      <author><first>Himani</first><last>Chaudhary</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="himani-chaudhry"><first>Himani</first><last>Chaudhary</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>141–150</pages>
       <url hash="8b92ac15">W16-1716</url>
       <doi>10.18653/v1/W16-1716</doi>
@@ -2826,7 +2826,7 @@
     </paper>
     <paper id="17">
       <title>Phrase Generalization: a Corpus Study in Multi-Document Abstracts and Original News Alignments</title>
-      <author><first>Ariani</first><last>Di-Felippo</last></author>
+      <author id="ariani-di-felippo"><first>Ariani</first><last>Di-Felippo</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>151–159</pages>
       <url hash="e8c14da4">W16-1717</url>
@@ -2837,7 +2837,7 @@
       <title>Generating Disambiguating Paraphrases for Structurally Ambiguous Sentences</title>
       <author><first>Manjuan</first><last>Duan</last></author>
       <author><first>Ethan</first><last>Hill</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>160–170</pages>
       <url hash="61be5a26">W16-1718</url>
       <doi>10.18653/v1/W16-1718</doi>
@@ -2866,7 +2866,7 @@
     <paper id="21">
       <title>Creating a Novel Geolocation Corpus from Historical Texts</title>
       <author><first>Grant</first><last>DeLozier</last></author>
-      <author><first>Ben</first><last>Wing</last></author>
+      <author id="benjamin-wing"><first>Ben</first><last>Wing</last></author>
       <author><first>Jason</first><last>Baldridge</last></author>
       <author><first>Scott</first><last>Nesbit</last></author>
       <pages>188–198</pages>
@@ -2882,8 +2882,8 @@
       <editor><first>Valia</first><last>Kordoni</last></editor>
       <editor><first>Kostadin</first><last>Cholakov</last></editor>
       <editor><first>Markus</first><last>Egg</last></editor>
-      <editor><first>Stella</first><last>Markantonatou</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <doi>10.18653/v1/W16-18</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -2898,9 +2898,9 @@
     <paper id="1">
       <title>Learning Paraphrasing for Multiword Expressions</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1–10</pages>
       <url hash="05079bf2">W16-1801</url>
       <doi>10.18653/v1/W16-1801</doi>
@@ -2918,7 +2918,7 @@
     <paper id="3">
       <title>Lexical Variability and Compositionality: Investigating Idiomaticity with Distributional Semantic Models</title>
       <author><first>Marco Silvio Giuseppe</first><last>Senaldi</last></author>
-      <author><first>Gianluca E.</first><last>Lebani</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca E.</first><last>Lebani</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>21–31</pages>
       <url hash="85b8283b">W16-1803</url>
@@ -2928,7 +2928,7 @@
     <paper id="4">
       <title>Filtering and Measuring the Intrinsic Quality of Human Compositionality Judgments</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Aline</first><last>Villavicencio</last></author>
       <pages>32–37</pages>
       <url hash="dc474799">W16-1804</url>
@@ -2938,8 +2938,8 @@
     <paper id="5">
       <title>Graph-based Clustering of Synonym Senses for <fixed-case>G</fixed-case>erman Particle Verbs</title>
       <author><first>Moritz</first><last>Wittmann</last></author>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>38–43</pages>
       <url hash="8891d610">W16-1805</url>
       <doi>10.18653/v1/W16-1805</doi>
@@ -2948,7 +2948,7 @@
     <paper id="6">
       <title>Accounting ngrams and multi-word terms can improve topic models</title>
       <author><first>Michael</first><last>Nokel</last></author>
-      <author><first>Natalia</first><last>Loukachevitch</last></author>
+      <author id="natalia-loukachevitch"><first>Natalia</first><last>Loukachevitch</last></author>
       <pages>44–49</pages>
       <url hash="7985a84b">W16-1806</url>
       <doi>10.18653/v1/W16-1806</doi>
@@ -2957,8 +2957,8 @@
     <paper id="7">
       <title>Top a Splitter: Using Distributional Semantics for Improving Compound Splitting</title>
       <author><first>Patrick</first><last>Ziering</last></author>
-      <author><first>Stefan</first><last>Müller</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>50–55</pages>
       <url hash="02f1edbc">W16-1807</url>
       <doi>10.18653/v1/W16-1807</doi>
@@ -2976,7 +2976,7 @@
     <paper id="9">
       <title>Modeling the Non-Substitutability of Multiword Expressions with Distributional Semantics and a Log-Linear Model</title>
       <author><first>Meghdad</first><last>Farahmand</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>61–66</pages>
       <url hash="a459cd26">W16-1809</url>
       <doi>10.18653/v1/W16-1809</doi>
@@ -2993,8 +2993,8 @@
     </paper>
     <paper id="11">
       <title>Representing Support Verbs in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
       <pages>72–77</pages>
       <url hash="06e6c753">W16-1811</url>
       <doi>10.18653/v1/W16-1811</doi>
@@ -3002,9 +3002,9 @@
     </paper>
     <paper id="12">
       <title>Inherently Pronominal Verbs in <fixed-case>C</fixed-case>zech: Description and Conversion Based on Treebank Annotation</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <author><first>Eduard</first><last>Bejček</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>78–83</pages>
       <url hash="6c14386d">W16-1812</url>
       <doi>10.18653/v1/W16-1812</doi>
@@ -3020,9 +3020,9 @@
     </paper>
     <paper id="14">
       <title>A study on the production of collocations by <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese learners</title>
-      <author><first>Ângela</first><last>Costa</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
-      <author><first>Teresa</first><last>Lino</last></author>
+      <author id="angela-costa"><first>Ângela</first><last>Costa</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
+      <author id="maria-teresa-lino"><first>Teresa</first><last>Lino</last></author>
       <pages>91–95</pages>
       <url hash="3a1a4949">W16-1814</url>
       <doi>10.18653/v1/W16-1814</doi>
@@ -3039,7 +3039,7 @@
     <paper id="16">
       <title>Impact of <fixed-case>MWE</fixed-case> Resources on Multiword Recognition</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>107–111</pages>
       <url hash="a4464d41">W16-1816</url>
       <doi>10.18653/v1/W16-1816</doi>
@@ -3048,7 +3048,7 @@
     <paper id="17">
       <title>A Word Embedding Approach to Identifying Verb-Noun Idiomatic Combinations</title>
       <author><first>Waseem</first><last>Gharbieh</last></author>
-      <author><first>Virendra</first><last>Bhavsar</last></author>
+      <author id="virendrakumar-bhavsar"><first>Virendra</first><last>Bhavsar</last></author>
       <author><first>Paul</first><last>Cook</last></author>
       <pages>112–118</pages>
       <url hash="1785eda2">W16-1817</url>
@@ -3090,7 +3090,7 @@
       <author><first>Laura</first><last>Hernández-Domínguez</last></author>
       <author><first>Edgar</first><last>García-Cano</last></author>
       <author><first>Sylvie</first><last>Ratté</last></author>
-      <author><first>Gerardo</first><last>Sierra-Martínez</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra-Martínez</last></author>
       <pages>10–15</pages>
       <url hash="26a778de">W16-1902</url>
       <doi>10.18653/v1/W16-1902</doi>
@@ -3111,7 +3111,7 @@
       <title>Leveraging Annotators’ Gaze Behaviour for Coreference Resolution</title>
       <author><first>Joe</first><last>Cheri</last></author>
       <author><first>Abhijit</first><last>Mishra</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>22–26</pages>
       <url hash="f363d235">W16-1904</url>
       <doi>10.18653/v1/W16-1904</doi>
@@ -3138,9 +3138,9 @@
     </paper>
     <paper id="7">
       <title>Longitudinal Studies of Variation Sets in Child-directed Speech</title>
-      <author><first>Mats</first><last>Wirén</last></author>
-      <author><first>Kristina</first><last>Nilsson Björkenstam</last></author>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson Björkenstam</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
       <author><first>Elisabet Eir</first><last>Cortes</last></author>
       <pages>44–52</pages>
       <url hash="863fd01b">W16-1907</url>
@@ -3151,7 +3151,7 @@
       <title>Learning Phone Embeddings for Word Segmentation of Child-Directed Speech</title>
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>53–63</pages>
       <url hash="ff2240a5">W16-1908</url>
       <doi>10.18653/v1/W16-1908</doi>
@@ -3178,8 +3178,8 @@
     </paper>
     <paper id="11">
       <title>Modelling the informativeness and timing of non-verbal cues in parent-child interaction</title>
-      <author><first>Kristina</first><last>Nilsson Björkenstam</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="kristina-nilsson-bjorkenstam"><first>Kristina</first><last>Nilsson Björkenstam</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <author><first>Robert</first><last>Östling</last></author>
       <pages>82–90</pages>
       <url hash="46376351">W16-1911</url>
@@ -3192,7 +3192,7 @@
       <booktitle>Proceedings of the 14th <fixed-case>SIGMORPHON</fixed-case> Workshop on Computational Research in Phonetics, Phonology, and Morphology</booktitle>
       <url hash="fc86503d">W16-20</url>
       <editor><first>Micha</first><last>Elsner</last></editor>
-      <editor><first>Sandra</first><last>Kuebler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></editor>
       <doi>10.18653/v1/W16-20</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -3218,7 +3218,7 @@
       <author><first>Christo</first><last>Kirov</last></author>
       <author><first>John</first><last>Sylak-Glassman</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>10–22</pages>
       <url hash="57774fe4">W16-2002</url>
@@ -3235,7 +3235,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>EHU</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2016 Shared Task. A Simple Proposal: Grapheme-to-Phoneme for Inflection</title>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Izaskun</first><last>Etxeberria</last></author>
       <pages>27–30</pages>
       <url hash="f797af1a">W16-2004</url>
@@ -3256,7 +3256,7 @@
     <paper id="6">
       <title>Morphological reinflection with conditional random fields and unsupervised features</title>
       <author><first>Ling</first><last>Liu</last></author>
-      <author><first>Lingshuang Jack</first><last>Mao</last></author>
+      <author id="lingshuang-jack-mao"><first>Lingshuang Jack</first><last>Mao</last></author>
       <pages>36–40</pages>
       <url hash="a734fc6d">W16-2006</url>
       <doi>10.18653/v1/W16-2006</doi>
@@ -3274,7 +3274,7 @@
     </paper>
     <paper id="8">
       <title>Evaluating Sequence Alignment for Learning Inflectional Morphology</title>
-      <author><first>David</first><last>King</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
       <pages>49–53</pages>
       <url hash="523af288">W16-2008</url>
       <doi>10.18653/v1/W16-2008</doi>
@@ -3290,8 +3290,8 @@
     </paper>
     <paper id="10">
       <title><fixed-case>MED</fixed-case>: The <fixed-case>LMU</fixed-case> System for the <fixed-case>SIGMORPHON</fixed-case> 2016 Shared Task on Morphological Reinflection</title>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>62–70</pages>
       <url hash="1561ea52">W16-2010</url>
       <doi>10.18653/v1/W16-2010</doi>
@@ -3302,7 +3302,7 @@
       <author><first>Dima</first><last>Taji</last></author>
       <author><first>Ramy</first><last>Eskander</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>71–75</pages>
       <url hash="f2eed862">W16-2011</url>
       <doi>10.18653/v1/W16-2011</doi>
@@ -3312,7 +3312,7 @@
       <title>Letter Sequence Labeling for Compound Splitting</title>
       <author><first>Jianqiang</first><last>Ma</last></author>
       <author><first>Verena</first><last>Henrich</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>76–81</pages>
       <url hash="9710383f">W16-2012</url>
       <doi>10.18653/v1/W16-2012</doi>
@@ -3341,7 +3341,7 @@
       <author><first>Max</first><last>Kisselew</last></author>
       <author><first>Laura</first><last>Rimell</last></author>
       <author><first>Alexis</first><last>Palmer</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>93–98</pages>
       <url hash="616e0cca">W16-2015</url>
       <doi>10.18653/v1/W16-2015</doi>
@@ -3369,7 +3369,7 @@
     <paper id="18">
       <title>Towards robust cross-linguistic comparisons of phonological networks</title>
       <author><first>Philippa</first><last>Shoemark</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>James</first><last>Kirby</last></author>
       <author><first>Rik</first><last>Sarkar</last></author>
       <pages>110–120</pages>
@@ -3410,8 +3410,8 @@
       <booktitle>Proceedings of the 10th <fixed-case>SIGHUM</fixed-case> Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities</booktitle>
       <url hash="c23bb9f6">W16-21</url>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Beatrice</first><last>Alex</last></editor>
-      <editor><first>Kalliopi A.</first><last>Zervanou</last></editor>
+      <editor id="beatrice-alex"><first>Beatrice</first><last>Alex</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi A.</first><last>Zervanou</last></editor>
       <doi>10.18653/v1/W16-21</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -3433,7 +3433,7 @@
     </paper>
     <paper id="2">
       <title>Analysis of Policy Agendas: Lessons Learned from Automatic Topic Classification of <fixed-case>C</fixed-case>roatian Political Texts</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <author><first>Daniela</first><last>Širinić</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
@@ -3481,7 +3481,7 @@
     </paper>
     <paper id="7">
       <title>You Shall Know People by the Company They Keep: Person Name Disambiguation for Social Network Construction</title>
-      <author><first>Mariona</first><last>Coll Ardanuy</last></author>
+      <author id="mariona-coll-ardanuy"><first>Mariona</first><last>Coll Ardanuy</last></author>
       <author><first>Maarten</first><last>van den Bos</last></author>
       <author><first>Caroline</first><last>Sporleder</last></author>
       <pages>63–73</pages>
@@ -3518,8 +3518,8 @@
     <paper id="11">
       <title>How Do Cultural Differences Impact the Quality of Sarcasm Annotation?: A Case Study of <fixed-case>I</fixed-case>ndian Annotators and <fixed-case>A</fixed-case>merican Text</title>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <author><first>Jaya</first><last>Saraswati</last></author>
       <author><first>Rajita</first><last>Shukla</last></author>
       <pages>95–99</pages>
@@ -3530,7 +3530,7 @@
     <paper id="12">
       <title>Combining Phonology and Morphology for the Normalization of Historical Texts</title>
       <author><first>Izaskun</first><last>Etxeberria</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <author><first>Larraitz</first><last>Uria</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>100–105</pages>
@@ -3540,8 +3540,8 @@
     </paper>
     <paper id="13">
       <title>Towards Building a Political Protest Database to Explain Changes in the Welfare State</title>
-      <author><first>Çağıl</first><last>Sönmez</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="cagil-sonmez"><first>Çağıl</first><last>Sönmez</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <author><first>Erdem</first><last>Yörük</last></author>
       <pages>106–110</pages>
       <url hash="3c2ea8f2">W16-2113</url>
@@ -3579,7 +3579,7 @@
       <title>Towards a text analysis system for political debates</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <pages>134–139</pages>
       <url hash="f5071594">W16-2117</url>
       <doi>10.18653/v1/W16-2117</doi>
@@ -3624,25 +3624,25 @@
   <volume id="22" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Conference on Machine Translation: Volume 1, Research Papers</booktitle>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Christian</first><last>Buck</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Liane</first><last>Guillou</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Pavel</first><last>Pecina</last></editor>
       <editor><first>Martin</first><last>Popel</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -3659,7 +3659,7 @@
       <title>Cross-language Projection of Dependency Trees with Constrained Partial Parsing for Tree-to-Tree Machine Translation</title>
       <author><first>Yu</first><last>Shen</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>1–11</pages>
       <url hash="cf35b0fe">W16-2201</url>
@@ -3668,8 +3668,8 @@
     </paper>
     <paper id="2">
       <title>Improving Pronoun Translation by Modeling Coreference Uncertainty</title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>12–20</pages>
       <url hash="c7098c03">W16-2202</url>
       <doi>10.18653/v1/W16-2202</doi>
@@ -3678,7 +3678,7 @@
     <paper id="3">
       <title>Modeling verbal inflection for <fixed-case>E</fixed-case>nglish to <fixed-case>G</fixed-case>erman <fixed-case>SMT</fixed-case></title>
       <author><first>Anita</first><last>Ramm</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>21–31</pages>
       <url hash="f84ee4fa">W16-2203</url>
       <doi>10.18653/v1/W16-2203</doi>
@@ -3686,7 +3686,7 @@
     </paper>
     <paper id="4">
       <title>Modeling Selectional Preferences of Verbs and Nouns in String-to-Tree Machine Translation</title>
-      <author><first>Maria</first><last>Nădejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>32–42</pages>
@@ -3696,9 +3696,9 @@
     </paper>
     <paper id="5">
       <title>Modeling Complement Types in Phrase-Based <fixed-case>SMT</fixed-case></title>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>43–53</pages>
       <url hash="a5f0f442">W16-2205</url>
       <doi>10.18653/v1/W16-2205</doi>
@@ -3711,7 +3711,7 @@
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Mohammed</first><last>Hethnawi</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>54–65</pages>
       <url hash="bc06e8dc">W16-2206</url>
       <doi>10.18653/v1/W16-2206</doi>
@@ -3732,7 +3732,7 @@
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>74–82</pages>
       <url hash="026c1137">W16-2208</url>
       <doi>10.18653/v1/W16-2208</doi>
@@ -3750,8 +3750,8 @@
     <paper id="10">
       <title>A Framework for Discriminative Rule Selection in Hierarchical <fixed-case>M</fixed-case>oses</title>
       <author><first>Fabienne</first><last>Braune</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <pages>92–101</pages>
       <url hash="8c627042">W16-2210</url>
@@ -3772,7 +3772,7 @@
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
       <author><first>Joern</first><last>Wuebker</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>110–117</pages>
       <url hash="7dc7bcf1">W16-2212</url>
       <doi>10.18653/v1/W16-2212</doi>
@@ -3783,7 +3783,7 @@
       <author><first>Joachim</first><last>Daiber</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>118–130</pages>
       <url hash="38959903">W16-2213</url>
       <doi>10.18653/v1/W16-2213</doi>
@@ -3793,25 +3793,25 @@
   <volume id="23" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers</booktitle>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Christian</first><last>Buck</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Liane</first><last>Guillou</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Pavel</first><last>Pecina</last></editor>
       <editor><first>Martin</first><last>Popel</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -3840,8 +3840,8 @@
       <author><first>Mariana</first><last>Neves</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Karin</first><last>Verspoor</last></author>
@@ -3891,9 +3891,9 @@
     <paper id="5">
       <title><fixed-case>TÜBİTAK</fixed-case> <fixed-case>SMT</fixed-case> System Submission for <fixed-case>WMT</fixed-case>2016</title>
       <author><first>Emre</first><last>Bektaş</last></author>
-      <author><first>Ertuğrul</first><last>Yilmaz</last></author>
+      <author id="ertugrul-yilmaz"><first>Ertuğrul</first><last>Yilmaz</last></author>
       <author><first>Coşkun</first><last>Mermer</last></author>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <pages>246–251</pages>
       <url hash="b5c16eb1">W16-2305</url>
       <doi>10.18653/v1/W16-2305</doi>
@@ -3901,7 +3901,7 @@
     </paper>
     <paper id="6">
       <title><fixed-case>P</fixed-case>ar<fixed-case>FDA</fixed-case> for Instance Selection for Statistical Machine Translation</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>252–258</pages>
       <url hash="a780ae7a">W16-2306</url>
       <doi>10.18653/v1/W16-2306</doi>
@@ -3911,7 +3911,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>heffield Systems for the <fixed-case>E</fixed-case>nglish-<fixed-case>R</fixed-case>omanian <fixed-case>WMT</fixed-case> Translation Task</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Xingyi</first><last>Song</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>259–263</pages>
@@ -3973,7 +3973,7 @@
     <paper id="13">
       <title>The <fixed-case>AFRL</fixed-case>-<fixed-case>MITLL</fixed-case> <fixed-case>WMT</fixed-case>16 News-Translation Task Systems</title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Katherine</first><last>Young</last></author>
       <author><first>Michaeel</first><last>Kazi</last></author>
@@ -3992,7 +3992,7 @@
       <author><first>Mohammed</first><last>Mediani</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>303–310</pages>
       <url hash="c3c6c57e">W16-2314</url>
       <doi>10.18653/v1/W16-2314</doi>
@@ -4001,7 +4001,7 @@
     <paper id="15">
       <title>The <fixed-case>E</fixed-case>dinburgh/<fixed-case>LMU</fixed-case> Hierarchical Machine Translation System for <fixed-case>WMT</fixed-case> 2016</title>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <pages>311–318</pages>
       <url hash="46c67c9f">W16-2315</url>
@@ -4053,25 +4053,25 @@
       <title>The <fixed-case>QT</fixed-case>21/<fixed-case>H</fixed-case>im<fixed-case>L</fixed-case> Combined Machine Translation System</title>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Fabienne</first><last>Braune</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <author><first>Lauriane</first><last>Aufrant</last></author>
       <author><first>Franck</first><last>Burlot</last></author>
       <author><first>Elena</first><last>Knyazeva</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Stella</first><last>Frank</last></author>
       <pages>344–355</pages>
       <url hash="01402526">W16-2320</url>
@@ -4083,7 +4083,7 @@
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Andreas</first><last>Guta</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>356–361</pages>
       <url hash="6ce7c40d">W16-2321</url>
       <doi>10.18653/v1/W16-2321</doi>
@@ -4091,7 +4091,7 @@
     </paper>
     <paper id="22">
       <title><fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2016 Translation Task: Deep Learning, Morphological Segmentation and Tuning on Character Sequences</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Antonio</first><last>Toral</last></author>
       <pages>362–370</pages>
       <url hash="c415e119">W16-2322</url>
@@ -4123,7 +4123,7 @@
       <author><first>Aleš</first><last>Tamchyna</last></author>
       <author><first>Roman</first><last>Sudarikov</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>385–390</pages>
       <url hash="c8676507">W16-2325</url>
       <doi>10.18653/v1/W16-2325</doi>
@@ -4137,7 +4137,7 @@
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
       <author><first>Robert</first><last>Östling</last></author>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
       <pages>391–398</pages>
       <url hash="ddd63531">W16-2326</url>
       <doi>10.18653/v1/W16-2326</doi>
@@ -4147,7 +4147,7 @@
       <title><fixed-case>E</fixed-case>dinburgh’s Statistical Machine Translation Systems for <fixed-case>WMT</fixed-case>16</title>
       <author><first>Philip</first><last>Williams</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
-      <author><first>Maria</first><last>Nădejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
@@ -4170,7 +4170,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
       <author><first>Vivien</first><last>Macketanz</last></author>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <pages>415–422</pages>
       <url hash="cc43eb34">W16-2329</url>
       <doi>10.18653/v1/W16-2329</doi>
@@ -4180,7 +4180,7 @@
       <title><fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> Adaptation System (Scorpio) at <fixed-case>WMT</fixed-case>’16 <fixed-case>IT</fixed-case>-<fixed-case>DOMAIN</fixed-case> Task</title>
       <author><first>Hoang</first><last>Cuong</last></author>
       <author><first>Stella</first><last>Frank</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <pages>423–427</pages>
       <url hash="29e90868">W16-2330</url>
       <doi>10.18653/v1/W16-2330</doi>
@@ -4197,21 +4197,21 @@
     </paper>
     <paper id="32">
       <title><fixed-case>SMT</fixed-case> and Hybrid systems of the <fixed-case>QTL</fixed-case>eap project in the <fixed-case>WMT</fixed-case>16 <fixed-case>IT</fixed-case>-task</title>
-      <author><first>Rosa</first><last>Gaudio</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="rosa-del-gaudio"><first>Rosa</first><last>Gaudio</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <author><first>Dieke</first><last>Oele</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Steven</first><last>Neale</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Andreia</first><last>Querido</last></author>
       <author><first>Nuno</first><last>Rendeiro</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>435–441</pages>
       <url hash="c9289d8a">W16-2332</url>
       <doi>10.18653/v1/W16-2332</doi>
@@ -4222,9 +4222,9 @@
       <author><first>Koushik</first><last>Pahari</last></author>
       <author><first>Alapan</first><last>Kuila</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>442–448</pages>
       <url hash="464deeb7">W16-2333</url>
       <doi>10.18653/v1/W16-2333</doi>
@@ -4245,7 +4245,7 @@
     <paper id="35">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>ortuguese Biomedical Translation Task Using a Genuine Phrase-Based Statistical Machine Translation Approach</title>
       <author><first>José</first><last>Aires</last></author>
-      <author><first>Gabriel</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Lopes</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <pages>456–462</pages>
       <url hash="dd56352f">W16-2335</url>
@@ -4254,11 +4254,11 @@
     </paper>
     <paper id="36">
       <title>The <fixed-case>TALP</fixed-case>–<fixed-case>UPC</fixed-case> <fixed-case>S</fixed-case>panish–<fixed-case>E</fixed-case>nglish <fixed-case>WMT</fixed-case> Biomedical Task: Bilingual Embeddings and Char-based Neural Language Model Rescoring in a Phrase-based System</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>463–468</pages>
       <url hash="804ca6e2">W16-2336</url>
       <doi>10.18653/v1/W16-2336</doi>
@@ -4277,7 +4277,7 @@
     <paper id="38">
       <title><fixed-case>IXA</fixed-case> Biomedical Translation System at <fixed-case>WMT</fixed-case>16 Biomedical Translation Task</title>
       <author><first>Olatz</first><last>Perez-de-Viñaspre</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>477–482</pages>
       <url hash="4699e644">W16-2338</url>
       <doi>10.18653/v1/W16-2338</doi>
@@ -4286,9 +4286,9 @@
     <paper id="39">
       <title><fixed-case>C</fixed-case>obalt<fixed-case>F</fixed-case>: A Fluent Metric for <fixed-case>MT</fixed-case> Evaluation</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
       <author><first>Anton</first><last>Malinovskiy</last></author>
       <pages>483–490</pages>
       <url hash="e7d47af8">W16-2339</url>
@@ -4306,7 +4306,7 @@
     </paper>
     <paper id="41">
       <title>chr<fixed-case>F</fixed-case> deconstructed: beta parameters and n-gram weights</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>499–504</pages>
       <url hash="9527a9c4">W16-2341</url>
       <doi>10.18653/v1/W16-2341</doi>
@@ -4317,7 +4317,7 @@
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
       <author><first>Hendrik</first><last>Rosendahl</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>505–510</pages>
       <url hash="cb1563c4">W16-2342</url>
       <doi>10.18653/v1/W16-2342</doi>
@@ -4332,7 +4332,7 @@
       <author><first>Zhiming</first><last>Chen</last></author>
       <author><first>Yiming</first><last>Tan</last></author>
       <author><first>Maoxi</first><last>Li</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <pages>511–517</pages>
       <url hash="9f75b4e7">W16-2343</url>
       <doi>10.18653/v1/W16-2343</doi>
@@ -4351,13 +4351,13 @@
       <title>Findings of the 2016 <fixed-case>WMT</fixed-case> Shared Task on Cross-lingual Pronoun Prediction</title>
       <author><first>Liane</first><last>Guillou</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
       <author><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Yannick</first><last>Versley</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>525–542</pages>
       <url hash="a2e6e680">W16-2345</url>
       <doi>10.18653/v1/W16-2345</doi>
@@ -4367,7 +4367,7 @@
       <title>A Shared Task on Multimodal Machine Translation and Crosslingual Image Description</title>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Stella</first><last>Frank</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Desmond</first><last>Elliott</last></author>
       <pages>543–553</pages>
       <url hash="32e923e2">W16-2346</url>
@@ -4395,7 +4395,7 @@
       <title>The <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Cross-Lingual Pronoun Translation System</title>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Yevgeniy</first><last>Puzikov</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>571–575</pages>
       <url hash="951e9661">W16-2349</url>
@@ -4422,8 +4422,8 @@
     </paper>
     <paper id="52">
       <title>Pronoun Language Model and Grammatical Heuristics for Aiding Pronoun Prediction</title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>589–595</pages>
       <url hash="ee11b73d">W16-2352</url>
       <doi>10.18653/v1/W16-2352</doi>
@@ -4477,7 +4477,7 @@
       <author><first>Walid</first><last>Aransa</last></author>
       <author><first>Yaxing</first><last>Wang</last></author>
       <author><first>Marc</first><last>Masana</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Joost</first><last>van de Weijer</last></author>
@@ -4502,7 +4502,7 @@
       <author><first>Frederick</first><last>Liu</last></author>
       <author><first>Sz-Rung</first><last>Shiang</last></author>
       <author><first>Jean</first><last>Oh</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>639–645</pages>
       <url hash="057d7722">W16-2360</url>
       <doi>10.18653/v1/W16-2360</doi>
@@ -4511,7 +4511,7 @@
     <paper id="61">
       <title><fixed-case>CUNI</fixed-case> System for <fixed-case>WMT</fixed-case>16 Automatic Post-Editing and Multimodal Translation Tasks</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Marek</first><last>Tlustý</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Pavel</first><last>Pecina</last></author>
@@ -4523,7 +4523,7 @@
     <paper id="62">
       <title><fixed-case>WMT</fixed-case> 2016 Multimodal Translation System Description based on Bidirectional Recurrent Neural Networks with Double-Embeddings</title>
       <author><first>Sergio</first><last>Rodríguez Guasch</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>655–659</pages>
       <url hash="1f1722cd">W16-2362</url>
       <doi>10.18653/v1/W16-2362</doi>
@@ -4559,7 +4559,7 @@
     </paper>
     <paper id="66">
       <title><fixed-case>YODA</fixed-case> System for <fixed-case>WMT</fixed-case>16 Shared Task: Bilingual Document Alignment</title>
-      <author><first>Aswarth Abhilash</first><last>Dara</last></author>
+      <author id="aswarth-abhilash-dara"><first>Aswarth Abhilash</first><last>Dara</last></author>
       <author><first>Yiu-Chang</first><last>Lin</last></author>
       <pages>679–684</pages>
       <url hash="65dfeb43">W16-2366</url>
@@ -4568,9 +4568,9 @@
     </paper>
     <paper id="67">
       <title>Bitextor’s participation in <fixed-case>WMT</fixed-case>’16: shared task on document alignment</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
       <author><first>Jorge</first><last>Ferrández-Tordera</last></author>
       <pages>685–691</pages>
       <url hash="921d12a3">W16-2367</url>
@@ -4588,7 +4588,7 @@
     <paper id="69">
       <title>First Steps Towards Coverage-Based Document Alignment</title>
       <author><first>Luís</first><last>Gomes</last></author>
-      <author><first>Gabriel</first><last>Pereira Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Pereira Lopes</last></author>
       <pages>697–702</pages>
       <url hash="1c37937e">W16-2369</url>
       <doi>10.18653/v1/W16-2369</doi>
@@ -4597,7 +4597,7 @@
     <paper id="70">
       <title><fixed-case>BAD</fixed-case> <fixed-case>LUC</fixed-case>@<fixed-case>WMT</fixed-case> 2016: a Bilingual Document Alignment Platform Based on Lucene</title>
       <author><first>Laurent</first><last>Jakubina</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>703–709</pages>
       <url hash="5345b92d">W16-2370</url>
       <doi>10.18653/v1/W16-2370</doi>
@@ -4607,7 +4607,7 @@
       <title>Using Term Position Similarity and Language Modeling for Bilingual Document Alignment</title>
       <author><first>Thanh C.</first><last>Le</last></author>
       <author><first>Hoa Trong</first><last>Vu</last></author>
-      <author><first>Jonathan</first><last>Oberländer</last></author>
+      <author id="jon-oberlander"><first>Jonathan</first><last>Oberländer</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>710–716</pages>
       <url hash="0963b148">W16-2371</url>
@@ -4627,7 +4627,7 @@
     </paper>
     <paper id="73">
       <title><fixed-case>WMT</fixed-case>2016: A Hybrid Approach to Bilingual Document Alignment</title>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <pages>724–727</pages>
@@ -4639,7 +4639,7 @@
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench Document Alignment Based on Keywords and Statistical Translation</title>
       <author><first>Marek</first><last>Medveď</last></author>
       <author><first>Miloš</first><last>Jakubíček</last></author>
-      <author><first>Vojtech</first><last>Kovář</last></author>
+      <author id="vojtech-kovar"><first>Vojtech</first><last>Kovář</last></author>
       <pages>728–732</pages>
       <url hash="5d7d3a87">W16-2374</url>
       <doi>10.18653/v1/W16-2374</doi>
@@ -4649,7 +4649,7 @@
       <title>The <fixed-case>ILSP</fixed-case>/<fixed-case>ARC</fixed-case> submission to the <fixed-case>WMT</fixed-case> 2016 Bilingual Document Alignment Shared Task</title>
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>733–739</pages>
       <url hash="a1a0d7eb">W16-2375</url>
       <doi>10.18653/v1/W16-2375</doi>
@@ -4668,7 +4668,7 @@
     <paper id="77">
       <title>The <fixed-case>FBK</fixed-case> Participation in the <fixed-case>WMT</fixed-case> 2016 Automatic Post-editing Shared Task</title>
       <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>José G.</first><last>C. de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>C. de Souza</last></author>
       <author><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>745–750</pages>
@@ -4689,7 +4689,7 @@
       <title><fixed-case>USAAR</fixed-case>: An Operation Sequential Model for Automatic Statistical Post-Editing</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>759–763</pages>
       <url hash="1b172b97">W16-2379</url>
       <doi>10.18653/v1/W16-2379</doi>
@@ -4699,7 +4699,7 @@
       <title>Bilingual Embeddings and Word Alignments for Translation Quality Estimation</title>
       <author><first>Amal</first><last>Abdelsalam</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Samhaa</first><last>El-Beltagy</last></author>
+      <author id="samhaa-r-el-beltagy"><first>Samhaa</first><last>El-Beltagy</last></author>
       <pages>764–771</pages>
       <url hash="ff635db3">W16-2380</url>
       <doi>10.18653/v1/W16-2380</doi>
@@ -4707,9 +4707,9 @@
     </paper>
     <paper id="81">
       <title><fixed-case>SHEF</fixed-case>-<fixed-case>MIME</fixed-case>: Word-level Quality Estimation Using Imitation Learning</title>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Andreas</first><last>Vlachos</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>772–776</pages>
       <url hash="c1aab797">W16-2381</url>
@@ -4718,7 +4718,7 @@
     </paper>
     <paper id="82">
       <title>Referential Translation Machines for Predicting Translation Performance</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>777–781</pages>
       <url hash="14dc0235">W16-2382</url>
       <doi>10.18653/v1/W16-2382</doi>
@@ -4726,9 +4726,9 @@
     </paper>
     <paper id="83">
       <title><fixed-case>UA</fixed-case>lacant word-level and phrase-level machine translation quality estimation systems at <fixed-case>WMT</fixed-case> 2016</title>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Mikel</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></author>
       <pages>782–786</pages>
       <url hash="c3ddfda0">W16-2383</url>
       <doi>10.18653/v1/W16-2383</doi>
@@ -4756,7 +4756,7 @@
     <paper id="86">
       <title><fixed-case>USFD</fixed-case>’s Phrase-level Quality Estimation Systems</title>
       <author><first>Varvara</first><last>Logacheva</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>800–805</pages>
       <url hash="49fece9a">W16-2386</url>
@@ -4765,10 +4765,10 @@
     </paper>
     <paper id="87">
       <title>Unbabel’s Participation in the <fixed-case>WMT</fixed-case>16 Word-Level Translation Quality Estimation Shared Task</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Ramón</first><last>Astudillo</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <pages>806–811</pages>
       <url hash="372cd4a7">W16-2387</url>
       <doi>10.18653/v1/W16-2387</doi>
@@ -4776,7 +4776,7 @@
     </paper>
     <paper id="88">
       <title><fixed-case>S</fixed-case>imple<fixed-case>N</fixed-case>ets: Quality Estimation with Resource-Light Neural Networks</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>812–818</pages>
       <url hash="fa7a860e">W16-2388</url>
@@ -4786,7 +4786,7 @@
     <paper id="89">
       <title>Translation Quality Estimation using Recurrent Neural Network</title>
       <author><first>Raj Nath</first><last>Patel</last></author>
-      <author><first>Sasikumar</first><last>M</last></author>
+      <author id="sasikumar-m"><first>Sasikumar</first><last>M</last></author>
       <pages>819–824</pages>
       <url hash="1f8162a4">W16-2389</url>
       <doi>10.18653/v1/W16-2389</doi>
@@ -4803,10 +4803,10 @@
     </paper>
     <paper id="91">
       <title>Word embeddings and discourse information for Quality Estimation</title>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Daniel</first><last>Beck</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="daniel-beck"><first>Daniel</first><last>Beck</last></author>
       <author><first>Kashif</first><last>Shah</last></author>
-      <author><first>Karin</first><last>Sim Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin</first><last>Sim Smith</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>831–837</pages>
       <url hash="6cf9d1a5">W16-2391</url>
@@ -4827,7 +4827,7 @@
     <paper id="93">
       <title><fixed-case>UGENT</fixed-case>-<fixed-case>LT</fixed-case>3 <fixed-case>SCATE</fixed-case> Submission for <fixed-case>WMT</fixed-case>16 Shared Task on Quality Estimation</title>
       <author><first>Arda</first><last>Tezcan</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
       <pages>843–850</pages>
       <url hash="2e962b80">W16-2393</url>
@@ -4883,7 +4883,7 @@
     <paper id="4">
       <title>Distributed representation and estimation of <fixed-case>WFST</fixed-case>-based n-gram models</title>
       <author><first>Cyril</first><last>Allauzen</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>Brian</first><last>Roark</last></author>
       <pages>32–41</pages>
       <url hash="2b015162">W16-2404</url>
@@ -4901,9 +4901,9 @@
     </paper>
     <paper id="6">
       <title>Data-Driven Spelling Correction using Weighted Finite-State Methods</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Pekka</first><last>Kauppinen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>51–59</pages>
       <url hash="c33a8b89">W16-2406</url>
       <doi>10.18653/v1/W16-2406</doi>
@@ -4968,7 +4968,7 @@
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
       <author><first>Julie</first><last>Weeds</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>7–12</pages>
       <url hash="1e4213ab">W16-2502</url>
       <doi>10.18653/v1/W16-2502</doi>
@@ -4986,7 +4986,7 @@
       <title>Evaluating Word Embeddings Using a Representative Suite of Practical Tasks</title>
       <author><first>Neha Nayak</first><last>Kennard</last></author>
       <author><first>Gabor</first><last>Angeli</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>19–23</pages>
       <url hash="45313556">W16-2504</url>
       <doi>10.18653/v1/W16-2504</doi>
@@ -4996,9 +4996,9 @@
       <title>Story Cloze Evaluator: Vector Space Representation Evaluation by Predicting What Happens Next</title>
       <author><first>Nasrin</first><last>Mostafazadeh</last></author>
       <author><first>Lucy</first><last>Vanderwende</last></author>
-      <author><first>Wen-tau</first><last>Yih</last></author>
+      <author id="wen-tau-yih"><first>Wen-tau</first><last>Yih</last></author>
       <author><first>Pushmeet</first><last>Kohli</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>24–29</pages>
       <url hash="bbaf62d6">W16-2505</url>
       <doi>10.18653/v1/W16-2505</doi>
@@ -5009,7 +5009,7 @@
       <author><first>Manaal</first><last>Faruqui</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Pushpendre</first><last>Rastogi</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>30–35</pages>
       <url hash="303468d0">W16-2506</url>
       <doi>10.18653/v1/W16-2506</doi>
@@ -5026,7 +5026,7 @@
     </paper>
     <paper id="8">
       <title>Find the word that does not belong: A Framework for an Intrinsic Evaluation of Word Vector Representations</title>
-      <author><first>José</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>José</first><last>Camacho-Collados</last></author>
       <author><first>Roberto</first><last>Navigli</last></author>
       <pages>43–50</pages>
       <url hash="0c640b8f">W16-2508</url>
@@ -5056,7 +5056,7 @@
       <author><first>Sahar</first><last>Ghannay</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
       <author><first>Nathalie</first><last>Camelin</last></author>
-      <author><first>Paul</first><last>Deleglise</last></author>
+      <author id="paul-deleglise"><first>Paul</first><last>Deleglise</last></author>
       <pages>62–66</pages>
       <url hash="4dfb636e">W16-2511</url>
       <doi>10.18653/v1/W16-2511</doi>
@@ -5092,7 +5092,7 @@
       <title>Evaluating multi-sense embeddings for semantic resolution monolingually and in word translation</title>
       <author><first>Gábor</first><last>Borbély</last></author>
       <author><first>Márton</first><last>Makrai</last></author>
-      <author><first>Dávid Márk</first><last>Nemeskey</last></author>
+      <author id="david-mark-nemeskey"><first>Dávid Márk</first><last>Nemeskey</last></author>
       <author><first>András</first><last>Kornai</last></author>
       <pages>83–89</pages>
       <url hash="8e5ec9f4">W16-2515</url>
@@ -5118,7 +5118,7 @@
     </paper>
     <paper id="18">
       <title>Thematic fit evaluation: an aspect of selectional preferences</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Clayton</first><last>Greenberg</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>99–105</pages>
@@ -5139,7 +5139,7 @@
       <title>Correlation-based Intrinsic Evaluation of Word Vector Representations</title>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <author><first>Manaal</first><last>Faruqui</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>111–115</pages>
       <url hash="fe42e8ce">W16-2520</url>
       <doi>10.18653/v1/W16-2520</doi>
@@ -5147,7 +5147,7 @@
     </paper>
     <paper id="21">
       <title>Evaluating word embeddings with f<fixed-case>MRI</fixed-case> and eye-tracking</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>116–121</pages>
       <url hash="3633db57">W16-2521</url>
       <doi>10.18653/v1/W16-2521</doi>
@@ -5189,7 +5189,7 @@
     <paper id="25">
       <title><fixed-case>SLEDDED</fixed-case>: A Proposed Dataset of Event Descriptions for Evaluating Phrase Representations</title>
       <author><first>Laura</first><last>Rimell</last></author>
-      <author><first>Eva Maria</first><last>Vecchi</last></author>
+      <author id="eva-maria-vecchi"><first>Eva Maria</first><last>Vecchi</last></author>
       <pages>140–144</pages>
       <url hash="08688e10">W16-2525</url>
       <doi>10.18653/v1/W16-2525</doi>
@@ -5213,7 +5213,7 @@
       <editor><first>Paul</first><last>Cook</last></editor>
       <editor><first>Stefan</first><last>Evert</last></editor>
       <editor><first>Roland</first><last>Schäfer</last></editor>
-      <editor><first>Egon</first><last>Stemle</last></editor>
+      <editor id="egon-stemle"><first>Egon</first><last>Stemle</last></editor>
       <doi>10.18653/v1/W16-26</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin</address>
@@ -5307,7 +5307,7 @@
       <title>Babler - Data Collection from the Web to Support Speech Recognition and Keyword Search</title>
       <author><first>Gideon</first><last>Mendels</last></author>
       <author><first>Erica</first><last>Cooper</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>72–81</pages>
       <url hash="c349f8a0">W16-2609</url>
       <doi>10.18653/v1/W16-2609</doi>
@@ -5343,7 +5343,7 @@
       <title><fixed-case>E</fixed-case>mpiri<fixed-case>ST</fixed-case>: <fixed-case>AIPHES</fixed-case> - Robust Tokenization and <fixed-case>POS</fixed-case>-Tagging for Different Genres</title>
       <author><first>Steffen</first><last>Remus</last></author>
       <author><first>Gerold</first><last>Hintz</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Christian M.</first><last>Meyer</last></author>
       <author><first>Darina</first><last>Benikova</last></author>
       <author><first>Judith</first><last>Eckle-Kohler</last></author>
@@ -5377,10 +5377,10 @@
       <booktitle>Proceedings of the Sixth Named Entity Workshop</booktitle>
       <url hash="88ff6055">W16-27</url>
       <editor><first>Xiangyu</first><last>Duan</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
-      <editor><first>A</first><last>Kumaran</last></editor>
+      <editor id="a-kumaran"><first>A</first><last>Kumaran</last></editor>
       <doi>10.18653/v1/W16-27</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -5502,7 +5502,7 @@
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Lemao</first><last>Liu</last></author>
       <author><first>Xiaolin</first><last>Wang</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>78–82</pages>
       <url hash="d197b9e3">W16-2711</url>
       <doi>10.18653/v1/W16-2711</doi>
@@ -5512,7 +5512,7 @@
       <title>Regulating Orthography-Phonology Relationship for <fixed-case>E</fixed-case>nglish to <fixed-case>T</fixed-case>hai Transliteration</title>
       <author><first>Binh Minh</first><last>Nguyen</last></author>
       <author><first>Hoang Gia</first><last>Ngo</last></author>
-      <author><first>Nancy F.</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy F.</first><last>Chen</last></author>
       <pages>83–87</pages>
       <url hash="f4ba4070">W16-2712</url>
       <doi>10.18653/v1/W16-2712</doi>
@@ -5520,7 +5520,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>M</fixed-case>oses-based official baseline for <fixed-case>NEWS</fixed-case> 2016</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>88–90</pages>
       <url hash="1e1c1f7a">W16-2713</url>
       <doi>10.18653/v1/W16-2713</doi>
@@ -5531,7 +5531,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Argument Mining (<fixed-case>A</fixed-case>rg<fixed-case>M</fixed-case>ining2016)</booktitle>
       <url hash="0656cfa4">W16-28</url>
-      <editor><first>Chris</first><last>Reed</last></editor>
+      <editor id="chris-reed"><first>Chris</first><last>Reed</last></editor>
       <doi>10.18653/v1/W16-28</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -5555,7 +5555,7 @@
     <paper id="2">
       <title>Summarizing Multi-Party Argumentative Conversations in Reader Comment on News</title>
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>12–20</pages>
       <url hash="4ba4cb41">W16-2802</url>
       <doi>10.18653/v1/W16-2802</doi>
@@ -5595,7 +5595,7 @@
     <paper id="6">
       <title>Extracting Case Law Sentences for Argumentation about the Meaning of Statutory Terms</title>
       <author><first>Jaromír</first><last>Šavelka</last></author>
-      <author><first>Kevin D.</first><last>Ashley</last></author>
+      <author id="kevin-d-ashley"><first>Kevin D.</first><last>Ashley</last></author>
       <pages>50–59</pages>
       <url hash="6c855ea4">W16-2806</url>
       <doi>10.18653/v1/W16-2806</doi>
@@ -5603,7 +5603,7 @@
     </paper>
     <paper id="7">
       <title>Scrutable Feature Sets for Stance Classification</title>
-      <author><first>Angrosh</first><last>Mandya</last></author>
+      <author id="angrosh-mandya"><first>Angrosh</first><last>Mandya</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <author><first>Adam</first><last>Wyner</last></author>
       <pages>60–69</pages>
@@ -5756,10 +5756,10 @@
     <meta>
       <booktitle>Proceedings of the 15th Workshop on Biomedical Natural Language Processing</booktitle>
       <url hash="ae240577">W16-29</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Jun-ichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Jun-ichi</first><last>Tsujii</last></editor>
       <doi>10.18653/v1/W16-29</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Berlin, Germany</address>
@@ -5773,7 +5773,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Machine Learning Approach to Clinical Terms Normalization</title>
-      <author><first>José</first><last>Castaño</last></author>
+      <author id="jose-castano"><first>José</first><last>Castaño</last></author>
       <author><first>María Laura</first><last>Gambarte</last></author>
       <author><first>Hee Joon</first><last>Park</last></author>
       <author><first>Maria</first><last>del Pilar Avila Williams</last></author>
@@ -5800,9 +5800,9 @@
     <paper id="3">
       <title>Identification, characterization, and grounding of gradable terms in clinical text</title>
       <author><first>Chaitanya</first><last>Shivade</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
-      <author><first>Albert M.</first><last>Lai</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert M.</first><last>Lai</last></author>
       <pages>17–26</pages>
       <url hash="951a8511">W16-2903</url>
       <doi>10.18653/v1/W16-2903</doi>
@@ -5810,7 +5810,7 @@
     </paper>
     <paper id="4">
       <title>Graph-based Semi-supervised Gene Mention Tagging</title>
-      <author><first>Golnar</first><last>Sheikhshab</last></author>
+      <author id="golnar-sheikhshab"><first>Golnar</first><last>Sheikhshab</last></author>
       <author><first>Elizabeth</first><last>Starks</last></author>
       <author><first>Aly</first><last>Karsan</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
@@ -5822,7 +5822,7 @@
     </paper>
     <paper id="5">
       <title>Feature Derivation for Exploitation of Distant Annotation via Pattern Induction against Dependency Parses</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>John</first><last>Niekrasz</last></author>
       <pages>36–45</pages>
       <url hash="461cc835">W16-2905</url>
@@ -5839,8 +5839,8 @@
     </paper>
     <paper id="7">
       <title><fixed-case>S</fixed-case>nap<fixed-case>T</fixed-case>o<fixed-case>G</fixed-case>rid: From Statistical to Interpretable Models for Biomedical Information Extraction</title>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Dane</first><last>Bell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>56–65</pages>
@@ -5870,8 +5870,8 @@
     <paper id="10">
       <title>Using Distributed Representations to Disambiguate Biomedical and Clinical Concepts</title>
       <author><first>Stéphan</first><last>Tulkens</last></author>
-      <author><first>Simon</first><last>Suster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Suster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>77–82</pages>
       <url hash="19b2bf91">W16-2910</url>
       <doi>10.18653/v1/W16-2910</doi>
@@ -5879,7 +5879,7 @@
     </paper>
     <paper id="11">
       <title>Unsupervised Document Classification with Informed Topic Models</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
       <pages>83–91</pages>
@@ -5890,7 +5890,7 @@
     <paper id="12">
       <title>Vocabulary Development To Support Information Extraction of Substance Abuse from Psychiatry Notes</title>
       <author><first>Sumithra</first><last>Velupillai</last></author>
-      <author><first>Danielle L.</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L.</first><last>Mowery</last></author>
       <author><first>Mike</first><last>Conway</last></author>
       <author><first>John</first><last>Hurdle</last></author>
       <author><first>Brent</first><last>Kious</last></author>
@@ -5913,7 +5913,7 @@
     <paper id="14">
       <title>Improving Temporal Relation Extraction with Training Instance Augmentation</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -5936,7 +5936,7 @@
       <title>Measuring the State of the Art of Automated Pathway Curation Using Graph Algorithms - A Case Study of the m<fixed-case>TOR</fixed-case> Pathway</title>
       <author><first>Michael</first><last>Spranger</last></author>
       <author><first>Sucheendra</first><last>Palaniappan</last></author>
-      <author><first>Samik</first><last>Gosh</last></author>
+      <author id="samik-ghosh"><first>Samik</first><last>Gosh</last></author>
       <pages>119–127</pages>
       <url hash="8d54181a">W16-2916</url>
       <doi>10.18653/v1/W16-2916</doi>
@@ -5966,9 +5966,9 @@
       <author><first>Lana</first><last>Yeganova</last></author>
       <author><first>Won</first><last>Kim</last></author>
       <author><first>Sun</first><last>Kim</last></author>
-      <author><first>Rezarta</first><last>Islamaj Doğan</last></author>
+      <author id="rezarta-islamaj-dogan"><first>Rezarta</first><last>Islamaj Doğan</last></author>
       <author><first>Wanli</first><last>Liu</last></author>
-      <author><first>Donald C</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Donald C</first><last>Comeau</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <author><first>W John</first><last>Wilbur</last></author>
       <pages>141–145</pages>
@@ -5978,9 +5978,9 @@
     </paper>
     <paper id="20">
       <title>This before That: Causal Precedence in the Biomedical Domain</title>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Dane</first><last>Bell</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>146–155</pages>
       <url hash="3fb05536">W16-2920</url>
@@ -5992,7 +5992,7 @@
       <author><first>Viviana</first><last>Cotik</last></author>
       <author><first>Vanesa</first><last>Stricker</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
       <pages>156–165</pages>
       <url hash="37e61fc8">W16-2921</url>
       <doi>10.18653/v1/W16-2921</doi>
@@ -6013,7 +6013,7 @@
       <title>An Information Foraging Approach to Determining the Number of Relevant Features</title>
       <author><first>Brian</first><last>Connolly</last></author>
       <author><first>Benjamin</first><last>Glass</last></author>
-      <author><first>John</first><last>Pestian</last></author>
+      <author id="john-pestian"><first>John</first><last>Pestian</last></author>
       <pages>175–180</pages>
       <url hash="3149b51b">W16-2923</url>
       <doi>10.18653/v1/W16-2923</doi>
@@ -6022,10 +6022,10 @@
     <paper id="24">
       <title>Assessing the Feasibility of an Automated Suggestion System for Communicating Critical Findings from Chest Radiology Reports to Referring Physicians</title>
       <author><first>Brian E.</first><last>Chapman</last></author>
-      <author><first>Danielle L.</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L.</first><last>Mowery</last></author>
       <author><first>Evan</first><last>Narasimhan</last></author>
       <author><first>Neel</first><last>Patel</last></author>
-      <author><first>Wendy</first><last>Chapman</last></author>
+      <author id="wendy-chapman"><first>Wendy</first><last>Chapman</last></author>
       <author><first>Marta</first><last>Heilbrun</last></author>
       <pages>181–185</pages>
       <url hash="0312ee26">W16-2924</url>
@@ -6052,12 +6052,12 @@
     </paper>
     <paper id="27">
       <title>Identifying First Episodes of Psychosis in Psychiatric Patient Records using Machine Learning</title>
-      <author><first>Genevieve</first><last>Gorrell</last></author>
+      <author id="genevieve-gorrell"><first>Genevieve</first><last>Gorrell</last></author>
       <author><first>Sherifat</first><last>Oduola</last></author>
       <author><first>Angus</first><last>Roberts</last></author>
       <author><first>Tom</first><last>Craig</last></author>
       <author><first>Craig</first><last>Morgan</last></author>
-      <author><first>Rob</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Rob</first><last>Stewart</last></author>
       <pages>196–205</pages>
       <url hash="5494f034">W16-2927</url>
       <doi>10.18653/v1/W16-2927</doi>
@@ -6079,7 +6079,7 @@
     <meta>
       <booktitle>Proceedings of the 4th <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Shared Task Workshop</booktitle>
       <url hash="d6351653">W16-30</url>
-      <editor><first>Claire</first><last>Nėdellec</last></editor>
+      <editor id="claire-nedellec"><first>Claire</first><last>Nėdellec</last></editor>
       <editor><first>Robert</first><last>Bossy</last></editor>
       <editor><first>Jin-Dong</first><last>Kim</last></editor>
       <doi>10.18653/v1/W16-30</doi>
@@ -6102,7 +6102,7 @@
       <author><first>Robert</first><last>Bossy</last></author>
       <author><first>Mouhamadou</first><last>Ba</last></author>
       <author><first>Louise</first><last>Deléger</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Philippe</first><last>Bessières</last></author>
       <author><first>Loic</first><last>Lepiniec</last></author>
       <author><first>Claire</first><last>Nédellec</last></author>
@@ -6151,7 +6151,7 @@
     <paper id="5">
       <title><fixed-case>VERSE</fixed-case>: Event and Relation Extraction in the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2016 Shared Task</title>
       <author><first>Jake</first><last>Lever</last></author>
-      <author><first>Steven JM</first><last>Jones</last></author>
+      <author id="steven-jm-jones"><first>Steven JM</first><last>Jones</last></author>
       <pages>42–49</pages>
       <url hash="bfc6cc03">W16-3005</url>
       <doi>10.18653/v1/W16-3005</doi>
@@ -6159,9 +6159,9 @@
     </paper>
     <paper id="6">
       <title>A dictionary- and rule-based system for identification of bacteria and habitats in text</title>
-      <author><first>Helen V</first><last>Cook</last></author>
+      <author id="helen-v-cook"><first>Helen V</first><last>Cook</last></author>
       <author><first>Evangelos</first><last>Pafilis</last></author>
-      <author><first>Lars Juhl</first><last>Jensen</last></author>
+      <author id="lars-juhl-jensen"><first>Lars Juhl</first><last>Jensen</last></author>
       <pages>50–55</pages>
       <url hash="a321929f">W16-3006</url>
       <doi>10.18653/v1/W16-3006</doi>
@@ -6173,7 +6173,7 @@
       <author><first>Hakan</first><last>Şahin</last></author>
       <author><first>Berfu</first><last>Büyüköz</last></author>
       <author><first>Alper</first><last>Yayıkçı</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>56–63</pages>
       <url hash="2796b774">W16-3007</url>
       <doi>10.18653/v1/W16-3007</doi>
@@ -6201,10 +6201,10 @@
     </paper>
     <paper id="10">
       <title><fixed-case>S</fixed-case>ee<fixed-case>D</fixed-case>ev Binary Event Extraction using <fixed-case>SVM</fixed-case>s and a Rich Feature Set</title>
-      <author><first>Nagesh</first><last>C. Panyam</last></author>
+      <author id="nagesh-c-panyam"><first>Nagesh</first><last>C. Panyam</last></author>
       <author><first>Gitansh</first><last>Khirbat</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Kotagiri</first><last>Ramamohanarao</last></author>
       <pages>82–87</pages>
       <url hash="a7eef606">W16-3010</url>
@@ -6213,10 +6213,10 @@
     </paper>
     <paper id="11">
       <title>Extraction of Regulatory Events using Kernel-based Classifiers and Distant Supervision</title>
-      <author><first>Andre</first><last>Lamurias</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
       <author><first>Miguel J.</first><last>Rodrigues</last></author>
-      <author><first>Luka A.</first><last>Clarke</last></author>
-      <author><first>Francisco M.</first><last>Couto</last></author>
+      <author id="luka-a-clarke"><first>Luka A.</first><last>Clarke</last></author>
+      <author id="francisco-m-couto"><first>Francisco M.</first><last>Couto</last></author>
       <pages>88–92</pages>
       <url hash="c8e2c059">W16-3011</url>
       <doi>10.18653/v1/W16-3011</doi>
@@ -6226,7 +6226,7 @@
       <title><fixed-case>DUTIR</fixed-case> in <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>ST</fixed-case> 2016: Utilizing Convolutional Network and Distributed Representation to Extract Complicate Relations</title>
       <author><first>Honglei</first><last>Li</last></author>
       <author><first>Jianhai</first><last>Zhang</last></author>
-      <author id="jian-wang"><first>Jian</first><last>Wang</last></author>
+      <author><first>Jian</first><last>Wang</last></author>
       <author><first>Hongfei</first><last>Lin</last></author>
       <author><first>Zhihao</first><last>Yang</last></author>
       <pages>93–100</pages>
@@ -6250,7 +6250,7 @@
     <meta>
       <booktitle>Proceedings of the Fourth <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> workshop</booktitle>
       <url hash="e6e6c182">W16-31</url>
-      <editor><first>Ioannis A.</first><last>Kakadiaris</last></editor>
+      <editor id="ioannis-kakadiaris"><first>Ioannis A.</first><last>Kakadiaris</last></editor>
       <editor><first>George</first><last>Paliouras</last></editor>
       <editor><first>Anastasia</first><last>Krithara</last></editor>
       <doi>10.18653/v1/W16-31</doi>
@@ -6278,7 +6278,7 @@
     <paper id="2">
       <title>Using Learning-To-Rank to Enhance <fixed-case>NLM</fixed-case> Medical Text Indexer Results</title>
       <author><first>Ilya</first><last>Zavorin</last></author>
-      <author><first>James</first><last>Mork</last></author>
+      <author id="james-g-mork"><first>James</first><last>Mork</last></author>
       <author><first>Dina</first><last>Demner-Fushman</last></author>
       <pages>8–15</pages>
       <url hash="766f84fc">W16-3102</url>
@@ -6287,7 +6287,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>LABDA</fixed-case> at the 2016 <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> challenge task 4a: Semantic Indexing by using <fixed-case>E</fixed-case>lastic<fixed-case>S</fixed-case>earch</title>
-      <author><first>Isabel</first><last>Segura-Bedmar</last></author>
+      <author id="isabel-segura-bedmar"><first>Isabel</first><last>Segura-Bedmar</last></author>
       <author><first>Adrián</first><last>Carruana</last></author>
       <author><first>Paloma</first><last>Martínez</last></author>
       <pages>16–22</pages>
@@ -6299,7 +6299,7 @@
       <title>Learning to Answer Biomedical Questions: <fixed-case>OAQA</fixed-case> at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 4<fixed-case>B</fixed-case></title>
       <author><first>Zi</first><last>Yang</last></author>
       <author><first>Yue</first><last>Zhou</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>23–37</pages>
       <url hash="5d8d47bc">W16-3104</url>
       <doi>10.18653/v1/W16-3104</doi>
@@ -6322,12 +6322,12 @@
     </paper>
     <paper id="6">
       <title><fixed-case>KSA</fixed-case>nswer: Question-answering System of Kangwon National University and Sogang University in the 2016 <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> Challenge</title>
-      <author><first>Hyeon-gu</first><last>Lee</last></author>
+      <author id="hyeon-gu-lee"><first>Hyeon-gu</first><last>Lee</last></author>
       <author><first>Minkyoung</first><last>Kim</last></author>
       <author><first>Harksoo</first><last>Kim</last></author>
       <author><first>Juae</first><last>Kim</last></author>
       <author><first>Sunjae</first><last>Kwon</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <author><first>Yi-reun</first><last>Kim</last></author>
       <author><first>Jung-Kyu</first><last>Choi</last></author>
       <pages>45–49</pages>
@@ -6355,7 +6355,7 @@
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Vision and Language</booktitle>
       <url hash="0218f4d3">W16-32</url>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Erkut</first><last>Erdem</last></editor>
       <editor><first>Krystian</first><last>Mikolajczyk</last></editor>
       <editor><first>Katerina</first><last>Pastra</last></editor>
@@ -6384,7 +6384,7 @@
     </paper>
     <paper id="2">
       <title>Combining Lexical and Spatial Knowledge to Predict Spatial Relations between Objects in Images</title>
-      <author><first>Manuela</first><last>Hürlimann</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Hürlimann</last></author>
       <author><first>Johan</first><last>Bos</last></author>
       <pages>10–18</pages>
       <url hash="857278a9">W16-3202</url>
@@ -6444,7 +6444,7 @@
       <title>Building a Bagpipe with a Bag and a Pipe: Exploring Conceptual Combination in Vision</title>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Ravi</first><last>Shekhar</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>60–64</pages>
       <url hash="b8cabc17">W16-3208</url>
       <doi>10.18653/v1/W16-3208</doi>
@@ -6464,7 +6464,7 @@
       <title><fixed-case>M</fixed-case>ulti30<fixed-case>K</fixed-case>: Multilingual <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Image Descriptions</title>
       <author><first>Desmond</first><last>Elliott</last></author>
       <author><first>Stella</first><last>Frank</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>70–74</pages>
       <url hash="db735aa0">W16-3210</url>
@@ -6473,12 +6473,12 @@
     </paper>
     <paper id="11">
       <title>“Look, some Green Circles!”: Learning to Quantify from Images</title>
-      <author><first>Ionut</first><last>Sorodoc</last></author>
+      <author id="ionut-sorodoc"><first>Ionut</first><last>Sorodoc</last></author>
       <author><first>Angeliki</first><last>Lazaridou</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>75–79</pages>
       <url hash="c844381c">W16-3211</url>
       <doi>10.18653/v1/W16-3211</doi>
@@ -6523,7 +6523,7 @@
     <paper id="1">
       <title>Coordination in <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars: Excorporation and Across the Board (Head) Movement</title>
       <author><first>John</first><last>Torr</last></author>
-      <author><first>Edward P.</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward P.</first><last>Stabler</last></author>
       <pages>1–17</pages>
       <url hash="0964c3a3">W16-3301</url>
       <bibkey>torr-stabler-2016-coordination</bibkey>
@@ -6575,7 +6575,7 @@
     </paper>
     <paper id="7">
       <title>Modelling the ziji Blocking Effect and Constraining Bound Variable Derivations in <fixed-case>MC</fixed-case>-<fixed-case>TAG</fixed-case> with Delayed Locality</title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <pages>67–76</pages>
       <url hash="cc711ac7">W16-3307</url>
       <bibkey>storoshenko-2016-modelling</bibkey>
@@ -6592,8 +6592,8 @@
       <author><first>Wonchang</first><last>Chung</last></author>
       <author><first>Suhas Siddhesh</first><last>Mhatre</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>85–92</pages>
       <url hash="98de0fc9">W16-3309</url>
       <bibkey>chung-etal-2016-revisiting</bibkey>
@@ -6610,14 +6610,14 @@
     <paper id="11">
       <title>Hyperedge Replacement and Nonprojective Dependency Structures</title>
       <author><first>Daniel</first><last>Bauer</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>103–111</pages>
       <url hash="242e0a8e">W16-3311</url>
       <bibkey>bauer-rambow-2016-hyperedge</bibkey>
     </paper>
     <paper id="12">
       <title>Parasitic Gaps and the Heterogeneity of Dependency Formation in <fixed-case>STAG</fixed-case></title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <pages>112–120</pages>
       <url hash="5c1f0268">W16-3312</url>
@@ -6656,7 +6656,7 @@
     <paper id="3">
       <title>Improving Phrase-Based <fixed-case>SMT</fixed-case> Using Cross-Granularity Embedding Similarity</title>
       <author><first>Peyman</first><last>Passban</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
@@ -6677,8 +6677,8 @@
     </paper>
     <paper id="5">
       <title>Stand-off Annotation of Web Content as a Legally Safer Alternative to Crawling for Distribution</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6689,7 +6689,7 @@
     <paper id="6">
       <title>Combining Translation Memories and Syntax-Based <fixed-case>SMT</fixed-case>: Experiments with Real Industrial Data</title>
       <author><first>Liangyou</first><last>Li</last></author>
-      <author><first>Carla Parra</first><last>Escartin</last></author>
+      <author id="carla-parra-escartin"><first>Carla Parra</first><last>Escartin</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6699,7 +6699,7 @@
     </paper>
     <paper id="7">
       <title>The Trouble with Machine Translation Coherence</title>
-      <author><first>Karin Sim</first><last>Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin Sim</first><last>Smith</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
@@ -6711,7 +6711,7 @@
     <paper id="8">
       <title>Pivoting Methods and Data for <fixed-case>C</fixed-case>zech-<fixed-case>V</fixed-case>ietnamese Translation via <fixed-case>E</fixed-case>nglish</title>
       <author><first>Duc Tam</first><last>Hoang</last></author>
-      <author><first>Ondrej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondrej</first><last>Bojar</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>190-202</pages>
@@ -6721,7 +6721,7 @@
     <paper id="9">
       <title>Detecting Grammatical Errors in Machine Translation Output Using Dependency Parsing and Treebank Querying</title>
       <author><first>Arda</first><last>Tezcan</last></author>
-      <author><first>Veronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Veronique</first><last>Hoste</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6731,8 +6731,8 @@
     </paper>
     <paper id="10">
       <title>Potential and Limits of Using Post-edits as Reference Translations for <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Maja</first><last>Popovic</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <author><first>Arle</first><last>Lommel</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6742,8 +6742,8 @@
     </paper>
     <paper id="11">
       <title>Can Text Simplification Help Machine Translation?</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>230-242</pages>
@@ -6762,9 +6762,9 @@
     </paper>
     <paper id="13">
       <title>Semantic Textual Similarity in Quality Estimation</title>
-      <author><first>Hanna</first><last>Bechara</last></author>
-      <author><first>Carla Parra</first><last>Escartin</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="hannah-bechara"><first>Hanna</first><last>Bechara</last></author>
+      <author id="carla-parra-escartin"><first>Carla Parra</first><last>Escartin</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6776,7 +6776,7 @@
       <title>Climbing Mont <fixed-case>BLEU</fixed-case>: The Strange World of Reachable High-<fixed-case>BLEU</fixed-case> Translations</title>
       <author><first>Aaron</first><last>Smith</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
-      <author><first>Joerg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Joerg</first><last>Tiedemann</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>269-281</pages>
@@ -6786,8 +6786,8 @@
     <paper id="15">
       <title>Interactive-Predictive Translation Based on Multiple Word-Segments</title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Alvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Alvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>282-291</pages>
@@ -6796,8 +6796,8 @@
     </paper>
     <paper id="16">
       <title>A Contextual Language Model to Improve Machine Translation of Pronouns by Re-ranking Translation Hypotheses</title>
-      <author><first>Ngoc Quang</first><last>Luong</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="ngoc-quang-luong"><first>Ngoc Quang</first><last>Luong</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>292-304</pages>
@@ -6826,10 +6826,10 @@
     </paper>
     <paper id="19">
       <title>Measuring Cognitive Translation Effort with Activity Units</title>
-      <author><first>Moritz Jonas</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz Jonas</first><last>Schaeffer</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <author><first>Isabel</first><last>Lacruz</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>331-34195</pages>
@@ -6848,7 +6848,7 @@
     </paper>
     <paper id="21">
       <title>Dealing with Data Sparseness in <fixed-case>SMT</fixed-case> with Factured Models and Morphological Expansion: a Case Study on <fixed-case>C</fixed-case>roatian</title>
-      <author><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Victor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
@@ -6860,7 +6860,7 @@
     <paper id="22">
       <title>Collaborative Development of a Rule-Based Machine Translator between <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian</title>
       <author><first>Filip</first><last>Klubička</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
@@ -6871,8 +6871,8 @@
     <paper id="23">
       <title>Re-assessing the Impact of <fixed-case>SMT</fixed-case> Techniques with Human Evaluation: a Case Study on <fixed-case>E</fixed-case>nglish—<fixed-case>C</fixed-case>roatian</title>
       <author><first>Antonio</first><last>Toral</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <journal>Baltic Journal of Modern Computing</journal>
       <issue>2</issue>
       <pages>368-375</pages>
@@ -6915,7 +6915,7 @@
     <paper id="3">
       <title>Processing Document Collections to Automatically Extract Linked Data: Semantic Storytelling Technologies for Smart Curation Workflows</title>
       <author><first>Peter</first><last>Bourgonje</last></author>
-      <author><first>Julian</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno Schneider</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <author><first>Felix</first><last>Sasaki</last></author>
       <pages>13–16</pages>
@@ -6924,8 +6924,8 @@
     </paper>
     <paper id="4">
       <title>On the Robustness of Standalone Referring Expression Generation Algorithms Using <fixed-case>RDF</fixed-case> Data</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
-      <author><first>Martin Ariel</first><last>Domínguez</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
+      <author id="martin-ariel-dominguez"><first>Martin Ariel</first><last>Domínguez</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <pages>17–24</pages>
       <url hash="649f1047">W16-3504</url>
@@ -7019,7 +7019,7 @@
     <meta>
       <booktitle>Proceedings of the 17th Annual Meeting of the Special Interest Group on Discourse and Dialogue</booktitle>
       <url hash="9e9eb5b8">W16-36</url>
-      <editor><first>Raquel</first><last>Fernandez</last></editor>
+      <editor id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last></editor>
       <editor><first>Wolfgang</first><last>Minker</last></editor>
       <editor><first>Giuseppe</first><last>Carenini</last></editor>
       <editor><first>Ryuichiro</first><last>Higashinaka</last></editor>
@@ -7047,8 +7047,8 @@
     </paper>
     <paper id="2">
       <title>Task Lineages: Dialog State Tracking for Flexible Interaction</title>
-      <author><first>Sungjin</first><last>Lee</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>11–21</pages>
       <url hash="9e1a136a">W16-3602</url>
       <doi>10.18653/v1/W16-3602</doi>
@@ -7069,8 +7069,8 @@
       <author><first>Vrindavan</first><last>Harrison</last></author>
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Ernesto</first><last>Hernandez</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>31–41</pages>
       <url hash="eae71aea">W16-3604</url>
       <doi>10.18653/v1/W16-3604</doi>
@@ -7079,11 +7079,11 @@
     <paper id="5">
       <title>The <fixed-case>SENSEI</fixed-case> Annotated Corpus: Human Summaries of Reader Comment Conversations in On-line News</title>
       <author><first>Emma</first><last>Barker</last></author>
-      <author><first>Monica Lestari</first><last>Paramita</last></author>
+      <author id="monica-lestari-paramita"><first>Monica Lestari</first><last>Paramita</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Emina</first><last>Kurtic</last></author>
+      <author id="emina-kurtic"><first>Emina</first><last>Kurtic</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>42–52</pages>
       <url hash="7f6b62df">W16-3605</url>
       <doi>10.18653/v1/W16-3605</doi>
@@ -7092,7 +7092,7 @@
     <paper id="6">
       <title>Special Session - The Future Directions of Dialogue-Based Intelligent Personal Assistants</title>
       <author><first>Yoichi</first><last>Matsuyama</last></author>
-      <author><first>Alexandros</first><last>Papangelis</last></author>
+      <author id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></author>
       <pages>53</pages>
       <url hash="74cd85f5">W16-3606</url>
       <doi>10.18653/v1/W16-3606</doi>
@@ -7100,7 +7100,7 @@
     </paper>
     <paper id="7">
       <title>Keynote - More than meets the ear: Processes that shape dialogue</title>
-      <author><first>Susan</first><last>Brennan</last></author>
+      <author id="susan-e-brennan"><first>Susan</first><last>Brennan</last></author>
       <pages>54</pages>
       <url hash="0fb28a43">W16-3607</url>
       <doi>10.18653/v1/W16-3607</doi>
@@ -7110,8 +7110,8 @@
       <title>A <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Study on A Non-Task-Oriented Dialog Systems That Reacts to User Engagement</title>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Leah</first><last>Nicolich-Henkin</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>55–63</pages>
       <url hash="5aa2cb60">W16-3608</url>
       <doi>10.18653/v1/W16-3608</doi>
@@ -7158,7 +7158,7 @@
     <paper id="12">
       <title>Character Identification on Multiparty Conversation: Identifying Mentions of Characters in <fixed-case>TV</fixed-case> Shows</title>
       <author><first>Yu-Hsin</first><last>Chen</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>90–100</pages>
       <url hash="b0b0de8a">W16-3612</url>
       <doi>10.18653/v1/W16-3612</doi>
@@ -7187,9 +7187,9 @@
     </paper>
     <paper id="15">
       <title>Extracting <fixed-case>PDTB</fixed-case> Discourse Relations from Student Essays</title>
-      <author><first>Kate</first><last>Forbes-Riley</last></author>
+      <author id="kate-forbes-riley"><first>Kate</first><last>Forbes-Riley</last></author>
       <author><first>Fan</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>117–127</pages>
       <url hash="26aaf3d6">W16-3615</url>
       <doi>10.18653/v1/W16-3615</doi>
@@ -7209,7 +7209,7 @@
       <title>The Role of Discourse Units in Near-Extractive Summarization</title>
       <author><first>Junyi Jessy</first><last>Li</last></author>
       <author><first>Kapil</first><last>Thadani</last></author>
-      <author><first>Amanda</first><last>Stent</last></author>
+      <author id="amanda-stent"><first>Amanda</first><last>Stent</last></author>
       <pages>137–147</pages>
       <url hash="c7bbfacb">W16-3617</url>
       <doi>10.18653/v1/W16-3617</doi>
@@ -7218,8 +7218,8 @@
     <paper id="18">
       <title>Initiations and Interruptions in a Spoken Dialog System</title>
       <author><first>Leah</first><last>Nicolich-Henkin</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>148–156</pages>
       <url hash="1179dabf">W16-3618</url>
       <doi>10.18653/v1/W16-3618</doi>
@@ -7247,9 +7247,9 @@
     <paper id="21">
       <title>Syntactic parsing of chat language in contact center conversation corpus</title>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Aleksandra</first><last>Guerraz</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <pages>175–184</pages>
       <url hash="57f279be">W16-3621</url>
       <doi>10.18653/v1/W16-3621</doi>
@@ -7258,7 +7258,7 @@
     <paper id="22">
       <title>A Context-aware Natural Language Generator for Dialogue Systems</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>185–190</pages>
       <url hash="7ca5bb18">W16-3622</url>
       <doi>10.18653/v1/W16-3622</doi>
@@ -7268,7 +7268,7 @@
       <title>Identifying Teacher Questions Using Automatic Speech Recognition in Classrooms</title>
       <author><first>Nathaniel</first><last>Blanchard</last></author>
       <author><first>Patrick</first><last>Donnelly</last></author>
-      <author><first>Andrew M.</first><last>Olney</last></author>
+      <author id="andrew-olney"><first>Andrew M.</first><last>Olney</last></author>
       <author><first>Borhan</first><last>Samei</last></author>
       <author><first>Brooke</first><last>Ward</last></author>
       <author><first>Xiaoyi</first><last>Sun</last></author>
@@ -7304,7 +7304,7 @@
       <title>Rapid Prototyping of Form-driven Dialogue Systems Using an Open-source Framework</title>
       <author><first>Svetlana</first><last>Stoyanchev</last></author>
       <author><first>Pierre</first><last>Lison</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>216–219</pages>
       <url hash="44a1d0c9">W16-3626</url>
       <doi>10.18653/v1/W16-3626</doi>
@@ -7312,9 +7312,9 @@
     </paper>
     <paper id="27">
       <title><fixed-case>LVCSR</fixed-case> System on a Hybrid <fixed-case>GPU</fixed-case>-<fixed-case>CPU</fixed-case> Embedded Platform for Real-Time Dialog Applications</title>
-      <author><first>Alexei V.</first><last>Ivanov</last></author>
-      <author><first>Patrick L.</first><last>Lange</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="alexei-v-ivanov"><first>Alexei V.</first><last>Ivanov</last></author>
+      <author id="patrick-l-lange"><first>Patrick L.</first><last>Lange</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>220–223</pages>
       <url hash="b746593d">W16-3627</url>
       <doi>10.18653/v1/W16-3627</doi>
@@ -7336,7 +7336,7 @@
     <paper id="29">
       <title>Selection method of an appropriate response in chat-oriented dialogue systems</title>
       <author><first>Hideaki</first><last>Mori</last></author>
-      <author><first>Masahiro</first><last>Araki</last></author>
+      <author id="masahiro-araki"><first>Masahiro</first><last>Araki</last></author>
       <pages>228–231</pages>
       <url hash="ffebb615">W16-3629</url>
       <doi>10.18653/v1/W16-3629</doi>
@@ -7344,8 +7344,8 @@
     </paper>
     <paper id="30">
       <title>Real-Time Understanding of Complex Discriminative Scene Descriptions</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>232–241</pages>
@@ -7355,7 +7355,7 @@
     </paper>
     <paper id="31">
       <title>Supporting Spoken Assistant Systems with a Graphical User Interface that Signals Incremental Understanding and Prediction State</title>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>242–251</pages>
       <url hash="3fe41ddf">W16-3631</url>
@@ -7364,7 +7364,7 @@
     </paper>
     <paper id="32">
       <title>Toward incremental dialogue act segmentation in fast-paced interactive dialogue systems</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>Maike</first><last>Paetzel</last></author>
       <author><first>Cheng</first><last>Qu</last></author>
       <author><first>David</first><last>Schlangen</last></author>
@@ -7385,7 +7385,7 @@
     <paper id="34">
       <title>On the Evaluation of Dialogue Systems with Next Utterance Classification</title>
       <author><first>Ryan</first><last>Lowe</last></author>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
       <author><first>Michael</first><last>Noseworthy</last></author>
       <author><first>Laurent</first><last>Charlin</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
@@ -7396,8 +7396,8 @@
     </paper>
     <paper id="35">
       <title>Towards Using Conversations with Spoken Dialogue Systems in the Automated Assessment of Non-Native Speakers of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Diane</first><last>Litman</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Mark</first><last>Gales</last></author>
       <author><first>Kate</first><last>Knill</last></author>
       <author><first>Karen</first><last>Ottewell</last></author>
@@ -7412,7 +7412,7 @@
       <title>Measuring the Similarity of Sentential Arguments in Dialogue</title>
       <author><first>Amita</first><last>Misra</last></author>
       <author><first>Brian</first><last>Ecker</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>276–287</pages>
       <url hash="87197a7c">W16-3636</url>
       <doi>10.18653/v1/W16-3636</doi>
@@ -7430,7 +7430,7 @@
     <paper id="38">
       <title>Do Characters Abuse More Than Words?</title>
       <author><first>Yashar</first><last>Mehdad</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>299–303</pages>
       <url hash="3ae34259">W16-3638</url>
       <doi>10.18653/v1/W16-3638</doi>
@@ -7454,7 +7454,7 @@
       <author><first>Masahiro</first><last>Mizukami</last></author>
       <author><first>Koichiro</first><last>Yoshino</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>310–318</pages>
       <url hash="11862f96">W16-3640</url>
@@ -7475,7 +7475,7 @@
     <paper id="42">
       <title>Reference Resolution in Situated Dialogue with Learned Semantics</title>
       <author><first>Xiaolong</first><last>Li</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>329–338</pages>
       <url hash="a129bd0c">W16-3642</url>
       <doi>10.18653/v1/W16-3642</doi>
@@ -7495,7 +7495,7 @@
       <title>Learning Fine-Grained Knowledge about Contingent Relations between Everyday Events</title>
       <author><first>Elahe</first><last>Rahimtoroghi</last></author>
       <author><first>Ernesto</first><last>Hernandez</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>350–359</pages>
       <url hash="c2b34dba">W16-3644</url>
       <doi>10.18653/v1/W16-3644</doi>
@@ -7526,7 +7526,7 @@
       <title>Automatic Recognition of Conversational Strategies in the Service of a Socially-Aware Dialog System</title>
       <author><first>Ran</first><last>Zhao</last></author>
       <author><first>Tanmay</first><last>Sinha</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <author><first>Justine</first><last>Cassell</last></author>
       <pages>381–392</pages>
       <url hash="4bd36566">W16-3647</url>
@@ -7546,8 +7546,8 @@
       <title>Strategy and Policy Learning for Non-Task-Oriented Conversational Systems</title>
       <author><first>Zhou</first><last>Yu</last></author>
       <author><first>Ziyu</first><last>Xu</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <pages>404–412</pages>
       <url hash="7782eede">W16-3649</url>
       <doi>10.18653/v1/W16-3649</doi>
@@ -7559,7 +7559,7 @@
       <booktitle>Proceedings of the 6th Workshop on South and Southeast <fixed-case>A</fixed-case>sian Natural Language Processing (<fixed-case>WSSANLP</fixed-case>2016)</booktitle>
       <url hash="39688d71">W16-37</url>
       <editor><first>Dekai</first><last>Wu</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -7573,7 +7573,7 @@
     <paper id="1">
       <title>Compound Type Identification in <fixed-case>S</fixed-case>anskrit: What Roles do the Corpus and Grammar Play?</title>
       <author><first>Amrith</first><last>Krishna</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Shubham</first><last>Sharma</last></author>
       <author><first>Apurv</first><last>Kumar</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
@@ -7640,7 +7640,7 @@
       <title>Enriching Source for <fixed-case>E</fixed-case>nglish-to-<fixed-case>U</fixed-case>rdu Machine Translation</title>
       <author><first>Bushra</first><last>Jawaid</last></author>
       <author><first>Amir</first><last>Kamran</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>54–63</pages>
       <url hash="882de253">W16-3706</url>
       <abstract>This paper focuses on the generation of case markers for free word order languages that use case markers as phrasal clitics for marking the relationship between the dependent-noun and its head. The generation of such clitics becomes essential task especially when translating from fixed word order languages where syntactic relations are identified by the positions of the dependent-nouns. To address the problem of missing markers on source-side, artificial markers are added in source to improve alignments with its target counterparts. Up to 1 BLEU point increase is observed over the baseline on different test sets for English-to-Urdu.</abstract>
@@ -7650,8 +7650,8 @@
       <title>The <fixed-case>IMAGACT</fixed-case>4<fixed-case>ALL</fixed-case> Ontology of Animated Images: Implications for Theoretical and Machine Translation of Action Verbs from <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Pitambar</first><last>Behera</last></author>
       <author><first>Sharmin</first><last>Muzaffar</last></author>
-      <author><first>Atul Ku.</first><last>Ojha</last></author>
-      <author><first>Girish</first><last>Jha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Ku.</first><last>Ojha</last></author>
+      <author id="girish-nath-jha"><first>Girish</first><last>Jha</last></author>
       <pages>64–73</pages>
       <url hash="cabe047c">W16-3707</url>
       <abstract>Action verbs are one of the frequently occurring linguistic elements in any given natural language as the speakers use them during every linguistic intercourse. However, each language expresses action verbs in its own inherently unique manner by categorization. One verb can refer to several interpretations of actions and one action can be expressed by more than one verb. The inter-language and intra-language variations create ambiguity for the translation of languages from the source language to target language with respect to action verbs. IMAGACT is a corpus-based ontological platform of action verbs translated from prototypic animated images explained in English and Italian as meta-languages. In this paper, we are presenting the issues and challenges in translating action verbs of Indian languages as target and English as source language by observing the animated images. Among the ten Indian languages which have been annotated so far on the platform are Sanskrit, Hindi, Urdu, Odia (Oriya), Bengali, Manipuri, Tamil, Assamese, Magahi and Marathi. Out of them, Manipuri belongs to the Sino-Tibetan, Tamil comes off the Dravidian and the rest owe their genesis to the Indo-Aryan language family. One of the issues is that the one-word morphological English verbs are translated into most of the Indian languages as verbs having more than one-word form; for instance as in the case of conjunct, compound, serial verbs and so on. We are further presenting a cross-lingual comparison of action verbs among Indian languages. In addition, we are also dealing with the issues in disambiguating animated images by the L1 native speakers using competence-based judgements and the theoretical and machine translation implications they bear.</abstract>
@@ -7660,7 +7660,7 @@
     <paper id="8">
       <title>Crowdsourcing-based Annotation of Emotions in <fixed-case>F</fixed-case>ilipino and <fixed-case>E</fixed-case>nglish Tweets</title>
       <author><first>Fermin Roberto</first><last>Lapitan</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Eliezer</first><last>Albacea</last></author>
       <pages>74–82</pages>
       <url hash="49057879">W16-3708</url>
@@ -7715,7 +7715,7 @@
       <title>Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced <fixed-case>ASR</fixed-case></title>
       <author><first>Wenda</first><last>Chen</last></author>
       <author><first>Mark</first><last>Hasegawa-Johnson</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <author><first>Preethi</first><last>Jyothi</last></author>
       <author><first>Lav</first><last>Varshney</last></author>
       <pages>133–141</pages>
@@ -7766,7 +7766,7 @@
     <paper id="19">
       <title>Align Me: A framework to generate Parallel Corpus Using <fixed-case>OCR</fixed-case>s and Bilingual Dictionaries</title>
       <author><first>Priyam</first><last>Bakliwal</last></author>
-      <author><first>Devadath</first><last>V V</last></author>
+      <author id="devadath-v-v"><first>Devadath</first><last>V V</last></author>
       <author><first>C V</first><last>Jawahar</last></author>
       <pages>183–187</pages>
       <url hash="bf6c0bf7">W16-3719</url>
@@ -7775,7 +7775,7 @@
     </paper>
     <paper id="20">
       <title>Learning <fixed-case>I</fixed-case>ndonesian-<fixed-case>C</fixed-case>hinese Lexicon with Bilingual Word Embedding Models and Monolingual Signals</title>
-      <author><first>Xinying</first><last>Qiu</last></author>
+      <author id="xin-ying-qiu"><first>Xinying</first><last>Qiu</last></author>
       <author><first>Gangqin</first><last>Zhu</last></author>
       <pages>188–193</pages>
       <url hash="afdd4337">W16-3720</url>
@@ -7795,8 +7795,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Grammar and Lexicon: interactions and interfaces (<fixed-case>G</fixed-case>ram<fixed-case>L</fixed-case>ex)</booktitle>
       <url hash="b6039f7b">W16-38</url>
-      <editor><first>Eva</first><last>Hajičová</last></editor>
-      <editor><first>Igor</first><last>Boguslavsky</last></editor>
+      <editor id="eva-hajicova"><first>Eva</first><last>Hajičová</last></editor>
+      <editor id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -7817,7 +7817,7 @@
     </paper>
     <paper id="2">
       <title>Multiword Expressions at the Grammar-Lexicon Interface</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>7</pages>
       <url hash="119ce7e7">W16-3802</url>
       <abstract>In this talk, I will outline a range of challenges presented by multiword expressions in terms of (lexicalist) precision grammar engineering, and different strategies for accommodating those challenges, in an attempt to strike the right balance in terms of generalisation and over- and under-generation.</abstract>
@@ -7825,7 +7825,7 @@
     </paper>
     <paper id="3">
       <title>Microsyntactic Phenomena as a Computational Linguistics Issue</title>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <pages>8–17</pages>
       <url hash="dbe3666a">W16-3803</url>
       <abstract>Microsyntactic linguistic units, such as syntactic idioms and non-standard syntactic constructions, are poorly represented in linguistic resources, mostly because the former are elements occupying an intermediate position between the lexicon and the grammar and the latter are too specific to be routinely tackled by general grammars. Consequently, many such units produce substantial gaps in systems intended to solve sophisticated computational linguistics tasks, such as parsing, deep semantic analysis, question answering, machine translation, or text generation. They also present obstacles for applying advanced techniques to these tasks, such as machine learning. The paper discusses an approach aimed at bridging such gaps, focusing on the development of monolingual and multilingual corpora where microsyntactic units are to be tagged.</abstract>
@@ -7833,7 +7833,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>A</fixed-case>lternations: From Lexicon to Grammar And Back Again</title>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <author><first>Václava</first><last>Kettnerová</last></author>
       <pages>18–27</pages>
       <url hash="210c59be">W16-3804</url>
@@ -7843,7 +7843,7 @@
     <paper id="5">
       <title>Extra-Specific Multiword Expressions for Language-Endowed Intelligent Agents</title>
       <author><first>Marjorie</first><last>McShane</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <pages>28–37</pages>
       <url hash="7d97ec39">W16-3805</url>
       <abstract>Language-endowed intelligent agents benefit from leveraging lexical knowledge falling at different points along a spectrum of compositionality. This means that robust computational lexicons should include not only the compositional expectations of argument-taking words, but also non-compositional collocations (idioms), semi-compositional collocations that might be difficult for an agent to interpret (e.g., standard metaphors), and even collocations that could be compositionally analyzed but are so frequently encountered that recording their meaning increases the efficiency of interpretation. In this paper we argue that yet another type of string-to-meaning mapping can also be useful to intelligent agents: remembered semantic analyses of actual text inputs. These can be viewed as super-specific multi-word expressions whose recorded interpretations mimic a person’s memories of knowledge previously learned from language input. These differ from typical annotated corpora in two ways. First, they provide a full, context-sensitive semantic interpretation rather than select features. Second, they are are formulated in the ontologically-grounded metalanguage used in a particular agent environment, meaning that the interpretations contribute to the dynamically evolving cognitive capabilites of agents configured in that environment.</abstract>
@@ -7859,7 +7859,7 @@
     </paper>
     <paper id="7">
       <title>The Development of Multimodal Lexical Resources</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Tuan</first><last>Do</last></author>
       <author><first>Gitit</first><last>Kehat</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
@@ -7879,7 +7879,7 @@
     <paper id="9">
       <title>Improvement of <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et-like resources by frame typing</title>
       <author><first>Laurence</first><last>Danlos</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <author><first>Lucie</first><last>Barque</last></author>
       <pages>61–70</pages>
       <url hash="240f92cb">W16-3809</url>
@@ -7888,9 +7888,9 @@
     </paper>
     <paper id="10">
       <title>Enriching a Valency Lexicon by Deverbative Nouns</title>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
       <pages>71–80</pages>
       <url hash="1c103335">W16-3810</url>
       <abstract>We present an attempt to automatically identify Czech deverbative nouns using several methods that use large corpora as well as existing lexical resources. The motivation for the task is to extend a verbal valency (i.e., predicate-argument) lexicon by adding nouns that share the valency properties with the base verb, assuming their properties can be derived (even if not trivially) from the underlying verb by deterministic grammatical rules. At the same time, even in inflective languages, not all deverbatives are simply created from their underlying base verb by regular lexical derivation processes. We have thus developed hybrid techniques that use both large parallel corpora and several standard lexical resources. Thanks to the use of parallel corpora, the resulting sets contain also synonyms, which the lexical derivation rules cannot get. For evaluation, we have manually created a small, 100-verb gold data since no such dataset was initially available for Czech.</abstract>
@@ -7899,7 +7899,7 @@
     <paper id="11">
       <title>The Grammar of <fixed-case>E</fixed-case>nglish Deverbal Compounds and their Meaning</title>
       <author><first>Gianina</first><last>Iordăchioaia</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Glorianna</first><last>Jagfeld</last></author>
       <pages>81–91</pages>
       <url hash="4ccbdd8b">W16-3811</url>
@@ -7921,7 +7921,7 @@
       <author><first>Akifumi</first><last>Yoshimoto</last></author>
       <author><first>Akihiko</first><last>Kato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>102–109</pages>
       <url hash="907ccbf0">W16-3813</url>
       <abstract>This paper presents our ongoing work on compilation of English multi-word expression (MWE) lexicon. We are especially interested in collecting flexible MWEs, in which some other components can intervene the expression such as “a number of” vs “a large number of” where a modifier of “number” can be placed in the expression and inherit the original meaning. We fiest collect possible candidates of flexible English MWEs from the web, and annotate all of their occurrences in the Wall Street Journal portion of Ontonotes corpus. We make use of word dependency strcuture information of the sentences converted from the phrase structure annotation. This process enables semi-automatic annotation of MWEs in the corpus and simultanaously produces the internal and external dependency representation of flexible MWEs.</abstract>
@@ -7950,9 +7950,9 @@
       <url hash="6aa5caec">W16-39</url>
       <editor><first>Bo</first><last>Han</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <editor><first>Wei</first><last>Xu</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -7965,7 +7965,7 @@
     </frontmatter>
     <paper id="1">
       <title>Processing non-canonical or noisy text: fortuitous data to the rescue</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>1</pages>
       <url hash="7f56f0b3">W16-3901</url>
       <abstract>Real world data differs radically from the benchmark corpora we use in NLP, resulting in large performance drops. The reason for this problem is obvious: NLP models are trained on limited samples from canonical varieties considered standard. However, there are many dimensions, e.g., sociodemographic, language, genre, sentence type, etc. on which texts can differ from the standard. The solution is not obvious: we cannot control for all factors, and it is not clear how to best go beyond the current practice of training on homogeneous data from a single domain and language. In this talk, I review the notion of canonicity, and how it shapes our community’s approach to language. I argue for the use of fortuitous data. Fortuitous data is data out there that just waits to be harvested. It includes data which is in plain sight, but is often neglected, and more distant sources like behavioral data, which first need to be refined. They provide additional contexts and a myriad of opportunities to build more adaptive language technology, some of which I will explore in this talk.</abstract>
@@ -7998,9 +7998,9 @@
     </paper>
     <paper id="5">
       <title>From Noisy Questions to <fixed-case>M</fixed-case>inecraft Texts: Annotation Challenges in Extreme Syntax Scenario</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>13–23</pages>
       <url hash="c12fe1d0">W16-3905</url>
       <abstract>User-generated content presents many challenges for its automatic processing. While many of them do come from out-of-vocabulary effects, others spawn from different linguistic phenomena such as unusual syntax. In this work we present a French three-domain data set made up of question headlines from a cooking forum, game chat logs and associated forums from two popular online games (MINECRAFT &amp; LEAGUE OF LEGENDS). We chose these domains because they encompass different degrees of lexical and syntactic compliance with canonical language. We conduct an automatic and manual evaluation of the difficulties of processing these domains for part-of-speech prediction, and introduce a pilot study to determine whether dependency analysis lends itself well to annotate these data. We also discuss the development cost of our data set.</abstract>
@@ -8018,7 +8018,7 @@
     </paper>
     <paper id="7">
       <title>Veracity Computing from Lexical Cues and Perceived Certainty Trends</title>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <author><first>Piroska</first><last>Lendvai</last></author>
       <pages>33–42</pages>
       <url hash="6fd6850b">W16-3907</url>
@@ -8093,7 +8093,7 @@
       <author><first>Julie</first><last>Pain</last></author>
       <author><first>Jessie</first><last>Levacher</last></author>
       <author><first>Adam</first><last>Quinquenel</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>94–101</pages>
       <url hash="e20030d4">W16-3914</url>
       <abstract>Postmarketing surveillance (PMS) has the vital aim to monitor effects of drugs after release for use by the general population, but suffers from under-reporting and limited coverage. Automatic methods for detecting drug effect reports, especially for social media, could vastly increase the scope of PMS. Very few automatic PMS methods are currently available, in particular for the messy text types encountered on Twitter. In this paper we describe first results for developing PMS methods specifically for tweets. We describe the corpus of 125,669 tweets we have created and annotated to train and test the tools. We find that generic tools perform well for tweet-level language identification and tweet-level sentiment analysis (both 0.94 F1-Score). For detection of effect mentions we are able to achieve 0.87 F1-Score, while effect-level adverse-vs.-beneficial analysis proves harder with an F1-Score of 0.64. Among other things, our results indicate that MetaMap semantic types provide a very promising basis for identifying drug effect mentions in tweets.</abstract>
@@ -8112,7 +8112,7 @@
     <paper id="16">
       <title>Exploring Word Embeddings for Unsupervised Textual User-Generated Content Normalization</title>
       <author><first>Thales Felipe</first><last>Costa Bertaglia</last></author>
-      <author><first>Maria das Graças</first><last>Volpe Nunes</last></author>
+      <author id="maria-das-gracas-volpe-nunes"><first>Maria das Graças</first><last>Volpe Nunes</last></author>
       <pages>112–120</pages>
       <url hash="3867f38e">W16-3916</url>
       <abstract>Text normalization techniques based on rules, lexicons or supervised training requiring large corpora are not scalable nor domain interchangeable, and this makes them unsuitable for normalizing user-generated content (UGC). Current tools available for Brazilian Portuguese make use of such techniques. In this work we propose a technique based on distributed representation of words (or word embeddings). It generates continuous numeric vectors of high-dimensionality to represent words. The vectors explicitly encode many linguistic regularities and patterns, as well as syntactic and semantic word relationships. Words that share semantic similarity are represented by similar vectors. Based on these features, we present a totally unsupervised, expandable and language and domain independent method for learning normalization lexicons from word embeddings. Our approach obtains high correction rate of orthographic errors and internet slang in product reviews, outperforming the current available tools for Brazilian Portuguese.</abstract>
@@ -8132,7 +8132,7 @@
       <title><fixed-case>J</fixed-case>apanese Text Normalization with Encoder-Decoder Model</title>
       <author><first>Taishi</first><last>Ikeda</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>129–137</pages>
       <url hash="d2482776">W16-3918</url>
       <abstract>Text normalization is the task of transforming lexical variants to their canonical forms. We model the problem of text normalization as a character-level sequence to sequence learning problem and present a neural encoder-decoder model for solving it. To train the encoder-decoder model, many sentences pairs are generally required. However, Japanese non-standard canonical pairs are scarce in the form of parallel corpora. To address this issue, we propose a method of data augmentation to increase data size by converting existing resources into synthesized non-standard forms using handcrafted rules. We conducted an experiment to demonstrate that the synthesized corpus contributes to stably train an encoder-decoder model and improve the performance of Japanese text normalization.</abstract>
@@ -8143,7 +8143,7 @@
       <author><first>Benjamin</first><last>Strauss</last></author>
       <author><first>Bethany</first><last>Toma</last></author>
       <author><first>Alan</first><last>Ritter</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Wei</first><last>Xu</last></author>
       <pages>138–144</pages>
       <url hash="0119d2e1">W16-3919</url>
@@ -8162,7 +8162,7 @@
     <paper id="21">
       <title>Learning to recognise named entities in tweets by exploiting weakly labelled data</title>
       <author><first>Kurt Junshean</first><last>Espinosa</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <author><first>Sophia</first><last>Ananiadou</last></author>
       <pages>153–163</pages>
       <url hash="39fd6b92">W16-3921</url>
@@ -8171,8 +8171,8 @@
     </paper>
     <paper id="22">
       <title>Feature-Rich <fixed-case>T</fixed-case>witter Named Entity Recognition and Classification</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>164–170</pages>
       <url hash="611f16cb">W16-3922</url>
       <abstract>Twitter named entity recognition is the process of identifying proper names and classifying them into some predefined labels/categories. The paper introduces a Twitter named entity system using a supervised machine learning approach, namely Conditional Random Fields. A large set of different features was developed and the system was trained using these. The Twitter named entity task can be divided into two parts: i) Named entity extraction from tweets and ii) Twitter name classification into ten different types. For Twitter named entity recognition on unseen test data, our system obtained the second highest F1 score in the shared task: 63.22%. The system performance on the classification task was worse, with an F1 measure of 40.06% on unseen test data, which was the fourth best of the ten systems participating in the shared task.</abstract>
@@ -8277,8 +8277,8 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (<fixed-case>LT</fixed-case>4<fixed-case>DH</fixed-case>)</booktitle>
       <url hash="2a879f05">W16-40</url>
-      <editor><first>Erhard</first><last>Hinrichs</last></editor>
-      <editor><first>Marie</first><last>Hinrichs</last></editor>
+      <editor id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></editor>
+      <editor id="marie-hinrichs"><first>Marie</first><last>Hinrichs</last></editor>
       <editor><first>Thorsten</first><last>Trippel</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
@@ -8300,7 +8300,7 @@
     </paper>
     <paper id="2">
       <title>Finding Rising and Falling Words</title>
-      <author><first>Erik</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik</first><last>Tjong Kim Sang</last></author>
       <pages>2–9</pages>
       <url hash="307a279b">W16-4002</url>
       <abstract>We examine two different methods for finding rising words (among which neologisms) and falling words (among which archaisms) in decades of magazine texts (millions of words) and in years of tweets (billions of words): one based on correlation coefficients of relative frequencies and time, and one based on comparing initial and final word frequencies of time intervals. We find that smoothing frequency scores improves the precision scores of both methods and that the correlation coefficients perform better on magazine text but worse on tweets. Since the two ranking methods find different words they can be used in side-by-side to study the behavior of words over time.</abstract>
@@ -8310,7 +8310,7 @@
       <title>A Dataset for Multimodal Question Answering in the Cultural Heritage Domain</title>
       <author><first>Shurong</first><last>Sheng</last></author>
       <author><first>Luc</first><last>Van Gool</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>10–17</pages>
       <url hash="ff914652">W16-4003</url>
       <abstract>Multimodal question answering in the cultural heritage domain allows visitors to ask questions in a more natural way and thus provides better user experiences with cultural objects while visiting a museum, landmark or any other historical site. In this paper, we introduce the construction of a golden standard dataset that will aid research of multimodal question answering in the cultural heritage domain. The dataset, which will be soon released to the public, contains multimodal content including images of typical artworks from the fascinating old-Egyptian Amarna period, related image-containing documents of the artworks and over 800 multimodal queries integrating visual and textual questions. The multimodal questions and related documents are all in English. The multimodal questions are linked to relevant paragraphs in the related documents that contain the answer to the multimodal query.</abstract>
@@ -8359,7 +8359,7 @@
     </paper>
     <paper id="8">
       <title>Feelings from the <fixed-case>P</fixed-case>ast—<fixed-case>A</fixed-case>dapting Affective Lexicons for Historical Emotion Analysis</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Johannes</first><last>Hellrich</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>54–61</pages>
@@ -8393,7 +8393,7 @@
       <author><first>Silvana</first><last>Hartmann</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Anette</first><last>Frank</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>76–84</pages>
       <url hash="5c204154">W16-4011</url>
       <abstract>We introduce the third major release of WebAnno, a generic web-based annotation tool for distributed teams. New features in this release focus on semantic annotation tasks (e.g. semantic role labelling or event annotation) and allow the tight integration of semantic annotations with syntactic annotations. In particular, we introduce the concept of slot features, a novel constraint mechanism that allows modelling the interaction between semantic and syntactic annotations, as well as a new annotation user interface. The new features were developed and used in an annotation project for semantic roles on German texts. The paper briefly introduces this project and reports on experiences performing annotations with the new tool. On a comparative evaluation, our tool reaches significant speedups over WebAnno 2 for a semantic annotation task.</abstract>
@@ -8407,7 +8407,7 @@
       <author><first>Mark</first><last>Janse</last></author>
       <author><first>Petra</first><last>Ajaka</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>85–93</pages>
       <url hash="fe450160">W16-4012</url>
       <abstract>Although spanning thousands of years and genres as diverse as liturgy, historiography, lyric and other forms of prose and poetry, the body of Latin texts is still relatively sparse compared to English. Data sparsity in Latin presents a number of challenges for traditional Named Entity Recognition techniques. Solving such challenges and enabling reliable Named Entity Recognition in Latin texts can facilitate many down-stream applications, from machine translation to digital historiography, enabling Classicists, historians, and archaeologists for instance, to track the relationships of historical persons, places, and groups on a large scale. This paper presents the first annotated corpus for evaluating Named Entity Recognition in Latin, as well as a fully supervised model that achieves over 90% F-score on a held-out test set, significantly outperforming a competitive baseline. We also present a novel active learning strategy that predicts how many and which sentences need to be annotated for named entities in order to attain a specified degree of accuracy when recognizing named entities automatically in a given text. This maximizes the productivity of annotators while simultaneously controlling quality.</abstract>
@@ -8441,7 +8441,7 @@
     <paper id="16">
       <title>Language technology tools and resources for the analysis of multimodal communication</title>
       <author><first>László</first><last>Hunyadi</last></author>
-      <author><first>Tamás</first><last>Váradi</last></author>
+      <author id="tamas-varadi"><first>Tamás</first><last>Váradi</last></author>
       <author><first>István</first><last>Szekrényes</last></author>
       <pages>117–124</pages>
       <url hash="c93697d0">W16-4016</url>
@@ -8470,7 +8470,7 @@
     <paper id="19">
       <title>Semantic Indexing of Multilingual Corpora and its Application on the History Domain</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Antonio</first><last>Raganato</last></author>
       <author><first>Yunseo</first><last>Joung</last></author>
       <pages>140–147</pages>
@@ -8480,7 +8480,7 @@
     </paper>
     <paper id="20">
       <title>Tagging <fixed-case>I</fixed-case>ngush - Language Technology For Low-Resource Languages Using Resources From Linguistic Field Work</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Johanna</first><last>Nichols</last></author>
       <author><first>Ronald</first><last>Sprouse</last></author>
       <pages>148–155</pages>
@@ -8501,7 +8501,7 @@
     </paper>
     <paper id="22">
       <title>Tools and Instruments for Building and Querying Diachronic Computational Lexica</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Andrea</first><last>Bellandi</last></author>
       <author><first>Monica</first><last>Monachini</last></author>
       <pages>164–171</pages>
@@ -8558,7 +8558,7 @@
     </frontmatter>
     <paper id="1">
       <title>Could Machine Learning Shed Light on Natural Language Complexity?</title>
-      <author><first>Maria Dolores</first><last>Jiménez-López</last></author>
+      <author id="m-dolores-jimenez-lopez"><first>Maria Dolores</first><last>Jiménez-López</last></author>
       <author><first>Leonor</first><last>Becerra-Bonache</last></author>
       <pages>1–11</pages>
       <url hash="b8358efc">W16-4101</url>
@@ -8588,7 +8588,7 @@
     </paper>
     <paper id="4">
       <title>Addressing surprisal deficiencies in reading time models</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>William</first><last>Schuler</last></author>
       <pages>32–37</pages>
       <url hash="c6e995fa">W16-4104</url>
@@ -8598,7 +8598,7 @@
     <paper id="5">
       <title>Towards grounding computational linguistic approaches to readability: Modeling reader-text interaction for easy and difficult texts</title>
       <author><first>Sowmya</first><last>Vajjala</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <author><first>Alexander</first><last>Eitel</last></author>
       <author><first>Katharina</first><last>Scheiter</last></author>
       <pages>38–48</pages>
@@ -8609,7 +8609,7 @@
     <paper id="6">
       <title>Memory access during incremental sentence processing causes reading time latency</title>
       <author><first>Cory</first><last>Shain</last></author>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <author><first>Edward</first><last>Gibson</last></author>
       <author><first>William</first><last>Schuler</last></author>
@@ -8620,7 +8620,7 @@
     </paper>
     <paper id="7">
       <title>Reducing lexical complexity as a tool to increase text accessibility for children with dyslexia</title>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <author><first>Johannes</first><last>Ziegler</last></author>
       <pages>59–66</pages>
       <url hash="d87b23e6">W16-4107</url>
@@ -8629,7 +8629,7 @@
     </paper>
     <paper id="8">
       <title>Syntactic and Lexical Complexity in <fixed-case>I</fixed-case>talian Noncanonical Structures</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
       <pages>67–78</pages>
       <url hash="5ebb9cab">W16-4108</url>
       <abstract>In this paper we will be dealing with different levels of complexity in the processing of Italian, a Romance language inheriting many properties from Latin which make it an almost free word order language . The paper is concerned with syntactic complexity as measurable on the basis of the cognitive parser that incrementally builds up a syntactic representation to be used by the semantic component. The theory behind will be LFG and parsing preferences will be used to justify one choice both from a principled and a processing point of view. LFG is a transformationless theory in which there is no deep structure separate from surface syntactic structure. This is partially in accordance with constructional theories in which noncanonical structures containing non-argument functions FOCUS/TOPIC are treated as multifunctional constituents. Complexity is computed on a processing basis following suggestions made by Blache and demonstrated by Kluender and Chesi</abstract>
@@ -8637,7 +8637,7 @@
     </paper>
     <paper id="9">
       <title>Real Multi-Sense or Pseudo Multi-Sense: An Approach to Improve Word Representation</title>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Caihua</first><last>Li</last></author>
       <author><first>Junfeng</first><last>Hu</last></author>
       <pages>79–88</pages>
@@ -8649,7 +8649,7 @@
       <title>A Preliminary Study of Statistically Predictive Syntactic Complexity Features and Manual Simplifications in <fixed-case>B</fixed-case>asque</title>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
       <author><first>María Jesús</first><last>Aranzabe</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <pages>89–97</pages>
       <url hash="d7549b89">W16-4110</url>
       <abstract>In this paper, we present a comparative analysis of statistically predictive syntactic features of complexity and the treatment of these features by humans when simplifying texts. To that end, we have used a list of the most five statistically predictive features obtained automatically and the Corpus of Basque Simplified Texts (CBST) to analyse how the syntactic phenomena in these features have been manually simplified. Our aim is to go beyond the descriptions of operations found in the corpus and relate the multidisciplinary findings to understand text complexity from different points of view. We also present some issues that can be important when analysing linguistic complexity.</abstract>
@@ -8667,7 +8667,7 @@
     <paper id="12">
       <title>Implicit readability ranking using the latent variable of a <fixed-case>B</fixed-case>ayesian Probit model</title>
       <author><first>Johan</first><last>Falkenjack</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>104–112</pages>
       <url hash="f85a4447">W16-4112</url>
       <abstract>Data driven approaches to readability analysis for languages other than English has been plagued by a scarcity of suitable corpora. Often, relevant corpora consist only of easy-to-read texts with no rank information or empirical readability scores, making only binary approaches, such as classification, applicable. We propose a Bayesian, latent variable, approach to get the most out of these kinds of corpora. In this paper we present results on using such a model for readability ranking. The model is evaluated on a preliminary corpus of ranked student texts with encouraging results. We also assess the model by showing that it performs readability classification on par with a state of the art classifier while at the same being transparent enough to allow more sophisticated interpretations.</abstract>
@@ -8676,7 +8676,7 @@
     <paper id="13">
       <title><fixed-case>CTAP</fixed-case>: A Web-Based Tool Supporting Automatic Complexity Analysis</title>
       <author><first>Xiaobin</first><last>Chen</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>113–119</pages>
       <url hash="917cfb97">W16-4113</url>
       <abstract>Informed by research on readability and language acquisition, computational linguists have developed sophisticated tools for the analysis of linguistic complexity. While some tools are starting to become accessible on the web, there still is a disconnect between the features that can in principle be identified based on state-of-the-art computational linguistic analysis, and the analyses a second language acquisition researcher, teacher, or textbook writer can readily obtain and visualize for their own collection of texts. This short paper presents a web-based tool development that aims to meet this challenge. The Common Text Analysis Platform (CTAP) is designed to support fully configurable linguistic feature extraction for a wide range of complexity analyses. It features a user-friendly interface, modularized and reusable analysis component integration, and flexible corpus and feature management. Building on the Unstructured Information Management framework (UIMA), CTAP readily supports integration of state-of-the-art NLP and complexity feature extraction maintaining modularization and reusability. CTAP thereby aims at providing a common platform for complexity analysis, encouraging research collaboration and sharing of feature extraction components—to jointly advance the state-of-the-art in complexity analysis in a form that readily supports real-life use by ordinary users.</abstract>
@@ -8699,7 +8699,7 @@
       <author><first>Sawsan</first><last>Alqahtani</last></author>
       <author><first>Houda</first><last>Bouamor</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
       <pages>127–136</pages>
       <url hash="fe6f7de0">W16-4115</url>
@@ -8718,9 +8718,9 @@
     <paper id="17">
       <title>A Comparison Between Morphological Complexity Measures: Typological Data vs. Language Corpora</title>
       <author><first>Christian</first><last>Bentz</last></author>
-      <author><first>Tatyana</first><last>Ruzsics</last></author>
+      <author id="tatyana-ruzsics"><first>Tatyana</first><last>Ruzsics</last></author>
       <author><first>Alexander</first><last>Koplenig</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <pages>142–153</pages>
       <url hash="1b1f1404">W16-4117</url>
       <abstract>Language complexity is an intriguing phenomenon argued to play an important role in both language learning and processing. The need to compare languages with regard to their complexity resulted in a multitude of approaches and methods, ranging from accounts targeting specific structural features to global quantification of variation more generally. In this paper, we investigate the degree to which morphological complexity measures are mutually correlated in a sample of more than 500 languages of 101 language families. We use human expert judgements from the World Atlas of Language Structures (WALS), and compare them to four quantitative measures automatically calculated from language corpora. These consist of three previously defined corpus-derived measures, which are all monolingual, and one new measure based on automatic word-alignment across pairs of languages. We find strong correlations between all the measures, illustrating that both expert judgements and automated approaches converge to similar complexity ratings, and can be used interchangeably.</abstract>
@@ -8730,7 +8730,7 @@
       <title>Similarity-Based Alignment of Monolingual Corpora for Text Simplification Purposes</title>
       <author><first>Sarah</first><last>Albertsson</last></author>
       <author><first>Evelina</first><last>Rennes</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>154–163</pages>
       <url hash="41260c1e">W16-4118</url>
       <abstract>Comparable or parallel corpora are beneficial for many NLP tasks. The automatic collection of corpora enables large-scale resources, even for less-resourced languages, which in turn can be useful for deducing rules and patterns for text rewriting algorithms, a subtask of automatic text simplification. We present two methods for the alignment of Swedish easy-to-read text segments to text segments from a reference corpus. The first method (M1) was originally developed for the task of text reuse detection, measuring sentence similarity by a modified version of a TF-IDF vector space model. A second method (M2), also accounting for part-of-speech tags, was developed, and the methods were compared. For evaluation, a crowdsourcing platform was built for human judgement data collection, and preliminary results showed that cosine similarity relates better to human ranks than the Dice coefficient. We also saw a tendency that including syntactic context to the TF-IDF vector space model is beneficial for this kind of paraphrase alignment task.</abstract>
@@ -8759,7 +8759,7 @@
       <author><first>Jixing</first><last>Li</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
       <author><first>Adam</first><last>Mahar</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>186–191</pages>
       <url hash="97e26c1c">W16-4121</url>
       <abstract>The relative contributions of meaning and form to sentence processing remains an outstanding issue across the language sciences. We examine this issue by formalizing four incremental complexity metrics and comparing them against freely-available ROI timecourses. Syntax-related metrics based on top-down parsing and structural dependency-distance turn out to significantly improve a regression model, compared to a simpler model that formalizes only conceptual combination using a distributional vector-space model. This confirms the view of the anterior temporal lobes as combinatory engines that deal in both form (see e.g. Brennan et al., 2012; Mazoyer, 1993) and meaning (see e.g., Patterson et al., 2007). This same characterization applies to a posterior temporal region in roughly “Wernicke’s Area.”</abstract>
@@ -8780,7 +8780,7 @@
       <author><first>Abhinav Deep</first><last>Singh</last></author>
       <author><first>Poojan</first><last>Mehta</last></author>
       <author><first>Samar</first><last>Husain</last></author>
-      <author><first>Rajkumar</first><last>Rajakrishnan</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajkumar</first><last>Rajakrishnan</last></author>
       <pages>202–212</pages>
       <url hash="3576ca5b">W16-4123</url>
       <abstract>Eye-tracking reading times have been attested to reflect cognitive processes underlying sentence comprehension. However, the use of reading times in NLP applications is an underexplored area of research. In this initial work we build an automatic system to assess sentence complexity using automatically predicted eye-tracking reading time measures and demonstrate the efficacy of these reading times for a well known NLP task, namely, readability assessment. We use a machine learning model and a set of features known to be significant predictors of reading times in order to learn per-word reading times from a corpus of English text having reading times of human readers. Subsequently, we use the model to predict reading times for novel text in the context of the aforementioned task. A model based only on reading times gave competitive results compared to the systems that use extensive syntactic features to compute linguistic complexity. Our work, to the best of our knowledge, is the first study to show that automatically predicted reading times can successfully model the difficulty of a text and can be deployed in practical text processing applications.</abstract>
@@ -8789,7 +8789,7 @@
     <paper id="24">
       <title>Upper Bound of Entropy Rate Revisited —<fixed-case>A</fixed-case> New Extrapolation of Compressed Large-Scale Corpora—</title>
       <author><first>Ryosuke</first><last>Takahira</last></author>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <author><first>Łukasz</first><last>Dębowski</last></author>
       <pages>213–221</pages>
       <url hash="e687183d">W16-4124</url>
@@ -8827,11 +8827,11 @@
     <paper id="1">
       <title>The impact of simple feature engineering in multilingual medical <fixed-case>NER</fixed-case></title>
       <author><first>Rebecka</first><last>Weegar</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>1–6</pages>
       <url hash="43cdd82a">W16-4201</url>
       <abstract>The goal of this paper is to examine the impact of simple feature engineering mechanisms before applying more sophisticated techniques to the task of medical NER. Sometimes papers using scientifically sound techniques present raw baselines that could be improved adding simple and cheap features. This work focuses on entity recognition for the clinical domain for three languages: English, Swedish and Spanish. The task is tackled using simple features, starting from the window size, capitalization, prefixes, and moving to POS and semantic tags. This work demonstrates that a simple initial step of feature engineering can improve the baseline results significantly. Hence, the contributions of this paper are: first, a short list of guidelines well supported with experimental results on three languages and, second, a detailed description of the relevance of these features for medical NER.</abstract>
@@ -8862,7 +8862,7 @@
       <title>Feature-Augmented Neural Networks for Patient Note De-identification</title>
       <author><first>Ji Young</first><last>Lee</last></author>
       <author><first>Franck</first><last>Dernoncourt</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Peter</first><last>Szolovits</last></author>
       <pages>17–22</pages>
       <url hash="1b2f81b6">W16-4204</url>
@@ -8874,7 +8874,7 @@
       <author><first>Pracheta</first><last>Sahoo</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <author><first>Kaushik</first><last>Nandan</last></author>
       <pages>23–31</pages>
       <url hash="3f8aa572">W16-4205</url>
@@ -8886,7 +8886,7 @@
       <author><first>Shweta</first><last>Yadav</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>32–41</pages>
       <url hash="d556281d">W16-4206</url>
       <abstract>Rapid growth in Electronic Medical Records (EMR) has emerged to an expansion of data in the clinical domain. The majority of the available health care information is sealed in the form of narrative documents which form the rich source of clinical information. Text mining of such clinical records has gained huge attention in various medical applications like treatment and decision making. However, medical records enclose patient Private Health Information (PHI) which can reveal the identities of the patients. In order to retain the privacy of patients, it is mandatory to remove all the PHI information prior to making it publicly available. The aim is to de-identify or encrypt the PHI from the patient medical records. In this paper, we propose an algorithm based on deep learning architecture to solve this problem. We perform de-identification of seven PHI terms from the clinical records. Experiments on benchmark datasets show that our proposed approach achieves encouraging performance, which is better than the baseline model developed with Conditional Random Field.</abstract>
@@ -8894,7 +8894,7 @@
     </paper>
     <paper id="7">
       <title>Neural Clinical Paraphrase Generation with Attention</title>
-      <author><first>Sadid A.</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid A.</first><last>Hasan</last></author>
       <author><first>Bo</first><last>Liu</last></author>
       <author><first>Joey</first><last>Liu</last></author>
       <author><first>Ashequl</first><last>Qadir</last></author>
@@ -8968,7 +8968,7 @@
       <author><first>Markus</first><last>Kreuzthaler</last></author>
       <author><first>Michel</first><last>Oleynik</last></author>
       <author><first>Alexander</first><last>Avian</last></author>
-      <author><first>Stefan</first><last>Schulz</last></author>
+      <author id="stefan-schulz"><first>Stefan</first><last>Schulz</last></author>
       <pages>91–98</pages>
       <url hash="4169dc4e">W16-4213</url>
       <abstract>Clinical narratives in electronic health record systems are a rich resource of patient-based information. They constitute an ongoing challenge for natural language processing, due to their high compactness and abundance of short forms. German medical texts exhibit numerous ad-hoc abbreviations that terminate with a period character. The disambiguation of period characters is therefore an important task for sentence and abbreviation detection. This task is addressed by a combination of co-occurrence information of word types with trailing period characters, a large domain dictionary, and a simple rule engine, thus merging statistical and dictionary-based disambiguation strategies. An F-measure of 0.95 could be reached by using the unsupervised approach presented in this paper. The results are promising for a domain-independent abbreviation detection strategy, because our approach avoids retraining of models or use case specific feature engineering efforts required for supervised machine learning approaches.</abstract>
@@ -8991,7 +8991,7 @@
       <url hash="ee0d3b04">W16-43</url>
       <editor><first>Malvina</first><last>Nissim</last></editor>
       <editor><first>Viviana</first><last>Patti</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -9005,7 +9005,7 @@
     <paper id="1">
       <title>Zooming in on Gender Differences in Social Media</title>
       <author><first>Aparna</first><last>Garimella</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
       <pages>1–10</pages>
       <url hash="37291a0f">W16-4301</url>
       <abstract>Men are from Mars and women are from Venus - or so the genre of relationship literature would have us believe. But there is some truth in this idea, and researchers in fields as diverse as psychology, sociology, and linguistics have explored ways to better understand the differences between genders. In this paper, we take another look at the problem of gender discrimination and attempt to move beyond the typical surface-level text classification approach, by (1) identifying semantic and psycholinguistic word classes that reflect systematic differences between men and women and (2) finding differences between genders in the ways they use the same words. We describe several experiments and report results on a large collection of blogs authored by men and women.</abstract>
@@ -9092,7 +9092,7 @@
       <author><first>Lea</first><last>Canales</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
       <pages>91–100</pages>
       <url hash="3659128f">W16-4310</url>
       <abstract>Detecting depression or personality traits, tutoring and student behaviour systems, or identifying cases of cyber-bulling are a few of the wide range of the applications, in which the automatic detection of emotion is a crucial element. Emotion detection has the potential of high impact by contributing the benefit of business, society, politics or education. Given this context, the main objective of our research is to contribute to the resolution of one of the most important challenges in textual emotion detection task: the problems of emotional corpora annotation. This will be tackled by proposing of a new semi-automatic methodology. Our innovative methodology consists in two main phases: (1) an automatic process to pre-annotate the unlabelled sentences with a reduced number of emotional categories; and (2) a refinement manual process where human annotators will determine which is the predominant emotion between the emotional categories selected in the phase 1. Our proposal in this paper is to show and evaluate the pre-annotation process to analyse the feasibility and the benefits by the methodology proposed. The results obtained are promising and allow obtaining a substantial improvement of annotation time and cost and confirm the usefulness of our pre-annotation process to improve the annotation task.</abstract>
@@ -9111,8 +9111,8 @@
     <paper id="12">
       <title>Predicting <fixed-case>B</fixed-case>rexit: Classifying Agreement is Better than Sentiment and Pollsters</title>
       <author><first>Fabio</first><last>Celli</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>110–118</pages>
       <url hash="21ecf61c">W16-4312</url>
@@ -9131,7 +9131,7 @@
     <paper id="14">
       <title>Social and linguistic behavior and its correlation to trait empathy</title>
       <author><first>Marina</first><last>Litvak</last></author>
-      <author><first>Jahna</first><last>Otterbacher</last></author>
+      <author id="jahna-otterbacher"><first>Jahna</first><last>Otterbacher</last></author>
       <author><first>Chee Siang</first><last>Ang</last></author>
       <author><first>David</first><last>Atkins</last></author>
       <pages>128–137</pages>
@@ -9143,7 +9143,7 @@
       <title>The Challenges of Multi-dimensional Sentiment Analysis Across Languages</title>
       <author><first>Emily</first><last>Öhman</last></author>
       <author><first>Timo</first><last>Honkela</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>138–142</pages>
       <url hash="4e3f9f05">W16-4315</url>
       <abstract>This paper outlines a pilot study on multi-dimensional and multilingual sentiment analysis of social media content. We use parallel corpora of movie subtitles as a proxy for colloquial language in social media channels and a multilingual emotion lexicon for fine-grained sentiment analyses. Parallel data sets make it possible to study the preservation of sentiments and emotions in translation and our assessment reveals that the lexical approach shows great inter-language agreement. However, our manual evaluation also suggests that the use of purely lexical methods is limited and further studies are necessary to pinpoint the cross-lingual differences and to develop better sentiment classifiers.</abstract>
@@ -9153,7 +9153,7 @@
       <title>The Social Mood of News: Self-reported Annotations to Design Automatic Mood Detection Systems</title>
       <author><first>Firoj</first><last>Alam</last></author>
       <author><first>Fabio</first><last>Celli</last></author>
-      <author><first>Evgeny A.</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny A.</first><last>Stepanov</last></author>
       <author><first>Arindam</first><last>Ghosh</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>143–152</pages>
@@ -9183,8 +9183,8 @@
     </paper>
     <paper id="19">
       <title>Can We Make Computers Laugh at Talks?</title>
-      <author><first>Chong Min</first><last>Lee</last></author>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Lei</first><last>Chen</last></author>
       <pages>173–181</pages>
       <url hash="aa5635bb">W16-4319</url>
@@ -9193,8 +9193,8 @@
     </paper>
     <paper id="20">
       <title>Towards Automatically Classifying Depressive Symptoms from <fixed-case>T</fixed-case>witter Data for Population Health</title>
-      <author><first>Danielle L.</first><last>Mowery</last></author>
-      <author><first>Albert</first><last>Park</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L.</first><last>Mowery</last></author>
+      <author id="y-albert-park"><first>Albert</first><last>Park</last></author>
       <author><first>Craig</first><last>Bryan</last></author>
       <author><first>Mike</first><last>Conway</last></author>
       <pages>182–191</pages>
@@ -9207,7 +9207,7 @@
     <meta>
       <booktitle>Proceedings of the Open Knowledge Base and Question Answering Workshop (<fixed-case>OKBQA</fixed-case> 2016)</booktitle>
       <url hash="dedd8037">W16-44</url>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <editor><first>Christina</first><last>Unger</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
       <editor><first>Jin-Dong</first><last>Kim</last></editor>
@@ -9225,7 +9225,7 @@
     </frontmatter>
     <paper id="1">
       <title>Using <fixed-case>W</fixed-case>ikipedia and Semantic Resources to Find Answer Types and Appropriate Answer Candidate Sets in Question Answering</title>
-      <author><first>Po-Chun</first><last>Chen</last></author>
+      <author id="po-chun-chen"><first>Po-Chun</first><last>Chen</last></author>
       <author><first>Meng-Jie</first><last>Zhuang</last></author>
       <author><first>Chuan-Jie</first><last>Lin</last></author>
       <pages>1–10</pages>
@@ -9300,8 +9300,8 @@
     </paper>
     <paper id="8">
       <title>Double Topic Shifts in Open Domain Conversations: Natural Language Interface for a <fixed-case>W</fixed-case>ikipedia-based Robot Application</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <pages>59–66</pages>
       <url hash="8edb41c1">W16-4408</url>
       <abstract>The paper describes topic shifting in dialogues with a robot that provides information from Wiki-pedia. The work focuses on a double topical construction of dialogue coherence which refers to discourse coherence on two levels: the evolution of dialogue topics via the interaction between the user and the robot system, and the creation of discourse topics via the content of the Wiki-pedia article itself. The user selects topics that are of interest to her, and the system builds a list of potential topics, anticipated to be the next topic, by the links in the article and by the keywords extracted from the article. The described system deals with Wikipedia articles, but could easily be adapted to other digital information providing systems.</abstract>
@@ -9309,13 +9309,13 @@
     </paper>
     <paper id="9">
       <title>Filling a Knowledge Graph with a Crowd</title>
-      <author><first>GyuHyeon</first><last>Choi</last></author>
+      <author id="gyuhyeon-choi"><first>GyuHyeon</first><last>Choi</last></author>
       <author><first>Sangha</first><last>Nam</last></author>
       <author><first>Dongho</first><last>Choi</last></author>
       <author><first>Key-Sun</first><last>Choi</last></author>
       <pages>67–71</pages>
       <url hash="53bb3064">W16-4409</url>
-      <abstract/>
+      <abstract></abstract>
       <bibkey>choi-etal-2016-filling</bibkey>
     </paper>
     <paper id="10">
@@ -9330,8 +9330,8 @@
     <paper id="11">
       <title><fixed-case>SRDF</fixed-case>: Extracting Lexical Knowledge Graph for Preserving Sentence Meaning</title>
       <author><first>Sangha</first><last>Nam</last></author>
-      <author><first>GyuHyeon</first><last>Choi</last></author>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="gyuhyeon-choi"><first>GyuHyeon</first><last>Choi</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Key-Sun</first><last>Choi</last></author>
       <pages>77–81</pages>
       <url hash="8c1e9a97">W16-4411</url>
@@ -9340,7 +9340,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>QAF</fixed-case>: Frame Semantics-based Question Interpretation</title>
-      <author><first>Younggyun</first><last>Hahm</last></author>
+      <author id="younggyun-hahm"><first>Younggyun</first><last>Hahm</last></author>
       <author><first>Sangha</first><last>Nam</last></author>
       <author><first>Key-Sun</first><last>Choi</last></author>
       <pages>82–90</pages>
@@ -9359,7 +9359,7 @@
     <paper id="14">
       <title>Dedicated Workflow Management for <fixed-case>OKBQA</fixed-case> Framework</title>
       <author><first>Jiseong</first><last>Kim</last></author>
-      <author><first>GyuHyeon</first><last>Choi</last></author>
+      <author id="gyuhyeon-choi"><first>GyuHyeon</first><last>Choi</last></author>
       <author><first>Key-Sun</first><last>Choi</last></author>
       <pages>97–101</pages>
       <url hash="da87a841">W16-4414</url>
@@ -9374,9 +9374,9 @@
       <editor><first>Patrik</first><last>Lambert</last></editor>
       <editor><first>Bogdan</first><last>Babych</last></editor>
       <editor><first>Kurt</first><last>Eberle</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -9411,7 +9411,7 @@
       <author><first>Mengyi</first><last>Liu</last></author>
       <author><first>Tongtao</first><last>Zhang</last></author>
       <author><first>Wenxuan</first><last>Zhou</last></author>
-      <author><first>Jianmin</first><last>Yao</last></author>
+      <author id="jianmin-yao"><first>Jianmin</first><last>Yao</last></author>
       <author><first>Heng</first><last>Ji</last></author>
       <pages>16–25</pages>
       <url hash="416aff29">W16-4503</url>
@@ -9441,7 +9441,7 @@
       <author><first>Roman</first><last>Sudarikov</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <author><first>Martin</first><last>Holub</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Vincent</first><last>Kríž</last></author>
       <pages>42–50</pages>
       <url hash="80887ada">W16-4506</url>
@@ -9460,8 +9460,8 @@
     </paper>
     <paper id="8">
       <title>Using Bilingual Segments in Generating Word-to-word Translations</title>
-      <author><first>Kavitha</first><last>Mahesh</last></author>
-      <author><first>Gabriel</first><last>Pereira Lopes</last></author>
+      <author id="kavi-mahesh"><first>Kavitha</first><last>Mahesh</last></author>
+      <author id="gabriel-lopes"><first>Gabriel</first><last>Pereira Lopes</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <pages>61–71</pages>
       <url hash="a93a23c1">W16-4508</url>
@@ -9479,8 +9479,8 @@
       <editor><first>Isao</first><last>Goto</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Sadao</first><last>Kurohashi</last></editor>
-      <editor><first>Ir. Hammam</first><last>Riza</last></editor>
-      <editor><first>Pushpak</first><last>Bhattacharyya</last></editor>
+      <editor id="hammam-riza"><first>Ir. Hammam</first><last>Riza</last></editor>
+      <editor id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -9549,8 +9549,8 @@
       <title>Global Pre-ordering for Improving Sublanguage Translation</title>
       <author><first>Masaru</first><last>Fuji</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>84–93</pages>
       <url hash="6c893243">W16-4606</url>
       <abstract>When translating formal documents, capturing the sentence structure specific to the sublanguage is extremely necessary to obtain high-quality translations. This paper proposes a novel global reordering method with particular focus on long-distance reordering for capturing the global sentence structure of a sublanguage. The proposed method learns global reordering models from a non-annotated parallel corpus and works in conjunction with conventional syntactic reordering. Experimental results on the patent abstract sublanguage show substantial gains of more than 25 points in the RIBES metric and comparable BLEU scores both for Japanese-to-English and English-to-Japanese translations.</abstract>
@@ -9569,7 +9569,7 @@
     <paper id="8">
       <title>System Description of bjtu_nlp Neural Machine Translation System</title>
       <author><first>Shaotong</first><last>Li</last></author>
-      <author><first>JinAn</first><last>Xu</last></author>
+      <author id="jinan-xu"><first>JinAn</first><last>Xu</last></author>
       <author><first>Yufeng</first><last>Chen</last></author>
       <author><first>Yujie</first><last>Zhang</last></author>
       <pages>104–110</pages>
@@ -9596,7 +9596,7 @@
     <paper id="11">
       <title><fixed-case>NICT</fixed-case>-2 Translation System for <fixed-case>WAT</fixed-case>2016: Applying Domain Adaptation to Phrase-based Statistical Machine Translation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>126–132</pages>
       <url hash="fe375777">W16-4611</url>
       <abstract>This paper describes the NICT-2 translation system for the 3rd Workshop on Asian Translation. The proposed system employs a domain adaptation method based on feature augmentation. We regarded the Japan Patent Office Corpus as a mixture of four domain corpora and improved the translation quality of each domain. In addition, we incorporated language models constructed from Google n-grams as external knowledge. Our domain adaptation method can naturally incorporate such external knowledge that contributes to translation quality.</abstract>
@@ -9618,7 +9618,7 @@
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>139–148</pages>
       <url hash="bebbd0db">W16-4613</url>
       <abstract>Simultaneous interpretation is a very challenging application of machine translation in which the input is a stream of words from a speech recognition engine. The key problem is how to segment the stream in an online manner into units suitable for translation. The segmentation process proceeds by calculating a confidence score for each word that indicates the soundness of placing a sentence boundary after it, and then heuristics are employed to determine the position of the boundaries. Multiple variants of the confidence scoring method and segmentation heuristics were studied. Experimental results show that the best performing strategy is not only efficient in terms of average latency per word, but also achieved end-to-end translation quality close to an offline baseline, and close to oracle segmentation.</abstract>
@@ -9628,7 +9628,7 @@
       <title>Similar <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Languages: Corpus-Based Case Study on <fixed-case>T</fixed-case>hai-<fixed-case>L</fixed-case>aotian and <fixed-case>M</fixed-case>alay-<fixed-case>I</fixed-case>ndonesian</title>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>149–156</pages>
       <url hash="19e33b36">W16-4614</url>
       <abstract>This paper illustrates the similarity between Thai and Laotian, and between Malay and Indonesian, based on an investigation on raw parallel data from Asian Language Treebank. The cross-lingual similarity is investigated and demonstrated on metrics of correspondence and order of tokens, based on several standard statistical machine translation techniques. The similarity shown in this study suggests a possibility on harmonious annotation and processing of the language pairs in future development.</abstract>
@@ -9646,7 +9646,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to <fixed-case>WAT</fixed-case> 2016</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
@@ -9729,7 +9729,7 @@
       <url hash="36ac847e">W16-47</url>
       <editor><first>Patrick</first><last>Drouin</last></editor>
       <editor><first>Natalia</first><last>Grabar</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
       <editor><first>Kyo</first><last>Kageura</last></editor>
       <editor><first>Koichi</first><last>Takeuchi</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
@@ -9753,7 +9753,7 @@
     <paper id="2">
       <title>Local-Global Vectors to Improve Unigram Terminology Extraction</title>
       <author><first>Ehsan</first><last>Amjadian</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Tahereh</first><last>Paribakht</last></author>
       <author><first>Farahnaz</first><last>Faez</last></author>
       <pages>2–11</pages>
@@ -9764,7 +9764,7 @@
     <paper id="3">
       <title>Recognition of non-domain phrases in automatically extracted lists of terms</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Piotr</first><last>Rychlik</last></author>
       <pages>12–20</pages>
       <url hash="a043d397">W16-4703</url>
@@ -9773,7 +9773,7 @@
     </paper>
     <paper id="4">
       <title>Contextual term equivalent search using domain-driven disambiguation</title>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <author><first>Pierre André</first><last>Ménard</last></author>
       <author><first>Daphnée</first><last>Azoulay</last></author>
       <pages>21–29</pages>
@@ -9794,7 +9794,7 @@
     </paper>
     <paper id="6">
       <title>Acquisition of semantic relations between terms: how far can we get with standard <fixed-case>NLP</fixed-case> tools?</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Julia</first><last>Bettinger</last></author>
       <author><first>Johannes</first><last>Schäfer</last></author>
       <author><first>Michael</first><last>Dorna</last></author>
@@ -9815,7 +9815,7 @@
     </paper>
     <paper id="8">
       <title>A Study on the Interplay Between the Corpus Size and Parameters of a Distributional Model for Term Classification</title>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <pages>62–72</pages>
       <url hash="5eeb0946">W16-4708</url>
       <abstract>We propose and evaluate a method for identifying co-hyponym lexical units in a terminological resource. The principles of term recognition and distributional semantics are combined to extract terms from a similar category of concept. Given a set of candidate terms, random projections are employed to represent them as low-dimensional vectors. These vectors are derived automatically from the frequency of the co-occurrences of the candidate terms and words that appear within windows of text in their proximity (context-windows). In a <tex-math>k</tex-math>-nearest neighbours framework, these vectors are classified using a small set of manually annotated terms which exemplify concept categories. We then investigate the interplay between the size of the corpus that is used for collecting the co-occurrences and a number of factors that play roles in the performance of the proposed method: the configuration of context-windows for collecting co-occurrences, the selection of neighbourhood size (<tex-math>k</tex-math>), and the choice of similarity metric.</abstract>
@@ -9823,7 +9823,7 @@
     </paper>
     <paper id="9">
       <title>Pattern-based Word Sketches for the Extraction of Semantic Relations</title>
-      <author><first>Pilar</first><last>León-Araúz</last></author>
+      <author id="pilar-leon-arauz"><first>Pilar</first><last>León-Araúz</last></author>
       <author><first>Antonio</first><last>San Martín</last></author>
       <author><first>Pamela</first><last>Faber</last></author>
       <pages>73–82</pages>
@@ -9843,8 +9843,8 @@
     <paper id="11">
       <title>Providing and Analyzing <fixed-case>NLP</fixed-case> Terms for our Community</title>
       <author><first>Gil</first><last>Francopoulo</last></author>
-      <author><first>Joseph</first><last>Mariani</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="joseph-mariani"><first>Joseph</first><last>Mariani</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <author><first>Frédéric</first><last>Vernier</last></author>
       <pages>94–103</pages>
       <url hash="449faa23">W16-4711</url>
@@ -9885,12 +9885,12 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on <fixed-case>NLP</fixed-case> for Similar Languages, Varieties and Dialects (<fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial3)</booktitle>
       <url hash="911e20c4">W16-48</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <editor><first>Liling</first><last>Tan</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -9936,7 +9936,7 @@
       <title>The <fixed-case>GW</fixed-case>/<fixed-case>LT</fixed-case>3 <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2016 Shared Task System for Dialects and Similar Languages Detection</title>
       <author><first>Ayah</first><last>Zirikly</last></author>
       <author><first>Bart</first><last>Desmet</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>33–41</pages>
       <url hash="5f4aa206">W16-4804</url>
       <abstract>This paper describes the GW/LT3 contribution to the 2016 VarDial shared task on the identification of similar languages (task 1) and Arabic dialects (task 2). For both tasks, we experimented with Logistic Regression and Neural Network classifiers in isolation. Additionally, we implemented a cascaded classifier that consists of coarse and fine-grained classifiers (task 1) and a classifier ensemble with majority voting for task 2. The submitted systems obtained state-of-the art performance and ranked first for the evaluation on social media data (test sets B1 and B2 for task 1), with a maximum weighted F1 score of 91.94%.</abstract>
@@ -9944,7 +9944,7 @@
     </paper>
     <paper id="5">
       <title>Processing Dialectal <fixed-case>A</fixed-case>rabic: Exploiting Variability and Similarity to Overcome Challenges and Discover Opportunities</title>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>42</pages>
       <url hash="921d95bb">W16-4805</url>
       <abstract>We recently witnessed an exponential growth in dialectal Arabic usage in both textual data and speech recordings especially in social media. Processing such media is of great utility for all kinds of applications ranging from information extraction to social media analytics for political and commercial purposes to building decision support systems. Compared to other languages, Arabic, especially the informal variety, poses a significant challenge to natural language processing algorithms since it comprises multiple dialects, linguistic code switching, and a lack of standardized orthographies, to top its relatively complex morphology. Inherently, the problem of processing Arabic in the context of social media is the problem of how to handle resource poor languages. In this talk I will go over some of our insights to some of these problems and show how there is a silver lining where we can generalize some of our solutions to other low resource language contexts.</abstract>
@@ -9952,8 +9952,8 @@
     </paper>
     <paper id="6">
       <title>Language Related Issues for Machine Translation between Closely Related <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic Languages</title>
-      <author><first>Maja</first><last>Popović</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <author><first>Filip</first><last>Klubička</last></author>
       <pages>43–52</pages>
       <url hash="0d412f70">W16-4806</url>
@@ -9993,7 +9993,7 @@
       <title>Automatic Verification and Augmentation of Multilingual Lexicons</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>73–81</pages>
       <url hash="49fa1414">W16-4810</url>
       <abstract>We present an approach for automatic verification and augmentation of multilingual lexica. We exploit existing parallel and monolingual corpora to extract multilingual correspondents via tri-angulation. We demonstrate the efficacy of our approach on two publicly available resources: Tharwa, a three-way lexicon comprising Dialectal Arabic, Modern Standard Arabic and English lemmas among other information (Diab et al., 2014); and BabelNet, a multilingual thesaurus comprising over 276 languages including Arabic variant entries (Navigli and Ponzetto, 2012). Our automated approach yields an F1-score of 71.71% in generating correct multilingual correspondents against gold Tharwa, and 54.46% against gold BabelNet without any human intervention.</abstract>
@@ -10002,7 +10002,7 @@
     <paper id="11">
       <title>Faster Decoding for Subword Level Phrase-based <fixed-case>SMT</fixed-case> between Related Languages</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>82–88</pages>
       <url hash="ca1bd892">W16-4811</url>
       <abstract>A common and effective way to train translation systems between related languages is to consider sub-word level basic units. However, this increases the length of the sentences resulting in increased decoding time. The increase in length is also impacted by the specific choice of data format for representing the sentences as subwords. In a phrase-based SMT framework, we investigate different choices of decoder parameters as well as data format and their impact on decoding time and translation accuracy. We suggest best options for these settings that significantly improve decoding time with little impact on the translation accuracy.</abstract>
@@ -10018,7 +10018,7 @@
     </paper>
     <paper id="13">
       <title>Enlarging Scarce In-domain <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>roatian Corpus for <fixed-case>SMT</fixed-case> of <fixed-case>MOOC</fixed-case>s Using <fixed-case>S</fixed-case>erbian</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Kostadin</first><last>Cholakov</last></author>
       <author><first>Valia</first><last>Kordoni</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
@@ -10039,9 +10039,9 @@
     <paper id="15">
       <title><fixed-case>DSL</fixed-case> Shared Task 2016: Perfect Is The Enemy of Good Language Discrimination Through Expectation–Maximization and Chunk-based Language Model</title>
       <author><first>Ondřej</first><last>Herman</last></author>
-      <author><first>Vít</first><last>Suchomel</last></author>
+      <author id="vit-suchomel"><first>Vít</first><last>Suchomel</last></author>
       <author><first>Vít</first><last>Baisa</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <pages>114–118</pages>
       <url hash="893bf5f7">W16-4815</url>
       <abstract>In this paper we investigate two approaches to discrimination of similar languages: Expectation–maximization algorithm for estimating conditional probability P(word|language) and byte level language models similar to compression-based language modelling methods. The accuracy of these methods reached respectively 86.6% and 88.3% on set A of the DSL Shared task 2016 competition.</abstract>
@@ -10077,7 +10077,7 @@
     <paper id="19">
       <title>A Character-level Convolutional Neural Network for Distinguishing Similar Languages and Dialects</title>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <pages>145–152</pages>
       <url hash="d2606bfa">W16-4819</url>
       <abstract>Discriminating between closely-related language varieties is considered a challenging and important task. This paper describes our submission to the DSL 2016 shared-task, which included two sub-tasks: one on discriminating similar languages and one on identifying Arabic dialects. We developed a character-level neural network for this task. Given a sequence of characters, our model embeds each character in vector space, runs the sequence through multiple convolutions with different filter widths, and pools the convolutional representations to obtain a hidden vector representation of the text that is used for predicting the language or dialect. We primarily focused on the Arabic dialect identification task and obtained an F1 score of 0.4834, ranking 6th out of 18 participants. We also analyze errors made by our system on the Arabic data in some detail, and point to challenges such an approach is faced with.</abstract>
@@ -10086,7 +10086,7 @@
     <paper id="20">
       <title><fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>, a Word-Based Backoff Method for Language Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <pages>153–162</pages>
       <url hash="b31717b1">W16-4820</url>
@@ -10106,8 +10106,8 @@
     <paper id="22">
       <title>Comparing Two Basic Methods for Discriminating Between Similar Languages and Varieties</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
-      <author><first>José Ramom</first><last>Pichel</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
+      <author id="jose-ramom-pichel-campos"><first>José Ramom</first><last>Pichel</last></author>
       <author><first>Manex</first><last>Agirrezabal</last></author>
       <pages>170–177</pages>
       <url hash="dbb4e869">W16-4822</url>
@@ -10116,7 +10116,7 @@
     </paper>
     <paper id="23">
       <title>Advances in Ngram-based Discrimination of Similar Languages</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <pages>178–184</pages>
       <url hash="38475e22">W16-4823</url>
@@ -10142,7 +10142,7 @@
     <paper id="26">
       <title><fixed-case>A</fixed-case>rabic Language <fixed-case>WEKA</fixed-case>-Based Dialect Classifier for <fixed-case>A</fixed-case>rabic Automatic Speech Recognition Transcripts</title>
       <author><first>Areej</first><last>Alshutayri</last></author>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>Abdulrahman</first><last>Alosaimy</last></author>
       <author><first>James</first><last>Dickins</last></author>
       <author><first>Michael</first><last>Ingleby</last></author>
@@ -10174,7 +10174,7 @@
     <paper id="29">
       <title>Tuning <fixed-case>B</fixed-case>ayes Baseline for Dialect Detection</title>
       <author><first>Hector-Hugo</first><last>Franco-Penya</last></author>
-      <author><first>Liliana</first><last>Mamani Sanchez</last></author>
+      <author id="liliana-mamani-sanchez"><first>Liliana</first><last>Mamani Sanchez</last></author>
       <pages>227–234</pages>
       <url hash="c3de7107">W16-4829</url>
       <abstract>This paper describes an analysis of our submissions to the Dialect Detection Shared Task 2016. We proposed three different systems that involved simplistic features, to name: a Naive-bayes system, a Support Vector Machines-based system and a Tree Kernel-based system. These systems underperform when compared to other submissions in this shared task, since the best one achieved an accuracy of ~0.834.</abstract>
@@ -10183,8 +10183,8 @@
     <paper id="30">
       <title>Vanilla Classifiers for Distinguishing between Similar Languages</title>
       <author><first>Sergiu</first><last>Nisioi</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>235–242</pages>
       <url hash="d1bd2550">W16-4830</url>
       <abstract>In this paper we describe the submission of the UniBuc-NLP team for the Discriminating between Similar Languages Shared Task, DSL 2016. We present and analyze the results we obtained in the closed track of sub-task 1 (Similar languages and language varieties) and sub-task 2 (Arabic dialects). For sub-task 1 we used a logistic regression classifier with tf-idf feature weighting and for sub-task 2 a character-based string kernel with an SVM classifier. Our results show that good accuracy scores can be obtained with limited feature and model engineering. While certain limitations are to be acknowledged, our approach worked surprisingly well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for dataset B1 and 0.838 accuracy (4th place) for dataset B2.</abstract>
@@ -10205,7 +10205,7 @@
       <booktitle>Proceedings of the 3rd Workshop on Natural Language Processing Techniques for Educational Applications (<fixed-case>NLPTEA</fixed-case>2016)</booktitle>
       <url hash="48222020">W16-49</url>
       <editor><first>Hsin-Hsi</first><last>Chen</last></editor>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <editor><first>Xiaofei</first><last>Lu</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
@@ -10221,7 +10221,7 @@
     <paper id="1">
       <title>Simplification of Example Sentences for Learners of <fixed-case>J</fixed-case>apanese Functional Expressions</title>
       <author><first>Jun</first><last>Liu</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>1–5</pages>
       <url hash="8a6165a6">W16-4901</url>
       <abstract>Learning functional expressions is one of the difficulties for language learners, since functional expressions tend to have multiple meanings and complicated usages in various situations. In this paper, we report an experiment of simplifying example sentences of Japanese functional expressions especially for Chinese-speaking learners. For this purpose, we developed “Japanese Functional Expressions List” and “Simple Japanese Replacement List”. To evaluate the method, we conduct a small-scale experiment with Chinese-speaking learners on the effectiveness of the simplified example sentences. The experimental results indicate that simplified sentences are helpful in learning Japanese functional expressions.</abstract>
@@ -10230,7 +10230,7 @@
     <paper id="2">
       <title>Effectiveness of Linguistic and Learner Features to Listenability Measurement Using a Decision Tree Classifier</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>6–10</pages>
       <url hash="d32fc194">W16-4902</url>
       <abstract>In learning Asian languages, learners encounter the problem of character types that are different from those in their first language, for instance, between Chinese characters and the Latin alphabet. This problem also affects listening because learners reconstruct letters from speech sounds. Hence, special attention should be paid to listening practice for learners of Asian languages. However, to our knowledge, few studies have evaluated the ease of listening comprehension (listenability) in Asian languages. Therefore, as a pilot study of listenability in Asian languages, we developed a measurement method for learners of English in order to examine the discriminability of linguistic and learner features. The results showed that the accuracy of our method outperformed a simple majority vote, which suggests that a combination of linguistic and learner features should be used to measure listenability in Asian languages as well as in English.</abstract>
@@ -10240,7 +10240,7 @@
       <title>A Two-Phase Approach Towards Identifying Argument Structure in Natural Language</title>
       <author><first>Arkanath</first><last>Pathak</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
-      <author><first>Plaban</first><last>Bhowmick</last></author>
+      <author id="plaban-kr-bhowmick"><first>Plaban</first><last>Bhowmick</last></author>
       <pages>11–19</pages>
       <url hash="50a032db">W16-4903</url>
       <abstract>We propose a new approach for extracting argument structure from natural language texts that contain an underlying argument. Our approach comprises of two phases: Score Assignment and Structure Prediction. The Score Assignment phase trains models to classify relations between argument units (Support, Attack or Neutral). To that end, different training strategies have been explored. We identify different linguistic and lexical features for training the classifiers. Through ablation study, we observe that our novel use of word-embedding features is most effective for this task. The Structure Prediction phase makes use of the scores from the Score Assignment phase to arrive at the optimal structure. We perform experiments on three argumentation datasets, namely, AraucariaDB, Debatepedia and Wikipedia. We also propose two baselines and observe that the proposed approach outperforms baseline systems for the final task of Structure Prediction.</abstract>
@@ -10260,7 +10260,7 @@
       <title>A Comparison of Word Embeddings for <fixed-case>E</fixed-case>nglish and Cross-Lingual <fixed-case>C</fixed-case>hinese Word Sense Disambiguation</title>
       <author><first>Hong Jin</first><last>Kang</last></author>
       <author><first>Tao</first><last>Chen</last></author>
-      <author><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
+      <author id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>30–39</pages>
       <url hash="ce0ed74c">W16-4905</url>
@@ -10271,10 +10271,10 @@
       <title>Overview of <fixed-case>NLP</fixed-case>-<fixed-case>TEA</fixed-case> 2016 Shared Task for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis</title>
       <author><first>Lung-Hao</first><last>Lee</last></author>
       <author><first>Gaoqi</first><last>Rao</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>Endong</first><last>Xun</last></author>
       <author><first>Baolin</first><last>Zhang</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <pages>40–48</pages>
       <url hash="b997a0ac">W16-4906</url>
       <abstract>This paper presents the NLP-TEA 2016 shared task for Chinese grammatical error diagnosis which seeks to identify grammatical error types and their range of occurrence within sentences written by learners of Chinese as foreign language. We describe the task definition, data preparation, performance metrics, and evaluation results. Of the 15 teams registered for this shared task, 9 teams developed the system and submitted a total of 36 runs. We expected this evaluation campaign could lead to the development of more advanced NLP techniques for educational applications, especially for Chinese error detection. All data sets with gold standards and scoring scripts are made publicly available to researchers.</abstract>
@@ -10296,7 +10296,7 @@
       <author><first>Yajun</first><last>Liu</last></author>
       <author><first>Yingjie</first><last>Han</last></author>
       <author><first>Liyan</first><last>Zhuo</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <pages>57–62</pages>
       <url hash="a269b4c8">W16-4908</url>
       <abstract>In the process of learning and using Chinese, foreigners may have grammatical errors due to negative migration of their native languages. Currently, the computer-oriented automatic detection method of grammatical errors is not mature enough. Based on the evaluating task — CGED2016, we select and analyze the classification model and design feature extraction method to obtain grammatical errors including Mission(M), Disorder(W), Selection (S) and Redundant (R) automatically. The experiment results based on the dynamic corpus of HSK show that the Chinese grammatical error automatic detection method, which uses CRF as classification model and n-gram as feature extraction method. It is simple and efficient which play a positive effect on the research of Chinese grammatical error automatic detection and also a supporting and guiding role in the teaching of Chinese as a foreign language.</abstract>
@@ -10307,7 +10307,7 @@
       <author><first>Po-Lin</first><last>Chen</last></author>
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Liang-Pu</first><last>Chen</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <pages>63–72</pages>
       <url hash="5882fa13">W16-4909</url>
       <abstract>This paper describe the CYUT-III system on grammar error detection in the 2016 NLP-TEA Chinese Grammar Error Detection shared task CGED. In this task a system has to detect four types of errors, in-cluding redundant word error, missing word error, word selection error and word ordering error. Based on the conditional random fields (CRF) model, our system is a linear tagger that can detect the errors in learners’ essays. Since the system performance depends on the features heavily, in this paper, we are going to report how to integrate the collocation feature into the CRF model. Our system presents the best detection accuracy and Identification accuracy on the TOCFL dataset, which is in traditional Chi-nese. The same system also works well on the simplified Chinese HSK dataset.</abstract>
@@ -10318,7 +10318,7 @@
       <author><first>Wei-Chieh</first><last>Chou</last></author>
       <author><first>Chin-Kui</first><last>Lin</last></author>
       <author><first>Yuan-Fu</first><last>Liao</last></author>
-      <author><first>Yih-Ru</first><last>Wang</last></author>
+      <author id="yih-ru-wang"><first>Yih-Ru</first><last>Wang</last></author>
       <pages>73–81</pages>
       <url hash="b924daf4">W16-4910</url>
       <abstract>This paper discusses how to adapt two new word embedding features to build a more efficient Chinese Grammatical Error Diagnosis (CGED) systems to assist Chinese foreign learners (CFLs) in improving their written essays. The major idea is to apply word order sensitive Word2Vec approaches including (1) structured skip-gram and (2) continuous window (CWindow) models, because they are more suitable for solving syntax-based problems. The proposed new features were evaluated on the Test of Chinese as a Foreign Language (TOCFL) learner database provided by NLP-TEA-3&amp;CGED shared task. Experimental results showed that the new features did work better than the traditional word order insensitive Word2Vec approaches. Moreover, according to the official evaluation results, our system achieved the lowest (0.1362) false positive (FA) and the highest precision rates in all three measurements.</abstract>
@@ -10327,7 +10327,7 @@
     <paper id="11">
       <title>A Fluctuation Smoothing Approach for Unsupervised Automatic Short Answer Grading</title>
       <author><first>Shourya</first><last>Roy</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Y.</first><last>Narahari</last></author>
       <pages>82–91</pages>
       <url hash="c375667d">W16-4911</url>
@@ -10337,7 +10337,7 @@
     <paper id="12">
       <title><fixed-case>J</fixed-case>apanese Lexical Simplification for Non-Native Speakers</title>
       <author><first>Muhaimin</first><last>Hading</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Maki</first><last>Sakamoto</last></author>
       <pages>92–96</pages>
       <url hash="5c8961c7">W16-4912</url>
@@ -10347,8 +10347,8 @@
     <paper id="13">
       <title>A Corpus-based Approach for <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Language Learning</title>
       <author><first>Shuyuan</first><last>Cao</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Mikel</first><last>Iruskieta</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></author>
       <pages>97–106</pages>
       <url hash="9a34ab99">W16-4913</url>
       <abstract>Due to the huge population that speaks Spanish and Chinese, these languages occupy an important position in the language learning studies. Although there are some automatic translation systems that benefit the learning of both languages, there is enough space to create resources in order to help language learners. As a quick and effective resource that can give large amount language information, corpus-based learning is becoming more and more popular. In this paper we enrich a Spanish-Chinese parallel corpus automatically with part of-speech (POS) information and manually with discourse segmentation (following the Rhetorical Structure Theory (RST) (Mann and Thompson, 1988)). Two search tools allow the Spanish-Chinese language learners to carry out different queries based on tokens and lemmas. The parallel corpus and the research tools are available to the academic community. We propose some examples to illustrate how learners can use the corpus to learn Spanish and Chinese.</abstract>
@@ -10367,9 +10367,9 @@
     <paper id="15">
       <title>An Aligned <fixed-case>F</fixed-case>rench-<fixed-case>C</fixed-case>hinese corpus of 10<fixed-case>K</fixed-case> segments from university educational material</title>
       <author><first>Ruslan</first><last>Kalitvianski</last></author>
-      <author><first>Lingxiao</first><last>Wang</last></author>
-      <author><first>Valérie</first><last>Bellynck</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="ling-xiao-wang"><first>Lingxiao</first><last>Wang</last></author>
+      <author id="valerie-bellynck"><first>Valérie</first><last>Bellynck</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>117–121</pages>
       <url hash="3dea8738">W16-4915</url>
       <abstract>This paper describes a corpus of nearly 10K French-Chinese aligned segments, produced by post-editing machine translated computer science courseware. This corpus was built from 2013 to 2016 within the PROJECT_NAME project, by native Chinese students. The quality, as judged by native speakers, is ad-equate for understanding (far better than by reading only the original French) and for getting better marks. This corpus is annotated at segment-level by a self-assessed quality score. It has been directly used as supplemental training data to build a statistical machine translation system dedicated to that sublanguage, and can be used to extract the specific bilingual terminology. To our knowledge, it is the first corpus of this kind to be released.</abstract>
@@ -10433,7 +10433,7 @@
       <url hash="af98d295">W16-50</url>
       <editor><first>Eduardo</first><last>Blanco</last></editor>
       <editor><first>Roser</first><last>Morante</last></editor>
-      <editor><first>Roser</first><last>Saurí</last></editor>
+      <editor id="roser-sauri"><first>Roser</first><last>Saurí</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -10448,8 +10448,8 @@
       <title>‘Who would have thought of that!’: A Hierarchical Topic Model for Extraction of Sarcasm-prevalent Topics and Sarcasm Detection</title>
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Prayas</first><last>Jain</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Mark</first><last>Carman</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="mark-carman"><first>Mark</first><last>Carman</last></author>
       <pages>1–10</pages>
       <url hash="14a32262">W16-5001</url>
       <abstract>Topic Models have been reported to be beneficial for aspect-based sentiment analysis. This paper reports the first topic model for sarcasm detection, to the best of our knowledge. Designed on the basis of the intuition that sarcastic tweets are likely to have a mixture of words of both sentiments as against tweets with literal sentiment (either positive or negative), our hierarchical topic model discovers sarcasm-prevalent topics and topic-level sentiment. Using a dataset of tweets labeled using hashtags, the model estimates topic-level, and sentiment-level distributions. Our evaluation shows that topics such as ‘work’, ‘gun laws’, ‘weather’ are sarcasm-prevalent topics. Our model is also able to discover the mixture of sentiment-bearing words that exist in a text of a given sentiment-related label. Finally, we apply our model to predict sarcasm in tweets. We outperform two prior work based on statistical classifiers with specific features, by around 25%.</abstract>
@@ -10467,7 +10467,7 @@
       <title>Detecting Level of Belief in <fixed-case>C</fixed-case>hinese and <fixed-case>S</fixed-case>panish</title>
       <author><first>Juan Pablo</first><last>Colomer</last></author>
       <author><first>Keyu</first><last>Lai</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>22–30</pages>
       <url hash="fc4a0406">W16-5003</url>
       <abstract>There has been extensive work on detecting the level of committed belief (also known as “factuality”) that an author is expressing towards the propositions in his or her utterances. Previous work on English has revealed that this can be done as a sequence tagging task. In this paper, we investigate the same task for Chinese and Spanish, two very different languages from English and from each other.</abstract>
@@ -10476,7 +10476,7 @@
     <paper id="4">
       <title>Contradiction Detection for Rumorous Claims</title>
       <author><first>Piroska</first><last>Lendvai</last></author>
-      <author><first>Uwe</first><last>Reichel</last></author>
+      <author id="uwe-reichel"><first>Uwe</first><last>Reichel</last></author>
       <pages>31–40</pages>
       <url hash="bc2f7dd2">W16-5004</url>
       <abstract>The utilization of social media material in journalistic workflows is increasing, demanding automated methods for the identification of mis- and disinformation. Since textual contradiction across social media posts can be a signal of rumorousness, we seek to model how claims in Twitter posts are being textually contradicted. We identify two different contexts in which contradiction emerges: its broader form can be observed across independently posted tweets and its more specific form in threaded conversations. We define how the two scenarios differ in terms of central elements of argumentation: claims and conversation structure. We design and evaluate models for the two scenarios uniformly as 3-way Recognizing Textual Entailment tasks in order to represent claims and conversation structure implicitly in a generic inference model, while previous studies used explicit or no representation of these properties. To address noisy text, our classifiers use simple similarity features derived from the string and part-of-speech level. Corpus statistics reveal distribution differences for these features in contradictory as opposed to non-contradictory tweet relations, and the classifiers yield state of the art performance.</abstract>
@@ -10484,7 +10484,7 @@
     </paper>
     <paper id="5">
       <title>Negation and Modality in Machine Translation</title>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <pages>41</pages>
       <url hash="0d4af2e9">W16-5005</url>
       <abstract>Negation and modality are two important grammatical phenomena that have attracted recent research attention as they can contribute to extra-propositional meaning aspects, among with factuality, attribution, irony and sarcasm. These aspects go beyond analysis such as semantic role labeling, and modeling them is important as a step towards a higher level of language understanding, which is needed for practical applications such as sentiment analysis. In this talk, I will go beyond English, and I will discuss how negation and modality are expressed in other languages. I will also go beyond sentiment analysis and I will present some challenges that the two phenomena pose for machine translation (MT). In particular, I will demonstrate how contemporary MT systems fail on them, and I will discuss some possible solutions.</abstract>
@@ -10492,11 +10492,11 @@
     </paper>
     <paper id="6">
       <title>Problematic Cases in the Annotation of Negation in <fixed-case>S</fixed-case>panish</title>
-      <author><first>Salud María</first><last>Jiménez-Zafra</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña-López</last></author>
-      <author><first>Toni</first><last>Martí</last></author>
-      <author><first>Mariona</first><last>Taulé</last></author>
+      <author id="salud-maria-jimenez-zafra"><first>Salud María</first><last>Jiménez-Zafra</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña-López</last></author>
+      <author id="m-antonia-marti"><first>Toni</first><last>Martí</last></author>
+      <author id="mariona-taule"><first>Mariona</first><last>Taulé</last></author>
       <pages>42–48</pages>
       <url hash="7cd6c025">W16-5006</url>
       <abstract>This paper presents the main sources of disagreement found during the annotation of the Spanish SFU Review Corpus with negation (SFU ReviewSP -NEG). Negation detection is a challenge in most of the task related to NLP, so the availability of corpora annotated with this phenomenon is essential in order to advance in tasks related to this area. A thorough analysis of the problems found during the annotation could help in the study of this phenomenon.</abstract>
@@ -10518,8 +10518,8 @@
       <booktitle>Proceedings of the Fifth Workshop on Building and Evaluating Resources for Biomedical Text Mining (<fixed-case>B</fixed-case>io<fixed-case>T</fixed-case>xt<fixed-case>M</fixed-case>2016)</booktitle>
       <url hash="dc9a51f6">W16-51</url>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Riza</first><last>Batista-Navarro</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Paul</first><last>Thompson</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
@@ -10553,7 +10553,7 @@
     </paper>
     <paper id="3">
       <title>Building Content-driven Entity Networks for Scarce Scientific Literature using Content Information</title>
-      <author><first>Reinald Kim</first><last>Amplayo</last></author>
+      <author id="reinald-kim-amplayo"><first>Reinald Kim</first><last>Amplayo</last></author>
       <author><first>Min</first><last>Song</last></author>
       <pages>20–29</pages>
       <url hash="0d79efb8">W16-5103</url>
@@ -10582,8 +10582,8 @@
     <paper id="6">
       <title>Fully unsupervised low-dimensional representation of adverse drug reaction events through distributional semantics</title>
       <author><first>Alicia</first><last>Pérez</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <pages>50–59</pages>
       <url hash="557dfa12">W16-5106</url>
       <abstract>Electronic health records show great variability since the same concept is often expressed with different terms, either scientific latin forms, common or lay variants and even vernacular naming. Deep learning enables distributional representation of terms in a vector-space, and therefore, related terms tend to be close in the vector space. Accordingly, embedding words through these vectors opens the way towards accounting for semantic relatedness through classical algebraic operations. In this work we propose a simple though efficient unsupervised characterization of Adverse Drug Reactions (ADRs). This approach exploits the embedding representation of the terms involved in candidate ADR events, that is, drug-disease entity pairs. In brief, the ADRs are represented as vectors that link the drug with the disease in their context through a recursive additive model. We discovered that a low-dimensional representation that makes use of the modulus and argument of the embedded representation of the ADR event shows correlation with the manually annotated class. Thus, it can be derived that this characterization results in to be beneficial for further classification tasks as predictive features.</abstract>
@@ -10592,11 +10592,11 @@
     <paper id="7">
       <title>A Dataset for <fixed-case>ICD</fixed-case>-10 Coding of Death Certificates: Creation and Usage</title>
       <author><first>Thomas</first><last>Lavergne</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Aude</first><last>Robert</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Grégoire</first><last>Rey</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>60–69</pages>
       <url hash="03f920b1">W16-5107</url>
       <abstract>Very few datasets have been released for the evaluation of diagnosis coding with the International Classification of Diseases, and only one so far in a language other than English. This paper describes a large-scale dataset prepared from French death certificates, and the problems which needed to be solved to turn it into a dataset suitable for the application of machine learning and natural language processing methods of ICD-10 coding. The dataset includes the free-text statements written by medical doctors, the associated meta-data, the human coder-assigned codes for each statement, as well as the statement segments which supported the coder’s decision for each code. The dataset comprises 93,694 death certificates totalling 276,103 statements and 377,677 ICD-10 code assignments (3,457 unique codes). It was made available for an international automated coding shared task, which attracted five participating teams. An extended version of the dataset will be used in a new edition of the shared task.</abstract>
@@ -10616,7 +10616,7 @@
     </paper>
     <paper id="9">
       <title>Supervised classification of end-of-lines in clinical text with no manual annotation</title>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Thomas</first><last>Lavergne</last></author>
       <pages>80–88</pages>
@@ -10627,7 +10627,7 @@
     <paper id="10">
       <title><fixed-case>B</fixed-case>io<fixed-case>DCA</fixed-case> Identifier: A System for Automatic Identification of Discourse Connective and Arguments from Biomedical Text</title>
       <author><first>Sindhuja</first><last>Gopalan</last></author>
-      <author><first>Sobha</first><last>Lalitha Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha</first><last>Lalitha Devi</last></author>
       <pages>89–98</pages>
       <url hash="19956da3">W16-5110</url>
       <abstract>This paper describes a Natural language processing system developed for automatic identification of explicit connectives, its sense and arguments. Prior work has shown that the difference in usage of connectives across corpora affects the cross domain connective identification task negatively. Hence the development of domain specific discourse parser has become indispensable. Here, we present a corpus annotated with discourse relations on Medline abstracts. Kappa score is calculated to check the annotation quality of our corpus. The previous works on discourse analysis in bio-medical data have concentrated only on the identification of connectives and hence we have developed an end-end parser for connective and argument identification using Conditional Random Fields algorithm. The type and sub-type of the connective sense is also identified. The results obtained are encouraging.</abstract>
@@ -10636,7 +10636,7 @@
     <paper id="11">
       <title>Data, tools and resources for mining social media drug chatter</title>
       <author><first>Abeed</first><last>Sarker</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>99–107</pages>
       <url hash="159f2c9e">W16-5111</url>
       <abstract>Social media has emerged into a crucial resource for obtaining population-based signals for various public health monitoring and surveillance tasks, such as pharmacovigilance. There is an abundance of knowledge hidden within social media data, and the volume is growing. Drug-related chatter on social media can include user-generated information that can provide insights into public health problems such as abuse, adverse reactions, long-term effects, and multi-drug interactions. Our objective in this paper is to present to the biomedical natural language processing, data science, and public health communities data sets (annotated and unannotated), tools and resources that we have collected and created from social media. The data we present was collected from Twitter using the generic and brand names of drugs as keywords, along with their common misspellings. Following the collection of the data, annotation guidelines were created over several iterations, which detail important aspects of social media data annotation and can be used by future researchers for developing similar data sets. The annotation guidelines were followed to prepare data sets for text classification, information extraction and normalization. In this paper, we discuss the preparation of these guidelines, outline the data sets prepared, and present an overview of our state-of-the-art systems for data collection, supervised classification, and information extraction. In addition to the development of supervised systems for classification and extraction, we developed and released unlabeled data and language models. We discuss the potential uses of these language models in data mining and the large volumes of unlabeled data from which they were generated. We believe that the summaries and repositories we present here of our data, annotation guidelines, models, and tools will be beneficial to the research community as a single-point entry for all these resources, and will promote further research in this area.</abstract>
@@ -10646,9 +10646,9 @@
       <title>Detection of Text Reuse in <fixed-case>F</fixed-case>rench Medical Corpora</title>
       <author><first>Eva</first><last>D’hondt</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Efstathios</first><last>Stamatatos</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
+      <author id="efstathios-stamatatos"><first>Efstathios</first><last>Stamatatos</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>108–114</pages>
       <url hash="32dde83c">W16-5112</url>
       <abstract>Electronic Health Records (EHRs) are increasingly available in modern health care institutions either through the direct creation of electronic documents in hospitals’ health information systems, or through the digitization of historical paper records. Each EHR creation method yields the need for sophisticated text reuse detection tools in order to prepare the EHR collections for efficient secondary use relying on Natural Language Processing methods. Herein, we address the detection of two types of text reuse in French EHRs: 1) the detection of updated versions of the same document and 2) the detection of document duplicates that still bear surface differences due to OCR or de-identification processing. We present a robust text reuse detection method to automatically identify redundant document pairs in two French EHR corpora that achieves an overall macro F-measure of 0.68 and 0.60, respectively and correctly identifies all redundant document pairs of interest.</abstract>
@@ -10696,8 +10696,8 @@
       <url hash="c36a257c">W16-52</url>
       <editor><first>Yohei</first><last>Murakami</last></editor>
       <editor><first>Donghui</first><last>Lin</last></editor>
-      <editor><first>Nancy</first><last>Ide</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -10710,10 +10710,10 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>K</fixed-case>athaa : <fixed-case>NLP</fixed-case> Systems as Edge-Labeled Directed Acyclic <fixed-case>M</fixed-case>ulti<fixed-case>G</fixed-case>raphs</title>
-      <author><first>Sharada</first><last>Mohanty</last></author>
+      <author id="sharada-prasanna-mohanty"><first>Sharada</first><last>Mohanty</last></author>
       <author><first>Nehal J</first><last>Wani</last></author>
-      <author><first>Manish</first><last>Srivastava</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Srivastava</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>1–10</pages>
       <url hash="01b6cf7b">W16-5201</url>
       <abstract>We present Kathaa, an Open Source web-based Visual Programming Framework for Natural Language Processing (NLP) Systems. Kathaa supports the design, execution and analysis of complex NLP systems by visually connecting NLP components from an easily extensible Module Library. It models NLP systems an edge-labeled Directed Acyclic MultiGraph, and lets the user use publicly co-created modules in their own NLP applications irrespective of their technical proficiency in Natural Language Processing. Kathaa exposes an intuitive web based Interface for the users to interact with and modify complex NLP Systems; and a precise Module definition API to allow easy integration of new state of the art NLP components. Kathaa enables researchers to publish their services in a standardized format to enable the masses to use their services out of the box. The vision of this work is to pave the way for a system like Kathaa, to be the Lego blocks of NLP Research and Applications. As a practical use case we use Kathaa to visually implement the Sampark Hindi-Panjabi Machine Translation Pipeline and the Sampark Hindi-Urdu Machine Translation Pipeline, to demonstrate the fact that Kathaa can handle really complex NLP systems while still being intuitive for the end user.</abstract>
@@ -10723,7 +10723,7 @@
       <title><fixed-case>LAPPS</fixed-case>/Galaxy: Current State and Next Steps</title>
       <author><first>Nancy</first><last>Ide</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>James</first><last>Pustejovsky</last></author>
       <author><first>Marc</first><last>Verhagen</last></author>
       <pages>11–18</pages>
@@ -10776,7 +10776,7 @@
     <paper id="7">
       <title>A non-expert <fixed-case>K</fixed-case>aldi recipe for <fixed-case>V</fixed-case>ietnamese Speech Recognition System</title>
       <author><first>Hieu-Thi</first><last>Luong</last></author>
-      <author><first>Hai-Quan</first><last>Vu</last></author>
+      <author id="hai-quan-vu"><first>Hai-Quan</first><last>Vu</last></author>
       <pages>51–55</pages>
       <url hash="f2b1353d">W16-5207</url>
       <abstract>In this paper we describe a non-expert setup for Vietnamese speech recognition system using Kaldi toolkit. We collected a speech corpus over fifteen hours from about fifty Vietnamese native speakers and using it to test the feasibility of our setup. The essential linguistic components for the Automatic Speech Recognition (ASR) system was prepared basing on the written form of the language instead of expertise knowledge on linguistic and phonology as commonly seen in rich resource languages like English. The modeling of tones by integrating them into the phoneme and using the phonetic decision tree is also discussed. Experimental results showed this setup for ASR systems does yield competitive results while still have potentials for further improvements.</abstract>
@@ -10832,7 +10832,7 @@
     </frontmatter>
     <paper id="1">
       <title>Vectors or Graphs? On Differences of Representations for Distributional Semantic Models</title>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>1–7</pages>
       <url hash="900780d5">W16-5301</url>
       <abstract>Distributional Semantic Models (DSMs) have recently received increased attention, together with the rise of neural architectures for scalable training of dense vector embeddings. While some of the literature even includes terms like ‘vectors’ and ‘dimensionality’ in the definition of DSMs, there are some good reasons why we should consider alternative formulations of distributional models. As an instance, I present a scalable graph-based solution to distributional semantics. The model belongs to the family of ‘count-based’ DSMs, keeps its representation sparse and explicit, and thus fully interpretable. I will highlight some important differences between sparse graph-based and dense vector approaches to DSMs: while dense vector-based models are computationally easier to handle and provide a nice uniform representation that can be compared and combined in many ways, they lack interpretability, provenance and robustness. On the other hand, graph-based sparse models have a more straightforward interpretation, handle sense distinctions more naturally and can straightforwardly be linked to knowledge bases, while lacking the ability to compare arbitrary lexical units and a compositionality operation. Since both representations have their merits, I opt for exploring their combination in the outlook.</abstract>
@@ -10840,7 +10840,7 @@
     </paper>
     <paper id="2">
       <title>“Beware the Jabberwock, dear reader!” Testing the distributional reality of construction semantics</title>
-      <author><first>Gianluca</first><last>Lebani</last></author>
+      <author id="gianluca-e-lebani"><first>Gianluca</first><last>Lebani</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <pages>8–18</pages>
       <url hash="68e7434c">W16-5302</url>
@@ -10868,10 +10868,10 @@
     <paper id="5">
       <title>Semantic Relation Classification: Task Formalisation and Refinement</title>
       <author><first>Vivian</first><last>Santos</last></author>
-      <author><first>Manuela</first><last>Huerliman</last></author>
+      <author id="manuela-huerlimann"><first>Manuela</first><last>Huerliman</last></author>
       <author><first>Brian</first><last>Davis</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>30–39</pages>
       <url hash="72f932c8">W16-5305</url>
       <abstract>The identification of semantic relations between terms within texts is a fundamental task in Natural Language Processing which can support applications requiring a lightweight semantic interpretation model. Currently, semantic relation classification concentrates on relations which are evaluated over open-domain data. This work provides a critique on the set of abstract relations used for semantic relation classification with regard to their ability to express relationships between terms which are found in a domain-specific corpora. Based on this analysis, this work proposes an alternative semantic relation model based on reusing and extending the set of abstract relations present in the DOLCE ontology. The resulting set of relations is well grounded, allows to capture a wide range of relations and could thus be used as a foundation for automatic classification of semantic relations.</abstract>
@@ -10881,7 +10881,7 @@
       <title>The Power of Language Music: <fixed-case>A</fixed-case>rabic Lemmatization through Patterns</title>
       <author><first>Mohammed</first><last>Attia</last></author>
       <author><first>Ayah</first><last>Zirikly</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>40–50</pages>
       <url hash="f50305ee">W16-5306</url>
       <abstract>The interaction between roots and patterns in Arabic has intrigued lexicographers and morphologists for centuries. While roots provide the consonantal building blocks, patterns provide the syllabic vocalic moulds. While roots provide abstract semantic classes, patterns realize these classes in specific instances. In this way both roots and patterns are indispensable for understanding the derivational, morphological and, to some extent, the cognitive aspects of the Arabic language. In this paper we perform lemmatization (a high-level lexical processing) without relying on a lookup dictionary. We use a hybrid approach that consists of a machine learning classifier to predict the lemma pattern for a given stem, and mapping rules to convert stems to their respective lemmas with the vocalization defined by the pattern.</abstract>
@@ -10899,7 +10899,7 @@
     <paper id="8">
       <title>Towards a resource based on users’ knowledge to overcome the Tip of the Tongue problem.</title>
       <author><first>Michael</first><last>Zock</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>57–68</pages>
       <url hash="8f06cce1">W16-5308</url>
       <abstract>Language production is largely a matter of words which, in the case of access problems, can be searched for in an external resource (lexicon, thesaurus). In this kind of dialogue the user provides the momentarily available knowledge concerning the target and the system responds with the best guess(es) it can make given this input. As tip-of-the-tongue (ToT)-studies have shown, people always have some knowledge concerning the target (meaning fragments, number of syllables, ...) even if its complete form is eluding them. We will show here how to tap on this knowledge to build a resource likely to help authors (speakers/writers) to overcome the ToT-problem. Yet, before doing so we need a better understanding of the various kinds of knowledge people have when looking for a word. To this end, we asked crowdworkers to provide some cues to describe a given target and to specify then how each one of them relates to the target, in the hope that this could help others to find the elusive word. Next, we checked how well a given search strategy worked when being applied to differently built lexical networks. The results showed quite dramatic differences, which is not really surprising. After all, different networks are built for different purposes; hence each one of them is more or less suited for a given task. What was more surprising though is the fact that the relational information given by the users did not allow us to find the elusive word in WordNet better than without it.</abstract>
@@ -10967,7 +10967,7 @@
       <title><fixed-case>C</fixed-case>og<fixed-case>AL</fixed-case>ex-<fixed-case>V</fixed-case> Shared Task: <fixed-case>LOPE</fixed-case></title>
       <author><first>Kanan</first><last>Luce</last></author>
       <author><first>Jiaxing</first><last>Yu</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>110–113</pages>
       <url hash="08d2d4db">W16-5315</url>
       <abstract>Automatic discovery of semantically-related words is one of the most important NLP tasks, and has great impact on the theoretical psycholinguistic modeling of the mental lexicon. In this shared task, we employ the word embeddings model to testify two thoughts explicitly or implicitly assumed by the NLP community: (1). Word embedding models can reflect syntagmatic similarities in usage between words to distances in projected vector space. (2). Word embedding models can reflect paradigmatic relationships between words.</abstract>
@@ -10996,7 +10996,7 @@
       <author><first>Stefan</first><last>Bott</last></author>
       <author><first>Nana</first><last>Khvtisavrishvili</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>125–133</pages>
       <url hash="38ba26f9">W16-5318</url>
       <abstract>German particle verbs represent a frequent type of multi-word-expression that forms a highly productive paradigm in the lexicon. Similarly to other multi-word expressions, particle verbs exhibit various levels of compositionality. One of the major obstacles for the study of compositionality is the lack of representative gold standards of human ratings. In order to address this bottleneck, this paper presents such a gold standard data set containing 400 randomly selected German particle verbs. It is balanced across several particle types and three frequency bands, and accomplished by human ratings on the degree of semantic compositionality.</abstract>
@@ -11023,8 +11023,8 @@
     </paper>
     <paper id="21">
       <title>A Proposal for combining “general” and specialized frames</title>
-      <author><first>Marie-Claude</first><last>L’ Homme</last></author>
-      <author><first>Carlos</first><last>Subirats</last></author>
+      <author id="marie-claude-lhomme"><first>Marie-Claude</first><last>L’ Homme</last></author>
+      <author id="carlos-subirats-ruggeberg"><first>Carlos</first><last>Subirats</last></author>
       <author><first>Benoît</first><last>Robichaud</last></author>
       <pages>156–165</pages>
       <url hash="2367d2f2">W16-5321</url>
@@ -11044,7 +11044,7 @@
       <title>Categorization of Semantic Roles for Dictionary Definitions</title>
       <author><first>Vivian</first><last>Silva</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <pages>176–184</pages>
       <url hash="6fb596ee">W16-5323</url>
       <abstract>Understanding the semantic relationships between terms is a fundamental task in natural language processing applications. While structured resources that can express those relationships in a formal way, such as ontologies, are still scarce, a large number of linguistic resources gathering dictionary definitions is becoming available, but understanding the semantic structure of natural language definitions is fundamental to make them useful in semantic interpretation tasks. Based on an analysis of a subset of WordNet’s glosses, we propose a set of semantic roles that compose the semantic structure of a dictionary definition, and show how they are related to the definition’s syntactic configuration, identifying patterns that can be used in the development of information extraction frameworks and semantic models.</abstract>
@@ -11053,7 +11053,7 @@
     <paper id="24">
       <title>Corpus and dictionary development for classifiers/quantifiers towards a <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese machine translation</title>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>185–192</pages>
       <url hash="a1a2f5e6">W16-5324</url>
       <abstract>Although quantifiers/classifiers expressions occur frequently in everyday communications or written documents, there is no description for them in classical bilingual paper dictionaries, nor in machine-readable dictionaries. The paper describes a corpus and dictionary development for quantifiers/classifiers, and their usage in the framework of French-Japanese machine translation (MT). They often cause problems of lexical ambiguity and of set phrase recognition during analysis, in particular for a long-distance language pair like French and Japanese. For the development of a dictionary aiming at ambiguity resolution for expressions including quantifiers and classifiers which may be ambiguous with common nouns, we have annotated our corpus with UWs (interlingual lexemes) of UNL (Universal Networking Language) found on the UNL-jp dictionary. The extraction of potential classifiers/quantifiers from corpus is made by UNLexplorer web service. Keywords : classifiers, quantifiers, phraseology study, corpus annotation, UNL (Universal Networking Language), UWs dictionary, Tori Bank, French-Japanese machine translation (MT).</abstract>
@@ -11064,10 +11064,10 @@
     <meta>
       <booktitle>Proceedings of the 12th Workshop on <fixed-case>A</fixed-case>sian Language Resources (<fixed-case>ALR</fixed-case>12)</booktitle>
       <url hash="017ef3a9">W16-54</url>
-      <editor><first>Koiti</first><last>Hasida</last></editor>
-      <editor><first>Kam-Fai</first><last>Wong</last></editor>
+      <editor id="koiti-hasida"><first>Koiti</first><last>Hasida</last></editor>
+      <editor id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></editor>
       <editor><first>Nicoletta</first><last>Calzorari</last></editor>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <publisher>The COLING 2016 Organizing Committee</publisher>
       <address>Osaka, Japan</address>
       <month>December</month>
@@ -11106,7 +11106,7 @@
       <author><first>Tak-sum</first><last>Wong</last></author>
       <author><first>Xinying</first><last>Chen</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>20–29</pages>
       <url hash="99df3fb5">W16-5403</url>
       <abstract>This article proposes a Universal Dependency Annotation Scheme for Mandarin Chinese, including POS tags and dependency analysis. We identify cases of idiosyncrasy of Mandarin Chinese that are difficult to fit into the current schema which has mainly been based on the descriptions of various Indo-European languages. We discuss differences between our scheme and those of the Stanford Chinese Dependencies and the Chinese Dependency Treebank.</abstract>
@@ -11126,8 +11126,8 @@
       <author><first>Minh-Tien</first><last>Nguyen</last></author>
       <author><first>Dac Viet</first><last>Lai</last></author>
       <author><first>Phong-Khac</first><last>Do</last></author>
-      <author><first>Duc-Vu</first><last>Tran</last></author>
-      <author><first>Minh-Le</first><last>Nguyen</last></author>
+      <author id="duc-vu-tran"><first>Duc-Vu</first><last>Tran</last></author>
+      <author id="minh-le-nguyen"><first>Minh-Le</first><last>Nguyen</last></author>
       <pages>38–48</pages>
       <url hash="0bdbd6d5">W16-5405</url>
       <abstract>This paper presents VSoLSCSum, a Vietnamese linked sentence-comment dataset, which was manually created to treat the lack of standard corpora for social context summarization in Vietnamese. The dataset was collected through the keywords of 141 Web documents in 12 special events, which were mentioned on Vietnamese Web pages. Social users were asked to involve in creating standard summaries and the label of each sentence or comment. The inter-agreement calculated by Cohen’s Kappa among raters after validating is 0.685. To illustrate the potential use of our dataset, a learning to rank method was trained by using a set of local and social features. Experimental results indicate that the summary model trained on our dataset outperforms state-of-the-art baselines in both ROUGE-1 and ROUGE-2 in social context summarization.</abstract>
@@ -11136,7 +11136,7 @@
     <paper id="6">
       <title><fixed-case>BCCWJ</fixed-case>-<fixed-case>D</fixed-case>ep<fixed-case>P</fixed-case>ara: A Syntactic Annotation Treebank on the ‘<fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese’</title>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>49–58</pages>
       <url hash="9471213a">W16-5406</url>
       <abstract>Paratactic syntactic structures are difficult to represent in syntactic dependency tree structures. As such, we propose an annotation schema for syntactic dependency annotation of Japanese, in which coordinate structures are split from and overlaid on bunsetsu-based (base phrase unit) dependency. The schema represents nested coordinate structures, non-constituent conjuncts, and forward sharing as the set of regions. The annotation was performed on the core data of ‘Balanced Corpus of Contemporary Written Japanese’, which comprised about one million words and 1980 samples from six registers, such as newspapers, books, magazines, and web texts.</abstract>
@@ -11167,7 +11167,7 @@
     <paper id="9">
       <title>An Overview of <fixed-case>BPPT</fixed-case>’s <fixed-case>I</fixed-case>ndonesian Language Resources</title>
       <author><first>Gunarso</first><last>Gunarso</last></author>
-      <author><first>Hammam</first><last>Riza</last></author>
+      <author id="hammam-riza"><first>Hammam</first><last>Riza</last></author>
       <pages>73–77</pages>
       <url hash="deb5ef67">W16-5409</url>
       <abstract>This paper describes various Indonesian language resources that Agency for the Assessment and Application of Technology (BPPT) has developed and collected since mid 80’s when we joined MMTS (Multilingual Machine Translation System), an international project coordinated by CICC-Japan to develop a machine translation system for five Asian languages (Bahasa Indonesia, Malay, Thai, Japanese, and Chinese). Since then, we have been actively doing many types of research in the field of statistical machine translation, speech recognition, and speech synthesis which requires many text and speech corpus. Most recent cooperation within ASEAN-IVO is the development of Indonesian ALT (Asian Language Treebank) has added new NLP tools.</abstract>
@@ -11212,7 +11212,7 @@
       <title>Automatic Evaluation of Commonsense Knowledge for Refining <fixed-case>J</fixed-case>apanese <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et</title>
       <author><first>Seiya</first><last>Shudo</last></author>
       <author><first>Rafal</first><last>Rzepka</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <pages>105–112</pages>
       <url hash="4604fbfe">W16-5413</url>
       <abstract>In this paper we present two methods for automatic common sense knowledge evaluation for Japanese entries in ConceptNet ontology. Our proposed methods utilize text-mining approach: one with relation clue words and WordNet synonyms, and one without. Both methods were tested with a blog corpus. The system based on our proposed methods reached relatively high precision score for three relations (MadeOf, UsedFor, AtLocation), which is comparable with previous research using commercial search engines and simpler input. We analyze errors and discuss problems of common sense evaluation, both manual and automatic and propose ideas for further improvements.</abstract>
@@ -11223,7 +11223,7 @@
       <author><first>Mohamed</first><last>Al-Badrashiny</last></author>
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Mahmoud</first><last>Ghoneim</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>113–122</pages>
       <url hash="a2f6beee">W16-5414</url>
       <abstract>Although MWE are relatively morphologically and syntactically fixed expressions, several types of flexibility can be observed in MWE, verbal MWE in particular. Identifying the degree of morphological and syntactic flexibility of MWE is very important for many Lexicographic and NLP tasks. Adding MWE variants/tokens to a dictionary resource requires characterizing the flexibility among other morphosyntactic features. Carrying out the task manually faces several challenges since it is a very laborious task time and effort wise, as well as it will suffer from coverage limitation. The problem is exacerbated in rich morphological languages where the average word in Arabic could have 12 possible inflection forms. Accordingly, in this paper we introduce a semi-automatic Arabic multiwords expressions resource (SAMER). We propose an automated method that identifies the morphological and syntactic flexibility of Arabic Verbal Multiword Expressions (AVMWE). All observed morphological variants and syntactic pattern alternations of an AVMWE are automatically acquired using large scale corpora. We look for three morphosyntactic aspects of AVMWE types investigating derivational and inflectional variations and syntactic templates, namely: 1) inflectional variation (inflectional paradigm) and calculating degree of flexibility; 2) derivational productivity; and 3) identifying and classifying the different syntactic types. We build a comprehensive list of AVMWE. Every token in the AVMWE list is lemmatized and tagged with POS information. We then search Arabic Gigaword and All ATBs for all possible flexible matches. For each AVMWE type we generate: a) a statistically ranked list of MWE-lexeme inflections and syntactic pattern alternations; b) An abstract syntactic template; and c) The most frequent form. Our technique is validated using a Golden MWE annotated list. The results shows that the quality of the generated resource is 80.04%.</abstract>
@@ -11231,7 +11231,7 @@
     </paper>
     <paper id="15">
       <title>Sentiment Analysis for Low Resource Languages: A Study on Informal <fixed-case>I</fixed-case>ndonesian Tweets</title>
-      <author><first>Tuan Anh</first><last>Le</last></author>
+      <author id="tuan-anh-le"><first>Tuan Anh</first><last>Le</last></author>
       <author><first>David</first><last>Moeljadi</last></author>
       <author><first>Yasuhide</first><last>Miura</last></author>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
@@ -11246,7 +11246,7 @@
       <booktitle>Proceedings of the <fixed-case>INLG</fixed-case> 2016 Workshop on Computational Creativity in Natural Language Generation</booktitle>
       <url hash="a35e75a4">W16-55</url>
       <editor><first>Matthew</first><last>Purver</last></editor>
-      <editor><first>Pablo</first><last>Gervás</last></editor>
+      <editor id="pablo-gervas"><first>Pablo</first><last>Gervás</last></editor>
       <editor><first>Sascha</first><last>Griffiths</last></editor>
       <doi>10.18653/v1/W16-55</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -11300,7 +11300,7 @@
     </paper>
     <paper id="5">
       <title>A Challenge to the Third Hoshi Shinichi Award</title>
-      <author><first>Satoshi</first><last>Sato</last></author>
+      <author id="satoshi-sato"><first>Satoshi</first><last>Sato</last></author>
       <pages>31–35</pages>
       <url hash="0b4d119b">W16-5505</url>
       <doi>10.18653/v1/W16-5505</doi>
@@ -11339,7 +11339,7 @@
     </paper>
     <paper id="9">
       <title>Combinatorics vs Grammar: Archeology of Computational Poetry in Tape Mark <fixed-case>I</fixed-case></title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Andrea</first><last>Valle</last></author>
       <pages>61–70</pages>
       <url hash="d32aab8b">W16-5509</url>
@@ -11357,7 +11357,7 @@
       <editor><first>Dirk</first><last>Hovy</last></editor>
       <editor><first>David</first><last>Jurgens</last></editor>
       <editor><first>Brendan</first><last>O’Connor</last></editor>
-      <editor><first>Alice</first><last>Oh</last></editor>
+      <editor id="alice-oh"><first>Alice</first><last>Oh</last></editor>
       <editor><first>Oren</first><last>Tsur</last></editor>
       <editor><first>Svitlana</first><last>Volkova</last></editor>
       <doi>10.18653/v1/W16-56</doi>
@@ -11374,7 +11374,7 @@
     <paper id="1">
       <title>Relating semantic similarity and semantic association to how humans label other people</title>
       <author><first>Kenneth</first><last>Joseph</last></author>
-      <author><first>Kathleen M.</first><last>Carley</last></author>
+      <author id="kathleen-m-carley"><first>Kathleen M.</first><last>Carley</last></author>
       <pages>1–10</pages>
       <url hash="edb98cde">W16-5601</url>
       <doi>10.18653/v1/W16-5601</doi>
@@ -11401,7 +11401,7 @@
     <paper id="4">
       <title>Social Proof: The Impact of Author Traits on Influence Detection</title>
       <author><first>Sara</first><last>Rosenthal</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>27–36</pages>
       <url hash="7e6c860a">W16-5604</url>
       <doi>10.18653/v1/W16-5604</doi>
@@ -11419,7 +11419,7 @@
       <title>User profiling with geo-located posts and demographic data</title>
       <author><first>Adam</first><last>Poulston</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>43–48</pages>
       <url hash="c59f3d5a">W16-5606</url>
       <doi>10.18653/v1/W16-5606</doi>
@@ -11448,7 +11448,7 @@
     </paper>
     <paper id="9">
       <title>Identifying Stance by Analyzing Political Discourse on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>66–75</pages>
       <url hash="426d0425">W16-5609</url>
@@ -11467,9 +11467,9 @@
     </paper>
     <paper id="11">
       <title>The Effects of Data Collection Methods in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Brian</first><last>Jin</last></author>
       <author><first>Bella</first><last>Robinson</last></author>
       <pages>86–91</pages>
@@ -11510,7 +11510,7 @@
       <title>Bag of What? Simple Noun Phrase Extraction for Text Analysis</title>
       <author><first>Abram</first><last>Handler</last></author>
       <author><first>Matthew</first><last>Denny</last></author>
-      <author><first>Hanna</first><last>Wallach</last></author>
+      <author id="hanna-wallach"><first>Hanna</first><last>Wallach</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
       <pages>114–124</pages>
       <url hash="232af62c">W16-5615</url>
@@ -11529,8 +11529,8 @@
       <title>The Clinical Panel: Leveraging Psychological Expertise During <fixed-case>NLP</fixed-case> Research</title>
       <author><first>Glen</first><last>Coppersmith</last></author>
       <author><first>Kristy</first><last>Hollingshead</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
-      <author><first>Molly</first><last>Ireland</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last></author>
       <author><first>Rebecca</first><last>Resnik</last></author>
       <author><first>Kate</first><last>Loveys</last></author>
       <author><first>April</first><last>Foreman</last></author>
@@ -11542,7 +11542,7 @@
     </paper>
     <paper id="18">
       <title>Are You a Racist or Am <fixed-case>I</fixed-case> Seeing Things? Annotator Influence on Hate Speech Detection on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Zeerak</first><last>Waseem</last></author>
+      <author id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></author>
       <pages>138–142</pages>
       <url hash="4f848ee6">W16-5618</url>
       <doi>10.18653/v1/W16-5618</doi>
@@ -11551,8 +11551,8 @@
     <paper id="19">
       <title>Disentangling Topic Models: A Cross-cultural Analysis of Personal Values through Words</title>
       <author><first>Steven</first><last>Wilson</last></author>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Ryan</first><last>Boyd</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="ryan-boyd"><first>Ryan</first><last>Boyd</last></author>
       <author><first>James</first><last>Pennebaker</last></author>
       <pages>143–152</pages>
       <url hash="42755f91">W16-5619</url>
@@ -11622,7 +11622,7 @@
     <paper id="5">
       <title>Automatic Identification of Narrative Diegesis and Point of View</title>
       <author><first>Joshua</first><last>Eisenberg</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>36–46</pages>
       <url hash="7976ff3c">W16-5705</url>
       <doi>10.18653/v1/W16-5705</doi>
@@ -11632,7 +11632,7 @@
       <title>Richer Event Description: Integrating event coreference with temporal, causal and bridging annotation</title>
       <author><first>Tim</first><last>O’Gorman</last></author>
       <author><first>Kristin</first><last>Wright-Bettner</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>47–56</pages>
       <url hash="a92aa888">W16-5706</url>
       <doi>10.18653/v1/W16-5706</doi>
@@ -11640,7 +11640,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>NASTEA</fixed-case>: Investigating Narrative Schemas through Annotated Entities</title>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <author><first>Anthony</first><last>Davis</last></author>
       <pages>57–66</pages>
       <url hash="815b5e71">W16-5707</url>
@@ -11661,10 +11661,10 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Computational Approaches to Code Switching</booktitle>
       <url hash="cacefc0d">W16-58</url>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Pascale</first><last>Fung</last></editor>
       <editor><first>Mahmoud</first><last>Ghoneim</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
       <doi>10.18653/v1/W16-58</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -11679,7 +11679,7 @@
     </frontmatter>
     <paper id="1">
       <title>Challenges of Computational Processing of Code-Switching</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Sarah</first><last>Schulz</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>1–11</pages>
@@ -11689,9 +11689,9 @@
     </paper>
     <paper id="2">
       <title>Simple Tools for Exploring Variation in Code-switching for Linguists</title>
-      <author><first>Gualberto A.</first><last>Guzman</last></author>
+      <author id="gualberto-a-guzman"><first>Gualberto A.</first><last>Guzman</last></author>
       <author><first>Jacqueline</first><last>Serigos</last></author>
-      <author><first>Barbara E.</first><last>Bullock</last></author>
+      <author id="barbara-bullock"><first>Barbara E.</first><last>Bullock</last></author>
       <author><first>Almeida Jacqueline</first><last>Toribio</last></author>
       <pages>12–20</pages>
       <url hash="c122bfa1">W16-5802</url>
@@ -11703,7 +11703,7 @@
       <title>Word-Level Language Identification and Predicting Codeswitching Points in <fixed-case>S</fixed-case>wahili-<fixed-case>E</fixed-case>nglish Language Data</title>
       <author><first>Mario</first><last>Piergallini</last></author>
       <author><first>Rouzbeh</first><last>Shirvani</last></author>
-      <author><first>Gauri</first><last>S. Gautam</last></author>
+      <author id="gauri-shankar-gautam"><first>Gauri</first><last>S. Gautam</last></author>
       <author><first>Mohamed</first><last>Chouikha</last></author>
       <pages>21–29</pages>
       <url hash="ad8ed588">W16-5803</url>
@@ -11752,8 +11752,8 @@
       <title>A Neural Model for Language Identification in Code-Switched Tweets</title>
       <author><first>Aaron</first><last>Jaech</last></author>
       <author><first>George</first><last>Mulcaire</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>60–64</pages>
       <url hash="7316dd9b">W16-5807</url>
       <doi>10.18653/v1/W16-5807</doi>
@@ -11773,7 +11773,7 @@
     <paper id="9">
       <title>Accurate <fixed-case>P</fixed-case>inyin-<fixed-case>E</fixed-case>nglish Codeswitched Language Identification</title>
       <author><first>Meng Xuan</first><last>Xia</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>71–79</pages>
       <url hash="01c0af26">W16-5809</url>
       <doi>10.18653/v1/W16-5809</doi>
@@ -11840,7 +11840,7 @@
       <title>The <fixed-case>H</fixed-case>oward <fixed-case>U</fixed-case>niversity System Submission for the Shared Task in Language Identification in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Codeswitching</title>
       <author><first>Rouzbeh</first><last>Shirvani</last></author>
       <author><first>Mario</first><last>Piergallini</last></author>
-      <author><first>Gauri Shankar</first><last>Gautam</last></author>
+      <author id="gauri-shankar-gautam"><first>Gauri Shankar</first><last>Gautam</last></author>
       <author><first>Mohamed</first><last>Chouikha</last></author>
       <pages>116–120</pages>
       <url hash="1ca58796">W16-5815</url>
@@ -11849,7 +11849,7 @@
     </paper>
     <paper id="16">
       <title>Codeswitching Detection via Lexical Features in Conditional Random Fields</title>
-      <author><first>Prajwol</first><last>Shrestha</last></author>
+      <author id="prajwol-shrestha"><first>Prajwol</first><last>Shrestha</last></author>
       <pages>121–126</pages>
       <url hash="c269e0d6">W16-5816</url>
       <doi>10.18653/v1/W16-5816</doi>
@@ -11857,8 +11857,8 @@
     </paper>
     <paper id="17">
       <title>Language Identification in Code-Switched Text Using Conditional Random Fields and <fixed-case>B</fixed-case>abelnet</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>127–131</pages>
       <url hash="f41d5a44">W16-5817</url>
       <doi>10.18653/v1/W16-5817</doi>
@@ -11879,7 +11879,7 @@
       <url hash="d15c6610">W16-59</url>
       <editor><first>Kai-Wei</first><last>Chang</last></editor>
       <editor><first>Ming-Wei</first><last>Chang</last></editor>
-      <editor><first>Alexander</first><last>Rush</last></editor>
+      <editor id="alexander-m-rush"><first>Alexander</first><last>Rush</last></editor>
       <editor><first>Vivek</first><last>Srikumar</last></editor>
       <doi>10.18653/v1/W16-59</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -11894,7 +11894,7 @@
     </frontmatter>
     <paper id="1">
       <title>Inside-Outside and Forward-Backward Algorithms Are Just Backprop (tutorial paper)</title>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>1–17</pages>
       <url hash="357325c1">W16-5901</url>
       <doi>10.18653/v1/W16-5901</doi>
@@ -11903,7 +11903,7 @@
     <paper id="2">
       <title>Research on attention memory networks as a model for learning natural language inference</title>
       <author><first>Zhuang</first><last>Liu</last></author>
-      <author><first>Degen</first><last>Huang</last></author>
+      <author id="degen-huang"><first>Degen</first><last>Huang</last></author>
       <author><first>Jing</first><last>Zhang</last></author>
       <author><first>Kaiyu</first><last>Huang</last></author>
       <pages>18–24</pages>
@@ -11923,7 +11923,7 @@
     <paper id="4">
       <title>Posterior regularization for Joint Modeling of Multiple Structured Prediction Tasks with Soft Constraints</title>
       <author><first>Kartik</first><last>Goyal</last></author>
-      <author><first>Chris</first><last>Dyer</last></author>
+      <author id="chris-dyer"><first>Chris</first><last>Dyer</last></author>
       <pages>35–43</pages>
       <url hash="06141b17">W16-5904</url>
       <doi>10.18653/v1/W16-5904</doi>
@@ -11942,7 +11942,7 @@
     <paper id="6">
       <title>Introducing <fixed-case>DRAIL</fixed-case> – a Step Towards Declarative Deep Relational Learning</title>
       <author><first>Xiao</first><last>Zhang</last></author>
-      <author><first>Maria Leonor</first><last>Pacheco</last></author>
+      <author id="maria-leonor-pacheco"><first>Maria Leonor</first><last>Pacheco</last></author>
       <author><first>Chang</first><last>Li</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>54–62</pages>
@@ -11952,7 +11952,7 @@
     </paper>
     <paper id="7">
       <title>Unsupervised Neural Hidden <fixed-case>M</fixed-case>arkov Models</title>
-      <author><first>Ke M.</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke M.</first><last>Tran</last></author>
       <author><first>Yonatan</first><last>Bisk</last></author>
       <author><first>Ashish</first><last>Vaswani</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
@@ -11969,9 +11969,9 @@
       <url hash="f06a93e7">W16-60</url>
       <editor><first>Annie</first><last>Louis</last></editor>
       <editor><first>Michael</first><last>Roth</last></editor>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Michael</first><last>White</last></editor>
-      <editor><first>Luke</first><last>Zettlemoyer</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="michael-white"><first>Michael</first><last>White</last></editor>
+      <editor id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></editor>
       <doi>10.18653/v1/W16-60</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Austin, TX</address>
@@ -11986,7 +11986,7 @@
     <paper id="1">
       <title>An Analysis of Prerequisite Skills for Reading Comprehension</title>
       <author><first>Saku</first><last>Sugawara</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>1–5</pages>
       <url hash="efcecdd5">W16-6001</url>
       <doi>10.18653/v1/W16-6001</doi>
@@ -12005,7 +12005,7 @@
     <paper id="3">
       <title>Statistical Script Learning with Recurrent Neural Networks</title>
       <author><first>Karl</first><last>Pichotta</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>11–16</pages>
       <url hash="c5558460">W16-6003</url>
       <doi>10.18653/v1/W16-6003</doi>
@@ -12025,7 +12025,7 @@
     <paper id="5">
       <title>Unsupervised Event Coreference for Abstract Words</title>
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>22–26</pages>
       <url hash="90c6f84c">W16-6005</url>
@@ -12035,7 +12035,7 @@
     <paper id="6">
       <title>Towards Broad-coverage Meaning Representation: The Case of Comparison Structures</title>
       <author><first>Omid</first><last>Bakhshandeh</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>27–31</pages>
       <url hash="406b8258">W16-6006</url>
       <doi>10.18653/v1/W16-6006</doi>
@@ -12053,7 +12053,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>C</fixed-case>2<fixed-case>D</fixed-case>2<fixed-case>E</fixed-case>2: Using Call Centers to Motivate the Use of Dialog and Diarization in Entity Extraction</title>
-      <author><first>Ken</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Ken</first><last>Church</last></author>
       <author><first>Weizhong</first><last>Zhu</last></author>
       <author><first>Jason</first><last>Pelecanos</last></author>
       <pages>35–38</pages>
@@ -12065,7 +12065,7 @@
       <title>Visualizing the Content of a Children’s Story in a Virtual World: Lessons Learned</title>
       <author><first>Quynh Ngoc Thi</first><last>Do</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>39–42</pages>
       <url hash="fb02fe81">W16-6009</url>
       <doi>10.18653/v1/W16-6009</doi>
@@ -12074,7 +12074,7 @@
     <paper id="10">
       <title>Stylistic Transfer in Natural Language Generation Systems Using Recurrent Neural Networks</title>
       <author><first>Jad</first><last>Kabbara</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <pages>43–47</pages>
       <url hash="0fb03252">W16-6010</url>
       <doi>10.18653/v1/W16-6010</doi>
@@ -12107,9 +12107,9 @@
       <booktitle>Proceedings of the Seventh International Workshop on Health Text Mining and Information Analysis</booktitle>
       <url hash="f8e89075">W16-61</url>
       <editor><first>Cyril</first><last>Grouin</last></editor>
-      <editor><first>Thierry</first><last>Hamon</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="thierry-hamon"><first>Thierry</first><last>Hamon</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <doi>10.18653/v1/W16-61</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Auxtin, TX</address>
@@ -12179,8 +12179,8 @@
     <paper id="6">
       <title>Retrofitting Word Vectors of <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Terms to Improve Semantic Similarity Measures</title>
       <author><first>Zhiguo</first><last>Yu</last></author>
-      <author><first>Trevor</first><last>Cohen</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="trevor-cohen"><first>Trevor</first><last>Cohen</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Elmer</first><last>Bernstam</last></author>
       <author><first>Todd</first><last>Johnson</last></author>
       <pages>43–51</pages>
@@ -12210,7 +12210,7 @@
     <paper id="9">
       <title>Citation Analysis with Neural Attention Models</title>
       <author><first>Tsendsuren</first><last>Munkhdalai</last></author>
-      <author><first>John P.</first><last>Lalor</last></author>
+      <author id="john-p-lalor"><first>John P.</first><last>Lalor</last></author>
       <author><first>Hong</first><last>Yu</last></author>
       <pages>69–77</pages>
       <url hash="208dd615">W16-6109</url>
@@ -12220,7 +12220,7 @@
     <paper id="10">
       <title>Replicability of Research in Biomedical Natural Language Processing: a pilot evaluation for a coding task</title>
       <author><first>Aurélie</first><last>Névéol</last></author>
-      <author><first>Kevin</first><last>Cohen</last></author>
+      <author id="k-bretonnel-cohen"><first>Kevin</first><last>Cohen</last></author>
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Aude</first><last>Robert</last></author>
       <pages>78–84</pages>
@@ -12239,8 +12239,8 @@
     <paper id="12">
       <title>Leveraging coreference to identify arms in medical abstracts: An experimental study</title>
       <author><first>Elisa</first><last>Ferracane</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <author><first>Katrin</first><last>Erk</last></author>
       <pages>86–95</pages>
       <url hash="cd3fab3a">W16-6112</url>
@@ -12259,7 +12259,7 @@
     </paper>
     <paper id="14">
       <title>Exploring Query Expansion for Entity Searches in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>106–112</pages>
       <url hash="84f19b6e">W16-6114</url>
@@ -12288,10 +12288,10 @@
     <paper id="1">
       <title>Identifying and Categorizing Disaster-Related Tweets</title>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Leysia</first><last>Palen</last></author>
-      <author><first>Kenneth</first><last>Anderson</last></author>
+      <author id="kenneth-m-anderson"><first>Kenneth</first><last>Anderson</last></author>
       <pages>1–6</pages>
       <url hash="ddcb63d4">W16-6201</url>
       <doi>10.18653/v1/W16-6201</doi>
@@ -12344,9 +12344,9 @@
     </paper>
     <paper id="6">
       <title>Detecting Social Roles in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Sunghwan Mac</first><last>Kim</last></author>
+      <author id="sunghwan-mac-kim"><first>Sunghwan Mac</first><last>Kim</last></author>
       <author><first>Stephen</first><last>Wan</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <pages>34–40</pages>
       <url hash="ed0985a7">W16-6206</url>
       <doi>10.18653/v1/W16-6206</doi>
@@ -12354,7 +12354,7 @@
     </paper>
     <paper id="7">
       <title>Identifying Sensible Participants in Online Discussions</title>
-      <author><first>Siddharth</first><last>Jain</last></author>
+      <author id="siddharth-jain"><first>Siddharth</first><last>Jain</last></author>
       <pages>41–47</pages>
       <url hash="445a36d5">W16-6207</url>
       <doi>10.18653/v1/W16-6207</doi>
@@ -12363,9 +12363,9 @@
     <paper id="8">
       <title>emoji2vec: Learning Emoji Representations from their Description</title>
       <author><first>Ben</first><last>Eisner</last></author>
-      <author><first>Tim</first><last>Rocktäschel</last></author>
+      <author id="tim-rocktaschel"><first>Tim</first><last>Rocktäschel</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Matko</first><last>Bošnjak</last></author>
+      <author id="matko-bosnjak"><first>Matko</first><last>Bošnjak</last></author>
       <author><first>Sebastian</first><last>Riedel</last></author>
       <pages>48–54</pages>
       <url hash="d4a5b199">W16-6208</url>
@@ -12376,7 +12376,7 @@
       <title>Learning Latent Local Conversation Modes for Predicting Comment Endorsement in Online Discussions</title>
       <author><first>Hao</first><last>Fang</last></author>
       <author><first>Hao</first><last>Cheng</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>55–64</pages>
       <url hash="ac270412">W16-6209</url>
       <doi>10.18653/v1/W16-6209</doi>
@@ -12399,7 +12399,7 @@
       <author><first>Yu-Lun</first><last>Hsieh</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
       <author><first>Chun-Han</first><last>Chu</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>74–83</pages>
       <url hash="cf319b66">W16-6211</url>
       <doi>10.18653/v1/W16-6211</doi>
@@ -12410,8 +12410,8 @@
       <author><first>Aaron</first><last>Jaech</last></author>
       <author><first>George</first><last>Mulcaire</last></author>
       <author><first>Shobhit</first><last>Hathi</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>84–93</pages>
       <url hash="3b5da3b3">W16-6212</url>
       <doi>10.18653/v1/W16-6212</doi>
@@ -12421,7 +12421,7 @@
     <paper id="13">
       <title>Human versus Machine Attention in Document Classification: A Dataset with Crowdsourced Annotations</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>94–100</pages>
       <url hash="c93f62af">W16-6213</url>
       <doi>10.18653/v1/W16-6213</doi>
@@ -12432,9 +12432,9 @@
     <meta>
       <booktitle>Proceedings of the 13th International Conference on Natural Language Processing</booktitle>
       <url hash="41519f1d">W16-63</url>
-      <editor><first>Dipti Misra</first><last>Sharma</last></editor>
+      <editor id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></editor>
       <editor><first>Rajeev</first><last>Sangal</last></editor>
-      <editor><first>Anil Kumar</first><last>Singh</last></editor>
+      <editor id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></editor>
       <publisher>NLP Association of India</publisher>
       <address>Varanasi, India</address>
       <month>December</month>
@@ -12455,7 +12455,7 @@
     <paper id="2">
       <title>Integrating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Multiple Sense Embeddings in Vector Semantics</title>
       <author><first>David</first><last>Foley</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>2–9</pages>
       <url hash="c5506d00">W16-6302</url>
       <bibkey>foley-kalita-2016-integrating</bibkey>
@@ -12465,7 +12465,7 @@
       <author><first>Debajyoty</first><last>Banik</last></author>
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>10–19</pages>
       <url hash="969a7a03">W16-6303</url>
       <bibkey>banik-etal-2016-smt</bibkey>
@@ -12473,7 +12473,7 @@
     <paper id="4">
       <title>Composition of Compound Nouns Using Distributional Semantics</title>
       <author><first>Kyra</first><last>Yee</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>20–29</pages>
       <url hash="f9b82938">W16-6304</url>
       <bibkey>yee-kalita-2016-composition</bibkey>
@@ -12499,7 +12499,7 @@
       <title>Sentence Based Discourse Classification for <fixed-case>H</fixed-case>indi Story Text-to-Speech (<fixed-case>TTS</fixed-case>) System</title>
       <author><first>Kumud</first><last>Tripathi</last></author>
       <author><first>Parakrant</first><last>Sarkar</last></author>
-      <author><first>K. Sreenivasa</first><last>Rao</last></author>
+      <author id="k-sreenivasa-rao"><first>K. Sreenivasa</first><last>Rao</last></author>
       <pages>46–54</pages>
       <url hash="0a5aa3a7">W16-6307</url>
       <bibkey>tripathi-etal-2016-sentence</bibkey>
@@ -12508,7 +12508,7 @@
       <title>Biomolecular Event Extraction using a Stacked Generalization based Classifier</title>
       <author><first>Amit</first><last>Majumder</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>55–64</pages>
       <url hash="b770e6d4">W16-6308</url>
       <bibkey>majumder-etal-2016-biomolecular</bibkey>
@@ -12516,14 +12516,14 @@
     <paper id="9">
       <title>Syntax and Pragmatics of Conversation: A Case of <fixed-case>B</fixed-case>angla</title>
       <author><first>Samir</first><last>Karmakar</last></author>
-      <author><first>Soumya Sankar</first><last>Ghosh</last></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya Sankar</first><last>Ghosh</last></author>
       <pages>65–70</pages>
       <url hash="f3b95e78">W16-6309</url>
       <bibkey>karmakar-ghosh-2016-syntax</bibkey>
     </paper>
     <paper id="10">
       <title>Dependency grammars as Haskell programs</title>
-      <author><first>Tomasz</first><last>Obrębski</last></author>
+      <author id="tomasz-obrebski"><first>Tomasz</first><last>Obrębski</last></author>
       <pages>71–80</pages>
       <url hash="1446f3ef">W16-6310</url>
       <bibkey>obrebski-2016-dependency</bibkey>
@@ -12535,7 +12535,7 @@
       <author><first>Sabyasachi</first><last>Kamila</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>81–89</pages>
       <url hash="4192141a">W16-6311</url>
       <bibkey>kumar-etal-2016-improving</bibkey>
@@ -12559,7 +12559,7 @@
     </paper>
     <paper id="14">
       <title>Constraint Grammar-based conversion of Dependency Treebanks</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>109–114</pages>
       <url hash="cf84f494">W16-6314</url>
       <bibkey>bick-2016-constraint</bibkey>
@@ -12568,7 +12568,7 @@
       <title>Meaning Matters: Senses of Words are More Informative than Words for Cross-domain Sentiment Analysis</title>
       <author><first>Raksha</first><last>Sharma</last></author>
       <author><first>Sudha</first><last>Bhingardive</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>115–119</pages>
       <url hash="8dfefafe">W16-6315</url>
       <bibkey>sharma-etal-2016-meaning</bibkey>
@@ -12576,7 +12576,7 @@
     <paper id="16">
       <title><fixed-case>POS</fixed-case> Tagging Experts via Topic Modeling</title>
       <author><first>Atreyee</first><last>Mukherjee</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <author><first>Matthias</first><last>Scheutz</last></author>
       <pages>120–128</pages>
       <url hash="79cd3a1b">W16-6316</url>
@@ -12586,7 +12586,7 @@
       <title>Graph theoretic interpretation of <fixed-case>B</fixed-case>angla traditional grammar</title>
       <author><first>Samir</first><last>Karmakar</last></author>
       <author><first>Sayantani</first><last>Banerjee</last></author>
-      <author><first>Soumya</first><last>Ghosh</last></author>
+      <author id="soumya-sankar-ghosh"><first>Soumya</first><last>Ghosh</last></author>
       <pages>129–136</pages>
       <url hash="c941e057">W16-6317</url>
       <bibkey>karmakar-etal-2016-graph</bibkey>
@@ -12613,7 +12613,7 @@
       <author><first>Shreenivas</first><last>Bharadwaj</last></author>
       <author><first>Monik</first><last>Pamecha</last></author>
       <author><first>Ameya</first><last>Prabhu</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>154–160</pages>
       <url hash="064e837a">W16-6320</url>
       <bibkey>athavale-etal-2016-towards</bibkey>
@@ -12622,7 +12622,7 @@
       <title><fixed-case>V</fixed-case>aidya: A Spoken Dialog System for Health Domain</title>
       <author><first>Prathyusha</first><last>Danda</last></author>
       <author><first>Brij Mohan Lal</first><last>Srivastava</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>161–166</pages>
       <url hash="284394fd">W16-6321</url>
       <bibkey>danda-etal-2016-vaidya</bibkey>
@@ -12639,7 +12639,7 @@
     </paper>
     <paper id="23">
       <title>Keynote Lecture 2: Neural (and other Machine Learning) Approaches to Text Normalization</title>
-      <author><first>Richard</first><last>Sproat</last></author>
+      <author id="richard-sproat"><first>Richard</first><last>Sproat</last></author>
       <pages>177</pages>
       <url hash="a22055d1">W16-6323</url>
       <bibkey>sproat-2016-keynote</bibkey>
@@ -12647,7 +12647,7 @@
     <paper id="24">
       <title>Wisdom of Students: A Consistent Automatic Short Answer Grading Technique</title>
       <author><first>Shourya</first><last>Roy</last></author>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Y.</first><last>Narahari</last></author>
       <pages>178–187</pages>
@@ -12657,18 +12657,18 @@
     <paper id="25">
       <title>A Recurrent Neural Network Architecture for De-identifying Clinical Records</title>
       <author><first/><last>Shweta</last></author>
-      <author><first>Ankit</first><last>Kumar</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Kumar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Sriparna</first><last>Saha</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>188–197</pages>
       <url hash="39566261">W16-6325</url>
       <bibkey>shweta-etal-2016-recurrent</bibkey>
     </paper>
     <paper id="26">
       <title><fixed-case>T</fixed-case>witter Named Entity Extraction and Linking Using Differential Evolution</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>198–207</pages>
       <url hash="531dba80">W16-6326</url>
       <bibkey>sikdar-gamback-2016-twitter</bibkey>
@@ -12685,7 +12685,7 @@
       <title>A Computational Analysis of <fixed-case>M</fixed-case>ahabharata</title>
       <author><first>Debarati</first><last>Das</last></author>
       <author><first>Bhaskarjyoti</first><last>Das</last></author>
-      <author><first>Kavi</first><last>Mahesh</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
       <pages>219–228</pages>
       <url hash="7fc461c2">W16-6328</url>
       <bibkey>das-etal-2016-computational</bibkey>
@@ -12693,14 +12693,14 @@
     <paper id="29">
       <title>Use of Features for Accentuation of ghañanta Words</title>
       <author><first>Samir Janardan</first><last>Sohoni</last></author>
-      <author><first>Malhar A.</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar A.</first><last>Kulkarni</last></author>
       <pages>229–238</pages>
       <url hash="cf452445">W16-6329</url>
       <bibkey>sohoni-kulkarni-2016-use</bibkey>
     </paper>
     <paper id="30">
       <title>Learning to Identify Subjective Sentences</title>
-      <author><first>Girish K.</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish K.</first><last>Palshikar</last></author>
       <author><first>Manoj</first><last>Apte</last></author>
       <author><first>Deepak</first><last>Pandita</last></author>
       <author><first>Vikram</first><last>Singh</last></author>
@@ -12710,10 +12710,10 @@
     </paper>
     <paper id="31">
       <title>Opinion Mining in a Code-Mixed Environment: A Case Study with Government Portals</title>
-      <author><first>Deepak</first><last>Gupta</last></author>
+      <author id="deepak-gupta"><first>Deepak</first><last>Gupta</last></author>
       <author><first>Ankit</first><last>Lamba</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>249–258</pages>
       <url hash="5ce15077">W16-6331</url>
       <bibkey>gupta-etal-2016-opinion</bibkey>
@@ -12721,7 +12721,7 @@
     <paper id="32">
       <title>Use of Semantic Knowledge Base for Enhancement of Coherence of Code-mixed Topic-Based Aspect Clusters</title>
       <author><first>Kavita</first><last>Asnani</last></author>
-      <author><first>Jyoti D</first><last>Pawar</last></author>
+      <author id="jyoti-pawar"><first>Jyoti D</first><last>Pawar</last></author>
       <pages>259–266</pages>
       <url hash="7730f610">W16-6332</url>
       <bibkey>asnani-pawar-2016-use</bibkey>
@@ -12729,7 +12729,7 @@
     <paper id="33">
       <title>Genetic Algorithm (<fixed-case>GA</fixed-case>) Implementation for Feature Selection in <fixed-case>M</fixed-case>anipuri <fixed-case>POS</fixed-case> Tagging</title>
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>267–274</pages>
       <url hash="d9f115a2">W16-6333</url>
       <bibkey>nongmeikapam-bandyopadhyay-2016-genetic</bibkey>
@@ -12754,8 +12754,8 @@
     <paper id="36">
       <title>On Why Coarse Class Classification is Bottleneck in Noun Compound Interpretation</title>
       <author><first>Girishkumar</first><last>Ponkiya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
-      <author><first>Girish K.</first><last>Palshikar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="girish-palshikar"><first>Girish K.</first><last>Palshikar</last></author>
       <pages>293–298</pages>
       <url hash="427b8424">W16-6336</url>
       <bibkey>ponkiya-etal-2016-coarse</bibkey>
@@ -12767,8 +12767,8 @@
       <author><first>Nandini</first><last>Ghag</last></author>
       <author><first>Jai</first><last>Paranjape</last></author>
       <author><first>Nilesh</first><last>Joshi</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>299–304</pages>
       <url hash="8daaeeef">W16-6337</url>
       <bibkey>redkar-etal-2016-verbframator</bibkey>
@@ -12807,9 +12807,9 @@
     <meta>
       <booktitle>Proceedings of the 2nd Deep Machine Translation Workshop</booktitle>
       <url hash="3bf46f74">W16-64</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
-      <editor><first>Gertjan</first><last>van Noord</last></editor>
-      <editor><first>António</first><last>Branco</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
+      <editor id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></editor>
+      <editor id="antonio-branco"><first>António</first><last>Branco</last></editor>
       <publisher>ÚFAL MFF UK</publisher>
       <address>Lisbon, Portugal</address>
       <month>October</month>
@@ -12824,7 +12824,7 @@
       <title><fixed-case>M</fixed-case>oses &amp; Treex Hybrid <fixed-case>MT</fixed-case> Systems Bestiary</title>
       <author><first>Rudolf</first><last>Rosa</last></author>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Ondřej</first><last>Dušek</last></author>
       <url hash="b7815992">W16-6401</url>
@@ -12834,14 +12834,14 @@
     <paper id="2">
       <title>Factoring Adjunction in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Sophie</first><last>Arnoult</last></author>
-      <author><first>Khalil</first><last>Sima’an</last></author>
+      <author id="khalil-simaan"><first>Khalil</first><last>Sima’an</last></author>
       <url hash="f9f14e36">W16-6402</url>
       <pages>11-20</pages>
       <bibkey>arnoult-simaan-2016-factoring</bibkey>
     </paper>
     <paper id="3">
       <title>A Hybrid Approach for Deep Machine Translation</title>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
       <url hash="e8803939">W16-6403</url>
       <pages>21-28</pages>
@@ -12852,7 +12852,7 @@
       <author><first>Eleftherios</first><last>Avramidis</last></author>
       <author><first>Vivien</first><last>Macketanz</last></author>
       <author><first>Aljoscha</first><last>Burchardt</last></author>
-      <author><first>Jindrich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindrich</first><last>Helcl</last></author>
       <author><first>Hans</first><last>Uszkoreit</last></author>
       <url hash="99b6e9e2">W16-6404</url>
       <pages>29-38</pages>
@@ -12861,20 +12861,20 @@
     <paper id="5">
       <title>Adding syntactic structure to bilingual terminology for improved domain adaptation</title>
       <author><first>Mikel</first><last>Artetxe</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>João</first><last>Rodrigues</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>Rodrigues</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>António</first><last>Branco</last></author>
-      <author><first>Eneko</first><last>Agirre</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
       <url hash="448d6504">W16-6405</url>
       <pages>39-46</pages>
       <bibkey>artetxe-etal-2016-adding</bibkey>
     </paper>
     <paper id="6">
       <title>Incorporation of a valency lexicon into a <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> pipeline</title>
-      <author><first>Natalia</first><last>Klyueva</last></author>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
       <url hash="5cd97c57">W16-6406</url>
       <pages>47-53</pages>
       <bibkey>klyueva-kubon-2016-incorporation</bibkey>
@@ -12884,9 +12884,9 @@
     <meta>
       <booktitle>Proceedings of the joint workshop on <fixed-case>NLP</fixed-case> for Computer Assisted Language Learning and <fixed-case>NLP</fixed-case> for Language Acquisition</booktitle>
       <editor><first>Elena</first><last>Volodina</last></editor>
-      <editor><first>Gintarė</first><last>Grigonytė</last></editor>
+      <editor id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></editor>
       <editor><first>Ildikó</first><last>Pilán</last></editor>
-      <editor><first>Kristina Nilsson</first><last>Björkenstam</last></editor>
+      <editor id="kristina-nilsson-bjorkenstam"><first>Kristina Nilsson</first><last>Björkenstam</last></editor>
       <editor><first>Lars</first><last>Borin</last></editor>
       <publisher>LiU Electronic Press</publisher>
       <address>Umeå, Sweden</address>
@@ -12911,7 +12911,7 @@
     </paper>
     <paper id="2">
       <title>Towards error annotation in a learner corpus of <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Iria</first><last>del Río</last></author>
+      <author id="iria-del-rio-gayo"><first>Iria</first><last>del Río</last></author>
       <author><first>Sandra</first><last>Antunes</last></author>
       <author><first>Amália</first><last>Mendes</last></author>
       <author><first>Maarten</first><last>Janssen</last></author>
@@ -12970,7 +12970,7 @@
     </paper>
     <paper id="9">
       <title>Building a learner corpus for <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Ekaterina</first><last>Rakhilina</last></author>
+      <author id="ekaterina-v-rakhilina"><first>Ekaterina</first><last>Rakhilina</last></author>
       <author><first>Anastasia</first><last>Vyrenkova</last></author>
       <author><first>Elmira</first><last>Mustakimova</last></author>
       <author><first>Alina</first><last>Ladygina</last></author>
@@ -13041,10 +13041,10 @@
       <title>Generating summaries of hospitalizations: A new metric to assess the complexity of medical terms and their definitions</title>
       <author><first>Sabita</first><last>Acharya</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Andrew D.</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew D.</first><last>Boyd</last></author>
       <author><first>Karen</first><last>Dunn Lopez</last></author>
       <author><first>Richard</first><last>Cameron</last></author>
-      <author><first>Gail M</first><last>Keenan</last></author>
+      <author id="gail-m-keenan"><first>Gail M</first><last>Keenan</last></author>
       <pages>26–30</pages>
       <url hash="00b6acc1">W16-6604</url>
       <doi>10.18653/v1/W16-6604</doi>
@@ -13052,7 +13052,7 @@
     </paper>
     <paper id="5">
       <title>Designing Algorithms for Referring with Proper Names</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <pages>31–35</pages>
       <url hash="eeefa65b">W16-6605</url>
       <doi>10.18653/v1/W16-6605</doi>
@@ -13064,7 +13064,7 @@
       <author><first>Vassilis</first><last>Plachouras</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <author><first>Hiroko</first><last>Bretz</last></author>
-      <author><first>Jochen</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen</first><last>Leidner</last></author>
       <author><first>Dezhao</first><last>Song</last></author>
       <pages>36–39</pages>
       <url hash="900c5362">W16-6606</url>
@@ -13082,8 +13082,8 @@
     <paper id="8">
       <title>Abstractive Compression of Captions with Attentive Recurrent Neural Networks</title>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Suzan</first><last>Verberne</last></author>
       <pages>41–50</pages>
       <url hash="e462b9a8">W16-6608</url>
@@ -13102,12 +13102,12 @@
     <paper id="10">
       <title>Automatic label generation for news comment clusters</title>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Monica</first><last>Paramita</last></author>
-      <author><first>Emina</first><last>Kurtic</last></author>
+      <author id="monica-lestari-paramita"><first>Monica</first><last>Paramita</last></author>
+      <author id="emina-kurtic"><first>Emina</first><last>Kurtic</last></author>
       <author><first>Adam</first><last>Funk</last></author>
       <author><first>Emma</first><last>Barker</last></author>
       <author><first>Mark</first><last>Hepple</last></author>
-      <author><first>Rob</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Rob</first><last>Gaizauskas</last></author>
       <pages>61–69</pages>
       <url hash="cb62158f">W16-6610</url>
       <doi>10.18653/v1/W16-6610</doi>
@@ -13117,7 +13117,7 @@
       <title>Improving Fluency in Narrative Text Generation With Grammatical Transformations and Probabilistic Parsing</title>
       <author><first>Emily</first><last>Ahn</last></author>
       <author><first>Fabrizio</first><last>Morbini</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <pages>70–73</pages>
       <url hash="408e49ef">W16-6611</url>
       <doi>10.18653/v1/W16-6611</doi>
@@ -13127,7 +13127,7 @@
       <title>The Multilingual Affective Soccer Corpus (<fixed-case>MASC</fixed-case>): Compiling a biased parallel corpus on soccer reportage in <fixed-case>E</fixed-case>nglish, <fixed-case>G</fixed-case>erman and <fixed-case>D</fixed-case>utch</title>
       <author><first>Nadine</first><last>Braun</last></author>
       <author><first>Martijn</first><last>Goudbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>74–78</pages>
       <url hash="e978412c">W16-6612</url>
       <doi>10.18653/v1/W16-6612</doi>
@@ -13135,7 +13135,7 @@
     </paper>
     <paper id="13">
       <title>Challenges of Argument Mining: Generating an Argument Synthesis based on the Qualia Structure</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>79–83</pages>
       <url hash="099011fc">W16-6613</url>
       <doi>10.18653/v1/W16-6613</doi>
@@ -13154,7 +13154,7 @@
     <paper id="15">
       <title>Task demands and individual variation in referring expressions</title>
       <author><first>Adriana</first><last>Baltaretu</last></author>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <pages>89–93</pages>
       <url hash="7ed9501c">W16-6615</url>
       <doi>10.18653/v1/W16-6615</doi>
@@ -13173,7 +13173,7 @@
     <paper id="17">
       <title>Evaluative Pattern Extraction for Automated Text Generation</title>
       <author><first>Chia-Chen</first><last>Lee</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>99–103</pages>
       <url hash="b3b82b51">W16-6617</url>
       <doi>10.18653/v1/W16-6617</doi>
@@ -13182,7 +13182,7 @@
     <paper id="18">
       <title>Statistics-Based Lexical Choice for <fixed-case>NLG</fixed-case> from Quantitative Information</title>
       <author><first>Xiao</first><last>Li</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>104–108</pages>
       <url hash="ef45d8df">W16-6618</url>
@@ -13211,8 +13211,8 @@
     <paper id="21">
       <title>Enabling text readability awareness during the micro planning phase of <fixed-case>NLG</fixed-case> applications</title>
       <author><first>Priscilla</first><last>Moraes</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <pages>121–131</pages>
       <url hash="12d69569">W16-6621</url>
       <doi>10.18653/v1/W16-6621</doi>
@@ -13238,8 +13238,8 @@
     <paper id="24">
       <title>Statistical Natural Language Generation from Tabular Non-textual Data</title>
       <author><first>Joy</first><last>Mahapatra</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>143–152</pages>
       <url hash="deafeb7e">W16-6624</url>
       <doi>10.18653/v1/W16-6624</doi>
@@ -13249,7 +13249,7 @@
       <title>Paraphrase Generation from Latent-Variable <fixed-case>PCFG</fixed-case>s for Semantic Parsing</title>
       <author><first>Shashi</first><last>Narayan</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <pages>153–162</pages>
       <url hash="41102a84">W16-6625</url>
       <doi>10.18653/v1/W16-6625</doi>
@@ -13280,7 +13280,7 @@
       <title>A Challenge Proposal for Narrative Generation Using <fixed-case>CNL</fixed-case>s</title>
       <author><first>Eugenio</first><last>Concepción</last></author>
       <author><first>Gonzalo</first><last>Méndez</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Carlos</first><last>León</last></author>
       <pages>171–173</pages>
       <url hash="62583539">W16-6628</url>
@@ -13298,7 +13298,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>IT</fixed-case>: adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>I</fixed-case>talian</title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Cristina</first><last>Battaglino</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
       <pages>184–192</pages>
@@ -13309,7 +13309,7 @@
     <paper id="31">
       <title>Don’t Mention the Shoe! A Learning to Rank Approach to Content Selection for Image Description Generation</title>
       <author><first>Josiah</first><last>Wang</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <pages>193–202</pages>
       <url hash="8f0b5a89">W16-6631</url>
       <doi>10.18653/v1/W16-6631</doi>
@@ -13354,9 +13354,9 @@
     </paper>
     <paper id="36">
       <title>Towards proper name generation: a corpus analysis</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>222–226</pages>
       <url hash="acf441e7">W16-6636</url>
       <doi>10.18653/v1/W16-6636</doi>
@@ -13373,8 +13373,8 @@
     </paper>
     <paper id="38">
       <title>Enhancing <fixed-case>PTB</fixed-case> <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Grammar-Based Surface Realization</title>
-      <author><first>David L.</first><last>King</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="david-king"><first>David L.</first><last>King</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>232–236</pages>
       <url hash="e6097215">W16-6638</url>
       <doi>10.18653/v1/W16-6638</doi>
@@ -13382,7 +13382,7 @@
     </paper>
     <paper id="39">
       <title>Effect of Data Annotation, Feature Selection and Model Choice on Spatial Description Generation in <fixed-case>F</fixed-case>rench</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Adrian</first><last>Muscat</last></author>
       <author><first>Brandon</first><last>Birmingham</last></author>
       <author><first>Jessie</first><last>Levacher</last></author>
@@ -13396,8 +13396,8 @@
     <paper id="40">
       <title><fixed-case>QGASP</fixed-case>: a Framework for Question Generation Based on Different Levels of Linguistic Information</title>
       <author><first>Hugo</first><last>Patinho Rodrigues</last></author>
-      <author><first>Luísa</first><last>Coheur</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="luisa-coheur"><first>Luísa</first><last>Coheur</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>242–243</pages>
       <url hash="ca09ebc3">W16-6640</url>
       <doi>10.18653/v1/W16-6640</doi>
@@ -13405,7 +13405,7 @@
     </paper>
     <paper id="41">
       <title>Automatic Reports from Spreadsheets: Data Analysis for the Rest of Us</title>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>244–245</pages>
       <url hash="0012c719">W16-6641</url>
       <doi>10.18653/v1/W16-6641</doi>
@@ -13413,7 +13413,7 @@
     </paper>
     <paper id="42">
       <title>Towards Generating Colour Terms for Referents in Photographs: Prefer the Expected or the Unexpected?</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>246–255</pages>
       <url hash="c3410323">W16-6642</url>
@@ -13423,7 +13423,7 @@
     <paper id="43">
       <title>Absolute and Relative Properties in Geographic Referring Expressions</title>
       <author><first>Rodrigo</first><last>de Oliveira</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <pages>256–264</pages>
       <url hash="2daf6b2b">W16-6643</url>
diff --git a/data/xml/W17.xml b/data/xml/W17.xml
index 7bccdc8291..129754f13d 100644
--- a/data/xml/W17.xml
+++ b/data/xml/W17.xml
@@ -26,7 +26,7 @@
       <author><first>Dustin</first><last>Bowers</last></author>
       <author><first>Antti</first><last>Arppe</last></author>
       <author><first>Jordan</first><last>Lachler</last></author>
-      <author><first>Sjur</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur</first><last>Moshagen</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <pages>1–9</pages>
       <url hash="7b6c1c3e">W17-0101</url>
@@ -72,8 +72,8 @@
     </paper>
     <paper id="6">
       <title><fixed-case>STREAMLI</fixed-case>n<fixed-case>ED</fixed-case> Challenges: Aligning Research Interests with Shared Tasks</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
       <author><first>Kristen</first><last>Howell</last></author>
       <author><first>Shobhana</first><last>Chelliah</last></author>
@@ -122,7 +122,7 @@
     <paper id="10">
       <title>Inferring Case Systems from <fixed-case>IGT</fixed-case>: Enriching the Enrichment</title>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Michel</first><last>Lockwood</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Olga</first><last>Zamaraeva</last></author>
@@ -136,8 +136,8 @@
       <author><first>Jordan</first><last>Kodner</last></author>
       <author><first>Spencer</first><last>Caplan</last></author>
       <author><first>Hongzhi</first><last>Xu</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <pages>76–84</pages>
       <url hash="2238476f">W17-0111</url>
       <doi>10.18653/v1/W17-0111</doi>
@@ -198,7 +198,7 @@
       <title>Computational Support for Finding Word Classes: A Case Study of <fixed-case>A</fixed-case>bui</title>
       <author><first>Olga</first><last>Zamaraeva</last></author>
       <author><first>František</first><last>Kratochvíl</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <author><first>Kristen</first><last>Howell</last></author>
       <pages>130–140</pages>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="20">
       <title>Connecting Documentation and Revitalization: A New Approach to Language Apps</title>
-      <author><first>Alexa N.</first><last>Little</last></author>
+      <author id="alexa-n-little"><first>Alexa N.</first><last>Little</last></author>
       <pages>151–155</pages>
       <url hash="3ddb6d19">W17-0120</url>
       <doi>10.18653/v1/W17-0120</doi>
@@ -255,7 +255,7 @@
     <meta>
       <booktitle>Proceedings of the 21st Nordic Conference on Computational Linguistics</booktitle>
       <url hash="6a4b361a">W17-02</url>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Nina</first><last>Tahmasebi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
@@ -270,7 +270,7 @@
     <paper id="1">
       <title>Joint <fixed-case>UD</fixed-case> Parsing of <fixed-case>N</fixed-case>orwegian <fixed-case>B</fixed-case>okmål and <fixed-case>N</fixed-case>ynorsk</title>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Petter</first><last>Hohle</last></author>
       <pages>1–10</pages>
       <url hash="a87ed67e">W17-0201</url>
@@ -280,7 +280,7 @@
       <title>Replacing <fixed-case>OOV</fixed-case> Words For Dependency Parsing With Distributional Semantics</title>
       <author><first>Prasanth</first><last>Kolachina</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>11–19</pages>
       <url hash="eb98ce26">W17-0202</url>
       <bibkey>kolachina-etal-2017-replacing</bibkey>
@@ -336,7 +336,7 @@
       <title><fixed-case>OCR</fixed-case> and post-correction of historical <fixed-case>F</fixed-case>innish texts</title>
       <author><first>Senka</first><last>Drobac</last></author>
       <author><first>Pekka</first><last>Kauppinen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>70–76</pages>
       <url hash="7b6eedf6">W17-0209</url>
       <bibkey>drobac-etal-2017-ocr</bibkey>
@@ -345,7 +345,7 @@
       <title><fixed-case>T</fixed-case>witter Topic Modeling by Tweet Aggregation</title>
       <author><first>Asbjørn</first><last>Steinskog</last></author>
       <author><first>Jonas</first><last>Therkelsen</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>77–86</pages>
       <url hash="b4019689">W17-0210</url>
       <bibkey>steinskog-etal-2017-twitter</bibkey>
@@ -369,8 +369,8 @@
     </paper>
     <paper id="13">
       <title>Using Pseudowords for Algorithm Comparison: An Evaluation Framework for Graph-based Word Sense Induction</title>
-      <author><first>Flavio</first><last>Massimiliano Cecchini</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio</first><last>Massimiliano Cecchini</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>Martin</first><last>Riedl</last></author>
       <pages>105–114</pages>
       <url hash="defbdf59">W17-0213</url>
@@ -378,8 +378,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>N</fixed-case>orth-<fixed-case>S</fixed-case>ámi to <fixed-case>F</fixed-case>innish rule-based machine translation system</title>
-      <author><first>Tommi</first><last>Pirinen</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
       <author><first>Ryan</first><last>Johnson</last></author>
       <author><first>Kevin</first><last>Unhammer</last></author>
@@ -396,7 +396,7 @@
       <author><first>Sandra</first><last>Nystø Rahka</last></author>
       <author><first>Marja-Liisa</first><last>Olthuis</last></author>
       <author><first>Trond</first><last>Trosterud</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>123–131</pages>
       <url hash="939c9087">W17-0215</url>
       <bibkey>antonsen-etal-2017-machine</bibkey>
@@ -404,7 +404,7 @@
     <paper id="16">
       <title><fixed-case>SWEGRAM</fixed-case> – A Web-Based Tool for Automatic Annotation and Analysis of <fixed-case>S</fixed-case>wedish Texts</title>
       <author><first>Jesper</first><last>Näsman</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Anne</first><last>Palmér</last></author>
       <pages>132–141</pages>
       <url hash="3cd47474">W17-0216</url>
@@ -413,7 +413,7 @@
     <paper id="17">
       <title>Optimizing a <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagset for <fixed-case>N</fixed-case>orwegian Dependency Parsing</title>
       <author><first>Petter</first><last>Hohle</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>142–151</pages>
       <url hash="10b2ac87">W17-0217</url>
@@ -451,7 +451,7 @@
     <paper id="21">
       <title>Evaluation of language identification methods using 285 languages</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <pages>183–191</pages>
       <url hash="6a7dcd7b">W17-0221</url>
@@ -460,14 +460,14 @@
     <paper id="22">
       <title>Can We Create a Tool for General Domain Event Analysis?</title>
       <author><first>Siim</first><last>Orasmaa</last></author>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <pages>192–201</pages>
       <url hash="2817b2ca">W17-0222</url>
       <bibkey>orasmaa-kaalep-2017-create</bibkey>
     </paper>
     <paper id="23">
       <title>From Treebank to <fixed-case>P</fixed-case>ropbank: A Semantic-Role and <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Corpus for <fixed-case>D</fixed-case>anish</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>202–210</pages>
       <url hash="3494e7e3">W17-0223</url>
       <bibkey>bick-2017-treebank</bibkey>
@@ -516,7 +516,7 @@
       <author><first>Steinþór</first><last>Steingrímsson</last></author>
       <author><first>Jón</first><last>Guðnason</last></author>
       <author><first>Sigrún</first><last>Helgadóttir</last></author>
-      <author><first>Eiríkur</first><last>Rögnvaldsson</last></author>
+      <author id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></author>
       <pages>237–240</pages>
       <url hash="12f90e28">W17-0229</url>
       <bibkey>steingrimsson-etal-2017-malromur</bibkey>
@@ -541,9 +541,9 @@
       <title>A modernised version of the Glossa corpus search system</title>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Janne</first><last>Bondi Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne</first><last>Bondi Johannessen</last></author>
       <author><first>Michał</first><last>Kosek</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <pages>251–254</pages>
       <url hash="ce196674">W17-0232</url>
       <bibkey>noklestad-etal-2017-modernised</bibkey>
@@ -628,7 +628,7 @@
       <title><fixed-case>W</fixed-case>ordnet extension via word embeddings: Experiments on the <fixed-case>N</fixed-case>orwegian <fixed-case>W</fixed-case>ordnet</title>
       <author><first>Heidi</first><last>Sand</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>298–302</pages>
       <url hash="c4802c7c">W17-0242</url>
       <bibkey>sand-etal-2017-wordnet</bibkey>
@@ -638,7 +638,7 @@
       <author><first>Robert</first><last>Östling</last></author>
       <author><first>Carl</first><last>Börstell</last></author>
       <author><first>Moa</first><last>Gärdenfors</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <pages>303–308</pages>
       <url hash="0b0df314">W17-0243</url>
       <bibkey>ostling-etal-2017-universal</bibkey>
@@ -649,7 +649,7 @@
       <author><first>Evelina</first><last>Rennes</last></author>
       <author><first>Daniel</first><last>Fahlborg</last></author>
       <author><first>Vida</first><last>Johansson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>309–313</pages>
       <url hash="8d165c7e">W17-0244</url>
       <bibkey>falkenjack-etal-2017-services</bibkey>
@@ -664,7 +664,7 @@
     </paper>
     <paper id="46">
       <title><fixed-case>TALERUM</fixed-case> - Learning <fixed-case>D</fixed-case>anish by Doing <fixed-case>D</fixed-case>anish</title>
-      <author><first>Peter</first><last>Juel Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter</first><last>Juel Henrichsen</last></author>
       <pages>318–321</pages>
       <url hash="74178c3f">W17-0246</url>
       <bibkey>juel-henrichsen-2017-talerum</bibkey>
@@ -683,7 +683,7 @@
       <author><first>Victoria</first><last>Rosén</last></author>
       <author><first>Helge</first><last>Dyvik</last></author>
       <author><first>Paul</first><last>Meurer</last></author>
-      <author><first>Koenraad</first><last>De Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>De Smedt</last></author>
       <pages>326–329</pages>
       <url hash="bc9ed3fb">W17-0248</url>
       <bibkey>rosen-etal-2017-exploring</bibkey>
@@ -704,9 +704,9 @@
     <meta>
       <booktitle>Proceedings of the joint workshop on <fixed-case>NLP</fixed-case> for Computer Assisted Language Learning and <fixed-case>NLP</fixed-case> for Language Acquisition</booktitle>
       <editor><first>Elena</first><last>Volodina</last></editor>
-      <editor><first>Gintarė</first><last>Grigonytė</last></editor>
+      <editor id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></editor>
       <editor><first>Ildikó</first><last>Pilán</last></editor>
-      <editor><first>Kristina Nilsson</first><last>Björkenstam</last></editor>
+      <editor id="kristina-nilsson-bjorkenstam"><first>Kristina Nilsson</first><last>Björkenstam</last></editor>
       <editor><first>Lars</first><last>Borin</last></editor>
       <publisher>LiU Electronic Press</publisher>
       <address>Gothenburg, Sweden</address>
@@ -729,7 +729,7 @@
     <paper id="2">
       <title>Challenging learners in their individual zone of proximal development using pedagogic developmental benchmarks of syntactic complexity</title>
       <author><first>Xiaobin</first><last>Chen</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>8-17</pages>
       <url hash="ed284b97">W17-0302</url>
       <bibkey>chen-meurers-2017-challenging</bibkey>
@@ -753,10 +753,10 @@
     </paper>
     <paper id="5">
       <title>Developing a web-based workbook for <fixed-case>E</fixed-case>nglish supporting the interaction of students and teachers</title>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>36-46</pages>
       <url hash="80b0cbd2">W17-0305</url>
       <bibkey>rudzewitz-etal-2017-developing</bibkey>
@@ -765,7 +765,7 @@
       <title>Annotating errors in student texts: First experiences and experiments</title>
       <author><first>Sara</first><last>Stymne</last></author>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Anne</first><last>Palmér</last></author>
       <pages>47-60</pages>
       <url hash="2d571076">W17-0306</url>
@@ -785,7 +785,7 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>N</fixed-case>o<fixed-case>D</fixed-case>a<fixed-case>L</fixed-case>i<fixed-case>D</fixed-case>a 2017 Workshop on Universal Dependencies (<fixed-case>UDW</fixed-case> 2017)</booktitle>
       <url hash="dd3bc789">W17-04</url>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <editor><first>Sebastian</first><last>Schuster</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -800,7 +800,7 @@
     </frontmatter>
     <paper id="1">
       <title>Cross-Lingual Parser Selection for Low-Resource Languages</title>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>1–10</pages>
       <url hash="d03eadea">W17-0401</url>
       <bibkey>agic-2017-cross</bibkey>
@@ -815,7 +815,7 @@
     <paper id="3">
       <title>Increasing Return on Annotation Investment: The Automatic Construction of a <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for <fixed-case>D</fixed-case>utch</title>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>19–26</pages>
       <url hash="ba3caf45">W17-0403</url>
       <bibkey>bouma-van-noord-2017-increasing</bibkey>
@@ -824,7 +824,7 @@
       <title>Converting the <fixed-case>T</fixed-case>ü<fixed-case>B</fixed-case>a-<fixed-case>D</fixed-case>/<fixed-case>Z</fixed-case> Treebank of <fixed-case>G</fixed-case>erman to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <author><first>Ben</first><last>Campbell</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <author><first>Heike</first><last>Telljohann</last></author>
       <pages>27–37</pages>
       <url hash="abd5b713">W17-0404</url>
@@ -835,7 +835,7 @@
       <author><first>Peter</first><last>Dirix</last></author>
       <author><first>Liesbeth</first><last>Augustinus</last></author>
       <author><first>Daniel</first><last>van Niekerk</last></author>
-      <author><first>Frank</first><last>Van Eynde</last></author>
+      <author id="frank-van-eynde"><first>Frank</first><last>Van Eynde</last></author>
       <pages>38–47</pages>
       <url hash="06bafdec">W17-0405</url>
       <bibkey>dirix-etal-2017-universal</bibkey>
@@ -843,7 +843,7 @@
     <paper id="6">
       <title>Elliptic Constructions: Spotting Patterns in <fixed-case>UD</fixed-case> Treebanks</title>
       <author><first>Kira</first><last>Droganova</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>48–57</pages>
       <url hash="90ff2360">W17-0406</url>
       <bibkey>droganova-zeman-2017-elliptic</bibkey>
@@ -858,7 +858,7 @@
     </paper>
     <paper id="8">
       <title>Towards <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Learner <fixed-case>C</fixed-case>hinese</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Herman</first><last>Leung</last></author>
       <author><first>Keying</first><last>Li</last></author>
       <pages>67–71</pages>
@@ -893,7 +893,7 @@
     <paper id="12">
       <title><fixed-case>U</fixed-case>dapi: Universal <fixed-case>API</fixed-case> for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Martin</first><last>Popel</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <author><first>Martin</first><last>Vojtek</last></author>
       <pages>96–101</pages>
       <url hash="876337fc">W17-0412</url>
@@ -902,7 +902,7 @@
     <paper id="13">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>G</fixed-case>reek</title>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Haris</first><last>Papageorgiou</last></author>
+      <author id="harris-papageorgiou"><first>Haris</first><last>Papageorgiou</last></author>
       <pages>102–106</pages>
       <url hash="50827cb8">W17-0413</url>
       <bibkey>prokopidis-papageorgiou-2017-universal</bibkey>
@@ -918,7 +918,7 @@
     <paper id="15">
       <title>Empirically Sampling <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Natalie</first><last>Schluter</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>117–122</pages>
       <url hash="65682550">W17-0415</url>
       <bibkey>schluter-agic-2017-empirically</bibkey>
@@ -927,7 +927,7 @@
       <title>Gapping Constructions in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v2</title>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Matthew</first><last>Lamm</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>123–132</pages>
       <url hash="80ce8a8d">W17-0416</url>
       <bibkey>schuster-etal-2017-gapping</bibkey>
@@ -935,14 +935,14 @@
     <paper id="17">
       <title>Toward <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>inu</title>
       <author><first>Hajime</first><last>Senuma</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>133–139</pages>
       <url hash="8370ff71">W17-0417</url>
       <bibkey>senuma-aizawa-2017-toward</bibkey>
     </paper>
     <paper id="18">
       <title>Automatic Morpheme Segmentation and Labeling in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Resources</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>140–145</pages>
       <url hash="544e22b6">W17-0418</url>
@@ -984,7 +984,7 @@
       <title>Improving <fixed-case>POS</fixed-case> Tagging in <fixed-case>O</fixed-case>ld <fixed-case>S</fixed-case>panish Using <fixed-case>TEITOK</fixed-case></title>
       <author><first>Maarten</first><last>Janssen</last></author>
       <author><first>Josep</first><last>Ausensi</last></author>
-      <author><first>Josep</first><last>Fontana</last></author>
+      <author id="josep-maria-fontana"><first>Josep</first><last>Fontana</last></author>
       <pages>2–6</pages>
       <url hash="6894efdc">W17-0502</url>
       <bibkey>janssen-etal-2017-improving</bibkey>
@@ -1009,7 +1009,7 @@
     </paper>
     <paper id="5">
       <title>Ambiguity in Semantically Related Word Substitutions: an investigation in historical <fixed-case>B</fixed-case>ible translations</title>
-      <author><first>Maria</first><last>Moritz</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Moritz</last></author>
       <author><first>Marco</first><last>Büchler</last></author>
       <pages>18–23</pages>
       <url hash="e0f45a13">W17-0505</url>
@@ -1031,7 +1031,7 @@
       <author><first>Michael</first><last>Hund</last></author>
       <author><first>Frederik</first><last>Dennig</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
-      <author><first>Daniel</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel</first><last>Keim</last></author>
       <pages>32–39</pages>
       <url hash="fdaf11cd">W17-0507</url>
       <bibkey>schatzle-etal-2017-histobankvis</bibkey>
@@ -1048,7 +1048,7 @@
     <paper id="9">
       <title>Data-driven Morphology and Sociolinguistics for Early <fixed-case>M</fixed-case>odern <fixed-case>D</fixed-case>utch</title>
       <author><first>Marijn</first><last>Schraagen</last></author>
-      <author><first>Marjo</first><last>van Koppen</last></author>
+      <author id="marjo-van-koppen"><first>Marjo</first><last>van Koppen</last></author>
       <author><first>Feike</first><last>Dietz</last></author>
       <pages>47–53</pages>
       <url hash="1d605a05">W17-0509</url>
@@ -1071,9 +1071,9 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Computational Linguistics for Uralic Languages</booktitle>
       <url hash="62424784">W17-06</url>
-      <editor><first>Francis M.</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis M.</first><last>Tyers</last></editor>
       <editor><first>Michael</first><last>Rießler</last></editor>
-      <editor><first>Tommi A.</first><last>Pirinen</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A.</first><last>Pirinen</last></editor>
       <editor><first>Trond</first><last>Trosterud</last></editor>
       <doi>10.18653/v1/W17-06</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1177,8 +1177,8 @@
       <venue>cmcl</venue>
       <editor><first>Ted</first><last>Gibson</last></editor>
       <editor><first>Tal</first><last>Linzen</last></editor>
-      <editor><first>Asad</first><last>Sayeed</last></editor>
-      <editor><first>Marten</first><last>van Schijndel</last></editor>
+      <editor id="asad-sayeed"><first>Asad</first><last>Sayeed</last></editor>
+      <editor id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></editor>
       <editor><first>William</first><last>Schuler</last></editor>
     </meta>
     <frontmatter>
@@ -1190,7 +1190,7 @@
       <author><first>Matthew</first><last>Nelson</last></author>
       <author><first>Stanislas</first><last>Dehaene</last></author>
       <author><first>Christophe</first><last>Pallier</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>1–10</pages>
       <url hash="349074b5">W17-0701</url>
       <doi>10.18653/v1/W17-0701</doi>
@@ -1200,7 +1200,7 @@
     <paper id="2">
       <title>Learning an Input Filter for Argument Structure Acquisition</title>
       <author><first>Laurel</first><last>Perkins</last></author>
-      <author><first>Naomi</first><last>Feldman</last></author>
+      <author id="naomi-feldman"><first>Naomi</first><last>Feldman</last></author>
       <author><first>Jeffrey</first><last>Lidz</last></author>
       <pages>11–19</pages>
       <url hash="c80ffb5b">W17-0702</url>
@@ -1211,7 +1211,7 @@
     <paper id="3">
       <title>Grounding sound change in ideal observer models of perception</title>
       <author><first>Zachary</first><last>Burchill</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <pages>20–28</pages>
       <url hash="87b2c0a7">W17-0703</url>
       <doi>10.18653/v1/W17-0703</doi>
@@ -1265,7 +1265,7 @@
     </frontmatter>
     <paper id="1">
       <title>Readers vs. Writers vs. Texts: Coping with Different Perspectives of Text Understanding in Emotion Annotation</title>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>1–12</pages>
       <url hash="d7bb1104">W17-0801</url>
@@ -1276,7 +1276,7 @@
     <paper id="2">
       <title>Finding Good Conversations Online: The <fixed-case>Y</fixed-case>ahoo <fixed-case>N</fixed-case>ews Annotated Comments Corpus</title>
       <author><first>Courtney</first><last>Napoles</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Aasish</first><last>Pappu</last></author>
       <author><first>Enrica</first><last>Rosato</last></author>
       <author><first>Brian</first><last>Provenzale</last></author>
@@ -1298,7 +1298,7 @@
     </paper>
     <paper id="4">
       <title>A Code-Switching Corpus of <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Conversations</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>34–40</pages>
       <url hash="f0963bd3">W17-0804</url>
       <doi>10.18653/v1/W17-0804</doi>
@@ -1307,9 +1307,9 @@
     </paper>
     <paper id="5">
       <title>Annotating omission in statement pairs</title>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <author><first>Amaury</first><last>Delamaire</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>41–45</pages>
       <url hash="e19a854b">W17-0805</url>
       <doi>10.18653/v1/W17-0805</doi>
@@ -1336,7 +1336,7 @@
       <author><first>Chiho</first><last>Toyoshima</last></author>
       <author><first>Mayuka</first><last>Yamamoto</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <pages>57–66</pages>
       <url hash="804d96c4">W17-0807</url>
       <doi>10.18653/v1/W17-0807</doi>
@@ -1346,7 +1346,7 @@
     <paper id="8">
       <title>Representation and Interchange of Linguistic Annotation. An In-Depth, Side-by-Side Comparison of Three Designs</title>
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Emanuele</first><last>Lapponi</last></author>
       <author><first>Stephan</first><last>Oepen</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
@@ -1374,7 +1374,7 @@
       <author><first>Martin</first><last>Tutek</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <author><first>Nataša</first><last>Milić-Frayling</last></author>
       <pages>82–90</pages>
       <url hash="e0f4c074">W17-0810</url>
@@ -1388,7 +1388,7 @@
       <author><first>Arihant</first><last>Gupta</last></author>
       <author><first>Avijit</first><last>Vajpayee</last></author>
       <author><first>Arjit</first><last>Srivastava</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>91–94</pages>
       <url hash="07f7a8a0">W17-0811</url>
       <doi>10.18653/v1/W17-0811</doi>
@@ -1398,8 +1398,8 @@
     <paper id="12">
       <title>The <fixed-case>BEC</fixed-case>au<fixed-case>SE</fixed-case> Corpus 2.0: Annotating Causality and Overlapping Relations</title>
       <author><first>Jesse</first><last>Dunietz</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <pages>95–104</pages>
       <url hash="c15208c9">W17-0812</url>
       <doi>10.18653/v1/W17-0812</doi>
@@ -1436,7 +1436,7 @@
       <url hash="182a95b9">W17-09</url>
       <editor><first>Michael</first><last>Roth</last></editor>
       <editor><first>Nasrin</first><last>Mostafazadeh</last></editor>
-      <editor><first>Nathanael</first><last>Chambers</last></editor>
+      <editor id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></editor>
       <editor><first>Annie</first><last>Louis</last></editor>
       <doi>10.18653/v1/W17-09</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1470,7 +1470,7 @@
       <author><first>Ori</first><last>Shapira</last></author>
       <author><first>Shyam</first><last>Upadhyay</last></author>
       <author><first>Dan</first><last>Roth</last></author>
-      <author><first>Eugenio</first><last>Martinez Camara</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martinez Camara</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
       <pages>12–24</pages>
@@ -1515,7 +1515,7 @@
       <author><first>Michael</first><last>Roth</last></author>
       <author><first>Annie</first><last>Louis</last></author>
       <author><first>Nathanael</first><last>Chambers</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>46–51</pages>
       <url hash="452699ab">W17-0906</url>
       <doi>10.18653/v1/W17-0906</doi>
@@ -1529,7 +1529,7 @@
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Leila</first><last>Zilles</last></author>
       <author><first>Yejin</first><last>Choi</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>52–55</pages>
       <url hash="6eb4b82a">W17-0907</url>
       <doi>10.18653/v1/W17-0907</doi>
@@ -1542,8 +1542,8 @@
       <author><first>Yevgeniy</first><last>Puzikov</last></author>
       <author><first>Andreas</first><last>Rücklé</last></author>
       <author><first>Judith</first><last>Eckle-Kohler</last></author>
-      <author><first>Teresa</first><last>Martin</last></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Teresa</first><last>Martin</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
       <author><first>Daniil</first><last>Sorokin</last></author>
       <author><first>Maxime</first><last>Peyrard</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
@@ -1578,7 +1578,7 @@
       <author><first>Melissa</first><last>Roemmele</last></author>
       <author><first>Sosuke</first><last>Kobayashi</last></author>
       <author><first>Naoya</first><last>Inoue</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <pages>74–80</pages>
       <url hash="0e419e19">W17-0911</url>
       <doi>10.18653/v1/W17-0911</doi>
@@ -1588,7 +1588,7 @@
     <paper id="12">
       <title><fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>): System Description for <fixed-case>LSDS</fixed-case>em’17 Shared Task</title>
       <author id="pranav-goel-umd"><first>Pranav</first><last>Goel</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>81–86</pages>
       <url hash="99b1555f">W17-0912</url>
       <doi>10.18653/v1/W17-0912</doi>
@@ -1612,11 +1612,11 @@
       <url hash="3ab4057a">W17-10</url>
       <editor><first>George</first><last>Giannakopoulos</last></editor>
       <editor><first>Elena</first><last>Lloret</last></editor>
-      <editor><first>John M.</first><last>Conroy</last></editor>
+      <editor id="john-conroy"><first>John M.</first><last>Conroy</last></editor>
       <editor><first>Josef</first><last>Steinberger</last></editor>
       <editor><first>Marina</first><last>Litvak</last></editor>
-      <editor><first>Peter</first><last>Rankel</last></editor>
-      <editor><first>Benoit</first><last>Favre</last></editor>
+      <editor id="peter-a-rankel"><first>Peter</first><last>Rankel</last></editor>
+      <editor id="benoit-favre"><first>Benoit</first><last>Favre</last></editor>
       <doi>10.18653/v1/W17-10</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
@@ -1648,8 +1648,8 @@
       <title>Decoupling Encoder and Decoder Networks for Abstractive Document Summarization</title>
       <author><first>Ying</first><last>Xu</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>7–11</pages>
       <url hash="4d9fbb43">W17-1002</url>
       <doi>10.18653/v1/W17-1002</doi>
@@ -1660,7 +1660,7 @@
       <title>Centroid-based Text Summarization through Compositionality of Word Embeddings</title>
       <author><first>Gaetano</first><last>Rossiello</last></author>
       <author><first>Pierpaolo</first><last>Basile</last></author>
-      <author><first>Giovanni</first><last>Semeraro</last></author>
+      <author id="giovanni-semeraro"><first>Giovanni</first><last>Semeraro</last></author>
       <pages>12–21</pages>
       <url hash="e9738339">W17-1003</url>
       <doi>10.18653/v1/W17-1003</doi>
@@ -1692,8 +1692,8 @@
       <title>Ultra-Concise Multi-genre Summarisation of Web2.0: towards Intelligent Content Generation</title>
       <author><first>Elena</first><last>Lloret</last></author>
       <author><first>Ester</first><last>Boldrini</last></author>
-      <author><first>Patricio</first><last>Martínez-Barco</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
+      <author id="patricio-martinez-barco"><first>Patricio</first><last>Martínez-Barco</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
       <pages>37–46</pages>
       <url hash="12b36eda">W17-1006</url>
       <doi>10.18653/v1/W17-1006</doi>
@@ -1702,9 +1702,9 @@
     </paper>
     <paper id="7">
       <title>Machine Learning Approach to Evaluate <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ingual Summaries</title>
-      <author><first>Samira</first><last>Ellouze</last></author>
+      <author id="samira-ellouze"><first>Samira</first><last>Ellouze</last></author>
       <author><first>Maher</first><last>Jaoua</last></author>
-      <author><first>Lamia</first><last>Hadrich Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich Belguith</last></author>
       <pages>47–54</pages>
       <url hash="809dc5ae">W17-1007</url>
       <doi>10.18653/v1/W17-1007</doi>
@@ -1754,9 +1754,9 @@
     </paper>
     <paper id="3">
       <title>Potential and Limitations of Cross-Domain Sentiment Classification</title>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <author><first>Martin</first><last>Weilenmann</last></author>
-      <author><first>Dirk</first><last>Von Gruenigen</last></author>
+      <author id="dirk-von-gruenigen"><first>Dirk</first><last>Von Gruenigen</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>17–24</pages>
       <url hash="94b4693d">W17-1103</url>
@@ -1769,7 +1769,7 @@
       <author><first>Kevin</first><last>McKelvey</last></author>
       <author><first>Peter</first><last>Goutzounis</last></author>
       <author><first>Stephen</first><last>da Cruz</last></author>
-      <author><first>Nathanael</first><last>Chambers</last></author>
+      <author id="nathanael-chambers"><first>Nathanael</first><last>Chambers</last></author>
       <pages>25–35</pages>
       <url hash="edc144db">W17-1104</url>
       <doi>10.18653/v1/W17-1104</doi>
@@ -1790,7 +1790,7 @@
     <paper id="6">
       <title>A <fixed-case>T</fixed-case>witter Corpus and Benchmark Resources for <fixed-case>G</fixed-case>erman Sentiment Analysis</title>
       <author><first>Mark</first><last>Cieliebak</last></author>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <author><first>Dominic</first><last>Egger</last></author>
       <author><first>Fatih</first><last>Uzdilli</last></author>
       <pages>45–51</pages>
@@ -1804,11 +1804,11 @@
     <meta>
       <booktitle>Proceedings of the Fourth Workshop on <fixed-case>NLP</fixed-case> for Similar Languages, Varieties and Dialects (<fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial)</booktitle>
       <url hash="cb720080">W17-12</url>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
-      <editor><first>Shevin</first><last>Malmasi</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="shervin-malmasi"><first>Shevin</first><last>Malmasi</last></editor>
       <editor><first>Ahmed</first><last>Ali</last></editor>
       <doi>10.18653/v1/W17-12</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1888,7 +1888,7 @@
     </paper>
     <paper id="7">
       <title>Why <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>55–62</pages>
       <url hash="d67feecd">W17-1207</url>
       <doi>10.18653/v1/W17-1207</doi>
@@ -1938,7 +1938,7 @@
     <paper id="12">
       <title>Evaluating <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case> with Non-Linear Mappings</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <pages>102–108</pages>
       <url hash="c41394f0">W17-1212</url>
@@ -1949,8 +1949,8 @@
     <paper id="13">
       <title>A Perplexity-Based Method for Similar Languages Discrimination</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Jose Ramom</first><last>Pichel</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="jose-ramom-pichel-campos"><first>Jose Ramom</first><last>Pichel</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>109–114</pages>
       <url hash="1bdd8bd4">W17-1213</url>
       <doi>10.18653/v1/W17-1213</doi>
@@ -1969,7 +1969,7 @@
     <paper id="15">
       <title>Discriminating between Similar Languages with Word-level Convolutional Neural Networks</title>
       <author><first>Marcelo</first><last>Criscuolo</last></author>
-      <author><first>Sandra Maria</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra Maria</first><last>Aluísio</last></author>
       <pages>124–130</pages>
       <url hash="4b06a9ba">W17-1215</url>
       <doi>10.18653/v1/W17-1215</doi>
@@ -1987,11 +1987,11 @@
     </paper>
     <paper id="17">
       <title>Discriminating between Similar Languages Using a Combination of Typed and Untyped Character N-grams and Words</title>
-      <author><first>Helena</first><last>Gomez</last></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gomez</last></author>
       <author><first>Ilia</first><last>Markov</last></author>
       <author><first>Jorge</first><last>Baptista</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
-      <author><first>David</first><last>Pinto</last></author>
+      <author id="david-pinto"><first>David</first><last>Pinto</last></author>
       <pages>137–145</pages>
       <url hash="e2a90102">W17-1217</url>
       <doi>10.18653/v1/W17-1217</doi>
@@ -2012,7 +2012,7 @@
       <title>When Sparse Traditional Models Outperform Dense Neural Networks: the Curious Case of Discriminating between Similar Languages</title>
       <author><first>Maria</first><last>Medvedeva</last></author>
       <author><first>Martin</first><last>Kroon</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>156–163</pages>
       <url hash="ed1a272a">W17-1219</url>
       <doi>10.18653/v1/W17-1219</doi>
@@ -2061,7 +2061,7 @@
     <paper id="24">
       <title>Exploring Lexical and Syntactic Features for Language Variety Identification</title>
       <author><first>Chris</first><last>van der Lee</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <pages>190–199</pages>
       <url hash="62216166">W17-1224</url>
       <doi>10.18653/v1/W17-1224</doi>
@@ -2071,7 +2071,7 @@
     <paper id="25">
       <title>Learning to Identify <fixed-case>A</fixed-case>rabic and <fixed-case>G</fixed-case>erman Dialects using Multiple Kernels</title>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <pages>200–209</pages>
       <url hash="27ba72d8">W17-1225</url>
       <doi>10.18653/v1/W17-1225</doi>
@@ -2081,9 +2081,9 @@
     <paper id="26">
       <title><fixed-case>S</fixed-case>lavic Forest, <fixed-case>N</fixed-case>orwegian Wood</title>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>David</first><last>Mareček</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>210–219</pages>
       <url hash="5cd2e9d2">W17-1226</url>
       <doi>10.18653/v1/W17-1226</doi>
@@ -2096,9 +2096,9 @@
       <booktitle>Proceedings of the Third <fixed-case>A</fixed-case>rabic Natural Language Processing Workshop</booktitle>
       <url hash="33594ae5">W17-13</url>
       <editor><first>Nizar</first><last>Habash</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
       <editor><first>Kareem</first><last>Darwish</last></editor>
-      <editor><first>Wassim</first><last>El-Hajj</last></editor>
+      <editor id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></editor>
       <editor><first>Hend</first><last>Al-Khalifa</last></editor>
       <editor><first>Houda</first><last>Bouamor</last></editor>
       <editor><first>Nadi</first><last>Tomeh</last></editor>
@@ -2187,7 +2187,7 @@
       <author><first>Salima</first><last>Medhaffar</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Yannick</first><last>Estève</last></author>
-      <author><first>Lamia</first><last>Hadrich-Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Hadrich-Belguith</last></author>
       <pages>55–61</pages>
       <url hash="a33551d2">W17-1307</url>
       <doi>10.18653/v1/W17-1307</doi>
@@ -2201,7 +2201,7 @@
       <author><first>Ahmad</first><last>Ghandour</last></author>
       <author><first>Shady</first><last>Elbassuoni</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <pages>62–71</pages>
       <url hash="e637b36c">W17-1308</url>
       <doi>10.18653/v1/W17-1308</doi>
@@ -2211,7 +2211,7 @@
     <paper id="9">
       <title>A New Error Annotation for Dyslexic texts in <fixed-case>A</fixed-case>rabic</title>
       <author><first>Maha</first><last>Alamri</last></author>
-      <author><first>William J</first><last>Teahan</last></author>
+      <author id="william-j-teahan"><first>William J</first><last>Teahan</last></author>
       <pages>72–78</pages>
       <url hash="ee381efe">W17-1309</url>
       <doi>10.18653/v1/W17-1309</doi>
@@ -2225,7 +2225,7 @@
       <author><first>Abdullah</first><last>M. Mousa</last></author>
       <author><first>Mostafa</first><last>Elhosiny</last></author>
       <author><first>Sherif</first><last>Abdou</last></author>
-      <author><first>Mohsen</first><last>Rashwan</last></author>
+      <author id="mohsen-rashwan"><first>Mohsen</first><last>Rashwan</last></author>
       <pages>79–83</pages>
       <url hash="5193c288">W17-1310</url>
       <doi>10.18653/v1/W17-1310</doi>
@@ -2275,7 +2275,7 @@
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Wassim</first><last>El-Hajj</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <pages>110–118</pages>
       <url hash="e6b443b3">W17-1314</url>
       <doi>10.18653/v1/W17-1314</doi>
@@ -2333,7 +2333,7 @@
       <author><first>Dominique</first><last>Fohr</last></author>
       <author><first>Denis</first><last>Jouvet</last></author>
       <author><first>David</first><last>Langlois</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>157–165</pages>
       <url hash="9f686c75">W17-1319</url>
       <doi>10.18653/v1/W17-1319</doi>
@@ -2344,7 +2344,7 @@
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>rabic</title>
       <author><first>Dima</first><last>Taji</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>166–176</pages>
       <url hash="99bab123">W17-1320</url>
       <doi>10.18653/v1/W17-1320</doi>
@@ -2377,7 +2377,7 @@
     <meta>
       <booktitle>Proceedings of the 6th Workshop on <fixed-case>B</fixed-case>alto-<fixed-case>S</fixed-case>lavic Natural Language Processing</booktitle>
       <url hash="7ce7fb51">W17-14</url>
-      <editor><first>Tomaž</first><last>Erjavec</last></editor>
+      <editor id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></editor>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Lidia</first><last>Pivovarova</last></editor>
       <editor><first>Jan</first><last>Šnajder</last></editor>
@@ -2427,7 +2427,7 @@
     <paper id="4">
       <title>Projecting Multiword Expression Resources on a <fixed-case>P</fixed-case>olish Treebank</title>
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>20–26</pages>
       <url hash="2caf526f">W17-1404</url>
       <doi>10.18653/v1/W17-1404</doi>
@@ -2457,9 +2457,9 @@
     </paper>
     <paper id="7">
       <title><fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>erbian in Comparison with <fixed-case>C</fixed-case>roatian and Other <fixed-case>S</fixed-case>lavic Languages</title>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Mirjana</first><last>Starović</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>39–44</pages>
       <url hash="20f1b36b">W17-1407</url>
@@ -2606,7 +2606,7 @@
     </frontmatter>
     <paper id="1">
       <title>Use Generalized Representations, But Do Not Forget Surface Features</title>
-      <author><first>Nafise Sadat</first><last>Moosavi</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise Sadat</first><last>Moosavi</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <pages>1–7</pages>
       <url hash="7697b515">W17-1501</url>
@@ -2618,8 +2618,8 @@
       <title>Enriching <fixed-case>B</fixed-case>asque Coreference Resolution System using Semantic Knowledge sources</title>
       <author><first>Ander</first><last>Soraluze</last></author>
       <author><first>Olatz</first><last>Arregi</last></author>
-      <author><first>Xabier</first><last>Arregi</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="xabier-arregi"><first>Xabier</first><last>Arregi</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
       <pages>8–16</pages>
       <url hash="6783baa1">W17-1502</url>
       <doi>10.18653/v1/W17-1502</doi>
@@ -2648,8 +2648,8 @@
     </paper>
     <paper id="5">
       <title>Using Coreference Links to Improve <fixed-case>S</fixed-case>panish-to-<fixed-case>E</fixed-case>nglish Machine Translation</title>
-      <author><first>Lesly</first><last>Miculicich Werlen</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich Werlen</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>30–40</pages>
       <url hash="d0c61886">W17-1505</url>
       <doi>10.18653/v1/W17-1505</doi>
@@ -2679,7 +2679,7 @@
       <title>Projection-based Coreference Resolution Using Deep Syntax</title>
       <author><first>Michal</first><last>Novák</last></author>
       <author><first>Anna</first><last>Nedoluzhko</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>56–64</pages>
       <url hash="4b0c7035">W17-1508</url>
       <doi>10.18653/v1/W17-1508</doi>
@@ -2692,11 +2692,11 @@
       <booktitle>Proceedings of the First <fixed-case>ACL</fixed-case> Workshop on Ethics in Natural Language Processing</booktitle>
       <url hash="5b5ad02e">W17-16</url>
       <editor><first>Dirk</first><last>Hovy</last></editor>
-      <editor><first>Shannon</first><last>Spruit</last></editor>
+      <editor id="shannon-l-spruit"><first>Shannon</first><last>Spruit</last></editor>
       <editor><first>Margaret</first><last>Mitchell</last></editor>
-      <editor><first>Emily M.</first><last>Bender</last></editor>
+      <editor id="emily-m-bender"><first>Emily M.</first><last>Bender</last></editor>
       <editor><first>Michael</first><last>Strube</last></editor>
-      <editor><first>Hanna</first><last>Wallach</last></editor>
+      <editor id="hanna-wallach"><first>Hanna</first><last>Wallach</last></editor>
       <doi>10.18653/v1/W17-16</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
@@ -2738,7 +2738,7 @@
     </paper>
     <paper id="4">
       <title>Ethical by Design: Ethics Best Practices for Natural Language Processing</title>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <author><first>Vassilis</first><last>Plachouras</last></author>
       <pages>30–40</pages>
       <url hash="ad5dbabe">W17-1604</url>
@@ -2781,7 +2781,7 @@
     </paper>
     <paper id="8">
       <title>Ethical Considerations in <fixed-case>NLP</fixed-case> Shared Tasks</title>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
       <author><first>Wessel</first><last>Reijers</last></author>
       <author><first>Teresa</first><last>Lynn</last></author>
       <author><first>Joss</first><last>Moorkens</last></author>
@@ -2806,9 +2806,9 @@
     </paper>
     <paper id="10">
       <title>A Short Review of Ethical Challenges in Clinical Natural Language Processing</title>
-      <author><first>Simon</first><last>Šuster</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
       <author><first>Stéphan</first><last>Tulkens</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>80–87</pages>
       <url hash="586abf15">W17-1610</url>
       <doi>10.18653/v1/W17-1610</doi>
@@ -2840,7 +2840,7 @@
       <author><first>Charese</first><last>Smiley</last></author>
       <author><first>Frank</first><last>Schilder</last></author>
       <author><first>Vassilis</first><last>Plachouras</last></author>
-      <author><first>Jochen L.</first><last>Leidner</last></author>
+      <author id="jochen-l-leidner"><first>Jochen L.</first><last>Leidner</last></author>
       <pages>103–108</pages>
       <url hash="ec8f9af3">W17-1613</url>
       <doi>10.18653/v1/W17-1613</doi>
@@ -2852,7 +2852,7 @@
     <meta>
       <booktitle>Proceedings of the 13th Workshop on Multiword Expressions (<fixed-case>MWE</fixed-case> 2017)</booktitle>
       <url hash="af17a21d">W17-17</url>
-      <editor><first>Stella</first><last>Markantonatou</last></editor>
+      <editor id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></editor>
       <editor><first>Carlos</first><last>Ramisch</last></editor>
       <editor><first>Agata</first><last>Savary</last></editor>
       <editor><first>Veronika</first><last>Vincze</last></editor>
@@ -2869,7 +2869,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>P</fixed-case>ara<fixed-case>D</fixed-case>i: Dictionary of Paraphrases of <fixed-case>C</fixed-case>zech Complex Predicates with Light Verbs</title>
-      <author><first>Petra</first><last>Barančíková</last></author>
+      <author id="petra-barancikova"><first>Petra</first><last>Barančíková</last></author>
       <author><first>Václava</first><last>Kettnerová</last></author>
       <pages>1–10</pages>
       <url hash="36ac12ca">W17-1701</url>
@@ -2892,8 +2892,8 @@
     <paper id="3">
       <title>Using bilingual word-embeddings for multilingual collocation extraction</title>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Marcos</first><last>García-Salido</last></author>
-      <author><first>Margarita</first><last>Alonso-Ramos</last></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García-Salido</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita</first><last>Alonso-Ramos</last></author>
       <pages>21–30</pages>
       <url hash="43da10d3">W17-1703</url>
       <doi>10.18653/v1/W17-1703</doi>
@@ -2904,13 +2904,13 @@
       <title>The <fixed-case>PARSEME</fixed-case> Shared Task on Automatic Identification of Verbal Multiword Expressions</title>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Federico</first><last>Sangati</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Fabienne</first><last>Cap</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Ivelina</first><last>Stoyanova</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <pages>31–47</pages>
@@ -2921,7 +2921,7 @@
     </paper>
     <paper id="5">
       <title><fixed-case>US</fixed-case>zeged: Identifying Verbal Multiword Expressions with <fixed-case>POS</fixed-case> Tagging and Parsing Techniques</title>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>Viktória</first><last>Kovács</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <pages>48–53</pages>
@@ -2934,7 +2934,7 @@
       <title>Parsing and <fixed-case>MWE</fixed-case> Detection: Fips at the <fixed-case>PARSEME</fixed-case> Shared Task</title>
       <author><first>Vasiliki</first><last>Foufi</last></author>
       <author><first>Luka</first><last>Nerima</last></author>
-      <author><first>Éric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Éric</first><last>Wehrli</last></author>
       <pages>54–59</pages>
       <url hash="970585aa">W17-1706</url>
       <doi>10.18653/v1/W17-1706</doi>
@@ -2943,7 +2943,7 @@
     </paper>
     <paper id="7">
       <title>Neural Networks for Multi-Word Expression Detection</title>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
       <author><first>Milan</first><last>Straka</last></author>
       <pages>60–65</pages>
@@ -2955,7 +2955,7 @@
     <paper id="8">
       <title>Factoring Ambiguity out of the Prediction of Compositionality for <fixed-case>G</fixed-case>erman Multi-Word Expressions</title>
       <author><first>Stefan</first><last>Bott</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>66–72</pages>
       <url hash="fd546d6b">W17-1708</url>
       <doi>10.18653/v1/W17-1708</doi>
@@ -2974,7 +2974,7 @@
     <paper id="10">
       <title>Understanding Idiomatic Variation</title>
       <author><first>Kristina</first><last>Geeraert</last></author>
-      <author><first>R. Harald</first><last>Baayen</last></author>
+      <author id="harald-baayen"><first>R. Harald</first><last>Baayen</last></author>
       <author><first>John</first><last>Newman</last></author>
       <pages>80–90</pages>
       <url hash="b6e1657f">W17-1710</url>
@@ -3024,11 +3024,11 @@
     </paper>
     <paper id="15">
       <title>Detection of Verbal Multi-Word Expressions via Conditional Random Fields with Syntactic Dependency Features and Semantic Re-Ranking</title>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Lifeng</first><last>Han</last></author>
       <author><first>Erwan</first><last>Moreau</last></author>
       <author><first>Ashjan</first><last>Alsulaimani</last></author>
-      <author><first>Koel Dutta</first><last>Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel Dutta</first><last>Chowdhury</last></author>
       <author><first>Carl</first><last>Vogel</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>114–120</pages>
@@ -3039,10 +3039,10 @@
     </paper>
     <paper id="16">
       <title>A data-driven approach to verbal multiword expression detection. <fixed-case>PARSEME</fixed-case> Shared Task system description paper</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <author><first>Sonia</first><last>Pipa</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <pages>121–126</pages>
       <url hash="d46a3c6d">W17-1716</url>
       <doi>10.18653/v1/W17-1716</doi>
@@ -3052,8 +3052,8 @@
     <paper id="17">
       <title>The <fixed-case>ATILF</fixed-case>-<fixed-case>LLF</fixed-case> System for Parseme Shared Task: a Transition-based Verbal Multiword Expression Tagger</title>
       <author><first>Hazem</first><last>Al Saied</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>127–132</pages>
       <url hash="3cf2c787">W17-1717</url>
       <doi>10.18653/v1/W17-1717</doi>
@@ -3064,7 +3064,7 @@
       <title>Investigating the Opacity of Verb-Noun Multiword Expression Usages in Context</title>
       <author><first>Shiva</first><last>Taslimipoor</last></author>
       <author><first>Omid</first><last>Rohanian</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <author><first>Afsaneh</first><last>Fazly</last></author>
       <pages>133–138</pages>
       <url hash="fc5a3843">W17-1718</url>
@@ -3076,7 +3076,7 @@
       <title>Compositionality in Verb-Particle Constructions</title>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <pages>139–148</pages>
       <url hash="ecf962d2">W17-1719</url>
       <doi>10.18653/v1/W17-1719</doi>
@@ -3086,10 +3086,10 @@
     <paper id="20">
       <title>Rule-Based Translation of <fixed-case>S</fixed-case>panish Verb-Noun Combinations into <fixed-case>B</fixed-case>asque</title>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
-      <author><first>Itziar</first><last>Aduriz</last></author>
-      <author><first>Arantza</first><last>Díaz de Ilarraza</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
-      <author><first>Kepa</first><last>Sarasola</last></author>
+      <author id="itziar-aduriz"><first>Itziar</first><last>Aduriz</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Díaz de Ilarraza</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
+      <author id="kepa-sarasola"><first>Kepa</first><last>Sarasola</last></author>
       <pages>149–154</pages>
       <url hash="a6fe4b99">W17-1720</url>
       <doi>10.18653/v1/W17-1720</doi>
@@ -3107,7 +3107,7 @@
     </paper>
     <paper id="22">
       <title>Simple Compound Splitting for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
       <pages>161–166</pages>
       <url hash="9a8581c5">W17-1722</url>
       <doi>10.18653/v1/W17-1722</doi>
@@ -3126,8 +3126,8 @@
     </paper>
     <paper id="24">
       <title>Comparing Recurring Lexico-Syntactic Trees (<fixed-case>RLT</fixed-case>s) and Ngram Techniques for Extended Phraseology Extraction</title>
-      <author><first>Agnès</first><last>Tutin</last></author>
-      <author><first>Olivier</first><last>Kraif</last></author>
+      <author id="agnes-tutin"><first>Agnès</first><last>Tutin</last></author>
+      <author id="olivier-kraif"><first>Olivier</first><last>Kraif</last></author>
       <pages>176–180</pages>
       <url hash="5c3bf995">W17-1724</url>
       <doi>10.18653/v1/W17-1724</doi>
@@ -3136,8 +3136,8 @@
     </paper>
     <paper id="25">
       <title>Benchmarking Joint Lexical and Syntactic Analysis on Multiword-Rich Data</title>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Héctor</first><last>Martinez Alonso</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martinez Alonso</last></author>
       <pages>181–186</pages>
       <url hash="c2e13338">W17-1725</url>
       <doi>10.18653/v1/W17-1725</doi>
@@ -3148,7 +3148,7 @@
       <title>Semi-Automated Resolution of Inconsistency for a Harmonized Multiword Expression and Dependency Parse Annotation</title>
       <author><first>King</first><last>Chan</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>187–193</pages>
       <url hash="e398df73">W17-1726</url>
       <doi>10.18653/v1/W17-1726</doi>
@@ -3168,7 +3168,7 @@
     <paper id="28">
       <title>Complex Verbs are Different: Exploring the Visual Modality in Multi-Modal Models to Predict Compositionality</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>200–206</pages>
       <url hash="23b250f5">W17-1728</url>
       <doi>10.18653/v1/W17-1728</doi>
@@ -3182,7 +3182,7 @@
       <url hash="18715fbe">W17-18</url>
       <editor><first>Eduardo</first><last>Blanco</last></editor>
       <editor><first>Roser</first><last>Morante</last></editor>
-      <editor><first>Roser</first><last>Saurí</last></editor>
+      <editor id="roser-sauri"><first>Roser</first><last>Saurí</last></editor>
       <doi>10.18653/v1/W17-18</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Valencia, Spain</address>
@@ -3198,9 +3198,9 @@
       <title>Understanding the Semantics of Narratives of Interpersonal Violence through Reader Annotations and Physiological Reactions</title>
       <author><first>Alexander</first><last>Calderwood</last></author>
       <author><first>Elizabeth A.</first><last>Pruett</last></author>
-      <author><first>Raymond</first><last>Ptucha</last></author>
+      <author id="raymond-ptucha"><first>Raymond</first><last>Ptucha</last></author>
       <author><first>Christopher</first><last>Homan</last></author>
-      <author><first>Cecilia</first><last>Ovesdotter Alm</last></author>
+      <author id="cecilia-ovesdotter-alm"><first>Cecilia</first><last>Ovesdotter Alm</last></author>
       <pages>1–9</pages>
       <url hash="760998b5">W17-1801</url>
       <doi>10.18653/v1/W17-1801</doi>
@@ -3210,7 +3210,7 @@
     <paper id="2">
       <title>Intension, Attitude, and Tense Annotation in a High-Fidelity Semantic Representation</title>
       <author><first>Gene</first><last>Kim</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>10–15</pages>
       <url hash="2949b843">W17-1802</url>
       <doi>10.18653/v1/W17-1802</doi>
@@ -3232,7 +3232,7 @@
       <author><first>Federico</first><last>Fancellu</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>22–32</pages>
       <url hash="22ec7b1d">W17-1804</url>
       <doi>10.18653/v1/W17-1804</doi>
@@ -3255,7 +3255,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>The Scope and Focus of Negation: A Complete Annotation Framework for <fixed-case>I</fixed-case>talian</title>
       <author><first>Begoña</first><last>Altuna</last></author>
       <author><first>Anne-Lyse</first><last>Minard</last></author>
-      <author><first>Manuela</first><last>Speranza</last></author>
+      <author id="manuela-speranza"><first>Manuela</first><last>Speranza</last></author>
       <pages>34–42</pages>
       <url hash="ce92b783">W17-1806</url>
       <doi>10.18653/v1/W17-1806</doi>
@@ -3264,9 +3264,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="7">
       <title>Annotation of negation in the <fixed-case>IULA</fixed-case> <fixed-case>S</fixed-case>panish Clinical Record Corpus</title>
-      <author><first>Montserrat</first><last>Marimon</last></author>
+      <author id="montserrat-marimon"><first>Montserrat</first><last>Marimon</last></author>
       <author><first>Jorge</first><last>Vivaldi</last></author>
-      <author><first>Núria</first><last>Bel</last></author>
+      <author id="nuria-bel"><first>Núria</first><last>Bel</last></author>
       <pages>43–52</pages>
       <url hash="79602598">W17-1807</url>
       <doi>10.18653/v1/W17-1807</doi>
@@ -3275,9 +3275,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="8">
       <title>Annotating Negation in <fixed-case>S</fixed-case>panish Clinical Texts</title>
-      <author><first>Noa</first><last>Cruz</last></author>
+      <author id="noa-p-cruz-diaz"><first>Noa</first><last>Cruz</last></author>
       <author><first>Roser</first><last>Morante</last></author>
-      <author><first>Manuel J.</first><last>Maña López</last></author>
+      <author id="manuel-j-mana-lopez"><first>Manuel J.</first><last>Maña López</last></author>
       <author><first>Jacinto</first><last>Mata Vázquez</last></author>
       <author><first>Carlos L.</first><last>Parra Calderón</last></author>
       <pages>53–58</pages>
@@ -3290,7 +3290,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Neural Networks for Negation Cue Detection in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Hangfeng</first><last>He</last></author>
       <author><first>Federico</first><last>Fancellu</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>59–63</pages>
       <url hash="858228d0">W17-1809</url>
       <doi>10.18653/v1/W17-1809</doi>
@@ -3301,7 +3301,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>An open-source tool for negation detection: a maximum-margin approach</title>
       <author><first>Martine</first><last>Enger</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>64–69</pages>
       <url hash="eb93751b">W17-1810</url>
       <doi>10.18653/v1/W17-1810</doi>
@@ -3313,7 +3313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications</booktitle>
       <url hash="041c8e2a">W17-19</url>
-      <editor><first>Jose</first><last>Camacho-Collados</last></editor>
+      <editor id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></editor>
       <editor><first>Mohammad Taher</first><last>Pilehvar</last></editor>
       <doi>10.18653/v1/W17-19</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3329,7 +3329,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="1">
       <title>Compositional Semantics using Feature-Based Models from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Martín</first><last>Pereira-Fariña</last></author>
+      <author id="martin-pereira-farina"><first>Martín</first><last>Pereira-Fariña</last></author>
       <pages>1–11</pages>
       <url hash="ff3228a7">W17-1901</url>
       <doi>10.18653/v1/W17-1901</doi>
@@ -3351,7 +3351,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Improving Verb Metaphor Detection by Propagating Abstractness to Words, Phrases and Individual Senses</title>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>24–30</pages>
       <url hash="a5a18345">W17-1903</url>
       <doi>10.18653/v1/W17-1903</doi>
@@ -3362,7 +3362,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Improving Clinical Diagnosis Inference through Integration of Structured and Unstructured Knowledge</title>
       <author><first>Yuan</first><last>Ling</last></author>
       <author><first>Yuan</first><last>An</last></author>
-      <author><first>Sadid</first><last>Hasan</last></author>
+      <author id="sadid-a-hasan"><first>Sadid</first><last>Hasan</last></author>
       <pages>31–36</pages>
       <url hash="9e125ef9">W17-1904</url>
       <doi>10.18653/v1/W17-1904</doi>
@@ -3373,7 +3373,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Classifying Lexical-semantic Relationships by Exploiting Sense/Concept Representations</title>
       <author><first>Kentaro</first><last>Kanada</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <pages>37–46</pages>
       <url hash="4bbdb6d6">W17-1905</url>
       <doi>10.18653/v1/W17-1905</doi>
@@ -3405,7 +3405,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Creating and Validating Multilingual Semantic Representations for Six Languages: Expert versus Non-Expert Crowds</title>
       <author><first>Mahmoud</first><last>El-Haj</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Stephen</first><last>Wattam</last></author>
       <pages>61–71</pages>
       <url hash="08d23698">W17-1908</url>
@@ -3417,8 +3417,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Using Linked Disambiguated Distributional Networks for Word Sense Disambiguation</title>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Stefano</first><last>Faralli</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>72–78</pages>
       <url hash="0ad68776">W17-1909</url>
       <doi>10.18653/v1/W17-1909</doi>
@@ -3431,7 +3431,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Julie</first><last>Weeds</last></author>
       <author><first>John</first><last>Wilkie</last></author>
       <author><first>Jeremy</first><last>Reffin</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>79–90</pages>
       <url hash="65dea86b">W17-1910</url>
       <doi>10.18653/v1/W17-1910</doi>
@@ -3441,7 +3441,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="11">
       <title>Elucidating Conceptual Properties from Word Embeddings</title>
       <author><first>Kyoung-Rok</first><last>Jang</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>91–95</pages>
       <url hash="e7de8596">W17-1911</url>
       <doi>10.18653/v1/W17-1911</doi>
@@ -3495,7 +3495,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the Sixth Workshop on Vision and Language</booktitle>
       <url hash="8a974f2d">W17-20</url>
-      <editor><first>Anya</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anya</first><last>Belz</last></editor>
       <editor><first>Erkut</first><last>Erdem</last></editor>
       <editor><first>Katerina</first><last>Pastra</last></editor>
       <editor><first>Krystian</first><last>Mikolajczyk</last></editor>
@@ -3534,9 +3534,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="3">
       <title>Learning to Recognize Animals by Watching Documentaries: Using Subtitles as Weak Supervision</title>
-      <author><first>Aparna</first><last>Nurani Venkitasubramanian</last></author>
+      <author id="aparna-nurani-venkitasubramanian"><first>Aparna</first><last>Nurani Venkitasubramanian</last></author>
       <author><first>Tinne</first><last>Tuytelaars</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>21–30</pages>
       <url hash="17837cc5">W17-2003</url>
       <doi>10.18653/v1/W17-2003</doi>
@@ -3596,12 +3596,12 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the Joint <fixed-case>SIGHUM</fixed-case> Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</booktitle>
       <url hash="d35497b9">W17-22</url>
-      <editor><first>Beatrice</first><last>Alex</last></editor>
+      <editor id="beatrice-alex"><first>Beatrice</first><last>Alex</last></editor>
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <doi>10.18653/v1/W17-22</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada</address>
@@ -3616,7 +3616,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="1">
       <title>Metaphor Detection in a Poetry Corpus</title>
       <author><first>Vaibhav</first><last>Kesarwani</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Stan</first><last>Szpakowicz</last></author>
       <author><first>Chris</first><last>Tanasescu</last></author>
       <pages>1–9</pages>
@@ -3640,7 +3640,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Investigating the Relationship between Literary Genres and Emotional Plot Development</title>
       <author><first>Evgeny</first><last>Kim</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>17–26</pages>
       <url hash="fcd861aa">W17-2203</url>
@@ -3650,7 +3650,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>Enjambment Detection in a Large Diachronic Corpus of <fixed-case>S</fixed-case>panish Sonnets</title>
-      <author><first>Pablo</first><last>Ruiz Fabo</last></author>
+      <author id="pablo-ruiz-fabo"><first>Pablo</first><last>Ruiz Fabo</last></author>
       <author><first>Clara</first><last>Martínez Cantón</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Elena</first><last>González-Blanco</last></author>
@@ -3694,7 +3694,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="8">
       <title>An End-to-end Environment for Research Question-Driven Entity Extraction and Network Analysis</title>
-      <author><first>Andre</first><last>Blessing</last></author>
+      <author id="andre-blessing"><first>Andre</first><last>Blessing</last></author>
       <author><first>Nora</first><last>Echelmeyer</last></author>
       <author><first>Markus</first><last>John</last></author>
       <author><first>Nils</first><last>Reiter</last></author>
@@ -3717,7 +3717,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="10">
       <title>Finding a Character’s Voice: Stylome Classification on Literary Characters</title>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <author><first>Ana Sabina</first><last>Uban</last></author>
       <pages>78–82</pages>
       <url hash="b93f88d9">W17-2210</url>
@@ -3737,7 +3737,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>Speeding up corpus development for linguistic research: language documentation and acquisition in <fixed-case>R</fixed-case>omansh Tuatschin</title>
       <author><first>Géraldine</first><last>Walther</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>89–94</pages>
       <url hash="936b5e5f">W17-2212</url>
       <doi>10.18653/v1/W17-2212</doi>
@@ -3761,7 +3761,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="14">
       <title>A Dataset for <fixed-case>S</fixed-case>anskrit Word Segmentation</title>
       <author><first>Amrith</first><last>Krishna</last></author>
-      <author><first>Pavan Kumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavan Kumar</first><last>Satuluri</last></author>
       <author><first>Pawan</first><last>Goyal</last></author>
       <pages>105–114</pages>
       <url hash="6a19f3e8">W17-2214</url>
@@ -3784,10 +3784,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 16th <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Workshop</booktitle>
       <url hash="7b6d839b">W17-23</url>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <doi>10.18653/v1/W17-23</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada,</address>
@@ -3826,8 +3826,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Insights into Analogy Completion from the Biomedical Domain</title>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
-      <author><first>Albert</first><last>Lai</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert</first><last>Lai</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>19–28</pages>
       <url hash="2778818f">W17-2303</url>
       <doi>10.18653/v1/W17-2303</doi>
@@ -3836,7 +3836,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>Deep learning for extracting protein-protein interactions from biomedical literature</title>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>29–38</pages>
       <url hash="00759987">W17-2304</url>
@@ -3861,7 +3861,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Konstantinos</first><last>Bougiatiotis</last></author>
       <author><first>Anastasia</first><last>Krithara</last></author>
       <author><first>Georgios</first><last>Paliouras</last></author>
-      <author><first>Ioannis</first><last>Kakadiaris</last></author>
+      <author id="ioannis-kakadiaris"><first>Ioannis</first><last>Kakadiaris</last></author>
       <pages>48–57</pages>
       <url hash="bf46e5a9">W17-2306</url>
       <doi>10.18653/v1/W17-2306</doi>
@@ -3876,7 +3876,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Aditya</first><last>Chandrasekar</last></author>
       <author><first>Zi</first><last>Yang</last></author>
       <author><first>Niloy</first><last>Gupta</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>58–66</pages>
       <url hash="3ec8bb7b">W17-2307</url>
       <doi>10.18653/v1/W17-2307</doi>
@@ -3885,7 +3885,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="8">
       <title><fixed-case>M</fixed-case>acquarie <fixed-case>U</fixed-case>niversity at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 5b – Query-based Summarisation Techniques for Selecting the Ideal Answers</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>67–75</pages>
       <url hash="bd71a0d2">W17-2308</url>
       <doi>10.18653/v1/W17-2308</doi>
@@ -3920,11 +3920,11 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="11">
       <title>Creation and evaluation of a dictionary-based tagger for virus species and proteins</title>
-      <author><first>Helen</first><last>Cook</last></author>
+      <author id="helen-v-cook"><first>Helen</first><last>Cook</last></author>
       <author><first>Rūdolfs</first><last>Bērziņš</last></author>
       <author><first>Cristina Leal</first><last>Rodrıguez</last></author>
       <author><first>Juan Miguel</first><last>Cejuela</last></author>
-      <author><first>Lars Juhl</first><last>Jensen</last></author>
+      <author id="lars-juhl-jensen"><first>Lars Juhl</first><last>Jensen</last></author>
       <pages>91–98</pages>
       <url hash="85fcf22e">W17-2311</url>
       <doi>10.18653/v1/W17-2311</doi>
@@ -3934,8 +3934,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>Representation of complex terms in a vector space structured by an ontology for a normalization task</title>
       <author><first>Arnaud</first><last>Ferré</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
-      <author><first>Claire</first><last>Nédellec</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="claire-nedellec"><first>Claire</first><last>Nédellec</last></author>
       <pages>99–106</pages>
       <url hash="1b04892a">W17-2312</url>
       <doi>10.18653/v1/W17-2312</doi>
@@ -3944,7 +3944,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="13">
       <title>Improving Correlation with Human Judgments by Integrating Semantic Similarity with Second–Order Vectors</title>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <pages>107–116</pages>
       <url hash="be423f65">W17-2313</url>
@@ -3968,7 +3968,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Sudha</first><last>Rao</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>126–135</pages>
       <url hash="a166611d">W17-2315</url>
       <doi>10.18653/v1/W17-2315</doi>
@@ -3981,7 +3981,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Abeed</first><last>Sarker</last></author>
       <author><first>Masoud</first><last>Rouhizadeh</last></author>
       <author><first>Karen</first><last>O’Connor</last></author>
-      <author><first>Graciela</first><last>Gonzalez</last></author>
+      <author id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez</last></author>
       <pages>136–142</pages>
       <url hash="2cf000df">W17-2316</url>
       <doi>10.18653/v1/W17-2316</doi>
@@ -3991,8 +3991,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="17">
       <title>Unsupervised Context-Sensitive Spelling Correction of Clinical Free-Text with Word and Character N-Gram Embeddings</title>
       <author><first>Pieter</first><last>Fivez</last></author>
-      <author><first>Simon</first><last>Šuster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>143–148</pages>
       <url hash="5321a5f0">W17-2317</url>
       <doi>10.18653/v1/W17-2317</doi>
@@ -4002,7 +4002,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="18">
       <title>Characterization of Divergence in Impaired Speech of <fixed-case>ALS</fixed-case> Patients</title>
       <author><first>Archna</first><last>Bhatia</last></author>
-      <author><first>Bonnie</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
       <author><first>Kristy</first><last>Hollingshead</last></author>
       <author><first>Samuel L.</first><last>Phillips</last></author>
       <author><first>Barbara</first><last>McKenzie</last></author>
@@ -4015,10 +4015,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="19">
       <title>Deep Learning for Punctuation Restoration in Medical Reports</title>
       <author><first>Wael</first><last>Salloum</last></author>
-      <author><first>Greg</first><last>Finley</last></author>
+      <author id="gregory-finley"><first>Greg</first><last>Finley</last></author>
       <author><first>Erik</first><last>Edwards</last></author>
       <author><first>Mark</first><last>Miller</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>159–164</pages>
       <url hash="71190cfb">W17-2319</url>
       <doi>10.18653/v1/W17-2319</doi>
@@ -4027,7 +4027,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="20">
       <title>Unsupervised Domain Adaptation for Clinical Negation Detection</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Hadi</first><last>Amiri</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -4039,12 +4039,12 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="21">
       <title><fixed-case>B</fixed-case>io<fixed-case>C</fixed-case>reative <fixed-case>VI</fixed-case> Precision Medicine Track: creating a training corpus for mining protein-protein interactions affected by mutations</title>
-      <author><first>Rezarta</first><last>Islamaj Doğan</last></author>
+      <author id="rezarta-islamaj-dogan"><first>Rezarta</first><last>Islamaj Doğan</last></author>
       <author><first>Andrew</first><last>Chatr-aryamontri</last></author>
       <author><first>Sun</first><last>Kim</last></author>
       <author><first>Chih-Hsuan</first><last>Wei</last></author>
-      <author><first>Yifan</first><last>Peng</last></author>
-      <author><first>Donald</first><last>Comeau</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
+      <author id="donald-c-comeau"><first>Donald</first><last>Comeau</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>171–175</pages>
       <url hash="3d101687">W17-2321</url>
@@ -4055,7 +4055,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="22">
       <title>Painless Relation Extraction with Kindred</title>
       <author><first>Jake</first><last>Lever</last></author>
-      <author><first>Steven</first><last>Jones</last></author>
+      <author id="steven-jm-jones"><first>Steven</first><last>Jones</last></author>
       <pages>176–183</pages>
       <url hash="37da6480">W17-2322</url>
       <doi>10.18653/v1/W17-2322</doi>
@@ -4066,7 +4066,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Noise Reduction Methods for Distantly Supervised Biomedical Relation Extraction</title>
       <author><first>Gang</first><last>Li</last></author>
       <author><first>Cathy</first><last>Wu</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>184–193</pages>
       <url hash="d16de010">W17-2323</url>
       <doi>10.18653/v1/W17-2323</doi>
@@ -4104,7 +4104,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>A.S.M. Ashique</first><last>Mahmood</last></author>
       <author><first>Karen</first><last>Ross</last></author>
       <author><first>Cathy</first><last>Wu</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>206–215</pages>
       <url hash="ae45d653">W17-2326</url>
       <doi>10.18653/v1/W17-2326</doi>
@@ -4119,7 +4119,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Sophia</first><last>Katrenko</last></author>
       <author><first>Pascal</first><last>Coupet</last></author>
       <author><first>Marius</first><last>Doornenbal</last></author>
-      <author><first>Michelle</first><last>Gregory</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
       <pages>216–221</pages>
       <url hash="6656f415">W17-2327</url>
       <doi>10.18653/v1/W17-2327</doi>
@@ -4153,7 +4153,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="30">
       <title>Protein Word Detection using Text Segmentation Techniques</title>
       <author><first>Devi</first><last>Ganesan</last></author>
-      <author><first>Ashish V.</first><last>Tendulkar</last></author>
+      <author id="ashish-v-tendulkar"><first>Ashish V.</first><last>Tendulkar</last></author>
       <author><first>Sutanu</first><last>Chakraborti</last></author>
       <pages>238–246</pages>
       <url hash="b36be337">W17-2330</url>
@@ -4197,7 +4197,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Evaluating Feature Extraction Methods for Knowledge-based Biomedical Word Sense Disambiguation</title>
       <author><first>Sam</first><last>Henry</last></author>
       <author><first>Clint</first><last>Cuffy</last></author>
-      <author><first>Bridget</first><last>McInnes</last></author>
+      <author id="bridget-mcinnes"><first>Bridget</first><last>McInnes</last></author>
       <pages>272–281</pages>
       <url hash="d696999e">W17-2334</url>
       <doi>10.18653/v1/W17-2334</doi>
@@ -4206,7 +4206,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="35">
       <title>Investigating the Documentation of Electronic Cigarette Use in the Veteran Affairs Electronic Health Record: A Pilot Study</title>
-      <author><first>Danielle</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle</first><last>Mowery</last></author>
       <author><first>Brett</first><last>South</last></author>
       <author><first>Olga</first><last>Patterson</last></author>
       <author><first>Shu-Hong</first><last>Zhu</last></author>
@@ -4220,10 +4220,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="36">
       <title>Automated Preamble Detection in Dictated Medical Reports</title>
       <author><first>Wael</first><last>Salloum</last></author>
-      <author><first>Greg</first><last>Finley</last></author>
+      <author id="gregory-finley"><first>Greg</first><last>Finley</last></author>
       <author><first>Erik</first><last>Edwards</last></author>
       <author><first>Mark</first><last>Miller</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>287–295</pages>
       <url hash="f97dead9">W17-2336</url>
       <doi>10.18653/v1/W17-2336</doi>
@@ -4233,7 +4233,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="37">
       <title>A Biomedical Question Answering System in <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 2017</title>
       <author><first>Mourad</first><last>Sarrouti</last></author>
-      <author><first>Said</first><last>Ouatik El Alaoui</last></author>
+      <author id="said-ouatik-el-alaoui"><first>Said</first><last>Ouatik El Alaoui</last></author>
       <pages>296–301</pages>
       <url hash="26016268">W17-2337</url>
       <doi>10.18653/v1/W17-2337</doi>
@@ -4245,7 +4245,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Kevin</first><last>Patel</last></author>
       <author><first>Divya</first><last>Patel</last></author>
       <author><first>Mansi</first><last>Golakiya</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Nilesh</first><last>Birari</last></author>
       <pages>302–306</pages>
       <url hash="a791fd02">W17-2338</url>
@@ -4277,7 +4277,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="41">
       <title>Representations of Time Expressions for Temporal Relation Extraction with Convolutional Neural Networks</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -4290,7 +4290,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="42">
       <title>Automatic Diagnosis Coding of Radiology Reports: A Comparison of Deep Learning and Conventional Classification Methods</title>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Hamed</first><last>Hassanzadeh</last></author>
       <author><first>Anthony</first><last>Nguyen</last></author>
       <pages>328–332</pages>
@@ -4301,9 +4301,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="43">
       <title>Automatic classification of doctor-patient questions for a virtual patient record query task</title>
-      <author><first>Leonardo</first><last>Campillos Llanos</last></author>
-      <author><first>Sophie</first><last>Rosset</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="leonardo-campillos-llanos"><first>Leonardo</first><last>Campillos Llanos</last></author>
+      <author id="sophie-rosset"><first>Sophie</first><last>Rosset</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <pages>333–341</pages>
       <url hash="4b1bae3e">W17-2343</url>
       <doi>10.18653/v1/W17-2343</doi>
@@ -4326,7 +4326,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="45">
       <title>Clinical Event Detection with Hybrid Neural Architecture</title>
       <author><first>Adyasha</first><last>Maharana</last></author>
-      <author><first>Meliha</first><last>Yetisgen</last></author>
+      <author id="meliha-yetisgen-yildiz"><first>Meliha</first><last>Yetisgen</last></author>
       <pages>351–355</pages>
       <url hash="214f9c11">W17-2345</url>
       <doi>10.18653/v1/W17-2345</doi>
@@ -4336,8 +4336,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="46">
       <title>Extracting Personal Medical Events for User Timeline Construction using Minimal Supervision</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
-      <author><first>Chris</first><last>Bogart</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="christopher-bogart"><first>Chris</first><last>Bogart</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>356–364</pages>
       <url hash="2b08a3fd">W17-2346</url>
       <doi>10.18653/v1/W17-2346</doi>
@@ -4380,7 +4380,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <editor><first>Martin</first><last>Riedl</last></editor>
       <editor><first>Swapna</first><last>Somasundaran</last></editor>
       <editor><first>Goran</first><last>Glavaš</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <doi>10.18653/v1/W17-24</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, Canada</address>
@@ -4398,7 +4398,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Henrique Ferraz</first><last>de Arruda</last></author>
       <author><first>Thales</first><last>Sinelli</last></author>
       <author><first>Luciano da Fontoura</first><last>Costa</last></author>
-      <author><first>Diego Raphael</first><last>Amancio</last></author>
+      <author id="diego-raphael-amancio"><first>Diego Raphael</first><last>Amancio</last></author>
       <pages>1–10</pages>
       <url hash="dc12c4fb">W17-2401</url>
       <doi>10.18653/v1/W17-2401</doi>
@@ -4421,8 +4421,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Merging knowledge bases in different languages</title>
       <author><first>Jerónimo</first><last>Hernández-González</last></author>
-      <author><first>Estevam R.</first><last>Hruschka Jr.</last></author>
-      <author><first>Tom M.</first><last>Mitchell</last></author>
+      <author id="estevam-r-hruschka-jr"><first>Estevam R.</first><last>Hruschka Jr.</last></author>
+      <author id="tom-mitchell"><first>Tom M.</first><last>Mitchell</last></author>
       <pages>21–29</pages>
       <url hash="29ea1747">W17-2403</url>
       <doi>10.18653/v1/W17-2403</doi>
@@ -4431,7 +4431,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>Parameter Free Hierarchical Graph-Based Clustering for Analyzing Continuous Word Embeddings</title>
-      <author><first>Thomas Alexander</first><last>Trost</last></author>
+      <author id="thomas-alexander-trost"><first>Thomas Alexander</first><last>Trost</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>30–38</pages>
       <url hash="65558f5c">W17-2404</url>
@@ -4452,8 +4452,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="6">
       <title>Graph Methods for Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>ets</title>
-      <author><first>Collin F.</first><last>Baker</last></author>
-      <author><first>Michael</first><last>Ellsworth</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
+      <author id="michael-ellsworth"><first>Michael</first><last>Ellsworth</last></author>
       <pages>45–50</pages>
       <url hash="98ab8356">W17-2406</url>
       <doi>10.18653/v1/W17-2406</doi>
@@ -4474,7 +4474,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="8">
       <title>Work Hard, Play Hard: Email Classification on the Avocado and <fixed-case>E</fixed-case>nron Corpora</title>
       <author><first>Sakhar</first><last>Alkhereyf</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>57–65</pages>
       <url hash="7106a8d3">W17-2408</url>
       <doi>10.18653/v1/W17-2408</doi>
@@ -4484,7 +4484,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="9">
       <title>A Graph Based Semi-Supervised Approach for Analysis of Derivational Nouns in <fixed-case>S</fixed-case>anskrit</title>
       <author><first>Amrith</first><last>Krishna</last></author>
-      <author><first>Pavankumar</first><last>Satuluri</last></author>
+      <author id="pavankumar-satuluri"><first>Pavankumar</first><last>Satuluri</last></author>
       <author><first>Harshavardhan</first><last>Ponnada</last></author>
       <author><first>Muneeb</first><last>Ahmed</last></author>
       <author><first>Gulab</first><last>Arora</last></author>
@@ -4513,7 +4513,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the 10th Workshop on Building and Using Comparable Corpora</booktitle>
       <url hash="3f27b743">W17-25</url>
       <editor><first>Serge</first><last>Sharoff</last></editor>
-      <editor><first>Pierre</first><last>Zweigenbaum</last></editor>
+      <editor id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></editor>
       <editor><first>Reinhard</first><last>Rapp</last></editor>
       <doi>10.18653/v1/W17-25</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4528,7 +4528,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </frontmatter>
     <paper id="1">
       <title>Users and Data: The Two Neglected Children of Bilingual Natural Language Processing Research</title>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>1–5</pages>
       <url hash="dad4c61f">W17-2501</url>
       <doi>10.18653/v1/W17-2501</doi>
@@ -4538,7 +4538,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Deep Investigation of Cross-Language Plagiarism Detection Methods</title>
       <author><first>Jérémy</first><last>Ferrero</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Didier</first><last>Schwab</last></author>
       <author><first>Frédéric</first><last>Agnès</last></author>
       <pages>6–15</pages>
@@ -4604,7 +4604,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Weighted Set-Theoretic Alignment of Comparable Sentences</title>
       <author><first>Andoni</first><last>Azpeitia</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <pages>41–45</pages>
       <url hash="14ee631d">W17-2508</url>
       <doi>10.18653/v1/W17-2508</doi>
@@ -4614,7 +4614,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="9">
       <title><fixed-case>BUCC</fixed-case> 2017 Shared Task: a First Attempt Toward a Deep Learning Framework for Identifying Parallel Sentences in Comparable Corpora</title>
       <author><first>Francis</first><last>Grégoire</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <pages>46–50</pages>
       <url hash="14c2e3a1">W17-2509</url>
       <doi>10.18653/v1/W17-2509</doi>
@@ -4633,9 +4633,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="11">
       <title><fixed-case>BUCC</fixed-case>2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora</title>
-      <author><first>Sainik</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>56–59</pages>
       <url hash="62126c99">W17-2511</url>
       <doi>10.18653/v1/W17-2511</doi>
@@ -4658,12 +4658,12 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Representation Learning for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="b5b4ad67">W17-26</url>
-      <editor><first>Phil</first><last>Blunsom</last></editor>
+      <editor id="phil-blunsom"><first>Phil</first><last>Blunsom</last></editor>
       <editor><first>Antoine</first><last>Bordes</last></editor>
       <editor><first>Kyunghyun</first><last>Cho</last></editor>
-      <editor><first>Shay</first><last>Cohen</last></editor>
-      <editor><first>Chris</first><last>Dyer</last></editor>
-      <editor><first>Edward</first><last>Grefenstette</last></editor>
+      <editor id="shay-b-cohen"><first>Shay</first><last>Cohen</last></editor>
+      <editor id="chris-dyer"><first>Chris</first><last>Dyer</last></editor>
+      <editor id="edward-grefenstette"><first>Edward</first><last>Grefenstette</last></editor>
       <editor><first>Karl Moritz</first><last>Hermann</last></editor>
       <editor><first>Laura</first><last>Rimell</last></editor>
       <editor><first>Jason</first><last>Weston</last></editor>
@@ -4716,7 +4716,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>Emergent Predication Structure in Hidden State Vectors of Neural Readers</title>
       <author><first>Hai</first><last>Wang</last></author>
-      <author><first>Takeshi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takeshi</first><last>Onishi</last></author>
       <author><first>Kevin</first><last>Gimpel</last></author>
       <author><first>David</first><last>McAllester</last></author>
       <pages>26–36</pages>
@@ -4728,7 +4728,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="5">
       <title>Towards Harnessing Memory Networks for Coreference Resolution</title>
       <author><first>Joe</first><last>Cheri</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>37–42</pages>
       <url hash="ccee9b03">W17-2605</url>
       <doi>10.18653/v1/W17-2605</doi>
@@ -4773,7 +4773,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="9">
       <title>Knowledge Base Completion: Baselines Strike Back</title>
       <author><first>Rudolf</first><last>Kadlec</last></author>
-      <author><first>Ondrej</first><last>Bajgar</last></author>
+      <author id="ondrej-bajgar"><first>Ondrej</first><last>Bajgar</last></author>
       <author><first>Jan</first><last>Kleindienst</last></author>
       <pages>69–74</pages>
       <url hash="a0e658cb">W17-2609</url>
@@ -4785,7 +4785,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Sequential Attention: A Context-Aware Alignment Function for Machine Reading</title>
       <author><first>Sebastian</first><last>Brarda</last></author>
       <author><first>Philip</first><last>Yeres</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>75–80</pages>
       <url hash="b398e727">W17-2610</url>
       <doi>10.18653/v1/W17-2610</doi>
@@ -4864,7 +4864,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="17">
       <title>Learning Bilingual Projections of Embeddings for Vocabulary Expansion in Machine Translation</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Cristina</first><last>España-Bonet</last></author>
       <pages>139–145</pages>
       <url hash="d1879d52">W17-2617</url>
@@ -4975,7 +4975,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Hannes</first><last>Schulz</last></author>
       <author><first>Jeremie</first><last>Zumer</last></author>
       <author><first>Layla</first><last>El Asri</last></author>
-      <author id="shikhar-sharma"><first>Shikhar</first><last>Sharma</last></author>
+      <author><first>Shikhar</first><last>Sharma</last></author>
       <pages>219–227</pages>
       <url hash="71c55c45">W17-2626</url>
       <doi>10.18653/v1/W17-2626</doi>
@@ -5010,7 +5010,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Sandeep</first><last>Subramanian</last></author>
       <author><first>Sai</first><last>Rajeswar</last></author>
       <author><first>Francis</first><last>Dutil</last></author>
-      <author><first>Chris</first><last>Pal</last></author>
+      <author id="christopher-pal"><first>Chris</first><last>Pal</last></author>
       <author><first>Aaron</first><last>Courville</last></author>
       <pages>241–251</pages>
       <url hash="a32933de">W17-2629</url>
@@ -5022,9 +5022,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Deep Active Learning for Named Entity Recognition</title>
       <author><first>Yanyao</first><last>Shen</last></author>
       <author><first>Hyokun</first><last>Yun</last></author>
-      <author><first>Zachary</first><last>Lipton</last></author>
+      <author id="zachary-c-lipton"><first>Zachary</first><last>Lipton</last></author>
       <author><first>Yakov</first><last>Kronrod</last></author>
-      <author><first>Animashree</first><last>Anandkumar</last></author>
+      <author id="animashree-anandkumar"><first>Animashree</first><last>Anandkumar</last></author>
       <pages>252–256</pages>
       <url hash="14804132">W17-2630</url>
       <doi>10.18653/v1/W17-2630</doi>
@@ -5061,8 +5061,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <editor><first>Ben</first><last>Miller</last></editor>
       <editor><first>Marieke</first><last>van Erp</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
       <editor><first>David</first><last>Caswell</last></editor>
       <doi>10.18653/v1/W17-27</doi>
@@ -5079,7 +5079,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="1">
       <title>news<fixed-case>L</fixed-case>ens: building and visualizing long-ranging news stories</title>
       <author><first>Philippe</first><last>Laban</last></author>
-      <author><first>Marti</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
       <pages>1–9</pages>
       <url hash="deab75ec">W17-2701</url>
       <doi>10.18653/v1/W17-2701</doi>
@@ -5089,7 +5089,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Detecting Changes in <fixed-case>T</fixed-case>witter Streams using Temporal Clusters of Hashtags</title>
       <author><first>Yunli</first><last>Wang</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <pages>10–14</pages>
       <url hash="8bdec457">W17-2702</url>
       <doi>10.18653/v1/W17-2702</doi>
@@ -5121,7 +5121,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Tracing armed conflicts with diachronic word embedding models</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>31–36</pages>
       <url hash="8d10ce6e">W17-2705</url>
       <doi>10.18653/v1/W17-2705</doi>
@@ -5131,7 +5131,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="6">
       <title>The Circumstantial Event Ontology (<fixed-case>CEO</fixed-case>)</title>
-      <author><first>Roxane</first><last>Segers</last></author>
+      <author id="roxane-segers"><first>Roxane</first><last>Segers</last></author>
       <author><first>Tommaso</first><last>Caselli</last></author>
       <author><first>Piek</first><last>Vossen</last></author>
       <pages>37–41</pages>
@@ -5143,9 +5143,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Event Detection and Semantic Storytelling: Generating a Travelogue from a large Collection of Personal Letters</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Julian</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno Schneider</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Jan</first><last>Nehring</last></author>
       <author><first>Armin</first><last>Berger</last></author>
       <author><first>Luca</first><last>König</last></author>
@@ -5161,7 +5161,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Inference of Fine-Grained Event Causality from Blogs and Films</title>
       <author><first>Zhichao</first><last>Hu</last></author>
       <author><first>Elahe</first><last>Rahimtoroghi</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>52–58</pages>
       <url hash="c9465f6f">W17-2708</url>
       <doi>10.18653/v1/W17-2708</doi>
@@ -5172,7 +5172,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>On the Creation of a Security-Related Event Corpus</title>
       <author><first>Martin</first><last>Atkinson</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Vanni</first><last>Zavarella</last></author>
       <pages>59–65</pages>
       <url hash="e02462cf">W17-2709</url>
@@ -5201,8 +5201,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="12">
       <title>The Rich Event Ontology</title>
-      <author><first>Susan</first><last>Brown</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="susan-windisch-brown"><first>Susan</first><last>Brown</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Leo</first><last>Obrst</last></author>
       <author><first>Martha</first><last>Palmer</last></author>
       <pages>87–97</pages>
@@ -5272,7 +5272,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Guiding Interaction Behaviors for Multi-modal Grounded Language Learning</title>
       <author><first>Jesse</first><last>Thomason</last></author>
       <author><first>Jivko</first><last>Sinapov</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>20–24</pages>
       <url hash="5c6dfec4">W17-2803</url>
       <doi>10.18653/v1/W17-2803</doi>
@@ -5283,7 +5283,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Structured Learning for Context-aware Spoken Language Understanding of Robotic Commands</title>
       <author><first>Andrea</first><last>Vanzo</last></author>
       <author><first>Danilo</first><last>Croce</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Daniele</first><last>Nardi</last></author>
       <pages>25–34</pages>
       <url hash="3581c18a">W17-2804</url>
@@ -5329,14 +5329,14 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="8">
       <title>Exploring Variation of Natural Human Commands to a Robot in a Collaborative Navigation Task</title>
       <author><first>Matthew</first><last>Marge</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Ashley</first><last>Foots</last></author>
-      <author><first>Cory</first><last>Hayes</last></author>
+      <author id="cory-hayes"><first>Cory</first><last>Hayes</last></author>
       <author><first>Cassidy</first><last>Henry</last></author>
       <author><first>Kimberly</first><last>Pollard</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>58–66</pages>
       <url hash="25ab4281">W17-2808</url>
       <doi>10.18653/v1/W17-2808</doi>
@@ -5361,7 +5361,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="10">
       <title>Are Distributional Representations Ready for the Real World? Evaluating Word Vectors for Grounded Perceptual Meaning</title>
-      <author><first>Li</first><last>Lucy</last></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last></author>
       <author><first>Jon</first><last>Gauthier</last></author>
       <pages>76–85</pages>
       <url hash="15e73892">W17-2810</url>
@@ -5391,7 +5391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Sriraam</first><last>Natarajan</last></author>
       <author><first>Janardhan Rao</first><last>Doppa</last></author>
       <author><first>Julia</first><last>Hockenmaier</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Dan</first><last>Roth</last></author>
       <pages>95–103</pages>
       <url hash="8870725d">W17-2812</url>
@@ -5426,7 +5426,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Language-independent Gender Prediction on <fixed-case>T</fixed-case>witter</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <pages>1–6</pages>
       <url hash="855a53c8">W17-2901</url>
       <doi>10.18653/v1/W17-2901</doi>
@@ -5445,9 +5445,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="3">
       <title>Personality Driven Differences in Paraphrase Preference</title>
-      <author><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
+      <author id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preoţiuc-Pietro</last></author>
       <author><first>Jordan</first><last>Carpenter</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>17–26</pages>
       <url hash="8b557f22">W17-2903</url>
       <doi>10.18653/v1/W17-2903</doi>
@@ -5480,7 +5480,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Cross-Lingual Classification of Topics in Political Texts</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Federico</first><last>Nanni</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>42–46</pages>
       <url hash="eb91636f">W17-2906</url>
       <doi>10.18653/v1/W17-2906</doi>
@@ -5502,9 +5502,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Samridhi</first><last>Choudhary</last></author>
       <author><first>Evangelia</first><last>Spiliopoulou</last></author>
-      <author><first>Christopher</first><last>Bogart</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="christopher-bogart"><first>Christopher</first><last>Bogart</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>53–62</pages>
       <url hash="7caed713">W17-2908</url>
       <doi>10.18653/v1/W17-2908</doi>
@@ -5536,8 +5536,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Code-Switching as a Social Act: The Case of <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia Talk Pages</title>
       <author><first>Michael</first><last>Yoder</last></author>
       <author><first>Shruti</first><last>Rijhwani</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>73–82</pages>
       <url hash="e09d26f9">W17-2911</url>
       <doi>10.18653/v1/W17-2911</doi>
@@ -5558,7 +5558,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="13">
       <title>Ideological Phrase Indicators for Classification of Political Discourse Framing on <fixed-case>T</fixed-case>witter</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>I-Ta</first><last>Lee</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>90–99</pages>
@@ -5572,10 +5572,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
       <url hash="9976b544">W17-30</url>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <editor><first>Wendy Hui Kyong</first><last>Chung</last></editor>
       <editor><first>Dirk</first><last>Hovy</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <doi>10.18653/v1/W17-30</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Vancouver, BC, Canada</address>
@@ -5600,7 +5600,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Constructive Language in News Comments</title>
       <author><first>Varada</first><last>Kolhatkar</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>11–17</pages>
       <url hash="161b45ed">W17-3002</url>
       <doi>10.18653/v1/W17-3002</doi>
@@ -5657,7 +5657,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Legal Framework, Dataset and Annotation Schema for Socially Unacceptable Online Discourse Practices in <fixed-case>S</fixed-case>lovene</title>
       <author><first>Darja</first><last>Fišer</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>46–51</pages>
       <url hash="ef95ee6d">W17-3007</url>
@@ -5731,8 +5731,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="13">
       <title>Using Convolutional Neural Networks to Classify Hate-Speech</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <pages>85–90</pages>
       <url hash="a0d83cc9">W17-3013</url>
       <doi>10.18653/v1/W17-3013</doi>
@@ -5756,7 +5756,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the Fourth Workshop on Computational Linguistics and Clinical Psychology — From Linguistic Signal to Clinical Reality</booktitle>
       <url hash="e6575b05">W17-31</url>
       <editor><first>Kristy</first><last>Hollingshead</last></editor>
-      <editor><first>Molly E.</first><last>Ireland</last></editor>
+      <editor id="molly-ireland"><first>Molly E.</first><last>Ireland</last></editor>
       <editor><first>Kate</first><last>Loveys</last></editor>
       <doi>10.18653/v1/W17-31</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -5796,7 +5796,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>A Corpus Analysis of Social Connections and Social Isolation in Adolescents Suffering from Depressive Disorders</title>
       <author><first>Jia-Wen</first><last>Guo</last></author>
-      <author><first>Danielle L</first><last>Mowery</last></author>
+      <author id="danielle-l-mowery"><first>Danielle L</first><last>Mowery</last></author>
       <author><first>Djin</first><last>Lai</last></author>
       <author><first>Katherine</first><last>Sward</last></author>
       <author><first>Mike</first><last>Conway</last></author>
@@ -5809,7 +5809,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>Monitoring Tweets for Depression to Detect At-risk Users</title>
       <author><first>Zunaira</first><last>Jamil</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Prasadith</first><last>Buddhitha</last></author>
       <author><first>Kenton</first><last>White</last></author>
       <pages>32–40</pages>
@@ -5833,7 +5833,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Natural-language Interactive Narratives in Imaginal Exposure Therapy for Obsessive-Compulsive Disorder</title>
       <author><first>Melissa</first><last>Roemmele</last></author>
       <author><first>Paola</first><last>Mardo</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <pages>48–57</pages>
       <url hash="c906de43">W17-3106</url>
       <doi>10.18653/v1/W17-3106</doi>
@@ -5853,8 +5853,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="8">
       <title>Detecting and Explaining Crisis</title>
       <author><first>Rohan</first><last>Kshirsagar</last></author>
-      <author><first>Robert</first><last>Morris</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="robert-w-morris"><first>Robert</first><last>Morris</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>66–73</pages>
       <url hash="5cb8a90e">W17-3108</url>
       <doi>10.18653/v1/W17-3108</doi>
@@ -5888,7 +5888,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the First Workshop on Neural Machine Translation</booktitle>
       <url hash="1ca21456">W17-32</url>
-      <editor><first>Thang</first><last>Luong</last></editor>
+      <editor id="minh-thang-luong"><first>Thang</first><last>Luong</last></editor>
       <editor><first>Alexandra</first><last>Birch</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Andrew</first><last>Finch</last></editor>
@@ -5918,7 +5918,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>11–17</pages>
       <url hash="757e6e5d">W17-3202</url>
       <doi>10.18653/v1/W17-3202</doi>
@@ -5970,7 +5970,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Beam Search Strategies for Neural Machine Translation</title>
       <author><first>Markus</first><last>Freitag</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>56–60</pages>
       <url hash="49ffbe56">W17-3207</url>
       <doi>10.18653/v1/W17-3207</doi>
@@ -6009,7 +6009,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <url hash="2fc75e0b">W17-34</url>
       <editor><first>Makoto</first><last>Kanazawa</last></editor>
       <editor><first>Philippe</first><last>de Groote</last></editor>
-      <editor><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
+      <editor id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
       <doi>10.18653/v1/W17-34</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>London, UK</address>
@@ -6058,7 +6058,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="5">
       <title>Latent-Variable <fixed-case>PCFG</fixed-case>s: Background and Applications</title>
-      <author><first>Shay</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay</first><last>Cohen</last></author>
       <pages>47–58</pages>
       <url hash="be8a19a4">W17-3405</url>
       <doi>10.18653/v1/W17-3405</doi>
@@ -6083,7 +6083,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="8">
       <title>A Monotonicity Calculus and Its Completeness</title>
       <author><first>Thomas</first><last>Icard</last></author>
-      <author><first>Lawrence</first><last>Moss</last></author>
+      <author id="lawrence-s-moss"><first>Lawrence</first><last>Moss</last></author>
       <author><first>William</first><last>Tune</last></author>
       <pages>75–87</pages>
       <url hash="372df1de">W17-3408</url>
@@ -6149,8 +6149,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 10th International Conference on Natural Language Generation</booktitle>
       <url hash="ec011b53">W17-35</url>
-      <editor><first>Jose M.</first><last>Alonso</last></editor>
-      <editor><first>Alberto</first><last>Bugarín</last></editor>
+      <editor id="jose-m-alonso"><first>Jose M.</first><last>Alonso</last></editor>
+      <editor id="alberto-bugarin-diz"><first>Alberto</first><last>Bugarín</last></editor>
       <editor><first>Ehud</first><last>Reiter</last></editor>
       <doi>10.18653/v1/W17-35</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6165,10 +6165,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </frontmatter>
     <paper id="1">
       <title>Linguistic realisation as machine translation: Comparing different <fixed-case>MT</fixed-case> models for <fixed-case>AMR</fixed-case>-to-text generation</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Iacer</first><last>Calixto</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>1–10</pages>
       <url hash="6428598f">W17-3501</url>
       <doi>10.18653/v1/W17-3501</doi>
@@ -6177,7 +6177,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="2">
       <title>A Survey on Intelligent Poetry Generation: Languages, Features, Techniques, Reutilisation and Evaluation</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <pages>11–20</pages>
       <url hash="0ff98da2">W17-3502</url>
       <doi>10.18653/v1/W17-3502</doi>
@@ -6197,7 +6197,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>Content Selection for Real-time Sports News Construction from Commentary Texts</title>
-      <author><first>Jin-ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-ge</first><last>Yao</last></author>
       <author><first>Jianmin</first><last>Zhang</last></author>
       <author><first>Xiaojun</first><last>Wan</last></author>
       <author><first>Jianguo</first><last>Xiao</last></author>
@@ -6244,7 +6244,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="8">
       <title>Co-<fixed-case>P</fixed-case>oe<fixed-case>T</fixed-case>ry<fixed-case>M</fixed-case>e: a Co-Creative Interface for the Composition of Poetry</title>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <author><first>Tiago</first><last>Mendes</last></author>
       <author><first>Ana</first><last>Boavida</last></author>
       <pages>70–71</pages>
@@ -6255,8 +6255,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="9">
       <title>Refer-i<fixed-case>TTS</fixed-case>: A System for Referring in Spoken Installments to Objects in Real-World Images</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
-      <author><first>M. Soledad</first><last>López Gambino</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
+      <author id="m-soledad-lopez-gambino"><first>M. Soledad</first><last>López Gambino</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>72–73</pages>
       <url hash="8e6392ba">W17-3509</url>
@@ -6296,7 +6296,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="13">
       <title><fixed-case>PASS</fixed-case>: A <fixed-case>D</fixed-case>utch data-to-text system for soccer, targeted towards specific audiences</title>
       <author><first>Chris</first><last>van der Lee</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
       <pages>95–104</pages>
       <url hash="bbcf5409">W17-3513</url>
@@ -6317,7 +6317,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="15">
       <title>Talking about the world with a distributed model</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>114</pages>
       <url hash="666a25ae">W17-3515</url>
       <doi>10.18653/v1/W17-3515</doi>
@@ -6327,7 +6327,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="16">
       <title>The <fixed-case>C</fixed-case>ode2<fixed-case>T</fixed-case>ext Challenge: Text Generation in Source Libraries</title>
       <author><first>Kyle</first><last>Richardson</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>115–119</pages>
       <url hash="2e5b5be5">W17-3516</url>
@@ -6340,7 +6340,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Simon</first><last>Mille</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>120–123</pages>
       <url hash="23b24632">W17-3517</url>
       <doi>10.18653/v1/W17-3517</doi>
@@ -6391,7 +6391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="22">
       <title><fixed-case>G</fixed-case>-<fixed-case>TUNA</fixed-case>: a corpus of referring expressions in <fixed-case>G</fixed-case>erman, including duration information</title>
-      <author><first>David</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David</first><last>Howcroft</last></author>
       <author><first>Jorrig</first><last>Vogels</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>149–153</pages>
@@ -6489,7 +6489,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Eva</first><last>Hasler</last></author>
       <author><first>Felix</first><last>Stahlberg</last></author>
       <author><first>Marcus</first><last>Tomalin</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>208–212</pages>
       <url hash="fbc9e207">W17-3531</url>
@@ -6499,12 +6499,12 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="32">
       <title>Investigating the content and form of referring expressions in <fixed-case>M</fixed-case>andarin: introducing the Mtuna corpus</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Le</first><last>Sun</last></author>
       <author><first>Rint</first><last>Sybesma</last></author>
       <author><first>Xiao</first><last>Li</last></author>
       <author><first>Bo</first><last>Chen</last></author>
-      <author><first>Muyun</first><last>Yang</last></author>
+      <author id="muyun-yang"><first>Muyun</first><last>Yang</last></author>
       <pages>213–217</pages>
       <url hash="9b2049f1">W17-3532</url>
       <doi>10.18653/v1/W17-3532</doi>
@@ -6537,7 +6537,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Textually Summarising Incomplete Data</title>
       <author><first>Stephanie</first><last>Inglis</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>228–232</pages>
       <url hash="9b9069f1">W17-3535</url>
       <doi>10.18653/v1/W17-3535</doi>
@@ -6546,8 +6546,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="36">
       <title>Improving the generation of personalised descriptions</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>233–237</pages>
       <url hash="9c999fd0">W17-3536</url>
       <doi>10.18653/v1/W17-3536</doi>
@@ -6587,9 +6587,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="40">
       <title>Referential Success of Set Referring Expressions with Fuzzy Properties</title>
-      <author><first>Nicolás</first><last>Marín</last></author>
+      <author id="nicolas-marin"><first>Nicolás</first><last>Marín</last></author>
       <author><first>Gustavo</first><last>Rivas-Gervilla</last></author>
-      <author><first>Daniel</first><last>Sánchez</last></author>
+      <author id="daniel-sanchez-cisneros"><first>Daniel</first><last>Sánchez</last></author>
       <pages>247–251</pages>
       <url hash="001da225">W17-3540</url>
       <doi>10.18653/v1/W17-3540</doi>
@@ -6686,7 +6686,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="5">
       <title>Applying the <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory in <fixed-case>A</fixed-case>lzheimer patients’ speech</title>
       <author><first>Anayeli</first><last>Paulino</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <pages>34–38</pages>
       <url hash="c5fc18c8">W17-3605</url>
       <doi>10.18653/v1/W17-3605</doi>
@@ -6695,7 +6695,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Using lexical level information in discourse structures for <fixed-case>B</fixed-case>asque sentiment analysis</title>
       <author><first>Jon</first><last>Alkorta</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <author><first>Maite</first><last>Taboada</last></author>
       <pages>39–47</pages>
@@ -6706,7 +6706,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Framework for the Analysis of Simplified Texts Taking Discourse into Account: the <fixed-case>B</fixed-case>asque Causal Relations as Case Study</title>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <pages>48–57</pages>
       <url hash="875ee0a1">W17-3607</url>
@@ -6771,9 +6771,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="2">
       <title>A Simple Method for Clarifying Sentences with Coordination Ambiguities</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Manjuan</first><last>Duan</last></author>
-      <author><first>David L.</first><last>King</last></author>
+      <author id="david-king"><first>David L.</first><last>King</last></author>
       <url hash="8533f43b">W17-3702</url>
       <doi>10.18653/v1/W17-3702</doi>
       <bibkey>white-etal-2017-simple</bibkey>
@@ -6787,7 +6787,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>An Essay on Self-explanatory Computational Intelligence: A Linguistic Model of Data Processing Systems</title>
-      <author><first>Jose M.</first><last>Alonso</last></author>
+      <author id="jose-m-alonso"><first>Jose M.</first><last>Alonso</last></author>
       <author><first>Gracian</first><last>Trivino</last></author>
       <url hash="994886c6">W17-3704</url>
       <doi>10.18653/v1/W17-3704</doi>
@@ -6799,8 +6799,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the Linguistic Resources for Automatic Natural Language Generation - <fixed-case>L</fixed-case>i<fixed-case>RA</fixed-case>@<fixed-case>NLG</fixed-case></booktitle>
       <url hash="e5ee7c07">W17-38</url>
       <editor><first>Kristina</first><last>Kocijan</last></editor>
-      <editor><first>Peter</first><last>Machonis</last></editor>
-      <editor><first>Max</first><last>Silberztein</last></editor>
+      <editor id="peter-machonis"><first>Peter</first><last>Machonis</last></editor>
+      <editor id="max-silberztein"><first>Max</first><last>Silberztein</last></editor>
       <doi>10.18653/v1/W17-38</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santiago de Compostela, Spain</address>
@@ -6826,7 +6826,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Milagros</first><last>Fernández-Gavilanes</last></author>
       <author><first>Enrique</first><last>Costa-Montenegro</last></author>
       <author><first>Jonathan</first><last>Juncal-Martínez</last></author>
-      <author><first>Francisco J.</first><last>González-Castaño</last></author>
+      <author id="francisco-javier-gonzalez-castano"><first>Francisco J.</first><last>González-Castaño</last></author>
       <pages>11-15</pages>
       <url hash="8dc5a9b7">W17-3802</url>
       <doi>10.18653/v1/W17-3802</doi>
@@ -6891,7 +6891,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the Workshop on Computational Creativity in Natural Language Generation (<fixed-case>CC</fixed-case>-<fixed-case>NLG</fixed-case> 2017)</booktitle>
       <url hash="d4d9196b">W17-39</url>
-      <editor><first>Hugo</first><last>Gonçalo Oliveira</last></editor>
+      <editor id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></editor>
       <editor><first>Ben</first><last>Burtenshaw</last></editor>
       <editor><first>Mike</first><last>Kestemont</last></editor>
       <editor><first>Tom</first><last>De Smedt</last></editor>
@@ -6928,7 +6928,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="3">
       <title>Template-Free Construction of Rhyming Poems with Thematic Cohesion</title>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>21–28</pages>
       <url hash="db1ee521">W17-3903</url>
       <doi>10.18653/v1/W17-3903</doi>
@@ -6936,7 +6936,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="4">
       <title>Synthetic Literature: Writing Science Fiction in a Co-Creative Process</title>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Folgert</first><last>Karsdorp</last></author>
       <author><first>Ben</first><last>Burtenshaw</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
@@ -6987,7 +6987,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Cyril</first><last>Allauzen</last></author>
       <author><first>Françoise</first><last>Beaufays</last></author>
       <author><first>Tom</first><last>Ouyang</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>David</first><last>Rybach</last></author>
       <pages>10–19</pages>
       <url hash="ad4a7762">W17-4002</url>
@@ -7022,7 +7022,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Finite-State Morphological Analysis for <fixed-case>M</fixed-case>arathi</title>
       <author><first>Vinit</first><last>Ravishankar</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>50–55</pages>
       <url hash="f392030d">W17-4006</url>
       <doi>10.18653/v1/W17-4006</doi>
@@ -7031,7 +7031,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Word Transduction for Addressing the <fixed-case>OOV</fixed-case> Problem in Machine Translation for Similar Resource-Scarce Languages</title>
       <author><first>Shashikant</first><last>Sharma</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>56–63</pages>
       <url hash="95d9e066">W17-4007</url>
       <doi>10.18653/v1/W17-4009</doi>
@@ -7058,7 +7058,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="10">
       <title>Evaluating an Automata Approach to Query Containment</title>
-      <author><first>Michael</first><last>Minock</last></author>
+      <author id="michael-minock"><first>Michael</first><last>Minock</last></author>
       <pages>75–79</pages>
       <url hash="4309e461">W17-4010</url>
       <doi>10.18653/v1/W17-4010</doi>
@@ -7070,7 +7070,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the First Workshop on Subword and Character Level Models in <fixed-case>NLP</fixed-case></booktitle>
       <url hash="946b862f">W17-41</url>
       <editor><first>Manaal</first><last>Faruqui</last></editor>
-      <editor><first>Hinrich</first><last>Schuetze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></editor>
       <editor><first>Isabel</first><last>Trancoso</last></editor>
       <editor><first>Yadollah</first><last>Yaghoobzadeh</last></editor>
       <doi>10.18653/v1/W17-41</doi>
@@ -7097,7 +7097,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Learning variable length units for <fixed-case>SMT</fixed-case> between related languages via Byte Pair Encoding</title>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>14–24</pages>
       <url hash="00c7b375">W17-4102</url>
       <doi>10.18653/v1/W17-4102</doi>
@@ -7119,7 +7119,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Maria</first><last>Ponomareva</last></author>
       <author><first>Kirill</first><last>Milintsevich</last></author>
       <author><first>Ekaterina</first><last>Chernyak</last></author>
-      <author><first>Anatoly</first><last>Starostin</last></author>
+      <author id="anatoli-starostin"><first>Anatoly</first><last>Starostin</last></author>
       <pages>31–35</pages>
       <url hash="b72aa8ea">W17-4104</url>
       <doi>10.18653/v1/W17-4104</doi>
@@ -7150,7 +7150,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="7">
       <title>Weakly supervised learning of allomorphy</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>46–56</pages>
       <url hash="70823639">W17-4107</url>
@@ -7194,7 +7194,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="10">
       <title>Exploring Cross-Lingual Transfer of Morphological Knowledge In Sequence-to-Sequence Models</title>
       <author><first>Huiming</first><last>Jin</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>70–75</pages>
       <url hash="e0660689">W17-4110</url>
       <doi>10.18653/v1/W17-4110</doi>
@@ -7203,7 +7203,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="11">
       <title>Unlabeled Data for Morphological Generation With Character-Based Sequence-to-Sequence Models</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Hinrich</first><last>Schütze</last></author>
       <pages>76–81</pages>
       <url hash="f67983a0">W17-4111</url>
@@ -7250,9 +7250,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="15">
       <title>Word Representation Models for Morphologically Rich Languages in Neural Machine Translation</title>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Xuanli</first><last>He</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>103–108</pages>
       <url hash="fc60ba90">W17-4115</url>
       <doi>10.18653/v1/W17-4115</doi>
@@ -7262,7 +7262,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="16">
       <title>Spell-Checking based on Syllabification and Character-level Graphs for a <fixed-case>P</fixed-case>eruvian Agglutinative Language</title>
       <author><first>Carlo</first><last>Alva</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>109–116</pages>
       <url hash="9ced2163">W17-4116</url>
       <doi>10.18653/v1/W17-4116</doi>
@@ -7283,7 +7283,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="18">
       <title>A General-Purpose Tagger with Convolutional Neural Networks</title>
       <author><first>Xiang</first><last>Yu</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>124–129</pages>
       <url hash="fe8c8dfa">W17-4118</url>
@@ -7313,7 +7313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="21">
       <title>Neural Paraphrase Identification of Questions with Noisy Pretraining</title>
-      <author><first>Gaurav Singh</first><last>Tomar</last></author>
+      <author id="gaurav-singh-tomar"><first>Gaurav Singh</first><last>Tomar</last></author>
       <author><first>Thyago</first><last>Duque</last></author>
       <author><first>Oscar</first><last>Täckström</last></author>
       <author><first>Jakob</first><last>Uszkoreit</last></author>
@@ -7328,7 +7328,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Sub-character Neural Language Modelling in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Viet</first><last>Nguyen</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>148–153</pages>
       <url hash="f50fa3ad">W17-4122</url>
       <doi>10.18653/v1/W17-4122</doi>
@@ -7337,9 +7337,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="23">
       <title>Byte-based Neural Machine Translation</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>154–158</pages>
       <url hash="d975533b">W17-4123</url>
       <doi>10.18653/v1/W17-4123</doi>
@@ -7349,7 +7349,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="24">
       <title>Improving Opinion-Target Extraction with Character-Level Word Embeddings</title>
       <author><first>Soufian</first><last>Jebbara</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>159–167</pages>
       <url hash="e2eb410c">W17-4124</url>
       <doi>10.18653/v1/W17-4124</doi>
@@ -7378,7 +7378,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Predicting News Values from Headline Text and Emotions</title>
       <author><first>Maria Pia</first><last>di Buono</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Martin</first><last>Tutek</last></author>
       <author><first>Natasa</first><last>Milic-Frayling</last></author>
@@ -7391,7 +7391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Predicting User Views in Online News</title>
       <author><first>Daniel</first><last>Hardt</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>7–12</pages>
       <url hash="306ea53e">W17-4202</url>
       <doi>10.18653/v1/W17-4202</doi>
@@ -7414,9 +7414,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>What to Write? A topic recommender for journalists</title>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
-      <author><first>Christian</first><last>Morbidoni</last></author>
+      <author id="christian-morbidoni"><first>Christian</first><last>Morbidoni</last></author>
       <author><first>Giovanni</first><last>Stilo</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <pages>19–24</pages>
       <url hash="bb0a367f">W17-4204</url>
       <doi>10.18653/v1/W17-4204</doi>
@@ -7439,7 +7439,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Language-based Construction of Explorable News Graphs for Journalists</title>
       <author><first>Rémi</first><last>Bois</last></author>
-      <author><first>Guillaume</first><last>Gravier</last></author>
+      <author id="guillaume-gravier"><first>Guillaume</first><last>Gravier</last></author>
       <author><first>Eric</first><last>Jamet</last></author>
       <author><first>Emmanuel</first><last>Morin</last></author>
       <author><first>Pascale</first><last>Sébillot</last></author>
@@ -7456,7 +7456,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Janneke</first><last>van der Zwaan</last></author>
       <author><first>Antske</first><last>Fokkens</last></author>
-      <author><first>Willem</first><last>van Hage</last></author>
+      <author id="willem-robert-van-hage"><first>Willem</first><last>van Hage</last></author>
       <author><first>Inger</first><last>Leemans</last></author>
       <author><first>Isa</first><last>Maks</last></author>
       <pages>37–45</pages>
@@ -7469,7 +7469,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Analyzing the Revision Logs of a <fixed-case>J</fixed-case>apanese Newspaper for Article Quality Assessment</title>
       <author><first>Hideaki</first><last>Tamori</last></author>
       <author><first>Yuta</first><last>Hitomi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>46–50</pages>
       <url hash="5490bb4f">W17-4208</url>
@@ -7493,7 +7493,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Incongruent Headlines: Yet Another Way to Mislead Your Readers</title>
       <author><first>Sophie</first><last>Chesney</last></author>
       <author><first>Maria</first><last>Liakata</last></author>
-      <author><first>Massimo</first><last>Poesio</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
       <author><first>Matthew</first><last>Purver</last></author>
       <pages>56–61</pages>
       <url hash="7b994c7b">W17-4210</url>
@@ -7514,8 +7514,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="12">
       <title>Semantic Storytelling, Cross-lingual Event Detection and other Semantic Services for a Newsroom Content Curation Dashboard</title>
-      <author><first>Julian</first><last>Moreno-Schneider</last></author>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno-Schneider</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Peter</first><last>Bourgonje</last></author>
       <author><first>David</first><last>Wabnitz</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
@@ -7551,7 +7551,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="15">
       <title>From Clickbait to Fake News Detection: An Approach based on Detecting the Stance of Headlines to Articles</title>
       <author><first>Peter</first><last>Bourgonje</last></author>
-      <author><first>Julian</first><last>Moreno Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julian</first><last>Moreno Schneider</last></author>
       <author><first>Georg</first><last>Rehm</last></author>
       <pages>84–89</pages>
       <url hash="a38cb813">W17-4215</url>
@@ -7583,7 +7583,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="18">
       <title>Using <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>T</fixed-case>imes Picks to Identify Constructive Comments</title>
       <author><first>Varada</first><last>Kolhatkar</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>100–105</pages>
       <url hash="400f83d2">W17-4218</url>
       <doi>10.18653/v1/W17-4218</doi>
@@ -7609,7 +7609,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <editor><first>Kai-Wei</first><last>Chang</last></editor>
       <editor><first>Ming-Wei</first><last>Chang</last></editor>
       <editor><first>Vivek</first><last>Srikumar</last></editor>
-      <editor><first>Alexander M.</first><last>Rush</last></editor>
+      <editor id="alexander-m-rush"><first>Alexander M.</first><last>Rush</last></editor>
       <doi>10.18653/v1/W17-43</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -7654,7 +7654,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>Structured Prediction via Learning to Search under Bandit Feedback</title>
       <author><first>Amr</first><last>Sharaf</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>17–26</pages>
       <url hash="3c2c0f98">W17-4304</url>
       <doi>10.18653/v1/W17-4304</doi>
@@ -7665,7 +7665,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Syntax Aware <fixed-case>LSTM</fixed-case> model for Semantic Role Labeling</title>
       <author><first>Feng</first><last>Qian</last></author>
       <author><first>Lei</first><last>Sha</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <author><first>Lu-chen</first><last>Liu</last></author>
       <author><first>Ming</first><last>Zhang</last></author>
       <pages>27–32</pages>
@@ -7697,7 +7697,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="8">
       <title>Piecewise Latent Variables for Neural Variational Text Processing</title>
-      <author><first>Iulian Vlad</first><last>Serban</last></author>
+      <author id="iulian-vlad-serban"><first>Iulian Vlad</first><last>Serban</last></author>
       <author><first>Alexander</first><last>Ororbia II</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
       <author><first>Aaron</first><last>Courville</last></author>
@@ -7712,10 +7712,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Noisy User-generated Text</booktitle>
       <url hash="c19f6979">W17-44</url>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <doi>10.18653/v1/W17-44</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -7739,9 +7739,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Towards the Understanding of Gaming Audiences by Modeling Twitch Emotes</title>
       <author><first>Francesco</first><last>Barbieri</last></author>
-      <author><first>Luis</first><last>Espinosa-Anke</last></author>
+      <author id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></author>
       <author><first>Miguel</first><last>Ballesteros</last></author>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>11–20</pages>
       <url hash="2bbd5a68">W17-4402</url>
@@ -7764,7 +7764,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>To normalize, or not to normalize: The impact of normalization on Part-of-Speech tagging</title>
       <author><first>Rob</first><last>van der Goot</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
       <pages>31–39</pages>
       <url hash="6df20b7c">W17-4404</url>
@@ -7787,7 +7787,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Incorporating Metadata into Content-Based User Embeddings</title>
       <author><first>Linzi</first><last>Xing</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <pages>45–49</pages>
       <url hash="0293e22d">W17-4406</url>
       <doi>10.18653/v1/W17-4406</doi>
@@ -7797,8 +7797,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="7">
       <title>Simple Queries as Distant Labels for Predicting Gender on <fixed-case>T</fixed-case>witter</title>
       <author><first>Chris</first><last>Emmery</last></author>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>50–55</pages>
       <url hash="1090f1f5">W17-4407</url>
       <doi>10.18653/v1/W17-4407</doi>
@@ -7819,7 +7819,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="9">
       <title>Evaluating hypotheses in geolocation on a very large sample of <fixed-case>T</fixed-case>witter</title>
       <author><first>Bahar</first><last>Salehi</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>62–67</pages>
       <url hash="75208040">W17-4409</url>
       <doi>10.18653/v1/W17-4409</doi>
@@ -7830,7 +7830,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>The Effect of Error Rate in Artificially Generated Data for Automatic Preposition and Determiner Correction</title>
       <author><first>Fraser</first><last>Bowen</last></author>
       <author><first>Jon</first><last>Dehdari</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>68–76</pages>
       <url hash="4c32e328">W17-4410</url>
       <doi>10.18653/v1/W17-4410</doi>
@@ -7852,7 +7852,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>Noisy <fixed-case>U</fixed-case>yghur Text Normalization</title>
       <author><first>Osman</first><last>Tursun</last></author>
-      <author><first>Ruket</first><last>Cakici</last></author>
+      <author id="ruket-cakici"><first>Ruket</first><last>Cakici</last></author>
       <pages>85–93</pages>
       <url hash="92fe6cb9">W17-4412</url>
       <doi>10.18653/v1/W17-4412</doi>
@@ -7862,7 +7862,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="13">
       <title>Crowdsourcing Multiple Choice Science Questions</title>
       <author><first>Johannes</first><last>Welbl</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Matt</first><last>Gardner</last></author>
       <pages>94–106</pages>
       <url hash="82a80892">W17-4413</url>
@@ -7887,8 +7887,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Huntsville, hospitals, and hockey teams: Names can reveal your location</title>
       <author><first>Bahar</first><last>Salehi</last></author>
       <author><first>Dirk</first><last>Hovy</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>116–121</pages>
       <url hash="10de0092">W17-4415</url>
       <doi>10.18653/v1/W17-4415</doi>
@@ -7898,8 +7898,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="16">
       <title>Improving Document Clustering by Removing Unnatural Language</title>
       <author><first>Myungha</first><last>Jang</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>James</first><last>Allan</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="james-allan"><first>James</first><last>Allan</last></author>
       <pages>122–130</pages>
       <url hash="3e6449d0">W17-4416</url>
       <doi>10.18653/v1/W17-4416</doi>
@@ -7953,10 +7953,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="21">
       <title>Multi-channel <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model for Emerging Named Entity Recognition in Social Media</title>
-      <author><first>Bill Y.</first><last>Lin</last></author>
-      <author><first>Frank</first><last>Xu</last></author>
+      <author id="bill-yuchen-lin"><first>Bill Y.</first><last>Lin</last></author>
+      <author id="frank-f-xu"><first>Frank</first><last>Xu</last></author>
       <author><first>Zhiyi</first><last>Luo</last></author>
-      <author><first>Kenny</first><last>Zhu</last></author>
+      <author id="kenny-zhu"><first>Kenny</first><last>Zhu</last></author>
       <pages>160–165</pages>
       <url hash="ae94c276">W17-4421</url>
       <doi>10.18653/v1/W17-4421</doi>
@@ -7985,8 +7985,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="24">
       <title>A Feature-based Ensemble Approach to Recognition of Emerging and Rare Named Entities</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>177–181</pages>
       <url hash="06c64737">W17-4424</url>
       <doi>10.18653/v1/W17-4424</doi>
@@ -7999,7 +7999,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the Workshop on New Frontiers in Summarization</booktitle>
       <url hash="2b465fdf">W17-45</url>
       <editor><first>Lu</first><last>Wang</last></editor>
-      <editor><first>Jackie Chi Kit</first><last>Cheung</last></editor>
+      <editor id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></editor>
       <editor><first>Giuseppe</first><last>Carenini</last></editor>
       <editor id="fei-liu-utdallas"><first>Fei</first><last>Liu</last></editor>
       <doi>10.18653/v1/W17-45</doi>
@@ -8057,7 +8057,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="5">
       <title>Coarse-to-Fine Attention Models for Document Summarization</title>
       <author><first>Jeffrey</first><last>Ling</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>33–42</pages>
       <url hash="d43ccab9">W17-4505</url>
       <doi>10.18653/v1/W17-4505</doi>
@@ -8067,7 +8067,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Automatic Community Creation for Abstractive Spoken Conversations Summarization</title>
       <author><first>Karan</first><last>Singla</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Ali Orkan</first><last>Bayer</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
@@ -8102,8 +8102,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="9">
       <title>Topic Model Stability for Hierarchical Summarization</title>
-      <author><first>John</first><last>Miller</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <pages>64–73</pages>
       <url hash="c3449a42">W17-4509</url>
       <doi>10.18653/v1/W17-4509</doi>
@@ -8158,7 +8158,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <booktitle>Proceedings of the Workshop on Speech-Centric Natural Language Processing</booktitle>
       <url hash="11048b62">W17-46</url>
       <editor><first>Nicholas</first><last>Ruiz</last></editor>
-      <editor><first>Srinivas</first><last>Bangalore</last></editor>
+      <editor id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></editor>
       <doi>10.18653/v1/W17-46</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -8173,7 +8173,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="1">
       <title>Functions of Silences towards Information Flow in Spoken Conversation</title>
       <author><first>Shammur Absar</first><last>Chowdhury</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Morena</first><last>Danieli</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
       <pages>1–9</pages>
@@ -8195,7 +8195,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Analyzing Human and Machine Performance In Resolving Ambiguous Spoken Sentences</title>
       <author><first>Hussein</first><last>Ghaly</last></author>
-      <author><first>Michael</first><last>Mandel</last></author>
+      <author id="michael-mandel"><first>Michael</first><last>Mandel</last></author>
       <pages>18–26</pages>
       <url hash="e777f461">W17-4603</url>
       <doi>10.18653/v1/W17-4603</doi>
@@ -8241,7 +8241,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
       <author><first>Sameer</first><last>Bansal</last></author>
       <author><first>David</first><last>Chiang</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Adam</first><last>Lopez</last></author>
       <pages>53–58</pages>
       <url hash="a25c47fb">W17-4607</url>
@@ -8252,7 +8252,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="8">
       <title><fixed-case>A</fixed-case>mharic-<fixed-case>E</fixed-case>nglish Speech Translation in Tourism Domain</title>
       <author><first>Michael</first><last>Melese</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Million</first><last>Meshesha</last></author>
       <pages>59–66</pages>
       <url hash="12f1ca13">W17-4608</url>
@@ -8273,7 +8273,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="10">
       <title>Improving coreference resolution with automatically predicted prosodic information</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Sabrina</first><last>Stehwien</last></author>
       <author><first>Arndt</first><last>Riester</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
@@ -8288,14 +8288,14 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the Second Conference on Machine Translation</booktitle>
       <url hash="74a92d19">W17-47</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
       <editor><first>Christian</first><last>Buck</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Julia</first><last>Kreutzer</last></editor>
       <doi>10.18653/v1/W17-47</doi>
@@ -8313,7 +8313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Sense-Aware Statistical Machine Translation using Adaptive Context-Dependent Clustering</title>
       <author><first>Xiao</first><last>Pu</last></author>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>1–10</pages>
       <url hash="74f24e4c">W17-4701</url>
       <doi>10.18653/v1/W17-4701</doi>
@@ -8321,7 +8321,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="2">
       <title>Improving Word Sense Disambiguation in Neural Machine Translation with Sense Embeddings</title>
-      <author><first>Annette</first><last>Rios Gonzales</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios Gonzales</last></author>
       <author><first>Laura</first><last>Mascarell</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>11–19</pages>
@@ -8332,7 +8332,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="3">
       <title>Word Representations in Factored Neural Machine Translation</title>
       <author><first>Franck</first><last>Burlot</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>François</first><last>Yvon</last></author>
@@ -8344,8 +8344,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="4">
       <title>Modeling Target-Side Inflection in Neural Machine Translation</title>
       <author><first>Aleš</first><last>Tamchyna</last></author>
-      <author><first>Marion</first><last>Weller-Di Marco</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="marion-weller-di-marco"><first>Marion</first><last>Weller-Di Marco</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>32–42</pages>
       <url hash="864e606c">W17-4704</url>
       <doi>10.18653/v1/W17-4704</doi>
@@ -8364,7 +8364,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Target-side Word Segmentation Strategies for Neural Machine Translation</title>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Simon</first><last>Riess</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>56–67</pages>
       <url hash="5bffe60d">W17-4706</url>
       <doi>10.18653/v1/W17-4706</doi>
@@ -8372,7 +8372,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="7">
       <title>Predicting Target Language <fixed-case>CCG</fixed-case> Supertags Improves Neural Machine Translation</title>
-      <author><first>Maria</first><last>Nădejde</last></author>
+      <author id="maria-nadejde"><first>Maria</first><last>Nădejde</last></author>
       <author><first>Siva</first><last>Reddy</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Tomasz</first><last>Dwojak</last></author>
@@ -8406,8 +8406,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="10">
       <title>Deep architectures for Neural Machine Translation</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
@@ -8419,7 +8419,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="11">
       <title>Biasing Attention-Based Recurrent Neural Networks Using External Alignment Information</title>
       <author><first>Tamer</first><last>Alkhouli</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>108–117</pages>
       <url hash="49d5989e">W17-4711</url>
       <doi>10.18653/v1/W17-4711</doi>
@@ -8428,7 +8428,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>Effective Domain Mixing for Neural Machine Translation</title>
       <author><first>Denny</first><last>Britz</last></author>
-      <author><first>Quoc</first><last>Le</last></author>
+      <author id="quoc-le"><first>Quoc</first><last>Le</last></author>
       <author><first>Reid</first><last>Pryzant</last></author>
       <pages>118–126</pages>
       <url hash="b0876971">W17-4712</url>
@@ -8437,9 +8437,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="13">
       <title>Multi-Domain Neural Machine Translation through Unsupervised Adaptation</title>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>127–137</pages>
       <url hash="63ae1ae8">W17-4713</url>
@@ -8449,8 +8449,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="14">
       <title>Adapting Neural Machine Translation with Parallel Synthetic Data</title>
       <author><first>Mara</first><last>Chinea-Ríos</last></author>
-      <author><first>Álvaro</first><last>Peris</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>138–147</pages>
       <url hash="9a24608e">W17-4714</url>
       <doi>10.18653/v1/W17-4714</doi>
@@ -8459,7 +8459,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="15">
       <title>Copied Monolingual Data Improves Low-Resource Neural Machine Translation</title>
       <author><first>Anna</first><last>Currey</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <pages>148–156</pages>
       <url hash="e8762109">W17-4715</url>
@@ -8469,11 +8469,11 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="16">
       <title>Guiding Neural Machine Translation Decoding with External Knowledge</title>
       <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <pages>157–168</pages>
       <url hash="1f50c4bb">W17-4716</url>
       <doi>10.18653/v1/W17-4716</doi>
@@ -8492,9 +8492,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Qun</first><last>Liu</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
       <author><first>Christof</first><last>Monz</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Matt</first><last>Post</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>169–214</pages>
@@ -8517,9 +8517,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="19">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2017 Biomedical Translation Shared Task</title>
       <author><first>Antonio</first><last>Jimeno Yepes</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Arthur</first><last>Boyer</last></author>
       <author><first>Cristian</first><last>Grozea</last></author>
@@ -8542,7 +8542,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Roman</first><last>Sudarikov</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>248–256</pages>
       <url hash="48640355">W17-4720</url>
@@ -8568,10 +8568,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Guillaume</first><last>Klein</last></author>
       <author><first>Catherine</first><last>Kobus</last></author>
       <author><first>Natalia</first><last>Segal</last></author>
-      <author><first>Christophe</first><last>Servan</last></author>
+      <author id="christophe-servan"><first>Christophe</first><last>Servan</last></author>
       <author><first>Bo</first><last>Wang</last></author>
       <author><first>Dakun</first><last>Zhang</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>265–270</pages>
       <url hash="42af35a4">W17-4722</url>
@@ -8580,7 +8580,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="23">
       <title><fixed-case>FBK</fixed-case>’s Participation to the <fixed-case>E</fixed-case>nglish-to-<fixed-case>G</fixed-case>erman News Translation Task of <fixed-case>WMT</fixed-case> 2017</title>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
       <author><first>Nicola</first><last>Bertoldi</last></author>
       <author><first>Marcello</first><last>Federico</last></author>
       <pages>271–275</pages>
@@ -8604,8 +8604,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="25">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Neural Machine Translation System for <fixed-case>G</fixed-case>erman/<fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish Using the Inverse Direction Model in Rescoring</title>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>283–287</pages>
       <url hash="02e561d0">W17-4725</url>
       <doi>10.18653/v1/W17-4725</doi>
@@ -8613,7 +8613,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="26">
       <title><fixed-case>LIUM</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>17 News Translation Task</title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Ozan</first><last>Caglayan</last></author>
       <author><first>Walid</first><last>Aransa</last></author>
       <author><first>Adrien</first><last>Bardet</last></author>
@@ -8637,7 +8637,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="28">
       <title>The <fixed-case>AFRL-MITLL</fixed-case> <fixed-case>WMT17</fixed-case> Systems: Old, New, Borrowed, <fixed-case>BLEU</fixed-case></title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Timothy</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Timothy</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Katherine</first><last>Young</last></author>
       <author><first>Michaeel</first><last>Kazi</last></author>
@@ -8663,7 +8663,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title><fixed-case>LMU</fixed-case> <fixed-case>M</fixed-case>unich’s Neural Machine Translation Systems for News Articles and Health Information Texts</title>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Fabienne</first><last>Braune</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>315–322</pages>
       <url hash="ca9e85cc">W17-4730</url>
       <doi>10.18653/v1/W17-4730</doi>
@@ -8672,7 +8672,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="31">
       <title>Rule-based Machine translation from <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>innish</title>
       <author><first>Arvi</first><last>Hurskainen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>323–329</pages>
       <url hash="25d16f8d">W17-4731</url>
       <doi>10.18653/v1/W17-4731</doi>
@@ -8696,7 +8696,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>The <fixed-case>H</fixed-case>elsinki Neural Machine Translation System</title>
       <author><first>Robert</first><last>Östling</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Gongbo</first><last>Tang</last></author>
       <author><first>Tommi</first><last>Nieminen</last></author>
       <pages>338–347</pages>
@@ -8707,20 +8707,20 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="34">
       <title>The <fixed-case>QT</fixed-case>21 Combined Machine Translation System for <fixed-case>E</fixed-case>nglish to <fixed-case>L</fixed-case>atvian</title>
       <author><first>Jan-Thorsten</first><last>Peter</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <author><first>Franck</first><last>Burlot</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Valters</first><last>Šics</last></author>
       <author><first>Jasmijn</first><last>Bastings</last></author>
       <author><first>Miguel</first><last>Rios</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
       <author><first>Philip</first><last>Williams</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>348–357</pages>
       <url hash="320cb9ec">W17-4734</url>
@@ -8736,7 +8736,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Nick</first><last>Rossenbach</last></author>
       <author><first>Miguel</first><last>Graça</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>358–365</pages>
       <url hash="cba85ab7">W17-4735</url>
       <doi>10.18653/v1/W17-4735</doi>
@@ -8744,12 +8744,12 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="36">
       <title>The Karlsruhe Institute of Technology Systems for the News Translation Task in <fixed-case>WMT</fixed-case> 2017</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
       <author><first>Eunah</first><last>Cho</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>366–373</pages>
       <url hash="86509559">W17-4736</url>
       <doi>10.18653/v1/W17-4736</doi>
@@ -8757,8 +8757,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="37">
       <title>Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2017</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
       <author><first>Toms</first><last>Miks</last></author>
       <author><first>Daiga</first><last>Deksne</last></author>
       <author><first>Valters</first><last>Šics</last></author>
@@ -8786,7 +8786,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Philip</first><last>Williams</last></author>
       <pages>389–399</pages>
       <url hash="9bbe2f48">W17-4739</url>
@@ -8810,7 +8810,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>The <fixed-case>JAIST</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case> 17</title>
       <author><first>Hai-Long</first><last>Trieu</last></author>
       <author><first>Trung-Tin</first><last>Pham</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <pages>405–409</pages>
       <url hash="b861bb06">W17-4741</url>
       <doi>10.18653/v1/W17-4741</doi>
@@ -8874,7 +8874,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Ozan</first><last>Caglayan</last></author>
       <author><first>Walid</first><last>Aransa</last></author>
       <author><first>Adrien</first><last>Bardet</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Fethi</first><last>Bougares</last></author>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Marc</first><last>Masana</last></author>
@@ -8888,7 +8888,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="47">
       <title><fixed-case>DCU</fixed-case> System Report on the <fixed-case>WMT</fixed-case> 2017 Multi-modal Machine Translation Task</title>
       <author><first>Iacer</first><last>Calixto</last></author>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>440–444</pages>
       <url hash="18a92292">W17-4747</url>
@@ -8900,7 +8900,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>John</first><last>Duselis</last></author>
       <author><first>Michael</first><last>Hutt</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>James</first><last>Davis</last></author>
+      <author id="james-davis"><first>James</first><last>Davis</last></author>
       <author><first>Joshua</first><last>Sandvick</last></author>
       <pages>445–449</pages>
       <url hash="d6a61842">W17-4748</url>
@@ -8909,7 +8909,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="49">
       <title><fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>17 Multimodal Translation Task</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <pages>450–457</pages>
       <url hash="c16e2a0f">W17-4749</url>
@@ -8937,7 +8937,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="52">
       <title><fixed-case>S</fixed-case>heffield <fixed-case>M</fixed-case>ulti<fixed-case>MT</fixed-case>: Using Object Posterior Predictions for Multimodal Machine Translation</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>470–476</pages>
@@ -8949,7 +8949,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title><fixed-case>NICT</fixed-case>-<fixed-case>NAIST</fixed-case> System for <fixed-case>WMT</fixed-case>17 Multimodal Translation Task</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichro</first><last>Sumita</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <pages>477–482</pages>
@@ -8978,7 +8978,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="56">
       <title>A Shared Task on Bandit Learning for Machine Translation</title>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Julia</first><last>Kreutzer</last></author>
       <author><first>Kellen</first><last>Sunderland</last></author>
       <author><first>Pavel</first><last>Danchenko</last></author>
@@ -8993,7 +8993,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="57">
       <title>Results of the <fixed-case>WMT</fixed-case>17 Neural <fixed-case>MT</fixed-case> Training Task</title>
       <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Tomáš</first><last>Musil</last></author>
@@ -9012,7 +9012,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="59">
       <title>Predicting Translation Performance with Referential Translation Machines</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>540–544</pages>
       <url hash="b7141b96">W17-4759</url>
       <doi>10.18653/v1/W17-4759</doi>
@@ -9020,8 +9020,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="60">
       <title>Bilexical Embeddings for Quality Estimation</title>
-      <author><first>Frédéric</first><last>Blain</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>545–550</pages>
       <url hash="2c110bf7">W17-4760</url>
@@ -9036,7 +9036,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Qingyu</first><last>Xiang</last></author>
       <author><first>Lilin</first><last>Zhang</last></author>
       <author><first>Maoxi</first><last>Li</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <pages>551–555</pages>
       <url hash="d948ff2d">W17-4761</url>
       <doi>10.18653/v1/W17-4761</doi>
@@ -9063,8 +9063,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="64">
       <title>Unbabel’s Participation in the <fixed-case>WMT</fixed-case>17 Translation Quality Estimation Shared Task</title>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>José</first><last>Monteiro</last></author>
       <pages>569–574</pages>
       <url hash="69ed3ab1">W17-4764</url>
@@ -9073,7 +9073,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="65">
       <title>Feature-Enriched Character-Level Convolutions for Text Regression</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>575–581</pages>
       <url hash="44bae53f">W17-4765</url>
@@ -9114,7 +9114,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Ondřej</first><last>Bojar</last></author>
       <author><first>Ondřej</first><last>Hübsch</last></author>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <pages>604–611</pages>
       <url hash="5c3a4f1e">W17-4769</url>
       <doi>10.18653/v1/W17-4769</doi>
@@ -9122,7 +9122,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="70">
       <title>chr<fixed-case>F</fixed-case>++: words helping character n-grams</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>612–618</pages>
       <url hash="fea9a44d">W17-4770</url>
       <doi>10.18653/v1/W17-4770</doi>
@@ -9140,7 +9140,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="72">
       <title><fixed-case>LIG</fixed-case>-<fixed-case>CRIS</fixed-case>t<fixed-case>AL</fixed-case> Submission for the <fixed-case>WMT</fixed-case> 2017 Automatic Post-Editing Task</title>
       <author><first>Alexandre</first><last>Bérard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Olivier</first><last>Pietquin</last></author>
       <pages>623–629</pages>
       <url hash="7bf44242">W17-4772</url>
@@ -9150,10 +9150,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="73">
       <title>Multi-source Neural Automatic Post-Editing: <fixed-case>FBK</fixed-case>’s participation in the <fixed-case>WMT</fixed-case> 2017 <fixed-case>APE</fixed-case> shared task</title>
       <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Ankit</first><last>Srivastava</last></author>
+      <author id="ankit-srivastava"><first>Ankit</first><last>Srivastava</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <pages>630–638</pages>
       <url hash="376d599b">W17-4773</url>
@@ -9171,7 +9171,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="75">
       <title>Ensembling Factored Neural Machine Translation Models for Automatic Post-Editing and Quality Estimation</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <pages>647–654</pages>
       <url hash="83365637">W17-4775</url>
       <doi>10.18653/v1/W17-4775</doi>
@@ -9184,7 +9184,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Liu</first><last>Huang</last></author>
       <author><first>Lilin</first><last>Zhang</last></author>
       <author><first>Maoxi</first><last>Li</last></author>
-      <author><first>Mingwen</first><last>Wang</last></author>
+      <author id="mingwen-wang"><first>Mingwen</first><last>Wang</last></author>
       <pages>655–660</pages>
       <url hash="9175b70e">W17-4776</url>
       <doi>10.18653/v1/W17-4776</doi>
@@ -9192,7 +9192,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="77">
       <title><fixed-case>CUNI</fixed-case> System for <fixed-case>WMT</fixed-case>17 Automatic Post-Editing Task</title>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
       <pages>661–666</pages>
       <url hash="965ce0c0">W17-4777</url>
@@ -9205,7 +9205,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Shi</first><last>Feng</last></author>
       <author><first>Khanh</first><last>Nguyen</last></author>
       <author><first>Kianté</first><last>Brantley</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>667–673</pages>
       <url hash="c50529e7">W17-4778</url>
       <doi>10.18653/v1/W17-4778</doi>
@@ -9244,9 +9244,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the Third Workshop on Discourse in Machine Translation</booktitle>
       <url hash="9d769d1b">W17-48</url>
-      <editor><first>Bonnie</first><last>Webber</last></editor>
-      <editor><first>Andrei</first><last>Popescu-Belis</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="bonnie-webber"><first>Bonnie</first><last>Webber</last></editor>
+      <editor id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <doi>10.18653/v1/W17-48</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -9262,7 +9262,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Findings of the 2017 <fixed-case>D</fixed-case>isco<fixed-case>MT</fixed-case> Shared Task on Cross-lingual Pronoun Prediction</title>
       <author><first>Sharid</first><last>Loáiciga</last></author>
       <author><first>Sara</first><last>Stymne</last></author>
-      <author><first>Preslav</first><last>Nakov</last></author>
+      <author id="preslav-nakov"><first>Preslav</first><last>Nakov</last></author>
       <author><first>Christian</first><last>Hardmeier</last></author>
       <author><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Mauro</first><last>Cettolo</last></author>
@@ -9276,7 +9276,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="2">
       <title>Validation of an Automatic Metric for the Accuracy of Pronoun Translation (<fixed-case>APT</fixed-case>)</title>
-      <author><first>Lesly</first><last>Miculicich Werlen</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich Werlen</last></author>
       <author><first>Andrei</first><last>Popescu-Belis</last></author>
       <pages>17–25</pages>
       <url hash="2441fb00">W17-4802</url>
@@ -9379,7 +9379,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>Translating Implicit Discourse Connectives Based on Cross-lingual Annotation and Alignment</title>
       <author><first>Hongzheng</first><last>Li</last></author>
-      <author><first>Philippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Philippe</first><last>Langlais</last></author>
       <author><first>Yaohong</first><last>Jin</last></author>
       <pages>93–98</pages>
       <url hash="a38e86b2">W17-4812</url>
@@ -9398,7 +9398,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="14">
       <title>On Integrating Discourse in Machine Translation</title>
-      <author><first>Karin</first><last>Sim Smith</last></author>
+      <author id="karin-sim-smith"><first>Karin</first><last>Sim Smith</last></author>
       <pages>110–121</pages>
       <url hash="721d2725">W17-4814</url>
       <doi>10.18653/v1/W17-4814</doi>
@@ -9437,8 +9437,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Shakespearizing Modern Language Using Copy-Enriched Sequence to Sequence Models</title>
       <author><first>Harsh</first><last>Jhamtani</last></author>
       <author><first>Varun</first><last>Gangal</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>10–19</pages>
       <url hash="bdc7b44f">W17-4902</url>
       <doi>10.18653/v1/W17-4902</doi>
@@ -9460,7 +9460,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Harvesting Creative Templates for Generating Stylistically Varied Restaurant Reviews</title>
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Sheideh</first><last>Homayon</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>28–36</pages>
       <url hash="6fd0dd27">W17-4904</url>
       <doi>10.18653/v1/W17-4904</doi>
@@ -9470,7 +9470,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="5">
       <title>Is writing style predictive of scientific fraud?</title>
       <author><first>Chloé</first><last>Braud</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>37–42</pages>
       <url hash="0d074b5d">W17-4905</url>
       <doi>10.18653/v1/W17-4905</doi>
@@ -9503,7 +9503,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Topic and audience effects on distinctively <fixed-case>S</fixed-case>cottish vocabulary usage in <fixed-case>T</fixed-case>witter data</title>
       <author><first>Philippa</first><last>Shoemark</last></author>
       <author><first>James</first><last>Kirby</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>59–68</pages>
       <url hash="e4dc0252">W17-4908</url>
       <doi>10.18653/v1/W17-4908</doi>
@@ -9535,7 +9535,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="11">
       <title>Stylistic Variation in Television Dialogue for Natural Language Generation</title>
       <author><first>Grace</first><last>Lin</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>85–93</pages>
       <url hash="74e137b2">W17-4911</url>
       <doi>10.18653/v1/W17-4911</doi>
@@ -9564,9 +9564,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="14">
       <title>Assessing the Stylistic Properties of Neurally Generated Text in Authorship Attribution</title>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Jeroen</first><last>De Gussem</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <pages>116–125</pages>
       <url hash="54aa787c">W17-4914</url>
@@ -9579,7 +9579,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 12th Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="2109e9ee">W17-50</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
       <editor><first>Helen</first><last>Yannakoudakis</last></editor>
@@ -9596,7 +9596,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </frontmatter>
     <paper id="1">
       <title>Question Difficulty – How to Estimate Without Norming, How to Use for Automated Grading</title>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <pages>1–10</pages>
       <url hash="96189a4c">W17-5001</url>
       <doi>10.18653/v1/W17-5001</doi>
@@ -9606,7 +9606,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="2">
       <title>Combining <fixed-case>CNN</fixed-case>s and Pattern Matching for Question Interpretation in a Virtual Patient Dialogue System</title>
       <author><first>Lifeng</first><last>Jin</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Evan</first><last>Jaffe</last></author>
       <author><first>Laura</first><last>Zimmerman</last></author>
       <author><first>Douglas</first><last>Danforth</last></author>
@@ -9653,7 +9653,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="6">
       <title>Predicting Specificity in Classroom Discussion</title>
       <author><first>Luca</first><last>Lugini</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>52–61</pages>
       <url hash="5f20ca8a">W17-5006</url>
       <doi>10.18653/v1/W17-5006</doi>
@@ -9662,7 +9662,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="7">
       <title>A Report on the 2017 Native Language Identification Shared Task</title>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Keelan</first><last>Evanini</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Joel</first><last>Tetreault</last></author>
@@ -9689,7 +9689,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="9">
       <title>Predicting Audience’s Laughter During Presentations Using Convolutional Neural Network</title>
       <author><first>Lei</first><last>Chen</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <pages>86–90</pages>
       <url hash="62a6bd39">W17-5009</url>
       <doi>10.18653/v1/W17-5009</doi>
@@ -9722,7 +9722,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="12">
       <title>An Investigation into the Pedagogical Features of Documents</title>
       <author><first>Emily</first><last>Sheng</last></author>
-      <author><first>Prem</first><last>Natarajan</last></author>
+      <author id="prem-natarajan"><first>Prem</first><last>Natarajan</last></author>
       <author><first>Jonathan</first><last>Gordon</last></author>
       <author><first>Gully</first><last>Burns</last></author>
       <pages>109–120</pages>
@@ -9734,8 +9734,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="13">
       <title>Combining Multiple Corpora for Readability Assessment for People with Cognitive Disabilities</title>
       <author><first>Victoria</first><last>Yaneva</last></author>
-      <author><first>Constantin</first><last>Orăsan</last></author>
-      <author><first>Richard</first><last>Evans</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
+      <author id="richard-evans"><first>Richard</first><last>Evans</last></author>
       <author><first>Omid</first><last>Rohanian</last></author>
       <pages>121–132</pages>
       <url hash="77e7d74f">W17-5013</url>
@@ -9757,7 +9757,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="15">
       <title>Distractor Generation for <fixed-case>C</fixed-case>hinese Fill-in-the-blank Items</title>
       <author><first>Shu</first><last>Jiang</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>143–148</pages>
       <url hash="021363ca">W17-5015</url>
       <doi>10.18653/v1/W17-5015</doi>
@@ -9768,7 +9768,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>An Error-Oriented Approach to Word Embedding Pre-Training</title>
       <author><first>Youmna</first><last>Farag</last></author>
       <author><first>Marek</first><last>Rei</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>149–158</pages>
       <url hash="cc5cd881">W17-5016</url>
       <doi>10.18653/v1/W17-5016</doi>
@@ -9781,7 +9781,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Andrea</first><last>Horbach</last></author>
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Torsten</first><last>Zesch</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <pages>159–168</pages>
       <url hash="e6df53c3">W17-5017</url>
       <doi>10.18653/v1/W17-5017</doi>
@@ -9793,7 +9793,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Sophie</first><last>Roekhaut</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>169–179</pages>
       <url hash="2f2b53b6">W17-5018</url>
       <doi>10.18653/v1/W17-5018</doi>
@@ -9825,7 +9825,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Pavel</first><last>Ircing</last></author>
       <author><first>Jan</first><last>Švec</last></author>
       <author><first>Zbyněk</first><last>Zajíc</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <author><first>Martin</first><last>Holub</last></author>
       <pages>198–209</pages>
       <url hash="bca5fad6">W17-5021</url>
@@ -9871,9 +9871,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="25">
       <title>Neural Networks and Spelling Features for Native Language Identification</title>
       <author><first>Johannes</first><last>Bjerva</last></author>
-      <author><first>Gintarė</first><last>Grigonytė</last></author>
+      <author id="gintare-grigonyte"><first>Gintarė</first><last>Grigonytė</last></author>
       <author><first>Robert</first><last>Östling</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>235–239</pages>
       <url hash="7f8fac76">W17-5025</url>
       <doi>10.18653/v1/W17-5025</doi>
@@ -9925,10 +9925,10 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="30">
       <title>Effects of Lexical Properties on Viewing Time per Word in Autistic and Neurotypical Readers</title>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Victoria</first><last>Yaneva</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>271–281</pages>
       <url hash="a3bf34ca">W17-5030</url>
       <doi>10.18653/v1/W17-5030</doi>
@@ -9938,7 +9938,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="31">
       <title>Transparent text quality assessment with convolutional neural networks</title>
       <author><first>Robert</first><last>Östling</last></author>
-      <author><first>Gintare</first><last>Grigonyte</last></author>
+      <author id="gintare-grigonyte"><first>Gintare</first><last>Grigonyte</last></author>
       <pages>282–286</pages>
       <url hash="123d41f8">W17-5031</url>
       <doi>10.18653/v1/W17-5031</doi>
@@ -9950,7 +9950,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Marek</first><last>Rei</last></author>
       <author><first>Mariano</first><last>Felice</last></author>
       <author id="zheng-yuan-cambridge"><first>Zheng</first><last>Yuan</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>287–292</pages>
       <url hash="f4d2ac61">W17-5032</url>
       <doi>10.18653/v1/W17-5032</doi>
@@ -9970,7 +9970,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="34">
       <title>Multiple Choice Question Generation Utilizing An Ontology</title>
       <author><first>Katherine</first><last>Stasaski</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <pages>303–312</pages>
       <url hash="49ef3315">W17-5034</url>
       <doi>10.18653/v1/W17-5034</doi>
@@ -9991,7 +9991,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="36">
       <title>Language Based Mapping of Science Assessment Items to Skills</title>
       <author><first>Farah</first><last>Nadeem</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>319–326</pages>
       <url hash="d6fd0fa7">W17-5036</url>
       <doi>10.18653/v1/W17-5036</doi>
@@ -10012,7 +10012,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="38">
       <title>Question Generation for Language Learning: From ensuring texts are read to supporting learning</title>
       <author><first>Maria</first><last>Chinkina</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>334–344</pages>
       <url hash="c94efaf5">W17-5038</url>
       <doi>10.18653/v1/W17-5038</doi>
@@ -10043,7 +10043,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="41">
       <title>Exploring Optimal Voting in Native Language Identification</title>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <pages>367–373</pages>
       <url hash="395ee033">W17-5041</url>
@@ -10069,8 +10069,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Bo</first><last>Blankers</last></author>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <author><first>Malvina</first><last>Nissim</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <author><first>Martijn</first><last>Wieling</last></author>
       <pages>382–389</pages>
       <url hash="92747834">W17-5043</url>
@@ -10091,8 +10091,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="45">
       <title>Native Language Identification on Text and Speech</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>398–404</pages>
       <url hash="23898633">W17-5045</url>
       <doi>10.18653/v1/W17-5045</doi>
@@ -10102,7 +10102,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="46">
       <title>Native Language Identification using Phonetic Algorithms</title>
       <author><first>Charese</first><last>Smiley</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>405–412</pages>
       <url hash="584ebd74">W17-5046</url>
       <doi>10.18653/v1/W17-5046</doi>
@@ -10112,7 +10112,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="47">
       <title>A deep-learning based native-language classification by using a latent semantic analysis for the <fixed-case>NLI</fixed-case> Shared Task 2017</title>
       <author><first>Yoo Rhee</first><last>Oh</last></author>
-      <author><first>Hyung-Bae</first><last>Jeon</last></author>
+      <author id="hyung-bae-jeon"><first>Hyung-Bae</first><last>Jeon</last></author>
       <author><first>Hwa Jeon</first><last>Song</last></author>
       <author><first>Yun-Kyung</first><last>Lee</last></author>
       <author><first>Jeon-Gue</first><last>Park</last></author>
@@ -10125,8 +10125,8 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="48">
       <title>Fusion of Simple Models for Native Language Identification</title>
-      <author><first>Fabio</first><last>Kepler</last></author>
-      <author><first>Ramon</first><last>F. Astudillo</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramon</first><last>F. Astudillo</last></author>
       <author><first>Alberto</first><last>Abad</last></author>
       <pages>423–429</pages>
       <url hash="db6d6575">W17-5048</url>
@@ -10147,7 +10147,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <paper id="50">
       <title>Using Gaze to Predict Text Readability</title>
       <author><first>Ana Valeria</first><last>González-Garduño</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>438–443</pages>
       <url hash="da61e863">W17-5050</url>
       <doi>10.18653/v1/W17-5050</doi>
@@ -10185,14 +10185,14 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <url hash="e429c94f">W17-51</url>
       <editor><first>Ivan</first><last>Habernal</last></editor>
       <editor><first>Iryna</first><last>Gurevych</last></editor>
-      <editor><first>Kevin</first><last>Ashley</last></editor>
-      <editor><first>Claire</first><last>Cardie</last></editor>
-      <editor><first>Nancy</first><last>Green</last></editor>
-      <editor><first>Diane</first><last>Litman</last></editor>
+      <editor id="kevin-d-ashley"><first>Kevin</first><last>Ashley</last></editor>
+      <editor id="claire-cardie"><first>Claire</first><last>Cardie</last></editor>
+      <editor id="nancy-green"><first>Nancy</first><last>Green</last></editor>
+      <editor id="diane-litman"><first>Diane</first><last>Litman</last></editor>
       <editor><first>Georgios</first><last>Petasis</last></editor>
-      <editor><first>Chris</first><last>Reed</last></editor>
+      <editor id="chris-reed"><first>Chris</first><last>Reed</last></editor>
       <editor><first>Noam</first><last>Slonim</last></editor>
-      <editor><first>Vern</first><last>Walker</last></editor>
+      <editor id="vern-walker"><first>Vern</first><last>Walker</last></editor>
       <doi>10.18653/v1/W17-51</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -10222,7 +10222,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Elena</first><last>Musi</last></author>
       <author><first>Alyssa</first><last>Hwang</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>11–21</pages>
       <url hash="555c0a8b">W17-5102</url>
       <doi>10.18653/v1/W17-5102</doi>
@@ -10266,7 +10266,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Building an Argument Search Engine for the Web</title>
       <author><first>Henning</first><last>Wachsmuth</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Yamen</first><last>Ajjour</last></author>
       <author><first>Jana</first><last>Puschmann</last></author>
       <author><first>Jiani</first><last>Qu</last></author>
@@ -10327,7 +10327,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
     </paper>
     <paper id="11">
       <title>Using Question-Answering Techniques to Implement a Knowledge-Driven Argument Mining Approach</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>85–90</pages>
       <url hash="5b0d7869">W17-5111</url>
       <doi>10.18653/v1/W17-5111</doi>
@@ -10388,9 +10388,9 @@ is able to handle phenomena related to scope by means of an higher-order type th
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="68aa2440">W17-52</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Saif M.</first><last>Mohammad</last></editor>
-      <editor><first>Erik</first><last>van der Goot</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></editor>
+      <editor id="erik-van-der-goot"><first>Erik</first><last>van der Goot</last></editor>
       <doi>10.18653/v1/W17-52</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -10415,7 +10415,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <title>Assessing State-of-the-Art Sentiment Models on State-of-the-Art Sentiment Datasets</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>2–12</pages>
       <url hash="69037d9d">W17-5202</url>
       <doi>10.18653/v1/W17-5202</doi>
@@ -10433,7 +10433,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
       <author><first>Hendrik</first><last>Schuff</last></author>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Julian</first><last>Mohme</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>13–23</pages>
       <url hash="91de65d7">W17-5203</url>
@@ -10446,7 +10446,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>Ranking Right-Wing Extremist Social Media Profiles by Similarity to Democratic and Extremist Groups</title>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <author><first>Franziska</first><last>Schmidtke</last></author>
       <author><first>Lars</first><last>Vogel</last></author>
@@ -10502,7 +10502,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Towards Syntactic <fixed-case>I</fixed-case>berian Polarity Classification</title>
       <author><first>David</first><last>Vilares</last></author>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Miguel A.</first><last>Alonso</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso</last></author>
       <author><first>Carlos</first><last>Gómez-Rodríguez</last></author>
       <pages>67–73</pages>
       <url hash="5328ccf7">W17-5209</url>
@@ -10523,9 +10523,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="11">
       <title>Linguistic Reflexes of Well-Being and Happiness in Echo</title>
       <author><first>Jiaqi</first><last>Wu</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <pages>81–91</pages>
       <url hash="414d6d98">W17-5211</url>
       <doi>10.18653/v1/W17-5211</doi>
@@ -10595,11 +10595,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="18">
       <title>Towards an integrated pipeline for aspect-based sentiment analysis in various domains</title>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Els</first><last>Lefever</last></author>
       <author><first>Gilles</first><last>Jacobs</last></author>
       <author><first>Tijl</first><last>Carpels</last></author>
-      <author><first>Véronique</first><last>Hoste</last></author>
+      <author id="veronique-hoste"><first>Véronique</first><last>Hoste</last></author>
       <pages>136–142</pages>
       <url hash="749e5a8c">W17-5218</url>
       <doi>10.18653/v1/W17-5218</doi>
@@ -10621,7 +10621,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Lexicon Integrated <fixed-case>CNN</fixed-case> Models with Attention for Sentiment Analysis</title>
       <author><first>Bonggun</first><last>Shin</last></author>
       <author><first>Timothy</first><last>Lee</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>149–158</pages>
       <url hash="20416cc7">W17-5220</url>
       <doi>10.18653/v1/W17-5220</doi>
@@ -10665,7 +10665,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="24">
       <title>Unsupervised Aspect Term Extraction with <fixed-case>B</fixed-case>-<fixed-case>LSTM</fixed-case> &amp; <fixed-case>CRF</fixed-case> using Automatically Labelled Datasets</title>
       <author><first>Athanasios</first><last>Giannakopoulos</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <author><first>Andreea</first><last>Hossmann</last></author>
       <author><first>Michael</first><last>Baeriswyl</last></author>
       <pages>180–188</pages>
@@ -10677,7 +10677,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="25">
       <title><fixed-case>PLN</fixed-case>-<fixed-case>PUCRS</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: Psycholinguistic features for emotion intensity prediction in tweets</title>
       <author><first>Henrique</first><last>Santos</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <pages>189–192</pages>
       <url hash="1b25bd4f">W17-5225</url>
       <doi>10.18653/v1/W17-5225</doi>
@@ -10724,8 +10724,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Palaash</first><last>Sawant</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Jyoti</first><last>Pawar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="jyoti-pawar"><first>Jyoti</first><last>Pawar</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>212–218</pages>
       <url hash="f6467a7e">W17-5229</url>
       <doi>10.18653/v1/W17-5229</doi>
@@ -10735,7 +10735,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="30">
       <title><fixed-case>NSE</fixed-case>mo at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: An Ensemble to Predict Emotion Intensity in Tweets</title>
       <author><first>Sreekanth</first><last>Madisetty</last></author>
-      <author><first>Maunendra Sankar</first><last>Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra Sankar</first><last>Desarkar</last></author>
       <pages>219–224</pages>
       <url hash="660c7241">W17-5230</url>
       <doi>10.18653/v1/W17-5230</doi>
@@ -10744,7 +10744,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="31">
       <title><fixed-case>T</fixed-case>ecnolengua <fixed-case>L</fixed-case>ingmotif at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: A lexicon-based approach</title>
-      <author><first>Antonio</first><last>Moreno-Ortiz</last></author>
+      <author id="antonio-moreno-ortiz"><first>Antonio</first><last>Moreno-Ortiz</last></author>
       <pages>225–232</pages>
       <url hash="524609d7">W17-5231</url>
       <doi>10.18653/v1/W17-5231</doi>
@@ -10764,9 +10764,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="33">
       <title><fixed-case>YZU</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: Determining Emotion Intensity Using a Bi-directional <fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case> Model</title>
       <author><first>Yuanye</first><last>He</last></author>
-      <author><first>Liang-Chih</first><last>Yu</last></author>
+      <author id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></author>
       <author><first>K. Robert</first><last>Lai</last></author>
-      <author><first>Weiyi</first><last>Liu</last></author>
+      <author id="weiyi-liu"><first>Weiyi</first><last>Liu</last></author>
       <pages>238–242</pages>
       <url hash="db61677d">W17-5233</url>
       <doi>10.18653/v1/W17-5233</doi>
@@ -10796,7 +10796,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="36">
       <title><fixed-case>LIPN</fixed-case>-<fixed-case>UAM</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017:Combination of Lexicon-based features and Sentence-level Vector Representations for Emotion Intensity Determination</title>
       <author><first>Davide</first><last>Buscaldi</last></author>
-      <author><first>Belem</first><last>Priego</last></author>
+      <author id="belem-priego-sanchez"><first>Belem</first><last>Priego</last></author>
       <pages>255–258</pages>
       <url hash="bf5ca0e1">W17-5236</url>
       <doi>10.18653/v1/W17-5236</doi>
@@ -10821,13 +10821,13 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="648c78e9">W17-53</url>
-      <editor><first>Samuel</first><last>Bowman</last></editor>
+      <editor id="samuel-bowman"><first>Samuel</first><last>Bowman</last></editor>
       <editor><first>Yoav</first><last>Goldberg</last></editor>
       <editor><first>Felix</first><last>Hill</last></editor>
       <editor><first>Angeliki</first><last>Lazaridou</last></editor>
       <editor><first>Omer</first><last>Levy</last></editor>
       <editor><first>Roi</first><last>Reichart</last></editor>
-      <editor><first>Anders</first><last>Søgaard</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Søgaard</last></editor>
       <doi>10.18653/v1/W17-53</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Copenhagen, Denmark</address>
@@ -10877,7 +10877,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Evaluation of word embeddings against cognitive processes: primed reaction times in lexical decision and naming tasks</title>
       <author><first>Jeremy</first><last>Auguste</last></author>
       <author><first>Arnaud</first><last>Rey</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>21–26</pages>
       <url hash="8d1a0ea3">W17-5304</url>
       <doi>10.18653/v1/W17-5304</doi>
@@ -10896,7 +10896,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="6">
       <title>Recognizing Textual Entailment in <fixed-case>T</fixed-case>witter Using Word Embeddings</title>
-      <author><first>Octavia-Maria</first><last>Şulea</last></author>
+      <author id="octavia-maria-sulea"><first>Octavia-Maria</first><last>Şulea</last></author>
       <pages>31–35</pages>
       <url hash="9d057a3c">W17-5306</url>
       <doi>10.18653/v1/W17-5306</doi>
@@ -10910,7 +10910,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Zhen-Hua</first><last>Ling</last></author>
       <author><first>Si</first><last>Wei</last></author>
       <author><first>Hui</first><last>Jiang</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>36–40</pages>
       <url hash="707c1aba">W17-5307</url>
       <doi>10.18653/v1/W17-5307</doi>
@@ -10930,8 +10930,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="9">
       <title>Character-level Intra Attention Network for Natural Language Inference</title>
       <author><first>Han</first><last>Yang</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>46–50</pages>
       <url hash="f5772b1d">W17-5309</url>
       <doi>10.18653/v1/W17-5309</doi>
@@ -10956,7 +10956,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Thuong-Hai</first><last>Pham</last></author>
       <author><first>Xiaoyu</first><last>Bai</last></author>
       <author><first>Marc</first><last>Tanti</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>56–60</pages>
       <url hash="bb37264d">W17-5311</url>
@@ -10969,8 +10969,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the First Workshop on Building Linguistically Generalizable <fixed-case>NLP</fixed-case> Systems</booktitle>
       <url hash="f166c301">W17-54</url>
-      <editor><first>Emily</first><last>Bender</last></editor>
-      <editor><first>Hal</first><last>Daumé III</last></editor>
+      <editor id="emily-m-bender"><first>Emily</first><last>Bender</last></editor>
+      <editor id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></editor>
       <editor><first>Allyson</first><last>Ettinger</last></editor>
       <editor><first>Sudha</first><last>Rao</last></editor>
       <doi>10.18653/v1/W17-54</doi>
@@ -11013,7 +11013,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Massively Multilingual Neural Grapheme-to-Phoneme Conversion</title>
       <author><first>Ben</first><last>Peters</last></author>
       <author><first>Jon</first><last>Dehdari</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>19–26</pages>
       <url hash="cbcf8c96">W17-5403</url>
       <doi>10.18653/v1/W17-5403</doi>
@@ -11023,8 +11023,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="4">
       <title><fixed-case>BIBI</fixed-case> System Description: Building with <fixed-case>CNN</fixed-case>s and Breaking with Deep Reinforcement Learning</title>
       <author><first>Yitong</first><last>Li</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>27–32</pages>
       <url hash="63288a99">W17-5404</url>
       <doi>10.18653/v1/W17-5404</doi>
@@ -11036,11 +11036,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Taylor</first><last>Mahler</last></author>
       <author><first>Willy</first><last>Cheung</last></author>
       <author><first>Micha</first><last>Elsner</last></author>
-      <author><first>David</first><last>King</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Cory</first><last>Shain</last></author>
       <author><first>Symon</first><last>Stevens-Guille</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>33–39</pages>
       <url hash="94340603">W17-5405</url>
       <doi>10.18653/v1/W17-5405</doi>
@@ -11051,9 +11051,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="6">
       <title>An Adaptable Lexical Simplification Architecture for Major <fixed-case>I</fixed-case>bero-<fixed-case>R</fixed-case>omance Languages</title>
-      <author><first>Daniel</first><last>Ferrés</last></author>
+      <author id="daniel-ferres"><first>Daniel</first><last>Ferrés</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
-      <author><first>Xavier</first><last>Gómez Guinovart</last></author>
+      <author id="xavier-gomez-guinovart"><first>Xavier</first><last>Gómez Guinovart</last></author>
       <pages>40–47</pages>
       <url hash="86eefb71">W17-5406</url>
       <doi>10.18653/v1/W17-5406</doi>
@@ -11063,7 +11063,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="7">
       <title>Cross-genre Document Retrieval: Matching between Conversational and Formal Writings</title>
       <author><first>Tomasz</first><last>Jurczyk</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>48–53</pages>
       <url hash="20674708">W17-5407</url>
       <doi>10.18653/v1/W17-5407</doi>
@@ -11104,7 +11104,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the 18th Annual <fixed-case>SIG</fixed-case>dial Meeting on Discourse and Dialogue</booktitle>
       <url hash="20fbb7ea">W17-55</url>
-      <editor><first>Kristiina</first><last>Jokinen</last></editor>
+      <editor id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></editor>
       <editor><first>Manfred</first><last>Stede</last></editor>
       <editor><first>David</first><last>DeVault</last></editor>
       <editor><first>Annie</first><last>Louis</last></editor>
@@ -11132,7 +11132,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="2">
       <title>Towards Full Text Shallow Discourse Relation Annotation: Experiments with Cross-Paragraph Implicit Relations in the <fixed-case>PDTB</fixed-case></title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Katherine</first><last>Forbes Riley</last></author>
+      <author id="kate-forbes-riley"><first>Katherine</first><last>Forbes Riley</last></author>
       <author><first>Alan</first><last>Lee</last></author>
       <pages>7–16</pages>
       <url hash="5346413e">W17-5502</url>
@@ -11179,7 +11179,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Mihail</first><last>Eric</last></author>
       <author><first>Lakshmi</first><last>Krishnan</last></author>
       <author><first>Francois</first><last>Charette</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>37–49</pages>
       <url hash="3257b29b">W17-5506</url>
       <doi>10.18653/v1/W17-5506</doi>
@@ -11219,11 +11219,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Paweł</first><last>Budzianowski</last></author>
       <author><first>Iñigo</first><last>Casanueva</last></author>
       <author><first>Nikola</first><last>Mrkšić</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>65–70</pages>
       <url hash="2e8fd517">W17-5509</url>
       <doi>10.18653/v1/W17-5509</doi>
@@ -11243,7 +11243,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="11">
       <title>Demonstration of interactive teaching for end-to-end dialog control with hybrid code networks</title>
-      <author><first>Jason D.</first><last>Williams</last></author>
+      <author id="jason-d-williams"><first>Jason D.</first><last>Williams</last></author>
       <author><first>Lars</first><last>Liden</last></author>
       <pages>82–85</pages>
       <url hash="eb730c0d">W17-5511</url>
@@ -11259,8 +11259,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Nikola</first><last>Mrkšić</last></author>
       <author><first>Tsung-Hsien</first><last>Wen</last></author>
       <author><first>Iñigo</first><last>Casanueva</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>86–92</pages>
       <url hash="2c28b631">W17-5512</url>
       <doi>10.18653/v1/W17-5512</doi>
@@ -11281,8 +11281,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="14">
       <title>Sequential Dialogue Context Modeling for Spoken Language Understanding</title>
       <author><first>Ankur</first><last>Bapna</last></author>
-      <author><first>Gokhan</first><last>Tür</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tür</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tür</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tür</last></author>
       <author><first>Larry</first><last>Heck</last></author>
       <pages>103–114</pages>
       <url hash="6637ae89">W17-5514</url>
@@ -11331,8 +11331,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Pei-Hao</first><last>Su</last></author>
       <author><first>Paweł</first><last>Budzianowski</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <pages>147–157</pages>
       <url hash="5b6ab749">W17-5518</url>
       <doi>10.18653/v1/W17-5518</doi>
@@ -11368,11 +11368,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Edward</first><last>Cai</last></author>
       <author><first>Allen</first><last>Lu</last></author>
       <author><first>Eli</first><last>Pincus</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
-      <author><first>Milica</first><last>Gasic</last></author>
-      <author><first>Steve</first><last>Young</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gasic</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>170–173</pages>
       <url hash="e5ab2154">W17-5521</url>
@@ -11395,7 +11395,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="23">
       <title>The Role of Conversation Context for Sarcasm Detection in Online Interactions</title>
       <author><first>Debanjan</first><last>Ghosh</last></author>
-      <author><first>Alexander</first><last>Richard Fabbri</last></author>
+      <author id="alexander-richard-fabbri"><first>Alexander</first><last>Richard Fabbri</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>186–196</pages>
       <url hash="d3183294">W17-5523</url>
@@ -11429,7 +11429,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title><fixed-case>F</fixed-case>rames: a corpus for adding memory to goal-oriented dialogue systems</title>
       <author><first>Layla</first><last>El Asri</last></author>
       <author><first>Hannes</first><last>Schulz</last></author>
-      <author id="shikhar-sharma"><first>Shikhar</first><last>Sharma</last></author>
+      <author><first>Shikhar</first><last>Sharma</last></author>
       <author><first>Jeremie</first><last>Zumer</last></author>
       <author><first>Justin</first><last>Harris</last></author>
       <author><first>Emery</first><last>Fine</last></author>
@@ -11455,7 +11455,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="28">
       <title>Neural-based Natural Language Generation in Dialogue using <fixed-case>RNN</fixed-case> Encoder-Decoder with Semantic Aggregation</title>
       <author><first>Van-Khanh</first><last>Tran</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <author><first>Satoshi</first><last>Tojo</last></author>
       <pages>231–240</pages>
       <url hash="01fff806">W17-5528</url>
@@ -11465,8 +11465,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="29">
       <title>Beyond On-hold Messages: Conversational Time-buying in Task-oriented Dialogue</title>
-      <author><first>Soledad</first><last>López Gambino</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="m-soledad-lopez-gambino"><first>Soledad</first><last>López Gambino</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>241–246</pages>
       <url hash="b7ca1a9b">W17-5529</url>
@@ -11487,7 +11487,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="31">
       <title>Predicting Success in Goal-Driven Human-Human Dialogues</title>
       <author><first>Michael</first><last>Noseworthy</last></author>
-      <author><first>Jackie Chi Kit</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie Chi Kit</first><last>Cheung</last></author>
       <author><first>Joelle</first><last>Pineau</last></author>
       <pages>253–262</pages>
       <url hash="d1a762c5">W17-5531</url>
@@ -11500,7 +11500,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Jordon</first><last>Johnson</last></author>
       <author><first>Vaden</first><last>Masrani</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>263–272</pages>
       <url hash="a414e746">W17-5532</url>
       <doi>10.18653/v1/W17-5532</doi>
@@ -11520,7 +11520,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="34">
       <title>Adversarial evaluation for open-domain dialogue generation</title>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>284–288</pages>
       <url hash="b390d7c9">W17-5534</url>
       <doi>10.18653/v1/W17-5534</doi>
@@ -11531,7 +11531,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Exploring Joint Neural Model for Sentence Level Discourse Parsing and Sentiment Analysis</title>
       <author><first>Bita</first><last>Nejat</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Raymond</first><last>Ng</last></author>
+      <author id="raymond-ng"><first>Raymond</first><last>Ng</last></author>
       <pages>289–298</pages>
       <url hash="4df4af59">W17-5535</url>
       <doi>10.18653/v1/W17-5535</doi>
@@ -11554,8 +11554,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Shereen</first><last>Oraby</last></author>
       <author><first>Vrindavan</first><last>Harrison</last></author>
       <author><first>Amita</first><last>Misra</last></author>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>310–319</pages>
       <url hash="75d1d872">W17-5537</url>
       <doi>10.18653/v1/W17-5537</doi>
@@ -11567,8 +11567,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Finding Structure in Figurative Language: Metaphor Detection with Topic-based Frames</title>
       <author><first>Hyeju</first><last>Jang</last></author>
       <author><first>Keith</first><last>Maki</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>320–330</pages>
       <url hash="a2d9d4b8">W17-5538</url>
       <doi>10.18653/v1/W17-5538</doi>
@@ -11577,7 +11577,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="39">
       <title>Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
       <author><first>David</first><last>DeVault</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>331–341</pages>
@@ -11589,7 +11589,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="40">
       <title>Inferring Narrative Causality between Event Pairs in Films</title>
       <author><first>Zhichao</first><last>Hu</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>342–351</pages>
       <url hash="c721beaa">W17-5540</url>
       <doi>10.18653/v1/W17-5540</doi>
@@ -11623,8 +11623,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Elahe</first><last>Rahimtoroghi</last></author>
       <author><first>Jiaqi</first><last>Wu</last></author>
       <author><first>Ruimin</first><last>Wang</last></author>
-      <author><first>Pranav</first><last>Anand</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="pranav-anand"><first>Pranav</first><last>Anand</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>360–369</pages>
       <url hash="94429379">W17-5543</url>
       <doi>10.18653/v1/W17-5543</doi>
@@ -11655,7 +11655,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Matthias</first><last>Grabmair</last></author>
       <author><first>Graham</first><last>Neubig</last></author>
       <author><first>Jonathan</first><last>Francis</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>374–383</pages>
       <url hash="c357b049">W17-5545</url>
       <doi>10.18653/v1/W17-5545</doi>
@@ -11701,10 +11701,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="1">
       <title>Building a Better Bitext for Structurally Different Languages through Self-training</title>
       <author><first>Jungyeul</first><last>Park</last></author>
-      <author><first>Loïc</first><last>Dugast</last></author>
+      <author id="loic-dugast"><first>Loïc</first><last>Dugast</last></author>
       <author><first>Jeen-Pyo</first><last>Hong</last></author>
       <author><first>Chang-Uk</first><last>Shin</last></author>
-      <author><first>Jeong-Won</first><last>Cha</last></author>
+      <author id="jeong-won-cha"><first>Jeong-Won</first><last>Cha</last></author>
       <pages>1–10</pages>
       <url hash="02fe425e">W17-5601</url>
       <abstract>We propose a novel method to bootstrap the construction of parallel corpora for new pairs of structurally different languages. We do so by combining the use of a pivot language and self-training. A pivot language enables the use of existing translation models to bootstrap the alignment and a self-training procedure enables to achieve better alignment, both at the document and sentence level. We also propose several evaluation methods for the resulting alignment.</abstract>
@@ -11807,7 +11807,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="5">
       <title><fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish/<fixed-case>C</fixed-case>hinese/<fixed-case>K</fixed-case>orean Datasets for Translation Quality Estimation and Automatic Post-Editing</title>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>79–88</pages>
       <url hash="cdf26a36">W17-5705</url>
       <abstract>Aiming at facilitating the research on quality estimation (QE) and automatic post-editing (APE) of machine translation (MT) outputs, especially for those among Asian languages, we have created new datasets for Japanese to English, Chinese, and Korean translations. As the source text, actual utterances in Japanese were extracted from the log data of our speech translation service. MT outputs were then given by phrase-based statistical MT systems. Finally, human evaluators were employed to grade the quality of MT outputs and to post-edit them. This paper describes the characteristics of the created datasets and reports on our benchmarking experiments on word-level QE, sentence-level QE, and APE conducted using the created datasets.</abstract>
@@ -11871,7 +11871,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="11">
       <title>Ensemble and Reranking: Using Multiple Models in the <fixed-case>NICT</fixed-case>-2 Neural Machine Translation System at <fixed-case>WAT</fixed-case>2017</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>127–134</pages>
       <url hash="3120f9b2">W17-5711</url>
       <abstract>In this paper, we describe the NICT-2 neural machine translation system evaluated at WAT2017. This system uses multiple models as an ensemble and combines models with opposite decoding directions by reranking (called bi-directional reranking). In our experimental results on small data sets, the translation quality improved when the number of models was increased to 32 in total and did not saturate. In the experiments on large data sets, improvements of 1.59-3.32 BLEU points were achieved when six-model ensembles were combined by the bi-directional reranking.</abstract>
@@ -11883,7 +11883,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Katsuhito</first><last>Sudoh</last></author>
       <author><first>Satoshi</first><last>Nakamura</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>135–139</pages>
       <url hash="1e5af071">W17-5712</url>
       <abstract>This paper describes the details about the NAIST-NICT machine translation system for WAT2017 English-Japanese Scientific Paper Translation Task. The system consists of a language-independent tokenizer and an attentional encoder-decoder style neural machine translation model. According to the official results, our system achieves higher translation accuracy than any systems submitted previous campaigns despite simple model architecture.</abstract>
@@ -11901,7 +11901,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="14">
       <title><fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to <fixed-case>WAT</fixed-case> 2017</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
@@ -11913,8 +11913,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="15">
       <title><fixed-case>CUNI</fixed-case> <fixed-case>NMT</fixed-case> System for <fixed-case>WAT</fixed-case> 2017 Translation Tasks</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>154–159</pages>
       <url hash="fb7b5f5f">W17-5715</url>
       <abstract>The paper presents this year’s CUNI submissions to the WAT 2017 Translation Task focusing on the Japanese-English translation, namely Scientific papers subtask, Patents subtask and Newswire subtask. We compare two neural network architectures, the standard sequence-to-sequence with attention (Seq2Seq) and an architecture using convolutional sentence encoder (FBConv2Seq), both implemented in the NMT framework Neural Monkey that we currently participate in developing. We also compare various types of preprocessing of the source Japanese sentences and their impact on the overall results. Furthermore, we include the results of our experiments with out-of-domain data obtained by combining the corpora provided for each subtask.</abstract>
@@ -11934,7 +11934,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Ritesh</first><last>Panjwani</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>167–170</pages>
       <url hash="77bf8678">W17-5717</url>
       <abstract>In this paper, we empirically compare the two encoder-decoder neural machine translation architectures: convolutional sequence to sequence model (ConvS2S) and recurrent sequence to sequence model (RNNS2S) for English-Hindi language pair as part of IIT Bombay’s submission to WAT2017 shared task. We report the results for both English-Hindi and Hindi-English direction of language pair.</abstract>
@@ -12001,7 +12001,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Hong-Jie</first><last>Dai</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
       <author><first>Jitendra</first><last>Jonnagaddala</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>26–32</pages>
       <url hash="202cd9a7">W17-5804</url>
       <abstract>The increasing popularity of social media lead users to share enormous information on the internet. This information has various application like, it can be used to develop models to understand or predict user behavior on social media platforms. For example, few online retailers have studied the shopping patterns to predict shopper’s pregnancy stage. Another interesting application is to use the social media platforms to analyze users’ health-related information. In this study, we developed a tree kernel-based model to classify tweets conveying pregnancy related information using this corpus. The developed pregnancy classification model achieved an accuracy of 0.847 and an F-score of 0.565. A new corpus from popular social media platform Twitter was developed for the purpose of this study. In future, we would like to improve this corpus by reducing noise such as retweets.</abstract>
@@ -12041,7 +12041,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Juae</first><last>Kim</last></author>
       <author><first>Sunjae</first><last>Kwon</last></author>
       <author><first>Youngjoong</first><last>Ko</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>47–51</pages>
       <url hash="8b996342">W17-5807</url>
       <abstract>Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because an-notating a biomedical corpus for ma-chine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of bio-medical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03%p improvements on F1-score.</abstract>
@@ -12051,7 +12051,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Enhancing Drug-Drug Interaction Classification with Corpus-level Feature and Classifier Ensemble</title>
       <author><first>Jing Cyun</first><last>Tu</last></author>
       <author><first>Po-Ting</first><last>Lai</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>52–56</pages>
       <url hash="980919d2">W17-5808</url>
       <abstract>The study of drug-drug interaction (DDI) is important in the drug discovering. Both PubMed and DrugBank are rich resources to retrieve DDI information which is usually represented in plain text. Automatically extracting DDI pairs from text improves the quality of drug discov-ering. In this paper, we presented a study that focuses on the DDI classification. We normalized the drug names, and developed both sentence-level and corpus-level features for DDI classification. A classifier ensemble approach is used for the unbalance DDI labels problem. Our approach achieved an F-score of 65.4% on SemEval 2013 DDI test set. The experimental results also show the effects of proposed corpus-level features in the DDI task.</abstract>
@@ -12061,7 +12061,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Chemical-Induced Disease Detection Using Invariance-based Pattern Learning Model</title>
       <author><first>Neha</first><last>Warikoo</last></author>
       <author><first>Yung-Chun</first><last>Chang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>57–64</pages>
       <url hash="4c8126c5">W17-5809</url>
       <abstract>In this work, we introduce a novel feature engineering approach named “algebraic invariance” to identify discriminative patterns for learning relation pair features for the chemical-disease relation (CDR) task of BioCreative V. Our method exploits the existing structural similarity of the key concepts of relation descriptions from the CDR corpus to generate robust linguistic patterns for SVM tree kernel-based learning. Preprocessing of the training data classifies the entity pairs as either related or unrelated to build instance types for both inter-sentential and intra-sentential scenarios. An invariant function is proposed to process and optimally cluster similar patterns for both positive and negative instances. The learning model for CDR pairs is based on the SVM tree kernel approach, which generates feature trees and vectors and is modeled on suitable invariance based patterns, bringing brevity, precision and context to the identifier features. Results demonstrate that our method outperformed other compared approaches, achieved a high recall rate of 85.08%, and averaged an F1-score of 54.34% without the use of any additional knowledge bases.</abstract>
@@ -12072,10 +12072,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (<fixed-case>NLPTEA</fixed-case> 2017)</booktitle>
       <url hash="e181ef63">W17-59</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
       <editor><first>Hsin-Hsi</first><last>Chen</last></editor>
       <editor><first>Lung-Hao</first><last>Lee</last></editor>
-      <editor><first>Liang-Chih</first><last>Yu</last></editor>
+      <editor id="liang-chih-yu"><first>Liang-Chih</first><last>Yu</last></editor>
       <publisher>Asian Federation of Natural Language Processing</publisher>
       <address>Taipei, Taiwan</address>
       <month>December</month>
@@ -12104,7 +12104,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="2">
       <title>Understanding Non-Native Writings: Can a Parser Help?</title>
       <author><first>Jirka</first><last>Hana</last></author>
-      <author><first>Barbora</first><last>Hladká</last></author>
+      <author id="barbora-hladka"><first>Barbora</first><last>Hladká</last></author>
       <pages>12–16</pages>
       <url hash="994efe52">W17-5902</url>
       <abstract>We present a pilot study on parsing non-native texts written by learners of Czech. We performed experiments that have shown that at least high-level syntactic functions, like subject, predicate, and object, can be assigned based on a parser trained on standard native language.</abstract>
@@ -12113,7 +12113,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="3">
       <title><fixed-case>C</fixed-case>arrier Sentence Selection for Fill-in-the-blank Items</title>
       <author><first>Shu</first><last>Jiang</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>17–22</pages>
       <url hash="1233bce0">W17-5903</url>
       <abstract>Fill-in-the-blank items are a common form of exercise in computer-assisted language learning systems. To automatically generate an effective item, the system must be able to select a high-quality carrier sentence that illustrates the usage of the target word. Previous approaches for carrier sentence selection have considered sentence length, vocabulary difficulty, the position of the target word and the presence of finite verbs. This paper investigates the utility of word co-occurrence statistics and lexical similarity as selection criteria. In an evaluation on generating fill-in-the-blank items for learning Chinese as a foreign language, we show that these two criteria can improve carrier sentence quality.</abstract>
@@ -12125,8 +12125,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Meenakshi</first><last>Somasundaram</last></author>
       <author><first>Dhara</first><last>Gorasia</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>23–28</pages>
       <url hash="0f03f810">W17-5904</url>
       <abstract>In today’s technology driven digital era, education domain is undergoing a transformation from traditional approaches to more learner controlled and flexible methods of learning. This transformation has opened the new avenues for interdisciplinary research in the field of educational technology and natural language processing in developing quality digital aids for learning and teaching. The tool presented here - Hindi Shabhadamitra, developed using Hindi Wordnet for Hindi language learning, is one such e-learning tool. It has been developed as a teaching and learning aid suitable for formal school based curriculum and informal setup for self learning users. Besides vocabulary, it also provides word based grammar along with images and pronunciation for better learning and retention. This aid demonstrates that how a rich lexical resource like wordnet can be systematically remodeled for practical usage in the educational domain.</abstract>
@@ -12137,9 +12137,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Gabriel</first><last>Fung</last></author>
       <author><first>Maxime</first><last>Debosschere</last></author>
       <author><first>Dingmin</first><last>Wang</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Jia</first><last>Zhu</last></author>
-      <author><first>Kam-Fai</first><last>Wong</last></author>
+      <author id="kam-fai-wong"><first>Kam-Fai</first><last>Wong</last></author>
       <pages>29–34</pages>
       <url hash="3a9c6411">W17-5905</url>
       <abstract>This paper provides an overview along with our findings of the Chinese Spelling Check shared task at NLPTEA 2017. The goal of this task is to develop a computer-assisted system to automatically diagnose typing errors in traditional Chinese sentences written by students. We defined six types of errors which belong to two categories. Given a sentence, the system should detect where the errors are, and for each detected error determine its type and provide correction suggestions. We designed, constructed, and released a benchmark dataset for this task.</abstract>
@@ -12191,8 +12191,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="10">
       <title>Complex Word Identification: Challenges in Data Annotation and System Performance</title>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>59–63</pages>
       <url hash="0ede0a62">W17-5910</url>
@@ -12215,7 +12215,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Harsimran</first><last>Bedi</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <pages>69–77</pages>
       <url hash="7e750e6f">W17-5912</url>
       <abstract>Event timeline serves as the basic structure of history, and it is used as a disposition of key phenomena in studying history as a subject in secondary school. In order to enable a student to understand a historical phenomenon as a series of connected events, we present a system for automatic event timeline generation from history textbooks. Additionally, we propose Message Sequence Chart (MSC) and time-map based visualization techniques to visualize an event timeline. We also identify key computational challenges in developing natural language processing based applications for history textbooks.</abstract>
@@ -12289,7 +12289,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Guoping</first><last>Huang</last></author>
       <author><first>Jiajun</first><last>Zhang</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>37–45</pages>
       <url hash="ce9469b4">W17-6005</url>
       <abstract>Terms extensively exist in specific domains, and term translation plays a critical role in domain-specific machine translation (MT) tasks. However, it’s a challenging task to translate them correctly for the huge number of pre-existing terms and the endless new terms. To achieve better term translation quality, it is necessary to inject external term knowledge into the underlying MT system. Fortunately, there are plenty of term translation knowledge in parenthetical sentences on the Internet. In this paper, we propose a simple, straightforward and effective framework to improve term translation by learning from parenthetical sentences. This framework includes: (1) a focused web crawler; (2) a parenthetical sentence filter, acquiring parenthetical sentences including bilingual term pairs; (3) a term translation knowledge extractor, extracting bilingual term translation candidates; (4) a probability learner, generating the term translation table for MT decoders. The extensive experiments demonstrate that our proposed framework significantly improves the translation quality of terms and sentences.</abstract>
@@ -12339,14 +12339,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="4">
       <title>Reflexives and Reciprocals in Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar</title>
       <author><first>Cristina</first><last>Aggazzotti</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>31–42</pages>
       <url hash="975eefb5">W17-6204</url>
       <bibkey>aggazzotti-shieber-2017-reflexives</bibkey>
     </paper>
     <paper id="5">
       <title>Coordination in <fixed-case>TAG</fixed-case> without the Conjoin Operation</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>43–52</pages>
       <url hash="24642506">W17-6205</url>
@@ -12354,7 +12354,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="6">
       <title>Scope, Time, and Predicate Restriction in <fixed-case>B</fixed-case>lackfoot using <fixed-case>MC</fixed-case>-<fixed-case>STAG</fixed-case></title>
-      <author><first>Dennis Ryan</first><last>Storoshenko</last></author>
+      <author id="dennis-ryan-storoshenko"><first>Dennis Ryan</first><last>Storoshenko</last></author>
       <pages>53–60</pages>
       <url hash="0e55d30c">W17-6206</url>
       <bibkey>storoshenko-2017-scope</bibkey>
@@ -12369,7 +12369,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="8">
       <title>Parsing with Dynamic Continuized <fixed-case>CCG</fixed-case></title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Simon</first><last>Charlow</last></author>
       <author><first>Jordan</first><last>Needle</last></author>
       <author><first>Dylan</first><last>Bumford</last></author>
@@ -12379,7 +12379,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="9">
       <title>Multiword Expression-Aware <fixed-case>A</fixed-case>* <fixed-case>TAG</fixed-case> Parsing Revisited</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Yannick</first><last>Parmentier</last></author>
       <pages>84–93</pages>
@@ -12406,7 +12406,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="12">
       <title>Transforming Dependency Structures to <fixed-case>LTAG</fixed-case> Derivation Trees</title>
       <author><first>Caio</first><last>Corro</last></author>
-      <author><first>Joseph</first><last>Le Roux</last></author>
+      <author id="joseph-le-roux"><first>Joseph</first><last>Le Roux</last></author>
       <pages>112–121</pages>
       <url hash="ff3303b9">W17-6212</url>
       <bibkey>corro-le-roux-2017-transforming</bibkey>
@@ -12415,10 +12415,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Linguistically Rich Vector Representations of Supertags for <fixed-case>TAG</fixed-case> Parsing</title>
       <author><first>Dan</first><last>Friedman</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>R. Thomas</first><last>McCoy</last></author>
+      <author id="r-thomas-mccoy"><first>R. Thomas</first><last>McCoy</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <author><first>Forrest</first><last>Davis</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>122–131</pages>
       <url hash="df40860b">W17-6213</url>
       <bibkey>friedman-etal-2017-linguistically</bibkey>
@@ -12428,7 +12428,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Pauli</first><last>Xu</last></author>
       <author><first>Robert</first><last>Frank</last></author>
       <author><first>Jungo</first><last>Kasai</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>132–141</pages>
       <url hash="2c6a5ed8">W17-6214</url>
       <bibkey>xu-etal-2017-tag</bibkey>
@@ -12472,8 +12472,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="3">
       <title>Lexicalized vs. Delexicalized Parsing in Low-Resource Scenarios</title>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <pages>18–24</pages>
       <url hash="52ce4cc3">W17-6303</url>
       <abstract>We present a systematic analysis of lexicalized vs. delexicalized parsing in low-resource scenarios, and propose a methodology to choose one method over another under certain conditions. We create a set of simulation experiments on 41 languages and apply our findings to 9 low-resource languages. Experimental results show that our methodology chooses the best approach in 8 out of 9 cases.</abstract>
@@ -12481,8 +12481,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>Improving neural tagging with lexical information</title>
-      <author><first>Benoît</first><last>Sagot</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
       <pages>25–31</pages>
       <url hash="47518962">W17-6304</url>
       <abstract>Neural part-of-speech tagging has achieved competitive results with the incorporation of character-based and pre-trained word embeddings. In this paper, we show that a state-of-the-art bi-LSTM tagger can benefit from using information from morphosyntactic lexicons as additional input. The tagger, trained on several dozen languages, shows a consistent, average improvement when using lexical information, even when also using character-based embeddings, thus showing the complementarity of the different sources of lexical information. The improvements are particularly important for the smaller datasets.</abstract>
@@ -12490,7 +12490,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="5">
       <title>Prepositional Phrase Attachment over Word Embedding Products</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Xavier</first><last>Carreras</last></author>
       <author><first>Ariadna</first><last>Quattoni</last></author>
       <pages>32–43</pages>
@@ -12500,7 +12500,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="6">
       <title><fixed-case>L</fixed-case>1-<fixed-case>L</fixed-case>2 Parallel Dependency Treebank as Learner Corpus</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Keying</first><last>Li</last></author>
       <author><first>Herman</first><last>Leung</last></author>
       <pages>44–49</pages>
@@ -12510,7 +12510,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="7">
       <title>Splitting Complex <fixed-case>E</fixed-case>nglish Sentences</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>J. Buddhika K. Pathirage</first><last>Don</last></author>
       <pages>50–55</pages>
       <url hash="0107370a">W17-6307</url>
@@ -12529,9 +12529,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="9">
       <title>Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling</title>
-      <author><first>Riyaz A.</first><last>Bhat</last></author>
-      <author><first>Irshad</first><last>Bhat</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz A.</first><last>Bhat</last></author>
+      <author id="irshad-bhat"><first>Irshad</first><last>Bhat</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>61–66</pages>
       <url hash="e44604a6">W17-6309</url>
       <abstract>We investigate the problem of parsing conversational data of morphologically-rich languages such as Hindi where argument scrambling occurs frequently. We evaluate a state-of-the-art non-linear transition-based parsing system on a new dataset containing 506 dependency trees for sentences from Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual speakers. We show that a dependency parser trained on a newswire treebank is strongly biased towards the canonical structures and degrades when applied to conversational data. Inspired by Transformational Generative Grammar (Chomsky, 1965), we mitigate the sampling bias by generating all theoretically possible alternative word orders of a clause from the existing (kernel) structures in the treebank. Training our parser on canonical and transformed structures improves performance on conversational data by around 9% LAS over the baseline newswire parser.</abstract>
@@ -12539,7 +12539,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="10">
       <title>Using hyperlinks to improve multilingual partial parsers</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>67–71</pages>
       <url hash="b2cbce94">W17-6310</url>
       <abstract>Syntactic annotation is costly and not available for the vast majority of the world’s languages. We show that sometimes we can do away with less labeled data by exploiting more readily available forms of mark-up. Specifically, we revisit an idea from Valentin Spitkovsky’s work (2010), namely that hyperlinks typically bracket syntactic constituents or chunks. We strengthen his results by showing that not only can hyperlinks help in low resource scenarios, exemplified here by Quechua, but learning from hyperlinks can also improve state-of-the-art NLP models for English newswire. We also present out-of-domain evaluation on English Ontonotes 4.0.</abstract>
@@ -12549,8 +12549,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Correcting prepositional phrase attachments using multimodal corpora</title>
       <author><first>Sebastien</first><last>Delecraz</last></author>
       <author><first>Alexis</first><last>Nasr</last></author>
-      <author><first>Frederic</first><last>Bechet</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>72–77</pages>
       <url hash="35ad76e0">W17-6311</url>
       <abstract>PP-attachments are an important source of errors in parsing natural language. We propose in this article to use data coming from a multimodal corpus, combining textual, visual and conceptual information, as well as a correction strategy, to propose alternative attachments in the output of a parser.</abstract>
@@ -12569,7 +12569,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Effective Online Reordering with Arc-Eager Transitions</title>
       <author><first>Ryosuke</first><last>Kohita</last></author>
       <author><first>Hiroshi</first><last>Noji</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>88–98</pages>
       <url hash="f6f6895d">W17-6313</url>
       <abstract>We present a new transition system with word reordering for unrestricted non-projective dependency parsing. Our system is based on decomposed arc-eager rather than arc-standard, which allows more flexible ambiguity resolution between a local projective and non-local crossing attachment. In our experiment on Universal Dependencies 2.0, we find our parser outperforms the ordinary swap-based parser particularly on languages with a large amount of non-projectivity.</abstract>
@@ -12630,7 +12630,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the Fourth International Conference on Dependency Linguistics (Depling 2017)</booktitle>
       <url hash="394e5eae">W17-65</url>
-      <editor><first>Simonetta</first><last>Montemagni</last></editor>
+      <editor id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></editor>
       <editor><first>Joakim</first><last>Nivre</last></editor>
       <publisher>Linköping University Electronic Press</publisher>
       <address>Pisa, Italy</address>
@@ -12651,7 +12651,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="2">
       <title>Syntax-Semantics Interface: A Plea for a Deep Dependency Sentence Structure</title>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>2–3</pages>
       <url hash="75b2ce97">W17-6502</url>
       <bibkey>hajicova-2017-syntax</bibkey>
@@ -12666,7 +12666,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>On the Predicate-Argument Structure: Internal and Absorbing Scope</title>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <pages>15–24</pages>
       <url hash="3a2359ad">W17-6504</url>
       <bibkey>boguslavsky-2017-predicate</bibkey>
@@ -12690,10 +12690,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="7">
       <title>Enhanced <fixed-case>UD</fixed-case> Dependencies with Neutralized Diathesis Alternation</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
       <author><first>Guy</first><last>Perrier</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>42–53</pages>
       <url hash="219ea8e5">W17-6507</url>
       <bibkey>candito-etal-2017-enhanced</bibkey>
@@ -12708,8 +12708,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="9">
       <title>A Dependency Treebank for <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish</title>
-      <author><first>Memduh</first><last>Gökırmak</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gökırmak</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>64–72</pages>
       <url hash="bda42be5">W17-6509</url>
       <bibkey>gokirmak-tyers-2017-dependency</bibkey>
@@ -12750,7 +12750,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="14">
       <title>Assessing the Annotation Consistency of the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Corpora</title>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <author><first>Matias</first><last>Grioni</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
@@ -12768,7 +12768,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="16">
       <title>Dependency Structure of Binary Conjunctions(of the <fixed-case>IF</fixed-case>…, <fixed-case>THEN</fixed-case>… Type)</title>
-      <author><first>Igor</first><last>Mel’čuk</last></author>
+      <author id="igor-melcuk"><first>Igor</first><last>Mel’čuk</last></author>
       <pages>127–134</pages>
       <url hash="87cd4f03">W17-6516</url>
       <bibkey>melcuk-2017-dependency</bibkey>
@@ -12823,9 +12823,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Alexandre</first><last>Rademaker</last></author>
       <author><first>Fabricio</first><last>Chalub</last></author>
       <author><first>Livy</first><last>Real</last></author>
-      <author><first>Cláudia</first><last>Freitas</last></author>
-      <author><first>Eckhard</first><last>Bick</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="claudia-freitas"><first>Cláudia</first><last>Freitas</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <pages>197–206</pages>
       <url hash="c5bddccb">W17-6523</url>
       <bibkey>rademaker-etal-2017-universal</bibkey>
@@ -12853,16 +12853,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Annotating <fixed-case>I</fixed-case>talian Social Media Texts in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Manuela</first><last>Sanguinetti</last></author>
       <author><first>Cristina</first><last>Bosco</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>229–239</pages>
       <url hash="9750d39f">W17-6526</url>
       <bibkey>sanguinetti-etal-2017-annotating</bibkey>
     </paper>
     <paper id="27">
       <title><fixed-case>H</fixed-case>ungarian Copula Constructions in Dependency Syntax and Parsing</title>
-      <author><first>Katalin Ilona</first><last>Simkó</last></author>
+      <author id="katalin-ilona-simko"><first>Katalin Ilona</first><last>Simkó</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
       <pages>240–247</pages>
       <url hash="fe657f6e">W17-6527</url>
@@ -12870,7 +12870,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="28">
       <title>Semgrex-Plus: a Tool for Automatic Dependency-Graph Rewriting</title>
-      <author><first>Fabio</first><last>Tamburini</last></author>
+      <author id="fabio-tamburini"><first>Fabio</first><last>Tamburini</last></author>
       <pages>248–254</pages>
       <url hash="b54cd158">W17-6528</url>
       <bibkey>tamburini-2017-semgrex</bibkey>
@@ -12878,7 +12878,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="29">
       <title>Unity in Diversity: A Unified Parsing Strategy for Major <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Juhi</first><last>Tandon</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>255–265</pages>
       <url hash="5c74e51f">W17-6529</url>
       <bibkey>tandon-sharma-2017-unity</bibkey>
@@ -12888,7 +12888,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Tak-sum</first><last>Wong</last></author>
       <author><first>Kim</first><last>Gerdes</last></author>
       <author><first>Herman</first><last>Leung</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>266–275</pages>
       <url hash="2c6cc9d5">W17-6530</url>
       <bibkey>wong-etal-2017-quantitative</bibkey>
@@ -12904,7 +12904,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="32">
       <title>Core Arguments in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>287–296</pages>
       <url hash="21df0bba">W17-6532</url>
       <bibkey>zeman-2017-core</bibkey>
@@ -12914,7 +12914,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the 11th <fixed-case>B</fixed-case>razilian Symposium in Information and Human Language Technology</booktitle>
       <url hash="0008f51a">W17-66</url>
-      <editor><first>Gustavo Henrique</first><last>Paetzold</last></editor>
+      <editor id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></editor>
       <editor><first>Vládia</first><last>Pinheiro</last></editor>
       <publisher>Sociedade Brasileira de Computação</publisher>
       <address>Uberlândia, Brazil</address>
@@ -12940,7 +12940,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Estudo exploratório de categorias gramaticais com potencial de indicadores para a Análise de Sentimentos (An Exploratory study of grammatical categories as potential indicators for Sentiment Analysis)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Júlia</first><last>Rodrigues</last></author>
       <author><first>Adriana</first><last>Pagano</last></author>
-      <author><first>Emerson</first><last>Paraiso</last></author>
+      <author id="emerson-cabrera-paraiso"><first>Emerson</first><last>Paraiso</last></author>
       <pages>17-21</pages>
       <url hash="dc4318ad">W17-6602</url>
       <bibkey>rodrigues-etal-2017-estudo</bibkey>
@@ -12955,7 +12955,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>A study on irony within the context of 7x1-<fixed-case>PT</fixed-case> corpus</title>
-      <author><first>Silvia</first><last>Moraes</last></author>
+      <author id="silvia-moraes"><first>Silvia</first><last>Moraes</last></author>
       <author><first>Rackel</first><last>Machado</last></author>
       <author><first>Matheus</first><last>Redecker</last></author>
       <author><first>Rafael</first><last>Cadaval</last></author>
@@ -12977,7 +12977,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="6">
       <title>Wheel of Life: an initial investigation. Topic-Related Polarity Visualization in Personal Stories</title>
       <author><first>Henrique</first><last>Santos</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Greice</first><last>Pinho</last></author>
       <author><first>Jackson</first><last>Pinheiro</last></author>
       <pages>37-41</pages>
@@ -13006,9 +13006,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="9">
       <title>Processo de construção de um corpus anotado com Entidades Geológicas visando <fixed-case>REN</fixed-case> (Building an annotated corpus with geological entities for <fixed-case>NER</fixed-case>)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Daniela</first><last>Amaral</last></author>
-      <author><first>Sandra</first><last>Collovini</last></author>
+      <author id="sandra-collovini"><first>Sandra</first><last>Collovini</last></author>
       <author><first>Anny</first><last>Figueira</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Renata</first><last>Vieira</last></author>
       <author><first>Marco</first><last>Gonzalez</last></author>
       <pages>63-72</pages>
@@ -13064,11 +13064,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="15">
       <title><fixed-case>P</fixed-case>ortuguese Word Embeddings: Evaluating on Word Analogies and Natural Language Tasks</title>
       <author><first>Nathan</first><last>Hartmann</last></author>
-      <author><first>Erick</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
       <author><first>Christopher</first><last>Shulby</last></author>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Jéssica</first><last>Silva</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>122-131</pages>
       <url hash="9bcda322">W17-6615</url>
       <bibkey>hartmann-etal-2017-portuguese</bibkey>
@@ -13085,7 +13085,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Normalizador de Texto para Lingua Portuguesa baseado em Modelo de Linguagem (A Normalizer based on Language Model for Texts in <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Patrick</first><last>Bard</last></author>
       <author><first>Renan Lopes</first><last>Luis</last></author>
-      <author><first>Silvia</first><last>Moraes</last></author>
+      <author id="silvia-moraes"><first>Silvia</first><last>Moraes</last></author>
       <pages>142-150</pages>
       <url hash="42c1024e">W17-6617</url>
       <bibkey>bard-etal-2017-normalizador</bibkey>
@@ -13094,7 +13094,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Evaluating Word Embeddings for Sentence Boundary Detection in Speech Transcripts</title>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Christopher</first><last>Shulby</last></author>
-      <author><first>Sandra</first><last>Aluísio</last></author>
+      <author id="sandra-aluisio"><first>Sandra</first><last>Aluísio</last></author>
       <pages>151-160</pages>
       <url hash="933abf21">W17-6618</url>
       <bibkey>treviso-etal-2017-evaluating</bibkey>
@@ -13128,7 +13128,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="22">
       <title>Constituição de Um Dicionário Eletrônico Trilíngue Fundado em Frames a partir da Extração Automática de Candidatos a Termos do Domínio do Turismo (The Constitution of a Trilingual Eletronic Dictionary Based on Frames from the Automatic Extraction of Candidate Terms of the Tourism Domain)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Simone Rodrigues</first><last>Peron-Corrêa</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>193-200</pages>
       <url hash="e4390c47">W17-6622</url>
       <bibkey>peron-correa-torrent-2017-constituicao</bibkey>
@@ -13136,7 +13136,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="23">
       <title>A Modelagem Computacional do Domínio dos Esportes na <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil (The Computational Modeling of the Sports Domain in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Alexandre Diniz</first><last>Costa</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>201-208</pages>
       <url hash="1a15990c">W17-6623</url>
       <bibkey>costa-torrent-2017-modelagem</bibkey>
@@ -13144,8 +13144,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="24">
       <title>Descrição e modelagem de construções interrogativas <fixed-case>QU</fixed-case>- em Português Brasileiro para o desenvolvimento de um chatbot (Description and modeling of interrogative constructs <fixed-case>QU</fixed-case>- in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese for the development of a chatbot)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Natália Duarte</first><last>Marção</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
-      <author><first>Ely Edison da Silva</first><last>Matos</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely Edison da Silva</first><last>Matos</last></author>
       <pages>209-216</pages>
       <url hash="dc1f0c68">W17-6624</url>
       <bibkey>marcao-etal-2017-descricao</bibkey>
@@ -13153,7 +13153,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="25">
       <title>Construções de Estrutura Argumental no âmbito do Constructicon da <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: proposta de uma modelagem linguístico-computacional (Structural Constructs of Arguments in the Context of the Construction of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: a proposal for a computational-linguistic modeling)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Vânia Gomes</first><last>Almeida</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>217-223</pages>
       <url hash="aad8bbe0">W17-6625</url>
       <bibkey>almeida-torrent-2017-construcoes</bibkey>
@@ -13168,7 +13168,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="27">
       <title>Uma Proposta Metodológica para a Categorização Automatizada de Atrações Turísticas a partir de Comentários de Usuários em Plataformas Online (A Methodological Proposition for the Automatic Categorization of Touristic Attractions from User Comments in Online Platforms)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Vanessa Maria Ramos Lopes</first><last>Paiva</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
       <pages>232-239</pages>
       <url hash="7aeb14f3">W17-6627</url>
       <bibkey>paiva-torrent-2017-uma</bibkey>
@@ -13177,7 +13177,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Sofrer uma ofensa, Receber uma advertência: Verbos-suporte Conversos de ‘Fazer’ no Português do Brasil (Suffering an offense, Receiving a citation: Supporting Vectors Converted from ‘To do’ in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Claúdia D.</first><last>Barros</last></author>
       <author><first>Nathalia P.</first><last>Calcia</last></author>
-      <author><first>Oto A.</first><last>Vale</last></author>
+      <author id="oto-vale"><first>Oto A.</first><last>Vale</last></author>
       <pages>240-246</pages>
       <url hash="1e9af5c9">W17-6628</url>
       <bibkey>barros-etal-2017-sofrer</bibkey>
@@ -13202,8 +13202,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="31">
       <title>As bases de dados verbais <fixed-case>ADESSE</fixed-case> e <fixed-case>V</fixed-case>i<fixed-case>PE</fixed-case>r: uma análise constrastiva das construções locativas em espanhol e em português (The verbal databases <fixed-case>ADESSE</fixed-case> and <fixed-case>V</fixed-case>i<fixed-case>PE</fixed-case>r: a contrastive analysis of locative constructs in <fixed-case>S</fixed-case>panish and <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese]</title>
       <author><first>Roana</first><last>Rodrigues</last></author>
-      <author><first>Oto</first><last>Vale</last></author>
-      <author><first>Laura</first><last>Alonso Alemany</last></author>
+      <author id="oto-vale"><first>Oto</first><last>Vale</last></author>
+      <author id="laura-alonso-alemany"><first>Laura</first><last>Alonso Alemany</last></author>
       <pages>266-273</pages>
       <url hash="8edb596a">W17-6631</url>
       <bibkey>rodrigues-etal-2017-bases</bibkey>
@@ -13230,7 +13230,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="2">
       <title>Extracting word lists for domain-specific implicit opinions from corpora</title>
-      <author><first>Núria Bertomeu</first><last>Castelló</last></author>
+      <author id="nuria-bertomeu"><first>Núria Bertomeu</first><last>Castelló</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <url hash="eb632d95">W17-6802</url>
       <bibkey>castello-stede-2017-extracting</bibkey>
@@ -13247,7 +13247,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="4">
       <title>Semantic Variation in Online Communities of Practice</title>
       <author><first>Marco</first><last>Del Tredici</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <url hash="5a600a61">W17-6804</url>
       <bibkey>del-tredici-fernandez-2017-semantic</bibkey>
     </paper>
@@ -13284,7 +13284,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>If Sentences Could See: Investigating Visual Information for Semantic Textual Similarity</title>
       <author><first>Goran</first><last>Glavaš</last></author>
       <author><first>Ivan</first><last>Vulić</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <url hash="bb59787a">W17-6809</url>
       <bibkey>glavas-etal-2017-sentences</bibkey>
     </paper>
@@ -13306,10 +13306,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="12">
       <title>Extracting hypernym relations from <fixed-case>W</fixed-case>ikipedia disambiguation pages : comparing symbolic and machine learning approaches</title>
       <author><first>Mouna</first><last>Kamel</last></author>
-      <author><first>Cassia</first><last>Trojahn</last></author>
+      <author id="cassia-trojahn"><first>Cassia</first><last>Trojahn</last></author>
       <author><first>Adel</first><last>Ghamnia</last></author>
       <author><first>Nathalie</first><last>Aussenac-Gilles</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <url hash="e9340833">W17-6812</url>
       <bibkey>kamel-etal-2017-extracting</bibkey>
     </paper>
@@ -13328,7 +13328,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Anna</first><last>Dickinson</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Annie</first><last>Louis</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="4799e651">W17-6814</url>
       <bibkey>rohde-etal-2017-exploring</bibkey>
     </paper>
@@ -13372,7 +13372,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="2">
       <title><fixed-case>P</fixed-case>ropbank Annotation of <fixed-case>D</fixed-case>anish Noun Frames</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <url hash="d16af6f2">W17-6902</url>
       <bibkey>bick-2017-propbank</bibkey>
     </paper>
@@ -13385,10 +13385,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>Living a discrete life in a continuous world: Reference in cross-modal entity tracking</title>
-      <author><first>Gemma</first><last>Boleda</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
-      <author><first>Nghia The</first><last>Pham</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
+      <author id="nghia-the-pham"><first>Nghia The</first><last>Pham</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <url hash="2b177ef6">W17-6904</url>
       <bibkey>boleda-etal-2017-living</bibkey>
     </paper>
@@ -13400,8 +13400,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="6">
       <title>A Semantically-Based Computational Approach to Narrative Structure</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Giulia</first><last>Marchesini</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="giulia-marchesini"><first>Giulia</first><last>Marchesini</last></author>
       <url hash="3c6b44b3">W17-6906</url>
       <bibkey>delmonte-marchesini-2017-semantically</bibkey>
     </paper>
@@ -13417,7 +13417,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="8">
       <title>Bigger does not mean better! We prefer specificity</title>
       <author><first>Emmanuelle</first><last>Dusserre</last></author>
-      <author><first>Muntsa</first><last>Padró</last></author>
+      <author id="muntsa-padro"><first>Muntsa</first><last>Padró</last></author>
       <url hash="1cfc72a4">W17-6908</url>
       <bibkey>dusserre-padro-2017-bigger</bibkey>
     </paper>
@@ -13449,7 +13449,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Incorporating visual features into word embeddings: A bimodal autoencoder-based approach</title>
       <author><first>Mika</first><last>Hasegawa</last></author>
       <author><first>Tetsunori</first><last>Kobayashi</last></author>
-      <author><first>Yoshihiko</first><last>Hayashi</last></author>
+      <author id="yoshihiko-hayashi"><first>Yoshihiko</first><last>Hayashi</last></author>
       <url hash="ef3fe9a2">W17-6912</url>
       <bibkey>hasegawa-etal-2017-incorporating</bibkey>
     </paper>
@@ -13471,13 +13471,13 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Textual Inference: getting logic from humans</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Livy</first><last>Real</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <url hash="57438832">W17-6915</url>
       <bibkey>kalouli-etal-2017-textual</bibkey>
     </paper>
     <paper id="16">
       <title>Situating Word Senses in their Historical Context with Linked Data</title>
-      <author><first>Fahad</first><last>Khan</last></author>
+      <author id="fahad-khan"><first>Fahad</first><last>Khan</last></author>
       <author><first>Jack</first><last>Bowers</last></author>
       <author><first>Francesca</first><last>Frontini</last></author>
       <url hash="629083e5">W17-6916</url>
@@ -13508,14 +13508,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Ross</first><last>Beveridge</last></author>
       <author><first>Jaime</first><last>Ruiz</last></author>
       <author><first>Bruce</first><last>Draper</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <url hash="f0b8303b">W17-6919</url>
       <bibkey>krishnaswamy-etal-2017-communicating</bibkey>
     </paper>
     <paper id="20">
       <title>Ambiguss, a game for building a Sense Annotated Corpus for <fixed-case>F</fixed-case>rench</title>
       <author><first>Mathieu</first><last>Lafourcade</last></author>
-      <author><first>Nathalie Le</first><last>Brun</last></author>
+      <author id="nathalie-le-brun"><first>Nathalie Le</first><last>Brun</last></author>
       <url hash="f3fd1b44">W17-6920</url>
       <bibkey>lafourcade-brun-2017-ambiguss</bibkey>
     </paper>
@@ -13529,9 +13529,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="22">
       <title>Are doggies really nicer than dogs? The impact of morphological derivation on emotional valence in <fixed-case>G</fixed-case>erman</title>
       <author><first>Gabriella</first><last>Lapesa</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Tillmann</first><last>Pross</last></author>
-      <author><first>Antje</first><last>Rossdeutscher</last></author>
+      <author id="antje-rossdeutscher"><first>Antje</first><last>Rossdeutscher</last></author>
       <url hash="cae58f3e">W17-6922</url>
       <bibkey>lapesa-etal-2017-doggies</bibkey>
     </paper>
@@ -13567,7 +13567,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="27">
       <title>Neural Disambiguation of Causal Lexical Markers Based on Context</title>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
       <author><first>Vered</first><last>Shwartz</last></author>
       <author><first>Iryna</first><last>Gurevych</last></author>
       <author><first>Ido</first><last>Dagan</last></author>
@@ -13579,7 +13579,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Mariia</first><last>Melymuka</last></author>
       <author><first>Gabriella</first><last>Lapesa</last></author>
       <author><first>Max</first><last>Kisselew</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <url hash="d11fe79f">W17-6928</url>
       <bibkey>melymuka-etal-2017-modeling</bibkey>
     </paper>
@@ -13599,7 +13599,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="31">
       <title>Distributional <fixed-case>L</fixed-case>esk: Effective Knowledge-Based Word Sense Disambiguation</title>
       <author><first>Dieke</first><last>Oele</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="3fe12f97">W17-6931</url>
       <bibkey>oele-van-noord-2017-distributional</bibkey>
     </paper>
@@ -13613,7 +13613,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="33">
       <title>There’s no ‘Count or Predict’ but task-based selection for distributional models</title>
       <author><first>Martin</first><last>Riedl</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <url hash="0744e26c">W17-6933</url>
       <bibkey>riedl-biemann-2017-theres</bibkey>
     </paper>
@@ -13643,7 +13643,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Shota</first><last>Sasaki</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Naoya</first><last>Inoue</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <url hash="f8d699d2">W17-6937</url>
       <bibkey>sasaki-etal-2017-handling</bibkey>
@@ -13652,20 +13652,20 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Vision and Language Integration: Moving beyond Objects</title>
       <author><first>Ravi</first><last>Shekhar</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <author><first>Moin</first><last>Nabi</last></author>
       <author><first>Enver</first><last>Sangineto</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <url hash="821e4d4b">W17-6938</url>
       <bibkey>shekhar-etal-2017-vision</bibkey>
     </paper>
     <paper id="39">
       <title>Can You See the (Linguistic) Difference? Exploring Mass/Count Distinction in Vision</title>
-      <author><first>David Addison</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David Addison</first><last>Smith</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
       <author><first>Francesca</first><last>Franzon</last></author>
       <author><first>Chiara</first><last>Zanini</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <url hash="eceb5350">W17-6939</url>
       <bibkey>smith-etal-2017-see</bibkey>
     </paper>
@@ -13681,7 +13681,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title><fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>ub<fixed-case>NC</fixed-case>: A Dataset of Lexical Substitution for Nominal Compounds</title>
       <author><first>Rodrigo</first><last>Wilkens</last></author>
       <author><first>Leonardo</first><last>Zilio</last></author>
-      <author><first>Silvio Ricardo</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio Ricardo</first><last>Cordeiro</last></author>
       <author><first>Felipe</first><last>Paula</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <author><first>Marco</first><last>Idiart</last></author>
@@ -13693,7 +13693,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Exploring Soft-Clustering for <fixed-case>G</fixed-case>erman (Particle) Verbs across Frequency Ranges</title>
       <author><first>Moritz</first><last>Wittmann</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <url hash="360b1857">W17-6942</url>
       <bibkey>wittmann-etal-2017-exploring</bibkey>
     </paper>
@@ -13719,9 +13719,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <booktitle>Proceedings of Language, Ontology, Terminology and Knowledge Structures Workshop (<fixed-case>LOTKS</fixed-case> 2017)</booktitle>
       <editor><first>Francesca</first><last>Frontini</last></editor>
       <editor><first>Larisa</first><last>Grčić Simeunović</last></editor>
-      <editor><first>Špela</first><last>Vintar</last></editor>
-      <editor><first>Anas Fahad</first><last>Khan</last></editor>
-      <editor><first>Artemis</first><last>Parvisi</last></editor>
+      <editor id="spela-vintar"><first>Špela</first><last>Vintar</last></editor>
+      <editor id="fahad-khan"><first>Anas Fahad</first><last>Khan</last></editor>
+      <editor id="artemis-parvizi"><first>Artemis</first><last>Parvisi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Montpellier, France</address>
       <month>September</month>
@@ -13735,7 +13735,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="1">
       <title>Exploratory Analysis for Ontology Learning from Social Events on Social Media Streaming in <fixed-case>S</fixed-case>panish</title>
       <author><first>Enrique</first><last>Valeriano</last></author>
-      <author><first>Arturo</first><last>Oncevay-Marcos</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay-Marcos</last></author>
       <url hash="a40bfdf4">W17-7001</url>
       <bibkey>valeriano-oncevay-marcos-2017-exploratory</bibkey>
     </paper>
@@ -13765,7 +13765,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="5">
       <title>Fine-grained domain classification of text using <fixed-case>TERMIUM</fixed-case> Plus</title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <author><first>Pierre André</first><last>Ménard</last></author>
       <url hash="abea3bd6">W17-7005</url>
       <bibkey>bernier-colborne-etal-2017-fine</bibkey>
@@ -13773,14 +13773,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="6">
       <title><fixed-case>TBX</fixed-case> in <fixed-case>ODD</fixed-case>: Schema-agnostic specification and documentation for <fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ase e<fixed-case>X</fixed-case>change</title>
       <author><first>Stefan</first><last>Pernes</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <url hash="85c4f134">W17-7006</url>
       <bibkey>pernes-romary-2017-tbx</bibkey>
     </paper>
     <paper id="7">
       <title>Enrichment of <fixed-case>F</fixed-case>rench Biomedical Ontologies with <fixed-case>UMLS</fixed-case> Concepts and Semantic Types for Biomedical Named Entity Recognition Though Ontological Semantic Annotation</title>
       <author><first>Andon</first><last>Tchechmedjiev</last></author>
-      <author><first>Clément</first><last>Jonquet</last></author>
+      <author id="clement-jonquet"><first>Clément</first><last>Jonquet</last></author>
       <url hash="57f7267b">W17-7007</url>
       <bibkey>tchechmedjiev-jonquet-2017-enrichment</bibkey>
     </paper>
@@ -13803,7 +13803,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="10">
       <title>Developing <fixed-case>L</fixed-case>ex<fixed-case>O</fixed-case>: a Collaborative Editor of Multilingual Lexica and Termino-Ontological Resources in the Humanities</title>
       <author><first>Andrea</first><last>Bellandi</last></author>
-      <author><first>Emiliano</first><last>Giovannetti</last></author>
+      <author id="emiliano-giovannetti"><first>Emiliano</first><last>Giovannetti</last></author>
       <author><first>Silvia</first><last>Piccini</last></author>
       <author><first>Anja</first><last>Weingart</last></author>
       <url hash="e37fa87e">W17-7010</url>
@@ -13822,7 +13822,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
   <volume id="71" type="proceedings">
     <meta>
       <booktitle>Proceedings of the <fixed-case>IWCS</fixed-case> workshop on Foundations of Situated and Multimodal Communication</booktitle>
-      <editor><first>Nicholas</first><last>Asher</last></editor>
+      <editor id="nicholas-asher"><first>Nicholas</first><last>Asher</last></editor>
       <editor><first>Julie</first><last>Hunter</last></editor>
       <editor><first>Alex</first><last>Lascarides</last></editor>
       <venue>ws</venue>
@@ -13836,7 +13836,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Exploring Multi-Modal <fixed-case>T</fixed-case>ext+<fixed-case>I</fixed-case>mage Models to Distinguish between Abstract and Concrete Nouns</title>
       <author><first>Sai Abishek</first><last>Bhaskar</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
-      <author><first>Sabine</first><last>Schulte Im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte Im Walde</last></author>
       <author><first>Diego</first><last>Frassinelli</last></author>
       <url hash="34d578aa">W17-7101</url>
       <bibkey>bhaskar-etal-2017-exploring</bibkey>
@@ -13849,7 +13849,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="3">
       <title>Creating Common Ground through Multimodal Simulations</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <author><first>Bruce</first><last>Draper</last></author>
       <author><first>Pradyumna</first><last>Narayana</last></author>
@@ -13909,7 +13909,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="5">
       <title>Correcting Contradictions</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Livy</first><last>Real</last></author>
       <url hash="f5ed6234">W17-7205</url>
       <bibkey>kalouli-etal-2017-correcting</bibkey>
@@ -14019,8 +14019,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="2">
       <title>A semantically-based approach to the annotation of narrative style</title>
-      <author><first>Rodolfo</first><last>Delmonte</last></author>
-      <author><first>Giulia</first><last>Marchesi</last></author>
+      <author id="rodolfo-delmonte"><first>Rodolfo</first><last>Delmonte</last></author>
+      <author id="giulia-marchesini"><first>Giulia</first><last>Marchesi</last></author>
       <url hash="8a21a891">W17-7402</url>
       <bibkey>delmonte-marchesi-2017-semantically</bibkey>
     </paper>
@@ -14032,9 +14032,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="4">
       <title>Revisiting the <fixed-case>ISO</fixed-case> standard for dialogue act annotation</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <url hash="409dd12c">W17-7404</url>
       <bibkey>bunt-etal-2017-revisiting</bibkey>
     </paper>
@@ -14047,7 +14047,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Ketong</first><last>Su</last></author>
       <author><first>Benjamin R.</first><last>Cowan</last></author>
       <author><first>Killian</first><last>Levacher</last></author>
-      <author><first>Arturo Calvo</first><last>Devesa</last></author>
+      <author id="arturo-calvo-devesa"><first>Arturo Calvo</first><last>Devesa</last></author>
       <author><first>Lodana</first><last>Cerrato</last></author>
       <author><first>Nick</first><last>Campbell</last></author>
       <author><first>Vincent</first><last>Wade</last></author>
@@ -14079,14 +14079,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="9">
       <title>Towards interoperable annotation of quantification</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <url hash="495f0911">W17-7409</url>
       <bibkey>bunt-2017-towards</bibkey>
     </paper>
     <paper id="10">
       <title><fixed-case>PACTE</fixed-case>: A colloaborative platform for textual annotation</title>
       <author><first>Pierre André</first><last>Ménard</last></author>
-      <author><first>Caroline</first><last>Barrière</last></author>
+      <author id="caroline-barriere"><first>Caroline</first><last>Barrière</last></author>
       <url hash="06fb69eb">W17-7410</url>
       <bibkey>menard-barriere-2017-pacte</bibkey>
     </paper>
@@ -14107,8 +14107,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="13">
       <title>Temporal@<fixed-case>ODIL</fixed-case> project: Adapting <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> to syntactic treebanks for the temporal annotation of spoken speech</title>
       <author><first>Jean-Yves</first><last>Antoine</last></author>
-      <author><first>Jakub</first><last>Wasczuk</last></author>
-      <author><first>Anaïs</first><last>Lefeuvre-Haftermeyer</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Wasczuk</last></author>
+      <author id="anais-lefeuvre"><first>Anaïs</first><last>Lefeuvre-Haftermeyer</last></author>
       <author><first>Lotfi</first><last>Abouda</last></author>
       <author><first>Emmanuel</first><last>Schang</last></author>
       <author><first>Agata</first><last>Savary</last></author>
@@ -14125,7 +14125,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="15">
       <title>Enriching the Notion of Path in <fixed-case>ISO</fixed-case>-Space</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Kiyong</first><last>Lee</last></author>
       <url hash="353b4b8e">W17-7415</url>
       <bibkey>pustejovsky-lee-2017-enriching</bibkey>
@@ -14134,7 +14134,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
   <volume id="75" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 14th International Conference on Natural Language Processing (<fixed-case>ICON</fixed-case>-2017)</booktitle>
-      <editor><first>Sivaji</first><last>Bandyopadhyay</last></editor>
+      <editor id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></editor>
       <publisher>NLP Association of India</publisher>
       <address>Kolkata, India</address>
       <month>December</month>
@@ -14163,7 +14163,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Three-phase training to address data sparsity in Neural Machine Translation</title>
       <author><first>Ruchit</first><last>Agrawal</last></author>
       <author><first>Mihir</first><last>Shekhar</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>13–22</pages>
       <url hash="60830719">W17-7503</url>
       <bibkey>agrawal-etal-2017-three</bibkey>
@@ -14173,7 +14173,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Saurav</first><last>Jha</last></author>
       <author><first>Aanchal</first><last>Chaurasia</last></author>
       <author><first>Akhilesh</first><last>Sudhakar</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>23–32</pages>
       <url hash="c7f28b59">W17-7504</url>
       <bibkey>jha-etal-2017-reference</bibkey>
@@ -14182,7 +14182,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>A vis-à-vis evaluation of <fixed-case>MT</fixed-case> paradigms for linguistically distant languages</title>
       <author><first>Ruchit</first><last>Agrawal</last></author>
       <author><first>Jahfar</first><last>Ali</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>33–42</pages>
       <url hash="1b55626e">W17-7505</url>
       <bibkey>agrawal-etal-2017-vis</bibkey>
@@ -14200,7 +14200,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title><fixed-case>POS</fixed-case> Tagging For Resource Poor Languages Through Feature Projection</title>
       <author><first>Pruthwik</first><last>Mishra</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>50–55</pages>
       <url hash="e4ccae29">W17-7507</url>
       <bibkey>mishra-etal-2017-pos</bibkey>
@@ -14208,7 +14208,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="8">
       <title>An Exploration of Word Embedding Initialization in Deep-Learning Tasks</title>
       <author><first>Tom</first><last>Kocmi</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>56–64</pages>
       <url hash="c08af2c1">W17-7508</url>
       <bibkey>kocmi-bojar-2017-exploration</bibkey>
@@ -14287,7 +14287,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Tanik</first><last>Saikh</last></author>
       <author><first>Tirthankar</first><last>Ghosal</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>131–140</pages>
       <url hash="9f2718d3">W17-7517</url>
       <bibkey>saikh-etal-2017-document</bibkey>
@@ -14296,7 +14296,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Is your Statement Purposeless? Predicting Computer Science Graduation Admission Acceptance based on Statement Of Purpose</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Nikhil</first><last>Wani</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>141–145</pages>
       <url hash="10439d71">W17-7518</url>
       <bibkey>kanojia-etal-2017-statement</bibkey>
@@ -14304,14 +14304,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="19">
       <title>Natural Language Programing with Automatic Code Generation towards Solving Addition-Subtraction Word Problems</title>
       <author><first>Sourav</first><last>Mandal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>146–154</pages>
       <url hash="0df9ffcb">W17-7519</url>
       <bibkey>mandal-naskar-2017-natural</bibkey>
     </paper>
     <paper id="20">
       <title>Unsupervised Separation of Transliterable and Native Words for <fixed-case>M</fixed-case>alayalam</title>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>155–164</pages>
       <url hash="e9e9828d">W17-7520</url>
       <bibkey>p-2017-unsupervised</bibkey>
@@ -14337,14 +14337,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Kuntal</first><last>Dey</last></author>
       <author><first>Ritvik</first><last>Shrivastava</last></author>
       <author><first>Saroj</first><last>Kaushik</last></author>
-      <author><first>L Venkata</first><last>Subramaniam</last></author>
+      <author id="l-venkata-subramaniam"><first>L Venkata</first><last>Subramaniam</last></author>
       <pages>178–187</pages>
       <url hash="0ba0cbaf">W17-7523</url>
       <bibkey>dey-etal-2017-semtagger</bibkey>
     </paper>
     <paper id="24">
       <title>Reasoning with Sets to Solve Simple Word Problems Automatically</title>
-      <author><first>Sowmya S</first><last>Sundaram</last></author>
+      <author id="sowmya-s-sundaram"><first>Sowmya S</first><last>Sundaram</last></author>
       <author><first>Deepak</first><last>Khemani</last></author>
       <pages>188–196</pages>
       <url hash="c326ee1e">W17-7524</url>
@@ -14361,7 +14361,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="26">
       <title>Beyond <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec: Embedding Words and Phrases in Same Vector Space</title>
       <author><first>Vijay Prakash</first><last>Dwivedi</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>205–211</pages>
       <url hash="f70ecc6b">W17-7526</url>
       <bibkey>dwivedi-shrivastava-2017-beyond</bibkey>
@@ -14394,7 +14394,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="30">
       <title><fixed-case>M</fixed-case>alayalam <fixed-case>V</fixed-case>erb<fixed-case>F</fixed-case>rames</title>
-      <author><first>Jisha P</first><last>Jayan</last></author>
+      <author id="jisha-p-jayan"><first>Jisha P</first><last>Jayan</last></author>
       <author><first>Asha S</first><last>Nair</last></author>
       <author><first>Govindaru</first><last>V</last></author>
       <pages>236–244</pages>
@@ -14407,8 +14407,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Sandhya</first><last>Singh</last></author>
       <author><first>Dhara</first><last>Gorasia</last></author>
       <author><first>Meenakshi</first><last>Somasundaram</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>245–254</pages>
       <url hash="5eb7338f">W17-7531</url>
       <bibkey>redkar-etal-2017-hindi-shabdamitra</bibkey>
@@ -14427,7 +14427,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>End to End Dialog System for <fixed-case>T</fixed-case>elugu</title>
       <author><first>Prathyusha</first><last>Danda</last></author>
       <author><first>Prathyusha</first><last>Jwalapuram</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>265–272</pages>
       <url hash="87de8d75">W17-7533</url>
       <bibkey>danda-etal-2017-end</bibkey>
@@ -14461,7 +14461,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="37">
       <title>Unsupervised Morpheme Segmentation Through Numerical Weighting and Thresholding</title>
       <author><first>Joy</first><last>Mahapatra</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>298–304</pages>
       <url hash="69aa561a">W17-7537</url>
       <bibkey>mahapatra-naskar-2017-unsupervised</bibkey>
@@ -14470,7 +14470,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Experiments with Domain Dependent Dialogue Act Classification using Open-Domain Dialogue Corpora</title>
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <author><first>Apoorv</first><last>Shrivastava</last></author>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Saurabh</first><last>Srivastava</last></author>
       <pages>305–311</pages>
       <url hash="909c0910">W17-7538</url>
@@ -14479,7 +14479,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="39">
       <title>Normalization of Social Media Text using Deep Neural Networks</title>
       <author><first>Ajay Shankar</first><last>Tiwari</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>312–321</pages>
       <url hash="74945078">W17-7539</url>
       <bibkey>tiwari-naskar-2017-normalization</bibkey>
@@ -14541,24 +14541,24 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Linguistic approach based Transfer Learning for Sentiment Classification in <fixed-case>H</fixed-case>indi</title>
       <author><first>Vartika</first><last>Rai</last></author>
       <author><first>Sakshee</first><last>Vijay</last></author>
-      <author><first>Dipti</first><last>Misra</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Misra</last></author>
       <pages>373–382</pages>
       <url hash="628862eb">W17-7546</url>
       <bibkey>rai-etal-2017-linguistic</bibkey>
     </paper>
     <paper id="47">
       <title>Scalable Bio-Molecular Event Extraction System towards Knowledge Acquisition</title>
-      <author><first>Pattabhi RK</first><last>Rao</last></author>
+      <author id="pattabhi-rk-rao"><first>Pattabhi RK</first><last>Rao</last></author>
       <author><first>Sindhuja</first><last>Gopalan</last></author>
-      <author><first>Sobha Lalitha</first><last>Devi</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></author>
       <pages>383–391</pages>
       <url hash="f7f72793">W17-7547</url>
       <bibkey>rao-etal-2017-scalable</bibkey>
     </paper>
     <paper id="48">
       <title>Co-reference Resolution in <fixed-case>T</fixed-case>amil Text</title>
-      <author><first>Vijay Sundar</first><last>Ram</last></author>
-      <author><first>Sobha Lalitha</first><last>Devi</last></author>
+      <author id="vijay-sundar-ram"><first>Vijay Sundar</first><last>Ram</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></author>
       <pages>392–401</pages>
       <url hash="9c1b8075">W17-7548</url>
       <bibkey>ram-devi-2017-co</bibkey>
@@ -14566,8 +14566,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="49">
       <title>Cross Linguistic Variations in Discourse Relations among <fixed-case>I</fixed-case>ndian Languages</title>
       <author><first>Sindhuja</first><last>Gopalan</last></author>
-      <author><first>Lakshmi</first><last>S</last></author>
-      <author><first>Sobha Lalitha</first><last>Devi</last></author>
+      <author id="lakshmi-s"><first>Lakshmi</first><last>S</last></author>
+      <author id="sobha-lalitha-devi"><first>Sobha Lalitha</first><last>Devi</last></author>
       <pages>402–407</pages>
       <url hash="b09abf1d">W17-7549</url>
       <bibkey>gopalan-etal-2017-cross</bibkey>
@@ -14625,7 +14625,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="56">
       <title>Neural Networks for Semantic Textual Similarity</title>
       <author><first>Derek</first><last>Prijatelj</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <author><first>Jonathan</first><last>Ventura</last></author>
       <pages>456–465</pages>
       <url hash="29604527">W17-7556</url>
@@ -14635,7 +14635,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Open Set Text Classification Using <fixed-case>CNN</fixed-case>s</title>
       <author><first>Sridhama</first><last>Prakhya</last></author>
       <author><first>Vinodini</first><last>Venkataram</last></author>
-      <author><first>Jugal</first><last>Kalita</last></author>
+      <author id="jugal-kalita"><first>Jugal</first><last>Kalita</last></author>
       <pages>466–475</pages>
       <url hash="51ce00f9">W17-7557</url>
       <bibkey>prakhya-etal-2017-open</bibkey>
@@ -14652,7 +14652,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="59">
       <title>Neural Morphological Disambiguation Using Surface and Contextual Morphological Awareness</title>
       <author><first>Akhilesh</first><last>Sudhakar</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>485–494</pages>
       <url hash="c4b975fd">W17-7559</url>
       <bibkey>sudhakar-singh-2017-neural</bibkey>
@@ -14660,7 +14660,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="60">
       <title>Word Sense Disambiguation for <fixed-case>M</fixed-case>alayalam in a Conditional Random Field Framework</title>
       <author><first>Junaida M</first><last>K</last></author>
-      <author><first>Jisha P</first><last>Jayan</last></author>
+      <author id="jisha-p-jayan"><first>Jisha P</first><last>Jayan</last></author>
       <author><first>Elizabeth</first><last>Sherly</last></author>
       <pages>495–502</pages>
       <url hash="613fddd2">W17-7560</url>
@@ -14670,7 +14670,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Semisupervied Data Driven Word Sense Disambiguation for Resource-poor Languages</title>
       <author><first>Pratibha</first><last>Rani</last></author>
       <author><first>Vikram</first><last>Pudi</last></author>
-      <author><first>Dipti M.</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti M.</first><last>Sharma</last></author>
       <pages>503–512</pages>
       <url hash="42a63acf">W17-7561</url>
       <bibkey>rani-etal-2017-semisupervied</bibkey>
@@ -14685,7 +14685,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="63">
       <title>Semantic Enrichment Across Language: A Case Study of <fixed-case>C</fixed-case>zech Bibliographic Databases</title>
-      <author><first>Pavel</first><last>Smrz</last></author>
+      <author id="pavel-smrz"><first>Pavel</first><last>Smrz</last></author>
       <author><first>Lubomir</first><last>Otrusina</last></author>
       <pages>523–532</pages>
       <url hash="c3d4eccf">W17-7563</url>
@@ -14696,7 +14696,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the 16th International Workshop on Treebanks and Linguistic Theories</booktitle>
       <url hash="941f7505">W17-76</url>
-      <editor><first>Jan</first><last>Hajič</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajič</last></editor>
       <address>Prague, Czech Republic</address>
       <year>2017</year>
       <venue>tlt</venue>
@@ -14707,14 +14707,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </frontmatter>
     <paper id="1">
       <title>Annotating and parsing to semantic frames: feedback from the <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et project</title>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>v</pages>
       <url hash="2929f294">W17-7601</url>
       <bibkey>candito-2017-annotating</bibkey>
     </paper>
     <paper id="2">
       <title>Downstream use of syntactic analysis: does representation matter?</title>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>vi</pages>
       <url hash="ffd6e742">W17-7602</url>
       <bibkey>ovrelid-2017-downstream</bibkey>
@@ -14724,16 +14724,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Daniël</first><last>de Kok</last></author>
       <author><first>Patricia</first><last>Fischer</last></author>
       <author><first>Corina</first><last>Dima</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>1–9</pages>
       <url hash="ee8ecb66">W17-7603</url>
       <bibkey>de-kok-etal-2017-distributional</bibkey>
     </paper>
     <paper id="4">
       <title><fixed-case>UD</fixed-case> Annotatrix: An annotation tool for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <author><first>Mariya</first><last>Sheyanova</last></author>
-      <author><first>Jonathan North</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan North</first><last>Washington</last></author>
       <pages>10–17</pages>
       <url hash="6d641eb0">W17-7604</url>
       <bibkey>tyers-etal-2017-ud</bibkey>
@@ -14766,7 +14766,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="8">
       <title>Extensions to the <fixed-case>G</fixed-case>r<fixed-case>ETEL</fixed-case> Treebank Query Application</title>
-      <author><first>Jan</first><last>Odijk</last></author>
+      <author id="jan-odijk"><first>Jan</first><last>Odijk</last></author>
       <author><first>Martijn</first><last>van der Klis</last></author>
       <author><first>Sheean</first><last>Spoel</last></author>
       <pages>46–55</pages>
@@ -14776,9 +14776,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="9">
       <title>The Relation of Form and Function in Linguistic Theory and in a Multilayer Treebank</title>
       <author><first>Eduard</first><last>Bejček</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <author><first>Marie</first><last>Mikulová</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
       <pages>56–63</pages>
       <url hash="f1bf8229">W17-7609</url>
       <bibkey>bejcek-etal-2017-relation</bibkey>
@@ -14786,16 +14786,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="10">
       <title>Literal readings of multiword expressions: as scarce as hen’s teeth</title>
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Silvio Ricardo</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio Ricardo</first><last>Cordeiro</last></author>
       <pages>64–72</pages>
       <url hash="1a0af56d">W17-7610</url>
       <bibkey>savary-cordeiro-2017-literal</bibkey>
     </paper>
     <paper id="11">
       <title>Querying Multi-word Expressions Annotation with <fixed-case>CQL</fixed-case></title>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Anna</first><last>Vernerová</last></author>
-      <author><first>Behrang</first><last>Qasemizadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>Qasemizadeh</last></author>
       <pages>73–79</pages>
       <url hash="9edeaf74">W17-7611</url>
       <bibkey>klyueva-etal-2017-querying</bibkey>
@@ -14827,7 +14827,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="15">
       <title>Error Analysis of Cross-lingual Tagging and Parsing</title>
       <author><first>Rudolf</first><last>Rosa</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <pages>106–118</pages>
       <url hash="7dc48fb8">W17-7615</url>
       <bibkey>rosa-zabokrtsky-2017-error</bibkey>
@@ -14843,7 +14843,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="17">
       <title>Recent Developments within <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>129–137</pages>
       <url hash="ec22d048">W17-7617</url>
       <bibkey>osenova-simov-2017-recent</bibkey>
@@ -14851,7 +14851,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="18">
       <title>Towards a dependency-annotated treebank for <fixed-case>B</fixed-case>ambara</title>
       <author><first>Ekaterina</first><last>Aplonova</last></author>
-      <author><first>Francis M.</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
       <pages>138–145</pages>
       <url hash="f1fbd6d6">W17-7618</url>
       <bibkey>aplonova-tyers-2017-towards</bibkey>
@@ -14865,7 +14865,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="20">
       <title>What <fixed-case>I</fixed-case> think when <fixed-case>I</fixed-case> think about treebanks</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>161–166</pages>
       <url hash="f799b8bb">W17-7620</url>
       <bibkey>sogaard-2017-think</bibkey>
@@ -14873,7 +14873,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="21">
       <title>Syntactic Semantic Correspondence in Dependency Grammar</title>
       <author><first>Cătălina</first><last>Mărănduc</last></author>
-      <author><first>Cătălin</first><last>Mititelu</last></author>
+      <author id="catalin-mititelu"><first>Cătălin</first><last>Mititelu</last></author>
       <author><first>Victoria</first><last>Bobicev</last></author>
       <pages>167–180</pages>
       <url hash="cec1ad21">W17-7621</url>
@@ -14899,7 +14899,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Dangerous Relations in Dependency Treebanks</title>
       <author><first>Chiara</first><last>Alzetta</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <pages>201–210</pages>
       <url hash="e24ac2f6">W17-7624</url>
@@ -14960,7 +14960,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Automatic Summarization of Online Debates</title>
       <author><first>Nattapong</first><last>Sanchan</last></author>
       <author><first>Ahmet</first><last>Aker</last></author>
-      <author><first>Kalina</first><last>Bontcheva</last></author>
+      <author id="kalina-bontcheva"><first>Kalina</first><last>Bontcheva</last></author>
       <pages>19–27</pages>
       <doi>10.26615/978-954-452-038-0_003</doi>
       <url>https://doi.org/10.26615/978-954-452-038-0_003</url>
@@ -14983,7 +14983,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
   <volume id="78" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Workshop Knowledge Resources for the Socio-Economic Sciences and Humanities associated with <fixed-case>RANLP</fixed-case> 2017</booktitle>
-      <editor><first>Kalliopi</first><last>Zervanou</last></editor>
+      <editor id="kalliopi-zervanou"><first>Kalliopi</first><last>Zervanou</last></editor>
       <editor><first>Petya</first><last>Osenova</last></editor>
       <editor><first>Eveline</first><last>Wandl-Vogt</last></editor>
       <editor><first>Dan</first><last>Cristea</last></editor>
@@ -15011,7 +15011,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>A Multiform Balanced Dependency Treebank for <fixed-case>R</fixed-case>omanian</title>
       <author><first>Mihaela</first><last>Colhon</last></author>
       <author><first>Cătălina</first><last>Mărănduc</last></author>
-      <author><first>Cătălin</first><last>Mititelu</last></author>
+      <author id="catalin-mititelu"><first>Cătălin</first><last>Mititelu</last></author>
       <pages>9–18</pages>
       <doi>10.26615/978-954-452-040-3_002</doi>
       <url>https://doi.org/10.26615/978-954-452-040-3_002</url>
@@ -15024,7 +15024,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Marco</first><last>Rospocher</last></author>
       <author><first>Rinke</first><last>Hoekstra</last></author>
-      <author><first>Willem Robert</first><last>van Hage</last></author>
+      <author id="willem-robert-van-hage"><first>Willem Robert</first><last>van Hage</last></author>
       <pages>19–25</pages>
       <doi>10.26615/978-954-452-040-3_003</doi>
       <url>https://doi.org/10.26615/978-954-452-040-3_003</url>
@@ -15062,9 +15062,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the Workshop Human-Informed Translation and Interpreting Technology</booktitle>
       <editor><first>Irina</first><last>Temnikova</last></editor>
-      <editor><first>Constantin</first><last>Orasan</last></editor>
-      <editor><first>Gloria Corpas</first><last>Pastor</last></editor>
-      <editor><first>Stephan</first><last>Vogel</last></editor>
+      <editor id="constantin-orasan"><first>Constantin</first><last>Orasan</last></editor>
+      <editor id="gloria-corpas-pastor"><first>Gloria Corpas</first><last>Pastor</last></editor>
+      <editor id="stephan-vogel"><first>Stephan</first><last>Vogel</last></editor>
       <publisher>Association for Computational Linguistics, Shoumen, Bulgaria</publisher>
       <address>Varna, Bulgaria</address>
       <month>September</month>
@@ -15091,7 +15091,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="2">
       <title>Experiments in Non-Coherent Post-editing</title>
       <author><first>Cristina</first><last>Toledo Báez</last></author>
-      <author><first>Moritz</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz</first><last>Schaeffer</last></author>
       <author><first>Michael</first><last>Carl</last></author>
       <pages>11–20</pages>
       <doi>10.26615/978-954-452-042-7_002</doi>
@@ -15134,7 +15134,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="6">
       <title>Translation Memory Systems Have a Long Way to Go</title>
       <author><first>Andrea</first><last>Silvestre Baquero</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>44–51</pages>
       <doi>10.26615/978-954-452-042-7_006</doi>
       <url>https://doi.org/10.26615/978-954-452-042-7_006</url>
@@ -15166,7 +15166,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <meta>
       <booktitle>Proceedings of the Biomedical <fixed-case>NLP</fixed-case> Workshop associated with <fixed-case>RANLP</fixed-case> 2017</booktitle>
       <editor><first>Svetla</first><last>Boytcheva</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Guergana</first><last>Savova</last></editor>
       <editor><first>Galia</first><last>Angelova</last></editor>
       <publisher>INCOMA Ltd.</publisher>
@@ -15200,7 +15200,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="3">
       <title>Discourse-Wide Extraction of Assay Frames from the Biological Literature</title>
-      <author><first>Dayne</first><last>Freitag</last></author>
+      <author id="dayne-freitag"><first>Dayne</first><last>Freitag</last></author>
       <author><first>Paul</first><last>Kalmar</last></author>
       <author><first>Eric</first><last>Yeh</last></author>
       <pages>15–23</pages>
@@ -15221,7 +15221,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="5">
       <title>Understanding of unknown medical words</title>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>32–41</pages>
       <doi>10.26615/978-954-452-044-1_005</doi>
       <url>https://doi.org/10.26615/978-954-452-044-1_005</url>
@@ -15234,7 +15234,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <author><first>Steffen</first><last>Remus</last></author>
       <author><first>Alexander</first><last>Panchenko</last></author>
       <author><first>Andreas</first><last>Holzinger</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>42–48</pages>
       <doi>10.26615/978-954-452-044-1_006</doi>
       <url>https://doi.org/10.26615/978-954-452-044-1_006</url>
@@ -15275,7 +15275,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     </paper>
     <paper id="10">
       <title><fixed-case>POMELO</fixed-case>: <fixed-case>M</fixed-case>edline corpus with manually annotated food-drug interactions</title>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <author><first>Vincent</first><last>Tabanou</last></author>
       <author><first>Fleur</first><last>Mougin</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
@@ -15289,7 +15289,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="11">
       <title>Annotation of Clinical Narratives in <fixed-case>B</fixed-case>ulgarian language</title>
       <author><first>Ivajlo</first><last>Radev</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Galia</first><last>Angelova</last></author>
       <author><first>Svetla</first><last>Boytcheva</last></author>
       <pages>81–87</pages>
@@ -15330,7 +15330,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
       <title>Tools for Building a Corpus to Study the Historical and Geographical Variation of the <fixed-case>R</fixed-case>omanian Language</title>
       <author><first>Victoria</first><last>Bobicev</last></author>
       <author><first>Cătălina</first><last>Mărănduc</last></author>
-      <author><first>Cenel Augusto</first><last>Perez</last></author>
+      <author id="cenel-augusto-perez"><first>Cenel Augusto</first><last>Perez</last></author>
       <pages>10–19</pages>
       <doi>0.26615/978-954-452-046-5_002</doi>
       <url>http://doi.org/10.26615/978-954-452-046-5_002</url>
@@ -15354,7 +15354,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
     <paper id="4">
       <title>On the annotation of vague expressions: a case study on <fixed-case>R</fixed-case>omanian historical texts</title>
       <author><first>Anca</first><last>Dinu</last></author>
-      <author><first>Walther</first><last>von Hahn</last></author>
+      <author id="walther-von-hahn"><first>Walther</first><last>von Hahn</last></author>
       <author><first>Cristina</first><last>Vertan</last></author>
       <pages>24–31</pages>
       <doi>0.26615/978-954-452-046-5_004</doi>
diff --git a/data/xml/W18.xml b/data/xml/W18.xml
index e8567a8a4d..01507645b9 100644
--- a/data/xml/W18.xml
+++ b/data/xml/W18.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the 8th Workshop on Cognitive Modeling and Computational Linguistics (<fixed-case>CMCL</fixed-case> 2018)</booktitle>
       <url hash="46b62da3">W18-01</url>
-      <editor><first>Asad</first><last>Sayeed</last></editor>
-      <editor><first>Cassandra</first><last>Jacobs</last></editor>
+      <editor id="asad-sayeed"><first>Asad</first><last>Sayeed</last></editor>
+      <editor id="cassandra-l-jacobs"><first>Cassandra</first><last>Jacobs</last></editor>
       <editor><first>Tal</first><last>Linzen</last></editor>
-      <editor><first>Marten</first><last>van Schijndel</last></editor>
+      <editor id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></editor>
       <doi>10.18653/v1/W18-01</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Salt Lake City, Utah</address>
@@ -42,7 +42,7 @@
       <title>Dynamic encoding of structural uncertainty in gradient symbols</title>
       <author><first>Pyeong Whan</first><last>Cho</last></author>
       <author><first>Matthew</first><last>Goldrick</last></author>
-      <author><first>Richard L.</first><last>Lewis</last></author>
+      <author id="richard-l-lewis"><first>Richard L.</first><last>Lewis</last></author>
       <author><first>Paul</first><last>Smolensky</last></author>
       <pages>19–28</pages>
       <url hash="b990d861">W18-0103</url>
@@ -106,11 +106,11 @@
     <meta>
       <booktitle>Proceedings of the Fourth International Workshop on Computational Linguistics of Uralic Languages</booktitle>
       <url hash="d0476d3c">W18-02</url>
-      <editor><first>Tommi A.</first><last>Pirinen</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A.</first><last>Pirinen</last></editor>
       <editor><first>Michael</first><last>Rießler</last></editor>
       <editor><first>Jack</first><last>Rueter</last></editor>
       <editor><first>Trond</first><last>Trosterud</last></editor>
-      <editor><first>Francis M.</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis M.</first><last>Tyers</last></editor>
       <doi>10.18653/v1/W18-02</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Helsinki, Finland</address>
@@ -125,7 +125,7 @@
     <paper id="1">
       <title>Dependency Parsing of Code-Switching Data with Cross-Lingual Feature Representations</title>
       <author><first>Niko</first><last>Partanen</last></author>
-      <author><first>Kyungtae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>Kyungtae</first><last>Lim</last></author>
       <author><first>Michael</first><last>Rießler</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <pages>1–17</pages>
@@ -199,7 +199,7 @@
     </paper>
     <paper id="9">
       <title>Initial Experiments in Data-Driven Morphological Analysis for <fixed-case>F</fixed-case>innish</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>98–105</pages>
       <url hash="03816814">W18-0209</url>
@@ -225,7 +225,7 @@
     </paper>
     <paper id="12">
       <title>Parallel Forms in <fixed-case>E</fixed-case>stonian Finite State Morphology</title>
-      <author><first>Heiki-Jaan</first><last>Kaalep</last></author>
+      <author id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></author>
       <pages>139–153</pages>
       <url hash="a6fd1d41">W18-0212</url>
       <doi>10.18653/v1/W18-0212</doi>
@@ -252,7 +252,7 @@
   <volume id="3" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Society for Computation in Linguistics (<fixed-case>SC</fixed-case>i<fixed-case>L</fixed-case>) 2018</booktitle>
-      <editor><first>Gaja</first><last>Jarosz</last></editor>
+      <editor id="gaja-jarosz"><first>Gaja</first><last>Jarosz</last></editor>
       <editor><first>Brendan</first><last>O’Connor</last></editor>
       <editor><first>Joe</first><last>Pater</last></editor>
       <venue>scil</venue>
@@ -298,7 +298,7 @@
     <paper id="4">
       <title>Modeling the Decline in <fixed-case>E</fixed-case>nglish Passivization</title>
       <author><first>Liwen</first><last>Hou</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>34-43</pages>
       <doi>10.7275/R5ZC812C</doi>
       <url hash="00c57249">W18-0304</url>
@@ -314,7 +314,7 @@
     </paper>
     <paper id="6">
       <title>A bidirectional mapping between <fixed-case>E</fixed-case>nglish and <fixed-case>CNF</fixed-case>-based reasoners</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <pages>55-63</pages>
       <doi>10.7275/R5PZ571N</doi>
       <url hash="88b053bf">W18-0306</url>
@@ -332,7 +332,7 @@
     <paper id="8">
       <title>Differentiating Phrase Structure Parsing and Memory Retrieval in the Brain</title>
       <author><first>Shohini</first><last>Bhattasali</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Christophe</first><last>Pallier</last></author>
       <author><first>Jonathan</first><last>Brennan</last></author>
       <author><first>Wen-Ming</first><last>Luh</last></author>
@@ -369,7 +369,7 @@
     </paper>
     <paper id="12">
       <title>Conditions on abruptness in a gradient-ascent Maximum Entropy learner</title>
-      <author><first>Elliott</first><last>Moreton</last></author>
+      <author id="elliott-moreton"><first>Elliott</first><last>Moreton</last></author>
       <pages>113-124</pages>
       <doi>10.7275/R5XG9PBX</doi>
       <url hash="880ba256">W18-0312</url>
@@ -377,7 +377,7 @@
     </paper>
     <paper id="13">
       <title>Using Rhetorical Topics for Automatic Summarization</title>
-      <author><first>Natalie M.</first><last>Schrimpf</last></author>
+      <author id="natalie-m-schrimpf"><first>Natalie M.</first><last>Schrimpf</last></author>
       <pages>125-135</pages>
       <doi>10.7275/R5SQ8XM6</doi>
       <url hash="587f22cb">W18-0313</url>
@@ -385,8 +385,8 @@
     </paper>
     <paper id="14">
       <title>Sound Analogies with Phoneme Embeddings</title>
-      <author><first>Miikka P.</first><last>Silfverberg</last></author>
-      <author><first>Lingshuang</first><last>Mao</last></author>
+      <author id="miikka-silfverberg"><first>Miikka P.</first><last>Silfverberg</last></author>
+      <author id="lingshuang-jack-mao"><first>Lingshuang</first><last>Mao</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>136-144</pages>
       <doi>10.7275/R5NZ85VD</doi>
@@ -424,7 +424,7 @@
     <meta>
       <booktitle>Proceedings of the Thirteenth Workshop on Innovative Use of <fixed-case>NLP</fixed-case> for Building Educational Applications</booktitle>
       <url hash="6a2eb493">W18-05</url>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Jill</first><last>Burstein</last></editor>
       <editor><first>Ekaterina</first><last>Kochmar</last></editor>
       <editor><first>Claudia</first><last>Leacock</last></editor>
@@ -455,9 +455,9 @@
     <paper id="2">
       <title>Using Paraphrasing and Memory-Augmented Models to Combat Data Sparsity in Question Interpretation with a Virtual Patient Dialogue System</title>
       <author><first>Lifeng</first><last>Jin</last></author>
-      <author><first>David</first><last>King</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
       <author><first>Amad</first><last>Hussein</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Douglas</first><last>Danforth</last></author>
       <pages>13–23</pages>
       <abstract>When interpreting questions in a virtual patient dialogue system one must inevitably tackle the challenge of a long tail of relatively infrequently asked questions. To make progress on this challenge, we investigate the use of paraphrasing for data augmentation and neural memory-based classification, finding that the two methods work best in combination. In particular, we find that the neural memory-based approach not only outperforms a straight CNN classifier on low frequency questions, but also takes better advantage of the augmented data created by paraphrasing, together yielding a nearly 10% absolute improvement in accuracy on the least frequently asked questions.</abstract>
@@ -480,7 +480,7 @@
       <title>Automatic Input Enrichment for Selecting Reading Material: An Online Study with <fixed-case>E</fixed-case>nglish Teachers</title>
       <author><first>Maria</first><last>Chinkina</last></author>
       <author><first>Ankita</first><last>Oswal</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>35–44</pages>
       <abstract>Input material at the appropriate level is crucial for language acquisition. Automating the search for such material can systematically and efficiently support teachers in their pedagogical practice. This is the goal of the computational linguistic task of automatic input enrichment (Chinkina &amp; Meurers, 2016): It analyzes and re-ranks a collection of texts in order to prioritize those containing target linguistic forms. In the online study described in the paper, we collected 240 responses from English teachers in order to investigate whether they preferred automatic input enrichment over web search when selecting reading material for class. Participants demonstrated a general preference for the material provided by an automatic input enrichment system. It was also rated significantly higher than the texts retrieved by a standard web search engine with regard to the representation of linguistic forms and equivalent with regard to the relevance of the content to the topic. We discuss the implications of the results for language teaching and consider the potential strands of future research.</abstract>
       <url hash="6a3d7f69">W18-0504</url>
@@ -490,7 +490,7 @@
     <paper id="5">
       <title>Estimating Linguistic Complexity for Science Texts</title>
       <author><first>Farah</first><last>Nadeem</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>45–55</pages>
       <abstract>Evaluation of text difficulty is important both for downstream tasks like text simplification, and for supporting educators in classrooms. Existing work on automated text complexity analysis uses linear models with engineered knowledge-driven features as inputs. While this offers interpretability, these models have lower accuracy for shorter texts. Traditional readability metrics have the additional drawback of not generalizing to informational texts such as science. We propose a neural approach, training on science and other informational texts, to mitigate both problems. Our results show that neural methods outperform knowledge-based linear models for short texts, and have the capacity to generalize to genres not present in the training data.</abstract>
       <url hash="9f19521a">W18-0505</url>
@@ -522,11 +522,11 @@
     <paper id="7">
       <title>A Report on the Complex Word Identification Shared Task 2018</title>
       <author><first>Seid Muhie</first><last>Yimam</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>66–78</pages>
@@ -548,9 +548,9 @@
     <paper id="9">
       <title><fixed-case>COAST</fixed-case> - Customizable Online Syllable Enhancement in Texts. A flexible framework for automatically enhancing reading materials</title>
       <author><first>Heiko</first><last>Holz</last></author>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <author><first>Oliver</first><last>Brehm</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>89–100</pages>
       <abstract>This paper presents COAST, a web-based application to easily and automatically enhance syllable structure, word stress, and spacing in texts, that was designed in close collaboration with learning therapists to ensure its practical relevance. Such syllable-enhanced texts are commonly used in learning therapy or private tuition to promote the recognition of syllables in order to improve reading and writing skills. In a state of the art solutions for automatic syllable enhancement, we put special emphasis on syllable stress and support specific marking of the primary syllable stress in words. Core features of our tool are i) a highly customizable text enhancement and template functionality, and ii) a novel crowd-sourcing mechanism that we employ to address the issue of data sparsity in language resources. We successfully tested COAST with real-life practitioners in a series of user tests validating the concept of our framework.</abstract>
       <url hash="e886d159">W18-0509</url>
@@ -570,7 +570,7 @@
     <paper id="11">
       <title>Annotating Student Talk in Text-based Classroom Discussions</title>
       <author><first>Luca</first><last>Lugini</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <author><first>Amanda</first><last>Godley</last></author>
       <author><first>Christopher</first><last>Olshefski</last></author>
       <pages>110–116</pages>
@@ -591,12 +591,12 @@
     </paper>
     <paper id="13">
       <title>Generating Feedback for <fixed-case>E</fixed-case>nglish Foreign Language Exercises</title>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Verena</first><last>Möller</last></author>
       <author><first>Florian</first><last>Nuxoll</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>127–136</pages>
       <abstract>While immediate feedback on learner language is often discussed in the Second Language Acquisition literature (e.g., Mackey 2006), few systems used in real-life educational settings provide helpful, metalinguistic feedback to learners. In this paper, we present a novel approach leveraging task information to generate the expected range of well-formed and ill-formed variability in learner answers along with the required diagnosis and feedback. We combine this offline generation approach with an online component that matches the actual student answers against the pre-computed hypotheses. The results obtained for a set of 33 thousand answers of 7th grade German high school students learning English show that the approach successfully covers frequent answer patterns. At the same time, paraphrases and content errors require a more flexible alignment approach, for which we are planning to complement the method with the CoMiC approach successfully used for the analysis of reading comprehension answers (Meurers et al., 2011).</abstract>
       <url hash="2d6c8df0">W18-0513</url>
@@ -608,7 +608,7 @@
       <author><first>Anaïs</first><last>Tack</last></author>
       <author><first>Thomas</first><last>François</last></author>
       <author><first>Piet</first><last>Desmet</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>137–146</pages>
       <abstract>In this paper, we introduce NT2Lex, a novel lexical resource for Dutch as a foreign language (NT2) which includes frequency distributions of 17,743 words and expressions attested in expert-written textbook texts and readers graded along the scale of the Common European Framework of Reference (CEFR). In essence, the lexicon informs us about what kind of vocabulary should be understood when reading Dutch as a non-native reader at a particular proficiency level. The main novelty of the resource with respect to the previously developed CEFR-graded lexicons concerns the introduction of corpus-based evidence for L2 word sense complexity through the linkage to Open Dutch WordNet (Postma et al., 2016). The resource thus contains, on top of the lemmatised and part-of-speech tagged lexical entries, a total of 11,999 unique word senses and 8,934 distinct synsets.</abstract>
       <url hash="3b88c4f9">W18-0514</url>
@@ -640,7 +640,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>L</fixed-case>a<fixed-case>STUS</fixed-case>/<fixed-case>TALN</fixed-case> at Complex Word Identification (<fixed-case>CWI</fixed-case>) 2018 Shared Task</title>
-      <author><first>Ahmed</first><last>AbuRa’ed</last></author>
+      <author id="ahmed-aburaed"><first>Ahmed</first><last>AbuRa’ed</last></author>
       <author><first>Horacio</first><last>Saggion</last></author>
       <pages>159–165</pages>
       <abstract>This paper presents the participation of the LaSTUS/TALN team in the Complex Word Identification (CWI) Shared Task 2018 in the English monolingual track . The purpose of the task was to determine if a word in a given sentence can be judged as complex or not by a certain target audience. For the English track, task organizers provided a training and a development datasets of 27,299 and 3,328 words respectively together with the sentence in which each word occurs. The words were judged as complex or not by 20 human evaluators; ten of whom are natives. We submitted two systems: one system modeled each word to evaluate as a numeric vector populated with a set of lexical, semantic and contextual features while the other system relies on a word embedding representation and a distance metric. We trained two separate classifiers to automatically decide if each word is complex or not. We submitted six runs, two for each of the three subsets of the English monolingual CWI track.</abstract>
@@ -660,7 +660,7 @@
     </paper>
     <paper id="19">
       <title><fixed-case>U</fixed-case>nibuc<fixed-case>K</fixed-case>ernel: A kernel-based learning method for complex word identification</title>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <pages>175–183</pages>
       <abstract>In this paper, we present a kernel-based learning approach for the 2018 Complex Word Identification (CWI) Shared Task. Our approach is based on combining multiple low-level features, such as character n-grams, with high-level semantic features that are either automatically learned using word embeddings or extracted from a lexical knowledge base, namely WordNet. After feature extraction, we employ a kernel method for the learning phase. The feature matrix is first transformed into a normalized kernel matrix. For the binary classification task (simple versus complex), we employ Support Vector Machines. For the regression task, in which we have to predict the complexity level of a word (a word is more complex if it is labeled as complex by more annotators), we employ v-Support Vector Regression. We applied our approach only on the three English data sets containing documents from Wikipedia, WikiNews and News domains. Our best result during the competition was the third place on the English Wikipedia data set. However, in this paper, we also report better post-competition results.</abstract>
@@ -693,7 +693,7 @@
       <author><first>Nikhil</first><last>Wani</last></author>
       <author><first>Sandeep</first><last>Mathias</last></author>
       <author><first>Jayashree Aanand</first><last>Gajjam</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>200–205</pages>
       <abstract>In this paper, we present an effective system using voting ensemble classifiers to detect contextually complex words for non-native English speakers. To make the final decision, we channel a set of eight calibrated classifiers based on lexical, size and vocabulary features and train our model with annotated datasets collected from a mixture of native and non-native speakers. Thereafter, we test our system on three datasets namely News, WikiNews, and Wikipedia and report competitive results with an F1-Score ranging between 0.777 to 0.855 for each of the datasets. Our system outperforms multiple other models and falls within 0.042 to 0.026 percent of the best-performing model’s score in the shared task.</abstract>
       <url hash="5e857c2c">W18-0522</url>
@@ -703,8 +703,8 @@
     <paper id="23">
       <title>Grotoco@<fixed-case>SLAM</fixed-case>: Second Language Acquisition Modeling with Simple Features, Learners and Task-wise Models</title>
       <author><first>Sigrid</first><last>Klerke</last></author>
-      <author><first>Héctor</first><last>Martínez Alonso</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="hector-martinez-alonso"><first>Héctor</first><last>Martínez Alonso</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>206–211</pages>
       <abstract>We present our submission to the 2018 Duolingo Shared Task on Second Language Acquisition Modeling (SLAM). We focus on evaluating a range of features for the task, including user-derived measures, while examining how far we can get with a simple linear classifier. Our analysis reveals that errors differ per exercise format, which motivates our final and best-performing system: a task-wise (per exercise-format) model.</abstract>
       <url hash="91374f77">W18-0523</url>
@@ -760,7 +760,7 @@
     <paper id="28">
       <title>Annotation and Classification of Sentence-level Revision Improvement</title>
       <author><first>Tazin</first><last>Afrin</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>240–246</pages>
       <abstract>Studies of writing revisions rarely focus on revision quality. To address this issue, we introduce a corpus of between-draft revisions of student argumentative essays, annotated as to whether each revision improves essay quality. We demonstrate a potential usage of our annotations by developing a machine learning model to predict revision improvement. With the goal of expanding training data, we also extract revisions from a dataset edited by expert proofreaders. Our results indicate that blending expert and non-expert revisions increases model performance, with expert data particularly important for predicting low-quality revisions.</abstract>
       <url hash="5b91a35c">W18-0528</url>
@@ -770,7 +770,7 @@
     <paper id="29">
       <title>Language Model Based Grammatical Error Correction without Annotated Training Data</title>
       <author><first>Christopher</first><last>Bryant</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>247–253</pages>
       <abstract>Since the end of the CoNLL-2014 shared task on grammatical error correction (GEC), research into language model (LM) based approaches to GEC has largely stagnated. In this paper, we re-examine LMs in GEC and show that it is entirely possible to build a simple system that not only requires minimal annotated data (∼1000 sentences), but is also fairly competitive with several state-of-the-art systems. This approach should be of particular interest for languages where very little annotated training data exists, although we also hope to use it as a baseline to motivate future research.</abstract>
       <url hash="a0b04900">W18-0529</url>
@@ -791,7 +791,7 @@
       <title>Automated Content Analysis: A Case Study of Computer Science Student Summaries</title>
       <author><first>Yanjun</first><last>Gao</last></author>
       <author><first>Patricia M.</first><last>Davies</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>264–272</pages>
       <abstract>Technology is transforming Higher Education learning and teaching. This paper reports on a project to examine how and why automated content analysis could be used to assess precis writing by university students. We examine the case of one hundred and twenty-two summaries written by computer science freshmen. The texts, which had been hand scored using a teacher-designed rubric, were autoscored using the Natural Language Processing software, PyrEval. Pearson’s correlation coefficient and Spearman rank correlation were used to analyze the relationship between the teacher score and the PyrEval score for each summary. Three content models automatically constructed by PyrEval from different sets of human reference summaries led to consistent correlations, showing that the approach is reliable. Also observed was that, in cases where the focus of student assessment centers on formative feedback, categorizing the PyrEval scores by examining the average and standard deviations could lead to novel interpretations of their relationships. It is suggested that this project has implications for the ways in which automated content analysis could be used to help university students improve their summarization skills.</abstract>
       <url hash="2d6f00fc">W18-0531</url>
@@ -801,7 +801,7 @@
     <paper id="32">
       <title>Toward Data-Driven Tutorial Question Answering with Deep Learning Conversational Models</title>
       <author><first>Mayank</first><last>Kulkarni</last></author>
-      <author><first>Kristy</first><last>Boyer</last></author>
+      <author id="kristy-boyer"><first>Kristy</first><last>Boyer</last></author>
       <pages>273–283</pages>
       <abstract>There has been an increase in popularity of data-driven question answering systems given their recent success. This pa-per explores the possibility of building a tutorial question answering system for Java programming from data sampled from a community-based question answering forum. This paper reports on the creation of a dataset that could support building such a tutorial question answering system and discusses the methodology to create the 106,386 question strong dataset. We investigate how retrieval-based and generative models perform on the given dataset. The work also investigates the usefulness of using hybrid approaches such as combining retrieval-based and generative models. The results indicate that building data-driven tutorial systems using community-based question answering forums holds significant promise.</abstract>
       <url hash="53184f5a">W18-0532</url>
@@ -824,9 +824,9 @@
     </paper>
     <paper id="34">
       <title>A <fixed-case>P</fixed-case>ortuguese Native Language Identification Dataset</title>
-      <author><first>Iria</first><last>del Río Gayo</last></author>
+      <author id="iria-del-rio-gayo"><first>Iria</first><last>del Río Gayo</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <pages>291–296</pages>
       <abstract>In this paper we present NLI-PT, the first Portuguese dataset compiled for Native Language Identification (NLI), the task of identifying an author’s first language based on their second language writing. The dataset includes 1,868 student essays written by learners of European Portuguese, native speakers of the following L1s: Chinese, English, Spanish, German, Russian, French, Japanese, Italian, Dutch, Tetum, Arabic, Polish, Korean, Romanian, and Swedish. NLI-PT includes the original student text and four different types of annotation: POS, fine-grained POS, constituency parses, and dependency parses. NLI-PT can be used not only in NLI but also in research on several topics in the field of Second Language Acquisition and educational NLP. We discuss possible applications of this dataset and present the results obtained for the first lexical baseline system for Portuguese NLI.</abstract>
       <url hash="99d12df8">W18-0534</url>
@@ -848,8 +848,8 @@
       <author><first>Meng</first><last>Zhang</last></author>
       <author><first>Xie</first><last>Chen</last></author>
       <author><first>Ronan</first><last>Cummins</last></author>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>305–314</pages>
       <abstract>Some language exams have multiple writing tasks. When a learner writes multiple texts in a language exam, it is not surprising that the quality of these texts tends to be similar, and the existing automated text scoring (ATS) systems do not explicitly model this similarity. In this paper, we suggest that it could be useful to include the other texts written by this learner in the same exam as extra references in an ATS system. We propose various approaches of fusing information from multiple tasks and pass this authorship knowledge into our ATS model on six different datasets. We show that this can positively affect the model performance at a global level.</abstract>
       <url hash="f19d105b">W18-0536</url>
@@ -871,7 +871,7 @@
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
       <author><first>Jason</first><last>Angel</last></author>
       <author><first>Daniel Alejandro</first><last>Pérez Alvarez</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>322–327</pages>
       <abstract>We describe the systems of NLP-CIC team that participated in the Complex Word Identification (CWI) 2018 shared task. The shared task aimed to benchmark approaches for identifying complex words in English and other languages from the perspective of non-native speakers. Our goal is to compare two approaches: feature engineering and a deep neural network. Both approaches achieved comparable performance on the English test set. We demonstrated the flexibility of the deep-learning approach by using the same deep neural network setup in the Spanish track. Our systems achieved competitive results: all our systems were within 0.01 of the system with the best macro-F1 score on the test sets except on Wikipedia test set, on which our best system is 0.04 below the best macro-F1 score.</abstract>
       <url hash="a7313f78">W18-0538</url>
@@ -900,7 +900,7 @@
     </paper>
     <paper id="41">
       <title>Complex Word Identification Using Character n-grams</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>341–348</pages>
       <abstract>This paper investigates the use of character n-gram frequencies for identifying complex words in English, German and Spanish texts. The approach is based on the assumption that complex words are likely to contain different character sequences than simple words. The multinomial Naive Bayes classifier was used with n-grams of different lengths as features, and the best results were obtained for the combination of 2-grams and 4-grams. This variant was submitted to the Complex Word Identification Shared Task 2018 for all texts and achieved F-scores between 70% and 83%. The system was ranked in the middle range for all English texts, as third of fourteen submissions for German, and as tenth of seventeen submissions for Spanish. The method is not very convenient for the cross-language task, achieving only 59% on the French text.</abstract>
       <url hash="b8188036">W18-0541</url>
@@ -980,7 +980,7 @@
     <paper id="49">
       <title>Co-Attention Based Neural Network for Source-Dependent Essay Scoring</title>
       <author><first>Haoran</first><last>Zhang</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>399–409</pages>
       <abstract>This paper presents an investigation of using a co-attention based neural network for source-dependent essay scoring. We use a co-attention mechanism to help the model learn the importance of each part of the essay more accurately. Also, this paper shows that the co-attention based neural network model provides reliable score prediction of source-dependent responses. We evaluate our model on two source-dependent response corpora. Results show that our model outperforms the baseline on both corpora. We also show that the attention of the model is similar to the expert opinions with examples.</abstract>
       <url hash="2132e02c">W18-0549</url>
@@ -1007,7 +1007,7 @@
       <url hash="589cca75">W18-06</url>
       <editor><first>Kate</first><last>Loveys</last></editor>
       <editor><first>Kate</first><last>Niederhoffer</last></editor>
-      <editor><first>Emily</first><last>Prud’hommeaux</last></editor>
+      <editor id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></editor>
       <editor><first>Rebecca</first><last>Resnik</last></editor>
       <editor><first>Philip</first><last>Resnik</last></editor>
       <doi>10.18653/v1/W18-06</doi>
@@ -1049,7 +1049,7 @@
       <author><first>Suraj</first><last>Nair</last></author>
       <author><first>Ayah</first><last>Zirikly</last></author>
       <author><first>Meir</first><last>Friedenberg</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
       <pages>25–36</pages>
       <abstract>We report on the creation of a dataset for studying assessment of suicide risk via online postings in Reddit. Evaluation of risk-level annotations by experts yields what is, to our knowledge, the first demonstration of reliability in risk assessment by clinicians based on social media postings. We also introduce and demonstrate the value of a new, detailed rubric for assessing suicide risk, compare crowdsourced with expert performance, and present baseline predictive modeling experiments using the new dataset, which will be made available to researchers through the American Association of Suicidology.</abstract>
@@ -1064,7 +1064,7 @@
       <author><first>Kate</first><last>Niederhoffer</last></author>
       <author><first>Kate</first><last>Loveys</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>37–46</pages>
       <abstract>We describe the shared task for the CLPsych 2018 workshop, which focused on predicting current and future psychological health from an essay authored in childhood. Language-based predictions of a person’s current health have the potential to supplement traditional psychological assessment such as questionnaires, improving intake risk measurement and monitoring. Predictions of future psychological health can aid with both early detection and the development of preventative care. Research into the mental health trajectory of people, beginning from their childhood, has thus far been an area of little work within the NLP community. This shared task represents one of the first attempts to evaluate the use of early language to predict future health; this has the potential to support a wide variety of clinical health care tasks, from early assessment of lifetime risk for mental health problems, to optimal timing for targeted interventions aimed at both prevention and treatment.</abstract>
       <url hash="8bd5a141">W18-0604</url>
@@ -1074,7 +1074,7 @@
     <paper id="5">
       <title>An Approach to the <fixed-case>CLP</fixed-case>sych 2018 Shared Task Using Top-Down Text Representation and Simple Bottom-Up Model Selection</title>
       <author><first>Micah</first><last>Iserman</last></author>
-      <author><first>Molly</first><last>Ireland</last></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last></author>
       <author><first>Andrew</first><last>Littlefield</last></author>
       <author><first>Tyler</first><last>Davis</last></author>
       <author><first>Sage</first><last>Maliepaard</last></author>
@@ -1088,9 +1088,9 @@
       <title>Using contextual information for automatic triage of posts in a peer-support forum</title>
       <author><first>Edgar</first><last>Altszyler</last></author>
       <author><first>Ariel J.</first><last>Berenstein</last></author>
-      <author><first>David</first><last>Milne</last></author>
+      <author id="david-n-milne"><first>David</first><last>Milne</last></author>
       <author><first>Rafael A.</first><last>Calvo</last></author>
-      <author><first>Diego</first><last>Fernandez Slezak</last></author>
+      <author id="diego-fernandez-slezak"><first>Diego</first><last>Fernandez Slezak</last></author>
       <pages>57–68</pages>
       <abstract>Mental health forums are online spaces where people can share their experiences anonymously and get peer support. These forums, require the supervision of moderators to provide support in delicate cases, such as posts expressing suicide ideation. The large increase in the number of forum users makes the task of the moderators unmanageable without the help of automatic triage systems. In the present paper, we present a Machine Learning approach for the triage of posts. Most approaches in the literature focus on the content of the posts, but only a few authors take advantage of features extracted from the context in which they appear. Our approach consists of the development and implementation of a large variety of new features from both, the content and the context of posts, such as previous messages, interaction with other users and author’s history. Our method has competed in the CLPsych 2017 Shared Task, obtaining the first place for several of the subtasks. Moreover, we also found that models that take advantage of post context improve significantly its performance in the detection of flagged posts (posts that require moderators attention), as well as those that focus on post content outperforms in the detection of most urgent events.</abstract>
       <url hash="ac08494f">W18-0606</url>
@@ -1102,7 +1102,7 @@
       <author><first>Julia</first><last>Ive</last></author>
       <author><first>George</first><last>Gkotsis</last></author>
       <author><first>Rina</first><last>Dutta</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
       <pages>69–77</pages>
       <abstract>Mental health problems represent a major public health challenge. Automated analysis of text related to mental health is aimed to help medical decision-making, public health policies and to improve health care. Such analysis may involve text classification. Traditionally, automated classification has been performed mainly using machine learning methods involving costly feature engineering. Recently, the performance of those methods has been dramatically improved by neural methods. However, mainly Convolutional neural networks (CNNs) have been explored. In this paper, we apply a hierarchical Recurrent neural network (RNN) architecture with an attention mechanism on social media data related to mental health. We show that this architecture improves overall classification results as compared to previously reported results on the same data. Benefitting from the attention mechanism, it can also efficiently select text elements crucial for classification decisions, which can also be used for in-depth analysis.</abstract>
@@ -1114,7 +1114,7 @@
       <title>Cross-cultural differences in language markers of depression online</title>
       <author><first>Kate</first><last>Loveys</last></author>
       <author><first>Jonathan</first><last>Torrez</last></author>
-      <author><first>Alex</first><last>Fine</last></author>
+      <author id="alex-fine"><first>Alex</first><last>Fine</last></author>
       <author><first>Glen</first><last>Moriarty</last></author>
       <author><first>Glen</first><last>Coppersmith</last></author>
       <pages>78–87</pages>
@@ -1128,7 +1128,7 @@
       <author><first>Ahmed</first><last>Husseini Orabi</last></author>
       <author><first>Prasadith</first><last>Buddhitha</last></author>
       <author><first>Mahmoud</first><last>Husseini Orabi</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>88–97</pages>
       <abstract>Mental illness detection in social media can be considered a complex task, mainly due to the complicated nature of mental disorders. In recent years, this research area has started to evolve with the continuous increase in popularity of social media platforms that became an integral part of people’s life. This close relationship between social media platforms and their users has made these platforms to reflect the users’ personal life with different limitations. In such an environment, researchers are presented with a wealth of information regarding one’s life. In addition to the level of complexity in identifying mental illnesses through social media platforms, adopting supervised machine learning approaches such as deep neural networks have not been widely accepted due to the difficulties in obtaining sufficient amounts of annotated training data. Due to these reasons, we try to identify the most effective deep neural network architecture among a few of selected architectures that were successfully used in natural language processing tasks. The chosen architectures are used to detect users with signs of mental illnesses (depression in our case) given limited unstructured text data extracted from the Twitter social media platform.</abstract>
       <url hash="1eef0871">W18-0609</url>
@@ -1137,9 +1137,9 @@
     </paper>
     <paper id="10">
       <title>Current and Future Psychological Health Prediction using Language and Socio-Demographics of Children for the <fixed-case>CLP</fixed-case>ysch 2018 Shared Task</title>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>98–106</pages>
       <abstract>This article is a system description and report on the submission of a team from the University of Pennsylvania in the ’CLPsych 2018’ shared task. The goal of the shared task was to use childhood language as a marker for both current and future psychological health over individual lifetimes. Our system employs multiple textual features derived from the essays written and individuals’ socio-demographic variables at the age of 11. We considered several word clustering approaches, and explore the use of linear regression based on different feature sets. Our approach showed best results for predicting distress at the age of 42 and for predicting current anxiety on Disattenuated Pearson Correlation, and ranked fourth in the future health prediction task. In addition to the subtasks presented, we attempted to provide insight into mental health aspects at different ages. Our findings indicate that misspellings, words with illegible letters and increased use of personal pronouns are correlated with poor mental health at age 11, while descriptions about future physical activity, family and friends are correlated with good mental health.</abstract>
       <url hash="0f946145">W18-0610</url>
@@ -1199,7 +1199,7 @@
       <title>Automatic Detection of Incoherent Speech for Diagnosing Schizophrenia</title>
       <author><first>Dan</first><last>Iter</last></author>
       <author><first>Jong</first><last>Yoon</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <pages>136–146</pages>
       <abstract>Schizophrenia is a mental disorder which afflicts an estimated 0.7% of adults world wide. It affects many areas of mental function, often evident from incoherent speech. Diagnosing schizophrenia relies on subjective judgments resulting in disagreements even among trained clinicians. Recent studies have proposed the use of natural language processing for diagnosis by drawing on automatically-extracted linguistic features like discourse coherence and lexicon. Here, we present the first benchmark comparison of previously proposed coherence models for detecting symptoms of schizophrenia and evaluate their performance on a new dataset of recorded interviews between subjects and clinicians. We also present two alternative coherence metrics based on modern sentence embedding techniques that outperform the previous methods on our dataset. Lastly, we propose a novel computational model for reference incoherence based on ambiguous pronoun usage and show that it is a highly predictive feature on our data. While the number of subjects is limited in this pilot study, our results suggest new directions for diagnosing common symptoms of schizophrenia.</abstract>
       <url hash="9d13ba68">W18-0615</url>
@@ -1219,7 +1219,7 @@
       <author><first>Meredith</first><last>Cola</last></author>
       <author><first>Juhi</first><last>Pandey</last></author>
       <author><first>Edward S.</first><last>Brodkin</last></author>
-      <author><first>Robert T.</first><last>Schultz</last></author>
+      <author id="robert-t-schultz"><first>Robert T.</first><last>Schultz</last></author>
       <author><first>Birkan</first><last>Tunç</last></author>
       <pages>147–157</pages>
       <abstract>Autism spectrum disorder (ASD) is a neurodevelopmental condition characterized by impaired social communication and the presence of restricted, repetitive patterns of behaviors and interests. Prior research suggests that restricted patterns of behavior in ASD may be cross-domain phenomena that are evident in a variety of modalities. Computational studies of language in ASD provide support for the existence of an underlying dimension of restriction that emerges during a conversation. Similar evidence exists for restricted patterns of facial movement. Using tools from computational linguistics, computer vision, and information theory, this study tests whether cognitive-motor restriction can be detected across multiple behavioral domains in adults with ASD during a naturalistic conversation. Our methods identify restricted behavioral patterns, as measured by entropy in word use and mouth movement. Results suggest that adults with ASD produce significantly less diverse mouth movements and words than neurotypical adults, with an increased reliance on repeated patterns in both domains. The diversity values of the two domains are not significantly correlated, suggesting that they provide complementary information.</abstract>
@@ -1257,7 +1257,7 @@
       <title>Predicting Human Trustfulness from <fixed-case>F</fixed-case>acebook Language</title>
       <author><first>Mohammadzaman</first><last>Zamani</last></author>
       <author><first>Anneke</first><last>Buffone</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>174–181</pages>
       <abstract>Trustfulness — one’s general tendency to have confidence in unknown people or situations — predicts many important real-world outcomes such as mental health and likelihood to cooperate with others such as clinicians. While data-driven measures of interpersonal trust have previously been introduced, here, we develop the first language-based assessment of the personality trait of trustfulness by fitting one’s language to an accepted questionnaire-based trust score. Further, using trustfulness as a type of case study, we explore the role of questionnaire size as well as word count in developing language-based predictive models of users’ psychological traits. We find that leveraging a longer questionnaire can yield greater test set accuracy, while, for training, we find it beneficial to include users who took smaller questionnaires which offers more observations for training. Similarly, after noting a decrease in individual prediction error as word count increased, we found a word count-weighted training scheme was helpful when there were very few users in the first place.</abstract>
       <url hash="50bf3d8b">W18-0619</url>
@@ -1266,7 +1266,7 @@
     </paper>
     <paper id="20">
       <title>Within and Between-Person Differences in Language Used Across Anxiety Support and Neutral <fixed-case>R</fixed-case>eddit Communities</title>
-      <author><first>Molly</first><last>Ireland</last></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last></author>
       <author><first>Micah</first><last>Iserman</last></author>
       <pages>182–193</pages>
       <abstract>Although many studies have distinguished between the social media language use of people who do and do not have a mental health condition, within-person context-sensitive comparisons (for example, analyzing individuals’ language use when seeking support or discussing neutral topics) are less common. Two dictionary-based analyses of Reddit communities compared (1) anxious individuals’ comments in anxiety support communities (e.g., /r/PanicParty) with the same users’ comments in neutral communities (e.g., /r/todayilearned), and, (2) within popular neutral communities, comments by members of anxiety subreddits with comments by other users. Each comparison yielded theory-consistent effects as well as unexpected results that suggest novel hypotheses to be tested in the future. Results have relevance for improving researchers’ and practitioners’ ability to unobtrusively assess anxiety symptoms in conversations that are not explicitly about mental health.</abstract>
@@ -1292,7 +1292,7 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Computational Models of Reference, Anaphora and Coreference</booktitle>
       <url hash="42117c1a">W18-07</url>
-      <editor><first>Massimo</first><last>Poesio</last></editor>
+      <editor id="massimo-poesio"><first>Massimo</first><last>Poesio</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
       <doi>10.18653/v1/W18-07</doi>
@@ -1308,7 +1308,7 @@
     </frontmatter>
     <paper id="1">
       <title>Anaphora Resolution for <fixed-case>T</fixed-case>witter Conversations: An Exploratory Study</title>
-      <author><first>Berfin</first><last>Aktaş</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
       <author><first>Tatjana</first><last>Scheffler</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <pages>1–10</pages>
@@ -1322,8 +1322,8 @@
       <author><first>Massimo</first><last>Poesio</last></author>
       <author><first>Yulia</first><last>Grishina</last></author>
       <author><first>Varada</first><last>Kolhatkar</last></author>
-      <author><first>Nafise</first><last>Moosavi</last></author>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="nafise-sadat-moosavi"><first>Nafise</first><last>Moosavi</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Adam</first><last>Roussel</last></author>
       <author><first>Fabian</first><last>Simonjetz</last></author>
       <author><first>Alexandra</first><last>Uma</last></author>
@@ -1338,7 +1338,7 @@
     </paper>
     <paper id="3">
       <title>Rule- and Learning-based Methods for Bridging Resolution in the <fixed-case>ARRAU</fixed-case> Corpus</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <pages>23–33</pages>
       <abstract>We present two systems for bridging resolution, which we submitted to the CRAC shared task on bridging anaphora resolution in the ARRAU corpus (track 2): a rule-based approach following Hou et al. 2014 and a learning-based approach. The re-implementation of Hou et al. 2014 achieves very poor performance when being applied to ARRAU. We found that the reasons for this lie in the different bridging annotations: whereas the rule-based system suggests many referential bridging pairs, ARRAU contains mostly lexical bridging. We describe the differences between these two types of bridging and adapt the rule-based approach to be able to handle lexical bridging. The modified rule-based approach achieves reasonable performance on all (sub)-tasks and outperforms a simple learning-based approach.</abstract>
       <url hash="c49ea55b">W18-0703</url>
@@ -1356,10 +1356,10 @@
     </paper>
     <paper id="5">
       <title>Integrating Predictions from Neural-Network Relation Classifiers into Coreference and Bridging Resolution</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Maximilian</first><last>Köper</last></author>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>44–49</pages>
       <abstract>Cases of coreference and bridging resolution often require knowledge about semantic relations between anaphors and antecedents. We suggest state-of-the-art neural-network classifiers trained on relation benchmarks to predict and integrate likelihoods for relations. Two experiments with representations differing in noise and complexity improve our bridging but not our coreference resolver.</abstract>
       <url hash="57d5f3ac">W18-0705</url>
@@ -1369,7 +1369,7 @@
     <paper id="6">
       <title>Towards Bridging Resolution in <fixed-case>G</fixed-case>erman: Data Analysis and Rule-based Experiments</title>
       <author><first>Janis</first><last>Pagel</last></author>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <pages>50–60</pages>
       <abstract>Bridging resolution is the task of recognising bridging anaphors and linking them to their antecedents. While there is some work on bridging resolution for English, there is only little work for German. We present two datasets which contain bridging annotations, namely DIRNDL and GRAIN, and compare the performance of a rule-based system with a simple baseline approach on these two corpora. The performance for full bridging resolution ranges between an F1 score of 13.6% for DIRNDL and 11.8% for GRAIN. An analysis using oracle lists suggests that the system could, to a certain extent, benefit from ranking and re-ranking antecedent candidates. Furthermore, we investigate the importance of single features and show that the features used in our work seem promising for future bridging resolution approaches.</abstract>
       <url hash="3e967a2f">W18-0706</url>
@@ -1410,7 +1410,7 @@
       <author><first>Jixing</first><last>Li</last></author>
       <author><first>Murielle</first><last>Fabre</last></author>
       <author><first>Wen-Ming</first><last>Luh</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>87–96</pages>
       <abstract>Typological differences between English and Chinese suggest stronger reliance on salience of the antecedent during pronoun resolution in Chinese. We examined this hypothesis by correlating a difficulty measure of pronoun resolution derived by the activation-based ACT-R model with the brain activity of English and Chinese participants listening to a same audiobook during fMRI recording. The ACT-R model predicts higher overall difficulty for English speakers, which is supported at the brain level in left Broca’s area. More generally, it confirms that computational modeling approach is able to dissociate different dimensions that are involved in the complex process of pronoun resolution in the brain.</abstract>
       <url hash="6c2de313">W18-0710</url>
@@ -1523,7 +1523,7 @@
     <paper id="3">
       <title>Leveraging Syntactic Constructions for Metaphor Identification</title>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <pages>17–26</pages>
       <abstract>Identification of metaphoric language in text is critical for generating effective semantic representations for natural language understanding. Computational approaches to metaphor identification have largely relied on heuristic based models or feature-based machine learning, using hand-crafted lexical resources coupled with basic syntactic information. However, recent work has shown the predictive power of syntactic constructions in determining metaphoric source and target domains (Sullivan 2013). Our work intends to explore syntactic constructions and their relation to metaphoric language. We undertake a corpus-based analysis of predicate-argument constructions and their metaphoric properties, and attempt to effectively represent syntactic constructions as features for metaphor processing, both in identifying source and target domains and in distinguishing metaphoric words from non-metaphoric.</abstract>
       <url hash="a2fa7647">W18-0903</url>
@@ -1533,7 +1533,7 @@
     <paper id="4">
       <title>Literal, Metphorical or Both? Detecting Metaphoricity in Isolated Adjective-Noun Phrases</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <author><first>Aleksander</first><last>Wawer</last></author>
       <pages>27–33</pages>
       <abstract>The paper addresses the classification of isolated Polish adjective-noun phrases according to their metaphoricity. We tested neural networks to predict if a phrase has a literal or metaphorical sense or can have both senses depending on usage. The input to the neural network consists of word embeddings, but we also tested the impact of information about the domain of the adjective and about the abstractness of the noun. We applied our solution to English data available on the Internet and compared it to results published in papers. We found that the solution based on word embeddings only can achieve results comparable with complex solutions requiring additional information.</abstract>
@@ -1563,7 +1563,7 @@
     </paper>
     <paper id="7">
       <title>A Report on the 2018 <fixed-case>VUA</fixed-case> Metaphor Detection Shared Task</title>
-      <author><first>Chee Wee (Ben)</first><last>Leong</last></author>
+      <author id="chee-wee-leong"><first>Chee Wee (Ben)</first><last>Leong</last></author>
       <author><first>Beata</first><last>Beigman Klebanov</last></author>
       <author><first>Ekaterina</first><last>Shutova</last></author>
       <pages>56–66</pages>
@@ -1596,7 +1596,7 @@
     <paper id="10">
       <title>Phrase-Level Metaphor Identification Using Distributed Representations of Word Meaning</title>
       <author><first>Omnia</first><last>Zayed</last></author>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <author><first>Paul</first><last>Buitelaar</last></author>
       <pages>81–90</pages>
       <abstract>Metaphor is an essential element of human cognition which is often used to express ideas and emotions that might be difficult to express using literal language. Processing metaphoric language is a challenging task for a wide range of applications ranging from text simplification to psychotherapy. Despite the variety of approaches that are trying to process metaphor, there is still a need for better models that mimic the human cognition while exploiting fewer resources. In this paper, we present an approach based on distributional semantics to identify metaphors on the phrase-level. We investigated the use of different word embeddings models to identify verb-noun pairs where the verb is used metaphorically. Several experiments are conducted to show the performance of the proposed approach on benchmark datasets.</abstract>
@@ -1640,7 +1640,7 @@
     <paper id="14">
       <title>Di-<fixed-case>LSTM</fixed-case> Contrast : A Deep Neural Network for Metaphor Detection</title>
       <author><first>Krishnkant</first><last>Swarnkar</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>115–120</pages>
       <abstract>The contrast between the contextual and general meaning of a word serves as an important clue for detecting its metaphoricity. In this paper, we present a deep neural architecture for metaphor detection which exploits this contrast. Additionally, we also use cost-sensitive learning by re-weighting examples, and baseline features like concreteness ratings, POS and WordNet-based features. The best performing system of ours achieves an overall F1 score of 0.570 on All POS category and 0.605 on the Verbs category at the Metaphor Shared Task 2018.</abstract>
       <url hash="a34ebae5">W18-0914</url>
@@ -1662,7 +1662,7 @@
       <title>Detecting Figurative Word Occurrences Using Recurrent Neural Networks</title>
       <author><first>Agnieszka</first><last>Mykowiecka</last></author>
       <author><first>Aleksander</first><last>Wawer</last></author>
-      <author><first>Malgorzata</first><last>Marciniak</last></author>
+      <author id="malgorzata-marciniak"><first>Malgorzata</first><last>Marciniak</last></author>
       <pages>124–127</pages>
       <abstract>The paper addresses detection of figurative usage of words in English text. The chosen method was to use neural nets fed by pretrained word embeddings. The obtained results show that simple solutions, based on words embeddings only, are comparable to complex solutions, using many sources of information which are not available for languages less-studied than English.</abstract>
       <url hash="6ef24596">W18-0916</url>
@@ -1682,7 +1682,7 @@
     </paper>
     <paper id="18">
       <title>Using Language Learner Data for Metaphor Detection</title>
-      <author><first>Egon</first><last>Stemle</last></author>
+      <author id="egon-stemle"><first>Egon</first><last>Stemle</last></author>
       <author><first>Alexander</first><last>Onysko</last></author>
       <pages>133–138</pages>
       <abstract>This article describes the system that participated in the shared task on metaphor detection on the Vrije University Amsterdam Metaphor Corpus (VUA). The ST was part of the workshop on processing figurative language at the 16th annual conference of the North American Chapter of the Association for Computational Linguistics (NAACL2018). The system combines a small assertion of trending techniques, which implement matured methods from NLP and ML; in particular, the system uses word embeddings from standard corpora and from corpora representing different proficiency levels of language learners in a LSTM BiRNN architecture. The system is available under the APLv2 open-source license.</abstract>
@@ -1714,7 +1714,7 @@
       <author><first>Soumya</first><last>Wadhwa</last></author>
       <author><first>Varsha</first><last>Embar</last></author>
       <author><first>Matthias</first><last>Grabmair</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>1–7</pages>
       <abstract>In this paper, we investigate the tendency of end-to-end neural Machine Reading Comprehension (MRC) models to match shallow patterns rather than perform inference-oriented reasoning on RC benchmarks. We aim to test the ability of these systems to answer questions which focus on referential inference. We propose ParallelQA, a strategy to formulate such questions using parallel passages. We also demonstrate that existing neural models fail to generalize well to this setting.</abstract>
       <url hash="54f6a11a">W18-1001</url>
@@ -1728,7 +1728,7 @@
       <author><first>Seyedarian</first><last>Hosseini</last></author>
       <author><first>Michael</first><last>Noukhovitch</last></author>
       <author><first>Yoshua</first><last>Bengio</last></author>
-      <author><first>Jackie</first><last>Cheung</last></author>
+      <author id="jackie-chi-kit-cheung"><first>Jackie</first><last>Cheung</last></author>
       <pages>8–16</pages>
       <abstract>Commonsense knowledge bases such as ConceptNet represent knowledge in the form of relational triples. Inspired by recent work by Li et al., we analyse if knowledge base completion models can be used to mine commonsense knowledge from raw text. We propose novelty of predicted triples with respect to the training set as an important factor in interpreting results. We critically analyse the difficulty of mining novel commonsense knowledge, and show that a simple baseline method that outperforms the previous state of the art on predicting more novel triples.</abstract>
       <url hash="fb16abc8">W18-1002</url>
@@ -1775,7 +1775,7 @@
       <url hash="0ea9f9bc">W18-11</url>
       <editor><first>Malvina</first><last>Nissim</last></editor>
       <editor><first>Viviana</first><last>Patti</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <editor><first>Claudia</first><last>Wagner</last></editor>
       <doi>10.18653/v1/W18-11</doi>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1812,7 +1812,7 @@
     <paper id="3">
       <title>Building an annotated dataset of app store reviews with Appraisal features in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish</title>
       <author><first>Natalia</first><last>Mora</last></author>
-      <author><first>Julia</first><last>Lavid-López</last></author>
+      <author id="julia-lavid-lopez"><first>Julia</first><last>Lavid-López</last></author>
       <pages>16–24</pages>
       <abstract>This paper describes the creation and annotation of a dataset consisting of 250 English and Spanish app store reviews from Google’s Play Store with Appraisal features. This is one of the most influential linguistic frameworks for the analysis of evaluation and opinion in discourse due to its insightful descriptive features. However, it has not been extensively applied in NLP in spite of its potential for the classification of the subjective content of these reviews. We describe the dataset, the annotation scheme and guidelines, the agreement studies, the annotation results and their impact on the characterisation of this genre.</abstract>
       <url hash="df48c9a1">W18-1103</url>
@@ -1823,7 +1823,7 @@
       <title>Enabling Deep Learning of Emotion With First-Person Seed Expressions</title>
       <author><first>Hassan</first><last>Alhuzali</last></author>
       <author><first>Muhammad</first><last>Abdul-Mageed</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>25–35</pages>
       <abstract>The computational treatment of emotion in natural language text remains relatively limited, and Arabic is no exception. This is partly due to lack of labeled data. In this work, we describe and manually validate a method for the automatic acquisition of emotion labeled data and introduce a newly developed data set for Modern Standard and Dialectal Arabic emotion detection focused at Robert Plutchik’s 8 basic emotion types. Using a hybrid supervision method that exploits first person emotion seeds, we show how we can acquire promising results with a deep gated recurrent neural network. Our best model reaches 70% <i>F</i>-score, significantly (i.e., 11%, <tex-math>p &lt; 0.05</tex-math>) outperforming a competitive baseline. Applying our method and data on an external dataset of 4 emotions released around the same time we finalized our work, we acquire 7% absolute gain in <tex-math>F</tex-math>-score over a linear SVM classifier trained on gold data, thus validating our approach.</abstract>
       <url hash="ee4e5e4f">W18-1104</url>
@@ -1836,7 +1836,7 @@
       <author><first>Deepanshu</first><last>Vijay</last></author>
       <author><first>Vinay</first><last>Singh</last></author>
       <author><first>Syed Sarfaraz</first><last>Akhtar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>36–41</pages>
       <abstract>Hate speech detection in social media texts is an important Natural language Processing task, which has several crucial applications like sentiment analysis, investigating cyberbullying and examining socio-political controversies. While relevant research has been done independently on code-mixed social media texts and hate speech detection, our work is the first attempt in detecting hate speech in Hindi-English code-mixed social media text. In this paper, we analyze the problem of hate speech detection in code-mixed texts and present a Hindi-English code-mixed dataset consisting of tweets posted online on Twitter. The tweets are annotated with the language at word level and the class they belong to (Hate Speech or Normal Speech). We also propose a supervised classification system for detecting hate speech in the text using various character level, word level, and lexicon based features.</abstract>
       <url hash="d8cb173e">W18-1105</url>
@@ -1885,7 +1885,7 @@
     <paper id="10">
       <title>Understanding the Effect of Gender and Stance in Opinion Expression in Debates on “Abortion”</title>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>69–75</pages>
       <abstract>In this paper, we focus on understanding linguistic differences across groups with different self-identified gender and stance in expressing opinions about ABORTION. We provide a new dataset consisting of users’ gender, stance on ABORTION as well as the debates in ABORTION drawn from debate.org. We use the gender and stance information to identify significant linguistic differences across individuals with different gender and stance. We show the importance of considering the stance information along with the gender since we observe significant linguistic differences across individuals with different stance even within the same gender group.</abstract>
       <url hash="e5c27c12">W18-1110</url>
@@ -1962,7 +1962,7 @@
       <booktitle>Proceedings of the Second Workshop on Subword/Character <fixed-case>LE</fixed-case>vel Models</booktitle>
       <url hash="4627b5fe">W18-12</url>
       <editor><first>Manaal</first><last>Faruqui</last></editor>
-      <editor><first>Hinrich</first><last>Schütze</last></editor>
+      <editor id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></editor>
       <editor><first>Isabel</first><last>Trancoso</last></editor>
       <editor><first>Yulia</first><last>Tsvetkov</last></editor>
       <editor><first>Yadollah</first><last>Yaghoobzadeh</last></editor>
@@ -1990,7 +1990,7 @@
     <paper id="2">
       <title>Entropy-Based Subword Mining with an Application to Word Embeddings</title>
       <author><first>Ahmed</first><last>El-Kishky</last></author>
-      <author><first>Frank</first><last>Xu</last></author>
+      <author id="frank-f-xu"><first>Frank</first><last>Xu</last></author>
       <author><first>Aston</first><last>Zhang</last></author>
       <author><first>Stephen</first><last>Macke</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
@@ -2015,7 +2015,7 @@
     <paper id="4">
       <title>Addressing Low-Resource Scenarios with Character-aware Embeddings</title>
       <author><first>Sean</first><last>Papay</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <pages>32–37</pages>
       <abstract>Most modern approaches to computing word embeddings assume the availability of text corpora with billions of words. In this paper, we explore a setup where only corpora with millions of words are available, and many words in any new text are out of vocabulary. This setup is both of practical interests – modeling the situation for specific domains and low-resource languages – and of psycholinguistic interest, since it corresponds much more closely to the actual experiences and challenges of human language learning and use. We compare standard skip-gram word embeddings with character-based embeddings on word relatedness prediction. Skip-grams excel on large corpora, while character-based embeddings do well on small corpora generally and rare and complex words specifically. The models can be combined easily.</abstract>
@@ -2037,9 +2037,9 @@
     </paper>
     <paper id="6">
       <title>Discovering Phonesthemes with Sparse Regularization</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>49–54</pages>
       <abstract>We introduce a simple method for extracting non-arbitrary form-meaning representations from a collection of semantic vectors. We treat the problem as one of feature selection for a model trained to predict word vectors from subword features. We apply this model to the problem of automatically discovering phonesthemes, which are submorphemic sound clusters that appear in words with similar meaning. Many of our model-predicted phonesthemes overlap with those proposed in the linguistics literature, and we validate our approach with human judgments.</abstract>
       <url hash="6383fdf4">W18-1206</url>
@@ -2049,7 +2049,7 @@
     <paper id="7">
       <title>Meaningless yet meaningful: Morphology grounded subword-level <fixed-case>NMT</fixed-case></title>
       <author><first>Tamali</first><last>Banerjee</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>55–60</pages>
       <abstract>We explore the use of two independent subsystems Byte Pair Encoding (BPE) and Morfessor as basic units for subword-level neural machine translation (NMT). We show that, for linguistically distant language-pairs Morfessor-based segmentation algorithm produces significantly better quality translation than BPE. However, for close language-pairs BPE-based subword-NMT may translate better than Morfessor-based subword-NMT. We propose a combined approach of these two segmentation algorithms Morfessor-BPE (M-BPE) which outperforms these two baseline systems in terms of BLEU score. Our results are supported by experiments on three language-pairs: English-Hindi, Bengali-Hindi and English-Bengali.</abstract>
       <url hash="9c0322df">W18-1207</url>
@@ -2082,7 +2082,7 @@
       <author><first>Shiran</first><last>Dudy</last></author>
       <author><first>Shaobin</first><last>Xu</last></author>
       <author><first>Steven</first><last>Bedrick</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <pages>72–77</pages>
       <abstract>Brain-computer interfaces and other augmentative and alternative communication devices introduce language-modeing challenges distinct from other character-entry methods. In particular, the acquired signal of the EEG (electroencephalogram) signal is noisier, which, in turn, makes the user intent harder to decipher. In order to adapt to this condition, we propose to maintain ambiguous history for every time step, and to employ, apart from the character language model, word information to produce a more robust prediction system. We present preliminary results that compare this proposed Online-Context Language Model (OCLM) to current algorithms that are used in this type of setting. Evaluation on both perplexity and predictive accuracy demonstrates promising results when dealing with ambiguous histories in order to provide to the front end a distribution of the next character the user might type.</abstract>
       <url hash="6bbb9a95">W18-1210</url>
@@ -2111,7 +2111,7 @@
       <title>Using Hedge Detection to Improve Committed Belief Tagging</title>
       <author><first>Morgan</first><last>Ulinski</last></author>
       <author><first>Seth</first><last>Benjamin</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>1–5</pages>
       <abstract>We describe a novel method for identifying hedge terms using a set of manually constructed rules. We present experiments adding hedge features to a committed belief system to improve classification. We compare performance of this system (a) without hedging features, (b) with dictionary-based features, and (c) with rule-based features. We find that using hedge features improves performance of the committed belief system, particularly in identifying instances of non-committed belief and reported belief.</abstract>
       <url hash="a305a31a">W18-1301</url>
@@ -2131,7 +2131,7 @@
     <paper id="3">
       <title>Detecting Sarcasm is Extremely Easy ;-)</title>
       <author><first>Natalie</first><last>Parde</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>21–26</pages>
       <abstract>Detecting sarcasm in text is a particularly challenging problem in computational semantics, and its solution may vary across different types of text. We analyze the performance of a domain-general sarcasm detection system on datasets from two very different domains: Twitter, and Amazon product reviews. We categorize the errors that we identify with each, and make recommendations for addressing these issues in NLP systems in the future.</abstract>
       <url hash="e830c3f0">W18-1303</url>
@@ -2164,8 +2164,8 @@
       <url hash="3985ffb5">W18-14</url>
       <editor><first>Parisa</first><last>Kordjamshidi</last></editor>
       <editor><first>Archna</first><last>Bhatia</last></editor>
-      <editor><first>James</first><last>Pustejovsky</last></editor>
-      <editor><first>Marie-Francine</first><last>Moens</last></editor>
+      <editor id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></editor>
+      <editor id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></editor>
       <doi>10.18653/v1/W18-14</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New Orleans</address>
@@ -2181,7 +2181,7 @@
       <title>Exploring the Functional and Geometric Bias of Spatial Relations Using Neural Language Models</title>
       <author><first>Simon</first><last>Dobnik</last></author>
       <author><first>Mehdi</first><last>Ghanimifard</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>1–11</pages>
       <abstract>The challenge for computational models of spatial descriptions for situated dialogue systems is the integration of information from different modalities. The semantics of spatial descriptions are grounded in at least two sources of information: (i) a geometric representation of space and (ii) the functional interaction of related objects that. We train several neural language models on descriptions of scenes from a dataset of image captions and examine whether the functional or geometric bias of spatial descriptions reported in the literature is reflected in the estimated perplexity of these models. The results of these experiments have implications for the creation of models of spatial lexical semantics for human-robot dialogue systems. Furthermore, they also provide an insight into the kinds of the semantic knowledge captured by neural language models trained on spatial descriptions, which has implications for image captioning systems.</abstract>
       <url hash="da14f455">W18-1401</url>
@@ -2191,7 +2191,7 @@
     <paper id="2">
       <title>Building and Learning Structures in a Situated Blocks World Through Deep Language Understanding</title>
       <author><first>Ian</first><last>Perera</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <pages>12–20</pages>
@@ -2203,7 +2203,7 @@
     <paper id="3">
       <title>Computational Models for Spatial Prepositions</title>
       <author><first>Georgiy</first><last>Platonov</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>21–30</pages>
       <abstract>Developing computational models of spatial prepositions (such as on, in, above, etc.) is crucial for such tasks as human-machine collaboration, story understanding, and 3D model generation from descriptions. However, these prepositions are notoriously vague and ambiguous, with meanings depending on the types, shapes and sizes of entities in the argument positions, the physical and task context, and other factors. As a result truth value judgments for prepositional relations are often uncertain and variable. In this paper we treat the modeling task as calling for assignment of probabilities to such relations as a function of multiple factors, where such probabilities can be viewed as estimates of whether humans would judge the relations to hold in given circumstances. We implemented our models in a 3D blocks world and a room world in a computer graphics setting, and found that true/false judgments based on these models do not differ much more from human judgments that the latter differ from one another. However, what really matters pragmatically is not the accuracy of truth value judgments but whether, for instance, the computer models suffice for identifying objects described in terms of prepositional relations, (e.g., “the box to the left of the table”, where there are multiple boxes). For such tasks, our models achieved accuracies above 90% for most relations.</abstract>
       <url hash="bc248e0a">W18-1403</url>
@@ -2212,8 +2212,8 @@
     </paper>
     <paper id="4">
       <title>Lexical Conceptual Structure of Literal and Metaphorical Spatial Language: A Case Study of “Push”</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Mari</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari</first><last>Olsen</last></author>
       <pages>31–40</pages>
       <abstract>Prior methodologies for understanding spatial language have treated literal expressions such as “Mary pushed the car over the edge” differently from metaphorical extensions such as “Mary’s job pushed her over the edge”. We demonstrate a methodology for standardizing literal and metaphorical meanings, by building on work in Lexical Conceptual Structure (LCS), a general-purpose representational component used in machine translation. We argue that spatial predicates naturally extend into other fields (e.g., circumstantial or temporal), and that LCS provides both a framework for distinguishing spatial from non-spatial, and a system for finding metaphorical meaning extensions. We start with MetaNet (MN), a large repository of conceptual metaphors, condensing 197 spatial entries into sixteen top-level categories of motion frames. Using naturally occurring instances of English push , and expansions of MN frames, we demonstrate that literal and metaphorical extensions exhibit patterns predicted and represented by the LCS model.</abstract>
       <url hash="8e9d7e41">W18-1404</url>
@@ -2222,8 +2222,8 @@
     </paper>
     <paper id="5">
       <title>Representing Spatial Relations in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Miriam R. L.</first><last>Petruck</last></author>
-      <author><first>Michael J.</first><last>Ellsworth</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></author>
+      <author id="michael-ellsworth"><first>Michael J.</first><last>Ellsworth</last></author>
       <pages>41–45</pages>
       <abstract>While humans use natural language to express spatial relations between and across entities in the world with great facility, natural language systems have a facility that depends on that human facility. This position paper presents approach to representing spatial relations in language, and advocates its adoption for representing the meaning of spatial language. This work shows the importance of axis-orientation systems for capturing the complexity of spatial relations, which FrameNet encodes with semantic types.</abstract>
       <url hash="07fd2849">W18-1405</url>
@@ -2235,7 +2235,7 @@
       <author><first>Jason</first><last>Baldridge</last></author>
       <author><first>Tania</first><last>Bedrax-Weiss</last></author>
       <author><first>Daphne</first><last>Luong</last></author>
-      <author><first>Srini</first><last>Narayanan</last></author>
+      <author id="srini-narayanan"><first>Srini</first><last>Narayanan</last></author>
       <author><first>Bo</first><last>Pang</last></author>
       <author><first>Fernando</first><last>Pereira</last></author>
       <author><first>Radu</first><last>Soricut</last></author>
@@ -2261,8 +2261,8 @@
     </paper>
     <paper id="8">
       <title>The Case for Systematically Derived Spatial Language Usage</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>63–70</pages>
       <abstract>This position paper argues that, while prior work in spatial language understanding for tasks such as robot navigation focuses on mapping natural language into deep conceptual or non-linguistic representations, it is possible to systematically derive regular patterns of spatial language usage from existing lexical-semantic resources. Furthermore, even with access to such resources, effective solutions to many application areas such as robot navigation and narrative generation also require additional knowledge at the syntax-semantics interface to cover the wide range of spatial expressions observed and available to natural language speakers. We ground our insights in, and present our extensions to, an existing lexico-semantic resource, covering 500 semantic classes of verbs, of which 219 fall within a spatial subset. We demonstrate that these extensions enable systematic derivation of regular patterns of spatial language without requiring manual annotation.</abstract>
       <url hash="622dfa61">W18-1408</url>
@@ -2275,7 +2275,7 @@
       <booktitle>Proceedings of the First Workshop on Storytelling</booktitle>
       <url hash="13e53380">W18-15</url>
       <editor><first>Margaret</first><last>Mitchell</last></editor>
-      <editor><first>Ting-Hao ‘Kenneth’</first><last>Huang</last></editor>
+      <editor id="ting-hao-huang"><first>Ting-Hao ‘Kenneth’</first><last>Huang</last></editor>
       <editor><first>Francis</first><last>Ferraro</last></editor>
       <editor><first>Ishan</first><last>Misra</last></editor>
       <doi>10.18653/v1/W18-15</doi>
@@ -2304,7 +2304,7 @@
     <paper id="2">
       <title>Linguistic Features of Helpfulness in Automated Support for Creative Writing</title>
       <author><first>Melissa</first><last>Roemmele</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <pages>14–19</pages>
       <abstract>We examine an emerging NLP application that supports creative writing by automatically suggesting continuing sentences in a story. The application tracks users’ modifications to generated sentences, which can be used to quantify their “helpfulness” in advancing the story. We explore the task of predicting helpfulness based on automatically detected linguistic features of the suggestions. We illustrate this analysis on a set of user interactions with the application using an initial selection of features relevant to story generation.</abstract>
       <url hash="fe4f97d7">W18-1502</url>
@@ -2313,9 +2313,9 @@
     </paper>
     <paper id="3">
       <title>A Pipeline for Creative Visual Storytelling</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Reginald</first><last>Hobbs</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>20–32</pages>
       <abstract>Computational visual storytelling produces a textual description of events and interpretations depicted in a sequence of images. These texts are made possible by advances and cross-disciplinary approaches in natural language processing, generation, and computer vision. We define a computational creative visual storytelling as one with the ability to alter the telling of a story along three aspects: to speak about different environments, to produce variations based on narrative goals, and to adapt the narrative to the audience. These aspects of creative storytelling and their effect on the narrative have yet to be explored in visual storytelling. This paper presents a pipeline of task-modules, Object Identification, Single-Image Inferencing, and Multi-Image Narration, that serve as a preliminary design for building a creative visual storyteller. We have piloted this design for a sequence of images in an annotation task. We present and analyze the collected corpus and describe plans towards automation.</abstract>
       <url hash="8c27f52e">W18-1503</url>
@@ -2347,7 +2347,7 @@
     <paper id="6">
       <title>An Encoder-decoder Approach to Predicting Causal Relations in Stories</title>
       <author><first>Melissa</first><last>Roemmele</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
       <pages>50–59</pages>
       <abstract>We address the task of predicting causally related events in stories according to a standard evaluation framework, the Choice of Plausible Alternatives (COPA). We present a neural encoder-decoder model that learns to predict relations between adjacent sequences in stories as a means of modeling causality. We explore this approach using different methods for extracting and representing sequence pairs as well as different model architectures. We also compare the impact of different training datasets on our model. In particular, we demonstrate the usefulness of a corpus not previously applied to COPA, the ROCStories corpus. While not state-of-the-art, our results establish a new reference point for systems evaluated on COPA, and one that is particularly informative for future neural-based approaches.</abstract>
       <url hash="edc96831">W18-1506</url>
@@ -2358,7 +2358,7 @@
       <title>Neural Event Extraction from Movies Description</title>
       <author><first>Alex</first><last>Tozzo</last></author>
       <author><first>Dejan</first><last>Jovanović</last></author>
-      <author><first>Mohamed</first><last>Amer</last></author>
+      <author id="mohamed-r-amer"><first>Mohamed</first><last>Amer</last></author>
       <pages>60–66</pages>
       <abstract>We present a novel approach for event extraction and abstraction from movie descriptions. Our event frame consists of “who”, “did what” “to whom”, “where”, and “when”. We formulate our problem using a recurrent neural network, enhanced with structural features extracted from syntactic parser, and trained using curriculum learning by progressively increasing the difficulty of the sentences. Our model serves as an intermediate step towards question answering systems, visual storytelling, and story completion tasks. We evaluate our approach on MovieQA dataset.</abstract>
       <url hash="4cf48533">W18-1507</url>
@@ -2409,7 +2409,7 @@
       <title>Detecting Syntactic Features of Translated <fixed-case>C</fixed-case>hinese</title>
       <author><first>Hai</first><last>Hu</last></author>
       <author><first>Wen</first><last>Li</last></author>
-      <author><first>Sandra</first><last>Kübler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kübler</last></author>
       <pages>20–28</pages>
       <abstract>We present a machine learning approach to distinguish texts translated to Chinese (by humans) from texts originally written in Chinese, with a focus on a wide range of syntactic features. Using Support Vector Machines (SVMs) as classifier on a genre-balanced corpus in translation studies of Chinese, we find that constituent parse trees and dependency triples as features without lexical information perform very well on the task, with an F-measure above 90%, close to the results of lexical n-gram features, without the risk of learning topic information rather than translation features. Thus, we claim syntactic features alone can accurately distinguish translated from original Chinese. Translated Chinese exhibits an increased use of determiners, subject position pronouns, NP + “的” as NP modifiers, multiple NPs or VPs conjoined by "、", among other structures. We also interpret the syntactic features with reference to previous translation studies in Chinese, particularly the usage of pronouns.</abstract>
       <url hash="946cd30f">W18-1603</url>
@@ -2432,7 +2432,7 @@
       <author><first>Francisco</first><last>Rangel</last></author>
       <author><first>Paolo</first><last>Rosso</last></author>
       <author><first>Julian</first><last>Brooke</last></author>
-      <author><first>Alexandra</first><last>Uitdenbogerd</last></author>
+      <author id="alexandra-l-uitdenbogerd"><first>Alexandra</first><last>Uitdenbogerd</last></author>
       <pages>39–43</pages>
       <abstract>In this paper, we approach the task of native language identification in a realistic cross-corpus scenario where a model is trained with available data and has to predict the native language from data of a different corpus. The motivation behind this study is to investigate native language identification in the Australian academic scenario where a majority of students come from China, Indonesia, and Arabic-speaking nations. We have proposed a statistical embedding representation reporting a significant improvement over common single-layer approaches of the state of the art, identifying Chinese, Arabic, and Indonesian in a cross-corpus scenario. The proposed approach was shown to be competitive even when the data is scarce and imbalanced.</abstract>
       <url hash="cf191134">W18-1605</url>
@@ -2447,7 +2447,7 @@
       <editor><first>Goran</first><last>Glavaš</last></editor>
       <editor><first>Swapna</first><last>Somasundaran</last></editor>
       <editor><first>Martin</first><last>Riedl</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <doi>10.18653/v1/W18-17</doi>
       <publisher>Association for Computational Linguistics</publisher>
       <address>New Orleans, Louisiana, USA</address>
@@ -2462,8 +2462,8 @@
     <paper id="1">
       <title>Scientific Discovery as Link Prediction in Influence and Citation Graphs</title>
       <author><first>Fan</first><last>Luo</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
-      <author><first>Gus</first><last>Hahn-Powell</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="gus-hahn-powell"><first>Gus</first><last>Hahn-Powell</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>1–6</pages>
       <abstract>We introduce a machine learning approach for the identification of “white spaces” in scientific knowledge. Our approach addresses this task as link prediction over a graph that contains over 2M influence statements such as “CTCF activates FOXA1”, which were automatically extracted using open-domain machine reading. We model this prediction task using graph-based features extracted from the above influence graph, as well as from a citation graph that captures scientific communities. We evaluated the proposed approach through backtesting. Although the data is heavily unbalanced (50 times more negative examples than positives), our approach predicts which influence links will be discovered in the “near future” with a F1 score of 27 points, and a mean average precision of 68%.</abstract>
@@ -2474,7 +2474,7 @@
     <paper id="2">
       <title>Efficient Generation and Processing of Word Co-occurrence Networks Using corpus2graph</title>
       <author><first>Zheng</first><last>Zhang</last></author>
-      <author><first>Pierre</first><last>Zweigenbaum</last></author>
+      <author id="pierre-zweigenbaum"><first>Pierre</first><last>Zweigenbaum</last></author>
       <author><first>Ruiqing</first><last>Yin</last></author>
       <pages>7–11</pages>
       <abstract>Corpus2graph is an open-source NLP-application-oriented tool that generates a word co-occurrence network from a large corpus. It not only contains different built-in methods to preprocess words, analyze sentences, extract word pairs and define edge weights, but also supports user-customized functions. By using parallelization techniques, it can generate a large word co-occurrence network of the whole English Wikipedia data within hours. And thanks to its nodes-edges-weight three-level progressive calculation design, rebuilding networks with different configurations is even faster as it does not need to start all over again. This tool also works with other graph libraries such as igraph, NetworkX and graph-tool as a front end providing data to boost network generation speed.</abstract>
@@ -2484,7 +2484,7 @@
     </paper>
     <paper id="3">
       <title>Multi-hop Inference for Sentence-level <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs: How Challenging is Meaningfully Combining Information for Science Question Answering?</title>
-      <author><first>Peter</first><last>Jansen</last></author>
+      <author id="peter-jansen"><first>Peter</first><last>Jansen</last></author>
       <pages>12–17</pages>
       <abstract>Question Answering for complex questions is often modelled as a graph construction or traversal task, where a solver must build or traverse a graph of facts that answer and explain a given question. This “multi-hop” inference has been shown to be extremely challenging, with few models able to aggregate more than two facts before being overwhelmed by “semantic drift”, or the tendency for long chains of facts to quickly drift off topic. This is a major barrier to current inference models, as even elementary science questions require an average of 4 to 6 facts to answer and explain. In this work we empirically characterize the difficulty of building or traversing a graph of sentences connected by lexical overlap, by evaluating chance sentence aggregation quality through 9,784 manually-annotated judgements across knowledge graphs built from three free-text corpora (including study guides and Simple Wikipedia). We demonstrate semantic drift tends to be high and aggregation quality low, at between 0.04 and 3, and highlight scenarios that maximize the likelihood of meaningfully combining information.</abstract>
       <url hash="ce709889">W18-1703</url>
@@ -2497,7 +2497,7 @@
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Thiago</first><last>Gouveia da Silva</last></author>
       <author><first>Andréa Carneiro</first><last>Linhares</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <pages>18–27</pages>
       <abstract>Multi-Sentence Compression (MSC) aims to generate a short sentence with key information from a cluster of closely related sentences. MSC enables summarization and question-answering systems to generate outputs combining fully formed sentences from one or several documents. This paper describes a new Integer Linear Programming method for MSC using a vertex-labeled graph to select different keywords, and novel 3-gram scores to generate more informative sentences while maintaining their grammaticality. Our system is of good quality and outperforms the state-of-the-art for evaluations led on news dataset. We led both automatic and manual evaluations to determine the informativeness and the grammaticality of compressions for each dataset. Additional tests, which take advantage of the fact that the length of compressions can be modulated, still improve ROUGE scores with shorter output sentences.</abstract>
       <url hash="05d480ae">W18-1704</url>
@@ -2586,7 +2586,7 @@
     </paper>
     <paper id="3">
       <title>Fluency Over Adequacy: A Pilot Study in Measuring User Trust in Imperfect <fixed-case>MT</fixed-case></title>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <pages>13–25</pages>
       <url hash="b20fa756">W18-1803</url>
@@ -2594,10 +2594,10 @@
     </paper>
     <paper id="4">
       <title>Combining Quality Estimation and Automatic Post-editing to Enhance Machine Translation output</title>
-      <author><first>Rajen</first><last>Chatterjee</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>26–38</pages>
       <url hash="f79fc2ef">W18-1804</url>
@@ -2607,7 +2607,7 @@
       <title>Neural Morphological Tagging of Lemma Sequences for Machine Translation</title>
       <author><first>Costanza</first><last>Conforti</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>39–53</pages>
       <url hash="81963903">W18-1805</url>
       <bibkey>conforti-etal-2018-neural</bibkey>
@@ -2624,8 +2624,8 @@
       <title>How Robust Are Character-Based Word Embeddings in Tagging and <fixed-case>MT</fixed-case> Against Wrod Scramlbing or Randdm Nouse?</title>
       <author><first>Georg</first><last>Heigold</last></author>
       <author><first>Stalin</first><last>Varanasi</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>68–80</pages>
       <url hash="8772bde6">W18-1807</url>
       <bibkey>heigold-etal-2018-robust</bibkey>
@@ -2642,7 +2642,7 @@
     <paper id="9">
       <title>Register-sensitive Translation: a Case Study of <fixed-case>M</fixed-case>andarin and <fixed-case>C</fixed-case>antonese (Non-archival Extended Abstract)</title>
       <author><first>Tak-sum</first><last>Wong</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <pages>89–96</pages>
       <url hash="38ab22f6">W18-1809</url>
       <bibkey>wong-lee-2018-register</bibkey>
@@ -2693,7 +2693,7 @@
     <paper id="15">
       <title>Simultaneous Translation using Optimized Segmentation</title>
       <author><first>Maryam</first><last>Siahbani</last></author>
-      <author><first>Hassan</first><last>Shavarani</last></author>
+      <author id="hassan-s-shavarani"><first>Hassan</first><last>Shavarani</last></author>
       <author><first>Ashkan</first><last>Alinejad</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>154–167</pages>
@@ -2702,13 +2702,13 @@
     </paper>
     <paper id="16">
       <title>Neural Monkey: The Current State and Beyond</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Tom</first><last>Kocmi</last></author>
       <author><first>Tomáš</first><last>Musil</last></author>
       <author><first>Ondřej</first><last>Cífka</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>168–176</pages>
       <url hash="d26ca2d3">W18-1816</url>
       <bibkey>helcl-etal-2018-neural</bibkey>
@@ -2720,7 +2720,7 @@
       <author><first>Yuntian</first><last>Deng</last></author>
       <author><first>Vincent</first><last>Nguyen</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>177–184</pages>
       <url hash="50390a91">W18-1817</url>
       <bibkey>klein-etal-2018-opennmt</bibkey>
@@ -2753,7 +2753,7 @@
       <author><first>Aidan</first><last>Gomez</last></author>
       <author><first>Stephan</first><last>Gouws</last></author>
       <author><first>Llion</first><last>Jones</last></author>
-      <author><first>Łukasz</first><last>Kaiser</last></author>
+      <author id="lukasz-kaiser"><first>Łukasz</first><last>Kaiser</last></author>
       <author><first>Nal</first><last>Kalchbrenner</last></author>
       <author><first>Niki</first><last>Parmar</last></author>
       <author><first>Ryan</first><last>Sepassi</last></author>
@@ -2769,7 +2769,7 @@
       <author><first>Tobias</first><last>Domhan</last></author>
       <author><first>Michael</first><last>Denkowski</last></author>
       <author><first>David</first><last>Vilar</last></author>
-      <author><first>Artem</first><last>Sokolov</last></author>
+      <author id="artem-sokolov"><first>Artem</first><last>Sokolov</last></author>
       <author><first>Ann</first><last>Clifton</last></author>
       <author><first>Matt</first><last>Post</last></author>
       <pages>200–207</pages>
@@ -2793,8 +2793,8 @@
       <url hash="f2240f67">W18-19</url>
       <editor><first>Janice</first><last>Campbell</last></editor>
       <editor><first>Alex</first><last>Yanishevsky</last></editor>
-      <editor><first>Jennifer</first><last>Doyon</last></editor>
-      <editor><first>Doug</first><last>Jones</last></editor>
+      <editor id="jennifer-doyon"><first>Jennifer</first><last>Doyon</last></editor>
+      <editor id="douglas-jones"><first>Doug</first><last>Jones</last></editor>
       <publisher>Association for Machine Translation in the Americas</publisher>
       <address>Boston, MA</address>
       <month>March</month>
@@ -2853,7 +2853,7 @@
     </paper>
     <paper id="8">
       <title>Same-language machine translation for local flavours/flavors</title>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Janice</first><last>Campbell</last></author>
       <pages>35–53</pages>
       <url hash="05beb7bd">W18-1908</url>
@@ -2868,8 +2868,8 @@
     </paper>
     <paper id="10">
       <title>Developing a Neural Machine Translation Service for the 2017-2018 <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion Presidency</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
-      <author><first>Rihards</first><last>Kalnins</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="rihards-kalnins"><first>Rihards</first><last>Kalnins</last></author>
       <pages>72–83</pages>
       <url hash="707ccd54">W18-1910</url>
       <bibkey>pinnis-kalnins-2018-developing</bibkey>
@@ -2897,8 +2897,8 @@
     </paper>
     <paper id="14">
       <title>Turning <fixed-case>NMT</fixed-case> Research into Commercial Products</title>
-      <author><first>Dragos</first><last>Munteanu</last></author>
-      <author><first>Adrià</first><last>Gispert</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos</first><last>Munteanu</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>Gispert</last></author>
       <pages>166–193</pages>
       <url hash="b226519c">W18-1914</url>
       <bibkey>munteanu-gispert-2018-turning</bibkey>
@@ -2923,8 +2923,8 @@
       <author><first>Ann</first><last>Clifton</last></author>
       <author><first>Greg</first><last>Hanneman</last></author>
       <author><first>Patrick</first><last>Porter</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
-      <author><first>Almut</first><last>Hildebrand</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
+      <author id="almut-silja-hildebrand"><first>Almut</first><last>Hildebrand</last></author>
       <author><first>Anish</first><last>Kumar</last></author>
       <pages>223–233</pages>
       <url hash="aaea97ca">W18-1917</url>
@@ -2950,7 +2950,7 @@
       <author><first>Corey</first><last>Miller</last></author>
       <author><first>Danielle</first><last>Silverman</last></author>
       <author><first>Vanesa</first><last>Jurica</last></author>
-      <author><first>Elizabeth</first><last>Richerson</last></author>
+      <author id="elizabeth-schroeder"><first>Elizabeth</first><last>Richerson</last></author>
       <author><first>Rodney</first><last>Morris</last></author>
       <author><first>Elisabeth</first><last>Mallard</last></author>
       <pages>275–282</pages>
@@ -2959,18 +2959,18 @@
     </paper>
     <paper id="21">
       <title>Challenges in Speech Recognition and Translation of High-Value Low-Density Polysynthetic Languages</title>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>John</first><last>Morgan</last></author>
       <author><first>Stephen</first><last>LaRocca</last></author>
       <author><first>Jeffrey</first><last>Micher</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>283–293</pages>
       <url hash="8a48c794">W18-1921</url>
       <bibkey>klavans-etal-2018-challenges</bibkey>
     </paper>
     <paper id="22">
       <title>Evaluating Automatic Speech Recognition in Translation</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
       <author><first>Corey</first><last>Miller</last></author>
       <pages>294–302</pages>
       <url hash="0c20a2b7">W18-1922</url>
@@ -2987,7 +2987,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>T</fixed-case>utorial: De-mystifying Neural <fixed-case>MT</fixed-case></title>
-      <author><first>Dragos</first><last>Munteanu</last></author>
+      <author id="dragos-stefan-munteanu"><first>Dragos</first><last>Munteanu</last></author>
       <author><first>Ling</first><last>Tsou</last></author>
       <url hash="ed45f247">W18-1924</url>
       <bibkey>munteanu-tsou-2018-tutorial</bibkey>
@@ -2995,7 +2995,7 @@
     <paper id="25">
       <title><fixed-case>T</fixed-case>utorial: <fixed-case>MQM</fixed-case>-<fixed-case>DQF</fixed-case>: A Good Marriage (Translation Quality for the 21st Century)</title>
       <author><first>Arle</first><last>Lommel</last></author>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <url hash="0ddb1111">W18-1925</url>
       <bibkey>lommel-melby-2018-tutorial</bibkey>
     </paper>
@@ -3032,7 +3032,7 @@
     </paper>
     <paper id="2">
       <title>Termbase Exchange (<fixed-case>TBX</fixed-case>)</title>
-      <author><first>Sue</first><last>Wright</last></author>
+      <author id="sue-ellen-wright"><first>Sue</first><last>Wright</last></author>
       <pages>25–47</pages>
       <url hash="c40109f2">W18-2002</url>
       <bibkey>wright-2018-termbase</bibkey>
@@ -3060,7 +3060,7 @@
     </paper>
     <paper id="6">
       <title>Translation <fixed-case>API</fixed-case> Cases and Classes (<fixed-case>TAPICC</fixed-case>)</title>
-      <author><first>Alan</first><last>Melby</last></author>
+      <author id="alan-k-melby"><first>Alan</first><last>Melby</last></author>
       <pages>95–112</pages>
       <url hash="2a466011">W18-2006</url>
       <bibkey>melby-2018-translation</bibkey>
@@ -3070,9 +3070,9 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>AMTA</fixed-case> 2018 Workshop on Translation Quality Estimation and Automatic Post-Editing</booktitle>
       <url hash="0717f387">W18-21</url>
-      <editor><first>Ramón</first><last>Astudillo</last></editor>
-      <editor><first>João</first><last>Graça</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="ramon-fernandez-astudillo"><first>Ramón</first><last>Astudillo</last></editor>
+      <editor id="joao-graca"><first>João</first><last>Graça</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <publisher>Association for Machine Translation in the Americas</publisher>
       <address>Boston, MA</address>
       <month>March</month>
@@ -3191,7 +3191,7 @@
     <paper id="4">
       <title>A Survey of Machine Translation Work in the <fixed-case>P</fixed-case>hilippines: From 1998 to 2018</title>
       <author><first>Nathaniel</first><last>Oco</last></author>
-      <author><first>Rachel</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel</first><last>Roxas</last></author>
       <pages>30–36</pages>
       <url hash="592a11d5">W18-2204</url>
       <bibkey>oco-roxas-2018-survey</bibkey>
@@ -3219,7 +3219,7 @@
       <title>Apertium’s Web Toolchain for Low-Resource Language Technology</title>
       <author><first>Sushain</first><last>Cherivirala</last></author>
       <author><first>Shardul</first><last>Chiplunkar</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Kevin</first><last>Unhammer</last></author>
       <pages>53–62</pages>
       <url hash="5cdd5c1d">W18-2207</url>
@@ -3231,9 +3231,9 @@
       <booktitle>Proceedings of the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2018 workshop</booktitle>
       <url hash="3d01a69e">W18-23</url>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Melbourne, Australia</address>
       <month>July</month>
@@ -3290,7 +3290,7 @@
     </paper>
     <paper id="5">
       <title>Identifying Key Sentences for Precision Oncology Using Semi-Supervised Learning</title>
-      <author><first>Jurica</first><last>Ševa</last></author>
+      <author id="jurica-seva"><first>Jurica</first><last>Ševa</last></author>
       <author><first>Martin</first><last>Wackerbauer</last></author>
       <author><first>Ulf</first><last>Leser</last></author>
       <pages>35–46</pages>
@@ -3301,7 +3301,7 @@
     </paper>
     <paper id="6">
       <title>Ontology alignment in the biomedical domain using entity definitions and context</title>
-      <author><first>Lucy Lu</first><last>Wang</last></author>
+      <author id="lucy-lu-wang"><first>Lucy Lu</first><last>Wang</last></author>
       <author><first>Chandra</first><last>Bhagavatula</last></author>
       <author><first>Mark</first><last>Neumann</last></author>
       <author><first>Kyle</first><last>Lo</last></author>
@@ -3374,7 +3374,7 @@
       <author><first>Nitish</first><last>Kulkarni</last></author>
       <author><first>Srividya</first><last>Pranavi</last></author>
       <author><first>Gabriel</first><last>Bayomi</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>109–117</pages>
       <abstract>In this paper, we present a novel Biomedical Question Answering system, BioAMA: “Biomedical Ask Me Anything” on task 5b of the annual BioASQ challenge. In this work, we focus on a wide variety of question types including factoid, list based, summary and yes/no type questions that generate both exact and well-formed ‘ideal’ answers. For summary-type questions, we combine effective IR-based techniques for retrieval and diversification of relevant snippets for a question to create an end-to-end system which achieves a ROUGE-2 score of 0.72 and a ROUGE-SU4 score of 0.71 on ideal answer questions (7% improvement over the previous best model). Additionally, we propose a novel NLI-based framework to answer the yes/no questions. To train the NLI model, we also devise a transfer-learning technique by cross-domain projection of word embeddings. Finally, we present a two-stage approach to address the factoid and list type questions by first generating a candidate set using NER taggers and ranking them using both supervised or unsupervised techniques.</abstract>
@@ -3385,7 +3385,7 @@
     <paper id="13">
       <title><fixed-case>P</fixed-case>hrase2<fixed-case>V</fixed-case>ec<fixed-case>GLM</fixed-case>: Neural generalized language model–based semantic tagging for complex query reformulation in medical <fixed-case>IR</fixed-case></title>
       <author><first>Manirupa</first><last>Das</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <author><first>Simon</first><last>Lin</last></author>
       <author><first>Soheil</first><last>Moosavinasab</last></author>
       <author><first>David</first><last>Chen</last></author>
@@ -3401,7 +3401,7 @@
     <paper id="14">
       <title>Convolutional neural networks for chemical-disease relation extraction are improved with character-based word embeddings</title>
       <author><first>Dat Quoc</first><last>Nguyen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>129–136</pages>
       <abstract>We investigate the incorporation of character-based word representations into a standard CNN-based relation extraction model. We experiment with two common neural architectures, CNN and LSTM, to learn word vector representations from character embeddings. Through a task on the BioCreative-V CDR corpus, extracting relationships between chemicals and diseases, we show that models exploiting the character-based word representations improve on models that do not use this information, obtaining state-of-the-art result relative to previous neural approaches.</abstract>
       <url hash="bad88dd9">W18-2314</url>
@@ -3443,7 +3443,7 @@
     <paper id="18">
       <title><fixed-case>S</fixed-case>ingle<fixed-case>C</fixed-case>ite: Towards an improved Single Citation Search in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed</title>
       <author><first>Lana</first><last>Yeganova</last></author>
-      <author><first>Donald C</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Donald C</first><last>Comeau</last></author>
       <author><first>Won</first><last>Kim</last></author>
       <author><first>W John</first><last>Wilbur</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
@@ -3456,7 +3456,7 @@
     <paper id="19">
       <title>A Framework for Developing and Evaluating Word Embeddings of Drug-named Entity</title>
       <author><first>Mengnan</first><last>Zhao</last></author>
-      <author><first>Aaron J.</first><last>Masino</last></author>
+      <author id="aaron-j-masino"><first>Aaron J.</first><last>Masino</last></author>
       <author><first>Christopher C.</first><last>Yang</last></author>
       <pages>156–160</pages>
       <abstract>We investigate the quality of task specific word embeddings created with relatively small, targeted corpora. We present a comprehensive evaluation framework including both intrinsic and extrinsic evaluation that can be expanded to named entities beyond drug name. Intrinsic evaluation results tell that drug name embeddings created with a domain specific document corpus outperformed the previously published versions that derived from a very large general text corpus. Extrinsic evaluation uses word embedding for the task of drug name recognition with Bi-LSTM model and the results demonstrate the advantage of using domain-specific word embeddings as the only input feature for drug name recognition with F1-score achieving 0.91. This work suggests that it may be advantageous to derive domain specific embeddings for certain tasks even when the domain specific corpus is of limited size.</abstract>
@@ -3468,7 +3468,7 @@
       <title><fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>-based dataset for measuring the relevance of text retrieval</title>
       <author><first>Won Gyu</first><last>Kim</last></author>
       <author><first>Lana</first><last>Yeganova</last></author>
-      <author><first>Donald</first><last>Comeau</last></author>
+      <author id="donald-c-comeau"><first>Donald</first><last>Comeau</last></author>
       <author><first>W John</first><last>Wilbur</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>161–165</pages>
@@ -3548,8 +3548,8 @@
     <meta>
       <booktitle>Proceedings of the Seventh Named Entities Workshop</booktitle>
       <url hash="872af1b5">W18-24</url>
-      <editor><first>Nancy</first><last>Chen</last></editor>
-      <editor><first>Rafael E.</first><last>Banchs</last></editor>
+      <editor id="nancy-chen"><first>Nancy</first><last>Chen</last></editor>
+      <editor id="rafael-e-banchs"><first>Rafael E.</first><last>Banchs</last></editor>
       <editor><first>Xiangyu</first><last>Duan</last></editor>
       <editor><first>Min</first><last>Zhang</last></editor>
       <editor><first>Haizhou</first><last>Li</last></editor>
@@ -3596,7 +3596,7 @@
       <title>Attention-based Semantic Priming for Slot-filling</title>
       <author><first>Jiewen</first><last>Wu</last></author>
       <author><first>Rafael E.</first><last>Banchs</last></author>
-      <author><first>Luis Fernando</first><last>D’Haro</last></author>
+      <author id="luis-fernando-dharo"><first>Luis Fernando</first><last>D’Haro</last></author>
       <author><first>Pavitra</first><last>Krishnaswamy</last></author>
       <author><first>Nancy</first><last>Chen</last></author>
       <pages>22–26</pages>
@@ -3610,7 +3610,7 @@
       <author><first>Vinay</first><last>Singh</last></author>
       <author><first>Deepanshu</first><last>Vijay</last></author>
       <author><first>Syed Sarfaraz</first><last>Akhtar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>27–35</pages>
       <abstract>Named Entity Recognition (NER) is a major task in the field of Natural Language Processing (NLP), and also is a sub-task of Information Extraction. The challenge of NER for tweets lie in the insufficient information available in a tweet. There has been a significant amount of work done related to entity extraction, but only for resource rich languages and domains such as newswire. Entity extraction is, in general, a challenging task for such an informal text, and code-mixed text further complicates the process with it’s unstructured and incomplete information. We propose experiments with different machine learning classification algorithms with word, character and lexical features. The algorithms we experimented with are Decision tree, Long Short-Term Memory (LSTM), and Conditional Random Field (CRF). In this paper, we present a corpus for NER in Hindi-English Code-Mixed along with extensive experiments on our machine learning models which achieved the best f1-score of 0.95 with both CRF and LSTM.</abstract>
       <url hash="b7bc4c99">W18-2405</url>
@@ -3634,7 +3634,7 @@
       <title>Named-Entity Tagging and Domain adaptation for Better Customized Translation</title>
       <author><first>Zhongwei</first><last>Li</last></author>
       <author><first>Xuancong</first><last>Wang</last></author>
-      <author><first>Ai Ti</first><last>Aw</last></author>
+      <author id="aiti-aw"><first>Ai Ti</first><last>Aw</last></author>
       <author><first>Eng Siong</first><last>Chng</last></author>
       <author><first>Haizhou</first><last>Li</last></author>
       <pages>41–46</pages>
@@ -3751,10 +3751,10 @@
       <author><first>Mark</first><last>Neumann</last></author>
       <author><first>Oyvind</first><last>Tafjord</last></author>
       <author><first>Pradeep</first><last>Dasigi</last></author>
-      <author><first>Nelson F.</first><last>Liu</last></author>
-      <author><first>Matthew</first><last>Peters</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
+      <author id="matthew-e-peters"><first>Matthew</first><last>Peters</last></author>
       <author><first>Michael</first><last>Schmitz</last></author>
-      <author><first>Luke</first><last>Zettlemoyer</last></author>
+      <author id="luke-zettlemoyer"><first>Luke</first><last>Zettlemoyer</last></author>
       <pages>1–6</pages>
       <abstract>Modern natural language processing (NLP) research requires writing code. Ideally this code would provide a precise definition of the approach, easy repeatability of results, and a basis for extending the research. However, many research codebases bury high-level parameters under implementation details, are challenging to run and debug, and are difficult enough to extend that they are more likely to be rewritten. This paper describes AllenNLP, a library for applying deep learning methods to NLP research that addresses these issues with easy-to-use command-line tools, declarative configuration-driven experiments, and modular NLP abstractions. AllenNLP has already increased the rate of research experimentation and the sharing of NLP components at the Allen Institute for Artificial Intelligence, and we are working to have the same impact across the field.</abstract>
       <url hash="0eaa2993">W18-2501</url>
@@ -3788,7 +3788,7 @@
       <author><first>Devendra Singh</first><last>Chaplot</last></author>
       <author><first>Bowen</first><last>Tan</last></author>
       <author><first>Xingjiang</first><last>Yu</last></author>
-      <author><first>Eric</first><last>Xing</last></author>
+      <author id="eric-xing"><first>Eric</first><last>Xing</last></author>
       <pages>13–22</pages>
       <abstract>We introduce Texar, an open-source toolkit aiming to support the broad set of text generation tasks. Different from many existing toolkits that are specialized for specific applications (e.g., neural machine translation), Texar is designed to be highly flexible and versatile. This is achieved by abstracting the common patterns underlying the diverse tasks and methodologies, creating a library of highly reusable modules and functionalities, and enabling arbitrary model architectures and various algorithmic paradigms. The features make Texar particularly suitable for technique sharing and generalization across different text generation applications. The toolkit emphasizes heavily on extensibility and modularized system design, so that components can be freely plugged in or swapped out. We conduct extensive experiments and case studies to demonstrate the use and advantage of the toolkit.</abstract>
       <url hash="4814cd31">W18-2503</url>
@@ -3814,9 +3814,9 @@
     </paper>
     <paper id="5">
       <title>The risk of sub-optimal use of Open Source <fixed-case>NLP</fixed-case> Software: <fixed-case>UKB</fixed-case> is inadvertently state-of-the-art in knowledge-based <fixed-case>WSD</fixed-case></title>
-      <author><first>Eneko</first><last>Agirre</last></author>
-      <author><first>Oier</first><last>López de Lacalle</last></author>
-      <author><first>Aitor</first><last>Soroa</last></author>
+      <author id="eneko-agirre"><first>Eneko</first><last>Agirre</last></author>
+      <author id="oier-lopez-de-lacalle"><first>Oier</first><last>López de Lacalle</last></author>
+      <author id="aitor-soroa"><first>Aitor</first><last>Soroa</last></author>
       <pages>29–33</pages>
       <abstract>UKB is an open source collection of programs for performing, among other tasks, Knowledge-Based Word Sense Disambiguation (WSD). Since it was released in 2009 it has been often used out-of-the-box in sub-optimal settings. We show that nine years later it is the state-of-the-art on knowledge-based WSD. This case shows the pitfalls of releasing open source NLP software without optimal default settings and precise instructions for reproducibility.</abstract>
       <url hash="1403c9fb">W18-2505</url>
@@ -3854,8 +3854,8 @@
       <title>Integrating Multiple <fixed-case>NLP</fixed-case> Technologies into an Open-source Platform for Multilingual Media Monitoring</title>
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Renārs</first><last>Liepins</last></author>
-      <author><first>Didzis</first><last>Gosko</last></author>
-      <author><first>Guntis</first><last>Barzdins</last></author>
+      <author id="didzis-gosko"><first>Didzis</first><last>Gosko</last></author>
+      <author id="guntis-barzdins"><first>Guntis</first><last>Barzdins</last></author>
       <pages>47–51</pages>
       <abstract>The open-source SUMMA Platform is a highly scalable distributed architecture for monitoring a large number of media broadcasts in parallel, with a lag behind actual broadcast time of at most a few minutes. It assembles numerous state-of-the-art NLP technologies into a fully automated media ingestion pipeline that can record live broadcasts, detect and transcribe spoken content, translate from several languages (original text or transcribed speech) into English, recognize Named Entities, detect topics, cluster and summarize documents across language barriers, and extract and store factual claims in these news items. This paper describes the intended use cases and discusses the system design decisions that allowed us to integrate state-of-the-art NLP modules into an effective workflow with comparatively little effort.</abstract>
       <url hash="afa06f85">W18-2508</url>
@@ -3864,7 +3864,7 @@
     </paper>
     <paper id="9">
       <title>The Annotated Transformer</title>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>52–60</pages>
       <abstract>A major goal of open-source NLP is to quickly and accurately reproduce the results of new work, in a manner that the community can easily use and modify. While most papers publish enough detail for replication, it still may be difficult to achieve good results in practice. This paper presents a worked exercise of paper reproduction with the goal of implementing the results of the recent Transformer model. The replication exercise aims at simple code structure that follows closely with the original work, while achieving an efficient usable system.</abstract>
       <url hash="c8d74fea">W18-2509</url>
@@ -3894,7 +3894,7 @@
     <paper id="1">
       <title>Ruminating Reader: Reasoning with Gated Multi-hop Attention</title>
       <author><first>Yichen</first><last>Gong</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>1–11</pages>
       <abstract>To answer the question in machine comprehension (MC) task, the models need to establish the interaction between the question and the context. To tackle the problem that the single-pass model cannot reflect on and correct its answer, we present Ruminating Reader. Ruminating Reader adds a second pass of attention and a novel information fusion component to the Bi-Directional Attention Flow model (BiDAF). We propose novel layer structures that construct a query aware context vector representation and fuse encoding representation with intermediate representation on top of BiDAF model. We show that a multi-hop attention mechanism can be applied to a bi-directional attention structure. In experiments on SQuAD, we find that the Reader outperforms the BiDAF baseline by 2.1 F1 score and 2.7 EM score. Our analysis shows that different hops of the attention have different responsibilities in selecting answers.</abstract>
       <url hash="cc2f3f26">W18-2601</url>
@@ -3904,7 +3904,7 @@
     <paper id="2">
       <title>Systematic Error Analysis of the <fixed-case>S</fixed-case>tanford Question Answering Dataset</title>
       <author><first>Marc-Antoine</first><last>Rondeau</last></author>
-      <author><first>T. J.</first><last>Hazen</last></author>
+      <author id="timothy-j-hazen"><first>T. J.</first><last>Hazen</last></author>
       <pages>12–20</pages>
       <abstract>We analyzed the outputs of multiple question answering (QA) models applied to the Stanford Question Answering Dataset (SQuAD) to identify the core challenges for QA systems on this data set. Through an iterative process, challenging aspects were hypothesized through qualitative analysis of the common error cases. A classifier was then constructed to predict whether SQuAD test examples were likely to be difficult for systems to answer based on features associated with the hypothesized aspects. The classifier’s performance was used to accept or reject each aspect as an indicator of difficulty. With this approach, we ensured that our hypotheses were systematically tested and not simply accepted based on our pre-existing biases. Our explanations are not accepted based on human evaluation of individual examples. This process also enabled us to identify the primary QA strategy learned by the models, i.e., systems determined the acceptable answer type for a question and then selected the acceptable answer span of that type containing the highest density of words present in the question within its local vicinity in the passage.</abstract>
       <url hash="0d86359d">W18-2602</url>
@@ -3915,7 +3915,7 @@
     <paper id="3">
       <title>A Multi-Stage Memory Augmented Neural Network for Machine Reading Comprehension</title>
       <author><first>Seunghak</first><last>Yu</last></author>
-      <author><first>Sathish Reddy</first><last>Indurthi</last></author>
+      <author id="sathish-reddy-indurthi"><first>Sathish Reddy</first><last>Indurthi</last></author>
       <author><first>Seohyun</first><last>Back</last></author>
       <author><first>Haejun</first><last>Lee</last></author>
       <pages>21–30</pages>
@@ -3960,7 +3960,7 @@
       <title>Robust and Scalable Differentiable Neural Computer for Question Answering</title>
       <author><first>Jörg</first><last>Franke</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>47–59</pages>
       <abstract>Deep learning models are often not easily adaptable to new tasks and require task-specific adjustments. The differentiable neural computer (DNC), a memory-augmented neural network, is designed as a general problem solver which can be used in a wide range of tasks. But in reality, it is hard to apply this model to new tasks. We analyze the DNC and identify possible improvements within the application of question answering. This motivates a more robust and scalable DNC (rsDNC). The objective precondition is to keep the general character of this model intact while making its application more reliable and speeding up its required training time. The rsDNC is distinguished by a more robust training, a slim memory unit and a bidirectional architecture. We not only achieve new state-of-the-art performance on the bAbI task, but also minimize the performance variance between different initializations. Furthermore, we demonstrate the simplified applicability of the rsDNC to new tasks with passable results on the CNN RC task without adaptions.</abstract>
       <url hash="335bb7e5">W18-2606</url>
@@ -3976,12 +3976,12 @@
       <author><first>Rajarshi</first><last>Das</last></author>
       <author><first>Andrew</first><last>McCallum</last></author>
       <author><first>Maria</first><last>Chang</last></author>
-      <author><first>Achille</first><last>Fokoue-Nkoutche</last></author>
+      <author id="achille-fokoue-nkoutche"><first>Achille</first><last>Fokoue-Nkoutche</last></author>
       <author><first>Pavan</first><last>Kapanipathi</last></author>
       <author><first>Nicholas</first><last>Mattei</last></author>
       <author><first>Ryan</first><last>Musa</last></author>
       <author><first>Kartik</first><last>Talamadupula</last></author>
-      <author><first>Michael</first><last>Witbrock</last></author>
+      <author id="michael-j-witbrock"><first>Michael</first><last>Witbrock</last></author>
       <pages>60–70</pages>
       <abstract>The recent work of Clark et al. (2018) introduces the AI2 Reasoning Challenge (ARC) and the associated ARC dataset that partitions open domain, complex science questions into easy and challenge sets. That paper includes an analysis of 100 questions with respect to the types of knowledge and reasoning required to answer them; however, it does not include clear definitions of these types, nor does it offer information about the quality of the labels. We propose a comprehensive set of definitions of knowledge and reasoning types necessary for answering the questions in the ARC dataset. Using ten annotators and a sophisticated annotation interface, we analyze the distribution of labels across the challenge set and statistics related to them. Additionally, we demonstrate that although naive information retrieval methods return sentences that are irrelevant to answering the query, sufficient supporting text is often present in the (ARC) corpus. Evaluating with human-selected relevant sentences improves the performance of a neural machine comprehension model by 42 points.</abstract>
       <url hash="efc23f6f">W18-2607</url>
@@ -4018,7 +4018,7 @@
       <title>Comparative Analysis of Neural <fixed-case>QA</fixed-case> models on <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case></title>
       <author><first>Soumya</first><last>Wadhwa</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>89–97</pages>
       <abstract>The task of Question Answering has gained prominence in the past few decades for testing the ability of machines to understand natural language. Large datasets for Machine Reading have led to the development of neural models that cater to deeper language understanding compared to information retrieval tasks. Different components in these neural architectures are intended to tackle different challenges. As a first step towards achieving generalization across multiple domains, we attempt to understand and compare the peculiarities of existing end-to-end neural models on the Stanford Question Answering Dataset (SQuAD) by performing quantitative as well as qualitative analysis of the results attained by each of them. We observed that prediction errors reflect certain model-specific biases, which we further discuss in this paper.</abstract>
       <url hash="d677451f">W18-2610</url>
@@ -4045,7 +4045,7 @@
       <url hash="86864c40">W18-27</url>
       <editor><first>Alexandra</first><last>Birch</last></editor>
       <editor><first>Andrew</first><last>Finch</last></editor>
-      <editor><first>Thang</first><last>Luong</last></editor>
+      <editor id="minh-thang-luong"><first>Thang</first><last>Luong</last></editor>
       <editor><first>Graham</first><last>Neubig</last></editor>
       <editor><first>Yusuke</first><last>Oda</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4086,8 +4086,8 @@
       <title>Iterative Back-Translation for Neural Machine Translation</title>
       <author><first>Vu Cong Duy</first><last>Hoang</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <pages>18–24</pages>
       <abstract>We present iterative back-translation, a method for generating increasingly better synthetic parallel data from monolingual data to train neural machine translation systems. Our proposed method is very simple yet effective and highly applicable in practice. We demonstrate improvements in neural machine translation quality in both high and low resourced scenarios, including the best reported BLEU scores for the WMT 2017 German↔English tasks.</abstract>
       <url hash="d58eedc3">W18-2703</url>
@@ -4097,7 +4097,7 @@
     <paper id="4">
       <title>Inducing Grammars with and for Neural Machine Translation</title>
       <author><first>Yonatan</first><last>Bisk</last></author>
-      <author><first>Ke</first><last>Tran</last></author>
+      <author id="ke-m-tran"><first>Ke</first><last>Tran</last></author>
       <pages>25–35</pages>
       <abstract>Machine translation systems require semantic knowledge and grammatical understanding. Neural machine translation (NMT) systems often assume this information is captured by an attention mechanism and a decoder that ensures fluency. Recent work has shown that incorporating explicit syntax alleviates the burden of modeling both types of knowledge. However, requiring parses is expensive and does not explore the question of what syntax a model needs during translation. To address both of these issues we introduce a model that simultaneously translates while inducing dependency trees. In this way, we leverage the benefits of structure while investigating what syntax NMT must induce to maximize performance. We show that our dependency trees are 1. language pair dependent and 2. improve translation quality.</abstract>
       <url hash="5beda686">W18-2704</url>
@@ -4131,7 +4131,7 @@
       <title>Enhancement of Encoder and Attention Using Target Monolingual Corpora in Neural Machine Translation</title>
       <author><first>Kenji</first><last>Imamura</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>55–63</pages>
       <abstract>A large-scale parallel corpus is required to train encoder-decoder neural machine translation. The method of using synthetic parallel texts, in which target monolingual corpora are automatically translated into source sentences, is effective in improving the decoder, but is unreliable for enhancing the encoder. In this paper, we propose a method that enhances the encoder and attention using target monolingual corpora by generating multiple source sentences via sampling. By using multiple source sentences, diversity close to that of humans is achieved. Our experimental results show that the translation quality is improved by increasing the number of synthetic source sentences for each given target sentence, and quality close to that using a manually created parallel corpus was achieved.</abstract>
       <url hash="1ee0cf05">W18-2707</url>
@@ -4184,9 +4184,9 @@
     </paper>
     <paper id="12">
       <title>Towards one-shot learning for rare-word translation with external experts</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>100–109</pages>
       <abstract>Neural machine translation (NMT) has significantly improved the quality of automatic translation models. One of the main challenges in current systems is the translation of rare words. We present a generic approach to address this weakness by having external models annotate the training data as Experts, and control the model-expert interaction with a pointer network and reinforcement learning. Our experiments using phrase-based models to simulate Experts to complement neural machine translation models show that the model can be trained to copy the annotations into the output consistently. We demonstrate the benefit of our proposed framework in outof domain translation scenarios with only lexical resources, improving more than 1.0 BLEU point in both translation directions English-Spanish and German-English.</abstract>
       <url hash="8375caf0">W18-2712</url>
@@ -4196,7 +4196,7 @@
     <paper id="13">
       <title><fixed-case>NICT</fixed-case> Self-Training Approach to Neural Machine Translation at <fixed-case>NMT</fixed-case>-2018</title>
       <author><first>Kenji</first><last>Imamura</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>110–115</pages>
       <abstract>This paper describes the NICT neural machine translation system submitted at the NMT-2018 shared task. A characteristic of our approach is the introduction of self-training. Since our self-training does not change the model structure, it does not influence the efficiency of translation, such as the translation speed. The experimental results showed that the translation quality improved not only in the sequence-to-sequence (seq-to-seq) models but also in the transformer models.</abstract>
       <url hash="e7969b21">W18-2713</url>
@@ -4207,7 +4207,7 @@
       <title>Fast Neural Machine Translation Implementation</title>
       <author><first>Hieu</first><last>Hoang</last></author>
       <author><first>Tomasz</first><last>Dwojak</last></author>
-      <author><first>Rihards</first><last>Krislauks</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krislauks</last></author>
       <author><first>Daniel</first><last>Torregrosa</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
       <pages>116–121</pages>
@@ -4223,8 +4223,8 @@
       <author><first>Bo</first><last>Wang</last></author>
       <author><first>Guillaume</first><last>Klein</last></author>
       <author><first>Jean-Pierre</first><last>Ramatchandirin</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>122–128</pages>
       <abstract>We present a system description of the OpenNMT Neural Machine Translation entry for the WNMT 2018 evaluation. In this work, we developed a heavily optimized NMT inference model targeting a high-performance CPU system. The final system uses a combination of four techniques, all of them lead to significant speed-ups in combination: (a) sequence distillation, (b) architecture modifications, (c) precomputation, particularly of vocabulary, and (d) CPU targeted quantization. This work achieves the fastest performance of the shared task, and led to the development of new features that have been integrated to OpenNMT and available to the community.</abstract>
       <url hash="d80e7751">W18-2715</url>
@@ -4265,11 +4265,11 @@
     </frontmatter>
     <paper id="1">
       <title>Predicting Brain Activation with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings</title>
-      <author><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
       <author><first>Ruben</first><last>Branco</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>1–5</pages>
       <abstract>The task of taking a semantic representation of a noun and predicting the brain activity triggered by it in terms of fMRI spatial patterns was pioneered by Mitchell et al. 2008. That seminal work used word co-occurrence features to represent the meaning of the nouns. Even though the task does not impose any specific type of semantic representation, the vast majority of subsequent approaches resort to feature-based models or to semantic spaces (aka word embeddings). We address this task, with competitive results, by using instead a semantic network to encode lexical semantics, thus providing further evidence for the cognitive plausibility of this approach to model lexical meaning.</abstract>
       <url hash="d85ac56b">W18-2801</url>
@@ -4290,7 +4290,7 @@
     <paper id="3">
       <title>Language Production Dynamics with Recurrent Neural Networks</title>
       <author><first>Jesús</first><last>Calvillo</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <pages>17–26</pages>
       <abstract>We present an analysis of the internal mechanism of the recurrent neural model of sentence production presented by Calvillo et al. (2016). The results show clear patterns of computation related to each layer in the network allowing to infer an algorithmic account, where the semantics activates the semantically related words, then each word generated at each time step activates syntactic and semantic constraints on possible continuations, while the recurrence preserves information through time. We propose that such insights could generalize to other models with similar architecture, including some used in computational linguistics for language modeling, machine translation and image caption generation.</abstract>
       <url hash="68ad3a4e">W18-2803</url>
@@ -4323,7 +4323,7 @@
     <paper id="6">
       <title>Affordances in Grounded Language Learning</title>
       <author><first>Stephen</first><last>McGregor</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <pages>41–46</pages>
       <abstract>We present a novel methodology involving mappings between different modes of semantic representation. We propose distributional semantic models as a mechanism for representing the kind of world knowledge inherent in the system of abstract symbols characteristic of a sophisticated community of language users. Then, motivated by insight from ecological psychology, we describe a model approximating affordances, by which we mean a language learner’s direct perception of opportunities for action in an environment. We present a preliminary experiment involving mapping between these two representational modalities, and propose that our methodology can become the basis for a cognitively inspired model of grounded language learning.</abstract>
       <url hash="7bc5207a">W18-2806</url>
@@ -4345,7 +4345,7 @@
       <author><first>Jixing</first><last>Li</last></author>
       <author><first>Murielle</first><last>Fabre</last></author>
       <author><first>Wen-Ming</first><last>Luh</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>56–64</pages>
       <abstract>The current study examined the role of syntactic structure during pronoun resolution. We correlated complexity measures derived by the syntax-sensitive Hobbs algorithm and a neural network model for pronoun resolution with brain activity of participants listening to an audiobook during fMRI recording. Compared to the neural network model, the Hobbs algorithm is associated with larger clusters of brain activation in a network including the left Broca’s area.</abstract>
       <url hash="646f2eb4">W18-2808</url>
@@ -4355,7 +4355,7 @@
     <paper id="9">
       <title>A Sound and Complete Left-Corner Parsing for <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars</title>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Edward</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward</first><last>Stabler</last></author>
       <pages>65–74</pages>
       <abstract>This paper presents a left-corner parser for minimalist grammars. The relation between the parser and the grammar is transparent in the sense that there is a very simple 1-1 correspondence between derivations and parses. Like left-corner context-free parsers, left-corner minimalist parsers can be non-terminating when the grammar has empty left corners, so an easily computed left-corner oracle is defined to restrict the search.</abstract>
       <url hash="42a6d16a">W18-2809</url>
@@ -4367,12 +4367,12 @@
     <meta>
       <booktitle>Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="2f44f94b">W18-29</url>
-      <editor><first>Georgiana</first><last>Dinu</last></editor>
+      <editor id="georgiana-dinu"><first>Georgiana</first><last>Dinu</last></editor>
       <editor><first>Miguel</first><last>Ballesteros</last></editor>
-      <editor><first>Avirup</first><last>Sil</last></editor>
-      <editor><first>Sam</first><last>Bowman</last></editor>
+      <editor id="avirup-sil"><first>Avirup</first><last>Sil</last></editor>
+      <editor id="samuel-bowman"><first>Sam</first><last>Bowman</last></editor>
       <editor><first>Wael</first><last>Hamza</last></editor>
-      <editor><first>Anders</first><last>Sogaard</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Sogaard</last></editor>
       <editor><first>Tahira</first><last>Naseem</last></editor>
       <editor><first>Yoav</first><last>Goldberg</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -4451,7 +4451,7 @@
     <paper id="7">
       <title>Syntactic Dependency Representations in Neural Relation Classification</title>
       <author><first>Farhad</first><last>Nooralahzadeh</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>47–53</pages>
       <abstract>We investigate the use of different syntactic dependency representations in a neural relation classification task and compare the CoNLL, Stanford Basic and Universal Dependencies schemes. We further compare with a syntax-agnostic approach and perform an error analysis in order to gain a better understanding of the results.</abstract>
       <url hash="87b73bff">W18-2907</url>
@@ -4468,9 +4468,9 @@
       <editor><first>He</first><last>He</last></editor>
       <editor><first>Felix</first><last>Hill</last></editor>
       <editor><first>Spandana</first><last>Gella</last></editor>
-      <editor><first>Jamie</first><last>Kiros</last></editor>
+      <editor id="jamie-kiros"><first>Jamie</first><last>Kiros</last></editor>
       <editor><first>Hongyuan</first><last>Mei</last></editor>
-      <editor><first>Dipendra</first><last>Misra</last></editor>
+      <editor id="dipendra-misra"><first>Dipendra</first><last>Misra</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Melbourne, Australia</address>
       <month>July</month>
@@ -4485,7 +4485,7 @@
       <title>Corpus Specificity in <fixed-case>LSA</fixed-case> and Word2vec: The Role of Out-of-Domain Documents</title>
       <author><first>Edgar</first><last>Altszyler</last></author>
       <author><first>Mariano</first><last>Sigman</last></author>
-      <author><first>Diego</first><last>Fernández Slezak</last></author>
+      <author id="diego-fernandez-slezak"><first>Diego</first><last>Fernández Slezak</last></author>
       <pages>1–10</pages>
       <url hash="d91027db">W18-3001</url>
       <abstract>Despite the popularity of word embeddings, the precise way by which they acquire semantic relations between words remain unclear. In the present article, we investigate whether LSA and word2vec capacity to identify relevant semantic relations increases with corpus size. One intuitive hypothesis is that the capacity to identify relevant associations should increase as the amount of data increases. However, if corpus size grows in topics which are not specific to the domain of interest, signal to noise ratio may weaken. Here we investigate the effect of corpus specificity and size in word-embeddings, and for this, we study two ways for progressive elimination of documents: the elimination of random documents vs. the elimination of documents unrelated to a specific task. We show that word2vec can take advantage of all the documents, obtaining its best performance when it is trained with the whole corpus. On the contrary, the specialization (removal of out-of-domain documents) of the training corpus, accompanied by a decrease of dimensionality, can increase LSA word-representation quality while speeding up the processing time. From a cognitive-modeling point of view, we point out that LSA’s word-knowledge acquisitions may not be efficiently exploiting higher-order co-occurrences and global relations, whereas word2vec does.</abstract>
@@ -4614,8 +4614,8 @@
     <paper id="13">
       <title>Evaluating Word Embeddings in Multi-label Classification Using Fine-Grained Name Typing</title>
       <author><first>Yadollah</first><last>Yaghoobzadeh</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>101–106</pages>
       <url hash="d1b6c506">W18-3013</url>
       <abstract>Embedding models typically associate each word with a single real-valued vector, representing its different properties. Evaluation methods, therefore, need to analyze the accuracy and completeness of these properties in embeddings. This requires fine-grained analysis of embedding subspaces. Multi-label classification is an appropriate way to do so. We propose a new evaluation method for word embeddings based on multi-label classification given a word embedding. The task we use is fine-grained name typing: given a large corpus, find all types that a name can refer to based on the name embedding. Given the scale of entities in knowledge bases, we can build datasets for this task that are complementary to the current embedding evaluation datasets in: they are very large, contain fine-grained classes, and allow the direct evaluation of embeddings without confounding factors like sentence context.</abstract>
@@ -4638,9 +4638,9 @@
     <paper id="16">
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings</title>
       <author><first>Chakaveh</first><last>Saedi</last></author>
-      <author><first>António</first><last>Branco</last></author>
-      <author><first>João</first><last>António Rodrigues</last></author>
-      <author><first>João</first><last>Silva</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
+      <author id="joao-rodrigues"><first>João</first><last>António Rodrigues</last></author>
+      <author id="joao-silva"><first>João</first><last>Silva</last></author>
       <pages>122–131</pages>
       <url hash="b3c64b73">W18-3016</url>
       <abstract>Semantic networks and semantic spaces have been two prominent approaches to represent lexical semantics. While a unified account of the lexical meaning relies on one being able to convert between these representations, in both directions, the conversion direction from semantic networks into semantic spaces started to attract more attention recently. In this paper we present a methodology for this conversion and assess it with a case study. When it is applied over WordNet, the performance of the resulting embeddings in a mainstream semantic similarity task is very good, substantially superior to the performance of word embeddings based on very large collections of texts like word2vec.</abstract>
@@ -4693,7 +4693,7 @@
     <paper id="21">
       <title>Limitations of Cross-Lingual Learning from Image Search</title>
       <author><first>Mareike</first><last>Hartmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>159–163</pages>
       <url hash="915670a8">W18-3021</url>
       <abstract>Cross-lingual representation learning is an important step in making NLP scale to all the world’s languages. Previous work on bilingual lexicon induction suggests that it is possible to learn cross-lingual representations of words based on similarities between images associated with these words. However, that work focused (almost exclusively) on the translation of nouns only. Here, we investigate whether the meaning of other parts-of-speech (POS), in particular adjectives and verbs, can be learned in the same way. Our experiments across five language pairs indicate that previous work does not scale to the problem of learning cross-lingual representations beyond simple nouns.</abstract>
@@ -4731,11 +4731,11 @@
     </paper>
     <paper id="24">
       <title><fixed-case>LSTM</fixed-case>s Exploit Linguistic Attributes of Data</title>
-      <author><first>Nelson F.</first><last>Liu</last></author>
+      <author id="nelson-f-liu"><first>Nelson F.</first><last>Liu</last></author>
       <author><first>Omer</first><last>Levy</last></author>
       <author><first>Roy</first><last>Schwartz</last></author>
       <author><first>Chenhao</first><last>Tan</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>180–186</pages>
       <url hash="688f6ec6">W18-3024</url>
       <abstract>While recurrent neural networks have found success in a variety of natural language processing applications, they are general models of sequential data. We investigate how the properties of natural language data affect an LSTM’s ability to learn a nonlinguistic task: recalling elements from its input. We find that models trained on natural language data are able to recall tokens from much longer sequences than models trained on non-language sequential data. Furthermore, we show that the LSTM learns to solve the memorization task by explicitly using a subset of its neurons to count timesteps in the input. We hypothesize that the patterns and structure in natural language data enable LSTMs to learn by providing approximate ways of reducing loss, but understanding the effect of different training data on the learnability of LSTMs remains an open question.</abstract>
@@ -4754,8 +4754,8 @@
     <paper id="26">
       <title>Jointly Embedding Entities and Text with Distant Supervision</title>
       <author><first>Denis</first><last>Newman-Griffis</last></author>
-      <author><first>Albert M</first><last>Lai</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="albert-m-lai"><first>Albert M</first><last>Lai</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>195–206</pages>
       <url hash="c8c19c09">W18-3026</url>
       <abstract>Learning representations for knowledge base entities and concepts is becoming increasingly important for NLP applications. However, recent entity embedding methods have relied on structured resources that are expensive to create for new domains and corpora. We present a distantly-supervised method for jointly learning embeddings of entities and text from an unnanotated corpus, using only a list of mappings between entities and surface forms. We learn embeddings from open-domain and biomedical corpora, and compare against prior methods that rely on human-annotated text or large knowledge graph structure. Our embeddings capture entity similarity and relatedness better than prior work, both in existing biomedical datasets and a new Wikipedia-based dataset that we release to the community. Results on analogy completion and entity sense disambiguation indicate that entities and words capture complementary information that can be effectively combined for downstream use.</abstract>
@@ -4789,8 +4789,8 @@
       <booktitle>Proceedings of the First Workshop on Economics and Natural Language Processing</booktitle>
       <url hash="5c6ee647">W18-31</url>
       <editor><first>Udo</first><last>Hahn</last></editor>
-      <editor><first>Véronique</first><last>Hoste</last></editor>
-      <editor><first>Ming-Feng</first><last>Tsai</last></editor>
+      <editor id="veronique-hoste"><first>Véronique</first><last>Hoste</last></editor>
+      <editor id="ming-feng-tsai"><first>Ming-Feng</first><last>Tsai</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Melbourne, Australia</address>
       <month>July</month>
@@ -4828,8 +4828,8 @@
     </paper>
     <paper id="3">
       <title>A Corpus of Corporate Annual and Social Responsibility Reports: 280 Million Tokens of Balanced Organizational Writing</title>
-      <author><first>Sebastian G.M.</first><last>Händschke</last></author>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sebastian-g-m-handschke"><first>Sebastian G.M.</first><last>Händschke</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Jan</first><last>Goldenstein</last></author>
       <author><first>Philipp</first><last>Poschmann</last></author>
       <author><first>Tinghui</first><last>Duan</last></author>
@@ -4844,7 +4844,7 @@
     <paper id="4">
       <title>Word Embeddings-Based Uncertainty Detection in Financial Disclosures</title>
       <author><first>Christoph Kilian</first><last>Theil</last></author>
-      <author><first>Sanja</first><last>Štajner</last></author>
+      <author id="sanja-stajner"><first>Sanja</first><last>Štajner</last></author>
       <author><first>Heiner</first><last>Stuckenschmidt</last></author>
       <pages>32–37</pages>
       <abstract>In this paper, we use NLP techniques to detect linguistic uncertainty in financial disclosures. Leveraging general-domain and domain-specific word embedding models, we automatically expand an existing dictionary of uncertainty triggers. We furthermore examine how an expert filtering affects the quality of such an expansion. We show that the dictionary expansions significantly improve regressions on stock return volatility. Lastly, we prove that the expansions significantly boost the automatic detection of uncertain sentences.</abstract>
@@ -4854,8 +4854,8 @@
     </paper>
     <paper id="5">
       <title>A Simple End-to-End Question Answering Model for Product Information</title>
-      <author><first>Tuan</first><last>Lai</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="tuan-lai"><first>Tuan</first><last>Lai</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Sheng</first><last>Li</last></author>
       <author><first>Nedim</first><last>Lipka</last></author>
       <pages>38–43</pages>
@@ -4919,8 +4919,8 @@
       <editor><first>Fahad</first><last>AlGhamdi</last></editor>
       <editor><first>Victor</first><last>Soto</last></editor>
       <editor><first>Thamar</first><last>Solorio</last></editor>
-      <editor><first>Mona</first><last>Diab</last></editor>
-      <editor><first>Julia</first><last>Hirschberg</last></editor>
+      <editor id="mona-diab"><first>Mona</first><last>Diab</last></editor>
+      <editor id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Melbourne, Australia</address>
       <month>July</month>
@@ -4970,11 +4970,11 @@
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Ekaterina</first><last>Loginova</last></author>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <pages>29–38</pages>
       <abstract>Code-Mixing (CM) is the phenomenon of alternating between two or more languages which is prevalent in bi- and multi-lingual communities. Most NLP applications today are still designed with the assumption of a single interaction language and are most likely to break given a CM utterance with multiple languages mixed at a morphological, phrase or sentence level. For example, popular commercial search engines do not yet fully understand the intents expressed in CM queries. As a first step towards fostering research which supports CM in NLP applications, we systematically crowd-sourced and curated an evaluation dataset for factoid question answering in three CM languages - Hinglish (Hindi+English), Tenglish (Telugu+English) and Tamlish (Tamil+English) which belong to two language families (Indo-Aryan and Dravidian). We share the details of our data collection process, techniques which were used to avoid inducing lexical bias amongst the crowd workers and other CM specific linguistic properties of the dataset. Our final dataset, which is available freely for research purposes, has 1,694 Hinglish, 2,848 Tamlish and 1,391 Tenglish factoid questions and their answers. We discuss the techniques used by the participants for the first edition of this ongoing challenge.</abstract>
       <url hash="b657849e">W18-3204</url>
@@ -4984,8 +4984,8 @@
     <paper id="5">
       <title>Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base</title>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>39–50</pages>
       <abstract>Humans can learn multiple languages. If they know a fact in one language, they can answer a question in another language they understand. They can also answer Code-mix (CM) questions: questions which contain both languages. This behavior is attributed to the unique learning ability of humans. Our task aims to study if machines can achieve this. We demonstrate how effectively a machine can answer CM questions. In this work, we adopt a two phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. We show experiments on the SimpleQuestions dataset. Our network is trained only on English questions provided in this dataset and noisy Hindi translations of these questions and can answer English-Hindi CM questions effectively without the need of translation into English. Back-transliterated CM questions outperform their lexical and sentence level translated counterparts by 5% &amp; 35% in accuracy respectively, highlighting the efficacy of our approach in a resource constrained setting.</abstract>
       <url hash="cbe78c08">W18-3205</url>
@@ -5005,7 +5005,7 @@
     </paper>
     <paper id="7">
       <title>Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -5017,7 +5017,7 @@
     </paper>
     <paper id="8">
       <title>Predicting the presence of a Matrix Language in code-switching</title>
-      <author><first>Barbara</first><last>Bullock</last></author>
+      <author id="barbara-bullock"><first>Barbara</first><last>Bullock</last></author>
       <author><first>Wally</first><last>Guzmán</last></author>
       <author><first>Jacqueline</first><last>Serigos</last></author>
       <author><first>Vivek</first><last>Sharath</last></author>
@@ -5032,7 +5032,7 @@
       <title>Automatic Detection of Code-switching Style from Acoustics</title>
       <author><first>SaiKrishna</first><last>Rallabandi</last></author>
       <author><first>Sunayana</first><last>Sitaram</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>76–81</pages>
       <abstract>Multilingual speakers switch between languages in an non-trivial fashion displaying inter sentential, intra sentential, and congruent lexicalization based transitions. While monolingual ASR systems may be capable of recognizing a few words from a foreign language, they are usually not robust enough to handle these varied styles of code-switching. There is also a lack of large code-switched speech corpora capturing all these styles making it difficult to build code-switched speech recognition systems. We hypothesize that it may be useful for an ASR system to be able to first detect the switching style of a particular utterance from acoustics, and then use specialized language models or other adaptation techniques for decoding the speech. In this paper, we look at the first problem of detecting code-switching style from acoustics. We classify code-switched Spanish-English and Hindi-English corpora using two metrics and show that features extracted from acoustics alone can distinguish between different kinds of code-switching in these language pairs.</abstract>
       <url hash="ace6292c">W18-3209</url>
@@ -5055,7 +5055,7 @@
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Thomas</first><last>Manzini</last></author>
       <author><first>Sumeet</first><last>Singh</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <pages>92–97</pages>
       <abstract>Code-switching (CS), the practice of alternating between two or more languages in conversations, is pervasive in most multi-lingual communities. CS texts have a complex interplay between languages and occur in informal contexts that make them harder to collect and construct NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus containing 59,189 unique sentences collected from blogging websites. We implement and discuss different Language Models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping to learn code-switching points. We show that our highest performing model achieves a test perplexity of 19.52 on the CS corpus that we collected and processed. On this data we demonstrate that our performance is an improvement over AWD-LSTM LM (a recent state of the art on monolingual English).</abstract>
       <url hash="f1a5efc7">W18-3211</url>
@@ -5088,7 +5088,7 @@
     </paper>
     <paper id="14">
       <title>Bilingual Character Representation for Efficiently Addressing Out-of-Vocabulary Words in Code-Switching Named Entity Recognition</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Chien-Sheng</first><last>Wu</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
@@ -5100,9 +5100,9 @@
     </paper>
     <paper id="15">
       <title>Named Entity Recognition on Code-Switched Data Using Conditional Random Fields</title>
-      <author><first>Utpal Kumar</first><last>Sikdar</last></author>
+      <author id="utpal-kumar-sikdar"><first>Utpal Kumar</first><last>Sikdar</last></author>
       <author><first>Biswanath</first><last>Barik</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>115–119</pages>
       <abstract>Named Entity Recognition is an important information extraction task that identifies proper names in unstructured texts and classifies them into some pre-defined categories. Identification of named entities in code-mixed social media texts is a more difficult and challenging task as the contexts are short, ambiguous and often noisy. This work proposes a Conditional Random Fields based named entity recognition system to identify proper names in code-switched data and classify them into nine categories. The system ranked fifth among nine participant systems and achieved a 59.25% F1-score.</abstract>
       <url hash="becb701d">W18-3215</url>
@@ -5114,8 +5114,8 @@
       <author><first>Florian</first><last>Janke</last></author>
       <author><first>Tongrui</first><last>Li</last></author>
       <author><first>Eric</first><last>Rincón</last></author>
-      <author><first>Gualberto</first><last>Guzmán</last></author>
-      <author><first>Barbara</first><last>Bullock</last></author>
+      <author id="gualberto-a-guzman"><first>Gualberto</first><last>Guzmán</last></author>
+      <author id="barbara-bullock"><first>Barbara</first><last>Bullock</last></author>
       <author><first>Almeida Jacqueline</first><last>Toribio</last></author>
       <pages>120–125</pages>
       <abstract>This paper describes the system for the Named Entity Recognition Shared Task of the Third Workshop on Computational Approaches to Linguistic Code-Switching (CALCS) submitted by the Bilingual Annotations Tasks (BATs) research group of the University of Texas. Our system uses several features to train a Conditional Random Field (CRF) model for classifying input words as Named Entities (NEs) using the Inside-Outside-Beginning (IOB) tagging scheme. We participated in the Modern Standard Arabic-Egyptian Arabic (MSA-EGY) and English-Spanish (ENG-SPA) tasks, achieving weighted average F-scores of 65.62 and 54.16 respectively. We also describe the performance of a deep neural network (NN) trained on a subset of the CRF features, which did not surpass CRF performance.</abstract>
@@ -5127,7 +5127,7 @@
       <title>Tackling Code-Switched <fixed-case>NER</fixed-case>: Participation of <fixed-case>CMU</fixed-case></title>
       <author><first>Parvathy</first><last>Geetha</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>126–131</pages>
       <abstract>Named Entity Recognition plays a major role in several downstream applications in NLP. Though this task has been heavily studied in formal monolingual texts and also noisy texts like Twitter data, it is still an emerging task in code-switched (CS) content on social media. This paper describes our participation in the shared task of NER on code-switched data for Spanglish (Spanish + English) and Arabish (Arabic + English). In this paper we describe models that intuitively developed from the data for the shared task Named Entity Recognition on Code-switched Data. Owing to the sparse and non-linear relationships between words in Twitter data, we explored neural architectures that are capable of non-linearities fairly well. In specific, we trained character level models and word level models based on Bidirectional LSTMs (Bi-LSTMs) to perform sequential tagging. We train multiple models to identify nominal mentions and subsequently use this information to predict the labels of named entity in a sequence. Our best model is a character level model along with word level pre-trained multilingual embeddings that gave an F-score of 56.72 in Spanglish and a word level model that gave an F-score of 65.02 in Arabish on the test data.</abstract>
       <url hash="774f1960">W18-3217</url>
@@ -5163,7 +5163,7 @@
       <title><fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>) Submission for the <fixed-case>ACL</fixed-case> Shared Task on Named Entity Recognition on Code-switched Data</title>
       <author><first>Shashwat</first><last>Trivedi</last></author>
       <author><first>Harsh</first><last>Rangwani</last></author>
-      <author><first>Anil</first><last>Kumar Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil</first><last>Kumar Singh</last></author>
       <pages>148–153</pages>
       <abstract>This paper describes the best performing system for the shared task on Named Entity Recognition (NER) on code-switched data for the language pair Spanish-English (ENG-SPA). We introduce a gated neural architecture for the NER task. Our final model achieves an F1 score of 63.76%, outperforming the baseline by 10%.</abstract>
       <url hash="a871a842">W18-3220</url>
@@ -5229,7 +5229,7 @@
     <paper id="3">
       <title>Multimodal Relational Tensor Network for Sentiment and Emotion Classification</title>
       <author><first>Saurav</first><last>Sahay</last></author>
-      <author><first>Shachi H</first><last>Kumar</last></author>
+      <author id="shachi-h-kumar"><first>Shachi H</first><last>Kumar</last></author>
       <author><first>Rui</first><last>Xia</last></author>
       <author><first>Jonathan</first><last>Huang</last></author>
       <author><first>Lama</first><last>Nachman</last></author>
@@ -5266,7 +5266,7 @@
       <title>Polarity and Intensity: the Two Aspects of Sentiment Analysis</title>
       <author><first>Leimin</first><last>Tian</last></author>
       <author><first>Catherine</first><last>Lai</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <pages>40–47</pages>
       <abstract>Current multimodal sentiment analysis frames sentiment score prediction as a general Machine Learning task. However, what the sentiment score actually represents has often been overlooked. As a measurement of opinions and affective states, a sentiment score generally consists of two aspects: polarity and intensity. We decompose sentiment scores into these two aspects and study how they are conveyed through individual modalities and combined multimodal models in a naturalistic monologue setting. In particular, we build unimodal and multimodal multi-task learning models with sentiment score prediction as the main task and polarity and/or intensity classification as the auxiliary tasks. Our experiments show that sentiment analysis benefits from multi-task learning, and individual modalities differ when conveying the polarity and intensity aspects of sentiment.</abstract>
       <url hash="02d10eb6">W18-3306</url>
@@ -5313,7 +5313,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Deep Learning Approaches for Low-Resource <fixed-case>NLP</fixed-case></booktitle>
       <url hash="6d8e4ccb">W18-34</url>
-      <editor><first>Reza</first><last>Haffari</last></editor>
+      <editor id="gholamreza-haffari"><first>Reza</first><last>Haffari</last></editor>
       <editor><first>Colin</first><last>Cherry</last></editor>
       <editor><first>George</first><last>Foster</last></editor>
       <editor><first>Shahram</first><last>Khadivi</last></editor>
@@ -5330,11 +5330,11 @@
     </frontmatter>
     <paper id="1">
       <title>Character-level Supervision for Low-resource <fixed-case>POS</fixed-case> Tagging</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Johannes</first><last>Bjerva</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1–11</pages>
       <abstract>Neural part-of-speech (POS) taggers are known to not perform well with little training data. As a step towards overcoming this problem, we present an architecture for learning more robust neural POS taggers by jointly training a hierarchical, recurrent model and a recurrent character-based sequence-to-sequence network supervised using an auxiliary objective. This way, we introduce stronger character-level supervision into the model, which enables better generalization to unseen words and provides regularization, making our encoding less prone to overfitting. We experiment with three auxiliary tasks: lemmatization, character-based word autoencoding, and character-based random string autoencoding. Experiments with minimal amounts of labeled data on 34 languages show that our new architecture outperforms a single-task baseline and, surprisingly, that, on average, raw text autoencoding can be as beneficial for low-resource POS tagging as using lemma information. Our neural POS tagger closes the gap to a state-of-the-art POS tagger (MarMoT) for low-resource scenarios by 43%, even outperforming it on languages with templatic morphology, e.g., Arabic, Hebrew, and Turkish, by some margin.</abstract>
       <url hash="74e3c27a">W18-3401</url>
@@ -5354,7 +5354,7 @@
     <paper id="3">
       <title>Multi-task learning for historical text normalization: Size matters</title>
       <author><first>Marcel</first><last>Bollmann</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Joachim</first><last>Bingel</last></author>
       <pages>19–24</pages>
       <abstract>Historical text normalization suffers from small datasets that exhibit high variance, and previous work has shown that multi-task learning can be used to leverage data from related problems in order to obtain more robust models. Previous work has been limited to datasets from a specific language and a specific historical period, and it is not clear whether results generalize. It therefore remains an open problem, when historical text normalization benefits from multi-task learning. We explore the benefits of multi-task learning across 10 different datasets, representing different languages and periods. Our main finding—contrary to what has been observed for other NLP tasks—is that multi-task learning mainly works when target task data is very scarce.</abstract>
@@ -5375,7 +5375,7 @@
     </paper>
     <paper id="5">
       <title>Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data</title>
-      <author><first>Koel</first><last>Dutta Chowdhury</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel</first><last>Dutta Chowdhury</last></author>
       <author><first>Mohammed</first><last>Hasanuzzaman</last></author>
       <author><first>Qun</first><last>Liu</last></author>
       <pages>33–42</pages>
@@ -5401,7 +5401,7 @@
     </paper>
     <paper id="7">
       <title>Domain Adapted Word Embeddings for Improved Sentiment Classification</title>
-      <author><first>Prathusha</first><last>Kameswara Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>Kameswara Sarma</last></author>
       <author><first>Yingyu</first><last>Liang</last></author>
       <author><first>Bill</first><last>Sethares</last></author>
       <pages>51–59</pages>
@@ -5425,7 +5425,7 @@
       <title>Semi-Supervised Learning with Auxiliary Evaluation Component for Large Scale e-Commerce Text Classification</title>
       <author><first>Mingkuan</first><last>Liu</last></author>
       <author><first>Musen</first><last>Wen</last></author>
-      <author><first>Selcuk</first><last>Kopru</last></author>
+      <author id="selcuk-kopru"><first>Selcuk</first><last>Kopru</last></author>
       <author><first>Xianjing</first><last>Liu</last></author>
       <author><first>Alan</first><last>Lu</last></author>
       <pages>68–76</pages>
@@ -5437,7 +5437,7 @@
     </paper>
     <paper id="10">
       <title>Low-rank passthrough neural networks</title>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <pages>77–86</pages>
       <abstract>Various common deep learning architectures, such as LSTMs, GRUs, Resnets and Highway Networks, employ state passthrough connections that support training with high feed-forward depth or recurrence over many time steps. These “Passthrough Networks” architectures also enable the decoupling of the network state size from the number of parameters of the network, a possibility has been studied by Sak et al. (2014) with their low-rank parametrization of the LSTM. In this work we extend this line of research, proposing effective, low-rank and low-rank plus diagonal matrix parametrizations for Passthrough Networks which exploit this decoupling property, reducing the data complexity and memory requirements of the network while preserving its memory capacity. This is particularly beneficial in low-resource settings as it supports expressive models with a compact parametrization less susceptible to overfitting. We present competitive experimental results on several tasks, including language modeling and a near state of the art result on sequential randomly-permuted MNIST classification, a hard task on natural data.</abstract>
       <url hash="80aabebc">W18-3410</url>
@@ -5464,9 +5464,9 @@
     <paper id="1">
       <title>Sociolinguistic Corpus of <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Chats in <fixed-case>S</fixed-case>panish among College Students</title>
       <author><first>Alejandro</first><last>Dorantes</last></author>
-      <author><first>Gerardo</first><last>Sierra</last></author>
+      <author id="gerardo-sierra"><first>Gerardo</first><last>Sierra</last></author>
       <author><first>Tlauhlia Yamín</first><last>Donohue Pérez</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Mónica</first><last>Jasso Rosales</last></author>
       <pages>1–6</pages>
       <abstract>This work presents the Sociolinguistic Corpus of WhatsApp Chats in Spanish among College Students, a corpus of raw data for general use. Its purpose is to offer data for the study of of language and interactions via Instant Messaging (IM) among bachelors. Our paper consists of an overview of both the corpus’s content and demographic metadata. Furthermore, it presents the current research being conducted with it —namely parenthetical expressions, orality traits, and code-switching. This work also includes a brief outline of similar corpora and recent studies in the field of IM.</abstract>
@@ -5502,7 +5502,7 @@
     <paper id="4">
       <title>Detecting Offensive Tweets in <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Switched Language</title>
       <author><first>Puneet</first><last>Mathur</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Debanjan</first><last>Mahata</last></author>
       <pages>18–26</pages>
@@ -5587,9 +5587,9 @@
       <title>Improving Classification of <fixed-case>T</fixed-case>witter Behavior During Hurricane Events</title>
       <author><first>Kevin</first><last>Stowe</last></author>
       <author><first>Jennings</first><last>Anderson</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Leysia</first><last>Palen</last></author>
-      <author><first>Ken</first><last>Anderson</last></author>
+      <author id="kenneth-m-anderson"><first>Ken</first><last>Anderson</last></author>
       <pages>67–75</pages>
       <abstract>A large amount of social media data is generated during natural disasters, and identifying the relevant portions of this data is critical for researchers attempting to understand human behavior, the effects of information sources, and preparatory actions undertaken during these events. In order to classify human behavior during hazard events, we employ machine learning for two tasks: identifying hurricane related tweets and classifying user evacuation behavior during hurricanes. We show that feature-based and deep learning methods provide different benefits for tweet classification, and ensemble-based methods using linguistic, temporal, and geospatial features can effectively classify user behavior.</abstract>
       <url hash="cd5fa784">W18-3512</url>
@@ -5612,7 +5612,7 @@
       <booktitle>Proceedings of the First Workshop on Multilingual Surface Realisation</booktitle>
       <url hash="e41634a6">W18-36</url>
       <editor><first>Simon</first><last>Mille</last></editor>
-      <editor><first>Anja</first><last>Belz</last></editor>
+      <editor id="anja-belz"><first>Anja</first><last>Belz</last></editor>
       <editor><first>Bernd</first><last>Bohnet</last></editor>
       <editor><first>Emily</first><last>Pitler</last></editor>
       <editor><first>Leo</first><last>Wanner</last></editor>
@@ -5666,9 +5666,9 @@
     </paper>
     <paper id="4">
       <title>Surface Realization Shared Task 2018 (<fixed-case>SR</fixed-case>18): The <fixed-case>T</fixed-case>ilburg <fixed-case>U</fixed-case>niversity Approach</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>35–38</pages>
       <abstract>This study describes the approach developed by the Tilburg University team to the shallow task of the Multilingual Surface Realization Shared Task 2018 (SR18). Based on (Castro Ferreira et al., 2017), the approach works by first preprocessing an input dependency tree into an ordered linearized string, which is then realized using a statistical machine translation model. Our approach shows promising results, with BLEU scores above 50 for 5 different languages (English, French, Italian, Portuguese and Spanish) and above 35 for the Dutch language.</abstract>
       <url hash="fbb7bb06">W18-3604</url>
@@ -5677,8 +5677,8 @@
     </paper>
     <paper id="5">
       <title>The <fixed-case>OSU</fixed-case> Realizer for <fixed-case>SRST</fixed-case> ‘18: Neural Sequence-to-Sequence Inflection and Incremental Locality-Based Linearization</title>
-      <author><first>David</first><last>King</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="david-king"><first>David</first><last>King</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>39–48</pages>
       <abstract>Surface realization is a nontrivial task as it involves taking structured data and producing grammatically and semantically correct utterances. Many competing grammar-based and statistical models for realization still struggle with relatively simple sentences. For our submission to the 2018 Surface Realization Shared Task, we tackle the shallow task by first generating inflected wordforms with a neural sequence-to-sequence model before incrementally linearizing them. For linearization, we use a global linear model trained using early update that makes use of features that take into account the dependency structure and dependency locality. Using this pipeline sufficed to produce surprisingly strong results in the shared task. In future work, we intend to pursue joint approaches to linearization and morphological inflection and incorporating a neural language model into the linearization choices.</abstract>
       <url hash="5afc5aa4">W18-3605</url>
@@ -5688,7 +5688,7 @@
     <paper id="6">
       <title>Generating High-Quality Surface Realizations Using Data Augmentation and Factored Sequence Models</title>
       <author><first>Henry</first><last>Elder</last></author>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <pages>49–53</pages>
       <abstract>This work presents state of the art results in reconstruction of surface realizations from obfuscated text. We identify the lack of sufficient training data as the major obstacle to training high-performing models, and solve this issue by generating large amounts of synthetic training data. We also propose preprocessing techniques which make the structure contained in the input features more accessible to sequence models. Our models were ranked first on all evaluation metrics in the English portion of the 2018 Surface Realization shared task.</abstract>
       <url hash="182fa713">W18-3606</url>
@@ -5711,7 +5711,7 @@
     </paper>
     <paper id="8">
       <title><fixed-case>NILC</fixed-case>-<fixed-case>SWORNEMO</fixed-case> at the Surface Realization Shared Task: Exploring Syntax-Based Word Ordering using Neural Models</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>58–64</pages>
       <abstract>This paper describes the submission by the NILC Computational Linguistics research group of the University of São Paulo/Brazil to the Track 1 of the Surface Realization Shared Task (SRST Track 1). We present a neural-based method that works at the syntactic level to order the words (which we refer by NILC-SWORNEMO, standing for “Syntax-based Word ORdering using NEural MOdels”). Additionally, we apply a bottom-up approach to build the sentence and, using language-specific lexicons, we produce the proper word form of each lemma in the sentence. The results obtained by our method outperformed the average of the results for English, Portuguese and Spanish in the track.</abstract>
@@ -5722,7 +5722,7 @@
     <paper id="9">
       <title>The <fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo-<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o system for <fixed-case>SRST</fixed-case> 2018</title>
       <author><first>Valerio</first><last>Basile</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>65–71</pages>
       <abstract>This paper describes the system developed by the DipInfo-UniTo team to participate to the shallow track of the Surface Realization Shared Task 2018. The system employs two separate neural networks with different architectures to predict the word ordering and the morphological inflection independently from each other. The UniTO realizer is language independent, and its simple architecture allowed it to be scored in the central part of the final ranking of the shared task.</abstract>
       <url hash="d368c64a">W18-3609</url>
@@ -5734,7 +5734,7 @@
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications</booktitle>
       <url hash="0c115732">W18-37</url>
-      <editor><first>Yuen-Hsien</first><last>Tseng</last></editor>
+      <editor id="yuen-hsien-tseng"><first>Yuen-Hsien</first><last>Tseng</last></editor>
       <editor><first>Hsin-Hsi</first><last>Chen</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <editor><first>Mamoru</first><last>Komachi</last></editor>
@@ -5752,7 +5752,7 @@
       <title>Generating Questions for Reading Comprehension using Coherence Relations</title>
       <author><first>Takshak</first><last>Desai</last></author>
       <author><first>Parag</first><last>Dakle</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>1–10</pages>
       <abstract>In this paper, we have proposed a technique for generating complex reading comprehension questions from a discourse that are more useful than factual ones derived from assertions. Our system produces a set of general-level questions using coherence relations and a set of well-defined syntactic transformations on the input text. Generated questions evaluate comprehension abilities like a comprehensive analysis of the text and its structure, correct identification of the author’s intent, a thorough evaluation of stated arguments; and a deduction of the high-level semantic relations that hold between text spans. Experiments performed on the RST-DT corpus allow us to conclude that our system possesses a strong aptitude for generating intricate questions. These questions are capable of effectively assessing a student’s interpretation of the text.</abstract>
       <url hash="ba07388b">W18-3701</url>
@@ -5793,7 +5793,7 @@
     <paper id="5">
       <title>Thank “Goodness”! A Way to Measure Style in Student Essays</title>
       <author><first>Sandeep</first><last>Mathias</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>35–41</pages>
       <abstract>Essays have two major components for scoring - content and style. In this paper, we describe a property of the essay, called goodness, and use it to predict the score given for the style of student essays. We compare our approach to solve this problem with baseline approaches, like language modeling and also a state-of-the-art deep learning system. We show that, despite being quite intuitive, our approach is very powerful in predicting the style of the essays.</abstract>
       <url hash="c740c01c">W18-3705</url>
@@ -5899,7 +5899,7 @@
     <paper id="14">
       <title>Joint learning of frequency and word embeddings for multilingual readability assessment</title>
       <author><first>Dieu-Thu</first><last>Le</last></author>
-      <author><first>Cam-Tu</first><last>Nguyen</last></author>
+      <author id="cam-tu-nguyen"><first>Cam-Tu</first><last>Nguyen</last></author>
       <author><first>Xiaoliang</first><last>Wang</last></author>
       <pages>103–107</pages>
       <abstract>This paper describes two models that employ word frequency embeddings to deal with the problem of readability assessment in multiple languages. The task is to determine the difficulty level of a given document, i.e., how hard it is for a reader to fully comprehend the text. The proposed models show how frequency information can be integrated to improve the readability assessment. The experimental results testing on both English and Chinese datasets show that the proposed models improve the results notably when comparing to those using only traditional word embeddings.</abstract>
@@ -5910,7 +5910,7 @@
     <paper id="15">
       <title><fixed-case>MULLE</fixed-case>: A grammar-based <fixed-case>L</fixed-case>atin language learning tool to supplement the classroom setting</title>
       <author><first>Herbert</first><last>Lange</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>108–112</pages>
       <abstract>MULLE is a tool for language learning that focuses on teaching Latin as a foreign language. It is aimed for easy integration into the traditional classroom setting and syllabus, which makes it distinct from other language learning tools that provide standalone learning experience. It uses grammar-based lessons and embraces methods of gamification to improve the learner motivation. The main type of exercise provided by our application is to practice translation, but it is also possible to shift the focus to vocabulary or morphology training.</abstract>
       <url hash="56fa2203">W18-3715</url>
@@ -5919,9 +5919,9 @@
     </paper>
     <paper id="16">
       <title>Textual Features Indicative of Writing Proficiency in Elementary School <fixed-case>S</fixed-case>panish Documents</title>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <author><first>Diana</first><last>Dueñas Chávez</last></author>
-      <author><first>Arturo</first><last>Curiel Díaz</last></author>
+      <author id="arturo-curiel"><first>Arturo</first><last>Curiel Díaz</last></author>
       <pages>113–118</pages>
       <abstract>Childhood acquisition of written language is not straightforward. Writing skills evolve differently depending on external factors, such as the conditions in which children practice their productions and the quality of their instructors’ guidance. This can be challenging in low-income areas, where schools may struggle to ensure ideal acquisition conditions. Developing computational tools to support the learning process may counterweight negative environmental influences; however, few work exists on the use of information technologies to improve childhood literacy. This work centers around the computational study of Spanish word and syllable structure in documents written by 2nd and 3rd year elementary school students. The studied texts were compared against a corpus of short stories aimed at the same age group, so as to observe whether the children tend to produce similar written patterns as the ones they are expected to interpret at their literacy level. The obtained results show some significant differences between the two kinds of texts, pointing towards possible strategies for the implementation of new education software in support of written language acquisition.</abstract>
       <url hash="aeb93db0">W18-3716</url>
@@ -5931,7 +5931,7 @@
     <paper id="17">
       <title>Assessment of an Index for Measuring Pronunciation Difficulty</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>119–124</pages>
       <abstract>This study assesses an index for measur-ing the pronunciation difficulty of sen-tences (henceforth, pronounceability) based on the normalized edit distance from a reference sentence to a transcrip-tion of learners’ pronunciation. Pro-nounceability should be examined when language teachers use a computer-assisted language learning system for pronunciation learning to maintain the motivation of learners. However, unlike the evaluation of learners’ pronunciation performance, previous research did not focus on pronounceability not only for English but also for Asian languages. This study found that the normalized edit distance was reliable but not valid. The lack of validity appeared to be because of an English test used for determining the proficiency of learners.</abstract>
       <url hash="622f175a">W18-3717</url>
@@ -5950,7 +5950,7 @@
     </paper>
     <paper id="19">
       <title>From Fidelity to Fluency: Natural Language Processing for Translator Training</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>130–134</pages>
       <abstract>This study explores the use of natural language processing techniques to enhance bilingual lexical access beyond simple equivalents, to enable translators to navigate along a wider cross-lingual lexical space and more examples showing different translation strategies, which is essential for them to learn to produce not only faithful but also fluent translations.</abstract>
       <url hash="b34b7137">W18-3719</url>
@@ -5959,7 +5959,7 @@
     </paper>
     <paper id="20">
       <title>Countering Position Bias in Instructor Interventions in <fixed-case>MOOC</fixed-case> Discussion Forums</title>
-      <author><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
+      <author id="muthu-kumar-chandrasekaran"><first>Muthu Kumar</first><last>Chandrasekaran</last></author>
       <author><first>Min-Yen</first><last>Kan</last></author>
       <pages>135–142</pages>
       <abstract>We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2% in F1 and 24.4% in recall on average.</abstract>
@@ -6047,7 +6047,7 @@
     <paper id="27">
       <title>Detecting Simultaneously <fixed-case>C</fixed-case>hinese Grammar Errors Based on a <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model</title>
       <author><first>Yajun</first><last>Liu</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Mengjie</first><last>Zhong</last></author>
       <author><first>Hongchao</first><last>Ma</last></author>
       <pages>188–193</pages>
@@ -6071,7 +6071,7 @@
       <author><first>Shih-Hung</first><last>Wu</last></author>
       <author><first>Jun-Wei</first><last>Wang</last></author>
       <author><first>Liang-Pu</first><last>Chen</last></author>
-      <author><first>Ping-Che</first><last>Yang</last></author>
+      <author id="ping-che-yang"><first>Ping-Che</first><last>Yang</last></author>
       <pages>199–202</pages>
       <abstract>This paper reports how we build a Chinese Grammatical Error Diagnosis system in the NLPTEA-2018 CGED shared task. In 2018, we sent three runs with three different approaches. The first one is a pattern-based approach by frequent error pattern matching. The second one is a sequential labelling approach by conditional random fields (CRF). The third one is a rewriting approach by sequence to sequence (seq2seq) model. The three approaches have different properties that aim to optimize different performance metrics and the formal run results show the differences as we expected.</abstract>
       <url hash="90a0767d">W18-3729</url>
@@ -6093,10 +6093,10 @@
     <meta>
       <booktitle>Proceedings of the First Workshop on Linguistic Resources for Natural Language Processing</booktitle>
       <url hash="0ac2f8a2">W18-38</url>
-      <editor><first>Peter</first><last>Machonis</last></editor>
+      <editor id="peter-machonis"><first>Peter</first><last>Machonis</last></editor>
       <editor><first>Anabela</first><last>Barreiro</last></editor>
       <editor><first>Kristina</first><last>Kocijan</last></editor>
-      <editor><first>Max</first><last>Silberztein</last></editor>
+      <editor id="max-silberztein"><first>Max</first><last>Silberztein</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
       <month>August</month>
@@ -6109,7 +6109,7 @@
     </frontmatter>
     <paper id="1">
       <title>Corpus Phonetics: Past, Present, and Future</title>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <pages>1</pages>
       <url hash="ceb7736f">W18-3801</url>
       <abstract>Invited talk</abstract>
@@ -6126,7 +6126,7 @@
     <paper id="3">
       <title>Rule-based vs. Neural Net Approaches to Semantic Textual Similarity</title>
       <author><first>Linrui</first><last>Zhang</last></author>
-      <author><first>Dan</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan</first><last>Moldovan</last></author>
       <pages>12–17</pages>
       <url hash="1c050aed">W18-3803</url>
       <abstract>This paper presents a neural net approach to determine Semantic Textual Similarity (STS) using attention-based bidirectional Long Short-Term Memory Networks (Bi-LSTM). To this date, most of the traditional STS systems were rule-based that built on top of excessive use of linguistic features and resources. In this paper, we present an end-to-end attention-based Bi-LSTM neural network system that solely takes word-level features, without expensive feature engineering work or the usage of external resources. By comparing its performance with traditional rule-based systems against SemEval-2012 benchmark, we make an assessment on the limitations and strengths of neural net systems to rule-based systems on Semantic Textual Similarity.</abstract>
@@ -6171,8 +6171,8 @@
     </paper>
     <paper id="8">
       <title><fixed-case>STYLUS</fixed-case>: A Resource for Systematically Derived Language Usage</title>
-      <author><first>Bonnie</first><last>Dorr</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="bonnie-dorr"><first>Bonnie</first><last>Dorr</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>57–64</pages>
       <url hash="35e9d724">W18-3808</url>
       <abstract>We describe a resource derived through extraction of a set of argument realizations from an existing lexical-conceptual structure (LCS) Verb Database of 500 verb classes (containing a total of 9525 verb entries) to include information about realization of arguments for a range of different verb classes. We demonstrate that our extended resource, called STYLUS (SysTematicallY Derived Language USe), enables systematic derivation of regular patterns of language usage without requiring manual annotation. We posit that both spatially oriented applications such as robot navigation and more general applications such as narrative generation require a layered representation scheme where a set of primitives (often grounded in space/motion such as GO) is coupled with a representation of constraints at the syntax-semantics interface. We demonstrate that the resulting resource covers three cases of lexico-semantic operations applicable to both language understanding and language generation.</abstract>
@@ -6181,7 +6181,7 @@
     <paper id="9">
       <title>Contemporary <fixed-case>A</fixed-case>mharic Corpus: Automatically Morpho-Syntactically Tagged <fixed-case>A</fixed-case>mharic Corpus</title>
       <author><first>Andargachew Mekonnen</first><last>Gezmu</last></author>
-      <author><first>Binyam Ephrem</first><last>Seyoum</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam Ephrem</first><last>Seyoum</last></author>
       <author><first>Michael</first><last>Gasser</last></author>
       <author><first>Andreas</first><last>Nürnberger</last></author>
       <pages>65–70</pages>
@@ -6193,7 +6193,7 @@
       <title>Gold Corpus for Telegraphic Summarization</title>
       <author><first>Chanakya</first><last>Malireddy</last></author>
       <author><first>Srivenkata N M</first><last>Somisetty</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>71–77</pages>
       <url hash="2488ae03">W18-3810</url>
       <abstract>Most extractive summarization techniques operate by ranking all the source sentences and then select the top ranked sentences as the summary. Such methods are known to produce good summaries, especially when applied to news articles and scientific texts. However, they don’t fare so well when applied to texts such as fictional narratives, which don’t have a single central or recurrent theme. This is because usually the information or plot of the story is spread across several sentences. In this paper, we discuss a different summarization technique called Telegraphic Summarization. Here, we don’t select whole sentences, rather pick short segments of text spread across sentences, as the summary. We have tailored a set of guidelines to create such summaries and, using the same, annotate a gold corpus of 200 English short stories.</abstract>
@@ -6210,15 +6210,15 @@
     </paper>
     <paper id="12">
       <title>Parallel Corpora for bi-Directional Statistical Machine Translation for Seven <fixed-case>E</fixed-case>thiopian Language Pairs</title>
-      <author><first>Solomon</first><last>Teferra Abate</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon</first><last>Teferra Abate</last></author>
       <author><first>Michael</first><last>Melese</last></author>
-      <author><first>Martha</first><last>Yifiru Tachbelie</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha</first><last>Yifiru Tachbelie</last></author>
       <author><first>Million</first><last>Meshesha</last></author>
       <author><first>Solomon</first><last>Atinafu</last></author>
       <author><first>Wondwossen</first><last>Mulugeta</last></author>
       <author><first>Yaregal</first><last>Assabie</last></author>
       <author><first>Hafte</first><last>Abera</last></author>
-      <author><first>Binyam</first><last>Ephrem</last></author>
+      <author id="binyam-ephrem-seyoum"><first>Binyam</first><last>Ephrem</last></author>
       <author><first>Tewodros</first><last>Abebe</last></author>
       <author><first>Wondimagegnhue</first><last>Tsegaye</last></author>
       <author><first>Amanuel</first><last>Lemma</last></author>
@@ -6232,7 +6232,7 @@
     <paper id="13">
       <title>Using Embeddings to Compare <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Frames Across Languages</title>
       <author><first>Jennifer</first><last>Sikos</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>91–101</pages>
       <url hash="4d664573">W18-3813</url>
       <abstract>Much interest in Frame Semantics is fueled by the substantial extent of its applicability across languages. At the same time, lexicographic studies have found that the applicability of individual frames can be diminished by cross-lingual divergences regarding polysemy, syntactic valency, and lexicalization. Due to the large effort involved in manual investigations, there are so far no broad-coverage resources with “problematic” frames for any language pair. Our study investigates to what extent multilingual vector representations of frames learned from manually annotated corpora can address this need by serving as a wide coverage source for such divergences. We present a case study for the language pair English — German using the FrameNet and SALSA corpora and find that inferences can be made about cross-lingual frame applicability using a vector space model.</abstract>
@@ -6250,8 +6250,8 @@
     </paper>
     <paper id="15">
       <title>Towards an Automatic Classification of Illustrative Examples in a Large <fixed-case>J</fixed-case>apanese-<fixed-case>F</fixed-case>rench Dictionary Obtained by <fixed-case>OCR</fixed-case></title>
-      <author><first>Christian</first><last>Boitet</last></author>
-      <author><first>Mathieu</first><last>Mangeot</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
+      <author id="mathieu-mangeot"><first>Mathieu</first><last>Mangeot</last></author>
       <author><first>Mutsuko</first><last>Tomokiyo</last></author>
       <pages>112–121</pages>
       <url hash="c93d125d">W18-3815</url>
@@ -6271,7 +6271,7 @@
       <title>Enabling Code-Mixed Translation: Parallel Corpus Creation and <fixed-case>MT</fixed-case> Augmentation Approach</title>
       <author><first>Mrinal</first><last>Dhar</last></author>
       <author><first>Vaibhav</first><last>Kumar</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>131–140</pages>
       <url hash="d5e3b3c2">W18-3817</url>
       <abstract>Code-mixing, use of two or more languages in a single sentence, is ubiquitous; generated by multi-lingual speakers across the world. The phenomenon presents itself prominently in social media discourse. Consequently, there is a growing need for translating code-mixed hybrid language into standard languages. However, due to the lack of gold parallel data, existing machine translation systems fail to properly translate code-mixed text. In an effort to initiate the task of machine translation of code-mixed content, we present a newly created parallel corpus of code-mixed English-Hindi and English. We selected previously available English-Hindi code-mixed data as a starting point for the creation of our parallel corpus. We then chose 4 human translators, fluent in both English and Hindi, for translating the 6088 code-mixed English-Hindi sentences to English. With the help of the created parallel corpus, we analyzed the structure of English-Hindi code-mixed data and present a technique to augment run-of-the-mill machine translation (MT) approaches that can help achieve superior translations without the need for specially designed translation systems. We present an augmentation pipeline for existing MT approaches, like Phrase Based MT (Moses) and Neural MT, to improve the translation of code-mixed text. The augmentation pipeline is presented as a pre-processing step and can be plugged with any existing MT system, which we demonstrate by improving translations done by systems like Moses, Google Neural Machine Translation System (NMTS) and Bing Translator for English-Hindi code-mixed content.</abstract>
@@ -6283,10 +6283,10 @@
       <booktitle>Proceedings of the Fifth Workshop on <fixed-case>NLP</fixed-case> for Similar Languages, Varieties and Dialects (<fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2018)</booktitle>
       <url hash="f8a0ab7a">W18-39</url>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Ahmed</first><last>Ali</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
@@ -6305,16 +6305,16 @@
       <author><first>Preslav</first><last>Nakov</last></author>
       <author><first>Ahmed</first><last>Ali</last></author>
       <author><first>Suwon</first><last>Shon</last></author>
-      <author><first>James</first><last>Glass</last></author>
+      <author id="james-glass"><first>James</first><last>Glass</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <author><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
       <author><first>Stefan</first><last>Grondelaers</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <author><first>Dirk</first><last>Speelman</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
       <author><first>Bornini</first><last>Lahiri</last></author>
       <author><first>Mayank</first><last>Jain</last></author>
@@ -6326,9 +6326,9 @@
     <paper id="2">
       <title>Encoder-Decoder Methods for Text Normalization</title>
       <author><first>Massimo</first><last>Lusetti</last></author>
-      <author><first>Tatyana</first><last>Ruzsics</last></author>
-      <author><first>Anne</first><last>Göhring</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
+      <author id="tatyana-ruzsics"><first>Tatyana</first><last>Ruzsics</last></author>
+      <author id="anne-gohring"><first>Anne</first><last>Göhring</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
       <author><first>Elisabeth</first><last>Stark</last></author>
       <pages>18–28</pages>
       <url hash="f8580db9">W18-3902</url>
@@ -6348,7 +6348,7 @@
     </paper>
     <paper id="4">
       <title>Sub-label dependencies for Neural Morphological Tagging – The Joint Submission of <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>olorado and <fixed-case>U</fixed-case>niversity of <fixed-case>H</fixed-case>elsinki for <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2018</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Senka</first><last>Drobac</last></author>
       <pages>37–45</pages>
       <url hash="0eb5b796">W18-3904</url>
@@ -6377,7 +6377,7 @@
       <title>Iterative Language Model Adaptation for <fixed-case>I</fixed-case>ndo-<fixed-case>A</fixed-case>ryan Language Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>66–75</pages>
       <url hash="176078c2">W18-3907</url>
       <abstract>This paper presents the experiments and results obtained by the SUKI team in the Indo-Aryan Language Identification shared task of the VarDial 2018 Evaluation Campaign. The shared task was an open one, but we did not use any corpora other than what was distributed by the organizers. A total of eight teams provided results for this shared task. Our submission using a HeLI-method based language identifier with iterative language model adaptation obtained the best results in the shared task with a macro F1-score of 0.958.</abstract>
@@ -6385,7 +6385,7 @@
     </paper>
     <paper id="8">
       <title>Language and the Shifting Sands of Domain, Space and Time (Invited Talk)</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>76</pages>
       <url hash="78b0d40a">W18-3908</url>
       <abstract>In this talk, I will first present recent work on domain debiasing in the context of language identification, then discuss a new line of work on language variety analysis in the form of dialect map generation. Finally, I will reflect on the interplay between time and space on language variation, and speculate on how these can be captured in a single model.</abstract>
@@ -6393,7 +6393,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>U</fixed-case>nibuc<fixed-case>K</fixed-case>ernel Reloaded: First Place in <fixed-case>A</fixed-case>rabic Dialect Identification for the Second Year in a Row</title>
-      <author><first>Andrei</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei</first><last>Butnaru</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
       <pages>77–87</pages>
       <url hash="34902736">W18-3909</url>
@@ -6404,7 +6404,7 @@
       <title>Varying image description tasks: spoken versus written descriptions</title>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Ruud</first><last>Koolen</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>88–100</pages>
       <url hash="e154c772">W18-3910</url>
       <abstract>Automatic image description systems are commonly trained and evaluated on written image descriptions. At the same time, these systems are often used to provide spoken descriptions (e.g. for visually impaired users) through apps like TapTapSee or Seeing AI. This is not a problem, as long as spoken and written descriptions are very similar. However, linguistic research suggests that spoken language often differs from written language. These differences are not regular, and vary from context to context. Therefore, this paper investigates whether there are differences between written and spoken image descriptions, even if they are elicited through similar tasks. We compare descriptions produced in two languages (English and Dutch), and in both languages observe substantial differences between spoken and written descriptions. Future research should see if users prefer the spoken over the written style and, if so, aim to emulate spoken descriptions.</abstract>
@@ -6413,7 +6413,7 @@
     <paper id="11">
       <title>Transfer Learning for <fixed-case>B</fixed-case>ritish <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Modelling</title>
       <author><first>Boris</first><last>Mocialov</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Graham</first><last>Turner</last></author>
       <pages>101–110</pages>
       <url hash="e7bbe47c">W18-3911</url>
@@ -6440,8 +6440,8 @@
     <paper id="14">
       <title>Neural Network Architectures for <fixed-case>A</fixed-case>rabic Dialect Identification</title>
       <author><first>Elise</first><last>Michon</last></author>
-      <author><first>Minh Quang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>Minh Quang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>128–136</pages>
       <url hash="c8978aff">W18-3914</url>
@@ -6452,7 +6452,7 @@
       <title><fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-based Experiments in Discriminating Between <fixed-case>D</fixed-case>utch and <fixed-case>F</fixed-case>lemish Subtitles</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>137–144</pages>
       <url hash="175aeb5e">W18-3915</url>
       <abstract>This paper presents the experiments and results obtained by the SUKI team in the Discriminating between Dutch and Flemish in Subtitles shared task of the VarDial 2018 Evaluation Campaign. Our best submission was ranked 8th, obtaining macro F1-score of 0.61. Our best results were produced by a language identifier implementing the HeLI method without any modifications. We describe, in addition to the best method we used, some of the experiments we did with unsupervised clustering.</abstract>
@@ -6460,9 +6460,9 @@
     </paper>
     <paper id="16">
       <title>Measuring language distance among historical varieties using perplexity. Application to <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese.</title>
-      <author><first>Jose Ramom</first><last>Pichel Campos</last></author>
+      <author id="jose-ramom-pichel-campos"><first>Jose Ramom</first><last>Pichel Campos</last></author>
       <author><first>Pablo</first><last>Gamallo</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>145–155</pages>
       <url hash="bf7308db">W18-3916</url>
       <abstract>The objective of this work is to quantify, with a simple and robust measure, the distance between historical varieties of a language. The measure will be inferred from text corpora corresponding to historical periods. Different approaches have been proposed for similar aims: Language Identification, Phylogenetics, Historical Linguistics or Dialectology. In our approach, we used a perplexity-based measure to calculate language distance between all the historical periods of a specific language: European Portuguese. Perplexity has also proven to be a robust metric to calculate distance between languages. However, this measure has not been tested yet to identify diachronic periods within the historical evolution of a specific language. For this purpose, a historical Portuguese corpus has been constructed from different open sources containing texts with close original spelling. The results of our experiments show that Portuguese keeps an important degree of homogeneity over time. We anticipate this metric to be a starting point to be applied to other languages.</abstract>
@@ -6494,11 +6494,11 @@
     </paper>
     <paper id="20">
       <title>Discriminating between <fixed-case>I</fixed-case>ndo-<fixed-case>A</fixed-case>ryan Languages Using <fixed-case>SVM</fixed-case> Ensembles</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>178–184</pages>
       <url hash="7c780955">W18-3920</url>
       <abstract>In this paper we present a system based on SVM ensembles trained on characters and words to discriminate between five similar languages of the Indo-Aryan family: Hindi, Braj Bhasha, Awadhi, Bhojpuri, and Magahi. The system competed in the Indo-Aryan Language Identification (ILI) shared task organized within the VarDial Evaluation Campaign 2018. Our best entry in the competition, named ILIdentification, scored 88.95% F1 score and it was ranked 3rd out of 8 teams.</abstract>
@@ -6509,7 +6509,7 @@
       <author><first>Divyanshu</first><last>Gupta</last></author>
       <author><first>Gourav</first><last>Dhakad</last></author>
       <author><first>Jayprakash</first><last>Gupta</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>185–190</pages>
       <url hash="18e94f2d">W18-3921</url>
       <abstract>Text language Identification is a Natural Language Processing task of identifying and recognizing a given language out of many different languages from a piece of text. This paper describes our submission to the ILI 2018 shared-task, which includes the identification of 5 closely related Indo-Aryan languages. We developed a word-level LSTM(Long Short-term Memory) model, a specific type of Recurrent Neural Network model, for this task. Given a sentence, our model embeds each word of the sentence and convert into its trainable word embedding, feeds them into our LSTM network and finally predict the language. We obtained an F1 macro score of 0.836, ranking 5th in the task.</abstract>
@@ -6518,7 +6518,7 @@
     <paper id="22">
       <title>Exploring Classifier Combinations for Language Variety Identification</title>
       <author><first>Tim</first><last>Kreutz</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>191–198</pages>
       <url hash="f333a394">W18-3922</url>
       <abstract>This paper describes CLiPS’s submissions for the Discriminating between Dutch and Flemish in Subtitles (DFS) shared task at VarDial 2018. We explore different ways to combine classifiers trained on different feature groups. Our best system uses two Linear SVM classifiers; one trained on lexical features (word n-grams) and one trained on syntactic features (PoS n-grams). The final prediction for a document to be in Flemish Dutch or Netherlandic Dutch is made by the classifier that outputs the highest probability for one of the two labels. This confidence vote approach outperforms a meta-classifier on the development data and on the test data.</abstract>
@@ -6526,7 +6526,7 @@
     </paper>
     <paper id="23">
       <title>Identification of Differences between <fixed-case>D</fixed-case>utch Language Varieties with the <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial2018 <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish Subtitle Data</title>
-      <author><first>Hans</first><last>van Halteren</last></author>
+      <author id="hans-van-halteren"><first>Hans</first><last>van Halteren</last></author>
       <author><first>Nelleke</first><last>Oostdijk</last></author>
       <pages>199–209</pages>
       <url hash="0babc3e7">W18-3923</url>
@@ -6547,8 +6547,8 @@
       <author><first>Fernando</first><last>Benites</last></author>
       <author><first>Ralf</first><last>Grubenmann</last></author>
       <author><first>Pius</first><last>von Däniken</last></author>
-      <author><first>Dirk</first><last>von Grünigen</last></author>
-      <author><first>Jan</first><last>Deriu</last></author>
+      <author id="dirk-von-gruenigen"><first>Dirk</first><last>von Grünigen</last></author>
+      <author id="jan-milan-deriu"><first>Jan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>218–227</pages>
       <url hash="9e5aed24">W18-3925</url>
@@ -6579,7 +6579,7 @@
       <title>When Simple n-gram Models Outperform Syntactic Approaches: Discriminating between <fixed-case>D</fixed-case>utch and <fixed-case>F</fixed-case>lemish</title>
       <author><first>Martin</first><last>Kroon</last></author>
       <author><first>Masha</first><last>Medvedeva</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>244–253</pages>
       <url hash="60196067">W18-3928</url>
       <abstract>In this paper we present the results of our participation in the Discriminating between Dutch and Flemish in Subtitles VarDial 2018 shared task. We try techniques proven to work well for discriminating between language varieties as well as explore the potential of using syntactic features, i.e. hierarchical syntactic subtrees. We experiment with different combinations of features. Discriminating between these two languages turned out to be a very hard task, not only for a machine: human performance is only around 0.51 F1 score; our best system is still a simple Naive Bayes model with word unigrams and bigrams. The system achieved an F1 score (macro) of 0.62, which ranked us 4th in the shared task.</abstract>
@@ -6589,7 +6589,7 @@
       <title><fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-based Experiments in <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Dialect Identification</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>254–262</pages>
       <url hash="c89a1948">W18-3929</url>
       <abstract>In this paper we present the experiments and results by the SUKI team in the German Dialect Identification shared task of the VarDial 2018 Evaluation Campaign. Our submission using HeLI with adaptive language models obtained the best results in the shared task with a macro F1-score of 0.686, which is clearly higher than the other submitted results. Without some form of unsupervised adaptation on the test set, it might not be possible to reach as high an F1-score with the level of domain difference between the datasets of the shared task. We describe the methods used in detail, as well as some additional experiments carried out during the shared task.</abstract>
@@ -6606,7 +6606,7 @@
     </paper>
     <paper id="31">
       <title>A Neural Approach to Language Variety Translation</title>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <pages>275–282</pages>
@@ -6624,9 +6624,9 @@
     </paper>
     <paper id="33">
       <title><fixed-case>G</fixed-case>erman Dialect Identification Using Classifier Ensembles</title>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
       <author><first>Shervin</first><last>Malmasi</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>288–294</pages>
       <url hash="c184b917">W18-3933</url>
       <abstract>In this paper we present the GDI classification entry to the second German Dialect Identification (GDI) shared task organized within the scope of the VarDial Evaluation Campaign 2018. We present a system based on SVM classifier ensembles trained on characters and words. The system was trained on a collection of speech transcripts of five Swiss-German dialects provided by the organizers. The transcripts included in the dataset contained speakers from Basel, Bern, Lucerne, and Zurich. Our entry in the challenge reached 62.03% F1 score and was ranked third out of eight teams.</abstract>
@@ -6637,7 +6637,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Semantic Deep Learning</booktitle>
       <url hash="eaeab46d">W18-40</url>
-      <editor><first>Luis Espinosa</first><last>Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis Espinosa</first><last>Anke</last></editor>
       <editor><first>Dagmar</first><last>Gromann</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -6654,7 +6654,7 @@
       <title>Replicated <fixed-case>S</fixed-case>iamese <fixed-case>LSTM</fixed-case> in Ticketing System for Similarity Learning and Retrieval in Asymmetric Texts</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
       <author><first>Bernt</first><last>Andrassy</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>1–11</pages>
       <url hash="fb41a1b1">W18-4001</url>
       <abstract>The goal of our industrial ticketing system is to retrieve a relevant solution for an input query, by matching with historical tickets stored in knowledge base. A query is comprised of subject and description, while a historical ticket consists of subject, description and solution. To retrieve a relevant solution, we use textual similarity paradigm to learn similarity in the query and historical tickets. The task is challenging due to significant term mismatch in the query and ticket pairs of asymmetric lengths, where subject is a short text but description and solution are multi-sentence texts. We present a novel Replicated Siamese LSTM model to learn similarity in asymmetric text pairs, that gives 22% and 7% gain (Accuracy@10) for retrieval task, respectively over unsupervised and supervised baselines. We also show that the topic and distributed semantic features for short and long texts improved both similarity learning and retrieval.</abstract>
@@ -6662,9 +6662,9 @@
     </paper>
     <paper id="2">
       <title>Word-Embedding based Content Features for Automated Oral Proficiency Scoring</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Anastassia</first><last>Loukina</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <author><first>Matthew</first><last>Mulholland</last></author>
       <author><first>Xinhao</first><last>Wang</last></author>
       <author><first>Ikkyu</first><last>Choi</last></author>
@@ -6770,7 +6770,7 @@
       <title>Enhancing Cohesion and Coherence of Fake Text to Improve Believability for Deceiving Cyber Attackers</title>
       <author><first>Prakruthi</first><last>Karuna</last></author>
       <author><first>Hemant</first><last>Purohit</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Sushil</first><last>Jajodia</last></author>
       <author><first>Rajesh</first><last>Ganesan</last></author>
       <pages>31–40</pages>
@@ -6820,7 +6820,7 @@
       <author><first>Diana I.</first><last>Luna-Umanzor</last></author>
       <author><first>Alma E.</first><last>Ríos-Ponce</last></author>
       <author><first>Balderas-Pliego</first><last>Mariana</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
       <pages>85–93</pages>
       <url hash="45326500">W18-4109</url>
       <abstract>Older adults tend to suffer a decline in some of their cognitive capabilities, being language one of least affected processes. Word association norms (WAN) also known as free word associations reflect word-word relations, the participant reads or hears a word and is asked to write or say the first word that comes to mind. Free word associations show how the organization of semantic memory remains almost unchanged with age. We have performed a WAN task with very small samples of older adults with Alzheimer’s disease (AD), vascular dementia (VaD) and mixed dementia (MxD), and also with a control group of typical aging adults, matched by age, sex and education. All of them are native speakers of Mexican Spanish. The results show, as expected, that Alzheimer disease has a very important impact in lexical retrieval, unlike vascular and mixed dementia. This suggests that linguistic tests elaborated from WAN can be also used for detecting AD at early stages.</abstract>
@@ -6892,12 +6892,12 @@
       <editor><first>Ben</first><last>Miller</last></editor>
       <editor><first>Marieke</first><last>van Erp</last></editor>
       <editor><first>Piek</first><last>Vossen</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <editor><first>Teruko</first><last>Mitamura</last></editor>
       <editor><first>David</first><last>Caswell</last></editor>
-      <editor><first>Susan W.</first><last>Brown</last></editor>
-      <editor><first>Claire</first><last>Bonial</last></editor>
+      <editor id="susan-windisch-brown"><first>Susan W.</first><last>Brown</last></editor>
+      <editor id="claire-bonial"><first>Claire</first><last>Bonial</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, U.S.A</address>
       <month>August</month>
@@ -6910,7 +6910,7 @@
     </frontmatter>
     <paper id="1">
       <title>Every Object Tells a Story</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
       <pages>1–6</pages>
       <url hash="3e49d4ba">W18-4301</url>
@@ -6941,7 +6941,7 @@
       <author><first>W. Victor</first><last>Yarlott</last></author>
       <author><first>Cristina</first><last>Cornelio</last></author>
       <author><first>Tian</first><last>Gao</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>25–33</pages>
       <url hash="d248e13e">W18-4304</url>
       <abstract>Discourse structure is a key aspect of all forms of text, providing valuable information both to humans and machines. We applied the hierarchical theory of news discourse developed by van Dijk to examine how paragraphs operate as units of discourse structure within news articles—what we refer to here as document-level discourse. This document-level discourse provides a characterization of the content of each paragraph that describes its relation to the events presented in the article (such as main events, backgrounds, and consequences) as well as to other components of the story (such as commentary and evaluation). The purpose of a news discourse section is of great utility to story understanding as it affects both the importance and temporal order of items introduced in the text—therefore, if we know the news discourse purpose for different sections, we should be able to better rank events for their importance and better construct timelines. We test two hypotheses: first, that people can reliably annotate news articles with van Dijk’s theory; second, that we can reliably predict these labels using machine learning. We show that people have a high degree of agreement with each other when annotating the theory (F1 &gt; 0.8, Cohen’s kappa &gt; 0.6), demonstrating that it can be both learned and reliably applied by human annotators. Additionally, we demonstrate first steps toward machine learning of the theory, achieving a performance of F1 = 0.54, which is 65% of human performance. Moreover, we have generated a gold-standard, adjudicated corpus of 50 documents for document-level discourse annotation based on the ACE Phase 2 corpus.</abstract>
@@ -6970,7 +6970,7 @@
       <author><first>Christopher</first><last>Reale</last></author>
       <author><first>Claire</first><last>Bonial</last></author>
       <author><first>Heesung</first><last>Kwon</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>55–60</pages>
       <url hash="69b0c01f">W18-4307</url>
       <abstract>We propose a method to improve human activity recognition in video by leveraging semantic information about the target activities from an expert-defined linguistic resource, VerbNet. Our hypothesis is that activities that share similar event semantics, as defined by the semantic predicates of VerbNet, will be more likely to share some visual components. We use a deep convolutional neural network approach as a baseline and incorporate linguistic information from VerbNet through multi-task learning. We present results of experiments showing the added information has negligible impact on recognition performance. We discuss how this may be because the lexical semantic information defined by VerbNet is generally not visually salient given the video processing approach used here, and how we may handle this in future approaches.</abstract>
@@ -7015,9 +7015,9 @@
       <booktitle>Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (<fixed-case>TRAC</fixed-case>-2018)</booktitle>
       <url hash="b63e1957">W18-44</url>
       <editor><first>Ritesh</first><last>Kumar</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
       <month>August</month>
@@ -7062,7 +7062,7 @@
     <paper id="4">
       <title>Fully Connected Neural Network with Advance Preprocessor to Identify Aggression over <fixed-case>F</fixed-case>acebook and <fixed-case>T</fixed-case>witter</title>
       <author><first>Kashyap</first><last>Raiyani</last></author>
-      <author><first>Teresa</first><last>Gonçalves</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Gonçalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <author><first>Vitor Beires</first><last>Nogueira</last></author>
       <pages>28–41</pages>
@@ -7073,7 +7073,7 @@
     <paper id="5">
       <title>Cyberbullying Intervention Based on Convolutional Neural Networks</title>
       <author><first>Qianjia</first><last>Huang</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>Jianhong</first><last>Zhang</last></author>
       <author><first>David</first><last>Van Bruwaene</last></author>
       <pages>42–51</pages>
@@ -7136,7 +7136,7 @@
     <paper id="11">
       <title>Aggression Detection in Social Media: Using Deep Neural Networks, Data Augmentation, and Pseudo Labeling</title>
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>90–97</pages>
       <url hash="b01504d4">W18-4411</url>
       <abstract>With the advent of the read-write web which facilitates social interactions in online spaces, the rise of anti-social behaviour in online spaces has attracted the attention of researchers. In this paper, we address the challenge of automatically identifying aggression in social media posts. Our team, saroyehun, participated in the English track of the Aggression Detection in Social Media Shared Task. On this task, we investigate the efficacy of deep neural network models of varying complexity. Our results reveal that deep neural network models require more data points to do better than an NBSVM linear baseline based on character n-grams. Our improved deep neural network models were trained on augmented data and pseudo labeled examples. Our LSTM classifier receives a weighted macro-F1 score of 0.6425 to rank first overall on the Facebook subtask of the shared task. On the social media sub-task, our CNN-LSTM model records a weighted macro-F1 score of 0.5920 to place third overall.</abstract>
@@ -7156,7 +7156,7 @@
       <title>Degree based Classification of Harmful Speech using <fixed-case>T</fixed-case>witter Data</title>
       <author><first>Sanjana</first><last>Sharma</last></author>
       <author><first>Saksham</first><last>Agrawal</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>106–112</pages>
       <url hash="64452789">W18-4413</url>
       <abstract>Harmful speech has various forms and it has been plaguing the social media in different ways. If we need to crackdown different degrees of hate speech and abusive behavior amongst it, the classification needs to be based on complex ramifications which needs to be defined and hold accountable for, other than racist, sexist or against some particular group and community. This paper primarily describes how we created an ontological classification of harmful speech based on degree of hateful intent and used it to annotate twitter data accordingly. The key contribution of this paper is the new dataset of tweets we created based on ontological classes and degrees of harmful speech found in the text. We also propose supervised classification system for recognizing these respective harmful speech classes in the texts hence. This serves as a preliminary work to lay down foundation on defining different classes of harmful speech and subsequent work will be done in making it’s automatic detection more robust and efficient.</abstract>
@@ -7164,7 +7164,7 @@
     </paper>
     <paper id="14">
       <title>Aggressive Language Identification Using Word Embeddings and Sentiment Features</title>
-      <author><first>Constantin</first><last>Orăsan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orăsan</last></author>
       <pages>113–119</pages>
       <url hash="ffa0ffb1">W18-4414</url>
       <abstract>This paper describes our participation in the First Shared Task on Aggression Identification. The method proposed relies on machine learning to identify social media texts which contain aggression. The main features employed by our method are information extracted from word embeddings and the output of a sentiment analyser. Several machine learning methods and different combinations of features were tried. The official submissions used Support Vector Machines and Random Forests. The official evaluation showed that for texts similar to the ones in the training dataset Random Forests work best, whilst for texts which are different SVMs are a better choice. The evaluation also showed that despite its simplicity the method performs well when compared with more elaborated methods.</abstract>
@@ -7173,7 +7173,7 @@
     <paper id="15">
       <title>Aggression Detection in Social Media using Deep Neural Networks</title>
       <author><first>Sreekanth</first><last>Madisetty</last></author>
-      <author><first>Maunendra</first><last>Sankar Desarkar</last></author>
+      <author id="maunendra-sankar-desarkar"><first>Maunendra</first><last>Sankar Desarkar</last></author>
       <pages>120–127</pages>
       <url hash="97a82033">W18-4415</url>
       <abstract>With the rise of user-generated content in social media coupled with almost non-existent moderation in many such systems, aggressive contents have been observed to rise in such forums. In this paper, we work on the problem of aggression detection in social media. Aggression can sometimes be expressed directly or overtly or it can be hidden or covert in the text. On the other hand, most of the content in social media is non-aggressive in nature. We propose an ensemble based system to classify an input post to into one of three classes, namely, Overtly Aggressive, Covertly Aggressive, and Non-aggressive. Our approach uses three deep learning methods, namely, Convolutional Neural Networks (CNN) with five layers (input, convolution, pooling, hidden, and output), Long Short Term Memory networks (LSTM), and Bi-directional Long Short Term Memory networks (Bi-LSTM). A majority voting based ensemble method is used to combine these classifiers (CNN, LSTM, and Bi-LSTM). We trained our method on Facebook comments dataset and tested on Facebook comments (in-domain) and other social media posts (cross-domain). Our system achieves the F1-score (weighted) of 0.604 for Facebook posts and 0.508 for social media posts.</abstract>
@@ -7195,7 +7195,7 @@
       <title>Cyberbullying Detection Task: the <fixed-case>EBSI</fixed-case>-<fixed-case>LIA</fixed-case>-<fixed-case>UNAM</fixed-case> System (<fixed-case>ELU</fixed-case>) at <fixed-case>COLING</fixed-case>’18 <fixed-case>TRAC</fixed-case>-1</title>
       <author><first>Ignacio</first><last>Arroyo-Fernández</last></author>
       <author><first>Dominic</first><last>Forest</last></author>
-      <author><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
+      <author id="juan-manuel-torres-moreno"><first>Juan-Manuel</first><last>Torres-Moreno</last></author>
       <author><first>Mauricio</first><last>Carrasco-Ruiz</last></author>
       <author><first>Thomas</first><last>Legeleux</last></author>
       <author><first>Karen</first><last>Joannette</last></author>
@@ -7218,7 +7218,7 @@
       <author><first>Ahmed</first><last>Husseini Orabi</last></author>
       <author><first>Mahmoud</first><last>Husseini Orabi</last></author>
       <author><first>Qianjia</first><last>Huang</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <author><first>David</first><last>Van Bruwaene</last></author>
       <pages>159–165</pages>
       <url hash="2e080307">W18-4419</url>
@@ -7247,7 +7247,7 @@
     <paper id="22">
       <title>Combining Shallow and Deep Learning for Aggressive Text Detection</title>
       <author><first>Viktor</first><last>Golem</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>188–198</pages>
       <url hash="b98c292c">W18-4422</url>
@@ -7269,12 +7269,12 @@
     <meta>
       <booktitle>Proceedings of the Second Joint <fixed-case>SIGHUM</fixed-case> Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</booktitle>
       <url hash="6469046e">W18-45</url>
-      <editor><first>Beatrice</first><last>Alex</last></editor>
+      <editor id="beatrice-alex"><first>Beatrice</first><last>Alex</last></editor>
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Feldman</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico</address>
       <month>August</month>
@@ -7288,8 +7288,8 @@
     <paper id="1">
       <title>Learning Diachronic Analogies to Analyze Concept Change</title>
       <author><first>Matthias</first><last>Orlikowski</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <pages>1–11</pages>
       <url hash="543818e0">W18-4501</url>
       <abstract>We propose to study the evolution of concepts by learning to complete diachronic analogies between lists of terms which relate to the same concept at different points in time. We present a number of models based on operations on word embedddings that correspond to different assumptions about the characteristics of diachronic analogies and change in concept vocabularies. These are tested in a quantitative evaluation for nine different concepts on a corpus of Dutch newspapers from the 1950s and 1980s. We show that a model which treats the concept terms as analogous and learns weights to compensate for diachronic changes (weighted linear combination) is able to more accurately predict the missing term than a learned transformation and two baselines for most of the evaluated concepts. We also find that all models tend to be coherent in relation to the represented concept, but less discriminative in regard to other concepts. Additionally, we evaluate the effect of aligning the time-specific embedding spaces using orthogonal Procrustes, finding varying effects on performance, depending on the model, concept and evaluation metric. For the weighted linear combination, however, results improve with alignment in a majority of cases. All related code is released publicly.</abstract>
@@ -7375,7 +7375,7 @@
       <author><first>Mika</first><last>Hämäläinen</last></author>
       <author><first>Tanja</first><last>Säily</last></author>
       <author><first>Jack</first><last>Rueter</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Eetu</first><last>Mäkelä</last></author>
       <pages>87–96</pages>
       <url hash="5c5616f2">W18-4510</url>
@@ -7402,9 +7402,9 @@
     </paper>
     <paper id="13">
       <title>A Method for Human-Interpretable Paraphrasticality Prediction</title>
-      <author><first>Maria</first><last>Moritz</last></author>
+      <author id="maria-berger"><first>Maria</first><last>Moritz</last></author>
       <author><first>Johannes</first><last>Hellrich</last></author>
-      <author><first>Sven</first><last>Büchel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Büchel</last></author>
       <pages>113–118</pages>
       <url hash="e1bbcecf">W18-4513</url>
       <abstract>The detection of reused text is important in a wide range of disciplines. However, even as research in the field of plagiarism detection is constantly improving, heavily modified or paraphrased text is still challenging for current methodologies. For historical texts, these problems are even more severe, since text sources were often subject to stronger and more frequent modifications. Despite the need for tools to automate text criticism, e.g., tracing modifications in historical text, algorithmic support is still limited. While current techniques can tell if and how frequently a text has been modified, very little work has been done on determining the degree and kind of paraphrastic modification—despite such information being of substantial interest to scholars. We present a human-interpretable, feature-based method to measure paraphrastic modification. Evaluating our technique on three data sets, we find that our approach performs competitive to text similarity scores borrowed from machine translation evaluation, being much harder to interpret.</abstract>
@@ -7421,7 +7421,7 @@
     </paper>
     <paper id="15">
       <title>Towards Coreference for Literary Text: Analyzing Domain-Specific Phenomena</title>
-      <author><first>Ina</first><last>Roesiger</last></author>
+      <author id="ina-roesiger"><first>Ina</first><last>Roesiger</last></author>
       <author><first>Sarah</first><last>Schulz</last></author>
       <author><first>Nils</first><last>Reiter</last></author>
       <pages>129–138</pages>
@@ -7452,7 +7452,7 @@
       <title>Induction of a Large-Scale Knowledge Graph from the <fixed-case>R</fixed-case>egesta <fixed-case>I</fixed-case>mperii</title>
       <author><first>Juri</first><last>Opitz</last></author>
       <author><first>Leo</first><last>Born</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <pages>159–168</pages>
       <url hash="9e7f97cb">W18-4518</url>
       <abstract>We induce and visualize a Knowledge Graph over the Regesta Imperii (RI), an important large-scale resource for medieval history research. The RI comprise more than 150,000 digitized abstracts of medieval charters issued by the Roman-German kings and popes distributed over many European locations and a time span of more than 700 years. Our goal is to provide a resource for historians to visualize and query the RI, possibly aiding medieval history research. The resulting medieval graph and visualization tools are shared publicly.</abstract>
@@ -7464,9 +7464,9 @@
       <booktitle>Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing</booktitle>
       <url hash="7410da5e">W18-46</url>
       <editor><first>Leonor</first><last>Becerra-Bonache</last></editor>
-      <editor><first>M. Dolores</first><last>Jiménez-López</last></editor>
-      <editor><first>Carlos</first><last>Martín-Vide</last></editor>
-      <editor><first>Adrià</first><last>Torrens-Urrutia</last></editor>
+      <editor id="m-dolores-jimenez-lopez"><first>M. Dolores</first><last>Jiménez-López</last></editor>
+      <editor id="carlos-martin-vide"><first>Carlos</first><last>Martín-Vide</last></editor>
+      <editor id="adria-torrens-urrutia"><first>Adrià</first><last>Torrens-Urrutia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New-Mexico</address>
       <month>August</month>
@@ -7489,7 +7489,7 @@
     </paper>
     <paper id="2">
       <title>Computational Complexity of Natural Languages: A Reasoned Overview</title>
-      <author><first>António</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>António</first><last>Branco</last></author>
       <pages>10–19</pages>
       <url hash="aadf75a9">W18-4602</url>
       <abstract>There has been an upsurge of research interest in natural language complexity. As this interest will benefit from being informed by established contributions in this area, this paper presents a reasoned overview of central results concerning the computational complexity of natural language parsing. This overview also seeks to help to understand why, contrary to recent and widespread assumptions, it is by no means sufficient that an agent handles sequences of items under a pattern <tex-math>a^n b^n</tex-math> or under a pattern <tex-math>a^n b^m c^n d^m</tex-math> to ascertain ipso facto that this is the result of at least an underlying context-free grammar or an underlying context-sensitive grammar, respectively. In addition, it seeks to help to understand why it is also not sufficient that an agent handles sequences of items under a pattern <tex-math>a^n b^n</tex-math> for it to be deemed as having a cognitive capacity of higher computational complexity.</abstract>
@@ -7520,7 +7520,7 @@
       <author><first>Ayush</first><last>Jain</last></author>
       <author><first>Vishal</first><last>Singh</last></author>
       <author><first>Sidharth</first><last>Ranjan</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
       <pages>38–48</pages>
       <url hash="8fc6287b">W18-4605</url>
@@ -7549,7 +7549,7 @@
     <meta>
       <booktitle>Proceedings of the 14th Joint <fixed-case>ACL-ISO</fixed-case> Workshop on Interoperable Semantic Annotation</booktitle>
       <url hash="3a7291c1">W18-47</url>
-      <editor><first>Harry</first><last>Bunt</last></editor>
+      <editor id="harry-bunt"><first>Harry</first><last>Bunt</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
       <month>August</month>
@@ -7562,9 +7562,9 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>D</fixed-case>ial<fixed-case>E</fixed-case>dit: Annotations for Spoken Conversational Image Editing</title>
-      <author><first>Ramesh</first><last>Manuvirakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvirakurike</last></author>
       <author><first>Jacqueline</first><last>Brixey</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
@@ -7591,8 +7591,8 @@
       <author><first>Simon</first><last>Keizer</last></author>
       <author><first>Catherine</first><last>Pelachaud</last></author>
       <author><first>Volha</first><last>Petukhova</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>21–34</pages>
       <url hash="b4a115f3">W18-4703</url>
       <bibkey>bunt-etal-2018-downward</bibkey>
@@ -7600,7 +7600,7 @@
     <paper id="4">
       <title>The Revision of <fixed-case>ISO</fixed-case>-Space,Focused on the Movement Link</title>
       <author><first>Kiyong</first><last>Lee</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Harry</first><last>Bunt</last></author>
       <pages>35–44</pages>
       <url hash="d1daf2da">W18-4704</url>
@@ -7623,7 +7623,7 @@
     </paper>
     <paper id="7">
       <title>A Dialogue Annotation Scheme for Weight Management Chat using the Trans-Theoretical Model of Health Behavior Change</title>
-      <author><first>Ramesh</first><last>Manuvirakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvirakurike</last></author>
       <author><first>Sumanth</first><last>Bharawadj</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>60–68</pages>
@@ -7651,7 +7651,7 @@
     <paper id="10">
       <title>Discourse Annotation in the <fixed-case>PDTB</fixed-case>: The Next Generation</title>
       <author><first>Rashmi</first><last>Prasad</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Alan</first><last>Lee</last></author>
       <pages>87–97</pages>
       <url hash="da5759f5">W18-4710</url>
@@ -7660,7 +7660,7 @@
     <paper id="11">
       <title>Towards Understanding End-of-trip Instructions in a Taxi Ride Scenario</title>
       <author><first>Deepthi</first><last>Karkada</last></author>
-      <author><first>Ramesh</first><last>Manuvirakurike</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvirakurike</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>98–107</pages>
       <url hash="3a93e63b">W18-4711</url>
@@ -7671,7 +7671,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Computational Modeling of Polysynthetic Languages</booktitle>
       <url hash="2baef711">W18-48</url>
-      <editor><first>Judith L.</first><last>Klavans</last></editor>
+      <editor id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
       <month>August</month>
@@ -7692,7 +7692,7 @@
     </paper>
     <paper id="2">
       <title>A Neural Morphological Analyzer for <fixed-case>A</fixed-case>rapaho Verbs Learned from a Finite State Transducer</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
       <author><first>Andrew</first><last>Cowell</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
@@ -7712,7 +7712,7 @@
     <paper id="4">
       <title>A prototype finite-state morphological analyser for <fixed-case>C</fixed-case>hukchi</title>
       <author><first>Vasilisa</first><last>Andriyanets</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>31–40</pages>
       <url hash="2fde20ff">W18-4804</url>
       <abstract>In this article we describe the application of finite-state transducers to the morphological and phonological systems of Chukchi, a polysynthetic language spoken in the north of the Russian Federation. The language exhibits progressive and regressive vowel harmony, productive incorporation and extensive circumfixing. To implement the analyser we use the well-known Helsinki Finite-State Toolkit (HFST). The resulting model covers the majority of the morphological and phonological processes. A brief evaluation carried out on publically-available corpora shows that the coverage of the transducer is between and 53% and 76%. An error evaluation of 100 tokens randomly selected from the corpus, which were not covered by the analyser shows that most of the morphological processes are covered and that the majority of errors are caused by a limited stem lexicon.</abstract>
@@ -7750,10 +7750,10 @@
     <paper id="8">
       <title>Lost in Translation: Analysis of Information Loss During Machine Translation Between Polysynthetic and Fusional Languages</title>
       <author><first>Manuel</first><last>Mager</last></author>
-      <author><first>Elisabeth</first><last>Mager</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Mager</last></author>
       <author><first>Alfonso</first><last>Medina-Urrea</last></author>
-      <author><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="ivan-meza-ruiz"><first>Ivan Vladimir</first><last>Meza Ruiz</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <pages>73–83</pages>
       <url hash="fc8434ff">W18-4808</url>
       <abstract>Machine translation from polysynthetic to fusional languages is a challenging task, which gets further complicated by the limited amount of parallel text available. Thus, translation performance is far from the state of the art for high-resource and more intensively studied language pairs. To shed light on the phenomena which hamper automatic translation to and from polysynthetic languages, we study translations from three low-resource, polysynthetic languages (Nahuatl, Wixarika and Yorem Nokki) into Spanish and vice versa. Doing so, we find that in a morpheme-to-morpheme alignment an important amount of information contained in polysynthetic morphemes has no Spanish counterpart, and its translation is often omitted. We further conduct a qualitative analysis and, thus, identify morpheme types that are commonly hard to align or ignored in the translation process.</abstract>
@@ -7761,7 +7761,7 @@
     </paper>
     <paper id="9">
       <title>Automatic Glossing in a Low-Resource Setting for Language Documentation</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>84–93</pages>
       <url hash="8e4b1bf9">W18-4809</url>
@@ -7778,8 +7778,8 @@
       <editor><first>Jena D.</first><last>Hwang</last></editor>
       <editor><first>Nathan</first><last>Schneider</last></editor>
       <editor><first>Melanie</first><last>Andresen</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
-      <editor><first>Miriam R. L.</first><last>Petruck</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="miriam-r-l-petruck"><first>Miriam R. L.</first><last>Petruck</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Santa Fe, New Mexico, USA</address>
       <month>August</month>
@@ -7793,7 +7793,7 @@
     </frontmatter>
     <paper id="1">
       <title>Annotation Schemes for Surface Construction Labeling</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <pages>1</pages>
       <url hash="33f6954b">W18-4901</url>
       <abstract>In this talk I will describe the interaction of linguistics and language technologies in Surface Construction Labeling (SCL) from the perspective of corpus annotation tasks such as definiteness, modality, and causality. Linguistically, following Construction Grammar, SCL recognizes that meaning may be carried by morphemes, words, or arbitrary constellations of morpho-lexical elements. SCL is like Shallow Semantic Parsing in that it does not attempt a full compositional analysis of meaning, but rather identifies only the main elements of a semantic frame, where the frames may be invoked by constructions as well as lexical items. Computationally, SCL is different from tasks such as information extraction in that it deals only with meanings that are expressed in a conventional, grammaticalized way and does not address inferred meanings. I review the work of Dunietz (2018) on the labeling of causal frames including causal connectives and cause and effect arguments. I will describe how to design an annotation scheme for SCL, including isolating basic units of form and meaning and building a “constructicon”. I will conclude with remarks about the nature of universal categories and universal meaning representations in language technologies. This talk describes joint work with Jaime Carbonell, Jesse Dunietz, Nathan Schneider, and Miriam Petruck.</abstract>
@@ -7820,7 +7820,7 @@
       <title>Processing <fixed-case>MWE</fixed-case>s: Neurocognitive Bases of Verbal <fixed-case>MWE</fixed-case>s and Lexical Cohesiveness within <fixed-case>MWE</fixed-case>s</title>
       <author><first>Shohini</first><last>Bhattasali</last></author>
       <author><first>Murielle</first><last>Fabre</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <pages>6–17</pages>
       <url hash="38a3d99d">W18-4904</url>
       <abstract>Multiword expressions have posed a challenge in the past for computational linguistics since they comprise a heterogeneous family of word clusters and are difficult to detect in natural language data. In this paper, we present a fMRI study based on language comprehension to provide neuroimaging evidence for processing MWEs. We investigate whether different MWEs have distinct neural bases, e.g. if verbal MWEs involve separate brain areas from non-verbal MWEs and if MWEs with varying levels of cohesiveness activate dissociable brain regions. Our study contributes neuroimaging evidence illustrating that different MWEs elicit spatially distinct patterns of activation. We also adapt an association measure, usually used to detect MWEs, as a cognitively plausible metric for language processing.</abstract>
@@ -7854,7 +7854,7 @@
     </paper>
     <paper id="8">
       <title>Fixed Similes: Measuring aspects of the relation between <fixed-case>MWE</fixed-case> idiomatic semantics and syntactic flexibility</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Panagiotis</first><last>Kouris</last></author>
       <author><first>Yanis</first><last>Maistros</last></author>
       <pages>51–61</pages>
@@ -7865,7 +7865,7 @@
     <paper id="9">
       <title>Fine-Grained Termhood Prediction for <fixed-case>G</fixed-case>erman Compound Terms Using Neural Networks</title>
       <author><first>Anna</first><last>Hätty</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>62–73</pages>
       <url hash="d32e4d20">W18-4909</url>
       <abstract>Automatic term identification and investigating the understandability of terms in a specialized domain are often treated as two separate lines of research. We propose a combined approach for this matter, by defining fine-grained classes of termhood and framing a classification task. The classes reflect tiers of a term’s association to a domain. The new setup is applied to German closed compounds as term candidates in the domain of cooking. For the prediction of the classes, we compare several neural network architectures and also take salient information about the compounds’ components into account. We show that applying a similar class distinction to the compounds’ components and propagating this information within the network improves the compound class prediction results.</abstract>
@@ -7874,10 +7874,10 @@
     <paper id="10">
       <title>Towards a Computational Lexicon for <fixed-case>M</fixed-case>oroccan <fixed-case>D</fixed-case>arija: Words, Idioms, and Constructions</title>
       <author><first>Jamal</first><last>Laoudi</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>74–85</pages>
       <url hash="b3e687bd">W18-4910</url>
       <abstract>In this paper, we explore the challenges of building a computational lexicon for Moroccan Darija (MD), an Arabic dialect spoken by over 32 million people worldwide but which only recently has begun appearing frequently in written form in social media. We raise the question of what belongs in such a lexicon and start by describing our work building traditional word-level lexicon entries with their English translations. We then discuss challenges in translating idiomatic MD text that led to creating multi-word expression lexicon entries whose meanings could not be fully derived from the individual words. Finally, we provide a preliminary exploration of constructions to be considered for inclusion in an MD constructicon by translating examples of English constructions and examining their MD counterparts.</abstract>
@@ -7886,12 +7886,12 @@
     <paper id="11">
       <title>Verbal Multiword Expressions in <fixed-case>B</fixed-case>asque Corpora</title>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
-      <author><first>Itziar</first><last>Aduriz</last></author>
+      <author id="itziar-aduriz"><first>Itziar</first><last>Aduriz</last></author>
       <author><first>Ainara</first><last>Estarrona</last></author>
       <author><first>Itziar</first><last>Gonzalez-Dios</last></author>
-      <author><first>Antton</first><last>Gurrutxaga</last></author>
-      <author><first>Ruben</first><last>Urizar</last></author>
-      <author><first>Iñaki</first><last>Alegria</last></author>
+      <author id="antton-gurrutxaga"><first>Antton</first><last>Gurrutxaga</last></author>
+      <author id="ruben-urizar"><first>Ruben</first><last>Urizar</last></author>
+      <author id="inaki-alegria"><first>Iñaki</first><last>Alegria</last></author>
       <pages>86–95</pages>
       <url hash="87edc6d1">W18-4911</url>
       <abstract>This paper presents a Basque corpus where Verbal Multiword Expressions (VMWEs) were annotated following universal guidelines. Information on the annotation is given, and some ideas for discussion upon the guidelines are also proposed. The corpus is useful not only for NLP-related research, but also to draw conclusions on Basque phraseology in comparison with other languages.</abstract>
@@ -7929,11 +7929,11 @@
     <paper id="15">
       <title>Developing and Evaluating Annotation Procedures for <fixed-case>T</fixed-case>witter Data during Hazard Events</title>
       <author><first>Kevin</first><last>Stowe</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Jennings</first><last>Anderson</last></author>
       <author><first>Marina</first><last>Kogan</last></author>
       <author><first>Leysia</first><last>Palen</last></author>
-      <author><first>Kenneth M.</first><last>Anderson</last></author>
+      <author id="kenneth-m-anderson"><first>Kenneth M.</first><last>Anderson</last></author>
       <author><first>Rebecca</first><last>Morss</last></author>
       <author><first>Julie</first><last>Demuth</last></author>
       <author><first>Heather</first><last>Lazrus</last></author>
@@ -7957,8 +7957,8 @@
     <paper id="17">
       <title>The <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Treebank</title>
       <author><first>Shuyuan</first><last>Cao</last></author>
-      <author><first>Iria</first><last>da Cunha</last></author>
-      <author><first>Mikel</first><last>Iruskieta</last></author>
+      <author id="iria-da-cunha"><first>Iria</first><last>da Cunha</last></author>
+      <author id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></author>
       <pages>156–166</pages>
       <url hash="02ed9cb2">W18-4917</url>
       <abstract>Discourse analysis is necessary for different tasks of Natural Language Processing (NLP). As two of the most spoken languages in the world, discourse analysis between Spanish and Chinese is important for NLP research. This paper aims to present the first open Spanish-Chinese parallel corpus annotated with discourse information, whose theoretical framework is based on the Rhetorical Structure Theory (RST). We have evaluated and harmonized each annotation part to obtain a high annotated-quality corpus. The corpus is already available to the public.</abstract>
@@ -7995,9 +7995,9 @@
     <paper id="21">
       <title>Constructing an Annotated Corpus of Verbal <fixed-case>MWE</fixed-case>s for <fixed-case>E</fixed-case>nglish</title>
       <author><first>Abigail</first><last>Walsh</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Kristina</first><last>Geeraert</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Clarissa</first><last>Somers</last></author>
       <pages>193–200</pages>
@@ -8007,7 +8007,7 @@
     </paper>
     <paper id="22">
       <title>Cooperating Tools for <fixed-case>MWE</fixed-case> Lexicon Management and Corpus Annotation</title>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <author><first>Akihiko</first><last>Kato</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Toshio</first><last>Morita</last></author>
@@ -8020,8 +8020,8 @@
       <title>“Fingers in the Nose”: Evaluating Speakers’ Identification of Multi-Word Expressions Using a Slightly Gamified Crowdsourcing Platform</title>
       <author><first>Karën</first><last>Fort</last></author>
       <author><first>Bruno</first><last>Guillaume</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
-      <author><first>Nicolas</first><last>Lefèbvre</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
+      <author id="nicolas-lefebvre"><first>Nicolas</first><last>Lefèbvre</last></author>
       <author><first>Yann-Alan</first><last>Pilatte</last></author>
       <pages>207–213</pages>
       <url hash="ed8667e5">W18-4923</url>
@@ -8042,15 +8042,15 @@
     <paper id="25">
       <title>Edition 1.1 of the <fixed-case>PARSEME</fixed-case> Shared Task on Automatic Identification of Verbal Multiword Expressions</title>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Silvio Ricardo</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio Ricardo</first><last>Cordeiro</last></author>
       <author><first>Agata</first><last>Savary</last></author>
       <author><first>Veronika</first><last>Vincze</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Archna</first><last>Bhatia</last></author>
       <author><first>Maja</first><last>Buljan</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <author><first>Polona</first><last>Gantar</last></author>
-      <author><first>Voula</first><last>Giouli</last></author>
+      <author id="voula-giouli"><first>Voula</first><last>Giouli</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
       <author><first>Abdelati</first><last>Hawwari</last></author>
       <author><first>Uxoa</first><last>Iñurrieta</last></author>
@@ -8059,8 +8059,8 @@
       <author><first>Timm</first><last>Lichte</last></author>
       <author><first>Chaya</first><last>Liebeskind</last></author>
       <author><first>Johanna</first><last>Monti</last></author>
-      <author><first>Carla</first><last>Parra Escartín</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="carla-parra-escartin"><first>Carla</first><last>Parra Escartín</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Renata</first><last>Ramisch</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Ivelina</first><last>Stoyanova</last></author>
@@ -8075,7 +8075,7 @@
       <title><fixed-case>CRF</fixed-case>-Seq and <fixed-case>CRF</fixed-case>-<fixed-case>D</fixed-case>ep<fixed-case>T</fixed-case>ree at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Detecting Verbal <fixed-case>MWE</fixed-case>s using Sequential and Dependency-Based Approaches</title>
       <author><first>Erwan</first><last>Moreau</last></author>
       <author><first>Ashjan</first><last>Alsulaimani</last></author>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <author><first>Carl</first><last>Vogel</last></author>
       <pages>241–247</pages>
       <url hash="674f3fb9">W18-4926</url>
@@ -8094,7 +8094,7 @@
     </paper>
     <paper id="28">
       <title><fixed-case>GBD</fixed-case>-<fixed-case>NER</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Multi-Word Expression Detection Using Bidirectional Long-Short-Term Memory Networks and Graph-Based Decoding</title>
-      <author><first>Tiberiu</first><last>Boros</last></author>
+      <author id="tiberiu-boros"><first>Tiberiu</first><last>Boros</last></author>
       <author><first>Ruxandra</first><last>Burtica</last></author>
       <pages>254–260</pages>
       <url hash="ec510125">W18-4928</url>
@@ -8114,7 +8114,7 @@
     <paper id="30">
       <title><fixed-case>TRAPACC</fixed-case> and <fixed-case>TRAPACCS</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Neural Transition Tagging of Verbal Multiword Expressions</title>
       <author><first>Regina</first><last>Stodden</last></author>
-      <author><first>Behrang</first><last>QasemiZadeh</last></author>
+      <author id="behrang-qasemizadeh"><first>Behrang</first><last>QasemiZadeh</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
       <pages>268–274</pages>
       <url hash="9a407e31">W18-4930</url>
@@ -8123,7 +8123,7 @@
     </paper>
     <paper id="31">
       <title><fixed-case>TRAVERSAL</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Identification of Verbal Multiword Expressions Using a Discriminative Tree-Structured Model</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <pages>275–282</pages>
       <url hash="57e126fb">W18-4931</url>
       <abstract>This paper describes a system submitted to the closed track of the PARSEME shared task (edition 1.1) on automatic identification of verbal multiword expressions (VMWEs). The system represents VMWE identification as a labeling task where one of two labels (MWE or not-MWE) must be predicted for each node in the dependency tree based on local context, including adjacent nodes and their labels. The system relies on multiclass logistic regression to determine the globally optimal labeling of a tree. The system ranked 1st in the general cross-lingual ranking of the closed track systems, according to both official evaluation measures: MWE-based F1 and token-based F1.</abstract>
@@ -8145,7 +8145,7 @@
       <author><first>Nicolas</first><last>Zampieri</last></author>
       <author><first>Manon</first><last>Scholivet</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Benoit</first><last>Favre</last></author>
+      <author id="benoit-favre"><first>Benoit</first><last>Favre</last></author>
       <pages>290–296</pages>
       <url hash="7a963252">W18-4933</url>
       <abstract>This paper describes the Veyn system, submitted to the closed track of the PARSEME Shared Task 2018 on automatic identification of verbal multiword expressions (VMWEs). Veyn is based on a sequence tagger using recurrent neural networks. We represent VMWEs using a variant of the begin-inside-outside encoding scheme combined with the VMWE category tag. In addition to the system description, we present development experiments to determine the best tagging scheme. Veyn is freely available, covers 19 languages, and was ranked ninth (MWE-based) and eight (Token-based) among 13 submissions, considering macro-averaged F1 across languages.</abstract>
@@ -8157,9 +8157,9 @@
       <booktitle>Proceedings of the 19th Annual <fixed-case>SIG</fixed-case>dial Meeting on Discourse and Dialogue</booktitle>
       <url hash="a587b7b8">W18-50</url>
       <editor><first>Kazunori</first><last>Komatani</last></editor>
-      <editor><first>Diane</first><last>Litman</last></editor>
+      <editor id="diane-litman"><first>Diane</first><last>Litman</last></editor>
       <editor><first>Kai</first><last>Yu</last></editor>
-      <editor><first>Alex</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alex</first><last>Papangelis</last></editor>
       <editor><first>Lawrence</first><last>Cavedon</last></editor>
       <editor><first>Mikio</first><last>Nakano</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -8195,8 +8195,8 @@
     <paper id="3">
       <title>Modeling Linguistic and Personality Adaptation for Natural Language Generation</title>
       <author><first>Zhichao</first><last>Hu</last></author>
-      <author><first>Jean</first><last>Fox Tree</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="jean-e-fox-tree"><first>Jean</first><last>Fox Tree</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>20–31</pages>
       <url hash="61ae5eb0">W18-5003</url>
       <abstract>Previous work has shown that conversants adapt to many aspects of their partners’ language. Other work has shown that while every person is unique, they often share general patterns of behavior. Theories of personality aim to explain these shared patterns, and studies have shown that many linguistic cues are correlated with personality traits. We propose an adaptation measure for adaptive natural language generation for dialogs that integrates the predictions of both personality theories and adaptation theories, that can be applied as a dialog unfolds, on a turn by turn basis. We show that our measure meets criteria for validity, and that adaptation varies according to corpora and task, speaker, and the set of features used to model it. We also produce fine-grained models according to the dialog segmentation or the speaker, and demonstrate the decaying trend of adaptation.</abstract>
@@ -8243,7 +8243,7 @@
       <author><first>Florian</first><last>Kreyssig</last></author>
       <author><first>Iñigo</first><last>Casanueva</last></author>
       <author><first>Paweł</first><last>Budzianowski</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>60–69</pages>
       <url hash="3b7d10ef">W18-5007</url>
       <abstract>User Simulators are one of the major tools that enable offline training of task-oriented dialogue systems. For this task the Agenda-Based User Simulator (ABUS) is often used. The ABUS is based on hand-crafted rules and its output is in semantic form. Issues arise from both properties such as limited diversity and the inability to interface a text-level belief tracker. This paper introduces the Neural User Simulator (NUS) whose behaviour is learned from a corpus and which generates natural language, hence needing a less labelled dataset than simulators generating a semantic output. In comparison to much of the past work on this topic, which evaluates user simulators on corpus-based metrics, we use the NUS to train the policy of a reinforcement learning based Spoken Dialogue System. The NUS is compared to the ABUS by evaluating the policies that were trained using the simulators. Cross-model evaluation is performed i.e. training on one simulator and testing on the other. Furthermore, the trained policies are tested on real users. In both evaluation tasks the NUS outperformed the ABUS.</abstract>
@@ -8276,7 +8276,7 @@
     <paper id="10">
       <title>A Situated Dialogue System for Learning Structural Concepts in Blocks World</title>
       <author><first>Ian</first><last>Perera</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
       <author><first>Lucian</first><last>Galescu</last></author>
       <pages>89–98</pages>
@@ -8298,14 +8298,14 @@
     </paper>
     <paper id="12">
       <title>Consequences and Factors of Stylistic Differences in Human-Robot Dialogue</title>
-      <author><first>Stephanie</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
       <author><first>Kimberly</first><last>Pollard</last></author>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Matthew</first><last>Marge</last></author>
       <author><first>Cassidy</first><last>Henry</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>110–118</pages>
       <url hash="ebffa1a4">W18-5012</url>
       <abstract>This paper identifies stylistic differences in instruction-giving observed in a corpus of human-robot dialogue. Differences in verbosity and structure (i.e., single-intent vs. multi-intent instructions) arose naturally without restrictions or prior guidance on how users should speak with the robot. Different styles were found to produce different rates of miscommunication, and correlations were found between style differences and individual user variation, trust, and interaction experience with the robot. Understanding potential consequences and factors that influence style can inform design of dialogue systems that are robust to natural variation from human users.</abstract>
@@ -8327,7 +8327,7 @@
       <author><first>Sarah</first><last>Plane</last></author>
       <author><first>Ariel</first><last>Marvasti</last></author>
       <author><first>Tyler</first><last>Egan</last></author>
-      <author><first>Casey</first><last>Kennington</last></author>
+      <author id="casey-kennington"><first>Casey</first><last>Kennington</last></author>
       <pages>130–139</pages>
       <url hash="6dfcfbba">W18-5014</url>
       <abstract>When interacting with robots in a situated spoken dialogue setting, human dialogue partners tend to assign anthropomorphic and social characteristics to those robots. In this paper, we explore the age and educational level that human dialogue partners assign to three different robotic systems, including an un-embodied spoken dialogue system. We found that how a robot speaks is as important to human perceptions as the way the robot looks. Using the data from our experiment, we derived prosodic, emotional, and linguistic features from the participants to train and evaluate a classifier that predicts perceived intelligence, age, and education level.</abstract>
@@ -8384,8 +8384,8 @@
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Shubhangi</first><last>Tandon</last></author>
       <author><first>Sharath</first><last>T.S.</last></author>
-      <author><first>Stephanie</first><last>Lukin</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="stephanie-lukin"><first>Stephanie</first><last>Lukin</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>180–190</pages>
       <url hash="ca5c60b0">W18-5019</url>
       <abstract>Natural language generators for task-oriented dialogue must effectively realize system dialogue actions and their associated semantics. In many applications, it is also desirable for generators to control the style of an utterance. To date, work on task-oriented neural generation has primarily focused on semantic fidelity rather than achieving stylistic goals, while work on style has been done in contexts where it is difficult to measure content preservation. Here we present three different sequence-to-sequence models and carefully test how well they disentangle content and style. We use a statistical generator, Personage, to synthesize a new corpus of over 88,000 restaurant domain utterances whose style varies according to models of personality, giving us total control over both the semantic content and the stylistic variation in the training data. We then vary the amount of explicit stylistic supervision given to the three models. We show that our most explicit model can simultaneously achieve high fidelity to both semantic and stylistic goals: this model adds a context vector of 36 stylistic parameters as input to the hidden state of the encoder at each time step, showing the benefits of explicit stylistic supervision, even when the amount of training data is large.</abstract>
@@ -8430,7 +8430,7 @@
     <paper id="23">
       <title>Discourse Coherence in the Wild: A Dataset, Evaluation and Methods</title>
       <author><first>Alice</first><last>Lai</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>214–223</pages>
       <url hash="4a53367e">W18-5023</url>
       <attachment type="attachment" hash="35285eb7">W18-5023.Attachment.pdf</attachment>
@@ -8495,7 +8495,7 @@
       <title><fixed-case>D</fixed-case>ial<fixed-case>C</fixed-case>rowd: A toolkit for easy dialog system assessment</title>
       <author><first>Kyusong</first><last>Lee</last></author>
       <author><first>Tiancheng</first><last>Zhao</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
       <pages>245–248</pages>
       <url hash="b19f72e5">W18-5028</url>
@@ -8508,8 +8508,8 @@
       <author><first>David</first><last>Pautler</last></author>
       <author><first>Vikram</first><last>Ramanarayanan</last></author>
       <author><first>Kirby</first><last>Cofino</last></author>
-      <author><first>Patrick</first><last>Lange</last></author>
-      <author><first>David</first><last>Suendermann-Oeft</last></author>
+      <author id="patrick-l-lange"><first>Patrick</first><last>Lange</last></author>
+      <author id="david-suendermann-oeft"><first>David</first><last>Suendermann-Oeft</last></author>
       <pages>249–252</pages>
       <url hash="02fbeb79">W18-5029</url>
       <abstract>We present a paradigm for interactive teacher training that leverages multimodal dialog technology to puppeteer custom-designed embodied conversational agents (ECAs) in student roles. We used the open-source multimodal dialog system HALEF to implement a small-group classroom math discussion involving Venn diagrams where a human teacher candidate has to interact with two student ECAs whose actions are controlled by the dialog system. Such an automated paradigm has the potential to be extended and scaled to a wide range of interactive simulation scenarios in education, medicine, and business where group interaction training is essential.</abstract>
@@ -8519,7 +8519,7 @@
     <paper id="30">
       <title>An Empirical Study of Self-Disclosure in Spoken Dialogue Systems</title>
       <author><first>Abhilasha</first><last>Ravichander</last></author>
-      <author><first>Alan W.</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W.</first><last>Black</last></author>
       <pages>253–263</pages>
       <url hash="5ce31db8">W18-5030</url>
       <abstract>Self-disclosure is a key social strategy employed in conversation to build relations and increase conversational depth. It has been heavily studied in psychology and linguistic literature, particularly for its ability to induce self-disclosure from the recipient, a phenomena known as reciprocity. However, we know little about how self-disclosure manifests in conversation with automated dialog systems, especially as any self-disclosure on the part of a dialog system is patently disingenuous. In this work, we run a large-scale quantitative analysis on the effect of self-disclosure by analyzing interactions between real-world users and a spoken dialog system in the context of social conversation. We find that indicators of reciprocity occur even in human-machine dialog, with far-reaching implications for chatbots in a variety of domains including education, negotiation and social dialog.</abstract>
@@ -8545,11 +8545,11 @@
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>Paweł</first><last>Budzianowski</last></author>
       <author><first>Iñigo</first><last>Casanueva</last></author>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last></author>
       <author><first>Yen-Chen</first><last>Wu</last></author>
-      <author><first>Steve</first><last>Young</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="steve-young"><first>Steve</first><last>Young</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>273–283</pages>
       <url hash="c3673a2d">W18-5032</url>
       <attachment type="attachment" hash="4a648ea9">W18-5032.Attachment.pdf</attachment>
@@ -8559,8 +8559,8 @@
     </paper>
     <paper id="33">
       <title>Conversational Image Editing: Incremental Intent Identification in a New Dialogue Task</title>
-      <author><first>Ramesh</first><last>Manuvinakurike</last></author>
-      <author><first>Trung</first><last>Bui</last></author>
+      <author id="ramesh-manuvinakurike"><first>Ramesh</first><last>Manuvinakurike</last></author>
+      <author id="trung-bui"><first>Trung</first><last>Bui</last></author>
       <author><first>Walter</first><last>Chang</last></author>
       <author><first>Kallirroi</first><last>Georgila</last></author>
       <pages>284–295</pages>
@@ -8595,7 +8595,7 @@
       <author><first>Marco</first><last>Guerini</last></author>
       <author><first>Simone</first><last>Magnolini</last></author>
       <author><first>Vevake</first><last>Balaraman</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>317–326</pages>
       <url hash="f54f17ce">W18-5036</url>
       <abstract>We present a domain portable zero-shot learning approach for entity recognition in task-oriented conversational agents, which does not assume any annotated sentences at training time. Rather, we derive a neural model of the entity names based only on available gazetteers, and then apply the model to recognize new entities in the context of user utterances. In order to evaluate our working hypothesis we focus on nominal entities that are largely used in e-commerce to name products. Through a set of experiments in two languages (English and Italian) and three different domains (furniture, food, clothing), we show that the neural gazetteer-based approach outperforms several competitive baselines, with minimal requirements of linguistic features.</abstract>
@@ -8620,7 +8620,7 @@
       <author><first>Florian</first><last>Kreyssig</last></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last></author>
       <author><first>Yen-chen</first><last>Wu</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>332–337</pages>
       <url hash="3c639ff3">W18-5038</url>
       <abstract>Reinforcement learning (RL) is a promising dialogue policy optimisation approach, but traditional RL algorithms fail to scale to large domains. Recently, Feudal Dialogue Management (FDM), has shown to increase the scalability to large domains by decomposing the dialogue management decision into two steps, making use of the domain ontology to abstract the dialogue state in each step. In order to abstract the state space, however, previous work on FDM relies on handcrafted feature functions. In this work, we show that these feature functions can be learned jointly with the policy model while obtaining similar performance, even outperforming the handcrafted features in several environments and domains.</abstract>
@@ -8635,7 +8635,7 @@
       <author><first>Iñigo</first><last>Casanueva</last></author>
       <author><first>Yen-Chen</first><last>Wu</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>338–343</pages>
       <url hash="b97a1835">W18-5039</url>
       <abstract>Cross-domain natural language generation (NLG) is still a difficult task within spoken dialogue modelling. Given a semantic representation provided by the dialogue manager, the language generator should generate sentences that convey desired information. Traditional template-based generators can produce sentences with all necessary information, but these sentences are not sufficiently diverse. With RNN-based models, the diversity of the generated sentences can be high, however, in the process some information is lost. In this work, we improve an RNN-based generator by considering latent information at the sentence level during generation using conditional variational auto-encoder architecture. We demonstrate that our model outperforms the original RNN-based generator, while yielding highly diverse sentences. In addition, our model performs better when the training data is limited.</abstract>
@@ -8704,7 +8704,7 @@
       <title>Multi-task Learning for Joint Language Understanding and Dialogue State Tracking</title>
       <author><first>Abhinav</first><last>Rastogi</last></author>
       <author><first>Raghav</first><last>Gupta</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>376–384</pages>
       <url hash="390b87e8">W18-5045</url>
       <abstract>This paper presents a novel approach for multi-task learning of language understanding (LU) and dialogue state tracking (DST) in task-oriented dialogue systems. Multi-task training enables the sharing of the neural network layers responsible for encoding the user utterance for both LU and DST and improves performance while reducing the number of network parameters. In our proposed framework, DST operates on a set of candidate values for each slot that has been mentioned so far. These candidate sets are generated using LU slot annotations for the current user utterance, dialogue acts corresponding to the preceding system utterance and the dialogue state estimated for the previous turn, enabling DST to handle slots with a large or unbounded set of possible values and deal with slot values not seen during training. Furthermore, to bridge the gap between training and inference, we investigate the use of scheduled sampling on LU output for the current user utterance as well as the DST output for the preceding turn.</abstract>
@@ -8735,7 +8735,7 @@
       <title><fixed-case>C</fixed-case>ogent: A Generic Dialogue System Shell Based on a Collaborative Problem Solving Model</title>
       <author><first>Lucian</first><last>Galescu</last></author>
       <author><first>Choh Man</first><last>Teng</last></author>
-      <author><first>James</first><last>Allen</last></author>
+      <author id="james-allen"><first>James</first><last>Allen</last></author>
       <author><first>Ian</first><last>Perera</last></author>
       <pages>400–409</pages>
       <url hash="23dbdb75">W18-5048</url>
@@ -8763,7 +8763,7 @@
       <editor><first>Ruihong</first><last>Huang</last></editor>
       <editor><first>Vinodkumar</first><last>Prabhakaran</last></editor>
       <editor><first>Rob</first><last>Voigt</last></editor>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <editor><first>Jacqueline</first><last>Wernimont</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
@@ -8788,7 +8788,7 @@
     </paper>
     <paper id="2">
       <title>Hate Speech Dataset from a White Supremacy Forum</title>
-      <author><first>Ona</first><last>de Gibert</last></author>
+      <author id="ona-de-gibert"><first>Ona</first><last>de Gibert</last></author>
       <author><first>Naiara</first><last>Perez</last></author>
       <author><first>Aitor</first><last>García-Pablos</last></author>
       <author><first>Montse</first><last>Cuadros</last></author>
@@ -8812,7 +8812,7 @@
       <title>Predictive Embeddings for Hate Speech Detection on <fixed-case>T</fixed-case>witter</title>
       <author><first>Rohan</first><last>Kshirsagar</last></author>
       <author><first>Tyrus</first><last>Cukuvac</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <author><first>Susan</first><last>McGregor</last></author>
       <pages>26–32</pages>
       <url hash="36cc02f4">W18-5104</url>
@@ -8838,7 +8838,7 @@
       <author><first>Aman</first><last>Varshney</last></author>
       <author><first>Syed Sarfaraz</first><last>Akhtar</last></author>
       <author><first>Deepanshu</first><last>Vijay</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>43–50</pages>
       <url hash="009823df">W18-5106</url>
       <abstract>In the past few years, bully and aggressive posts on social media have grown significantly, causing serious consequences for victims/users of all demographics. Majority of the work in this field has been done for English only. In this paper, we introduce a deep learning based classification system for Facebook posts and comments of Hindi-English Code-Mixed text to detect the aggressive behaviour of/towards users. Our work focuses on text from users majorly in the Indian Subcontinent. The dataset that we used for our models is provided by <b>TRAC-1</b>in their shared task. Our classification model assigns each Facebook post/comment to one of the three predefined categories: “Overtly Aggressive”, “Covertly Aggressive” and “Non-Aggressive”. We experimented with 6 classification models and our CNN model on a 10 K-fold cross-validation gave the best result with the prediction accuracy of 73.2%.</abstract>
@@ -8847,7 +8847,7 @@
     </paper>
     <paper id="7">
       <title>Creating a <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Dataset to Study Pre-teen Cyberbullying</title>
-      <author><first>Rachele</first><last>Sprugnoli</last></author>
+      <author id="rachele-sprugnoli"><first>Rachele</first><last>Sprugnoli</last></author>
       <author><first>Stefano</first><last>Menini</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Filippo</first><last>Oncini</last></author>
@@ -8885,7 +8885,7 @@
     <paper id="10">
       <title>The Effects of User Features on <fixed-case>T</fixed-case>witter Hate Speech Detection</title>
       <author><first>Elise</first><last>Fehn Unsvåg</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>75–85</pages>
       <url hash="05a04c72">W18-5110</url>
       <abstract>The paper investigates the potential effects user features have on hate speech classification. A quantitative analysis of Twitter data was conducted to better understand user characteristics, but no correlations were found between hateful text and the characteristics of the users who had posted it. However, experiments with a hate speech classifier based on datasets from three different languages showed that combining certain user features with textual features gave slight improvements of classification performance. While the incorporation of user features resulted in varying impact on performance for the different datasets used, user network-related features provided the most consistent improvements.</abstract>
@@ -8947,7 +8947,7 @@
     <paper id="16">
       <title>Datasets of <fixed-case>S</fixed-case>lovene and <fixed-case>C</fixed-case>roatian Moderated News Comments</title>
       <author><first>Nikola</first><last>Ljubešić</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Darja</first><last>Fišer</last></author>
       <pages>124–131</pages>
       <url hash="bf4a171b">W18-5116</url>
@@ -8957,7 +8957,7 @@
     </paper>
     <paper id="17">
       <title>Cross-Domain Detection of Abusive Language Online</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>132–137</pages>
       <url hash="93567a84">W18-5117</url>
@@ -8970,7 +8970,7 @@
       <author><first>Puneet</first><last>Mathur</last></author>
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Meghna</first><last>Ayyar</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <pages>138–148</pages>
       <url hash="ee1bbc46">W18-5118</url>
       <abstract>The use of code-switched languages (<i>e.g.</i>, Hinglish, which is derived by the blending of Hindi with the English language) is getting much popular on Twitter due to their ease of communication in native languages. However, spelling variations and absence of grammar rules introduce ambiguity and make it difficult to understand the text automatically. This paper presents the Multi-Input Multi-Channel Transfer Learning based model (MIMCT) to detect offensive (hate speech or abusive) Hinglish tweets from the proposed Hinglish Offensive Tweet (HOT) dataset using transfer learning coupled with multiple feature inputs. Specifically, it takes multiple primary word embedding along with secondary extracted features as inputs to train a multi-channel CNN-LSTM architecture that has been pre-trained on English tweets through transfer learning. The proposed MIMCT model outperforms the baseline supervised classification models, transfer learning based CNN and LSTM models to establish itself as the state of the art in the unexplored domain of Hinglish offensive text classification.</abstract>
@@ -9075,7 +9075,7 @@
       <title>An Argument-Annotated Corpus of Scientific Publications</title>
       <author><first>Anne</first><last>Lauscher</last></author>
       <author><first>Goran</first><last>Glavaš</last></author>
-      <author><first>Simone Paolo</first><last>Ponzetto</last></author>
+      <author id="simone-paolo-ponzetto"><first>Simone Paolo</first><last>Ponzetto</last></author>
       <pages>40–46</pages>
       <url hash="2daac16e">W18-5206</url>
       <abstract>Argumentation is an essential feature of scientific language. We present an annotation study resulting in a corpus of scientific publications annotated with argumentative components and relations. The argumentative annotations have been added to the existing Dr. Inventor Corpus, already annotated for four other rhetorical aspects. We analyze the annotated argumentative structures and investigate the relations between argumentation and other rhetorical aspects of scientific writing, such as discourse roles and citation contexts.</abstract>
@@ -9095,7 +9095,7 @@
     <paper id="8">
       <title>Argument Component Classification for Classroom Discussions</title>
       <author><first>Luca</first><last>Lugini</last></author>
-      <author><first>Diane</first><last>Litman</last></author>
+      <author id="diane-litman"><first>Diane</first><last>Litman</last></author>
       <pages>57–67</pages>
       <url hash="95497883">W18-5208</url>
       <abstract>This paper focuses on argument component classification for transcribed spoken classroom discussions, with the goal of automatically classifying student utterances into claims, evidence, and warrants. We show that an existing method for argument component classification developed for another educationally-oriented domain performs poorly on our dataset. We then show that feature sets from prior work on argument mining for student essays and online dialogues can be used to improve performance considerably. We also provide a comparison between convolutional neural networks and recurrent neural networks when trained under different conditions to classify argument components in classroom discussions. While neural network models are not always able to outperform a logistic regression model, we were able to gain some useful insights: convolutional networks are more robust than recurrent networks both at the character and at the word level, and specificity information can help boost performance in multi-task training.</abstract>
@@ -9104,7 +9104,7 @@
     </paper>
     <paper id="9">
       <title>Evidence Types, Credibility Factors, and Patterns or Soft Rules for Weighing Conflicting Evidence: Argument Mining in the Context of Legal Rules Governing Evidence Assessment</title>
-      <author><first>Vern R.</first><last>Walker</last></author>
+      <author id="vern-walker"><first>Vern R.</first><last>Walker</last></author>
       <author><first>Dina</first><last>Foerster</last></author>
       <author><first>Julia Monica</first><last>Ponce</last></author>
       <author><first>Matthew</first><last>Rosen</last></author>
@@ -9144,7 +9144,7 @@
       <author><first>Maoran</first><last>Xu</last></author>
       <author><first>Hao</first><last>Fu</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
       <pages>97–104</pages>
       <url hash="f48a6b66">W18-5212</url>
       <abstract>In this paper, we propose to incorporate topic aspects information for online comments convincingness evaluation. Our model makes use of graph convolutional network to utilize implicit topic information within a discussion thread to assist the evaluation of convincingness of each single comment. In order to test the effectiveness of our proposed model, we annotate topic information on top of a public dataset for argument convincingness evaluation. Experimental results show that topic information is able to improve the performance for convincingness evaluation. We also make a move to detect topic aspects automatically.</abstract>
@@ -9153,7 +9153,7 @@
     </paper>
     <paper id="13">
       <title>Proposed Method for Annotation of Scientific Arguments in Terms of Semantic Relations and Argument Schemes</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <pages>105–110</pages>
       <url hash="38594f13">W18-5213</url>
       <abstract>This paper presents a proposed method for annotation of scientific arguments in biological/biomedical journal articles. Semantic entities and relations are used to represent the propositional content of arguments in instances of argument schemes. We describe an experiment in which we encoded the arguments in a journal article to identify issues in this approach. Our catalogue of argument schemes and a copy of the annotated article are now publically available.</abstract>
@@ -9173,7 +9173,7 @@
     <paper id="15">
       <title>Dave the debater: a retrieval-based and generative argumentative dialogue agent</title>
       <author><first>Dieu Thu</first><last>Le</last></author>
-      <author><first>Cam-Tu</first><last>Nguyen</last></author>
+      <author id="cam-tu-nguyen"><first>Cam-Tu</first><last>Nguyen</last></author>
       <author><first>Kim Anh</first><last>Nguyen</last></author>
       <pages>121–130</pages>
       <url hash="20f2f005">W18-5215</url>
@@ -9220,7 +9220,7 @@
     <meta>
       <booktitle>Proceedings of the 6th <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> Workshop A challenge on large-scale biomedical semantic indexing and question answering</booktitle>
       <url hash="650f8ce7">W18-53</url>
-      <editor><first>Ioannis A.</first><last>Kakadiaris</last></editor>
+      <editor id="ioannis-kakadiaris"><first>Ioannis A.</first><last>Kakadiaris</last></editor>
       <editor><first>George</first><last>Paliouras</last></editor>
       <editor><first>Anastasia</first><last>Krithara</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -9258,7 +9258,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>M</fixed-case>acquarie <fixed-case>U</fixed-case>niversity at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 6b: Deep learning and deep reinforcement learning for query-based summarisation</title>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>22–29</pages>
       <url hash="bb64ec6a">W18-5303</url>
       <abstract>This paper describes Macquarie University’s contribution to the BioASQ Challenge (BioASQ 6b, Phase B). We focused on the extraction of the ideal answers, and the task was approached as an instance of query-based multi-document summarisation. In particular, this paper focuses on the experiments related to the deep learning and reinforcement learning approaches used in the submitted runs. The best run used a deep learning model under a regression-based framework. The deep learning architecture used features derived from the output of LSTM chains on word embeddings, plus features based on similarity with the query, and sentence position. The reinforcement learning approach was a proof-of-concept prototype that trained a global policy using REINFORCE. The global policy was implemented as a neural network that used tf.idf features encoding the candidate sentence, question, and context.</abstract>
@@ -9281,8 +9281,8 @@
     <paper id="5">
       <title><fixed-case>M</fixed-case>ind<fixed-case>L</fixed-case>ab Neural Network Approach at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 6<fixed-case>B</fixed-case></title>
       <author><first>Andrés</first><last>Rosso-Mateus</last></author>
-      <author><first>Fabio A.</first><last>González</last></author>
-      <author><first>Manuel</first><last>Montes-y-Gómez</last></author>
+      <author id="fabio-a-gonzalez"><first>Fabio A.</first><last>González</last></author>
+      <author id="manuel-montes"><first>Manuel</first><last>Montes-y-Gómez</last></author>
       <pages>40–46</pages>
       <url hash="e366f15b">W18-5305</url>
       <abstract>Biomedical Question Answering is concerned with the development of methods and systems that automatically find answers to natural language posed questions. In this work, we describe the system used in the BioASQ Challenge task 6b for document retrieval and snippet retrieval (with particular emphasis in this subtask). The proposed model makes use of semantic similarity patterns that are evaluated and measured by a convolutional neural network architecture. Subsequently, the snippet ranking performance is improved with a pseudo-relevance feedback approach in a later step. Based on the preliminary results, we reached the second position in snippet retrieval sub-task.</abstract>
@@ -9293,7 +9293,7 @@
       <title><fixed-case>A</fixed-case>ttention<fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>: Simple, Effective and Interpretable Automatic <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Indexer</title>
       <author><first>Qiao</first><last>Jin</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Xinghua</first><last>Lu</last></author>
       <pages>47–56</pages>
       <url hash="3368554e">W18-5306</url>
@@ -9308,7 +9308,7 @@
       <author><first>Qiuze</first><last>Wu</last></author>
       <author><first>Boyue</first><last>Li</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>57–65</pages>
       <url hash="5ee1144f">W18-5307</url>
       <abstract>The growing number of biomedical publications is a challenge for human researchers, who invest considerable effort to search for relevant documents and pinpointed answers. Biomedical Question Answering can automatically generate answers for a user’s topic or question, significantly reducing the effort required to locate the most relevant information in a large document corpus. Extractive summarization techniques, which concatenate the most relevant text units drawn from multiple documents, perform well on automatic evaluation metrics like ROUGE, but score poorly on human readability, due to the presence of redundant text and grammatical errors in the answer. This work moves toward abstractive summarization, which attempts to distill and present the meaning of the original text in a more coherent way. We incorporate a sentence fusion approach, based on Integer Linear Programming, along with three novel approaches for sentence ordering, in an attempt to improve the human readability of ideal answers. Using an open framework for configuration space exploration (BOOM), we tested over 2000 unique system configurations in order to identify the best-performing combinations for the sixth edition of Phase B of the BioASQ challenge.</abstract>
@@ -9344,7 +9344,7 @@
       <author><first>Pramati</first><last>Kalwad</last></author>
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>79–89</pages>
       <url hash="3e75686b">W18-5310</url>
       <abstract>The ever-increasing magnitude of biomedical information sources makes it difficult and time-consuming for a human researcher to find the most relevant documents and pinpointed answers for a specific question or topic when using only a traditional search engine. Biomedical Question Answering systems automatically identify the most relevant documents and pinpointed answers, given an information need expressed as a natural language question. Generating a non-redundant, human-readable summary that satisfies the information need of a given biomedical question is the focus of the Ideal Answer Generation task, part of the BioASQ challenge. This paper presents a system for ideal answer generation (using ontology-based retrieval and a neural learning-to-rank approach, combined with extractive and abstractive summarization techniques) which achieved the highest ROUGE score of 0.659 on the BioASQ 5b batch 2 test.</abstract>
@@ -9357,7 +9357,7 @@
       <booktitle>Proceedings of the 2018 <fixed-case>EMNLP</fixed-case> Workshop <fixed-case>B</fixed-case>lackbox<fixed-case>NLP</fixed-case>: Analyzing and Interpreting Neural Networks for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="f0ad39ea">W18-54</url>
       <editor><first>Tal</first><last>Linzen</last></editor>
-      <editor><first>Grzegorz</first><last>Chrupała</last></editor>
+      <editor id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></editor>
       <editor><first>Afra</first><last>Alishahi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
@@ -9373,7 +9373,7 @@
       <title>When does deep multi-task learning work for loosely related document classification tasks?</title>
       <author><first>Emma</first><last>Kerinec</last></author>
       <author><first>Chloé</first><last>Braud</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>1–8</pages>
       <url hash="f321acaf">W18-5401</url>
       <abstract>This work aims to contribute to our understanding of <i>when</i> multi-task learning through parameter sharing in deep neural networks leads to improvements over single-task learning. We focus on the setting of learning from <i>loosely related</i> tasks, for which no theoretical guarantees exist. We therefore approach the question empirically, studying which properties of datasets and single-task learning characteristics correlate with improvements from multi-task learning. We are the first to study this in a text classification setting and across more than 500 different task pairs.</abstract>
@@ -9383,7 +9383,7 @@
     <paper id="2">
       <title>Analyzing Learned Representations of a Deep <fixed-case>ASR</fixed-case> Performance Prediction Model</title>
       <author><first>Zied</first><last>Elloumi</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>Olivier</first><last>Galibert</last></author>
       <author><first>Benjamin</first><last>Lecouteux</last></author>
       <pages>9–15</pages>
@@ -9396,7 +9396,7 @@
       <title>Explaining non-linear Classifier Decisions within Kernel-based Deep Architectures</title>
       <author><first>Danilo</first><last>Croce</last></author>
       <author><first>Daniele</first><last>Rossini</last></author>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <pages>16–24</pages>
       <url hash="6356f3e0">W18-5403</url>
       <abstract>Nonlinear methods such as deep neural networks achieve state-of-the-art performances in several semantic NLP tasks. However epistemologically transparent decisions are not provided as for the limited interpretability of the underlying acquired neural models. In neural-based semantic inference tasks epistemological transparency corresponds to the ability of tracing back causal connections between the linguistic properties of a input instance and the produced classification output. In this paper, we propose the use of a methodology, called <i>Layerwise Relevance Propagation</i>, over linguistically motivated neural architectures, namely <i>Kernel-based Deep Architectures</i> (KDA), to guide argumentations and explanation inferences. In such a way, each decision provided by a KDA can be linked to real examples, linguistically related to the input instance: these can be used to motivate the network output. Quantitative analysis shows that richer explanations about the semantic and syntagmatic structures of the examples characterize more convincing arguments in two tasks, i.e. question classification and semantic role labeling.</abstract>
@@ -9405,7 +9405,7 @@
     </paper>
     <paper id="4">
       <title>Nightmare at test time: How punctuation prevents parsers from generalizing</title>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <author><first>Miryam</first><last>de Lhoneux</last></author>
       <author><first>Isabelle</first><last>Augenstein</last></author>
       <pages>25–29</pages>
@@ -9417,7 +9417,7 @@
     <paper id="5">
       <title>Evaluating Textual Representations through Image Generation</title>
       <author><first>Graham</first><last>Spinks</last></author>
-      <author><first>Marie-Francine</first><last>Moens</last></author>
+      <author id="marie-francine-moens"><first>Marie-Francine</first><last>Moens</last></author>
       <pages>30–39</pages>
       <url hash="d949ff81">W18-5405</url>
       <abstract>We present a methodology for determining the quality of textual representations through the ability to generate images from them. Continuous representations of textual input are ubiquitous in modern Natural Language Processing techniques either at the core of machine learning algorithms or as the by-product at any given layer of a neural network. While current techniques to evaluate such representations focus on their performance on particular tasks, they don’t provide a clear understanding of the level of informational detail that is stored within them, especially their ability to represent spatial information. The central premise of this paper is that visual inspection or analysis is the most convenient method to quickly and accurately determine information content. Through the use of text-to-image neural networks, we propose a new technique to compare the quality of textual representations by visualizing their information content. The method is illustrated on a medical dataset where the correct representation of spatial information and shorthands are of particular importance. For four different well-known textual representations, we show with a quantitative analysis that some representations are consistently able to deliver higher quality visualizations of the information content. Additionally, we show that the quantitative analysis technique correlates with the judgment of a human expert evaluator in terms of alignment.</abstract>
@@ -9426,7 +9426,7 @@
     </paper>
     <paper id="6">
       <title>On the Role of Text Preprocessing in Neural Network Architectures: An Evaluation Study on Text Categorization and Sentiment Analysis</title>
-      <author><first>Jose</first><last>Camacho-Collados</last></author>
+      <author id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></author>
       <author><first>Mohammad Taher</first><last>Pilehvar</last></author>
       <pages>40–46</pages>
       <url hash="3a43513c">W18-5406</url>
@@ -9437,7 +9437,7 @@
     <paper id="7">
       <title>Jump to better conclusions: <fixed-case>SCAN</fixed-case> both left and right</title>
       <author><first>Jasmijn</first><last>Bastings</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Jason</first><last>Weston</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
       <author><first>Douwe</first><last>Kiela</last></author>
@@ -9462,7 +9462,7 @@
       <title>Linguistic representations in multi-task neural networks for ellipsis resolution</title>
       <author><first>Ola</first><last>Rønning</last></author>
       <author><first>Daniel</first><last>Hardt</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>66–73</pages>
       <url hash="4cf18b5c">W18-5409</url>
       <abstract>Sluicing resolution is the task of identifying the antecedent to a question ellipsis. Antecedents are often sentential constituents, and previous work has therefore relied on syntactic parsing, together with complex linguistic features. A recent model instead used partial parsing as an auxiliary task in sequential neural network architectures to inject syntactic information. We explore the linguistic information being brought to bear by such networks, both by defining subsets of the data exhibiting relevant linguistic characteristics, and by examining the internal representations of the network. Both perspectives provide evidence for substantial linguistic knowledge being deployed by the neural networks.</abstract>
@@ -9474,7 +9474,7 @@
       <author><first>Shun</first><last>Kiyono</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Jun</first><last>Suzuki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <pages>74–81</pages>
@@ -9486,8 +9486,8 @@
     <paper id="11">
       <title>Rule induction for global explanation of trained models</title>
       <author><first>Madhumita</first><last>Sushil</last></author>
-      <author><first>Simon</first><last>Šuster</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>82–97</pages>
       <url hash="fe66cd0a">W18-5411</url>
       <abstract>Understanding the behavior of a trained network and finding explanations for its outputs is important for improving the network’s performance and generalization ability, and for ensuring trust in automated systems. Several approaches have previously been proposed to identify and visualize the most important features by analyzing a trained network. However, the relations between different features and classes are lost in most cases. We propose a technique to induce sets of if-then-else rules that capture these relations to globally explain the predictions of a network. We first calculate the importance of the features in the trained network. We then weigh the original inputs with these feature importance scores, simplify the transformed input space, and finally fit a rule induction model to explain the model predictions. We find that the output rule-sets can explain the predictions of a neural network trained for 4-class text classification from the 20 newsgroups dataset to a macro-averaged F-score of 0.80. We make the code available at <url>https://github.com/clips/interpret_with_rules</url>.</abstract>
@@ -9498,7 +9498,7 @@
       <title>Can <fixed-case>LSTM</fixed-case> Learn to Capture Agreement? The Case of <fixed-case>B</fixed-case>asque</title>
       <author><first>Shauli</first><last>Ravfogel</last></author>
       <author><first>Yoav</first><last>Goldberg</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>98–107</pages>
       <url hash="f54d22b4">W18-5412</url>
       <abstract>Sequential neural networks models are powerful tools in a variety of Natural Language Processing (NLP) tasks. The sequential nature of these models raises the questions: to what extent can these models implicitly learn hierarchical structures typical to human language, and what kind of grammatical phenomena can they acquire? We focus on the task of agreement prediction in Basque, as a case study for a task that requires implicit understanding of sentence structure and the acquisition of a complex but consistent morphological system. Analyzing experimental results from two syntactic prediction tasks – verb number prediction and suffix recovery – we find that sequential models perform worse on agreement prediction in Basque than one might expect on the basis of a previous agreement prediction work in English. Tentative findings based on diagnostic classifiers suggest the network makes use of local heuristics as a proxy for the hierarchical structure of the sentence. We propose the Basque agreement prediction task as challenging benchmark for models that attempt to learn regularities in human language.</abstract>
@@ -9508,7 +9508,7 @@
     <paper id="13">
       <title>Rearranging the Familiar: Testing Compositional Generalization in Recurrent Networks</title>
       <author><first>João</first><last>Loula</last></author>
-      <author><first>Marco</first><last>Baroni</last></author>
+      <author id="marco-baroni"><first>Marco</first><last>Baroni</last></author>
       <author><first>Brenden</first><last>Lake</last></author>
       <pages>108–114</pages>
       <url hash="db1ad1ea">W18-5413</url>
@@ -9519,7 +9519,7 @@
     <paper id="14">
       <title>Evaluating the Ability of <fixed-case>LSTM</fixed-case>s to Learn Context-Free Grammars</title>
       <author><first>Luzi</first><last>Sennhauser</last></author>
-      <author><first>Robert</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert</first><last>Berwick</last></author>
       <pages>115–124</pages>
       <url hash="3ae2e492">W18-5414</url>
       <abstract>While long short-term memory (LSTM) neural net architectures are designed to capture sequence information, human language is generally composed of hierarchical structures. This raises the question as to whether LSTMs can learn hierarchical structures. We explore this question with a well-formed bracket prediction task using two types of brackets modeled by an LSTM. Demonstrating that such a system is learnable by an LSTM is the first step in demonstrating that the entire class of CFLs is also learnable. We observe that the model requires exponential memory in terms of the number of characters and embedded depth, where a sub-linear memory should suffice. Still, the model does more than memorize the training input. It learns how to distinguish between relevant and irrelevant information. On the other hand, we also observe that the model does not generalize well. We conclude that LSTMs do not learn the relevant underlying context-free rules, suggesting the good overall performance is attained rather by an efficient way of evaluating nuisance variables. LSTMs are a way to quickly reach good results for many natural language tasks, but to understand and generate natural language one has to investigate other concepts that can make more direct use of natural language’s structural nature.</abstract>
@@ -9561,7 +9561,7 @@
     <paper id="18">
       <title><fixed-case>LISA</fixed-case>: Explaining Recurrent Neural Network Judgments via Layer-w<fixed-case>I</fixed-case>se Semantic Accumulation and Example to Pattern Transformation</title>
       <author><first>Pankaj</first><last>Gupta</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>154–164</pages>
       <url hash="145fad9c">W18-5418</url>
       <abstract>Recurrent neural networks (RNNs) are temporal networks and cumulative in nature that have shown promising results in various natural language processing tasks. Despite their success, it still remains a challenge to understand their hidden behavior. In this work, we analyze and interpret the cumulative nature of RNN via a proposed technique named as <i>Layer-wIse-Semantic-Accumulation</i> (LISA) for explaining decisions and detecting the most likely (i.e., saliency) patterns that the network relies on while decision making. We demonstrate (1) <i>LISA</i>: “How an RNN accumulates or builds semantics during its sequential processing for a given text example and expected response” (2) <i>Example2pattern</i>: “How the saliency patterns look like for each category in the data according to the network in decision making”. We analyse the sensitiveness of RNNs about different inputs to check the increase or decrease in prediction scores and further extract the saliency patterns learned by the network. We employ two relation classification datasets: SemEval 10 Task 8 and TAC KBP Slot Filling to explain RNN predictions via the <i>LISA</i> and <i>example2pattern</i>.</abstract>
@@ -9572,7 +9572,7 @@
       <title>Analysing the potential of seq-to-seq models for incremental interpretation in task-oriented dialogue</title>
       <author><first>Dieuwke</first><last>Hupkes</last></author>
       <author><first>Sanne</first><last>Bouwmeester</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>165–174</pages>
       <url hash="c0cb8797">W18-5419</url>
       <abstract>We investigate how encoder-decoder models trained on a synthetic dataset of task-oriented dialogues process disfluencies, such as hesitations and self-corrections. We find that, contrary to earlier results, disfluencies have very little impact on the task success of seq-to-seq models with attention. Using visualisations and diagnostic classifiers, we analyse the representations that are incrementally built by the model, and discover that models develop little to no awareness of the structure of disfluencies. However, adding disfluencies to the data appears to help the model create clearer representations overall, as evidenced by the attention patterns the different models exhibit.</abstract>
@@ -9615,7 +9615,7 @@
     <paper id="23">
       <title>What do <fixed-case>RNN</fixed-case> Language Models Learn about Filler–Gap Dependencies?</title>
       <author><first>Ethan</first><last>Wilcox</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Takashi</first><last>Morita</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <pages>211–221</pages>
@@ -9637,7 +9637,7 @@
     <paper id="25">
       <title>Closing Brackets with Recurrent Neural Networks</title>
       <author><first>Natalia</first><last>Skachkova</last></author>
-      <author><first>Thomas</first><last>Trost</last></author>
+      <author id="thomas-alexander-trost"><first>Thomas</first><last>Trost</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <pages>232–239</pages>
       <url hash="7c2f01f7">W18-5425</url>
@@ -9705,7 +9705,7 @@
     <paper id="31">
       <title>An Analysis of Encoder Representations in Transformer-Based Machine Translation</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>287–297</pages>
       <url hash="6038db89">W18-5431</url>
       <abstract>The attention mechanism is a successful technique in modern NLP, especially in tasks like machine translation. The recently proposed network architecture of the <i>Transformer</i> is based entirely on attention mechanisms and achieves new state of the art results in neural machine translation, outperforming other sequence-to-sequence models. However, so far not much is known about the internal properties of the model and the representations it learns to achieve that performance. To study this question, we investigate the information that is learned by the attention mechanism in Transformer models with different translation quality. We assess the representations of the encoder by extracting dependency relations based on self-attention weights, we perform four probing tasks to study the amount of syntactic and semantic captured information and we also test attention in a transfer learning scenario. Our analysis sheds light on the relative strengths and weaknesses of the various encoder representations. We observe that specific attention heads mark syntactic dependency relations and we can also confirm that lower layers tend to learn more about syntax while higher layers tend to encode more semantics.</abstract>
@@ -9717,7 +9717,7 @@
       <author><first>Johnny</first><last>Wei</last></author>
       <author><first>Khiem</first><last>Pham</last></author>
       <author><first>Brendan</first><last>O’Connor</last></author>
-      <author><first>Brian</first><last>Dillon</last></author>
+      <author id="brian-w-dillon"><first>Brian</first><last>Dillon</last></author>
       <pages>298–305</pages>
       <url hash="5b37f45a">W18-5432</url>
       <abstract>Sequence to sequence (seq2seq) models are often employed in settings where the target output is natural language. However, the syntactic properties of the language generated from these models are not well understood. We explore whether such output belongs to a formal and realistic grammar, by employing the English Resource Grammar (ERG), a broad coverage, linguistically precise HPSG-based grammar of English. From a French to English parallel corpus, we analyze the parseability and grammatical constructions occurring in output from a seq2seq translation model. Over 93% of the model translations are parseable, suggesting that it learns to generate conforming to a grammar. The model has trouble learning the distribution of rarer syntactic rules, and we pinpoint several constructions that differentiate translations between the references and our model.</abstract>
@@ -9778,7 +9778,7 @@
       <title>Interpretable Textual Neuron Representations for <fixed-case>NLP</fixed-case></title>
       <author><first>Nina</first><last>Poerner</last></author>
       <author><first>Benjamin</first><last>Roth</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>325–327</pages>
       <url hash="129a0ab2">W18-5437</url>
       <abstract>Input optimization methods, such as Google Deep Dream, create interpretable representations of neurons for computer vision DNNs. We propose and evaluate ways of transferring this technology to NLP. Our results suggest that gradient ascent with a gumbel softmax layer produces n-gram representations that outperform naive corpus search in terms of target neuron activation. The representations highlight differences in syntax awareness between the language and visual models of the Imaginet architecture.</abstract>
@@ -9835,7 +9835,7 @@
     <paper id="42">
       <title>Interpretable Word Embedding Contextualization</title>
       <author><first>Kyoung-Rok</first><last>Jang</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <author><first>Sang-Bum</first><last>Kim</last></author>
       <pages>341–343</pages>
       <url hash="eafb3fa8">W18-5442</url>
@@ -9846,7 +9846,7 @@
     <paper id="43">
       <title>State Gradients for <fixed-case>RNN</fixed-case> Memory Analysis</title>
       <author><first>Lyan</first><last>Verwimp</last></author>
-      <author><first>Hugo</first><last>Van hamme</last></author>
+      <author id="hugo-van-hamme"><first>Hugo</first><last>Van hamme</last></author>
       <author><first>Vincent</first><last>Renkens</last></author>
       <author><first>Patrick</first><last>Wambacq</last></author>
       <pages>344–346</pages>
@@ -9881,7 +9881,7 @@
       <author><first>Julian</first><last>Michael</last></author>
       <author><first>Felix</first><last>Hill</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>353–355</pages>
       <url hash="081bb7f4">W18-5446</url>
       <abstract>Human ability to understand language is <i>general, flexible, and robust</i>. In contrast, most NLU models above the word level are designed for a specific task and struggle with out-of-domain data. If we aspire to develop models with understanding beyond the detection of superficial correspondences between inputs and outputs, then it is critical to develop a unified model that can execute a range of linguistic tasks across different domains. To facilitate research in this direction, we present the General Language Understanding Evaluation (GLUE, gluebenchmark.com): a benchmark of nine diverse NLU tasks, an auxiliary dataset for probing models for understanding of specific linguistic phenomena, and an online platform for evaluating and comparing models. For some benchmark tasks, training data is plentiful, but for others it is limited or does not match the genre of the test set. GLUE thus favors models that can represent linguistic knowledge in a way that facilitates sample-efficient learning and effective knowledge-transfer across tasks. While none of the datasets in GLUE were created from scratch for the benchmark, four of them feature privately-held test data, which is used to ensure that the benchmark is used fairly. We evaluate baselines that use ELMo (Peters et al., 2018), a powerful transfer learning technique, as well as state-of-the-art sentence representation models. The best models still achieve fairly low absolute scores. Analysis with our diagnostic dataset yields similarly weak performance over all phenomena tested, with some exceptions.</abstract>
@@ -9901,7 +9901,7 @@
     <paper id="48">
       <title>Language Modeling Teaches You More than Translation Does: Lessons Learned Through Auxiliary Syntactic Task Analysis</title>
       <author><first>Kelly</first><last>Zhang</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>359–361</pages>
       <url hash="b20c55fe">W18-5448</url>
       <abstract>Recently, researchers have found that deep LSTMs trained on tasks like machine translation learn substantial syntactic and semantic information about their input sentences, including part-of-speech. These findings begin to shed light on why pretrained representations, like ELMo and CoVe, are so beneficial for neural language understanding models. We still, though, do not yet have a clear understanding of how the choice of pretraining objective affects the type of linguistic information that models learn. With this in mind, we compare four objectives—language modeling, translation, skip-thought, and autoencoding—on their ability to induce syntactic and part-of-speech information, holding constant the quantity and genre of the training data, as well as the LSTM architecture.</abstract>
@@ -9924,7 +9924,7 @@
       <title>Interpretable Structure Induction via Sparse Attention</title>
       <author><first>Ben</first><last>Peters</last></author>
       <author><first>Vlad</first><last>Niculae</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>365–367</pages>
       <url hash="c1bf0602">W18-5450</url>
       <abstract>Neural network methods are experiencing wide adoption in NLP, thanks to their empirical performance on many tasks. Modern neural architectures go way beyond simple feedforward and recurrent models: they are complex pipelines that perform soft, differentiable computation instead of discrete logic. The price of such soft computing is the introduction of dense dependencies, which make it hard to disentangle the patterns that trigger a prediction. Our recent work on sparse and structured latent computation presents a promising avenue for enhancing interpretability of such neural pipelines. Through this extended abstract, we aim to discuss and explore the potential and impact of our methods.</abstract>
@@ -9938,7 +9938,7 @@
       <author><first>Michael</first><last>Behrisch</last></author>
       <author><first>Adam</first><last>Perer</last></author>
       <author><first>Hanspeter</first><last>Pfister</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>368–370</pages>
       <url hash="0a21c648">W18-5451</url>
       <abstract>Neural attention-based sequence-to-sequence models (seq2seq) (Sutskever et al., 2014; Bahdanau et al., 2014) have proven to be accurate and robust for many sequence prediction tasks. They have become the standard approach for automatic translation of text, at the cost of increased model complexity and uncertainty. End-to-end trained neural models act as a black box, which makes it difficult to examine model decisions and attribute errors to a specific part of a model. The highly connected and high-dimensional internal representations pose a challenge for analysis and visualization tools. The development of methods to understand seq2seq predictions is crucial for systems in production settings, as mistakes involving language are often very apparent to human readers. For instance, a widely publicized incident resulted from a translation system mistakenly translating “good morning” into “attack them” leading to a wrongful arrest (Hern, 2017).</abstract>
@@ -9949,7 +9949,7 @@
       <title>Grammar Induction with Neural Language Models: An Unusual Replication</title>
       <author><first>Phu Mon</first><last>Htut</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Samuel</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel</first><last>Bowman</last></author>
       <pages>371–373</pages>
       <url hash="263e918e">W18-5452</url>
       <abstract>Grammar induction is the task of learning syntactic structure without the expert-labeled treebanks (Charniak and Carroll, 1992; Klein and Manning, 2002). Recent work on latent tree learning offers a new family of approaches to this problem by inducing syntactic structure using the supervision from a downstream NLP task (Yogatama et al., 2017; Maillard et al., 2017; Choi et al., 2018). In a recent paper published at ICLR, Shen et al. (2018) introduce such a model and report near state-of-the-art results on the target task of language modeling, and the first strong latent tree learning result on constituency parsing. During the analysis of this model, we discover issues that make the original results hard to trust, including tuning and even training on what is effectively the test set. Here, we analyze the model under different configurations to understand what it learns and to identify the conditions under which it succeeds. We find that this model represents the first empirical success for neural network latent tree learning, and that neural language modeling warrants further study as a setting for grammar induction.</abstract>
@@ -9981,7 +9981,7 @@
     </paper>
     <paper id="55">
       <title>End-to-end Image Captioning Exploits Distributional Similarity in Multimodal Space</title>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
       <author><first>Josiah</first><last>Wang</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>381–383</pages>
@@ -10037,8 +10037,8 @@
     </paper>
     <paper id="2">
       <title>The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity</title>
-      <author><first>Fatemeh</first><last>Torabi Asr</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
+      <author id="fatemeh-torabi-asr"><first>Fatemeh</first><last>Torabi Asr</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
       <pages>10–15</pages>
       <url hash="68ea1e1d">W18-5502</url>
       <abstract>Misinformation detection at the level of full news articles is a text classification problem. Reliably labeled data in this domain is rare. Previous work relied on news articles collected from so-called “reputable” and “suspicious” websites and labeled accordingly. We leverage fact-checking websites to collect individually-labeled news articles with regard to the veracity of their content and use this data to test the cross-domain generalization of a classifier trained on bigger text collections but labeled according to source reputation. Our results suggest that reputation-based classification is not sufficient for predicting the veracity level of the majority of news articles, and that the system performance on different test datasets depends on topic distribution. Therefore collecting well-balanced and carefully-assessed training data is a priority for developing robust misinformation detection systems.</abstract>
@@ -10049,7 +10049,7 @@
       <title>Crowdsourcing Semantic Label Propagation in Relation Classification</title>
       <author><first>Anca</first><last>Dumitrache</last></author>
       <author><first>Lora</first><last>Aroyo</last></author>
-      <author><first>Chris</first><last>Welty</last></author>
+      <author id="chris-welty"><first>Chris</first><last>Welty</last></author>
       <pages>16–21</pages>
       <url hash="671822cf">W18-5503</url>
       <abstract>Distant supervision is a popular method for performing relation extraction from text that is known to produce noisy labels. Most progress in relation extraction and classification has been made with crowdsourced corrections to distant-supervised labels, and there is evidence that indicates still more would be better. In this paper, we explore the problem of propagating human annotation signals gathered for open-domain relation classification through the CrowdTruth methodology for crowdsourcing, that captures ambiguity in annotations by measuring inter-annotator disagreement. Our approach propagates annotations to sentences that are similar in a low dimensional embedding space, expanding the number of labels by two orders of magnitude. Our experiments show significant improvement in a sentence-level multi-class relation classifier.</abstract>
@@ -10059,8 +10059,8 @@
     <paper id="4">
       <title>Retrieve and Re-rank: A Simple and Effective <fixed-case>IR</fixed-case> Approach to Simple Question Answering over Knowledge Graphs</title>
       <author><first>Vishal</first><last>Gupta</last></author>
-      <author><first>Manoj</first><last>Chinnakotla</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manoj-chinnakotla"><first>Manoj</first><last>Chinnakotla</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>22–27</pages>
       <url hash="091d8b22">W18-5504</url>
       <abstract>SimpleQuestions is a commonly used benchmark for single-factoid question answering (QA) over Knowledge Graphs (KG). Existing QA systems rely on various components to solve different sub-tasks of the problem (such as entity detection, entity linking, relation prediction and evidence integration). In this work, we propose a different approach to the problem and present an information retrieval style solution for it. We adopt a two-phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. Our approach achieves an accuracy of 80% which sets a new state-of-the-art on the SimpleQuestions dataset.</abstract>
@@ -10174,7 +10174,7 @@
     <paper id="14">
       <title>Affordance Extraction and Inference based on Semantic Role Labeling</title>
       <author><first>Daniel</first><last>Loureiro</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <pages>91–96</pages>
       <url hash="cf0c1b3a">W18-5514</url>
       <abstract>Common-sense reasoning is becoming increasingly important for the advancement of Natural Language Processing. While word embeddings have been very successful, they cannot explain which aspects of ‘coffee’ and ‘tea’ make them similar, or how they could be related to ‘shop’. In this paper, we propose an explicit word representation that builds upon the Distributional Hypothesis to represent meaning from semantic roles, and allow inference of relations from their meshing, as supported by the affordance-based Indexical Hypothesis. We find that our model improves the state-of-the-art on unsupervised word similarity tasks while allowing for direct inference of new relations from the same vector space.</abstract>
@@ -10231,7 +10231,7 @@
     <paper id="19">
       <title><fixed-case>SIRIUS</fixed-case>-<fixed-case>LTG</fixed-case>: An Entity Linking Approach to Fact Extraction and Verification</title>
       <author><first>Farhad</first><last>Nooralahzadeh</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>119–123</pages>
       <url hash="a38ad6d7">W18-5519</url>
       <abstract>This article presents the SIRIUS-LTG system for the Fact Extraction and VERification (FEVER) Shared Task. It consists of three components: 1) <i>Wikipedia Page Retrieval</i>: First we extract the entities in the claim, then we find potential Wikipedia URI candidates for each of the entities using a SPARQL query over DBpedia 2) <i>Sentence selection</i>: We investigate various techniques i.e. Smooth Inverse Frequency (SIF), Word Mover’s Distance (WMD), Soft-Cosine Similarity, Cosine similarity with unigram Term Frequency Inverse Document Frequency (TF-IDF) to rank sentences by their similarity to the claim. 3) <i>Textual Entailment</i>: We compare three models for the task of claim classification. We apply a Decomposable Attention (DA) model (Parikh et al., 2016), a Decomposed Graph Entailment (DGE) model (Khot et al., 2018) and a Gradient-Boosted Decision Trees (TalosTree) model (Sean et al., 2017) for this task. The experiments show that the pipeline with simple Cosine Similarity using TFIDF in sentence selection along with DA model as labelling model achieves the best results on the development set (F1 evidence: 32.17, label accuracy: 59.61 and FEVER score: 0.3778). Furthermore, it obtains 30.19, 48.87 and 36.55 in terms of F1 evidence, label accuracy and FEVER score, respectively, on the test set. Our system ranks 15th among 23 participants in the shared task prior to any human-evaluation of the evidence.</abstract>
@@ -10297,7 +10297,7 @@
     <paper id="25">
       <title>Team <fixed-case>SWEEP</fixed-case>er: Joint Sentence Extraction and Fact Checking with Pointer Networks</title>
       <author><first>Christopher</first><last>Hidey</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>150–155</pages>
       <url hash="aadbfe63">W18-5525</url>
       <abstract>Many tasks such as question answering and reading comprehension rely on information extracted from unreliable sources. These systems would thus benefit from knowing whether a statement from an unreliable source is correct. We present experiments on the FEVER (Fact Extraction and VERification) task, a shared task that involves selecting sentences from Wikipedia and predicting whether a claim is supported by those sentences, refuted, or there is not enough information. Fact checking is a task that benefits from not only asserting or disputing the veracity of a claim but also finding evidence for that position. As these tasks are dependent on each other, an ideal model would consider the veracity of the claim when finding evidence and also find only the evidence that is relevant. We thus jointly model sentence extraction and verification on the FEVER shared task. Among all participants, we ranked 5th on the blind test set (prior to any additional human evaluation of the evidence).</abstract>
@@ -10308,7 +10308,7 @@
       <title><fixed-case>QED</fixed-case>: A fact verification system for the <fixed-case>FEVER</fixed-case> shared task</title>
       <author><first>Jackson</first><last>Luken</last></author>
       <author><first>Nanjiang</first><last>Jiang</last></author>
-      <author><first>Marie-Catherine</first><last>de Marneffe</last></author>
+      <author id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></author>
       <pages>156–160</pages>
       <url hash="b5bdd2e3">W18-5526</url>
       <abstract>This paper describes our system submission to the 2018 Fact Extraction and VERification (FEVER) shared task. The system uses a heuristics-based approach for evidence extraction and a modified version of the inference model by Parikh et al. (2016) for classification. Our process is broken down into three modules: potentially relevant documents are gathered based on key phrases in the claim, then any possible evidence sentences inside those documents are extracted, and finally our classifier discards any evidence deemed irrelevant and uses the remaining to classify the claim’s veracity. Our system beats the shared task baseline by 12% and is successful at finding correct evidence (evidence retrieval F1 of 62.5% on the development set).</abstract>
@@ -10319,7 +10319,7 @@
       <title>Team <fixed-case>UMBC</fixed-case>-<fixed-case>FEVER</fixed-case> : Claim verification using Semantic Lexical Resources</title>
       <author><first>Ankur</first><last>Padia</last></author>
       <author><first>Francis</first><last>Ferraro</last></author>
-      <author><first>Tim</first><last>Finin</last></author>
+      <author id="tim-finin"><first>Tim</first><last>Finin</last></author>
       <pages>161–165</pages>
       <url hash="cbdb3c68">W18-5527</url>
       <abstract>We describe our system used in the 2018 FEVER shared task. The system employed a frame-based information retrieval approach to select Wikipedia sentences providing evidence and used a two-layer multilayer perceptron to classify a claim as correct or not. Our submission achieved a score of 0.3966 on the Evidence F1 metric with accuracy of 44.79%, and FEVER score of 0.2628 F1 points.</abstract>
@@ -10342,7 +10342,7 @@
     <meta>
       <booktitle>Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis</booktitle>
       <url hash="8be9844d">W18-56</url>
-      <editor><first>Alberto</first><last>Lavelli</last></editor>
+      <editor id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></editor>
       <editor><first>Anne-Lyse</first><last>Minard</last></editor>
       <editor><first>Fabio</first><last>Rinaldi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -10383,9 +10383,9 @@
     </paper>
     <paper id="3">
       <title>Revisiting neural relation classification in clinical notes with external information</title>
-      <author><first>Simon</first><last>Šuster</last></author>
+      <author id="simon-suster"><first>Simon</first><last>Šuster</last></author>
       <author><first>Madhumita</first><last>Sushil</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <pages>22–28</pages>
       <url hash="b1d2787b">W18-5603</url>
       <abstract>Recently, segment convolutional neural networks have been proposed for end-to-end relation extraction in the clinical domain, achieving results comparable to or outperforming the approaches with heavy manual feature engineering. In this paper, we analyze the errors made by the neural classifier based on confusion matrices, and then investigate three simple extensions to overcome its limitations. We find that including ontological association between drugs and problems, and data-induced association between medical concepts does not reliably improve the performance, but that large gains are obtained by the incorporation of semantic classes to capture relation triggers.</abstract>
@@ -10395,7 +10395,7 @@
     <paper id="4">
       <title>Supervised Machine Learning for Extractive Query Based Summarisation of Biomedical Data</title>
       <author><first>Mandeep</first><last>Kaur</last></author>
-      <author><first>Diego</first><last>Mollá</last></author>
+      <author id="diego-molla"><first>Diego</first><last>Mollá</last></author>
       <pages>29–37</pages>
       <url hash="5e2e4214">W18-5604</url>
       <abstract>The automation of text summarisation of biomedical publications is a pressing need due to the plethora of information available online. This paper explores the impact of several supervised machine learning approaches for extracting multi-document summaries for given queries. In particular, we compare classification and regression approaches for query-based extractive summarisation using data provided by the BioASQ Challenge. We tackled the problem of annotating sentences for training classification systems and show that a simple annotation approach outperforms regression-based summarisation.</abstract>
@@ -10406,7 +10406,7 @@
       <title>Comparing <fixed-case>CNN</fixed-case> and <fixed-case>LSTM</fixed-case> character-level embeddings in <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> models for chemical and disease named entity recognition</title>
       <author><first>Zenan</first><last>Zhai</last></author>
       <author><first>Dat Quoc</first><last>Nguyen</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>38–43</pages>
       <url hash="bb4c239c">W18-5605</url>
       <abstract>We compare the use of LSTM-based and CNN-based character-level word embeddings in BiLSTM-CRF models to approach chemical and disease named entity recognition (NER) tasks. Empirical results over the BioCreative V CDR corpus show that the use of either type of character-level word embeddings in conjunction with the BiLSTM-CRF models leads to comparable state-of-the-art performance. However, the models using CNN-based character-level word embeddings have a computational performance advantage, increasing training time over word-based models by 25% while the LSTM-based character-level word embeddings more than double the required training time.</abstract>
@@ -10415,14 +10415,14 @@
     </paper>
     <paper id="6">
       <title>Deep learning for language understanding of mental health concepts derived from Cognitive Behavioural Therapy</title>
-      <author><first>Lina M.</first><last>Rojas-Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas-Barahona</last></author>
       <author><first>Bo-Hsiang</first><last>Tseng</last></author>
       <author><first>Yinpei</first><last>Dai</last></author>
       <author><first>Clare</first><last>Mansfield</last></author>
       <author><first>Osman</first><last>Ramadan</last></author>
       <author><first>Stefan</first><last>Ultes</last></author>
       <author><first>Michael</first><last>Crawford</last></author>
-      <author><first>Milica</first><last>Gašić</last></author>
+      <author id="milica-gasic"><first>Milica</first><last>Gašić</last></author>
       <pages>44–54</pages>
       <url hash="d273f066">W18-5606</url>
       <abstract>In recent years, we have seen deep learning and distributed representations of words and sentences make impact on a number of natural language processing tasks, such as similarity, entailment and sentiment analysis. Here we introduce a new task: understanding of mental health concepts derived from Cognitive Behavioural Therapy (CBT). We define a mental health ontology based on the CBT principles, annotate a large corpus where this phenomena is exhibited and perform understanding using deep learning and distributed representations. Our results show that the performance of deep learning models combined with word embeddings or sentence embeddings significantly outperform non-deep-learning models in this difficult task. This understanding module will be an essential component of a statistical dialogue system delivering therapy.</abstract>
@@ -10432,7 +10432,7 @@
     <paper id="7">
       <title>Investigating the Challenges of Temporal Relation Extraction from Clinical Text</title>
       <author><first>Diana</first><last>Galvan</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Koji</first><last>Matsuda</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>55–64</pages>
@@ -10457,7 +10457,7 @@
     <paper id="9">
       <title>Unsupervised Identification of Study Descriptors in Toxicology Research: An Experimental Study</title>
       <author><first>Drahomira</first><last>Herrmannova</last></author>
-      <author><first>Steven</first><last>Young</last></author>
+      <author id="steve-young"><first>Steven</first><last>Young</last></author>
       <author><first>Robert</first><last>Patton</last></author>
       <author><first>Christopher</first><last>Stahl</last></author>
       <author><first>Nicole</first><last>Kleinstreuer</last></author>
@@ -10498,7 +10498,7 @@
       <title>Automatically Detecting the Position and Type of Psychiatric Evaluation Report Sections</title>
       <author><first>Deya</first><last>Banisakher</last></author>
       <author><first>Naphtali</first><last>Rishe</last></author>
-      <author><first>Mark A.</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark A.</first><last>Finlayson</last></author>
       <pages>101–110</pages>
       <url hash="af8d342a">W18-5612</url>
       <abstract>Psychiatric evaluation reports represent a rich and still mostly-untapped source of information for developing systems for automatic diagnosis and treatment of mental health problems. These reports contain free-text structured within sections using a convention of headings. We present a model for automatically detecting the position and type of different psychiatric evaluation report sections. We developed this model using a corpus of 150 sample reports that we gathered from the Web, and used sentences as a processing unit while section headings were used as labels of section type. From these labels we generated a unified hierarchy of labels of section types, and then learned n-gram models of the language found in each section. To model conventions for section order, we integrated these n-gram models with a Hierarchical Hidden Markov Model (HHMM) representing the probabilities of observed section orders found in the corpus, and then used this HHMM n-gram model in a decoding framework to infer the most likely section boundaries and section types for documents with their section labels removed. We evaluated our model over two tasks, namely, identifying section boundaries and identifying section types and orders. Our model significantly outperformed baselines for each task with an F1 of 0.88 for identifying section types, and a 0.26 WindowDiff (Wd) and 0.20 and (Pk) scores, respectively, for identifying section boundaries.</abstract>
@@ -10510,7 +10510,7 @@
       <author><first>Taraka</first><last>Rama</last></author>
       <author><first>Pål</first><last>Brekke</last></author>
       <author><first>Øystein</first><last>Nytrø</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>111–121</pages>
       <url hash="0a75e0e7">W18-5613</url>
       <abstract>In this article, we describe the development of annotation guidelines for family history information in Norwegian clinical text. We make use of incrementally developed synthetic clinical text describing patients’ family history relating to cases of cardiac disease and present a general methodology which integrates the synthetically produced clinical statements and guideline development. We analyze inter-annotator agreement based on the developed guidelines and present results from experiments aimed at evaluating the validity and applicability of the annotated corpus using machine learning techniques. The resulting annotated corpus contains 477 sentences and 6030 tokens. Both the annotation guidelines and the annotated corpus are made freely available and as such constitutes the first publicly available resource of Norwegian clinical text.</abstract>
@@ -10534,8 +10534,8 @@
       <author><first>Nicholas</first><last>Miller</last></author>
       <author><first>Kirsten</first><last>Bolton</last></author>
       <author><first>Philip</first><last>Cawkwell</last></author>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Mei</first><last>Hua-Hall</last></author>
       <pages>129–138</pages>
       <url hash="8a8857a4">W18-5615</url>
@@ -10573,7 +10573,7 @@
     </paper>
     <paper id="18">
       <title>In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition</title>
-      <author><first>Golnar</first><last>Sheikhshabbafghi</last></author>
+      <author id="golnar-sheikhshab"><first>Golnar</first><last>Sheikhshabbafghi</last></author>
       <author><first>Inanc</first><last>Birol</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
       <pages>160–164</pages>
@@ -10585,7 +10585,7 @@
     <paper id="19">
       <title>Self-training improves Recurrent Neural Networks performance for Temporal Relation Extraction</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Hadi</first><last>Amiri</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
@@ -10615,7 +10615,7 @@
       <author><first>André</first><last>Bittar</last></author>
       <author><first>Rina</first><last>Dutta</last></author>
       <author><first>Rashmi</first><last>Patel</last></author>
-      <author><first>Robert</first><last>Stewart</last></author>
+      <author id="robert-stewart"><first>Robert</first><last>Stewart</last></author>
       <author><first>Sumithra</first><last>Velupillai</last></author>
       <pages>183–192</pages>
       <url hash="e94e85e8">W18-5621</url>
@@ -10628,7 +10628,7 @@
       <author><first>Julien</first><last>Tourille</last></author>
       <author><first>Matthieu</first><last>Doutreligne</last></author>
       <author><first>Olivier</first><last>Ferret</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <author><first>Nicolas</first><last>Paris</last></author>
       <author><first>Xavier</first><last>Tannier</last></author>
       <pages>193–203</pages>
@@ -10642,7 +10642,7 @@
       <author><first>Yuhao</first><last>Zhang</last></author>
       <author><first>Daisy Yi</first><last>Ding</last></author>
       <author><first>Tianpei</first><last>Qian</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <author><first>Curtis P.</first><last>Langlotz</last></author>
       <pages>204–213</pages>
       <url hash="e4ccc0e3">W18-5623</url>
@@ -10708,7 +10708,7 @@
       <title>A Methodology for Evaluating Interaction Strategies of Task-Oriented Conversational Agents</title>
       <author><first>Marco</first><last>Guerini</last></author>
       <author><first>Sara</first><last>Falcone</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>24–32</pages>
       <url hash="33a168f9">W18-5704</url>
       <abstract>In task-oriented conversational agents, more attention has been usually devoted to assessing task effectiveness, rather than to <i>how</i> the task is achieved. However, conversational agents are moving towards more complex and human-like interaction capabilities (e.g. the ability to use a formal/informal register, to show an empathetic behavior), for which standard evaluation methodologies may not suffice. In this paper, we provide a novel methodology to assess - in a completely controlled way - the impact on the quality of experience of agent’s interaction strategies. The methodology is based on a within subject design, where two slightly different transcripts of the same interaction with a conversational agent are presented to the user. Through a series of pilot experiments we prove that this methodology allows fast and cheap experimentation/evaluation, focusing on aspects that are overlooked by current methods.</abstract>
@@ -10749,7 +10749,7 @@
     <paper id="8">
       <title>Data Augmentation for Neural Online Chats Response Selection</title>
       <author><first>Wenchao</first><last>Du</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>52–58</pages>
       <url hash="7b0cc8b5">W18-5708</url>
       <abstract>Data augmentation seeks to manipulate the available data for training to improve the generalization ability of models. We investigate two data augmentation proxies, permutation and flipping, for neural dialog response selection task on various models over multiple datasets, including both Chinese and English languages. Different from standard data augmentation techniques, our method combines the original and synthesized data for prediction. Empirical results show that our approach can gain 1 to 3 recall-at-1 points over baseline models in both full-scale and small-scale settings.</abstract>
@@ -10774,7 +10774,7 @@
       <author><first>Ignacio</first><last>Aguado</last></author>
       <author><first>Andreea</first><last>Hossmann</last></author>
       <author><first>Michael</first><last>Baeriswyl</last></author>
-      <author><first>Claudiu</first><last>Musat</last></author>
+      <author id="claudiu-musat"><first>Claudiu</first><last>Musat</last></author>
       <pages>67–73</pages>
       <url hash="690c689a">W18-5710</url>
       <abstract>Most of the world’s data is stored in relational databases. Accessing these requires specialized knowledge of the Structured Query Language (SQL), putting them out of the reach of many people. A recent research thread in Natural Language Processing (NLP) aims to alleviate this problem by automatically translating natural language questions into SQL queries. While the proposed solutions are a great start, they lack robustness and do not easily generalize: the methods require high quality descriptions of the database table columns, and the most widely used training dataset, WikiSQL, is heavily biased towards using those descriptions as part of the questions. In this work, we propose solutions to both problems: we entirely eliminate the need for column descriptions, by relying solely on their contents, and we augment the WikiSQL dataset by paraphrasing column names to reduce bias. We show that the accuracy of existing methods drops when trained on our augmented, column-agnostic dataset, and that our own method reaches state of the art accuracy, while relying on column contents only.</abstract>
@@ -10784,7 +10784,7 @@
     <paper id="11">
       <title>Exploring Named Entity Recognition As an Auxiliary Task for Slot Filling in Conversational Language Understanding</title>
       <author><first>Samuel</first><last>Louvan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>74–80</pages>
       <url hash="2fb72aa3">W18-5711</url>
       <abstract>Slot filling is a crucial task in the Natural Language Understanding (NLU) component of a dialogue system. Most approaches for this task rely solely on the domain-specific datasets for training. We propose a joint model of slot filling and Named Entity Recognition (NER) in a multi-task learning (MTL) setup. Our experiments on three slot filling datasets show that using NER as an auxiliary task improves slot filling performance and achieve competitive performance compared with state-of-the-art. In particular, NER is effective when supervised at the lower layer of the model. For low-resource scenarios, we found that MTL is effective for one dataset.</abstract>
@@ -10794,7 +10794,7 @@
     <paper id="12">
       <title>Why are Sequence-to-Sequence Models So Dull? Understanding the Low-Diversity Problem of Chatbots</title>
       <author><first>Shaojie</first><last>Jiang</last></author>
-      <author><first>Maarten</first><last>de Rijke</last></author>
+      <author id="maarten-de-rijke"><first>Maarten</first><last>de Rijke</last></author>
       <pages>81–86</pages>
       <url hash="b3273bed">W18-5712</url>
       <abstract>Diversity is a long-studied topic in information retrieval that usually refers to the requirement that retrieved results should be non-repetitive and cover different aspects. In a conversational setting, an additional dimension of diversity matters: an engaging response generation system should be able to output responses that are diverse and interesting. Sequence-to-sequence (Seq2Seq) models have been shown to be very effective for response generation. However, dialogue responses generated by Seq2Seq models tend to have low diversity. In this paper, we review known sources and existing approaches to this low-diversity problem. We also identify a source of low diversity that has been little studied so far, namely model over-confidence. We sketch several directions for tackling model over-confidence and, hence, the low-diversity problem, including confidence penalties and label smoothing.</abstract>
@@ -10817,7 +10817,7 @@
     <meta>
       <booktitle>Proceedings of the Fifteenth Workshop on Computational Research in Phonetics, Phonology, and Morphology</booktitle>
       <url hash="04d3aa79">W18-58</url>
-      <editor><first>Sandra</first><last>Kuebler</last></editor>
+      <editor id="sandra-kubler"><first>Sandra</first><last>Kuebler</last></editor>
       <editor><first>Garrett</first><last>Nicolai</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
@@ -10851,7 +10851,7 @@
     </paper>
     <paper id="3">
       <title>Acoustic Word Disambiguation with Phonogical Features in <fixed-case>D</fixed-case>anish <fixed-case>ASR</fixed-case></title>
-      <author><first>Andreas Søeborg</first><last>Kirkedal</last></author>
+      <author id="andreas-soeborg-kirkedal"><first>Andreas Søeborg</first><last>Kirkedal</last></author>
       <pages>21–31</pages>
       <url hash="b962909c">W18-5803</url>
       <abstract>Phonological features can indicate word class and we can use word class information to disambiguate both homophones and homographs in automatic speech recognition (ASR). We show Danish stød can be predicted from speech and used to improve ASR. We discover which acoustic features contain the signal of stød, how to use these features to predict stød and how we can make use of stød and stødpredictive acoustic features to improve overall ASR accuracy and decoding speed. In the process, we discover acoustic features that are novel to the phonetic characterisation of stød.</abstract>
@@ -10861,11 +10861,11 @@
     <paper id="4">
       <title><fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for the Linguist: Word Segmentation Experiments for Very Low-Resource Languages</title>
       <author><first>Pierre</first><last>Godard</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <author><first>François</first><last>Yvon</last></author>
-      <author><first>Martine</first><last>Adda-Decker</last></author>
-      <author><first>Gilles</first><last>Adda</last></author>
-      <author><first>Hélène</first><last>Maynard</last></author>
+      <author id="martine-adda-decker"><first>Martine</first><last>Adda-Decker</last></author>
+      <author id="gilles-adda"><first>Gilles</first><last>Adda</last></author>
+      <author id="helene-bonneau-maynard"><first>Hélène</first><last>Maynard</last></author>
       <author><first>Annie</first><last>Rialland</last></author>
       <pages>32–42</pages>
       <url hash="090086f9">W18-5804</url>
@@ -10907,7 +10907,7 @@
     <paper id="8">
       <title>Automatically Tailoring Unsupervised Morphological Segmentation to the Language</title>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>78–83</pages>
       <url hash="e307e0c3">W18-5808</url>
@@ -10968,8 +10968,8 @@
     </paper>
     <paper id="14">
       <title>On Hapax Legomena and Morphological Productivity</title>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
-      <author><first>Ramon</first><last>Granell</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="ramon-granell"><first>Ramon</first><last>Granell</last></author>
       <pages>125–130</pages>
       <url hash="e4390da7">W18-5814</url>
       <abstract>Quantifying and predicting morphological productivity is a long-standing challenge in corpus linguistics and psycholinguistics. The same challenge reappears in natural language processing in the context of handling words that were not seen in the training set (out-of-vocabulary, or OOV, words). Prior research showed that a good indicator of the productivity of a morpheme is the number of words involving it that occur exactly once (the <i>hapax legomena</i>). A technical connection was adduced between this result and Good-Turing smoothing, which assigns probability mass to unseen events on the basis of the simplifying assumption that word frequencies are stationary. In a large-scale study of 133 affixes in Wikipedia, we develop evidence that success in fact depends on tapping the frequency range in which the assumptions of Good-Turing are violated.</abstract>
@@ -10979,7 +10979,7 @@
     <paper id="15">
       <title>A Morphological Analyzer for <fixed-case>S</fixed-case>hipibo-Konibo</title>
       <author><first>Ronald</first><last>Cardenas</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>131–139</pages>
       <url hash="5351fc40">W18-5815</url>
       <abstract>We present a fairly complete morphological analyzer for Shipibo-Konibo, a low-resourced native language spoken in the Amazonian region of Peru. We resort to the robustness of finite-state systems in order to model the complex morphosyntax of the language. Evaluation over raw corpora shows promising coverage of grammatical phenomena, limited only by the scarce lexicon. We make this tool freely available so as to aid the production of annotated corpora and impulse further research in native languages of Peru.</abstract>
@@ -11012,7 +11012,7 @@
     <paper id="18">
       <title>Phonological Features for Morphological Inflection</title>
       <author><first>Adam</first><last>Wiemerslage</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <pages>161–166</pages>
       <url hash="2306bce5">W18-5818</url>
@@ -11034,10 +11034,10 @@
     <meta>
       <booktitle>Proceedings of the 2018 <fixed-case>EMNLP</fixed-case> Workshop <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>: The 3rd Social Media Mining for Health Applications Workshop &amp; Shared Task</booktitle>
       <url hash="17d9e622">W18-59</url>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
-      <editor><first>Davy</first><last>Weissenbacher</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></editor>
       <editor><first>Abeed</first><last>Sarker</last></editor>
-      <editor><first>Michael</first><last>Paul</last></editor>
+      <editor id="michael-paul"><first>Michael</first><last>Paul</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
       <month>October</month>
@@ -11119,7 +11119,7 @@
       <author><first>Sahil</first><last>Chopra</last></author>
       <author><first>Simra</first><last>Shahid</last></author>
       <author><first>Laiba</first><last>Mehnaz</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <pages>27–31</pages>
       <url hash="16f7f3d5">W18-5907</url>
       <abstract>Social media-based text mining in healthcare has received special attention in recent times due to the enhanced accessibility of social media sites like Twitter. The increasing trend of spreading important information in distress can help patients reach out to prospective blood donors in a time bound manner. However such manual efforts are mostly inefficient due to the limited network of a user. In a novel step to solve this problem, we present an annotated Emergency Blood Donation Request (EBDR) dataset to classify tweets referring to the necessity of urgent blood donation requirement. Additionally, we also present an automated feature-based SVM classification technique that can help selective EBDR tweets reach relevant personals as well as medical authorities. Our experiments also present a quantitative evidence that linguistic along with handcrafted heuristics can act as the most representative set of signals this task with an accuracy of 97.89%.</abstract>
@@ -11128,7 +11128,7 @@
     </paper>
     <paper id="8">
       <title>Dealing with Medication Non-Adherence Expressions in <fixed-case>T</fixed-case>witter</title>
-      <author><first>Takeshi</first><last>Onishi</last></author>
+      <author id="takashi-onishi"><first>Takeshi</first><last>Onishi</last></author>
       <author><first>Davy</first><last>Weissenbacher</last></author>
       <author><first>Ari</first><last>Klein</last></author>
       <author><first>Karen</first><last>O’Connor</last></author>
@@ -11165,10 +11165,10 @@
     <paper id="11">
       <title>Shot Or Not: Comparison of <fixed-case>NLP</fixed-case> Approaches for Vaccination Behaviour Detection</title>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Xiang</first><last>Dai</last></author>
+      <author id="xiang-dai"><first>Xiang</first><last>Dai</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>C Raina</first><last>MacIntyre</last></author>
       <pages>43–47</pages>
       <url hash="e1ff5f05">W18-5911</url>
@@ -11210,7 +11210,7 @@
     <paper id="15">
       <title>Automatic Identification of Drugs and Adverse Drug Reaction Related Tweets</title>
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>54–55</pages>
       <url hash="5b3a4e1d">W18-5915</url>
       <abstract>We describe our submissions to the Third Social Media Mining for Health Applications Shared Task. We participated in two tasks (tasks 1 and 3). For both tasks, we experimented with a traditional machine learning model (Naive Bayes Support Vector Machine (NBSVM)), deep learning models (Convolutional Neural Networks (CNN), Long Short-Term Memory (LSTM), and Bidirectional LSTM (BiLSTM)), and the combination of deep learning model with SVM. We observed that the NBSVM reaches superior performance on both tasks on our development split of the training data sets. Official result for task 1 based on the blind evaluation data shows that the predictions of the NBSVM achieved our team’s best F-score of 0.910 which is above the average score received by all submissions to the task. On task 3, the combination of of BiLSTM and SVM gives our best F-score for the positive class of 0.394.</abstract>
@@ -11281,7 +11281,7 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Universal Dependencies (<fixed-case>UDW</fixed-case> 2018)</booktitle>
       <url hash="7d9ecd00">W18-60</url>
-      <editor><first>Marie-Catherine</first><last>de Marneffe</last></editor>
+      <editor id="marie-catherine-de-marneffe"><first>Marie-Catherine</first><last>de Marneffe</last></editor>
       <editor><first>Teresa</first><last>Lynn</last></editor>
       <editor><first>Sebastian</first><last>Schuster</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -11298,7 +11298,7 @@
       <title>Assessing the Impact of Incremental Error Detection and Correction. A Case Study on the <fixed-case>I</fixed-case>talian <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank</title>
       <author><first>Chiara</first><last>Alzetta</last></author>
       <author><first>Felice</first><last>Dell’Orletta</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <author><first>Giulia</first><last>Venturi</last></author>
       <pages>1–7</pages>
@@ -11329,11 +11329,11 @@
     <paper id="3">
       <title>Expletives in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks</title>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <author><first>Dag</first><last>Haug</last></author>
       <author><first>Joakim</first><last>Nivre</last></author>
       <author><first>Per Erik</first><last>Solberg</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>18–26</pages>
       <url hash="f4434ffa">W18-6003</url>
       <abstract>Although treebanks annotated according to the guidelines of Universal Dependencies (UD) now exist for many languages, the goal of annotating the same phenomena in a cross-linguistically consistent fashion is not always met. In this paper, we investigate one phenomenon where we believe such consistency is lacking, namely expletive elements. Such elements occupy a position that is structurally associated with a core argument (or sometimes an oblique dependent), yet are non-referential and semantically void. Many UD treebanks identify at least some elements as expletive, but the range of phenomena differs between treebanks, even for closely related languages, and sometimes even for different treebanks for the same language. In this paper, we present criteria for identifying expletives that are applicable across languages and compatible with the goals of UD, give an overview of expletives as found in current UD treebanks, and present recommendations for the annotation of expletives so that more consistent annotation can be achieved in future releases.</abstract>
@@ -11342,10 +11342,10 @@
     </paper>
     <paper id="4">
       <title>Challenges in Converting the Index <fixed-case>T</fixed-case>homisticus Treebank into <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
-      <author><first>Flavio Massimiliano</first><last>Cecchini</last></author>
+      <author id="flavio-massimiliano-cecchini"><first>Flavio Massimiliano</first><last>Cecchini</last></author>
       <author><first>Marco</first><last>Passarotti</last></author>
       <author><first>Paola</first><last>Marongiu</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>27–36</pages>
       <url hash="68a41de0">W18-6004</url>
       <abstract>This paper describes the changes applied to the original process used to convert the <i>Index Thomisticus</i> Treebank, a corpus including texts in Medieval Latin by Thomas Aquinas, into the annotation style of Universal Dependencies. The changes are made both to harmonise the Universal Dependencies version of the <i>Index Thomisticus</i> Treebank with the two other available Latin treebanks and to fix errors and inconsistencies resulting from the original process. The paper details the treatment of different issues in PoS tagging, lemmatisation and assignment of dependency relations. Finally, it assesses the quality of the new conversion process by providing an evaluation against a gold standard.</abstract>
@@ -11367,7 +11367,7 @@
       <author><first>Kira</first><last>Droganova</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>47–54</pages>
       <url hash="041aacc5">W18-6006</url>
       <abstract>In this paper, we focus on parsing rare and non-trivial constructions, in particular ellipsis. We report on several experiments in enrichment of training data for this specific construction, evaluated on five languages: Czech, English, Finnish, Russian and Slovak. These data enrichment methods draw upon self-training and tri-training, combined with a stratified sampling method mimicking the structural complexity of the original treebank. In addition, using these same methods, we also demonstrate small improvements over the CoNLL-17 parsing shared task winning system for four of the five languages, not only restricted to the elliptical constructions.</abstract>
@@ -11402,8 +11402,8 @@
       <author><first>Masayuki</first><last>Asahara</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>75–84</pages>
       <url hash="598f1a78">W18-6009</url>
       <abstract>This paper discusses the representation of coordinate structures in the Universal Dependencies framework for two head-final languages, Japanese and Korean. UD applies a strict principle that makes the head of coordination the left-most conjunct. However, the guideline may produce syntactic trees which are difficult to accept in head-final languages. This paper describes the status in the current Japanese and Korean corpora and proposes alternative designs suitable for these languages.</abstract>
@@ -11421,8 +11421,8 @@
     </paper>
     <paper id="11">
       <title>Marrying <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and <fixed-case>U</fixed-case>niversal <fixed-case>M</fixed-case>orphology</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
@@ -11438,7 +11438,7 @@
       <author><first>Paola</first><last>Marongiu</last></author>
       <author><first>Filip</first><last>Ginter</last></author>
       <author><first>Jenna</first><last>Kanerva</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Sebastian</first><last>Schuster</last></author>
       <author><first>Maria</first><last>Simi</last></author>
       <pages>102–107</pages>
@@ -11473,7 +11473,7 @@
       <title>The First <fixed-case>K</fixed-case>omi-<fixed-case>Z</fixed-case>yrian <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks</title>
       <author><first>Niko</first><last>Partanen</last></author>
       <author><first>Rogier</first><last>Blokland</last></author>
-      <author><first>KyungTae</first><last>Lim</last></author>
+      <author id="kyungtae-lim"><first>KyungTae</first><last>Lim</last></author>
       <author><first>Thierry</first><last>Poibeau</last></author>
       <author><first>Michael</first><last>Rießler</last></author>
       <pages>126–132</pages>
@@ -11495,7 +11495,7 @@
     </paper>
     <paper id="17">
       <title>Multi-source synthetic treebank creation for improved cross-lingual dependency parsing</title>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Mariya</first><last>Sheyanova</last></author>
       <author><first>Aleksandra</first><last>Martynova</last></author>
       <author><first>Pavel</first><last>Stepachev</last></author>
@@ -11511,11 +11511,11 @@
       <author><first>Alonso</first><last>Vasquez</last></author>
       <author><first>Renzo</first><last>Ego Aguirre</last></author>
       <author><first>Candy</first><last>Angulo</last></author>
-      <author><first>John</first><last>Miller</last></author>
+      <author id="john-miller"><first>John</first><last>Miller</last></author>
       <author><first>Claudia</first><last>Villanueva</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <author><first>Roberto</first><last>Zariquiey</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>151–161</pages>
       <url hash="68a7645a">W18-6018</url>
       <abstract>We present an initial version of the Universal Dependencies (UD) treebank for Shipibo-Konibo, the first South American, Amazonian, Panoan and Peruvian language with a resource built under UD. We describe the linguistic aspects of how the tagset was defined and the treebank was annotated; in addition we present our specific treatment of linguistic units called <i>clitics</i>. Although the treebank is still under development, it allowed us to perform a typological comparison against Spanish, the predominant language in Peru, and dependency syntax parsing experiments in both monolingual and cross-lingual approaches.</abstract>
@@ -11569,7 +11569,7 @@
       <url hash="5f86a021">W18-61</url>
       <editor><first>Wei</first><last>Xu</last></editor>
       <editor><first>Alan</first><last>Ritter</last></editor>
-      <editor><first>Tim</first><last>Baldwin</last></editor>
+      <editor id="timothy-baldwin"><first>Tim</first><last>Baldwin</last></editor>
       <editor><first>Afshin</first><last>Rahimi</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
@@ -11585,7 +11585,7 @@
       <title>Inducing a lexicon of sociolinguistic variables from code-mixed text</title>
       <author><first>Philippa</first><last>Shoemark</last></author>
       <author><first>James</first><last>Kirby</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <pages>1–6</pages>
       <url hash="4ea21c38">W18-6101</url>
       <abstract>Sociolinguistics is often concerned with how variants of a linguistic item (e.g., <i>nothing</i> vs. <i>nothin’</i>) are used by different groups or in different situations. We introduce the task of inducing lexical variables from code-mixed text: that is, identifying equivalence pairs such as (<i>football</i>, <i>fitba</i>) along with their linguistic code (<i>football</i>→British, <i>fitba</i>→Scottish). We adapt a framework for identifying gender-biased word pairs to this new task, and present results on three different pairs of English dialects, using tweets as the code-mixed text. Our system achieves precision of over 70% for two of these three datasets, and produces useful results even without extensive parameter tuning. Our success in adapting this framework from gender to language variety suggests that it could be used to discover other types of analogous pairs as well.</abstract>
@@ -11596,7 +11596,7 @@
       <title><fixed-case>T</fixed-case>witter Geolocation using Knowledge-Based Methods</title>
       <author><first>Taro</first><last>Miyazaki</last></author>
       <author><first>Afshin</first><last>Rahimi</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
       <author><first>Timothy</first><last>Baldwin</last></author>
       <pages>7–16</pages>
       <url hash="b4542ee3">W18-6102</url>
@@ -11629,7 +11629,7 @@
       <title>How do you correct run-on sentences it’s not as easy as it seems</title>
       <author><first>Junchao</first><last>Zheng</last></author>
       <author><first>Courtney</first><last>Napoles</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <author><first>Kostiantyn</first><last>Omelianchuk</last></author>
       <pages>33–38</pages>
       <url hash="35cf97a2">W18-6105</url>
@@ -11652,7 +11652,7 @@
     </paper>
     <paper id="7">
       <title>Normalization of Transliterated Words in Code-Mixed Data Using <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Model &amp; <fixed-case>L</fixed-case>evenshtein Distance</title>
-      <author><first>Soumil</first><last>Mandal</last></author>
+      <author id="soumil-mandal"><first>Soumil</first><last>Mandal</last></author>
       <author><first>Karthick</first><last>Nanmaran</last></author>
       <pages>49–53</pages>
       <url hash="678cce98">W18-6107</url>
@@ -11735,7 +11735,7 @@
     <paper id="15">
       <title>Detecting Code-Switching between <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish Language Pair</title>
       <author><first>Zeynep</first><last>Yirmibeşoğlu</last></author>
-      <author><first>Gülşen</first><last>Eryiğit</last></author>
+      <author id="gulsen-eryigit"><first>Gülşen</first><last>Eryiğit</last></author>
       <pages>110–115</pages>
       <url hash="fe131a6a">W18-6115</url>
       <abstract>Code-switching (usage of different languages within a single conversation context in an alternative manner) is a highly increasing phenomenon in social media and colloquial usage which poses different challenges for natural language processing. This paper introduces the first study for the detection of Turkish-English code-switching and also a small test data collected from social media in order to smooth the way for further studies. The proposed system using character level n-grams and conditional random fields (CRFs) obtains 95.6% micro-averaged F1-score on the introduced test data set.</abstract>
@@ -11744,8 +11744,8 @@
     </paper>
     <paper id="16">
       <title>Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture</title>
-      <author><first>Soumil</first><last>Mandal</last></author>
-      <author><first>Anil Kumar</first><last>Singh</last></author>
+      <author id="soumil-mandal"><first>Soumil</first><last>Mandal</last></author>
+      <author id="anil-kumar-singh"><first>Anil Kumar</first><last>Singh</last></author>
       <pages>116–120</pages>
       <url hash="5a2c3111">W18-6116</url>
       <abstract>An accurate language identification tool is an absolute necessity for building complex NLP systems to be used on code-mixed data. Lot of work has been recently done on the same, but there’s still room for improvement. Inspired from the recent advancements in neural network architectures for computer vision tasks, we have implemented multichannel neural networks combining CNN and LSTM for word level language identification of code-mixed data. Combining this with a Bi-LSTM-CRF context capture module, accuracies of 93.28% and 93.32% is achieved on our two testing sets.</abstract>
@@ -11768,7 +11768,7 @@
       <title>Content Extraction and Lexical Analysis from Customer-Agent Interactions</title>
       <author><first>Sergiu</first><last>Nisioi</last></author>
       <author><first>Anca</first><last>Bucur</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>132–136</pages>
       <url hash="7c2f4571">W18-6118</url>
       <abstract>In this paper, we provide a lexical comparative analysis of the vocabulary used by customers and agents in an Enterprise Resource Planning (ERP) environment and a potential solution to clean the data and extract relevant content for NLP. As a result, we demonstrate that the actual vocabulary for the language that prevails in the ERP conversations is highly divergent from the standardized dictionary and further different from general language usage as extracted from the Common Crawl corpus. Moreover, in specific business communication circumstances, where it is expected to observe a high usage of standardized language, code switching and non-standard expression are predominant, emphasizing once more the discrepancy between the day-to-day use of language and the standardized one.</abstract>
@@ -11778,7 +11778,7 @@
     <paper id="19">
       <title>Preferred Answer Selection in <fixed-case>S</fixed-case>tack <fixed-case>O</fixed-case>verflow: Better Text Representations ... and Metadata, Metadata, Metadata</title>
       <author><first>Steven</first><last>Xu</last></author>
-      <author><first>Andrew</first><last>Bennett</last></author>
+      <author id="andrew-bennett"><first>Andrew</first><last>Bennett</last></author>
       <author><first>Doris</first><last>Hoogeveen</last></author>
       <author><first>Jey Han</first><last>Lau</last></author>
       <author><first>Timothy</first><last>Baldwin</last></author>
@@ -11802,7 +11802,7 @@
     <paper id="21">
       <title>Classification of Tweets about Reported Events using Neural Networks</title>
       <author><first>Kiminobu</first><last>Makino</last></author>
-      <author><first>Yuka</first><last>Takei</last></author>
+      <author id="yuka-takei"><first>Yuka</first><last>Takei</last></author>
       <author><first>Taro</first><last>Miyazaki</last></author>
       <author><first>Jun</first><last>Goto</last></author>
       <pages>153–163</pages>
@@ -11815,8 +11815,8 @@
       <title>Learning to Define Terms in the Software Domain</title>
       <author><first>Vidhisha</first><last>Balachandran</last></author>
       <author><first>Dheeraj</first><last>Rajagopal</last></author>
-      <author><first>Rose Catherine</first><last>Kanjirathinkal</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="rose-catherine-kanjirathinkal"><first>Rose Catherine</first><last>Kanjirathinkal</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <pages>164–172</pages>
       <url hash="846bc510">W18-6122</url>
       <abstract>One way to test a person’s knowledge of a domain is to ask them to define domain-specific terms. Here, we investigate the task of automatically generating definitions of technical terms by reading text from the technical domain. Specifically, we learn definitions of software entities from a large corpus built from the user forum Stack Overflow. To model definitions, we train a language model and incorporate additional domain-specific information like word co-occurrence, and ontological category information. Our approach improves previous baselines by 2 BLEU points for the definition generation task. Our experiments also show the additional challenges associated with the task and the short-comings of language-model based architectures for definition generation.</abstract>
@@ -11848,7 +11848,7 @@
       <title>Low-resource named entity recognition via multi-source projection: Not quite there yet?</title>
       <author><first>Jan Vium</first><last>Enghoff</last></author>
       <author><first>Søren</first><last>Harrison</last></author>
-      <author><first>Željko</first><last>Agić</last></author>
+      <author id="zeljko-agic"><first>Željko</first><last>Agić</last></author>
       <pages>195–201</pages>
       <url hash="e91d55d2">W18-6125</url>
       <abstract>Projecting linguistic annotations through word alignments is one of the most prevalent approaches to cross-lingual transfer learning. Conventional wisdom suggests that annotation projection “just works” regardless of the task at hand. We carefully consider multi-source projection for named entity recognition. Our experiment with 17 languages shows that to detect named entities in true low-resource languages, annotation projection may not be the right way to move forward. On a more positive note, we also uncover the conditions that do favor named entity projection from multiple sources. We argue these are infeasible under noisy low-resource constraints.</abstract>
@@ -11860,7 +11860,7 @@
       <author><first>Lisheng</first><last>Fu</last></author>
       <author><first>Bonan</first><last>Min</last></author>
       <author><first>Thien Huu</first><last>Nguyen</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <pages>202–207</pages>
       <url hash="a1c57777">W18-6126</url>
       <abstract>Typical relation extraction models are trained on a single corpus annotated with a pre-defined relation schema. An individual corpus is often small, and the models may often be biased or overfitted to the corpus. We hypothesize that we can learn a better representation by combining multiple relation datasets. We attempt to use a shared encoder to learn the unified feature representation and to augment it with regularization by adversarial training. The additional corpora feeding the encoder can help to learn a better feature representation layer even though the relation schemas are different. We use ACE05 and ERE datasets as our case study for experiments. The multi-task model obtains significant improvement on both datasets.</abstract>
@@ -11904,9 +11904,9 @@
     <meta>
       <booktitle>Proceedings of the 9th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="b17e4396">W18-62</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
-      <editor><first>Saif M.</first><last>Mohammad</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="saif-mohammad"><first>Saif M.</first><last>Mohammad</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
       <editor><first>Roman</first><last>Klinger</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
@@ -11920,7 +11920,7 @@
     </frontmatter>
     <paper id="1">
       <title>Identifying Affective Events and the Reasons for their Polarity</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <pages>1</pages>
       <url hash="9027fe50">W18-6201</url>
       <abstract>Many events have a positive or negative impact on our lives (e.g., “I bought a house” is typically good news, but ”My house burned down” is bad news). Recognizing events that have affective polarity is essential for narrative text understanding, conversational dialogue, and applications such as summarization and sarcasm detection. We will discuss our recent work on identifying affective events and categorizing them based on the underlying reasons for their affective polarity. First, we will describe a weakly supervised learning method to induce a large set of affective events from a text corpus by optimizing for semantic consistency. Second, we will present models to classify affective events based on Human Need Categories, which often explain people’s motivations and desires. Our best results use a co-training model that consists of event expression and event context classifiers and exploits both labeled and unlabeled texts. We will conclude with a discussion of interesting directions for future work in this area.</abstract>
@@ -11929,7 +11929,7 @@
     </paper>
     <paper id="2">
       <title>Deep contextualized word representations for detecting sarcasm and irony</title>
-      <author><first>Suzana</first><last>Ilić</last></author>
+      <author id="suzana-ilic"><first>Suzana</first><last>Ilić</last></author>
       <author><first>Edison</first><last>Marrese-Taylor</last></author>
       <author><first>Jorge</first><last>Balazs</last></author>
       <author><first>Yutaka</first><last>Matsuo</last></author>
@@ -11942,7 +11942,7 @@
     <paper id="3">
       <title>Implicit Subjective and Sentimental Usages in Multi-sense Word Embeddings</title>
       <author><first>Yuqi</first><last>Sun</last></author>
-      <author><first>Haoyue</first><last>Shi</last></author>
+      <author id="freda-shi"><first>Haoyue</first><last>Shi</last></author>
       <author><first>Junfeng</first><last>Hu</last></author>
       <pages>8–13</pages>
       <url hash="32d6eff9">W18-6203</url>
@@ -11965,7 +11965,7 @@
       <title>Creating a Dataset for Multilingual Fine-grained Emotion-detection Using Gamification-based Annotation</title>
       <author><first>Emily</first><last>Öhman</last></author>
       <author><first>Kaisla</first><last>Kajava</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Timo</first><last>Honkela</last></author>
       <pages>24–30</pages>
       <url hash="2fcc3e26">W18-6205</url>
@@ -11976,7 +11976,7 @@
     <paper id="6">
       <title><fixed-case>IEST</fixed-case>: <fixed-case>WASSA</fixed-case>-2018 Implicit Emotions Shared Task</title>
       <author><first>Roman</first><last>Klinger</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Saif</first><last>Mohammad</last></author>
       <author><first>Alexandra</first><last>Balahur</last></author>
       <pages>31–42</pages>
@@ -12022,7 +12022,7 @@
     <paper id="10">
       <title>Sentiment analysis under temporal shift</title>
       <author><first>Jan</first><last>Lukes</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>65–71</pages>
       <url hash="97c75104">W18-6210</url>
       <abstract>Sentiment analysis models often rely on training data that is several years old. In this paper, we show that lexical features change polarity over time, leading to degrading performance. This effect is particularly strong in sparse models relying only on highly predictive features. Using predictive feature selection, we are able to significantly improve the accuracy of such models over time.</abstract>
@@ -12043,7 +12043,7 @@
     <paper id="12">
       <title>Topic-Specific Sentiment Analysis Can Help Identify Political Ideology</title>
       <author><first>Sumit</first><last>Bhatia</last></author>
-      <author><first>Deepak</first><last>P</last></author>
+      <author id="deepak-p"><first>Deepak</first><last>P</last></author>
       <pages>79–84</pages>
       <url hash="23dc9933">W18-6212</url>
       <abstract>Ideological leanings of an individual can often be gauged by the sentiment one expresses about different issues. We propose a simple framework that represents a political ideology as a distribution of sentiment polarities towards a set of topics. This representation can then be used to detect ideological leanings of documents (speeches, news articles, etc.) based on the sentiments expressed towards different topics. Experiments performed using a widely used dataset show the promise of our proposed approach that achieves comparable performance to other methods despite being much simpler and more interpretable.</abstract>
@@ -12053,8 +12053,8 @@
     <paper id="13">
       <title>Saying no but meaning yes: negation and sentiment analysis in <fixed-case>B</fixed-case>asque</title>
       <author><first>Jon</first><last>Alkorta</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
-      <author><first>Mikel</first><last>Iruskieta</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
+      <author id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></author>
       <pages>85–90</pages>
       <url hash="9e3eb5de">W18-6213</url>
       <abstract>In this work, we have analyzed the effects of negation on the semantic orientation in Basque. The analysis shows that negation markers can strengthen, weaken or have no effect on sentiment orientation of a word or a group of words. Using the Constraint Grammar formalism, we have designed and evaluated a set of linguistic rules to formalize these three phenomena. The results show that two phenomena, strengthening and no change, have been identified accurately and the third one, weakening, with acceptable results.</abstract>
@@ -12065,7 +12065,7 @@
       <title>Leveraging Writing Systems Change for Deep Learning Based <fixed-case>C</fixed-case>hinese Emotion Analysis</title>
       <author><first>Rong</first><last>Xiang</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Dan</first><last>Xiong</last></author>
       <author><first>I-Hsuan</first><last>Chen</last></author>
       <pages>91–96</pages>
@@ -12078,7 +12078,7 @@
       <title>Ternary <fixed-case>T</fixed-case>witter Sentiment Classification with Distant Supervision and Sentiment-Specific Word Embeddings</title>
       <author><first>Mats</first><last>Byrkjeland</last></author>
       <author><first>Frederik</first><last>Gørvell de Lichtenberg</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>97–106</pages>
       <url hash="411bb2b1">W18-6215</url>
       <abstract>The paper proposes the Ternary Sentiment Embedding Model, a new model for creating sentiment embeddings based on the Hybrid Ranking Model of Tang et al. (2016), but trained on ternary-labeled data instead of binary-labeled, utilizing sentiment embeddings from datasets made with different distant supervision methods. The model is used as part of a complete Twitter Sentiment Analysis system and empirically compared to existing systems, showing that it outperforms Hybrid Ranking and that the quality of the distant-supervised dataset has a great impact on the quality of the produced sentiment embeddings.</abstract>
@@ -12108,7 +12108,7 @@
     <paper id="18">
       <title>The Role of Emotions in Native Language Identification</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <author><first>Grigori</first><last>Sidorov</last></author>
       <pages>123–129</pages>
@@ -12131,7 +12131,7 @@
       <title>Dual Memory Network Model for Biased Product Review Classification</title>
       <author><first>Yunfei</first><last>Long</last></author>
       <author><first>Mingyu</first><last>Ma</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>140–148</pages>
@@ -12165,7 +12165,7 @@
       <author><first>Ramit</first><last>Sawhney</last></author>
       <author><first>Prachi</first><last>Manchanda</last></author>
       <author><first>Puneet</first><last>Mathur</last></author>
-      <author><first>Rajiv</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv</first><last>Shah</last></author>
       <author><first>Raj</first><last>Singh</last></author>
       <pages>167–175</pages>
       <url hash="d24374bc">W18-6223</url>
@@ -12175,7 +12175,7 @@
     </paper>
     <paper id="24">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>IEST</fixed-case> 2018: Exploring Character-to-Word Composition for Emotion Analysis</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <pages>176–181</pages>
       <url hash="7c850cdf">W18-6224</url>
       <abstract>We introduce the UTFPR system for the Implicit Emotions Shared Task of 2018: A compositional character-to-word recurrent neural network that does not exploit heavy and/or hard-to-obtain resources. We find that our approach can outperform multiple baselines, and offers an elegant and effective solution to the problem of orthographic variance in tweets.</abstract>
@@ -12206,9 +12206,9 @@
     <paper id="27">
       <title><fixed-case>SINAI</fixed-case> at <fixed-case>IEST</fixed-case> 2018: Neural Encoding of Emotional External Knowledge for Emotion Classification</title>
       <author><first>Flor Miriam</first><last>Plaza-del-Arco</last></author>
-      <author><first>Eugenio</first><last>Martínez-Cámara</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Ureña- López</last></author>
+      <author id="eugenio-martinez-camara"><first>Eugenio</first><last>Martínez-Cámara</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Ureña- López</last></author>
       <pages>195–200</pages>
       <url hash="d5586398">W18-6227</url>
       <abstract>In this paper, we describe our participation in WASSA 2018 Implicit Emotion Shared Task (IEST 2018). We claim that the use of emotional external knowledge may enhance the performance and the capacity of generalization of an emotion classification system based on neural networks. Accordingly, we submitted four deep learning systems grounded in a sequence encoding layer. They mainly differ in the feature vector space and the recurrent neural network used in the sequence encoding layer. The official results show that the systems that used emotional external knowledge have a higher capacity of generalization, hence our claim holds.</abstract>
@@ -12342,7 +12342,7 @@
       <title>What Makes You Stressed? Finding Reasons From Tweets</title>
       <author><first>Reshmi</first><last>Gopalakrishna Pillai</last></author>
       <author><first>Mike</first><last>Thelwall</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>266–272</pages>
       <url hash="64b00c7f">W18-6239</url>
       <abstract>Detecting stress from social media gives a non-intrusive and inexpensive alternative to traditional tools such as questionnaires or physiological sensors for monitoring mental state of individuals. This paper introduces a novel framework for finding reasons for stress from tweets, analyzing multiple categories for the first time. Three word-vector based methods are evaluated on collections of tweets about politics or airlines and are found to be more accurate than standard machine learning algorithms.</abstract>
@@ -12363,7 +12363,7 @@
     <paper id="41">
       <title>Identifying Opinion-Topics and Polarity of Parliamentary Debate Motions</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Riza Theresa</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza Theresa</first><last>Batista-Navarro</last></author>
       <pages>280–285</pages>
       <url hash="9757143f">W18-6241</url>
       <abstract>Analysis of the topics mentioned and opinions expressed in parliamentary debate motions–or proposals–is difficult for human readers, but necessary for understanding and automatic processing of the content of the subsequent speeches. We present a dataset of debate motions with pre-existing ‘<i>policy</i>’ labels, and investigate the utility of these labels for simultaneous topic and opinion polarity analysis. For topic detection, we apply one-versus-the-rest supervised topic classification, finding that good performance is achieved in predicting the <i>policy</i> topics, and that textual features derived from the debate titles associated with the motions are particularly indicative of motion topic. We then examine whether the output could also be used to determine the positions taken by proposers towards the different policies by investigating how well humans agree in interpreting the opinion polarities of the motions. Finding very high levels of agreement, we conclude that the <i>policies</i> used can be reliable labels for use in these tasks, and that successful topic detection can therefore provide opinion analysis of the motions ‘for free’.</abstract>
@@ -12442,7 +12442,7 @@
     <paper id="48">
       <title>Predicting Adolescents’ Educational Track from Chat Messages on <fixed-case>D</fixed-case>utch Social Media</title>
       <author><first>Lisa</first><last>Hilte</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Reinhild</first><last>Vandekerckhove</last></author>
       <pages>328–334</pages>
       <url hash="02533003">W18-6248</url>
@@ -12480,23 +12480,23 @@
     <meta>
       <booktitle>Proceedings of the Third Conference on Machine Translation: Research Papers</booktitle>
       <url hash="758dd006">W18-63</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Brussels, Belgium</address>
       <month>October</month>
@@ -12521,7 +12521,7 @@
     </paper>
     <paper id="2">
       <title>Character-level <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Translation through <fixed-case>ASCII</fixed-case> Encoding</title>
-      <author><first>Nikola I.</first><last>Nikolov</last></author>
+      <author id="nikola-i-nikolov"><first>Nikola I.</first><last>Nikolov</last></author>
       <author><first>Yuhuang</first><last>Hu</last></author>
       <author><first>Mi Xue</first><last>Tan</last></author>
       <author><first>Richard H.R.</first><last>Hahnloser</last></author>
@@ -12566,7 +12566,7 @@
     <paper id="6">
       <title>Coreference and Coherence in Neural Machine Translation: A Study Using Oracle Experiments</title>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>49–60</pages>
       <url hash="5e91a955">W18-6306</url>
       <abstract>Cross-sentence context can provide valuable information in Machine Translation and is critical for translation of anaphoric pronouns and for providing consistent translations. In this paper, we devise simple oracle experiments targeting coreference and coherence. Oracles are an easy way to evaluate the effect of different discourse-level phenomena in NMT using BLEU and eliminate the necessity to manually define challenge sets for this purpose. We propose two context-aware NMT models and compare them against models working on a concatenation of consecutive sentences. Concatenation models perform better, but are computationally expensive. We show that NMT models taking advantage of context oracle signals can achieve considerable gains in BLEU, of up to 7.02 BLEU for coreference and 1.89 BLEU for coherence on subtitles translation. Access to strong signals allows us to make clear comparisons between context-aware models.</abstract>
@@ -12576,7 +12576,7 @@
     <paper id="7">
       <title>A Large-Scale Test Set for the Evaluation of Context-Aware Pronoun Translation in Neural Machine Translation</title>
       <author><first>Mathias</first><last>Müller</last></author>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Elena</first><last>Voita</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>61–72</pages>
@@ -12588,8 +12588,8 @@
     <paper id="8">
       <title>Beyond Weight Tying: Learning Joint Input-Output Embeddings for Neural Machine Translation</title>
       <author><first>Nikolaos</first><last>Pappas</last></author>
-      <author><first>Lesly</first><last>Miculicich</last></author>
-      <author><first>James</first><last>Henderson</last></author>
+      <author id="lesly-miculicich-werlen"><first>Lesly</first><last>Miculicich</last></author>
+      <author id="james-henderson"><first>James</first><last>Henderson</last></author>
       <pages>73–83</pages>
       <url hash="ca85ee98">W18-6308</url>
       <abstract>Tying the weights of the target word embeddings with the target word classifiers of neural machine translation models leads to faster training and often to better translation quality. Given the success of this parameter sharing, we investigate other forms of sharing in between no sharing and hard equality of parameters. In particular, we propose a <i>structure-aware</i> output layer which captures the semantic structure of the output space of words within a joint input-output embedding. The model is a generalized form of <i>weight tying</i> which shares parameters but allows learning a more flexible relationship with input word embeddings and allows the effective capacity of the output layer to be controlled. In addition, the model shares weights across output classifiers and translation contexts which allows it to better leverage prior knowledge about them. Our evaluation on English-to-Finnish and English-to-German datasets shows the effectiveness of the method against strong encoder-decoder baselines trained with or without <i>weight tying</i>.</abstract>
@@ -12614,7 +12614,7 @@
       <title>Improving Neural Language Models with Weight Norm Initialization and Regularization</title>
       <author><first>Christian</first><last>Herold</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>93–100</pages>
       <url hash="dac7ebb8">W18-6310</url>
       <abstract>Embedding and projection matrices are commonly used in neural language models (NLM) as well as in other sequence processing networks that operate on large vocabularies. We examine such matrices in fine-tuned language models and observe that a NLM learns word vectors whose norms are related to the word frequencies. We show that by initializing the weight norms with scaled log word counts, together with other techniques, lower perplexities can be obtained in early epochs of training. We also introduce a weight norm regularization loss term, whose hyperparameters are tuned via a grid search. With this method, we are able to significantly improve perplexities on two word-level language modeling tasks (without dynamic evaluation): from 54.44 to 53.16 on Penn Treebank (PTB) and from 61.45 to 60.13 on WikiText-2 (WT2).</abstract>
@@ -12624,8 +12624,8 @@
     <paper id="11">
       <title>Contextual Neural Model for Translating Bilingual Multi-Speaker Conversations</title>
       <author><first>Sameen</first><last>Maruf</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
-      <author><first>Gholamreza</first><last>Haffari</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
+      <author id="gholamreza-haffari"><first>Gholamreza</first><last>Haffari</last></author>
       <pages>101–112</pages>
       <url hash="d4bf15cd">W18-6311</url>
       <abstract>Recent works in neural machine translation have begun to explore document translation. However, translating online multi-speaker conversations is still an open problem. In this work, we propose the task of translating Bilingual Multi-Speaker Conversations, and explore neural architectures which exploit both source and target-side conversation histories for this task. To initiate an evaluation for this task, we introduce datasets extracted from Europarl v7 and OpenSubtitles2016. Our experiments on four language-pairs confirm the significance of leveraging conversation history, both in terms of BLEU and manual evaluation.</abstract>
@@ -12649,12 +12649,12 @@
       <author><first>Brian</first><last>Thompson</last></author>
       <author><first>Huda</first><last>Khayrallah</last></author>
       <author><first>Antonios</first><last>Anastasopoulos</last></author>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Rebecca</first><last>Marvin</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>124–132</pages>
       <url hash="e40bf0fa">W18-6313</url>
@@ -12719,7 +12719,7 @@
       <title>On The Alignment Problem In Multi-Head Attention-Based Neural Machine Translation</title>
       <author><first>Tamer</first><last>Alkhouli</last></author>
       <author><first>Gabriel</first><last>Bretschner</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>177–185</pages>
       <url hash="00382373">W18-6318</url>
       <abstract>This work investigates the alignment problem in state-of-the-art multi-head attention models based on the transformer architecture. We demonstrate that alignment extraction in transformer models can be improved by augmenting an additional alignment head to the multi-head source-to-target attention component. This is used to compute sharper attention weights. We describe how to use the alignment head to achieve competitive performance. To study the effect of adding the alignment head, we simulate a dictionary-guided translation task, where the user wants to guide translation using pre-defined dictionary entries. Using the proposed approach, we achieve up to 3.8% BLEU improvement when using the dictionary, in comparison to 2.4% BLEU in the baseline case. We also propose alignment pruning to speed up decoding in alignment-based neural machine translation (ANMT), which speeds up translation by a factor of 1.8 without loss in translation performance. We carry out experiments on the shared WMT 2016 English→Romanian news task and the BOLT Chinese→English discussion forum task.</abstract>
@@ -12738,8 +12738,8 @@
     </paper>
     <paper id="20">
       <title>Exploring gap filling as a cheaper alternative to reading comprehension questionnaires when evaluating machine translation for gisting</title>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
@@ -12786,7 +12786,7 @@
       <title>Massively Parallel Cross-Lingual Learning in Low-Resource Target Language Translation</title>
       <author><first>Zhong</first><last>Zhou</last></author>
       <author><first>Matthias</first><last>Sperber</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>232–243</pages>
       <url hash="70d6ca36">W18-6324</url>
       <abstract>We work on translation from rich-resource languages to low-resource languages. The main challenges we identify are the lack of low-resource language data, effective methods for cross-lingual transfer, and the variable-binding problem that is common in neural systems. We build a translation system that addresses these challenges using eight European language families as our test ground. Firstly, we add the source and the target family labels and study intra-family and inter-family influences for effective cross-lingual transfer. We achieve an improvement of +9.9 in BLEU score for English-Swedish translation using eight families compared to the single-family multi-source multi-target baseline. Moreover, we find that training on two neighboring families closest to the low-resource language is often enough. Secondly, we construct an ablation study and find that reasonably good results can be achieved even with considerably less target data. Thirdly, we address the variable-binding problem by building an order-preserving named entity translation model. We obtain 60.6% accuracy in qualitative evaluation where our translations are akin to human translations in a preliminary study.</abstract>
@@ -12806,7 +12806,7 @@
     <paper id="26">
       <title>Input Combination Strategies for Multi-Source Transformer Decoder</title>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <pages>253–260</pages>
       <url hash="14743cdf">W18-6326</url>
@@ -12829,23 +12829,23 @@
     <meta>
       <booktitle>Proceedings of the Third Conference on Machine Translation: Shared Task Papers</booktitle>
       <url hash="758dd006">W18-64</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Lucia</first><last>Specia</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Belgium, Brussels</address>
       <month>October</month>
@@ -12916,7 +12916,7 @@
     </paper>
     <paper id="5">
       <title>Robust parfda Statistical Machine Translation Results</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>345–354</pages>
       <url hash="994c64eb">W18-6405</url>
       <abstract>We build parallel feature decay algorithms (parfda) Moses statistical machine translation (SMT) models for language pairs in the translation task. parfda obtains results close to the top constrained phrase-based SMT with an average of 2.252 BLEU points difference on WMT 2017 datasets using significantly less computation for building SMT systems than that would be spent using all available corpora. We obtain BLEU upper bounds based on target coverage to identify which systems used additional data. We use PRO for tuning to decrease fluctuations in the results and postprocess translation outputs to decrease translation errors due to the casing of words. F1 scores on the key phrases of the English to Turkish testsuite that we prepared reveal that parfda achieves 2nd best results. Truecasing translations before scoring obtained the best results overall.</abstract>
@@ -12927,8 +12927,8 @@
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>18 News Shared Translation Task</title>
       <author><first>Noe</first><last>Casas</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>355–360</pages>
       <url hash="bb1f619b">W18-6406</url>
       <abstract>In this article we describe the TALP-UPC research group participation in the WMT18 news shared translation task for Finnish-English and Estonian-English within the multi-lingual subtrack. All of our primary submissions implement an attention-based Neural Machine Translation architecture. Given that Finnish and Estonian belong to the same language family and are similar, we use as training data the combination of the datasets of both language pairs to paliate the data scarceness of each individual pair. We also report the translation quality of systems trained on individual language pair data to serve as baseline and comparison reference.</abstract>
@@ -12972,7 +12972,7 @@
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
       <author><first>Jiahui</first><last>Geng</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>377–385</pages>
       <url hash="c3717b4f">W18-6409</url>
       <doi>10.18653/v1/W18-6409</doi>
@@ -12993,7 +12993,7 @@
     <paper id="11">
       <title>The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>18 Systems: Ensembling, Continuation and Combination</title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>Katherine</first><last>Young</last></author>
       <pages>394–398</pages>
@@ -13010,7 +13010,7 @@
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>399–409</pages>
       <url hash="6bf50f7e">W18-6412</url>
@@ -13033,10 +13033,10 @@
       <title>The <fixed-case>MLLP</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation System for <fixed-case>WMT</fixed-case>18</title>
       <author><first>Javier</first><last>Iranzo-Sánchez</last></author>
       <author><first>Pau</first><last>Baquero-Arnal</last></author>
-      <author><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
+      <author id="goncal-v-garces-diaz-munio"><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
       <author><first>Adrià</first><last>Martínez-Villaronga</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>418–424</pages>
       <url hash="40ff0a98">W18-6414</url>
       <attachment type="poster" hash="e32956a8">W18-6414.Poster.pdf</attachment>
@@ -13077,9 +13077,9 @@
     </paper>
     <paper id="18">
       <title><fixed-case>JUCBNMT</fixed-case> at <fixed-case>WMT</fixed-case>2018 News Translation Task: Character Based Neural Machine Translation of <fixed-case>F</fixed-case>innish to <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Sainik Kumar</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik Kumar</first><last>Mahata</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>445–448</pages>
       <url hash="a2e8e538">W18-6418</url>
       <abstract>In the current work, we present a description of the system submitted to WMT 2018 News Translation Shared task. The system was created to translate news text from Finnish to English. The system used a Character Based Neural Machine Translation model to accomplish the given task. The current paper documents the preprocessing steps, the description of the submitted system and the results produced using the same. Our system garnered a BLEU score of 12.9.</abstract>
@@ -13092,7 +13092,7 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>449–455</pages>
       <url hash="0e9df9f9">W18-6419</url>
       <abstract>This paper presents the NICT’s participation to the WMT18 shared news translation task. We participated in the eight translation directions of four language pairs: Estonian-English, Finnish-English, Turkish-English and Chinese-English. For each translation direction, we prepared state-of-the-art statistical (SMT) and neural (NMT) machine translation systems. Our NMT systems were trained with the transformer architecture using the provided parallel data enlarged with a large quantity of back-translated monolingual data that we generated with a new incremental training framework. Our primary submissions to the task are the result of a simple combination of our SMT and NMT systems. Our systems are ranked first for the Estonian-English and Finnish-English language pairs (constraint) according to BLEU-cased.</abstract>
@@ -13121,9 +13121,9 @@
     </paper>
     <paper id="22">
       <title>The Karlsruhe Institute of Technology Systems for the News Translation Task in <fixed-case>WMT</fixed-case> 2018</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>467–472</pages>
       <url hash="a2e583e6">W18-6422</url>
       <abstract>We present our experiments in the scope of the news translation task in WMT 2018, in directions: English→German. The core of our systems is the encoder-decoder based neural machine translation models using the transformer architecture. We enhanced the model with a deeper architecture. By using techniques to limit the memory consumption, we were able to train models that are 4 times larger on one GPU and improve the performance by 1.2 BLEU points. Furthermore, we performed sentence selection for the newly available ParaCrawl corpus. Thereby, we could improve the effectiveness of the corpus by 0.5 BLEU points.</abstract>
@@ -13132,9 +13132,9 @@
     </paper>
     <paper id="23">
       <title>Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2018</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Matīss</first><last>Rikters</last></author>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
       <pages>473–481</pages>
       <url hash="2cdb33e9">W18-6423</url>
       <abstract>The paper describes the development process of the Tilde’s NMT systems that were submitted for the WMT 2018 shared task on news translation. We describe the data filtering and pre-processing workflows, the NMT system training architectures, and automatic evaluation results. For the WMT 2018 shared task, we submitted seven systems (both constrained and unconstrained) for English-Estonian and Estonian-English translation directions. The submitted systems were trained using Transformer models.</abstract>
@@ -13156,7 +13156,7 @@
       <author><first>Yves</first><last>Scherrer</last></author>
       <author><first>Tommi</first><last>Nieminen</last></author>
       <author><first>Arvi</first><last>Hurskainen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>488–495</pages>
       <url hash="bda83a61">W18-6425</url>
       <abstract>This paper describes the University of Helsinki’s submissions to the WMT18 shared news translation task for English-Finnish and English-Estonian, in both directions. This year, our main submissions employ a novel neural architecture, the Transformer, using the open-source OpenNMT framework. Our experiments couple domain labeling and fine tuned multilingual models with shared vocabularies between the source and target language, using the provided parallel data of the shared task and additional back-translations. Finally, we compare, for the English-to-Finnish case, the effectiveness of different machine translation architectures, starting from a rule-based approach to our best neural model, analyzing the output and highlighting future research.</abstract>
@@ -13170,7 +13170,7 @@
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Arne</first><last>Nix</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>496–503</pages>
       <url hash="24494911">W18-6426</url>
       <abstract>This paper describes the statistical machine translation systems developed at RWTH Aachen University for the German→English, English→Turkish and Chinese→English translation tasks of the EMNLP 2018 Third Conference on Machine Translation (WMT 2018). We use ensembles of neural machine translation systems based on the Transformer architecture. Our main focus is on the German→English task where we to all automatic scored first with respect metrics provided by the organizers. We identify data selection, fine-tuning, batch size and model dimension as important hyperparameters. In total we improve by 6.8% BLEU over our last year’s submission and by 4.8% BLEU over the winning system of the 2017 German→English task. In English→Turkish task, we show 3.6% BLEU improvement over the last year’s winning system. We further report results on the Chinese→English task where we improve 2.2% BLEU on average over our baseline systems but stay behind the 2018 winning systems.</abstract>
@@ -13180,7 +13180,7 @@
     <paper id="27">
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>ambridge’s Machine Translation Systems for <fixed-case>WMT</fixed-case>18</title>
       <author><first>Felix</first><last>Stahlberg</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>504–512</pages>
       <url hash="3f5ba2e9">W18-6427</url>
@@ -13193,7 +13193,7 @@
       <author><first>Dario</first><last>Stojanovski</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>513–521</pages>
       <url hash="c5c245db">W18-6428</url>
       <abstract>We describe LMU Munich’s unsupervised machine translation systems for English↔German translation. These systems were used to participate in the WMT18 news translation shared task and more specifically, for the unsupervised learning sub-track. The systems are trained on English and German monolingual data only and exploit and combine previously proposed techniques such as using word-by-word translated data based on bilingual word embeddings, denoising and on-the-fly backtranslation.</abstract>
@@ -13304,7 +13304,7 @@
     </paper>
     <paper id="37">
       <title>The Word Sense Disambiguation Test Suite at <fixed-case>WMT</fixed-case>18</title>
-      <author><first>Annette</first><last>Rios</last></author>
+      <author id="annette-rios-gonzales"><first>Annette</first><last>Rios</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
       <author><first>Rico</first><last>Sennrich</last></author>
       <pages>588–596</pages>
@@ -13335,12 +13335,12 @@
       <author><first>Benoit</first><last>Huet</last></author>
       <author><first>Mikko</first><last>Kurimo</last></author>
       <author><first>Jorma</first><last>Laaksonen</last></author>
-      <author><first>Bernard</first><last>Merialdo</last></author>
+      <author id="bernard-merialdo"><first>Bernard</first><last>Merialdo</last></author>
       <author><first>Phu</first><last>Pham</last></author>
       <author><first>Mats</first><last>Sjöberg</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
-      <author><first>Raphael</first><last>Troncy</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="raphael-troncy"><first>Raphael</first><last>Troncy</last></author>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <pages>603–611</pages>
       <url hash="a260b249">W18-6439</url>
@@ -13355,7 +13355,7 @@
       <author><first>Michael</first><last>Hutt</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
       <author><first>John</first><last>Duselis</last></author>
-      <author><first>James</first><last>Davis</last></author>
+      <author id="james-davis"><first>James</first><last>Davis</last></author>
       <pages>612–615</pages>
       <url hash="d6ff3e9f">W18-6440</url>
       <abstract>AFRL-Ohio State extends its usage of visual domain-driven machine translation for use as a peer with traditional machine translation systems. As a peer, it is enveloped into a system combination of neural and statistical MT systems to present a composite translation.</abstract>
@@ -13364,9 +13364,9 @@
     </paper>
     <paper id="41">
       <title><fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>18 Multimodal Translation Task</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
-      <author><first>Dušan</first><last>Variš</last></author>
+      <author id="dusan-varis"><first>Dušan</first><last>Variš</last></author>
       <pages>616–623</pages>
       <url hash="17274cb6">W18-6441</url>
       <abstract>We present our submission to the WMT18 Multimodal Translation Task. The main feature of our submission is applying a self-attentive network instead of a recurrent neural network. We evaluate two methods of incorporating the visual features in the model: first, we include the image representation as another input to the network; second, we train the model to predict the visual features and use it as an auxiliary objective. For our submission, we acquired both textual and multimodal additional data. Both of the proposed methods yield significant improvements over recurrent networks and self-attentive textual baselines.</abstract>
@@ -13376,8 +13376,8 @@
     <paper id="42">
       <title><fixed-case>S</fixed-case>heffield Submissions for <fixed-case>WMT</fixed-case>18 Multimodal Translation Shared Task</title>
       <author><first>Chiraag</first><last>Lala</last></author>
-      <author><first>Pranava Swaroop</first><last>Madhyastha</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava Swaroop</first><last>Madhyastha</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>624–631</pages>
       <url hash="374f9733">W18-6442</url>
@@ -13421,7 +13421,7 @@
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Dario</first><last>Stojanovski</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>648–654</pages>
       <url hash="d1b0dd9d">W18-6446</url>
       <abstract>We present the LMU Munich machine translation systems for the English–German language pair. We have built neural machine translation systems for both translation directions (English→German and German→English) and for two different domains (the biomedical domain and the news domain). The systems were used for our participation in the WMT18 biomedical translation task and in the shared task on machine translation of news. The main focus of our recent system development efforts has been on achieving improvements in the biomedical domain over last year’s strong biomedical translation engine for English→German (Huck et al., 2017a). Considerable progress has been made in the latter task, which we report on in this paper.</abstract>
@@ -13453,7 +13453,7 @@
     <paper id="49">
       <title>Neural Machine Translation with the Transformer and Multi-Source <fixed-case>R</fixed-case>omance Languages for the Biomedical <fixed-case>WMT</fixed-case> 2018 task</title>
       <author><first>Brian</first><last>Tubay</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>667–670</pages>
       <url hash="de8acb34">W18-6449</url>
       <abstract>The Transformer architecture has become the state-of-the-art in Machine Translation. This model, which relies on attention-based mechanisms, has outperformed previous neural machine translation architectures in several tasks. In this system description paper, we report details of training neural machine translation with multi-source Romance languages with the Transformer model and in the evaluation frame of the biomedical WMT 2018 task. Using multi-source languages from the same family allows improvements of over 6 BLEU points.</abstract>
@@ -13474,10 +13474,10 @@
     <paper id="51">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2018 Shared Task on Quality Estimation</title>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Varvara</first><last>Logacheva</last></author>
-      <author><first>Ramón</first><last>F. Astudillo</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="ramon-fernandez-astudillo"><first>Ramón</first><last>F. Astudillo</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>689–709</pages>
       <url hash="fc9db952">W18-6451</url>
       <abstract>We report the results of the WMT18 shared task on Quality Estimation, i.e. the task of predicting the quality of the output of machine translation systems at various granularity levels: word, phrase, sentence and document. This year we include four language pairs, three text domains, and translations produced by both statistical and neural machine translation systems. Participating teams from ten institutions submitted a variety of systems to different task variants and language pairs.</abstract>
@@ -13488,7 +13488,7 @@
       <title>Findings of the <fixed-case>WMT</fixed-case> 2018 Shared Task on Automatic Post-Editing</title>
       <author><first>Rajen</first><last>Chatterjee</last></author>
       <author><first>Matteo</first><last>Negri</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <pages>710–725</pages>
       <url hash="7fa87169">W18-6452</url>
@@ -13501,7 +13501,7 @@
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Huda</first><last>Khayrallah</last></author>
       <author><first>Kenneth</first><last>Heafield</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>726–739</pages>
       <url hash="d496d33c">W18-6453</url>
       <abstract>We posed the shared task of assigning sentence-level quality scores for a very noisy corpus of sentence pairs crawled from the web, with the goal of sub-selecting 1% and 10% of high-quality data to be used to train machine translation systems. Seventeen participants from companies, national research labs, and universities participated in this task.</abstract>
@@ -13522,7 +13522,7 @@
     <paper id="55">
       <title><fixed-case>ITER</fixed-case>: Improving Translation Edit Rate through Optimizable Edit Costs</title>
       <author><first>Joybrata</first><last>Panja</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>746–750</pages>
       <url hash="4d8052af">W18-6455</url>
       <abstract>The paper presents our participation in the WMT 2018 Metrics Shared Task. We propose an improved version of Translation Edit/Error Rate (TER). In addition to including the basic edit operations in TER, namely - insertion, deletion, substitution and shift, our metric also allows stem matching, optimizable edit costs and better normalization so as to correlate better with human judgement scores. The proposed metric shows much higher correlation with human judgments than TER.</abstract>
@@ -13544,7 +13544,7 @@
       <title>Keep It or Not: Word Level Quality Estimation for Post-Editing</title>
       <author><first>Prasenjit</first><last>Basu</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
       <pages>759–764</pages>
       <url hash="540ca099">W18-6457</url>
       <abstract>The paper presents our participation in the WMT 2018 shared task on word level quality estimation (QE) of machine translated (MT) text, i.e., to predict whether a word in MT output for a given source context is correctly translated and hence should be retained in the post-edited translation (PE), or not. To perform the QE task, we measure the similarity of the source context of the target MT word with the context for which the word is retained in PE in the training data. This is achieved in two different ways, using <i>Bag-of-Words</i> (<i>BoW</i>) model and <i>Document-to-Vector</i> (<i>Doc2Vec</i>) model. In the <i>BoW</i> model, we compute the cosine similarity while in the <i>Doc2Vec</i> model we consider the Doc2Vec similarity. By applying the Kneedle algorithm on the F1mult vs. similarity score plot, we derive the threshold based on which OK/BAD decisions are taken for the MT words. Experimental results revealed that the Doc2Vec model performs better than the BoW model on the word level QE task.</abstract>
@@ -13553,7 +13553,7 @@
     </paper>
     <paper id="58">
       <title><fixed-case>RTM</fixed-case> results for Predicting Translation Performance</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>765–769</pages>
       <url hash="7648eea7">W18-6458</url>
       <abstract>With improved prediction combination using weights based on their training performance and stacking and multilayer perceptrons to build deeper prediction models, RTMs become the 3rd system in general at the sentence-level prediction of translation scores and achieve the lowest RMSE in English to German NMT QET results. For the document-level task, we compare document-level RTM models with sentence-level RTM models obtained with the concatenation of document sentences and obtain similar results.</abstract>
@@ -13585,7 +13585,7 @@
     <paper id="61">
       <title>Supervised and Unsupervised Minimalist Quality Estimators: Vicomtech’s Participation in the <fixed-case>WMT</fixed-case> 2018 Quality Estimation Task</title>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <author><first>Andoni</first><last>Azpeitia</last></author>
       <pages>782–787</pages>
       <url hash="154ce774">W18-6461</url>
@@ -13608,8 +13608,8 @@
     <paper id="63">
       <title><fixed-case>S</fixed-case>heffield Submissions for the <fixed-case>WMT</fixed-case>18 Quality Estimation Shared Task</title>
       <author><first>Julia</first><last>Ive</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
-      <author><first>Frédéric</first><last>Blain</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
+      <author id="frederic-blain"><first>Frédéric</first><last>Blain</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>794–800</pages>
       <url hash="bf1d1f8d">W18-6463</url>
@@ -13620,8 +13620,8 @@
     <paper id="64">
       <title><fixed-case>UA</fixed-case>lacant machine translation quality estimation at <fixed-case>WMT</fixed-case> 2018: a simple approach using phrase tables and feed-forward neural networks</title>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
-      <author><first>Mikel L.</first><last>Forcada</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="mikel-l-forcada"><first>Mikel L.</first><last>Forcada</last></author>
       <pages>801–808</pages>
       <url hash="4118e2b9">W18-6464</url>
       <abstract>We describe the Universitat d’Alacant submissions to the word- and sentence-level machine translation (MT) quality estimation (QE) shared task at WMT 2018. Our approach to word-level MT QE builds on previous work to mark the words in the machine-translated sentence as <i>OK</i> or <i>BAD</i>, and is extended to determine if a word or sequence of words need to be inserted in the gap after each word. Our sentence-level submission simply uses the edit operations predicted by the word-level approach to approximate TER. The method presented ranked first in the sub-task of identifying insertions in gaps for three out of the six datasets, and second in the rest of them.</abstract>
@@ -13632,7 +13632,7 @@
       <title><fixed-case>A</fixed-case>libaba Submission for <fixed-case>WMT</fixed-case>18 Quality Estimation Task</title>
       <author><first>Jiayi</first><last>Wang</last></author>
       <author><first>Kai</first><last>Fan</last></author>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Fengming</first><last>Zhou</last></author>
       <author><first>Boxing</first><last>Chen</last></author>
       <author><first>Yangbin</first><last>Shi</last></author>
@@ -13669,7 +13669,7 @@
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Nico</first><last>Herbig</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>827–835</pages>
       <url hash="125c380f">W18-6468</url>
       <abstract>This paper presents our English–German Automatic Post-Editing (APE) system submitted to the APE Task organized at WMT 2018 (Chatterjee et al., 2018). The proposed model is an extension of the transformer architecture: two separate self-attention-based encoders encode the machine translation output (mt) and the source (src), followed by a joint encoder that attends over a combination of these two encoded sequences (encsrc and encmt) for generating the post-edited sentence. We compare this multi-source architecture (i.e, {src, mt} → pe) to a monolingual transformer (i.e., mt → pe) model and an ensemble combining the multi-source {src, mt} → pe and single-source mt → pe models. For both the PBSMT and the NMT task, the ensemble yields the best results, followed by the multi-source model and last the single-source approach. Our best model, the ensemble, achieves a BLEU score of 66.16 and 74.22 for the PBSMT and NMT task, respectively.</abstract>
@@ -13679,7 +13679,7 @@
     <paper id="69">
       <title><fixed-case>DFKI</fixed-case>-<fixed-case>MLT</fixed-case> System Description for the <fixed-case>WMT</fixed-case>18 Automatic Post-editing Task</title>
       <author><first>Daria</first><last>Pylypenko</last></author>
-      <author><first>Raphael</first><last>Rubino</last></author>
+      <author id="raphael-rubino"><first>Raphael</first><last>Rubino</last></author>
       <pages>836–839</pages>
       <url hash="277c0208">W18-6469</url>
       <abstract>This paper presents the Automatic Post-editing (APE) systems submitted by the DFKI-MLT group to the WMT’18 APE shared task. Three monolingual neural sequence-to-sequence APE systems were trained using target-language data only: one using an attentional recurrent neural network architecture and two using the attention-only (<i>transformer</i>) architecture. The training data was composed of machine translated (MT) output used as source to the APE model aligned with their manually post-edited version or reference translation as target. We made use of the provided training sets only and trained APE models applicable to phrase-based and neural MT outputs. Results show better performances reached by the attention-only model over the recurrent one, significant improvement over the baseline when post-editing phrase-based MT output but degradation when applied to neural MT output.</abstract>
@@ -13723,7 +13723,7 @@
       <title><fixed-case>STACC</fixed-case>, <fixed-case>OOV</fixed-case> Density and N-gram Saturation: Vicomtech’s Participation in the <fixed-case>WMT</fixed-case> 2018 Shared Task on Parallel Corpus Filtering</title>
       <author><first>Andoni</first><last>Azpeitia</last></author>
       <author><first>Thierry</first><last>Etchegoyhen</last></author>
-      <author><first>Eva</first><last>Martínez Garcia</last></author>
+      <author id="eva-martinez-garcia"><first>Eva</first><last>Martínez Garcia</last></author>
       <pages>860–866</pages>
       <url hash="ec213f6a">W18-6473</url>
       <abstract>We describe Vicomtech’s participation in the WMT 2018 Shared Task on parallel corpus filtering. We aimed to evaluate a simple approach to the task, which can efficiently process large volumes of data and can be easily deployed for new datasets in different language pairs and domains. We based our approach on STACC, an efficient and portable method for parallel sentence identification in comparable corpora. To address the specifics of the corpus filtering task, which features significant volumes of noisy data, the core method was expanded with a penalty based on the amount of unknown words in sentence pairs. Additionally, we experimented with a complementary data saturation method based on source sentence n-grams, with the goal of demoting parallel sentence pairs that do not contribute significant amounts of yet unobserved n-grams. Our approach requires no prior training and is highly efficient on the type of large datasets featured in the corpus filtering task. We achieved competitive results with this simple and portable method, ranking in the top half among competing systems overall.</abstract>
@@ -13733,7 +13733,7 @@
     <paper id="74">
       <title>A hybrid pipeline of rules and machine learning to filter web-crawled parallel corpora</title>
       <author><first>Eduard</first><last>Barbu</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <pages>867–871</pages>
       <url hash="8d8804da">W18-6474</url>
       <abstract>A hybrid pipeline comprising rules and machine learning is used to filter a noisy web English-German parallel corpus for the Parallel Corpus Filtering task. The core of the pipeline is a module based on the logistic regression algorithm that returns the probability that a translation unit is accepted. The training set for the logistic regression is created by automatic annotation. The quality of the automatic annotation is estimated by manually labeling the training set.</abstract>
@@ -13753,7 +13753,7 @@
     <paper id="76">
       <title><fixed-case>MAJE</fixed-case> Submission to the <fixed-case>WMT</fixed-case>2018 Shared Task on Parallel Corpus Filtering</title>
       <author><first>Marina</first><last>Fomicheva</last></author>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <pages>877–881</pages>
       <url hash="1798140e">W18-6476</url>
       <abstract>This paper describes the participation of Webinterpret in the shared task on parallel corpus filtering at the Third Conference on Machine Translation (WMT 2018). The paper describes the main characteristics of our approach and discusses the results obtained on the data sets published for the shared task.</abstract>
@@ -13763,7 +13763,7 @@
     <paper id="77">
       <title>An Unsupervised System for Parallel Corpus Filtering</title>
       <author><first>Viktor</first><last>Hangya</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>882–887</pages>
       <url hash="d6499309">W18-6477</url>
       <abstract>In this paper we describe LMU Munich’s submission for the <i>WMT 2018 Parallel Corpus Filtering</i> shared task which addresses the problem of cleaning noisy parallel corpora. The task of mining and cleaning parallel sentences is important for improving the quality of machine translation systems, especially for low-resource languages. We tackle this problem in a fully unsupervised fashion relying on bilingual word embeddings created without any bilingual signal. After pre-filtering noisy data we rank sentence pairs by calculating bilingual sentence-level similarities and then remove redundant data by employing monolingual similarity as well. Our unsupervised system achieved good performance during the official evaluation of the shared task, scoring only a few BLEU points behind the best systems, while not requiring any parallel training data.</abstract>
@@ -13798,7 +13798,7 @@
       <author><first>Samuel</first><last>Larkin</last></author>
       <author><first>Darlene</first><last>Stewart</last></author>
       <author><first>Michel</first><last>Simard</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Chi-kiu</first><last>Lo</last></author>
       <pages>900–907</pages>
       <url hash="1a4a35dd">W18-6480</url>
@@ -13812,7 +13812,7 @@
       <author><first>Michel</first><last>Simard</last></author>
       <author><first>Darlene</first><last>Stewart</last></author>
       <author><first>Samuel</first><last>Larkin</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Patrick</first><last>Littell</last></author>
       <pages>908–916</pages>
       <url hash="4920c6e6">W18-6481</url>
@@ -13834,7 +13834,7 @@
     </paper>
     <paper id="83">
       <title><fixed-case>UTFPR</fixed-case> at <fixed-case>WMT</fixed-case> 2018: Minimalistic Supervised Corpora Filtering for Machine Translation</title>
-      <author><first>Gustavo</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo</first><last>Paetzold</last></author>
       <pages>923–927</pages>
       <url hash="67f0c36f">W18-6483</url>
       <abstract>We present the UTFPR systems at the WMT 2018 parallel corpus filtering task. Our supervised approach discerns between good and bad translations by training classic binary classification models over an artificially produced binary classification dataset derived from a high-quality translation set, and a minimalistic set of 6 semantic distance features that rely only on easy-to-gather resources. We rank translations by their probability for the “good” label. Our results show that logistic regression pairs best with our approach, yielding more consistent results throughout the different settings evaluated.</abstract>
@@ -13846,7 +13846,7 @@
       <author><first>Vassilis</first><last>Papavassiliou</last></author>
       <author><first>Sokratis</first><last>Sofianopoulos</last></author>
       <author><first>Prokopis</first><last>Prokopidis</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>928–933</pages>
       <url hash="63d2a762">W18-6484</url>
       <abstract>This paper describes the submission of the Institute for Language and Speech Processing/Athena Research and Innovation Center (ILSP/ARC) for the WMT 2018 Parallel Corpus Filtering shared task. We explore several properties of sentences and sentence pairs that our system explored in the context of the task with the purpose of clustering sentence pairs according to their appropriateness in training MT systems. We also discuss alternative methods for ranking the sentence pairs of the most appropriate clusters with the aim of generating the two datasets (of 10 and 100 million words as required in the task) that were evaluated. By summarizing the results of several experiments that were carried out by the organizers during the evaluation phase, our submission achieved an average BLEU score of 26.41, even though it does not make use of any language-specific resources like bilingual lexica, monolingual corpora, or MT output, while the average score of the best participant system was 27.91.</abstract>
@@ -13855,8 +13855,8 @@
     </paper>
     <paper id="85">
       <title><fixed-case>SYSTRAN</fixed-case> Participation to the <fixed-case>WMT</fixed-case>2018 Shared Task on Parallel Corpus Filtering</title>
-      <author><first>MinhQuang</first><last>Pham</last></author>
-      <author><first>Josep</first><last>Crego</last></author>
+      <author id="minh-quang-pham"><first>MinhQuang</first><last>Pham</last></author>
+      <author id="josep-m-crego"><first>Josep</first><last>Crego</last></author>
       <author><first>Jean</first><last>Senellart</last></author>
       <pages>934–938</pages>
       <url hash="5fc1cb71">W18-6485</url>
@@ -13866,7 +13866,7 @@
     </paper>
     <paper id="86">
       <title>Tilde’s Parallel Corpus Filtering Methods for <fixed-case>WMT</fixed-case> 2018</title>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <pages>939–945</pages>
       <url hash="47b84b5c">W18-6486</url>
       <abstract>The paper describes parallel corpus filtering methods that allow reducing noise of noisy “parallel” corpora from a level where the corpora are not usable for neural machine translation training (i.e., the resulting systems fail to achieve reasonable translation quality; well below 10 BLEU points) up to a level where the trained systems show decent (over 20 BLEU points on a 10 million word dataset and up to 30 BLEU points on a 100 million word dataset). The paper also documents Tilde’s submissions to the WMT 2018 shared task on parallel corpus filtering.</abstract>
@@ -13880,7 +13880,7 @@
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Miguel</first><last>Graça</last></author>
       <author><first>Aman</first><last>Gokrani</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>946–954</pages>
       <url hash="5f8a64f9">W18-6487</url>
       <abstract>This paper describes the submission of RWTH Aachen University for the De→En parallel corpus filtering task of the <i>EMNLP 2018 Third Conference on Machine Translation</i> (WMT 2018). We use several rule-based, heuristic methods to preselect sentence pairs. These sentence pairs are scored with count-based and neural systems as language and translation models. In addition to single sentence-pair scoring, we further implement a simple redundancy removing heuristic. Our best performing corpus filtering system relies on recurrent neural language models and translation models based on the transformer architecture. A model trained on 10M randomly sampled tokens reaches a performance of 9.2% BLEU on newstest2018. Using our filtering and ranking techniques we achieve 34.8% BLEU.</abstract>
@@ -13889,10 +13889,10 @@
     </paper>
     <paper id="88">
       <title>Prompsit’s submission to <fixed-case>WMT</fixed-case> 2018 Parallel Corpus Filtering shared task</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Marta</first><last>Bañón</last></author>
-      <author><first>Sergio</first><last>Ortiz-Rojas</last></author>
-      <author><first>Gema</first><last>Ramírez</last></author>
+      <author id="sergio-ortiz-rojas"><first>Sergio</first><last>Ortiz-Rojas</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez</last></author>
       <pages>955–962</pages>
       <url hash="ea902460">W18-6488</url>
       <abstract>This paper describes Prompsit Language Engineering’s submissions to the WMT 2018 parallel corpus filtering shared task. Our four submissions were based on an automatic classifier for identifying pairs of sentences that are mutual translations. A set of hand-crafted hard rules for discarding sentences with evident flaws were applied before the classifier. We explored different strategies for achieving a training corpus with diverse vocabulary and fluent sentences: language model scoring, an active-learning-inspired data selection algorithm and n-gram saturation. Our submissions were very competitive in comparison with other participants on the 100 million word training corpus.</abstract>
@@ -13904,7 +13904,7 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Benjamin</first><last>Marie</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>963–967</pages>
       <url hash="f3c8c2f2">W18-6489</url>
       <abstract>This paper presents the NICT’s participation in the WMT18 shared parallel corpus filtering task. The organizers provided 1 billion words German-English corpus crawled from the web as part of the Paracrawl project. This corpus is too noisy to build an acceptable neural machine translation (NMT) system. Using the clean data of the WMT18 shared news translation task, we designed several features and trained a classifier to score each sentence pairs in the noisy data. Finally, we sampled 100 million and 10 million words and built corresponding NMT systems. Empirical results show that our NMT systems trained on sampled data achieve promising performance.</abstract>
@@ -13916,7 +13916,7 @@
     <meta>
       <booktitle>Proceedings of the 11th International Conference on Natural Language Generation</booktitle>
       <url hash="6c5f2c30">W18-65</url>
-      <editor><first>Emiel</first><last>Krahmer</last></editor>
+      <editor id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></editor>
       <editor><first>Albert</first><last>Gatt</last></editor>
       <editor><first>Martijn</first><last>Goudbeek</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -13958,7 +13958,7 @@
     </paper>
     <paper id="3">
       <title>Syntactic Manipulation for Generating more Diverse and Interesting Texts</title>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>22–34</pages>
       <url hash="8f541161">W18-6503</url>
@@ -13982,7 +13982,7 @@
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Falcon</first><last>Dai</last></author>
       <author><first>Henry</first><last>Elder</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>46–56</pages>
       <url hash="554a0a79">W18-6505</url>
       <abstract>Learning to generate fluent natural language from structured data with neural networks has become an common approach for NLG. This problem can be challenging when the form of the structured data varies between examples. This paper presents a survey of several extensions to sequence-to-sequence models to account for the latent content selection process, particularly variants of copy attention and coverage decoding. We further propose a training method based on diverse ensembling to encourage models to learn distinct sentence templates during training. An empirical evaluation of these techniques shows an increase in the quality of generated text across five automated metrics, as well as human evaluation.</abstract>
@@ -13992,7 +13992,7 @@
     <paper id="6">
       <title><fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>ZH</fixed-case>: a Linguistic Realisation Engine for <fixed-case>M</fixed-case>andarin</title>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>57–66</pages>
       <url hash="11a10c65">W18-6506</url>
@@ -14004,7 +14004,7 @@
       <title>Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>G</fixed-case>alician language</title>
       <author><first>Andrea</first><last>Cascallar-Fuentes</last></author>
       <author><first>Alejandro</first><last>Ramos-Soto</last></author>
-      <author><first>Alberto</first><last>Bugarín Diz</last></author>
+      <author id="alberto-bugarin-diz"><first>Alberto</first><last>Bugarín Diz</last></author>
       <pages>67–72</pages>
       <url hash="7fd91143">W18-6507</url>
       <abstract>In this paper, we describe SimpleNLG-GL, an adaptation of the linguistic realisation SimpleNLG library for the Galician language. This implementation is derived from SimpleNLG-ES, the English-Spanish version of this library. It has been tested using a battery of examples which covers the most common rules for Galician.</abstract>
@@ -14014,7 +14014,7 @@
     <paper id="8">
       <title>Going <fixed-case>D</fixed-case>utch: Creating <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>NL</fixed-case></title>
       <author><first>Ruud</first><last>de Jong</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>73–78</pages>
       <url hash="57f08777">W18-6508</url>
       <abstract>This paper presents SimpleNLG-NL, an adaptation of the SimpleNLG surface realisation engine for the Dutch language. It describes a novel method for determining and testing the grammatical constructions to be implemented, using target sentences sampled from a treebank.</abstract>
@@ -14025,7 +14025,7 @@
       <title>Learning to Flip the Bias of News Headlines</title>
       <author><first>Wei-Fan</first><last>Chen</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al-Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al-Khatib</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>79–88</pages>
       <url hash="4e6036d5">W18-6509</url>
@@ -14037,7 +14037,7 @@
       <title>Stylistically User-Specific Generation</title>
       <author><first>Abdurrisyad</first><last>Fikri</last></author>
       <author><first>Hiroya</first><last>Takamura</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>89–98</pages>
       <url hash="065bb950">W18-6510</url>
       <abstract>Recent neural models for response generation show good results in terms of general responses. In real conversations, however, depending on the speaker/responder, similar utterances should require different responses. In this study, we attempt to consider individual user’s information in adjusting the notable sequence-to-sequence (seq2seq) model for more diverse, user-specific responses. We assume that we need user-specific features to adjust the response and we argue that some selected representative words from the users are suitable for this task. Furthermore, we prove that even for unseen or unknown users, our model can provide more diverse and interesting responses, while maintaining correlation with input utterances. Experimental results with human evaluation show that our model can generate more interesting responses than the popular seq2seqmodel and achieve higher relevance with input utterances than our baseline.</abstract>
@@ -14051,7 +14051,7 @@
       <author><first>Xingkun</first><last>Liu</last></author>
       <author><first>Atanas</first><last>Laskov</last></author>
       <author><first>Pedro</first><last>Patron</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>99–108</pages>
       <url hash="47b5d959">W18-6511</url>
       <abstract>As unmanned vehicles become more autonomous, it is important to maintain a high level of transparency regarding their behaviour and how they operate. This is particularly important in remote locations where they cannot be directly observed. Here, we describe a method for generating explanations in natural language of autonomous system behaviour and reasoning. Our method involves deriving an interpretable model of autonomy through having an expert ‘speak aloud’ and providing various levels of detail based on this model. Through an online evaluation study with operators, we show it is best to generate explanations with multiple possible reasons but tersely worded. This work has implications for designing interfaces for autonomy as well as for explainable AI and operator training.</abstract>
@@ -14112,7 +14112,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>S</fixed-case>patial<fixed-case>VOC</fixed-case>2<fixed-case>K</fixed-case>: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Adrian</first><last>Muscat</last></author>
       <author><first>Pierre</first><last>Anguill</last></author>
       <author><first>Mouhamadou</first><last>Sow</last></author>
@@ -14128,7 +14128,7 @@
       <title>Adding the Third Dimension to Spatial Relation Detection in 2<fixed-case>D</fixed-case> Images</title>
       <author><first>Brandon</first><last>Birmingham</last></author>
       <author><first>Adrian</first><last>Muscat</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <pages>146–151</pages>
       <url hash="ad3b69de">W18-6517</url>
       <abstract>Detection of spatial relations between objects in images is currently a popular subject in image description research. A range of different language and geometric object features have been used in this context, but methods have not so far used explicit information about the third dimension (depth), except when manually added to annotations. The lack of such information hampers detection of spatial relations that are inherently 3D. In this paper, we use a fully automatic method for creating a depth map of an image and derive several different object-level depth features from it which we add to an existing feature set to test the effect on spatial relation detection. We show that performance increases are obtained from adding depth features in all scenarios tested.</abstract>
@@ -14148,7 +14148,7 @@
     <paper id="19">
       <title>Modelling Pro-drop with the Rational Speech Acts Model</title>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>159–164</pages>
       <url hash="dca78a67">W18-6519</url>
@@ -14172,7 +14172,7 @@
     </paper>
     <paper id="21">
       <title>Enriching the <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case> corpus</title>
-      <author><first>Thiago</first><last>Castro Ferreira</last></author>
+      <author id="thiago-castro-ferreira"><first>Thiago</first><last>Castro Ferreira</last></author>
       <author><first>Diego</first><last>Moussallem</last></author>
       <author><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
@@ -14185,7 +14185,7 @@
     <paper id="22">
       <title>Towards making <fixed-case>NLG</fixed-case> a voice for interpretable Machine Learning</title>
       <author><first>James</first><last>Forrest</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <author><first>Wei</first><last>Pang</last></author>
       <author><first>George</first><last>Coghill</last></author>
       <pages>177–182</pages>
@@ -14198,7 +14198,7 @@
       <title>Template-based multilingual football reports generation using <fixed-case>W</fixed-case>ikidata as a knowledge base</title>
       <author><first>Lorenzo</first><last>Gatti</last></author>
       <author><first>Chris</first><last>van der Lee</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>183–188</pages>
       <url hash="ab424738">W18-6523</url>
       <abstract>This paper presents a new version of a football reports generation system called PASS. The original version generated Dutch text and relied on a limited hand-crafted knowledge base. We describe how, in a short amount of time, we extended PASS to produce English texts, exploiting machine translation and Wikidata as a large-scale source of multilingual knowledge.</abstract>
@@ -14208,7 +14208,7 @@
     <paper id="24">
       <title>Automatic Evaluation of Neural Personality-based Chatbots</title>
       <author><first>Yujie</first><last>Xing</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
       <pages>189–194</pages>
       <url hash="1b01a22e">W18-6524</url>
       <abstract>Stylistic variation is critical to render the utterances generated by conversational agents natural and engaging. In this paper, we focus on sequence-to-sequence models for open-domain dialogue response generation and propose a new method to evaluate the extent to which such models are able to generate responses that reflect different personality traits.</abstract>
@@ -14237,7 +14237,7 @@
     <paper id="27">
       <title>Underspecified <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Structures as Inputs for Multilingual Surface Realisation</title>
       <author><first>Simon</first><last>Mille</last></author>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <pages>199–209</pages>
@@ -14249,7 +14249,7 @@
     <paper id="28">
       <title><fixed-case>LSTM</fixed-case> Hypertagging</title>
       <author><first>Reid</first><last>Fu</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>210–220</pages>
       <url hash="1b46e419">W18-6528</url>
       <abstract>Hypertagging, or supertagging for surface realization, is the process of assigning lexical categories to nodes in an input semantic graph. Previous work has shown that hypertagging significantly increases realization speed and quality by reducing the search space of the realizer. Building on recent work using LSTMs to improve accuracy on supertagging for parsing, we develop an LSTM hypertagging method for OpenCCG, an open source NLP toolkit for CCG. Our results show significant improvements in both hypertagging accuracy and downstream realization performance.</abstract>
@@ -14269,12 +14269,12 @@
     </paper>
     <paper id="30">
       <title>Generating <fixed-case>E</fixed-case>-Commerce Product Titles and Predicting their Quality</title>
-      <author><first>José G.</first><last>Camargo de Souza</last></author>
+      <author id="jose-g-c-de-souza"><first>José G.</first><last>Camargo de Souza</last></author>
       <author><first>Michael</first><last>Kozielski</last></author>
       <author><first>Prashant</first><last>Mathur</last></author>
       <author><first>Ernie</first><last>Chang</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
       <author><first>Evgeny</first><last>Matusov</last></author>
       <pages>233–243</pages>
@@ -14286,7 +14286,7 @@
     <paper id="31">
       <title>Designing and testing the messages produced by a virtual dietitian</title>
       <author><first>Luca</first><last>Anselma</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <pages>244–253</pages>
       <url hash="ae06394a">W18-6531</url>
       <abstract>This paper presents a project about the automatic generation of persuasive messages in the context of the diet management. In the first part of the paper we introduce the basic mechanisms related to data interpretation and content selection for a numerical data-to-text generation architecture. In the second part of the paper we discuss a number of factors influencing the design of the messages. In particular, we consider the design of the aggregation procedure. Finally, we present the results of a human-based evaluation concerning this design factor.</abstract>
@@ -14308,7 +14308,7 @@
     <paper id="33">
       <title>Automatically Generating Questions about Novel Metaphors in Literature</title>
       <author><first>Natalie</first><last>Parde</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>264–273</pages>
       <url hash="209ca125">W18-6533</url>
       <abstract>The automatic generation of stimulating questions is crucial to the development of intelligent cognitive exercise applications. We developed an approach that generates appropriate <i>Questioning the Author</i> queries based on novel metaphors in diverse syntactic relations in literature. We show that the generated questions are comparable to human-generated questions in terms of naturalness, sensibility, and depth, and score slightly higher than human-generated questions in terms of clarity. We also show that questions generated about novel metaphors are rated as cognitively deeper than questions generated about non- or conventional metaphors, providing evidence that metaphor novelty can be leveraged to promote cognitive exercise.</abstract>
@@ -14329,7 +14329,7 @@
       <title>Can Neural Generators for Dialogue Learn Sentence Planning and Discourse Structuring?</title>
       <author><first>Lena</first><last>Reed</last></author>
       <author><first>Shereen</first><last>Oraby</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>284–295</pages>
       <url hash="3fb382aa">W18-6535</url>
       <abstract>Responses in task-oriented dialogue systems often realize multiple propositions whose ultimate form depends on the use of sentence planning and discourse structuring operations. For example a recommendation may consist of an explicitly evaluative utterance e.g. <i>Chanpen Thai is the best option</i>, along with content related by the justification discourse relation, e.g. <i>It has great food and service</i>, that combines multiple propositions into a single phrase. While neural generation methods integrate sentence planning and surface realization in one end-to-end learning framework, previous work has not shown that neural generators can: (1) perform common sentence planning and discourse structuring operations; (2) make decisions as to whether to realize content in a single sentence or over multiple sentences; (3) generalize sentence planning and discourse relation operations beyond what was seen in training. We systematically create large training corpora that exhibit particular sentence planning operations and then test neural models to see what they learn. We compare models without explicit latent variables for sentence planning with ones that provide explicit supervision during training. We show that only the models with additional supervision can reproduce sentence planning and discourse operations and generalize to situations unseen in training.</abstract>
@@ -14339,7 +14339,7 @@
     <paper id="36">
       <title>Neural Generation of Diverse Questions using Answer Focus, Contextual and Linguistic Features</title>
       <author><first>Vrindavan</first><last>Harrison</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>296–306</pages>
       <url hash="9e32ae1e">W18-6536</url>
       <abstract>Question Generation is the task of automatically creating questions from textual input. In this work we present a new Attentional Encoder–Decoder Recurrent Neural Network model for automatic question generation. Our model incorporates linguistic features and an additional sentence embedding to capture meaning at both sentence and word levels. The linguistic features are designed to capture information related to named entity recognition, word case, and entity coreference resolution. In addition our model uses a copying mechanism and a special answer signal that enables generation of numerous diverse questions on a given sentence. Our model achieves state of the art results of 19.98 Bleu_4 on a benchmark Question Generation dataset, outperforming all previously published results by a significant margin. A human evaluation also shows that the added features improve the quality of the generated questions.</abstract>
@@ -14364,7 +14364,7 @@
       <author><first>Martin</first><last>Potthast</last></author>
       <author><first>Nedim</first><last>Lipka</last></author>
       <author><first>Benno</first><last>Stein</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <pages>318–321</pages>
       <url hash="24b82f1d">W18-6538</url>
       <abstract>The TL;DR challenge fosters research in abstractive summarization of informal text, the largest and fastest-growing source of textual data on the web, which has been overlooked by summarization research so far. The challenge owes its name to the frequent practice of social media users to supplement long posts with a “TL;DR”—for “too long; didn’t read”—followed by a short summary as a courtesy to those who would otherwise reply with the exact same abbreviation to indicate they did not care to read a post for its apparent length. Posts featuring TL;DR summaries form an excellent ground truth for summarization, and by tapping into this resource for the first time, we have mined millions of training examples from social media, opening the door to all kinds of generative models.</abstract>
@@ -14386,7 +14386,7 @@
       <title>Adapting Descriptions of People to the Point of View of a Moving Observer</title>
       <author><first>Gonzalo</first><last>Méndez</last></author>
       <author><first>Raquel</first><last>Hervás</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <author><first>Ricardo</first><last>de la Rosa</last></author>
       <author><first>Daniel</first><last>Ruiz</last></author>
       <pages>329–338</pages>
@@ -14433,7 +14433,7 @@
       <title>Comprehension Driven Document Planning in Natural Language Generation Systems</title>
       <author><first>Craig</first><last>Thomson</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Somayajulu</first><last>Sripada</last></author>
+      <author id="somayajulu-sripada"><first>Somayajulu</first><last>Sripada</last></author>
       <pages>371–380</pages>
       <url hash="6595d652">W18-6544</url>
       <abstract>This paper proposes an approach to NLG system design which focuses on generating output text which can be more easily processed by the reader. Ways in which cognitive theory might be combined with existing NLG techniques are discussed and two simple experiments in content ordering are presented.</abstract>
@@ -14453,7 +14453,7 @@
     </paper>
     <paper id="46">
       <title>Toward <fixed-case>B</fixed-case>ayesian Synchronous Tree Substitution Grammars for Sentence Planning</title>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Dietrich</first><last>Klakow</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>391–396</pages>
@@ -14465,7 +14465,7 @@
     <paper id="47">
       <title>The Task Matters: Comparing Image Captioning and Task-Based Dialogical Image Description</title>
       <author><first>Nikolai</first><last>Ilinykh</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>397–402</pages>
       <url hash="c62adf7d">W18-6547</url>
@@ -14477,7 +14477,7 @@
       <title>Generating Summaries of Sets of Consumer Products: Learning from Experiments</title>
       <author><first>Kittipitch</first><last>Kuptavanich</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Kees</first><last>Van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>Van Deemter</last></author>
       <author><first>Advaith</first><last>Siddharthan</last></author>
       <pages>403–407</pages>
       <url hash="fd0f946e">W18-6548</url>
@@ -14514,8 +14514,8 @@
       <title>Meteorologists and Students: A resource for language grounding of geographical descriptors</title>
       <author><first>Alejandro</first><last>Ramos-Soto</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Jose</first><last>Alonso</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="jose-m-alonso"><first>Jose</first><last>Alonso</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <pages>421–425</pages>
       <url hash="ffd3ae3f">W18-6551</url>
@@ -14526,7 +14526,7 @@
     <paper id="52">
       <title><fixed-case>C</fixed-case>yclegen: Cyclic consistency based product review generator from attributes</title>
       <author><first>Vasu</first><last>Sharma</last></author>
-      <author><first>Harsh</first><last>Sharma</last></author>
+      <author id="harsh-vardhan-sharma"><first>Harsh</first><last>Sharma</last></author>
       <author><first>Ankita</first><last>Bishnu</last></author>
       <author><first>Labhesh</first><last>Patel</last></author>
       <pages>426–430</pages>
@@ -14549,7 +14549,7 @@
     <paper id="54">
       <title>Characterizing Variation in Crowd-Sourced Data for Training Neural Language Generators to Produce Stylistically Varied Outputs</title>
       <author><first>Juraj</first><last>Juraska</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>441–450</pages>
       <url hash="e97196a9">W18-6554</url>
       <abstract>One of the biggest challenges of end-to-end language generation from meaning representations in dialogue systems is making the outputs more natural and varied. Here we take a large corpus of 50K crowd-sourced utterances in the restaurant domain and develop text analysis methods that systematically characterize types of sentences in the training data. We then automatically label the training data to allow us to conduct two kinds of experiments with a neural generator. First, we test the effect of training the system with different stylistic partitions and quantify the effect of smaller, but more stylistically controlled training data. Second, we propose a method of labeling the style variants during training, and show that we can modify the style of the generated utterances using our stylistic labels. We contrast and compare these methods that can be used with any existing large corpus, showing how they vary in terms of semantic quality and stylistic control.</abstract>
@@ -14560,7 +14560,7 @@
       <title>Char2char Generation with Reranking for the <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> <fixed-case>NLG</fixed-case> Challenge</title>
       <author><first>Shubham</first><last>Agarwal</last></author>
       <author><first>Marc</first><last>Dymetman</last></author>
-      <author><first>Éric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Éric</first><last>Gaussier</last></author>
       <pages>451–456</pages>
       <url hash="9bd77c95">W18-6555</url>
       <abstract>This paper describes our submission to the E2E NLG Challenge. Recently, neural seq2seq approaches have become mainstream in NLG, often resorting to pre- (respectively post-) processing <i>delexicalization</i> (relexicalization) steps at the word-level to handle rare words. By contrast, we train a simple character level seq2seq model, which requires no pre/post-processing (delexicalization, tokenization or even lowercasing), with surprisingly good results. For further improvement, we explore two re-ranking approaches for scoring candidates. We also introduce a synthetic dataset creation procedure, which opens up a new way of creating artificial datasets for Natural Language Generation.</abstract>
@@ -14629,7 +14629,7 @@
     <paper id="61">
       <title>Statistical <fixed-case>NLG</fixed-case> for Generating the Content and Form of Referring Expressions</title>
       <author><first>Xiao</first><last>Li</last></author>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Chenghua</first><last>Lin</last></author>
       <pages>482–491</pages>
       <url hash="fa12a508">W18-6561</url>
@@ -14640,9 +14640,9 @@
     <paper id="62">
       <title>Specificity measures and reference</title>
       <author><first>Albert</first><last>Gatt</last></author>
-      <author><first>Nicolás</first><last>Marín</last></author>
+      <author id="nicolas-marin"><first>Nicolás</first><last>Marín</last></author>
       <author><first>Gustavo</first><last>Rivas-Gervilla</last></author>
-      <author><first>Daniel</first><last>Sánchez</last></author>
+      <author id="daniel-sanchez-cisneros"><first>Daniel</first><last>Sánchez</last></author>
       <pages>492–502</pages>
       <url hash="e89385f4">W18-6562</url>
       <abstract>In this paper we study empirically the validity of measures of referential success for referring expressions involving gradual properties. More specifically, we study the ability of several measures of referential success to predict the success of a user in choosing the right object, given a referring expression. Experimental results indicate that certain fuzzy measures of success are able to predict human accuracy in reference resolution. Such measures are therefore suitable for the estimation of the success or otherwise of a referring expression produced by a generation algorithm, especially in case the properties in a domain cannot be assumed to have crisp denotations.</abstract>
@@ -14651,7 +14651,7 @@
     </paper>
     <paper id="63">
       <title>Decoding Strategies for Neural Referring Expression Generation</title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>503–512</pages>
       <url hash="0bffe49e">W18-6563</url>
@@ -14664,7 +14664,7 @@
     <meta>
       <booktitle>Proceedings of the 3rd Workshop on Computational Creativity in Natural Language Generation (<fixed-case>CC</fixed-case>-<fixed-case>NLG</fixed-case> 2018)</booktitle>
       <url hash="612075dd">W18-66</url>
-      <editor><first>Hugo</first><last>Gonçalo Oliveira</last></editor>
+      <editor id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></editor>
       <editor><first>Ben</first><last>Burtenshaw</last></editor>
       <editor><first>Raquel</first><last>Hervás</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -14716,7 +14716,7 @@
     <paper id="5">
       <title>Content Determination for Chess as a Source for Suspenseful Narratives</title>
       <author><first>Richard</first><last>Doust</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>26-33</pages>
       <url hash="2a8df604">W18-6605</url>
       <doi>10.18653/v1/W18-6605</doi>
@@ -14726,7 +14726,7 @@
       <title>Generating Stories Using Role-playing Games and Simulated Human-like Conversations</title>
       <author><first>Alan</first><last>Tapscott</last></author>
       <author><first>Carlos</first><last>León</last></author>
-      <author><first>Pablo</first><last>Gervás</last></author>
+      <author id="pablo-gervas"><first>Pablo</first><last>Gervás</last></author>
       <pages>34-42</pages>
       <url hash="d340a819">W18-6606</url>
       <doi>10.18653/v1/W18-6606</doi>
@@ -14737,9 +14737,9 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Intelligent Interactive Systems and Language Generation (2<fixed-case>IS</fixed-case>&amp;<fixed-case>NLG</fixed-case>)</booktitle>
       <url hash="50982d8b">W18-67</url>
-      <editor><first>Jose M.</first><last>Alonso</last></editor>
+      <editor id="jose-m-alonso"><first>Jose M.</first><last>Alonso</last></editor>
       <editor><first>Alejandro</first><last>Catala</last></editor>
-      <editor><first>Mariët</first><last>Theune</last></editor>
+      <editor id="mariet-theune"><first>Mariët</first><last>Theune</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tilburg, the Netherlands</address>
       <month>November</month>
@@ -14777,7 +14777,7 @@
       <author><first>Matthieu</first><last>Riou</last></author>
       <author><first>Stéphane</first><last>Huet</last></author>
       <author><first>Bassam</first><last>Jabaian</last></author>
-      <author><first>Fabrice</first><last>Lefèvre</last></author>
+      <author id="fabrice-lefevre"><first>Fabrice</first><last>Lefèvre</last></author>
       <pages>9-14</pages>
       <url hash="d5b70c9f">W18-6703</url>
       <doi>10.18653/v1/W18-6703</doi>
@@ -14798,7 +14798,7 @@
       <title>Trouble on the Road: Finding Reasons for Commuter Stress from Tweets</title>
       <author><first>Reshmi</first><last>Gopalakrishna Pillai</last></author>
       <author><first>Mike</first><last>Thelwall</last></author>
-      <author><first>Constantin</first><last>Orasan</last></author>
+      <author id="constantin-orasan"><first>Constantin</first><last>Orasan</last></author>
       <pages>20-25</pages>
       <url hash="77adaf31">W18-6705</url>
       <doi>10.18653/v1/W18-6705</doi>
@@ -14806,10 +14806,10 @@
     </paper>
     <paper id="6">
       <title>Assisted Nominalization for Academic <fixed-case>E</fixed-case>nglish Writing</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Dariush</first><last>Saberi</last></author>
       <author><first>Marvin</first><last>Lam</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
       <pages>26-30</pages>
       <url hash="58735763">W18-6706</url>
       <doi>10.18653/v1/W18-6706</doi>
@@ -14818,10 +14818,10 @@
     <paper id="7">
       <title>Two-Step Training and Mixed Encoding-Decoding for Implementing a Generative Chatbot with a Small Dialogue Corpus</title>
       <author><first>Jintae</first><last>Kim</last></author>
-      <author><first>Hyeon-Gu</first><last>Lee</last></author>
+      <author id="hyeon-gu-lee"><first>Hyeon-Gu</first><last>Lee</last></author>
       <author><first>Harksoo</first><last>Kim</last></author>
       <author><first>Yeonsoo</first><last>Lee</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>31-35</pages>
       <url hash="157ea8eb">W18-6707</url>
       <doi>10.18653/v1/W18-6707</doi>
@@ -14831,7 +14831,7 @@
       <title>Supporting Content Design with an Eye Tracker: The Case of Weather-based Recommendations</title>
       <author><first>Alejandro</first><last>Catala</last></author>
       <author><first>Jose M.</first><last>Alonso</last></author>
-      <author><first>Alberto</first><last>Bugarin</last></author>
+      <author id="alberto-bugarin-diz"><first>Alberto</first><last>Bugarin</last></author>
       <pages>36-41</pages>
       <url hash="12064cf7">W18-6708</url>
       <doi>10.18653/v1/W18-6708</doi>
@@ -14843,7 +14843,7 @@
       <author><first>Daphne</first><last>Ippolito</last></author>
       <author><first>Arun</first><last>Kirubarajan</last></author>
       <author><first>Jai</first><last>Thirani</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>42-44</pages>
       <url hash="3c89a95a">W18-6709</url>
@@ -14854,7 +14854,7 @@
       <title><fixed-case>C</fixed-case>heck<fixed-case>Y</fixed-case>our<fixed-case>M</fixed-case>eal!: diet management with <fixed-case>NLG</fixed-case></title>
       <author><first>Luca</first><last>Anselma</last></author>
       <author><first>Simone</first><last>Donetti</last></author>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Andrea</first><last>Pirone</last></author>
       <pages>45-47</pages>
       <url hash="a586b6b9">W18-6710</url>
@@ -14866,7 +14866,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on <fixed-case>NLG</fixed-case> for Human–Robot Interaction</booktitle>
       <url hash="ece83658">W18-69</url>
-      <editor><first>Mary Ellen</first><last>Foster</last></editor>
+      <editor id="mary-ellen-foster"><first>Mary Ellen</first><last>Foster</last></editor>
       <editor><first>Hendrik</first><last>Buschmeier</last></editor>
       <editor><first>Dimitra</first><last>Gkatzia</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -14885,7 +14885,7 @@
       <title>Context-sensitive Natural Language Generation for robot-assisted second language tutoring</title>
       <author><first>Bram</first><last>Willemsen</last></author>
       <author><first>Jan</first><last>de Wit</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <author><first>Mirjam</first><last>de Haas</last></author>
       <author><first>Paul</first><last>Vogt</last></author>
       <pages>1–7</pages>
@@ -14907,7 +14907,7 @@
     <paper id="3">
       <title>Shaping a social robot’s humor with Natural Language Generation and socially-aware reinforcement learning</title>
       <author><first>Hannes</first><last>Ritschel</last></author>
-      <author><first>Elisabeth</first><last>André</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>André</last></author>
       <pages>12–16</pages>
       <url hash="92d2bf8e">W18-6903</url>
       <abstract>Humor is an important aspect in human interaction to regulate conversations, increase interpersonal attraction and trust. For social robots, humor is one aspect to make interactions more natural, enjoyable, and to increase credibility and acceptance. In combination with appropriate non-verbal behavior, natural language generation offers the ability to create content on-the-fly. This work outlines the building-blocks for providing an individual, multimodal interaction experience by shaping the robot’s humor with the help of Natural Language Generation and Reinforcement Learning based on human social signals.</abstract>
@@ -14918,7 +14918,7 @@
       <title>From sensors to sense: Integrated heterogeneous ontologies for Natural Language Generation</title>
       <author><first>Mihai</first><last>Pomarlan</last></author>
       <author><first>Robert</first><last>Porzel</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Rainer</first><last>Malaka</last></author>
       <pages>17–21</pages>
       <url hash="c1ff0a04">W18-6904</url>
@@ -14930,7 +14930,7 @@
       <title>A farewell to arms: Non-verbal communication for non-humanoid robots</title>
       <author><first>Aaron G.</first><last>Cass</last></author>
       <author><first>Kristina</first><last>Striegnitz</last></author>
-      <author><first>Nick</first><last>Webb</last></author>
+      <author id="nick-webb"><first>Nick</first><last>Webb</last></author>
       <pages>22–26</pages>
       <url hash="75cc3989">W18-6905</url>
       <abstract>Human-robot interactions situated in a dynamic environment create a unique mix of challenges for conversational systems. We argue that, on the one hand, NLG can contribute to addressing these challenges and that, on the other hand, they pose interesting research problems for NLG. To illustrate our position we describe our research on non-humanoid robots using non-verbal signals to support communication.</abstract>
@@ -14939,7 +14939,7 @@
     </paper>
     <paper id="6">
       <title>Being data-driven is not enough: Revisiting interactive instruction giving as a challenge for <fixed-case>NLG</fixed-case></title>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>27–31</pages>
       <url hash="69106909">W18-6906</url>
@@ -14952,10 +14952,10 @@
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Automatic Text Adaptation (<fixed-case>ATA</fixed-case>)</booktitle>
       <url hash="913e0287">W18-70</url>
-      <editor><first>Arne</first><last>Jönsson</last></editor>
+      <editor id="arne-jonsson"><first>Arne</first><last>Jönsson</last></editor>
       <editor><first>Evelina</first><last>Rennes</last></editor>
       <editor><first>Horacio</first><last>Saggion</last></editor>
-      <editor><first>Sanja</first><last>Stajner</last></editor>
+      <editor id="sanja-stajner"><first>Sanja</first><last>Stajner</last></editor>
       <editor><first>Victoria</first><last>Yaneva</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tilburg, the Netherlands</address>
@@ -14998,9 +14998,9 @@
     <paper id="4">
       <title>Assisted Lexical Simplification for <fixed-case>F</fixed-case>rench Native Children with Reading Difficulties</title>
       <author><first>Firas</first><last>Hmida</last></author>
-      <author><first>Mokhtar B.</first><last>Billami</last></author>
+      <author id="mokhtar-b-billami"><first>Mokhtar B.</first><last>Billami</last></author>
       <author><first>Thomas</first><last>François</last></author>
-      <author><first>Núria</first><last>Gala</last></author>
+      <author id="nuria-gala"><first>Núria</first><last>Gala</last></author>
       <pages>21-28</pages>
       <url hash="177944af">W18-7004</url>
       <doi>10.18653/v1/W18-7004</doi>
@@ -15010,10 +15010,10 @@
       <title>Reference-less Quality Estimation of Text Simplification Systems</title>
       <author><first>Louis</first><last>Martin</last></author>
       <author><first>Samuel</first><last>Humeau</last></author>
-      <author><first>Pierre-Emmanuel</first><last>Mazaré</last></author>
-      <author><first>Éric</first><last>de La Clergerie</last></author>
+      <author id="pierre-emmanuel-mazare"><first>Pierre-Emmanuel</first><last>Mazaré</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Éric</first><last>de La Clergerie</last></author>
       <author><first>Antoine</first><last>Bordes</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>29-38</pages>
       <url hash="f9217e4d">W18-7005</url>
       <doi>10.18653/v1/W18-7005</doi>
@@ -15022,7 +15022,7 @@
     <paper id="6">
       <title>Improving Machine Translation of <fixed-case>E</fixed-case>nglish Relative Clauses with Automatic Text Simplification</title>
       <author><first>Sanja</first><last>Štajner</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>39-48</pages>
       <url hash="20459478">W18-7006</url>
       <doi>10.18653/v1/W18-7006</doi>
@@ -15081,21 +15081,21 @@
     <paper id="5">
       <title>Demonstrating the <fixed-case>MUSTE</fixed-case> Language Learning Environment</title>
       <author><first>Herbert</first><last>Lange</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>41-46</pages>
       <url hash="54784230">W18-7105</url>
       <bibkey>lange-ljunglof-2018-demonstrating</bibkey>
     </paper>
     <paper id="6">
       <title>Learner Corpus Anonymization in the Age of <fixed-case>GDPR</fixed-case>: Insights from the Creation of a Learner Corpus of <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Lena</first><last>Granstedt</last></author>
       <author><first>Sofia</first><last>Johansson</last></author>
       <author><first>Julia</first><last>Prentice</last></author>
       <author><first>Dan</first><last>Rosén</last></author>
       <author><first>Carl-Johan</first><last>Schenström</last></author>
       <author><first>Gunlög</first><last>Sundberg</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <author><first>Elena</first><last>Volodina</last></author>
       <pages>47-56</pages>
       <url hash="80cb8228">W18-7106</url>
@@ -15104,7 +15104,7 @@
     <paper id="7">
       <title>Work Smart - Reducing Effort in Short-Answer Grading</title>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <pages>57-68</pages>
       <url hash="818ccece">W18-7107</url>
       <bibkey>mieskes-pado-2018-work</bibkey>
@@ -15119,9 +15119,9 @@
     </paper>
     <paper id="9">
       <title>A Linguistically-Informed Search Engine to Identifiy Reading Material for Functional Illiteracy Classes</title>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <author><first>Sabrina</first><last>Dittrich</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>79-90</pages>
       <url hash="895d86f6">W18-7109</url>
       <bibkey>weiss-etal-2018-linguistically</bibkey>
@@ -15129,10 +15129,10 @@
     <paper id="10">
       <title>Feedback Strategies for Form and Meaning in a Real-life Language Tutoring System</title>
       <author><first>Ramon</first><last>Ziai</last></author>
-      <author><first>Bjoern</first><last>Rudzewitz</last></author>
+      <author id="bjorn-rudzewitz"><first>Bjoern</first><last>Rudzewitz</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
       <author><first>Florian</first><last>Nuxoll</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>91-98</pages>
       <url hash="399f419d">W18-7110</url>
       <bibkey>ziai-etal-2018-feedback</bibkey>
diff --git a/data/xml/W19.xml b/data/xml/W19.xml
index b574d6183c..4315d520cc 100644
--- a/data/xml/W19.xml
+++ b/data/xml/W19.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Society for Computation in Linguistics (<fixed-case>SC</fixed-case>i<fixed-case>L</fixed-case>) 2019</booktitle>
-      <editor><first>Gaja</first><last>Jarosz</last></editor>
+      <editor id="gaja-jarosz"><first>Gaja</first><last>Jarosz</last></editor>
       <editor><first>Max</first><last>Nelson</last></editor>
       <editor><first>Brendan</first><last>O’Connor</last></editor>
       <editor><first>Joe</first><last>Pater</last></editor>
@@ -18,7 +18,7 @@
     </frontmatter>
     <paper id="1">
       <title>Can Entropy Explain Successor Surprisal Effects in Reading?</title>
-      <author><first>Marten</first><last>van Schijndel</last></author>
+      <author id="marten-van-schijndel"><first>Marten</first><last>van Schijndel</last></author>
       <author><first>Tal</first><last>Linzen</last></author>
       <pages>1-7</pages>
       <doi>10.7275/qtbb-9d05</doi>
@@ -56,7 +56,7 @@
       <title>Modeling Clausal Complementation for a Grammar Engineering Resource</title>
       <author><first>Olga</first><last>Zamaraeva</last></author>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>39-49</pages>
       <doi>10.7275/dygn-c796</doi>
       <url hash="f8d3d0e2">W19-0105</url>
@@ -65,7 +65,7 @@
     <paper id="6">
       <title>Do <fixed-case>RNN</fixed-case>s learn human-like abstract word order preferences?</title>
       <author><first>Richard</first><last>Futrell</last></author>
-      <author><first>Roger P.</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger P.</first><last>Levy</last></author>
       <pages>50-59</pages>
       <doi>10.7275/jb34-9986</doi>
       <url hash="58be6481">W19-0106</url>
@@ -81,7 +81,7 @@
     </paper>
     <paper id="8">
       <title>Constraint breeding during on-line incremental learning</title>
-      <author><first>Elliot</first><last>Moreton</last></author>
+      <author id="elliott-moreton"><first>Elliot</first><last>Moreton</last></author>
       <pages>69-80</pages>
       <doi>10.7275/6f9x-6411</doi>
       <url hash="63f81efe">W19-0108</url>
@@ -90,7 +90,7 @@
     <paper id="9">
       <title>An Incremental Iterated Response Model of Pragmatics</title>
       <author><first>Reuben</first><last>Cohn-Gordon</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <author><first>Christopher</first><last>Potts</last></author>
       <pages>81-90</pages>
       <doi>10.7275/cprc-8x17</doi>
@@ -152,7 +152,7 @@
     </paper>
     <paper id="16">
       <title>Using Sentiment Induction to Understand Variation in Gendered Online Communities</title>
-      <author><first>Li</first><last>Lucy</last></author>
+      <author id="li-lucy"><first>Li</first><last>Lucy</last></author>
       <author><first>Julia</first><last>Mendelsohn</last></author>
       <pages>156-166</pages>
       <doi>10.7275/11wq-ep51</doi>
@@ -161,7 +161,7 @@
     </paper>
     <paper id="17">
       <title>On the difficulty of a distributional semantics of spoken language</title>
-      <author><first>Grzegorz</first><last>Chrupała</last></author>
+      <author id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></author>
       <author><first>Lieke</first><last>Gelderloos</last></author>
       <author><first>Ákos</first><last>Kádár</last></author>
       <author><first>Afra</first><last>Alishahi</last></author>
@@ -183,7 +183,7 @@
     <paper id="19">
       <title>Guess Who’s Coming (and Who’s Going): Bringing Perspective to the Rational Speech Acts Framework</title>
       <author><first>Carolyn Jane</first><last>Anderson</last></author>
-      <author><first>Brian W.</first><last>Dillon</last></author>
+      <author id="brian-w-dillon"><first>Brian W.</first><last>Dillon</last></author>
       <pages>185-194</pages>
       <doi>10.7275/9bn3-8x38</doi>
       <url hash="c4d1ded4">W19-0119</url>
@@ -210,7 +210,7 @@
       <title>Modeling the Acquisition of Words with Multiple Meanings</title>
       <author><first>Libby</first><last>Barak</last></author>
       <author><first>Sammy</first><last>Floyd</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <pages>216-225</pages>
       <doi>10.7275/tr21-m273</doi>
       <url hash="ff464bca">W19-0122</url>
@@ -218,7 +218,7 @@
     </paper>
     <paper id="23">
       <title>Evaluation Order Effects in Dynamic Continuized <fixed-case>CCG</fixed-case>: From Negative Polarity Items to Balanced Punctuation</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <pages>226-235</pages>
       <doi>10.7275/kpch-rk05</doi>
       <url hash="d1c525a4">W19-0123</url>
@@ -226,10 +226,10 @@
     </paper>
     <paper id="24">
       <title><fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Human-Robot Dialogue</title>
-      <author><first>Claire N.</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire N.</first><last>Bonial</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
       <author><first>Jessica</first><last>Ervin</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <pages>236-246</pages>
       <doi>10.7275/v3c5-yd35</doi>
       <url hash="4659169f">W19-0124</url>
@@ -267,7 +267,7 @@
       <title>On Evaluating the Generalization of <fixed-case>LSTM</fixed-case> Models in Formal Languages</title>
       <author><first>Mirac</first><last>Suzgun</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <pages>277-286</pages>
       <doi>10.7275/s02b-4d91</doi>
       <url hash="cf4fee51">W19-0128</url>
@@ -275,10 +275,10 @@
     </paper>
     <paper id="29">
       <title>Verb Argument Structure Alternations in Word and Sentence Embeddings</title>
-      <author><first>Katharina</first><last>Kann</last></author>
+      <author id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></author>
       <author><first>Alex</first><last>Warstadt</last></author>
       <author><first>Adina</first><last>Williams</last></author>
-      <author><first>Samuel R.</first><last>Bowman</last></author>
+      <author id="samuel-bowman"><first>Samuel R.</first><last>Bowman</last></author>
       <pages>287-297</pages>
       <doi>10.7275/q5js-4y86</doi>
       <url hash="4fd2696f">W19-0129</url>
@@ -289,9 +289,9 @@
     <meta>
       <booktitle>Proceedings of the Fifth International Workshop on Computational Linguistics for Uralic Languages</booktitle>
       <url hash="57ba8ec6">W19-03</url>
-      <editor><first>Tommi A.</first><last>Pirinen</last></editor>
-      <editor><first>Heiki-Jaan</first><last>Kaalep</last></editor>
-      <editor><first>Francis M.</first><last>Tyers</last></editor>
+      <editor id="tommi-a-pirinen"><first>Tommi A.</first><last>Pirinen</last></editor>
+      <editor id="heiki-jaan-kaalep"><first>Heiki-Jaan</first><last>Kaalep</last></editor>
+      <editor id="francis-tyers"><first>Francis M.</first><last>Tyers</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tartu, Estonia</address>
       <month>January</month>
@@ -304,7 +304,7 @@
     </frontmatter>
     <paper id="1">
       <title>Data-Driven Morphological Analysis for <fixed-case>U</fixed-case>ralic Languages</title>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <author><first>Francis</first><last>Tyers</last></author>
       <pages>1–14</pages>
       <url hash="93b20deb">W19-0301</url>
@@ -402,7 +402,7 @@
     <paper id="12">
       <title>Is this the end? Two-step tokenization of sentence boundaries</title>
       <author><first>Linda</first><last>Wiechetek</last></author>
-      <author><first>Sjur Nørstebø</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur Nørstebø</first><last>Moshagen</last></author>
       <author><first>Thomas</first><last>Omma</last></author>
       <pages>141–153</pages>
       <url hash="251057f2">W19-0312</url>
@@ -464,7 +464,7 @@
     <paper id="2">
       <title>A Type-coherent, Expressive Representation as an Initial Step to Language Understanding</title>
       <author><first>Gene Louis</first><last>Kim</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>13–30</pages>
       <abstract>A growing interest in tasks involving language understanding by the NLP community has led to the need for effective semantic parsing and inference. Modern NLP systems use semantic representations that do not quite fulfill the nuanced needs for language understanding: adequately modeling language semantics, enabling general inferences, and being accurately recoverable. This document describes underspecified logical forms (ULF) for Episodic Logic (EL), which is an initial form for a semantic representation that balances these needs. ULFs fully resolve the semantic type structure while leaving issues such as quantifier scope, word sense, and anaphora unresolved; they provide a starting point for further resolution into EL, and enable certain structural inferences without further resolution. This document also presents preliminary results of creating a hand-annotated corpus of ULFs for the purpose of training a precise ULF parser, showing a three-person pairwise interannotator agreement of 0.88 on confident annotations. We hypothesize that a divide-and-conquer approach to semantic parsing starting with derivation of ULFs will lead to semantic analyses that do justice to subtle aspects of linguistic meaning, and will enable construction of more accurate semantic parsers.</abstract>
       <url hash="b41318e3">W19-0402</url>
@@ -473,7 +473,7 @@
     </paper>
     <paper id="3">
       <title>A Semantic Annotation Scheme for Quantification</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>31–42</pages>
       <abstract>This paper describes in brief the proposal called ‘QuantML’ which was accepted by the International Organisation for Standards (ISO) last February as a starting point for developing a standard for the interoperable annotation of quantification phenomena in natural language, as part of the ISO 24617 Semantic Annotation Framework. The proposal, firmly rooted in the theory of generalised quantifiers, neo-Davidsonian semantics, and DRT, covers a wide range of quantification phenomena. The QuantML scheme consists of (1) an abstract syntax which defines ‘annotation structures’ as triples and other set-theoretic constructs; (b) a compositional semantics of annotation structures; (3) an XML representation of annotation structures.</abstract>
       <url hash="0ceab1b5">W19-0403</url>
@@ -503,7 +503,7 @@
     </paper>
     <paper id="6">
       <title>A Semantic Ontology of <fixed-case>D</fixed-case>anish Adjectives</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>71–78</pages>
       <abstract>This paper presents a semantic annotation scheme for Danish adjectives, focusing both on prototypical semantic content and semantic collocational restrictions on an adjective’s head noun. The core type set comprises about 110 categories ordered in a shallow hierarchy with 14 primary and 25 secondary umbrella categories. In addition, domain information and binary sentiment tags are provided, as well as VerbNet-derived frames and semantic roles for those adjectives governing arguments. The scheme has been almost fully implemented on the lexicon of the Danish VISL parser, DanGram, containing 14,000 adjectives. We discuss the annotation scheme and its applicational perspectives, and present a statistical breakdown and coverage evaluation for three Danish reference corpora.</abstract>
       <url hash="bd69b6af">W19-0406</url>
@@ -537,7 +537,7 @@
       <title>Temporal and Aspectual Entailment</title>
       <author><first>Thomas</first><last>Kober</last></author>
       <author><first>Sander</first><last>Bijl de Vroe</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>103–119</pages>
       <abstract>Inferences regarding “Jane’s arrival in London” from predications such as “Jane is going to London” or “Jane has gone to London” depend on tense and aspect of the predications. Tense determines the temporal location of the predication in the past, present or future of the time of utterance. The aspectual auxiliaries on the other hand specify the internal constituency of the event, i.e. whether the event of “going to London” is completed and whether its consequences hold at that time or not. While tense and aspect are among the most important factors for determining natural language inference, there has been very little work to show whether modern embedding models capture these semantic concepts. In this paper we propose a novel entailment dataset and analyse the ability of contextualised word representations to perform inference on predications across aspectual types and tenses. We show that they encode a substantial amount of information relating to tense and aspect, but fail to consistently model inferences that require reasoning with these semantic properties.</abstract>
       <url hash="7421d30a">W19-0409</url>
@@ -547,7 +547,7 @@
     <paper id="10">
       <title>Don’t Blame Distributional Semantics if it can’t do Entailment</title>
       <author><first>Matthijs</first><last>Westera</last></author>
-      <author><first>Gemma</first><last>Boleda</last></author>
+      <author id="gemma-boleda"><first>Gemma</first><last>Boleda</last></author>
       <pages>120–133</pages>
       <abstract>Distributional semantics has had enormous empirical success in Computational Linguistics and Cognitive Science in modeling various semantic phenomena, such as semantic similarity, and distributional models are widely used in state-of-the-art Natural Language Processing systems. However, the theoretical status of distributional semantics within a broader theory of language and cognition is still unclear: What does distributional semantics model? Can it be, on its own, a fully adequate model of the meanings of linguistic expressions? The standard answer is that distributional semantics is not fully adequate in this regard, because it falls short on some of the central aspects of formal semantic approaches: truth conditions, entailment, reference, and certain aspects of compositionality. We argue that this standard answer rests on a misconception: These aspects do not belong in a theory of expression meaning, they are instead aspects of speaker meaning, i.e., communicative intentions in a particular context. In a slogan: words do not refer, speakers do. Clearing this up enables us to argue that distributional semantics on its own is an adequate model of expression meaning. Our proposal sheds light on the role of distributional semantics in a broader theory of language and cognition, its relationship to formal semantics, and its place in computational models.</abstract>
       <url hash="34052f75">W19-0410</url>
@@ -556,7 +556,7 @@
     </paper>
     <paper id="11">
       <title>Ambiguity in Explicit Discourse Connectives</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Rashmi</first><last>Prasad</last></author>
       <author><first>Alan</first><last>Lee</last></author>
       <pages>134–141</pages>
@@ -569,7 +569,7 @@
       <title>Aligning Open <fixed-case>IE</fixed-case> Relations and <fixed-case>KB</fixed-case> Relations using a <fixed-case>S</fixed-case>iamese Network Based on Word Embedding</title>
       <author><first>Rifki Afina</first><last>Putri</last></author>
       <author><first>Giwon</first><last>Hong</last></author>
-      <author><first>Sung-Hyon</first><last>Myaeng</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung-Hyon</first><last>Myaeng</last></author>
       <pages>142–153</pages>
       <abstract>Open Information Extraction (Open IE) aims at generating entity-relation-entity triples from a large amount of text, aiming at capturing key semantics of the text. Given a triple, the relation expresses the type of semantic relation between the entities. Although relations from an Open IE system are more extensible than those used in a traditional Information Extraction system and a Knowledge Base (KB) such as Knowledge Graphs, the former lacks in semantics; an Open IE relation is simply a sequence of words, whereas a KB relation has a predefined meaning. As a way to provide a meaning to an Open IE relation, we attempt to align it with one of the predefined set of relations used in a KB. Our approach is to use a Siamese network that compares two sequences of word embeddings representing an Open IE relation and a predefined KB relation. In order to make the approach practical, we automatically generate a training dataset using a distant supervision approach instead of relying on a hand-labeled dataset. Our experiment shows that the proposed method can capture the relational semantics better than the recent approaches.</abstract>
       <url hash="0b875732">W19-0412</url>
@@ -581,8 +581,8 @@
       <author><first>Md Shad</first><last>Akhtar</last></author>
       <author><first>Abhishek</first><last>Kumar</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>154–164</pages>
       <abstract>In this paper, we propose a language-agnostic deep neural network architecture for aspect-based sentiment analysis. The proposed approach is based on Bidirectional Long Short-Term Memory (Bi-LSTM) network, which is further assisted with extra hand-crafted features. We define three different architectures for the successful combination of word embeddings and hand-crafted features. We evaluate the proposed approach for six languages (i.e. English, Spanish, French, Dutch, German and Hindi) and two problems (i.e. aspect term extraction and aspect sentiment classification). Experiments show that the proposed model attains state-of-the-art performance in most of the settings.</abstract>
       <url hash="435bc3ed">W19-0413</url>
@@ -623,7 +623,7 @@
       <title>Cross-Lingual Transfer of Semantic Roles: From Raw Text to Semantic Roles</title>
       <author><first>Maryam</first><last>Aminian</last></author>
       <author><first>Mohammad Sadegh</first><last>Rasooli</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>200–210</pages>
       <abstract>We describe a transfer method based on annotation projection to develop a dependency-based semantic role labeling system for languages for which no supervised linguistic information other than parallel data is available. Unlike previous work that presumes the availability of supervised features such as lemmas, part-of-speech tags, and dependency parse trees, we only make use of word and character features. Our deep model considers using character-based representations as well as unsupervised stem embeddings to alleviate the need for supervised features. Our experiments outperform a state-of-the-art method that uses supervised lexico-syntactic features on 6 out of 7 languages in the Universal Proposition Bank.</abstract>
       <url hash="26f68cb0">W19-0417</url>
@@ -634,8 +634,8 @@
       <title>Evaluating the Representational Hub of Language and Vision Models</title>
       <author><first>Ravi</first><last>Shekhar</last></author>
       <author><first>Ece</first><last>Takmaz</last></author>
-      <author><first>Raquel</first><last>Fernández</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raquel-fernandez"><first>Raquel</first><last>Fernández</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>211–222</pages>
       <abstract>The multimodal models used in the emerging field at the intersection of computational linguistics and computer vision implement the bottom-up processing of the “Hub and Spoke” architecture proposed in cognitive science to represent how the brain processes and combines multi-sensory inputs. In particular, the Hub is implemented as a neural network encoder. We investigate the effect on this encoder of various vision-and-language tasks proposed in the literature: visual question answering, visual reference resolution, and visually grounded dialogue. To measure the quality of the representations learned by the encoder, we use two kinds of analyses. First, we evaluate the encoder pre-trained on the different vision-and-language tasks on an existing “diagnostic task” designed to assess multimodal semantic understanding. Second, we carry out a battery of analyses aimed at studying how the encoder merges and exploits the two modalities.</abstract>
       <url hash="b995aa04">W19-0418</url>
@@ -647,7 +647,7 @@
       <author><first>Rezka</first><last>Leonandya</last></author>
       <author><first>Dieuwke</first><last>Hupkes</last></author>
       <author><first>Elia</first><last>Bruni</last></author>
-      <author><first>Germán</first><last>Kruszewski</last></author>
+      <author id="german-kruszewski"><first>Germán</first><last>Kruszewski</last></author>
       <pages>223–234</pages>
       <abstract>Learning to follow human instructions is a long-pursued goal in artificial intelligence. The task becomes particularly challenging if no prior knowledge of the employed language is assumed while relying only on a handful of examples to learn from. Work in the past has relied on hand-coded components or manually engineered features to provide strong inductive biases that make learning in such situations possible. In contrast, here we seek to establish whether this knowledge can be acquired automatically by a neural network system through a two phase training procedure: A (slow) offline learning stage where the network learns about the general structure of the task and a (fast) online adaptation phase where the network learns the language of a new given speaker. Controlled experiments show that when the network is exposed to familiar instructions but containing novel words, the model adapts very efficiently to the new vocabulary. Moreover, even for human speakers whose language usage can depart significantly from our artificial training language, our network can still make use of its automatically acquired inductive bias to learn to follow instructions more effectively.</abstract>
       <url hash="9166a81a">W19-0419</url>
@@ -680,8 +680,8 @@
     <paper id="22">
       <title>Using <fixed-case>W</fixed-case>iktionary as a resource for <fixed-case>WSD</fixed-case> : the case of <fixed-case>F</fixed-case>rench verbs</title>
       <author><first>Vincent</first><last>Segonne</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Benoît</first><last>Crabbé</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="benoit-crabbe"><first>Benoît</first><last>Crabbé</last></author>
       <pages>259–270</pages>
       <abstract>As opposed to word sense induction, word sense disambiguation (WSD) has the advantage of us-ing interpretable senses, but requires annotated data, which are quite rare for most languages except English (Miller et al. 1993; Fellbaum, 1998). In this paper, we investigate which strategy to adopt to achieve WSD for languages lacking data that was annotated specifically for the task, focusing on the particular case of verb disambiguation in French. We first study the usability of Eurosense (Bovi et al. 2017) , a multilingual corpus extracted from Europarl (Kohen, 2005) and automatically annotated with BabelNet (Navigli and Ponzetto, 2010) senses. Such a resource opened up the way to supervised and semi-supervised WSD for resourceless languages like French. While this perspective looked promising, our evaluation on French verbs was inconclusive and showed the annotated senses’ quality was not sufficient for supervised WSD on French verbs. Instead, we propose to use Wiktionary, a collaboratively edited, multilingual online dictionary, as a resource for WSD. Wiktionary provides both sense inventory and manually sense tagged examples which can be used to train supervised and semi-supervised WSD systems. Yet, because senses’ distribution differ in lexicographic examples found in Wiktionary with respect to natural text, we then focus on studying the impact on WSD of the training data size and senses’ distribution. Using state-of-the art semi-supervised systems, we report experiments of Wiktionary-based WSD for French verbs, evaluated on FrenchSemEval (FSE), a new dataset of French verbs manually annotated with wiktionary senses.</abstract>
       <url hash="b6100c85">W19-0422</url>
@@ -690,7 +690,7 @@
     </paper>
     <paper id="23">
       <title>A Comparison of Context-sensitive Models for Lexical Substitution</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Anne</first><last>Cocos</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
@@ -712,7 +712,7 @@
     <paper id="25">
       <title>Frame Identification as Categorization: Exemplars vs Prototypes in Embeddingland</title>
       <author><first>Jennifer</first><last>Sikos</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>295–306</pages>
       <abstract>Categorization is a central capability of human cognition, and a number of theories have been developed to account for properties of categorization. Even though many tasks in semantics also involve categorization of some kind, theories of categorization do not play a major role in contemporary research in computational linguistics. This paper follows the idea that embedding-based models of semantics lend themselves well to being formulated in terms of classical categorization theories. The benefit is a space of model families that enables (a) the formulation of hypotheses about the impact of major design decisions, and (b) a transparent assessment of these decisions. We instantiate this idea on the task of frame-semantic frame identification. We define four models that cross two design variables: (a) the choice of prototype vs. exemplar categorization, corresponding to different degrees of generalization applied to the input; and (b) the presence vs. absence of a fine-tuning step, corresponding to generic vs. task-adaptive categorization. We find that for frame identification, generalization and task-adaptive categorization both yield substantial benefits. Our prototype-based, fine-tuned model, which combines the best choices for these variables, establishes a new state of the art in frame identification.</abstract>
       <url hash="3b7267e6">W19-0425</url>
@@ -761,7 +761,7 @@
     <paper id="3">
       <title>Distributional Semantics in the Real World: Building Word Vector Representations from a Truth-Theoretic Model</title>
       <author><first>Elizaveta</first><last>Kuzmenko</last></author>
-      <author><first>Aurélie</first><last>Herbelot</last></author>
+      <author id="aurelie-herbelot"><first>Aurélie</first><last>Herbelot</last></author>
       <pages>16–23</pages>
       <abstract>Distributional semantics models (DSMs) are known to produce excellent representations of word meaning, which correlate with a range of behavioural data. As lexical representations, they have been said to be fundamentally different from truth-theoretic models of semantics, where meaning is defined as a correspondence relation to the world. There are two main aspects to this difference: a) DSMs are built over corpus data which may or may not reflect ‘what is in the world’; b) they are built from word co-occurrences, that is, from lexical types rather than entities and sets. In this paper, we inspect the properties of a distributional model built over a set-theoretic approximation of ‘the real world’. To achieve this, we take the annotation a large database of images marked with objects, attributes and relations, convert the data into a representation akin to first-order logic and build several distributional models using various combinations of features. We evaluate those models over both relatedness and similarity datasets, demonstrating their effectiveness in standard evaluations. This allows us to conclude that, despite prior claims, truth-theoretic models are good candidates for building graded lexical representations of meaning.</abstract>
       <url hash="cdfd4fc5">W19-0503</url>
@@ -793,7 +793,7 @@
     <paper id="6">
       <title>Distributional Interaction of Concreteness and Abstractness in Verb–Noun Subcategorisation</title>
       <author><first>Diego</first><last>Frassinelli</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>38–43</pages>
       <abstract>In recent years, both cognitive and computational research has provided empirical analyses of contextual co-occurrence of concrete and abstract words, partially resulting in inconsistent pictures. In this work we provide a more fine-grained description of the distributional nature in the corpus-based interaction of verbs and nouns within subcategorisation, by investigating the concreteness of verbs and nouns that are in a specific syntactic relationship with each other, i.e., subject, direct object, and prepositional object. Overall, our experiments show consistent patterns in the distributional representation of subcategorising and subcategorised concrete and abstract words. At the same time, the studies reveal empirical evidence why contextual abstractness represents a valuable indicator for automatic non-literal language identification.</abstract>
       <url hash="eb0ec29b">W19-0506</url>
@@ -803,7 +803,7 @@
     <paper id="7">
       <title>Generating a Novel Dataset of Multimodal Referring Expressions</title>
       <author><first>Nikhil</first><last>Krishnaswamy</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>44–51</pages>
       <abstract>Referring expressions and definite descriptions of objects in space exploit information both about object characteristics and locations. To resolve potential ambiguity, referencing strategies in language can rely on increasingly abstract concepts to distinguish an object in a given location from similar ones elsewhere, yet the description of the intended location may still be imprecise or difficult to interpret. Meanwhile, modalities such as gesture may communicate spatial information such as locations in a more concise manner. In real peer-to-peer communication, humans use language and gesture together to reference entities, with a capacity for mixing and changing modalities where needed. While recent progress in AI and human-computer interaction has created systems where a human can interact with a computer multimodally, computers often lack the capacity to intelligently mix modalities when generating referring expressions. We present a novel dataset of referring expressions combining natural language and gesture, describe its creation and evaluation, and its uses to train computational models for generating and interpreting multimodal referring expressions.</abstract>
       <url hash="0578f6a6">W19-0507</url>
@@ -864,7 +864,7 @@
     <paper id="1">
       <title>A Dynamic Semantics for Causal Counterfactuals</title>
       <author><first>Kenneth</first><last>Lai</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>1–8</pages>
       <abstract>Under the standard approach to counterfactuals, to determine the meaning of a counterfactual sentence, we consider the “closest” possible world(s) where the antecedent is true, and evaluate the consequent. Building on the standard approach, some researchers have found that the set of worlds to be considered is dependent on context; it evolves with the discourse. Others have focused on how to define the “distance” between possible worlds, using ideas from causal modeling. This paper integrates the two ideas. We present a semantics for counterfactuals that uses a distance measure based on causal laws, that can also change over time. We show how our semantics can be implemented in the Haskell programming language.</abstract>
       <url hash="7afedb96">W19-0601</url>
@@ -914,7 +914,7 @@
     <paper id="6">
       <title>Semantic Frame Embeddings for Detecting Relations between Software Requirements</title>
       <author><first>Waad</first><last>Alhoshan</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <author><first>Liping</first><last>Zhao</last></author>
       <pages>44–51</pages>
       <abstract>The early phases of requirements engineering (RE) deal with a vast amount of software requirements (i.e., requirements that define characteristics of software systems), which are typically expressed in natural language. Analysing such unstructured requirements, usually obtained from users’ inputs, is considered a challenging task due to the inherent ambiguity and inconsistency of natural language. To support such a task, methods based on natural language processing (NLP) can be employed. One of the more recent advances in NLP is the use of word embeddings for capturing contextual information, which can then be applied in word analogy tasks. In this paper, we describe a new resource, i.e., embedding-based representations of semantic frames in FrameNet, which was developed to support the detection of relations between software requirements. Our embeddings, which encapsulate contextual information at the semantic frame level, were trained on a large corpus of requirements (i.e., a collection of more than three million mobile application reviews). The similarity between these frame embeddings is then used as a basis for detecting semantic relatedness between software requirements. Compared with existing resources underpinned by word-level embeddings alone, and frame embeddings built upon pre-trained vectors, our proposed frame embeddings obtained better performance against judgements of an RE expert. These encouraging results demonstrate the strong potential of the resource in supporting RE analysis tasks (e.g., traceability), which we plan to investigate as part of our future work.</abstract>
@@ -955,7 +955,7 @@
       <title>Assessing the Difficulty of Classifying <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et Relations in a Multi-Label Classification Setting</title>
       <author><first>Maria</first><last>Becker</last></author>
       <author><first>Michael</first><last>Staniek</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Anette</first><last>Frank</last></author>
       <abstract>Commonsense knowledge relations are crucial for advanced NLU tasks. We examine the learnability of such relations as represented in ConceptNet, taking into account their specific properties, which can make relation classification difficult: a given concept pair can be linked by multiple relation types, and relations can have multi-word arguments of diverse semantic types. We explore a neural open world multi-label classification approach that focuses on the evaluation of classification accuracy for individual relations. Based on an in-depth study of the specific properties of the ConceptNet resource, we investigate the impact of different relation representations and model variations. Our analysis reveals that the complexity of argument types and relation ambiguity are the most important challenges to address. We design a customized evaluation method to address the incompleteness of the resource that can be expanded in future work.</abstract>
       <url hash="cd3060a4">W19-0801</url>
@@ -985,7 +985,7 @@
     </paper>
     <paper id="4">
       <title>Semantic Matching of Documents from Heterogeneous Collections: A Simple and Transparent Method for Practical Applications</title>
-      <author><first>Mark-Christoph</first><last>Mueller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Mueller</last></author>
       <abstract>We present a very simple, unsupervised method for the pairwise matching of documents from heterogeneous collections. We demonstrate our method with the Concept-Project matching task, which is a binary classification task involving pairs of documents from heterogeneous collections. Although our method only employs standard resources without any domain- or task-specific modifications, it clearly outperforms the more complex system of the original authors. In addition, our method is transparent, because it provides explicit information about how a similarity score was computed, and efficient, because it is based on the aggregation of (pre-computable) word-level similarities.</abstract>
       <url hash="0a2aa0a2">W19-0804</url>
       <doi>10.18653/v1/W19-0804</doi>
@@ -996,12 +996,12 @@
     <meta>
       <booktitle>Proceedings of the <fixed-case>IWCS</fixed-case> Workshop Vector Semantics for Discourse and Dialogue</booktitle>
       <url hash="21dfd3bd">W19-09</url>
-      <editor><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
+      <editor id="mehrnoosh-sadrzadeh"><first>Mehrnoosh</first><last>Sadrzadeh</last></editor>
       <editor><first>Matthew</first><last>Purver</last></editor>
       <editor><first>Arash</first><last>Eshghi</last></editor>
       <editor><first>Julian</first><last>Hough</last></editor>
       <editor><first>Ruth</first><last>Kempson</last></editor>
-      <editor><first>Patrick G. T.</first><last>Healey</last></editor>
+      <editor id="patrick-healey"><first>Patrick G. T.</first><last>Healey</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
       <month>May</month>
@@ -1108,8 +1108,8 @@
       <booktitle>Proceedings of the Sixth Workshop on Natural Language and Computer Science</booktitle>
       <url hash="1a3c1474">W19-11</url>
       <editor><first>Robin</first><last>Cooper</last></editor>
-      <editor><first>Valeria</first><last>de Paiva</last></editor>
-      <editor><first>Lawrence S.</first><last>Moss</last></editor>
+      <editor id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></editor>
+      <editor id="lawrence-s-moss"><first>Lawrence S.</first><last>Moss</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Gothenburg, Sweden</address>
       <month>May</month>
@@ -1136,7 +1136,7 @@
       <title>Towards Natural Language Story Understanding with Rich Logical Schemas</title>
       <author><first>Lane</first><last>Lawley</last></author>
       <author><first>Gene Louis</first><last>Kim</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>11–22</pages>
       <abstract>Generating “commonsense’’ knowledge for intelligent understanding and reasoning is a difficult, long-standing problem, whose scale challenges the capacity of any approach driven primarily by human input. Furthermore, approaches based on mining statistically repetitive patterns fail to produce the rich representations humans acquire, and fall far short of human efficiency in inducing knowledge from text. The idea of our approach to this problem is to provide a learning system with a “head start” consisting of a semantic parser, some basic ontological knowledge, and most importantly, a small set of very general schemas about the kinds of patterns of events (often purposive, causal, or socially conventional) that even a one- or two-year-old could reasonably be presumed to possess. We match these initial schemas to simple children’s stories, obtaining concrete instances, and combining and abstracting these into new candidate schemas. Both the initial and generated schemas are specified using a rich, expressive logical form. While modern approaches to schema reasoning often only use slot-and-filler structures, this logical form allows us to specify complex relations and constraints over the slots. Though formal, the representations are language-like, and as such readily relatable to NL text. The agents, objects, and other roles in the schemas are represented by typed variables, and the event variables can be related through partial temporal ordering and causal relations. To match natural language stories with existing schemas, we first parse the stories into an underspecified variant of the logical form used by the schemas, which is suitable for most concrete stories. We include a walkthrough of matching a children’s story to these schemas and generating inferences from these matches.</abstract>
       <url hash="ffc9b833">W19-1102</url>
@@ -1204,7 +1204,7 @@
     <paper id="3">
       <title>Discourse Representation Structure Parsing with Recurrent Neural Networks and the Transformer Model</title>
       <author><first>Jiangming</first><last>Liu</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <abstract>We describe the systems we developed for Discourse Representation Structure (DRS) parsing as part of the IWCS-2019 Shared Task of DRS Parsing.1 Our systems are based on sequence-to-sequence modeling. To implement our model, we use the open-source neural machine translation system implemented in PyTorch, OpenNMT-py. We experimented with a variety of encoder-decoder models based on recurrent neural networks and the Transformer model. We conduct experiments on the standard benchmark of the Parallel Meaning Bank (PMB 2.2). Our best system achieves a score of 84.8% F1 in the DRS parsing shared task.</abstract>
       <url hash="09a36943">W19-1203</url>
@@ -1224,11 +1224,11 @@
     <meta>
       <booktitle>Proceedings of the Tenth Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</booktitle>
       <url hash="5d635f79">W19-13</url>
-      <editor><first>Alexandra</first><last>Balahur</last></editor>
+      <editor id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></editor>
       <editor><first>Roman</first><last>Klinger</last></editor>
-      <editor><first>Veronique</first><last>Hoste</last></editor>
+      <editor id="veronique-hoste"><first>Veronique</first><last>Hoste</last></editor>
       <editor><first>Carlo</first><last>Strapparava</last></editor>
-      <editor><first>Orphee</first><last>De Clercq</last></editor>
+      <editor id="orphee-de-clercq"><first>Orphee</first><last>De Clercq</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, USA</address>
       <month>June</month>
@@ -1256,7 +1256,7 @@
       <author><first>Da</first><last>Yin</last></author>
       <author><first>Xiao</first><last>Liu</last></author>
       <author><first>Xiuyu</first><last>Wu</last></author>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>6–15</pages>
       <abstract>In this paper, we propose a soft label approach to target-level sentiment classification task, in which a history-based soft labeling model is proposed to measure the possibility of a context word as an opinion word. We also apply a convolution layer to extract local active features, and introduce positional weights to take relative distance information into consideration. In addition, we obtain more informative target representation by training with context tokens together to make deeper interaction between target and context tokens. We conduct experiments on SemEval 2014 datasets and the experimental results show that our approach significantly outperforms previous models and gives state-of-the-art results on these datasets.</abstract>
       <url hash="6a5d7300">W19-1302</url>
@@ -1276,7 +1276,7 @@
     </paper>
     <paper id="4">
       <title>Exploring Fine-Tuned Embeddings that Model Intensifiers for Emotion Analysis</title>
-      <author><first>Laura Ana Maria</first><last>Bostan</last></author>
+      <author id="laura-ana-maria-oberlander"><first>Laura Ana Maria</first><last>Bostan</last></author>
       <author><first>Roman</first><last>Klinger</last></author>
       <pages>25–34</pages>
       <abstract>Adjective phrases like “a little bit surprised”, “completely shocked”, or “not stunned at all” are not handled properly by current state-of-the-art emotion classification and intensity prediction systems. Based on this finding, we analyze differences between embeddings used by these systems in regard to their capability of handling such cases and argue that intensifiers in context of emotion words need special treatment, as is established for sentiment polarity classification, but not for more fine-grained emotion prediction. To resolve this issue, we analyze different aspects of a post-processing pipeline which enriches the word representations of such phrases. This includes expansion of semantic spaces at the phrase level and sub-word level followed by retrofitting to emotion lexicons. We evaluate the impact of these steps with ‘A La Carte and Bag-of-Substrings extensions based on pretrained GloVe,Word2vec, and fastText embeddings against a crowd-sourced corpus of intensity annotations for tweets containing our focus phrases. We show that the fastText-based models do not gain from handling these specific phrases under inspection. For Word2vec embeddings, we show that our post-processing pipeline improves the results by up to 8% on a novel dataset densly populated with intensifiers while it does not decrease the performance on the established EmoInt dataset.</abstract>
@@ -1321,10 +1321,10 @@
     </paper>
     <paper id="8">
       <title>How do we feel when a robot dies? Emotions expressed on <fixed-case>T</fixed-case>witter before and after hitch<fixed-case>BOT</fixed-case>’s destruction</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Frauke</first><last>Zeller</last></author>
       <author><first>David Harris</first><last>Smith</last></author>
-      <author><first>Saif</first><last>Mohammad</last></author>
+      <author id="saif-mohammad"><first>Saif</first><last>Mohammad</last></author>
       <author><first>Frank</first><last>Rudzicz</last></author>
       <pages>62–71</pages>
       <abstract>In 2014, a chatty but immobile robot called hitchBOT set out to hitchhike across Canada. It similarly made its way across Germany and the Netherlands, and had begun a trip across the USA when it was destroyed by vandals. In this work, we analyze the emotions and sentiments associated with words in tweets posted before and after hitchBOT’s destruction to answer two questions: Were there any differences in the emotions expressed across the different countries visited by hitchBOT? And how did the public react to the demise of hitchBOT? Our analyses indicate that while there were few cross-cultural differences in sentiment towards hitchBOT, there was a significant negative emotional reaction to its destruction, suggesting that people had formed an emotional connection with hitchBOT and perceived its destruction as morally wrong. We discuss potential implications of anthropomorphism and emotional attachment to robots from the perspective of robot ethics.</abstract>
@@ -1338,7 +1338,7 @@
       <author><first>Lakshya</first><last>Kumar</last></author>
       <author><first>Arpan</first><last>Somani</last></author>
       <author><first>Aditya</first><last>Joshi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>72–80</pages>
       <abstract>Research in sarcasm detection spans almost a decade. However a particular form of sarcasm remains unexplored: sarcasm expressed through numbers, which we estimate, forms about 11% of the sarcastic tweets in our dataset. The sentence ‘Love waking up at 3 am’ is sarcastic because of the number. In this paper, we focus on detecting sarcasm in tweets arising out of numbers. Initially, to get an insight into the problem, we implement a rule-based and a statistical machine learning-based (ML) classifier. The rule-based classifier conveys the crux of the numerical sarcasm problem, namely, incongruity arising out of numbers. The statistical ML classifier uncovers the indicators i.e., features of such sarcasm. The actual system in place, however, are two deep learning (DL) models, CNN and attention network that obtains an F-score of 0.93 and 0.91 on our dataset of tweets containing numbers. To the best of our knowledge, this is the first line of research investigating the phenomenon of sarcasm arising out of numbers, culminating in a detector thereof.</abstract>
       <url hash="314af0c7">W19-1309</url>
@@ -1372,10 +1372,10 @@
       <booktitle>Proceedings of the Sixth Workshop on <fixed-case>NLP</fixed-case> for Similar Languages, Varieties and Dialects</booktitle>
       <url hash="eeff0e97">W19-14</url>
       <editor><first>Marcos</first><last>Zampieri</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
-      <editor><first>Shervin</first><last>Malmasi</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></editor>
       <editor><first>Nikola</first><last>Ljubešić</last></editor>
-      <editor><first>Jörg</first><last>Tiedemann</last></editor>
+      <editor id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></editor>
       <editor><first>Ahmed</first><last>Ali</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Ann Arbor, Michigan</address>
@@ -1392,14 +1392,14 @@
       <author><first>Marcos</first><last>Zampieri</last></author>
       <author><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Tanja</first><last>Samardžić</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="tanja-samardzic"><first>Tanja</first><last>Samardžić</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Tung-Le</first><last>Pan</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Radu Tudor</first><last>Ionescu</last></author>
-      <author><first>Andrei M.</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei M.</first><last>Butnaru</last></author>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <pages>1–16</pages>
       <abstract>In this paper, we present the findings of the Third VarDial Evaluation Campaign organized as part of the sixth edition of the workshop on Natural Language Processing (NLP) for Similar Languages, Varieties and Dialects (VarDial), co-located with NAACL 2019. This year, the campaign included five shared tasks, including one task re-run – German Dialect Identification (GDI) – and four new tasks – Cross-lingual Morphological Analysis (CMA), Discriminating between Mainland and Taiwan variation of Mandarin Chinese (DMT), Moldavian vs. Romanian Cross-dialect Topic identification (MRC), and Cuneiform Language Identification (CLI). A total of 22 teams submitted runs across the five shared tasks. After the end of the competition, we received 14 system description papers, which are published in the VarDial workshop proceedings and referred to in this report.</abstract>
@@ -1410,7 +1410,7 @@
     <paper id="2">
       <title>Improving Cuneiform Language Identification with <fixed-case>BERT</fixed-case></title>
       <author><first>Gabriel</first><last>Bernier-Colborne</last></author>
-      <author><first>Cyril</first><last>Goutte</last></author>
+      <author id="cyril-goutte"><first>Cyril</first><last>Goutte</last></author>
       <author><first>Serge</first><last>Léger</last></author>
       <pages>17–25</pages>
       <abstract>We describe the systems developed by the National Research Council Canada for the Cuneiform Language Identification (CLI) shared task at the 2019 VarDial evaluation campaign. We compare a state-of-the-art baseline relying on character n-grams and a traditional statistical classifier, a voting ensemble of classifiers, and a deep learning approach using a Transformer network. We describe how these systems were trained, and analyze the impact of some preprocessing and model estimation decisions. The deep neural network achieved 77% accuracy on the test data, which turned out to be the best performance at the CLI evaluation, establishing a new state-of-the-art for cuneiform language identification.</abstract>
@@ -1488,7 +1488,7 @@
       <author><first>Tommi</first><last>Jauhiainen</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <author><first>Tero</first><last>Alstola</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <pages>89–98</pages>
       <abstract>This article introduces a corpus of cuneiform texts from which the dataset for the use of the Cuneiform Language Identification (CLI) 2019 shared task was derived as well as some preliminary language identification experiments conducted using that corpus. We also describe the CLI dataset and how it was derived from the corpus. In addition, we provide some baseline language identification results using the CLI dataset. To the best of our knowledge, the experiments detailed here represent the first time that automatic language identification methods have been used on cuneiform data.</abstract>
       <url hash="71544f85">W19-1409</url>
@@ -1498,7 +1498,7 @@
     <paper id="10">
       <title>Leveraging Pretrained Word Embeddings for Part-of-Speech Tagging of Code Switching Data</title>
       <author><first>Fahad</first><last>AlGhamdi</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>99–109</pages>
       <abstract>Linguistic Code Switching (CS) is a phenomenon that occurs when multilingual speakers alternate between two or more languages/dialects within a single conversation. Processing CS data is especially challenging in intra-sentential data given state-of-the-art monolingual NLP technologies since such technologies are geared toward the processing of one language at a time. In this paper, we address the problem of Part-of-Speech tagging (POS) in the context of linguistic code switching (CS). We explore leveraging multiple neural network architectures to measure the impact of different pre-trained embeddings methods on POS tagging CS data. We investigate the landscape in four CS language pairs, Spanish-English, Hindi-English, Modern Standard Arabic- Egyptian Arabic dialect (MSA-EGY), and Modern Standard Arabic- Levantine Arabic dialect (MSA-LEV). Our results show that multilingual embedding (e.g., MSA-EGY and MSA-LEV) helps closely related languages (EGY/LEV) but adds noise to the languages that are distant (SPA/HIN). Finally, we show that our proposed models outperform state-of-the-art CS taggers for MSA-EGY language pair.</abstract>
       <url hash="ff479d45">W19-1410</url>
@@ -1526,7 +1526,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>BAM</fixed-case>: A combination of deep and shallow models for <fixed-case>G</fixed-case>erman Dialect Identification.</title>
-      <author><first>Andrei M.</first><last>Butnaru</last></author>
+      <author id="andrei-butnaru"><first>Andrei M.</first><last>Butnaru</last></author>
       <pages>128–137</pages>
       <abstract>*This is a submission for the Third VarDial Evaluation Campaign* In this paper, we present a machine learning approach for the German Dialect Identification (GDI) Closed Shared Task of the DSL 2019 Challenge. The proposed approach combines deep and shallow models, by applying a voting scheme on the outputs resulted from a Character-level Convolutional Neural Networks (Char-CNN), a Long Short-Term Memory (LSTM) network, and a model based on String Kernels. The first model used is the Char-CNN model that merges multiple convolutions computed with kernels of different sizes. The second model is the LSTM network which applies a global max pooling over the returned sequences over time. Both models pass the activation maps to two fully-connected layers. The final model is based on String Kernels, computed on character p-grams extracted from speech transcripts. The model combines two blended kernel functions, one is the presence bits kernel, and the other is the intersection kernel. The empirical results obtained in the shared task prove that the approach can achieve good results. The system proposed in this paper obtained the fourth place with a macro-F1 score of 62.55%</abstract>
       <url hash="59dcb274">W19-1413</url>
@@ -1592,7 +1592,7 @@
     <paper id="19">
       <title>Discriminating between <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese and <fixed-case>S</fixed-case>wiss-<fixed-case>G</fixed-case>erman varieties using adaptive language models</title>
       <author><first>Tommi</first><last>Jauhiainen</last></author>
-      <author><first>Krister</first><last>Lindén</last></author>
+      <author id="krister-linden"><first>Krister</first><last>Lindén</last></author>
       <author><first>Heidi</first><last>Jauhiainen</last></author>
       <pages>178–187</pages>
       <abstract>This paper describes the language identification systems used by the SUKI team in the Discriminating between the Mainland and Taiwan variation of Mandarin Chinese (DMT) and the German Dialect Identification (GDI) shared tasks which were held as part of the third VarDial Evaluation Campaign. The DMT shared task included two separate tracks, one for the simplified Chinese script and one for the traditional Chinese script. We submitted three runs on both tracks of the DMT task as well as on the GDI task. We won the traditional Chinese track using Naive Bayes with language model adaptation, came second on GDI with an adaptive version of the HeLI 2.0 method, and third on the simplified Chinese track using again the adaptive Naive Bayes.</abstract>
@@ -1632,7 +1632,7 @@
     </paper>
     <paper id="23">
       <title>Experiments in Cuneiform Language Identification</title>
-      <author><first>Gustavo Henrique</first><last>Paetzold</last></author>
+      <author id="gustavo-paetzold"><first>Gustavo Henrique</first><last>Paetzold</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
       <pages>209–213</pages>
       <abstract>This paper presents methods to discriminate between languages and dialects written in Cuneiform script, one of the first writing systems in the world. We report the results obtained by the PZ team in the Cuneiform Language Identification (CLI) shared task organized within the scope of the VarDial Evaluation Campaign 2019. The task included two languages, Sumerian and Akkadian. The latter is divided into six dialects: Old Babylonian, Middle Babylonian peripheral, Standard Babylonian, Neo Babylonian, Late Babylonian, and Neo Assyrian. We approach the task using a meta-classifier trained on various SVM models and we show the effectiveness of the system for this task. Our submission achieved 0.738 F1 score in discriminating between the seven languages and dialects and it was ranked fourth in the competition among eight teams.</abstract>
@@ -1654,7 +1654,7 @@
       <title>Cross-lingual Annotation Projection Is Effective for Neural Part-of-Speech Tagging</title>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Diana</first><last>Dutka</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>223–233</pages>
       <abstract>We tackle the important task of part-of-speech tagging using a neural model in the zero-resource scenario, where we have no access to gold-standard POS training data. We compare this scenario with the low-resource scenario, where we have access to a small amount of gold-standard POS training data. Our experiments focus on Ukrainian as a representative of under-resourced languages. Russian is highly related to Ukrainian, so we exploit gold-standard Russian POS tags. We consider four techniques to perform Ukrainian POS tagging: zero-shot tagging and cross-lingual annotation projection (for the zero-resource scenario), and compare these with self-training and multilingual learning (for the low-resource scenario). We find that cross-lingual annotation projection works particularly well in the zero-resource scenario.</abstract>
       <url hash="946ae874">W19-1425</url>
@@ -1666,7 +1666,7 @@
     <meta>
       <booktitle>Proceedings of the Third Workshop on Structured Prediction for <fixed-case>NLP</fixed-case></booktitle>
       <url hash="6d754522">W19-15</url>
-      <editor><first>Andre</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>Andre</first><last>Martins</last></editor>
       <editor><first>Andreas</first><last>Vlachos</last></editor>
       <editor><first>Zornitsa</first><last>Kozareva</last></editor>
       <editor><first>Sujith</first><last>Ravi</last></editor>
@@ -1708,7 +1708,7 @@
       <title><fixed-case>SPARSE</fixed-case>: Structured Prediction using Argument-Relative Structured Encoding</title>
       <author><first>Rishi</first><last>Bommasani</last></author>
       <author><first>Arzoo</first><last>Katiyar</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>13–17</pages>
       <abstract>We propose structured encoding as a novel approach to learning representations for relations and events in neural structured prediction. Our approach explicitly leverages the structure of available relation and event metadata to generate these representations, which are parameterized by both the attribute structure of the metadata as well as the learned representation of the arguments of the relations and events. We consider affine, biaffine, and recurrent operators for building hierarchical representations and modelling underlying features. We apply our approach to the second-order structured prediction task studied in the 2016/2017 Belief and Sentiment analysis evaluations (BeSt): given a document and its entities, relations, and events (including metadata and mentions), determine the sentiment of each entity towards every relation and event in the document. Without task-specific knowledge sources or domain engineering, we significantly improve over systems and baselines that neglect the available metadata or its hierarchical structure. We observe across-the-board improvements on the BeSt 2016/2017 sentiment analysis task of at least 2.3 (absolute) and 10.6% (relative) F-measure over the previous state-of-the-art.</abstract>
       <url hash="6db4a934">W19-1503</url>
@@ -1721,7 +1721,7 @@
       <title>Lightly-supervised Representation Learning with Global Interpretability</title>
       <author><first>Andrew</first><last>Zupon</last></author>
       <author><first>Maria</first><last>Alexeeva</last></author>
-      <author><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
+      <author id="marco-a-valenzuela-escarcega"><first>Marco A.</first><last>Valenzuela-Escárcega</last></author>
       <author><first>Ajay</first><last>Nagesh</last></author>
       <author><first>Mihai</first><last>Surdeanu</last></author>
       <pages>18–28</pages>
@@ -1764,7 +1764,7 @@
     <paper id="1">
       <title>Corpus of Multimodal Interaction for Collaborative Planning</title>
       <author><first>Miltiadis Marios</first><last>Katsakioris</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <author><first>Ioannis</first><last>Konstas</last></author>
       <author><first>Atanas</first><last>Laskov</last></author>
       <pages>1–6</pages>
@@ -1837,7 +1837,7 @@
       <title><fixed-case>S</fixed-case>patial<fixed-case>N</fixed-case>et: A Declarative Resource for Spatial Relations</title>
       <author><first>Morgan</first><last>Ulinski</last></author>
       <author><first>Bob</first><last>Coyne</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>61–70</pages>
       <abstract>This paper introduces SpatialNet, a novel resource which links linguistic expressions to actual spatial configurations. SpatialNet is based on FrameNet (Ruppenhofer et al., 2016) and VigNet (Coyne et al., 2011), two resources which use frame semantics to encode lexical meaning. SpatialNet uses a deep semantic representation of spatial relations to provide a formal description of how a language expresses spatial information. This formal representation of the lexical semantics of spatial language also provides a consistent way to represent spatial meaning across multiple languages. In this paper, we describe the structure of SpatialNet, with examples from English and German. We also show how SpatialNet can be combined with other existing NLP tools to create a text-to-scene system for a language.</abstract>
       <url hash="7f072af1">W19-1607</url>
@@ -1862,7 +1862,7 @@
       <url hash="7f873b79">W19-17</url>
       <editor><first>Heidi</first><last>Christensen</last><affiliation>University of Sheffield</affiliation></editor>
       <editor><first>Kristy</first><last>Hollingshead</last><affiliation>Florida Institute for Human and Machine Cognition</affiliation></editor>
-      <editor><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></editor>
+      <editor id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last><affiliation>Boston College</affiliation></editor>
       <editor><first>Frank</first><last>Rudzicz</last><affiliation>University of Toronto</affiliation></editor>
       <editor><first>Keith</first><last>Vertanen</last><affiliation>Michigan Technological University</affiliation></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -1915,7 +1915,7 @@
       <title>Speech-based Estimation of Bulbar Regression in Amyotrophic Lateral Sclerosis</title>
       <author><first>Alan</first><last>Wisler</last></author>
       <author><first>Kristin</first><last>Teplansky</last></author>
-      <author><first>Jordan</first><last>Green</last></author>
+      <author id="jordan-r-green"><first>Jordan</first><last>Green</last></author>
       <author><first>Yana</first><last>Yunusova</last></author>
       <author><first>Thomas</first><last>Campbell</last></author>
       <author><first>Daragh</first><last>Heitzman</last></author>
@@ -1929,7 +1929,7 @@
     <paper id="5">
       <title>A Blissymbolics Translation System</title>
       <author><first>Usman</first><last>Sohail</last></author>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <pages>32–36</pages>
       <abstract>Blissymbolics (Bliss) is a pictographic writing system that is used by people with communication disorders. Bliss attempts to create a writing system that makes words easier to distinguish by using pictographic symbols that encapsulate meaning rather than sound, as the English alphabet does for example. Users of Bliss rely on human interpreters to use Bliss. We created a translation system from Bliss to natural English with the hopes of decreasing the reliance on human interpreters by the Bliss community. We first discuss the basic rules of Blissymbolics. Then we point out some of the challenges associated with developing computer assisted tools for Blissymbolics. Next we talk about our ongoing work in developing a translation system, including current limitations, and future work. We conclude with a set of examples showing the current capabilities of our translation system.</abstract>
       <url hash="5894dd7c">W19-1705</url>
@@ -1952,7 +1952,7 @@
     <paper id="7">
       <title>Noisy Neural Language Modeling for Typing Prediction in <fixed-case>BCI</fixed-case> Communication</title>
       <author><first>Rui</first><last>Dong</last></author>
-      <author><first>David</first><last>Smith</last></author>
+      <author id="david-a-smith"><first>David</first><last>Smith</last></author>
       <author><first>Shiran</first><last>Dudy</last></author>
       <author><first>Steven</first><last>Bedrick</last></author>
       <pages>44–51</pages>
@@ -1966,8 +1966,8 @@
     <meta>
       <booktitle>Proceedings of the Second Workshop on Shortcomings in Vision and Language</booktitle>
       <url hash="348565b8">W19-18</url>
-      <editor><first>Raffaella</first><last>Bernardi</last></editor>
-      <editor><first>Raquel</first><last>Fernandez</last></editor>
+      <editor id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></editor>
+      <editor id="raquel-fernandez"><first>Raquel</first><last>Fernandez</last></editor>
       <editor><first>Spandana</first><last>Gella</last></editor>
       <editor><first>Kushal</first><last>Kafle</last></editor>
       <editor><first>Christopher</first><last>Kanan</last></editor>
@@ -2063,7 +2063,7 @@
     <paper id="8">
       <title>Grounded Word Sense Translation</title>
       <author><first>Chiraag</first><last>Lala</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>78–85</pages>
       <abstract>Recent work on visually grounded language learning has focused on broader applications of grounded representations, such as visual question answering and multimodal machine translation. In this paper we consider grounded word sense translation, i.e. the task of correctly translating an ambiguous source word given the corresponding textual and visual context. Our main objective is to investigate the extent to which images help improve word-level (lexical) translation quality. We do so by first studying the dataset for this task to understand the scope and challenges of the task. We then explore different data settings, image features, and ways of grounding to investigate the gain from using images in each of the combinations. We find that grounding on the image is specially beneficial in weaker unidirectional recurrent translation models. We observe that adding structured image information leads to stronger gains in lexical translation accuracy.</abstract>
@@ -2107,7 +2107,7 @@
       <title>An Analysis of Attention over Clinical Notes for Predictive Tasks</title>
       <author><first>Sarthak</first><last>Jain</last></author>
       <author><first>Ramin</first><last>Mohammadi</last></author>
-      <author><first>Byron C.</first><last>Wallace</last></author>
+      <author id="byron-c-wallace"><first>Byron C.</first><last>Wallace</last></author>
       <pages>15–21</pages>
       <abstract>The shift to electronic medical records (EMRs) has engendered research into machine learning and natural language technologies to analyze patient records, and to predict from these clinical outcomes of interest. Two observations motivate our aims here. First, unstructured notes contained within EMR often contain key information, and hence should be exploited by models. Second, while strong predictive performance is important, interpretability of models is perhaps equally so for applications in this domain. Together, these points suggest that neural models for EMR may benefit from incorporation of attention over notes, which one may hope will both yield performance gains and afford transparency in predictions. In this work we perform experiments to explore this question using two EMR corpora and four different predictive tasks, that: (i) inclusion of attention mechanisms is critical for neural encoder modules that operate over notes fields in order to yield competitive performance, but, (ii) unfortunately, while these boost predictive performance, it is decidedly less clear whether they provide meaningful support for predictions.</abstract>
       <url hash="3c19c285">W19-1902</url>
@@ -2116,7 +2116,7 @@
     </paper>
     <paper id="3">
       <title>Extracting Adverse Drug Event Information with Minimal Engineering</title>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Alon</first><last>Geva</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <pages>22–27</pages>
@@ -2128,7 +2128,7 @@
     <paper id="4">
       <title>Hierarchical Nested Named Entity Recognition</title>
       <author><first>Zita</first><last>Marinho</last></author>
-      <author><first>Afonso</first><last>Mendes</last></author>
+      <author id="alfonso-mendes"><first>Afonso</first><last>Mendes</last></author>
       <author><first>Sebastião</first><last>Miranda</last></author>
       <author><first>David</first><last>Nogueira</last></author>
       <pages>28–34</pages>
@@ -2164,7 +2164,7 @@
     <paper id="7">
       <title>Study of lexical aspect in the <fixed-case>F</fixed-case>rench medical language. Development of a lexical resource</title>
       <author><first>Agathe</first><last>Pierson</last></author>
-      <author><first>Cédrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cédrick</first><last>Fairon</last></author>
       <pages>55–64</pages>
       <abstract>This paper details the development of a linguistic resource designed to improve temporal information extraction systems and to integrate aspectual values. After a brief review of recent works in temporal information extraction for the medical area, we discuss the linguistic notion of aspect and how it got a place in the NLP field. Then, we present our clinical data and describe the five-step approach adopted in this study. Finally, we represent the linguistic resource itself and explain how we elaborated it and which properties were selected for the creation of the tables.</abstract>
       <url hash="a4debe2d">W19-1907</url>
@@ -2174,7 +2174,7 @@
     <paper id="8">
       <title>A <fixed-case>BERT</fixed-case>-based Universal Model for Both Within- and Cross-sentence Clinical Temporal Relation Extraction</title>
       <author><first>Chen</first><last>Lin</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
       <author><first>Steven</first><last>Bethard</last></author>
       <author><first>Guergana</first><last>Savova</last></author>
@@ -2204,7 +2204,7 @@
       <author><first>Alejandro</first><last>Piad-Morffis</last></author>
       <author><first>Yoan</first><last>Guitérrez</last></author>
       <author><first>Suilan</first><last>Estevez-Velarde</last></author>
-      <author><first>Rafael</first><last>Muñoz</last></author>
+      <author id="rafael-munoz"><first>Rafael</first><last>Muñoz</last></author>
       <pages>79–88</pages>
       <abstract>Knowledge discovery from text in natural language is a task usually aided by the manual construction of annotated corpora. Specifically in the clinical domain, several annotation models are used depending on the characteristics of the task to solve (e.g., named entity recognition, relation extraction, etc.). However, few general-purpose annotation models exist, that can support a broad range of knowledge extraction tasks. This paper presents an annotation model designed to capture a large portion of the semantics of natural language text. The structure of the annotation model is presented, with examples of annotated sentences and a brief description of each semantic role and relation defined. This research focuses on an application to clinical texts in the Spanish language. Nevertheless, the presented annotation model is extensible to other domains and languages. An example of annotated sentences, guidelines, and suitable configuration files for an annotation tool are also provided for the research community.</abstract>
       <url hash="790d2e5a">W19-1910</url>
@@ -2264,7 +2264,7 @@
       <author><first>Eben</first><last>Holderness</last></author>
       <author><first>Philip</first><last>Cawkwell</last></author>
       <author><first>Kirsten</first><last>Bolton</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Mei-Hua</first><last>Hall</last></author>
       <pages>117–123</pages>
       <abstract>Recently natural language processing (NLP) tools have been developed to identify and extract salient risk indicators in electronic health records (EHRs). Sentiment analysis, although widely used in non-medical areas for improving decision making, has been studied minimally in the clinical setting. In this study, we undertook, to our knowledge, the first domain adaptation of sentiment analysis to psychiatric EHRs by defining psychiatric clinical sentiment, performing an annotation project, and evaluating multiple sentence-level sentiment machine learning (ML) models. Results indicate that off-the-shelf sentiment analysis tools fail in identifying clinically positive or negative polarity, and that the definition of clinical sentiment that we provide is learnable with relatively small amounts of training data. This project is an initial step towards further refining sentiment analysis methods for clinical use. Our long-term objective is to incorporate the results of this project as part of a machine learning model that predicts inpatient readmission risk. We hope that this work will initiate a discussion concerning domain adaptation of sentiment analysis to the clinical setting.</abstract>
@@ -2276,8 +2276,8 @@
       <title>Medical Word Embeddings for <fixed-case>S</fixed-case>panish: Development and Evaluation</title>
       <author><first>Felipe</first><last>Soares</last></author>
       <author><first>Marta</first><last>Villegas</last></author>
-      <author><first>Aitor</first><last>Gonzalez-Agirre</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="aitor-gonzalez-agirre"><first>Aitor</first><last>Gonzalez-Agirre</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <author><first>Jordi</first><last>Armengol-Estapé</last></author>
       <pages>124–133</pages>
       <abstract>Word embeddings are representations of words in a dense vector space. Although they are not recent phenomena in Natural Language Processing (NLP), they have gained momentum after the recent developments of neural methods and Word2Vec. Regarding their applications in medical and clinical NLP, they are invaluable resources when training in-domain named entity recognition systems, classifiers or taggers, for instance. Thus, the development of tailored word embeddings for medical NLP is of great interest. However, we identified a gap in the literature which we aim to fill in this paper: the availability of embeddings for medical NLP in Spanish, as well as a standardized form of intrinsic evaluation. Since most work has been done for English, some established datasets for intrinsic evaluation are already available. In this paper, we show the steps we employed to adapt such datasets for the first time to Spanish, of particular relevance due to the considerable volume of EHRs in this language, as well as the creation of in-domain medical word embeddings for the Spanish using the state-of-the-art FastText model. We performed intrinsic evaluation with our adapted datasets, as well as extrinsic evaluation with a named entity recognition systems using a baseline embedding of general-domain. Both experiments proved that our embeddings are suitable for use in medical NLP in the Spanish language, and are more accurate than general-domain ones.</abstract>
@@ -2355,7 +2355,7 @@
       <author><first>Denis</first><last>Newman-Griffis</last></author>
       <author><first>Aparajita</first><last>Haldar</last></author>
       <author><first>Hakan</first><last>Ferhatosmanoglu</last></author>
-      <author><first>Eric</first><last>Fosler-Lussier</last></author>
+      <author id="eric-fosler-lussier"><first>Eric</first><last>Fosler-Lussier</last></author>
       <pages>8–17</pages>
       <abstract>Analysis of word embedding properties to inform their use in downstream NLP tasks has largely been studied by assessing nearest neighbors. However, geometric properties of the continuous feature space contribute directly to the use of embedding features in downstream models, and are largely unexplored. We consider four properties of word embedding geometry, namely: position relative to the origin, distribution of features in the vector space, global pairwise distances, and local pairwise distances. We define a sequence of transformations to generate new embeddings that expose subsets of these properties to downstream models and evaluate change in task performance to understand the contribution of each property to NLP models. We transform publicly available pretrained embeddings from three popular toolkits (word2vec, GloVe, and FastText) and evaluate on a variety of intrinsic tasks, which model linguistic information in the vector space, and extrinsic tasks, which use vectors as input to machine learning models. We find that intrinsic evaluations are highly sensitive to absolute position, while extrinsic tasks rely primarily on local similarity. Our findings suggest that future embedding models and post-processing techniques should focus primarily on similarity to nearby points in vector space.</abstract>
       <url hash="75c67ace">W19-2002</url>
@@ -2378,7 +2378,7 @@
     <paper id="4">
       <title>How Well Do Embedding Models Capture Non-compositionality? A View from Multiword Expressions</title>
       <author><first>Navnita</first><last>Nandakumar</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Bahar</first><last>Salehi</last></author>
       <pages>27–34</pages>
       <abstract>In this paper, we apply various embedding methods on multiword expressions to study how well they capture the nuances of non-compositional data. Our results from a pool of word-, character-, and document-level embbedings suggest that Word2vec performs the best, followed by FastText and Infersent. Moreover, we find that recently-proposed contextualised embedding models such as Bert and ELMo are not adept at handling non-compositionality in multiword expressions.</abstract>
@@ -2388,7 +2388,7 @@
     </paper>
     <paper id="5">
       <title>Measuring Semantic Abstraction of Multilingual <fixed-case>NMT</fixed-case> with Paraphrase Recognition and Generation Tasks</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
       <pages>35–42</pages>
       <abstract>In this paper, we investigate whether multilingual neural translation models learn stronger semantic abstractions of sentences than bilingual ones. We test this hypotheses by measuring the perplexity of such models when applied to paraphrases of the source language. The intuition is that an encoder produces better representations if a decoder is capable of recognizing synonymous sentences in the same language even though the model is never trained for that task. In our setup, we add 16 different auxiliary languages to a bidirectional bilingual baseline model (English-French) and test it with in-domain and out-of-domain paraphrases in English. The results show that the perplexity is significantly reduced in each of the cases, indicating that meaning can be grounded in translation. This is further supported by a study on paraphrase generation that we also include at the end of the paper.</abstract>
@@ -2460,7 +2460,7 @@
       <title>Probing Biomedical Embeddings from Language Models</title>
       <author><first>Qiao</first><last>Jin</last></author>
       <author><first>Bhuwan</first><last>Dhingra</last></author>
-      <author><first>William</first><last>Cohen</last></author>
+      <author id="william-cohen"><first>William</first><last>Cohen</last></author>
       <author><first>Xinghua</first><last>Lu</last></author>
       <pages>82–89</pages>
       <abstract>Contextualized word embeddings derived from pre-trained language models (LMs) show significant improvements on downstream NLP tasks. Pre-training on domain-specific corpora, such as biomedical articles, further improves their performance. In this paper, we conduct probing experiments to determine what additional information is carried intrinsically by the in-domain trained contextualized embeddings. For this we use the pre-trained LMs as fixed feature extractors and restrict the downstream task models to not have additional sequence modeling layers. We compare BERT (Devlin et al. 2018), ELMo (Peters et al., 2018), BioBERT (Lee et al., 2019) and BioELMo, a biomedical version of ELMo trained on 10M PubMed abstracts. Surprisingly, while fine-tuned BioBERT is better than BioELMo in biomedical NER and NLI tasks, as a fixed feature extractor BioELMo outperforms BioBERT in our probing tasks. We use visualization and nearest neighbor analysis to show that better encoding of entity-type and relational information leads to this superiority.</abstract>
@@ -2538,7 +2538,7 @@
       <author><first>Veronica</first><last>Lynn</last></author>
       <author><first>Salvatore</first><last>Giorgi</last></author>
       <author><first>Niranjan</first><last>Balasubramanian</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>18–28</pages>
       <abstract>NLP naturally puts a primary focus on leveraging document language, occasionally considering user attributes as supplemental. However, as we tackle more social scientific tasks, it is possible user attributes might be of primary importance and the document supplemental. Here, we systematically investigate the predictive power of user-level features alone versus document-level features for document-level tasks. We first show user attributes can sometimes carry more task-related information than the document itself. For example, a tweet-level stance detection model using only 13 user-level attributes (i.e. features that did not depend on the specific tweet) was able to obtain a higher F1 than the top-performing SemEval participant. We then consider multiple tasks and a wider range of user attributes, showing the performance of strong document-only models can often be improved (as in stance, sentiment, and sarcasm) with user attributes, particularly benefiting tasks with stable “trait-like” outcomes (e.g. stance) most relative to frequently changing “state-like” outcomes (e.g. sentiment). These results not only support the growing work on integrating user factors into predictive systems, but that some of our NLP tasks might be better cast primarily as user-level (or human) tasks.</abstract>
       <url hash="61f790bf">W19-2103</url>
@@ -2585,7 +2585,7 @@
     <paper id="7">
       <title>Using time series and natural language processing to identify viral moments in the 2016 <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Presidential Debate</title>
       <author><first>Josephine</first><last>Lukito</last></author>
-      <author><first>Prathusha</first><last>K Sarma</last></author>
+      <author id="prathusha-kameswara-sarma"><first>Prathusha</first><last>K Sarma</last></author>
       <author><first>Jordan</first><last>Foley</last></author>
       <author><first>Aman</first><last>Abhishek</last></author>
       <pages>54–64</pages>
@@ -2597,7 +2597,7 @@
     <paper id="8">
       <title>Stance Classification, Outcome Prediction, and Impact Assessment: <fixed-case>NLP</fixed-case> Tasks for Studying Group Decision-Making</title>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Alan</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan</first><last>Black</last></author>
       <pages>65–77</pages>
       <abstract>In group decision-making, the nuanced process of conflict and resolution that leads to consensus formation is closely tied to the quality of decisions made. Behavioral scientists rarely have rich access to process variables, though, as unstructured discussion transcripts are difficult to analyze. Here, we define ways for NLP researchers to contribute to the study of groups and teams. We introduce three tasks alongside a large new corpus of over 400,000 group debates on Wikipedia. We describe the tasks and their importance, then provide baselines showing that BERT contextualized word embeddings consistently outperform other language representations.</abstract>
       <url hash="9d163252">W19-2108</url>
@@ -2631,7 +2631,7 @@
     <paper id="11">
       <title>Simple dynamic word embeddings for mapping perceptions in the public sphere</title>
       <author><first>Nabeel</first><last>Gillani</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <pages>94–99</pages>
       <abstract>Word embeddings trained on large-scale historical corpora can illuminate human biases and stereotypes that perpetuate social inequalities. These embeddings are often trained in separate vector space models defined according to different attributes of interest. In this paper, we introduce a single, unified dynamic embedding model that learns attribute-specific word embeddings and apply it to a novel dataset—talk radio shows from around the US—to analyze perceptions about refugees. We validate our model on a benchmark dataset and apply it to two corpora of talk radio shows averaging 117 million words produced over one month across 83 stations and 64 cities. Our findings suggest that dynamic word embeddings are capable of identifying nuanced differences in public discourse about contentious topics, suggesting their usefulness as a tool for better understanding how the public perceives and engages with different issues across time, geography, and other dimensions.</abstract>
       <url hash="ce2b5028">W19-2111</url>
@@ -2642,7 +2642,7 @@
     </paper>
     <paper id="12">
       <title>Modeling Behavioral Aspects of Social Media Discourse for Moral Classification</title>
-      <author><first>Kristen</first><last>Johnson</last></author>
+      <author id="kristen-johnson"><first>Kristen</first><last>Johnson</last></author>
       <author><first>Dan</first><last>Goldwasser</last></author>
       <pages>100–109</pages>
       <abstract>Political discourse on social media microblogs, specifically Twitter, has become an undeniable part of mainstream U.S. politics. Given the length constraint of tweets, politicians must carefully word their statements to ensure their message is understood by their intended audience. This constraint often eliminates the context of the tweet, making automatic analysis of social media political discourse a difficult task. To overcome this challenge, we propose simultaneous modeling of high-level abstractions of political language, such as political slogans and framing strategies, with abstractions of how politicians behave on Twitter. These behavioral abstractions can be further leveraged as forms of supervision in order to increase prediction accuracy, while reducing the burden of annotation. In this work, we use Probabilistic Soft Logic (PSL) to build relational models to capture the similarities in language and behavior that obfuscate political messages on Twitter. When combined, these descriptors reveal the moral foundations underlying the discourse of U.S. politicians online, <i>across</i> differing governing administrations, showing how party talking points remain cohesive or change over time.</abstract>
@@ -2659,10 +2659,10 @@
       <editor><first>Elliott</first><last>Ash</last></editor>
       <editor><first>Leslie</first><last>Barrett</last></editor>
       <editor><first>Daniel</first><last>Chen</last></editor>
-      <editor><first>Adam</first><last>Meyers</last></editor>
-      <editor><first>Daniel</first><last>Preotiuc-Pietro</last></editor>
+      <editor id="adam-meyers"><first>Adam</first><last>Meyers</last></editor>
+      <editor id="daniel-preotiuc-pietro"><first>Daniel</first><last>Preotiuc-Pietro</last></editor>
       <editor><first>David</first><last>Rosenberg</last></editor>
-      <editor><first>Amanda</first><last>Stent</last></editor>
+      <editor id="amanda-stent"><first>Amanda</first><last>Stent</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, Minnesota</address>
       <month>June</month>
@@ -2690,7 +2690,7 @@
       <author><first>John</first><last>Aberdeen</last></author>
       <author><first>Karl</first><last>Branting</last></author>
       <author><first>Craig</first><last>Pfeifer</last></author>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <author><first>Amartya</first><last>Chakraborty</last></author>
       <pages>12–20</pages>
       <abstract>Recent research has demonstrated that judicial and administrative decisions can be predicted by machine-learning models trained on prior decisions. However, to have any practical application, these predictions must be explainable, which in turn requires modeling a rich set of features. Such approaches face a roadblock if the knowledge engineering required to create these features is not scalable. We present an approach to developing a feature-rich corpus of administrative rulings about domain name disputes, an approach which leverages a small amount of manual annotation and prototypical patterns present in the case documents to automatically extend feature labels to the entire corpus. To demonstrate the feasibility of this approach, we report results from systems trained on this dataset.</abstract>
@@ -2700,7 +2700,7 @@
     </paper>
     <paper id="3">
       <title>The Extent of Repetition in Contract Language</title>
-      <author><first>Dan</first><last>Simonson</last></author>
+      <author id="dan-simonson"><first>Dan</first><last>Simonson</last></author>
       <author><first>Daniel</first><last>Broderick</last></author>
       <author><first>Jonathan</first><last>Herr</last></author>
       <pages>21–30</pages>
@@ -2745,14 +2745,14 @@
     <paper id="7">
       <title>Developing and Orchestrating a Portfolio of Natural Legal Language Processing and Document Curation Services</title>
       <author><first>Georg</first><last>Rehm</last></author>
-      <author><first>Julián</first><last>Moreno-Schneider</last></author>
+      <author id="julian-moreno-schneider"><first>Julián</first><last>Moreno-Schneider</last></author>
       <author><first>Jorge</first><last>Gracia</last></author>
       <author><first>Artem</first><last>Revenko</last></author>
       <author><first>Victor</first><last>Mireles</last></author>
       <author><first>Maria</first><last>Khvalchik</last></author>
       <author><first>Ilan</first><last>Kernerman</last></author>
       <author><first>Andis</first><last>Lagzdins</last></author>
-      <author><first>Marcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
       <author><first>Artus</first><last>Vasilevskis</last></author>
       <author><first>Elena</first><last>Leitner</last></author>
       <author><first>Jan</first><last>Milde</last></author>
@@ -2862,8 +2862,8 @@
       <author><first>Alessio</first><last>Palmero Aprosio</last></author>
       <author><first>Sara</first><last>Tonelli</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
-      <author><first>Mattia A.</first><last>Di Gangi</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia A.</first><last>Di Gangi</last></author>
       <pages>37–44</pages>
       <abstract>Neural text simplification has gained increasing attention in the NLP community thanks to recent advancements in deep sequence-to-sequence learning. Most recent efforts with such a data-demanding paradigm have dealt with the English language, for which sizeable training datasets are currently available to deploy competitive models. Similar improvements on less resource-rich languages are conditioned either to intensive manual work to create training data, or to the design of effective automatic generation techniques to bypass the data acquisition bottleneck. Inspired by the machine translation field, in which synthetic parallel pairs generated from monolingual data yield significant improvements to neural models, in this paper we exploit large amounts of heterogeneous data to automatically select simple sentences, which are then used to create synthetic simplification pairs. We also evaluate other solutions, such as oversampling and the use of external word embeddings to be fed to the neural simplification system. Our approach is evaluated on Italian and Spanish, for which few thousand gold sentence pairs are available. The results show that these techniques yield performance improvements over a baseline sequence-to-sequence configuration.</abstract>
       <url hash="22f5a6d1">W19-2305</url>
@@ -2912,8 +2912,8 @@
       <author><first>Ziang</first><last>Xie</last></author>
       <author><first>Cindy</first><last>Wang</last></author>
       <author><first>Max</first><last>Drach</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
-      <author><first>Andrew</first><last>Ng</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
+      <author id="andrew-y-ng"><first>Andrew</first><last>Ng</last></author>
       <pages>74–81</pages>
       <abstract>We introduce a simple method for text style transfer that frames style transfer as denoising: we synthesize a noisy corpus and treat the source style as a noisy version of the target style. To control for aspects such as preserving meaning while modifying style, we propose a reranking approach in the data synthesis phase. We evaluate our method on three novel style transfer tasks: transferring between British and American varieties, text genres (formal vs. casual), and lyrics from different musical genres. By measuring style transfer quality, meaning preservation, and the fluency of generated outputs, we demonstrate that our method is able both to produce high-quality output while maintaining the flexibility to suggest syntactically rich stylistic edits.</abstract>
       <url hash="43482cf2">W19-2309</url>
@@ -2983,7 +2983,7 @@
     <paper id="2">
       <title>Character Identification Refined: A Proposal</title>
       <author><first>Labiba</first><last>Jahan</last></author>
-      <author><first>Mark</first><last>Finlayson</last></author>
+      <author id="mark-finlayson"><first>Mark</first><last>Finlayson</last></author>
       <pages>12–18</pages>
       <abstract>Characters are a key element of narrative and so character identification plays an important role in automatic narrative understanding. Unfortunately, most prior work that incorporates character identification is not built upon a clear, theoretically grounded concept of character. They either take character identification for granted (e.g., using simple heuristics on referring expressions), or rely on simplified definitions that do not capture important distinctions between characters and other referents in the story. Prior approaches have also been rather complicated, relying, for example, on predefined case bases or ontologies. In this paper we propose a narratologically grounded definition of character for discussion at the workshop, and also demonstrate a preliminary yet straightforward supervised machine learning model with a small set of features that performs well on two corpora. The most important of the two corpora is a set of 46 Russian folktales, on which the model achieves an F1 of 0.81. Error analysis suggests that features relevant to the plot will be necessary for further improvements in performance.</abstract>
       <url hash="79390108">W19-2402</url>
@@ -2994,8 +2994,8 @@
       <title>Deep Natural Language Understanding of News Text</title>
       <author><first>Jaya</first><last>Shree</last></author>
       <author><first>Emily</first><last>Liu</last></author>
-      <author><first>Andrew</first><last>Gordon</last></author>
-      <author><first>Jerry</first><last>Hobbs</last></author>
+      <author id="andrew-gordon"><first>Andrew</first><last>Gordon</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry</first><last>Hobbs</last></author>
       <pages>19–27</pages>
       <abstract>Early proposals for the deep understanding of natural language text advocated an approach of “interpretation as abduction,” where the meaning of a text was derived as an explanation that logically entailed the input words, given a knowledge base of lexical and commonsense axioms. While most subsequent NLP research has instead pursued statistical and data-driven methods, the approach of interpretation as abduction has seen steady advancements in both theory and software implementations. In this paper, we summarize advances in deriving the logical form of the text, encoding commonsense knowledge, and technologies for scalable abductive reasoning. We then explore the application of these advancements to the deep understanding of a paragraph of news text, where the subtle meaning of words and phrases are resolved by backward chaining on a knowledge base of 80 hand-authored axioms.</abstract>
       <url hash="22849ca7">W19-2403</url>
@@ -3004,13 +3004,13 @@
     </paper>
     <paper id="4">
       <title>Extraction of Message Sequence Charts from Narrative History Text</title>
-      <author><first>Girish</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish</first><last>Palshikar</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
       <author><first>Sangameshwar</first><last>Patil</last></author>
       <author><first>Swapnil</first><last>Hingmire</last></author>
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
       <author><first>Harsimran</first><last>Bedi</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>28–36</pages>
       <abstract>In this paper, we advocate the use of Message Sequence Chart (MSC) as a knowledge representation to capture and visualize multi-actor interactions and their temporal ordering. We propose algorithms to automatically extract an MSC from a history narrative. For a given narrative, we first identify verbs which indicate interactions and then use dependency parsing and Semantic Role Labelling based approaches to identify senders (initiating actors) and receivers (other actors involved) for these interaction verbs. As a final step in MSC extraction, we employ a state-of-the art algorithm to temporally re-order these interactions. Our evaluation on multiple publicly available narratives shows improvements over four baselines.</abstract>
@@ -3044,11 +3044,11 @@
     <meta>
       <booktitle>Proceedings of the 3rd Joint <fixed-case>SIGHUM</fixed-case> Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</booktitle>
       <url hash="cf6597f7">W19-25</url>
-      <editor><first>Beatrice</first><last>Alex</last></editor>
+      <editor id="beatrice-alex"><first>Beatrice</first><last>Alex</last></editor>
       <editor><first>Stefania</first><last>Degaetano-Ortlieb</last></editor>
       <editor><first>Anna</first><last>Kazantseva</last></editor>
       <editor><first>Nils</first><last>Reiter</last></editor>
-      <editor><first>Stan</first><last>Szpakowicz</last></editor>
+      <editor id="stan-szpakowicz"><first>Stan</first><last>Szpakowicz</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, USA</address>
       <month>June</month>
@@ -3062,7 +3062,7 @@
     <paper id="1">
       <title>Modeling Word Emotion in Historical Language: Quantity Beats Supposed Stability in Seed Word Selection</title>
       <author><first>Johannes</first><last>Hellrich</last></author>
-      <author><first>Sven</first><last>Buechel</last></author>
+      <author id="sven-buechel"><first>Sven</first><last>Buechel</last></author>
       <author><first>Udo</first><last>Hahn</last></author>
       <pages>1–11</pages>
       <abstract>To understand historical texts, we must be aware that language—including the emotional connotation attached to words—changes over time. In this paper, we aim at estimating the emotion which is associated with a given word in former language stages of English and German. Emotion is represented following the popular Valence-Arousal-Dominance (VAD) annotation scheme. While being more expressive than polarity alone, existing word emotion induction methods are typically not suited for addressing it. To overcome this limitation, we present adaptations of two popular algorithms to VAD. To measure their effectiveness in diachronic settings, we present the first gold standard for historical word emotions, which was created by scholars with proficiency in the respective language stages and covers both English and German. In contrast to claims in previous work, our findings indicate that hand-selecting small sets of seed words with supposedly stable emotional meaning is actually harm- rather than helpful.</abstract>
@@ -3074,7 +3074,7 @@
       <title>Clustering-Based Article Identification in Historical Newspapers</title>
       <author><first>Martin</first><last>Riedl</last></author>
       <author><first>Daniela</first><last>Betz</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>12–17</pages>
       <abstract>This article focuses on the problem of identifying articles and recovering their text from within and across newspaper pages when OCR just delivers one text file per page. We frame the task as a segmentation plus clustering step. Our results on a sample of 1912 New York Tribune magazine shows that performing the clustering based on similarities computed with word embeddings outperforms a similarity measure based on character n-grams and words. Furthermore, the automatic segmentation based on the text results in low scores, due to the low quality of some OCRed documents.</abstract>
       <url hash="42270e42">W19-2502</url>
@@ -3157,7 +3157,7 @@
       <author><first>Mika</first><last>Hämäläinen</last></author>
       <author><first>Tanja</first><last>Säily</last></author>
       <author><first>Jack</first><last>Rueter</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Eetu</first><last>Mäkelä</last></author>
       <pages>71–75</pages>
       <abstract>This paper studies the use of NMT (neural machine translation) as a normalization method for an early English letter corpus. The corpus has previously been normalized so that only less frequent deviant forms are left out without normalization. This paper discusses different methods for improving the normalization of these deviant forms by using different approaches. Adding features to the training data is found to be unhelpful, but using a lexicographical resource to filter the top candidates produced by the NMT model together with lemmatization improves results.</abstract>
@@ -3177,7 +3177,7 @@
     <paper id="11">
       <title>Semantics and Homothetic Clustering of Hafez Poetry</title>
       <author><first>Arya</first><last>Rahgozar</last></author>
-      <author><first>Diana</first><last>Inkpen</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Inkpen</last></author>
       <pages>82–90</pages>
       <abstract>We have created two sets of labels for Hafez (1315-1390) poems, using unsupervised learning. Our labels are the only semantic clustering alternative to the previously existing, hand-labeled, gold-standard classification of Hafez poems, to be used for literary research. We have cross-referenced, measured and analyzed the agreements of our clustering labels with Houman’s chronological classes. Our features are based on topic modeling and word embeddings. We also introduced a similarity of similarities’ features, we called homothetic clustering approach that proved effective, in case of Hafez’s small corpus of ghazals2. Although all our experiments showed different clusters when compared with Houman’s classes, we think they were valid in their own right to have provided further insights, and have proved useful as a contrasting alternative to Houman’s classes. Our homothetic clusterer and its feature design and engineering framework can be used for further semantic analysis of Hafez’s poetry and other similar literary research.</abstract>
       <url hash="53a51d23">W19-2511</url>
@@ -3188,7 +3188,7 @@
       <title>Computational Linguistics Applications for Multimedia Services</title>
       <author><first>Kyeongmin</first><last>Rim</last></author>
       <author><first>Kelley</first><last>Lynch</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <pages>91–97</pages>
       <abstract>We present Computational Linguistics Applications for Multimedia Services (CLAMS), a platform that provides access to computational content analysis tools for archival multimedia material that appear in different media, such as text, audio, image, and video. The primary goal of CLAMS is: (1) to develop an interchange format between multimodal metadata generation tools to ensure interoperability between tools; (2) to provide users with a portable, user-friendly workflow engine to chain selected tools to extract meaningful analyses; and (3) to create a public software development kit (SDK) for developers that eases deployment of analysis tools within the CLAMS platform. CLAMS is designed to help archives and libraries enrich the metadata associated with their mass-digitized multimedia collections, that would otherwise be largely unsearchable.</abstract>
       <url hash="56af227c">W19-2512</url>
@@ -3208,7 +3208,7 @@
     </paper>
     <paper id="14">
       <title>On the Feasibility of Automated Detection of Allusive Text Reuse</title>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Brian</first><last>Long</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <pages>104–114</pages>
@@ -3219,7 +3219,7 @@
     </paper>
     <paper id="15">
       <title>The limits of <fixed-case>S</fixed-case>panglish?</title>
-      <author><first>Barbara</first><last>Bullock</last></author>
+      <author id="barbara-bullock"><first>Barbara</first><last>Bullock</last></author>
       <author><first>Wally</first><last>Guzmán</last></author>
       <author><first>Almeida Jacqueline</first><last>Toribio</last></author>
       <pages>115–121</pages>
@@ -3246,7 +3246,7 @@
     <meta>
       <booktitle>Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications</booktitle>
       <url hash="1df6b85a">W19-26</url>
-      <editor><first>Vivi</first><last>Nastase</last></editor>
+      <editor id="vivi-nastase"><first>Vivi</first><last>Nastase</last></editor>
       <editor><first>Benjamin</first><last>Roth</last></editor>
       <editor><first>Laura</first><last>Dietz</last></editor>
       <editor><first>Andrew</first><last>McCallum</last></editor>
@@ -3325,8 +3325,8 @@
       <author><first>Soham</first><last>Parikh</last></author>
       <author><first>Elizabeth</first><last>Conrad</last></author>
       <author><first>Oshin</first><last>Agarwal</last></author>
-      <author><first>Iain</first><last>Marshall</last></author>
-      <author><first>Byron</first><last>Wallace</last></author>
+      <author id="iain-marshall"><first>Iain</first><last>Marshall</last></author>
+      <author id="byron-c-wallace"><first>Byron</first><last>Wallace</last></author>
       <author><first>Ani</first><last>Nenkova</last></author>
       <pages>43–47</pages>
       <abstract>Standard paradigms for search do not work well in the medical context. Typical information needs, such as retrieving a full list of medical interventions for a given condition, or finding the reported efficacy of a particular treatment with respect to a specific outcome of interest cannot be straightforwardly posed in typical text-box search. Instead, we propose faceted-search in which a user specifies a condition and then can browse treatments and outcomes that have been evaluated. Choosing from these, they can access randomized control trials (RCTs) describing individual studies. Realizing such a view of the medical evidence requires information extraction techniques to identify the population, interventions, and outcome measures in an RCT. Patients, health practitioners, and biomedical librarians all stand to benefit from such innovation in search of medical evidence. We present an initial prototype of such an interface applied to pre-registered clinical studies. We also discuss pilot studies into the applicability of information extraction methods to allow for similar access to all published trial results.</abstract>
@@ -3363,7 +3363,7 @@
       <author><first>Ronen</first><last>Tamari</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
       <author><first>Dafna</first><last>Shahaf</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>62–71</pages>
       <abstract>Understanding procedural text requires tracking entities, actions and effects as the narrative unfolds. We focus on the challenging real-world problem of action-graph extraction from materials science papers, where language is highly specialized and data annotation is expensive and scarce. We propose a novel approach, Text2Quest, where procedural text is interpreted as instructions for an interactive game. A learning agent completes the game by executing the procedure correctly in a text-based simulated lab environment. The framework can complement existing approaches and enables richer forms of learning compared to static texts. We discuss potential limitations and advantages of the approach, and release a prototype proof-of-concept, hoping to encourage research in this direction.</abstract>
       <url hash="bba14f2d">W19-2609</url>
@@ -3372,7 +3372,7 @@
     </paper>
     <paper id="10">
       <title>Textual and Visual Characteristics of Mathematical Expressions in Scholar Documents</title>
-      <author><first>Vidas</first><last>Daudaravicius</last></author>
+      <author id="vidas-daudaravicius"><first>Vidas</first><last>Daudaravicius</last></author>
       <pages>72–81</pages>
       <abstract>Mathematical expressions (ME) are widely used in scholar documents. In this paper we analyze characteristics of textual and visual MEs characteristics for the image-to-LaTeX translation task. While there are open data-sets of LaTeX files with MEs included it is very complicated to extract these MEs from a document and to compile the list of MEs. Therefore we release a corpus of open-access scholar documents with PDF and JATS-XML parallel files. The MEs in these documents are LaTeX encoded and are document independent. The data contains more than 1.2 million distinct annotated formulae and more than 80 million raw tokens of LaTeX MEs in more than 8 thousand documents. While the variety of textual lengths and visual sizes of MEs are not well defined we found that the task of analyzing MEs in scholar documents can be reduced to the subtask of a particular text length, image width and height bounds, and display MEs can be processed as arrays of partial MEs.</abstract>
       <url hash="d45c9cc5">W19-2610</url>
@@ -3388,7 +3388,7 @@
       <editor><first>Debopam</first><last>Das</last></editor>
       <editor><first>Erick Maziero</first><last>Galani</last></editor>
       <editor><first>Juliano Desiderato</first><last>Antonio</last></editor>
-      <editor><first>Mikel</first><last>Iruskieta</last></editor>
+      <editor id="mikel-iruskieta"><first>Mikel</first><last>Iruskieta</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, MN</address>
       <month>June</month>
@@ -3403,7 +3403,7 @@
       <title>Introduction to Discourse Relation Parsing and Treebanking (<fixed-case>DISRPT</fixed-case>): 7th Workshop on <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory and Related Formalisms</title>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Debopam</first><last>Das</last></author>
-      <author><first>Erick Galani</first><last>Maziero</last></author>
+      <author id="erick-galani-maziero"><first>Erick Galani</first><last>Maziero</last></author>
       <author><first>Juliano</first><last>Antonio</last></author>
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <pages>1–6</pages>
@@ -3468,7 +3468,7 @@
     <paper id="7">
       <title>Annotating Shallow Discourse Relations in <fixed-case>T</fixed-case>witter Conversations</title>
       <author><first>Tatjana</first><last>Scheffler</last></author>
-      <author><first>Berfin</first><last>Aktaş</last></author>
+      <author id="berfin-aktas"><first>Berfin</first><last>Aktaş</last></author>
       <author><first>Debopam</first><last>Das</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <pages>50–55</pages>
@@ -3538,7 +3538,7 @@
       <title>The <fixed-case>DISRPT</fixed-case> 2019 Shared Task on Elementary Discourse Unit Segmentation and Connective Detection</title>
       <author><first>Amir</first><last>Zeldes</last></author>
       <author><first>Debopam</first><last>Das</last></author>
-      <author><first>Erick Galani</first><last>Maziero</last></author>
+      <author id="erick-galani-maziero"><first>Erick Galani</first><last>Maziero</last></author>
       <author><first>Juliano</first><last>Antonio</last></author>
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <pages>97–104</pages>
@@ -3574,7 +3574,7 @@
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <author><first>Kepa</first><last>Bengoetxea</last></author>
       <author><first>Aitziber</first><last>Atutxa Salazar</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
       <pages>125–132</pages>
       <abstract>The DISPRT 2019 workshop has organized a shared task aiming to identify cross-formalism and multilingual discourse segments. Elementary Discourse Units (EDUs) are quite similar across different theories. Segmentation is the very first stage on the way of rhetorical annotation. Still, each annotation project adopted several decisions with consequences not only on the annotation of the relational discourse structure but also at the segmentation stage. In this shared task, we have employed pre-trained word embeddings, neural networks (BiLSTM+CRF) to perform the segmentation. We report F1 results for 6 languages: Basque (0.853), English (0.919), French (0.907), German (0.913), Portuguese (0.926) and Spanish (0.868 and 0.769). Finally, we also pursued an error analysis based on clause typology for Basque and Spanish, in order to understand the performance of the segmenter.</abstract>
       <url hash="f08a0202">W19-2716</url>
@@ -3600,7 +3600,7 @@
     <paper id="18">
       <title>Towards discourse annotation and sentiment analysis of the <fixed-case>B</fixed-case>asque Opinion Corpus</title>
       <author><first>Jon</first><last>Alkorta</last></author>
-      <author><first>Koldo</first><last>Gojenola</last></author>
+      <author id="koldo-gojenola"><first>Koldo</first><last>Gojenola</last></author>
       <author><first>Mikel</first><last>Iruskieta</last></author>
       <pages>144–152</pages>
       <abstract>Discourse information is crucial for a better understanding of the text structure and it is also necessary to describe which part of an opinionated text is more relevant or to decide how a text span can change the polarity (strengthen or weaken) of other span by means of coherence relations. This work presents the first results on the annotation of the Basque Opinion Corpus using Rhetorical Structure Theory (RST). Our evaluation results and analysis show us the main avenues to improve on a future annotation process. We have also extracted the subjectivity of several rhetorical relations and the results show the effect of sentiment words in relations and the influence of each relation in the semantic orientation value.</abstract>
@@ -3631,7 +3631,7 @@
       <author><first>Chandrakumari</first><last>Suvarna</last></author>
       <author><first>Pooja</first><last>Casula</last></author>
       <author><first>Mingtong</first><last>Zhang</last></author>
-      <author><first>Carolyn</first><last>Rosé</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rosé</last></author>
       <pages>163–168</pages>
       <abstract>We present a package of annotation resources, including annotation guideline, flowchart, and an Intelligent Tutoring System for training human annotators. These resources can be used to apply Rhetorical Structure Theory (RST) to essays written by students in K-12 schools. Furthermore, we highlight the great potential of using RST to provide automated feedback for improving writing quality across genres.</abstract>
       <url hash="ce6139bc">W19-2720</url>
@@ -3645,7 +3645,7 @@
       <booktitle>Proceedings of the Second Workshop on Computational Models of Reference, Anaphora and Coreference</booktitle>
       <url hash="df1dc68c">W19-28</url>
       <editor><first>Maciej</first><last>Ogrodniczuk</last></editor>
-      <editor><first>Sameer</first><last>Pradhan</last></editor>
+      <editor id="sameer-pradhan"><first>Sameer</first><last>Pradhan</last></editor>
       <editor><first>Yulia</first><last>Grishina</last></editor>
       <editor><first>Vincent</first><last>Ng</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -3730,10 +3730,10 @@
       <booktitle>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</booktitle>
       <url hash="856754db">W19-29</url>
       <editor><first>Emmanuele</first><last>Chersoni</last></editor>
-      <editor><first>Cassandra</first><last>Jacobs</last></editor>
+      <editor id="cassandra-l-jacobs"><first>Cassandra</first><last>Jacobs</last></editor>
       <editor><first>Alessandro</first><last>Lenci</last></editor>
       <editor><first>Tal</first><last>Linzen</last></editor>
-      <editor><first>Laurent</first><last>Prévot</last></editor>
+      <editor id="laurent-prevot"><first>Laurent</first><last>Prévot</last></editor>
       <editor><first>Enrico</first><last>Santus</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Minneapolis, Minnesota</address>
@@ -3749,7 +3749,7 @@
       <title>The Active-Filler Strategy in a Move-Eager Left-Corner <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammar Parser</title>
       <author><first>Tim</first><last>Hunter</last></author>
       <author><first>Miloš</first><last>Stanojević</last></author>
-      <author><first>Edward</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward</first><last>Stabler</last></author>
       <pages>1–10</pages>
       <abstract>Recent psycholinguistic evidence suggests that human parsing of moved elements is ‘active’, and perhaps even ‘hyper-active’: it seems that a leftward-moved object is related to a verbal position rapidly, perhaps even before the transitivity information associated with the verb is available to the listener. This paper presents a formal, sound and complete parser for Minimalist Grammars whose search space contains branching points that we can identify as the locus of the decision to perform this kind of active gap-finding. This brings formal models of parsing into closer contact with recent psycholinguistic theorizing than was previously possible.</abstract>
       <url hash="f7b758e6">W19-2901</url>
@@ -3759,7 +3759,7 @@
     <paper id="2">
       <title>Priming vs. Inhibition of Optional Infinitival “to”</title>
       <author><first>Robin</first><last>Melnick</last></author>
-      <author><first>Thomas</first><last>Wasow</last></author>
+      <author id="thomas-wasow"><first>Thomas</first><last>Wasow</last></author>
       <pages>11–19</pages>
       <abstract>The word “to” that precedes verbs in English infinitives is optional in at least two environments: in what Wasow et al. (2015) previously called the “do-be” construction, and in the complement of “help”, which we explore in the present work. In the “do-be” construction, Wasow et al. found that a preceding infinitival “to” increases the use of following optional “to”, but the use of “to” in the complement of help is reduced following “to help”. We examine two hypotheses regarding why the same function word is primed by prior use in one construction and inhibited in another. We then test predictions made by the two hypotheses, finding support for one of them.</abstract>
       <url hash="be353973">W19-2902</url>
@@ -3769,8 +3769,8 @@
     <paper id="3">
       <title>Simulating <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Code-Switching: El Modelo Está Generating Code-Switches</title>
       <author><first>Chara</first><last>Tsoukala</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Jorge</first><last>Valdés Kroff</last></author>
       <author><first>Mirjam</first><last>Broersma</last></author>
       <pages>20–29</pages>
@@ -3783,7 +3783,7 @@
       <title>Surprisal and Interference Effects of Case Markers in <fixed-case>H</fixed-case>indi Word Order</title>
       <author><first>Sidharth</first><last>Ranjan</last></author>
       <author><first>Sumeet</first><last>Agarwal</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <pages>30–42</pages>
       <abstract>Based on the Production-Distribution-Comprehension (PDC) account of language processing, we formulate two distinct hypotheses about case marking, word order choices and processing in Hindi. Our first hypothesis is that Hindi tends to optimize for processing efficiency at both lexical and syntactic levels. We quantify the role of case markers in this process. For the task of predicting the reference sentence occurring in a corpus (amidst meaning-equivalent grammatical variants) using a machine learning model, surprisal estimates from an artificial version of the language (i.e., Hindi without any case markers) result in lower prediction accuracy compared to natural Hindi. Our second hypothesis is that Hindi tends to minimize interference due to case markers while ordering preverbal constituents. We show that Hindi tends to avoid placing next to each other constituents whose heads are marked by identical case inflections. Our findings adhere to PDC assumptions and we discuss their implications for language production, learning and universals.</abstract>
       <url hash="dcdd12a8">W19-2904</url>
@@ -3793,7 +3793,7 @@
     <paper id="5">
       <title>Modeling Hierarchical Syntactic Structures in Morphological Processing</title>
       <author><first>Yohei</first><last>Oseki</last></author>
-      <author><first>Charles</first><last>Yang</last></author>
+      <author id="charles-yang"><first>Charles</first><last>Yang</last></author>
       <author><first>Alec</first><last>Marantz</last></author>
       <pages>43–52</pages>
       <abstract>Sentences are represented as hierarchical syntactic structures, which have been successfully modeled in sentence processing. In contrast, despite the theoretical agreement on hierarchical syntactic structures within words, words have been argued to be computationally less complex than sentences and implemented by finite-state models as linear strings of morphemes, and even the psychological reality of morphemes has been denied. In this paper, extending the computational models employed in sentence processing to morphological processing, we performed a computational simulation experiment where, given incremental surprisal as a linking hypothesis, five computational models with different representational assumptions were evaluated against human reaction times in visual lexical decision experiments available from the English Lexicon Project (ELP), a “shared task” in the morphological processing literature. The simulation experiment demonstrated that (i) “amorphous” models without morpheme units underperformed relative to “morphous” models, (ii) a computational model with hierarchical syntactic structures, Probabilistic Context-Free Grammar (PCFG), most accurately explained human reaction times, and (iii) this performance was achieved on top of surface frequency effects. These results strongly suggest that morphological processing tracks morphemes incrementally from left to right and parses them into hierarchical syntactic structures, contrary to “amorphous” and finite-state models of morphological processing.</abstract>
@@ -3804,7 +3804,7 @@
     <paper id="6">
       <title>A Modeling Study of the Effects of Surprisal and Entropy in Perceptual Decision Making of an Adaptive Agent</title>
       <author><first>Pyeong Whan</first><last>Cho</last></author>
-      <author><first>Richard</first><last>Lewis</last></author>
+      <author id="richard-l-lewis"><first>Richard</first><last>Lewis</last></author>
       <pages>53–61</pages>
       <abstract>Processing difficulty in online language comprehension has been explained in terms of surprisal and entropy reduction. Although both hypotheses have been supported by experimental data, we do not fully understand their relative contributions on processing difficulty. To develop a better understanding, we propose a mechanistic model of perceptual decision making that interacts with a simulated task environment with temporal dynamics. The proposed model collects noisy bottom-up evidence over multiple timesteps, integrates it with its top-down expectation, and makes perceptual decisions, producing processing time data directly without relying on any linking hypothesis. Temporal dynamics in the task environment was determined by a simple finite-state grammar, which was designed to create the situations where the surprisal and entropy reduction hypotheses predict different patterns. After the model was trained to maximize rewards, the model developed an adaptive policy and both surprisal and entropy effects were observed especially in a measure reflecting earlier processing.</abstract>
       <url hash="5d7695df">W19-2906</url>
@@ -3814,7 +3814,7 @@
     <paper id="7">
       <title>Modeling Long-Distance Cue Integration in Spoken Word Recognition</title>
       <author><first>Wednesday</first><last>Bushong</last></author>
-      <author><first>T. Florian</first><last>Jaeger</last></author>
+      <author id="t-florian-jaeger"><first>T. Florian</first><last>Jaeger</last></author>
       <pages>62–70</pages>
       <abstract>Cues to linguistic categories are distributed across the speech signal. Optimal categorization thus requires that listeners maintain gradient representations of incoming input in order to integrate that information with later cues. There is now evidence that listeners can and do integrate cues that occur far apart in time. Computational models of this integration have however been lacking. We take a first step at addressing this gap by mathematically formalizing four models of how listeners may maintain and use cue information during spoken language understanding and test them on two perception experiments. In one experiment, we find support for rational integration of cues at long distances. In a second, more memory and attention-taxing experiment, we find evidence in favor of a switching model that avoids maintaining detailed representations of cues in memory. These results are a first step in understanding what kinds of mechanisms listeners use for cue integration under different memory and attentional constraints.</abstract>
       <url hash="0cf210ca">W19-2907</url>
@@ -3826,7 +3826,7 @@
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Lydia</first><last>Mai Ho-Dac</last></author>
       <author><first>Ludovic</first><last>Tanguy</last></author>
-      <author><first>Cécile</first><last>Fabre</last></author>
+      <author id="cecile-fabre"><first>Cécile</first><last>Fabre</last></author>
       <author><first>Bénédicte</first><last>Pierrejean</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <author><first>Jérôme</first><last>Farinas</last></author>
@@ -3844,8 +3844,8 @@
     <paper id="9">
       <title>Dependency Parsing with your Eyes: Dependency Structure Predicts Eye Regressions During Reading</title>
       <author><first>Alessandro</first><last>Lopopolo</last></author>
-      <author><first>Stefan L.</first><last>Frank</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="stefan-l-frank"><first>Stefan L.</first><last>Frank</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Roel</first><last>Willems</last></author>
       <pages>77–85</pages>
       <abstract>Backward saccades during reading have been hypothesized to be involved in structural reanalysis, or to be related to the level of text difficulty. We test the hypothesis that backward saccades are involved in online syntactic analysis. If this is the case we expect that saccades will coincide, at least partially, with the edges of the relations computed by a dependency parser. In order to test this, we analyzed a large eye-tracking dataset collected while 102 participants read three short narrative texts. Our results show a relation between backward saccades and the syntactic structure of sentences.</abstract>
@@ -3876,7 +3876,7 @@
       <title>Quantifiers in a Multimodal World: Hallucinating Vision with Language and Sound</title>
       <author><first>Alberto</first><last>Testoni</last></author>
       <author><first>Sandro</first><last>Pezzelle</last></author>
-      <author><first>Raffaella</first><last>Bernardi</last></author>
+      <author id="raffaella-bernardi"><first>Raffaella</first><last>Bernardi</last></author>
       <pages>105–116</pages>
       <abstract>Inspired by the literature on multisensory integration, we develop a computational model to ground quantifiers in perception. The model learns to pick, out of nine quantifiers (‘few’, ‘many’, ‘all’, etc.), the one that is more likely to describe the percent of animals in a visual-auditory input containing both animals and artifacts. We show that relying on concurrent sensory inputs increases model performance on the quantification task. Moreover, we evaluate the model in a situation in which only the auditory modality is given, while the visual one is ‘hallucinanted’ either from the auditory input itself or from a linguistic caption describing the quantity of entities in the auditory input. This way, the model exploits prior associations between modalities. We show that the model profits from the prior knowledge and outperforms the auditory-only setting.</abstract>
       <url hash="49d4e7e8">W19-2912</url>
@@ -3897,7 +3897,7 @@
       <title>The Development of Abstract Concepts in Children’s Early Lexical Networks</title>
       <author><first>Abdellah</first><last>Fourtassi</last></author>
       <author><first>Isaac</first><last>Scheinfeld</last></author>
-      <author><first>Michael</first><last>Frank</last></author>
+      <author id="michael-c-frank"><first>Michael</first><last>Frank</last></author>
       <pages>129–133</pages>
       <abstract>How do children learn abstract concepts such as animal vs. artifact? Previous research has suggested that such concepts can partly be derived using cues from the language children hear around them. Following this suggestion, we propose a model where we represent the children’ developing lexicon as an evolving network. The nodes of this network are based on vocabulary knowledge as reported by parents, and the edges between pairs of nodes are based on the probability of their co-occurrence in a corpus of child-directed speech. We found that several abstract categories can be identified as the dense regions in such networks. In addition, our simulations suggest that these categories develop simultaneously, rather than sequentially, thanks to the children’s word learning trajectory which favors the exploration of the global conceptual space.</abstract>
       <url hash="c0e74897">W19-2914</url>
@@ -3906,7 +3906,7 @@
     </paper>
     <paper id="15">
       <title>Verb-Second Effect on Quantifier Scope Interpretation</title>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <author><first>Matthias</first><last>Lindemann</last></author>
       <author><first>Vera</first><last>Demberg</last></author>
       <pages>134–139</pages>
@@ -3967,7 +3967,7 @@
     <paper id="1">
       <title>Towards augmenting crisis counselor training by improving message retrieval</title>
       <author><first>Orianna</first><last>Demasi</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <author><first>Benjamin</first><last>Recht</last></author>
       <pages>1–11</pages>
       <abstract>A fundamental challenge when training counselors is presenting novices with the opportunity to practice counseling distressed individuals without exacerbating a situation. Rather than replacing human empathy with an automated counselor, we propose simulating an individual in crisis so that human counselors in training can practice crisis counseling in a low-risk environment. Towards this end, we collect a dataset of suicide prevention counselor role-play transcripts and make initial steps towards constructing a CRISISbot for humans to counsel while in training. In this data-constrained setting, we evaluate the potential for message retrieval to construct a coherent chat agent in light of recent advances with text embedding methods. Our results show that embeddings can considerably improve retrieval approaches to make them competitive with generative models. By coherently retrieving messages, we can help counselors practice chatting in a low-risk environment.</abstract>
@@ -3981,7 +3981,7 @@
       <author><first>Derrick</first><last>Hull</last></author>
       <author><first>Jacob</first><last>Levine</last></author>
       <author><first>Bonnie</first><last>Ray</last></author>
-      <author><first>Kathy</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathy</first><last>McKeown</last></author>
       <pages>12–23</pages>
       <abstract>While conversation in therapy sessions can vary widely in both topic and style, an understanding of the underlying techniques used by therapists can provide valuable insights into how therapists best help clients of different types. Dialogue act classification aims to identify the conversational “action” each speaker takes at each utterance, such as sympathizing, problem-solving or assumption checking. We propose to apply dialogue act classification to therapy transcripts, using a therapy-specific labeling scheme, in order to gain a high-level understanding of the flow of conversation in therapy sessions. We present a novel annotation scheme that spans multiple psychotherapeutic approaches, apply it to a large and diverse corpus of psychotherapy transcripts, and present and discuss classification results obtained using both SVM and neural network-based models. The results indicate that identifying the structure and flow of therapeutic actions is an obtainable goal, opening up the opportunity in the future to provide therapeutic recommendations tailored to specific client situations.</abstract>
       <url hash="84a2f0c1">W19-3002</url>
@@ -3992,7 +3992,7 @@
       <title><fixed-case>CLP</fixed-case>sych 2019 Shared Task: Predicting the Degree of Suicide Risk in <fixed-case>R</fixed-case>eddit Posts</title>
       <author><first>Ayah</first><last>Zirikly</last></author>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Özlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Özlem</first><last>Uzuner</last></author>
       <author><first>Kristy</first><last>Hollingshead</last></author>
       <pages>24–33</pages>
       <abstract>The shared task for the 2019 Workshop on Computational Linguistics and Clinical Psychology (CLPsych’19) introduced an assessment of suicide risk based on social media postings, using data from Reddit to identify users at no, low, moderate, or severe risk. Two variations of the task focused on users whose posts to the r/SuicideWatch subreddit indicated they might be at risk; a third task looked at screening users based only on their more everyday (non-SuicideWatch) posts. We received submissions from 15 different teams, and the results provide progress and insight into the value of language signal in helping to predict risk level.</abstract>
@@ -4020,8 +4020,8 @@
       <author><first>Huy</first><last>Vu</last></author>
       <author><first>Mohammad</first><last>Zamani</last></author>
       <author><first>Parth</first><last>Limbachiya</last></author>
-      <author><first>Sharath Chandra</first><last>Guntuku</last></author>
-      <author><first>H. Andrew</first><last>Schwartz</last></author>
+      <author id="sharath-chandra-guntuku"><first>Sharath Chandra</first><last>Guntuku</last></author>
+      <author id="h-andrew-schwartz"><first>H. Andrew</first><last>Schwartz</last></author>
       <pages>39–44</pages>
       <abstract>Mental health predictive systems typically model language as if from a single context (e.g. Twitter posts, status updates, or forum posts) and often limited to a single level of analysis (e.g. either the message-level or user-level). Here, we bring these pieces together to explore the use of open-vocabulary (BERT embeddings, topics) and theoretical features (emotional expression lexica, personality) for the task of suicide risk assessment on support forums (the CLPsych-2019 Shared Task). We used dual context based approaches (modeling content from suicide forums separate from other content), built over both traditional ML models as well as a novel dual RNN architecture with user-factor adaptation. We find that while affect from the suicide context distinguishes with no-risk from those with “any-risk”, personality factors from the non-suicide contexts provide distinction of the levels of risk: low, medium, and high risk. Within the shared task, our dual-context approach (listed as SBU-HLAB in the official results) achieved state-of-the-art performance predicting suicide risk using a combination of suicide-context and non-suicide posts (Task B), achieving an F1 score of 0.50 over hidden test set labels.</abstract>
       <url hash="706ce849">W19-3005</url>
@@ -4035,7 +4035,7 @@
       <author><first>Birkan</first><last>Tunc</last></author>
       <author><first>Casey</first><last>Zampella</last></author>
       <author><first>Edward</first><last>Brodkin</last></author>
-      <author><first>Robert</first><last>Schultz</last></author>
+      <author id="robert-t-schultz"><first>Robert</first><last>Schultz</last></author>
       <author><first>Julia</first><last>Parish-Morris</last></author>
       <pages>45–54</pages>
       <abstract>Spoken language ability is highly heterogeneous in Autism Spectrum Disorder (ASD), which complicates efforts to identify linguistic markers for use in diagnostic classification, clinical characterization, and for research and clinical outcome measurement. Machine learning techniques that harness the power of multivariate statistics and non-linear data analysis hold promise for modeling this heterogeneity, but many models require enormous datasets, which are unavailable for most psychiatric conditions (including ASD). In lieu of such datasets, good models can still be built by leveraging domain knowledge. In this study, we compare two machine learning approaches: the first approach incorporates prior knowledge about language variation across middle childhood, adolescence, and adulthood to classify 6-minute naturalistic conversation samples from 140 age- and IQ-matched participants (81 with ASD), while the other approach treats all ages the same. We found that individual age-informed models were significantly more accurate than a single model tasked with building a common algorithm across age groups. Furthermore, predictive linguistic features differed significantly by age group, confirming the importance of considering age-related changes in language use when classifying ASD. Our results suggest that limitations imposed by heterogeneity inherent to ASD and from developmental change with age can be (at least partially) overcome using domain knowledge, such as understanding spoken language development from childhood through adulthood.</abstract>
@@ -4045,7 +4045,7 @@
     </paper>
     <paper id="7">
       <title>The importance of sharing patient-generated clinical speech and language data</title>
-      <author><first>Kathleen C.</first><last>Fraser</last></author>
+      <author id="kathleen-c-fraser"><first>Kathleen C.</first><last>Fraser</last></author>
       <author><first>Nicklas</first><last>Linz</last></author>
       <author><first>Hali</first><last>Lindsay</last></author>
       <author><first>Alexandra</first><last>König</last></author>
@@ -4058,7 +4058,7 @@
     <paper id="8">
       <title>Depressed Individuals Use Negative Self-Focused Language When Recalling Recent Interactions with Close Romantic Partners but Not Family or <fixed-case>F</fixed-case>riends</title>
       <author><first>Taleen</first><last>Nalabandian</last></author>
-      <author><first>Molly</first><last>Ireland</last></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last></author>
       <pages>62–73</pages>
       <abstract>Depression is characterized by a self-focused negative attentional bias, which is often reflected in everyday language use. In a prospective writing study, we explored whether the association between depressive symptoms and negative, self-focused language varies across social contexts. College students (N = 243) wrote about a recent interaction with a person they care deeply about. Depression symptoms positively correlated with negative emotion words and first-person singular pronouns (or negative self-focus) when writing about a recent interaction with romantic partners or, to a lesser extent, friends, but not family members. The pattern of results was more pronounced when participants perceived greater self-other overlap (i.e., interpersonal closeness) with their romantic partner. Findings regarding how the linguistic profile of depression differs by type of relationship may inform more effective methods of clinical diagnosis and treatment.</abstract>
       <url hash="c13848e0">W19-3008</url>
@@ -4128,7 +4128,7 @@
     <paper id="14">
       <title>Reviving a psychometric measure: Classification and prediction of the Operant Motive Test</title>
       <author><first>Dirk</first><last>Johannßen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <author><first>David</first><last>Scheffer</last></author>
       <pages>121–125</pages>
       <abstract>Implicit motives allow for the characterization of behavior, subsequent success and long-term development. While this has been operationalized in the operant motive test, research on motives has declined mainly due to labor-intensive and costly human annotation. In this study, we analyze over 200,000 labeled data items from 40,000 participants and utilize them for engineering features for training a logistic model tree machine learning model. It captures manually assigned motives well with an F-score of 80%, coming close to the pairwise annotator intraclass correlation coefficient of r = .85. In addition, we found a significant correlation of r = .2 between subsequent academic success and data automatically labeled with our model in an extrinsic evaluation.</abstract>
@@ -4155,7 +4155,7 @@
     <paper id="16">
       <title>Overcoming the bottleneck in traditional assessments of verbal memory: Modeling human ratings and classifying clinical group membership</title>
       <author><first>Chelsea</first><last>Chandler</last></author>
-      <author><first>Peter W.</first><last>Foltz</last></author>
+      <author id="peter-foltz"><first>Peter W.</first><last>Foltz</last></author>
       <author><first>Jian</first><last>Cheng</last></author>
       <author><first>Jared C.</first><last>Bernstein</last></author>
       <author><first>Elizabeth P.</first><last>Rosenfeld</last></author>
@@ -4274,7 +4274,7 @@
       <title>Dictionaries and Decision Trees for the 2019 <fixed-case>CLP</fixed-case>sych Shared Task</title>
       <author><first>Micah</first><last>Iserman</last></author>
       <author><first>Taleen</first><last>Nalabandian</last></author>
-      <author><first>Molly</first><last>Ireland</last></author>
+      <author id="molly-ireland"><first>Molly</first><last>Ireland</last></author>
       <pages>188–194</pages>
       <abstract>In this summary, we discuss our approach to the CLPsych Shared Task and its initial results. For our predictions in each task, we used a recursive partitioning algorithm (decision trees) to select from our set of features, which were primarily dictionary scores and counts of individual words. We focused primarily on Task A, which aimed to predict suicide risk, as rated by a team of expert clinicians (Shing et al., 2018), based on language used in SuicideWatch posts on Reddit. Category-level findings highlight the potential importance of social and moral language categories. Word-level correlates of risk levels underline the value of fine-grained data-driven approaches, revealing both theory-consistent and potentially novel correlates of suicide risk that may motivate future research.</abstract>
       <url hash="b6084b0f">W19-3025</url>
@@ -4326,7 +4326,7 @@
     <paper id="4">
       <title>Bottom-Up Unranked Tree-to-Graph Transducers for Translation into Semantic Graphs</title>
       <author><first>Johanna</first><last>Björklund</last></author>
-      <author><first>Shay B.</first><last>Cohen</last></author>
+      <author id="shay-b-cohen"><first>Shay B.</first><last>Cohen</last></author>
       <author><first>Frank</first><last>Drewes</last></author>
       <author><first>Giorgio</first><last>Satta</last></author>
       <pages>7–17</pages>
@@ -4339,7 +4339,7 @@
       <title>On the Compression of Lexicon Transducers</title>
       <author><first>Marco</first><last>Cognetta</last></author>
       <author><first>Cyril</first><last>Allauzen</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>18–26</pages>
       <url hash="65a06360">W19-3105</url>
       <abstract>In finite-state language processing pipelines, a lexicon is often a key component. It needs to be comprehensive to ensure accuracy, reducing out-of-vocabulary misses. However, in memory-constrained environments (e.g., mobile phones), the size of the component automata must be kept small. Indeed, a delicate balance between comprehensiveness, speed, and memory must be struck to conform to device requirements while providing a good user experience.
@@ -4393,7 +4393,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>A Syntactically Expressive Morphological Analyzer for <fixed-case>T</fixed-case>urkish</title>
       <author><first>Adnan</first><last>Ozturel</last></author>
       <author><first>Tolga</first><last>Kayadelen</last></author>
-      <author><first>Isin</first><last>Demirsahin</last></author>
+      <author id="isin-demirsahin"><first>Isin</first><last>Demirsahin</last></author>
       <pages>65–75</pages>
       <url hash="6d0e2e09">W19-3110</url>
       <abstract>We present a broad coverage model of Turkish morphology and an open-source morphological analyzer that implements it. The model captures intricacies of Turkish morphology-syntax interface, thus could be used as a baseline that guides language model development. It introduces a novel fine part-of-speech tagset, a fine-grained affix inventory and represents morphotactics without zero-derivations. The morphological analyzer is freely available. It consists of modular reusable components of human-annotated gold standard lexicons, implements Turkish morphotactics as finite-state transducers using OpenFst and morphophonemic processes as Thrax grammars.</abstract>
@@ -4415,7 +4415,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Distilling weighted finite automata from arbitrary probabilistic models</title>
       <author><first>Ananda Theertha</first><last>Suresh</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <author><first>Vlad</first><last>Schogol</last></author>
       <pages>87–97</pages>
       <url hash="dc721e54">W19-3112</url>
@@ -4437,7 +4437,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Vlad</first><last>Schogol</last></author>
       <author><first>Brian</first><last>Roark</last></author>
-      <author><first>Michael</first><last>Riley</last></author>
+      <author id="michael-riley"><first>Michael</first><last>Riley</last></author>
       <pages>108–117</pages>
       <url hash="fa598695">W19-3114</url>
       <abstract>The use of the Latin script for text entry of South Asian languages is common, even though there is no standard orthography for these languages in the script. We explore several compact finite-state architectures that permit variable spellings of words during mobile text entry. We find that approaches making use of transliteration transducers provide large accuracy improvements over baselines, but that simpler approaches involving a compact representation of many attested alternatives yields much of the accuracy gain. This is particularly important when operating under constraints on model size (e.g., on inexpensive mobile devices with limited storage and memory for keyboard models), and on speed of inference, since people typing on mobile keyboards expect no perceptual delay in keyboard responsiveness.</abstract>
@@ -4458,8 +4458,8 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <meta>
       <booktitle>Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop &amp; Shared Task</booktitle>
       <url hash="56755d93">W19-32</url>
-      <editor><first>Davy</first><last>Weissenbacher</last></editor>
-      <editor><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
+      <editor id="davy-weissenbacher"><first>Davy</first><last>Weissenbacher</last></editor>
+      <editor id="graciela-gonzalez"><first>Graciela</first><last>Gonzalez-Hernandez</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -4475,7 +4475,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Kai</first><last>He</last></author>
       <author><first>Jialun</first><last>Wu</last></author>
       <author><first>Xiaoyong</first><last>Ma</last></author>
-      <author id="chong-zhang"><first>Chong</first><last>Zhang</last></author>
+      <author><first>Chong</first><last>Zhang</last></author>
       <author><first>Ming</first><last>Huang</last></author>
       <author><first>Chen</first><last>Li</last></author>
       <author><first>Lixia</first><last>Yao</last></author>
@@ -4503,7 +4503,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Arjun</first><last>Magge</last></author>
       <author><first>Ashlynn</first><last>Daughton</last></author>
       <author><first>Karen</first><last>O’Connor</last></author>
-      <author><first>Michael J.</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael J.</first><last>Paul</last></author>
       <author><first>Graciela</first><last>Gonzalez-Hernandez</last></author>
       <pages>21–30</pages>
       <abstract>The number of users of social media continues to grow, with nearly half of adults worldwide and two-thirds of all American adults using social networking. Advances in automated data processing, machine learning and NLP present the possibility of utilizing this massive data source for biomedical and public health applications, if researchers address the methodological challenges unique to this media. We present the Social Media Mining for Health Shared Tasks collocated with the ACL at Florence in 2019, which address these challenges for health monitoring and surveillance, utilizing state of the art techniques for processing noisy, real-world, and substantially creative language expressions from social media users. For the fourth execution of this challenge, we proposed four different tasks. Task 1 asked participants to distinguish tweets reporting an adverse drug reaction (ADR) from those that do not. Task 2, a follow-up to Task 1, asked participants to identify the span of text in tweets reporting ADRs. Task 3 is an end-to-end task where the goal was to first detect tweets mentioning an ADR and then map the extracted colloquial mentions of ADRs in the tweets to their corresponding standard concept IDs in the MedDRA vocabulary. Finally, Task 4 asked participants to classify whether a tweet contains a personal mention of one’s health, a more general discussion of the health issue, or is an unrelated mention. A total of 34 teams from around the world registered and 19 teams from 12 countries submitted a system run. We summarize here the corpora for this challenge which are freely available at <url>https://competitions.codalab.org/competitions/22521</url>, and present an overview of the methods and the results of the competing systems.</abstract>
@@ -4515,7 +4515,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>M</fixed-case>ed<fixed-case>N</fixed-case>orm: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation</title>
       <author><first>Maksim</first><last>Belousov</last></author>
       <author><first>William G.</first><last>Dixon</last></author>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <pages>31–39</pages>
       <abstract>The medical concept normalisation task aims to map textual descriptions to standard terminologies such as SNOMED-CT or MedDRA. Existing publicly available datasets annotated using different terminologies cannot be simply merged and utilised, and therefore become less valuable when developing machine learning-based concept normalisation systems. To address that, we designed a data harmonisation pipeline and engineered a corpus of 27,979 textual descriptions simultaneously mapped to both MedDRA and SNOMED-CT, sourced from five publicly available datasets across biomedical and social media domains. The pipeline can be used in the future to integrate new datasets into the corpus and also could be applied in relevant data curation tasks. We also described a method to merge different terminologies into a single concept graph preserving their relations and demonstrated that representation learning approach based on random walks on a graph can efficiently encode both hierarchical and equivalent relations and capture semantic similarities not only between concepts inside a given terminology but also between concepts from different terminologies. We believe that making a corpus and embeddings for cross-terminology medical concept normalisation available to the research community would contribute to a better understanding of the task.</abstract>
       <url hash="3c8998b2">W19-3204</url>
@@ -4526,7 +4526,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Passive Diagnosis Incorporating the <fixed-case>PHQ</fixed-case>-4 for Depression and Anxiety</title>
       <author><first>Fionn</first><last>Delahunty</last></author>
       <author><first>Robert</first><last>Johansson</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <pages>40–46</pages>
       <abstract>Depression and anxiety are the two most prevalent mental health disorders worldwide, impacting the lives of millions of people each year. In this work, we develop and evaluate a multilabel, multidimensional deep neural network designed to predict PHQ-4 scores based on individuals written text. Our system outperforms random baseline metrics and provides a novel approach to how we can predict psychometric scores from written text. Additionally, we explore how this architecture can be applied to analyse social media data.</abstract>
       <url hash="58115491">W19-3205</url>
@@ -4598,7 +4598,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="11">
       <title>Affective Behaviour Analysis of On-line User Interactions: Are On-line Support Groups More Therapeutic than <fixed-case>T</fixed-case>witter?</title>
       <author><first>Giuliano</first><last>Tortoreto</last></author>
-      <author><first>Evgeny</first><last>Stepanov</last></author>
+      <author id="evgeny-stepanov"><first>Evgeny</first><last>Stepanov</last></author>
       <author><first>Alessandra</first><last>Cervone</last></author>
       <author><first>Mateusz</first><last>Dubiel</last></author>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
@@ -4644,7 +4644,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="15">
       <title>Deep Learning for Identification of Adverse Effect Mentions In <fixed-case>T</fixed-case>witter Data</title>
       <author><first>Paul</first><last>Barry</last></author>
-      <author><first>Ozlem</first><last>Uzuner</last></author>
+      <author id="ozlem-uzuner"><first>Ozlem</first><last>Uzuner</last></author>
       <pages>99–101</pages>
       <abstract>Social Media Mining for Health Applications (SMM4H) Adverse Effect Mentions Shared Task challenges participants to accurately identify spans of text within a tweet that correspond to Adverse Effects (AEs) resulting from medication usage (Weissenbacher et al., 2019). This task features a training data set of 2,367 tweets, in addition to a 1,000 tweet evaluation data set. The solution presented here features a bidirectional Long Short-term Memory Network (bi-LSTM) for the generation of character-level embeddings. It uses a second bi-LSTM trained on both character and token level embeddings to feed a Conditional Random Field (CRF) which provides the final classification. This paper further discusses the deep learning algorithms used in our solution.</abstract>
       <url hash="06e3c99f">W19-3215</url>
@@ -4655,8 +4655,8 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Using Machine Learning and Deep Learning Methods to Find Mentions of Adverse Drug Reactions in Social Media</title>
       <author><first>Pilar</first><last>López Úbeda</last></author>
       <author><first>Manuel Carlos</first><last>Díaz Galiano</last></author>
-      <author><first>Maite</first><last>Martin</last></author>
-      <author><first>L. Alfonso</first><last>Urena Lopez</last></author>
+      <author id="m-teresa-martin-valdivia"><first>Maite</first><last>Martin</last></author>
+      <author id="l-alfonso-urena-lopez"><first>L. Alfonso</first><last>Urena Lopez</last></author>
       <pages>102–106</pages>
       <abstract>Over time the use of social networks is becoming very popular platforms for sharing health related information. Social Media Mining for Health Applications (SMM4H) provides tasks such as those described in this document to help manage information in the health domain. This document shows the first participation of the SINAI group. We study approaches based on machine learning and deep learning to extract adverse drug reaction mentions from Twitter. The results obtained in the tasks are encouraging, we are close to the average of all participants and even above in some cases.</abstract>
       <url hash="1802740a">W19-3216</url>
@@ -4749,7 +4749,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Simra</first><last>Shahid</last></author>
       <author><first>Laiba</first><last>Mehnaz</last></author>
       <author><first>Yaman</first><last>Kumar</last></author>
-      <author><first>Rajiv Ratn</first><last>Shah</last></author>
+      <author id="rajiv-shah"><first>Rajiv Ratn</first><last>Shah</last></author>
       <pages>127–132</pages>
       <abstract>In this paper, we present our approach and the system description for the Social Media Mining for Health Applications (SMM4H) Shared Task 1,2 and 4 (2019). Our main contribution is to show the effectiveness of Transfer Learning approaches like BERT and ULMFiT, and how they generalize for the classification tasks like identification of adverse drug reaction mentions and reporting of personal health problems in tweets. We show the use of stacked embeddings combined with BLSTM+CRF tagger for identifying spans mentioning adverse drug reactions in tweets. We also show that these approaches perform well even with imbalanced dataset in comparison to undersampling and oversampling.</abstract>
       <url hash="16a210ff">W19-3223</url>
@@ -4759,7 +4759,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="24">
       <title>Detection of Adverse Drug Reaction in Tweets Using a Combination of Heterogeneous Word Embeddings</title>
       <author><first>Segun Taofeek</first><last>Aroyehun</last></author>
-      <author><first>Alexander</first><last>Gelbukh</last></author>
+      <author id="alexander-gelbukh"><first>Alexander</first><last>Gelbukh</last></author>
       <pages>133–135</pages>
       <abstract>This paper details our approach to the task of detecting reportage of adverse drug reaction in tweets as part of the 2019 social media mining for healthcare applications shared task. We employed a combination of three types of word representations as input to a LSTM model. With this approach, we achieved an F1 score of 0.5209.</abstract>
       <url hash="da778f65">W19-3224</url>
@@ -4785,10 +4785,10 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <url hash="4ec4432d">W19-33</url>
       <editor><first>Nianwen</first><last>Xue</last></editor>
       <editor><first>William</first><last>Croft</last></editor>
-      <editor><first>Jan</first><last>Hajic</last></editor>
+      <editor id="jan-hajic"><first>Jan</first><last>Hajic</last></editor>
       <editor><first>Chu-Ren</first><last>Huang</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
-      <editor><first>Martha</first><last>Palmer</last></editor>
+      <editor id="martha-palmer"><first>Martha</first><last>Palmer</last></editor>
       <editor><first>James</first><last>Pustejovksy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
@@ -4826,7 +4826,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="3">
       <title>Modeling Quantification and Scope in <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations</title>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Ken</first><last>Lai</last></author>
       <author><first>Nianwen</first><last>Xue</last></author>
       <pages>28–33</pages>
@@ -4849,7 +4849,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>GKR</fixed-case>: Bridging the Gap between Symbolic/structural and Distributional Meaning Representations</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <pages>44–55</pages>
       <abstract>Three broad approaches have been attempted to combine distributional and structural/symbolic aspects to construct meaning representations: a) injecting linguistic features into distributional representations, b) injecting distributional features into symbolic representations or c) combining structural and distributional features in the final representation. This work focuses on an example of the third and less studied approach: it extends the Graphical Knowledge Representation (GKR) to include distributional features and proposes a division of semantic labour between the distributional and structural/symbolic features. We propose two extensions of GKR that clearly show this division and empirically test one of the proposals on an NLI dataset with hard compositional pairs.</abstract>
       <url hash="867010d1">W19-3305</url>
@@ -4865,7 +4865,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Graeme</first><last>McGuire</last></author>
       <author><first>Sophie</first><last>Sackstein</last></author>
       <author><first>Georgiy</first><last>Platonov</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <pages>56–65</pages>
       <abstract>Unscoped episodic logical form (ULF) is a semantic representation capturing the predicate-argument structure of English within the episodic logic formalism in relation to the syntactic structure, while leaving scope, word sense, and anaphora unresolved. We describe how ULF can be used to generate natural language inferences that are grounded in the semantic and syntactic structure through a small set of rules defined over interpretable predicates and transformations on ULFs. The semantic restrictions placed by ULF semantic types enables us to ensure that the inferred structures are semantically coherent while the nearness to syntax enables accurate mapping to English. We demonstrate these inferences on four classes of conversationally-oriented inferences in a mixed genre dataset with 68.5% precision from human judgments.</abstract>
       <url hash="0bcb16e1">W19-3306</url>
@@ -4874,7 +4874,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="7">
       <title>A Plea for Information Structure as a Part of Meaning Representation</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <pages>66–72</pages>
       <abstract>The view that the representation of information structure (IS) should be a part of (any type of) representation of meaning is based on the fact that IS is a semantically relevant phenomenon. In the contribution, three arguments supporting this view are briefly summarized, namely, the relation of IS to the interpretation of negation and presupposition, the relevance of IS to the understanding of discourse connectivity and for the establishment and interpretation of coreference relations. Afterwards, possible integration of the description of the main ingredient of IS into a meaning representation is illustrated.</abstract>
       <url hash="2f1d36db">W19-3307</url>
@@ -4893,7 +4893,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="9">
       <title>Meta-Semantic Representation for Early Detection of <fixed-case>A</fixed-case>lzheimer’s Disease</title>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <author><first>Mengmei</first><last>Li</last></author>
       <author><first>Felicia</first><last>Goldstein</last></author>
       <author><first>Ihab</first><last>Hajjar</last></author>
@@ -4942,7 +4942,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="13">
       <title>Meaning Representation of Null Instantiated Semantic Roles in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Miriam R L</first><last>Petruck</last></author>
+      <author id="miriam-r-l-petruck"><first>Miriam R L</first><last>Petruck</last></author>
       <pages>121–127</pages>
       <abstract>Humans have the unique ability to infer information about participants in a scene, even if they are not mentioned in a text about that scene. Computer systems cannot do so without explicit information about those participants. This paper addresses the linguistic phenomenon of null-instantiated frame elements, i.e., implicit semantic roles, and their representation in FrameNet (FN). It motivates FN’s annotation practice, and illustrates three types of null-instantiated arguments that FrameNet tracks, noting that other lexical resources do not record such semantic-pragmatic information, despite its need in natural language understanding (NLU), and the elaborate efforts to create new datasets. It challenges the community to appeal to FN data to develop more sophisticated techniques for recognizing implicit semantic roles, and creating needed datasets. Although the annotation of null-instantiated roles was lexicographically motivated, FN provides useful information for text processing, and therefore must be considered in the design of any meaning representation for natural language understanding.</abstract>
       <url hash="b5184c10">W19-3313</url>
@@ -4973,7 +4973,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="16">
       <title>Preparing <fixed-case>SNACS</fixed-case> for Subjects and Objects</title>
-      <author><first>Adi</first><last>Shalev</last></author>
+      <author id="adi-shalev"><first>Adi</first><last>Shalev</last></author>
       <author><first>Jena D.</first><last>Hwang</last></author>
       <author><first>Nathan</first><last>Schneider</last></author>
       <author><first>Vivek</first><last>Srikumar</last></author>
@@ -4998,11 +4998,11 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="18">
       <title><fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Representations: Subevent Semantics for Transfer Verbs</title>
-      <author><first>Susan Windisch</first><last>Brown</last></author>
+      <author id="susan-windisch-brown"><first>Susan Windisch</first><last>Brown</last></author>
       <author><first>Julia</first><last>Bonn</last></author>
       <author><first>James</first><last>Gung</last></author>
       <author><first>Annie</first><last>Zaenen</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Martha</first><last>Palmer</last></author>
       <pages>154–163</pages>
       <abstract>This paper announces the release of a new version of the English lexical resource VerbNet with substantially revised semantic representations designed to facilitate computer planning and reasoning based on human language. We use the transfer of possession and transfer of information event representations to illustrate both the general framework of the representations and the types of nuances the new representations can capture. These representations use a Generative Lexicon-inspired subevent structure to track attributes of event participants across time, highlighting oppositions and temporal and causal relations among the subevents.</abstract>
@@ -5045,13 +5045,13 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="22">
       <title>Augmenting <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Human-Robot Dialogue</title>
-      <author><first>Claire</first><last>Bonial</last></author>
+      <author id="claire-bonial"><first>Claire</first><last>Bonial</last></author>
       <author><first>Lucia</first><last>Donatelli</last></author>
-      <author><first>Stephanie M.</first><last>Lukin</last></author>
+      <author id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></author>
       <author><first>Stephen</first><last>Tratz</last></author>
       <author><first>Ron</first><last>Artstein</last></author>
-      <author><first>David</first><last>Traum</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <pages>199–210</pages>
       <abstract>We detail refinements made to Abstract Meaning Representation (AMR) that make the representation more suitable for supporting a situated dialogue system, where a human remotely controls a robot for purposes of search and rescue and reconnaissance. We propose 36 augmented AMRs that capture speech acts, tense and aspect, and spatial information. This linguistic information is vital for representing important distinctions, for example whether the robot has moved, is moving, or will move. We evaluate two existing AMR parsers for their performance on dialogue data. We also outline a model for graph-to-graph conversion, in which output from AMR parsers is converted into our refined AMRs. The design scheme presented here, though task-specific, is extendable for broad coverage of speech acts using AMR in future task-independent work.</abstract>
       <url hash="5e764134">W19-3322</url>
@@ -5064,8 +5064,8 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <booktitle>Proceedings of the Second Workshop on Storytelling</booktitle>
       <url hash="11250637">W19-34</url>
       <editor><first>Francis</first><last>Ferraro</last></editor>
-      <editor><first>Ting-Hao ‘Kenneth’</first><last>Huang</last></editor>
-      <editor><first>Stephanie M.</first><last>Lukin</last></editor>
+      <editor id="ting-hao-huang"><first>Ting-Hao ‘Kenneth’</first><last>Huang</last></editor>
+      <editor id="stephanie-lukin"><first>Stephanie M.</first><last>Lukin</last></editor>
       <editor><first>Margaret</first><last>Mitchell</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
@@ -5095,7 +5095,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Khyathi</first><last>Chandu</last></author>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Ruslan</first><last>Salakhutdinov</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>11–21</pages>
       <abstract>Visual storytelling is the task of generating stories based on a sequence of images. Inspired by the recent works in neural generation focusing on controlling the form of text, this paper explores the idea of generating these stories in different personas. However, one of the main challenges of performing this task is the lack of a dataset of visual stories in different personas. Having said that, there are independent datasets for both visual storytelling and annotated sentences for various persona. In this paper we describe an approach to overcome this by getting labelled persona data from a different task and leveraging those annotations to perform persona based story generation. We inspect various ways of incorporating personality in both the encoder and the decoder representations to steer the generation in the target direction. To this end, we propose five models which are incremental extensions to the baseline model to perform the task at hand. In our experiments we use five different personas to guide the generation process. We find that the models based on our hypotheses perform better at capturing words while generating stories in the target persona.</abstract>
       <url hash="4a9f63be">W19-3402</url>
@@ -5107,7 +5107,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Xinru</first><last>Yan</last></author>
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Yohan</first><last>Jo</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>22–33</pages>
       <abstract>We propose a novel take on understanding narratives in social media, focusing on learning ”functional story schemas”, which consist of sets of stereotypical functional structures. We develop an unsupervised pipeline to extract schemas and apply our method to Reddit posts to detect schematic structures that are characteristic of different subreddits. We validate our schemas through human interpretation and evaluate their utility via a text classification task. Our experiments show that extracted schemas capture distinctive structural patterns in different subreddits, improving classification performance of several models by 2.4% on average. We also observe that these schemas serve as lenses that reveal community norms.</abstract>
       <url hash="ca78fd99">W19-3403</url>
@@ -5120,7 +5120,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Vera</first><last>Demberg</last></author>
       <author><first>Pavel</first><last>Shkadzko</last></author>
       <author><first>Wei</first><last>Shi</last></author>
-      <author><first>Asad</first><last>Sayeed</last></author>
+      <author id="asad-sayeed"><first>Asad</first><last>Sayeed</last></author>
       <pages>34–45</pages>
       <abstract>Automatically generating globally coherent stories is a challenging problem. Neural text generation models have been shown to perform well at generating fluent sentences from data, but they usually fail to keep track of the overall coherence of the story after a couple of sentences. Existing work that incorporates a text planning module succeeded in generating recipes and dialogues, but appears quite data-demanding. We propose a novel story generation approach that generates globally coherent stories from a fairly small corpus. The model exploits a symbolic text planning module to produce text plans, thus reducing the demand of data; a neural surface realization module then generates fluent text conditioned on the text plan. Human evaluation showed that our model outperforms various baselines by a wide margin and generates stories which are fluent as well as globally coherent.</abstract>
       <url hash="c4325e39">W19-3404</url>
@@ -5155,7 +5155,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="7">
       <title><fixed-case>N</fixed-case>arrative <fixed-case>G</fixed-case>eneration in the <fixed-case>W</fixed-case>ild: <fixed-case>M</fixed-case>ethods from <fixed-case>N</fixed-case>a<fixed-case>N</fixed-case>o<fixed-case>G</fixed-case>en<fixed-case>M</fixed-case>o</title>
       <author><first>Judith</first><last>van Stegeren</last></author>
-      <author><first>Mariët</first><last>Theune</last></author>
+      <author id="mariet-theune"><first>Mariët</first><last>Theune</last></author>
       <pages>65–74</pages>
       <abstract>In text generation, generating long stories is still a challenge. Coherence tends to decrease rapidly as the output length increases. Especially for generated stories, coherence of the narrative is an important quality aspect of the output text. In this paper we examine how narrative coherence is attained in the submissions of NaNoGenMo 2018, an online text generation event where participants are challenged to generate a 50,000 word novel. We list the main approaches that were used to generate coherent narratives and link them to scientific literature. Finally, we give recommendations on when to use which approach.</abstract>
       <url hash="4c16502c">W19-3407</url>
@@ -5176,7 +5176,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>A Simple Approach to Classify Fictional and Non-Fictional Genres</title>
       <author><first>Mohammed Rameez</first><last>Qureshi</last></author>
       <author><first>Sidharth</first><last>Ranjan</last></author>
-      <author><first>Rajakrishnan</first><last>Rajkumar</last></author>
+      <author id="rajakrishnan-rajkumar"><first>Rajakrishnan</first><last>Rajkumar</last></author>
       <author><first>Kushal</first><last>Shah</last></author>
       <pages>81–89</pages>
       <abstract>In this work, we deploy a logistic regression classifier to ascertain whether a given document belongs to the fiction or non-fiction genre. For genre identification, previous work had proposed three classes of features, viz., low-level (character-level and token counts), high-level (lexical and syntactic information) and derived features (type-token ratio, average word length or average sentence length). Using the Recursive feature elimination with cross-validation (RFECV) algorithm, we perform feature selection experiments on an exhaustive set of nineteen features (belonging to all the classes mentioned above) extracted from Brown corpus text. As a result, two simple features viz., the ratio of the number of adverbs to adjectives and the number of adjectives to pronouns turn out to be the most significant. Subsequently, our classification experiments aimed towards genre identification of documents from the Brown and Baby BNC corpora demonstrate that the performance of a classifier containing just the two aforementioned features is at par with that of a classifier containing the exhaustive feature set.</abstract>
@@ -5208,7 +5208,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="12">
       <title>Winter is here: Summarizing <fixed-case>T</fixed-case>witter Streams related to Pre-Scheduled Events</title>
       <author><first>Anietie</first><last>Andy</last></author>
-      <author><first>Derry Tanti</first><last>Wijaya</last></author>
+      <author id="derry-tanti-wijaya"><first>Derry Tanti</first><last>Wijaya</last></author>
       <author><first>Chris</first><last>Callison-Burch</last></author>
       <pages>112–116</pages>
       <abstract>Pre-scheduled events, such as TV shows and sports games, usually garner considerable attention from the public. Twitter captures large volumes of discussions and messages related to these events, in real-time. Twitter streams related to pre-scheduled events are characterized by the following: (1) spikes in the volume of published tweets reflect the highlights of the event and (2) some of the published tweets make reference to the characters involved in the event, in the context in which they are currently portrayed in a subevent. In this paper, we take advantage of these characteristics to identify the highlights of pre-scheduled events from tweet streams and we demonstrate a method to summarize these highlights. We evaluate our algorithm on tweets collected around 2 episodes of a popular TV show, Game of Thrones, Season 7.</abstract>
@@ -5223,7 +5223,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Prakhar</first><last>Gupta</last></author>
       <author><first>Vinayshekhar</first><last>Bannihatti Kumar</last></author>
       <author><first>Mukul</first><last>Bhutani</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>117–126</pages>
       <abstract>We study the problem of generating interesting endings for stories. Neural generative models have shown promising results for various text generation problems. Sequence to Sequence (Seq2Seq) models are typically trained to generate a single output sequence for a given input sequence. However, in the context of a story, multiple endings are possible. Seq2Seq models tend to ignore the context and generate generic and dull responses. Very few works have studied generating diverse and interesting story endings for the same story context. In this paper, we propose models which generate more diverse and interesting outputs by 1) training models to focus attention on important keyphrases of the story, and 2) promoting generating nongeneric words. We show that the combination of the two leads to more interesting endings.</abstract>
       <url hash="48c2a419">W19-3413</url>
@@ -5247,9 +5247,9 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <booktitle>Proceedings of the Third Workshop on Abusive Language Online</booktitle>
       <url hash="b3b427c2">W19-35</url>
       <editor><first>Sarah T.</first><last>Roberts</last></editor>
-      <editor><first>Joel</first><last>Tetreault</last></editor>
+      <editor id="joel-tetreault"><first>Joel</first><last>Tetreault</last></editor>
       <editor><first>Vinodkumar</first><last>Prabhakaran</last></editor>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -5289,7 +5289,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Detecting harassment in real-time as conversations develop</title>
       <author><first>Wessel</first><last>Stoop</last></author>
       <author><first>Florian</first><last>Kunneman</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Ben</first><last>Miller</last></author>
       <pages>19–24</pages>
       <abstract>We developed a machine-learning-based method to detect video game players that harass teammates or opponents in chat earlier in the conversation. This real-time technology would allow gaming companies to intervene during games, such as issue warnings or muting or banning a player. In a proof-of-concept experiment on League of Legends data we compute and visualize evaluation metrics for a machine learning classifier as conversations unfold, and observe that the optimal precision and recall of detecting toxic players at each moment in the conversation depends on the confidence threshold of the classifier: the threshold should start low, and increase as the conversation unfolds. How fast this sliding threshold should increase depends on the training set size.</abstract>
@@ -5336,7 +5336,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="7">
       <title>The Discourse of Online Content Moderation: Investigating Polarized User Responses to Changes in <fixed-case>R</fixed-case>eddit’s Quarantine Policy</title>
       <author><first>Qinlan</first><last>Shen</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>58–69</pages>
       <abstract>Recent concerns over abusive behavior on their platforms have pressured social media companies to strengthen their content moderation policies. However, user opinions on these policies have been relatively understudied. In this paper, we present an analysis of user responses to a September 27, 2018 announcement about the quarantine policy on Reddit as a case study of to what extent the discourse on content moderation is polarized by users’ ideological viewpoint. We introduce a novel partitioning approach for characterizing user polarization based on their distribution of participation across interest subreddits. We then use automated techniques for capturing framing to examine how users with different viewpoints discuss moderation issues, finding that right-leaning users invoked censorship while left-leaning users highlighted inconsistencies on how content policies are applied. Overall, we argue for a more nuanced approach to moderation by highlighting the intersection of behavior and ideology in considering how abusive language is defined and regulated.</abstract>
       <url hash="0fa46458">W19-3507</url>
@@ -5360,7 +5360,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Alex</first><last>Harris</last></author>
       <author><first>Dong</first><last>Nguyen</last></author>
       <author><first>Rebekah</first><last>Tromble</last></author>
-      <author><first>Scott</first><last>Hale</last></author>
+      <author id="scott-a-hale"><first>Scott</first><last>Hale</last></author>
       <author><first>Helen</first><last>Margetts</last></author>
       <pages>80–93</pages>
       <abstract>Online abusive content detection is an inherently difficult task. It has received considerable attention from academia, particularly within the computational linguistics community, and performance appears to have improved as the field has matured. However, considerable challenges and unaddressed frontiers remain, spanning technical, social and ethical dimensions. These issues constrain the performance, efficiency and generalizability of abusive content detection systems. In this article we delineate and clarify the main challenges and frontiers in the field, critically evaluate their implications and discuss potential solutions. We also highlight ways in which social scientific insights can advance research. We discuss the lack of support given to researchers working with abusive content and provide guidelines for ethical research.</abstract>
@@ -5372,7 +5372,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>A Hierarchically-Labeled <fixed-case>P</fixed-case>ortuguese Hate Speech Dataset</title>
       <author><first>Paula</first><last>Fortuna</last></author>
       <author><first>João</first><last>Rocha da Silva</last></author>
-      <author><first>Juan</first><last>Soler-Company</last></author>
+      <author id="juan-soler-company"><first>Juan</first><last>Soler-Company</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <author><first>Sérgio</first><last>Nunes</last></author>
       <pages>94–104</pages>
@@ -5399,7 +5399,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>L</fixed-case>-<fixed-case>HSAB</fixed-case>: A <fixed-case>L</fixed-case>evantine <fixed-case>T</fixed-case>witter Dataset for Hate Speech and Abusive Language</title>
       <author><first>Hala</first><last>Mulki</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
-      <author><first>Chedi</first><last>Bechikh Ali</last></author>
+      <author id="chedi-bechikh-ali"><first>Chedi</first><last>Bechikh Ali</last></author>
       <author><first>Halima</first><last>Alshabani</last></author>
       <pages>111–118</pages>
       <abstract>Hate speech and abusive language have become a common phenomenon on Arabic social media. Automatic hate speech and abusive detection systems can facilitate the prohibition of toxic textual contents. The complexity, informality and ambiguity of the Arabic dialects hindered the provision of the needed resources for Arabic abusive/hate speech detection research. In this paper, we introduce the first publicly-available Levantine Hate Speech and Abusive (L-HSAB) Twitter dataset with the objective to be a benchmark dataset for automatic detection of online Levantine toxic contents. We, further, provide a detailed review of the data collection steps and how we design the annotation guidelines such that a reliable dataset annotation is guaranteed. This has been later emphasized through the comprehensive evaluation of the annotations as the annotation agreement metrics of Cohen’s Kappa (k) and Krippendorff’s alpha (α) indicated the consistency of the annotations.</abstract>
@@ -5420,7 +5420,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="14">
       <title>Preemptive Toxic Language Detection in <fixed-case>W</fixed-case>ikipedia Comments Using Thread-Level Context</title>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
       <pages>129–134</pages>
       <abstract>We address the task of automatically detecting toxic content in user generated texts. We fo cus on exploring the potential for preemptive moderation, i.e., predicting whether a particular conversation thread will, in the future, incite a toxic comment. Moreover, we perform preliminary investigation of whether a model that jointly considers all comments in a conversation thread outperforms a model that considers only individual comments. Using an existing dataset of conversations among Wikipedia contributors as a starting point, we compile a new large-scale dataset for this task consisting of labeled comments and comments from their conversation threads.</abstract>
@@ -5433,7 +5433,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Sravan</first><last>Bodapati</last></author>
       <author><first>Spandana</first><last>Gella</last></author>
       <author><first>Kasturi</first><last>Bhattacharjee</last></author>
-      <author><first>Yaser</first><last>Al-Onaizan</last></author>
+      <author id="yaser-al-onaizan"><first>Yaser</first><last>Al-Onaizan</last></author>
       <pages>135–145</pages>
       <abstract>The text we see in social media suffers from lots of undesired characterstics like hatespeech, abusive language, insults etc. The nature of this text is also very different compared to the traditional text we see in news with lots of obfuscated words, intended typos. This poses several robustness challenges to many natural language processing (NLP) techniques developed for traditional text. Many techniques proposed in the recent times such as charecter encoding models, subword models, byte pair encoding to extract subwords can aid in dealing with few of these nuances. In our work, we analyze the effectiveness of each of the above techniques, compare and contrast various word decomposition techniques when used in combination with others. We experiment with recent advances of finetuning pretrained language models, and demonstrate their robustness to domain shift. We also show our approaches achieve state of the art performance on Wikipedia attack, toxicity datasets, and Twitter hatespeech dataset.</abstract>
       <url hash="2942f8ff">W19-3515</url>
@@ -5445,7 +5445,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="16">
       <title>A Platform Agnostic Dual-Strand Hate Speech Detector</title>
       <author><first>Johannes Skjeggestad</first><last>Meyer</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>146–156</pages>
       <abstract>Hate speech detectors must be applicable across a multitude of services and platforms, and there is hence a need for detection approaches that do not depend on any information specific to a given platform. For instance, the information stored about the text’s author may differ between services, and so using such data would reduce a system’s general applicability. The paper thus focuses on using exclusively text-based input in the detection, in an optimised architecture combining Convolutional Neural Networks and Long Short-Term Memory-networks. The hate speech detector merges two strands with character n-grams and word embeddings to produce the final classification, and is shown to outperform comparable previous approaches.</abstract>
       <url hash="9d08ba6f">W19-3516</url>
@@ -5499,7 +5499,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <editor><first>Diyi</first><last>Yang</last></editor>
       <editor><first>Rossana</first><last>Cunha</last></editor>
       <editor><first>Samira</first><last>Shaikh</last></editor>
-      <editor><first>Zeerak</first><last>Waseem</last></editor>
+      <editor id="zeerak-talat"><first>Zeerak</first><last>Waseem</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -5521,7 +5521,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="2">
       <title>Towards a Resource Grammar for <fixed-case>R</fixed-case>unyankore and Rukiga</title>
       <author><first>David</first><last>Bamutura</last></author>
-      <author><first>Peter</first><last>Ljunglöf</last></author>
+      <author id="peter-ljunglof"><first>Peter</first><last>Ljunglöf</last></author>
       <pages>2–6</pages>
       <abstract>Currently, there is a lack of computational grammar resources for many under-resourced languages which limits the ability to develop Natural Language Processing (NLP) tools and applications such as Multilingual Document Authoring, Computer-Assisted Language Learning (CALL) and Low-Coverage Machine Translation (MT) for these languages. In this paper, we present our attempt to formalise the grammar of two such languages: Runyankore and Rukiga. For this formalisation we use the Grammatical Framework (GF) and its Resource Grammar Library (GF-RGL).</abstract>
       <bibkey>bamutura-ljunglof-2019-towards</bibkey>
@@ -5596,9 +5596,9 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="11">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>E</fixed-case>thiopian Languages Statistical Machine Translation</title>
-      <author><first>Solomon Teferra</first><last>Abate</last></author>
+      <author id="solomon-teferra-abate"><first>Solomon Teferra</first><last>Abate</last></author>
       <author><first>Michael</first><last>Melese</last></author>
-      <author><first>Martha Yifiru</first><last>Tachbelie</last></author>
+      <author id="martha-yifiru-tachbelie"><first>Martha Yifiru</first><last>Tachbelie</last></author>
       <author><first>Million</first><last>Meshesha</last></author>
       <author><first>Solomon</first><last>Atinafu</last></author>
       <author><first>Wondwossen</first><last>Mulugeta</last></author>
@@ -5633,7 +5633,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="14">
       <title>Learning Trilingual Dictionaries for <fixed-case>U</fixed-case>rdu – <fixed-case>R</fixed-case>oman <fixed-case>U</fixed-case>rdu – <fixed-case>E</fixed-case>nglish</title>
       <author><first>Moiz</first><last>Rauf</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>38–42</pages>
       <abstract>In this paper, we present an effort to generate a joint Urdu, Roman Urdu and English trilingual lexicon using automated methods. We make a case for using statistical machine translation approaches and parallel corpora for dictionary creation. To this purpose, we use word alignment tools on the corpus and evaluate translations using human evaluators. Despite different writing script and considerable noise in the corpus our results show promise with over 85% accuracy of Roman Urdu–Urdu and 45% English–Urdu pairs.</abstract>
       <bibkey>rauf-pado-2019-learning</bibkey>
@@ -5673,7 +5673,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Controlling the Specificity of Clarification Question Generation</title>
       <author><first>Yang Trista</first><last>Cao</last></author>
       <author><first>Sudha</first><last>Rao</last></author>
-      <author><first>Hal</first><last>Daumé III</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé III</last></author>
       <pages>53–56</pages>
       <abstract>Unlike comprehension-style questions, clarification questions look for some missing information in a given context. However, without guidance, neural models for question generation, similar to dialog generation models, lead to generic and bland questions that cannot elicit useful information. We argue that controlling the level of specificity of the generated questions can have useful applications and propose a neural clarification question generation model for the same. We first train a classifier that annotates a clarification question with its level of specificity (generic or specific) to the given context. Our results on the Amazon questions dataset demonstrate that training a clarification question generation model on specificity annotated data can generate questions with varied levels of specificity to the given context.</abstract>
       <bibkey>cao-etal-2019-controlling</bibkey>
@@ -5682,7 +5682,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Non-Monotonic Sequential Text Generation</title>
       <author><first>Kiante</first><last>Brantley</last></author>
       <author><first>Kyunghyun</first><last>Cho</last></author>
-      <author><first>Hal</first><last>Daumé</last></author>
+      <author id="hal-daume-iii"><first>Hal</first><last>Daumé</last></author>
       <author><first>Sean</first><last>Welleck</last></author>
       <pages>57–59</pages>
       <abstract>Standard sequential generation methods assume a pre-specified generation order, such as text generation methods which generate words from left to right. In this work, we propose a framework for training models of text generation that operate in non-monotonic orders; the model directly learns good orders, without any additional annotation. Our framework operates by generating a word at an arbitrary position, and then recursively generating words to its left and then words to its right, yielding a binary tree. Learning is framed as imitation learning, including a coaching method which moves from imitating an oracle to reinforcing the policy’s own preferences. Experimental results demonstrate that using the proposed method, it is possible to learn policies which generate text without pre-specifying a generation order while achieving competitive performance with conventional left-to-right generation.</abstract>
@@ -5724,7 +5724,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="25">
       <title>Construction and Alignment of Multilingual Entailment Graphs for Semantic Inference</title>
       <author><first>Sabine</first><last>Weber</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>77–79</pages>
       <abstract>This paper presents ongoing work on the construction and alignment of predicate entailment graphs in English and German. We extract predicate-argument pairs from large corpora of monolingual English and German news text and construct monolingual paraphrase clusters and entailment graphs. We use an aligned subset of entities to derive the bilingual alignment of entities and relations, and achieve better than baseline results on a translated subset of a predicate entailment data set (Levy and Dagan, 2016) and the German portion of XNLI (Conneau et al., 2018).</abstract>
       <bibkey>weber-steedman-2019-construction</bibkey>
@@ -5742,7 +5742,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="27">
       <title>Acoustic Characterization of Singaporean Children’s <fixed-case>E</fixed-case>nglish: Comparisons to <fixed-case>A</fixed-case>merican and <fixed-case>B</fixed-case>ritish Counterparts</title>
       <author><first>Yuling</first><last>Gu</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>83–87</pages>
       <abstract>We investigate English pronunciation patterns in Singaporean children in relation to their American and British counterparts by conducting archetypal analysis on selected vowel pairs. Given that Singapore adopts British English as the institutional standard, one might expect Singaporean children to follow British pronunciation patterns, but we observe that Singaporean children also present similar patterns to Americans for TRAP-BATH spilt vowels: (1) British and Singaporean children both produce these vowels with a relatively lowered tongue height. (2) These vowels are more fronted for American and Singaporean children (p &lt; 0.001). In addition, when comparing /æ/ and /ε/ productions, British speakers show the clearest distinction between the two vowels; Singaporean and American speakers exhibit a higher and more fronted tongue position for /æ/ (p &lt; 0.001), causing /æ/ to be acoustically more similar to /ε/.</abstract>
       <bibkey>gu-chen-2019-acoustic</bibkey>
@@ -5775,7 +5775,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Isolating the Effects of Modeling Recursive Structures: A Case Study in Pronunciation Prediction of <fixed-case>C</fixed-case>hinese Characters</title>
       <author><first>Minh</first><last>Nguyen</last></author>
       <author><first>Gia H</first><last>Ngo</last></author>
-      <author><first>Nancy</first><last>Chen</last></author>
+      <author id="nancy-chen"><first>Nancy</first><last>Chen</last></author>
       <pages>95–97</pages>
       <abstract>Finding that explicitly modeling structures leads to better generalization, we consider the task of predicting Cantonese pronunciations of logographs (Chinese characters) using logographs’ recursive structures. This task is a suitable case study for two reasons. First, logographs’ pronunciations depend on structures (i.e. the hierarchies of sub-units in logographs) Second, the quality of logographic structures is consistent since the structures are constructed automatically using a set of rules. Thus, this task is less affected by confounds such as varying quality between annotators. Empirical results show that modeling structures explicitly using treeLSTM outperforms LSTM baseline, reducing prediction error by 6.0% relative.</abstract>
       <bibkey>nguyen-etal-2019-isolating</bibkey>
@@ -5817,7 +5817,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Polysemous Language in Child Directed Speech</title>
       <author><first>Sammy</first><last>Floyd</last></author>
       <author><first>Libby</first><last>Barak</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <author><first>Casey</first><last>Lew-Williams</last></author>
       <pages>114–117</pages>
       <abstract>Polysemous Language in Child Directed Speech Learning the meaning of words is one of the fundamental building blocks of verbal communication. Models of child language acquisition have generally made the simplifying assumption that each word appears in child-directed speech with a single meaning. To understand naturalistic word learning during childhood, it is essential to know whether children hear input that is in fact constrained to single meaning per word, or whether the environment naturally contains multiple senses. In this study, we use a topic modeling approach to automatically induce word senses from child-directed speech. Our results confirm the plausibility of our automated analysis approach and reveal an increasing rate of using multiple senses in child-directed speech, starting with corpora from children as early as the first year of life.</abstract>
@@ -5827,7 +5827,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Principled Frameworks for Evaluating Ethics in <fixed-case>NLP</fixed-case> Systems</title>
       <author><first>Shrimai</first><last>Prabhumoye</last></author>
       <author><first>Elijah</first><last>Mayfield</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>118–121</pages>
       <abstract>We critique recent work on ethics in natural language processing. Those discussions have focused on data collection, experimental design, and interventions in modeling. But we argue that we ought to first understand the frameworks of ethics that are being used to evaluate the fairness and justice of algorithmic systems. Here, we begin that discussion by outlining deontological and consequentialist ethics, and make predictions on the research agenda prioritized by each.</abstract>
       <bibkey>prabhumoye-etal-2019-principled</bibkey>
@@ -5845,7 +5845,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="39">
       <title>Evaluating Ways of Adapting Word Similarity</title>
       <author><first>Libby</first><last>Barak</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <pages>126–128</pages>
       <abstract>People judge pairwise similarity by deciding which aspects of the words’ meanings are relevant for the comparison of the given pair. However, computational representations of meaning rely on dimensions of the vector representation for similarity comparisons, without considering the specific pairing at hand. Prior work has adapted computational similarity judgments by using the softmax function in order to address this limitation by capturing asymmetry in human judgments. We extend this analysis by showing that a simple modification of cosine similarity offers a better correlation with human judgments over a comprehensive dataset. The modification performs best when the similarity between two words is calculated with reference to other words that are most similar and dissimilar to the pair.</abstract>
       <bibkey>barak-goldberg-2019-evaluating</bibkey>
@@ -5870,7 +5870,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Context Effects on Human Judgments of Similarity</title>
       <author><first>Libby</first><last>Barak</last></author>
       <author><first>Noe</first><last>Kong-Johnson</last></author>
-      <author><first>Adele</first><last>Goldberg</last></author>
+      <author id="adele-goldberg"><first>Adele</first><last>Goldberg</last></author>
       <pages>135–137</pages>
       <abstract>The semantic similarity of words forms the basis of many natural language processing methods. These computational similarity measures are often based on a mathematical comparison of vector representations of word meanings, while human judgments of similarity differ in lacking geometrical properties, e.g., symmetric similarity and triangular similarity. In this study, we propose a novel task design to further explore human behavior by asking whether a pair of words is deemed more similar depending on an immediately preceding judgment. Results from a crowdsourcing experiment show that people consistently judge words as more similar when primed by a judgment that evokes a relevant relationship. Our analysis further shows that word2vec similarity correlated significantly better with the out-of-context judgments, thus confirming the methodological differences in human-computer judgments, and offering a new testbed for probing the differences.</abstract>
       <bibkey>barak-etal-2019-context</bibkey>
@@ -5895,7 +5895,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="45">
       <title>Pardon the Interruption: Automatic Analysis of Gender and Competitive Turn-Taking in <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Hearings</title>
-      <author><first>Haley</first><last>Lepp</last></author>
+      <author id="haley-lepp"><first>Haley</first><last>Lepp</last></author>
       <pages>143–145</pages>
       <abstract>The United States Supreme Court plays a key role in defining the legal basis for gender discrimination throughout the country, yet there are few checks on gender bias within the court itself. In conversational turn-taking, interruptions have been documented as a marker of bias between speakers of different genders. The goal of this study is to automatically differentiate between respectful and disrespectful conversational turns taken during official hearings, which could help in detecting bias and finding remediation techniques for discourse in the courtroom. In this paper, I present a corpus of turns annotated by legal professionals, and describe the design of a semi-supervised classifier that will use acoustic and lexical features to analyze turn-taking at scale. On completion of annotations, this classifier will be trained to extract the likelihood that turns are respectful or disrespectful for use in studies of speech trends.</abstract>
       <bibkey>lepp-2019-pardon</bibkey>
@@ -5905,7 +5905,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Ehsan</first><last>Kamalloo</last></author>
       <author><first>Kory</first><last>Mathewson</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <pages>146–148</pages>
       <abstract>Evaluating open-domain dialogue systems is difficult due to the diversity of possible correct answers. Automatic metrics such as BLEU correlate weakly with human annotations, resulting in a significant bias across different models and datasets. Some researchers resort to human judgment experimentation for assessing response quality, which is expensive, time consuming, and not scalable. Moreover, judges tend to evaluate a small number of dialogues, meaning that minor differences in evaluation configuration may lead to dissimilar results. In this paper, we present interpretable metrics for evaluating topic coherence by making use of distributed sentence representations. Furthermore, we introduce calculable approximations of human judgment based on conversational coherence by adopting state-of-the-art entailment techniques. Results show that our metrics can be used as a surrogate for human judgment, making it easy to evaluate dialogue systems on large-scale datasets and allowing an unbiased estimate for the quality of the responses. This paper has been accepted in NAACL 2019.</abstract>
       <bibkey>dziri-etal-2019-evaluating-coherence</bibkey>
@@ -5937,8 +5937,8 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>A Parallel Corpus <fixed-case>M</fixed-case>ixtec-<fixed-case>S</fixed-case>panish</title>
       <author><first>Cynthia</first><last>Montaño</last></author>
       <author><first>Gerardo</first><last>Sierra Martínez</last></author>
-      <author><first>Gemma</first><last>Bel-Enguix</last></author>
-      <author><first>Helena</first><last>Gomez</last></author>
+      <author id="gemma-bel-enguix"><first>Gemma</first><last>Bel-Enguix</last></author>
+      <author id="helena-gomez"><first>Helena</first><last>Gomez</last></author>
       <pages>157–159</pages>
       <abstract>This work is about the compilation process of parallel documents Spanish-Mixtec. There are not many Spanish-Mixec parallel texts and most of the sources are non-digital books. Due to this, we need to face the errors when digitizing the sources and difficulties in sentence alignment, as well as the fact that does not exist a standard orthography. Our parallel corpus consists of sixty texts coming from books and digital repositories. These documents belong to different domains: history, traditional stories, didactic material, recipes, ethnographical descriptions of each town and instruction manuals for disease prevention. We have classified this material in five major categories: didactic (6 texts), educative (6 texts), interpretative (7 texts), narrative (39 texts), and poetic (2 texts). The final total of tokens is 49,814 Spanish words and 47,774 Mixtec words. The texts belong to the states of Oaxaca (48 texts), Guerrero (9 texts) and Puebla (3 texts). According to this data, we see that the corpus is unbalanced in what refers to the representation of the different territories. While 55% of speakers are in Oaxaca, 80% of texts come from this region. Guerrero has the 30% of speakers and the 15% of texts and Puebla, with the 15% of the speakers has a representation of the 5% in the corpus.</abstract>
       <bibkey>montano-etal-2019-parallel</bibkey>
@@ -5973,7 +5973,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="54">
       <title><fixed-case>CSI</fixed-case> <fixed-case>P</fixed-case>eru News: finding the culprit, victim and location in news articles</title>
       <author><first>Gina</first><last>Bustamante</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>174–176</pages>
       <abstract>We introduce a shift on the DS method over the domain of crime-related news from Peru, attempting to find the culprit, victim and location of a crime description from a RE perspective. Obtained results are highly promising and show that proposed modifications are effective in non-traditional domains.</abstract>
       <bibkey>bustamante-oncevay-2019-csi</bibkey>
@@ -5990,7 +5990,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="56">
       <title>Cross-Sentence Transformations in Text Simplification</title>
       <author><first>Fernando</first><last>Alva-Manchego</last></author>
-      <author><first>Carolina</first><last>Scarton</last></author>
+      <author id="carolina-scarton"><first>Carolina</first><last>Scarton</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
       <pages>181–184</pages>
       <abstract>Current approaches to Text Simplification focus on simplifying sentences individually. However, certain simplification transformations span beyond single sentences (e.g. joining and re-ordering sentences). In this paper, we motivate the need for modelling the simplification task at the document level, and assess the performance of sequence-to-sequence neural models in this setup. We analyse parallel original-simplified documents created by professional editors and show that there are frequent rewriting transformations that are not restricted to sentence boundaries. We also propose strategies to automatically evaluate the performance of a simplification model on these cross-sentence transformations. Our experiments show the inability of standard sequence-to-sequence neural models to learn these transformations, and suggest directions towards document-level simplification.</abstract>
@@ -6001,9 +6001,9 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <meta>
       <booktitle>Proceedings of the 7th Workshop on Balto-Slavic Natural Language Processing</booktitle>
       <url hash="53ea2def">W19-37</url>
-      <editor><first>Tomaž</first><last>Erjavec</last></editor>
+      <editor id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></editor>
       <editor><first>Michał</first><last>Marcińczuk</last></editor>
-      <editor><first>Preslav</first><last>Nakov</last></editor>
+      <editor id="preslav-nakov"><first>Preslav</first><last>Nakov</last></editor>
       <editor><first>Jakub</first><last>Piskorski</last></editor>
       <editor><first>Lidia</first><last>Pivovarova</last></editor>
       <editor><first>Jan</first><last>Šnajder</last></editor>
@@ -6084,7 +6084,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="7">
       <title>Data Set for Stance and Sentiment Analysis from User Comments on <fixed-case>C</fixed-case>roatian News</title>
       <author><first>Mihaela</first><last>Bošnjak</last></author>
-      <author><first>Vanja Mladen</first><last>Karan</last></author>
+      <author id="vanja-m-karan"><first>Vanja Mladen</first><last>Karan</last></author>
       <pages>50–55</pages>
       <abstract>Nowadays it is becoming more important than ever to find new ways of extracting useful information from the evergrowing amount of user-generated data available online. In this paper, we describe the creation of a data set that contains news articles and corresponding comments from Croatian news outlet 24 sata. Our annotation scheme is specifically tailored for the task of detecting stances and sentiment from user comments as well as assessing if commentator claims are verifiable. Through this data, we hope to get a better understanding of the publics viewpoint on various events. In addition, we also explore the potential of applying supervised machine learning models toautomate annotation of more data.</abstract>
       <url hash="114adde9">W19-3707</url>
@@ -6131,7 +6131,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     </paper>
     <paper id="11">
       <title><fixed-case>TLR</fixed-case> at <fixed-case>BSNLP</fixed-case>2019: A Multilingual Named Entity Recognition System</title>
-      <author><first>Jose G.</first><last>Moreno</last></author>
+      <author id="jose-g-moreno"><first>Jose G.</first><last>Moreno</last></author>
       <author><first>Elvys</first><last>Linhares Pontes</last></author>
       <author><first>Mickael</first><last>Coustaty</last></author>
       <author><first>Antoine</first><last>Doucet</last></author>
@@ -6145,7 +6145,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title>Tuning Multilingual Transformers for Language-Specific Named Entity Recognition</title>
       <author><first>Mikhail</first><last>Arkhipov</last></author>
       <author><first>Maria</first><last>Trofimova</last></author>
-      <author><first>Yuri</first><last>Kuratov</last></author>
+      <author id="yurii-kuratov"><first>Yuri</first><last>Kuratov</last></author>
       <author><first>Alexey</first><last>Sorokin</last></author>
       <pages>89–93</pages>
       <abstract>Our paper addresses the problem of multilingual named entity recognition on the material of 4 languages: Russian, Bulgarian, Czech and Polish. We solve this task using the BERT model. We use a hundred languages multilingual model as base for transfer to the mentioned Slavic languages. Unsupervised pre-training of the BERT model on these 4 languages allows to significantly outperform baseline neural approaches and multilingual BERT. Additional improvement is achieved by extending BERT with a word-level CRF layer. Our system was submitted to BSNLP 2019 Shared Task on Multilingual Named Entity Recognition and demonstrated top performance in multilingual setting for two competition metrics. We open-sourced NER models and BERT model pre-trained on the four Slavic languages.</abstract>
@@ -6167,7 +6167,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>JRC</fixed-case> <fixed-case>TMA</fixed-case>-<fixed-case>CC</fixed-case>: <fixed-case>S</fixed-case>lavic Named Entity Recognition and Linking. Participation in the <fixed-case>BSNLP</fixed-case>-2019 shared task</title>
       <author><first>Guillaume</first><last>Jacquet</last></author>
       <author><first>Jakub</first><last>Piskorski</last></author>
-      <author><first>Hristo</first><last>Tanev</last></author>
+      <author id="hristo-tanev"><first>Hristo</first><last>Tanev</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
       <pages>100–104</pages>
       <abstract>We report on the participation of the JRC Text Mining and Analysis Competence Centre (TMA-CC) in the BSNLP-2019 Shared Task, which focuses on named-entity recognition, lemmatisation and cross-lingual linking. We propose a hybrid system combining a rule-based approach and light ML techniques. We use multilingual lexical resources such as JRC-NAMES and BABELNET together with a named entity guesser to recognise names. In a second step, we combine known names with wild cards to increase recognition recall by also capturing inflection variants. In a third step, we increase precision by filtering these name candidates with automatically learnt inflection patterns derived from name occurrences in large news article collections. Our major requirement is to achieve high precision. We achieved an average of 65% F-measure with 93% precision on the four languages.</abstract>
@@ -6178,7 +6178,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="15">
       <title>Building <fixed-case>E</fixed-case>nglish-to-<fixed-case>S</fixed-case>erbian Machine Translation System for <fixed-case>IMD</fixed-case>b Movie Reviews</title>
       <author><first>Pintu</first><last>Lohar</last></author>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>105–113</pages>
       <abstract>This paper reports the results of the first experiment dealing with the challenges of building a machine translation system for user-generated content involving a complex South Slavic language. We focus on translation of English IMDb user movie reviews into Serbian, in a low-resource scenario. We explore potentials and limits of (i) phrase-based and neural machine translation systems trained on out-of-domain clean parallel data from news articles (ii) creating additional synthetic in-domain parallel corpus by machine-translating the English IMDb corpus into Serbian. Our main findings are that morphology and syntax are better handled by the neural approach than by the phrase-based approach even in this low-resource mismatched domain scenario, however the situation is different for the lexical aspect, especially for person names. This finding also indicates that in general, machine translation of person names into Slavic languages (especially those which require/allow transcription) should be investigated more systematically.</abstract>
@@ -6213,7 +6213,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <meta>
       <booktitle>Proceedings of the First Workshop on Gender Bias in Natural Language Processing</booktitle>
       <url hash="180e4811">W19-38</url>
-      <editor><first>Marta R.</first><last>Costa-jussà</last></editor>
+      <editor id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></editor>
       <editor><first>Christian</first><last>Hardmeier</last></editor>
       <editor><first>Will</first><last>Radford</last></editor>
       <editor><first>Kellie</first><last>Webster</last></editor>
@@ -6266,7 +6266,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="4">
       <title>Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories</title>
       <author><first>Kaytlin</first><last>Chaloner</last></author>
-      <author><first>Alfredo</first><last>Maldonado</last></author>
+      <author id="alfredo-maldonado"><first>Alfredo</first><last>Maldonado</last></author>
       <pages>25–32</pages>
       <abstract>Prior work has shown that word embeddings capture human stereotypes, including gender bias. However, there is a lack of studies testing the presence of specific gender bias categories in word embeddings across diverse domains. This paper aims to fill this gap by applying the WEAT bias detection method to four sets of word embeddings trained on corpora from four different domains: news, social networking, biomedical and a gender-balanced corpus extracted from Wikipedia (GAP). We find that some domains are definitely more prone to gender bias than others, and that the categories of gender bias present also vary for each set of word embeddings. We detect some gender bias in GAP. We also propose a simple but novel method for discovering new bias categories by clustering word embeddings. We validate this method through WEAT’s hypothesis testing mechanism and find it useful for expanding the relatively small set of well-known gender bias word categories commonly used in the literature.</abstract>
       <url hash="4f023b3b">W19-3804</url>
@@ -6287,7 +6287,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="6">
       <title>Conceptor Debiasing of Word Representations Evaluated on <fixed-case>WEAT</fixed-case></title>
       <author><first>Saket</first><last>Karve</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <author><first>João</first><last>Sedoc</last></author>
       <pages>40–48</pages>
       <abstract>Bias in word representations, such as Word2Vec, has been widely reported and investigated, and efforts made to debias them. We apply the debiasing conceptor for post-processing both traditional and contextualized word embeddings. Our method can simultaneously remove racial and gender biases from word representations. Unlike standard debiasing methods, the debiasing conceptor can utilize heterogeneous lists of biased words without loss in performance. Finally, our empirical experiments show that the debiasing conceptor diminishes racial and gender bias of word representations as measured using the Word Embedding Association Test (WEAT) of Caliskan et al. (2017).</abstract>
@@ -6309,7 +6309,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="8">
       <title>The Role of Protected Class Word Lists in Bias Identification of Contextualized Word Representations</title>
       <author><first>João</first><last>Sedoc</last></author>
-      <author><first>Lyle</first><last>Ungar</last></author>
+      <author id="lyle-ungar"><first>Lyle</first><last>Ungar</last></author>
       <pages>55–61</pages>
       <abstract>Systemic bias in word embeddings has been widely reported and studied, and efforts made to debias them; however, new contextualized embeddings such as ELMo and BERT are only now being similarly studied. Standard debiasing methods require heterogeneous lists of target words to identify the “bias subspace”. We show show that using new contextualized word embeddings in conceptor debiasing allows us to more accurately debias word embeddings by breaking target word lists into more homogeneous subsets and then combining (”Or’ing”) the debiasing conceptors of the different subsets.</abstract>
       <url hash="7b7ae484">W19-3808</url>
@@ -6341,7 +6341,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>BERT</fixed-case> Masked Language Modeling for Co-reference Resolution</title>
       <author><first>Felipe</first><last>Alfaro</last></author>
       <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>76–81</pages>
       <abstract>This paper explains the TALP-UPC participation for the Gendered Pronoun Resolution shared-task of the 1st ACL Workshop on Gender Bias for Natural Language Processing. We have implemented two models for mask language modeling using pre-trained BERT adjusted to work for a classification problem. The proposed solutions are based on the word probabilities of the original BERT model, but using common English names to replace the original test names.</abstract>
       <url hash="373c64cf">W19-3811</url>
@@ -6467,7 +6467,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <author><first>Keita</first><last>Kurita</last></author>
       <author><first>Nidhi</first><last>Vyas</last></author>
       <author><first>Ayush</first><last>Pareek</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>166–172</pages>
       <abstract>Contextual word embeddings such as BERT have achieved state of the art performance in numerous NLP tasks. Since they are optimized to capture the statistical properties of training data, they tend to pick up on and amplify social stereotypes present in the data as well. In this study, we (1) propose a template-based method to quantify bias in BERT; (2) show that this method obtains more consistent results in capturing social biases than the traditional cosine based method; and (3) conduct a case study, evaluating gender bias in a downstream task of Gender Pronoun Resolution. Although our case study focuses on gender bias, the proposed technique is generalizable to unveiling other biases, including in multiclass settings, such as racial and religious biases.</abstract>
@@ -6492,7 +6492,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <meta>
       <booktitle>Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges</booktitle>
       <url hash="b4185bd7">W19-39</url>
-      <editor><first>Jason</first><last>Eisner</last></editor>
+      <editor id="jason-eisner"><first>Jason</first><last>Eisner</last></editor>
       <editor><first>Matthias</first><last>Gallé</last></editor>
       <editor><first>Jeffrey</first><last>Heinz</last></editor>
       <editor><first>Ariadna</first><last>Quattoni</last></editor>
@@ -6540,7 +6540,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="4">
       <title>Multi-Element Long Distance Dependencies: Using <fixed-case>SP</fixed-case>k Languages to Explore the Characteristics of Long-Distance Dependencies</title>
       <author><first>Abhijit</first><last>Mahalunkar</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>34–43</pages>
       <abstract>In order to successfully model Long Distance Dependencies (LDDs) it is necessary to under-stand the full-range of the characteristics of the LDDs exhibited in a target dataset. In this paper, we use Strictly k-Piecewise languages to generate datasets with various properties. We then compute the characteristics of the LDDs in these datasets using mutual information and analyze the impact of factors such as (i) k, (ii) length of LDDs, (iii) vocabulary size, (iv) forbidden strings, and (v) dataset size. This analysis reveal that the number of interacting elements in a dependency is an important characteristic of LDDs. This leads us to the challenge of modelling multi-element long-distance dependencies. Our results suggest that attention mechanisms in neural networks may aide in modeling datasets with multi-element long-distance dependencies. However, we conclude that there is a need to develop more efficient attention mechanisms to address this issue.</abstract>
       <url hash="49309885">W19-3904</url>
@@ -6551,7 +6551,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>LSTM</fixed-case> Networks Can Perform Dynamic Counting</title>
       <author><first>Mirac</first><last>Suzgun</last></author>
       <author><first>Yonatan</first><last>Belinkov</last></author>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <pages>44–54</pages>
       <abstract>In this paper, we systematically assess the ability of standard recurrent networks to perform dynamic counting and to encode hierarchical representations. All the neural models in our experiments are designed to be small-sized networks both to prevent them from memorizing the training sets and to visualize and interpret their behaviour at test time. Our results demonstrate that the Long Short-Term Memory (LSTM) networks can learn to recognize the well-balanced parenthesis language (Dyck-1) and the shuffles of multiple Dyck-1 languages, each defined over different parenthesis-pairs, by emulating simple real-time k-counter machines. To the best of our knowledge, this work is the first study to introduce the shuffle languages to analyze the computational power of neural networks. We also show that a single-layer LSTM with only one hidden unit is practically sufficient for recognizing the Dyck-1 language. However, none of our recurrent networks was able to yield a good performance on the Dyck-2 language learning task, which requires a model to have a stack-like mechanism for recognition.</abstract>
@@ -6580,7 +6580,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
     <paper id="1">
       <title>Crowdsourced Hedge Term Disambiguation</title>
       <author><first>Morgan</first><last>Ulinski</last></author>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <pages>1–5</pages>
       <abstract>We address the issue of acquiring quality annotations of hedging words and phrases, linguistic phenomenona in which words, sounds, or other constructions are used to express ambiguity or uncertainty. Due to the limited availability of existing corpora annotated for hedging, linguists and other language scientists have been constrained as to the extent they can study this phenomenon. In this paper, we introduce a new method of acquiring hedging annotations via crowdsourcing, based on reformulating the task of labeling hedges as a simple word sense disambiguation task. We also introduce a new hedging corpus we have constructed by applying this method, a collection of forum posts annotated using Amazon Mechanical Turk. We found that the crowdsourced judgments we obtained had an inter-annotator agreement of 92.89% (Fleiss’ Kappa=0.751) and, when comparing a subset of these annotations to an expert-annotated gold standard, an accuracy of 96.65%.</abstract>
       <url hash="d72543bf">W19-4001</url>
@@ -6591,7 +6591,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
       <title><fixed-case>W</fixed-case>i<fixed-case>R</fixed-case>e57 : A Fine-Grained Benchmark for Open Information Extraction</title>
       <author><first>William</first><last>Lechelle</last></author>
       <author><first>Fabrizio</first><last>Gotti</last></author>
-      <author><first>Phillippe</first><last>Langlais</last></author>
+      <author id="philippe-langlais"><first>Phillippe</first><last>Langlais</last></author>
       <pages>6–15</pages>
       <abstract>We build a reference for the task of Open Information Extraction, on five documents. We tentatively resolve a number of issues that arise, including coreference and granularity, and we take steps toward addressing inference, a significant problem. We seek to better pinpoint the requirements for the task. We produce our annotation guidelines specifying what is correct to extract and what is not. In turn, we use this reference to score existing Open IE systems. We address the non-trivial problem of evaluating the extractions produced by systems against the reference tuples, and share our evaluation script. Among seven compared extractors, we find the MinIE system to perform best.</abstract>
       <url hash="37c07586">W19-4002</url>
@@ -6689,8 +6689,8 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
       <title>Assessing Back-Translation as a Corpus Generation Strategy for non-<fixed-case>E</fixed-case>nglish Tasks: A Study in Reading Comprehension and Word Sense Disambiguation</title>
       <author><first>Fabricio</first><last>Monsalve</last></author>
       <author><first>Kervy</first><last>Rivas Rojas</last></author>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>81–89</pages>
       <abstract>Corpora curated by experts have sustained Natural Language Processing mainly in English, but the expensiveness of corpora creation is a barrier for the development in further languages. Thus, we propose a corpus generation strategy that only requires a machine translation system between English and the target language in both directions, where we filter the best translations by computing automatic translation metrics and the task performance score. By studying Reading Comprehension in Spanish and Word Sense Disambiguation in Portuguese, we identified that a more quality-oriented metric has high potential in the corpora selection without degrading the task performance. We conclude that it is possible to systematise the building of quality corpora using machine translation and automatic metrics, besides some prior effort to clean and process the data.</abstract>
       <url hash="fb980812">W19-4010</url>
@@ -6700,7 +6700,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
     <paper id="11">
       <title>A Framework for Annotating ‘Related Works’ to Support Feedback to Novice Writers</title>
       <author><first>Arlene</first><last>Casey</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <author><first>Dorota</first><last>Glowacka</last></author>
       <pages>90–99</pages>
       <abstract>Understanding what is expected of academic writing can be difficult for novice writers to assimilate, and recent years have seen several automated tools become available to support academic writing. Our work presents a framework for annotating features of the Related Work section of academic writing, that supports writer feedback.</abstract>
@@ -6712,7 +6712,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
       <title>An Online Annotation Assistant for Argument Schemes</title>
       <author><first>John</first><last>Lawrence</last></author>
       <author><first>Jacky</first><last>Visser</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
       <pages>100–107</pages>
       <abstract>Understanding the inferential principles underpinning an argument is essential to the proper interpretation and evaluation of persuasive discourse. Argument schemes capture the conventional patterns of reasoning appealed to in persuasion. The empirical study of these patterns relies on the availability of data about the actual use of argumentation in communicative practice. Annotated corpora of argument schemes, however, are scarce, small, and unrepresentative. Aiming to address this issue, we present one step in the development of improved datasets by integrating the Argument Scheme Key – a novel annotation method based on one of the most popular typologies of argument schemes – into the widely used OVA software for argument analysis.</abstract>
       <url hash="e3f56bad">W19-4012</url>
@@ -6757,8 +6757,8 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
       <author><first>Annebeth</first><last>Buis</last></author>
       <author><first>Livy</first><last>Real</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <pages>132–143</pages>
       <abstract>The vast amount of research introducing new corpora and techniques for semi-automatically annotating corpora shows the important role that datasets play in today’s research, especially in the machine learning community. This rapid development raises concerns about the quality of the datasets created and consequently of the models trained, as recently discussed with respect to the Natural Language Inference (NLI) task. In this work we conduct an annotation experiment based on a small subset of the SICK corpus. The experiment reveals several problems in the annotation guidelines, and various challenges of the NLI task itself. Our quantitative evaluation of the experiment allows us to assign our empirical observations to specific linguistic phenomena and leads us to recommendations for future annotation tasks, for NLI and possibly for other tasks.</abstract>
       <url hash="049eff3a">W19-4016</url>
@@ -6796,7 +6796,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
       <author><first>Abdullatif</first><last>Köksal</last></author>
       <author><first>Balkiz</first><last>Ozturk Basaran</last></author>
       <author><first>Tunga</first><last>Gungor</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>166–177</pages>
       <abstract>In this paper, we present the current version of two different treebanks, the re-annotation of the Turkish PUD Treebank and the first annotation of the Turkish National Corpus Universal Dependency (henceforth TNC-UD). The annotation of both treebanks, the Turkish PUD Treebank and TNC-UD, was carried out based on the decisions concerning linguistic adequacy of re-annotation of the Turkish IMST-UD Treebank (Türk et. al., forthcoming). Both of the treebanks were annotated with the same annotation process and morphological and syntactic analyses. The TNC-UD is planned to have 10,000 sentences. In this paper, we will present the first 500 sentences along with the annotation PUD Treebank. Moreover, this paper also offers the parsing results of a graph-based neural parser on the previous and re-annotated PUD, as well as the TNC-UD. In light of the comparisons, even though we observe a slight decrease in the attachment scores of the Turkish PUD treebank, we demonstrate that the annotation of the TNC-UD improves the parsing accuracy of Turkish. In addition to the treebanks, we have also constructed a custom annotation software with advanced filtering and morphological editing options. Both the treebanks, including a full edit-history and the annotation guidelines, and the custom software are publicly available under an open license online.</abstract>
       <url hash="62eef070">W19-4019</url>
@@ -6806,7 +6806,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
     <paper id="20">
       <title>A Dataset for Semantic Role Labelling of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Mixed Tweets</title>
       <author><first>Riya</first><last>Pal</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>178–188</pages>
       <abstract>We present a data set of 1460 Hindi-English code-mixed tweets consisting of 20,949 tokens labelled with Proposition Bank labels marking their semantic roles. We created verb frames for complex predicates present in the corpus and formulated mappings from Paninian dependency labels to Proposition Bank labels. With the help of these mappings and the dependency tree, we propose a baseline rule based system for Semantic Role Labelling of Hindi-English code-mixed data. We obtain an accuracy of 96.74% for Argument Identification and are able to further classify 73.93% of the labels correctly. While there is relevant ongoing research on Semantic Role Labelling and on building tools for code-mixed social media data, this is the first attempt at labelling semantic roles in code-mixed data, to the best of our knowledge.</abstract>
       <url hash="d2e27358">W19-4020</url>
@@ -6816,7 +6816,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
     <paper id="21">
       <title>A Multi-Platform Annotation Ecosystem for Domain Adaptation</title>
       <author><first>Richard</first><last>Eckart de Castilho</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Jan-Christoph</first><last>Klie</last></author>
       <author><first>Keith</first><last>Suderman</last></author>
@@ -6829,7 +6829,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple
     <paper id="22">
       <title>A New Annotation Scheme for the <fixed-case>S</fixed-case>ejong Part-of-speech Tagged Corpus</title>
       <author><first>Jungyeul</first><last>Park</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <pages>195–202</pages>
       <abstract>In this paper we present a new annotation scheme for the Sejong part-of-speech tagged corpus based on Universal Dependencies style annotation. By using a new annotation scheme, we can produce Sejong-style morphological analysis and part-of-speech tagging results which have been the <i>de facto</i> standard for Korean language processing. We also explore the possibility of doing named-entity recognition and semantic-role labelling for Korean using the new annotation scheme.</abstract>
       <url hash="22b57a4e">W19-4022</url>
@@ -6907,7 +6907,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="28">
       <title>Towards a General <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Corpus for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
+      <author id="marco-antonio-sobrevilla-cabezudo"><first>Marco Antonio</first><last>Sobrevilla Cabezudo</last></author>
       <author><first>Thiago</first><last>Pardo</last></author>
       <pages>236–244</pages>
       <abstract>Abstract Meaning Representation (AMR) is a recent and prominent semantic representation with good acceptance and several applications in the Natural Language Processing area. For English, there is a large annotated corpus (with approximately 39K sentences) that supports the research with the representation. However, to the best of our knowledge, there is only one restricted corpus for Portuguese, which contains 1,527 sentences. In this context, this paper presents an effort to build a general purpose AMR-annotated corpus for Brazilian Portuguese by translating and adapting AMR English guidelines. Our results show that such approach is feasible, but there are some challenging phenomena to solve. More than this, efforts are necessary to increase the coverage of the corresponding lexical resource that supports the annotation.</abstract>
@@ -6922,7 +6922,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <url hash="d156f322">W19-41</url>
       <editor><first>Yun-Nung</first><last>Chen</last></editor>
       <editor><first>Tania</first><last>Bedrax-Weiss</last></editor>
-      <editor><first>Dilek</first><last>Hakkani-Tur</last></editor>
+      <editor id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></editor>
       <editor><first>Anuj</first><last>Kumar</last></editor>
       <editor><first>Mike</first><last>Lewis</last></editor>
       <editor><first>Thang-Minh</first><last>Luong</last></editor>
@@ -6975,7 +6975,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Nouha</first><last>Dziri</last></author>
       <author><first>Ehsan</first><last>Kamalloo</last></author>
       <author><first>Kory</first><last>Mathewson</last></author>
-      <author><first>Osmar</first><last>Zaiane</last></author>
+      <author id="osmar-r-zaiane"><first>Osmar</first><last>Zaiane</last></author>
       <pages>18–31</pages>
       <abstract>Sequence-to-Sequence (Seq2Seq) models have witnessed a notable success in generating natural conversational exchanges. Notwithstanding the syntactically well-formed responses generated by these neural network models, they are prone to be acontextual, short and generic. In this work, we introduce a Topical Hierarchical Recurrent Encoder Decoder (THRED), a novel, fully data-driven, multi-turn response generation system intended to produce contextual and topic-aware responses. Our model is built upon the basic Seq2Seq model by augmenting it with a hierarchical joint attention mechanism that incorporates topical concepts and previous interactions into the response generation. To train our model, we provide a clean and high-quality conversational dataset mined from Reddit comments. We evaluate THRED on two novel automated metrics, dubbed Semantic Similarity and Response Echo Index, as well as with human evaluation. Our experiments demonstrate that the proposed model is able to generate more diverse and contextually relevant responses compared to the strong baselines.</abstract>
       <url hash="dbd612e5">W19-4103</url>
@@ -7024,7 +7024,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Chulaka</first><last>Gunasekara</last></author>
       <author><first>Jonathan K.</first><last>Kummerfeld</last></author>
       <author><first>Lazaros</first><last>Polymenakos</last></author>
-      <author><first>Walter</first><last>Lasecki</last></author>
+      <author id="walter-lasecki"><first>Walter</first><last>Lasecki</last></author>
       <pages>60–67</pages>
       <abstract>Goal-oriented dialogue in complex domains is an extremely challenging problem and there are relatively few datasets. This task provided two new resources that presented different challenges: one was focused but small, while the other was large but diverse. We also considered several new variations on the next utterance selection problem: (1) increasing the number of candidates, (2) including paraphrases, and (3) not including a correct option in the candidate set. Twenty teams participated, developing a range of neural network models, including some that successfully incorporated external data to boost performance. Both datasets have been publicly released, enabling future work to build on these results, working towards robust goal-oriented dialogue systems.</abstract>
       <url hash="bebc2a9b">W19-4107</url>
@@ -7050,7 +7050,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Energy-Based Modelling for Dialogue State Tracking</title>
       <author><first>Anh Duong</first><last>Trinh</last></author>
       <author><first>Robert</first><last>Ross</last></author>
-      <author><first>John</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John</first><last>Kelleher</last></author>
       <pages>77–86</pages>
       <abstract>The uncertainties of language and the complexity of dialogue contexts make accurate dialogue state tracking one of the more challenging aspects of dialogue processing. To improve state tracking quality, we argue that relationships between different aspects of dialogue state must be taken into account as they can often guide a more accurate interpretation process. To this end, we present an energy-based approach to dialogue state tracking as a structured classification task. The novelty of our approach lies in the use of an energy network on top of a deep learning architecture to explore more signal correlations between network variables including input features and output labels. We demonstrate that the energy-based approach improves the performance of a deep learning dialogue state tracker towards state-of-the-art results without the need for many of the other steps required by current state-of-the-art methods.</abstract>
       <url hash="680aab86">W19-4109</url>
@@ -7098,7 +7098,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Learning to Explain: Answering Why-Questions via Rephrasing</title>
       <author><first>Allen</first><last>Nie</last></author>
       <author><first>Erin</first><last>Bennett</last></author>
-      <author><first>Noah</first><last>Goodman</last></author>
+      <author id="noah-goodman"><first>Noah</first><last>Goodman</last></author>
       <pages>113–120</pages>
       <abstract>Providing plausible responses to why questions is a challenging but critical goal for language based human-machine interaction. Explanations are challenging in that they require many different forms of abstract knowledge and reasoning. Previous work has either relied on human-curated structured knowledge bases or detailed domain representation to generate satisfactory explanations. They are also often limited to ranking pre-existing explanation choices. In our work, we contribute to the under-explored area of generating natural language explanations for general phenomena. We automatically collect large datasets of explanation-phenomenon pairs which allow us to train sequence-to-sequence models to generate natural language explanations. We compare different training strategies and evaluate their performance using both automatic scores and human ratings. We demonstrate that our strategy is sufficient to generate highly plausible explanations for general open-domain phenomena compared to other models trained on different datasets.</abstract>
       <url hash="01f122c9">W19-4113</url>
@@ -7212,7 +7212,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Ahmet</first><last>Üstün</last></author>
       <author><first>Rob</first><last>van der Goot</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <pages>35–49</pages>
       <abstract>This paper describes our submission to SIGMORPHON 2019 Task 2: Morphological analysis and lemmatization in context. Our model is a multi-task sequence to sequence neural network, which jointly learns morphological tagging and lemmatization. On the encoding side, we exploit character-level as well as contextual information. We introduce a multi-attention decoder to selectively focus on different parts of character and word sequences. To further improve the model, we train on multiple datasets simultaneously and use external embeddings for initialization. Our final model reaches an average morphological tagging F1 score of 94.54 and a lemma accuracy of 93.91 on the test data, ranking respectively 3rd and 6th out of 13 teams in the SIGMORPHON 2019 shared task.</abstract>
       <url hash="696fe9f9">W19-4206</url>
@@ -7222,7 +7222,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="7">
       <title><fixed-case>IT</fixed-case>–<fixed-case>IST</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task: Sparse Two-headed Models for Inflection</title>
       <author><first>Ben</first><last>Peters</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>50–56</pages>
       <abstract>This paper presents the Instituto de Telecomunicações–Instituto Superior Técnico submission to Task 1 of the SIGMORPHON 2019 Shared Task. Our models combine sparse sequence-to-sequence models with a two-headed attention mechanism that learns separate attention distributions for the lemma and inflectional tags. Among submissions to Task 1, our models rank second and third. Despite the low data setting of the task (only 100 in-language training examples), they learn plausible inflection patterns and often concentrate all probability mass into a small set of hypotheses, making beam search exact.</abstract>
       <url hash="b6b76c15">W19-4207</url>
@@ -7234,8 +7234,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>Elizabeth</first><last>Salesky</last></author>
       <author><first>Gayatri</first><last>Bhat</last></author>
-      <author><first>David R.</first><last>Mortensen</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="david-r-mortensen"><first>David R.</first><last>Mortensen</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>57–70</pages>
       <abstract>This paper presents the submission by the CMU-01 team to the SIGMORPHON 2019 task 2 of Morphological Analysis and Lemmatization in Context. This task requires us to produce the lemma and morpho-syntactic description of each token in a sequence, for 107 treebanks. We approach this task with a hierarchical neural conditional random field (CRF) model which predicts each coarse-grained feature (eg. POS, Case, etc.) independently. However, most treebanks are under-resourced, thus making it challenging to train deep neural models for them. Hence, we propose a multi-lingual transfer training regime where we transfer from multiple related languages that share similar typology.</abstract>
@@ -7270,7 +7270,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Alexis</first><last>Palmer</last></author>
       <author><first>Suleyman Olcay</first><last>Polat</last></author>
       <author><first>Taraka</first><last>Rama</last></author>
-      <author><first>Rodney</first><last>Nielsen</last></author>
+      <author id="rodney-nielsen"><first>Rodney</first><last>Nielsen</last></author>
       <pages>87–94</pages>
       <abstract>This paper presents the UNT HiLT+Ling system for the Sigmorphon 2019 shared Task 2: Morphological Analysis and Lemmatization in Context. Our core approach focuses on the morphological tagging task; part-of-speech tagging and lemmatization are treated as secondary tasks. Given the highly multilingual nature of the task, we propose an approach which makes minimal use of the supplied training data, in order to be extensible to languages without labeled training data for the morphological inflection task. Specifically, we use a parallel Bible corpus to align contextual embeddings at the verse level. The aligned verses are used to build cross-language translation matrices, which in turn are used to map between embedding spaces for the various languages. Finally, we use sets of inflected forms, primarily from a high-resource language, to induce vector representations for individual UniMorph tags. Morphological analysis is performed by matching vector representations to embeddings for individual tokens. While our system results are dramatically below the average system submitted for the shared task evaluation campaign, our method is (we suspect) unique in its minimal reliance on labeled training data.</abstract>
       <url hash="8cb396ed">W19-4211</url>
@@ -7281,7 +7281,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>UDP</fixed-case>ipe at <fixed-case>SIGMORPHON</fixed-case> 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging</title>
       <author><first>Milan</first><last>Straka</last></author>
       <author><first>Jana</first><last>Straková</last></author>
-      <author><first>Jan</first><last>Hajic</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajic</last></author>
       <pages>95–103</pages>
       <abstract>We present our contribution to the SIGMORPHON 2019 Shared Task: Crosslinguality and Context in Morphology, Task 2: contextual morphological analysis and lemmatization. We submitted a modification of the UDPipe 2.0, one of best-performing systems of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies and an overall winner of the 2018 Shared Task on Extrinsic Parser Evaluation. As our first improvement, we use the pretrained contextualized embeddings (BERT) as additional inputs to the network; secondly, we use individual morphological features as regularization; and finally, we merge the selected corpora of the same language. In the lemmatization task, our system exceeds all the submitted systems by a wide margin with lemmatization accuracy 95.78 (second best was 95.00, third 94.46). In the morphological analysis, our system placed tightly second: our morphological analysis accuracy was 93.19, the winning system’s 93.23.</abstract>
       <url hash="30278e18">W19-4212</url>
@@ -7292,7 +7292,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>CUNI</fixed-case>–<fixed-case>M</fixed-case>alta system at <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task on Morphological Analysis and Lemmatization in context: Operation-based word formation</title>
       <author><first>Ronald</first><last>Cardenas</last></author>
       <author><first>Claudia</first><last>Borg</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>104–112</pages>
       <abstract>This paper presents the submission by the Charles University-University of Malta team to the SIGMORPHON 2019 Shared Task on Morphological Analysis and Lemmatization in context. We present a lemmatization model based on previous work on neural transducers (Makarov and Clematide, 2018b; Aharoni and Goldberg, 2016). The key difference is that our model transforms the whole word form in every step, instead of consuming it character by character. We propose a merging strategy inspired by Byte-Pair-Encoding that reduces the space of valid operations by merging frequent adjacent operations. The resulting operations not only encode the actions to be performed but the relative position in the word token and how characters need to be transformed. Our morphological tagger is a vanilla biLSTM tagger that operates over operation representations, encoding operations and words in a hierarchical manner. Even though relative performance according to metrics is below the baseline, experiments show that our models capture important associations between interpretable operation labels and fine-grained morpho-syntax labels.</abstract>
       <url hash="8461346c">W19-4213</url>
@@ -7385,7 +7385,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="22">
       <title>Unsupervised Morphological Segmentation for Low-Resource Polysynthetic Languages</title>
       <author><first>Ramy</first><last>Eskander</last></author>
-      <author><first>Judith</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith</first><last>Klavans</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
       <pages>189–195</pages>
       <abstract>Polysynthetic languages pose a challenge for morphological analysis due to the root-morpheme complexity and to the word class “squish”. In addition, many of these polysynthetic languages are low-resource. We propose unsupervised approaches for morphological segmentation of low-resource polysynthetic languages based on Adaptor Grammars (AG) (Eskander et al., 2016). We experiment with four languages from the Uto-Aztecan family. Our AG-based approaches outperform other unsupervised approaches and show promise when compared to supervised methods, outperforming them on two of the four languages.</abstract>
@@ -7406,8 +7406,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="24">
       <title>Encoder-decoder models for latent phonological representations of words</title>
-      <author><first>Cassandra L.</first><last>Jacobs</last></author>
-      <author><first>Frédéric</first><last>Mailhot</last></author>
+      <author id="cassandra-l-jacobs"><first>Cassandra L.</first><last>Jacobs</last></author>
+      <author id="frederic-mailhot"><first>Frédéric</first><last>Mailhot</last></author>
       <pages>206–217</pages>
       <abstract>We use sequence-to-sequence networks trained on sequential phonetic encoding tasks to construct compositional phonological representations of words. We show that the output of an encoder network can predict the phonetic durations of American English words better than a number of alternative forms. We also show that the model’s learned representations map onto existing measures of words’ phonological structure (phonological neighborhood density and phonotactic probability).</abstract>
       <url hash="7ef2c2e6">W19-4224</url>
@@ -7426,15 +7426,15 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="26">
       <title>The <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task: Morphological Analysis in Context and Cross-Lingual Transfer for Inflection</title>
-      <author><first>Arya D.</first><last>McCarthy</last></author>
+      <author id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></author>
       <author><first>Ekaterina</first><last>Vylomova</last></author>
       <author><first>Shijie</first><last>Wu</last></author>
       <author><first>Chaitanya</first><last>Malaviya</last></author>
       <author><first>Lawrence</first><last>Wolf-Sonkin</last></author>
       <author><first>Garrett</first><last>Nicolai</last></author>
       <author><first>Christo</first><last>Kirov</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
-      <author><first>Sabrina J.</first><last>Mielke</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="sabrina-j-mielke"><first>Sabrina J.</first><last>Mielke</last></author>
       <author><first>Jeffrey</first><last>Heinz</last></author>
       <author><first>Ryan</first><last>Cotterell</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
@@ -7455,7 +7455,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <editor><first>Isabelle</first><last>Augenstein</last></editor>
       <editor><first>Spandana</first><last>Gella</last></editor>
       <editor><first>Sebastian</first><last>Ruder</last></editor>
-      <editor><first>Katharina</first><last>Kann</last></editor>
+      <editor id="katharina-von-der-wense"><first>Katharina</first><last>Kann</last></editor>
       <editor><first>Burcu</first><last>Can</last></editor>
       <editor><first>Johannes</first><last>Welbl</last></editor>
       <editor><first>Alexis</first><last>Conneau</last></editor>
@@ -7487,9 +7487,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="2">
       <title>To Tune or Not to Tune? Adapting Pretrained Representations to Diverse Tasks</title>
-      <author><first>Matthew E.</first><last>Peters</last></author>
+      <author id="matthew-e-peters"><first>Matthew E.</first><last>Peters</last></author>
       <author><first>Sebastian</first><last>Ruder</last></author>
-      <author><first>Noah A.</first><last>Smith</last></author>
+      <author id="noah-a-smith"><first>Noah A.</first><last>Smith</last></author>
       <pages>7–14</pages>
       <abstract>While most previous work has focused on different pretraining objectives and architectures for transfer learning, we ask how to best adapt the pretrained model to a given target task. We focus on the two most common forms of adaptation, feature extraction (where the pretrained weights are frozen), and directly fine-tuning the pretrained model. Our empirical results across diverse NLP tasks with two state-of-the-art models show that the relative performance of fine-tuning vs. feature extraction depends on the similarity of the pretraining and target tasks. We explore possible explanations for this finding and provide a set of adaptation guidelines for the NLP practitioner.</abstract>
       <url hash="fcff1713">W19-4302</url>
@@ -7513,7 +7513,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>27–32</pages>
       <abstract>In this paper, we explore a multilingual translation model with a cross-lingually shared layer that can be used as fixed-size sentence representation in different downstream tasks. We systematically study the impact of the size of the shared layer and the effect of including additional languages in the model. In contrast to related previous work, we demonstrate that the performance in translation does correlate with trainable downstream tasks. In particular, we show that larger intermediate layers not only improve translation quality, especially for long sentences, but also push the accuracy of trainable classification tasks. On the other hand, shorter representations lead to increased compression that is beneficial in non-trainable similarity tasks. We hypothesize that the training procedure on the downstream task enables the model to identify the encoded information that is useful for the specific task whereas non-trainable benchmarks can be confused by other types of information also encoded in the representation of a sentence.</abstract>
       <url hash="87066e12">W19-4304</url>
@@ -7524,7 +7524,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Multilingual <fixed-case>NMT</fixed-case> with a Language-Independent Attention Bridge</title>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Mathias</first><last>Creutz</last></author>
       <pages>33–39</pages>
       <abstract>In this paper, we propose an architecture for machine translation (MT) capable of obtaining multilingual sentence representations by incorporating an intermediate attention bridge that is shared across all languages. We train the model with language-specific encoders and decoders that are connected through an inner-attention layer on the encoder side. The attention bridge exploits the semantics from each language for translation and develops into a language-agnostic meaning representation that can efficiently be used for transfer learning. We present a new framework for the efficient development of multilingual neural machine translation (NMT) using this model and scheduled training. We have tested the approach in a systematic way with a multi-parallel data set. The model achieves substantial improvements over strong bilingual models and performs well for zero-shot translation, which demonstrates its ability of abstraction and transfer learning.</abstract>
@@ -7547,7 +7547,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="7">
       <title><fixed-case>M</fixed-case>o<fixed-case>RT</fixed-case>y: Unsupervised Learning of Task-specialized Word Embeddings by Autoencoding</title>
       <author><first>Nils</first><last>Rethmeier</last></author>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>49–54</pages>
       <abstract>Word embeddings have undoubtedly revolutionized NLP. However, pretrained embeddings do not always work for a specific task (or set of tasks), particularly in limited resource setups. We introduce a simple yet effective, self-supervised post-processing method that constructs task-specialized word representations by picking from a menu of reconstructing transformations to yield improved end-task performance (MORTY). The method is complementary to recent state-of-the-art approaches to inductive transfer via fine-tuning, and forgoes costly model architectures and annotation. We evaluate MORTY on a broad range of setups, including different word embedding methods, corpus sizes and end-task semantics. Finally, we provide a surprisingly simple recipe to obtain specialized embeddings that better fit end-tasks.</abstract>
       <url hash="7afafc75">W19-4307</url>
@@ -7572,7 +7572,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Nick</first><last>Rossenbach</last></author>
       <author><first>Jan</first><last>Rosendahl</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>61–71</pages>
       <abstract>We propose a novel model architecture and training algorithm to learn bilingual sentence embeddings from a combination of parallel and monolingual data. Our method connects autoencoding and neural machine translation to force the source and target sentence embeddings to share the same space without the help of a pivot language or an additional transformation. We train a multilayer perceptron on top of the sentence embeddings to extract good bilingual sentence pairs from nonparallel or noisy parallel data. Our approach shows promising performance on sentence alignment recovery and the WMT 2018 parallel corpus filtering tasks with only a single model.</abstract>
       <url hash="cf9dfeca">W19-4309</url>
@@ -7595,7 +7595,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="11">
       <title>Composing Noun Phrase Vector Representations</title>
       <author><first>Aikaterini-Lida</first><last>Kalouli</last></author>
-      <author><first>Valeria</first><last>de Paiva</last></author>
+      <author id="valeria-de-paiva"><first>Valeria</first><last>de Paiva</last></author>
       <author><first>Richard</first><last>Crouch</last></author>
       <pages>84–95</pages>
       <abstract>Vector representations of words have seen an increasing success over the past years in a variety of NLP tasks. While there seems to be a consensus about the usefulness of word embeddings and how to learn them, it is still unclear which representations can capture the meaning of phrases or even whole sentences. Recent work has shown that simple operations outperform more complex deep architectures. In this work, we propose two novel constraints for computing noun phrase vector representations. First, we propose that the semantic and not the syntactic contribution of each component of a noun phrase should be considered, so that the resulting composed vectors express more of the phrase meaning. Second, the composition process of the two phrase vectors should apply suitable dimensions’ selection in a way that specific semantic features captured by the phrase’s meaning become more salient. Our proposed methods are compared to 11 other approaches, including popular baselines and a neural net architecture, and are evaluated across 6 tasks and 2 datasets. Our results show that these constraints lead to more expressive phrase representations and can be applied to other state-of-the-art methods to improve their performance.</abstract>
@@ -7629,7 +7629,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="14">
       <title>Constructive Type-Logical Supertagging With Self-Attention Networks</title>
       <author><first>Konstantinos</first><last>Kogkalidis</last></author>
-      <author><first>Michael</first><last>Moortgat</last></author>
+      <author id="michael-moortgat"><first>Michael</first><last>Moortgat</last></author>
       <author><first>Tejaswini</first><last>Deoskar</last></author>
       <pages>113–123</pages>
       <abstract>We propose a novel application of self-attention networks towards grammar induction. We present an attention-based supertagger for a refined type-logical grammar, trained on constructing types inductively. In addition to achieving a high overall type accuracy, our model is able to learn the syntax of the grammar’s type system along with its denotational semantics. This lifts the closed world assumption commonly made by lexicalized grammar supertaggers, greatly enhancing its generalization potential. This is evidenced both by its adequate accuracy over sparse word types and its ability to correctly construct complex types never seen during training, which, to the best of our knowledge, was as of yet unaccomplished.</abstract>
@@ -7663,7 +7663,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>An Empirical Study on Pre-trained Embeddings and Language Models for Bot Detection</title>
       <author><first>Andres</first><last>Garcia-Silva</last></author>
       <author><first>Cristian</first><last>Berrio</last></author>
-      <author><first>José Manuel</first><last>Gómez-Pérez</last></author>
+      <author id="jose-manuel-gomez-perez"><first>José Manuel</first><last>Gómez-Pérez</last></author>
       <pages>148–155</pages>
       <abstract>Fine-tuning pre-trained language models has significantly advanced the state of art in a wide range of NLP downstream tasks. Usually, such language models are learned from large and well-formed text corpora from e.g. encyclopedic resources, books or news. However, a significant amount of the text to be analyzed nowadays is Web data, often from social media. In this paper we consider the research question: How do standard pre-trained language models generalize and capture the peculiarities of rather short, informal and frequently automatically generated text found in social media? To answer this question, we focus on bot detection in Twitter as our evaluation task and test the performance of fine-tuning approaches based on language models against popular neural architectures such as LSTM and CNN combined with pre-trained and contextualized embeddings. Our results also show strong performance variations among the different language model approaches, which suggest further research.</abstract>
       <url hash="d9e8c6cc">W19-4317</url>
@@ -7673,7 +7673,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="18">
       <title>Probing Multilingual Sentence Representations With <fixed-case>X</fixed-case>-Probe</title>
       <author><first>Vinit</first><last>Ravishankar</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>156–168</pages>
       <abstract>This paper extends the task of probing sentence representations for linguistic insight in a multilingual domain. In doing so, we make two contributions: first, we provide datasets for multilingual probing, derived from Wikipedia, in five languages, viz. English, French, German, Spanish and Russian. Second, we evaluate six sentence encoders for each language, each trained by mapping sentence representations to English sentence representations, using sentences in a parallel corpus. We discover that cross-lingually mapped representations are often better at retaining certain linguistic information than representations derived from English encoders trained on natural language inference (NLI) as a downstream task.</abstract>
@@ -7694,7 +7694,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="20">
       <title>Learning Multilingual Meta-Embeddings for Code-Switching Named Entity Recognition</title>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>181–186</pages>
@@ -7753,7 +7753,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="25">
       <title>On Committee Representations of Adversarial Learning Models for Question-Answer Ranking</title>
       <author><first>Sparsh</first><last>Gupta</last></author>
-      <author><first>Vitor</first><last>Carvalho</last></author>
+      <author id="vitor-carvalho"><first>Vitor</first><last>Carvalho</last></author>
       <pages>218–223</pages>
       <abstract>Adversarial training is a process in Machine Learning that explicitly trains models on adversarial inputs (inputs designed to deceive or trick the learning process) in order to make it more robust or accurate. In this paper we investigate how representing adversarial training models as committees can be used to effectively improve the performance of Question-Answer (QA) Ranking. We start by empirically probing the effects of adversarial training over multiple QA ranking algorithms, including the state-of-the-art Multihop Attention Network model. We evaluate these algorithms on several benchmark datasets and observe that, while adversarial training is beneficial to most baseline algorithms, there are cases where it may lead to overfitting and performance degradation. We investigate the causes of such degradation, and then propose a new representation procedure for this adversarial learning problem, based on committee learning, that not only is capable of consistently improving all baseline algorithms, but also outperforms the previous state-of-the-art algorithm by as much as 6% in NDCG.</abstract>
       <url hash="2cd1bc2d">W19-4325</url>
@@ -7831,7 +7831,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="32">
       <title>Leveraging Pre-Trained Embeddings for <fixed-case>W</fixed-case>elsh Taggers</title>
       <author><first>Ignatius</first><last>Ezeani</last></author>
-      <author><first>Scott</first><last>Piao</last></author>
+      <author id="scott-s-l-piao"><first>Scott</first><last>Piao</last></author>
       <author><first>Steven</first><last>Neale</last></author>
       <author><first>Paul</first><last>Rayson</last></author>
       <author><first>Dawn</first><last>Knight</last></author>
@@ -7899,10 +7899,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="4">
       <title>Computationally Modeling the Impact of Task-Appropriate Language Complexity and Accuracy on Human Grading of <fixed-case>G</fixed-case>erman Essays</title>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <author><first>Anja</first><last>Riemenschneider</last></author>
       <author><first>Pauline</first><last>Schröter</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>30–45</pages>
       <abstract>Computational linguistic research on the language complexity of student writing typically involves human ratings as a gold standard. However, educational science shows that teachers find it difficult to identify and cleanly separate accuracy, different aspects of complexity, contents, and structure. In this paper, we therefore explore the use of computational linguistic methods to investigate how task-appropriate complexity and accuracy relate to the grading of overall performance, content performance, and language performance as assigned by teachers. Based on texts written by students for the official school-leaving state examination (Abitur), we show that teachers successfully assign higher language performance grades to essays with higher task-appropriate language complexity and properly separate this from content scores. Yet, accuracy impacts teacher assessment for all grading rubrics, also the content score, overemphasizing the role of accuracy. Our analysis is based on broad computational linguistic modeling of German language complexity and an innovative theory- and data-driven feature aggregation method inferring task-appropriate language complexity.</abstract>
       <url hash="da63fbde">W19-4404</url>
@@ -7924,8 +7924,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>BEA</fixed-case>-2019 Shared Task on Grammatical Error Correction</title>
       <author><first>Christopher</first><last>Bryant</last></author>
       <author><first>Mariano</first><last>Felice</last></author>
-      <author><first>Øistein E.</first><last>Andersen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="oistein-e-andersen"><first>Øistein E.</first><last>Andersen</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>52–75</pages>
       <abstract>This paper reports on the BEA-2019 Shared Task on Grammatical Error Correction (GEC). As with the CoNLL-2014 shared task, participants are required to correct all types of errors in test data. One of the main contributions of the BEA-2019 shared task is the introduction of a new dataset, the Write&amp;Improve+LOCNESS corpus, which represents a wider range of native and learner English levels and abilities. Another contribution is the introduction of tracks, which control the amount of annotated data available to participants. Systems are evaluated in terms of ERRANT F_0.5, which allows us to report a much wider range of performance statistics. The competition was hosted on Codalab and remains open for further submissions on the blind test set.</abstract>
       <url hash="5c88f72d">W19-4406</url>
@@ -7957,7 +7957,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Regression or classification? Automated Essay Scoring for <fixed-case>N</fixed-case>orwegian</title>
       <author><first>Stig</first><last>Johan Berggren</last></author>
       <author><first>Taraka</first><last>Rama</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>92–102</pages>
       <abstract>In this paper we present first results for the task of Automated Essay Scoring for Norwegian learner language. We analyze a number of properties of this task experimentally and assess (i) the formulation of the task as either regression or classification, (ii) the use of various non-neural and neural machine learning architectures with various types of input representations, and (iii) applying multi-task learning for joint prediction of essay scoring and native language identification. We find that a GRU-based attention model trained in a single-task setting performs best at the AES task.</abstract>
       <url hash="3aa92729">W19-4409</url>
@@ -8086,7 +8086,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Noisy Channel for Low Resource Grammatical Error Correction</title>
       <author><first>Simon</first><last>Flachs</last></author>
       <author><first>Ophélie</first><last>Lacroix</last></author>
-      <author><first>Anders</first><last>Søgaard</last></author>
+      <author id="anders-sogaard"><first>Anders</first><last>Søgaard</last></author>
       <pages>191–196</pages>
       <abstract>This paper describes our contribution to the low-resource track of the BEA 2019 shared task on Grammatical Error Correction (GEC). Our approach to GEC builds on the theory of the noisy channel by combining a channel model and language model. We generate confusion sets from the Wikipedia edit history and use the frequencies of edits to estimate the channel model. Additionally, we use two pre-trained language models: 1) Google’s BERT model, which we fine-tune for specific error types and 2) OpenAI’s GPT-2 model, utilizing that it can operate with previous sentences as context. Furthermore, we search for the optimal combinations of corrections using beam search.</abstract>
       <url hash="931eccba">W19-4420</url>
@@ -8188,7 +8188,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="29">
       <title>Anglicized Words and Misspelled Cognates in Native Language Identification</title>
       <author><first>Ilia</first><last>Markov</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <pages>275–284</pages>
       <abstract>In this paper, we present experiments that estimate the impact of specific lexical choices of people writing in a second language (L2). In particular, we look at misspelled words that indicate lexical uncertainty on the part of the author, and separate them into three categories: misspelled cognates, “L2-ed” (in our case, anglicized) words, and all other spelling errors. We test the assumption that such errors contain clues about the native language of an essay’s author through the task of native language identification. The results of the experiments show that the information brought by each of these categories is complementary. We also note that while the distribution of such features changes with the proficiency level of the writer, their contribution towards native language identification remains significant at all levels.</abstract>
@@ -8221,7 +8221,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="32">
       <title>Toward Automated Content Feedback Generation for Non-native Spontaneous Speech</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
       <author><first>Ching-Ni</first><last>Hsieh</last></author>
       <author><first>Klaus</first><last>Zechner</last></author>
       <author><first>Matthew</first><last>Mulholland</last></author>
@@ -8315,7 +8315,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Simple Construction of Mixed-Language Texts for Vocabulary Learning</title>
       <author><first>Adithya</first><last>Renduchintala</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Jason</first><last>Eisner</last></author>
+      <author id="jason-eisner"><first>Jason</first><last>Eisner</last></author>
       <pages>369–379</pages>
       <abstract>We present a machine foreign-language teacher that takes documents written in a student’s native language and detects situations where it can replace words with their foreign glosses such that new foreign vocabulary can be learned simply through reading the resulting mixed-language text. We show that it is possible to design such a machine teacher without any supervised data from (human) students. We accomplish this by modifying a cloze language model to incrementally learn new vocabulary items, and use this language model as a proxy for the word guessing and learning ability of real students. Our machine foreign-language teacher decides which subset of words to replace by consulting this language model. We evaluate three variants of our student proxy language models through a study on Amazon Mechanical Turk (MTurk). We find that MTurk “students” were able to guess the meanings of foreign words introduced by the machine teacher with high accuracy for both function words as well as content words in two out of the three models. In addition, we show that students are able to retain their knowledge about the foreign words after they finish reading the document.</abstract>
       <url hash="0b277ede">W19-4439</url>
@@ -8324,8 +8324,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="40">
       <title>Analyzing Linguistic Complexity and Accuracy in Academic Language Development of <fixed-case>G</fixed-case>erman across Elementary and Secondary School</title>
-      <author><first>Zarah</first><last>Weiss</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>380–393</pages>
       <abstract>We track the development of writing complexity and accuracy in German students’ early academic language development from first to eighth grade. Combining an empirically broad approach to linguistic complexity with the high-quality error annotation included in the Karlsruhe Children’s Text corpus (Lavalley et al. 2015) used, we construct models of German academic language development that successfully identify the student’s grade level. We show that classifiers for the early years rely more on accuracy development, whereas development in secondary school is better characterized by increasingly complex language in all domains: linguistic system, language use, and human sentence processing characteristics. We demonstrate the generalizability and robustness of models using such a broad complexity feature set across writing topics.</abstract>
       <url hash="1f2e2ad6">W19-4440</url>
@@ -8334,8 +8334,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="41">
       <title>Content Modeling for Automated Oral Proficiency Scoring System</title>
-      <author><first>Su-Youn</first><last>Yoon</last></author>
-      <author><first>Chong Min</first><last>Lee</last></author>
+      <author id="su-youn-yoon"><first>Su-Youn</first><last>Yoon</last></author>
+      <author id="chungmin-lee"><first>Chong Min</first><last>Lee</last></author>
       <pages>394–401</pages>
       <abstract>We developed an automated oral proficiency scoring system for non-native English speakers’ spontaneous speech. Automated systems that score holistic proficiency are expected to assess a wide range of performance categories, and the content is one of the core performance categories. In order to assess the quality of the content, we trained a Siamese convolutional neural network (CNN) to model the semantic relationship between key points generated by experts and a test response. The correlation between human scores and Siamese CNN scores was comparable to human-human agreement (r=0.63), and it was higher than the baseline content features. The inclusion of Siamese CNN-based feature to the existing state-of-the-art automated scoring model achieved a small but statistically significant improvement. However, the new model suffered from score inflation for long atypical responses with serious content issues. We investigated the reasons of this score inflation by analyzing the associations with linguistic features and identifying areas strongly associated with the score errors.</abstract>
       <url hash="44a350f6">W19-4441</url>
@@ -8372,7 +8372,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="44">
       <title>Metaphors in Text Simplification: To change or not to change, that is the question</title>
       <author><first>Yulia</first><last>Clausen</last></author>
-      <author><first>Vivi</first><last>Nastase</last></author>
+      <author id="vivi-nastase"><first>Vivi</first><last>Nastase</last></author>
       <pages>423–434</pages>
       <abstract>We present an analysis of metaphors in news text simplification. Using features that capture general and metaphor specific characteristics, we test whether we can automatically identify which metaphors will be changed or preserved, and whether there are features that have different predictive power for metaphors or literal words. The experiments show that the Age of Acquisition is the most distinctive feature for both metaphors and literal words. Features that capture Imageability and Concreteness are useful when used alone, but within the full set of features they lose their impact. Frequency of use seems to be the best feature to differentiate metaphors that should be changed and those to be preserved.</abstract>
       <url hash="35318d47">W19-4444</url>
@@ -8400,7 +8400,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>David</first><last>Gerritsen</last></author>
       <author><first>Brittany</first><last>McLaughlin</last></author>
       <author><first>Ezekiel</first><last>Dixon-Román</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>444–460</pages>
       <abstract>There is a long record of research on equity in schools. As machine learning researchers begin to study fairness and bias in earnest, language technologies in education have an unusually strong theoretical and applied foundation to build on. Here, we introduce concepts from culturally relevant pedagogy and other frameworks for teaching and learning, identifying future work on equity in NLP. We present case studies in a range of topics like intelligent tutoring systems, computer-assisted language learning, automated essay scoring, and sentiment analysis in classrooms, and provide an actionable agenda for research.</abstract>
       <url hash="b9d5e4d4">W19-4446</url>
@@ -8436,7 +8436,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="49">
       <title>The Unbearable Weight of Generating Artificial Errors for Grammatical Error Correction</title>
       <author><first>Phu Mon</first><last>Htut</last></author>
-      <author><first>Joel</first><last>Tetreault</last></author>
+      <author id="joel-tetreault"><first>Joel</first><last>Tetreault</last></author>
       <pages>478–483</pages>
       <abstract>In this paper, we investigate the impact of using 4 recent neural models for generating artificial errors to help train the neural grammatical error correction models. We conduct a battery of experiments on the effect of data size, models, and comparison with a rule-based approach.</abstract>
       <url hash="b9295325">W19-4449</url>
@@ -8448,7 +8448,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Farah</first><last>Nadeem</last></author>
       <author id="huy-nguyen-lls"><first>Huy</first><last>Nguyen</last></author>
       <author id="yang-liu-icsi"><first>Yang</first><last>Liu</last></author>
-      <author><first>Mari</first><last>Ostendorf</last></author>
+      <author id="mari-ostendorf"><first>Mari</first><last>Ostendorf</last></author>
       <pages>484–493</pages>
       <abstract>Automated essay scoring systems typically rely on hand-crafted features to predict essay quality, but such systems are limited by the cost of feature engineering. Neural networks offer an alternative to feature engineering, but they typically require more annotated data. This paper explores network structures, contextualized embeddings and pre-training strategies aimed at capturing discourse characteristics of essays. Experiments on three essay scoring tasks show benefits from all three strategies in different combinations, with simpler architectures being more effective when less training data is available.</abstract>
       <url hash="919b24d8">W19-4450</url>
@@ -8476,7 +8476,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Elena</first><last>Musi</last></author>
       <author><first>Patricia</first><last>Davies</last></author>
       <author><first>Smaranda</first><last>Muresan</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <pages>507–518</pages>
       <abstract>We present a unique dataset of student source-based argument essays to facilitate research on the relations between content, argumentation skills, and assessment. Two classroom writing assignments were given to college students in a STEM major, accompanied by a carefully designed rubric. The paper presents a reliability study of the rubric, showing it to be highly reliable, and initial annotation on content and argumentation annotation of the essays.</abstract>
       <url hash="7fd29ab4">W19-4452</url>
@@ -8513,8 +8513,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>A Cascade Model for Proposition Extraction in Argumentation</title>
       <author><first>Yohan</first><last>Jo</last></author>
       <author><first>Jacky</first><last>Visser</last></author>
-      <author><first>Chris</first><last>Reed</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="chris-reed"><first>Chris</first><last>Reed</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>11–24</pages>
       <abstract>We present a model to tackle a fundamental but understudied problem in computational argumentation: proposition extraction. Propositions are the basic units of an argument and the primary building blocks of most argument mining systems. However, they are usually substituted by argumentative discourse units obtained via surface-level text segmentation, which may yield text segments that lack semantic information necessary for subsequent argument mining processes. In contrast, our cascade model aims to extract complete propositions by handling anaphora resolution, text segmentation, reported speech, questions, imperatives, missing subjects, and revision. We formulate each task as a computational problem and test various models using a corpus of the 2016 U.S. presidential debates. We show promising performance for some tasks and discuss main challenges in proposition extraction.</abstract>
       <url hash="5828dd8c">W19-4502</url>
@@ -8648,7 +8648,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="14">
       <title>Annotation of Rhetorical Moves in Biochemistry Articles</title>
       <author><first>Mohammed</first><last>Alliheedi</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <author><first>Robin</first><last>Cohen</last></author>
       <pages>113–123</pages>
       <abstract>This paper focuses on the real world application of scientific writing and on determining rhetorical moves, an important step in establishing the argument structure of biomedical articles. Using the observation that the structure of scholarly writing in laboratory-based experimental sciences closely follows laboratory procedures, we examine most closely the Methods section of the texts and adopt an approach of identifying rhetorical moves that are procedure-oriented. We also propose a verb-centric frame semantics with an effective set of semantic roles in order to support the analysis. These components are designed to support a computational model that extends a promising proposal of appropriate rhetorical moves for this domain, but one which is merely descriptive. Our work also contributes to the understanding of argument-related annotation schemes. In particular, we conduct a detailed study with human annotators to confirm that our selection of semantic roles is effective in determining the underlying rhetorical structure of existing biomedical articles in an extensive dataset. The annotated dataset that we produce provides the important knowledge needed for our ultimate goal of analyzing biochemistry articles.</abstract>
@@ -8673,7 +8673,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Alexander</first><last>Bondarenko</last></author>
       <author><first>Mirco</first><last>Franzek</last></author>
       <author><first>Matthias</first><last>Hagen</last></author>
-      <author><first>Chris</first><last>Biemann</last></author>
+      <author id="chris-biemann"><first>Chris</first><last>Biemann</last></author>
       <pages>136–145</pages>
       <abstract>We tackle the tasks of automatically identifying comparative sentences and categorizing the intended preference (e.g., “Python has better NLP libraries than MATLAB” → Python, better, MATLAB). To this end, we manually annotate 7,199 sentences for 217 distinct target item pairs from several domains (27% of the sentences contain an oriented comparison in the sense of “better” or “worse”). A gradient boosting model based on pre-trained sentence embeddings reaches an F1 score of 85% in our experimental evaluation. The model can be used to extract comparative sentences for pro/con argumentation in comparative / argument search engines or debating technologies.</abstract>
       <url hash="9e3955c9">W19-4516</url>
@@ -8684,7 +8684,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Ranking Passages for Argument Convincingness</title>
       <author><first>Peter</first><last>Potash</last></author>
       <author><first>Adam</first><last>Ferguson</last></author>
-      <author><first>Timothy J.</first><last>Hazen</last></author>
+      <author id="timothy-j-hazen"><first>Timothy J.</first><last>Hazen</last></author>
       <pages>146–155</pages>
       <abstract>In data ranking applications, pairwise annotation is often more consistent than cardinal annotation for learning ranking models. We examine this in a case study on ranking text passages for argument convincingness. Our task is to choose text passages that provide the highest-quality, most-convincing arguments for opposing sides of a topic. Using data from a deployed system within the Bing search engine, we construct a pairwise-labeled dataset for argument convincingness that is substantially more comprehensive in topical coverage compared to existing public resources. We detail the process of extracting topical passages for queries submitted to a search engine, creating annotated sets of passages aligned to different stances on a topic, and assessing argument convincingness of passages using pairwise annotation. Using a state-of-the-art convincingness model, we evaluate several methods for using pairwise-annotated data examples to train models for ranking passages. Our results show pairwise training outperforms training that regresses to a target score for each passage. Our results also show a simple ‘win-rate’ score is a better regression target than the previously proposed page-rank target. Lastly, addressing the need to filter noisy crowd-sourced annotations when constructing a dataset, we show that filtering for transitivity within pairwise annotations is more effective than filtering based on annotation confidence measures for individual examples.</abstract>
       <url hash="8a261bbc">W19-4517</url>
@@ -8705,7 +8705,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Persuasion of the Undecided: Language vs. the Listener</title>
       <author><first>Liane</first><last>Longpre</last></author>
       <author><first>Esin</first><last>Durmus</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <pages>167–176</pages>
       <abstract>This paper examines the factors that govern persuasion for a priori UNDECIDED versus DECIDED audience members in the context of on-line debates. We separately study two types of influences: linguistic factors — features of the language of the debate itself; and audience factors — features of an audience member encoding demographic information, prior beliefs, and debate platform behavior. In a study of users of a popular debate platform, we find first that different combinations of linguistic features are critical for predicting persuasion outcomes for UNDECIDED versus DECIDED members of the audience. We additionally find that audience factors have more influence on predicting the side (PRO/CON) that persuaded UNDECIDED users than for DECIDED users that flip their stance to the opposing side. Our results emphasize the importance of considering the undecided and decided audiences separately when studying linguistic factors of persuasion.</abstract>
       <url hash="e0cc0062">W19-4519</url>
@@ -8728,8 +8728,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the Fourth Arabic Natural Language Processing Workshop</booktitle>
       <url hash="a50d4d69">W19-46</url>
-      <editor><first>Wassim</first><last>El-Hajj</last></editor>
-      <editor><first>Lamia Hadrich</first><last>Belguith</last></editor>
+      <editor id="wassim-el-hajj"><first>Wassim</first><last>El-Hajj</last></editor>
+      <editor id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></editor>
       <editor><first>Fethi</first><last>Bougares</last></editor>
       <editor><first>Walid</first><last>Magdy</last></editor>
       <editor><first>Imed</first><last>Zitouni</last></editor>
@@ -8763,7 +8763,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Mahitab</first><last>Emam</last></author>
       <author><first>Khaled</first><last>Essam</last></author>
       <author><first>Robert</first><last>Nabil</last></author>
-      <author><first>Hany</first><last>Hassan</last></author>
+      <author id="hany-hassan-awadalla"><first>Hany</first><last>Hassan</last></author>
       <pages>11–17</pages>
       <abstract>Parallel corpora available for building machine translation (MT) models for dialectal Arabic (DA) are rather limited. The scarcity of resources has prompted the use of Modern Standard Arabic (MSA) abundant resources to complement the limited dialectal resource. However, dialectal clitics often differ between MSA and DA. This paper compares morphology-aware DA word segmentation to other word segmentation approaches like Byte Pair Encoding (BPE) and Sub-word Regularization (SR). A set of experiments conducted on Egyptian Arabic (EA), Levantine Arabic (LA), and Gulf Arabic (GA) show that a sufficiently accurate morphology-aware segmentation used in conjunction with BPE outperforms the other word segmentation approaches.</abstract>
       <url hash="60db9239">W19-4602</url>
@@ -8789,7 +8789,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Hala</first><last>Mulki</last></author>
       <author><first>Hatem</first><last>Haddad</last></author>
       <author><first>Mourad</first><last>Gridach</last></author>
-      <author><first>Ismail</first><last>Babaoğlu</last></author>
+      <author id="ismail-babaoglu"><first>Ismail</first><last>Babaoğlu</last></author>
       <pages>30–39</pages>
       <abstract>Arabic sentiment analysis models have employed compositional embedding features to represent the Arabic dialectal content. These embeddings are usually composed via ordered, syntax-aware composition functions and learned within deep neural frameworks. With the free word order and the varying syntax nature across the different Arabic dialects, a sentiment analysis system developed for one dialect might not be efficient for the others. Here we present syntax-ignorant n-gram embeddings to be used in sentiment analysis of several Arabic dialects. The proposed embeddings were composed and learned using an unordered composition function and a shallow neural model. Five datasets of different dialects were used to evaluate the produced embeddings in the sentiment analysis task. The obtained results revealed that, our syntax-ignorant embeddings could outperform word2vec model and doc2vec both variant models in addition to hand-crafted system baselines, while a competent performance was noticed towards baseline systems that adopted more complicated neural architectures.</abstract>
       <url hash="6fda96f6">W19-4604</url>
@@ -8813,7 +8813,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Homograph Disambiguation through Selective Diacritic Restoration</title>
       <author><first>Sawsan</first><last>Alqahtani</last></author>
       <author><first>Hanan</first><last>Aldarmaki</last></author>
-      <author><first>Mona</first><last>Diab</last></author>
+      <author id="mona-diab"><first>Mona</first><last>Diab</last></author>
       <pages>49–59</pages>
       <abstract>Lexical ambiguity, a challenging phenomenon in all natural languages, is particularly prevalent for languages with diacritics that tend to be omitted in writing, such as Arabic. Omitting diacritics leads to an increase in the number of homographs: different words with the same spelling. Diacritic restoration could theoretically help disambiguate these words, but in practice, the increase in overall sparsity leads to performance degradation in NLP applications. In this paper, we propose approaches for automatically marking a subset of words for diacritic restoration, which leads to selective homograph disambiguation. Compared to full or no diacritic restoration, these approaches yield selectively-diacritized datasets that balance sparsity and lexical disambiguation. We evaluate the various selection strategies extrinsically on several downstream applications: neural machine translation, part-of-speech tagging, and semantic textual similarity. Our experiments on Arabic show promising results, where our devised strategies on selective diacritization lead to a more balanced and consistent performance in downstream applications.</abstract>
       <url hash="8389a259">W19-4606</url>
@@ -8838,7 +8838,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Nour</first><last>El Droubi</last></author>
       <author><first>Hazem</first><last>Hajj</last></author>
       <author><first>Wassim</first><last>El-Hajj</last></author>
-      <author><first>Khaled</first><last>Shaban</last></author>
+      <author id="khaled-shaban"><first>Khaled</first><last>Shaban</last></author>
       <pages>68–77</pages>
       <abstract>Arabic is a complex language with limited resources which makes it challenging to produce accurate text classification tasks such as sentiment analysis. The utilization of transfer learning (TL) has recently shown promising results for advancing accuracy of text classification in English. TL models are pre-trained on large corpora, and then fine-tuned on task-specific datasets. In particular, universal language models (ULMs), such as recently developed BERT, have achieved state-of-the-art results in various NLP tasks in English. In this paper, we hypothesize that similar success can be achieved for Arabic. The work aims at supporting the hypothesis by developing the first Universal Language Model in Arabic (hULMonA - حلمنا meaning our dream), demonstrating its use for Arabic classifications tasks, and demonstrating how a pre-trained multi-lingual BERT can also be used for Arabic. We then conduct a benchmark study to evaluate both ULM successes with Arabic sentiment analysis. Experiment results show that the developed hULMonA and multi-lingual ULM are able to generalize well to multiple Arabic data sets and achieve new state of the art results in Arabic Sentiment Analysis for some of the tested sets.</abstract>
       <url hash="453e9ef5">W19-4608</url>
@@ -8862,7 +8862,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Xingyu</first><last>Fu</last></author>
       <author><first>Aseel</first><last>Addawood</last></author>
       <author><first>Nahil</first><last>Sobh</last></author>
-      <author><first>Clare</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare</first><last>Voss</last></author>
       <author><first>Jiawei</first><last>Han</last></author>
       <pages>88–96</pages>
       <abstract>In this paper, we tackle the problem of “root extraction” from words in the Semitic language family. A challenge in applying natural language processing techniques to these languages is the data sparsity problem that arises from their rich internal morphology, where the substructure is inherently non-concatenative and morphemes are interdigitated in word formation. While previous automated methods have relied on human-curated rules or multiclass classification, they have not fully leveraged the various combinations of regular, sequential concatenative morphology within the words and the internal interleaving within templatic stems of roots and patterns. To address this, we propose a constrained sequence-to-sequence root extraction method. Experimental results show our constrained model outperforms a variety of methods at root extraction. Furthermore, by enriching word embeddings with resulting decompositions, we show improved results on word analogy, word similarity, and language modeling tasks.</abstract>
@@ -8924,7 +8924,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Sane</first><last>Yagi</last></author>
       <author><first>Ouafaa</first><last>Kacha</last></author>
       <author><first>Nizar</first><last>Habash</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <pages>137–147</pages>
       <abstract>We present a collection of morphologically annotated corpora for seven Arabic dialects: Taizi Yemeni, Sanaani Yemeni, Najdi, Jordanian, Syrian, Iraqi and Moroccan Arabic. The corpora collectively cover over 200,000 words, and are all manually annotated in a common set of standards for orthography, diacritized lemmas, tokenization, morphological units and English glosses. These corpora will be publicly available to serve as benchmarks for training and evaluating systems for Arabic dialect morphological analysis and disambiguation.</abstract>
       <url hash="8992fdcc">W19-4615</url>
@@ -8946,7 +8946,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="17">
       <title>Translating Between Morphologically Rich Languages: An <fixed-case>A</fixed-case>rabic-to-<fixed-case>T</fixed-case>urkish Machine Translation System</title>
-      <author><first>İlknur</first><last>Durgar El-Kahlout</last></author>
+      <author id="ilknur-durgar-el-kahlout"><first>İlknur</first><last>Durgar El-Kahlout</last></author>
       <author><first>Emre</first><last>Bektaş</last></author>
       <author><first>Naime Şeyma</first><last>Erdem</last></author>
       <author><first>Hamza</first><last>Kaya</last></author>
@@ -9114,7 +9114,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Victor</first><last>Guichard</last></author>
       <author><first>Praveen</first><last>Joshi</last></author>
       <author><first>Haithem</first><last>Afli</last></author>
-      <author><first>Abdessalam</first><last>Bouchekif</last></author>
+      <author id="abdessalam-bouchekif"><first>Abdessalam</first><last>Bouchekif</last></author>
       <pages>249–253</pages>
       <abstract>In this paper, we present two approaches for Arabic Fine-Grained Dialect Identification. The first approach is based on Recurrent Neural Networks (BLSTM, BGRU) using hierarchical classification. The main idea is to separate the classification process for a sentence from a given text in two stages. We start with a higher level of classification (8 classes) and then the finer-grained classification (26 classes). The second approach is given by a voting system based on Naive Bayes and Random Forest. Our system achieves an F1 score of 63.02 % on the subtask evaluation dataset.</abstract>
       <url hash="4681ae2a">W19-4631</url>
@@ -9136,7 +9136,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Karima</first><last>Meftouh</last></author>
       <author><first>Karima</first><last>Abidi</last></author>
       <author><first>Salima</first><last>Harrat</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>259–263</pages>
       <abstract>This paper describes the approach adopted by the SMarT research group to build a dialect identification system in the framework of the Madar shared task on Arabic fine-grained dialect identification. We experimented several approaches, but we finally decided to use a Multinomial Naive Bayes classifier based on word and character ngrams in addition to the language model probabilities. We achieved a score of 67.73% in terms of Macro accuracy and a macro-averaged F1-score of 67.31%</abstract>
       <url hash="25549de6">W19-4633</url>
@@ -9236,7 +9236,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="1">
       <title>From Insanely Jealous to Insanely Delicious: Computational Models for the Semantic Bleaching of <fixed-case>E</fixed-case>nglish Intensifiers</title>
       <author><first>Yiwei</first><last>Luo</last></author>
-      <author><first>Dan</first><last>Jurafsky</last></author>
+      <author id="dan-jurafsky"><first>Dan</first><last>Jurafsky</last></author>
       <author><first>Beth</first><last>Levin</last></author>
       <pages>1–13</pages>
       <abstract>We introduce novel computational models for modeling semantic bleaching, a widespread category of change in which words become more abstract or lose elements of meaning, like the development of “arrive” from its earlier meaning ‘become at shore.’ We validate our methods on a widespread case of bleaching in English: de-adjectival adverbs that originate as manner adverbs (as in “awfully behaved”) and later become intensifying adverbs (as in “awfully nice”). Our methods formally quantify three reflexes of bleaching: decreasing similarity to the source meaning (e.g., “awful”), increasing similarity to a fully bleached prototype (e.g., “very”), and increasing productivity (e.g., the breadth of adjectives that an adverb modifies). We also test a new causal model and find evidence that bleaching is initially triggered in contexts such as “conspicuously evident” and “insanely jealous”, where an adverb premodifies a semantically similar adjective. These contexts provide a form of “bridging context” (Evans and Wilkins, 2000) that allow a manner adverb to be reinterpreted as an intensifying adverb similar to “very”.</abstract>
@@ -9277,7 +9277,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="5">
       <title>Contextualized Diachronic Word Representations</title>
       <author><first>Ganesh</first><last>Jawahar</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <pages>35–47</pages>
       <abstract>Diachronic word embeddings play a key role in capturing interesting patterns about how language evolves over time. Most of the existing work focuses on studying corpora spanning across several decades, which is understandably still not a possibility when working on social media-based user-generated content. In this work, we address the problem of studying semantic changes in a large Twitter corpus collected over five years, a much shorter period than what is usually the norm in diachronic studies. We devise a novel attentional model, based on Bernoulli word embeddings, that are conditioned on contextual extra-linguistic (social) features such as network, spatial and socio-economic variables, which are associated with Twitter users, as well as topic-based features. We posit that these social features provide an inductive bias that helps our model to overcome the narrow time-span regime problem. Our extensive experiments reveal that our proposed model is able to capture subtle semantic shifts without being biased towards frequency cues and also works well when certain contextual features are absent. Our model fits the data better than current state-of-the-art dynamic word embedding models and therefore is a promising tool to study diachronic semantic changes over small time periods.</abstract>
       <url hash="a4674618">W19-4705</url>
@@ -9320,7 +9320,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="9">
       <title>A Method to Automatically Identify Diachronic Variation in Collocations.</title>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Marcos</first><last>García Salido</last></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García Salido</last></author>
       <pages>71–80</pages>
       <abstract>This paper introduces a novel method to track collocational variations in diachronic corpora that can identify several changes undergone by these phraseological combinations and to propose alternative solutions found in later periods. The strategy consists of extracting syntactically-related candidates of collocations and ranking them using statistical association measures. Then, starting from the first period of the corpus, the system tracks each combination over time, verifying different types of historical variation such as the loss of one or both lemmas, the disappearance of the collocation, or its diachronic frequency trend. Using a distributional semantics strategy, it also proposes other linguistic structures which convey similar meanings to those extinct collocations. A case study on historical corpora of Portuguese and Spanish shows that the system speeds up and facilitates the finding of some diachronic changes and phraseological shifts that are harder to identify without using automated methods.</abstract>
       <url hash="3e92ce70">W19-4709</url>
@@ -9435,8 +9435,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="20">
       <title>Studying Laws of Semantic Divergence across Languages using Cognate Sets</title>
       <author><first>Ana</first><last>Uban</last></author>
-      <author><first>Alina Maria</first><last>Ciobanu</last></author>
-      <author><first>Liviu P.</first><last>Dinu</last></author>
+      <author id="alina-maria-ciobanu"><first>Alina Maria</first><last>Ciobanu</last></author>
+      <author id="liviu-p-dinu"><first>Liviu P.</first><last>Dinu</last></author>
       <pages>161–166</pages>
       <abstract>Semantic divergence in related languages is a key concern of historical linguistics. Intra-lingual semantic shift has been previously studied in computational linguistics, but this can only provide a limited picture of the evolution of word meanings, which often develop in a multilingual environment. In this paper we investigate semantic change across languages by measuring the semantic distance of cognate words in multiple languages. By comparing current meanings of cognates in different languages, we hope to uncover information about their previous meanings, and about how they diverged in their respective languages from their common original etymon. We further study the properties of their semantic divergence, by analyzing how the features of words such as frequency and polysemy are related to the divergence in their meaning, and thus make the first steps towards formulating laws of cross-lingual semantic change.</abstract>
       <url hash="8c1dde9c">W19-4720</url>
@@ -9483,7 +9483,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>One-to-<fixed-case>X</fixed-case> Analogical Reasoning on Word Embeddings: a Case for Diachronic Armed Conflict Prediction from News Texts</title>
       <author><first>Andrey</first><last>Kutuzov</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <pages>196–201</pages>
       <abstract>We extend the well-known word analogy task to a one-to-X formulation, including one-to-none cases, when no correct answer exists. The task is cast as a relation discovery problem and applied to historical armed conflicts datasets, attempting to predict new relations of type ‘location:armed-group’ based on data about past events. As the source of semantic information, we use diachronic word embedding models trained on English news texts. A simple technique to improve diachronic performance in such task is demonstrated, using a threshold based on a function of cosine distance to decrease the number of false positives; this approach is shown to be beneficial on two different corpora. Finally, we publish a ready-to-use test set for one-to-X analogy evaluation on historical armed conflicts data.</abstract>
       <url hash="3069a75e">W19-4724</url>
@@ -9507,7 +9507,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="26">
       <title>Semantic Change in the Language of <fixed-case>UK</fixed-case> Parliamentary Debates</title>
       <author><first>Gavin</first><last>Abercrombie</last></author>
-      <author><first>Riza</first><last>Batista-Navarro</last></author>
+      <author id="riza-theresa-batista-navarro"><first>Riza</first><last>Batista-Navarro</last></author>
       <pages>210–215</pages>
       <abstract>We investigate changes in the meanings of words used in the UK Parliament across two different epochs. We use word embeddings to explore changes in the distribution of words of interest and uncover words that appear to have undergone semantic transformation in the intervening period, and explore different ways of obtaining target words for this purpose. We find that semantic changes are generally in line with those found in other corpora, and little evidence that parliamentary language is more static than general English. It also seems that words with senses that have been recorded in the dictionary as having fallen into disuse do not undergo semantic changes in this domain.</abstract>
       <url hash="d4bb7b4f">W19-4726</url>
@@ -9538,7 +9538,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Measuring the Compositionality of Noun-Noun Compounds over Time</title>
       <author><first>Prajit</first><last>Dhar</last></author>
       <author><first>Janis</first><last>Pagel</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>234–239</pages>
       <abstract>We present work in progress on the temporal progression of compositionality in noun-noun compounds. Previous work has proposed computational methods for determining the compositionality of compounds. These methods try to automatically determine how transparent the meaning of the compound as a whole is with respect to the meaning of its parts. We hypothesize that such a property might change over time. We use the time-stamped Google Books corpus for our diachronic investigations, and first examine whether the vector-based semantic spaces extracted from this corpus are able to predict compositionality ratings, despite their inherent limitations. We find that using temporal information helps predicting the ratings, although correlation with the ratings is lower than reported for other corpora. Finally, we show changes in compositionality over time for a selection of compounds.</abstract>
       <url hash="d11101b7">W19-4729</url>
@@ -9548,7 +9548,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="30">
       <title>Towards Automatic Variant Analysis of Ancient Devotional Texts</title>
       <author><first>Amir</first><last>Hazem</last></author>
-      <author><first>Béatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Béatrice</first><last>Daille</last></author>
       <author><first>Dominique</first><last>Stutzmann</last></author>
       <author><first>Jacob</first><last>Currie</last></author>
       <author><first>Christine</first><last>Jacquin</last></author>
@@ -9595,7 +9595,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Christin</first><last>Schätzle</last></author>
       <author><first>Frederik L.</first><last>Dennig</last></author>
       <author><first>Michael</first><last>Blumenschein</last></author>
-      <author><first>Daniel A.</first><last>Keim</last></author>
+      <author id="daniel-keim"><first>Daniel A.</first><last>Keim</last></author>
       <author><first>Miriam</first><last>Butt</last></author>
       <pages>272–278</pages>
       <abstract>Historical change typically is the result of complex interactions between several linguistic factors. Identifying the relevant factors and understanding how they interact across the temporal dimension is the core remit of historical linguistics. With respect to corpus work, this entails a separate annotation, extraction and painstaking pair-wise comparison of the relevant bits of information. This paper presents a significant extension of HistoBankVis, a multilayer visualization system which allows a fast and interactive exploration of complex linguistic data. Linguistic factors can be understood as data dimensions which show complex interrelationships. We model these relationships with the Parallel Sets technique. We demonstrate the powerful potential of this technique by applying the system to understanding the interaction of case, grammatical relations and word order in the history of Icelandic.</abstract>
@@ -9609,7 +9609,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP</booktitle>
       <url hash="1c4c6922">W19-48</url>
       <editor><first>Tal</first><last>Linzen</last></editor>
-      <editor><first>Grzegorz</first><last>Chrupała</last></editor>
+      <editor id="grzegorz-chrupala"><first>Grzegorz</first><last>Chrupała</last></editor>
       <editor><first>Yonatan</first><last>Belinkov</last></editor>
       <editor><first>Dieuwke</first><last>Hupkes</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -9637,7 +9637,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="2">
       <title>Sentiment Analysis Is Not Solved! Assessing and Probing Sentiment Classification</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>12–23</pages>
       <abstract>Neural methods for sentiment analysis have led to quantitative improvements over previous approaches, but these advances are not always accompanied with a thorough analysis of the qualitative differences. Therefore, it is not clear what outstanding conceptual challenges for sentiment analysis remain. In this work, we attempt to discover what challenges still prove a problem for sentiment classifiers for English and to provide a challenging dataset. We collect the subset of sentences that an (oracle) ensemble of state-of-the-art sentiment classifiers misclassify and then annotate them for 18 linguistic and paralinguistic phenomena, such as negation, sarcasm, modality, etc. Finally, we provide a case study that demonstrates the usefulness of the dataset to probe the performance of a given sentiment classifier with respect to linguistic phenomena.</abstract>
@@ -9649,7 +9649,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Second-order Co-occurrence Sensitivity of Skip-Gram with Negative Sampling</title>
       <author><first>Dominik</first><last>Schlechtweg</last></author>
       <author><first>Cennet</first><last>Oguz</last></author>
-      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <author id="sabine-schulte-im-walde"><first>Sabine</first><last>Schulte im Walde</last></author>
       <pages>24–30</pages>
       <abstract>We simulate first- and second-order context overlap and show that Skip-Gram with Negative Sampling is similar to Singular Value Decomposition in capturing second-order co-occurrence information, while Pointwise Mutual Information is agnostic to it. We support the results with an empirical study finding that the models react differently when provided with additional second-order information. Our findings reveal a basic property of Skip-Gram with Negative Sampling and point towards an explanation of its success on a variety of tasks.</abstract>
       <url hash="93c93da0">W19-4803</url>
@@ -9703,7 +9703,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Do Human Rationales Improve Machine Explanations?</title>
       <author><first>Julia</first><last>Strout</last></author>
       <author><first>Ye</first><last>Zhang</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>56–62</pages>
       <abstract>Work on “learning with rationales” shows that humans providing explanations to a machine learning system can improve the system’s predictive accuracy. However, this work has not been connected to work in “explainable AI” which concerns machines explaining their reasoning to humans. In this work, we show that learning with rationales can also improve the quality of the machine’s explanations as evaluated by human judges. Specifically, we present experiments showing that, for CNN-based text classification, explanations generated using “supervised attention” are judged superior to explanations generated using normal unsupervised attention.</abstract>
       <url hash="54843077">W19-4807</url>
@@ -9756,7 +9756,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="12">
       <title>Faithful Multimodal Explanation for Visual Question Answering</title>
       <author><first>Jialin</first><last>Wu</last></author>
-      <author><first>Raymond</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond</first><last>Mooney</last></author>
       <pages>103–112</pages>
       <abstract>AI systems’ ability to explain their reasoning is critical to their utility and trustworthiness. Deep neural networks have enabled significant progress on many challenging problems such as visual question answering (VQA). However, most of them are opaque black boxes with limited explanatory capability. This paper presents a novel approach to developing a high-performing VQA system that can elucidate its answers with integrated textual and visual explanations that faithfully reflect important aspects of its underlying reasoning while capturing the style of comprehensible human explanations. Extensive experimental evaluation demonstrates the advantages of this approach compared to competing methods using both automated metrics and human evaluation.</abstract>
       <url hash="1dadfddd">W19-4812</url>
@@ -9805,7 +9805,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="16">
       <title>Modeling Paths for Explainable Knowledge Base Completion</title>
       <author><first>Josua</first><last>Stadelmaier</last></author>
-      <author><first>Sebastian</first><last>Padó</last></author>
+      <author id="sebastian-pado"><first>Sebastian</first><last>Padó</last></author>
       <pages>147–157</pages>
       <abstract>A common approach in knowledge base completion (KBC) is to learn representations for entities and relations in order to infer missing facts by generalizing existing ones. A shortcoming of standard models is that they do not explain their predictions to make them verifiable easily to human inspection. In this paper, we propose the Context Path Model (CPM) which generates explanations for new facts in KBC by providing sets of <i>context paths</i> as supporting evidence for these triples. For example, a new triple (Theresa May, nationality, Britain) may be explained by the path (Theresa May, born in, Eastbourne, contained in, Britain). The CPM is formulated as a wrapper that can be applied on top of various existing KBC models. We evaluate it for the well-established TransE model. We observe that its performance remains very close despite the added complexity, and that most of the paths proposed as explanations provide meaningful evidence to assess the correctness.</abstract>
       <url hash="12c1a133">W19-4816</url>
@@ -9835,7 +9835,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="19">
       <title>Hierarchical Representation in Neural Language Models: Suppression and Recovery of Expectations</title>
       <author><first>Ethan</first><last>Wilcox</last></author>
-      <author><first>Roger</first><last>Levy</last></author>
+      <author id="roger-levy"><first>Roger</first><last>Levy</last></author>
       <author><first>Richard</first><last>Futrell</last></author>
       <pages>181–190</pages>
       <abstract>Work using artificial languages as training input has shown that LSTMs are capable of inducing the stack-like data structures required to represent context-free and certain mildly context-sensitive languages — formal language classes which correspond in theory to the hierarchical structures of natural language. Here we present a suite of experiments probing whether neural language models trained on linguistic data induce these stack-like data structures and deploy them while incrementally predicting words. We study two natural language phenomena: center embedding sentences and syntactic island constraints on the filler–gap dependency. In order to properly predict words in these structures, a model must be able to temporarily suppress certain expectations and then recover those expectations later, essentially pushing and popping these expectations on a stack. Our results provide evidence that models can successfully suppress and recover expectations in many cases, but do not fully recover their previous grammatical state.</abstract>
@@ -9912,7 +9912,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="26">
       <title><fixed-case>GE</fixed-case>val: Tool for Debugging <fixed-case>NLP</fixed-case> Datasets and Models</title>
-      <author><first>Filip</first><last>Graliński</last></author>
+      <author id="filip-gralinski"><first>Filip</first><last>Graliński</last></author>
       <author><first>Anna</first><last>Wróblewska</last></author>
       <author><first>Tomasz</first><last>Stanisławek</last></author>
       <author><first>Kamil</first><last>Grabowski</last></author>
@@ -9938,7 +9938,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Kevin</first><last>Clark</last></author>
       <author><first>Urvashi</first><last>Khandelwal</last></author>
       <author><first>Omer</first><last>Levy</last></author>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <pages>276–286</pages>
       <abstract>Large pre-trained neural networks such as BERT have had great recent success in NLP, motivating a growing body of research investigating what aspects of language they are able to learn from unlabeled data. Most recent analysis has focused on model outputs (e.g., language model surprisal) or internal vector representations (e.g., probing classifiers). Complementary to these works, we propose methods for analyzing the attention mechanisms of pre-trained models and apply them to BERT. BERT’s attention heads exhibit patterns such as attending to delimiter tokens, specific positional offsets, or broadly attending over the whole sentence, with heads in the same layer often exhibiting similar behaviors. We further show that certain attention heads correspond well to linguistic notions of syntax and coreference. For example, we find heads that attend to the direct objects of verbs, determiners of nouns, objects of prepositions, and coreferent mentions with remarkably high accuracy. Lastly, we propose an attention-based probing classifier and use it to further demonstrate that substantial syntactic information is captured in BERT’s attention.</abstract>
       <url hash="54f98d80">W19-4828</url>
@@ -9951,7 +9951,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of TyP-NLP: The First Workshop on Typology for Polyglot NLP</booktitle>
       <url hash="b3f053c0">W19-49</url>
       <editor><first>Haim</first><last>Dubossarsky</last></editor>
-      <editor><first>Arya D.</first><last>McCarthy</last></editor>
+      <editor id="arya-d-mccarthy"><first>Arya D.</first><last>McCarthy</last></editor>
       <editor><first>Edoardo Maria</first><last>Ponti</last></editor>
       <editor><first>Ivan</first><last>Vulić</last></editor>
       <editor><first>Ekaterina</first><last>Vylomova</last></editor>
@@ -9976,9 +9976,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the 18th BioNLP Workshop and Shared Task</booktitle>
       <url hash="3e7a898e">W19-50</url>
       <editor><first>Dina</first><last>Demner-Fushman</last></editor>
-      <editor><first>Kevin Bretonnel</first><last>Cohen</last></editor>
+      <editor id="k-bretonnel-cohen"><first>Kevin Bretonnel</first><last>Cohen</last></editor>
       <editor><first>Sophia</first><last>Ananiadou</last></editor>
-      <editor><first>Junichi</first><last>Tsujii</last></editor>
+      <editor id="junichi-tsujii"><first>Junichi</first><last>Tsujii</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -10048,7 +10048,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="6">
       <title>Transfer Learning in Biomedical Natural Language Processing: An Evaluation of <fixed-case>BERT</fixed-case> and <fixed-case>ELM</fixed-case>o on Ten Benchmarking Datasets</title>
-      <author><first>Yifan</first><last>Peng</last></author>
+      <author id="yifan-peng-cmu"><first>Yifan</first><last>Peng</last></author>
       <author><first>Shankai</first><last>Yan</last></author>
       <author><first>Zhiyong</first><last>Lu</last></author>
       <pages>58–65</pages>
@@ -10073,7 +10073,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="8">
       <title><fixed-case>M</fixed-case>o<fixed-case>NER</fixed-case>o: a Biomedical Gold Standard Corpus for the <fixed-case>R</fixed-case>omanian Language</title>
       <author><first>Maria</first><last>Mitrofan</last></author>
-      <author><first>Verginica</first><last>Barbu Mititelu</last></author>
+      <author id="verginica-barbu-mititelu"><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Grigorina</first><last>Mitrofan</last></author>
       <pages>71–79</pages>
       <abstract>In an era when large amounts of data are generated daily in various fields, the biomedical field among others, linguistic resources can be exploited for various tasks of Natural Language Processing. Moreover, increasing number of biomedical documents are available in languages other than English. To be able to extract information from natural language free text resources, methods and tools are needed for a variety of languages. This paper presents the creation of the MoNERo corpus, a gold standard biomedical corpus for Romanian, annotated with both part of speech tags and named entities. MoNERo comprises 154,825 morphologically annotated tokens and 23,188 entity annotations belonging to four entity semantic groups corresponding to UMLS Semantic Groups.</abstract>
@@ -10088,7 +10088,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Aditya</first><last>Siddhant</last></author>
       <author><first>Anirudha</first><last>Rayasam</last></author>
       <author><first>Niket</first><last>Tandon</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <pages>80–87</pages>
       <abstract>Domain adaptation remains one of the most challenging aspects in the wide-spread use of Semantic Role Labeling (SRL) systems. Current state-of-the-art methods are typically trained on large-scale datasets, but their performances do not directly transfer to low-resource domain-specific settings. In this paper, we propose two approaches for domain adaptation in the biological domain that involves pre-training LSTM-CRF based on existing large-scale datasets and adapting it for a low-resource corpus of biological processes. Our first approach defines a mapping between the source labels and the target labels, and the other approach modifies the final CRF layer in sequence-labeling neural network architecture. We perform our experiments on ProcessBank dataset which contains less than 200 paragraphs on biological processes. We improve over the previous state-of-the-art system on this dataset by 21 F1 points. We also show that, by incorporating event-event relationship in ProcessBank, we are able to achieve an additional 2.6 F1 gain, giving us possible insights into how to improve SRL systems for biological process using richer annotations.</abstract>
       <url hash="fa7e335a">W19-5009</url>
@@ -10111,7 +10111,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Hanna</first><last>Pylieva</last></author>
       <author><first>Artem</first><last>Chernodub</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>97–104</pages>
       <abstract>Patients and their families often require a better understanding of medical information provided by doctors. We currently address this issue by improving the identification of difficult to understand medical words. We introduce novel embeddings received from RNN - FrnnMUTE (French RNN Medical Understandability Text Embeddings) which allow to reach up to 87.0 F1 score in identification of difficult words. We also note that adding pre-trained FastText word embeddings to the feature set substantially improves the performance of the model which classifies words according to their difficulty. We study the generalizability of different models through three cross-validation scenarios which allow testing classifiers in real-world conditions: understanding of medical words by new users, and classification of new unseen words by the automatic models. The RNN - FrnnMUTE embeddings and the categorization code are being made available for the research.</abstract>
       <url hash="9cc80062">W19-5011</url>
@@ -10124,7 +10124,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Mariska</first><last>Leeflang</last></author>
       <author><first>René</first><last>Spijker</last></author>
       <author><first>Evangelos</first><last>Kanoulas</last></author>
-      <author><first>Aurélie</first><last>Névéol</last></author>
+      <author id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></author>
       <pages>105–114</pages>
       <abstract>Systematic reviews are important in evidence based medicine, but are expensive to produce. Automating or semi-automating the data extraction of index test, target condition, and reference standard from articles has the potential to decrease the cost of conducting systematic reviews of diagnostic test accuracy, but relevant training data is not available. We create a distantly supervised dataset of approximately 90,000 sentences, and let two experts manually annotate a small subset of around 1,000 sentences for evaluation. We evaluate the performance of BioBERT and logistic regression for ranking the sentences, and compare the performance for distant and direct supervision. Our results suggest that distant supervision can work as well as, or better than direct supervision on this problem, and that distantly trained models can perform as well as, or better than human annotators.</abstract>
       <url hash="cbbbc269">W19-5012</url>
@@ -10137,7 +10137,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Tsanta</first><last>Randriatsitohaina</last></author>
       <author><first>Fleur</first><last>Mougin</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>115–124</pages>
       <abstract>In this paper, we address the problem of automatically constructing a relevant corpus of scientific articles about food-drug interactions. There is a growing number of scientific publications that describe food-drug interactions but currently building a high-coverage corpus that can be used for information extraction purposes is not trivial. We investigate several methods for automating the query selection process using an expert-curated corpus of food-drug interactions. Our experiments show that index term features along with a decision tree classifier are the best approach for this task and that feature selection approaches and in particular gain ratio outperform frequency-based methods for query selection.</abstract>
       <url hash="73a7e558">W19-5013</url>
@@ -10148,7 +10148,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Enhancing biomedical word embeddings by retrofitting to verb clusters</title>
       <author><first>Billy</first><last>Chiu</last></author>
       <author><first>Simon</first><last>Baker</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Anna</first><last>Korhonen</last></author>
       <pages>125–134</pages>
       <abstract>Verbs play a fundamental role in many biomed-ical tasks and applications such as relation and event extraction. We hypothesize that performance on many downstream tasks can be improved by aligning the input pretrained embeddings according to semantic verb classes. In this work, we show that by using semantic clusters for verbs, a large lexicon of verbclasses derived from biomedical literature, weare able to improve the performance of common pretrained embeddings in downstream tasks by retrofitting them to verb classes. We present a simple and computationally efficient approach using a widely-available “off-the-shelf” retrofitting algorithm to align pretrained embeddings according to semantic verb clusters. We achieve state-of-the-art results on text classification and relation extraction tasks.</abstract>
@@ -10161,7 +10161,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Aditya</first><last>Joshi</last></author>
       <author><first>Sarvnaz</first><last>Karimi</last></author>
       <author><first>Ross</first><last>Sparks</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <author><first>C Raina</first><last>MacIntyre</last></author>
       <pages>135–141</pages>
       <abstract>Distributed representations of text can be used as features when training a statistical classifier. These representations may be created as a composition of word vectors or as context-based sentence vectors. We compare the two kinds of representations (word versus context) for three classification problems: influenza infection classification, drug usage classification and personal health mention classification. For statistical classifiers trained for each of these problems, context-based representations based on ELMo, Universal Sentence Encoder, Neural-Net Language Model and FLAIR are better than Word2Vec, GloVe and the two adapted using the MESH ontology. There is an improvement of 2-4% in the accuracy when these context-based representations are used instead of word-based representations.</abstract>
@@ -10192,7 +10192,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="18">
       <title>Incorporating Figure Captions and Descriptive Text in <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Term Indexing</title>
       <author><first>Xindi</first><last>Wang</last></author>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <pages>165–175</pages>
       <abstract>The goal of text classification is to automatically assign categories to documents. Deep learning automatically learns effective features from data instead of adopting human-designed features. In this paper, we focus specifically on biomedical document classification using a deep learning approach. We present a novel multichannel TextCNN model for MeSH term indexing. Beyond the normal use of the text from the abstract and title for model training, we also consider figure and table captions, as well as paragraphs associated with the figures and tables. We demonstrate that these latter text sources are important feature sources for our method. A new dataset consisting of these text segments curated from 257,590 full text articles together with the articles’ MEDLINE/PubMed MeSH terms is publicly available.</abstract>
       <url hash="ad8d2449">W19-5018</url>
@@ -10265,7 +10265,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Contributions to Clinical Named Entity Recognition in <fixed-case>P</fixed-case>ortuguese</title>
       <author><first>Fábio</first><last>Lopes</last></author>
       <author><first>César</first><last>Teixeira</last></author>
-      <author><first>Hugo</first><last>Gonçalo Oliveira</last></author>
+      <author id="hugo-goncalo-oliveira"><first>Hugo</first><last>Gonçalo Oliveira</last></author>
       <pages>223–233</pages>
       <abstract>Having in mind that different languages might present different challenges, this paper presents the following contributions to the area of Information Extraction from clinical text, targeting the Portuguese language: a collection of 281 clinical texts in this language, with manually-annotated named entities; word embeddings trained in a larger collection of similar texts; results of using BiLSTM-CRF neural networks for named entity recognition on the annotated collection, including a comparison of using in-domain or out-of-domain word embeddings in this task. Although learned with much less data, performance is higher when using in-domain embeddings. When tested in 20 independent clinical texts, this model achieved better results than a model using larger out-of-domain embeddings.</abstract>
       <url hash="9a82542c">W19-5024</url>
@@ -10325,7 +10325,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Cyril</first><last>Grouin</last></author>
       <author><first>Natalia</first><last>Grabar</last></author>
       <author><first>Vincent</first><last>Claveau</last></author>
-      <author><first>Thierry</first><last>Hamon</last></author>
+      <author id="thierry-hamon"><first>Thierry</first><last>Hamon</last></author>
       <pages>273–282</pages>
       <abstract>Textual data are useful for accessing expert information. Yet, since the texts are representative of distinct language uses, it is necessary to build specific corpora in order to be able to design suitable NLP tools. In some domains, such as medical domain, it may be complicated to access the representative textual data and their semantic annotations, while there exists a real need for providing efficient tools and methods. Our paper presents a corpus of clinical cases written in French, and their semantic annotations. Thus, we manually annotated a set of 717 files into four general categories (age, gender, outcome, and origin) for a total number of 2,835 annotations. The values of age, gender, and outcome are normalized. A subset with 70 files has been additionally manually annotated into 27 categories for a total number of 5,198 annotations.</abstract>
       <url hash="119699dd">W19-5029</url>
@@ -10336,7 +10336,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Two-stage Federated Phenotyping and Patient Representation Learning</title>
       <author><first>Dianbo</first><last>Liu</last></author>
       <author><first>Dmitriy</first><last>Dligach</last></author>
-      <author><first>Timothy</first><last>Miller</last></author>
+      <author id="timothy-miller"><first>Timothy</first><last>Miller</last></author>
       <pages>283–291</pages>
       <abstract>A large percentage of medical information is in unstructured text format in electronic medical record systems. Manual extraction of information from clinical notes is extremely time consuming. Natural language processing has been widely used in recent years for automatic information extraction from medical texts. However, algorithms trained on data from a single healthcare provider are not generalizable and error-prone due to the heterogeneity and uniqueness of medical documents. We develop a two-stage federated natural language processing method that enables utilization of clinical notes from different hospitals or clinics without moving the data, and demonstrate its performance using obesity and comorbities phenotyping as medical task. This approach not only improves the quality of a specific clinical task but also facilitates knowledge progression in the whole healthcare system, which is an essential part of learning health system. To the best of our knowledge, this is the first application of federated machine learning in clinical NLP.</abstract>
       <url hash="c6c5aa23">W19-5030</url>
@@ -10400,9 +10400,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Saber</first><last>Akhondi</last></author>
       <author><first>Camilo</first><last>Thorne</last></author>
       <author><first>Christian</first><last>Druckenbrodt</last></author>
-      <author><first>Trevor</first><last>Cohn</last></author>
-      <author><first>Michelle</first><last>Gregory</last></author>
-      <author><first>Karin</first><last>Verspoor</last></author>
+      <author id="trevor-cohn"><first>Trevor</first><last>Cohn</last></author>
+      <author id="michelle-gregory"><first>Michelle</first><last>Gregory</last></author>
+      <author id="karin-verspoor"><first>Karin</first><last>Verspoor</last></author>
       <pages>328–338</pages>
       <abstract>Chemical patents are an important resource for chemical information. However, few chemical Named Entity Recognition (NER) systems have been evaluated on patent documents, due in part to their structural and linguistic complexity. In this paper, we explore the NER performance of a BiLSTM-CRF model utilising pre-trained word embeddings, character-level word representations and contextualized ELMo word representations for chemical patents. We compare word embeddings pre-trained on biomedical and chemical patent corpora. The effect of tokenizers optimized for the chemical domain on NER performance in chemical patents is also explored. The results on two patent corpora show that contextualized word representations generated from ELMo substantially improve chemical NER performance w.r.t. the current state-of-the-art. We also show that domain-specific resources such as word embeddings trained on chemical patents and chemical-specific tokenizers, have a positive impact on NER performance.</abstract>
       <url hash="7ee1a8bc">W19-5035</url>
@@ -10434,7 +10434,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="38">
       <title>Extracting relations between outcomes and significance levels in Randomized Controlled Trials (<fixed-case>RCT</fixed-case>s) publications</title>
       <author><first>Anna</first><last>Koroleva</last></author>
-      <author><first>Patrick</first><last>Paroubek</last></author>
+      <author id="patrick-paroubek"><first>Patrick</first><last>Paroubek</last></author>
       <pages>359–369</pages>
       <abstract>Randomized controlled trials assess the effects of an experimental intervention by comparing it to a control intervention with regard to some variables - trial outcomes. Statistical hypothesis testing is used to test if the experimental intervention is superior to the control. Statistical significance is typically reported for the measured outcomes and is an important characteristic of the results. We propose a machine learning approach to automatically extract reported outcomes, significance levels and the relation between them. We annotated a corpus of 663 sentences with 2,552 outcome - significance level relations (1,372 positive and 1,180 negative relations). We compared several classifiers, using a manually crafted feature set, and a number of deep learning models. The best performance (F-measure of 94%) was shown by the BioBERT fine-tuned model.</abstract>
       <url hash="eaca7edd">W19-5038</url>
@@ -10474,7 +10474,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Shefali</first><last>Garg</last></author>
       <author><first>Sheetal</first><last>Shalini</last></author>
       <author><first>Prashant</first><last>Gupta</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>389–398</pages>
       <abstract>Parallel deep learning architectures like fine-tuned BERT and MT-DNN, have quickly become the state of the art, bypassing previous deep and shallow learning methods by a large margin. More recently, pre-trained models from large related datasets have been able to perform well on many downstream tasks by just fine-tuning on domain-specific datasets (similar to transfer learning). However, using powerful models on non-trivial tasks, such as ranking and large document classification, still remains a challenge due to input size limitations of parallel architecture and extremely small datasets (insufficient for fine-tuning). In this work, we introduce an end-to-end system, trained in a multi-task setting, to filter and re-rank answers in the medical domain. We use task-specific pre-trained models as deep feature extractors. Our model achieves the highest Spearman’s Rho and Mean Reciprocal Rank of 0.338 and 0.9622 respectively, on the ACL-BioNLP workshop MediQA Question Answering shared-task.</abstract>
@@ -10532,7 +10532,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="46">
       <title><fixed-case>DUT</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: An Adversarial Multi-Task Network to Jointly Model Recognizing Question Entailment and Question Answering</title>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Xuefei</first><last>Li</last></author>
       <author><first>Weihong</first><last>Yao</last></author>
       <author><first>Chengkun</first><last>Lang</last></author>
@@ -10545,7 +10545,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="47">
       <title><fixed-case>DUT</fixed-case>-<fixed-case>BIM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: Utilizing Transformer Network and Medical Domain-Specific Contextualized Representations for Question Answering</title>
-      <author><first>Huiwei</first><last>Zhou</last></author>
+      <author id="huiwei-zhou"><first>Huiwei</first><last>Zhou</last></author>
       <author><first>Bizun</first><last>Lei</last></author>
       <author><first>Zhe</first><last>Liu</last></author>
       <author><first>Zhuang</first><last>Liu</last></author>
@@ -10562,7 +10562,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Aditi</first><last>Chaudhary</last></author>
       <author><first>James</first><last>Route</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <pages>453–461</pages>
       <abstract>This paper presents the submissions by TeamDr.Quad to the ACL-BioNLP 2019 shared task on Textual Inference and Question Entailment in the Medical Domain. Our system is based on the prior work Liu et al. (2019) which uses a multi-task objective function for textual entailment. In this work, we explore different strategies for generalizing state-of-the-art language understanding models to the specialized medical domain. Our results on the shared task demonstrate that incorporating domain knowledge through data augmentation is a powerful strategy for addressing challenges posed specialized domains such as medicine.</abstract>
       <url hash="62335eea">W19-5048</url>
@@ -10574,7 +10574,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Sai Abishek</first><last>Bhaskar</last></author>
       <author><first>Rashi</first><last>Rungta</last></author>
       <author><first>James</first><last>Route</last></author>
-      <author><first>Eric</first><last>Nyberg</last></author>
+      <author id="eric-nyberg"><first>Eric</first><last>Nyberg</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <pages>462–470</pages>
       <abstract>This paper presents a multi-task learning approach to natural language inference (NLI) and question entailment (RQE) in the biomedical domain. Recognizing textual inference relations and question similarity can address the issue of answering new consumer health questions by mapping them to Frequently Asked Questions on reputed websites like the NIH. We show that leveraging information from parallel tasks across domains along with medical knowledge integration allows our model to learn better biomedical feature representations. Our final models for the NLI and RQE tasks achieve the 4th and 2nd rank on the shared-task leaderboard respectively.</abstract>
@@ -10662,7 +10662,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="57">
       <title><fixed-case>L</fixed-case>asige<fixed-case>B</fixed-case>io<fixed-case>TM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: Biomedical Question Answering using Bidirectional Transformers and Named Entity Recognition</title>
-      <author><first>Andre</first><last>Lamurias</last></author>
+      <author id="andre-lamurias"><first>Andre</first><last>Lamurias</last></author>
       <author><first>Francisco M</first><last>Couto</last></author>
       <pages>523–527</pages>
       <abstract>Biomedical Question Answering (QA) aims at providing automated answers to user questions, regarding a variety of biomedical topics. For example, these questions may ask for related to diseases, drugs, symptoms, or medical procedures. Automated biomedical QA systems could improve the retrieval of information necessary to answer these questions. The MEDIQA challenge consisted of three tasks concerning various aspects of biomedical QA. This challenge aimed at advancing approaches to Natural Language Inference (NLI) and Recognizing Question Entailment (RQE), which would then result in enhanced approaches to biomedical QA. Our approach explored a common Transformer-based architecture that could be applied to each task. This approach shared the same pre-trained weights, but which were then fine-tuned for each task using the provided training data. Furthermore, we augmented the training data with external datasets and enriched the question and answer texts using MER, a named entity recognition tool. Our approach obtained high levels of accuracy, in particular on the NLI task, which classified pairs of text according to their relation. For the QA task, we obtained higher Spearman’s rank correlation values using the entities recognized by MER.</abstract>
@@ -10702,10 +10702,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the Joint Workshop on Multiword Expressions and WordNet (MWE-WN 2019)</booktitle>
       <url hash="b9c630a3">W19-51</url>
       <editor><first>Agata</first><last>Savary</last></editor>
-      <editor><first>Carla Parra</first><last>Escartín</last></editor>
+      <editor id="carla-parra-escartin"><first>Carla Parra</first><last>Escartín</last></editor>
       <editor><first>Francis</first><last>Bond</last></editor>
       <editor><first>Jelena</first><last>Mitrović</last></editor>
-      <editor><first>Verginica Barbu</first><last>Mititelu</last></editor>
+      <editor id="verginica-barbu-mititelu"><first>Verginica Barbu</first><last>Mititelu</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -10743,7 +10743,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>R</fixed-case>omanian Corpus Annotated with Verbal Multiword Expressions</title>
       <author><first>Verginica</first><last>Barbu Mititelu</last></author>
       <author><first>Mihaela</first><last>Cristescu</last></author>
-      <author><first>Mihaela</first><last>Onofrei</last></author>
+      <author id="mihaela-plamada-onofrei"><first>Mihaela</first><last>Onofrei</last></author>
       <pages>13–21</pages>
       <abstract>This paper reports on the Romanian journalistic corpus annotated with verbal multiword expressions following the PARSEME guidelines. The corpus is sentence split, tokenized, part-of-speech tagged, lemmatized, syntactically annotated and verbal multiword expressions are identified and classified. It offers insights into the frequency of such Romanian word combinations and allows for their characterization. We offer data about the types of verbal multiword expressions in the corpus and some of their characteristics, such as internal structure, diversity in the corpus, average length, productivity of the verbs. This is a language resource that is important per se, as well as for the task of automatic multiword expressions identification, which can be further used in other systems. It was already used as training and test material in the shared tasks for the automatic identification of verbal multiword expressions organized by PARSEME.</abstract>
       <url hash="48f433bd">W19-5103</url>
@@ -10764,7 +10764,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="5">
       <title>Learning to Predict Novel Noun-Noun Compounds</title>
       <author><first>Prajit</first><last>Dhar</last></author>
-      <author><first>Lonneke</first><last>van der Plas</last></author>
+      <author id="lonneke-van-der-plas"><first>Lonneke</first><last>van der Plas</last></author>
       <pages>30–39</pages>
       <abstract>We introduce temporally and contextually-aware models for the novel task of predicting unseen but plausible concepts, as conveyed by noun-noun compounds in a time-stamped corpus. We train compositional models on observed compounds, more specifically the composed distributed representations of their constituents across a time-stamped corpus, while giving it corrupted instances (where head or modifier are replaced by a random constituent) as negative evidence. The model captures generalisations over this data and learns what combinations give rise to plausible compounds and which ones do not. After training, we query the model for the plausibility of automatically generated novel combinations and verify whether the classifications are accurate. For our best model, we find that in around 85% of the cases, the novel compounds generated are attested in previously unseen data. An additional estimated 5% are plausible despite not being attested in the recent corpus, based on judgments from independent human raters.</abstract>
       <url hash="b88ba0d0">W19-5105</url>
@@ -10784,8 +10784,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="7">
       <title>A comparison of statistical association measures for identifying dependency-based collocations in various languages.</title>
       <author><first>Marcos</first><last>Garcia</last></author>
-      <author><first>Marcos</first><last>García Salido</last></author>
-      <author><first>Margarita</first><last>Alonso-Ramos</last></author>
+      <author id="marcos-garcia-salido"><first>Marcos</first><last>García Salido</last></author>
+      <author id="margarita-alonso-ramos"><first>Margarita</first><last>Alonso-Ramos</last></author>
       <pages>49–59</pages>
       <abstract>This paper presents an exploration of different statistical association measures to automatically identify collocations from corpora in English, Portuguese, and Spanish. To evaluate the impact of the association metrics we manually annotated corpora with three different syntactic patterns of collocations (adjective-noun, verb-object and nominal compounds). We took advantage of the PARSEME 1.1 Shared Task corpora by selecting a subset of 155k tokens in the three referred languages, in which we annotated 1,526 collocations with the corresponding Lexical Functions according to the Meaning-Text Theory. Using the resulting gold-standard, we have carried out a comparison between frequency data and several well-known association measures, both symmetric and asymmetric. The results show that the combination of dependency triples with raw frequency information is as powerful as the best association measures in most syntactic patterns and languages. Furthermore, and despite the asymmetric behaviour of collocations, directional approaches perform worse than the symmetric ones in the extraction of these phraseological combinations.</abstract>
       <url hash="cc925eac">W19-5107</url>
@@ -10807,7 +10807,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Modeling <fixed-case>MWE</fixed-case>s in <fixed-case>BTB</fixed-case>-<fixed-case>WN</fixed-case></title>
       <author><first>Laska</first><last>Laskova</last></author>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Ivajlo</first><last>Radev</last></author>
       <author><first>Zara</first><last>Kancheva</last></author>
       <pages>70–78</pages>
@@ -10819,7 +10819,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="10">
       <title>Without lexicons, multiword expression identification will never fly: A position statement</title>
       <author><first>Agata</first><last>Savary</last></author>
-      <author><first>Silvio</first><last>Cordeiro</last></author>
+      <author id="silvio-cordeiro"><first>Silvio</first><last>Cordeiro</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
       <pages>79–91</pages>
       <abstract>Because most multiword expressions (MWEs), especially verbal ones, are semantically non-compositional, their automatic identification in running text is a prerequisite for semantically-oriented downstream applications. However, recent developments, driven notably by the PARSEME shared task on automatic identification of verbal MWEs, show that this task is harder than related tasks, despite recent contributions both in multilingual corpus annotation and in computational models. In this paper, we analyse possible reasons for this state of affairs. They lie in the nature of the MWE phenomenon, as well as in its distributional properties. We also offer a comparative analysis of the state-of-the-art systems, which exhibit particularly strong sensitivity to unseen data. On this basis, we claim that, in order to make strong headway in MWE identification, the community should bend its mind into coupling identification of MWEs with their discovery, via syntactic MWE lexicons. Such lexicons need not necessarily achieve a linguistically complete modelling of MWEs’ behavior, but they should provide minimal morphosyntactic information to cover some potential uses, so as to complement existing MWE-annotated corpora. We define requirements for such minimal NLP-oriented lexicon, and we propose a roadmap for the MWE community driven by these requirements.</abstract>
@@ -10840,7 +10840,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="12">
       <title>Semantic Modelling of Adjective-Noun Collocations Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Yana</first><last>Strakatova</last></author>
-      <author><first>Erhard</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard</first><last>Hinrichs</last></author>
       <pages>104–113</pages>
       <abstract>In this paper we argue that Frame Semantics (Fillmore, 1982) provides a good framework for semantic modelling of adjective-noun collocations. More specifically, the notion of a frame is rich enough to account for nouns from different semantic classes and to model semantic relations that hold between an adjective and a noun in terms of Frame Elements. We have substantiated these findings by considering a sample of adjective-noun collocations from German such as “enger Freund” ‘close friend’ and “starker Regen” ‘heavy rain’. The data sample is taken from different semantic fields identified in the German wordnet GermaNet (Hamp and Feldweg, 1997; Henrich and Hinrichs, 2010). The study is based on the electronic dictionary DWDS (Klein and Geyken, 2010) and uses the collocation extraction tool Wortprofil (Geyken et al., 2009). The FrameNet modelling is based on the online resource available at <url>http://framenet.icsi.berkeley.edu</url>. Since FrameNets are available for a range of typologically different languages, it is feasible to extend the current case study to other languages.</abstract>
       <url hash="3e73957a">W19-5112</url>
@@ -10849,7 +10849,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="13">
       <title>A Neural Graph-based Approach to Verbal <fixed-case>MWE</fixed-case> Identification</title>
-      <author><first>Jakub</first><last>Waszczuk</last></author>
+      <author id="jakub-waszczuk"><first>Jakub</first><last>Waszczuk</last></author>
       <author><first>Rafael</first><last>Ehren</last></author>
       <author><first>Regina</first><last>Stodden</last></author>
       <author><first>Laura</first><last>Kallmeyer</last></author>
@@ -10871,7 +10871,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="15">
       <title><fixed-case>IDION</fixed-case>: A database for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek multiword expressions</title>
-      <author><first>Stella</first><last>Markantonatou</last></author>
+      <author id="stella-markantonatou"><first>Stella</first><last>Markantonatou</last></author>
       <author><first>Panagiotis</first><last>Minos</last></author>
       <author><first>George</first><last>Zakis</last></author>
       <author><first>Vassiliki</first><last>Moutzouri</last></author>
@@ -10884,7 +10884,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="16">
       <title>Identification of Adjective-Noun Neologisms using Pretrained Language Models</title>
-      <author><first>John Philip</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John Philip</first><last>McCrae</last></author>
       <pages>135–141</pages>
       <abstract>Neologism detection is a key task in the constructing of lexical resources and has wider implications for NLP, however the identification of multiword neologisms has received little attention. In this paper, we show that we can effectively identify the distinction between compositional and non-compositional adjective-noun pairs by using pretrained language models and comparing this with individual word embeddings. Our results show that the use of these models significantly improves over baseline linguistic features, however the combination with linguistic features still further improves the results, suggesting the strength of a hybrid approach.</abstract>
       <url hash="ed33eb06">W19-5116</url>
@@ -10894,7 +10894,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="17">
       <title>Neural Lemmatization of Multiword Expressions</title>
       <author><first>Marine</first><last>Schmitt</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>142–148</pages>
       <abstract>This article focuses on the lemmatization of multiword expressions (MWEs). We propose a deep encoder-decoder architecture generating for every MWE word its corresponding part in the lemma, based on the internal context of the MWE. The encoder relies on recurrent networks based on (1) the character sequence of the individual words to capture their morphological properties, and (2) the word sequence of the MWE to capture lexical and syntactic properties. The decoder in charge of generating the corresponding part of the lemma for each word of the MWE is based on a classical character-level attention-based recurrent model. Our model is evaluated for Italian, French, Polish and Portuguese and shows good performances except for Polish.</abstract>
       <url hash="32beefa2">W19-5117</url>
@@ -10906,7 +10906,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Antonio</first><last>Šajatović</last></author>
       <author><first>Maja</first><last>Buljan</last></author>
       <author><first>Jan</first><last>Šnajder</last></author>
-      <author><first>Bojana</first><last>Dalbelo Bašić</last></author>
+      <author id="bojana-dalbelo-basic"><first>Bojana</first><last>Dalbelo Bašić</last></author>
       <pages>149–154</pages>
       <abstract>Automatic Term Extraction (ATE) extracts terminology from domain-specific corpora. ATE is used in many NLP tasks, including Computer Assisted Translation, where it is typically applied to individual documents rather than the entire corpus. While corpus-level ATE has been extensively evaluated, it is not obvious how the results transfer to document-level ATE. To fill this gap, we evaluate 16 state-of-the-art ATE methods on full-length documents from three different domains, on both corpus and document levels. Unlike existing studies, our evaluation is more realistic as we take into account all gold terms. We show that no single method is best in corpus-level ATE, but C-Value and KeyConceptRelatendess surpass others in document-level ATE.</abstract>
       <url hash="190ede4b">W19-5118</url>
@@ -10939,7 +10939,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The Impact of Word Representations on Sequential Neural <fixed-case>MWE</fixed-case> Identification</title>
       <author><first>Nicolas</first><last>Zampieri</last></author>
       <author><first>Carlos</first><last>Ramisch</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <pages>169–175</pages>
       <abstract>Recent initiatives such as the PARSEME shared task allowed the rapid development of MWE identification systems. Many of those are based on recent NLP advances, using neural sequence models that take continuous word representations as input. We study two related questions in neural MWE identification: (a) the use of lemmas and/or surface forms as input features, and (b) the use of word-based or character-based embeddings to represent them. Our experiments on Basque, French, and Polish show that character-based representations yield systematically better results than word-based ones. In some cases, character-based representations of surface forms can be used as a proxy for lemmas, depending on the morphological complexity of the language.</abstract>
       <url hash="0682f903">W19-5121</url>
@@ -10951,23 +10951,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)</booktitle>
       <url hash="b88306f2">W19-52</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -10992,10 +10992,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="2">
       <title>Improving Zero-shot Translation with Language-Independent Constraints</title>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <author><first>Jan</first><last>Niehues</last></author>
       <author><first>Thanh-Le</first><last>Ha</last></author>
-      <author><first>Alexander</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alexander</first><last>Waibel</last></author>
       <pages>13–23</pages>
       <abstract>An important concern in training multilingual neural machine translation (NMT) is to translate between language pairs unseen during training, i.e zero-shot translation. Improving this ability kills two birds with one stone by providing an alternative to pivot translation which also allows us to better understand how the model captures information between languages. In this work, we carried out an investigation on this capability of the multilingual NMT models. First, we intentionally create an encoder architecture which is independent with respect to the source language. Such experiments shed light on the ability of NMT encoders to learn multilingual representations, in general. Based on such proof of concept, we were able to design regularization methods into the standard Transformer model, so that the whole architecture becomes more robust in zero-shot conditions. We investigated the behaviour of such models on the standard IWSLT 2017 multilingual dataset. We achieved an average improvement of 2.23 BLEU points across 12 language pairs compared to the zero-shot performance of a state-of-the-art multilingual system. Additionally, we carry out further experiments in which the effect is confirmed even for language pairs with multiple intermediate pivots.</abstract>
       <url hash="751531b8">W19-5202</url>
@@ -11029,7 +11029,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Yunsu</first><last>Kim</last></author>
       <author><first>Julian</first><last>Schamper</last></author>
       <author><first>Shahram</first><last>Khadivi</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>45–52</pages>
       <abstract>Back-translation — data augmentation by translating target monolingual data — is a crucial component in modern neural machine translation (NMT). In this work, we reformulate back-translation in the scope of cross-entropy optimization of an NMT model, clarifying its underlying mathematical assumptions and approximations beyond its heuristic usage. Our formulation covers broader synthetic data generation schemes, including sampling from a target-to-source NMT model. With this formulation, we point out fundamental problems of the sampling-based approaches and propose to remedy them by (i) disabling label smoothing for the target-to-source model and (ii) sampling from a restricted search space. Our statements are investigated on the WMT 2018 German &lt;-&gt; English news translation task.</abstract>
       <url hash="ae3784e5">W19-5205</url>
@@ -11127,23 +11127,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)</booktitle>
       <url hash="f3ea3fb7">W19-53</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -11158,14 +11158,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Findings of the 2019 Conference on Machine Translation (<fixed-case>WMT</fixed-case>19)</title>
       <author><first>Loïc</first><last>Barrault</last></author>
       <author><first>Ondřej</first><last>Bojar</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <author><first>Christian</first><last>Federmann</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
       <author><first>Yvette</first><last>Graham</last></author>
       <author><first>Barry</first><last>Haddow</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Shervin</first><last>Malmasi</last></author>
+      <author id="shervin-malmasi"><first>Shervin</first><last>Malmasi</last></author>
       <author><first>Christof</first><last>Monz</last></author>
       <author><first>Mathias</first><last>Müller</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
@@ -11215,7 +11215,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Ulrich</first><last>Germann</last></author>
       <author><first>Roman</first><last>Grundkiewicz</last></author>
       <author><first>Faheem</first><last>Kirefu</last></author>
-      <author><first>Antonio Valerio</first><last>Miceli Barone</last></author>
+      <author id="antonio-valerio-miceli-barone"><first>Antonio Valerio</first><last>Miceli Barone</last></author>
       <author><first>Alexandra</first><last>Birch</last></author>
       <pages>103–115</pages>
       <abstract>The University of Edinburgh participated in the WMT19 Shared Task on News Translation in six language directions: English↔Gujarati, English↔Chinese, German→English, and English→Czech. For all translation directions, we created or used back-translations of monolingual data in the target language as additional synthetic training data. For English↔Gujarati, we also explored semi-supervised MT with cross-lingual language model pre-training, and translation pivoting through Hindi. For translation to and from Chinese, we investigated character-based tokenisation vs. sub-word segmentation of Chinese text. For German→English, we studied the impact of vast amounts of back-translated training data on translation quality, gaining a few additional insights over Edunov et al. (2018). For English→Czech, we compared different preprocessing and tokenisation regimes.</abstract>
@@ -11238,14 +11238,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="6">
       <title>Machine Translation with parfda, <fixed-case>M</fixed-case>oses, kenlm, nplm, and <fixed-case>PRO</fixed-case></title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>122–128</pages>
       <abstract>We build parfda Moses statistical machine translation (SMT) models for most language pairs in the news translation task. We experiment with a hybrid approach using neural language models integrated into Moses. We obtain the constrained data statistics on the machine translation task, the coverage of the test sets, and the upper bounds on the translation results. We also contribute a new testsuite for the German-English language pair and a new automated key phrase extraction technique for the evaluation of the testsuite translations.</abstract>
       <url hash="36c43081">W19-5306</url>
       <revision id="1" href="W19-5306v1" hash="444c5306"/>
       <revision id="2" href="W19-5306v2" hash="3767b9cc">Clarifies notation in Table 7, Figure 2 caption, and Table 4.</revision>
-      <doi>10.18653/v1/W19-5306</doi>
       <revision id="3" href="W19-5306v3" hash="36c43081">Clarified notation in Table 7.</revision>
+      <doi>10.18653/v1/W19-5306</doi>
       <bibkey>bicici-2019-machine</bibkey>
     </paper>
     <paper id="7">
@@ -11295,10 +11295,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="11">
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>19 News Translation Task: Pivoting Techniques for Low Resource <fixed-case>MT</fixed-case></title>
       <author><first>Noe</first><last>Casas</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <author><first>Carlos</first><last>Escolano</last></author>
       <author><first>Christine</first><last>Basta</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>155–162</pages>
       <abstract>In this article, we describe the TALP-UPC research group participation in the WMT19 news translation shared task for Kazakh-English. Given the low amount of parallel training data, we resort to using Russian as pivot language, training subword-based statistical translation systems for Russian-Kazakh and Russian-English that were then used to create two synthetic pseudo-parallel corpora for Kazakh-English and English-Kazakh respectively. Finally, a self-attention model based on the decoder part of the Transformer architecture was trained on the two pseudo-parallel corpora.</abstract>
       <url hash="ca8deb09">W19-5311</url>
@@ -11308,7 +11308,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="12">
       <title><fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to the <fixed-case>WMT</fixed-case> 2019 News Shared Task</title>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>163–167</pages>
       <abstract>We describe here the experiments we did for the the news translation shared task of WMT 2019. We focused on the new German-to-French language direction, and mostly used current standard approaches to develop a Neural Machine Translation system. We make use of the Tensor2Tensor implementation of the Transformer model. After carefully cleaning the data and noting the importance of the good use of recent monolingual data for the task, we obtain our final result by combining the output of a diverse set of trained models through the use of their “checkpoint agreement”.</abstract>
@@ -11324,7 +11324,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>168–174</pages>
       <abstract>In this paper, we describe our supervised neural machine translation (NMT) systems that we developed for the news translation task for Kazakh↔English, Gujarati↔English, Chinese↔English, and English→Finnish translation directions. We focused on leveraging multilingual transfer learning and back-translation for the extremely low-resource language pairs: Kazakh↔English and Gujarati↔English translation. For the Chinese↔English translation, we used the provided parallel data augmented with a large quantity of back-translated monolingual data to train state-of-the-art NMT systems. We then employed techniques that have been proven to be most effective, such as back-translation, fine-tuning, and model ensembling, to generate the primary submissions of Chinese↔English. For English→Finnish, our submission from WMT18 remains a strong baseline despite the increase in parallel corpora for this year’s task.</abstract>
       <url hash="4be41821">W19-5313</url>
@@ -11354,7 +11354,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="16">
       <title>The <fixed-case>IIIT</fixed-case>-<fixed-case>H</fixed-case> <fixed-case>G</fixed-case>ujarati-<fixed-case>E</fixed-case>nglish Machine Translation System for <fixed-case>WMT</fixed-case>19</title>
       <author><first>Vikrant</first><last>Goyal</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <pages>191–195</pages>
       <abstract>This paper describes the Neural Machine Translation system of IIIT-Hyderabad for the Gujarati→English news translation shared task of WMT19. Our system is basedon encoder-decoder framework with attention mechanism. We experimented with Multilingual Neural MT models. Our experiments show that Multilingual Neural Machine Translation leveraging parallel data from related language pairs helps in significant BLEU improvements upto 11.5, for low resource language pairs like Gujarati-English</abstract>
       <url hash="ec436651">W19-5316</url>
@@ -11383,7 +11383,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>19 Systems: Old Favorites and New Tricks</title>
       <author><first>Jeremy</first><last>Gwinnup</last></author>
       <author><first>Grant</first><last>Erdmann</last></author>
-      <author><first>Tim</first><last>Anderson</last></author>
+      <author id="tim-anderson"><first>Tim</first><last>Anderson</last></author>
       <pages>203–208</pages>
       <abstract>This paper describes the Air Force Research Laboratory (AFRL) machine translation systems and the improvements that were developed during the WMT19 evaluation campaign. This year, we refine our approach to training popular neural machine translation toolkits, experiment with a new domain adaptation technique and again measure improvements in performance on the Russian–English language pair.</abstract>
       <url hash="89e8631c">W19-5318</url>
@@ -11392,7 +11392,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="19">
       <title>Evaluating the Supervised and Zero-shot Performance of Multi-lingual Translation Models</title>
-      <author><first>Chris</first><last>Hokamp</last></author>
+      <author id="chris-hokamp"><first>Chris</first><last>Hokamp</last></author>
       <author><first>John</first><last>Glover</last></author>
       <author><first>Demian</first><last>Gholipour Ghalandari</last></author>
       <pages>209–217</pages>
@@ -11404,9 +11404,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="20">
       <title>The <fixed-case>MLLP</fixed-case>-<fixed-case>UPV</fixed-case> Supervised Machine Translation Systems for <fixed-case>WMT</fixed-case>19 News Translation Task</title>
       <author><first>Javier</first><last>Iranzo-Sánchez</last></author>
-      <author><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
+      <author id="goncal-v-garces-diaz-munio"><first>Gonçal V.</first><last>Garcés Díaz-Munío</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>218–224</pages>
       <abstract>This paper describes the participation of the MLLP research group of the Universitat Politècnica de València in the WMT 2019 News Translation Shared Task. In this edition, we have submitted systems for the German ↔ English and German ↔ French language pairs, participating in both directions of each pair. Our submitted systems, based on the Transformer architecture, make ample use of data filtering, synthetic data and domain adaptation through fine-tuning.</abstract>
       <url hash="f82e7e55">W19-5320</url>
@@ -11498,7 +11498,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Incorporating Word and Subword Units in Unsupervised Machine Translation Using Language Model Rescoring</title>
       <author><first>Zihan</first><last>Liu</last></author>
       <author><first>Yan</first><last>Xu</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Pascale</first><last>Fung</last></author>
       <pages>275–282</pages>
       <abstract>This paper describes CAiRE’s submission to the unsupervised machine translation track of the WMT’19 news shared task from German to Czech. We leverage a phrase-based statistical machine translation (PBSMT) model and a pre-trained language model to combine word-level neural machine translation (NMT) and subword-level NMT models without using any parallel data. We propose to solve the morphological richness problem of languages by training byte-pair encoding (BPE) embeddings for German and Czech separately, and they are aligned using MUSE (Conneau et al., 2018). To ensure the fluency and consistency of translations, a rescoring mechanism is proposed that reuses the pre-trained language model to select the translation candidates generated through beam search. Moreover, a series of pre-processing and post-processing approaches are applied to improve the quality of final translations.</abstract>
@@ -11508,11 +11508,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="28">
       <title><fixed-case>JUMT</fixed-case> at <fixed-case>WMT</fixed-case>2019 News Translation Task: A Hybrid Approach to Machine Translation for <fixed-case>L</fixed-case>ithuanian to <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Sainik Kumar</first><last>Mahata</last></author>
+      <author id="sainik-mahata"><first>Sainik Kumar</first><last>Mahata</last></author>
       <author><first>Avishek</first><last>Garain</last></author>
       <author><first>Adityar</first><last>Rayala</last></author>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>283–286</pages>
       <abstract>In the current work, we present a description of the system submitted to WMT 2019 News Translation Shared task. The system was created to translate news text from Lithuanian to English. To accomplish the given task, our system used a Word Embedding based Neural Machine Translation model to post edit the outputs generated by a Statistical Machine Translation model. The current paper documents the architecture of our model, descriptions of the various modules and the results produced using the same. Our system garnered a BLEU score of 17.6.</abstract>
       <url hash="c01ddc58">W19-5328</url>
@@ -11538,7 +11538,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Kehai</first><last>Chen</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>294–301</pages>
       <abstract>This paper presents the NICT’s participation in the WMT19 unsupervised news translation task. We participated in the unsupervised translation direction: German-Czech. Our primary submission to the task is the result of a simple combination of our unsupervised neural and statistical machine translation systems. Our system is ranked first for the German-to-Czech translation task, using only the data provided by the organizers (“constraint’”), according to both BLEU-cased and human evaluation. We also performed contrastive experiments with other language pairs, namely, English-Gujarati and English-Kazakh, to better assess the effectiveness of unsupervised machine translation in for distant language pairs and in truly low-resource conditions.</abstract>
       <url hash="80482f43">W19-5330</url>
@@ -11560,8 +11560,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Shankha Raj</first><last>Nayek</last></author>
       <author><first>Aditya</first><last>Chowdhury</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>308–313</pages>
       <abstract>In this paper we describe our joint submission (JU-Saarland) from Jadavpur University and Saarland University in the WMT 2019 news translation shared task for English–Gujarati language pair within the translation task sub-track. Our baseline and primary submissions are built using Recurrent neural network (RNN) based neural machine translation (NMT) system which follows attention mechanism. Given the fact that the two languages belong to different language families and there is not enough parallel data for this language pair, building a high quality NMT system for this language pair is a difficult task. We produced synthetic data through back-translation from available monolingual data. We report the translation quality of our English–Gujarati and Gujarati–English NMT systems trained at word, byte-pair and character encoding levels where RNN at word level is considered as the baseline and used for comparison purpose. Our English–Gujarati system ranked in the second position in the shared task.</abstract>
       <url hash="4e02ba75">W19-5332</url>
@@ -11585,10 +11585,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="34">
       <title>e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2019 News Translation Task</title>
       <author><first>Csaba</first><last>Oravecz</last></author>
-      <author><first>Katina</first><last>Bontcheva</last></author>
+      <author id="katina-bontcheva"><first>Katina</first><last>Bontcheva</last></author>
       <author><first>Adrien</first><last>Lardilleux</last></author>
-      <author><first>László</first><last>Tihanyi</last></author>
-      <author><first>Andreas</first><last>Eisele</last></author>
+      <author id="laszlo-tihanyi"><first>László</first><last>Tihanyi</last></author>
+      <author id="andreas-eisele"><first>Andreas</first><last>Eisele</last></author>
       <pages>320–326</pages>
       <abstract>This paper describes the submissions of the eTranslation team to the WMT 2019 news translation shared task. The systems have been developed with the aim of identifying and following rather than establishing best practices, under the constraints imposed by a low resource training and decoding environment normally used for our production systems. Thus most of the findings and results are transferable to systems used in the eTranslation service. Evaluations suggest that this approach is able to produce decent models with good performance and speed without the overhead of using prohibitively deep and complex architectures.</abstract>
       <url hash="ed6ebe3f">W19-5334</url>
@@ -11598,8 +11598,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="35">
       <title>Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2019</title>
-      <author><first>Marcis</first><last>Pinnis</last></author>
-      <author><first>Rihards</first><last>Krišlauks</last></author>
+      <author id="marcis-pinnis"><first>Marcis</first><last>Pinnis</last></author>
+      <author id="rihards-krislauks"><first>Rihards</first><last>Krišlauks</last></author>
       <author><first>Matīss</first><last>Rikters</last></author>
       <pages>327–334</pages>
       <abstract>The paper describes the development process of Tilde’s NMT systems for the WMT 2019 shared task on news translation. We trained systems for the English-Lithuanian and Lithuanian-English translation directions in constrained and unconstrained tracks. We build upon the best methods of the previous year’s competition and combine them with recent advancements in the field. We also present a new method to ensure source domain adherence in back-translated data. Our systems achieved a shared first place in human evaluation.</abstract>
@@ -11610,7 +11610,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="36">
       <title>Apertium-fin-eng–Rule-based Shallow Machine Translation for <fixed-case>WMT</fixed-case> 2019 Shared Task</title>
-      <author><first>Tommi</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi</first><last>Pirinen</last></author>
       <pages>335–341</pages>
       <abstract>In this paper we describe a rule-based, bi-directional machine translation system for the Finnish—English language pair. The baseline system was based on the existing data of FinnWordNet, omorfi and apertium-eng. We have built the disambiguation, lexical selection and translation rules by hand. The dictionaries and rules have been developed based on the shared task data. We describe in this article the use of the shared task data as a kind of a test-driven development workflow in RBMT development and show that it suits perfectly to a modern software engineering continuous integration workflow of RBMT and yields big increases to BLEU scores with minimal effort.</abstract>
       <url hash="1d2cec39">W19-5336</url>
@@ -11640,7 +11640,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Weiyue</first><last>Wang</last></author>
       <author><first>Parnia</first><last>Bahar</last></author>
       <author><first>Yingbo</first><last>Gao</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>349–355</pages>
       <abstract>This paper describes the neural machine translation systems developed at the RWTH Aachen University for the German-English, Chinese-English and Kazakh-English news translation tasks of the Fourth Conference on Machine Translation (WMT19). For all tasks, the final submitted system is based on the Transformer architecture. We focus on improving data filtering and fine-tuning as well as systematically evaluating interesting approaches like unigram language model segmentation and transfer learning. For the De-En task, none of the tested methods gave a significant improvement over last years winning system and we end up with the same performance, resulting in 39.6% BLEU on newstest2019. In the Zh-En task, we show 1.3% BLEU improvement over our last year’s submission, which we mostly attribute to the splitting of long sentences during translation. We further report results on the Kazakh-English task where we gain improvements of 11.1% BLEU over our baseline system. On the same task we present a recent transfer learning approach, which uses half of the free parameters of our submission system and performs on par with it.</abstract>
       <url hash="1db58e3f">W19-5338</url>
@@ -11649,7 +11649,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="39">
       <title>The <fixed-case>U</fixed-case>niversitat d’Alacant Submissions to the <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>azakh News Translation Task at <fixed-case>WMT</fixed-case> 2019</title>
-      <author><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
+      <author id="victor-m-sanchez-cartagena"><first>Víctor M.</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <pages>356–363</pages>
@@ -11662,7 +11662,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>CUED</fixed-case>@<fixed-case>WMT</fixed-case>19:<fixed-case>EWC</fixed-case>&amp;<fixed-case>LM</fixed-case>s</title>
       <author><first>Felix</first><last>Stahlberg</last></author>
       <author><first>Danielle</first><last>Saunders</last></author>
-      <author><first>Adrià</first><last>de Gispert</last></author>
+      <author id="adria-de-gispert"><first>Adrià</first><last>de Gispert</last></author>
       <author id="bill-byrne"><first>Bill</first><last>Byrne</last></author>
       <pages>364–373</pages>
       <abstract>Two techniques provide the fabric of the Cambridge University Engineering Department’s (CUED) entry to the WMT19 evaluation campaign: elastic weight consolidation (EWC) and different forms of language modelling (LMs). We report substantial gains by fine-tuning very strong baselines on former WMT test sets using a combination of checkpoint averaging and EWC. A sentence-level Transformer LM and a document-level LM based on a modified Transformer architecture yield further gains. As in previous years, we also extract n-gram probabilities from SMT lattices which can be seen as a source-conditioned n-gram LM.</abstract>
@@ -11712,7 +11712,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Dario</first><last>Stojanovski</last></author>
       <author><first>Viktor</first><last>Hangya</last></author>
       <author><first>Matthias</first><last>Huck</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>393–399</pages>
       <abstract>We describe LMU Munich’s machine translation system for German→Czech translation which was used to participate in the WMT19 shared task on unsupervised news translation. We train our model using monolingual data only from both languages. The final model is an unsupervised neural model using established techniques for unsupervised translation such as denoising autoencoding and online back-translation. We bootstrap the model with masked language model pretraining and enhance it with back-translations from an unsupervised phrase-based system which is itself bootstrapped using unsupervised bilingual word embeddings.</abstract>
       <url hash="3957685d">W19-5344</url>
@@ -11722,7 +11722,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="45">
       <title>Combining Local and Document-Level Context: The <fixed-case>LMU</fixed-case> <fixed-case>M</fixed-case>unich Neural Machine Translation System at <fixed-case>WMT</fixed-case>19</title>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>400–406</pages>
       <abstract>We describe LMU Munich’s machine translation system for English→German translation which was used to participate in the WMT19 shared task on supervised news translation. We specifically participated in the document-level MT track. The system used as a primary submission is a context-aware Transformer capable of both rich modeling of limited contextual information and integration of large-scale document-level context with a less rich representation. We train this model by fine-tuning a big Transformer baseline. Our experimental results show that document-level context provides for large improvements in translation quality, and adding a rich representation of the previous sentence provides a small additional gain.</abstract>
       <url hash="a2d8fbff">W19-5345</url>
@@ -11734,7 +11734,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Kamal Kumar</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>407–411</pages>
       <abstract>We describe our submission to WMT 2019 News translation shared task for Gujarati-English language pair. We submit constrained systems, i.e, we rely on the data provided for this language pair and do not use any external data. We train Transformer based subword-level neural machine translation (NMT) system using original parallel corpus along with synthetic parallel corpus obtained through back-translation of monolingual data. Our primary systems achieve BLEU scores of 10.4 and 8.1 for Gujarati→English and English→Gujarati, respectively. We observe that incorporating monolingual data through back-translation improves the BLEU score significantly over baseline NMT and SMT systems for this language pair.</abstract>
       <url hash="580edaa1">W19-5346</url>
@@ -11750,7 +11750,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Sami</first><last>Virpioja</last></author>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Arvi</first><last>Hurskainen</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>412–423</pages>
       <abstract>In this paper we present the University of Helsinki submissions to the WMT 2019 shared news translation task in three language pairs: English-German, English-Finnish and Finnish-English. This year we focused first on cleaning and filtering the training data using multiple data-filtering approaches, resulting in much smaller and cleaner training sets. For English-German we trained both sentence-level transformer models as well as compared different document-level translation approaches. For Finnish-English and English-Finnish we focused on different segmentation approaches and we also included a rule-based system for English-Finnish.</abstract>
       <url hash="731c482b">W19-5347</url>
@@ -11792,7 +11792,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="50">
       <title><fixed-case>DFKI</fixed-case>-<fixed-case>NMT</fixed-case> Submission to the <fixed-case>WMT</fixed-case>19 News Translation Task</title>
       <author><first>Jingyi</first><last>Zhang</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>440–444</pages>
       <abstract>This paper describes the DFKI-NMT submission to the WMT19 News translation task. We participated in both English-to-German and German-to-English directions. We trained Transformer models and adopted various techniques for effectively training our models, including data selection, back-translation and in-domain fine-tuning. We give a detailed analysis of the performance of our system.</abstract>
       <url hash="34d0a297">W19-5350</url>
@@ -11828,7 +11828,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="53">
       <title>Evaluating Conjunction Disambiguation on <fixed-case>E</fixed-case>nglish-to-<fixed-case>G</fixed-case>erman and <fixed-case>F</fixed-case>rench-to-<fixed-case>G</fixed-case>erman <fixed-case>WMT</fixed-case> 2019 Translation Hypotheses</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <pages>464–469</pages>
       <abstract>We present a test set for evaluating an MT system’s capability to translate ambiguous conjunctions depending on the sentence structure. We concentrate on the English conjunction “but” and its French equivalent “mais” which can be translated into two different German conjunctions. We evaluate all English-to-German and French-to-German submissions to the WMT 2019 shared translation task. The evaluation is done mainly automatically, with additional fast manual inspection of unclear cases. All systems almost perfectly recognise the target conjunction “aber”, whereas accuracies for the other target conjunction “sondern” range from 78% to 97%, and the errors are mostly caused by replacing it with the alternative conjunction “aber”. The best performing system for both language pairs is a multilingual Transformer “TartuNLP” system trained on all WMT 2019 language pairs which use the Latin script, indicating that the multilingual approach is beneficial for conjunction disambiguation. As for other system features, such as using synthetic back-translated data, context-aware, hybrid, etc., no particular (dis)advantages can be observed. Qualitative manual inspection of translation hypotheses shown that highly ranked systems generally produce translations with high adequacy and fluency, meaning that these systems are not only capable of capturing the right conjunction whereas the rest of the translation hypothesis is poor. On the other hand, the low ranked systems generally exhibit lower fluency and poor adequacy.</abstract>
       <url hash="51b384d0">W19-5353</url>
@@ -11839,7 +11839,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>M</fixed-case>u<fixed-case>C</fixed-case>o<fixed-case>W</fixed-case> Test Suite at <fixed-case>WMT</fixed-case> 2019: Automatically Harvested Multilingual Contrastive Word Sense Disambiguation Test Sets for Machine Translation</title>
       <author><first>Alessandro</first><last>Raganato</last></author>
       <author><first>Yves</first><last>Scherrer</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>470–480</pages>
       <abstract>Supervised Neural Machine Translation (NMT) systems currently achieve impressive translation quality for many language pairs. One of the key features of a correct translation is the ability to perform word sense disambiguation (WSD), i.e., to translate an ambiguous word with its correct sense. Existing evaluation benchmarks on WSD capabilities of translation systems rely heavily on manual work and cover only few language pairs and few word types. We present MuCoW, a multilingual contrastive test suite that covers 16 language pairs with more than 200 thousand contrastive sentence pairs, automatically built from word-aligned parallel corpora and the wide-coverage multilingual sense inventory of BabelNet. We evaluate the quality of the ambiguity lexicons and of the resulting test suite on all submissions from 9 language pairs presented in the WMT19 news shared translation task, plus on other 5 language pairs using NMT pretrained models. The MuCoW test suite is available at <url>http://github.com/Helsinki-NLP/MuCoW</url>.</abstract>
       <url hash="b7c7d4c6">W19-5354</url>
@@ -11863,7 +11863,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>WMDO</fixed-case>: Fluency-based Word Mover’s Distance for Machine Translation Evaluation</title>
       <author><first>Julian</first><last>Chow</last></author>
       <author><first>Lucia</first><last>Specia</last></author>
-      <author><first>Pranava</first><last>Madhyastha</last></author>
+      <author id="pranava-swaroop-madhyastha"><first>Pranava</first><last>Madhyastha</last></author>
       <pages>494–500</pages>
       <abstract>We propose WMDO, a metric based on distance between distributions in the semantic vector space. Matching in the semantic space has been investigated for translation evaluation, but the constraints of a translation’s word order have not been fully explored. Building on the Word Mover’s Distance metric and various word embeddings, we introduce a fragmentation penalty to account for fluency of a translation. This word order extension is shown to perform better than standard WMD, with promising results against other types of metrics.</abstract>
       <url hash="7c8219b6">W19-5356</url>
@@ -11893,7 +11893,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>EED</fixed-case>: Extended Edit Distance Measure for Machine Translation</title>
       <author><first>Peter</first><last>Stanchev</last></author>
       <author><first>Weiyue</first><last>Wang</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <pages>514–520</pages>
       <abstract>Over the years a number of machine translation metrics have been developed in order to evaluate the accuracy and quality of machine-generated translations. Metrics such as BLEU and TER have been used for decades. However, with the rapid progress of machine translation systems, the need for better metrics is growing. This paper proposes an extension of the edit distance, which achieves better human correlation, whilst remaining fast, flexible and easy to understand.</abstract>
       <url hash="8a4abc9a">W19-5359</url>
@@ -11927,7 +11927,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="62">
       <title><fixed-case>NICT</fixed-case>’s Supervised Neural Machine Translation Systems for the <fixed-case>WMT</fixed-case>19 Translation Robustness Task</title>
       <author><first>Raj</first><last>Dabre</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>533–536</pages>
       <abstract>In this paper we describe our neural machine translation (NMT) systems for Japanese↔English translation which we submitted to the translation robustness task. We focused on leveraging transfer learning via fine tuning to improve translation quality. We used a fairly well established domain adaptation technique called Mixed Fine Tuning (MFT) (Chu et. al., 2017) to improve translation quality for Japanese↔English. We also trained bi-directional NMT models instead of uni-directional ones as the former are known to be quite robust, especially in low-resource scenarios. However, given the noisy nature of the in-domain training data, the improvements we obtained are rather modest.</abstract>
       <url hash="1bf963d7">W19-5362</url>
@@ -11945,7 +11945,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="64">
       <title><fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>19 Robustness Task</title>
-      <author><first>Jindřich</first><last>Helcl</last></author>
+      <author id="jindrich-helcl"><first>Jindřich</first><last>Helcl</last></author>
       <author><first>Jindřich</first><last>Libovický</last></author>
       <author><first>Martin</first><last>Popel</last></author>
       <pages>539–543</pages>
@@ -12007,23 +12007,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)</booktitle>
       <url hash="99c07870">W19-54</url>
-      <editor><first>Ondřej</first><last>Bojar</last></editor>
-      <editor><first>Rajen</first><last>Chatterjee</last></editor>
+      <editor id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></editor>
+      <editor id="rajen-chatterjee"><first>Rajen</first><last>Chatterjee</last></editor>
       <editor><first>Christian</first><last>Federmann</last></editor>
       <editor><first>Mark</first><last>Fishel</last></editor>
       <editor><first>Yvette</first><last>Graham</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Matthias</first><last>Huck</last></editor>
-      <editor><first>Antonio Jimeno</first><last>Yepes</last></editor>
+      <editor id="antonio-jimeno-yepes"><first>Antonio Jimeno</first><last>Yepes</last></editor>
       <editor><first>Philipp</first><last>Koehn</last></editor>
-      <editor><first>André</first><last>Martins</last></editor>
+      <editor id="andre-f-t-martins"><first>André</first><last>Martins</last></editor>
       <editor><first>Christof</first><last>Monz</last></editor>
-      <editor><first>Matteo</first><last>Negri</last></editor>
-      <editor><first>Aurélie</first><last>Névéol</last></editor>
+      <editor id="matteo-negri"><first>Matteo</first><last>Negri</last></editor>
+      <editor id="aurelie-neveol"><first>Aurélie</first><last>Névéol</last></editor>
       <editor><first>Mariana</first><last>Neves</last></editor>
       <editor><first>Matt</first><last>Post</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
-      <editor><first>Karin</first><last>Verspoor</last></editor>
+      <editor id="karin-verspoor"><first>Karin</first><last>Verspoor</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Florence, Italy</address>
       <month>August</month>
@@ -12036,7 +12036,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </frontmatter>
     <paper id="1">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2019 Shared Tasks on Quality Estimation</title>
-      <author><first>Erick</first><last>Fonseca</last></author>
+      <author id="erick-fonseca"><first>Erick</first><last>Fonseca</last></author>
       <author><first>Lisa</first><last>Yankovskaya</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <author><first>Mark</first><last>Fishel</last></author>
@@ -12068,7 +12068,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Cristian</first><last>Grozea</last></author>
       <author><first>Antonio</first><last>Jimeno Yepes</last></author>
       <author><first>Madeleine</first><last>Kittner</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <author><first>Nancy</first><last>Mah</last></author>
       <author><first>Aurelie</first><last>Neveol</last></author>
       <author><first>Mariana</first><last>Neves</last></author>
@@ -12085,7 +12085,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="4">
       <title>Findings of the <fixed-case>WMT</fixed-case> 2019 Shared Task on Parallel Corpus Filtering for Low-Resource Conditions</title>
       <author><first>Philipp</first><last>Koehn</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Juan</first><last>Pino</last></author>
       <pages>54–72</pages>
@@ -12096,7 +12096,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="5">
       <title><fixed-case>RTM</fixed-case> Stacking Results for Machine Translation Performance Prediction</title>
-      <author><first>Ergun</first><last>Biçici</last></author>
+      <author id="ergun-bicici"><first>Ergun</first><last>Biçici</last></author>
       <pages>73–77</pages>
       <abstract>We obtain new results using referential translation machines with increased number of learning models in the set of results that are stacked to obtain a better mixture of experts prediction. We combine features extracted from the word-level predictions with the sentence- or document-level features, which significantly improve the results on the training sets but decrease the test set results.</abstract>
       <url hash="da2d0ec4">W19-5405</url>
@@ -12105,12 +12105,12 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="6">
       <title>Unbabel’s Participation in the <fixed-case>WMT</fixed-case>19 Translation Quality Estimation Shared Task</title>
-      <author><first>Fabio</first><last>Kepler</last></author>
+      <author id="fabio-kepler"><first>Fabio</first><last>Kepler</last></author>
       <author><first>Jonay</first><last>Trénous</last></author>
       <author><first>Marcos</first><last>Treviso</last></author>
       <author><first>Miguel</first><last>Vera</last></author>
       <author><first>António</first><last>Góis</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>António V.</first><last>Lopes</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
       <pages>78–84</pages>
@@ -12186,7 +12186,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="13">
       <title>Unbabel’s Submission to the <fixed-case>WMT</fixed-case>2019 <fixed-case>APE</fixed-case> Shared Task: <fixed-case>BERT</fixed-case>-Based Encoder-Decoder for Automatic Post-Editing</title>
       <author><first>António V.</first><last>Lopes</last></author>
-      <author><first>M. Amin</first><last>Farajian</last></author>
+      <author id="m-amin-farajian"><first>M. Amin</first><last>Farajian</last></author>
       <author><first>Gonçalo M.</first><last>Correia</last></author>
       <author><first>Jonay</first><last>Trénous</last></author>
       <author><first>André F. T.</first><last>Martins</last></author>
@@ -12202,7 +12202,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Nico</first><last>Herbig</last></author>
       <author><first>Antonio</first><last>Krüger</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>124–131</pages>
       <abstract>In this paper we present an English–German Automatic Post-Editing (APE) system called transference, submitted to the APE Task organized at WMT 2019. Our transference model is based on a multi-encoder transformer architecture. Unlike previous approaches, it (i) uses a transformer encoder block for src, (ii) followed by a transformer decoder block, but without masking, for self-attention on mt, which effectively acts as second encoder combining src –&gt; mt, and (iii) feeds this representation into a final decoder block generating pe. Our model improves over the raw black-box neural machine translation system by 0.9 and 1.0 absolute BLEU points on the WMT 2019 APE development and test set. Our submission ranked 3rd, however compared to the two top systems, performance differences are not statistically significant.</abstract>
       <url hash="5dd77790">W19-5414</url>
@@ -12235,7 +12235,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>U</fixed-case>d<fixed-case>S</fixed-case> Submission for the <fixed-case>WMT</fixed-case> 19 Automatic Post-Editing Task</title>
       <author><first>Hongfei</first><last>Xu</last></author>
       <author><first>Qiuhui</first><last>Liu</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>145–150</pages>
       <abstract>In this paper, we describe our submission to the English-German APE shared task at WMT 2019. We utilize and adapt an NMT architecture originally developed for exploiting context information to APE, implement this in our own transformer model and explore joint training of the APE task with a de-noising encoder.</abstract>
       <url hash="5c155e69">W19-5417</url>
@@ -12247,8 +12247,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Terminology-Aware Segmentation and Domain Feature for the <fixed-case>WMT</fixed-case>19 Biomedical Translation Task</title>
       <author><first>Casimiro Pio</first><last>Carrino</last></author>
       <author><first>Bardia</first><last>Rafieian</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
-      <author><first>José A. R.</first><last>Fonollosa</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="jose-a-r-fonollosa"><first>José A. R.</first><last>Fonollosa</last></author>
       <pages>151–155</pages>
       <abstract>In this work, we give a description of the TALP-UPC systems submitted for the WMT19 Biomedical Translation Task. Our proposed strategy is NMT model-independent and relies only on one ingredient, a biomedical terminology list. We first extracted such a terminology list by labelling biomedical words in our training dataset using the BabelNet API. Then, we designed a data preparation strategy to insert the terms information at a token level. Finally, we trained the Transformer model with this terms-informed data. Our best-submitted system ranked 2nd and 3rd for Spanish-English and English-Spanish translation directions, respectively.</abstract>
       <url hash="7e798e9a">W19-5418</url>
@@ -12294,7 +12294,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="22">
       <title><fixed-case>BSC</fixed-case> Participation in the <fixed-case>WMT</fixed-case> Translation of Biomedical Abstracts</title>
       <author><first>Felipe</first><last>Soares</last></author>
-      <author><first>Martin</first><last>Krallinger</last></author>
+      <author id="martin-krallinger"><first>Martin</first><last>Krallinger</last></author>
       <pages>175–178</pages>
       <abstract>This paper describes the machine translation systems developed by the Barcelona Supercomputing (BSC) team for the biomedical translation shared task of WMT19. Our system is based on Neural Machine Translation unsing the OpenNMT-py toolkit and Transformer architecture. We participated in four translation directions for the English/Spanish and English/Portuguese language pairs. To create our training data, we concatenated several parallel corpora, both from in-domain and out-of-domain sources, as well as terminological resources from UMLS.</abstract>
       <url hash="051821bc">W19-5422</url>
@@ -12306,7 +12306,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Pau</first><last>Baquero-Arnal</last></author>
       <author><first>Javier</first><last>Iranzo-Sánchez</last></author>
       <author><first>Jorge</first><last>Civera</last></author>
-      <author><first>Alfons</first><last>Juan</last></author>
+      <author id="alfons-juan"><first>Alfons</first><last>Juan</last></author>
       <pages>179–184</pages>
       <abstract>This paper describes the participation of the MLLP research group of the Universitat Politècnica de València in the WMT 2019 Similar Language Translation Shared Task. We have submitted systems for the Portuguese ↔ Spanish language pair, in both directions. We have submitted systems based on the Transformer architecture as well as an in development novel architecture which we have called 2D alternating RNN. We have carried out domain adaptation through fine-tuning.</abstract>
       <url hash="bbd6d7cf">W19-5423</url>
@@ -12317,7 +12317,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> System for the <fixed-case>WMT</fixed-case> Similar Language Task: Statistical vs Neural Machine Translation</title>
       <author><first>Magdalena</first><last>Biesialska</last></author>
       <author><first>Lluis</first><last>Guardia</last></author>
-      <author><first>Marta R.</first><last>Costa-jussà</last></author>
+      <author id="marta-r-costa-jussa"><first>Marta R.</first><last>Costa-jussà</last></author>
       <pages>185–191</pages>
       <abstract>Although the problem of similar language translation has been an area of research interest for many years, yet it is still far from being solved. In this paper, we study the performance of two popular approaches: statistical and neural. We conclude that both methods yield similar results; however, the performance varies depending on the language pair. While the statistical approach outperforms the neural one by a difference of 6 BLEU points for the Spanish-Portuguese language pair, the proposed neural model surpasses the statistical one by a difference of 2 BLEU points for Czech-Polish. In the former case, the language similarity (based on perplexity) is much higher than in the latter case. Additionally, we report negative results for the system combination with back-translation. Our TALP-UPC system submission won 1st place for Czech-&gt;Polish and 2nd place for Spanish-&gt;Portuguese in the official evaluation of the 1st WMT Similar Language Translation task.</abstract>
       <url hash="1eb6be08">W19-5424</url>
@@ -12337,7 +12337,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="26">
       <title>Utilizing Monolingual Data in <fixed-case>NMT</fixed-case> for Similar Languages: Submission to Similar Language Translation Task</title>
       <author><first>Jyotsana</first><last>Khatri</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>197–201</pages>
       <abstract>This paper describes our submission to Shared Task on Similar Language Translation in Fourth Conference on Machine Translation (WMT 2019). We submitted three systems for Hindi -&gt; Nepali direction in which we have examined the performance of a RNN based NMT system, a semi-supervised NMT system where monolingual data of both languages is utilized using the architecture by and a system trained with extra synthetic sentences generated using copy of source and target sentences without using any additional monolingual data.</abstract>
       <url hash="8dedfdd7">W19-5426</url>
@@ -12348,7 +12348,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Neural Machine Translation: <fixed-case>H</fixed-case>indi-<fixed-case>N</fixed-case>epali</title>
       <author><first>Sahinur Rahman</first><last>Laskar</last></author>
       <author><first>Partha</first><last>Pakray</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>202–207</pages>
       <abstract>With the extensive use of Machine Translation (MT) technology, there is progressively interest in directly translating between pairs of similar languages. Because the main challenge is to overcome the limitation of available parallel data to produce a precise MT output. Current work relies on the Neural Machine Translation (NMT) with attention mechanism for the similar language translation of WMT19 shared task in the context of Hindi-Nepali pair. The NMT systems trained the Hindi-Nepali parallel corpus and tested, analyzed in Hindi ⇔ Nepali translation. The official result declared at WMT19 shared task, which shows that our NMT system obtained Bilingual Evaluation Understudy (BLEU) score 24.6 for primary configuration in Nepali to Hindi translation. Also, we have achieved BLEU score 53.7 (Hindi to Nepali) and 49.1 (Nepali to Hindi) in contrastive system type.</abstract>
       <url hash="d3f990f2">W19-5427</url>
@@ -12368,7 +12368,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="29">
       <title>Panlingua-<fixed-case>KMI</fixed-case> <fixed-case>MT</fixed-case> System for Similar Language Translation Task at <fixed-case>WMT</fixed-case> 2019</title>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
       <author><first>Ritesh</first><last>Kumar</last></author>
       <author><first>Akanksha</first><last>Bansal</last></author>
       <author><first>Priya</first><last>Rani</last></author>
@@ -12382,7 +12382,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>UDS</fixed-case>–<fixed-case>DFKI</fixed-case> Submission to the <fixed-case>WMT</fixed-case>2019 <fixed-case>C</fixed-case>zech–<fixed-case>P</fixed-case>olish Similar Language Translation Shared Task</title>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>219–223</pages>
       <abstract>In this paper we present the UDS-DFKI system submitted to the Similar Language Translation shared task at WMT 2019. The first edition of this shared task featured data from three pairs of similar languages: Czech and Polish, Hindi and Nepali, and Portuguese and Spanish. Participants could choose to participate in any of these three tracks and submit system outputs in any translation direction. We report the results obtained by our system in translating from Czech to Polish and comment on the impact of out-of-domain test data in the performance of our system. UDS-DFKI achieved competitive performance ranking second among ten teams in Czech to Polish translation.</abstract>
       <url hash="de5781b2">W19-5430</url>
@@ -12436,7 +12436,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Low-Resource Corpus Filtering Using Multilingual Sentence Embeddings</title>
       <author><first>Vishrav</first><last>Chaudhary</last></author>
       <author><first>Yuqing</first><last>Tang</last></author>
-      <author><first>Francisco</first><last>Guzmán</last></author>
+      <author id="francisco-guzman"><first>Francisco</first><last>Guzmán</last></author>
       <author><first>Holger</first><last>Schwenk</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <pages>261–266</pages>
@@ -12457,7 +12457,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="37">
       <title>Webinterpret Submission to the <fixed-case>WMT</fixed-case>2019 Shared Task on Parallel Corpus Filtering</title>
-      <author><first>Jesús</first><last>González-Rubio</last></author>
+      <author id="jesus-gonzalez-rubio"><first>Jesús</first><last>González-Rubio</last></author>
       <pages>271–276</pages>
       <abstract>This document describes the participation of Webinterpret in the shared task on parallel corpus filtering at the Fourth Conference on Machine Translation (WMT 2019). Here, we describe the main characteristics of our approach and discuss the results obtained on the data sets published for the shared task.</abstract>
       <url hash="ac9c7227">W19-5437</url>
@@ -12477,8 +12477,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="39">
       <title>Filtering of Noisy Parallel Corpora Based on Hypothesis Generation</title>
       <author><first>Zuzanna</first><last>Parcheta</last></author>
-      <author><first>Germán</first><last>Sanchis-Trilles</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="german-sanchis-trilles"><first>Germán</first><last>Sanchis-Trilles</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <pages>282–288</pages>
       <abstract>The filtering task of noisy parallel corpora in WMT2019 aims to challenge participants to create filtering methods to be useful for training machine translation systems. In this work, we introduce a noisy parallel corpora filtering system based on generating hypotheses by means of a translation model. We train translation models in both language pairs: Nepali–English and Sinhala–English using provided parallel corpora. We select the training subset for three language pairs (Nepali, Sinhala and Hindi to English) jointly using bilingual cross-entropy selection to create the best possible translation model for both language pairs. Once the translation models are trained, we translate the noisy corpora and generate a hypothesis for each sentence pair. We compute the smoothed BLEU score between the target sentence and generated hypothesis. In addition, we apply several rules to discard very noisy or inadequate sentences which can lower the translation score. These heuristics are based on sentence length, source and target similarity and source language detection. We compare our results with the baseline published on the shared task website, which uses the Zipporah model, over which we achieve significant improvements in one of the conditions in the shared task. The designed filtering system is domain independent and all experiments are conducted using neural machine translation.</abstract>
       <url hash="41542ede">W19-5439</url>
@@ -12489,7 +12489,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Parallel Corpus Filtering Based on Fuzzy String Matching</title>
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>289–293</pages>
       <abstract>In this paper, we describe the IIT Patna’s submission to WMT 2019 shared task on parallel corpus filtering. This shared task asks the participants to develop methods for scoring each parallel sentence from a given noisy parallel corpus. Quality of the scoring method is judged based on the quality of SMT and NMT systems trained on smaller set of high-quality parallel sentences sub-sampled from the original noisy corpus. This task has two language pairs. We submit for both the Nepali-English and Sinhala-English language pairs. We define fuzzy string matching score between English and the translated (into English) source based on Levenshtein distance. Based on the scores, we sub-sample two sets (having 1 million and 5 millions English tokens) of parallel sentences from each parallel corpus, and train SMT systems for development purpose only. The organizers publish the official evaluation using both SMT and NMT on the final official test set. Total 10 teams participated in the shared task and according the official evaluation, our scoring method obtains 2nd position in the team ranking for 1-million NepaliEnglish NMT and 5-million Sinhala-English NMT categories.</abstract>
       <url hash="8c908ac3">W19-5440</url>
@@ -12500,7 +12500,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The <fixed-case>U</fixed-case>niversity of <fixed-case>H</fixed-case>elsinki Submission to the <fixed-case>WMT</fixed-case>19 Parallel Corpus Filtering Task</title>
       <author><first>Raúl</first><last>Vázquez</last></author>
       <author><first>Umut</first><last>Sulubacak</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>294–300</pages>
       <abstract>This paper describes the University of Helsinki Language Technology group’s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the ‘bad’ quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs.</abstract>
       <url hash="ec937e25">W19-5441</url>
@@ -12541,7 +12541,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Abhishek</first><last>Sainani</last></author>
       <author><first>Nitin</first><last>Ramrakhiyani</last></author>
       <author><first>Sachin</first><last>Pawar</last></author>
-      <author><first>Girish K</first><last>Palshikar</last></author>
+      <author id="girish-palshikar"><first>Girish K</first><last>Palshikar</last></author>
       <author><first>Smita</first><last>Ghaisas</last></author>
       <pages>8–13</pages>
       <url hash="d05179e4">W19-5502</url>
@@ -12550,7 +12550,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="3">
       <title>Rationale Classification for Educational Trading Platforms</title>
       <author><first>Annie</first><last>Ying</last></author>
-      <author><first>Pablo</first><last>Duboue</last></author>
+      <author id="pablo-duboue"><first>Pablo</first><last>Duboue</last></author>
       <pages>14–20</pages>
       <url hash="94bb56a4">W19-5503</url>
       <bibkey>ying-duboue-2019-rationale</bibkey>
@@ -12598,7 +12598,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Learning to Learn Sales Prediction with Social Media Sentiment</title>
       <author><first>Zhaojiang</first><last>Lin</last></author>
       <author><first>Andrea</first><last>Madotto</last></author>
-      <author><first>Genta Indra</first><last>Winata</last></author>
+      <author id="genta-indra-winata"><first>Genta Indra</first><last>Winata</last></author>
       <author><first>Zihan</first><last>Liu</last></author>
       <author><first>Yan</first><last>Xu</last></author>
       <author><first>Cong</first><last>Gao</last></author>
@@ -12711,7 +12711,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="20">
       <title><fixed-case>HITS</fixed-case>-<fixed-case>SBD</fixed-case> at the <fixed-case>F</fixed-case>in<fixed-case>SBD</fixed-case> Task: Machine Learning vs. Rule-based Sentence Boundary Detection</title>
       <author><first>Mehwish</first><last>Fatima</last></author>
-      <author><first>Mark-Christoph</first><last>Mueller</last></author>
+      <author id="mark-christoph-muller"><first>Mark-Christoph</first><last>Mueller</last></author>
       <pages>115–121</pages>
       <url hash="7b94237d">W19-5520</url>
       <bibkey>fatima-mueller-2019-hits</bibkey>
@@ -12721,7 +12721,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Mingyu</first><last>Wan</last></author>
       <author><first>Rong</first><last>Xiang</last></author>
       <author><first>Emmanuele</first><last>Chersoni</last></author>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <author><first>Bin</first><last>Miao</last></author>
       <author><first>David</first><last>Broadstock</last></author>
@@ -12747,7 +12747,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <url hash="17f1bbec">W19-56</url>
       <editor><first>Mahmoud</first><last>El-Haj</last></editor>
       <editor><first>Paul</first><last>Rayson</last></editor>
-      <editor><first>Eric</first><last>Atwell</last></editor>
+      <editor id="eric-atwell"><first>Eric</first><last>Atwell</last></editor>
       <editor><first>Lama</first><last>Alsudias</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Cardiff, United Kingdom</address>
@@ -12829,7 +12829,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="9">
       <title>Crisis Detection from <fixed-case>A</fixed-case>rabic Tweets</title>
       <author><first>Alaa</first><last>Alharbi</last></author>
-      <author><first>Mark</first><last>Lee</last></author>
+      <author id="mark-lee"><first>Mark</first><last>Lee</last></author>
       <pages>72–79</pages>
       <url hash="f16b72dc">W19-5609</url>
       <bibkey>alharbi-lee-2019-crisis</bibkey>
@@ -12837,7 +12837,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="10">
       <title>The Design of the <fixed-case>S</fixed-case>au<fixed-case>LTC</fixed-case> application for the <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Learner Translation Corpus</title>
       <author><first>Maha</first><last>Al-Harthi</last></author>
-      <author><first>Amal</first><last>Alsaif</last></author>
+      <author id="amal-al-saif"><first>Amal</first><last>Alsaif</last></author>
       <pages>80–88</pages>
       <url hash="521444a4">W19-5610</url>
       <bibkey>al-harthi-alsaif-2019-design</bibkey>
@@ -12964,10 +12964,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the 5th Workshop on Semantic Deep Learning (SemDeep-5)</booktitle>
       <url hash="8db96abc">W19-58</url>
-      <editor><first>Luis</first><last>Espinosa-Anke</last></editor>
+      <editor id="luis-espinosa-anke"><first>Luis</first><last>Espinosa-Anke</last></editor>
       <editor><first>Thierry</first><last>Declerck</last></editor>
       <editor><first>Dagmar</first><last>Gromann</last></editor>
-      <editor><first>Jose</first><last>Camacho-Collados</last></editor>
+      <editor id="jose-camacho-collados"><first>Jose</first><last>Camacho-Collados</last></editor>
       <editor><first>Mohammad Taher</first><last>Pilehvar</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Macau, China</address>
@@ -12982,14 +12982,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="1">
       <title><fixed-case>LIAAD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>eep-5 Challenge: Word-in-Context (<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>)</title>
       <author><first>Daniel</first><last>Loureiro</last></author>
-      <author><first>Alípio</first><last>Jorge</last></author>
+      <author id="alipio-jorge"><first>Alípio</first><last>Jorge</last></author>
       <pages>1–5</pages>
       <url hash="f7a7de80">W19-5801</url>
       <bibkey>loureiro-jorge-2019-liaad</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>LIMSI</fixed-case>-<fixed-case>MULTISEM</fixed-case> at the <fixed-case>IJCAI</fixed-case> <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>eep-5 <fixed-case>W</fixed-case>i<fixed-case>C</fixed-case> Challenge: Context Representations for Word Usage Similarity Estimation</title>
-      <author><first>Aina</first><last>Garí Soler</last></author>
+      <author id="aina-gari-soler"><first>Aina</first><last>Garí Soler</last></author>
       <author><first>Marianna</first><last>Apidianaki</last></author>
       <author><first>Alexandre</first><last>Allauzen</last></author>
       <pages>6–11</pages>
@@ -13027,9 +13027,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="6">
       <title>Extending Neural Question Answering with Linguistic Input Features</title>
       <author><first>Fabian</first><last>Hommel</last></author>
-      <author><first>Philipp</first><last>Cimiano</last></author>
+      <author id="philipp-cimiano"><first>Philipp</first><last>Cimiano</last></author>
       <author><first>Matthias</first><last>Orlikowski</last></author>
-      <author><first>Matthias</first><last>Hartung</last></author>
+      <author id="matthias-hartung"><first>Matthias</first><last>Hartung</last></author>
       <pages>31–39</pages>
       <url hash="51564d97">W19-5806</url>
       <bibkey>hommel-etal-2019-extending</bibkey>
@@ -13040,7 +13040,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Valerio</first><last>Piccioni</last></author>
       <author><first>Vevake</first><last>Balaraman</last></author>
       <author><first>Marco</first><last>Guerini</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>40–49</pages>
       <url hash="93e7690f">W19-5807</url>
       <bibkey>magnolini-etal-2019-use</bibkey>
@@ -13069,11 +13069,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue</booktitle>
       <url hash="20ed9820">W19-59</url>
       <editor><first>Satoshi</first><last>Nakamura</last></editor>
-      <editor><first>Milica</first><last>Gasic</last></editor>
+      <editor id="milica-gasic"><first>Milica</first><last>Gasic</last></editor>
       <editor><first>Ingrid</first><last>Zukerman</last></editor>
       <editor><first>Gabriel</first><last>Skantze</last></editor>
       <editor><first>Mikio</first><last>Nakano</last></editor>
-      <editor><first>Alexandros</first><last>Papangelis</last></editor>
+      <editor id="alexandros-papangelis"><first>Alexandros</first><last>Papangelis</last></editor>
       <editor><first>Stefan</first><last>Ultes</last></editor>
       <editor><first>Koichiro</first><last>Yoshino</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -13120,7 +13120,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="4">
       <title>Few-Shot Dialogue Generation Without Annotated Data: A Transfer Learning Approach</title>
       <author><first>Igor</first><last>Shalyminov</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <author><first>Arash</first><last>Eshghi</last></author>
       <author><first>Oliver</first><last>Lemon</last></author>
       <pages>32–39</pages>
@@ -13133,7 +13133,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>SIM</fixed-case>: A Slot-Independent Neural Model for Dialogue State Tracking</title>
       <author><first>Chenguang</first><last>Zhu</last></author>
       <author><first>Michael</first><last>Zeng</last></author>
-      <author><first>Xuedong</first><last>Huang</last></author>
+      <author id="xuedong-huang"><first>Xuedong</first><last>Huang</last></author>
       <pages>40–45</pages>
       <abstract>Dialogue state tracking is an important component in task-oriented dialogue systems to identify users’ goals and requests as a dialogue proceeds. However, as most previous models are dependent on dialogue slots, the model complexity soars when the number of slots increases. In this paper, we put forward a slot-independent neural model (SIM) to track dialogue states while keeping the model complexity invariant to the number of dialogue slots. The model utilizes attention mechanisms between user utterance and system actions. SIM achieves state-of-the-art results on WoZ and DSTC2 tasks, with only 20% of the model size of previous models.</abstract>
       <url hash="14a38c72">W19-5905</url>
@@ -13185,7 +13185,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Capturing Dialogue State Variable Dependencies with an Energy-based Neural Dialogue State Tracker</title>
       <author><first>Anh Duong</first><last>Trinh</last></author>
       <author><first>Robert J.</first><last>Ross</last></author>
-      <author><first>John D.</first><last>Kelleher</last></author>
+      <author id="john-kelleher"><first>John D.</first><last>Kelleher</last></author>
       <pages>75–84</pages>
       <abstract>Dialogue state tracking requires the population and maintenance of a multi-slot frame representation of the dialogue state. Frequently, dialogue state tracking systems assume independence between slot values within a frame. In this paper we argue that treating the prediction of each slot value as an independent prediction task may ignore important associations between the slot values, and, consequently, we argue that treating dialogue state tracking as a structured prediction problem can help to improve dialogue state tracking performance. To support this argument, the research presented in this paper is structured into three stages: (i) analyzing variable dependencies in dialogue data; (ii) applying an energy-based methodology to model dialogue state tracking as a structured prediction task; and (iii) evaluating the impact of inter-slot relationships on model performance. Overall we demonstrate that modelling the associations between target slots with an energy-based formalism improves dialogue state tracking performance in a number of ways.</abstract>
       <url hash="6f90986d">W19-5910</url>
@@ -13195,7 +13195,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="11">
       <title>Leveraging Non-Conversational Tasks for Low Resource Slot Filling: Does it help?</title>
       <author><first>Samuel</first><last>Louvan</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <pages>85–91</pages>
       <abstract>Slot filling is a core operation for utterance understanding in task-oriented dialogue systems. Slots are typically domain-specific, and adding new domains to a dialogue system involves data and time-intensive processes. A popular technique to address the problem is transfer learning, where it is assumed the availability of a large slot filling dataset for the source domain, to be used to help slot filling on the target domain, with fewer data. In this work, instead, we propose to leverage source tasks based on semantically related non-conversational resources (e.g., semantic sequence tagging datasets), as they are both cheaper to obtain and reusable to several slot filling domains. We show that using auxiliary non-conversational tasks in a multi-task learning setup consistently improves low resource slot filling performance.</abstract>
       <url hash="655fa148">W19-5911</url>
@@ -13207,7 +13207,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Alexandros</first><last>Papangelis</last></author>
       <author><first>Yi-Chia</first><last>Wang</last></author>
       <author><first>Piero</first><last>Molino</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <pages>92–102</pages>
       <abstract>Some of the major challenges in training conversational agents include the lack of large-scale data of real-world complexity, defining appropriate evaluation measures, and managing meaningful conversations across many topics over long periods of time. Moreover, most works tend to assume that the conversational agent’s environment is stationary, a somewhat strong assumption. To remove this assumption and overcome the lack of data, we take a step away from the traditional training pipeline and model the conversation as a stochastic collaborative game. Each agent (player) has a role (“assistant”, “tourist”, “eater”, etc.) and their own objectives, and can only interact via language they generate. Each agent, therefore, needs to learn to operate optimally in an environment with multiple sources of uncertainty (its own LU and LG, the other agent’s LU, Policy, and LG). In this work, we present the first complete attempt at concurrently training conversational agents that communicate only via self-generated language and show that they outperform supervised and deep learning baselines.</abstract>
       <url hash="a46925d2">W19-5912</url>
@@ -13227,7 +13227,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="14">
       <title>Spoken Conversational Search for General Knowledge</title>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <author><first>Pascal</first><last>Bellec</last></author>
       <author><first>Benoit</first><last>Besset</last></author>
       <author><first>Martinho</first><last>Dossantos</last></author>
@@ -13235,7 +13235,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Munshi</first><last>Asadullah</last></author>
       <author><first>Olivier</first><last>Leblouch</last></author>
       <author><first>Jeanyves.</first><last>Lancien</last></author>
-      <author><first>Geraldine</first><last>Damnati</last></author>
+      <author id="geraldine-damnati"><first>Geraldine</first><last>Damnati</last></author>
       <author><first>Emmanuel</first><last>Mory</last></author>
       <author><first>Frederic</first><last>Herledan</last></author>
       <pages>110–113</pages>
@@ -13249,7 +13249,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Jean-Leon</first><last>Bouraoui</last></author>
       <author><first>Sonia</first><last>Le Meitour</last></author>
       <author><first>Romain</first><last>Carbou</last></author>
-      <author><first>Lina M.</first><last>Rojas Barahona</last></author>
+      <author id="lina-m-rojas-barahona"><first>Lina M.</first><last>Rojas Barahona</last></author>
       <author><first>Vincent</first><last>Lemaire</last></author>
       <pages>114–117</pages>
       <abstract>We present Graph2Bots, a tool for assisting conversational agent designers. It extracts a graph representation from human-human conversations by using unsupervised learning. The generated graph contains the main stages of the dialogue and their inner transitions. The graphical user interface (GUI) then allows graph editing.</abstract>
@@ -13273,7 +13273,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Semih</first><last>Yavuz</last></author>
       <author><first>Abhinav</first><last>Rastogi</last></author>
       <author><first>Guan-Lin</first><last>Chao</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>122–132</pages>
       <abstract>Recent advances in neural sequence-to-sequence models have led to promising results for several language generation-based tasks, including dialogue response generation, summarization, and machine translation. However, these models are known to have several problems, especially in the context of chit-chat based dialogue systems: they tend to generate short and dull responses that are often too generic. Furthermore, these models do not ground conversational responses on knowledge and facts, resulting in turns that are not accurate, informative and engaging for the users. In this paper, we propose and experiment with a series of response generation models that aim to serve in the general scenario where in addition to the dialogue context, relevant unstructured external knowledge in the form of text is also assumed to be available for models to harness. Our proposed approach extends pointer-generator networks (See et al., 2017) by allowing the decoder to hierarchically attend and copy from external knowledge in addition to the dialogue context. We empirically show the effectiveness of the proposed model compared to several baselines including (Ghazvininejadet al., 2018; Zhang et al., 2018) through both automatic evaluation metrics and human evaluation on ConvAI2 dataset.</abstract>
       <url hash="42f490b9">W19-5917</url>
@@ -13298,7 +13298,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Xinnuo</first><last>Xu</last></author>
       <author><first>Yizhe</first><last>Zhang</last></author>
       <author><first>Lars</first><last>Liden</last></author>
-      <author><first>Sungjin</first><last>Lee</last></author>
+      <author id="sungjin-lee"><first>Sungjin</first><last>Lee</last></author>
       <pages>143–154</pages>
       <abstract>Although the data-driven approaches of some recent bot building platforms make it possible for a wide range of users to easily create dialogue systems, those platforms don’t offer tools for quickly identifying which log dialogues contain problems. This is important since corrections to log dialogues provide a means to improve performance after deployment. A log dialogue ranker, which ranks problematic dialogues higher, is an essential tool due to the sheer volume of log dialogues that could be generated. However, training a ranker typically requires labelling a substantial amount of data, which is not feasible for most users. In this paper, we present a novel unsupervised approach for dialogue ranking using GANs and release a corpus of labelled dialogues for evaluation and comparison with supervised methods. The evaluation result shows that our method compares favorably to supervised methods without any labelled data.</abstract>
       <url hash="ec4a1298">W19-5919</url>
@@ -13336,7 +13336,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Hu</first><last>Xu</last></author>
       <author><first>Bing</first><last>Liu</last></author>
       <author><first>Huaixiu</first><last>Zheng</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <pages>178–187</pages>
       <abstract>This paper proposes a novel end-to-end architecture for task-oriented dialogue systems. It is based on a simple and practical yet very effective sequence-to-sequence approach, where language understanding and state tracking tasks are modeled jointly with a structured copy-augmented sequential decoder and a multi-label decoder for each slot. The policy engine and language generation tasks are modeled jointly following that. The copy-augmented sequential decoder deals with new or unknown values in the conversation, while the multi-label decoder combined with the sequential decoder ensures the explicit assignment of values to slots. On the generation part, slot binary classifiers are used to improve performance. This architecture is scalable to real-world scenarios and is shown through an empirical evaluation to achieve state-of-the-art performance on both the Cambridge Restaurant dataset and the Stanford in-car assistant dataset.</abstract>
       <url hash="cec9294c">W19-5922</url>
@@ -13346,7 +13346,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="23">
       <title><fixed-case>F</fixed-case>riends<fixed-case>QA</fixed-case>: Open-Domain Question Answering on <fixed-case>TV</fixed-case> Show Transcripts</title>
       <author><first>Zhengzhe</first><last>Yang</last></author>
-      <author><first>Jinho D.</first><last>Choi</last></author>
+      <author id="jinho-d-choi"><first>Jinho D.</first><last>Choi</last></author>
       <pages>188–197</pages>
       <abstract>This paper presents FriendsQA, a challenging question answering dataset that contains 1,222 dialogues and 10,610 open-domain questions, to tackle machine comprehension on everyday conversations. Each dialogue, involving multiple speakers, is annotated with several types of questions regarding the dialogue contexts, and the answers are annotated with certain spans in the dialogue. A series of crowdsourcing tasks are conducted to ensure good annotation quality, resulting a high inter-annotator agreement of 81.82%. A comprehensive annotation analytics is provided for a deeper understanding in this dataset. Three state-of-the-art QA systems are experimented, R-Net, QANet, and BERT, and evaluated on this dataset. BERT in particular depicts promising results, an accuracy of 74.2% for answer utterance selection and an F1-score of 64.2% for answer span selection, suggesting that the FriendsQA task is hard yet has a great potential of elevating QA research on multiparty dialogue to another level.</abstract>
       <url hash="8794882f">W19-5923</url>
@@ -13355,7 +13355,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="24">
       <title>Foundations of Collaborative Task-Oriented Dialogue: What’s in a Slot?</title>
-      <author><first>Philip</first><last>Cohen</last></author>
+      <author id="philip-r-cohen"><first>Philip</first><last>Cohen</last></author>
       <pages>198–209</pages>
       <abstract>In this paper, we examine the foundations of task-oriented dialogues, in which systems are requested to perform tasks for humans. We argue that the way this dialogue task has been framed has limited its applicability to processing simple requests with atomic “slot-fillers”. However, real task-oriented dialogues can contain more complex utterances that provide non-atomic constraints on slot values. For example, in response to the system’s question “What time do you want me to reserve the restaurant?”, a user should be able to say “the earliest time available,” which cannot be handled by classic “intent + slots” approaches that do not incorporate expressive logical form meaning representations. Furthermore, situations for which it would be desirable to build task-oriented dialogue systems, e.g., to engage in mixed-initiative, collaborative or multiparty dialogues, will require a more general approach. In order to overcome these limitations and to provide such an approach, we give a logical analysis of the “intent+slot” dialogue setting using a modal logic of intention and including a more expansive notion of “dialogue state”. Finally, we briefly discuss our program of research to build a next generation of plan-based dialogue systems that goes beyond “intent + slots”.</abstract>
       <url hash="980cd673">W19-5924</url>
@@ -13379,7 +13379,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Guan-Lin</first><last>Chao</last></author>
       <author><first>Abhinav</first><last>Rastogi</last></author>
       <author><first>Semih</first><last>Yavuz</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Jindong</first><last>Chen</last></author>
       <author><first>Ian</first><last>Lane</last></author>
       <pages>215–225</pages>
@@ -13402,7 +13402,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>A Quantitative Analysis of Patients’ Narratives of Heart Failure</title>
       <author><first>Sabita</first><last>Acharya</last></author>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Andrew</first><last>Boyd</last></author>
+      <author id="andrew-boyd"><first>Andrew</first><last>Boyd</last></author>
       <author><first>Richard</first><last>Cameron</last></author>
       <author><first>Karen</first><last>Dunn Lopez</last></author>
       <author><first>Pamela</first><last>Martyn-Nemeth</last></author>
@@ -13421,7 +13421,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>TDD</fixed-case>iscourse: A Dataset for Discourse-Level Temporal Ordering of Events</title>
       <author><first>Aakanksha</first><last>Naik</last></author>
       <author><first>Luke</first><last>Breitfeller</last></author>
-      <author><first>Carolyn</first><last>Rose</last></author>
+      <author id="carolyn-rose"><first>Carolyn</first><last>Rose</last></author>
       <pages>239–249</pages>
       <abstract>Prior work on temporal relation classification has focused extensively on event pairs in the same or adjacent sentences (local), paying scant attention to discourse-level (global) pairs. This restricts the ability of systems to learn temporal links between global pairs, since reliance on local syntactic features suffices to achieve reasonable performance on existing datasets. However, systems should be capable of incorporating cues from document-level structure to assign temporal relations. In this work, we take a first step towards discourse-level temporal ordering by creating TDDiscourse, the first dataset focusing specifically on temporal links between event pairs which are more than one sentence apart. We create TDDiscourse by augmenting TimeBank-Dense, a corpus of English news articles, manually annotating global pairs that cannot be inferred automatically from existing annotations. Our annotations double the number of temporal links in TimeBank-Dense, while possessing several desirable properties such as focusing on long-distance pairs and not being automatically inferable. We adapt and benchmark the performance of three state-of-the-art models on TDDiscourse and observe that existing systems indeed find discourse-level temporal ordering harder.</abstract>
       <url hash="d18f0008">W19-5929</url>
@@ -13431,7 +13431,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="30">
       <title>Real Life Application of a Question Answering System Using <fixed-case>BERT</fixed-case> Language Model</title>
       <author><first>Francesca</first><last>Alloatti</last></author>
-      <author><first>Luigi</first><last>Di Caro</last></author>
+      <author id="luigi-di-caro"><first>Luigi</first><last>Di Caro</last></author>
       <author><first>Gianpiero</first><last>Sportelli</last></author>
       <pages>250–253</pages>
       <abstract>It is often hard to apply the newest advances in research to real life scenarios. They usually require the resolution of some specific task applied to a restricted domain, all the while providing small amounts of data to begin with. In this study we apply one of the newest innovations in Deep Learning to a task of text classification. We created a question answering system in Italian that provides information about a specific subject, e-invoicing and digital billing. Italy recently introduced a new legislation about e-invoicing and people have some legit doubts, therefore a large share of professionals could benefit from this tool.</abstract>
@@ -13456,7 +13456,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Abhishek</first><last>Sethi</last></author>
       <author><first>Sanchit</first><last>Agarwal</last></author>
       <author><first>Tagyoung</first><last>Chung</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>264–273</pages>
       <abstract>Dialog state tracking is used to estimate the current belief state of a dialog given all the preceding conversation. Machine reading comprehension, on the other hand, focuses on building systems that read passages of text and answer questions that require some understanding of passages. We formulate dialog state tracking as a reading comprehension task to answer the question what is the state of the current dialog? after reading conversational context. In contrast to traditional state tracking methods where the dialog state is often predicted as a distribution over a closed set of all the possible slot values within an ontology, our method uses a simple attention-based neural network to point to the slot values within the conversation. Experiments on MultiWOZ-2.0 cross-domain dialog dataset show that our simple system can obtain similar accuracies compared to the previous more complex methods. By exploiting recent advances in contextual word embeddings, adding a model that explicitly tracks whether a slot value should be carried over to the next turn, and combining our method with a traditional joint state tracking method that relies on closed set vocabulary, we can obtain a joint-goal accuracy of 47.33% on the standard test split, exceeding current state-of-the-art by 11.75%**.</abstract>
       <url hash="e0bfd484">W19-5932</url>
@@ -13577,7 +13577,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>A Dynamic Strategy Coach for Effective Negotiation</title>
       <author><first>Yiheng</first><last>Zhou</last></author>
       <author><first>He</first><last>He</last></author>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <author><first>Yulia</first><last>Tsvetkov</last></author>
       <pages>367–378</pages>
       <abstract>Negotiation is a complex activity involving strategic reasoning, persuasion, and psychology. An average person is often far from an expert in negotiation. Our goal is to assist humans to become better negotiators through a machine-in-the-loop approach that combines machine’s advantage at data-driven decision-making and human’s language generation ability. We consider a bargaining scenario where a seller and a buyer negotiate the price of an item for sale through a text-based dialogue. Our negotiation coach monitors messages between them and recommends strategies in real time to the seller to get a better deal (e.g., “reject the proposal and propose a price”, “talk about your personal experience with the product”). The best strategy largely depends on the context (e.g., the current price, the buyer’s attitude). Therefore, we first identify a set of negotiation strategies, then learn to predict the best strategy in a given dialogue context from a set of human-human bargaining dialogues. Evaluation on human-human dialogues shows that our coach increases the profits of the seller by almost 60%.</abstract>
@@ -13592,7 +13592,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Tiancheng</first><last>Zhao</last></author>
       <author><first>Amy</first><last>Pavel</last></author>
       <author><first>Maxine</first><last>Eskenazi</last></author>
-      <author><first>Jeffrey</first><last>Bigham</last></author>
+      <author id="jeffrey-p-bigham"><first>Jeffrey</first><last>Bigham</last></author>
       <pages>379–391</pages>
       <abstract>The aim of this paper is to mitigate the shortcomings of automatic evaluation of open-domain dialog systems through multi-reference evaluation. Existing metrics have been shown to correlate poorly with human judgement, particularly in open-domain dialog. One alternative is to collect human annotations for evaluation, which can be expensive and time consuming. To demonstrate the effectiveness of multi-reference evaluation, we augment the test set of DailyDialog with multiple references. A series of experiments show that the use of multiple references results in improved correlation between several automatic metrics and human judgement for both the quality and the diversity of system output.</abstract>
       <url hash="bb441efe">W19-5944</url>
@@ -13614,7 +13614,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="46">
       <title>Dialogue Act Classification in Team Communication for Robot Assisted Disaster Response</title>
       <author><first>Tatiana</first><last>Anikina</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
       <pages>399–410</pages>
       <abstract>We present the results we obtained on the classification of dialogue acts in a corpus of human-human team communication in the domain of robot-assisted disaster response. We annotated dialogue acts according to the ISO 24617-2 standard scheme and carried out experiments using the FastText linear classifier as well as several neural architectures, including feed-forward, recurrent and convolutional neural models with different types of embeddings, context and attention mechanism. The best performance was achieved with a ”Divide &amp; Merge” architecture presented in the paper, using trainable GloVe embeddings and a structured dialogue history. This model learns from the current utterance and the preceding context separately and then combines the two generated representations. Average accuracy of 10-fold cross-validation is 79.8%, F-score 71.8%.</abstract>
       <url hash="f6d7982e">W19-5946</url>
@@ -13624,8 +13624,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="47">
       <title>Multi-Task Learning of System Dialogue Act Selection for Supervised Pretraining of Goal-Oriented Dialogue Policies</title>
       <author><first>Sarah</first><last>McLeod</last></author>
-      <author><first>Ivana</first><last>Kruijff-Korbayova</last></author>
-      <author><first>Bernd</first><last>Kiefer</last></author>
+      <author id="ivana-kruijff-korbayova"><first>Ivana</first><last>Kruijff-Korbayova</last></author>
+      <author id="bernd-kiefer"><first>Bernd</first><last>Kiefer</last></author>
       <pages>411–417</pages>
       <abstract>This paper describes the use of Multi-Task Neural Networks (NNs) for system dialogue act selection. These models leverage the representations learned by the Natural Language Understanding (NLU) unit to enable robust initialization/bootstrapping of dialogue policies from medium sized initial data sets. We evaluate the models on two goal-oriented dialogue corpora in the travel booking domain. Results show the proposed models improve over models trained without knowledge of NLU tasks.</abstract>
       <url hash="0bd2f8d2">W19-5947</url>
@@ -13686,7 +13686,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <editor><first>Jordan</first><last>Lachler</last></editor>
       <editor><first>Alexis</first><last>Palmer</last></editor>
       <editor><first>Lane</first><last>Schwartz</last></editor>
-      <editor><first>Miikka</first><last>Silfverberg</last></editor>
+      <editor id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Honolulu</address>
       <month>February</month>
@@ -13724,7 +13724,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="4">
       <title><fixed-case>OCR</fixed-case> evaluation tools for the 21st century</title>
-      <author><first>Eddie Antonio</first><last>Santos</last></author>
+      <author id="eddie-antonio-santos"><first>Eddie Antonio</first><last>Santos</last></author>
       <pages>23–27</pages>
       <url hash="8f9209e3">W19-6004</url>
       <bibkey>santos-2019-ocr</bibkey>
@@ -13733,7 +13733,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Handling cross-cutting properties in automatic inference of lexical classes: A case study of Chintang</title>
       <author><first>Olga</first><last>Zamaraeva</last></author>
       <author><first>Kristen</first><last>Howell</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>28–38</pages>
       <url hash="36c94a67">W19-6005</url>
       <bibkey>zamaraeva-etal-2019-handling</bibkey>
@@ -13749,7 +13749,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="7">
       <title>Seeing more than whitespace — Tokenisation and disambiguation in a <fixed-case>N</fixed-case>orth <fixed-case>S</fixed-case>ámi grammar checker</title>
       <author><first>Linda</first><last>Wiechetek</last></author>
-      <author><first>Sjur Nørstebø</first><last>Moshagen</last></author>
+      <author id="sjur-moshagen"><first>Sjur Nørstebø</first><last>Moshagen</last></author>
       <author><first>Kevin Brubeck</first><last>Unhammer</last></author>
       <pages>46–55</pages>
       <url hash="8ed0489c">W19-6007</url>
@@ -13773,10 +13773,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="10">
       <title>A biscriptual morphological transducer for <fixed-case>C</fixed-case>rimean <fixed-case>T</fixed-case>atar</title>
-      <author><first>Francis M.</first><last>Tyers</last></author>
-      <author><first>Jonathan</first><last>Washington</last></author>
+      <author id="francis-tyers"><first>Francis M.</first><last>Tyers</last></author>
+      <author id="jonathan-washington"><first>Jonathan</first><last>Washington</last></author>
       <author><first>Darya</first><last>Kavitskaya</last></author>
-      <author><first>Memduh</first><last>Gökırmak</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gökırmak</last></author>
       <author><first>Nick</first><last>Howell</last></author>
       <author><first>Remziye</first><last>Berberova</last></author>
       <pages>74–80</pages>
@@ -13785,7 +13785,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="11">
       <title>Improving Low-Resource Morphological Learning with Intermediate Forms from Finite State Transducers</title>
-      <author><first>Sarah</first><last>Moeller</last></author>
+      <author id="sarah-moeller"><first>Sarah</first><last>Moeller</last></author>
       <author><first>Ghazaleh</first><last>Kazeminejad</last></author>
       <author><first>Andrew</first><last>Cowell</last></author>
       <author><first>Mans</first><last>Hulden</last></author>
@@ -13809,7 +13809,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the 22nd Nordic Conference on Computational Linguistics</booktitle>
       <url hash="dfac8b1a">W19-61</url>
       <editor><first>Mareike</first><last>Hartmann</last></editor>
-      <editor><first>Barbara</first><last>Plank</last></editor>
+      <editor id="barbara-plank"><first>Barbara</first><last>Plank</last></editor>
       <publisher>Linköping University Electronic Press</publisher>
       <address>Turku, Finland</address>
       <month>September–October</month>
@@ -13823,7 +13823,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="1">
       <title>Comparison between <fixed-case>NMT</fixed-case> and <fixed-case>PBSMT</fixed-case> Performance for Translating Noisy User-Generated Content</title>
       <author><first>José Carlos</first><last>Rosales Núñez</last></author>
-      <author><first>Djamé</first><last>Seddah</last></author>
+      <author id="djame-seddah"><first>Djamé</first><last>Seddah</last></author>
       <author><first>Guillaume</first><last>Wisniewski</last></author>
       <pages>2–14</pages>
       <abstract>This work compares the performances achieved by Phrase-Based Statistical Machine Translation systems (PB-SMT) and attention-based Neuronal Machine Translation systems (NMT) when translating User Generated Content (UGC), as encountered in social medias, from French to English. We show that, contrary to what could be expected, PBSMT outperforms NMT when translating non-canonical inputs. Our error analysis uncovers the specificities of UGC that are problematic for sequential NMT architectures and suggests new avenue for improving NMT models.</abstract>
@@ -13900,8 +13900,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="9">
       <title>Comparing linear and neural models for competitive <fixed-case>MWE</fixed-case> identification</title>
       <author><first>Hazem Al</first><last>Saied</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
-      <author><first>Mathieu</first><last>Constant</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
+      <author id="matthieu-constant"><first>Mathieu</first><last>Constant</last></author>
       <pages>86–96</pages>
       <abstract>In this paper, we compare the use of linear versus neural classifiers in a greedy transition system for MWE identification. Both our linear and neural models achieve a new state-of-the-art on the PARSEME 1.1 shared task data sets, comprising 20 languages. Surprisingly, our best model is a simple feed-forward network with one hidden layer, although more sophisticated (recurrent) architectures were tested. The feedback from this study is that tuning a SVM is rather straightforward, whereas tuning our neural system revealed more challenging. Given the number of languages and the variety of linguistic phenomena to handle for the MWE identification task, we have designed an accurate tuning procedure, and we show that hyperparameters are better selected by using a majority-vote within random search configurations rather than a simple best configuration selection. Although the performance is rather good (better than both the best shared task system and the average of the best per-language results), further work is needed to improve the generalization power, especially on unseen MWEs.</abstract>
       <url hash="727b19a0">W19-6109</url>
@@ -13909,8 +13909,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="10">
       <title>Syntax-based identification of light-verb constructions</title>
-      <author><first>Silvio Ricardo</first><last>Cordeiro</last></author>
-      <author><first>Marie</first><last>Candito</last></author>
+      <author id="silvio-cordeiro"><first>Silvio Ricardo</first><last>Cordeiro</last></author>
+      <author id="marie-candito"><first>Marie</first><last>Candito</last></author>
       <pages>97–104</pages>
       <abstract>This paper analyzes results on light-verb construction identification from the PARSEME shared-task, distinguishing between simple cases that could be directly learned from training data from more complex cases that require an extra level of semantic processing. We propose a simple baseline that beats the state of the art for the simple cases, and couple it with another simple baseline to handle the complex cases. We additionally present two other classifiers based on a richer set of features, with results surpassing the state of the art by 8 percentage points.</abstract>
       <url hash="cabdd0ac">W19-6110</url>
@@ -13920,7 +13920,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Comparing the Performance of Feature Representations for the Categorization of the Easy-to-Read Variety vs Standard Language</title>
       <author><first>Marina</first><last>Santini</last></author>
       <author><first>Benjamin</first><last>Danielsson</last></author>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>105–114</pages>
       <abstract>We explore the effectiveness of four feature representations – bag-of-words, word embeddings, principal components and autoencoders – for the binary categorization of the easy-to-read variety vs standard language. Standard language refers to the ordinary language variety used by a population as a whole or by a community, while the “easy-to-read” variety is a simpler (or a simplified) version of the standard language. We test the efficiency of these feature representations on three corpora, which differ in size, class balance, unit of analysis, language and topic. We rely on supervised and unsupervised machine learning algorithms. Results show that bag-of-words is a robust and straightforward feature representation for this task and performs well in many experimental settings. Its performance is equivalent or equal to the performance achieved with principal components and autoencorders, whose preprocessing is however more time-consuming. Word embeddings are less accurate than the other feature representations for this classification task.</abstract>
       <url hash="8e412137">W19-6111</url>
@@ -13939,7 +13939,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Annotating evaluative sentences for sentiment analysis: a dataset for <fixed-case>N</fixed-case>orwegian</title>
       <author><first>Petter</first><last>Mæhlum</last></author>
       <author><first>Jeremy</first><last>Barnes</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>121–130</pages>
       <abstract>This paper documents the creation of a large-scale dataset of evaluative sentences – i.e. both subjective and objective sentences that are found to be sentiment-bearing – based on mixed-domain professional reviews from various news-sources. We present both the annotation scheme and first results for classification experiments. The effort represents a step toward creating a Norwegian dataset for fine-grained sentiment analysis.</abstract>
@@ -14005,7 +14005,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Lexicon information in neural sentiment analysis: a multi-task learning approach</title>
       <author><first>Jeremy</first><last>Barnes</last></author>
       <author><first>Samia</first><last>Touileb</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>175–186</pages>
       <abstract>This paper explores the use of multi-task learning (MTL) for incorporating external knowledge in neural models. Specifically, we show how MTL can enable a BiLSTM sentiment classifier to incorporate information from sentiment lexicons. Our MTL set-up is shown to improve model performance (compared to a single-task set-up) on both English and Norwegian sentence-level sentiment datasets. The paper also introduces a new sentiment lexicon for Norwegian.</abstract>
@@ -14025,7 +14025,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="21">
       <title>Political Stance in <fixed-case>D</fixed-case>anish</title>
       <author><first>Rasmus</first><last>Lehmann</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>197–207</pages>
       <abstract>The task of stance detection consists of classifying the opinion within a text towards some target. This paper seeks to generate a dataset of quotes from Danish politicians, label this dataset to allow the task of stance detection to be performed, and present annotation guidelines to allow further expansion of the generated dataset. Furthermore, three models based on an LSTM architecture are designed, implemented and optimized to perform the task of stance detection for the generated dataset. Experiments are performed using conditionality and bi-directionality for these models, and using either singular word embeddings or averaged word embeddings for an entire quote, to determine the optimal model design. The simplest model design, applying neither conditionality or bi-directionality, and averaged word embeddings across quotes, yields the strongest results. Furthermore, it was found that inclusion of the quotes politician, and the party affiliation of the quoted politician, greatly improved performance of the strongest model.</abstract>
       <url hash="65f1da0f">W19-6121</url>
@@ -14035,7 +14035,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Joint Rumour Stance and Veracity Prediction</title>
       <author><first>Anders Edelbo</first><last>Lillie</last></author>
       <author><first>Emil Refsgaard</first><last>Middelboe</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <pages>208–221</pages>
       <abstract>The net is rife with rumours that spread through microblogs and social media. Not all the claims in these can be verified. However, recent work has shown that the stances alone that commenters take toward claims can be sufficiently good indicators of claim veracity, using e.g. an HMM that takes conversational stance sequences as the only input. Existing results are monolingual (English) and mono-platform (Twitter). This paper introduces a stance-annotated Reddit dataset for the Danish language, and describes various implementations of stance classification models. Of these, a Linear SVM provides predicts stance best, with 0.76 accuracy / 0.42 macro F1. Stance labels are then used to predict veracity across platforms and also across languages, training on conversations held in one language and using the model on conversations held in another. In our experiments, monolinugal scores reach stance-based veracity accuracy of 0.83 (F1 0.68); applying the model across languages predicts veracity of claims with an accuracy of 0.82 (F1 0.67). This demonstrates the surprising and powerful viability of transferring stance-based veracity prediction across languages.</abstract>
       <url hash="71f29b16">W19-6122</url>
@@ -14075,7 +14075,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="26">
       <title>Matching Keys and Encrypted Manuscripts</title>
       <author><first>Eva</first><last>Pettersson</last></author>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <pages>253–261</pages>
       <abstract>Historical cryptology is the study of historical encrypted messages aiming at their decryption by analyzing the mathematical, linguistic and other coding patterns and their historical context. In libraries and archives we can find quite a lot of ciphers, as well as keys describing the method used to transform the plaintext message into a ciphertext. In this paper, we present work on automatically mapping keys to ciphers to reconstruct the original plaintext message, and use language models generated from historical texts to guess the underlying plaintext language.</abstract>
       <url hash="2efb698e">W19-6126</url>
@@ -14104,7 +14104,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Antti</first><last>Suni</last></author>
       <author><first>Hande</first><last>Celikkanat</last></author>
       <author><first>Sofoklis</first><last>Kakouros</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <author><first>Martti</first><last>Vainio</last></author>
       <pages>281–290</pages>
       <abstract>In this paper we introduce a new natural language processing dataset and benchmark for predicting prosodic prominence from written text. To our knowledge this will be the largest publicly available dataset with prosodic labels. We describe the dataset construction and the resulting benchmark dataset in detail and train a number of different models ranging from feature-based classifiers to neural network systems for the prediction of discretized prosodic prominence. We show that pre-trained contextualized word representations from BERT outperform the other models even with less than 10% of the training data. Finally we discuss the dataset in light of the results and point to future research and plans for further improving both the dataset and methods of predicting prosodic prominence from text. The dataset and the code for the models will be made publicly available.</abstract>
@@ -14135,7 +14135,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="32">
       <title>Ensembles of Neural Morphological Inflection Models</title>
       <author><first>Ilmari</first><last>Kylliäinen</last></author>
-      <author><first>Miikka</first><last>Silfverberg</last></author>
+      <author id="miikka-silfverberg"><first>Miikka</first><last>Silfverberg</last></author>
       <pages>304–309</pages>
       <abstract>We investigate different ensemble learning techniques for neural morphological inflection using bidirectional LSTM encoder-decoder models with attention. We experiment with weighted and unweighted majority voting and bagging. We find that all investigated ensemble methods lead to improved accuracy over a baseline of a single model. However, contrary to expectation based on earlier work by Najafi et al. (2018) and Silfverberg et al. (2017), weighting does not deliver clear benefits. Bagging was found to underperform plain voting ensembles in general.</abstract>
       <url hash="61ef40bf">W19-6132</url>
@@ -14194,7 +14194,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="38">
       <title>Bornholmsk Natural Language Processing: Resources and Tools</title>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Alex Speed</first><last>Kjeldsen</last></author>
       <pages>338–344</pages>
       <abstract>This paper introduces language processing resources and tools for Bornholmsk, a language spoken on the island of Bornholm, with roots in Danish and closely related to Scanian. This presents an overview of the language and available data, and the first NLP models for this living, minority Nordic language. Sammenfattnijng på borrijnholmst: Dæjnna artikkelijn introduserer natursprågsresurser å varktoi for borrijnholmst, ed språg a dær snakkes på ön Borrijnholm me rødder i danst å i nær familia me skånst. Artikkelijn gjer ed âuersyn âuer språged å di datan som fijnnes, å di fosste NLP modællarna for dætta læwenes nordiska minnretâlsspråged.</abstract>
@@ -14217,7 +14217,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Andre</first><last>Kåsen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <pages>350–355</pages>
       <abstract>This paper describes an evaluation of five data-driven part-of-speech (PoS) taggers for spoken Norwegian. The taggers all rely on different machine learning mechanisms: decision trees, hidden Markov models (HMMs), conditional random fields (CRFs), long-short term memory networks (LSTMs), and convolutional neural networks (CNNs). We go into some of the challenges posed by the task of tagging spoken, as opposed to written, language, and in particular a wide range of dialects as is found in the recordings of the LIA (Language Infrastructure made Accessible) project. The results show that the taggers based on either conditional random fields or neural networks perform much better than the rest, with the LSTM tagger getting the highest score.</abstract>
       <url hash="64060e6d">W19-6140</url>
@@ -14227,7 +14227,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>The Lacunae of <fixed-case>D</fixed-case>anish Natural Language Processing</title>
       <author><first>Andreas</first><last>Kirkedal</last></author>
       <author><first>Barbara</first><last>Plank</last></author>
-      <author><first>Leon</first><last>Derczynski</last></author>
+      <author id="leon-derczynski"><first>Leon</first><last>Derczynski</last></author>
       <author><first>Natalie</first><last>Schluter</last></author>
       <pages>356–362</pages>
       <abstract>Danish is a North Germanic language spoken principally in Denmark, a country with a long tradition of technological and scientific innovation. However, the language has received relatively little attention from a technological perspective. In this paper, we review Natural Language Processing (NLP) research, digital resources and tools which have been developed for Danish. We find that availability of models and tools is limited, which calls for work that lifts Danish NLP a step closer to the privileged languages. Dansk abstrakt: Dansk er et nordgermansk sprog, talt primært i kongeriget Danmark, et land med stærk tradition for teknologisk og videnskabelig innovation. Det danske sprog har imidlertid været genstand for relativt begrænset opmærksomhed, teknologisk set. I denne artikel gennemgår vi sprogteknologi-forskning, -ressourcer og -værktøjer udviklet for dansk. Vi konkluderer at der eksisterer et fåtal af modeller og værktøjer, hvilket indbyder til forskning som løfter dansk sprogteknologi i niveau med mere priviligerede sprog.</abstract>
@@ -14274,7 +14274,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="46">
       <title>The <fixed-case>OPUS</fixed-case> Resource Repository: An Open Package for Creating Parallel Corpora and Machine Translation Services</title>
       <author><first>Mikko</first><last>Aulamo</last></author>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>389–394</pages>
       <abstract>This paper presents a flexible and powerful system for creating parallel corpora and for running neural machine translation services. Our package provides a scalable data repository backend that offers transparent data pre-processing pipelines and automatic alignment procedures that facilitate the compilation of extensive parallel data sets from a variety of sources. Moreover, we develop a web-based interface that constitutes an intuitive frontend for end-users of the platform. The whole system can easily be distributed over virtual machines and implements a sophisticated permission system with secure connections and a flexible database for storing arbitrary metadata. Furthermore, we also provide an interface for neural machine translation that can run as a service on virtual machines, which also incorporates a connection to the data repository software.</abstract>
       <url hash="0694d499">W19-6146</url>
@@ -14284,7 +14284,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Garnishing a phonetic dictionary for <fixed-case>ASR</fixed-case> intake</title>
       <author><first>Iben Nyholm</first><last>Debess</last></author>
       <author><first>Sandra Saxov</first><last>Lamhauge</last></author>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>395–399</pages>
       <abstract>We present a new method for preparing a lexical-phonetic database as a resource for acoustic model training. The research is an offshoot of the ongoing Project Ravnur (Speech Recognition for Faroese), but the method is language-independent. At NODALIDA 2019 we demonstrate the method (called SHARP) online, showing how a traditional lexical-phonetic dictionary (with a very rich phone inventory) is transformed into an ASR-friendly database (with reduced phonetics, preventing data sparseness). The mapping procedure is informed by a corpus of speech transcripts. We conclude with a discussion on the benefits of a well-thought-out BLARK design (Basic Language Resource Kit), making tools like SHARP possible.</abstract>
       <url hash="6ebc64ea">W19-6147</url>
@@ -14314,11 +14314,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the First NLPL Workshop on Deep Learning for Natural Language Processing</booktitle>
       <url hash="20e12391">W19-62</url>
       <editor><first>Joakim</first><last>Nivre</last></editor>
-      <editor><first>Leon</first><last>Derczynski</last></editor>
+      <editor id="leon-derczynski"><first>Leon</first><last>Derczynski</last></editor>
       <editor><first>Filip</first><last>Ginter</last></editor>
       <editor><first>Bjørn</first><last>Lindi</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
-      <editor><first>Anders</first><last>Søgaard</last></editor>
+      <editor id="anders-sogaard"><first>Anders</first><last>Søgaard</last></editor>
       <editor><first>Jörg</first><last>Tidemann</last></editor>
       <publisher>Linköping University Electronic Press</publisher>
       <address>Turku, Finland</address>
@@ -14334,7 +14334,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Mark my Word: A Sequence-to-Sequence Approach to Definition Modeling</title>
       <author><first>Timothee</first><last>Mickus</last></author>
       <author><first>Denis</first><last>Paperno</last></author>
-      <author><first>Matthieu</first><last>Constant</last></author>
+      <author id="matthieu-constant"><first>Matthieu</first><last>Constant</last></author>
       <pages>1–11</pages>
       <abstract>Defining words in a textual context is a useful task both for practical purposes and for gaining insight into distributed word representations. Building on the distributional hypothesis, we argue here that the most natural formalization of definition modeling is to treat it as a sequence-to-sequence task, rather than a word-to-sequence task: given an input sequence with a highlighted word, generate a contextually appropriate definition for it. We implement this approach in a Transformer-based sequence-to-sequence model. Our proposal allows to train contextualization and definition generation in an end-to-end fashion, which is a conceptual improvement over earlier works. We achieve state-of-the-art results both in contextual and non-contextual definition modeling.</abstract>
       <url hash="717c40b9">W19-6201</url>
@@ -14373,8 +14373,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="5">
       <title>Multilingual Probing of Deep Pre-Trained Contextual Encoders</title>
       <author><first>Vinit</first><last>Ravishankar</last></author>
-      <author><first>Memduh</first><last>Gökırmak</last></author>
-      <author><first>Lilja</first><last>Øvrelid</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gökırmak</last></author>
+      <author id="lilja-ovrelid"><first>Lilja</first><last>Øvrelid</last></author>
       <author><first>Erik</first><last>Velldal</last></author>
       <pages>37–47</pages>
       <abstract>Encoders that generate representations based on context have, in recent years, benefited from adaptations that allow for pre-training on large text corpora. Earlier work on evaluating fixed-length sentence representations has included the use of ‘probing’ tasks, that use diagnostic classifiers to attempt to quantify the extent to which these encoders capture specific linguistic phenomena. The principle of probing has also resulted in extended evaluations that include relatively newer word-level pre-trained encoders. We build on probing tasks established in the literature and comprehensively evaluate and analyse – from a typological perspective amongst others – multilingual variants of existing encoders on probing datasets constructed for 6 non-English languages. Specifically, we probe each layer of a multiple monolingual RNN-based ELMo models, the transformer-based BERT’s cased and uncased multilingual variants, and a variant of BERT that uses a cross-lingual modelling scheme (XLM).</abstract>
@@ -14433,7 +14433,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="2">
       <title>Automatic Generation and Semantic Grading of <fixed-case>E</fixed-case>speranto Sentences in a Teaching Context</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>10–19</pages>
       <url hash="eb258bd1">W19-6302</url>
       <bibkey>bick-2019-automatic</bibkey>
@@ -14448,7 +14448,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="4">
       <title>Linguistic features and proficiency classification in <fixed-case>L</fixed-case>2 <fixed-case>S</fixed-case>panish and <fixed-case>L</fixed-case>2<fixed-case>P</fixed-case>ortuguese.</title>
-      <author><first>Iria</first><last>del Río</last></author>
+      <author id="iria-del-rio-gayo"><first>Iria</first><last>del Río</last></author>
       <pages>31–40</pages>
       <url hash="862f9967">W19-6304</url>
       <bibkey>del-rio-2019-linguistic</bibkey>
@@ -14456,9 +14456,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="5">
       <title>Integrating large-scale web data and curated corpus data in a search engine supporting <fixed-case>G</fixed-case>erman literacy education</title>
       <author><first>Sabrina</first><last>Dittrich</last></author>
-      <author><first>Zarah</first><last>Weiss</last></author>
+      <author id="zarah-weiss"><first>Zarah</first><last>Weiss</last></author>
       <author><first>Hannes</first><last>Schröter</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>41–56</pages>
       <url hash="59c18aed">W19-6305</url>
       <bibkey>dittrich-etal-2019-integrating</bibkey>
@@ -14488,7 +14488,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="8">
       <title>Summarization Evaluation meets Short-Answer Grading</title>
       <author><first>Margot</first><last>Mieskes</last></author>
-      <author><first>Ulrike</first><last>Padó</last></author>
+      <author id="ulrike-pado"><first>Ulrike</first><last>Padó</last></author>
       <pages>79–85</pages>
       <url hash="e885fe08">W19-6308</url>
       <bibkey>mieskes-pado-2019-summarization</bibkey>
@@ -14507,8 +14507,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Ramon</first><last>Ziai</last></author>
       <author><first>Florian</first><last>Nuxoll</last></author>
       <author><first>Kordula</first><last>De Kuthy</last></author>
-      <author><first>Björn</first><last>Rudzewitz</last></author>
-      <author><first>Detmar</first><last>Meurers</last></author>
+      <author id="bjorn-rudzewitz"><first>Björn</first><last>Rudzewitz</last></author>
+      <author id="detmar-meurers"><first>Detmar</first><last>Meurers</last></author>
       <pages>93–99</pages>
       <url hash="819bb592">W19-6310</url>
       <bibkey>ziai-etal-2019-impact</bibkey>
@@ -14520,7 +14520,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <url hash="c9c922ad">W19-64</url>
       <editor><first>Mahmoud</first><last>El-Haj</last></editor>
       <editor><first>Paul</first><last>Rayson</last></editor>
-      <editor><first>Steven</first><last>Young</last></editor>
+      <editor id="steve-young"><first>Steven</first><last>Young</last></editor>
       <editor><first>Houda</first><last>Bouamor</last></editor>
       <editor><first>Sira</first><last>Ferradans</last></editor>
       <publisher>Linköping University Electronic Press</publisher>
@@ -14560,21 +14560,21 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="4">
       <title>Active Learning for Financial Investment Reports</title>
       <author><first>Sian</first><last>Gooding</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <pages>25–32</pages>
       <url hash="fc54885d">W19-6404</url>
       <bibkey>gooding-briscoe-2019-active</bibkey>
     </paper>
     <paper id="5">
       <title>Towards Unlocking the Narrative of the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates Income Tax Forms</title>
-      <author><first>Esme</first><last>Manandise</last></author>
+      <author id="esmeralda-manandise"><first>Esme</first><last>Manandise</last></author>
       <pages>33–41</pages>
       <url hash="28958015">W19-6405</url>
       <bibkey>manandise-2019-towards</bibkey>
     </paper>
     <paper id="6">
       <title>Tone Analysis in <fixed-case>S</fixed-case>panish Financial Reporting Narratives</title>
-      <author><first>Antonio</first><last>Moreno-Sandoval</last></author>
+      <author id="antonio-moreno-sandoval"><first>Antonio</first><last>Moreno-Sandoval</last></author>
       <author><first>Pablo Alfonso Haya</first><last>Ana Gisbert</last></author>
       <author><first>Marta</first><last>Guerrero</last></author>
       <author><first>Helena</first><last>Montoro</last></author>
@@ -14630,7 +14630,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the Workshop on NLP and Pseudonymisation</booktitle>
       <url hash="44bf7ab2">W19-65</url>
       <editor><first>Lars</first><last>Ahrenberg</last></editor>
-      <editor><first>Beata</first><last>Megyesi</last></editor>
+      <editor id="beata-megyesi"><first>Beata</first><last>Megyesi</last></editor>
       <publisher>Linköping Electronic Press</publisher>
       <address>Turku, Finland</address>
       <month>September</month>
@@ -14676,7 +14676,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of Machine Translation Summit XVII: Research Track</booktitle>
       <url hash="3d4008aa">W19-66</url>
-      <editor><first>Mikel</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></editor>
       <editor><first>Andy</first><last>Way</last></editor>
       <editor><first>Barry</first><last>Haddow</last></editor>
       <editor><first>Rico</first><last>Sennrich</last></editor>
@@ -14694,7 +14694,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Online Sentence Segmentation for Simultaneous Interpretation using Multi-Shifted Recurrent Neural Network</title>
       <author><first>Xiaolin</first><last>Wang</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>1–11</pages>
       <url hash="54a85e7d">W19-6601</url>
       <bibkey>wang-etal-2019-online</bibkey>
@@ -14707,7 +14707,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Muhammad</first><last>Rahman</last></author>
       <author><first>Yiming</first><last>Wang</last></author>
       <author><first>Hainan</first><last>Xu</last></author>
-      <author><first>Daniel</first><last>Povey</last></author>
+      <author id="daniel-povey"><first>Daniel</first><last>Povey</last></author>
       <author><first>Philipp</first><last>Koehn</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <pages>12–20</pages>
@@ -14716,8 +14716,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="3">
       <title>Enhancing Transformer for End-to-end Speech-to-Text Translation</title>
-      <author><first>Mattia Antonino</first><last>Di Gangi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="mattia-a-di-gangi"><first>Mattia Antonino</first><last>Di Gangi</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <author><first>Roldano</first><last>Cattoni</last></author>
       <author><first>Roberto</first><last>Dessi</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
@@ -14736,7 +14736,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="5">
       <title><fixed-case>T</fixed-case>ranslator2<fixed-case>V</fixed-case>ec: Understanding and Representing Human Post-Editors</title>
       <author><first>António</first><last>Góis</last></author>
-      <author><first>André F. T.</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André F. T.</first><last>Martins</last></author>
       <pages>43–54</pages>
       <url hash="bb41a8fa">W19-6605</url>
       <bibkey>gois-martins-2019-translator2vec</bibkey>
@@ -14769,7 +14769,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="9">
       <title>Automatic error classification with multiple error labels</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <author><first>David</first><last>Vilar</last></author>
       <pages>87–95</pages>
       <url hash="3bee11f3">W19-6609</url>
@@ -14813,7 +14813,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="14">
       <title>Improving Anaphora Resolution in Neural Machine Translation Using Curriculum Learning</title>
       <author><first>Dario</first><last>Stojanovski</last></author>
-      <author><first>Alexander</first><last>Fraser</last></author>
+      <author id="alexander-fraser"><first>Alexander</first><last>Fraser</last></author>
       <pages>140–150</pages>
       <url hash="0b4df783">W19-6614</url>
       <bibkey>stojanovski-fraser-2019-improving</bibkey>
@@ -14850,7 +14850,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>An Exploration of Placeholding in Neural Machine Translation</title>
       <author><first>Matt</first><last>Post</last></author>
       <author><first>Shuoyang</first><last>Ding</last></author>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Winston</first><last>Wu</last></author>
       <pages>182–192</pages>
       <url hash="915327db">W19-6618</url>
@@ -14896,7 +14896,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="23">
       <title>Identifying Fluently Inadequate Output in Neural and Statistical Machine Translation</title>
-      <author><first>Marianna</first><last>Martindale</last></author>
+      <author id="marianna-martindale"><first>Marianna</first><last>Martindale</last></author>
       <author><first>Marine</first><last>Carpuat</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
       <author><first>Paul</first><last>McNamee</last></author>
@@ -14919,7 +14919,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>John</first><last>Ortega</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Marco</first><last>Turchi</last></author>
-      <author><first>Matteo</first><last>Negri</last></author>
+      <author id="matteo-negri"><first>Matteo</first><last>Negri</last></author>
       <pages>256–266</pages>
       <url hash="8cf28d47">W19-6625</url>
       <bibkey>ortega-etal-2019-improving</bibkey>
@@ -14948,7 +14948,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of Machine Translation Summit XVII: Translator, Project and User Tracks</booktitle>
       <url hash="2005f1d7">W19-67</url>
-      <editor><first>Mikel</first><last>Forcada</last></editor>
+      <editor id="mikel-l-forcada"><first>Mikel</first><last>Forcada</last></editor>
       <editor><first>Andy</first><last>Way</last></editor>
       <editor><first>John</first><last>Tinsley</last></editor>
       <editor><first>Dimitar</first><last>Shterionov</last></editor>
@@ -14966,8 +14966,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </frontmatter>
     <paper id="1">
       <title>Competitiveness Analysis of the <fixed-case>E</fixed-case>uropean Machine Translation Market</title>
-      <author><first>Andrejs</first><last>Vasiļjevs</last></author>
-      <author><first>Inguna</first><last>Skadiņa</last></author>
+      <author id="andrejs-vasiljevs"><first>Andrejs</first><last>Vasiļjevs</last></author>
+      <author id="inguna-skadina"><first>Inguna</first><last>Skadiņa</last></author>
       <author><first>Indra</first><last>Sāmīte</last></author>
       <author><first>Kaspars</first><last>Kauliņš</last></author>
       <author><first>Ēriks</first><last>Ajausks</last></author>
@@ -14982,8 +14982,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Mihaela</first><last>Vela</last></author>
       <author><first>Santanu</first><last>Pal</last></author>
       <author><first>Marcos</first><last>Zampieri</last></author>
-      <author><first>Sudip</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>8–15</pages>
       <url hash="63927fd1">W19-6702</url>
       <bibkey>vela-etal-2019-improving</bibkey>
@@ -15066,7 +15066,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="12">
       <title>On reducing translation shifts in translations intended for <fixed-case>MT</fixed-case> evaluation</title>
-      <author><first>Maja</first><last>Popovic</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popovic</last></author>
       <pages>80–87</pages>
       <url hash="7c23ce00">W19-6712</url>
       <bibkey>popovic-2019-reducing</bibkey>
@@ -15133,7 +15133,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="19">
       <title>i<fixed-case>ADAATPA</fixed-case> Project: Pangeanic use cases</title>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
       <author><first>Alexandre</first><last>Helle</last></author>
@@ -15164,9 +15164,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="21">
       <title><fixed-case>P</fixed-case>ara<fixed-case>C</fixed-case>rawl: Web-scale parallel corpora for the languages of the <fixed-case>EU</fixed-case></title>
-      <author><first>Miquel</first><last>Esplà</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà</last></author>
       <author><first>Mikel</first><last>Forcada</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Hieu</first><last>Hoang</last></author>
       <pages>118–119</pages>
       <url hash="efe5fb18">W19-6721</url>
@@ -15177,7 +15177,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Chao-Hong</first><last>Liu</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <author><first>Catarina</first><last>Silva</last></author>
-      <author><first>André</first><last>Martins</last></author>
+      <author id="andre-f-t-martins"><first>André</first><last>Martins</last></author>
       <pages>120–121</pages>
       <url hash="7cc9875a">W19-6722</url>
       <bibkey>liu-etal-2019-pivot</bibkey>
@@ -15191,7 +15191,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Rachel</first><last>Bawden</last></author>
       <author><first>Felipe</first><last>Sánchez-Martínez</last></author>
       <author><first>Mikel L.</first><last>Forcada</last></author>
-      <author><first>Miquel</first><last>Esplà-Gomis</last></author>
+      <author id="miquel-espla-gomis"><first>Miquel</first><last>Esplà-Gomis</last></author>
       <author><first>Víctor</first><last>Sánchez-Cartagena</last></author>
       <author><first>Juan Antonio</first><last>Pérez-Ortiz</last></author>
       <author><first>Wilker</first><last>Aziz</last></author>
@@ -15217,7 +15217,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Juan</first><last>Alonso</last></author>
       <author><first>Noe</first><last>Casas</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <pages>125–133</pages>
       <url hash="6d0c8d8d">W19-6725</url>
       <bibkey>torregrosa-etal-2019-leveraging</bibkey>
@@ -15244,7 +15244,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="28">
       <title>Surveying the potential of using speech technologies for post-editing purposes in the context of international organizations: What do professional translators think?</title>
       <author><first>Jeevanthi</first><last>Liyanapathirana</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Bartolomé</first><last>Mesa-Lao</last></author>
       <pages>149–158</pages>
       <url hash="a660ef27">W19-6728</url>
@@ -15283,9 +15283,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Marek</first><last>Mazur</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <author><first>Alex</first><last>Helle</last></author>
-      <author><first>Gema</first><last>Ramírez-Sánchez</last></author>
+      <author id="gema-ramirez-sanchez"><first>Gema</first><last>Ramírez-Sánchez</last></author>
       <author><first>Víctor</first><last>Sánchez-Cartagena</last></author>
-      <author><first>Mārcis</first><last>Pinnis</last></author>
+      <author id="marcis-pinnis"><first>Mārcis</first><last>Pinnis</last></author>
       <author><first>Valters</first><last>Šics</last></author>
       <pages>179–185</pages>
       <url hash="95c43ae8">W19-6732</url>
@@ -15308,7 +15308,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="34">
       <title>Monolingual backtranslation in a medical speech translation system for diagnostic interviews - a <fixed-case>NMT</fixed-case> approach</title>
       <author><first>Jonathan</first><last>Mutal</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Johanna</first><last>Gerlach</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <author><first>Hervé</first><last>Spechbach</last></author>
@@ -15333,12 +15333,12 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="37">
       <title>Incremental Adaptation of <fixed-case>NMT</fixed-case> for Professional Post-editors: A User Study</title>
       <author><first>Miguel</first><last>Domingo</last></author>
-      <author><first>Mercedes</first><last>García-Martínez</last></author>
-      <author><first>Álvaro</first><last>Peris</last></author>
+      <author id="mercedes-garcia-martinez"><first>Mercedes</first><last>García-Martínez</last></author>
+      <author id="alvaro-peris"><first>Álvaro</first><last>Peris</last></author>
       <author><first>Alexandre</first><last>Helle</last></author>
       <author><first>Amando</first><last>Estela</last></author>
       <author><first>Laurent</first><last>Bié</last></author>
-      <author><first>Francisco</first><last>Casacuberta</last></author>
+      <author id="francisco-casacuberta"><first>Francisco</first><last>Casacuberta</last></author>
       <author><first>Manuel</first><last>Herranz</last></author>
       <pages>219–227</pages>
       <url hash="8accfa30">W19-6737</url>
@@ -15363,9 +15363,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the 2nd Workshop on Technologies for MT of Low Resource Languages</booktitle>
       <url hash="8d3db337">W19-68</url>
       <editor><first>Alina</first><last>Karakanta</last></editor>
-      <editor><first>Atul Kr.</first><last>Ojha</last></editor>
+      <editor id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></editor>
       <editor><first>Chao-Hong</first><last>Liu</last></editor>
-      <editor><first>Jonathan</first><last>Washington</last></editor>
+      <editor id="jonathan-washington"><first>Jonathan</first><last>Washington</last></editor>
       <editor><first>Nathaniel</first><last>Oco</last></editor>
       <editor><first>Surafel Melaku</first><last>Lakew</last></editor>
       <editor><first>Valentin</first><last>Malykh</last></editor>
@@ -15397,7 +15397,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="3">
       <title>Workflows for kickstarting <fixed-case>RBMT</fixed-case> in virtually No-Resource Situation</title>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <pages>11–16</pages>
       <url hash="0a0c5abe">W19-6803</url>
       <bibkey>pirinen-2019-workflows</bibkey>
@@ -15406,15 +15406,15 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>A Continuous Improvement Framework of Machine Translation for <fixed-case>S</fixed-case>hipibo-Konibo</title>
       <author><first>Héctor Erasmo Gómez</first><last>Montoya</last></author>
       <author><first>Kervy Dante Rivas</first><last>Rojas</last></author>
-      <author><first>Arturo</first><last>Oncevay</last></author>
+      <author id="arturo-oncevay"><first>Arturo</first><last>Oncevay</last></author>
       <pages>17–23</pages>
       <url hash="420188d3">W19-6804</url>
       <bibkey>montoya-etal-2019-continuous</bibkey>
     </paper>
     <paper id="5">
       <title>A free/open-source rule-based machine translation system for <fixed-case>C</fixed-case>rimean <fixed-case>T</fixed-case>atar to <fixed-case>T</fixed-case>urkish</title>
-      <author><first>Memduh</first><last>Gökırmak</last></author>
-      <author><first>Francis</first><last>Tyers</last></author>
+      <author id="memduh-gokirmak"><first>Memduh</first><last>Gökırmak</last></author>
+      <author id="francis-tyers"><first>Francis</first><last>Tyers</last></author>
       <author><first>Jonathan</first><last>Washington</last></author>
       <pages>24–31</pages>
       <url hash="9892f661">W19-6805</url>
@@ -15446,7 +15446,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="8">
       <title>Corpus Building for Low Resource Languages in the <fixed-case>DARPA</fixed-case> <fixed-case>LORELEI</fixed-case> Program</title>
       <author><first>Jennifer</first><last>Tracey</last></author>
-      <author><first>Stephanie</first><last>Strassel</last></author>
+      <author id="stephanie-strassel"><first>Stephanie</first><last>Strassel</last></author>
       <author><first>Ann</first><last>Bies</last></author>
       <author><first>Zhiyi</first><last>Song</last></author>
       <author><first>Michael</first><last>Arrigo</last></author>
@@ -15465,9 +15465,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Ruba</first><last>Priyadharshini</last></author>
       <author><first>Bernardo</first><last>Stearns</last></author>
-      <author><first>Arun</first><last>Jayapal</last></author>
+      <author id="arun-kumar-jayapal"><first>Arun</first><last>Jayapal</last></author>
       <author><first>Sridevy</first><last>S</last></author>
-      <author><first>Mihael</first><last>Arcan</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arcan</last></author>
       <author><first>Manel</first><last>Zarrouk</last></author>
       <author><first>John P</first><last>McCrae</last></author>
       <pages>56–63</pages>
@@ -15478,7 +15478,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>A3-108 Machine Translation System for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2019</title>
       <author><first>Saumitra</first><last>Yadav</last></author>
       <author><first>Vandan</first><last>Mujadia</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <pages>64–67</pages>
       <url hash="f6d7f355">W19-6810</url>
       <bibkey>yadav-etal-2019-a3</bibkey>
@@ -15504,8 +15504,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <url hash="ca14cb63">W19-69</url>
       <editor><first>Teresa</first><last>Lynn</last></editor>
       <editor><first>Delyth</first><last>Prys</last></editor>
-      <editor><first>Colin</first><last>Batchelor</last></editor>
-      <editor><first>Francis</first><last>Tyers</last></editor>
+      <editor id="colin-batchelor"><first>Colin</first><last>Batchelor</last></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last></editor>
       <publisher>European Association for Machine Translation</publisher>
       <address>Dublin, Ireland</address>
       <month>August</month>
@@ -15518,7 +15518,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </frontmatter>
     <paper id="1">
       <title>Unsupervised multi-word term recognition in <fixed-case>W</fixed-case>elsh</title>
-      <author><first>Irena</first><last>Spasić</last></author>
+      <author id="irena-spasic"><first>Irena</first><last>Spasić</last></author>
       <author><first>David</first><last>Owen</last></author>
       <author><first>Dawn</first><last>Knight</last></author>
       <author><first>Andreas</first><last>Artemiou</last></author>
@@ -15566,7 +15566,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="7">
       <title>Adapting Term Recognition to an Under-Resourced Language: the Case of <fixed-case>I</fixed-case>rish</title>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Adrian</first><last>Doyle</last></author>
       <pages>48–57</pages>
       <url hash="80c75fd5">W19-6907</url>
@@ -15593,7 +15593,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="10">
       <title>A Character-Level <fixed-case>LSTM</fixed-case> Network Model for Tokenizing the <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish text of the <fixed-case>W</fixed-case>ürzburg Glosses on the <fixed-case>P</fixed-case>auline Epistles</title>
       <author><first>Adrian</first><last>Doyle</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <author><first>Clodagh</first><last>Downey</last></author>
       <pages>70–79</pages>
       <url hash="2184b019">W19-6910</url>
@@ -15614,7 +15614,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the Second MEMENTO workshop on Modelling Parameters of Cognitive Effort in Translation Production</booktitle>
       <url hash="a3dc8ceb">W19-70</url>
       <editor><first>Michael</first><last>Carl</last></editor>
-      <editor><first>Silvia</first><last>Hansen-Schirra</last></editor>
+      <editor id="silvia-hansen-schirra"><first>Silvia</first><last>Hansen-Schirra</last></editor>
       <publisher>European Association for Machine Translation</publisher>
       <address>Dublin, Ireland</address>
       <month>August</month>
@@ -15635,7 +15635,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="2">
       <title>Modelling word translation entropy and syntactic equivalence with machine learning</title>
       <author><first>Bram</first><last>Vanroy</last></author>
-      <author><first>Orphée</first><last>De Clercq</last></author>
+      <author id="orphee-de-clercq"><first>Orphée</first><last>De Clercq</last></author>
       <author><first>Lieve</first><last>Macken</last></author>
       <pages>3–4</pages>
       <url hash="e48ef07d">W19-7002</url>
@@ -15652,7 +15652,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="4">
       <title>Translation Quality and Effort Prediction in Professional Machine Translation Post-Editing</title>
       <author><first>Jennifer</first><last>Vardaro</last></author>
-      <author><first>Moritz</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz</first><last>Schaeffer</last></author>
       <author><first>Silvia</first><last>Hansen-Schirra</last></author>
       <pages>7–8</pages>
       <url hash="75ab2def">W19-7004</url>
@@ -15702,7 +15702,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Anke</first><last>Tardel</last></author>
       <author><first>Silvia</first><last>Hansen-Schirra</last></author>
       <author><first>Silke</first><last>Gutermuth</last></author>
-      <author><first>Moritz</first><last>Schaeffer</last></author>
+      <author id="moritz-schaeffer"><first>Moritz</first><last>Schaeffer</last></author>
       <pages>19–20</pages>
       <url hash="d490e839">W19-7010</url>
       <bibkey>tardel-etal-2019-automatization</bibkey>
@@ -15728,7 +15728,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <meta>
       <booktitle>Proceedings of the Second Workshop on Multilingualism at the Intersection of Knowledge Bases and Machine Translation</booktitle>
       <url hash="5db98a78">W19-71</url>
-      <editor><first>Mihael</first><last>Arcan</last></editor>
+      <editor id="mihael-arcan"><first>Mihael</first><last>Arcan</last></editor>
       <editor><first>Marco</first><last>Turchi</last></editor>
       <editor><first>Jinhua</first><last>Du</last></editor>
       <editor><first>Dimitar</first><last>Shterionov</last></editor>
@@ -15747,7 +15747,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Gloss Translation for Under-resourced Languages using Multilingual Neural Machine Translation</title>
       <author><first>Bharathi Raja</first><last>Chakravarthi</last></author>
       <author><first>Mihael</first><last>Arcan</last></author>
-      <author><first>John P.</first><last>McCrae</last></author>
+      <author id="john-philip-mccrae"><first>John P.</first><last>McCrae</last></author>
       <pages>1–7</pages>
       <url hash="cfd12103">W19-7101</url>
       <bibkey>chakravarthi-etal-2019-wordnet</bibkey>
@@ -15756,8 +15756,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Leveraging <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> terms and relations for machine translation of clinical texts from <fixed-case>B</fixed-case>asque to <fixed-case>S</fixed-case>panish</title>
       <author><first>Xabier</first><last>Soto</last></author>
       <author><first>Olatz</first><last>Perez-De-Viñaspre</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
-      <author><first>Gorka</first><last>Labaka</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
+      <author id="gorka-labaka"><first>Gorka</first><last>Labaka</last></author>
       <pages>8–18</pages>
       <url hash="a0bf8fc2">W19-7102</url>
       <bibkey>soto-etal-2019-leveraging</bibkey>
@@ -15785,7 +15785,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Hybrid Data-Model Parallel Training for Sequence-to-Sequence Recurrent Neural Network Machine Translation</title>
       <author><first>Junya</first><last>Ono</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>4–12</pages>
       <url hash="1a84c146">W19-7201</url>
       <bibkey>ono-etal-2019-hybrid</bibkey>
@@ -15824,7 +15824,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <booktitle>Proceedings of the Qualities of Literary Machine Translation</booktitle>
       <url hash="87d46cfe">W19-73</url>
       <editor><first>James</first><last>Hadley</last></editor>
-      <editor><first>Maja</first><last>Popović</last></editor>
+      <editor id="maja-popovic"><first>Maja</first><last>Popović</last></editor>
       <editor><first>Haithem</first><last>Afli</last></editor>
       <editor><first>Andy</first><last>Way</last></editor>
       <publisher>European Association for Machine Translation</publisher>
@@ -15840,8 +15840,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="1">
       <title>Neural Machine Translation of Literary Texts from <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>lovene</title>
       <author><first>Taja</first><last>Kuzman</last></author>
-      <author><first>Špela</first><last>Vintar</last></author>
-      <author><first>Mihael</first><last>Arčan</last></author>
+      <author id="spela-vintar"><first>Špela</first><last>Vintar</last></author>
+      <author id="mihael-arcan"><first>Mihael</first><last>Arčan</last></author>
       <pages>1–9</pages>
       <url hash="fbbba106">W19-7301</url>
       <bibkey>kuzman-etal-2019-neural</bibkey>
@@ -15904,7 +15904,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="1">
       <title><fixed-case>T</fixed-case>witter Bot Detection using Diversity Measures</title>
       <author><first>Dijana</first><last>Kosmajac</last></author>
-      <author><first>Vlado</first><last>Keselj</last></author>
+      <author id="vlado-keselj"><first>Vlado</first><last>Keselj</last></author>
       <pages>1–8</pages>
       <url hash="6f2f1882">W19-7401</url>
       <bibkey>kosmajac-keselj-2019-twitter</bibkey>
@@ -15943,7 +15943,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <author><first>Shobhit</first><last>Jain</last></author>
       <author><first>Sravan Babu</first><last>Bodapati</last></author>
       <author><first>Ramesh</first><last>Nallapati</last></author>
-      <author><first>Anima</first><last>Anandkumar</last></author>
+      <author id="animashree-anandkumar"><first>Anima</first><last>Anandkumar</last></author>
       <pages>34–41</pages>
       <url hash="6bcc8135">W19-7405</url>
       <bibkey>jain-etal-2019-multi</bibkey>
@@ -16164,11 +16164,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     </paper>
     <paper id="9">
       <title>Introduction to <fixed-case>S</fixed-case>anskrit Shabdamitra: An Educational Application of <fixed-case>S</fixed-case>anskrit <fixed-case>W</fixed-case>ordnet</title>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
       <author><first>Nilesh</first><last>Joshi</last></author>
       <author><first>Sayali</first><last>Khare</last></author>
       <author><first>Hanumant</first><last>Redkar</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <pages>117–133</pages>
       <url hash="b49448f5">W19-7509</url>
       <bibkey>kulkarni-etal-2019-introduction</bibkey>
@@ -16187,8 +16187,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
       <title>Utilizing Word Embeddings based Features for Phylogenetic Tree Generation of <fixed-case>S</fixed-case>anskrit Texts</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
       <author><first>Abhijeet</first><last>Dubey</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Gholemreza</first><last>Haffari</last></author>
       <pages>152–165</pages>
       <url hash="1cdfd59b">W19-7511</url>
@@ -16197,8 +16197,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
     <paper id="12">
       <title>An Introduction to the Textual History Tool</title>
       <author><first>Diptesh</first><last>Kanojia</last></author>
-      <author><first>Malhar</first><last>Kulkarni</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="malhar-kulkarni"><first>Malhar</first><last>Kulkarni</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <author><first>Eivind</first><last>Kahrs</last></author>
       <pages>166–180</pages>
       <url hash="8e71171d">W19-7512</url>
@@ -16243,7 +16243,7 @@ Participants of the tutorial will get a clear understanding of Neural Model type
     </paper>
     <paper id="2">
       <title>Challenge Test Sets for <fixed-case>MT</fixed-case> Evaluation</title>
-      <author><first>Maja</first><last>Popović</last></author>
+      <author id="maja-popovic"><first>Maja</first><last>Popović</last></author>
       <author><first>Sheila</first><last>Castilho</last></author>
       <abstract>Most of the test sets used for the evaluation of MT systems reflect the frequency distribution of different phenomena found in naturally occurring data (”standard” or ”natural” test sets). However, to better understand particular strengths and weaknesses of MT systems, especially those based on neural networks, it is necessary to apply more focused evaluation procedures. Therefore, another type of test sets (”challenge” test sets, also called ”test suites”) is being increasingly employed in order to highlight points of difficulty which are relevant to model development, training, or using of the given system. This tutorial will be useful for anyone (researchers, developers, users, translators) interested in detailed evaluation and getting a better understanding of machine translation (MT) systems and models. The attendees will learn about the motivation and linguistic background of challenge test sets and a range of testing possibilities applied to the state-of-the-art MT systems, as well as a number of practical aspects and challenges.</abstract>
       <attachment type="presentation" hash="12760e39">W19-7602.Presentation.pdf</attachment>
@@ -16292,7 +16292,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="2">
       <title><fixed-case>S</fixed-case>yntax<fixed-case>F</fixed-case>est 2019 Invited talk - Transferring <fixed-case>NLP</fixed-case> models across languages and domains</title>
-      <author><first>Barbara</first><last>Plank</last></author>
+      <author id="barbara-plank"><first>Barbara</first><last>Plank</last></author>
       <pages>2–2</pages>
       <url hash="afc73caf">W19-7702</url>
       <doi>10.18653/v1/W19-7702</doi>
@@ -16313,7 +16313,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="4">
       <title>Reflexives in <fixed-case>C</fixed-case>zech from a Dependency Perspective</title>
       <author><first>Vaclava</first><last>Kettnerova</last></author>
-      <author><first>Marketa</first><last>Lopatkova</last></author>
+      <author id="marketa-lopatkova"><first>Marketa</first><last>Lopatkova</last></author>
       <pages>14–25</pages>
       <url hash="b855954d">W19-7704</url>
       <doi>10.18653/v1/W19-7704</doi>
@@ -16381,7 +16381,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Eleni</first><last>Metheniti</last></author>
       <author><first>Pomi</first><last>Park</last></author>
       <author><first>Kristina</first><last>Kolesova</last></author>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>100–111</pages>
       <url hash="89b03040">W19-7712</url>
       <doi>10.18653/v1/W19-7712</doi>
@@ -16428,7 +16428,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="17">
       <title>Towards Deep <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
       <author><first>Kira</first><last>Droganova</last></author>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <pages>144–152</pages>
       <url hash="c6764aef">W19-7717</url>
       <doi>10.18653/v1/W19-7717</doi>
@@ -16438,8 +16438,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Delimiting Adverbial Meanings. A corpus-based comparative study on <fixed-case>C</fixed-case>zech spatial prepositions and their <fixed-case>E</fixed-case>nglish equivalents</title>
       <author><first>Marie</first><last>Mikulová</last></author>
       <author><first>Veronika</first><last>Kolářová</last></author>
-      <author><first>Jarmila</first><last>Panevová</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="jarmila-panevova"><first>Jarmila</first><last>Panevová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>153–159</pages>
       <url hash="69b60630">W19-7718</url>
       <doi>10.18653/v1/W19-7718</doi>
@@ -16448,7 +16448,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="19">
       <title>A <fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>-dictionary of Collocations</title>
       <author><first>Maria Auxiliadora</first><last>Barrios Rodriguez</last></author>
-      <author><first>Igor</first><last>Boguslavsky</last></author>
+      <author id="igor-boguslavsky"><first>Igor</first><last>Boguslavsky</last></author>
       <pages>160–167</pages>
       <url hash="afb0a090">W19-7719</url>
       <doi>10.18653/v1/W19-7719</doi>
@@ -16496,7 +16496,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="24">
       <title><fixed-case>P</fixed-case>āṇinian Syntactico-Semantic Relation Labels</title>
       <author><first>Amba</first><last>Kulkarni</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <pages>198–208</pages>
       <url hash="131c2b9a">W19-7724</url>
       <doi>10.18653/v1/W19-7724</doi>
@@ -16505,7 +16505,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="25">
       <title>Experiments on human incremental parsing</title>
       <author><first>Leonid</first><last>Mityushin</last></author>
-      <author><first>Leonid</first><last>Iomdin</last></author>
+      <author id="leonid-iomdin"><first>Leonid</first><last>Iomdin</last></author>
       <pages>209–215</pages>
       <url hash="ba946dcf">W19-7725</url>
       <doi>10.18653/v1/W19-7725</doi>
@@ -16526,10 +16526,10 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <meta>
       <booktitle>Proceedings of the 18th International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2019)</booktitle>
       <url hash="e7045a46">W19-78</url>
-      <editor><first>Marie</first><last>Candito</last></editor>
+      <editor id="marie-candito"><first>Marie</first><last>Candito</last></editor>
       <editor><first>Kilian</first><last>Evang</last></editor>
       <editor><first>Stephan</first><last>Oepen</last></editor>
-      <editor><first>Djamé</first><last>Seddah</last></editor>
+      <editor id="djame-seddah"><first>Djamé</first><last>Seddah</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Paris, France</address>
       <month>August</month>
@@ -16583,10 +16583,10 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="5">
       <title>Parallel Dependency Treebank Annotated with Interlinked Verbal Synonym Classes and Roles</title>
-      <author><first>Zdeňka</first><last>Urešová</last></author>
-      <author><first>Eva</first><last>Fučíková</last></author>
-      <author><first>Eva</first><last>Hajičová</last></author>
-      <author><first>Jan</first><last>Hajič</last></author>
+      <author id="zdenka-uresova"><first>Zdeňka</first><last>Urešová</last></author>
+      <author id="eva-fucikova"><first>Eva</first><last>Fučíková</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
+      <author id="jan-hajic"><first>Jan</first><last>Hajič</last></author>
       <pages>38–50</pages>
       <url hash="907e00ba">W19-7805</url>
       <doi>10.18653/v1/W19-7805</doi>
@@ -16594,7 +16594,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="6">
       <title>Ordering of Adverbials of Time and Place in Grammars and in an Annotated <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Parallel Corpus</title>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <author><first>Kateřina</first><last>Rysová</last></author>
       <pages>51–60</pages>
@@ -16621,7 +16621,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="9">
       <title>Challenges of Annotating a Code-Switching Treebank</title>
-      <author><first>Özlem</first><last>Çetinoğlu</last></author>
+      <author id="ozlem-cetinoglu"><first>Özlem</first><last>Çetinoğlu</last></author>
       <author><first>Çağrı</first><last>Çöltekin</last></author>
       <pages>82–90</pages>
       <url hash="286de035">W19-7809</url>
@@ -16631,7 +16631,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="10">
       <title>Dependency Parser for <fixed-case>B</fixed-case>engali-<fixed-case>E</fixed-case>nglish Code-Mixed Data enhanced with a Synthetic Treebank</title>
       <author><first>Urmi</first><last>Ghosh</last></author>
-      <author><first>Dipti</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti</first><last>Sharma</last></author>
       <author><first>Simran</first><last>Khanuja</last></author>
       <pages>91–99</pages>
       <url hash="0a198c21">W19-7810</url>
@@ -16692,7 +16692,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Challenges of language change and variation: towards an extended treebank of Medieval <fixed-case>F</fixed-case>rench</title>
       <author><first>Mathilde</first><last>Regnault</last></author>
       <author><first>Sophie</first><last>Prévost</last></author>
-      <author><first>Eric</first><last>Villemonte de la Clergerie</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>Villemonte de la Clergerie</last></author>
       <pages>144–150</pages>
       <url hash="d9487ab9">W19-7816</url>
       <doi>10.18653/v1/W19-7816</doi>
@@ -16777,7 +16777,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="8">
       <title>Extracting out of the subject in <fixed-case>F</fixed-case>rench: experimental evidence</title>
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <author><first>Elodie</first><last>Winckel</last></author>
       <pages>68–74</pages>
       <url hash="ae65bb82">W19-7908</url>
@@ -16807,7 +16807,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="11">
       <title>Dependency Length Minimization vs. Word Order Constraints: An Empirical Study On 55 Treebanks</title>
       <author><first>Xiang</first><last>Yu</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>89–97</pages>
       <url hash="484cc200">W19-7911</url>
@@ -16857,7 +16857,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <booktitle>Proceedings of the Third Workshop on Universal Dependencies (UDW, SyntaxFest 2019)</booktitle>
       <url hash="bec2ab47">W19-80</url>
       <editor><first>Alexandre</first><last>Rademaker</last></editor>
-      <editor><first>Francis</first><last>Tyers</last></editor>
+      <editor id="francis-tyers"><first>Francis</first><last>Tyers</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Paris, France</address>
       <month>August</month>
@@ -16891,7 +16891,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="3">
       <title>Developing <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>W</fixed-case>olof</title>
-      <author><first>Cheikh Bamba</first><last>Dione</last></author>
+      <author id="cheikh-m-bamba-dione"><first>Cheikh Bamba</first><last>Dione</last></author>
       <pages>12–23</pages>
       <url hash="4d8ded6e">W19-8003</url>
       <doi>10.18653/v1/W19-8003</doi>
@@ -16900,7 +16900,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="4">
       <title>Improving <fixed-case>UD</fixed-case> processing via satellite resources for morphology</title>
       <author><first>Kaja</first><last>Dobrovoljc</last></author>
-      <author><first>Tomaž</first><last>Erjavec</last></author>
+      <author id="tomaz-erjavec"><first>Tomaž</first><last>Erjavec</last></author>
       <author><first>Nikola</first><last>Ljubešić</last></author>
       <pages>24–34</pages>
       <url hash="b2fefe52">W19-8004</url>
@@ -16970,7 +16970,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="12">
       <title>Recursive <fixed-case>LSTM</fixed-case> Tree Representation for Arc-Standard Transition-Based Dependency Parsing</title>
-      <author><first>Mohab</first><last>Elkaref</last></author>
+      <author id="mohab-el-karef"><first>Mohab</first><last>Elkaref</last></author>
       <author><first>Bernd</first><last>Bohnet</last></author>
       <pages>101–107</pages>
       <url hash="4e35ef9d">W19-8012</url>
@@ -16984,7 +16984,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Şaziye</first><last>Betül Özateş</last></author>
       <author><first>Balkız</first><last>Öztürk Başaran</last></author>
       <author><first>Tunga</first><last>Güngör</last></author>
-      <author><first>Arzucan</first><last>Özgür</last></author>
+      <author id="arzucan-ozgur"><first>Arzucan</first><last>Özgür</last></author>
       <pages>108–115</pages>
       <url hash="2141ec43">W19-8013</url>
       <doi>10.18653/v1/W19-8013</doi>
@@ -16993,7 +16993,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="14">
       <title>Towards transferring <fixed-case>B</fixed-case>ulgarian Sentences with Elliptical Elements to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: issues and strategies</title>
       <author><first>Petya</first><last>Osenova</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <pages>116–123</pages>
       <url hash="c2325743">W19-8014</url>
       <doi>10.18653/v1/W19-8014</doi>
@@ -17011,7 +17011,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="16">
       <title>Building minority dependency treebanks, dictionaries and computational grammars at the same time—an experiment in <fixed-case>K</fixed-case>arelian treebanking</title>
-      <author><first>Tommi A</first><last>Pirinen</last></author>
+      <author id="tommi-a-pirinen"><first>Tommi A</first><last>Pirinen</last></author>
       <pages>132–136</pages>
       <url hash="ac8710bb">W19-8016</url>
       <doi>10.18653/v1/W19-8016</doi>
@@ -17026,9 +17026,9 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <editor><first>Vera</first><last>Demberg</last></editor>
       <editor><first>Chandra</first><last>Khatri</last></editor>
       <editor><first>Abhinav</first><last>Rastogi</last></editor>
-      <editor><first>Donia</first><last>Scott</last></editor>
-      <editor><first>Marilyn</first><last>Walker</last></editor>
-      <editor><first>Michael</first><last>White</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
+      <editor id="marilyn-walker"><first>Marilyn</first><last>Walker</last></editor>
+      <editor id="michael-white"><first>Michael</first><last>White</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Tokyo, Japan</address>
       <month>November</month>
@@ -17172,7 +17172,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <meta>
       <booktitle>Proceedings of the 1st Workshop on Interactive Natural Language Technology for Explainable Artificial Intelligence (NL4XAI 2019)</booktitle>
       <url hash="523120b0">W19-84</url>
-      <editor><first>Jose M.</first><last>Alonso</last></editor>
+      <editor id="jose-m-alonso"><first>Jose M.</first><last>Alonso</last></editor>
       <editor><first>Alejandro</first><last>Catala</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <year>2019</year>
@@ -17202,8 +17202,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="3">
       <title>A Survey of Explainable <fixed-case>AI</fixed-case> Terminology</title>
-      <author><first>Miruna-Adriana</first><last>Clinciu</last></author>
-      <author><first>Helen</first><last>Hastie</last></author>
+      <author id="miruna-clinciu"><first>Miruna-Adriana</first><last>Clinciu</last></author>
+      <author id="helen-hastie"><first>Helen</first><last>Hastie</last></author>
       <pages>8–13</pages>
       <url hash="12323ee7">W19-8403</url>
       <doi>10.18653/v1/W19-8403</doi>
@@ -17255,7 +17255,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <booktitle>Proceedings of the Second International Workshop on Resources and Tools for Derivational Morphology</booktitle>
       <url hash="3827a648">W19-85</url>
       <editor><first>Magda</first><last>Ševčíková</last></editor>
-      <editor><first>Zdeněk</first><last>Žabokrtský</last></editor>
+      <editor id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></editor>
       <editor><first>Eleonora</first><last>Litta</last></editor>
       <editor><first>Marco</first><last>Passarotti</last></editor>
       <publisher>Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics</publisher>
@@ -17277,7 +17277,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="2">
       <title><fixed-case>P</fixed-case>ara<fixed-case>D</fixed-case>is and Démonette: From Theory to Resources for Derivational Paradigms</title>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
       <pages>5--14</pages>
       <url hash="8aacd0b4">W19-8502</url>
@@ -17287,7 +17287,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Semantic descriptions of <fixed-case>F</fixed-case>rench derivational relations in a families-and-paradigms framework</title>
       <author><first>Daniele</first><last>Sanacore</last></author>
       <author><first>Nabil</first><last>Hathout</last></author>
-      <author><first>Fiammetta</first><last>Namer</last></author>
+      <author id="fiammetta-namer"><first>Fiammetta</first><last>Namer</last></author>
       <pages>15--24</pages>
       <url hash="5114adc1">W19-8503</url>
       <bibkey>sanacore-etal-2019-semantic</bibkey>
@@ -17387,7 +17387,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
   <volume id="86" ingest-date="2019-11-30" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 12th International Conference on Natural Language Generation</booktitle>
-      <editor><first>Kees</first><last>van Deemter</last></editor>
+      <editor id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></editor>
       <editor><first>Chenghua</first><last>Lin</last></editor>
       <editor><first>Hiroya</first><last>Takamura</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
@@ -17474,7 +17474,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Computational Argumentation Synthesis as a Language Modeling Task</title>
       <author><first>Roxanne</first><last>El Baff</last></author>
       <author><first>Henning</first><last>Wachsmuth</last></author>
-      <author><first>Khalid</first><last>Al Khatib</last></author>
+      <author id="khalid-al-khatib"><first>Khalid</first><last>Al Khatib</last></author>
       <author><first>Manfred</first><last>Stede</last></author>
       <author><first>Benno</first><last>Stein</last></author>
       <pages>54–64</pages>
@@ -17493,7 +17493,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Anu</first><last>Venkatesh</last></author>
       <author><first>Raefer</first><last>Gabriel</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <pages>65–75</pages>
       <abstract>Encoder-decoder based neural architectures serve as the basis of state-of-the-art approaches in end-to-end open domain dialog systems. Since most of such systems are trained with a maximum likelihood (MLE) objective they suffer from issues such as lack of generalizability and the generic response problem, i.e., a system response that can be an answer to a large number of user utterances, e.g., “Maybe, I don’t know.” Having explicit feedback on the relevance and interestingness of a system response at each turn can be a useful signal for mitigating such issues and improving system quality by selecting responses from different approaches. Towards this goal, we present a system that evaluates chatbot responses at each dialog turn for coherence and engagement. Our system provides explicit turn-level dialog quality feedback, which we show to be highly correlated with human evaluation. To show that incorporating this feedback in the neural response generation models improves dialog quality, we present two different and complementary mechanisms to incorporate explicit feedback into a neural response generation model: reranking and direct modification of the loss function during training. Our studies show that a response generation model that incorporates these combined feedback mechanisms produce more engaging and coherent responses in an open-domain spoken dialog setting, significantly improving the response quality using both automatic and human evaluation.</abstract>
       <url hash="50fc1a18">W19-8608</url>
@@ -17527,7 +17527,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Jinfeng</first><last>Rao</last></author>
       <author><first>Kartikeya</first><last>Upasani</last></author>
       <author><first>Anusha</first><last>Balakrishnan</last></author>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Anuj</first><last>Kumar</last></author>
       <author><first>Rajen</first><last>Subba</last></author>
       <pages>95–100</pages>
@@ -17550,7 +17550,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Neural Question Generation using Interrogative Phrases</title>
       <author><first>Yuichi</first><last>Sasazawa</last></author>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <pages>106–111</pages>
       <abstract>Question Generation (QG) is the task of generating questions from a given passage. One of the key requirements of QG is to generate a question such that it results in a target answer. Previous works used a target answer to obtain a desired question. However, we also want to specify how to ask questions and improve the quality of generated questions. In this study, we explore the use of interrogative phrases as additional sources to control QG. By providing interrogative phrases, we expect that QG can generate a more reliable sequence of words subsequent to an interrogative phrase. We present a baseline sequence-to-sequence model with the attention, copy, and coverage mechanisms, and show that the simple baseline achieves state-of-the-art performance. The experiments demonstrate that interrogative phrases contribute to improving the performance of QG. In addition, we report the superiority of using interrogative phrases in human evaluation. Finally, we show that a question answering system can provide target answers more correctly when the questions are generated with interrogative phrases.</abstract>
       <url hash="169a0911">W19-8613</url>
@@ -17571,7 +17571,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="15">
       <title><fixed-case>M</fixed-case>in<fixed-case>W</fixed-case>iki<fixed-case>S</fixed-case>plit: A Sentence Splitting Corpus with Minimal Propositions</title>
       <author><first>Christina</first><last>Niklaus</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>118–123</pages>
       <abstract>We compiled a new sentence splitting corpus that is composed of 203K pairs of aligned complex source and simplified target sentences. Contrary to previously proposed text simplification corpora, which contain only a small number of split examples, we present a dataset where each input sentence is broken down into a set of minimal propositions, i.e. a sequence of sound, self-contained utterances with each of them presenting a minimal semantic unit that cannot be further decomposed into meaningful propositions. This corpus is useful for developing sentence splitting approaches that learn how to transform sentences with a complex linguistic structure into a fine-grained representation of short sentences that present a simple and more regular structure which is easier to process for downstream applications and thus facilitates and improves their performance.</abstract>
@@ -17597,7 +17597,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title><fixed-case>KPT</fixed-case>imes: A Large-Scale Dataset for Keyphrase Generation on News Documents</title>
       <author><first>Ygor</first><last>Gallina</last></author>
       <author><first>Florian</first><last>Boudin</last></author>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <pages>130–135</pages>
       <abstract>Keyphrase generation is the task of predicting a set of lexical units that conveys the main content of a source text. Existing datasets for keyphrase generation are only readily available for the scholarly domain and include non-expert annotations. In this paper we present KPTimes, a large-scale dataset of news texts paired with editor-curated keyphrases. Exploring the dataset, we show how editors tag documents, and how their annotations differ from those found in existing datasets. We also train and evaluate state-of-the-art neural keyphrase generation models on KPTimes to gain insights on how well they perform on the news domain. The dataset is available online at <url>https://github.com/ygorg/KPTimes</url>.</abstract>
       <url hash="dd8565e2">W19-8617</url>
@@ -17607,7 +17607,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="18">
       <title>Sketch Me if You Can: Towards Generating Detailed Descriptions of Object Shape by Grounding in Images and Drawings</title>
       <author><first>Ting</first><last>Han</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <pages>136–140</pages>
       <abstract>A lot of recent work in Language &amp; Vision has looked at generating descriptions or referring expressions for objects in scenes of real-world images, though focusing mostly on relatively simple language like object names, color and location attributes (e.g., brown chair on the left). This paper presents work on Draw-and-Tell, a dataset of detailed descriptions for common objects in images where annotators have produced fine-grained attribute-centric expressions distinguishing a target object from a range of similar objects. Additionally, the dataset comes with hand-drawn sketches for each object. As Draw-and-Tell is medium-sized and contains a rich vocabulary, it constitutes an interesting challenge for CNN-LSTM architectures used in state-of-the-art image captioning models. We explore whether the additional modality given through sketches can help such a model to learn to accurately ground detailed language referring expressions to object shapes. Our results are encouraging.</abstract>
       <url hash="0764949b">W19-8618</url>
@@ -17619,7 +17619,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Feng</first><last>Nie</last></author>
       <author><first>Jinpeng</first><last>Wang</last></author>
       <author><first>Rong</first><last>Pan</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <pages>141–146</pages>
       <abstract>Data-to-text generation aims to generate descriptions given a structured input data (i.e., a table with multiple records). Existing neural methods for encoding input data can be divided into two categories: a) pooling based encoders which ignore dependencies between input records or b) recurrent encoders which model only sequential dependencies between input records. In our investigation, although the recurrent encoder generally outperforms the pooling based encoder by learning the sequential dependencies, it is sensitive to the order of the input records (i.e., performance decreases when injecting the random shuffling noise over input data). To overcome this problem, we propose to adopt the self-attention mechanism to learn dependencies between arbitrary input records. Experimental results show the proposed method achieves comparable results and remains stable under random shuffling over input data.</abstract>
       <url hash="84813a34">W19-8619</url>
@@ -17642,7 +17642,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="21">
       <title>Tell Me More: A Dataset of Visual Scene Description Sequences</title>
       <author><first>Nikolai</first><last>Ilinykh</last></author>
-      <author><first>Sina</first><last>Zarrieß</last></author>
+      <author id="sina-zarriess"><first>Sina</first><last>Zarrieß</last></author>
       <author><first>David</first><last>Schlangen</last></author>
       <pages>152–157</pages>
       <abstract>We present a dataset consisting of what we call image description sequences, which are multi-sentence descriptions of the contents of an image. These descriptions were collected in a pseudo-interactive setting, where the describer was told to describe the given image to a listener who needs to identify the image within a set of images, and who successively asks for more information. As we show, this setup produced nicely structured data that, we think, will be useful for learning models capable of planning and realising such description discourses.</abstract>
@@ -17653,7 +17653,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="22">
       <title>A Closer Look at Recent Results of Verb Selection for Data-to-Text <fixed-case>NLG</fixed-case></title>
       <author><first>Guanyi</first><last>Chen</last></author>
-      <author><first>Jin-Ge</first><last>Yao</last></author>
+      <author id="jin-ge-yao"><first>Jin-Ge</first><last>Yao</last></author>
       <pages>158–163</pages>
       <abstract>Automatic natural language generation systems need to use the contextually-appropriate verbs when describing different kinds of facts or events, which has triggered research interest on verb selection for data-to-text generation. In this paper, we discuss a few limitations of the current task settings and the evaluation metrics. We also provide two simple, efficient, interpretable baseline approaches for statistical selection of trend verbs, which give a strong performance on both previously used evaluation metrics and our new evaluation.</abstract>
       <url hash="9a8a92bc">W19-8622</url>
@@ -17665,7 +17665,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title><fixed-case>V</fixed-case>i<fixed-case>GGO</fixed-case>: A Video Game Corpus for Data-To-Text Generation in Open-Domain Conversation</title>
       <author><first>Juraj</first><last>Juraska</last></author>
       <author><first>Kevin</first><last>Bowden</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <pages>164–172</pages>
       <abstract>The uptake of deep learning in natural language generation (NLG) led to the release of both small and relatively large parallel corpora for training neural models. The existing data-to-text datasets are, however, aimed at task-oriented dialogue systems, and often thus limited in diversity and versatility. They are typically crowdsourced, with much of the noise left in them. Moreover, current neural NLG models do not take full advantage of large training data, and due to their strong generalizing properties produce sentences that look template-like regardless. We therefore present a new corpus of 7K samples, which (1) is clean despite being crowdsourced, (2) has utterances of 9 generalizable and conversational dialogue act types, making it more suitable for open-domain dialogue systems, and (3) explores the domain of video games, which is new to dialogue systems despite having excellent potential for supporting rich conversations.</abstract>
       <url hash="1be0b7bc">W19-8623</url>
@@ -17684,7 +17684,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="25">
       <title>Visually grounded generation of entailments from premises</title>
-      <author><first>Somayeh</first><last>Jafaritazehjani</last></author>
+      <author id="somayeh-jafaritazehjani"><first>Somayeh</first><last>Jafaritazehjani</last></author>
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Marc</first><last>Tanti</last></author>
       <pages>178–188</pages>
@@ -17741,7 +17741,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="30">
       <title>Efficiency Metrics for Data-Driven Models: A Text Summarization Case Study</title>
       <author><first>Erion</first><last>Çano</last></author>
-      <author><first>Ondřej</first><last>Bojar</last></author>
+      <author id="ondrej-bojar"><first>Ondřej</first><last>Bojar</last></author>
       <pages>229–239</pages>
       <abstract>Using data-driven models for solving text summarization or similar tasks has become very common in the last years. Yet most of the studies report basic accuracy scores only, and nothing is known about the ability of the proposed models to improve when trained on more data. In this paper, we define and propose three data efficiency metrics: data score efficiency, data time deficiency and overall data efficiency. We also propose a simple scheme that uses those metrics and apply it for a more comprehensive evaluation of popular methods on text summarization and title generation tasks. For the latter task, we process and release a huge collection of 35 million abstract-title pairs from scientific articles. Our results reveal that among the tested models, the Transformer is the most efficient on both tasks.</abstract>
       <url hash="73d1b8d4">W19-8630</url>
@@ -17786,7 +17786,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="34">
       <title>Personalized Substitution Ranking for Lexical Simplification</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
       <pages>258–267</pages>
       <abstract>A lexical simplification (LS) system substitutes difficult words in a text with simpler ones to make it easier for the user to understand. In the typical LS pipeline, the Substitution Ranking step determines the best substitution out of a set of candidates. Most current systems do not consider the user’s vocabulary proficiency, and always aim for the simplest candidate. This approach may overlook less-simple candidates that the user can understand, and that are semantically closer to the original word. We propose a personalized approach for Substitution Ranking to identify the candidate that is the closest synonym and is non-complex for the user. In experiments on learners of English at different proficiency levels, we show that this approach enhances the semantic faithfulness of the output, at the cost of a relatively small increase in the number of complex words.</abstract>
@@ -17809,7 +17809,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="36">
       <title>Head-First Linearization with Tree-Structured Representation</title>
       <author><first>Xiang</first><last>Yu</last></author>
-      <author><first>Agnieszka</first><last>Falenska</last></author>
+      <author id="agnieszka-falenska"><first>Agnieszka</first><last>Falenska</last></author>
       <author><first>Ngoc Thang</first><last>Vu</last></author>
       <author><first>Jonas</first><last>Kuhn</last></author>
       <pages>279–289</pages>
@@ -17830,7 +17830,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="38">
       <title>Generation of Hip-Hop Lyrics with Hierarchical Modeling and Conditional Templates</title>
-      <author><first>Enrique</first><last>Manjavacas</last></author>
+      <author id="enrique-manjavacas"><first>Enrique</first><last>Manjavacas</last></author>
       <author><first>Mike</first><last>Kestemont</last></author>
       <author><first>Folgert</first><last>Karsdorp</last></author>
       <pages>301–310</pages>
@@ -17872,9 +17872,9 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Hideaki</first><last>Tamori</last></author>
       <author><first>Ko</first><last>Kikuta</last></author>
       <author><first>Jiro</first><last>Nishitoba</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>333–343</pages>
       <abstract>Browsing news articles on multiple devices is now possible. The lengths of news article headlines have precise upper bounds, dictated by the size of the display of the relevant device or interface. Therefore, controlling the length of headlines is essential when applying the task of headline generation to news production. However, because there is no corpus of headlines of multiple lengths for a given article, previous research on controlling output length in headline generation has not discussed whether the system outputs could be adequately evaluated without multiple references of different lengths. In this paper, we introduce two corpora, which are Japanese News Corpus (JNC) and JApanese MUlti-Length Headline Corpus (JAMUL), to confirm the validity of previous evaluation settings. The JNC provides common supervision data for headline generation. The JAMUL is a large-scale evaluation dataset for headlines of three different lengths composed by professional editors. We report new findings on these corpora; for example, although the longest length reference summary can appropriately evaluate the existing methods controlling output length, this evaluation setting has several problems.</abstract>
       <url hash="b8c8f388">W19-8641</url>
@@ -17898,7 +17898,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Albert</first><last>Gatt</last></author>
       <author><first>Emiel</first><last>van Miltenburg</last></author>
       <author><first>Sander</first><last>Wubben</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>355–368</pages>
       <abstract>Currently, there is little agreement as to how Natural Language Generation (NLG) systems should be evaluated. While there is some agreement regarding automatic metrics, there is a high degree of variation in the way that human evaluation is carried out. This paper provides an overview of how human evaluation is currently conducted, and presents a set of best practices, grounded in the literature. With this paper, we hope to contribute to the quality and consistency of human evaluations in NLG.</abstract>
       <url hash="8a96b50d">W19-8643</url>
@@ -17973,7 +17973,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Merel</first><last>van de Kerkhof</last></author>
       <author><first>Ruud</first><last>Koolen</last></author>
       <author><first>Martijn</first><last>Goudbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>403–408</pages>
       <abstract>Task effects in NLG corpus elicitation recently started to receive more attention, but are usually not modeled statistically. We present a controlled replication of the study by Van Miltenburg et al. (2018b), contrasting spoken with written descriptions. We collected additional written Dutch descriptions to supplement the spoken data from the DIDEC corpus, and analyzed the descriptions using mixed effects modeling to account for variation between participants and items. Our results show that the effects of modality largely disappear in a controlled setting.</abstract>
       <url hash="f751c808">W19-8649</url>
@@ -18007,7 +18007,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="52">
       <title>Semantic Noise Matters for Neural Natural Language Generation</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>David M.</first><last>Howcroft</last></author>
+      <author id="david-m-howcroft"><first>David M.</first><last>Howcroft</last></author>
       <author><first>Verena</first><last>Rieser</last></author>
       <pages>421–426</pages>
       <abstract>Neural natural language generation (NNLG) systems are known for their pathological outputs, i.e. generating text which is unrelated to the input specification. In this paper, we show the impact of semantic noise on state-of-the-art NNLG models which implement different semantic control mechanisms. We find that cleaned data can improve semantic correctness by up to 97%, while maintaining fluency. We also find that the most common error is omitting information, rather than hallucination.</abstract>
@@ -18030,7 +18030,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="54">
       <title>Towards a Metric for Automated Conversational Dialogue System Evaluation and Improvement</title>
-      <author><first>Jan Milan</first><last>Deriu</last></author>
+      <author id="jan-milan-deriu"><first>Jan Milan</first><last>Deriu</last></author>
       <author><first>Mark</first><last>Cieliebak</last></author>
       <pages>432–437</pages>
       <abstract>We present “AutoJudge”, an automated evaluation method for conversational dialogue systems. The method works by first generating dialogues based on self-talk, i.e. dialogue systems talking to itself. Then, it uses human ratings on these dialogues to train an automated judgement model. Our experiments show that AutoJudge correlates well with the human ratings and can be used to automatically evaluate dialogue systems, even in deployed systems. In a second part, we attempt to apply AutoJudge to improve existing systems. This works well for re-ranking a set of candidate utterances. However, our experiments show that AutoJudge cannot be applied as reward for reinforcement learning, although the metric can distinguish good from bad dialogues. We discuss potential reasons, but state here already that this is still an open question for further research.</abstract>
@@ -18055,7 +18055,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Felix</first><last>Clouth</last></author>
       <author><first>Jeroen</first><last>Vermunt</last></author>
       <author><first>Xander</first><last>Verbeek</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>443–452</pages>
       <abstract>In this paper, we present a novel data-to-text system for cancer patients, providing information on quality of life implications after treatment, which can be embedded in the context of shared decision making. Currently, information on quality of life implications is often not discussed, partly because (until recently) data has been lacking. In our work, we rely on a newly developed prediction model, which assigns patients to scenarios. Furthermore, we use data-to-text techniques to explain these scenario-based predictions in personalized and understandable language. We highlight the possibilities of NLG for personalization, discuss ethical implications and also present the outcomes of a first evaluation with clinicians.</abstract>
       <url hash="b7cae015">W19-8656</url>
@@ -18069,7 +18069,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Rahul</first><last>Goel</last></author>
       <author><first>Behnam</first><last>Hedayatnia</last></author>
       <author><first>Anu</first><last>Venkatesh</last></author>
-      <author><first>Dilek</first><last>Hakkani-Tur</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek</first><last>Hakkani-Tur</last></author>
       <author><first>Raefer</first><last>Gabriel</last></author>
       <pages>453–462</pages>
       <abstract>Current approaches to Natural Language Generation (NLG) for dialog mainly focus on domain-specific, task-oriented applications (e.g. restaurant booking) using limited ontologies (up to 20 slot types), usually without considering the previous conversation context. Furthermore, these approaches require large amounts of data for each domain, and do not benefit from examples that may be available for other domains. This work explores the feasibility of applying statistical NLG to scenarios requiring larger ontologies, such as multi-domain dialog applications or open-domain question answering (QA) based on knowledge graphs. We model NLG through an Encoder-Decoder framework using a large dataset of interactions between real-world users and a conversational agent for open-domain QA. First, we investigate the impact of increasing the number of slot types on the generation quality and experiment with different partitions of the QA data with progressively larger ontologies (up to 369 slot types). Second, we perform multi-task learning experiments between open-domain QA and task-oriented dialog, and benchmark our model on a popular NLG dataset. Moreover, we experiment with using the conversational context as an additional input to improve response generation quality. Our experiments show the feasibility of learning statistical NLG models for open-domain QA with larger ontologies.</abstract>
@@ -18079,7 +18079,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="58">
       <title>Using <fixed-case>NLG</fixed-case> for speech synthesis of mathematical sentences</title>
-      <author><first>Alessandro</first><last>Mazzei</last></author>
+      <author id="alessandro-mazzei"><first>Alessandro</first><last>Mazzei</last></author>
       <author><first>Michele</first><last>Monticone</last></author>
       <author><first>Cristian</first><last>Bernareggi</last></author>
       <pages>463–472</pages>
@@ -18128,7 +18128,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title><fixed-case>D</fixed-case>is<fixed-case>S</fixed-case>im: A Discourse-Aware Syntactic Text Simplification Framework for <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman</title>
       <author><first>Christina</first><last>Niklaus</last></author>
       <author><first>Matthias</first><last>Cetto</last></author>
-      <author><first>André</first><last>Freitas</last></author>
+      <author id="andre-freitas"><first>André</first><last>Freitas</last></author>
       <author><first>Siegfried</first><last>Handschuh</last></author>
       <pages>504–507</pages>
       <abstract>We introduce DisSim, a discourse-aware sentence splitting framework for English and German whose goal is to transform syntactically complex sentences into an intermediate representation that presents a simple and more regular structure which is easier to process for downstream semantic applications. For this purpose, we turn input sentences into a two-layered semantic hierarchy in the form of core facts and accompanying contexts, while identifying the rhetorical relations that hold between them. In that way, we preserve the coherence structure of the input and, hence, its interpretability for downstream tasks.</abstract>
@@ -18167,7 +18167,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Generating Abstractive Summaries with Finetuned Language Models</title>
       <author><first>Sebastian</first><last>Gehrmann</last></author>
       <author><first>Zachary</first><last>Ziegler</last></author>
-      <author><first>Alexander</first><last>Rush</last></author>
+      <author id="alexander-m-rush"><first>Alexander</first><last>Rush</last></author>
       <pages>516–522</pages>
       <abstract>Neural abstractive document summarization is commonly approached by models that exhibit a mostly extractive behavior. This behavior is facilitated by a copy-attention which allows models to copy words from a source document. While models in the mostly extractive news summarization domain benefit from this inductive bias, they commonly fail to paraphrase or compress information from the source document. Recent advances in transfer-learning from large pretrained language models give rise to alternative approaches that do not rely on copy-attention and instead learn to generate concise and abstractive summaries. In this paper, as part of the TL;DR challenge, we compare the abstractiveness of summaries from different summarization approaches and show that transfer-learning can be efficiently utilized without any changes to the model architecture. We demonstrate that the approach leads to a higher level of abstraction for a similar performance on the TL;DR challenge tasks, enabling true natural language compression.</abstract>
       <url hash="d4e50260">W19-8665</url>
@@ -18180,7 +18180,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <author><first>Michael</first><last>Völske</last></author>
       <author><first>Nedim</first><last>Lipka</last></author>
       <author><first>Benno</first><last>Stein</last></author>
-      <author><first>Hinrich</first><last>Schütze</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schütze</last></author>
       <author><first>Martin</first><last>Potthast</last></author>
       <pages>523–528</pages>
       <abstract>In this paper, we report on the results of the TL;DR challenge, discussing an extensive manual evaluation of the expected properties of a good summary based on analyzing the comments provided by human annotators.</abstract>
@@ -18225,7 +18225,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="70">
       <title>Neural Generation for <fixed-case>C</fixed-case>zech: Data and Baselines</title>
       <author><first>Ondřej</first><last>Dušek</last></author>
-      <author><first>Filip</first><last>Jurčíček</last></author>
+      <author id="filip-jurcicek"><first>Filip</first><last>Jurčíček</last></author>
       <pages>563–574</pages>
       <abstract>We present the first dataset targeted at end-to-end NLG in Czech in the restaurant domain, along with several strong baseline models using the sequence-to-sequence approach. While non-English NLG is under-explored in general, Czech, as a morphologically rich language, makes the task even harder: Since Czech requires inflecting named entities, delexicalization or copy mechanisms do not work out-of-the-box and lexicalizing the generated outputs is non-trivial. In our experiments, we present two different approaches to this this problem: (1) using a neural language model to select the correct inflected form while lexicalizing, (2) a two-step generation setup: our sequence-to-sequence model generates an interleaved sequence of lemmas and morphological tags, which are then inflected by a morphological generator.</abstract>
       <url hash="2806eeca">W19-8670</url>
@@ -18235,7 +18235,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="71">
       <title>Modeling Confidence in Sequence-to-Sequence Models</title>
       <author><first>Jan</first><last>Niehues</last></author>
-      <author><first>Ngoc-Quan</first><last>Pham</last></author>
+      <author id="ngoc-quan-pham"><first>Ngoc-Quan</first><last>Pham</last></author>
       <pages>575–583</pages>
       <abstract>Recently, significant improvements have been achieved in various natural language processing tasks using neural sequence-to-sequence models. While aiming for the best generation quality is important, ultimately it is also necessary to develop models that can assess the quality of their output. In this work, we propose to use the similarity between training and test conditions as a measure for models’ confidence. We investigate methods solely using the similarity as well as methods combining it with the posterior probability. While traditionally only target tokens are annotated with confidence measures, we also investigate methods to annotate source tokens with confidence. By learning an internal alignment model, we can significantly improve confidence projection over using state-of-the-art external alignment tools. We evaluate the proposed methods on downstream confidence estimation for machine translation (MT). We show improvements on segment-level confidence estimation as well as on confidence estimation for source tokens. In addition, we show that the same methods can also be applied to other tasks using sequence-to-sequence models. On the automatic speech recognition (ASR) task, we are able to find 60% of the errors by looking at 20% of the data.</abstract>
       <url hash="dd96805c">W19-8671</url>
@@ -18245,7 +18245,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="72">
       <title>A Good Sample is Hard to Find: Noise Injection Sampling and Self-Training for Neural Language Generation Models</title>
       <author><first>Chris</first><last>Kedzie</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <pages>584–593</pages>
       <abstract>Deep neural networks (DNN) are quickly becoming the de facto standard modeling method for many natural language generation (NLG) tasks. In order for such models to truly be useful, they must be capable of correctly generating utterances for novel meaning representations (MRs) at test time. In practice, even sophisticated DNNs with various forms of semantic control frequently fail to generate utterances faithful to the input MR. In this paper, we propose an architecture agnostic self-training method to sample novel MR/text utterance pairs to augment the original training data. Remarkably, after training on the augmented data, even simple encoder-decoder models with greedy decoding are capable of generating semantically correct utterances that are as good as state-of-the-art outputs in both automatic and human evaluations of quality.</abstract>
       <url hash="1244d742">W19-8672</url>
@@ -18304,7 +18304,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>What Influences the Features of Post-editese? A Preliminary Study</title>
       <author><first>Sheila</first><last>Castilho</last></author>
       <author><first>Natália</first><last>Resende</last></author>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <pages>19–27</pages>
       <abstract>While a number of studies have shown evidence of translationese phenomena, that is, statistical differences between original texts and translated texts (Gellerstam, 1986), results of studies searching for translationese features in postedited texts (what has been called ”posteditese” (Daems et al., 2017)) have presented mixed results. This paper reports a preliminary study aimed at identifying the presence of post-editese features in machine-translated post-edited texts and at understanding how they differ from translationese features. We test the influence of factors such as post-editing (PE) levels (full vs. light), translation proficiency (professionals vs. students) and text domain (news vs. literary). Results show evidence of post-editese features, especially in light PE texts and in certain domains.</abstract>
       <url hash="f873968e">W19-8703</url>
@@ -18314,8 +18314,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="4">
       <title>Designing a Frame-Semantic Machine Translation Evaluation Metric</title>
       <author><first>Oliver</first><last>Czulo</last></author>
-      <author><first>Tiago Timponi</first><last>Torrent</last></author>
-      <author><first>Ely Edison da Silva</first><last>Matos</last></author>
+      <author id="tiago-timponi-torrent"><first>Tiago Timponi</first><last>Torrent</last></author>
+      <author id="ely-edison-da-silva-matos"><first>Ely Edison da Silva</first><last>Matos</last></author>
       <author><first>Alexandre</first><last>Diniz da Costa</last></author>
       <author><first>Debanjana</first><last>Kar</last></author>
       <pages>28–35</pages>
@@ -18355,7 +18355,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="8">
       <title>Comparing a Hand-crafted to an Automatically Generated Feature Set for Deep Learning: Pairwise Translation Evaluation</title>
       <author><first>Despoina</first><last>Mouratidis</last></author>
-      <author><first>Katia Lida</first><last>Kermanidis</last></author>
+      <author id="katia-lida-kermanidis"><first>Katia Lida</first><last>Kermanidis</last></author>
       <pages>66–74</pages>
       <abstract>The automatic evaluation of machine translation (MT) has proven to be a very significant research topic. Most automatic evaluation methods focus on the evaluation of the output of MT as they compute similarity scores that represent translation quality. This work targets on the performance of MT evaluation. We present a general scheme for learning to classify parallel translations, using linguistic information, of two MT model outputs and one human (reference) translation. We present three experiments to this scheme using neural networks (NN). One using string based hand-crafted features (Exp1), the second using automatically trained embeddings from the reference and the two MT outputs (one from a statistical machine translation (SMT) model and the other from a neural ma-chine translation (NMT) model), which are learned using NN (Exp2), and the third experiment (Exp3) that combines information from the other two experiments. The languages involved are English (EN), Greek (GR) and Italian (IT) segments are educational in domain. The proposed language-independent learning scheme which combines information from the two experiments (experiment 3) achieves higher classification accuracy compared with models using BLEU score information as well as other classification approaches, such as Random Forest (RF) and Support Vector Machine (SVM).</abstract>
       <url hash="de59dc0b">W19-8708</url>
@@ -18366,7 +18366,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
       <title>Differences between <fixed-case>SMT</fixed-case> and <fixed-case>NMT</fixed-case> Output - a Translators’ Point of View</title>
       <author><first>Jonathan</first><last>Mutal</last></author>
       <author><first>Lise</first><last>Volkart</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <author><first>Sabrina</first><last>Girletti</last></author>
       <author><first>Paula</first><last>Estrella</last></author>
       <pages>75–81</pages>
@@ -18416,7 +18416,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     </paper>
     <paper id="14">
       <title>Towards a Proactive <fixed-case>MWE</fixed-case> Terminological Platform for Cross-Lingual Mediation in the Age of Big Data</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Kapo</first><last>Chow</last></author>
       <author><first>Junru</first><last>Nie</last></author>
       <author><first>Yuan</first><last>Yuan</last></author>
@@ -18482,8 +18482,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
     <paper id="1">
       <title><fixed-case>RANLP</fixed-case> 2019 Multilingual Headline Generation Task Overview</title>
       <author><first>Marina</first><last>Litvak</last></author>
-      <author><first>John M.</first><last>Conroy</last></author>
-      <author><first>Peter A.</first><last>Rankel</last></author>
+      <author id="john-conroy"><first>John M.</first><last>Conroy</last></author>
+      <author id="peter-a-rankel"><first>Peter A.</first><last>Rankel</last></author>
       <pages>1–5</pages>
       <abstract>The objective of the 2019 RANLP Multilingual Headline Generation (HG) Task is to explore some of the challenges highlighted by current state of the art approaches on creating informative headlines to news articles: non-descriptive headlines, out-of-domain training data, generating headlines from long documents which are not well represented by the head heuristic, and dealing with multilingual domain. This tasks makes available a large set of training data for headline generation and provides an evaluation methods for the task. Our data sets are drawn from Wikinews as well as Wikipedia. Participants were required to generate headlines for at least 3 languages, which were evaluated via automatic methods. A key aspect of the task is multilinguality. The task measures the performance of multilingual headline generation systems using the Wikipedia and Wikinews articles in multiple languages. The objective is to assess the performance of automatic headline generation techniques on text documents covering a diverse range of languages and topics outside the news domain.</abstract>
       <url hash="f619fdd7">W19-8901</url>
diff --git a/data/xml/W77.xml b/data/xml/W77.xml
index 9f43ed37ed..71e803e644 100644
--- a/data/xml/W77.xml
+++ b/data/xml/W77.xml
@@ -77,7 +77,7 @@
     </paper>
     <paper id="9">
       <title><fixed-case>DAN</fixed-case>w<fixed-case>ORD</fixed-case> – Hyppighedsundersøgelser i moderne dansk (<fixed-case>DAN</fixed-case>w<fixed-case>ORD</fixed-case> – Frequency surveys in modern <fixed-case>D</fixed-case>anish) [In <fixed-case>D</fixed-case>anish]</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <author><first>Hanne</first><last>Ruus</last></author>
       <pages>65–74</pages>
       <url hash="ebc07df7">W77-0109</url>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="13">
       <title>Chartanalys och morfologi (Chart analysis and morphology) [In <fixed-case>S</fixed-case>wedish]</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>87–93</pages>
       <url hash="f2b845ee">W77-0113</url>
       <bibkey>sagvall-hein-1977-chartanalys</bibkey>
diff --git a/data/xml/W79.xml b/data/xml/W79.xml
index 966bdb5860..c223bf5f6a 100644
--- a/data/xml/W79.xml
+++ b/data/xml/W79.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 2nd Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 1979)</booktitle>
       <url hash="812ea39c">W79-01</url>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <publisher>Institut for Anvendt og Matematisk Lingvistik, University of Copenhagen, Denmark</publisher>
       <address>Copenhagen, Denmark</address>
       <month>October</month>
diff --git a/data/xml/W81.xml b/data/xml/W81.xml
index f0c3965785..a8f569db22 100644
--- a/data/xml/W81.xml
+++ b/data/xml/W81.xml
@@ -59,7 +59,7 @@
     </paper>
     <paper id="7">
       <title>Experience with <fixed-case>COMMENTATOR</fixed-case>, a computer system simulating verbal behaviour</title>
-      <author><first>Milan</first><last>Bílý</last></author>
+      <author id="milan-bily"><first>Milan</first><last>Bílý</last></author>
       <pages>39–46</pages>
       <url hash="674ecb22">W81-0107</url>
       <bibkey>bily-1981-experience</bibkey>
@@ -94,7 +94,7 @@
     </paper>
     <paper id="12">
       <title><fixed-case>U</fixed-case>ppsala Chart Parser, Version 2 (<fixed-case>UCP</fixed-case>-2) – En översikt (<fixed-case>U</fixed-case>ppsala Chart Parser, Version 2 (<fixed-case>UCP</fixed-case>-2) – An overview) [In <fixed-case>S</fixed-case>wedish]</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>95–116</pages>
       <url hash="2d41cf5a">W81-0112</url>
       <bibkey>sagvall-hein-1981-uppsala</bibkey>
diff --git a/data/xml/W83.xml b/data/xml/W83.xml
index 948ed90998..449e3f6a56 100644
--- a/data/xml/W83.xml
+++ b/data/xml/W83.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 4th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 1983)</booktitle>
       <url hash="e6a245d4">W83-01</url>
-      <editor><first>Anna Sågvall</first><last>Hein</last></editor>
+      <editor id="anna-sagvall-hein"><first>Anna Sågvall</first><last>Hein</last></editor>
       <publisher>Centrum för datorlingvistik, Uppsala University, Sweden</publisher>
       <address>Uppsala, Sweden</address>
       <month>May</month>
@@ -89,10 +89,10 @@
     </paper>
     <paper id="11">
       <title>Knowledge Engineering Applied to Morphological Analysis</title>
-      <author><first>Harri</first><last>Jäppinen</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
-      <author><first>Esa</first><last>Nelimarkka</last></author>
-      <author><first>Matti</first><last>Ylilammi</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
+      <author id="esa-nelimarkka"><first>Esa</first><last>Nelimarkka</last></author>
+      <author id="matti-ylilammi"><first>Matti</first><last>Ylilammi</last></author>
       <pages>111–120</pages>
       <url hash="fbb632a5">W83-0111</url>
       <bibkey>jappinen-etal-1984-knowledge</bibkey>
@@ -120,23 +120,23 @@
     </paper>
     <paper id="15">
       <title><fixed-case>HP</fixed-case> – A Heuristic Finite State Parser Based on Morphology</title>
-      <author><first>Gunnel</first><last>Källgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Källgren</last></author>
       <pages>155–161</pages>
       <url hash="ed81ed38">W83-0115</url>
       <bibkey>kallgren-1984-hp</bibkey>
     </paper>
     <paper id="16">
       <title>Regelformalismer til brug ved datamatisk lingvistik (Rule formalisms for use in computational linguistics) [In <fixed-case>D</fixed-case>anish]</title>
-      <author><first>Bente</first><last>Maegaard</last></author>
+      <author id="bente-maegaard"><first>Bente</first><last>Maegaard</last></author>
       <pages>162–168</pages>
       <url hash="eb5ebf12">W83-0116</url>
       <bibkey>maegaard-1984-regelformalismer</bibkey>
     </paper>
     <paper id="17">
       <title>A Computational Model of <fixed-case>F</fixed-case>innish Sentence Structure</title>
-      <author><first>Esa</first><last>Nelimarkka</last></author>
-      <author><first>Harri</first><last>Jäppinen</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="esa-nelimarkka"><first>Esa</first><last>Nelimarkka</last></author>
+      <author id="harri-jappinen"><first>Harri</first><last>Jäppinen</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <pages>169–177</pages>
       <url hash="ba9c5dcb">W83-0117</url>
       <bibkey>nelimarkka-etal-1984-computational</bibkey>
diff --git a/data/xml/W85.xml b/data/xml/W85.xml
index ede1febbe4..94f5290756 100644
--- a/data/xml/W85.xml
+++ b/data/xml/W85.xml
@@ -51,7 +51,7 @@
     </paper>
     <paper id="6">
       <title>A self-extending lexicon: description of a word learning program</title>
-      <author><first>Eva</first><last>Ejerhed</last></author>
+      <author id="eva-ejerhed"><first>Eva</first><last>Ejerhed</last></author>
       <author><first>Hank</first><last>Bromley</last></author>
       <pages>59–72</pages>
       <url hash="71f65c4e">W85-0106</url>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>REFTEX</fixed-case> – et datamatstøttet oversættelsessystem (<fixed-case>REFTEX</fixed-case> – A computer-assisted translation system) [In <fixed-case>D</fixed-case>anish]</title>
-      <author><first>Poul Søren</first><last>Kjærsgaard</last></author>
+      <author id="poul-soren-kjaersgaard"><first>Poul Søren</first><last>Kjærsgaard</last></author>
       <pages>121–130</pages>
       <url hash="dae2367a">W85-0111</url>
       <bibkey>kjaersgaard-1986-reftex</bibkey>
@@ -109,7 +109,7 @@
     </paper>
     <paper id="14">
       <title><fixed-case>DPL</fixed-case> – a computational method for describing grammars and modelling parsers</title>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <pages>151–159</pages>
       <url hash="51167a84">W85-0114</url>
       <bibkey>lehtola-1986-dpl</bibkey>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="18">
       <title>A two-level description of written <fixed-case>F</fixed-case>rench</title>
-      <author><first>Annette</first><last>Östling Andersson</last></author>
+      <author id="annette-ostling-andersson"><first>Annette</first><last>Östling Andersson</last></author>
       <pages>195–202</pages>
       <url hash="a9180e58">W85-0118</url>
       <bibkey>ostling-andersson-1986-two</bibkey>
diff --git a/data/xml/W87.xml b/data/xml/W87.xml
index f8a4dbc816..b89c9b0849 100644
--- a/data/xml/W87.xml
+++ b/data/xml/W87.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>What good is Syntactic Information in the Lexicon of a Syntactic Parser?</title>
-      <author><first>Gunnel</first><last>Källgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Källgren</last></author>
       <pages>5–16</pages>
       <url hash="5498352b">W87-0101</url>
       <bibkey>kallgren-1988-good</bibkey>
@@ -37,7 +37,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>AWARE</fixed-case> – <fixed-case>DAG</fixed-case>-transformations for Semantic Analysis</title>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <author><first>Timo</first><last>Honkela</last></author>
       <pages>58–68</pages>
       <url hash="d2160e24">W87-0104</url>
@@ -46,7 +46,7 @@
     <paper id="5">
       <title>Predication Graphs as Canonical Representation of Query Sentences</title>
       <author><first>Timo</first><last>Honkela</last></author>
-      <author><first>Aarno</first><last>Lehtola</last></author>
+      <author id="aarno-lehtola"><first>Aarno</first><last>Lehtola</last></author>
       <author><first>K.</first><last>Valkonen</last></author>
       <pages>69–77</pages>
       <url hash="e0519055">W87-0105</url>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="10">
       <title>Processing Sentences Clause by Clause</title>
-      <author><first>Eva</first><last>Ejerhed</last></author>
+      <author id="eva-ejerhed"><first>Eva</first><last>Ejerhed</last></author>
       <pages>155–169</pages>
       <url hash="adabfb4b">W87-0110</url>
       <bibkey>ejerhed-1988-processing</bibkey>
@@ -148,7 +148,7 @@
     </paper>
     <paper id="19">
       <title>Simulering af relationel database (Simulation of relational databases) [In <fixed-case>D</fixed-case>anish]</title>
-      <author><first>Bodil</first><last>Nistrup Madsen</last></author>
+      <author id="bodil-nistrup-madsen"><first>Bodil</first><last>Nistrup Madsen</last></author>
       <pages>286–300</pages>
       <url hash="3c48ea06">W87-0119</url>
       <bibkey>nistrup-madsen-1988-simulering</bibkey>
diff --git a/data/xml/W89.xml b/data/xml/W89.xml
index 68fa0bd16e..f88ad46934 100644
--- a/data/xml/W89.xml
+++ b/data/xml/W89.xml
@@ -5,7 +5,7 @@
       <booktitle>Proceedings of the 7th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 1989)</booktitle>
       <url hash="302734b4">W89-01</url>
       <editor><first>Jörgen</first><last>Pind</last></editor>
-      <editor><first>Eiríkur</first><last>Rögnvaldsson</last></editor>
+      <editor id="eirikur-rognvaldsson"><first>Eiríkur</first><last>Rögnvaldsson</last></editor>
       <publisher>Institute of Lexicography, Institute of Linguistics, University of Iceland, Iceland</publisher>
       <address>Reykjavík, Iceland</address>
       <year>1990</year>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title>A <fixed-case>S</fixed-case>wedish Clause Grammar And Its Implementation</title>
-      <author><first>Eva</first><last>Ejerhed</last></author>
+      <author id="eva-ejerhed"><first>Eva</first><last>Ejerhed</last></author>
       <pages>14–29</pages>
       <url hash="43d9139f">W89-0102</url>
       <bibkey>ejerhed-1990-swedish</bibkey>
@@ -45,14 +45,14 @@
     </paper>
     <paper id="5">
       <title>Is Two-level Morphology a Morphological Model?</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <pages>51–59</pages>
       <url hash="e5d8ca07">W89-0105</url>
       <bibkey>johannessen-1990-two</bibkey>
     </paper>
     <paper id="6">
       <title>Automatic Indexing and Generating of Content Graphs from Unrestricted Text</title>
-      <author><first>Gunnel</first><last>Källgren</last></author>
+      <author id="gunnel-kallgren"><first>Gunnel</first><last>Källgren</last></author>
       <pages>60–76</pages>
       <url hash="6d0d7cf3">W89-0106</url>
       <bibkey>kallgren-1990-automatic</bibkey>
@@ -108,14 +108,14 @@
     </paper>
     <paper id="14">
       <title>Representational Issues within <fixed-case>E</fixed-case>urotra</title>
-      <author><first>Hanne</first><last>Fersøe</last></author>
+      <author id="hanne-fersoe"><first>Hanne</first><last>Fersøe</last></author>
       <pages>157–169</pages>
       <url hash="4e89d0b9">W89-0114</url>
       <bibkey>fersoe-1990-representational</bibkey>
     </paper>
     <paper id="15">
       <title>Identifiering av diskursrefenter vid maskinöversättning från ryska till svenska (Identification of dicourse references in machine translation from <fixed-case>R</fixed-case>ussian to <fixed-case>S</fixed-case>wedish) [In <fixed-case>S</fixed-case>wedish]</title>
-      <author><first>Barbara</first><last>Gawrońska-Werngren</last></author>
+      <author id="barbara-gawronska"><first>Barbara</first><last>Gawrońska-Werngren</last></author>
       <pages>170–182</pages>
       <url hash="8e48df33">W89-0115</url>
       <bibkey>gawronska-werngren-1990-identifiering</bibkey>
@@ -136,7 +136,7 @@
     </paper>
     <paper id="18">
       <title>Collocations in Knowledge Based Machine Translation</title>
-      <author><first>Guðrún</first><last>Magnúsdóttir</last></author>
+      <author id="gudrun-magnusdottir"><first>Guðrún</first><last>Magnúsdóttir</last></author>
       <pages>204–207</pages>
       <url hash="1df09466">W89-0118</url>
       <bibkey>magnusdottir-1990-collocations</bibkey>
@@ -192,7 +192,7 @@
     </paper>
     <paper id="26">
       <title>Application-Dependent Discourse Management for Natural Language Interfaces: An Empirical Investigation</title>
-      <author><first>Arne</first><last>Jönsson</last></author>
+      <author id="arne-jonsson"><first>Arne</first><last>Jönsson</last></author>
       <pages>297–307</pages>
       <url hash="7ccb3695">W89-0126</url>
       <bibkey>jonsson-1990-application</bibkey>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="29">
       <title>Lemmatising the Definitions of Svensk Ordbok by Morphological and Syntactic Analysis. A Pilot Study</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>342–357</pages>
       <url hash="a23438e2">W89-0129</url>
       <bibkey>sagvall-hein-1990-lemmatising</bibkey>
@@ -230,7 +230,7 @@
   <volume id="2" type="proceedings">
     <meta>
       <booktitle>Proceedings of the First International Workshop on Parsing Technologies</booktitle>
-      <editor><first>Masaru</first><last>Tomita</last></editor>
+      <editor id="masaru-tomita"><first>Masaru</first><last>Tomita</last></editor>
       <publisher>Carnegy Mellon University</publisher>
       <address>Pittsburgh, Pennsylvania, USA</address>
       <month>August</month>
@@ -245,7 +245,7 @@
     </frontmatter>
     <paper id="1">
       <title>Unification and Classification: An Experiment in Information-Based Parsing</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <pages>1–7</pages>
       <abstract>When dealing with a phenomenon as vast and com plex as natural language, an experimental approach is often the best way to discover new computational methods and determine their usefulness. The experimental process includes designing and selecting new experiments, carrying out the experiments, and evaluating the experiments. Most conference presentations are about finished experiments, completed theoretical results, or the evaluation of systems already in use. In this workshop setting, I would like to depart from this tendency to discuss some experiments that we are beginning to perform, and the reasons for investigating a particular approach to parsing. This approach builds on recent work in unification-based parsing and classification-based knowledge representation, developing an architecture that brings together the capabilities of these related frameworks.</abstract>
       <url hash="e3a9d8d3">W89-0201</url>
@@ -261,10 +261,10 @@
     </paper>
     <paper id="3">
       <title>An Overview of Disjunctive Constraint Satisfaction</title>
-      <author><first>John T.</first><last>Maxwell III</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="john-t-maxwell-iii"><first>John T.</first><last>Maxwell III</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <pages>18–27</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="8fbd21e5">W89-0203</url>
       <bibkey>maxwell-iii-kaplan-1989-overview</bibkey>
     </paper>
@@ -304,7 +304,7 @@
     <paper id="8">
       <title>The Computational Implementation of Principle-Based Parsers</title>
       <author><first>Sandiway</first><last>Fong</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <pages>75–84</pages>
       <abstract>This paper addresses the issue of how to organize linguistic principles for efficient processing. Based on the general characterization of principles in terms of purely computational properties, the effects of principle-ordering on parser performance are investigated. A novel parser that exploits the possible variation in principle-ordering to dynamically re-order principles is described. Heuristics for minimizing the amount of unnecessary work performed during the parsing process are also discussed.</abstract>
       <url hash="29cce927">W89-0208</url>
@@ -387,18 +387,18 @@
     </paper>
     <paper id="17">
       <title>Parsing Spoken Language Using Combinatory Grammars</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>162–171</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="f09eb777">W89-0217</url>
       <bibkey>steedman-1989-parsing</bibkey>
     </paper>
     <paper id="18">
       <title>Recognition of <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars and Linear Indexed Grammars</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
-      <author><first>David J.</first><last>Weir</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="david-weir"><first>David J.</first><last>Weir</last></author>
       <pages>172–181</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="c15c0f6b">W89-0218</url>
       <bibkey>vijay-shanker-weir-1989-recognition</bibkey>
     </paper>
@@ -428,7 +428,7 @@
     </paper>
     <paper id="22">
       <title>Probabilistic Parsing for Spoken Language Applications</title>
-      <author><first>Stephanie</first><last>Seneff</last></author>
+      <author id="stephanie-seneff"><first>Stephanie</first><last>Seneff</last></author>
       <pages>209–218</pages>
       <abstract>A new natural language system, TINA, has been developed for applications involving spoken language tasks, which integrate key ideas from context free grammars, Augmented Transition Networks (ATN’s) [6], and Lexical Functional Grammars (LFG’s) [1]. The parser uses a best-first strategy, with probability assignments on all arcs obtained automatically from a set of example sentences. An initial context-free grammar, derived from the example sentences, is first converted to a probabilistic network structure. Control includes both top-down and bottom-up cycles, and key parameters are passed among nodes to deal with long-distance movement, agreement, and semantic constraints. The probabilities provide a natural mechanism for exploring more common grammatical constructions first. One novel feature of TINA is that it provides an atuomatic sentence generation capability, which has been very effective for identifying overgeneration problems. A fully integrated spoken language system using this parser is under development.</abstract>
       <url hash="57c065f1">W89-0222</url>
@@ -445,7 +445,7 @@
     <paper id="24">
       <title>A Connectionist Parser Aimed at Spoken Language</title>
       <author><first>Ajay</first><last>Jain</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <pages>221–229</pages>
       <abstract>We describe a connectionist model which learns to parse single sentences from sequential word input. A parse in the connectionist network contains information about role assignment, prepositional attachment, relative clause structure, and subordinate clause structure. The trained network displays several interesting types of behavior. These include predictive ability, tolerance to certain corruptions of input word sequences, and some generalization capability. We report on experiments in which a small number of sentence types have been successfully learned by a network. Work is in progress on a larger database. Application of this type of connectionist model to the area of spoken language processing is discussed.</abstract>
       <url hash="7ab8d1af">W89-0224</url>
@@ -471,9 +471,9 @@
     </paper>
     <paper id="27">
       <title>Complexity and Decidability in Left-Associative Grammar</title>
-      <author><first>Roland</first><last>Hausser</last></author>
+      <author id="roland-r-hausser"><first>Roland</first><last>Hausser</last></author>
       <pages>254–263</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="3be734b6">W89-0227</url>
       <bibkey>hausser-1989-complexity</bibkey>
     </paper>
@@ -487,7 +487,7 @@
     </paper>
     <paper id="29">
       <title>Finite State Machines from Feature Grammars</title>
-      <author><first>Alan W</first><last>Black</last></author>
+      <author id="alan-w-black"><first>Alan W</first><last>Black</last></author>
       <pages>277–285</pages>
       <abstract>This paper describes the conversion of a set of feature grammar rules into a deterministic finite state machine that accepts the same language (or at least a well-defined related language). First the reasoning behind why this is an interesting thing to do within the Edinburgh speech recogniser project, is discussed. Then details about the compilation algorithm are given. Finally, there is some discussion of the advantages and disadvantages of this method of implementing feature based grammar formalisms.</abstract>
       <url hash="f77831da">W89-0229</url>
@@ -522,9 +522,9 @@
     </paper>
     <paper id="33">
       <title>Chart Parsing for Loosely Coupled Parallel Systems</title>
-      <author><first>Henry S.</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
       <pages>320–328</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="28f3fa5f">W89-0233</url>
       <bibkey>thompson-1989-chart</bibkey>
     </paper>
@@ -540,7 +540,7 @@
     <paper id="35">
       <title>The Relevance of Lexicalization to Parsing</title>
       <author><first>Yves</first><last>Schabes</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <pages>339–349</pages>
       <abstract>In this paper, we investigate the processing of the so-called ‘lexicalized’ grammar. In ‘lexicalized’ grammars (Schabes, Abeille and Joshi, 1988), each elementary structure is systema tically associated with a lexical ‘head’. These structures specify extended domains of locality (as compared to CFGs) over which constraints can be stated. The ‘grammar’ consists of a lexicon where each lexical item is associated with a finite number of structures for which that item is the ‘head’ . There are no separate grammar rules. There are, of course, ‘rules’ which tell us how these structures are combined. A general two-pass parsing strategy for ‘lexicalized’ grammars follows naturally. In the first stage, the parser selects a set of elementary structures associated with the lexical items in the input sentence, and in the second stage the sentence is parsed with respect to this set. We evaluate this strategy with respect to two characteristics. First, the amount of filtering on the entire grammar is evaluated: once the first pass is performed, the parser uses only a subset of the grammar. Second, we evaluate the use of non-local information: the structures selected during the first pass encode the morphological value (and therefore the position in the string) of their ‘head’; this enables the parser to use non-local in form ation to guide its search. We take Lexicalized Tree Adjoining Grammars as an in stance of lexicalized grammar. We illustrate the organization of the grammar. Then we show how a general Earley-type TAG parser (Schabes and Joshi, 1988) can take advantage of lexicalization. Empirical data show that the filtering of the grammar and the non-local in formation provided by the two-pass strategy improve the performance of the parser. We explain how constraints over the elementary structures expressed by unification equations can be parsed by a simple extension of the Earley-type TAG parser. Lexicalization guarantees termination of the algorithm without special devices such as restrictors.</abstract>
       <url hash="4adad542">W89-0235</url>
@@ -566,7 +566,7 @@
     <paper id="38">
       <title>Analysis Techniques for <fixed-case>K</fixed-case>orean Sentences Based on <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
       <author><first>Deok Ho</first><last>Yoon</last></author>
-      <author><first>Yung Taek</first><last>Kim</last></author>
+      <author id="yung-taek-kim"><first>Yung Taek</first><last>Kim</last></author>
       <pages>369–378</pages>
       <abstract>The Unification-based Grammars seem to be adequate for the analysis of agglutinative languages such as Korean, etc. In this paper, the merits of Lexical Functional Grammar is analyzed and the structure of Korean Syntactic Analyzer is described. Verbal complex category is used for the analysis of several linguistic phenomena and a new attribute of UNKNOWN is defined for the analysis of grammatical relations.</abstract>
       <url hash="f8b13409">W89-0238</url>
@@ -584,10 +584,10 @@
     </paper>
     <paper id="40">
       <title>Parsing, Word Associations and Typical Predicate-Argument Relations</title>
-      <author><first>Kenneth</first><last>Church</last></author>
-      <author><first>William</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
       <author><first>Patrick</first><last>Hanks</last></author>
-      <author><first>Donald</first><last>Hindle</last></author>
+      <author id="donald-hindle"><first>Donald</first><last>Hindle</last></author>
       <pages>389–398</pages>
       <abstract>There are a number of collocational constraints in natural languages that ought to play a more important role in natural language parsers. Thus, for example, it is hard for most parsers to take advantage of the fact that wine is typically drunk, produced, and sold, but (probably) not pruned. So too, it is hard for a parser to know which verbs go with which prepositions (e.g., set up) and which nouns fit together to form compound noun phrases (e.g., computer programmer). This paper will attempt to show that many of these types of concerns can be addressed with syntactic methods (symbol pushing), and need not require explicit semantic interpretation. We have found that it is possible to identify many of these interesting co-occurrence relations by computing simple summary statistics over millions of words of text. This paper will summarize a number of experiments carried out by various subsets of the authors over the last few years. The term collocation will be used quite broadly to include constraints on SVO (subject verb object) triples, phrasal verbs, compound noun phrases, and psychoiinguistic notions of word association (e.g., doctor/nurse).</abstract>
       <url hash="a25b55c8">W89-0240</url>
@@ -605,7 +605,7 @@
     <paper id="42">
       <title><fixed-case>PREMO</fixed-case>: Parsing by Conspicuous Lexical Consumption</title>
       <author><first>Brian M.</first><last>Slator</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <pages>401–413</pages>
       <abstract>PREMO is a knowledge-based Preference Semantics parser with access to a large, lexical semantic knowledge base and organized along the lines of an operating system. The state of every partial parse is captured in a structure called a language object, and the control structure of the preference machine is a priority queue of these language objects. The language object at the front of the queue has the highest score as computed by a preference metric that weighs grammatical predictions, semantic type matching, and pragmatic coherence. The highest priority language object is the intermediate reading that is currently most preferred (the others are still “alive,” but not actively pursued); in this way the preference machine avoids combinatorial explosion by following a “best-first” strategy for parsing. The system has clear extensions into parallel processing.</abstract>
       <url hash="258d9be9">W89-0242</url>
@@ -621,9 +621,9 @@
     </paper>
     <paper id="44">
       <title>A Broad-Coverage Natural Language Analysis System</title>
-      <author><first>Karen</first><last>Jensen</last></author>
+      <author id="karen-jensen"><first>Karen</first><last>Jensen</last></author>
       <pages>425–441</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="69eff1a4">W89-0244</url>
       <bibkey>jensen-1989-broad</bibkey>
     </paper>
@@ -637,9 +637,9 @@
     </paper>
     <paper id="46">
       <title>A Dependency-Based Parser for Topic and Focus</title>
-      <author><first>Eva</first><last>Hajičová</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajičová</last></author>
       <pages>448–457</pages>
-      <abstract/>
+      <abstract></abstract>
       <url hash="b8466cd0">W89-0246</url>
       <bibkey>hajicova-1989-dependency</bibkey>
     </paper>
diff --git a/data/xml/W90.xml b/data/xml/W90.xml
index dec565a8e0..9eee102094 100644
--- a/data/xml/W90.xml
+++ b/data/xml/W90.xml
@@ -3,9 +3,9 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the Fifth International Workshop on Natural Language Generation</booktitle>
-      <editor><first>Kathleen R.</first><last>McKeown</last></editor>
-      <editor><first>Johanna D.</first><last>Moore</last></editor>
-      <editor><first>Sergei</first><last>Nirenburg</last></editor>
+      <editor id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></editor>
+      <editor id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></editor>
+      <editor id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Linden Hall Conference Center, Dawson, Pennsylvania</address>
       <month>June</month>
@@ -18,22 +18,22 @@
     </frontmatter>
     <paper id="1">
       <title>Using <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars Systemic Framework in the</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <author><first>Gijoo</first><last>Yang</last></author>
       <url hash="1b8404cd">W90-0101</url>
       <bibkey>mccoy-etal-1990-using</bibkey>
     </paper>
     <paper id="2">
       <title>Generation and Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Stuart M.</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart M.</first><last>Shieber</last></author>
       <author><first>Yves</first><last>Schabes</last></author>
       <url hash="4fab4dfb">W90-0102</url>
       <bibkey>shieber-schabes-1990-generation</bibkey>
     </paper>
     <paper id="3">
       <title>A Connectionist Treatment of Grammar for Generation: Relying on Emergents</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="69be1603">W90-0103</url>
       <bibkey>ward-1990-connectionist</bibkey>
     </paper>
@@ -46,7 +46,7 @@
     <paper id="5">
       <title>A collocational based approach to salience-sensitive lexical selection</title>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <url hash="1e59d5a7">W90-0105</url>
       <bibkey>wanner-bateman-1990-collocational</bibkey>
     </paper>
@@ -65,19 +65,19 @@
     </paper>
     <paper id="8">
       <title>Upper Modeling: organizing knowledge for natural language processing</title>
-      <author><first>John A.</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John A.</first><last>Bateman</last></author>
       <url hash="efbb3f22">W90-0108</url>
       <bibkey>bateman-1990-upper</bibkey>
     </paper>
     <paper id="9">
       <title>Abstract Linguistic Resources for Text Planning</title>
-      <author><first>Marie W.</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie W.</first><last>Meteer</last></author>
       <url hash="5cddb9a2">W90-0109</url>
       <bibkey>meteer-1990-abstract</bibkey>
     </paper>
     <paper id="10">
       <title>Using Discourse Focus, Temporal Focus, and Spatial Focus to Generate Multisentential Text</title>
-      <author><first>Mark T.</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark T.</first><last>Maybury</last></author>
       <url hash="d09e4169">W90-0110</url>
       <bibkey>maybury-1990-using</bibkey>
     </paper>
@@ -90,7 +90,7 @@
     </paper>
     <paper id="12">
       <title>Domain Communication Knowledge</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="12983725">W90-0112</url>
       <bibkey>rambow-1990-domain</bibkey>
     </paper>
@@ -102,15 +102,15 @@
     </paper>
     <paper id="14">
       <title>The Role of Underlying Structure in Text Generation</title>
-      <author><first>Robert Alan</first><last>Granville</last></author>
+      <author id="robert-granville"><first>Robert Alan</first><last>Granville</last></author>
       <url hash="6abbd05d">W90-0114</url>
       <bibkey>granville-1990-role</bibkey>
     </paper>
     <paper id="15">
       <title>The Basic Block Model of Extended Explanations</title>
       <author><first>David J.</first><last>Mooney</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="b693cc62">W90-0115</url>
       <bibkey>mooney-etal-1990-basic</bibkey>
     </paper>
@@ -122,7 +122,7 @@
     </paper>
     <paper id="17">
       <title>Parsimonious and Profligate Approaches to the Question of Discourse Structure Relations</title>
-      <author><first>Eduard H.</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard H.</first><last>Hovy</last></author>
       <url hash="6440c62f">W90-0117</url>
       <bibkey>hovy-1990-parsimonious</bibkey>
     </paper>
@@ -160,14 +160,14 @@
     </paper>
     <paper id="23">
       <title>Relational-Grammar-Based Generation in the <fixed-case>JETS</fixed-case> <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Machine Translation System</title>
-      <author><first>David E.</first><last>Johnson</last></author>
+      <author id="david-e-johnson"><first>David E.</first><last>Johnson</last></author>
       <author><first>Hideo</first><last>Watanabe</last></author>
       <url hash="6a9a21ce">W90-0123</url>
       <bibkey>johnson-watanabe-1990-relational</bibkey>
     </paper>
     <paper id="24">
       <title>Real-Time Generation from Systemic Grammars</title>
-      <author><first>Terry</first><last>Patten</last></author>
+      <author id="terry-patten"><first>Terry</first><last>Patten</last></author>
       <author><first>Daniel S.</first><last>Stoops</last></author>
       <url hash="28797029">W90-0124</url>
       <bibkey>patten-stoops-1990-real</bibkey>
@@ -175,8 +175,8 @@
     <paper id="25">
       <title>Narrated Animation: A Case for Generation</title>
       <author><first>Norman</first><last>Badler</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
       <url hash="3c3a99a6">W90-0125</url>
       <bibkey>badler-etal-1990-narrated</bibkey>
     </paper>
@@ -186,7 +186,7 @@
       <booktitle>Proceedings of the First International Workshop on Tree Adjoining Grammar and Related Frameworks (<fixed-case>TAG</fixed-case>+1)</booktitle>
       <url hash="72bce822">W90-02</url>
       <editor><first>Karin</first><last>Harbusch</last></editor>
-      <editor><first>Wolfgang</first><last>Wahlster</last></editor>
+      <editor id="wolfgang-wahlster"><first>Wolfgang</first><last>Wahlster</last></editor>
       <publisher>Internationales Begegnungs- und Forschungszentrum für Informatik (IBFI)</publisher>
       <address>Schloß Dagstuhl</address>
       <month>August</month>
@@ -199,7 +199,7 @@
     </frontmatter>
     <paper id="1">
       <title>Formal properties of Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Stuart</first><last>Shieber</last></author>
+      <author id="stuart-m-shieber"><first>Stuart</first><last>Shieber</last></author>
       <pages>6–7</pages>
       <url hash="e459293d">W90-0201</url>
       <bibkey>shieber-1990-formal</bibkey>
@@ -223,14 +223,14 @@
     </paper>
     <paper id="4">
       <title>Multicomponent <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>9</pages>
       <url hash="be26b8b2">W90-0204</url>
       <bibkey>weir-1990-multicomponent</bibkey>
     </paper>
     <paper id="5">
       <title>Embedded Pushdown Automata</title>
-      <author><first>K.</first><last>Vijay-Shanker</last></author>
+      <author id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>10</pages>
       <url hash="df67ac07">W90-0205</url>
       <bibkey>vijay-shanker-1990-embedded</bibkey>
@@ -260,7 +260,7 @@
     <paper id="9">
       <title>Parallel <fixed-case>TAG</fixed-case> Parsing on the Connection Machine</title>
       <author><first>Michael</first><last>Palis</last></author>
-      <author><first>David</first><last>Wei</last></author>
+      <author id="david-weir"><first>David</first><last>Wei</last></author>
       <pages>12–13</pages>
       <url hash="823bbdac">W90-0209</url>
       <bibkey>palis-wei-1990-parallel</bibkey>
@@ -268,7 +268,7 @@
     <paper id="10">
       <title><fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar, Segment Grammar and Incremental Sentence Generation</title>
       <author><first>Gerard</first><last>Kempen</last></author>
-      <author><first>Koenraad</first><last>DeSmedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>DeSmedt</last></author>
       <pages>13–14</pages>
       <url hash="a759dbb6">W90-0210</url>
       <bibkey>kempen-desmedt-1990-tree</bibkey>
@@ -297,16 +297,16 @@
     </paper>
     <paper id="14">
       <title>A <fixed-case>TAG</fixed-case> analysis of the Third construction in <fixed-case>G</fixed-case>erman</title>
-      <author><first>Anthony</first><last>Kroch</last></author>
-      <author><first>Beatrice</first><last>Santorini</last></author>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="anthony-kroch"><first>Anthony</first><last>Kroch</last></author>
+      <author id="beatrice-santorini"><first>Beatrice</first><last>Santorini</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>16–17</pages>
       <url hash="2489ef5f">W90-0214</url>
       <bibkey>kroch-etal-1990-tag</bibkey>
     </paper>
     <paper id="15">
       <title><fixed-case>F</fixed-case>rench and <fixed-case>E</fixed-case>nglish determiners: Interaction of morphology, syntax and semantics in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
-      <author><first>Anne</first><last>Abeillé</last></author>
+      <author id="anne-abeille"><first>Anne</first><last>Abeillé</last></author>
       <pages>17–20</pages>
       <url hash="c19e4a25">W90-0215</url>
       <bibkey>abeille-1990-french</bibkey>
@@ -320,7 +320,7 @@
     </paper>
     <paper id="17">
       <title>Coordination in <fixed-case>TAG</fixed-case> in the manner of <fixed-case>CCG</fixed-case> (Combinatory Category Grammars): Fixed vs. Flexible Phrase Structure</title>
-      <author><first>Aravind</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind</first><last>Joshi</last></author>
       <pages>21</pages>
       <url hash="7c28c07f">W90-0217</url>
       <bibkey>joshi-1990-coordination</bibkey>
diff --git a/data/xml/W91.xml b/data/xml/W91.xml
index 384493850e..ab7de0395c 100644
--- a/data/xml/W91.xml
+++ b/data/xml/W91.xml
@@ -19,13 +19,13 @@
     <paper id="2">
       <title>Reversibility in a Constraint and Type based Logic Grammar: Application to Secondary Predication</title>
       <author><first>Palmira</first><last>Marrafa</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="dbadb0e5">W91-0102</url>
       <bibkey>marrafa-saint-dizier-1991-reversibility</bibkey>
     </paper>
     <paper id="3">
       <title>Towards Uniform Processing of Constraint-based Categorial Grammars</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="fafd4f7f">W91-0103</url>
       <bibkey>van-noord-1991-towards</bibkey>
     </paper>
@@ -37,13 +37,13 @@
     </paper>
     <paper id="5">
       <title>Reversibility and Modularity in Natural Language Generation</title>
-      <author><first>Gunter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Gunter</first><last>Neumann</last></author>
       <url hash="dbd7ea4e">W91-0105</url>
       <bibkey>neumann-1991-reversibility</bibkey>
     </paper>
     <paper id="6">
       <title>Reversible <fixed-case>NLP</fixed-case> by Deriving the Grammars From the Knowledge Base</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="5d403a35">W91-0106</url>
       <bibkey>mcdonald-1991-reversible</bibkey>
     </paper>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="8">
       <title>Generation and Translation Towards a Formalism-Independent Characterisation</title>
-      <author><first>Henry S.</first><last>Thompson</last></author>
+      <author id="henry-s-thompson"><first>Henry S.</first><last>Thompson</last></author>
       <url hash="9c762ddc">W91-0108</url>
       <bibkey>thompson-1991-generation</bibkey>
     </paper>
@@ -64,31 +64,31 @@
       <author><first>James</first><last>Barnett</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>Paul</first><last>Martin</last></author>
-      <author><first>Elaine</first><last>Rich</last></author>
+      <author id="elaine-rich"><first>Elaine</first><last>Rich</last></author>
       <url hash="77993cde">W91-0109</url>
       <bibkey>barnett-etal-1991-reversible</bibkey>
     </paper>
     <paper id="10">
       <title>A Uniform Architecture for Parsing, Generation and Transfer</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="acec8188">W91-0110</url>
       <bibkey>zajac-1991-uniform</bibkey>
     </paper>
     <paper id="11">
       <title>Common Heuristics for Parsing, Generation, and Whatever...</title>
-      <author><last>Hasida</last><first>Koiti</first></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="f271e3bc">W91-0111</url>
       <bibkey>hasida-1991-common</bibkey>
     </paper>
     <paper id="12">
       <title>A General Computational Method for Grammar Inversion</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="0780d91f">W91-0112</url>
       <bibkey>strzalkowski-1991-general</bibkey>
     </paper>
     <paper id="13">
       <title>Compiling Trace &amp; Unification Grammar for Parsing and Generation</title>
-      <author><first>Hans Ulrich</first><last>Block</last></author>
+      <author id="hans-ulrich-block"><first>Hans Ulrich</first><last>Block</last></author>
       <url hash="d949c468">W91-0113</url>
       <bibkey>block-1991-compiling</bibkey>
     </paper>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="15">
       <title>Handling Pragmatic Information With A Reversible Architecture</title>
-      <author><first>Masato</first><last>Ishizaki</last></author>
+      <author id="masato-ishizaki"><first>Masato</first><last>Ishizaki</last></author>
       <url hash="08d25fec">W91-0115</url>
       <bibkey>ishizaki-1991-handling</bibkey>
     </paper>
@@ -129,14 +129,14 @@
     </frontmatter>
     <paper id="1">
       <title>Knowledge representation and knowledge of words</title>
-      <author><first>Richmond H.</first><last>Thomason</last></author>
+      <author id="richmond-h-thomason"><first>Richmond H.</first><last>Thomason</last></author>
       <url hash="6dc88787">W91-0201</url>
       <bibkey>thomason-1991-knowledge</bibkey>
     </paper>
     <paper id="2">
       <title>Syntax-Driven and Ontology-Driven Lexical Semantics</title>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <url hash="ecb55345">W91-0202</url>
       <bibkey>nirenburg-levin-1991-syntax</bibkey>
     </paper>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="6">
       <title>Conventional Metaphor and the Lexicon</title>
-      <author><first>James H.</first><last>Martin</last></author>
+      <author id="james-h-martin"><first>James H.</first><last>Martin</last></author>
       <url hash="5a44859b">W91-0206</url>
       <bibkey>martin-1991-conventional</bibkey>
     </paper>
@@ -181,13 +181,13 @@
     <paper id="9">
       <title>Lexical Operations in a Unification-based Framework</title>
       <author><first>Ann</first><last>Copestake</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="ed64cfa8">W91-0209</url>
       <bibkey>copestake-briscoe-1991-lexical</bibkey>
     </paper>
     <paper id="10">
       <title>Lexical Structures for Linguistic Inference</title>
-      <author><first>Peter</first><last>Anick</last></author>
+      <author id="peter-anick"><first>Peter</first><last>Anick</last></author>
       <author><first>Sabine</first><last>Bergler</last></author>
       <url hash="474340f5">W91-0210</url>
       <bibkey>anick-bergler-1991-lexical</bibkey>
@@ -207,7 +207,7 @@
     </paper>
     <paper id="13">
       <title>Lexical and World Knowledge: Theoretical and Applied Viewpoints</title>
-      <author><first>John S.</first><last>White</last></author>
+      <author id="john-s-white"><first>John S.</first><last>White</last></author>
       <url hash="109db1a4">W91-0213</url>
       <bibkey>white-1991-lexical</bibkey>
     </paper>
@@ -225,7 +225,7 @@
     </paper>
     <paper id="16">
       <title>For the Lexicon That Has Everything</title>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <author><first>Joanne</first><last>Dardaine</last></author>
       <author><first>Yu-Fen</first><last>Huang</last></author>
       <author><first>Sun M.</first><last>Li</last></author>
@@ -238,13 +238,13 @@
     </paper>
     <paper id="17">
       <title>Acquiring and representing semantic information in a Lexical Knowledge Base</title>
-      <author><first>Nicoletta</first><last>Calzolari</last></author>
+      <author id="nicoletta-calzolari"><first>Nicoletta</first><last>Calzolari</last></author>
       <url hash="b5d715a0">W91-0217</url>
       <bibkey>calzolari-1991-acquiring</bibkey>
     </paper>
     <paper id="18">
       <title>General Lexical Representation for an Effect Predicate</title>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <url hash="243092af">W91-0218</url>
       <bibkey>palmer-1991-general</bibkey>
     </paper>
@@ -257,25 +257,25 @@
     </paper>
     <paper id="20">
       <title>Presuppositions and Default Reasoning: A Study in Lexical Pragmatics</title>
-      <author><first>Robert E.</first><last>Mercer</last></author>
+      <author id="robert-e-mercer"><first>Robert E.</first><last>Mercer</last></author>
       <url hash="b807ec93">W91-0220</url>
       <bibkey>mercer-1991-presuppositions</bibkey>
     </paper>
     <paper id="21">
       <title>Lexicon, Ontology and Text Meaning</title>
-      <author><first>Boyan A.</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan A.</first><last>Onyshkevych</last></author>
       <url hash="5942324f">W91-0221</url>
       <bibkey>onyshkevych-1991-lexicon</bibkey>
     </paper>
     <paper id="22">
       <title>A Two-Level Knowledge Representation for Machine Translation: Lexical Semantics and Tense/Aspect</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
       <url hash="aa8898f0">W91-0222</url>
       <bibkey>dorr-1991-two</bibkey>
     </paper>
     <paper id="23">
       <title>The Autonomy of Shallow Lexical Knowledge</title>
-      <author><first>Kathleen</first><last>Dahlgren</last></author>
+      <author id="kathleen-dahlgren"><first>Kathleen</first><last>Dahlgren</last></author>
       <url hash="54a10abb">W91-0223</url>
       <bibkey>dahlgren-1991-autonomy</bibkey>
     </paper>
@@ -327,7 +327,7 @@
     </paper>
     <paper id="4">
       <title>A Unification-based Grammar of Serial Verb Constructions</title>
-      <author><first>Adams B.</first><last>Bodomo</last></author>
+      <author id="adams-b-bodomo"><first>Adams B.</first><last>Bodomo</last></author>
       <pages>41–56</pages>
       <url hash="ca4f6a5d">W91-0304</url>
       <bibkey>bodomo-1992-unification</bibkey>
@@ -372,7 +372,7 @@
     </paper>
     <paper id="10">
       <title>On the Coverage of a Morphological Analyser based on “Svensk Ordbok” [A Dictionary of <fixed-case>S</fixed-case>wedish]</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>119–131</pages>
       <url hash="aa863e33">W91-0310</url>
       <bibkey>sagvall-hein-1992-coverage</bibkey>
@@ -401,7 +401,7 @@
     </paper>
     <paper id="14">
       <title>Anaphora and Intensionality in Classical Logic</title>
-      <author><first>Jørgen</first><last>Villadsen</last></author>
+      <author id="jorgen-villadsen"><first>Jørgen</first><last>Villadsen</last></author>
       <pages>165–176</pages>
       <url hash="85b33b37">W91-0314</url>
       <bibkey>villadsen-1992-anaphora</bibkey>
@@ -416,7 +416,7 @@
     </paper>
     <paper id="16">
       <title>A <fixed-case>S</fixed-case>wedish Core Vocabulary for Machine Translation</title>
-      <author><first>Annette</first><last>Östling</last></author>
+      <author id="annette-ostling-andersson"><first>Annette</first><last>Östling</last></author>
       <pages>187–198</pages>
       <url hash="73568b72">W91-0316</url>
       <bibkey>ostling-1992-swedish</bibkey>
diff --git a/data/xml/W93.xml b/data/xml/W93.xml
index d016ef3390..89d4c59350 100644
--- a/data/xml/W93.xml
+++ b/data/xml/W93.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Word Sense Disambiguation by Human Subjects: Computational and Psycholinguistic Applications</title>
-      <author><first>Thomas</first><last>Ahlswede</last></author>
+      <author id="thomas-ahlswede"><first>Thomas</first><last>Ahlswede</last></author>
       <author><first>David</first><last>Lorand</last></author>
       <url hash="848ce671">W93-0101</url>
       <bibkey>ahlswede-lorand-1993-word</bibkey>
@@ -21,7 +21,7 @@
       <title>Towards Building Contextual Representations of Word Senses Using Statistical Models</title>
       <author><first>Claudia</first><last>Leacock</last></author>
       <author><first>Geoffrey</first><last>Towell</last></author>
-      <author><first>Ellen</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen</first><last>Voorhees</last></author>
       <url hash="d6d78ccb">W93-0102</url>
       <bibkey>leacock-etal-1993-towards</bibkey>
     </paper>
@@ -29,7 +29,7 @@
       <title>Lexical Concept Acquisition from Collocation Map</title>
       <author><first>Young S.</first><last>Han</last></author>
       <author><first>Young Kyoon</first><last>Han</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="0fad61fc">W93-0103</url>
       <bibkey>han-etal-1993-lexical</bibkey>
     </paper>
@@ -43,7 +43,7 @@
       <title>Identifying Unknown Proper Names in Newswire Text</title>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <author><first>T. Richard</first><last>Macmillan</last></author>
-      <author><first>Susann</first><last>Luperfoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>Luperfoy</last></author>
       <author><first>Elaine</first><last>Lusher</last></author>
       <author><first>Sharon</first><last>Laskowski</last></author>
       <url hash="0ac14697">W93-0105</url>
@@ -51,16 +51,16 @@
     </paper>
     <paper id="6">
       <title>Customizing a Lexicon to Better Suit a Computational Task</title>
-      <author><first>Marti</first><last>Hearst</last></author>
-      <author><first>Hinrich</first><last>Schuetze</last></author>
+      <author id="marti-a-hearst"><first>Marti</first><last>Hearst</last></author>
+      <author id="hinrich-schutze"><first>Hinrich</first><last>Schuetze</last></author>
       <url hash="a171f551">W93-0106</url>
       <bibkey>hearst-schuetze-1993-customizing</bibkey>
     </paper>
     <paper id="7">
       <title>Hierarchical Clustering of Verbs</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="218a3c77">W93-0107</url>
       <bibkey>basili-etal-1993-hierarchical</bibkey>
     </paper>
@@ -74,22 +74,22 @@
     <paper id="9">
       <title>The Automatic Acquisition of Frequencies of Verb Subcategorization Frames from Tagged Corpora</title>
       <author><first>Akira</first><last>Ushioda</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <author><first>Ted</first><last>Gibson</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="f3b3e8ad">W93-0109</url>
       <bibkey>ushioda-etal-1993-automatic</bibkey>
     </paper>
     <paper id="10">
       <title>Acquiring Predicate-Argument Mapping Information from Multilingual Texts</title>
       <author><first>Chinatsu</first><last>Aone</last></author>
-      <author><first>Douglas</first><last>McKee</last></author>
+      <author id="douglas-mckee"><first>Douglas</first><last>McKee</last></author>
       <url hash="da6c5b67">W93-0110</url>
       <bibkey>aone-mckee-1993-acquiring</bibkey>
     </paper>
     <paper id="11">
       <title>Experiments in Syntactic and Semantic Classification and Disambiguation using Bootstrapping</title>
-      <author><first>Robert</first><last>Futrelle</last></author>
+      <author id="robert-p-futrelle"><first>Robert</first><last>Futrelle</last></author>
       <author><first>Susan</first><last>Gauch</last></author>
       <url hash="961393a3">W93-0111</url>
       <bibkey>futrelle-gauch-1993-experiments</bibkey>
@@ -109,16 +109,16 @@
     <paper id="14">
       <title>Categorization and Standardizing Proper Nouns for Efficient Information Retrieval</title>
       <author><first>Woojin</first><last>Paik</last></author>
-      <author><first>Elizabeth</first><last>Liddy</last></author>
-      <author><first>Edmund</first><last>Yu</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth</first><last>Liddy</last></author>
+      <author id="edmund-yu"><first>Edmund</first><last>Yu</last></author>
       <author><first>Mary</first><last>McKenna</last></author>
       <url hash="0e6e47f6">W93-0114</url>
       <bibkey>paik-etal-1993-categorization</bibkey>
     </paper>
     <paper id="15">
       <title>The Long Journey from the Core to the Real Size of Large <fixed-case>LDB</fixed-case>s</title>
-      <author><first>Elena</first><last>Paskaleva</last></author>
-      <author><first>Kiril</first><last>Simov</last></author>
+      <author id="elena-paskaleva"><first>Elena</first><last>Paskaleva</last></author>
+      <author id="kiril-simov"><first>Kiril</first><last>Simov</last></author>
       <author><first>Mariana</first><last>Damova</last></author>
       <author><first>Milena</first><last>Slavcheva</last></author>
       <url hash="5cc3db08">W93-0115</url>
@@ -137,9 +137,9 @@
     </frontmatter>
     <paper id="1">
       <title>Rhetorical Relations: Necessary But Not Sufficient</title>
-      <author><first>Sandra</first><last>Carberry</last></author>
-      <author><first>Jennifer</first><last>Chu</last></author>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
+      <author id="jennifer-chu-carroll"><first>Jennifer</first><last>Chu</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <author><first>Lynn</first><last>Lambert</last></author>
       <url hash="b1ef162c">W93-0201</url>
       <bibkey>carberry-etal-1993-rhetorical</bibkey>
@@ -153,7 +153,7 @@
     <paper id="3">
       <title>Knowledge, Intention, Rhetoric: Levels of Variation in Multilingual Instructions</title>
       <author><first>Judy</first><last>Delin</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Tony</first><last>Hartley</last></author>
       <url hash="127e4104">W93-0203</url>
       <bibkey>delin-etal-1993-knowledge</bibkey>
@@ -166,7 +166,7 @@
     </paper>
     <paper id="5">
       <title>A Goal-Based Grammar of Rhetoric</title>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <author><first>Graeme</first><last>Hirst</last></author>
       <author><first>Marzena</first><last>Makuta-Giluk</last></author>
       <url hash="b769bf33">W93-0205</url>
@@ -174,20 +174,20 @@
     </paper>
     <paper id="6">
       <title>An Algorithm for High-Level Organization of Multi-Paragraph Texts</title>
-      <author><first>Robert</first><last>Granville</last></author>
+      <author id="robert-granville"><first>Robert</first><last>Granville</last></author>
       <url hash="fd866f46">W93-0206</url>
       <bibkey>granville-1993-algorithm</bibkey>
     </paper>
     <paper id="7">
       <title>Planning for Intentions with Rhetorical Relations</title>
-      <author><first>Susan</first><last>Haller</last></author>
+      <author id="susan-haller"><first>Susan</first><last>Haller</last></author>
       <url hash="422aee2e">W93-0207</url>
       <bibkey>haller-1993-planning</bibkey>
     </paper>
     <paper id="8">
       <title>Structuring Two-Medium Dialog for Learning Language and Other Things</title>
       <author><first>Henry</first><last>Hamburger</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <author><first>Raza</first><last>Hashim</last></author>
       <url hash="86b36c05">W93-0208</url>
       <bibkey>hamburger-etal-1993-structuring</bibkey>
@@ -200,14 +200,14 @@
     </paper>
     <paper id="10">
       <title>In Defense of Syntax: Informational, Intentional, and Rhetorical Structures in Discourse</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="ef23fcb9">W93-0210</url>
       <bibkey>hovy-1993-defense</bibkey>
     </paper>
     <paper id="11">
       <title>Observations and Directions in Text Structure</title>
       <author><first>John</first><last>Hughes</last></author>
-      <author><first>Kathleen</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen</first><last>McCoy</last></author>
       <url hash="f6c755b7">W93-0211</url>
       <bibkey>hughes-mccoy-1993-observations</bibkey>
     </paper>
@@ -226,7 +226,7 @@
     <paper id="14">
       <title>Towards Stratification of <fixed-case>RST</fixed-case></title>
       <author><first>Tanya</first><last>Korelsky</last></author>
-      <author><first>Richard</first><last>Kittredge</last></author>
+      <author id="richard-kittredge"><first>Richard</first><last>Kittredge</last></author>
       <url hash="fa5eb658">W93-0214</url>
       <bibkey>korelsky-kittredge-1993-towards</bibkey>
     </paper>
@@ -238,82 +238,82 @@
     </paper>
     <paper id="16">
       <title>Empirical Evidence for Intention-Based Discourse Segmentation</title>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Rebecca J.</first><last>Passonneau</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca J.</first><last>Passonneau</last></author>
       <url hash="5261ecee">W93-0216</url>
       <bibkey>litman-passonneau-1993-empirical</bibkey>
     </paper>
     <paper id="17">
       <title>The Need for Intentionally-Based Approaches to Language</title>
-      <author><first>Karen</first><last>Lochbaum</last></author>
+      <author id="karen-e-lochbaum"><first>Karen</first><last>Lochbaum</last></author>
       <url hash="8ecf544a">W93-0217</url>
       <bibkey>lochbaum-1993-need</bibkey>
     </paper>
     <paper id="18">
       <title>Intentions in Bilingual Dialogue Processing</title>
-      <author><first>Susann</first><last>LuperFoy</last></author>
+      <author id="susann-luperfoy"><first>Susann</first><last>LuperFoy</last></author>
       <url hash="2d4b98b0">W93-0218</url>
       <bibkey>luperfoy-1993-intentions</bibkey>
     </paper>
     <paper id="19">
       <title>The Representation of Interdependencies between Communicative Goals and Rhetorical Relations in the Framework of Multimedia Document Generation</title>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <url hash="00808c0f">W93-0219</url>
       <bibkey>maier-1993-representation</bibkey>
     </paper>
     <paper id="20">
       <title>On Structure and Intention</title>
-      <author><first>Mark</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark</first><last>Maybury</last></author>
       <url hash="0980da99">W93-0220</url>
       <bibkey>maybury-1993-structure</bibkey>
     </paper>
     <paper id="21">
       <title>Textual Constraints, Rhetorical <fixed-case>RE</fixed-case>lations and Communicative Goals and Rhetorical Relations in the Framework of Multimedia Document Generation</title>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <url hash="963cba03">W93-0221</url>
       <bibkey>maier-1993-textual</bibkey>
     </paper>
     <paper id="22">
       <title>Assumption Underlying Discourse Relations: Which Ones are Really There and Where are They?</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <url hash="ae2d004b">W93-0222</url>
       <bibkey>meteer-1993-assumption</bibkey>
     </paper>
     <paper id="23">
       <title>How Could Rhetorical Relations Be Used in Machine Translation?</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="dffbebc9">W93-0223</url>
       <bibkey>mitkov-1993-rhetorical</bibkey>
     </paper>
     <paper id="24">
       <title>On the Necessity of Intentions and the Usefulness of Rhetorical Relations: A Position Paper</title>
-      <author><first>Vibhu</first><last>Mittal</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
+      <author id="vibhu-o-mittal"><first>Vibhu</first><last>Mittal</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
       <url hash="fca79b21">W93-0224</url>
       <bibkey>mittal-paris-1993-necessity</bibkey>
     </paper>
     <paper id="25">
       <title>Investigating Discourse Relations</title>
       <author><first>Megan</first><last>Moser</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <url hash="821316ad">W93-0225</url>
       <bibkey>moser-moore-1993-investigating</bibkey>
     </paper>
     <paper id="26">
       <title>Intentions, Information, and Inference: Two Rhetorical Questions</title>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <url hash="a1cffeb4">W93-0226</url>
       <bibkey>oberlander-1993-intentions</bibkey>
     </paper>
     <paper id="27">
       <title>Rhetoric as Knowledge</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <url hash="7ab0410f">W93-0227</url>
       <bibkey>rambow-1993-rhetoric</bibkey>
     </paper>
     <paper id="28">
       <title>Intentions, Rhetoric, or Discourse Relations ? A Case from Multilingual Document Generation</title>
-      <author><first>Dietmar</first><last>Rosner</last></author>
+      <author id="dietmar-rosner"><first>Dietmar</first><last>Rosner</last></author>
       <url hash="5057949e">W93-0228</url>
       <bibkey>rosner-1993-intentions</bibkey>
     </paper>
@@ -339,13 +339,13 @@
     </paper>
     <paper id="32">
       <title>On Discourse Relations, Rhetorical Relations and Rhetoric</title>
-      <author><first>Candace</first><last>Sidner</last></author>
+      <author id="candace-l-sidner"><first>Candace</first><last>Sidner</last></author>
       <url hash="8c52b82d">W93-0232</url>
       <bibkey>sidner-1993-discourse</bibkey>
     </paper>
     <paper id="33">
       <title>Summarising as a Lever for Studying Large-Scale Discourse Structure</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="29469232">W93-0233</url>
       <bibkey>sparck-jones-1993-summarising</bibkey>
     </paper>
@@ -357,7 +357,7 @@
     </paper>
     <paper id="35">
       <title>Rhetorical Relations, Action and Intentionality in Conversation</title>
-      <author><first>David</first><last>Traum</last></author>
+      <author id="david-traum"><first>David</first><last>Traum</last></author>
       <url hash="e0cf657d">W93-0235</url>
       <bibkey>traum-1993-rhetorical</bibkey>
     </paper>
@@ -375,19 +375,19 @@
     </paper>
     <paper id="38">
       <title>Information and Deliberation in Discourse</title>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <url hash="32b8165e">W93-0238</url>
       <bibkey>walker-1993-information</bibkey>
     </paper>
     <paper id="39">
       <title>Issues in Linguistic Segmentation</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <url hash="73678e29">W93-0239</url>
       <bibkey>wiebe-1993-issues</bibkey>
     </paper>
     <paper id="40">
       <title>Closing the Gap Between Discourse Structure and Communicative Intention</title>
-      <author><first>Horng Jyh Paul</first><last>Wu</last></author>
+      <author id="horng-jyh-paul-wu"><first>Horng Jyh Paul</first><last>Wu</last></author>
       <url hash="870dd4f4">W93-0240</url>
       <bibkey>wu-1993-closing</bibkey>
     </paper>
@@ -405,20 +405,20 @@
     <paper id="1">
       <title>Robust Bilingual Word Alignment for Machine Aided Translation</title>
       <author><first>Ido</first><last>Dagan</last></author>
-      <author><first>Kenneth</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
       <author><first>Willian</first><last>Gale</last></author>
       <url hash="b5910a54">W93-0301</url>
       <bibkey>dagan-etal-1993-robust</bibkey>
     </paper>
     <paper id="2">
       <title>Robust Text Processing in Automated Information Retrieval</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <url hash="66bd9d6f">W93-0302</url>
       <bibkey>strzalkowski-1993-robust-text</bibkey>
     </paper>
     <paper id="3">
       <title>Document Filtering using Semantic Information from a Machine Readable Dictionary</title>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <author><first>Woojin</first><last>Paik</last></author>
       <url hash="dbeb7ae4">W93-0303</url>
       <bibkey>liddy-paik-1993-document</bibkey>
@@ -432,7 +432,7 @@
     <paper id="5">
       <title><fixed-case>HMM</fixed-case>-Based Part-of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora</title>
       <author><first>Chao-Huang</first><last>Chang</last></author>
-      <author><first>Cheng-der</first><last>Chen</last></author>
+      <author id="cheng-der-chen"><first>Cheng-der</first><last>Chen</last></author>
       <url hash="952628f7">W93-0305</url>
       <bibkey>chang-chen-1993-hmm</bibkey>
     </paper>
@@ -445,7 +445,7 @@
     <paper id="7">
       <title>Structural Ambiguity and Conceptual Relations</title>
       <author><first>Philip</first><last>Resnik</last></author>
-      <author><first>Marti A.</first><last>Hearst</last></author>
+      <author id="marti-a-hearst"><first>Marti A.</first><last>Hearst</last></author>
       <url hash="6b2e0e88">W93-0307</url>
       <bibkey>resnik-hearst-1993-structural</bibkey>
     </paper>
@@ -478,8 +478,8 @@
     <paper id="12">
       <title>Example-Based Sense Tagging of Running <fixed-case>C</fixed-case>hinese Text</title>
       <author><first>Xiang</first><last>Tong</last></author>
-      <author><first>Chang-ning</first><last>Huang</last></author>
-      <author><first>Cheng-ming</first><last>Guo</last></author>
+      <author id="changning-huang"><first>Chang-ning</first><last>Huang</last></author>
+      <author id="cheng-ming-guo"><first>Cheng-ming</first><last>Guo</last></author>
       <url hash="77d8a5da">W93-0312</url>
       <bibkey>tong-etal-1993-example</bibkey>
     </paper>
@@ -537,7 +537,7 @@
     </paper>
     <paper id="5">
       <title>Porting a Stochastic Part-of-Speech Tagger to <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Douglass</first><last>Cutting</last></author>
+      <author id="douglass-cutting"><first>Douglass</first><last>Cutting</last></author>
       <pages>65–70</pages>
       <url hash="0200e75b">W93-0405</url>
       <bibkey>cutting-1994-porting</bibkey>
@@ -545,7 +545,7 @@
     <paper id="6">
       <title>Tagging Experiments Using Neural Networks</title>
       <author><first>Martin</first><last>Eineborg</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>71–81</pages>
       <url hash="23ea3332">W93-0406</url>
       <bibkey>eineborg-gamback-1994-tagging</bibkey>
@@ -559,7 +559,7 @@
     </paper>
     <paper id="8">
       <title>On Implementing <fixed-case>S</fixed-case>wedish Tense and Aspect</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>97–109</pages>
       <url hash="ef103366">W93-0408</url>
       <bibkey>gamback-1994-implementing</bibkey>
@@ -581,7 +581,7 @@
     <paper id="11">
       <title>From Semantic Representations to <fixed-case>SQL</fixed-case> Queries</title>
       <author><first>Per</first><last>Anker Jensen</last></author>
-      <author><first>Bodil</first><last>Nistrup Madsen</last></author>
+      <author id="bodil-nistrup-madsen"><first>Bodil</first><last>Nistrup Madsen</last></author>
       <author><first>Annie</first><last>Stahél</last></author>
       <author><first>Carl</first><last>Vikner</last></author>
       <pages>133–142</pages>
@@ -591,7 +591,7 @@
     <paper id="12">
       <title>Clustering Sentences – Making Sense of Synonymous Sentences</title>
       <author><first>Jussi</first><last>Karlgren</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Christer</first><last>Samuelsson</last></author>
       <pages>143–154</pages>
       <url hash="a5fc7f65">W93-0412</url>
@@ -671,7 +671,7 @@
     </paper>
     <paper id="23">
       <title>Preferences and Linguistic Choices in the Multra Machine Translation System</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>267–276</pages>
       <url hash="6a946a5e">W93-0423</url>
       <bibkey>sagvall-hein-1994-preferences</bibkey>
diff --git a/data/xml/W94.xml b/data/xml/W94.xml
index 17f2f6cbb4..8e9f66ca6d 100644
--- a/data/xml/W94.xml
+++ b/data/xml/W94.xml
@@ -16,21 +16,21 @@
     </frontmatter>
     <paper id="1">
       <title>INVITED TALK: Qualitative and Quantitative Designs for Speech Translation</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <url hash="d63813e9">W94-0101</url>
       <bibkey>alshawi-1994-invited</bibkey>
     </paper>
     <paper id="2">
       <title>The Noisy Channel and the Braying Donkey</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="b04a270d">W94-0102</url>
       <bibkey>basili-etal-1994-noisy</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>AMALGAM</fixed-case>: Automatic Mapping Among Lexico-Grammatical Annotation Models</title>
-      <author><first>Eric</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric</first><last>Atwell</last></author>
       <author><first>John</first><last>Hughes</last></author>
       <author><first>Clive</first><last>Souter</last></author>
       <url hash="21718553">W94-0103</url>
@@ -38,14 +38,14 @@
     </paper>
     <paper id="4">
       <title>Study and Implementation of Combined Techniques for Automatic Extraction of Terminology</title>
-      <author><first>Beatrice</first><last>Daille</last></author>
+      <author id="beatrice-daille"><first>Beatrice</first><last>Daille</last></author>
       <url hash="e98cb8cc">W94-0104</url>
       <bibkey>daille-1994-study</bibkey>
     </paper>
     <paper id="5">
       <title>Parsing with Principles and Probabilities</title>
       <author><first>Andrew</first><last>Fordham</last></author>
-      <author><first>Matthew</first><last>Crocker</last></author>
+      <author id="matthew-crocker"><first>Matthew</first><last>Crocker</last></author>
       <url hash="ddc6f3d8">W94-0105</url>
       <bibkey>fordham-crocker-1994-parsing</bibkey>
     </paper>
@@ -57,8 +57,8 @@
     </paper>
     <paper id="7">
       <title>Complexity of Description of Primitives: Relevance to Local Statistical Computations</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <url hash="b53ef09a">W94-0107</url>
       <bibkey>joshi-srinivas-1994-complexity</bibkey>
     </paper>
@@ -71,21 +71,21 @@
     </paper>
     <paper id="9">
       <title>Integrating Symbolic and Statistical Approches in Speech and Natural Language Applications</title>
-      <author><first>Marie</first><last>Meteer</last></author>
-      <author><first>Herbert</first><last>Gish</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
+      <author id="herbert-gish"><first>Herbert</first><last>Gish</last></author>
       <url hash="35743d8d">W94-0109</url>
       <bibkey>meteer-gish-1994-integrating</bibkey>
     </paper>
     <paper id="10">
       <title>Combining Linguistic with Statistical Methods in Automatic Speech Understanding</title>
-      <author><first>Patti</first><last>Price</last></author>
+      <author id="patti-price"><first>Patti</first><last>Price</last></author>
       <url hash="dca2384c">W94-0110</url>
       <bibkey>price-1994-combining</bibkey>
     </paper>
     <paper id="11">
       <title>Exploring the Statistical Derivation of Transformational Rule Sequences for Part-of-Speech Tagging</title>
-      <author><first>Lance A.</first><last>Ramshaw</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="lance-ramshaw"><first>Lance A.</first><last>Ramshaw</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <url hash="8b2ea448">W94-0111</url>
       <bibkey>ramshaw-marcus-1994-exploring</bibkey>
     </paper>
@@ -97,8 +97,8 @@
     </paper>
     <paper id="13">
       <title>Recovering From Parser Failures: A Hybrid Statistical/Symbolic Approach</title>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="74f234fe">W94-0113</url>
       <bibkey>rose-waibel-1994-recovering</bibkey>
     </paper>
@@ -111,7 +111,7 @@
     <paper id="15">
       <title>Learning a Radically Lexical Grammar</title>
       <author><first>Danny</first><last>Soloman</last></author>
-      <author><first>Mary McGee</first><last>Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary McGee</first><last>Wood</last></author>
       <url hash="fe724b52">W94-0115</url>
       <bibkey>soloman-wood-1994-learning</bibkey>
     </paper>
@@ -141,14 +141,14 @@
     </paper>
     <paper id="3">
       <title>Constraints, Exceptions and Representations</title>
-      <author><first>T. Mark</first><last>Ellison</last></author>
+      <author id="t-mark-ellison"><first>T. Mark</first><last>Ellison</last></author>
       <url hash="6f968748">W94-0203</url>
       <bibkey>ellison-1994-constraints</bibkey>
     </paper>
     <paper id="4">
       <title>Default Finite State Machines and Finite State Phonology</title>
       <author><first>Gerald</first><last>Penn</last></author>
-      <author><first>Richmond</first><last>Thomason</last></author>
+      <author id="richmond-h-thomason"><first>Richmond</first><last>Thomason</last></author>
       <url hash="0d746ee1">W94-0204</url>
       <bibkey>penn-thomason-1994-default</bibkey>
     </paper>
@@ -198,13 +198,13 @@
     <paper id="2">
       <title><fixed-case>DPOCL</fixed-case>: A Principled Approach To Discourse Planning</title>
       <author><first>R. Michael</first><last>Young</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <url hash="56077c2a">W94-0302</url>
       <bibkey>young-moore-1994-dpocl</bibkey>
     </paper>
     <paper id="3">
       <title>Building Underlying Structures for Multiparagraph Texts</title>
-      <author><first>Robert</first><last>Granville</last></author>
+      <author id="robert-granville"><first>Robert</first><last>Granville</last></author>
       <url hash="fdf3efb5">W94-0303</url>
       <bibkey>granville-1994-building</bibkey>
     </paper>
@@ -223,8 +223,8 @@
     </paper>
     <paper id="6">
       <title>Intentions, Structure and Expression in Multi-Lingual Instructions</title>
-      <author><first>Cecile L.</first><last>Paris</last></author>
-      <author><first>Donia R.</first><last>Scott</last></author>
+      <author id="cecile-paris"><first>Cecile L.</first><last>Paris</last></author>
+      <author id="donia-scott"><first>Donia R.</first><last>Scott</last></author>
       <url hash="4d7511d0">W94-0306</url>
       <bibkey>paris-scott-1994-intentions</bibkey>
     </paper>
@@ -238,9 +238,9 @@
     <paper id="8">
       <title>Expressing Procedural Relationships in Multilingual Instructions</title>
       <author><first>Judy</first><last>Delin</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
-      <author><first>Cecile</first><last>Paris</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
+      <author id="cecile-paris"><first>Cecile</first><last>Paris</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Keith</first><last>Vander Linden</last></author>
       <url hash="c0b225b2">W94-0308</url>
       <bibkey>delin-etal-1994-expressing</bibkey>
@@ -253,7 +253,7 @@
     </paper>
     <paper id="10">
       <title>On the Creative Use of Language: The Form of Lexical Resources</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <author><first>Federica</first><last>Busa</last></author>
       <url hash="01f12548">W94-0310</url>
       <bibkey>mcdonald-busa-1994-creative</bibkey>
@@ -261,13 +261,13 @@
     <paper id="11">
       <title>Semantic Lexicons: The Cornerstone for Lexical Choice in Natural Language Generation</title>
       <author><first>Evelyne</first><last>Viegas</last></author>
-      <author><first>Pierrette</first><last>Bouillon</last></author>
+      <author id="pierrette-bouillon"><first>Pierrette</first><last>Bouillon</last></author>
       <url hash="10745f08">W94-0311</url>
       <bibkey>viegas-bouillon-1994-semantic</bibkey>
     </paper>
     <paper id="12">
       <title>Generating Event Descriptions with Sage: A Simulation and Generation Environment</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <url hash="4e26b9f7">W94-0312</url>
       <bibkey>meteer-1994-generating</bibkey>
     </paper>
@@ -304,7 +304,7 @@
     <paper id="18">
       <title>Towards the Application of Text Generation in an Integrated Publication System</title>
       <author><first>Elke</first><last>Teich</last></author>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <url hash="56a20bf6">W94-0318</url>
       <bibkey>teich-bateman-1994-towards</bibkey>
     </paper>
@@ -316,35 +316,35 @@
     </paper>
     <paper id="20">
       <title>The Role of Cognitive Modeling in Communicative Intentions</title>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Marilyn</first><last>Walker</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="marilyn-walker"><first>Marilyn</first><last>Walker</last></author>
       <url hash="e2f009ce">W94-0320</url>
       <bibkey>rambow-walker-1994-role</bibkey>
     </paper>
     <paper id="21">
       <title>Recognizing Digressive Questions Using a Model for Interactive Generation for Interactive Generation</title>
-      <author><first>Susan M.</first><last>Haller</last></author>
+      <author id="susan-haller"><first>Susan M.</first><last>Haller</last></author>
       <url hash="df0c2e9b">W94-0321</url>
       <bibkey>haller-1994-recognizing</bibkey>
     </paper>
     <paper id="22">
       <title>Generating Indirect Answers to Yes-No Questions</title>
-      <author><first>Nancy</first><last>Green</last></author>
-      <author><first>Sandra</first><last>Carberry</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
+      <author id="sandra-carberry"><first>Sandra</first><last>Carberry</last></author>
       <url hash="29e77d9a">W94-0322</url>
       <bibkey>green-carberry-1994-generating</bibkey>
     </paper>
     <paper id="23">
       <title>Real-Time Natural Language Generation in <fixed-case>NL</fixed-case>-<fixed-case>SOAR</fixed-case></title>
       <author><first>Robert</first><last>Rubinoff</last></author>
-      <author><first>Jill Fain</first><last>Lehman</last></author>
+      <author id="jill-fain-lehman"><first>Jill Fain</first><last>Lehman</last></author>
       <url hash="eb410196">W94-0323</url>
       <bibkey>rubinoff-lehman-1994-real</bibkey>
     </paper>
     <paper id="24">
       <title>Generating Cooperative System Responses in Information Retrieval Dialogues</title>
       <author><first>Markus</first><last>Fischer</last></author>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Adelheit</first><last>Stein</last></author>
       <url hash="17305139">W94-0324</url>
       <bibkey>fischer-etal-1994-generating</bibkey>
@@ -352,7 +352,7 @@
     <paper id="25">
       <title>Situation Viewpoints for Generation</title>
       <author><first>Henry</first><last>Hamburger</last></author>
-      <author><first>Dan</first><last>Tufis</last></author>
+      <author id="dan-tufis"><first>Dan</first><last>Tufis</last></author>
       <url hash="be81ea05">W94-0325</url>
       <bibkey>hamburger-tufis-1994-situation</bibkey>
     </paper>
@@ -364,22 +364,22 @@
     </paper>
     <paper id="27">
       <title>Bidirectional Incremental Generation and Analysis with Categorial Grammar and Indexed Quasi-Logical Form</title>
-      <author><first>Torbjoern</first><last>Lager</last></author>
-      <author><first>William J.</first><last>Black</last></author>
+      <author id="torbjorn-lager"><first>Torbjoern</first><last>Lager</last></author>
+      <author id="william-j-black"><first>William J.</first><last>Black</last></author>
       <url hash="e234724c">W94-0327</url>
       <bibkey>lager-black-1994-bidirectional</bibkey>
     </paper>
     <paper id="28">
       <title>Toward a Multidimensional Framework to Guide the Automated Generation of Text Types</title>
-      <author><first>Julia</first><last>Lavid</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="julia-lavid-lopez"><first>Julia</first><last>Lavid</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="52f2a713">W94-0328</url>
       <bibkey>lavid-hovy-1994-toward</bibkey>
     </paper>
     <paper id="29">
       <title><fixed-case>CORECT</fixed-case>: Combining <fixed-case>CSCW</fixed-case> with Natural Language Generation for Collaborative Requirement Capture</title>
       <author><first>John</first><last>Levine</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <url hash="11bb69db">W94-0329</url>
       <bibkey>levine-mellish-1994-corect</bibkey>
     </paper>
@@ -392,9 +392,9 @@
     </paper>
     <paper id="31">
       <title>Generation in the <fixed-case>LOLITA</fixed-case> System: An Engineering Approach</title>
-      <author><first>Mark H.</first><last>Smith</last></author>
-      <author><first>Roberto</first><last>Garigliano</last></author>
-      <author><first>Richard G.</first><last>Morgan</last></author>
+      <author id="mark-h-smith"><first>Mark H.</first><last>Smith</last></author>
+      <author id="roberto-garigliano"><first>Roberto</first><last>Garigliano</last></author>
+      <author id="richard-g-morgan"><first>Richard G.</first><last>Morgan</last></author>
       <url hash="393b3ab1">W94-0331</url>
       <bibkey>smith-etal-1994-generation</bibkey>
     </paper>
diff --git a/data/xml/W95.xml b/data/xml/W95.xml
index 96b1e34b5b..5e90d2ae28 100644
--- a/data/xml/W95.xml
+++ b/data/xml/W95.xml
@@ -19,13 +19,13 @@
     </paper>
     <paper id="2">
       <title>Lexical Heads, Phrase Structure and the Induction of Grammar</title>
-      <author><first>Carl</first><last>de Marcken</last></author>
+      <author id="carl-de-marcken"><first>Carl</first><last>de Marcken</last></author>
       <url hash="fa28185d">W95-0102</url>
       <bibkey>de-marcken-1995-lexical</bibkey>
     </paper>
     <paper id="3">
       <title>Prepositional Phrase Attachment through a Backed-off Model</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>James</first><last>Brooks</last></author>
       <url hash="d73cc65b">W95-0103</url>
       <bibkey>collins-brooks-1995-prepositional</bibkey>
@@ -50,8 +50,8 @@
     </paper>
     <paper id="7">
       <title>Text Chunking using Transformation-Based Learning</title>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="2c6f7bc5">W95-0107</url>
       <bibkey>ramshaw-marcus-1995-text</bibkey>
     </paper>
@@ -73,21 +73,21 @@
     </paper>
     <paper id="10">
       <title>Inverse Document Frequency (<fixed-case>IDF</fixed-case>): A Measure of Deviations from <fixed-case>P</fixed-case>oisson</title>
-      <author><first>Kenneth</first><last>Church</last></author>
-      <author><first>William</first><last>Gale</last></author>
+      <author id="kenneth-church"><first>Kenneth</first><last>Church</last></author>
+      <author id="william-a-gale"><first>William</first><last>Gale</last></author>
       <url hash="36887439">W95-0110</url>
       <bibkey>church-gale-1995-inverse</bibkey>
     </paper>
     <paper id="11">
       <title>Automatic Suggestion of Significant Terms for a Predefined Topic</title>
-      <author><first>Joe</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe</first><last>Zhou</last></author>
       <author><first>Pete</first><last>Dapkus</last></author>
       <url hash="66f0996e">W95-0111</url>
       <bibkey>zhou-dapkus-1995-automatic</bibkey>
     </paper>
     <paper id="12">
       <title>Automatically Acquiring Conceptual Patterns without an Annotated Corpus</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Jay</first><last>Shoen</last></author>
       <url hash="1f60a64d">W95-0112</url>
       <bibkey>riloff-shoen-1995-automatically</bibkey>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="3">
       <title><fixed-case>S</fixed-case>wedish Language Processing in the Spoken Language Translator</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>37–49</pages>
       <url hash="c2c094bc">W95-0203</url>
       <bibkey>gamback-1995-swedish</bibkey>
@@ -159,7 +159,7 @@
     </paper>
     <paper id="5">
       <title>Sense Extension Functions in Lexical Semantics</title>
-      <author><first>Peter</first><last>Rossen Skadhauge</last></author>
+      <author id="peter-rossen-skadhauge"><first>Peter</first><last>Rossen Skadhauge</last></author>
       <pages>59–68</pages>
       <url hash="678207dd">W95-0205</url>
       <bibkey>rossen-skadhauge-1995-sense</bibkey>
diff --git a/data/xml/W96.xml b/data/xml/W96.xml
index bdec515112..6d13cc09d7 100644
--- a/data/xml/W96.xml
+++ b/data/xml/W96.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Fourth Workshop on Very Large Corpora</booktitle>
-      <editor><first>Donia</first><last>Scott</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Herstmonceux Castle, Sussex, UK</address>
       <month>June</month>
@@ -16,14 +16,14 @@
     </frontmatter>
     <paper id="1">
       <title>Using Word Class for Part-of-speech Disambiguation</title>
-      <author><first>Evelyne</first><last>Tzoukermann</last></author>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="evelyne-tzoukermann"><first>Evelyne</first><last>Tzoukermann</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <url hash="7f4b3f13">W96-0101</url>
       <bibkey>tzoukermann-radev-1996-using</bibkey>
     </paper>
     <paper id="2">
       <title><fixed-case>MBT</fixed-case>: A Memory-Based Part of Speech Tagger-Generator</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Jakub</first><last>Zavrel</last></author>
       <author><first>Peter</first><last>Berck</last></author>
       <author><first>Steven</first><last>Gillis</last></author>
@@ -61,27 +61,27 @@
     <paper id="7">
       <title>Automatic Extraction of Word Sequence Correspondences in Parallel Corpora</title>
       <author><first>Mihoko</first><last>Kitamura</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="4e6e0ceb">W96-0107</url>
       <bibkey>kitamura-matsumoto-1996-automatic</bibkey>
     </paper>
     <paper id="8">
       <title>A Statistical Approach to Automatic <fixed-case>OCR</fixed-case> Error Correction in Context</title>
       <author><first>Xiang</first><last>Tong</last></author>
-      <author><first>David A.</first><last>Evans</last></author>
+      <author id="david-a-evans"><first>David A.</first><last>Evans</last></author>
       <url hash="6560515e">W96-0108</url>
       <bibkey>tong-evans-1996-statistical</bibkey>
     </paper>
     <paper id="9">
       <title>Exploiting Text Structure for Topic Identification</title>
       <author><first>Tadashi</first><last>Nomoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="5bc6dfcb">W96-0109</url>
       <bibkey>nomoto-matsumoto-1996-exploiting</bibkey>
     </paper>
     <paper id="10">
       <title>Statistical Models for Deep-structure Disambiguation</title>
-      <author><first>TungHui</first><last>Chiang</last></author>
+      <author id="tung-hui-chiang"><first>TungHui</first><last>Chiang</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <url hash="35b660a2">W96-0110</url>
       <bibkey>chiang-su-1996-statistical</bibkey>
@@ -107,7 +107,7 @@
     <paper id="14">
       <title>Towards Automatic Grammar Acquisition from a Bracketed Corpus</title>
       <author><first>Thanaruk</first><last>Theeramunkong</last></author>
-      <author><first>Manabu</first><last>Okumara</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumara</last></author>
       <url hash="bc73abd0">W96-0114</url>
       <bibkey>theeramunkong-okumara-1996-towards</bibkey>
     </paper>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="4">
       <title>Modeling Conversational Speech for Speech Recognition</title>
-      <author><first>Marie</first><last>Meteer</last></author>
+      <author id="marie-meteer"><first>Marie</first><last>Meteer</last></author>
       <author><first>Rukmini</first><last>Iyer</last></author>
       <url hash="5fc433d9">W96-0204</url>
       <bibkey>meteer-iyer-1996-modeling</bibkey>
@@ -166,34 +166,34 @@
     <paper id="7">
       <title>Combining Hand-crafted Rules and Unsupervised Learning in Constraint-based Morphological Disambiguation</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <url hash="67072801">W96-0207</url>
       <bibkey>oflazer-tur-1996-combining</bibkey>
     </paper>
     <paper id="8">
       <title>Comparative Experiments on Disambiguating Word Senses: An Illustration of the Role of Bias in Machine Learning</title>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <url hash="be3639d6">W96-0208</url>
       <bibkey>mooney-1996-comparative</bibkey>
     </paper>
     <paper id="9">
       <title>Apportioning Development Effort in a Probabilistic <fixed-case>LR</fixed-case> Parsing System Through Evaluation</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="778ba2ef">W96-0209</url>
       <bibkey>carroll-briscoe-1996-apportioning</bibkey>
     </paper>
     <paper id="10">
       <title>The Measure of a Model</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
       <url hash="4588e562">W96-0210</url>
       <bibkey>bruce-etal-1996-measure</bibkey>
     </paper>
     <paper id="11">
       <title>Automating Feature Set Selection for Case-Based Learning of Linguistic Knowledge</title>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <url hash="92f96132">W96-0211</url>
       <bibkey>cardie-1996-automating</bibkey>
     </paper>
@@ -206,13 +206,13 @@
     </paper>
     <paper id="13">
       <title>A Maximum Entropy Model for Part-Of-Speech Tagging</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="a352c713">W96-0213</url>
       <bibkey>ratnaparkhi-1996-maximum</bibkey>
     </paper>
     <paper id="14">
       <title>Efficient Algorithms for Parsing the <fixed-case>DOP</fixed-case> Model</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <url hash="c24c732b">W96-0214</url>
       <bibkey>goodman-1996-efficient</bibkey>
     </paper>
@@ -241,7 +241,7 @@
     </paper>
     <paper id="3">
       <title>Controlling the Application of Lexical Rules</title>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <author><first>Ann</first><last>Copestake</last></author>
       <url hash="055ca62c">W96-0303</url>
       <bibkey>briscoe-copestake-1996-controlling</bibkey>
@@ -249,7 +249,7 @@
     <paper id="4">
       <title>Using Lexical Semantic Techniques to Classify Free-Responses</title>
       <author><first>Jill</first><last>Burstein</last></author>
-      <author><first>Randy</first><last>Kaplan</last></author>
+      <author id="randy-m-kaplan"><first>Randy</first><last>Kaplan</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Chi</first><last>Lu</last></author>
       <url hash="95c037b6">W96-0304</url>
@@ -257,15 +257,15 @@
     </paper>
     <paper id="5">
       <title>Acquisition of Computational-Semantic Lexicons from Machine Readable Lexical Resources</title>
-      <author><first>Jason J.S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason J.S.</first><last>Chang</last></author>
       <author><first>J.N.</first><last>Chen</last></author>
       <url hash="7fd90825">W96-0305</url>
       <bibkey>chang-chen-1996-acquisition</bibkey>
     </paper>
     <paper id="6">
       <title>Acquisition of Semantic Lexicons: Using Word Sense Disambiguation to Improve Precision</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Doug</first><last>Jones</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="douglas-jones"><first>Doug</first><last>Jones</last></author>
       <url hash="24505531">W96-0306</url>
       <bibkey>dorr-jones-1996-acquisition</bibkey>
     </paper>
@@ -278,13 +278,13 @@
     <paper id="8">
       <title><i>Lexical Rules</i> is Italicized</title>
       <author><first>Stephen</first><last>Helmreich</last></author>
-      <author><first>David</first><last>Farwell</last></author>
+      <author id="david-farwell"><first>David</first><last>Farwell</last></author>
       <url hash="bd144ef3">W96-0308</url>
       <bibkey>helmreich-farwell-1996-lexical</bibkey>
     </paper>
     <paper id="9">
       <title>Qualia Structure and the Compositional Interpretation of Compounds</title>
-      <author><first>Michael</first><last>Johnston</last></author>
+      <author id="michael-johnston"><first>Michael</first><last>Johnston</last></author>
       <author><first>Federica</first><last>Busa</last></author>
       <url hash="a1e22810">W96-0309</url>
       <bibkey>johnston-busa-1996-qualia</bibkey>
@@ -292,14 +292,14 @@
     <paper id="10">
       <title>Lexical Rules for Deverbal Adjectives</title>
       <author><first>Victor</first><last>Raskin</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="3ec70977">W96-0310</url>
       <bibkey>raskin-nirenburg-1996-lexical</bibkey>
     </paper>
     <paper id="11">
       <title>Morphological Productivity in the Lexicon</title>
       <author><first>Onur T.</first><last>Sehitoglu</last></author>
-      <author><first>H. Cem</first><last>Bozsahin</last></author>
+      <author id="cem-bozsahin"><first>H. Cem</first><last>Bozsahin</last></author>
       <url hash="773fcd34">W96-0311</url>
       <bibkey>sehitoglu-bozsahin-1996-morphological</bibkey>
     </paper>
@@ -309,7 +309,7 @@
       <booktitle>Eighth International Natural Language Generation Workshop</booktitle>
       <venue>inlg</venue>
       <year>1996</year>
-      <editor><first>Donia</first><last>Scott</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Stroudsburg, PA, USA</address>
       <doi>10.18653/v1/W96-04</doi>
@@ -322,7 +322,7 @@
     <paper id="1">
       <title>The <fixed-case>H</fixed-case>ealth<fixed-case>D</fixed-case>oc Sentence Planner</title>
       <author><first>Leo</first><last>Wanner</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="75e4774e">W96-0401</url>
       <bibkey>wanner-hovy-1996-healthdoc</bibkey>
       <doi>10.18653/v1/W96-0401</doi>
@@ -345,9 +345,9 @@
     </paper>
     <paper id="4">
       <title>Approximate Generation from Non-Hierarchical Representations</title>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
       <url hash="ca4e88ad">W96-0404</url>
       <bibkey>nicolov-etal-1996-approximate</bibkey>
       <doi>10.18653/v1/W96-0404</doi>
@@ -370,15 +370,15 @@
     <paper id="7">
       <title>Generating Patent Claims from Interactive Input</title>
       <author><first>Svetlana</first><last>Sheremetyeva</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
-      <author><first>Irene</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="irene-nirenburg"><first>Irene</first><last>Nirenburg</last></author>
       <url hash="b29141cf">W96-0407</url>
       <bibkey>sheremetyeva-etal-1996-generating</bibkey>
       <doi>10.18653/v1/W96-0407</doi>
     </paper>
     <paper id="8">
       <title>Considering the Effects of Second Language Learning on Generation</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <author><first>Christopher A.</first><last>Pennington</last></author>
       <author><first>Linda Z.</first><last>Suri</last></author>
       <url hash="458ecf3d">W96-0408</url>
@@ -387,9 +387,9 @@
     </paper>
     <paper id="9">
       <title>Tactical Generation in a Free Constituent Order Language</title>
-      <author><first>Dilek Zeynep</first><last>Hakkani</last></author>
+      <author id="dilek-hakkani-tur"><first>Dilek Zeynep</first><last>Hakkani</last></author>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <url hash="2254f30e">W96-0409</url>
       <bibkey>hakkani-etal-1996-tactical</bibkey>
       <doi>10.18653/v1/W96-0409</doi>
@@ -397,7 +397,7 @@
     <paper id="10">
       <title>Paying Heed to Collocations</title>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Christine</first><last>Doran</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
       <url hash="698359d5">W96-0410</url>
       <bibkey>stone-doran-1996-paying</bibkey>
       <doi>10.18653/v1/W96-0410</doi>
@@ -440,8 +440,8 @@
     <paper id="16">
       <title>Sources of Flexibility in Dynamic Hypertext Generation</title>
       <author><first>Alistair</first><last>Knott</last></author>
-      <author><first>Chris</first><last>Mellish</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Mick</first><last>O’Donnell</last></author>
       <url hash="f8bef4a0">W96-0416</url>
       <bibkey>knott-etal-1996-sources</bibkey>
@@ -473,7 +473,7 @@
       <booktitle>Eighth International Natural Language Generation Workshop (Posters and Demonstrations)</booktitle>
       <venue>inlg</venue>
       <year>1996</year>
-      <editor><first>Donia</first><last>Scott</last></editor>
+      <editor id="donia-scott"><first>Donia</first><last>Scott</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Stroudsburg, PA, USA</address>
       <doi>10.18653/v1/W96-05</doi>
@@ -494,15 +494,15 @@
     <paper id="2">
       <title><fixed-case>SPLAT</fixed-case>: A sentence-plan authoring tool</title>
       <author><first>Bruce</first><last>Jakeway</last></author>
-      <author><first>Chrysanne</first><last>DiMarco</last></author>
+      <author id="chrysanne-dimarco"><first>Chrysanne</first><last>DiMarco</last></author>
       <url hash="b9356541">W96-0502</url>
       <bibkey>jakeway-dimarco-1996-splat</bibkey>
       <doi>10.18653/v1/W96-0502</doi>
     </paper>
     <paper id="3">
       <title>The <fixed-case>M</fixed-case>odel<fixed-case>E</fixed-case>xplainer</title>
-      <author><first>Benoit</first><last>Lavoie</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>Ehud</first><last>Reiter</last></author>
       <url hash="da6eb0c1">W96-0503</url>
       <bibkey>lavoie-etal-1996-modelexplainer</bibkey>
@@ -510,7 +510,7 @@
     </paper>
     <paper id="4">
       <title><fixed-case>DRAFTER</fixed-case></title>
-      <author><first>Cécile</first><last>Paris</last></author>
+      <author id="cecile-paris"><first>Cécile</first><last>Paris</last></author>
       <author><first>Keith</first><last>Vander Linden</last></author>
       <url hash="fd51a5fe">W96-0504</url>
       <bibkey>paris-vander-linden-1996-drafter</bibkey>
@@ -527,14 +527,14 @@
     <paper id="6">
       <title><fixed-case>PICARD</fixed-case>: The Next Generator</title>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="45894f26">W96-0506</url>
       <bibkey>beale-nirenburg-1996-picard</bibkey>
       <doi>10.18653/v1/W96-0506</doi>
     </paper>
     <paper id="7">
       <title>Overview of <fixed-case>A</fixed-case>leth<fixed-case>G</fixed-case>en</title>
-      <author><first>José</first><last>Coch</last></author>
+      <author id="jose-coch"><first>José</first><last>Coch</last></author>
       <url hash="3e8c5b4b">W96-0507</url>
       <bibkey>coch-1996-overview</bibkey>
       <doi>10.18653/v1/W96-0507</doi>
@@ -542,7 +542,7 @@
     <paper id="8">
       <title>On Lexical Aggregation and Ordering</title>
       <author><first>Hercules</first><last>Dalianis</last></author>
-      <author><first>Eduard</first><last>Hovy</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
       <url hash="2f233b7c">W96-0508</url>
       <bibkey>dalianis-hovy-1996-lexical</bibkey>
       <doi>10.18653/v1/W96-0508</doi>
@@ -550,7 +550,7 @@
     <paper id="9">
       <title>Generating ‘Distributed’ Referring Expressions: an Initial Report</title>
       <author><first>Barbara</first><last>Di Eugenio</last></author>
-      <author><first>Johanna D.</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna D.</first><last>Moore</last></author>
       <url hash="e8e3cd57">W96-0509</url>
       <bibkey>di-eugenio-moore-1996-generating</bibkey>
       <doi>10.18653/v1/W96-0509</doi>
@@ -572,7 +572,7 @@
     </paper>
     <paper id="12">
       <title>An Architecture For Distributed Natural Language Summarization</title>
-      <author><first>Dragomir R.</first><last>Radev</last></author>
+      <author id="dragomir-radev"><first>Dragomir R.</first><last>Radev</last></author>
       <url hash="fcf986b6">W96-0512</url>
       <bibkey>radev-1996-architecture</bibkey>
       <doi>10.18653/v1/W96-0512</doi>
diff --git a/data/xml/W97.xml b/data/xml/W97.xml
index d972e8c442..0793469ec1 100644
--- a/data/xml/W97.xml
+++ b/data/xml/W97.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>Summary of Invited Speech</title>
-      <author><first>Mitch</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitch</first><last>Marcus</last></author>
       <url hash="4299d8bf">W97-0101</url>
       <bibkey>marcus-1997-summary</bibkey>
     </paper>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="3">
       <title>Commercial Impact of <fixed-case>VLC</fixed-case> Research</title>
-      <author><first>Howard</first><last>Turtle</last></author>
+      <author id="howard-r-turtle"><first>Howard</first><last>Turtle</last></author>
       <url hash="5a8ee290">W97-0103</url>
       <bibkey>turtle-1997-commercial</bibkey>
     </paper>
@@ -36,24 +36,24 @@
     </paper>
     <paper id="5">
       <title>Probabilistic Parsing of Unrestricted <fixed-case>E</fixed-case>nglish Text, With a Highly-Detailed Grammar</title>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Stephen</first><last>Eubank</last></author>
-      <author><first>Hideki</first><last>Kashioka</last></author>
-      <author><first>David</first><last>Magerman</last></author>
+      <author id="hideki-kashioka"><first>Hideki</first><last>Kashioka</last></author>
+      <author id="david-m-magerman"><first>David</first><last>Magerman</last></author>
       <url hash="3e131cf3">W97-0105</url>
       <bibkey>black-etal-1997-probabilistic</bibkey>
     </paper>
     <paper id="6">
       <title>Grammar Acquisition Based on Clustering Analysis and Its Application to Statistical Parsing</title>
       <author><first>Thanaruk</first><last>Theeramunkong</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="7a3258cd">W97-0106</url>
       <bibkey>theeramunkong-okumura-1997-grammar</bibkey>
     </paper>
     <paper id="7">
       <title>Reestimation and Best-First Parsing Algorithm for Probabilistic Dependency Grammars</title>
       <author><first>Seungmi</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="0bdb806d">W97-0107</url>
       <bibkey>lee-choi-1997-reestimation</bibkey>
     </paper>
@@ -66,20 +66,20 @@
     <paper id="9">
       <title>Corpus Based <fixed-case>PP</fixed-case> Attachment Ambiguity Resolution with a Semantic Dictionary</title>
       <author><first>Jiri</first><last>Stetina</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="affede03">W97-0109</url>
       <bibkey>stetina-nagao-1997-corpus</bibkey>
     </paper>
     <paper id="10">
       <title>Corpus Based Statistical Generalization Tree in Rule Optimization</title>
-      <author><first>Joyce Yue</first><last>Chai</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="joyce-chai"><first>Joyce Yue</first><last>Chai</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <url hash="a74dd33b">W97-0110</url>
       <bibkey>chai-biermann-1997-corpus</bibkey>
     </paper>
     <paper id="11">
       <title>Clustering Co-occurrence Graph based on Transitivity</title>
-      <author><first>Kumiko</first><last>Tanaka-Ishii</last></author>
+      <author id="kumiko-tanaka-ishii"><first>Kumiko</first><last>Tanaka-Ishii</last></author>
       <url hash="a3446640">W97-0111</url>
       <bibkey>tanaka-ishii-1997-clustering</bibkey>
     </paper>
@@ -92,7 +92,7 @@
     <paper id="13">
       <title>Data Reliability and Its Effects on Automatic Abstracting</title>
       <author><first>Tadashi</first><last>Nomoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="7249b038">W97-0113</url>
       <bibkey>nomoto-matsumoto-1997-data</bibkey>
     </paper>
@@ -104,8 +104,8 @@
     </paper>
     <paper id="15">
       <title>Statistical Acquisition of Terminology Dictionary</title>
-      <author><last>Huang</last><first>Xuan-jing</first></author>
-      <author><last>Wu</last><first>Li-de</first></author>
+      <author id="xuan-jing-huang"><first>Xuan-jing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Li-de</first><last>Wu</last></author>
       <author><last>Wang</last><first>Wen-xin</first></author>
       <url hash="d5c455b7">W97-0115</url>
       <bibkey>huang-etal-1997-statistical</bibkey>
@@ -118,14 +118,14 @@
     </paper>
     <paper id="17">
       <title>A Natural Language Correction Model for Continuous Speech Recognition</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Ronald</first><last>Brandow</last></author>
       <url hash="089e99b7">W97-0117</url>
       <bibkey>strzalkowski-brandow-1997-natural</bibkey>
     </paper>
     <paper id="18">
       <title>The Effects of Corpus Size and Homogeneity on Language Model Quality</title>
-      <author><first>Tony G.</first><last>Rose</last></author>
+      <author id="tony-rose"><first>Tony G.</first><last>Rose</last></author>
       <url hash="f6c81c61">W97-0118</url>
       <bibkey>rose-1997-effects</bibkey>
     </paper>
@@ -163,28 +163,28 @@
     <paper id="24">
       <title>Analysis of Unknown Lexical Items using Morphological and Syntactic Information with the <fixed-case>TIMIT</fixed-case> Corpus</title>
       <author><first>Scott M.</first><last>Thede</last></author>
-      <author><first>Mary</first><last>Harper</last></author>
+      <author id="mary-harper"><first>Mary</first><last>Harper</last></author>
       <url hash="f549bc08">W97-0124</url>
       <bibkey>thede-harper-1997-analysis</bibkey>
     </paper>
     <paper id="25">
       <title>A Local Grammar-based Approach to Recognizing of Proper Names in <fixed-case>K</fixed-case>orean Texts</title>
-      <author><first>Jee-Sun</first><last>Nam</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="jee-sun-nam"><first>Jee-Sun</first><last>Nam</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <url hash="e9b0b41f">W97-0125</url>
       <bibkey>nam-choi-1997-local</bibkey>
     </paper>
     <paper id="26">
       <title>A Statistical Approach to <fixed-case>T</fixed-case>hai Morphological Analyzer</title>
       <author><last>Kawtrakul</last><first>Asanee</first></author>
-      <author><last>Thumkanon</last><first>Chalathip</first></author>
+      <author id="chalathip-thumkanon"><first>Chalathip</first><last>Thumkanon</last></author>
       <url hash="66e7d35c">W97-0126</url>
       <bibkey>kawtrakul-thumkanon-1997-statistical</bibkey>
     </paper>
     <paper id="27">
       <title>Probabilistic Word Classification Based on Context-Sensitive Binary Tree Method</title>
       <author><first>Jun</first><last>Gao</last></author>
-      <author><first>XiXian</first><last>Chen</last></author>
+      <author id="xixian-chen"><first>XiXian</first><last>Chen</last></author>
       <url hash="18b2c10f">W97-0127</url>
       <bibkey>gao-chen-1997-probabilistic</bibkey>
     </paper>
@@ -207,10 +207,10 @@
     </paper>
     <paper id="2">
       <title>Experience in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Sense Tagging in the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Julie</first><last>Maples</last></author>
       <author><first>Lei</first><last>Duan</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <url hash="fd5f5586">W97-0202</url>
       <bibkey>wiebe-etal-1997-experience</bibkey>
     </paper>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="4">
       <title>A Frame-Semantic Approach to Semantic Annotation</title>
-      <author><first>John B.</first><last>Lowe</last></author>
+      <author id="john-b-lowe"><first>John B.</first><last>Lowe</last></author>
       <url hash="e08eac58">W97-0204</url>
       <bibkey>lowe-1997-frame</bibkey>
     </paper>
@@ -236,7 +236,7 @@
       <title>Analysis of a Hand-Tagging Task</title>
       <author><first>Christiane</first><last>Fellbaum</last></author>
       <author><first>Joachim</first><last>Grabowski</last></author>
-      <author><first>Shari</first><last>Land</last></author>
+      <author id="shari-landes"><first>Shari</first><last>Land</last></author>
       <url hash="07069d9a">W97-0206</url>
       <bibkey>fellbaum-etal-1997-analysis</bibkey>
     </paper>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="8">
       <title>Sense Tagging: Semantic Tagging with a Lexicon</title>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
       <url hash="e9d76756">W97-0208</url>
       <bibkey>wilks-stevenson-1997-sense</bibkey>
@@ -261,18 +261,18 @@
     </paper>
     <paper id="10">
       <title>Investigating Complementary Methods for Verb Sense Pruning</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
       <url hash="9e2df21f">W97-0210</url>
       <bibkey>jing-etal-1997-investigating</bibkey>
     </paper>
     <paper id="11">
       <title>Towards a Bootstrapping Framework for Corpus Semantic Tagging</title>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Michelangelo</first><last>Della Rocca</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <url hash="35b0f34c">W97-0211</url>
       <bibkey>basili-etal-1997-towards</bibkey>
     </paper>
@@ -292,14 +292,14 @@
     </paper>
     <paper id="14">
       <title>Writing Annotation Instructions</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <url hash="497446be">W97-0214</url>
       <bibkey>wiebe-1997-writing</bibkey>
     </paper>
     <paper id="15">
       <title>Combining Knowledge Sources for Automatic Semantic Tagging</title>
-      <author><first>Douglas</first><last>Jones</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="douglas-jones"><first>Douglas</first><last>Jones</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="658f73b9">W97-0215</url>
       <bibkey>jones-onyshkevych-1997-combining</bibkey>
     </paper>
@@ -318,14 +318,14 @@
     </paper>
     <paper id="18">
       <title>Constructing Semantic Tagsets</title>
-      <author><first>Alain</first><last>Polguere</last></author>
+      <author id="alain-polguere"><first>Alain</first><last>Polguere</last></author>
       <url hash="236fe699">W97-0218</url>
       <bibkey>polguere-1997-constructing</bibkey>
     </paper>
     <paper id="19">
       <title>Structured Lexicons and Semantic Tagging</title>
-      <author><first>Bonnie J.</first><last>Dorr</last></author>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="bonnie-dorr"><first>Bonnie J.</first><last>Dorr</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
       <url hash="101062cf">W97-0219</url>
       <bibkey>dorr-olsen-1997-structured</bibkey>
     </paper>
@@ -342,35 +342,35 @@
     </frontmatter>
     <paper id="1">
       <title>A Linear Observed Time Statistical Parser Based on Maximum Entropy Models</title>
-      <author><first>Adwait</first><last>Ratnaparkhi</last></author>
+      <author id="adwait-ratnaparkhi"><first>Adwait</first><last>Ratnaparkhi</last></author>
       <url hash="4f66b03f">W97-0301</url>
       <bibkey>ratnaparkhi-1997-linear</bibkey>
     </paper>
     <paper id="2">
       <title>Global Thresholding and Multiple-Pass Parsing</title>
-      <author><first>Joshua</first><last>Goodman</last></author>
+      <author id="joshua-goodman"><first>Joshua</first><last>Goodman</last></author>
       <url hash="259f6782">W97-0302</url>
       <bibkey>goodman-1997-global</bibkey>
     </paper>
     <paper id="3">
       <title>An Efficient Distribution of Labor in a Two Stage Robust Interpretation Process</title>
-      <author><first>Carolyn Penstein</first><last>Rose</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="carolyn-rose"><first>Carolyn Penstein</first><last>Rose</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="439112dc">W97-0303</url>
       <bibkey>rose-lavie-1997-efficient</bibkey>
     </paper>
     <paper id="4">
       <title>Text Segmentation Using Exponential Models</title>
       <author><first>Doug</first><last>Beeferman</last></author>
-      <author><first>Adam</first><last>Berger</last></author>
-      <author><first>John</first><last>Lafferty</last></author>
+      <author id="adam-berger"><first>Adam</first><last>Berger</last></author>
+      <author id="john-lafferty"><first>John</first><last>Lafferty</last></author>
       <url hash="a60df47c">W97-0304</url>
       <bibkey>beeferman-etal-1997-text</bibkey>
     </paper>
     <paper id="5">
       <title>Detecting Subject Boundaries Within Text: A Language Independent Statistical Approach</title>
       <author><first>Korin</first><last>Richmond</last></author>
-      <author><first>Andrew</first><last>Smith</last></author>
+      <author id="andrew-smith"><first>Andrew</first><last>Smith</last></author>
       <author><first>Einat</first><last>Amitay</last></author>
       <url hash="c7c9474b">W97-0305</url>
       <bibkey>richmond-etal-1997-detecting</bibkey>
@@ -393,7 +393,7 @@
     </paper>
     <paper id="8">
       <title>On aligning trees</title>
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <url hash="38cbc1d9">W97-0308</url>
       <bibkey>calder-1997-aligning</bibkey>
     </paper>
@@ -425,23 +425,23 @@
     </paper>
     <paper id="13">
       <title>A Corpus-Based Approach for Building Semantic Lexicons</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Jessica</first><last>Shepherd</last></author>
       <url hash="afdf1373">W97-0313</url>
       <bibkey>riloff-shepherd-1997-corpus</bibkey>
     </paper>
     <paper id="14">
       <title>Inducing Terminology for Lexical Acquisition</title>
-      <author><first>Roberto</first><last>Basili</last></author>
-      <author><first>Gianluca</first><last>De Rossi</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
+      <author id="gianluca-de-rossi"><first>Gianluca</first><last>De Rossi</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
       <url hash="feaa5b12">W97-0314</url>
       <bibkey>basili-etal-1997-inducing</bibkey>
     </paper>
     <paper id="15">
       <title>Name Searching and Information Retrieval</title>
       <author><first>Paul</first><last>Thompson</last></author>
-      <author><first>Christopher C.</first><last>Dozier</last></author>
+      <author id="christopher-dozier"><first>Christopher C.</first><last>Dozier</last></author>
       <url hash="2bdb3ea9">W97-0315</url>
       <bibkey>thompson-dozier-1997-name</bibkey>
     </paper>
@@ -465,30 +465,30 @@
     </paper>
     <paper id="19">
       <title>Probabilistic Coreference in Information Extraction</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <url hash="755eb5a2">W97-0319</url>
       <bibkey>kehler-1997-probabilistic</bibkey>
     </paper>
     <paper id="20">
       <title>An Empirical Approach to Temporal Reference Resolution</title>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Kenneth</first><last>McKeever</last></author>
+      <author id="kenneth-j-mckeever"><first>Kenneth</first><last>McKeever</last></author>
       <author><first>Thorsten</first><last>Ohrstrom-Sandgren</last></author>
       <url hash="b6db4014">W97-0320</url>
       <bibkey>wiebe-etal-1997-empirical</bibkey>
     </paper>
     <paper id="21">
       <title>Word Sense Disambiguation Based on Structured Semantic Space</title>
-      <author><last>Ji</last><first>Donghong</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="3b305c4c">W97-0321</url>
       <bibkey>ji-huang-1997-word</bibkey>
     </paper>
     <paper id="22">
       <title>Distinguishing Word Senses in Untagged Text</title>
       <author><first>Ted</first><last>Pedersen</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <url hash="e866b0bf">W97-0322</url>
       <bibkey>pedersen-bruce-1997-distinguishing</bibkey>
     </paper>
@@ -518,8 +518,8 @@
     </paper>
     <paper id="2">
       <title>A Dialogue Analysis Model with Statistical Speech Act Processing for Dialogue Machine Translation</title>
-      <author><first>Jae-won</first><last>Lee</last></author>
-      <author><first>Gil Chang</first><last>Kim</last></author>
+      <author id="jae-won-lee"><first>Jae-won</first><last>Lee</last></author>
+      <author id="gil-chang-kim"><first>Gil Chang</first><last>Kim</last></author>
       <url hash="baff2c64">W97-0402</url>
       <bibkey>lee-kim-1997-dialogue</bibkey>
     </paper>
@@ -569,40 +569,40 @@
     </paper>
     <paper id="8">
       <title><fixed-case>E</fixed-case>nglish-to-<fixed-case>M</fixed-case>andarin Speech Translation with Head Transducers</title>
-      <author><first>Hiyan</first><last>Alshawi</last></author>
+      <author id="hiyan-alshawi"><first>Hiyan</first><last>Alshawi</last></author>
       <url hash="02c55194">W97-0408</url>
       <bibkey>alshawi-1997-english</bibkey>
     </paper>
     <paper id="9">
       <title>Interactive Speech Translation in the <fixed-case>DIPLOMAT</fixed-case> Project</title>
-      <author><first>Robert</first><last>Frederking</last></author>
-      <author><first>Alexander</first><last>Rudnicky</last></author>
+      <author id="robert-frederking"><first>Robert</first><last>Frederking</last></author>
+      <author id="alexander-rudnicky"><first>Alexander</first><last>Rudnicky</last></author>
       <author><first>Christopher</first><last>Hogan</last></author>
       <url hash="d9ebf58e">W97-0409</url>
       <bibkey>frederking-etal-1997-interactive</bibkey>
     </paper>
     <paper id="10">
       <title>Expanding the Domain of a Multi-lingual Speech-to-Speech Translation System</title>
-      <author><first>Alon</first><last>Lavie</last></author>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Puming</first><last>Zhan</last></author>
-      <author><first>Maite</first><last>Taboada</last></author>
-      <author><first>Donna</first><last>Gates</last></author>
+      <author id="maite-taboada"><first>Maite</first><last>Taboada</last></author>
+      <author id="donna-gates"><first>Donna</first><last>Gates</last></author>
       <author><first>Mirella</first><last>Lapata</last></author>
       <author><first>Cortis</first><last>Clark</last></author>
       <author><first>Matthew</first><last>Broadhead</last></author>
-      <author><first>Alex</first><last>Waibel</last></author>
+      <author id="alex-waibel"><first>Alex</first><last>Waibel</last></author>
       <url hash="76f0b9f4">W97-0410</url>
       <bibkey>lavie-etal-1997-expanding</bibkey>
     </paper>
     <paper id="11">
       <title>Translation Methodology in the Spoken Language Translator: An Evaluation</title>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <author><first>Ralph</first><last>Becket</last></author>
-      <author><first>Manny</first><last>Rayner</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
       <author><first>Robert</first><last>Eklund</last></author>
       <author><first>Catriona</first><last>MacDermid</last></author>
-      <author><first>Mats</first><last>Wirén</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wirén</last></author>
       <author><first>Sabine</first><last>Kirchmeier-Andersen</last></author>
       <author><first>Christina</first><last>Philp</last></author>
       <url hash="6f7e005e">W97-0411</url>
@@ -628,7 +628,7 @@
     </paper>
     <paper id="15">
       <title>Spoken Language Translation with the <fixed-case>ITSV</fixed-case>ox System</title>
-      <author><first>Eric</first><last>Wehrli</last></author>
+      <author id="eric-wehrli"><first>Eric</first><last>Wehrli</last></author>
       <author><first>Jean-Luc</first><last>Cochard</last></author>
       <url hash="362c0dc3">W97-0415</url>
       <bibkey>wehrli-cochard-1997-spoken</bibkey>
@@ -660,7 +660,7 @@
     </paper>
     <paper id="3">
       <title>Simple <fixed-case>NLP</fixed-case> Techniques for Expanding Telegraphic Sentences</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="2ead088f">W97-0503</url>
       <bibkey>mccoy-1997-simple</bibkey>
     </paper>
@@ -697,18 +697,18 @@
     </paper>
     <paper id="8">
       <title>A Tutor for Teaching <fixed-case>E</fixed-case>nglish as a Second Language for Deaf Users of <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage</title>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <author><first>Lisa N.</first><last>Masterman</last></author>
       <url hash="b3a24dd4">W97-0508</url>
       <bibkey>mccoy-masterman-1997-tutor</bibkey>
     </paper>
     <paper id="9">
       <title>Application of <fixed-case>NLP</fixed-case> technologyto production of closed-caption <fixed-case>TV</fixed-case>. programs in <fixed-case>J</fixed-case>apanese for the hearing impaired</title>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <author><first>Terumasa</first><last>Ehara</last></author>
       <author><first>Eiji</first><last>Sawamura</last></author>
       <author><first>Yoshiharu</first><last>Abe</last></author>
-      <author><first>Katsuhiko</first><last>Shirai</last></author>
+      <author id="katsuhiko-shirai"><first>Katsuhiko</first><last>Shirai</last></author>
       <url hash="545f9b00">W97-0509</url>
       <bibkey>wakao-etal-1997-application</bibkey>
     </paper>
@@ -734,9 +734,9 @@
     </frontmatter>
     <paper id="1">
       <title>Evaluating Interactive Dialogue Systems: Extending Component Evaluation to Integrated System Evaluation</title>
-      <author><first>Marilyn A.</first><last>Walker</last></author>
-      <author><first>Diane J.</first><last>Litman</last></author>
-      <author><first>Candace A.</first><last>Kamm</last></author>
+      <author id="marilyn-walker"><first>Marilyn A.</first><last>Walker</last></author>
+      <author id="diane-litman"><first>Diane J.</first><last>Litman</last></author>
+      <author id="candace-a-kamm"><first>Candace A.</first><last>Kamm</last></author>
       <author><first>Alicia</first><last>Abella</last></author>
       <url hash="057e3632">W97-0601</url>
       <bibkey>walker-etal-1997-evaluating</bibkey>
@@ -744,37 +744,37 @@
     <paper id="2">
       <title>A Generic Template to evaluate integrated components in spoken dialogue systems</title>
       <author><first>Gavin E.</first><last>Churcher</last></author>
-      <author><first>Eric S.</first><last>Atwell</last></author>
+      <author id="eric-atwell"><first>Eric S.</first><last>Atwell</last></author>
       <author><first>Clive</first><last>Souter</last></author>
       <url hash="a0e541b1">W97-0602</url>
       <bibkey>churcher-etal-1997-generic</bibkey>
     </paper>
     <paper id="3">
       <title><fixed-case>GENERALITY</fixed-case> <fixed-case>AND</fixed-case> <fixed-case>OBJECTIVITY</fixed-case> Central Issues in Putting a Dialogue Evaluation Tool into Practical Use</title>
-      <author><first>Laila</first><last>Dybkjaer</last></author>
-      <author><first>Niels Ole</first><last>Bernsen</last></author>
-      <author><first>Hans</first><last>Dybkjaer</last></author>
+      <author id="laila-dybkjaer"><first>Laila</first><last>Dybkjaer</last></author>
+      <author id="niels-ole-bernsen"><first>Niels Ole</first><last>Bernsen</last></author>
+      <author id="hans-dybkjaer"><first>Hans</first><last>Dybkjaer</last></author>
       <url hash="0b8b3b74">W97-0603</url>
       <bibkey>dybkjaer-etal-1997-generality</bibkey>
     </paper>
     <paper id="4">
       <title>An Object-Oriented Model for the Design of Cross-Domain Dialogue Systems</title>
-      <author><first>Ian M.</first><last>O’Neill</last></author>
-      <author><first>Michael F.</first><last>McTear</last></author>
+      <author id="ian-m-oneill"><first>Ian M.</first><last>O’Neill</last></author>
+      <author id="michael-f-mctear"><first>Michael F.</first><last>McTear</last></author>
       <url hash="731befe5">W97-0604</url>
       <bibkey>oneill-mctear-1997-object</bibkey>
     </paper>
     <paper id="5">
       <title>Automatic Lexicon Enhancement by Means of Corpus Tagging</title>
-      <author><first>Frederic</first><last>Bechet</last></author>
+      <author id="frederic-bechet"><first>Frederic</first><last>Bechet</last></author>
       <author><first>Thierry</first><last>Spriet</last></author>
-      <author><first>Marc</first><last>El-Beze</last></author>
+      <author id="marc-el-beze"><first>Marc</first><last>El-Beze</last></author>
       <url hash="1dcc7f28">W97-0605</url>
       <bibkey>bechet-etal-1997-automatic</bibkey>
     </paper>
     <paper id="6">
       <title>Clarification Dialogues as Measure to Increase Robustness in a Spoken Dialogue System</title>
-      <author><first>Elisabeth</first><last>Maier</last></author>
+      <author id="elisabeth-maier"><first>Elisabeth</first><last>Maier</last></author>
       <author><first>Norbert</first><last>Reithinger</last></author>
       <author><first>Jan</first><last>Alexandersson</last></author>
       <url hash="babe9f49">W97-0606</url>
@@ -782,7 +782,7 @@
     </paper>
     <paper id="7">
       <title>Performance Measures for the Next Generation of Spoken Natural Language Dialog Systems</title>
-      <author><first>Ronnie W.</first><last>Smith</last></author>
+      <author id="ronnie-w-smith"><first>Ronnie W.</first><last>Smith</last></author>
       <url hash="9c9455ed">W97-0607</url>
       <bibkey>smith-1997-performance</bibkey>
     </paper>
@@ -803,7 +803,7 @@
     </paper>
     <paper id="10">
       <title>Context Modeling for Language and Speech Generation</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <url hash="982e5b2e">W97-0610</url>
       <bibkey>van-deemter-1997-context</bibkey>
     </paper>
@@ -818,7 +818,7 @@
       <author><first>Toshihiko</first><last>Itoh</last></author>
       <author><first>Akihiro</first><last>Denda</last></author>
       <author><first>Satoru</first><last>Kogure</last></author>
-      <author><first>Seiichi</first><last>Nakagawa</last></author>
+      <author id="seiichi-nakagawa"><first>Seiichi</first><last>Nakagawa</last></author>
       <url hash="8e7b4f34">W97-0612</url>
       <bibkey>itoh-etal-1997-robust</bibkey>
     </paper>
@@ -833,7 +833,7 @@
       <author><first>Mark-Jan</first><last>Nederhof</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <author><first>Rob</first><last>Koeling</last></author>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="0ca66ebf">W97-0614</url>
       <bibkey>nederhof-etal-1997-grammatical</bibkey>
     </paper>
@@ -846,8 +846,8 @@
     </paper>
     <paper id="16">
       <title>How to obey the 7 commandments for spoken dialogue?</title>
-      <author><first>Emiel</first><last>Krahmer</last></author>
-      <author><first>Jan</first><last>Landsbergen</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
+      <author id="jan-landsbergen"><first>Jan</first><last>Landsbergen</last></author>
       <author><first>Xavier</first><last>Pouteau</last></author>
       <url hash="5e394b18">W97-0616</url>
       <bibkey>krahmer-etal-1997-obey</bibkey>
@@ -882,7 +882,7 @@
     </paper>
     <paper id="21">
       <title>Speech-Graphics Dialogue Systems</title>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <author><first>Michael S.</first><last>Fulkerson</last></author>
       <author><first>Greg A.</first><last>Keim</last></author>
       <url hash="80313ab8">W97-0621</url>
@@ -901,13 +901,13 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>S</fixed-case>ummarising: Where are we now? Where should we go?</title>
-      <author><first>Karen</first><last>Sparck Jones</last></author>
+      <author id="karen-sparck-jones"><first>Karen</first><last>Sparck Jones</last></author>
       <url hash="9db8301b">W97-0701</url>
       <bibkey>sparck-jones-1997-summarising</bibkey>
     </paper>
     <paper id="2">
       <title>Salience-based Content Characterisafion of Text Documents</title>
-      <author><first>Branimir</first><last>Boguraev</last></author>
+      <author id="branimir-boguraev"><first>Branimir</first><last>Boguraev</last></author>
       <url hash="95da8b11">W97-0702</url>
       <bibkey>boguraev-1997-salience</bibkey>
     </paper>
@@ -920,8 +920,8 @@
     </paper>
     <paper id="4">
       <title>Automated Text Summarization in <fixed-case>SUMMARIST</fixed-case></title>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>ChinYew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>ChinYew</first><last>Lin</last></author>
       <url hash="9ea476c4">W97-0704</url>
       <bibkey>hovy-lin-1997-automated</bibkey>
     </paper>
@@ -935,7 +935,7 @@
     </paper>
     <paper id="6">
       <title>A Proposal for Task-based Evaluation of Text Summarization Systems</title>
-      <author><first>Therese Firmin</first><last>Hand</last></author>
+      <author id="therese-firmin"><first>Therese Firmin</first><last>Hand</last></author>
       <url hash="bdcc85a5">W97-0706</url>
       <bibkey>hand-1997-proposal</bibkey>
     </paper>
@@ -943,7 +943,7 @@
       <title>Automatic Text Summarization by Paragraph Extraction</title>
       <author><first>Mandar</first><last>Mitra</last></author>
       <author><first>Amit</first><last>Singhal</last></author>
-      <author><first>Chris</first><last>Buckley</last></author>
+      <author id="chris-buckley"><first>Chris</first><last>Buckley</last></author>
       <url hash="90a398e2">W97-0707</url>
       <bibkey>mitra-etal-1997-automatic</bibkey>
     </paper>
@@ -958,7 +958,7 @@
     <paper id="9">
       <title>Statistical methods for retrieving most significant paragraphs in newspaper articles</title>
       <author><first>Jose</first><last>Abracos</last></author>
-      <author><first>Gabriel Pereira</first><last>Lopes</last></author>
+      <author id="gabriel-lopes"><first>Gabriel Pereira</first><last>Lopes</last></author>
       <url hash="356e474a">W97-0709</url>
       <bibkey>abracos-lopes-1997-statistical</bibkey>
     </paper>
@@ -1017,7 +1017,7 @@
       <title>Multilingual design of <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Piek</first><last>Vossen</last></author>
       <author><first>Pedro</first><last>Diez-Orzas</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <url hash="fd8d38ad">W97-0801</url>
       <bibkey>vossen-etal-1997-multilingual</bibkey>
     </paper>
@@ -1046,14 +1046,14 @@
     <paper id="5">
       <title>Lexical Discrimination with the <fixed-case>I</fixed-case>talian Version of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Alessandro</first><last>Artale</last></author>
-      <author><first>Bernardo</first><last>Magnini</last></author>
+      <author id="bernardo-magnini"><first>Bernardo</first><last>Magnini</last></author>
       <author><first>Carlo</first><last>Strapparava</last></author>
       <url hash="d1b2150d">W97-0805</url>
       <bibkey>artale-etal-1997-lexical</bibkey>
     </paper>
     <paper id="6">
       <title>Integrating a Lexical Database and a Training Collection for Text Categorization</title>
-      <author><first>Jose Maria</first><last>Gomez-Hidalgo</last></author>
+      <author id="jose-maria-gomez-hidalgo"><first>Jose Maria</first><last>Gomez-Hidalgo</last></author>
       <author><first>Manuel</first><last>de Buenaga Rodriguez</last></author>
       <url hash="39d1235c">W97-0806</url>
       <bibkey>gomez-hidalgo-de-buenaga-rodriguez-1997-integrating</bibkey>
@@ -1068,27 +1068,27 @@
     </paper>
     <paper id="8">
       <title>Word Sense Disambiguation for Acquisition of Selectional Preferences</title>
-      <author><first>Diana</first><last>McCarthy</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
       <url hash="38ec75f5">W97-0808</url>
       <bibkey>mccarthy-1997-word</bibkey>
     </paper>
     <paper id="9">
       <title>The Use of Lexical Semantics in Information Extraction</title>
-      <author><first>Joyce Yue</first><last>Chai</last></author>
-      <author><first>Alan W.</first><last>Biermann</last></author>
+      <author id="joyce-chai"><first>Joyce Yue</first><last>Chai</last></author>
+      <author id="alan-w-biermann"><first>Alan W.</first><last>Biermann</last></author>
       <url hash="af8b3506">W97-0809</url>
       <bibkey>chai-biermann-1997-use</bibkey>
     </paper>
     <paper id="10">
       <title>Subject and Object Dependency Extraction Using Finite-State Transducers</title>
-      <author><first>Salah</first><last>Ait-Mokhtar</last></author>
+      <author id="salah-ait-mokhtar"><first>Salah</first><last>Ait-Mokhtar</last></author>
       <author><first>Jean-Pierre</first><last>Chanod</last></author>
       <url hash="6876146b">W97-0810</url>
       <bibkey>ait-mokhtar-chanod-1997-subject</bibkey>
     </paper>
     <paper id="11">
       <title>An Experiment in Semantic Tagging using Hidden <fixed-case>M</fixed-case>arkov Model Tagging</title>
-      <author><first>Frederique</first><last>Segond</last></author>
+      <author id="frederique-segond"><first>Frederique</first><last>Segond</last></author>
       <author><first>Anne</first><last>Schiller</last></author>
       <author><first>Gregory</first><last>Grefenstette</last></author>
       <author><first>Jean-Pierre</first><last>Chanod</last></author>
@@ -1104,7 +1104,7 @@
     <paper id="13">
       <title>Inferring Semantic Similarity from Distributional Evidence: an Analogy-based Approach to Word Sense Disambiguation</title>
       <author><first>Stefano</first><last>Federici</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <url hash="23480e43">W97-0813</url>
       <bibkey>federici-etal-1997-inferring</bibkey>
@@ -1134,7 +1134,7 @@
       <author><first>Etienne</first><last>Cornu</last></author>
       <author><first>Francois</first><last>Grosjean</last></author>
       <author><first>Lysiane</first><last>Grosjean</last></author>
-      <author><first>Natalie</first><last>Kubler</last></author>
+      <author id="natalie-kubler"><first>Natalie</first><last>Kubler</last></author>
       <author><first>Nicolas</first><last>Lewy</last></author>
       <author><first>Corinne</first><last>Tschumi</last></author>
       <url hash="12d45fa2">W97-0902</url>
@@ -1143,11 +1143,11 @@
     <paper id="3">
       <title>Software Re-Use and Evolution in Text Generation Applications</title>
       <author><first>Karen</first><last>Kukich</last></author>
-      <author><first>Rebecca</first><last>Passonneau</last></author>
-      <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Dragomir</first><last>Radev</last></author>
+      <author id="rebecca-j-passonneau"><first>Rebecca</first><last>Passonneau</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen</first><last>McKeown</last></author>
+      <author id="dragomir-radev"><first>Dragomir</first><last>Radev</last></author>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <url hash="93457ad5">W97-0903</url>
       <bibkey>kukich-etal-1997-software</bibkey>
     </paper>
@@ -1165,7 +1165,7 @@
     </paper>
     <paper id="6">
       <title>Practical Considerations in Building a Multi-Lingual Authoring System for Business Letters</title>
-      <author><first>John</first><last>Tait</last></author>
+      <author id="john-tait"><first>John</first><last>Tait</last></author>
       <url hash="9f480401">W97-0906</url>
       <bibkey>tait-1997-practical</bibkey>
     </paper>
@@ -1194,16 +1194,16 @@
     </paper>
     <paper id="10">
       <title>Recycling Lingware in a Multilingual <fixed-case>MT</fixed-case> System</title>
-      <author><first>Manny</first><last>Rayner</last></author>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="manny-rayner"><first>Manny</first><last>Rayner</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <author><first>Ivan</first><last>Bretan</last></author>
       <author><first>Robert</first><last>Eklund</last></author>
-      <author><first>Mats</first><last>Wiren</last></author>
+      <author id="mats-wiren"><first>Mats</first><last>Wiren</last></author>
       <author><first>Steffen Leo</first><last>Hansen</last></author>
       <author><first>Sabine</first><last>Kirchmeier-Andersen</last></author>
       <author><first>Christina</first><last>Philp</last></author>
       <author><first>Finn</first><last>Sorensen</last></author>
-      <author><first>Hanne Erdman</first><last>Thomsen</last></author>
+      <author id="hanne-erdman-thomsen"><first>Hanne Erdman</first><last>Thomsen</last></author>
       <url hash="ed15f7a1">W97-0910</url>
       <bibkey>rayner-etal-1997-recycling</bibkey>
     </paper>
@@ -1221,29 +1221,29 @@
     <paper id="1">
       <title>A Trainable Message Understanding System</title>
       <author><first>Amit</first><last>Bagga</last></author>
-      <author><first>Joyce Yue</first><last>Chai</last></author>
+      <author id="joyce-chai"><first>Joyce Yue</first><last>Chai</last></author>
       <url hash="cbca815e">W97-1001</url>
       <bibkey>bagga-chai-1997-trainable</bibkey>
     </paper>
     <paper id="2">
       <title>Relational Learning of Pattern-Match Rules for Information Extraction</title>
-      <author><first>Mary Elaine</first><last>Califf</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="mary-elaine-califf"><first>Mary Elaine</first><last>Califf</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <url hash="87433324">W97-1002</url>
       <bibkey>califf-mooney-1997-relational</bibkey>
     </paper>
     <paper id="3">
       <title>A Preliminary Study of Word Clustering Based on Syntactic Behavior</title>
       <author><first>Wide R.</first><last>Hogenhout</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="7dcbaaab">W97-1003</url>
       <bibkey>hogenhout-matsumoto-1997-preliminary</bibkey>
     </paper>
     <paper id="4">
       <title>Learning New Compositions from Given Ones</title>
-      <author><last>Ji</last><first>Donghong</first></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><last>He</last><first>Jun</first></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="2028866a">W97-1004</url>
       <bibkey>ji-etal-1997-learning</bibkey>
     </paper>
@@ -1251,7 +1251,7 @@
       <title>A Statistical Decision Making Method: A Case Study on Prepositional Phrase Attachment</title>
       <author><first>Mehmet</first><last>Kayaalp</last></author>
       <author><first>Ted</first><last>Pedersen</last></author>
-      <author><first>Rebecca</first><last>Bruce</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
       <url hash="961e3c29">W97-1005</url>
       <bibkey>kayaalp-etal-1997-statistical</bibkey>
     </paper>
@@ -1264,15 +1264,15 @@
     </paper>
     <paper id="7">
       <title>From Psycholinguistic Modelling of Interlanguage in Second Language Acquisition to a Computational Model</title>
-      <author><first>Montse</first><last>Maritxalar</last></author>
-      <author><first>Arantza</first><last>Diaz de Ilarraza</last></author>
-      <author><first>Maite</first><last>Oronoz</last></author>
+      <author id="montse-maritxalar"><first>Montse</first><last>Maritxalar</last></author>
+      <author id="arantza-diaz-de-ilarraza"><first>Arantza</first><last>Diaz de Ilarraza</last></author>
+      <author id="maite-oronoz"><first>Maite</first><last>Oronoz</last></author>
       <url hash="78559a2e">W97-1007</url>
       <bibkey>maritxalar-etal-1997-psycholinguistic</bibkey>
     </paper>
     <paper id="8">
       <title>What makes a word: Learning base units in <fixed-case>J</fixed-case>apanese for speech recognition</title>
-      <author><first>Laura Mayfield</first><last>Tomokiyo</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura Mayfield</first><last>Tomokiyo</last></author>
       <author><first>Klaus</first><last>Ries</last></author>
       <url hash="2484d559">W97-1008</url>
       <bibkey>tomokiyo-ries-1997-makes</bibkey>
@@ -1287,13 +1287,13 @@
     <paper id="10">
       <title>Learning Stochastic Categorial Grammars</title>
       <author><first>Miles</first><last>Osborne</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="b4a63a9a">W97-1010</url>
       <bibkey>osborne-briscoe-1997-learning</bibkey>
     </paper>
     <paper id="11">
       <title>Learning and Application of Differential Grammars</title>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <url hash="71073230">W97-1011</url>
       <bibkey>powers-1997-learning</bibkey>
     </paper>
@@ -1311,8 +1311,8 @@
     </paper>
     <paper id="14">
       <title>Word Triggers and the <fixed-case>EM</fixed-case> Algorithm</title>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="e61cca6b">W97-1014</url>
       <bibkey>tillmann-ney-1997-word</bibkey>
     </paper>
@@ -1326,7 +1326,7 @@
     <paper id="16">
       <title>Resolving <fixed-case>PP</fixed-case> attachment Ambiguities with Memory-Based Learning</title>
       <author><first>Jakub</first><last>Zavrel</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Jorn</first><last>Veenstra</last></author>
       <url hash="0f6bd38e">W97-1016</url>
       <bibkey>zavrel-etal-1997-resolving</bibkey>
@@ -1346,7 +1346,7 @@
       <title>A Complexity Measure for Diachronic <fixed-case>C</fixed-case>hinese Phonology</title>
       <author><first>Anand</first><last>Raman</last></author>
       <author><first>John</first><last>Newman</last></author>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <url hash="3b5867cc">W97-1101</url>
       <bibkey>raman-etal-1997-complexity</bibkey>
     </paper>
@@ -1378,20 +1378,20 @@
     </paper>
     <paper id="6">
       <title>A <fixed-case>C</fixed-case>zech Morphological Lexicon</title>
-      <author><first>Hana</first><last>Skoumalova</last></author>
+      <author id="hana-skoumalova"><first>Hana</first><last>Skoumalova</last></author>
       <url hash="574206f9">W97-1106</url>
       <bibkey>skoumalova-1997-czech</bibkey>
     </paper>
     <paper id="7">
       <title>Stochastic phonological grammars and acceptability</title>
       <author><first>John</first><last>Coleman</last></author>
-      <author><first>Janet</first><last>Pierrehumbert</last></author>
+      <author id="janet-pierrehumbert"><first>Janet</first><last>Pierrehumbert</last></author>
       <url hash="bc6296ca">W97-1107</url>
       <bibkey>coleman-pierrehumbert-1997-stochastic</bibkey>
     </paper>
     <paper id="8">
       <title>Linearization of Nonlinear Lexical Representations</title>
-      <author><first>George Anton</first><last>Kiraz</last></author>
+      <author id="george-anton-kiraz"><first>George Anton</first><last>Kiraz</last></author>
       <url hash="c260f399">W97-1108</url>
       <bibkey>kiraz-1997-linearization</bibkey>
     </paper>
@@ -1408,7 +1408,7 @@
     </frontmatter>
     <paper id="1">
       <title>Probabilistic Model of Acoustic/Prosody/Concept Relationships for Speech Synthesis</title>
-      <author><first>Nanette M.</first><last>Veilleux</last></author>
+      <author id="nanette-veilleux"><first>Nanette M.</first><last>Veilleux</last></author>
       <url hash="c65a2c86">W97-1201</url>
       <bibkey>veilleux-1997-probabilistic</bibkey>
     </paper>
@@ -1430,7 +1430,7 @@
     <paper id="4">
       <title>Integrating Language Generation with Speech Synthesis in a Concept to Speech System</title>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="362652fd">W97-1204</url>
       <bibkey>pan-mckeown-1997-integrating</bibkey>
     </paper>
@@ -1478,8 +1478,8 @@
     </frontmatter>
     <paper id="1">
       <title>Resolving bridging references in unrestricted text</title>
-      <author><first>Massimo</first><last>Poesio</last></author>
-      <author><first>Renata</first><last>Vieira</last></author>
+      <author id="massimo-poesio"><first>Massimo</first><last>Poesio</last></author>
+      <author id="renata-vieira"><first>Renata</first><last>Vieira</last></author>
       <author><first>Simone</first><last>Teufel</last></author>
       <url hash="4de81ece">W97-1301</url>
       <bibkey>poesio-etal-1997-resolving</bibkey>
@@ -1494,7 +1494,7 @@
     </paper>
     <paper id="3">
       <title>Factors in anaphora resolution: they are not the only things that matter. A case study based on two different approaches</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="399fc020">W97-1303</url>
       <bibkey>mitkov-1997-factors</bibkey>
     </paper>
@@ -1544,15 +1544,15 @@
     </paper>
     <paper id="11">
       <title>Event coreference for information extraction</title>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Saliha</first><last>Azzam</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
       <url hash="358ee36d">W97-1311</url>
       <bibkey>humphreys-etal-1997-event</bibkey>
     </paper>
     <paper id="12">
       <title>How far are we from (semi-)automatic of anaphoric links in corpora?</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
       <url hash="3a6132b0">W97-1312</url>
       <bibkey>mitkov-1997-far</bibkey>
     </paper>
@@ -1565,7 +1565,7 @@
     </paper>
     <paper id="14">
       <title>Cooperation between pronoun and reference resolution for unrestricted texts</title>
-      <author><first>Andrei</first><last>Popescu-Belis</last></author>
+      <author id="andrei-popescu-belis"><first>Andrei</first><last>Popescu-Belis</last></author>
       <author><first>Isabelle</first><last>Robba</last></author>
       <url hash="543c41a6">W97-1314</url>
       <bibkey>popescu-belis-robba-1997-cooperation</bibkey>
@@ -1583,8 +1583,8 @@
     </frontmatter>
     <paper id="1">
       <title>Integration and Synchronization of Input Modes during Multimodal Human-Computer Interaction</title>
-      <author><first>Sharon</first><last>Oviatt</last></author>
-      <author><first>Antonella</first><last>DeAngeli</last></author>
+      <author id="sharon-oviatt"><first>Sharon</first><last>Oviatt</last></author>
+      <author id="antonella-de-angeli"><first>Antonella</first><last>DeAngeli</last></author>
       <author><first>Karen</first><last>Kuhn</last></author>
       <url hash="d59fb8c0">W97-1401</url>
       <bibkey>oviatt-etal-1997-integration</bibkey>
@@ -1592,7 +1592,7 @@
     <paper id="2">
       <title>Referring in Multimodal Systems: The Importance of User Expertise and System Features</title>
       <author><first>Daniela</first><last>Petrelli</last></author>
-      <author><first>Antonella</first><last>De Angeli</last></author>
+      <author id="antonella-de-angeli"><first>Antonella</first><last>De Angeli</last></author>
       <author><first>Walter</first><last>Gerbino</last></author>
       <author><first>Giulia</first><last>Cassano</last></author>
       <url hash="189a06ff">W97-1402</url>
@@ -1601,7 +1601,7 @@
     <paper id="3">
       <title>Towards Generation of Fluent Referring Action in Multimodal Situations</title>
       <author><first>Tsuneaki</first><last>Kato</last></author>
-      <author><first>Yukiko I.</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko I.</first><last>Nakano</last></author>
       <url hash="4f44b2cd">W97-1403</url>
       <bibkey>kato-nakano-1997-towards</bibkey>
     </paper>
@@ -1642,7 +1642,7 @@
     </paper>
     <paper id="9">
       <title>Planning Referential Acts for Animated Presentation Agents</title>
-      <author><first>Elisabeth</first><last>Andre</last></author>
+      <author id="elisabeth-andre"><first>Elisabeth</first><last>Andre</last></author>
       <author><first>Thomas</first><last>Rist</last></author>
       <url hash="01e8b06f">W97-1409</url>
       <bibkey>andre-rist-1997-planning</bibkey>
@@ -1657,8 +1657,8 @@
     <paper id="11">
       <title>Referring to Displays in Multimodal Interfaces</title>
       <author><first>Daqing</first><last>He</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <url hash="cd39ace1">W97-1411</url>
       <bibkey>he-etal-1997-referring</bibkey>
     </paper>
@@ -1672,14 +1672,14 @@
     <paper id="13">
       <title>Constraints on the Use of Language, Gesture and Speech for Multimodal Dialogues</title>
       <author><first>Bertrand</first><last>Gaiffe</last></author>
-      <author><first>Laurent</first><last>Romary</last></author>
+      <author id="laurent-romary"><first>Laurent</first><last>Romary</last></author>
       <url hash="60ad87fe">W97-1413</url>
       <bibkey>gaiffe-romary-1997-constraints</bibkey>
     </paper>
     <paper id="14">
       <title>A Model for Multimodal Reference Resolution</title>
-      <author><first>Luis. A.</first><last>Pineda</last></author>
-      <author><first>E. Gabriela</first><last>Garza</last></author>
+      <author id="luis-a-pineda"><first>Luis. A.</first><last>Pineda</last></author>
+      <author id="e-gabriela-garza"><first>E. Gabriela</first><last>Garza</last></author>
       <url hash="dce4d56c">W97-1414</url>
       <bibkey>pineda-garza-1997-model</bibkey>
     </paper>
@@ -1716,60 +1716,60 @@
     </frontmatter>
     <paper id="1">
       <title>Some apparently disjoint aims and requirements for grammar development environments” the case of natural language generation</title>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <url hash="24097385">W97-1501</url>
       <bibkey>bateman-1997-apparently</bibkey>
     </paper>
     <paper id="2">
       <title>The <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>anker: a Tool for Supervised Training of Parsed Corpora</title>
-      <author><first>David</first><last>Carter</last></author>
+      <author id="david-carter"><first>David</first><last>Carter</last></author>
       <url hash="dd5baefe">W97-1502</url>
       <bibkey>carter-1997-treebanker</bibkey>
     </paper>
     <paper id="3">
       <title>Participatory Design for Linguistic Engineering: the Case of the <fixed-case>GEPPETTO</fixed-case> Development Environment</title>
-      <author><first>Fabio</first><last>Ciravegna</last></author>
-      <author><first>Alberto</first><last>Lavelli</last></author>
+      <author id="fabio-ciravegna"><first>Fabio</first><last>Ciravegna</last></author>
+      <author id="alberto-lavelli"><first>Alberto</first><last>Lavelli</last></author>
       <author><first>Daniela</first><last>Petrelli</last></author>
-      <author><first>Fabio</first><last>Pianesi</last></author>
+      <author id="fabio-pianesi"><first>Fabio</first><last>Pianesi</last></author>
       <url hash="033e892c">W97-1503</url>
       <bibkey>ciravegna-etal-1997-participatory</bibkey>
     </paper>
     <paper id="4">
       <title>Hypertextual Grammar Development</title>
-      <author><first>Luca</first><last>Dini</last></author>
-      <author><first>Giampaolo</first><last>Mazzini</last></author>
+      <author id="luca-dini"><first>Luca</first><last>Dini</last></author>
+      <author id="giampaolo-mazzini"><first>Giampaolo</first><last>Mazzini</last></author>
       <url hash="90cfd4c9">W97-1504</url>
       <bibkey>dini-mazzini-1997-hypertextual</bibkey>
     </paper>
     <paper id="5">
       <title>Maintaining the Forest and Burning out the Underbrush in <fixed-case>XTAG</fixed-case></title>
-      <author><first>Christine</first><last>Doran</last></author>
-      <author><first>Beth</first><last>Hockey</last></author>
+      <author id="christine-doran"><first>Christine</first><last>Doran</last></author>
+      <author id="beth-ann-hockey"><first>Beth</first><last>Hockey</last></author>
       <author><first>Philip</first><last>Hopely</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <author><first>Anoop</first><last>Sarkar</last></author>
-      <author><first>B.</first><last>Srinivas</last></author>
+      <author id="srinivas-bangalore"><first>B.</first><last>Srinivas</last></author>
       <author><first>Fei</first><last>Xia</last></author>
       <url hash="9be3dcba">W97-1505</url>
       <bibkey>doran-etal-1997-maintaining</bibkey>
     </paper>
     <paper id="6">
       <title>The <fixed-case>C</fixed-case>on<fixed-case>T</fixed-case>roll System as Large Grammar Development Platform</title>
-      <author><first>Thilo</first><last>Gotz</last></author>
-      <author><first>Walt Detmar</first><last>Meurers</last></author>
+      <author id="thilo-gotz"><first>Thilo</first><last>Gotz</last></author>
+      <author id="detmar-meurers"><first>Walt Detmar</first><last>Meurers</last></author>
       <url hash="695a5db6">W97-1506</url>
       <bibkey>gotz-meurers-1997-controll</bibkey>
     </paper>
     <paper id="7">
       <title>Application-driven automatic subgrammar extraction</title>
-      <author><first>Renate</first><last>Henschel</last></author>
+      <author id="renate-henschel"><first>Renate</first><last>Henschel</last></author>
       <url hash="668e530a">W97-1507</url>
       <bibkey>henschel-1997-application</bibkey>
     </paper>
     <paper id="8">
       <title>Lexical Resource Reconciliation in the Xerox Linguistic Environment</title>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
       <url hash="2580df17">W97-1508</url>
       <bibkey>kaplan-1997-lexical</bibkey>
     </paper>
@@ -1781,7 +1781,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>EFLUF</fixed-case> - an Implementation of a <fixed-case>FL</fixed-case>exible Unification Formalism</title>
-      <author><first>Lena</first><last>Stromback</last></author>
+      <author id="lena-stromback"><first>Lena</first><last>Stromback</last></author>
       <url hash="c6274e94">W97-1510</url>
       <bibkey>stromback-1997-efluf</bibkey>
     </paper>
@@ -1789,7 +1789,7 @@
       <title>Exploiting Contextual Information in Hypothesis Selection for Grammar Refinement</title>
       <author><first>Thanaruk</first><last>Theeramunkong</last></author>
       <author><first>Yasunobu</first><last>Kawaguchi</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <url hash="b2efea97">W97-1511</url>
       <bibkey>theeramunkong-etal-1997-exploiting</bibkey>
     </paper>
@@ -1801,15 +1801,15 @@
     </paper>
     <paper id="13">
       <title>Hdrug. A Flexible and Extendible Development Environment for Natural Language Processing.</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <author><first>Gosse</first><last>Bouma</last></author>
       <url hash="fb9f3815">W97-1513</url>
       <bibkey>van-noord-bouma-1997-hdrug</bibkey>
     </paper>
     <paper id="14">
       <title>An Object-Oriented Linguistic Engineering Environment using <fixed-case>LFG</fixed-case> (Lexical Functionnal Grammar) and <fixed-case>CG</fixed-case> (Conceptual Graphs)</title>
-      <author><first>Jerome</first><last>Vapillon</last></author>
-      <author><first>Xavier</first><last>Briffault</last></author>
+      <author id="jerome-vapillon"><first>Jerome</first><last>Vapillon</last></author>
+      <author id="xavier-briffault"><first>Xavier</first><last>Briffault</last></author>
       <author><first>Gerard</first><last>Sabah</last></author>
       <author><first>Karim</first><last>Chibout</last></author>
       <url hash="add11907">W97-1514</url>
diff --git a/data/xml/W98.xml b/data/xml/W98.xml
index 2cf5dcaef8..4ba73670bb 100644
--- a/data/xml/W98.xml
+++ b/data/xml/W98.xml
@@ -4,10 +4,10 @@
     <meta>
       <booktitle>Proceedings of the Fourth International Workshop on Tree Adjoining Grammars and Related Frameworks (<fixed-case>TAG</fixed-case>+4)</booktitle>
       <url hash="081d1f63">W98-01</url>
-      <editor><first>Anne</first><last>Abeillé</last></editor>
+      <editor id="anne-abeille"><first>Anne</first><last>Abeillé</last></editor>
       <editor><first>Tilman</first><last>Becker</last></editor>
       <editor><first>Giorgio</first><last>Satta</last></editor>
-      <editor><first>K.</first><last>Vijay-Shanker</last></editor>
+      <editor id="k-vijay-shanker"><first>K.</first><last>Vijay-Shanker</last></editor>
       <publisher>Institute for Research in Cognitive Science</publisher>
       <address>University of Pennsylvania</address>
       <month>August</month>
@@ -21,14 +21,14 @@
     <paper id="1">
       <title>An experiment on synchronous <fixed-case>TAG</fixed-case>s for the construction of a transfer module</title>
       <author><first>Alexandre</first><last>Agustini</last></author>
-      <author><first>Vera Lúcia Strube</first><last>de Lima</last></author>
+      <author id="vera-lucia-strube-de-lima"><first>Vera Lúcia Strube</first><last>de Lima</last></author>
       <pages>1–4</pages>
       <url hash="ef05ed10">W98-0101</url>
       <bibkey>agustini-de-lima-1998-experiment</bibkey>
     </paper>
     <paper id="2">
       <title>Transplanting supertags from <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish</title>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <pages>5–8</pages>
       <url hash="104a4611">W98-0102</url>
       <bibkey>bangalore-1998-transplanting</bibkey>
@@ -44,7 +44,7 @@
     <paper id="4">
       <title>Motion verbs and semantic features in <fixed-case>TAG</fixed-case></title>
       <author><first>Tonia</first><last>Bleam</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>13–16</pages>
       <url hash="dc98786d">W98-0104</url>
@@ -59,7 +59,7 @@
     </paper>
     <paper id="6">
       <title>Can the <fixed-case>TAG</fixed-case> derivation tree represent a semantic graph? An answer in the light of Meaning-Text Theory</title>
-      <author><first>Marie-Hélène</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Hélène</first><last>Candito</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <pages>21–24</pages>
       <url hash="392c77cd">W98-0106</url>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="7">
       <title>Defining <fixed-case>DTG</fixed-case> derivations to get semantic graphs</title>
-      <author><first>Marie-Hélène</first><last>Candito</last></author>
+      <author id="marie-candito"><first>Marie-Hélène</first><last>Candito</last></author>
       <author><first>Sylvain</first><last>Kahane</last></author>
       <pages>25–28</pages>
       <url hash="7c3f0215">W98-0107</url>
@@ -75,11 +75,11 @@
     </paper>
     <paper id="8">
       <title>The <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>ys project</title>
-      <author><first>John</first><last>Carroll</last></author>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <author><first>Olga</first><last>Shaumyan</last></author>
       <author><first>Martine</first><last>Smets</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <pages>29–33</pages>
       <url hash="4380647f">W98-0108</url>
       <bibkey>carroll-etal-1998-lexsys</bibkey>
@@ -100,9 +100,9 @@
     </paper>
     <paper id="11">
       <title>A tabular interpretation of bottom-up automata for <fixed-case>TAG</fixed-case></title>
-      <author><first>Eric</first><last>de la Clergerie</last></author>
-      <author><first>Miguel A.</first><last>Alonso Pardo</last></author>
-      <author><first>David Cabrero</first><last>Souto</last></author>
+      <author id="eric-villemonte-de-la-clergerie"><first>Eric</first><last>de la Clergerie</last></author>
+      <author id="miguel-a-alonso"><first>Miguel A.</first><last>Alonso Pardo</last></author>
+      <author id="david-cabrero-souto"><first>David Cabrero</first><last>Souto</last></author>
       <pages>42–45</pages>
       <url hash="a8d8134e">W98-0111</url>
       <bibkey>de-la-clergerie-etal-1998-tabular</bibkey>
@@ -118,7 +118,7 @@
     <paper id="13">
       <title>Describing discourse semantics</title>
       <author><first>Claire</first><last>Gardent</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>50–53</pages>
       <url hash="509d4023">W98-0113</url>
       <bibkey>gardent-webber-1998-describing</bibkey>
@@ -163,7 +163,7 @@
     </paper>
     <paper id="19">
       <title>Partial proof trees and structural modalities</title>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <author><first>Seth</first><last>Kulick</last></author>
       <author><first>Natasha</first><last>Kurtonina</last></author>
       <pages>74–75</pages>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="26">
       <title>‘Category families’ for Categorial Grammars</title>
-      <author><first>Mary</first><last>McGee Wood</last></author>
+      <author id="mary-mcgee-wood"><first>Mary</first><last>McGee Wood</last></author>
       <pages>100–103</pages>
       <url hash="7036fe55">W98-0126</url>
       <bibkey>mcgee-wood-1998-category</bibkey>
@@ -225,8 +225,8 @@
       <title>Packing of feature structures for optimizing the <fixed-case>HPSG</fixed-case>-style grammar translated from <fixed-case>TAG</fixed-case></title>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
-      <author><first>Yuka</first><last>Tateisi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>104–107</pages>
       <url hash="c09105b8">W98-0127</url>
       <bibkey>miyao-etal-1998-packing</bibkey>
@@ -241,7 +241,7 @@
     <paper id="29">
       <title>Description theory, <fixed-case>LTAG</fixed-case>s and underspecified semantics</title>
       <author><first>Reinhard</first><last>Muskens</last></author>
-      <author><first>Emiel</first><last>Krahmer</last></author>
+      <author id="emiel-krahmer"><first>Emiel</first><last>Krahmer</last></author>
       <pages>112–115</pages>
       <url hash="abd43444">W98-0129</url>
       <bibkey>muskens-krahmer-1998-description</bibkey>
@@ -257,14 +257,14 @@
     </paper>
     <paper id="31">
       <title>Automatic extraction of stochastic lexicalized tree grammars from treebanks</title>
-      <author><first>Günter</first><last>Neumann</last></author>
+      <author id="gunter-neumann"><first>Günter</first><last>Neumann</last></author>
       <pages>120–123</pages>
       <url hash="b389ae03">W98-0131</url>
       <bibkey>neumann-1998-automatic</bibkey>
     </paper>
     <paper id="32">
       <title>Memoisation in sentence generation with lexicalised grammars</title>
-      <author><first>Nicolas</first><last>Nicolov</last></author>
+      <author id="nicolas-nicolov"><first>Nicolas</first><last>Nicolov</last></author>
       <pages>124–127</pages>
       <url hash="a6e366b6">W98-0132</url>
       <bibkey>nicolov-1998-memoisation</bibkey>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="35">
       <title>Wh-islands in <fixed-case>TAG</fixed-case> and related formalisms</title>
-      <author><first>Owen</first><last>Rambow</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
       <author><first>K.</first><last>Vijay-Shanker</last></author>
       <pages>147–150</pages>
       <url hash="27da335e">W98-0135</url>
@@ -316,7 +316,7 @@
     <paper id="39">
       <title>A compact encoding of a <fixed-case>DTG</fixed-case> grammar</title>
       <author><first>Martine</first><last>Smets</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <pages>164–167</pages>
       <url hash="0376f0d2">W98-0139</url>
       <bibkey>smets-evans-1998-compact</bibkey>
@@ -324,17 +324,17 @@
     <paper id="40">
       <title>Formal analyses of the <fixed-case>H</fixed-case>ungarian verbal complex</title>
       <author><first>Temese</first><last>Szalai</last></author>
-      <author><first>Edward</first><last>Stabler</last></author>
+      <author id="edward-stabler"><first>Edward</first><last>Stabler</last></author>
       <pages>168–171</pages>
       <url hash="8e77df9d">W98-0140</url>
       <bibkey>szalai-stabler-1998-formal</bibkey>
     </paper>
     <paper id="41">
       <title>Translating the <fixed-case>XTAG</fixed-case> <fixed-case>E</fixed-case>nglish grammar to <fixed-case>HPSG</fixed-case></title>
-      <author><first>Yuka</first><last>Tateisi</last></author>
+      <author id="yuka-tateisi"><first>Yuka</first><last>Tateisi</last></author>
       <author><first>Kentaro</first><last>Torisawa</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>172–175</pages>
       <url hash="ea6e9855">W98-0141</url>
       <bibkey>tateisi-etal-1998-translating</bibkey>
@@ -349,7 +349,7 @@
     <paper id="43">
       <title>Consistent grammar development using partial-tree descriptions for <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars</title>
       <author><first>Fei</first><last>Xia</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>K.</first><last>Vijay-Shanker</last></author>
       <author><first>Joseph</first><last>Rosenzweig</last></author>
       <pages>180–183</pages>
@@ -402,10 +402,10 @@
     </paper>
     <paper id="6">
       <title>“<fixed-case>I</fixed-case> just played that a minute ago!:” Designing User Interfaces for Audio Navigation</title>
-      <author><first>Julia</first><last>Hirschberg</last></author>
+      <author id="julia-hirschberg"><first>Julia</first><last>Hirschberg</last></author>
       <author><first>John</first><last>Choi</last></author>
-      <author><first>Christine</first><last>Nakatani</last></author>
-      <author><first>Steve</first><last>Whittaker</last></author>
+      <author id="christine-h-nakatani"><first>Christine</first><last>Nakatani</last></author>
+      <author id="steve-whittaker"><first>Steve</first><last>Whittaker</last></author>
       <url hash="7a682389">W98-0206</url>
       <bibkey>hirschberg-etal-1998-just</bibkey>
     </paper>
@@ -424,7 +424,7 @@
       <author><first>Rod</first><last>Holland</last></author>
       <author><first>Rob</first><last>Hyland</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
-      <author><first>Mark</first><last>Maybury</last></author>
+      <author id="mark-t-maybury"><first>Mark</first><last>Maybury</last></author>
       <author><first>Andy</first><last>Merlino</last></author>
       <author><first>Jim</first><last>Rayson</last></author>
       <url hash="f89a612e">W98-0208</url>
@@ -439,17 +439,17 @@
     </paper>
     <paper id="10">
       <title>A Media-Independent Content Language for Integrated Text and Graphics Generation</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Stephan</first><last>Kerpedjiev</last></author>
-      <author><first>Steven</first><last>Roth</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="stephan-m-kerpedjiev"><first>Stephan</first><last>Kerpedjiev</last></author>
+      <author id="steven-roth"><first>Steven</first><last>Roth</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <url hash="a7e8c3ef">W98-0210</url>
       <bibkey>green-etal-1998-media</bibkey>
     </paper>
     <paper id="11">
       <title>How to build a (quite general) linguistic diagram editor</title>
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <url hash="06b3de95">W98-0211</url>
       <bibkey>calder-1998-build</bibkey>
     </paper>
@@ -461,7 +461,7 @@
     </paper>
     <paper id="13">
       <title>Multimodal Visualization of Geometrical Constructions</title>
-      <author><first>Valerie</first><last>Bellynck</last></author>
+      <author id="valerie-bellynck"><first>Valerie</first><last>Bellynck</last></author>
       <url hash="1029c667">W98-0213</url>
       <bibkey>bellynck-1998-multimodal</bibkey>
     </paper>
@@ -480,7 +480,7 @@
     </paper>
     <paper id="16">
       <title>Integration of Speech and Vision in a small mobile robot</title>
-      <author><first>Dominique</first><last>Estival</last></author>
+      <author id="dominique-estival"><first>Dominique</first><last>Estival</last></author>
       <url hash="ccc6379f">W98-0216</url>
       <bibkey>estival-1998-integration</bibkey>
     </paper>
@@ -503,7 +503,7 @@
     </paper>
     <paper id="2">
       <title>Identifying the Linguistic Correlates of Rhetorical Relations</title>
-      <author><first>Simon H.</first><last>Corston-Oliver</last></author>
+      <author id="simon-corston-oliver"><first>Simon H.</first><last>Corston-Oliver</last></author>
       <url hash="680dc8f7">W98-0302</url>
       <bibkey>corston-oliver-1998-identifying</bibkey>
     </paper>
@@ -513,7 +513,7 @@
       <author><first>Karen</first><last>Kukich</last></author>
       <author><first>Susanne</first><last>Wolff</last></author>
       <author><first>Chi</first><last>Lu</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <url hash="e00bb112">W98-0303</url>
       <bibkey>burstein-etal-1998-enriching</bibkey>
     </paper>
@@ -562,13 +562,13 @@
     </paper>
     <paper id="11">
       <title>Some Exotic Discourse Markers of Spoken Dialog</title>
-      <author><first>Nigel</first><last>Ward</last></author>
+      <author id="nigel-ward"><first>Nigel</first><last>Ward</last></author>
       <url hash="e74b0a78">W98-0311</url>
       <bibkey>ward-1998-exotic</bibkey>
     </paper>
     <paper id="12">
       <title>Lexical Marking and the Recovery of Discourse Structure</title>
-      <author><first>Kathleen</first><last>Dahlgren</last></author>
+      <author id="kathleen-dahlgren"><first>Kathleen</first><last>Dahlgren</last></author>
       <url hash="63ff692a">W98-0312</url>
       <bibkey>dahlgren-1998-lexical</bibkey>
     </paper>
@@ -581,14 +581,14 @@
     </paper>
     <paper id="14">
       <title>Signalling in written text: a corpus-based approach</title>
-      <author><first>Marie-Paule</first><last>Pery-Woodley</last></author>
+      <author id="marie-paule-pery-woodley"><first>Marie-Paule</first><last>Pery-Woodley</last></author>
       <url hash="0cfa2c66">W98-0314</url>
       <bibkey>pery-woodley-1998-signalling</bibkey>
     </paper>
     <paper id="15">
       <title>Anchoring a <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar for Discourse</title>
-      <author><first>Bonnie Lynn</first><last>Webber</last></author>
-      <author><first>Aravind K.</first><last>Joshi</last></author>
+      <author id="bonnie-webber"><first>Bonnie Lynn</first><last>Webber</last></author>
+      <author id="aravind-joshi"><first>Aravind K.</first><last>Joshi</last></author>
       <url hash="a85a1388">W98-0315</url>
       <bibkey>webber-joshi-1998-anchoring</bibkey>
     </paper>
@@ -602,7 +602,7 @@
     </paper>
     <paper id="17">
       <title>Cue Phrase Selection in Instruction Dialogue Using Machine Learning</title>
-      <author><first>Yukiko I.</first><last>Nakano</last></author>
+      <author id="yukiko-i-nakano"><first>Yukiko I.</first><last>Nakano</last></author>
       <author><first>Tsuneaki</first><last>Kato</last></author>
       <url hash="622b604e">W98-0317</url>
       <bibkey>nakano-kato-1998-cue</bibkey>
@@ -616,8 +616,8 @@
     </paper>
     <paper id="19">
       <title>Lexical, Prosodic, and Syntactic Cues for Dialog Acts</title>
-      <author><first>Daniel</first><last>Jurafsky</last></author>
-      <author><first>Elizabeth</first><last>Shriberg</last></author>
+      <author id="dan-jurafsky"><first>Daniel</first><last>Jurafsky</last></author>
+      <author id="elizabeth-shriberg"><first>Elizabeth</first><last>Shriberg</last></author>
       <author><first>Barbara</first><last>Fox</last></author>
       <author><first>Traci</first><last>Curl</last></author>
       <url hash="91613934">W98-0319</url>
@@ -636,7 +636,7 @@
     </frontmatter>
     <paper id="1">
       <title>Towards an implementable dependency grammar</title>
-      <author><first>Timo</first><last>Järvinen</last></author>
+      <author id="timo-jarvinen"><first>Timo</first><last>Järvinen</last></author>
       <author><first>Pasi</first><last>Tapanainen</last></author>
       <url hash="a8ec76d0">W98-0501</url>
       <bibkey>jarvinen-tapanainen-1998-towards</bibkey>
@@ -649,10 +649,10 @@
     </paper>
     <paper id="3">
       <title>Two Useful Measures of Word Order Complexity</title>
-      <author><first>Tomas</first><last>Holan</last></author>
-      <author><first>Vladislav</first><last>Kubon</last></author>
-      <author><first>Karel</first><last>Oliva</last></author>
-      <author><first>Martin</first><last>Platek</last></author>
+      <author id="tomas-holan"><first>Tomas</first><last>Holan</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kubon</last></author>
+      <author id="karel-oliva"><first>Karel</first><last>Oliva</last></author>
+      <author id="martin-platek"><first>Martin</first><last>Platek</last></author>
       <url hash="9224840f">W98-0503</url>
       <bibkey>holan-etal-1998-two</bibkey>
     </paper>
@@ -670,14 +670,14 @@
     </paper>
     <paper id="6">
       <title>Movement rules revisited</title>
-      <author><first>Eva</first><last>Hajicova</last></author>
+      <author id="eva-hajicova"><first>Eva</first><last>Hajicova</last></author>
       <url hash="30b292e4">W98-0506</url>
       <bibkey>hajicova-1998-movement</bibkey>
     </paper>
     <paper id="7">
       <title>Integration of syntactic and lexical information in a hierarchical dependency grammar</title>
       <author><first>Cristina</first><last>Barbero</last></author>
-      <author><first>Leonardo</first><last>Lesmo</last></author>
+      <author id="leonardo-lesmo"><first>Leonardo</first><last>Lesmo</last></author>
       <author><first>Vincenzo</first><last>Lombardo</last></author>
       <url hash="4e5ecb6b">W98-0507</url>
       <bibkey>barbero-etal-1998-integration</bibkey>
@@ -715,15 +715,15 @@
     </paper>
     <paper id="12">
       <title>Complements and Adjuncts in Dependency Grammar Parsing Emulated by a Constrained Context-Free Grammar</title>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <url hash="ddc9a9b7">W98-0512</url>
       <bibkey>lai-huang-1998-complements</bibkey>
     </paper>
     <paper id="13">
       <title>An Annotated Corpus in <fixed-case>J</fixed-case>apanese Using <fixed-case>T</fixed-case>esniere’s Structural Syntax</title>
       <author><first>Yves</first><last>Lepage</last></author>
-      <author><last>Ando</last><first>Shin-Ichi</first></author>
+      <author id="shinichi-ando"><first>Shin-Ichi</first><last>Ando</last></author>
       <author><last>Akamine</last><first>Susumu</first></author>
       <author><last>Iida</last><first>Hitoshi</first></author>
       <url hash="2fad0c81">W98-0513</url>
@@ -763,10 +763,10 @@
     </paper>
     <paper id="4">
       <title>Using <fixed-case>NOMLEX</fixed-case> to Produce Nominalization Patterns for Information Extraction</title>
-      <author><first>Adam</first><last>Meyers</last></author>
-      <author><first>Catherine</first><last>Macleod</last></author>
+      <author id="adam-meyers"><first>Adam</first><last>Meyers</last></author>
+      <author id="catherine-macleod"><first>Catherine</first><last>Macleod</last></author>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Leslie</first><last>Barrett</last></author>
       <author><first>Ruth</first><last>Reeves</last></author>
       <url hash="9f1352a3">W98-0604</url>
@@ -778,7 +778,7 @@
       <author><first>Masaki</first><last>Murata</last></author>
       <author><first>Yasunori</first><last>Yata</last></author>
       <author><first>Mitsunobu</first><last>Shimada</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="a12bb0cb">W98-0605</url>
       <bibkey>kurohashi-etal-1998-construction</bibkey>
     </paper>
@@ -786,7 +786,7 @@
       <title>The treatment of noun phrase queries in a natural language database access system</title>
       <author><first>Alexandra</first><last>Klein</last></author>
       <author><first>Johannes</first><last>Matiasek</last></author>
-      <author><first>Harald</first><last>Trost</last></author>
+      <author id="harald-trost"><first>Harald</first><last>Trost</last></author>
       <url hash="1fc8480b">W98-0606</url>
       <bibkey>klein-etal-1998-treatment</bibkey>
     </paper>
@@ -794,13 +794,13 @@
       <title>Integrating Referring and Informing in <fixed-case>NP</fixed-case> Planning</title>
       <author><first>Michael</first><last>O’Donnell</last></author>
       <author><first>Hua</first><last>Cheng</last></author>
-      <author><first>Janet</first><last>Hitzeman</last></author>
+      <author id="janet-hitzeman"><first>Janet</first><last>Hitzeman</last></author>
       <url hash="2b2a21b1">W98-0607</url>
       <bibkey>odonnell-etal-1998-integrating</bibkey>
     </paper>
     <paper id="8">
       <title>Coreference in Knowledge Editing</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
       <author><first>Richard</first><last>Power</last></author>
       <url hash="6f675fda">W98-0608</url>
       <bibkey>van-deemter-power-1998-coreference</bibkey>
@@ -833,7 +833,7 @@
     </paper>
     <paper id="13">
       <title>Nominal Metonymy Processing</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <url hash="fc239d54">W98-0613</url>
       <bibkey>onyshkevych-1998-nominal</bibkey>
     </paper>
@@ -858,7 +858,7 @@
       <title>General Word Sense Disambiguation Method Based on a Full Sentential Context</title>
       <author><first>Jiri</first><last>Stetina</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="45ae64bb">W98-0701</url>
       <bibkey>stetina-etal-1998-general</bibkey>
     </paper>
@@ -870,14 +870,14 @@
     </paper>
     <paper id="3">
       <title>Word Sense Disambiguation based on Semantic Density</title>
-      <author><first>Rada</first><last>Mihalcea</last></author>
-      <author><first>Dan I.</first><last>Moldovan</last></author>
+      <author id="rada-mihalcea"><first>Rada</first><last>Mihalcea</last></author>
+      <author id="dan-moldovan"><first>Dan I.</first><last>Moldovan</last></author>
       <url hash="2372e89f">W98-0703</url>
       <bibkey>mihalcea-moldovan-1998-word</bibkey>
     </paper>
     <paper id="4">
       <title>The Use of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in Information Retrieval</title>
-      <author><last>Rila</last><first>Mandala</first></author>
+      <author id="rila-mandala"><first>Mandala</first><last>Rila</last></author>
       <author><last>Tokunaga</last><first>Takenobu</first></author>
       <author><last>Tanaka</last><first>Hozumi</first></author>
       <url hash="dde678a0">W98-0704</url>
@@ -913,31 +913,31 @@
     </paper>
     <paper id="9">
       <title>Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Building <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets</title>
-      <author><first>Xavier</first><last>Farreres</last></author>
-      <author><first>German</first><last>Rigau</last></author>
+      <author id="javier-farreres"><first>Xavier</first><last>Farreres</last></author>
+      <author id="german-rigau"><first>German</first><last>Rigau</last></author>
       <author><first>Horacio</first><last>Rodffguez</last></author>
       <url hash="7b870817">W98-0709</url>
       <bibkey>farreres-etal-1998-using</bibkey>
     </paper>
     <paper id="10">
       <title>Aligning <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Additional Lexical Resources</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="0095af0d">W98-0710</url>
       <bibkey>kwong-1998-aligning</bibkey>
     </paper>
     <paper id="11">
       <title>Automatic Adaptation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Sublanguages and to Computational Tasks</title>
-      <author><first>Roberto</first><last>Basili</last></author>
+      <author id="roberto-basili"><first>Roberto</first><last>Basili</last></author>
       <author><first>Alessandro</first><last>Cucchiarelli</last></author>
       <author><first>Carlo</first><last>Consoli</last></author>
-      <author><first>Maria Teresa</first><last>Pazienza</last></author>
-      <author><first>Paola</first><last>Velardi</last></author>
+      <author id="maria-teresa-pazienza"><first>Maria Teresa</first><last>Pazienza</last></author>
+      <author id="paola-velardi"><first>Paola</first><last>Velardi</last></author>
       <url hash="29c57d3a">W98-0711</url>
       <bibkey>basili-etal-1998-automatic</bibkey>
     </paper>
     <paper id="12">
       <title>Augmenting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-like lexical resources with distributional evidence. An application-oriented perspective</title>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <url hash="c7b5059a">W98-0712</url>
       <bibkey>montemagni-pirrelli-1998-augmenting</bibkey>
@@ -945,15 +945,15 @@
     <paper id="13">
       <title>Lexical Acquisition with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and the Mikrokosmos Ontology</title>
       <author><first>Tom</first><last>O’Hara</last></author>
-      <author><first>Kavi</first><last>Mahesh</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="kavi-mahesh"><first>Kavi</first><last>Mahesh</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="044e190c">W98-0713</url>
       <bibkey>ohara-etal-1998-lexical</bibkey>
     </paper>
     <paper id="14">
       <title>Algorithms for Ontological Mediation</title>
       <author><first>Alistair E.</first><last>Campbell</last></author>
-      <author><first>Stuart C.</first><last>Shapiro</last></author>
+      <author id="stuart-c-shapiro"><first>Stuart C.</first><last>Shapiro</last></author>
       <url hash="587d7fc2">W98-0714</url>
       <bibkey>campbell-shapiro-1998-algorithms</bibkey>
     </paper>
@@ -965,7 +965,7 @@
     </paper>
     <paper id="16">
       <title>A Comparison of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>R</fixed-case>oget’s Taxonomy for Measuring Semantic Similarity</title>
-      <author><first>Michael L.</first><last>McHale</last></author>
+      <author id="michael-l-mc-hale"><first>Michael L.</first><last>McHale</last></author>
       <url hash="3fe8268f">W98-0716</url>
       <bibkey>mchale-1998-comparison</bibkey>
     </paper>
@@ -978,7 +978,7 @@
     </paper>
     <paper id="18">
       <title>Usage of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in Natural Language Generation</title>
-      <author><first>Hongyan</first><last>Jing</last></author>
+      <author id="hongyan-jing"><first>Hongyan</first><last>Jing</last></author>
       <url hash="2e9db84e">W98-0718</url>
       <bibkey>jing-1998-usage</bibkey>
     </paper>
@@ -990,7 +990,7 @@
     </paper>
     <paper id="20">
       <title>Deriving Metonymic Coercions from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <url hash="0d7b4fb9">W98-0720</url>
       <bibkey>harabagiu-1998-deriving</bibkey>
     </paper>
@@ -1030,7 +1030,7 @@
       <title>Grapheme-to-phoneme transcription rules for <fixed-case>S</fixed-case>panish, with application to automatic speech recognition and synthesis</title>
       <author><first>Patrizia</first><last>Bonaventura</last></author>
       <author><first>Fabio</first><last>Giuliani</last></author>
-      <author><first>Juan Maria</first><last>Garrido</last></author>
+      <author id="juan-maria-garrido"><first>Juan Maria</first><last>Garrido</last></author>
       <author><first>Isabel</first><last>Ortin</last></author>
       <url hash="b545b958">W98-0804</url>
       <bibkey>bonaventura-etal-1998-grapheme</bibkey>
@@ -1079,7 +1079,7 @@
     </paper>
     <paper id="5">
       <title>An Approach to the Automatic Acquisition of Phonotactic Constraints</title>
-      <author><first>Anja</first><last>Belz</last></author>
+      <author id="anja-belz"><first>Anja</first><last>Belz</last></author>
       <url hash="cc53b717">W98-0905</url>
       <bibkey>belz-1998-approach</bibkey>
     </paper>
@@ -1104,7 +1104,7 @@
     <paper id="1">
       <title>Discovering Lexical Information by Tagging <fixed-case>A</fixed-case>rabic Newspaper Text</title>
       <author><first>Saleem</first><last>Abuleil</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="70bcb554">W98-1001</url>
       <bibkey>abuleil-evens-1998-discovering</bibkey>
     </paper>
@@ -1130,7 +1130,7 @@
     </paper>
     <paper id="5">
       <title>Translating Names and Technical Terms in <fixed-case>A</fixed-case>rabic Text</title>
-      <author><first>Bonnie</first><last>Glover</last></author>
+      <author id="bonnie-glover-stalls"><first>Bonnie</first><last>Glover</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <url hash="f59cd40a">W98-1005</url>
       <bibkey>glover-knight-1998-translating</bibkey>
@@ -1157,13 +1157,13 @@
     <paper id="9">
       <title>A Computational Morphology System for <fixed-case>A</fixed-case>rabic</title>
       <author><first>Riyad</first><last>Al-Shalabi</last></author>
-      <author><first>Martha</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha</first><last>Evens</last></author>
       <url hash="d170b747">W98-1009</url>
       <bibkey>al-shalabi-evens-1998-computational</bibkey>
     </paper>
     <paper id="10">
       <title>A Morphological Analyzer for <fixed-case>A</fixed-case>kkadian Verbal Forms with a Model of Phonetic Transformations</title>
-      <author><first>Francois</first><last>Barthelemy</last></author>
+      <author id="francois-barthelemy"><first>Francois</first><last>Barthelemy</last></author>
       <url hash="7abc11db">W98-1010</url>
       <bibkey>barthelemy-1998-morphological</bibkey>
     </paper>
@@ -1175,7 +1175,7 @@
     </paper>
     <paper id="12">
       <title>Generating Determiners and Quantifiers in <fixed-case>H</fixed-case>ebrew</title>
-      <author><first>Yael Dahan</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael Dahan</first><last>Netzer</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
       <url hash="b878c2a9">W98-1012</url>
       <bibkey>netzer-elhadad-1998-generating</bibkey>
@@ -1222,13 +1222,13 @@
     </paper>
     <paper id="2">
       <title>Encoding Linguistic Corpora</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="0ffacdd6">W98-1102</url>
       <bibkey>ide-1998-encoding</bibkey>
     </paper>
     <paper id="3">
       <title>Using a Probabilistic Translation Model for Cross-Language Information Retrieval</title>
-      <author><first>Jian-Yun</first><last>Nie</last></author>
+      <author id="jian-yun-nie"><first>Jian-Yun</first><last>Nie</last></author>
       <author><first>Pierre</first><last>Isabelle</last></author>
       <author><first>George</first><last>Foster</last></author>
       <url hash="655d9df4">W98-1103</url>
@@ -1237,20 +1237,20 @@
     <paper id="4">
       <title>Using Suffix Arrays to Compute Term Frequency and Document Frequency for All Substrings in a Corpus</title>
       <author><first>Mikio</first><last>Yamamoto</last></author>
-      <author><first>Kenneth W.</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth W.</first><last>Church</last></author>
       <url hash="a89d0c0e">W98-1104</url>
       <bibkey>yamamoto-church-1998-using</bibkey>
     </paper>
     <paper id="5">
       <title>Semantic Tagging using a Probabilistic Context Free Grammar</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Scott</first><last>Miller</last></author>
       <url hash="93e5b5d8">W98-1105</url>
       <bibkey>collins-miller-1998-semantic</bibkey>
     </paper>
     <paper id="6">
       <title>An Empirical Approach to Conceptual Case Frame Acquisition</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
       <author><first>Mark</first><last>Schmelzenbach</last></author>
       <url hash="5f1afad2">W98-1106</url>
       <bibkey>riloff-schmelzenbach-1998-empirical</bibkey>
@@ -1258,7 +1258,7 @@
     <paper id="7">
       <title>Semantic Lexicon Acquisition for Learning Natural Language Interfaces</title>
       <author><first>Cynthia A.</first><last>Thompson</last></author>
-      <author><first>Raymond J.</first><last>Mooney</last></author>
+      <author id="raymond-mooney"><first>Raymond J.</first><last>Mooney</last></author>
       <url hash="0e5b7be7">W98-1107</url>
       <bibkey>thompson-mooney-1998-semantic</bibkey>
     </paper>
@@ -1281,8 +1281,8 @@
     </paper>
     <paper id="10">
       <title>Generalized unknown morpheme guessing for hybrid <fixed-case>POS</fixed-case> tagging of <fixed-case>K</fixed-case>orean</title>
-      <author><first>Jeongwon</first><last>Cha</last></author>
-      <author><first>Geunbae</first><last>Lee</last></author>
+      <author id="jeong-won-cha"><first>Jeongwon</first><last>Cha</last></author>
+      <author id="gary-geunbae-lee"><first>Geunbae</first><last>Lee</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <url hash="3e4d21c2">W98-1110</url>
       <bibkey>cha-etal-1998-generalized</bibkey>
@@ -1295,9 +1295,9 @@
     </paper>
     <paper id="12">
       <title>Aligning tagged bitexts</title>
-      <author><first>Raquel</first><last>Martinez</last></author>
+      <author id="raquel-martinez"><first>Raquel</first><last>Martinez</last></author>
       <author><first>Joseba</first><last>Abaitua</last></author>
-      <author><first>Arantza</first><last>Casillas</last></author>
+      <author id="arantza-casillas"><first>Arantza</first><last>Casillas</last></author>
       <url hash="63157d0f">W98-1112</url>
       <bibkey>martinez-etal-1998-aligning</bibkey>
     </paper>
@@ -1305,22 +1305,22 @@
       <title>Towards Unsupervised Extraction of Verb Paradigms from Large Corpora</title>
       <author><first>Cornelia H.</first><last>Parkes</last></author>
       <author><first>Alexander M.</first><last>Malek</last></author>
-      <author><first>Mitchell P.</first><last>Marcus</last></author>
+      <author id="mitch-marcus"><first>Mitchell P.</first><last>Marcus</last></author>
       <url hash="5a19130f">W98-1113</url>
       <bibkey>parkes-etal-1998-towards</bibkey>
     </paper>
     <paper id="14">
       <title>Can Subcategorisation Probabilities Help a Statistical Parser</title>
-      <author><first>John</first><last>Carroll</last></author>
+      <author id="john-a-carroll"><first>John</first><last>Carroll</last></author>
       <author><first>Guido</first><last>Minnen</last></author>
-      <author><first>Ted</first><last>Briscoe</last></author>
+      <author id="ted-briscoe"><first>Ted</first><last>Briscoe</last></author>
       <url hash="814a7e5a">W98-1114</url>
       <bibkey>carroll-etal-1998-subcategorisation</bibkey>
     </paper>
     <paper id="15">
       <title>Edge-Based Best-First Chart Parsing</title>
       <author><first>Eugene</first><last>Charniak</last></author>
-      <author><first>Sharon</first><last>Goldwater</last></author>
+      <author id="sharon-goldwater"><first>Sharon</first><last>Goldwater</last></author>
       <author><first>Mark</first><last>Johnson</last></author>
       <url hash="df1ae7fd">W98-1115</url>
       <bibkey>charniak-etal-1998-edge</bibkey>
@@ -1344,14 +1344,14 @@
       <author><first>Andrew</first><last>Borthwick</last></author>
       <author><first>John</first><last>Sterling</last></author>
       <author><first>Eugene</first><last>Agichtein</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <url hash="5b4120d7">W98-1118</url>
       <bibkey>borthwick-etal-1998-exploiting</bibkey>
     </paper>
     <paper id="19">
       <title>A Statistical Approach to Anaphora Resolution</title>
       <author><first>Niyu</first><last>Ge</last></author>
-      <author><first>John</first><last>Hale</last></author>
+      <author id="john-hale"><first>John</first><last>Hale</last></author>
       <author><first>Eugene</first><last>Charniak</last></author>
       <url hash="2cff23df">W98-1119</url>
       <bibkey>ge-etal-1998-statistical</bibkey>
@@ -1359,29 +1359,29 @@
     <paper id="20">
       <title>A Decision Tree Method for Finding and Classifying Names in <fixed-case>J</fixed-case>apanese Texts</title>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <author><first>Hiroyuki</first><last>Shinnou</last></author>
       <url hash="7741ef34">W98-1120</url>
       <bibkey>sekine-etal-1998-decision</bibkey>
     </paper>
     <paper id="21">
       <title><fixed-case>POS</fixed-case> Tagging versus Classes in Language Modeling</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <url hash="7996405c">W98-1121</url>
       <bibkey>heeman-1998-pos</bibkey>
     </paper>
     <paper id="22">
       <title>Automatic Acquisition of Phrase Grammars for Stochastic Language Modeling</title>
       <author><first>Giuseppe</first><last>Riccardi</last></author>
-      <author><first>Srinivas</first><last>Bangalore</last></author>
+      <author id="srinivas-bangalore"><first>Srinivas</first><last>Bangalore</last></author>
       <url hash="6e05ee0a">W98-1122</url>
       <bibkey>riccardi-bangalore-1998-automatic</bibkey>
     </paper>
     <paper id="23">
       <title>Linear Segmentation and Segment Significance</title>
       <author><first>Min-Yen</first><last>Kan</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="abe6a098">W98-1123</url>
       <bibkey>kan-etal-1998-linear</bibkey>
     </paper>
@@ -1394,15 +1394,15 @@
     <paper id="25">
       <title>Discourse Parsing: A Decision Tree Approach</title>
       <author><first>Tadashi</first><last>Nomoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="7c133fd2">W98-1125</url>
       <bibkey>nomoto-matsumoto-1998-discourse</bibkey>
     </paper>
     <paper id="26">
       <title>Mapping Collocational Properties into Machine Learning Features</title>
-      <author><first>Janyce M.</first><last>Wiebe</last></author>
-      <author><first>Kenneth J.</first><last>McKeever</last></author>
-      <author><first>Rebecca F.</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce M.</first><last>Wiebe</last></author>
+      <author id="kenneth-j-mckeever"><first>Kenneth J.</first><last>McKeever</last></author>
+      <author id="rebecca-bruce"><first>Rebecca F.</first><last>Bruce</last></author>
       <url hash="01290f47">W98-1126</url>
       <bibkey>wiebe-etal-1998-mapping</bibkey>
     </paper>
@@ -1419,7 +1419,7 @@
     </frontmatter>
     <paper id="1">
       <title>Abstraction Is Harmful in Language Learning</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="99d627e0">W98-1201</url>
       <bibkey>daelemans-1998-abstraction</bibkey>
     </paper>
@@ -1435,19 +1435,19 @@
     </paper>
     <paper id="3">
       <title>Learning a Lexicalized Grammar for <fixed-case>G</fixed-case>erman</title>
-      <author><first>Sandra</first><last>Kubler</last></author>
+      <author id="sandra-kubler"><first>Sandra</first><last>Kubler</last></author>
       <url hash="62adc8f1">W98-1203</url>
       <bibkey>kubler-1998-learning</bibkey>
     </paper>
     <paper id="4">
       <title>A Lexically-Intensive Algorithm for Domain-Specific Knowlegde Acquisition</title>
-      <author><first>Rene</first><last>Schneider</last></author>
+      <author id="rene-schneider"><first>Rene</first><last>Schneider</last></author>
       <url hash="9ba1fe60">W98-1204</url>
       <bibkey>schneider-1998-lexically</bibkey>
     </paper>
     <paper id="5">
       <title>Look-Back and Look-Ahead in the Conversion of Hidden <fixed-case>M</fixed-case>arkov Models into Finite State Transducers</title>
-      <author><first>André</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>André</first><last>Kempe</last></author>
       <url hash="6fa98126">W98-1205</url>
       <bibkey>kempe-1998-look</bibkey>
     </paper>
@@ -1466,9 +1466,9 @@
     </paper>
     <paper id="8">
       <title>Implementing a Sense Tagger in a General Architecture for Text Engineering</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
       <author><first>Mark</first><last>Stevenson</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="2c50ea1b">W98-1208</url>
       <bibkey>cunningham-etal-1998-implementing</bibkey>
     </paper>
@@ -1504,7 +1504,7 @@
     </paper>
     <paper id="13">
       <title>Automatically generating hypertext in newspaper articles by computing semantic relatedness</title>
-      <author><last>Green</last><first>Stephen J</first></author>
+      <author id="stephen-j-green"><first>Stephen J</first><last>Green</last></author>
       <url hash="e0ac38d6">W98-1213</url>
       <bibkey>green-1998-automatically</bibkey>
     </paper>
@@ -1517,13 +1517,13 @@
     </paper>
     <paper id="15">
       <title>Sense Variation and Lexical Semantics Generative Operations</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <url hash="12f4a2fd">W98-1215</url>
       <bibkey>saint-dizier-1998-sense</bibkey>
     </paper>
     <paper id="16">
       <title>An Attempt to Use Weighted Cusums to Identify Sublanguages</title>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <url hash="6dfe6dbf">W98-1216</url>
       <bibkey>somers-1998-attempt</bibkey>
     </paper>
@@ -1535,7 +1535,7 @@
     </paper>
     <paper id="18">
       <title>Applications and Explanations of <fixed-case>Z</fixed-case>ipf’s Law</title>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <url hash="2ca040c5">W98-1218</url>
       <bibkey>powers-1998-applications</bibkey>
     </paper>
@@ -1555,7 +1555,7 @@
     </paper>
     <paper id="21">
       <title>Generatlon of Simple <fixed-case>T</fixed-case>urkish Sentences with Systemic-Functional Grammar</title>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <author><first>Turgay</first><last>Korkrmaz</last></author>
       <url hash="110cbc57">W98-1221</url>
       <bibkey>cicekli-korkrmaz-1998-generatlon</bibkey>
@@ -1570,16 +1570,16 @@
     </paper>
     <paper id="23">
       <title>Modularity in Inductively-Learned Word Pronunciation Systems</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Ton</first><last>Weijters</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="57ca4ce1">W98-1223</url>
       <bibkey>van-den-bosch-etal-1998-modularity</bibkey>
     </paper>
     <paper id="24">
       <title>Do Not Forget: Full Memory in Memory-Based Learning of Word Pronunciation</title>
-      <author><first>Antal</first><last>van den Bosch</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="5d48b9ad">W98-1224</url>
       <bibkey>van-den-bosch-daelemans-1998-forget</bibkey>
     </paper>
@@ -1654,7 +1654,7 @@
     </paper>
     <paper id="35">
       <title>The Total <fixed-case>T</fixed-case>uring Test and the Loebner Prize</title>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <url hash="5e52604b">W98-1235</url>
       <bibkey>powers-1998-total</bibkey>
     </paper>
@@ -1680,19 +1680,19 @@
     </paper>
     <paper id="39">
       <title>Morphemes as Necessary Concept for Structures Discovery from Untagged Corpora</title>
-      <author><first>Herve</first><last>Dejean</last></author>
+      <author id="herve-dejean"><first>Herve</first><last>Dejean</last></author>
       <url hash="6bef085c">W98-1239</url>
       <bibkey>dejean-1998-morphemes</bibkey>
     </paper>
     <paper id="40">
       <title>The segmentation problem in morphology learning</title>
-      <author><first>Christopher D.</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher D.</first><last>Manning</last></author>
       <url hash="5c1afd53">W98-1240</url>
       <bibkey>manning-1998-segmentation</bibkey>
     </paper>
     <paper id="41">
       <title>Reconciliation of Unsupervised Clustering, Segmentation and Cohesion</title>
-      <author><first>David M. W.</first><last>Powers</last></author>
+      <author id="david-m-w-powers"><first>David M. W.</first><last>Powers</last></author>
       <url hash="e13636a3">W98-1241</url>
       <bibkey>powers-1998-reconciliation</bibkey>
     </paper>
@@ -1734,7 +1734,7 @@
     <paper id="4">
       <title>Robust Parsing Using a Hidden <fixed-case>M</fixed-case>arkov Model</title>
       <author><first>Wide R.</first><last>Hogenhout</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="5cc9a7b7">W98-1304</url>
       <bibkey>hogenhout-matsumoto-1998-robust</bibkey>
     </paper>
@@ -1748,14 +1748,14 @@
     </paper>
     <paper id="6">
       <title>Treatment of e-Moves in Subset Construction</title>
-      <author><first>Gertjan</first><last>van Noord</last></author>
+      <author id="gertjan-van-noord"><first>Gertjan</first><last>van Noord</last></author>
       <url hash="49caa1c4">W98-1306</url>
       <bibkey>van-noord-1998-treatment</bibkey>
     </paper>
     <paper id="7">
       <title>Learning Finite-State Models for Language Understanding</title>
-      <author><first>David</first><last>Pico</last></author>
-      <author><first>Enrique</first><last>Vidal</last></author>
+      <author id="david-pico"><first>David</first><last>Pico</last></author>
+      <author id="enrique-vidal"><first>Enrique</first><last>Vidal</last></author>
       <url hash="88c6ddac">W98-1307</url>
       <bibkey>pico-vidal-1998-learning</bibkey>
     </paper>
@@ -1768,13 +1768,13 @@
     <paper id="9">
       <title>Implementing Voting Constraints with Finite State Transducers</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Gokhan</first><last>Tur</last></author>
+      <author id="gokhan-tur"><first>Gokhan</first><last>Tur</last></author>
       <url hash="cc1bb668">W98-1309</url>
       <bibkey>oflazer-tur-1998-implementing</bibkey>
     </paper>
     <paper id="10">
       <title>Feature Structures, Unification and Finite-State Transducers</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="fab58c8b">W98-1310</url>
       <bibkey>zajac-1998-feature</bibkey>
     </paper>
@@ -1800,7 +1800,7 @@
   <volume id="14" type="proceedings">
     <meta>
       <booktitle>Natural Language Generation</booktitle>
-      <editor><first>Eduard</first><last>Hovy</last></editor>
+      <editor id="eduard-hovy"><first>Eduard</first><last>Hovy</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Niagara-on-the-Lake, Ontario, Canada</address>
       <month>August</month>
@@ -1813,49 +1813,49 @@
     </frontmatter>
     <paper id="1">
       <title>Natural Language Generation Journeys to Interactive 3<fixed-case>D</fixed-case> Worlds Invited Talk Extended Abstract</title>
-      <author><first>James C.</first><last>Lester</last></author>
+      <author id="james-lester"><first>James C.</first><last>Lester</last></author>
       <author><first>William H.</first><last>Bares</last></author>
-      <author><first>Charles B.</first><last>Callaway</last></author>
+      <author id="charles-b-callaway"><first>Charles B.</first><last>Callaway</last></author>
       <author><first>Stuart G.</first><last>Towns</last></author>
       <url hash="6776421c">W98-1401</url>
       <bibkey>lester-etal-1998-natural</bibkey>
     </paper>
     <paper id="2">
       <title>Communicative Goal-Driven <fixed-case>NL</fixed-case> Generation and Data-Driven Graphics Generation: An Architectural Synthesis for Multimedia Page Generation</title>
-      <author><first>John</first><last>Bateman</last></author>
+      <author id="john-bateman"><first>John</first><last>Bateman</last></author>
       <author><first>Thomas</first><last>Kamps</last></author>
-      <author><first>Jorg</first><last>Kleinz</last></author>
+      <author id="jorg-kleinz"><first>Jorg</first><last>Kleinz</last></author>
       <author><first>Klaus</first><last>Reichenberger</last></author>
       <url hash="8bd2c096">W98-1402</url>
       <bibkey>bateman-etal-1998-communicative</bibkey>
     </paper>
     <paper id="3">
       <title>A Principled Representation of Attributive Descriptions for Generating Integrated Text and Information Graphics Presentations</title>
-      <author><first>Nancy</first><last>Green</last></author>
+      <author id="nancy-green"><first>Nancy</first><last>Green</last></author>
       <author><first>Giuseppe</first><last>Carenini</last></author>
-      <author><first>Johanna</first><last>Moore</last></author>
+      <author id="johanna-d-moore"><first>Johanna</first><last>Moore</last></author>
       <url hash="b9db1d91">W98-1403</url>
       <bibkey>green-etal-1998-principled</bibkey>
     </paper>
     <paper id="4">
       <title>An Architecture for Opportunistic Text Generation</title>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Mick</first><last>O’Donnell</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
       <url hash="bf092c92">W98-1404</url>
       <bibkey>mellish-etal-1998-architecture</bibkey>
     </paper>
     <paper id="5">
       <title>Controlled Realization of Complex Objects</title>
-      <author><first>David D.</first><last>McDonald</last></author>
+      <author id="david-d-mcdonald"><first>David D.</first><last>McDonald</last></author>
       <url hash="a3d5ae30">W98-1405</url>
       <bibkey>mcdonald-1998-controlled</bibkey>
     </paper>
     <paper id="6">
       <title>De-Constraining Text Generation</title>
       <author><first>Stephen</first><last>Beale</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <author><first>Evelyne</first><last>Viegas</last></author>
       <author><first>Leo</first><last>Wanner</last></author>
       <url hash="5f0c9aa4">W98-1406</url>
@@ -1879,10 +1879,10 @@
       <title>A New Approach to Expert System Explanations</title>
       <author><first>Regina</first><last>Barzilay</last></author>
       <author><first>Daryl</first><last>McCullough</last></author>
-      <author><first>Owen</first><last>Rambow</last></author>
-      <author><first>Jonathan</first><last>DeCristofaro</last></author>
+      <author id="owen-rambow"><first>Owen</first><last>Rambow</last></author>
+      <author id="jonathan-decristofaro"><first>Jonathan</first><last>DeCristofaro</last></author>
       <author><first>Tanya</first><last>Korelsky</last></author>
-      <author><first>Benoit</first><last>Lavoie</last></author>
+      <author id="benoit-lavoie"><first>Benoit</first><last>Lavoie</last></author>
       <url hash="8f7fe495">W98-1409</url>
       <bibkey>barzilay-etal-1998-new</bibkey>
     </paper>
@@ -1894,9 +1894,9 @@
     </paper>
     <paper id="11">
       <title>Experiments Using Stochastic Search for Text Planning</title>
-      <author><first>Chris</first><last>Mellish</last></author>
+      <author id="chris-mellish"><first>Chris</first><last>Mellish</last></author>
       <author><first>Alistair</first><last>Knott</last></author>
-      <author><first>Jon</first><last>Oberlander</last></author>
+      <author id="jon-oberlander"><first>Jon</first><last>Oberlander</last></author>
       <author><first>Mick</first><last>O’Donnell</last></author>
       <url hash="32158ea8">W98-1411</url>
       <bibkey>mellish-etal-1998-experiments</bibkey>
@@ -1938,7 +1938,7 @@
     </paper>
     <paper id="17">
       <title>Planning Dialogue Contributions With New Information</title>
-      <author><first>Kristiina</first><last>Jokinen</last></author>
+      <author id="kristiina-jokinen"><first>Kristiina</first><last>Jokinen</last></author>
       <author><first>Hideki</first><last>Tanaka</last></author>
       <author><first>Akio</first><last>Yokoo</last></author>
       <url hash="e4c2431e">W98-1417</url>
@@ -1946,7 +1946,7 @@
     </paper>
     <paper id="18">
       <title>Generation of Noun Compounds in <fixed-case>H</fixed-case>ebrew: Can Syntactic Knowledge Be Fully Encapsulated?</title>
-      <author><first>Yael Dahan</first><last>Netzer</last></author>
+      <author id="yael-netzer"><first>Yael Dahan</first><last>Netzer</last></author>
       <author><first>Michael</first><last>Elhadad</last></author>
       <url hash="3479403f">W98-1418</url>
       <bibkey>netzer-elhadad-1998-generation</bibkey>
@@ -1954,14 +1954,14 @@
     <paper id="19">
       <title>Textual Economy Through Close Coupling of Syntax and Semantics</title>
       <author><first>Matthew</first><last>Stone</last></author>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <url hash="6b484f81">W98-1419</url>
       <bibkey>stone-webber-1998-textual</bibkey>
     </paper>
     <paper id="20">
       <title>A Language-Independent System for Generating Feature Structures from Interlingua Representations</title>
       <author><first>Murat</first><last>Temizsoy</last></author>
-      <author><first>Ilyas</first><last>Cicekli</last></author>
+      <author id="ilyas-cicekli"><first>Ilyas</first><last>Cicekli</last></author>
       <url hash="7b89bbb9">W98-1420</url>
       <bibkey>temizsoy-cicekli-1998-language</bibkey>
     </paper>
@@ -1980,7 +1980,7 @@
     </paper>
     <paper id="23">
       <title>Approaches to Surface Realization With <fixed-case>HPSG</fixed-case></title>
-      <author><first>Graham</first><last>Wilcock</last></author>
+      <author id="graham-wilcock"><first>Graham</first><last>Wilcock</last></author>
       <url hash="86ed36c4">W98-1423</url>
       <bibkey>wilcock-1998-approaches</bibkey>
     </paper>
@@ -2004,22 +2004,22 @@
     </paper>
     <paper id="26">
       <title>The Practical Value of N-Grams Is in Generation</title>
-      <author><first>Irene</first><last>Langkilde</last></author>
+      <author id="irene-langkilde"><first>Irene</first><last>Langkilde</last></author>
       <author><first>Kevin</first><last>Knight</last></author>
       <url hash="d680d8b4">W98-1426</url>
       <bibkey>langkilde-knight-1998-practical</bibkey>
     </paper>
     <paper id="27">
       <title>Generation as a Solution to Its Own Problem</title>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Roger</first><last>Evans</last></author>
+      <author id="roger-evans"><first>Roger</first><last>Evans</last></author>
       <url hash="135ab04d">W98-1427</url>
       <bibkey>scott-etal-1998-generation</bibkey>
     </paper>
     <paper id="28">
       <title><fixed-case>EXEMPLARS</fixed-case>: A Practical, Extensible Framework For Dynamic Text Generation</title>
-      <author><first>Michael</first><last>White</last></author>
+      <author id="michael-white"><first>Michael</first><last>White</last></author>
       <author><first>Ted</first><last>Caldwell</last></author>
       <url hash="cbb70291">W98-1428</url>
       <bibkey>white-caldwell-1998-exemplars</bibkey>
@@ -2036,16 +2036,16 @@
       <title>System Demonstration Content Planning as the Basis for an Intelligent Tutoring System</title>
       <author><first>Reva</first><last>Freedman</last></author>
       <author><first>Stefan</first><last>Brandle</last></author>
-      <author><first>Michael</first><last>Glass</last></author>
+      <author id="michael-glass"><first>Michael</first><last>Glass</last></author>
       <author><first>Jung Hee</first><last>Kim</last></author>
       <author><first>Yujian</first><last>Zhou</last></author>
-      <author><first>Martha W.</first><last>Evens</last></author>
+      <author id="martha-evens"><first>Martha W.</first><last>Evens</last></author>
       <url hash="46a9362f">W98-1430</url>
       <bibkey>freedman-etal-1998-system</bibkey>
     </paper>
     <paper id="31">
       <title>System Demonstration <fixed-case>FLAUBERT</fixed-case>: An User Friendly System for Multilingual Text Generation</title>
-      <author><first>Frederic</first><last>Meunier</last></author>
+      <author id="frederic-meunier"><first>Frederic</first><last>Meunier</last></author>
       <author><first>Laurence</first><last>Danlos</last></author>
       <url hash="bedacb14">W98-1431</url>
       <bibkey>meunier-danlos-1998-system</bibkey>
@@ -2059,8 +2059,8 @@
     </paper>
     <paper id="33">
       <title>System Demonstration <fixed-case>G</fixed-case>oal<fixed-case>G</fixed-case>etter: Generation of Spoken Soccer Reports</title>
-      <author><first>Mariet</first><last>Theune</last></author>
-      <author><first>Esther</first><last>Klabbers</last></author>
+      <author id="mariet-theune"><first>Mariet</first><last>Theune</last></author>
+      <author id="esther-klabbers"><first>Esther</first><last>Klabbers</last></author>
       <url hash="fab3e272">W98-1433</url>
       <bibkey>theune-klabbers-1998-system</bibkey>
     </paper>
@@ -2074,7 +2074,7 @@
     </paper>
     <paper id="35">
       <title>System Demonstration Interactive Generation and Knowledge Administration in <fixed-case>M</fixed-case>ulti<fixed-case>M</fixed-case>eteo</title>
-      <author><first>Jose</first><last>Coch</last></author>
+      <author id="jose-coch"><first>Jose</first><last>Coch</last></author>
       <url hash="999d36f8">W98-1435</url>
       <bibkey>coch-1998-system</bibkey>
     </paper>
@@ -2084,15 +2084,15 @@
       <author><first>Teodora</first><last>Ratiu</last></author>
       <author><first>Maria</first><last>Ferencz</last></author>
       <author><first>Tonde-Csilla</first><last>Kovacs</last></author>
-      <author><first>Istvan</first><last>Nagy</last></author>
-      <author><first>Diana</first><last>Zaiu</last></author>
+      <author id="istvan-nagy-t"><first>Istvan</first><last>Nagy</last></author>
+      <author id="diana-inkpen"><first>Diana</first><last>Zaiu</last></author>
       <url hash="501fbf01">W98-1436</url>
       <bibkey>ferencz-etal-1998-romvox</bibkey>
     </paper>
     <paper id="37">
       <title><fixed-case>WYSIWYM</fixed-case>: knowledge editing with natural language feedback</title>
       <author><first>Richard</first><last>Power</last></author>
-      <author><first>Donia</first><last>Scott</last></author>
+      <author id="donia-scott"><first>Donia</first><last>Scott</last></author>
       <url hash="7ff38d5a">W98-1437</url>
       <bibkey>power-scott-1998-wysiwym</bibkey>
     </paper>
@@ -2101,7 +2101,7 @@
     <meta>
       <booktitle>Proceedings of the Third Conference on Empirical Methods for Natural Language Processing</booktitle>
       <url hash="34474dd1">W98-15</url>
-      <editor><first>Nancy</first><last>Ide</last></editor>
+      <editor id="nancy-ide"><first>Nancy</first><last>Ide</last></editor>
       <editor><first>Atro</first><last>Voutilainen</last></editor>
       <publisher>Association for Computational Linguistics</publisher>
       <address>Palacio de Exposiciones y Congresos, Granada, Spain</address>
@@ -2116,15 +2116,15 @@
     <paper id="1">
       <title>Dynamic Coreference-Based Summarization</title>
       <author><first>Breck</first><last>Baldwin</last></author>
-      <author><first>Thomas S.</first><last>Morton</last></author>
+      <author id="thomas-s-morton"><first>Thomas S.</first><last>Morton</last></author>
       <pages>1–6</pages>
       <url hash="b49bff55">W98-1501</url>
       <bibkey>baldwin-morton-1998-dynamic</bibkey>
     </paper>
     <paper id="2">
       <title>Multilingual Robust Anaphora Resolution</title>
-      <author><first>Ruslan</first><last>Mitkov</last></author>
-      <author><first>Lamia</first><last>Belguith</last></author>
+      <author id="ruslan-mitkov"><first>Ruslan</first><last>Mitkov</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia</first><last>Belguith</last></author>
       <author><first>Malgorzata</first><last>Stys</last></author>
       <pages>7–16</pages>
       <url hash="779cdc29">W98-1502</url>
@@ -2133,7 +2133,7 @@
     <paper id="3">
       <title>Aligning Clattses in Parallel Texts</title>
       <author><first>Sotiris</first><last>Boutsis</last></author>
-      <author><first>Stelios</first><last>Piperidis</last></author>
+      <author id="stelios-piperidis"><first>Stelios</first><last>Piperidis</last></author>
       <pages>17–26</pages>
       <url hash="2e9bb4d3">W98-1503</url>
       <bibkey>boutsis-piperidis-1998-aligning</bibkey>
@@ -2156,15 +2156,15 @@
     <paper id="6">
       <title>Measures for Corpus Similarity and Homogeneity</title>
       <author><first>Adam</first><last>Kilgarriff</last></author>
-      <author><first>Tony</first><last>Rose</last></author>
+      <author id="tony-rose"><first>Tony</first><last>Rose</last></author>
       <pages>46–52</pages>
       <url hash="f8bd1cc9">W98-1506</url>
       <bibkey>kilgarriff-rose-1998-measures</bibkey>
     </paper>
     <paper id="7">
       <title>Word-Sense Distinguishability and Inter-Coder Agreement</title>
-      <author><first>Rebecca</first><last>Bruce</last></author>
-      <author><first>Janyce</first><last>Wiebe</last></author>
+      <author id="rebecca-bruce"><first>Rebecca</first><last>Bruce</last></author>
+      <author id="janyce-wiebe"><first>Janyce</first><last>Wiebe</last></author>
       <pages>53–60</pages>
       <url hash="b7df03a7">W98-1507</url>
       <bibkey>bruce-wiebe-1998-word</bibkey>
@@ -2200,14 +2200,14 @@
     <paper id="11">
       <title><fixed-case>J</fixed-case>apanese Dependency Structure Analysis based on Lexicalized Statistics</title>
       <author><last>Fujio</last><first>Masakazu</first></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>87–95</pages>
       <url hash="843fa9a8">W98-1511</url>
       <bibkey>fujio-matsumoto-1998-japanese</bibkey>
     </paper>
     <paper id="12">
       <title>A Comparison of Criteria for Maximum Entropy/ Minimum Divergence Feature Selection</title>
-      <author><first>Adam</first><last>Berger</last></author>
+      <author id="adam-berger"><first>Adam</first><last>Berger</last></author>
       <author><first>Harry</first><last>Printz</last></author>
       <pages>96–106</pages>
       <url hash="ae24952e">W98-1512</url>
@@ -2223,7 +2223,7 @@
     <meta>
       <booktitle>Proceedings of the 11th Nordic Conference of Computational Linguistics (<fixed-case>NODALIDA</fixed-case> 1998)</booktitle>
       <url hash="5add5084">W98-16</url>
-      <editor><first>Bente</first><last>Maegaard</last></editor>
+      <editor id="bente-maegaard"><first>Bente</first><last>Maegaard</last></editor>
       <publisher>Center for Sprogteknologi, University of Copenhagen, Denmark</publisher>
       <address>Copenhagen, Denmark</address>
       <month>March</month>
@@ -2237,7 +2237,7 @@
     <paper id="1">
       <title><fixed-case>LMT</fixed-case> at Tivoli Gardens</title>
       <author><first>Arendse</first><last>Bernth</last></author>
-      <author><first>Michael</first><last>McCord</last></author>
+      <author id="michael-c-mccord"><first>Michael</first><last>McCord</last></author>
       <pages>4–12</pages>
       <url hash="14339704">W98-1601</url>
       <bibkey>bernth-mccord-1998-lmt</bibkey>
@@ -2265,7 +2265,7 @@
     </paper>
     <paper id="5">
       <title>Structural Lexical Heuristics in the Automatic Analysis of <fixed-case>P</fixed-case>ortuguese</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>44–56</pages>
       <url hash="4fce50c2">W98-1605</url>
       <bibkey>bick-1998-structural</bibkey>
@@ -2280,7 +2280,7 @@
     </paper>
     <paper id="7">
       <title>A Chart-Based Framework for Grammar Checking. Initial Studies</title>
-      <author><first>Anna</first><last>Sågvall Hein</last></author>
+      <author id="anna-sagvall-hein"><first>Anna</first><last>Sågvall Hein</last></author>
       <pages>68–80</pages>
       <url hash="b2f064e2">W98-1607</url>
       <bibkey>sagvall-hein-1998-chart</bibkey>
@@ -2288,7 +2288,7 @@
     <paper id="8">
       <title><fixed-case>CP</fixed-case>-<fixed-case>UDOG</fixed-case>: An Algorithm for the Disambiguation of Compound Participles in <fixed-case>D</fixed-case>anish</title>
       <author><first>Jens</first><last>Ahlmann Hansen</last></author>
-      <author><first>Poul Søren</first><last>Kjærsgaard</last></author>
+      <author id="poul-soren-kjaersgaard"><first>Poul Søren</first><last>Kjærsgaard</last></author>
       <pages>81–86</pages>
       <url hash="3929d551">W98-1608</url>
       <bibkey>ahlmann-hansen-kjaersgaard-1998-cp</bibkey>
@@ -2302,7 +2302,7 @@
     </paper>
     <paper id="10">
       <title><fixed-case>CATCH</fixed-case>: A Program for Developing World Wide Web <fixed-case>CALL</fixed-case> Material</title>
-      <author><first>Erik F.</first><last>Tjong Kim Sang</last></author>
+      <author id="erik-tjong-kim-sang"><first>Erik F.</first><last>Tjong Kim Sang</last></author>
       <pages>94–99</pages>
       <url hash="f6b6841d">W98-1610</url>
       <bibkey>tjong-kim-sang-1998-catch</bibkey>
@@ -2318,14 +2318,14 @@
     </paper>
     <paper id="12">
       <title>Peeking Into the <fixed-case>D</fixed-case>anish Living Room. <fixed-case>I</fixed-case>nternet access to a large speech corpus</title>
-      <author><first>Peter Juel</first><last>Henrichsen</last></author>
+      <author id="peter-juel-henrichsen"><first>Peter Juel</first><last>Henrichsen</last></author>
       <pages>109–119</pages>
       <url hash="a1bf6876">W98-1612</url>
       <bibkey>henrichsen-1998-peeking</bibkey>
     </paper>
     <paper id="13">
       <title>Extraction of Translation Equivalents from Parallel Corpora</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>120–128</pages>
       <url hash="39c0e794">W98-1613</url>
       <bibkey>tiedemann-1998-extraction</bibkey>
@@ -2347,7 +2347,7 @@
     </paper>
     <paper id="16">
       <title>Logic for Part-of-Speech Tagging and Shallow Parsing</title>
-      <author><first>Torbjörn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjörn</first><last>Lager</last></author>
       <pages>152–159</pages>
       <url hash="7396e03a">W98-1616</url>
       <bibkey>lager-1998-logic</bibkey>
@@ -2375,7 +2375,7 @@
     </paper>
     <paper id="20">
       <title>Teaching and learning computational linguistics in an international setting</title>
-      <author><first>Koenraad</first><last>de Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>de Smedt</last></author>
       <pages>186–189</pages>
       <url hash="ee6a7850">W98-1620</url>
       <bibkey>de-smedt-1998-teaching</bibkey>
diff --git a/data/xml/W99.xml b/data/xml/W99.xml
index 43cd9feb9f..0d209a027d 100644
--- a/data/xml/W99.xml
+++ b/data/xml/W99.xml
@@ -12,7 +12,7 @@
     </frontmatter>
     <paper id="1">
       <title>An Integrated Approach to Reference and Presupposition Resolution</title>
-      <author><first>Robert T.</first><last>Kasper</last></author>
+      <author id="robert-t-kasper"><first>Robert T.</first><last>Kasper</last></author>
       <author><first>Paul C.</first><last>Davis</last></author>
       <author><first>Craige</first><last>Roberts</last></author>
       <url hash="a4e00258">W99-0101</url>
@@ -28,14 +28,14 @@
       <title>Anaphora Resolution using Extended Centen’ng Algorithm in a Multi-modal Dialogue System</title>
       <author><first>Harksoo</first><last>Kim</last></author>
       <author><first>Jeong-Mi</first><last>Cho</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <url hash="aca487ae">W99-0103</url>
       <bibkey>kim-etal-1999-anaphora</bibkey>
     </paper>
     <paper id="4">
       <title>Knowledge-Lean Coreference Resolution and its Relation to Textual Cohesion and Coherence</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
-      <author><first>Steven J.</first><last>Maiorano</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="steven-j-maiorano"><first>Steven J.</first><last>Maiorano</last></author>
       <url hash="d0f6ac03">W99-0104</url>
       <bibkey>harabagiu-maiorano-1999-knowledge</bibkey>
     </paper>
@@ -50,7 +50,7 @@
     <paper id="6">
       <title>Discourse Structure and Co-Reference: An Empirical Study</title>
       <author><first>Dan</first><last>Cristea</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <author><first>Daniel</first><last>Marcu</last></author>
       <author><first>Valentin</first><last>Tablan</last></author>
       <url hash="6a3190db">W99-0106</url>
@@ -58,22 +58,22 @@
     </paper>
     <paper id="7">
       <title>Building a Tool for Annotating Reference in Discourse</title>
-      <author><first>Jonathan</first><last>DeCristofaro</last></author>
+      <author id="jonathan-decristofaro"><first>Jonathan</first><last>DeCristofaro</last></author>
       <author><first>Michael</first><last>Strube</last></author>
-      <author><first>Kathleen E.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen E.</first><last>McCoy</last></author>
       <url hash="156b4c65">W99-0107</url>
       <bibkey>decristofaro-etal-1999-building</bibkey>
     </paper>
     <paper id="8">
       <title>Generating Anaphoric Expressions: Pronoun or Definite Description?</title>
-      <author><first>Kathleen E.</first><last>McCoy</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen E.</first><last>McCoy</last></author>
       <author><first>Michael</first><last>Strube</last></author>
       <url hash="940f521d">W99-0108</url>
       <bibkey>mccoy-strube-1999-generating</bibkey>
     </paper>
     <paper id="9">
       <title>Cb or not Cb? Centering theory applied to <fixed-case>NLG</fixed-case></title>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <url hash="13b17e7f">W99-0109</url>
       <bibkey>kibble-1999-cb</bibkey>
     </paper>
@@ -124,7 +124,7 @@
     </paper>
     <paper id="2">
       <title>Is Hillary Rodham <fixed-case>C</fixed-case>linton the President? Disambiguating Names across Documents</title>
-      <author><first>Yael</first><last>Ravin</last></author>
+      <author id="yael-ravin"><first>Yael</first><last>Ravin</last></author>
       <author><first>Zunaid</first><last>Kazi</last></author>
       <url hash="245480dd">W99-0202</url>
       <bibkey>ravin-kazi-1999-hillary</bibkey>
@@ -141,7 +141,7 @@
     <paper id="4">
       <title>Automatic Slide Presentation from Semantically Annotated Documents</title>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <url hash="ebe61664">W99-0204</url>
       <bibkey>utiyama-hasida-1999-automatic</bibkey>
     </paper>
@@ -149,7 +149,7 @@
       <title>Resolution of Indirect Anaphora in <fixed-case>J</fixed-case>apanese Sentences Using Examples: “<fixed-case>X</fixed-case> no <fixed-case>Y</fixed-case> (<fixed-case>Y</fixed-case> of <fixed-case>X</fixed-case>)”</title>
       <author><first>Masaki</first><last>Murata</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="50bd5512">W99-0205</url>
       <bibkey>murata-etal-1999-resolution</bibkey>
     </paper>
@@ -157,15 +157,15 @@
       <title>Pronoun Resolution in <fixed-case>J</fixed-case>apanese Sentences Using Surface Expressions and Examples</title>
       <author><first>Masaki</first><last>Murata</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
-      <author><first>Makoto</first><last>Nagao</last></author>
+      <author id="makoto-nagao"><first>Makoto</first><last>Nagao</last></author>
       <url hash="9a79bc13">W99-0206</url>
       <bibkey>murata-etal-1999-pronoun</bibkey>
     </paper>
     <paper id="7">
       <title>Corpus-Based Anaphora Resolution Towards Antecedent Preference</title>
-      <author><first>Michael</first><last>Paul</last></author>
+      <author id="michael-paul"><first>Michael</first><last>Paul</last></author>
       <author><first>Kazuhide</first><last>Yamamoto</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="30664127">W99-0207</url>
       <bibkey>paul-etal-1999-corpus</bibkey>
     </paper>
@@ -186,30 +186,30 @@
     </paper>
     <paper id="10">
       <title>Coreference-oriented Interlingual Slot Structure &amp; Machine Translation</title>
-      <author><first>Jesus</first><last>Peral</last></author>
-      <author><first>Manuel</first><last>Palomar</last></author>
-      <author><first>Antonio</first><last>Ferrandez</last></author>
+      <author id="jesus-peral"><first>Jesus</first><last>Peral</last></author>
+      <author id="manuel-palomar"><first>Manuel</first><last>Palomar</last></author>
+      <author id="antonio-ferrandez"><first>Antonio</first><last>Ferrandez</last></author>
       <url hash="e2d90fb1">W99-0210</url>
       <bibkey>peral-etal-1999-coreference</bibkey>
     </paper>
     <paper id="11">
       <title>Using Coreference Chains for Text Summarization</title>
-      <author><first>Saliha</first><last>Azzam</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="saliha-azzam"><first>Saliha</first><last>Azzam</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
       <url hash="2f843647">W99-0211</url>
       <bibkey>azzam-etal-1999-using</bibkey>
     </paper>
     <paper id="12">
       <title>Using Coreference for Question Answering</title>
-      <author><first>Thomas S.</first><last>Morton</last></author>
+      <author id="thomas-s-morton"><first>Thomas S.</first><last>Morton</last></author>
       <url hash="84efc21b">W99-0212</url>
       <bibkey>morton-1999-using</bibkey>
     </paper>
     <paper id="13">
       <title>What is coreference, and what should coreference annotation be?</title>
-      <author><first>Kees</first><last>van Deemter</last></author>
-      <author><first>Rodger</first><last>Kibble</last></author>
+      <author id="kees-van-deemter"><first>Kees</first><last>van Deemter</last></author>
+      <author id="rodger-kibble"><first>Rodger</first><last>Kibble</last></author>
       <url hash="b9d40971">W99-0213</url>
       <bibkey>van-deemter-kibble-1999-coreference</bibkey>
     </paper>
@@ -227,7 +227,7 @@
     <paper id="1">
       <title>Annotation Graphs as a Framework for Multidimensional Linguistic Data Analysis</title>
       <author><first>Steven</first><last>Bird</last></author>
-      <author><first>Mark</first><last>Liberman</last></author>
+      <author id="mark-liberman"><first>Mark</first><last>Liberman</last></author>
       <url hash="690161a0">W99-0301</url>
       <bibkey>bird-liberman-1999-annotation</bibkey>
     </paper>
@@ -240,7 +240,7 @@
     </paper>
     <paper id="3">
       <title>Argumentation Mark-Up: A Proposal</title>
-      <author><first>Jean-Francois</first><last>Delannoy</last></author>
+      <author id="jean-francois-delannoy"><first>Jean-Francois</first><last>Delannoy</last></author>
       <url hash="a287e6af">W99-0303</url>
       <bibkey>delannoy-1999-argumentation</bibkey>
     </paper>
@@ -275,10 +275,10 @@
     </paper>
     <paper id="6">
       <title>Tagging of Speech Acts and Dialogue Games in <fixed-case>S</fixed-case>panish Call Home</title>
-      <author><first>Lori</first><last>Levin</last></author>
+      <author id="lori-levin"><first>Lori</first><last>Levin</last></author>
       <author><first>Klaus</first><last>Ries</last></author>
       <author><first>Ann</first><last>Thyme-Gobbel</last></author>
-      <author><first>Alon</first><last>Lavie</last></author>
+      <author id="alon-lavie"><first>Alon</first><last>Lavie</last></author>
       <url hash="48079635">W99-0306</url>
       <bibkey>levin-etal-1999-tagging</bibkey>
     </paper>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="8">
       <title>Tagging Psychotherapeutic Interviews for Linguistic Analysis</title>
-      <author><first>Jon David</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon David</first><last>Patrick</last></author>
       <url hash="392a4e5b">W99-0308</url>
       <bibkey>patrick-1999-tagging</bibkey>
     </paper>
@@ -326,15 +326,15 @@
     </paper>
     <paper id="13">
       <title>A Two-level Approach to Coding Dialogue for Discourse Structure: Activities of the 1998 <fixed-case>DRI</fixed-case> Working Group on Higher-level Structures</title>
-      <author><first>David R.</first><last>Traum</last></author>
-      <author><first>Christine H.</first><last>Nakatani</last></author>
+      <author id="david-traum"><first>David R.</first><last>Traum</last></author>
+      <author id="christine-h-nakatani"><first>Christine H.</first><last>Nakatani</last></author>
       <url hash="877a8915">W99-0313</url>
       <bibkey>traum-nakatani-1999-two</bibkey>
     </paper>
     <paper id="14">
       <title>Automatically Extracting Grounding Tags from <fixed-case>BF</fixed-case> Tags</title>
       <author><first>Teresa</first><last>Zollo</last></author>
-      <author><first>Mark</first><last>Core</last></author>
+      <author id="mark-g-core"><first>Mark</first><last>Core</last></author>
       <url hash="f7f3f2e5">W99-0314</url>
       <bibkey>zollo-core-1999-automatically</bibkey>
     </paper>
@@ -356,7 +356,7 @@
     </paper>
     <paper id="2">
       <title>Eliciting Natural Speech From Non-Native Users: Collecting Speech Data for <fixed-case>LVCSR</fixed-case></title>
-      <author><first>Laura Mayfield</first><last>Tomokiyo</last></author>
+      <author id="laura-mayfield-tomokiyo"><first>Laura Mayfield</first><last>Tomokiyo</last></author>
       <author><first>Susanne</first><last>Burger</last></author>
       <url hash="423297fc">W99-0402</url>
       <bibkey>tomokiyo-burger-1999-eliciting</bibkey>
@@ -376,22 +376,22 @@
     </paper>
     <paper id="5">
       <title>Modeling the language assessment process and result: Proposed architecture for automatic oral proficiency assessment</title>
-      <author><first>Gina-Anne</first><last>Levow</last></author>
-      <author><first>Mari Broman</first><last>Olsen</last></author>
+      <author id="gina-anne-levow"><first>Gina-Anne</first><last>Levow</last></author>
+      <author id="mari-broman-olsen"><first>Mari Broman</first><last>Olsen</last></author>
       <url hash="15607375">W99-0405</url>
       <bibkey>levow-olsen-1999-modeling</bibkey>
     </paper>
     <paper id="6">
       <title>Dual Use of Linguistic Resources: Evaluation of <fixed-case>MT</fixed-case> Systems and Language Learners</title>
       <author><first>Lisa</first><last>Decrozant</last></author>
-      <author><first>Clare R.</first><last>Voss</last></author>
+      <author id="clare-voss"><first>Clare R.</first><last>Voss</last></author>
       <url hash="a96a2bc0">W99-0406</url>
       <bibkey>decrozant-voss-1999-dual</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>FAME</fixed-case>: a Functional Annotation Meta-scheme for multi-modal and multi-lingual Parsing Evaluation</title>
       <author><first>Alessandro</first><last>Lenci</last></author>
-      <author><first>Simonetta</first><last>Montemagni</last></author>
+      <author id="simonetta-montemagni"><first>Simonetta</first><last>Montemagni</last></author>
       <author><first>Vito</first><last>Pirrelli</last></author>
       <author><first>Claudia</first><last>Soria</last></author>
       <url hash="2003398e">W99-0407</url>
@@ -399,8 +399,8 @@
     </paper>
     <paper id="8">
       <title>Modeling User Language Proficiency in a Writing Tutor for Deaf Learners of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Lisa N.</first><last>Michaud</last></author>
-      <author><first>Kathleen F.</first><last>McCoy</last></author>
+      <author id="lisa-n-michaud"><first>Lisa N.</first><last>Michaud</last></author>
+      <author id="kathleen-f-mccoy"><first>Kathleen F.</first><last>McCoy</last></author>
       <url hash="26b8c8f9">W99-0408</url>
       <bibkey>michaud-mccoy-1999-modeling</bibkey>
     </paper>
@@ -413,14 +413,14 @@
     </paper>
     <paper id="10">
       <title>A Web-Based System for Automatic Language Skill Assessment: <fixed-case>EVALING</fixed-case></title>
-      <author><first>Cedrick</first><last>Fairon</last></author>
+      <author id="cedrick-fairon"><first>Cedrick</first><last>Fairon</last></author>
       <url hash="61d8aa23">W99-0410</url>
       <bibkey>fairon-1999-web</bibkey>
     </paper>
     <paper id="11">
       <title>Automated Essay Scoring for Nonnative <fixed-case>E</fixed-case>nglish Speakers</title>
       <author><first>Jill</first><last>Burstein</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <url hash="51c7d591">W99-0411</url>
       <bibkey>burstein-chodorow-1999-automated</bibkey>
     </paper>
@@ -437,16 +437,16 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2 - A Morphologically and Semantically Enhanced Resource</title>
-      <author><first>Sanda M.</first><last>Harabagiu</last></author>
+      <author id="sanda-harabagiu"><first>Sanda M.</first><last>Harabagiu</last></author>
       <author><first>George A.</first><last>Miller</last></author>
-      <author><first>Dan I.</first><last>Moldovan</last></author>
+      <author id="dan-moldovan"><first>Dan I.</first><last>Moldovan</last></author>
       <url hash="5588c0c6">W99-0501</url>
       <bibkey>harabagiu-etal-1999-wordnet</bibkey>
     </paper>
     <paper id="2">
       <title>A Case Study on Inter-Annotator Agreement for Word Sense Disambiguation</title>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Chung Yong</first><last>Lim</last></author>
+      <author id="chung-yong-lim"><first>Chung Yong</first><last>Lim</last></author>
       <author><first>Shou King</first><last>Foo</last></author>
       <url hash="2661abde">W99-0502</url>
       <bibkey>ng-etal-1999-case</bibkey>
@@ -455,16 +455,16 @@
       <title>Supervised Learning of Lexical Semantic Verb Classes Using Frequency Distributions</title>
       <author><first>Suzanne</first><last>Stevenson</last></author>
       <author><first>Paola</first><last>Merlo</last></author>
-      <author><first>Natalia Kariaeva</first><last>Rutgers</last></author>
+      <author id="natalia-kariaeva-rutgers"><first>Natalia Kariaeva</first><last>Rutgers</last></author>
       <url hash="da7d31cd">W99-0503</url>
       <bibkey>stevenson-etal-1999-supervised</bibkey>
     </paper>
     <paper id="4">
       <title>On the concept of diathesis alternations as semantic oppositions</title>
-      <author><first>Ana</first><last>Fernandez</last></author>
-      <author><first>M. Antonia</first><last>Marti</last></author>
-      <author><first>Gloria</first><last>Vazquez</last></author>
-      <author><first>Irene</first><last>Castellon</last></author>
+      <author id="ana-fernandez"><first>Ana</first><last>Fernandez</last></author>
+      <author id="m-antonia-marti"><first>M. Antonia</first><last>Marti</last></author>
+      <author id="gloria-vazquez"><first>Gloria</first><last>Vazquez</last></author>
+      <author id="irene-castellon"><first>Irene</first><last>Castellon</last></author>
       <url hash="5c4f1f06">W99-0504</url>
       <bibkey>fernandez-etal-1999-concept</bibkey>
     </paper>
@@ -476,20 +476,20 @@
     </paper>
     <paper id="6">
       <title>On Some Aspects of Lexical Standardization</title>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <url hash="68f81a3d">W99-0506</url>
       <bibkey>zajac-1999-aspects</bibkey>
     </paper>
     <paper id="7">
       <title><fixed-case>SIMPLE</fixed-case>- Semantic Information for Multifunctional Plurilingual Lexica: Some Examples of <fixed-case>D</fixed-case>anish” Concrete Nouns</title>
-      <author><first>Bolette Sandford</first><last>Pedersen</last></author>
+      <author id="bolette-sandford-pedersen"><first>Bolette Sandford</first><last>Pedersen</last></author>
       <author><first>Britt</first><last>Keson</last></author>
       <url hash="55af4616">W99-0507</url>
       <bibkey>pedersen-keson-1999-simple</bibkey>
     </paper>
     <paper id="8">
       <title>Parallel Translations as Sense Discriminators</title>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <url hash="6f6c3f54">W99-0508</url>
       <bibkey>ide-1999-parallel</bibkey>
     </paper>
@@ -518,7 +518,7 @@
     <paper id="12">
       <title>Towards a Universal Index of Meaning</title>
       <author><first>Piek</first><last>Vossen</last></author>
-      <author><first>Wim</first><last>Peters</last></author>
+      <author id="wim-peters"><first>Wim</first><last>Peters</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
       <url hash="95271ab5">W99-0512</url>
       <bibkey>vossen-etal-1999-towards</bibkey>
@@ -544,7 +544,7 @@
     </frontmatter>
     <paper id="1">
       <title>What’s Happened Since the First <fixed-case>SIGDAT</fixed-case> Meeting?</title>
-      <author><first>Kenneth Ward</first><last>Church</last></author>
+      <author id="kenneth-church"><first>Kenneth Ward</first><last>Church</last></author>
       <url hash="9f8f5215">W99-0601</url>
       <bibkey>church-1999-whats</bibkey>
     </paper>
@@ -564,9 +564,9 @@
     </paper>
     <paper id="4">
       <title>Improved Alignment Models for Statistical Machine Translation</title>
-      <author><first>Franz Josef</first><last>Och</last></author>
-      <author><first>Christoph</first><last>Tillmann</last></author>
-      <author><first>Hermann</first><last>Ney</last></author>
+      <author id="franz-josef-och"><first>Franz Josef</first><last>Och</last></author>
+      <author id="christoph-tillmann"><first>Christoph</first><last>Tillmann</last></author>
+      <author id="hermann-ney"><first>Hermann</first><last>Ney</last></author>
       <url hash="6de2ed74">W99-0604</url>
       <bibkey>och-etal-1999-improved</bibkey>
     </paper>
@@ -579,7 +579,7 @@
     </paper>
     <paper id="6">
       <title>Boosting Applied to Tagging and <fixed-case>PP</fixed-case> Attachment</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>Robert E.</first><last>Schapire</last></author>
       <author><first>Yoram</first><last>Singer</last></author>
       <url hash="f01e01f3">W99-0606</url>
@@ -587,7 +587,7 @@
     </paper>
     <paper id="7">
       <title>Applying Extrasentential Context To Maximum Entropy Based Tagging With A Large Semantic And Syntactic Tagset</title>
-      <author><first>Ezra</first><last>Black</last></author>
+      <author id="ezra-black"><first>Ezra</first><last>Black</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Ruiqiang</first><last>Zhang</last></author>
       <url hash="309b6456">W99-0607</url>
@@ -595,8 +595,8 @@
     </paper>
     <paper id="8">
       <title>Improving <fixed-case>POS</fixed-case> Tagging Using Machine-Learning Techniques</title>
-      <author><first>Lluis</first><last>Marquez</last></author>
-      <author><first>Horacio</first><last>Rodriguez</last></author>
+      <author id="lluis-marquez"><first>Lluis</first><last>Marquez</last></author>
+      <author id="horacio-rodriguez"><first>Horacio</first><last>Rodriguez</last></author>
       <author><first>Josep</first><last>Carmona</last></author>
       <author><first>Josep</first><last>Montolio</last></author>
       <url hash="05007bd9">W99-0608</url>
@@ -620,21 +620,21 @@
     </paper>
     <paper id="11">
       <title>Noun Phrase Coreference as Clustering</title>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Kiri</first><last>Wagstaff</last></author>
       <url hash="8c3e5cc9">W99-0611</url>
       <bibkey>cardie-wagstaff-1999-noun</bibkey>
     </paper>
     <paper id="12">
       <title>Language Independent Named Entity Recognition Combining Morphological and Contextual Evidence</title>
-      <author><first>Silviu</first><last>Cucerzan</last></author>
+      <author id="silviu-cucerzan"><first>Silviu</first><last>Cucerzan</last></author>
       <author><first>David</first><last>Yarowsky</last></author>
       <url hash="c70cd400">W99-0612</url>
       <bibkey>cucerzan-yarowsky-1999-language</bibkey>
     </paper>
     <paper id="13">
       <title>Unsupervised Models for Named Entity Classification</title>
-      <author><first>Michael</first><last>Collins</last></author>
+      <author id="michael-collins"><first>Michael</first><last>Collins</last></author>
       <author><first>Yoram</first><last>Singer</last></author>
       <url hash="6715480d">W99-0613</url>
       <bibkey>collins-singer-1999-unsupervised</bibkey>
@@ -649,19 +649,19 @@
       <title><fixed-case>HMM</fixed-case> Specialization with Selective Lexicalization</title>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Sang-Zoo</first><last>Lee</last></author>
-      <author><first>Hae-Chang</first><last>Rim</last></author>
+      <author id="hae-chang-rim"><first>Hae-Chang</first><last>Rim</last></author>
       <url hash="ba9f470f">W99-0615</url>
       <bibkey>kim-etal-1999-hmm</bibkey>
     </paper>
     <paper id="16">
       <title>Why Doesn’t Natural Language Come Naturally?</title>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <url hash="1021d435">W99-0616</url>
       <bibkey>schwartz-1999-doesnt</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>POS</fixed-case> Tags and Decision Trees for Language Modeling</title>
-      <author><first>Peter A.</first><last>Heeman</last></author>
+      <author id="peter-a-heeman"><first>Peter A.</first><last>Heeman</last></author>
       <url hash="08e9e275">W99-0617</url>
       <bibkey>heeman-1999-pos</bibkey>
     </paper>
@@ -676,14 +676,14 @@
     <paper id="19">
       <title>Word Informativeness and Automatic Pitch Accent Modeling</title>
       <author><first>Shimei</first><last>Pan</last></author>
-      <author><first>Kathleen R.</first><last>McKeown</last></author>
+      <author id="kathleen-mckeown"><first>Kathleen R.</first><last>McKeown</last></author>
       <url hash="0aceb370">W99-0619</url>
       <bibkey>pan-mckeown-1999-word</bibkey>
     </paper>
     <paper id="20">
       <title>Learning Discourse Relations with Active Data Selection</title>
       <author><first>Tadashi</first><last>Nomoto</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="daf015d0">W99-0620</url>
       <bibkey>nomoto-matsumoto-1999-learning</bibkey>
     </paper>
@@ -699,13 +699,13 @@
     <paper id="22">
       <title>Guiding a Well-Founded Parser with Corpus Statistics</title>
       <author><first>Amon</first><last>Seagull</last></author>
-      <author><first>Lenhart</first><last>Schubert</last></author>
+      <author id="lenhart-schubert"><first>Lenhart</first><last>Schubert</last></author>
       <url hash="172e8a2f">W99-0622</url>
       <bibkey>seagull-schubert-1999-guiding</bibkey>
     </paper>
     <paper id="23">
       <title>Exploiting Diversity in Natural Language Processing: Combining Parsers</title>
-      <author><first>John C.</first><last>Henderson</last></author>
+      <author id="john-henderson"><first>John C.</first><last>Henderson</last></author>
       <author><first>Eric</first><last>Brill</last></author>
       <url hash="4532b4bd">W99-0623</url>
       <bibkey>henderson-brill-1999-exploiting</bibkey>
@@ -713,7 +713,7 @@
     <paper id="24">
       <title>Lexical ambiguity and Information Retrieval revisited</title>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Anselmo</first><last>Penas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Penas</last></author>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <url hash="8d82fc46">W99-0624</url>
       <bibkey>gonzalo-etal-1999-lexical</bibkey>
@@ -721,21 +721,21 @@
     <paper id="25">
       <title>Detecting Text Similarity over Short Passages: Exploring Linguistic Feature Combinations via Machine Learning</title>
       <author><first>Vasileios</first><last>Hatzivassiloglou</last></author>
-      <author><first>Judith L.</first><last>Klavans</last></author>
+      <author id="judith-l-klavans"><first>Judith L.</first><last>Klavans</last></author>
       <author><first>Eleazar</first><last>Eskin</last></author>
       <url hash="b34f5cec">W99-0625</url>
       <bibkey>hatzivassiloglou-etal-1999-detecting</bibkey>
     </paper>
     <paper id="26">
       <title>Automatic Construction of Weighted String Similarity Measures</title>
-      <author><first>Jorg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jorg</first><last>Tiedemann</last></author>
       <url hash="fa1e9773">W99-0626</url>
       <bibkey>tiedemann-1999-automatic</bibkey>
     </paper>
     <paper id="27">
       <title>Taking the load off the conference chairs-towards a digital paper-routing assistant</title>
       <author><first>David</first><last>Yarowsky</last></author>
-      <author><first>Radu</first><last>Florian</last></author>
+      <author id="radu-florian"><first>Radu</first><last>Florian</last></author>
       <url hash="b3e9be86">W99-0627</url>
       <bibkey>yarowsky-florian-1999-taking</bibkey>
     </paper>
@@ -743,7 +743,7 @@
       <title><fixed-case>PP</fixed-case>-Attachment: A Committee Machine Approach</title>
       <author><first>Martha A.</first><last>Alegre</last></author>
       <author><first>Josep M.</first><last>Sopena</last></author>
-      <author><first>Agusti</first><last>Lloberas</last></author>
+      <author id="agusti-lloberas"><first>Agusti</first><last>Lloberas</last></author>
       <url hash="a49ff05c">W99-0628</url>
       <bibkey>alegre-etal-1999-pp</bibkey>
     </paper>
@@ -751,7 +751,7 @@
       <title>Cascaded Grammatical Relation Assignment</title>
       <author><first>Sabine</first><last>Buchholz</last></author>
       <author><first>Jorn</first><last>Veenstra</last></author>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <url hash="e0510be1">W99-0629</url>
       <bibkey>buchholz-etal-1999-cascaded</bibkey>
     </paper>
@@ -765,7 +765,7 @@
     <paper id="31">
       <title>An Iterative Approach to Estimating Frequencies over a Semantic Hierarchy</title>
       <author><first>Stephen</first><last>Clark</last></author>
-      <author><first>David</first><last>Weir</last></author>
+      <author id="david-weir"><first>David</first><last>Weir</last></author>
       <url hash="378a1164">W99-0631</url>
       <bibkey>clark-weir-1999-iterative</bibkey>
     </paper>
@@ -778,7 +778,7 @@
     </paper>
     <paper id="33">
       <title>Improving <fixed-case>B</fixed-case>rill’s <fixed-case>POS</fixed-case> Tagger for an Agglutinative Language</title>
-      <author><first>Beata</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beata</first><last>Megyesi</last></author>
       <url hash="37315b84">W99-0633</url>
       <bibkey>megyesi-1999-improving</bibkey>
     </paper>
@@ -786,14 +786,14 @@
       <title>Corpus-Based Learning for Noun Phrase Coreference Resolution</title>
       <author><first>Wee Meng</first><last>Soon</last></author>
       <author><first>Hwee Tou</first><last>Ng</last></author>
-      <author><first>Chung Yong</first><last>Lim</last></author>
+      <author id="chung-yong-lim"><first>Chung Yong</first><last>Lim</last></author>
       <url hash="aa0cd1b4">W99-0634</url>
       <bibkey>soon-etal-1999-corpus</bibkey>
     </paper>
     <paper id="35">
       <title>Corpus-Based Approach for Nominal Compound Analysis for <fixed-case>K</fixed-case>orean Based on Linguistic and Statistical Information</title>
       <author><first>Juntae</first><last>Yoon</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <author><first>Mansuk</first><last>Song</last></author>
       <url hash="0c23744a">W99-0635</url>
       <bibkey>yoon-etal-1999-corpus</bibkey>
@@ -811,21 +811,21 @@
     </frontmatter>
     <paper id="1">
       <title>Unsupervised Learning of Word Boundary with Description Length Gain</title>
-      <author><first>Chunyu</first><last>Kit</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <url hash="1b928711">W99-0701</url>
       <bibkey>kit-wilks-1999-unsupervised</bibkey>
     </paper>
     <paper id="2">
       <title>Experiments in Unsupervised Entropy-Based Corpus Segmentation</title>
-      <author><first>André</first><last>Kempe</last></author>
+      <author id="andre-kempe"><first>André</first><last>Kempe</last></author>
       <url hash="285d0623">W99-0702</url>
       <bibkey>kempe-1999-experiments</bibkey>
     </paper>
     <paper id="3">
       <title>Practical Bootstrapping of Morphological Analyzers</title>
       <author><first>Kemal</first><last>Oflazer</last></author>
-      <author><first>Sergei</first><last>Nirenburg</last></author>
+      <author id="sergei-nirenburg"><first>Sergei</first><last>Nirenburg</last></author>
       <url hash="1ba26af0">W99-0703</url>
       <bibkey>oflazer-nirenburg-1999-practical</bibkey>
     </paper>
@@ -837,21 +837,21 @@
     </paper>
     <paper id="5">
       <title>The u-<fixed-case>TBL</fixed-case> System: Logic Programming Tools for Transformation-Based Learning</title>
-      <author><first>Torbjorn</first><last>Lager</last></author>
+      <author id="torbjorn-lager"><first>Torbjorn</first><last>Lager</last></author>
       <url hash="cfa5d041">W99-0705</url>
       <bibkey>lager-1999-u-tbl</bibkey>
     </paper>
     <paper id="6">
       <title>Learning Transformation Rules to Find Grammatical Relations</title>
       <author><first>Lisa</first><last>Ferro</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
-      <author><first>Alexander</first><last>Yeh</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
+      <author id="alexander-yeh"><first>Alexander</first><last>Yeh</last></author>
       <url hash="1e4a02ec">W99-0706</url>
       <bibkey>ferro-etal-1999-learning</bibkey>
     </paper>
     <paper id="7">
       <title>Memory-Based Shallow Parsing</title>
-      <author><first>Walter</first><last>Daelemans</last></author>
+      <author id="walter-daelemans"><first>Walter</first><last>Daelemans</last></author>
       <author><first>Sabine</first><last>Buchholz</last></author>
       <author><first>Jorn</first><last>Veenstra</last></author>
       <url hash="6fba0d73">W99-0707</url>
@@ -876,7 +876,7 @@
     </frontmatter>
     <paper id="1">
       <title>Keynote Talk: Diamonds on my Windshield: the Use of Computer-based Instruction in Computational Linguistics</title>
-      <author><first>Jo</first><last>Calder</last></author>
+      <author id="jo-calder"><first>Jo</first><last>Calder</last></author>
       <url hash="5e44d1db">W99-0801</url>
       <bibkey>calder-1999-keynote</bibkey>
     </paper>
@@ -889,13 +889,13 @@
     <paper id="3">
       <title>Web tools for introductory computational linguistics</title>
       <author><first>Dafydd</first><last>Gibbon</last></author>
-      <author><first>Julie</first><last>Carson-Berndsen</last></author>
+      <author id="julie-carson-berndsen"><first>Julie</first><last>Carson-Berndsen</last></author>
       <url hash="52c402d2">W99-0803</url>
       <bibkey>gibbon-carson-berndsen-1999-web</bibkey>
     </paper>
     <paper id="4">
       <title>Intranet learning tools for <fixed-case>NLP</fixed-case></title>
-      <author><first>William J.</first><last>Black</last></author>
+      <author id="william-j-black"><first>William J.</first><last>Black</last></author>
       <author><first>Simon</first><last>Hill</last></author>
       <author><first>Mahmoud</first><last>Kassaei</last></author>
       <url hash="00f2f001">W99-0804</url>
@@ -924,7 +924,7 @@
       <title>An Open Distance Learning Web-Course for <fixed-case>NLP</fixed-case> in <fixed-case>IR</fixed-case></title>
       <author><first>Felisa</first><last>Verdejo</last></author>
       <author><first>Julio</first><last>Gonzalo</last></author>
-      <author><first>Anselmo</first><last>Penas</last></author>
+      <author id="anselmo-penas"><first>Anselmo</first><last>Penas</last></author>
       <url hash="48bbdff9">W99-0808</url>
       <bibkey>verdejo-etal-1999-open</bibkey>
     </paper>
@@ -941,14 +941,14 @@
     </frontmatter>
     <paper id="1">
       <title>Hiding a Semantic Hierarchy in a <fixed-case>M</fixed-case>arkov Model</title>
-      <author><first>Steven</first><last>Abney</last></author>
+      <author id="steven-abney"><first>Steven</first><last>Abney</last></author>
       <author><first>Marc</first><last>Light</last></author>
       <url hash="b8a12398">W99-0901</url>
       <bibkey>abney-light-1999-hiding</bibkey>
     </paper>
     <paper id="2">
       <title>The applications of unsupervised learning to <fixed-case>J</fixed-case>apanese grapheme-phoneme alignment</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <url hash="51120d9d">W99-0902</url>
       <bibkey>baldwin-tanaka-1999-applications</bibkey>
@@ -956,14 +956,14 @@
     <paper id="3">
       <title>Dual Distributional Verb Sense Disambiguation with Small Corpora and Machine Readable Dictionaries</title>
       <author><first>Jeong-Mi</first><last>Cho</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
-      <author><first>Gil Chang</first><last>Kim</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
+      <author id="gil-chang-kim"><first>Gil Chang</first><last>Kim</last></author>
       <url hash="afeb40ed">W99-0903</url>
       <bibkey>cho-etal-1999-dual</bibkey>
     </paper>
     <paper id="4">
       <title>Unsupervised learning of derivational morphology from inflectional lexicons</title>
-      <author><first>Eric</first><last>Gaussier</last></author>
+      <author id="eric-gaussier"><first>Eric</first><last>Gaussier</last></author>
       <url hash="3267a4a7">W99-0904</url>
       <bibkey>gaussier-1999-unsupervised</bibkey>
     </paper>
@@ -1066,7 +1066,7 @@
     <paper id="7">
       <title>The shortcomings of a tagger</title>
       <author><first>Kristin</first><last>Hagen</last></author>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <pages>66–75</pages>
       <url hash="95dbe646">W99-1007</url>
@@ -1082,15 +1082,15 @@
     </paper>
     <paper id="9">
       <title>Extracting Keywords from Digital Document Collections</title>
-      <author id="anna-jonsson"><first>Anna</first><last>Jonsson</last></author>
+      <author><first>Anna</first><last>Jonsson</last></author>
       <pages>83–90</pages>
       <url hash="30ec355e">W99-1009</url>
       <bibkey>jonsson-2000-extracting</bibkey>
     </paper>
     <paper id="10">
       <title>Ontologically Supported Semantic Matching</title>
-      <author><first>Atanas K.</first><last>Kiryakov</last></author>
-      <author><first>Kiril Iv.</first><last>Simov</last></author>
+      <author id="atanas-kiryakov"><first>Atanas K.</first><last>Kiryakov</last></author>
+      <author id="kiril-simov"><first>Kiril Iv.</first><last>Simov</last></author>
       <pages>91–102</pages>
       <url hash="305a49e9">W99-1010</url>
       <bibkey>kiryakov-simov-2000-ontologically</bibkey>
@@ -1104,7 +1104,7 @@
     </paper>
     <paper id="12">
       <title>Towards a Finite-State Parser for <fixed-case>S</fixed-case>wedish</title>
-      <author><first>Beáta</first><last>Megyesi</last></author>
+      <author id="beata-megyesi"><first>Beáta</first><last>Megyesi</last></author>
       <author><first>Sara</first><last>Rydin</last></author>
       <pages>115–123</pages>
       <url hash="6fc2d0c2">W99-1012</url>
@@ -1144,7 +1144,7 @@
     <paper id="17">
       <title>Designing a System for <fixed-case>S</fixed-case>wedish Spoken Document Retrieval</title>
       <author><first>Botond</first><last>Pakucs</last></author>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <pages>162–173</pages>
       <url hash="874b4489">W99-1017</url>
       <bibkey>pakucs-gamback-2000-designing</bibkey>
@@ -1159,7 +1159,7 @@
     <paper id="19">
       <title>An Information Retrieval System with Cooperative Behaviour</title>
       <author><first>Paulo</first><last>Quaresma</last></author>
-      <author><first>Irene</first><last>Pimenta Rodrigues</last></author>
+      <author id="irene-rodrigues"><first>Irene</first><last>Pimenta Rodrigues</last></author>
       <pages>182–190</pages>
       <url hash="0a0f3c27">W99-1019</url>
       <bibkey>quaresma-pimenta-rodrigues-2000-information</bibkey>
@@ -1174,7 +1174,7 @@
     </paper>
     <paper id="21">
       <title>Automatic proofreading for <fixed-case>N</fixed-case>orwegian: The challenges of lexical and grammatical variation</title>
-      <author><first>Koenraad</first><last>de Smedt</last></author>
+      <author id="koenraad-de-smedt"><first>Koenraad</first><last>de Smedt</last></author>
       <author><first>Victoria</first><last>Rosén</last></author>
       <pages>206–215</pages>
       <url hash="0b583c28">W99-1021</url>
@@ -1182,7 +1182,7 @@
     </paper>
     <paper id="22">
       <title>Word Alignment Step by Step</title>
-      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <author id="jorg-tiedemann"><first>Jörg</first><last>Tiedemann</last></author>
       <pages>216–227</pages>
       <url hash="b1a0d8c8">W99-1022</url>
       <bibkey>tiedemann-2000-word</bibkey>
diff --git a/data/xml/X93.xml b/data/xml/X93.xml
index 087822e2d2..9517b08183 100644
--- a/data/xml/X93.xml
+++ b/data/xml/X93.xml
@@ -15,7 +15,7 @@
     </frontmatter>
     <paper id="1">
       <title><fixed-case>TIPSTER</fixed-case> Program Overview</title>
-      <author><first>Roberta H.</first><last>Merchant</last></author>
+      <author id="roberta-h-merchant"><first>Roberta H.</first><last>Merchant</last></author>
       <doi>10.3115/1119149.1119151</doi>
       <pages>1–2</pages>
       <url hash="5d91abc4">X93-1001</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>The <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <doi>10.3115/1119149.1119153</doi>
       <pages>5–5</pages>
       <url hash="7a56d369">X93-1003</url>
@@ -73,7 +73,7 @@
       <title><fixed-case>INQUERY</fixed-case> System Overview</title>
       <author><first>John</first><last>Broglio</last></author>
       <author><first>James P.</first><last>Callan</last></author>
-      <author><first>W. Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>W. Bruce</first><last>Croft</last></author>
       <doi>10.3115/1119149.1119159</doi>
       <pages>47–67</pages>
       <url hash="bdaf5c09">X93-1008</url>
@@ -82,7 +82,7 @@
     <paper id="9">
       <title><fixed-case>TIPSTER</fixed-case> Phase <fixed-case>I</fixed-case> Final Report</title>
       <author><first>Bill</first><last>Caid</last></author>
-      <author><first>Stephen</first><last>Gallant</last></author>
+      <author id="stephen-l-gallant"><first>Stephen</first><last>Gallant</last></author>
       <author><first>Joel</first><last>Carleton</last></author>
       <author><first>David</first><last>Sudbeck</last></author>
       <doi>10.3115/1119149.1119160</doi>
@@ -92,8 +92,8 @@
     </paper>
     <paper id="10">
       <title><fixed-case>DR</fixed-case>-<fixed-case>LINK</fixed-case> System: Phase <fixed-case>I</fixed-case> Summary</title>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
-      <author><first>Sung H.</first><last>Myaeng</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung H.</first><last>Myaeng</last></author>
       <doi>10.3115/1119149.1119161</doi>
       <pages>93–112</pages>
       <url hash="c0968c7c">X93-1010</url>
@@ -117,7 +117,7 @@
     </paper>
     <paper id="13">
       <title>Tasks, Domains, and Languages for Information Extraction</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
       <author><first>Lynn</first><last>Carlson</last></author>
       <doi>10.3115/1119149.1119165</doi>
@@ -128,7 +128,7 @@
     <paper id="14">
       <title>Corpora and Data Preparation for Information Extraction</title>
       <author><first>Lynn</first><last>Carlson</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
       <doi>10.3115/1119149.1119166</doi>
       <pages>135–139</pages>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="15">
       <title>Template Design for Information Extraction</title>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <doi>10.3115/1119149.1119167</doi>
       <pages>141–145</pages>
       <url hash="2e3582cd">X93-1015</url>
@@ -145,7 +145,7 @@
     </paper>
     <paper id="16">
       <title><fixed-case>TIPSTER/MUC</fixed-case>-5 Information Extraction System Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <doi>10.3115/1119149.1119168</doi>
       <pages>147–163</pages>
       <url hash="e661f9f1">X93-1016</url>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="17">
       <title>An Analysis of the Joint Venture <fixed-case>J</fixed-case>apanese Text Prototype and Its Effect on System Performance</title>
-      <author><first>Steve</first><last>Moiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steve</first><last>Moiorano</last></author>
       <doi>10.3115/1119149.1119169</doi>
       <pages>165–178</pages>
       <url hash="59e4fe0d">X93-1017</url>
@@ -169,11 +169,11 @@
     </paper>
     <paper id="19">
       <title><fixed-case>BBN</fixed-case>’s <fixed-case>PLUM</fixed-case> Probabilistic Language Understanding System</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
       <author><first>Tomoyoshi</first><last>Matsukawa</last></author>
-      <author><first>Constantine</first><last>Papageorgiou</last></author>
+      <author id="constantine-papageorgiou"><first>Constantine</first><last>Papageorgiou</last></author>
       <author><first>Dawn</first><last>MacLaughlin</last></author>
       <author><first>Masaichiro</first><last>Kitagawa</last></author>
       <author><first>Tsutomu</first><last>Sakai</last></author>
@@ -188,14 +188,14 @@
     </paper>
     <paper id="20">
       <title>The <fixed-case>TIPSTER/SHOGUN</fixed-case> Project</title>
-      <author><first>Paul S.</first><last>Jacobs</last></author>
-      <author><first>George</first><last>Krupka</last></author>
-      <author><first>Lisa</first><last>Rau</last></author>
-      <author><first>Michael L.</first><last>Mauldin</last></author>
+      <author id="paul-s-jacobs"><first>Paul S.</first><last>Jacobs</last></author>
+      <author id="george-krupka"><first>George</first><last>Krupka</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
+      <author id="michael-l-mauldin"><first>Michael L.</first><last>Mauldin</last></author>
       <author><first>Teruko</first><last>Mitamura</last></author>
       <author><first>Tsuyoshi</first><last>Kitani</last></author>
       <author><first>Ira</first><last>Sider</last></author>
-      <author><first>Lois</first><last>Childs</last></author>
+      <author id="lois-c-childs"><first>Lois</first><last>Childs</last></author>
       <doi>10.3115/1119149.1119172</doi>
       <pages>209–221</pages>
       <url hash="0d8026ad">X93-1020</url>
@@ -203,15 +203,15 @@
     </paper>
     <paper id="21">
       <title><fixed-case>CRL</fixed-case>/<fixed-case>B</fixed-case>randeis: The <fixed-case>D</fixed-case>iderot System</title>
-      <author><first>Jim</first><last>Cowie</last></author>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><last>Wang</last><first>Jin</first></author>
-      <author><first>William</first><last>Ogden</last></author>
-      <author><first>James</first><last>Pustejovsky</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author><first>Jin</first><last>Wang</last></author>
+      <author id="william-c-ogden"><first>William</first><last>Ogden</last></author>
+      <author id="james-pustejovsky"><first>James</first><last>Pustejovsky</last></author>
       <author><first>Rong</first><last>Wang</last></author>
-      <author><first>Takahiro</first><last>Wakao</last></author>
-      <author><first>Scott</first><last>Waterman</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
+      <author><last>Waterman</last><first>Scott</first></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/1119149.1119173</doi>
       <pages>223–239</pages>
       <url hash="2707dcfb">X93-1021</url>
@@ -224,7 +224,7 @@
       <author id="stephen-soderland"><first>S.</first><last>Soderland</last></author>
       <author id="ellen-riloff"><first>E.</first><last>Riloff</last></author>
       <author id="claire-cardie"><first>C.</first><last>Cardie</last></author>
-      <author><first>J.</first><last>Peterson</last></author>
+      <author id="j-peterson"><first>J.</first><last>Peterson</last></author>
       <author id="fangfang-feng"><first>F.</first><last>Feng</last></author>
       <doi>10.3115/1119149.1119174</doi>
       <pages>241–256</pages>
@@ -233,8 +233,8 @@
     </paper>
     <paper id="23">
       <title>Dictionary Construction by Domain Experts</title>
-      <author><first>Ellen</first><last>Riloff</last></author>
-      <author><first>Wendy G.</first><last>Lehnert</last></author>
+      <author id="ellen-riloff"><first>Ellen</first><last>Riloff</last></author>
+      <author id="wendy-lehnert"><first>Wendy G.</first><last>Lehnert</last></author>
       <doi>10.3115/1119149.1119175</doi>
       <pages>257–259</pages>
       <url hash="cc99c16b">X93-1023</url>
diff --git a/data/xml/X96.xml b/data/xml/X96.xml
index 2c1f5f4b3d..d2421b3862 100644
--- a/data/xml/X96.xml
+++ b/data/xml/X96.xml
@@ -39,7 +39,7 @@
     </paper>
     <paper id="4">
       <title>Technology Transfer: Observations from the <fixed-case>TIPSTER</fixed-case> Text Program</title>
-      <author><first>Sarah M.</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah M.</first><last>Taylor</last></author>
       <doi>10.3115/1119018.1119023</doi>
       <pages>23–32</pages>
       <url hash="172fa929">X96-1004</url>
@@ -55,7 +55,7 @@
     </paper>
     <paper id="6">
       <title>The <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <doi>10.3115/1119018.1119025</doi>
       <pages>35–37</pages>
       <url hash="0c48f450">X96-1006</url>
@@ -88,7 +88,7 @@
     </paper>
     <paper id="10">
       <title>The <fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project</title>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <doi>10.3115/1119018.1119030</doi>
       <pages>47–48</pages>
       <url hash="f94b860d">X96-1010</url>
@@ -96,7 +96,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>C</fixed-case>ervantes - A System Supporting Text Analysis</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <doi>10.3115/1119018.1119031</doi>
       <pages>49–49</pages>
       <url hash="754ad1e2">X96-1011</url>
@@ -104,7 +104,7 @@
     </paper>
     <paper id="12">
       <title>The <fixed-case>NYU</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project</title>
-      <author><first>Sarah M.</first><last>Taylor</last></author>
+      <author id="sarah-taylor"><first>Sarah M.</first><last>Taylor</last></author>
       <doi>10.3115/1119018.1119032</doi>
       <pages>51–51</pages>
       <url hash="f53acea2">X96-1012</url>
@@ -112,7 +112,7 @@
     </paper>
     <paper id="13">
       <title><fixed-case>SRA</fixed-case> Participation in <fixed-case>TIPSTER</fixed-case> Phase <fixed-case>II</fixed-case></title>
-      <author><first>Lisa</first><last>Rau</last></author>
+      <author id="lisa-rau"><first>Lisa</first><last>Rau</last></author>
       <doi>10.3115/1119018.1119033</doi>
       <pages>53–53</pages>
       <url hash="993c5041">X96-1013</url>
@@ -120,7 +120,7 @@
     </paper>
     <paper id="14">
       <title>The <fixed-case>SRI</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project</title>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <doi>10.3115/1119018.1119034</doi>
       <pages>55–56</pages>
       <url hash="ed940a1c">X96-1014</url>
@@ -147,7 +147,7 @@
       <title><fixed-case>C</fixed-case>able <fixed-case>A</fixed-case>bstracting and <fixed-case>IN</fixed-case>dexing <fixed-case>S</fixed-case>ystem (<fixed-case>CANIS</fixed-case>) Prototype</title>
       <author><first>Ira</first><last>Sider</last></author>
       <author><first>Jeffrey</first><last>Baker</last></author>
-      <author><first>Deborah</first><last>Brady</last></author>
+      <author id="deborah-brady"><first>Deborah</first><last>Brady</last></author>
       <author><first>Lynne</first><last>Higbie</last></author>
       <author><first>Tom</first><last>Howard</last></author>
       <doi>10.3115/1119018.1119038</doi>
@@ -167,9 +167,9 @@
     <paper id="19">
       <title>The <fixed-case>HOOKAH</fixed-case> Information Extraction System</title>
       <author><first>Chris</first><last>Barclay</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
       <author><first>Clinton</first><last>Hyde</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.3115/1119018.1119040</doi>
       <pages>79–82</pages>
       <url hash="1bc7af10">X96-1019</url>
@@ -185,7 +185,7 @@
     </paper>
     <paper id="21">
       <title><fixed-case>O</fixed-case>leada: User-Centered <fixed-case>TIPSTER</fixed-case> Technology for Language Instruction</title>
-      <author><first>William C.</first><last>Ogden</last></author>
+      <author id="william-c-ogden"><first>William C.</first><last>Ogden</last></author>
       <author><first>Philip</first><last>Bernick</last></author>
       <doi>10.3115/1119018.1119042</doi>
       <pages>85–90</pages>
@@ -211,7 +211,7 @@
     <paper id="24">
       <title>The <fixed-case>T</fixed-case>emple <fixed-case>T</fixed-case>ranslator’s <fixed-case>W</fixed-case>orkstation Project</title>
       <author><first>Michelle</first><last>Vanni</last></author>
-      <author><first>Remi</first><last>Zajac</last></author>
+      <author id="remi-zajac"><first>Remi</first><last>Zajac</last></author>
       <doi>10.3115/1119018.1119045</doi>
       <pages>101–106</pages>
       <url hash="2824f1c3">X96-1024</url>
@@ -227,15 +227,15 @@
     </paper>
     <paper id="26">
       <title><fixed-case>C</fixed-case>hinese Information Extraction and Retrieval</title>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Michael</first><last>Crystal</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>John</first><last>Broglio</last></author>
       <author><first>Jamie</first><last>Callan</last></author>
-      <author><first>Bruce</first><last>Croft</last></author>
+      <author id="w-bruce-croft"><first>Bruce</first><last>Croft</last></author>
       <author><first>Theresa</first><last>Hand</last></author>
-      <author><first>Thomas</first><last>Keenan</last></author>
+      <author id="thomas-a-keenan"><first>Thomas</first><last>Keenan</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
       <doi>10.3115/1119018.1119047</doi>
       <pages>109–119</pages>
@@ -244,10 +244,10 @@
     </paper>
     <paper id="27">
       <title><fixed-case>TIPSTER</fixed-case>-Compatible Projects at <fixed-case>S</fixed-case>heffield</title>
-      <author><first>Hamish</first><last>Cunningham</last></author>
-      <author><first>Kevin</first><last>Humphreys</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="hamish-cunningham"><first>Hamish</first><last>Cunningham</last></author>
+      <author id="kevin-humphreys"><first>Kevin</first><last>Humphreys</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/1119018.1119048</doi>
       <pages>121–123</pages>
       <url hash="81cb7cb1">X96-1027</url>
@@ -255,11 +255,11 @@
     </paper>
     <paper id="28">
       <title>Progress in Information Extraction</title>
-      <author><first>Ralph</first><last>Weischedel</last></author>
-      <author><first>Sean</first><last>Boisen</last></author>
-      <author><first>Daniel</first><last>Bikel</last></author>
-      <author><first>Robert</first><last>Bobrow</last></author>
-      <author><first>Michael</first><last>Crystal</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
+      <author id="sean-boisen"><first>Sean</first><last>Boisen</last></author>
+      <author id="daniel-m-bikel"><first>Daniel</first><last>Bikel</last></author>
+      <author id="robert-bobrow"><first>Robert</first><last>Bobrow</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
       <author><first>William</first><last>Ferguson</last></author>
       <author><first>Allan</first><last>Wechsler</last></author>
       <author><first/><last>The PLUM Research Group</last></author>
@@ -270,7 +270,7 @@
     </paper>
     <paper id="29">
       <title>The Role of Syntax in Information Extraction</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1119018.1119051</doi>
       <pages>139–142</pages>
       <url hash="b6ff1fd7">X96-1029</url>
@@ -278,7 +278,7 @@
     </paper>
     <paper id="30">
       <title>Natural Language Information Retrieval: <fixed-case>TIPSTER</fixed-case>-2 Final Report</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <doi>10.3115/1119018.1119052</doi>
       <pages>143–148</pages>
       <url hash="28aeb430">X96-1030</url>
@@ -304,7 +304,7 @@
     </paper>
     <paper id="33">
       <title>A Simple Probabilistic Approach to Classification and Routing</title>
-      <author><first>Louise</first><last>Guthrie</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
       <author><first>James</first><last>Leistensnider</last></author>
       <doi>10.3115/1119018.1119055</doi>
       <pages>167–177</pages>
@@ -313,7 +313,7 @@
     </paper>
     <paper id="34">
       <title>An Evaluation of Coreference Resolution Strategies for Acquiring Associated Information</title>
-      <author><first>Lois C.</first><last>Childs</last></author>
+      <author id="lois-c-childs"><first>Lois C.</first><last>Childs</last></author>
       <doi>10.3115/1119018.1119056</doi>
       <pages>179–184</pages>
       <url hash="e636fa51">X96-1034</url>
@@ -321,7 +321,7 @@
     </paper>
     <paper id="35">
       <title>Advances in Multilingual Text Retrieval</title>
-      <author><first>Mark</first><last>Davis</last></author>
+      <author id="mark-w-davis"><first>Mark</first><last>Davis</last></author>
       <doi>10.3115/1119018.1119057</doi>
       <pages>185–194</pages>
       <url hash="85042e42">X96-1035</url>
@@ -329,8 +329,8 @@
     </paper>
     <paper id="36">
       <title>Integration of Document Detection and Information Extraction</title>
-      <author><first>Louise</first><last>Guthrie</last></author>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="louise-guthrie"><first>Louise</first><last>Guthrie</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><last>Wang</last><first>Jin</first></author>
       <author><first>Fang</first><last>Lin</last></author>
       <doi>10.3115/1119018.1119058</doi>
@@ -340,12 +340,12 @@
     </paper>
     <paper id="37">
       <title><fixed-case>SRI</fixed-case>’s Tipster <fixed-case>II</fixed-case> Project</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
-      <author><first>Andrew</first><last>Kehler</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
       <author><first>Mark</first><last>Stickel</last></author>
       <author><first>Mabry</first><last>Tyson</last></author>
       <doi>10.3115/1119018.1119059</doi>
@@ -363,7 +363,7 @@
     </paper>
     <paper id="39">
       <title>Building an Architecture: A <fixed-case>CAWG</fixed-case> Saga</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1119018.1119062</doi>
       <pages>213–215</pages>
       <url hash="e514a322">X96-1039</url>
@@ -379,7 +379,7 @@
     </paper>
     <paper id="41">
       <title><fixed-case>TUIT</fixed-case>: A Toolkit for Constructing Multilingual <fixed-case>TIPSTER</fixed-case> User Interfaces</title>
-      <author><first>William C.</first><last>Ogden</last></author>
+      <author id="william-c-ogden"><first>William C.</first><last>Ogden</last></author>
       <doi>10.3115/1119018.1119064</doi>
       <pages>219–220</pages>
       <url hash="fe705b37">X96-1041</url>
@@ -395,7 +395,7 @@
     </paper>
     <paper id="43">
       <title><fixed-case>TIPSTER</fixed-case> Text Phase <fixed-case>II</fixed-case> Architecture Design Version 2.1p 19 <fixed-case>J</fixed-case>une 1996</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1119018.1119066</doi>
       <pages>249–305</pages>
       <url hash="5ff49098">X96-1043</url>
@@ -427,8 +427,8 @@
     </paper>
     <paper id="47">
       <title>Design of the <fixed-case>MUC</fixed-case>-6 Evaluation</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
-      <author><first>Beth</first><last>Sundheim</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
+      <author id="beth-m-sundheim"><first>Beth</first><last>Sundheim</last></author>
       <doi>10.3115/1119018.1119072</doi>
       <pages>413–422</pages>
       <url hash="22b4160a">X96-1047</url>
@@ -436,7 +436,7 @@
     </paper>
     <paper id="48">
       <title>Overview of Results of the <fixed-case>MUC</fixed-case>-6 Evaluation</title>
-      <author><first>Beth M.</first><last>Sundheim</last></author>
+      <author id="beth-m-sundheim"><first>Beth M.</first><last>Sundheim</last></author>
       <doi>10.3115/1119018.1119073</doi>
       <pages>423–442</pages>
       <url hash="596b128e">X96-1048</url>
@@ -444,9 +444,9 @@
     </paper>
     <paper id="49">
       <title>The Multilingual Entity Task (<fixed-case>MET</fixed-case>) Overview</title>
-      <author><first>Roberta</first><last>Merchant</last></author>
+      <author id="roberta-h-merchant"><first>Roberta</first><last>Merchant</last></author>
       <author><first>Mary Ellen</first><last>Okurowski</last></author>
-      <author><first>Nancy</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy</first><last>Chinchor</last></author>
       <doi>10.3115/1119018.1119075</doi>
       <pages>445–447</pages>
       <url hash="6e6cef19">X96-1049</url>
@@ -454,7 +454,7 @@
     </paper>
     <paper id="50">
       <title>Multilingual Entity Task (<fixed-case>MET</fixed-case>): <fixed-case>J</fixed-case>apanese Results</title>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <author><first>Terry</first><last>Wilson</last></author>
       <doi>10.3115/1119018.1119076</doi>
       <pages>449–451</pages>
@@ -463,7 +463,7 @@
     </paper>
     <paper id="51">
       <title>An Interpretative Data Analysis of <fixed-case>C</fixed-case>hinese Named Entity Subtypes</title>
-      <author><first>Thomas A.</first><last>Keenan</last></author>
+      <author id="thomas-a-keenan"><first>Thomas A.</first><last>Keenan</last></author>
       <doi>10.3115/1119018.1119077</doi>
       <pages>453–455</pages>
       <url hash="d5e58439">X96-1051</url>
@@ -481,11 +481,11 @@
       <title><fixed-case>MITRE</fixed-case>: Description of the <fixed-case>A</fixed-case>lembic System as Used in <fixed-case>MET</fixed-case></title>
       <author><first>John</first><last>Aberdeen</last></author>
       <author id="john-d-burger"><first>John</first><last>Burger</last></author>
-      <author><first>David</first><last>Day</last></author>
-      <author><first>Lynette</first><last>Hirschman</last></author>
-      <author><first>David</first><last>Palmer</last></author>
-      <author><first>Patricia</first><last>Robinson</last></author>
-      <author><first>Marc</first><last>Vilain</last></author>
+      <author id="david-day"><first>David</first><last>Day</last></author>
+      <author id="lynette-hirschman"><first>Lynette</first><last>Hirschman</last></author>
+      <author id="david-d-palmer"><first>David</first><last>Palmer</last></author>
+      <author id="patricia-robinson"><first>Patricia</first><last>Robinson</last></author>
+      <author id="marc-vilain"><first>Marc</first><last>Vilain</last></author>
       <doi>10.3115/1119018.1119079</doi>
       <pages>461–462</pages>
       <url hash="cdd9e564">X96-1053</url>
@@ -501,11 +501,11 @@
     </paper>
     <paper id="55">
       <title>Approaches in <fixed-case>MET</fixed-case> (Multi-Lingual Entity Task)</title>
-      <author><first>Damaris</first><last>Ayuso</last></author>
-      <author><first>Daniel</first><last>Bikel</last></author>
+      <author id="damaris-ayuso"><first>Damaris</first><last>Ayuso</last></author>
+      <author id="daniel-m-bikel"><first>Daniel</first><last>Bikel</last></author>
       <author><first>Tasha</first><last>Hall</last></author>
       <author><first>Erik</first><last>Peterson</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <author><first>Patrick</first><last>Jost</last></author>
       <doi>10.3115/1119018.1119081</doi>
       <pages>465–466</pages>
@@ -514,7 +514,7 @@
     </paper>
     <paper id="56">
       <title><fixed-case>CRL</fixed-case>’s Approach to <fixed-case>MET</fixed-case></title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <doi>10.3115/1119018.1119082</doi>
       <pages>467–468</pages>
       <url hash="02a1b280">X96-1056</url>
@@ -540,10 +540,10 @@
     <paper id="59">
       <title><fixed-case>NEC</fixed-case> Corporation and <fixed-case>U</fixed-case>niversity of <fixed-case>S</fixed-case>heffield: “Description of <fixed-case>NEC</fixed-case>/<fixed-case>S</fixed-case>heffleld System Used For <fixed-case>MET</fixed-case> <fixed-case>J</fixed-case>apanese”</title>
       <author><first>Yoshikazu</first><last>Takemoto</last></author>
-      <author><first>Takahiro</first><last>Wakao</last></author>
+      <author id="takahiro-wakao"><first>Takahiro</first><last>Wakao</last></author>
       <author><first>Hiroshi</first><last>Yamada</last></author>
-      <author><first>Robert</first><last>Gaizauskas</last></author>
-      <author><first>Yorick</first><last>Wilks</last></author>
+      <author id="robert-gaizauskas"><first>Robert</first><last>Gaizauskas</last></author>
+      <author id="yorick-wilks"><first>Yorick</first><last>Wilks</last></author>
       <doi>10.3115/1119018.1119085</doi>
       <pages>475–476</pages>
       <url hash="a3d800c3">X96-1059</url>
diff --git a/data/xml/X98.xml b/data/xml/X98.xml
index 2bdd5256d7..373fab96db 100644
--- a/data/xml/X98.xml
+++ b/data/xml/X98.xml
@@ -40,8 +40,8 @@
     </paper>
     <paper id="4">
       <title>The Common Pattern Specification Language</title>
-      <author><first>Douglas E.</first><last>Appelt</last></author>
-      <author><first>Boyan</first><last>Onyshkevych</last></author>
+      <author id="douglas-appelt"><first>Douglas E.</first><last>Appelt</last></author>
+      <author id="boyan-onyshkevych"><first>Boyan</first><last>Onyshkevych</last></author>
       <doi>10.3115/1119089.1119095</doi>
       <pages>23–30</pages>
       <url hash="21f12bb3">X98-1004</url>
@@ -73,7 +73,7 @@
     </paper>
     <paper id="8">
       <title>The <fixed-case>SRI</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>III</fixed-case> Project</title>
-      <author><first>Steven</first><last>Maiorano</last></author>
+      <author id="steven-j-maiorano"><first>Steven</first><last>Maiorano</last></author>
       <doi>10.3115/1119089.1119100</doi>
       <pages>39–40</pages>
       <url hash="ae935e2e">X98-1008</url>
@@ -89,7 +89,7 @@
     </paper>
     <paper id="10">
       <title>Coreference Resolution Strategies From an Application Perspective</title>
-      <author><first>Lois C.</first><last>Childs</last></author>
+      <author id="lois-c-childs"><first>Lois C.</first><last>Childs</last></author>
       <author><first>David</first><last>Dadd</last></author>
       <author><first>Norris</first><last>Heintzelman</last></author>
       <doi>10.3115/1119089.1119103</doi>
@@ -99,7 +99,7 @@
     </paper>
     <paper id="11">
       <title>Extracting and Normalizing Temporal Expressions</title>
-      <author><first>Lois C.</first><last>Childs</last></author>
+      <author id="lois-c-childs"><first>Lois C.</first><last>Childs</last></author>
       <author><first>David</first><last>Cassel</last></author>
       <doi>10.3115/1119089.1119104</doi>
       <pages>51–56</pages>
@@ -108,7 +108,7 @@
     </paper>
     <paper id="12">
       <title>Research in Information Extraction: 1996-98</title>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1119089.1119105</doi>
       <pages>57–60</pages>
       <url hash="4abd1137">X98-1012</url>
@@ -116,12 +116,12 @@
     </paper>
     <paper id="13">
       <title>Information Extraction Research and Applications: Current Progress and Future Directions</title>
-      <author><first>Andrew</first><last>Kehler</last></author>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
-      <author><first>Douglas</first><last>Appelt</last></author>
-      <author><first>John</first><last>Bear</last></author>
+      <author id="andrew-kehler"><first>Andrew</first><last>Kehler</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="douglas-appelt"><first>Douglas</first><last>Appelt</last></author>
+      <author id="john-bear"><first>John</first><last>Bear</last></author>
       <author><first>Matthew</first><last>Caywood</last></author>
-      <author><first>David</first><last>Israel</last></author>
+      <author id="david-israel"><first>David</first><last>Israel</last></author>
       <author><first>Megumi</first><last>Kameyama</last></author>
       <author><first>David</first><last>Martin</last></author>
       <author><first>Claire</first><last>Monteleoni</last></author>
@@ -133,12 +133,12 @@
     <paper id="14">
       <title>Algorithms That Learn to Extract Information <fixed-case>BBN</fixed-case>: <fixed-case>TIPSTER</fixed-case> Phase <fixed-case>III</fixed-case></title>
       <author><first>Scott</first><last>Miller</last></author>
-      <author><first>Michael</first><last>Crystal</last></author>
-      <author><first>Heidi</first><last>Fox</last></author>
-      <author><first>Lance</first><last>Ramshaw</last></author>
-      <author><first>Richard</first><last>Schwartz</last></author>
+      <author id="michael-crystal"><first>Michael</first><last>Crystal</last></author>
+      <author id="heidi-fox"><first>Heidi</first><last>Fox</last></author>
+      <author id="lance-ramshaw"><first>Lance</first><last>Ramshaw</last></author>
+      <author id="richard-schwartz"><first>Richard</first><last>Schwartz</last></author>
       <author><first>Rebecca</first><last>Stone</last></author>
-      <author><first>Ralph</first><last>Weischedel</last></author>
+      <author id="ralph-weischedel"><first>Ralph</first><last>Weischedel</last></author>
       <doi>10.3115/1119089.1119107</doi>
       <pages>75–89</pages>
       <url hash="1a01e543">X98-1014</url>
@@ -157,7 +157,7 @@
     <paper id="16">
       <title>Transforming Examples into Patterns for Information Extraction</title>
       <author><first>Roman</first><last>Yangarber</last></author>
-      <author><first>Ralph</first><last>Grishman</last></author>
+      <author id="ralph-grishman"><first>Ralph</first><last>Grishman</last></author>
       <doi>10.3115/1119089.1119109</doi>
       <pages>97–103</pages>
       <url hash="36b272d8">X98-1016</url>
@@ -165,12 +165,12 @@
     </paper>
     <paper id="17">
       <title>The Smart/Empire <fixed-case>TIPSTER</fixed-case> <fixed-case>IR</fixed-case> System</title>
-      <author><first>Chris</first><last>Buckley</last></author>
+      <author id="chris-buckley"><first>Chris</first><last>Buckley</last></author>
       <author><first>Janet</first><last>Walz</last></author>
-      <author><first>Claire</first><last>Cardie</last></author>
+      <author id="claire-cardie"><first>Claire</first><last>Cardie</last></author>
       <author><first>Scott</first><last>Mardis</last></author>
       <author><first>Mandar</first><last>Mitra</last></author>
-      <author><first>David</first><last>Pierce</last></author>
+      <author id="david-pierce"><first>David</first><last>Pierce</last></author>
       <author><first>Kiri</first><last>Wagstaff</last></author>
       <doi>10.3115/1119089.1119111</doi>
       <pages>107–121</pages>
@@ -180,7 +180,7 @@
     <paper id="18">
       <title>Dynamic Data Fusion</title>
       <author><first>Ted</first><last>Diamond</last></author>
-      <author><first>Elizabeth D.</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth D.</first><last>Liddy</last></author>
       <doi>10.3115/1119089.1119112</doi>
       <pages>123–128</pages>
       <url hash="111e8014">X98-1018</url>
@@ -188,7 +188,7 @@
     </paper>
     <paper id="19">
       <title>Improving <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Ad-Hoc Retrieval: <fixed-case>TIPSTER</fixed-case> Text Phase 3 Final Report</title>
-      <author><first>Kui-Lam</first><last>Kwok</last></author>
+      <author id="kui-lam-kwok"><first>Kui-Lam</first><last>Kwok</last></author>
       <doi>10.3115/1119089.1119113</doi>
       <pages>129–137</pages>
       <url hash="9a932b54">X98-1019</url>
@@ -196,9 +196,9 @@
     </paper>
     <paper id="20">
       <title>Enhancing Detection through Linguistic Indexing and Topic Expansion</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Gees C.</first><last>Stein</last></author>
-      <author><first>G. Bowden</first><last>Wise</last></author>
+      <author id="g-bowden-wise"><first>G. Bowden</first><last>Wise</last></author>
       <doi>10.3115/1119089.1119114</doi>
       <pages>139–148</pages>
       <url hash="16cdb40d">X98-1020</url>
@@ -207,7 +207,7 @@
     <paper id="21">
       <title>Overview of the <fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania’s <fixed-case>TIPSTER</fixed-case> Project</title>
       <author><first>Breck</first><last>Baldwin</last></author>
-      <author><first>Thomas S.</first><last>Morton</last></author>
+      <author id="thomas-s-morton"><first>Thomas S.</first><last>Morton</last></author>
       <author><first>Amit</first><last>Bagga</last></author>
       <doi>10.3115/1119089.1119116</doi>
       <pages>151–161</pages>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="22">
       <title>An <fixed-case>NTU</fixed-case>-Approach to Automatic Sentence Extraction for Summary Generation</title>
-      <author><first>Kuang-hua</first><last>Chen</last></author>
+      <author id="kuang-hua-chen"><first>Kuang-hua</first><last>Chen</last></author>
       <author><first>Sheng-Jie</first><last>Huang</last></author>
       <author><first>Wen-Cheng</first><last>Lin</last></author>
       <author><first>Hsin-Hsi</first><last>Chen</last></author>
@@ -228,7 +228,7 @@
     </paper>
     <paper id="23">
       <title>Improving Robust Domain Independent Summarization</title>
-      <author><first>Jim</first><last>Cowie</last></author>
+      <author id="jim-cowie"><first>Jim</first><last>Cowie</last></author>
       <author><first>Eugene</first><last>Ludovik</last></author>
       <author><first>Hugo</first><last>Molina-Salgado</last></author>
       <doi>10.3115/1119089.1119118</doi>
@@ -238,7 +238,7 @@
     </paper>
     <paper id="24">
       <title>Automatic Text Summarization in <fixed-case>TIPSTER</fixed-case></title>
-      <author><first>Therese</first><last>Firmin</last></author>
+      <author id="therese-firmin"><first>Therese</first><last>Firmin</last></author>
       <author><first>Inderjeet</first><last>Mani</last></author>
       <doi>10.3115/1119089.1119119</doi>
       <pages>179–180</pages>
@@ -247,8 +247,8 @@
     </paper>
     <paper id="25">
       <title>Summarization: (1) Using <fixed-case>MMR</fixed-case> for Diversity- Based Reranking and (2) Evaluating Summaries</title>
-      <author><first>Jade</first><last>Goldstein</last></author>
-      <author><first>Jaime</first><last>Carbonell</last></author>
+      <author id="jade-goldstein"><first>Jade</first><last>Goldstein</last></author>
+      <author id="jaime-g-carbonell"><first>Jaime</first><last>Carbonell</last></author>
       <doi>10.3115/1119089.1119120</doi>
       <pages>181–195</pages>
       <url hash="d24132f8">X98-1025</url>
@@ -256,8 +256,8 @@
     </paper>
     <paper id="26">
       <title>Automated Text Summarization and the <fixed-case>S</fixed-case>ummarist System</title>
-      <author><first>Eduard</first><last>Hovy</last></author>
-      <author><first>Chin-Yew</first><last>Lin</last></author>
+      <author id="eduard-hovy"><first>Eduard</first><last>Hovy</last></author>
+      <author id="chin-yew-lin"><first>Chin-Yew</first><last>Lin</last></author>
       <doi>10.3115/1119089.1119121</doi>
       <pages>197–214</pages>
       <url hash="4cf1fd63">X98-1026</url>
@@ -266,7 +266,7 @@
     <paper id="27">
       <title>Multiple &amp; Single Document Summarization Using <fixed-case>DR-LINK</fixed-case></title>
       <author><first>Mary</first><last>McKenna</last></author>
-      <author><first>Elizabeth</first><last>Liddy</last></author>
+      <author id="elizabeth-d-liddy"><first>Elizabeth</first><last>Liddy</last></author>
       <doi>10.3115/1119089.1119122</doi>
       <pages>215–221</pages>
       <url hash="41269e35">X98-1027</url>
@@ -274,9 +274,9 @@
     </paper>
     <paper id="28">
       <title>A Text-Extraction Based Summarizer</title>
-      <author><first>Tomek</first><last>Strzalkowski</last></author>
+      <author id="tomek-strzalkowski"><first>Tomek</first><last>Strzalkowski</last></author>
       <author><first>Gees C.</first><last>Stein</last></author>
-      <author><first>G. Bowden</first><last>Wise</last></author>
+      <author id="g-bowden-wise"><first>G. Bowden</first><last>Wise</last></author>
       <doi>10.3115/1119089.1119123</doi>
       <pages>223–230</pages>
       <url hash="bdc2b50b">X98-1028</url>
@@ -292,7 +292,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>MUC</fixed-case>/<fixed-case>MET</fixed-case> Evaluation Trends</title>
-      <author><first>Nancy A.</first><last>Chinchor</last></author>
+      <author id="nancy-chinchor"><first>Nancy A.</first><last>Chinchor</last></author>
       <doi>10.3115/1119089.1119126</doi>
       <pages>235–239</pages>
       <url hash="18c6b6a6">X98-1030</url>
@@ -300,7 +300,7 @@
     </paper>
     <paper id="31">
       <title>The <fixed-case>T</fixed-case>ext <fixed-case>RE</fixed-case>trieval <fixed-case>C</fixed-case>onferences (<fixed-case>TREC</fixed-case>s)</title>
-      <author><first>Ellen M.</first><last>Voorhees</last></author>
+      <author id="ellen-m-voorhees"><first>Ellen M.</first><last>Voorhees</last></author>
       <author><first>Donna</first><last>Harman</last></author>
       <doi>10.3115/1119089.1119127</doi>
       <pages>241–273</pages>
diff --git a/data/xml/Y00.xml b/data/xml/Y00.xml
index 7384acb378..e3709177d5 100644
--- a/data/xml/Y00.xml
+++ b/data/xml/Y00.xml
@@ -25,7 +25,7 @@
     </paper>
     <paper id="2">
       <title>Verb Alternations and <fixed-case>J</fixed-case>apanese : How, What and Where</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Hozumi</first><last>Tanaka</last></author>
       <pages>3–14</pages>
       <url hash="843a4651">Y00-1002</url>
@@ -34,7 +34,7 @@
     </paper>
     <paper id="3">
       <title>Detection and Correction of Phonetic Errors with a New Orthographic Dictionary</title>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>15–22</pages>
       <url hash="e276c9fc">Y00-1003</url>
       <doi>http://hdl.handle.net/2065/12147</doi>
@@ -61,8 +61,8 @@
     </paper>
     <paper id="6">
       <title>Textual Information Segmentation by Cohesive Ties</title>
-      <author><first>Samuel W.K.</first><last>Chan</last></author>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W.K.</first><last>Chan</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
       <author><first>C.F.</first><last>Choy</last></author>
       <pages>47–56</pages>
       <url hash="fd35c251">Y00-1006</url>
@@ -122,7 +122,7 @@
     <paper id="13">
       <title>Using Bilingual Semantic Information in <fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment</title>
       <author><first>Jin-Xia</first><last>Huang</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>121–130</pages>
       <url hash="62908abd">Y00-1013</url>
       <doi>http://hdl.handle.net/2065/12144</doi>
@@ -146,7 +146,7 @@
     </paper>
     <paper id="16">
       <title>Qualia Structure and the Accessibility of Arguments : <fixed-case>J</fixed-case>apanese Internally-Headed Relative Clauses with Implicit Target</title>
-      <author><first>Chiharu Uda</first><last>Kikuta</last></author>
+      <author id="chiharu-uda-kikuta"><first>Chiharu Uda</first><last>Kikuta</last></author>
       <pages>153–164</pages>
       <url hash="8b7bbebe">Y00-1016</url>
       <doi>http://hdl.handle.net/2065/12148</doi>
@@ -186,7 +186,7 @@
     </paper>
     <paper id="21">
       <title>On the Discourse Analysis in <fixed-case>K</fixed-case>orean Dialogues</title>
-      <author><first>Ik-hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-hwan</first><last>Lee</last></author>
       <author><first>Minhaeng</first><last>Lee</last></author>
       <pages>207–218</pages>
       <url hash="13f133c5">Y00-1021</url>
@@ -243,7 +243,7 @@
     </paper>
     <paper id="28">
       <title>Collocation Deficiency in a Learner Corpus of <fixed-case>E</fixed-case>nglish : From an Overuse Perspective</title>
-      <author><first>Rebecca Hsue-Hueh</first><last>Shih</last></author>
+      <author id="hsue-hueh-shih"><first>Rebecca Hsue-Hueh</first><last>Shih</last></author>
       <pages>281–288</pages>
       <url hash="c45157ff">Y00-1028</url>
       <doi>http://hdl.handle.net/2065/12161</doi>
@@ -270,14 +270,14 @@
     </paper>
     <paper id="31">
       <title>Automatic Conversion from Phonetic to Textual Representation of <fixed-case>C</fixed-case>antonese : The Case of <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong Court Proceedings</title>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author id="king-kui-sin"><first>K.K.</first><last>Sin</last></author>
-      <author><first>Samuel W. K.</first><last>Chan</last></author>
-      <author><first>Tom B. Y.</first><last>Lai</last></author>
-      <author><first>Caesar</first><last>Lun</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W. K.</first><last>Chan</last></author>
+      <author id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></author>
+      <author id="suen-caesar-lun"><first>Caesar</first><last>Lun</last></author>
       <author><first>K. T.</first><last>Ko</last></author>
-      <author><first>Gary K. K.</first><last>Chan</last></author>
-      <author><first>Lawrence Y. L.</first><last>Cheung</last></author>
+      <author id="gary-k-k-chan"><first>Gary K. K.</first><last>Chan</last></author>
+      <author id="lawrence-y-l-cheung"><first>Lawrence Y. L.</first><last>Cheung</last></author>
       <pages>313–324</pages>
       <url hash="50dc2973">Y00-1031</url>
       <doi>http://hdl.handle.net/2065/12165</doi>
@@ -336,7 +336,7 @@
     </paper>
     <paper id="38">
       <title>A Unified Approach to Tense in <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Chidori</first><last>Nakamura</last></author>
       <author><first>Yoshiki</first><last>Mori</last></author>
       <pages>389–400</pages>
@@ -346,7 +346,7 @@
     </paper>
     <paper id="39">
       <title>Exclusion phrases and criticisms of semantic compositionality</title>
-      <author><first>Richard</first><last>Zuber</last></author>
+      <author id="richard-zuber"><first>Richard</first><last>Zuber</last></author>
       <pages>401–412</pages>
       <url hash="7b9b6c1a">Y00-1039</url>
       <doi>http://hdl.handle.net/2065/12174</doi>
@@ -354,7 +354,7 @@
     </paper>
     <paper id="40">
       <title>The Semantics of amwu-N-to/-irato/-ina in <fixed-case>K</fixed-case>orean : Arbitrary Choice and Concession</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <author><first>Daeho</first><last>Chung</last></author>
       <author><first>Seungho</first><last>Nam</last></author>
       <pages>413–424</pages>
diff --git a/data/xml/Y01.xml b/data/xml/Y01.xml
index 55069e0fa5..d77a2efb61 100644
--- a/data/xml/Y01.xml
+++ b/data/xml/Y01.xml
@@ -3,9 +3,9 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 15th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Benjamin K.</first><last>T’sou</last></editor>
-      <editor><first>Olivia O.Y.</first><last>Kwong</last></editor>
-      <editor><first>Tom B.Y.</first><last>Lai</last></editor>
+      <editor id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></editor>
+      <editor id="olivia-o-y-kwong"><first>Olivia O.Y.</first><last>Kwong</last></editor>
+      <editor id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></editor>
       <publisher>City University of Hong Kong</publisher>
       <address>Hong Kong, China</address>
       <month>February</month>
@@ -18,9 +18,9 @@
     </frontmatter>
     <paper id="1">
       <title>Building a Large Lexical Databank Which Provides Deep Semantics</title>
-      <author><first>Charles J.</first><last>Fillmore</last></author>
+      <author id="charles-j-fillmore"><first>Charles J.</first><last>Fillmore</last></author>
       <author><first>Charles</first><last>Wooters</last></author>
-      <author><first>Collin F.</first><last>Baker</last></author>
+      <author id="collin-f-baker"><first>Collin F.</first><last>Baker</last></author>
       <pages>3–26</pages>
       <url hash="a5bc7e0c">Y01-1001</url>
       <doi>http://hdl.handle.net/2065/12202</doi>
@@ -46,7 +46,7 @@
     <paper id="4">
       <title>A Parallel Interpretation of Floated Quantifiers and Adverbials</title>
       <author><first>Masahiro</first><last>Kobayashi</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>45–52</pages>
       <url hash="2e37934f">Y01-1004</url>
       <doi>http://hdl.handle.net/2065/12209</doi>
@@ -88,8 +88,8 @@
     </paper>
     <paper id="9">
       <title>Towards a Conceptual Representation of Lexical Meaning in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Jen-nan</first><last>Chen</last></author>
-      <author><first>Sue J.</first><last>Ker</last></author>
+      <author id="jen-nan-chen"><first>Jen-nan</first><last>Chen</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
       <pages>97–108</pages>
       <url hash="303ce632">Y01-1009</url>
       <doi>http://hdl.handle.net/2065/12214</doi>
@@ -132,7 +132,7 @@
     </paper>
     <paper id="14">
       <title>Temporal Structure on Discourse Level within the Controlled Information Packaging Theory</title>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <author><first>Minhaeng</first><last>Lee</last></author>
       <pages>151–162</pages>
       <url hash="47e9c2b1">Y01-1014</url>
@@ -157,9 +157,9 @@
     </paper>
     <paper id="17">
       <title>Building domain-independent text generation system</title>
-      <author><first>XinYu</first><last>Deng</last></author>
+      <author id="xinyu-deng"><first>XinYu</first><last>Deng</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
-      <author><first>Jun’ichi</first><last>Nakamura</last></author>
+      <author id="jun-ichi-nakamura"><first>Jun’ichi</first><last>Nakamura</last></author>
       <pages>187–194</pages>
       <url hash="1c2e34ac">Y01-1017</url>
       <doi>http://hdl.handle.net/2065/12190</doi>
@@ -252,7 +252,7 @@
       <title>An <fixed-case>HPSG</fixed-case> Account of the Hierarchical Clause Formation in <fixed-case>J</fixed-case>apanese : <fixed-case>HPSG</fixed-case>-Based <fixed-case>J</fixed-case>apanese Grammar for Practical Parsing</title>
       <author><first>Takashi</first><last>Miyata</last></author>
       <author><first>Akira</first><last>Otani</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>305–316</pages>
       <url hash="0e0839d2">Y01-1028</url>
       <doi>http://hdl.handle.net/2065/12203</doi>
@@ -260,7 +260,7 @@
     </paper>
     <paper id="29">
       <title>The <fixed-case>J</fixed-case>apanese Internally-Headed Relative Clause as a Marked Head-Complement Structure</title>
-      <author><first>Chiharu Uda</first><last>Kikuta</last></author>
+      <author id="chiharu-uda-kikuta"><first>Chiharu Uda</first><last>Kikuta</last></author>
       <pages>317–324</pages>
       <url hash="ad66b8ea">Y01-1029</url>
       <doi>http://hdl.handle.net/2065/12204</doi>
diff --git a/data/xml/Y02.xml b/data/xml/Y02.xml
index a8fa6cab82..b53ad68cc9 100644
--- a/data/xml/Y02.xml
+++ b/data/xml/Y02.xml
@@ -3,9 +3,9 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 16th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Ik-Hwan</first><last>Lee</last></editor>
+      <editor id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></editor>
       <editor><first>Yong-Beom</first><last>Kim</last></editor>
-      <editor><first>Key-Sun</first><last>Choi</last></editor>
+      <editor id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></editor>
       <editor><first>Minhaeng</first><last>Lee</last></editor>
       <publisher>The Korean Society for Language and Information</publisher>
       <address>Jeju, Korea</address>
@@ -19,7 +19,7 @@
     </frontmatter>
     <paper id="1">
       <title>Robust Syntactic Annotation of Corpora and Memory-based Parsing</title>
-      <author><first>Erhard W.</first><last>Hinrichs</last></author>
+      <author id="erhard-hinrichs"><first>Erhard W.</first><last>Hinrichs</last></author>
       <pages>1–1</pages>
       <url hash="104ec722">Y02-1001</url>
       <doi>http://hdl.handle.net/2065/12215</doi>
@@ -36,7 +36,7 @@
     <paper id="3">
       <title>Identification of <fixed-case>C</fixed-case>hinese Personal Names in Unrestricted Texts</title>
       <author><first>Lawrence</first><last>Cheung</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <author><first>Maosong</first><last>Sun</last></author>
       <pages>28–35</pages>
       <url hash="76d0deb0">Y02-1003</url>
@@ -64,8 +64,8 @@
     </paper>
     <paper id="6">
       <title>On Negative Imperatives in <fixed-case>K</fixed-case>orean</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
-      <author><first>Chung-min</first><last>Lee</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
+      <author id="chungmin-lee"><first>Chung-min</first><last>Lee</last></author>
       <pages>59–68</pages>
       <url hash="f40ec581">Y02-1006</url>
       <doi>http://hdl.handle.net/2065/12255</doi>
@@ -73,10 +73,10 @@
     </paper>
     <paper id="7">
       <title><fixed-case>P</fixed-case>enn <fixed-case>K</fixed-case>orean Treebank : Development and Evaluation</title>
-      <author><first>Chung-hye</first><last>Han</last></author>
+      <author id="chung-hye-han"><first>Chung-hye</first><last>Han</last></author>
       <author><first>Na-Rae</first><last>Han</last></author>
       <author><first>Eon-Suk</first><last>Ko</last></author>
-      <author><first>Martha</first><last>Palmer</last></author>
+      <author id="martha-palmer"><first>Martha</first><last>Palmer</last></author>
       <author><first>Heejong</first><last>Yi</last></author>
       <pages>69–78</pages>
       <url hash="12c62ab9">Y02-1007</url>
@@ -105,7 +105,7 @@
     <paper id="10">
       <title>Type Construction of Nouns with the Verb ha- ‘do’</title>
       <author><first>Seohyun</first><last>Im</last></author>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>103–112</pages>
       <url hash="9db01472">Y02-1010</url>
       <doi>http://hdl.handle.net/2065/12216</doi>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="14">
       <title>An Alignment Based Technique for Text Translation between Traditional <fixed-case>C</fixed-case>hinese and Simplified <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Sue J.</first><last>Ker</last></author>
+      <author id="sue-j-ker"><first>Sue J.</first><last>Ker</last></author>
       <author><first>Chun-Hsien</first><last>Lin</last></author>
       <pages>147–156</pages>
       <url hash="3337f85e">Y02-1014</url>
@@ -146,8 +146,8 @@
     </paper>
     <paper id="15">
       <title>Verb Pattern Based <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Machine Translation System</title>
-      <author><first>Changhyun</first><last>Kim</last></author>
-      <author><first>Young Kil</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Changhyun</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young Kil</first><last>Kim</last></author>
       <author><first>Munpyo</first><last>Hong</last></author>
       <author><first>Young Ae</first><last>Seo</last></author>
       <author><first>Sung Il</first><last>Yang</last></author>
@@ -162,7 +162,7 @@
       <author><first>Jun-Su</first><last>Kim</last></author>
       <author><first>Wang-Woo</first><last>Lee</last></author>
       <author><first>Chang-Hwan</first><last>Kim</last></author>
-      <author><first>Cheol-young</first><last>Ock</last></author>
+      <author id="cheol-young-ock"><first>Cheol-young</first><last>Ock</last></author>
       <pages>166–176</pages>
       <url hash="071c6c6d">Y02-1016</url>
       <doi>http://hdl.handle.net/2065/12222</doi>
@@ -227,7 +227,7 @@
     </paper>
     <paper id="24">
       <title>Toward a Bilingual Legal Term Glossary from Context Profiles</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>249–258</pages>
       <url hash="b84866d9">Y02-1024</url>
       <doi>http://hdl.handle.net/2065/12231</doi>
@@ -243,8 +243,8 @@
     </paper>
     <paper id="26">
       <title>An Operator Assisted Call Routing System</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>271–280</pages>
       <url hash="71befb6f">Y02-1026</url>
       <doi>http://hdl.handle.net/2065/12233</doi>
@@ -262,7 +262,7 @@
       <title>A <fixed-case>K</fixed-case>orean Noun Semantic Hierarchy (<fixed-case>W</fixed-case>ordnet) Construction</title>
       <author><first>Juho</first><last>Lee</last></author>
       <author><first>Koaunghi</first><last>Un</last></author>
-      <author><first>Hee-Sook</first><last>Bae</last></author>
+      <author id="hee-sook-bae"><first>Hee-Sook</first><last>Bae</last></author>
       <author><first>Key-Sun</first><last>Choi</last></author>
       <pages>290–295</pages>
       <url hash="3179043f">Y02-1028</url>
@@ -271,7 +271,7 @@
     </paper>
     <paper id="29">
       <title>Implementation of Long-distance Reflexives in <fixed-case>K</fixed-case>orean : A Categorial Grammar Approach</title>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <pages>296–307</pages>
       <url hash="8adde00a">Y02-1029</url>
       <doi>http://hdl.handle.net/2065/12237</doi>
@@ -299,7 +299,7 @@
     <paper id="32">
       <title>If a Quantifier is not floated, but moored or even incorporated : Complexity of Presuppositions in Local Domain</title>
       <author><first>Yoshiki</first><last>Mori</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>330–347</pages>
       <url hash="9f6eaf1a">Y02-1032</url>
       <doi>http://hdl.handle.net/2065/12240</doi>
@@ -401,7 +401,7 @@
     </paper>
     <paper id="44">
       <title>Building a Domain-Specific <fixed-case>F</fixed-case>rench-<fixed-case>K</fixed-case>orean Lexicon</title>
-      <author><first>Aesun</first><last>Yoon</last></author>
+      <author id="aesun-yoon"><first>Aesun</first><last>Yoon</last></author>
       <pages>465–474</pages>
       <url hash="8a031428">Y02-1044</url>
       <doi>http://hdl.handle.net/2065/12253</doi>
diff --git a/data/xml/Y03.xml b/data/xml/Y03.xml
index 1429cf534d..2dd93d4827 100644
--- a/data/xml/Y03.xml
+++ b/data/xml/Y03.xml
@@ -3,8 +3,8 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 17th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Dong Hong</first><last>Ji</last></editor>
-      <editor><first>Kim Teng</first><last>Lua</last></editor>
+      <editor id="donghong-ji"><first>Dong Hong</first><last>Ji</last></editor>
+      <editor id="kim-teng-lua"><first>Kim Teng</first><last>Lua</last></editor>
       <publisher>COLIPS PUBLICATIONS</publisher>
       <address>Sentosa, Singapore</address>
       <month>October</month>
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Virtual Linked Lexical Knowledge Base for Causality Reasoning</title>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>1–1</pages>
       <url hash="b1e6b5af">Y03-1001</url>
       <doi>http://hdl.handle.net/2065/12259</doi>
@@ -105,8 +105,8 @@
       <title>Porting Grammars between Typologically Similar Languages : <fixed-case>J</fixed-case>apanese to <fixed-case>K</fixed-case>orean</title>
       <author><first>Roger</first><last>Kim</last></author>
       <author><first>Mary</first><last>Dalrymple</last></author>
-      <author><first>Ronald M.</first><last>Kaplan</last></author>
-      <author><first>Tracy Holloway</first><last>King</last></author>
+      <author id="ronald-m-kaplan"><first>Ronald M.</first><last>Kaplan</last></author>
+      <author id="tracy-holloway-king"><first>Tracy Holloway</first><last>King</last></author>
       <pages>98–105</pages>
       <url hash="fbd97957">Y03-1011</url>
       <doi>http://hdl.handle.net/2065/12309</doi>
@@ -131,7 +131,7 @@
     </paper>
     <paper id="14">
       <title>Stock Markets as Ocean Water : A Corpus-based, Comparative Study of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese, <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <author><first>Ya-hui</first><last>Sung</last></author>
       <pages>124–133</pages>
@@ -150,7 +150,7 @@
     <paper id="16">
       <title>Context-rule Model for Pos Tagging</title>
       <author><first>Yu-Fang</first><last>Tsai</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>146–151</pages>
       <url hash="47f6fbc2">Y03-1016</url>
       <doi>http://hdl.handle.net/2065/12264</doi>
@@ -158,7 +158,7 @@
     </paper>
     <paper id="17">
       <title><fixed-case>C</fixed-case>hinese Word Segmentation Based on Contextual Entropy</title>
-      <author><first>Jin Hu</first><last>Huang</last></author>
+      <author id="jin-hu-huang"><first>Jin Hu</first><last>Huang</last></author>
       <author><first>David</first><last>Powers</last></author>
       <pages>152–158</pages>
       <url hash="b64a41dd">Y03-1017</url>
@@ -178,7 +178,7 @@
       <title>Cross-Lingual Text Filtering Based on Text Concepts and k<fixed-case>NN</fixed-case></title>
       <author><first>Shaozi</first><last>Li</last></author>
       <author><first>Weifeng</first><last>Su</last></author>
-      <author><first>Tangqiu</first><last>Li</last></author>
+      <author id="tangqiu-li"><first>Tangqiu</first><last>Li</last></author>
       <author><first>Huowang</first><last>Chen</last></author>
       <pages>166–173</pages>
       <url hash="2176bd8f">Y03-1019</url>
@@ -204,8 +204,8 @@
     </paper>
     <paper id="22">
       <title>A Synchronous Corpus-Based Study of Verb-Noun Fluidity in <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>194–203</pages>
       <url hash="406bfb29">Y03-1022</url>
       <doi>http://hdl.handle.net/2065/12271</doi>
@@ -266,8 +266,8 @@
     </paper>
     <paper id="29">
       <title>Efficient Methods for Multigram Compound Discovery</title>
-      <author><last>Wu</last><first>Horng Jyh Paul</first></author>
-      <author><last>Ng</last><first>Hong I</first></author>
+      <author id="horng-jyh-paul-wu"><first>Horng Jyh Paul</first><last>Wu</last></author>
+      <author id="hong-i-ng"><first>Hong I</first><last>Ng</last></author>
       <author><last>Gong</last><first>Ruibin</first></author>
       <pages>257–268</pages>
       <url hash="2c55549b">Y03-1029</url>
@@ -276,7 +276,7 @@
     </paper>
     <paper id="30">
       <title>Translation Template Learning Based on Hidden <fixed-case>M</fixed-case>arkov Modeling</title>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Akari</first><last>Shimazu</last></author>
       <author><first>Susumu</first><last>Horiguchi</last></author>
       <pages>269–276</pages>
@@ -307,7 +307,7 @@
     </paper>
     <paper id="33">
       <title>A New Sentence Reduction based on Decision Tree Model</title>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Susumu</first><last>Horiguchi</last></author>
       <pages>290–297</pages>
       <url hash="0a94e869">Y03-1033</url>
@@ -316,7 +316,7 @@
     </paper>
     <paper id="34">
       <title><fixed-case>J</fixed-case>apanese Parser on the basis of the <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar Formalism and its Evaluation</title>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Tomoko</first><last>Okuma</last></author>
       <author><first>Hiroki</first><last>Yoshimura</last></author>
       <author><first>Yasunari</first><last>Harada</last></author>
@@ -327,9 +327,9 @@
     </paper>
     <paper id="35">
       <title>A Statistical Approach to <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Back-Transliteration</title>
-      <author><first>Chun-Jen</first><last>Lee</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
-      <author><first>Jyh-Shing Roger</first><last>Jang</last></author>
+      <author id="chun-jen-lee"><first>Chun-Jen</first><last>Lee</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
+      <author id="jyh-shing-roger-jang"><first>Jyh-Shing Roger</first><last>Jang</last></author>
       <pages>310–318</pages>
       <url hash="a3598f80">Y03-1035</url>
       <doi>http://hdl.handle.net/2065/12286</doi>
@@ -346,7 +346,7 @@
     <paper id="37">
       <title>Modeling Verb Order in Complex Multi-Verbal Predicate Constructions</title>
       <author><first>Olivia S.-C.</first><last>Lam</last></author>
-      <author><first>Adams B.</first><last>Bodomo</last></author>
+      <author id="adams-b-bodomo"><first>Adams B.</first><last>Bodomo</last></author>
       <pages>328–338</pages>
       <url hash="467410cd">Y03-1037</url>
       <doi>http://hdl.handle.net/2065/12288</doi>
@@ -425,12 +425,12 @@
     </paper>
     <paper id="46">
       <title>Towards a Multi-Objective Corpus for <fixed-case>V</fixed-case>ietnamese Language</title>
-      <author><last>Vu</last><first>Hai Quan</first></author>
+      <author id="hai-quan-vu"><first>Hai Quan</first><last>Vu</last></author>
       <author><last>Pham</last><first>Nam Trung</first></author>
       <author><last>Nguyen</last><first>Duc Hoang Ha</first></author>
       <author><last>Huynh</last><first>Bao Toan</first></author>
       <author><last>Le</last><first>Hoai Bac</first></author>
-      <author><last>Hoang</last><first>Kiem</first></author>
+      <author id="hoang-kiem"><first>Kiem</first><last>Hoang</last></author>
       <pages>416–422</pages>
       <url hash="f78f232c">Y03-1046</url>
       <doi>http://hdl.handle.net/2065/12298</doi>
@@ -439,7 +439,7 @@
     <paper id="47">
       <title>Using Zero Anaphora Resolution to Improve Text Categorization</title>
       <author><first>Ching-Long</first><last>Yeh</last></author>
-      <author><first>Yi-Chun</first><last>Chen</last></author>
+      <author id="yichun-chen"><first>Yi-Chun</first><last>Chen</last></author>
       <pages>423–430</pages>
       <url hash="676c86c4">Y03-1047</url>
       <doi>http://hdl.handle.net/2065/12299</doi>
@@ -464,7 +464,7 @@
     <paper id="50">
       <title>The Treatment of <fixed-case>J</fixed-case>apanese Focus Particles Based on <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar</title>
       <author><first>Tomoko</first><last>Ohkuma</last></author>
-      <author><first>Hiroshi</first><last>Masuichi</last></author>
+      <author id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></author>
       <author><first>Hiroki</first><last>Yoshimura</last></author>
       <author><first>Yasunari</first><last>Harada</last></author>
       <pages>448–455</pages>
diff --git a/data/xml/Y04.xml b/data/xml/Y04.xml
index 5a2e4a8b3d..b84fb1aa65 100644
--- a/data/xml/Y04.xml
+++ b/data/xml/Y04.xml
@@ -3,11 +3,11 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 18th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Hiroshi</first><last>Masuichi</last></editor>
+      <editor id="hiroshi-masuichi"><first>Hiroshi</first><last>Masuichi</last></editor>
       <editor><first>Tomoko</first><last>Ohkuma</last></editor>
       <editor><first>Kiyoshi</first><last>Ishikawa</last></editor>
       <editor><first>Yasunari</first><last>Harada</last></editor>
-      <editor><first>Kei</first><last>Yoshimoto</last></editor>
+      <editor id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></editor>
       <publisher>Logico-Linguistic Society of Japan</publisher>
       <address>Waseda University, Tokyo, Japan</address>
       <month>December</month>
@@ -28,7 +28,7 @@
     </paper>
     <paper id="2">
       <title>Machine Learning based <fixed-case>NLP</fixed-case> : Experiences and Supporting Tools</title>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>15–16</pages>
       <url hash="eedcd642">Y04-1002</url>
       <doi>http://hdl.handle.net/2065/555</doi>
@@ -97,7 +97,7 @@
     <paper id="10">
       <title><fixed-case>J</fixed-case>apanese Subjects and Information Structure : A Constraint-based Approach</title>
       <author><first>Akira</first><last>Ohtani</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>93–104</pages>
       <url hash="073af66f">Y04-1010</url>
       <doi>http://hdl.handle.net/2065/563</doi>
@@ -115,7 +115,7 @@
     <paper id="12">
       <title>Automatic Discovery of Telic and Agentive Roles from Corpus Data</title>
       <author><first>Ichiro</first><last>Yamada</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>115–126</pages>
       <url hash="7d08837c">Y04-1012</url>
       <doi>http://hdl.handle.net/2065/565</doi>
@@ -133,9 +133,9 @@
     </paper>
     <paper id="14">
       <title>Pruning False Unknown Words to Improve <fixed-case>C</fixed-case>hinese Word Segmentation</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>139–150</pages>
       <url hash="4142c21b">Y04-1014</url>
       <doi>http://hdl.handle.net/2065/567</doi>
@@ -143,7 +143,7 @@
     </paper>
     <paper id="15">
       <title>Ontology-based Prediction of Compound Relations : A Study Based on <fixed-case>SUMO</fixed-case></title>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Xiang-Bing</first><last>Li</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>151–160</pages>
@@ -158,8 +158,8 @@
       <author><first>Aoife</first><last>Cahill</last></author>
       <author><first>Rowena</first><last>Chan</last></author>
       <author><first>Ruth</first><last>O’Donovan</last></author>
-      <author><first>Adams</first><last>Bodomo</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="adams-b-bodomo"><first>Adams</first><last>Bodomo</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>161–172</pages>
       <url hash="84c0844d">Y04-1016</url>
@@ -188,7 +188,7 @@
     <paper id="19">
       <title>Integrated Use of Internal and External Evidence in the Alignment of Multi-Word Named Entities</title>
       <author><first>Takeshi</first><last>Kutsumi</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Katsunori</first><last>Kotani</last></author>
       <author><first>Ichiko</first><last>Sata</last></author>
       <author><first>Hitoshi</first><last>Isahara</last></author>
@@ -216,7 +216,7 @@
     </paper>
     <paper id="22">
       <title>Scalar Meanings of the Concessive (-to), the Contrastive Topic Marker (-nun) and -man ‘only’ in <fixed-case>K</fixed-case>orean (and <fixed-case>J</fixed-case>apanese)</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>217–226</pages>
       <url hash="fbbaf38b">Y04-1022</url>
       <doi>http://hdl.handle.net/2065/575</doi>
@@ -267,8 +267,8 @@
     </paper>
     <paper id="28">
       <title>Adaptive Word Sense Tagging on <fixed-case>C</fixed-case>hinese Corpus</title>
-      <author><first>Sue-jin</first><last>Ker</last></author>
-      <author><first>Jen-Nan</first><last>Chen</last></author>
+      <author id="sue-j-ker"><first>Sue-jin</first><last>Ker</last></author>
+      <author id="jen-nan-chen"><first>Jen-Nan</first><last>Chen</last></author>
       <pages>267–274</pages>
       <url hash="5a5bf680">Y04-1028</url>
       <doi>http://hdl.handle.net/2065/581</doi>
@@ -285,7 +285,7 @@
     </paper>
     <paper id="30">
       <title><fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpus Construction and its Application</title>
-      <author><first>Baobao</first><last>Chang</last></author>
+      <author id="baobao-chang"><first>Baobao</first><last>Chang</last></author>
       <pages>283–290</pages>
       <url hash="f9695ef3">Y04-1030</url>
       <doi>http://hdl.handle.net/2065/583</doi>
diff --git a/data/xml/Y05.xml b/data/xml/Y05.xml
index fae7b8a6a3..02062c0c0c 100644
--- a/data/xml/Y05.xml
+++ b/data/xml/Y05.xml
@@ -50,7 +50,7 @@
       <title>A Framework for Data Management for the Online Volunteer Translators’ Aid System <fixed-case>QRL</fixed-case>ex</title>
       <author><first>Youcef</first><last>Bey</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>51–60</pages>
       <url hash="7267e4aa">Y05-1005</url>
       <doi>http://hdl.handle.net/2065/28988</doi>
@@ -60,7 +60,7 @@
       <title>From Text to Sign Language: Exploiting the Spatial and Motioning Dimension</title>
       <author><first>Ji-Won</first><last>Choi</last></author>
       <author><first>Hee-Jin</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>61–69</pages>
       <url hash="bc8f87c9">Y05-1006</url>
       <doi>http://hdl.handle.net/2065/28986</doi>
@@ -68,7 +68,7 @@
     </paper>
     <paper id="7">
       <title><fixed-case>MARKET</fixed-case> Metaphors: <fixed-case>C</fixed-case>hinese, <fixed-case>E</fixed-case>nglish and <fixed-case>M</fixed-case>alay</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>71–81</pages>
       <url hash="d9e78edb">Y05-1007</url>
       <doi>http://hdl.handle.net/2065/29011</doi>
@@ -103,7 +103,7 @@
       <author><first>Makoto</first><last>Kondo</last></author>
       <author><first>Hideki</first><last>Asoh</last></author>
       <author><first>Akira</first><last>Takagi</last></author>
-      <author><first>Yukihiro</first><last>Ito</last></author>
+      <author id="yukihiro-itoh"><first>Yukihiro</first><last>Ito</last></author>
       <pages>107–118</pages>
       <url hash="129bd206">Y05-1010</url>
       <doi>http://hdl.handle.net/2065/28993</doi>
@@ -111,10 +111,10 @@
     </paper>
     <paper id="11">
       <title>A Study on Implementation of <fixed-case>S</fixed-case>outhern-<fixed-case>M</fixed-case>in <fixed-case>T</fixed-case>aiwanese Tone Sandhi System</title>
-      <author><last>Iun</last><first>Un-gian</first></author>
-      <author><last>Lau</last><first>Kiat-gak</first></author>
+      <author id="un-gian-iunn"><first>Un-gian</first><last>Iun</last></author>
+      <author id="kiat-gak-lau"><first>Kiat-gak</first><last>Lau</last></author>
       <author><last>Li</last><first>Sheng-an</first></author>
-      <author><last>Kao</last><first>Cheng-yan</first></author>
+      <author id="cheng-yan-kao"><first>Cheng-yan</first><last>Kao</last></author>
       <pages>119–130</pages>
       <url hash="aa6622d2">Y05-1011</url>
       <doi>http://hdl.handle.net/2065/29009</doi>
@@ -123,7 +123,7 @@
     <paper id="12">
       <title>Vowel Sound Disambiguation for Intelligible <fixed-case>K</fixed-case>orean Speech Synthesis</title>
       <author><first>Ho-Joon</first><last>Lee</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>131–142</pages>
       <url hash="420b306c">Y05-1012</url>
       <doi>http://hdl.handle.net/2065/28995</doi>
@@ -153,7 +153,7 @@
     </paper>
     <paper id="15">
       <title>A Structured <fixed-case>SVM</fixed-case> Semantic Parser Augmented by Semantic Tagging with Conditional Random Field</title>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <author><first>Hieu Xuan</first><last>Phan</last></author>
       <pages>167–177</pages>
@@ -164,7 +164,7 @@
     <paper id="16">
       <title>Multiply Quantified Internally Headed Relative Clause in <fixed-case>J</fixed-case>apanese: A Skolem Term Based Approach</title>
       <author><first>Rui</first><last>Otake</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>179–189</pages>
       <url hash="9f588c89">Y05-1016</url>
       <doi>http://hdl.handle.net/2065/29015</doi>
@@ -174,7 +174,7 @@
       <title>A study on multiple interpretations of frequency adverbs in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Tomoaki</first><last>Ozawa</last></author>
       <author><first>Hiroyuki</first><last>Nishina</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Shigeru</first><last>Sato</last></author>
       <pages>191–198</pages>
       <url hash="ed146069">Y05-1017</url>
@@ -187,7 +187,7 @@
       <author><first>Kazunori</first><last>Komatani</last></author>
       <author><first>Takashi</first><last>Miyata</last></author>
       <author><first>Koichi</first><last>Hashida</last></author>
-      <author><first>Hiroshi</first><last>Okuno</last></author>
+      <author id="hiroshi-g-okuno"><first>Hiroshi</first><last>Okuno</last></author>
       <pages>199–210</pages>
       <url hash="9822cc1f">Y05-1018</url>
       <doi>http://hdl.handle.net/2065/28996</doi>
@@ -233,8 +233,8 @@
     </paper>
     <paper id="23">
       <title>An Approach to Improve the Smoothing Process Based on Non-uniform Redistribution</title>
-      <author><first>Feng-Long</first><last>Huang</last></author>
-      <author><first>Ming-Shing</first><last>Yu</last></author>
+      <author id="feng-long-huang"><first>Feng-Long</first><last>Huang</last></author>
+      <author id="ming-shing-yu"><first>Ming-Shing</first><last>Yu</last></author>
       <pages>257–264</pages>
       <url hash="e0ac5fcf">Y05-1023</url>
       <doi>http://hdl.handle.net/2065/29016</doi>
diff --git a/data/xml/Y06.xml b/data/xml/Y06.xml
index 8d5cfaeb92..2d7160b70e 100644
--- a/data/xml/Y06.xml
+++ b/data/xml/Y06.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Which Is Essential for <fixed-case>C</fixed-case>hinese Word Segmentation: Character versus Word</title>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
       <pages>1–12</pages>
       <url hash="216a6c5e">Y06-1001</url>
@@ -31,7 +31,7 @@
     </paper>
     <paper id="3">
       <title>Towards a Neuro-Cognitive Model of Human Sentence Processing</title>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Shigeru</first><last>Sato</last></author>
       <pages>21–27</pages>
       <url hash="c43cebe1">Y06-1003</url>
@@ -79,7 +79,7 @@
       <title>An Information Retrieval Model Based On Word Concept</title>
       <author><first>Chen</first><last>Wu</last></author>
       <author><first>Quan</first><last>Zhang</last></author>
-      <author><first>Xiangfeng</first><last>Wei</last></author>
+      <author id="xiangfeng-wei"><first>Xiangfeng</first><last>Wei</last></author>
       <pages>56–63</pages>
       <url hash="e0952df3">Y06-1008</url>
       <doi>http://hdl.handle.net/2065/29040</doi>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="9">
       <title>Discriminative Reranking for Spelling Correction</title>
-      <author id="yang-zhang"><first>Yang</first><last>Zhang</last></author>
+      <author><first>Yang</first><last>Zhang</last></author>
       <author><first>Pilian</first><last>He</last></author>
       <author><first>Wei</first><last>Xiang</last></author>
       <author><first>Mu</first><last>Li</last></author>
@@ -120,9 +120,9 @@
     <paper id="12">
       <title>Effective Tag Set Selection in <fixed-case>C</fixed-case>hinese Word Segmentation via Conditional Random Field Modeling</title>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Mu</first><last>Li</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>87–94</pages>
       <url hash="82db7621">Y06-1012</url>
       <doi>http://hdl.handle.net/2065/29030</doi>
@@ -141,9 +141,9 @@
     <paper id="14">
       <title>A Comparative Study of the Effect of Word Segmentation On <fixed-case>C</fixed-case>hinese Terminology Extraction</title>
       <author><first>Luning</first><last>Ji</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Wenjie</first><last>Li</last></author>
-      <author><first>YiRong</first><last>Chen</last></author>
+      <author id="yi-rong-chen"><first>YiRong</first><last>Chen</last></author>
       <pages>101–108</pages>
       <url hash="77f0bee0">Y06-1014</url>
       <doi>http://hdl.handle.net/2065/29021</doi>
@@ -152,7 +152,7 @@
     <paper id="15">
       <title><fixed-case>TC</fixed-case>tract-A Collocation Extraction Approach for Noun Phrases Using Shallow Parsing Rules and Statistic Models</title>
       <author><first>Wan Yin</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>James</first><last>Liu</last></author>
       <pages>109–116</pages>
       <url hash="e63f81fd">Y06-1015</url>
@@ -225,7 +225,7 @@
       <title>The Analysis of <fixed-case>C</fixed-case>hinese Sentence Semantic Chunk Share Based on <fixed-case>HNC</fixed-case> Theory</title>
       <author><first>Quan</first><last>Zhang</last></author>
       <author><first>Chen</first><last>Wu</last></author>
-      <author><first>Xiangfeng</first><last>Wei</last></author>
+      <author id="xiangfeng-wei"><first>Xiangfeng</first><last>Wei</last></author>
       <pages>175–182</pages>
       <url hash="286e0143">Y06-1023</url>
       <doi>http://hdl.handle.net/2065/29068</doi>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="24">
       <title>Using <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus and <fixed-case>C</fixed-case>hinese Word Sketch in linguistic Research</title>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>183–190</pages>
       <url hash="1b78a98d">Y06-1024</url>
@@ -259,9 +259,9 @@
     <paper id="27">
       <title>Knowledge-Rich Approach to Automatic Grammatical Information Acquisition: Enriching <fixed-case>C</fixed-case>hinese <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine with a Lexical Grammar</title>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Wei-Yun</first><last>Ma</last></author>
+      <author id="wei-yun-ma"><first>Wei-Yun</first><last>Ma</last></author>
       <author><first>Yi-Ching</first><last>Wu</last></author>
-      <author><first>Chih-Ming</first><last>Chiu</last></author>
+      <author id="chih-ming-chiu"><first>Chih-Ming</first><last>Chiu</last></author>
       <pages>206–214</pages>
       <url hash="8df35c7c">Y06-1027</url>
       <doi>http://hdl.handle.net/2065/29027</doi>
@@ -269,11 +269,11 @@
     </paper>
     <paper id="28">
       <title><fixed-case>V</fixed-case>ietnamese Word Segmentation with <fixed-case>CRF</fixed-case>s and <fixed-case>SVM</fixed-case>s: An Investigation</title>
-      <author><first>Cam-Tu</first><last>Nguyen</last></author>
+      <author id="cam-tu-nguyen"><first>Cam-Tu</first><last>Nguyen</last></author>
       <author><first>Trung-Kien</first><last>Nguyen</last></author>
       <author><first>Xuan-Hieu</first><last>Phan</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
-      <author><first>Quang-Thuy</first><last>Ha</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="quang-thuy-ha"><first>Quang-Thuy</first><last>Ha</last></author>
       <pages>215–222</pages>
       <url hash="121ba9d0">Y06-1028</url>
       <doi>http://hdl.handle.net/2065/29084</doi>
@@ -281,7 +281,7 @@
     </paper>
     <paper id="29">
       <title>A language-independent method for the alignement of parallel corpora</title>
-      <author><first>Thi Minh Huyền</first><last>Nguyễn</last></author>
+      <author id="thi-minh-huyen-nguyen"><first>Thi Minh Huyền</first><last>Nguyễn</last></author>
       <author><first>Mathias</first><last>Rossignol</last></author>
       <pages>223–230</pages>
       <url hash="7732e250">Y06-1029</url>
@@ -318,7 +318,7 @@
     </paper>
     <paper id="33">
       <title>Research on Hypothesizing and Sorting the Eg Candidates in <fixed-case>C</fixed-case>hinese Semantic Parsing</title>
-      <author><first>XiangFeng</first><last>Wei</last></author>
+      <author id="xiangfeng-wei"><first>XiangFeng</first><last>Wei</last></author>
       <author><first>Quan</first><last>Zhang</last></author>
       <pages>250–256</pages>
       <url hash="a001a7ef">Y06-1033</url>
@@ -328,8 +328,8 @@
     <paper id="34">
       <title>Mining the Relation between Sentiment Expression and Target Using Dependency of Words</title>
       <author><first>Zhongchao</first><last>Fei</last></author>
-      <author><first>Xuanjing</first><last>Huang</last></author>
-      <author><first>Lide</first><last>Wu</last></author>
+      <author id="xuan-jing-huang"><first>Xuanjing</first><last>Huang</last></author>
+      <author id="lide-wu"><first>Lide</first><last>Wu</last></author>
       <pages>257–264</pages>
       <url hash="06e6cfe7">Y06-1034</url>
       <doi>http://hdl.handle.net/2065/29079</doi>
@@ -348,7 +348,7 @@
     <paper id="36">
       <title>A Constraint-based Morphological Analyzer for Concatenative and Non-concatenative Morphology</title>
       <author><first>Farrah Cherry</first><last>Fortes-Galvan</last></author>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>273–279</pages>
       <url hash="c2d298ae">Y06-1036</url>
       <doi>http://hdl.handle.net/2065/29081</doi>
@@ -386,7 +386,7 @@
       <title>An Activation-based Sentence Processing Model of <fixed-case>E</fixed-case>nglish</title>
       <author><first>Kei</first><last>Takahashi</last></author>
       <author><first>Kiyoshi</first><last>Ishikawa</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>303–310</pages>
       <url hash="1edfed0e">Y06-1040</url>
       <doi>http://hdl.handle.net/2065/29018</doi>
@@ -395,7 +395,7 @@
     <paper id="41">
       <title>Platform for Full-Syntax Grammar Development Using Meta-grammar Constructs</title>
       <author><first>Aleš</first><last>Horák</last></author>
-      <author><first>Vladimír</first><last>Kadlec</last></author>
+      <author id="vladimir-kadlec"><first>Vladimír</first><last>Kadlec</last></author>
       <pages>311–318</pages>
       <url hash="887a9bad">Y06-1041</url>
       <doi>http://hdl.handle.net/2065/29045</doi>
@@ -413,7 +413,7 @@
     </paper>
     <paper id="43">
       <title>Using the <fixed-case>S</fixed-case>wadesh list for creating a simple common taxonomy</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>I-Li</first><last>Su</last></author>
       <pages>324–331</pages>
@@ -423,11 +423,11 @@
     </paper>
     <paper id="44">
       <title>The Construction of a Dictionary for a Two-layer <fixed-case>C</fixed-case>hinese Morphological Analyzer</title>
-      <author><first>Chooi-Ling</first><last>Goh</last></author>
-      <author><first>Jia</first><last>Lü</last></author>
+      <author id="chooi-ling-goh"><first>Chooi-Ling</first><last>Goh</last></author>
+      <author id="jia-lu"><first>Jia</first><last>Lü</last></author>
       <author><first>Yuchang</first><last>Cheng</last></author>
       <author><first>Masayuki</first><last>Asahara</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>332–340</pages>
       <url hash="b0b53dd5">Y06-1044</url>
       <doi>http://hdl.handle.net/2065/29024</doi>
@@ -435,7 +435,7 @@
     </paper>
     <paper id="45">
       <title>A Natural Language Model of Computing with Words in Web Pages</title>
-      <author><first>Ze-yu</first><last>Zheng</last></author>
+      <author id="ze-yu-zheng"><first>Ze-yu</first><last>Zheng</last></author>
       <author><first>Ping</first><last>Zhang</last></author>
       <pages>341–346</pages>
       <url hash="dd5752bd">Y06-1045</url>
@@ -472,9 +472,9 @@
     </paper>
     <paper id="49">
       <title><fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Based <fixed-case>C</fixed-case>hinese Question Classification</title>
-      <author><first>Dongfeng</first><last>Cai</last></author>
-      <author><first>Jingguang</first><last>Sun</last></author>
-      <author><first>Guiping</first><last>Zhang</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
+      <author id="jingguang-sun"><first>Jingguang</first><last>Sun</last></author>
+      <author id="guiping-zhang"><first>Guiping</first><last>Zhang</last></author>
       <author><first>Dexin</first><last>Lv</last></author>
       <author><first>Yanju</first><last>Dong</last></author>
       <author><first>Yan</first><last>Song</last></author>
@@ -497,7 +497,7 @@
     <paper id="51">
       <title>Automatic Target Word Disambiguation Using Syntactic Relationships</title>
       <author><first>Ebony</first><last>Domingo</last></author>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>374–377</pages>
       <url hash="5815d268">Y06-1051</url>
       <doi>http://hdl.handle.net/2065/29057</doi>
@@ -507,7 +507,7 @@
       <title>Semantic Representation and Composition for Unknown Compounds in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
       <author><first>Yueh-Yin</first><last>Shih</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>378–381</pages>
       <url hash="775f7302">Y06-1052</url>
       <doi>http://hdl.handle.net/2065/29039</doi>
@@ -526,7 +526,7 @@
       <title>Learning Translation Rules for a Bidirectional <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>ilipino Machine Translator</title>
       <author><first>Michelle Wendy</first><last>Tan</last></author>
       <author><first>Bryan Anthony</first><last>Hong</last></author>
-      <author><first>Danniel Liwanag</first><last>Alcantara</last></author>
+      <author id="danniel-liwanag-alcantara"><first>Danniel Liwanag</first><last>Alcantara</last></author>
       <author><first>Amiel</first><last>Perez</last></author>
       <author><first>Lawrence</first><last>Tan</last></author>
       <pages>386–389</pages>
@@ -536,9 +536,9 @@
     </paper>
     <paper id="55">
       <title>A Visualization method for machine translation evaluation results</title>
-      <author><first>Jian-Min</first><last>Yao</last></author>
-      <author><first>Yun-Qian</first><last>Qu</last></author>
-      <author><first>Qiao-Ming</first><last>Zhu</last></author>
+      <author id="jianmin-yao"><first>Jian-Min</first><last>Yao</last></author>
+      <author id="yun-qian-qu"><first>Yun-Qian</first><last>Qu</last></author>
+      <author id="qiaoming-zhu"><first>Qiao-Ming</first><last>Zhu</last></author>
       <author><first>Jing</first><last>Zhang</last></author>
       <pages>390–393</pages>
       <url hash="e67e2903">Y06-1055</url>
@@ -555,11 +555,11 @@
     </paper>
     <paper id="57">
       <title>Research on concept-sememe tree and semantic relevance computation</title>
-      <author><first>GuiPing</first><last>Zhang</last></author>
+      <author id="guiping-zhang"><first>GuiPing</first><last>Zhang</last></author>
       <author><first>Chao</first><last>Yu</last></author>
-      <author><first>DongFeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>DongFeng</first><last>Cai</last></author>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>JingGuang</first><last>Sun</last></author>
+      <author id="jingguang-sun"><first>JingGuang</first><last>Sun</last></author>
       <pages>398–402</pages>
       <url hash="872c2263">Y06-1057</url>
       <doi>http://hdl.handle.net/2065/29052</doi>
@@ -687,7 +687,7 @@
       <title>Translation &amp; Transform Algorithm of Query Sentence in Cross-Language Information Retrieval</title>
       <author><first>Xiao-fei</first><last>Zhang</last></author>
       <author><first>Ke-liang</first><last>Zhang</last></author>
-      <author><first>He-yan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>He-yan</first><last>Huang</last></author>
       <pages>467–470</pages>
       <url hash="cac50185">Y06-1071</url>
       <doi>http://hdl.handle.net/2065/29062</doi>
diff --git a/data/xml/Y07.xml b/data/xml/Y07.xml
index c94c4570cf..02c84a25ce 100644
--- a/data/xml/Y07.xml
+++ b/data/xml/Y07.xml
@@ -14,7 +14,7 @@
     </frontmatter>
     <paper id="1">
       <title>Scalable Deep Linguistic Processing: Mind the Lexical Gap</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>3–12</pages>
       <url hash="0b7d5908">Y07-1001</url>
       <doi>http://hdl.handle.net/2065/29115</doi>
@@ -22,7 +22,7 @@
     </paper>
     <paper id="2">
       <title>The Semantics of Semantic Annotation</title>
-      <author><first>Harry</first><last>Bunt</last></author>
+      <author id="harry-bunt"><first>Harry</first><last>Bunt</last></author>
       <pages>13–28</pages>
       <url hash="5af551d8">Y07-1002</url>
       <doi>http://hdl.handle.net/2065/29125</doi>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="3">
       <title>Deep Lexical Semantics: The Ontological Ascent</title>
-      <author><first>Jerry R.</first><last>Hobbs</last></author>
+      <author id="jerry-r-hobbs"><first>Jerry R.</first><last>Hobbs</last></author>
       <pages>29–41</pages>
       <url hash="a087a905">Y07-1003</url>
       <doi>http://hdl.handle.net/2065/29124</doi>
@@ -74,7 +74,7 @@
       <title><fixed-case>BEYT</fixed-case>rans: A Free Online Collaborative <fixed-case>W</fixed-case>iki-Based <fixed-case>CAT</fixed-case> Environment Designed for Online Translation Communities</title>
       <author><first>Youcef</first><last>Bey</last></author>
       <author><first>Kyo</first><last>Kageura</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>87–94</pages>
       <url hash="86d36fae">Y07-1008</url>
       <doi>http://hdl.handle.net/2065/29117</doi>
@@ -91,7 +91,7 @@
     <paper id="10">
       <title>Customizing an <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation System for Patent Translation</title>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>105–114</pages>
       <url hash="e22c4759">Y07-1010</url>
       <doi>http://hdl.handle.net/2065/29090</doi>
@@ -107,7 +107,7 @@
     </paper>
     <paper id="12">
       <title>Computing Thresholds of Linguistic Saliency</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <author><first>Chung-Ping</first><last>Cheng</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
@@ -119,9 +119,9 @@
     </paper>
     <paper id="13">
       <title>Modality and Modal Sense Representation in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et</title>
-      <author><first>You-Shan</first><last>Chung</last></author>
+      <author id="you-shan-chung"><first>You-Shan</first><last>Chung</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <pages>136–145</pages>
       <url hash="9dd93741">Y07-1013</url>
       <doi>http://hdl.handle.net/2065/29096</doi>
@@ -129,8 +129,8 @@
     </paper>
     <paper id="14">
       <title><fixed-case>A</fixed-case>uto<fixed-case>C</fixed-case>or: A Query Based Automatic Acquisition of Corpora of Closely-related Languages</title>
-      <author><first>Davis Muhajereen D.</first><last>Dimalen</last></author>
-      <author><first>Rachel Edita O.</first><last>Roxas</last></author>
+      <author id="davis-muhajereen-d-dimalen"><first>Davis Muhajereen D.</first><last>Dimalen</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></author>
       <pages>146–154</pages>
       <url hash="3b264725">Y07-1014</url>
       <doi>http://hdl.handle.net/2065/29141</doi>
@@ -138,7 +138,7 @@
     </paper>
     <paper id="15">
       <title>The Polysemy of Da3: An ontology-based lexical semantic study</title>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <pages>155–162</pages>
@@ -157,7 +157,7 @@
     <paper id="17">
       <title>Time-moving Metaphors and Ego-moving Metaphors: Which Is Better Comprehended by <fixed-case>T</fixed-case>aiwanese?</title>
       <author><first>Hsin-mei May</first><last>Huang</last></author>
-      <author><first>Ching-yu Shelley</first><last>Hsieh</last></author>
+      <author id="shelley-ching-yu-hsieh"><first>Ching-yu Shelley</first><last>Hsieh</last></author>
       <pages>173–181</pages>
       <url hash="65e306a3">Y07-1017</url>
       <doi>http://hdl.handle.net/2065/29099</doi>
@@ -224,7 +224,7 @@
     <paper id="25">
       <title>Transition and Parsing State and Incrementality in Dynamic Syntax</title>
       <author><first>Masahiro</first><last>Kobayashi</last></author>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <pages>249–258</pages>
       <url hash="4f3b89df">Y07-1025</url>
       <doi>http://hdl.handle.net/2065/29142</doi>
@@ -233,7 +233,7 @@
     <paper id="26">
       <title>A Focus Account for Contrastive Reduplication: Prototypicality and Contrastivity</title>
       <author><first>Binna</first><last>Lee</last></author>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>259–267</pages>
       <url hash="d4437e2e">Y07-1026</url>
       <doi>http://hdl.handle.net/2065/29089</doi>
@@ -249,7 +249,7 @@
     </paper>
     <paper id="28">
       <title>Implementation of Presence and Absence of Blocking Effects: A Categorial Grammar Approach to <fixed-case>C</fixed-case>hinese and <fixed-case>K</fixed-case>orean</title>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <pages>275–284</pages>
       <url hash="b18896a2">Y07-1028</url>
       <doi>http://hdl.handle.net/2065/29111</doi>
@@ -257,7 +257,7 @@
     </paper>
     <paper id="29">
       <title>Mining Parallel Text from the Web based on Sentence Alignment</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Juan</first><last>Liu</last></author>
       <author><first>Huili</first><last>Zhu</last></author>
       <pages>285–292</pages>
@@ -275,7 +275,7 @@
     </paper>
     <paper id="31">
       <title>Using Non-Local Features to Improve Named Entity Recognition Recall</title>
-      <author><first>Xinnian</first><last>Mao</last></author>
+      <author id="xinnian-mao"><first>Xinnian</first><last>Mao</last></author>
       <author><first>Wei</first><last>Xu</last></author>
       <author><first>Yuan</first><last>Dong</last></author>
       <author><first>Saike</first><last>He</last></author>
@@ -288,7 +288,7 @@
     <paper id="32">
       <title>Analysis of Indirect Uses of Interrogative Sentences Carrying Anger</title>
       <author><first>Hye-Jin</first><last>Min</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>311–320</pages>
       <url hash="30bc5dac">Y07-1032</url>
       <doi>http://hdl.handle.net/2065/29094</doi>
@@ -299,7 +299,7 @@
       <author><first>Maki</first><last>Miyake</last></author>
       <author><first>Terry</first><last>Joyce</last></author>
       <author><first>Jaeyoung</first><last>Jung</last></author>
-      <author><first>Hiroyuki</first><last>Akama</last></author>
+      <author id="hiroyuki-akama"><first>Hiroyuki</first><last>Akama</last></author>
       <pages>321–329</pages>
       <url hash="cc6e61af">Y07-1033</url>
       <doi>http://hdl.handle.net/2065/29088</doi>
@@ -327,7 +327,7 @@
     </paper>
     <paper id="36">
       <title>Acquisition of Named-Entity-Related Relations for Searching</title>
-      <author><first>Tri-Thanh</first><last>Nguyen</last></author>
+      <author id="tri-thanh-nguyen"><first>Tri-Thanh</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>349–357</pages>
       <url hash="1f67f705">Y07-1036</url>
@@ -345,7 +345,7 @@
     <paper id="38">
       <title>Case, Coordination, and Information Structure in <fixed-case>J</fixed-case>apanese</title>
       <author><first>Akira</first><last>Otani</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>365–374</pages>
       <url hash="da69c8f9">Y07-1038</url>
       <doi>http://hdl.handle.net/2065/29104</doi>
@@ -354,7 +354,7 @@
     <paper id="39">
       <title>Automatic Acquisition of <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar Resources from a <fixed-case>J</fixed-case>apanese Dependency Corpus</title>
       <author><first>Masanori</first><last>Oya</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <pages>375–384</pages>
       <url hash="938bca0d">Y07-1039</url>
       <doi>http://hdl.handle.net/2065/29140</doi>
@@ -364,8 +364,8 @@
       <title>Semi-Automatic Annotation Tool to Build Large Dependency Tree-Tagged Corpus</title>
       <author><first>Eun-Jin</first><last>Park</last></author>
       <author><first>Jae-Hoon</first><last>Kim</last></author>
-      <author><first>Chang-Hyun</first><last>Kim</last></author>
-      <author><first>Young-Kill</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang-Hyun</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Kill</first><last>Kim</last></author>
       <pages>385–393</pages>
       <url hash="9c98dfb3">Y07-1040</url>
       <doi>http://hdl.handle.net/2065/29100</doi>
@@ -373,7 +373,7 @@
     </paper>
     <paper id="41">
       <title>Multiple Sluicing in <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Myung-Kwan</first><last>Park</last></author>
+      <author id="myung-kwan-park"><first>Myung-Kwan</first><last>Park</last></author>
       <author><first>Jung-Min</first><last>Kang</last></author>
       <pages>394–404</pages>
       <url hash="b0a85b98">Y07-1041</url>
@@ -390,8 +390,8 @@
     </paper>
     <paper id="43">
       <title>Relation Extraction Using Convolution Tree Kernel Expanded with Entity Features</title>
-      <author><first>Longhua</first><last>Qian</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="longhua-qian"><first>Longhua</first><last>Qian</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Qiaomin</first><last>Zhu</last></author>
       <author><first>Peide</first><last>Qian</last></author>
       <pages>415–421</pages>
@@ -469,8 +469,8 @@
       <author><first>Yu-Chun</first><last>Wang</last></author>
       <author><first>Yi-Hsun</first><last>Lee</last></author>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
-      <author><first>Tzong-Han Richard</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Tzong-Han Richard</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>489–497</pages>
       <url hash="4def64c3">Y07-1051</url>
       <doi>http://hdl.handle.net/2065/29139</doi>
@@ -478,7 +478,7 @@
     </paper>
     <paper id="52">
       <title>Research on a Model of Extracting Persons’ Information Based on Statistic Method and Conceptual Knowledge</title>
-      <author><first>XiangFeng</first><last>Wei</last></author>
+      <author id="xiangfeng-wei"><first>XiangFeng</first><last>Wei</last></author>
       <author><first>Ning</first><last>Jia</last></author>
       <author><first>Quan</first><last>Zhang</last></author>
       <author><first>HanFen</first><last>Zang</last></author>
@@ -499,7 +499,7 @@
     </paper>
     <paper id="54">
       <title>Distal Demonstrative Hitlo in <fixed-case>T</fixed-case>aiwanese <fixed-case>S</fixed-case>outhern <fixed-case>M</fixed-case>in</title>
-      <author><first>Yi-jing</first><last>Zhao</last></author>
+      <author id="yi-jing-zhao"><first>Yi-jing</first><last>Zhao</last></author>
       <pages>522–530</pages>
       <url hash="1cd67d2d">Y07-1054</url>
       <doi>http://hdl.handle.net/2065/29110</doi>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="55">
       <title>Children’s Acquisition of Demonstrative Pronouns in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Yi-jing</first><last>Zhao</last></author>
+      <author id="yi-jing-zhao"><first>Yi-jing</first><last>Zhao</last></author>
       <pages>532–541</pages>
       <url hash="c68a8409">Y07-1055</url>
       <doi>http://hdl.handle.net/2065/29095</doi>
@@ -515,7 +515,7 @@
     </paper>
     <paper id="56">
       <title>Ambiguity of Reflexives and Case Extension</title>
-      <author><first>Richard</first><last>Zuber</last></author>
+      <author id="richard-zuber"><first>Richard</first><last>Zuber</last></author>
       <pages>542–547</pages>
       <url hash="ea171890">Y07-1056</url>
       <doi>http://hdl.handle.net/2065/29123</doi>
diff --git a/data/xml/Y08.xml b/data/xml/Y08.xml
index e9dbbb7050..c6de955635 100644
--- a/data/xml/Y08.xml
+++ b/data/xml/Y08.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 22nd Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Rachel Edita O.</first><last>Roxas</last></editor>
+      <editor id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></editor>
       <publisher>De La Salle University, Manila, Philippines</publisher>
       <address>The University of the Philippines Visayas Cebu College, Cebu City, Philippines</address>
       <month>November</month>
@@ -30,7 +30,7 @@
     </paper>
     <paper id="3">
       <title>Scalar Implicatures: Pragmatic Inferences or Grammar?</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>30–45</pages>
       <url hash="103fb8d7">Y08-1003</url>
       <bibkey>lee-2008-scalar</bibkey>
@@ -40,7 +40,7 @@
       <author><first>Haizhou</first><last>Li</last></author>
       <author><first>Bin</first><last>Ma</last></author>
       <author><first>Kong-Aik</first><last>Lee</last></author>
-      <author><first>Khe-Chai</first><last>Sim</last></author>
+      <author id="khe-chai-sim"><first>Khe-Chai</first><last>Sim</last></author>
       <author><first>Hanwu</first><last>Sun</last></author>
       <author><first>Rong</first><last>Tong</last></author>
       <author><first>Donglai</first><last>Zhu</last></author>
@@ -58,7 +58,7 @@
     </paper>
     <paper id="6">
       <title>Some Challenges of Advanced Question-Answering: an Experiment with How-to Questions</title>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>65–73</pages>
       <url hash="01d92d74">Y08-1006</url>
       <bibkey>saint-dizier-2008-challenges</bibkey>
@@ -87,7 +87,7 @@
     </paper>
     <paper id="10">
       <title>Constituent Structure for <fixed-case>F</fixed-case>ilipino: Induction through Probabilistic Approaches</title>
-      <author><first>Danniel</first><last>Alcantara</last></author>
+      <author id="danniel-liwanag-alcantara"><first>Danniel</first><last>Alcantara</last></author>
       <author><first>Allan</first><last>Borra</last></author>
       <pages>113–122</pages>
       <url hash="dddf48fc">Y08-1010</url>
@@ -114,7 +114,7 @@
       <author><first>Mary Grace</first><last>Lim</last></author>
       <author><first>Patricia Bea</first><last>Perez</last></author>
       <author><first>Joanna Patricia</first><last>Reyes</last></author>
-      <author><first>Nathalie Rose</first><last>Lim</last></author>
+      <author id="nathalie-rose-lim"><first>Nathalie Rose</first><last>Lim</last></author>
       <pages>141–150</pages>
       <url hash="56cd8478">Y08-1013</url>
       <bibkey>chen-etal-2008-natural</bibkey>
@@ -134,7 +134,7 @@
       <author><first>Ki-Young</first><last>Lee</last></author>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
       <author><first>Oh-Woog</first><last>Kwon</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>161–168</pages>
       <url hash="23e8cbeb">Y08-1015</url>
       <bibkey>choi-etal-2008-overcome</bibkey>
@@ -142,7 +142,7 @@
     <paper id="16">
       <title>Multi-Engine Approach for Named Entity Recognition in <fixed-case>B</fixed-case>engali</title>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>169–178</pages>
       <url hash="845e92ab">Y08-1016</url>
       <bibkey>ekbal-bandyopadhyay-2008-multi</bibkey>
@@ -182,7 +182,7 @@
     <paper id="21">
       <title><fixed-case>K</fixed-case>orean Parsing Based on the Applicative <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar</title>
       <author><first>Juyeon</first><last>Kang</last></author>
-      <author><first>Jean-Pierre</first><last>Desclés</last></author>
+      <author id="jean-pierre-descles"><first>Jean-Pierre</first><last>Desclés</last></author>
       <pages>215–224</pages>
       <url hash="159a70dc">Y08-1021</url>
       <bibkey>kang-descles-2008-korean</bibkey>
@@ -198,7 +198,7 @@
     </paper>
     <paper id="23">
       <title>A Preliminary Study on the Impact of Lexical Concreteness on Word Sense Disambiguation</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>235–244</pages>
       <url hash="2989a699">Y08-1023</url>
       <bibkey>kwong-2008-preliminary</bibkey>
@@ -214,7 +214,7 @@
     <paper id="25">
       <title>An Improved Corpus Comparison Approach to Domain Specific Term Recognition</title>
       <author><first>Xiaoyue</first><last>Liu</last></author>
-      <author><first>Chunyu</first><last>Kit</last></author>
+      <author id="chunyu-kit"><first>Chunyu</first><last>Kit</last></author>
       <pages>253–261</pages>
       <url hash="5728376d">Y08-1025</url>
       <bibkey>liu-kit-2008-improved</bibkey>
@@ -222,17 +222,17 @@
     <paper id="26">
       <title>Extending an <fixed-case>I</fixed-case>ndonesian Semantic Analysis-based Question Answering System with Linguistic and World Knowledge Axioms</title>
       <author><first>Rahmad</first><last>Mahendra</last></author>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
-      <author><first>Ruli</first><last>Manurung</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
       <pages>262–271</pages>
       <url hash="b39fe8f1">Y08-1026</url>
       <bibkey>mahendra-etal-2008-extending</bibkey>
     </paper>
     <paper id="27">
       <title>An Implementation of a Flexible Author-Reviewer Model of Generation using Genetic Algorithms</title>
-      <author><first>Ruli</first><last>Manurung</last></author>
-      <author><first>Graeme</first><last>Ritchie</last></author>
-      <author><first>Henry</first><last>Thompson</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
+      <author id="graeme-ritchie"><first>Graeme</first><last>Ritchie</last></author>
+      <author id="henry-s-thompson"><first>Henry</first><last>Thompson</last></author>
       <pages>272–281</pages>
       <url hash="30e0af12">Y08-1027</url>
       <bibkey>manurung-etal-2008-implementation</bibkey>
@@ -247,7 +247,7 @@
     <paper id="29">
       <title>On <fixed-case>J</fixed-case>apanese Desiderative Constructions</title>
       <author><first>Akira</first><last>Ohtani</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>290–301</pages>
       <url hash="928d27ce">Y08-1029</url>
       <bibkey>ohtani-steedman-2008-japanese</bibkey>
@@ -279,7 +279,7 @@
     </paper>
     <paper id="33">
       <title>Trend-based Document Clustering for Sensitive and Stable Topic Detection</title>
-      <author><first>Yoshihide</first><last>Sato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Sato</last></author>
       <author><first>Harumi</first><last>Kawashima</last></author>
       <author><first>Hidenori</first><last>Okuda</last></author>
       <author><first>Masahiro</first><last>Oku</last></author>
@@ -324,7 +324,7 @@
     <paper id="38">
       <title>Using ‘Low-cost’ Learning Features for Pronoun Resolution</title>
       <author><first>Ramon</first><last>Cuevas</last></author>
-      <author><first>Ivandré</first><last>Paraboni</last></author>
+      <author id="ivandre-paraboni"><first>Ivandré</first><last>Paraboni</last></author>
       <pages>377–383</pages>
       <url hash="ef0d3d1e">Y08-1038</url>
       <bibkey>cuevas-paraboni-2008-using</bibkey>
@@ -343,7 +343,7 @@
     <paper id="40">
       <title>Controlled <fixed-case>K</fixed-case>orean for <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case></title>
       <author><first>Munpyo</first><last>Hong</last></author>
-      <author><first>Chang-Hyun</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang-Hyun</first><last>Kim</last></author>
       <pages>391–396</pages>
       <url hash="5980a547">Y08-1040</url>
       <bibkey>hong-kim-2008-controlled</bibkey>
@@ -353,7 +353,7 @@
       <author><first>Rile</first><last>Hu</last></author>
       <author><first>Yuezhong</first><last>Tang</last></author>
       <author><first>Chen</first><last>Li</last></author>
-      <author><first>Xia</first><last>Wang</last></author>
+      <author id="xia-wang"><first>Xia</first><last>Wang</last></author>
       <pages>397–403</pages>
       <url hash="8154698b">Y08-1041</url>
       <bibkey>hu-etal-2008-statistical</bibkey>
@@ -376,9 +376,9 @@
     </paper>
     <paper id="44">
       <title>What is Needed the Most in <fixed-case>MT</fixed-case>-Supported Paper Writing</title>
-      <author><first>Chang Hyun</first><last>Kim</last></author>
+      <author id="chang-hyun-kim"><first>Chang Hyun</first><last>Kim</last></author>
       <author><first>Oh-Woog</first><last>Kwon</last></author>
-      <author><first>Young Kil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young Kil</first><last>Kim</last></author>
       <pages>418–427</pages>
       <url hash="1d78b84a">Y08-1044</url>
       <bibkey>kim-etal-2008-needed</bibkey>
@@ -420,7 +420,7 @@
       <author><first>Ki-Young</first><last>Lee</last></author>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Oh-Woog</first><last>Kwon</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>460–466</pages>
       <url hash="6af63f75">Y08-1049</url>
       <bibkey>roh-etal-2008-recognizing</bibkey>
diff --git a/data/xml/Y09.xml b/data/xml/Y09.xml
index 23942388a9..2fa1bbb582 100644
--- a/data/xml/Y09.xml
+++ b/data/xml/Y09.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>A Step toward Compositional Semantics: <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et a Lexical Semantic Representation System</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Shu-Ling</first><last>Huang</last></author>
       <pages>1–8</pages>
       <url hash="bc829c04">Y09-1001</url>
@@ -38,14 +38,14 @@
     </paper>
     <paper id="4">
       <title>Resultatives as Causal Relations between Events</title>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <pages>29–39</pages>
       <url hash="24938f58">Y09-1004</url>
       <bibkey>lee-2009-resultatives</bibkey>
     </paper>
     <paper id="5">
       <title>Developing Speech Recognition and Synthesis Technologies to Support Computer-Aided Pronunciation Training for <fixed-case>C</fixed-case>hinese Learners of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Helen</first><last>Meng</last></author>
+      <author id="helen-meng"><first>Helen</first><last>Meng</last></author>
       <pages>40–42</pages>
       <url hash="721d5bfb">Y09-1005</url>
       <bibkey>meng-2009-developing</bibkey>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="8">
       <title>Dependency Grammar Based <fixed-case>E</fixed-case>nglish Subject-Verb Agreement Evaluation</title>
-      <author><first>Dongfeng</first><last>Cai</last></author>
+      <author id="dongfeng-cai"><first>Dongfeng</first><last>Cai</last></author>
       <author><first>Yonghua</first><last>Hu</last></author>
       <author><first>Xuelei</first><last>Miao</last></author>
       <author><first>Yan</first><last>Song</last></author>
@@ -80,9 +80,9 @@
     </paper>
     <paper id="9">
       <title><fixed-case>W</fixed-case>iki<fixed-case>S</fixed-case>ense: Supersense Tagging of <fixed-case>W</fixed-case>ikipedia Named Entities Based <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
-      <author><first>Joseph</first><last>Chang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="joseph-z-chang"><first>Joseph</first><last>Chang</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>72–81</pages>
       <url hash="5c851c3d">Y09-1009</url>
       <bibkey>chang-etal-2009-wikisense</bibkey>
@@ -90,7 +90,7 @@
     <paper id="10">
       <title>An Integrated Approach to Heterogeneous Data for Information Extraction</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Sophia Y. M.</first><last>Lee</last></author>
+      <author id="sophia-y-m-lee"><first>Sophia Y. M.</first><last>Lee</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>82–91</pages>
       <url hash="cfcb395b">Y09-1010</url>
@@ -99,7 +99,7 @@
     <paper id="11">
       <title>Are Emotions Enumerable or Decomposable? And its Implications for Emotion Processing</title>
       <author><first>Ying</first><last>Chen</last></author>
-      <author><first>Sophia Y. M.</first><last>Lee</last></author>
+      <author id="sophia-y-m-lee"><first>Sophia Y. M.</first><last>Lee</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>92–100</pages>
       <url hash="1308dc93">Y09-1011</url>
@@ -116,7 +116,7 @@
     <paper id="13">
       <title>Coupling an Annotated Corpus and a Morphosyntactic Lexicon for State-of-the-Art <fixed-case>POS</fixed-case> Tagging with Less Human Effort</title>
       <author><first>Pascal</first><last>Denis</last></author>
-      <author><first>Benoît</first><last>Sagot</last></author>
+      <author id="benoit-sagot"><first>Benoît</first><last>Sagot</last></author>
       <pages>110–119</pages>
       <url hash="922e188a">Y09-1013</url>
       <bibkey>denis-sagot-2009-coupling</bibkey>
@@ -125,14 +125,14 @@
       <title>Voted Approach for Part of Speech Tagging in <fixed-case>B</fixed-case>engali</title>
       <author><first>Asif</first><last>Ekbal</last></author>
       <author><first>Md.</first><last>Hasanuzzaman</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>120–129</pages>
       <url hash="c17dffc5">Y09-1014</url>
       <bibkey>ekbal-etal-2009-voted</bibkey>
     </paper>
     <paper id="15">
       <title>Adjective Density as a Text Formality Characteristic for Automatic Text Classification: A Study Based on the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <author><first>Jing</first><last>Cao</last></author>
       <pages>130–139</pages>
       <url hash="ede1d197">Y09-1015</url>
@@ -142,7 +142,7 @@
       <title>Correcting Errors Using the Framework of Argumentation: Towards Generating Argumentative Correction Propositions from Error Annotation Schemas</title>
       <author><first>Marie</first><last>Garnier</last></author>
       <author><first>Arnaud</first><last>Rykner</last></author>
-      <author><first>Patrick</first><last>Saint-Dizier</last></author>
+      <author id="patrick-saint-dizier"><first>Patrick</first><last>Saint-Dizier</last></author>
       <pages>140–149</pages>
       <url hash="ae5719bd">Y09-1016</url>
       <bibkey>garnier-etal-2009-correcting</bibkey>
@@ -159,7 +159,7 @@
     <paper id="18">
       <title><fixed-case>L</fixed-case>ogistic<fixed-case>LDA</fixed-case>: Regularizing <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation by Logistic Regression</title>
       <author><first>Jia-Cheng</first><last>Guo</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <author><first>Zhiwei</first><last>Li</last></author>
       <author><first>Lei</first><last>Zhang</last></author>
       <pages>160–169</pages>
@@ -169,8 +169,8 @@
     <paper id="19">
       <title>Dependency Relations as Source Context in Phrase-Based <fixed-case>SMT</fixed-case></title>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Antal</first><last>van den Bosch</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="antal-van-den-bosch"><first>Antal</first><last>van den Bosch</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>170–179</pages>
       <url hash="cef49f5f">Y09-1019</url>
@@ -190,7 +190,7 @@
     <paper id="21">
       <title>Query-Focused Multi-Document Summarization Using Co-Training Based Semi-Supervised Learning</title>
       <author><first>Po</first><last>Hu</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Hai</first><last>Wang</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <pages>190–199</pages>
@@ -199,10 +199,10 @@
     </paper>
     <paper id="22">
       <title>Review Classification Using Semantic Features and Run-Time Weighting</title>
-      <author><first>Chung-chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
       <author><first>Meng-chiech</first><last>Lee</last></author>
       <author><first>Zhe-nan</first><last>Lin</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>200–209</pages>
       <url hash="b82fd02b">Y09-1022</url>
       <bibkey>huang-etal-2009-review</bibkey>
@@ -226,7 +226,7 @@
     <paper id="25">
       <title>Layer-Based Dependency Parsing</title>
       <author><first>Ping</first><last>Jian</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>230–239</pages>
       <url hash="9b3a213a">Y09-1025</url>
       <bibkey>jian-zong-2009-layer</bibkey>
@@ -246,7 +246,7 @@
     <paper id="27">
       <title>An Experimental Syntactic Study of Binding: A Case Study of <fixed-case>K</fixed-case>orean Long-Distance Anaphor caki</title>
       <author><first>Ji-Hye</first><last>Kim</last></author>
-      <author><first>James H.</first><last>Yoon</last></author>
+      <author id="james-yoon"><first>James H.</first><last>Yoon</last></author>
       <pages>250–259</pages>
       <url hash="47820533">Y09-1027</url>
       <bibkey>kim-yoon-2009-experimental</bibkey>
@@ -254,7 +254,7 @@
     <paper id="28">
       <title>Method of Extracting Is-A and Part-Of Relations Using Pattern Pairs in Mass Corpus</title>
       <author><first>Se-Jong</first><last>Kim</last></author>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <author><first>Jong-Hyeok</first><last>Lee</last></author>
       <pages>260–268</pages>
       <url hash="643cb15c">Y09-1028</url>
@@ -286,7 +286,7 @@
     </paper>
     <paper id="32">
       <title>Cause Event Representations for Happiness and Surprise</title>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Ying</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>297–306</pages>
@@ -313,7 +313,7 @@
       <title>Approach to Selecting Best Development Set for Phrase-Based Statistical Machine Translation</title>
       <author><first>Peng</first><last>Liu</last></author>
       <author><first>Yu</first><last>Zhou</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>325–334</pages>
       <url hash="732095d4">Y09-1035</url>
       <bibkey>liu-etal-2009-approach</bibkey>
@@ -321,7 +321,7 @@
     <paper id="36">
       <title>Using Extra-Linguistic Material for <fixed-case>M</fixed-case>andarin-<fixed-case>F</fixed-case>rench Verbal Constructions Comparison</title>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Hintat</first><last>Cheung</last></author>
       <author><first>Chien-yun</first><last>Shiao</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
@@ -332,9 +332,9 @@
     </paper>
     <paper id="37">
       <title>Improving Unsegmented Dialogue Turns Annotation with N-gram Transducers</title>
-      <author><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
+      <author id="carlos-d-martinez-hinarejos"><first>Carlos-D.</first><last>Martínez-Hinarejos</last></author>
       <author><first>Vicent</first><last>Tamarit</last></author>
-      <author><first>José-Miguel</first><last>Benedí</last></author>
+      <author id="jose-miguel-benedi"><first>José-Miguel</first><last>Benedí</last></author>
       <pages>345–354</pages>
       <url hash="eb3f8e90">Y09-1037</url>
       <bibkey>martinez-hinarejos-etal-2009-improving</bibkey>
@@ -342,7 +342,7 @@
     <paper id="38">
       <title>Using Tree Kernels for Classifying Temporal Relations between Events</title>
       <author><first>Seyed Abolghasem</first><last>Mirroshandel</last></author>
-      <author><first>Gholamreza</first><last>Ghassem-Sani</last></author>
+      <author id="gholamreza-ghassem-sani"><first>Gholamreza</first><last>Ghassem-Sani</last></author>
       <author><first>Mahdy</first><last>Khayyamian</last></author>
       <pages>355–364</pages>
       <url hash="f09757c6">Y09-1038</url>
@@ -359,9 +359,9 @@
       <title>Extending Bilingual <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et via Hierarchical Word Translation Classification</title>
       <author><first>Tzu-yi</first><last>Nien</last></author>
       <author><first>Tsun</first><last>Ku</last></author>
-      <author><first>Chung-chi</first><last>Huang</last></author>
-      <author><first>Mei-hua</first><last>Chen</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
+      <author id="mei-hua-chen"><first>Mei-hua</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>375–384</pages>
       <url hash="72a6b7c9">Y09-1040</url>
       <bibkey>nien-etal-2009-extending</bibkey>
@@ -376,14 +376,14 @@
     <paper id="42">
       <title>Note on <fixed-case>J</fixed-case>apanese Epistemic Verb Constructions: A Surface-Compositional Analysis</title>
       <author><first>Akira</first><last>Ohtani</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>395–404</pages>
       <url hash="1df89ddd">Y09-1042</url>
       <bibkey>ohtani-steedman-2009-note</bibkey>
     </paper>
     <paper id="43">
       <title>On the So-Called Thematic Use of Wa: Reconsideration and Reconciliation</title>
-      <author><first>David Y.</first><last>Oshima</last></author>
+      <author id="david-yoshikazu-oshima"><first>David Y.</first><last>Oshima</last></author>
       <pages>405–414</pages>
       <url hash="8510b327">Y09-1043</url>
       <bibkey>oshima-2009-called</bibkey>
@@ -462,7 +462,7 @@
     <paper id="6">
       <title>Finding Answers to Definition Questions Using Web Knowledge Bases</title>
       <author><first>Han</first><last>Ren</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <author><first>Jing</first><last>Wan</last></author>
       <author><first>Chong</first><last>Teng</last></author>
       <pages>484–492</pages>
@@ -473,7 +473,7 @@
       <title>Incorporating Statistical Information of Lexical Dependency into a Rule-Based Parser</title>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>493–500</pages>
       <url hash="35aeddd0">Y09-2007</url>
       <bibkey>roh-etal-2009-incorporating</bibkey>
@@ -484,7 +484,7 @@
       <author><first>Bianca Pamela</first><last>Alcera</last></author>
       <author><first>Ed Oswald</first><last>Go</last></author>
       <author><first>Czarina Meg</first><last>Gonzales</last></author>
-      <author><first>Nathalie Rose</first><last>Lim</last></author>
+      <author id="nathalie-rose-lim"><first>Nathalie Rose</first><last>Lim</last></author>
       <pages>501–510</pages>
       <url hash="91429888">Y09-2008</url>
       <bibkey>samson-etal-2009-automated</bibkey>
@@ -507,7 +507,7 @@
     </paper>
     <paper id="11">
       <title><fixed-case>C</fixed-case>hinese Function Tag Labeling</title>
-      <author><first>Weiwei</first><last>Sun</last></author>
+      <author id="weiwei-sun-sd"><first>Weiwei</first><last>Sun</last></author>
       <author><first>Zhifang</first><last>Sui</last></author>
       <pages>530–539</pages>
       <url hash="42dd4305">Y09-2011</url>
@@ -530,7 +530,7 @@
     </paper>
     <paper id="14">
       <title>Passage Retrieval Using Answer Type Profiles in Question Answering</title>
-      <author><first>Surya Ganesh</first><last>Veeravalli</last></author>
+      <author id="surya-ganesh"><first>Surya Ganesh</first><last>Veeravalli</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <pages>559–568</pages>
       <url hash="7e3d3783">Y09-2014</url>
@@ -548,7 +548,7 @@
     <paper id="16">
       <title>A Framework for Effectively Integrating Hard and Soft Syntactic Rules into Phrase Based Translation</title>
       <author><first>Jiajun</first><last>Zhang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <pages>579–588</pages>
       <url hash="c1fe4c8e">Y09-2016</url>
       <bibkey>zhang-zong-2009-framework</bibkey>
@@ -556,7 +556,7 @@
     <paper id="17">
       <title>A Bootstrapping Method for Finer-Grained Opinion Mining Using Graph Model</title>
       <author><first>Shu</first><last>Zhang</last></author>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Hao</first><last>Yu</last></author>
       <pages>589–595</pages>
@@ -575,22 +575,22 @@
     </paper>
     <paper id="19">
       <title>Summarizing Opinions in Blog Threads</title>
-      <author><first>Alexandra</first><last>Balahur</last></author>
-      <author><first>Mijail</first><last>Kabadjov</last></author>
+      <author id="alexandra-balahur"><first>Alexandra</first><last>Balahur</last></author>
+      <author id="mijail-kabadjov"><first>Mijail</first><last>Kabadjov</last></author>
       <author><first>Josef</first><last>Steinberger</last></author>
       <author><first>Ralf</first><last>Steinberger</last></author>
-      <author><first>Andrés</first><last>Montoyo</last></author>
+      <author id="andres-montoyo"><first>Andrés</first><last>Montoyo</last></author>
       <pages>606–613</pages>
       <url hash="2d1accd8">Y09-2019</url>
       <bibkey>balahur-etal-2009-summarizing-opinions</bibkey>
     </paper>
     <paper id="20">
       <title>Constraint Based Hybrid Approach to Parsing <fixed-case>I</fixed-case>ndian Languages</title>
-      <author><first>Akshar</first><last>Bharati</last></author>
+      <author id="akshar-bharati"><first>Akshar</first><last>Bharati</last></author>
       <author><first>Samar</first><last>Husain</last></author>
       <author><first>Meher</first><last>Vijay</last></author>
       <author><first>Kalyan</first><last>Deepak</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Rajeev</first><last>Sangal</last></author>
       <pages>614–621</pages>
       <url hash="32725e14">Y09-2020</url>
@@ -600,7 +600,7 @@
       <title>Interpolated <fixed-case>PLSI</fixed-case> for Learning Plausible Verb Arguments</title>
       <author><first>Hiram</first><last>Calvo</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>622–629</pages>
       <url hash="a0666eb3">Y09-2021</url>
       <bibkey>calvo-etal-2009-interpolated</bibkey>
@@ -615,8 +615,8 @@
     </paper>
     <paper id="23">
       <title><fixed-case>V</fixed-case>ocab<fixed-case>A</fixed-case>nalyzer: A Referred Word List Analyzing Tool with Keyword, Concordancing and N-gram Functions</title>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
-      <author><first>F.Y. August</first><last>Chao</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="f-y-august-chao"><first>F.Y. August</first><last>Chao</last></author>
       <author><first>Yi-Chen</first><last>Hsieh</last></author>
       <pages>638–645</pages>
       <url hash="e6e6dcef">Y09-2023</url>
@@ -624,8 +624,8 @@
     </paper>
     <paper id="24">
       <title>Building Online Corpora of <fixed-case>P</fixed-case>hilippine Languages</title>
-      <author><first>Shirley N.</first><last>Dita</last></author>
-      <author><first>Rachel Edita O.</first><last>Roxas</last></author>
+      <author id="shirley-dita"><first>Shirley N.</first><last>Dita</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita O.</first><last>Roxas</last></author>
       <author><first>Paul</first><last>Inventado</last></author>
       <pages>646–653</pages>
       <url hash="605eeec1">Y09-2024</url>
@@ -641,9 +641,9 @@
     </paper>
     <paper id="26">
       <title><fixed-case>L</fixed-case>atin Etymologies as Features on <fixed-case>BNC</fixed-case> Text Categorization</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <author><first>Wanyin</first><last>Li</last></author>
-      <author><first>Nancy</first><last>Ide</last></author>
+      <author id="nancy-ide"><first>Nancy</first><last>Ide</last></author>
       <pages>662–669</pages>
       <url hash="4b35aa3c">Y09-2026</url>
       <bibkey>fang-etal-2009-latin</bibkey>
@@ -651,8 +651,8 @@
     <paper id="27">
       <title>Experiments on Domain Adaptation for <fixed-case>E</fixed-case>nglish–<fixed-case>H</fixed-case>indi <fixed-case>SMT</fixed-case></title>
       <author><first>Rejwanul</first><last>Haque</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Josef</first><last>van Genabith</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="josef-van-genabith"><first>Josef</first><last>van Genabith</last></author>
       <author><first>Andy</first><last>Way</last></author>
       <pages>670–677</pages>
       <url hash="605291f5">Y09-2027</url>
@@ -668,7 +668,7 @@
     </paper>
     <paper id="29">
       <title>Bridging the Gap between Graph Modeling and Developmental Psycholinguistics: An Experiment on Measuring Lexical Proximity in <fixed-case>C</fixed-case>hinese Semantic Space</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Chun-Han</first><last>Chang</last></author>
       <author><first>Ivy</first><last>Kuo</last></author>
       <author><first>Hintat</first><last>Cheung</last></author>
@@ -683,7 +683,7 @@
       <author><first>Yun</first><last>Jin</last></author>
       <author><first>Qing</first><last>Li</last></author>
       <author><first>Yingshun</first><last>Wu</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>694–701</pages>
       <url hash="15869073">Y09-2030</url>
       <bibkey>jin-etal-2009-effective</bibkey>
@@ -701,7 +701,7 @@
     </paper>
     <paper id="32">
       <title>Syntactic Category Prediction for Improving Translation Quality in <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation</title>
-      <author><first>Sung-Dong</first><last>Kim</last></author>
+      <author id="sung-dong-kim"><first>Sung-Dong</first><last>Kim</last></author>
       <pages>710–717</pages>
       <url hash="ef4394c5">Y09-2032</url>
       <bibkey>kim-2009-syntactic</bibkey>
@@ -712,7 +712,7 @@
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>718–725</pages>
       <url hash="3124f0d6">Y09-2033</url>
       <bibkey>kwon-etal-2009-customizing</bibkey>
@@ -728,9 +728,9 @@
     <paper id="35">
       <title>Extracting Keyphrases from <fixed-case>C</fixed-case>hinese News Articles Using <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank and Query Log Knowledge</title>
       <author><first>Weiming</first><last>Liang</last></author>
-      <author><first>Chang-Ning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Chang-Ning</first><last>Huang</last></author>
       <author><first>Mu</first><last>Li</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>733–740</pages>
       <url hash="636be4b1">Y09-2035</url>
       <bibkey>liang-etal-2009-extracting</bibkey>
@@ -739,7 +739,7 @@
       <title>Modeling the Relationship among Linguistic Typological Features with Hierarchical <fixed-case>D</fixed-case>irichlet Process</title>
       <author><first>Chu-Cheng</first><last>Lin</last></author>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>741–747</pages>
       <url hash="459a5e73">Y09-2036</url>
       <bibkey>lin-etal-2009-modeling</bibkey>
@@ -748,7 +748,7 @@
       <title>Document Re-ranking via <fixed-case>W</fixed-case>ikipedia Articles for Definition/Biography Type Questions</title>
       <author><first>Maofu</first><last>Liu</last></author>
       <author><first>Fang</first><last>Fang</last></author>
-      <author><first>Donghong</first><last>Ji</last></author>
+      <author id="donghong-ji"><first>Donghong</first><last>Ji</last></author>
       <pages>748–754</pages>
       <url hash="37ce4fb4">Y09-2037</url>
       <bibkey>liu-etal-2009-document</bibkey>
@@ -756,7 +756,7 @@
     <paper id="38">
       <title>Towards Bilingual Term Extraction in Comparable Patents</title>
       <author><first>Bin</first><last>Lu</last></author>
-      <author><first>Benjamin K.</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>Tsou</last></author>
       <pages>755–762</pages>
       <url hash="ea03c169">Y09-2038</url>
       <bibkey>lu-tsou-2009-towards</bibkey>
@@ -764,7 +764,7 @@
     <paper id="39">
       <title>Factors Affecting Part-of-Speech Tagging for <fixed-case>T</fixed-case>agalog</title>
       <author><first>Erlyn</first><last>Manguilimotan</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>763–770</pages>
       <url hash="e438185f">Y09-2039</url>
       <bibkey>manguilimotan-matsumoto-2009-factors</bibkey>
@@ -774,7 +774,7 @@
       <author><first>Kenta</first><last>Oouchida</last></author>
       <author><first>Jin-Dong</first><last>Kim</last></author>
       <author><first>Toshihisa</first><last>Takagi</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>771–778</pages>
       <url hash="db54c228">Y09-2040</url>
       <bibkey>oouchida-etal-2009-guidelink</bibkey>
@@ -817,10 +817,10 @@
     </paper>
     <paper id="45">
       <title>Named Entity Recognition for <fixed-case>M</fixed-case>anipuri Using Support Vector Machine</title>
-      <author><first>Thoudam Doren</first><last>Singh</last></author>
+      <author id="thoudam-doren-singh"><first>Thoudam Doren</first><last>Singh</last></author>
       <author><first>Kishorjit</first><last>Nongmeikapam</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>811–818</pages>
       <url hash="fad792db">Y09-2045</url>
       <bibkey>singh-etal-2009-named</bibkey>
@@ -837,7 +837,7 @@
     <paper id="47">
       <title>Which is More Suitable for <fixed-case>C</fixed-case>hinese Word Segmentation, the Generative Model or the Discriminative One?</title>
       <author><first>Kun</first><last>Wang</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>827–834</pages>
       <url hash="ee4d66e8">Y09-2047</url>
@@ -846,11 +846,11 @@
     <paper id="48">
       <title>Design of <fixed-case>C</fixed-case>hinese <fixed-case>HPSG</fixed-case> Framework for Data-Driven Parsing</title>
       <author><first>Xiangli</first><last>Wang</last></author>
-      <author><first>Shunya</first><last>Iwasawa</last></author>
+      <author id="shunya-iwasawa"><first>Shunya</first><last>Iwasawa</last></author>
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Takuya</first><last>Matsuzaki</last></author>
       <author><first>Kun</first><last>Yu</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>835–842</pages>
       <url hash="d6c27854">Y09-2048</url>
       <bibkey>wang-etal-2009-design</bibkey>
@@ -858,7 +858,7 @@
     <paper id="49">
       <title>Rule-based <fixed-case>K</fixed-case>orean Grapheme to Phoneme Conversion Using Sound Patterns</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>843–850</pages>
       <url hash="a2bee148">Y09-2049</url>
       <bibkey>wang-tsai-2009-rule</bibkey>
@@ -889,7 +889,7 @@
     </paper>
     <paper id="53">
       <title>Towards Establishing a Hierarchy in the <fixed-case>J</fixed-case>apanese Sentence Structure</title>
-      <author><first>Kei</first><last>Yoshimoto</last></author>
+      <author id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></author>
       <author><first>Chidori</first><last>Nakamura</last></author>
       <author><first>Alastair</first><last>Butler</last></author>
       <pages>875–882</pages>
diff --git a/data/xml/Y10.xml b/data/xml/Y10.xml
index 4c4992e8db..20c49b060b 100644
--- a/data/xml/Y10.xml
+++ b/data/xml/Y10.xml
@@ -6,7 +6,7 @@
       <editor><first>Ryo</first><last>Otoguro</last></editor>
       <editor><first>Kiyoshi</first><last>Ishikawa</last></editor>
       <editor><first>Hiroshi</first><last>Umemoto</last></editor>
-      <editor><first>Kei</first><last>Yoshimoto</last></editor>
+      <editor id="kei-yoshimoto"><first>Kei</first><last>Yoshimoto</last></editor>
       <editor><first>Yasunari</first><last>Harada</last></editor>
       <publisher>Institute of Digital Enhancement of Cognitive Processing, Waseda University</publisher>
       <address>Tohoku University, Sendai, Japan</address>
@@ -70,7 +70,7 @@
     </paper>
     <paper id="8">
       <title>A Morphosyntactic Analysis of the Pronominal System of <fixed-case>P</fixed-case>hilippine Languages</title>
-      <author><first>Shirley N.</first><last>Dita</last></author>
+      <author id="shirley-dita"><first>Shirley N.</first><last>Dita</last></author>
       <pages>45–59</pages>
       <url hash="05b9efa6">Y10-1008</url>
       <bibkey>dita-2010-morphosyntactic</bibkey>
@@ -101,7 +101,7 @@
     </paper>
     <paper id="12">
       <title>Enhanced Genre Classification through Linguistically Fine-Grained <fixed-case>POS</fixed-case> Tags</title>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <author><first>Jing</first><last>Cao</last></author>
       <pages>85–94</pages>
       <url hash="6e34e7ab">Y10-1012</url>
@@ -110,7 +110,7 @@
     <paper id="13">
       <title>Identifying Emotional Expressions, Intensities and Sentence Level Emotion Tags Using a Supervised Framework</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>95–104</pages>
       <url hash="e0bed5e1">Y10-1013</url>
       <bibkey>das-bandyopadhyay-2010-identifying</bibkey>
@@ -190,7 +190,7 @@
     </paper>
     <paper id="23">
       <title>Evidentials and Epistemic Modal in <fixed-case>K</fixed-case>orean: Evidence from Their Intractions</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>193–202</pages>
       <url hash="94363275">Y10-1023</url>
       <bibkey>lee-2010-evidentials</bibkey>
@@ -199,7 +199,7 @@
       <title>Implementation of <fixed-case>K</fixed-case>orean Syllable Structures in the Typed Feature Structure Formalism</title>
       <author><first>Gyu-hyung</first><last>Lee</last></author>
       <author><first>Ye-seul</first><last>Park</last></author>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <pages>203–212</pages>
       <url hash="15017194">Y10-1024</url>
       <bibkey>lee-etal-2010-implementation</bibkey>
@@ -215,8 +215,8 @@
     <paper id="26">
       <title>e<fixed-case>S</fixed-case>patial<fixed-case>ML</fixed-case>: An Event-Driven Spatial Annotation Framework</title>
       <author><first>Kiyong</first><last>Lee</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>223–232</pages>
       <url hash="766bb838">Y10-1026</url>
       <bibkey>lee-etal-2010-espatialml</bibkey>
@@ -233,8 +233,8 @@
     </paper>
     <paper id="28">
       <title>Developing an Online <fixed-case>I</fixed-case>ndonesian Corpora Repository</title>
-      <author><first>Ruli</first><last>Manurung</last></author>
-      <author><first>Bayu</first><last>Distiawan</last></author>
+      <author id="ruli-manurung"><first>Ruli</first><last>Manurung</last></author>
+      <author id="bayu-distiawan"><first>Bayu</first><last>Distiawan</last></author>
       <author><first>Desmond Darma</first><last>Putra</last></author>
       <pages>243–249</pages>
       <url hash="46422370">Y10-1028</url>
@@ -250,8 +250,8 @@
     <paper id="30">
       <title>Unsupervised Classification of Biomedical Abstracts Using Lexical Association</title>
       <author><first>Jonathon</first><last>Read</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>261–270</pages>
       <url hash="9f92a53e">Y10-1030</url>
       <bibkey>read-etal-2010-unsupervised</bibkey>
@@ -303,14 +303,14 @@
       <author><first>Hao</first><last>Yu</last></author>
       <author><first>Yao</first><last>Meng</last></author>
       <author><first>Yingliang</first><last>Lu</last></author>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <pages>321–330</pages>
       <url hash="c3366c4b">Y10-1036</url>
       <bibkey>yang-etal-2010-fault</bibkey>
     </paper>
     <paper id="37">
       <title>Syntactically Complex Demonstratives and Sortal Inherency</title>
-      <author><first>Richard</first><last>Zuber</last></author>
+      <author id="richard-zuber"><first>Richard</first><last>Zuber</last></author>
       <pages>331–338</pages>
       <url hash="bc49b5eb">Y10-1037</url>
       <bibkey>zuber-2010-syntactically</bibkey>
@@ -332,20 +332,20 @@
     </paper>
     <paper id="40">
       <title><fixed-case>GRASP</fixed-case>: Grammar- and Syntax-based Pattern-Finder for Collocation and Phrase Learning</title>
-      <author><first>Mei-hua</first><last>Chen</last></author>
-      <author><first>Chung-chi</first><last>Huang</last></author>
-      <author><first>Shih-ting</first><last>Huang</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="mei-hua-chen"><first>Mei-hua</first><last>Chen</last></author>
+      <author id="chung-chi-huang"><first>Chung-chi</first><last>Huang</last></author>
+      <author id="shih-ting-huang"><first>Shih-ting</first><last>Huang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>357–364</pages>
       <url hash="b464aedd">Y10-1040</url>
       <bibkey>chen-etal-2010-grasp</bibkey>
     </paper>
     <paper id="41">
       <title>Mitigating Problems in Analogy-based <fixed-case>EBMT</fixed-case> with <fixed-case>SMT</fixed-case> and vice versa: A Case Study with Named Entity Transliteration</title>
-      <author><first>Sandipan</first><last>Dandapat</last></author>
+      <author id="sandipan-dandapat"><first>Sandipan</first><last>Dandapat</last></author>
       <author><first>Sara</first><last>Morrissey</last></author>
-      <author><first>Sudip Kumar</first><last>Naskar</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="sudip-kumar-naskar"><first>Sudip Kumar</first><last>Naskar</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>365–372</pages>
       <url hash="a50d30e8">Y10-1041</url>
       <bibkey>dandapat-etal-2010-mitigating</bibkey>
@@ -373,8 +373,8 @@
     </paper>
     <paper id="45">
       <title>Using Corpus-based Linguistic Approaches in Sense Prediction Study</title>
-      <author><first>Jia-Fei</first><last>Hong</last></author>
-      <author><first>Sue-Jin</first><last>Ker</last></author>
+      <author id="jia-fei-hong"><first>Jia-Fei</first><last>Hong</last></author>
+      <author id="sue-j-ker"><first>Sue-Jin</first><last>Ker</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Kathleen</first><last>Ahrens</last></author>
       <pages>399–407</pages>
@@ -390,8 +390,8 @@
     </paper>
     <paper id="47">
       <title>The Specialized Vocabulary of Modern Patent Language: Semantic Associations in Patent Lexis</title>
-      <author><first>Darren Hsin-hung</first><last>Lin</last></author>
-      <author><first>Shelley Ching-yu</first><last>Hsieh</last></author>
+      <author id="darren-hsin-hung-lin"><first>Darren Hsin-hung</first><last>Lin</last></author>
+      <author id="shelley-ching-yu-hsieh"><first>Shelley Ching-yu</first><last>Hsieh</last></author>
       <pages>417–424</pages>
       <url hash="cf7a81c3">Y10-1047</url>
       <bibkey>lin-hsieh-2010-specialized</bibkey>
@@ -408,7 +408,7 @@
     <paper id="49">
       <title>An Approach toward Register Classification of Book Samples in the <fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese</title>
       <author><first>Wakako</first><last>Kashino</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>433–438</pages>
       <url hash="74086746">Y10-1049</url>
       <bibkey>kashino-okumura-2010-approach</bibkey>
@@ -424,9 +424,9 @@
     </paper>
     <paper id="51">
       <title>A Supervised Machine Learning Approach for Event-Event Relation Identification</title>
-      <author><first>Anup Kumar</first><last>Kolya</last></author>
+      <author id="anup-kumar-kolya"><first>Anup Kumar</first><last>Kolya</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>447–454</pages>
       <url hash="f5af4214">Y10-1051</url>
       <bibkey>kolya-etal-2010-supervised</bibkey>
@@ -458,7 +458,7 @@
       <author><first>Yusuke</first><last>Miyao</last></author>
       <author><first>Alastair</first><last>Butler</last></author>
       <author><first>Kei</first><last>Yoshimoto</last></author>
-      <author><first>Jun’ichi</first><last>Tsujii</last></author>
+      <author id="junichi-tsujii"><first>Jun’ichi</first><last>Tsujii</last></author>
       <pages>481–488</pages>
       <url hash="65876b03">Y10-1055</url>
       <bibkey>miyao-etal-2010-modular</bibkey>
@@ -480,7 +480,7 @@
     <paper id="58">
       <title>A Multi-Dimensional Analysis of <fixed-case>J</fixed-case>apanese Benefactives: The Case of the Yaru-Construction</title>
       <author><first>Akira</first><last>Otani</last></author>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>503–510</pages>
       <url hash="23ec8cdd">Y10-1058</url>
       <bibkey>otani-steedman-2010-multi</bibkey>
@@ -509,7 +509,7 @@
     <paper id="62">
       <title>Incorporate Credibility into Context for the Best Social Media Answers</title>
       <author><first>Qi</first><last>Su</last></author>
-      <author><first>Helen Kai-yun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Helen Kai-yun</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>535–541</pages>
       <url hash="0d49390f">Y10-1062</url>
@@ -518,7 +518,7 @@
     <paper id="63">
       <title>A Query Focused Multi Document Automatic Summarization</title>
       <author><first>Pinaki</first><last>Bhaskar</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>545–554</pages>
       <url hash="28078538">Y10-1063</url>
       <bibkey>bhaskar-bandyopadhyay-2010-query</bibkey>
@@ -527,7 +527,7 @@
       <title>Through Low-Cost Annotation to Reliable Parsing Evaluation</title>
       <author><first>Marek</first><last>Grác</last></author>
       <author><first>Miloš</first><last>Jakubíček</last></author>
-      <author><first>Vojtěch</first><last>Kovář</last></author>
+      <author id="vojtech-kovar"><first>Vojtěch</first><last>Kovář</last></author>
       <pages>555–562</pages>
       <url hash="1dfb6784">Y10-1064</url>
       <bibkey>grac-etal-2010-low</bibkey>
@@ -550,7 +550,7 @@
       <title>How Well Conditional Random Fields Can be Used in Novel Term Recognition</title>
       <author><first>Xing</first><last>Zhang</last></author>
       <author><first>Yan</first><last>Song</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>583–592</pages>
       <url hash="b1ed4ee8">Y10-1067</url>
       <bibkey>zhang-etal-2010-well</bibkey>
@@ -569,8 +569,8 @@
       <author><first>Evan Liz Cantoja</first><last>Buhay</last></author>
       <author><first>Marie Joy Padilla</first><last>Evardone</last></author>
       <author><first>Hansel Baguio</first><last>Nocon</last></author>
-      <author><first>Davis Muhajereen</first><last>Dimalen</last></author>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="davis-muhajereen-d-dimalen"><first>Davis Muhajereen</first><last>Dimalen</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>603–611</pages>
       <url hash="9e42ab2c">Y10-1069</url>
       <bibkey>buhay-etal-2010-autolex</bibkey>
@@ -585,7 +585,7 @@
     <paper id="71">
       <title>Finding Emotion Holder from <fixed-case>B</fixed-case>engali Blog <fixed-case>T</fixed-case>exts—<fixed-case>A</fixed-case>n Unsupervised Syntactic Approach</title>
       <author><first>Dipankar</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>621–628</pages>
       <url hash="68a1a700">Y10-1071</url>
       <bibkey>das-bandyopadhyay-2010-finding</bibkey>
@@ -620,7 +620,7 @@
       <title>Using Various Features in Machine Learning to Obtain High Levels of Performance for Recognition of <fixed-case>J</fixed-case>apanese Notational Variants</title>
       <author><first>Masahiro</first><last>Kojima</last></author>
       <author><first>Masaki</first><last>Murata</last></author>
-      <author><first>Jun’ichi</first><last>Kazama</last></author>
+      <author id="junichi-kazama"><first>Jun’ichi</first><last>Kazama</last></author>
       <author><first>Kow</first><last>Kuroda</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
       <author><first>Eiji</first><last>Aramaki</last></author>
@@ -648,7 +648,7 @@
     </paper>
     <paper id="78">
       <title>The Acquisition of Imperfective Aspect Marking in <fixed-case>K</fixed-case>orean as a Second Language by <fixed-case>J</fixed-case>apanese Learners</title>
-      <author><first>Ju-yeon</first><last>Ryu</last></author>
+      <author id="ju-yeon-ryu"><first>Ju-yeon</first><last>Ryu</last></author>
       <author><first>Kaoru</first><last>Horie</last></author>
       <author><first>Yasuhiro</first><last>Shirai</last></author>
       <pages>677–684</pages>
@@ -694,14 +694,14 @@
     </paper>
     <paper id="83">
       <title>Workshop on Advanced Corpus Solutions</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
       <pages>717–719</pages>
       <url hash="a525cdd7">Y10-1083</url>
       <bibkey>johannessen-2010-workshop</bibkey>
     </paper>
     <paper id="84">
       <title>Degrees of Orality in Speech-like Corpora: Comparative Annotation of Chat and <fixed-case>E</fixed-case>-mail Corpora</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>721–729</pages>
       <url hash="0ef1b5ff">Y10-1084</url>
       <bibkey>bick-2010-degrees</bibkey>
@@ -717,16 +717,16 @@
       <title>Fast Syntactic Searching in Very Large Corpora for Many Languages</title>
       <author><first>Miloš</first><last>Jakubíček</last></author>
       <author><first>Adam</first><last>Kilgarriff</last></author>
-      <author><first>Diana</first><last>McCarthy</last></author>
-      <author><first>Pavel</first><last>Rychlý</last></author>
+      <author id="diana-mccarthy"><first>Diana</first><last>McCarthy</last></author>
+      <author id="pavel-rychly"><first>Pavel</first><last>Rychlý</last></author>
       <pages>741–747</pages>
       <url hash="9665bfcc">Y10-1086</url>
       <bibkey>jakubicek-etal-2010-fast</bibkey>
     </paper>
     <paper id="87">
       <title>A Multilingual Speech Resource: The <fixed-case>N</fixed-case>ordic Dialect Corpus</title>
-      <author><first>Janne Bondi</first><last>Johannessen</last></author>
-      <author><first>Joel</first><last>Priestley</last></author>
+      <author id="janne-bondi-johannessen"><first>Janne Bondi</first><last>Johannessen</last></author>
+      <author id="joel-priestley"><first>Joel</first><last>Priestley</last></author>
       <author><first>Anders</first><last>Nøklestad</last></author>
       <pages>749–758</pages>
       <url hash="e5d351ae">Y10-1087</url>
@@ -743,7 +743,7 @@
     <paper id="89">
       <title>Advanced Corpus Solutions for Humanities Researchers</title>
       <author><first>James</first><last>Wilson</last></author>
-      <author><first>Anthony</first><last>Hartley</last></author>
+      <author id="anthony-hartley"><first>Anthony</first><last>Hartley</last></author>
       <author><first>Serge</first><last>Sharoff</last></author>
       <author><first>Paul</first><last>Stephenson</last></author>
       <pages>769–778</pages>
@@ -774,7 +774,7 @@
     <paper id="92">
       <title>Towards the Global <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et</title>
       <author><first>Amitava</first><last>Das</last></author>
-      <author><first>Sivaji</first><last>Bandyopadhyay</last></author>
+      <author id="sivaji-bandyopadhyay"><first>Sivaji</first><last>Bandyopadhyay</last></author>
       <pages>799–808</pages>
       <url hash="7c581a97">Y10-1092</url>
       <bibkey>das-bandyopadhyay-2010-towards</bibkey>
@@ -782,7 +782,7 @@
     <paper id="93">
       <title>Towards an Automatic Measurement of Verbal Lexicon Acquisition: The Case for a Young Children-versus-Adults Classification in <fixed-case>F</fixed-case>rench and <fixed-case>M</fixed-case>andarin</title>
       <author><first>Yann</first><last>Desalle</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Bruno</first><last>Gaume</last></author>
       <author><first>Hintat</first><last>Cheung</last></author>
       <pages>809–818</pages>
@@ -791,10 +791,10 @@
     </paper>
     <paper id="94">
       <title>Graph Representation of Synonymy and Translation Resources for Crosslinguistic Modelisation of Meaning</title>
-      <author><first>Benoît</first><last>Gaillard</last></author>
+      <author id="benoit-gaillard"><first>Benoît</first><last>Gaillard</last></author>
       <author><first>Yannick</first><last>Chudy</last></author>
       <author><first>Pierre</first><last>Magistry</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Emmanuel</first><last>Navarro</last></author>
       <pages>819–830</pages>
       <url hash="f1134d28">Y10-1094</url>
@@ -809,7 +809,7 @@
     </paper>
     <paper id="96">
       <title>Computational Modeling of Verb Acquisition, from a Monolingual to a Bilingual Study</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Chun-Han</first><last>Chang</last></author>
       <author><first>Yann</first><last>Desalle</last></author>
       <pages>841–851</pages>
@@ -833,14 +833,14 @@
     </paper>
     <paper id="99">
       <title>Natural Language Production in Database Semantics</title>
-      <author><first>Roland</first><last>Hausser</last></author>
+      <author id="roland-r-hausser"><first>Roland</first><last>Hausser</last></author>
       <pages>875–884</pages>
       <url hash="b752aa7b">Y10-1099</url>
       <bibkey>hausser-2010-natural</bibkey>
     </paper>
     <paper id="100">
       <title>Change of Location and Change of State: How Telicity is Attained</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>885–894</pages>
       <url hash="57f07871">Y10-1100</url>
       <bibkey>lee-2010-change</bibkey>
@@ -854,7 +854,7 @@
     </paper>
     <paper id="102">
       <title>A Note on Pseudo-comparatives like “John is rich like <fixed-case>X</fixed-case>!” and “Like <fixed-case>X</fixed-case>, John is rich!”</title>
-      <author><first>Benjamin</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin</first><last>Tsou</last></author>
       <pages>907–915</pages>
       <url hash="da383cc4">Y10-1102</url>
       <bibkey>tsou-2010-note</bibkey>
diff --git a/data/xml/Y11.xml b/data/xml/Y11.xml
index 330b152529..d53a028994 100644
--- a/data/xml/Y11.xml
+++ b/data/xml/Y11.xml
@@ -3,7 +3,7 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 25th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Helena Hong</first><last>Gao</last></editor>
+      <editor id="helena-hong-gao"><first>Helena Hong</first><last>Gao</last></editor>
       <editor><first>Minghui</first><last>Dong</last></editor>
       <publisher>Institute of Digital Enhancement of Cognitive Processing, Waseda University</publisher>
       <address>Singapore</address>
@@ -24,7 +24,7 @@
     </paper>
     <paper id="2">
       <title><fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Name Transliteration with Bi-Directional Syllable-Based Maximum Matching</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>11–19</pages>
       <url hash="399ddf40">Y11-1002</url>
       <bibkey>kwong-2011-english</bibkey>
@@ -35,7 +35,7 @@
       <author><first>Yangsheng</first><last>Ji</last></author>
       <author><first>Ning</first><last>Xi</last></author>
       <author><first>Shujian</first><last>Huang</last></author>
-      <author><first>Jiajun</first><last>Chen</last></author>
+      <author id="jiajun-chen"><first>Jiajun</first><last>Chen</last></author>
       <pages>20–30</pages>
       <url hash="681500ad">Y11-1003</url>
       <bibkey>zhao-etal-2011-language</bibkey>
@@ -67,7 +67,7 @@
     </paper>
     <paper id="7">
       <title>Measuring Concept Concreteness from the Lexicographic Perspective</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>60–69</pages>
       <url hash="330f3609">Y11-1007</url>
       <bibkey>kwong-2011-measuring</bibkey>
@@ -89,7 +89,7 @@
     </paper>
     <paper id="10">
       <title>Automatic Wrapper Generation and Maintenance</title>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <author><first>Yuhang</first><last>Yang</last></author>
       <author><first>Shu</first><last>Zhang</last></author>
       <author><first>Hao</first><last>Yu</last></author>
@@ -99,10 +99,10 @@
     </paper>
     <paper id="11">
       <title>Evaluation via Negativa of <fixed-case>C</fixed-case>hinese Word Segmentation for Information Retrieval</title>
-      <author><first>Mike Tian-Jian</first><last>Jiang</last></author>
+      <author id="mike-tian-jian-jiang"><first>Mike Tian-Jian</first><last>Jiang</last></author>
       <author><first>Cheng-Wei</first><last>Shih</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>100–109</pages>
       <url hash="3adda534">Y11-1011</url>
       <bibkey>jiang-etal-2011-evaluation</bibkey>
@@ -118,7 +118,7 @@
     </paper>
     <paper id="13">
       <title>A Graph-based Bilingual Corpus Selection Approach for <fixed-case>SMT</fixed-case></title>
-      <author><first>Wenhan</first><last>Chao</last></author>
+      <author id="wenhan-chao"><first>Wenhan</first><last>Chao</last></author>
       <author><first>Zhoujun</first><last>Li</last></author>
       <pages>120–129</pages>
       <url hash="12b02900">Y11-1013</url>
@@ -135,7 +135,7 @@
     </paper>
     <paper id="15">
       <title>Context Resolution of Verb Particle Constructions for <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi Translation</title>
-      <author><first>Niladri</first><last>Chatterjee</last></author>
+      <author id="niladri-chatterjee"><first>Niladri</first><last>Chatterjee</last></author>
       <author><first>Renu</first><last>Balyan</last></author>
       <pages>140–149</pages>
       <url hash="453011fe">Y11-1015</url>
@@ -160,9 +160,9 @@
     </paper>
     <paper id="18">
       <title><fixed-case>T</fixed-case>ibetan Word Segmentation as Syllable Tagging Using Conditional Random Field</title>
-      <author><first>Huidan</first><last>Liu</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
-      <author><first>Longlong</first><last>Ma</last></author>
+      <author id="huidan-liu"><first>Huidan</first><last>Liu</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
+      <author id="longlong-ma"><first>Longlong</first><last>Ma</last></author>
       <author><first>Jian</first><last>Wu</last></author>
       <author><first>Yeping</first><last>He</last></author>
       <pages>168–177</pages>
@@ -178,7 +178,7 @@
     </paper>
     <paper id="20">
       <title>The <fixed-case>L</fixed-case>1 Acquisition of the Imperfective Aspect markers in <fixed-case>K</fixed-case>orean: a Comparison with <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Ju-Yeon</first><last>Ryu</last></author>
+      <author id="ju-yeon-ryu"><first>Ju-Yeon</first><last>Ryu</last></author>
       <pages>186–195</pages>
       <url hash="a1346b9a">Y11-1020</url>
       <bibkey>ryu-2011-l1</bibkey>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="24">
       <title>A Bare-bones Constraint Grammar</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>226–235</pages>
       <url hash="3f7a4cac">Y11-1024</url>
       <bibkey>bick-2011-bare</bibkey>
@@ -223,7 +223,7 @@
       <title>Spring Cleaning and Grammar Compression: Two Techniques for Detection of Redundancy in <fixed-case>HPSG</fixed-case> Grammars</title>
       <author><first>Antske</first><last>Fokkens</last></author>
       <author><first>Yi</first><last>Zhang</last></author>
-      <author><first>Emily M.</first><last>Bender</last></author>
+      <author id="emily-m-bender"><first>Emily M.</first><last>Bender</last></author>
       <pages>236–244</pages>
       <url hash="a70d64de">Y11-1025</url>
       <bibkey>fokkens-etal-2011-spring</bibkey>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="29">
       <title>Annotating the Structure and Semantics of Fables</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>275–282</pages>
       <url hash="e077ccbd">Y11-1029</url>
       <bibkey>kwong-2011-annotating</bibkey>
@@ -264,7 +264,7 @@
     <paper id="30">
       <title>Verbal Inflection in <fixed-case>H</fixed-case>indi: A Distributed Morphology Approach</title>
       <author><first>Smriti</first><last>Singh</last></author>
-      <author><first>Vaijayanthi M.</first><last>Sarma</last></author>
+      <author id="vaijayanthi-m-sarma"><first>Vaijayanthi M.</first><last>Sarma</last></author>
       <pages>283–292</pages>
       <url hash="384d6955">Y11-1030</url>
       <bibkey>singh-sarma-2011-verbal</bibkey>
@@ -272,7 +272,7 @@
     <paper id="31">
       <title>Word classes in <fixed-case>I</fixed-case>ndonesian: A linguistic reality or a convenient fallacy in natural language processing?</title>
       <author><first>Meladel</first><last>Mistica</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>I Wayan</first><last>Arka</last></author>
       <pages>293–302</pages>
       <url hash="feec064b">Y11-1031</url>
@@ -280,7 +280,7 @@
     </paper>
     <paper id="32">
       <title>Automated Proof Reading of Clinical Notes</title>
-      <author><first>Jon</first><last>Patrick</last></author>
+      <author id="jon-patrick"><first>Jon</first><last>Patrick</last></author>
       <author><first>Dung</first><last>Nguyen</last></author>
       <pages>303–312</pages>
       <url hash="fca48c09">Y11-1032</url>
@@ -306,7 +306,7 @@
     </paper>
     <paper id="35">
       <title>Unsupervised Word Sense Disambiguation Using Neighborhood Knowledge</title>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Zhizhuo</first><last>Yang</last></author>
       <author><first>Ping</first><last>Jian</last></author>
       <pages>333–342</pages>
@@ -316,7 +316,7 @@
     <paper id="36">
       <title>Dependency-based Analysis for <fixed-case>T</fixed-case>agalog Sentences</title>
       <author><first>Erlyn</first><last>Manguilimotan</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>343–352</pages>
       <url hash="ab276778">Y11-1036</url>
       <bibkey>manguilimotan-matsumoto-2011-dependency</bibkey>
@@ -338,7 +338,7 @@
     </paper>
     <paper id="39">
       <title>In Situ Text Summarisation for Museum Visitors</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <author><first>Patrick</first><last>Ye</last></author>
       <author><first>Fabian</first><last>Bohnert</last></author>
       <author><first>Ingrid</first><last>Zukerman</last></author>
@@ -350,7 +350,7 @@
       <title>Iteratively Estimating Pattern Reliability and Seed Quality With Extraction Consistency</title>
       <author><first>Yi-Hsun</first><last>Lee</last></author>
       <author><first>Chung-Yao</first><last>Chuang</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>382–391</pages>
       <url hash="57c95332">Y11-1040</url>
       <bibkey>lee-etal-2011-iteratively</bibkey>
@@ -367,7 +367,7 @@
       <title>A Listwise Approach to Coreference Resolution in Multiple Languages</title>
       <author><first>Oanh Thi</first><last>Tran</last></author>
       <author><first>Bach Xuan</first><last>Ngo</last></author>
-      <author><first>Minh Le</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Minh Le</first><last>Nguyen</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>400–409</pages>
       <url hash="0aa3d7ec">Y11-1042</url>
@@ -376,7 +376,7 @@
     <paper id="43">
       <title>Combining Dependency and Constituent-based Syntactic Information for Anaphoricity Determination in Coreference Resolution</title>
       <author><first>Fang</first><last>Kong</last></author>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <pages>410–419</pages>
       <url hash="42bf7458">Y11-1043</url>
       <bibkey>kong-zhou-2011-combining</bibkey>
@@ -395,7 +395,7 @@
     <paper id="45">
       <title>A Hybrid Extraction Model for <fixed-case>C</fixed-case>hinese Noun/Verb Synonymous bi-gram Collocations</title>
       <author><first>Wanyin</first><last>Li</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <pages>430–439</pages>
       <url hash="34fdfc4b">Y11-1045</url>
       <bibkey>li-lu-2011-hybrid</bibkey>
@@ -425,7 +425,7 @@
     <paper id="49">
       <title>An <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Cross-lingual Word Semantic Similarity Measure Exploring Attributes and Relations</title>
       <author><first>Lin</first><last>Dai</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>467–476</pages>
       <url hash="3ac3b520">Y11-1049</url>
       <bibkey>dai-huang-2011-english</bibkey>
@@ -433,8 +433,8 @@
     <paper id="50">
       <title>Learning-to-Translate Based on the <fixed-case>S</fixed-case>-<fixed-case>SSTC</fixed-case> Annotation Schema</title>
       <author><first>Enya Kong</first><last>Tang</last></author>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
-      <author><first>Christian</first><last>Boitet</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="christian-boitet"><first>Christian</first><last>Boitet</last></author>
       <pages>477–484</pages>
       <url hash="01f027a3">Y11-1050</url>
       <bibkey>tang-etal-2011-learning</bibkey>
@@ -507,7 +507,7 @@
     <paper id="59">
       <title><fixed-case>NERSIL</fixed-case> - the Named-Entity Recognition System for <fixed-case>I</fixed-case>ban Language</title>
       <author><first>Yong Soo</first><last>Fong</last></author>
-      <author><first>Bali Ranaivo</first><last>Malanҫon</last></author>
+      <author id="bali-ranaivo-malancon"><first>Bali Ranaivo</first><last>Malanҫon</last></author>
       <author><first>Alvin Yeo</first><last>Wee</last></author>
       <pages>549–558</pages>
       <url hash="ce77ae9e">Y11-1059</url>
@@ -517,7 +517,7 @@
       <title>Improving <fixed-case>PP</fixed-case> Attachment Disambiguation in a Rule-based Parser</title>
       <author><first>Yoon-Hyung</first><last>Roh</last></author>
       <author><first>Ki-Young</first><last>Lee</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>559–566</pages>
       <url hash="560d80ea">Y11-1060</url>
       <bibkey>roh-etal-2011-improving</bibkey>
@@ -544,7 +544,7 @@
       <author><first>Takafumi</first><last>Suzuki</last></author>
       <author><first>Kiyoko</first><last>Uchiyama</last></author>
       <author><first>Ryota</first><last>Tomisaka</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>587–596</pages>
       <url hash="f34cfa8f">Y11-1063</url>
       <bibkey>suzuki-etal-2011-analyzing</bibkey>
@@ -563,7 +563,7 @@
     <paper id="65">
       <title>A Construction Grammar Approach to Prepositional Phrase Attachment: Semantic Feature Analysis of <fixed-case>V</fixed-case> <fixed-case>NP</fixed-case>1 into <fixed-case>NP</fixed-case>2 Construction</title>
       <author><first>Liyin</first><last>Chen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <pages>607–614</pages>
       <url hash="fe6da7ee">Y11-1065</url>
diff --git a/data/xml/Y12.xml b/data/xml/Y12.xml
index 2801bad424..17c1149517 100644
--- a/data/xml/Y12.xml
+++ b/data/xml/Y12.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 26th Pacific Asia Conference on Language, Information, and Computation</booktitle>
       <url hash="63d9e4e5">Y12-1</url>
-      <editor><first>Ruli</first><last>Manurung</last></editor>
+      <editor id="ruli-manurung"><first>Ruli</first><last>Manurung</last></editor>
       <editor><first>Francis</first><last>Bond</last></editor>
       <publisher>Faculty of Computer Science, Universitas Indonesia</publisher>
       <address>Bali, Indonesia</address>
@@ -32,28 +32,28 @@
     </paper>
     <paper id="3">
       <title>Idiomaticity and Classical Traditions in Some <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages</title>
-      <author><first>Benjamin K</first><last>Tsou</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K</first><last>Tsou</last></author>
       <pages>39–55</pages>
       <url hash="28d53f1c">Y12-1003</url>
       <bibkey>tsou-2012-idiomaticity</bibkey>
     </paper>
     <paper id="4">
       <title>Things between Lexicon and Grammar</title>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>56–57</pages>
       <url hash="d088ef50">Y12-1004</url>
       <bibkey>matsumoto-2012-things</bibkey>
     </paper>
     <paper id="5">
       <title>Social Media: Friend or Foe of Natural Language Processing?</title>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>58–59</pages>
       <url hash="31e72bf4">Y12-1005</url>
       <bibkey>baldwin-2012-social</bibkey>
     </paper>
     <paper id="6">
       <title>Towards a Semantic Annotation of <fixed-case>E</fixed-case>nglish Television News - Building and Evaluating a Constraint Grammar <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>60–69</pages>
       <url hash="e6038762">Y12-1006</url>
       <bibkey>bick-2012-towards</bibkey>
@@ -70,7 +70,7 @@
     <paper id="8">
       <title>Automatic Domain Adaptation for Word Sense Disambiguation Based on Comparison of Multiple Classifiers</title>
       <author><first>Kanako</first><last>Komiya</last></author>
-      <author><first>Manabu</first><last>Okumura</last></author>
+      <author id="manabu-okumura"><first>Manabu</first><last>Okumura</last></author>
       <pages>80–88</pages>
       <url hash="3b3adb69">Y12-1008</url>
       <bibkey>komiya-okumura-2012-automatic</bibkey>
@@ -126,15 +126,15 @@
     <paper id="14">
       <title><fixed-case>I</fixed-case>ndonesian Dependency Treebank: Annotation and Parsing</title>
       <author><first>Nathan</first><last>Green</last></author>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
-      <author><first>Zdenek</first><last>Zabokrtsky</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdenek</first><last>Zabokrtsky</last></author>
       <pages>137–145</pages>
       <url hash="f5ff1612">Y12-1014</url>
       <bibkey>green-etal-2012-indonesian</bibkey>
     </paper>
     <paper id="15">
       <title>Handling <fixed-case>I</fixed-case>ndonesian Clitics: A Dataset Comparison for an <fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Statistical Machine Translation System</title>
-      <author><first>Septina Dian</first><last>Larasati</last></author>
+      <author id="septina-dian-larasati"><first>Septina Dian</first><last>Larasati</last></author>
       <pages>146–152</pages>
       <url hash="26e1f9bd">Y12-1015</url>
       <bibkey>larasati-2012-handling</bibkey>
@@ -182,14 +182,14 @@
     <paper id="21">
       <title>Extracting Keywords from Multi-party Live Chats</title>
       <author><first>Su Nam</first><last>Kim</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>199–208</pages>
       <url hash="e5d937a7">Y12-1021</url>
       <bibkey>kim-baldwin-2012-extracting</bibkey>
     </paper>
     <paper id="22">
       <title>Extracting Networks of People and Places from Literary Texts</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
       <pages>209–218</pages>
       <url hash="61f90b22">Y12-1022</url>
@@ -205,7 +205,7 @@
     <paper id="24">
       <title>Pattern Matching Refinements to Dictionary-Based Code-Switching Point Detection</title>
       <author><first>Nathaniel</first><last>Oco</last></author>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <pages>229–236</pages>
       <url hash="10bed3ab">Y12-1024</url>
       <bibkey>oco-roxas-2012-pattern</bibkey>
@@ -257,7 +257,7 @@
       <title>Emotional Tendency Identification for Micro-blog Topics Based on Multiple Characteristics</title>
       <author><first>Quanchao</first><last>Liu</last></author>
       <author><first>Chong</first><last>Feng</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <pages>280–288</pages>
       <url hash="a80da78e">Y12-1030</url>
       <bibkey>liu-etal-2012-emotional</bibkey>
@@ -265,7 +265,7 @@
     <paper id="31">
       <title>Product Name Classification for Product Instance Distinction</title>
       <author><first>Hye-Jin</first><last>Min</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>289–298</pages>
       <url hash="9222ae6c">Y12-1031</url>
       <bibkey>min-park-2012-product</bibkey>
@@ -290,14 +290,14 @@
     <paper id="34">
       <title>Applying Statistical Post-Editing to <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Rule-based Machine Translation System</title>
       <author><first>Ki-Young</first><last>Lee</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>318–324</pages>
       <url hash="788547b1">Y12-1034</url>
       <bibkey>lee-kim-2012-applying</bibkey>
     </paper>
     <paper id="35">
       <title>A Model of <fixed-case>V</fixed-case>ietnamese Person Named Entity Question Answering System</title>
-      <author><first>Mai-Vu</first><last>Tran</last></author>
+      <author id="mai-vu-tran"><first>Mai-Vu</first><last>Tran</last></author>
       <author><first>Duc-Trong</first><last>Le</last></author>
       <author><first>Xuan Tu</first><last>Tran</last></author>
       <author><first>Tien-Tung</first><last>Nguyen</last></author>
@@ -309,7 +309,7 @@
       <title>Towards a Semantic Annotation of <fixed-case>E</fixed-case>nglish Television News - Building and Evaluating a Constraint Grammar <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et</title>
       <author><first>Shaohua</first><last>Yang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-liang</first><last>Lu</last></author>
       <pages>333–342</pages>
       <url hash="351ddb68">Y12-1036</url>
       <bibkey>yang-etal-2012-towards</bibkey>
@@ -333,7 +333,7 @@
     </paper>
     <paper id="39">
       <title>Introduction of a Probabilistic Language Model to Non-Factoid Question Answering Using Example <fixed-case>Q</fixed-case>&amp;<fixed-case>A</fixed-case> Pairs</title>
-      <author><first>Kyosuke</first><last>Yoshida</last></author>
+      <author id="kyosuke-yoshida"><first>Kyosuke</first><last>Yoshida</last></author>
       <author><first>Taro</first><last>Ueda</last></author>
       <author><first>Madoka</first><last>Ishioroshi</last></author>
       <author><first>Hideyuki</first><last>Shibuki</last></author>
@@ -362,7 +362,7 @@
       <title>Anaphora Annotation in <fixed-case>H</fixed-case>indi Dependency <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank</title>
       <author><first>Praveen</first><last>Dakwale</last></author>
       <author><first>Himanshu</first><last>Sharma</last></author>
-      <author><first>Dipti M</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti M</first><last>Sharma</last></author>
       <pages>391–400</pages>
       <url hash="1179f0a3">Y12-1042</url>
       <bibkey>dakwale-etal-2012-anaphora</bibkey>
@@ -370,8 +370,8 @@
     <paper id="43">
       <title>Improving Statistical Machine Translation with Processing Shallow Parsing</title>
       <author><first>Hoai-Thu</first><last>Vuong</last></author>
-      <author><first>Vinh Van</first><last>Nguyen</last></author>
-      <author><first>Viet Hong</first><last>Tran</last></author>
+      <author id="vinh-van-nguyen"><first>Vinh Van</first><last>Nguyen</last></author>
+      <author id="viet-hong-tran"><first>Viet Hong</first><last>Tran</last></author>
       <author><first>Akira</first><last>Shimazu</last></author>
       <pages>401–407</pages>
       <url hash="981a1d62">Y12-1043</url>
@@ -379,7 +379,7 @@
     </paper>
     <paper id="44">
       <title>Psycholinguistics, Lexicography, and Word Sense Disambiguation</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>408–417</pages>
       <url hash="9df6ec73">Y12-1044</url>
       <bibkey>kwong-2012-psycholinguistics</bibkey>
@@ -396,7 +396,7 @@
       <title>The Headedness of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Serial Verb Constructions: A Corpus-Based Study</title>
       <author><first>Jingxia</first><last>Lin</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Huarui</first><last>Zhang</last></author>
+      <author id="huarui-zhang"><first>Huarui</first><last>Zhang</last></author>
       <author><first>Hongzhi</first><last>Xu</last></author>
       <pages>428–435</pages>
       <url hash="d3dc7ae7">Y12-1046</url>
@@ -435,7 +435,7 @@
       <title>Classifying Dialogue Acts in Multi-party Live Chats</title>
       <author><first>Su Nam</first><last>Kim</last></author>
       <author><first>Lawrence</first><last>Cavedon</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>463–472</pages>
       <url hash="50f5f6c7">Y12-1050</url>
       <bibkey>kim-etal-2012-classifying-dialogue</bibkey>
@@ -451,14 +451,14 @@
       <title>Deep Lexical Acquisition of Type Properties in Low-resource Languages: A Case Study in <fixed-case>W</fixed-case>ambaya</title>
       <author><first>Jeremy</first><last>Nicholson</last></author>
       <author><first>Rachel</first><last>Nordlinger</last></author>
-      <author><first>Timothy</first><last>Baldwin</last></author>
+      <author id="timothy-baldwin"><first>Timothy</first><last>Baldwin</last></author>
       <pages>481–490</pages>
       <url hash="bf876257">Y12-1052</url>
       <bibkey>nicholson-etal-2012-deep</bibkey>
     </paper>
     <paper id="53">
       <title><fixed-case>C</fixed-case>hinese Sentiments on the Clouds: A Preliminary Experiment on Corpus Processing and Exploration on Cloud Service</title>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <author><first>Yu-Yun</first><last>Chang</last></author>
       <author><first>Meng-Xian</first><last>Shih</last></author>
       <pages>491–497</pages>
@@ -491,9 +491,9 @@
     </paper>
     <paper id="56">
       <title>Analysis of Social and Expressive Factors of Requests by Methods of Text Mining</title>
-      <author><first>Daša</first><last>Munková</last></author>
+      <author id="dasa-munkova"><first>Daša</first><last>Munková</last></author>
       <author><first>Michal</first><last>Munk</last></author>
-      <author><first>Zuzana</first><last>Fráterová</last></author>
+      <author id="zuzana-fraterova"><first>Zuzana</first><last>Fráterová</last></author>
       <author><first>Beáta</first><last>Ďuračková</last></author>
       <pages>515–524</pages>
       <url hash="dd8347e1">Y12-1056</url>
@@ -502,7 +502,7 @@
     <paper id="57">
       <title>Set Expansion using Sibling Relations between Semantic Categories</title>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>525–534</pages>
       <url hash="9b01eae3">Y12-1057</url>
@@ -519,7 +519,7 @@
     </paper>
     <paper id="59">
       <title>Text Readability Classification of Textbooks of a Low-Resource Language</title>
-      <author><first>Zahurul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahurul</first><last>Islam</last></author>
       <author><first>Alexander</first><last>Mehler</last></author>
       <author><first>Rashedur</first><last>Rahman</last></author>
       <pages>545–553</pages>
@@ -538,7 +538,7 @@
       <title>Improved Constituent Context Model with Features</title>
       <author><first>Yun</first><last>Huang</last></author>
       <author><first>Min</first><last>Zhang</last></author>
-      <author><first>Chew Lim</first><last>Tan</last></author>
+      <author id="chew-lim-tan"><first>Chew Lim</first><last>Tan</last></author>
       <pages>564–573</pages>
       <url hash="a146b82b">Y12-1061</url>
       <bibkey>huang-etal-2012-improved-constituent</bibkey>
@@ -591,7 +591,7 @@
     </paper>
     <paper id="68">
       <title>Psych-Predicates: How They Are Different</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>626–631</pages>
       <url hash="f7d194d0">Y12-1068</url>
       <bibkey>lee-2012-psych</bibkey>
@@ -607,7 +607,7 @@
     <paper id="70">
       <title>Gap in “Gapless” Relative Clauses in <fixed-case>K</fixed-case>orean and Other <fixed-case>A</fixed-case>sian Languages</title>
       <author><first>Jeong-Shik</first><last>Lee</last></author>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>640–645</pages>
       <url hash="b8b8ef78">Y12-1070</url>
       <bibkey>lee-lee-2012-gap</bibkey>
diff --git a/data/xml/Y13.xml b/data/xml/Y13.xml
index 69a4a8bd6e..9eb0e69341 100644
--- a/data/xml/Y13.xml
+++ b/data/xml/Y13.xml
@@ -62,9 +62,9 @@
     </paper>
     <paper id="7">
       <title>A Quantitative Comparative Study of Prosodic and Discourse Units, the Case of <fixed-case>F</fixed-case>rench and <fixed-case>T</fixed-case>aiwan <fixed-case>M</fixed-case>andarin</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Shu-Chuan</first><last>Tseng</last></author>
-      <author><first>Alvin Cheng-Hsien</first><last>Chen</last></author>
+      <author id="alvin-cheng-hsien-chen"><first>Alvin Cheng-Hsien</first><last>Chen</last></author>
       <author><first>Klim</first><last>Peshkov</last></author>
       <pages>92–101</pages>
       <url hash="bb27d873">Y13-1007</url>
@@ -73,7 +73,7 @@
     <paper id="8">
       <title>Corpus-Based Research on Tense Analysis and Rhetorical Structure in Journal Article Abstracts</title>
       <author><first>Pin-ning</first><last>Tu</last></author>
-      <author><first>Shih-Ping</first><last>Wang</last></author>
+      <author id="shih-ping-wang"><first>Shih-Ping</first><last>Wang</last></author>
       <pages>102–107</pages>
       <url hash="919c3a93">Y13-1008</url>
       <bibkey>tu-wang-2013-corpus</bibkey>
@@ -90,7 +90,7 @@
     <paper id="10">
       <title>A Study of the Effectiveness of Suffixes for <fixed-case>C</fixed-case>hinese Word Segmentation</title>
       <author><first>Xiaoqing</first><last>Li</last></author>
-      <author><first>Chengqing</first><last>Zong</last></author>
+      <author id="chengqing-zong"><first>Chengqing</first><last>Zong</last></author>
       <author><first>Keh-Yih</first><last>Su</last></author>
       <pages>118–125</pages>
       <url hash="2936267f">Y13-1010</url>
@@ -107,14 +107,14 @@
       <title>Difficulties in Perception and Pronunciation of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Disyllabic Word Tone Acquisition: A Study of Some <fixed-case>J</fixed-case>apanese <fixed-case>U</fixed-case>niversity Students</title>
       <author><first>Yuting</first><last>Dong</last></author>
       <author><first>Yasushi</first><last>Tsubota</last></author>
-      <author><first>Masatake</first><last>Dantsuji</last></author>
+      <author id="masatake-dantsuji"><first>Masatake</first><last>Dantsuji</last></author>
       <pages>143–152</pages>
       <url hash="88784567">Y13-1012</url>
       <bibkey>dong-etal-2013-difficulties</bibkey>
     </paper>
     <paper id="13">
       <title>Exploring the <fixed-case>C</fixed-case>hinese Mental Lexicon with Word Association Norms</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>153–162</pages>
       <url hash="d066c5f1">Y13-1013</url>
       <bibkey>kwong-2013-exploring</bibkey>
@@ -123,7 +123,7 @@
       <title>Towards Automatic Error Type Classification of <fixed-case>J</fixed-case>apanese Language Learners’ Writings</title>
       <author><first>Hiromi</first><last>Oyama</last></author>
       <author><first>Mamoru</first><last>Komachi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>163–172</pages>
       <url hash="5198d5e8">Y13-1014</url>
       <bibkey>oyama-etal-2013-towards</bibkey>
@@ -137,7 +137,7 @@
     </paper>
     <paper id="16">
       <title>Clausal-Packaging of Path of Motion in Second Language Acquisition of <fixed-case>R</fixed-case>ussian and <fixed-case>S</fixed-case>panish</title>
-      <author><first>Kawai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Kawai</first><last>Chui</last></author>
       <author><first>Hsiang-lin</first><last>Yeh</last></author>
       <author><first>Wen-Chun</first><last>Lan</last></author>
       <author><first>Yu-Han</first><last>Cheng</last></author>
@@ -166,7 +166,7 @@
       <author><first>Sachi</first><last>Yasuda</last></author>
       <author><first>Hikari</first><last>Konishi</last></author>
       <author><first>Mizuho</first><last>Imada</last></author>
-      <author><first>Kikuo</first><last>Maekawa</last></author>
+      <author id="kikuo-maekawa"><first>Kikuo</first><last>Maekawa</last></author>
       <pages>206–214</pages>
       <url hash="2d451889">Y13-1019</url>
       <bibkey>asahara-etal-2013-bccwj</bibkey>
@@ -174,15 +174,15 @@
     <paper id="20">
       <title>A Corpus-based Approach to Linguistic Function</title>
       <author><first>Hengbin</first><last>Yan</last></author>
-      <author><first>Jonathan</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan</first><last>Webster</last></author>
       <pages>215–221</pages>
       <url hash="e4dedba4">Y13-1020</url>
       <bibkey>yan-webster-2013-corpus</bibkey>
     </paper>
     <paper id="21">
       <title>A Case Study of a Free Word Order</title>
-      <author><first>Vladislav</first><last>Kuboň</last></author>
-      <author><first>Markéta</first><last>Lopatková</last></author>
+      <author id="vladislav-kubon"><first>Vladislav</first><last>Kuboň</last></author>
+      <author id="marketa-lopatkova"><first>Markéta</first><last>Lopatková</last></author>
       <author><first>Jiří</first><last>Mírovský</last></author>
       <pages>222–231</pages>
       <url hash="97e1d852">Y13-1021</url>
@@ -197,7 +197,7 @@
     </paper>
     <paper id="23">
       <title><fixed-case>C</fixed-case>hin<fixed-case>G</fixed-case>ram: A <fixed-case>TRALE</fixed-case> Implementation of an <fixed-case>HPSG</fixed-case> Fragment for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Stefan</first><last>Müller</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
       <author><first>Janna</first><last>Lipenkova</last></author>
       <pages>240–249</pages>
       <url hash="633da6e1">Y13-1023</url>
@@ -215,7 +215,7 @@
     <paper id="25">
       <title>Transliteration Extraction from Classical <fixed-case>C</fixed-case>hinese Buddhist Literature Using Conditional Random Fields</title>
       <author><first>Yu-Chun</first><last>Wang</last></author>
-      <author><first>Richard Tzong-Han</first><last>Tsai</last></author>
+      <author id="richard-tzong-han-tsai"><first>Richard Tzong-Han</first><last>Tsai</last></author>
       <pages>260–266</pages>
       <url hash="882a0280">Y13-1025</url>
       <bibkey>wang-tsai-2013-transliteration</bibkey>
@@ -256,7 +256,7 @@
     </paper>
     <paper id="30">
       <title>An Application of Comparative Corpora of Interactional Data – Toward the Sound Profiles of Sites of Initiation in <fixed-case>F</fixed-case>rench and <fixed-case>M</fixed-case>andarin Recycling Repair</title>
-      <author><first>Helen Kai-yun</first><last>Chen</last></author>
+      <author id="helen-kaiyun-chen"><first>Helen Kai-yun</first><last>Chen</last></author>
       <pages>302–311</pages>
       <url hash="4aef263f">Y13-1030</url>
       <bibkey>chen-2013-application</bibkey>
@@ -264,14 +264,14 @@
     <paper id="31">
       <title>Of-Constructions in the Predicate of Demonstrate and Show in Academic Discourse</title>
       <author><first>Liyin</first><last>Chen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <pages>312–321</pages>
       <url hash="4b4a19b2">Y13-1031</url>
       <bibkey>chen-chung-2013-constructions</bibkey>
     </paper>
     <paper id="32">
       <title>Spatial Particles in <fixed-case>E</fixed-case>nglish: A Quantitative Corpus-Based Approach to the Conceptualization of Symmetry in Bodily Orientation</title>
-      <author><first>Alvin Cheng-Hsien</first><last>Chen</last></author>
+      <author id="alvin-cheng-hsien-chen"><first>Alvin Cheng-Hsien</first><last>Chen</last></author>
       <pages>322–328</pages>
       <url hash="07f667a4">Y13-1032</url>
       <bibkey>chen-2013-spatial</bibkey>
@@ -292,7 +292,7 @@
       <author><first>Hideki</first><last>Asoh</last></author>
       <author><first>Akira</first><last>Takagi</last></author>
       <author><first>Tatsuhiro</first><last>Konishi</last></author>
-      <author><first>Yukihiro</first><last>Itoh</last></author>
+      <author id="yukihiro-itoh"><first>Yukihiro</first><last>Itoh</last></author>
       <pages>339–348</pages>
       <url hash="aef1a6b9">Y13-1034</url>
       <bibkey>noguchi-etal-2013-event</bibkey>
@@ -343,7 +343,7 @@
     <paper id="40">
       <title>Transliteration Systems across <fixed-case>I</fixed-case>ndian Languages Using Parallel Corpora</title>
       <author><first>Rishabh</first><last>Srivastava</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <pages>390–398</pages>
       <url hash="863c8286">Y13-1040</url>
       <bibkey>srivastava-bhat-2013-transliteration</bibkey>
@@ -360,7 +360,7 @@
     <paper id="42">
       <title>Classifying Questions in Question Answering System Using Finite State Machines with a Simple Learning Approach</title>
       <author><first>Mohammad Moinul</first><last>Hoque</last></author>
-      <author><first>Teresa</first><last>Goncalves</last></author>
+      <author id="teresa-goncalves"><first>Teresa</first><last>Goncalves</last></author>
       <author><first>Paulo</first><last>Quaresma</last></author>
       <pages>409–414</pages>
       <url hash="05e9e3fb">Y13-1042</url>
@@ -377,22 +377,22 @@
     <paper id="44">
       <title><fixed-case>V</fixed-case>ietnamese Text Accent Restoration with Statistical Machine Translation</title>
       <author><first>Luan-Nghia</first><last>Pham</last></author>
-      <author><first>Viet-Hong</first><last>Tran</last></author>
-      <author><first>Vinh-Van</first><last>Nguyen</last></author>
+      <author id="viet-hong-tran"><first>Viet-Hong</first><last>Tran</last></author>
+      <author id="vinh-van-nguyen"><first>Vinh-Van</first><last>Nguyen</last></author>
       <pages>423–429</pages>
       <url hash="3b501984">Y13-1044</url>
       <bibkey>pham-etal-2013-vietnamese</bibkey>
     </paper>
     <paper id="45">
       <title>A Compact <fixed-case>FP</fixed-case>-Tree for Fast Frequent Pattern Retrieval</title>
-      <author><first>Tri Thanh</first><last>Nguyen</last></author>
+      <author id="tri-thanh-nguyen"><first>Tri Thanh</first><last>Nguyen</last></author>
       <pages>430–439</pages>
       <url hash="4db89aef">Y13-1045</url>
       <bibkey>nguyen-2013-compact</bibkey>
     </paper>
     <paper id="46">
       <title><fixed-case>ML</fixed-case>-Tuned Constraint Grammars</title>
-      <author><first>Eckhard</first><last>Bick</last></author>
+      <author id="eckhard-bick"><first>Eckhard</first><last>Bick</last></author>
       <pages>440–449</pages>
       <url hash="0e24baae">Y13-1046</url>
       <bibkey>bick-2013-ml</bibkey>
@@ -451,7 +451,7 @@
       <title>Automatic Clause Boundary Annotation in the <fixed-case>H</fixed-case>indi Treebank</title>
       <author><first>Rahul</first><last>Sharma</last></author>
       <author><first>Soma</first><last>Paul</last></author>
-      <author><first>Riyaz Ahmad</first><last>Bhat</last></author>
+      <author id="riyaz-ahmad-bhat"><first>Riyaz Ahmad</first><last>Bhat</last></author>
       <author><first>Sambhav</first><last>Jain</last></author>
       <pages>499–504</pages>
       <url hash="fbecde78">Y13-1053</url>
@@ -520,14 +520,14 @@
     </paper>
     <paper id="6">
       <title>Automatic Identification of <fixed-case>E</fixed-case>nglish Collocation Errors Based on Dependency Relations</title>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
       <pages>550–555</pages>
       <url hash="fd473036">Y13-2006</url>
       <bibkey>gao-2013-automatic</bibkey>
     </paper>
     <paper id="7">
       <title>A <fixed-case>J</fixed-case>apanese Learning Support System Matching Individual Abilities</title>
-      <author><first>Takahiro</first><last>Ohno</last></author>
+      <author id="takahiro-ohno"><first>Takahiro</first><last>Ohno</last></author>
       <author><first>Zyunitiro</first><last>Edani</last></author>
       <author><first>Ayato</first><last>Inoue</last></author>
       <author><first>Dongli</first><last>Han</last></author>
diff --git a/data/xml/Y14.xml b/data/xml/Y14.xml
index 1c46d4f77b..c3e071f9e7 100644
--- a/data/xml/Y14.xml
+++ b/data/xml/Y14.xml
@@ -16,7 +16,7 @@
     </frontmatter>
     <paper id="1">
       <title>Robust Semantics for Semantic Parsing</title>
-      <author><first>Mark</first><last>Steedman</last></author>
+      <author id="mark-steedman"><first>Mark</first><last>Steedman</last></author>
       <pages>1–1</pages>
       <url hash="8067bfb9">Y14-1001</url>
       <bibkey>steedman-2014-robust</bibkey>
@@ -30,14 +30,14 @@
     </paper>
     <paper id="3">
       <title>Registerial Cartography: Context-based Mapping of Text Types and their Rhetorical-relational Organization</title>
-      <author><first>Christian M.I.M.</first><last>Matthiessen</last></author>
+      <author id="christian-m-i-m-matthiessen"><first>Christian M.I.M.</first><last>Matthiessen</last></author>
       <pages>5–26</pages>
       <url hash="4908f47d">Y14-1003</url>
       <bibkey>matthiessen-2014-registerial</bibkey>
     </paper>
     <paper id="4">
       <title>Discourse for Machine Translation.</title>
-      <author><first>Bonnie</first><last>Webber</last></author>
+      <author id="bonnie-webber"><first>Bonnie</first><last>Webber</last></author>
       <pages>27–27</pages>
       <url hash="eba24ea5">Y14-1004</url>
       <bibkey>webber-2014-discourse</bibkey>
@@ -72,7 +72,7 @@
     </paper>
     <paper id="9">
       <title>Phonological Suppression of Anaphoric Wh-expressions in <fixed-case>E</fixed-case>nglish and <fixed-case>K</fixed-case>orean</title>
-      <author><first>Myung-Kwan</first><last>Park</last></author>
+      <author id="myung-kwan-park"><first>Myung-Kwan</first><last>Park</last></author>
       <pages>57–64</pages>
       <url hash="097f38d0">Y14-1009</url>
       <bibkey>park-2014-phonological</bibkey>
@@ -83,7 +83,7 @@
       <author><first>Sonse</first><last>Shimaoka</last></author>
       <author><first>Kazeto</first><last>Yamamoto</last></author>
       <author><first>Yotaro</first><last>Watanabe</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>65–74</pages>
       <url hash="1f0c870d">Y14-1010</url>
@@ -96,7 +96,7 @@
       <author><first>Cen-Chieh</first><last>Chen</last></author>
       <author><first>Chad</first><last>Liu</last></author>
       <author><first>Chun-Hung</first><last>Lu</last></author>
-      <author><first>Wen-Lian</first><last>Hsu</last></author>
+      <author id="wen-lian-hsu"><first>Wen-Lian</first><last>Hsu</last></author>
       <pages>75–84</pages>
       <url hash="a66290c9">Y14-1011</url>
       <bibkey>chang-etal-2014-semantic-frame</bibkey>
@@ -119,7 +119,7 @@
     <paper id="14">
       <title>A Corpus-Based Quantitative Study of Nominalizations across <fixed-case>C</fixed-case>hinese and <fixed-case>B</fixed-case>ritish Media <fixed-case>E</fixed-case>nglish</title>
       <author><first>Ying</first><last>Liu</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <author><first>Naixing</first><last>Wei</last></author>
       <pages>101–110</pages>
       <url hash="10312dda">Y14-1014</url>
@@ -151,7 +151,7 @@
     <paper id="18">
       <title>Taking Antonymy Mask off in Vector Space</title>
       <author><first>Enrico</first><last>Santus</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Alessandro</first><last>Lenci</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>135–144</pages>
@@ -207,7 +207,7 @@
       <title>Automatically Building a Corpus for Sentiment Analysis on <fixed-case>I</fixed-case>ndonesian Tweets</title>
       <author><first>Alfan Farizki</first><last>Wicaksono</last></author>
       <author><first>Clara</first><last>Vania</last></author>
-      <author><first>Bayu</first><last>Distiawan</last></author>
+      <author id="bayu-distiawan"><first>Bayu</first><last>Distiawan</last></author>
       <author><first>Mirna</first><last>Adriani</last></author>
       <pages>185–194</pages>
       <url hash="a150fdd9">Y14-1024</url>
@@ -233,7 +233,7 @@
     </paper>
     <paper id="27">
       <title>How Mutual Knowledge Constrains the Choice of Anaphoric Demonstratives in <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish</title>
-      <author><first>David Yoshikazu</first><last>Oshima</last></author>
+      <author id="david-yoshikazu-oshima"><first>David Yoshikazu</first><last>Oshima</last></author>
       <author><first>Eric</first><last>McCready</last></author>
       <pages>214–223</pages>
       <url hash="1fc41b37">Y14-1027</url>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="29">
       <title>Annotating Article Errors in <fixed-case>S</fixed-case>panish Learner Texts: Design and Evaluation of an Annotation Scheme</title>
-      <author><first>M. Pilar</first><last>Valverde Ibañez</last></author>
+      <author id="m-pilar-valverde-ibanez"><first>M. Pilar</first><last>Valverde Ibañez</last></author>
       <author><first>Akira</first><last>Ohtani</last></author>
       <pages>234–243</pages>
       <url hash="e50d55fe">Y14-1029</url>
@@ -288,9 +288,9 @@
     </paper>
     <paper id="34">
       <title><fixed-case>T</fixed-case>ake<fixed-case>T</fixed-case>wo: A Word Aligner based on Self Learning</title>
-      <author><first>Jim</first><last>Chang</last></author>
-      <author><first>Jian-Cheng</first><last>Wu</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
+      <author id="jian-cheng-wu"><first>Jian-Cheng</first><last>Wu</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <pages>282–291</pages>
       <url hash="2a38907a">Y14-1034</url>
       <bibkey>chang-etal-2014-taketwo</bibkey>
@@ -311,7 +311,7 @@
     </paper>
     <paper id="37">
       <title>Readability of <fixed-case>B</fixed-case>angla News Articles for Children</title>
-      <author><first>Zahrul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahrul</first><last>Islam</last></author>
       <author><first>Rashedur</first><last>Rahman</last></author>
       <pages>309–317</pages>
       <url hash="79bdc56f">Y14-1037</url>
@@ -321,7 +321,7 @@
       <title>Focusing on a Subset of Scripts Enhances the Learning Efficiency of Second Language Writing System</title>
       <author><first>Ching-Pong</first><last>Au</last></author>
       <author><first>Yuk-Man</first><last>Cheung</last></author>
-      <author><first>Charles</first><last>Chen Jr.</last></author>
+      <author id="charles-chen-jr"><first>Charles</first><last>Chen Jr.</last></author>
       <pages>318–327</pages>
       <url hash="3be9cb02">Y14-1038</url>
       <bibkey>au-etal-2014-focusing</bibkey>
@@ -331,7 +331,7 @@
       <author><first>Miao</first><last>Fan</last></author>
       <author><first>Qiang</first><last>Zhou</last></author>
       <author><first>Emily</first><last>Chang</last></author>
-      <author><first>Thomas Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Thomas Fang</first><last>Zheng</last></author>
       <pages>328–337</pages>
       <url hash="e5638d4f">Y14-1039</url>
       <bibkey>fan-etal-2014-transition</bibkey>
@@ -374,7 +374,7 @@
     <paper id="44">
       <title>A Quantitative View of Short Utterances in Daily Conversation: A Case Study of Thats right, Thats true and Thats correct</title>
       <author><first>Yanjiao</first><last>Li</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <author><first>Jing</first><last>Cao</last></author>
       <pages>378–386</pages>
       <url hash="b74d2a8e">Y14-1044</url>
@@ -384,7 +384,7 @@
       <title>A Listenability Measuring Method for an Adaptive Computer-assisted Language Learningand Teaching System</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
       <author><first>Shota</first><last>Ueda</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <author><first>Hiroaki</first><last>Nanjo</last></author>
       <pages>387–394</pages>
       <url hash="70e5e603">Y14-1045</url>
@@ -437,7 +437,7 @@
     </paper>
     <paper id="51">
       <title>On the Functional Differences between the Discourse Particles Ne and Yone in <fixed-case>J</fixed-case>apanese.</title>
-      <author><first>David Yoshikazu</first><last>Oshima</last></author>
+      <author id="david-yoshikazu-oshima"><first>David Yoshikazu</first><last>Oshima</last></author>
       <pages>442–451</pages>
       <url hash="4f7bf8e0">Y14-1051</url>
       <bibkey>oshima-2014-functional</bibkey>
@@ -477,7 +477,7 @@
     <paper id="56">
       <title>A Hierarchical Word Sequence Language Model</title>
       <author><first>Xiaoyi</first><last>Wu</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>489–494</pages>
       <url hash="16824bb9">Y14-1056</url>
       <bibkey>wu-matsumoto-2014-hierarchical</bibkey>
@@ -525,16 +525,16 @@
     </paper>
     <paper id="62">
       <title>A Keyword-based Monolingual Sentence Aligner in Text Simplification</title>
-      <author><first>Chung-Chi</first><last>Huang</last></author>
+      <author id="chung-chi-huang"><first>Chung-Chi</first><last>Huang</last></author>
       <pages>542–550</pages>
       <url hash="e0b1a6f0">Y14-1062</url>
       <bibkey>huang-2014-keyword</bibkey>
     </paper>
     <paper id="63">
       <title>Automatic Detection of Comma Splices</title>
-      <author><first>John</first><last>Lee</last></author>
+      <author id="john-s-y-lee"><first>John</first><last>Lee</last></author>
       <author><first>Chak Yan</first><last>Yeung</last></author>
-      <author><first>Martin</first><last>Chodorow</last></author>
+      <author id="martin-chodorow"><first>Martin</first><last>Chodorow</last></author>
       <pages>551–560</pages>
       <url hash="72ba1f56">Y14-1063</url>
       <bibkey>lee-etal-2014-automatic</bibkey>
@@ -633,7 +633,7 @@
     <paper id="75">
       <title>K-repeating Substrings: a String-Algorithmic Approach to Privacy-Preserving Publishing of Textual Data</title>
       <author><first>Yusuke</first><last>Matsubara</last></author>
-      <author><first>Koiti</first><last>Hasida</last></author>
+      <author id="koiti-hasida"><first>Koiti</first><last>Hasida</last></author>
       <pages>658–667</pages>
       <url hash="38c4bea6">Y14-1075</url>
       <bibkey>matsubara-hasida-2014-k</bibkey>
diff --git a/data/xml/Y15.xml b/data/xml/Y15.xml
index 5e97900097..a8e7480c88 100644
--- a/data/xml/Y15.xml
+++ b/data/xml/Y15.xml
@@ -17,7 +17,7 @@
     <paper id="1">
       <title>Two-level Word Class Categorization Model in Analytic Languages and Its Implications for <fixed-case>POS</fixed-case> Tagging in <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Corpora</title>
       <author><first>Renqiang</first><last>Wang</last></author>
-      <author><first>Changning</first><last>Huang</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>1–10</pages>
       <url hash="c3ac4ba1">Y15-1001</url>
       <bibkey>wang-huang-2015-two</bibkey>
@@ -44,7 +44,7 @@
       <author><first>Salima</first><last>Harrat</last></author>
       <author><first>Salma</first><last>Jamoussi</last></author>
       <author><first>Mourad</first><last>Abbas</last></author>
-      <author><first>Kamel</first><last>Smaili</last></author>
+      <author id="kamel-smaili"><first>Kamel</first><last>Smaili</last></author>
       <pages>26–34</pages>
       <url hash="60313d28">Y15-1004</url>
       <bibkey>meftouh-etal-2015-machine</bibkey>
@@ -63,8 +63,8 @@
     </paper>
     <paper id="6">
       <title>Computing Semantic Text Similarity Using Rich Features</title>
-      <author id="yang-liu"><first>Yang</first><last>Liu</last></author>
-      <author><first>Chengjie</first><last>Sun</last></author>
+      <author><first>Yang</first><last>Liu</last></author>
+      <author id="cheng-jie-sun"><first>Chengjie</first><last>Sun</last></author>
       <author><first>Lei</first><last>Lin</last></author>
       <author><first>Xiaolong</first><last>Wang</last></author>
       <author><first>Yuming</first><last>Zhao</last></author>
@@ -109,7 +109,7 @@
       <title>Distant Supervision for Entity Linking</title>
       <author><first>Miao</first><last>Fan</last></author>
       <author><first>Qiang</first><last>Zhou</last></author>
-      <author><first>Thomas Fang</first><last>Zheng</last></author>
+      <author id="fang-zheng"><first>Thomas Fang</first><last>Zheng</last></author>
       <pages>79–86</pages>
       <url hash="dbb82895">Y15-1010</url>
       <bibkey>fan-etal-2015-distant</bibkey>
@@ -127,7 +127,7 @@
     <paper id="12">
       <title>Fast and Large-scale Unsupervised Relation Extraction</title>
       <author><first>Sho</first><last>Takase</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>96–105</pages>
       <url hash="c8dc946e">Y15-1012</url>
@@ -137,7 +137,7 @@
       <title>Reducing Lexical Features in Parsing by Word Embeddings</title>
       <author><first>Hiroya</first><last>Komatsu</last></author>
       <author><first>Ran</first><last>Tian</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>106–113</pages>
       <url hash="2b9a315c">Y15-1013</url>
@@ -162,7 +162,7 @@
       <title>Unsupervised and Lightly Supervised Part-of-Speech Tagging Using Recurrent Neural Networks</title>
       <author><first>Othman</first><last>Zennaki</last></author>
       <author><first>Nasredine</first><last>Semmar</last></author>
-      <author><first>Laurent</first><last>Besacier</last></author>
+      <author id="laurent-besacier"><first>Laurent</first><last>Besacier</last></author>
       <pages>133–142</pages>
       <url hash="3ca9d785">Y15-1016</url>
       <bibkey>zennaki-etal-2015-unsupervised</bibkey>
@@ -195,7 +195,7 @@
     <paper id="20">
       <title>A Comprehensive Filter Feature Selection for Improving Document Classification</title>
       <author><first>Nguyen Hoai Nam</first><last>Le</last></author>
-      <author><first>Bao Quoc</first><last>Ho</last></author>
+      <author id="bao-quoc-ho"><first>Bao Quoc</first><last>Ho</last></author>
       <pages>169–177</pages>
       <url hash="e599aca0">Y15-1020</url>
       <bibkey>le-ho-2015-comprehensive</bibkey>
@@ -223,7 +223,7 @@
       <title>Sentiment Classification of <fixed-case>A</fixed-case>rabic Documents: Experiments with multi-type features and ensemble algorithms</title>
       <author><first>Amine</first><last>Bayoudhi</last></author>
       <author><first>Hatem</first><last>Ghorbel</last></author>
-      <author><first>Lamia Hadrich</first><last>Belguith</last></author>
+      <author id="lamia-hadrich-belguith"><first>Lamia Hadrich</first><last>Belguith</last></author>
       <pages>196–205</pages>
       <url hash="2d5d5c45">Y15-1023</url>
       <bibkey>bayoudhi-etal-2015-sentiment</bibkey>
@@ -254,14 +254,14 @@
     <paper id="27">
       <title>A Comparative Study on <fixed-case>M</fixed-case>andarin and <fixed-case>C</fixed-case>antonese Resultative Verb Compounds</title>
       <author><first>Helena Yan Ping</first><last>Lau</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <pages>231–239</pages>
       <url hash="c34f414d">Y15-1027</url>
       <bibkey>lau-lee-2015-comparative</bibkey>
     </paper>
     <paper id="28">
       <title>Complex-<fixed-case>NP</fixed-case> Islands in <fixed-case>K</fixed-case>orean: An Experimental Approach</title>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <author><first>Yeonkyung</first><last>Park</last></author>
       <pages>240–249</pages>
       <url hash="2451e710">Y15-1028</url>
@@ -271,7 +271,7 @@
       <title>Two Types of Multiple Subject Constructions (<fixed-case>MSC</fixed-case>s) in <fixed-case>K</fixed-case>orean</title>
       <author><first>Ji-Hye</first><last>Kim</last></author>
       <author><first>Eunah</first><last>Kim</last></author>
-      <author><first>James</first><last>Yoon</last></author>
+      <author id="james-yoon"><first>James</first><last>Yoon</last></author>
       <pages>250–258</pages>
       <url hash="f0c2833e">Y15-1029</url>
       <bibkey>kim-etal-2015-two</bibkey>
@@ -282,7 +282,7 @@
       <author><first>Vichet</first><last>Chea</last></author>
       <author><first>Andrew</first><last>Finch</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <pages>259–269</pages>
       <url hash="084700a5">Y15-1030</url>
       <bibkey>kyaw-thu-etal-2015-large</bibkey>
@@ -291,7 +291,7 @@
       <title><fixed-case>E</fixed-case>nglish to <fixed-case>C</fixed-case>hinese Translation: How <fixed-case>C</fixed-case>hinese Character Matters</title>
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Hai</first><last>Zhao</last></author>
-      <author><first>Bao-Liang</first><last>Lu</last></author>
+      <author id="bao-liang-lu"><first>Bao-Liang</first><last>Lu</last></author>
       <pages>270–280</pages>
       <url hash="630d12e9">Y15-1031</url>
       <bibkey>wang-etal-2015-english</bibkey>
@@ -310,7 +310,7 @@
       <title>Large-scale Dictionary Construction via Pivot-based Statistical Machine Translation with Significance Pruning and Neural Network Features</title>
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Toshiaki</first><last>Nakazawa</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>289–297</pages>
@@ -319,7 +319,7 @@
     </paper>
     <paper id="34">
       <title>Annotation and Classification of <fixed-case>F</fixed-case>rench Feedback Communicative Functions</title>
-      <author><first>Laurent</first><last>Prévot</last></author>
+      <author id="laurent-prevot"><first>Laurent</first><last>Prévot</last></author>
       <author><first>Jan</first><last>Gorisch</last></author>
       <author><first>Sankar</first><last>Mukherjee</last></author>
       <pages>298–306</pages>
@@ -357,7 +357,7 @@
     <paper id="38">
       <title>The Cross-modal Representation of Metaphors</title>
       <author><first>Yutung</first><last>Chang</last></author>
-      <author><first>Kawai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Kawai</first><last>Chui</last></author>
       <pages>332–340</pages>
       <url hash="9da8ec08">Y15-1038</url>
       <bibkey>chang-chui-2015-cross</bibkey>
@@ -365,7 +365,7 @@
     <paper id="39">
       <title>Writing to Read: the Case of <fixed-case>C</fixed-case>hinese</title>
       <author><first>Qi</first><last>Zhang</last></author>
-      <author><first>Ronan</first><last>Reilly</last></author>
+      <author id="ronan-g-reilly"><first>Ronan</first><last>Reilly</last></author>
       <pages>341–350</pages>
       <url hash="1ac84cb7">Y15-1039</url>
       <bibkey>zhang-reilly-2015-writing</bibkey>
@@ -373,14 +373,14 @@
     <paper id="40">
       <title>Design of a Learner Corpus for Listening and Speaking Performance</title>
       <author><first>Katsunori</first><last>Kotani</last></author>
-      <author><first>Takehiko</first><last>Yoshimi</last></author>
+      <author id="takehiko-yoshimi"><first>Takehiko</first><last>Yoshimi</last></author>
       <pages>351–358</pages>
       <url hash="94140c03">Y15-1040</url>
       <bibkey>kotani-yoshimi-2015-design</bibkey>
     </paper>
     <paper id="41">
       <title>Understanding Infants’ Language Development in Relation to Levels of Consciousness: An Approach in Building up an Agent-based Model</title>
-      <author><first>Helena Hong</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena Hong</first><last>Gao</last></author>
       <author><first>Can</first><last>Guo</last></author>
       <pages>359–368</pages>
       <url hash="869f0418">Y15-1041</url>
@@ -406,7 +406,7 @@
       <title>Self Syntactico-Semantic Enrichment of <fixed-case>LMF</fixed-case> Normalized Dictionaries</title>
       <author><first>Imen</first><last>Elleuch</last></author>
       <author><first>Bilel</first><last>Gargouri</last></author>
-      <author><first>Abdelmajid</first><last>Ben Hamadou</last></author>
+      <author id="abdelmajid-ben-hamadou"><first>Abdelmajid</first><last>Ben Hamadou</last></author>
       <pages>387–395</pages>
       <url hash="b86db8c3">Y15-1044</url>
       <bibkey>elleuch-etal-2015-self</bibkey>
@@ -428,7 +428,7 @@
     </paper>
     <paper id="47">
       <title>Not Voice but Case Identity in <fixed-case>VP</fixed-case> Ellipsis of <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Myungkwan</first><last>Park</last></author>
+      <author id="myung-kwan-park"><first>Myungkwan</first><last>Park</last></author>
       <author><first>Sunjoo</first><last>Choi</last></author>
       <pages>413–421</pages>
       <url hash="a32d896e">Y15-1047</url>
@@ -437,7 +437,7 @@
     <paper id="48">
       <title>A Statistical Modeling of the Correlation between Island Effects and Working-memory Capacity for <fixed-case>L</fixed-case>2 Learners</title>
       <author><first>Euhee</first><last>Kim</last></author>
-      <author><first>Myungkwan</first><last>Park</last></author>
+      <author id="myung-kwan-park"><first>Myungkwan</first><last>Park</last></author>
       <pages>422–430</pages>
       <url hash="0c176e53">Y15-1048</url>
       <bibkey>kim-park-2015-statistical</bibkey>
@@ -462,7 +462,7 @@
     <paper id="51">
       <title>An Improved Hierarchical Word Sequence Language Model Using Directional Information</title>
       <author><first>Xiaoyi</first><last>Wu</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>449–454</pages>
       <url hash="55237449">Y15-1051</url>
       <bibkey>wu-matsumoto-2015-improved</bibkey>
@@ -508,7 +508,7 @@
       <author><first>Yoshinari</first><last>Fujinuma</last></author>
       <author><first>Hikaru</first><last>Yokono</last></author>
       <author><first>Pascual</first><last>Martínez-Gómez</last></author>
-      <author><first>Akiko</first><last>Aizawa</last></author>
+      <author id="akiko-aizawa"><first>Akiko</first><last>Aizawa</last></author>
       <pages>488–495</pages>
       <url hash="fdd84032">Y15-1056</url>
       <bibkey>fujinuma-etal-2015-distant</bibkey>
@@ -563,7 +563,7 @@
       <title>Corpus annotation with a linguistic analysis of the associations between event mentions and spatial expressions</title>
       <author><first>Jin-Woo</first><last>Chung</last></author>
       <author><first>Jinseon</first><last>You</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <pages>535–543</pages>
       <url hash="cd11cfdf">Y15-1062</url>
       <bibkey>chung-etal-2015-corpus</bibkey>
@@ -629,7 +629,7 @@
     </paper>
     <paper id="2">
       <title>Toward a Corpus of <fixed-case>C</fixed-case>antonese Verbal Comments and their Classification by Multi-dimensional Analysis</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <pages>10–18</pages>
       <url hash="1c19682f">Y15-2002</url>
       <bibkey>kwong-2015-toward</bibkey>
@@ -639,7 +639,7 @@
       <author><first>Soyun</first><last>Jeong</last></author>
       <author><first>Youngmin</first><last>Park</last></author>
       <author><first>Sangwoo</first><last>Kang</last></author>
-      <author><first>Jungyun</first><last>Seo</last></author>
+      <author id="jungyun-seo"><first>Jungyun</first><last>Seo</last></author>
       <pages>19–26</pages>
       <url hash="8101856c">Y15-2003</url>
       <bibkey>jeong-etal-2015-improved</bibkey>
@@ -647,19 +647,19 @@
     <paper id="4">
       <title>An Arguing Lexicon for Stance Classification on Short Text Comments in <fixed-case>C</fixed-case>hinese</title>
       <author><first>Ju-han</first><last>Chuang</last></author>
-      <author><first>Shu-Kai</first><last>Hsieh</last></author>
+      <author id="shu-kai-hsieh"><first>Shu-Kai</first><last>Hsieh</last></author>
       <pages>27–36</pages>
       <url hash="73399050">Y15-2004</url>
       <bibkey>chuang-hsieh-2015-arguing</bibkey>
     </paper>
     <paper id="5">
       <title>Learning Sentential Patterns of Various Rhetoric Moves for Assisted Academic Writing</title>
-      <author><first>Jim</first><last>Chang</last></author>
+      <author id="jim-chang"><first>Jim</first><last>Chang</last></author>
       <author><first>Hsiang-Ling</first><last>Hsu</last></author>
       <author><first>Joanne</first><last>Boisson</last></author>
       <author><first>Hao-Chun</first><last>Peng</last></author>
       <author><first>Yu-Hsuan</first><last>Wu</last></author>
-      <author><first>Jason S.</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason S.</first><last>Chang</last></author>
       <pages>37–45</pages>
       <url hash="0a822e9f">Y15-2005</url>
       <bibkey>chang-etal-2015-learning</bibkey>
@@ -691,7 +691,7 @@
       <title>Semi-automatic Filtering of Translation Errors in Triangle Corpus</title>
       <author><first>Sung-Kwon</first><last>Choi</last></author>
       <author><first>Jong-Hun</first><last>Shin</last></author>
-      <author><first>Young-Gil</first><last>Kim</last></author>
+      <author id="young-gil-kim"><first>Young-Gil</first><last>Kim</last></author>
       <pages>72–79</pages>
       <url hash="cee9d5d5">Y15-2009</url>
       <bibkey>choi-etal-2015-semi</bibkey>
@@ -700,7 +700,7 @@
       <title>Cross-language Projection of Dependency Trees for Tree-to-tree Machine Translation</title>
       <author><first>Yu</first><last>Shen</last></author>
       <author><first>Chenhui</first><last>Chu</last></author>
-      <author><first>Fabien</first><last>Cromieres</last></author>
+      <author id="fabien-cromieres"><first>Fabien</first><last>Cromieres</last></author>
       <author><first>Sadao</first><last>Kurohashi</last></author>
       <pages>80–88</pages>
       <url hash="d56506be">Y15-2010</url>
@@ -717,7 +717,7 @@
     </paper>
     <paper id="12">
       <title>Finding the Origin of a Translated Historical Document</title>
-      <author><first>Zahrul</first><last>Islam</last></author>
+      <author id="zahurul-islam"><first>Zahrul</first><last>Islam</last></author>
       <author><first>Natia</first><last>Dundua</last></author>
       <pages>96–105</pages>
       <url hash="45c846c0">Y15-2012</url>
@@ -727,7 +727,7 @@
       <title>Improving the Performance of an Example-Based Machine Translation System Using a Domain-specific Bilingual Lexicon</title>
       <author><first>Nasredine</first><last>Semmar</last></author>
       <author><first>Othman</first><last>Zennaki</last></author>
-      <author><first>Meriama</first><last>Laib</last></author>
+      <author id="meriama-laib"><first>Meriama</first><last>Laib</last></author>
       <pages>106–115</pages>
       <url hash="3cc31d88">Y15-2013</url>
       <bibkey>semmar-etal-2015-improving</bibkey>
@@ -736,7 +736,7 @@
       <title>A Multifactorial Analysis of <fixed-case>E</fixed-case>nglish Particle Movement in <fixed-case>K</fixed-case>orean <fixed-case>EFL</fixed-case> Learners’ Writings</title>
       <author><first>Gyu-Hyeong</first><last>Lee</last></author>
       <author><first>Ha-Eung</first><last>Kim</last></author>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <pages>116–124</pages>
       <url hash="ead99c43">Y15-2014</url>
       <bibkey>lee-etal-2015-multifactorial</bibkey>
@@ -745,7 +745,7 @@
       <title>An Efficient Annotation for Phrasal Verbs using Dependency Information</title>
       <author><first>Masayuki</first><last>Komai</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>125–131</pages>
       <url hash="7bca97e2">Y15-2015</url>
       <bibkey>komai-etal-2015-efficient</bibkey>
@@ -754,7 +754,7 @@
       <title>Color Aesthetics and Social Networks in Complete Tang Poems: Explorations and Discoveries</title>
       <author><first>Chao-Lin</first><last>Liu</last></author>
       <author><first>Hongsu</first><last>Wang</last></author>
-      <author><first>Wen-Huei</first><last>Cheng</last></author>
+      <author id="wen-huei-cheng"><first>Wen-Huei</first><last>Cheng</last></author>
       <author><first>Chu-Ting</first><last>Hsu</last></author>
       <author><first>Wei-Yun</first><last>Chiu</last></author>
       <pages>132–141</pages>
@@ -796,7 +796,7 @@
     </paper>
     <paper id="21">
       <title>Distinguishing between True and False Stories using various Linguistic Features</title>
-      <author><first>Yaakov</first><last>Hacohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>Hacohen-Kerner</last></author>
       <author><first>Rakefet</first><last>Dilmon</last></author>
       <author><first>Shimon</first><last>Friedlich</last></author>
       <author><first>Daniel Nisim</first><last>Cohen</last></author>
@@ -806,7 +806,7 @@
     </paper>
     <paper id="22">
       <title>Bilingually motivated segmentation and generation of word translations using relatively small translation data sets</title>
-      <author><first>Kavitha Karimbi</first><last>Mahesh</last></author>
+      <author id="kavi-mahesh"><first>Kavitha Karimbi</first><last>Mahesh</last></author>
       <author><first>Luís</first><last>Gomes</last></author>
       <author><first>José</first><last>Lopes</last></author>
       <pages>187–196</pages>
@@ -868,7 +868,7 @@
     <paper id="29">
       <title>A Corpus-based Comparatively Study on the Semantic Features and Syntactic patterns of Yòu/Hái in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
       <author><first>Yuncui</first><last>Zhang</last></author>
-      <author><first>Pengyuan</first><last>Liu</last></author>
+      <author id="pengyuan-liu"><first>Pengyuan</first><last>Liu</last></author>
       <pages>249–257</pages>
       <url hash="8eca1bdb">Y15-2029</url>
       <bibkey>zhang-liu-2015-corpus</bibkey>
@@ -894,7 +894,7 @@
     </paper>
     <paper id="32">
       <title>Automatic Classification of Spoken Languages using Diverse Acoustic Features</title>
-      <author><first>Yaakov</first><last>Hacohen-Kerner</last></author>
+      <author id="yaakov-hacohen-kerner"><first>Yaakov</first><last>Hacohen-Kerner</last></author>
       <author><first>Ruben</first><last>Hagege</last></author>
       <pages>275–285</pages>
       <url hash="c013c408">Y15-2032</url>
@@ -929,7 +929,7 @@
     </paper>
     <paper id="36">
       <title>Feature Reduction Using Ensemble Approach</title>
-      <author><first>Yingju</first><last>Xia</last></author>
+      <author id="yingju-xia"><first>Yingju</first><last>Xia</last></author>
       <author><first>Cuiqin</first><last>Hou</last></author>
       <author><first>Zhuoran</first><last>Xu</last></author>
       <author><first>Jun</first><last>Sun</last></author>
@@ -960,7 +960,7 @@
     </paper>
     <paper id="39">
       <title>Dependency parsing for <fixed-case>C</fixed-case>hinese long sentence: A second-stage main structure parsing method</title>
-      <author id="bo-li"><first>Bo</first><last>Li</last></author>
+      <author><first>Bo</first><last>Li</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
       <author><first>Weiguang</first><last>Qu</last></author>
       <pages>337–344</pages>
diff --git a/data/xml/Y16.xml b/data/xml/Y16.xml
index a6ca9a2d81..4abfd29ef4 100644
--- a/data/xml/Y16.xml
+++ b/data/xml/Y16.xml
@@ -15,14 +15,14 @@
     </frontmatter>
     <paper id="1">
       <title>The <fixed-case>C</fixed-case>ore<fixed-case>G</fixed-case>ram Project: Theoretical Linguistics, Theory Development and Verification</title>
-      <author><first>Stefan</first><last>Müller</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
       <url hash="95545b43">Y16-1001</url>
       <pages>3–3</pages>
       <bibkey>muller-2016-coregram</bibkey>
     </paper>
     <paper id="2">
       <title>Inferring Methodological Meta-knowledge from Large Biomedical Corpora</title>
-      <author><first>Goran</first><last>Nenadic</last></author>
+      <author id="goran-nenadic"><first>Goran</first><last>Nenadic</last></author>
       <url hash="7aa037dc">Y16-1002</url>
       <pages>5–5</pages>
       <bibkey>nenadic-2016-inferring</bibkey>
@@ -64,7 +64,7 @@
     </paper>
     <paper id="8">
       <title>The grammar and semantics of disjuncts in World Englishes</title>
-      <author><first>Shirley</first><last>Dita</last></author>
+      <author id="shirley-dita"><first>Shirley</first><last>Dita</last></author>
       <url hash="1e98b59c">Y16-1008</url>
       <pages>35–35</pages>
       <bibkey>dita-2016-grammar</bibkey>
@@ -92,7 +92,7 @@
       <title><fixed-case>K</fixed-case>orean Language Resources for Everyone</title>
       <author><first>Jungyeul</first><last>Park</last></author>
       <author><first>Jeen-Pyo</first><last>Hong</last></author>
-      <author><first>Jeong-Won</first><last>Cha</last></author>
+      <author id="jeong-won-cha"><first>Jeong-Won</first><last>Cha</last></author>
       <url hash="144c3764">Y16-2002</url>
       <pages>49–58</pages>
       <bibkey>park-etal-2016-korean</bibkey>
@@ -108,7 +108,7 @@
       <title>A Generalized Framework for Hierarchical Word Sequence Language Model</title>
       <author><first>Xiaoyi</first><last>Wu</last></author>
       <author><first>Kevin</first><last>Duh</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="7ac4231b">Y16-2004</url>
       <pages>69–75</pages>
       <bibkey>wu-etal-2016-generalized</bibkey>
@@ -116,7 +116,7 @@
     <paper id="5">
       <title>Processing <fixed-case>E</fixed-case>nglish <fixed-case>I</fixed-case>sland Sentences by <fixed-case>K</fixed-case>orean <fixed-case>EFL</fixed-case> Learners</title>
       <author><first>Yeonkyung</first><last>Park</last></author>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <url hash="d6d98d55">Y16-2005</url>
       <pages>77–84</pages>
       <bibkey>park-lee-2016-processing</bibkey>
@@ -125,7 +125,7 @@
       <title>Multiple Emotions Detection in Conversation Transcripts</title>
       <author><first>Duc-Anh</first><last>Phan</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="c01386a7">Y16-2006</url>
       <pages>85–94</pages>
       <bibkey>phan-etal-2016-multiple</bibkey>
@@ -183,7 +183,7 @@
       <title>Event Based Emotion Classification for News Articles</title>
       <author><first>Minglei</first><last>Li</last></author>
       <author><first>Da</first><last>Wang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
       <url hash="a14e15f6">Y16-2013</url>
       <pages>153–162</pages>
@@ -201,7 +201,7 @@
     <paper id="15">
       <title>Integrating Word Embedding Offsets into the Espresso System for Part-Whole Relation Extraction</title>
       <author><first>Van-Thuy</first><last>Phi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="dd7b7fce">Y16-2015</url>
       <pages>173–181</pages>
       <bibkey>phi-matsumoto-2016-integrating</bibkey>
@@ -210,7 +210,7 @@
       <title>An Experimental Study of Subject Properties in <fixed-case>K</fixed-case>orean Multiple Subject Constructions (<fixed-case>MSC</fixed-case>s)</title>
       <author><first>Ji-Hye</first><last>Kim</last></author>
       <author><first>Eunah</first><last>Kim</last></author>
-      <author><first>James</first><last>Yoon</last></author>
+      <author id="james-yoon"><first>James</first><last>Yoon</last></author>
       <url hash="490384d3">Y16-2016</url>
       <pages>183–190</pages>
       <bibkey>kim-etal-2016-experimental</bibkey>
@@ -224,10 +224,10 @@
     </paper>
     <paper id="18">
       <title>Planting Trees in the Desert: Delexicalized Tagging and Parsing Combined</title>
-      <author><first>Daniel</first><last>Zeman</last></author>
+      <author id="daniel-zeman"><first>Daniel</first><last>Zeman</last></author>
       <author><first>David</first><last>Mareček</last></author>
       <author><first>Zhiwei</first><last>Yu</last></author>
-      <author><first>Zdeněk</first><last>Žabokrtský</last></author>
+      <author id="zdenek-zabokrtsky"><first>Zdeněk</first><last>Žabokrtský</last></author>
       <url hash="7f89e6b9">Y16-2018</url>
       <pages>199–207</pages>
       <bibkey>zeman-etal-2016-planting</bibkey>
@@ -268,7 +268,7 @@
       <author><first>Sumit</first><last>Maharjan</last></author>
       <author><first>Masaki</first><last>Saito</last></author>
       <author><first>Kota</first><last>Yamaguchi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Takayuki</first><last>Okatani</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <url hash="9f7e2c63">Y16-2022</url>
@@ -277,7 +277,7 @@
     </paper>
     <paper id="23">
       <title>Strong Associations Can Be Weak: Some Thoughts on Cross-lingual Word Webs for Translation</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="e2a65608">Y16-2023</url>
       <pages>249–257</pages>
       <bibkey>kwong-2016-strong</bibkey>
@@ -285,8 +285,8 @@
     <paper id="24">
       <title>Dealing with Out-Of-Vocabulary Problem in Sentence Alignment Using Word Similarity</title>
       <author><first>Hai-Long</first><last>Trieu</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
-      <author><first>Phuong-Thai</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="phuong-thai-nguyen"><first>Phuong-Thai</first><last>Nguyen</last></author>
       <url hash="d15d024a">Y16-2024</url>
       <pages>259–266</pages>
       <bibkey>trieu-etal-2016-dealing</bibkey>
@@ -302,7 +302,7 @@
       <title>Toward the automatic extraction of knowledge of usable goods</title>
       <author><first>Mei</first><last>Uemura</last></author>
       <author><first>Naho</first><last>Orita</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <url hash="8e07d280">Y16-2026</url>
       <pages>277–285</pages>
@@ -361,7 +361,7 @@
     </paper>
     <paper id="4">
       <title>The Inner Circle vs. the Outer Circle or <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish vs. <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <author><first>Ki-suk</first><last>Jun</last></author>
       <url hash="cc284f6b">Y16-3004</url>
       <pages>339–346</pages>
@@ -371,7 +371,7 @@
       <title>A Correlation Analysis of <fixed-case>E</fixed-case>nglish Particle Placement of Three <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian <fixed-case>EFL</fixed-case> Learners Writings</title>
       <author><first>Ha-Eung</first><last>Kim</last></author>
       <author><first>Gyu-Hyeong</first><last>Lee</last></author>
-      <author><first>Yong-hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-hun</first><last>Lee</last></author>
       <url hash="565cbda9">Y16-3005</url>
       <pages>347–354</pages>
       <bibkey>kim-etal-2016-correlation</bibkey>
@@ -456,7 +456,7 @@
       <author><first>Nathaniel</first><last>Oco</last></author>
       <author><first>Leif Romeritch</first><last>Syliongka</last></author>
       <author><first>Tod</first><last>Allman</last></author>
-      <author><first>Rachel Edita</first><last>Roxas</last></author>
+      <author id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></author>
       <url hash="054ee9de">Y16-3015</url>
       <pages>433–438</pages>
       <bibkey>oco-etal-2016-philippine</bibkey>
@@ -542,7 +542,7 @@
     </paper>
     <paper id="26">
       <title>The Cloud of Knowing: Non-factive al-ta ‘know’ (as a Neg-raiser) in <fixed-case>K</fixed-case>orean</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <author><first>Seungjin</first><last>Hong</last></author>
       <url hash="f2014a67">Y16-3026</url>
       <pages>527–533</pages>
@@ -553,7 +553,7 @@
       <author><first>Masatoshi</first><last>Suzuki</last></author>
       <author><first>Koji</first><last>Matsuda</last></author>
       <author><first>Satoshi</first><last>Sekine</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <url hash="c968dfc1">Y16-3027</url>
       <pages>535–544</pages>
diff --git a/data/xml/Y17.xml b/data/xml/Y17.xml
index 123f287aad..ed1ee94353 100644
--- a/data/xml/Y17.xml
+++ b/data/xml/Y17.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 31st Pacific Asia Conference on Language, Information and Computation</booktitle>
       <url hash="ddfe17bd">Y17-1</url>
-      <editor><first>Rachel Edita</first><last>Roxas</last></editor>
+      <editor id="rachel-edita-roxas"><first>Rachel Edita</first><last>Roxas</last></editor>
       <publisher>The National University (Phillippines)</publisher>
       <month>November</month>
       <year>2017</year>
@@ -66,7 +66,7 @@
     <paper id="8">
       <title>The Phrasal-Prepositional Verbs in <fixed-case>P</fixed-case>hilippine <fixed-case>E</fixed-case>nglish: A Corpus-based Analysis</title>
       <author><first>Jennibelle</first><last>Ella</last></author>
-      <author><first>Shirley</first><last>Dita</last></author>
+      <author id="shirley-dita"><first>Shirley</first><last>Dita</last></author>
       <pages>34–41</pages>
       <url hash="51d1a09a">Y17-1008</url>
       <bibkey>ella-dita-2017-phrasal</bibkey>
@@ -131,7 +131,7 @@
     <paper id="16">
       <title>The Importance of Automatic Syntactic Features in <fixed-case>V</fixed-case>ietnamese Named Entity Recognition</title>
       <author><first>Thai-Hoang</first><last>Pham</last></author>
-      <author><first>Phuong</first><last>Le-Hong</last></author>
+      <author id="phuong-le-hong"><first>Phuong</first><last>Le-Hong</last></author>
       <pages>97–103</pages>
       <url hash="155d160b">Y17-1016</url>
       <bibkey>pham-le-hong-2017-importance</bibkey>
@@ -171,7 +171,7 @@
     <paper id="21">
       <title>A Parallel Recurrent Neural Network for Language Modeling with <fixed-case>POS</fixed-case> Tags</title>
       <author><first>Chao</first><last>Su</last></author>
-      <author><first>Heyan</first><last>Huang</last></author>
+      <author id="he-yan-huang"><first>Heyan</first><last>Huang</last></author>
       <author><first>Shumin</first><last>Shi</last></author>
       <author><first>Yuhang</first><last>Guo</last></author>
       <author><first>Hao</first><last>Wu</last></author>
@@ -182,7 +182,7 @@
     <paper id="22">
       <title>Identifying Deception in <fixed-case>I</fixed-case>ndonesian Transcribed Interviews through Lexical-based Approach</title>
       <author><first>Tifani</first><last>Warnita</last></author>
-      <author><first>Dessi Puji</first><last>Lestari</last></author>
+      <author id="dessi-puji-lestari"><first>Dessi Puji</first><last>Lestari</last></author>
       <pages>148–154</pages>
       <url hash="6369395d">Y17-1022</url>
       <bibkey>warnita-lestari-2017-identifying</bibkey>
@@ -205,7 +205,7 @@
     </paper>
     <paper id="25">
       <title>Remarks on epistemically biased questions</title>
-      <author><first>David Yoshikazu</first><last>Oshima</last></author>
+      <author id="david-yoshikazu-oshima"><first>David Yoshikazu</first><last>Oshima</last></author>
       <pages>169–177</pages>
       <url hash="2506dc5e">Y17-1025</url>
       <bibkey>oshima-2017-remarks</bibkey>
@@ -235,7 +235,7 @@
     <paper id="29">
       <title>Subjecthood and Grammatical Relations in <fixed-case>K</fixed-case>orean: An Experimental Study with Honorific Agreement and Plural Copying</title>
       <author><first>Ji-Hye</first><last>Kim</last></author>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <author><first>James Hye-Suk</first><last>Yoon</last></author>
       <pages>206–213</pages>
       <url hash="e53e2991">Y17-1029</url>
@@ -251,7 +251,7 @@
     <paper id="31">
       <title>A Corpus-based Analysis of Near-Synonymous Sentence-final Particles in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: “bale” and “eryi”</title>
       <author><first>Xuefeng</first><last>Gao</last></author>
-      <author><first>Yat-Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Yat-Mei</first><last>Lee</last></author>
       <pages>222–230</pages>
       <url hash="592cf53e">Y17-1031</url>
       <bibkey>gao-lee-2017-corpus</bibkey>
@@ -269,7 +269,7 @@
     <paper id="33">
       <title>Word Learning by Young Bilinguals: Understanding the Denotation and Connotation Differences of “Cut” Verbs in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese</title>
       <author><first>Keng Hwee</first><last>Neo</last></author>
-      <author><first>Helena</first><last>Gao</last></author>
+      <author id="helena-hong-gao"><first>Helena</first><last>Gao</last></author>
       <pages>241–248</pages>
       <url hash="416db8e6">Y17-1033</url>
       <bibkey>neo-gao-2017-word</bibkey>
@@ -317,7 +317,7 @@
     <paper id="39">
       <title>Rule-based Reordering and Post-Processing for <fixed-case>I</fixed-case>ndonesian-<fixed-case>K</fixed-case>orean Statistical Machine Translation</title>
       <author><first>Candy Olivia</first><last>Mawalim</last></author>
-      <author><first>Dessi Puji</first><last>Lestari</last></author>
+      <author id="dessi-puji-lestari"><first>Dessi Puji</first><last>Lestari</last></author>
       <author><first>Ayu</first><last>Purwarianti</last></author>
       <pages>287–295</pages>
       <url hash="965d238a">Y17-1039</url>
@@ -326,7 +326,7 @@
     <paper id="40">
       <title>Sentence Complexity Estimation for <fixed-case>C</fixed-case>hinese-speaking Learners of <fixed-case>J</fixed-case>apanese</title>
       <author><first>Jun</first><last>Liu</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>296–302</pages>
       <url hash="e3ee28ed">Y17-1040</url>
       <bibkey>liu-matsumoto-2017-sentence</bibkey>
@@ -366,7 +366,7 @@
       <title>A Crowdsourcing Approach for Annotating Causal Relation Instances in <fixed-case>W</fixed-case>ikipedia</title>
       <author><first>Kazuaki</first><last>Hanawa</last></author>
       <author><first>Akira</first><last>Sasaki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <pages>336–345</pages>
       <url hash="3e41d695">Y17-1045</url>
@@ -393,7 +393,7 @@
       <title>Extracting Important Tweets for News Writers using Recurrent Neural Network with Attention Mechanism and Multi-task Learning</title>
       <author><first>Taro</first><last>Miyazaki</last></author>
       <author><first>Shin</first><last>Toriumi</last></author>
-      <author><first>Yuka</first><last>Takei</last></author>
+      <author id="yuka-takei"><first>Yuka</first><last>Takei</last></author>
       <author><first>Ichiro</first><last>Yamada</last></author>
       <author><first>Jun</first><last>Goto</last></author>
       <pages>363–369</pages>
@@ -402,7 +402,7 @@
     </paper>
     <paper id="49">
       <title>Tweet Extraction for News Production Considering Unreality</title>
-      <author><first>Yuka</first><last>Takei</last></author>
+      <author id="yuka-takei"><first>Yuka</first><last>Takei</last></author>
       <author><first>Taro</first><last>Miyazaki</last></author>
       <author><first>Ichiro</first><last>Yamada</last></author>
       <author><first>Jun</first><last>Goto</last></author>
@@ -422,8 +422,8 @@
     <paper id="51">
       <title>Investigating Phrase-Based and Neural-Based Machine Translation on Low-Resource Settings</title>
       <author><first>Hai Long</first><last>Trieu</last></author>
-      <author><first>Duc-Vu</first><last>Tran</last></author>
-      <author><first>Le Minh</first><last>Nguyen</last></author>
+      <author id="duc-vu-tran"><first>Duc-Vu</first><last>Tran</last></author>
+      <author id="minh-le-nguyen"><first>Le Minh</first><last>Nguyen</last></author>
       <pages>384–391</pages>
       <url hash="1ba8dbd5">Y17-1051</url>
       <bibkey>trieu-etal-2017-investigating</bibkey>
diff --git a/data/xml/Y18.xml b/data/xml/Y18.xml
index a7266d1ff4..907ce2929b 100644
--- a/data/xml/Y18.xml
+++ b/data/xml/Y18.xml
@@ -21,7 +21,7 @@
       <title>Multi-dialect Neural Machine Translation and Dialectometry</title>
       <author><first>Kaori</first><last>Abe</last></author>
       <author><first>Yuichiroh</first><last>Matsubayashi</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <url hash="bb66beed">Y18-1001</url>
       <bibkey>abe-etal-2018-multi</bibkey>
@@ -30,7 +30,7 @@
       <title>Automated Error Correction and Validation for <fixed-case>POS</fixed-case> Tagging of <fixed-case>H</fixed-case>indi</title>
       <author><first>Sachi</first><last>Angle</last></author>
       <author><first>Pruthwik</first><last>Mishra</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <url hash="7d8a7dad">Y18-1002</url>
       <bibkey>angle-etal-2018-automated</bibkey>
     </paper>
@@ -60,7 +60,7 @@
     <paper id="6">
       <title>Towards an Automatic Text Comprehension for the <fixed-case>A</fixed-case>rabic Question-Answering: Semantic and Logical Representation of Texts</title>
       <author><first>Wided</first><last>Bakari</last></author>
-      <author><first>Patrice</first><last>Bellot</last></author>
+      <author id="patrice-bellot"><first>Patrice</first><last>Bellot</last></author>
       <author><first>Mahmoud</first><last>Neji</last></author>
       <url hash="66ccdfaa">Y18-1006</url>
       <bibkey>bakari-etal-2018-towards</bibkey>
@@ -88,14 +88,14 @@
     <paper id="9">
       <title>Investigating the <fixed-case>E</fixed-case>nglish <fixed-case>ADJECTIVE</fixed-case> <fixed-case>OF</fixed-case> Construction in Academic Writing</title>
       <author><first>Liyin</first><last>Chen</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="5039a496">Y18-1009</url>
       <bibkey>chen-chung-2018-investigating</bibkey>
     </paper>
     <paper id="10">
       <title>Detecting Free Translation in Parallel Corpora from Attention Scores</title>
       <author><first>Qi</first><last>Chen</last></author>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <author><first>Jingbo</first><last>Zhu</last></author>
       <url hash="1a5e1925">Y18-1010</url>
       <bibkey>chen-etal-2018-detecting</bibkey>
@@ -127,7 +127,7 @@
     </paper>
     <paper id="14">
       <title>A Corpus Study of Linguistic-Cultural Conceptualization of <fixed-case>FEAR</fixed-case> in <fixed-case>C</fixed-case>hinese and <fixed-case>R</fixed-case>ussian</title>
-      <author><first>Kawai</first><last>Chui</last></author>
+      <author id="ka-wai-chui"><first>Kawai</first><last>Chui</last></author>
       <author><first>Hsiang-Lin</first><last>Yeh</last></author>
       <author><first>Jie-Li</first><last>Tsai</last></author>
       <url hash="de3a117f">Y18-1014</url>
@@ -146,14 +146,14 @@
       <title>Too Many Questions? What Can We Do? : Multiple Question Span Detection</title>
       <author><first>Prathyusha</first><last>Danda</last></author>
       <author><first>Brij Mohan Lal</first><last>Srivastava</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <url hash="3980b47a">Y18-1016</url>
       <bibkey>danda-etal-2018-many</bibkey>
     </paper>
     <paper id="17">
       <title><fixed-case>B</fixed-case>o<fixed-case>WL</fixed-case>er: A neural approach to extractive text summarization</title>
       <author><first>Pranav</first><last>Dhakras</last></author>
-      <author><first>Manish</first><last>Shrivastava</last></author>
+      <author id="manish-shrivastava"><first>Manish</first><last>Shrivastava</last></author>
       <url hash="c1e7c079">Y18-1017</url>
       <bibkey>dhakras-shrivastava-2018-bowler</bibkey>
     </paper>
@@ -161,7 +161,7 @@
       <title>Effectiveness of Character Language Model for <fixed-case>V</fixed-case>ietnamese Named Entity Recognition</title>
       <author><first>Xuan-Dung</first><last>Doan</last></author>
       <author><first>Trung-Thanh</first><last>Dang</last></author>
-      <author><first>Le-Minh</first><last>Nguyen</last></author>
+      <author id="minh-le-nguyen"><first>Le-Minh</first><last>Nguyen</last></author>
       <url hash="e5fe59db">Y18-1018</url>
       <bibkey>doan-etal-2018-effectiveness</bibkey>
     </paper>
@@ -175,7 +175,7 @@
     <paper id="20">
       <title>Exclamative Sentences in Emotion Expressions in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: A Corpus-based Approach</title>
       <author><first>Xuefeng</first><last>Gao</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <url hash="f3760590">Y18-1020</url>
       <bibkey>gao-lee-2018-exclamative</bibkey>
     </paper>
@@ -196,7 +196,7 @@
     <paper id="23">
       <title>Are They Arguing or not: A Corpus-based Study</title>
       <author><first>Min-Chun</first><last>Hsiao</last></author>
-      <author><first>Siaw-Fong</first><last>Chung</last></author>
+      <author id="siaw-fong-chung"><first>Siaw-Fong</first><last>Chung</last></author>
       <url hash="6f3ce78f">Y18-1023</url>
       <bibkey>hsiao-chung-2018-arguing</bibkey>
     </paper>
@@ -248,7 +248,7 @@
     </paper>
     <paper id="30">
       <title>Model-Theoretic Incremental Interpretation Based on <fixed-case>D</fixed-case>iscourse <fixed-case>R</fixed-case>epresentation <fixed-case>T</fixed-case>heory</title>
-      <author><first>Yoshihide</first><last>Kato</last></author>
+      <author id="yoshihide-kato"><first>Yoshihide</first><last>Kato</last></author>
       <author><first>Shigeki</first><last>Matsubara</last></author>
       <url hash="4a06d227">Y18-1030</url>
       <bibkey>kato-matsubara-2018-model</bibkey>
@@ -279,7 +279,7 @@
       <author><first>Shun</first><last>Kiyono</last></author>
       <author><first>Sho</first><last>Takase</last></author>
       <author><first>Jun</first><last>Suzuki</last></author>
-      <author><first>Naoaki</first><last>Okazaki</last></author>
+      <author id="naoaki-okazaki"><first>Naoaki</first><last>Okazaki</last></author>
       <author><first>Kentaro</first><last>Inui</last></author>
       <author><first>Masaaki</first><last>Nagata</last></author>
       <url hash="606ba45b">Y18-1034</url>
@@ -294,14 +294,14 @@
     </paper>
     <paper id="36">
       <title>The Non-deictic Use of Demonstratives in Conversations and Interpreted Speeches in Contemporary <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong <fixed-case>C</fixed-case>antonese</title>
-      <author><first>Oi Yee</first><last>Kwong</last></author>
+      <author id="olivia-o-y-kwong"><first>Oi Yee</first><last>Kwong</last></author>
       <url hash="1ab28535">Y18-1036</url>
       <bibkey>kwong-2018-non</bibkey>
     </paper>
     <paper id="37">
       <title>Questions as a Pre-event, Pivot Event and Post-event of Emotions</title>
       <author><first>Helena Yan Ping</first><last>Lau</last></author>
-      <author><first>Sophia Yat Mei</first><last>Lee</last></author>
+      <author id="sophia-yat-mei-lee"><first>Sophia Yat Mei</first><last>Lee</last></author>
       <author><first>Zhongqing</first><last>Wang</last></author>
       <url hash="d87e981d">Y18-1037</url>
       <bibkey>lau-etal-2018-questions</bibkey>
@@ -323,7 +323,7 @@
     <paper id="40">
       <title>A New Angle on <fixed-case>L</fixed-case>2 Texts: A Statistical Approach to Translation Universals</title>
       <author><first>Younghee Cheri</first><last>Lee</last></author>
-      <author><first>Yong-Hun</first><last>Lee</last></author>
+      <author id="yong-hun-lee"><first>Yong-Hun</first><last>Lee</last></author>
       <url hash="ac8b035b">Y18-1040</url>
       <bibkey>lee-lee-2018-new</bibkey>
     </paper>
@@ -339,14 +339,14 @@
       <title><fixed-case>C</fixed-case>hinese Spelling Check based on Neural Machine Translation</title>
       <author><first>Chiao-Wen</first><last>Li</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <url hash="3c7788d4">Y18-1042</url>
       <bibkey>li-etal-2018-chinese</bibkey>
     </paper>
     <paper id="43">
       <title>Research on Entity Relation Extraction for Military Field</title>
       <author><first>Chen</first><last>Liang</last></author>
-      <author><first>Hongying</first><last>Zan</last></author>
+      <author id="hongying-zan"><first>Hongying</first><last>Zan</last></author>
       <author><first>Yajun</first><last>Liu</last></author>
       <author><first>Yunfang</first><last>Wu</last></author>
       <url hash="d16ff8c3">Y18-1043</url>
@@ -374,7 +374,7 @@
       <author><first>Fei</first><last>Cheng</last></author>
       <author><first>Yiran</first><last>Wang</last></author>
       <author><first>Hiroyuki</first><last>Shindo</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="ba3faf5c">Y18-1046</url>
       <bibkey>liu-etal-2018-automatic-error</bibkey>
     </paper>
@@ -423,15 +423,15 @@
       <author><first>Katsuhiko</first><last>Hayashi</last></author>
       <author><first>Takahiro</first><last>Ishihara</last></author>
       <author><first>Hitoshi</first><last>Manabe</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <url hash="3733a8e9">Y18-1052</url>
       <bibkey>matsuno-etal-2018-reduction</bibkey>
     </paper>
     <paper id="53">
       <title><fixed-case>E</fixed-case>qu<fixed-case>G</fixed-case>ener: A Reasoning Network for Word Problem Solving by Generating Arithmetic Equations</title>
       <author><first>Pruthwik</first><last>Mishra</last></author>
-      <author><first>Litton J</first><last>Kurisinkel</last></author>
-      <author><first>Dipti Misra</first><last>Sharma</last></author>
+      <author id="litton-j-kurisinkel"><first>Litton J</first><last>Kurisinkel</last></author>
+      <author id="dipti-misra-sharma"><first>Dipti Misra</first><last>Sharma</last></author>
       <author><first>Vasudeva</first><last>Varma</last></author>
       <url hash="8813d0f1">Y18-1053</url>
       <bibkey>mishra-etal-2018-equgener</bibkey>
@@ -510,7 +510,7 @@
       <author><first>Cheng-Cyuan</first><last>Peng</last></author>
       <author><first>Ching-Yu</first><last>Yang</last></author>
       <author><first>Jhih-Jie</first><last>Chen</last></author>
-      <author><first>Jason</first><last>Chang</last></author>
+      <author id="jason-s-chang"><first>Jason</first><last>Chang</last></author>
       <url hash="36bff720">Y18-1062</url>
       <bibkey>peng-etal-2018-smartwrite</bibkey>
     </paper>
@@ -575,7 +575,7 @@
       <author><first>Hoyun</first><last>Song</last></author>
       <author><first>Jinseon</first><last>You</last></author>
       <author><first>Jin-Woo</first><last>Chung</last></author>
-      <author><first>Jong C.</first><last>Park</last></author>
+      <author id="jong-c-park"><first>Jong C.</first><last>Park</last></author>
       <url hash="cbfcd916">Y18-1070</url>
       <bibkey>song-etal-2018-feature</bibkey>
     </paper>
@@ -671,7 +671,7 @@
     <paper id="82">
       <title>A Re-examination of Syntactic Complexity by Investigating the Internal Structure Variations of Adverbial Clauses across Speech and Writing</title>
       <author><first>Mingyu</first><last>Wan</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <url hash="9cc270c2">Y18-1082</url>
       <bibkey>wan-fang-2018-examination</bibkey>
     </paper>
@@ -711,7 +711,7 @@
     <paper id="88">
       <title>Attention-based <fixed-case>BLSTM</fixed-case>-<fixed-case>CRF</fixed-case> Architecture for <fixed-case>M</fixed-case>ongolian Named Entity Recognition</title>
       <author><first>Yuzhu</first><last>Xiong</last></author>
-      <author><first>Minghua</first><last>Nuo</last></author>
+      <author id="minghua-nuo"><first>Minghua</first><last>Nuo</last></author>
       <url hash="338342be">Y18-1088</url>
       <bibkey>xiong-nuo-2018-attention</bibkey>
     </paper>
@@ -727,7 +727,7 @@
       <title>Development of Perceptual Training Software for Realizing High Variability Training Paradigm and Self Adaptive Training Paradigm</title>
       <author><first>Ruining</first><last>Yang</last></author>
       <author><first>Hiroaki</first><last>Nanjo</last></author>
-      <author><first>Masatake</first><last>Dantsuji</last></author>
+      <author id="masatake-dantsuji"><first>Masatake</first><last>Dantsuji</last></author>
       <url hash="b6310911">Y18-1090</url>
       <bibkey>yang-etal-2018-development</bibkey>
     </paper>
@@ -830,10 +830,10 @@
     </paper>
     <paper id="4">
       <title>Food-Related Sentiment Analysis for <fixed-case>C</fixed-case>antonese</title>
-      <author><first>Natalia</first><last>Klyueva</last></author>
+      <author id="natalia-klyueva"><first>Natalia</first><last>Klyueva</last></author>
       <author><first>Yunfei</first><last>Long</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Qin</first><last>Lu</last></author>
+      <author id="qin-lu"><first>Qin</first><last>Lu</last></author>
       <url hash="e975cb42">Y18-2004</url>
       <bibkey>klyueva-etal-2018-food</bibkey>
     </paper>
@@ -896,7 +896,7 @@
       <author><first>Raj</first><last>Dabre</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="821e69a4">Y18-3003</url>
       <bibkey>dabre-etal-2018-nicts</bibkey>
     </paper>
@@ -921,7 +921,7 @@
       <author><first>Rui</first><last>Wang</last></author>
       <author><first>Chenchen</first><last>Ding</last></author>
       <author><first>Masao</first><last>Utiyama</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="cec6d1c9">Y18-3006</url>
       <bibkey>wang-etal-2018-english</bibkey>
     </paper>
@@ -929,7 +929,7 @@
       <title>Combination of Statistical and Neural Machine Translation for <fixed-case>M</fixed-case>yanmar-<fixed-case>E</fixed-case>nglish</title>
       <author><first>Benjamin</first><last>Marie</last></author>
       <author><first>Atsushi</first><last>Fujita</last></author>
-      <author><first>Eiichiro</first><last>Sumita</last></author>
+      <author id="eiichiro-sumita"><first>Eiichiro</first><last>Sumita</last></author>
       <url hash="4faad113">Y18-3007</url>
       <bibkey>marie-etal-2018-combination</bibkey>
     </paper>
@@ -959,8 +959,8 @@
     </paper>
     <paper id="11">
       <title>The <fixed-case>RGNLP</fixed-case> Machine Translation Systems for <fixed-case>WAT</fixed-case> 2018</title>
-      <author><first>Atul Kr.</first><last>Ojha</last></author>
-      <author><first>Koel Dutta</first><last>Chowdhury</last></author>
+      <author id="atul-kr-ojha"><first>Atul Kr.</first><last>Ojha</last></author>
+      <author id="koel-dutta-chowdhury"><first>Koel Dutta</first><last>Chowdhury</last></author>
       <author><first>Chao-Hong</first><last>Liu</last></author>
       <author><first>Karan</first><last>Saxena</last></author>
       <url hash="225f42f5">Y18-3011</url>
@@ -971,7 +971,7 @@
       <author><first>Sukanta</first><last>Sen</last></author>
       <author><first>Kamal Kumar</first><last>Gupta</last></author>
       <author><first>Asif</first><last>Ekbal</last></author>
-      <author><first>Pushpak</first><last>Bhattacharyya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharyya</last></author>
       <url hash="96078f60">Y18-3012</url>
       <bibkey>sen-etal-2018-iitp</bibkey>
     </paper>
@@ -979,7 +979,7 @@
       <title>Multilingual <fixed-case>I</fixed-case>ndian Language Translation System at <fixed-case>WAT</fixed-case> 2018: Many-to-one Phrase-based <fixed-case>SMT</fixed-case></title>
       <author><first>Tamali</first><last>Banerjee</last></author>
       <author><first>Anoop</first><last>Kunchukuttan</last></author>
-      <author><first>Pushpak</first><last>Bhattacharya</last></author>
+      <author id="pushpak-bhattacharyya"><first>Pushpak</first><last>Bhattacharya</last></author>
       <url hash="49d7f1a7">Y18-3013</url>
       <bibkey>banerjee-etal-2018-multilingual</bibkey>
     </paper>
diff --git a/data/xml/Y95.xml b/data/xml/Y95.xml
index 94e34da0cc..0993d82971 100644
--- a/data/xml/Y95.xml
+++ b/data/xml/Y95.xml
@@ -3,8 +3,8 @@
   <volume id="1" type="proceedings">
     <meta>
       <booktitle>Proceedings of the 10th Pacific Asia Conference on Language, Information and Computation</booktitle>
-      <editor><first>Benjamin K.</first><last>T’sou</last></editor>
-      <editor><first>Tom B. Y.</first><last>Lai</last></editor>
+      <editor id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></editor>
+      <editor id="tom-b-y-lai"><first>Tom B. Y.</first><last>Lai</last></editor>
       <publisher>City University of Hong Kong</publisher>
       <address>City University of Hong Kong, Hong Kong</address>
       <month>December</month>
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Distances and Trees in Linguistics</title>
-      <author><first>William S-Y.</first><last>Wang</last></author>
+      <author id="william-s-y-wang"><first>William S-Y.</first><last>Wang</last></author>
       <pages>1–6</pages>
       <url hash="e07ee95b">Y95-1001</url>
       <doi>http://hdl.handle.net/2065/11866</doi>
@@ -33,7 +33,7 @@
     </paper>
     <paper id="3">
       <title>An Analysis of Generic Expressions in Situation Semantics</title>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <pages>19–28</pages>
       <url hash="a1353e21">Y95-1003</url>
       <doi>http://hdl.handle.net/2065/11879</doi>
@@ -49,7 +49,7 @@
     </paper>
     <paper id="5">
       <title>Comprehending Text : Achieving Coherence through a Connectionist Architecture</title>
-      <author><first>Samuel W. K.</first><last>Chan</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W. K.</first><last>Chan</last></author>
       <pages>39–48</pages>
       <url hash="5096ee8b">Y95-1005</url>
       <doi>http://hdl.handle.net/2065/11894</doi>
@@ -57,7 +57,7 @@
     </paper>
     <paper id="6">
       <title>Predication of Meaning of Bisyllabic <fixed-case>C</fixed-case>hinese Compound Words Using Back Propagation Neural Network</title>
-      <author><last>Lua</last><first>Kim Teng</first></author>
+      <author id="kim-teng-lua"><first>Kim Teng</first><last>Lua</last></author>
       <pages>49–56</pages>
       <url hash="69d3d1d6">Y95-1006</url>
       <doi>http://hdl.handle.net/2065/11895</doi>
@@ -82,7 +82,7 @@
     </paper>
     <paper id="9">
       <title>Scrambling in <fixed-case>G</fixed-case>erman : Extraction into the Mittelfeld</title>
-      <author><first>Stefan</first><last>Müller</last></author>
+      <author id="stefan-muller"><first>Stefan</first><last>Müller</last></author>
       <pages>79–84</pages>
       <url hash="7f2ef763">Y95-1009</url>
       <doi>http://hdl.handle.net/2065/11899</doi>
@@ -116,7 +116,7 @@
     </paper>
     <paper id="13">
       <title>A Cognitive Account of the Lexical Polysemy of <fixed-case>C</fixed-case>hinese Kai</title>
-      <author><first>Flora Yu-Fang</first><last>Wang</last></author>
+      <author id="flora-yu-fang-wang"><first>Flora Yu-Fang</first><last>Wang</last></author>
       <pages>103–108</pages>
       <url hash="c8308931">Y95-1013</url>
       <doi>http://hdl.handle.net/2065/11867</doi>
@@ -125,8 +125,8 @@
     <paper id="14">
       <title>Automatic Sense Disambiguation for Target Word Selection</title>
       <author><first>Kwon Yang</first><last>Kim</last></author>
-      <author><first>Se Young</first><last>Park</last></author>
-      <author><first>Sang Jo</first><last>Lee</last></author>
+      <author id="se-young-park"><first>Se Young</first><last>Park</last></author>
+      <author id="sang-jo-lee"><first>Sang Jo</first><last>Lee</last></author>
       <pages>109–114</pages>
       <url hash="e2113c13">Y95-1014</url>
       <doi>http://hdl.handle.net/2065/11868</doi>
@@ -134,8 +134,8 @@
     </paper>
     <paper id="15">
       <title>Structural Ambiguity and Conceptual Information Retrieval</title>
-      <author><first>Mathis Huey-chyun</first><last>Chen</last></author>
-      <author><first>Jason J.S.</first><last>Chang</last></author>
+      <author id="huey-chyun-chen"><first>Mathis Huey-chyun</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason J.S.</first><last>Chang</last></author>
       <pages>115–120</pages>
       <url hash="0bf67194">Y95-1015</url>
       <doi>http://hdl.handle.net/2065/11869</doi>
@@ -152,8 +152,8 @@
     </paper>
     <paper id="17">
       <title>The Postprocessing of Optical Character Recognition Based on Statistical Noisy Channel and Language Model</title>
-      <author><first>Jason J. S.</first><last>Chang</last></author>
-      <author><first>Shun-Der</first><last>Chen</last></author>
+      <author id="jason-s-chang"><first>Jason J. S.</first><last>Chang</last></author>
+      <author id="shun-der-chen"><first>Shun-Der</first><last>Chen</last></author>
       <pages>127–132</pages>
       <url hash="de48d32d">Y95-1017</url>
       <doi>http://hdl.handle.net/2065/11871</doi>
@@ -161,7 +161,7 @@
     </paper>
     <paper id="18">
       <title>A Quantitative Analysis of Word-Definition in a Machine-Readable Dictionary</title>
-      <author><first>Robert W.P.</first><last>Luk</last></author>
+      <author id="robert-w-p-luk"><first>Robert W.P.</first><last>Luk</last></author>
       <author><first>Venus M.K.</first><last>Chan</last></author>
       <pages>133–138</pages>
       <url hash="4601f037">Y95-1018</url>
@@ -195,7 +195,7 @@
     <paper id="22">
       <title><fixed-case>HMM</fixed-case> Parameter Learning for <fixed-case>J</fixed-case>apanese Morphological Analyzer</title>
       <author><first>Koichi</first><last>Takeuchi</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>163–172</pages>
       <url hash="f80b1ea7">Y95-1022</url>
       <doi>http://hdl.handle.net/2065/11876</doi>
@@ -203,8 +203,8 @@
     </paper>
     <paper id="23">
       <title>Automatic Acquisition of Class-based Rules for Word Alignment</title>
-      <author><first>Sur-Jin</first><last>Ker</last></author>
-      <author><first>Jason J.S.</first><last>Chang</last></author>
+      <author id="sue-j-ker"><first>Sur-Jin</first><last>Ker</last></author>
+      <author id="jason-s-chang"><first>Jason J.S.</first><last>Chang</last></author>
       <pages>173–184</pages>
       <url hash="56924086">Y95-1023</url>
       <doi>http://hdl.handle.net/2065/11877</doi>
@@ -213,7 +213,7 @@
     <paper id="24">
       <title>Automated Alignment in Multilingual Corpora</title>
       <author><first>J.A.</first><last>Campbell</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>185–194</pages>
       <url hash="b0e62329">Y95-1024</url>
       <doi>http://hdl.handle.net/2065/11878</doi>
@@ -266,7 +266,7 @@
     </paper>
     <paper id="30">
       <title>A Corpus-Based Study of Adverbial Clauses in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Conversations : A Preliminary Analysis</title>
-      <author><first>Yu-Fang</first><last>Wang</last></author>
+      <author id="flora-yu-fang-wang"><first>Yu-Fang</first><last>Wang</last></author>
       <pages>237–242</pages>
       <url hash="56fb1f36">Y95-1030</url>
       <doi>http://hdl.handle.net/2065/11885</doi>
@@ -274,7 +274,7 @@
     </paper>
     <paper id="31">
       <title>A Network-Based Writing System for <fixed-case>F</fixed-case>rench</title>
-      <author><first>Aesun</first><last>Yoon</last></author>
+      <author id="aesun-yoon"><first>Aesun</first><last>Yoon</last></author>
       <author><first>Hyuk-Chul</first><last>Kwon</last></author>
       <pages>243–248</pages>
       <url hash="f1441406">Y95-1031</url>
@@ -283,7 +283,7 @@
     </paper>
     <paper id="32">
       <title>Web Access to a Lexical Database Using <fixed-case>VB</fixed-case>/Access <fixed-case>CGI</fixed-case> Programming</title>
-      <author><first>Jonathan J.</first><last>Webster</last></author>
+      <author id="jonathan-j-webster"><first>Jonathan J.</first><last>Webster</last></author>
       <pages>249–254</pages>
       <url hash="b3122f8d">Y95-1032</url>
       <doi>http://hdl.handle.net/2065/11887</doi>
@@ -292,7 +292,7 @@
     <paper id="33">
       <title>Document Ranking Method for High Precision Rate</title>
       <author><first>Mee-Sun</first><last>Jeon</last></author>
-      <author><first>Se-Young</first><last>Park</last></author>
+      <author id="se-young-park"><first>Se-Young</first><last>Park</last></author>
       <pages>255–260</pages>
       <url hash="8b6555cc">Y95-1033</url>
       <doi>http://hdl.handle.net/2065/11888</doi>
@@ -301,7 +301,7 @@
     <paper id="34">
       <title>Natural Languages Analysis in Machine Translation (<fixed-case>MT</fixed-case>) Based on the <fixed-case>STCG</fixed-case> (String-Tree Correspondence Grammar)</title>
       <author><last>Tang</last><first>Enya Kong</first></author>
-      <author><first>Zaharin</first><last>Yusoff</last></author>
+      <author id="zaharin-yusoff"><first>Zaharin</first><last>Yusoff</last></author>
       <pages>261–266</pages>
       <url hash="d3dc1898">Y95-1034</url>
       <doi>http://hdl.handle.net/2065/11889</doi>
@@ -326,7 +326,7 @@
     </paper>
     <paper id="37">
       <title>A Unified Account of Polarity Phenomena</title>
-      <author><first>Chungmin</first><last>Lee</last></author>
+      <author id="chungmin-lee"><first>Chungmin</first><last>Lee</last></author>
       <pages>281–291</pages>
       <url hash="84ddcdfd">Y95-1037</url>
       <doi>http://hdl.handle.net/2065/11892</doi>
diff --git a/data/xml/Y96.xml b/data/xml/Y96.xml
index 9ac59fab3c..cadc3111ce 100644
--- a/data/xml/Y96.xml
+++ b/data/xml/Y96.xml
@@ -34,7 +34,7 @@
     </paper>
     <paper id="3">
       <title>Subject-oriented and non Subject-oriented Long-distance Anaphora : an Integrated Approach</title>
-      <author><first>Antonio</first><last>Branco</last></author>
+      <author id="antonio-branco"><first>Antonio</first><last>Branco</last></author>
       <author><first>Palmira</first><last>Marrafa</last></author>
       <pages>21–30</pages>
       <url hash="f646cd55">Y96-1003</url>
@@ -60,7 +60,7 @@
     </paper>
     <paper id="6">
       <title>Underspecified <fixed-case>J</fixed-case>apanese Semantics in a Machine Translation System</title>
-      <author><first>Björn</first><last>Gambäck</last></author>
+      <author id="bjorn-gamback"><first>Björn</first><last>Gambäck</last></author>
       <author><first>Christian</first><last>Lieske</last></author>
       <author><first>Yoshiki</first><last>Mori</last></author>
       <pages>53–62</pages>
@@ -73,7 +73,7 @@
       <author><first>J.A.</first><last>Campbell</last></author>
       <author id="niladri-chatterjee"><first>N.</first><last>Chatterjee</last></author>
       <author><first>M.</first><last>Manela</last></author>
-      <author><first>Alex Chengyu</first><last>Fang</last></author>
+      <author id="alex-chengyu-fang"><first>Alex Chengyu</first><last>Fang</last></author>
       <pages>63–72</pages>
       <url hash="bcafe190">Y96-1007</url>
       <doi>http://hdl.handle.net/2065/12063</doi>
@@ -97,7 +97,7 @@
     </paper>
     <paper id="10">
       <title>A Discourse Approach to Causal Sentences in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Mei-chih</first><last>Tsai</last></author>
+      <author id="mei-chih-tsai"><first>Mei-chih</first><last>Tsai</last></author>
       <pages>93–98</pages>
       <url hash="42fbf86f">Y96-1010</url>
       <doi>http://hdl.handle.net/2065/12066</doi>
@@ -121,8 +121,8 @@
     </paper>
     <paper id="13">
       <title>Neural Networks in <fixed-case>C</fixed-case>hinese Lexical Classification</title>
-      <author><first>Md Maruf</first><last>Hasan</last></author>
-      <author><first>Kim-Teng</first><last>Lua</last></author>
+      <author id="md-maruf-hasan"><first>Md Maruf</first><last>Hasan</last></author>
+      <author id="kim-teng-lua"><first>Kim-Teng</first><last>Lua</last></author>
       <pages>119–128</pages>
       <url hash="a28fd45c">Y96-1013</url>
       <doi>http://hdl.handle.net/2065/12020</doi>
@@ -131,9 +131,9 @@
     <paper id="14">
       <title>A Logical Structure for the Construction of Machine Readable Dictionaries</title>
       <author><first>Byung-Jin</first><last>Choi</last></author>
-      <author><first>Jae-Sung</first><last>Lee</last></author>
+      <author id="jaesung-lee"><first>Jae-Sung</first><last>Lee</last></author>
       <author><first>Woon-Jae</first><last>Lee</last></author>
-      <author><first>Key-Sun</first><last>Choi</last></author>
+      <author id="key-sun-choi"><first>Key-Sun</first><last>Choi</last></author>
       <pages>129–136</pages>
       <url hash="cf94d264">Y96-1014</url>
       <doi>http://hdl.handle.net/2065/12021</doi>
@@ -141,8 +141,8 @@
     </paper>
     <paper id="15">
       <title>Extraction of Thematic Roles from Dictionary Definitions</title>
-      <author><first>Michael L.</first><last>Mc Hale</last></author>
-      <author><first>Sung H.</first><last>Myaeng</last></author>
+      <author id="michael-l-mc-hale"><first>Michael L.</first><last>Mc Hale</last></author>
+      <author id="sung-hyon-myaeng"><first>Sung H.</first><last>Myaeng</last></author>
       <pages>137–146</pages>
       <url hash="2b28dd02">Y96-1015</url>
       <doi>http://hdl.handle.net/2065/12022</doi>
@@ -150,7 +150,7 @@
     </paper>
     <paper id="16">
       <title>Beyond Telicity and Affected-Theme : Semantic Factors Contributing to the Resultative Interpretation of Predicates in <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Chiharu</first><last>Uda</last></author>
+      <author id="chiharu-uda-kikuta"><first>Chiharu</first><last>Uda</last></author>
       <pages>147–156</pages>
       <url hash="ce02d450">Y96-1016</url>
       <doi>http://hdl.handle.net/2065/12023</doi>
@@ -166,9 +166,9 @@
     </paper>
     <paper id="18">
       <title><fixed-case>S</fixed-case>INICA <fixed-case>C</fixed-case>ORPUS : Design Methodology for Balanced Corpora</title>
-      <author><first>Keh-Jiann</first><last>Chen</last></author>
+      <author id="keh-jiann-chen"><first>Keh-Jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
-      <author><first>Li-Ping</first><last>Chang</last></author>
+      <author id="li-ping-chang"><first>Li-Ping</first><last>Chang</last></author>
       <author><first>Hui-Li</first><last>Hsu</last></author>
       <pages>167–176</pages>
       <url hash="3a7fa206">Y96-1018</url>
@@ -219,7 +219,7 @@
     <paper id="24">
       <title>A Proposal of <fixed-case>K</fixed-case>orean Conjugation System and its Application to Morphological Analysis</title>
       <author><first>Yoshitaka</first><last>Hirano</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>229–236</pages>
       <url hash="949daa5f">Y96-1024</url>
       <doi>http://hdl.handle.net/2065/12032</doi>
@@ -229,7 +229,7 @@
       <title>Rule-based Approach to <fixed-case>K</fixed-case>orean Morphological Disambiguation Supported by Statistical Method</title>
       <author><first>Min-Jung</first><last>Kim</last></author>
       <author><first>Hyuk-Chul</first><last>Kwon</last></author>
-      <author><first>Ae-Sun</first><last>Yoon</last></author>
+      <author id="aesun-yoon"><first>Ae-Sun</first><last>Yoon</last></author>
       <pages>237–246</pages>
       <url hash="b75f77ed">Y96-1025</url>
       <doi>http://hdl.handle.net/2065/12033</doi>
@@ -341,8 +341,8 @@
     </paper>
     <paper id="38">
       <title>Principle-based Parsing for <fixed-case>C</fixed-case>hinese</title>
-      <author><first>Charles D.</first><last>Yang</last></author>
-      <author><first>Robert C.</first><last>Berwick</last></author>
+      <author id="charles-yang"><first>Charles D.</first><last>Yang</last></author>
+      <author id="robert-c-berwick"><first>Robert C.</first><last>Berwick</last></author>
       <pages>363–371</pages>
       <url hash="52da1064">Y96-1038</url>
       <doi>http://hdl.handle.net/2065/12047</doi>
@@ -360,7 +360,7 @@
     <paper id="40">
       <title>Fast Statistical Grammar Induction</title>
       <author><first>Wide R.</first><last>Hogenhout</last></author>
-      <author><first>Yuji</first><last>Matsumoto</last></author>
+      <author id="yuji-matsumoto"><first>Yuji</first><last>Matsumoto</last></author>
       <pages>383–392</pages>
       <url hash="9e7b46d4">Y96-1040</url>
       <doi>http://hdl.handle.net/2065/12049</doi>
diff --git a/data/xml/Y98.xml b/data/xml/Y98.xml
index 6a24033660..ab40139fbc 100644
--- a/data/xml/Y98.xml
+++ b/data/xml/Y98.xml
@@ -4,7 +4,7 @@
     <meta>
       <booktitle>Proceedings of the 12th Pacific Asia Conference on Language, Information and Computation</booktitle>
       <editor><first>Jin</first><last>Guo</last></editor>
-      <editor><first>Kim Teng</first><last>Lua</last></editor>
+      <editor id="kim-teng-lua"><first>Kim Teng</first><last>Lua</last></editor>
       <editor><first>Jie</first><last>Xu</last></editor>
       <publisher>Chinese and Oriental Languages Information Processing Society</publisher>
       <address>Singapore</address>
@@ -76,7 +76,7 @@
     </paper>
     <paper id="8">
       <title>A Multiple Inheritance Analysis of the Internally-Headed Relative Clause in <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Chiharu Uda</first><last>Kikuta</last></author>
+      <author id="chiharu-uda-kikuta"><first>Chiharu Uda</first><last>Kikuta</last></author>
       <pages>82–93</pages>
       <url hash="b3937a0b">Y98-1008</url>
       <doi>http://hdl.handle.net/2065/12104</doi>
@@ -109,7 +109,7 @@
     <paper id="12">
       <title>Common Grounds as Multiple Information States</title>
       <author><first>Jae-Il</first><last>Yeom</last></author>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <pages>127–138</pages>
       <url hash="95730ce8">Y98-1012</url>
       <doi>http://hdl.handle.net/2065/12072</doi>
@@ -125,7 +125,7 @@
     </paper>
     <paper id="14">
       <title>Predictivity vs. Stipulativity in the Lexicon</title>
-      <author><first>Cornelia Maria</first><last>Verspoor</last></author>
+      <author id="karin-verspoor"><first>Cornelia Maria</first><last>Verspoor</last></author>
       <pages>152–162</pages>
       <url hash="6ae64ea6">Y98-1014</url>
       <doi>http://hdl.handle.net/2065/12074</doi>
@@ -134,7 +134,7 @@
     <paper id="15">
       <title>Using Case Prototypicality as a Semantic Primitive</title>
       <author><first>Dan-Hee</first><last>Yang</last></author>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <author><first>Mansuk</first><last>Song</last></author>
       <pages>163–171</pages>
       <url hash="72fc379d">Y98-1015</url>
@@ -160,7 +160,7 @@
     </paper>
     <paper id="18">
       <title><fixed-case>MI</fixed-case>-trigger-based Language Modelling</title>
-      <author><first>Guodong</first><last>Zhou</last></author>
+      <author id="guodong-zhou"><first>Guodong</first><last>Zhou</last></author>
       <author><first>Kim-Teng</first><last>Lua</last></author>
       <pages>195–205</pages>
       <url hash="1ad56500">Y98-1018</url>
@@ -169,8 +169,8 @@
     </paper>
     <paper id="19">
       <title>Extracting Recurrent Phrases and Terms from Texts Using a Purely Statistical Method</title>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
-      <author><first>Harold</first><last>Somers</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="harold-somers"><first>Harold</first><last>Somers</last></author>
       <pages>206–211</pages>
       <url hash="fefb2190">Y98-1019</url>
       <doi>http://hdl.handle.net/2065/12080</doi>
@@ -206,7 +206,7 @@
     <paper id="23">
       <title>Word-Sense Classification by Hierarchical Clustering</title>
       <author><first>Ken Y.K.</first><last>Lau</last></author>
-      <author><first>Robert W.P.</first><last>Luk</last></author>
+      <author id="robert-w-p-luk"><first>Robert W.P.</first><last>Luk</last></author>
       <pages>236–247</pages>
       <url hash="5dab936f">Y98-1023</url>
       <doi>http://hdl.handle.net/2065/12084</doi>
@@ -214,7 +214,7 @@
     </paper>
     <paper id="24">
       <title>Automatic Acquisition of a High-Precision Translation Lexicon from Parallel <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Corpora</title>
-      <author><first>Zhao-Ming</first><last>Gao</last></author>
+      <author id="zhao-ming-gao"><first>Zhao-Ming</first><last>Gao</last></author>
       <pages>248–254</pages>
       <url hash="29827573">Y98-1024</url>
       <doi>http://hdl.handle.net/2065/12085</doi>
@@ -222,7 +222,7 @@
     </paper>
     <paper id="25">
       <title><fixed-case>S</fixed-case>urrogater : A Simple Yet Efficient Document Condensation System</title>
-      <author><first>Joe</first><last>Zhou</last></author>
+      <author id="joe-zhou"><first>Joe</first><last>Zhou</last></author>
       <pages>255–262</pages>
       <url hash="e6867fd8">Y98-1025</url>
       <doi>http://hdl.handle.net/2065/12086</doi>
@@ -240,7 +240,7 @@
     <paper id="27">
       <title>On Removing Ambiguity in Text Understanding</title>
       <author><first>Simin</first><last>Li</last></author>
-      <author><first>Yukihiro</first><last>Ito</last></author>
+      <author id="yukihiro-itoh"><first>Yukihiro</first><last>Ito</last></author>
       <pages>271–282</pages>
       <url hash="b02d9995">Y98-1027</url>
       <doi>http://hdl.handle.net/2065/12088</doi>
@@ -267,7 +267,7 @@
     </paper>
     <paper id="30">
       <title>Syntactic Verifier as a Filter to Compound Unit Recognizer</title>
-      <author><first>Hanmin</first><last>Jung</last></author>
+      <author id="han-min-jung"><first>Hanmin</first><last>Jung</last></author>
       <author><first>Sanghwa</first><last>Yuh</last></author>
       <author><first>Taewan</first><last>Kim</last></author>
       <author><first>Dong-In</first><last>Park</last></author>
@@ -287,7 +287,7 @@
     </paper>
     <paper id="32">
       <title>The Advantages of 3<fixed-case>D</fixed-case>-Trees in Modeling Human Sentence Processing</title>
-      <author><first>Charles C.</first><last>Lee</last></author>
+      <author id="charles-c-lee"><first>Charles C.</first><last>Lee</last></author>
       <pages>316–327</pages>
       <url hash="cf52b88f">Y98-1032</url>
       <doi>http://hdl.handle.net/2065/12093</doi>
@@ -312,7 +312,7 @@
     </paper>
     <paper id="35">
       <title>Using A Semantic Classification in Parsing <fixed-case>C</fixed-case>hinese : Some Preliminary Results</title>
-      <author><first>Kok Wee</first><last>Gan</last></author>
+      <author id="kok-wee-gan"><first>Kok Wee</first><last>Gan</last></author>
       <pages>340–347</pages>
       <url hash="e471556e">Y98-1035</url>
       <doi>http://hdl.handle.net/2065/12097</doi>
diff --git a/data/xml/Y99.xml b/data/xml/Y99.xml
index 0ee459e58f..8df65099dd 100644
--- a/data/xml/Y99.xml
+++ b/data/xml/Y99.xml
@@ -17,7 +17,7 @@
     </frontmatter>
     <paper id="1">
       <title>Linguistics in an Age of Engineering</title>
-      <author><first>Christopher</first><last>Manning</last></author>
+      <author id="christopher-d-manning"><first>Christopher</first><last>Manning</last></author>
       <pages>1–1</pages>
       <url hash="f6c36b2b">Y99-1001</url>
       <doi>http://hdl.handle.net/2065/12106</doi>
@@ -41,7 +41,7 @@
     </paper>
     <paper id="4">
       <title>Lexical Information and Beyond : Constructional Inferences in Semantic Representation</title>
-      <author><first>Mei-Chun</first><last>Liu</last></author>
+      <author id="mei-chun-liu"><first>Mei-Chun</first><last>Liu</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <author><first>Ching-Yi</first><last>Lee</last></author>
       <pages>27–37</pages>
@@ -51,8 +51,8 @@
     </paper>
     <paper id="5">
       <title>Alternation Across Semantic Fields : A Study of <fixed-case>M</fixed-case>andarin Verbs of Emotion</title>
-      <author><first>Li-li</first><last>Chang</last></author>
-      <author><first>Keh-jiann</first><last>Chen</last></author>
+      <author id="li-li-chang"><first>Li-li</first><last>Chang</last></author>
+      <author id="keh-jiann-chen"><first>Keh-jiann</first><last>Chen</last></author>
       <author><first>Chu-Ren</first><last>Huang</last></author>
       <pages>39–50</pages>
       <url hash="742f2142">Y99-1005</url>
@@ -78,7 +78,7 @@
     </paper>
     <paper id="8">
       <title>Lexical Information and Pragmatic Information : Reflexivity of an Event and Resultative Constructions in <fixed-case>J</fixed-case>apanese</title>
-      <author><first>Chiharu Uda</first><last>Kikuta</last></author>
+      <author id="chiharu-uda-kikuta"><first>Chiharu Uda</first><last>Kikuta</last></author>
       <pages>75–86</pages>
       <url hash="ccc8bb07">Y99-1008</url>
       <doi>http://hdl.handle.net/2065/12139</doi>
@@ -153,8 +153,8 @@
     </paper>
     <paper id="17">
       <title>Free Word Order in a Constraint-based Implementation of Dependency Grammar</title>
-      <author><first>Tom B.Y.</first><last>Lai</last></author>
-      <author><last>Huang</last><first>Changning</first></author>
+      <author id="tom-b-y-lai"><first>Tom B.Y.</first><last>Lai</last></author>
+      <author id="changning-huang"><first>Changning</first><last>Huang</last></author>
       <pages>161–168</pages>
       <url hash="de594280">Y99-1017</url>
       <doi>http://hdl.handle.net/2065/12114</doi>
@@ -207,8 +207,8 @@
     <paper id="23">
       <title>The Lexicon in <fixed-case>FCIDB</fixed-case> : A Friendly <fixed-case>C</fixed-case>hinese Interface for <fixed-case>DBMS</fixed-case></title>
       <author><first>Da-Jinn</first><last>Wang</last></author>
-      <author><first>Tsong-Yi</first><last>Chen</last></author>
-      <author><first>Martha W.</first><last>Evens</last></author>
+      <author id="tsong-yi-chen"><first>Tsong-Yi</first><last>Chen</last></author>
+      <author id="martha-evens"><first>Martha W.</first><last>Evens</last></author>
       <pages>215–222</pages>
       <url hash="7fbe91e9">Y99-1023</url>
       <doi>http://hdl.handle.net/2065/12120</doi>
@@ -263,7 +263,7 @@
     <paper id="29">
       <title>Extraction of Simple Sentences from Mixed Sentences for Building <fixed-case>K</fixed-case>orean Case Frames</title>
       <author><first>Dan-Hee</first><last>Yang</last></author>
-      <author><first>Ik-Hwan</first><last>Lee</last></author>
+      <author id="ik-hwan-lee"><first>Ik-Hwan</first><last>Lee</last></author>
       <author><first>Mansuk</first><last>Song</last></author>
       <pages>269–276</pages>
       <url hash="d95c961e">Y99-1029</url>
@@ -273,7 +273,7 @@
     <paper id="30">
       <title>Sub-Sentential Alignment Method by Analogy</title>
       <author><first>Tantely</first><last>Andriamanankasina</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <pages>277–284</pages>
       <url hash="81eb968b">Y99-1030</url>
@@ -282,8 +282,8 @@
     </paper>
     <paper id="31">
       <title>A Study of Performance Evaluation for <fixed-case>GA</fixed-case>-<fixed-case>ILMT</fixed-case> Using Travel <fixed-case>E</fixed-case>nglish</title>
-      <author><first>Hiroshi</first><last>Echizen-ya</last></author>
-      <author><first>Kenji</first><last>Araki</last></author>
+      <author id="hiroshi-echizen-ya"><first>Hiroshi</first><last>Echizen-ya</last></author>
+      <author id="kenji-araki"><first>Kenji</first><last>Araki</last></author>
       <author><first>Yoshio</first><last>Momouchi</last></author>
       <author><first>Koji</first><last>Tochinai</last></author>
       <pages>285–292</pages>
@@ -293,8 +293,8 @@
     </paper>
     <paper id="32">
       <title>Anaphora Resolution as Lexical Cohesion Identification</title>
-      <author><first>Samuel W.K.</first><last>Chan</last></author>
-      <author><first>Benjamin K.</first><last>T’sou</last></author>
+      <author id="samuel-w-k-chan"><first>Samuel W.K.</first><last>Chan</last></author>
+      <author id="benjamin-k-tsou"><first>Benjamin K.</first><last>T’sou</last></author>
       <pages>293–304</pages>
       <url hash="d4ff2b33">Y99-1032</url>
       <doi>http://hdl.handle.net/2065/12130</doi>
diff --git a/data/yaml/name_variants.yaml b/data/yaml/name_variants.yaml
deleted file mode 100644
index ab68e1d861..0000000000
--- a/data/yaml/name_variants.yaml
+++ /dev/null
@@ -1,11263 +0,0 @@
-- canonical: {first: Anthony, last: Hughes}
-  id: anthony-hughes
-  orcid: 0009-0003-4065-1094
-  variants:
-  - {first: Anthony James, last: Hughes}
-- canonical: {first: Benjamin Matthias, last: Ruppik}
-  id: benjamin-matthias-ruppik
-  orcid: 0000-0001-9035-9217
-  variants:
-  - {first: Benjamin, last: Ruppik}
-- canonical: {first: Kranti, last: Chalamalasetti}
-  id: kranti-chalamalasetti
-  variants:
-  - {first: Chalamalasetti, last: Kranti}
-- canonical: {first: Felicia, last: Körner}
-  id: felicia-koerner
-  variants:
-  - {first: Felicia, last: Koerner}
-- canonical: {first: Pranav, last: A}
-  comment: UC Santa Cruz
-  id: pranav-a
-  similar: [pranav-anand]
-- canonical: {first: Balamurali, last: AR}
-  variants:
-  - {first: Balamurali, last: A.R.}
-  - {first: Balamurali, last: A.R}
-- canonical: {first: Solomon Teferra, last: Abate}
-  variants:
-  - {first: Solomon, last: Teferra Abate}
-  - {first: Solomon, last: Teferra}
-- canonical: {first: Ramzi, last: Abbès}
-  variants:
-  - {first: Ramzi, last: Abbes}
-- canonical: {first: Samir, last: AbdelRahman}
-  variants:
-  - {first: Samir, last: Abdelrahman}
-- canonical: {first: Anne, last: Abeillé}
-  variants:
-  - {first: Anne, last: Abeille}
-- canonical: {first: Steven, last: Abney}
-  id: steven-abney
-  variants:
-  - {first: Steve, last: Abney}
-  - {first: Steven P., last: Abney}
-- canonical: {first: Victor, last: Abrash}
-  id: victor-abrash
-- canonical: {first: José I., last: Abreu}
-  variants:
-  - {first: Jose I., last: Abreu}
-  - {first: José, last: Abreu}
-- canonical: {first: Sarkis, last: Abrilian}
-  id: sarkis-abrilian
-- canonical: {first: Ahmed, last: AbuRa’ed}
-  variants:
-  - {first: Ahmed, last: Abura’ed}
-- canonical: {first: Esref, last: Adali}
-  variants:
-  - {first: Eşref, last: Adalı}
-  - {first: Eşref, last: Adali}
-- canonical: {first: Gilles, last: Adda}
-  id: gilles-adda
-- canonical: {first: Martine, last: Adda-Decker}
-  id: martine-adda-decker
-  variants:
-  - {first: Martine, last: Adda-decker}
-- canonical: {first: David Ifeoluwa, last: Adelani}
-  variants:
-  - {first: David, last: Adelani}
-  - {first: David I., last: Adelani}
-- canonical: {first: Wei, last: Ai}
-  id: wei-ai-umich
-  orcid: 0000-0001-6271-9430
-  institution: University of Michigan
-- canonical: {first: Wei, last: Ai}
-  id: wei-ai
-  comment: May refer to several people
-- canonical: {first: Giovanni, last: Adorni}
-  id: giovanni-adorni
-- canonical: {first: Geert, last: Adriaens}
-  id: geert-adriaens
-- canonical: {first: Itziar, last: Aduriz}
-  id: itziar-aduriz
-- canonical: {first: Rodrigo, last: Agerri}
-  id: rodrigo-agerri
-- canonical: {first: Eneko, last: Agirre}
-  id: eneko-agirre
-- canonical: {first: Željko, last: Agić}
-  variants:
-  - {first: Zeljko, last: Agic}
-- canonical: {first: Shyam Sundar, last: Agrawal}
-  variants:
-  - {first: Shyam, last: Agrawal}
-- canonical: {first: David W., last: Aha}
-  variants:
-  - {first: David, last: Aha}
-- canonical: {first: Thomas, last: Ahlswede}
-  variants:
-  - {first: Thomas E., last: Ahlswede}
-- canonical: {first: Elisabeth, last: Ahlsén}
-  variants:
-  - {first: Elisabeth, last: Ahlsen}
-- canonical: {first: Faisal, last: Ahmad}
-  variants:
-  - {first: Faisal, last: Ahmed}
-- canonical: {first: Wasi, last: Ahmad}
-  variants:
-  - {first: Wasi Uddin, last: Ahmad}
-- canonical: {first: Tafseer, last: Ahmed}
-  variants:
-  - {first: Tafseer, last: Ahmed Khan}
-- canonical: {first: Byung-Gyu, last: Ahn}
-  variants:
-  - {first: Byung Gyu, last: Ahn}
-- canonical: {first: Gregory, last: Aist}
-  variants:
-  - {first: Greg, last: Aist}
-- canonical: {first: Salah, last: Ait-Mokhtar}
-  variants:
-  - {first: Salah, last: Aït-Mokhtar}
-- canonical: {first: Akiko, last: Aizawa}
-  variants:
-  - {first: Akiko N., last: Aizawa}
-- canonical: {first: Gianmaria, last: Ajani}
-  id: gianmaria-ajani
-- canonical: {first: Hiroyuki, last: Akama}
-  variants:
-  - {first: Hiroyuki, last: Akam}
-- canonical: {first: Zheng, last: Yuan}
-  comment: Cambridge
-  id: zheng-yuan-cambridge
-  orcid: 0000-0003-2406-1708
-  institution: University of Cambridge
-- canonical: {first: Zheng, last: Yuan}
-  id: zheng-yuan
-  comment: May refer to several people
-- canonical: {first: Mohammad, last: Akbar}
-  id: mohammad-akbar
-- canonical: {first: A., last: Akilandeswari}
-  variants:
-  - {first: Akilandeswari, last: A}
-- canonical: {first: Berfin, last: Aktaş}
-  variants:
-  - {first: Berfin, last: Aktas}
-- canonical: {first: Khalid, last: Al Khatib}
-  variants:
-  - {first: Khalid, last: Al-Khatib}
-- canonical: {first: Mosleh Hmoud, last: Al-Adhaileh}
-  variants:
-  - {first: Mosleh H., last: Al-Adhaileh}
-- canonical: {first: Chau Minh, last: Pham}
-  orcid: 0009-0004-0435-7450
-  institution: University of Maryland
-  variants:
-  - {first: Chau, last: Pham}
-- canonical: {first: Adil, last: Al-Kufaishi}
-  id: adil-al-kufaishi
-- canonical: {first: Yaser, last: Al-Onaizan}
-  variants:
-  - {first: Yaser, last: Al-onaizan}
-- canonical: {first: Amal, last: Al-Saif}
-  variants:
-  - {first: Amal, last: Alsaif}
-- canonical: {first: Vicent, last: Alabau}
-  variants:
-  - {first: Vicente, last: Alabau}
-- canonical: {first: Jesujoba, last: Alabi}
-  variants:
-  - {first: Jesujoba O., last: Alabi}
-  - {first: Jesujoba Oluwadara, last: Alabi}
-- canonical: {first: Danniel Liwanag, last: Alcantara}
-  variants:
-  - {first: Danniel, last: Alcantara}
-- canonical: {first: Izaskun, last: Aldezabal}
-  id: izaskun-aldezabal
-- canonical: {first: Iñaki, last: Alegría}
-  id: inaki-alegria
-  variants:
-  - {first: Iñaki, last: Alegria}
-  - {first: Inaki, last: Alegria}
-- canonical: {first: Beatrice, last: Alex}
-  variants:
-  - {first: Bea, last: Alex}
-- canonical: {first: Zoltán, last: Alexin}
-  id: zoltan-alexin
-- canonical: {first: James, last: Allan}
-  comment: UMass Amherst
-  id: james-allan
-  similar: [james-allen]
-- canonical: {first: James, last: Allen}
-  comment: Rochester
-  id: james-allen
-  similar: [james-allan]
-  variants:
-  - {first: James F., last: Allen}
-- canonical: {first: Jonathan, last: Allen}
-  variants:
-  - {first: Jonathan, last: All}
-- canonical: {first: Hector, last: Allende-Cid}
-  variants:
-  - {first: Héctor, last: Allende}
-  - {first: Héctor, last: Allende-Cid}
-- canonical: {first: Fil, last: Alleva}
-  id: fil-alleva
-  variants:
-  - {first: Fileno, last: Alleva}
-- canonical: {first: José João, last: Almeida}
-  variants:
-  - {first: Jose Joao, last: Almeida}
-- canonical: {first: Miguel B., last: Almeida}
-  variants:
-  - {first: Miguel, last: Almeida}
-- canonical: {first: Huda, last: Almuzaini}
-  id: huda-almuzaini
-- canonical: {first: Jose M., last: Alonso}
-  variants:
-  - {first: Jose, last: Alonso}
-- canonical: {first: Miguel A., last: Alonso}
-  variants:
-  - {first: Miguel, last: Alonso Pardo}
-  - {first: Miguel A., last: Alonso Pardo}
-- canonical: {first: Laura, last: Alonso Alemany}
-  variants:
-  - {first: Laura, last: Alonso i Alemany}
-  - {first: Laura, last: Alonso}
-- canonical: {first: Erick, last: Alphonse}
-  id: erick-alphonse
-- canonical: {first: Hiyan, last: Alshawi}
-  variants:
-  - {first: Hiyan, last: Alsawi}
-- canonical: {first: Romina, last: Altamirano}
-  variants:
-  - {first: Ivana Romina, last: Altamirano}
-- canonical: {first: Mohamed, last: Altantawy}
-  variants:
-  - {first: Mohamed, last: AlTantawy}
-- canonical: {first: Sandra, last: Aluísio}
-  variants:
-  - {first: Sandra Maria, last: Aluísio}
-  - {first: Sandra, last: Aluisio}
-  - {first: Sandra M., last: Aluísio}
-- canonical: {first: Carlos, last: Alzate}
-  variants:
-  - {first: Carlos, last: Alzate Perez}
-- canonical: {first: Diego Raphael, last: Amancio}
-  variants:
-  - {first: Diego, last: Amancio}
-- canonical: {first: Shin-ya, last: Amano}
-  variants:
-  - {first: Sin-ya, last: Amano}
-- canonical: {first: Fredy A., last: Amaya}
-  id: fredy-a-amaya
-- canonical: {first: Juan Carlos, last: Amengual}
-  id: juan-carlos-amengual
-- canonical: {first: Mohamed R., last: Amer}
-  variants:
-  - {first: Mohamed, last: Amer}
-- canonical: {first: Enrique, last: Amigó}
-  variants:
-  - {first: Enrique, last: Amigo}
-- canonical: {first: Massih R., last: Amini}
-  variants:
-  - {first: Massih-Reza, last: Amini}
-- canonical: {first: Reinald Kim, last: Amplayo}
-  variants:
-  - {first: Reinald, last: Kim Amplayo}
-- canonical: {first: Marcelo Adriano, last: Amâncio}
-  variants:
-  - {first: Marcelo, last: Amancio}
-- canonical: {first: Pranav, last: Anand}
-  comment: Dayta AI
-  id: pranav-anand
-  similar: [pranav-a]
-- canonical: {first: Animashree, last: Anandkumar}
-  variants:
-  - {first: Anima, last: Anandkumar}
-- canonical: {first: Øistein E., last: Andersen}
-  variants:
-  - {first: Øistein, last: Andersen}
-- canonical: {first: Andrew J., last: Anderson}
-  variants:
-  - {first: Andrew, last: Anderson}
-- canonical: {first: Anne H., last: Anderson}
-  variants:
-  - {first: Anne, last: Anderson}
-- canonical: {first: Kenneth M., last: Anderson}
-  variants:
-  - {first: Kenneth, last: Anderson}
-  - {first: Ken, last: Anderson}
-- canonical: {first: Tim, last: Anderson}
-  variants:
-  - {first: Timothy, last: Anderson}
-- canonical: {first: Winston N, last: Anderson}
-  variants:
-  - {first: Winston, last: Anderson}
-- canonical: {first: Shinichi, last: Ando}
-  variants:
-  - {first: Sinichi, last: Ando}
-  - {first: Shin-ichi, last: Ando}
-  - {first: Shin-Ichi, last: Ando}
-- canonical: {first: Elisabeth, last: Andre}
-  variants:
-  - {first: Elisabeth, last: André}
-- canonical: {first: Alexandre, last: Andreewsky}
-  comment: LIMSI
-  id: alexandre-andreewsky
-  similar: [alexander-andreyewsky]
-- canonical: {first: Alexander, last: Andreyewsky}
-  comment: IBM
-  id: alexander-andreyewsky
-  similar: [alexandre-andreewsky]
-- canonical: {first: Peter, last: Anick}
-  variants:
-  - {first: Peter G., last: Anick}
-- canonical: {first: Olatz, last: Ansa}
-  id: olatz-ansa
-- canonical: {first: Georges, last: Antoniadis}
-  id: georges-antoniadis
-- canonical: {first: Juliano D., last: Antonio}
-  id: juliano-d-antonio
-- canonical: {first: Waqas, last: Anwar}
-  variants:
-  - {first: Muhammad Waqas, last: Anwar}
-- canonical: {first: Douglas, last: Appelt}
-  variants:
-  - {first: Douglas E., last: Appelt}
-  - {first: Doug, last: Appelt}
-- canonical: {first: Noriko H., last: Arai}
-  variants:
-  - {first: Noriko, last: Arai}
-- canonical: {first: Kenji, last: Araki}
-  id: kenji-araki
-- canonical: {first: Masahiro, last: Araki}
-  id: masahiro-araki
-- canonical: {first: Mihael, last: Arcan}
-  variants:
-  - {first: Mihael, last: Arčan}
-- canonical: {first: Nikolay, last: Arefyev}
-  variants:
-  - {first: Nikolay, last: Arefiev}
-- canonical: {first: Nerea, last: Areta}
-  id: nerea-areta
-- canonical: {first: Susan, last: Armstrong}
-  id: susan-armstrong
-  variants:
-  - {first: Susan, last: Warwick-Armstrong}
-  - {first: Susan, last: Warwick}
-- canonical: {first: Alan R., last: Aronson}
-  variants:
-  - {first: Alan, last: Aronson}
-- canonical: {first: Xabier, last: Arregi}
-  id: xabier-arregi
-- canonical: {first: Jose Mari, last: Arriola}
-  id: jose-mari-arriola
-- canonical: {first: Núria, last: Artigas}
-  id: nuria-artigas
-- canonical: {first: Xabier, last: Artola}
-  id: xabier-artola
-- canonical: {first: Kavosh, last: Asadi Atui}
-  variants:
-  - {first: Kavosh, last: Asadi}
-- canonical: {first: Noushin Rezapour, last: Asheghi}
-  variants:
-  - {first: Noushin, last: Rezapour Asheghi}
-- canonical: {first: Nicholas, last: Asher}
-  variants:
-  - {first: Nicolas, last: Asher}
-- canonical: {first: Kevin D., last: Ashley}
-  variants:
-  - {first: Kevin, last: Ashley}
-- canonical: {first: Àlex R., last: Atrio}
-  variants:
-  - {first: Àlex, last: Atrio}
-- canonical: {first: Jordi, last: Atserias}
-  id: jordi-atserias
-  variants:
-  - {first: Jordi, last: Atserias Batalla}
-- canonical: {first: Mohamed, last: Attia}
-  id: mohamed-attia
-- canonical: {first: Eric, last: Atwell}
-  variants:
-  - {first: Eric Steven, last: Atwell}
-  - {first: Eric S., last: Atwell}
-- canonical: {first: Steve, last: Austin}
-  id: steve-austin
-- canonical: {first: Luciana Beatriz, last: Avila}
-  variants:
-  - {first: Luciana Beatriz, last: Ávila}
-  - {first: Luciana, last: Ávila}
-- canonical: {first: Aiti, last: Aw}
-  variants:
-  - {first: AiTi, last: Aw}
-  - {first: Ai Ti, last: Aw}
-- canonical: {first: Christelle, last: Ayache}
-  id: christelle-ayache
-- canonical: {first: Necip Fazil, last: Ayan}
-  variants:
-  - {first: Necip, last: Fazil Ayan}
-- canonical: {first: Damaris, last: Ayuso}
-  id: damaris-ayuso
-  variants:
-  - {first: Damaris M., last: Ayuso}
-- canonical: {first: Saliha, last: Azzam}
-  id: saliha-azzam
-- canonical: {first: Harald, last: Baayen}
-  variants:
-  - {first: R. Harald, last: Baayen}
-- canonical: {first: Ismail, last: Babaoğlu}
-  variants:
-  - {first: Ismail, last: Babaoglu}
-- canonical: {first: Ciprian, last: Bacalu}
-  id: ciprian-bacalu
-- canonical: {first: Ngo Xuan, last: Bach}
-  variants:
-  - {first: Ngo, last: Xuan Bach}
-- canonical: {first: Joan, last: Bachenko}
-  id: joan-bachenko
-- canonical: {first: Daniel, last: Bachut}
-  id: daniel-bachut
-- canonical: {first: Brett W., last: Bader}
-  variants:
-  - {first: Brett, last: Bader}
-- canonical: {first: Adriana, last: Badulescu}
-  variants:
-  - {first: Adriana, last: Bădulescu}
-- canonical: {first: Hee-Sook, last: Bae}
-  variants:
-  - {first: Hee Sook, last: Bae}
-- canonical: {first: Erik, last: Baert}
-  id: erik-baert
-- canonical: {first: Mirko, last: Baglioni}
-  id: mirko-baglioni
-- canonical: {first: Jeanne, last: Baguenier Desormeaux}
-  variants:
-  - {first: Jeanne, last: Baguenier-Desormeaux}
-- canonical: {first: Lalit R., last: Bahl}
-  id: lalit-r-bahl
-- canonical: {first: Mohammad, last: Bahrani}
-  id: mohammad-bahrani
-- canonical: {first: Ruzena, last: Bajcsy}
-  id: ruzena-bajcsy
-- canonical: {first: Ondřej, last: Bajgar}
-  variants:
-  - {first: Ondrej, last: Bajgar}
-- canonical: {first: Stylianos, last: Bakamidis}
-  id: stylianos-bakamidis
-- canonical: {first: Collin F., last: Baker}
-  variants:
-  - {first: Collin, last: Baker}
-- canonical: {first: George, last: Baker}
-  id: george-baker
-  variants:
-  - {first: George Arthur, last: Baker}
-- canonical: {first: James, last: Baker}
-  variants:
-  - {first: James K., last: Baker}
-- canonical: {first: Janet, last: Baker}
-  variants:
-  - {first: Janet M., last: Baker}
-- canonical: {first: Kathryn, last: Baker}
-  variants:
-  - {first: Kathryn L., last: Baker}
-- canonical: {first: Pedro, last: Balage Filho}
-  variants:
-  - {first: Pedro, last: Balage}
-  - {first: Pedro Paulo, last: Balage Filho}
-  - {first: Pedro P. Balage, last: Filho}
-  - {first: Pedro, last: Filho}
-- canonical: {first: Alexandra, last: Balahur}
-  variants:
-  - {first: Alexandra, last: Balahur-Dobrescu}
-- canonical: {first: Timothy, last: Baldwin}
-  variants:
-  - {first: Tim, last: Baldwin}
-- canonical: {first: Catherine N., last: Ball}
-  variants:
-  - {first: Catherine, last: Ball}
-- canonical: {first: Bruce W., last: Ballard}
-  id: bruce-w-ballard
-  variants:
-  - {first: Bruce, last: Ballard}
-- canonical: {first: Rafael E., last: Banchs}
-  variants:
-  - {first: Rafael, last: Banchs}
-- canonical: {first: Sivaji, last: Bandyopadhyay}
-  variants:
-  - {first: Sivaji, last: Bandopadhyay}
-  - {first: Sivaju, last: Bandyopadhyay}
-  - {first: Sivaji, last: B}
-- canonical: {first: Eduardo R., last: Banga}
-  id: eduardo-r-banga
-  variants:
-  - {first: Eduardo, last: R. Banga}
-  - {first: Eduardo Rodríguez, last: Banga}
-- canonical: {first: Srinivas, last: Bangalore}
-  variants:
-  - {first: B., last: Srinivas}
-  - {first: '', last: Srinivas}
-- canonical: {first: Forrest, last: Bao}
-  variants:
-  - {first: Forrest Sheng, last: Bao}
-- canonical: {first: Petra, last: Barancikova}
-  variants:
-  - {first: Petra, last: Barančíková}
-- canonical: {first: Cătălina, last: Barbu}
-  id: catalina-barbu
-  variants:
-  - {first: Catalina, last: Barbu}
-- canonical: {first: Verginica, last: Barbu Mititelu}
-  variants:
-  - {first: Verginica Barbu, last: Mititelu}
-- canonical: {first: Anup, last: Barman}
-  variants:
-  - {first: Anup Kr., last: Barman}
-- canonical: {first: John, last: Barnden}
-  id: john-barnden
-  variants:
-  - {first: John A., last: Barnden}
-- canonical: {first: Marco, last: Baroni}
-  id: marco-baroni
-- canonical: {first: Roberto, last: Barra-Chicote}
-  variants:
-  - {first: Roberto Barra, last: Chicote}
-- canonical: {first: Sergio, last: Barrachina}
-  id: sergio-barrachina
-- canonical: {first: Claude, last: Barras}
-  id: claude-barras
-- canonical: {first: Caroline, last: Barriere}
-  variants:
-  - {first: Caroline, last: Barrière}
-- canonical: {first: Chris, last: Barry}
-  id: chris-barry
-- canonical: {first: Valentina, last: Bartalesi Lenzi}
-  id: valentina-bartalesi-lenzi
-- canonical: {first: François, last: Barthélemy}
-  variants:
-  - {first: Francois, last: Barthelemy}
-- canonical: {first: G. Edward, last: Barton}
-  variants:
-  - {first: G. Edward, last: 'Barton, Jr.'}
-- canonical: {first: Guntis, last: Barzdins}
-  variants:
-  - {first: Guntis, last: Bārzdiņš}
-- canonical: {first: Karine, last: Baschung}
-  id: karine-baschung
-- canonical: {first: Roberto, last: Basili}
-  id: roberto-basili
-- canonical: {first: Colin, last: Batchelor}
-  variants:
-  - {first: Colin R., last: Batchelor}
-- canonical: {first: John, last: Bateman}
-  variants:
-  - {first: John A., last: Bateman}
-- canonical: {first: Madeleine, last: Bates}
-  id: madeleine-bates
-  variants:
-  - {first: Madeline, last: Bates}
-- canonical: {first: Riza Theresa, last: Batista-Navarro}
-  variants:
-  - {first: Riza, last: Batista-Navarro}
-- canonical: {first: Anton, last: Batliner}
-  id: anton-batliner
-- canonical: {first: Istvan, last: Batori}
-  id: istvan-batori
-- canonical: {first: Marco, last: Battista}
-  id: marco-battista
-- canonical: {first: William A., last: 'Baumgartner, Jr.'}
-  variants:
-  - {first: William A., last: Baumgartner Jr.}
-  - {first: William A., last: Baumgartner}
-  - {first: William, last: Baumgartner}
-  - {first: William, last: Baumgartner Jr.}
-- canonical: {first: Samuel, last: Bayer}
-  variants:
-  - {first: Sam, last: Bayer}
-- canonical: {first: Andrew David, last: Beale}
-  variants:
-  - {first: Andrew, last: David}
-- canonical: {first: David L., last: Bean}
-  variants:
-  - {first: David, last: Bean}
-- canonical: {first: John, last: Bear}
-  id: john-bear
-- canonical: {first: Hannah, last: Bechara}
-  variants:
-  - {first: Hanna, last: Béchara}
-  - {first: Hanna, last: Bechara}
-  - {first: Hannah, last: Béchara}
-- canonical: {first: Frederic, last: Bechet}
-  id: frederic-bechet
-  variants:
-  - {first: Frédéric, last: Bechét}
-  - {first: Frédéric, last: Béchet}
-  - {first: Frederic, last: Béchet}
-- canonical: {first: Chedi, last: Bechikh Ali}
-  variants:
-  - {first: Chedi, last: Bechikh}
-- canonical: {first: Daniel, last: Beck}
-  variants:
-  - {first: Daniel Emilio, last: Beck}
-- canonical: {first: Lee, last: Becker}
-  variants:
-  - {first: Lee A., last: Becker}
-- canonical: {first: Russell, last: Beckley}
-  variants:
-  - {first: Russ, last: Beckley}
-- canonical: {first: Paul, last: Bedaride}
-  variants:
-  - {first: Paul, last: Bédaride}
-- canonical: {first: Cosmin Adrian, last: Bejan}
-  variants:
-  - {first: Cosmin, last: Adrian Bejan}
-  - {first: Cosmin, last: Bejan}
-- canonical: {first: Núria, last: Bel}
-  variants:
-  - {first: Nuria, last: Bel}
-- canonical: {first: Gemma, last: Bel-Enguix}
-  variants:
-  - {first: Gemma Bel, last: Enguix}
-  - {first: Gemma, last: Bel Enguix}
-- canonical: {first: Julie, last: Belião}
-  variants:
-  - {first: Julie, last: Beliao}
-- canonical: {first: Narjès, last: Bellamine Ben Saoud}
-  variants:
-  - {first: Narjès Bellamine Ben, last: Saoud}
-- canonical: {first: Patrice, last: Bellot}
-  id: patrice-bellot
-- canonical: {first: Valérie, last: Bellynck}
-  variants:
-  - {first: Valerie, last: Bellynck}
-- canonical: {first: Islam, last: Beltagy}
-  id: islam-beltagy
-- canonical: {first: Robert S., last: Belvin}
-  variants:
-  - {first: Robert, last: Belvin}
-  - {first: Robert S., last: Melvin}
-- canonical: {first: Anja, last: Belz}
-  variants:
-  - {first: Anya, last: Belz}
-- canonical: {first: Roni, last: Ben Aharon}
-  variants:
-  - {first: Roni, last: Ben-Aharon}
-- canonical: {first: Abdelmajid, last: Ben Hamadou}
-  variants:
-  - {first: Abdelmajid, last: Ben hamadou}
-  - {first: Abdelmajid, last: Benhamadou}
-  - {first: Abdelmajid-Lin, last: Ben Hamadou}
-- canonical: {first: Abderrahim, last: Benabbou}
-  id: abderrahim-benabbou
-- canonical: {first: Farah, last: Benamara}
-  variants:
-  - {first: Farah, last: Beanamara}
-  - {first: Farah, last: Benamara Zitoune}
-- canonical: {first: Chomicha, last: Bendahman}
-  id: chomicha-bendahman
-- canonical: {first: Emily M., last: Bender}
-  variants:
-  - {first: Emily, last: Bender}
-- canonical: {first: José-Miguel, last: Benedí}
-  id: jose-miguel-benedi
-  variants:
-  - {first: Jose-Miguel, last: Benedi}
-  - {first: José Miguel, last: Benedí}
-  - {first: José Miguel, last: Benedí Ruíz}
-  - {first: José-M., last: Benedí}
-  - {first: José Miguel, last: Benedi Ruiz}
-  - {first: José-Miguel, last: Benedí Ruíz}
-- canonical: {first: Simon, last: Benigeri}
-  id: simon-benigeri
-  variants:
-  - {first: Simon, last: Ben Igeri}
-- canonical: {first: Andrew, last: Bennett}
-  id: andrew-bennett
-  similar: [andrew-bennetts]
-- canonical: {first: Paul, last: Bennett}
-  variants:
-  - {first: Paul N., last: Bennett}
-- canonical: {first: Andrew, last: Bennetts}
-  id: andrew-bennetts
-  similar: [andrew-bennett]
-- canonical: {first: Alexander, last: Berg}
-  variants:
-  - {first: Alex, last: Berg}
-  - {first: Alexander C, last: Berg}
-- canonical: {first: Tamara, last: Berg}
-  variants:
-  - {first: Tamara L., last: Berg}
-  - {first: Tamara L, last: Berg}
-- canonical: {first: Carole, last: Bergamini}
-  id: carole-bergamini
-- canonical: {first: Adam, last: Berger}
-  variants:
-  - {first: Adam L., last: Berger}
-- canonical: {first: Maria, last: Berger}
-  variants:
-  - {first: Maria, last: Moritz}
-- canonical: {first: Raffaella, last: Bernardi}
-  id: raffaella-bernardi
-- canonical: {first: Niels Ole, last: Bernsen}
-  variants:
-  - {first: Niels Ole, last: Bernse}
-  - {first: Niels O., last: Bernsen}
-- canonical: {first: Elisa, last: Bertino}
-  id: elisa-bertino
-- canonical: {first: Núria, last: Bertomeu}
-  variants:
-  - {first: Nuria, last: Bertomeu}
-  - {first: Núria, last: Bertomeu Castelló}
-  - {first: Núria Bertomeu, last: Castelló}
-- canonical: {first: Robert C., last: Berwick}
-  variants:
-  - {first: Robert, last: Berwick}
-  - {first: Robert Cregar, last: Berwick}
-- canonical: {first: Gabriel G., last: Bes}
-  id: gabriel-g-bes
-  variants:
-  - {first: Gabriel G., last: Bès}
-  - {first: Gabriel, last: Bès}
-- canonical: {first: Laurent, last: Besacier}
-  id: laurent-besacier
-- canonical: {first: Štefan, last: Beňuš}
-  variants:
-  - {first: Stefan, last: Benus}
-  - {first: S̆tefan, last: Ben̆us̆}
-- canonical: {first: Akshar, last: Bharati}
-  variants:
-  - {first: Akshar, last: Bharathi}
-- canonical: {first: Irshad, last: Bhat}
-  variants:
-  - {first: Irshad A., last: Bhat}
-- canonical: {first: Rajesh, last: Bhat}
-  id: rajesh-bhat
-  similar: [rajesh-bhatt]
-- canonical: {first: Riyaz Ahmad, last: Bhat}
-  variants:
-  - {first: Riyaz A., last: Bhat}
-- canonical: {first: Rajesh, last: Bhatt}
-  comment: UMass Amherst
-  id: rajesh-bhatt
-  similar: [rajesh-bhat]
-- canonical: {first: Pushpak, last: Bhattacharyya}
-  variants:
-  - {first: Pushpak, last: Bhattacharya}
-- canonical: {first: Virendrakumar, last: Bhavsar}
-  variants:
-  - {first: Virendra, last: Bhavsar}
-- canonical: {first: Plaban Kr., last: Bhowmick}
-  variants:
-  - {first: Plaban, last: Bhowmick}
-- canonical: {first: Ergun, last: Bicici}
-  variants:
-  - {first: Ergun, last: Biçici}
-- canonical: {first: Eckhard, last: Bick}
-  id: eckhard-bick
-- canonical: {first: Timothy W., last: Bickmore}
-  variants:
-  - {first: Timothy, last: Bickmore}
-- canonical: {first: Chris, last: Biemann}
-  variants:
-  - {first: Christian, last: Biemann}
-- canonical: {first: Janusz Stanisław, last: Bien}
-  variants:
-  - {first: Janusz Stanislaw, last: Bien}
-  - {first: Janusz S., last: Bień}
-  - {first: Janusz S., last: Bien}
-- canonical: {first: Marie A., last: Bienkowski}
-  variants:
-  - {first: Marie, last: Bienkowski}
-- canonical: {first: Alan W., last: Biermann}
-  id: alan-w-biermann
-  variants:
-  - {first: Alan, last: Biermann}
-- canonical: {first: Jeffrey P., last: Bigham}
-  variants:
-  - {first: Jeffrey, last: Bigham}
-- canonical: {first: Daniel M., last: Bikel}
-  variants:
-  - {first: Daniel, last: Bikel}
-  - {first: Dan, last: Bikel}
-- canonical: {first: Dimitrios, last: Bilidas}
-  variants:
-  - {first: Dimitris, last: Bilidas}
-- canonical: {first: Eric, last: Bilinski}
-  variants:
-  - {first: Éric, last: Bilinski}
-- canonical: {first: Mokhtar B., last: Billami}
-  variants:
-  - {first: Mokhtar-Boumedyen, last: Billami}
-- canonical: {first: Jeff, last: Bilmes}
-  variants:
-  - {first: Jeff A., last: Bilmes}
-- canonical: {first: Matthew W., last: Bilotti}
-  variants:
-  - {first: Matthew, last: Bilotti}
-- canonical: {first: Milan, last: Bily}
-  variants:
-  - {first: Milan, last: Bílý}
-- canonical: {first: Diana, last: Binnenpoorte}
-  id: diana-binnenpoorte
-- canonical: {first: Elizabeth, last: Bishop}
-  id: elizabeth-bishop
-- canonical: {first: Alan W., last: Black}
-  id: alan-w-black
-  variants:
-  - {first: Alan, last: Black}
-  - {first: Alan W, last: Black}
-- canonical: {first: Ezra, last: Black}
-  id: ezra-black
-  variants:
-  - {first: Ezra W., last: Black}
-- canonical: {first: Lois M., last: Black}
-  variants:
-  - {first: Lois, last: Black}
-- canonical: {first: William J., last: Black}
-  id: william-j-black
-  variants:
-  - {first: William J, last: Black}
-  - {first: William, last: Black}
-- canonical: {first: Frédéric, last: Blain}
-  variants:
-  - {first: Frederic, last: Blain}
-- canonical: {first: Hervé, last: Blanchon}
-  variants:
-  - {first: Herve, last: Blanchon}
-- canonical: {first: Christian, last: Blaschke}
-  id: christian-blaschke
-- canonical: {first: Nate, last: Blaylock}
-  id: nate-blaylock
-- canonical: {first: David, last: Blei}
-  variants:
-  - {first: David M., last: Blei}
-- canonical: {first: Hatte, last: Blejer}
-  variants:
-  - {first: Hatte R., last: Blejer}
-- canonical: {first: André, last: Blessing}
-  variants:
-  - {first: Andre, last: Blessing}
-- canonical: {first: Hans Ulrich, last: Block}
-  variants:
-  - {first: Hans-Ulrich, last: Block}
-- canonical: {first: Marsden S., last: Blois}
-  id: marsden-s-blois
-- canonical: {first: Phil, last: Blunsom}
-  variants:
-  - {first: Philip, last: Blunsom}
-- canonical: {first: Tamara, last: Bobić}
-  variants:
-  - {first: Tamara, last: Bobic}
-- canonical: {first: Daniel, last: Bobrow}
-  variants:
-  - {first: Daniel G., last: Bobrow}
-- canonical: {first: Robert, last: Bobrow}
-  id: robert-bobrow
-  variants:
-  - {first: Robert J., last: Bobrow}
-  - {first: Rusty, last: Bobrow}
-- canonical: {first: Péter Pál, last: Boda}
-  variants:
-  - {first: Péter, last: Boda}
-- canonical: {first: Adams B., last: Bodomo}
-  variants:
-  - {first: Adams, last: Bodomo}
-- canonical: {first: Guido, last: Boella}
-  id: guido-boella
-- canonical: {first: Katharina, last: Boesefeldt}
-  id: katharina-boesefeldt
-- canonical: {first: Christopher, last: Bogart}
-  variants:
-  - {first: Chris, last: Bogart}
-- canonical: {first: Branimir, last: Boguraev}
-  id: branimir-boguraev
-  variants:
-  - {first: Branimir K., last: Boguraev}
-  - {first: Bran, last: Boguraev}
-- canonical: {first: Igor, last: Boguslavsky}
-  variants:
-  - {first: Igor M., last: Boguslavsky}
-- canonical: {first: Dan, last: Bohus}
-  id: dan-bohus
-  variants:
-  - {first: Dan, last: Bohuş}
-- canonical: {first: Sean, last: Boisen}
-  id: sean-boisen
-- canonical: {first: Christian, last: Boitet}
-  id: christian-boitet
-- canonical: {first: Loic, last: Boizou}
-  variants:
-  - {first: Loïc, last: Boizou}
-- canonical: {first: Ondřej, last: Bojar}
-  variants:
-  - {first: Ondrej, last: Bojar}
-- canonical: {first: Julie E., last: Boland}
-  variants:
-  - {first: Julie, last: Boland}
-- canonical: {first: Daniel, last: Bolaños}
-  variants:
-  - {first: Daniel, last: Bolanos}
-- canonical: {first: Gemma, last: Boleda}
-  variants:
-  - {first: Gemma, last: Boleda Torrent}
-- canonical: {first: Andrea, last: Bolognesi}
-  id: andrea-bolognesi
-- canonical: {first: Igor A., last: Bolshakov}
-  variants:
-  - {first: Igor, last: Bolshakov}
-- canonical: {first: Antonio, last: Bonafonte}
-  id: antonio-bonafonte
-- canonical: {first: Jean-François, last: Bonastre}
-  id: jean-francois-bonastre
-  variants:
-  - {first: Jean-Francois, last: Bonastre}
-- canonical: {first: Guillaume, last: Bonfante}
-  variants:
-  - {first: Guillame, last: Bonfante}
-- canonical: {first: Claire, last: Bonial}
-  variants:
-  - {first: Claire N., last: Bonial}
-- canonical: {first: Marco Aldo Piccolino, last: Boniforti}
-  variants:
-  - {first: Marco Aldo, last: Piccolino Boniforti}
-- canonical: {first: Hélène, last: Bonneau-Maynard}
-  id: helene-bonneau-maynard
-  variants:
-  - {first: Hélène, last: Maynard}
-- canonical: {first: Kalina, last: Bontcheva}
-  id: kalina-bontcheva
-  similar: [katina-bontcheva]
-- canonical: {first: Katina, last: Bontcheva}
-  id: katina-bontcheva
-  similar: [kalina-bontcheva]
-- canonical: {first: German, last: Bordel}
-  id: german-bordel
-  variants:
-  - {first: Germán, last: Bordel}
-- canonical: {first: Emanuela, last: Boroş}
-  variants:
-  - {first: Emanuela, last: Boroș}
-  - {first: Emanuela, last: Boros}
-- canonical: {first: Tiberiu, last: Boroş}
-  variants:
-  - {first: Tiberiu, last: Boroș}
-  - {first: Tiberiu, last: Boros}
-- canonical: {first: Sonja, last: Bosch}
-  variants:
-  - {first: Sonja E., last: Bosch}
-- canonical: {first: Matko, last: Bosnjak}
-  variants:
-  - {first: Matko, last: Bošnjak}
-- canonical: {first: Elizabeth C., last: Botha}
-  id: elizabeth-c-botha
-- canonical: {first: Alexandre, last: Bouchard-Côté}
-  variants:
-  - {first: Alexandre, last: Bouchard}
-- canonical: {first: Abdessalam, last: Bouchekif}
-  variants:
-  - {first: Abdesselam, last: Bouchekif}
-- canonical: {first: Mohamed Mahdi, last: Boudabous}
-  variants:
-  - {first: Mohamed, last: Boudabous}
-- canonical: {first: Aicha, last: Bouhjar}
-  variants:
-  - {first: Aïcha, last: Bouhjar}
-- canonical: {first: Pierrette, last: Bouillon}
-  id: pierrette-bouillon
-- canonical: {first: Philippe, last: Boula de Mareüil}
-  id: philippe-boula-de-mareuil
-  variants:
-  - {first: Philippe Boula, last: de Mareüil}
-- canonical: {first: Gilles, last: Boulianne}
-  id: gilles-boulianne
-- canonical: {first: Paolo, last: Bouquet}
-  id: paolo-bouquet
-- canonical: {first: Laurent, last: Bourbeau}
-  id: laurent-bourbeau
-- canonical: {first: Caroline, last: Bousquet-Vernhettes}
-  variants:
-  - {first: Caroline, last: Bousquet}
-- canonical: {first: Lou, last: Boves}
-  variants:
-  - {first: Louis, last: Boves}
-- canonical: {first: Samuel, last: Bowman}
-  variants:
-  - {first: Samuel R., last: Bowman}
-  - {first: Sam, last: Bowman}
-- canonical: {first: Stephen, last: Boxwell}
-  variants:
-  - {first: Stephen A., last: Boxwell}
-- canonical: {first: Richard D., last: Boyce}
-  variants:
-  - {first: Richard, last: Boyce}
-- canonical: {first: Andrew, last: Boyd}
-  variants:
-  - {first: Andrew D., last: Boyd}
-- canonical: {first: Amber, last: Boydstun}
-  variants:
-  - {first: Amber E., last: Boydstun}
-- canonical: {first: Kristy, last: Boyer}
-  variants:
-  - {first: Kristy Elizabeth, last: Boyer}
-- canonical: {first: Cem, last: Bozsahin}
-  variants:
-  - {first: Cem, last: Bozşahin}
-  - {first: H. Cem, last: Bozsahin}
-- canonical: {first: Olivier, last: Boëffard}
-  variants:
-  - {first: Olivier, last: Boeffard}
-- canonical: {first: Lisa, last: Braden-Harder}
-  variants:
-  - {first: Lisa C., last: Braden-Harder}
-- canonical: {first: Deborah, last: Brady}
-  variants:
-  - {first: Deb, last: Brady}
-- canonical: {first: Annelies, last: Braffort}
-  id: annelies-braffort
-- canonical: {first: S.R.K., last: Branavan}
-  variants:
-  - {first: S. R. K., last: Branavan}
-- canonical: {first: António, last: Branco}
-  variants:
-  - {first: Antonio, last: Branco}
-  - {first: Antonio H., last: Branco}
-  - {first: António Horta, last: Branco}
-- canonical: {first: Andrew, last: Brasher}
-  id: andrew-brasher
-- canonical: {first: Harry, last: Bratt}
-  id: harry-bratt
-- canonical: {first: Adrian, last: Braşoveanu}
-  variants:
-  - {first: Adrian, last: Brasoveanu}
-- canonical: {first: Eric, last: Breck}
-  variants:
-  - {first: Eric J., last: Breck}
-- canonical: {first: D. S., last: Bree}
-  variants:
-  - {first: D.S., last: Bree}
-- canonical: {first: Jason, last: Brenier}
-  variants:
-  - {first: Jason M., last: Brenier}
-- canonical: {first: Susan E., last: Brennan}
-  variants:
-  - {first: Susan, last: Brennan}
-- canonical: {first: Xavier, last: Briffault}
-  id: xavier-briffault
-- canonical: {first: Ted, last: Briscoe}
-  id: ted-briscoe
-  variants:
-  - {first: Edward, last: Briscoe}
-- canonical: {first: George Aaron, last: Broadwell}
-  variants:
-  - {first: Aaron, last: Broadwell}
-  - {first: G. Aaron, last: Broadwell}
-- canonical: {first: Daan, last: Broeder}
-  id: daan-broeder
-- canonical: {first: Michael K., last: Brown}
-  variants:
-  - {first: Michael, last: Brown}
-- canonical: {first: Peter F., last: Brown}
-  id: peter-f-brown
-- canonical: {first: Ralf D., last: Brown}
-  variants:
-  - {first: Ralf, last: Brown}
-- canonical: {first: Susan Windisch, last: Brown}
-  variants:
-  - {first: Susan, last: Windisch Brown}
-  - {first: Susan, last: Brown}
-  - {first: Susan W., last: Brown}
-- canonical: {first: Rebecca, last: Bruce}
-  variants:
-  - {first: Rebecca F., last: Bruce}
-- canonical: {first: Hennie, last: Brugman}
-  id: hennie-brugman
-- canonical: {first: Ernst, last: Buchberger}
-  id: ernst-buchberger
-- canonical: {first: Chris, last: Buckley}
-  id: chris-buckley
-- canonical: {first: Sven, last: Buechel}
-  variants:
-  - {first: Sven, last: Büchel}
-- canonical: {first: Alberto, last: Bugarín Diz}
-  variants:
-  - {first: Alberto, last: Bugarín}
-  - {first: Alberto, last: Bugarin}
-- canonical: {first: Trung, last: Bui}
-  variants:
-  - {first: Trung H., last: Bui}
-- canonical: {first: Florin, last: Bulgarov}
-  variants:
-  - {first: Florin Adrian, last: Bulgarov}
-- canonical: {first: Barbara, last: Bullock}
-  variants:
-  - {first: Barbara E., last: Bullock}
-- canonical: {first: Razvan, last: Bunescu}
-  variants:
-  - {first: Razvan C., last: Bunescu}
-- canonical: {first: Harry, last: Bunt}
-  id: harry-bunt
-- canonical: {first: Laura, last: Burdick}
-  variants:
-  - {first: Laura, last: Wendlandt}
-- canonical: {first: Gaston, last: Burek}
-  variants:
-  - {first: Gaston G., last: Burek}
-- canonical: {first: Clint, last: Burfoot}
-  variants:
-  - {first: Clinton, last: Burfoot}
-- canonical: {first: John D., last: Burger}
-  comment: MITRE
-  id: john-d-burger
-  similar: [john-f-burger]
-- canonical: {first: John F., last: Burger}
-  comment: System Development Corporation
-  id: john-f-burger
-  similar: [john-d-burger]
-- canonical: {first: Christopher J.C., last: Burges}
-  variants:
-  - {first: Chris J.C., last: Burges}
-- canonical: {first: Diego A., last: Burgos}
-  variants:
-  - {first: Diego, last: Burgos}
-- canonical: {first: Bianka, last: Buschbeck}
-  id: bianka-buschbeck
-  variants:
-  - {first: Bianka, last: Buschbeck-Wolf}
-- canonical: {first: Andrei, last: Butnaru}
-  variants:
-  - {first: Andrei M., last: Butnaru}
-- canonical: {first: Bill, last: Byrne}
-  comment: University of Cambridge
-  id: bill-byrne
-- canonical: {first: Bill, last: Byrne}
-  comment: UCSD Ph.d; https://www.linkedin.com/in/billb/
-  id: bill-byrne-ucsd
-- canonical: {first: Donna, last: Byron}
-  id: donna-byron
-  variants:
-  - {first: Donna K., last: Byron}
-- canonical: {first: Tamás, last: Bíró}
-  variants:
-  - {first: Tamás, last: Biró}
-- canonical: {first: Benjamin, last: Börschinger}
-  variants:
-  - {first: Benjamin, last: Boerschinger}
-- canonical: {first: Kenneth S., last: Bøgh}
-  variants:
-  - {first: Kenneth, last: Bøgh}
-- canonical: {first: Alena, last: Bŏhmová}
-  variants:
-  - {first: Alena, last: Bohmova}
-  - {first: Alena, last: Böhmová}
-- canonical: {first: Sheila, last: C. M. de Sousa}
-  variants:
-  - {first: Sheila C.M., last: de Sousa}
-- canonical: {first: José G., last: C. de Souza}
-  variants:
-  - {first: José G.C., last: de Souza}
-  - {first: Jose G.C., last: de Souza}
-  - {first: José Guilherme, last: Camargo de Souza}
-  - {first: José G., last: Camargo de Souza}
-  - {first: José Guilherme, last: C. de Souza}
-- canonical: {first: Malarkodi, last: C.S.}
-  variants:
-  - {first: Malarkodi, last: C.S}
-  - {first: CS., last: Malarkodi}
-- canonical: {first: Joao Paulo, last: Cabral}
-  variants:
-  - {first: João P., last: Cabral}
-- canonical: {first: Luís Miguel, last: Cabral}
-  variants:
-  - {first: Luís, last: Cabral}
-- canonical: {first: Luis-Adrián, last: Cabrera-Diego}
-  variants:
-  - {first: Luis Adrián, last: Cabrera-Diego}
-- canonical: {first: Maria Teresa, last: Cabré}
-  id: maria-teresa-cabre
-  variants:
-  - {first: M. Teresa, last: Cabré}
-  - {first: Teresa, last: Cabré}
-- canonical: {first: Whitney L., last: Cade}
-  variants:
-  - {first: Whitney, last: Cade}
-- canonical: {first: Anais, last: Cadilhac}
-  variants:
-  - {first: Anaïs, last: Cadilhac}
-- canonical: {first: Michael J., last: Cafarella}
-  variants:
-  - {first: Michael, last: Cafarella}
-- canonical: {first: Lynne, last: Cahill}
-  id: lynne-cahill
-  variants:
-  - {first: Lynne J., last: Cahill}
-- canonical: {first: Dongfeng, last: Cai}
-  variants:
-  - {first: DongFeng, last: Cai}
-- canonical: {first: Jun Fu, last: Cai}
-  variants:
-  - {first: Junfu, last: Cai}
-- canonical: {first: Qingqing, last: Cai}
-  variants:
-  - {first: Qing-qing, last: Cai}
-- canonical: {first: Jo, last: Calder}
-  variants:
-  - {first: Jonathan, last: Calder}
-- canonical: {first: Mary Elaine, last: Califf}
-  id: mary-elaine-califf
-- canonical: {first: Charles B., last: Callaway}
-  variants:
-  - {first: Charles, last: Callaway}
-- canonical: {first: Diego, last: Calvanese}
-  id: diego-calvanese
-- canonical: {first: Nicoletta, last: Calzolari}
-  id: nicoletta-calzolari
-  variants:
-  - {first: Nicoletta Calzolari, last: Zamorani}
-- canonical: {first: Jose, last: Camacho-Collados}
-  variants:
-  - {first: José, last: Camacho-Collados}
-- canonical: {first: Ellen, last: Campana}
-  id: ellen-campana
-- canonical: {first: Joseph P., last: Campbell}
-  variants:
-  - {first: Joseph, last: Campbell}
-- canonical: {first: Francisco, last: Campillo}
-  variants:
-  - {first: Francisco Campillo, last: Díaz}
-- canonical: {first: Doğan, last: Can}
-  variants:
-  - {first: Dogan, last: Can}
-- canonical: {first: Arnaldo, last: 'Candido, Jr.'}
-  variants:
-  - {first: Arnaldo, last: Candido Jr.}
-  - {first: Arnaldo, last: Candido Jr}
-  - {first: Arnaldo, last: Candido}
-- canonical: {first: Marie, last: Candito}
-  variants:
-  - {first: Marie-Helene, last: Candito}
-  - {first: Marie-Hélène, last: Candito}
-- canonical: {first: Amparo Elizabeth, last: Cano Basave}
-  variants:
-  - {first: Amparo Elizabeth, last: Cano-Basave}
-- canonical: {first: Xuan-Nga, last: Cao}
-  variants:
-  - {first: Xuân-Nga, last: Cao}
-  - {first: Xuân-Nga Cao, last: Kam}
-- canonical: {first: Amedeo, last: Cappelli}
-  id: amedeo-cappelli
-- canonical: {first: George, last: Carayannis}
-  id: george-carayannis
-- canonical: {first: José María, last: Carazo}
-  variants:
-  - {first: José-María, last: Carazo}
-- canonical: {first: Sandra, last: Carberry}
-  variants:
-  - {first: M. Sandra, last: Carberry}
-- canonical: {first: Jaime G., last: Carbonell}
-  comment: CMU
-  id: jaime-g-carbonell
-  similar: [jaime-r-carbonell]
-  variants:
-  - {first: Jaime, last: Carbonell}
-  - {first: Jaime G., last: Carbonell Jr}
-- canonical: {first: Jaime R., last: Carbonell}
-  comment: BBN; d. 1973
-  id: jaime-r-carbonell
-  similar: [jaime-g-carbonell]
-- canonical: {first: Antonio, last: Cardenal}
-  variants:
-  - {first: Antonio, last: Cardenal-Lopez}
-- canonical: {first: Claire, last: Cardie}
-  id: claire-cardie
-- canonical: {first: Patrick, last: Cardinal}
-  id: patrick-cardinal
-- canonical: {first: Paula, last: Cardoso}
-  id: paula-cardoso
-  variants:
-  - {first: Paula C. Figueira, last: Cardoso}
-  - {first: Paula C. F., last: Cardoso}
-- canonical: {first: George, last: Caridakis}
-  id: george-caridakis
-- canonical: {first: Kathleen M., last: Carley}
-  variants:
-  - {first: Kathleen, last: Carley}
-- canonical: {first: Mark, last: Carman}
-  variants:
-  - {first: Mark J., last: Carman}
-  - {first: Mark James, last: Carman}
-- canonical: {first: Jorge, last: Carrillo de Albornoz}
-  variants:
-  - {first: Jorge Carrillo, last: de Albornoz}
-- canonical: {first: Jeremy J., last: Carroll}
-  variants:
-  - {first: Jeremy, last: Carroll}
-- canonical: {first: John A., last: Carroll}
-  comment: Cambridge, Sussex
-  id: john-a-carroll
-  similar: [john-b-carroll]
-  variants:
-  - {first: John, last: Carroll}
-- canonical: {first: John B., last: Carroll}
-  comment: UNC
-  id: john-b-carroll
-  similar: [john-a-carroll]
-- canonical: {first: Julie, last: Carson-Berndsen}
-  variants:
-  - {first: Julie, last: Carson}
-  - {first: Julle, last: Carson-Berndsen}
-- canonical: {first: David, last: Carter}
-  variants:
-  - {first: David M., last: Carter}
-- canonical: {first: Christopher, last: Caruso}
-  variants:
-  - {first: Chris, last: Caruso}
-- canonical: {first: Vitor, last: Carvalho}
-  variants:
-  - {first: Vitor R., last: Carvalho}
-- canonical: {first: Francisco, last: Casacuberta}
-  id: francisco-casacuberta
-- canonical: {first: Bernardino, last: Casas}
-  id: bernardino-casas
-- canonical: {first: Helena de Medeiros, last: Caseli}
-  variants:
-  - {first: Helena, last: de Medeiros Caseli}
-- canonical: {first: Arantza, last: Casillas}
-  id: arantza-casillas
-- canonical: {first: Asunción, last: Castaño}
-  id: asuncion-castano
-- canonical: {first: José, last: Castaño}
-  variants:
-  - {first: José M., last: Castaño}
-- canonical: {first: João Miguel, last: Casteleiro}
-  variants:
-  - {first: João, last: Casteleiro}
-- canonical: {first: Núria, last: Castell}
-  variants:
-  - {first: Nuria, last: Castell}
-- canonical: {first: Antonio, last: Castellanos}
-  id: antonio-castellanos
-- canonical: {first: Eric, last: Castelli’}
-  variants:
-  - {first: Eric, last: Castelli}
-- canonical: {first: Irene, last: Castellón}
-  variants:
-  - {first: Irene, last: Castellon}
-- canonical: {first: Julio, last: Castillo}
-  variants:
-  - {first: Julio Javier, last: Castillo}
-- canonical: {first: Thiago, last: Castro Ferreira}
-  variants:
-  - {first: Thiago, last: Ferreira}
-- canonical: {first: Maria Lucia, last: Castro Jorge}
-  variants:
-  - {first: Maria Lucía Castro, last: Jorge}
-  - {first: Maria Lucía, last: Castro Jorge}
-- canonical: {first: Maria Jose, last: Castro-Bleda}
-  variants:
-  - {first: María José, last: Castro}
-  - {first: María-José, last: Castro}
-- canonical: {first: Dolors, last: Català}
-  variants:
-  - {first: Dolors, last: Catala}
-- canonical: {first: Maria Novella, last: Catarsi}
-  id: maria-novella-catarsi
-- canonical: {first: Roberta, last: Catizone}
-  id: roberta-catizone
-- canonical: {first: Gabriela, last: Cavaglià}
-  variants:
-  - {first: Gabriela, last: Cavaglia}
-- canonical: {first: Alexandru, last: Ceauşu}
-  variants:
-  - {first: Alexandru, last: Ceausu}
-- canonical: {first: Guillermo A., last: Cecchi}
-  variants:
-  - {first: Guillermo, last: Cecchi}
-- canonical: {first: Ali Hadian, last: Cefidekhanie}
-  variants:
-  - {first: Ali, last: Hadian}
-- canonical: {first: Pedro Concejero, last: Cerezo}
-  variants:
-  - {first: Pedro, last: Concejero}
-- canonical: {first: Scott A., last: Hale}
-  variants:
-  - {first: Scott, last: Hale}
-- canonical: {first: Jeong-Won, last: Cha}
-  variants:
-  - {first: Jeongwon, last: Cha}
-- canonical: {first: Seungho, last: Cha}
-  id: seungho-cha
-- canonical: {first: Joyce, last: Chai}
-  variants:
-  - {first: Joyce Yue, last: Chai}
-  - {first: Joyce Y., last: Chai}
-- canonical: {first: Kian Ming A., last: Chai}
-  variants:
-  - {first: Kian Ming Adam, last: Chai}
-- canonical: {first: Aimilios, last: Chalamandaris}
-  id: aimilios-chalamandaris
-  variants:
-  - {first: Chalamandaris, last: Aimilios}
-- canonical: {first: Nathanael, last: Chambers}
-  variants:
-  - {first: Nathan, last: Chambers}
-- canonical: {first: Gary K. K., last: Chan}
-  id: gary-k-k-chan
-- canonical: {first: Kwok-Ping, last: Chan}
-  variants:
-  - {first: Kwok Ping, last: Chan}
-- canonical: {first: Samuel W. K., last: Chan}
-  id: samuel-w-k-chan
-  variants:
-  - {first: Samuel W.K., last: Chan}
-- canonical: {first: Brian J., last: Chandler}
-  id: brian-j-chandler
-  variants:
-  - {first: Brian, last: Chandler}
-- canonical: {first: Sharath, last: Chandra Guntuku}
-  variants:
-  - {first: Sharath Chandra, last: Guntuku}
-- canonical: {first: Raman, last: Chandrasekar}
-  id: raman-chandrasekar
-- canonical: {first: Muthu Kumar, last: Chandrasekaran}
-  variants:
-  - {first: Muthu, last: Kumar Chandrasekaran}
-- canonical: {first: Angel, last: Chang}
-  variants:
-  - {first: Angel X., last: Chang}
-- canonical: {first: Baobao, last: Chang}
-  variants:
-  - {first: Bao-Bao, last: Chang}
-- canonical: {first: Ching Yun, last: Chang}
-  variants:
-  - {first: Ching-Yun, last: Chang}
-- canonical: {first: Edward Y., last: Chang}
-  variants:
-  - {first: Edward, last: Chang}
-- canonical: {first: Jason S., last: Chang}
-  variants:
-  - {first: Jason, last: Chang}
-  - {first: Jason J. S., last: Chang}
-  - {first: Jason J.S., last: Chang}
-  - {first: Jason J., last: Chang}
-- canonical: {first: Jim, last: Chang}
-  variants:
-  - {first: Jimmy, last: Chang}
-- canonical: {first: Joseph Z., last: Chang}
-  variants:
-  - {first: Joseph, last: Chang}
-  - {first: Joseph Z, last: Chang}
-- canonical: {first: Jyun-Sheng, last: Chang}
-  variants:
-  - {first: Jyun-sheng, last: Chang}
-- canonical: {first: Kai-min Kevin, last: Chang}
-  variants:
-  - {first: Kai-Min, last: Chang}
-  - {first: Kai-min K., last: Chang}
-- canonical: {first: Li-Li, last: Chang}
-  variants:
-  - {first: Li-li, last: Chang}
-- canonical: {first: Li-Ping, last: Chang}
-  variants:
-  - {first: Li-ping, last: Chang}
-- canonical: {first: Pi-Chuan, last: Chang}
-  variants:
-  - {first: Pichuan, last: Chang}
-- canonical: {first: Shih-Fu, last: Chang}
-  variants:
-  - {first: Shih-fu, last: Chang}
-- canonical: {first: Yu-wei, last: Chang}
-  variants:
-  - {first: Yu-Wei, last: Chang}
-- canonical: {first: F. Y. August, last: Chao}
-  variants:
-  - {first: F.Y. August, last: Chao}
-- canonical: {first: Wenhan, last: Chao}
-  variants:
-  - {first: WenHan, last: Chao}
-  - {first: Wen-Han, last: Chao}
-- canonical: {first: Wendy, last: Chapman}
-  variants:
-  - {first: Wendy W, last: Chapman}
-- canonical: {first: Marcela, last: Charfuelan}
-  variants:
-  - {first: Marcela, last: Charfuelán}
-- canonical: {first: Eric, last: Charton}
-  variants:
-  - {first: Éric, last: Charton}
-- canonical: {first: Noël, last: Chateau}
-  id: noel-chateau
-- canonical: {first: Niladri, last: Chatterjee}
-  id: niladri-chatterjee
-- canonical: {first: Rajen, last: Chatterjee}
-  variants:
-  - {first: Rajan, last: Chatterjee}
-- canonical: {first: Jacques, last: Chauché}
-  id: jacques-chauche
-- canonical: {first: Himani, last: Chaudhry}
-  variants:
-  - {first: Himani, last: Chaudhary}
-- canonical: {first: Bidyut Baran, last: Chaudhuri}
-  id: bidyut-baran-chaudhuri
-  variants:
-  - {first: Bidyut B., last: Chaudhuri}
-- canonical: {first: Chiwei, last: Che}
-  id: chiwei-che
-- canonical: {first: Alvin Cheng-Hsien, last: Chen}
-  variants:
-  - {first: Cheng-Hsien, last: Chen}
-- canonical: {first: Catherine, last: Chen}
-  comment: UC Berkley
-  id: catherine-chen-ucberkley
-- canonical: {first: Catherine, last: Chen}
-  comment: Brown
-  id: catherine-chen-bu
-- canonical: {first: Chao-Jan, last: Chen}
-  variants:
-  - {first: Chao-jan, last: Chen}
-- canonical: {first: Cheng-Der, last: Chen}
-  variants:
-  - {first: Cheng-der, last: Chen}
-- canonical: {first: Feng-Yi, last: Chen}
-  variants:
-  - {first: Feng-yi, last: Chen}
-- canonical: {first: Francine, last: Chen}
-  variants:
-  - {first: Francine R., last: Chen}
-- canonical: {first: Helen Kaiyun, last: Chen}
-  variants:
-  - {first: Kai-Yun, last: Chen}
-  - {first: Kai-yun, last: Chen}
-  - {first: Helen Kai-yun, last: Chen}
-- canonical: {first: Huey-Chyun, last: Chen}
-  variants:
-  - {first: Mathis Huey-chyun, last: Chen}
-- canonical: {first: Jen Nan, last: Chen}
-  variants:
-  - {first: Jen-Nan, last: Chen}
-  - {first: Jen-nan, last: Chen}
-- canonical: {first: Jiajun, last: Chen}
-  variants:
-  - {first: Jia-jun, last: Chen}
-  - {first: Jia-Jun, last: Chen}
-- canonical: {first: Keh-Jiann, last: Chen}
-  variants:
-  - {first: Keh-jiann, last: Chen}
-  - {first: Ke-Jiann, last: Chen}
-  - {first: K. J., last: Chen}
-- canonical: {first: Qian, last: Cao}
-  id: qian-cao-renmin
-  orcid: 0000-0003-3288-1714
-  institution: Renmin University of China
-  comment: Renmin
-- canonical: {first: Qian, last: Cao}
-  id: qian-cao
-  comment: May refer to several people
-- canonical: {first: Kuang-hua, last: Chen}
-  variants:
-  - {first: Kuang-Hua, last: Chen}
-- canonical: {first: Li-mei, last: Chen}
-  variants:
-  - {first: Li-Mei, last: Chen}
-- canonical: {first: Liang-Yu, last: Chen}
-  variants:
-  - {first: Liangyu, last: Chen}
-- canonical: {first: Mei-hua, last: Chen}
-  variants:
-  - {first: Mei-Hua, last: Chen}
-- canonical: {first: Mia Xu, last: Chen}
-  variants:
-  - {first: Mia, last: Chen}
-- canonical: {first: Nancy, last: Chen}
-  variants:
-  - {first: Nancy F., last: Chen}
-- canonical: {first: Po Chun, last: Chen}
-  variants:
-  - {first: Po-Chun, last: Chen}
-- canonical: {first: Po Hsuan, last: Chen}
-  variants:
-  - {first: Po-Hsuan, last: Chen}
-- canonical: {first: Shun-Der, last: Chen}
-  variants:
-  - {first: Shun-Der, last: Cheng}
-- canonical: {first: Ssu-Cheng, last: Chen}
-  variants:
-  - {first: Su-Cheng, last: Chen}
-- canonical: {first: Stanley F., last: Chen}
-  variants:
-  - {first: Stanley, last: Chen}
-- canonical: {first: Tsong-yi, last: Chen}
-  variants:
-  - {first: Tsong-Yi, last: Chen}
-- canonical: {first: Xixian, last: Chen}
-  variants:
-  - {first: XiXian, last: Chen}
-- canonical: {first: Yi-Rong, last: Chen}
-  variants:
-  - {first: YiRong, last: Chen}
-  - {first: Yi-Rung, last: Chen}
-- canonical: {first: YiChun, last: Chen}
-  variants:
-  - {first: Yi-Chun, last: Chen}
-- canonical: {first: Yuanzhu Peter, last: Chen}
-  variants:
-  - {first: Peter, last: Chen}
-- canonical: {first: Zhiyu, last: Chen}
-  id: zhiyu-chen-lehigh
-  orcid: 0000-0002-3096-7912
-  comment: Lehigh University
-- canonical: {first: Zhiyu, last: Chen}
-  id: zhiyu-chen
-  comment: May refer to several people
-- canonical: {first: Charles, last: 'Chen, Jr.'}
-  variants:
-  - {first: Charles, last: Chen}
-  - {first: Charles, last: Chen Jr.}
-- canonical: {first: Noureddine, last: Chenfour}
-  id: noureddine-chenfour
-- canonical: {first: Wen-Huei, last: Cheng}
-  variants:
-  - {first: Wen-Hui, last: Cheng}
-- canonical: {first: Xueqi, last: Cheng}
-  variants:
-  - {first: Xue-Qi, last: Cheng}
-- canonical: {first: Chi-Shun, last: Cheung}
-  variants:
-  - {first: Chi Shun, last: Cheung}
-- canonical: {first: Jackie Chi Kit, last: Cheung}
-  variants:
-  - {first: Jackie C. K., last: Cheung}
-  - {first: Jackie C.K., last: Cheung}
-  - {first: Jackie, last: Cheung}
-- canonical: {first: Lawrence Y. L., last: Cheung}
-  id: lawrence-y-l-cheung
-  variants:
-  - {first: Lawrence Y.L., last: Cheung}
-- canonical: {first: Peter A., last: Chew}
-  variants:
-  - {first: Peter, last: Chew}
-- canonical: {first: Adam, last: Cheyer}
-  id: adam-cheyer
-- canonical: {first: Tung-Hui, last: Chiang}
-  variants:
-  - {first: TungHui, last: Chiang}
-- canonical: {first: Yuang-Chin, last: Chiang}
-  variants:
-  - {first: Yuang-chin, last: Chiang}
-- canonical: {first: Jen-Tzung, last: Chien}
-  variants:
-  - {first: Jen-Tzong, last: Chien}
-- canonical: {first: Lois C., last: Childs}
-  variants:
-  - {first: Lois, last: Childs}
-- canonical: {first: Phil Sidney, last: Ostheimer}
-  id: phil-sidney-ostheimer
-  orcid: 0009-0009-6186-3233
-  institution: RPTU Kaiserslautern-Landau
-  variants:
-  - {first: Phil, last: Ostheimer}
-- canonical: {first: Odbayar, last: Chimeddorj}
-  variants:
-  - {first: Chimeddorj, last: Odbayar}
-- canonical: {first: Nancy, last: Chinchor}
-  id: nancy-chinchor
-  variants:
-  - {first: Nancy A., last: Chinchor}
-- canonical: {first: P. C., last: Ching}
-  variants:
-  - {first: P.C., last: Ching}
-- canonical: {first: Manoj, last: Chinnakotla}
-  variants:
-  - {first: Manoj K., last: Chinnakotla}
-  - {first: Manoj Kumar, last: Chinnakotla}
-- canonical: {first: Luminita, last: Chiran}
-  id: luminita-chiran
-- canonical: {first: Mahesh V., last: Chitrao}
-  variants:
-  - {first: Mahesh, last: Chitrao}
-- canonical: {first: Chih-Ming, last: Chiu}
-  variants:
-  - {first: Chih-ming, last: Chiu}
-- canonical: {first: Hsun-Wen, last: Chiu}
-  variants:
-  - {first: Hsun-wen, last: Chiu}
-- canonical: {first: Timothy, last: Chklovski}
-  variants:
-  - {first: Tim, last: Chklovski}
-- canonical: {first: Hakaze, last: Cho}
-  id: hakaze-cho
-  variants:
-  - {first: Yufeng, last: Zhao}
-- canonical: {first: Martin, last: Chodorow}
-  variants:
-  - {first: Martin S., last: Chodorow}
-- canonical: {first: GyuHyeon, last: Choi}
-  variants:
-  - {first: Gyu-Hyeon, last: Choi}
-- canonical: {first: Jinho D., last: Choi}
-  variants:
-  - {first: Jinho, last: Choi}
-- canonical: {first: Key-Sun, last: Choi}
-  variants:
-  - {first: Key-sun, last: Choi}
-- canonical: {first: Annick, last: Choisier}
-  id: annick-choisier
-- canonical: {first: Mickey W. C., last: Chong}
-  variants:
-  - {first: Mickey W.C., last: Chong}
-- canonical: {first: George, last: Chou}
-  id: george-chou
-- canonical: {first: Seng-Cho T., last: Chou}
-  variants:
-  - {first: Seng-cho T., last: Chou}
-- canonical: {first: Prafulla Kumar, last: Choubey}
-  variants:
-  - {first: Prafulla, last: Choubey}
-- canonical: {first: Khalid, last: Choukri}
-  id: khalid-choukri
-  variants:
-  - {first: Kalid, last: Choukri}
-- canonical: {first: Yen-Lu, last: Chow}
-  variants:
-  - {first: Yen-lu, last: Chow}
-- canonical: {first: Thomas Ulrich, last: Christiansen}
-  variants:
-  - {first: Thomas, last: Christiansen}
-- canonical: {first: Dimitris, last: Christodoulakis}
-  variants:
-  - {first: Dimitris N., last: Christodoulakis}
-- canonical: {first: C. Mario, last: Christoudias}
-  variants:
-  - {first: Mario, last: Christoudias}
-- canonical: {first: Grzegorz, last: Chrupała}
-  variants:
-  - {first: Grzegorz, last: Chrupala}
-- canonical: {first: Jennifer, last: Chu-Carroll}
-  variants:
-  - {first: Jennifer, last: Chu}
-- canonical: {first: Tat-Seng, last: Chua}
-  variants:
-  - {first: Tat Seng, last: Chua}
-- canonical: {first: Ka-Wai, last: Chui}
-  variants:
-  - {first: Kawai, last: Chui}
-- canonical: {first: Grace, last: Chung}
-  variants:
-  - {first: Grace Y, last: Chung}
-- canonical: {first: Hee Sung, last: Chung}
-  variants:
-  - {first: Hee-Sung, last: Chung}
-- canonical: {first: HooJung, last: Chung}
-  variants:
-  - {first: Hoojung, last: Chung}
-- canonical: {first: Siaw-Fong, last: Chung}
-  variants:
-  - {first: Siaw Fong, last: Chung}
-- canonical: {first: You-Shan, last: Chung}
-  variants:
-  - {first: You-shan, last: Chung}
-- canonical: {first: Kenneth, last: Church}
-  variants:
-  - {first: Kenneth Ward, last: Church}
-  - {first: Ken, last: Church}
-  - {first: Kenneth W., last: Church}
-- canonical: {first: Christopher, last: Chute}
-  variants:
-  - {first: Christopher G., last: Chute}
-- canonical: {first: Ilyas, last: Cicekli}
-  variants:
-  - {first: İlyas, last: Çiçekli}
-- canonical: {first: Christopher, last: Cieri}
-  variants:
-  - {first: Chris, last: Cieri}
-- canonical: {first: Philipp, last: Cimiano}
-  id: philipp-cimiano
-- canonical: {first: Alina Maria, last: Ciobanu}
-  variants:
-  - {first: Alina, last: Ciobanu}
-- canonical: {first: Manuel R., last: Ciosici}
-  variants:
-  - {first: Manuel, last: Ciosici}
-- canonical: {first: Fabio, last: Ciravegna}
-  id: fabio-ciravegna
-- canonical: {first: Montserrat, last: Civit}
-  id: montserrat-civit
-- canonical: {first: Chris, last: Clark}
-  variants:
-  - {first: Christine, last: Clark}
-- canonical: {first: Jonathan H., last: Clark}
-  variants:
-  - {first: Jonathan, last: Clark}
-- canonical: {first: Charles L. A., last: Clarke}
-  id: charles-l-a-clarke
-- canonical: {first: Luka A., last: Clarke}
-  variants:
-  - {first: Luka, last: Clarke}
-- canonical: {first: Mark A., last: Clements}
-  variants:
-  - {first: Mark, last: Clements}
-- canonical: {first: Miruna, last: Clinciu}
-  variants:
-  - {first: Miruna-Adriana, last: Clinciu}
-- canonical: {first: John H., last: 'Clippinger, Jr.'}
-  variants:
-  - {first: John Henry, last: 'Clippinger, Jr.'}
-- canonical: {first: Paul, last: Clough}
-  variants:
-  - {first: Paul D., last: Clough}
-- canonical: {first: Martin, last: Cmejrek}
-  variants:
-  - {first: Martin, last: Čmejrek}
-- canonical: {first: Noah, last: Coccaro}
-  id: noah-coccaro
-- canonical: {first: Jose, last: Coch}
-  variants:
-  - {first: José, last: Coch}
-- canonical: {first: John, last: Cocke}
-  id: john-cocke
-- canonical: {first: Joan, last: Codina-Filba}
-  variants:
-  - {first: Joan, last: Codina-Filbà}
-  - {first: Joan, last: Codina}
-- canonical: {first: Jordan, last: Cohen}
-  variants:
-  - {first: Jordan R., last: Cohen}
-- canonical: {first: K. Bretonnel, last: Cohen}
-  variants:
-  - {first: Kevin Bretonnel, last: Cohen}
-  - {first: Kevin B., last: Cohen}
-  - {first: Kevin, last: Cohen}
-- canonical: {first: Philip R., last: Cohen}
-  variants:
-  - {first: Philip, last: Cohen}
-  - {first: Phil R., last: Cohen}
-- canonical: {first: Shay B., last: Cohen}
-  variants:
-  - {first: Shay, last: Cohen}
-- canonical: {first: Trevor, last: Cohen}
-  comment: University of Washington
-  id: trevor-cohen
-  similar: [trevor-cohn]
-- canonical: {first: William, last: Cohen}
-  variants:
-  - {first: William W., last: Cohen}
-- canonical: {first: Yael, last: Cohen-Sygal}
-  variants:
-  - {first: Yael, last: Sygal}
-- canonical: {first: Luísa, last: Coheur}
-  variants:
-  - {first: Luisa, last: Coheur}
-- canonical: {first: Trevor, last: Cohn}
-  comment: University of Melbourne
-  id: trevor-cohn
-  similar: [trevor-cohen]
-- canonical: {first: Andrew W., last: Cole}
-  variants:
-  - {first: Andrew, last: Cole}
-- canonical: {first: Ronald, last: Cole}
-  id: ronald-cole
-  variants:
-  - {first: Ron, last: Cole}
-  - {first: Ronald A., last: Cole}
-- canonical: {first: Mariona, last: Coll Ardanuy}
-  variants:
-  - {first: Mariona Coll, last: Ardanuy}
-- canonical: {first: Christophe, last: Collet}
-  id: christophe-collet
-- canonical: {first: Jean-Marc, last: Colletta}
-  id: jean-marc-colletta
-- canonical: {first: Edward, last: Collins}
-  variants:
-  - {first: Ed, last: Collins}
-- canonical: {first: Michael, last: Collins}
-  variants:
-  - {first: Michael John, last: Collins}
-  - {first: Mike, last: Collins}
-- canonical: {first: Sandra, last: Collovini}
-  id: sandra-collovini
-- canonical: {first: Pere, last: Comas}
-  variants:
-  - {first: Pere R., last: Comas}
-- canonical: {first: Donald C., last: Comeau}
-  variants:
-  - {first: Don, last: Comeau}
-  - {first: Donald, last: Comeau}
-  - {first: Donald C, last: Comeau}
-- canonical: {first: Elisabet, last: Comelles}
-  id: elisabet-comelles
-- canonical: {first: Kristian, last: Concepcion}
-  variants:
-  - {first: Kris, last: Concepcion}
-- canonical: {first: Jae, last: Hee Lee}
-  id: jae-hee-lee-bremen
-  orcid: 0000-0001-9840-780X
-  institution: University of Bremen
-  comment: Bremen
-- canonical: {first: Jae, last: Hee Lee}
-  id: jae-hee-lee
-  comment: May refer to several people
-- canonical: {first: Sherri, last: Condon}
-  variants:
-  - {first: Sherri L., last: Condon}
-- canonical: {first: John, last: Conroy}
-  variants:
-  - {first: John M., last: Conroy}
-- canonical: {first: Matthieu, last: Constant}
-  variants:
-  - {first: Mathieu, last: Constant}
-- canonical: {first: Susan P., last: Converse}
-  variants:
-  - {first: Susan, last: Converse}
-- canonical: {first: Helen V., last: Cook}
-  variants:
-  - {first: Helen, last: Cook}
-  - {first: Helen V, last: Cook}
-- canonical: {first: Peter-Arno, last: Coppen}
-  id: peter-arno-coppen
-- canonical: {first: Ornella, last: Corazzari}
-  id: ornella-corazzari
-- canonical: {first: Greville C., last: Corbett}
-  variants:
-  - {first: Greville, last: Corbett}
-  - {first: Greville G., last: Corbett}
-- canonical: {first: Peter, last: Corbett}
-  variants:
-  - {first: Peter T., last: Corbett}
-- canonical: {first: João Paulo, last: Cordeiro}
-  variants:
-  - {first: João, last: Cordeiro}
-- canonical: {first: Silvio, last: Cordeiro}
-  variants:
-  - {first: Silvio Ricardo, last: Cordeiro}
-- canonical: {first: Mark G., last: Core}
-  variants:
-  - {first: Mark, last: Core}
-- canonical: {first: Courtney D., last: Corley}
-  variants:
-  - {first: Courtney, last: Corley}
-- canonical: {first: Annick, last: Corluy}
-  id: annick-corluy
-- canonical: {first: Thomas L., last: Cornell}
-  variants:
-  - {first: Thomas, last: Cornell}
-- canonical: {first: Gloria, last: Corpas Pastor}
-  variants:
-  - {first: Gloria, last: Corpas}
-  - {first: Gloria Corpas, last: Pastor}
-- canonical: {first: Simon, last: Corston-Oliver}
-  variants:
-  - {first: Simon H., last: Corston-Oliver}
-- canonical: {first: Louise, last: Corti}
-  id: louise-corti
-- canonical: {first: Santiago, last: Cortés Vaíllo}
-  variants:
-  - {first: Santiago, last: Cortes}
-  - {first: Santiago Cortés, last: Vaíllo}
-- canonical: {first: William J., last: Corvey}
-  variants:
-  - {first: William, last: Corvey}
-- canonical: {first: Angela, last: Costa}
-  variants:
-  - {first: Ângela, last: Costa}
-- canonical: {first: Luís Fernando, last: Costa}
-  variants:
-  - {first: Luís, last: Costa}
-- canonical: {first: Christophe, last: Costa Florêncio}
-  variants:
-  - {first: Christophe Costa, last: Florencio}
-- canonical: {first: Marta R., last: Costa-jussà}
-  variants:
-  - {first: Marta, last: R. Costa-jussà}
-  - {first: Marta R., last: Costa-Jussà}
-  - {first: Marta R., last: Costa-Jussa}
-  - {first: Marta, last: Ruiz Costa-jussà}
-  - {first: Marta Ruiz, last: Costa-jussà}
-- canonical: {first: Fintan J., last: Costello}
-  variants:
-  - {first: Fintan, last: Costello}
-- canonical: {first: William, last: Coster}
-  variants:
-  - {first: Will, last: Coster}
-- canonical: {first: Louise-Amélie, last: Cougnon}
-  variants:
-  - {first: Louis-Amélie, last: Cougnon}
-- canonical: {first: Francisco M., last: Couto}
-  variants:
-  - {first: Francisco, last: Couto}
-- canonical: {first: Daniel, last: Couto Vale}
-  variants:
-  - {first: Daniel, last: Couto-Vale}
-  - {first: Daniel, last: Vale}
-- canonical: {first: Jim, last: Cowie}
-  id: jim-cowie
-- canonical: {first: Roddy, last: Cowie}
-  id: roddy-cowie
-- canonical: {first: Benoit, last: Crabbé}
-  variants:
-  - {first: Benoît, last: Crabbé}
-- canonical: {first: Gregory, last: Crane}
-  variants:
-  - {first: Gregory R., last: Crane}
-- canonical: {first: Lambros, last: Cranias}
-  variants:
-  - {first: Lambros, last: Kranias}
-- canonical: {first: Josep M., last: Crego}
-  variants:
-  - {first: Josep Maria, last: Crego}
-  - {first: Josep, last: Crego}
-- canonical: {first: Luca, last: Cristoforetti}
-  id: luca-cristoforetti
-- canonical: {first: Matthew, last: Crocker}
-  variants:
-  - {first: Matthew W., last: Crocker}
-- canonical: {first: W. Bruce, last: Croft}
-  variants:
-  - {first: Bruce, last: Croft}
-- canonical: {first: Fabien, last: Cromieres}
-  variants:
-  - {first: Fabien, last: Cromières}
-- canonical: {first: Paul A., last: Crook}
-  variants:
-  - {first: Paul, last: Crook}
-- canonical: {first: Noa P., last: Cruz Diaz}
-  variants:
-  - {first: Noa P., last: Cruz}
-  - {first: Noa, last: Cruz}
-  - {first: Noa P., last: Cruz Díaz}
-- canonical: {first: Michael, last: Crystal}
-  variants:
-  - {first: Michael R., last: Crystal}
-- canonical: {first: Andras, last: Csomai}
-  variants:
-  - {first: András, last: Csomai}
-- canonical: {first: Catia, last: Cucchiarini}
-  id: catia-cucchiarini
-- canonical: {first: Silviu, last: Cucerzan}
-  variants:
-  - {first: Silviu-Petru, last: Cucerzan}
-- canonical: {first: Chris, last: Culy}
-  variants:
-  - {first: Christopher, last: Culy}
-- canonical: {first: Hamish, last: Cunningham}
-  id: hamish-cunningham
-- canonical: {first: Arturo, last: Curiel}
-  variants:
-  - {first: Arturo, last: Curiel Díaz}
-- canonical: {first: Wei, last: Liu}
-  id: wei-liu-kcl
-  orcid: 0000-0003-0011-7797
-  institution: Kings College London
-  comment: KCL
-- canonical: {first: Wei, last: Liu}
-  id: wei-liu
-  comment: May refer to several people
-- canonical: {first: James R., last: Curran}
-  variants:
-  - {first: James, last: Curran}
-- canonical: {first: Douglass, last: Cutting}
-  variants:
-  - {first: Doug, last: Cutting}
-- canonical: {first: Jan, last: Cuřín}
-  id: jan-curin
-- canonical: {first: Agata, last: Cybulska}
-  variants:
-  - {first: Agata Katarzyna, last: Cybulska}
-- canonical: {first: Scott, last: Cyphers}
-  variants:
-  - {first: D. Scott, last: Cyphers}
-- canonical: {first: Marianne, last: Dabbadie}
-  id: marianne-dabbadie
-- canonical: {first: Walter, last: Daelemans}
-  id: walter-daelemans
-- canonical: {first: Deborah A., last: Dahl}
-  id: deborah-a-dahl
-  variants:
-  - {first: Deborah, last: Dahl}
-- canonical: {first: Kathleen, last: Dahlgren}
-  id: kathleen-dahlgren
-- canonical: {first: Li-Rong, last: Dai}
-  variants:
-  - {first: LiRong, last: Dai}
-- canonical: {first: Xiang, last: Dai}
-  variants:
-  - {first: Xiangying, last: Dai}
-- canonical: {first: Xinyu, last: Dai}
-  variants:
-  - {first: Xin-yu, last: Dai}
-  - {first: Xin-Yu, last: Dai}
-- canonical: {first: Béatrice, last: Daille}
-  variants:
-  - {first: Beatrice, last: Daille}
-- canonical: {first: Bojana, last: Dalbelo Bašić}
-  variants:
-  - {first: Bojana Dalbelo, last: Bašić}
-- canonical: {first: Patrice, last: Dalle}
-  id: patrice-dalle
-- canonical: {first: Bhavana, last: Dalvi}
-  variants:
-  - {first: Bhavana, last: Dalvi Mishra}
-- canonical: {first: Om P., last: Damani}
-  variants:
-  - {first: Om, last: Damani}
-- canonical: {first: Fred, last: Damerau}
-  variants:
-  - {first: Fred J., last: Damerau}
-- canonical: {first: Laurie, last: Damianos}
-  variants:
-  - {first: Laurie E., last: Damianos}
-- canonical: {first: Danica, last: Damljanović}
-  variants:
-  - {first: Danica, last: Damljanovic}
-- canonical: {first: Géraldine, last: Damnati}
-  variants:
-  - {first: Geraldine, last: Damnati}
-- canonical: {first: Robert I., last: Damper}
-  id: robert-i-damper
-- canonical: {first: Sandipan, last: Dandapat}
-  variants:
-  - {first: Sandipan, last: Dandpat}
-- canonical: {first: Hoa Trang, last: Dang}
-  variants:
-  - {first: Hoa, last: Dang}
-- canonical: {first: Ron, last: 'Daniel, Jr.'}
-  variants:
-  - {first: Ron, last: Daniel}
-  - {first: Ron, last: Daniel Jr.}
-- canonical: {first: Masatake, last: Dantsuji}
-  id: masatake-dantsuji
-- canonical: {first: Aswarth Abhilash, last: Dara}
-  variants:
-  - {first: Aswarth, last: Dara}
-- canonical: {first: Stéfan, last: Darmoni}
-  variants:
-  - {first: Stefan, last: Darmoni}
-- canonical: {first: Vidas, last: Daudaravicius}
-  variants:
-  - {first: Vidas, last: Daudaravičius}
-- canonical: {first: Jordi, last: Daudé}
-  id: jordi-daude
-- canonical: {first: Hal, last: Daumé III}
-  variants:
-  - {first: Hal, last: Daume III}
-  - {first: Hal, last: Daume}
-  - {first: Hal, last: Daumé}
-- canonical: {first: Chris Irwin, last: Davis}
-  variants:
-  - {first: Chris, last: Davis}
-- canonical: {first: James, last: Davis}
-  variants:
-  - {first: James Raymond, last: Davis}
-- canonical: {first: Mark W., last: Davis}
-  variants:
-  - {first: Mark, last: Davis}
-- canonical: {first: Sashka T., last: Davis}
-  variants:
-  - {first: Sashka, last: Davis}
-- canonical: {first: Ian P., last: Davy}
-  variants:
-  - {first: Ian, last: Davy}
-  - {first: Ian P, last: Davy}
-- canonical: {first: David, last: Day}
-  variants:
-  - {first: David S., last: Day}
-- canonical: {first: Antonella, last: De Angeli}
-  variants:
-  - {first: Antonella, last: DeAngeli}
-- canonical: {first: Vitor, last: De Araujo}
-  variants:
-  - {first: Vítor, last: Araújo}
-- canonical: {first: Orphee, last: De Clercq}
-  variants:
-  - {first: Orphée, last: De Clercq}
-- canonical: {first: Georges, last: De Moor}
-  id: georges-de-moor
-- canonical: {first: Renato, last: De Mori}
-  variants:
-  - {first: Renato, last: de Mori}
-- canonical: {first: Anne, last: De Roeck}
-  variants:
-  - {first: Anne, last: DeRoeck}
-  - {first: Anne, last: de Roeck}
-  - {first: Anne, last: deRoeck}
-  - {first: A.N., last: De Roeck}
-  - {first: Anne N., last: De Roeck}
-- canonical: {first: Gianluca, last: De Rossi}
-  variants:
-  - {first: Gianluca, last: Rossi}
-- canonical: {first: Koenraad, last: De Smedt}
-  variants:
-  - {first: Koenraad, last: de Smedt}
-  - {first: Koenraad, last: DeSmedt}
-- canonical: {first: Ángel, last: De la Torre}
-  id: angel-de-la-torre
-- canonical: {first: Jonathan, last: DeCristofaro}
-  variants:
-  - {first: Jonathan D., last: DeCristofaro}
-- canonical: {first: Rosa, last: Del Gaudio}
-  variants:
-  - {first: Rosa, last: Gaudio}
-- canonical: {first: Riccardo, last: Del Gratta}
-  variants:
-  - {first: Riccardo, last: del Gratta}
-- canonical: {first: Iria, last: Del Río Gayo}
-  variants:
-  - {first: Iria, last: del Río Gayo}
-  - {first: Iria, last: del Río}
-  - {first: Iria, last: del Rio}
-- canonical: {first: Elisabeth, last: Delais-Roussarie}
-  variants:
-  - {first: Élisabeth, last: Delais-Roussarie}
-- canonical: {first: Jean-François, last: Delannoy}
-  variants:
-  - {first: Jean-Francois, last: Delannoy}
-- canonical: {first: Stephen A., last: Della Pietra}
-  id: stephen-a-della-pietra
-  variants:
-  - {first: Stephen, last: Della Pietra}
-  - {first: Stephen, last: DellaPietra}
-- canonical: {first: Vincent J., last: Della Pietra}
-  id: vincent-j-della-pietra
-  variants:
-  - {first: Vincent, last: DellaPietra}
-- canonical: {first: Rodolfo, last: Delmonte}
-  id: rodolfo-delmonte
-- canonical: {first: Paul, last: Deléglise}
-  variants:
-  - {first: Paul, last: Deleglise}
-- canonical: {first: George, last: Demetriou}
-  variants:
-  - {first: George C., last: Demetriou}
-- canonical: {first: Isin, last: Demirsahin}
-  variants:
-  - {first: Işin, last: Demirşahin}
-  - {first: Isin, last: Demirşahin}
-- canonical: {first: Peter, last: Deng}
-  id: peter-deng
-- canonical: {first: Xinyu, last: Deng}
-  variants:
-  - {first: XinYu, last: Deng}
-- canonical: {first: Zhi-Hong, last: Deng}
-  variants:
-  - {first: Zhihong, last: Deng}
-- canonical: {first: Alexandre, last: Denis}
-  id: alexandre-denis
-- canonical: {first: Leon, last: Derczynski}
-  variants:
-  - {first: Leon, last: Strømberg-Derczynski}
-- canonical: {first: Jan Milan, last: Deriu}
-  variants:
-  - {first: Jan, last: Deriu}
-- canonical: {first: Julien, last: Derivière}
-  id: julien-deriviere
-- canonical: {first: Maunendra Sankar, last: Desarkar}
-  variants:
-  - {first: Maunendra, last: Sankar Desarkar}
-- canonical: {first: Théo, last: Desbordes}
-  variants:
-  - {first: Theo, last: Desbordes}
-- canonical: {first: Jean-Pierre, last: Descles}
-  variants:
-  - {first: Jean-Pierre, last: Desclés}
-  - {first: Jean Pierre, last: Descles}
-- canonical: {first: Elina, last: Desipri}
-  id: elina-desipri
-  variants:
-  - {first: Elina, last: Desypri}
-- canonical: {first: José, last: Deulofeu}
-  variants:
-  - {first: Jose, last: Deulofeu}
-- canonical: {first: Arturo Calvo, last: Devesa}
-  variants:
-  - {first: Arturo, last: Calvo}
-- canonical: {first: Laurence, last: Devillers}
-  id: laurence-devillers
-- canonical: {first: Pradip, last: Dey}
-  variants:
-  - {first: Paradip, last: Dey}
-- canonical: {first: Arnab, last: Dhar}
-  variants:
-  - {first: Arnad, last: Dhar}
-- canonical: {first: Paramveer S., last: Dhillon}
-  variants:
-  - {first: Paramveer, last: Dhillon}
-- canonical: {first: Luigi, last: Di Caro}
-  variants:
-  - {first: Luigi, last: di Caro}
-- canonical: {first: Giuseppe, last: Di Fabbrizio}
-  variants:
-  - {first: Giuseppe, last: Fabbrizio}
-- canonical: {first: Mattia A., last: Di Gangi}
-  variants:
-  - {first: Mattia Antonino, last: Di Gangi}
-  - {first: Mattia, last: Di Gangi}
-- canonical: {first: Mauro, last: Di Manzo}
-  id: mauro-di-manzo
-- canonical: {first: Giorgio Maria, last: Di Nunzio}
-  variants:
-  - {first: Giorgio, last: Di Nunzio}
-- canonical: {first: Vittorio, last: Di Tomaso}
-  id: vittorio-di-tomaso
-- canonical: {first: Zihao, last: Li}
-  id: zihao-li-helsinki
-  orcid: 0009-0008-9329-5341
-  institution: University of Helsinki
-  comment: Helsinki
-- canonical: {first: Zihao, last: Li}
-  id: zihao-li
-  comment: May refer to several people
-- canonical: {first: Chrysanne, last: DiMarco}
-  variants:
-  - {first: Chrysanne, last: Di Marco}
-- canonical: {first: Denise, last: DiPersio}
-  variants:
-  - {first: Denise, last: Dipersio}
-- canonical: {first: Mona, last: Diab}
-  variants:
-  - {first: Mona T., last: Diab}
-- canonical: {first: Gaël, last: Dias}
-  variants:
-  - {first: Gael, last: Dias}
-  - {first: Gäel, last: Dias}
-- canonical: {first: Miguel Sales, last: Dias}
-  variants:
-  - {first: Miguel, last: Dias}
-- canonical: {first: Bento Carlos, last: Dias-da-Silva}
-  variants:
-  - {first: Bento Carlos Dias, last: da Silva}
-- canonical: {first: Javier, last: Dieguez-Tirado}
-  variants:
-  - {first: Javier, last: Dieguez}
-- canonical: {first: Dinh, last: Dien}
-  variants:
-  - {first: Dien, last: Dinh}
-- canonical: {first: Mireia, last: Diez}
-  variants:
-  - {first: Mireia, last: Díez}
-- canonical: {first: Vassilios, last: Digalakis}
-  id: vassilios-digalakis
-- canonical: {first: Brian W., last: Dillon}
-  variants:
-  - {first: Brian, last: Dillon}
-- canonical: {first: Davis Muhajereen D., last: Dimalen}
-  variants:
-  - {first: Davis Muhajereen, last: Dimalen}
-- canonical: {first: Vania, last: Dimitrova}
-  variants:
-  - {first: Vanya, last: Dimitrova}
-- canonical: {first: Luca, last: Dini}
-  id: luca-dini
-- canonical: {first: Norbert, last: Dinstl}
-  id: norbert-dinstl
-- canonical: {first: Georgiana, last: Dinu}
-  id: georgiana-dinu
-- canonical: {first: Liviu P., last: Dinu}
-  variants:
-  - {first: Liviu, last: Dinu}
-  - {first: Liviu Petrisor, last: Dinu}
-- canonical: {first: Cheikh M. Bamba, last: Dione}
-  variants:
-  - {first: Cheikh Bamba, last: Dione}
-- canonical: {first: Bayu, last: Distiawan}
-  variants:
-  - {first: Bayu Distiawan, last: Trisedya}
-- canonical: {first: Shirley, last: Dita}
-  variants:
-  - {first: Shirley N., last: Dita}
-- canonical: {first: Paul, last: Dixon}
-  variants:
-  - {first: Paul R., last: Dixon}
-- canonical: {first: Quoc Khanh, last: Do}
-  variants:
-  - {first: Quoc-Khanh, last: Do}
-- canonical: {first: Simon, last: Dobrisek}
-  variants:
-  - {first: Simon, last: Dobrišek}
-- canonical: {first: Boris V., last: Dobrov}
-  id: boris-v-dobrov
-  variants:
-  - {first: Boris, last: Dobrov}
-- canonical: {first: Laura, last: Docio-Fernandez}
-  variants:
-  - {first: Laura, last: Docío-Fernández}
-- canonical: {first: George R., last: Doddington}
-  variants:
-  - {first: George, last: Doddington}
-- canonical: {first: Ellen K., last: Dodge}
-  variants:
-  - {first: Ellen, last: Dodge}
-- canonical: {first: Shinichi, last: Doi}
-  variants:
-  - {first: Shin’ichi, last: Doi}
-- canonical: {first: Charles P., last: Dolan}
-  variants:
-  - {first: Charles, last: Dolan}
-- canonical: {first: William B., last: Dolan}
-  variants:
-  - {first: William, last: Dolan}
-  - {first: Bill, last: Dolan}
-- canonical: {first: Ioannis, last: Dologlou}
-  id: ioannis-dologlou
-- canonical: {first: Martin Ariel, last: Dominguez}
-  variants:
-  - {first: Martín, last: Domínguez}
-  - {first: Martin Ariel, last: Domínguez}
-- canonical: {first: Ming Chui, last: Dong}
-  variants:
-  - {first: Ming-Chui, last: Dong}
-- canonical: {first: Xin Luna, last: Dong}
-  variants:
-  - {first: Xin, last: Dong}
-- canonical: {first: Christine, last: Doran}
-  id: christine-doran
-- canonical: {first: Bonnie, last: Dorr}
-  variants:
-  - {first: Bonnie J., last: Dorr}
-- canonical: {first: Jochen, last: Dorre}
-  variants:
-  - {first: Jochen, last: Dörre}
-- canonical: {first: Léon, last: Dostert}
-  variants:
-  - {first: Leon, last: Dostert}
-  - {first: L. E., last: Dostert}
-- canonical: {first: Ellen, last: Douglas-Cowie}
-  id: ellen-douglas-cowie
-- canonical: {first: Yerai, last: Doval}
-  variants:
-  - {first: Yerai, last: Doval Mosquera}
-- canonical: {first: John, last: Dowding}
-  id: john-dowding
-- canonical: {first: Jennifer, last: Doyon}
-  variants:
-  - {first: Jennifer B., last: Doyon}
-- canonical: {first: Christopher, last: Dozier}
-  variants:
-  - {first: Christopher C., last: Dozier}
-- canonical: {first: Elliott Franco, last: Drabek}
-  variants:
-  - {first: Elliott, last: Drabek}
-  - {first: Elliott, last: Drábek}
-- canonical: {first: Felix, last: Dreizin}
-  id: felix-dreizin
-- canonical: {first: Biljana, last: Drndarević}
-  variants:
-  - {first: Biljana, last: Drndarevic}
-- canonical: {first: Witold, last: Drożdżyński}
-  variants:
-  - {first: Witold, last: Drozdzynski}
-- canonical: {first: Sebastian, last: Drude}
-  id: sebastian-drude
-- canonical: {first: Jianyong, last: Duan}
-  variants:
-  - {first: Jian-Yong, last: Duan}
-- canonical: {first: Yuguang, last: Duan}
-  variants:
-  - {first: Yu, last: Duan}
-- canonical: {first: Pablo, last: Duboue}
-  variants:
-  - {first: Pablo A., last: Duboue}
-  - {first: Pablo Ariel, last: Duboue}
-- canonical: {first: Loic, last: Dugast}
-  variants:
-  - {first: Loïc, last: Dugast}
-- canonical: {first: Stefan Daniel, last: Dumitrescu}
-  variants:
-  - {first: Ștefan Daniel, last: Dumitrescu}
-  - {first: Ștefan, last: Dumitrescu}
-- canonical: {first: Pierre, last: Dumouchel}
-  id: pierre-dumouchel
-- canonical: {first: Ted E., last: Dunning}
-  variants:
-  - {first: Ted, last: Dunning}
-- canonical: {first: Long, last: Duong}
-  variants:
-  - {first: Long, last: Duong Thanh}
-- canonical: {first: Magali Sanches, last: Duran}
-  variants:
-  - {first: Magali, last: Sanches Duran}
-  - {first: Magali, last: Duran}
-- canonical: {first: Ilknur, last: Durgar El-Kahlout}
-  variants:
-  - {first: Ilknur Durgar, last: El-Kahlout}
-  - {first: İlknur, last: Durgar El-Kahlout}
-  - {first: İlknur Durgar, last: El-Kahlout}
-- canonical: {first: Koel, last: Dutta Chowdhury}
-  variants:
-  - {first: Koel Dutta, last: Chowdhury}
-- canonical: {first: Arienne, last: Dwyer}
-  id: arienne-dwyer
-- canonical: {first: Hans, last: Dybkjaer}
-  variants:
-  - {first: Hans, last: Dybkjær}
-- canonical: {first: Laila, last: Dybkjaer}
-  variants:
-  - {first: Laila, last: Dybkjær}
-- canonical: {first: Chris, last: Dyer}
-  variants:
-  - {first: Christopher, last: Dyer}
-  - {first: Christopher J., last: Dyer}
-- canonical: {first: Michael G., last: Dyer}
-  variants:
-  - {first: Michael, last: Dyer}
-- canonical: {first: Myroslava O., last: Dzikovska}
-  variants:
-  - {first: Myroslava, last: Dzikovska}
-- canonical: {first: Daniel, last: Déchelotte}
-  variants:
-  - {first: Daniel, last: Dechelotte}
-- canonical: {first: Hervé, last: Déjean}
-  id: herve-dejean
-  variants:
-  - {first: Herve, last: Dejean}
-- canonical: {first: Víctor J., last: Díaz}
-  variants:
-  - {first: Victor J., last: Díaz}
-- canonical: {first: Jesús E., last: Díaz Verdejo}
-  id: jesus-e-diaz-verdejo
-- canonical: {first: Arantza, last: Díaz de Ilarraza}
-  id: arantza-diaz-de-ilarraza
-  variants:
-  - {first: Arantza, last: Diaz de Ilarraza}
-- canonical: {first: Elisabeth, last: D’Halleweyn}
-  variants:
-  - {first: Elizabeth, last: D’Halleweyn}
-- canonical: {first: Luis Fernando, last: D’Haro}
-  variants:
-  - {first: Luis F., last: d’Haro}
-- canonical: {first: Susana, last: Early}
-  id: susana-early
-- canonical: {first: Hiroshi, last: Echizen-ya}
-  variants:
-  - {first: Hiroshi, last: Echizen’ya}
-- canonical: {first: Philip, last: Edmonds}
-  variants:
-  - {first: Philip G., last: Edmonds}
-- canonical: {first: Angels, last: Egea}
-  variants:
-  - {first: Àngels, last: Egea}
-- canonical: {first: Liat, last: Ein Dor}
-  variants:
-  - {first: Liat, last: Ein-Dor}
-- canonical: {first: Andreas, last: Eisele}
-  id: andreas-eisele
-- canonical: {first: Jason, last: Eisner}
-  variants:
-  - {first: Jason M., last: Eisner}
-- canonical: {first: Eva, last: Ejerhed}
-  variants:
-  - {first: Eva I., last: Ejerhed}
-- canonical: {first: Kerstin Severinson, last: Eklundh}
-  variants:
-  - {first: Kerstin, last: Severinson Eklundh}
-  - {first: Kerstin, last: Severinson}
-- canonical: {first: Said Ouatik, last: El Alaoui}
-  variants:
-  - {first: Said, last: Ouatik El Alaoui}
-- canonical: {first: Adil, last: El Ghali}
-  variants:
-  - {first: Adil, last: El-Ghali}
-- canonical: {first: Ismail, last: El Maarouf}
-  variants:
-  - {first: Ismaïl, last: El Maarouf}
-- canonical: {first: Samhaa R., last: El-Beltagy}
-  variants:
-  - {first: Samhaa, last: El-Beltagy}
-- canonical: {first: Marc, last: El-Bèze}
-  id: marc-el-beze
-  variants:
-  - {first: Marc, last: El-Beze}
-- canonical: {first: Wassim, last: El-Hajj}
-  variants:
-  - {first: Wassim, last: El Hajj}
-- canonical: {first: Mohab, last: El-karef}
-  variants:
-  - {first: Mohab, last: Elkaref}
-- canonical: {first: Noémie, last: Elhadad}
-  variants:
-  - {first: Noemie, last: Elhadad}
-- canonical: {first: Frédéric, last: Eliséi}
-  variants:
-  - {first: Frederic, last: Elisei}
-- canonical: {first: Faiza, last: Elkateb-Gara}
-  variants:
-  - {first: Faiza, last: Gara}
-  - {first: Faïza, last: Elkateb-Gara}
-- canonical: {first: John, last: Elliott}
-  variants:
-  - {first: John, last: Elliot}
-- canonical: {first: David, last: Ellis}
-  variants:
-  - {first: David Ellis, last: Rogers}
-- canonical: {first: T. Mark, last: Ellison}
-  id: t-mark-ellison
-- canonical: {first: Samira, last: Ellouze}
-  variants:
-  - {first: Samira Walha, last: Ellouze}
-- canonical: {first: Mariem, last: Ellouze Khemekhem}
-  variants:
-  - {first: Mariem, last: Ellouze Khemakhem}
-  - {first: Mariem, last: Ellouze}
-  - {first: Mariem Ellouze, last: Khmekhem}
-  - {first: Mariem, last: Ellouze khemekhem}
-- canonical: {first: Michael, last: Ellsworth}
-  variants:
-  - {first: Michael J., last: Ellsworth}
-- canonical: {first: Mohan, last: Zhang}
-  id: mohan-zhang-unc
-  orcid: 0009-0000-8866-7878
-  institution: University of North Carolina
-  comment: UNC
-- canonical: {first: Mohan, last: Zhang}
-  id: mohan-zhang
-  comment: May refer to several people
-- canonical: {first: David, last: Elson}
-  variants:
-  - {first: David K., last: Elson}
-- canonical: {first: Martin C., last: Emele}
-  variants:
-  - {first: Martin, last: Emele}
-- canonical: {first: Louisette, last: Emirkanian}
-  id: louisette-emirkanian
-- canonical: {first: Chantal, last: Enguehard}
-  id: chantal-enguehard
-- canonical: {first: Mark, last: Epstein}
-  id: mark-epstein
-- canonical: {first: Adoram, last: Erell}
-  id: adoram-erell
-- canonical: {first: Tomaž, last: Erjavec}
-  variants:
-  - {first: Tomaz, last: Erjavec}
-- canonical: {first: Gunes, last: Erkan}
-  variants:
-  - {first: Güneş, last: Erkan}
-- canonical: {first: Gülşen, last: Eryiğit}
-  variants:
-  - {first: Gülşen, last: Eryiǧit}
-- canonical: {first: Mahbaneh, last: Eshaghzadeh Torbati}
-  variants:
-  - {first: Mahbaneh, last: Eshaghzadeh}
-- canonical: {first: Iris, last: Eshkol}
-  variants:
-  - {first: Iris, last: Eshkol-Taravella}
-- canonical: {first: Salvador, last: España}
-  id: salvador-espana
-- canonical: {first: Luis, last: Espinosa Anke}
-  variants:
-  - {first: Luis, last: Espinosa-Anke}
-  - {first: Luis Espinosa, last: Anke}
-- canonical: {first: Miquel, last: Esplà-Gomis}
-  variants:
-  - {first: Miquel, last: Esplà}
-- canonical: {first: Dominique, last: Estival}
-  id: dominique-estival
-- canonical: {first: David A., last: Evans}
-  variants:
-  - {first: David Andreoff, last: Evans}
-- canonical: {first: David K., last: Evans}
-  variants:
-  - {first: David, last: Evans}
-  - {first: David Kirk, last: Evans}
-- canonical: {first: Edmund Grimley, last: Evans}
-  variants:
-  - {first: Edmund, last: Grimley-Evans}
-- canonical: {first: Richard, last: Evans}
-  id: richard-evans
-- canonical: {first: Roger, last: Evans}
-  id: roger-evans
-- canonical: {first: Martha, last: Evens}
-  variants:
-  - {first: Martha W., last: Evens}
-  - {first: Martha W, last: Evens}
-- canonical: {first: Stephanie S., last: Everett}
-  variants:
-  - {first: Stephanie, last: Everett}
-- canonical: {first: Lindsay J., last: Evett}
-  id: lindsay-j-evett
-- canonical: {first: Chandra Kiran Reddy, last: Evuru}
-  variants:
-  - {first: Chandra Kiran, last: Evuru}
-- canonical: {first: Frank Van, last: Eynde}
-  variants:
-  - {first: Frank, last: van Eynde}
-  - {first: Frank, last: Van Eynde}
-- canonical: {first: Nerea, last: Ezeiza}
-  id: nerea-ezeiza
-- canonical: {first: Cécile, last: Fabre}
-  variants:
-  - {first: Cecile, last: Fabre}
-- canonical: {first: Karoly, last: Fabricz}
-  id: karoly-fabricz
-- canonical: {first: Marcos Didonet Del, last: Fabro}
-  variants:
-  - {first: Marcus Didonet, last: Del Fabro}
-- canonical: {first: Hakimeh, last: Fadaee}
-  variants:
-  - {first: Hakimeh, last: Fadaei}
-- canonical: {first: Cédrick, last: Fairon}
-  variants:
-  - {first: Cedrick, last: Fairon}
-- canonical: {first: Nikos, last: Fakotakis}
-  id: nikos-fakotakis
-  variants:
-  - {first: Nikos D., last: Fakotakis}
-- canonical: {first: Agnieszka, last: Falenska}
-  variants:
-  - {first: Agnieszka, last: Faleńska}
-- canonical: {first: Shixi, last: Fan}
-  variants:
-  - {first: ShiXi, last: Fan}
-- canonical: {first: Alex Chengyu, last: Fang}
-  variants:
-  - {first: Alex C., last: Fang}
-- canonical: {first: M. Amin, last: Farajian}
-  variants:
-  - {first: Mohammad Amin, last: Farajian}
-- canonical: {first: Richárd, last: Farkas}
-  variants:
-  - {first: Richard, last: Farkas}
-- canonical: {first: Javier, last: Farreres}
-  variants:
-  - {first: Xavier, last: Farreres}
-- canonical: {first: Tanveer A., last: Faruquie}
-  variants:
-  - {first: Tanveer, last: Faruquie}
-  - {first: Tanveer A, last: Faruquie}
-- canonical: {first: David, last: Farwell}
-  id: david-farwell
-- canonical: {first: Nicolas R., last: Fauceglia}
-  variants:
-  - {first: Nicolas, last: Fauceglia}
-- canonical: {first: Benoit, last: Favre}
-  variants:
-  - {first: Benoît, last: Favre}
-- canonical: {first: Steven, last: Feiner}
-  variants:
-  - {first: Steven K., last: Feiner}
-- canonical: {first: Laurie, last: Feldman}
-  variants:
-  - {first: Laurie Beth, last: Feldman}
-- canonical: {first: Naomi, last: Feldman}
-  variants:
-  - {first: Naomi H., last: Feldman}
-- canonical: {first: Laszlo, last: Felfoldi}
-  variants:
-  - {first: László, last: Felföldi}
-- canonical: {first: Ariani Di, last: Felippo}
-  variants:
-  - {first: Ariani, last: Di-Felippo}
-  - {first: Ariani, last: Di Felippo}
-- canonical: {first: Valéria Delisandra, last: Feltrim}
-  variants:
-  - {first: Valéria, last: Feltrim}
-  - {first: Valéria D., last: Feltrim}
-- canonical: {first: Fangfang, last: Feng}
-  id: fangfang-feng
-- canonical: {first: Jens Erik, last: Fenstad}
-  variants:
-  - {first: Jens-Erik, last: Fenstad}
-- canonical: {first: Eraldo, last: Fernandes}
-  variants:
-  - {first: Eraldo Rezende, last: Fernandes}
-- canonical: {first: Ana, last: Fernandez}
-  variants:
-  - {first: Ana Fernández, last: Montraveta}
-  - {first: Ana, last: Fernández-Montraveta}
-- canonical: {first: Ramón, last: Fernandez Astudillo}
-  variants:
-  - {first: Ramón, last: Astudillo}
-  - {first: Ramón, last: F. Astudillo}
-  - {first: Ramon, last: F. Astudillo}
-- canonical: {first: Diego, last: Fernandez Slezak}
-  variants:
-  - {first: Diego, last: Fernández Slezak}
-- canonical: {first: Raquel, last: Fernández}
-  variants:
-  - {first: Raquel, last: Fernandez}
-- canonical: {first: Antonio, last: Fernández Orquín}
-  variants:
-  - {first: Antonio, last: Fernandez Orquín}
-  - {first: Antonio, last: Fernández-Orquín}
-  - {first: Antonio, last: Fernández}
-- canonical: {first: David, last: Fernández-Amorós}
-  variants:
-  - {first: David, last: Fernández}
-  - {first: David, last: Férnandez-Amorós}
-- canonical: {first: Fernando, last: Fernández-Martínez}
-  variants:
-  - {first: Fernando Fernández, last: Martínez}
-- canonical: {first: Stéphane, last: Ferrari}
-  variants:
-  - {first: Stephane, last: Ferrari}
-- canonical: {first: Kathleen, last: Ferraro}
-  variants:
-  - {first: Kathleen, last: Ferrara}
-- canonical: {first: Antonio, last: Ferrández}
-  id: antonio-ferrandez
-  variants:
-  - {first: Antonio, last: Ferrandez}
-- canonical: {first: Óscar, last: Ferrández}
-  variants:
-  - {first: Oscar, last: Ferrandez}
-  - {first: Oscar, last: Ferrández}
-- canonical: {first: Gaëlle, last: Ferré}
-  variants:
-  - {first: Gaelle, last: Ferré}
-- canonical: {first: Daniel, last: Ferrés}
-  variants:
-  - {first: Dani, last: Ferrés}
-- canonical: {first: Hanne, last: Fersøe}
-  id: hanne-fersoe
-  variants:
-  - {first: Hanne, last: Fersoe}
-- canonical: {first: Charles J., last: Fillmore}
-  variants:
-  - {first: Charles, last: Fillmore}
-- canonical: {first: Maria José B., last: Finatto}
-  variants:
-  - {first: Maria José, last: Finatto}
-  - {first: Maria José Bocorny, last: Finatto}
-- canonical: {first: Alex, last: Fine}
-  variants:
-  - {first: Alex B., last: Fine}
-- canonical: {first: Linda, last: Fineman}
-  id: linda-fineman
-- canonical: {first: Tim, last: Finin}
-  variants:
-  - {first: Timothy W., last: Finin}
-- canonical: {first: Pamela E., last: Fink}
-  id: pamela-e-fink
-- canonical: {first: Jenny Rose, last: Finkel}
-  variants:
-  - {first: Jenny, last: Finkel}
-- canonical: {first: Mark, last: Finlayson}
-  variants:
-  - {first: Mark A., last: Finlayson}
-- canonical: {first: Gregory, last: Finley}
-  variants:
-  - {first: Greg, last: Finley}
-- canonical: {first: Therese, last: Firmin}
-  variants:
-  - {first: Therese Firmin, last: Hand}
-- canonical: {first: Jonathan G., last: Fiscus}
-  id: jonathan-g-fiscus
-  variants:
-  - {first: Jonathan C., last: Fiscus}
-  - {first: Jonathan, last: Fiscus}
-- canonical: {first: David, last: Fisher}
-  id: david-fisher
-- canonical: {first: William M., last: Fisher}
-  id: william-m-fisher
-  variants:
-  - {first: William, last: Fisher}
-- canonical: {first: Xin, last: Xu}
-  id: xin-xu-ucsd
-  orcid: 0000-0001-5238-0955
-  comment: UCSD
-  institution: UC San Diego
-- canonical: {first: Xin, last: Xu}
-  id: xin-xu
-  comment: May refer to multiple people
-- canonical: {first: Sisay, last: Fissaha Adafre}
-  variants:
-  - {first: Sisay, last: Fissaha}
-  - {first: Sisay Fissaha, last: Adafre}
-- canonical: {first: Eileen, last: Fitzpatrick}
-  id: eileen-fitzpatrick
-- canonical: {first: James L., last: Flanagan}
-  id: james-l-flanagan
-- canonical: {first: Sébastien, last: Flavier}
-  variants:
-  - {first: Sebastien, last: Flavier}
-- canonical: {first: Iuliana Alexandra, last: Fleşcan-Lovin-Arseni}
-  variants:
-  - {first: Iuliana Alexandra, last: Fleșcan-Lovin-Arseni}
-  - {first: Iuliana-Alexandra, last: Flescan-Lovin-Arseni}
-- canonical: {first: Dan, last: Flickinger}
-  id: dan-flickinger
-  variants:
-  - {first: Daniel, last: Flickinger}
-  - {first: Daniel P., last: Flickinger}
-- canonical: {first: Radu, last: Florian}
-  id: radu-florian
-- canonical: {first: Christian, last: Fluhr}
-  id: christian-fluhr
-- canonical: {first: Achille, last: Fokoue-Nkoutche}
-  variants:
-  - {first: Achille, last: Fokoue}
-- canonical: {first: Helka, last: Folch}
-  id: helka-folch
-- canonical: {first: Peter, last: Foltz}
-  variants:
-  - {first: Peter W., last: Foltz}
-- canonical: {first: José A. R., last: Fonollosa}
-  variants:
-  - {first: Jose A., last: R. Fonollosa}
-  - {first: José A.R., last: Fonollosa}
-  - {first: José A., last: R. Fonollosa}
-  - {first: Jose A. R., last: Fonollosa}
-- canonical: {first: Erick, last: Fonseca}
-  variants:
-  - {first: Erick Rocha, last: Fonseca}
-  - {first: Erick R., last: Fonseca}
-- canonical: {first: Evandro B., last: Fonseca}
-  variants:
-  - {first: Evandro, last: Fonseca}
-- canonical: {first: Ariadna, last: Font Llitjós}
-  variants:
-  - {first: Ariadna, last: Font-Llitjos}
-  - {first: Ariadna, last: Font Llitjos}
-- canonical: {first: Josep Maria, last: Fontana}
-  variants:
-  - {first: Josep, last: Fontana}
-- canonical: {first: Kate, last: Forbes-Riley}
-  variants:
-  - {first: Kate, last: Forbes}
-  - {first: Katherine, last: Forbes-Riley}
-  - {first: Katherine, last: Forbes}
-  - {first: Katherine, last: Forbes Riley}
-- canonical: {first: Kenneth, last: Forbus}
-  variants:
-  - {first: Kenneth D., last: Forbus}
-- canonical: {first: Mikel L., last: Forcada}
-  variants:
-  - {first: Mikel, last: Forcada}
-- canonical: {first: Cameron Shaw, last: Fordyce}
-  variants:
-  - {first: Cameron, last: Fordyce}
-- canonical: {first: Lluis, last: Formiga}
-  variants:
-  - {first: Lluís, last: Formiga}
-- canonical: {first: David, last: Forsyth}
-  variants:
-  - {first: David A., last: Forsyth}
-- canonical: {first: Corina, last: Forăscu}
-  variants:
-  - {first: Corina, last: Forascu}
-- canonical: {first: Eric, last: Fosler-Lussier}
-  variants:
-  - {first: J. Eric, last: Fosler}
-  - {first: Eric, last: Fosler}
-- canonical: {first: Victoria, last: Fossum}
-  variants:
-  - {first: Victoria Li, last: Fossum}
-- canonical: {first: Dean, last: Foster}
-  variants:
-  - {first: Dean P., last: Foster}
-- canonical: {first: Mary Ellen, last: Foster}
-  variants:
-  - {first: Mary E., last: Foster}
-- canonical: {first: Kilian A., last: Foth}
-  variants:
-  - {first: Kilian, last: Foth}
-- canonical: {first: Stavroula-Evita, last: Fotinea}
-  id: stavroula-evita-fotinea
-- canonical: {first: Christophe, last: Fouqueré}
-  id: christophe-fouquere
-- canonical: {first: Sébastien, last: Fournier}
-  variants:
-  - {first: Sebastien, last: Fournier}
-- canonical: {first: Heidi, last: Fox}
-  variants:
-  - {first: Heidi J., last: Fox}
-- canonical: {first: Jean E., last: Fox Tree}
-  variants:
-  - {first: Jean Fox, last: Tree}
-  - {first: Jean, last: Fox Tree}
-- canonical: {first: Michael C., last: Frank}
-  variants:
-  - {first: Michael, last: Frank}
-- canonical: {first: Stefan L., last: Frank}
-  variants:
-  - {first: Stefan, last: Frank}
-- canonical: {first: Alexander, last: Franz}
-  variants:
-  - {first: Alexander M., last: Franz}
-- canonical: {first: Claire, last: François}
-  variants:
-  - {first: Claire, last: Francois}
-- canonical: {first: Chaoqun, last: Liu}
-  id: chaoqun-liu-ntu
-  orcid: 0000-0001-8014-2516
-  institution: Nanyang Technological University
-  comment: NTU
-- canonical: {first: Chaoqun, last: Liu}
-  id: chaoqun-liu
-  comment: May refer to several people
-- canonical: {first: Alexander, last: Fraser}
-  variants:
-  - {first: Alex, last: Fraser}
-- canonical: {first: Kathleen C., last: Fraser}
-  variants:
-  - {first: Kathleen, last: Fraser}
-- canonical: {first: Norman M., last: Fraser}
-  variants:
-  - {first: Norman, last: Fraser}
-- canonical: {first: Elisabeth, last: Frasnelli}
-  id: elisabeth-frasnelli
-- canonical: {first: Zuzana, last: Fraterova}
-  variants:
-  - {first: Zuzana, last: Fráterová}
-- canonical: {first: Robert, last: Frederking}
-  variants:
-  - {first: Robert E., last: Frederking}
-- canonical: {first: Dayne, last: Freitag}
-  id: dayne-freitag
-- canonical: {first: André, last: Freitas}
-  variants:
-  - {first: Andre, last: Freitas}
-- canonical: {first: Cláudia, last: Freitas}
-  variants:
-  - {first: Claudia, last: Freitas}
-- canonical: {first: Karin, last: Friberg Heppin}
-  variants:
-  - {first: Karin Friberg, last: Heppin}
-  - {first: Karin, last: Friberg}
-- canonical: {first: Carol, last: Friedman}
-  id: carol-friedman
-- canonical: {first: Richard, last: Fritzson}
-  variants:
-  - {first: Rich, last: Fritzson}
-- canonical: {first: Sónia, last: Frota}
-  id: sonia-frota
-- canonical: {first: Eva, last: Fucikova}
-  variants:
-  - {first: Eva, last: Fučíková}
-- canonical: {first: Maria, last: Fuentes}
-  variants:
-  - {first: Maria, last: Fuentes Fort}
-- canonical: {first: Jun’ichi, last: Fukumoto}
-  variants:
-  - {first: Junichi, last: Fukumoto}
-- canonical: {first: Shun-ya, last: Fukunaga}
-  variants:
-  - {first: Shunya, last: Fukunaga}
-- canonical: {first: Sean A., last: Fulop}
-  variants:
-  - {first: Sean, last: Fulop}
-- canonical: {first: Sadaoki, last: Furui}
-  id: sadaoki-furui
-- canonical: {first: Robert P., last: Futrelle}
-  variants:
-  - {first: Robert, last: Futrelle}
-- canonical: {first: Luana, last: Fǎgǎrǎşan}
-  variants:
-  - {first: Luana, last: Fagarasan}
-- canonical: {first: Kiran, last: GVR}
-  variants:
-  - {first: Kiran, last: Gvr}
-- canonical: {first: Raghu Pujitha, last: Gade}
-  variants:
-  - {first: Pujitha, last: Gade}
-- canonical: {first: Benoit, last: Gaillard}
-  variants:
-  - {first: Benoît, last: Gaillard}
-- canonical: {first: Robert, last: Gaizauskas}
-  id: robert-gaizauskas
-  variants:
-  - {first: Robert J., last: Gaizauskas}
-  - {first: Rob, last: Gaizauskas}
-- canonical: {first: Nuria, last: Gala}
-  variants:
-  - {first: Núria, last: Gala}
-  - {first: Nùria, last: Gala}
-- canonical: {first: Dimitrios, last: Galanis}
-  variants:
-  - {first: Dimitris, last: Galanis}
-- canonical: {first: William A., last: Gale}
-  variants:
-  - {first: William, last: Gale}
-- canonical: {first: Stephen L., last: Gallant}
-  variants:
-  - {first: Stephen, last: Gallant}
-- canonical: {first: Ascension, last: Gallardo-Antolin}
-  variants:
-  - {first: Ascension, last: Gallardo}
-- canonical: {first: Sylvain, last: Galliano}
-  id: sylvain-galliano
-- canonical: {first: Björn, last: Gambäck}
-  variants:
-  - {first: Bjorn, last: Gamback}
-  - {first: Björn, last: Gämback}
-- canonical: {first: Iñaki, last: Gaminde}
-  id: inaki-gaminde
-- canonical: {first: Kok Wee, last: Gan}
-  variants:
-  - {first: Kok-Wee, last: Gan}
-- canonical: {first: Surya, last: Ganesh}
-  variants:
-  - {first: Surya Ganesh, last: V}
-  - {first: Surya Ganesh, last: Veeravalli}
-- canonical: {first: Barathi, last: Ganesh H. B.}
-  variants:
-  - {first: Barathi, last: Ganesh HB}
-- canonical: {first: Vikas, last: Ganjigunte Ashok}
-  variants:
-  - {first: Vikas, last: Ashok}
-- canonical: {first: Helena Hong, last: Gao}
-  variants:
-  - {first: Helena, last: Gao}
-- canonical: {first: Zhao Ming, last: Gao}
-  variants:
-  - {first: Zhao-Ming, last: Gao}
-  - {first: Zhao-ming, last: Gao}
-- canonical: {first: Radovan, last: Garabík}
-  variants:
-  - {first: Radovan, last: Garabik}
-- canonical: {first: Fernando, last: Garcia}
-  variants:
-  - {first: Fernando, last: García-Granada}
-  - {first: Fernando, last: García}
-- canonical: {first: Marie-Neige, last: Garcia}
-  id: marie-neige-garcia
-- canonical: {first: Jorge, last: Garcia Flores}
-  variants:
-  - {first: Jorge, last: García Flores}
-  - {first: Jorge J., last: García Flores}
-- canonical: {first: Alberto, last: Garcia-Duran}
-  variants:
-  - {first: Alberto, last: García-Durán}
-- canonical: {first: Carmen, last: Garcia-Mateo}
-  variants:
-  - {first: Carmen, last: García-Mateo}
-- canonical: {first: Gonçal V., last: Garcés Díaz-Munío}
-  orcid: 0000-0002-2594-5858
-- canonical: {first: Mar, last: García}
-  id: mar-garcia
-- canonical: {first: José M., last: García Miguel}
-  variants:
-  - {first: José M., last: García-Miguel}
-- canonical: {first: Marcos, last: García Salido}
-  variants:
-  - {first: Marcos, last: García-Salido}
-- canonical: {first: Miguel Ángel, last: García-Cumbreras}
-  variants:
-  - {first: M. Ángel, last: García}
-  - {first: Miguel, last: García-Cumbreras}
-  - {first: Miguel Á., last: García Cumbreras}
-- canonical: {first: Mercedes, last: García-Martínez}
-  variants:
-  - {first: Mercedes García, last: Martínez}
-- canonical: {first: Ana, last: García-Serrano}
-  variants:
-  - {first: Ana M., last: García-Serrano}
-- canonical: {first: Diogo, last: Glória-Silva}
-  orcid: 0000-0002-4420-7455
-  institution: NOVA University of Lisbon - School of Science and Technology
-  variants:
-  - {first: Diogo F. C., last: Silva}
-- canonical: {first: Ismael, last: García-Varea}
-  variants:
-  - {first: Ismael García, last: Varea}
-  - {first: Ismael, last: García Varea}
-- canonical: {first: Manuel, last: García-Vega}
-  variants:
-  - {first: Manuel, last: García}
-- canonical: {first: Roberto, last: Garigliano}
-  id: roberto-garigliano
-- canonical: {first: John S., last: Garofolo}
-  id: john-s-garofolo
-  variants:
-  - {first: John, last: Garofolo}
-- canonical: {first: Juan María, last: Garrido}
-  variants:
-  - {first: Juan Maria, last: Garrido}
-- canonical: {first: Marta, last: Garrote-Salazar}
-  variants:
-  - {first: Marta, last: Garrote}
-- canonical: {first: Paul H., last: Garthwaite}
-  variants:
-  - {first: Paul, last: Garthwaite}
-  - {first: Paul H, last: Garthwaite}
-- canonical: {first: E. Gabriela, last: Garza}
-  variants:
-  - {first: Gabriela, last: Garza}
-- canonical: {first: Aina, last: Garí Soler}
-  variants:
-  - {first: Aina Garí, last: Soler}
-- canonical: {first: Milica, last: Gasic}
-  variants:
-  - {first: Milica, last: Gašić}
-- canonical: {first: Donna, last: Gates}
-  variants:
-  - {first: Donna M., last: Gates}
-- canonical: {first: Maíra, last: Gatti}
-  variants:
-  - {first: Maira, last: Gatti}
-- canonical: {first: Eric, last: Gaussier}
-  variants:
-  - {first: Éric, last: Gaussier}
-- canonical: {first: Akash Kumar, last: Gautam}
-  variants:
-  - {first: Akash, last: Gautam}
-- canonical: {first: Gauri Shankar, last: Gautam}
-  variants:
-  - {first: Gauri, last: S. Gautam}
-- canonical: {first: Marsal, last: Gavalda}
-  variants:
-  - {first: Marsal, last: Gavaldà}
-- canonical: {first: Maria, last: Gavrilidou}
-  id: maria-gavrilidou
-- canonical: {first: Jean Mark, last: Gawron}
-  id: jean-mark-gawron
-  variants:
-  - {first: Mark, last: Gawron}
-  - {first: J. Mark, last: Gawron}
-- canonical: {first: Barbara, last: Gawronska}
-  variants:
-  - {first: Barbara, last: Gawronska-Werngren}
-  - {first: Barbara, last: Gawrońska-Werngren}
-- canonical: {first: Claudia, last: Gdaniec}
-  id: claudia-gdaniec
-- canonical: {first: Binyam Gebrekidan, last: Gebre}
-  variants:
-  - {first: Binyam, last: Gebre}
-- canonical: {first: T. V., last: Geetha}
-  variants:
-  - {first: Geetha, last: T V}
-  - {first: T V, last: Geetha}
-- canonical: {first: Maayan, last: Geffet}
-  variants:
-  - {first: Maayan, last: Zhitomirsky-Geffet}
-- canonical: {first: Johanna, last: Geiß}
-  variants:
-  - {first: Johanna, last: Geiss}
-- canonical: {first: Alexander, last: Gelbukh}
-  variants:
-  - {first: Alexander F., last: Gelbukh}
-- canonical: {first: Debela Tesfaye, last: Gemechu}
-  variants:
-  - {first: Debela, last: Tesfaye}
-- canonical: {first: Jort Florent, last: Gemmeke}
-  variants:
-  - {first: Jort F., last: Gemmeke}
-  - {first: Jort, last: Gemmeke}
-- canonical: {first: Cédric, last: Gendrot}
-  variants:
-  - {first: Cedric, last: Gendrot}
-- canonical: {first: Edouard, last: Geoffrois}
-  id: edouard-geoffrois
-- canonical: {first: Lucas, last: Georges Gabriel Charpentier}
-  variants:
-  - {first: Lucas, last: Charpentier}
-- canonical: {first: Panayiotis, last: Georgiou}
-  variants:
-  - {first: Panayiotis G., last: Georgiou}
-- canonical: {first: Matthew, last: Gerber}
-  variants:
-  - {first: Matt, last: Gerber}
-  - {first: Matthew S., last: Gerber}
-  - {first: Matthew, last: Garber}
-- canonical: {first: Abigail S., last: Gertner}
-  variants:
-  - {first: Abigail, last: Gertner}
-- canonical: {first: Pablo, last: Gervás}
-  id: pablo-gervas
-- canonical: {first: Gholamreza, last: Ghassem-Sani}
-  variants:
-  - {first: Gholamreza, last: Ghassem-sani}
-  - {first: Gholamreza, last: Ghasem-Sani}
-- canonical: {first: Samik, last: Ghosh}
-  variants:
-  - {first: Samik, last: Gosh}
-- canonical: {first: Soumya Sankar, last: Ghosh}
-  variants:
-  - {first: Soumya, last: Ghosh}
-- canonical: {first: Egidio, last: Giachin}
-  id: egidio-giachin
-- canonical: {first: Daniela, last: Gifu}
-  variants:
-  - {first: Daniela, last: Gîfu}
-- canonical: {first: Helen M., last: Gigley}
-  variants:
-  - {first: Helen, last: Gigley}
-- canonical: {first: Luca, last: Gilardoni}
-  id: luca-gilardoni
-- canonical: {first: Laurent, last: Gillard}
-  id: laurent-gillard
-- canonical: {first: Dan, last: Gillick}
-  variants:
-  - {first: Daniel, last: Gillick}
-- canonical: {first: Laurence, last: Gillick}
-  variants:
-  - {first: Laurence S., last: Gillick}
-- canonical: {first: Jesús, last: Giménez}
-  variants:
-  - {first: Jesus, last: Gimenez}
-- canonical: {first: Mireia, last: Ginestí-Rosell}
-  variants:
-  - {first: Mireia, last: Ginestí Rosell}
-- canonical: {first: Alexandru-Lucian, last: Ginsca}
-  variants:
-  - {first: Alexandru, last: Ginsca}
-  - {first: Alexandru-Lucian, last: Gînscă}
-- canonical: {first: Voula, last: Giouli}
-  id: voula-giouli
-- canonical: {first: Emiliano, last: Giovannetti}
-  variants:
-  - {first: Emiliano, last: Giovanetti}
-- canonical: {first: Joan, last: Giralt Duran}
-  variants:
-  - {first: Joan Giralt, last: Duran}
-- canonical: {first: Christian, last: Girardi}
-  id: christian-girardi
-- canonical: {first: Roxana, last: Girju}
-  variants:
-  - {first: Roxana, last: Gîrju}
-- canonical: {first: Herbert, last: Gish}
-  variants:
-  - {first: Herb, last: Gish}
-- canonical: {first: Claudio, last: Giuliano}
-  id: claudio-giuliano
-- canonical: {first: Sheila R., last: Glasbey}
-  id: sheila-r-glasbey
-  variants:
-  - {first: Sheila, last: Glasbey}
-- canonical: {first: James, last: Glass}
-  variants:
-  - {first: James R., last: Glass}
-- canonical: {first: Michael, last: Glass}
-  variants:
-  - {first: Michael R., last: Glass}
-- canonical: {first: Meghan, last: Glenn}
-  variants:
-  - {first: Meghan Lammie, last: Glenn}
-- canonical: {first: Alfio, last: Gliozzo}
-  variants:
-  - {first: Alfio, last: Massimiliano Gliozzo}
-  - {first: Alfio Massimiliano, last: Gliozzo}
-  - {first: Alfio M., last: Gliozzo}
-- canonical: {first: Daniele, last: Godard}
-  variants:
-  - {first: Danièle, last: Godard}
-- canonical: {first: Guenther, last: Goerz}
-  id: guenther-goerz
-- canonical: {first: Sebastian, last: Goeser}
-  id: sebastian-goeser
-- canonical: {first: Chooi-Ling, last: Goh}
-  variants:
-  - {first: Chooi Ling, last: Goh}
-- canonical: {first: Koldo, last: Gojenola}
-  id: koldo-gojenola
-  variants:
-  - {first: Koldobika, last: Gojenola}
-  - {first: Koldo, last: Gojenola Galletebeitia}
-- canonical: {first: Adele, last: Goldberg}
-  variants:
-  - {first: Adele E., last: Goldberg}
-- canonical: {first: Andrew B., last: Goldberg}
-  variants:
-  - {first: Andrew, last: Goldberg}
-- canonical: {first: Eli, last: Goldberg}
-  id: eli-goldberg
-- canonical: {first: Jade, last: Goldstein}
-  variants:
-  - {first: Jade, last: Goldstein-Stewart}
-- canonical: {first: Sharon, last: Goldwater}
-  variants:
-  - {first: Sharon J., last: Goldwater}
-- canonical: {first: Sujatha Das, last: Gollapalli}
-  variants:
-  - {first: Sujatha, last: Das Gollapalli}
-  - {first: Sujatha, last: Das}
-- canonical: {first: Helena, last: Gomez}
-  variants:
-  - {first: Helena, last: Gómez}
-- canonical: {first: Jose Maria, last: Gomez-Hidalgo}
-  variants:
-  - {first: Jose Maria Gomez, last: Hidalgo}
-  - {first: José M. Gómez, last: Hidalgo}
-- canonical: {first: Junping, last: Gong}
-  variants:
-  - {first: Jun-ping, last: Gong}
-- canonical: {first: Zhengxian, last: Gong}
-  variants:
-  - {first: ZhengXian, last: Gong}
-- canonical: {first: Graciela, last: Gonzalez}
-  variants:
-  - {first: Graciela, last: Gonzalez-Hernandez}
-- canonical: {first: Meritxell, last: Gonzàlez}
-  id: meritxell-gonzalez
-  variants:
-  - {first: Meritxell, last: González}
-- canonical: {first: Edgar, last: Gonzàlez Pellicer}
-  variants:
-  - {first: Edgar, last: Gonzàlez}
-- canonical: {first: Fabio A., last: González}
-  variants:
-  - {first: Fabio, last: González}
-- canonical: {first: Aitor, last: González-Agirre}
-  variants:
-  - {first: Aitor, last: Gonzalez-Agirre}
-- canonical: {first: Francisco Javier, last: González-Castaño}
-  variants:
-  - {first: Francisco J., last: González-Castaño}
-- canonical: {first: Ana, last: González-Ledesma}
-  variants:
-  - {first: Ana, last: Gonzalez}
-- canonical: {first: Joaquín, last: González-Rodríguez}
-  variants:
-  - {first: Joaquin, last: Gonzalez-Rodriguez}
-- canonical: {first: Jesús, last: González-Rubio}
-  variants:
-  - {first: Jesús, last: González Rubio}
-- canonical: {first: Hugo, last: Gonçalo Oliveira}
-  variants:
-  - {first: Hugo Gonçalo, last: Oliveira}
-- canonical: {first: Patricia, last: Gonçalves}
-  variants:
-  - {first: Patricia Nunes, last: Gonçalves}
-  - {first: Patrícia, last: Gonçalves}
-- canonical: {first: Teresa, last: Gonçalves}
-  variants:
-  - {first: Teresa, last: Goncalves}
-- canonical: {first: David, last: Goodine}
-  id: david-goodine
-- canonical: {first: Joshua, last: Goodman}
-  variants:
-  - {first: Joshua T., last: Goodman}
-- canonical: {first: Michael Wayne, last: Goodman}
-  variants:
-  - {first: Michael, last: Goodman}
-- canonical: {first: Noah, last: Goodman}
-  variants:
-  - {first: Noah D., last: Goodman}
-- canonical: {first: Andrew, last: Gordon}
-  variants:
-  - {first: Andrew S., last: Gordon}
-- canonical: {first: Joshua B., last: Gordon}
-  variants:
-  - {first: Joshua, last: Gordon}
-- canonical: {first: Yonael, last: Gorfu}
-  id: yonael-gorfu
-- canonical: {first: Allen L., last: Gorin}
-  variants:
-  - {first: Allen, last: Gorin}
-- canonical: {first: Philip, last: Gorinski}
-  variants:
-  - {first: Philip John, last: Gorinski}
-- canonical: {first: Matthew R., last: Gormley}
-  variants:
-  - {first: Matthew, last: Gormley}
-- canonical: {first: Genevieve, last: Gorrell}
-  id: genevieve-gorrell
-- canonical: {first: Didzis, last: Gosko}
-  variants:
-  - {first: Didzis, last: Goško}
-- canonical: {first: Thilo, last: Gotz}
-  variants:
-  - {first: Thilo, last: Götz}
-- canonical: {first: Jérôme, last: Goulian}
-  id: jerome-goulian
-- canonical: {first: Cyril, last: Goutte}
-  id: cyril-goutte
-- canonical: {first: Arthur C., last: Graesser}
-  variants:
-  - {first: Art, last: Graesser}
-  - {first: Arthur, last: Graesser}
-- canonical: {first: Joseph F., last: Grafsgaard}
-  variants:
-  - {first: Joseph, last: Grafsgaard}
-- canonical: {first: Naida, last: Graham}
-  variants:
-  - {first: Naida L., last: Graham}
-- canonical: {first: Filip, last: Gralinski}
-  variants:
-  - {first: Filip, last: Graliński}
-- canonical: {first: Ramon, last: Granell}
-  variants:
-  - {first: Ramón, last: Granell}
-- canonical: {first: Robert, last: Granville}
-  variants:
-  - {first: Robert Alan, last: Granville}
-- canonical: {first: Agustin, last: Gravano}
-  variants:
-  - {first: Agustín, last: Gravano}
-- canonical: {first: Édouard, last: Grave}
-  variants:
-  - {first: Edouard, last: Grave}
-- canonical: {first: Guillaume, last: Gravier}
-  id: guillaume-gravier
-- canonical: {first: João, last: Graça}
-  variants:
-  - {first: Joao, last: Graca}
-  - {first: João V., last: Graça}
-- canonical: {first: Jordan R., last: Green}
-  variants:
-  - {first: Jordan, last: Green}
-- canonical: {first: Matthew J., last: Green}
-  variants:
-  - {first: Matthew, last: Green}
-- canonical: {first: Nancy, last: Green}
-  variants:
-  - {first: Nancy L., last: Green}
-- canonical: {first: Stephen J., last: Green}
-  variants:
-  - {first: Stephen, last: Green}
-  - {first: Stephen J, last: Green}
-- canonical: {first: Mark A., last: Greenwood}
-  variants:
-  - {first: Mark, last: Greenwood}
-- canonical: {first: Edward, last: Grefenstette}
-  id: edward-grefenstette
-- canonical: {first: Michelle, last: Gregory}
-  id: michelle-gregory
-  variants:
-  - {first: Michelle L., last: Gregory}
-- canonical: {first: Warren, last: Greiff}
-  variants:
-  - {first: Warren R., last: Greiff}
-- canonical: {first: Thomas L., last: Griffiths}
-  variants:
-  - {first: Thomas, last: Griffiths}
-- canonical: {first: Gintarė, last: Grigonytė}
-  variants:
-  - {first: Gintare, last: Grigonyte}
-  - {first: Gintarė, last: Grigonyte}
-- canonical: {first: Ralph, last: Grishman}
-  id: ralph-grishman
-- canonical: {first: Hendrik Johannes, last: Groenewald}
-  variants:
-  - {first: Hendrik J., last: Groenewald}
-- canonical: {first: Leif, last: Groenqvist}
-  variants:
-  - {first: Leif, last: Gronqvist}
-- canonical: {first: Maria Toporowska, last: Gronostaj}
-  variants:
-  - {first: Maria, last: Toporowska Gronostaj}
-- canonical: {first: Jerneja, last: Gros}
-  variants:
-  - {first: Jerneja Žganec, last: Gros}
-- canonical: {first: Justin H., last: Gross}
-  variants:
-  - {first: Justin, last: Gross}
-- canonical: {first: Barbara J., last: Grosz}
-  variants:
-  - {first: Barbara, last: Grosz}
-- canonical: {first: Laszlo, last: Grunfeld}
-  id: laszlo-grunfeld
-- canonical: {first: Normunds, last: Gruzitis}
-  variants:
-  - {first: Normunds, last: Grūzītis}
-- canonical: {first: Nicole, last: Grégoire}
-  variants:
-  - {first: Nicole, last: Gregoire}
-- canonical: {first: Hung-Yan, last: Gu}
-  variants:
-  - {first: Hung-yan, last: Gu}
-- canonical: {first: Franz, last: Guenthner}
-  id: franz-guenthner
-- canonical: {first: Emiliano Raul, last: Guevara}
-  variants:
-  - {first: Emiliano, last: Guevara}
-- canonical: {first: Pierre, last: Guillaume}
-  id: pierre-guillaume
-- canonical: {first: Thierry, last: Guillotin}
-  id: thierry-guillotin
-- canonical: {first: Curry I., last: Guinn}
-  variants:
-  - {first: Curry, last: Guinn}
-- canonical: {first: José M., last: Guirao}
-  variants:
-  - {first: José María, last: Guirao}
-- canonical: {first: Greg, last: Gul-rajani}
-  variants:
-  - {first: Greg, last: Gulrajani}
-- canonical: {first: Omer Farukhan, last: Gunes}
-  variants:
-  - {first: Omer, last: Gunes}
-- canonical: {first: Cheng-ming, last: Guo}
-  variants:
-  - {first: Cheng Ming, last: Guo}
-- canonical: {first: Ying-Mei, last: Guo}
-  variants:
-  - {first: YingMei, last: Guo}
-- canonical: {first: Yuqing, last: Guo}
-  variants:
-  - {first: Yuqing, last: Gao}
-- canonical: {first: Zhicheng, last: Guo}
-  comment: Tsinghua
-  id: zhicheng-guo-tsinghua
-- canonical: {first: Zhicheng, last: Guo}
-  comment: xidian
-  id: zhicheng-guo-xidian
-- canonical: {first: Deepak, last: Gupta}
-  variants:
-  - {first: Deepak Kumar, last: Gupta}
-  - {first: Deepa, last: Gupta}
-- canonical: {first: Naman K., last: Gupta}
-  variants:
-  - {first: Naman, last: Gupta}
-- canonical: {first: Vineet, last: Gupta}
-  id: vineet-gupta
-- canonical: {first: Antton, last: Gurrutxaga}
-  id: antton-gurrutxaga
-- canonical: {first: Sofia, last: Gustafson-Capková}
-  variants:
-  - {first: Sofia, last: Gustafson Capková}
-- canonical: {first: Louise, last: Guthrie}
-  id: louise-guthrie
-- canonical: {first: E. Dario, last: Gutierrez}
-  variants:
-  - {first: Elkin, last: Darío Gutiérrez}
-  - {first: E. Darío, last: Gutiérrez}
-- canonical: {first: Yoan, last: Gutiérrez}
-  variants:
-  - {first: Yoan, last: Gutiérrez Vázquez}
-- canonical: {first: Gualberto A., last: Guzman}
-  variants:
-  - {first: Gualberto, last: Guzmán}
-- canonical: {first: Francisco, last: Guzmán}
-  variants:
-  - {first: Francisco, last: Guzman}
-- canonical: {first: Tibor, last: Gyimóthy}
-  id: tibor-gyimothy
-- canonical: {first: José M., last: Gómez}
-  variants:
-  - {first: José Manuel, last: Gómez}
-  - {first: Jose Manuel, last: Gómez}
-  - {first: Jose M., last: Gomez}
-- canonical: {first: Xavier, last: Gómez Guinovart}
-  variants:
-  - {first: Xavier, last: Gómez-Guinovart}
-- canonical: {first: Naiara, last: Pérez}
-  orcid: 0000-0001-8648-0428
-  institution: University of the Basque Country (UPV/EHU)
-  variants:
-  - {first: Naiara, last: Perez-Miguel}
-  - {first: Naiara, last: Miguel}
-- canonical: {first: Asunción, last: Gómez-Pérez}
-  variants:
-  - {first: Asunción Gómez, last: Pérez}
-- canonical: {first: José Manuel, last: Gómez-Pérez}
-  variants:
-  - {first: Jose Manuel, last: Gomez-Perez}
-- canonical: {first: Anne, last: Göhring}
-  variants:
-  - {first: Anne, last: Goehring}
-- canonical: {first: Memduh, last: Gökırmak}
-  variants:
-  - {first: Memduh, last: Gokirmak}
-- canonical: {first: Jana, last: Götze}
-  variants:
-  - {first: Jana, last: Goetze}
-- canonical: {first: Shachi, last: H. Kumar}
-  variants:
-  - {first: Shachi H, last: Kumar}
-- canonical: {first: Eun Young, last: Ha}
-  variants:
-  - {first: Eun, last: Ha}
-  - {first: Eun Y., last: Ha}
-- canonical: {first: Le Quan, last: Ha}
-  variants:
-  - {first: Le Q, last: Ha}
-- canonical: {first: Quang Thuy, last: Ha}
-  variants:
-  - {first: Quang-Thuy, last: Ha}
-- canonical: {first: Yaakov, last: HaCohen-Kerner}
-  variants:
-  - {first: Yaakov, last: Hacohen-Kerner}
-- canonical: {first: Anne, last: Haake}
-  variants:
-  - {first: Anne R., last: Haake}
-- canonical: {first: Salah, last: Haamid}
-  id: salah-haamid
-- canonical: {first: Andrew, last: Haas}
-  variants:
-  - {first: Andrew R., last: Haas}
-- canonical: {first: Christopher, last: Habel}
-  variants:
-  - {first: Christopher U., last: Habel}
-- canonical: {first: Benoit, last: Habert}
-  id: benoit-habert
-  variants:
-  - {first: Benoît, last: Habert}
-- canonical: {first: Kadri, last: Hacioglu}
-  id: kadri-hacioglu
-- canonical: {first: Bassam, last: Haddad}
-  id: bassam-haddad
-- canonical: {first: Nicholas J., last: Haddock}
-  variants:
-  - {first: Nicholas, last: Haddock}
-- canonical: {first: Widad Mustafa El, last: Hadi}
-  id: widad-mustafa-el-hadi
-  variants:
-  - {first: Widad Mustafa, last: El Hadi}
-  - {first: Widad, last: Mustafa El Hadi}
-- canonical: {first: Mohamed Nassime, last: Hadjadj}
-  variants:
-  - {first: Mohamed, last: Hadjadj}
-- canonical: {first: Lamia, last: Hadrich Belguith}
-  variants:
-  - {first: Lamia Hadrich, last: Belguith}
-  - {first: Lamia, last: Hadrich-Belguith}
-  - {first: Lamia, last: Belguith}
-  - {first: Lamia, last: Belguith Hadrich}
-- canonical: {first: Walter, last: Haeseryn}
-  id: walter-haeseryn
-- canonical: {first: Nazila, last: Hafezi}
-  id: nazila-hafezi
-- canonical: {first: Gholamreza, last: Haffari}
-  variants:
-  - {first: Reza, last: Haffari}
-- canonical: {first: Younggyun, last: Hahm}
-  variants:
-  - {first: YoungGyun, last: Hahm}
-- canonical: {first: Gus, last: Hahn-Powell}
-  variants:
-  - {first: Gustave, last: Hahn-Powell}
-- canonical: {first: Negacy, last: Hailu}
-  variants:
-  - {first: Negacy D., last: Hailu}
-- canonical: {first: Horst-Udo, last: Hain}
-  id: horst-udo-hain
-- canonical: {first: Jan, last: Hajic}
-  id: jan-hajic
-  similar: [jan-hajic-jr]
-  variants:
-  - {first: Jan, last: Hajič}
-- canonical: {first: Eva, last: Hajicova}
-  id: eva-hajicova
-  variants:
-  - {first: Eva, last: Hajicová}
-  - {first: Eva, last: Hajičová}
-- canonical: {first: Jan, last: Hajič jr.}
-  id: jan-hajic-jr
-  similar: [jan-hajic]
-- canonical: {first: Dilek, last: Hakkani-Tur}
-  id: dilek-hakkani-tur
-  variants:
-  - {first: Dilek, last: Hakkani-Tür}
-  - {first: Dilek Zeynep, last: Hakkani}
-- canonical: {first: John, last: Hale}
-  variants:
-  - {first: John T., last: Hale}
-- canonical: {first: Keith, last: Hall}
-  variants:
-  - {first: Keith B., last: Hall}
-- canonical: {first: Mark, last: Hall}
-  variants:
-  - {first: Mark Michael, last: Hall}
-- canonical: {first: Patrick, last: Haller}
-  id: patrick-haller-zurich
-  note: University of Zurich
-  orcid: 0000-0002-8968-7587
-- canonical: {first: Susan, last: Haller}
-  id: susan-haller
-  variants:
-  - {first: Susan M., last: Haller}
-- canonical: {first: Péter, last: Halácsy}
-  variants:
-  - {first: Péter, last: Halácsky}
-- canonical: {first: Olivier, last: Hamon}
-  id: olivier-hamon
-- canonical: {first: Thierry, last: Hamon}
-  id: thierry-hamon
-- canonical: {first: Julien, last: Hamonic}
-  id: julien-hamonic
-- canonical: {first: Chung-hye, last: Han}
-  variants:
-  - {first: Chung-Hye, last: Han}
-  - {first: Chunghye, last: Han}
-- canonical: {first: HyoJung, last: Han}
-  variants:
-  - {first: Hou Jeung, last: Han}
-- canonical: {first: Jingguang, last: Han}
-  variants:
-  - {first: Jing Guang, last: Han}
-- canonical: {first: Kenji, last: Hanakata}
-  id: kenji-hanakata
-- canonical: {first: Philip, last: Hanna}
-  id: philip-hanna
-- canonical: {first: Dorte Haltrup, last: Hansen}
-  variants:
-  - {first: Dorte H., last: Hansen}
-- canonical: {first: Silvia, last: Hansen-Schirra}
-  variants:
-  - {first: Silvia, last: Hansen}
-- canonical: {first: Sanda, last: Harabagiu}
-  variants:
-  - {first: Sanda M., last: Harabagiu}
-- canonical: {first: Robert M., last: Haralick}
-  variants:
-  - {first: Robert, last: Haralick}
-- canonical: {first: Mary, last: Harper}
-  id: mary-harper
-  variants:
-  - {first: Mary P., last: Harper}
-- canonical: {first: Phil, last: Harrison}
-  id: phil-harrison
-  variants:
-  - {first: Philip, last: Harrison}
-- canonical: {first: Anthony, last: Hartley}
-  id: anthony-hartley
-  variants:
-  - {first: Anthony F., last: Hartley}
-- canonical: {first: Matthias, last: Hartung}
-  id: matthias-hartung
-- canonical: {first: Md. Maruf, last: Hasan}
-  variants:
-  - {first: Md Maruf, last: Hasan}
-  - {first: Maruf, last: Hasan}
-- canonical: {first: Sadid A., last: Hasan}
-  variants:
-  - {first: Sadid, last: Hasan}
-- canonical: {first: Saša, last: Hasan}
-  variants:
-  - {first: Sasa, last: Hasan}
-- canonical: {first: Tatsunori B., last: Hashimoto}
-  variants:
-  - {first: Tatsunori, last: Hashimoto}
-- canonical: {first: Koiti, last: Hasida}
-  variants:
-  - {first: Kôiti, last: Hasida}
-- canonical: {first: Ahmed, last: Hassan}
-  variants:
-  - {first: Ahmed Hassan, last: Awadallah}
-- canonical: {first: Hany, last: Hassan Awadalla}
-  variants:
-  - {first: Hany, last: Hassan}
-- canonical: {first: Helen, last: Hastie}
-  variants:
-  - {first: Helen Wright, last: Hastie}
-- canonical: {first: Alexander G., last: Hauptmann}
-  variants:
-  - {first: Alex, last: Hauptmann}
-  - {first: Alexander, last: Hauptmann}
-- canonical: {first: Roland R., last: Hausser}
-  variants:
-  - {first: Roland, last: Hausser}
-- canonical: {first: Annette, last: Hautli}
-  variants:
-  - {first: Annette, last: Hautli-Janisz}
-- canonical: {first: Jiří, last: Havelka}
-  variants:
-  - {first: Jiri, last: Havelka}
-- canonical: {first: Jennifer, last: Hay}
-  variants:
-  - {first: Jennifer B., last: Hay}
-- canonical: {first: Yoshihiko, last: Hayashi}
-  id: yoshihiko-hayashi
-- canonical: {first: Cory, last: Hayes}
-  variants:
-  - {first: Cory J., last: Hayes}
-- canonical: {first: Jer, last: Hayes}
-  variants:
-  - {first: Jeremiah, last: Hayes}
-- canonical: {first: Timothy J., last: Hazen}
-  variants:
-  - {first: T. J., last: Hazen}
-- canonical: {first: Patrick, last: Healey}
-  variants:
-  - {first: Pat, last: Healey}
-  - {first: Patrick G. T., last: Healey}
-  - {first: Patrick G.T., last: Healey}
-- canonical: {first: Marti A., last: Hearst}
-  variants:
-  - {first: Marti, last: Hearst}
-- canonical: {first: Peter A., last: Heeman}
-  variants:
-  - {first: Peter, last: Heeman}
-- canonical: {first: George E., last: Heidorn}
-  id: george-e-heidorn
-- canonical: {first: Katarina, last: Heimann Mühlenbock}
-  variants:
-  - {first: Katarina, last: Mühlenbock}
-- canonical: {first: Pascale, last: Feldkamp}
-  institution: Aarhus University
-  orcid: 0000-0002-2434-4268
-  variants:
-  - {first: Pascale, last: Moreira}
-  - {first: Pascale Feldkamp, last: Moreira}
-- canonical: {first: Jindřich, last: Helcl}
-  variants:
-  - {first: Jindrich, last: Helcl}
-- canonical: {first: Randall A., last: Helzerman}
-  id: randall-a-helzerman
-- canonical: {first: Christian F., last: Hempelmann}
-  variants:
-  - {first: Christian, last: Hempelmann}
-- canonical: {first: Charles T., last: Hemphill}
-  variants:
-  - {first: Charles, last: Hemphill}
-- canonical: {first: James, last: Henderson}
-  variants:
-  - {first: James B., last: Henderson}
-- canonical: {first: John, last: Henderson}
-  variants:
-  - {first: John C., last: Henderson}
-- canonical: {first: James, last: Hendler}
-  variants:
-  - {first: James A., last: Hendler}
-- canonical: {first: Robert J., last: Hendley}
-  variants:
-  - {first: Robert, last: Hendley}
-- canonical: {first: Gary G., last: Hendrix}
-  variants:
-  - {first: Gary, last: Hendrix}
-- canonical: {first: Enrique, last: Henestroza Anguiano}
-  variants:
-  - {first: Enrique Henestroza, last: Anguiano}
-- canonical: {first: Peter Juel, last: Henrichsen}
-  variants:
-  - {first: Peter, last: Juel Henrichsen}
-- canonical: {first: Carlos, last: Henríquez}
-  variants:
-  - {first: Carlos, last: Henriquez}
-  - {first: Carlos A., last: Henríquez Q.}
-- canonical: {first: Renate, last: Henschel}
-  id: renate-henschel
-- canonical: {first: Aurélie, last: Herbelot}
-  variants:
-  - {first: Aurelie, last: Herbelot}
-- canonical: {first: Amaç, last: Herdaǧdelen}
-  variants:
-  - {first: Amaç, last: Herdağdelen}
-- canonical: {first: Myriam, last: Hernandez}
-  variants:
-  - {first: Myriam, last: Hernández A}
-  - {first: Myriam, last: Hernández}
-- canonical: {first: Daniel, last: Hernandez-Lopez}
-  variants:
-  - {first: Daniel Hernández, last: López}
-- canonical: {first: Inmaculada, last: Hernáez}
-  id: inmaculada-hernaez
-  variants:
-  - {first: Inmaculada, last: Hernaez}
-  - {first: Inma, last: Hernaez}
-  - {first: Inma, last: Hernáez}
-- canonical: {first: Gregorio, last: Hernández}
-  id: gregorio-hernandez
-  variants:
-  - {first: Gregorio, last: Hernandez}
-- canonical: {first: Luis, last: Hernández}
-  variants:
-  - {first: Luis Hernández, last: Gomez}
-  - {first: Luis Hernández, last: Gómez}
-  - {first: Luis A., last: Hernandez}
-  - {first: Luis A., last: Hernández}
-  - {first: Luis A. Hernández, last: Gómez}
-- canonical: {first: Adolfo, last: Hernández H.}
-  variants:
-  - {first: Adolfo, last: Hernández}
-- canonical: {first: John R., last: Hershey}
-  variants:
-  - {first: John, last: Hershey}
-- canonical: {first: James, last: Hieronymus}
-  id: james-hieronymus
-- canonical: {first: Almut Silja, last: Hildebrand}
-  variants:
-  - {first: Silja, last: Hildebrand}
-  - {first: Almut, last: Hildebrand}
-- canonical: {first: Lucas Welter, last: Hilgert}
-  variants:
-  - {first: Lucas, last: Hilgert}
-- canonical: {first: Robin L., last: Hill}
-  variants:
-  - {first: Robin, last: Hill}
-- canonical: {first: Dustin, last: Hillard}
-  id: dustin-hillard
-- canonical: {first: Donald, last: Hindle}
-  id: donald-hindle
-  variants:
-  - {first: Don, last: Hindle}
-- canonical: {first: Elizabeth A., last: Hinkelman}
-  variants:
-  - {first: Elizabeth, last: Hinkelman}
-- canonical: {first: Erhard, last: Hinrichs}
-  variants:
-  - {first: Erhard W., last: Hinrichs}
-- canonical: {first: Marie, last: Hinrichs}
-  variants:
-  - {first: Marie, last: Boyle-Hinrichs}
-- canonical: {first: Hideki, last: Hirakawa}
-  id: hideki-hirakawa
-- canonical: {first: Julia, last: Hirschberg}
-  variants:
-  - {first: Julia B., last: Hirschberg}
-- canonical: {first: Lynette, last: Hirschman}
-  id: lynette-hirschman
-  variants:
-  - {first: Lynette, last: Hirshman}
-- canonical: {first: Toru, last: Hitaka}
-  variants:
-  - {first: Tooru, last: Hitaka}
-- canonical: {first: Janet, last: Hitzeman}
-  id: janet-hitzeman
-- canonical: {first: Barbora, last: Hladká}
-  id: barbora-hladka
-  variants:
-  - {first: Barbora, last: Hladka}
-- canonical: {first: Bao Quoc, last: Ho}
-  variants:
-  - {first: Quoc, last: Ho}
-  - {first: Quoc, last: Ho Bao}
-- canonical: {first: Hing-cheung, last: Ho}
-  variants:
-  - {first: Hing-Cheung, last: Ho}
-- canonical: {first: Heng, last: Wang}
-  comment: University of Sydney
-  institution: University of Sydney
-  orcid: 0009-0009-5473-5751
-  id: heng-wang-sydney
-- canonical: {first: Heng, last: Wang}
-  comment: May refer to several people
-  id: heng-wang
-- canonical: {first: Tu-Bao, last: Ho}
-  variants:
-  - {first: Tu Bao, last: Ho}
-- canonical: {first: Lydia-Mai, last: Ho-Dac}
-  variants:
-  - {first: Mai, last: Ho-dac}
-- canonical: {first: Jerry R., last: Hobbs}
-  id: jerry-r-hobbs
-  variants:
-  - {first: Jerry, last: Hobbs}
-- canonical: {first: Beth Ann, last: Hockey}
-  id: beth-ann-hockey
-  variants:
-  - {first: Beth A., last: Hockey}
-  - {first: Beth, last: Hockey}
-- canonical: {first: Edward, last: Hoenkamp}
-  variants:
-  - {first: Eduard, last: Hoenkamp}
-- canonical: {first: Wolfgang, last: Hoeppner}
-  id: wolfgang-hoeppner
-- canonical: {first: Anja, last: Hoethker}
-  variants:
-  - {first: Anja, last: Höthker}
-- canonical: {first: Holger, last: Hoffmann}
-  variants:
-  - {first: Holger, last: Hoffman}
-- canonical: {first: Raphael, last: Hoffmann}
-  variants:
-  - {first: Raphael, last: Hoffman}
-- canonical: {first: Th. R., last: Hofmann}
-  variants:
-  - {first: T. R., last: Hofmann}
-- canonical: {first: Martin, last: Hofmann--Apitius}
-  variants:
-  - {first: Martin, last: Hofmann-Apitius}
-- canonical: {first: Chris, last: Hokamp}
-  variants:
-  - {first: Christopher, last: Hokamp}
-- canonical: {first: Tomáš, last: Holan}
-  variants:
-  - {first: Tomas, last: Holan}
-- canonical: {first: Natsuko, last: Holden}
-  id: natsuko-holden
-- canonical: {first: Gordana Ilić, last: Holen}
-  variants:
-  - {first: Gordana Ilic, last: Holen}
-- canonical: {first: Hsiao-Wuen, last: Hon}
-  id: hsiao-wuen-hon
-- canonical: {first: Jia-Fei, last: Hong}
-  variants:
-  - {first: Jia-Fei, last: Hung}
-- canonical: {first: Philip, last: Hoole}
-  variants:
-  - {first: Phil, last: Hoole}
-- canonical: {first: Heather, last: Horsfall}
-  id: heather-horsfall
-- canonical: {first: Tamás, last: Horváth}
-  id: tamas-horvath
-- canonical: {first: Iris, last: Hoser}
-  id: iris-hoser
-- canonical: {first: Veronique, last: Hoste}
-  variants:
-  - {first: Véronique, last: Hoste}
-- canonical: {first: Wen-Juan, last: Hou}
-  variants:
-  - {first: Wen, last: Juan Hou}
-  - {first: Juan, last: Wen}
-- canonical: {first: Eduard, last: Hovy}
-  variants:
-  - {first: Eduard H., last: Hovy}
-  - {first: Ed, last: Hovy}
-- canonical: {first: Blake, last: Howald}
-  variants:
-  - {first: Blake Stephen, last: Howald}
-- canonical: {first: David M., last: Howcroft}
-  variants:
-  - {first: David, last: Howcroft}
-- canonical: {first: Frederick M., last: Hoyt}
-  variants:
-  - {first: Frederick, last: Hoyt}
-- canonical: {first: Daniel, last: Hromada}
-  variants:
-  - {first: Daniel Devatman, last: Hromada}
-  - {first: Daniel, last: Devatman Hromada}
-- canonical: {first: Estevam R., last: 'Hruschka, Jr.'}
-  variants:
-  - {first: Estevam R., last: Hruschka Jr.}
-- canonical: {first: Hung-ting, last: Hsieh}
-  variants:
-  - {first: Hung-Ting, last: Hsieh}
-- canonical: {first: Shelley Ching-Yu, last: Hsieh}
-  variants:
-  - {first: Ching-yu, last: Hsieh}
-  - {first: Shelley Ching-yu, last: Hsieh}
-  - {first: Ching-yu Shelley, last: Hsieh}
-- canonical: {first: Shu-Kai, last: Hsieh}
-  variants:
-  - {first: Shu-kai, last: Hsieh}
-  - {first: ShuKai, last: Hsieh}
-- canonical: {first: Wen-Chi, last: Hsien}
-  variants:
-  - {first: Wen-Chi, last: Hsie}
-- canonical: {first: Bo-June (Paul), last: Hsu}
-  variants:
-  - {first: Bo-june Paul, last: Hsu}
-  - {first: Bo-June Paul, last: Hsu}
-- canonical: {first: Chun-nan, last: Hsu}
-  variants:
-  - {first: Chun-Nan, last: Hsu}
-- canonical: {first: Wen-Lian, last: Hsu}
-  variants:
-  - {first: Wen-lian, last: Hsu}
-- canonical: {first: Yu-Ling Una, last: Hsu}
-  variants:
-  - {first: Yu-Ling, last: Hsu}
-- canonical: {first: Dong Cheng, last: Hu}
-  variants:
-  - {first: Dong-Cheng, last: Hu}
-- canonical: {first: An-Ta, last: Huang}
-  variants:
-  - {first: Anta, last: Huang}
-- canonical: {first: Changning, last: Huang}
-  variants:
-  - {first: Chang-Ning, last: Huang}
-  - {first: Chang-ning, last: Huang}
-- canonical: {first: Chung-Chi, last: Huang}
-  variants:
-  - {first: Chung-chi, last: Huang}
-- canonical: {first: Degen, last: Huang}
-  variants:
-  - {first: De-Gen, last: Huang}
-- canonical: {first: Eric H., last: Huang}
-  variants:
-  - {first: Eric, last: Huang}
-- canonical: {first: Feng-Long, last: Huang}
-  variants:
-  - {first: Feng-Long, last: Hwang}
-- canonical: {first: He-Yan, last: Huang}
-  variants:
-  - {first: He-yan, last: Huang}
-  - {first: Heyan, last: Huang}
-- canonical: {first: Jin Hu, last: Huang}
-  variants:
-  - {first: JinHu, last: Huang}
-- canonical: {first: Jui Ting, last: Huang}
-  variants:
-  - {first: Jui-Ting, last: Huang}
-- canonical: {first: Lian′en, last: Huang}
-  variants:
-  - {first: Lian’en, last: Huang}
-- canonical: {first: Qi-quan, last: Huang}
-  variants:
-  - {first: Qi-Quan, last: Huang}
-- canonical: {first: Shih-Ting, last: Huang}
-  variants:
-  - {first: Shih-ting, last: Huang}
-  - {first: Shi-Ting, last: Huang}
-- canonical: {first: Shuan-fan, last: Huang}
-  variants:
-  - {first: Shuan-Fan, last: Huang}
-- canonical: {first: Ting-Hao, last: Huang}
-  variants:
-  - {first: Ting-Hao ‘Kenneth’, last: Huang}
-  - {first: Ting-Hao Kenneth, last: Huang}
-- canonical: {first: Xiangji, last: Huang}
-  variants:
-  - {first: Jimmy Xiangji, last: Huang}
-- canonical: {first: Xuan-Jing, last: Huang}
-  variants:
-  - {first: Xuan-jing, last: Huang}
-  - {first: Xuanjing, last: Huang}
-- canonical: {first: Xuedong, last: Huang}
-  id: xuedong-huang
-- canonical: {first: Jing, last: Huang}
-  id: jing-huang-stanford
-  orcid: 0000-0001-9301-9410
-  comment: May refer to many people
-- canonical: {first: Jing, last: Huang}
-  id: jing-huang
-  comment: May refer to many people
-- canonical: {first: Richard A., last: Hudson}
-  variants:
-  - {first: Richard, last: Hudson}
-- canonical: {first: Manuela, last: Huerlimann}
-  variants:
-  - {first: Manuela, last: Hürlimann}
-  - {first: Manuela, last: Huerliman}
-- canonical: {first: Mathew, last: Huerta-Enochian}
-  id: mathew-huerta-enochian
-- canonical: {first: Kevin, last: Humphreys}
-  id: kevin-humphreys
-- canonical: {first: Jeih-weih, last: Hung}
-  variants:
-  - {first: Jeih-Weih, last: Hung}
-- canonical: {first: Kate, last: Hunicke-Smith}
-  id: kate-hunicke-smith
-- canonical: {first: Dan, last: Hunter}
-  id: dan-hunter
-- canonical: {first: Lawrence, last: Hunter}
-  variants:
-  - {first: Lawrence E., last: Hunter}
-- canonical: {first: Lluís-F., last: Hurtado}
-  variants:
-  - {first: Lluís F., last: Hurtado}
-  - {first: LLuís-F., last: Hurtado}
-- canonical: {first: Mazhar Mehdi, last: Hussain}
-  variants:
-  - {first: Mazhar, last: Hussain}
-- canonical: {first: W. John, last: Hutchins}
-  variants:
-  - {first: John, last: Hutchins}
-- canonical: {first: Christian, last: Huyck}
-  id: christian-huyck
-- canonical: {first: Mei-Yuh, last: Hwang}
-  id: mei-yuh-hwang
-- canonical: {first: Sebastian G. M., last: Händschke}
-  variants:
-  - {first: Sebastian G.M., last: Händschke}
-- canonical: {first: Christian, last: Hänig}
-  variants:
-  - {first: Christian, last: Haenig}
-- canonical: {first: Harald, last: Höge}
-  id: harald-hoge
-  variants:
-  - {first: Harald, last: Hoege}
-- canonical: {first: Ali, last: Hürriyetoğlu}
-  variants:
-  - {first: Ali, last: Hurriyetoglu}
-  - {first: Ali, last: Hürriyetoǧlu}
-- canonical: {first: Fidelia, last: Ibekwe-SanJuan}
-  variants:
-  - {first: Fidelia, last: Ibekwe-Sanjuan}
-- canonical: {first: Nancy, last: Ide}
-  variants:
-  - {first: Nancy M., last: Ide}
-- canonical: {first: Carlos A., last: Iglesias}
-  variants:
-  - {first: Carlos, last: Iglesias}
-- canonical: {first: Suzana, last: Ilic}
-  variants:
-  - {first: Suzana, last: Ilić}
-- canonical: {first: Sathish Reddy, last: Indurthi}
-  variants:
-  - {first: Sathish, last: Reddy}
-  - {first: Sathish, last: Indurthi}
-- canonical: {first: Anton Karl, last: Ingason}
-  variants:
-  - {first: Anton K., last: Ingason}
-- canonical: {first: Robert, last: Ingria}
-  id: robert-ingria
-- canonical: {first: Diana, last: Inkpen}
-  variants:
-  - {first: Diana Zaiu, last: Inkpen}
-  - {first: Diana, last: Zaiu}
-- canonical: {first: Leonid, last: Iomdin}
-  variants:
-  - {first: Leonid L., last: Iomdin}
-- canonical: {first: Molly, last: Ireland}
-  variants:
-  - {first: Molly E., last: Ireland}
-- canonical: {first: José, last: Iria}
-  variants:
-  - {first: Jose, last: Iria}
-- canonical: {first: Mikel, last: Iruskieta}
-  id: mikel-iruskieta
-- canonical: {first: Anas El, last: Isbihani}
-  variants:
-  - {first: Anas, last: El Isbihani}
-- canonical: {first: Masato, last: Ishizaki}
-  id: masato-ishizaki
-- canonical: {first: Aminul, last: Islam}
-  variants:
-  - {first: Md. Aminul, last: Islam}
-- canonical: {first: Zahurul, last: Islam}
-  variants:
-  - {first: Zahrul, last: Islam}
-- canonical: {first: Rezarta, last: Islamaj Dogan}
-  variants:
-  - {first: Rezarta, last: Islamaj Doğan}
-- canonical: {first: David, last: Israel}
-  variants:
-  - {first: David J., last: Israel}
-- canonical: {first: Shuichi, last: Itahashi}
-  variants:
-  - {first: Shuich, last: Itahashi}
-- canonical: {first: Yukihiro, last: Itoh}
-  variants:
-  - {first: Yukihiro, last: Ito}
-- canonical: {first: Abe, last: Ittycheriah}
-  id: abe-ittycheriah
-- canonical: {first: Un-Gian, last: Iunn}
-  variants:
-  - {first: Un-gian, last: Iun}
-  - {first: Ún-giân, last: Iû}
-- canonical: {first: Alexei V., last: Ivanov}
-  variants:
-  - {first: Alexei, last: Ivanov}
-- canonical: {first: Krasimira, last: Ivanova}
-  variants:
-  - {first: Krassimira, last: Ivanova}
-- canonical: {first: Lucja, last: Iwanska}
-  variants:
-  - {first: Lucja M., last: Iwanska}
-- canonical: {first: Shun’ya, last: Iwasawa}
-  variants:
-  - {first: Shunya, last: Iwasawa}
-- canonical: {first: Rubén, last: Izquierdo}
-  variants:
-  - {first: Ruben, last: Izquierdo Bevia}
-  - {first: Ruben, last: Izquierdo}
-- canonical: {first: Litton, last: J Kurisinkel}
-  variants:
-  - {first: Litton J, last: Kurisinkel}
-- canonical: {first: Eric, last: Jackson}
-  id: eric-jackson
-- canonical: {first: Cassandra L., last: Jacobs}
-  variants:
-  - {first: Cassandra, last: Jacobs}
-- canonical: {first: Paul S., last: Jacobs}
-  id: paul-s-jacobs
-  variants:
-  - {first: Paul, last: Jacobs}
-- canonical: {first: T. Florian, last: Jaeger}
-  variants:
-  - {first: Florian, last: Jaeger}
-- canonical: {first: Somayeh, last: Jafaritazehjani}
-  variants:
-  - {first: Somayeh, last: Jafaritazehjan}
-- canonical: {first: Abhyuday, last: Jagannatha}
-  variants:
-  - {first: Abhyuday N, last: Jagannatha}
-- canonical: {first: Michael E., last: Jahr}
-  variants:
-  - {first: Michael, last: Jahr}
-- canonical: {first: Brage Ekroll, last: Jahren}
-  variants:
-  - {first: Brage, last: Jahren}
-- canonical: {first: Siddharth, last: Jain}
-  variants:
-  - {first: Siddhanth, last: Jain}
-- canonical: {first: Primož, last: Jakopin}
-  variants:
-  - {first: Primoz, last: Jakopin}
-- canonical: {first: Anthony, last: Jameson}
-  id: anthony-jameson
-- canonical: {first: Srinivasan, last: Janarthanam}
-  variants:
-  - {first: Srini, last: Janarthanam}
-- canonical: {first: Jyh-Shing Roger, last: Jang}
-  variants:
-  - {first: Jyh-Shing, last: Jang}
-  - {first: Jyh-Shing, last: Roger Jang}
-  - {first: Roger Jyh-Shing, last: Jang}
-- canonical: {first: Myung-Gil, last: Jang}
-  variants:
-  - {first: Myoung-Gil, last: Jang}
-- canonical: {first: Seok Bae, last: Jang}
-  variants:
-  - {first: Seok B., last: Jang}
-- canonical: {first: Peter, last: Jansen}
-  variants:
-  - {first: Peter J., last: Jansen}
-- canonical: {first: Michèle, last: Jardino}
-  id: michele-jardino
-  variants:
-  - {first: Michele, last: Jardino}
-- canonical: {first: Gaja, last: Jarosz}
-  variants:
-  - {first: Gaja E., last: Jarosz}
-- canonical: {first: Timo, last: Jarvinen}
-  variants:
-  - {first: Timo, last: Järvinen}
-- canonical: {first: Jisha P., last: Jayan}
-  variants:
-  - {first: Jisha, last: P Jayan}
-  - {first: Jisha P, last: Jayan}
-- canonical: {first: Arun Kumar, last: Jayapal}
-  variants:
-  - {first: Arun, last: Jayapal}
-- canonical: {first: Frederick, last: Jelinek}
-  id: frederick-jelinek
-  variants:
-  - {first: Fred, last: Jelinek}
-  - {first: Fredrick, last: Jelinek}
-- canonical: {first: Karen, last: Jensen}
-  id: karen-jensen
-- canonical: {first: Lars Juhl, last: Jensen}
-  variants:
-  - {first: Lars J., last: Jensen}
-- canonical: {first: Hyung-Bae, last: Jeon}
-  variants:
-  - {first: Hyungbae, last: Jeon}
-- canonical: {first: Girish Nath, last: Jha}
-  variants:
-  - {first: Girish, last: Jha}
-- canonical: {first: Donghong, last: Ji}
-  variants:
-  - {first: DongHong, last: Ji}
-  - {first: Dong-Hong, last: Ji}
-  - {first: Dong Hong, last: Ji}
-- canonical: {first: Paul D, last: Ji}
-  variants:
-  - {first: Paul D., last: Ji}
-- canonical: {first: Jia-Yan, last: Jian}
-  variants:
-  - {first: Jia Yan, last: Jian}
-- canonical: {first: Mike Tian-Jian, last: Jiang}
-  variants:
-  - {first: Tian-Jian, last: Jiang}
-- canonical: {first: Zheng Ping, last: Jiang}
-  variants:
-  - {first: Zhengping, last: Jiang}
-- canonical: {first: Antonio, last: Jimeno Yepes}
-  variants:
-  - {first: Antonio Jimeno, last: Yepes}
-  - {first: Antonio José, last: Jimeno Yepes}
-  - {first: Antonio, last: Jimeno-Yepes}
-- canonical: {first: M. Dolores, last: Jiménez-López}
-  variants:
-  - {first: Maria Dolores, last: Jiménez-López}
-- canonical: {first: Salud María, last: Jiménez-Zafra}
-  variants:
-  - {first: Salud M., last: Jiménez-Zafra}
-  - {first: Salud M., last: Jiménez Zafra}
-- canonical: {first: Hongyan, last: Jing}
-  id: hongyan-jing
-- canonical: {first: Petr, last: Jirku}
-  id: petr-jirku
-- canonical: {first: Amanda C., last: Jobbins}
-  id: amanda-c-jobbins
-- canonical: {first: Janne Bondi, last: Johannessen}
-  variants:
-  - {first: Janne, last: Bondi Johannessen}
-- canonical: {first: Anders, last: Johannsen}
-  variants:
-  - {first: Anders, last: Johanssen}
-- canonical: {first: David E., last: Johnson}
-  variants:
-  - {first: David, last: Johnson}
-- canonical: {first: Helen L., last: Johnson}
-  variants:
-  - {first: Helen, last: Johnson}
-- canonical: {first: Kristen, last: Johnson}
-  variants:
-  - {first: Kristen Marie, last: Johnson}
-- canonical: {first: Michael T., last: Johnson}
-  id: michael-t-johnson
-- canonical: {first: Rie, last: Johnson}
-  variants:
-  - {first: Rie, last: Ando}
-  - {first: Rie Kubota, last: Ando}
-- canonical: {first: Roderick L., last: Johnson}
-  id: roderick-l-johnson
-- canonical: {first: Michael, last: Johnston}
-  id: michael-johnston
-- canonical: {first: Kristiina, last: Jokinen}
-  variants:
-  - {first: Päivi Kristiina, last: Jokinen}
-- canonical: {first: Bevan, last: Jones}
-  variants:
-  - {first: Bevan K., last: Jones}
-  - {first: Bevan Keeley, last: Jones}
-- canonical: {first: Christopher, last: Jones}
-  variants:
-  - {first: Chris, last: Jones}
-- canonical: {first: Dominic R., last: Jones}
-  variants:
-  - {first: Dominic, last: Jones}
-- canonical: {first: Douglas, last: Jones}
-  variants:
-  - {first: Douglas A., last: Jones}
-  - {first: Doug, last: Jones}
-- canonical: {first: Gareth J. F., last: Jones}
-  variants:
-  - {first: Gareth J.F., last: Jones}
-- canonical: {first: Mark, last: Jones}
-  variants:
-  - {first: Mark A., last: Jones}
-  - {first: Mark Alan, last: Jones}
-- canonical: {first: Michael, last: Jones}
-  variants:
-  - {first: Michael P., last: Jones}
-- canonical: {first: Steven JM, last: Jones}
-  variants:
-  - {first: Steven, last: Jones}
-- canonical: {first: Clement, last: Jonquet}
-  variants:
-  - {first: Clément, last: Jonquet}
-- canonical: {first: Michael I., last: Jordan}
-  variants:
-  - {first: Michael, last: Jordan}
-- canonical: {first: Pamela, last: Jordan}
-  variants:
-  - {first: Pamela W., last: Jordan}
-- canonical: {first: Alipio, last: Jorge}
-  variants:
-  - {first: Alípio, last: Jorge}
-- canonical: {first: Aravind, last: Joshi}
-  id: aravind-joshi
-  variants:
-  - {first: Aravind K., last: Joshi}
-- canonical: {first: Sachindra, last: Joshi}
-  variants:
-  - {first: Sachin, last: Joshi}
-- canonical: {first: Shafiq, last: Joty}
-  variants:
-  - {first: Shafiq R., last: Joty}
-- canonical: {first: Yun-Cheng, last: Ju}
-  variants:
-  - {first: Yun Cheng, last: Ju}
-- canonical: {first: Alfons, last: Juan}
-  variants:
-  - {first: Alfons, last: Juan-Císcar}
-- canonical: {first: Yau-Tarng, last: Juang}
-  variants:
-  - {first: Yau-Tang, last: Juang}
-- canonical: {first: Jozef, last: Juhár}
-  variants:
-  - {first: Jozef, last: Juhar}
-- canonical: {first: Cléo, last: Jullien}
-  variants:
-  - {first: Cleo, last: Jullien}
-- canonical: {first: Han-Min, last: Jung}
-  variants:
-  - {first: Hanmin, last: Jung}
-- canonical: {first: Sangkeun, last: Jung}
-  variants:
-  - {first: SangKeun, last: Jung}
-- canonical: {first: Sung Young, last: Jung}
-  variants:
-  - {first: Sung-Young, last: Jung}
-- canonical: {first: Simeon, last: Junker}
-  variants:
-  - {first: Simeon, last: Schüz}
-- canonical: {first: Dan, last: Jurafsky}
-  variants:
-  - {first: Daniel, last: Jurafsky}
-- canonical: {first: Filip, last: Jurcicek}
-  variants:
-  - {first: Filip, last: Jurčíček}
-- canonical: {first: Marcel Adam, last: Just}
-  variants:
-  - {first: Marcel, last: Just}
-- canonical: {first: Harri, last: Jäppinen}
-  id: harri-jappinen
-  variants:
-  - {first: Harri, last: Jappinen}
-- canonical: {first: Arne, last: Jönsson}
-  variants:
-  - {first: Arne, last: Jonsson}
-- canonical: {first: Brigitte, last: Jörg}
-  variants:
-  - {first: Brigitte, last: Jorg}
-- canonical: {first: Bhadran V., last: K}
-  variants:
-  - {first: Bhadran, last: V K}
-  - {first: Bhadran V, last: K}
-- canonical: {first: Heiki-Jaan, last: Kaalep}
-  variants:
-  - {first: Heiki Jaan, last: Kaalep}
-- canonical: {first: Mijail, last: Kabadjov}
-  id: mijail-kabadjov
-  variants:
-  - {first: Mijail A., last: Kabadjov}
-  - {first: Mijail, last: Alexandrov-Kabadjov}
-- canonical: {first: Michael B., last: Kac}
-  variants:
-  - {first: Michael, last: Kac}
-- canonical: {first: Vladimír, last: Kadlec}
-  variants:
-  - {first: Vladimir, last: Kadlec}
-- canonical: {first: Jeremy G., last: Kahn}
-  variants:
-  - {first: Jeremy, last: Kahn}
-- canonical: {first: Łukasz, last: Kaiser}
-  variants:
-  - {first: Lukasz, last: Kaiser}
-- canonical: {first: Michael, last: Kaisser}
-  variants:
-  - {first: Michael, last: Kaißer}
-- canonical: {first: Ioannis, last: Kakadiaris}
-  variants:
-  - {first: Ioannis A., last: Kakadiaris}
-- canonical: {first: Jun’ichi, last: Kakegawa}
-  variants:
-  - {first: Jun-ichi, last: Kakegawa}
-- canonical: {first: Jugal, last: Kalita}
-  id: jugal-kalita
-  variants:
-  - {first: Jugal K., last: Kalita}
-- canonical: {first: Rihards, last: Kalniņš}
-  variants:
-  - {first: Rihards, last: Kalnins}
-- canonical: {first: Nanda, last: Kambhatla}
-  id: nanda-kambhatla
-  variants:
-  - {first: Nandakishore, last: Kambhatla}
-- canonical: {first: Shin-ichiro, last: Kamei}
-  variants:
-  - {first: Shinichiro, last: Kamei}
-- canonical: {first: Prathusha, last: Kameswara Sarma}
-  variants:
-  - {first: Prathusha, last: K Sarma}
-- canonical: {first: Candace A., last: Kamm}
-  variants:
-  - {first: Candace, last: Kamm}
-- canonical: {first: Bo-Yeong, last: Kang}
-  variants:
-  - {first: Bo-yeong, last: Kang}
-- canonical: {first: Rose Catherine, last: Kanjirathinkal}
-  variants:
-  - {first: Rose, last: Catherine}
-- canonical: {first: Ashvin, last: Kannan}
-  id: ashvin-kannan
-- canonical: {first: Paul, last: Kantor}
-  variants:
-  - {first: Paul B., last: Kantor}
-- canonical: {first: Cheng-yan, last: Kao}
-  variants:
-  - {first: Cheng-Yan, last: Kao}
-  - {first: Cheng Yan, last: Kao}
-- canonical: {first: Ting-hui, last: Kao}
-  variants:
-  - {first: Ting-Hui, last: Kao}
-- canonical: {first: Randy M., last: Kaplan}
-  variants:
-  - {first: Randy, last: Kaplan}
-- canonical: {first: Ronald M., last: Kaplan}
-  variants:
-  - {first: Ronald, last: Kaplan}
-  - {first: Ron, last: Kaplan}
-- canonical: {first: Jurgita, last: Kapočiūtė-Dzikienė}
-  variants:
-  - {first: Jurgita, last: Kapociute-Dzikiene}
-- canonical: {first: Diman, last: Karagyozov}
-  variants:
-  - {first: Diman, last: Karagiozov}
-- canonical: {first: Rafael - Michael, last: Karampatsis}
-  variants:
-  - {first: Rafael Michael, last: Karampatsis}
-- canonical: {first: Vanja M., last: Karan}
-  variants:
-  - {first: Vanja Mladen, last: Karan}
-- canonical: {first: David R., last: Karger}
-  variants:
-  - {first: David, last: Karger}
-- canonical: {first: Kostas, last: Karpouzis}
-  id: kostas-karpouzis
-- canonical: {first: Hideki, last: Kashioka}
-  id: hideki-kashioka
-- canonical: {first: Robert T., last: Kasper}
-  variants:
-  - {first: Robert, last: Kasper}
-- canonical: {first: Walter, last: Kasper}
-  id: walter-kasper
-- canonical: {first: Rohit, last: Kate}
-  variants:
-  - {first: Rohit J., last: Kate}
-- canonical: {first: Naoto, last: Kato}
-  variants:
-  - {first: Naoto, last: Katoh}
-- canonical: {first: Yoshihide, last: Kato}
-  variants:
-  - {first: Yoshihide, last: Sato}
-- canonical: {first: Graham, last: Katz}
-  variants:
-  - {first: E. Graham, last: Katz}
-- canonical: {first: Jason, last: Katz-Brown}
-  variants:
-  - {first: Jason, last: Brown}
-- canonical: {first: Ergina, last: Kavallieratou}
-  id: ergina-kavallieratou
-- canonical: {first: Hisashi, last: Kawai}
-  variants:
-  - {first: Kawai, last: Hisashi}
-- canonical: {first: Jun′ichi, last: Kazama}
-  variants:
-  - {first: Jun’ichi, last: Kazama}
-- canonical: {first: Zdravko, last: Kačič}
-  variants:
-  - {first: Zdravko, last: Kacic}
-- canonical: {first: John, last: Keane}
-  variants:
-  - {first: John, last: Kane}
-- canonical: {first: Michael S., last: Kearns}
-  variants:
-  - {first: Michael, last: Kearns}
-- canonical: {first: Gail M., last: Keenan}
-  variants:
-  - {first: Gail, last: Keenan}
-  - {first: Gail M, last: Keenan}
-- canonical: {first: Thomas A., last: Keenan}
-  variants:
-  - {first: Thomas, last: Keenan}
-- canonical: {first: Judy Anne, last: Kegl}
-  variants:
-  - {first: Judy, last: Kegl}
-- canonical: {first: Andrew, last: Kehler}
-  variants:
-  - {first: Andy, last: Kehler}
-- canonical: {first: Daniel, last: Keim}
-  variants:
-  - {first: Daniel A., last: Keim}
-- canonical: {first: John, last: Kelleher}
-  variants:
-  - {first: John D., last: Kelleher}
-- canonical: {first: Andre, last: Kempe}
-  variants:
-  - {first: André, last: Kempe}
-- canonical: {first: Casey, last: Kennington}
-  variants:
-  - {first: Casey Redd, last: Kennington}
-- canonical: {first: Fabio, last: Kepler}
-  id: fabio-kepler
-  variants:
-  - {first: Fabio N., last: Kepler}
-  - {first: Fabio Natanael, last: Kepler}
-- canonical: {first: Sue J., last: Ker}
-  variants:
-  - {first: Sur-Jin, last: Ker}
-  - {first: Su-Jin, last: Ker}
-  - {first: Sue-Jin, last: Ker}
-  - {first: Sue-jin, last: Ker}
-- canonical: {first: Katia Lida, last: Kermanidis}
-  variants:
-  - {first: Katia, last: Kermanidis}
-- canonical: {first: Margaret, last: Kern}
-  variants:
-  - {first: Margaret L., last: Kern}
-- canonical: {first: Stephan M., last: Kerpedjiev}
-  variants:
-  - {first: Stephan, last: Kerpedjiev}
-- canonical: {first: Vlado, last: Keselj}
-  variants:
-  - {first: Vlado, last: Kešelj}
-- canonical: {first: Fahad, last: Khan}
-  variants:
-  - {first: Anas Fahad, last: Khan}
-- canonical: {first: Md. Anwarus Salam, last: Khan}
-  variants:
-  - {first: Khan Md. Anwarus, last: Salam}
-  - {first: Khan Md., last: Anwarus Salam}
-- canonical: {first: Mohammed Arif, last: Khan}
-  variants:
-  - {first: Arif, last: Khan}
-  - {first: Arif Md., last: Khan}
-- canonical: {first: Vikash, last: Khandelwal}
-  variants:
-  - {first: Vikas, last: Khandelwal}
-- canonical: {first: Mitesh M., last: Khapra}
-  variants:
-  - {first: Mitesh, last: Khapra}
-  - {first: Mitesh, last: M. Khapra}
-  - {first: Mitesh M, last: Khapra}
-  - {first: Mitesh Shantadevi, last: Khapra}
-- canonical: {first: Sanjeev, last: Khudanpur}
-  id: sanjeev-khudanpur
-- canonical: {first: Rodger, last: Kibble}
-  id: rodger-kibble
-- canonical: {first: Chloé, last: Kiddon}
-  variants:
-  - {first: Chloe, last: Kiddon}
-- canonical: {first: Zhihao, last: Zhang}
-  id: zhihao-zhang-soochow
-  orcid: 0000-0001-9283-101X
-  institution: Soochow University
-  comment: Soochow
-- canonical: {first: Zhihao, last: Zhang}
-  id: zhihao-zhang
-  comment: May refer to several people
-- canonical: {first: Bernd, last: Kiefer}
-  id: bernd-kiefer
-- canonical: {first: Hoang, last: Kiem}
-  variants:
-  - {first: Kiem, last: Hoang}
-- canonical: {first: Scott F., last: Kiesling}
-  variants:
-  - {first: Scott, last: Kiesling}
-- canonical: {first: Hideaki, last: Kikuchi}
-  id: hideaki-kikuchi
-- canonical: {first: Gen-ichiro, last: Kikui}
-  variants:
-  - {first: Gen’ichiro, last: Kikui}
-- canonical: {first: Chiharu Uda, last: Kikuta}
-  variants:
-  - {first: Chiharu, last: Uda}
-- canonical: {first: Bong-Wan, last: Kim}
-  variants:
-  - {first: Jong Wan, last: Kim}
-- canonical: {first: Chang-Hyun, last: Kim}
-  variants:
-  - {first: Changhyun, last: Kim}
-  - {first: Chang Hyun, last: Kim}
-- canonical: {first: Deok-bong, last: Kim}
-  variants:
-  - {first: Deok-Bong, last: Kim}
-- canonical: {first: Dong-Il, last: Kim}
-  variants:
-  - {first: Dong-il, last: Kim}
-- canonical: {first: Eun-kyung, last: Kim}
-  variants:
-  - {first: Eun-Kyung, last: Kim}
-- canonical: {first: Gil Chang, last: Kim}
-  variants:
-  - {first: GilChang, last: Kim}
-  - {first: Gil-Chang, last: Kim}
-  - {first: Gilchang, last: Kim}
-- canonical: {first: Hyuhng Joon, last: Kim}
-  variants:
-  - {first: Hyuhng, last: Kim}
-- canonical: {first: Jung-jae, last: Kim}
-  variants:
-  - {first: Jung-Jae, last: Kim}
-- canonical: {first: Kyoung-young, last: Kim}
-  variants:
-  - {first: Kyoung-Young, last: Kim}
-- canonical: {first: Sung Dong, last: Kim}
-  variants:
-  - {first: Sung-Dong, last: Kim}
-- canonical: {first: Sunghwan Mac, last: Kim}
-  variants:
-  - {first: Sunghwan, last: Kim}
-- canonical: {first: Young-Gil, last: Kim}
-  variants:
-  - {first: Young-Kil, last: Kim}
-  - {first: Young Kil, last: Kim}
-  - {first: Young-Kill, last: Kim}
-  - {first: YoungKil, last: Kim}
-- canonical: {first: Yung Taek, last: Kim}
-  variants:
-  - {first: Yung-Taek, last: Kim}
-- canonical: {first: Owen, last: Kimball}
-  id: owen-kimball
-- canonical: {first: David, last: King}
-  variants:
-  - {first: David L., last: King}
-- canonical: {first: Tracy Holloway, last: King}
-  variants:
-  - {first: Tracy H., last: King}
-- canonical: {first: Brian, last: Kingsbury}
-  id: brian-kingsbury
-- canonical: {first: Jim, last: Kinzey}
-  variants:
-  - {first: Jim, last: Kimzey}
-- canonical: {first: Karin, last: Kipper}
-  variants:
-  - {first: Karin Christine, last: Kipper}
-  - {first: Karin, last: Schuler}
-  - {first: Karin, last: Kipper Schuler}
-  - {first: Karin, last: Kipper-Schuler}
-- canonical: {first: George Anton, last: Kiraz}
-  variants:
-  - {first: George, last: Kiraz}
-- canonical: {first: Andreas Søeborg, last: Kirkedal}
-  variants:
-  - {first: Andreas, last: Søeborg Kirkedal}
-- canonical: {first: Jamie, last: Kiros}
-  variants:
-  - {first: Jamie Ryan, last: Kiros}
-- canonical: {first: Atanas, last: Kiryakov}
-  variants:
-  - {first: Atanas K., last: Kiryakov}
-- canonical: {first: Balázs, last: Kis}
-  variants:
-  - {first: Balazs, last: Kis}
-- canonical: {first: Imre, last: Kiss}
-  id: imre-kiss
-- canonical: {first: Chunyu, last: Kit}
-  variants:
-  - {first: Chun-yu, last: Kit}
-- canonical: {first: Sotaro, last: Kita}
-  id: sotaro-kita
-- canonical: {first: Richard, last: Kittredge}
-  id: richard-kittredge
-- canonical: {first: Poul Søren, last: Kjærsgaard}
-  variants:
-  - {first: Poul Soren, last: Kjaersgaard}
-- canonical: {first: Esther, last: Klabbers}
-  id: esther-klabbers
-- canonical: {first: Ioannis, last: Klapaftis}
-  variants:
-  - {first: Ioannis P., last: Klapaftis}
-- canonical: {first: Alex, last: Klassmann}
-  variants:
-  - {first: Alexander, last: Klassmann}
-- canonical: {first: Judith L., last: Klavans}
-  id: judith-l-klavans
-  variants:
-  - {first: Judith, last: Klavans}
-- canonical: {first: Wolfgang, last: Klein}
-  id: wolfgang-klein
-- canonical: {first: Jörg, last: Kleinz}
-  variants:
-  - {first: Jorg, last: Kleinz}
-- canonical: {first: Gerda, last: Klimonow}
-  id: gerda-klimonow
-- canonical: {first: Tor, last: Klingberg}
-  id: tor-klingberg
-- canonical: {first: Natalia, last: Klyueva}
-  variants:
-  - {first: Natalia, last: Kljueva}
-- canonical: {first: Tina, last: Klüwer}
-  variants:
-  - {first: Tina, last: Kluewer}
-- canonical: {first: Krzysztof, last: Kochut}
-  id: krzysztof-kochut
-- canonical: {first: Andras, last: Kocsor}
-  variants:
-  - {first: András, last: Kocsor}
-- canonical: {first: Hanae, last: Koiso}
-  id: hanae-koiso
-- canonical: {first: Mare, last: Koit}
-  id: mare-koit
-- canonical: {first: Atsuko, last: Koizumi}
-  id: atsuko-koizumi
-- canonical: {first: George, last: Kokkinakis}
-  id: george-kokkinakis
-  variants:
-  - {first: George K., last: Kokkinakis}
-- canonical: {first: Sofie Johansson, last: Kokkinakis}
-  variants:
-  - {first: Sofie, last: Johansson Kokkinakis}
-- canonical: {first: Sia, last: Kolkovska}
-  variants:
-  - {first: Siya, last: Kolkovska}
-- canonical: {first: David, last: Kolovratnik}
-  variants:
-  - {first: David, last: Kolovratník}
-- canonical: {first: Anup Kumar, last: Kolya}
-  variants:
-  - {first: Anup, last: Kumar Kolya}
-  - {first: Anup, last: Kolya}
-- canonical: {first: Ravikumar, last: Komandur}
-  variants:
-  - {first: K, last: Ravikumar}
-- canonical: {first: Rik, last: Koncel-Kedziorski}
-  id: rik-koncel-kedziorski
-- canonical: {first: Ravikumar, last: Kondadadi}
-  variants:
-  - {first: Ravi, last: Kondadadi}
-  - {first: Ravi Kumar, last: Kondadadi}
-- canonical: {first: Alexis, last: Konstantinidis}
-  variants:
-  - {first: Alexis, last: Konstandinidis}
-- canonical: {first: Selcuk, last: Kopru}
-  variants:
-  - {first: Selçuk, last: Köprü}
-- canonical: {first: Jan, last: Kors}
-  variants:
-  - {first: Jan, last: Korst}
-- canonical: {first: Govind, last: Kothari}
-  variants:
-  - {first: '', last: Govind}
-- canonical: {first: Guy-Noel, last: Kouarata}
-  variants:
-  - {first: Guy-Noël, last: Kouarata}
-- canonical: {first: Eleni, last: Koutsogeorgos}
-  id: eleni-koutsogeorgos
-- canonical: {first: John J., last: Kovarik}
-  variants:
-  - {first: John, last: Kovarik}
-- canonical: {first: Vojtěch, last: Kovář}
-  variants:
-  - {first: Vojtech, last: Kovář}
-- canonical: {first: Marek, last: Kozlowski}
-  variants:
-  - {first: Marek, last: Kozłowski}
-- canonical: {first: Emiel, last: Krahmer}
-  variants:
-  - {first: Emiel J., last: Krahmer}
-- canonical: {first: Olivier, last: Kraif}
-  id: olivier-kraif
-- canonical: {first: Martin, last: Krallinger}
-  id: martin-krallinger
-- canonical: {first: Steven, last: Krauwer}
-  id: steven-krauwer
-- canonical: {first: Jana, last: Kravalová}
-  variants:
-  - {first: Jana, last: Kravalova}
-- canonical: {first: Hans-Ulrich, last: Krieger}
-  variants:
-  - {first: HansUlrich, last: Krieger}
-- canonical: {first: Raghava, last: Krishnan}
-  id: raghava-krishnan
-- canonical: {first: Rihards, last: Krišlauks}
-  variants:
-  - {first: Rihards, last: Krislauks}
-- canonical: {first: Anthony, last: Kroch}
-  variants:
-  - {first: Anthony S., last: Kroch}
-- canonical: {first: Geert-Jan M., last: Kruijff}
-  variants:
-  - {first: Geert-Jan, last: Kruijff}
-- canonical: {first: Ivana, last: Kruijff-Korbayová}
-  variants:
-  - {first: Ivana, last: Kruijff-Korbayova}
-  - {first: Ivana, last: Kruijff-Korbayovà}
-- canonical: {first: George, last: Krupka}
-  variants:
-  - {first: George R., last: Krupka}
-- canonical: {first: Udo, last: Kruschwitz}
-  id: udo-kruschwitz
-- canonical: {first: Germán, last: Kruszewski}
-  variants:
-  - {first: German, last: Kruszewski}
-- canonical: {first: Francis, last: Kubala}
-  id: francis-kubala
-- canonical: {first: Vladislav, last: Kubon}
-  variants:
-  - {first: Vladislav, last: Kuboň}
-  - {first: Vladlslav, last: Kubon}
-- canonical: {first: Taku, last: Kudo}
-  variants:
-  - {first: Taku, last: Kudoh}
-- canonical: {first: Ulrike, last: Kugler}
-  id: ulrike-kugler
-- canonical: {first: Anne, last: Kuhn}
-  id: anne-kuhn
-- canonical: {first: Robert J., last: Kuhns}
-  variants:
-  - {first: Robert, last: Kuhns}
-- canonical: {first: Malhar, last: Kulkarni}
-  variants:
-  - {first: Malhar A., last: Kulkarni}
-- canonical: {first: Ayush, last: Kumar}
-  variants:
-  - {first: Kumar, last: Ayush}
-- canonical: {first: Harshit, last: Kumar}
-  id: harshit-kumar
-- canonical: {first: Harshit, last: Kumar}
-  id: harshit-kumar-iit
-- canonical: {first: Anand, last: Kumar M}
-  variants:
-  - {first: Anand Kumar, last: Madasamy}
-  - {first: Anand Kumar, last: M}
-- canonical: {first: A, last: Kumaran}
-  variants:
-  - {first: A., last: Kumaran}
-- canonical: {first: Masako, last: Kume}
-  id: masako-kume
-- canonical: {first: Andrew L., last: Kun}
-  variants:
-  - {first: Andrew, last: Kun}
-- canonical: {first: Stephen, last: Kunath}
-  variants:
-  - {first: Stephen A., last: Kunath}
-- canonical: {first: Kerstin, last: Kunz}
-  variants:
-  - {first: Kerstin Anna, last: Kunz}
-- canonical: {first: Chan-hung, last: Kuo}
-  variants:
-  - {first: Chan-Hung, last: Kuo}
-- canonical: {first: Sankar, last: Kuppan}
-  variants:
-  - {first: Sankar, last: K}
-- canonical: {first: Anna, last: Kupść}
-  variants:
-  - {first: Anna, last: Kupsc}
-- canonical: {first: Yurii, last: Kuratov}
-  variants:
-  - {first: Yuri, last: Kuratov}
-- canonical: {first: Mohamed Zakaria, last: Kurdi}
-  variants:
-  - {first: Mohamed-Zakaria, last: Kurdi}
-- canonical: {first: Emina, last: Kurtić}
-  variants:
-  - {first: Emina, last: Kurtic}
-- canonical: {first: Nicholas, last: Kushmerick}
-  id: nicholas-kushmerick
-- canonical: {first: Andreas, last: Kustner}
-  id: andreas-kustner
-- canonical: {first: Sergey O., last: Kuznetsov}
-  variants:
-  - {first: Sergei O., last: Kuznetsov}
-- canonical: {first: Ivona, last: Kučerová}
-  variants:
-  - {first: Ivona, last: Kuc̆erová}
-- canonical: {first: Pavel, last: Kvĕtoň}
-  variants:
-  - {first: Pavel, last: Kveton}
-  - {first: Pavel, last: Květoň}
-- canonical: {first: Stan C., last: Kwasny}
-  variants:
-  - {first: Stan, last: Kwasny}
-- canonical: {first: Cheol Jung, last: Kweon}
-  variants:
-  - {first: Cheoljung, last: Kweon}
-- canonical: {first: Kui-Lam, last: Kwok}
-  id: kui-lam-kwok
-  variants:
-  - {first: Kui Lam, last: Kwok}
-- canonical: {first: Olivia O.Y., last: Kwong}
-  variants:
-  - {first: O.Y., last: Kwong}
-  - {first: Oi Yee, last: Kwong}
-- canonical: {first: Gunnel, last: Källgren}
-  variants:
-  - {first: Gunnel, last: Kallgren}
-- canonical: {first: Joachim, last: Köhler}
-  variants:
-  - {first: Joachim, last: Koehler}
-- canonical: {first: Natalie, last: Kübler}
-  variants:
-  - {first: Natalie, last: Kubler}
-- canonical: {first: Sandra, last: Kübler}
-  variants:
-  - {first: Sandra, last: Kubler}
-  - {first: Sandra, last: Kuebler}
-- canonical: {first: Sobha, last: L}
-  variants:
-  - {first: L., last: Sobha}
-- canonical: {first: Abhay, last: L. Kashyap}
-  variants:
-  - {first: Abhay, last: Kashyap}
-- canonical: {first: Gorka, last: Labaka}
-  id: gorka-labaka
-- canonical: {first: Penny, last: Labropoulou}
-  id: penny-labropoulou
-- canonical: {first: Martin, last: Labský}
-  variants:
-  - {first: Martin, last: Labsky}
-- canonical: {first: Finley, last: Lacatusu}
-  variants:
-  - {first: V. Finley, last: Lacatusu}
-- canonical: {first: Anne, last: Lacheret}
-  variants:
-  - {first: Anne, last: Lacheret-Dujour}
-- canonical: {first: John, last: Lafferty}
-  id: john-lafferty
-  variants:
-  - {first: John D., last: Lafferty}
-  - {first: John, last: Lafrerty}
-- canonical: {first: Frederique, last: Laforest}
-  variants:
-  - {first: Frédérique, last: Laforest}
-- canonical: {first: Antonio-L., last: Lagarda}
-  variants:
-  - {first: Antonio, last: Lagarda}
-  - {first: Antonio L., last: Lagarda}
-- canonical: {first: Torbjörn, last: Lager}
-  variants:
-  - {first: Torbjorn, last: Lager}
-  - {first: Torbjoern, last: Lager}
-- canonical: {first: Albert M., last: Lai}
-  variants:
-  - {first: Albert, last: Lai}
-  - {first: Albert M, last: Lai}
-- canonical: {first: Jennifer C., last: Lai}
-  variants:
-  - {first: Jenifer C., last: Lai}
-  - {first: Jennifer, last: Lai}
-- canonical: {first: Min-Hua, last: Lai}
-  variants:
-  - {first: Min Hua, last: Lai}
-- canonical: {first: Tom B.Y., last: Lai}
-  id: tom-b-y-lai
-  variants:
-  - {first: Tom B. Y., last: Lai}
-  - {first: Tom B.Y, last: Lai}
-- canonical: {first: Tom Bong-yeung, last: Lai}
-  variants:
-  - {first: Bong-Yeung, last: Lai}
-- canonical: {first: Tuan, last: Lai}
-  variants:
-  - {first: Tuan Manh, last: Lai}
-- canonical: {first: Yu-da, last: Lai}
-  variants:
-  - {first: Yu-Da, last: Lai}
-- canonical: {first: Meriama, last: Laib}
-  variants:
-  - {first: Meriama, last: Laïb}
-  - {first: Mariama, last: Laib}
-- canonical: {first: Sobha, last: Lalitha Devi}
-  variants:
-  - {first: Lalitha Devi, last: Sobha}
-  - {first: Sobha Lalitha, last: Devi}
-- canonical: {first: John P., last: Lalor}
-  variants:
-  - {first: John, last: Lalor}
-- canonical: {first: Lori, last: Lamel}
-  id: lori-lamel
-  variants:
-  - {first: Lori F., last: Lamel}
-- canonical: {first: André, last: Lamúrias}
-  variants:
-  - {first: Andre, last: Lamurias}
-- canonical: {first: Man, last: Lan}
-  variants:
-  - {first: Lan, last: Man}
-- canonical: {first: Thomas, last: Landauer}
-  variants:
-  - {first: Thomas K, last: Landauer}
-- canonical: {first: Shari, last: Landes}
-  variants:
-  - {first: Shari, last: Land}
-- canonical: {first: Jan, last: Landsbergen}
-  variants:
-  - {first: S. P. J., last: Landsbergen}
-  - {first: S.P.J., last: Landsbergen}
-- canonical: {first: Francois-Michel, last: Lang}
-  variants:
-  - {first: Francois M., last: Lang}
-- canonical: {first: Patrick L., last: Lange}
-  variants:
-  - {first: Patrick, last: Lange}
-- canonical: {first: D. Terence, last: Langendoen}
-  variants:
-  - {first: Terence, last: Langendoen}
-- canonical: {first: Irene, last: Langkilde}
-  variants:
-  - {first: Irene, last: Langkilde-Geary}
-- canonical: {first: Philippe, last: Langlais}
-  variants:
-  - {first: Phillippe, last: Langlais}
-- canonical: {first: Eric, last: Laporte}
-  variants:
-  - {first: Éric, last: Laporte}
-- canonical: {first: Christophe, last: Laprun}
-  variants:
-  - {first: Christophe D., last: Laprun}
-- canonical: {first: Septina Dian, last: Larasati}
-  variants:
-  - {first: Septina, last: Larasati}
-- canonical: {first: Walter, last: Lasecki}
-  variants:
-  - {first: Walter S., last: Lasecki}
-- canonical: {first: Olga N., last: Lashevskaja}
-  variants:
-  - {first: Olga, last: Lashevskaja}
-- canonical: {first: Naveen Kumar, last: Laskari}
-  variants:
-  - {first: Naveen, last: Kumar}
-- canonical: {first: Kiat-gak, last: Lau}
-  variants:
-  - {first: Kiat-Gak, last: Lau}
-  - {first: Kiãt-gãk, last: Lâu}
-- canonical: {first: Alberto, last: Lavelli}
-  id: alberto-lavelli
-- canonical: {first: Julia, last: Lavid-López}
-  variants:
-  - {first: Julia, last: Lavid}
-- canonical: {first: Alon, last: Lavie}
-  id: alon-lavie
-- canonical: {first: Benoit, last: Lavoie}
-  id: benoit-lavoie
-- canonical: {first: Seamus, last: Lawless}
-  variants:
-  - {first: Séamus, last: Lawless}
-- canonical: {first: Audrey, last: Le}
-  variants:
-  - {first: Audrey N., last: Le}
-- canonical: {first: Hai-Son, last: Le}
-  variants:
-  - {first: Hai Son, last: Le}
-  - {first: Hai-son, last: Le}
-- canonical: {first: Hoang Quynh, last: Le}
-  variants:
-  - {first: Hoang-Quynh, last: Le}
-- canonical: {first: Quoc, last: Le}
-  variants:
-  - {first: Quoc V., last: Le}
-- canonical: {first: Nathalie, last: Le Brun}
-  variants:
-  - {first: Nathalie Le, last: Brun}
-- canonical: {first: Phuong, last: Le Hong}
-  id: phuong-le-hong
-  variants:
-  - {first: Phuong, last: Le-Hong}
-  - {first: Hồng Phương, last: Lê}
-  - {first: Phương, last: Lê Hồng}
-  - {first: Hong-Phuong, last: Le}
-- canonical: {first: Sébastien, last: Le Maguer}
-  variants:
-  - {first: Sébastien Le, last: Maguer}
-- canonical: {first: Quang, last: Le Minh}
-  variants:
-  - {first: Minh Quang, last: Le}
-- canonical: {first: Joseph, last: Le Roux}
-  variants:
-  - {first: Joseph Le, last: Roux}
-- canonical: {first: Jean-Luc, last: LeBrun}
-  variants:
-  - {first: Jean-Luc, last: Lebrun}
-- canonical: {first: Gianluca E., last: Lebani}
-  variants:
-  - {first: Gianluca, last: Lebani}
-- canonical: {first: Gilles, last: Lechenadec}
-  id: gilles-lechenadec
-- canonical: {first: C. H., last: Lee}
-  variants:
-  - {first: C.-H., last: Lee}
-- canonical: {first: Charles C., last: Lee}
-  variants:
-  - {first: Charles, last: Lee}
-- canonical: {first: Chi-Chun, last: Lee}
-  variants:
-  - {first: Chi-Chun (Jeremy), last: Lee}
-  - {first: Chi-Chun Jeremy, last: Lee}
-- canonical: {first: Chi-Yao, last: Lee}
-  variants:
-  - {first: Chih-yao, last: Lee}
-  - {first: Chih-Yao, last: Lee}
-- canonical: {first: Chia-Ying, last: Lee}
-  variants:
-  - {first: Chia-ying, last: Lee}
-- canonical: {first: Chia-ming, last: Lee}
-  variants:
-  - {first: Chia-Ming, last: Lee}
-- canonical: {first: Chun-Jen, last: Lee}
-  variants:
-  - {first: Chun-Jun, last: Lee}
-- canonical: {first: Chungmin, last: Lee}
-  variants:
-  - {first: Chong Min, last: Lee}
-  - {first: Chung-min, last: Lee}
-- canonical: {first: Donghun, last: Lee}
-  comment: Kakao Brain
-  id: donghun-lee-kb
-- canonical: {first: Donghun, last: Lee}
-  comment: Korea University
-  id: donghun-lee-ku
-- canonical: {first: Gary Geunbae, last: Lee}
-  variants:
-  - {first: Geunbae, last: Lee}
-- canonical: {first: Hsiang-Pin, last: Lee}
-  variants:
-  - {first: Hsiang-Ping, last: Lee}
-- canonical: {first: Hyeon-gu, last: Lee}
-  variants:
-  - {first: Hyeon-Gu, last: Lee}
-- canonical: {first: Ik-Hwan, last: Lee}
-  variants:
-  - {first: Ik-hwan, last: Lee}
-- canonical: {first: Jae-Won, last: Lee}
-  variants:
-  - {first: Jae-won, last: Lee}
-- canonical: {first: JaeSung, last: Lee}
-  variants:
-  - {first: Jae-Sung, last: Lee}
-- canonical: {first: Jaesong, last: Lee}
-  variants:
-  - {first: JaeSong, last: Lee}
-- canonical: {first: Jin-seok, last: Lee}
-  variants:
-  - {first: Jin-Seok, last: Lee}
-- canonical: {first: John S. Y., last: Lee}
-  variants:
-  - {first: John, last: Lee}
-- canonical: {first: Joo-Young, last: Lee}
-  variants:
-  - {first: JooYoung, last: Lee}
-- canonical: {first: Kai-Fu, last: Lee}
-  id: kai-fu-lee
-- canonical: {first: Kyung-Soon, last: Lee}
-  variants:
-  - {first: KyungSoon, last: Lee}
-- canonical: {first: Lianhau, last: Lee}
-  variants:
-  - {first: Lian Hau, last: Lee}
-- canonical: {first: Lin-Shan, last: Lee}
-  variants:
-  - {first: Lin-shan, last: Lee}
-- canonical: {first: Mark, last: Lee}
-  id: mark-lee
-  variants:
-  - {first: Mark G., last: Lee}
-- canonical: {first: Sang-Jo, last: Lee}
-  variants:
-  - {first: Sang Jo, last: Lee}
-- canonical: {first: Sophia Y. M., last: Lee}
-  variants:
-  - {first: Sophia Y.M., last: Lee}
-- canonical: {first: Sophia Yat Mei, last: Lee}
-  variants:
-  - {first: Yat-Mei, last: Lee}
-- canonical: {first: Sungjin, last: Lee}
-  variants:
-  - {first: Sung-Jin, last: Lee}
-- canonical: {first: Vivian K., last: Lee}
-  variants:
-  - {first: Vivian, last: Lee}
-- canonical: {first: Woong Ki, last: Lee}
-  variants:
-  - {first: Woong-Ki, last: Lee}
-- canonical: {first: Yeon Su, last: Lee}
-  variants:
-  - {first: Yeon-Su, last: Lee}
-- canonical: {first: Yong-Hun, last: Lee}
-  variants:
-  - {first: Yong-hun, last: Lee}
-- canonical: {first: Yoong Keok, last: Lee}
-  variants:
-  - {first: Yoong, last: Keok Lee}
-- canonical: {first: Nicolas, last: Lefebvre}
-  variants:
-  - {first: Nicolas, last: Lefèbvre}
-- canonical: {first: Anaïs, last: Lefeuvre}
-  variants:
-  - {first: Anaïs, last: Lefeuvre-Haftermeyer}
-- canonical: {first: Fabrice, last: Lefèvre}
-  id: fabrice-lefevre
-  variants:
-  - {first: Fabrice, last: Lefevre}
-- canonical: {first: Gurpreet Singh, last: Lehal}
-  variants:
-  - {first: Gurpreet, last: Singh Lehal}
-  - {first: Gurpreet, last: Lehal}
-- canonical: {first: Jill Fain, last: Lehman}
-  variants:
-  - {first: Jill F., last: Lehman}
-- canonical: {first: Wendy, last: Lehnert}
-  id: wendy-lehnert
-  variants:
-  - {first: Wendy G., last: Lehnert}
-- canonical: {first: Aarno, last: Lehtola}
-  id: aarno-lehtola
-- canonical: {first: Richard E, last: Leibbrandt}
-  variants:
-  - {first: Richard E., last: Leibbrandt}
-- canonical: {first: Jochen L., last: Leidner}
-  variants:
-  - {first: Jochen, last: Leidner}
-- canonical: {first: Marielle, last: Leijten}
-  variants:
-  - {first: Mariëlle, last: Leijten}
-- canonical: {first: Luis A., last: Leiva}
-  variants:
-  - {first: Luis, last: Leiva}
-- canonical: {first: Jeremy, last: Leixa}
-  variants:
-  - {first: Jérémy, last: Leixa}
-- canonical: {first: Pietro, last: Leo}
-  id: pietro-leo
-- canonical: {first: Jacqueline, last: Leon}
-  variants:
-  - {first: Jacqueline, last: Léon}
-- canonical: {first: Chee Wee, last: Leong}
-  variants:
-  - {first: Chee Wee (Ben), last: Leong}
-- canonical: {first: Haley, last: Lepp}
-  variants:
-  - {first: Haley M., last: Lepp}
-- canonical: {first: Mikel, last: Lersundi}
-  id: mikel-lersundi
-- canonical: {first: Leonardo, last: Lesmo}
-  id: leonardo-lesmo
-- canonical: {first: Dessi Puji, last: Lestari}
-  variants:
-  - {first: Dessi, last: Lestari}
-- canonical: {first: James, last: Lester}
-  variants:
-  - {first: James C., last: Lester}
-- canonical: {first: Igor, last: Leturia}
-  id: igor-leturia
-- canonical: {first: Hong, last: Leung}
-  variants:
-  - {first: Hong C., last: Leung}
-- canonical: {first: Lori, last: Levin}
-  variants:
-  - {first: Lori S., last: Levin}
-- canonical: {first: Lauren, last: Levine}
-  variants:
-  - {first: Lauren Elizabeth, last: Levine}
-- canonical: {first: Stephen C., last: Levinson}
-  comment: Max-Planck-Institute for Psycholinguistics
-  id: stephen-c-levinson
-  similar: [stephen-e-levinson]
-- canonical: {first: Stephen E., last: Levinson}
-  comment: Bell Labs
-  id: stephen-e-levinson
-  similar: [stephen-c-levinson]
-- canonical: {first: Gina-Anne, last: Levow}
-  variants:
-  - {first: Gina, last: Levow}
-- canonical: {first: Roger, last: Levy}
-  variants:
-  - {first: Roger P., last: Levy}
-- canonical: {first: Kristīne, last: Levāne-Petrova}
-  variants:
-  - {first: Kristīne, last: Levāne}
-- canonical: {first: Barbara, last: Lewandowska-Tomaszyk}
-  variants:
-  - {first: Barbara, last: Lewandowska}
-- canonical: {first: David D., last: Lewis}
-  variants:
-  - {first: David, last: Lewis}
-- canonical: {first: Richard L., last: Lewis}
-  variants:
-  - {first: Richard, last: Lewis}
-- canonical: {first: William, last: Lewis}
-  variants:
-  - {first: William D., last: Lewis}
-- canonical: {first: Fernando Sánchez, last: León}
-  variants:
-  - {first: Fernando, last: Sánchez}
-- canonical: {first: Saul, last: León}
-  variants:
-  - {first: Saul, last: León Silverio}
-  - {first: Saúl, last: León}
-- canonical: {first: Pilar, last: León-Araúz}
-  variants:
-  - {first: Pilar León, last: Araúz}
-- canonical: {first: Belinda Z., last: Li}
-  variants:
-  - {first: Belinda, last: Li}
-- canonical: {first: Bo, last: Li}
-  comment: May refer to several people
-  id: bo-li
-- canonical: {first: Bo, last: Li}
-  comment: BeiHang
-  id: bo-li-bh
-- canonical: {first: Bo, last: Li}
-  comment: Vanderbilt, UIUC
-  id: bo-li-vanderbilt
-- canonical: {first: Bo, last: Li}
-  comment: NUS, Google
-  id: bo-li-nus
-  variant: {first: Troy, last: Lee}
-- canonical: {first: Bo, last: Li}
-  comment: Chinese Academy of Sciences
-  id: bo-li-cas
-- canonical: {first: Huifeng, last: Li}
-  variants:
-  - {first: Hui-Feng, last: Li}
-- canonical: {first: Jiatong, last: Li}
-  comment: Hong Kong Polytechnic
-  id: jiatong-li-hk
-- canonical: {first: Jiatong, last: Li}
-  comment: Rutgers
-  id: jiatong-li-ru
-- canonical: {first: Junhui, last: Li}
-  variants:
-  - {first: JunHui, last: Li}
-- canonical: {first: Shih-Min, last: Li}
-  variants:
-  - {first: Shi-Min, last: Li}
-- canonical: {first: Shuanglong, last: Li}
-  variants:
-  - {first: ShuangLong, last: Li}
-- canonical: {first: Tangqiu, last: Li}
-  variants:
-  - {first: Tanqiu, last: Li}
-- canonical: {first: Victor O.K., last: Li}
-  variants:
-  - {first: Victor O. K., last: Li}
-- canonical: {first: Weigang, last: Li}
-  variants:
-  - {first: Weikang, last: Li}
-- canonical: {first: Yongqi, last: Li}
-  comment: Wuhan University
-  id: yongqi-li-wuhan
-- canonical: {first: Jonghyun, last: Choi}
-  id: jonghyun-choi-umd
-  orcid: 0000-0002-7934-8434
-  institution: University of Maryland
-  comment: University of Maryland
-- canonical: {first: Jonghyun, last: Choi}
-  id: jonghyun-choi
-  comment: May refer to several people
-- canonical: {first: Yongqi, last: Li}
-  comment: The Hong Kong Polytechnic University
-  id: yongqi-li-hk
-- canonical: {first: Huizhi, last: Liang}
-  variants:
-  - {first: HuiZhi, last: Liang}
-- canonical: {first: Po-Yu, last: Liang}
-  variants:
-  - {first: Po-yu, last: Liang}
-- canonical: {first: Mark, last: Liberman}
-  id: mark-liberman
-  variants:
-  - {first: Mark Y., last: Liberman}
-- canonical: {first: Elizabeth D., last: Liddy}
-  variants:
-  - {first: Elizabeth, last: Liddy}
-- canonical: {first: Chung Yong, last: Lim}
-  variants:
-  - {first: Daniel Chung Yong, last: Lim}
-- canonical: {first: Heui-Seok, last: Lim}
-  variants:
-  - {first: Heuiseok, last: Lim}
-- canonical: {first: KyungTae, last: Lim}
-  variants:
-  - {first: Kyungtae, last: Lim}
-- canonical: {first: Nathalie Rose, last: Lim}
-  variants:
-  - {first: Nathalie, last: Lim}
-- canonical: {first: Bill Yuchen, last: Lin}
-  variants:
-  - {first: Bill Y., last: Lin}
-- canonical: {first: Bor-Shen, last: Lin}
-  variants:
-  - {first: Bor-shen, last: Lin}
-- canonical: {first: Cheng-Yuan, last: Lin}
-  variants:
-  - {first: Cheng Yuan, last: Lin}
-- canonical: {first: Chi-san Althon, last: Lin}
-  variants:
-  - {first: Chi-San, last: Lin}
-  - {first: Chi-San Althon, last: Lin}
-- canonical: {first: Dongsheng, last: Li}
-  id: dongsheng-li-fudan
-  orcid: 0000-0003-3103-8442
-  institution: Fudan University
-  comment: Fudan
-- canonical: {first: Dongsheng, last: Li}
-  id: dongsheng-li
-  comment: May refer to several people
-- canonical: {first: Chih-Lung, last: Lin}
-  variants:
-  - {first: Chih-Long, last: Lin}
-- canonical: {first: Chin-Yew, last: Lin}
-  variants:
-  - {first: ChinYew, last: Lin}
-- canonical: {first: Ching-sheng, last: Lin}
-  variants:
-  - {first: Ching-Sheng, last: Lin}
-- canonical: {first: Cong-kai, last: Lin}
-  variants:
-  - {first: Cong-Kai, last: Lin}
-- canonical: {first: Darren Hsin-Hung, last: Lin}
-  variants:
-  - {first: Darren Hsin-hung, last: Lin}
-  - {first: Hsin-Hung, last: Lin}
-- canonical: {first: Hing-Lung, last: Lin}
-  variants:
-  - {first: Hing-lung, last: Lin}
-- canonical: {first: Qiguang, last: Lin}
-  id: qiguang-lin
-- canonical: {first: Shou-De, last: Lin}
-  variants:
-  - {first: Shou-de, last: Lin}
-- canonical: {first: Shu-Yen, last: Lin}
-  variants:
-  - {first: Shu-yen, last: Lin}
-- canonical: {first: Victoria, last: Lin}
-  comment: CMU
-  id: victoria-lin-cmu
-- canonical: {first: Xi Victoria, last: Lin}
-  comment: U of Washington, Meta
-- canonical: {first: Xiaojun, last: Lin}
-  variants:
-  - {first: Xiaojun, last: Li}
-- canonical: {first: Ya-Ting, last: Lin}
-  variants:
-  - {first: Ya-Ting, last: Li}
-- canonical: {first: Georges, last: Linarès}
-  variants:
-  - {first: Georges, last: Linares}
-- canonical: {first: Krister, last: Lindén}
-  variants:
-  - {first: Krister, last: Linden}
-- canonical: {first: Marcia C., last: Linebarger}
-  variants:
-  - {first: Marcia, last: Linebarger}
-- canonical: {first: Maria Teresa, last: Lino}
-  variants:
-  - {first: Teresa, last: Lino}
-- canonical: {first: Nikos, last: Liolios}
-  id: nikos-liolios
-- canonical: {first: Zachary C., last: Lipton}
-  variants:
-  - {first: Zachary, last: Lipton}
-- canonical: {first: Adam, last: Liska}
-  variants:
-  - {first: Adam, last: Liška}
-- canonical: {first: Lucian Vlad, last: Lita}
-  variants:
-  - {first: Lucian, last: Lita}
-- canonical: {first: Diane, last: Litman}
-  variants:
-  - {first: Diane J., last: Litman}
-- canonical: {first: Alexa N., last: Little}
-  variants:
-  - {first: Alexa, last: Little}
-- canonical: {first: Alex, last: Liu}
-  variants:
-  - {first: Alexander, last: Liu}
-- canonical: {first: Bingquan, last: Liu}
-  variants:
-  - {first: BingQuan, last: Liu}
-- canonical: {first: Chin-Ting, last: Liu}
-  variants:
-  - {first: Chin-Ting Jimbo, last: Liu}
-- canonical: {first: Fei, last: Liu}
-  comment: May refer to several people
-  id: fei-liu
-- canonical: {first: Fei, last: Liu}
-  comment: UT Dallas, Bosch, CMU, University of Central Florida, Emory University
-  id: fei-liu-utdallas
-- canonical: {first: Fei, last: Liu}
-  comment: Google Assistant
-  id: fei-liu-gga
-- canonical: {first: Fei, last: Liu}
-  comment: University of Melbourne
-  id: fei-liu-unimelb
-- canonical: {first: Huidan, last: Liu}
-  variants:
-  - {first: Hui Dan, last: Liu}
-- canonical: {first: Mei-Chun, last: Liu}
-  variants:
-  - {first: Mei-chun, last: Liu}
-- canonical: {first: Nelson F., last: Liu}
-  variants:
-  - {first: Nelson, last: Liu}
-- canonical: {first: Pengyuan, last: Liu}
-  variants:
-  - {first: PengYuan, last: Liu}
-  - {first: Peng-Yuan, last: Liu}
-- canonical: {first: Peter J., last: Liu}
-  variants:
-  - {first: Peter, last: Liu}
-- canonical: {first: Weiyi, last: Liu}
-  variants:
-  - {first: Weiyi, last: Lu}
-- canonical: {first: Yang, last: Liu}
-  comment: Edinburgh Ph.D., Microsoft
-  id: yang-liu-edinburgh
-- canonical: {first: Yang, last: Liu}
-  comment: Beijing Language and Culture University
-  id: yang-liu-blcu
-- canonical: {first: Yang, last: Liu}
-  comment: The Chinese University of Hong Kong (Shenzhen)
-  id: yang-liu-hk
-- canonical: {first: Yang, last: Liu}
-  comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon
-  id: yang-liu-icsi
-- canonical: {first: Yang, last: Liu}
-  comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence
-  id: yang-liu-ict
-- canonical: {first: Yang, last: Liu}
-  comment: Peking University
-  id: yang-liu-pk
-- canonical: {first: Yang, last: Liu}
-  comment: Wilfrid Laurier University
-  id: yang-liu-wl
-- canonical: {first: Yang, last: Liu}
-  comment: Samsung Research Center Beijing
-  id: yang-liu-ss
-- canonical: {first: Yang, last: Liu}
-  comment: National University of Defense Technology
-  id: yang-liu-dt
-- canonical: {first: Yang, last: Liu}
-  comment: Microsoft Cognitive Services Research
-  id: yang-liu-microsoft
-- canonical: {first: Yang, last: Liu}
-  comment: May refer to several people
-  id: yang-liu
-  similar: [yang-janet-liu]
-- canonical: {first: Yang, last: Liu}
-  comment: Univ. of Michigan, UC Santa Cruz
-  id: yang-liu-umich
-- canonical: {first: Yang, last: Liu}
-  comment: University of Helsinki
-  id: yang-liu-Helsinki
-- canonical: {first: Yang, last: Liu}
-  comment: 3M Health Information Systems
-  id: yang-liu-3m
-- canonical: {first: Yang, last: Liu}
-  comment: Tianjin University, China
-  id: yang-liu-tianjin
-- canonical: {first: Yang Janet, last: Liu}
-  comment: Georgetown University; 刘洋
-  id: yang-janet-liu
-- canonical: {first: Andrej, last: Ljolje}
-  id: andrej-ljolje
-- canonical: {first: Peter, last: Ljunglöf}
-  variants:
-  - {first: Peter, last: Ljunglof}
-- canonical: {first: Leonardo Campillos, last: Llanos}
-  variants:
-  - {first: Leonardo, last: Campillos Llanos}
-- canonical: {first: Eduardo, last: Lleida}
-  variants:
-  - {first: Eduardo, last: LLeida}
-- canonical: {first: Agusti, last: Lloberas}
-  variants:
-  - {first: Agusti, last: LLoberas}
-- canonical: {first: Fernando, last: Llopis}
-  variants:
-  - {first: Fernando, last: LLopis}
-- canonical: {first: David, last: Llorens}
-  id: david-llorens
-- canonical: {first: Héctor, last: Llorens}
-  variants:
-  - {first: Hector, last: Llorens}
-- canonical: {first: Feng-Ju, last: Lo}
-  variants:
-  - {first: Fengju, last: Lo}
-- canonical: {first: Wai-Kit, last: Lo}
-  variants:
-  - {first: Wai Kit, last: Lo}
-- canonical: {first: Karen E., last: Lochbaum}
-  variants:
-  - {first: Karen, last: Lochbaum}
-- canonical: {first: Elizaveta, last: Loginova-Clouet}
-  variants:
-  - {first: Elizaveta, last: Clouet}
-- canonical: {first: Derek, last: Long}
-  variants:
-  - {first: Derek P., last: Long}
-- canonical: {first: Marketa, last: Lopatkova}
-  variants:
-  - {first: Markéta, last: Straňáková-Lopatková}
-  - {first: Markéta, last: Lopatková}
-- canonical: {first: Gabriel, last: Lopes}
-  variants:
-  - {first: Jose Gabriel P., last: Lopes}
-  - {first: Jose Gabriel, last: Lopes}
-  - {first: Gabriel P., last: Lopes}
-  - {first: José Gabriel Pereira, last: Lopes}
-  - {first: Gabriel, last: Pereira Lopes}
-  - {first: Gabriel Pereira, last: Lopes}
-- canonical: {first: Roque, last: Lopez Condori}
-  variants:
-  - {first: Roque, last: López}
-- canonical: {first: Oier, last: Lopez de Lacalle}
-  variants:
-  - {first: Oier López, last: de Lacalle}
-  - {first: Oier Lopez, last: de Lacalle}
-  - {first: Oier, last: López de Lacalle}
-- canonical: {first: Alina Beatrice, last: Lorent}
-  variants:
-  - {first: Alina Beatrice, last: Lorenţ}
-  - {first: Alina, last: Lorenț}
-- canonical: {first: Natalia, last: Loukachevitch}
-  id: natalia-loukachevitch
-  variants:
-  - {first: Natalia V., last: Loukachevitch}
-- canonical: {first: John B., last: Lowe}
-  variants:
-  - {first: John, last: Lowe}
-- canonical: {first: Eneldo, last: Loza Mencía}
-  variants:
-  - {first: Eneldo Loza, last: Mencía}
-- canonical: {first: Bao-Liang, last: Lu}
-  variants:
-  - {first: Bao-liang, last: Lu}
-- canonical: {first: Qin, last: Lu}
-  id: qin-lu
-- canonical: {first: Wei-lun, last: Lu}
-  variants:
-  - {first: Wei-Lwun, last: Lu}
-  - {first: Louis Wei-lun, last: Lu}
-- canonical: {first: Kim-Teng, last: Lua}
-  variants:
-  - {first: KimTeng, last: Lua}
-  - {first: Kim Teng, last: Lua}
-- canonical: {first: Juan Manuel, last: Lucas-Cuesta}
-  variants:
-  - {first: Juan Manuel, last: Lucas}
-- canonical: {first: Li, last: Lucy}
-  variants:
-  - {first: Lucy, last: Li}
-- canonical: {first: Peter J., last: Ludlow}
-  variants:
-  - {first: Peter, last: Ludlow}
-- canonical: {first: Robert W.P., last: Luk}
-  id: robert-w-p-luk
-- canonical: {first: Robert Wing Pong, last: Luk}
-  variants:
-  - {first: Wing-Pong, last: Luk}
-- canonical: {first: Stephanie, last: Lukin}
-  variants:
-  - {first: Stephanie M., last: Lukin}
-- canonical: {first: Suen Caesar, last: Lun}
-  id: suen-caesar-lun
-  variants:
-  - {first: Caesar Suen, last: Lun}
-  - {first: Caesar, last: Lun}
-  - {first: S. Caesar, last: Lun}
-- canonical: {first: Xiaoqiang, last: Luo}
-  id: xiaoqiang-luo
-- canonical: {first: Minh-Thang, last: Luong}
-  variants:
-  - {first: Thang, last: Luong}
-- canonical: {first: Ngoc Quang, last: Luong}
-  variants:
-  - {first: Ngoc-Quang, last: Luong}
-- canonical: {first: Susann, last: LuperFoy}
-  variants:
-  - {first: Susann, last: Luperfoy}
-- canonical: {first: Veronika, last: Lux}
-  variants:
-  - {first: Veronika, last: Lux-Pogodalla}
-  - {first: Véronika, last: Lux-Pogodalla}
-- canonical: {first: Gunn Inger, last: Lyse}
-  variants:
-  - {first: Gunn, last: Lyse}
-- canonical: {first: Steven L., last: Lytinen}
-  variants:
-  - {first: Steven, last: Lytinen}
-- canonical: {first: Eldon G., last: Lytle}
-  variants:
-  - {first: Eldon G., last: Lytel}
-- canonical: {first: Dau-cheng, last: Lyu}
-  variants:
-  - {first: Dau-Cheng, last: Lyu}
-- canonical: {first: Ren-Yuan, last: Lyu}
-  variants:
-  - {first: Ren-yuan, last: Lyu}
-- canonical: {first: François, last: Lévy}
-  variants:
-  - {first: François, last: Levy}
-- canonical: {first: Tuan Anh, last: Lê}
-  variants:
-  - {first: Tuan Anh, last: Le}
-  - {first: Tuấn Anh, last: Lê}
-- canonical: {first: M. Soledad, last: López Gambino}
-  variants:
-  - {first: Soledad, last: López Gambino}
-- canonical: {first: Karmele, last: López de Ipiña}
-  id: karmele-lopez-de-ipina
-- canonical: {first: Maddalen, last: López de Lacalle}
-  variants:
-  - {first: Maddalen, last: Lopez de Lacalle}
-- canonical: {first: Ramón, last: López-Cózar}
-  id: ramon-lopez-cozar
-- canonical: {first: Birte, last: Lönneker}
-  variants:
-  - {first: Birte, last: Lönneker-Rodman}
-  - {first: Birte, last: Loenneker-Rodman}
-- canonical: {first: Jia, last: Lü}
-  variants:
-  - {first: Jia, last: Lu}
-- canonical: {first: Yajuan, last: Lü}
-  variants:
-  - {first: Yajuan, last: Lu}
-  - {first: Yajuan, last: Lv}
-- canonical: {first: Harald, last: Lüngen}
-  variants:
-  - {first: Harald, last: Lungen}
-- canonical: {first: Marie-Claude, last: L’Homme}
-  variants:
-  - {first: Marie-Claude, last: L’ Homme}
-- canonical: {first: Sasikumar, last: M}
-  variants:
-  - {first: Sasikumar, last: M.}
-- canonical: {first: Ariadne, last: M. B. Rizzoni Carvalho}
-  variants:
-  - {first: Ariadne M. B. R., last: Carvalho}
-- canonical: {first: Nagwa, last: M. El-Makky}
-  variants:
-  - {first: Nagwa, last: El-Makky}
-- canonical: {first: Longlong, last: Ma}
-  variants:
-  - {first: Long Long, last: Ma}
-- canonical: {first: Wei-Ying, last: Ma}
-  variants:
-  - {first: Wei-ying, last: Ma}
-- canonical: {first: Wei-Yun, last: Ma}
-  variants:
-  - {first: Wei Yun, last: Ma}
-- canonical: {first: Mohamed, last: Maamouri}
-  variants:
-  - {first: Mohammed, last: Maamouri}
-- canonical: {first: Andrew, last: Maas}
-  variants:
-  - {first: Andrew L., last: Maas}
-- canonical: {first: Rónan, last: Mac an tSaoir}
-  variants:
-  - {first: Ronan, last: Mac an tSaoir}
-- canonical: {first: Andrew, last: MacKinlay}
-  variants:
-  - {first: Andrew, last: McKinlay}
-- canonical: {first: Peter, last: Machonis}
-  variants:
-  - {first: Peter A., last: Machonis}
-- canonical: {first: Catherine, last: Macleod}
-  variants:
-  - {first: Catherine, last: MacLeod}
-- canonical: {first: Imanol, last: Madariaga}
-  id: imanol-madariaga
-- canonical: {first: Pranava Swaroop, last: Madhyastha}
-  variants:
-  - {first: Pranava, last: Madhyastha}
-- canonical: {first: Bente, last: Maegaard}
-  id: bente-maegaard
-- canonical: {first: Kikuo, last: Maekawa}
-  id: kikuo-maekawa
-- canonical: {first: Valérie, last: Maffiolo}
-  id: valerie-maffiolo
-- canonical: {first: David M., last: Magerman}
-  id: david-m-magerman
-  variants:
-  - {first: David, last: Magerman}
-- canonical: {first: Brunelle, last: Magnana Ekoukou}
-  variants:
-  - {first: Brunelle Magnana, last: Ekoukou}
-- canonical: {first: Bernardo, last: Magnini}
-  id: bernardo-magnini
-- canonical: {first: Guðrun, last: Magnúsdóttir}
-  variants:
-  - {first: Guðrún, last: Magnúsdóttir}
-- canonical: {first: Sainik, last: Mahata}
-  variants:
-  - {first: Sainik Kumar, last: Mahata}
-- canonical: {first: Kavi, last: Mahesh}
-  variants:
-  - {first: Kavitha, last: Mahesh}
-  - {first: Kavitha Karimbi, last: Mahesh}
-- canonical: {first: Trang, last: Mai Xuan}
-  variants:
-  - {first: Trang Mai, last: Xuan}
-- canonical: {first: Elisabeth, last: Maier}
-  variants:
-  - {first: Elisabeth, last: Mager}
-- canonical: {first: Frederic, last: Mailhot}
-  variants:
-  - {first: Fred, last: Mailhot}
-  - {first: Frédéric, last: Mailhot}
-- canonical: {first: Steven J., last: Maiorano}
-  variants:
-  - {first: Steve, last: Maiorano}
-  - {first: Steven, last: Maiorano}
-  - {first: Steve, last: Moiorano}
-- canonical: {first: François, last: Mairesse}
-  variants:
-  - {first: Francois, last: Mairesse}
-- canonical: {first: John, last: Makhoul}
-  id: john-makhoul
-- canonical: {first: Shozo, last: Makino}
-  id: shozo-makino
-- canonical: {first: Alfredo, last: Maldonado}
-  variants:
-  - {first: Alfredo, last: Maldonado Guerra}
-  - {first: Alfredo, last: Maldonado-Guerra}
-- canonical: {first: Nishtha, last: Malhotra}
-  variants:
-  - {first: Nishta, last: Malhotra}
-- canonical: {first: M. G. Abbas, last: Malik}
-  variants:
-  - {first: M.G. Abbas, last: Malik}
-  - {first: M G Abbas, last: Malik}
-- canonical: {first: Deepak Kumar, last: Malladi}
-  variants:
-  - {first: Deepak, last: Malladi}
-- canonical: {first: Shervin, last: Malmasi}
-  variants:
-  - {first: Shevin, last: Malmasi}
-- canonical: {first: Preetam, last: Maloor}
-  id: preetam-maloor
-- canonical: {first: Robert, last: Malouf}
-  variants:
-  - {first: Rob, last: Malouf}
-- canonical: {first: Liliana, last: Mamani Sanchez}
-  variants:
-  - {first: Liliana, last: Mamani Sánchez}
-  - {first: Liliana Mamani, last: Sanchez}
-- canonical: {first: Nuno, last: Mamede}
-  variants:
-  - {first: Nuno J., last: Mamede}
-- canonical: {first: Nadia, last: Mana}
-  id: nadia-mana
-- canonical: {first: Esmeralda, last: Manandise}
-  variants:
-  - {first: Esme, last: Manandise}
-- canonical: {first: Alexis, last: Manaster-Ramer}
-  variants:
-  - {first: Alexis, last: Manaster Ramer}
-- canonical: {first: Soumil, last: Mandal}
-  variants:
-  - {first: Soumik, last: Mandal}
-- canonical: {first: Rila, last: Mandala}
-  variants:
-  - {first: Mandala, last: Rila}
-- canonical: {first: Michael, last: Mandel}
-  variants:
-  - {first: Michael, last: Mandl}
-- canonical: {first: Angrosh, last: Mandya}
-  variants:
-  - {first: Mandya, last: Angrosh}
-- canonical: {first: Mathieu, last: Mangeot}
-  variants:
-  - {first: Mathieu, last: Mangeot-Lerebours}
-- canonical: {first: Lidia, last: Mangu}
-  id: lidia-mangu
-- canonical: {first: Enrique, last: Manjavacas}
-  variants:
-  - {first: Enrique, last: Manjavacas Arevalo}
-- canonical: {first: Varun, last: Manjunatha}
-  variants:
-  - {first: Varun, last: Manjunath}
-- canonical: {first: Gideon, last: Mann}
-  variants:
-  - {first: Gideon S., last: Mann}
-- canonical: {first: William C., last: Mann}
-  variants:
-  - {first: William, last: Mann}
-- canonical: {first: Prashanth, last: Mannem}
-  variants:
-  - {first: Prashanth Reddy, last: Mannem}
-  - {first: Prashanth, last: Reddy}
-- canonical: {first: Christopher D., last: Manning}
-  variants:
-  - {first: Christopher, last: Manning}
-  - {first: Chris, last: Manning}
-- canonical: {first: Andre, last: Mansikkaniemi}
-  variants:
-  - {first: André, last: Mansikkaniemi}
-- canonical: {first: Mairgup, last: Mansur}
-  variants:
-  - {first: Mansur, last: Mairgup}
-- canonical: {first: Ruli, last: Manurung}
-  id: ruli-manurung
-- canonical: {first: Ramesh, last: Manuvinakurike}
-  variants:
-  - {first: Ramesh, last: Manuvirakurike}
-- canonical: {first: Lingshuang Jack, last: Mao}
-  variants:
-  - {first: Lingshuang, last: Mao}
-- canonical: {first: Xinnian, last: Mao}
-  variants:
-  - {first: Xin, last: Mao}
-- canonical: {first: Yu Hang, last: Mao}
-  variants:
-  - {first: Yu-Hang, last: Mao}
-  - {first: Yuhang, last: Mao}
-- canonical: {first: Valérie, last: Mapelli}
-  variants:
-  - {first: Valerie, last: Mapelli}
-- canonical: {first: Yannick, last: Marchand}
-  id: yannick-marchand
-- canonical: {first: Giulia, last: Marchesini}
-  variants:
-  - {first: Giulia, last: Marchesi}
-- canonical: {first: Malgorzata, last: Marciniak}
-  variants:
-  - {first: Małgorzata, last: Marciniak}
-- canonical: {first: Mitch, last: Marcus}
-  id: mitch-marcus
-  variants:
-  - {first: Mitchell, last: Marcus}
-  - {first: Mitchell P., last: Marcus}
-- canonical: {first: Joseph, last: Mariani}
-  id: joseph-mariani
-- canonical: {first: Montserrat, last: Marimon}
-  variants:
-  - {first: Montserrat, last: Marimón}
-  - {first: Montserrat Marimon, last: Felipe}
-- canonical: {first: Nicolas, last: Marin}
-  variants:
-  - {first: Nicolás, last: Marín}
-- canonical: {first: Andre, last: Mariotti}
-  variants:
-  - {first: André, last: Mariotti}
-- canonical: {first: Alberto, last: Maritxalar}
-  id: alberto-maritxalar
-- canonical: {first: Montse, last: Maritxalar}
-  id: montse-maritxalar
-- canonical: {first: José B., last: Mariño}
-  variants:
-  - {first: José, last: Mariño}
-- canonical: {first: Stella, last: Markantonatou}
-  id: stella-markantonatou
-- canonical: {first: Aleksandra Zögling, last: Markuš}
-  variants:
-  - {first: Aleksandra, last: Zögling}
-- canonical: {first: Kornél, last: Markó}
-  variants:
-  - {first: Kornel, last: Markó}
-- canonical: {first: Iain, last: Marshall}
-  variants:
-  - {first: Iain J., last: Marshall}
-- canonical: {first: Pierre-Francois, last: Marteau}
-  variants:
-  - {first: Pierre-François, last: Marteau}
-- canonical: {first: Alvin, last: Martin}
-  variants:
-  - {first: Alvin F., last: Martin}
-- canonical: {first: James H., last: Martin}
-  variants:
-  - {first: James, last: Martin}
-- canonical: {first: Jean-Claude, last: Martin}
-  id: jean-claude-martin
-- canonical: {first: M. Patrick, last: Martin}
-  variants:
-  - {first: Pierre M., last: Martin}
-  - {first: Patrick, last: Martin}
-- canonical: {first: Marco, last: Martin}
-  id: marco-martin
-- canonical: {first: Melanie, last: Martin}
-  variants:
-  - {first: Melanie J., last: Martin}
-- canonical: {first: William A., last: Martin}
-  id: william-a-martin
-- canonical: {first: Marianna, last: Martindale}
-  variants:
-  - {first: Marianna J., last: Martindale}
-- canonical: {first: David, last: Martinez}
-  variants:
-  - {first: David, last: Martínez}
-- canonical: {first: Miroslav, last: Martinović}
-  variants:
-  - {first: Miroslav, last: Martinovic}
-- canonical: {first: André F. T., last: Martins}
-  variants:
-  - {first: Andre, last: Martins}
-  - {first: André, last: Martins}
-- canonical: {first: Fernando, last: Martins}
-  id: fernando-martins
-- canonical: {first: Ronaldo Teixeira, last: Martins}
-  variants:
-  - {first: Ronaldo, last: Martins}
-- canonical: {first: David, last: Martins de Matos}
-  variants:
-  - {first: David Martins, last: de Matos}
-  - {first: David M., last: de Matos}
-- canonical: {first: M. Antònia, last: Martí}
-  id: m-antonia-marti
-  variants:
-  - {first: M. Antonia, last: Martí}
-  - {first: M. Antonia, last: Marti}
-  - {first: Antonia, last: Martí}
-  - {first: Mª Antònia, last: Martí}
-  - {first: Maria Antònia, last: Martí}
-  - {first: Toni, last: Martí}
-- canonical: {first: M. Teresa, last: Martín-Valdivia}
-  variants:
-  - {first: Maite, last: Martin}
-  - {first: María Teresa, last: Martín-Valdivia}
-  - {first: Maria Teresa, last: Martín-Valdivia}
-  - {first: Teresa, last: Martin}
-  - {first: M. Teresa, last: Martín}
-  - {first: Maite, last: Martín-Valdivia}
-- canonical: {first: Carlos, last: Martín-Vide}
-  variants:
-  - {first: Carlos Martin, last: Vide}
-- canonical: {first: José Manuel, last: Martínez}
-  variants:
-  - {first: Jose M.M., last: Martinez}
-  - {first: José Manuel, last: Martínez Martínez}
-  - {first: Jose Manuel, last: Martinez}
-- canonical: {first: Raquel, last: Martínez}
-  variants:
-  - {first: Raquel, last: Martinez}
-- canonical: {first: Héctor, last: Martínez Alonso}
-  variants:
-  - {first: Hector, last: Martinez}
-  - {first: Héctor, last: Martínez}
-  - {first: Héctor Martínez, last: Alonso}
-  - {first: Hector, last: Martinez Alonso}
-  - {first: Héctor, last: Martinez Alonso}
-  - {first: Hector, last: Martínez Alonso}
-- canonical: {first: Eva, last: Martínez Garcia}
-  variants:
-  - {first: Eva Martínez, last: Garcia}
-- canonical: {first: Patricio, last: Martínez-Barco}
-  id: patricio-martinez-barco
-  variants:
-  - {first: Patricio, last: Martinez-Barco}
-  - {first: Patricio Martinez, last: Barco}
-- canonical: {first: Eugenio, last: Martínez-Cámara}
-  variants:
-  - {first: Eugenio, last: Martinez Camara}
-- canonical: {first: Carlos-D., last: Martínez-Hinarejos}
-  variants:
-  - {first: Carlos D., last: Martínez-Hinarejos}
-  - {first: Carlos D., last: Martínez Hinarejos}
-  - {first: Carlos D., last: Martínez}
-- canonical: {first: Fernando, last: Martínez-Santiago}
-  variants:
-  - {first: Fernando, last: Martínez Santiago}
-- canonical: {first: Luis, last: Marujo}
-  variants:
-  - {first: Luís, last: Marujo}
-- canonical: {first: Andrés, last: Marzal}
-  id: andres-marzal
-- canonical: {first: Aaron J., last: Masino}
-  variants:
-  - {first: Aaron, last: Masino}
-- canonical: {first: Flavio, last: Massimiliano Cecchini}
-  variants:
-  - {first: Flavio Massimiliano, last: Cecchini}
-- canonical: {first: Demetrios, last: Master}
-  variants:
-  - {first: Demitrios, last: Master}
-- canonical: {first: Fumito, last: Masui}
-  id: fumito-masui
-- canonical: {first: Hiroshi, last: Masuichi}
-  variants:
-  - {first: Hiroshi, last: Mashuichi}
-- canonical: {first: Marco, last: Matassoni}
-  id: marco-matassoni
-- canonical: {first: Yannick, last: Mathieu}
-  variants:
-  - {first: Yvette Yannick, last: Mathieu}
-  - {first: Yvette, last: Mathieu}
-- canonical: {first: Ely Edison da Silva, last: Matos}
-  variants:
-  - {first: Ely, last: Matos}
-  - {first: Ely E. S., last: Matos}
-- canonical: {first: Yuji, last: Matsumoto}
-  variants:
-  - {first: Yūji, last: Matsumoto}
-- canonical: {first: Shoichi, last: Matsunaga}
-  variants:
-  - {first: Sho-ichi, last: Matsunaga}
-- canonical: {first: Christian M.I.M., last: Matthiessen}
-  variants:
-  - {first: Christian M. I. M., last: Matthiessen}
-- canonical: {first: Irina, last: Matveeva}
-  id: irina-matveeva
-- canonical: {first: Mirjam Sepesy, last: Maucec}
-  variants:
-  - {first: Mirjam Sepesy, last: Maučec}
-- canonical: {first: Michael L., last: Mauldin}
-  variants:
-  - {first: Michael, last: Mauldin}
-- canonical: {first: Daniel, last: Maxwell}
-  variants:
-  - {first: Dan, last: Maxwell}
-- canonical: {first: K. Tamsin, last: Maxwell}
-  variants:
-  - {first: Tamsin, last: Maxwell}
-- canonical: {first: John T., last: Maxwell III}
-  variants:
-  - {first: John, last: Maxwell}
-  - {first: John T., last: Maxwell}
-- canonical: {first: Mark T., last: Maybury}
-  variants:
-  - {first: Mark, last: Maybury}
-- canonical: {first: Aingeru, last: Mayor}
-  id: aingeru-mayor
-- canonical: {first: Pierre-Emmanuel, last: Mazare}
-  variants:
-  - {first: Pierre-Emmanuel, last: Mazaré}
-- canonical: {first: Erick Galani, last: Maziero}
-  variants:
-  - {first: Erick, last: Maziero}
-- canonical: {first: Pawel, last: Mazur}
-  variants:
-  - {first: Paweł, last: Mazur}
-- canonical: {first: Alessandro, last: Mazzei}
-  id: alessandro-mazzei
-- canonical: {first: Giampaolo, last: Mazzini}
-  id: giampaolo-mazzini
-- canonical: {first: Manuel J., last: Maña López}
-  variants:
-  - {first: Manuel J., last: Maña}
-  - {first: Manual Maña, last: López}
-  - {first: Manuel, last: Maña López}
-- canonical: {first: Michael L., last: Mc Hale}
-  variants:
-  - {first: Michael L., last: McHale}
-- canonical: {first: Gordon I., last: McCalla}
-  id: gordon-i-mccalla
-  variants:
-  - {first: Gordon, last: McCalla}
-- canonical: {first: J. Scott, last: McCarley}
-  variants:
-  - {first: Scott, last: McCarley}
-- canonical: {first: Arya D., last: McCarthy}
-  variants:
-  - {first: Arya, last: McCarthy}
-- canonical: {first: Diana, last: McCarthy}
-  variants:
-  - {first: Diana F., last: McCarthy}
-- canonical: {first: Joe, last: McCarthy}
-  id: joe-mccarthy
-- canonical: {first: Michael C., last: McCord}
-  variants:
-  - {first: Michael, last: McCord}
-- canonical: {first: Kathleen F., last: McCoy}
-  variants:
-  - {first: Kathleen, last: McCoy}
-  - {first: Kathleen E., last: McCoy}
-- canonical: {first: Nancy, last: McCracken}
-  variants:
-  - {first: Nancy J., last: McCracken}
-- canonical: {first: John Philip, last: McCrae}
-  variants:
-  - {first: John, last: McCrae}
-  - {first: John P., last: McCrae}
-- canonical: {first: David D., last: McDonald}
-  comment: MIT, BBN, SIFT
-  id: david-d-mcdonald
-  similar: [david-w-mcdonald]
-- canonical: {first: David W., last: McDonald}
-  comment: Univ. of Washington
-  id: david-w-mcdonald
-  similar: [david-d-mcdonald]
-- canonical: {first: Joyce, last: McDowell}
-  id: joyce-mcdowell
-- canonical: {first: Dan, last: McFarland}
-  variants:
-  - {first: Daniel, last: McFarland}
-  - {first: Daniel A., last: McFarland}
-- canonical: {first: David, last: McGee}
-  variants:
-  - {first: David R., last: McGee}
-- canonical: {first: Bridget, last: McInnes}
-  variants:
-  - {first: Bridget Thomson, last: McInnes}
-  - {first: Bridget T., last: McInnes}
-- canonical: {first: Douglas, last: McKee}
-  variants:
-  - {first: Doug, last: McKee}
-- canonical: {first: Kenneth J., last: McKeever}
-  variants:
-  - {first: Kenneth, last: McKeever}
-- canonical: {first: Kathleen, last: McKeown}
-  variants:
-  - {first: Kathy, last: McKeown}
-  - {first: Kathleen R., last: McKeown}
-- canonical: {first: Danielle S., last: McNamara}
-  variants:
-  - {first: Danielle, last: McNamara}
-- canonical: {first: John, last: McNaught}
-  id: john-mcnaught
-- canonical: {first: Margaret, last: McRorie}
-  id: margaret-mcrorie
-- canonical: {first: Susan W., last: McRoy}
-  variants:
-  - {first: Susan, last: McRoy}
-- canonical: {first: Kevin, last: McTait}
-  id: kevin-mctait
-- canonical: {first: Michael F., last: McTear}
-  variants:
-  - {first: Michael, last: McTear}
-- canonical: {first: Boubaker, last: Meddeb-Hamrouni}
-  variants:
-  - {first: Boubaker, last: Meddeb Hamrouni}
-- canonical: {first: Christopher, last: Meek}
-  variants:
-  - {first: Chris, last: Meek}
-- canonical: {first: Beáta, last: Megyesi}
-  variants:
-  - {first: Beata, last: Megyesi}
-  - {first: Beáta Bandmann, last: Megyesi}
-  - {first: Beáta B., last: Megyesi}
-- canonical: {first: Dennis, last: Mehay}
-  variants:
-  - {first: Dennis Nolan, last: Mehay}
-- canonical: {first: Sanket Vaibhav, last: Mehta}
-  variants:
-  - {first: Vaibhav, last: Mehta}
-- canonical: {first: Baye Yimam, last: Mekonnen}
-  variants:
-  - {first: Baye, last: Yimam}
-- canonical: {first: Alan K., last: Melby}
-  variants:
-  - {first: Alan, last: Melby}
-- canonical: {first: Chris, last: Mellish}
-  id: chris-mellish
-  variants:
-  - {first: Chris S., last: Mellish}
-- canonical: {first: Igor, last: Mel’čuk}
-  id: igor-melcuk
-- canonical: {first: Alfonso, last: Mendes}
-  variants:
-  - {first: Afonso, last: Mendes}
-- canonical: {first: Ana Cristina, last: Mendes}
-  variants:
-  - {first: Ana C., last: Mendes}
-  - {first: Ana, last: Mendes}
-- canonical: {first: Eneida A., last: Mendonca}
-  variants:
-  - {first: Eneida, last: Mendonca}
-- canonical: {first: Gustavo, last: Mendonca}
-  variants:
-  - {first: Gustavo, last: Mendonça}
-- canonical: {first: Helen, last: Meng}
-  variants:
-  - {first: Helen M., last: Meng}
-- canonical: {first: Rakesh R, last: Menon}
-  variants:
-  - {first: Rakesh, last: Menon}
-- canonical: {first: Robert E., last: Mercer}
-  comment: Univ. of Western Ontario
-  id: robert-e-mercer
-  similar: [robert-l-mercer]
-- canonical: {first: Robert L., last: Mercer}
-  comment: IBM
-  id: robert-l-mercer
-  similar: [robert-e-mercer]
-- canonical: {first: Roberta H., last: Merchant}
-  variants:
-  - {first: Roberta, last: Merchant}
-- canonical: {first: Bernard, last: Merialdo}
-  id: bernard-merialdo
-- canonical: {first: Elizabeth, last: Merkhofer}
-  variants:
-  - {first: Elizabeth M., last: Merkhofer}
-- canonical: {first: Marie, last: Meteer}
-  variants:
-  - {first: Marie W., last: Meteer}
-- canonical: {first: Marie Hélène, last: Metzger}
-  variants:
-  - {first: Marie-Hélène, last: Metzger}
-- canonical: {first: Dieter, last: Metzing}
-  id: dieter-metzing
-- canonical: {first: Frédéric, last: Meunier}
-  variants:
-  - {first: Frederic, last: Meunier}
-- canonical: {first: Detmar, last: Meurers}
-  variants:
-  - {first: W. Detmar, last: Meurers}
-  - {first: Walt Detmar, last: Meurers}
-- canonical: {first: Montserrat, last: Meya}
-  id: montserrat-meya
-- canonical: {first: Adam, last: Meyers}
-  id: adam-meyers
-- canonical: {first: Benjamin S., last: Meyers}
-  variants:
-  - {first: Benjamin, last: Meyers}
-- canonical: {first: Stephane, last: Meystre}
-  variants:
-  - {first: Stéphane, last: Meystre}
-- canonical: {first: Ivan, last: Meza-Ruiz}
-  variants:
-  - {first: Ivan Vladimir, last: Meza Ruiz}
-  - {first: Ivan V., last: Meza}
-  - {first: Ivan, last: Meza}
-  - {first: Ivan Vladimir, last: Meza-Ruiz}
-- canonical: {first: Antonio Valerio, last: Miceli-Barone}
-  variants:
-  - {first: Antonio Valerio, last: Miceli Barone}
-- canonical: {first: Lisa N., last: Michaud}
-  variants:
-  - {first: Lisa, last: Michaud}
-- canonical: {first: Patrizia, last: Michelassi}
-  id: patrizia-michelassi
-- canonical: {first: Archibald, last: Michiels}
-  id: archibald-michiels
-- canonical: {first: Lesly, last: Miculicich Werlen}
-  variants:
-  - {first: Lesly, last: Miculicich}
-- canonical: {first: Sabrina J., last: Mielke}
-  variants:
-  - {first: Sabrina, last: Mielke}
-- canonical: {first: Rada, last: Mihalcea}
-  variants:
-  - {first: Rada F., last: Mihalcea}
-- canonical: {first: France, last: Mihelic}
-  variants:
-  - {first: France, last: Mihelič}
-- canonical: {first: Tomáš, last: Mikolov}
-  variants:
-  - {first: Tomas, last: Mikolov}
-- canonical: {first: Sandra, last: Milena Castellanos Páez}
-  variants:
-  - {first: Sandra Castellanos, last: Páez}
-- canonical: {first: Ruy Luiz, last: Milidiú}
-  variants:
-  - {first: Ruy, last: Milidiú}
-- canonical: {first: Dale A., last: Miller}
-  variants:
-  - {first: Dale, last: Miller}
-- canonical: {first: John, last: Miller}
-  variants:
-  - {first: John E., last: Miller}
-- canonical: {first: Keith J., last: Miller}
-  variants:
-  - {first: Keith, last: Miller}
-- canonical: {first: Lance A., last: Miller}
-  id: lance-a-miller
-- canonical: {first: Laura G., last: Miller}
-  id: laura-g-miller
-- canonical: {first: Timothy, last: Miller}
-  variants:
-  - {first: Tim, last: Miller}
-- canonical: {first: Daniel P., last: Mills}
-  variants:
-  - {first: Daniel, last: Mills}
-- canonical: {first: David N., last: Milne}
-  variants:
-  - {first: David, last: Milne}
-- canonical: {first: Robert, last: Milne}
-  variants:
-  - {first: Rob, last: Milne}
-- canonical: {first: Behrouz, last: Minaei-Bidgoli}
-  variants:
-  - {first: Behrouz, last: Minaei-bidgoli}
-  - {first: Behrouz, last: Minaei}
-- canonical: {first: Nobuaki, last: Minematsu}
-  id: nobuaki-minematsu
-- canonical: {first: Zhaoyan, last: Ming}
-  variants:
-  - {first: Zhao-Yan, last: Ming}
-- canonical: {first: Michael, last: Minock}
-  variants:
-  - {first: Michael J., last: Minock}
-- canonical: {first: T. T., last: Mirnalinee}
-  variants:
-  - {first: Mirnalinee, last: T T}
-  - {first: T T, last: Mirnalinee}
-- canonical: {first: Dipendra, last: Misra}
-  variants:
-  - {first: Dipendra Kumar, last: Misra}
-- canonical: {first: Kei, last: Mitamura}
-  id: kei-mitamura
-- canonical: {first: Brian, last: Mitchell}
-  id: brian-mitchell
-- canonical: {first: Christopher, last: Mitchell}
-  variants:
-  - {first: Christopher M., last: Mitchell}
-- canonical: {first: Tom, last: Mitchell}
-  variants:
-  - {first: Tom M., last: Mitchell}
-- canonical: {first: Catalin, last: Mititelu}
-  variants:
-  - {first: Cătălin, last: Mititelu}
-- canonical: {first: Ruslan, last: Mitkov}
-  id: ruslan-mitkov
-- canonical: {first: V. K., last: Mittal}
-  variants:
-  - {first: V.K., last: Mittal}
-- canonical: {first: Vibhu O., last: Mittal}
-  variants:
-  - {first: Vibhu, last: Mittal}
-- canonical: {first: Natalia N., last: Modjeska}
-  variants:
-  - {first: Natalia, last: Modjeska}
-- canonical: {first: Sarah, last: Moeller}
-  variants:
-  - {first: Sarah R., last: Moeller}
-- canonical: {first: Marie Francine, last: Moens}
-  variants:
-  - {first: Marie-Francine, last: Moens}
-- canonical: {first: Saif, last: Mohammad}
-  variants:
-  - {first: Saif M., last: Mohammad}
-- canonical: {first: Ehsan, last: Mohammady Ardehaly}
-  variants:
-  - {first: Ehsan, last: Mohammady}
-- canonical: {first: Sharada Prasanna, last: Mohanty}
-  variants:
-  - {first: Sharada, last: Mohanty}
-- canonical: {first: Muhammad Tasnim, last: Mohiuddin}
-  variants:
-  - {first: Tasnim, last: Mohiuddin}
-- canonical: {first: Begoña Villada, last: Moirón}
-  variants:
-  - {first: Begoña, last: Villada Moirón}
-  - {first: Begoña, last: Villada}
-  - {first: M. Begoña Villada, last: Moirón}
-- canonical: {first: Luis Gerardo, last: Mojica de la Vega}
-  variants:
-  - {first: Luis, last: Mojica de la Vega}
-- canonical: {first: Christian, last: Moldovan}
-  variants:
-  - {first: Cristian, last: Moldovan}
-- canonical: {first: Dan, last: Moldovan}
-  id: dan-moldovan
-  variants:
-  - {first: Dan I., last: Moldovan}
-- canonical: {first: M. Dolores, last: Molina-González}
-  variants:
-  - {first: M. Dolores, last: Molina-Gonzalez}
-- canonical: {first: Diego, last: Molla}
-  variants:
-  - {first: Diego, last: Mollá-Aliod}
-  - {first: Diego, last: Mollá Aliod}
-  - {first: Diego, last: Molla-Aliod}
-  - {first: Diego, last: Mollá}
-- canonical: {first: Simonetta, last: Montemagni}
-  id: simonetta-montemagni
-- canonical: {first: Calkin S., last: Montero}
-  variants:
-  - {first: Calkin, last: Montero}
-- canonical: {first: Juan M., last: Montero}
-  variants:
-  - {first: Juan Manuel, last: Montero}
-- canonical: {first: Manuel, last: Montes}
-  variants:
-  - {first: Manuel, last: Montes-y-Gómez}
-  - {first: Manuel, last: Montes y Gomez}
-  - {first: Manuel, last: Montes y Gómez}
-- canonical: {first: Azucena, last: Montes-Rendon}
-  variants:
-  - {first: Azucena, last: Montes}
-- canonical: {first: Christine A., last: Montgomery}
-  variants:
-  - {first: Christine, last: Montgomery}
-- canonical: {first: Andrés, last: Montoyo}
-  variants:
-  - {first: Andres, last: Montoyo}
-  - {first: Andrés, last: Montoyo Guijarro}
-- canonical: {first: Kyong-Hi, last: Moon}
-  variants:
-  - {first: Kyonghi, last: Moon}
-- canonical: {first: Sungrim, last: Moon}
-  variants:
-  - {first: SungRim, last: Moon}
-- canonical: {first: Raymond, last: Mooney}
-  variants:
-  - {first: Raymond J., last: Mooney}
-- canonical: {first: Johanna D., last: Moore}
-  id: johanna-d-moore
-  variants:
-  - {first: Johanna, last: Moore}
-- canonical: {first: Robert C., last: Moore}
-  id: robert-c-moore
-  variants:
-  - {first: Robert, last: Moore}
-- canonical: {first: Roger K., last: Moore}
-  variants:
-  - {first: Roger, last: Moore}
-- canonical: {first: Michael, last: Moortgat}
-  id: michael-moortgat
-- canonical: {first: Nafise Sadat, last: Moosavi}
-  variants:
-  - {first: Nafise, last: Moosavi}
-- canonical: {first: Sílvia, last: Moraes}
-  variants:
-  - {first: Silvia, last: Moraes}
-- canonical: {first: Nicolás, last: Morales}
-  variants:
-  - {first: Nicolas, last: Morales}
-- canonical: {first: Douglas B., last: Moran}
-  variants:
-  - {first: Douglas, last: Moran}
-  - {first: Doug, last: Moran}
-- canonical: {first: Steven, last: Moran}
-  variants:
-  - {first: Steve, last: Moran}
-- canonical: {first: Paul, last: Morarescu}
-  variants:
-  - {first: Paul, last: Morărescu}
-  - {first: Paul C., last: Morărescu}
-- canonical: {first: Christian, last: Morbidoni}
-  id: christian-morbidoni
-- canonical: {first: Grégoire, last: Moreau de Montcheuil}
-  variants:
-  - {first: Grégoire, last: de Montcheuil}
-- canonical: {first: Paloma, last: Moreda Pozo}
-  variants:
-  - {first: Paloma, last: Moreda}
-- canonical: {first: Jihai, last: Zhang}
-  id: jihai-zhang-cuhk
-  orcid: 0000-0002-1400-9116
-  institution: The Chinese University of Hong Kong
-  comment: CUHK
-- canonical: {first: Jihai, last: Zhang}
-  id: jihai-zhang
-  comment: May refer to several people
-- canonical: {first: Asunción, last: Moreno}
-  id: asuncion-moreno
-  variants:
-  - {first: Asuncion, last: Moreno}
-  - {first: Asuncíon, last: Moreno}
-- canonical: {first: José G., last: Moreno}
-  variants:
-  - {first: Jose G., last: Moreno}
-  - {first: Jose, last: Moreno}
-- canonical: {first: Lidia, last: Moreno}
-  id: lidia-moreno
-- canonical: {first: Antonio, last: Moreno Ribas}
-  comment: Univ. Rovira i Virgili
-  id: antonio-moreno-ribas
-  similar: [antonio-moreno-ortiz, antonio-moreno-sandoval]
-- canonical: {first: Julian, last: Moreno Schneider}
-  variants:
-  - {first: Julian, last: Moreno-Schneider}
-  - {first: Julián, last: Moreno-Schneider}
-  - {first: Julián, last: Moreno Schneider}
-- canonical: {first: Antonio, last: Moreno-Ortiz}
-  comment: Univ. of Málaga
-  id: antonio-moreno-ortiz
-  similar: [antonio-moreno-ribas, antonio-moreno-sandoval]
-  variants:
-  - {first: Antonio, last: Moreno Ortiz}
-- canonical: {first: Antonio, last: Moreno-Sandoval}
-  comment: NYU, Univ. Autónoma de Madrid
-  id: antonio-moreno-sandoval
-  similar: [antonio-moreno-ortiz, antonio-moreno-ribas]
-  variants:
-  - {first: Antonio Moreno, last: Sandoval}
-- canonical: {first: Elliott, last: Moreton}
-  variants:
-  - {first: Elliot, last: Moreton}
-- canonical: {first: Lorenzo, last: Moretti}
-  id: lorenzo-moretti
-- canonical: {first: Richard G., last: Morgan}
-  variants:
-  - {first: Richard, last: Morgan}
-- canonical: {first: William, last: Morgan}
-  variants:
-  - {first: William T., last: Morgan}
-- canonical: {first: Véronique, last: Moriceau}
-  variants:
-  - {first: Veronique, last: Moriceau}
-- canonical: {first: Tsuyoshi, last: Morimoto}
-  variants:
-  - {first: Tsuyosi, last: Morimoto}
-- canonical: {first: James G., last: Mork}
-  variants:
-  - {first: James, last: Mork}
-- canonical: {first: Robert W., last: Morris}
-  variants:
-  - {first: Robert, last: Morris}
-- canonical: {first: David R., last: Mortensen}
-  variants:
-  - {first: David, last: Mortensen}
-- canonical: {first: Thomas S., last: Morton}
-  variants:
-  - {first: Thomas, last: Morton}
-- canonical: {first: Alex, last: Moruz}
-  variants:
-  - {first: Mihai Alex, last: Moruz}
-- canonical: {first: Ulrike, last: Mosel}
-  id: ulrike-mosel
-- canonical: {first: Sjur, last: Moshagen}
-  variants:
-  - {first: Sjur Nørstebø, last: Moshagen}
-  - {first: Sjur N., last: Moshagen}
-- canonical: {first: Lawrence S., last: Moss}
-  variants:
-  - {first: Lawrence, last: Moss}
-- canonical: {first: Anna, last: Jonsson}
-  id: anna-jonsson-umea
-  orcid: 0000-0002-9873-4170
-  institution: Umeå University
-  comment: Umeå University
-- canonical: {first: Anna, last: Jonsson}
-  id: anna-jonsson
-  comment: May refer to several people
-- canonical: {first: Djamel, last: Mostefa}
-  id: djamel-mostefa
-- canonical: {first: Jessica, last: Moszkowicz}
-  variants:
-  - {first: Jessica L., last: Moszkowicz}
-- canonical: {first: Abdelhak, last: Mouradi}
-  id: abdelhak-mouradi
-- canonical: {first: Hamed, last: Movasagh}
-  id: hamed-movasagh
-- canonical: {first: Danielle L., last: Mowery}
-  variants:
-  - {first: Danielle, last: Mowery}
-  - {first: Danielle L, last: Mowery}
-- canonical: {first: Joanna, last: Mrozinski}
-  id: joanna-mrozinski
-- canonical: {first: Christian, last: Mueller}
-  variants:
-  - {first: Christian, last: Müller}
-- canonical: {first: Thomas, last: Mueller}
-  variants:
-  - {first: Thomas, last: Müller}
-- canonical: {first: Chafic, last: Mukbel}
-  id: chafic-mukbel
-- canonical: {first: Rutu, last: Mulkar-Mehta}
-  variants:
-  - {first: Rutu, last: Mulkar}
-- canonical: {first: Dasa, last: Munkova}
-  variants:
-  - {first: Daša, last: Munková}
-- canonical: {first: Juan Pablo, last: Munoz}
-  id: juan-pablo-munoz
-  variants:
-  - {first: J. Pablo, last: Muñoz}
-- canonical: {first: Dragos Stefan, last: Munteanu}
-  variants:
-  - {first: Dragos, last: Munteanu}
-- canonical: {first: William R., last: Murray}
-  variants:
-  - {first: William, last: Murray}
-- canonical: {first: Hema A., last: Murthy}
-  variants:
-  - {first: Hema, last: Murthy}
-- canonical: {first: Hy, last: Murveit}
-  id: hy-murveit
-- canonical: {first: Claudiu, last: Musat}
-  variants:
-  - {first: Claudiu-Cristian, last: Musat}
-- canonical: {first: Gabriele, last: Musillo}
-  variants:
-  - {first: Gabriele Antonio, last: Musillo}
-- canonical: {first: Pradeep, last: Muthukrishnan}
-  variants:
-  - {first: Pradeep, last: Muthukrishan}
-- canonical: {first: Rafael, last: Muñoz}
-  id: rafael-munoz
-  variants:
-  - {first: Rafael, last: Muñoz Guillena}
-  - {first: Rafael, last: Muñoz-Guillena}
-- canonical: {first: Sung-Hyon, last: Myaeng}
-  variants:
-  - {first: Sung Hyon, last: Myaeng}
-  - {first: Sung H., last: Myaeng}
-  - {first: Sung-hyon, last: Myaeng}
-- canonical: {first: Kanthashree, last: Mysore Sathyendra}
-  variants:
-  - {first: Kanthashree Mysore, last: Sathyendra}
-- canonical: {first: Lluís, last: Màrquez}
-  id: lluis-marquez
-  variants:
-  - {first: Lluis, last: Marquez}
-  - {first: Lluis, last: Màrquez}
-  - {first: Lluis, last: Márquez}
-- canonical: {first: Gildas, last: Ménier}
-  variants:
-  - {first: Gildas, last: Menier}
-- canonical: {first: Bernd, last: Möbius}
-  variants:
-  - {first: Bernd, last: Mobius}
-- canonical: {first: Christof, last: Müller}
-  variants:
-  - {first: Christof E., last: Müller}
-- canonical: {first: Christoph, last: Müller}
-  variants:
-  - {first: Christoph, last: Mueller}
-- canonical: {first: Frank Henrik, last: Müller}
-  variants:
-  - {first: Frank H., last: Müller}
-  - {first: Frank, last: Müller}
-- canonical: {first: Mark-Christoph, last: Müller}
-  variants:
-  - {first: Mark-Christoph, last: Mueller}
-- canonical: {first: Bang, last: Nguyen}
-  institution: Notre Dame
-  orcid: 0009-0002-8365-4562
-- canonical: {first: Stefan, last: Müller}
-  variants:
-  - {first: Stefan, last: Muller}
-  - {first: Stefan, last: Mueller}
-- canonical: {first: Maria, last: Nadejde}
-  variants:
-  - {first: Maria, last: Nădejde}
-- canonical: {first: Makoto, last: Nagao}
-  id: makoto-nagao
-- canonical: {first: Meenakshi, last: Nagarajan}
-  variants:
-  - {first: Meena, last: Nagarajan}
-- canonical: {first: Magdi, last: Nagi}
-  variants:
-  - {first: Magdy, last: Nagi}
-- canonical: {first: István, last: Nagy T.}
-  variants:
-  - {first: István T., last: Nagy}
-  - {first: István, last: Nagy}
-  - {first: Istvan, last: Nagy}
-- canonical: {first: Seiichi, last: Nakagawa}
-  id: seiichi-nakagawa
-- canonical: {first: Jun-ichi, last: Nakamura}
-  variants:
-  - {first: Jun’ichi, last: Nakamura}
-- canonical: {first: Yukiko I., last: Nakano}
-  variants:
-  - {first: Yukiko, last: Nakano}
-- canonical: {first: Ndapandula, last: Nakashole}
-  variants:
-  - {first: Ndapa, last: Nakashole}
-- canonical: {first: Christine H., last: Nakatani}
-  variants:
-  - {first: Christine, last: Nakatani}
-- canonical: {first: Shu, last: Nakazato}
-  id: shu-nakazato
-- canonical: {first: Preslav, last: Nakov}
-  variants:
-  - {first: Preslav I., last: Nakov}
-- canonical: {first: Yuhao, last: Wang}
-  id: yuhao-wang-renmin
-  orcid: 0009-0001-5760-9285
-  institution: Renmin University of China
-  comment: Renmin
-- canonical: {first: Yuhao, last: Wang}
-  id: yuhao-wang
-  comment: May refer to several people
-- canonical: {first: Jee-sun, last: Nam}
-  variants:
-  - {first: Jee-Sun, last: Nam}
-- canonical: {first: Fiammetta, last: Namer}
-  variants:
-  - {first: Fiametta, last: Namer}
-- canonical: {first: Shrikanth, last: Narayanan}
-  variants:
-  - {first: Shri, last: Narayanan}
-  - {first: Shrikanth S., last: Narayanan}
-- canonical: {first: Srini, last: Narayanan}
-  variants:
-  - {first: Srinivas, last: Narayanan}
-- canonical: {first: Maria Fernanda Bacelar do, last: Nascimento}
-  variants:
-  - {first: Maria Fernanda Bacelar, last: do Nascimento}
-  - {first: Fernanda Bacelar, last: do Nascimento}
-- canonical: {first: Jamal A., last: Nasir}
-  variants:
-  - {first: Jamal, last: Nasir}
-- canonical: {first: Sudip Kumar, last: Naskar}
-  variants:
-  - {first: Sudip, last: Kumar Naskar}
-  - {first: Sudip, last: Naskar}
-- canonical: {first: Vivi, last: Nastase}
-  variants:
-  - {first: Vivi, last: Năstase}
-- canonical: {first: Prem, last: Natarajan}
-  variants:
-  - {first: Premkumar, last: Natarajan}
-- canonical: {first: P. Senthil, last: Nathan}
-  variants:
-  - {first: Senthil, last: Nathan}
-- canonical: {first: Borja, last: Navarro}
-  id: borja-navarro
-  variants:
-  - {first: Borja, last: Navarro-Colorado}
-- canonical: {first: Eva, last: Navas}
-  id: eva-navas
-- canonical: {first: Jiří, last: Navrátil}
-  variants:
-  - {first: Jiri, last: Navratil}
-- canonical: {first: Tapas, last: Nayak}
-  variants:
-  - {first: Tapas, last: Nayek}
-- canonical: {first: Adeline, last: Nazarenko}
-  id: adeline-nazarenko
-  variants:
-  - {first: Adeline, last: Nazarenko-Perrin}
-- canonical: {first: Jeannette G., last: Neal}
-  id: jeannette-g-neal
-- canonical: {first: Silvia, last: Necşulescu}
-  variants:
-  - {first: Silvia, last: Necsulescu}
-- canonical: {first: Nicolas, last: Nedobejkine}
-  id: nicolas-nedobejkine
-- canonical: {first: Mary S., last: Neff}
-  variants:
-  - {first: Mary, last: Neff}
-- canonical: {first: Matteo, last: Negri}
-  id: matteo-negri
-- canonical: {first: Anil Kumar, last: Nelakanti}
-  variants:
-  - {first: Anil, last: Kumar}
-- canonical: {first: Esa, last: Nelimarkka}
-  id: esa-nelimarkka
-- canonical: {first: Dávid Márk, last: Nemeskey}
-  variants:
-  - {first: David Mark, last: Nemeskey}
-- canonical: {first: Goran, last: Nenadic}
-  variants:
-  - {first: Goran, last: Nenadić}
-- canonical: {first: Jian, last: Wang}
-  id: jian-wang-hongkongpoly
-  orcid: 0000-0002-8992-8336
-  institution: The Hong Kong Polytechnic University
-  comment: Hong Kong Polytechnic
-- canonical: {first: Jian, last: Wang}
-  id: jian-wang
-  comment: May refer to several people
-- canonical: {first: João P., last: Neto}
-  variants:
-  - {first: Joao P., last: Neto}
-  - {first: Joao, last: Neto}
-  - {first: João, last: Neto}
-  - {first: João Paulo, last: Neto}
-- canonical: {first: Yael, last: Netzer}
-  variants:
-  - {first: Yael Dahan, last: Netzer}
-  - {first: Yael, last: Dahan}
-- canonical: {first: Günter, last: Neumann}
-  variants:
-  - {first: Gunter, last: Neumann}
-  - {first: Guenter, last: Neumann}
-- canonical: {first: Aurelie, last: Neveol}
-  variants:
-  - {first: Aurélie, last: Névéol}
-- canonical: {first: Bruce E., last: Nevin}
-  variants:
-  - {first: Bruce, last: Nevin}
-- canonical: {first: Paula, last: Newman}
-  id: paula-newman
-  variants:
-  - {first: Paula S., last: Newman}
-- canonical: {first: Hermann, last: Ney}
-  id: hermann-ney
-- canonical: {first: Gunta, last: Nešpore}
-  variants:
-  - {first: Gunta, last: Nespore-Berzkalne}
-- canonical: {first: Andrew Y., last: Ng}
-  variants:
-  - {first: Andrew, last: Ng}
-- canonical: {first: Hong-I, last: Ng}
-  variants:
-  - {first: Hong I, last: Ng}
-- canonical: {first: Jun Ping, last: Ng}
-  variants:
-  - {first: Jun-Ping, last: Ng}
-- canonical: {first: Raymond, last: Ng}
-  variants:
-  - {first: Raymond T., last: Ng}
-- canonical: {first: See Kiong, last: Ng}
-  variants:
-  - {first: See-Kiong, last: Ng}
-- canonical: {first: Cam-Tu, last: Nguyen}
-  variants:
-  - {first: Cẩm Tú, last: Nguyễn}
-- canonical: {first: Huy, last: Nguyen}
-  comment: Stanford
-  id: huy-nguyen-stanford
-- canonical: {first: Huy, last: Nguyen}
-  comment: UPitt, Amazon
-  id: huy-nguyen-pgh
-- canonical: {first: Huy, last: Nguyen}
-  comment: BCL Technologies Inc.
-  id: huy-nguyen-bcl
-- canonical: {first: Huy, last: Nguyen}
-  comment: ex-liulishuo
-  id: huy-nguyen-lls
-- canonical: {first: Huy Tien, last: Nguyen}
-  variants:
-  - {first: Huy-Tien, last: Nguyen}
-- canonical: {first: Kasu Sai Kartheek, last: Reddy}
-  orcid: 0009-0007-6679-3313
-  institution: Indian Institute of Information Technology Dharwad, India
-  variants:
-  - {first: Sai Kartheek, last: Reddy Kasu}
-- canonical: {first: Long, last: Nguyen}
-  id: long-nguyen
-- canonical: {first: Minh Le, last: Nguyen}
-  id: minh-le-nguyen
-  variants:
-  - {first: Minh-Le, last: Nguyen}
-  - {first: Le-Minh, last: Nguyen}
-  - {first: Nguyen Le, last: Minh}
-  - {first: Le Minh, last: Nguyen}
-  - {first: Nguyen, last: Le Minh}
-- canonical: {first: Phuong-Thai, last: Nguyen}
-  variants:
-  - {first: Phuong Thai, last: Nguyen}
-- canonical: {first: Quy, last: Nguyen}
-  variants:
-  - {first: Quy T., last: Nguyen}
-- canonical: {first: Thi Minh Huyen, last: Nguyen}
-  id: thi-minh-huyen-nguyen
-  variants:
-  - {first: Thi Minh Huyền, last: Nguyễn}
-  - {first: Thị Minh Huyền, last: Nguyễn}
-  - {first: Thi-Minh-Huyen, last: Nguyen}
-- canonical: {first: ThuyLinh, last: Nguyen}
-  variants:
-  - {first: Thuy Linh, last: Nguyen}
-- canonical: {first: Toan Q., last: Nguyen}
-  variants:
-  - {first: Toan, last: Nguyen}
-- canonical: {first: Tri-Thanh, last: Nguyen}
-  variants:
-  - {first: Tri Thanh, last: Nguyen}
-- canonical: {first: Van minh, last: Nguyen}
-  variants:
-  - {first: Van Minh, last: Nguyen}
-- canonical: {first: Viet Cuong, last: Nguyen}
-  variants:
-  - {first: Nguyen Viet, last: Cuong}
-- canonical: {first: Vinh Van, last: Nguyen}
-  variants:
-  - {first: Vinh-Van, last: Nguyen}
-- canonical: {first: Ngô Thanh, last: Nhàn}
-  id: ngo-thanh-nhan
-  variants:
-  - {first: Ngo Thanh, last: Nhan}
-- canonical: {first: Nicolas, last: Nicolov}
-  id: nicolas-nicolov
-  similar: [nikola-i-nikolov]
-- canonical: {first: Jian-Yun, last: Nie}
-  variants:
-  - {first: Jian-yun, last: Nie}
-- canonical: {first: Rodney, last: Nielsen}
-  variants:
-  - {first: Rodney D., last: Nielsen}
-- canonical: {first: Sonja, last: Nießen}
-  id: sonja-niessen
-  variants:
-  - {first: Sonja, last: Niessen}
-- canonical: {first: Nikola I., last: Nikolov}
-  id: nikola-i-nikolov
-  similar: [nicolas-nicolov]
-- canonical: {first: Kristina, last: Nilsson Björkenstam}
-  variants:
-  - {first: Kristina, last: Nilsson}
-  - {first: Kristina Nilsson, last: Björkenstam}
-  - {first: Kristina, last: N. Björkenstam}
-- canonical: {first: Nobal Bikram, last: Niraula}
-  variants:
-  - {first: Nobal, last: Niraula}
-- canonical: {first: Irene, last: Nirenburg}
-  variants:
-  - {first: Irene B., last: Nirenburg}
-- canonical: {first: Sergei, last: Nirenburg}
-  id: sergei-nirenburg
-  variants:
-  - {first: Sergei, last: Nirenberg}
-- canonical: {first: Toyoaki, last: Nishida}
-  variants:
-  - {first: Toyo-aki, last: Nishida}
-- canonical: {first: Bodil, last: Nistrup Madsen}
-  variants:
-  - {first: Bodil Nistrup, last: Madsen}
-- canonical: {first: Zheng-Yu, last: Niu}
-  variants:
-  - {first: Zheng Yu, last: Niu}
-  - {first: Zhengyu, last: Niu}
-- canonical: {first: Pascal, last: Nocéra}
-  variants:
-  - {first: Pascal, last: Nocera}
-- canonical: {first: Albino, last: Nogueiras}
-  variants:
-  - {first: Albino Nogueiras, last: Rodriguez}
-- canonical: {first: Lewis M., last: Norton}
-  variants:
-  - {first: Lewis, last: Norton}
-- canonical: {first: R., last: Nozohoor-Farshi}
-  variants:
-  - {first: R, last: Nozohoor-Farshi}
-- canonical: {first: Rita, last: Nuebel}
-  variants:
-  - {first: Rita, last: Nüebel}
-- canonical: {first: Minghua, last: Nuo}
-  variants:
-  - {first: Ming Hua, last: Nuo}
-- canonical: {first: Aparna, last: Nurani Venkitasubramanian}
-  variants:
-  - {first: Aparna N., last: Venkitasubramanian}
-- canonical: {first: Eric, last: Nyberg}
-  variants:
-  - {first: Eric H., last: Nyberg III}
-  - {first: Eric H., last: Nyberg}
-  - {first: Eric H., last: 'Nyberg, 3rd'}
-- canonical: {first: Claire, last: Nédellec}
-  variants:
-  - {first: Claire, last: Nėdellec}
-- canonical: {first: Elmar, last: Nöth}
-  id: elmar-noth
-  variants:
-  - {first: Elmar, last: Noth}
-- canonical: {first: Douglas W., last: Oard}
-  variants:
-  - {first: Douglas, last: Oard}
-  - {first: Doug, last: Oard}
-- canonical: {first: Jon, last: Oberlander}
-  variants:
-  - {first: Jonathan, last: Oberländer}
-- canonical: {first: Laura Ana Maria, last: Oberländer}
-  variants:
-  - {first: Laura Ana Maria, last: Bostan}
-  - {first: Laura-Ana-Maria, last: Bostan}
-- canonical: {first: Ivan, last: Obradović}
-  variants:
-  - {first: Ivan, last: Obradoviæ}
-- canonical: {first: Tomasz, last: Obrębski}
-  variants:
-  - {first: Tomasz, last: Obrebski}
-- canonical: {first: Franz Josef, last: Och}
-  id: franz-josef-och
-  variants:
-  - {first: Franz J., last: Och}
-  - {first: Franz, last: Och}
-- canonical: {first: Cheol-Young, last: Ock}
-  variants:
-  - {first: Cheolyoung, last: Ock}
-  - {first: Cheol-young, last: Ock}
-- canonical: {first: Wei, last: Fan}
-  id: wei-fan-hkust
-  orcid: 0009-0008-1900-7081
-  institution: Hong Kong University of Science and Technology
-  comment: HKUST
-- canonical: {first: Wei, last: Fan}
-  id: wei-fan
-  comment: May refer to several people
-- canonical: {first: Julian J., last: Odell}
-  id: julian-j-odell
-- canonical: {first: Jan, last: Odijk}
-  id: jan-odijk
-- canonical: {first: Pinar, last: Oezden Wennerberg}
-  variants:
-  - {first: Pinar, last: Wennerberg}
-  - {first: Pinar Oezden, last: Wennerberg}
-- canonical: {first: William C., last: Ogden}
-  variants:
-  - {first: William, last: Ogden}
-- canonical: {first: Philip, last: Ogren}
-  variants:
-  - {first: Philip V., last: Ogren}
-- canonical: {first: Alice, last: Oh}
-  variants:
-  - {first: Alice H., last: Oh}
-- canonical: {first: Jong-Hoon, last: Oh}
-  variants:
-  - {first: Jong Hoon, last: Oh}
-- canonical: {first: Takahiro, last: Ohno}
-  variants:
-  - {first: Takahiro, last: Ono}
-- canonical: {first: Atul Kr., last: Ojha}
-  variants:
-  - {first: Atul Ku., last: Ojha}
-- canonical: {first: Naoaki, last: Okazaki}
-  variants:
-  - {first: Naoki, last: Okazaki}
-- canonical: {first: Manabu, last: Okumura}
-  variants:
-  - {first: Manabu, last: Okumara}
-- canonical: {first: Hiroshi G., last: Okuno}
-  variants:
-  - {first: Hiroshi, last: Okuno}
-- canonical: {first: Duane E., last: Olawsky}
-  variants:
-  - {first: Duane, last: Olawsky}
-- canonical: {first: Karel, last: Oliva}
-  variants:
-  - {first: Karel, last: Oli̊va}
-- canonical: {first: José Luís, last: Oliveira}
-  variants:
-  - {first: Luís, last: Oliveira}
-- canonical: {first: Osvaldo Novais, last: Oliveira Jr.}
-  variants:
-  - {first: Osvaldo, last: Oliveira Jr}
-- canonical: {first: Solange, last: Oliveira Rezende}
-  variants:
-  - {first: Solange, last: Rezende}
-- canonical: {first: Andrew, last: Olney}
-  variants:
-  - {first: Andrew M., last: Olney}
-- canonical: {first: Mari Broman, last: Olsen}
-  variants:
-  - {first: Mari, last: Olsen}
-- canonical: {first: Maurizio, last: Omologo}
-  id: maurizio-omologo
-- canonical: {first: Arturo, last: Oncevay}
-  variants:
-  - {first: Arturo, last: Oncevay-Marcos}
-- canonical: {first: Corinna, last: Onelli}
-  id: corinna-onelli
-- canonical: {first: Takashi, last: Onishi}
-  variants:
-  - {first: Takeshi, last: Onishi}
-  - {first: Takashi, last: Oonishi}
-- canonical: {first: Boyan, last: Onyshkevych}
-  variants:
-  - {first: Boyan A., last: Onyshkevych}
-- canonical: {first: Constantin, last: Orasan}
-  id: constantin-orasan
-  variants:
-  - {first: Constantin, last: Orăsan}
-- canonical: {first: Zeynep, last: Orhan}
-  variants:
-  - {first: Orhan, last: Zeynep}
-- canonical: {first: Maite, last: Oronoz}
-  id: maite-oronoz
-- canonical: {first: J. Walker, last: Orr}
-  variants:
-  - {first: Walker, last: Orr}
-- canonical: {first: Javier, last: Ortega-García}
-  variants:
-  - {first: Javier, last: Ortega-Garcia}
-- canonical: {first: Sergio, last: Ortiz Rojas}
-  variants:
-  - {first: Sergio, last: Ortiz-Rojas}
-  - {first: Sergio Ortiz, last: Rojas}
-- canonical: {first: Pedro, last: Ortiz Suarez}
-  variants:
-  - {first: Pedro Javier, last: Ortiz Suárez}
-- canonical: {first: Daniel, last: Ortiz-Martínez}
-  variants:
-  - {first: Daniel, last: Ortíz-Martínez}
-  - {first: Daniel, last: Ortiz Martínez}
-- canonical: {first: Beatrice, last: Oshika}
-  variants:
-  - {first: Beatrice T., last: Oshika}
-- canonical: {first: David Yoshikazu, last: Oshima}
-  variants:
-  - {first: David Y., last: Oshima}
-- canonical: {first: Mari, last: Ostendorf}
-  id: mari-ostendorf
-- canonical: {first: Julia, last: Otmakhova}
-  variants:
-  - {first: Yulia, last: Otmakhova}
-- canonical: {first: Jahna, last: Otterbacher}
-  variants:
-  - {first: Jahna C., last: Otterbacher}
-- canonical: {first: Cecilia, last: Ovesdotter Alm}
-  variants:
-  - {first: Cecilia Ovesdotter, last: Alm}
-  - {first: Cecilia O., last: Alm}
-  - {first: Cecilia, last: O. Alm}
-- canonical: {first: Sharon, last: Oviatt}
-  variants:
-  - {first: Sharon L., last: Oviatt}
-- canonical: {first: Hiromi Itoh, last: Ozaku}
-  variants:
-  - {first: Hiromi itoh, last: Ozaku}
-- canonical: {first: Canberk, last: Ozdemir}
-  variants:
-  - {first: Canberk, last: Özdemir}
-- canonical: {first: Benoît, last: Ozell}
-  variants:
-  - {first: Benoit, last: Ozell}
-- canonical: {first: Timothy, last: O’Donnell}
-  variants:
-  - {first: Timothy J., last: O’Donnell}
-  - {first: Tim, last: O’Donnell}
-- canonical: {first: Thomas P., last: O’Hara}
-  variants:
-  - {first: Thomas, last: O’Hara}
-- canonical: {first: Tim, last: O’Keefe}
-  variants:
-  - {first: Timothy, last: O’Keefe}
-- canonical: {first: Dianne P., last: O’Leary}
-  variants:
-  - {first: Dianne, last: O’Leary}
-- canonical: {first: Ian M., last: O’Neill}
-  variants:
-  - {first: Ian, last: O’Neill}
-- canonical: {first: Douglas, last: O’Shaughnessy}
-  id: douglas-oshaughnessy
-  variants:
-  - {first: Douglas D., last: O’Shaughnessy}
-- canonical: {first: Dave, last: O’mara}
-  id: dave-omara
-- canonical: {first: Elaine, last: O′Mahony}
-  variants:
-  - {first: Elaine, last: O’Mahony}
-- canonical: {first: Sarah Masud, last: Preum}
-  orcid: 0000-0002-7771-8323
-  variants:
-  - {first: Sarah, last: Preum}
-  - {first: Sarah M., last: Preum}
-- canonical: {first: Deepak, last: P}
-  variants:
-  - {first: Deepak, last: Padmanabhan}
-- canonical: {first: Gerhard, last: Paaß}
-  variants:
-  - {first: Gerhard, last: Paass}
-- canonical: {first: Gordon, last: Pace}
-  variants:
-  - {first: Gordon J., last: Pace}
-- canonical: {first: María Leonor, last: Pacheco}
-  variants:
-  - {first: Maria Leonor, last: Pacheco}
-- canonical: {first: Ulrike, last: Pado}
-  variants:
-  - {first: Ulrike, last: Padó}
-- canonical: {first: Lluís, last: Padró}
-  id: lluis-padro
-  variants:
-  - {first: Lluis, last: Padro}
-  - {first: Lluis, last: Padró}
-- canonical: {first: Muntsa, last: Padró}
-  id: muntsa-padro
-- canonical: {first: Sebastian, last: Padó}
-  variants:
-  - {first: Sebastian, last: Pado}
-- canonical: {first: Gustavo, last: Paetzold}
-  variants:
-  - {first: Gustavo H., last: Paetzold}
-  - {first: Gustavo, last: Henrique Paetzold}
-  - {first: Gustavo Henrique, last: Paetzold}
-- canonical: {first: Peteris, last: Paikens}
-  variants:
-  - {first: Pēteris, last: Paikens}
-- canonical: {first: Jean-Pierre, last: Paillet}
-  variants:
-  - {first: Jean Pierre, last: Paillet}
-- canonical: {first: Helen, last: Pain}
-  id: helen-pain
-- canonical: {first: Daniel, last: Paiva}
-  id: daniel-paiva
-  variants:
-  - {first: Daniel S., last: Paiva}
-- canonical: {first: Sergey V., last: Pakhomov}
-  variants:
-  - {first: Sergey, last: Pakhomov}
-- canonical: {first: Serguei, last: Pakhomov}
-  variants:
-  - {first: Serguei V., last: Pakhomov}
-- canonical: {first: Christopher, last: Pal}
-  variants:
-  - {first: Chris, last: Pal}
-- canonical: {first: David S., last: Pallett}
-  id: david-s-pallett
-  variants:
-  - {first: David, last: Pallett}
-- canonical: {first: David D., last: Palmer}
-  variants:
-  - {first: David, last: Palmer}
-- canonical: {first: Martha, last: Palmer}
-  variants:
-  - {first: Martha Stone, last: Palmer}
-  - {first: Martha S., last: Palmer}
-- canonical: {first: Manuel, last: Palomar}
-  id: manuel-palomar
-- canonical: {first: Girish, last: Palshikar}
-  variants:
-  - {first: Girish K., last: Palshikar}
-  - {first: Girish K, last: Palshikar}
-- canonical: {first: Michael J., last: Pan}
-  variants:
-  - {first: Michael, last: Pan}
-- canonical: {first: Yi-Cheng, last: Pan}
-  variants:
-  - {first: Yi-cheng, last: Pan}
-- canonical: {first: Onkar Arun, last: Pandit}
-  variants:
-  - {first: Onkar, last: Pandit}
-- canonical: {first: Jarmila, last: Panevová}
-  variants:
-  - {first: Jarmila, last: Panevova}
-- canonical: {first: Nagesh C., last: Panyam}
-  variants:
-  - {first: Nagesh, last: C. Panyam}
-- canonical: {first: Constantine, last: Papageorgiou}
-  variants:
-  - {first: Constantine P., last: Papageorgiou}
-- canonical: {first: Harris, last: Papageorgiou}
-  variants:
-  - {first: Haris, last: Papageorgiou}
-- canonical: {first: Alexandros, last: Papangelis}
-  variants:
-  - {first: Alex, last: Papangelis}
-- canonical: {first: Ivandré, last: Paraboni}
-  variants:
-  - {first: Ivandre, last: Paraboni}
-- canonical: {first: Emerson Cabrera, last: Paraiso}
-  variants:
-  - {first: Emerson, last: Paraiso}
-- canonical: {first: Monica Lestari, last: Paramita}
-  variants:
-  - {first: Monica, last: Paramita}
-- canonical: {first: Jose Manuel, last: Pardo}
-  variants:
-  - {first: Jose M., last: Pardo}
-  - {first: José M., last: Pardo}
-- canonical: {first: Mi, last: Zhang}
-  id: mi-zhang-ucd
-  orcid: 0000-0003-3567-3478
-  institution: University College Dublin
-  comment: Dublin
-- canonical: {first: Mi, last: Zhang}
-  id: mi-zhang
-  comment: May refer to multiple people
-- canonical: {first: Antonio, last: Pareja Lora}
-  variants:
-  - {first: Antonio, last: Pareja-Lora}
-- canonical: {first: Ankur, last: Parikh}
-  variants:
-  - {first: Ankur P., last: Parikh}
-- canonical: {first: Cecile, last: Paris}
-  variants:
-  - {first: Cécile, last: Paris}
-  - {first: Cecile L., last: Paris}
-- canonical: {first: Praveen, last: Paritosh}
-  variants:
-  - {first: Praveen, last: P}
-- canonical: {first: HyukRo, last: Park}
-  variants:
-  - {first: Hyukro, last: Park}
-- canonical: {first: Hyun Seok, last: Park}
-  variants:
-  - {first: Hyun S., last: Park}
-- canonical: {first: Jong C., last: Park}
-  variants:
-  - {first: Jong, last: Park}
-- canonical: {first: Myung-Kwan, last: Park}
-  variants:
-  - {first: Myungkwan, last: Park}
-- canonical: {first: Sang-Kyu, last: Park}
-  variants:
-  - {first: Sangkyu, last: Park}
-- canonical: {first: Se-Young, last: Park}
-  variants:
-  - {first: Se Young, last: Park}
-- canonical: {first: Y. Albert, last: Park}
-  variants:
-  - {first: Albert, last: Park}
-- canonical: {first: ‘Ōiwi, last: Parker Jones}
-  variants:
-  - {first: Oiwi, last: Parker Jones}
-- canonical: {first: Patrick, last: Paroubek}
-  id: patrick-paroubek
-- canonical: {first: Carla, last: Parra Escartín}
-  variants:
-  - {first: Carla, last: Parra}
-  - {first: Carla Parra, last: Escartín}
-  - {first: Carla Parra, last: Escartin}
-- canonical: {first: Barbara H., last: Partee}
-  variants:
-  - {first: Barbara, last: Partee}
-- canonical: {first: Md. Rizwan, last: Parvez}
-  variants:
-  - {first: Md Rizwan, last: Parvez}
-- canonical: {first: Artemis, last: Parvizi}
-  variants:
-  - {first: Artemis, last: Parvisi}
-- canonical: {first: Marius, last: Pasca}
-  variants:
-  - {first: Marius A., last: Pasca}
-  - {first: Marius, last: Paşca}
-- canonical: {first: Elena, last: Paskaleva}
-  variants:
-  - {first: Elena, last: Pascaleva}
-- canonical: {first: Rebecca J., last: Passonneau}
-  variants:
-  - {first: Rebecca, last: Passonneau}
-- canonical: {first: John K., last: Pate}
-  variants:
-  - {first: John, last: Pate}
-  - {first: John K, last: Pate}
-- canonical: {first: Pu, last: Zhao}
-  id: pu-zhao-northeastern
-  orcid: 0000-0001-5018-2859
-  institution: Northeastern University
-  comment: Northeastern
-- canonical: {first: Pu, last: Zhao}
-  id: pu-zhao
-  comment: May refer to several people
-- canonical: {first: Pratikkumar, last: Patel}
-  variants:
-  - {first: Pratik, last: Patel}
-- canonical: {first: Jon, last: Patrick}
-  variants:
-  - {first: Jon D., last: Patrick}
-  - {first: Jon David, last: Patrick}
-- canonical: {first: Terry, last: Patten}
-  id: terry-patten
-- canonical: {first: Michael, last: Paul}
-  variants:
-  - {first: Michael J., last: Paul}
-- canonical: {first: Niklas, last: Paulsson}
-  id: niklas-paulsson
-- canonical: {first: Jyoti, last: Pawar}
-  variants:
-  - {first: Jyoti D., last: Pawar}
-  - {first: Jyoti D, last: Pawar}
-- canonical: {first: Maria Teresa, last: Pazienza}
-  id: maria-teresa-pazienza
-  variants:
-  - {first: Maria Teresa, last: Pazienze}
-  - {first: Maria, last: Pazienza}
-- canonical: {first: Bolette Sandford, last: Pedersen}
-  variants:
-  - {first: Bolette, last: Sandford Pedersen}
-  - {first: Bolette S., last: Pedersen}
-  - {first: Bolette, last: Pedersen}
-  - {first: Bo, last: Pedersen}
-- canonical: {first: Víctor, last: Peinado}
-  variants:
-  - {first: Victor, last: Peinado}
-- canonical: {first: Bryan, last: Pellom}
-  id: bryan-pellom
-- canonical: {first: Mikel, last: Penagarikano}
-  id: mikel-penagarikano
-- canonical: {first: Yifan, last: Peng}
-  comment: cmu
-  id: yifan-peng-cmu
-- canonical: {first: Christopher, last: Pennington}
-  variants:
-  - {first: Chris, last: Pennington}
-- canonical: {first: Joseph J., last: Peper}
-  variants:
-  - {first: Joseph, last: Peper}
-- canonical: {first: Jesús, last: Peral}
-  id: jesus-peral
-  variants:
-  - {first: Jesus, last: Peral}
-- canonical: {first: Fernando, last: Perdigão}
-  variants:
-  - {first: Fernando S., last: Perdigão}
-- canonical: {first: Jose Manuel, last: Perea-Ortega}
-  variants:
-  - {first: José M., last: Perea-Ortega}
-  - {first: Jose Manuel, last: Perea}
-  - {first: Jose-Manuel, last: Perea-Ortega}
-- canonical: {first: Daniel Bastos, last: Pereira}
-  variants:
-  - {first: Daniel B., last: Pereira}
-- canonical: {first: Fernando C. N., last: Pereira}
-  variants:
-  - {first: Fernando C.N., last: Pereira}
-- canonical: {first: Luísa, last: Pereira}
-  variants:
-  - {first: Luisa, last: Pereira}
-- canonical: {first: Martín, last: Pereira-Fariña}
-  id: martin-pereira-farina
-- canonical: {first: Cenel-Augusto, last: Perez}
-  variants:
-  - {first: Cenel Augusto, last: Perez}
-- canonical: {first: Álvaro, last: Peris}
-  variants:
-  - {first: Alvaro, last: Peris}
-- canonical: {first: C. Raymond, last: Perrault}
-  variants:
-  - {first: Raymond, last: Perrault}
-- canonical: {first: Andreas, last: Persidis}
-  id: andreas-persidis
-- canonical: {first: Marie-Paule, last: Pery-Woodley}
-  variants:
-  - {first: Marie-Paule, last: Péry-Woodley}
-- canonical: {first: John, last: Pestian}
-  variants:
-  - {first: John P., last: Pestian}
-- canonical: {first: Matthew E., last: Peters}
-  variants:
-  - {first: Matthew, last: Peters}
-- canonical: {first: Wim, last: Peters}
-  id: wim-peters
-- canonical: {first: Daniel, last: Peterson}
-  variants:
-  - {first: Daniel W., last: Peterson}
-- canonical: {first: J., last: Peterson}
-  variants:
-  - {first: Jill, last: Peterson}
-- canonical: {first: Vladimir, last: Petkevic}
-  variants:
-  - {first: Vladimír, last: Petkevič}
-- canonical: {first: Saša, last: Petrović}
-  variants:
-  - {first: Sasa, last: Petrovic}
-- canonical: {first: Miriam R. L., last: Petruck}
-  variants:
-  - {first: Miriam R.L., last: Petruck}
-  - {first: Miriam R L, last: Petruck}
-- canonical: {first: Anselmo, last: Peñas}
-  variants:
-  - {first: Anselmo, last: Penas}
-- canonical: {first: Minh Quang, last: Pham}
-  comment: SYSTRAN
-  id: minh-quang-pham
-  similar: [minh-quang-nhat-pham]
-  variants:
-  - {first: MinhQuang, last: Pham}
-- canonical: {first: Minh Quang Nhat, last: Pham}
-  comment: JAIST, Alt Vietnam
-  id: minh-quang-nhat-pham
-  similar: [minh-quang-pham]
-- canonical: {first: Nghia The, last: Pham}
-  variants:
-  - {first: Nghia, last: Pham}
-- canonical: {first: Ngoc-Quan, last: Pham}
-  variants:
-  - {first: Ngoc Quan, last: Pham}
-- canonical: {first: Tuoi Thi, last: Phan}
-  variants:
-  - {first: Tuoi, last: T. Phan}
-- canonical: {first: John, last: Phillips}
-  comment: Univ. of Manchester
-  id: john-phillips
-  similar: [jon-phillips]
-- canonical: {first: Jon, last: Phillips}
-  comment: Georgetown, MITRE
-  id: jon-phillips
-  similar: [john-phillips]
-- canonical: {first: Michael, last: Phillips}
-  id: michael-phillips
-- canonical: {first: Robert, last: Phillips}
-  variants:
-  - {first: Rob, last: Phillips}
-- canonical: {first: Fabio, last: Pianesi}
-  id: fabio-pianesi
-- canonical: {first: Emanuele, last: Pianta}
-  id: emanuele-pianta
-- canonical: {first: Scott S.L., last: Piao}
-  id: scott-s-l-piao
-  variants:
-  - {first: Scott, last: Piao}
-  - {first: Scott S. L., last: Piao}
-- canonical: {first: Christine, last: Piatko}
-  variants:
-  - {first: Christine D., last: Piatko}
-- canonical: {first: Francesco, last: Piazza}
-  id: francesco-piazza
-- canonical: {first: José Ramom, last: Pichel Campos}
-  variants:
-  - {first: José Ramom, last: Pichel}
-  - {first: Jose Ramom, last: Pichel}
-  - {first: Jose Ramom, last: Pichel Campos}
-- canonical: {first: M. A., last: Picheny}
-  variants:
-  - {first: M.A., last: Picheny}
-- canonical: {first: David, last: Picó}
-  variants:
-  - {first: David, last: Pico}
-- canonical: {first: Roberto, last: Pieraccini}
-  id: roberto-pieraccini
-- canonical: {first: David, last: Pierce}
-  variants:
-  - {first: David R., last: Pierce}
-- canonical: {first: Janet, last: Pierrehumbert}
-  variants:
-  - {first: Janet B., last: Pierrehumbert}
-- canonical: {first: Paola, last: Pietrandrea}
-  variants:
-  - {first: Paola, last: Pietandrea}
-- canonical: {first: Luis. A., last: Pineda}
-  variants:
-  - {first: Luis, last: Pineda}
-- canonical: {first: Gisele Montilha, last: Pinheiro}
-  variants:
-  - {first: Gisele, last: Montilha}
-- canonical: {first: Mārcis, last: Pinnis}
-  variants:
-  - {first: Marcis, last: Pinnis}
-- canonical: {first: David, last: Pinto}
-  variants:
-  - {first: David Eduardo, last: Pinto Avendaño}
-  - {first: David, last: Pinto Avendaño}
-- canonical: {first: R., last: Piotrowski}
-  variants:
-  - {first: R. G., last: Piotrowski}
-- canonical: {first: Stelios, last: Piperidis}
-  id: stelios-piperidis
-  variants:
-  - {first: Stelios, last: Piperdis}
-- canonical: {first: Tommi A., last: Pirinen}
-  variants:
-  - {first: Tommi, last: Pirinen}
-  - {first: Tommi A, last: Pirinen}
-- canonical: {first: John F., last: Pitrelli}
-  variants:
-  - {first: John, last: Pitrelli}
-- canonical: {first: Luiz Augusto, last: Pizzato}
-  variants:
-  - {first: Luiz Augusto Sangoi, last: Pizzato}
-- canonical: {first: Paul, last: Placeway}
-  id: paul-placeway
-- canonical: {first: Mihaela, last: Plamada-Onofrei}
-  variants:
-  - {first: Mihaela, last: Onofrei}
-  - {first: Mihaela, last: Plămadă-Onofrei}
-- canonical: {first: Magdalena, last: Plamadă}
-  variants:
-  - {first: Magdalena, last: Plamada}
-- canonical: {first: Barbara, last: Plank}
-  id: barbara-plank
-- canonical: {first: Martin, last: Platek}
-  variants:
-  - {first: Martin, last: Plátek}
-- canonical: {first: John C., last: Platt}
-  variants:
-  - {first: John, last: Platt}
-- canonical: {first: Matúš, last: Pleva}
-  variants:
-  - {first: Matus, last: Pleva}
-- canonical: {first: Massimo, last: Poesio}
-  id: massimo-poesio
-- canonical: {first: Alain, last: Polguère}
-  id: alain-polguere
-  variants:
-  - {first: Alain, last: Polguere}
-- canonical: {first: Joseph, last: Polifroni}
-  variants:
-  - {first: Joseph H., last: Polifroni}
-- canonical: {first: Ziortza, last: Polin}
-  id: ziortza-polin
-- canonical: {first: Carl, last: Pollard}
-  variants:
-  - {first: Carl J., last: Pollard}
-- canonical: {first: Petr, last: Pollák}
-  variants:
-  - {first: Petr, last: Pollak}
-- canonical: {first: Simone Paolo, last: Ponzetto}
-  variants:
-  - {first: Simone P., last: Ponzetto}
-  - {first: Simone, last: Ponzetto}
-- canonical: {first: Diana Nicoleta, last: Popa}
-  variants:
-  - {first: Diana, last: Popa}
-- canonical: {first: Ashok, last: Popat}
-  variants:
-  - {first: Ashok C., last: Popat}
-- canonical: {first: Lubos, last: Popelínsky}
-  variants:
-  - {first: Luboš, last: Popelínský}
-- canonical: {first: Andrei, last: Popescu-Belis}
-  id: andrei-popescu-belis
-  variants:
-  - {first: Andrei, last: Popescu Belis}
-- canonical: {first: Maja, last: Popović}
-  variants:
-  - {first: Maja, last: Popovic}
-- canonical: {first: Bruce, last: Porter}
-  variants:
-  - {first: Bruce W., last: Porter}
-- canonical: {first: Oana, last: Postolache}
-  variants:
-  - {first: Oana-Diana, last: Postolache}
-- canonical: {first: Petra, last: Poukarová}
-  variants:
-  - {first: Petra, last: Klimešová}
-- canonical: {first: Daniel, last: Povey}
-  id: daniel-povey
-- canonical: {first: David M. W., last: Powers}
-  id: david-m-w-powers
-  variants:
-  - {first: David M W, last: Powers}
-- canonical: {first: Maria, last: Pozzi}
-  variants:
-  - {first: María, last: Pozzi}
-  - {first: Mara, last: Pozzi}
-- canonical: {first: Sameer, last: Pradhan}
-  id: sameer-pradhan
-  variants:
-  - {first: Sameer S., last: Pradhan}
-- canonical: {first: K.V.S., last: Prasad}
-  variants:
-  - {first: K.V.S, last: Prasad}
-- canonical: {first: Federico, last: Prat}
-  id: federico-prat
-- canonical: {first: Daniel, last: Preoţiuc-Pietro}
-  variants:
-  - {first: Daniel, last: Preotiuc-Pietro}
-- canonical: {first: Nives Mikelić, last: Preradović}
-  variants:
-  - {first: Nives, last: Mikelić Preradović}
-- canonical: {first: Lauma, last: Pretkalniņa}
-  variants:
-  - {first: Lauma, last: Pretkalnina}
-  - {first: Lauma, last: Pretkalnin̨a}
-- canonical: {first: Sergio José, last: Rodríguez Méndez}
-  orcid: 0000-0001-7203-8399
-  institution: National Yang Ming Chiao Tung University
-  comment: NYCU
-  variants:
-  - {first: Sergio J., last: Rodriguez Mendez}
-- canonical: {first: Patti, last: Price}
-  id: patti-price
-  variants:
-  - {first: Patti J., last: Price}
-- canonical: {first: Belém, last: Priego Sanchez}
-  variants:
-  - {first: Belem, last: Priego}
-- canonical: {first: Joel, last: Priestley}
-  variants:
-  - {first: Joel James, last: Priestley}
-- canonical: {first: Danie J., last: Prinsloo}
-  variants:
-  - {first: Danie, last: Prinsloo}
-- canonical: {first: Ruben A., last: Proano}
-  variants:
-  - {first: Rubén, last: Proaño}
-  - {first: Rubén A., last: Proaño}
-- canonical: {first: Irina, last: Prodanof}
-  id: irina-prodanof
-- canonical: {first: Domenico, last: Proietti}
-  id: domenico-proietti
-- canonical: {first: Carlos A., last: Prolo}
-  variants:
-  - {first: Carlos, last: Prolo}
-- canonical: {first: Gabor, last: Proszeky}
-  variants:
-  - {first: Gábor, last: Prószéky}
-  - {first: Gabor, last: Prbszeky}
-- canonical: {first: Emily, last: Prud’hommeaux}
-  variants:
-  - {first: Emily T., last: Prud’hommeaux}
-  - {first: Emily, last: Prud'hommeaux}
-- canonical: {first: Mark, last: Przybocki}
-  variants:
-  - {first: Mark A., last: Przybocki}
-- canonical: {first: Laurent, last: Prévot}
-  variants:
-  - {first: Laurent, last: Prevot}
-- canonical: {first: Josef, last: Psutka}
-  id: josef-psutka
-  variants:
-  - {first: Josef V., last: Psutka}
-- canonical: {first: Jan, last: Ptacek}
-  variants:
-  - {first: Jan, last: Ptáček}
-- canonical: {first: Raymond, last: Ptucha}
-  variants:
-  - {first: Ray, last: Ptucha}
-- canonical: {first: Rajkumar, last: Pujari}
-  variants:
-  - {first: Pujari, last: Rajkumar}
-- canonical: {first: Paolo, last: Puliti}
-  id: paolo-puliti
-- canonical: {first: Geoffrey K., last: Pullum}
-  variants:
-  - {first: Geoffrey, last: Pullum}
-- canonical: {first: Stephen, last: Pulman}
-  id: stephen-pulman
-  variants:
-  - {first: Stephen G., last: Pulman}
-- canonical: {first: Ryosuke, last: Takahashi}
-  id: ryosuke-takahashi-tohoku
-  orcid: 0009-0002-9887-2781
-  comment: Tohoku
-  institution: Tohoku University
-- canonical: {first: Ryosuke, last: Takahashi}
-  id: ryosuke-takahashi
-  comment: May refer to several people
-- canonical: {first: James, last: Pustejovsky}
-  id: james-pustejovsky
-  variants:
-  - {first: James D., last: Pustejovsky}
-- canonical: {first: Guy, last: Pérennou}
-  id: guy-perennou
-- canonical: {first: Chantal, last: Pérez-Hernández}
-  variants:
-  - {first: Chantal, last: Pérez}
-- canonical: {first: Behrang, last: QasemiZadeh}
-  variants:
-  - {first: Behrang, last: Q. Zadeh}
-  - {first: Behrang Q., last: Zadeh}
-  - {first: Behrang, last: Zadeh}
-  - {first: Behrang, last: Qasemizadeh}
-- canonical: {first: Haoliang, last: Qi}
-  variants:
-  - {first: HaoLiang, last: Qi}
-- canonical: {first: Longhua, last: Qian}
-  variants:
-  - {first: LongHua, last: Qian}
-- canonical: {first: Xin Ying, last: Qiu}
-  variants:
-  - {first: Xinying, last: Qiu}
-- canonical: {first: Yun-Qian, last: Qu}
-  variants:
-  - {first: Yunqian, last: Qu}
-- canonical: {first: Maurice, last: Quezel-Ambrunaz}
-  id: maurice-quezel-ambrunaz
-- canonical: {first: Matthieu, last: Quignard}
-  id: matthieu-quignard
-- canonical: {first: Kevin M., last: Quinn}
-  variants:
-  - {first: Kevin, last: Quinn}
-- canonical: {first: Pattabhi, last: RK Rao}
-  variants:
-  - {first: T. Pattabhi, last: R. K Rao}
-  - {first: Pattabhi RK, last: Rao}
-- canonical: {first: Hazem, last: Raafat}
-  variants:
-  - {first: Hazem, last: M. Raafat}
-- canonical: {first: Lawrence R., last: Rabiner}
-  id: lawrence-r-rabiner
-- canonical: {first: David Nicolas, last: Racca}
-  variants:
-  - {first: David Nicolás, last: Racca}
-- canonical: {first: Dragomir, last: Radev}
-  variants:
-  - {first: Dragomir R., last: Radev}
-- canonical: {first: Remo, last: Raffaelli}
-  id: remo-raffaelli
-- canonical: {first: Anna N., last: Rafferty}
-  variants:
-  - {first: Anna, last: Rafferty}
-- canonical: {first: Ahmed, last: Ragheb}
-  id: ahmed-ragheb
-- canonical: {first: Achla M., last: Raina}
-  variants:
-  - {first: Achla, last: Raina}
-  - {first: Achla M, last: Raina}
-- canonical: {first: S., last: Rajendran}
-  variants:
-  - {first: Rajendran, last: S}
-- canonical: {first: Rajakrishnan, last: Rajkumar}
-  variants:
-  - {first: Rajkumar, last: Rajakrishnan}
-- canonical: {first: Martin, last: Rajman}
-  id: martin-rajman
-- canonical: {first: Ekaterina V., last: Rakhilina}
-  variants:
-  - {first: Ekaterina, last: Rakhilina}
-- canonical: {first: Bhuvana, last: Ramabhadran}
-  id: bhuvana-ramabhadran
-- canonical: {first: Ananth, last: Ramakrishnan A.}
-  variants:
-  - {first: Ananth, last: Ramakrishnan A}
-- canonical: {first: Sv, last: Ramanan}
-  variants:
-  - {first: SV, last: Ramanan}
-- canonical: {first: Owen, last: Rambow}
-  variants:
-  - {first: Owen C., last: Rambow}
-- canonical: {first: Radoslaw, last: Ramocki}
-  variants:
-  - {first: Radosław, last: Ramocki}
-- canonical: {first: Margarita Alonso, last: Ramos}
-  variants:
-  - {first: Margarita, last: Alonso-Ramos}
-- canonical: {first: Lance, last: Ramshaw}
-  variants:
-  - {first: Lance A., last: Ramshaw}
-- canonical: {first: Wenyu, last: Zhang}
-  id: wenyu-zhang-cornell
-  orcid: 0000-0002-3849-4320
-  comment: Cornell
-  institution: Cornell University
-- canonical: {first: Wenyu, last: Zhang}
-  id: wenyu-zhang
-  comment: May refer to several people
-- canonical: {first: Gema, last: Ramírez-Sánchez}
-  variants:
-  - {first: Gema, last: Ramírez}
-- canonical: {first: Bali, last: Ranaivo-Malançon}
-  variants:
-  - {first: Bali, last: Ranaivo-Malancon}
-  - {first: Bali Ranaivo, last: Malanҫon}
-- canonical: {first: Peter A., last: Rankel}
-  variants:
-  - {first: Peter, last: Rankel}
-- canonical: {first: K Sreenivasa, last: Rao}
-  variants:
-  - {first: K. Sreenivasa, last: Rao}
-- canonical: {first: Spyros, last: Raptis}
-  id: spyros-raptis
-- canonical: {first: Mohsen, last: Rashwan}
-  id: mohsen-rashwan
-- canonical: {first: Lev, last: Ratinov}
-  variants:
-  - {first: Lev-Arie, last: Ratinov}
-- canonical: {first: Adwait, last: Ratnaparkhi}
-  id: adwait-ratnaparkhi
-- canonical: {first: Esther, last: Ratsch}
-  id: esther-ratsch
-- canonical: {first: Lisa, last: Rau}
-  variants:
-  - {first: Lisa F., last: Rau}
-- canonical: {first: Yael, last: Ravin}
-  id: yael-ravin
-- canonical: {first: Balaraman, last: Ravindran}
-  id: balaraman-ravindran
-- canonical: {first: Manny, last: Rayner}
-  id: manny-rayner
-- canonical: {first: Agha Ali, last: Raza}
-  variants:
-  - {first: Agha, last: Raza}
-- canonical: {first: Mike, last: Reape}
-  id: mike-reape
-- canonical: {first: Dietrich, last: Rebholz Schuhmann}
-  variants:
-  - {first: Dietrich, last: Rebholz-Schuhmann}
-- canonical: {first: Chris, last: Reed}
-  id: chris-reed
-- canonical: {first: Florence, last: Reeder}
-  variants:
-  - {first: Florence M., last: Reeder}
-- canonical: {first: Larry H., last: Reeker}
-  id: larry-h-reeker
-- canonical: {first: Uwe, last: Reichel}
-  variants:
-  - {first: Uwe D., last: Reichel}
-- canonical: {first: Ronan G., last: Reilly}
-  variants:
-  - {first: Ronan, last: Reilly}
-- canonical: {first: Frederick, last: Reiss}
-  variants:
-  - {first: Frederick R., last: Reiss}
-- canonical: {first: Jose, last: Relaño-Gil}
-  variants:
-  - {first: Jose, last: Relano Gil}
-  - {first: José, last: Relaño Gil}
-  - {first: José, last: Relaño}
-- canonical: {first: Francesc, last: Ribas}
-  variants:
-  - {first: Francesc Ribas, last: Framis}
-- canonical: {first: Marco Tulio, last: Ribeiro}
-  variants:
-  - {first: Marco, last: Ribeiro}
-- canonical: {first: Ricardo, last: Ribeiro}
-  variants:
-  - {first: Ricardo Daniel, last: Ribeiro}
-- canonical: {first: Elaine, last: Rich}
-  variants:
-  - {first: Elaine A., last: Rich}
-- canonical: {first: Alexander, last: Richard Fabbri}
-  variants:
-  - {first: Alexander R., last: Fabbri}
-  - {first: Alexander, last: Fabbri}
-- canonical: {first: German, last: Rigau}
-  id: german-rigau
-- canonical: {first: Michael, last: Riley}
-  variants:
-  - {first: Michael D., last: Riley}
-- canonical: {first: Ellen, last: Riloff}
-  id: ellen-riloff
-- canonical: {first: Hae Chang, last: Rim}
-  variants:
-  - {first: Hae-Chang, last: Rim}
-- canonical: {first: Antonio, last: Rincón}
-  variants:
-  - {first: Antonio, last: Rincon}
-- canonical: {first: Thomas C., last: Rindflesch}
-  variants:
-  - {first: Thomas, last: Rindflesch}
-- canonical: {first: Eric, last: Ringger}
-  variants:
-  - {first: Eric K., last: Ringger}
-- canonical: {first: Annette, last: Rios Gonzales}
-  variants:
-  - {first: Annette, last: Rios}
-- canonical: {first: Eric Sven, last: Ristad}
-  variants:
-  - {first: Eric, last: Ristad}
-- canonical: {first: Graeme, last: Ritchie}
-  id: graeme-ritchie
-  variants:
-  - {first: Graeme D., last: Ritchie}
-- canonical: {first: Hammam, last: Riza}
-  variants:
-  - {first: Ir. Hammam, last: Riza}
-- canonical: {first: Albert A., last: Rizzo}
-  variants:
-  - {first: Albert, last: Rizzo}
-  - {first: Skip, last: Rizzo}
-  - {first: Albert Skip, last: Rizzo}
-- canonical: {first: Nick, last: Rizzolo}
-  variants:
-  - {first: Nicholas, last: Rizzolo}
-- canonical: {first: Jane J., last: Robinson}
-  variants:
-  - {first: Jane, last: Robinson}
-- canonical: {first: Patricia, last: Robinson}
-  id: patricia-robinson
-- canonical: {first: Leonida Della, last: Rocca}
-  variants:
-  - {first: Leonida, last: Della-Rocca}
-  - {first: Leonida, last: Della Rocca}
-- canonical: {first: Martha-Alicia, last: Rocha}
-  variants:
-  - {first: Martha Alicia, last: Rocha}
-- canonical: {first: Tim, last: Rocktäschel}
-  variants:
-  - {first: Tim, last: Rocktaschel}
-- canonical: {first: Álvaro, last: Rodrigo}
-  variants:
-  - {first: Alvaro, last: Rodrigo}
-- canonical: {first: Luis, last: Rodrigo-Aguado}
-  variants:
-  - {first: Luis, last: Rodrigo}
-- canonical: {first: Irene, last: Rodrigues}
-  variants:
-  - {first: Irene Pimenta, last: Rodrigues}
-  - {first: Irene, last: Pimenta Rodrigues}
-- canonical: {first: João, last: Rodrigues}
-  variants:
-  - {first: João, last: António Rodrigues}
-- canonical: {first: Kepa Joseba, last: Rodriguez}
-  variants:
-  - {first: Kepa J., last: Rodríguez}
-  - {first: Kepa Joseba, last: Rodríguez}
-- canonical: {first: H., last: Rodriguez Hontoria}
-  variants:
-  - {first: H., last: Rodriguez}
-- canonical: {first: Victor, last: Rodriguez-Doncel}
-  variants:
-  - {first: Víctor, last: Rodríguez}
-  - {first: Victor, last: Rodríguez Doncel}
-- canonical: {first: Luis Javier, last: Rodriguez-Fuentes}
-  variants:
-  - {first: Luis Javier, last: Rodríguez-Fuentes}
-- canonical: {first: Mari Carmen, last: Rodriguez-Gancedo}
-  id: mari-carmen-rodriguez-gancedo
-  variants:
-  - {first: M. Carmen Rodríguez, last: Gancedo}
-  - {first: M. Carmen, last: Rodríguez}
-  - {first: Mari Carmen, last: Rodríguez}
-- canonical: {first: Carlos, last: Rodriguez-Penagos}
-  variants:
-  - {first: Carlos, last: Rodríguez}
-  - {first: Carlos Rodriguez, last: Penagos}
-  - {first: Carlos, last: Rodríguez Penagos}
-  - {first: Carlos, last: Rodríguez-Penagos}
-- canonical: {first: Horacio, last: Rodríguez}
-  variants:
-  - {first: Horacio, last: Rodriguez}
-- canonical: {first: Miguel, last: Rodríguez Hernández}
-  variants:
-  - {first: Miguel Ángel, last: Rodríguez}
-  - {first: Miguel, last: Rodríguez}
-- canonical: {first: Christophe, last: Roeder}
-  variants:
-  - {first: Chris, last: Roeder}
-- canonical: {first: Ina, last: Roesiger}
-  variants:
-  - {first: Ina, last: Rösiger}
-- canonical: {first: U., last: Rohini}
-  variants:
-  - {first: Rohini, last: U}
-- canonical: {first: J. Robin, last: Rohlicek}
-  id: j-robin-rohlicek
-  variants:
-  - {first: Robin, last: Rohlicek}
-- canonical: {first: David M., last: Rojas}
-  variants:
-  - {first: David, last: Rojas}
-- canonical: {first: Lina M., last: Rojas Barahona}
-  variants:
-  - {first: Lina M., last: Rojas-Barahona}
-  - {first: Lina, last: Rojas-Barahona}
-  - {first: Lina, last: Rojas}
-  - {first: Lina Maria, last: Rojas-Barahona}
-- canonical: {first: Norton Trevisan, last: Roman}
-  variants:
-  - {first: Norton T., last: Roman}
-  - {first: Norton, last: Trevisan Roman}
-- canonical: {first: Daniela M., last: Romano}
-  variants:
-  - {first: Daniela, last: Romano}
-- canonical: {first: Lorenza, last: Romano}
-  id: lorenza-romano
-- canonical: {first: Laurent, last: Romary}
-  id: laurent-romary
-- canonical: {first: Tiit, last: Roosmaa}
-  id: tiit-roosmaa
-- canonical: {first: Paul, last: Roossin}
-  id: paul-roossin
-- canonical: {first: Carolyn, last: Rose}
-  id: carolyn-rose
-  variants:
-  - {first: Carolyn P., last: Rose}
-  - {first: Carolyn P., last: Rosé}
-  - {first: Carolyn, last: P. Rosé}
-  - {first: Carolyn Penstein, last: Rose}
-  - {first: Carolyn, last: Penstein Rosé}
-  - {first: Carolyn Penstein, last: Rosé}
-  - {first: Carolyn, last: Penstein-Rosé}
-  - {first: Carolyn, last: Rosé}
-- canonical: {first: Tony, last: Rose}
-  id: tony-rose
-  variants:
-  - {first: Tony G., last: Rose}
-- canonical: {first: Ronald, last: Rosenfeld}
-  id: ronald-rosenfeld
-- canonical: {first: Stanley J., last: Rosenschein}
-  variants:
-  - {first: Stanley, last: Rosenschein}
-  - {first: Stan, last: Rosenschein}
-- canonical: {first: Michael, last: Rosner}
-  id: michael-rosner
-  variants:
-  - {first: Mike, last: Rosner}
-- canonical: {first: Peter, last: Rossen Skadhauge}
-  variants:
-  - {first: Peter Rossen, last: Skadhauge}
-- canonical: {first: Sophie, last: Rosset}
-  id: sophie-rosset
-- canonical: {first: Piercarlo, last: Rossi}
-  id: piercarlo-rossi
-- canonical: {first: Stefano Dei, last: Rossi}
-  variants:
-  - {first: Stefano, last: Dei Rossi}
-- canonical: {first: Alexandre, last: Rossi Alvares}
-  variants:
-  - {first: Alexandre Rossi, last: Alvares}
-- canonical: {first: Antti-Veikko, last: Rosti}
-  variants:
-  - {first: Antti-Veikko I., last: Rosti}
-- canonical: {first: Ryan, last: Roth}
-  variants:
-  - {first: Ryan, last: M. Roth}
-- canonical: {first: Steven, last: Roth}
-  variants:
-  - {first: Steven F., last: Roth}
-- canonical: {first: Jacques, last: Rouault}
-  id: jacques-rouault
-- canonical: {first: Brigitte, last: Roudaud}
-  id: brigitte-roudaud
-- canonical: {first: Salim, last: Roukos}
-  id: salim-roukos
-- canonical: {first: Grégory, last: Roulet--Guiot}
-  variants:
-  - {first: Grégory, last: Roulet-Guiot}
-- canonical: {first: François, last: Rousselot}
-  id: francois-rousselot
-  variants:
-  - {first: Francois, last: Rousselot}
-- canonical: {first: Bryan R., last: Routledge}
-  variants:
-  - {first: Bryan, last: Routledge}
-- canonical: {first: Justus C., last: Roux}
-  id: justus-c-roux
-- canonical: {first: Rachel Edita, last: Roxas}
-  variants:
-  - {first: Rachel Edita O., last: Roxas}
-  - {first: Rachel, last: Roxas}
-- canonical: {first: Deb, last: Roy}
-  variants:
-  - {first: Suman, last: Deb Roy}
-- canonical: {first: Antje, last: Roßdeutscher}
-  variants:
-  - {first: Antje, last: Rossdeutscher}
-- canonical: {first: Victoria L., last: Rubin}
-  variants:
-  - {first: Victoria, last: Rubin}
-- canonical: {first: Raphael, last: Rubino}
-  variants:
-  - {first: Raphaël, last: Rubino}
-- canonical: {first: Antonio J., last: Rubio}
-  id: antonio-j-rubio
-- canonical: {first: Alex, last: Rudnick}
-  id: alex-rudnick
-  similar: [alexander-rudnicky]
-- canonical: {first: Alexander, last: Rudnicky}
-  id: alexander-rudnicky
-  similar: [alex-rudnick]
-  variants:
-  - {first: Alexander I., last: Rudnicky}
-  - {first: Alex, last: Rudnicky}
-- canonical: {first: Björn, last: Rudzewitz}
-  variants:
-  - {first: Bjoern, last: Rudzewitz}
-- canonical: {first: Stefan, last: Rued}
-  variants:
-  - {first: Stefan, last: Rüd}
-- canonical: {first: Pablo, last: Ruiz Fabo}
-  variants:
-  - {first: Pablo, last: Ruiz}
-- canonical: {first: María, last: Ruiz-Casado}
-  variants:
-  - {first: Maria, last: Ruiz-Casado}
-- canonical: {first: Juana María, last: Ruiz-Martínez}
-  variants:
-  - {first: Juana Maria, last: Ruiz-Martínez}
-  - {first: Juana Maria, last: Ruiz Martinez}
-- canonical: {first: C.J., last: Rupp}
-  variants:
-  - {first: C. J., last: Rupp}
-- canonical: {first: Alexander M., last: Rush}
-  variants:
-  - {first: Alexander, last: Rush}
-- canonical: {first: Albert, last: Russel}
-  id: albert-russel
-- canonical: {first: Graham, last: Russell}
-  id: graham-russell
-  variants:
-  - {first: Graham J., last: Russell}
-- canonical: {first: Martin, last: Russell}
-  id: martin-russell
-- canonical: {first: Natalia Kariaeva, last: Rutgers}
-  variants:
-  - {first: Natalia, last: Kariaeva}
-- canonical: {first: Jean David, last: Ruvini}
-  variants:
-  - {first: Jean-David, last: Ruvini}
-- canonical: {first: Tatyana, last: Ruzsics}
-  variants:
-  - {first: Tatiana, last: Ruzsics}
-- canonical: {first: Karen L., last: Ryan}
-  variants:
-  - {first: Karen, last: Ryan}
-- canonical: {first: Pavel, last: Rychlý}
-  variants:
-  - {first: Pavel, last: Rychly}
-- canonical: {first: Ju-yeon, last: Ryu}
-  variants:
-  - {first: Ju-Yeon, last: Ryu}
-- canonical: {first: Won Ho, last: Ryu}
-  variants:
-  - {first: Won-Ho, last: Ryu}
-- canonical: {first: Eirikur, last: Rögnvaldsson}
-  variants:
-  - {first: Eiríkur, last: Rögnvaldsson}
-- canonical: {first: Dietmar, last: Rösner}
-  id: dietmar-rosner
-  variants:
-  - {first: Dietmar, last: Rosner}
-  - {first: Dietmar F., last: Roesner}
-  - {first: Dietmar, last: Roesner}
-- canonical: {first: Carlos Subirats, last: Rüggeberg}
-  variants:
-  - {first: Carlos, last: Subirats}
-- canonical: {first: Lakshmi, last: S}
-  variants:
-  - {first: Lakshmi, last: Saheer}
-  - {first: Lakshmi, last: S.}
-- canonical: {first: Houda, last: Saadane}
-  variants:
-  - {first: Houda, last: Saâdane}
-- canonical: {first: Sari, last: Saba-Sadiya}
-  variants:
-  - {first: Sari, last: Sadiya}
-- canonical: {first: Victor, last: Sadler}
-  id: victor-sadler
-- canonical: {first: Mehrnoosh, last: Sadrzadeh}
-  id: mehrnoosh-sadrzadeh
-- canonical: {first: Naomi, last: Sager}
-  id: naomi-sager
-- canonical: {first: Benoît, last: Sagot}
-  variants:
-  - {first: Benoit, last: Sagot}
-- canonical: {first: Herve, last: Saint-Amand}
-  variants:
-  - {first: Hervé, last: Saint-Amand}
-- canonical: {first: Patrick, last: Saint-Dizier}
-  variants:
-  - {first: Patrick, last: Saint Dizier}
-- canonical: {first: Suguru, last: Saitô}
-  variants:
-  - {first: Suguru, last: Saito}
-- canonical: {first: Rafa, last: Saiz}
-  id: rafa-saiz
-- canonical: {first: Maximiliano, last: Saiz-Noeda}
-  id: maximiliano-saiz-noeda
-- canonical: {first: Satoshi, last: Sakai}
-  id: satoshi-sakai
-- canonical: {first: Sebastián Peña, last: Saldarriaga}
-  variants:
-  - {first: Peña, last: Saldarriaga}
-  - {first: Sebastian, last: Peña Saldarriaga}
-- canonical: {first: Juliano Efson, last: Sales}
-  variants:
-  - {first: Juliano, last: Efson Sales}
-  - {first: Juliano, last: Sales}
-- canonical: {first: Morris, last: Salkoff}
-  id: morris-salkoff
-- canonical: {first: Ansaf, last: Salleb-Aouissi}
-  variants:
-  - {first: Ansaf, last: Salleb-Aoussi}
-- canonical: {first: Gerard, last: Salton}
-  id: gerard-salton
-- canonical: {first: Giancarlo, last: Salton}
-  variants:
-  - {first: Giancarlo D., last: Salton}
-- canonical: {first: Madis, last: Saluveer}
-  id: madis-saluveer
-- canonical: {first: Sethserey, last: Sam*’}
-  variants:
-  - {first: Sethserey, last: Sam}
-- canonical: {first: Rasoul, last: Samad Zadeh Kaljahi}
-  variants:
-  - {first: Rasul, last: Samad Zadeh Kaljahi}
-- canonical: {first: Tanja, last: Samardzic}
-  variants:
-  - {first: Tanja, last: Samardžić}
-- canonical: {first: Nagiza, last: Samatova}
-  variants:
-  - {first: Nagiza F., last: Samatova}
-- canonical: {first: Hossein, last: Sameti}
-  id: hossein-sameti
-- canonical: {first: Ken, last: Samuel}
-  variants:
-  - {first: Kenneth, last: Samuel}
-- canonical: {first: Ruben, last: San-Segundo}
-  variants:
-  - {first: Rubén, last: San-Segundo}
-- canonical: {first: Daniel, last: Sanchez-Cisneros}
-  variants:
-  - {first: Daniel, last: Sánchez}
-- canonical: {first: Olivia, last: Sanchez-Graillet}
-  variants:
-  - {first: Olivia, last: Sanchez}
-- canonical: {first: Emilio, last: Sanchis}
-  variants:
-  - {first: Emilio, last: Sanchís}
-- canonical: {first: Germán, last: Sanchis-Trilles}
-  variants:
-  - {first: Germán, last: Sanchis Trilles}
-  - {first: Germán, last: Sanchis}
-- canonical: {first: Gregory, last: Sanders}
-  variants:
-  - {first: Gregory A., last: Sanders}
-  - {first: Greg, last: Sanders}
-- canonical: {first: Baskaran, last: Sankaran}
-  variants:
-  - {first: Sankaran, last: Baskaran}
-- canonical: {first: Beatrice, last: Santorini}
-  id: beatrice-santorini
-- canonical: {first: Eddie Antonio, last: Santos}
-  orcid: 0000-0001-5337-715X
-  variants:
-  - {first: Eddie A., last: Santos}
-  - {first: Eddie, last: Antonio Santos}
-  - {first: Eddie, last: Santos}
-- canonical: {first: Estela, last: Saquete}
-  id: estela-saquete
-  variants:
-  - {first: Estela, last: Saquete Boro}
-- canonical: {first: Murat, last: Saraclar}
-  variants:
-  - {first: Murat, last: Saraçlar}
-- canonical: {first: Xabier, last: Saralegi}
-  id: xabier-saralegi
-- canonical: {first: Kepa, last: Sarasola}
-  id: kepa-sarasola
-- canonical: {first: K, last: Saravanan}
-  id: k-saravanan
-  variants:
-  - {first: Saravanan, last: K}
-  - {first: K., last: Saravanan}
-- canonical: {first: Ruhi, last: Sarikaya}
-  variants:
-  - {first: Ruhi, last: Srikaya}
-- canonical: {first: Efsun, last: Sarioglu Kayi}
-  variants:
-  - {first: Efsun, last: Sarioglu}
-- canonical: {first: Anish Das, last: Sarma}
-  variants:
-  - {first: Atish Das, last: Sarma}
-- canonical: {first: Shikhar Kr., last: Sarma}
-  degree: Gauhati University
-  orcid: 0000-0002-9495-1901
-  id: shikhar-kumar-sarma-gu
-  variants:
-  - {first: Shikhar, last: Sarma}
-  - {first: Shikhar, last: Sharma}
-- canonical: {first: Shikhar, last: Sharma}
-  comment: May refer to multiple people
-  id: shikhar-sharma
-- canonical: {first: Vaijayanthi M., last: Sarma}
-  variants:
-  - {first: Vaijayanthi, last: Sarma}
-- canonical: {first: Satoshi, last: Sato}
-  id: satoshi-sato
-- canonical: {first: Pavankumar, last: Satuluri}
-  variants:
-  - {first: Pavan Kumar, last: Satuluri}
-- canonical: {first: Baiba, last: Saulīte}
-  variants:
-  - {first: Baiba, last: Saulite}
-- canonical: {first: Roser, last: Saurí}
-  variants:
-  - {first: Roser, last: Sauri}
-- canonical: {first: Asad, last: Sayeed}
-  variants:
-  - {first: Asad B., last: Sayeed}
-- canonical: {first: Yucel, last: Saygin}
-  variants:
-  - {first: Yücel, last: Saygın}
-- canonical: {first: Carolina, last: Scarton}
-  variants:
-  - {first: Carolina Evaristo, last: Scarton}
-- canonical: {first: Remko, last: Scha}
-  id: remko-scha
-  variants:
-  - {first: Remko J. H., last: Scha}
-- canonical: {first: Moritz, last: Schaeffer}
-  variants:
-  - {first: Moritz Jonas, last: Schaeffer}
-- canonical: {first: Roger C., last: Schank}
-  variants:
-  - {first: Roger, last: Schank}
-- canonical: {first: Peter, last: Schauble}
-  variants:
-  - {first: Peter, last: Schäuble}
-- canonical: {first: Judith D., last: Schlesinger}
-  variants:
-  - {first: Judith, last: Schlesinger}
-- canonical: {first: Michael, last: Schlichtkrull}
-  variants:
-  - {first: Michael Sejr, last: Schlichtkrull}
-- canonical: {first: Ralf, last: Schlueter}
-  variants:
-  - {first: Ralf, last: Schlüter}
-- canonical: {first: Julian J., last: Schlöder}
-  variants:
-  - {first: Julian, last: Schlöder}
-- canonical: {first: Laurent, last: Schmitt}
-  id: laurent-schmitt
-- canonical: {first: René, last: Schneider}
-  variants:
-  - {first: Rene, last: Schneider}
-- canonical: {first: Edward, last: Schofield}
-  variants:
-  - {first: Ed, last: Schofield}
-- canonical: {first: Natalie M., last: Schrimpf}
-  variants:
-  - {first: Natalie, last: Schrimpf}
-- canonical: {first: Elizabeth, last: Schroeder}
-  variants:
-  - {first: Elizabeth Schroeder, last: Richerson}
-  - {first: Elizabeth, last: Richerson}
-- canonical: {first: Lenhart, last: Schubert}
-  variants:
-  - {first: Lenhart K., last: Schubert}
-  - {first: Len, last: Schubert}
-- canonical: {first: Björn, last: Schuller}
-  variants:
-  - {first: Bjoern, last: Schuller}
-- canonical: {first: Sabine, last: Schulte im Walde}
-  variants:
-  - {first: Sabine, last: Schulte Im Walde}
-  - {first: Sabine, last: Schulte in Walde}
-- canonical: {first: Robert T., last: Schultz}
-  variants:
-  - {first: Robert, last: Schultz}
-- canonical: {first: Julia Maria, last: Schulz}
-  variants:
-  - {first: Julia, last: Schulz}
-- canonical: {first: Stefan, last: Schulz}
-  variants:
-  - {first: Stefan, last: Schultz}
-- canonical: {first: Sarah E., last: Schwarm}
-  variants:
-  - {first: Sarah, last: Schwarm}
-- canonical: {first: Ariel, last: Schwartz}
-  variants:
-  - {first: Ariel S., last: Schwartz}
-- canonical: {first: H. Andrew, last: Schwartz}
-  variants:
-  - {first: Hansen Andrew, last: Schwartz}
-  - {first: Hansen A., last: Schwartz}
-  - {first: H Andrew, last: Schwartz}
-- canonical: {first: Richard, last: Schwartz}
-  id: richard-schwartz
-  variants:
-  - {first: Rich, last: Schwartz}
-- canonical: {first: Ulrich, last: Schäfer}
-  variants:
-  - {first: Ulrich, last: Schafer}
-  - {first: Ulrich, last: Schaefer}
-- canonical: {first: Martin, last: Schäler}
-  variants:
-  - {first: Martin, last: Schäfer}
-- canonical: {first: Reinhard, last: Schäler}
-  variants:
-  - {first: Reinhard, last: Schaler}
-- canonical: {first: Hinrich, last: Schütze}
-  variants:
-  - {first: Hinrich, last: Schutze}
-  - {first: Hinrich, last: Schuetze}
-- canonical: {first: Donia, last: Scott}
-  id: donia-scott
-  variants:
-  - {first: Donia R., last: Scott}
-- canonical: {first: Djamé, last: Seddah}
-  variants:
-  - {first: Djame, last: Seddah}
-- canonical: {first: Roxane, last: Segers}
-  variants:
-  - {first: Roxanne, last: Segers}
-- canonical: {first: Frédérique, last: Segond}
-  variants:
-  - {first: Frederique, last: Segond}
-- canonical: {first: Jérémie, last: Segouat}
-  id: jeremie-segouat
-- canonical: {first: Isabel, last: Segura-Bedmar}
-  variants:
-  - {first: Isabel, last: Segura Bedmar}
-- canonical: {first: Corrado, last: Seidenari}
-  id: corrado-seidenari
-- canonical: {first: Bernard, last: Seite}
-  id: bernard-seite
-- canonical: {first: Ethan, last: Selfridge}
-  variants:
-  - {first: Ethan O., last: Selfridge}
-- canonical: {first: Sathiya Keerthi, last: Selvaraj}
-  variants:
-  - {first: Sathiya, last: Keerthi}
-- canonical: {first: Jiří, last: Semecký}
-  variants:
-  - {first: Jirí, last: Semecky}
-- canonical: {first: Giovanni, last: Semeraro}
-  id: giovanni-semeraro
-- canonical: {first: Stephanie, last: Seneff}
-  id: stephanie-seneff
-- canonical: {first: Hongsuck, last: Seo}
-  variants:
-  - {first: Paul Hongsuck, last: Seo}
-- canonical: {first: Jungyun, last: Seo}
-  variants:
-  - {first: Jung Yun, last: Seo}
-- canonical: {first: Luciano, last: Serafini}
-  id: luciano-serafini
-- canonical: {first: Iulian Vlad, last: Serban}
-  variants:
-  - {first: Iulian, last: Serban}
-- canonical: {first: Jean-François, last: Serignat}
-  id: jean-francois-serignat
-- canonical: {first: Nicolás, last: Serrano}
-  variants:
-  - {first: Nicolas, last: Serrano}
-- canonical: {first: Christophe, last: Servan}
-  id: christophe-servan
-- canonical: {first: Andrea, last: Setzer}
-  id: andrea-setzer
-- canonical: {first: Jurica, last: Seva}
-  variants:
-  - {first: Jurica, last: Ševa}
-- canonical: {first: Ayisigi B., last: Sevdik-Calli}
-  variants:
-  - {first: Ayişiği, last: Sevdik-Çalli}
-- canonical: {first: Binyam Ephrem, last: Seyoum}
-  variants:
-  - {first: Binyam, last: Ephrem}
-- canonical: {first: Petr, last: Sgall}
-  id: petr-sgall
-- canonical: {first: Khaled, last: Shaban}
-  variants:
-  - {first: Khaled, last: Bashir Shaban}
-- canonical: {first: Rajiv, last: Shah}
-  variants:
-  - {first: Rajiv Ratn, last: Shah}
-- canonical: {first: Ritesh, last: Shah}
-  variants:
-  - {first: Ritesh M., last: Shah}
-- canonical: {first: Mostafa, last: Shahin}
-  id: mostafa-shahin
-- canonical: {first: Adi, last: Shalev}
-  variants:
-  - {first: Adi, last: Bitan}
-- canonical: {first: Zoya M., last: Shalyapina}
-  id: zoya-m-shalyapina
-  variants:
-  - {first: Zoyn M., last: Shalyapina}
-- canonical: {first: Stuart C., last: Shapiro}
-  id: stuart-c-shapiro
-- canonical: {first: Abdul-Baquee, last: Sharaf}
-  variants:
-  - {first: Abdul-Baquee M., last: Sharaf}
-- canonical: {first: Dipti Misra, last: Sharma}
-  variants:
-  - {first: Dipti, last: Misra Sharma}
-  - {first: Dipti, last: Sharma}
-  - {first: Dipti M., last: Sharma}
-  - {first: Dipti, last: Misra}
-  - {first: Dipti M, last: Sharma}
-- canonical: {first: Harsh Vardhan, last: Sharma}
-  variants:
-  - {first: Harsh, last: Sharma}
-- canonical: {first: Vishnu Dutt, last: Sharma}
-  variants:
-  - {first: Vishnu, last: Sharma}
-- canonical: {first: Richard A., last: Sharman}
-  id: richard-a-sharman
-- canonical: {first: Stefanie, last: Shattuck-Hufnagel}
-  variants:
-  - {first: S. Shattuck, last: Hufnagel}
-- canonical: {first: Hassan S., last: Shavarani}
-  variants:
-  - {first: Hassan, last: Shavarani}
-- canonical: {first: Bayan Abu, last: Shawar}
-  variants:
-  - {first: Bayan, last: Abu Shawar}
-- canonical: {first: Kathleen M., last: Sheehan}
-  variants:
-  - {first: Kathleen, last: Sheehan}
-- canonical: {first: Golnar, last: Sheikhshab}
-  variants:
-  - {first: Golnar, last: Sheikhshabbafghi}
-- canonical: {first: Jia-Lin, last: Shen}
-  variants:
-  - {first: Jia-lin, last: Shen}
-- canonical: {first: David D., last: Sherertz}
-  id: david-d-sherertz
-- canonical: {first: Mohamed Ahmed, last: Sherif}
-  variants:
-  - {first: Mohamed, last: Sherif}
-- canonical: {first: Kyumars, last: Sheykh Esmaili}
-  variants:
-  - {first: Kyumars Sheykh, last: Esmaili}
-- canonical: {first: Freda, last: Shi}
-  id: freda-shi
-  orcid: 0009-0009-5697-449X
-  variants:
-  - {first: Haoyue, last: Shi}
-- canonical: {first: Stuart M., last: Shieber}
-  variants:
-  - {first: Stuart, last: Shieber}
-- canonical: {first: Hsue-Hueh, last: Shih}
-  variants:
-  - {first: Rebecca Hsue-Hueh, last: Shih}
-- canonical: {first: Katsumasa, last: Shimizu}
-  id: katsumasa-shimizu
-- canonical: {first: Tohru, last: Shimizu}
-  variants:
-  - {first: Toru, last: Shimizu}
-- canonical: {first: Mitsuo, last: Shimohata}
-  id: mitsuo-shimohata
-- canonical: {first: Saim, last: Shin}
-  variants:
-  - {first: Sa-Im, last: Shin}
-- canonical: {first: Katsuhiko, last: Shirai}
-  id: katsuhiko-shirai
-- canonical: {first: Satoshi, last: Shirai}
-  variants:
-  - {first: Satosi, last: Shirai}
-- canonical: {first: Praneeth M., last: Shishtla}
-  variants:
-  - {first: Praneeth, last: Shishtla}
-  - {first: Praneeth M, last: Shishtla}
-- canonical: {first: Darla Magdalene, last: Shockley}
-  variants:
-  - {first: Darla, last: Shockley}
-- canonical: {first: Prajwol, last: Shrestha}
-  variants:
-  - {first: Prajol, last: Shrestha}
-- canonical: {first: Elizabeth, last: Shriberg}
-  id: elizabeth-shriberg
-- canonical: {first: Manish, last: Shrivastava}
-  variants:
-  - {first: Manish, last: Srivastava}
-- canonical: {first: Heung Yeung, last: Shum}
-  variants:
-  - {first: Heung-Yeung, last: Shum}
-- canonical: {first: Elvira I., last: Sicilia-Garcia}
-  id: elvira-i-sicilia-garcia
-- canonical: {first: Candace L., last: Sidner}
-  variants:
-  - {first: Candace, last: Sidner}
-- canonical: {first: Gerardo, last: Sierra}
-  variants:
-  - {first: Gerardo, last: Sierra-Martínez}
-- canonical: {first: Utpal Kumar, last: Sikdar}
-  variants:
-  - {first: Utpal, last: Sikdar}
-- canonical: {first: Avirup, last: Sil}
-  variants:
-  - {first: Avi, last: Sil}
-- canonical: {first: Max, last: Silberztein}
-  variants:
-  - {first: Max D., last: Silberztein}
-- canonical: {first: Miikka, last: Silfverberg}
-  variants:
-  - {first: Miikka P., last: Silfverberg}
-- canonical: {first: João, last: Silva}
-  variants:
-  - {first: João Ricardo, last: Silva}
-- canonical: {first: Mario J., last: Silva}
-  variants:
-  - {first: Mário J., last: Silva}
-  - {first: Mário, last: Silva}
-- canonical: {first: Kim E. A., last: Silverman}
-  variants:
-  - {first: Kim E.A., last: Silverman}
-- canonical: {first: Khe Chai, last: Sim}
-  variants:
-  - {first: Khe-Chai, last: Sim}
-- canonical: {first: Karin, last: Sim Smith}
-  variants:
-  - {first: Karin Sim, last: Smith}
-- canonical: {first: Khalil, last: Sima’an}
-  id: khalil-simaan
-- canonical: {first: Katalin Ilona, last: Simkó}
-  variants:
-  - {first: Katalin, last: Simkó}
-- canonical: {first: Anca-Roxana, last: Simon}
-  variants:
-  - {first: Anca, last: Simon}
-  - {first: Anca-Roxana, last: Şimon}
-- canonical: {first: Nathalie, last: Simonin}
-  id: nathalie-simonin
-- canonical: {first: Dan, last: Simonson}
-  variants:
-  - {first: Daniel, last: Simonson}
-- canonical: {first: Kiril, last: Simov}
-  variants:
-  - {first: Kiril Iv., last: Simov}
-- canonical: {first: King Kui, last: Sin}
-  id: king-kui-sin
-  variants:
-  - {first: KingKui, last: Sin}
-- canonical: {first: Anil Kumar, last: Singh}
-  variants:
-  - {first: Anil, last: Kumar Singh}
-- canonical: {first: Munindar P., last: Singh}
-  variants:
-  - {first: Munindar, last: Singh}
-- canonical: {first: Thoudam Doren, last: Singh}
-  variants:
-  - {first: Thoudam, last: Doren Singh}
-- canonical: {first: R Mahesh K, last: Sinha}
-  variants:
-  - {first: R. Mahesh K., last: Sinha}
-- canonical: {first: Inguna, last: Skadiņa}
-  variants:
-  - {first: Inguna, last: Skadina}
-  - {first: Inguna, last: Skadin̨a}
-- canonical: {first: Wojciech, last: Skalmowski}
-  id: wojciech-skalmowski
-- canonical: {first: Romuald, last: Skiba}
-  id: romuald-skiba
-- canonical: {first: Steven, last: Skiena}
-  variants:
-  - {first: Steve, last: Skiena}
-- canonical: {first: Michael, last: Skinner}
-  variants:
-  - {first: Michael A., last: Skinner}
-- canonical: {first: Hana, last: Skoumalova}
-  variants:
-  - {first: Hana, last: Skoumalová}
-- canonical: {first: Frank, last: Smadja}
-  variants:
-  - {first: Frank A., last: Smadja}
-- canonical: {first: Kamel, last: Smaili}
-  variants:
-  - {first: Kamel, last: Smaïli}
-- canonical: {first: Nasser, last: Smaili}
-  id: nasser-smaili
-- canonical: {first: Sharon, last: Small}
-  variants:
-  - {first: Sharon, last: Gower Small}
-- canonical: {first: R. A., last: Smit}
-  variants:
-  - {first: R.A., last: Smit}
-- canonical: {first: Andrew, last: Smith}
-  variants:
-  - {first: Andrew E., last: Smith}
-- canonical: {first: Brian Cantwell, last: Smith}
-  variants:
-  - {first: Brian, last: Smith}
-- canonical: {first: David A., last: Smith}
-  variants:
-  - {first: David, last: Smith}
-  - {first: David Addison, last: Smith}
-- canonical: {first: Francis J., last: Smith}
-  id: francis-j-smith
-- canonical: {first: Jason, last: Smith}
-  variants:
-  - {first: Jason R., last: Smith}
-- canonical: {first: Mark H., last: Smith}
-  variants:
-  - {first: Mark, last: Smith}
-- canonical: {first: Noah A., last: Smith}
-  variants:
-  - {first: Noah, last: Smith}
-- canonical: {first: Raoul N., last: Smith}
-  variants:
-  - {first: Raoul N, last: Smith}
-- canonical: {first: Ronnie W., last: Smith}
-  variants:
-  - {first: Ronnie, last: Smith}
-- canonical: {first: Otakar, last: Smrz}
-  variants:
-  - {first: Otakar, last: Smrž}
-- canonical: {first: Pavel, last: Smrz}
-  variants:
-  - {first: Pavel, last: Smrž}
-- canonical: {first: Gyri, last: Smørdal Losnegaard}
-  variants:
-  - {first: Gyri S., last: Losnegaard}
-  - {first: Gyri, last: Losnegaard}
-- canonical: {first: Matthew, last: Snover}
-  variants:
-  - {first: Matthew G., last: Snover}
-- canonical: {first: Marco Antonio, last: Sobrevilla Cabezudo}
-  variants:
-  - {first: Marco A., last: Sobrevilla Cabezudo}
-  - {first: Marco, last: Sobrevilla}
-- canonical: {first: Stephen, last: Soderland}
-  id: stephen-soderland
-- canonical: {first: Sylvana, last: Sofkova Hashemi}
-  variants:
-  - {first: Sylvana, last: Sofkova}
-- canonical: {first: Artem, last: Sokolov}
-  variants:
-  - {first: Artem, last: Sokokov}
-- canonical: {first: Juan José Rodríguez, last: Soler}
-  variants:
-  - {first: Juan José, last: Rodríguez}
-- canonical: {first: Joan, last: Soler i Bou}
-  variants:
-  - {first: Joan, last: Soler}
-- canonical: {first: Juan, last: Soler-Company}
-  variants:
-  - {first: Juan, last: Soler Company}
-- canonical: {first: Aitor, last: Sologaistoa}
-  id: aitor-sologaistoa
-- canonical: {first: Harold, last: Somers}
-  id: harold-somers
-  variants:
-  - {first: Harold L., last: Somers}
-- canonical: {first: Norman K., last: Sondheimer}
-  variants:
-  - {first: Norman, last: Sondheimer}
-- canonical: {first: Young Chol, last: Song}
-  variants:
-  - {first: Young C., last: Song}
-- canonical: {first: Cagil, last: Sonmez}
-  variants:
-  - {first: Çağıl, last: Sönmez}
-  - {first: Cagil, last: Sönmez}
-- canonical: {first: Von-Wun, last: Soo}
-  variants:
-  - {first: Von-wun, last: Soo}
-- canonical: {first: Frank K., last: Soong}
-  variants:
-  - {first: Frank, last: Soong}
-- canonical: {first: Jeffrey, last: Sorensen}
-  variants:
-  - {first: Jeffrey S., last: Sorensen}
-- canonical: {first: Aitor, last: Soroa}
-  id: aitor-soroa
-  variants:
-  - {first: Aitor, last: Soroa Etxabe}
-- canonical: {first: Ionut, last: Sorodoc}
-  variants:
-  - {first: Ionut-Teodor, last: Sorodoc}
-- canonical: {first: William, last: Soto Martinez}
-  variants:
-  - {first: William, last: Soto}
-- canonical: {first: Susana, last: Sotelo}
-  orcid: 0000-0002-0067-7957
-  variants:
-  - {first: Susana Sotelo, last: Docio}
-- canonical: {first: Maria Clara Paixão de, last: Sousa}
-  variants:
-  - {first: Maria Clara, last: Paixão de Sousa}
-- canonical: {first: David Cabrero, last: Souto}
-  variants:
-  - {first: David, last: Cabrero}
-- canonical: {first: Jackson, last: Souza}
-  id: jackson-souza
-- canonical: {first: Vinícius Mourão Alves de, last: Souza}
-  variants:
-  - {first: Vinícius Mourão Alves, last: de Souza}
-- canonical: {first: Irena, last: Spasić}
-  variants:
-  - {first: Irena, last: Spasic}
-- canonical: {first: Manuela, last: Speranza}
-  id: manuela-speranza
-- canonical: {first: Valentin I., last: Spitkovsky}
-  variants:
-  - {first: Valentin, last: Spitkovsky}
-- canonical: {first: Drahomíra “johanka”, last: Spoustová}
-  variants:
-  - {first: Johanka, last: Spoustová}
-  - {first: Drahomíra „johanka“, last: Spoustová}
-- canonical: {first: Richard, last: Sproat}
-  variants:
-  - {first: Richard W., last: Sproat}
-- canonical: {first: Rachele, last: Sprugnoli}
-  id: rachele-sprugnoli
-- canonical: {first: Shannon L., last: Spruit}
-  variants:
-  - {first: Shannon, last: Spruit}
-- canonical: {first: Peter, last: Spyns}
-  id: peter-spyns
-- canonical: {first: Constantine D., last: Spyropoulos}
-  variants:
-  - {first: Constantine, last: Spyropoulos}
-- canonical: {first: Karen, last: Spärck Jones}
-  id: karen-sparck-jones
-  variants:
-  - {first: Karen, last: Sparck Jones}
-  - {first: Karen, last: Jones}
-- canonical: {first: Rohini K., last: Srihari}
-  variants:
-  - {first: Rohini, last: Srihari}
-  - {first: K. Rohini, last: Srihari}
-- canonical: {first: Munirathnam, last: Srikanth}
-  id: munirathnam-srikanth
-  variants:
-  - {first: Muirathnam, last: Srikanth}
-- canonical: {first: Somayajulu, last: Sripada}
-  variants:
-  - {first: Somayajulu G., last: Sripada}
-  - {first: Somayajula G., last: Sripada}
-  - {first: Somayajulu Gowri, last: Sripada}
-- canonical: {first: Ankit, last: Srivastava}
-  variants:
-  - {first: Ankit Kumar, last: Srivastava}
-  - {first: Ankit K., last: Srivastava}
-  - {first: Ankit, last: Kumar}
-- canonical: {first: Edward, last: Stabler}
-  variants:
-  - {first: Edward P., last: 'Stabler, Jr.'}
-  - {first: Edward P., last: Stabler}
-- canonical: {first: Gregory, last: Stainhauer}
-  id: gregory-stainhauer
-- canonical: {first: David, last: Stallard}
-  id: david-stallard
-  variants:
-  - {first: David G., last: Stallard}
-- canonical: {first: Bonnie Glover, last: Stalls}
-  variants:
-  - {first: Bonnie, last: Glover}
-- canonical: {first: Efstathios, last: Stamatatos}
-  id: efstathios-stamatatos
-- canonical: {first: Ranka, last: Stanković}
-  variants:
-  - {first: Ranka, last: Stankoviæ}
-- canonical: {first: Ingrid, last: Starke}
-  id: ingrid-starke
-- canonical: {first: Anatoli, last: Starostin}
-  variants:
-  - {first: Anatoly, last: Starostin}
-- canonical: {first: Mark, last: Steedman}
-  id: mark-steedman
-- canonical: {first: Dan, last: Stefanescu}
-  variants:
-  - {first: Dan, last: Ştefănescu}
-  - {first: Dan, last: Ştefanescu}
-  - {first: Dan, last: Ștefănescu}
-- canonical: {first: Stefan, last: Steidl}
-  id: stefan-steidl
-- canonical: {first: Erich H., last: Steiner}
-  variants:
-  - {first: Erich, last: Steiner}
-- canonical: {first: Egon, last: Stemle}
-  variants:
-  - {first: Egon W., last: Stemle}
-- canonical: {first: Amanda, last: Stent}
-  id: amanda-stent
-  variants:
-  - {first: Amanda J., last: Stent}
-- canonical: {first: Evgeny, last: Stepanov}
-  variants:
-  - {first: Evgeny A., last: Stepanov}
-- canonical: {first: Richard M., last: Stern}
-  variants:
-  - {first: Richard, last: Stern}
-- canonical: {first: Rosemary, last: Stevenson}
-  id: rosemary-stevenson
-- canonical: {first: Brandon M., last: Stewart}
-  variants:
-  - {first: Brandon, last: Stewart}
-- canonical: {first: Robert, last: Stewart}
-  variants:
-  - {first: Rob, last: Stewart}
-- canonical: {first: Andreas, last: Stolcke}
-  id: andreas-stolcke
-- canonical: {first: Scott C., last: Stoness}
-  variants:
-  - {first: Scott, last: Stoness}
-- canonical: {first: Dennis Ryan, last: Storoshenko}
-  variants:
-  - {first: Dennis R., last: Storoshenko}
-- canonical: {first: Marco Antonio, last: Stranisci}
-  variants:
-  - {first: Marco, last: Stranisci}
-- canonical: {first: Stephanie, last: Strassel}
-  variants:
-  - {first: Stephanie M., last: Strassel}
-- canonical: {first: Helmer, last: Strik}
-  id: helmer-strik
-- canonical: {first: Lena, last: Stromback}
-  variants:
-  - {first: Lena, last: Strömbäck}
-- canonical: {first: Jennifer, last: Stromer-Galley}
-  variants:
-  - {first: Jennifer, last: Strommer-Galley}
-- canonical: {first: Tomek, last: Strzalkowski}
-  id: tomek-strzalkowski
-  variants:
-  - {first: Tomek, last: Strzalkowskl}
-- canonical: {first: Sofia, last: Strönbergsson}
-  variants:
-  - {first: Sofia, last: Strömbergsson}
-- canonical: {first: Janienke, last: Sturm}
-  id: janienke-sturm
-- canonical: {first: Dean, last: Sturtevant}
-  variants:
-  - {first: Dean G., last: Sturtevant}
-- canonical: {first: Margo, last: Stys-Budzikowska}
-  variants:
-  - {first: Margo, last: Budzikowska}
-  - {first: Margo, last: Stys}
-- canonical: {first: Marie-Hélène, last: Stéfanini}
-  variants:
-  - {first: Marie-Helene, last: Stefanini}
-- canonical: {first: Yang, last: Zhang}
-  comment: USTC
-  id: yang-zhang-ustc
-  orcid: 0000-0002-7863-5183
-  institution: University of Science and Technology of China
-- canonical: {first: Yang, last: Zhang}
-  id: yang-zhang
-  comment: May refer to several people
-- canonical: {first: Sebastian, last: Stüker}
-  variants:
-  - {first: Sebastian, last: Stueker}
-- canonical: {first: Cheng-chao, last: Su}
-  variants:
-  - {first: Cheng-Chao, last: Su}
-- canonical: {first: L. Venkata, last: Subramaniam}
-  id: l-venkata-subramaniam
-  variants:
-  - {first: L Venkata, last: Subramaniam}
-- canonical: {first: Shivashankar, last: Subramanian}
-  variants:
-  - {first: S., last: Shivashankar}
-- canonical: {first: Amarnag, last: Subramanya}
-  variants:
-  - {first: Amar, last: Subramanya}
-- canonical: {first: Fabian, last: Suchanek}
-  variants:
-  - {first: Fabian M., last: Suchanek}
-- canonical: {first: Vit, last: Suchomel}
-  variants:
-  - {first: Vít, last: Suchomel}
-- canonical: {first: David, last: Suendermann-Oeft}
-  variants:
-  - {first: David, last: Suendermann}
-- canonical: {first: Masakatsu, last: Sugimoto}
-  id: masakatsu-sugimoto
-- canonical: {first: Ryochi, last: Sugimura}
-  id: ryochi-sugimura
-- canonical: {first: Yoshi, last: Suhara}
-  variants:
-  - {first: Yoshihiko, last: Suhara}
-- canonical: {first: '', last: Sukhada}
-  variants:
-  - {first: Sukhada, last: Palkar}
-- canonical: {first: Jana, last: Sukkarieh}
-  variants:
-  - {first: Jana Z., last: Sukkarieh}
-- canonical: {first: Md Arafat, last: Sultan}
-  variants:
-  - {first: Md. Arafat, last: Sultan}
-  - {first: Md., last: Sultan}
-- canonical: {first: Eiichiro, last: Sumita}
-  variants:
-  - {first: Eiichro, last: Sumita}
-- canonical: {first: Cheng-Jie, last: Sun}
-  variants:
-  - {first: Chengjie, last: Sun}
-- canonical: {first: Jingguang, last: Sun}
-  variants:
-  - {first: JingGuang, last: Sun}
-- canonical: {first: Sheng-he, last: Sun}
-  variants:
-  - {first: Sheng-He, last: Sun}
-- canonical: {first: Weiwei, last: Sun}
-  comment: Shandong University
-  id: weiwei-sun-sd
-- canonical: {first: Yufang, last: Sun}
-  variants:
-  - {first: Yu-fang, last: Sun}
-- canonical: {first: Vijay, last: Sundar Ram}
-  variants:
-  - {first: Vijay Sundar, last: Ram}
-  - {first: R. Vijay Sundar, last: Ram}
-  - {first: Vijay Sundar Ram, last: R}
-- canonical: {first: Sowmya S., last: Sundaram}
-  variants:
-  - {first: Sowmya S, last: Sundaram}
-- canonical: {first: Beth M., last: Sundheim}
-  variants:
-  - {first: Beth, last: Sundheim}
-- canonical: {first: Yao-Ting, last: Sung}
-  variants:
-  - {first: Yao-Ting, last: Hung}
-- canonical: {first: Simon, last: Suster}
-  variants:
-  - {first: Simon, last: Šuster}
-- canonical: {first: Richard F. E., last: Sutcliffe}
-  variants:
-  - {first: Richard F.E., last: Sutcliffe}
-- canonical: {first: Armando, last: Suárez}
-  id: armando-suarez
-- canonical: {first: Mari Carmen, last: Suárez-Figueroa}
-  variants:
-  - {first: M. Carmen, last: Suárez-Figueroa}
-- canonical: {first: Piergiorgio, last: Svaizer}
-  id: piergiorgio-svaizer
-- canonical: {first: Ben, last: Swanson}
-  variants:
-  - {first: Benjamin, last: Swanson}
-- canonical: {first: Daniel G., last: Swanson}
-  variants:
-  - {first: Daniel, last: Swanson}
-- canonical: {first: Robert S., last: Swier}
-  variants:
-  - {first: Robert, last: Swier}
-- canonical: {first: Mary, last: Swift}
-  variants:
-  - {first: Mary D., last: Swift}
-- canonical: {first: A.J.M., last: Szanser}
-  variants:
-  - {first: A.J., last: Szanser}
-- canonical: {first: Stan, last: Szpakowicz}
-  variants:
-  - {first: Stanislaw, last: Szpakowicz}
-  - {first: Stanisław, last: Szpakowicz}
-- canonical: {first: Marcin, last: Szummer}
-  variants:
-  - {first: Martin, last: Szummer}
-- canonical: {first: Joan-Andreu, last: Sánchez}
-  variants:
-  - {first: Joan-Andreu, last: Sanchez}
-  - {first: Joan Andreu, last: Sánchez}
-- canonical: {first: Jon, last: Sánchez}
-  id: jon-sanchez
-  variants:
-  - {first: Jon, last: Sanchez}
-- canonical: {first: Víctor M., last: Sánchez-Cartagena}
-  variants:
-  - {first: Victor M., last: Sánchez-Cartagena}
-- canonical: {first: Cristina, last: Sánchez-Marco}
-  variants:
-  - {first: Cristina, last: Marco}
-  - {first: Cristina Sánchez, last: Marco}
-- canonical: {first: J. Fernando, last: Sánchez-Rada}
-  variants:
-  - {first: Fernando, last: Sánchez-Rada}
-- canonical: {first: Ágnes, last: Sándor}
-  variants:
-  - {first: Agnes, last: Sandor}
-- canonical: {first: Anna, last: Sågvall Hein}
-  variants:
-  - {first: Anna Sagvall, last: Hein}
-  - {first: Anna Sågvall, last: Hein}
-- canonical: {first: Rune, last: Sætre}
-  variants:
-  - {first: Rune, last: Saetre}
-- canonical: {first: Gilles, last: Sérasset}
-  variants:
-  - {first: Gilles, last: Serasset}
-- canonical: {first: Anders, last: Søgaard}
-  variants:
-  - {first: Anders, last: Sogaard}
-- canonical: {first: Chris, last: Thomas}
-  id: chris-thomas
-  orcid: 0000-0002-3226-396X
-  variants:
-  - {first: Christopher, last: Thomas}
-- canonical: {first: Christopher, last: Thomas}
-  comment: May refer to several people
-  id: christopher-thomas
-- canonical: {first: Maite, last: Taboada}
-  id: maite-taboada
-- canonical: {first: Martha Yifiru, last: Tachbelie}
-  variants:
-  - {first: Martha, last: Yifiru Tachbelie}
-- canonical: {first: Thiago D., last: Tadeu}
-  variants:
-  - {first: Thiago, last: Tadeu}
-- canonical: {first: Chia-Hung, last: Tai}
-  variants:
-  - {first: Chia-hung, last: Tai}
-- canonical: {first: John, last: Tait}
-  variants:
-  - {first: John Irving, last: Tait}
-- canonical: {first: Kazuya, last: Takeda}
-  variants:
-  - {first: Kasuya, last: Takeda}
-- canonical: {first: Yuka, last: Takei}
-  variants:
-  - {first: Yuya, last: Takei}
-- canonical: {first: Zeerak, last: Talat}
-  variants:
-  - {first: Zeerak, last: Waseem}
-- canonical: {first: Susan W., last: Talbott}
-  variants:
-  - {first: Susan, last: Talbott}
-- canonical: {first: Partha, last: Talukdar}
-  variants:
-  - {first: Partha Pratim, last: Talukdar}
-  - {first: Partha, last: Pratim Talukdar}
-  - {first: Partha P., last: Talukdar}
-- canonical: {first: Wai Lok, last: Tam}
-  variants:
-  - {first: Wailok, last: Tam}
-- canonical: {first: Fabio, last: Tamburini}
-  id: fabio-tamburini
-- canonical: {first: Noriyuki, last: Tamura}
-  id: noriyuki-tamura
-- canonical: {first: Chew Lim, last: Tan}
-  variants:
-  - {first: Chew-Lim, last: Tan}
-  - {first: ChewLim, last: Tan}
-- canonical: {first: Kumiko, last: Tanaka-Ishii}
-  variants:
-  - {first: Kumiko, last: Tanaka}
-- canonical: {first: Hristo, last: Tanev}
-  variants:
-  - {first: Hristo, last: Tannev}
-- canonical: {first: Ahmet Cüneyd, last: Tantuğ}
-  variants:
-  - {first: A. Cüneyd, last: Tantuǧ}
-- canonical: {first: Daniel, last: Tapias}
-  variants:
-  - {first: Daniel Tapias, last: Merino}
-- canonical: {first: Doina, last: Tatar}
-  variants:
-  - {first: Doina, last: Tătar}
-- canonical: {first: Yuka, last: Tateisi}
-  variants:
-  - {first: Yuka, last: Tateishi}
-- canonical: {first: Mariona, last: Taulé}
-  id: mariona-taule
-  variants:
-  - {first: Mariona, last: Taule}
-- canonical: {first: Miriam, last: Tavoni}
-  id: miriam-tavoni
-- canonical: {first: Sarah, last: Taylor}
-  variants:
-  - {first: Sarah M., last: Taylor}
-- canonical: {first: Suzanne Liebowitz, last: Taylor}
-  variants:
-  - {first: Suzanne, last: Liebowitz}
-- canonical: {first: William J., last: Teahan}
-  id: william-j-teahan
-  variants:
-  - {first: William J, last: Teahan}
-- canonical: {first: João Paulo, last: Teixeira}
-  id: joao-paulo-teixeira
-  variants:
-  - {first: João P., last: Teixeira}
-- canonical: {first: Eric Sadit, last: Tellez}
-  variants:
-  - {first: Eric S., last: Tellez}
-- canonical: {first: Ashish V., last: Tendulkar}
-  variants:
-  - {first: Ashish, last: Tendulkar}
-- canonical: {first: Yonglin, last: Teng}
-  variants:
-  - {first: Yong-lin, last: Teng}
-- canonical: {first: Harry, last: Tennant}
-  variants:
-  - {first: Harry R., last: Tennant}
-- canonical: {first: Alexandre, last: Termier}
-  id: alexandre-termier
-- canonical: {first: Egidio L., last: Terra}
-  variants:
-  - {first: Egidio, last: Terra}
-- canonical: {first: Maurizio, last: Tesconi}
-  variants:
-  - {first: Maurizio, last: Tescon}
-- canonical: {first: Joel, last: Tetreault}
-  variants:
-  - {first: Joel R., last: Tetreault}
-- canonical: {first: Lisanne, last: Teunissen}
-  variants:
-  - {first: Lisa, last: Teunissen}
-- canonical: {first: Mariët, last: Theune}
-  id: mariet-theune
-  variants:
-  - {first: Mariet, last: Theune}
-- canonical: {first: John C., last: Thomas}
-  variants:
-  - {first: John, last: Thomas}
-- canonical: {first: Richmond H., last: Thomason}
-  variants:
-  - {first: Richmond, last: Thomason}
-- canonical: {first: Henry S., last: Thompson}
-  variants:
-  - {first: Henry, last: Thompson}
-- canonical: {first: Hanne Erdman, last: Thomsen}
-  variants:
-  - {first: Hanne, last: Erdman Thomsen}
-- canonical: {first: Chalathip, last: Thumkanon}
-  variants:
-  - {first: Chalatip, last: Thumkanon}
-- canonical: {first: Junfeng, last: Tian}
-  variants:
-  - {first: Jun Feng, last: Tian}
-- canonical: {first: Jörg, last: Tiedemann}
-  variants:
-  - {first: Jorg, last: Tiedemann}
-  - {first: Joerg, last: Tiedemann}
-- canonical: {first: Laszlo, last: Tihanyi}
-  variants:
-  - {first: László, last: Tihanyi}
-- canonical: {first: Christoph, last: Tillmann}
-  id: christoph-tillmann
-- canonical: {first: Harry J., last: Tily}
-  variants:
-  - {first: Harry, last: Tily}
-- canonical: {first: Ismail, last: Timimi}
-  id: ismail-timimi
-  variants:
-  - {first: Ismaïl, last: Timimi}
-- canonical: {first: Neil, last: Tipper}
-  id: neil-tipper
-- canonical: {first: Erik, last: Tjong Kim Sang}
-  variants:
-  - {first: Erik F., last: Tjong Kim Sang}
-- canonical: {first: Tomoki, last: Toda}
-  variants:
-  - {first: Tomiki, last: Toda}
-- canonical: {first: Amalia, last: Todirascu}
-  variants:
-  - {first: Amalia, last: Todiraşcu}
-- canonical: {first: Doroteo T., last: Toledano}
-  variants:
-  - {first: Doroteo Torre, last: Toledano}
-  - {first: Doroteo, last: Toledano}
-- canonical: {first: Gaurav Singh, last: Tomar}
-  variants:
-  - {first: Gaurav, last: Singh}
-- canonical: {first: David, last: Tomas}
-  variants:
-  - {first: David, last: Tomás}
-- canonical: {first: Masaru, last: Tomita}
-  id: masaru-tomita
-- canonical: {first: Yoshihiro, last: Tomiyama}
-  id: yoshihiro-tomiyama
-- canonical: {first: Laura Mayfield, last: Tomokiyo}
-  variants:
-  - {first: Laura, last: Mayfield}
-- canonical: {first: Loong-Cheong, last: Tong}
-  variants:
-  - {first: Loong Cheong, last: Tong}
-- canonical: {first: Fatemeh, last: Torabi Asr}
-  variants:
-  - {first: Fatemeh Torabi, last: Asr}
-- canonical: {first: Adrià, last: Torrens Urrutia}
-  variants:
-  - {first: Adrià, last: Torrens-Urrutia}
-- canonical: {first: Tiago Timponi, last: Torrent}
-  variants:
-  - {first: Tiago, last: Torrent}
-  - {first: Tiago T., last: Torrent}
-- canonical: {first: Yixuan, last: Tang}
-  comment: HKUST
-  id: yixuan-tang-hkust
-  orcid: 0009-0006-2405-2026
-  institution: Hong Kong University of Science and Technology
-- canonical: {first: Yixuan, last: Tang}
-  comment: May refer to several people
-  id: yixuan-tang
-- canonical: {first: M. Inés, last: Torres}
-  variants:
-  - {first: María Inés, last: Torres}
-- canonical: {first: Juan-Manuel, last: Torres-Moreno}
-  variants:
-  - {first: Juan-Manuel Torres, last: Moreno}
-  - {first: Juan-Manuel, last: Torres}
-- canonical: {first: Dilara, last: Torunoğlu-Selamet}
-  variants:
-  - {first: Dilara, last: Torunoǧlu}
-- canonical: {first: Alejandro H., last: Toselli}
-  variants:
-  - {first: Alejandro Héctor, last: Toselli}
-- canonical: {first: Kanokorn, last: Trakultaweekoon}
-  variants:
-  - {first: Kanokorn, last: Trakultaweekool}
-- canonical: {first: Do-Dat, last: Tran}
-  variants:
-  - {first: Do Dat, last: Tran}
-- canonical: {first: Duc-Vu, last: Tran}
-  variants:
-  - {first: Vu Duc, last: Tran}
-- canonical: {first: Giang Binh, last: Tran}
-  variants:
-  - {first: Giang, last: Tran}
-- canonical: {first: Ke M., last: Tran}
-  variants:
-  - {first: Ke, last: Tran}
-  - {first: Ke, last: Tran Manh}
-- canonical: {first: Mai-Vu, last: Tran}
-  variants:
-  - {first: Mai-vu, last: Tran}
-- canonical: {first: Nam-Khanh, last: Tran}
-  variants:
-  - {first: Nam Khanh, last: Tran}
-- canonical: {first: Quan Hung, last: Tran}
-  variants:
-  - {first: Quan, last: Tran}
-- canonical: {first: Tuan, last: Tran}
-  variants:
-  - {first: Tuan Dung, last: Tran}
-- canonical: {first: Viet Hong, last: Tran}
-  variants:
-  - {first: Viet-Hong, last: Tran}
-- canonical: {first: Diana, last: Trandabat}
-  variants:
-  - {first: Diana, last: Trandabăț}
-  - {first: Diana, last: Trandabăţ}
-  - {first: Diana Marie, last: Trandabăţ}
-- canonical: {first: David, last: Traum}
-  variants:
-  - {first: David R., last: Traum}
-- canonical: {first: Beata, last: Trawiński}
-  variants:
-  - {first: Beata, last: Trawinski}
-- canonical: {first: Jérémy, last: Trione}
-  variants:
-  - {first: Jeremy, last: Trione}
-- canonical: {first: Marian, last: Trnka}
-  variants:
-  - {first: Marián, last: Trnka}
-- canonical: {first: Cassia, last: Trojahn}
-  variants:
-  - {first: Cássia, last: Trojahn}
-- canonical: {first: Roy, last: Tromble}
-  variants:
-  - {first: Roy W., last: Tromble}
-- canonical: {first: Raphael, last: Troncy}
-  variants:
-  - {first: Raphaël, last: Troncy}
-- canonical: {first: Harald, last: Trost}
-  id: harald-trost
-- canonical: {first: Thomas Alexander, last: Trost}
-  variants:
-  - {first: Thomas, last: Trost}
-- canonical: {first: Khiet P., last: Truong}
-  variants:
-  - {first: Khiet, last: Truong}
-- canonical: {first: Mei-Chih, last: Tsai}
-  variants:
-  - {first: Mei-chih, last: Tsai}
-- canonical: {first: Ming-Feng, last: Tsai}
-  variants:
-  - {first: Meng-Feng, last: Tsai}
-- canonical: {first: Richard Tzong-Han, last: Tsai}
-  variants:
-  - {first: Tzong-Han, last: Tsai}
-  - {first: Tzong-Han Richard, last: Tsai}
-  - {first: Richard Tzong-han, last: Tsai}
-- canonical: {first: Sung-Fung, last: Tsai}
-  variants:
-  - {first: Sung-Feng, last: Tsai}
-- canonical: {first: Chiu-yu, last: Tseng}
-  variants:
-  - {first: Chiu-Yu, last: Tseng}
-- canonical: {first: Chiung-hui, last: Tseng}
-  variants:
-  - {first: Chiung-Hui, last: Tseng}
-- canonical: {first: Huihsin, last: Tseng}
-  variants:
-  - {first: Hui-hsin, last: Tseng}
-  - {first: Hui-Hsin, last: Tseng}
-- canonical: {first: Yuen-Hsien, last: Tseng}
-  variants:
-  - {first: Yuan-Hsien, last: Tseng}
-- canonical: {first: Pirros, last: Tsiakoulis}
-  id: pirros-tsiakoulis
-- canonical: {first: Benjamin K., last: Tsou}
-  id: benjamin-k-tsou
-  variants:
-  - {first: Benjamin K.Y., last: Tsou}
-  - {first: Benjamin K., last: T’sou}
-  - {first: Benjamin, last: Tsou}
-  - {first: Benjamin K, last: Tsou}
-- canonical: {first: Jun’ichi, last: Tsujii}
-  id: junichi-tsujii
-  variants:
-  - {first: Jun-ichi, last: Tsujii}
-  - {first: Jun-Ichi, last: Tsujii}
-  - {first: Junichi, last: Tsujii}
-  - {first: Jun-ich, last: Tsujii}
-- canonical: {first: Junya, last: Tsutsumi}
-  id: junya-tsutsumi
-- canonical: {first: Wen-Hsiang, last: Tu}
-  variants:
-  - {first: Wen-hsiang, last: Tu}
-- canonical: {first: Ying-Chieh, last: Tu}
-  variants:
-  - {first: Ying-chieh, last: Tu}
-- canonical: {first: Luu Anh, last: Tuan}
-  variants:
-  - {first: Anh, last: Luu}
-  - {first: Anh Tuan, last: Luu}
-- canonical: {first: Allen B., last: Tucker}
-  variants:
-  - {first: Allen, last: Tucker}
-- canonical: {first: Catalina Oana, last: Tudor}
-  variants:
-  - {first: Catalina O., last: Tudor}
-- canonical: {first: Dan, last: Tufiş}
-  variants:
-  - {first: Dan, last: Tufis}
-  - {first: Dan, last: Tufiș}
-- canonical: {first: Giovanni, last: Tummarello}
-  id: giovanni-tummarello
-- canonical: {first: Gokhan, last: Tur}
-  id: gokhan-tur
-  variants:
-  - {first: Gokhan, last: Tür}
-- canonical: {first: Umit Deniz, last: Turan}
-  variants:
-  - {first: Ümit Deniz, last: Turan}
-- canonical: {first: Ramona Andreea, last: Turcu}
-  variants:
-  - {first: Ramona-Andreea, last: Turcu}
-- canonical: {first: Joseph, last: Turian}
-  variants:
-  - {first: Joseph P., last: Turian}
-- canonical: {first: Franco, last: Turini}
-  id: franco-turini
-- canonical: {first: Jordi, last: Turmo}
-  id: jordi-turmo
-- canonical: {first: Peter, last: Turney}
-  variants:
-  - {first: Peter D., last: Turney}
-- canonical: {first: Howard R., last: Turtle}
-  variants:
-  - {first: Howard, last: Turtle}
-- canonical: {first: Agnès, last: Tutin}
-  variants:
-  - {first: Agnes, last: Tutin}
-- canonical: {first: Mark S., last: Tuttle}
-  id: mark-s-tuttle
-- canonical: {first: Francis, last: Tyers}
-  variants:
-  - {first: Francis M., last: Tyers}
-- canonical: {first: Evelyne, last: Tzoukermann}
-  id: evelyne-tzoukermann
-- canonical: {first: Ferhan, last: Türe}
-  variants:
-  - {first: Ferhan, last: Ture}
-- canonical: {first: Raghavendra, last: Udupa}
-  variants:
-  - {first: Raghavendra Udupa, last: U.}
-- canonical: {first: Yoshihiro, last: Ueda}
-  id: yoshihiro-ueda
-- canonical: {first: Shunsuke, last: Uemura}
-  variants:
-  - {first: Syunsuke, last: Uemura}
-- canonical: {first: Chunyang, last: Jiang}
-  comment: HKUST
-  id: chunyang-jiang-hkust
-  orcid: 0009-0005-3401-4093
-  institution: Hong Kong University of Science and Technology
-- canonical: {first: Chunyang, last: Jiang}
-  id: chunyang-jiang
-  comment: May refer to several people
-- canonical: {first: Alexandra L., last: Uitdenbogerd}
-  variants:
-  - {first: Alexandra, last: Uitdenbogerd}
-- canonical: {first: Nancy, last: Underwood}
-  variants:
-  - {first: Nancy L., last: Underwood}
-- canonical: {first: Marcus, last: Uneson}
-  variants:
-  - {first: Markus, last: Uneson}
-- canonical: {first: Lyle, last: Ungar}
-  variants:
-  - {first: Lyle H., last: Ungar}
-- canonical: {first: L. Alfonso, last: Urena Lopez}
-  variants:
-  - {first: L. Alfonso, last: Ureña-López}
-  - {first: L. Alfonso, last: Ureña López}
-  - {first: L. Alfonso, last: Urena-López}
-  - {first: L. Alfonso, last: Urena}
-  - {first: Alfonso, last: Ureña-López}
-  - {first: Luis Alfonso, last: Ureña-López}
-  - {first: L. Alfonso, last: Ureña- López}
-- canonical: {first: Zdenka, last: Uresova}
-  variants:
-  - {first: Zdeňka, last: Urešová}
-- canonical: {first: Ruben, last: Urizar}
-  id: ruben-urizar
-  variants:
-  - {first: Rubén, last: Urizar}
-- canonical: {first: Miriam, last: Urkia}
-  id: miriam-urkia
-- canonical: {first: Cristian, last: Ursu}
-  variants:
-  - {first: Christian, last: Ursu}
-- canonical: {first: Suzan, last: Uskudarli}
-  variants:
-  - {first: Suzan, last: Üsküdarlı}
-- canonical: {first: David C., last: Uthus}
-  variants:
-  - {first: David, last: Uthus}
-- canonical: {first: Ozlem, last: Uzuner}
-  variants:
-  - {first: Özlem, last: Uzuner}
-- canonical: {first: Elaine, last: Uí Dhonnchadha}
-  id: elaine-ui-dhonnchadha
-- canonical: {first: Arjun Atreya, last: V}
-  variants:
-  - {first: Arjun, last: Atreya V}
-  - {first: Arjun, last: Atreya}
-- canonical: {first: Subbarao K., last: V}
-  variants:
-  - {first: K.V., last: Subbarao}
-  - {first: Subbarao K, last: V.}
-- canonical: {first: Devadath, last: V V}
-  variants:
-  - {first: Devadath V, last: V}
-- canonical: {first: Mayank N., last: Vahia}
-  variants:
-  - {first: Mayank, last: Vahia}
-- canonical: {first: Jacqueline, last: Vaissiere}
-  variants:
-  - {first: Jacqueline, last: Vaissière}
-- canonical: {first: Antonio S., last: Valderrábanos}
-  variants:
-  - {first: Antonio S., last: Valderrabanos}
-- canonical: {first: Oto, last: Vale}
-  variants:
-  - {first: Oto A., last: Vale}
-- canonical: {first: Marco A., last: Valenzuela-Escárcega}
-  variants:
-  - {first: Marco Antonio, last: Valenzuela-Escárcega}
-- canonical: {first: Andre, last: Valli}
-  variants:
-  - {first: André, last: Valli}
-- canonical: {first: Valtcho, last: Valtchev}
-  id: valtcho-valtchev
-- canonical: {first: Andoni, last: Valverde}
-  id: andoni-valverde
-- canonical: {first: M. Pilar, last: Valverde Ibáñez}
-  variants:
-  - {first: M. Pilar, last: Valverde Ibañez}
-- canonical: {first: Carol, last: Van Ess-Dykema}
-  variants:
-  - {first: Carol J., last: Van Ess-Dykema}
-  - {first: Carol, last: VanEss-Dykema}
-- canonical: {first: Marjo, last: Van Koppen}
-  variants:
-  - {first: Marjo, last: van Koppen}
-- canonical: {first: Tim, last: Van de Cruys}
-  variants:
-  - {first: Tim, last: Van De Cruys}
-- canonical: {first: Aline A., last: Vanin}
-  variants:
-  - {first: Aline, last: Vanin}
-- canonical: {first: Tristan, last: Vanrullen}
-  variants:
-  - {first: Tristan, last: van Rullen}
-  - {first: Tristan, last: Van Rullen}
-- canonical: {first: Jerome, last: Vapillon}
-  id: jerome-vapillon
-- canonical: {first: Dániel, last: Varga}
-  id: daniel-varga
-  variants:
-  - {first: Daniel, last: Varga}
-- canonical: {first: István, last: Varga}
-  variants:
-  - {first: Istvan, last: Varga}
-- canonical: {first: Giovanni Battista, last: Varile}
-  id: giovanni-battista-varile
-  variants:
-  - {first: Giovanni B., last: Varile}
-- canonical: {first: Dusan, last: Varis}
-  variants:
-  - {first: Dušan, last: Variš}
-- canonical: {first: Ioana, last: Vasilescu}
-  id: ioana-vasilescu
-- canonical: {first: Gunaranjan, last: Vasireddy}
-  id: gunaranjan-vasireddy
-- canonical: {first: Andrejs, last: Vasiļjevs}
-  variants:
-  - {first: Andrejs, last: Vasiljevs}
-- canonical: {first: Alexander, last: Vasserman}
-  variants:
-  - {first: Alex, last: Vasserman}
-- canonical: {first: Dominique, last: Vaufreydaz}
-  id: dominique-vaufreydaz
-- canonical: {first: Bernard, last: Vauquois}
-  id: bernard-vauquois
-- canonical: {first: Guillaume, last: Vauvert}
-  id: guillaume-vauvert
-- canonical: {first: Eva Maria, last: Vecchi}
-  variants:
-  - {first: Eva, last: Vecchi}
-- canonical: {first: Arlindo, last: Veiga}
-  variants:
-  - {first: Arlindo O., last: Veiga}
-- canonical: {first: Nanette M., last: Veilleux}
-  id: nanette-veilleux
-- canonical: {first: Gerard, last: Veillon}
-  id: gerard-veillon
-- canonical: {first: Paola, last: Velardi}
-  id: paola-velardi
-- canonical: {first: Patricia, last: Velazquez-Morales}
-  variants:
-  - {first: Patricia, last: Velázquez-Morales}
-- canonical: {first: Noortje, last: Venhuizen}
-  variants:
-  - {first: Noortje J., last: Venhuizen}
-- canonical: {first: Pranav Narayanan, last: Venkit}
-  variants:
-  - {first: Pranav, last: Venkit}
-- canonical: {first: Mateja, last: Verlič}
-  variants:
-  - {first: Mateja, last: Verlic}
-- canonical: {first: Yiyang, last: Du}
-  id: yiyang-du-cmu
-  comment: CMU
-  orcid: 0009-0007-1949-9736
-  institution: Carnegie Mellon University
-- canonical: {first: Yiyang, last: Du}
-  id: yiyang-du
-  comment: May refer to several people
-- canonical: {first: Jean, last: Veronis}
-  variants:
-  - {first: Jean, last: Véronis}
-- canonical: {first: Karin, last: Verspoor}
-  variants:
-  - {first: Karin M., last: Verspoor}
-  - {first: Cornelia Maria, last: Verspoor}
-- canonical: {first: Anita Lilla, last: Verő}
-  variants:
-  - {first: Anita Lilla, last: Vero}
-- canonical: {first: Katerina, last: Veselá}
-  variants:
-  - {first: Kateřina, last: Veselá}
-- canonical: {first: Grażyna, last: Vetulani}
-  variants:
-  - {first: Grazyna, last: Vetulani}
-- canonical: {first: José Luis, last: Vicedo}
-  id: jose-luis-vicedo
-  variants:
-  - {first: Jose-Luis, last: Vicedo}
-  - {first: Jose Luis, last: Vicedo}
-  - {first: José L., last: Vicedo}
-- canonical: {first: Enrique, last: Vidal}
-  id: enrique-vidal
-- canonical: {first: Renata, last: Vieira}
-  id: renata-vieira
-- canonical: {first: Sarah, last: Vieweg}
-  variants:
-  - {first: Sarah E., last: Vieweg}
-- canonical: {first: Jacob Hoover, last: Vigly}
-  id: jacob-hoover-vigly
-  variants:
-  - {first: Jacob Louis, last: Hoover}
-  - {first: Jacob, last: Hoover}
-- canonical: {first: Marina, last: Vigário}
-  id: marina-vigario
-- canonical: {first: K., last: Vijay-Shanker}
-  id: k-vijay-shanker
-  variants:
-  - {first: K, last: Vijay-Shanker}
-  - {first: K., last: Vijay-Shankar}
-  - {first: Vijay, last: Shanker}
-- canonical: {first: Marc, last: Vilain}
-  variants:
-  - {first: Marc B., last: Vilain}
-- canonical: {first: Juan Miguel, last: Vilar}
-  id: juan-miguel-vilar
-  variants:
-  - {first: Juan-Miguel, last: Vilar}
-  - {first: Juan M., last: Vilar}
-- canonical: {first: Darnes, last: Vilariño}
-  variants:
-  - {first: Darnes, last: Vilariño Ayala}
-- canonical: {first: Jorgen, last: Villadsen}
-  variants:
-  - {first: Jørgen, last: Villadsen}
-- canonical: {first: Jeanne, last: Villaneau}
-  id: jeanne-villaneau
-- canonical: {first: Luís, last: Villarejo}
-  variants:
-  - {first: Luis, last: Villarejo}
-- canonical: {first: Luis, last: Villaseñor-Pineda}
-  variants:
-  - {first: Luis, last: Villaseñor}
-  - {first: Luis, last: Villasenor}
-- canonical: {first: Éric, last: Villemonte de la Clergerie}
-  variants:
-  - {first: Eric, last: Villemonte de la Clergerie}
-  - {first: Eric, last: de la Clergerie}
-  - {first: Eric, last: de La Clergerie}
-  - {first: Éric, last: de La Clergerie}
-  - {first: Éric, last: de la Clergerie}
-  - {first: Éric, last: Villemonte de La Clergerie}
-- canonical: {first: Špela, last: Vintar}
-  variants:
-  - {first: Spela, last: Vintar}
-- canonical: {first: S. V. N., last: Vishwanathan}
-  variants:
-  - {first: S.V.N., last: Vishwanathan}
-- canonical: {first: George, last: Vladutz}
-  id: george-vladutz
-- canonical: {first: Nguyen, last: Vo}
-  variants:
-  - {first: Nguyen, last: Ha Vo}
-- canonical: {first: Stephan, last: Vogel}
-  id: stephan-vogel
-  variants:
-  - {first: Stephen, last: Vogel}
-- canonical: {first: Maria das Graças, last: Volpe Nunes}
-  variants:
-  - {first: Maria, last: das Graças Volpe Nunes}
-  - {first: Maria, last: das Gracas Volpe Nunes}
-  - {first: Maria das Graças Volpe, last: Nunes}
-  - {first: Maria, last: das Graças}
-  - {first: Maria das Graças V., last: Nunes}
-  - {first: Maria das Graças, last: Nunes}
-  - {first: Maria das Gracas, last: Volpe}
-- canonical: {first: Dirk, last: Von Gruenigen}
-  variants:
-  - {first: Dirk, last: von Grünigen}
-- canonical: {first: Ellen M., last: Voorhees}
-  variants:
-  - {first: Ellen, last: Voorhees}
-- canonical: {first: Clare, last: Voss}
-  variants:
-  - {first: Clare R., last: Voss}
-- canonical: {first: Hai-Quan, last: Vu}
-  variants:
-  - {first: Hai Quan, last: Vu}
-- canonical: {first: Pranav, last: Goel}
-  comment: UMD
-  id: pranav-goel-umd
-  orcid: 0000-0003-1037-2687
-  institution: University of Maryland
-- canonical: {first: Pranav, last: Goel}
-  id: pranav-goel
-  comment: May refer to several people
-- canonical: {first: Thuy, last: Vu}
-  variants:
-  - {first: Thuy-Trang, last: Vu}
-- canonical: {first: Tu, last: Vu}
-  variants:
-  - {first: Tu Thanh, last: Vu}
-- canonical: {first: Xuan Luong, last: Vu}
-  variants:
-  - {first: Xuân Lương, last: Vũ}
-  - {first: Xuan-Luong, last: Vu}
-- canonical: {first: Kristina, last: Vuckovic}
-  variants:
-  - {first: Kristina, last: Vučković}
-- canonical: {first: Stasa, last: Vujicic-Stankovic}
-  variants:
-  - {first: Staša Vujičić, last: Stanković}
-  - {first: Staša, last: Vujičić Stanković}
-- canonical: {first: Jan, last: Vystrčil}
-  variants:
-  - {first: Jan, last: Vystrcil}
-- canonical: {first: Tamás, last: Váradi}
-  variants:
-  - {first: Tamas, last: Váradi}
-- canonical: {first: Glòria, last: Vázquez}
-  variants:
-  - {first: Gloria, last: Vázquez}
-  - {first: Gloria, last: Vazquez}
-- canonical: {first: Silvia, last: Vázquez}
-  variants:
-  - {first: Silvia Rodríguez, last: Vázquez}
-- canonical: {first: Sonia, last: Vázquez}
-  variants:
-  - {first: Sonia, last: Vazquez}
-  - {first: Sonia, last: Vázquez Pérez}
-- canonical: {first: Jaakko, last: Väyrynen}
-  variants:
-  - {first: Jaakko J., last: Väyrynen}
-- canonical: {first: Luuk Van, last: Waes}
-  variants:
-  - {first: Luuk, last: Van Waes}
-- canonical: {first: Peter Waiganjo, last: Wagacha}
-  variants:
-  - {first: Peter W., last: Wagacha}
-  - {first: Peter, last: Wagacha}
-- canonical: {first: Stefan, last: Wagner}
-  variants:
-  - {first: Stefan, last: Wager}
-- canonical: {first: Wolfgang, last: Wahlster}
-  id: wolfgang-wahlster
-- canonical: {first: Alex, last: Waibel}
-  id: alex-waibel
-  variants:
-  - {first: Alexander, last: Waibel}
-- canonical: {first: Takahiro, last: Wakao}
-  id: takahiro-wakao
-- canonical: {first: Christopher R., last: Walker}
-  variants:
-  - {first: Christopher, last: Walker}
-  - {first: Christopher R, last: Walker}
-- canonical: {first: Marilyn, last: Walker}
-  id: marilyn-walker
-  variants:
-  - {first: Marilyn A., last: Walker}
-- canonical: {first: Vern, last: Walker}
-  variants:
-  - {first: Vern R., last: Walker}
-- canonical: {first: Byron C., last: Wallace}
-  variants:
-  - {first: Byron, last: Wallace}
-- canonical: {first: Hanna, last: Wallach}
-  variants:
-  - {first: Hanna M., last: Wallach}
-- canonical: {first: Joel, last: Wallenberg}
-  variants:
-  - {first: Joel C., last: Wallenberg}
-- canonical: {first: Annalu, last: Waller}
-  id: annalu-waller
-- canonical: {first: Alan, last: Wallington}
-  id: alan-wallington
-  variants:
-  - {first: Alan M., last: Wallington}
-- canonical: {first: David L., last: Waltz}
-  id: david-l-waltz
-- canonical: {first: Chi-Shing, last: Wang}
-  variants:
-  - {first: Chi-shing, last: Wang}
-- canonical: {first: Daisy Zhe, last: Wang}
-  variants:
-  - {first: Zhe, last: Wang}
-- canonical: {first: Flora Yu-Fang, last: Wang}
-  variants:
-  - {first: Yu-Fang, last: Wang}
-- canonical: {first: Hsin-Min, last: Wang}
-  variants:
-  - {first: Hsin-min, last: Wang}
-- canonical: {first: JianXiang, last: Wang}
-  variants:
-  - {first: Jianxiang, last: Wang}
-- canonical: {first: Kexin, last: Wang}
-  comment: Bytedance
-  id: kexin-wang-bd
-- canonical: {first: Kexin, last: Wang}
-  comment: TU Darmstadt
-  id: kexin-wang-tudarmstadt
-  orcid: 0000-0003-1175-7829
-  institution: TU Darmstadt
-- canonical: {first: Kexin, last: Wang}
-  id: kexin-wang
-  comment: May refer to several people
-- canonical: {first: Kun-Ching, last: Wang}
-  variants:
-  - {first: Kun-ching, last: Wang}
-- canonical: {first: Ling Xiao, last: Wang}
-  variants:
-  - {first: Lingxiao, last: Wang}
-- canonical: {first: Lucy Lu, last: Wang}
-  id: lucy-lu-wang
-  variants:
-  - {first: Lucy, last: Wang}
-- canonical: {first: Michelle Q., last: Wang}
-  variants:
-  - {first: Michelle, last: Wang}
-- canonical: {first: Mingwen, last: Wang}
-  variants:
-  - {first: MingWen, last: Wang}
-  - {first: Ming-Wei, last: Wang}
-- canonical: {first: Richard C., last: Wang}
-  variants:
-  - {first: Richard, last: Wang}
-- canonical: {first: Shih-ping, last: Wang}
-  variants:
-  - {first: Shih-Ping, last: Wang}
-- canonical: {first: Sida I., last: Wang}
-  variants:
-  - {first: Sida, last: Wang}
-- canonical: {first: Wen, last: Wang}
-  id: wen-wang
-- canonical: {first: Wen Ting, last: Wang}
-  variants:
-  - {first: WenTing, last: Wang}
-- canonical: {first: William S-Y., last: Wang}
-  variants:
-  - {first: William S.-Y., last: Wang}
-- canonical: {first: Xia, last: Wang}
-  id: xia-wang
-- canonical: {first: Xiao-Long, last: Wang}
-  variants:
-  - {first: XiaoLong, last: Wang}
-  - {first: Xiao-long, last: Wang}
-- canonical: {first: Xiaolei, last: Wang}
-  comment: Fudan
-  id: xiaolei-wang-fudan
-- canonical: {first: Xiaolei, last: Wang}
-  comment: Renmin
-  id: xiaolei-wang-renmin
-- canonical: {first: Yih-Ru, last: Wang}
-  variants:
-  - {first: Yih-ru, last: Wang}
-- canonical: {first: YongCheng, last: Wang}
-  variants:
-  - {first: Yong-Cheng, last: Wang}
-  - {first: Yong Cheng, last: Wang}
-- canonical: {first: Nigel, last: Ward}
-  variants:
-  - {first: Nigel G., last: Ward}
-- canonical: {first: Wayne, last: Ward}
-  id: wayne-ward
-  variants:
-  - {first: Wayne H., last: Ward}
-- canonical: {first: David H. D., last: Warren}
-  variants:
-  - {first: David H.D., last: Warren}
-- canonical: {first: Jonathan, last: Washington}
-  variants:
-  - {first: Jonathan North, last: Washington}
-  - {first: Jonathan N., last: Washington}
-- canonical: {first: Thomas, last: Wasow}
-  variants:
-  - {first: Tom, last: Wasow}
-- canonical: {first: Jakub, last: Waszczuk}
-  variants:
-  - {first: Jakub, last: Wasczuk}
-- canonical: {first: Catherine I., last: Watson}
-  variants:
-  - {first: Catherine, last: Watson}
-- canonical: {first: J. Angus, last: Webb}
-  variants:
-  - {first: Angus, last: Webb}
-- canonical: {first: Nick, last: Webb}
-  id: nick-webb
-- canonical: {first: Bonnie, last: Webber}
-  id: bonnie-webber
-  variants:
-  - {first: Bonnie L., last: Webber}
-  - {first: Bonnie Lynn, last: Webber}
-- canonical: {first: Heinz J., last: Weber}
-  variants:
-  - {first: H-J., last: Weber}
-- canonical: {first: Jonathan J., last: Webster}
-  variants:
-  - {first: Jonathan, last: Webster}
-- canonical: {first: Jurgen, last: Wedekind}
-  variants:
-  - {first: Jürgen, last: Wedekind}
-- canonical: {first: Eric, last: Wehrli}
-  variants:
-  - {first: Éric, last: Wehrli}
-- canonical: {first: Xiangfeng, last: Wei}
-  variants:
-  - {first: XiangFeng, last: Wei}
-- canonical: {first: Robert, last: Weide}
-  id: robert-weide
-- canonical: {first: Amy, last: Weinberg}
-  variants:
-  - {first: Amy S., last: Weinberg}
-- canonical: {first: Steven H., last: Weinberger}
-  variants:
-  - {first: Steven, last: Weinberger}
-- canonical: {first: Clifford J., last: Weinstein}
-  variants:
-  - {first: Clifford, last: Weinstein}
-- canonical: {first: Mitch, last: Weintraub}
-  id: mitch-weintraub
-  variants:
-  - {first: Mitchel, last: Weintraub}
-- canonical: {first: Maxwell, last: Weinzierl}
-  variants:
-  - {first: Maxwell A., last: Weinzierl}
-- canonical: {first: David, last: Weir}
-  id: david-weir
-  variants:
-  - {first: David J., last: Weir}
-  - {first: David, last: Wei}
-- canonical: {first: Ralph, last: Weischedel}
-  variants:
-  - {first: Ralph M., last: Weischedel}
-- canonical: {first: Zarah, last: Weiss}
-  variants:
-  - {first: Zarah, last: Weiß}
-- canonical: {first: Davy, last: Weissenbacher}
-  id: davy-weissenbacher
-- canonical: {first: Daniel S., last: Weld}
-  variants:
-  - {first: Daniel, last: Weld}
-  - {first: Dan, last: Weld}
-- canonical: {first: Marion, last: Weller-Di Marco}
-  variants:
-  - {first: Marion, last: Di Marco}
-- canonical: {first: Ben, last: Wellner}
-  variants:
-  - {first: Benjamin, last: Wellner}
-- canonical: {first: Chris, last: Welty}
-  variants:
-  - {first: Christopher, last: Welty}
-- canonical: {first: Christopher M., last: White}
-  id: christopher-m-white
-- canonical: {first: James Paul, last: White}
-  variants:
-  - {first: James P., last: White}
-  - {first: James, last: White}
-- canonical: {first: John S., last: White}
-  variants:
-  - {first: John, last: White}
-- canonical: {first: Michael, last: White}
-  id: michael-white
-  variants:
-  - {first: Mike, last: White}
-- canonical: {first: Peter, last: White}
-  variants:
-  - {first: Pete, last: White}
-- canonical: {first: Ryen, last: White}
-  variants:
-  - {first: Ryan, last: White}
-- canonical: {first: Pete, last: Whitelock}
-  id: pete-whitelock
-- canonical: {first: Edward W. D., last: Whittaker}
-  id: edward-w-d-whittaker
-- canonical: {first: Steve, last: Whittaker}
-  id: steve-whittaker
-- canonical: {first: Daniel, last: Whyatt}
-  variants:
-  - {first: Dan, last: Whyatt}
-- canonical: {first: Janyce, last: Wiebe}
-  variants:
-  - {first: Janyce M., last: Wiebe}
-  - {first: Jan, last: Wiebe}
-- canonical: {first: Colin W., last: Wightman}
-  id: colin-w-wightman
-- canonical: {first: Derry Tanti, last: Wijaya}
-  variants:
-  - {first: Derry, last: Wijaya}
-- canonical: {first: Graham, last: Wilcock}
-  id: graham-wilcock
-- canonical: {first: John, last: Wilkerson}
-  variants:
-  - {first: John D., last: Wilkerson}
-- canonical: {first: Yorick, last: Wilks}
-  id: yorick-wilks
-- canonical: {first: Jason D., last: Williams}
-  variants:
-  - {first: Jason, last: Williams}
-- canonical: {first: Jay, last: Wilpon}
-  variants:
-  - {first: Jay G., last: Wilpon}
-- canonical: {first: Andrew, last: Wilson}
-  variants:
-  - {first: Andrew T., last: Wilson}
-- canonical: {first: Amy, last: Winarske}
-  id: amy-winarske
-- canonical: {first: Genta Indra, last: Winata}
-  variants:
-  - {first: Genta, last: Winata}
-- canonical: {first: Benjamin, last: Wing}
-  variants:
-  - {first: Ben, last: Wing}
-- canonical: {first: Mats, last: Wirén}
-  variants:
-  - {first: Mats, last: Wiren}
-- canonical: {first: G. Bowden, last: Wise}
-  variants:
-  - {first: Bowden, last: Wise}
-- canonical: {first: Michael J., last: Witbrock}
-  variants:
-  - {first: Michael, last: Witbrock}
-- canonical: {first: Peter, last: Wittenburg}
-  id: peter-wittenburg
-- canonical: {first: Billy T.M., last: Wong}
-  variants:
-  - {first: Billy T. M., last: Wong}
-- canonical: {first: Kam-Fai, last: Wong}
-  id: kam-fai-wong
-  variants:
-  - {first: Kam-fai, last: Wong}
-- canonical: {first: Ping Wai, last: Wong}
-  variants:
-  - {first: Percy Ping-Wai, last: Wong}
-- canonical: {first: Raymond, last: Wong}
-  variants:
-  - {first: Raymond K., last: Wong}
-- canonical: {first: Mary McGee, last: Wood}
-  id: mary-mcgee-wood
-  variants:
-  - {first: Mary, last: McGee Wood}
-- canonical: {first: Phil C., last: Woodland}
-  id: phil-c-woodland
-- canonical: {first: William A., last: Woods}
-  id: william-a-woods
-- canonical: {first: Karsten L., last: Worm}
-  id: karsten-l-worm
-  variants:
-  - {first: Karsten, last: Worm}
-- canonical: {first: Monika, last: Woszczyna}
-  id: monika-woszczyna
-- canonical: {first: Klaus, last: Wothke}
-  id: klaus-wothke
-- canonical: {first: Sue Ellen, last: Wright}
-  variants:
-  - {first: Sue, last: Wright}
-- canonical: {first: Chia-Lung, last: Wu}
-  variants:
-  - {first: Chia-Long, last: Wu}
-- canonical: {first: Chun-Kai, last: Wu}
-  variants:
-  - {first: Kevin Chun-Kai, last: Wu}
-- canonical: {first: Horng Jyh Paul, last: Wu}
-  variants:
-  - {first: Horng-Jyh P., last: Wu}
-- canonical: {first: Jian-Chen, last: Wu}
-  variants:
-  - {first: Jien-Chen, last: Wu}
-- canonical: {first: Jian-Cheng, last: Wu}
-  variants:
-  - {first: Jian-cheng, last: Wu}
-  - {first: Jiancheng, last: Wu}
-- canonical: {first: Lide, last: Wu}
-  variants:
-  - {first: Li-de, last: Wu}
-- canonical: {first: Ming-Jer, last: Wu}
-  variants:
-  - {first: Min-Jer, last: Wu}
-- canonical: {first: Katharina, last: Wäschle}
-  variants:
-  - {first: Katharina, last: Waeschle}
-- canonical: {first: Amelie, last: Wührl}
-  variants:
-  - {first: Amelie, last: Wuehrl}
-- canonical: {first: Geraldo Bonorino, last: Xexéo}
-  variants:
-  - {first: Geraldo, last: Xexéo}
-- canonical: {first: Yingju, last: Xia}
-  variants:
-  - {first: Ying-Ju, last: Xia}
-  - {first: YingJu, last: Xia}
-- canonical: {first: Jinghui, last: Xiao}
-  variants:
-  - {first: JingHui, last: Xiao}
-- canonical: {first: Eric, last: Xing}
-  variants:
-  - {first: Eric P., last: Xing}
-- canonical: {first: Deyi, last: Xiong}
-  variants:
-  - {first: De-Yi, last: Xiong}
-- canonical: {first: Frank F., last: Xu}
-  variants:
-  - {first: Frank, last: Xu}
-- canonical: {first: Jian-ming, last: Xu}
-  variants:
-  - {first: Jian-Ming, last: Xu}
-- canonical: {first: Jinan, last: Xu}
-  variants:
-  - {first: JinAn, last: Xu}
-- canonical: {first: Mingbin, last: Xu}
-  variants:
-  - {first: MingBin, last: Xu}
-- canonical: {first: Zhiming, last: Xu}
-  variants:
-  - {first: Zhi-Ming, last: Xu}
-- canonical: {first: Serge A., last: Yablonsky}
-  variants:
-  - {first: Serge, last: Yablonsky}
-- canonical: {first: Ihsan, last: Yalcinkaya}
-  variants:
-  - {first: İhsan, last: Yalçinkaya}
-  - {first: İhsan, last: Yalcinkaya}
-- canonical: {first: Hirofumi, last: Yamamoto}
-  variants:
-  - {first: Hirohumi, last: Yamamoto}
-- canonical: {first: Yoichi, last: Yamashita}
-  id: yoichi-yamashita
-- canonical: {first: Chao-Han Huck, last: Yang}
-  variants:
-  - {first: Huck Chao-Han, last: Yang}
-- canonical: {first: Charles, last: Yang}
-  variants:
-  - {first: Charles D., last: Yang}
-- canonical: {first: Dechuan, last: Yang}
-  variants:
-  - {first: De, last: Yang}
-- canonical: {first: Dong, last: Yang}
-  id: dong-yang
-- canonical: {first: Eun-Suk, last: Yang}
-  variants:
-  - {first: Eunsuk, last: Yang}
-- canonical: {first: Li-chin, last: Yang}
-  variants:
-  - {first: Li-Chin, last: Yang}
-- canonical: {first: Lingpeng, last: Yang}
-  variants:
-  - {first: LingPeng, last: Yang}
-- canonical: {first: Muyun, last: Yang}
-  variants:
-  - {first: MuYun, last: Yang}
-  - {first: Mu-yun, last: Yang}
-- canonical: {first: Ping-Che, last: Yang}
-  variants:
-  - {first: Ping-che, last: Yang}
-- canonical: {first: Ting-hao, last: Yang}
-  variants:
-  - {first: Ting-Hao, last: Yang}
-- canonical: {first: Yaosheng, last: Yang}
-  variants:
-  - {first: YaoSheng, last: Yang}
-- canonical: {first: Jianmin, last: Yao}
-  variants:
-  - {first: Jian-min, last: Yao}
-  - {first: Jian-Min, last: Yao}
-- canonical: {first: Jin-ge, last: Yao}
-  variants:
-  - {first: Jin-Ge, last: Yao}
-- canonical: {first: Yao, last: Yao}
-  id: yao-yao-uwisc
-- canonical: {first: Yao, last: Yao}
-  id: yao-yao
-- canonical: {first: Mustafa, last: Yaseen}
-  id: mustafa-yaseen
-- canonical: {first: Norihito, last: Yasuda}
-  variants:
-  - {first: Norihi, last: Yasuda}
-- canonical: {first: Alexander, last: Yeh}
-  variants:
-  - {first: Alexander S., last: Yeh}
-  - {first: Alex, last: Yeh}
-- canonical: {first: Kevin C., last: Yeh}
-  variants:
-  - {first: Kevin, last: Yeh}
-- canonical: {first: Ming-chin, last: Yen}
-  variants:
-  - {first: Ming-Chin, last: Yen}
-- canonical: {first: Meliha, last: Yetisgen-Yildiz}
-  variants:
-  - {first: Meliha, last: Yetisgen}
-  - {first: Meliha, last: Yetişgen}
-- canonical: {first: Szu-ting, last: Yi}
-  variants:
-  - {first: Szuting, last: Yi}
-- canonical: {first: Wen-tau, last: Yih}
-  variants:
-  - {first: Scott Wen-tau, last: Yih}
-- canonical: {first: Matti, last: Ylilammi}
-  id: matti-ylilammi
-- canonical: {first: Shoichi, last: Yokoyama}
-  id: shoichi-yokoyama
-- canonical: {first: Aesun, last: Yoon}
-  variants:
-  - {first: Ae sun, last: Yoon}
-  - {first: Ae-Sun, last: Yoon}
-- canonical: {first: James, last: Yoon}
-  variants:
-  - {first: James H., last: Yoon}
-- canonical: {first: Su-Youn, last: Yoon}
-  variants:
-  - {first: Su-youn, last: Yoon}
-- canonical: {first: Kyosuke, last: Yoshida}
-  variants:
-  - {first: Kyôsuke, last: Yoshida}
-- canonical: {first: Takehiko, last: Yoshimi}
-  id: takehiko-yoshimi
-- canonical: {first: Kei, last: Yoshimoto}
-  id: kei-yoshimoto
-- canonical: {first: Nick J., last: Youd}
-  variants:
-  - {first: Nick, last: Youd}
-- canonical: {first: Sheryl, last: Young}
-  variants:
-  - {first: Sheryl R., last: Young}
-- canonical: {first: Steve, last: Young}
-  variants:
-  - {first: Steven, last: Young}
-- canonical: {first: Steve J., last: Young}
-  id: steve-j-young
-- canonical: {first: Clement T., last: Yu}
-  variants:
-  - {first: Clement, last: Yu}
-- canonical: {first: Edmund, last: Yu}
-  variants:
-  - {first: Edmund S., last: Yu}
-- canonical: {first: Liang-Chih, last: Yu}
-  variants:
-  - {first: Liang-chih, last: Yu}
-- canonical: {first: Ming-Shing, last: Yu}
-  variants:
-  - {first: Ming-shing, last: Yu}
-- canonical: {first: Philip S., last: Yu}
-  variants:
-  - {first: Philip, last: Yu}
-- canonical: {first: Zaharin, last: Yusoff}
-  id: zaharin-yusoff
-- canonical: {first: Ertugrul, last: Yılmaz}
-  variants:
-  - {first: Ertuğrul, last: Yilmaz}
-  - {first: Ertuǧrul, last: Yılmaz}
-- canonical: {first: Osmar R., last: Zaiane}
-  variants:
-  - {first: Osmar, last: Zaïane}
-  - {first: Osmar, last: Zaiane}
-  - {first: Osmar R., last: Zaïane}
-- canonical: {first: Omar, last: Zaidan}
-  variants:
-  - {first: Omar F., last: Zaidan}
-- canonical: {first: Remi, last: Zajac}
-  variants:
-  - {first: Rémi, last: Zajac}
-- canonical: {first: Xabier, last: Zalbide}
-  id: xabier-zalbide
-- canonical: {first: Jordi Porta, last: Zamorano}
-  variants:
-  - {first: Jordi, last: Porta}
-- canonical: {first: Antonio, last: Zampolli}
-  id: antonio-zampolli
-- canonical: {first: Hongying, last: Zan}
-  variants:
-  - {first: Hong-ying, last: Zan}
-- canonical: {first: Stefano, last: Zanobini}
-  id: stefano-zanobini
-- canonical: {first: Fabio Massimo, last: Zanzotto}
-  id: fabio-massimo-zanzotto
-  variants:
-  - {first: Fabio, last: Massimo Zanzotto}
-  - {first: Fabio, last: Zanzotto}
-- canonical: {first: Carlos Mario, last: Zapata Jaramillo}
-  variants:
-  - {first: Carlos M., last: Zapata Jaramillo}
-- canonical: {first: Gian Piero, last: Zarri}
-  id: gian-piero-zarri
-- canonical: {first: Sina, last: Zarrieß}
-  variants:
-  - {first: Sina, last: Zarriess}
-- canonical: {first: George, last: Zavaliagkos}
-  id: george-zavaliagkos
-- canonical: {first: Britta, last: Zeller}
-  variants:
-  - {first: Britta D., last: Zeller}
-- canonical: {first: Daniel, last: Zeman}
-  variants:
-  - {first: Dan, last: Zeman}
-- canonical: {first: Kalliopi, last: Zervanou}
-  variants:
-  - {first: Kalliopi A., last: Zervanou}
-- canonical: {first: Luke, last: Zettlemoyer}
-  variants:
-  - {first: Luke S., last: Zettlemoyer}
-- canonical: {first: ChengXiang, last: Zhai}
-  variants:
-  - {first: Chengxiang, last: Zhai}
-- canonical: {first: Chao, last: Zhang}
-  comment: Tsinghua University
-  id: chao-zhang-tu
-- canonical: {first: Dan, last: Zhang}
-  comment: Tsinghua University
-  id: dan-zhang-tsinghua
-- canonical: {first: Dan, last: Zhang}
-  comment: May refer to several people
-  id: dan-zhang
-- canonical: {first: Fang-Fang, last: Zhang}
-  variants:
-  - {first: Fangfang, last: Zhang}
-- canonical: {first: Guiping, last: Zhang}
-  variants:
-  - {first: GuiPing, last: Zhang}
-- canonical: {first: Huarui, last: Zhang}
-  variants:
-  - {first: HuaRui, last: Zhang}
-- canonical: {first: Ke-Jia, last: Zhang}
-  variants:
-  - {first: Ke-Jia, last: Chang}
-- canonical: {first: Li, last: Zhang}
-  comment: University of Pennsylvania
-  id: li-zhang-upenn
-- canonical: {first: Li, last: Zhang}
-  comment: UC San Diego
-  id: li-zhang-ucsandiego
-- canonical: {first: Li, last: Zhang}
-  comment: UK
-  id: li-zhang-uk
-- canonical: {first: Li, last: Zhang}
-  comment: Google
-  id: li-zhang-gg
-- canonical: {first: Li, last: Zhang}
-  comment: AWS
-  id: li-zhang-aws
-- canonical: {first: Li, last: Zhang}
-  comment: IBM-china
-  id: li-zhang-ibmc
-- canonical: {first: Li, last: Zhang}
-  comment: Newcastle, UK
-  id: li-zhang-newcastle
-- canonical: {first: Li, last: Zhang}
-  comment: Teesside University
-  id: li-zhang-teesside
-- canonical: {first: Li, last: Zhang}
-  comment: Birmingham
-  id: li-zhang-birmingham
-- canonical: {first: Li, last: Zhang}
-  comment: Google
-  id: li-zhang-google
-- canonical: {first: Li, last: Zhang}
-  comment: Nankai
-  id: li-zhang-nankai
-- canonical: {first: Li, last: Zhang}
-  comment: Wuhan
-  id: li-zhang-wuhan
-- canonical: {first: Ranran Haoran, last: Zhang}
-  comment: Penn State University
-  id: ranran-haoran-zhang
-- canonical: {first: Weinan, last: Zhang}
-  variants:
-  - {first: Wei-Nan, last: Zhang}
-- canonical: {first: Xiuzhen (Jenny), last: Zhang}
-  variants:
-  - {first: Xiuzhen, last: Zhang}
-- canonical: {first: Yao-Zhong, last: Zhang}
-  id: yao-zhong-zhang
-  variants:
-  - {first: Yao Zhong, last: Zhang}
-  - {first: Yao-zhong, last: Zhang}
-- canonical: {first: Ying, last: Zhang}
-  variants:
-  - {first: Joy Ying, last: Zhang}
-- canonical: {first: Tiejun, last: Zhao}
-  variants:
-  - {first: TieJun, last: Zhao}
-  - {first: Tie-Jun, last: Zhao}
-  - {first: Tie-jun, last: Zhao}
-- canonical: {first: Wayne Xin, last: Zhao}
-  variants:
-  - {first: Xin, last: Zhao}
-- canonical: {first: Weina, last: Zhao}
-  variants:
-  - {first: Wei Na, last: Zhao}
-- canonical: {first: Yi-jing, last: Zhao}
-  variants:
-  - {first: Yi-Jing, last: Hao}
-- canonical: {first: Fang, last: Zheng}
-  variants:
-  - {first: Thomas Fang, last: Zheng}
-- canonical: {first: Jiaheng, last: Zheng}
-  variants:
-  - {first: Jia-heng, last: Zheng}
-- canonical: {first: Ze-yu, last: Zheng}
-  variants:
-  - {first: Zeyu, last: Zheng}
-- canonical: {first: Guodong, last: Zhou}
-  variants:
-  - {first: GuoDong, last: Zhou}
-- canonical: {first: Huiwei, last: Zhou}
-  variants:
-  - {first: HuiWei, last: Zhou}
-- canonical: {first: Joe, last: Zhou}
-  variants:
-  - {first: Joe F., last: Zhou}
-- canonical: {first: Yan-Zuo, last: Zhou}
-  variants:
-  - {first: Yen-zuo, last: Zhou}
-- canonical: {first: Zhi Min, last: Zhou}
-  variants:
-  - {first: Zhi-Min, last: Zhou}
-- canonical: {first: Kenny, last: Zhu}
-  variants:
-  - {first: Kenny Q., last: Zhu}
-- canonical: {first: Qiaoming, last: Zhu}
-  variants:
-  - {first: Qiao-ming, last: Zhu}
-  - {first: Qiao-Ming, last: Zhu}
-  - {first: QiaoMing, last: Zhu}
-- canonical: {first: Song-chun, last: Zhu}
-  variants:
-  - {first: Song-Chun, last: Zhu}
-- canonical: {first: Xiaojin, last: Zhu}
-  variants:
-  - {first: Xiaojin Jerry, last: Zhu}
-- canonical: {first: Janez, last: Zibert}
-  variants:
-  - {first: Janez, last: Žibert}
-- canonical: {first: Ute, last: Ziegenhain}
-  id: ute-ziegenhain
-- canonical: {first: Harald H., last: Zimmermann}
-  id: harald-h-zimmermann
-- canonical: {first: Cäcilia, last: Zirn}
-  variants:
-  - {first: Caecilia, last: Zirn}
-- canonical: {first: Arturs, last: Znotins}
-  variants:
-  - {first: Artūrs, last: Znotiņš}
-- canonical: {first: Chengqing, last: Zong}
-  variants:
-  - {first: Cheng-qing, last: Zong}
-- canonical: {first: Enrico, last: Zovato}
-  id: enrico-zovato
-- canonical: {first: Richard, last: Zuber}
-  id: richard-zuber
-- canonical: {first: Victor, last: Zue}
-  id: victor-zue
-  variants:
-  - {first: Victor W., last: Zue}
-- canonical: {first: Geoffrey, last: Zweig}
-  id: geoffrey-zweig
-  variants:
-  - {first: Geoff, last: Zweig}
-- canonical: {first: Pierre, last: Zweigenbaum}
-  id: pierre-zweigenbaum
-- canonical: {first: Iria, last: da Cunha}
-  id: iria-da-cunha
-- canonical: {first: William, last: de Beaumont}
-  variants:
-  - {first: Will, last: de Beaumont}
-- canonical: {first: Martine, last: de Calmès}
-  id: martine-de-calmes
-- canonical: {first: Guadalupe Aguado, last: de Cea}
-  variants:
-  - {first: Guadalupe, last: Aguado de Cea}
-  - {first: Guadalupe, last: Aguado-de-Cea}
-- canonical: {first: Ricardo, last: de Córdoba}
-  variants:
-  - {first: Ricardo, last: de Cordoba}
-- canonical: {first: Adrià, last: de Gispert}
-  variants:
-  - {first: Adrià, last: Gispert}
-  - {first: Adrià, last: De Gispert}
-- canonical: {first: Clément, last: de Groc}
-  variants:
-  - {first: Clément, last: De Groc}
-- canonical: {first: Vera Lucia Strube, last: de Lima}
-  variants:
-  - {first: Vera Lúcia Strube, last: de Lima}
-- canonical: {first: Céline, last: de Looze}
-  variants:
-  - {first: Céline, last: Delooze}
-  - {first: Céline, last: De Looze}
-  - {first: Celine, last: De Looze}
-- canonical: {first: Claude, last: de Loupy}
-  variants:
-  - {first: Claude, last: De Loupy}
-- canonical: {first: Carl, last: de Marcken}
-  variants:
-  - {first: Carl G., last: de Marcken}
-- canonical: {first: Marie-Catherine, last: de Marneffe}
-  variants:
-  - {first: Marie Catherine, last: de Marneffe}
-- canonical: {first: Paulo C F, last: de Oliveira}
-  variants:
-  - {first: Paulo C. F., last: de Oliveira}
-- canonical: {first: Valeria, last: de Paiva}
-  id: valeria-de-paiva
-- canonical: {first: Maarten, last: de Rijke}
-  variants:
-  - {first: Maarten, last: De Rijke}
-- canonical: {first: Folkert, last: de Vriend}
-  id: folkert-de-vriend
-- canonical: {first: Peter V., last: deSouza}
-  id: peter-v-desouza
-- canonical: {first: Louis, last: des Tombe}
-  id: louis-des-tombe
-- canonical: {first: Daniela Oliveira F., last: do Amaral}
-  variants:
-  - {first: Daniela O. F., last: do Amaral}
-- canonical: {first: Cicero, last: dos Santos}
-  variants:
-  - {first: Cícero, last: dos Santos}
-  - {first: Cícero Nogueira, last: dos Santos}
-  - {first: Cicero, last: Nogueira dos Santos}
-  - {first: Cícero, last: Nogueira dos Santos}
-- canonical: {first: Johan Adam, last: du Preez}
-  id: johan-adam-du-preez
-- canonical: {first: Hugo Van, last: hamme}
-  variants:
-  - {first: Hugo, last: Van hamme}
-- canonical: {first: Kees, last: van Deemter}
-  variants:
-  - {first: Kees, last: Van Deemter}
-- canonical: {first: Josef, last: van Genabith}
-  id: josef-van-genabith
-  variants:
-  - {first: Josef, last: Van Genabith}
-- canonical: {first: Willem Robert, last: van Hage}
-  variants:
-  - {first: Willem, last: Van Hage}
-  - {first: Willem, last: van Hage}
-- canonical: {first: Hans, last: van Halteren}
-  variants:
-  - {first: Hans, last: Van Halteren}
-- canonical: {first: Gerhard B., last: van Huyssteen}
-  variants:
-  - {first: Gerhard, last: Van Huyssteen}
-  - {first: Gerhard, last: van Huyssteen}
-  - {first: Gerhard B, last: van Huyssteen}
-- canonical: {first: Marcel P., last: van Lohuizen}
-  variants:
-  - {first: Marcel P., last: Van Lohuizen}
-- canonical: {first: Erik, last: van Mulligen}
-  variants:
-  - {first: Erik M., last: van Mulligen}
-- canonical: {first: Gertjan, last: van Noord}
-  variants:
-  - {first: Gertjan, last: Van Noord}
-- canonical: {first: Marten, last: van Schijndel}
-  variants:
-  - {first: Marten, last: Van Schijndel}
-  - {first: Martin, last: van Schijndel}
-- canonical: {first: Dieter, last: van Uytvanck}
-  variants:
-  - {first: Dieter, last: Van Uytvanck}
-- canonical: {first: Menno, last: van Zaanen}
-  variants:
-  - {first: Menno, last: van Zannen}
-- canonical: {first: Antal, last: van den Bosch}
-  variants:
-  - {first: Antal, last: Van den Bosch}
-  - {first: Antal, last: Van Den Bosch}
-- canonical: {first: Henk, last: van den Heuvel}
-  id: henk-van-den-heuvel
-- canonical: {first: Erik, last: van der Goot}
-  variants:
-  - {first: Erik, last: Van der Goot}
-- canonical: {first: P. H. J., last: van der Kamp}
-  variants:
-  - {first: P.H.J., last: van der Kamp}
-- canonical: {first: Lonneke, last: van der Plas}
-  variants:
-  - {first: Lonneke, last: Van Der Plas}
-- canonical: {first: Hennie, last: van der Vliet}
-  variants:
-  - {first: Hennie, last: VanderVliet}
-- canonical: {first: Rene, last: van der Wal}
-  variants:
-  - {first: René, last: van der Wal}
-  - {first: Rene, last: Van Der Wal}
-- canonical: {first: Walther, last: von Hahn}
-  variants:
-  - {first: Walther, last: v. Hahn}
-- canonical: {first: Katharina, last: von der Wense}
-  variants:
-  - {first: Katharina, last: Kann}
-- canonical: {first: Aitor, last: Álvarez}
-  variants:
-  - {first: Aitor, last: Arronte Álvarez}
-- canonical: {first: Ruket, last: Çakıcı}
-  variants:
-  - {first: Ruket, last: Cakici}
-  - {first: Ruken, last: Cakici}
-  - {first: Ruken, last: Çakıcı}
-- canonical: {first: Özlem, last: Çetinoğlu}
-  variants:
-  - {first: Ozlem, last: Cetinoglu}
-  - {first: Özlem, last: Çetinoglu}
-- canonical: {first: Haldur, last: Õim}
-  id: haldur-oim
-  variants:
-  - {first: Haldur, last: Oim}
-- canonical: {first: Hale, last: Ögel Balaban}
-  variants:
-  - {first: Hale, last: Ogel}
-- canonical: {first: Berkay Furkan, last: Önder}
-  variants:
-  - {first: Berkay, last: Önder}
-- canonical: {first: Annette, last: Östling Andersson}
-  variants:
-  - {first: Annette, last: Östling}
-- canonical: {first: Gözde, last: Özbal}
-  variants:
-  - {first: Gozde, last: Ozbal}
-- canonical: {first: Arzucan, last: Özgür}
-  variants:
-  - {first: Arzucan, last: Ozgur}
-- canonical: {first: Lilja, last: Øvrelid}
-  variants:
-  - {first: Lilja, last: Ovrelid}
-- canonical: {first: Damir, last: Ćavar}
-  variants:
-  - {first: Damir, last: Cavar}
-- canonical: {first: Matej, last: Ďurčo}
-  variants:
-  - {first: Matej, last: Durco}
-- canonical: {first: Ozan, last: İrsoy}
-  variants:
-  - {first: Ozan, last: Irsoy}
-- canonical: {first: Gözde Gül, last: Şahin}
-  variants:
-  - {first: Gözde, last: Şahin}
-  - {first: Gözde Gül, last: İşgüder}
-- canonical: {first: Gabriela, last: Şerban}
-  variants:
-  - {first: Gabriela, last: Serban}
-- canonical: {first: Octavia-Maria, last: Şulea}
-  variants:
-  - {first: Maria, last: Sulea}
-  - {first: Octavia-Maria, last: Sulea}
-  - {first: Maria-Octavia, last: Sulea}
-- canonical: {first: Jana, last: Šindlerová}
-  variants:
-  - {first: Jana, last: Sindlerova}
-- canonical: {first: Sanja, last: Štajner}
-  variants:
-  - {first: Sanja, last: Stajner}
-- canonical: {first: Zdeněk, last: Žabokrtský}
-  variants:
-  - {first: Zdenek, last: Zabokrtsky}
-  - {first: Zdenĕk, last: Žabokrtský}
-  - {first: Zdenek, last: Žabokrtsky}
-- canonical: {first: Lukáš, last: Žilka}
-  variants:
-  - {first: Lukas, last: Zilka}
-- canonical: {first: Anirudh, last: Sundar}
-  variants:
-  - {first: Anirudh S., last: Sundar}
-  - {first: Anirudh S, last: Sundar}
-- canonical: {first: Cong, last: Liu}
-  comment: Florida Atlantic University
-  id: cong-liu-fau
-- canonical: {first: Cong, last: Liu}
-  comment: May refer to several people
-  id: cong-liu
-- canonical: {first: Cong, last: Liu}
-  comment: University of California, Riverside
-  id: cong-liu-ucr
-- canonical: {first: Cong, last: Liu}
-  comment: iFLYTEK Research
-  id: cong-liu-iflytek
-- canonical: {first: Kyuyoung, last: Kim}
-  variants:
-  - {first: Kyu-Young, last: Kim}
-- canonical: {first: Jann Railey, last: Montalan}
-  id: jann-railey-montalan
-  variants:
-  - {first: Jann, last: Montalan}
-  - {first: Railey, last: Montalan}
-  - {first: Jann Railey E., last: Montalan}
-- canonical: {first: R. Thomas, last: McCoy}
-  id: r-thomas-mccoy
-  variants:
-  - {first: Tom, last: McCoy}
-- canonical: {first: Kun, last: Zhang}
-  comment: University of Science and Technology of China
-  id: kun-zhang-ustc
-- canonical: {first: Kun, last: Zhang}
-  comment: Inria Saclay-Île-de-France
-  id: kun-zhang-inria
-- canonical: {first: Kun, last: Zhang}
-  comment: University of Chinese Academy of Sciences
-  id: kun-zhang-ucas
-- canonical: {first: Kun, last: Zhang}
-  comment: May refer to multiple people
-  id: kun-zhang
-- canonical: {first: Xuan Long, last: Do}
-  variants:
-  - {first: Do Xuan, last: Long}
-- canonical: {first: Jian, last: Chen}
-  comment: May refer to several people
-  id: jian-chen
-- canonical: {first: Jian, last: Chen}
-  comment: University at Buffalo
-  id: jian-chen-ub
-- canonical: {first: Hannah, last: Cyberey}
-  id: hannah-cyberey
-  variants:
-  - {first: Hannah, last: Chen}
-- canonical: {first: Lester James Validad, last: Miranda}
-  id: lester-james-validad-miranda
-  variants:
-  - {first: Lester James, last: Miranda}
-- canonical: {first: Marten, last: During}
-  comment: University of Luxembourg
-  id: marten-during-ul
-- canonical: {first: Marten, last: During}
-  comment: May refer to several people
-  id: marten-during
-- canonical: {first: Börje F., last: Karlsson}
-  variants:
-  - {first: Börje, last: Karlsson}
-  comment: https://github.com/acl-org/acl-anthology/issues/4041
-  orcid: 0000-0001-8925-360X
-  degree: PUC-Rio
-- canonical: {first: Saptarshi, last: Ghosh}
-  id: saptarshi-ghosh-cincinnati
-  degree: University of Cincinnati
-  orcid: 0009-0006-9472-7121
-- canonical: {first: Saptarshi, last: Ghosh}
-  comment: May refer to several people
-  id: saptarshi-ghosh
-- canonical: {first: Mayank, last: Singh}
-  comment: University of Arizona
-  id: mayank-singh-az
-- canonical: {first: Mayank, last: Singh}
-  comment: May refer to several people
-  id: mayank-singh
-- canonical: {first: Takumi, last: Goto}
-  variants:
-  - {first: Takumi, last: Gotou}
-  id: 0009-0006-8124-899X
-  degree: Nara Institute of Science and Technology
-- canonical: {first: Muhammad N., last: ElNokrashy}
-  id: muhammad-elnokrashy
-  variants:
-  - {first: Muhammad, last: ElNokrashy}
-  - {first: Muhammad Nael, last: ElNokrashy}
-- canonical: {first: Nishat, last: Raihan}
-  orcid: 0000-0001-6242-398X
-  variants:
-  - {first: Md Nishat, last: Raihan}
-- canonical: {first: Ona, last: de Gibert}
-  id: ona-de-gibert
-  variants:
-  - {first: Ona, last: de Gibert Bonet}
-  orcid: 0000-0002-7163-4807
-  degree: University of Helsinki, Finland
-- canonical: {first: Wenzheng, last: Zhang}
-  comment: Rutgers University
-  orcid: 0009-0009-2578-9224
-  id: wenzheng-zhang-ru
-- canonical: {first: Wenzheng, last: Zhang}
-  comment: May refer to several people
-  id: wenzheng-zhang
-- canonical: {first: Zhengyan, last: Shi}
-  orcid: 0000-0003-3074-3035
-  degree: University College London
-  variants:
-  - {first: Zhengxiang, last: Shi}
-- canonical: {first: Shu, last: Yang}
-  comment: University of British Columbia
-  orcid: 0000-0002-8507-7191
-  id: shu-yang-ubc
-- canonical: {first: Shu, last: Yang}
-  comment: May refer to several people
-  id: shu-yang
-- canonical: {first: Chen, last: Cecilia Liu}
-  id: chen-cecilia-liu
-  orcid: 0009-0004-2382-8609
-  comment: Technische Universität Darmstadt
-- canonical: {first: Chen, last: Liu}
-  comment: May refer to several people
-  id: chen-liu
-- canonical: {first: Li, last: Lin}
-  degree: Peking University
-  orcid: 0009-0008-5072-5022
-  id: li-lin-pku
-- canonical: {first: Li, last: Lin}
-  comment: May refer to multiple people
-  id: li-lin
-- canonical: {first: Junyu, last: Luo}
-  degree: Peking University
-  orcid: 0009-0001-6894-1144
-  id: junyu-luo-pu
-- canonical: {first: Junyu, last: Luo}
-  comment: May refer to multiple people
-  id: junyu-luo
-- canonical: {first: Zhihao, last: Wang}
-  degree: Xiamen University
-  orcid: 0009-0008-7497-6467
-  id: zhihao-wang-xu
-- canonical: {first: Zhihao, last: Wang}
-  comment: May refer to multiple people
-  id: zhihao-wang
-- canonical: {first: Ryan, last: Boyd}
-  orcid: 0000-0002-1876-6050
-  degree: University of Texas at Austin
-  variants:
-  - {first: Ryan L., last: Boyd}
-- canonical: {first: Qi, last: Li}
-  degree: University at Buffalo
-  orcid: 0000-0002-3136-2157
-  id: qi-li-ub
-- canonical: {first: Qi, last: Li}
-  comment: May refer to multiple people
-  id: qi-li
-- canonical: {first: Zhihan, last: Zhang}
-  degree: Singapore Management University
-  orcid: 0009-0009-5813-9172
-  id: zhihan-zhang-smu
-- canonical: {first: Zhihan, last: Zhang}
-  comment: May refer to multiple people
-  id: zhihan-zhang
-- canonical: {first: Ning, last: Liu}
-  degree: Tsinghua University
-  orcid: 0000-0001-7475-9739
-  id: ning-liu-tsinghua
-- canonical: {first: Ning, last: Liu}
-  comment: May refer to multiple people
-  id: ning-liu
-- canonical: {first: Changye, last: Li}
-  degree: University of Minnesota
-  orcid: 0000-0002-9743-7406
-  id: changye-li-umn
-- canonical: {first: Changye, last: Li}
-  comment: May refer to multiple people
-  id: changye-li
-- canonical: {first: Ya, last: Li}
-  degree: Chinese Academy of Sciences
-  orcid: 0000-0002-6284-5039
-  id: ya-li-cas
-- canonical: {first: Ya, last: Li}
-  comment: May refer to multiple people
-  id: ya-li
-- canonical: {first: Yue, last: Li}
-  degree: East China Normal University
-  orcid: 0009-0005-5509-2103
-  id: yue-li-ecnu
-- canonical: {first: Yue, last: Li}
-  comment: May refer to multiple people
-  id: yue-li
-- canonical: {first: Lu, last: Xu}
-  degree: Sapienza University of Rome
-  orcid: 0000-0002-5660-3631
-  id: lu-xu-uniroma1
-- canonical: {first: Lu, last: Xu}
-  comment: May refer to multiple people
-  id: lu-xu
-- canonical: {first: Jiahao, last: Yuan}
-  degree: East China Normal University
-  orcid: 0009-0002-6194-450X
-  id: jiahao-yuan-ecnu
-- canonical: {first: Jiahao, last: Yuan}
-  comment: May refer to multiple people
-  id: jiahao-yuan
-- canonical: {first: Chong, last: Zhang}
-  degree: Xi'an Jiaotong-Liverpool University
-  orcid: 0009-0003-2020-6989
-  id: chong-zhang-xjtlu
-- canonical: {first: Chong, last: Zhang}
-  comment: May refer to multiple people
-  id: chong-zhang
-- canonical: {first: Xinpeng, last: Wang}
-  degree: Ludwig Maximilian University of Munich (LMU)
-  orcid: 0009-0006-5213-1119
-  id: xinpeng-wang-lmu
-- canonical: {first: Xinpeng, last: Wang}
-  comment: May refer to multiple people
-  id: xinpeng-wang
-- canonical: {first: Shengjie, last: Li}
-  comment: University of Texas at Dallas
-  id: shengjie-li
-  orcid: 0000-0002-5442-5464
-- canonical: {first: Shengjie, last: Li}
-  id: shengjie-li-peking
-  comment: Peking University
-  orcid: 0000-0003-3489-9125
-- canonical: {first: Shashank, last: Gupta}
-  id: shashank-gupta-uiuc
-  orcid: 0000-0002-3683-3739
-  institution: University of Illinois at Urbana-Champaign
-- canonical: {first: Shashank, last: Gupta}
-  id: shashank-gupta
-  comment: "May refer to several people"
-- canonical: {first: Chen, last: Zhang}
-  id: chen-zhang-peking
-  orcid: 0000-0001-5842-0516
-  institution: Peking University
-- canonical: {first: Chen, last: Zhang}
-  id: chen-zhang
-  comment: May refer to several people
diff --git a/data/yaml/people.yaml b/data/yaml/people.yaml
new file mode 100644
index 0000000000..91d7f9480b
--- /dev/null
+++ b/data/yaml/people.yaml
@@ -0,0 +1,15340 @@
+a-akilandeswari:
+  names:
+  - {first: A., last: Akilandeswari}
+  - {first: Akilandeswari, last: A}
+a-j-m-szanser:
+  names:
+  - {first: A.J.M., last: Szanser}
+  - {first: A.J., last: Szanser}
+a-kumaran:
+  names:
+  - {first: A, last: Kumaran}
+  - {first: A., last: Kumaran}
+aarno-lehtola:
+  names:
+  - {first: Aarno, last: Lehtola}
+  - {first: A., last: Lehtola}
+aaron-j-masino:
+  names:
+  - {first: Aaron J., last: Masino}
+  - {first: Aaron, last: Masino}
+abdelhak-mouradi:
+  names:
+  - {first: Abdelhak, last: Mouradi}
+  - {first: A., last: Mouradi}
+abdelmajid-ben-hamadou:
+  names:
+  - {first: Abdelmajid, last: Ben Hamadou}
+  - {first: Abdelmajid, last: Ben hamadou}
+  - {first: Abdelmajid, last: Benhamadou}
+  - {first: Abdelmajid-Lin, last: Ben Hamadou}
+abderrahim-benabbou:
+  names:
+  - {first: Abderrahim, last: Benabbou}
+  - {first: A., last: Benabbou}
+abdessalam-bouchekif:
+  names:
+  - {first: Abdessalam, last: Bouchekif}
+  - {first: Abdesselam, last: Bouchekif}
+abdul-baquee-sharaf:
+  names:
+  - {first: Abdul-Baquee, last: Sharaf}
+  - {first: Abdul-Baquee M., last: Sharaf}
+abe-ittycheriah:
+  names:
+  - {first: Abe, last: Ittycheriah}
+  - {first: A., last: Ittycheriah}
+abhay-l-kashyap:
+  names:
+  - {first: Abhay, last: L. Kashyap}
+  - {first: Abhay, last: Kashyap}
+abhyuday-jagannatha:
+  names:
+  - {first: Abhyuday, last: Jagannatha}
+  - {first: Abhyuday N, last: Jagannatha}
+abigail-s-gertner:
+  names:
+  - {first: Abigail S., last: Gertner}
+  - {first: Abigail, last: Gertner}
+achille-fokoue-nkoutche:
+  names:
+  - {first: Achille, last: Fokoue-Nkoutche}
+  - {first: Achille, last: Fokoue}
+achla-m-raina:
+  names:
+  - {first: Achla M., last: Raina}
+  - {first: Achla, last: Raina}
+  - {first: Achla M, last: Raina}
+adam-berger:
+  names:
+  - {first: Adam, last: Berger}
+  - {first: Adam L., last: Berger}
+adam-cheyer:
+  names:
+  - {first: Adam, last: Cheyer}
+  - {first: A., last: Cheyer}
+adam-liska:
+  names:
+  - {first: Adam, last: Liska}
+  - {first: Adam, last: Liška}
+adam-meyers:
+  names:
+  - {first: Adam, last: Meyers}
+  - {first: A., last: Meyers}
+adams-b-bodomo:
+  names:
+  - {first: Adams B., last: Bodomo}
+  - {first: Adams, last: Bodomo}
+adele-goldberg:
+  names:
+  - {first: Adele, last: Goldberg}
+  - {first: Adele E., last: Goldberg}
+adeline-nazarenko:
+  names:
+  - {first: Adeline, last: Nazarenko}
+  - {first: Adeline, last: Nazarenko-Perrin}
+  - {first: A., last: Nazarenko}
+adi-shalev:
+  names:
+  - {first: Adi, last: Shalev}
+  - {first: Adi, last: Bitan}
+adil-al-kufaishi:
+  names:
+  - {first: Adil, last: Al-Kufaishi}
+  - {first: A., last: Al-Kufaishi}
+adil-el-ghali:
+  names:
+  - {first: Adil, last: El Ghali}
+  - {first: Adil, last: El-Ghali}
+adolfo-hernandez-h:
+  names:
+  - {first: Adolfo, last: Hernández H.}
+  - {first: Adolfo, last: Hernández}
+adoram-erell:
+  names:
+  - {first: Adoram, last: Erell}
+  - {first: A., last: Erell}
+adria-de-gispert:
+  names:
+  - {first: Adrià, last: de Gispert}
+  - {first: Adrià, last: Gispert}
+  - {first: Adrià, last: De Gispert}
+adria-torrens-urrutia:
+  names:
+  - {first: Adrià, last: Torrens Urrutia}
+  - {first: Adrià, last: Torrens-Urrutia}
+adrian-brasoveanu:
+  names:
+  - {first: Adrian, last: Braşoveanu}
+  - {first: Adrian, last: Brasoveanu}
+adriana-badulescu:
+  names:
+  - {first: Adriana, last: Badulescu}
+  - {first: Adriana, last: Bădulescu}
+adwait-ratnaparkhi:
+  names:
+  - {first: Adwait, last: Ratnaparkhi}
+  - {first: A., last: Ratnaparkhi}
+aesun-yoon:
+  names:
+  - {first: Aesun, last: Yoon}
+  - {first: Ae sun, last: Yoon}
+  - {first: Ae-Sun, last: Yoon}
+agata-cybulska:
+  names:
+  - {first: Agata, last: Cybulska}
+  - {first: Agata Katarzyna, last: Cybulska}
+agha-ali-raza:
+  names:
+  - {first: Agha Ali, last: Raza}
+  - {first: Agha, last: Raza}
+agnes-sandor:
+  names:
+  - {first: Ágnes, last: Sándor}
+  - {first: Agnes, last: Sandor}
+agnes-tutin:
+  names:
+  - {first: Agnès, last: Tutin}
+  - {first: Agnes, last: Tutin}
+agnieszka-falenska:
+  names:
+  - {first: Agnieszka, last: Falenska}
+  - {first: Agnieszka, last: Faleńska}
+agusti-lloberas:
+  names:
+  - {first: Agusti, last: Lloberas}
+  - {first: Agusti, last: LLoberas}
+agustin-gravano:
+  names:
+  - {first: Agustin, last: Gravano}
+  - {first: Agustín, last: Gravano}
+ahmed-aburaed:
+  names:
+  - {first: Ahmed, last: AbuRa’ed}
+  - {first: Ahmed, last: Abura’ed}
+ahmed-hassan:
+  names:
+  - {first: Ahmed, last: Hassan}
+  - {first: Ahmed Hassan, last: Awadallah}
+ahmed-ragheb:
+  names:
+  - {first: Ahmed, last: Ragheb}
+  - {first: A., last: Ragheb}
+ahmet-cuneyd-tantug:
+  names:
+  - {first: Ahmet Cüneyd, last: Tantuğ}
+  - {first: A. Cüneyd, last: Tantuǧ}
+aicha-bouhjar:
+  names:
+  - {first: Aicha, last: Bouhjar}
+  - {first: Aïcha, last: Bouhjar}
+aimilios-chalamandaris:
+  names:
+  - {first: Aimilios, last: Chalamandaris}
+  - {first: Chalamandaris, last: Aimilios}
+  - {first: A., last: Chalamandaris}
+aina-gari-soler:
+  names:
+  - {first: Aina, last: Garí Soler}
+  - {first: Aina Garí, last: Soler}
+aingeru-mayor:
+  names:
+  - {first: Aingeru, last: Mayor}
+  - {first: A., last: Mayor}
+aiti-aw:
+  names:
+  - {first: Aiti, last: Aw}
+  - {first: AiTi, last: Aw}
+  - {first: Ai Ti, last: Aw}
+aitor-alvarez:
+  names:
+  - {first: Aitor, last: Álvarez}
+  - {first: Aitor, last: Arronte Álvarez}
+aitor-gonzalez-agirre:
+  names:
+  - {first: Aitor, last: González-Agirre}
+  - {first: Aitor, last: Gonzalez-Agirre}
+aitor-sologaistoa:
+  names:
+  - {first: Aitor, last: Sologaistoa}
+  - {first: A., last: Sologaistoa}
+aitor-soroa:
+  names:
+  - {first: Aitor, last: Soroa}
+  - {first: Aitor, last: Soroa Etxabe}
+  - {first: A., last: Soroa}
+akash-kumar-gautam:
+  names:
+  - {first: Akash Kumar, last: Gautam}
+  - {first: Akash, last: Gautam}
+akiko-aizawa:
+  names:
+  - {first: Akiko, last: Aizawa}
+  - {first: Akiko N., last: Aizawa}
+akshar-bharati:
+  names:
+  - {first: Akshar, last: Bharati}
+  - {first: Akshar, last: Bharathi}
+alain-polguere:
+  names:
+  - {first: Alain, last: Polguère}
+  - {first: Alain, last: Polguere}
+  - {first: A., last: Polguere}
+alan-k-melby:
+  names:
+  - {first: Alan K., last: Melby}
+  - {first: Alan, last: Melby}
+alan-r-aronson:
+  names:
+  - {first: Alan R., last: Aronson}
+  - {first: Alan, last: Aronson}
+alan-w-biermann:
+  names:
+  - {first: Alan W., last: Biermann}
+  - {first: Alan, last: Biermann}
+  - {first: A., last: Biermann}
+alan-w-black:
+  names:
+  - {first: Alan W., last: Black}
+  - {first: Alan, last: Black}
+  - {first: Alan W, last: Black}
+  - {first: A.W., last: Black}
+alan-wallington:
+  names:
+  - {first: Alan, last: Wallington}
+  - {first: Alan M., last: Wallington}
+  - {first: A.M., last: Wallington}
+albert-a-rizzo:
+  names:
+  - {first: Albert A., last: Rizzo}
+  - {first: Albert, last: Rizzo}
+  - {first: Skip, last: Rizzo}
+  - {first: Albert Skip, last: Rizzo}
+albert-m-lai:
+  names:
+  - {first: Albert M., last: Lai}
+  - {first: Albert, last: Lai}
+  - {first: Albert M, last: Lai}
+albert-russel:
+  names:
+  - {first: Albert, last: Russel}
+  - {first: A., last: Russel}
+alberto-bugarin-diz:
+  names:
+  - {first: Alberto, last: Bugarín Diz}
+  - {first: Alberto, last: Bugarín}
+  - {first: Alberto, last: Bugarin}
+alberto-garcia-duran:
+  names:
+  - {first: Alberto, last: Garcia-Duran}
+  - {first: Alberto, last: García-Durán}
+alberto-lavelli:
+  names:
+  - {first: Alberto, last: Lavelli}
+  - {first: A., last: Lavelli}
+alberto-maritxalar:
+  names:
+  - {first: Alberto, last: Maritxalar}
+  - {first: A., last: Maritxalar}
+albino-nogueiras:
+  names:
+  - {first: Albino, last: Nogueiras}
+  - {first: Albino Nogueiras, last: Rodriguez}
+alejandro-h-toselli:
+  names:
+  - {first: Alejandro H., last: Toselli}
+  - {first: Alejandro Héctor, last: Toselli}
+aleksandra-zogling-markus:
+  names:
+  - {first: Aleksandra Zögling, last: Markuš}
+  - {first: Aleksandra, last: Zögling}
+alena-bohmova:
+  names:
+  - {first: Alena, last: Bŏhmová}
+  - {first: Alena, last: Bohmova}
+  - {first: Alena, last: Böhmová}
+alessandro-mazzei:
+  names:
+  - {first: Alessandro, last: Mazzei}
+  - {first: A, last: Mazzei}
+alex-chengyu-fang:
+  names:
+  - {first: Alex Chengyu, last: Fang}
+  - {first: Alex C., last: Fang}
+alex-fine:
+  names:
+  - {first: Alex, last: Fine}
+  - {first: Alex B., last: Fine}
+alex-klassmann:
+  names:
+  - {first: Alex, last: Klassmann}
+  - {first: Alexander, last: Klassmann}
+alex-liu:
+  names:
+  - {first: Alex, last: Liu}
+  - {first: Alexander, last: Liu}
+alex-moruz:
+  names:
+  - {first: Alex, last: Moruz}
+  - {first: Mihai Alex, last: Moruz}
+alex-r-atrio:
+  names:
+  - {first: Àlex R., last: Atrio}
+  - {first: Àlex, last: Atrio}
+alex-rudnick:
+  names:
+  - {first: Alex, last: Rudnick}
+  similar:
+  - alexander-rudnicky
+alex-waibel:
+  names:
+  - {first: Alex, last: Waibel}
+  - {first: Alexander, last: Waibel}
+  - {first: A., last: Waibel}
+alexa-n-little:
+  names:
+  - {first: Alexa N., last: Little}
+  - {first: Alexa, last: Little}
+alexander-andreyewsky:
+  comment: IBM
+  names:
+  - {first: Alexander, last: Andreyewsky}
+  - {first: A., last: Andreyewsky}
+  similar:
+  - alexandre-andreewsky
+alexander-berg:
+  names:
+  - {first: Alexander, last: Berg}
+  - {first: Alex, last: Berg}
+  - {first: Alexander C, last: Berg}
+alexander-franz:
+  names:
+  - {first: Alexander, last: Franz}
+  - {first: Alexander M., last: Franz}
+alexander-fraser:
+  names:
+  - {first: Alexander, last: Fraser}
+  - {first: Alex, last: Fraser}
+alexander-g-hauptmann:
+  names:
+  - {first: Alexander G., last: Hauptmann}
+  - {first: Alex, last: Hauptmann}
+  - {first: Alexander, last: Hauptmann}
+alexander-gelbukh:
+  names:
+  - {first: Alexander, last: Gelbukh}
+  - {first: Alexander F., last: Gelbukh}
+alexander-m-rush:
+  names:
+  - {first: Alexander M., last: Rush}
+  - {first: Alexander, last: Rush}
+alexander-richard-fabbri:
+  names:
+  - {first: Alexander, last: Richard Fabbri}
+  - {first: Alexander R., last: Fabbri}
+  - {first: Alexander, last: Fabbri}
+alexander-rudnicky:
+  names:
+  - {first: Alexander, last: Rudnicky}
+  - {first: Alexander I., last: Rudnicky}
+  - {first: Alex, last: Rudnicky}
+  - {first: A., last: Rudnicky}
+  similar:
+  - alex-rudnick
+alexander-vasserman:
+  names:
+  - {first: Alexander, last: Vasserman}
+  - {first: Alex, last: Vasserman}
+alexander-yeh:
+  names:
+  - {first: Alexander, last: Yeh}
+  - {first: Alexander S., last: Yeh}
+  - {first: Alex, last: Yeh}
+alexandra-balahur:
+  names:
+  - {first: Alexandra, last: Balahur}
+  - {first: Alexandra, last: Balahur-Dobrescu}
+alexandra-l-uitdenbogerd:
+  names:
+  - {first: Alexandra L., last: Uitdenbogerd}
+  - {first: Alexandra, last: Uitdenbogerd}
+alexandre-andreewsky:
+  comment: LIMSI
+  names:
+  - {first: Alexandre, last: Andreewsky}
+  - {first: A., last: Andreewsky}
+  similar:
+  - alexander-andreyewsky
+alexandre-bouchard-cote:
+  names:
+  - {first: Alexandre, last: Bouchard-Côté}
+  - {first: Alexandre, last: Bouchard}
+alexandre-denis:
+  names:
+  - {first: Alexandre, last: Denis}
+  - {first: A., last: Denis}
+alexandre-rossi-alvares:
+  names:
+  - {first: Alexandre, last: Rossi Alvares}
+  - {first: Alexandre Rossi, last: Alvares}
+alexandre-termier:
+  names:
+  - {first: Alexandre, last: Termier}
+  - {first: A., last: Termier}
+alexandros-papangelis:
+  names:
+  - {first: Alexandros, last: Papangelis}
+  - {first: Alex, last: Papangelis}
+alexandru-ceausu:
+  names:
+  - {first: Alexandru, last: Ceauşu}
+  - {first: Alexandru, last: Ceausu}
+alexandru-lucian-ginsca:
+  names:
+  - {first: Alexandru-Lucian, last: Ginsca}
+  - {first: Alexandru, last: Ginsca}
+  - {first: Alexandru-Lucian, last: Gînscă}
+alexei-v-ivanov:
+  names:
+  - {first: Alexei V., last: Ivanov}
+  - {first: Alexei, last: Ivanov}
+alexis-konstantinidis:
+  names:
+  - {first: Alexis, last: Konstantinidis}
+  - {first: Alexis, last: Konstandinidis}
+alexis-manaster-ramer:
+  names:
+  - {first: Alexis, last: Manaster-Ramer}
+  - {first: Alexis, last: Manaster Ramer}
+alfio-gliozzo:
+  names:
+  - {first: Alfio, last: Gliozzo}
+  - {first: Alfio, last: Massimiliano Gliozzo}
+  - {first: Alfio Massimiliano, last: Gliozzo}
+  - {first: Alfio M., last: Gliozzo}
+alfons-juan:
+  names:
+  - {first: Alfons, last: Juan}
+  - {first: Alfons, last: Juan-Císcar}
+alfonso-mendes:
+  names:
+  - {first: Alfonso, last: Mendes}
+  - {first: Afonso, last: Mendes}
+alfredo-maldonado:
+  names:
+  - {first: Alfredo, last: Maldonado}
+  - {first: Alfredo, last: Maldonado Guerra}
+  - {first: Alfredo, last: Maldonado-Guerra}
+ali-hadian-cefidekhanie:
+  names:
+  - {first: Ali Hadian, last: Cefidekhanie}
+  - {first: Ali, last: Hadian}
+ali-hurriyetoglu:
+  names:
+  - {first: Ali, last: Hürriyetoğlu}
+  - {first: Ali, last: Hurriyetoglu}
+  - {first: Ali, last: Hürriyetoǧlu}
+alice-oh:
+  names:
+  - {first: Alice, last: Oh}
+  - {first: Alice H., last: Oh}
+alina-beatrice-lorent:
+  names:
+  - {first: Alina Beatrice, last: Lorent}
+  - {first: Alina Beatrice, last: Lorenţ}
+  - {first: Alina, last: Lorenț}
+alina-maria-ciobanu:
+  names:
+  - {first: Alina Maria, last: Ciobanu}
+  - {first: Alina, last: Ciobanu}
+aline-a-vanin:
+  names:
+  - {first: Aline A., last: Vanin}
+  - {first: Aline, last: Vanin}
+alipio-jorge:
+  names:
+  - {first: Alipio, last: Jorge}
+  - {first: Alípio, last: Jorge}
+allen-b-tucker:
+  names:
+  - {first: Allen B., last: Tucker}
+  - {first: Allen, last: Tucker}
+allen-l-gorin:
+  names:
+  - {first: Allen L., last: Gorin}
+  - {first: Allen, last: Gorin}
+almut-silja-hildebrand:
+  names:
+  - {first: Almut Silja, last: Hildebrand}
+  - {first: Silja, last: Hildebrand}
+  - {first: Almut, last: Hildebrand}
+alon-lavie:
+  names:
+  - {first: Alon, last: Lavie}
+  - {first: A., last: Lavie}
+alvaro-peris:
+  names:
+  - {first: Álvaro, last: Peris}
+  - {first: Alvaro, last: Peris}
+alvaro-rodrigo:
+  names:
+  - {first: Álvaro, last: Rodrigo}
+  - {first: Alvaro, last: Rodrigo}
+alvin-cheng-hsien-chen:
+  names:
+  - {first: Alvin Cheng-Hsien, last: Chen}
+  - {first: Cheng-Hsien, last: Chen}
+alvin-martin:
+  names:
+  - {first: Alvin, last: Martin}
+  - {first: Alvin F., last: Martin}
+amac-herdagdelen:
+  names:
+  - {first: Amaç, last: Herdaǧdelen}
+  - {first: Amaç, last: Herdağdelen}
+amal-al-saif:
+  names:
+  - {first: Amal, last: Al-Saif}
+  - {first: Amal, last: Alsaif}
+amalia-todirascu:
+  names:
+  - {first: Amalia, last: Todirascu}
+  - {first: Amalia, last: Todiraşcu}
+amanda-c-jobbins:
+  names:
+  - {first: Amanda C., last: Jobbins}
+  - {first: A.C., last: Jobbins}
+amanda-stent:
+  names:
+  - {first: Amanda, last: Stent}
+  - {first: Amanda J., last: Stent}
+  - {first: A., last: Stent}
+amarnag-subramanya:
+  names:
+  - {first: Amarnag, last: Subramanya}
+  - {first: Amar, last: Subramanya}
+amber-boydstun:
+  names:
+  - {first: Amber, last: Boydstun}
+  - {first: Amber E., last: Boydstun}
+amedeo-cappelli:
+  names:
+  - {first: Amedeo, last: Cappelli}
+  - {first: A., last: Cappelli}
+amelie-wuhrl:
+  names:
+  - {first: Amelie, last: Wührl}
+  - {first: Amelie, last: Wuehrl}
+aminul-islam:
+  names:
+  - {first: Aminul, last: Islam}
+  - {first: Md. Aminul, last: Islam}
+amparo-elizabeth-cano-basave:
+  names:
+  - {first: Amparo Elizabeth, last: Cano Basave}
+  - {first: Amparo Elizabeth, last: Cano-Basave}
+amy-weinberg:
+  names:
+  - {first: Amy, last: Weinberg}
+  - {first: Amy S., last: Weinberg}
+amy-winarske:
+  names:
+  - {first: Amy, last: Winarske}
+  - {first: A., last: Winarske}
+an-ta-huang:
+  names:
+  - {first: An-Ta, last: Huang}
+  - {first: Anta, last: Huang}
+ana-cristina-mendes:
+  names:
+  - {first: Ana Cristina, last: Mendes}
+  - {first: Ana C., last: Mendes}
+  - {first: Ana, last: Mendes}
+ana-fernandez:
+  names:
+  - {first: Ana, last: Fernandez}
+  - {first: Ana Fernández, last: Montraveta}
+  - {first: Ana, last: Fernández-Montraveta}
+ana-garcia-serrano:
+  names:
+  - {first: Ana, last: García-Serrano}
+  - {first: Ana M., last: García-Serrano}
+ana-gonzalez-ledesma:
+  names:
+  - {first: Ana, last: González-Ledesma}
+  - {first: Ana, last: Gonzalez}
+anais-cadilhac:
+  names:
+  - {first: Anais, last: Cadilhac}
+  - {first: Anaïs, last: Cadilhac}
+anais-lefeuvre:
+  names:
+  - {first: Anaïs, last: Lefeuvre}
+  - {first: Anaïs, last: Lefeuvre-Haftermeyer}
+anand-kumar-m:
+  names:
+  - {first: Anand, last: Kumar M}
+  - {first: Anand Kumar, last: Madasamy}
+  - {first: Anand Kumar, last: M}
+ananth-ramakrishnan-a:
+  names:
+  - {first: Ananth, last: Ramakrishnan A.}
+  - {first: Ananth, last: Ramakrishnan A}
+anas-el-isbihani:
+  names:
+  - {first: Anas El, last: Isbihani}
+  - {first: Anas, last: El Isbihani}
+anatoli-starostin:
+  names:
+  - {first: Anatoli, last: Starostin}
+  - {first: Anatoly, last: Starostin}
+anca-roxana-simon:
+  names:
+  - {first: Anca-Roxana, last: Simon}
+  - {first: Anca, last: Simon}
+  - {first: Anca-Roxana, last: Şimon}
+anders-johannsen:
+  names:
+  - {first: Anders, last: Johannsen}
+  - {first: Anders, last: Johanssen}
+anders-sogaard:
+  names:
+  - {first: Anders, last: Søgaard}
+  - {first: Anders, last: Sogaard}
+andoni-valverde:
+  names:
+  - {first: Andoni, last: Valverde}
+  - {first: A., last: Valverde}
+andras-csomai:
+  names:
+  - {first: Andras, last: Csomai}
+  - {first: András, last: Csomai}
+andras-kocsor:
+  names:
+  - {first: Andras, last: Kocsor}
+  - {first: András, last: Kocsor}
+andre-blessing:
+  names:
+  - {first: André, last: Blessing}
+  - {first: Andre, last: Blessing}
+andre-f-t-martins:
+  names:
+  - {first: André F. T., last: Martins}
+  - {first: Andre, last: Martins}
+  - {first: André, last: Martins}
+andre-freitas:
+  names:
+  - {first: André, last: Freitas}
+  - {first: Andre, last: Freitas}
+andre-kempe:
+  names:
+  - {first: Andre, last: Kempe}
+  - {first: André, last: Kempe}
+andre-lamurias:
+  names:
+  - {first: André, last: Lamúrias}
+  - {first: Andre, last: Lamurias}
+andre-mansikkaniemi:
+  names:
+  - {first: Andre, last: Mansikkaniemi}
+  - {first: André, last: Mansikkaniemi}
+andre-mariotti:
+  names:
+  - {first: Andre, last: Mariotti}
+  - {first: André, last: Mariotti}
+andre-valli:
+  names:
+  - {first: Andre, last: Valli}
+  - {first: André, last: Valli}
+andrea-bolognesi:
+  names:
+  - {first: Andrea, last: Bolognesi}
+  - {first: A., last: Bolognesi}
+andrea-setzer:
+  names:
+  - {first: Andrea, last: Setzer}
+  - {first: A., last: Setzer}
+andreas-eisele:
+  names:
+  - {first: Andreas, last: Eisele}
+  - {first: A., last: Eisele}
+andreas-kustner:
+  names:
+  - {first: Andreas, last: Kustner}
+  - {first: A., last: Kustner}
+andreas-persidis:
+  names:
+  - {first: Andreas, last: Persidis}
+  - {first: A., last: Persidis}
+andreas-soeborg-kirkedal:
+  names:
+  - {first: Andreas Søeborg, last: Kirkedal}
+  - {first: Andreas, last: Søeborg Kirkedal}
+andreas-stolcke:
+  names:
+  - {first: Andreas, last: Stolcke}
+  - {first: A., last: Stolcke}
+andrei-butnaru:
+  names:
+  - {first: Andrei, last: Butnaru}
+  - {first: Andrei M., last: Butnaru}
+andrei-popescu-belis:
+  names:
+  - {first: Andrei, last: Popescu-Belis}
+  - {first: Andrei, last: Popescu Belis}
+  - {first: A., last: Popescu-Belis}
+andrej-ljolje:
+  names:
+  - {first: Andrej, last: Ljolje}
+  - {first: A., last: Ljolje}
+  - {first: A, last: Ljolje}
+andrejs-vasiljevs:
+  names:
+  - {first: Andrejs, last: Vasiļjevs}
+  - {first: Andrejs, last: Vasiljevs}
+andres-marzal:
+  names:
+  - {first: Andrés, last: Marzal}
+  - {first: A., last: Marzal}
+andres-montoyo:
+  names:
+  - {first: Andrés, last: Montoyo}
+  - {first: Andres, last: Montoyo}
+  - {first: Andrés, last: Montoyo Guijarro}
+andrew-b-goldberg:
+  names:
+  - {first: Andrew B., last: Goldberg}
+  - {first: Andrew, last: Goldberg}
+andrew-bennett:
+  names:
+  - {first: Andrew, last: Bennett}
+  similar:
+  - andrew-bennetts
+andrew-bennetts:
+  names:
+  - {first: Andrew, last: Bennetts}
+  similar:
+  - andrew-bennett
+andrew-boyd:
+  names:
+  - {first: Andrew, last: Boyd}
+  - {first: Andrew D., last: Boyd}
+andrew-brasher:
+  names:
+  - {first: Andrew, last: Brasher}
+  - {first: A., last: Brasher}
+andrew-david-beale:
+  names:
+  - {first: Andrew David, last: Beale}
+  - {first: Andrew, last: David}
+andrew-gordon:
+  names:
+  - {first: Andrew, last: Gordon}
+  - {first: Andrew S., last: Gordon}
+andrew-haas:
+  names:
+  - {first: Andrew, last: Haas}
+  - {first: Andrew R., last: Haas}
+andrew-j-anderson:
+  names:
+  - {first: Andrew J., last: Anderson}
+  - {first: Andrew, last: Anderson}
+andrew-kehler:
+  names:
+  - {first: Andrew, last: Kehler}
+  - {first: Andy, last: Kehler}
+andrew-l-kun:
+  names:
+  - {first: Andrew L., last: Kun}
+  - {first: Andrew, last: Kun}
+andrew-maas:
+  names:
+  - {first: Andrew, last: Maas}
+  - {first: Andrew L., last: Maas}
+andrew-mackinlay:
+  names:
+  - {first: Andrew, last: MacKinlay}
+  - {first: Andrew, last: McKinlay}
+andrew-olney:
+  names:
+  - {first: Andrew, last: Olney}
+  - {first: Andrew M., last: Olney}
+andrew-smith:
+  names:
+  - {first: Andrew, last: Smith}
+  - {first: Andrew E., last: Smith}
+andrew-w-cole:
+  names:
+  - {first: Andrew W., last: Cole}
+  - {first: Andrew, last: Cole}
+andrew-wilson:
+  names:
+  - {first: Andrew, last: Wilson}
+  - {first: Andrew T., last: Wilson}
+andrew-y-ng:
+  names:
+  - {first: Andrew Y., last: Ng}
+  - {first: Andrew, last: Ng}
+angel-chang:
+  names:
+  - {first: Angel, last: Chang}
+  - {first: Angel X., last: Chang}
+angel-de-la-torre:
+  names:
+  - {first: Ángel, last: De la Torre}
+  - {first: A., last: De la Torre}
+angela-costa:
+  names:
+  - {first: Angela, last: Costa}
+  - {first: Ângela, last: Costa}
+angels-egea:
+  names:
+  - {first: Angels, last: Egea}
+  - {first: Àngels, last: Egea}
+angrosh-mandya:
+  names:
+  - {first: Angrosh, last: Mandya}
+  - {first: Mandya, last: Angrosh}
+anil-kumar-nelakanti:
+  names:
+  - {first: Anil Kumar, last: Nelakanti}
+  - {first: Anil, last: Kumar}
+anil-kumar-singh:
+  names:
+  - {first: Anil Kumar, last: Singh}
+  - {first: Anil, last: Kumar Singh}
+animashree-anandkumar:
+  names:
+  - {first: Animashree, last: Anandkumar}
+  - {first: Anima, last: Anandkumar}
+anirudh-sundar:
+  names:
+  - {first: Anirudh, last: Sundar}
+  - {first: Anirudh S., last: Sundar}
+  - {first: Anirudh S, last: Sundar}
+anish-das-sarma:
+  names:
+  - {first: Anish Das, last: Sarma}
+  - {first: Atish Das, last: Sarma}
+anita-lilla-vero:
+  names:
+  - {first: Anita Lilla, last: Verő}
+  - {first: Anita Lilla, last: Vero}
+anja-belz:
+  names:
+  - {first: Anja, last: Belz}
+  - {first: Anya, last: Belz}
+anja-hoethker:
+  names:
+  - {first: Anja, last: Hoethker}
+  - {first: Anja, last: Höthker}
+ankit-srivastava:
+  names:
+  - {first: Ankit, last: Srivastava}
+  - {first: Ankit Kumar, last: Srivastava}
+  - {first: Ankit K., last: Srivastava}
+  - {first: Ankit, last: Kumar}
+ankur-parikh:
+  names:
+  - {first: Ankur, last: Parikh}
+  - {first: Ankur P., last: Parikh}
+anna-jonsson-umea:
+  comment: Umeå University
+  disable_name_matching: true
+  names:
+  - {first: Anna, last: Jonsson}
+  orcid: 0000-0002-9873-4170
+anna-kupsc:
+  names:
+  - {first: Anna, last: Kupść}
+  - {first: Anna, last: Kupsc}
+anna-n-rafferty:
+  names:
+  - {first: Anna N., last: Rafferty}
+  - {first: Anna, last: Rafferty}
+anna-sagvall-hein:
+  names:
+  - {first: Anna, last: Sågvall Hein}
+  - {first: Anna Sagvall, last: Hein}
+  - {first: Anna Sågvall, last: Hein}
+annalu-waller:
+  names:
+  - {first: Annalu, last: Waller}
+  - {first: A., last: Waller}
+anne-abeille:
+  names:
+  - {first: Anne, last: Abeillé}
+  - {first: Anne, last: Abeille}
+anne-de-roeck:
+  names:
+  - {first: Anne, last: De Roeck}
+  - {first: Anne, last: DeRoeck}
+  - {first: Anne, last: de Roeck}
+  - {first: Anne, last: deRoeck}
+  - {first: A.N., last: De Roeck}
+  - {first: Anne N., last: De Roeck}
+anne-gohring:
+  names:
+  - {first: Anne, last: Göhring}
+  - {first: Anne, last: Goehring}
+anne-h-anderson:
+  names:
+  - {first: Anne H., last: Anderson}
+  - {first: Anne, last: Anderson}
+anne-haake:
+  names:
+  - {first: Anne, last: Haake}
+  - {first: Anne R., last: Haake}
+anne-kuhn:
+  names:
+  - {first: Anne, last: Kuhn}
+  - {first: A., last: Kuhn}
+anne-lacheret:
+  names:
+  - {first: Anne, last: Lacheret}
+  - {first: Anne, last: Lacheret-Dujour}
+annelies-braffort:
+  names:
+  - {first: Annelies, last: Braffort}
+  - {first: A., last: Braffort}
+annette-hautli:
+  names:
+  - {first: Annette, last: Hautli}
+  - {first: Annette, last: Hautli-Janisz}
+annette-ostling-andersson:
+  names:
+  - {first: Annette, last: Östling Andersson}
+  - {first: Annette, last: Östling}
+annette-rios-gonzales:
+  names:
+  - {first: Annette, last: Rios Gonzales}
+  - {first: Annette, last: Rios}
+annick-choisier:
+  names:
+  - {first: Annick, last: Choisier}
+  - {first: A., last: Choisier}
+annick-corluy:
+  names:
+  - {first: Annick, last: Corluy}
+  - {first: A., last: Corluy}
+ansaf-salleb-aouissi:
+  names:
+  - {first: Ansaf, last: Salleb-Aouissi}
+  - {first: Ansaf, last: Salleb-Aoussi}
+anselmo-penas:
+  names:
+  - {first: Anselmo, last: Peñas}
+  - {first: Anselmo, last: Penas}
+antal-van-den-bosch:
+  names:
+  - {first: Antal, last: van den Bosch}
+  - {first: Antal, last: Van den Bosch}
+  - {first: Antal, last: Van Den Bosch}
+anthony-hartley:
+  names:
+  - {first: Anthony, last: Hartley}
+  - {first: Anthony F., last: Hartley}
+  - {first: A., last: Hartley}
+anthony-hughes:
+  names:
+  - {first: Anthony, last: Hughes}
+  - {first: Anthony James, last: Hughes}
+  orcid: 0009-0003-4065-1094
+anthony-jameson:
+  names:
+  - {first: Anthony, last: Jameson}
+  - {first: A., last: Jameson}
+anthony-kroch:
+  names:
+  - {first: Anthony, last: Kroch}
+  - {first: Anthony S., last: Kroch}
+antje-rossdeutscher:
+  names:
+  - {first: Antje, last: Roßdeutscher}
+  - {first: Antje, last: Rossdeutscher}
+anton-batliner:
+  names:
+  - {first: Anton, last: Batliner}
+  - {first: A., last: Batliner}
+anton-karl-ingason:
+  names:
+  - {first: Anton Karl, last: Ingason}
+  - {first: Anton K., last: Ingason}
+antonella-de-angeli:
+  names:
+  - {first: Antonella, last: De Angeli}
+  - {first: Antonella, last: DeAngeli}
+antonio-bonafonte:
+  names:
+  - {first: Antonio, last: Bonafonte}
+  - {first: A., last: Bonafonte}
+antonio-branco:
+  names:
+  - {first: António, last: Branco}
+  - {first: Antonio, last: Branco}
+  - {first: Antonio H., last: Branco}
+  - {first: António Horta, last: Branco}
+antonio-cardenal:
+  names:
+  - {first: Antonio, last: Cardenal}
+  - {first: Antonio, last: Cardenal-Lopez}
+antonio-castellanos:
+  names:
+  - {first: Antonio, last: Castellanos}
+  - {first: A., last: Castellanos}
+antonio-fernandez-orquin:
+  names:
+  - {first: Antonio, last: Fernández Orquín}
+  - {first: Antonio, last: Fernandez Orquín}
+  - {first: Antonio, last: Fernández-Orquín}
+  - {first: Antonio, last: Fernández}
+antonio-ferrandez:
+  names:
+  - {first: Antonio, last: Ferrández}
+  - {first: Antonio, last: Ferrandez}
+  - {first: A., last: Ferrandez}
+  - {first: A., last: Ferrández}
+antonio-j-rubio:
+  names:
+  - {first: Antonio J., last: Rubio}
+  - {first: A.J., last: Rubio}
+antonio-jimeno-yepes:
+  names:
+  - {first: Antonio, last: Jimeno Yepes}
+  - {first: Antonio Jimeno, last: Yepes}
+  - {first: Antonio José, last: Jimeno Yepes}
+  - {first: Antonio, last: Jimeno-Yepes}
+antonio-l-lagarda:
+  names:
+  - {first: Antonio-L., last: Lagarda}
+  - {first: Antonio, last: Lagarda}
+  - {first: Antonio L., last: Lagarda}
+antonio-moreno-ortiz:
+  comment: Univ. of Málaga
+  names:
+  - {first: Antonio, last: Moreno-Ortiz}
+  - {first: Antonio, last: Moreno Ortiz}
+  - {first: Antonio, last: Moreno}
+  similar:
+  - antonio-moreno-ribas
+  - antonio-moreno-sandoval
+antonio-moreno-ribas:
+  comment: Univ. Rovira i Virgili
+  names:
+  - {first: Antonio, last: Moreno Ribas}
+  - {first: Antonio, last: Moreno}
+  similar:
+  - antonio-moreno-ortiz
+  - antonio-moreno-sandoval
+antonio-moreno-sandoval:
+  comment: NYU, Univ. Autónoma de Madrid
+  names:
+  - {first: Antonio, last: Moreno-Sandoval}
+  - {first: Antonio Moreno, last: Sandoval}
+  - {first: Antonio, last: Moreno}
+  similar:
+  - antonio-moreno-ortiz
+  - antonio-moreno-ribas
+antonio-pareja-lora:
+  names:
+  - {first: Antonio, last: Pareja Lora}
+  - {first: Antonio, last: Pareja-Lora}
+antonio-rincon:
+  names:
+  - {first: Antonio, last: Rincón}
+  - {first: Antonio, last: Rincon}
+antonio-s-valderrabanos:
+  names:
+  - {first: Antonio S., last: Valderrábanos}
+  - {first: Antonio S., last: Valderrabanos}
+antonio-valerio-miceli-barone:
+  names:
+  - {first: Antonio Valerio, last: Miceli-Barone}
+  - {first: Antonio Valerio, last: Miceli Barone}
+antonio-zampolli:
+  names:
+  - {first: Antonio, last: Zampolli}
+  - {first: A., last: Zampolli}
+antti-veikko-rosti:
+  names:
+  - {first: Antti-Veikko, last: Rosti}
+  - {first: Antti-Veikko I., last: Rosti}
+antton-gurrutxaga:
+  names:
+  - {first: Antton, last: Gurrutxaga}
+  - {first: A., last: Gurrutxaga}
+anup-barman:
+  names:
+  - {first: Anup, last: Barman}
+  - {first: Anup Kr., last: Barman}
+anup-kumar-kolya:
+  names:
+  - {first: Anup Kumar, last: Kolya}
+  - {first: Anup, last: Kumar Kolya}
+  - {first: Anup, last: Kolya}
+aparna-nurani-venkitasubramanian:
+  names:
+  - {first: Aparna, last: Nurani Venkitasubramanian}
+  - {first: Aparna N., last: Venkitasubramanian}
+arantza-casillas:
+  names:
+  - {first: Arantza, last: Casillas}
+  - {first: A., last: Casillas}
+arantza-diaz-de-ilarraza:
+  names:
+  - {first: Arantza, last: Díaz de Ilarraza}
+  - {first: Arantza, last: Diaz de Ilarraza}
+  - {first: A, last: Diaz de Ilarraza}
+  - {first: A., last: Diaz de Ilarraza Sanchez}
+  - {first: A., last: Diaz de Ilarraza}
+  - {first: A., last: Díaz de Ilarraza}
+aravind-joshi:
+  names:
+  - {first: Aravind, last: Joshi}
+  - {first: Aravind K., last: Joshi}
+  - {first: A., last: Joshi}
+  - {first: A.K., last: Joshi}
+  - {first: A. K., last: Joshi}
+  - {first: Aravin K., last: Joshi}
+archibald-michiels:
+  names:
+  - {first: Archibald, last: Michiels}
+  - {first: A., last: Michiels}
+ariadna-font-llitjos:
+  names:
+  - {first: Ariadna, last: Font Llitjós}
+  - {first: Ariadna, last: Font-Llitjos}
+  - {first: Ariadna, last: Font Llitjos}
+ariadne-m-b-rizzoni-carvalho:
+  names:
+  - {first: Ariadne, last: M. B. Rizzoni Carvalho}
+  - {first: Ariadne M. B. R., last: Carvalho}
+ariani-di-felippo:
+  names:
+  - {first: Ariani Di, last: Felippo}
+  - {first: Ariani, last: Di-Felippo}
+  - {first: Ariani, last: Di Felippo}
+ariel-schwartz:
+  names:
+  - {first: Ariel, last: Schwartz}
+  - {first: Ariel S., last: Schwartz}
+arienne-dwyer:
+  names:
+  - {first: Arienne, last: Dwyer}
+  - {first: A., last: Dwyer}
+arjun-atreya-v:
+  names:
+  - {first: Arjun Atreya, last: V}
+  - {first: Arjun, last: Atreya V}
+  - {first: Arjun, last: Atreya}
+arlindo-veiga:
+  names:
+  - {first: Arlindo, last: Veiga}
+  - {first: Arlindo O., last: Veiga}
+armando-suarez:
+  names:
+  - {first: Armando, last: Suárez}
+  - {first: A., last: Suárez}
+arnab-dhar:
+  names:
+  - {first: Arnab, last: Dhar}
+  - {first: Arnad, last: Dhar}
+arnaldo-candido-jr:
+  names:
+  - {first: Arnaldo, last: 'Candido, Jr.'}
+  - {first: Arnaldo, last: Candido Jr.}
+  - {first: Arnaldo, last: Candido Jr}
+  - {first: Arnaldo, last: Candido}
+arne-jonsson:
+  names:
+  - {first: Arne, last: Jönsson}
+  - {first: Arne, last: Jonsson}
+artem-sokolov:
+  names:
+  - {first: Artem, last: Sokolov}
+  - {first: Artem, last: Sokokov}
+artemis-parvizi:
+  names:
+  - {first: Artemis, last: Parvizi}
+  - {first: Artemis, last: Parvisi}
+arthur-c-graesser:
+  names:
+  - {first: Arthur C., last: Graesser}
+  - {first: Art, last: Graesser}
+  - {first: Arthur, last: Graesser}
+arturo-calvo-devesa:
+  names:
+  - {first: Arturo Calvo, last: Devesa}
+  - {first: Arturo, last: Calvo}
+arturo-curiel:
+  names:
+  - {first: Arturo, last: Curiel}
+  - {first: Arturo, last: Curiel Díaz}
+arturo-oncevay:
+  names:
+  - {first: Arturo, last: Oncevay}
+  - {first: Arturo, last: Oncevay-Marcos}
+arturs-znotins:
+  names:
+  - {first: Arturs, last: Znotins}
+  - {first: Artūrs, last: Znotiņš}
+arun-kumar-jayapal:
+  names:
+  - {first: Arun Kumar, last: Jayapal}
+  - {first: Arun, last: Jayapal}
+arya-d-mccarthy:
+  names:
+  - {first: Arya D., last: McCarthy}
+  - {first: Arya, last: McCarthy}
+arzucan-ozgur:
+  names:
+  - {first: Arzucan, last: Özgür}
+  - {first: Arzucan, last: Ozgur}
+asad-sayeed:
+  names:
+  - {first: Asad, last: Sayeed}
+  - {first: Asad B., last: Sayeed}
+ascension-gallardo-antolin:
+  names:
+  - {first: Ascension, last: Gallardo-Antolin}
+  - {first: Ascension, last: Gallardo}
+ashish-v-tendulkar:
+  names:
+  - {first: Ashish V., last: Tendulkar}
+  - {first: Ashish, last: Tendulkar}
+ashok-popat:
+  names:
+  - {first: Ashok, last: Popat}
+  - {first: Ashok C., last: Popat}
+ashvin-kannan:
+  names:
+  - {first: Ashvin, last: Kannan}
+  - {first: A., last: Kannan}
+asuncion-castano:
+  names:
+  - {first: Asunción, last: Castaño}
+  - {first: A., last: Castaño}
+asuncion-gomez-perez:
+  names:
+  - {first: Asunción, last: Gómez-Pérez}
+  - {first: Asunción Gómez, last: Pérez}
+asuncion-moreno:
+  names:
+  - {first: Asunción, last: Moreno}
+  - {first: Asuncion, last: Moreno}
+  - {first: Asuncíon, last: Moreno}
+  - {first: A., last: Moreno}
+aswarth-abhilash-dara:
+  names:
+  - {first: Aswarth Abhilash, last: Dara}
+  - {first: Aswarth, last: Dara}
+atanas-kiryakov:
+  names:
+  - {first: Atanas, last: Kiryakov}
+  - {first: Atanas K., last: Kiryakov}
+atsuko-koizumi:
+  names:
+  - {first: Atsuko, last: Koizumi}
+  - {first: A., last: Koizumi}
+atul-kr-ojha:
+  names:
+  - {first: Atul Kr., last: Ojha}
+  - {first: Atul Ku., last: Ojha}
+audrey-le:
+  names:
+  - {first: Audrey, last: Le}
+  - {first: Audrey N., last: Le}
+aurelie-herbelot:
+  names:
+  - {first: Aurélie, last: Herbelot}
+  - {first: Aurelie, last: Herbelot}
+aurelie-neveol:
+  names:
+  - {first: Aurelie, last: Neveol}
+  - {first: Aurélie, last: Névéol}
+avirup-sil:
+  names:
+  - {first: Avirup, last: Sil}
+  - {first: Avi, last: Sil}
+ayisigi-b-sevdik-calli:
+  names:
+  - {first: Ayisigi B., last: Sevdik-Calli}
+  - {first: Ayişiği, last: Sevdik-Çalli}
+ayush-kumar:
+  names:
+  - {first: Ayush, last: Kumar}
+  - {first: Kumar, last: Ayush}
+azucena-montes-rendon:
+  names:
+  - {first: Azucena, last: Montes-Rendon}
+  - {first: Azucena, last: Montes}
+baiba-saulite:
+  names:
+  - {first: Baiba, last: Saulīte}
+  - {first: Baiba, last: Saulite}
+balamurali-ar:
+  names:
+  - {first: Balamurali, last: AR}
+  - {first: Balamurali, last: A.R.}
+  - {first: Balamurali, last: A.R}
+balaraman-ravindran:
+  names:
+  - {first: Balaraman, last: Ravindran}
+  - {first: B., last: Ravindran}
+balazs-kis:
+  names:
+  - {first: Balázs, last: Kis}
+  - {first: Balazs, last: Kis}
+bali-ranaivo-malancon:
+  names:
+  - {first: Bali, last: Ranaivo-Malançon}
+  - {first: Bali, last: Ranaivo-Malancon}
+  - {first: Bali Ranaivo, last: Malanҫon}
+bang-nguyen:
+  names:
+  - {first: Bang, last: Nguyen}
+  orcid: 0009-0002-8365-4562
+bao-liang-lu:
+  names:
+  - {first: Bao-Liang, last: Lu}
+  - {first: Bao-liang, last: Lu}
+bao-quoc-ho:
+  names:
+  - {first: Bao Quoc, last: Ho}
+  - {first: Quoc, last: Ho}
+  - {first: Quoc, last: Ho Bao}
+baobao-chang:
+  names:
+  - {first: Baobao, last: Chang}
+  - {first: Bao-Bao, last: Chang}
+barathi-ganesh-h-b:
+  names:
+  - {first: Barathi, last: Ganesh H. B.}
+  - {first: Barathi, last: Ganesh HB}
+barbara-bullock:
+  names:
+  - {first: Barbara, last: Bullock}
+  - {first: Barbara E., last: Bullock}
+barbara-gawronska:
+  names:
+  - {first: Barbara, last: Gawronska}
+  - {first: Barbara, last: Gawronska-Werngren}
+  - {first: Barbara, last: Gawrońska-Werngren}
+barbara-h-partee:
+  names:
+  - {first: Barbara H., last: Partee}
+  - {first: Barbara, last: Partee}
+barbara-j-grosz:
+  names:
+  - {first: Barbara J., last: Grosz}
+  - {first: Barbara, last: Grosz}
+barbara-lewandowska-tomaszyk:
+  names:
+  - {first: Barbara, last: Lewandowska-Tomaszyk}
+  - {first: Barbara, last: Lewandowska}
+barbara-plank:
+  names:
+  - {first: Barbara, last: Plank}
+  - {first: B., last: Plank}
+barbora-hladka:
+  names:
+  - {first: Barbora, last: Hladká}
+  - {first: Barbora, last: Hladka}
+  - {first: B., last: Hladká}
+baskaran-sankaran:
+  names:
+  - {first: Baskaran, last: Sankaran}
+  - {first: Sankaran, last: Baskaran}
+bassam-haddad:
+  names:
+  - {first: Bassam, last: Haddad}
+  - {first: B., last: Haddad}
+bayan-abu-shawar:
+  names:
+  - {first: Bayan Abu, last: Shawar}
+  - {first: Bayan, last: Abu Shawar}
+baye-yimam-mekonnen:
+  names:
+  - {first: Baye Yimam, last: Mekonnen}
+  - {first: Baye, last: Yimam}
+bayu-distiawan:
+  names:
+  - {first: Bayu, last: Distiawan}
+  - {first: Bayu Distiawan, last: Trisedya}
+beata-megyesi:
+  names:
+  - {first: Beáta, last: Megyesi}
+  - {first: Beata, last: Megyesi}
+  - {first: Beáta Bandmann, last: Megyesi}
+  - {first: Beáta B., last: Megyesi}
+beata-trawinski:
+  names:
+  - {first: Beata, last: Trawiński}
+  - {first: Beata, last: Trawinski}
+beatrice-alex:
+  names:
+  - {first: Beatrice, last: Alex}
+  - {first: Bea, last: Alex}
+beatrice-daille:
+  names:
+  - {first: Béatrice, last: Daille}
+  - {first: Beatrice, last: Daille}
+beatrice-oshika:
+  names:
+  - {first: Beatrice, last: Oshika}
+  - {first: Beatrice T., last: Oshika}
+beatrice-santorini:
+  names:
+  - {first: Beatrice, last: Santorini}
+  - {first: B., last: Santorini}
+begona-villada-moiron:
+  names:
+  - {first: Begoña Villada, last: Moirón}
+  - {first: Begoña, last: Villada Moirón}
+  - {first: Begoña, last: Villada}
+  - {first: M. Begoña Villada, last: Moirón}
+behrang-qasemizadeh:
+  names:
+  - {first: Behrang, last: QasemiZadeh}
+  - {first: Behrang, last: Q. Zadeh}
+  - {first: Behrang Q., last: Zadeh}
+  - {first: Behrang, last: Zadeh}
+  - {first: Behrang, last: Qasemizadeh}
+behrouz-minaei-bidgoli:
+  names:
+  - {first: Behrouz, last: Minaei-Bidgoli}
+  - {first: Behrouz, last: Minaei-bidgoli}
+  - {first: Behrouz, last: Minaei}
+belem-priego-sanchez:
+  names:
+  - {first: Belém, last: Priego Sanchez}
+  - {first: Belem, last: Priego}
+belinda-z-li:
+  names:
+  - {first: Belinda Z., last: Li}
+  - {first: Belinda, last: Li}
+ben-swanson:
+  names:
+  - {first: Ben, last: Swanson}
+  - {first: Benjamin, last: Swanson}
+ben-wellner:
+  names:
+  - {first: Ben, last: Wellner}
+  - {first: Benjamin, last: Wellner}
+benjamin-borschinger:
+  names:
+  - {first: Benjamin, last: Börschinger}
+  - {first: Benjamin, last: Boerschinger}
+benjamin-k-tsou:
+  names:
+  - {first: Benjamin K., last: Tsou}
+  - {first: Benjamin K.Y., last: Tsou}
+  - {first: Benjamin K., last: T’sou}
+  - {first: Benjamin, last: Tsou}
+  - {first: Benjamin K, last: Tsou}
+  - {first: B. K., last: T’sou}
+benjamin-matthias-ruppik:
+  names:
+  - {first: Benjamin Matthias, last: Ruppik}
+  - {first: Benjamin, last: Ruppik}
+  orcid: 0000-0001-9035-9217
+benjamin-s-meyers:
+  names:
+  - {first: Benjamin S., last: Meyers}
+  - {first: Benjamin, last: Meyers}
+benjamin-wing:
+  names:
+  - {first: Benjamin, last: Wing}
+  - {first: Ben, last: Wing}
+benoit-crabbe:
+  names:
+  - {first: Benoit, last: Crabbé}
+  - {first: Benoît, last: Crabbé}
+benoit-favre:
+  names:
+  - {first: Benoit, last: Favre}
+  - {first: Benoît, last: Favre}
+benoit-gaillard:
+  names:
+  - {first: Benoit, last: Gaillard}
+  - {first: Benoît, last: Gaillard}
+benoit-habert:
+  names:
+  - {first: Benoit, last: Habert}
+  - {first: Benoît, last: Habert}
+  - {first: B., last: Habert}
+benoit-lavoie:
+  names:
+  - {first: Benoit, last: Lavoie}
+  - {first: B., last: Lavoie}
+benoit-ozell:
+  names:
+  - {first: Benoît, last: Ozell}
+  - {first: Benoit, last: Ozell}
+benoit-sagot:
+  names:
+  - {first: Benoît, last: Sagot}
+  - {first: Benoit, last: Sagot}
+bente-maegaard:
+  names:
+  - {first: Bente, last: Maegaard}
+  - {first: B., last: Maegaard}
+bento-carlos-dias-da-silva:
+  names:
+  - {first: Bento Carlos, last: Dias-da-Silva}
+  - {first: Bento Carlos Dias, last: da Silva}
+berfin-aktas:
+  names:
+  - {first: Berfin, last: Aktaş}
+  - {first: Berfin, last: Aktas}
+berkay-furkan-onder:
+  names:
+  - {first: Berkay Furkan, last: Önder}
+  - {first: Berkay, last: Önder}
+bernard-merialdo:
+  names:
+  - {first: Bernard, last: Merialdo}
+  - {first: B., last: Merialdo}
+bernard-seite:
+  names:
+  - {first: Bernard, last: Seite}
+  - {first: B., last: Seite}
+bernard-vauquois:
+  names:
+  - {first: Bernard, last: Vauquois}
+  - {first: B., last: Vauquois}
+bernardino-casas:
+  names:
+  - {first: Bernardino, last: Casas}
+  - {first: B., last: Casas}
+bernardo-magnini:
+  names:
+  - {first: Bernardo, last: Magnini}
+  - {first: B., last: Magnini}
+bernd-kiefer:
+  names:
+  - {first: Bernd, last: Kiefer}
+  - {first: B., last: Kiefer}
+bernd-mobius:
+  names:
+  - {first: Bernd, last: Möbius}
+  - {first: Bernd, last: Mobius}
+beth-ann-hockey:
+  names:
+  - {first: Beth Ann, last: Hockey}
+  - {first: Beth A., last: Hockey}
+  - {first: Beth, last: Hockey}
+  - {first: B. A., last: Hockey}
+beth-m-sundheim:
+  names:
+  - {first: Beth M., last: Sundheim}
+  - {first: Beth, last: Sundheim}
+bevan-jones:
+  names:
+  - {first: Bevan, last: Jones}
+  - {first: Bevan K., last: Jones}
+  - {first: Bevan Keeley, last: Jones}
+bhadran-v-k:
+  names:
+  - {first: Bhadran V., last: K}
+  - {first: Bhadran, last: V K}
+  - {first: Bhadran V, last: K}
+bhavana-dalvi:
+  names:
+  - {first: Bhavana, last: Dalvi}
+  - {first: Bhavana, last: Dalvi Mishra}
+bhuvana-ramabhadran:
+  names:
+  - {first: Bhuvana, last: Ramabhadran}
+  - {first: B., last: Ramabhadran}
+bianka-buschbeck:
+  names:
+  - {first: Bianka, last: Buschbeck}
+  - {first: Bianka, last: Buschbeck-Wolf}
+  - {first: B., last: Buschbeck}
+bidyut-baran-chaudhuri:
+  names:
+  - {first: Bidyut Baran, last: Chaudhuri}
+  - {first: Bidyut B., last: Chaudhuri}
+  - {first: B. B., last: Chaudhuri}
+biljana-drndarevic:
+  names:
+  - {first: Biljana, last: Drndarević}
+  - {first: Biljana, last: Drndarevic}
+bill-byrne:
+  comment: University of Cambridge
+  names:
+  - {first: Bill, last: Byrne}
+  - {first: W., last: Byrne}
+  - {first: William, last: Byrne}
+  - {first: William J., last: Byrne}
+bill-byrne-ucsd:
+  comment: UCSD Ph.d; https://www.linkedin.com/in/billb/
+  names:
+  - {first: Bill, last: Byrne}
+bill-yuchen-lin:
+  names:
+  - {first: Bill Yuchen, last: Lin}
+  - {first: Bill Y., last: Lin}
+billy-t-m-wong:
+  names:
+  - {first: Billy T.M., last: Wong}
+  - {first: Billy T. M., last: Wong}
+bingquan-liu:
+  names:
+  - {first: Bingquan, last: Liu}
+  - {first: BingQuan, last: Liu}
+binyam-ephrem-seyoum:
+  names:
+  - {first: Binyam Ephrem, last: Seyoum}
+  - {first: Binyam, last: Ephrem}
+binyam-gebrekidan-gebre:
+  names:
+  - {first: Binyam Gebrekidan, last: Gebre}
+  - {first: Binyam, last: Gebre}
+birte-lonneker:
+  names:
+  - {first: Birte, last: Lönneker}
+  - {first: Birte, last: Lönneker-Rodman}
+  - {first: Birte, last: Loenneker-Rodman}
+bjorn-gamback:
+  names:
+  - {first: Björn, last: Gambäck}
+  - {first: Bjorn, last: Gamback}
+  - {first: Björn, last: Gämback}
+bjorn-rudzewitz:
+  names:
+  - {first: Björn, last: Rudzewitz}
+  - {first: Bjoern, last: Rudzewitz}
+bjorn-schuller:
+  names:
+  - {first: Björn, last: Schuller}
+  - {first: Bjoern, last: Schuller}
+blake-howald:
+  names:
+  - {first: Blake, last: Howald}
+  - {first: Blake Stephen, last: Howald}
+bo-june-paul-hsu:
+  names:
+  - {first: Bo-June (Paul), last: Hsu}
+  - {first: Bo-june Paul, last: Hsu}
+  - {first: Bo-June Paul, last: Hsu}
+bo-li-bh:
+  comment: BeiHang
+  names:
+  - {first: Bo, last: Li}
+bo-li-cas:
+  comment: Chinese Academy of Sciences
+  names:
+  - {first: Bo, last: Li}
+bo-li-nus:
+  comment: NUS, Google
+  names:
+  - {first: Bo, last: Li}
+  - {first: Troy, last: Lee}
+bo-li-vanderbilt:
+  comment: Vanderbilt, UIUC
+  names:
+  - {first: Bo, last: Li}
+bo-yeong-kang:
+  names:
+  - {first: Bo-Yeong, last: Kang}
+  - {first: Bo-yeong, last: Kang}
+bodil-nistrup-madsen:
+  names:
+  - {first: Bodil, last: Nistrup Madsen}
+  - {first: Bodil Nistrup, last: Madsen}
+bojana-dalbelo-basic:
+  names:
+  - {first: Bojana, last: Dalbelo Bašić}
+  - {first: Bojana Dalbelo, last: Bašić}
+bolette-sandford-pedersen:
+  names:
+  - {first: Bolette Sandford, last: Pedersen}
+  - {first: Bolette, last: Sandford Pedersen}
+  - {first: Bolette S., last: Pedersen}
+  - {first: Bolette, last: Pedersen}
+  - {first: Bo, last: Pedersen}
+bong-wan-kim:
+  names:
+  - {first: Bong-Wan, last: Kim}
+  - {first: Jong Wan, last: Kim}
+bonnie-dorr:
+  names:
+  - {first: Bonnie, last: Dorr}
+  - {first: Bonnie J., last: Dorr}
+bonnie-glover-stalls:
+  names:
+  - {first: Bonnie Glover, last: Stalls}
+  - {first: Bonnie, last: Glover}
+bonnie-webber:
+  names:
+  - {first: Bonnie, last: Webber}
+  - {first: Bonnie L., last: Webber}
+  - {first: Bonnie Lynn, last: Webber}
+  - {first: B., last: Webber}
+  - {first: B.L., last: Nash-Webber}
+bor-shen-lin:
+  names:
+  - {first: Bor-Shen, last: Lin}
+  - {first: Bor-shen, last: Lin}
+boris-v-dobrov:
+  names:
+  - {first: Boris V., last: Dobrov}
+  - {first: Boris, last: Dobrov}
+  - {first: B., last: Dobrov}
+borja-navarro:
+  names:
+  - {first: Borja, last: Navarro}
+  - {first: Borja, last: Navarro-Colorado}
+  - {first: B., last: Navarro}
+borje-f-karlsson:
+  comment: https://github.com/acl-org/acl-anthology/issues/4041
+  degree: PUC-Rio
+  names:
+  - {first: Börje F., last: Karlsson}
+  - {first: Börje, last: Karlsson}
+  orcid: 0000-0001-8925-360X
+boubaker-meddeb-hamrouni:
+  names:
+  - {first: Boubaker, last: Meddeb-Hamrouni}
+  - {first: Boubaker, last: Meddeb Hamrouni}
+boyan-onyshkevych:
+  names:
+  - {first: Boyan, last: Onyshkevych}
+  - {first: Boyan A., last: Onyshkevych}
+brage-ekroll-jahren:
+  names:
+  - {first: Brage Ekroll, last: Jahren}
+  - {first: Brage, last: Jahren}
+brandon-m-stewart:
+  names:
+  - {first: Brandon M., last: Stewart}
+  - {first: Brandon, last: Stewart}
+branimir-boguraev:
+  names:
+  - {first: Branimir, last: Boguraev}
+  - {first: Branimir K., last: Boguraev}
+  - {first: Bran, last: Boguraev}
+  - {first: B.K., last: Boguraev}
+brett-w-bader:
+  names:
+  - {first: Brett W., last: Bader}
+  - {first: Brett, last: Bader}
+brian-cantwell-smith:
+  names:
+  - {first: Brian Cantwell, last: Smith}
+  - {first: Brian, last: Smith}
+brian-j-chandler:
+  names:
+  - {first: Brian J., last: Chandler}
+  - {first: Brian, last: Chandler}
+  - {first: B. J., last: Chandler}
+brian-kingsbury:
+  names:
+  - {first: Brian, last: Kingsbury}
+  - {first: B., last: Kingsbury}
+brian-mitchell:
+  names:
+  - {first: Brian, last: Mitchell}
+  - {first: B., last: Mitchell}
+brian-w-dillon:
+  names:
+  - {first: Brian W., last: Dillon}
+  - {first: Brian, last: Dillon}
+bridget-mcinnes:
+  names:
+  - {first: Bridget, last: McInnes}
+  - {first: Bridget Thomson, last: McInnes}
+  - {first: Bridget T., last: McInnes}
+brigitte-jorg:
+  names:
+  - {first: Brigitte, last: Jörg}
+  - {first: Brigitte, last: Jorg}
+brigitte-roudaud:
+  names:
+  - {first: Brigitte, last: Roudaud}
+  - {first: B., last: Roudaud}
+britta-zeller:
+  names:
+  - {first: Britta, last: Zeller}
+  - {first: Britta D., last: Zeller}
+bruce-e-nevin:
+  names:
+  - {first: Bruce E., last: Nevin}
+  - {first: Bruce, last: Nevin}
+bruce-porter:
+  names:
+  - {first: Bruce, last: Porter}
+  - {first: Bruce W., last: Porter}
+bruce-w-ballard:
+  names:
+  - {first: Bruce W., last: Ballard}
+  - {first: Bruce, last: Ballard}
+  - {first: B., last: Ballard}
+brunelle-magnana-ekoukou:
+  names:
+  - {first: Brunelle, last: Magnana Ekoukou}
+  - {first: Brunelle Magnana, last: Ekoukou}
+bryan-pellom:
+  names:
+  - {first: Bryan, last: Pellom}
+  - {first: B., last: Pellom}
+bryan-r-routledge:
+  names:
+  - {first: Bryan R., last: Routledge}
+  - {first: Bryan, last: Routledge}
+byron-c-wallace:
+  names:
+  - {first: Byron C., last: Wallace}
+  - {first: Byron, last: Wallace}
+byung-gyu-ahn:
+  names:
+  - {first: Byung-Gyu, last: Ahn}
+  - {first: Byung Gyu, last: Ahn}
+c-h-lee:
+  names:
+  - {first: C. H., last: Lee}
+  - {first: C.-H., last: Lee}
+c-j-rupp:
+  names:
+  - {first: C.J., last: Rupp}
+  - {first: C. J., last: Rupp}
+c-mario-christoudias:
+  names:
+  - {first: C. Mario, last: Christoudias}
+  - {first: Mario, last: Christoudias}
+c-raymond-perrault:
+  names:
+  - {first: C. Raymond, last: Perrault}
+  - {first: Raymond, last: Perrault}
+cacilia-zirn:
+  names:
+  - {first: Cäcilia, last: Zirn}
+  - {first: Caecilia, last: Zirn}
+cagil-sonmez:
+  names:
+  - {first: Cagil, last: Sonmez}
+  - {first: Çağıl, last: Sönmez}
+  - {first: Cagil, last: Sönmez}
+calkin-s-montero:
+  names:
+  - {first: Calkin S., last: Montero}
+  - {first: Calkin, last: Montero}
+cam-tu-nguyen:
+  names:
+  - {first: Cam-Tu, last: Nguyen}
+  - {first: Cẩm Tú, last: Nguyễn}
+cameron-shaw-fordyce:
+  names:
+  - {first: Cameron Shaw, last: Fordyce}
+  - {first: Cameron, last: Fordyce}
+canberk-ozdemir:
+  names:
+  - {first: Canberk, last: Ozdemir}
+  - {first: Canberk, last: Özdemir}
+candace-a-kamm:
+  names:
+  - {first: Candace A., last: Kamm}
+  - {first: Candace, last: Kamm}
+candace-l-sidner:
+  names:
+  - {first: Candace L., last: Sidner}
+  - {first: Candace, last: Sidner}
+carl-de-marcken:
+  names:
+  - {first: Carl, last: de Marcken}
+  - {first: Carl G., last: de Marcken}
+carl-pollard:
+  names:
+  - {first: Carl, last: Pollard}
+  - {first: Carl J., last: Pollard}
+carla-parra-escartin:
+  names:
+  - {first: Carla, last: Parra Escartín}
+  - {first: Carla, last: Parra}
+  - {first: Carla Parra, last: Escartín}
+  - {first: Carla Parra, last: Escartin}
+carlos-a-iglesias:
+  names:
+  - {first: Carlos A., last: Iglesias}
+  - {first: Carlos, last: Iglesias}
+carlos-a-prolo:
+  names:
+  - {first: Carlos A., last: Prolo}
+  - {first: Carlos, last: Prolo}
+carlos-alzate:
+  names:
+  - {first: Carlos, last: Alzate}
+  - {first: Carlos, last: Alzate Perez}
+carlos-d-martinez-hinarejos:
+  names:
+  - {first: Carlos-D., last: Martínez-Hinarejos}
+  - {first: Carlos D., last: Martínez-Hinarejos}
+  - {first: Carlos D., last: Martínez Hinarejos}
+  - {first: Carlos D., last: Martínez}
+carlos-henriquez:
+  names:
+  - {first: Carlos, last: Henríquez}
+  - {first: Carlos, last: Henriquez}
+  - {first: Carlos A., last: Henríquez Q.}
+carlos-mario-zapata-jaramillo:
+  names:
+  - {first: Carlos Mario, last: Zapata Jaramillo}
+  - {first: Carlos M., last: Zapata Jaramillo}
+carlos-martin-vide:
+  names:
+  - {first: Carlos, last: Martín-Vide}
+  - {first: Carlos Martin, last: Vide}
+carlos-rodriguez-penagos:
+  names:
+  - {first: Carlos, last: Rodriguez-Penagos}
+  - {first: Carlos, last: Rodríguez}
+  - {first: Carlos Rodriguez, last: Penagos}
+  - {first: Carlos, last: Rodríguez Penagos}
+  - {first: Carlos, last: Rodríguez-Penagos}
+carlos-subirats-ruggeberg:
+  names:
+  - {first: Carlos Subirats, last: Rüggeberg}
+  - {first: Carlos, last: Subirats}
+carmen-garcia-mateo:
+  names:
+  - {first: Carmen, last: Garcia-Mateo}
+  - {first: Carmen, last: García-Mateo}
+carol-friedman:
+  names:
+  - {first: Carol, last: Friedman}
+  - {first: C., last: Friedman}
+carol-van-ess-dykema:
+  names:
+  - {first: Carol, last: Van Ess-Dykema}
+  - {first: Carol J., last: Van Ess-Dykema}
+  - {first: Carol, last: VanEss-Dykema}
+carole-bergamini:
+  names:
+  - {first: Carole, last: Bergamini}
+  - {first: C., last: Bergamini}
+carolina-scarton:
+  names:
+  - {first: Carolina, last: Scarton}
+  - {first: Carolina Evaristo, last: Scarton}
+caroline-barriere:
+  names:
+  - {first: Caroline, last: Barriere}
+  - {first: Caroline, last: Barrière}
+caroline-bousquet-vernhettes:
+  names:
+  - {first: Caroline, last: Bousquet-Vernhettes}
+  - {first: Caroline, last: Bousquet}
+carolyn-rose:
+  names:
+  - {first: Carolyn, last: Rose}
+  - {first: Carolyn P., last: Rose}
+  - {first: Carolyn P., last: Rosé}
+  - {first: Carolyn, last: P. Rosé}
+  - {first: Carolyn Penstein, last: Rose}
+  - {first: Carolyn, last: Penstein Rosé}
+  - {first: Carolyn Penstein, last: Rosé}
+  - {first: Carolyn, last: Penstein-Rosé}
+  - {first: Carolyn, last: Rosé}
+  - {first: C. P., last: Rose}
+casey-kennington:
+  names:
+  - {first: Casey, last: Kennington}
+  - {first: Casey Redd, last: Kennington}
+cassandra-l-jacobs:
+  names:
+  - {first: Cassandra L., last: Jacobs}
+  - {first: Cassandra, last: Jacobs}
+cassia-trojahn:
+  names:
+  - {first: Cassia, last: Trojahn}
+  - {first: Cássia, last: Trojahn}
+catalin-mititelu:
+  names:
+  - {first: Catalin, last: Mititelu}
+  - {first: Cătălin, last: Mititelu}
+catalina-barbu:
+  names:
+  - {first: Cătălina, last: Barbu}
+  - {first: Catalina, last: Barbu}
+  - {first: C., last: Barbu}
+catalina-oana-tudor:
+  names:
+  - {first: Catalina Oana, last: Tudor}
+  - {first: Catalina O., last: Tudor}
+catherine-chen-bu:
+  comment: Brown
+  names:
+  - {first: Catherine, last: Chen}
+catherine-chen-ucberkley:
+  comment: UC Berkley
+  names:
+  - {first: Catherine, last: Chen}
+catherine-i-watson:
+  names:
+  - {first: Catherine I., last: Watson}
+  - {first: Catherine, last: Watson}
+catherine-macleod:
+  names:
+  - {first: Catherine, last: Macleod}
+  - {first: Catherine, last: MacLeod}
+catherine-n-ball:
+  names:
+  - {first: Catherine N., last: Ball}
+  - {first: Catherine, last: Ball}
+catia-cucchiarini:
+  names:
+  - {first: Catia, last: Cucchiarini}
+  - {first: C., last: Cucchiarini}
+cecile-fabre:
+  names:
+  - {first: Cécile, last: Fabre}
+  - {first: Cecile, last: Fabre}
+cecile-paris:
+  names:
+  - {first: Cecile, last: Paris}
+  - {first: Cécile, last: Paris}
+  - {first: Cecile L., last: Paris}
+cecilia-ovesdotter-alm:
+  names:
+  - {first: Cecilia, last: Ovesdotter Alm}
+  - {first: Cecilia Ovesdotter, last: Alm}
+  - {first: Cecilia O., last: Alm}
+  - {first: Cecilia, last: O. Alm}
+cedric-gendrot:
+  names:
+  - {first: Cédric, last: Gendrot}
+  - {first: Cedric, last: Gendrot}
+cedrick-fairon:
+  names:
+  - {first: Cédrick, last: Fairon}
+  - {first: Cedrick, last: Fairon}
+celine-de-looze:
+  names:
+  - {first: Céline, last: de Looze}
+  - {first: Céline, last: Delooze}
+  - {first: Céline, last: De Looze}
+  - {first: Celine, last: De Looze}
+cem-bozsahin:
+  names:
+  - {first: Cem, last: Bozsahin}
+  - {first: Cem, last: Bozşahin}
+  - {first: H. Cem, last: Bozsahin}
+cenel-augusto-perez:
+  names:
+  - {first: Cenel-Augusto, last: Perez}
+  - {first: Cenel Augusto, last: Perez}
+chafic-mukbel:
+  names:
+  - {first: Chafic, last: Mukbel}
+  - {first: C., last: Mukbel}
+chalathip-thumkanon:
+  names:
+  - {first: Chalathip, last: Thumkanon}
+  - {first: Chalatip, last: Thumkanon}
+chan-hung-kuo:
+  names:
+  - {first: Chan-hung, last: Kuo}
+  - {first: Chan-Hung, last: Kuo}
+chandra-kiran-reddy-evuru:
+  names:
+  - {first: Chandra Kiran Reddy, last: Evuru}
+  - {first: Chandra Kiran, last: Evuru}
+chang-hyun-kim:
+  names:
+  - {first: Chang-Hyun, last: Kim}
+  - {first: Changhyun, last: Kim}
+  - {first: Chang Hyun, last: Kim}
+changning-huang:
+  names:
+  - {first: Changning, last: Huang}
+  - {first: Chang-Ning, last: Huang}
+  - {first: Chang-ning, last: Huang}
+changye-li-umn:
+  degree: University of Minnesota
+  disable_name_matching: true
+  names:
+  - {first: Changye, last: Li}
+  orcid: 0000-0002-9743-7406
+chantal-enguehard:
+  names:
+  - {first: Chantal, last: Enguehard}
+  - {first: C., last: Enguehard}
+chantal-perez-hernandez:
+  names:
+  - {first: Chantal, last: Pérez-Hernández}
+  - {first: Chantal, last: Pérez}
+chao-han-huck-yang:
+  names:
+  - {first: Chao-Han Huck, last: Yang}
+  - {first: Huck Chao-Han, last: Yang}
+chao-jan-chen:
+  names:
+  - {first: Chao-Jan, last: Chen}
+  - {first: Chao-jan, last: Chen}
+chao-zhang-tu:
+  comment: Tsinghua University
+  names:
+  - {first: Chao, last: Zhang}
+  - {first: Zhang, last: Chao}
+chaoqun-liu-ntu:
+  comment: NTU
+  disable_name_matching: true
+  names:
+  - {first: Chaoqun, last: Liu}
+  orcid: 0000-0001-8014-2516
+charles-b-callaway:
+  names:
+  - {first: Charles B., last: Callaway}
+  - {first: Charles, last: Callaway}
+charles-c-lee:
+  names:
+  - {first: Charles C., last: Lee}
+  - {first: Charles, last: Lee}
+charles-chen-jr:
+  names:
+  - {first: Charles, last: 'Chen, Jr.'}
+  - {first: Charles, last: Chen}
+  - {first: Charles, last: Chen Jr.}
+charles-j-fillmore:
+  names:
+  - {first: Charles J., last: Fillmore}
+  - {first: Charles, last: Fillmore}
+charles-l-a-clarke:
+  names:
+  - {first: Charles L. A., last: Clarke}
+  - {first: C. L. A., last: Clarke}
+charles-p-dolan:
+  names:
+  - {first: Charles P., last: Dolan}
+  - {first: Charles, last: Dolan}
+charles-t-hemphill:
+  names:
+  - {first: Charles T., last: Hemphill}
+  - {first: Charles, last: Hemphill}
+charles-yang:
+  names:
+  - {first: Charles, last: Yang}
+  - {first: Charles D., last: Yang}
+chau-minh-pham:
+  names:
+  - {first: Chau Minh, last: Pham}
+  - {first: Chau, last: Pham}
+  orcid: 0009-0004-0435-7450
+chedi-bechikh-ali:
+  names:
+  - {first: Chedi, last: Bechikh Ali}
+  - {first: Chedi, last: Bechikh}
+chee-wee-leong:
+  names:
+  - {first: Chee Wee, last: Leong}
+  - {first: Chee Wee (Ben), last: Leong}
+cheikh-m-bamba-dione:
+  names:
+  - {first: Cheikh M. Bamba, last: Dione}
+  - {first: Cheikh Bamba, last: Dione}
+chen-cecilia-liu:
+  comment: Technische Universität Darmstadt
+  disable_name_matching: true
+  names:
+  - {first: Chen, last: Cecilia Liu}
+  - {first: Chen, last: Liu}
+  orcid: 0009-0004-2382-8609
+chen-zhang-peking:
+  disable_name_matching: true
+  names:
+  - {first: Chen, last: Zhang}
+  orcid: 0000-0001-5842-0516
+cheng-chao-su:
+  names:
+  - {first: Cheng-chao, last: Su}
+  - {first: Cheng-Chao, last: Su}
+cheng-der-chen:
+  names:
+  - {first: Cheng-Der, last: Chen}
+  - {first: Cheng-der, last: Chen}
+cheng-jie-sun:
+  names:
+  - {first: Cheng-Jie, last: Sun}
+  - {first: Chengjie, last: Sun}
+cheng-ming-guo:
+  names:
+  - {first: Cheng-ming, last: Guo}
+  - {first: Cheng Ming, last: Guo}
+cheng-yan-kao:
+  names:
+  - {first: Cheng-yan, last: Kao}
+  - {first: Cheng-Yan, last: Kao}
+  - {first: Cheng Yan, last: Kao}
+cheng-yuan-lin:
+  names:
+  - {first: Cheng-Yuan, last: Lin}
+  - {first: Cheng Yuan, last: Lin}
+chengqing-zong:
+  names:
+  - {first: Chengqing, last: Zong}
+  - {first: Cheng-qing, last: Zong}
+chengxiang-zhai:
+  names:
+  - {first: ChengXiang, last: Zhai}
+  - {first: Chengxiang, last: Zhai}
+cheol-jung-kweon:
+  names:
+  - {first: Cheol Jung, last: Kweon}
+  - {first: Cheoljung, last: Kweon}
+cheol-young-ock:
+  names:
+  - {first: Cheol-Young, last: Ock}
+  - {first: Cheolyoung, last: Ock}
+  - {first: Cheol-young, last: Ock}
+chew-lim-tan:
+  names:
+  - {first: Chew Lim, last: Tan}
+  - {first: Chew-Lim, last: Tan}
+  - {first: ChewLim, last: Tan}
+chi-chun-lee:
+  names:
+  - {first: Chi-Chun, last: Lee}
+  - {first: Chi-Chun (Jeremy), last: Lee}
+  - {first: Chi-Chun Jeremy, last: Lee}
+chi-san-althon-lin:
+  names:
+  - {first: Chi-san Althon, last: Lin}
+  - {first: Chi-San, last: Lin}
+  - {first: Chi-San Althon, last: Lin}
+chi-shing-wang:
+  names:
+  - {first: Chi-Shing, last: Wang}
+  - {first: Chi-shing, last: Wang}
+chi-shun-cheung:
+  names:
+  - {first: Chi-Shun, last: Cheung}
+  - {first: Chi Shun, last: Cheung}
+chi-yao-lee:
+  names:
+  - {first: Chi-Yao, last: Lee}
+  - {first: Chih-yao, last: Lee}
+  - {first: Chih-Yao, last: Lee}
+chia-hung-tai:
+  names:
+  - {first: Chia-Hung, last: Tai}
+  - {first: Chia-hung, last: Tai}
+chia-lung-wu:
+  names:
+  - {first: Chia-Lung, last: Wu}
+  - {first: Chia-Long, last: Wu}
+chia-ming-lee:
+  names:
+  - {first: Chia-ming, last: Lee}
+  - {first: Chia-Ming, last: Lee}
+chia-ying-lee:
+  names:
+  - {first: Chia-Ying, last: Lee}
+  - {first: Chia-ying, last: Lee}
+chih-lung-lin:
+  names:
+  - {first: Chih-Lung, last: Lin}
+  - {first: Chih-Long, last: Lin}
+chih-ming-chiu:
+  names:
+  - {first: Chih-Ming, last: Chiu}
+  - {first: Chih-ming, last: Chiu}
+chiharu-uda-kikuta:
+  names:
+  - {first: Chiharu Uda, last: Kikuta}
+  - {first: Chiharu, last: Uda}
+chin-ting-liu:
+  names:
+  - {first: Chin-Ting, last: Liu}
+  - {first: Chin-Ting Jimbo, last: Liu}
+chin-yew-lin:
+  names:
+  - {first: Chin-Yew, last: Lin}
+  - {first: ChinYew, last: Lin}
+ching-sheng-lin:
+  names:
+  - {first: Ching-sheng, last: Lin}
+  - {first: Ching-Sheng, last: Lin}
+ching-yun-chang:
+  names:
+  - {first: Ching Yun, last: Chang}
+  - {first: Ching-Yun, last: Chang}
+chiu-yu-tseng:
+  names:
+  - {first: Chiu-yu, last: Tseng}
+  - {first: Chiu-Yu, last: Tseng}
+chiung-hui-tseng:
+  names:
+  - {first: Chiung-hui, last: Tseng}
+  - {first: Chiung-Hui, last: Tseng}
+chiwei-che:
+  names:
+  - {first: Chiwei, last: Che}
+  - {first: C., last: Che}
+chloe-kiddon:
+  names:
+  - {first: Chloé, last: Kiddon}
+  - {first: Chloe, last: Kiddon}
+chomicha-bendahman:
+  names:
+  - {first: Chomicha, last: Bendahman}
+  - {first: C., last: Bendahman}
+chong-zhang-xjtlu:
+  degree: Xi'an Jiaotong-Liverpool University
+  disable_name_matching: true
+  names:
+  - {first: Chong, last: Zhang}
+  orcid: 0009-0003-2020-6989
+chooi-ling-goh:
+  names:
+  - {first: Chooi-Ling, last: Goh}
+  - {first: Chooi Ling, last: Goh}
+chris-barry:
+  names:
+  - {first: Chris, last: Barry}
+  - {first: C., last: Barry}
+chris-biemann:
+  names:
+  - {first: Chris, last: Biemann}
+  - {first: Christian, last: Biemann}
+chris-buckley:
+  names:
+  - {first: Chris, last: Buckley}
+  - {first: C., last: Buckley}
+chris-clark:
+  names:
+  - {first: Chris, last: Clark}
+  - {first: Christine, last: Clark}
+chris-culy:
+  names:
+  - {first: Chris, last: Culy}
+  - {first: Christopher, last: Culy}
+chris-dyer:
+  names:
+  - {first: Chris, last: Dyer}
+  - {first: Christopher, last: Dyer}
+  - {first: Christopher J., last: Dyer}
+chris-hokamp:
+  names:
+  - {first: Chris, last: Hokamp}
+  - {first: Christopher, last: Hokamp}
+chris-irwin-davis:
+  names:
+  - {first: Chris Irwin, last: Davis}
+  - {first: Chris, last: Davis}
+chris-mellish:
+  names:
+  - {first: Chris, last: Mellish}
+  - {first: Chris S., last: Mellish}
+  - {first: C, last: Mellish}
+  - {first: C. S., last: Mellish}
+chris-reed:
+  names:
+  - {first: Chris, last: Reed}
+  - {first: C., last: Reed}
+chris-thomas:
+  disable_name_matching: true
+  names:
+  - {first: Chris, last: Thomas}
+  - {first: Christopher, last: Thomas}
+  orcid: 0000-0002-3226-396X
+chris-welty:
+  names:
+  - {first: Chris, last: Welty}
+  - {first: Christopher, last: Welty}
+christelle-ayache:
+  names:
+  - {first: Christelle, last: Ayache}
+  - {first: C., last: Ayache}
+christian-blaschke:
+  names:
+  - {first: Christian, last: Blaschke}
+  - {first: C., last: Blaschke}
+christian-boitet:
+  names:
+  - {first: Christian, last: Boitet}
+  - {first: Ch., last: Boitet}
+christian-f-hempelmann:
+  names:
+  - {first: Christian F., last: Hempelmann}
+  - {first: Christian, last: Hempelmann}
+christian-fluhr:
+  names:
+  - {first: Christian, last: Fluhr}
+  - {first: C., last: Fluhr}
+christian-girardi:
+  names:
+  - {first: Christian, last: Girardi}
+  - {first: C., last: Girardi}
+christian-hanig:
+  names:
+  - {first: Christian, last: Hänig}
+  - {first: Christian, last: Haenig}
+christian-huyck:
+  names:
+  - {first: Christian, last: Huyck}
+  - {first: C., last: Huyck}
+christian-m-i-m-matthiessen:
+  names:
+  - {first: Christian M.I.M., last: Matthiessen}
+  - {first: Christian M. I. M., last: Matthiessen}
+christian-moldovan:
+  names:
+  - {first: Christian, last: Moldovan}
+  - {first: Cristian, last: Moldovan}
+christian-morbidoni:
+  names:
+  - {first: Christian, last: Morbidoni}
+  - {first: C., last: Morbidoni}
+christian-mueller:
+  names:
+  - {first: Christian, last: Mueller}
+  - {first: Christian, last: Müller}
+christine-a-montgomery:
+  names:
+  - {first: Christine A., last: Montgomery}
+  - {first: Christine, last: Montgomery}
+christine-doran:
+  names:
+  - {first: Christine, last: Doran}
+  - {first: C, last: Doran}
+christine-h-nakatani:
+  names:
+  - {first: Christine H., last: Nakatani}
+  - {first: Christine, last: Nakatani}
+christine-piatko:
+  names:
+  - {first: Christine, last: Piatko}
+  - {first: Christine D., last: Piatko}
+christof-muller:
+  names:
+  - {first: Christof, last: Müller}
+  - {first: Christof E., last: Müller}
+christoph-muller:
+  names:
+  - {first: Christoph, last: Müller}
+  - {first: Christoph, last: Mueller}
+christoph-tillmann:
+  names:
+  - {first: Christoph, last: Tillmann}
+  - {first: C., last: Tillmann}
+christophe-collet:
+  names:
+  - {first: Christophe, last: Collet}
+  - {first: C., last: Collet}
+christophe-costa-florencio:
+  names:
+  - {first: Christophe, last: Costa Florêncio}
+  - {first: Christophe Costa, last: Florencio}
+christophe-fouquere:
+  names:
+  - {first: Christophe, last: Fouqueré}
+  - {first: C., last: Fouquere}
+christophe-laprun:
+  names:
+  - {first: Christophe, last: Laprun}
+  - {first: Christophe D., last: Laprun}
+christophe-roeder:
+  names:
+  - {first: Christophe, last: Roeder}
+  - {first: Chris, last: Roeder}
+christophe-servan:
+  names:
+  - {first: Christophe, last: Servan}
+  - {first: C., last: Servan}
+christopher-bogart:
+  names:
+  - {first: Christopher, last: Bogart}
+  - {first: Chris, last: Bogart}
+christopher-caruso:
+  names:
+  - {first: Christopher, last: Caruso}
+  - {first: Chris, last: Caruso}
+christopher-chute:
+  names:
+  - {first: Christopher, last: Chute}
+  - {first: Christopher G., last: Chute}
+christopher-cieri:
+  names:
+  - {first: Christopher, last: Cieri}
+  - {first: Chris, last: Cieri}
+christopher-d-manning:
+  names:
+  - {first: Christopher D., last: Manning}
+  - {first: Christopher, last: Manning}
+  - {first: Chris, last: Manning}
+christopher-dozier:
+  names:
+  - {first: Christopher, last: Dozier}
+  - {first: Christopher C., last: Dozier}
+christopher-habel:
+  names:
+  - {first: Christopher, last: Habel}
+  - {first: Christopher U., last: Habel}
+christopher-j-c-burges:
+  names:
+  - {first: Christopher J.C., last: Burges}
+  - {first: Chris J.C., last: Burges}
+christopher-jones:
+  names:
+  - {first: Christopher, last: Jones}
+  - {first: Chris, last: Jones}
+christopher-m-white:
+  names:
+  - {first: Christopher M., last: White}
+  - {first: C. M., last: White}
+christopher-meek:
+  names:
+  - {first: Christopher, last: Meek}
+  - {first: Chris, last: Meek}
+christopher-mitchell:
+  names:
+  - {first: Christopher, last: Mitchell}
+  - {first: Christopher M., last: Mitchell}
+christopher-pal:
+  names:
+  - {first: Christopher, last: Pal}
+  - {first: Chris, last: Pal}
+christopher-pennington:
+  names:
+  - {first: Christopher, last: Pennington}
+  - {first: Chris, last: Pennington}
+christopher-r-walker:
+  names:
+  - {first: Christopher R., last: Walker}
+  - {first: Christopher, last: Walker}
+  - {first: Christopher R, last: Walker}
+chrysanne-dimarco:
+  names:
+  - {first: Chrysanne, last: DiMarco}
+  - {first: Chrysanne, last: Di Marco}
+chun-jen-lee:
+  names:
+  - {first: Chun-Jen, last: Lee}
+  - {first: Chun-Jun, last: Lee}
+chun-kai-wu:
+  names:
+  - {first: Chun-Kai, last: Wu}
+  - {first: Kevin Chun-Kai, last: Wu}
+chun-nan-hsu:
+  names:
+  - {first: Chun-nan, last: Hsu}
+  - {first: Chun-Nan, last: Hsu}
+chung-chi-huang:
+  names:
+  - {first: Chung-Chi, last: Huang}
+  - {first: Chung-chi, last: Huang}
+chung-hye-han:
+  names:
+  - {first: Chung-hye, last: Han}
+  - {first: Chung-Hye, last: Han}
+  - {first: Chunghye, last: Han}
+chung-yong-lim:
+  names:
+  - {first: Chung Yong, last: Lim}
+  - {first: Daniel Chung Yong, last: Lim}
+chungmin-lee:
+  names:
+  - {first: Chungmin, last: Lee}
+  - {first: Chong Min, last: Lee}
+  - {first: Chung-min, last: Lee}
+chunyang-jiang-hkust:
+  comment: HKUST
+  disable_name_matching: true
+  names:
+  - {first: Chunyang, last: Jiang}
+  orcid: 0009-0005-3401-4093
+chunyu-kit:
+  names:
+  - {first: Chunyu, last: Kit}
+  - {first: Chun-yu, last: Kit}
+cicero-dos-santos:
+  names:
+  - {first: Cicero, last: dos Santos}
+  - {first: Cícero, last: dos Santos}
+  - {first: Cícero Nogueira, last: dos Santos}
+  - {first: Cicero, last: Nogueira dos Santos}
+  - {first: Cícero, last: Nogueira dos Santos}
+ciprian-bacalu:
+  names:
+  - {first: Ciprian, last: Bacalu}
+  - {first: C., last: Bacalu}
+claire-bonial:
+  names:
+  - {first: Claire, last: Bonial}
+  - {first: Claire N., last: Bonial}
+claire-cardie:
+  names:
+  - {first: Claire, last: Cardie}
+  - {first: C., last: Cardie}
+claire-francois:
+  names:
+  - {first: Claire, last: François}
+  - {first: Claire, last: Francois}
+claire-nedellec:
+  names:
+  - {first: Claire, last: Nédellec}
+  - {first: Claire, last: Nėdellec}
+clare-voss:
+  names:
+  - {first: Clare, last: Voss}
+  - {first: Clare R., last: Voss}
+claude-barras:
+  names:
+  - {first: Claude, last: Barras}
+  - {first: C., last: Barras}
+claude-de-loupy:
+  names:
+  - {first: Claude, last: de Loupy}
+  - {first: Claude, last: De Loupy}
+claudia-freitas:
+  names:
+  - {first: Cláudia, last: Freitas}
+  - {first: Claudia, last: Freitas}
+claudia-gdaniec:
+  names:
+  - {first: Claudia, last: Gdaniec}
+  - {first: C., last: Gdaniec}
+claudio-giuliano:
+  names:
+  - {first: Claudio, last: Giuliano}
+  - {first: C., last: Giuliano}
+claudiu-musat:
+  names:
+  - {first: Claudiu, last: Musat}
+  - {first: Claudiu-Cristian, last: Musat}
+clement-de-groc:
+  names:
+  - {first: Clément, last: de Groc}
+  - {first: Clément, last: De Groc}
+clement-jonquet:
+  names:
+  - {first: Clement, last: Jonquet}
+  - {first: Clément, last: Jonquet}
+clement-t-yu:
+  names:
+  - {first: Clement T., last: Yu}
+  - {first: Clement, last: Yu}
+cleo-jullien:
+  names:
+  - {first: Cléo, last: Jullien}
+  - {first: Cleo, last: Jullien}
+clifford-j-weinstein:
+  names:
+  - {first: Clifford J., last: Weinstein}
+  - {first: Clifford, last: Weinstein}
+clint-burfoot:
+  names:
+  - {first: Clint, last: Burfoot}
+  - {first: Clinton, last: Burfoot}
+colin-batchelor:
+  names:
+  - {first: Colin, last: Batchelor}
+  - {first: Colin R., last: Batchelor}
+colin-w-wightman:
+  names:
+  - {first: Colin W., last: Wightman}
+  - {first: C.W., last: Wightman}
+  - {first: C. W., last: Wightman}
+collin-f-baker:
+  names:
+  - {first: Collin F., last: Baker}
+  - {first: Collin, last: Baker}
+cong-kai-lin:
+  names:
+  - {first: Cong-kai, last: Lin}
+  - {first: Cong-Kai, last: Lin}
+cong-liu-fau:
+  comment: Florida Atlantic University
+  names:
+  - {first: Cong, last: Liu}
+cong-liu-iflytek:
+  comment: iFLYTEK Research
+  names:
+  - {first: Cong, last: Liu}
+cong-liu-ucr:
+  comment: University of California, Riverside
+  names:
+  - {first: Cong, last: Liu}
+constantin-orasan:
+  names:
+  - {first: Constantin, last: Orasan}
+  - {first: Constantin, last: Orăsan}
+  - {first: C., last: Orasan}
+constantine-d-spyropoulos:
+  names:
+  - {first: Constantine D., last: Spyropoulos}
+  - {first: Constantine, last: Spyropoulos}
+constantine-papageorgiou:
+  names:
+  - {first: Constantine, last: Papageorgiou}
+  - {first: Constantine P., last: Papageorgiou}
+corina-forascu:
+  names:
+  - {first: Corina, last: Forăscu}
+  - {first: Corina, last: Forascu}
+corinna-onelli:
+  names:
+  - {first: Corinna, last: Onelli}
+  - {first: C., last: Onelli}
+corrado-seidenari:
+  names:
+  - {first: Corrado, last: Seidenari}
+  - {first: C., last: Seidenari}
+cory-hayes:
+  names:
+  - {first: Cory, last: Hayes}
+  - {first: Cory J., last: Hayes}
+cosmin-adrian-bejan:
+  names:
+  - {first: Cosmin Adrian, last: Bejan}
+  - {first: Cosmin, last: Adrian Bejan}
+  - {first: Cosmin, last: Bejan}
+courtney-d-corley:
+  names:
+  - {first: Courtney D., last: Corley}
+  - {first: Courtney, last: Corley}
+cristian-ursu:
+  names:
+  - {first: Cristian, last: Ursu}
+  - {first: Christian, last: Ursu}
+cristina-sanchez-marco:
+  names:
+  - {first: Cristina, last: Sánchez-Marco}
+  - {first: Cristina, last: Marco}
+  - {first: Cristina Sánchez, last: Marco}
+curry-i-guinn:
+  names:
+  - {first: Curry I., last: Guinn}
+  - {first: Curry, last: Guinn}
+cyril-goutte:
+  names:
+  - {first: Cyril, last: Goutte}
+  - {first: C., last: Goutte}
+d-s-bree:
+  names:
+  - {first: D. S., last: Bree}
+  - {first: D.S., last: Bree}
+d-terence-langendoen:
+  names:
+  - {first: D. Terence, last: Langendoen}
+  - {first: Terence, last: Langendoen}
+daan-broeder:
+  names:
+  - {first: Daan, last: Broeder}
+  - {first: D., last: Broeder}
+daisy-zhe-wang:
+  names:
+  - {first: Daisy Zhe, last: Wang}
+  - {first: Zhe, last: Wang}
+dale-a-miller:
+  names:
+  - {first: Dale A., last: Miller}
+  - {first: Dale, last: Miller}
+damaris-ayuso:
+  names:
+  - {first: Damaris, last: Ayuso}
+  - {first: Damaris M., last: Ayuso}
+  - {first: D., last: Ayuso}
+damir-cavar:
+  names:
+  - {first: Damir, last: Ćavar}
+  - {first: Damir, last: Cavar}
+dan-bohus:
+  names:
+  - {first: Dan, last: Bohus}
+  - {first: Dan, last: Bohuş}
+  - {first: D., last: Bohus}
+dan-flickinger:
+  names:
+  - {first: Dan, last: Flickinger}
+  - {first: Daniel, last: Flickinger}
+  - {first: Daniel P., last: Flickinger}
+  - {first: D., last: Flickenger}
+dan-gillick:
+  names:
+  - {first: Dan, last: Gillick}
+  - {first: Daniel, last: Gillick}
+dan-hunter:
+  names:
+  - {first: Dan, last: Hunter}
+  - {first: D., last: Hunter}
+dan-jurafsky:
+  names:
+  - {first: Dan, last: Jurafsky}
+  - {first: Daniel, last: Jurafsky}
+dan-mcfarland:
+  names:
+  - {first: Dan, last: McFarland}
+  - {first: Daniel, last: McFarland}
+  - {first: Daniel A., last: McFarland}
+dan-moldovan:
+  names:
+  - {first: Dan, last: Moldovan}
+  - {first: Dan I., last: Moldovan}
+  - {first: D., last: Moldovan}
+dan-simonson:
+  names:
+  - {first: Dan, last: Simonson}
+  - {first: Daniel, last: Simonson}
+dan-stefanescu:
+  names:
+  - {first: Dan, last: Stefanescu}
+  - {first: Dan, last: Ştefănescu}
+  - {first: Dan, last: Ştefanescu}
+  - {first: Dan, last: Ștefănescu}
+dan-tufis:
+  names:
+  - {first: Dan, last: Tufiş}
+  - {first: Dan, last: Tufis}
+  - {first: Dan, last: Tufiș}
+dan-zhang-tsinghua:
+  comment: Tsinghua University
+  disable_name_matching: true
+  names:
+  - {first: Dan, last: Zhang}
+danica-damljanovic:
+  names:
+  - {first: Danica, last: Damljanović}
+  - {first: Danica, last: Damljanovic}
+danie-j-prinsloo:
+  names:
+  - {first: Danie J., last: Prinsloo}
+  - {first: Danie, last: Prinsloo}
+daniel-bachut:
+  names:
+  - {first: Daniel, last: Bachut}
+  - {first: D., last: Bachut}
+daniel-bastos-pereira:
+  names:
+  - {first: Daniel Bastos, last: Pereira}
+  - {first: Daniel B., last: Pereira}
+daniel-beck:
+  names:
+  - {first: Daniel, last: Beck}
+  - {first: Daniel Emilio, last: Beck}
+daniel-bobrow:
+  names:
+  - {first: Daniel, last: Bobrow}
+  - {first: Daniel G., last: Bobrow}
+daniel-bolanos:
+  names:
+  - {first: Daniel, last: Bolaños}
+  - {first: Daniel, last: Bolanos}
+daniel-couto-vale:
+  names:
+  - {first: Daniel, last: Couto Vale}
+  - {first: Daniel, last: Couto-Vale}
+  - {first: Daniel, last: Vale}
+daniel-dechelotte:
+  names:
+  - {first: Daniel, last: Déchelotte}
+  - {first: Daniel, last: Dechelotte}
+daniel-ferres:
+  names:
+  - {first: Daniel, last: Ferrés}
+  - {first: Dani, last: Ferrés}
+daniel-g-swanson:
+  names:
+  - {first: Daniel G., last: Swanson}
+  - {first: Daniel, last: Swanson}
+daniel-hernandez-lopez:
+  names:
+  - {first: Daniel, last: Hernandez-Lopez}
+  - {first: Daniel Hernández, last: López}
+daniel-hromada:
+  names:
+  - {first: Daniel, last: Hromada}
+  - {first: Daniel Devatman, last: Hromada}
+  - {first: Daniel, last: Devatman Hromada}
+daniel-keim:
+  names:
+  - {first: Daniel, last: Keim}
+  - {first: Daniel A., last: Keim}
+daniel-m-bikel:
+  names:
+  - {first: Daniel M., last: Bikel}
+  - {first: Daniel, last: Bikel}
+  - {first: Dan, last: Bikel}
+daniel-maxwell:
+  names:
+  - {first: Daniel, last: Maxwell}
+  - {first: Dan, last: Maxwell}
+daniel-ortiz-martinez:
+  names:
+  - {first: Daniel, last: Ortiz-Martínez}
+  - {first: Daniel, last: Ortíz-Martínez}
+  - {first: Daniel, last: Ortiz Martínez}
+daniel-p-mills:
+  names:
+  - {first: Daniel P., last: Mills}
+  - {first: Daniel, last: Mills}
+daniel-paiva:
+  names:
+  - {first: Daniel, last: Paiva}
+  - {first: Daniel S., last: Paiva}
+  - {first: D, last: Paiva}
+  - {first: D., last: Paiva}
+daniel-peterson:
+  names:
+  - {first: Daniel, last: Peterson}
+  - {first: Daniel W., last: Peterson}
+daniel-povey:
+  names:
+  - {first: Daniel, last: Povey}
+  - {first: D., last: Povey}
+daniel-preotiuc-pietro:
+  names:
+  - {first: Daniel, last: Preoţiuc-Pietro}
+  - {first: Daniel, last: Preotiuc-Pietro}
+daniel-s-weld:
+  names:
+  - {first: Daniel S., last: Weld}
+  - {first: Daniel, last: Weld}
+  - {first: Dan, last: Weld}
+daniel-sanchez-cisneros:
+  names:
+  - {first: Daniel, last: Sanchez-Cisneros}
+  - {first: Daniel, last: Sánchez}
+daniel-tapias:
+  names:
+  - {first: Daniel, last: Tapias}
+  - {first: Daniel Tapias, last: Merino}
+daniel-varga:
+  names:
+  - {first: Dániel, last: Varga}
+  - {first: Daniel, last: Varga}
+  - {first: D., last: Varga}
+daniel-whyatt:
+  names:
+  - {first: Daniel, last: Whyatt}
+  - {first: Dan, last: Whyatt}
+daniel-zeman:
+  names:
+  - {first: Daniel, last: Zeman}
+  - {first: Dan, last: Zeman}
+daniela-gifu:
+  names:
+  - {first: Daniela, last: Gifu}
+  - {first: Daniela, last: Gîfu}
+daniela-m-romano:
+  names:
+  - {first: Daniela M., last: Romano}
+  - {first: Daniela, last: Romano}
+daniela-oliveira-f-do-amaral:
+  names:
+  - {first: Daniela Oliveira F., last: do Amaral}
+  - {first: Daniela O. F., last: do Amaral}
+daniele-godard:
+  names:
+  - {first: Daniele, last: Godard}
+  - {first: Danièle, last: Godard}
+danielle-l-mowery:
+  names:
+  - {first: Danielle L., last: Mowery}
+  - {first: Danielle, last: Mowery}
+  - {first: Danielle L, last: Mowery}
+danielle-s-mcnamara:
+  names:
+  - {first: Danielle S., last: McNamara}
+  - {first: Danielle, last: McNamara}
+danniel-liwanag-alcantara:
+  names:
+  - {first: Danniel Liwanag, last: Alcantara}
+  - {first: Danniel, last: Alcantara}
+darla-magdalene-shockley:
+  names:
+  - {first: Darla Magdalene, last: Shockley}
+  - {first: Darla, last: Shockley}
+darnes-vilarino:
+  names:
+  - {first: Darnes, last: Vilariño}
+  - {first: Darnes, last: Vilariño Ayala}
+darren-hsin-hung-lin:
+  names:
+  - {first: Darren Hsin-Hung, last: Lin}
+  - {first: Darren Hsin-hung, last: Lin}
+  - {first: Hsin-Hung, last: Lin}
+dasa-munkova:
+  names:
+  - {first: Dasa, last: Munkova}
+  - {first: Daša, last: Munková}
+dau-cheng-lyu:
+  names:
+  - {first: Dau-cheng, last: Lyu}
+  - {first: Dau-Cheng, last: Lyu}
+dave-omara:
+  names:
+  - {first: Dave, last: O’mara}
+  - {first: D., last: O’Mara}
+david-a-evans:
+  names:
+  - {first: David A., last: Evans}
+  - {first: David Andreoff, last: Evans}
+david-a-smith:
+  names:
+  - {first: David A., last: Smith}
+  - {first: David, last: Smith}
+  - {first: David Addison, last: Smith}
+david-blei:
+  names:
+  - {first: David, last: Blei}
+  - {first: David M., last: Blei}
+david-c-uthus:
+  names:
+  - {first: David C., last: Uthus}
+  - {first: David, last: Uthus}
+david-cabrero-souto:
+  names:
+  - {first: David Cabrero, last: Souto}
+  - {first: David, last: Cabrero}
+david-carter:
+  names:
+  - {first: David, last: Carter}
+  - {first: David M., last: Carter}
+david-d-lewis:
+  names:
+  - {first: David D., last: Lewis}
+  - {first: David, last: Lewis}
+david-d-mcdonald:
+  comment: MIT, BBN, SIFT
+  names:
+  - {first: David D., last: McDonald}
+  - {first: David, last: McDonald}
+  similar:
+  - david-w-mcdonald
+david-d-palmer:
+  names:
+  - {first: David D., last: Palmer}
+  - {first: David, last: Palmer}
+david-d-sherertz:
+  names:
+  - {first: David D., last: Sherertz}
+  - {first: D. D., last: Sherertz}
+david-day:
+  names:
+  - {first: David, last: Day}
+  - {first: David S., last: Day}
+david-e-johnson:
+  names:
+  - {first: David E., last: Johnson}
+  - {first: David, last: Johnson}
+david-ellis:
+  names:
+  - {first: David, last: Ellis}
+  - {first: David Ellis, last: Rogers}
+david-elson:
+  names:
+  - {first: David, last: Elson}
+  - {first: David K., last: Elson}
+david-farwell:
+  names:
+  - {first: David, last: Farwell}
+  - {first: D., last: Farwell}
+david-fernandez-amoros:
+  names:
+  - {first: David, last: Fernández-Amorós}
+  - {first: David, last: Fernández}
+  - {first: David, last: Férnandez-Amorós}
+david-fisher:
+  names:
+  - {first: David, last: Fisher}
+  - {first: D., last: Fisher}
+david-forsyth:
+  names:
+  - {first: David, last: Forsyth}
+  - {first: David A., last: Forsyth}
+david-goodine:
+  names:
+  - {first: David, last: Goodine}
+  - {first: D., last: Goodine}
+david-h-d-warren:
+  names:
+  - {first: David H. D., last: Warren}
+  - {first: David H.D., last: Warren}
+david-ifeoluwa-adelani:
+  names:
+  - {first: David Ifeoluwa, last: Adelani}
+  - {first: David, last: Adelani}
+  - {first: David I., last: Adelani}
+david-israel:
+  names:
+  - {first: David, last: Israel}
+  - {first: David J., last: Israel}
+david-k-evans:
+  names:
+  - {first: David K., last: Evans}
+  - {first: David, last: Evans}
+  - {first: David Kirk, last: Evans}
+david-king:
+  names:
+  - {first: David, last: King}
+  - {first: David L., last: King}
+david-kolovratnik:
+  names:
+  - {first: David, last: Kolovratnik}
+  - {first: David, last: Kolovratník}
+david-l-bean:
+  names:
+  - {first: David L., last: Bean}
+  - {first: David, last: Bean}
+david-l-waltz:
+  names:
+  - {first: David L., last: Waltz}
+  - {first: D. L., last: Waltz}
+david-llorens:
+  names:
+  - {first: David, last: Llorens}
+  - {first: D., last: Llorens}
+david-m-howcroft:
+  names:
+  - {first: David M., last: Howcroft}
+  - {first: David, last: Howcroft}
+david-m-magerman:
+  names:
+  - {first: David M., last: Magerman}
+  - {first: David, last: Magerman}
+  - {first: D., last: Magerman}
+david-m-rojas:
+  names:
+  - {first: David M., last: Rojas}
+  - {first: David, last: Rojas}
+david-m-w-powers:
+  names:
+  - {first: David M. W., last: Powers}
+  - {first: David M W, last: Powers}
+  - {first: D. M. W., last: Powers}
+david-mark-nemeskey:
+  names:
+  - {first: Dávid Márk, last: Nemeskey}
+  - {first: David Mark, last: Nemeskey}
+david-martinez:
+  names:
+  - {first: David, last: Martinez}
+  - {first: David, last: Martínez}
+david-martins-de-matos:
+  names:
+  - {first: David, last: Martins de Matos}
+  - {first: David Martins, last: de Matos}
+  - {first: David M., last: de Matos}
+david-mcgee:
+  names:
+  - {first: David, last: McGee}
+  - {first: David R., last: McGee}
+david-n-milne:
+  names:
+  - {first: David N., last: Milne}
+  - {first: David, last: Milne}
+david-nicolas-racca:
+  names:
+  - {first: David Nicolas, last: Racca}
+  - {first: David Nicolás, last: Racca}
+david-pico:
+  names:
+  - {first: David, last: Picó}
+  - {first: David, last: Pico}
+david-pierce:
+  names:
+  - {first: David, last: Pierce}
+  - {first: David R., last: Pierce}
+david-pinto:
+  names:
+  - {first: David, last: Pinto}
+  - {first: David Eduardo, last: Pinto Avendaño}
+  - {first: David, last: Pinto Avendaño}
+david-r-karger:
+  names:
+  - {first: David R., last: Karger}
+  - {first: David, last: Karger}
+david-r-mortensen:
+  names:
+  - {first: David R., last: Mortensen}
+  - {first: David, last: Mortensen}
+david-s-pallett:
+  names:
+  - {first: David S., last: Pallett}
+  - {first: David, last: Pallett}
+  - {first: D. S., last: Pallett}
+  - {first: D., last: Pallett}
+david-stallard:
+  names:
+  - {first: David, last: Stallard}
+  - {first: David G., last: Stallard}
+  - {first: D., last: Stallard}
+david-suendermann-oeft:
+  names:
+  - {first: David, last: Suendermann-Oeft}
+  - {first: David, last: Suendermann}
+david-tomas:
+  names:
+  - {first: David, last: Tomas}
+  - {first: David, last: Tomás}
+david-traum:
+  names:
+  - {first: David, last: Traum}
+  - {first: David R., last: Traum}
+david-w-aha:
+  names:
+  - {first: David W., last: Aha}
+  - {first: David, last: Aha}
+david-w-mcdonald:
+  comment: Univ. of Washington
+  names:
+  - {first: David W., last: McDonald}
+  similar:
+  - david-d-mcdonald
+david-weir:
+  names:
+  - {first: David, last: Weir}
+  - {first: David J., last: Weir}
+  - {first: David, last: Wei}
+  - {first: D. J., last: Weir}
+david-yoshikazu-oshima:
+  names:
+  - {first: David Yoshikazu, last: Oshima}
+  - {first: David Y., last: Oshima}
+davis-muhajereen-d-dimalen:
+  names:
+  - {first: Davis Muhajereen D., last: Dimalen}
+  - {first: Davis Muhajereen, last: Dimalen}
+davy-weissenbacher:
+  names:
+  - {first: Davy, last: Weissenbacher}
+  - {first: D., last: Weissenbacher}
+dayne-freitag:
+  names:
+  - {first: Dayne, last: Freitag}
+  - {first: D., last: Freitag}
+dean-foster:
+  names:
+  - {first: Dean, last: Foster}
+  - {first: Dean P., last: Foster}
+dean-sturtevant:
+  names:
+  - {first: Dean, last: Sturtevant}
+  - {first: Dean G., last: Sturtevant}
+deb-roy:
+  names:
+  - {first: Deb, last: Roy}
+  - {first: Suman, last: Deb Roy}
+debela-tesfaye-gemechu:
+  names:
+  - {first: Debela Tesfaye, last: Gemechu}
+  - {first: Debela, last: Tesfaye}
+deborah-a-dahl:
+  names:
+  - {first: Deborah A., last: Dahl}
+  - {first: Deborah, last: Dahl}
+  - {first: D., last: Dahl}
+deborah-brady:
+  names:
+  - {first: Deborah, last: Brady}
+  - {first: Deb, last: Brady}
+dechuan-yang:
+  names:
+  - {first: Dechuan, last: Yang}
+  - {first: De, last: Yang}
+deepak-gupta:
+  names:
+  - {first: Deepak, last: Gupta}
+  - {first: Deepak Kumar, last: Gupta}
+  - {first: Deepa, last: Gupta}
+deepak-kumar-malladi:
+  names:
+  - {first: Deepak Kumar, last: Malladi}
+  - {first: Deepak, last: Malladi}
+deepak-p:
+  names:
+  - {first: Deepak, last: P}
+  - {first: Deepak, last: Padmanabhan}
+degen-huang:
+  names:
+  - {first: Degen, last: Huang}
+  - {first: De-Gen, last: Huang}
+demetrios-master:
+  names:
+  - {first: Demetrios, last: Master}
+  - {first: Demitrios, last: Master}
+denise-dipersio:
+  names:
+  - {first: Denise, last: DiPersio}
+  - {first: Denise, last: Dipersio}
+dennis-mehay:
+  names:
+  - {first: Dennis, last: Mehay}
+  - {first: Dennis Nolan, last: Mehay}
+dennis-ryan-storoshenko:
+  names:
+  - {first: Dennis Ryan, last: Storoshenko}
+  - {first: Dennis R., last: Storoshenko}
+deok-bong-kim:
+  names:
+  - {first: Deok-bong, last: Kim}
+  - {first: Deok-Bong, last: Kim}
+derek-long:
+  names:
+  - {first: Derek, last: Long}
+  - {first: Derek P., last: Long}
+derry-tanti-wijaya:
+  names:
+  - {first: Derry Tanti, last: Wijaya}
+  - {first: Derry, last: Wijaya}
+dessi-puji-lestari:
+  names:
+  - {first: Dessi Puji, last: Lestari}
+  - {first: Dessi, last: Lestari}
+detmar-meurers:
+  names:
+  - {first: Detmar, last: Meurers}
+  - {first: W. Detmar, last: Meurers}
+  - {first: Walt Detmar, last: Meurers}
+devadath-v-v:
+  names:
+  - {first: Devadath, last: V V}
+  - {first: Devadath V, last: V}
+deyi-xiong:
+  names:
+  - {first: Deyi, last: Xiong}
+  - {first: De-Yi, last: Xiong}
+diana-binnenpoorte:
+  names:
+  - {first: Diana, last: Binnenpoorte}
+  - {first: D., last: Binnenpoorte}
+diana-inkpen:
+  names:
+  - {first: Diana, last: Inkpen}
+  - {first: Diana Zaiu, last: Inkpen}
+  - {first: Diana, last: Zaiu}
+diana-mccarthy:
+  names:
+  - {first: Diana, last: McCarthy}
+  - {first: Diana F., last: McCarthy}
+diana-nicoleta-popa:
+  names:
+  - {first: Diana Nicoleta, last: Popa}
+  - {first: Diana, last: Popa}
+diana-trandabat:
+  names:
+  - {first: Diana, last: Trandabat}
+  - {first: Diana, last: Trandabăț}
+  - {first: Diana, last: Trandabăţ}
+  - {first: Diana Marie, last: Trandabăţ}
+diane-litman:
+  names:
+  - {first: Diane, last: Litman}
+  - {first: Diane J., last: Litman}
+dianne-p-oleary:
+  names:
+  - {first: Dianne P., last: O’Leary}
+  - {first: Dianne, last: O’Leary}
+didzis-gosko:
+  names:
+  - {first: Didzis, last: Gosko}
+  - {first: Didzis, last: Goško}
+diego-a-burgos:
+  names:
+  - {first: Diego A., last: Burgos}
+  - {first: Diego, last: Burgos}
+diego-calvanese:
+  names:
+  - {first: Diego, last: Calvanese}
+  - {first: D., last: Calvanese}
+diego-fernandez-slezak:
+  names:
+  - {first: Diego, last: Fernandez Slezak}
+  - {first: Diego, last: Fernández Slezak}
+diego-molla:
+  names:
+  - {first: Diego, last: Molla}
+  - {first: Diego, last: Mollá-Aliod}
+  - {first: Diego, last: Mollá Aliod}
+  - {first: Diego, last: Molla-Aliod}
+  - {first: Diego, last: Mollá}
+diego-raphael-amancio:
+  names:
+  - {first: Diego Raphael, last: Amancio}
+  - {first: Diego, last: Amancio}
+dieter-metzing:
+  names:
+  - {first: Dieter, last: Metzing}
+  - {first: D., last: Metzing}
+dieter-van-uytvanck:
+  names:
+  - {first: Dieter, last: van Uytvanck}
+  - {first: Dieter, last: Van Uytvanck}
+dietmar-rosner:
+  names:
+  - {first: Dietmar, last: Rösner}
+  - {first: Dietmar, last: Rosner}
+  - {first: Dietmar F., last: Roesner}
+  - {first: Dietmar, last: Roesner}
+  - {first: D., last: Roesner}
+dietrich-rebholz-schuhmann:
+  names:
+  - {first: Dietrich, last: Rebholz Schuhmann}
+  - {first: Dietrich, last: Rebholz-Schuhmann}
+dilara-torunoglu-selamet:
+  names:
+  - {first: Dilara, last: Torunoğlu-Selamet}
+  - {first: Dilara, last: Torunoǧlu}
+dilek-hakkani-tur:
+  names:
+  - {first: Dilek, last: Hakkani-Tur}
+  - {first: Dilek, last: Hakkani-Tür}
+  - {first: Dilek Zeynep, last: Hakkani}
+  - {first: D., last: Hakkani-Tur}
+diman-karagyozov:
+  names:
+  - {first: Diman, last: Karagyozov}
+  - {first: Diman, last: Karagiozov}
+dimitrios-bilidas:
+  names:
+  - {first: Dimitrios, last: Bilidas}
+  - {first: Dimitris, last: Bilidas}
+dimitrios-galanis:
+  names:
+  - {first: Dimitrios, last: Galanis}
+  - {first: Dimitris, last: Galanis}
+dimitris-christodoulakis:
+  names:
+  - {first: Dimitris, last: Christodoulakis}
+  - {first: Dimitris N., last: Christodoulakis}
+dinh-dien:
+  names:
+  - {first: Dinh, last: Dien}
+  - {first: Dien, last: Dinh}
+diogo-gloria-silva:
+  names:
+  - {first: Diogo, last: Glória-Silva}
+  - {first: Diogo F. C., last: Silva}
+  orcid: 0000-0002-4420-7455
+dipendra-misra:
+  names:
+  - {first: Dipendra, last: Misra}
+  - {first: Dipendra Kumar, last: Misra}
+dipti-misra-sharma:
+  names:
+  - {first: Dipti Misra, last: Sharma}
+  - {first: Dipti, last: Misra Sharma}
+  - {first: Dipti, last: Sharma}
+  - {first: Dipti M., last: Sharma}
+  - {first: Dipti, last: Misra}
+  - {first: Dipti M, last: Sharma}
+dirk-von-gruenigen:
+  names:
+  - {first: Dirk, last: Von Gruenigen}
+  - {first: Dirk, last: von Grünigen}
+djame-seddah:
+  names:
+  - {first: Djamé, last: Seddah}
+  - {first: Djame, last: Seddah}
+djamel-mostefa:
+  names:
+  - {first: Djamel, last: Mostefa}
+  - {first: D., last: Mostefa}
+do-dat-tran:
+  names:
+  - {first: Do-Dat, last: Tran}
+  - {first: Do Dat, last: Tran}
+dogan-can:
+  names:
+  - {first: Doğan, last: Can}
+  - {first: Dogan, last: Can}
+doina-tatar:
+  names:
+  - {first: Doina, last: Tatar}
+  - {first: Doina, last: Tătar}
+dolors-catala:
+  names:
+  - {first: Dolors, last: Català}
+  - {first: Dolors, last: Catala}
+domenico-proietti:
+  names:
+  - {first: Domenico, last: Proietti}
+  - {first: D., last: Proietti}
+dominic-r-jones:
+  names:
+  - {first: Dominic R., last: Jones}
+  - {first: Dominic, last: Jones}
+dominique-estival:
+  names:
+  - {first: Dominique, last: Estival}
+  - {first: D, last: Estival}
+  - {first: D., last: Estival}
+dominique-vaufreydaz:
+  names:
+  - {first: Dominique, last: Vaufreydaz}
+  - {first: D., last: Vaufreydaz}
+donald-c-comeau:
+  names:
+  - {first: Donald C., last: Comeau}
+  - {first: Don, last: Comeau}
+  - {first: Donald, last: Comeau}
+  - {first: Donald C, last: Comeau}
+donald-hindle:
+  names:
+  - {first: Donald, last: Hindle}
+  - {first: Don, last: Hindle}
+  - {first: D., last: Hindle}
+dong-cheng-hu:
+  names:
+  - {first: Dong Cheng, last: Hu}
+  - {first: Dong-Cheng, last: Hu}
+dong-il-kim:
+  names:
+  - {first: Dong-Il, last: Kim}
+  - {first: Dong-il, last: Kim}
+dong-yang:
+  names:
+  - {first: Dong, last: Yang}
+  - {first: D., last: Yang}
+dongfeng-cai:
+  names:
+  - {first: Dongfeng, last: Cai}
+  - {first: DongFeng, last: Cai}
+donghong-ji:
+  names:
+  - {first: Donghong, last: Ji}
+  - {first: DongHong, last: Ji}
+  - {first: Dong-Hong, last: Ji}
+  - {first: Dong Hong, last: Ji}
+donghun-lee-kb:
+  comment: Kakao Brain
+  names:
+  - {first: Donghun, last: Lee}
+donghun-lee-ku:
+  comment: Korea University
+  names:
+  - {first: Donghun, last: Lee}
+dongsheng-li-fudan:
+  comment: Fudan
+  disable_name_matching: true
+  names:
+  - {first: Dongsheng, last: Li}
+  orcid: 0000-0003-3103-8442
+donia-scott:
+  names:
+  - {first: Donia, last: Scott}
+  - {first: Donia R., last: Scott}
+  - {first: D, last: Scott}
+donna-byron:
+  names:
+  - {first: Donna, last: Byron}
+  - {first: Donna K., last: Byron}
+  - {first: D., last: Byron}
+donna-gates:
+  names:
+  - {first: Donna, last: Gates}
+  - {first: Donna M., last: Gates}
+doroteo-t-toledano:
+  names:
+  - {first: Doroteo T., last: Toledano}
+  - {first: Doroteo Torre, last: Toledano}
+  - {first: Doroteo, last: Toledano}
+dorte-haltrup-hansen:
+  names:
+  - {first: Dorte Haltrup, last: Hansen}
+  - {first: Dorte H., last: Hansen}
+douglas-appelt:
+  names:
+  - {first: Douglas, last: Appelt}
+  - {first: Douglas E., last: Appelt}
+  - {first: Doug, last: Appelt}
+douglas-b-moran:
+  names:
+  - {first: Douglas B., last: Moran}
+  - {first: Douglas, last: Moran}
+  - {first: Doug, last: Moran}
+douglas-jones:
+  names:
+  - {first: Douglas, last: Jones}
+  - {first: Douglas A., last: Jones}
+  - {first: Doug, last: Jones}
+douglas-mckee:
+  names:
+  - {first: Douglas, last: McKee}
+  - {first: Doug, last: McKee}
+douglas-oshaughnessy:
+  names:
+  - {first: Douglas, last: O’Shaughnessy}
+  - {first: Douglas D., last: O’Shaughnessy}
+  - {first: D., last: O’Shaughnessy}
+douglas-w-oard:
+  names:
+  - {first: Douglas W., last: Oard}
+  - {first: Douglas, last: Oard}
+  - {first: Doug, last: Oard}
+douglass-cutting:
+  names:
+  - {first: Douglass, last: Cutting}
+  - {first: Doug, last: Cutting}
+dragomir-radev:
+  names:
+  - {first: Dragomir, last: Radev}
+  - {first: Dragomir R., last: Radev}
+dragos-stefan-munteanu:
+  names:
+  - {first: Dragos Stefan, last: Munteanu}
+  - {first: Dragos, last: Munteanu}
+drahomira-johanka-spoustova:
+  names:
+  - {first: Drahomíra “johanka”, last: Spoustová}
+  - {first: Johanka, last: Spoustová}
+  - {first: Drahomíra „johanka“, last: Spoustová}
+duane-e-olawsky:
+  names:
+  - {first: Duane E., last: Olawsky}
+  - {first: Duane, last: Olawsky}
+duc-vu-tran:
+  names:
+  - {first: Duc-Vu, last: Tran}
+  - {first: Vu Duc, last: Tran}
+dusan-varis:
+  names:
+  - {first: Dusan, last: Varis}
+  - {first: Dušan, last: Variš}
+dustin-hillard:
+  names:
+  - {first: Dustin, last: Hillard}
+  - {first: D., last: Hillard}
+e-dario-gutierrez:
+  names:
+  - {first: E. Dario, last: Gutierrez}
+  - {first: Elkin, last: Darío Gutiérrez}
+  - {first: E. Darío, last: Gutiérrez}
+e-gabriela-garza:
+  names:
+  - {first: E. Gabriela, last: Garza}
+  - {first: Gabriela, last: Garza}
+eckhard-bick:
+  names:
+  - {first: Eckhard, last: Bick}
+  - {first: E., last: Bick}
+eddie-antonio-santos:
+  names:
+  - {first: Eddie Antonio, last: Santos}
+  - {first: Eddie A., last: Santos}
+  - {first: Eddie, last: Antonio Santos}
+  - {first: Eddie, last: Santos}
+  orcid: 0000-0001-5337-715X
+edgar-gonzalez-pellicer:
+  names:
+  - {first: Edgar, last: Gonzàlez Pellicer}
+  - {first: Edgar, last: Gonzàlez}
+edmund-grimley-evans:
+  names:
+  - {first: Edmund Grimley, last: Evans}
+  - {first: Edmund, last: Grimley-Evans}
+edmund-yu:
+  names:
+  - {first: Edmund, last: Yu}
+  - {first: Edmund S., last: Yu}
+edouard-geoffrois:
+  names:
+  - {first: Edouard, last: Geoffrois}
+  - {first: E., last: Geoffrois}
+edouard-grave:
+  names:
+  - {first: Édouard, last: Grave}
+  - {first: Edouard, last: Grave}
+eduard-hovy:
+  names:
+  - {first: Eduard, last: Hovy}
+  - {first: Eduard H., last: Hovy}
+  - {first: Ed, last: Hovy}
+eduardo-lleida:
+  names:
+  - {first: Eduardo, last: Lleida}
+  - {first: Eduardo, last: LLeida}
+eduardo-r-banga:
+  names:
+  - {first: Eduardo R., last: Banga}
+  - {first: Eduardo, last: R. Banga}
+  - {first: Eduardo Rodríguez, last: Banga}
+edward-collins:
+  names:
+  - {first: Edward, last: Collins}
+  - {first: Ed, last: Collins}
+edward-grefenstette:
+  names:
+  - {first: Edward, last: Grefenstette}
+  - {first: E., last: Grefenstette}
+edward-hoenkamp:
+  names:
+  - {first: Edward, last: Hoenkamp}
+  - {first: Eduard, last: Hoenkamp}
+edward-schofield:
+  names:
+  - {first: Edward, last: Schofield}
+  - {first: Ed, last: Schofield}
+edward-stabler:
+  names:
+  - {first: Edward, last: Stabler}
+  - {first: Edward P., last: 'Stabler, Jr.'}
+  - {first: Edward P., last: Stabler}
+edward-w-d-whittaker:
+  names:
+  - {first: Edward W. D., last: Whittaker}
+  - {first: E.W.D., last: Whittaker}
+edward-y-chang:
+  names:
+  - {first: Edward Y., last: Chang}
+  - {first: Edward, last: Chang}
+efstathios-stamatatos:
+  names:
+  - {first: Efstathios, last: Stamatatos}
+  - {first: E., last: Stamatatos}
+efsun-sarioglu-kayi:
+  names:
+  - {first: Efsun, last: Sarioglu Kayi}
+  - {first: Efsun, last: Sarioglu}
+egidio-giachin:
+  names:
+  - {first: Egidio, last: Giachin}
+  - {first: E., last: Giachin}
+egidio-l-terra:
+  names:
+  - {first: Egidio L., last: Terra}
+  - {first: Egidio, last: Terra}
+egon-stemle:
+  names:
+  - {first: Egon, last: Stemle}
+  - {first: Egon W., last: Stemle}
+ehsan-mohammady-ardehaly:
+  names:
+  - {first: Ehsan, last: Mohammady Ardehaly}
+  - {first: Ehsan, last: Mohammady}
+eiichiro-sumita:
+  names:
+  - {first: Eiichiro, last: Sumita}
+  - {first: Eiichro, last: Sumita}
+eileen-fitzpatrick:
+  names:
+  - {first: Eileen, last: Fitzpatrick}
+  - {first: E., last: Fitzpatrick}
+eirikur-rognvaldsson:
+  names:
+  - {first: Eirikur, last: Rögnvaldsson}
+  - {first: Eiríkur, last: Rögnvaldsson}
+ekaterina-v-rakhilina:
+  names:
+  - {first: Ekaterina V., last: Rakhilina}
+  - {first: Ekaterina, last: Rakhilina}
+elaine-omahony:
+  names:
+  - {first: Elaine, last: O′Mahony}
+  - {first: Elaine, last: O’Mahony}
+elaine-rich:
+  names:
+  - {first: Elaine, last: Rich}
+  - {first: Elaine A., last: Rich}
+elaine-ui-dhonnchadha:
+  names:
+  - {first: Elaine, last: Uí Dhonnchadha}
+  - {first: E., last: Uí Dhonnchadha}
+eldon-g-lytle:
+  names:
+  - {first: Eldon G., last: Lytle}
+  - {first: Eldon G., last: Lytel}
+elena-paskaleva:
+  names:
+  - {first: Elena, last: Paskaleva}
+  - {first: Elena, last: Pascaleva}
+eleni-koutsogeorgos:
+  names:
+  - {first: Eleni, last: Koutsogeorgos}
+  - {first: E., last: Koutsogeorgos}
+eli-goldberg:
+  names:
+  - {first: Eli, last: Goldberg}
+  - {first: E., last: Goldberg}
+elina-desipri:
+  names:
+  - {first: Elina, last: Desipri}
+  - {first: Elina, last: Desypri}
+  - {first: E., last: Desipri}
+elisa-bertino:
+  names:
+  - {first: Elisa, last: Bertino}
+  - {first: E., last: Bertino}
+elisabet-comelles:
+  names:
+  - {first: Elisabet, last: Comelles}
+  - {first: E., last: Comelles}
+elisabeth-ahlsen:
+  names:
+  - {first: Elisabeth, last: Ahlsén}
+  - {first: Elisabeth, last: Ahlsen}
+elisabeth-andre:
+  names:
+  - {first: Elisabeth, last: Andre}
+  - {first: Elisabeth, last: André}
+elisabeth-delais-roussarie:
+  names:
+  - {first: Elisabeth, last: Delais-Roussarie}
+  - {first: Élisabeth, last: Delais-Roussarie}
+elisabeth-dhalleweyn:
+  names:
+  - {first: Elisabeth, last: D’Halleweyn}
+  - {first: Elizabeth, last: D’Halleweyn}
+elisabeth-frasnelli:
+  names:
+  - {first: Elisabeth, last: Frasnelli}
+  - {first: E., last: Frasnelli}
+elisabeth-maier:
+  names:
+  - {first: Elisabeth, last: Maier}
+  - {first: Elisabeth, last: Mager}
+elizabeth-a-hinkelman:
+  names:
+  - {first: Elizabeth A., last: Hinkelman}
+  - {first: Elizabeth, last: Hinkelman}
+elizabeth-bishop:
+  names:
+  - {first: Elizabeth, last: Bishop}
+  - {first: E., last: Bishop}
+elizabeth-c-botha:
+  names:
+  - {first: Elizabeth C., last: Botha}
+  - {first: E.C., last: Botha}
+elizabeth-d-liddy:
+  names:
+  - {first: Elizabeth D., last: Liddy}
+  - {first: Elizabeth, last: Liddy}
+elizabeth-merkhofer:
+  names:
+  - {first: Elizabeth, last: Merkhofer}
+  - {first: Elizabeth M., last: Merkhofer}
+elizabeth-schroeder:
+  names:
+  - {first: Elizabeth, last: Schroeder}
+  - {first: Elizabeth Schroeder, last: Richerson}
+  - {first: Elizabeth, last: Richerson}
+elizabeth-shriberg:
+  names:
+  - {first: Elizabeth, last: Shriberg}
+  - {first: E., last: Shriberg}
+elizaveta-loginova-clouet:
+  names:
+  - {first: Elizaveta, last: Loginova-Clouet}
+  - {first: Elizaveta, last: Clouet}
+ellen-campana:
+  names:
+  - {first: Ellen, last: Campana}
+  - {first: E., last: Campana}
+ellen-douglas-cowie:
+  names:
+  - {first: Ellen, last: Douglas-Cowie}
+  - {first: E., last: Douglas-Cowie}
+ellen-k-dodge:
+  names:
+  - {first: Ellen K., last: Dodge}
+  - {first: Ellen, last: Dodge}
+ellen-m-voorhees:
+  names:
+  - {first: Ellen M., last: Voorhees}
+  - {first: Ellen, last: Voorhees}
+ellen-riloff:
+  names:
+  - {first: Ellen, last: Riloff}
+  - {first: E., last: Riloff}
+elliott-franco-drabek:
+  names:
+  - {first: Elliott Franco, last: Drabek}
+  - {first: Elliott, last: Drabek}
+  - {first: Elliott, last: Drábek}
+elliott-moreton:
+  names:
+  - {first: Elliott, last: Moreton}
+  - {first: Elliot, last: Moreton}
+elmar-noth:
+  names:
+  - {first: Elmar, last: Nöth}
+  - {first: Elmar, last: Noth}
+  - {first: E., last: Nöth}
+elvira-i-sicilia-garcia:
+  names:
+  - {first: Elvira I., last: Sicilia-Garcia}
+  - {first: E.I., last: Sicilia-Garcia}
+  - {first: E. I., last: Sicilia-Garcia}
+ely-edison-da-silva-matos:
+  names:
+  - {first: Ely Edison da Silva, last: Matos}
+  - {first: Ely, last: Matos}
+  - {first: Ely E. S., last: Matos}
+emanuela-boros:
+  names:
+  - {first: Emanuela, last: Boroş}
+  - {first: Emanuela, last: Boroș}
+  - {first: Emanuela, last: Boros}
+emanuele-pianta:
+  names:
+  - {first: Emanuele, last: Pianta}
+  - {first: E., last: Pianta}
+emerson-cabrera-paraiso:
+  names:
+  - {first: Emerson Cabrera, last: Paraiso}
+  - {first: Emerson, last: Paraiso}
+emiel-krahmer:
+  names:
+  - {first: Emiel, last: Krahmer}
+  - {first: Emiel J., last: Krahmer}
+emiliano-giovannetti:
+  names:
+  - {first: Emiliano, last: Giovannetti}
+  - {first: Emiliano, last: Giovanetti}
+emiliano-raul-guevara:
+  names:
+  - {first: Emiliano Raul, last: Guevara}
+  - {first: Emiliano, last: Guevara}
+emilio-sanchis:
+  names:
+  - {first: Emilio, last: Sanchis}
+  - {first: Emilio, last: Sanchís}
+emily-m-bender:
+  names:
+  - {first: Emily M., last: Bender}
+  - {first: Emily, last: Bender}
+emily-prudhommeaux:
+  names:
+  - {first: Emily, last: Prud’hommeaux}
+  - {first: Emily T., last: Prud’hommeaux}
+  - {first: Emily, last: Prud'hommeaux}
+emina-kurtic:
+  names:
+  - {first: Emina, last: Kurtić}
+  - {first: Emina, last: Kurtic}
+eneida-a-mendonca:
+  names:
+  - {first: Eneida A., last: Mendonca}
+  - {first: Eneida, last: Mendonca}
+eneko-agirre:
+  names:
+  - {first: Eneko, last: Agirre}
+  - {first: E., last: Agirre}
+eneldo-loza-mencia:
+  names:
+  - {first: Eneldo, last: Loza Mencía}
+  - {first: Eneldo Loza, last: Mencía}
+enrico-zovato:
+  names:
+  - {first: Enrico, last: Zovato}
+  - {first: E., last: Zovato}
+enrique-amigo:
+  names:
+  - {first: Enrique, last: Amigó}
+  - {first: Enrique, last: Amigo}
+enrique-henestroza-anguiano:
+  names:
+  - {first: Enrique, last: Henestroza Anguiano}
+  - {first: Enrique Henestroza, last: Anguiano}
+enrique-manjavacas:
+  names:
+  - {first: Enrique, last: Manjavacas}
+  - {first: Enrique, last: Manjavacas Arevalo}
+enrique-vidal:
+  names:
+  - {first: Enrique, last: Vidal}
+  - {first: E., last: Vidal}
+eraldo-fernandes:
+  names:
+  - {first: Eraldo, last: Fernandes}
+  - {first: Eraldo Rezende, last: Fernandes}
+ergina-kavallieratou:
+  names:
+  - {first: Ergina, last: Kavallieratou}
+  - {first: E., last: Kavallieratou}
+ergun-bicici:
+  names:
+  - {first: Ergun, last: Bicici}
+  - {first: Ergun, last: Biçici}
+erhard-hinrichs:
+  names:
+  - {first: Erhard, last: Hinrichs}
+  - {first: Erhard W., last: Hinrichs}
+eric-atwell:
+  names:
+  - {first: Eric, last: Atwell}
+  - {first: Eric Steven, last: Atwell}
+  - {first: Eric S., last: Atwell}
+eric-bilinski:
+  names:
+  - {first: Eric, last: Bilinski}
+  - {first: Éric, last: Bilinski}
+eric-breck:
+  names:
+  - {first: Eric, last: Breck}
+  - {first: Eric J., last: Breck}
+eric-castelli:
+  names:
+  - {first: Eric, last: Castelli’}
+  - {first: Eric, last: Castelli}
+eric-charton:
+  names:
+  - {first: Eric, last: Charton}
+  - {first: Éric, last: Charton}
+eric-fosler-lussier:
+  names:
+  - {first: Eric, last: Fosler-Lussier}
+  - {first: J. Eric, last: Fosler}
+  - {first: Eric, last: Fosler}
+eric-gaussier:
+  names:
+  - {first: Eric, last: Gaussier}
+  - {first: Éric, last: Gaussier}
+eric-h-huang:
+  names:
+  - {first: Eric H., last: Huang}
+  - {first: Eric, last: Huang}
+eric-jackson:
+  names:
+  - {first: Eric, last: Jackson}
+  - {first: E., last: Jackson}
+eric-laporte:
+  names:
+  - {first: Eric, last: Laporte}
+  - {first: Éric, last: Laporte}
+eric-nyberg:
+  names:
+  - {first: Eric, last: Nyberg}
+  - {first: Eric H., last: Nyberg III}
+  - {first: Eric H., last: Nyberg}
+  - {first: Eric H., last: 'Nyberg, 3rd'}
+eric-ringger:
+  names:
+  - {first: Eric, last: Ringger}
+  - {first: Eric K., last: Ringger}
+eric-sadit-tellez:
+  names:
+  - {first: Eric Sadit, last: Tellez}
+  - {first: Eric S., last: Tellez}
+eric-sven-ristad:
+  names:
+  - {first: Eric Sven, last: Ristad}
+  - {first: Eric, last: Ristad}
+eric-villemonte-de-la-clergerie:
+  names:
+  - {first: Éric, last: Villemonte de la Clergerie}
+  - {first: Eric, last: Villemonte de la Clergerie}
+  - {first: Eric, last: de la Clergerie}
+  - {first: Eric, last: de La Clergerie}
+  - {first: Éric, last: de La Clergerie}
+  - {first: Éric, last: de la Clergerie}
+  - {first: Éric, last: Villemonte de La Clergerie}
+eric-wehrli:
+  names:
+  - {first: Eric, last: Wehrli}
+  - {first: Éric, last: Wehrli}
+eric-xing:
+  names:
+  - {first: Eric, last: Xing}
+  - {first: Eric P., last: Xing}
+erich-h-steiner:
+  names:
+  - {first: Erich H., last: Steiner}
+  - {first: Erich, last: Steiner}
+erick-alphonse:
+  names:
+  - {first: Erick, last: Alphonse}
+  - {first: E., last: Alphonse}
+erick-fonseca:
+  names:
+  - {first: Erick, last: Fonseca}
+  - {first: Erick Rocha, last: Fonseca}
+  - {first: Erick R., last: Fonseca}
+erick-galani-maziero:
+  names:
+  - {first: Erick Galani, last: Maziero}
+  - {first: Erick, last: Maziero}
+erik-baert:
+  names:
+  - {first: Erik, last: Baert}
+  - {first: E., last: Baert}
+erik-tjong-kim-sang:
+  names:
+  - {first: Erik, last: Tjong Kim Sang}
+  - {first: Erik F., last: Tjong Kim Sang}
+erik-van-der-goot:
+  names:
+  - {first: Erik, last: van der Goot}
+  - {first: Erik, last: Van der Goot}
+erik-van-mulligen:
+  names:
+  - {first: Erik, last: van Mulligen}
+  - {first: Erik M., last: van Mulligen}
+ernst-buchberger:
+  names:
+  - {first: Ernst, last: Buchberger}
+  - {first: E., last: Buchberger}
+ertugrul-yilmaz:
+  names:
+  - {first: Ertugrul, last: Yılmaz}
+  - {first: Ertuğrul, last: Yilmaz}
+  - {first: Ertuǧrul, last: Yılmaz}
+esa-nelimarkka:
+  names:
+  - {first: Esa, last: Nelimarkka}
+  - {first: E., last: Nelimarkka}
+esmeralda-manandise:
+  names:
+  - {first: Esmeralda, last: Manandise}
+  - {first: Esme, last: Manandise}
+esref-adali:
+  names:
+  - {first: Esref, last: Adali}
+  - {first: Eşref, last: Adalı}
+  - {first: Eşref, last: Adali}
+estela-saquete:
+  names:
+  - {first: Estela, last: Saquete}
+  - {first: Estela, last: Saquete Boro}
+  - {first: E., last: Saquete}
+estevam-r-hruschka-jr:
+  names:
+  - {first: Estevam R., last: 'Hruschka, Jr.'}
+  - {first: Estevam R., last: Hruschka Jr.}
+esther-klabbers:
+  names:
+  - {first: Esther, last: Klabbers}
+  - {first: E., last: Klabbers}
+esther-ratsch:
+  names:
+  - {first: Esther, last: Ratsch}
+  - {first: E., last: Ratsch}
+ethan-selfridge:
+  names:
+  - {first: Ethan, last: Selfridge}
+  - {first: Ethan O., last: Selfridge}
+eugenio-martinez-camara:
+  names:
+  - {first: Eugenio, last: Martínez-Cámara}
+  - {first: Eugenio, last: Martinez Camara}
+eun-kyung-kim:
+  names:
+  - {first: Eun-kyung, last: Kim}
+  - {first: Eun-Kyung, last: Kim}
+eun-suk-yang:
+  names:
+  - {first: Eun-Suk, last: Yang}
+  - {first: Eunsuk, last: Yang}
+eun-young-ha:
+  names:
+  - {first: Eun Young, last: Ha}
+  - {first: Eun, last: Ha}
+  - {first: Eun Y., last: Ha}
+eva-ejerhed:
+  names:
+  - {first: Eva, last: Ejerhed}
+  - {first: Eva I., last: Ejerhed}
+eva-fucikova:
+  names:
+  - {first: Eva, last: Fucikova}
+  - {first: Eva, last: Fučíková}
+eva-hajicova:
+  names:
+  - {first: Eva, last: Hajicova}
+  - {first: Eva, last: Hajicová}
+  - {first: Eva, last: Hajičová}
+  - {first: E., last: Hajicova}
+eva-maria-vecchi:
+  names:
+  - {first: Eva Maria, last: Vecchi}
+  - {first: Eva, last: Vecchi}
+eva-martinez-garcia:
+  names:
+  - {first: Eva, last: Martínez Garcia}
+  - {first: Eva Martínez, last: Garcia}
+eva-navas:
+  names:
+  - {first: Eva, last: Navas}
+  - {first: E., last: Navas}
+evandro-b-fonseca:
+  names:
+  - {first: Evandro B., last: Fonseca}
+  - {first: Evandro, last: Fonseca}
+evelyne-tzoukermann:
+  names:
+  - {first: Evelyne, last: Tzoukermann}
+  - {first: E., last: Tzoukermann}
+evgeny-stepanov:
+  names:
+  - {first: Evgeny, last: Stepanov}
+  - {first: Evgeny A., last: Stepanov}
+ezra-black:
+  names:
+  - {first: Ezra, last: Black}
+  - {first: Ezra W., last: Black}
+  - {first: E., last: Black}
+f-y-august-chao:
+  names:
+  - {first: F. Y. August, last: Chao}
+  - {first: F.Y. August, last: Chao}
+fabian-suchanek:
+  names:
+  - {first: Fabian, last: Suchanek}
+  - {first: Fabian M., last: Suchanek}
+fabien-cromieres:
+  names:
+  - {first: Fabien, last: Cromieres}
+  - {first: Fabien, last: Cromières}
+fabio-a-gonzalez:
+  names:
+  - {first: Fabio A., last: González}
+  - {first: Fabio, last: González}
+fabio-ciravegna:
+  names:
+  - {first: Fabio, last: Ciravegna}
+  - {first: F., last: Ciravegna}
+fabio-kepler:
+  names:
+  - {first: Fabio, last: Kepler}
+  - {first: Fabio N., last: Kepler}
+  - {first: Fabio Natanael, last: Kepler}
+  - {first: F., last: Kepler}
+fabio-massimo-zanzotto:
+  names:
+  - {first: Fabio Massimo, last: Zanzotto}
+  - {first: Fabio, last: Massimo Zanzotto}
+  - {first: Fabio, last: Zanzotto}
+  - {first: F., last: Zanzotto}
+fabio-pianesi:
+  names:
+  - {first: Fabio, last: Pianesi}
+  - {first: F., last: Pianesi}
+fabio-tamburini:
+  names:
+  - {first: Fabio, last: Tamburini}
+  - {first: F., last: Tamburini}
+fabrice-lefevre:
+  names:
+  - {first: Fabrice, last: Lefèvre}
+  - {first: Fabrice, last: Lefevre}
+  - {first: F., last: Lefevre}
+fahad-khan:
+  names:
+  - {first: Fahad, last: Khan}
+  - {first: Anas Fahad, last: Khan}
+faisal-ahmad:
+  names:
+  - {first: Faisal, last: Ahmad}
+  - {first: Faisal, last: Ahmed}
+faiza-elkateb-gara:
+  names:
+  - {first: Faiza, last: Elkateb-Gara}
+  - {first: Faiza, last: Gara}
+  - {first: Faïza, last: Elkateb-Gara}
+fang-fang-zhang:
+  names:
+  - {first: Fang-Fang, last: Zhang}
+  - {first: Fangfang, last: Zhang}
+fang-zheng:
+  names:
+  - {first: Fang, last: Zheng}
+  - {first: Thomas Fang, last: Zheng}
+fangfang-feng:
+  names:
+  - {first: Fangfang, last: Feng}
+  - {first: F., last: Feng}
+farah-benamara:
+  names:
+  - {first: Farah, last: Benamara}
+  - {first: Farah, last: Beanamara}
+  - {first: Farah, last: Benamara Zitoune}
+fatemeh-torabi-asr:
+  names:
+  - {first: Fatemeh, last: Torabi Asr}
+  - {first: Fatemeh Torabi, last: Asr}
+federico-prat:
+  names:
+  - {first: Federico, last: Prat}
+  - {first: F., last: Prat}
+fei-liu-gga:
+  comment: Google Assistant
+  names:
+  - {first: Fei, last: Liu}
+fei-liu-unimelb:
+  comment: University of Melbourne
+  names:
+  - {first: Fei, last: Liu}
+fei-liu-utdallas:
+  comment: UT Dallas, Bosch, CMU, University of Central Florida, Emory University
+  names:
+  - {first: Fei, last: Liu}
+felicia-koerner:
+  names:
+  - {first: Felicia, last: Körner}
+  - {first: Felicia, last: Koerner}
+felix-dreizin:
+  names:
+  - {first: Felix, last: Dreizin}
+  - {first: F., last: Dreizin}
+feng-ju-lo:
+  names:
+  - {first: Feng-Ju, last: Lo}
+  - {first: Fengju, last: Lo}
+feng-long-huang:
+  names:
+  - {first: Feng-Long, last: Huang}
+  - {first: Feng-Long, last: Hwang}
+feng-yi-chen:
+  names:
+  - {first: Feng-Yi, last: Chen}
+  - {first: Feng-yi, last: Chen}
+ferhan-ture:
+  names:
+  - {first: Ferhan, last: Türe}
+  - {first: Ferhan, last: Ture}
+fernando-c-n-pereira:
+  names:
+  - {first: Fernando C. N., last: Pereira}
+  - {first: Fernando C.N., last: Pereira}
+fernando-fernandez-martinez:
+  names:
+  - {first: Fernando, last: Fernández-Martínez}
+  - {first: Fernando Fernández, last: Martínez}
+fernando-garcia:
+  names:
+  - {first: Fernando, last: Garcia}
+  - {first: Fernando, last: García-Granada}
+  - {first: Fernando, last: García}
+fernando-llopis:
+  names:
+  - {first: Fernando, last: Llopis}
+  - {first: Fernando, last: LLopis}
+fernando-martinez-santiago:
+  names:
+  - {first: Fernando, last: Martínez-Santiago}
+  - {first: Fernando, last: Martínez Santiago}
+fernando-martins:
+  names:
+  - {first: Fernando, last: Martins}
+  - {first: F., last: Martins}
+fernando-perdigao:
+  names:
+  - {first: Fernando, last: Perdigão}
+  - {first: Fernando S., last: Perdigão}
+fernando-sanchez-leon:
+  names:
+  - {first: Fernando Sánchez, last: León}
+  - {first: Fernando, last: Sánchez}
+fiammetta-namer:
+  names:
+  - {first: Fiammetta, last: Namer}
+  - {first: Fiametta, last: Namer}
+fidelia-ibekwe-sanjuan:
+  names:
+  - {first: Fidelia, last: Ibekwe-SanJuan}
+  - {first: Fidelia, last: Ibekwe-Sanjuan}
+fil-alleva:
+  names:
+  - {first: Fil, last: Alleva}
+  - {first: Fileno, last: Alleva}
+  - {first: F., last: Alleva}
+filip-gralinski:
+  names:
+  - {first: Filip, last: Gralinski}
+  - {first: Filip, last: Graliński}
+filip-jurcicek:
+  names:
+  - {first: Filip, last: Jurcicek}
+  - {first: Filip, last: Jurčíček}
+finley-lacatusu:
+  names:
+  - {first: Finley, last: Lacatusu}
+  - {first: V. Finley, last: Lacatusu}
+fintan-j-costello:
+  names:
+  - {first: Fintan J., last: Costello}
+  - {first: Fintan, last: Costello}
+flavio-massimiliano-cecchini:
+  names:
+  - {first: Flavio, last: Massimiliano Cecchini}
+  - {first: Flavio Massimiliano, last: Cecchini}
+flora-yu-fang-wang:
+  names:
+  - {first: Flora Yu-Fang, last: Wang}
+  - {first: Yu-Fang, last: Wang}
+florence-reeder:
+  names:
+  - {first: Florence, last: Reeder}
+  - {first: Florence M., last: Reeder}
+florin-bulgarov:
+  names:
+  - {first: Florin, last: Bulgarov}
+  - {first: Florin Adrian, last: Bulgarov}
+folkert-de-vriend:
+  names:
+  - {first: Folkert, last: de Vriend}
+  - {first: F., last: De Vriend}
+  - {first: F., last: de Vriend}
+forrest-bao:
+  names:
+  - {first: Forrest, last: Bao}
+  - {first: Forrest Sheng, last: Bao}
+france-mihelic:
+  names:
+  - {first: France, last: Mihelic}
+  - {first: France, last: Mihelič}
+francesc-ribas:
+  names:
+  - {first: Francesc, last: Ribas}
+  - {first: Francesc Ribas, last: Framis}
+francesco-piazza:
+  names:
+  - {first: Francesco, last: Piazza}
+  - {first: F., last: Piazza}
+francine-chen:
+  names:
+  - {first: Francine, last: Chen}
+  - {first: Francine R., last: Chen}
+francis-j-smith:
+  names:
+  - {first: Francis J., last: Smith}
+  - {first: F. J., last: Smith}
+  - {first: F J, last: Smith}
+francis-kubala:
+  names:
+  - {first: Francis, last: Kubala}
+  - {first: F., last: Kubala}
+francis-tyers:
+  names:
+  - {first: Francis, last: Tyers}
+  - {first: Francis M., last: Tyers}
+francisco-campillo:
+  names:
+  - {first: Francisco, last: Campillo}
+  - {first: Francisco Campillo, last: Díaz}
+francisco-casacuberta:
+  names:
+  - {first: Francisco, last: Casacuberta}
+  - {first: F., last: Casacuberta}
+francisco-guzman:
+  names:
+  - {first: Francisco, last: Guzmán}
+  - {first: Francisco, last: Guzman}
+francisco-javier-gonzalez-castano:
+  names:
+  - {first: Francisco Javier, last: González-Castaño}
+  - {first: Francisco J., last: González-Castaño}
+francisco-m-couto:
+  names:
+  - {first: Francisco M., last: Couto}
+  - {first: Francisco, last: Couto}
+franco-turini:
+  names:
+  - {first: Franco, last: Turini}
+  - {first: F., last: Turini}
+francois-barthelemy:
+  names:
+  - {first: François, last: Barthélemy}
+  - {first: Francois, last: Barthelemy}
+francois-levy:
+  names:
+  - {first: François, last: Lévy}
+  - {first: François, last: Levy}
+francois-mairesse:
+  names:
+  - {first: François, last: Mairesse}
+  - {first: Francois, last: Mairesse}
+francois-michel-lang:
+  names:
+  - {first: Francois-Michel, last: Lang}
+  - {first: Francois M., last: Lang}
+francois-rousselot:
+  names:
+  - {first: François, last: Rousselot}
+  - {first: Francois, last: Rousselot}
+  - {first: F., last: Rousselot}
+frank-f-xu:
+  names:
+  - {first: Frank F., last: Xu}
+  - {first: Frank, last: Xu}
+frank-henrik-muller:
+  names:
+  - {first: Frank Henrik, last: Müller}
+  - {first: Frank H., last: Müller}
+  - {first: Frank, last: Müller}
+frank-k-soong:
+  names:
+  - {first: Frank K., last: Soong}
+  - {first: Frank, last: Soong}
+frank-smadja:
+  names:
+  - {first: Frank, last: Smadja}
+  - {first: Frank A., last: Smadja}
+frank-van-eynde:
+  names:
+  - {first: Frank Van, last: Eynde}
+  - {first: Frank, last: van Eynde}
+  - {first: Frank, last: Van Eynde}
+franz-guenthner:
+  names:
+  - {first: Franz, last: Guenthner}
+  - {first: F., last: Guenthner}
+franz-josef-och:
+  names:
+  - {first: Franz Josef, last: Och}
+  - {first: Franz J., last: Och}
+  - {first: Franz, last: Och}
+  - {first: F. J., last: Och}
+fred-damerau:
+  names:
+  - {first: Fred, last: Damerau}
+  - {first: Fred J., last: Damerau}
+freda-shi:
+  names:
+  - {first: Freda, last: Shi}
+  - {first: Haoyue, last: Shi}
+  orcid: 0009-0009-5697-449X
+frederic-bechet:
+  names:
+  - {first: Frederic, last: Bechet}
+  - {first: Frédéric, last: Bechét}
+  - {first: Frédéric, last: Béchet}
+  - {first: Frederic, last: Béchet}
+  - {first: F., last: Bechet}
+frederic-blain:
+  names:
+  - {first: Frédéric, last: Blain}
+  - {first: Frederic, last: Blain}
+frederic-elisei:
+  names:
+  - {first: Frédéric, last: Eliséi}
+  - {first: Frederic, last: Elisei}
+frederic-mailhot:
+  names:
+  - {first: Frederic, last: Mailhot}
+  - {first: Fred, last: Mailhot}
+  - {first: Frédéric, last: Mailhot}
+frederic-meunier:
+  names:
+  - {first: Frédéric, last: Meunier}
+  - {first: Frederic, last: Meunier}
+frederick-jelinek:
+  names:
+  - {first: Frederick, last: Jelinek}
+  - {first: Fred, last: Jelinek}
+  - {first: Fredrick, last: Jelinek}
+  - {first: F., last: Jelinek}
+frederick-m-hoyt:
+  names:
+  - {first: Frederick M., last: Hoyt}
+  - {first: Frederick, last: Hoyt}
+frederick-reiss:
+  names:
+  - {first: Frederick, last: Reiss}
+  - {first: Frederick R., last: Reiss}
+frederique-laforest:
+  names:
+  - {first: Frederique, last: Laforest}
+  - {first: Frédérique, last: Laforest}
+frederique-segond:
+  names:
+  - {first: Frédérique, last: Segond}
+  - {first: Frederique, last: Segond}
+fredy-a-amaya:
+  names:
+  - {first: Fredy A., last: Amaya}
+  - {first: F., last: Amaya}
+fumito-masui:
+  names:
+  - {first: Fumito, last: Masui}
+  - {first: F., last: Masui}
+g-bowden-wise:
+  names:
+  - {first: G. Bowden, last: Wise}
+  - {first: Bowden, last: Wise}
+g-edward-barton:
+  names:
+  - {first: G. Edward, last: Barton}
+  - {first: G. Edward, last: 'Barton, Jr.'}
+gabor-proszeky:
+  names:
+  - {first: Gabor, last: Proszeky}
+  - {first: Gábor, last: Prószéky}
+  - {first: Gabor, last: Prbszeky}
+gabriel-g-bes:
+  names:
+  - {first: Gabriel G., last: Bes}
+  - {first: Gabriel G., last: Bès}
+  - {first: Gabriel, last: Bès}
+  - {first: G.G., last: Bes}
+gabriel-lopes:
+  names:
+  - {first: Gabriel, last: Lopes}
+  - {first: Jose Gabriel P., last: Lopes}
+  - {first: Jose Gabriel, last: Lopes}
+  - {first: Gabriel P., last: Lopes}
+  - {first: José Gabriel Pereira, last: Lopes}
+  - {first: Gabriel, last: Pereira Lopes}
+  - {first: Gabriel Pereira, last: Lopes}
+gabriela-cavaglia:
+  names:
+  - {first: Gabriela, last: Cavaglià}
+  - {first: Gabriela, last: Cavaglia}
+gabriela-serban:
+  names:
+  - {first: Gabriela, last: Şerban}
+  - {first: Gabriela, last: Serban}
+gabriele-musillo:
+  names:
+  - {first: Gabriele, last: Musillo}
+  - {first: Gabriele Antonio, last: Musillo}
+gael-dias:
+  names:
+  - {first: Gaël, last: Dias}
+  - {first: Gael, last: Dias}
+  - {first: Gäel, last: Dias}
+gaelle-ferre:
+  names:
+  - {first: Gaëlle, last: Ferré}
+  - {first: Gaelle, last: Ferré}
+gail-m-keenan:
+  names:
+  - {first: Gail M., last: Keenan}
+  - {first: Gail, last: Keenan}
+  - {first: Gail M, last: Keenan}
+gaja-jarosz:
+  names:
+  - {first: Gaja, last: Jarosz}
+  - {first: Gaja E., last: Jarosz}
+gareth-j-f-jones:
+  names:
+  - {first: Gareth J. F., last: Jones}
+  - {first: Gareth J.F., last: Jones}
+gary-g-hendrix:
+  names:
+  - {first: Gary G., last: Hendrix}
+  - {first: Gary, last: Hendrix}
+gary-geunbae-lee:
+  names:
+  - {first: Gary Geunbae, last: Lee}
+  - {first: Geunbae, last: Lee}
+gary-k-k-chan:
+  names:
+  - {first: Gary K. K., last: Chan}
+  - {first: G. K. K., last: Chan}
+gaston-burek:
+  names:
+  - {first: Gaston, last: Burek}
+  - {first: Gaston G., last: Burek}
+gaurav-singh-tomar:
+  names:
+  - {first: Gaurav Singh, last: Tomar}
+  - {first: Gaurav, last: Singh}
+gauri-shankar-gautam:
+  names:
+  - {first: Gauri Shankar, last: Gautam}
+  - {first: Gauri, last: S. Gautam}
+geert-adriaens:
+  names:
+  - {first: Geert, last: Adriaens}
+  - {first: G., last: Adriaens}
+geert-jan-m-kruijff:
+  names:
+  - {first: Geert-Jan M., last: Kruijff}
+  - {first: Geert-Jan, last: Kruijff}
+gema-ramirez-sanchez:
+  names:
+  - {first: Gema, last: Ramírez-Sánchez}
+  - {first: Gema, last: Ramírez}
+gemma-bel-enguix:
+  names:
+  - {first: Gemma, last: Bel-Enguix}
+  - {first: Gemma Bel, last: Enguix}
+  - {first: Gemma, last: Bel Enguix}
+gemma-boleda:
+  names:
+  - {first: Gemma, last: Boleda}
+  - {first: Gemma, last: Boleda Torrent}
+gen-ichiro-kikui:
+  names:
+  - {first: Gen-ichiro, last: Kikui}
+  - {first: Gen’ichiro, last: Kikui}
+genevieve-gorrell:
+  names:
+  - {first: Genevieve, last: Gorrell}
+  - {first: G., last: Gorrell}
+genta-indra-winata:
+  names:
+  - {first: Genta Indra, last: Winata}
+  - {first: Genta, last: Winata}
+geoffrey-k-pullum:
+  names:
+  - {first: Geoffrey K., last: Pullum}
+  - {first: Geoffrey, last: Pullum}
+geoffrey-zweig:
+  names:
+  - {first: Geoffrey, last: Zweig}
+  - {first: Geoff, last: Zweig}
+  - {first: G., last: Zweig}
+george-aaron-broadwell:
+  names:
+  - {first: George Aaron, last: Broadwell}
+  - {first: Aaron, last: Broadwell}
+  - {first: G. Aaron, last: Broadwell}
+george-anton-kiraz:
+  names:
+  - {first: George Anton, last: Kiraz}
+  - {first: George, last: Kiraz}
+george-baker:
+  names:
+  - {first: George, last: Baker}
+  - {first: George Arthur, last: Baker}
+george-carayannis:
+  names:
+  - {first: George, last: Carayannis}
+  - {first: G., last: Carayannis}
+george-caridakis:
+  names:
+  - {first: George, last: Caridakis}
+  - {first: G., last: Caridakis}
+george-chou:
+  names:
+  - {first: George, last: Chou}
+  - {first: G., last: Chou}
+george-demetriou:
+  names:
+  - {first: George, last: Demetriou}
+  - {first: George C., last: Demetriou}
+george-e-heidorn:
+  names:
+  - {first: George E., last: Heidorn}
+  - {first: G. E., last: Heidorn}
+george-kokkinakis:
+  names:
+  - {first: George, last: Kokkinakis}
+  - {first: George K., last: Kokkinakis}
+  - {first: G., last: Kokkinakis}
+george-krupka:
+  names:
+  - {first: George, last: Krupka}
+  - {first: George R., last: Krupka}
+george-r-doddington:
+  names:
+  - {first: George R., last: Doddington}
+  - {first: George, last: Doddington}
+george-vladutz:
+  names:
+  - {first: George, last: Vladutz}
+  - {first: G., last: Vladutz}
+george-zavaliagkos:
+  names:
+  - {first: George, last: Zavaliagkos}
+  - {first: G., last: Zavaliagkos}
+georges-antoniadis:
+  names:
+  - {first: Georges, last: Antoniadis}
+  - {first: G., last: Antoniadis}
+georges-de-moor:
+  names:
+  - {first: Georges, last: De Moor}
+  - {first: G., last: De Moor}
+georges-linares:
+  names:
+  - {first: Georges, last: Linarès}
+  - {first: Georges, last: Linares}
+georgiana-dinu:
+  names:
+  - {first: Georgiana, last: Dinu}
+  - {first: G., last: Dinu}
+geraldine-damnati:
+  names:
+  - {first: Géraldine, last: Damnati}
+  - {first: Geraldine, last: Damnati}
+geraldo-bonorino-xexeo:
+  names:
+  - {first: Geraldo Bonorino, last: Xexéo}
+  - {first: Geraldo, last: Xexéo}
+gerard-salton:
+  names:
+  - {first: Gerard, last: Salton}
+  - {first: G., last: Salton}
+  - {first: G, last: Salton}
+gerard-veillon:
+  names:
+  - {first: Gerard, last: Veillon}
+  - {first: G., last: Veillon}
+gerardo-sierra:
+  names:
+  - {first: Gerardo, last: Sierra}
+  - {first: Gerardo, last: Sierra-Martínez}
+gerda-klimonow:
+  names:
+  - {first: Gerda, last: Klimonow}
+  - {first: G., last: Klimonow}
+gerhard-b-van-huyssteen:
+  names:
+  - {first: Gerhard B., last: van Huyssteen}
+  - {first: Gerhard, last: Van Huyssteen}
+  - {first: Gerhard, last: van Huyssteen}
+  - {first: Gerhard B, last: van Huyssteen}
+gerhard-paass:
+  names:
+  - {first: Gerhard, last: Paaß}
+  - {first: Gerhard, last: Paass}
+german-bordel:
+  names:
+  - {first: German, last: Bordel}
+  - {first: Germán, last: Bordel}
+  - {first: G., last: Bordel}
+german-kruszewski:
+  names:
+  - {first: Germán, last: Kruszewski}
+  - {first: German, last: Kruszewski}
+german-rigau:
+  names:
+  - {first: German, last: Rigau}
+  - {first: G., last: Rigau}
+german-sanchis-trilles:
+  names:
+  - {first: Germán, last: Sanchis-Trilles}
+  - {first: Germán, last: Sanchis Trilles}
+  - {first: Germán, last: Sanchis}
+gertjan-van-noord:
+  names:
+  - {first: Gertjan, last: van Noord}
+  - {first: Gertjan, last: Van Noord}
+gholamreza-ghassem-sani:
+  names:
+  - {first: Gholamreza, last: Ghassem-Sani}
+  - {first: Gholamreza, last: Ghassem-sani}
+  - {first: Gholamreza, last: Ghasem-Sani}
+gholamreza-haffari:
+  names:
+  - {first: Gholamreza, last: Haffari}
+  - {first: Reza, last: Haffari}
+giampaolo-mazzini:
+  names:
+  - {first: Giampaolo, last: Mazzini}
+  - {first: G., last: Mazzini}
+gian-piero-zarri:
+  names:
+  - {first: Gian Piero, last: Zarri}
+  - {first: G.P., last: Zarri}
+giancarlo-salton:
+  names:
+  - {first: Giancarlo, last: Salton}
+  - {first: Giancarlo D., last: Salton}
+giang-binh-tran:
+  names:
+  - {first: Giang Binh, last: Tran}
+  - {first: Giang, last: Tran}
+gianluca-de-rossi:
+  names:
+  - {first: Gianluca, last: De Rossi}
+  - {first: Gianluca, last: Rossi}
+gianluca-e-lebani:
+  names:
+  - {first: Gianluca E., last: Lebani}
+  - {first: Gianluca, last: Lebani}
+gianmaria-ajani:
+  names:
+  - {first: Gianmaria, last: Ajani}
+  - {first: G., last: Ajani}
+gideon-mann:
+  names:
+  - {first: Gideon, last: Mann}
+  - {first: Gideon S., last: Mann}
+gil-chang-kim:
+  names:
+  - {first: Gil Chang, last: Kim}
+  - {first: GilChang, last: Kim}
+  - {first: Gil-Chang, last: Kim}
+  - {first: Gilchang, last: Kim}
+gildas-menier:
+  names:
+  - {first: Gildas, last: Ménier}
+  - {first: Gildas, last: Menier}
+gilles-adda:
+  names:
+  - {first: Gilles, last: Adda}
+  - {first: G., last: Adda}
+gilles-boulianne:
+  names:
+  - {first: Gilles, last: Boulianne}
+  - {first: G., last: Boulianne}
+gilles-lechenadec:
+  names:
+  - {first: Gilles, last: Lechenadec}
+  - {first: G., last: Lechenadec}
+gilles-serasset:
+  names:
+  - {first: Gilles, last: Sérasset}
+  - {first: Gilles, last: Serasset}
+gina-anne-levow:
+  names:
+  - {first: Gina-Anne, last: Levow}
+  - {first: Gina, last: Levow}
+gintare-grigonyte:
+  names:
+  - {first: Gintarė, last: Grigonytė}
+  - {first: Gintare, last: Grigonyte}
+  - {first: Gintarė, last: Grigonyte}
+giorgio-maria-di-nunzio:
+  names:
+  - {first: Giorgio Maria, last: Di Nunzio}
+  - {first: Giorgio, last: Di Nunzio}
+giovanni-adorni:
+  names:
+  - {first: Giovanni, last: Adorni}
+  - {first: G., last: Adorni}
+giovanni-battista-varile:
+  names:
+  - {first: Giovanni Battista, last: Varile}
+  - {first: Giovanni B., last: Varile}
+  - {first: G.B., last: Varile}
+giovanni-semeraro:
+  names:
+  - {first: Giovanni, last: Semeraro}
+  - {first: G., last: Semeraro}
+giovanni-tummarello:
+  names:
+  - {first: Giovanni, last: Tummarello}
+  - {first: G., last: Tummarello}
+girish-nath-jha:
+  names:
+  - {first: Girish Nath, last: Jha}
+  - {first: Girish, last: Jha}
+girish-palshikar:
+  names:
+  - {first: Girish, last: Palshikar}
+  - {first: Girish K., last: Palshikar}
+  - {first: Girish K, last: Palshikar}
+gisele-montilha-pinheiro:
+  names:
+  - {first: Gisele Montilha, last: Pinheiro}
+  - {first: Gisele, last: Montilha}
+giulia-marchesini:
+  names:
+  - {first: Giulia, last: Marchesini}
+  - {first: Giulia, last: Marchesi}
+giuseppe-di-fabbrizio:
+  names:
+  - {first: Giuseppe, last: Di Fabbrizio}
+  - {first: Giuseppe, last: Fabbrizio}
+gloria-corpas-pastor:
+  names:
+  - {first: Gloria, last: Corpas Pastor}
+  - {first: Gloria, last: Corpas}
+  - {first: Gloria Corpas, last: Pastor}
+gloria-vazquez:
+  names:
+  - {first: Glòria, last: Vázquez}
+  - {first: Gloria, last: Vázquez}
+  - {first: Gloria, last: Vazquez}
+gokhan-tur:
+  names:
+  - {first: Gokhan, last: Tur}
+  - {first: Gokhan, last: Tür}
+  - {first: G., last: Tur}
+golnar-sheikhshab:
+  names:
+  - {first: Golnar, last: Sheikhshab}
+  - {first: Golnar, last: Sheikhshabbafghi}
+goncal-v-garces-diaz-munio:
+  names:
+  - {first: Gonçal V., last: Garcés Díaz-Munío}
+  orcid: 0000-0002-2594-5858
+goran-nenadic:
+  names:
+  - {first: Goran, last: Nenadic}
+  - {first: Goran, last: Nenadić}
+gordana-ilic-holen:
+  names:
+  - {first: Gordana Ilić, last: Holen}
+  - {first: Gordana Ilic, last: Holen}
+gordon-i-mccalla:
+  names:
+  - {first: Gordon I., last: McCalla}
+  - {first: Gordon, last: McCalla}
+  - {first: G.I., last: McCalla}
+gordon-pace:
+  names:
+  - {first: Gordon, last: Pace}
+  - {first: Gordon J., last: Pace}
+gorka-labaka:
+  names:
+  - {first: Gorka, last: Labaka}
+  - {first: G., last: Labaka}
+govind-kothari:
+  names:
+  - {first: Govind, last: Kothari}
+  - {first: '', last: Govind}
+gozde-gul-sahin:
+  names:
+  - {first: Gözde Gül, last: Şahin}
+  - {first: Gözde, last: Şahin}
+  - {first: Gözde Gül, last: İşgüder}
+gozde-ozbal:
+  names:
+  - {first: Gözde, last: Özbal}
+  - {first: Gozde, last: Ozbal}
+grace-chung:
+  names:
+  - {first: Grace, last: Chung}
+  - {first: Grace Y, last: Chung}
+graciela-gonzalez:
+  names:
+  - {first: Graciela, last: Gonzalez}
+  - {first: Graciela, last: Gonzalez-Hernandez}
+graeme-ritchie:
+  names:
+  - {first: Graeme, last: Ritchie}
+  - {first: Graeme D., last: Ritchie}
+  - {first: G.D., last: Ritchie}
+  - {first: G., last: Ritchie}
+graham-katz:
+  names:
+  - {first: Graham, last: Katz}
+  - {first: E. Graham, last: Katz}
+graham-russell:
+  names:
+  - {first: Graham, last: Russell}
+  - {first: Graham J., last: Russell}
+  - {first: G.J., last: Russell}
+graham-wilcock:
+  names:
+  - {first: Graham, last: Wilcock}
+  - {first: G., last: Wilcock}
+grazyna-vetulani:
+  names:
+  - {first: Grażyna, last: Vetulani}
+  - {first: Grazyna, last: Vetulani}
+greg-gul-rajani:
+  names:
+  - {first: Greg, last: Gul-rajani}
+  - {first: Greg, last: Gulrajani}
+gregoire-moreau-de-montcheuil:
+  names:
+  - {first: Grégoire, last: Moreau de Montcheuil}
+  - {first: Grégoire, last: de Montcheuil}
+gregorio-hernandez:
+  names:
+  - {first: Gregorio, last: Hernández}
+  - {first: Gregorio, last: Hernandez}
+  - {first: G., last: Hernández}
+gregory-aist:
+  names:
+  - {first: Gregory, last: Aist}
+  - {first: Greg, last: Aist}
+gregory-crane:
+  names:
+  - {first: Gregory, last: Crane}
+  - {first: Gregory R., last: Crane}
+gregory-finley:
+  names:
+  - {first: Gregory, last: Finley}
+  - {first: Greg, last: Finley}
+gregory-roulet-guiot:
+  names:
+  - {first: Grégory, last: Roulet--Guiot}
+  - {first: Grégory, last: Roulet-Guiot}
+gregory-sanders:
+  names:
+  - {first: Gregory, last: Sanders}
+  - {first: Gregory A., last: Sanders}
+  - {first: Greg, last: Sanders}
+gregory-stainhauer:
+  names:
+  - {first: Gregory, last: Stainhauer}
+  - {first: G., last: Stainhauer}
+  - {first: Gregory, last: Stainhaouer}
+greville-c-corbett:
+  names:
+  - {first: Greville C., last: Corbett}
+  - {first: Greville, last: Corbett}
+  - {first: Greville G., last: Corbett}
+grzegorz-chrupala:
+  names:
+  - {first: Grzegorz, last: Chrupała}
+  - {first: Grzegorz, last: Chrupala}
+guadalupe-aguado-de-cea:
+  names:
+  - {first: Guadalupe Aguado, last: de Cea}
+  - {first: Guadalupe, last: Aguado de Cea}
+  - {first: Guadalupe, last: Aguado-de-Cea}
+gualberto-a-guzman:
+  names:
+  - {first: Gualberto A., last: Guzman}
+  - {first: Gualberto, last: Guzmán}
+gudrun-magnusdottir:
+  names:
+  - {first: Guðrun, last: Magnúsdóttir}
+  - {first: Guðrún, last: Magnúsdóttir}
+guenther-goerz:
+  names:
+  - {first: Guenther, last: Goerz}
+  - {first: G., last: Goerz}
+guido-boella:
+  names:
+  - {first: Guido, last: Boella}
+  - {first: G., last: Boella}
+guillaume-bonfante:
+  names:
+  - {first: Guillaume, last: Bonfante}
+  - {first: Guillame, last: Bonfante}
+guillaume-gravier:
+  names:
+  - {first: Guillaume, last: Gravier}
+  - {first: G., last: Gravier}
+guillaume-vauvert:
+  names:
+  - {first: Guillaume, last: Vauvert}
+  - {first: G., last: Vauvert}
+guillermo-a-cecchi:
+  names:
+  - {first: Guillermo A., last: Cecchi}
+  - {first: Guillermo, last: Cecchi}
+guiping-zhang:
+  names:
+  - {first: Guiping, last: Zhang}
+  - {first: GuiPing, last: Zhang}
+gulsen-eryigit:
+  names:
+  - {first: Gülşen, last: Eryiğit}
+  - {first: Gülşen, last: Eryiǧit}
+gunaranjan-vasireddy:
+  names:
+  - {first: Gunaranjan, last: Vasireddy}
+  - {first: G., last: Vasireddy}
+gunes-erkan:
+  names:
+  - {first: Gunes, last: Erkan}
+  - {first: Güneş, last: Erkan}
+gunn-inger-lyse:
+  names:
+  - {first: Gunn Inger, last: Lyse}
+  - {first: Gunn, last: Lyse}
+gunnel-kallgren:
+  names:
+  - {first: Gunnel, last: Källgren}
+  - {first: Gunnel, last: Kallgren}
+gunta-nespore:
+  names:
+  - {first: Gunta, last: Nešpore}
+  - {first: Gunta, last: Nespore-Berzkalne}
+gunter-neumann:
+  names:
+  - {first: Günter, last: Neumann}
+  - {first: Gunter, last: Neumann}
+  - {first: Guenter, last: Neumann}
+guntis-barzdins:
+  names:
+  - {first: Guntis, last: Barzdins}
+  - {first: Guntis, last: Bārzdiņš}
+guodong-zhou:
+  names:
+  - {first: Guodong, last: Zhou}
+  - {first: GuoDong, last: Zhou}
+gurpreet-singh-lehal:
+  names:
+  - {first: Gurpreet Singh, last: Lehal}
+  - {first: Gurpreet, last: Singh Lehal}
+  - {first: Gurpreet, last: Lehal}
+gus-hahn-powell:
+  names:
+  - {first: Gus, last: Hahn-Powell}
+  - {first: Gustave, last: Hahn-Powell}
+gustavo-mendonca:
+  names:
+  - {first: Gustavo, last: Mendonca}
+  - {first: Gustavo, last: Mendonça}
+gustavo-paetzold:
+  names:
+  - {first: Gustavo, last: Paetzold}
+  - {first: Gustavo H., last: Paetzold}
+  - {first: Gustavo, last: Henrique Paetzold}
+  - {first: Gustavo Henrique, last: Paetzold}
+guy-noel-kouarata:
+  names:
+  - {first: Guy-Noel, last: Kouarata}
+  - {first: Guy-Noël, last: Kouarata}
+guy-perennou:
+  names:
+  - {first: Guy, last: Pérennou}
+  - {first: G., last: Perennou}
+gyri-smordal-losnegaard:
+  names:
+  - {first: Gyri, last: Smørdal Losnegaard}
+  - {first: Gyri S., last: Losnegaard}
+  - {first: Gyri, last: Losnegaard}
+gyuhyeon-choi:
+  names:
+  - {first: GyuHyeon, last: Choi}
+  - {first: Gyu-Hyeon, last: Choi}
+h-andrew-schwartz:
+  names:
+  - {first: H. Andrew, last: Schwartz}
+  - {first: Hansen Andrew, last: Schwartz}
+  - {first: Hansen A., last: Schwartz}
+  - {first: H Andrew, last: Schwartz}
+h-rodriguez-hontoria:
+  names:
+  - {first: H., last: Rodriguez Hontoria}
+  - {first: H., last: Rodriguez}
+hae-chang-rim:
+  names:
+  - {first: Hae Chang, last: Rim}
+  - {first: Hae-Chang, last: Rim}
+hai-quan-vu:
+  names:
+  - {first: Hai-Quan, last: Vu}
+  - {first: Hai Quan, last: Vu}
+hai-son-le:
+  names:
+  - {first: Hai-Son, last: Le}
+  - {first: Hai Son, last: Le}
+  - {first: Hai-son, last: Le}
+hakaze-cho:
+  names:
+  - {first: Hakaze, last: Cho}
+  - {first: Yufeng, last: Zhao}
+hakimeh-fadaee:
+  names:
+  - {first: Hakimeh, last: Fadaee}
+  - {first: Hakimeh, last: Fadaei}
+hal-daume-iii:
+  names:
+  - {first: Hal, last: Daumé III}
+  - {first: Hal, last: Daume III}
+  - {first: Hal, last: Daume}
+  - {first: Hal, last: Daumé}
+haldur-oim:
+  names:
+  - {first: Haldur, last: Õim}
+  - {first: Haldur, last: Oim}
+  - {first: H., last: Oim}
+hale-ogel-balaban:
+  names:
+  - {first: Hale, last: Ögel Balaban}
+  - {first: Hale, last: Ogel}
+haley-lepp:
+  names:
+  - {first: Haley, last: Lepp}
+  - {first: Haley M., last: Lepp}
+hamed-movasagh:
+  names:
+  - {first: Hamed, last: Movasagh}
+  - {first: H., last: Movasagh}
+hamish-cunningham:
+  names:
+  - {first: Hamish, last: Cunningham}
+  - {first: H., last: Cunningham}
+hammam-riza:
+  names:
+  - {first: Hammam, last: Riza}
+  - {first: Ir. Hammam, last: Riza}
+han-min-jung:
+  names:
+  - {first: Han-Min, last: Jung}
+  - {first: Hanmin, last: Jung}
+hana-skoumalova:
+  names:
+  - {first: Hana, last: Skoumalova}
+  - {first: Hana, last: Skoumalová}
+hanae-koiso:
+  names:
+  - {first: Hanae, last: Koiso}
+  - {first: H., last: Koiso}
+hanna-wallach:
+  names:
+  - {first: Hanna, last: Wallach}
+  - {first: Hanna M., last: Wallach}
+hannah-bechara:
+  names:
+  - {first: Hannah, last: Bechara}
+  - {first: Hanna, last: Béchara}
+  - {first: Hanna, last: Bechara}
+  - {first: Hannah, last: Béchara}
+hannah-cyberey:
+  names:
+  - {first: Hannah, last: Cyberey}
+  - {first: Hannah, last: Chen}
+hanne-erdman-thomsen:
+  names:
+  - {first: Hanne Erdman, last: Thomsen}
+  - {first: Hanne, last: Erdman Thomsen}
+hanne-fersoe:
+  names:
+  - {first: Hanne, last: Fersøe}
+  - {first: Hanne, last: Fersoe}
+  - {first: H., last: Fersøe}
+hans-dybkjaer:
+  names:
+  - {first: Hans, last: Dybkjaer}
+  - {first: Hans, last: Dybkjær}
+hans-ulrich-block:
+  names:
+  - {first: Hans Ulrich, last: Block}
+  - {first: Hans-Ulrich, last: Block}
+hans-ulrich-krieger:
+  names:
+  - {first: Hans-Ulrich, last: Krieger}
+  - {first: HansUlrich, last: Krieger}
+hans-van-halteren:
+  names:
+  - {first: Hans, last: van Halteren}
+  - {first: Hans, last: Van Halteren}
+hany-hassan-awadalla:
+  names:
+  - {first: Hany, last: Hassan Awadalla}
+  - {first: Hany, last: Hassan}
+haoliang-qi:
+  names:
+  - {first: Haoliang, last: Qi}
+  - {first: HaoLiang, last: Qi}
+harald-baayen:
+  names:
+  - {first: Harald, last: Baayen}
+  - {first: R. Harald, last: Baayen}
+harald-h-zimmermann:
+  names:
+  - {first: Harald H., last: Zimmermann}
+  - {first: H., last: Zimmermann}
+harald-hoge:
+  names:
+  - {first: Harald, last: Höge}
+  - {first: Harald, last: Hoege}
+  - {first: H., last: Höge}
+harald-lungen:
+  names:
+  - {first: Harald, last: Lüngen}
+  - {first: Harald, last: Lungen}
+harald-trost:
+  names:
+  - {first: Harald, last: Trost}
+  - {first: H., last: Trost}
+harold-somers:
+  names:
+  - {first: Harold, last: Somers}
+  - {first: Harold L., last: Somers}
+  - {first: H.L., last: Somers}
+harri-jappinen:
+  names:
+  - {first: Harri, last: Jäppinen}
+  - {first: Harri, last: Jappinen}
+  - {first: H., last: Jäppinen}
+harris-papageorgiou:
+  names:
+  - {first: Harris, last: Papageorgiou}
+  - {first: Haris, last: Papageorgiou}
+harry-bratt:
+  names:
+  - {first: Harry, last: Bratt}
+  - {first: H., last: Bratt}
+harry-bunt:
+  names:
+  - {first: Harry, last: Bunt}
+  - {first: H. C., last: Bunt}
+harry-j-tily:
+  names:
+  - {first: Harry J., last: Tily}
+  - {first: Harry, last: Tily}
+harry-tennant:
+  names:
+  - {first: Harry, last: Tennant}
+  - {first: Harry R., last: Tennant}
+harsh-vardhan-sharma:
+  names:
+  - {first: Harsh Vardhan, last: Sharma}
+  - {first: Harsh, last: Sharma}
+harshit-kumar:
+  names:
+  - {first: Harshit, last: Kumar}
+harshit-kumar-iit:
+  names:
+  - {first: Harshit, last: Kumar}
+hassan-s-shavarani:
+  names:
+  - {first: Hassan S., last: Shavarani}
+  - {first: Hassan, last: Shavarani}
+hatte-blejer:
+  names:
+  - {first: Hatte, last: Blejer}
+  - {first: Hatte R., last: Blejer}
+hazem-raafat:
+  names:
+  - {first: Hazem, last: Raafat}
+  - {first: Hazem, last: M. Raafat}
+he-yan-huang:
+  names:
+  - {first: He-Yan, last: Huang}
+  - {first: He-yan, last: Huang}
+  - {first: Heyan, last: Huang}
+heather-horsfall:
+  names:
+  - {first: Heather, last: Horsfall}
+  - {first: H. J., last: Horsfall}
+hector-allende-cid:
+  names:
+  - {first: Hector, last: Allende-Cid}
+  - {first: Héctor, last: Allende}
+  - {first: Héctor, last: Allende-Cid}
+hector-llorens:
+  names:
+  - {first: Héctor, last: Llorens}
+  - {first: Hector, last: Llorens}
+hector-martinez-alonso:
+  names:
+  - {first: Héctor, last: Martínez Alonso}
+  - {first: Hector, last: Martinez}
+  - {first: Héctor, last: Martínez}
+  - {first: Héctor Martínez, last: Alonso}
+  - {first: Hector, last: Martinez Alonso}
+  - {first: Héctor, last: Martinez Alonso}
+  - {first: Hector, last: Martínez Alonso}
+hee-sook-bae:
+  names:
+  - {first: Hee-Sook, last: Bae}
+  - {first: Hee Sook, last: Bae}
+hee-sung-chung:
+  names:
+  - {first: Hee Sung, last: Chung}
+  - {first: Hee-Sung, last: Chung}
+heidi-fox:
+  names:
+  - {first: Heidi, last: Fox}
+  - {first: Heidi J., last: Fox}
+heiki-jaan-kaalep:
+  names:
+  - {first: Heiki-Jaan, last: Kaalep}
+  - {first: Heiki Jaan, last: Kaalep}
+heinz-j-weber:
+  names:
+  - {first: Heinz J., last: Weber}
+  - {first: H-J., last: Weber}
+helen-hastie:
+  names:
+  - {first: Helen, last: Hastie}
+  - {first: Helen Wright, last: Hastie}
+helen-kaiyun-chen:
+  names:
+  - {first: Helen Kaiyun, last: Chen}
+  - {first: Kai-Yun, last: Chen}
+  - {first: Kai-yun, last: Chen}
+  - {first: Helen Kai-yun, last: Chen}
+helen-l-johnson:
+  names:
+  - {first: Helen L., last: Johnson}
+  - {first: Helen, last: Johnson}
+helen-m-gigley:
+  names:
+  - {first: Helen M., last: Gigley}
+  - {first: Helen, last: Gigley}
+helen-meng:
+  names:
+  - {first: Helen, last: Meng}
+  - {first: Helen M., last: Meng}
+helen-pain:
+  names:
+  - {first: Helen, last: Pain}
+  - {first: H., last: Pain}
+helen-v-cook:
+  names:
+  - {first: Helen V., last: Cook}
+  - {first: Helen, last: Cook}
+  - {first: Helen V, last: Cook}
+helena-de-medeiros-caseli:
+  names:
+  - {first: Helena de Medeiros, last: Caseli}
+  - {first: Helena, last: de Medeiros Caseli}
+helena-gomez:
+  names:
+  - {first: Helena, last: Gomez}
+  - {first: Helena, last: Gómez}
+helena-hong-gao:
+  names:
+  - {first: Helena Hong, last: Gao}
+  - {first: Helena, last: Gao}
+helene-bonneau-maynard:
+  names:
+  - {first: Hélène, last: Bonneau-Maynard}
+  - {first: Hélène, last: Maynard}
+  - {first: H., last: Bonneau-Maynard}
+helka-folch:
+  names:
+  - {first: Helka, last: Folch}
+  - {first: H., last: Folch}
+helmer-strik:
+  names:
+  - {first: Helmer, last: Strik}
+  - {first: H., last: Strik}
+hema-a-murthy:
+  names:
+  - {first: Hema A., last: Murthy}
+  - {first: Hema, last: Murthy}
+hendrik-johannes-groenewald:
+  names:
+  - {first: Hendrik Johannes, last: Groenewald}
+  - {first: Hendrik J., last: Groenewald}
+heng-wang-sydney:
+  comment: University of Sydney
+  disable_name_matching: true
+  names:
+  - {first: Heng, last: Wang}
+  orcid: 0009-0009-5473-5751
+henk-van-den-heuvel:
+  names:
+  - {first: Henk, last: van den Heuvel}
+  - {first: H., last: van den Heuvel}
+hennie-brugman:
+  names:
+  - {first: Hennie, last: Brugman}
+  - {first: H., last: Brugman}
+hennie-van-der-vliet:
+  names:
+  - {first: Hennie, last: van der Vliet}
+  - {first: Hennie, last: VanderVliet}
+henry-s-thompson:
+  names:
+  - {first: Henry S., last: Thompson}
+  - {first: Henry, last: Thompson}
+herbert-gish:
+  names:
+  - {first: Herbert, last: Gish}
+  - {first: Herb, last: Gish}
+hermann-ney:
+  names:
+  - {first: Hermann, last: Ney}
+  - {first: H., last: Ney}
+herve-blanchon:
+  names:
+  - {first: Hervé, last: Blanchon}
+  - {first: Herve, last: Blanchon}
+herve-dejean:
+  names:
+  - {first: Hervé, last: Déjean}
+  - {first: Herve, last: Dejean}
+  - {first: H., last: Dejean}
+herve-saint-amand:
+  names:
+  - {first: Herve, last: Saint-Amand}
+  - {first: Hervé, last: Saint-Amand}
+heui-seok-lim:
+  names:
+  - {first: Heui-Seok, last: Lim}
+  - {first: Heuiseok, last: Lim}
+heung-yeung-shum:
+  names:
+  - {first: Heung Yeung, last: Shum}
+  - {first: Heung-Yeung, last: Shum}
+hideaki-kikuchi:
+  names:
+  - {first: Hideaki, last: Kikuchi}
+  - {first: H., last: Kikuchi}
+hideki-hirakawa:
+  names:
+  - {first: Hideki, last: Hirakawa}
+  - {first: H., last: Hirakawa}
+hideki-kashioka:
+  names:
+  - {first: Hideki, last: Kashioka}
+  - {first: H, last: Kashioka}
+himani-chaudhry:
+  names:
+  - {first: Himani, last: Chaudhry}
+  - {first: Himani, last: Chaudhary}
+hing-cheung-ho:
+  names:
+  - {first: Hing-cheung, last: Ho}
+  - {first: Hing-Cheung, last: Ho}
+hing-lung-lin:
+  names:
+  - {first: Hing-Lung, last: Lin}
+  - {first: Hing-lung, last: Lin}
+hinrich-schutze:
+  names:
+  - {first: Hinrich, last: Schütze}
+  - {first: Hinrich, last: Schutze}
+  - {first: Hinrich, last: Schuetze}
+hirofumi-yamamoto:
+  names:
+  - {first: Hirofumi, last: Yamamoto}
+  - {first: Hirohumi, last: Yamamoto}
+hiromi-itoh-ozaku:
+  names:
+  - {first: Hiromi Itoh, last: Ozaku}
+  - {first: Hiromi itoh, last: Ozaku}
+hiroshi-echizen-ya:
+  names:
+  - {first: Hiroshi, last: Echizen-ya}
+  - {first: Hiroshi, last: Echizen’ya}
+hiroshi-g-okuno:
+  names:
+  - {first: Hiroshi G., last: Okuno}
+  - {first: Hiroshi, last: Okuno}
+hiroshi-masuichi:
+  names:
+  - {first: Hiroshi, last: Masuichi}
+  - {first: Hiroshi, last: Mashuichi}
+hiroyuki-akama:
+  names:
+  - {first: Hiroyuki, last: Akama}
+  - {first: Hiroyuki, last: Akam}
+hisashi-kawai:
+  names:
+  - {first: Hisashi, last: Kawai}
+  - {first: Kawai, last: Hisashi}
+hiyan-alshawi:
+  names:
+  - {first: Hiyan, last: Alshawi}
+  - {first: Hiyan, last: Alsawi}
+hoa-trang-dang:
+  names:
+  - {first: Hoa Trang, last: Dang}
+  - {first: Hoa, last: Dang}
+hoang-kiem:
+  names:
+  - {first: Hoang, last: Kiem}
+  - {first: Kiem, last: Hoang}
+hoang-quynh-le:
+  names:
+  - {first: Hoang Quynh, last: Le}
+  - {first: Hoang-Quynh, last: Le}
+holger-hoffmann:
+  names:
+  - {first: Holger, last: Hoffmann}
+  - {first: Holger, last: Hoffman}
+hong-i-ng:
+  names:
+  - {first: Hong-I, last: Ng}
+  - {first: Hong I, last: Ng}
+hong-leung:
+  names:
+  - {first: Hong, last: Leung}
+  - {first: Hong C., last: Leung}
+hongsuck-seo:
+  names:
+  - {first: Hongsuck, last: Seo}
+  - {first: Paul Hongsuck, last: Seo}
+hongyan-jing:
+  names:
+  - {first: Hongyan, last: Jing}
+  - {first: H., last: Jing}
+hongying-zan:
+  names:
+  - {first: Hongying, last: Zan}
+  - {first: Hong-ying, last: Zan}
+hoojung-chung:
+  names:
+  - {first: HooJung, last: Chung}
+  - {first: Hoojung, last: Chung}
+horacio-rodriguez:
+  names:
+  - {first: Horacio, last: Rodríguez}
+  - {first: Horacio, last: Rodriguez}
+horng-jyh-paul-wu:
+  names:
+  - {first: Horng Jyh Paul, last: Wu}
+  - {first: Horng-Jyh P., last: Wu}
+horst-udo-hain:
+  names:
+  - {first: Horst-Udo, last: Hain}
+  - {first: H.-U., last: Hain}
+hossein-sameti:
+  names:
+  - {first: Hossein, last: Sameti}
+  - {first: H., last: Sameti}
+houda-saadane:
+  names:
+  - {first: Houda, last: Saadane}
+  - {first: Houda, last: Saâdane}
+howard-r-turtle:
+  names:
+  - {first: Howard R., last: Turtle}
+  - {first: Howard, last: Turtle}
+hristo-tanev:
+  names:
+  - {first: Hristo, last: Tanev}
+  - {first: Hristo, last: Tannev}
+hsiang-pin-lee:
+  names:
+  - {first: Hsiang-Pin, last: Lee}
+  - {first: Hsiang-Ping, last: Lee}
+hsiao-wuen-hon:
+  names:
+  - {first: Hsiao-Wuen, last: Hon}
+  - {first: H.W., last: Hon}
+  - {first: H., last: Hon}
+hsin-min-wang:
+  names:
+  - {first: Hsin-Min, last: Wang}
+  - {first: Hsin-min, last: Wang}
+hsue-hueh-shih:
+  names:
+  - {first: Hsue-Hueh, last: Shih}
+  - {first: Rebecca Hsue-Hueh, last: Shih}
+hsun-wen-chiu:
+  names:
+  - {first: Hsun-Wen, last: Chiu}
+  - {first: Hsun-wen, last: Chiu}
+huarui-zhang:
+  names:
+  - {first: Huarui, last: Zhang}
+  - {first: HuaRui, last: Zhang}
+huda-almuzaini:
+  names:
+  - {first: Huda, last: Almuzaini}
+huey-chyun-chen:
+  names:
+  - {first: Huey-Chyun, last: Chen}
+  - {first: Mathis Huey-chyun, last: Chen}
+hugo-goncalo-oliveira:
+  names:
+  - {first: Hugo, last: Gonçalo Oliveira}
+  - {first: Hugo Gonçalo, last: Oliveira}
+hugo-van-hamme:
+  names:
+  - {first: Hugo Van, last: hamme}
+  - {first: Hugo, last: Van hamme}
+huidan-liu:
+  names:
+  - {first: Huidan, last: Liu}
+  - {first: Hui Dan, last: Liu}
+huifeng-li:
+  names:
+  - {first: Huifeng, last: Li}
+  - {first: Hui-Feng, last: Li}
+huihsin-tseng:
+  names:
+  - {first: Huihsin, last: Tseng}
+  - {first: Hui-hsin, last: Tseng}
+  - {first: Hui-Hsin, last: Tseng}
+huiwei-zhou:
+  names:
+  - {first: Huiwei, last: Zhou}
+  - {first: HuiWei, last: Zhou}
+huizhi-liang:
+  names:
+  - {first: Huizhi, last: Liang}
+  - {first: HuiZhi, last: Liang}
+hung-ting-hsieh:
+  names:
+  - {first: Hung-ting, last: Hsieh}
+  - {first: Hung-Ting, last: Hsieh}
+hung-yan-gu:
+  names:
+  - {first: Hung-Yan, last: Gu}
+  - {first: Hung-yan, last: Gu}
+huy-nguyen-bcl:
+  comment: BCL Technologies Inc.
+  names:
+  - {first: Huy, last: Nguyen}
+huy-nguyen-lls:
+  comment: ex-liulishuo
+  names:
+  - {first: Huy, last: Nguyen}
+huy-nguyen-pgh:
+  comment: UPitt, Amazon
+  names:
+  - {first: Huy, last: Nguyen}
+huy-nguyen-stanford:
+  comment: Stanford
+  names:
+  - {first: Huy, last: Nguyen}
+huy-tien-nguyen:
+  names:
+  - {first: Huy Tien, last: Nguyen}
+  - {first: Huy-Tien, last: Nguyen}
+hy-murveit:
+  names:
+  - {first: Hy, last: Murveit}
+  - {first: H., last: Murveit}
+hyeon-gu-lee:
+  names:
+  - {first: Hyeon-gu, last: Lee}
+  - {first: Hyeon-Gu, last: Lee}
+hyojung-han:
+  names:
+  - {first: HyoJung, last: Han}
+  - {first: Hou Jeung, last: Han}
+hyuhng-joon-kim:
+  names:
+  - {first: Hyuhng Joon, last: Kim}
+  - {first: Hyuhng, last: Kim}
+hyukro-park:
+  names:
+  - {first: HyukRo, last: Park}
+  - {first: Hyukro, last: Park}
+hyun-seok-park:
+  names:
+  - {first: Hyun Seok, last: Park}
+  - {first: Hyun S., last: Park}
+hyung-bae-jeon:
+  names:
+  - {first: Hyung-Bae, last: Jeon}
+  - {first: Hyungbae, last: Jeon}
+iain-marshall:
+  names:
+  - {first: Iain, last: Marshall}
+  - {first: Iain J., last: Marshall}
+ian-m-oneill:
+  names:
+  - {first: Ian M., last: O’Neill}
+  - {first: Ian, last: O’Neill}
+ian-p-davy:
+  names:
+  - {first: Ian P., last: Davy}
+  - {first: Ian, last: Davy}
+  - {first: Ian P, last: Davy}
+igor-a-bolshakov:
+  names:
+  - {first: Igor A., last: Bolshakov}
+  - {first: Igor, last: Bolshakov}
+igor-boguslavsky:
+  names:
+  - {first: Igor, last: Boguslavsky}
+  - {first: Igor M., last: Boguslavsky}
+igor-leturia:
+  names:
+  - {first: Igor, last: Leturia}
+  - {first: I., last: Leturia}
+igor-melcuk:
+  names:
+  - {first: Igor, last: Mel’čuk}
+  - {first: I., last: Mel’cuk}
+  - {first: I. A., last: Mel’čuk}
+ihsan-yalcinkaya:
+  names:
+  - {first: Ihsan, last: Yalcinkaya}
+  - {first: İhsan, last: Yalçinkaya}
+  - {first: İhsan, last: Yalcinkaya}
+ik-hwan-lee:
+  names:
+  - {first: Ik-Hwan, last: Lee}
+  - {first: Ik-hwan, last: Lee}
+ilknur-durgar-el-kahlout:
+  names:
+  - {first: Ilknur, last: Durgar El-Kahlout}
+  - {first: Ilknur Durgar, last: El-Kahlout}
+  - {first: İlknur, last: Durgar El-Kahlout}
+  - {first: İlknur Durgar, last: El-Kahlout}
+ilyas-cicekli:
+  names:
+  - {first: Ilyas, last: Cicekli}
+  - {first: İlyas, last: Çiçekli}
+imanol-madariaga:
+  names:
+  - {first: Imanol, last: Madariaga}
+  - {first: I., last: Madariaga}
+imre-kiss:
+  names:
+  - {first: Imre, last: Kiss}
+  - {first: I., last: Kiss}
+ina-roesiger:
+  names:
+  - {first: Ina, last: Roesiger}
+  - {first: Ina, last: Rösiger}
+inaki-alegria:
+  names:
+  - {first: Iñaki, last: Alegría}
+  - {first: Iñaki, last: Alegria}
+  - {first: Inaki, last: Alegria}
+  - {first: I, last: Alegria}
+  - {first: I., last: Alegria}
+inaki-gaminde:
+  names:
+  - {first: Iñaki, last: Gaminde}
+  - {first: I., last: Gaminde}
+ingrid-starke:
+  names:
+  - {first: Ingrid, last: Starke}
+  - {first: I., last: Starke}
+inguna-skadina:
+  names:
+  - {first: Inguna, last: Skadiņa}
+  - {first: Inguna, last: Skadina}
+  - {first: Inguna, last: Skadin̨a}
+inmaculada-hernaez:
+  names:
+  - {first: Inmaculada, last: Hernáez}
+  - {first: Inmaculada, last: Hernaez}
+  - {first: Inma, last: Hernaez}
+  - {first: Inma, last: Hernáez}
+  - {first: I., last: Hernáez}
+ioana-vasilescu:
+  names:
+  - {first: Ioana, last: Vasilescu}
+  - {first: I., last: Vasilescu}
+ioannis-dologlou:
+  names:
+  - {first: Ioannis, last: Dologlou}
+  - {first: I., last: Dologlou}
+ioannis-kakadiaris:
+  names:
+  - {first: Ioannis, last: Kakadiaris}
+  - {first: Ioannis A., last: Kakadiaris}
+ioannis-klapaftis:
+  names:
+  - {first: Ioannis, last: Klapaftis}
+  - {first: Ioannis P., last: Klapaftis}
+ionut-sorodoc:
+  names:
+  - {first: Ionut, last: Sorodoc}
+  - {first: Ionut-Teodor, last: Sorodoc}
+irena-spasic:
+  names:
+  - {first: Irena, last: Spasić}
+  - {first: Irena, last: Spasic}
+irene-castellon:
+  names:
+  - {first: Irene, last: Castellón}
+  - {first: Irene, last: Castellon}
+irene-langkilde:
+  names:
+  - {first: Irene, last: Langkilde}
+  - {first: Irene, last: Langkilde-Geary}
+irene-nirenburg:
+  names:
+  - {first: Irene, last: Nirenburg}
+  - {first: Irene B., last: Nirenburg}
+irene-rodrigues:
+  names:
+  - {first: Irene, last: Rodrigues}
+  - {first: Irene Pimenta, last: Rodrigues}
+  - {first: Irene, last: Pimenta Rodrigues}
+iria-da-cunha:
+  names:
+  - {first: Iria, last: da Cunha}
+  - {first: I., last: da Cunha}
+iria-del-rio-gayo:
+  names:
+  - {first: Iria, last: Del Río Gayo}
+  - {first: Iria, last: del Río Gayo}
+  - {first: Iria, last: del Río}
+  - {first: Iria, last: del Rio}
+irina-matveeva:
+  names:
+  - {first: Irina, last: Matveeva}
+  - {first: I., last: Matveeva}
+irina-prodanof:
+  names:
+  - {first: Irina, last: Prodanof}
+  - {first: I., last: Prodanof}
+iris-eshkol:
+  names:
+  - {first: Iris, last: Eshkol}
+  - {first: Iris, last: Eshkol-Taravella}
+iris-hoser:
+  names:
+  - {first: Iris, last: Hoser}
+  - {first: I., last: Hoser}
+irshad-bhat:
+  names:
+  - {first: Irshad, last: Bhat}
+  - {first: Irshad A., last: Bhat}
+isabel-segura-bedmar:
+  names:
+  - {first: Isabel, last: Segura-Bedmar}
+  - {first: Isabel, last: Segura Bedmar}
+isin-demirsahin:
+  names:
+  - {first: Isin, last: Demirsahin}
+  - {first: Işin, last: Demirşahin}
+  - {first: Isin, last: Demirşahin}
+islam-beltagy:
+  names:
+  - {first: Islam, last: Beltagy}
+  - {first: I., last: Beltagy}
+ismael-garcia-varea:
+  names:
+  - {first: Ismael, last: García-Varea}
+  - {first: Ismael García, last: Varea}
+  - {first: Ismael, last: García Varea}
+ismail-babaoglu:
+  names:
+  - {first: Ismail, last: Babaoğlu}
+  - {first: Ismail, last: Babaoglu}
+ismail-el-maarouf:
+  names:
+  - {first: Ismail, last: El Maarouf}
+  - {first: Ismaïl, last: El Maarouf}
+ismail-timimi:
+  names:
+  - {first: Ismail, last: Timimi}
+  - {first: Ismaïl, last: Timimi}
+  - {first: I., last: Timimi}
+istvan-batori:
+  names:
+  - {first: Istvan, last: Batori}
+  - {first: I., last: Batori}
+istvan-nagy-t:
+  names:
+  - {first: István, last: Nagy T.}
+  - {first: István T., last: Nagy}
+  - {first: István, last: Nagy}
+  - {first: Istvan, last: Nagy}
+istvan-varga:
+  names:
+  - {first: István, last: Varga}
+  - {first: Istvan, last: Varga}
+itziar-aduriz:
+  names:
+  - {first: Itziar, last: Aduriz}
+  - {first: I., last: Aduriz}
+iulian-vlad-serban:
+  names:
+  - {first: Iulian Vlad, last: Serban}
+  - {first: Iulian, last: Serban}
+iuliana-alexandra-flescan-lovin-arseni:
+  names:
+  - {first: Iuliana Alexandra, last: Fleşcan-Lovin-Arseni}
+  - {first: Iuliana Alexandra, last: Fleșcan-Lovin-Arseni}
+  - {first: Iuliana-Alexandra, last: Flescan-Lovin-Arseni}
+ivan-meza-ruiz:
+  names:
+  - {first: Ivan, last: Meza-Ruiz}
+  - {first: Ivan Vladimir, last: Meza Ruiz}
+  - {first: Ivan V., last: Meza}
+  - {first: Ivan, last: Meza}
+  - {first: Ivan Vladimir, last: Meza-Ruiz}
+ivan-obradovic:
+  names:
+  - {first: Ivan, last: Obradović}
+  - {first: Ivan, last: Obradoviæ}
+ivana-kruijff-korbayova:
+  names:
+  - {first: Ivana, last: Kruijff-Korbayová}
+  - {first: Ivana, last: Kruijff-Korbayova}
+  - {first: Ivana, last: Kruijff-Korbayovà}
+ivandre-paraboni:
+  names:
+  - {first: Ivandré, last: Paraboni}
+  - {first: Ivandre, last: Paraboni}
+ivona-kucerova:
+  names:
+  - {first: Ivona, last: Kučerová}
+  - {first: Ivona, last: Kuc̆erová}
+izaskun-aldezabal:
+  names:
+  - {first: Izaskun, last: Aldezabal}
+  - {first: I., last: Aldezabal}
+j-angus-webb:
+  names:
+  - {first: J. Angus, last: Webb}
+  - {first: Angus, last: Webb}
+j-fernando-sanchez-rada:
+  names:
+  - {first: J. Fernando, last: Sánchez-Rada}
+  - {first: Fernando, last: Sánchez-Rada}
+j-peterson:
+  names:
+  - {first: J., last: Peterson}
+  - {first: Jill, last: Peterson}
+j-robin-rohlicek:
+  names:
+  - {first: J. Robin, last: Rohlicek}
+  - {first: Robin, last: Rohlicek}
+  - {first: J.R., last: Rohlicek}
+  - {first: J. R., last: Rohlicek}
+j-scott-mccarley:
+  names:
+  - {first: J. Scott, last: McCarley}
+  - {first: Scott, last: McCarley}
+j-walker-orr:
+  names:
+  - {first: J. Walker, last: Orr}
+  - {first: Walker, last: Orr}
+jaakko-vayrynen:
+  names:
+  - {first: Jaakko, last: Väyrynen}
+  - {first: Jaakko J., last: Väyrynen}
+jackie-chi-kit-cheung:
+  names:
+  - {first: Jackie Chi Kit, last: Cheung}
+  - {first: Jackie C. K., last: Cheung}
+  - {first: Jackie C.K., last: Cheung}
+  - {first: Jackie, last: Cheung}
+jackson-souza:
+  names:
+  - {first: Jackson, last: Souza}
+  - {first: J., last: Souza}
+jacob-hoover-vigly:
+  names:
+  - {first: Jacob Hoover, last: Vigly}
+  - {first: Jacob Louis, last: Hoover}
+  - {first: Jacob, last: Hoover}
+jacqueline-leon:
+  names:
+  - {first: Jacqueline, last: Leon}
+  - {first: Jacqueline, last: Léon}
+jacqueline-vaissiere:
+  names:
+  - {first: Jacqueline, last: Vaissiere}
+  - {first: Jacqueline, last: Vaissière}
+jacques-chauche:
+  names:
+  - {first: Jacques, last: Chauché}
+  - {first: J., last: Chauche}
+  - {first: J., last: Chauché}
+jacques-rouault:
+  names:
+  - {first: Jacques, last: Rouault}
+  - {first: J., last: Rouault}
+jade-goldstein:
+  names:
+  - {first: Jade, last: Goldstein}
+  - {first: Jade, last: Goldstein-Stewart}
+jae-hee-lee-bremen:
+  comment: Bremen
+  disable_name_matching: true
+  names:
+  - {first: Jae, last: Hee Lee}
+  - {first: Jae Hee, last: Lee}
+  orcid: 0000-0001-9840-780X
+jae-won-lee:
+  names:
+  - {first: Jae-Won, last: Lee}
+  - {first: Jae-won, last: Lee}
+jaesong-lee:
+  names:
+  - {first: Jaesong, last: Lee}
+  - {first: JaeSong, last: Lee}
+jaesung-lee:
+  names:
+  - {first: JaeSung, last: Lee}
+  - {first: Jae-Sung, last: Lee}
+jahna-otterbacher:
+  names:
+  - {first: Jahna, last: Otterbacher}
+  - {first: Jahna C., last: Otterbacher}
+jaime-g-carbonell:
+  comment: CMU
+  names:
+  - {first: Jaime G., last: Carbonell}
+  - {first: Jaime, last: Carbonell}
+  - {first: Jaime G., last: Carbonell Jr}
+  similar:
+  - jaime-r-carbonell
+jaime-r-carbonell:
+  comment: BBN; d. 1973
+  names:
+  - {first: Jaime R., last: Carbonell}
+  similar:
+  - jaime-g-carbonell
+jakub-waszczuk:
+  names:
+  - {first: Jakub, last: Waszczuk}
+  - {first: Jakub, last: Wasczuk}
+jamal-a-nasir:
+  names:
+  - {first: Jamal A., last: Nasir}
+  - {first: Jamal, last: Nasir}
+james-allan:
+  comment: UMass Amherst
+  names:
+  - {first: James, last: Allan}
+  - {first: J., last: Allan}
+  similar:
+  - james-allen
+james-allen:
+  comment: Rochester
+  names:
+  - {first: James, last: Allen}
+  - {first: James F., last: Allen}
+  similar:
+  - james-allan
+james-baker:
+  names:
+  - {first: James, last: Baker}
+  - {first: James K., last: Baker}
+james-davis:
+  names:
+  - {first: James, last: Davis}
+  - {first: James Raymond, last: Davis}
+james-g-mork:
+  names:
+  - {first: James G., last: Mork}
+  - {first: James, last: Mork}
+james-glass:
+  names:
+  - {first: James, last: Glass}
+  - {first: James R., last: Glass}
+james-h-martin:
+  names:
+  - {first: James H., last: Martin}
+  - {first: James, last: Martin}
+james-henderson:
+  names:
+  - {first: James, last: Henderson}
+  - {first: James B., last: Henderson}
+james-hendler:
+  names:
+  - {first: James, last: Hendler}
+  - {first: James A., last: Hendler}
+james-hieronymus:
+  names:
+  - {first: James, last: Hieronymus}
+  - {first: J., last: Hieronymus}
+james-l-flanagan:
+  names:
+  - {first: James L., last: Flanagan}
+  - {first: J. L., last: Flanagan}
+  - {first: J., last: Flanagan}
+james-lester:
+  names:
+  - {first: James, last: Lester}
+  - {first: James C., last: Lester}
+james-paul-white:
+  names:
+  - {first: James Paul, last: White}
+  - {first: James P., last: White}
+  - {first: James, last: White}
+james-pustejovsky:
+  names:
+  - {first: James, last: Pustejovsky}
+  - {first: James D., last: Pustejovsky}
+  - {first: J., last: Pustejovsky}
+james-r-curran:
+  names:
+  - {first: James R., last: Curran}
+  - {first: James, last: Curran}
+james-yoon:
+  names:
+  - {first: James, last: Yoon}
+  - {first: James H., last: Yoon}
+jamie-kiros:
+  names:
+  - {first: Jamie, last: Kiros}
+  - {first: Jamie Ryan, last: Kiros}
+jan-curin:
+  names:
+  - {first: Jan, last: Cuřín}
+  - {first: J., last: Cuřín}
+jan-hajic:
+  names:
+  - {first: Jan, last: Hajic}
+  - {first: Jan, last: Hajič}
+  - {first: J., last: Hajič}
+  similar:
+  - jan-hajic-jr
+jan-hajic-jr:
+  names:
+  - {first: Jan, last: Hajič jr.}
+  similar:
+  - jan-hajic
+jan-kors:
+  names:
+  - {first: Jan, last: Kors}
+  - {first: Jan, last: Korst}
+jan-landsbergen:
+  names:
+  - {first: Jan, last: Landsbergen}
+  - {first: S. P. J., last: Landsbergen}
+  - {first: S.P.J., last: Landsbergen}
+jan-milan-deriu:
+  names:
+  - {first: Jan Milan, last: Deriu}
+  - {first: Jan, last: Deriu}
+jan-odijk:
+  names:
+  - {first: Jan, last: Odijk}
+  - {first: J., last: Odijk}
+jan-ptacek:
+  names:
+  - {first: Jan, last: Ptacek}
+  - {first: Jan, last: Ptáček}
+jan-vystrcil:
+  names:
+  - {first: Jan, last: Vystrčil}
+  - {first: Jan, last: Vystrcil}
+jana-gotze:
+  names:
+  - {first: Jana, last: Götze}
+  - {first: Jana, last: Goetze}
+jana-kravalova:
+  names:
+  - {first: Jana, last: Kravalová}
+  - {first: Jana, last: Kravalova}
+jana-sindlerova:
+  names:
+  - {first: Jana, last: Šindlerová}
+  - {first: Jana, last: Sindlerova}
+jana-sukkarieh:
+  names:
+  - {first: Jana, last: Sukkarieh}
+  - {first: Jana Z., last: Sukkarieh}
+jane-j-robinson:
+  names:
+  - {first: Jane J., last: Robinson}
+  - {first: Jane, last: Robinson}
+janet-baker:
+  names:
+  - {first: Janet, last: Baker}
+  - {first: Janet M., last: Baker}
+janet-hitzeman:
+  names:
+  - {first: Janet, last: Hitzeman}
+  - {first: J., last: Hitzeman}
+janet-pierrehumbert:
+  names:
+  - {first: Janet, last: Pierrehumbert}
+  - {first: Janet B., last: Pierrehumbert}
+janez-zibert:
+  names:
+  - {first: Janez, last: Zibert}
+  - {first: Janez, last: Žibert}
+janienke-sturm:
+  names:
+  - {first: Janienke, last: Sturm}
+  - {first: J., last: Sturm}
+jann-railey-montalan:
+  names:
+  - {first: Jann Railey, last: Montalan}
+  - {first: Jann, last: Montalan}
+  - {first: Railey, last: Montalan}
+  - {first: Jann Railey E., last: Montalan}
+janne-bondi-johannessen:
+  names:
+  - {first: Janne Bondi, last: Johannessen}
+  - {first: Janne, last: Bondi Johannessen}
+janusz-stanislaw-bien:
+  names:
+  - {first: Janusz Stanisław, last: Bien}
+  - {first: Janusz Stanislaw, last: Bien}
+  - {first: Janusz S., last: Bień}
+  - {first: Janusz S., last: Bien}
+janyce-wiebe:
+  names:
+  - {first: Janyce, last: Wiebe}
+  - {first: Janyce M., last: Wiebe}
+  - {first: Jan, last: Wiebe}
+jarmila-panevova:
+  names:
+  - {first: Jarmila, last: Panevová}
+  - {first: Jarmila, last: Panevova}
+jason-brenier:
+  names:
+  - {first: Jason, last: Brenier}
+  - {first: Jason M., last: Brenier}
+jason-d-williams:
+  names:
+  - {first: Jason D., last: Williams}
+  - {first: Jason, last: Williams}
+jason-eisner:
+  names:
+  - {first: Jason, last: Eisner}
+  - {first: Jason M., last: Eisner}
+jason-katz-brown:
+  names:
+  - {first: Jason, last: Katz-Brown}
+  - {first: Jason, last: Brown}
+jason-s-chang:
+  names:
+  - {first: Jason S., last: Chang}
+  - {first: Jason, last: Chang}
+  - {first: Jason J. S., last: Chang}
+  - {first: Jason J.S., last: Chang}
+  - {first: Jason J., last: Chang}
+jason-smith:
+  names:
+  - {first: Jason, last: Smith}
+  - {first: Jason R., last: Smith}
+javier-dieguez-tirado:
+  names:
+  - {first: Javier, last: Dieguez-Tirado}
+  - {first: Javier, last: Dieguez}
+javier-farreres:
+  names:
+  - {first: Javier, last: Farreres}
+  - {first: Xavier, last: Farreres}
+javier-ortega-garcia:
+  names:
+  - {first: Javier, last: Ortega-García}
+  - {first: Javier, last: Ortega-Garcia}
+jay-wilpon:
+  names:
+  - {first: Jay, last: Wilpon}
+  - {first: Jay G., last: Wilpon}
+jean-claude-martin:
+  names:
+  - {first: Jean-Claude, last: Martin}
+  - {first: J-C., last: Martin}
+  - {first: J.-C., last: Martin}
+  - {first: J.C., last: Martin}
+jean-david-ruvini:
+  names:
+  - {first: Jean David, last: Ruvini}
+  - {first: Jean-David, last: Ruvini}
+jean-e-fox-tree:
+  names:
+  - {first: Jean E., last: Fox Tree}
+  - {first: Jean Fox, last: Tree}
+  - {first: Jean, last: Fox Tree}
+jean-francois-bonastre:
+  names:
+  - {first: Jean-François, last: Bonastre}
+  - {first: Jean-Francois, last: Bonastre}
+  - {first: J-F., last: Bonastre}
+  - {first: J.-F., last: Bonastre}
+jean-francois-delannoy:
+  names:
+  - {first: Jean-François, last: Delannoy}
+  - {first: Jean-Francois, last: Delannoy}
+jean-francois-serignat:
+  names:
+  - {first: Jean-François, last: Serignat}
+  - {first: J.F., last: Serignat}
+jean-luc-lebrun:
+  names:
+  - {first: Jean-Luc, last: LeBrun}
+  - {first: Jean-Luc, last: Lebrun}
+jean-marc-colletta:
+  names:
+  - {first: Jean-Marc, last: Colletta}
+  - {first: J.M., last: Colletta}
+jean-mark-gawron:
+  names:
+  - {first: Jean Mark, last: Gawron}
+  - {first: Mark, last: Gawron}
+  - {first: J. Mark, last: Gawron}
+  - {first: J. M., last: Gawron}
+jean-pierre-descles:
+  names:
+  - {first: Jean-Pierre, last: Descles}
+  - {first: Jean-Pierre, last: Desclés}
+  - {first: Jean Pierre, last: Descles}
+jean-pierre-paillet:
+  names:
+  - {first: Jean-Pierre, last: Paillet}
+  - {first: Jean Pierre, last: Paillet}
+jean-veronis:
+  names:
+  - {first: Jean, last: Veronis}
+  - {first: Jean, last: Véronis}
+jeanne-baguenier-desormeaux:
+  names:
+  - {first: Jeanne, last: Baguenier Desormeaux}
+  - {first: Jeanne, last: Baguenier-Desormeaux}
+jeanne-villaneau:
+  names:
+  - {first: Jeanne, last: Villaneau}
+  - {first: J., last: Villaneau}
+jeannette-g-neal:
+  names:
+  - {first: Jeannette G., last: Neal}
+  - {first: J.G., last: Neal}
+jee-sun-nam:
+  names:
+  - {first: Jee-sun, last: Nam}
+  - {first: Jee-Sun, last: Nam}
+jeff-bilmes:
+  names:
+  - {first: Jeff, last: Bilmes}
+  - {first: Jeff A., last: Bilmes}
+jeffrey-p-bigham:
+  names:
+  - {first: Jeffrey P., last: Bigham}
+  - {first: Jeffrey, last: Bigham}
+jeffrey-sorensen:
+  names:
+  - {first: Jeffrey, last: Sorensen}
+  - {first: Jeffrey S., last: Sorensen}
+jeih-weih-hung:
+  names:
+  - {first: Jeih-weih, last: Hung}
+  - {first: Jeih-Weih, last: Hung}
+jen-nan-chen:
+  names:
+  - {first: Jen Nan, last: Chen}
+  - {first: Jen-Nan, last: Chen}
+  - {first: Jen-nan, last: Chen}
+jen-tzung-chien:
+  names:
+  - {first: Jen-Tzung, last: Chien}
+  - {first: Jen-Tzong, last: Chien}
+jennifer-c-lai:
+  names:
+  - {first: Jennifer C., last: Lai}
+  - {first: Jenifer C., last: Lai}
+  - {first: Jennifer, last: Lai}
+jennifer-chu-carroll:
+  names:
+  - {first: Jennifer, last: Chu-Carroll}
+  - {first: Jennifer, last: Chu}
+jennifer-doyon:
+  names:
+  - {first: Jennifer, last: Doyon}
+  - {first: Jennifer B., last: Doyon}
+jennifer-hay:
+  names:
+  - {first: Jennifer, last: Hay}
+  - {first: Jennifer B., last: Hay}
+jennifer-stromer-galley:
+  names:
+  - {first: Jennifer, last: Stromer-Galley}
+  - {first: Jennifer, last: Strommer-Galley}
+jenny-rose-finkel:
+  names:
+  - {first: Jenny Rose, last: Finkel}
+  - {first: Jenny, last: Finkel}
+jens-erik-fenstad:
+  names:
+  - {first: Jens Erik, last: Fenstad}
+  - {first: Jens-Erik, last: Fenstad}
+jeong-won-cha:
+  names:
+  - {first: Jeong-Won, last: Cha}
+  - {first: Jeongwon, last: Cha}
+jer-hayes:
+  names:
+  - {first: Jer, last: Hayes}
+  - {first: Jeremiah, last: Hayes}
+jeremie-segouat:
+  names:
+  - {first: Jérémie, last: Segouat}
+  - {first: J., last: Segouat}
+jeremy-g-kahn:
+  names:
+  - {first: Jeremy G., last: Kahn}
+  - {first: Jeremy, last: Kahn}
+jeremy-j-carroll:
+  names:
+  - {first: Jeremy J., last: Carroll}
+  - {first: Jeremy, last: Carroll}
+jeremy-leixa:
+  names:
+  - {first: Jeremy, last: Leixa}
+  - {first: Jérémy, last: Leixa}
+jeremy-trione:
+  names:
+  - {first: Jérémy, last: Trione}
+  - {first: Jeremy, last: Trione}
+jerneja-gros:
+  names:
+  - {first: Jerneja, last: Gros}
+  - {first: Jerneja Žganec, last: Gros}
+jerome-goulian:
+  names:
+  - {first: Jérôme, last: Goulian}
+  - {first: J., last: Goulian}
+jerome-vapillon:
+  names:
+  - {first: Jerome, last: Vapillon}
+  - {first: J., last: Vapillon}
+jerry-r-hobbs:
+  names:
+  - {first: Jerry R., last: Hobbs}
+  - {first: Jerry, last: Hobbs}
+  - {first: J.R., last: Hobbs}
+jessica-moszkowicz:
+  names:
+  - {first: Jessica, last: Moszkowicz}
+  - {first: Jessica L., last: Moszkowicz}
+jesujoba-alabi:
+  names:
+  - {first: Jesujoba, last: Alabi}
+  - {first: Jesujoba O., last: Alabi}
+  - {first: Jesujoba Oluwadara, last: Alabi}
+jesus-e-diaz-verdejo:
+  names:
+  - {first: Jesús E., last: Díaz Verdejo}
+  - {first: J.E., last: Díaz Verdejo}
+jesus-gimenez:
+  names:
+  - {first: Jesús, last: Giménez}
+  - {first: Jesus, last: Gimenez}
+jesus-gonzalez-rubio:
+  names:
+  - {first: Jesús, last: González-Rubio}
+  - {first: Jesús, last: González Rubio}
+jesus-peral:
+  names:
+  - {first: Jesús, last: Peral}
+  - {first: Jesus, last: Peral}
+  - {first: J., last: Peral}
+jia-fei-hong:
+  names:
+  - {first: Jia-Fei, last: Hong}
+  - {first: Jia-Fei, last: Hung}
+jia-lin-shen:
+  names:
+  - {first: Jia-Lin, last: Shen}
+  - {first: Jia-lin, last: Shen}
+jia-lu:
+  names:
+  - {first: Jia, last: Lü}
+  - {first: Jia, last: Lu}
+jia-yan-jian:
+  names:
+  - {first: Jia-Yan, last: Jian}
+  - {first: Jia Yan, last: Jian}
+jiahao-yuan-ecnu:
+  degree: East China Normal University
+  disable_name_matching: true
+  names:
+  - {first: Jiahao, last: Yuan}
+  orcid: 0009-0002-6194-450X
+jiaheng-zheng:
+  names:
+  - {first: Jiaheng, last: Zheng}
+  - {first: Jia-heng, last: Zheng}
+jiajun-chen:
+  names:
+  - {first: Jiajun, last: Chen}
+  - {first: Jia-jun, last: Chen}
+  - {first: Jia-Jun, last: Chen}
+jian-chen-ub:
+  comment: University at Buffalo
+  disable_name_matching: true
+  names:
+  - {first: Jian, last: Chen}
+jian-chen-wu:
+  names:
+  - {first: Jian-Chen, last: Wu}
+  - {first: Jien-Chen, last: Wu}
+jian-cheng-wu:
+  names:
+  - {first: Jian-Cheng, last: Wu}
+  - {first: Jian-cheng, last: Wu}
+  - {first: Jiancheng, last: Wu}
+jian-ming-xu:
+  names:
+  - {first: Jian-ming, last: Xu}
+  - {first: Jian-Ming, last: Xu}
+jian-wang-hongkongpoly:
+  comment: Hong Kong Polytechnic
+  disable_name_matching: true
+  names:
+  - {first: Jian, last: Wang}
+  orcid: 0000-0002-8992-8336
+jian-yun-nie:
+  names:
+  - {first: Jian-Yun, last: Nie}
+  - {first: Jian-yun, last: Nie}
+jianmin-yao:
+  names:
+  - {first: Jianmin, last: Yao}
+  - {first: Jian-min, last: Yao}
+  - {first: Jian-Min, last: Yao}
+jianxiang-wang:
+  names:
+  - {first: JianXiang, last: Wang}
+  - {first: Jianxiang, last: Wang}
+jianyong-duan:
+  names:
+  - {first: Jianyong, last: Duan}
+  - {first: Jian-Yong, last: Duan}
+jiatong-li-hk:
+  comment: Hong Kong Polytechnic
+  names:
+  - {first: Jiatong, last: Li}
+jiatong-li-ru:
+  comment: Rutgers
+  names:
+  - {first: Jiatong, last: Li}
+jihai-zhang-cuhk:
+  comment: CUHK
+  disable_name_matching: true
+  names:
+  - {first: Jihai, last: Zhang}
+  orcid: 0000-0002-1400-9116
+jill-fain-lehman:
+  names:
+  - {first: Jill Fain, last: Lehman}
+  - {first: Jill F., last: Lehman}
+jim-chang:
+  names:
+  - {first: Jim, last: Chang}
+  - {first: Jimmy, last: Chang}
+jim-cowie:
+  names:
+  - {first: Jim, last: Cowie}
+  - {first: J., last: Cowie}
+jim-kinzey:
+  names:
+  - {first: Jim, last: Kinzey}
+  - {first: Jim, last: Kimzey}
+jin-ge-yao:
+  names:
+  - {first: Jin-ge, last: Yao}
+  - {first: Jin-Ge, last: Yao}
+jin-hu-huang:
+  names:
+  - {first: Jin Hu, last: Huang}
+  - {first: JinHu, last: Huang}
+jin-seok-lee:
+  names:
+  - {first: Jin-seok, last: Lee}
+  - {first: Jin-Seok, last: Lee}
+jinan-xu:
+  names:
+  - {first: Jinan, last: Xu}
+  - {first: JinAn, last: Xu}
+jindrich-helcl:
+  names:
+  - {first: Jindřich, last: Helcl}
+  - {first: Jindrich, last: Helcl}
+jingguang-han:
+  names:
+  - {first: Jingguang, last: Han}
+  - {first: Jing Guang, last: Han}
+jingguang-sun:
+  names:
+  - {first: Jingguang, last: Sun}
+  - {first: JingGuang, last: Sun}
+jinghui-xiao:
+  names:
+  - {first: Jinghui, last: Xiao}
+  - {first: JingHui, last: Xiao}
+jinho-d-choi:
+  names:
+  - {first: Jinho D., last: Choi}
+  - {first: Jinho, last: Choi}
+jiri-havelka:
+  names:
+  - {first: Jiří, last: Havelka}
+  - {first: Jiri, last: Havelka}
+jiri-navratil:
+  names:
+  - {first: Jiří, last: Navrátil}
+  - {first: Jiri, last: Navratil}
+jiri-semecky:
+  names:
+  - {first: Jiří, last: Semecký}
+  - {first: Jirí, last: Semecky}
+jisha-p-jayan:
+  names:
+  - {first: Jisha P., last: Jayan}
+  - {first: Jisha, last: P Jayan}
+  - {first: Jisha P, last: Jayan}
+jo-calder:
+  names:
+  - {first: Jo, last: Calder}
+  - {first: Jonathan, last: Calder}
+joachim-kohler:
+  names:
+  - {first: Joachim, last: Köhler}
+  - {first: Joachim, last: Koehler}
+joan-andreu-sanchez:
+  names:
+  - {first: Joan-Andreu, last: Sánchez}
+  - {first: Joan-Andreu, last: Sanchez}
+  - {first: Joan Andreu, last: Sánchez}
+joan-bachenko:
+  names:
+  - {first: Joan, last: Bachenko}
+  - {first: J., last: Bachenko}
+joan-codina-filba:
+  names:
+  - {first: Joan, last: Codina-Filba}
+  - {first: Joan, last: Codina-Filbà}
+  - {first: Joan, last: Codina}
+joan-giralt-duran:
+  names:
+  - {first: Joan, last: Giralt Duran}
+  - {first: Joan Giralt, last: Duran}
+joan-soler-i-bou:
+  names:
+  - {first: Joan, last: Soler i Bou}
+  - {first: Joan, last: Soler}
+joanna-mrozinski:
+  names:
+  - {first: Joanna, last: Mrozinski}
+  - {first: J., last: Mrozinski}
+joao-graca:
+  names:
+  - {first: João, last: Graça}
+  - {first: Joao, last: Graca}
+  - {first: João V., last: Graça}
+joao-miguel-casteleiro:
+  names:
+  - {first: João Miguel, last: Casteleiro}
+  - {first: João, last: Casteleiro}
+joao-p-neto:
+  names:
+  - {first: João P., last: Neto}
+  - {first: Joao P., last: Neto}
+  - {first: Joao, last: Neto}
+  - {first: João, last: Neto}
+  - {first: João Paulo, last: Neto}
+joao-paulo-cabral:
+  names:
+  - {first: Joao Paulo, last: Cabral}
+  - {first: João P., last: Cabral}
+joao-paulo-cordeiro:
+  names:
+  - {first: João Paulo, last: Cordeiro}
+  - {first: João, last: Cordeiro}
+joao-paulo-teixeira:
+  names:
+  - {first: João Paulo, last: Teixeira}
+  - {first: João P., last: Teixeira}
+joao-rodrigues:
+  names:
+  - {first: João, last: Rodrigues}
+  - {first: João, last: António Rodrigues}
+joao-silva:
+  names:
+  - {first: João, last: Silva}
+  - {first: João Ricardo, last: Silva}
+joaquin-gonzalez-rodriguez:
+  names:
+  - {first: Joaquín, last: González-Rodríguez}
+  - {first: Joaquin, last: Gonzalez-Rodriguez}
+jochen-dorre:
+  names:
+  - {first: Jochen, last: Dorre}
+  - {first: Jochen, last: Dörre}
+jochen-l-leidner:
+  names:
+  - {first: Jochen L., last: Leidner}
+  - {first: Jochen, last: Leidner}
+joe-mccarthy:
+  names:
+  - {first: Joe, last: McCarthy}
+  - {first: J., last: McCarthy}
+joe-zhou:
+  names:
+  - {first: Joe, last: Zhou}
+  - {first: Joe F., last: Zhou}
+joel-priestley:
+  names:
+  - {first: Joel, last: Priestley}
+  - {first: Joel James, last: Priestley}
+joel-tetreault:
+  names:
+  - {first: Joel, last: Tetreault}
+  - {first: Joel R., last: Tetreault}
+joel-wallenberg:
+  names:
+  - {first: Joel, last: Wallenberg}
+  - {first: Joel C., last: Wallenberg}
+johan-adam-du-preez:
+  names:
+  - {first: Johan Adam, last: du Preez}
+  - {first: J.A., last: du Preez}
+johanna-d-moore:
+  names:
+  - {first: Johanna D., last: Moore}
+  - {first: Johanna, last: Moore}
+  - {first: J. D., last: Moore}
+johanna-geiss:
+  names:
+  - {first: Johanna, last: Geiß}
+  - {first: Johanna, last: Geiss}
+john-a-carroll:
+  comment: Cambridge, Sussex
+  names:
+  - {first: John A., last: Carroll}
+  - {first: John, last: Carroll}
+  similar:
+  - john-b-carroll
+john-b-carroll:
+  comment: UNC
+  names:
+  - {first: John B., last: Carroll}
+  similar:
+  - john-a-carroll
+john-b-lowe:
+  names:
+  - {first: John B., last: Lowe}
+  - {first: John, last: Lowe}
+john-barnden:
+  names:
+  - {first: John, last: Barnden}
+  - {first: John A., last: Barnden}
+  - {first: J.A., last: Barnden}
+john-bateman:
+  names:
+  - {first: John, last: Bateman}
+  - {first: John A., last: Bateman}
+john-bear:
+  names:
+  - {first: John, last: Bear}
+  - {first: J., last: Bear}
+john-c-platt:
+  names:
+  - {first: John C., last: Platt}
+  - {first: John, last: Platt}
+john-c-thomas:
+  names:
+  - {first: John C., last: Thomas}
+  - {first: John, last: Thomas}
+john-cocke:
+  names:
+  - {first: John, last: Cocke}
+  - {first: J., last: Cocke}
+john-conroy:
+  names:
+  - {first: John, last: Conroy}
+  - {first: John M., last: Conroy}
+john-d-burger:
+  comment: MITRE
+  names:
+  - {first: John D., last: Burger}
+  - {first: John, last: Burger}
+  similar:
+  - john-f-burger
+john-dowding:
+  names:
+  - {first: John, last: Dowding}
+  - {first: J., last: Dowding}
+john-elliott:
+  names:
+  - {first: John, last: Elliott}
+  - {first: John, last: Elliot}
+john-f-burger:
+  comment: System Development Corporation
+  names:
+  - {first: John F., last: Burger}
+  - {first: John, last: Burger}
+  similar:
+  - john-d-burger
+john-f-pitrelli:
+  names:
+  - {first: John F., last: Pitrelli}
+  - {first: John, last: Pitrelli}
+john-h-clippinger-jr:
+  names:
+  - {first: John H., last: 'Clippinger, Jr.'}
+  - {first: John Henry, last: 'Clippinger, Jr.'}
+john-hale:
+  names:
+  - {first: John, last: Hale}
+  - {first: John T., last: Hale}
+john-henderson:
+  names:
+  - {first: John, last: Henderson}
+  - {first: John C., last: Henderson}
+john-j-kovarik:
+  names:
+  - {first: John J., last: Kovarik}
+  - {first: John, last: Kovarik}
+john-k-pate:
+  names:
+  - {first: John K., last: Pate}
+  - {first: John, last: Pate}
+  - {first: John K, last: Pate}
+john-keane:
+  names:
+  - {first: John, last: Keane}
+  - {first: John, last: Kane}
+john-kelleher:
+  names:
+  - {first: John, last: Kelleher}
+  - {first: John D., last: Kelleher}
+john-lafferty:
+  names:
+  - {first: John, last: Lafferty}
+  - {first: John D., last: Lafferty}
+  - {first: John, last: Lafrerty}
+  - {first: J., last: Lafferty}
+john-makhoul:
+  names:
+  - {first: John, last: Makhoul}
+  - {first: J., last: Makhoul}
+john-mcnaught:
+  names:
+  - {first: John, last: McNaught}
+  - {first: J., last: McNaught}
+john-miller:
+  names:
+  - {first: John, last: Miller}
+  - {first: John E., last: Miller}
+john-p-lalor:
+  names:
+  - {first: John P., last: Lalor}
+  - {first: John, last: Lalor}
+john-pestian:
+  names:
+  - {first: John, last: Pestian}
+  - {first: John P., last: Pestian}
+john-philip-mccrae:
+  names:
+  - {first: John Philip, last: McCrae}
+  - {first: John, last: McCrae}
+  - {first: John P., last: McCrae}
+john-phillips:
+  comment: Univ. of Manchester
+  names:
+  - {first: John, last: Phillips}
+  similar:
+  - jon-phillips
+john-r-hershey:
+  names:
+  - {first: John R., last: Hershey}
+  - {first: John, last: Hershey}
+john-s-garofolo:
+  names:
+  - {first: John S., last: Garofolo}
+  - {first: John, last: Garofolo}
+  - {first: J. S., last: Garofolo}
+  - {first: J., last: Garofolo}
+john-s-white:
+  names:
+  - {first: John S., last: White}
+  - {first: John, last: White}
+john-s-y-lee:
+  names:
+  - {first: John S. Y., last: Lee}
+  - {first: John, last: Lee}
+john-t-maxwell-iii:
+  names:
+  - {first: John T., last: Maxwell III}
+  - {first: John, last: Maxwell}
+  - {first: John T., last: Maxwell}
+john-tait:
+  names:
+  - {first: John, last: Tait}
+  - {first: John Irving, last: Tait}
+john-wilkerson:
+  names:
+  - {first: John, last: Wilkerson}
+  - {first: John D., last: Wilkerson}
+jon-oberlander:
+  names:
+  - {first: Jon, last: Oberlander}
+  - {first: Jonathan, last: Oberländer}
+jon-patrick:
+  names:
+  - {first: Jon, last: Patrick}
+  - {first: Jon D., last: Patrick}
+  - {first: Jon David, last: Patrick}
+jon-phillips:
+  comment: Georgetown, MITRE
+  names:
+  - {first: Jon, last: Phillips}
+  - {first: John, last: Phillips}
+  similar:
+  - john-phillips
+jon-sanchez:
+  names:
+  - {first: Jon, last: Sánchez}
+  - {first: Jon, last: Sanchez}
+  - {first: J., last: Sánchez}
+jonathan-allen:
+  names:
+  - {first: Jonathan, last: Allen}
+  - {first: Jonathan, last: All}
+jonathan-decristofaro:
+  names:
+  - {first: Jonathan, last: DeCristofaro}
+  - {first: Jonathan D., last: DeCristofaro}
+jonathan-g-fiscus:
+  names:
+  - {first: Jonathan G., last: Fiscus}
+  - {first: Jonathan C., last: Fiscus}
+  - {first: Jonathan, last: Fiscus}
+  - {first: J. G., last: Fiscus}
+  - {first: Johathan G., last: Fiscus}
+jonathan-h-clark:
+  names:
+  - {first: Jonathan H., last: Clark}
+  - {first: Jonathan, last: Clark}
+jonathan-j-webster:
+  names:
+  - {first: Jonathan J., last: Webster}
+  - {first: Jonathan, last: Webster}
+jonathan-washington:
+  names:
+  - {first: Jonathan, last: Washington}
+  - {first: Jonathan North, last: Washington}
+  - {first: Jonathan N., last: Washington}
+jong-c-park:
+  names:
+  - {first: Jong C., last: Park}
+  - {first: Jong, last: Park}
+jong-hoon-oh:
+  names:
+  - {first: Jong-Hoon, last: Oh}
+  - {first: Jong Hoon, last: Oh}
+jonghyun-choi-umd:
+  comment: University of Maryland
+  disable_name_matching: true
+  names:
+  - {first: Jonghyun, last: Choi}
+  orcid: 0000-0002-7934-8434
+joo-young-lee:
+  names:
+  - {first: Joo-Young, last: Lee}
+  - {first: JooYoung, last: Lee}
+jordan-cohen:
+  names:
+  - {first: Jordan, last: Cohen}
+  - {first: Jordan R., last: Cohen}
+jordan-r-green:
+  names:
+  - {first: Jordan R., last: Green}
+  - {first: Jordan, last: Green}
+jordi-atserias:
+  names:
+  - {first: Jordi, last: Atserias}
+  - {first: Jordi, last: Atserias Batalla}
+  - {first: J., last: Atserias}
+jordi-daude:
+  names:
+  - {first: Jordi, last: Daudé}
+  - {first: J., last: Daudé}
+jordi-porta-zamorano:
+  names:
+  - {first: Jordi Porta, last: Zamorano}
+  - {first: Jordi, last: Porta}
+jordi-turmo:
+  names:
+  - {first: Jordi, last: Turmo}
+  - {first: J., last: Turmo}
+jorg-kleinz:
+  names:
+  - {first: Jörg, last: Kleinz}
+  - {first: Jorg, last: Kleinz}
+jorg-tiedemann:
+  names:
+  - {first: Jörg, last: Tiedemann}
+  - {first: Jorg, last: Tiedemann}
+  - {first: Joerg, last: Tiedemann}
+jorge-carrillo-de-albornoz:
+  names:
+  - {first: Jorge, last: Carrillo de Albornoz}
+  - {first: Jorge Carrillo, last: de Albornoz}
+jorge-garcia-flores:
+  names:
+  - {first: Jorge, last: Garcia Flores}
+  - {first: Jorge, last: García Flores}
+  - {first: Jorge J., last: García Flores}
+jorgen-villadsen:
+  names:
+  - {first: Jorgen, last: Villadsen}
+  - {first: Jørgen, last: Villadsen}
+jort-florent-gemmeke:
+  names:
+  - {first: Jort Florent, last: Gemmeke}
+  - {first: Jort F., last: Gemmeke}
+  - {first: Jort, last: Gemmeke}
+jose-a-r-fonollosa:
+  names:
+  - {first: José A. R., last: Fonollosa}
+  - {first: Jose A., last: R. Fonollosa}
+  - {first: José A.R., last: Fonollosa}
+  - {first: José A., last: R. Fonollosa}
+  - {first: Jose A. R., last: Fonollosa}
+jose-b-marino:
+  names:
+  - {first: José B., last: Mariño}
+  - {first: José, last: Mariño}
+jose-camacho-collados:
+  names:
+  - {first: Jose, last: Camacho-Collados}
+  - {first: José, last: Camacho-Collados}
+jose-castano:
+  names:
+  - {first: José, last: Castaño}
+  - {first: José M., last: Castaño}
+jose-coch:
+  names:
+  - {first: Jose, last: Coch}
+  - {first: José, last: Coch}
+jose-deulofeu:
+  names:
+  - {first: José, last: Deulofeu}
+  - {first: Jose, last: Deulofeu}
+jose-g-c-de-souza:
+  names:
+  - {first: José G., last: C. de Souza}
+  - {first: José G.C., last: de Souza}
+  - {first: Jose G.C., last: de Souza}
+  - {first: José Guilherme, last: Camargo de Souza}
+  - {first: José G., last: Camargo de Souza}
+  - {first: José Guilherme, last: C. de Souza}
+jose-g-moreno:
+  names:
+  - {first: José G., last: Moreno}
+  - {first: Jose G., last: Moreno}
+  - {first: Jose, last: Moreno}
+jose-i-abreu:
+  names:
+  - {first: José I., last: Abreu}
+  - {first: Jose I., last: Abreu}
+  - {first: José, last: Abreu}
+jose-iria:
+  names:
+  - {first: José, last: Iria}
+  - {first: Jose, last: Iria}
+jose-joao-almeida:
+  names:
+  - {first: José João, last: Almeida}
+  - {first: Jose Joao, last: Almeida}
+jose-luis-oliveira:
+  names:
+  - {first: José Luís, last: Oliveira}
+  - {first: Luís, last: Oliveira}
+jose-luis-vicedo:
+  names:
+  - {first: José Luis, last: Vicedo}
+  - {first: Jose-Luis, last: Vicedo}
+  - {first: Jose Luis, last: Vicedo}
+  - {first: José L., last: Vicedo}
+  - {first: J.L., last: Vicedo}
+jose-m-alonso:
+  names:
+  - {first: Jose M., last: Alonso}
+  - {first: Jose, last: Alonso}
+jose-m-garcia-miguel:
+  names:
+  - {first: José M., last: García Miguel}
+  - {first: José M., last: García-Miguel}
+jose-m-gomez:
+  names:
+  - {first: José M., last: Gómez}
+  - {first: José Manuel, last: Gómez}
+  - {first: Jose Manuel, last: Gómez}
+  - {first: Jose M., last: Gomez}
+jose-m-guirao:
+  names:
+  - {first: José M., last: Guirao}
+  - {first: José María, last: Guirao}
+jose-manuel-gomez-perez:
+  names:
+  - {first: José Manuel, last: Gómez-Pérez}
+  - {first: Jose Manuel, last: Gomez-Perez}
+jose-manuel-martinez:
+  names:
+  - {first: José Manuel, last: Martínez}
+  - {first: Jose M.M., last: Martinez}
+  - {first: José Manuel, last: Martínez Martínez}
+  - {first: Jose Manuel, last: Martinez}
+jose-manuel-pardo:
+  names:
+  - {first: Jose Manuel, last: Pardo}
+  - {first: Jose M., last: Pardo}
+  - {first: José M., last: Pardo}
+jose-manuel-perea-ortega:
+  names:
+  - {first: Jose Manuel, last: Perea-Ortega}
+  - {first: José M., last: Perea-Ortega}
+  - {first: Jose Manuel, last: Perea}
+  - {first: Jose-Manuel, last: Perea-Ortega}
+jose-mari-arriola:
+  names:
+  - {first: Jose Mari, last: Arriola}
+  - {first: J. M., last: Arriola}
+  - {first: J.M., last: Arriola}
+  - {first: J.M, last: Arriola}
+jose-maria-carazo:
+  names:
+  - {first: José María, last: Carazo}
+  - {first: José-María, last: Carazo}
+jose-maria-gomez-hidalgo:
+  names:
+  - {first: Jose Maria, last: Gomez-Hidalgo}
+  - {first: Jose Maria Gomez, last: Hidalgo}
+  - {first: José M. Gómez, last: Hidalgo}
+jose-miguel-benedi:
+  names:
+  - {first: José-Miguel, last: Benedí}
+  - {first: Jose-Miguel, last: Benedi}
+  - {first: José Miguel, last: Benedí}
+  - {first: José Miguel, last: Benedí Ruíz}
+  - {first: José-M., last: Benedí}
+  - {first: José Miguel, last: Benedi Ruiz}
+  - {first: José-Miguel, last: Benedí Ruíz}
+  - {first: J. M., last: Benedí}
+jose-ramom-pichel-campos:
+  names:
+  - {first: José Ramom, last: Pichel Campos}
+  - {first: José Ramom, last: Pichel}
+  - {first: Jose Ramom, last: Pichel}
+  - {first: Jose Ramom, last: Pichel Campos}
+jose-relano-gil:
+  names:
+  - {first: Jose, last: Relaño-Gil}
+  - {first: Jose, last: Relano Gil}
+  - {first: José, last: Relaño Gil}
+  - {first: José, last: Relaño}
+josef-psutka:
+  names:
+  - {first: Josef, last: Psutka}
+  - {first: Josef V., last: Psutka}
+  - {first: J., last: Psutka}
+  - {first: J.V., last: Psutka}
+josef-van-genabith:
+  names:
+  - {first: Josef, last: van Genabith}
+  - {first: Josef, last: Van Genabith}
+  - {first: J., last: Van Genabith}
+josep-m-crego:
+  names:
+  - {first: Josep M., last: Crego}
+  - {first: Josep Maria, last: Crego}
+  - {first: Josep, last: Crego}
+josep-maria-fontana:
+  names:
+  - {first: Josep Maria, last: Fontana}
+  - {first: Josep, last: Fontana}
+joseph-f-grafsgaard:
+  names:
+  - {first: Joseph F., last: Grafsgaard}
+  - {first: Joseph, last: Grafsgaard}
+joseph-j-peper:
+  names:
+  - {first: Joseph J., last: Peper}
+  - {first: Joseph, last: Peper}
+joseph-le-roux:
+  names:
+  - {first: Joseph, last: Le Roux}
+  - {first: Joseph Le, last: Roux}
+joseph-mariani:
+  names:
+  - {first: Joseph, last: Mariani}
+  - {first: J., last: Mariani}
+joseph-p-campbell:
+  names:
+  - {first: Joseph P., last: Campbell}
+  - {first: Joseph, last: Campbell}
+joseph-polifroni:
+  names:
+  - {first: Joseph, last: Polifroni}
+  - {first: Joseph H., last: Polifroni}
+joseph-turian:
+  names:
+  - {first: Joseph, last: Turian}
+  - {first: Joseph P., last: Turian}
+joseph-z-chang:
+  names:
+  - {first: Joseph Z., last: Chang}
+  - {first: Joseph, last: Chang}
+  - {first: Joseph Z, last: Chang}
+joshua-b-gordon:
+  names:
+  - {first: Joshua B., last: Gordon}
+  - {first: Joshua, last: Gordon}
+joshua-goodman:
+  names:
+  - {first: Joshua, last: Goodman}
+  - {first: Joshua T., last: Goodman}
+joyce-chai:
+  names:
+  - {first: Joyce, last: Chai}
+  - {first: Joyce Yue, last: Chai}
+  - {first: Joyce Y., last: Chai}
+joyce-mcdowell:
+  names:
+  - {first: Joyce, last: McDowell}
+  - {first: J., last: McDowell}
+jozef-juhar:
+  names:
+  - {first: Jozef, last: Juhár}
+  - {first: Jozef, last: Juhar}
+ju-yeon-ryu:
+  names:
+  - {first: Ju-yeon, last: Ryu}
+  - {first: Ju-Yeon, last: Ryu}
+juan-carlos-amengual:
+  names:
+  - {first: Juan Carlos, last: Amengual}
+  - {first: J. C., last: Amengual}
+juan-jose-rodriguez-soler:
+  names:
+  - {first: Juan José Rodríguez, last: Soler}
+  - {first: Juan José, last: Rodríguez}
+juan-m-montero:
+  names:
+  - {first: Juan M., last: Montero}
+  - {first: Juan Manuel, last: Montero}
+juan-manuel-lucas-cuesta:
+  names:
+  - {first: Juan Manuel, last: Lucas-Cuesta}
+  - {first: Juan Manuel, last: Lucas}
+juan-manuel-torres-moreno:
+  names:
+  - {first: Juan-Manuel, last: Torres-Moreno}
+  - {first: Juan-Manuel Torres, last: Moreno}
+  - {first: Juan-Manuel, last: Torres}
+juan-maria-garrido:
+  names:
+  - {first: Juan María, last: Garrido}
+  - {first: Juan Maria, last: Garrido}
+juan-miguel-vilar:
+  names:
+  - {first: Juan Miguel, last: Vilar}
+  - {first: Juan-Miguel, last: Vilar}
+  - {first: Juan M., last: Vilar}
+  - {first: J. M., last: Vilar}
+juan-pablo-munoz:
+  names:
+  - {first: Juan Pablo, last: Munoz}
+  - {first: J. Pablo, last: Muñoz}
+juan-soler-company:
+  names:
+  - {first: Juan, last: Soler-Company}
+  - {first: Juan, last: Soler Company}
+juana-maria-ruiz-martinez:
+  names:
+  - {first: Juana María, last: Ruiz-Martínez}
+  - {first: Juana Maria, last: Ruiz-Martínez}
+  - {first: Juana Maria, last: Ruiz Martinez}
+judith-d-schlesinger:
+  names:
+  - {first: Judith D., last: Schlesinger}
+  - {first: Judith, last: Schlesinger}
+judith-l-klavans:
+  names:
+  - {first: Judith L., last: Klavans}
+  - {first: Judith, last: Klavans}
+  - {first: J., last: Klavans}
+judy-anne-kegl:
+  names:
+  - {first: Judy Anne, last: Kegl}
+  - {first: Judy, last: Kegl}
+jugal-kalita:
+  names:
+  - {first: Jugal, last: Kalita}
+  - {first: Jugal K., last: Kalita}
+  - {first: J.K., last: Kalita}
+jui-ting-huang:
+  names:
+  - {first: Jui Ting, last: Huang}
+  - {first: Jui-Ting, last: Huang}
+julia-hirschberg:
+  names:
+  - {first: Julia, last: Hirschberg}
+  - {first: Julia B., last: Hirschberg}
+julia-lavid-lopez:
+  names:
+  - {first: Julia, last: Lavid-López}
+  - {first: Julia, last: Lavid}
+julia-maria-schulz:
+  names:
+  - {first: Julia Maria, last: Schulz}
+  - {first: Julia, last: Schulz}
+julia-otmakhova:
+  names:
+  - {first: Julia, last: Otmakhova}
+  - {first: Yulia, last: Otmakhova}
+julian-j-odell:
+  names:
+  - {first: Julian J., last: Odell}
+  - {first: J.J., last: Odell}
+julian-j-schloder:
+  names:
+  - {first: Julian J., last: Schlöder}
+  - {first: Julian, last: Schlöder}
+julian-moreno-schneider:
+  names:
+  - {first: Julian, last: Moreno Schneider}
+  - {first: Julian, last: Moreno-Schneider}
+  - {first: Julián, last: Moreno-Schneider}
+  - {first: Julián, last: Moreno Schneider}
+juliano-d-antonio:
+  names:
+  - {first: Juliano D., last: Antonio}
+  - {first: J.D., last: Antonio}
+juliano-efson-sales:
+  names:
+  - {first: Juliano Efson, last: Sales}
+  - {first: Juliano, last: Efson Sales}
+  - {first: Juliano, last: Sales}
+julie-beliao:
+  names:
+  - {first: Julie, last: Belião}
+  - {first: Julie, last: Beliao}
+julie-carson-berndsen:
+  names:
+  - {first: Julie, last: Carson-Berndsen}
+  - {first: Julie, last: Carson}
+  - {first: Julle, last: Carson-Berndsen}
+julie-e-boland:
+  names:
+  - {first: Julie E., last: Boland}
+  - {first: Julie, last: Boland}
+julien-deriviere:
+  names:
+  - {first: Julien, last: Derivière}
+  - {first: J., last: Derivière}
+julien-hamonic:
+  names:
+  - {first: Julien, last: Hamonic}
+  - {first: J., last: Hamonic}
+julio-castillo:
+  names:
+  - {first: Julio, last: Castillo}
+  - {first: Julio Javier, last: Castillo}
+jun-fu-cai:
+  names:
+  - {first: Jun Fu, last: Cai}
+  - {first: Junfu, last: Cai}
+jun-ichi-nakamura:
+  names:
+  - {first: Jun-ichi, last: Nakamura}
+  - {first: Jun’ichi, last: Nakamura}
+jun-ping-ng:
+  names:
+  - {first: Jun Ping, last: Ng}
+  - {first: Jun-Ping, last: Ng}
+junfeng-tian:
+  names:
+  - {first: Junfeng, last: Tian}
+  - {first: Jun Feng, last: Tian}
+jung-jae-kim:
+  names:
+  - {first: Jung-jae, last: Kim}
+  - {first: Jung-Jae, last: Kim}
+jungyun-seo:
+  names:
+  - {first: Jungyun, last: Seo}
+  - {first: Jung Yun, last: Seo}
+junhui-li:
+  names:
+  - {first: Junhui, last: Li}
+  - {first: JunHui, last: Li}
+junichi-fukumoto:
+  names:
+  - {first: Jun’ichi, last: Fukumoto}
+  - {first: Junichi, last: Fukumoto}
+junichi-kakegawa:
+  names:
+  - {first: Jun’ichi, last: Kakegawa}
+  - {first: Jun-ichi, last: Kakegawa}
+junichi-kazama:
+  names:
+  - {first: Jun′ichi, last: Kazama}
+  - {first: Jun’ichi, last: Kazama}
+junichi-tsujii:
+  names:
+  - {first: Jun’ichi, last: Tsujii}
+  - {first: Jun-ichi, last: Tsujii}
+  - {first: Jun-Ichi, last: Tsujii}
+  - {first: Junichi, last: Tsujii}
+  - {first: Jun-ich, last: Tsujii}
+  - {first: J., last: Tsujii}
+junping-gong:
+  names:
+  - {first: Junping, last: Gong}
+  - {first: Jun-ping, last: Gong}
+junya-tsutsumi:
+  names:
+  - {first: Junya, last: Tsutsumi}
+  - {first: J., last: Tsutsumi}
+junyu-luo-pu:
+  degree: Peking University
+  disable_name_matching: true
+  names:
+  - {first: Junyu, last: Luo}
+  orcid: 0009-0001-6894-1144
+jurgen-wedekind:
+  names:
+  - {first: Jurgen, last: Wedekind}
+  - {first: Jürgen, last: Wedekind}
+jurgita-kapociute-dzikiene:
+  names:
+  - {first: Jurgita, last: Kapočiūtė-Dzikienė}
+  - {first: Jurgita, last: Kapociute-Dzikiene}
+jurica-seva:
+  names:
+  - {first: Jurica, last: Seva}
+  - {first: Jurica, last: Ševa}
+justin-h-gross:
+  names:
+  - {first: Justin H., last: Gross}
+  - {first: Justin, last: Gross}
+justus-c-roux:
+  names:
+  - {first: Justus C., last: Roux}
+  - {first: J.C., last: Roux}
+  - {first: J. C., last: Roux}
+jyh-shing-roger-jang:
+  names:
+  - {first: Jyh-Shing Roger, last: Jang}
+  - {first: Jyh-Shing, last: Jang}
+  - {first: Jyh-Shing, last: Roger Jang}
+  - {first: Roger Jyh-Shing, last: Jang}
+jyoti-pawar:
+  names:
+  - {first: Jyoti, last: Pawar}
+  - {first: Jyoti D., last: Pawar}
+  - {first: Jyoti D, last: Pawar}
+jyun-sheng-chang:
+  names:
+  - {first: Jyun-Sheng, last: Chang}
+  - {first: Jyun-sheng, last: Chang}
+k-bretonnel-cohen:
+  names:
+  - {first: K. Bretonnel, last: Cohen}
+  - {first: Kevin Bretonnel, last: Cohen}
+  - {first: Kevin B., last: Cohen}
+  - {first: Kevin, last: Cohen}
+k-saravanan:
+  names:
+  - {first: K, last: Saravanan}
+  - {first: Saravanan, last: K}
+  - {first: K., last: Saravanan}
+k-sreenivasa-rao:
+  names:
+  - {first: K Sreenivasa, last: Rao}
+  - {first: K. Sreenivasa, last: Rao}
+k-tamsin-maxwell:
+  names:
+  - {first: K. Tamsin, last: Maxwell}
+  - {first: Tamsin, last: Maxwell}
+k-v-s-prasad:
+  names:
+  - {first: K.V.S., last: Prasad}
+  - {first: K.V.S, last: Prasad}
+k-vijay-shanker:
+  names:
+  - {first: K., last: Vijay-Shanker}
+  - {first: K, last: Vijay-Shanker}
+  - {first: K., last: Vijay-Shankar}
+  - {first: Vijay, last: Shanker}
+ka-wai-chui:
+  names:
+  - {first: Ka-Wai, last: Chui}
+  - {first: Kawai, last: Chui}
+kadri-hacioglu:
+  names:
+  - {first: Kadri, last: Hacioglu}
+  - {first: K., last: Hacioglu}
+kai-fu-lee:
+  names:
+  - {first: Kai-Fu, last: Lee}
+  - {first: K.F., last: Lee}
+kai-min-kevin-chang:
+  names:
+  - {first: Kai-min Kevin, last: Chang}
+  - {first: Kai-Min, last: Chang}
+  - {first: Kai-min K., last: Chang}
+kalina-bontcheva:
+  names:
+  - {first: Kalina, last: Bontcheva}
+  similar:
+  - katina-bontcheva
+kalliopi-zervanou:
+  names:
+  - {first: Kalliopi, last: Zervanou}
+  - {first: Kalliopi A., last: Zervanou}
+kam-fai-wong:
+  names:
+  - {first: Kam-Fai, last: Wong}
+  - {first: Kam-fai, last: Wong}
+  - {first: K.F., last: Wong}
+kamel-smaili:
+  names:
+  - {first: Kamel, last: Smaili}
+  - {first: Kamel, last: Smaïli}
+kanokorn-trakultaweekoon:
+  names:
+  - {first: Kanokorn, last: Trakultaweekoon}
+  - {first: Kanokorn, last: Trakultaweekool}
+kanthashree-mysore-sathyendra:
+  names:
+  - {first: Kanthashree, last: Mysore Sathyendra}
+  - {first: Kanthashree Mysore, last: Sathyendra}
+karel-oliva:
+  names:
+  - {first: Karel, last: Oliva}
+  - {first: Karel, last: Oli̊va}
+karen-e-lochbaum:
+  names:
+  - {first: Karen E., last: Lochbaum}
+  - {first: Karen, last: Lochbaum}
+karen-jensen:
+  names:
+  - {first: Karen, last: Jensen}
+  - {first: K., last: Jensen}
+karen-l-ryan:
+  names:
+  - {first: Karen L., last: Ryan}
+  - {first: Karen, last: Ryan}
+karen-sparck-jones:
+  names:
+  - {first: Karen, last: Spärck Jones}
+  - {first: Karen, last: Sparck Jones}
+  - {first: Karen, last: Jones}
+  - {first: K., last: Sparck Jones}
+karin-friberg-heppin:
+  names:
+  - {first: Karin, last: Friberg Heppin}
+  - {first: Karin Friberg, last: Heppin}
+  - {first: Karin, last: Friberg}
+karin-kipper:
+  names:
+  - {first: Karin, last: Kipper}
+  - {first: Karin Christine, last: Kipper}
+  - {first: Karin, last: Schuler}
+  - {first: Karin, last: Kipper Schuler}
+  - {first: Karin, last: Kipper-Schuler}
+karin-sim-smith:
+  names:
+  - {first: Karin, last: Sim Smith}
+  - {first: Karin Sim, last: Smith}
+karin-verspoor:
+  names:
+  - {first: Karin, last: Verspoor}
+  - {first: Karin M., last: Verspoor}
+  - {first: Cornelia Maria, last: Verspoor}
+karine-baschung:
+  names:
+  - {first: Karine, last: Baschung}
+  - {first: K., last: Baschung}
+karmele-lopez-de-ipina:
+  names:
+  - {first: Karmele, last: López de Ipiña}
+  - {first: K., last: López de Ipiña}
+  - {first: K., last: Lopez de Ipina}
+karoly-fabricz:
+  names:
+  - {first: Karoly, last: Fabricz}
+  - {first: K., last: Fabricz}
+karsten-l-worm:
+  names:
+  - {first: Karsten L., last: Worm}
+  - {first: Karsten, last: Worm}
+  - {first: K. L., last: Worm}
+kasu-sai-kartheek-reddy:
+  names:
+  - {first: Kasu Sai Kartheek, last: Reddy}
+  - {first: Sai Kartheek, last: Reddy Kasu}
+  orcid: 0009-0007-6679-3313
+katalin-ilona-simko:
+  names:
+  - {first: Katalin Ilona, last: Simkó}
+  - {first: Katalin, last: Simkó}
+katarina-heimann-muhlenbock:
+  names:
+  - {first: Katarina, last: Heimann Mühlenbock}
+  - {first: Katarina, last: Mühlenbock}
+kate-forbes-riley:
+  names:
+  - {first: Kate, last: Forbes-Riley}
+  - {first: Kate, last: Forbes}
+  - {first: Katherine, last: Forbes-Riley}
+  - {first: Katherine, last: Forbes}
+  - {first: Katherine, last: Forbes Riley}
+kate-hunicke-smith:
+  names:
+  - {first: Kate, last: Hunicke-Smith}
+  - {first: K., last: Hunicke-Smith}
+katerina-vesela:
+  names:
+  - {first: Katerina, last: Veselá}
+  - {first: Kateřina, last: Veselá}
+katharina-boesefeldt:
+  names:
+  - {first: Katharina, last: Boesefeldt}
+  - {first: K., last: Boesefeldt}
+katharina-von-der-wense:
+  names:
+  - {first: Katharina, last: von der Wense}
+  - {first: Katharina, last: Kann}
+katharina-waschle:
+  names:
+  - {first: Katharina, last: Wäschle}
+  - {first: Katharina, last: Waeschle}
+kathleen-c-fraser:
+  names:
+  - {first: Kathleen C., last: Fraser}
+  - {first: Kathleen, last: Fraser}
+kathleen-dahlgren:
+  names:
+  - {first: Kathleen, last: Dahlgren}
+  - {first: K., last: Dahlgren}
+kathleen-f-mccoy:
+  names:
+  - {first: Kathleen F., last: McCoy}
+  - {first: Kathleen, last: McCoy}
+  - {first: Kathleen E., last: McCoy}
+kathleen-ferraro:
+  names:
+  - {first: Kathleen, last: Ferraro}
+  - {first: Kathleen, last: Ferrara}
+kathleen-m-carley:
+  names:
+  - {first: Kathleen M., last: Carley}
+  - {first: Kathleen, last: Carley}
+kathleen-m-sheehan:
+  names:
+  - {first: Kathleen M., last: Sheehan}
+  - {first: Kathleen, last: Sheehan}
+kathleen-mckeown:
+  names:
+  - {first: Kathleen, last: McKeown}
+  - {first: Kathy, last: McKeown}
+  - {first: Kathleen R., last: McKeown}
+kathryn-baker:
+  names:
+  - {first: Kathryn, last: Baker}
+  - {first: Kathryn L., last: Baker}
+katia-lida-kermanidis:
+  names:
+  - {first: Katia Lida, last: Kermanidis}
+  - {first: Katia, last: Kermanidis}
+katina-bontcheva:
+  names:
+  - {first: Katina, last: Bontcheva}
+  similar:
+  - kalina-bontcheva
+katsuhiko-shirai:
+  names:
+  - {first: Katsuhiko, last: Shirai}
+  - {first: K., last: Shirai}
+katsumasa-shimizu:
+  names:
+  - {first: Katsumasa, last: Shimizu}
+  - {first: K., last: Shimizu}
+kavi-mahesh:
+  names:
+  - {first: Kavi, last: Mahesh}
+  - {first: Kavitha, last: Mahesh}
+  - {first: Kavitha Karimbi, last: Mahesh}
+kavosh-asadi-atui:
+  names:
+  - {first: Kavosh, last: Asadi Atui}
+  - {first: Kavosh, last: Asadi}
+kazuya-takeda:
+  names:
+  - {first: Kazuya, last: Takeda}
+  - {first: Kasuya, last: Takeda}
+ke-jia-zhang:
+  names:
+  - {first: Ke-Jia, last: Zhang}
+  - {first: Ke-Jia, last: Chang}
+ke-m-tran:
+  names:
+  - {first: Ke M., last: Tran}
+  - {first: Ke, last: Tran}
+  - {first: Ke, last: Tran Manh}
+kees-van-deemter:
+  names:
+  - {first: Kees, last: van Deemter}
+  - {first: Kees, last: Van Deemter}
+keh-jiann-chen:
+  names:
+  - {first: Keh-Jiann, last: Chen}
+  - {first: Keh-jiann, last: Chen}
+  - {first: Ke-Jiann, last: Chen}
+  - {first: K. J., last: Chen}
+kei-mitamura:
+  names:
+  - {first: Kei, last: Mitamura}
+  - {first: K., last: Mitamura}
+kei-yoshimoto:
+  names:
+  - {first: Kei, last: Yoshimoto}
+  - {first: K., last: Yoshimoto}
+keith-hall:
+  names:
+  - {first: Keith, last: Hall}
+  - {first: Keith B., last: Hall}
+keith-j-miller:
+  names:
+  - {first: Keith J., last: Miller}
+  - {first: Keith, last: Miller}
+ken-samuel:
+  names:
+  - {first: Ken, last: Samuel}
+  - {first: Kenneth, last: Samuel}
+kenji-araki:
+  names:
+  - {first: Kenji, last: Araki}
+  - {first: K., last: Araki}
+kenji-hanakata:
+  names:
+  - {first: Kenji, last: Hanakata}
+  - {first: K., last: Hanakata}
+kenneth-church:
+  names:
+  - {first: Kenneth, last: Church}
+  - {first: Kenneth Ward, last: Church}
+  - {first: Ken, last: Church}
+  - {first: Kenneth W., last: Church}
+kenneth-forbus:
+  names:
+  - {first: Kenneth, last: Forbus}
+  - {first: Kenneth D., last: Forbus}
+kenneth-j-mckeever:
+  names:
+  - {first: Kenneth J., last: McKeever}
+  - {first: Kenneth, last: McKeever}
+kenneth-m-anderson:
+  names:
+  - {first: Kenneth M., last: Anderson}
+  - {first: Kenneth, last: Anderson}
+  - {first: Ken, last: Anderson}
+kenneth-s-bogh:
+  names:
+  - {first: Kenneth S., last: Bøgh}
+  - {first: Kenneth, last: Bøgh}
+kenny-zhu:
+  names:
+  - {first: Kenny, last: Zhu}
+  - {first: Kenny Q., last: Zhu}
+kepa-joseba-rodriguez:
+  names:
+  - {first: Kepa Joseba, last: Rodriguez}
+  - {first: Kepa J., last: Rodríguez}
+  - {first: Kepa Joseba, last: Rodríguez}
+kepa-sarasola:
+  names:
+  - {first: Kepa, last: Sarasola}
+  - {first: K, last: Sarasola}
+  - {first: K., last: Sarasola}
+kerstin-kunz:
+  names:
+  - {first: Kerstin, last: Kunz}
+  - {first: Kerstin Anna, last: Kunz}
+kerstin-severinson-eklundh:
+  names:
+  - {first: Kerstin Severinson, last: Eklundh}
+  - {first: Kerstin, last: Severinson Eklundh}
+  - {first: Kerstin, last: Severinson}
+kevin-c-yeh:
+  names:
+  - {first: Kevin C., last: Yeh}
+  - {first: Kevin, last: Yeh}
+kevin-d-ashley:
+  names:
+  - {first: Kevin D., last: Ashley}
+  - {first: Kevin, last: Ashley}
+kevin-humphreys:
+  names:
+  - {first: Kevin, last: Humphreys}
+  - {first: K., last: Humphreys}
+kevin-m-quinn:
+  names:
+  - {first: Kevin M., last: Quinn}
+  - {first: Kevin, last: Quinn}
+kevin-mctait:
+  names:
+  - {first: Kevin, last: McTait}
+  - {first: K., last: McTait}
+kexin-wang-bd:
+  comment: Bytedance
+  names:
+  - {first: Kexin, last: Wang}
+kexin-wang-tudarmstadt:
+  comment: TU Darmstadt
+  names:
+  - {first: Kexin, last: Wang}
+  orcid: 0000-0003-1175-7829
+key-sun-choi:
+  names:
+  - {first: Key-Sun, last: Choi}
+  - {first: Key-sun, last: Choi}
+khaled-shaban:
+  names:
+  - {first: Khaled, last: Shaban}
+  - {first: Khaled, last: Bashir Shaban}
+khalid-al-khatib:
+  names:
+  - {first: Khalid, last: Al Khatib}
+  - {first: Khalid, last: Al-Khatib}
+khalid-choukri:
+  names:
+  - {first: Khalid, last: Choukri}
+  - {first: Kalid, last: Choukri}
+  - {first: K., last: Choukri}
+khalil-simaan:
+  names:
+  - {first: Khalil, last: Sima’an}
+  - {first: K., last: Sima’an}
+khe-chai-sim:
+  names:
+  - {first: Khe Chai, last: Sim}
+  - {first: Khe-Chai, last: Sim}
+khiet-p-truong:
+  names:
+  - {first: Khiet P., last: Truong}
+  - {first: Khiet, last: Truong}
+kian-ming-a-chai:
+  names:
+  - {first: Kian Ming A., last: Chai}
+  - {first: Kian Ming Adam, last: Chai}
+kiat-gak-lau:
+  names:
+  - {first: Kiat-gak, last: Lau}
+  - {first: Kiat-Gak, last: Lau}
+  - {first: Kiãt-gãk, last: Lâu}
+kikuo-maekawa:
+  names:
+  - {first: Kikuo, last: Maekawa}
+  - {first: K., last: Maekawa}
+kilian-a-foth:
+  names:
+  - {first: Kilian A., last: Foth}
+  - {first: Kilian, last: Foth}
+kim-e-a-silverman:
+  names:
+  - {first: Kim E. A., last: Silverman}
+  - {first: Kim E.A., last: Silverman}
+kim-teng-lua:
+  names:
+  - {first: Kim-Teng, last: Lua}
+  - {first: KimTeng, last: Lua}
+  - {first: Kim Teng, last: Lua}
+king-kui-sin:
+  names:
+  - {first: King Kui, last: Sin}
+  - {first: KingKui, last: Sin}
+  - {first: K. K., last: Sin}
+  - {first: K.K., last: Sin}
+kiran-gvr:
+  names:
+  - {first: Kiran, last: GVR}
+  - {first: Kiran, last: Gvr}
+kiril-simov:
+  names:
+  - {first: Kiril, last: Simov}
+  - {first: Kiril Iv., last: Simov}
+klaus-wothke:
+  names:
+  - {first: Klaus, last: Wothke}
+  - {first: K., last: Wothke}
+koel-dutta-chowdhury:
+  names:
+  - {first: Koel, last: Dutta Chowdhury}
+  - {first: Koel Dutta, last: Chowdhury}
+koenraad-de-smedt:
+  names:
+  - {first: Koenraad, last: De Smedt}
+  - {first: Koenraad, last: de Smedt}
+  - {first: Koenraad, last: DeSmedt}
+koiti-hasida:
+  names:
+  - {first: Koiti, last: Hasida}
+  - {first: Kôiti, last: Hasida}
+kok-wee-gan:
+  names:
+  - {first: Kok Wee, last: Gan}
+  - {first: Kok-Wee, last: Gan}
+koldo-gojenola:
+  names:
+  - {first: Koldo, last: Gojenola}
+  - {first: Koldobika, last: Gojenola}
+  - {first: Koldo, last: Gojenola Galletebeitia}
+  - {first: K., last: Gojenola}
+kornel-marko:
+  names:
+  - {first: Kornél, last: Markó}
+  - {first: Kornel, last: Markó}
+kostas-karpouzis:
+  names:
+  - {first: Kostas, last: Karpouzis}
+  - {first: K., last: Karpouzis}
+kranti-chalamalasetti:
+  names:
+  - {first: Kranti, last: Chalamalasetti}
+  - {first: Chalamalasetti, last: Kranti}
+krasimira-ivanova:
+  names:
+  - {first: Krasimira, last: Ivanova}
+  - {first: Krassimira, last: Ivanova}
+kristen-johnson:
+  names:
+  - {first: Kristen, last: Johnson}
+  - {first: Kristen Marie, last: Johnson}
+krister-linden:
+  names:
+  - {first: Krister, last: Lindén}
+  - {first: Krister, last: Linden}
+kristian-concepcion:
+  names:
+  - {first: Kristian, last: Concepcion}
+  - {first: Kris, last: Concepcion}
+kristiina-jokinen:
+  names:
+  - {first: Kristiina, last: Jokinen}
+  - {first: Päivi Kristiina, last: Jokinen}
+kristina-nilsson-bjorkenstam:
+  names:
+  - {first: Kristina, last: Nilsson Björkenstam}
+  - {first: Kristina, last: Nilsson}
+  - {first: Kristina Nilsson, last: Björkenstam}
+  - {first: Kristina, last: N. Björkenstam}
+kristina-vuckovic:
+  names:
+  - {first: Kristina, last: Vuckovic}
+  - {first: Kristina, last: Vučković}
+kristine-levane-petrova:
+  names:
+  - {first: Kristīne, last: Levāne-Petrova}
+  - {first: Kristīne, last: Levāne}
+kristy-boyer:
+  names:
+  - {first: Kristy, last: Boyer}
+  - {first: Kristy Elizabeth, last: Boyer}
+krzysztof-kochut:
+  names:
+  - {first: Krzysztof, last: Kochut}
+  - {first: K., last: Kochut}
+kuang-hua-chen:
+  names:
+  - {first: Kuang-hua, last: Chen}
+  - {first: Kuang-Hua, last: Chen}
+kui-lam-kwok:
+  names:
+  - {first: Kui-Lam, last: Kwok}
+  - {first: Kui Lam, last: Kwok}
+  - {first: K.L., last: Kwok}
+kumiko-tanaka-ishii:
+  names:
+  - {first: Kumiko, last: Tanaka-Ishii}
+  - {first: Kumiko, last: Tanaka}
+kun-ching-wang:
+  names:
+  - {first: Kun-Ching, last: Wang}
+  - {first: Kun-ching, last: Wang}
+kun-zhang-inria:
+  comment: Inria Saclay-Île-de-France
+  names:
+  - {first: Kun, last: Zhang}
+kun-zhang-ucas:
+  comment: University of Chinese Academy of Sciences
+  names:
+  - {first: Kun, last: Zhang}
+kun-zhang-ustc:
+  comment: University of Science and Technology of China
+  names:
+  - {first: Kun, last: Zhang}
+kwok-ping-chan:
+  names:
+  - {first: Kwok-Ping, last: Chan}
+  - {first: Kwok Ping, last: Chan}
+kyong-hi-moon:
+  names:
+  - {first: Kyong-Hi, last: Moon}
+  - {first: Kyonghi, last: Moon}
+kyosuke-yoshida:
+  names:
+  - {first: Kyosuke, last: Yoshida}
+  - {first: Kyôsuke, last: Yoshida}
+kyoung-young-kim:
+  names:
+  - {first: Kyoung-young, last: Kim}
+  - {first: Kyoung-Young, last: Kim}
+kyumars-sheykh-esmaili:
+  names:
+  - {first: Kyumars, last: Sheykh Esmaili}
+  - {first: Kyumars Sheykh, last: Esmaili}
+kyung-soon-lee:
+  names:
+  - {first: Kyung-Soon, last: Lee}
+  - {first: KyungSoon, last: Lee}
+kyungtae-lim:
+  names:
+  - {first: KyungTae, last: Lim}
+  - {first: Kyungtae, last: Lim}
+kyuyoung-kim:
+  names:
+  - {first: Kyuyoung, last: Kim}
+  - {first: Kyu-Young, last: Kim}
+l-alfonso-urena-lopez:
+  names:
+  - {first: L. Alfonso, last: Urena Lopez}
+  - {first: L. Alfonso, last: Ureña-López}
+  - {first: L. Alfonso, last: Ureña López}
+  - {first: L. Alfonso, last: Urena-López}
+  - {first: L. Alfonso, last: Urena}
+  - {first: Alfonso, last: Ureña-López}
+  - {first: Luis Alfonso, last: Ureña-López}
+  - {first: L. Alfonso, last: Ureña- López}
+l-venkata-subramaniam:
+  names:
+  - {first: L. Venkata, last: Subramaniam}
+  - {first: L Venkata, last: Subramaniam}
+  - {first: L. V., last: Subramaniam}
+  - {first: L V, last: Subramaniam}
+laila-dybkjaer:
+  names:
+  - {first: Laila, last: Dybkjaer}
+  - {first: Laila, last: Dybkjær}
+lakshmi-s:
+  names:
+  - {first: Lakshmi, last: S}
+  - {first: Lakshmi, last: Saheer}
+  - {first: Lakshmi, last: S.}
+lalit-r-bahl:
+  names:
+  - {first: Lalit R., last: Bahl}
+  - {first: L. R., last: Bahl}
+  - {first: L.R., last: Bahl}
+lambros-cranias:
+  names:
+  - {first: Lambros, last: Cranias}
+  - {first: Lambros, last: Kranias}
+lamia-hadrich-belguith:
+  names:
+  - {first: Lamia, last: Hadrich Belguith}
+  - {first: Lamia Hadrich, last: Belguith}
+  - {first: Lamia, last: Hadrich-Belguith}
+  - {first: Lamia, last: Belguith}
+  - {first: Lamia, last: Belguith Hadrich}
+lance-a-miller:
+  names:
+  - {first: Lance A., last: Miller}
+  - {first: L. A., last: Miller}
+lance-ramshaw:
+  names:
+  - {first: Lance, last: Ramshaw}
+  - {first: Lance A., last: Ramshaw}
+larry-h-reeker:
+  names:
+  - {first: Larry H., last: Reeker}
+  - {first: L.H., last: Reeker}
+lars-juhl-jensen:
+  names:
+  - {first: Lars Juhl, last: Jensen}
+  - {first: Lars J., last: Jensen}
+laszlo-felfoldi:
+  names:
+  - {first: Laszlo, last: Felfoldi}
+  - {first: László, last: Felföldi}
+laszlo-grunfeld:
+  names:
+  - {first: Laszlo, last: Grunfeld}
+  - {first: L., last: Grunfeld}
+laszlo-tihanyi:
+  names:
+  - {first: Laszlo, last: Tihanyi}
+  - {first: László, last: Tihanyi}
+lauma-pretkalnina:
+  names:
+  - {first: Lauma, last: Pretkalniņa}
+  - {first: Lauma, last: Pretkalnina}
+  - {first: Lauma, last: Pretkalnin̨a}
+laura-alonso-alemany:
+  names:
+  - {first: Laura, last: Alonso Alemany}
+  - {first: Laura, last: Alonso i Alemany}
+  - {first: Laura, last: Alonso}
+laura-ana-maria-oberlander:
+  names:
+  - {first: Laura Ana Maria, last: Oberländer}
+  - {first: Laura Ana Maria, last: Bostan}
+  - {first: Laura-Ana-Maria, last: Bostan}
+laura-burdick:
+  names:
+  - {first: Laura, last: Burdick}
+  - {first: Laura, last: Wendlandt}
+laura-docio-fernandez:
+  names:
+  - {first: Laura, last: Docio-Fernandez}
+  - {first: Laura, last: Docío-Fernández}
+laura-g-miller:
+  names:
+  - {first: Laura G., last: Miller}
+  - {first: L. G., last: Miller}
+laura-mayfield-tomokiyo:
+  names:
+  - {first: Laura Mayfield, last: Tomokiyo}
+  - {first: Laura, last: Mayfield}
+lauren-levine:
+  names:
+  - {first: Lauren, last: Levine}
+  - {first: Lauren Elizabeth, last: Levine}
+laurence-devillers:
+  names:
+  - {first: Laurence, last: Devillers}
+  - {first: L., last: Devillers}
+laurence-gillick:
+  names:
+  - {first: Laurence, last: Gillick}
+  - {first: Laurence S., last: Gillick}
+laurent-besacier:
+  names:
+  - {first: Laurent, last: Besacier}
+  - {first: L., last: Besacier}
+laurent-bourbeau:
+  names:
+  - {first: Laurent, last: Bourbeau}
+  - {first: L., last: Bourbeau}
+laurent-gillard:
+  names:
+  - {first: Laurent, last: Gillard}
+  - {first: L., last: Gillard}
+laurent-prevot:
+  names:
+  - {first: Laurent, last: Prévot}
+  - {first: Laurent, last: Prevot}
+laurent-romary:
+  names:
+  - {first: Laurent, last: Romary}
+  - {first: L., last: Romary}
+laurent-schmitt:
+  names:
+  - {first: Laurent, last: Schmitt}
+  - {first: L., last: Schmitt}
+laurie-damianos:
+  names:
+  - {first: Laurie, last: Damianos}
+  - {first: Laurie E., last: Damianos}
+laurie-feldman:
+  names:
+  - {first: Laurie, last: Feldman}
+  - {first: Laurie Beth, last: Feldman}
+lawrence-hunter:
+  names:
+  - {first: Lawrence, last: Hunter}
+  - {first: Lawrence E., last: Hunter}
+lawrence-r-rabiner:
+  names:
+  - {first: Lawrence R., last: Rabiner}
+  - {first: L. R., last: Rabiner}
+lawrence-s-moss:
+  names:
+  - {first: Lawrence S., last: Moss}
+  - {first: Lawrence, last: Moss}
+lawrence-y-l-cheung:
+  names:
+  - {first: Lawrence Y. L., last: Cheung}
+  - {first: Lawrence Y.L., last: Cheung}
+  - {first: L. Y. L., last: Cheung}
+le-quan-ha:
+  names:
+  - {first: Le Quan, last: Ha}
+  - {first: Le Q, last: Ha}
+lee-becker:
+  names:
+  - {first: Lee, last: Becker}
+  - {first: Lee A., last: Becker}
+leif-groenqvist:
+  names:
+  - {first: Leif, last: Groenqvist}
+  - {first: Leif, last: Gronqvist}
+lena-stromback:
+  names:
+  - {first: Lena, last: Stromback}
+  - {first: Lena, last: Strömbäck}
+lenhart-schubert:
+  names:
+  - {first: Lenhart, last: Schubert}
+  - {first: Lenhart K., last: Schubert}
+  - {first: Len, last: Schubert}
+leon-derczynski:
+  names:
+  - {first: Leon, last: Derczynski}
+  - {first: Leon, last: Strømberg-Derczynski}
+leon-dostert:
+  names:
+  - {first: Léon, last: Dostert}
+  - {first: Leon, last: Dostert}
+  - {first: L. E., last: Dostert}
+leonardo-campillos-llanos:
+  names:
+  - {first: Leonardo Campillos, last: Llanos}
+  - {first: Leonardo, last: Campillos Llanos}
+leonardo-lesmo:
+  names:
+  - {first: Leonardo, last: Lesmo}
+  - {first: L., last: Lesmo}
+leonid-iomdin:
+  names:
+  - {first: Leonid, last: Iomdin}
+  - {first: Leonid L., last: Iomdin}
+leonida-della-rocca:
+  names:
+  - {first: Leonida Della, last: Rocca}
+  - {first: Leonida, last: Della-Rocca}
+  - {first: Leonida, last: Della Rocca}
+lesly-miculicich-werlen:
+  names:
+  - {first: Lesly, last: Miculicich Werlen}
+  - {first: Lesly, last: Miculicich}
+lev-ratinov:
+  names:
+  - {first: Lev, last: Ratinov}
+  - {first: Lev-Arie, last: Ratinov}
+lewis-m-norton:
+  names:
+  - {first: Lewis M., last: Norton}
+  - {first: Lewis, last: Norton}
+li-chin-yang:
+  names:
+  - {first: Li-chin, last: Yang}
+  - {first: Li-Chin, last: Yang}
+li-li-chang:
+  names:
+  - {first: Li-Li, last: Chang}
+  - {first: Li-li, last: Chang}
+li-lin-pku:
+  degree: Peking University
+  disable_name_matching: true
+  names:
+  - {first: Li, last: Lin}
+  orcid: 0009-0008-5072-5022
+li-lucy:
+  names:
+  - {first: Li, last: Lucy}
+  - {first: Lucy, last: Li}
+li-mei-chen:
+  names:
+  - {first: Li-mei, last: Chen}
+  - {first: Li-Mei, last: Chen}
+li-ping-chang:
+  names:
+  - {first: Li-Ping, last: Chang}
+  - {first: Li-ping, last: Chang}
+li-rong-dai:
+  names:
+  - {first: Li-Rong, last: Dai}
+  - {first: LiRong, last: Dai}
+li-zhang-aws:
+  comment: AWS
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-birmingham:
+  comment: Birmingham
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-gg:
+  comment: Google
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-google:
+  comment: Google
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-ibmc:
+  comment: IBM-china
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-nankai:
+  comment: Nankai
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-newcastle:
+  comment: Newcastle, UK
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-teesside:
+  comment: Teesside University
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-ucsandiego:
+  comment: UC San Diego
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-uk:
+  comment: UK
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-upenn:
+  comment: University of Pennsylvania
+  names:
+  - {first: Li, last: Zhang}
+li-zhang-wuhan:
+  comment: Wuhan
+  names:
+  - {first: Li, last: Zhang}
+lianen-huang:
+  names:
+  - {first: Lian′en, last: Huang}
+  - {first: Lian’en, last: Huang}
+liang-chih-yu:
+  names:
+  - {first: Liang-Chih, last: Yu}
+  - {first: Liang-chih, last: Yu}
+liang-yu-chen:
+  names:
+  - {first: Liang-Yu, last: Chen}
+  - {first: Liangyu, last: Chen}
+lianhau-lee:
+  names:
+  - {first: Lianhau, last: Lee}
+  - {first: Lian Hau, last: Lee}
+liat-ein-dor:
+  names:
+  - {first: Liat, last: Ein Dor}
+  - {first: Liat, last: Ein-Dor}
+lide-wu:
+  names:
+  - {first: Lide, last: Wu}
+  - {first: Li-de, last: Wu}
+lidia-mangu:
+  names:
+  - {first: Lidia, last: Mangu}
+  - {first: L., last: Mangu}
+lidia-moreno:
+  names:
+  - {first: Lidia, last: Moreno}
+  - {first: L., last: Moreno}
+liliana-mamani-sanchez:
+  names:
+  - {first: Liliana, last: Mamani Sanchez}
+  - {first: Liliana, last: Mamani Sánchez}
+  - {first: Liliana Mamani, last: Sanchez}
+lilja-ovrelid:
+  names:
+  - {first: Lilja, last: Øvrelid}
+  - {first: Lilja, last: Ovrelid}
+lin-shan-lee:
+  names:
+  - {first: Lin-Shan, last: Lee}
+  - {first: Lin-shan, last: Lee}
+lina-m-rojas-barahona:
+  names:
+  - {first: Lina M., last: Rojas Barahona}
+  - {first: Lina M., last: Rojas-Barahona}
+  - {first: Lina, last: Rojas-Barahona}
+  - {first: Lina, last: Rojas}
+  - {first: Lina Maria, last: Rojas-Barahona}
+linda-fineman:
+  names:
+  - {first: Linda, last: Fineman}
+  - {first: L., last: Fineman}
+lindsay-j-evett:
+  names:
+  - {first: Lindsay J., last: Evett}
+  - {first: L.J., last: Evett}
+ling-xiao-wang:
+  names:
+  - {first: Ling Xiao, last: Wang}
+  - {first: Lingxiao, last: Wang}
+lingpeng-yang:
+  names:
+  - {first: Lingpeng, last: Yang}
+  - {first: LingPeng, last: Yang}
+lingshuang-jack-mao:
+  names:
+  - {first: Lingshuang Jack, last: Mao}
+  - {first: Lingshuang, last: Mao}
+lisa-braden-harder:
+  names:
+  - {first: Lisa, last: Braden-Harder}
+  - {first: Lisa C., last: Braden-Harder}
+lisa-n-michaud:
+  names:
+  - {first: Lisa N., last: Michaud}
+  - {first: Lisa, last: Michaud}
+lisa-rau:
+  names:
+  - {first: Lisa, last: Rau}
+  - {first: Lisa F., last: Rau}
+lisanne-teunissen:
+  names:
+  - {first: Lisanne, last: Teunissen}
+  - {first: Lisa, last: Teunissen}
+litton-j-kurisinkel:
+  names:
+  - {first: Litton, last: J Kurisinkel}
+  - {first: Litton J, last: Kurisinkel}
+liviu-p-dinu:
+  names:
+  - {first: Liviu P., last: Dinu}
+  - {first: Liviu, last: Dinu}
+  - {first: Liviu Petrisor, last: Dinu}
+lluis-f-hurtado:
+  names:
+  - {first: Lluís-F., last: Hurtado}
+  - {first: Lluís F., last: Hurtado}
+  - {first: LLuís-F., last: Hurtado}
+lluis-formiga:
+  names:
+  - {first: Lluis, last: Formiga}
+  - {first: Lluís, last: Formiga}
+lluis-marquez:
+  names:
+  - {first: Lluís, last: Màrquez}
+  - {first: Lluis, last: Marquez}
+  - {first: Lluis, last: Màrquez}
+  - {first: Lluis, last: Márquez}
+  - {first: L., last: Màrquez}
+lluis-padro:
+  names:
+  - {first: Lluís, last: Padró}
+  - {first: Lluis, last: Padro}
+  - {first: Lluis, last: Padró}
+  - {first: L., last: Padró}
+  - {first: L., last: Padro}
+loic-boizou:
+  names:
+  - {first: Loic, last: Boizou}
+  - {first: Loïc, last: Boizou}
+loic-dugast:
+  names:
+  - {first: Loic, last: Dugast}
+  - {first: Loïc, last: Dugast}
+lois-c-childs:
+  names:
+  - {first: Lois C., last: Childs}
+  - {first: Lois, last: Childs}
+lois-m-black:
+  names:
+  - {first: Lois M., last: Black}
+  - {first: Lois, last: Black}
+long-duong:
+  names:
+  - {first: Long, last: Duong}
+  - {first: Long, last: Duong Thanh}
+long-nguyen:
+  names:
+  - {first: Long, last: Nguyen}
+  - {first: L., last: Nguyen}
+longhua-qian:
+  names:
+  - {first: Longhua, last: Qian}
+  - {first: LongHua, last: Qian}
+longlong-ma:
+  names:
+  - {first: Longlong, last: Ma}
+  - {first: Long Long, last: Ma}
+lonneke-van-der-plas:
+  names:
+  - {first: Lonneke, last: van der Plas}
+  - {first: Lonneke, last: Van Der Plas}
+loong-cheong-tong:
+  names:
+  - {first: Loong-Cheong, last: Tong}
+  - {first: Loong Cheong, last: Tong}
+lorenza-romano:
+  names:
+  - {first: Lorenza, last: Romano}
+  - {first: L., last: Romano}
+lorenzo-moretti:
+  names:
+  - {first: Lorenzo, last: Moretti}
+  - {first: L., last: Moretti}
+lori-lamel:
+  names:
+  - {first: Lori, last: Lamel}
+  - {first: Lori F., last: Lamel}
+  - {first: L.F., last: Lamel}
+lori-levin:
+  names:
+  - {first: Lori, last: Levin}
+  - {first: Lori S., last: Levin}
+lou-boves:
+  names:
+  - {first: Lou, last: Boves}
+  - {first: Louis, last: Boves}
+louis-des-tombe:
+  names:
+  - {first: Louis, last: des Tombe}
+  - {first: L., last: des Tombe}
+louise-amelie-cougnon:
+  names:
+  - {first: Louise-Amélie, last: Cougnon}
+  - {first: Louis-Amélie, last: Cougnon}
+louise-corti:
+  names:
+  - {first: Louise, last: Corti}
+  - {first: L., last: Corti}
+louise-guthrie:
+  names:
+  - {first: Louise, last: Guthrie}
+  - {first: L., last: Guthrie}
+louisette-emirkanian:
+  names:
+  - {first: Louisette, last: Emirkanian}
+  - {first: L., last: Emirkanian}
+lu-xu-uniroma1:
+  degree: Sapienza University of Rome
+  disable_name_matching: true
+  names:
+  - {first: Lu, last: Xu}
+  orcid: 0000-0002-5660-3631
+luana-fagarasan:
+  names:
+  - {first: Luana, last: Fǎgǎrǎşan}
+  - {first: Luana, last: Fagarasan}
+lubos-popelinsky:
+  names:
+  - {first: Lubos, last: Popelínsky}
+  - {first: Luboš, last: Popelínský}
+luca-cristoforetti:
+  names:
+  - {first: Luca, last: Cristoforetti}
+  - {first: L., last: Cristoforetti}
+luca-dini:
+  names:
+  - {first: Luca, last: Dini}
+  - {first: L., last: Dini}
+luca-gilardoni:
+  names:
+  - {first: Luca, last: Gilardoni}
+  - {first: L., last: Gilardoni}
+lucas-georges-gabriel-charpentier:
+  names:
+  - {first: Lucas, last: Georges Gabriel Charpentier}
+  - {first: Lucas, last: Charpentier}
+lucas-welter-hilgert:
+  names:
+  - {first: Lucas Welter, last: Hilgert}
+  - {first: Lucas, last: Hilgert}
+lucian-vlad-lita:
+  names:
+  - {first: Lucian Vlad, last: Lita}
+  - {first: Lucian, last: Lita}
+luciana-beatriz-avila:
+  names:
+  - {first: Luciana Beatriz, last: Avila}
+  - {first: Luciana Beatriz, last: Ávila}
+  - {first: Luciana, last: Ávila}
+luciano-serafini:
+  names:
+  - {first: Luciano, last: Serafini}
+  - {first: L., last: Serafini}
+lucja-iwanska:
+  names:
+  - {first: Lucja, last: Iwanska}
+  - {first: Lucja M., last: Iwanska}
+lucy-lu-wang:
+  names:
+  - {first: Lucy Lu, last: Wang}
+  - {first: Lucy, last: Wang}
+luigi-di-caro:
+  names:
+  - {first: Luigi, last: Di Caro}
+  - {first: Luigi, last: di Caro}
+luis-a-leiva:
+  names:
+  - {first: Luis A., last: Leiva}
+  - {first: Luis, last: Leiva}
+luis-a-pineda:
+  names:
+  - {first: Luis. A., last: Pineda}
+  - {first: Luis, last: Pineda}
+luis-adrian-cabrera-diego:
+  names:
+  - {first: Luis-Adrián, last: Cabrera-Diego}
+  - {first: Luis Adrián, last: Cabrera-Diego}
+luis-espinosa-anke:
+  names:
+  - {first: Luis, last: Espinosa Anke}
+  - {first: Luis, last: Espinosa-Anke}
+  - {first: Luis Espinosa, last: Anke}
+luis-fernando-costa:
+  names:
+  - {first: Luís Fernando, last: Costa}
+  - {first: Luís, last: Costa}
+luis-fernando-dharo:
+  names:
+  - {first: Luis Fernando, last: D’Haro}
+  - {first: Luis F., last: d’Haro}
+luis-gerardo-mojica-de-la-vega:
+  names:
+  - {first: Luis Gerardo, last: Mojica de la Vega}
+  - {first: Luis, last: Mojica de la Vega}
+luis-hernandez:
+  names:
+  - {first: Luis, last: Hernández}
+  - {first: Luis Hernández, last: Gomez}
+  - {first: Luis Hernández, last: Gómez}
+  - {first: Luis A., last: Hernandez}
+  - {first: Luis A., last: Hernández}
+  - {first: Luis A. Hernández, last: Gómez}
+luis-javier-rodriguez-fuentes:
+  names:
+  - {first: Luis Javier, last: Rodriguez-Fuentes}
+  - {first: Luis Javier, last: Rodríguez-Fuentes}
+luis-marujo:
+  names:
+  - {first: Luis, last: Marujo}
+  - {first: Luís, last: Marujo}
+luis-miguel-cabral:
+  names:
+  - {first: Luís Miguel, last: Cabral}
+  - {first: Luís, last: Cabral}
+luis-rodrigo-aguado:
+  names:
+  - {first: Luis, last: Rodrigo-Aguado}
+  - {first: Luis, last: Rodrigo}
+luis-villarejo:
+  names:
+  - {first: Luís, last: Villarejo}
+  - {first: Luis, last: Villarejo}
+luis-villasenor-pineda:
+  names:
+  - {first: Luis, last: Villaseñor-Pineda}
+  - {first: Luis, last: Villaseñor}
+  - {first: Luis, last: Villasenor}
+luisa-coheur:
+  names:
+  - {first: Luísa, last: Coheur}
+  - {first: Luisa, last: Coheur}
+luisa-pereira:
+  names:
+  - {first: Luísa, last: Pereira}
+  - {first: Luisa, last: Pereira}
+luiz-augusto-pizzato:
+  names:
+  - {first: Luiz Augusto, last: Pizzato}
+  - {first: Luiz Augusto Sangoi, last: Pizzato}
+luka-a-clarke:
+  names:
+  - {first: Luka A., last: Clarke}
+  - {first: Luka, last: Clarke}
+lukas-zilka:
+  names:
+  - {first: Lukáš, last: Žilka}
+  - {first: Lukas, last: Zilka}
+lukasz-kaiser:
+  names:
+  - {first: Łukasz, last: Kaiser}
+  - {first: Lukasz, last: Kaiser}
+luke-zettlemoyer:
+  names:
+  - {first: Luke, last: Zettlemoyer}
+  - {first: Luke S., last: Zettlemoyer}
+luminita-chiran:
+  names:
+  - {first: Luminita, last: Chiran}
+  - {first: L., last: Chiran}
+luu-anh-tuan:
+  names:
+  - {first: Luu Anh, last: Tuan}
+  - {first: Anh, last: Luu}
+  - {first: Anh Tuan, last: Luu}
+luuk-van-waes:
+  names:
+  - {first: Luuk Van, last: Waes}
+  - {first: Luuk, last: Van Waes}
+lydia-mai-ho-dac:
+  names:
+  - {first: Lydia-Mai, last: Ho-Dac}
+  - {first: Mai, last: Ho-dac}
+lyle-ungar:
+  names:
+  - {first: Lyle, last: Ungar}
+  - {first: Lyle H., last: Ungar}
+lynette-hirschman:
+  names:
+  - {first: Lynette, last: Hirschman}
+  - {first: Lynette, last: Hirshman}
+  - {first: L., last: Hirschman}
+lynne-cahill:
+  names:
+  - {first: Lynne, last: Cahill}
+  - {first: Lynne J., last: Cahill}
+  - {first: L, last: Cahill}
+m-a-picheny:
+  names:
+  - {first: M. A., last: Picheny}
+  - {first: M.A., last: Picheny}
+m-amin-farajian:
+  names:
+  - {first: M. Amin, last: Farajian}
+  - {first: Mohammad Amin, last: Farajian}
+m-antonia-marti:
+  names:
+  - {first: M. Antònia, last: Martí}
+  - {first: M. Antonia, last: Martí}
+  - {first: M. Antonia, last: Marti}
+  - {first: Antonia, last: Martí}
+  - {first: Mª Antònia, last: Martí}
+  - {first: Maria Antònia, last: Martí}
+  - {first: Toni, last: Martí}
+  - {first: M. A., last: Marti}
+  - {first: M.A., last: Martí}
+  - {first: M. A., last: Martí}
+m-dolores-jimenez-lopez:
+  names:
+  - {first: M. Dolores, last: Jiménez-López}
+  - {first: Maria Dolores, last: Jiménez-López}
+m-dolores-molina-gonzalez:
+  names:
+  - {first: M. Dolores, last: Molina-González}
+  - {first: M. Dolores, last: Molina-Gonzalez}
+m-g-abbas-malik:
+  names:
+  - {first: M. G. Abbas, last: Malik}
+  - {first: M.G. Abbas, last: Malik}
+  - {first: M G Abbas, last: Malik}
+m-ines-torres:
+  names:
+  - {first: M. Inés, last: Torres}
+  - {first: María Inés, last: Torres}
+m-patrick-martin:
+  names:
+  - {first: M. Patrick, last: Martin}
+  - {first: Pierre M., last: Martin}
+  - {first: Patrick, last: Martin}
+m-pilar-valverde-ibanez:
+  names:
+  - {first: M. Pilar, last: Valverde Ibáñez}
+  - {first: M. Pilar, last: Valverde Ibañez}
+m-soledad-lopez-gambino:
+  names:
+  - {first: M. Soledad, last: López Gambino}
+  - {first: Soledad, last: López Gambino}
+m-teresa-martin-valdivia:
+  names:
+  - {first: M. Teresa, last: Martín-Valdivia}
+  - {first: Maite, last: Martin}
+  - {first: María Teresa, last: Martín-Valdivia}
+  - {first: Maria Teresa, last: Martín-Valdivia}
+  - {first: Teresa, last: Martin}
+  - {first: M. Teresa, last: Martín}
+  - {first: Maite, last: Martín-Valdivia}
+maarten-de-rijke:
+  names:
+  - {first: Maarten, last: de Rijke}
+  - {first: Maarten, last: De Rijke}
+maayan-geffet:
+  names:
+  - {first: Maayan, last: Geffet}
+  - {first: Maayan, last: Zhitomirsky-Geffet}
+maddalen-lopez-de-lacalle:
+  names:
+  - {first: Maddalen, last: López de Lacalle}
+  - {first: Maddalen, last: Lopez de Lacalle}
+madeleine-bates:
+  names:
+  - {first: Madeleine, last: Bates}
+  - {first: Madeline, last: Bates}
+  - {first: M., last: Bates}
+madis-saluveer:
+  names:
+  - {first: Madis, last: Saluveer}
+  - {first: M., last: Saluveer}
+magali-sanches-duran:
+  names:
+  - {first: Magali Sanches, last: Duran}
+  - {first: Magali, last: Sanches Duran}
+  - {first: Magali, last: Duran}
+magdalena-plamada:
+  names:
+  - {first: Magdalena, last: Plamadă}
+  - {first: Magdalena, last: Plamada}
+magdi-nagi:
+  names:
+  - {first: Magdi, last: Nagi}
+  - {first: Magdy, last: Nagi}
+mahbaneh-eshaghzadeh-torbati:
+  names:
+  - {first: Mahbaneh, last: Eshaghzadeh Torbati}
+  - {first: Mahbaneh, last: Eshaghzadeh}
+mahesh-v-chitrao:
+  names:
+  - {first: Mahesh V., last: Chitrao}
+  - {first: Mahesh, last: Chitrao}
+mai-vu-tran:
+  names:
+  - {first: Mai-Vu, last: Tran}
+  - {first: Mai-vu, last: Tran}
+maira-gatti:
+  names:
+  - {first: Maíra, last: Gatti}
+  - {first: Maira, last: Gatti}
+mairgup-mansur:
+  names:
+  - {first: Mairgup, last: Mansur}
+  - {first: Mansur, last: Mairgup}
+maite-oronoz:
+  names:
+  - {first: Maite, last: Oronoz}
+  - {first: M., last: Oronoz}
+maite-taboada:
+  names:
+  - {first: Maite, last: Taboada}
+  - {first: M., last: Taboada}
+maja-popovic:
+  names:
+  - {first: Maja, last: Popović}
+  - {first: Maja, last: Popovic}
+makoto-nagao:
+  names:
+  - {first: Makoto, last: Nagao}
+  - {first: M., last: Nagao}
+malarkodi-c-s:
+  names:
+  - {first: Malarkodi, last: C.S.}
+  - {first: Malarkodi, last: C.S}
+  - {first: CS., last: Malarkodi}
+malgorzata-marciniak:
+  names:
+  - {first: Malgorzata, last: Marciniak}
+  - {first: Małgorzata, last: Marciniak}
+malhar-kulkarni:
+  names:
+  - {first: Malhar, last: Kulkarni}
+  - {first: Malhar A., last: Kulkarni}
+man-lan:
+  names:
+  - {first: Man, last: Lan}
+  - {first: Lan, last: Man}
+manabu-okumura:
+  names:
+  - {first: Manabu, last: Okumura}
+  - {first: Manabu, last: Okumara}
+manish-shrivastava:
+  names:
+  - {first: Manish, last: Shrivastava}
+  - {first: Manish, last: Srivastava}
+manny-rayner:
+  names:
+  - {first: Manny, last: Rayner}
+  - {first: M., last: Rayner}
+manoj-chinnakotla:
+  names:
+  - {first: Manoj, last: Chinnakotla}
+  - {first: Manoj K., last: Chinnakotla}
+  - {first: Manoj Kumar, last: Chinnakotla}
+manuel-garcia-vega:
+  names:
+  - {first: Manuel, last: García-Vega}
+  - {first: Manuel, last: García}
+manuel-j-mana-lopez:
+  names:
+  - {first: Manuel J., last: Maña López}
+  - {first: Manuel J., last: Maña}
+  - {first: Manual Maña, last: López}
+  - {first: Manuel, last: Maña López}
+manuel-montes:
+  names:
+  - {first: Manuel, last: Montes}
+  - {first: Manuel, last: Montes-y-Gómez}
+  - {first: Manuel, last: Montes y Gomez}
+  - {first: Manuel, last: Montes y Gómez}
+manuel-palomar:
+  names:
+  - {first: Manuel, last: Palomar}
+  - {first: M., last: Palomar}
+manuel-r-ciosici:
+  names:
+  - {first: Manuel R., last: Ciosici}
+  - {first: Manuel, last: Ciosici}
+manuela-huerlimann:
+  names:
+  - {first: Manuela, last: Huerlimann}
+  - {first: Manuela, last: Hürlimann}
+  - {first: Manuela, last: Huerliman}
+manuela-speranza:
+  names:
+  - {first: Manuela, last: Speranza}
+  - {first: M., last: Speranza}
+mar-garcia:
+  names:
+  - {first: Mar, last: García}
+  - {first: M., last: García}
+marc-el-beze:
+  names:
+  - {first: Marc, last: El-Bèze}
+  - {first: Marc, last: El-Beze}
+  - {first: M., last: El-Bèze}
+marc-vilain:
+  names:
+  - {first: Marc, last: Vilain}
+  - {first: Marc B., last: Vilain}
+marcel-adam-just:
+  names:
+  - {first: Marcel Adam, last: Just}
+  - {first: Marcel, last: Just}
+marcel-p-van-lohuizen:
+  names:
+  - {first: Marcel P., last: van Lohuizen}
+  - {first: Marcel P., last: Van Lohuizen}
+marcela-charfuelan:
+  names:
+  - {first: Marcela, last: Charfuelan}
+  - {first: Marcela, last: Charfuelán}
+marcelo-adriano-amancio:
+  names:
+  - {first: Marcelo Adriano, last: Amâncio}
+  - {first: Marcelo, last: Amancio}
+marcia-c-linebarger:
+  names:
+  - {first: Marcia C., last: Linebarger}
+  - {first: Marcia, last: Linebarger}
+marcin-szummer:
+  names:
+  - {first: Marcin, last: Szummer}
+  - {first: Martin, last: Szummer}
+marcis-pinnis:
+  names:
+  - {first: Mārcis, last: Pinnis}
+  - {first: Marcis, last: Pinnis}
+marco-a-valenzuela-escarcega:
+  names:
+  - {first: Marco A., last: Valenzuela-Escárcega}
+  - {first: Marco Antonio, last: Valenzuela-Escárcega}
+marco-aldo-piccolino-boniforti:
+  names:
+  - {first: Marco Aldo Piccolino, last: Boniforti}
+  - {first: Marco Aldo, last: Piccolino Boniforti}
+marco-antonio-sobrevilla-cabezudo:
+  names:
+  - {first: Marco Antonio, last: Sobrevilla Cabezudo}
+  - {first: Marco A., last: Sobrevilla Cabezudo}
+  - {first: Marco, last: Sobrevilla}
+marco-antonio-stranisci:
+  names:
+  - {first: Marco Antonio, last: Stranisci}
+  - {first: Marco, last: Stranisci}
+marco-baroni:
+  names:
+  - {first: Marco, last: Baroni}
+  - {first: M., last: Baroni}
+marco-battista:
+  names:
+  - {first: Marco, last: Battista}
+  - {first: M., last: Battista}
+marco-martin:
+  names:
+  - {first: Marco, last: Martin}
+  - {first: M., last: Martin}
+marco-matassoni:
+  names:
+  - {first: Marco, last: Matassoni}
+  - {first: M., last: Matassoni}
+marco-tulio-ribeiro:
+  names:
+  - {first: Marco Tulio, last: Ribeiro}
+  - {first: Marco, last: Ribeiro}
+marcos-didonet-del-fabro:
+  names:
+  - {first: Marcos Didonet Del, last: Fabro}
+  - {first: Marcus Didonet, last: Del Fabro}
+marcos-garcia-salido:
+  names:
+  - {first: Marcos, last: García Salido}
+  - {first: Marcos, last: García-Salido}
+marcus-uneson:
+  names:
+  - {first: Marcus, last: Uneson}
+  - {first: Markus, last: Uneson}
+mare-koit:
+  names:
+  - {first: Mare, last: Koit}
+  - {first: M., last: Koit}
+marek-kozlowski:
+  names:
+  - {first: Marek, last: Kozlowski}
+  - {first: Marek, last: Kozłowski}
+margaret-kern:
+  names:
+  - {first: Margaret, last: Kern}
+  - {first: Margaret L., last: Kern}
+margaret-mcrorie:
+  names:
+  - {first: Margaret, last: McRorie}
+  - {first: M., last: McRorie}
+margarita-alonso-ramos:
+  names:
+  - {first: Margarita Alonso, last: Ramos}
+  - {first: Margarita, last: Alonso-Ramos}
+margo-stys-budzikowska:
+  names:
+  - {first: Margo, last: Stys-Budzikowska}
+  - {first: Margo, last: Budzikowska}
+  - {first: Margo, last: Stys}
+mari-broman-olsen:
+  names:
+  - {first: Mari Broman, last: Olsen}
+  - {first: Mari, last: Olsen}
+mari-carmen-rodriguez-gancedo:
+  names:
+  - {first: Mari Carmen, last: Rodriguez-Gancedo}
+  - {first: M. Carmen Rodríguez, last: Gancedo}
+  - {first: M. Carmen, last: Rodríguez}
+  - {first: Mari Carmen, last: Rodríguez}
+mari-carmen-suarez-figueroa:
+  names:
+  - {first: Mari Carmen, last: Suárez-Figueroa}
+  - {first: M. Carmen, last: Suárez-Figueroa}
+mari-ostendorf:
+  names:
+  - {first: Mari, last: Ostendorf}
+  - {first: M., last: Ostendorf}
+  - {first: M, last: Ostendorf}
+maria-berger:
+  names:
+  - {first: Maria, last: Berger}
+  - {first: Maria, last: Moritz}
+maria-clara-paixao-de-sousa:
+  names:
+  - {first: Maria Clara Paixão de, last: Sousa}
+  - {first: Maria Clara, last: Paixão de Sousa}
+maria-das-gracas-volpe-nunes:
+  names:
+  - {first: Maria das Graças, last: Volpe Nunes}
+  - {first: Maria, last: das Graças Volpe Nunes}
+  - {first: Maria, last: das Gracas Volpe Nunes}
+  - {first: Maria das Graças Volpe, last: Nunes}
+  - {first: Maria, last: das Graças}
+  - {first: Maria das Graças V., last: Nunes}
+  - {first: Maria das Graças, last: Nunes}
+  - {first: Maria das Gracas, last: Volpe}
+maria-fernanda-bacelar-do-nascimento:
+  names:
+  - {first: Maria Fernanda Bacelar do, last: Nascimento}
+  - {first: Maria Fernanda Bacelar, last: do Nascimento}
+  - {first: Fernanda Bacelar, last: do Nascimento}
+maria-fuentes:
+  names:
+  - {first: Maria, last: Fuentes}
+  - {first: Maria, last: Fuentes Fort}
+maria-gavrilidou:
+  names:
+  - {first: Maria, last: Gavrilidou}
+  - {first: M., last: Gavrilidou}
+maria-jose-b-finatto:
+  names:
+  - {first: Maria José B., last: Finatto}
+  - {first: Maria José, last: Finatto}
+  - {first: Maria José Bocorny, last: Finatto}
+maria-jose-castro-bleda:
+  names:
+  - {first: Maria Jose, last: Castro-Bleda}
+  - {first: María José, last: Castro}
+  - {first: María-José, last: Castro}
+maria-leonor-pacheco:
+  names:
+  - {first: María Leonor, last: Pacheco}
+  - {first: Maria Leonor, last: Pacheco}
+maria-lucia-castro-jorge:
+  names:
+  - {first: Maria Lucia, last: Castro Jorge}
+  - {first: Maria Lucía Castro, last: Jorge}
+  - {first: Maria Lucía, last: Castro Jorge}
+maria-nadejde:
+  names:
+  - {first: Maria, last: Nadejde}
+  - {first: Maria, last: Nădejde}
+maria-novella-catarsi:
+  names:
+  - {first: Maria Novella, last: Catarsi}
+  - {first: M. N., last: Catarsi}
+maria-pozzi:
+  names:
+  - {first: Maria, last: Pozzi}
+  - {first: María, last: Pozzi}
+  - {first: Mara, last: Pozzi}
+maria-ruiz-casado:
+  names:
+  - {first: María, last: Ruiz-Casado}
+  - {first: Maria, last: Ruiz-Casado}
+maria-teresa-cabre:
+  names:
+  - {first: Maria Teresa, last: Cabré}
+  - {first: M. Teresa, last: Cabré}
+  - {first: Teresa, last: Cabré}
+maria-teresa-lino:
+  names:
+  - {first: Maria Teresa, last: Lino}
+  - {first: Teresa, last: Lino}
+maria-teresa-pazienza:
+  names:
+  - {first: Maria Teresa, last: Pazienza}
+  - {first: Maria Teresa, last: Pazienze}
+  - {first: Maria, last: Pazienza}
+  - {first: M. T., last: Pazienza}
+  - {first: M.T., last: Pazienza}
+maria-toporowska-gronostaj:
+  names:
+  - {first: Maria Toporowska, last: Gronostaj}
+  - {first: Maria, last: Toporowska Gronostaj}
+marian-trnka:
+  names:
+  - {first: Marian, last: Trnka}
+  - {first: Marián, last: Trnka}
+marianna-martindale:
+  names:
+  - {first: Marianna, last: Martindale}
+  - {first: Marianna J., last: Martindale}
+marianne-dabbadie:
+  names:
+  - {first: Marianne, last: Dabbadie}
+  - {first: M., last: Dabbadie}
+marie-a-bienkowski:
+  names:
+  - {first: Marie A., last: Bienkowski}
+  - {first: Marie, last: Bienkowski}
+marie-candito:
+  names:
+  - {first: Marie, last: Candito}
+  - {first: Marie-Helene, last: Candito}
+  - {first: Marie-Hélène, last: Candito}
+marie-catherine-de-marneffe:
+  names:
+  - {first: Marie-Catherine, last: de Marneffe}
+  - {first: Marie Catherine, last: de Marneffe}
+marie-claude-lhomme:
+  names:
+  - {first: Marie-Claude, last: L’Homme}
+  - {first: Marie-Claude, last: L’ Homme}
+marie-francine-moens:
+  names:
+  - {first: Marie Francine, last: Moens}
+  - {first: Marie-Francine, last: Moens}
+marie-helene-metzger:
+  names:
+  - {first: Marie Hélène, last: Metzger}
+  - {first: Marie-Hélène, last: Metzger}
+marie-helene-stefanini:
+  names:
+  - {first: Marie-Hélène, last: Stéfanini}
+  - {first: Marie-Helene, last: Stefanini}
+marie-hinrichs:
+  names:
+  - {first: Marie, last: Hinrichs}
+  - {first: Marie, last: Boyle-Hinrichs}
+marie-meteer:
+  names:
+  - {first: Marie, last: Meteer}
+  - {first: Marie W., last: Meteer}
+marie-neige-garcia:
+  names:
+  - {first: Marie-Neige, last: Garcia}
+  - {first: M. N., last: Garcia}
+marie-paule-pery-woodley:
+  names:
+  - {first: Marie-Paule, last: Pery-Woodley}
+  - {first: Marie-Paule, last: Péry-Woodley}
+marielle-leijten:
+  names:
+  - {first: Marielle, last: Leijten}
+  - {first: Mariëlle, last: Leijten}
+mariem-ellouze-khemekhem:
+  names:
+  - {first: Mariem, last: Ellouze Khemekhem}
+  - {first: Mariem, last: Ellouze Khemakhem}
+  - {first: Mariem, last: Ellouze}
+  - {first: Mariem Ellouze, last: Khmekhem}
+  - {first: Mariem, last: Ellouze khemekhem}
+mariet-theune:
+  names:
+  - {first: Mariët, last: Theune}
+  - {first: Mariet, last: Theune}
+  - {first: M., last: Theune}
+marilyn-walker:
+  names:
+  - {first: Marilyn, last: Walker}
+  - {first: Marilyn A., last: Walker}
+  - {first: M. A., last: Walker}
+marina-vigario:
+  names:
+  - {first: Marina, last: Vigário}
+  - {first: M., last: Vigário}
+mario-j-silva:
+  names:
+  - {first: Mario J., last: Silva}
+  - {first: Mário J., last: Silva}
+  - {first: Mário, last: Silva}
+marion-weller-di-marco:
+  names:
+  - {first: Marion, last: Weller-Di Marco}
+  - {first: Marion, last: Di Marco}
+mariona-coll-ardanuy:
+  names:
+  - {first: Mariona, last: Coll Ardanuy}
+  - {first: Mariona Coll, last: Ardanuy}
+mariona-taule:
+  names:
+  - {first: Mariona, last: Taulé}
+  - {first: Mariona, last: Taule}
+  - {first: M., last: Taulé}
+marius-pasca:
+  names:
+  - {first: Marius, last: Pasca}
+  - {first: Marius A., last: Pasca}
+  - {first: Marius, last: Paşca}
+marjo-van-koppen:
+  names:
+  - {first: Marjo, last: Van Koppen}
+  - {first: Marjo, last: van Koppen}
+mark-a-clements:
+  names:
+  - {first: Mark A., last: Clements}
+  - {first: Mark, last: Clements}
+mark-a-greenwood:
+  names:
+  - {first: Mark A., last: Greenwood}
+  - {first: Mark, last: Greenwood}
+mark-carman:
+  names:
+  - {first: Mark, last: Carman}
+  - {first: Mark J., last: Carman}
+  - {first: Mark James, last: Carman}
+mark-christoph-muller:
+  names:
+  - {first: Mark-Christoph, last: Müller}
+  - {first: Mark-Christoph, last: Mueller}
+mark-epstein:
+  names:
+  - {first: Mark, last: Epstein}
+  - {first: M., last: Epstein}
+mark-finlayson:
+  names:
+  - {first: Mark, last: Finlayson}
+  - {first: Mark A., last: Finlayson}
+mark-g-core:
+  names:
+  - {first: Mark G., last: Core}
+  - {first: Mark, last: Core}
+mark-h-smith:
+  names:
+  - {first: Mark H., last: Smith}
+  - {first: Mark, last: Smith}
+mark-hall:
+  names:
+  - {first: Mark, last: Hall}
+  - {first: Mark Michael, last: Hall}
+mark-jones:
+  names:
+  - {first: Mark, last: Jones}
+  - {first: Mark A., last: Jones}
+  - {first: Mark Alan, last: Jones}
+mark-lee:
+  names:
+  - {first: Mark, last: Lee}
+  - {first: Mark G., last: Lee}
+  - {first: M.G., last: Lee}
+mark-liberman:
+  names:
+  - {first: Mark, last: Liberman}
+  - {first: Mark Y., last: Liberman}
+  - {first: M. Y., last: Liberman}
+  - {first: M., last: Liberman}
+mark-przybocki:
+  names:
+  - {first: Mark, last: Przybocki}
+  - {first: Mark A., last: Przybocki}
+mark-s-tuttle:
+  names:
+  - {first: Mark S., last: Tuttle}
+  - {first: M. S., last: Tuttle}
+mark-steedman:
+  names:
+  - {first: Mark, last: Steedman}
+  - {first: M., last: Steedman}
+mark-t-maybury:
+  names:
+  - {first: Mark T., last: Maybury}
+  - {first: Mark, last: Maybury}
+mark-w-davis:
+  names:
+  - {first: Mark W., last: Davis}
+  - {first: Mark, last: Davis}
+marketa-lopatkova:
+  names:
+  - {first: Marketa, last: Lopatkova}
+  - {first: Markéta, last: Straňáková-Lopatková}
+  - {first: Markéta, last: Lopatková}
+marsal-gavalda:
+  names:
+  - {first: Marsal, last: Gavalda}
+  - {first: Marsal, last: Gavaldà}
+marsden-s-blois:
+  names:
+  - {first: Marsden S., last: Blois}
+  - {first: M. S., last: Blois}
+marta-garrote-salazar:
+  names:
+  - {first: Marta, last: Garrote-Salazar}
+  - {first: Marta, last: Garrote}
+marta-r-costa-jussa:
+  names:
+  - {first: Marta R., last: Costa-jussà}
+  - {first: Marta, last: R. Costa-jussà}
+  - {first: Marta R., last: Costa-Jussà}
+  - {first: Marta R., last: Costa-Jussa}
+  - {first: Marta, last: Ruiz Costa-jussà}
+  - {first: Marta Ruiz, last: Costa-jussà}
+marten-during-ul:
+  comment: University of Luxembourg
+  names:
+  - {first: Marten, last: During}
+marten-van-schijndel:
+  names:
+  - {first: Marten, last: van Schijndel}
+  - {first: Marten, last: Van Schijndel}
+  - {first: Martin, last: van Schijndel}
+martha-alicia-rocha:
+  names:
+  - {first: Martha-Alicia, last: Rocha}
+  - {first: Martha Alicia, last: Rocha}
+martha-evens:
+  names:
+  - {first: Martha, last: Evens}
+  - {first: Martha W., last: Evens}
+  - {first: Martha W, last: Evens}
+martha-palmer:
+  names:
+  - {first: Martha, last: Palmer}
+  - {first: Martha Stone, last: Palmer}
+  - {first: Martha S., last: Palmer}
+martha-yifiru-tachbelie:
+  names:
+  - {first: Martha Yifiru, last: Tachbelie}
+  - {first: Martha, last: Yifiru Tachbelie}
+marti-a-hearst:
+  names:
+  - {first: Marti A., last: Hearst}
+  - {first: Marti, last: Hearst}
+martin-ariel-dominguez:
+  names:
+  - {first: Martin Ariel, last: Dominguez}
+  - {first: Martín, last: Domínguez}
+  - {first: Martin Ariel, last: Domínguez}
+martin-c-emele:
+  names:
+  - {first: Martin C., last: Emele}
+  - {first: Martin, last: Emele}
+martin-chodorow:
+  names:
+  - {first: Martin, last: Chodorow}
+  - {first: Martin S., last: Chodorow}
+martin-cmejrek:
+  names:
+  - {first: Martin, last: Cmejrek}
+  - {first: Martin, last: Čmejrek}
+martin-hofmann-apitius:
+  names:
+  - {first: Martin, last: Hofmann--Apitius}
+  - {first: Martin, last: Hofmann-Apitius}
+martin-krallinger:
+  names:
+  - {first: Martin, last: Krallinger}
+  - {first: M., last: Krallinger}
+martin-labsky:
+  names:
+  - {first: Martin, last: Labský}
+  - {first: Martin, last: Labsky}
+martin-pereira-farina:
+  names:
+  - {first: Martín, last: Pereira-Fariña}
+  - {first: M., last: Pereira-Fariña}
+martin-platek:
+  names:
+  - {first: Martin, last: Platek}
+  - {first: Martin, last: Plátek}
+martin-rajman:
+  names:
+  - {first: Martin, last: Rajman}
+  - {first: M., last: Rajman}
+martin-russell:
+  names:
+  - {first: Martin, last: Russell}
+  - {first: M., last: Russell}
+martin-schaler:
+  names:
+  - {first: Martin, last: Schäler}
+  - {first: Martin, last: Schäfer}
+martine-adda-decker:
+  names:
+  - {first: Martine, last: Adda-Decker}
+  - {first: Martine, last: Adda-decker}
+  - {first: M., last: Adda-Decker}
+martine-de-calmes:
+  names:
+  - {first: Martine, last: de Calmès}
+  - {first: M., last: de Calmes}
+mary-elaine-califf:
+  names:
+  - {first: Mary Elaine, last: Califf}
+  - {first: M. E., last: Califf}
+mary-ellen-foster:
+  names:
+  - {first: Mary Ellen, last: Foster}
+  - {first: Mary E., last: Foster}
+mary-harper:
+  names:
+  - {first: Mary, last: Harper}
+  - {first: Mary P., last: Harper}
+  - {first: M. P., last: Harper}
+mary-mcgee-wood:
+  names:
+  - {first: Mary McGee, last: Wood}
+  - {first: Mary, last: McGee Wood}
+  - {first: M., last: McGee Wood}
+mary-s-neff:
+  names:
+  - {first: Mary S., last: Neff}
+  - {first: Mary, last: Neff}
+mary-swift:
+  names:
+  - {first: Mary, last: Swift}
+  - {first: Mary D., last: Swift}
+masahiro-araki:
+  names:
+  - {first: Masahiro, last: Araki}
+  - {first: M., last: Araki}
+masakatsu-sugimoto:
+  names:
+  - {first: Masakatsu, last: Sugimoto}
+  - {first: M., last: Sugimoto}
+masako-kume:
+  names:
+  - {first: Masako, last: Kume}
+  - {first: M., last: Kume}
+masaru-tomita:
+  names:
+  - {first: Masaru, last: Tomita}
+  - {first: M., last: Tomita}
+masatake-dantsuji:
+  names:
+  - {first: Masatake, last: Dantsuji}
+  - {first: M., last: Dantsuji}
+masato-ishizaki:
+  names:
+  - {first: Masato, last: Ishizaki}
+  - {first: M., last: Ishizaki}
+massih-r-amini:
+  names:
+  - {first: Massih R., last: Amini}
+  - {first: Massih-Reza, last: Amini}
+massimo-poesio:
+  names:
+  - {first: Massimo, last: Poesio}
+  - {first: M., last: Poesio}
+matej-durco:
+  names:
+  - {first: Matej, last: Ďurčo}
+  - {first: Matej, last: Durco}
+mateja-verlic:
+  names:
+  - {first: Mateja, last: Verlič}
+  - {first: Mateja, last: Verlic}
+mathew-huerta-enochian:
+  names:
+  - {first: Mathew, last: Huerta-Enochian}
+mathieu-mangeot:
+  names:
+  - {first: Mathieu, last: Mangeot}
+  - {first: Mathieu, last: Mangeot-Lerebours}
+matko-bosnjak:
+  names:
+  - {first: Matko, last: Bosnjak}
+  - {first: Matko, last: Bošnjak}
+mats-wiren:
+  names:
+  - {first: Mats, last: Wirén}
+  - {first: Mats, last: Wiren}
+matteo-negri:
+  names:
+  - {first: Matteo, last: Negri}
+  - {first: M., last: Negri}
+matthew-crocker:
+  names:
+  - {first: Matthew, last: Crocker}
+  - {first: Matthew W., last: Crocker}
+matthew-e-peters:
+  names:
+  - {first: Matthew E., last: Peters}
+  - {first: Matthew, last: Peters}
+matthew-gerber:
+  names:
+  - {first: Matthew, last: Gerber}
+  - {first: Matt, last: Gerber}
+  - {first: Matthew S., last: Gerber}
+  - {first: Matthew, last: Garber}
+matthew-j-green:
+  names:
+  - {first: Matthew J., last: Green}
+  - {first: Matthew, last: Green}
+matthew-r-gormley:
+  names:
+  - {first: Matthew R., last: Gormley}
+  - {first: Matthew, last: Gormley}
+matthew-snover:
+  names:
+  - {first: Matthew, last: Snover}
+  - {first: Matthew G., last: Snover}
+matthew-w-bilotti:
+  names:
+  - {first: Matthew W., last: Bilotti}
+  - {first: Matthew, last: Bilotti}
+matthias-hartung:
+  names:
+  - {first: Matthias, last: Hartung}
+  - {first: M., last: Hartung}
+matthieu-constant:
+  names:
+  - {first: Matthieu, last: Constant}
+  - {first: Mathieu, last: Constant}
+matthieu-quignard:
+  names:
+  - {first: Matthieu, last: Quignard}
+  - {first: M., last: Quignard}
+matti-ylilammi:
+  names:
+  - {first: Matti, last: Ylilammi}
+  - {first: M., last: Ylilammi}
+mattia-a-di-gangi:
+  names:
+  - {first: Mattia A., last: Di Gangi}
+  - {first: Mattia Antonino, last: Di Gangi}
+  - {first: Mattia, last: Di Gangi}
+matus-pleva:
+  names:
+  - {first: Matúš, last: Pleva}
+  - {first: Matus, last: Pleva}
+maunendra-sankar-desarkar:
+  names:
+  - {first: Maunendra Sankar, last: Desarkar}
+  - {first: Maunendra, last: Sankar Desarkar}
+maurice-quezel-ambrunaz:
+  names:
+  - {first: Maurice, last: Quezel-Ambrunaz}
+  - {first: M., last: Quezel-Ambrunaz}
+maurizio-omologo:
+  names:
+  - {first: Maurizio, last: Omologo}
+  - {first: M., last: Omologo}
+maurizio-tesconi:
+  names:
+  - {first: Maurizio, last: Tesconi}
+  - {first: Maurizio, last: Tescon}
+mauro-di-manzo:
+  names:
+  - {first: Mauro, last: Di Manzo}
+  - {first: M., last: Di Manzo}
+max-silberztein:
+  names:
+  - {first: Max, last: Silberztein}
+  - {first: Max D., last: Silberztein}
+maximiliano-saiz-noeda:
+  names:
+  - {first: Maximiliano, last: Saiz-Noeda}
+  - {first: M., last: Saiz-Noeda}
+maxwell-weinzierl:
+  names:
+  - {first: Maxwell, last: Weinzierl}
+  - {first: Maxwell A., last: Weinzierl}
+mayank-n-vahia:
+  names:
+  - {first: Mayank N., last: Vahia}
+  - {first: Mayank, last: Vahia}
+mayank-singh-az:
+  comment: University of Arizona
+  disable_name_matching: true
+  names:
+  - {first: Mayank, last: Singh}
+mazhar-mehdi-hussain:
+  names:
+  - {first: Mazhar Mehdi, last: Hussain}
+  - {first: Mazhar, last: Hussain}
+md-anwarus-salam-khan:
+  names:
+  - {first: Md. Anwarus Salam, last: Khan}
+  - {first: Khan Md. Anwarus, last: Salam}
+  - {first: Khan Md., last: Anwarus Salam}
+md-arafat-sultan:
+  names:
+  - {first: Md Arafat, last: Sultan}
+  - {first: Md. Arafat, last: Sultan}
+  - {first: Md., last: Sultan}
+md-maruf-hasan:
+  names:
+  - {first: Md. Maruf, last: Hasan}
+  - {first: Md Maruf, last: Hasan}
+  - {first: Maruf, last: Hasan}
+md-rizwan-parvez:
+  names:
+  - {first: Md. Rizwan, last: Parvez}
+  - {first: Md Rizwan, last: Parvez}
+meenakshi-nagarajan:
+  names:
+  - {first: Meenakshi, last: Nagarajan}
+  - {first: Meena, last: Nagarajan}
+meghan-glenn:
+  names:
+  - {first: Meghan, last: Glenn}
+  - {first: Meghan Lammie, last: Glenn}
+mehrnoosh-sadrzadeh:
+  names:
+  - {first: Mehrnoosh, last: Sadrzadeh}
+  - {first: M., last: Sadrzadeh}
+mei-chih-tsai:
+  names:
+  - {first: Mei-Chih, last: Tsai}
+  - {first: Mei-chih, last: Tsai}
+mei-chun-liu:
+  names:
+  - {first: Mei-Chun, last: Liu}
+  - {first: Mei-chun, last: Liu}
+mei-hua-chen:
+  names:
+  - {first: Mei-hua, last: Chen}
+  - {first: Mei-Hua, last: Chen}
+mei-yuh-hwang:
+  names:
+  - {first: Mei-Yuh, last: Hwang}
+  - {first: M., last: Hwang}
+melanie-martin:
+  names:
+  - {first: Melanie, last: Martin}
+  - {first: Melanie J., last: Martin}
+meliha-yetisgen-yildiz:
+  names:
+  - {first: Meliha, last: Yetisgen-Yildiz}
+  - {first: Meliha, last: Yetisgen}
+  - {first: Meliha, last: Yetişgen}
+memduh-gokirmak:
+  names:
+  - {first: Memduh, last: Gökırmak}
+  - {first: Memduh, last: Gokirmak}
+menno-van-zaanen:
+  names:
+  - {first: Menno, last: van Zaanen}
+  - {first: Menno, last: van Zannen}
+mercedes-garcia-martinez:
+  names:
+  - {first: Mercedes, last: García-Martínez}
+  - {first: Mercedes García, last: Martínez}
+meriama-laib:
+  names:
+  - {first: Meriama, last: Laib}
+  - {first: Meriama, last: Laïb}
+  - {first: Mariama, last: Laib}
+meritxell-gonzalez:
+  names:
+  - {first: Meritxell, last: Gonzàlez}
+  - {first: Meritxell, last: González}
+  - {first: M., last: González}
+mi-zhang-ucd:
+  comment: Dublin
+  disable_name_matching: true
+  names:
+  - {first: Mi, last: Zhang}
+  orcid: 0000-0003-3567-3478
+mia-xu-chen:
+  names:
+  - {first: Mia Xu, last: Chen}
+  - {first: Mia, last: Chen}
+michael-b-kac:
+  names:
+  - {first: Michael B., last: Kac}
+  - {first: Michael, last: Kac}
+michael-c-frank:
+  names:
+  - {first: Michael C., last: Frank}
+  - {first: Michael, last: Frank}
+michael-c-mccord:
+  names:
+  - {first: Michael C., last: McCord}
+  - {first: Michael, last: McCord}
+michael-collins:
+  names:
+  - {first: Michael, last: Collins}
+  - {first: Michael John, last: Collins}
+  - {first: Mike, last: Collins}
+michael-crystal:
+  names:
+  - {first: Michael, last: Crystal}
+  - {first: Michael R., last: Crystal}
+michael-e-jahr:
+  names:
+  - {first: Michael E., last: Jahr}
+  - {first: Michael, last: Jahr}
+michael-ellsworth:
+  names:
+  - {first: Michael, last: Ellsworth}
+  - {first: Michael J., last: Ellsworth}
+michael-f-mctear:
+  names:
+  - {first: Michael F., last: McTear}
+  - {first: Michael, last: McTear}
+michael-g-dyer:
+  names:
+  - {first: Michael G., last: Dyer}
+  - {first: Michael, last: Dyer}
+michael-glass:
+  names:
+  - {first: Michael, last: Glass}
+  - {first: Michael R., last: Glass}
+michael-i-jordan:
+  names:
+  - {first: Michael I., last: Jordan}
+  - {first: Michael, last: Jordan}
+michael-j-cafarella:
+  names:
+  - {first: Michael J., last: Cafarella}
+  - {first: Michael, last: Cafarella}
+michael-j-pan:
+  names:
+  - {first: Michael J., last: Pan}
+  - {first: Michael, last: Pan}
+michael-j-witbrock:
+  names:
+  - {first: Michael J., last: Witbrock}
+  - {first: Michael, last: Witbrock}
+michael-johnston:
+  names:
+  - {first: Michael, last: Johnston}
+  - {first: M., last: Johnston}
+michael-jones:
+  names:
+  - {first: Michael, last: Jones}
+  - {first: Michael P., last: Jones}
+michael-k-brown:
+  names:
+  - {first: Michael K., last: Brown}
+  - {first: Michael, last: Brown}
+michael-kaisser:
+  names:
+  - {first: Michael, last: Kaisser}
+  - {first: Michael, last: Kaißer}
+michael-l-mauldin:
+  names:
+  - {first: Michael L., last: Mauldin}
+  - {first: Michael, last: Mauldin}
+michael-l-mc-hale:
+  names:
+  - {first: Michael L., last: Mc Hale}
+  - {first: Michael L., last: McHale}
+michael-mandel:
+  names:
+  - {first: Michael, last: Mandel}
+  - {first: Michael, last: Mandl}
+michael-minock:
+  names:
+  - {first: Michael, last: Minock}
+  - {first: Michael J., last: Minock}
+michael-moortgat:
+  names:
+  - {first: Michael, last: Moortgat}
+  - {first: M., last: Moortgat}
+michael-paul:
+  names:
+  - {first: Michael, last: Paul}
+  - {first: Michael J., last: Paul}
+michael-phillips:
+  names:
+  - {first: Michael, last: Phillips}
+  - {first: M., last: Phillips}
+michael-riley:
+  names:
+  - {first: Michael, last: Riley}
+  - {first: Michael D., last: Riley}
+michael-rosner:
+  names:
+  - {first: Michael, last: Rosner}
+  - {first: Mike, last: Rosner}
+  - {first: M., last: Rosner}
+  - {first: M.A., last: Rosner}
+michael-s-kearns:
+  names:
+  - {first: Michael S., last: Kearns}
+  - {first: Michael, last: Kearns}
+michael-schlichtkrull:
+  names:
+  - {first: Michael, last: Schlichtkrull}
+  - {first: Michael Sejr, last: Schlichtkrull}
+michael-skinner:
+  names:
+  - {first: Michael, last: Skinner}
+  - {first: Michael A., last: Skinner}
+michael-t-johnson:
+  names:
+  - {first: Michael T., last: Johnson}
+  - {first: M. T., last: Johnson}
+michael-wayne-goodman:
+  names:
+  - {first: Michael Wayne, last: Goodman}
+  - {first: Michael, last: Goodman}
+michael-white:
+  names:
+  - {first: Michael, last: White}
+  - {first: Mike, last: White}
+michele-jardino:
+  names:
+  - {first: Michèle, last: Jardino}
+  - {first: Michele, last: Jardino}
+  - {first: M., last: Jardino}
+michelle-gregory:
+  names:
+  - {first: Michelle, last: Gregory}
+  - {first: Michelle L., last: Gregory}
+  - {first: M. L., last: Gregory}
+michelle-q-wang:
+  names:
+  - {first: Michelle Q., last: Wang}
+  - {first: Michelle, last: Wang}
+mickey-w-c-chong:
+  names:
+  - {first: Mickey W. C., last: Chong}
+  - {first: Mickey W.C., last: Chong}
+miguel-a-alonso:
+  names:
+  - {first: Miguel A., last: Alonso}
+  - {first: Miguel, last: Alonso Pardo}
+  - {first: Miguel A., last: Alonso Pardo}
+miguel-angel-garcia-cumbreras:
+  names:
+  - {first: Miguel Ángel, last: García-Cumbreras}
+  - {first: M. Ángel, last: García}
+  - {first: Miguel, last: García-Cumbreras}
+  - {first: Miguel Á., last: García Cumbreras}
+miguel-b-almeida:
+  names:
+  - {first: Miguel B., last: Almeida}
+  - {first: Miguel, last: Almeida}
+miguel-rodriguez-hernandez:
+  names:
+  - {first: Miguel, last: Rodríguez Hernández}
+  - {first: Miguel Ángel, last: Rodríguez}
+  - {first: Miguel, last: Rodríguez}
+miguel-sales-dias:
+  names:
+  - {first: Miguel Sales, last: Dias}
+  - {first: Miguel, last: Dias}
+mihael-arcan:
+  names:
+  - {first: Mihael, last: Arcan}
+  - {first: Mihael, last: Arčan}
+mihaela-plamada-onofrei:
+  names:
+  - {first: Mihaela, last: Plamada-Onofrei}
+  - {first: Mihaela, last: Onofrei}
+  - {first: Mihaela, last: Plămadă-Onofrei}
+miikka-silfverberg:
+  names:
+  - {first: Miikka, last: Silfverberg}
+  - {first: Miikka P., last: Silfverberg}
+mijail-kabadjov:
+  names:
+  - {first: Mijail, last: Kabadjov}
+  - {first: Mijail A., last: Kabadjov}
+  - {first: Mijail, last: Alexandrov-Kabadjov}
+  - {first: M. A., last: Kabadjov}
+mike-reape:
+  names:
+  - {first: Mike, last: Reape}
+  - {first: M, last: Reape}
+mike-tian-jian-jiang:
+  names:
+  - {first: Mike Tian-Jian, last: Jiang}
+  - {first: Tian-Jian, last: Jiang}
+mikel-iruskieta:
+  names:
+  - {first: Mikel, last: Iruskieta}
+  - {first: M., last: Iruskieta}
+mikel-l-forcada:
+  names:
+  - {first: Mikel L., last: Forcada}
+  - {first: Mikel, last: Forcada}
+mikel-lersundi:
+  names:
+  - {first: Mikel, last: Lersundi}
+  - {first: M., last: Lersundi}
+mikel-penagarikano:
+  names:
+  - {first: Mikel, last: Penagarikano}
+  - {first: M., last: Peñagarikano}
+milan-bily:
+  names:
+  - {first: Milan, last: Bily}
+  - {first: Milan, last: Bílý}
+milica-gasic:
+  names:
+  - {first: Milica, last: Gasic}
+  - {first: Milica, last: Gašić}
+min-hua-lai:
+  names:
+  - {first: Min-Hua, last: Lai}
+  - {first: Min Hua, last: Lai}
+ming-chin-yen:
+  names:
+  - {first: Ming-chin, last: Yen}
+  - {first: Ming-Chin, last: Yen}
+ming-chui-dong:
+  names:
+  - {first: Ming Chui, last: Dong}
+  - {first: Ming-Chui, last: Dong}
+ming-feng-tsai:
+  names:
+  - {first: Ming-Feng, last: Tsai}
+  - {first: Meng-Feng, last: Tsai}
+ming-jer-wu:
+  names:
+  - {first: Ming-Jer, last: Wu}
+  - {first: Min-Jer, last: Wu}
+ming-shing-yu:
+  names:
+  - {first: Ming-Shing, last: Yu}
+  - {first: Ming-shing, last: Yu}
+mingbin-xu:
+  names:
+  - {first: Mingbin, last: Xu}
+  - {first: MingBin, last: Xu}
+minghua-nuo:
+  names:
+  - {first: Minghua, last: Nuo}
+  - {first: Ming Hua, last: Nuo}
+mingwen-wang:
+  names:
+  - {first: Mingwen, last: Wang}
+  - {first: MingWen, last: Wang}
+  - {first: Ming-Wei, last: Wang}
+minh-le-nguyen:
+  names:
+  - {first: Minh Le, last: Nguyen}
+  - {first: Minh-Le, last: Nguyen}
+  - {first: Le-Minh, last: Nguyen}
+  - {first: Nguyen Le, last: Minh}
+  - {first: Le Minh, last: Nguyen}
+  - {first: Nguyen, last: Le Minh}
+  - {first: M.L, last: Nguyen}
+minh-quang-nhat-pham:
+  comment: JAIST, Alt Vietnam
+  names:
+  - {first: Minh Quang Nhat, last: Pham}
+  similar:
+  - minh-quang-pham
+minh-quang-pham:
+  comment: SYSTRAN
+  names:
+  - {first: Minh Quang, last: Pham}
+  - {first: MinhQuang, last: Pham}
+  similar:
+  - minh-quang-nhat-pham
+minh-thang-luong:
+  names:
+  - {first: Minh-Thang, last: Luong}
+  - {first: Thang, last: Luong}
+miquel-espla-gomis:
+  names:
+  - {first: Miquel, last: Esplà-Gomis}
+  - {first: Miquel, last: Esplà}
+mireia-diez:
+  names:
+  - {first: Mireia, last: Diez}
+  - {first: Mireia, last: Díez}
+mireia-ginesti-rosell:
+  names:
+  - {first: Mireia, last: Ginestí-Rosell}
+  - {first: Mireia, last: Ginestí Rosell}
+miriam-r-l-petruck:
+  names:
+  - {first: Miriam R. L., last: Petruck}
+  - {first: Miriam R.L., last: Petruck}
+  - {first: Miriam R L, last: Petruck}
+miriam-tavoni:
+  names:
+  - {first: Miriam, last: Tavoni}
+  - {first: M., last: Tavoni}
+miriam-urkia:
+  names:
+  - {first: Miriam, last: Urkia}
+  - {first: M, last: Urkia}
+  - {first: M., last: Urkia}
+mirjam-sepesy-maucec:
+  names:
+  - {first: Mirjam Sepesy, last: Maucec}
+  - {first: Mirjam Sepesy, last: Maučec}
+mirko-baglioni:
+  names:
+  - {first: Mirko, last: Baglioni}
+  - {first: M., last: Baglioni}
+miroslav-martinovic:
+  names:
+  - {first: Miroslav, last: Martinović}
+  - {first: Miroslav, last: Martinovic}
+miruna-clinciu:
+  names:
+  - {first: Miruna, last: Clinciu}
+  - {first: Miruna-Adriana, last: Clinciu}
+mitch-marcus:
+  names:
+  - {first: Mitch, last: Marcus}
+  - {first: Mitchell, last: Marcus}
+  - {first: Mitchell P., last: Marcus}
+  - {first: M., last: Marcus}
+mitch-weintraub:
+  names:
+  - {first: Mitch, last: Weintraub}
+  - {first: Mitchel, last: Weintraub}
+  - {first: M., last: Weintraub}
+mitesh-m-khapra:
+  names:
+  - {first: Mitesh M., last: Khapra}
+  - {first: Mitesh, last: Khapra}
+  - {first: Mitesh, last: M. Khapra}
+  - {first: Mitesh M, last: Khapra}
+  - {first: Mitesh Shantadevi, last: Khapra}
+mitsuo-shimohata:
+  names:
+  - {first: Mitsuo, last: Shimohata}
+  - {first: M., last: Shimohata}
+mohab-el-karef:
+  names:
+  - {first: Mohab, last: El-karef}
+  - {first: Mohab, last: Elkaref}
+mohamed-ahmed-sherif:
+  names:
+  - {first: Mohamed Ahmed, last: Sherif}
+  - {first: Mohamed, last: Sherif}
+mohamed-altantawy:
+  names:
+  - {first: Mohamed, last: Altantawy}
+  - {first: Mohamed, last: AlTantawy}
+mohamed-attia:
+  names:
+  - {first: Mohamed, last: Attia}
+  - {first: M., last: Attia}
+mohamed-maamouri:
+  names:
+  - {first: Mohamed, last: Maamouri}
+  - {first: Mohammed, last: Maamouri}
+mohamed-mahdi-boudabous:
+  names:
+  - {first: Mohamed Mahdi, last: Boudabous}
+  - {first: Mohamed, last: Boudabous}
+mohamed-nassime-hadjadj:
+  names:
+  - {first: Mohamed Nassime, last: Hadjadj}
+  - {first: Mohamed, last: Hadjadj}
+mohamed-r-amer:
+  names:
+  - {first: Mohamed R., last: Amer}
+  - {first: Mohamed, last: Amer}
+mohamed-zakaria-kurdi:
+  names:
+  - {first: Mohamed Zakaria, last: Kurdi}
+  - {first: Mohamed-Zakaria, last: Kurdi}
+mohammad-akbar:
+  names:
+  - {first: Mohammad, last: Akbar}
+  - {first: M., last: Akbar}
+mohammad-bahrani:
+  names:
+  - {first: Mohammad, last: Bahrani}
+  - {first: M., last: Bahrani}
+mohammed-arif-khan:
+  names:
+  - {first: Mohammed Arif, last: Khan}
+  - {first: Arif, last: Khan}
+  - {first: Arif Md., last: Khan}
+mohan-zhang-unc:
+  comment: UNC
+  disable_name_matching: true
+  names:
+  - {first: Mohan, last: Zhang}
+  orcid: 0009-0000-8866-7878
+mohsen-rashwan:
+  names:
+  - {first: Mohsen, last: Rashwan}
+  - {first: M., last: Rashwan}
+mokhtar-b-billami:
+  names:
+  - {first: Mokhtar B., last: Billami}
+  - {first: Mokhtar-Boumedyen, last: Billami}
+molly-ireland:
+  names:
+  - {first: Molly, last: Ireland}
+  - {first: Molly E., last: Ireland}
+mona-diab:
+  names:
+  - {first: Mona, last: Diab}
+  - {first: Mona T., last: Diab}
+monica-lestari-paramita:
+  names:
+  - {first: Monica Lestari, last: Paramita}
+  - {first: Monica, last: Paramita}
+monika-woszczyna:
+  names:
+  - {first: Monika, last: Woszczyna}
+  - {first: M., last: Woszczyna}
+montse-maritxalar:
+  names:
+  - {first: Montse, last: Maritxalar}
+  - {first: M, last: Maritxalar}
+  - {first: M., last: Maritxalar}
+montserrat-civit:
+  names:
+  - {first: Montserrat, last: Civit}
+  - {first: M., last: Civit}
+montserrat-marimon:
+  names:
+  - {first: Montserrat, last: Marimon}
+  - {first: Montserrat, last: Marimón}
+  - {first: Montserrat Marimon, last: Felipe}
+montserrat-meya:
+  names:
+  - {first: Montserrat, last: Meya}
+  - {first: M., last: Meya}
+moritz-schaeffer:
+  names:
+  - {first: Moritz, last: Schaeffer}
+  - {first: Moritz Jonas, last: Schaeffer}
+morris-salkoff:
+  names:
+  - {first: Morris, last: Salkoff}
+  - {first: M., last: Salkoff}
+mosleh-hmoud-al-adhaileh:
+  names:
+  - {first: Mosleh Hmoud, last: Al-Adhaileh}
+  - {first: Mosleh H., last: Al-Adhaileh}
+mostafa-shahin:
+  names:
+  - {first: Mostafa, last: Shahin}
+  - {first: M., last: Shahin}
+muhammad-elnokrashy:
+  names:
+  - {first: Muhammad N., last: ElNokrashy}
+  - {first: Muhammad, last: ElNokrashy}
+  - {first: Muhammad Nael, last: ElNokrashy}
+muhammad-tasnim-mohiuddin:
+  names:
+  - {first: Muhammad Tasnim, last: Mohiuddin}
+  - {first: Tasnim, last: Mohiuddin}
+munindar-p-singh:
+  names:
+  - {first: Munindar P., last: Singh}
+  - {first: Munindar, last: Singh}
+munirathnam-srikanth:
+  names:
+  - {first: Munirathnam, last: Srikanth}
+  - {first: Muirathnam, last: Srikanth}
+  - {first: M., last: Srikanth}
+muntsa-padro:
+  names:
+  - {first: Muntsa, last: Padró}
+  - {first: M., last: Padró}
+murat-saraclar:
+  names:
+  - {first: Murat, last: Saraclar}
+  - {first: Murat, last: Saraçlar}
+mustafa-yaseen:
+  names:
+  - {first: Mustafa, last: Yaseen}
+  - {first: M., last: Yaseen}
+muthu-kumar-chandrasekaran:
+  names:
+  - {first: Muthu Kumar, last: Chandrasekaran}
+  - {first: Muthu, last: Kumar Chandrasekaran}
+muyun-yang:
+  names:
+  - {first: Muyun, last: Yang}
+  - {first: MuYun, last: Yang}
+  - {first: Mu-yun, last: Yang}
+myriam-hernandez:
+  names:
+  - {first: Myriam, last: Hernandez}
+  - {first: Myriam, last: Hernández A}
+  - {first: Myriam, last: Hernández}
+myroslava-o-dzikovska:
+  names:
+  - {first: Myroslava O., last: Dzikovska}
+  - {first: Myroslava, last: Dzikovska}
+myung-gil-jang:
+  names:
+  - {first: Myung-Gil, last: Jang}
+  - {first: Myoung-Gil, last: Jang}
+myung-kwan-park:
+  names:
+  - {first: Myung-Kwan, last: Park}
+  - {first: Myungkwan, last: Park}
+nadia-mana:
+  names:
+  - {first: Nadia, last: Mana}
+  - {first: N., last: Mana}
+nafise-sadat-moosavi:
+  names:
+  - {first: Nafise Sadat, last: Moosavi}
+  - {first: Nafise, last: Moosavi}
+nagesh-c-panyam:
+  names:
+  - {first: Nagesh C., last: Panyam}
+  - {first: Nagesh, last: C. Panyam}
+nagiza-samatova:
+  names:
+  - {first: Nagiza, last: Samatova}
+  - {first: Nagiza F., last: Samatova}
+nagwa-m-el-makky:
+  names:
+  - {first: Nagwa, last: M. El-Makky}
+  - {first: Nagwa, last: El-Makky}
+naiara-perez:
+  names:
+  - {first: Naiara, last: Pérez}
+  - {first: Naiara, last: Perez-Miguel}
+  - {first: Naiara, last: Miguel}
+  orcid: 0000-0001-8648-0428
+naida-graham:
+  names:
+  - {first: Naida, last: Graham}
+  - {first: Naida L., last: Graham}
+nam-khanh-tran:
+  names:
+  - {first: Nam-Khanh, last: Tran}
+  - {first: Nam Khanh, last: Tran}
+naman-k-gupta:
+  names:
+  - {first: Naman K., last: Gupta}
+  - {first: Naman, last: Gupta}
+nancy-chen:
+  names:
+  - {first: Nancy, last: Chen}
+  - {first: Nancy F., last: Chen}
+nancy-chinchor:
+  names:
+  - {first: Nancy, last: Chinchor}
+  - {first: Nancy A., last: Chinchor}
+  - {first: N., last: Chinchor}
+nancy-green:
+  names:
+  - {first: Nancy, last: Green}
+  - {first: Nancy L., last: Green}
+nancy-ide:
+  names:
+  - {first: Nancy, last: Ide}
+  - {first: Nancy M., last: Ide}
+nancy-mccracken:
+  names:
+  - {first: Nancy, last: McCracken}
+  - {first: Nancy J., last: McCracken}
+nancy-underwood:
+  names:
+  - {first: Nancy, last: Underwood}
+  - {first: Nancy L., last: Underwood}
+nanda-kambhatla:
+  names:
+  - {first: Nanda, last: Kambhatla}
+  - {first: Nandakishore, last: Kambhatla}
+  - {first: N., last: Kambhatla}
+nanette-veilleux:
+  names:
+  - {first: Nanette M., last: Veilleux}
+  - {first: N. M., last: Veilleux}
+  - {first: N, last: Veilleux}
+naoaki-okazaki:
+  names:
+  - {first: Naoaki, last: Okazaki}
+  - {first: Naoki, last: Okazaki}
+naomi-feldman:
+  names:
+  - {first: Naomi, last: Feldman}
+  - {first: Naomi H., last: Feldman}
+naomi-sager:
+  names:
+  - {first: Naomi, last: Sager}
+  - {first: N., last: Sager}
+naoto-kato:
+  names:
+  - {first: Naoto, last: Kato}
+  - {first: Naoto, last: Katoh}
+narjes-bellamine-ben-saoud:
+  names:
+  - {first: Narjès, last: Bellamine Ben Saoud}
+  - {first: Narjès Bellamine Ben, last: Saoud}
+nasser-smaili:
+  names:
+  - {first: Nasser, last: Smaili}
+  - {first: N., last: Smaili}
+natalia-kariaeva-rutgers:
+  names:
+  - {first: Natalia Kariaeva, last: Rutgers}
+  - {first: Natalia, last: Kariaeva}
+natalia-klyueva:
+  names:
+  - {first: Natalia, last: Klyueva}
+  - {first: Natalia, last: Kljueva}
+natalia-loukachevitch:
+  names:
+  - {first: Natalia, last: Loukachevitch}
+  - {first: Natalia V., last: Loukachevitch}
+  - {first: N., last: Loukachevitch}
+natalia-n-modjeska:
+  names:
+  - {first: Natalia N., last: Modjeska}
+  - {first: Natalia, last: Modjeska}
+natalie-kubler:
+  names:
+  - {first: Natalie, last: Kübler}
+  - {first: Natalie, last: Kubler}
+natalie-m-schrimpf:
+  names:
+  - {first: Natalie M., last: Schrimpf}
+  - {first: Natalie, last: Schrimpf}
+nate-blaylock:
+  names:
+  - {first: Nate, last: Blaylock}
+  - {first: N., last: Blaylock}
+nathalie-le-brun:
+  names:
+  - {first: Nathalie, last: Le Brun}
+  - {first: Nathalie Le, last: Brun}
+nathalie-rose-lim:
+  names:
+  - {first: Nathalie Rose, last: Lim}
+  - {first: Nathalie, last: Lim}
+nathalie-simonin:
+  names:
+  - {first: Nathalie, last: Simonin}
+  - {first: N., last: Simonin}
+nathanael-chambers:
+  names:
+  - {first: Nathanael, last: Chambers}
+  - {first: Nathan, last: Chambers}
+natsuko-holden:
+  names:
+  - {first: Natsuko, last: Holden}
+  - {first: N., last: Holden}
+naveen-kumar-laskari:
+  names:
+  - {first: Naveen Kumar, last: Laskari}
+  - {first: Naveen, last: Kumar}
+nazila-hafezi:
+  names:
+  - {first: Nazila, last: Hafezi}
+  - {first: N., last: Hafezi}
+ndapandula-nakashole:
+  names:
+  - {first: Ndapandula, last: Nakashole}
+  - {first: Ndapa, last: Nakashole}
+necip-fazil-ayan:
+  names:
+  - {first: Necip Fazil, last: Ayan}
+  - {first: Necip, last: Fazil Ayan}
+negacy-hailu:
+  names:
+  - {first: Negacy, last: Hailu}
+  - {first: Negacy D., last: Hailu}
+neil-tipper:
+  names:
+  - {first: Neil, last: Tipper}
+  - {first: N, last: Tipper}
+nelson-f-liu:
+  names:
+  - {first: Nelson F., last: Liu}
+  - {first: Nelson, last: Liu}
+nerea-areta:
+  names:
+  - {first: Nerea, last: Areta}
+  - {first: N., last: Areta}
+nerea-ezeiza:
+  names:
+  - {first: Nerea, last: Ezeiza}
+  - {first: N., last: Ezeiza}
+nghia-the-pham:
+  names:
+  - {first: Nghia The, last: Pham}
+  - {first: Nghia, last: Pham}
+ngo-thanh-nhan:
+  names:
+  - {first: Ngô Thanh, last: Nhàn}
+  - {first: Ngo Thanh, last: Nhan}
+  - {first: NT., last: Nhàn}
+ngo-xuan-bach:
+  names:
+  - {first: Ngo Xuan, last: Bach}
+  - {first: Ngo, last: Xuan Bach}
+ngoc-quan-pham:
+  names:
+  - {first: Ngoc-Quan, last: Pham}
+  - {first: Ngoc Quan, last: Pham}
+ngoc-quang-luong:
+  names:
+  - {first: Ngoc Quang, last: Luong}
+  - {first: Ngoc-Quang, last: Luong}
+nguyen-vo:
+  names:
+  - {first: Nguyen, last: Vo}
+  - {first: Nguyen, last: Ha Vo}
+nicholas-asher:
+  names:
+  - {first: Nicholas, last: Asher}
+  - {first: Nicolas, last: Asher}
+nicholas-j-haddock:
+  names:
+  - {first: Nicholas J., last: Haddock}
+  - {first: Nicholas, last: Haddock}
+nicholas-kushmerick:
+  names:
+  - {first: Nicholas, last: Kushmerick}
+  - {first: N., last: Kushmerick}
+nick-j-youd:
+  names:
+  - {first: Nick J., last: Youd}
+  - {first: Nick, last: Youd}
+nick-rizzolo:
+  names:
+  - {first: Nick, last: Rizzolo}
+  - {first: Nicholas, last: Rizzolo}
+nick-webb:
+  names:
+  - {first: Nick, last: Webb}
+  - {first: N., last: Webb}
+nicolas-lefebvre:
+  names:
+  - {first: Nicolas, last: Lefebvre}
+  - {first: Nicolas, last: Lefèbvre}
+nicolas-marin:
+  names:
+  - {first: Nicolas, last: Marin}
+  - {first: Nicolás, last: Marín}
+nicolas-morales:
+  names:
+  - {first: Nicolás, last: Morales}
+  - {first: Nicolas, last: Morales}
+nicolas-nedobejkine:
+  names:
+  - {first: Nicolas, last: Nedobejkine}
+  - {first: N., last: Nedobejkine}
+nicolas-nicolov:
+  names:
+  - {first: Nicolas, last: Nicolov}
+  - {first: N., last: Nicolov}
+  similar:
+  - nikola-i-nikolov
+nicolas-r-fauceglia:
+  names:
+  - {first: Nicolas R., last: Fauceglia}
+  - {first: Nicolas, last: Fauceglia}
+nicolas-serrano:
+  names:
+  - {first: Nicolás, last: Serrano}
+  - {first: Nicolas, last: Serrano}
+nicole-gregoire:
+  names:
+  - {first: Nicole, last: Grégoire}
+  - {first: Nicole, last: Gregoire}
+nicoletta-calzolari:
+  names:
+  - {first: Nicoletta, last: Calzolari}
+  - {first: Nicoletta Calzolari, last: Zamorani}
+  - {first: N., last: Calzolari}
+niels-ole-bernsen:
+  names:
+  - {first: Niels Ole, last: Bernsen}
+  - {first: Niels Ole, last: Bernse}
+  - {first: Niels O., last: Bernsen}
+nigel-ward:
+  names:
+  - {first: Nigel, last: Ward}
+  - {first: Nigel G., last: Ward}
+niklas-paulsson:
+  names:
+  - {first: Niklas, last: Paulsson}
+  - {first: N., last: Paulsson}
+nikola-i-nikolov:
+  names:
+  - {first: Nikola I., last: Nikolov}
+  similar:
+  - nicolas-nicolov
+nikolay-arefyev:
+  names:
+  - {first: Nikolay, last: Arefyev}
+  - {first: Nikolay, last: Arefiev}
+nikos-fakotakis:
+  names:
+  - {first: Nikos, last: Fakotakis}
+  - {first: Nikos D., last: Fakotakis}
+  - {first: N., last: Fakotakis}
+nikos-liolios:
+  names:
+  - {first: Nikos, last: Liolios}
+  - {first: N., last: Liolios}
+niladri-chatterjee:
+  names:
+  - {first: Niladri, last: Chatterjee}
+  - {first: N., last: Chatterjee}
+ning-liu-tsinghua:
+  degree: Tsinghua University
+  disable_name_matching: true
+  names:
+  - {first: Ning, last: Liu}
+  orcid: 0000-0001-7475-9739
+nishat-raihan:
+  names:
+  - {first: Nishat, last: Raihan}
+  - {first: Md Nishat, last: Raihan}
+  orcid: 0000-0001-6242-398X
+nishtha-malhotra:
+  names:
+  - {first: Nishtha, last: Malhotra}
+  - {first: Nishta, last: Malhotra}
+nives-mikelic-preradovic:
+  names:
+  - {first: Nives Mikelić, last: Preradović}
+  - {first: Nives, last: Mikelić Preradović}
+noa-p-cruz-diaz:
+  names:
+  - {first: Noa P., last: Cruz Diaz}
+  - {first: Noa P., last: Cruz}
+  - {first: Noa, last: Cruz}
+  - {first: Noa P., last: Cruz Díaz}
+noah-a-smith:
+  names:
+  - {first: Noah A., last: Smith}
+  - {first: Noah, last: Smith}
+noah-coccaro:
+  names:
+  - {first: Noah, last: Coccaro}
+  - {first: N., last: Coccaro}
+noah-goodman:
+  names:
+  - {first: Noah, last: Goodman}
+  - {first: Noah D., last: Goodman}
+nobal-bikram-niraula:
+  names:
+  - {first: Nobal Bikram, last: Niraula}
+  - {first: Nobal, last: Niraula}
+nobuaki-minematsu:
+  names:
+  - {first: Nobuaki, last: Minematsu}
+  - {first: N., last: Minematsu}
+noel-chateau:
+  names:
+  - {first: Noël, last: Chateau}
+  - {first: N., last: Chateau}
+noemie-elhadad:
+  names:
+  - {first: Noémie, last: Elhadad}
+  - {first: Noemie, last: Elhadad}
+noortje-venhuizen:
+  names:
+  - {first: Noortje, last: Venhuizen}
+  - {first: Noortje J., last: Venhuizen}
+norbert-dinstl:
+  names:
+  - {first: Norbert, last: Dinstl}
+  - {first: N., last: Dinstl}
+norihito-yasuda:
+  names:
+  - {first: Norihito, last: Yasuda}
+  - {first: Norihi, last: Yasuda}
+noriko-h-arai:
+  names:
+  - {first: Noriko H., last: Arai}
+  - {first: Noriko, last: Arai}
+noriyuki-tamura:
+  names:
+  - {first: Noriyuki, last: Tamura}
+  - {first: N., last: Tamura}
+norman-k-sondheimer:
+  names:
+  - {first: Norman K., last: Sondheimer}
+  - {first: Norman, last: Sondheimer}
+norman-m-fraser:
+  names:
+  - {first: Norman M., last: Fraser}
+  - {first: Norman, last: Fraser}
+normunds-gruzitis:
+  names:
+  - {first: Normunds, last: Gruzitis}
+  - {first: Normunds, last: Grūzītis}
+norton-trevisan-roman:
+  names:
+  - {first: Norton Trevisan, last: Roman}
+  - {first: Norton T., last: Roman}
+  - {first: Norton, last: Trevisan Roman}
+noureddine-chenfour:
+  names:
+  - {first: Noureddine, last: Chenfour}
+  - {first: N., last: Chenfour}
+noushin-rezapour-asheghi:
+  names:
+  - {first: Noushin Rezapour, last: Asheghi}
+  - {first: Noushin, last: Rezapour Asheghi}
+nuno-mamede:
+  names:
+  - {first: Nuno, last: Mamede}
+  - {first: Nuno J., last: Mamede}
+nuria-artigas:
+  names:
+  - {first: Núria, last: Artigas}
+  - {first: N., last: Artigas}
+nuria-bel:
+  names:
+  - {first: Núria, last: Bel}
+  - {first: Nuria, last: Bel}
+nuria-bertomeu:
+  names:
+  - {first: Núria, last: Bertomeu}
+  - {first: Nuria, last: Bertomeu}
+  - {first: Núria, last: Bertomeu Castelló}
+  - {first: Núria Bertomeu, last: Castelló}
+nuria-castell:
+  names:
+  - {first: Núria, last: Castell}
+  - {first: Nuria, last: Castell}
+nuria-gala:
+  names:
+  - {first: Nuria, last: Gala}
+  - {first: Núria, last: Gala}
+  - {first: Nùria, last: Gala}
+oana-postolache:
+  names:
+  - {first: Oana, last: Postolache}
+  - {first: Oana-Diana, last: Postolache}
+octavia-maria-sulea:
+  names:
+  - {first: Octavia-Maria, last: Şulea}
+  - {first: Maria, last: Sulea}
+  - {first: Octavia-Maria, last: Sulea}
+  - {first: Maria-Octavia, last: Sulea}
+odbayar-chimeddorj:
+  names:
+  - {first: Odbayar, last: Chimeddorj}
+  - {first: Chimeddorj, last: Odbayar}
+oier-lopez-de-lacalle:
+  names:
+  - {first: Oier, last: Lopez de Lacalle}
+  - {first: Oier López, last: de Lacalle}
+  - {first: Oier Lopez, last: de Lacalle}
+  - {first: Oier, last: López de Lacalle}
+oistein-e-andersen:
+  names:
+  - {first: Øistein E., last: Andersen}
+  - {first: Øistein, last: Andersen}
+oiwi-parker-jones:
+  names:
+  - {first: ‘Ōiwi, last: Parker Jones}
+  - {first: Oiwi, last: Parker Jones}
+olatz-ansa:
+  names:
+  - {first: Olatz, last: Ansa}
+  - {first: O., last: Ansa}
+olga-n-lashevskaja:
+  names:
+  - {first: Olga N., last: Lashevskaja}
+  - {first: Olga, last: Lashevskaja}
+olivia-o-y-kwong:
+  names:
+  - {first: Olivia O.Y., last: Kwong}
+  - {first: O.Y., last: Kwong}
+  - {first: Oi Yee, last: Kwong}
+olivia-sanchez-graillet:
+  names:
+  - {first: Olivia, last: Sanchez-Graillet}
+  - {first: Olivia, last: Sanchez}
+olivier-boeffard:
+  names:
+  - {first: Olivier, last: Boëffard}
+  - {first: Olivier, last: Boeffard}
+olivier-hamon:
+  names:
+  - {first: Olivier, last: Hamon}
+  - {first: O., last: Hamon}
+olivier-kraif:
+  names:
+  - {first: Olivier, last: Kraif}
+  - {first: O., last: Kraif}
+om-p-damani:
+  names:
+  - {first: Om P., last: Damani}
+  - {first: Om, last: Damani}
+omar-zaidan:
+  names:
+  - {first: Omar, last: Zaidan}
+  - {first: Omar F., last: Zaidan}
+omer-farukhan-gunes:
+  names:
+  - {first: Omer Farukhan, last: Gunes}
+  - {first: Omer, last: Gunes}
+ona-de-gibert:
+  degree: University of Helsinki, Finland
+  names:
+  - {first: Ona, last: de Gibert}
+  - {first: Ona, last: de Gibert Bonet}
+  orcid: 0000-0002-7163-4807
+ondrej-bajgar:
+  names:
+  - {first: Ondřej, last: Bajgar}
+  - {first: Ondrej, last: Bajgar}
+ondrej-bojar:
+  names:
+  - {first: Ondřej, last: Bojar}
+  - {first: Ondrej, last: Bojar}
+onkar-arun-pandit:
+  names:
+  - {first: Onkar Arun, last: Pandit}
+  - {first: Onkar, last: Pandit}
+ornella-corazzari:
+  names:
+  - {first: Ornella, last: Corazzari}
+  - {first: O., last: Corazzari}
+orphee-de-clercq:
+  names:
+  - {first: Orphee, last: De Clercq}
+  - {first: Orphée, last: De Clercq}
+oscar-ferrandez:
+  names:
+  - {first: Óscar, last: Ferrández}
+  - {first: Oscar, last: Ferrandez}
+  - {first: Oscar, last: Ferrández}
+osmar-r-zaiane:
+  names:
+  - {first: Osmar R., last: Zaiane}
+  - {first: Osmar, last: Zaïane}
+  - {first: Osmar, last: Zaiane}
+  - {first: Osmar R., last: Zaïane}
+osvaldo-novais-oliveira-jr:
+  names:
+  - {first: Osvaldo Novais, last: Oliveira Jr.}
+  - {first: Osvaldo, last: Oliveira Jr}
+otakar-smrz:
+  names:
+  - {first: Otakar, last: Smrz}
+  - {first: Otakar, last: Smrž}
+oto-vale:
+  names:
+  - {first: Oto, last: Vale}
+  - {first: Oto A., last: Vale}
+owen-kimball:
+  names:
+  - {first: Owen, last: Kimball}
+  - {first: O., last: Kimball}
+owen-rambow:
+  names:
+  - {first: Owen, last: Rambow}
+  - {first: Owen C., last: Rambow}
+ozan-irsoy:
+  names:
+  - {first: Ozan, last: İrsoy}
+  - {first: Ozan, last: Irsoy}
+ozlem-cetinoglu:
+  names:
+  - {first: Özlem, last: Çetinoğlu}
+  - {first: Ozlem, last: Cetinoglu}
+  - {first: Özlem, last: Çetinoglu}
+ozlem-uzuner:
+  names:
+  - {first: Ozlem, last: Uzuner}
+  - {first: Özlem, last: Uzuner}
+p-c-ching:
+  names:
+  - {first: P. C., last: Ching}
+  - {first: P.C., last: Ching}
+p-h-j-van-der-kamp:
+  names:
+  - {first: P. H. J., last: van der Kamp}
+  - {first: P.H.J., last: van der Kamp}
+p-senthil-nathan:
+  names:
+  - {first: P. Senthil, last: Nathan}
+  - {first: Senthil, last: Nathan}
+pablo-duboue:
+  names:
+  - {first: Pablo, last: Duboue}
+  - {first: Pablo A., last: Duboue}
+  - {first: Pablo Ariel, last: Duboue}
+pablo-gervas:
+  names:
+  - {first: Pablo, last: Gervás}
+  - {first: P., last: Gervás}
+pablo-ruiz-fabo:
+  names:
+  - {first: Pablo, last: Ruiz Fabo}
+  - {first: Pablo, last: Ruiz}
+paloma-moreda-pozo:
+  names:
+  - {first: Paloma, last: Moreda Pozo}
+  - {first: Paloma, last: Moreda}
+pamela-e-fink:
+  names:
+  - {first: Pamela E., last: Fink}
+  - {first: P., last: Fink}
+pamela-jordan:
+  names:
+  - {first: Pamela, last: Jordan}
+  - {first: Pamela W., last: Jordan}
+panayiotis-georgiou:
+  names:
+  - {first: Panayiotis, last: Georgiou}
+  - {first: Panayiotis G., last: Georgiou}
+paola-pietrandrea:
+  names:
+  - {first: Paola, last: Pietrandrea}
+  - {first: Paola, last: Pietandrea}
+paola-velardi:
+  names:
+  - {first: Paola, last: Velardi}
+  - {first: P., last: Velardi}
+paolo-bouquet:
+  names:
+  - {first: Paolo, last: Bouquet}
+  - {first: P., last: Bouquet}
+paolo-puliti:
+  names:
+  - {first: Paolo, last: Puliti}
+  - {first: P., last: Puliti}
+paramveer-s-dhillon:
+  names:
+  - {first: Paramveer S., last: Dhillon}
+  - {first: Paramveer, last: Dhillon}
+partha-talukdar:
+  names:
+  - {first: Partha, last: Talukdar}
+  - {first: Partha Pratim, last: Talukdar}
+  - {first: Partha, last: Pratim Talukdar}
+  - {first: Partha P., last: Talukdar}
+pascal-nocera:
+  names:
+  - {first: Pascal, last: Nocéra}
+  - {first: Pascal, last: Nocera}
+pascale-feldkamp:
+  names:
+  - {first: Pascale, last: Feldkamp}
+  - {first: Pascale, last: Moreira}
+  - {first: Pascale Feldkamp, last: Moreira}
+  orcid: 0000-0002-2434-4268
+patrice-bellot:
+  names:
+  - {first: Patrice, last: Bellot}
+  - {first: P., last: Bellot}
+patrice-dalle:
+  names:
+  - {first: Patrice, last: Dalle}
+  - {first: P., last: Dalle}
+patricia-goncalves:
+  names:
+  - {first: Patricia, last: Gonçalves}
+  - {first: Patricia Nunes, last: Gonçalves}
+  - {first: Patrícia, last: Gonçalves}
+patricia-robinson:
+  names:
+  - {first: Patricia, last: Robinson}
+  - {first: P., last: Robinson}
+patricia-velazquez-morales:
+  names:
+  - {first: Patricia, last: Velazquez-Morales}
+  - {first: Patricia, last: Velázquez-Morales}
+patricio-martinez-barco:
+  names:
+  - {first: Patricio, last: Martínez-Barco}
+  - {first: Patricio, last: Martinez-Barco}
+  - {first: Patricio Martinez, last: Barco}
+  - {first: P., last: Martínez-Barco}
+patrick-cardinal:
+  names:
+  - {first: Patrick, last: Cardinal}
+  - {first: P., last: Cardinal}
+patrick-haller-zurich:
+  comment: University of Zurich
+  names:
+  - {first: Patrick, last: Haller}
+  orcid: 0000-0002-8968-7587
+patrick-healey:
+  names:
+  - {first: Patrick, last: Healey}
+  - {first: Pat, last: Healey}
+  - {first: Patrick G. T., last: Healey}
+  - {first: Patrick G.T., last: Healey}
+patrick-l-lange:
+  names:
+  - {first: Patrick L., last: Lange}
+  - {first: Patrick, last: Lange}
+patrick-paroubek:
+  names:
+  - {first: Patrick, last: Paroubek}
+  - {first: P., last: Paroubek}
+patrick-saint-dizier:
+  names:
+  - {first: Patrick, last: Saint-Dizier}
+  - {first: Patrick, last: Saint Dizier}
+patrizia-michelassi:
+  names:
+  - {first: Patrizia, last: Michelassi}
+  - {first: P., last: Michelassi}
+pattabhi-rk-rao:
+  names:
+  - {first: Pattabhi, last: RK Rao}
+  - {first: T. Pattabhi, last: R. K Rao}
+  - {first: Pattabhi RK, last: Rao}
+patti-price:
+  names:
+  - {first: Patti, last: Price}
+  - {first: Patti J., last: Price}
+  - {first: P. J., last: Price}
+  - {first: P., last: Price}
+paul-a-crook:
+  names:
+  - {first: Paul A., last: Crook}
+  - {first: Paul, last: Crook}
+paul-bedaride:
+  names:
+  - {first: Paul, last: Bedaride}
+  - {first: Paul, last: Bédaride}
+paul-bennett:
+  names:
+  - {first: Paul, last: Bennett}
+  - {first: Paul N., last: Bennett}
+paul-clough:
+  names:
+  - {first: Paul, last: Clough}
+  - {first: Paul D., last: Clough}
+paul-d-ji:
+  names:
+  - {first: Paul D, last: Ji}
+  - {first: Paul D., last: Ji}
+paul-deleglise:
+  names:
+  - {first: Paul, last: Deléglise}
+  - {first: Paul, last: Deleglise}
+paul-dixon:
+  names:
+  - {first: Paul, last: Dixon}
+  - {first: Paul R., last: Dixon}
+paul-h-garthwaite:
+  names:
+  - {first: Paul H., last: Garthwaite}
+  - {first: Paul, last: Garthwaite}
+  - {first: Paul H, last: Garthwaite}
+paul-kantor:
+  names:
+  - {first: Paul, last: Kantor}
+  - {first: Paul B., last: Kantor}
+paul-morarescu:
+  names:
+  - {first: Paul, last: Morarescu}
+  - {first: Paul, last: Morărescu}
+  - {first: Paul C., last: Morărescu}
+paul-placeway:
+  names:
+  - {first: Paul, last: Placeway}
+  - {first: P., last: Placeway}
+paul-roossin:
+  names:
+  - {first: Paul, last: Roossin}
+  - {first: P., last: Roossin}
+paul-s-jacobs:
+  names:
+  - {first: Paul S., last: Jacobs}
+  - {first: Paul, last: Jacobs}
+  - {first: P., last: Jacobs}
+paula-cardoso:
+  names:
+  - {first: Paula, last: Cardoso}
+  - {first: Paula C. Figueira, last: Cardoso}
+  - {first: Paula C. F., last: Cardoso}
+  - {first: P., last: Cardoso}
+paula-newman:
+  names:
+  - {first: Paula, last: Newman}
+  - {first: Paula S., last: Newman}
+  - {first: P. S., last: Newman}
+  - {first: P., last: Newman}
+paulo-c-f-de-oliveira:
+  names:
+  - {first: Paulo C F, last: de Oliveira}
+  - {first: Paulo C. F., last: de Oliveira}
+pavankumar-satuluri:
+  names:
+  - {first: Pavankumar, last: Satuluri}
+  - {first: Pavan Kumar, last: Satuluri}
+pavel-kveton:
+  names:
+  - {first: Pavel, last: Kvĕtoň}
+  - {first: Pavel, last: Kveton}
+  - {first: Pavel, last: Květoň}
+pavel-rychly:
+  names:
+  - {first: Pavel, last: Rychlý}
+  - {first: Pavel, last: Rychly}
+pavel-smrz:
+  names:
+  - {first: Pavel, last: Smrz}
+  - {first: Pavel, last: Smrž}
+pawel-mazur:
+  names:
+  - {first: Pawel, last: Mazur}
+  - {first: Paweł, last: Mazur}
+pedro-balage-filho:
+  names:
+  - {first: Pedro, last: Balage Filho}
+  - {first: Pedro, last: Balage}
+  - {first: Pedro Paulo, last: Balage Filho}
+  - {first: Pedro P. Balage, last: Filho}
+  - {first: Pedro, last: Filho}
+pedro-concejero-cerezo:
+  names:
+  - {first: Pedro Concejero, last: Cerezo}
+  - {first: Pedro, last: Concejero}
+pedro-ortiz-suarez:
+  names:
+  - {first: Pedro, last: Ortiz Suarez}
+  - {first: Pedro Javier, last: Ortiz Suárez}
+pengyuan-liu:
+  names:
+  - {first: Pengyuan, last: Liu}
+  - {first: PengYuan, last: Liu}
+  - {first: Peng-Yuan, last: Liu}
+penny-labropoulou:
+  names:
+  - {first: Penny, last: Labropoulou}
+  - {first: P., last: Labropoulou}
+pere-comas:
+  names:
+  - {first: Pere, last: Comas}
+  - {first: Pere R., last: Comas}
+pete-whitelock:
+  names:
+  - {first: Pete, last: Whitelock}
+  - {first: P. J., last: Whitelock}
+  - {first: P., last: Whitelock}
+peter-a-chew:
+  names:
+  - {first: Peter A., last: Chew}
+  - {first: Peter, last: Chew}
+peter-a-heeman:
+  names:
+  - {first: Peter A., last: Heeman}
+  - {first: Peter, last: Heeman}
+peter-a-rankel:
+  names:
+  - {first: Peter A., last: Rankel}
+  - {first: Peter, last: Rankel}
+peter-anick:
+  names:
+  - {first: Peter, last: Anick}
+  - {first: Peter G., last: Anick}
+peter-arno-coppen:
+  names:
+  - {first: Peter-Arno, last: Coppen}
+  - {first: P.A., last: Coppen}
+peter-corbett:
+  names:
+  - {first: Peter, last: Corbett}
+  - {first: Peter T., last: Corbett}
+peter-deng:
+  names:
+  - {first: Peter, last: Deng}
+  - {first: P., last: Deng}
+peter-f-brown:
+  names:
+  - {first: Peter F., last: Brown}
+  - {first: P., last: Brown}
+peter-foltz:
+  names:
+  - {first: Peter, last: Foltz}
+  - {first: Peter W., last: Foltz}
+peter-halacsy:
+  names:
+  - {first: Péter, last: Halácsy}
+  - {first: Péter, last: Halácsky}
+peter-j-liu:
+  names:
+  - {first: Peter J., last: Liu}
+  - {first: Peter, last: Liu}
+peter-j-ludlow:
+  names:
+  - {first: Peter J., last: Ludlow}
+  - {first: Peter, last: Ludlow}
+peter-jansen:
+  names:
+  - {first: Peter, last: Jansen}
+  - {first: Peter J., last: Jansen}
+peter-juel-henrichsen:
+  names:
+  - {first: Peter Juel, last: Henrichsen}
+  - {first: Peter, last: Juel Henrichsen}
+peter-ljunglof:
+  names:
+  - {first: Peter, last: Ljunglöf}
+  - {first: Peter, last: Ljunglof}
+peter-machonis:
+  names:
+  - {first: Peter, last: Machonis}
+  - {first: Peter A., last: Machonis}
+peter-pal-boda:
+  names:
+  - {first: Péter Pál, last: Boda}
+  - {first: Péter, last: Boda}
+peter-rossen-skadhauge:
+  names:
+  - {first: Peter, last: Rossen Skadhauge}
+  - {first: Peter Rossen, last: Skadhauge}
+peter-schauble:
+  names:
+  - {first: Peter, last: Schauble}
+  - {first: Peter, last: Schäuble}
+peter-spyns:
+  names:
+  - {first: Peter, last: Spyns}
+  - {first: P., last: Spyns}
+peter-turney:
+  names:
+  - {first: Peter, last: Turney}
+  - {first: Peter D., last: Turney}
+peter-v-desouza:
+  names:
+  - {first: Peter V., last: deSouza}
+  - {first: P. V., last: deSouza}
+  - {first: P.V., last: de Souza}
+peter-waiganjo-wagacha:
+  names:
+  - {first: Peter Waiganjo, last: Wagacha}
+  - {first: Peter W., last: Wagacha}
+  - {first: Peter, last: Wagacha}
+peter-white:
+  names:
+  - {first: Peter, last: White}
+  - {first: Pete, last: White}
+peter-wittenburg:
+  names:
+  - {first: Peter, last: Wittenburg}
+  - {first: P., last: Wittenburg}
+peteris-paikens:
+  names:
+  - {first: Peteris, last: Paikens}
+  - {first: Pēteris, last: Paikens}
+petr-jirku:
+  names:
+  - {first: Petr, last: Jirku}
+  - {first: P., last: Jirku}
+petr-pollak:
+  names:
+  - {first: Petr, last: Pollák}
+  - {first: Petr, last: Pollak}
+petr-sgall:
+  names:
+  - {first: Petr, last: Sgall}
+  - {first: P., last: Sgall}
+petra-barancikova:
+  names:
+  - {first: Petra, last: Barancikova}
+  - {first: Petra, last: Barančíková}
+petra-poukarova:
+  names:
+  - {first: Petra, last: Poukarová}
+  - {first: Petra, last: Klimešová}
+phil-blunsom:
+  names:
+  - {first: Phil, last: Blunsom}
+  - {first: Philip, last: Blunsom}
+phil-c-woodland:
+  names:
+  - {first: Phil C., last: Woodland}
+  - {first: P.C., last: Woodland}
+phil-harrison:
+  names:
+  - {first: Phil, last: Harrison}
+  - {first: Philip, last: Harrison}
+  - {first: P., last: Harrison}
+phil-sidney-ostheimer:
+  names:
+  - {first: Phil Sidney, last: Ostheimer}
+  - {first: Phil, last: Ostheimer}
+  orcid: 0009-0009-6186-3233
+philip-edmonds:
+  names:
+  - {first: Philip, last: Edmonds}
+  - {first: Philip G., last: Edmonds}
+philip-gorinski:
+  names:
+  - {first: Philip, last: Gorinski}
+  - {first: Philip John, last: Gorinski}
+philip-hanna:
+  names:
+  - {first: Philip, last: Hanna}
+  - {first: P., last: Hanna}
+  - {first: P, last: Hanna}
+philip-hoole:
+  names:
+  - {first: Philip, last: Hoole}
+  - {first: Phil, last: Hoole}
+philip-ogren:
+  names:
+  - {first: Philip, last: Ogren}
+  - {first: Philip V., last: Ogren}
+philip-r-cohen:
+  names:
+  - {first: Philip R., last: Cohen}
+  - {first: Philip, last: Cohen}
+  - {first: Phil R., last: Cohen}
+philip-s-yu:
+  names:
+  - {first: Philip S., last: Yu}
+  - {first: Philip, last: Yu}
+philipp-cimiano:
+  names:
+  - {first: Philipp, last: Cimiano}
+  - {first: P., last: Cimiano}
+philippe-boula-de-mareuil:
+  names:
+  - {first: Philippe, last: Boula de Mareüil}
+  - {first: Philippe Boula, last: de Mareüil}
+  - {first: P. Boula, last: de Mareüil}
+philippe-langlais:
+  names:
+  - {first: Philippe, last: Langlais}
+  - {first: Phillippe, last: Langlais}
+phuong-le-hong:
+  names:
+  - {first: Phuong, last: Le Hong}
+  - {first: Phuong, last: Le-Hong}
+  - {first: Hồng Phương, last: Lê}
+  - {first: Phương, last: Lê Hồng}
+  - {first: Hong-Phuong, last: Le}
+  - {first: H. Phuong, last: Le}
+phuong-thai-nguyen:
+  names:
+  - {first: Phuong-Thai, last: Nguyen}
+  - {first: Phuong Thai, last: Nguyen}
+pi-chuan-chang:
+  names:
+  - {first: Pi-Chuan, last: Chang}
+  - {first: Pichuan, last: Chang}
+piercarlo-rossi:
+  names:
+  - {first: Piercarlo, last: Rossi}
+  - {first: P., last: Rossi}
+piergiorgio-svaizer:
+  names:
+  - {first: Piergiorgio, last: Svaizer}
+  - {first: P., last: Svaizer}
+pierre-dumouchel:
+  names:
+  - {first: Pierre, last: Dumouchel}
+  - {first: P., last: Dumouchel}
+pierre-emmanuel-mazare:
+  names:
+  - {first: Pierre-Emmanuel, last: Mazare}
+  - {first: Pierre-Emmanuel, last: Mazaré}
+pierre-francois-marteau:
+  names:
+  - {first: Pierre-Francois, last: Marteau}
+  - {first: Pierre-François, last: Marteau}
+pierre-guillaume:
+  names:
+  - {first: Pierre, last: Guillaume}
+  - {first: P., last: Guillaume}
+pierre-zweigenbaum:
+  names:
+  - {first: Pierre, last: Zweigenbaum}
+  - {first: P., last: Zweigenbaum}
+pierrette-bouillon:
+  names:
+  - {first: Pierrette, last: Bouillon}
+  - {first: P., last: Bouillon}
+pietro-leo:
+  names:
+  - {first: Pietro, last: Leo}
+  - {first: P., last: Leo}
+pilar-leon-arauz:
+  names:
+  - {first: Pilar, last: León-Araúz}
+  - {first: Pilar León, last: Araúz}
+pinar-oezden-wennerberg:
+  names:
+  - {first: Pinar, last: Oezden Wennerberg}
+  - {first: Pinar, last: Wennerberg}
+  - {first: Pinar Oezden, last: Wennerberg}
+ping-che-yang:
+  names:
+  - {first: Ping-Che, last: Yang}
+  - {first: Ping-che, last: Yang}
+ping-wai-wong:
+  names:
+  - {first: Ping Wai, last: Wong}
+  - {first: Percy Ping-Wai, last: Wong}
+pirros-tsiakoulis:
+  names:
+  - {first: Pirros, last: Tsiakoulis}
+  - {first: P., last: Tsiakoulis}
+plaban-kr-bhowmick:
+  names:
+  - {first: Plaban Kr., last: Bhowmick}
+  - {first: Plaban, last: Bhowmick}
+po-chun-chen:
+  names:
+  - {first: Po Chun, last: Chen}
+  - {first: Po-Chun, last: Chen}
+po-hsuan-chen:
+  names:
+  - {first: Po Hsuan, last: Chen}
+  - {first: Po-Hsuan, last: Chen}
+po-yu-liang:
+  names:
+  - {first: Po-Yu, last: Liang}
+  - {first: Po-yu, last: Liang}
+poul-soren-kjaersgaard:
+  names:
+  - {first: Poul Søren, last: Kjærsgaard}
+  - {first: Poul Soren, last: Kjaersgaard}
+pradeep-muthukrishnan:
+  names:
+  - {first: Pradeep, last: Muthukrishnan}
+  - {first: Pradeep, last: Muthukrishan}
+pradip-dey:
+  names:
+  - {first: Pradip, last: Dey}
+  - {first: Paradip, last: Dey}
+prafulla-kumar-choubey:
+  names:
+  - {first: Prafulla Kumar, last: Choubey}
+  - {first: Prafulla, last: Choubey}
+prajwol-shrestha:
+  names:
+  - {first: Prajwol, last: Shrestha}
+  - {first: Prajol, last: Shrestha}
+pranav-a:
+  comment: UC Santa Cruz
+  names:
+  - {first: Pranav, last: A}
+  similar:
+  - pranav-anand
+pranav-anand:
+  comment: Dayta AI
+  names:
+  - {first: Pranav, last: Anand}
+  similar:
+  - pranav-a
+pranav-goel-umd:
+  comment: UMD
+  disable_name_matching: true
+  names:
+  - {first: Pranav, last: Goel}
+  orcid: 0000-0003-1037-2687
+pranav-narayanan-venkit:
+  names:
+  - {first: Pranav Narayanan, last: Venkit}
+  - {first: Pranav, last: Venkit}
+pranava-swaroop-madhyastha:
+  names:
+  - {first: Pranava Swaroop, last: Madhyastha}
+  - {first: Pranava, last: Madhyastha}
+praneeth-m-shishtla:
+  names:
+  - {first: Praneeth M., last: Shishtla}
+  - {first: Praneeth, last: Shishtla}
+  - {first: Praneeth M, last: Shishtla}
+prashanth-mannem:
+  names:
+  - {first: Prashanth, last: Mannem}
+  - {first: Prashanth Reddy, last: Mannem}
+  - {first: Prashanth, last: Reddy}
+prathusha-kameswara-sarma:
+  names:
+  - {first: Prathusha, last: Kameswara Sarma}
+  - {first: Prathusha, last: K Sarma}
+pratikkumar-patel:
+  names:
+  - {first: Pratikkumar, last: Patel}
+  - {first: Pratik, last: Patel}
+praveen-paritosh:
+  names:
+  - {first: Praveen, last: Paritosh}
+  - {first: Praveen, last: P}
+preetam-maloor:
+  names:
+  - {first: Preetam, last: Maloor}
+  - {first: P., last: Maloor}
+prem-natarajan:
+  names:
+  - {first: Prem, last: Natarajan}
+  - {first: Premkumar, last: Natarajan}
+preslav-nakov:
+  names:
+  - {first: Preslav, last: Nakov}
+  - {first: Preslav I., last: Nakov}
+primoz-jakopin:
+  names:
+  - {first: Primož, last: Jakopin}
+  - {first: Primoz, last: Jakopin}
+pu-zhao-northeastern:
+  comment: Northeastern
+  disable_name_matching: true
+  names:
+  - {first: Pu, last: Zhao}
+  orcid: 0000-0001-5018-2859
+pushpak-bhattacharyya:
+  names:
+  - {first: Pushpak, last: Bhattacharyya}
+  - {first: Pushpak, last: Bhattacharya}
+qi-li-ub:
+  degree: University at Buffalo
+  disable_name_matching: true
+  names:
+  - {first: Qi, last: Li}
+  orcid: 0000-0002-3136-2157
+qi-quan-huang:
+  names:
+  - {first: Qi-quan, last: Huang}
+  - {first: Qi-Quan, last: Huang}
+qian-cao-renmin:
+  comment: Renmin
+  disable_name_matching: true
+  names:
+  - {first: Qian, last: Cao}
+  orcid: 0000-0003-3288-1714
+qiaoming-zhu:
+  names:
+  - {first: Qiaoming, last: Zhu}
+  - {first: Qiao-ming, last: Zhu}
+  - {first: Qiao-Ming, last: Zhu}
+  - {first: QiaoMing, last: Zhu}
+qiguang-lin:
+  names:
+  - {first: Qiguang, last: Lin}
+  - {first: Q., last: Lin}
+qin-lu:
+  names:
+  - {first: Qin, last: Lu}
+  - {first: Q., last: Lu}
+qingqing-cai:
+  names:
+  - {first: Qingqing, last: Cai}
+  - {first: Qing-qing, last: Cai}
+quan-hung-tran:
+  names:
+  - {first: Quan Hung, last: Tran}
+  - {first: Quan, last: Tran}
+quang-le-minh:
+  names:
+  - {first: Quang, last: Le Minh}
+  - {first: Minh Quang, last: Le}
+quang-thuy-ha:
+  names:
+  - {first: Quang Thuy, last: Ha}
+  - {first: Quang-Thuy, last: Ha}
+quoc-khanh-do:
+  names:
+  - {first: Quoc Khanh, last: Do}
+  - {first: Quoc-Khanh, last: Do}
+quoc-le:
+  names:
+  - {first: Quoc, last: Le}
+  - {first: Quoc V., last: Le}
+quy-nguyen:
+  names:
+  - {first: Quy, last: Nguyen}
+  - {first: Quy T., last: Nguyen}
+r-a-smit:
+  names:
+  - {first: R. A., last: Smit}
+  - {first: R.A., last: Smit}
+r-mahesh-k-sinha:
+  names:
+  - {first: R Mahesh K, last: Sinha}
+  - {first: R. Mahesh K., last: Sinha}
+r-nozohoor-farshi:
+  names:
+  - {first: R., last: Nozohoor-Farshi}
+  - {first: R, last: Nozohoor-Farshi}
+r-piotrowski:
+  names:
+  - {first: R., last: Piotrowski}
+  - {first: R. G., last: Piotrowski}
+r-thomas-mccoy:
+  names:
+  - {first: R. Thomas, last: McCoy}
+  - {first: Tom, last: McCoy}
+rachel-edita-roxas:
+  names:
+  - {first: Rachel Edita, last: Roxas}
+  - {first: Rachel Edita O., last: Roxas}
+  - {first: Rachel, last: Roxas}
+rachele-sprugnoli:
+  names:
+  - {first: Rachele, last: Sprugnoli}
+  - {first: R., last: Sprugnoli}
+rada-mihalcea:
+  names:
+  - {first: Rada, last: Mihalcea}
+  - {first: Rada F., last: Mihalcea}
+radoslaw-ramocki:
+  names:
+  - {first: Radoslaw, last: Ramocki}
+  - {first: Radosław, last: Ramocki}
+radovan-garabik:
+  names:
+  - {first: Radovan, last: Garabík}
+  - {first: Radovan, last: Garabik}
+radu-florian:
+  names:
+  - {first: Radu, last: Florian}
+  - {first: R., last: Florian}
+rafa-saiz:
+  names:
+  - {first: Rafa, last: Saiz}
+  - {first: R., last: Saiz}
+rafael-e-banchs:
+  names:
+  - {first: Rafael E., last: Banchs}
+  - {first: Rafael, last: Banchs}
+rafael-michael-karampatsis:
+  names:
+  - {first: Rafael - Michael, last: Karampatsis}
+  - {first: Rafael Michael, last: Karampatsis}
+rafael-munoz:
+  names:
+  - {first: Rafael, last: Muñoz}
+  - {first: Rafael, last: Muñoz Guillena}
+  - {first: Rafael, last: Muñoz-Guillena}
+  - {first: R., last: Muñoz}
+raffaella-bernardi:
+  names:
+  - {first: Raffaella, last: Bernardi}
+  - {first: R., last: Bernardi}
+raghava-krishnan:
+  names:
+  - {first: Raghava, last: Krishnan}
+  - {first: R, last: Krishnan}
+raghavendra-udupa:
+  names:
+  - {first: Raghavendra, last: Udupa}
+  - {first: Raghavendra Udupa, last: U.}
+raghu-pujitha-gade:
+  names:
+  - {first: Raghu Pujitha, last: Gade}
+  - {first: Pujitha, last: Gade}
+rajakrishnan-rajkumar:
+  names:
+  - {first: Rajakrishnan, last: Rajkumar}
+  - {first: Rajkumar, last: Rajakrishnan}
+rajen-chatterjee:
+  names:
+  - {first: Rajen, last: Chatterjee}
+  - {first: Rajan, last: Chatterjee}
+rajesh-bhat:
+  names:
+  - {first: Rajesh, last: Bhat}
+  similar:
+  - rajesh-bhatt
+rajesh-bhatt:
+  comment: UMass Amherst
+  names:
+  - {first: Rajesh, last: Bhatt}
+  similar:
+  - rajesh-bhat
+rajiv-shah:
+  names:
+  - {first: Rajiv, last: Shah}
+  - {first: Rajiv Ratn, last: Shah}
+rajkumar-pujari:
+  names:
+  - {first: Rajkumar, last: Pujari}
+  - {first: Pujari, last: Rajkumar}
+rakesh-r-menon:
+  names:
+  - {first: Rakesh R, last: Menon}
+  - {first: Rakesh, last: Menon}
+ralf-d-brown:
+  names:
+  - {first: Ralf D., last: Brown}
+  - {first: Ralf, last: Brown}
+ralf-schlueter:
+  names:
+  - {first: Ralf, last: Schlueter}
+  - {first: Ralf, last: Schlüter}
+ralph-grishman:
+  names:
+  - {first: Ralph, last: Grishman}
+  - {first: R., last: Grishman}
+ralph-weischedel:
+  names:
+  - {first: Ralph, last: Weischedel}
+  - {first: Ralph M., last: Weischedel}
+raman-chandrasekar:
+  names:
+  - {first: Raman, last: Chandrasekar}
+  - {first: R., last: Chandrasekar}
+  - {first: Raman, last: Chandraseker}
+ramesh-manuvinakurike:
+  names:
+  - {first: Ramesh, last: Manuvinakurike}
+  - {first: Ramesh, last: Manuvirakurike}
+ramon-fernandez-astudillo:
+  names:
+  - {first: Ramón, last: Fernandez Astudillo}
+  - {first: Ramón, last: Astudillo}
+  - {first: Ramón, last: F. Astudillo}
+  - {first: Ramon, last: F. Astudillo}
+ramon-granell:
+  names:
+  - {first: Ramon, last: Granell}
+  - {first: Ramón, last: Granell}
+ramon-lopez-cozar:
+  names:
+  - {first: Ramón, last: López-Cózar}
+  - {first: R., last: López-Cózar}
+ramona-andreea-turcu:
+  names:
+  - {first: Ramona Andreea, last: Turcu}
+  - {first: Ramona-Andreea, last: Turcu}
+ramzi-abbes:
+  names:
+  - {first: Ramzi, last: Abbès}
+  - {first: Ramzi, last: Abbes}
+randall-a-helzerman:
+  names:
+  - {first: Randall A., last: Helzerman}
+  - {first: R. A., last: Helzerman}
+randy-m-kaplan:
+  names:
+  - {first: Randy M., last: Kaplan}
+  - {first: Randy, last: Kaplan}
+ranka-stankovic:
+  names:
+  - {first: Ranka, last: Stanković}
+  - {first: Ranka, last: Stankoviæ}
+ranran-haoran-zhang:
+  comment: Penn State University
+  names:
+  - {first: Ranran Haoran, last: Zhang}
+raoul-n-smith:
+  names:
+  - {first: Raoul N., last: Smith}
+  - {first: Raoul N, last: Smith}
+raphael-hoffmann:
+  names:
+  - {first: Raphael, last: Hoffmann}
+  - {first: Raphael, last: Hoffman}
+raphael-rubino:
+  names:
+  - {first: Raphael, last: Rubino}
+  - {first: Raphaël, last: Rubino}
+raphael-troncy:
+  names:
+  - {first: Raphael, last: Troncy}
+  - {first: Raphaël, last: Troncy}
+raquel-fernandez:
+  names:
+  - {first: Raquel, last: Fernández}
+  - {first: Raquel, last: Fernandez}
+raquel-martinez:
+  names:
+  - {first: Raquel, last: Martínez}
+  - {first: Raquel, last: Martinez}
+rasoul-samad-zadeh-kaljahi:
+  names:
+  - {first: Rasoul, last: Samad Zadeh Kaljahi}
+  - {first: Rasul, last: Samad Zadeh Kaljahi}
+ravikumar-komandur:
+  names:
+  - {first: Ravikumar, last: Komandur}
+  - {first: K, last: Ravikumar}
+ravikumar-kondadadi:
+  names:
+  - {first: Ravikumar, last: Kondadadi}
+  - {first: Ravi, last: Kondadadi}
+  - {first: Ravi Kumar, last: Kondadadi}
+raymond-mooney:
+  names:
+  - {first: Raymond, last: Mooney}
+  - {first: Raymond J., last: Mooney}
+raymond-ng:
+  names:
+  - {first: Raymond, last: Ng}
+  - {first: Raymond T., last: Ng}
+raymond-ptucha:
+  names:
+  - {first: Raymond, last: Ptucha}
+  - {first: Ray, last: Ptucha}
+raymond-wong:
+  names:
+  - {first: Raymond, last: Wong}
+  - {first: Raymond K., last: Wong}
+razvan-bunescu:
+  names:
+  - {first: Razvan, last: Bunescu}
+  - {first: Razvan C., last: Bunescu}
+rebecca-bruce:
+  names:
+  - {first: Rebecca, last: Bruce}
+  - {first: Rebecca F., last: Bruce}
+rebecca-j-passonneau:
+  names:
+  - {first: Rebecca J., last: Passonneau}
+  - {first: Rebecca, last: Passonneau}
+reinald-kim-amplayo:
+  names:
+  - {first: Reinald Kim, last: Amplayo}
+  - {first: Reinald, last: Kim Amplayo}
+reinhard-schaler:
+  names:
+  - {first: Reinhard, last: Schäler}
+  - {first: Reinhard, last: Schaler}
+remi-zajac:
+  names:
+  - {first: Remi, last: Zajac}
+  - {first: Rémi, last: Zajac}
+remko-scha:
+  names:
+  - {first: Remko, last: Scha}
+  - {first: Remko J. H., last: Scha}
+  - {first: R. J. H., last: Scha}
+remo-raffaelli:
+  names:
+  - {first: Remo, last: Raffaelli}
+  - {first: R., last: Raffaelli}
+ren-yuan-lyu:
+  names:
+  - {first: Ren-Yuan, last: Lyu}
+  - {first: Ren-yuan, last: Lyu}
+renata-vieira:
+  names:
+  - {first: Renata, last: Vieira}
+  - {first: R., last: Vieira}
+renate-henschel:
+  names:
+  - {first: Renate, last: Henschel}
+  - {first: R., last: Henschel}
+renato-de-mori:
+  names:
+  - {first: Renato, last: De Mori}
+  - {first: Renato, last: de Mori}
+rene-schneider:
+  names:
+  - {first: René, last: Schneider}
+  - {first: Rene, last: Schneider}
+rene-van-der-wal:
+  names:
+  - {first: Rene, last: van der Wal}
+  - {first: René, last: van der Wal}
+  - {first: Rene, last: Van Der Wal}
+rezarta-islamaj-dogan:
+  names:
+  - {first: Rezarta, last: Islamaj Dogan}
+  - {first: Rezarta, last: Islamaj Doğan}
+ricardo-de-cordoba:
+  names:
+  - {first: Ricardo, last: de Córdoba}
+  - {first: Ricardo, last: de Cordoba}
+ricardo-ribeiro:
+  names:
+  - {first: Ricardo, last: Ribeiro}
+  - {first: Ricardo Daniel, last: Ribeiro}
+riccardo-del-gratta:
+  names:
+  - {first: Riccardo, last: Del Gratta}
+  - {first: Riccardo, last: del Gratta}
+richard-a-hudson:
+  names:
+  - {first: Richard A., last: Hudson}
+  - {first: Richard, last: Hudson}
+richard-a-sharman:
+  names:
+  - {first: Richard A., last: Sharman}
+  - {first: R.A., last: Sharman}
+  - {first: R. A., last: Sharman}
+richard-c-wang:
+  names:
+  - {first: Richard C., last: Wang}
+  - {first: Richard, last: Wang}
+richard-d-boyce:
+  names:
+  - {first: Richard D., last: Boyce}
+  - {first: Richard, last: Boyce}
+richard-e-leibbrandt:
+  names:
+  - {first: Richard E, last: Leibbrandt}
+  - {first: Richard E., last: Leibbrandt}
+richard-evans:
+  names:
+  - {first: Richard, last: Evans}
+  - {first: R., last: Evans}
+richard-f-e-sutcliffe:
+  names:
+  - {first: Richard F. E., last: Sutcliffe}
+  - {first: Richard F.E., last: Sutcliffe}
+richard-farkas:
+  names:
+  - {first: Richárd, last: Farkas}
+  - {first: Richard, last: Farkas}
+richard-fritzson:
+  names:
+  - {first: Richard, last: Fritzson}
+  - {first: Rich, last: Fritzson}
+richard-g-morgan:
+  names:
+  - {first: Richard G., last: Morgan}
+  - {first: Richard, last: Morgan}
+richard-kittredge:
+  names:
+  - {first: Richard, last: Kittredge}
+  - {first: R., last: Kittredge}
+richard-l-lewis:
+  names:
+  - {first: Richard L., last: Lewis}
+  - {first: Richard, last: Lewis}
+richard-m-stern:
+  names:
+  - {first: Richard M., last: Stern}
+  - {first: Richard, last: Stern}
+richard-schwartz:
+  names:
+  - {first: Richard, last: Schwartz}
+  - {first: Rich, last: Schwartz}
+  - {first: R., last: Schwartz}
+richard-sproat:
+  names:
+  - {first: Richard, last: Sproat}
+  - {first: Richard W., last: Sproat}
+richard-tzong-han-tsai:
+  names:
+  - {first: Richard Tzong-Han, last: Tsai}
+  - {first: Tzong-Han, last: Tsai}
+  - {first: Tzong-Han Richard, last: Tsai}
+  - {first: Richard Tzong-han, last: Tsai}
+richard-zuber:
+  names:
+  - {first: Richard, last: Zuber}
+  - {first: R., last: Zuber}
+richmond-h-thomason:
+  names:
+  - {first: Richmond H., last: Thomason}
+  - {first: Richmond, last: Thomason}
+rie-johnson:
+  names:
+  - {first: Rie, last: Johnson}
+  - {first: Rie, last: Ando}
+  - {first: Rie Kubota, last: Ando}
+rihards-kalnins:
+  names:
+  - {first: Rihards, last: Kalniņš}
+  - {first: Rihards, last: Kalnins}
+rihards-krislauks:
+  names:
+  - {first: Rihards, last: Krišlauks}
+  - {first: Rihards, last: Krislauks}
+rik-koncel-kedziorski:
+  names:
+  - {first: Rik, last: Koncel-Kedziorski}
+  - {first: R., last: Koncel-Kedziorski}
+rila-mandala:
+  names:
+  - {first: Rila, last: Mandala}
+  - {first: Mandala, last: Rila}
+rita-nuebel:
+  names:
+  - {first: Rita, last: Nuebel}
+  - {first: Rita, last: Nüebel}
+ritesh-shah:
+  names:
+  - {first: Ritesh, last: Shah}
+  - {first: Ritesh M., last: Shah}
+riyaz-ahmad-bhat:
+  names:
+  - {first: Riyaz Ahmad, last: Bhat}
+  - {first: Riyaz A., last: Bhat}
+riza-theresa-batista-navarro:
+  names:
+  - {first: Riza Theresa, last: Batista-Navarro}
+  - {first: Riza, last: Batista-Navarro}
+robert-bobrow:
+  names:
+  - {first: Robert, last: Bobrow}
+  - {first: Robert J., last: Bobrow}
+  - {first: Rusty, last: Bobrow}
+  - {first: R., last: Bobrow}
+robert-c-berwick:
+  names:
+  - {first: Robert C., last: Berwick}
+  - {first: Robert, last: Berwick}
+  - {first: Robert Cregar, last: Berwick}
+robert-c-moore:
+  names:
+  - {first: Robert C., last: Moore}
+  - {first: Robert, last: Moore}
+  - {first: R. C., last: Moore}
+robert-e-mercer:
+  comment: Univ. of Western Ontario
+  names:
+  - {first: Robert E., last: Mercer}
+  - {first: Robert, last: Mercer}
+  similar:
+  - robert-l-mercer
+robert-frederking:
+  names:
+  - {first: Robert, last: Frederking}
+  - {first: Robert E., last: Frederking}
+robert-gaizauskas:
+  names:
+  - {first: Robert, last: Gaizauskas}
+  - {first: Robert J., last: Gaizauskas}
+  - {first: Rob, last: Gaizauskas}
+  - {first: R., last: Gaizauskas}
+robert-granville:
+  names:
+  - {first: Robert, last: Granville}
+  - {first: Robert Alan, last: Granville}
+robert-i-damper:
+  names:
+  - {first: Robert I., last: Damper}
+  - {first: R.I., last: Damper}
+robert-ingria:
+  names:
+  - {first: Robert, last: Ingria}
+  - {first: R., last: Ingria}
+robert-j-hendley:
+  names:
+  - {first: Robert J., last: Hendley}
+  - {first: Robert, last: Hendley}
+robert-j-kuhns:
+  names:
+  - {first: Robert J., last: Kuhns}
+  - {first: Robert, last: Kuhns}
+robert-l-mercer:
+  comment: IBM
+  names:
+  - {first: Robert L., last: Mercer}
+  - {first: R., last: Mercer}
+  - {first: R. L., last: Mercer}
+  - {first: Robert, last: Mercer}
+  similar:
+  - robert-e-mercer
+robert-m-haralick:
+  names:
+  - {first: Robert M., last: Haralick}
+  - {first: Robert, last: Haralick}
+robert-malouf:
+  names:
+  - {first: Robert, last: Malouf}
+  - {first: Rob, last: Malouf}
+robert-milne:
+  names:
+  - {first: Robert, last: Milne}
+  - {first: Rob, last: Milne}
+robert-p-futrelle:
+  names:
+  - {first: Robert P., last: Futrelle}
+  - {first: Robert, last: Futrelle}
+robert-phillips:
+  names:
+  - {first: Robert, last: Phillips}
+  - {first: Rob, last: Phillips}
+robert-s-belvin:
+  names:
+  - {first: Robert S., last: Belvin}
+  - {first: Robert, last: Belvin}
+  - {first: Robert S., last: Melvin}
+robert-s-swier:
+  names:
+  - {first: Robert S., last: Swier}
+  - {first: Robert, last: Swier}
+robert-stewart:
+  names:
+  - {first: Robert, last: Stewart}
+  - {first: Rob, last: Stewart}
+robert-t-kasper:
+  names:
+  - {first: Robert T., last: Kasper}
+  - {first: Robert, last: Kasper}
+robert-t-schultz:
+  names:
+  - {first: Robert T., last: Schultz}
+  - {first: Robert, last: Schultz}
+robert-w-morris:
+  names:
+  - {first: Robert W., last: Morris}
+  - {first: Robert, last: Morris}
+robert-w-p-luk:
+  names:
+  - {first: Robert W.P., last: Luk}
+  - {first: R.W.P., last: Luk}
+robert-weide:
+  names:
+  - {first: Robert, last: Weide}
+  - {first: R., last: Weide}
+robert-wing-pong-luk:
+  names:
+  - {first: Robert Wing Pong, last: Luk}
+  - {first: Wing-Pong, last: Luk}
+roberta-catizone:
+  names:
+  - {first: Roberta, last: Catizone}
+  - {first: R., last: Catizone}
+roberta-h-merchant:
+  names:
+  - {first: Roberta H., last: Merchant}
+  - {first: Roberta, last: Merchant}
+roberto-barra-chicote:
+  names:
+  - {first: Roberto, last: Barra-Chicote}
+  - {first: Roberto Barra, last: Chicote}
+roberto-basili:
+  names:
+  - {first: Roberto, last: Basili}
+  - {first: R., last: Basili}
+roberto-garigliano:
+  names:
+  - {first: Roberto, last: Garigliano}
+  - {first: R., last: Garigliano}
+roberto-pieraccini:
+  names:
+  - {first: Roberto, last: Pieraccini}
+  - {first: R., last: Pieraccini}
+robin-l-hill:
+  names:
+  - {first: Robin L., last: Hill}
+  - {first: Robin, last: Hill}
+roddy-cowie:
+  names:
+  - {first: Roddy, last: Cowie}
+  - {first: R., last: Cowie}
+roderick-l-johnson:
+  names:
+  - {first: Roderick L., last: Johnson}
+  - {first: R.L., last: Johnson}
+  - {first: R., last: Johnson}
+rodger-kibble:
+  names:
+  - {first: Rodger, last: Kibble}
+  - {first: R., last: Kibble}
+rodney-nielsen:
+  names:
+  - {first: Rodney, last: Nielsen}
+  - {first: Rodney D., last: Nielsen}
+rodolfo-delmonte:
+  names:
+  - {first: Rodolfo, last: Delmonte}
+  - {first: R., last: Delmonte}
+rodrigo-agerri:
+  names:
+  - {first: Rodrigo, last: Agerri}
+  - {first: R., last: Agerri}
+roger-c-schank:
+  names:
+  - {first: Roger C., last: Schank}
+  - {first: Roger, last: Schank}
+roger-evans:
+  names:
+  - {first: Roger, last: Evans}
+  - {first: R, last: Evans}
+roger-k-moore:
+  names:
+  - {first: Roger K., last: Moore}
+  - {first: Roger, last: Moore}
+roger-levy:
+  names:
+  - {first: Roger, last: Levy}
+  - {first: Roger P., last: Levy}
+rohini-k-srihari:
+  names:
+  - {first: Rohini K., last: Srihari}
+  - {first: Rohini, last: Srihari}
+  - {first: K. Rohini, last: Srihari}
+rohit-kate:
+  names:
+  - {first: Rohit, last: Kate}
+  - {first: Rohit J., last: Kate}
+roland-r-hausser:
+  names:
+  - {first: Roland R., last: Hausser}
+  - {first: Roland, last: Hausser}
+romina-altamirano:
+  names:
+  - {first: Romina, last: Altamirano}
+  - {first: Ivana Romina, last: Altamirano}
+romuald-skiba:
+  names:
+  - {first: Romuald, last: Skiba}
+  - {first: R., last: Skiba}
+ron-daniel-jr:
+  names:
+  - {first: Ron, last: 'Daniel, Jr.'}
+  - {first: Ron, last: Daniel}
+  - {first: Ron, last: Daniel Jr.}
+ronald-cole:
+  names:
+  - {first: Ronald, last: Cole}
+  - {first: Ron, last: Cole}
+  - {first: Ronald A., last: Cole}
+  - {first: R., last: Cole}
+ronald-m-kaplan:
+  names:
+  - {first: Ronald M., last: Kaplan}
+  - {first: Ronald, last: Kaplan}
+  - {first: Ron, last: Kaplan}
+ronald-rosenfeld:
+  names:
+  - {first: Ronald, last: Rosenfeld}
+  - {first: R., last: Rosenfeld}
+ronaldo-teixeira-martins:
+  names:
+  - {first: Ronaldo Teixeira, last: Martins}
+  - {first: Ronaldo, last: Martins}
+ronan-g-reilly:
+  names:
+  - {first: Ronan G., last: Reilly}
+  - {first: Ronan, last: Reilly}
+ronan-mac-an-tsaoir:
+  names:
+  - {first: Rónan, last: Mac an tSaoir}
+  - {first: Ronan, last: Mac an tSaoir}
+roni-ben-aharon:
+  names:
+  - {first: Roni, last: Ben Aharon}
+  - {first: Roni, last: Ben-Aharon}
+ronnie-w-smith:
+  names:
+  - {first: Ronnie W., last: Smith}
+  - {first: Ronnie, last: Smith}
+roque-lopez-condori:
+  names:
+  - {first: Roque, last: Lopez Condori}
+  - {first: Roque, last: López}
+rosa-del-gaudio:
+  names:
+  - {first: Rosa, last: Del Gaudio}
+  - {first: Rosa, last: Gaudio}
+rose-catherine-kanjirathinkal:
+  names:
+  - {first: Rose Catherine, last: Kanjirathinkal}
+  - {first: Rose, last: Catherine}
+rosemary-stevenson:
+  names:
+  - {first: Rosemary, last: Stevenson}
+  - {first: R., last: Stevenson}
+roser-sauri:
+  names:
+  - {first: Roser, last: Saurí}
+  - {first: Roser, last: Sauri}
+roxana-girju:
+  names:
+  - {first: Roxana, last: Girju}
+  - {first: Roxana, last: Gîrju}
+roxane-segers:
+  names:
+  - {first: Roxane, last: Segers}
+  - {first: Roxanne, last: Segers}
+roy-tromble:
+  names:
+  - {first: Roy, last: Tromble}
+  - {first: Roy W., last: Tromble}
+ruben-a-proano:
+  names:
+  - {first: Ruben A., last: Proano}
+  - {first: Rubén, last: Proaño}
+  - {first: Rubén A., last: Proaño}
+ruben-izquierdo:
+  names:
+  - {first: Rubén, last: Izquierdo}
+  - {first: Ruben, last: Izquierdo Bevia}
+  - {first: Ruben, last: Izquierdo}
+ruben-san-segundo:
+  names:
+  - {first: Ruben, last: San-Segundo}
+  - {first: Rubén, last: San-Segundo}
+ruben-urizar:
+  names:
+  - {first: Ruben, last: Urizar}
+  - {first: Rubén, last: Urizar}
+  - {first: R., last: Urizar}
+ruhi-sarikaya:
+  names:
+  - {first: Ruhi, last: Sarikaya}
+  - {first: Ruhi, last: Srikaya}
+ruket-cakici:
+  names:
+  - {first: Ruket, last: Çakıcı}
+  - {first: Ruket, last: Cakici}
+  - {first: Ruken, last: Cakici}
+  - {first: Ruken, last: Çakıcı}
+ruli-manurung:
+  names:
+  - {first: Ruli, last: Manurung}
+  - {first: R., last: Manurung}
+rune-saetre:
+  names:
+  - {first: Rune, last: Sætre}
+  - {first: Rune, last: Saetre}
+ruslan-mitkov:
+  names:
+  - {first: Ruslan, last: Mitkov}
+  - {first: R., last: Mitkov}
+russell-beckley:
+  names:
+  - {first: Russell, last: Beckley}
+  - {first: Russ, last: Beckley}
+rutu-mulkar-mehta:
+  names:
+  - {first: Rutu, last: Mulkar-Mehta}
+  - {first: Rutu, last: Mulkar}
+ruy-luiz-milidiu:
+  names:
+  - {first: Ruy Luiz, last: Milidiú}
+  - {first: Ruy, last: Milidiú}
+ruzena-bajcsy:
+  names:
+  - {first: Ruzena, last: Bajcsy}
+  - {first: R., last: Bajcsy}
+ryan-boyd:
+  degree: University of Texas at Austin
+  names:
+  - {first: Ryan, last: Boyd}
+  - {first: Ryan L., last: Boyd}
+  orcid: 0000-0002-1876-6050
+ryan-roth:
+  names:
+  - {first: Ryan, last: Roth}
+  - {first: Ryan, last: M. Roth}
+ryen-white:
+  names:
+  - {first: Ryen, last: White}
+  - {first: Ryan, last: White}
+ryochi-sugimura:
+  names:
+  - {first: Ryochi, last: Sugimura}
+  - {first: R., last: Sugimura}
+ryosuke-takahashi-tohoku:
+  comment: Tohoku
+  disable_name_matching: true
+  names:
+  - {first: Ryosuke, last: Takahashi}
+  orcid: 0009-0002-9887-2781
+s-r-k-branavan:
+  names:
+  - {first: S.R.K., last: Branavan}
+  - {first: S. R. K., last: Branavan}
+s-rajendran:
+  names:
+  - {first: S., last: Rajendran}
+  - {first: Rajendran, last: S}
+s-v-n-vishwanathan:
+  names:
+  - {first: S. V. N., last: Vishwanathan}
+  - {first: S.V.N., last: Vishwanathan}
+sabine-schulte-im-walde:
+  names:
+  - {first: Sabine, last: Schulte im Walde}
+  - {first: Sabine, last: Schulte Im Walde}
+  - {first: Sabine, last: Schulte in Walde}
+sabrina-j-mielke:
+  names:
+  - {first: Sabrina J., last: Mielke}
+  - {first: Sabrina, last: Mielke}
+sachindra-joshi:
+  names:
+  - {first: Sachindra, last: Joshi}
+  - {first: Sachin, last: Joshi}
+sadaoki-furui:
+  names:
+  - {first: Sadaoki, last: Furui}
+  - {first: S., last: Furui}
+sadid-a-hasan:
+  names:
+  - {first: Sadid A., last: Hasan}
+  - {first: Sadid, last: Hasan}
+said-ouatik-el-alaoui:
+  names:
+  - {first: Said Ouatik, last: El Alaoui}
+  - {first: Said, last: Ouatik El Alaoui}
+saif-mohammad:
+  names:
+  - {first: Saif, last: Mohammad}
+  - {first: Saif M., last: Mohammad}
+saim-shin:
+  names:
+  - {first: Saim, last: Shin}
+  - {first: Sa-Im, last: Shin}
+sainik-mahata:
+  names:
+  - {first: Sainik, last: Mahata}
+  - {first: Sainik Kumar, last: Mahata}
+salah-ait-mokhtar:
+  names:
+  - {first: Salah, last: Ait-Mokhtar}
+  - {first: Salah, last: Aït-Mokhtar}
+salah-haamid:
+  names:
+  - {first: Salah, last: Haamid}
+  - {first: S., last: Haamid}
+saliha-azzam:
+  names:
+  - {first: Saliha, last: Azzam}
+  - {first: S., last: Azzam}
+salim-roukos:
+  names:
+  - {first: Salim, last: Roukos}
+  - {first: S., last: Roukos}
+salud-maria-jimenez-zafra:
+  names:
+  - {first: Salud María, last: Jiménez-Zafra}
+  - {first: Salud M., last: Jiménez-Zafra}
+  - {first: Salud M., last: Jiménez Zafra}
+salvador-espana:
+  names:
+  - {first: Salvador, last: España}
+  - {first: S., last: España}
+sameer-pradhan:
+  names:
+  - {first: Sameer, last: Pradhan}
+  - {first: Sameer S., last: Pradhan}
+  - {first: S., last: Pradhan}
+samhaa-r-el-beltagy:
+  names:
+  - {first: Samhaa R., last: El-Beltagy}
+  - {first: Samhaa, last: El-Beltagy}
+samik-ghosh:
+  names:
+  - {first: Samik, last: Ghosh}
+  - {first: Samik, last: Gosh}
+samir-abdelrahman:
+  names:
+  - {first: Samir, last: AbdelRahman}
+  - {first: Samir, last: Abdelrahman}
+samira-ellouze:
+  names:
+  - {first: Samira, last: Ellouze}
+  - {first: Samira Walha, last: Ellouze}
+samuel-bayer:
+  names:
+  - {first: Samuel, last: Bayer}
+  - {first: Sam, last: Bayer}
+samuel-bowman:
+  names:
+  - {first: Samuel, last: Bowman}
+  - {first: Samuel R., last: Bowman}
+  - {first: Sam, last: Bowman}
+samuel-w-k-chan:
+  names:
+  - {first: Samuel W. K., last: Chan}
+  - {first: Samuel W.K., last: Chan}
+  - {first: S. W. K., last: Chan}
+sanda-harabagiu:
+  names:
+  - {first: Sanda, last: Harabagiu}
+  - {first: Sanda M., last: Harabagiu}
+sandipan-dandapat:
+  names:
+  - {first: Sandipan, last: Dandapat}
+  - {first: Sandipan, last: Dandpat}
+sandra-aluisio:
+  names:
+  - {first: Sandra, last: Aluísio}
+  - {first: Sandra Maria, last: Aluísio}
+  - {first: Sandra, last: Aluisio}
+  - {first: Sandra M., last: Aluísio}
+sandra-carberry:
+  names:
+  - {first: Sandra, last: Carberry}
+  - {first: M. Sandra, last: Carberry}
+sandra-collovini:
+  names:
+  - {first: Sandra, last: Collovini}
+  - {first: S., last: Collovini}
+sandra-kubler:
+  names:
+  - {first: Sandra, last: Kübler}
+  - {first: Sandra, last: Kubler}
+  - {first: Sandra, last: Kuebler}
+sandra-milena-castellanos-paez:
+  names:
+  - {first: Sandra, last: Milena Castellanos Páez}
+  - {first: Sandra Castellanos, last: Páez}
+sang-jo-lee:
+  names:
+  - {first: Sang-Jo, last: Lee}
+  - {first: Sang Jo, last: Lee}
+sang-kyu-park:
+  names:
+  - {first: Sang-Kyu, last: Park}
+  - {first: Sangkyu, last: Park}
+sangkeun-jung:
+  names:
+  - {first: Sangkeun, last: Jung}
+  - {first: SangKeun, last: Jung}
+sanja-stajner:
+  names:
+  - {first: Sanja, last: Štajner}
+  - {first: Sanja, last: Stajner}
+sanjeev-khudanpur:
+  names:
+  - {first: Sanjeev, last: Khudanpur}
+  - {first: S., last: Khudanpur}
+sankar-kuppan:
+  names:
+  - {first: Sankar, last: Kuppan}
+  - {first: Sankar, last: K}
+sanket-vaibhav-mehta:
+  names:
+  - {first: Sanket Vaibhav, last: Mehta}
+  - {first: Vaibhav, last: Mehta}
+santiago-cortes-vaillo:
+  names:
+  - {first: Santiago, last: Cortés Vaíllo}
+  - {first: Santiago, last: Cortes}
+  - {first: Santiago Cortés, last: Vaíllo}
+saptarshi-ghosh-cincinnati:
+  degree: University of Cincinnati
+  disable_name_matching: true
+  names:
+  - {first: Saptarshi, last: Ghosh}
+  orcid: 0009-0006-9472-7121
+sarah-e-schwarm:
+  names:
+  - {first: Sarah E., last: Schwarm}
+  - {first: Sarah, last: Schwarm}
+sarah-masud-preum:
+  names:
+  - {first: Sarah Masud, last: Preum}
+  - {first: Sarah, last: Preum}
+  - {first: Sarah M., last: Preum}
+  orcid: 0000-0002-7771-8323
+sarah-moeller:
+  names:
+  - {first: Sarah, last: Moeller}
+  - {first: Sarah R., last: Moeller}
+sarah-taylor:
+  names:
+  - {first: Sarah, last: Taylor}
+  - {first: Sarah M., last: Taylor}
+sarah-vieweg:
+  names:
+  - {first: Sarah, last: Vieweg}
+  - {first: Sarah E., last: Vieweg}
+sari-saba-sadiya:
+  names:
+  - {first: Sari, last: Saba-Sadiya}
+  - {first: Sari, last: Sadiya}
+sarkis-abrilian:
+  names:
+  - {first: Sarkis, last: Abrilian}
+  - {first: S., last: Abrilian}
+sasa-hasan:
+  names:
+  - {first: Saša, last: Hasan}
+  - {first: Sasa, last: Hasan}
+sasa-petrovic:
+  names:
+  - {first: Saša, last: Petrović}
+  - {first: Sasa, last: Petrovic}
+sashka-t-davis:
+  names:
+  - {first: Sashka T., last: Davis}
+  - {first: Sashka, last: Davis}
+sasikumar-m:
+  names:
+  - {first: Sasikumar, last: M}
+  - {first: Sasikumar, last: M.}
+sathish-reddy-indurthi:
+  names:
+  - {first: Sathish Reddy, last: Indurthi}
+  - {first: Sathish, last: Reddy}
+  - {first: Sathish, last: Indurthi}
+sathiya-keerthi-selvaraj:
+  names:
+  - {first: Sathiya Keerthi, last: Selvaraj}
+  - {first: Sathiya, last: Keerthi}
+satoshi-sakai:
+  names:
+  - {first: Satoshi, last: Sakai}
+  - {first: S., last: Sakai}
+satoshi-sato:
+  names:
+  - {first: Satoshi, last: Sato}
+  - {first: S., last: Sato}
+satoshi-shirai:
+  names:
+  - {first: Satoshi, last: Shirai}
+  - {first: Satosi, last: Shirai}
+saul-leon:
+  names:
+  - {first: Saul, last: León}
+  - {first: Saul, last: León Silverio}
+  - {first: Saúl, last: León}
+scott-a-hale:
+  names:
+  - {first: Scott A., last: Hale}
+  - {first: Scott, last: Hale}
+scott-c-stoness:
+  names:
+  - {first: Scott C., last: Stoness}
+  - {first: Scott, last: Stoness}
+scott-cyphers:
+  names:
+  - {first: Scott, last: Cyphers}
+  - {first: D. Scott, last: Cyphers}
+scott-f-kiesling:
+  names:
+  - {first: Scott F., last: Kiesling}
+  - {first: Scott, last: Kiesling}
+scott-s-l-piao:
+  names:
+  - {first: Scott S.L., last: Piao}
+  - {first: Scott, last: Piao}
+  - {first: Scott S. L., last: Piao}
+  - {first: S. L., last: Piao}
+se-young-park:
+  names:
+  - {first: Se-Young, last: Park}
+  - {first: Se Young, last: Park}
+seamus-lawless:
+  names:
+  - {first: Seamus, last: Lawless}
+  - {first: Séamus, last: Lawless}
+sean-a-fulop:
+  names:
+  - {first: Sean A., last: Fulop}
+  - {first: Sean, last: Fulop}
+sean-boisen:
+  names:
+  - {first: Sean, last: Boisen}
+  - {first: S., last: Boisen}
+sebastian-drude:
+  names:
+  - {first: Sebastian, last: Drude}
+  - {first: S., last: Drude}
+sebastian-g-m-handschke:
+  names:
+  - {first: Sebastian G. M., last: Händschke}
+  - {first: Sebastian G.M., last: Händschke}
+sebastian-goeser:
+  names:
+  - {first: Sebastian, last: Goeser}
+  - {first: S., last: Goeser}
+sebastian-pado:
+  names:
+  - {first: Sebastian, last: Padó}
+  - {first: Sebastian, last: Pado}
+sebastian-pena-saldarriaga:
+  names:
+  - {first: Sebastián Peña, last: Saldarriaga}
+  - {first: Peña, last: Saldarriaga}
+  - {first: Sebastian, last: Peña Saldarriaga}
+sebastian-stuker:
+  names:
+  - {first: Sebastian, last: Stüker}
+  - {first: Sebastian, last: Stueker}
+sebastien-flavier:
+  names:
+  - {first: Sébastien, last: Flavier}
+  - {first: Sebastien, last: Flavier}
+sebastien-fournier:
+  names:
+  - {first: Sébastien, last: Fournier}
+  - {first: Sebastien, last: Fournier}
+sebastien-le-maguer:
+  names:
+  - {first: Sébastien, last: Le Maguer}
+  - {first: Sébastien Le, last: Maguer}
+see-kiong-ng:
+  names:
+  - {first: See Kiong, last: Ng}
+  - {first: See-Kiong, last: Ng}
+seiichi-nakagawa:
+  names:
+  - {first: Seiichi, last: Nakagawa}
+  - {first: S., last: Nakagawa}
+selcuk-kopru:
+  names:
+  - {first: Selcuk, last: Kopru}
+  - {first: Selçuk, last: Köprü}
+seng-cho-t-chou:
+  names:
+  - {first: Seng-Cho T., last: Chou}
+  - {first: Seng-cho T., last: Chou}
+seok-bae-jang:
+  names:
+  - {first: Seok Bae, last: Jang}
+  - {first: Seok B., last: Jang}
+septina-dian-larasati:
+  names:
+  - {first: Septina Dian, last: Larasati}
+  - {first: Septina, last: Larasati}
+serge-a-yablonsky:
+  names:
+  - {first: Serge A., last: Yablonsky}
+  - {first: Serge, last: Yablonsky}
+sergei-nirenburg:
+  names:
+  - {first: Sergei, last: Nirenburg}
+  - {first: Sergei, last: Nirenberg}
+  - {first: S., last: Nirenburg}
+sergey-o-kuznetsov:
+  names:
+  - {first: Sergey O., last: Kuznetsov}
+  - {first: Sergei O., last: Kuznetsov}
+sergey-v-pakhomov:
+  names:
+  - {first: Sergey V., last: Pakhomov}
+  - {first: Sergey, last: Pakhomov}
+sergio-barrachina:
+  names:
+  - {first: Sergio, last: Barrachina}
+  - {first: S., last: Barrachina}
+sergio-jose-rodriguez-mendez:
+  comment: NYCU
+  names:
+  - {first: Sergio José, last: Rodríguez Méndez}
+  - {first: Sergio J., last: Rodriguez Mendez}
+  orcid: 0000-0001-7203-8399
+sergio-ortiz-rojas:
+  names:
+  - {first: Sergio, last: Ortiz Rojas}
+  - {first: Sergio, last: Ortiz-Rojas}
+  - {first: Sergio Ortiz, last: Rojas}
+serguei-pakhomov:
+  names:
+  - {first: Serguei, last: Pakhomov}
+  - {first: Serguei V., last: Pakhomov}
+sethserey-sam:
+  names:
+  - {first: Sethserey, last: Sam*’}
+  - {first: Sethserey, last: Sam}
+seungho-cha:
+  names:
+  - {first: Seungho, last: Cha}
+  - {first: S., last: Cha}
+shachi-h-kumar:
+  names:
+  - {first: Shachi, last: H. Kumar}
+  - {first: Shachi H, last: Kumar}
+shafiq-joty:
+  names:
+  - {first: Shafiq, last: Joty}
+  - {first: Shafiq R., last: Joty}
+shannon-l-spruit:
+  names:
+  - {first: Shannon L., last: Spruit}
+  - {first: Shannon, last: Spruit}
+sharada-prasanna-mohanty:
+  names:
+  - {first: Sharada Prasanna, last: Mohanty}
+  - {first: Sharada, last: Mohanty}
+sharath-chandra-guntuku:
+  names:
+  - {first: Sharath, last: Chandra Guntuku}
+  - {first: Sharath Chandra, last: Guntuku}
+shari-landes:
+  names:
+  - {first: Shari, last: Landes}
+  - {first: Shari, last: Land}
+sharon-goldwater:
+  names:
+  - {first: Sharon, last: Goldwater}
+  - {first: Sharon J., last: Goldwater}
+sharon-oviatt:
+  names:
+  - {first: Sharon, last: Oviatt}
+  - {first: Sharon L., last: Oviatt}
+sharon-small:
+  names:
+  - {first: Sharon, last: Small}
+  - {first: Sharon, last: Gower Small}
+shashank-gupta-uiuc:
+  disable_name_matching: true
+  names:
+  - {first: Shashank, last: Gupta}
+  orcid: 0000-0002-3683-3739
+shay-b-cohen:
+  names:
+  - {first: Shay B., last: Cohen}
+  - {first: Shay, last: Cohen}
+sheila-c-m-de-sousa:
+  names:
+  - {first: Sheila, last: C. M. de Sousa}
+  - {first: Sheila C.M., last: de Sousa}
+sheila-r-glasbey:
+  names:
+  - {first: Sheila R., last: Glasbey}
+  - {first: Sheila, last: Glasbey}
+  - {first: S.R., last: Glasbey}
+shelley-ching-yu-hsieh:
+  names:
+  - {first: Shelley Ching-Yu, last: Hsieh}
+  - {first: Ching-yu, last: Hsieh}
+  - {first: Shelley Ching-yu, last: Hsieh}
+  - {first: Ching-yu Shelley, last: Hsieh}
+sheng-he-sun:
+  names:
+  - {first: Sheng-he, last: Sun}
+  - {first: Sheng-He, last: Sun}
+shengjie-li:
+  comment: University of Texas at Dallas
+  names:
+  - {first: Shengjie, last: Li}
+  orcid: 0000-0002-5442-5464
+shengjie-li-peking:
+  comment: Peking University
+  names:
+  - {first: Shengjie, last: Li}
+  orcid: 0000-0003-3489-9125
+sherri-condon:
+  names:
+  - {first: Sherri, last: Condon}
+  - {first: Sherri L., last: Condon}
+shervin-malmasi:
+  names:
+  - {first: Shervin, last: Malmasi}
+  - {first: Shevin, last: Malmasi}
+sheryl-young:
+  names:
+  - {first: Sheryl, last: Young}
+  - {first: Sheryl R., last: Young}
+shih-fu-chang:
+  names:
+  - {first: Shih-Fu, last: Chang}
+  - {first: Shih-fu, last: Chang}
+shih-min-li:
+  names:
+  - {first: Shih-Min, last: Li}
+  - {first: Shi-Min, last: Li}
+shih-ping-wang:
+  names:
+  - {first: Shih-ping, last: Wang}
+  - {first: Shih-Ping, last: Wang}
+shih-ting-huang:
+  names:
+  - {first: Shih-Ting, last: Huang}
+  - {first: Shih-ting, last: Huang}
+  - {first: Shi-Ting, last: Huang}
+shikhar-kumar-sarma-gu:
+  degree: Gauhati University
+  disable_name_matching: true
+  names:
+  - {first: Shikhar Kr., last: Sarma}
+  - {first: Shikhar, last: Sarma}
+  - {first: Shikhar, last: Sharma}
+  - {first: Shikhar Kr, last: Sarma}
+  - {first: Shikhar, last: Kumar Sarma}
+  - {first: Shikhar Kumar, last: Sarma}
+  orcid: 0000-0002-9495-1901
+shin-ichiro-kamei:
+  names:
+  - {first: Shin-ichiro, last: Kamei}
+  - {first: Shinichiro, last: Kamei}
+shin-ya-amano:
+  names:
+  - {first: Shin-ya, last: Amano}
+  - {first: Sin-ya, last: Amano}
+shinichi-ando:
+  names:
+  - {first: Shinichi, last: Ando}
+  - {first: Sinichi, last: Ando}
+  - {first: Shin-ichi, last: Ando}
+  - {first: Shin-Ichi, last: Ando}
+shinichi-doi:
+  names:
+  - {first: Shinichi, last: Doi}
+  - {first: Shin’ichi, last: Doi}
+shirley-dita:
+  names:
+  - {first: Shirley, last: Dita}
+  - {first: Shirley N., last: Dita}
+shivashankar-subramanian:
+  names:
+  - {first: Shivashankar, last: Subramanian}
+  - {first: S., last: Shivashankar}
+shixi-fan:
+  names:
+  - {first: Shixi, last: Fan}
+  - {first: ShiXi, last: Fan}
+shoichi-matsunaga:
+  names:
+  - {first: Shoichi, last: Matsunaga}
+  - {first: Sho-ichi, last: Matsunaga}
+shoichi-yokoyama:
+  names:
+  - {first: Shoichi, last: Yokoyama}
+  - {first: S., last: Yokoyama}
+shou-de-lin:
+  names:
+  - {first: Shou-De, last: Lin}
+  - {first: Shou-de, last: Lin}
+shozo-makino:
+  names:
+  - {first: Shozo, last: Makino}
+  - {first: S., last: Makino}
+shrikanth-narayanan:
+  names:
+  - {first: Shrikanth, last: Narayanan}
+  - {first: Shri, last: Narayanan}
+  - {first: Shrikanth S., last: Narayanan}
+shu-kai-hsieh:
+  names:
+  - {first: Shu-Kai, last: Hsieh}
+  - {first: Shu-kai, last: Hsieh}
+  - {first: ShuKai, last: Hsieh}
+shu-nakazato:
+  names:
+  - {first: Shu, last: Nakazato}
+  - {first: S., last: Nakazato}
+shu-yang-ubc:
+  comment: University of British Columbia
+  disable_name_matching: true
+  names:
+  - {first: Shu, last: Yang}
+  orcid: 0000-0002-8507-7191
+shu-yen-lin:
+  names:
+  - {first: Shu-Yen, last: Lin}
+  - {first: Shu-yen, last: Lin}
+shuan-fan-huang:
+  names:
+  - {first: Shuan-fan, last: Huang}
+  - {first: Shuan-Fan, last: Huang}
+shuanglong-li:
+  names:
+  - {first: Shuanglong, last: Li}
+  - {first: ShuangLong, last: Li}
+shuichi-itahashi:
+  names:
+  - {first: Shuichi, last: Itahashi}
+  - {first: Shuich, last: Itahashi}
+shun-der-chen:
+  names:
+  - {first: Shun-Der, last: Chen}
+  - {first: Shun-Der, last: Cheng}
+shun-ya-fukunaga:
+  names:
+  - {first: Shun-ya, last: Fukunaga}
+  - {first: Shunya, last: Fukunaga}
+shunsuke-uemura:
+  names:
+  - {first: Shunsuke, last: Uemura}
+  - {first: Syunsuke, last: Uemura}
+shunya-iwasawa:
+  names:
+  - {first: Shun’ya, last: Iwasawa}
+  - {first: Shunya, last: Iwasawa}
+shyam-sundar-agrawal:
+  names:
+  - {first: Shyam Sundar, last: Agrawal}
+  - {first: Shyam, last: Agrawal}
+sia-kolkovska:
+  names:
+  - {first: Sia, last: Kolkovska}
+  - {first: Siya, last: Kolkovska}
+siaw-fong-chung:
+  names:
+  - {first: Siaw-Fong, last: Chung}
+  - {first: Siaw Fong, last: Chung}
+sida-i-wang:
+  names:
+  - {first: Sida I., last: Wang}
+  - {first: Sida, last: Wang}
+siddharth-jain:
+  names:
+  - {first: Siddharth, last: Jain}
+  - {first: Siddhanth, last: Jain}
+silvia-hansen-schirra:
+  names:
+  - {first: Silvia, last: Hansen-Schirra}
+  - {first: Silvia, last: Hansen}
+silvia-moraes:
+  names:
+  - {first: Sílvia, last: Moraes}
+  - {first: Silvia, last: Moraes}
+silvia-necsulescu:
+  names:
+  - {first: Silvia, last: Necşulescu}
+  - {first: Silvia, last: Necsulescu}
+silvia-vazquez:
+  names:
+  - {first: Silvia, last: Vázquez}
+  - {first: Silvia Rodríguez, last: Vázquez}
+silvio-cordeiro:
+  names:
+  - {first: Silvio, last: Cordeiro}
+  - {first: Silvio Ricardo, last: Cordeiro}
+silviu-cucerzan:
+  names:
+  - {first: Silviu, last: Cucerzan}
+  - {first: Silviu-Petru, last: Cucerzan}
+simeon-junker:
+  names:
+  - {first: Simeon, last: Junker}
+  - {first: Simeon, last: Schüz}
+simon-benigeri:
+  names:
+  - {first: Simon, last: Benigeri}
+  - {first: Simon, last: Ben Igeri}
+simon-corston-oliver:
+  names:
+  - {first: Simon, last: Corston-Oliver}
+  - {first: Simon H., last: Corston-Oliver}
+simon-dobrisek:
+  names:
+  - {first: Simon, last: Dobrisek}
+  - {first: Simon, last: Dobrišek}
+simon-suster:
+  names:
+  - {first: Simon, last: Suster}
+  - {first: Simon, last: Šuster}
+simone-paolo-ponzetto:
+  names:
+  - {first: Simone Paolo, last: Ponzetto}
+  - {first: Simone P., last: Ponzetto}
+  - {first: Simone, last: Ponzetto}
+simonetta-montemagni:
+  names:
+  - {first: Simonetta, last: Montemagni}
+  - {first: S., last: Montemagni}
+sina-zarriess:
+  names:
+  - {first: Sina, last: Zarrieß}
+  - {first: Sina, last: Zarriess}
+sisay-fissaha-adafre:
+  names:
+  - {first: Sisay, last: Fissaha Adafre}
+  - {first: Sisay, last: Fissaha}
+  - {first: Sisay Fissaha, last: Adafre}
+sivaji-bandyopadhyay:
+  names:
+  - {first: Sivaji, last: Bandyopadhyay}
+  - {first: Sivaji, last: Bandopadhyay}
+  - {first: Sivaju, last: Bandyopadhyay}
+  - {first: Sivaji, last: B}
+sjur-moshagen:
+  names:
+  - {first: Sjur, last: Moshagen}
+  - {first: Sjur Nørstebø, last: Moshagen}
+  - {first: Sjur N., last: Moshagen}
+sobha-l:
+  names:
+  - {first: Sobha, last: L}
+  - {first: L., last: Sobha}
+sobha-lalitha-devi:
+  names:
+  - {first: Sobha, last: Lalitha Devi}
+  - {first: Lalitha Devi, last: Sobha}
+  - {first: Sobha Lalitha, last: Devi}
+sofia-gustafson-capkova:
+  names:
+  - {first: Sofia, last: Gustafson-Capková}
+  - {first: Sofia, last: Gustafson Capková}
+sofia-stronbergsson:
+  names:
+  - {first: Sofia, last: Strönbergsson}
+  - {first: Sofia, last: Strömbergsson}
+sofie-johansson-kokkinakis:
+  names:
+  - {first: Sofie Johansson, last: Kokkinakis}
+  - {first: Sofie, last: Johansson Kokkinakis}
+solange-oliveira-rezende:
+  names:
+  - {first: Solange, last: Oliveira Rezende}
+  - {first: Solange, last: Rezende}
+solomon-teferra-abate:
+  names:
+  - {first: Solomon Teferra, last: Abate}
+  - {first: Solomon, last: Teferra Abate}
+  - {first: Solomon, last: Teferra}
+somayajulu-sripada:
+  names:
+  - {first: Somayajulu, last: Sripada}
+  - {first: Somayajulu G., last: Sripada}
+  - {first: Somayajula G., last: Sripada}
+  - {first: Somayajulu Gowri, last: Sripada}
+somayeh-jafaritazehjani:
+  names:
+  - {first: Somayeh, last: Jafaritazehjani}
+  - {first: Somayeh, last: Jafaritazehjan}
+song-chun-zhu:
+  names:
+  - {first: Song-chun, last: Zhu}
+  - {first: Song-Chun, last: Zhu}
+sonia-frota:
+  names:
+  - {first: Sónia, last: Frota}
+  - {first: S., last: Frota}
+sonia-vazquez:
+  names:
+  - {first: Sonia, last: Vázquez}
+  - {first: Sonia, last: Vazquez}
+  - {first: Sonia, last: Vázquez Pérez}
+sonja-bosch:
+  names:
+  - {first: Sonja, last: Bosch}
+  - {first: Sonja E., last: Bosch}
+sonja-niessen:
+  names:
+  - {first: Sonja, last: Nießen}
+  - {first: Sonja, last: Niessen}
+  - {first: S., last: Nießen}
+sophia-y-m-lee:
+  names:
+  - {first: Sophia Y. M., last: Lee}
+  - {first: Sophia Y.M., last: Lee}
+sophia-yat-mei-lee:
+  names:
+  - {first: Sophia Yat Mei, last: Lee}
+  - {first: Yat-Mei, last: Lee}
+sophie-rosset:
+  names:
+  - {first: Sophie, last: Rosset}
+  - {first: S., last: Rosset}
+sotaro-kita:
+  names:
+  - {first: Sotaro, last: Kita}
+  - {first: S., last: Kita}
+soumil-mandal:
+  names:
+  - {first: Soumil, last: Mandal}
+  - {first: Soumik, last: Mandal}
+soumya-sankar-ghosh:
+  names:
+  - {first: Soumya Sankar, last: Ghosh}
+  - {first: Soumya, last: Ghosh}
+sowmya-s-sundaram:
+  names:
+  - {first: Sowmya S., last: Sundaram}
+  - {first: Sowmya S, last: Sundaram}
+spela-vintar:
+  names:
+  - {first: Špela, last: Vintar}
+  - {first: Spela, last: Vintar}
+spyros-raptis:
+  names:
+  - {first: Spyros, last: Raptis}
+  - {first: S., last: Raptis}
+srini-narayanan:
+  names:
+  - {first: Srini, last: Narayanan}
+  - {first: Srinivas, last: Narayanan}
+srinivas-bangalore:
+  names:
+  - {first: Srinivas, last: Bangalore}
+  - {first: B., last: Srinivas}
+  - {first: '', last: Srinivas}
+srinivasan-janarthanam:
+  names:
+  - {first: Srinivasan, last: Janarthanam}
+  - {first: Srini, last: Janarthanam}
+ssu-cheng-chen:
+  names:
+  - {first: Ssu-Cheng, last: Chen}
+  - {first: Su-Cheng, last: Chen}
+stan-c-kwasny:
+  names:
+  - {first: Stan C., last: Kwasny}
+  - {first: Stan, last: Kwasny}
+stan-szpakowicz:
+  names:
+  - {first: Stan, last: Szpakowicz}
+  - {first: Stanislaw, last: Szpakowicz}
+  - {first: Stanisław, last: Szpakowicz}
+stanley-f-chen:
+  names:
+  - {first: Stanley F., last: Chen}
+  - {first: Stanley, last: Chen}
+stanley-j-rosenschein:
+  names:
+  - {first: Stanley J., last: Rosenschein}
+  - {first: Stanley, last: Rosenschein}
+  - {first: Stan, last: Rosenschein}
+stasa-vujicic-stankovic:
+  names:
+  - {first: Stasa, last: Vujicic-Stankovic}
+  - {first: Staša Vujičić, last: Stanković}
+  - {first: Staša, last: Vujičić Stanković}
+stavroula-evita-fotinea:
+  names:
+  - {first: Stavroula-Evita, last: Fotinea}
+  - {first: S.-E., last: Fotinea}
+stefan-benus:
+  names:
+  - {first: Štefan, last: Beňuš}
+  - {first: Stefan, last: Benus}
+  - {first: S̆tefan, last: Ben̆us̆}
+stefan-daniel-dumitrescu:
+  names:
+  - {first: Stefan Daniel, last: Dumitrescu}
+  - {first: Ștefan Daniel, last: Dumitrescu}
+  - {first: Ștefan, last: Dumitrescu}
+stefan-darmoni:
+  names:
+  - {first: Stéfan, last: Darmoni}
+  - {first: Stefan, last: Darmoni}
+stefan-l-frank:
+  names:
+  - {first: Stefan L., last: Frank}
+  - {first: Stefan, last: Frank}
+stefan-muller:
+  names:
+  - {first: Stefan, last: Müller}
+  - {first: Stefan, last: Muller}
+  - {first: Stefan, last: Mueller}
+stefan-rued:
+  names:
+  - {first: Stefan, last: Rued}
+  - {first: Stefan, last: Rüd}
+stefan-schulz:
+  names:
+  - {first: Stefan, last: Schulz}
+  - {first: Stefan, last: Schultz}
+stefan-steidl:
+  names:
+  - {first: Stefan, last: Steidl}
+  - {first: S., last: Steidl}
+stefan-wagner:
+  names:
+  - {first: Stefan, last: Wagner}
+  - {first: Stefan, last: Wager}
+stefanie-shattuck-hufnagel:
+  names:
+  - {first: Stefanie, last: Shattuck-Hufnagel}
+  - {first: S. Shattuck, last: Hufnagel}
+stefano-dei-rossi:
+  names:
+  - {first: Stefano Dei, last: Rossi}
+  - {first: Stefano, last: Dei Rossi}
+stefano-zanobini:
+  names:
+  - {first: Stefano, last: Zanobini}
+  - {first: S., last: Zanobini}
+stelios-piperidis:
+  names:
+  - {first: Stelios, last: Piperidis}
+  - {first: Stelios, last: Piperdis}
+  - {first: S., last: Piperidis}
+stella-markantonatou:
+  names:
+  - {first: Stella, last: Markantonatou}
+  - {first: S., last: Markantonatou}
+stephan-m-kerpedjiev:
+  names:
+  - {first: Stephan M., last: Kerpedjiev}
+  - {first: Stephan, last: Kerpedjiev}
+stephan-vogel:
+  names:
+  - {first: Stephan, last: Vogel}
+  - {first: Stephen, last: Vogel}
+  - {first: S., last: Vogel}
+stephane-ferrari:
+  names:
+  - {first: Stéphane, last: Ferrari}
+  - {first: Stephane, last: Ferrari}
+stephane-meystre:
+  names:
+  - {first: Stephane, last: Meystre}
+  - {first: Stéphane, last: Meystre}
+stephanie-lukin:
+  names:
+  - {first: Stephanie, last: Lukin}
+  - {first: Stephanie M., last: Lukin}
+stephanie-s-everett:
+  names:
+  - {first: Stephanie S., last: Everett}
+  - {first: Stephanie, last: Everett}
+stephanie-seneff:
+  names:
+  - {first: Stephanie, last: Seneff}
+  - {first: S., last: Seneff}
+stephanie-strassel:
+  names:
+  - {first: Stephanie, last: Strassel}
+  - {first: Stephanie M., last: Strassel}
+stephen-a-della-pietra:
+  names:
+  - {first: Stephen A., last: Della Pietra}
+  - {first: Stephen, last: Della Pietra}
+  - {first: Stephen, last: DellaPietra}
+  - {first: S., last: Della Pietra}
+stephen-boxwell:
+  names:
+  - {first: Stephen, last: Boxwell}
+  - {first: Stephen A., last: Boxwell}
+stephen-c-levinson:
+  comment: Max-Planck-Institute for Psycholinguistics
+  names:
+  - {first: Stephen C., last: Levinson}
+  - {first: St., last: Levinson}
+  similar:
+  - stephen-e-levinson
+stephen-e-levinson:
+  comment: Bell Labs
+  names:
+  - {first: Stephen E., last: Levinson}
+  - {first: S. E., last: Levinson}
+  similar:
+  - stephen-c-levinson
+stephen-j-green:
+  names:
+  - {first: Stephen J., last: Green}
+  - {first: Stephen, last: Green}
+  - {first: Stephen J, last: Green}
+stephen-kunath:
+  names:
+  - {first: Stephen, last: Kunath}
+  - {first: Stephen A., last: Kunath}
+stephen-l-gallant:
+  names:
+  - {first: Stephen L., last: Gallant}
+  - {first: Stephen, last: Gallant}
+stephen-pulman:
+  names:
+  - {first: Stephen, last: Pulman}
+  - {first: Stephen G., last: Pulman}
+  - {first: S.G., last: Pulman}
+  - {first: S. G., last: Pulman}
+stephen-soderland:
+  names:
+  - {first: Stephen, last: Soderland}
+  - {first: S., last: Soderland}
+steve-austin:
+  names:
+  - {first: Steve, last: Austin}
+  - {first: S., last: Austin}
+steve-j-young:
+  names:
+  - {first: Steve J., last: Young}
+  - {first: S.J., last: Young}
+steve-whittaker:
+  names:
+  - {first: Steve, last: Whittaker}
+  - {first: S., last: Whittaker}
+steve-young:
+  names:
+  - {first: Steve, last: Young}
+  - {first: Steven, last: Young}
+steven-abney:
+  names:
+  - {first: Steven, last: Abney}
+  - {first: Steve, last: Abney}
+  - {first: Steven P., last: Abney}
+  - {first: S., last: Abney}
+steven-feiner:
+  names:
+  - {first: Steven, last: Feiner}
+  - {first: Steven K., last: Feiner}
+steven-h-weinberger:
+  names:
+  - {first: Steven H., last: Weinberger}
+  - {first: Steven, last: Weinberger}
+steven-j-maiorano:
+  names:
+  - {first: Steven J., last: Maiorano}
+  - {first: Steve, last: Maiorano}
+  - {first: Steven, last: Maiorano}
+  - {first: Steve, last: Moiorano}
+steven-jm-jones:
+  names:
+  - {first: Steven JM, last: Jones}
+  - {first: Steven, last: Jones}
+steven-krauwer:
+  names:
+  - {first: Steven, last: Krauwer}
+  - {first: S., last: Krauwer}
+steven-l-lytinen:
+  names:
+  - {first: Steven L., last: Lytinen}
+  - {first: Steven, last: Lytinen}
+steven-moran:
+  names:
+  - {first: Steven, last: Moran}
+  - {first: Steve, last: Moran}
+steven-roth:
+  names:
+  - {first: Steven, last: Roth}
+  - {first: Steven F., last: Roth}
+steven-skiena:
+  names:
+  - {first: Steven, last: Skiena}
+  - {first: Steve, last: Skiena}
+stuart-c-shapiro:
+  names:
+  - {first: Stuart C., last: Shapiro}
+  - {first: S.C., last: Shapiro}
+stuart-m-shieber:
+  names:
+  - {first: Stuart M., last: Shieber}
+  - {first: Stuart, last: Shieber}
+stylianos-bakamidis:
+  names:
+  - {first: Stylianos, last: Bakamidis}
+  - {first: S., last: Bakamidis}
+su-youn-yoon:
+  names:
+  - {first: Su-Youn, last: Yoon}
+  - {first: Su-youn, last: Yoon}
+subbarao-k-v:
+  names:
+  - {first: Subbarao K., last: V}
+  - {first: K.V., last: Subbarao}
+  - {first: Subbarao K, last: V.}
+sudip-kumar-naskar:
+  names:
+  - {first: Sudip Kumar, last: Naskar}
+  - {first: Sudip, last: Kumar Naskar}
+  - {first: Sudip, last: Naskar}
+sue-ellen-wright:
+  names:
+  - {first: Sue Ellen, last: Wright}
+  - {first: Sue, last: Wright}
+sue-j-ker:
+  names:
+  - {first: Sue J., last: Ker}
+  - {first: Sur-Jin, last: Ker}
+  - {first: Su-Jin, last: Ker}
+  - {first: Sue-Jin, last: Ker}
+  - {first: Sue-jin, last: Ker}
+suen-caesar-lun:
+  names:
+  - {first: Suen Caesar, last: Lun}
+  - {first: Caesar Suen, last: Lun}
+  - {first: Caesar, last: Lun}
+  - {first: S. Caesar, last: Lun}
+  - {first: C, last: Lun}
+suguru-saito:
+  names:
+  - {first: Suguru, last: Saitô}
+  - {first: Suguru, last: Saito}
+sujatha-das-gollapalli:
+  names:
+  - {first: Sujatha Das, last: Gollapalli}
+  - {first: Sujatha, last: Das Gollapalli}
+  - {first: Sujatha, last: Das}
+sukhada:
+  names:
+  - {first: '', last: Sukhada}
+  - {first: Sukhada, last: Palkar}
+sung-dong-kim:
+  names:
+  - {first: Sung Dong, last: Kim}
+  - {first: Sung-Dong, last: Kim}
+sung-fung-tsai:
+  names:
+  - {first: Sung-Fung, last: Tsai}
+  - {first: Sung-Feng, last: Tsai}
+sung-hyon-myaeng:
+  names:
+  - {first: Sung-Hyon, last: Myaeng}
+  - {first: Sung Hyon, last: Myaeng}
+  - {first: Sung H., last: Myaeng}
+  - {first: Sung-hyon, last: Myaeng}
+sung-young-jung:
+  names:
+  - {first: Sung Young, last: Jung}
+  - {first: Sung-Young, last: Jung}
+sunghwan-mac-kim:
+  names:
+  - {first: Sunghwan Mac, last: Kim}
+  - {first: Sunghwan, last: Kim}
+sungjin-lee:
+  names:
+  - {first: Sungjin, last: Lee}
+  - {first: Sung-Jin, last: Lee}
+sungrim-moon:
+  names:
+  - {first: Sungrim, last: Moon}
+  - {first: SungRim, last: Moon}
+surya-ganesh:
+  names:
+  - {first: Surya, last: Ganesh}
+  - {first: Surya Ganesh, last: V}
+  - {first: Surya Ganesh, last: Veeravalli}
+susan-armstrong:
+  names:
+  - {first: Susan, last: Armstrong}
+  - {first: Susan, last: Warwick-Armstrong}
+  - {first: Susan, last: Warwick}
+  - {first: S., last: Warwick-Armstrong}
+susan-e-brennan:
+  names:
+  - {first: Susan E., last: Brennan}
+  - {first: Susan, last: Brennan}
+susan-haller:
+  names:
+  - {first: Susan, last: Haller}
+  - {first: Susan M., last: Haller}
+  - {first: S.M., last: Haller}
+susan-p-converse:
+  names:
+  - {first: Susan P., last: Converse}
+  - {first: Susan, last: Converse}
+susan-w-mcroy:
+  names:
+  - {first: Susan W., last: McRoy}
+  - {first: Susan, last: McRoy}
+susan-w-talbott:
+  names:
+  - {first: Susan W., last: Talbott}
+  - {first: Susan, last: Talbott}
+susan-windisch-brown:
+  names:
+  - {first: Susan Windisch, last: Brown}
+  - {first: Susan, last: Windisch Brown}
+  - {first: Susan, last: Brown}
+  - {first: Susan W., last: Brown}
+susana-early:
+  names:
+  - {first: Susana, last: Early}
+  - {first: S., last: Early}
+susana-sotelo:
+  names:
+  - {first: Susana, last: Sotelo}
+  - {first: Susana Sotelo, last: Docio}
+  orcid: 0000-0002-0067-7957
+susann-luperfoy:
+  names:
+  - {first: Susann, last: LuperFoy}
+  - {first: Susann, last: Luperfoy}
+suzan-uskudarli:
+  names:
+  - {first: Suzan, last: Uskudarli}
+  - {first: Suzan, last: Üsküdarlı}
+suzana-ilic:
+  names:
+  - {first: Suzana, last: Ilic}
+  - {first: Suzana, last: Ilić}
+suzanne-liebowitz-taylor:
+  names:
+  - {first: Suzanne Liebowitz, last: Taylor}
+  - {first: Suzanne, last: Liebowitz}
+sv-ramanan:
+  names:
+  - {first: Sv, last: Ramanan}
+  - {first: SV, last: Ramanan}
+sven-buechel:
+  names:
+  - {first: Sven, last: Buechel}
+  - {first: Sven, last: Büchel}
+sylvain-galliano:
+  names:
+  - {first: Sylvain, last: Galliano}
+  - {first: S., last: Galliano}
+sylvana-sofkova-hashemi:
+  names:
+  - {first: Sylvana, last: Sofkova Hashemi}
+  - {first: Sylvana, last: Sofkova}
+szu-ting-yi:
+  names:
+  - {first: Szu-ting, last: Yi}
+  - {first: Szuting, last: Yi}
+t-florian-jaeger:
+  names:
+  - {first: T. Florian, last: Jaeger}
+  - {first: Florian, last: Jaeger}
+t-mark-ellison:
+  names:
+  - {first: T. Mark, last: Ellison}
+  - {first: T. M., last: Ellison}
+t-t-mirnalinee:
+  names:
+  - {first: T. T., last: Mirnalinee}
+  - {first: Mirnalinee, last: T T}
+  - {first: T T, last: Mirnalinee}
+t-v-geetha:
+  names:
+  - {first: T. V., last: Geetha}
+  - {first: Geetha, last: T V}
+  - {first: T V, last: Geetha}
+tafseer-ahmed:
+  names:
+  - {first: Tafseer, last: Ahmed}
+  - {first: Tafseer, last: Ahmed Khan}
+takahiro-ohno:
+  names:
+  - {first: Takahiro, last: Ohno}
+  - {first: Takahiro, last: Ono}
+takahiro-wakao:
+  names:
+  - {first: Takahiro, last: Wakao}
+  - {first: T., last: Wakao}
+takashi-onishi:
+  names:
+  - {first: Takashi, last: Onishi}
+  - {first: Takeshi, last: Onishi}
+  - {first: Takashi, last: Oonishi}
+takehiko-yoshimi:
+  names:
+  - {first: Takehiko, last: Yoshimi}
+  - {first: T., last: Yoshimi}
+taku-kudo:
+  names:
+  - {first: Taku, last: Kudo}
+  - {first: Taku, last: Kudoh}
+takumi-goto:
+  degree: Nara Institute of Science and Technology
+  names:
+  - {first: Takumi, last: Goto}
+  - {first: Takumi, last: Gotou}
+  orcid: 0009-0006-8124-899X
+tamara-berg:
+  names:
+  - {first: Tamara, last: Berg}
+  - {first: Tamara L., last: Berg}
+  - {first: Tamara L, last: Berg}
+tamara-bobic:
+  names:
+  - {first: Tamara, last: Bobić}
+  - {first: Tamara, last: Bobic}
+tamas-biro:
+  names:
+  - {first: Tamás, last: Bíró}
+  - {first: Tamás, last: Biró}
+tamas-horvath:
+  names:
+  - {first: Tamás, last: Horváth}
+  - {first: T., last: Horvath}
+tamas-varadi:
+  names:
+  - {first: Tamás, last: Váradi}
+  - {first: Tamas, last: Váradi}
+tangqiu-li:
+  names:
+  - {first: Tangqiu, last: Li}
+  - {first: Tanqiu, last: Li}
+tanja-samardzic:
+  names:
+  - {first: Tanja, last: Samardzic}
+  - {first: Tanja, last: Samardžić}
+tanveer-a-faruquie:
+  names:
+  - {first: Tanveer A., last: Faruquie}
+  - {first: Tanveer, last: Faruquie}
+  - {first: Tanveer A, last: Faruquie}
+tapas-nayak:
+  names:
+  - {first: Tapas, last: Nayak}
+  - {first: Tapas, last: Nayek}
+tat-seng-chua:
+  names:
+  - {first: Tat-Seng, last: Chua}
+  - {first: Tat Seng, last: Chua}
+tatsunori-b-hashimoto:
+  names:
+  - {first: Tatsunori B., last: Hashimoto}
+  - {first: Tatsunori, last: Hashimoto}
+tatyana-ruzsics:
+  names:
+  - {first: Tatyana, last: Ruzsics}
+  - {first: Tatiana, last: Ruzsics}
+ted-briscoe:
+  names:
+  - {first: Ted, last: Briscoe}
+  - {first: Edward, last: Briscoe}
+  - {first: E.J., last: Briscoe}
+ted-e-dunning:
+  names:
+  - {first: Ted E., last: Dunning}
+  - {first: Ted, last: Dunning}
+teresa-goncalves:
+  names:
+  - {first: Teresa, last: Gonçalves}
+  - {first: Teresa, last: Goncalves}
+terry-patten:
+  names:
+  - {first: Terry, last: Patten}
+  - {first: T., last: Patten}
+th-r-hofmann:
+  names:
+  - {first: Th. R., last: Hofmann}
+  - {first: T. R., last: Hofmann}
+theo-desbordes:
+  names:
+  - {first: Théo, last: Desbordes}
+  - {first: Theo, last: Desbordes}
+therese-firmin:
+  names:
+  - {first: Therese, last: Firmin}
+  - {first: Therese Firmin, last: Hand}
+thi-minh-huyen-nguyen:
+  names:
+  - {first: Thi Minh Huyen, last: Nguyen}
+  - {first: Thi Minh Huyền, last: Nguyễn}
+  - {first: Thị Minh Huyền, last: Nguyễn}
+  - {first: Thi-Minh-Huyen, last: Nguyen}
+  - {first: T. M. Huyen, last: Nguyen}
+thiago-castro-ferreira:
+  names:
+  - {first: Thiago, last: Castro Ferreira}
+  - {first: Thiago, last: Ferreira}
+thiago-d-tadeu:
+  names:
+  - {first: Thiago D., last: Tadeu}
+  - {first: Thiago, last: Tadeu}
+thierry-guillotin:
+  names:
+  - {first: Thierry, last: Guillotin}
+  - {first: T., last: Guillotin}
+thierry-hamon:
+  names:
+  - {first: Thierry, last: Hamon}
+  - {first: T., last: Hamon}
+thilo-gotz:
+  names:
+  - {first: Thilo, last: Gotz}
+  - {first: Thilo, last: Götz}
+thomas-a-keenan:
+  names:
+  - {first: Thomas A., last: Keenan}
+  - {first: Thomas, last: Keenan}
+thomas-ahlswede:
+  names:
+  - {first: Thomas, last: Ahlswede}
+  - {first: Thomas E., last: Ahlswede}
+thomas-alexander-trost:
+  names:
+  - {first: Thomas Alexander, last: Trost}
+  - {first: Thomas, last: Trost}
+thomas-c-rindflesch:
+  names:
+  - {first: Thomas C., last: Rindflesch}
+  - {first: Thomas, last: Rindflesch}
+thomas-l-cornell:
+  names:
+  - {first: Thomas L., last: Cornell}
+  - {first: Thomas, last: Cornell}
+thomas-l-griffiths:
+  names:
+  - {first: Thomas L., last: Griffiths}
+  - {first: Thomas, last: Griffiths}
+thomas-landauer:
+  names:
+  - {first: Thomas, last: Landauer}
+  - {first: Thomas K, last: Landauer}
+thomas-mueller:
+  names:
+  - {first: Thomas, last: Mueller}
+  - {first: Thomas, last: Müller}
+thomas-p-ohara:
+  names:
+  - {first: Thomas P., last: O’Hara}
+  - {first: Thomas, last: O’Hara}
+thomas-s-morton:
+  names:
+  - {first: Thomas S., last: Morton}
+  - {first: Thomas, last: Morton}
+thomas-ulrich-christiansen:
+  names:
+  - {first: Thomas Ulrich, last: Christiansen}
+  - {first: Thomas, last: Christiansen}
+thomas-wasow:
+  names:
+  - {first: Thomas, last: Wasow}
+  - {first: Tom, last: Wasow}
+thoudam-doren-singh:
+  names:
+  - {first: Thoudam Doren, last: Singh}
+  - {first: Thoudam, last: Doren Singh}
+thuy-vu:
+  names:
+  - {first: Thuy, last: Vu}
+  - {first: Thuy-Trang, last: Vu}
+thuylinh-nguyen:
+  names:
+  - {first: ThuyLinh, last: Nguyen}
+  - {first: Thuy Linh, last: Nguyen}
+tiago-timponi-torrent:
+  names:
+  - {first: Tiago Timponi, last: Torrent}
+  - {first: Tiago, last: Torrent}
+  - {first: Tiago T., last: Torrent}
+tiberiu-boros:
+  names:
+  - {first: Tiberiu, last: Boroş}
+  - {first: Tiberiu, last: Boroș}
+  - {first: Tiberiu, last: Boros}
+tibor-gyimothy:
+  names:
+  - {first: Tibor, last: Gyimóthy}
+  - {first: T., last: Gyimothy}
+tiejun-zhao:
+  names:
+  - {first: Tiejun, last: Zhao}
+  - {first: TieJun, last: Zhao}
+  - {first: Tie-Jun, last: Zhao}
+  - {first: Tie-jun, last: Zhao}
+tiit-roosmaa:
+  names:
+  - {first: Tiit, last: Roosmaa}
+  - {first: T., last: Roosmaa}
+tim-anderson:
+  names:
+  - {first: Tim, last: Anderson}
+  - {first: Timothy, last: Anderson}
+tim-finin:
+  names:
+  - {first: Tim, last: Finin}
+  - {first: Timothy W., last: Finin}
+tim-okeefe:
+  names:
+  - {first: Tim, last: O’Keefe}
+  - {first: Timothy, last: O’Keefe}
+tim-rocktaschel:
+  names:
+  - {first: Tim, last: Rocktäschel}
+  - {first: Tim, last: Rocktaschel}
+tim-van-de-cruys:
+  names:
+  - {first: Tim, last: Van de Cruys}
+  - {first: Tim, last: Van De Cruys}
+timo-jarvinen:
+  names:
+  - {first: Timo, last: Jarvinen}
+  - {first: Timo, last: Järvinen}
+timothy-baldwin:
+  names:
+  - {first: Timothy, last: Baldwin}
+  - {first: Tim, last: Baldwin}
+timothy-chklovski:
+  names:
+  - {first: Timothy, last: Chklovski}
+  - {first: Tim, last: Chklovski}
+timothy-j-hazen:
+  names:
+  - {first: Timothy J., last: Hazen}
+  - {first: T. J., last: Hazen}
+timothy-miller:
+  names:
+  - {first: Timothy, last: Miller}
+  - {first: Tim, last: Miller}
+timothy-odonnell:
+  names:
+  - {first: Timothy, last: O’Donnell}
+  - {first: Timothy J., last: O’Donnell}
+  - {first: Tim, last: O’Donnell}
+timothy-w-bickmore:
+  names:
+  - {first: Timothy W., last: Bickmore}
+  - {first: Timothy, last: Bickmore}
+tina-kluwer:
+  names:
+  - {first: Tina, last: Klüwer}
+  - {first: Tina, last: Kluewer}
+ting-hao-huang:
+  names:
+  - {first: Ting-Hao, last: Huang}
+  - {first: Ting-Hao ‘Kenneth’, last: Huang}
+  - {first: Ting-Hao Kenneth, last: Huang}
+ting-hao-yang:
+  names:
+  - {first: Ting-hao, last: Yang}
+  - {first: Ting-Hao, last: Yang}
+ting-hui-kao:
+  names:
+  - {first: Ting-hui, last: Kao}
+  - {first: Ting-Hui, last: Kao}
+toan-q-nguyen:
+  names:
+  - {first: Toan Q., last: Nguyen}
+  - {first: Toan, last: Nguyen}
+tohru-shimizu:
+  names:
+  - {first: Tohru, last: Shimizu}
+  - {first: Toru, last: Shimizu}
+tom-b-y-lai:
+  names:
+  - {first: Tom B.Y., last: Lai}
+  - {first: Tom B. Y., last: Lai}
+  - {first: Tom B.Y, last: Lai}
+  - {first: T. B. Y., last: Lai}
+tom-bong-yeung-lai:
+  names:
+  - {first: Tom Bong-yeung, last: Lai}
+  - {first: Bong-Yeung, last: Lai}
+tom-mitchell:
+  names:
+  - {first: Tom, last: Mitchell}
+  - {first: Tom M., last: Mitchell}
+tomas-holan:
+  names:
+  - {first: Tomáš, last: Holan}
+  - {first: Tomas, last: Holan}
+tomas-mikolov:
+  names:
+  - {first: Tomáš, last: Mikolov}
+  - {first: Tomas, last: Mikolov}
+tomasz-obrebski:
+  names:
+  - {first: Tomasz, last: Obrębski}
+  - {first: Tomasz, last: Obrebski}
+tomaz-erjavec:
+  names:
+  - {first: Tomaž, last: Erjavec}
+  - {first: Tomaz, last: Erjavec}
+tomek-strzalkowski:
+  names:
+  - {first: Tomek, last: Strzalkowski}
+  - {first: Tomek, last: Strzalkowskl}
+  - {first: T., last: Strzalkowski}
+tommi-a-pirinen:
+  names:
+  - {first: Tommi A., last: Pirinen}
+  - {first: Tommi, last: Pirinen}
+  - {first: Tommi A, last: Pirinen}
+tomoki-toda:
+  names:
+  - {first: Tomoki, last: Toda}
+  - {first: Tomiki, last: Toda}
+tony-rose:
+  names:
+  - {first: Tony, last: Rose}
+  - {first: Tony G., last: Rose}
+  - {first: T.G., last: Rose}
+tor-klingberg:
+  names:
+  - {first: Tor, last: Klingberg}
+  - {first: T., last: Klingberg}
+torbjorn-lager:
+  names:
+  - {first: Torbjörn, last: Lager}
+  - {first: Torbjorn, last: Lager}
+  - {first: Torbjoern, last: Lager}
+toru-hitaka:
+  names:
+  - {first: Toru, last: Hitaka}
+  - {first: Tooru, last: Hitaka}
+toyoaki-nishida:
+  names:
+  - {first: Toyoaki, last: Nishida}
+  - {first: Toyo-aki, last: Nishida}
+tracy-holloway-king:
+  names:
+  - {first: Tracy Holloway, last: King}
+  - {first: Tracy H., last: King}
+trang-mai-xuan:
+  names:
+  - {first: Trang, last: Mai Xuan}
+  - {first: Trang Mai, last: Xuan}
+trevor-cohen:
+  comment: University of Washington
+  names:
+  - {first: Trevor, last: Cohen}
+  similar:
+  - trevor-cohn
+trevor-cohn:
+  comment: University of Melbourne
+  names:
+  - {first: Trevor, last: Cohn}
+  similar:
+  - trevor-cohen
+tri-thanh-nguyen:
+  names:
+  - {first: Tri-Thanh, last: Nguyen}
+  - {first: Tri Thanh, last: Nguyen}
+tristan-vanrullen:
+  names:
+  - {first: Tristan, last: Vanrullen}
+  - {first: Tristan, last: van Rullen}
+  - {first: Tristan, last: Van Rullen}
+trung-bui:
+  names:
+  - {first: Trung, last: Bui}
+  - {first: Trung H., last: Bui}
+tsong-yi-chen:
+  names:
+  - {first: Tsong-yi, last: Chen}
+  - {first: Tsong-Yi, last: Chen}
+tsuyoshi-morimoto:
+  names:
+  - {first: Tsuyoshi, last: Morimoto}
+  - {first: Tsuyosi, last: Morimoto}
+tu-bao-ho:
+  names:
+  - {first: Tu-Bao, last: Ho}
+  - {first: Tu Bao, last: Ho}
+tu-vu:
+  names:
+  - {first: Tu, last: Vu}
+  - {first: Tu Thanh, last: Vu}
+tuan-anh-le:
+  names:
+  - {first: Tuan Anh, last: Lê}
+  - {first: Tuan Anh, last: Le}
+  - {first: Tuấn Anh, last: Lê}
+tuan-lai:
+  names:
+  - {first: Tuan, last: Lai}
+  - {first: Tuan Manh, last: Lai}
+tuan-tran:
+  names:
+  - {first: Tuan, last: Tran}
+  - {first: Tuan Dung, last: Tran}
+tung-hui-chiang:
+  names:
+  - {first: Tung-Hui, last: Chiang}
+  - {first: TungHui, last: Chiang}
+tuoi-thi-phan:
+  names:
+  - {first: Tuoi Thi, last: Phan}
+  - {first: Tuoi, last: T. Phan}
+u-rohini:
+  names:
+  - {first: U., last: Rohini}
+  - {first: Rohini, last: U}
+udo-kruschwitz:
+  names:
+  - {first: Udo, last: Kruschwitz}
+  - {first: U., last: Kruschwitz}
+ulrich-schafer:
+  names:
+  - {first: Ulrich, last: Schäfer}
+  - {first: Ulrich, last: Schafer}
+  - {first: Ulrich, last: Schaefer}
+ulrike-kugler:
+  names:
+  - {first: Ulrike, last: Kugler}
+  - {first: U., last: Kugler}
+ulrike-mosel:
+  names:
+  - {first: Ulrike, last: Mosel}
+  - {first: U., last: Mosel}
+ulrike-pado:
+  names:
+  - {first: Ulrike, last: Pado}
+  - {first: Ulrike, last: Padó}
+umit-deniz-turan:
+  names:
+  - {first: Umit Deniz, last: Turan}
+  - {first: Ümit Deniz, last: Turan}
+un-gian-iunn:
+  names:
+  - {first: Un-Gian, last: Iunn}
+  - {first: Un-gian, last: Iun}
+  - {first: Ún-giân, last: Iû}
+ute-ziegenhain:
+  names:
+  - {first: Ute, last: Ziegenhain}
+  - {first: U., last: Ziegenhain}
+utpal-kumar-sikdar:
+  names:
+  - {first: Utpal Kumar, last: Sikdar}
+  - {first: Utpal, last: Sikdar}
+uwe-reichel:
+  names:
+  - {first: Uwe, last: Reichel}
+  - {first: Uwe D., last: Reichel}
+v-k-mittal:
+  names:
+  - {first: V. K., last: Mittal}
+  - {first: V.K., last: Mittal}
+vaijayanthi-m-sarma:
+  names:
+  - {first: Vaijayanthi M., last: Sarma}
+  - {first: Vaijayanthi, last: Sarma}
+valentin-i-spitkovsky:
+  names:
+  - {first: Valentin I., last: Spitkovsky}
+  - {first: Valentin, last: Spitkovsky}
+valentina-bartalesi-lenzi:
+  names:
+  - {first: Valentina, last: Bartalesi Lenzi}
+  - {first: V., last: Bartalesi Lenzi}
+valeria-de-paiva:
+  names:
+  - {first: Valeria, last: de Paiva}
+valeria-delisandra-feltrim:
+  names:
+  - {first: Valéria Delisandra, last: Feltrim}
+  - {first: Valéria, last: Feltrim}
+  - {first: Valéria D., last: Feltrim}
+valerie-bellynck:
+  names:
+  - {first: Valérie, last: Bellynck}
+  - {first: Valerie, last: Bellynck}
+valerie-maffiolo:
+  names:
+  - {first: Valérie, last: Maffiolo}
+  - {first: V., last: Maffiolo}
+valerie-mapelli:
+  names:
+  - {first: Valérie, last: Mapelli}
+  - {first: Valerie, last: Mapelli}
+valtcho-valtchev:
+  names:
+  - {first: Valtcho, last: Valtchev}
+  - {first: V., last: Valtchev}
+van-minh-nguyen:
+  names:
+  - {first: Van minh, last: Nguyen}
+  - {first: Van Minh, last: Nguyen}
+vania-dimitrova:
+  names:
+  - {first: Vania, last: Dimitrova}
+  - {first: Vanya, last: Dimitrova}
+vanja-m-karan:
+  names:
+  - {first: Vanja M., last: Karan}
+  - {first: Vanja Mladen, last: Karan}
+varun-manjunatha:
+  names:
+  - {first: Varun, last: Manjunatha}
+  - {first: Varun, last: Manjunath}
+vassilios-digalakis:
+  names:
+  - {first: Vassilios, last: Digalakis}
+  - {first: V., last: Digalakis}
+vera-lucia-strube-de-lima:
+  names:
+  - {first: Vera Lucia Strube, last: de Lima}
+  - {first: Vera Lúcia Strube, last: de Lima}
+verginica-barbu-mititelu:
+  names:
+  - {first: Verginica, last: Barbu Mititelu}
+  - {first: Verginica Barbu, last: Mititelu}
+vern-walker:
+  names:
+  - {first: Vern, last: Walker}
+  - {first: Vern R., last: Walker}
+veronika-lux:
+  names:
+  - {first: Veronika, last: Lux}
+  - {first: Veronika, last: Lux-Pogodalla}
+  - {first: Véronika, last: Lux-Pogodalla}
+veronique-hoste:
+  names:
+  - {first: Veronique, last: Hoste}
+  - {first: Véronique, last: Hoste}
+veronique-moriceau:
+  names:
+  - {first: Véronique, last: Moriceau}
+  - {first: Veronique, last: Moriceau}
+vibhu-o-mittal:
+  names:
+  - {first: Vibhu O., last: Mittal}
+  - {first: Vibhu, last: Mittal}
+vicent-alabau:
+  names:
+  - {first: Vicent, last: Alabau}
+  - {first: Vicente, last: Alabau}
+victor-abrash:
+  names:
+  - {first: Victor, last: Abrash}
+  - {first: V., last: Abrash}
+victor-j-diaz:
+  names:
+  - {first: Víctor J., last: Díaz}
+  - {first: Victor J., last: Díaz}
+victor-m-sanchez-cartagena:
+  names:
+  - {first: Víctor M., last: Sánchez-Cartagena}
+  - {first: Victor M., last: Sánchez-Cartagena}
+victor-o-k-li:
+  names:
+  - {first: Victor O.K., last: Li}
+  - {first: Victor O. K., last: Li}
+victor-peinado:
+  names:
+  - {first: Víctor, last: Peinado}
+  - {first: Victor, last: Peinado}
+victor-rodriguez-doncel:
+  names:
+  - {first: Victor, last: Rodriguez-Doncel}
+  - {first: Víctor, last: Rodríguez}
+  - {first: Victor, last: Rodríguez Doncel}
+victor-sadler:
+  names:
+  - {first: Victor, last: Sadler}
+  - {first: V., last: Sadler}
+victor-zue:
+  names:
+  - {first: Victor, last: Zue}
+  - {first: Victor W., last: Zue}
+  - {first: V., last: Zue}
+victoria-fossum:
+  names:
+  - {first: Victoria, last: Fossum}
+  - {first: Victoria Li, last: Fossum}
+victoria-l-rubin:
+  names:
+  - {first: Victoria L., last: Rubin}
+  - {first: Victoria, last: Rubin}
+victoria-lin-cmu:
+  comment: CMU
+  names:
+  - {first: Victoria, last: Lin}
+vidas-daudaravicius:
+  names:
+  - {first: Vidas, last: Daudaravicius}
+  - {first: Vidas, last: Daudaravičius}
+viet-cuong-nguyen:
+  names:
+  - {first: Viet Cuong, last: Nguyen}
+  - {first: Nguyen Viet, last: Cuong}
+viet-hong-tran:
+  names:
+  - {first: Viet Hong, last: Tran}
+  - {first: Viet-Hong, last: Tran}
+vijay-sundar-ram:
+  names:
+  - {first: Vijay, last: Sundar Ram}
+  - {first: Vijay Sundar, last: Ram}
+  - {first: R. Vijay Sundar, last: Ram}
+  - {first: Vijay Sundar Ram, last: R}
+vikas-ganjigunte-ashok:
+  names:
+  - {first: Vikas, last: Ganjigunte Ashok}
+  - {first: Vikas, last: Ashok}
+vikash-khandelwal:
+  names:
+  - {first: Vikash, last: Khandelwal}
+  - {first: Vikas, last: Khandelwal}
+vincent-j-della-pietra:
+  names:
+  - {first: Vincent J., last: Della Pietra}
+  - {first: Vincent, last: DellaPietra}
+  - {first: V., last: Della Pietra}
+vineet-gupta:
+  names:
+  - {first: Vineet, last: Gupta}
+  - {first: V., last: Gupta}
+vinh-van-nguyen:
+  names:
+  - {first: Vinh Van, last: Nguyen}
+  - {first: Vinh-Van, last: Nguyen}
+vinicius-mourao-alves-de-souza:
+  names:
+  - {first: Vinícius Mourão Alves de, last: Souza}
+  - {first: Vinícius Mourão Alves, last: de Souza}
+virendrakumar-bhavsar:
+  names:
+  - {first: Virendrakumar, last: Bhavsar}
+  - {first: Virendra, last: Bhavsar}
+vishnu-dutt-sharma:
+  names:
+  - {first: Vishnu Dutt, last: Sharma}
+  - {first: Vishnu, last: Sharma}
+vit-suchomel:
+  names:
+  - {first: Vit, last: Suchomel}
+  - {first: Vít, last: Suchomel}
+vitor-carvalho:
+  names:
+  - {first: Vitor, last: Carvalho}
+  - {first: Vitor R., last: Carvalho}
+vitor-de-araujo:
+  names:
+  - {first: Vitor, last: De Araujo}
+  - {first: Vítor, last: Araújo}
+vittorio-di-tomaso:
+  names:
+  - {first: Vittorio, last: Di Tomaso}
+  - {first: V., last: Di Tomaso}
+vivi-nastase:
+  names:
+  - {first: Vivi, last: Nastase}
+  - {first: Vivi, last: Năstase}
+vivian-k-lee:
+  names:
+  - {first: Vivian K., last: Lee}
+  - {first: Vivian, last: Lee}
+vladimir-kadlec:
+  names:
+  - {first: Vladimír, last: Kadlec}
+  - {first: Vladimir, last: Kadlec}
+vladimir-petkevic:
+  names:
+  - {first: Vladimir, last: Petkevic}
+  - {first: Vladimír, last: Petkevič}
+vladislav-kubon:
+  names:
+  - {first: Vladislav, last: Kubon}
+  - {first: Vladislav, last: Kuboň}
+  - {first: Vladlslav, last: Kubon}
+vlado-keselj:
+  names:
+  - {first: Vlado, last: Keselj}
+  - {first: Vlado, last: Kešelj}
+vojtech-kovar:
+  names:
+  - {first: Vojtěch, last: Kovář}
+  - {first: Vojtech, last: Kovář}
+von-wun-soo:
+  names:
+  - {first: Von-Wun, last: Soo}
+  - {first: Von-wun, last: Soo}
+voula-giouli:
+  names:
+  - {first: Voula, last: Giouli}
+  - {first: V., last: Giouli}
+w-bruce-croft:
+  names:
+  - {first: W. Bruce, last: Croft}
+  - {first: Bruce, last: Croft}
+w-john-hutchins:
+  names:
+  - {first: W. John, last: Hutchins}
+  - {first: John, last: Hutchins}
+wai-kit-lo:
+  names:
+  - {first: Wai-Kit, last: Lo}
+  - {first: Wai Kit, last: Lo}
+wai-lok-tam:
+  names:
+  - {first: Wai Lok, last: Tam}
+  - {first: Wailok, last: Tam}
+walter-daelemans:
+  names:
+  - {first: Walter, last: Daelemans}
+  - {first: W., last: Daelemans}
+walter-haeseryn:
+  names:
+  - {first: Walter, last: Haeseryn}
+  - {first: W., last: Haeseryn}
+walter-kasper:
+  names:
+  - {first: Walter, last: Kasper}
+  - {first: W., last: Kasper}
+walter-lasecki:
+  names:
+  - {first: Walter, last: Lasecki}
+  - {first: Walter S., last: Lasecki}
+walther-von-hahn:
+  names:
+  - {first: Walther, last: von Hahn}
+  - {first: Walther, last: v. Hahn}
+waqas-anwar:
+  names:
+  - {first: Waqas, last: Anwar}
+  - {first: Muhammad Waqas, last: Anwar}
+warren-greiff:
+  names:
+  - {first: Warren, last: Greiff}
+  - {first: Warren R., last: Greiff}
+wasi-ahmad:
+  names:
+  - {first: Wasi, last: Ahmad}
+  - {first: Wasi Uddin, last: Ahmad}
+wassim-el-hajj:
+  names:
+  - {first: Wassim, last: El-Hajj}
+  - {first: Wassim, last: El Hajj}
+wayne-ward:
+  names:
+  - {first: Wayne, last: Ward}
+  - {first: Wayne H., last: Ward}
+  - {first: W., last: Ward}
+wayne-xin-zhao:
+  names:
+  - {first: Wayne Xin, last: Zhao}
+  - {first: Xin, last: Zhao}
+wei-ai-umich:
+  disable_name_matching: true
+  names:
+  - {first: Wei, last: Ai}
+  orcid: 0000-0001-6271-9430
+wei-fan-hkust:
+  comment: HKUST
+  disable_name_matching: true
+  names:
+  - {first: Wei, last: Fan}
+  orcid: 0009-0008-1900-7081
+wei-liu-kcl:
+  comment: KCL
+  disable_name_matching: true
+  names:
+  - {first: Wei, last: Liu}
+  orcid: 0000-0003-0011-7797
+wei-lun-lu:
+  names:
+  - {first: Wei-lun, last: Lu}
+  - {first: Wei-Lwun, last: Lu}
+  - {first: Louis Wei-lun, last: Lu}
+wei-ying-ma:
+  names:
+  - {first: Wei-Ying, last: Ma}
+  - {first: Wei-ying, last: Ma}
+wei-yun-ma:
+  names:
+  - {first: Wei-Yun, last: Ma}
+  - {first: Wei Yun, last: Ma}
+weigang-li:
+  names:
+  - {first: Weigang, last: Li}
+  - {first: Weikang, last: Li}
+weina-zhao:
+  names:
+  - {first: Weina, last: Zhao}
+  - {first: Wei Na, last: Zhao}
+weinan-zhang:
+  names:
+  - {first: Weinan, last: Zhang}
+  - {first: Wei-Nan, last: Zhang}
+weiwei-sun-sd:
+  comment: Shandong University
+  names:
+  - {first: Weiwei, last: Sun}
+weiyi-liu:
+  names:
+  - {first: Weiyi, last: Liu}
+  - {first: Weiyi, last: Lu}
+wen-chi-hsien:
+  names:
+  - {first: Wen-Chi, last: Hsien}
+  - {first: Wen-Chi, last: Hsie}
+wen-hsiang-tu:
+  names:
+  - {first: Wen-Hsiang, last: Tu}
+  - {first: Wen-hsiang, last: Tu}
+wen-huei-cheng:
+  names:
+  - {first: Wen-Huei, last: Cheng}
+  - {first: Wen-Hui, last: Cheng}
+wen-juan-hou:
+  names:
+  - {first: Wen-Juan, last: Hou}
+  - {first: Wen, last: Juan Hou}
+  - {first: Juan, last: Wen}
+wen-lian-hsu:
+  names:
+  - {first: Wen-Lian, last: Hsu}
+  - {first: Wen-lian, last: Hsu}
+wen-tau-yih:
+  names:
+  - {first: Wen-tau, last: Yih}
+  - {first: Scott Wen-tau, last: Yih}
+wen-ting-wang:
+  names:
+  - {first: Wen Ting, last: Wang}
+  - {first: WenTing, last: Wang}
+wen-wang:
+  names:
+  - {first: Wen, last: Wang}
+  - {first: W., last: Wang}
+wendy-chapman:
+  names:
+  - {first: Wendy, last: Chapman}
+  - {first: Wendy W, last: Chapman}
+wendy-lehnert:
+  names:
+  - {first: Wendy, last: Lehnert}
+  - {first: Wendy G., last: Lehnert}
+  - {first: W., last: Lehnert}
+wenhan-chao:
+  names:
+  - {first: Wenhan, last: Chao}
+  - {first: WenHan, last: Chao}
+  - {first: Wen-Han, last: Chao}
+wenyu-zhang-cornell:
+  comment: Cornell
+  disable_name_matching: true
+  names:
+  - {first: Wenyu, last: Zhang}
+  orcid: 0000-0002-3849-4320
+wenzheng-zhang-ru:
+  comment: Rutgers University
+  disable_name_matching: true
+  names:
+  - {first: Wenzheng, last: Zhang}
+  orcid: 0009-0009-2578-9224
+whitney-l-cade:
+  names:
+  - {first: Whitney L., last: Cade}
+  - {first: Whitney, last: Cade}
+widad-mustafa-el-hadi:
+  names:
+  - {first: Widad Mustafa El, last: Hadi}
+  - {first: Widad Mustafa, last: El Hadi}
+  - {first: Widad, last: Mustafa El Hadi}
+  - {first: W., last: Mustafa El Hadi}
+willem-robert-van-hage:
+  names:
+  - {first: Willem Robert, last: van Hage}
+  - {first: Willem, last: Van Hage}
+  - {first: Willem, last: van Hage}
+william-a-baumgartner-jr:
+  names:
+  - {first: William A., last: 'Baumgartner, Jr.'}
+  - {first: William A., last: Baumgartner Jr.}
+  - {first: William A., last: Baumgartner}
+  - {first: William, last: Baumgartner}
+  - {first: William, last: Baumgartner Jr.}
+william-a-gale:
+  names:
+  - {first: William A., last: Gale}
+  - {first: William, last: Gale}
+william-a-martin:
+  names:
+  - {first: William A., last: Martin}
+  - {first: W. A., last: Martin}
+william-a-woods:
+  names:
+  - {first: William A., last: Woods}
+  - {first: W. A., last: Woods}
+william-b-dolan:
+  names:
+  - {first: William B., last: Dolan}
+  - {first: William, last: Dolan}
+  - {first: Bill, last: Dolan}
+william-c-mann:
+  names:
+  - {first: William C., last: Mann}
+  - {first: William, last: Mann}
+william-c-ogden:
+  names:
+  - {first: William C., last: Ogden}
+  - {first: William, last: Ogden}
+william-cohen:
+  names:
+  - {first: William, last: Cohen}
+  - {first: William W., last: Cohen}
+william-coster:
+  names:
+  - {first: William, last: Coster}
+  - {first: Will, last: Coster}
+william-de-beaumont:
+  names:
+  - {first: William, last: de Beaumont}
+  - {first: Will, last: de Beaumont}
+william-j-black:
+  names:
+  - {first: William J., last: Black}
+  - {first: William J, last: Black}
+  - {first: William, last: Black}
+  - {first: W.J., last: Black}
+william-j-corvey:
+  names:
+  - {first: William J., last: Corvey}
+  - {first: William, last: Corvey}
+william-j-teahan:
+  names:
+  - {first: William J., last: Teahan}
+  - {first: William J, last: Teahan}
+  - {first: W. J., last: Teahan}
+william-lewis:
+  names:
+  - {first: William, last: Lewis}
+  - {first: William D., last: Lewis}
+william-m-fisher:
+  names:
+  - {first: William M., last: Fisher}
+  - {first: William, last: Fisher}
+  - {first: W. M., last: Fisher}
+  - {first: W., last: Fisher}
+william-morgan:
+  names:
+  - {first: William, last: Morgan}
+  - {first: William T., last: Morgan}
+william-r-murray:
+  names:
+  - {first: William R., last: Murray}
+  - {first: William, last: Murray}
+william-s-y-wang:
+  names:
+  - {first: William S-Y., last: Wang}
+  - {first: William S.-Y., last: Wang}
+william-soto-martinez:
+  names:
+  - {first: William, last: Soto Martinez}
+  - {first: William, last: Soto}
+wim-peters:
+  names:
+  - {first: Wim, last: Peters}
+  - {first: W., last: Peters}
+winston-n-anderson:
+  names:
+  - {first: Winston N, last: Anderson}
+  - {first: Winston, last: Anderson}
+witold-drozdzynski:
+  names:
+  - {first: Witold, last: Drożdżyński}
+  - {first: Witold, last: Drozdzynski}
+wojciech-skalmowski:
+  names:
+  - {first: Wojciech, last: Skalmowski}
+  - {first: W., last: Skalmowski}
+wolfgang-hoeppner:
+  names:
+  - {first: Wolfgang, last: Hoeppner}
+  - {first: W., last: Hoeppner}
+wolfgang-klein:
+  names:
+  - {first: Wolfgang, last: Klein}
+  - {first: W., last: Klein}
+wolfgang-wahlster:
+  names:
+  - {first: Wolfgang, last: Wahlster}
+  - {first: W., last: Wahlster}
+won-ho-ryu:
+  names:
+  - {first: Won Ho, last: Ryu}
+  - {first: Won-Ho, last: Ryu}
+woong-ki-lee:
+  names:
+  - {first: Woong Ki, last: Lee}
+  - {first: Woong-Ki, last: Lee}
+xabier-arregi:
+  names:
+  - {first: Xabier, last: Arregi}
+  - {first: X, last: Arregi}
+  - {first: X., last: Arregi}
+xabier-artola:
+  names:
+  - {first: Xabier, last: Artola}
+  - {first: X, last: Artola}
+  - {first: X., last: Artola}
+xabier-saralegi:
+  names:
+  - {first: Xabier, last: Saralegi}
+  - {first: X., last: Saralegi}
+xabier-zalbide:
+  names:
+  - {first: Xabier, last: Zalbide}
+  - {first: X., last: Zalbide}
+xavier-briffault:
+  names:
+  - {first: Xavier, last: Briffault}
+  - {first: X., last: Briffault}
+xavier-gomez-guinovart:
+  names:
+  - {first: Xavier, last: Gómez Guinovart}
+  - {first: Xavier, last: Gómez-Guinovart}
+xi-victoria-lin:
+  comment: U of Washington, Meta
+  names:
+  - {first: Xi Victoria, last: Lin}
+xia-wang:
+  names:
+  - {first: Xia, last: Wang}
+  - {first: X. S., last: Wang}
+xiang-dai:
+  names:
+  - {first: Xiang, last: Dai}
+  - {first: Xiangying, last: Dai}
+xiangfeng-wei:
+  names:
+  - {first: Xiangfeng, last: Wei}
+  - {first: XiangFeng, last: Wei}
+xiangji-huang:
+  names:
+  - {first: Xiangji, last: Huang}
+  - {first: Jimmy Xiangji, last: Huang}
+xiao-long-wang:
+  names:
+  - {first: Xiao-Long, last: Wang}
+  - {first: XiaoLong, last: Wang}
+  - {first: Xiao-long, last: Wang}
+xiaojin-zhu:
+  names:
+  - {first: Xiaojin, last: Zhu}
+  - {first: Xiaojin Jerry, last: Zhu}
+xiaojun-lin:
+  names:
+  - {first: Xiaojun, last: Lin}
+  - {first: Xiaojun, last: Li}
+xiaolei-wang-fudan:
+  comment: Fudan
+  names:
+  - {first: Xiaolei, last: Wang}
+xiaolei-wang-renmin:
+  comment: Renmin
+  names:
+  - {first: Xiaolei, last: Wang}
+xiaoqiang-luo:
+  names:
+  - {first: Xiaoqiang, last: Luo}
+  - {first: X., last: Luo}
+xin-luna-dong:
+  names:
+  - {first: Xin Luna, last: Dong}
+  - {first: Xin, last: Dong}
+xin-xu-ucsd:
+  comment: UCSD
+  disable_name_matching: true
+  names:
+  - {first: Xin, last: Xu}
+  orcid: 0000-0001-5238-0955
+xin-ying-qiu:
+  names:
+  - {first: Xin Ying, last: Qiu}
+  - {first: Xinying, last: Qiu}
+xinnian-mao:
+  names:
+  - {first: Xinnian, last: Mao}
+  - {first: Xin, last: Mao}
+xinpeng-wang-lmu:
+  degree: Ludwig Maximilian University of Munich (LMU)
+  disable_name_matching: true
+  names:
+  - {first: Xinpeng, last: Wang}
+  orcid: 0009-0006-5213-1119
+xinyu-dai:
+  names:
+  - {first: Xinyu, last: Dai}
+  - {first: Xin-yu, last: Dai}
+  - {first: Xin-Yu, last: Dai}
+xinyu-deng:
+  names:
+  - {first: Xinyu, last: Deng}
+  - {first: XinYu, last: Deng}
+xiuzhen-jenny-zhang:
+  names:
+  - {first: Xiuzhen (Jenny), last: Zhang}
+  - {first: Xiuzhen, last: Zhang}
+xixian-chen:
+  names:
+  - {first: Xixian, last: Chen}
+  - {first: XiXian, last: Chen}
+xuan-jing-huang:
+  names:
+  - {first: Xuan-Jing, last: Huang}
+  - {first: Xuan-jing, last: Huang}
+  - {first: Xuanjing, last: Huang}
+xuan-long-do:
+  names:
+  - {first: Xuan Long, last: Do}
+  - {first: Do Xuan, last: Long}
+xuan-luong-vu:
+  names:
+  - {first: Xuan Luong, last: Vu}
+  - {first: Xuân Lương, last: Vũ}
+  - {first: Xuan-Luong, last: Vu}
+xuan-nga-cao:
+  names:
+  - {first: Xuan-Nga, last: Cao}
+  - {first: Xuân-Nga, last: Cao}
+  - {first: Xuân-Nga Cao, last: Kam}
+xuedong-huang:
+  names:
+  - {first: Xuedong, last: Huang}
+  - {first: X.D., last: Huang}
+  - {first: X., last: Huang}
+xueqi-cheng:
+  names:
+  - {first: Xueqi, last: Cheng}
+  - {first: Xue-Qi, last: Cheng}
+y-albert-park:
+  names:
+  - {first: Y. Albert, last: Park}
+  - {first: Albert, last: Park}
+ya-li-cas:
+  degree: Chinese Academy of Sciences
+  disable_name_matching: true
+  names:
+  - {first: Ya, last: Li}
+  orcid: 0000-0002-6284-5039
+ya-ting-lin:
+  names:
+  - {first: Ya-Ting, last: Lin}
+  - {first: Ya-Ting, last: Li}
+yaakov-hacohen-kerner:
+  names:
+  - {first: Yaakov, last: HaCohen-Kerner}
+  - {first: Yaakov, last: Hacohen-Kerner}
+yael-cohen-sygal:
+  names:
+  - {first: Yael, last: Cohen-Sygal}
+  - {first: Yael, last: Sygal}
+yael-netzer:
+  names:
+  - {first: Yael, last: Netzer}
+  - {first: Yael Dahan, last: Netzer}
+  - {first: Yael, last: Dahan}
+yael-ravin:
+  names:
+  - {first: Yael, last: Ravin}
+  - {first: Y., last: Ravin}
+yajuan-lu:
+  names:
+  - {first: Yajuan, last: Lü}
+  - {first: Yajuan, last: Lu}
+  - {first: Yajuan, last: Lv}
+yan-zuo-zhou:
+  names:
+  - {first: Yan-Zuo, last: Zhou}
+  - {first: Yen-zuo, last: Zhou}
+yang-janet-liu:
+  comment: Georgetown University; 刘洋
+  names:
+  - {first: Yang Janet, last: Liu}
+  - {first: Yang, last: Liu}
+yang-liu-3m:
+  comment: 3M Health Information Systems
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-blcu:
+  comment: Beijing Language and Culture University
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-dt:
+  comment: National University of Defense Technology
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-edinburgh:
+  comment: Edinburgh Ph.D., Microsoft
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-helsinki:
+  comment: University of Helsinki
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-hk:
+  comment: The Chinese University of Hong Kong (Shenzhen)
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-icsi:
+  comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon
+  names:
+  - {first: Yang, last: Liu}
+  - {first: Y., last: Liu}
+yang-liu-ict:
+  comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-microsoft:
+  comment: Microsoft Cognitive Services Research
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-pk:
+  comment: Peking University
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-ss:
+  comment: Samsung Research Center Beijing
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-tianjin:
+  comment: Tianjin University, China
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-umich:
+  comment: Univ. of Michigan, UC Santa Cruz
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-wl:
+  comment: Wilfrid Laurier University
+  names:
+  - {first: Yang, last: Liu}
+yang-zhang-ustc:
+  comment: USTC
+  disable_name_matching: true
+  names:
+  - {first: Yang, last: Zhang}
+  orcid: 0000-0002-7863-5183
+yannick-marchand:
+  names:
+  - {first: Yannick, last: Marchand}
+  - {first: Y., last: Marchand}
+yannick-mathieu:
+  names:
+  - {first: Yannick, last: Mathieu}
+  - {first: Yvette Yannick, last: Mathieu}
+  - {first: Yvette, last: Mathieu}
+yao-ting-sung:
+  names:
+  - {first: Yao-Ting, last: Sung}
+  - {first: Yao-Ting, last: Hung}
+yao-yao:
+  names:
+  - {first: Yao, last: Yao}
+yao-yao-uwisc:
+  names:
+  - {first: Yao, last: Yao}
+yao-zhong-zhang:
+  names:
+  - {first: Yao-Zhong, last: Zhang}
+  - {first: Yao Zhong, last: Zhang}
+  - {first: Yao-zhong, last: Zhang}
+  - {first: Y., last: Zhang}
+yaosheng-yang:
+  names:
+  - {first: Yaosheng, last: Yang}
+  - {first: YaoSheng, last: Yang}
+yaser-al-onaizan:
+  names:
+  - {first: Yaser, last: Al-Onaizan}
+  - {first: Yaser, last: Al-onaizan}
+yau-tarng-juang:
+  names:
+  - {first: Yau-Tarng, last: Juang}
+  - {first: Yau-Tang, last: Juang}
+yen-lu-chow:
+  names:
+  - {first: Yen-Lu, last: Chow}
+  - {first: Yen-lu, last: Chow}
+yeon-su-lee:
+  names:
+  - {first: Yeon Su, last: Lee}
+  - {first: Yeon-Su, last: Lee}
+yerai-doval:
+  names:
+  - {first: Yerai, last: Doval}
+  - {first: Yerai, last: Doval Mosquera}
+yi-cheng-pan:
+  names:
+  - {first: Yi-Cheng, last: Pan}
+  - {first: Yi-cheng, last: Pan}
+yi-jing-zhao:
+  names:
+  - {first: Yi-jing, last: Zhao}
+  - {first: Yi-Jing, last: Hao}
+yi-rong-chen:
+  names:
+  - {first: Yi-Rong, last: Chen}
+  - {first: YiRong, last: Chen}
+  - {first: Yi-Rung, last: Chen}
+yichun-chen:
+  names:
+  - {first: YiChun, last: Chen}
+  - {first: Yi-Chun, last: Chen}
+yifan-peng-cmu:
+  comment: cmu
+  names:
+  - {first: Yifan, last: Peng}
+yih-ru-wang:
+  names:
+  - {first: Yih-Ru, last: Wang}
+  - {first: Yih-ru, last: Wang}
+ying-chieh-tu:
+  names:
+  - {first: Ying-Chieh, last: Tu}
+  - {first: Ying-chieh, last: Tu}
+ying-mei-guo:
+  names:
+  - {first: Ying-Mei, last: Guo}
+  - {first: YingMei, last: Guo}
+ying-zhang:
+  names:
+  - {first: Ying, last: Zhang}
+  - {first: Joy Ying, last: Zhang}
+yingju-xia:
+  names:
+  - {first: Yingju, last: Xia}
+  - {first: Ying-Ju, last: Xia}
+  - {first: YingJu, last: Xia}
+yixuan-tang-hkust:
+  comment: HKUST
+  disable_name_matching: true
+  names:
+  - {first: Yixuan, last: Tang}
+  orcid: 0009-0006-2405-2026
+yiyang-du-cmu:
+  comment: CMU
+  disable_name_matching: true
+  names:
+  - {first: Yiyang, last: Du}
+  orcid: 0009-0007-1949-9736
+yoan-gutierrez:
+  names:
+  - {first: Yoan, last: Gutiérrez}
+  - {first: Yoan, last: Gutiérrez Vázquez}
+yoichi-yamashita:
+  names:
+  - {first: Yoichi, last: Yamashita}
+  - {first: Y., last: Yamashita}
+yonael-gorfu:
+  names:
+  - {first: Yonael, last: Gorfu}
+  - {first: Y., last: Gorfu}
+yong-hun-lee:
+  names:
+  - {first: Yong-Hun, last: Lee}
+  - {first: Yong-hun, last: Lee}
+yongcheng-wang:
+  names:
+  - {first: YongCheng, last: Wang}
+  - {first: Yong-Cheng, last: Wang}
+  - {first: Yong Cheng, last: Wang}
+yonglin-teng:
+  names:
+  - {first: Yonglin, last: Teng}
+  - {first: Yong-lin, last: Teng}
+yongqi-li-hk:
+  comment: The Hong Kong Polytechnic University
+  names:
+  - {first: Yongqi, last: Li}
+yongqi-li-wuhan:
+  comment: Wuhan University
+  names:
+  - {first: Yongqi, last: Li}
+yoong-keok-lee:
+  names:
+  - {first: Yoong Keok, last: Lee}
+  - {first: Yoong, last: Keok Lee}
+yorick-wilks:
+  names:
+  - {first: Yorick, last: Wilks}
+  - {first: Y., last: Wilks}
+yoshi-suhara:
+  names:
+  - {first: Yoshi, last: Suhara}
+  - {first: Yoshihiko, last: Suhara}
+yoshihide-kato:
+  names:
+  - {first: Yoshihide, last: Kato}
+  - {first: Yoshihide, last: Sato}
+yoshihiko-hayashi:
+  names:
+  - {first: Yoshihiko, last: Hayashi}
+  - {first: Y., last: Hayashi}
+yoshihiro-tomiyama:
+  names:
+  - {first: Yoshihiro, last: Tomiyama}
+  - {first: Y., last: Tomiyama}
+yoshihiro-ueda:
+  names:
+  - {first: Yoshihiro, last: Ueda}
+  - {first: Y., last: Ueda}
+you-shan-chung:
+  names:
+  - {first: You-Shan, last: Chung}
+  - {first: You-shan, last: Chung}
+young-chol-song:
+  names:
+  - {first: Young Chol, last: Song}
+  - {first: Young C., last: Song}
+young-gil-kim:
+  names:
+  - {first: Young-Gil, last: Kim}
+  - {first: Young-Kil, last: Kim}
+  - {first: Young Kil, last: Kim}
+  - {first: Young-Kill, last: Kim}
+  - {first: YoungKil, last: Kim}
+younggyun-hahm:
+  names:
+  - {first: Younggyun, last: Hahm}
+  - {first: YoungGyun, last: Hahm}
+yu-da-lai:
+  names:
+  - {first: Yu-da, last: Lai}
+  - {first: Yu-Da, last: Lai}
+yu-hang-mao:
+  names:
+  - {first: Yu Hang, last: Mao}
+  - {first: Yu-Hang, last: Mao}
+  - {first: Yuhang, last: Mao}
+yu-ling-una-hsu:
+  names:
+  - {first: Yu-Ling Una, last: Hsu}
+  - {first: Yu-Ling, last: Hsu}
+yu-wei-chang:
+  names:
+  - {first: Yu-wei, last: Chang}
+  - {first: Yu-Wei, last: Chang}
+yuang-chin-chiang:
+  names:
+  - {first: Yuang-Chin, last: Chiang}
+  - {first: Yuang-chin, last: Chiang}
+yuanzhu-peter-chen:
+  names:
+  - {first: Yuanzhu Peter, last: Chen}
+  - {first: Peter, last: Chen}
+yucel-saygin:
+  names:
+  - {first: Yucel, last: Saygin}
+  - {first: Yücel, last: Saygın}
+yue-li-ecnu:
+  degree: East China Normal University
+  disable_name_matching: true
+  names:
+  - {first: Yue, last: Li}
+  orcid: 0009-0005-5509-2103
+yuen-hsien-tseng:
+  names:
+  - {first: Yuen-Hsien, last: Tseng}
+  - {first: Yuan-Hsien, last: Tseng}
+yufang-sun:
+  names:
+  - {first: Yufang, last: Sun}
+  - {first: Yu-fang, last: Sun}
+yuguang-duan:
+  names:
+  - {first: Yuguang, last: Duan}
+  - {first: Yu, last: Duan}
+yuhao-wang-renmin:
+  comment: Renmin
+  disable_name_matching: true
+  names:
+  - {first: Yuhao, last: Wang}
+  orcid: 0009-0001-5760-9285
+yuji-matsumoto:
+  names:
+  - {first: Yuji, last: Matsumoto}
+  - {first: Yūji, last: Matsumoto}
+yuka-takei:
+  names:
+  - {first: Yuka, last: Takei}
+  - {first: Yuya, last: Takei}
+yuka-tateisi:
+  names:
+  - {first: Yuka, last: Tateisi}
+  - {first: Yuka, last: Tateishi}
+yukihiro-itoh:
+  names:
+  - {first: Yukihiro, last: Itoh}
+  - {first: Yukihiro, last: Ito}
+yukiko-i-nakano:
+  names:
+  - {first: Yukiko I., last: Nakano}
+  - {first: Yukiko, last: Nakano}
+yun-cheng-ju:
+  names:
+  - {first: Yun-Cheng, last: Ju}
+  - {first: Yun Cheng, last: Ju}
+yun-qian-qu:
+  names:
+  - {first: Yun-Qian, last: Qu}
+  - {first: Yunqian, last: Qu}
+yung-taek-kim:
+  names:
+  - {first: Yung Taek, last: Kim}
+  - {first: Yung-Taek, last: Kim}
+yuqing-guo:
+  names:
+  - {first: Yuqing, last: Guo}
+  - {first: Yuqing, last: Gao}
+yurii-kuratov:
+  names:
+  - {first: Yurii, last: Kuratov}
+  - {first: Yuri, last: Kuratov}
+zachary-c-lipton:
+  names:
+  - {first: Zachary C., last: Lipton}
+  - {first: Zachary, last: Lipton}
+zaharin-yusoff:
+  names:
+  - {first: Zaharin, last: Yusoff}
+  - {first: Y., last: Zaharin}
+zahurul-islam:
+  names:
+  - {first: Zahurul, last: Islam}
+  - {first: Zahrul, last: Islam}
+zarah-weiss:
+  names:
+  - {first: Zarah, last: Weiss}
+  - {first: Zarah, last: Weiß}
+zdenek-zabokrtsky:
+  names:
+  - {first: Zdeněk, last: Žabokrtský}
+  - {first: Zdenek, last: Zabokrtsky}
+  - {first: Zdenĕk, last: Žabokrtský}
+  - {first: Zdenek, last: Žabokrtsky}
+zdenka-uresova:
+  names:
+  - {first: Zdenka, last: Uresova}
+  - {first: Zdeňka, last: Urešová}
+zdravko-kacic:
+  names:
+  - {first: Zdravko, last: Kačič}
+  - {first: Zdravko, last: Kacic}
+ze-yu-zheng:
+  names:
+  - {first: Ze-yu, last: Zheng}
+  - {first: Zeyu, last: Zheng}
+zeerak-talat:
+  names:
+  - {first: Zeerak, last: Talat}
+  - {first: Zeerak, last: Waseem}
+zeljko-agic:
+  names:
+  - {first: Željko, last: Agić}
+  - {first: Zeljko, last: Agic}
+zeynep-orhan:
+  names:
+  - {first: Zeynep, last: Orhan}
+  - {first: Orhan, last: Zeynep}
+zhao-ming-gao:
+  names:
+  - {first: Zhao Ming, last: Gao}
+  - {first: Zhao-Ming, last: Gao}
+  - {first: Zhao-ming, last: Gao}
+zhaoyan-ming:
+  names:
+  - {first: Zhaoyan, last: Ming}
+  - {first: Zhao-Yan, last: Ming}
+zheng-ping-jiang:
+  names:
+  - {first: Zheng Ping, last: Jiang}
+  - {first: Zhengping, last: Jiang}
+zheng-yu-niu:
+  names:
+  - {first: Zheng-Yu, last: Niu}
+  - {first: Zheng Yu, last: Niu}
+  - {first: Zhengyu, last: Niu}
+zheng-yuan-cambridge:
+  comment: Cambridge
+  disable_name_matching: true
+  names:
+  - {first: Zheng, last: Yuan}
+  orcid: 0000-0003-2406-1708
+zhengxian-gong:
+  names:
+  - {first: Zhengxian, last: Gong}
+  - {first: ZhengXian, last: Gong}
+zhengyan-shi:
+  degree: University College London
+  names:
+  - {first: Zhengyan, last: Shi}
+  - {first: Zhengxiang, last: Shi}
+  orcid: 0000-0003-3074-3035
+zhi-hong-deng:
+  names:
+  - {first: Zhi-Hong, last: Deng}
+  - {first: Zhihong, last: Deng}
+zhi-min-zhou:
+  names:
+  - {first: Zhi Min, last: Zhou}
+  - {first: Zhi-Min, last: Zhou}
+zhicheng-guo-tsinghua:
+  comment: Tsinghua
+  names:
+  - {first: Zhicheng, last: Guo}
+zhicheng-guo-xidian:
+  comment: xidian
+  names:
+  - {first: Zhicheng, last: Guo}
+zhihan-zhang-smu:
+  degree: Singapore Management University
+  disable_name_matching: true
+  names:
+  - {first: Zhihan, last: Zhang}
+  orcid: 0009-0009-5813-9172
+zhihao-wang-xu:
+  degree: Xiamen University
+  disable_name_matching: true
+  names:
+  - {first: Zhihao, last: Wang}
+  orcid: 0009-0008-7497-6467
+zhihao-zhang-soochow:
+  comment: Soochow
+  disable_name_matching: true
+  names:
+  - {first: Zhihao, last: Zhang}
+  orcid: 0000-0001-9283-101X
+zhiming-xu:
+  names:
+  - {first: Zhiming, last: Xu}
+  - {first: Zhi-Ming, last: Xu}
+zhiyu-chen-lehigh:
+  comment: Lehigh University
+  disable_name_matching: true
+  names:
+  - {first: Zhiyu, last: Chen}
+  orcid: 0000-0002-3096-7912
+zihao-li-helsinki:
+  comment: Helsinki
+  disable_name_matching: true
+  names:
+  - {first: Zihao, last: Li}
+  orcid: 0009-0008-9329-5341
+ziortza-polin:
+  names:
+  - {first: Ziortza, last: Polin}
+  - {first: Z., last: Polin}
+zoltan-alexin:
+  names:
+  - {first: Zoltán, last: Alexin}
+  - {first: Z., last: Alexin}
+zoya-m-shalyapina:
+  names:
+  - {first: Zoya M., last: Shalyapina}
+  - {first: Zoyn M., last: Shalyapina}
+  - {first: Z.M., last: Shalyapina}
+  - {first: Z. M., last: Shalyapina}
+zuzana-fraterova:
+  names:
+  - {first: Zuzana, last: Fraterova}
+  - {first: Zuzana, last: Fráterová}
diff --git a/hugo/content/info/verification.md b/hugo/content/info/verification.md
new file mode 100644
index 0000000000..b934c2c152
--- /dev/null
+++ b/hugo/content/info/verification.md
@@ -0,0 +1,11 @@
+---
+Title: Verified authors
+linktitle: Verification
+subtitle: How the ACL Anthology verifies authors
+date: "2025-09-19"
+---
+The ACL Anthology distinguishes between verified and unverified people.
+
+A _verified_ person is one for whom we have an explicit entry in our names database.
+This can happen in two ways: manual verification, or automatically via the provision
+of an ORCID iD.
diff --git a/hugo/content/people/_content.gotmpl b/hugo/content/people/_content.gotmpl
index 8b3bc09fe8..d3eb6da03e 100644
--- a/hugo/content/people/_content.gotmpl
+++ b/hugo/content/people/_content.gotmpl
@@ -2,7 +2,7 @@
    {{ $page := dict
       "kind" "page"
       "path" $person_id
-      "slug" $person_id
+      "slug" (index (split $person_id "/") -1)
       "params" (dict "name" $person_id "lastname" $person.last)
       "title" $person.full
    }}
diff --git a/hugo/layouts/_default/baseof.html b/hugo/layouts/_default/baseof.html
index 61777eea19..bf0a9eb7c6 100644
--- a/hugo/layouts/_default/baseof.html
+++ b/hugo/layouts/_default/baseof.html
@@ -22,7 +22,7 @@
     {{ $sass_options := (dict "includePaths" (slice "assets/css" "assets/css/vendor/bootstrap/scss")) }}
     {{ $style := resources.Get "css/main.scss" | toCSS $sass_options | minify | fingerprint }}
     <link rel="stylesheet" href="{{ $style.RelPermalink }}" media="screen" />
-    <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.7.2/css/all.css" integrity="sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr" crossorigin="anonymous">
+    <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.11.0/css/all.css" integrity="sha384-XLYVh3ZsmvjnjODXg/qvDYjcINmPLORACP+Tk6qA3jNLbStl84PzAeEz2Su02511" crossorigin="anonymous">
     <link rel="stylesheet" href="{{ relURL "css/academicons.min.css" }}">
 
     {{ block "meta" . }}{{ end }}
diff --git a/hugo/layouts/people/single.html b/hugo/layouts/people/single.html
index a4348e8996..5a5e7c10db 100644
--- a/hugo/layouts/people/single.html
+++ b/hugo/layouts/people/single.html
@@ -4,6 +4,15 @@
   <h2 id="title">
     <!-- {{- .Title -}} -->
     <span class="font-weight-normal">{{ $person.first }}</span> <span class="font-weight-bold">{{ $person.last }}</span>
+    {{ with $person.orcid }}
+      <a href="https://orcid.org/{{.}}" title="ORCID profile" target="_blank" rel="noopener">
+        <i class="fab fa-orcid fa-sm"></i>
+      </a>
+    {{ else }}
+      <a href="{{ ref . `info/verification` }}" title="How to verify an author" target="_blank" rel="noopener">
+        <i class="fas fa-question-circle fa-sm" style="opacity: 0.2"></i>
+      </a>
+    {{ end }}
   </h2>
   {{ with $person.comment }}
   <p class="font-weight-light text-muted">{{.}}</p>
diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess
index 6f95b2e073..ed7455ab09 100644
--- a/hugo/static/.htaccess
+++ b/hugo/static/.htaccess
@@ -81,6 +81,10 @@ RewriteRule ^thumb/(.*)$ anthology-files/thumb/$1 [L,NC]
 # since the pattern-match can't match source side.
 RewriteRule ^people/[a-z]/([\-a-z0-9]+)/?$ people/$1/ [R=301,L,NC]
 
+# If the requested author page does not exist, soft-redirect [303 See Other] to the unverified/ URL
+RewriteCond %{REQUEST_FILENAME} !-d
+RewriteRule ^people/([^/]+)/?$ people/unverified/$1/ [L,R=303]
+
 # Videos
 ## match old-style URLs, e.g., /N13-1001.mp4 -> anthology-files/videos/N/N13-1001.mp4
 ## also supports videos split into pieces, e.g., /N13-4001.1.mp4
@@ -101,4 +105,4 @@ RewriteRule \.copyright\.pdf$ - [R=404,L]
 Options +ExecCGI
 AddHandler cgi-script .cgi
 RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+?)(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC]
-RewriteRule ^([A-Za-z]\d{2}\-\d{4})(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC]
\ No newline at end of file
+RewriteRule ^([A-Za-z]\d{2}\-\d{4})(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC]
diff --git a/hugo/static/images/orcid_16x16.gif.webp b/hugo/static/images/orcid_16x16.gif.webp
new file mode 100644
index 0000000000..281082d57f
Binary files /dev/null and b/hugo/static/images/orcid_16x16.gif.webp differ
diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index ff2aadaa04..8bcb077605 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Changelog
 
+## [Unreleased]
+
+This release implements the new [name resolution and author ID logic](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan), and is therefore fundamentally incompatible with ACL Anthology data before the switch to this new system.
+
+### Added
+
+- NameSpecification now provides an `orcid` field.
+- Person:
+  - Now provides `orcid`, `degree`, `disable_name_matching`, and `similar_ids` fields that correspond to the respective fields in the new `people.yaml`.
+  - Changing `id`, `orcid`, `names`, or using `add_name()` or `remove_names()` will now automatically update the PersonIndex.
+  - Added `update_id()` that updates a person's ID on all of their connected papers.
+  - Added `make_explicit()` that makes all necessary changes to change an implicit ("unverified/") to an explicit Person.
+- PersonIndex:
+  - Now also indexes Person objects by ORCID, and provides `by_orcid` and `get_by_orcid()`.
+  - Now also keeps a mapping of name slugs to (verified) person IDs, via `slugs_to_verified_ids` (mostly for internal use).
+  - Added `ingest_namespec()` to implement the [matching logic on ingestion](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan#ingestion) of new volumes.
+  - Added `create_person()` to instantiate a new Person and add it to the index.
+
+### Changed
+
+- Several breaking changes to PersonIndex for the new author ID system:
+  - Loading the index now expects a `people.yaml` file instead of `name_variants.yaml`.
+  - Renamed `get_or_create_person()` to `resolve_namespec()` and refactored it to reflect the [new name resolution logic](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan#proposed-name-resolution-logic).
+  - Renamed `name_to_ids` to `by_name`, in line with the new `by_orcid` field.
+  - Changed the type of exceptions that can be raised; `AmbiguousNameError` was replaced by `NameSpecResolutionError` and `PersonDefinitionError`.
+  - Changed the previously experimental `save()` function to serialize the `people.yaml` file.
+- Person now stores names as tuples of `(Name, NameLink)`, the latter of which indicates if the name was explicitly defined in `people.yaml` or inferred by the name resolution logic (e.g. via slug matching).  As a consequence, `Person.names` can no longer be modified in-place; use `Person.add_name()`, `Person.remove_name()`, or the setter of `Person.names`.
+- Setting a canonical name for a Person changed from `.set_canonical_name()` to `Person.canonical_name = ...`
+
 ## [0.5.3] — 2025-06-22
 
 This release adds more functionality for ingesting new proceedings and modifying existing data.
diff --git a/python/acl_anthology/collections/collection.py b/python/acl_anthology/collections/collection.py
index cd868a3ae7..29615e78f7 100644
--- a/python/acl_anthology/collections/collection.py
+++ b/python/acl_anthology/collections/collection.py
@@ -193,8 +193,10 @@ def create_volume(
         )
         volume.is_data_loaded = True
 
-        # For convenience, if editors were given, we add them to the index here
+        # If editors were given, we fill in their ID & add them to the index
         if volume.editors:
+            for namespec in volume.editors:
+                self.root.people.ingest_namespec(namespec)
             self.root.people._add_to_index(volume.editors, volume.full_id_tuple)
 
         self.data[id] = volume
@@ -302,7 +304,7 @@ def save(self, path: Optional[StrPath] = None, minimal_diff: bool = True) -> Non
             minimal_diff: If True (default), will compare against an existing XML file in `self.path` to minimize the difference, i.e., to prevent noise from changes in the XML that make no semantic difference.  See [`utils.xml.ensure_minimal_diff`][acl_anthology.utils.xml.ensure_minimal_diff] for details.
         """
         if path is None:
-            path = self.path
+            path = self.path  # pragma: no cover
         collection = etree.Element("collection", {"id": self.id})
         for volume in self.volumes():
             collection.append(volume.to_xml(with_papers=True))
diff --git a/python/acl_anthology/collections/paper.py b/python/acl_anthology/collections/paper.py
index b48807fae9..b95e04896a 100644
--- a/python/acl_anthology/collections/paper.py
+++ b/python/acl_anthology/collections/paper.py
@@ -252,7 +252,7 @@ class Paper:
         type: The paper's type, currently used to mark frontmatter and backmatter.
     """
 
-    id: str = field(converter=int_to_str)
+    id: str = field(converter=int_to_str)  # validator defined below
     parent: Volume = field(repr=False, eq=False)
     bibkey: str = field(
         on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_bibkey_index),
diff --git a/python/acl_anthology/collections/volume.py b/python/acl_anthology/collections/volume.py
index ee6dba7aeb..07d3857d5b 100644
--- a/python/acl_anthology/collections/volume.py
+++ b/python/acl_anthology/collections/volume.py
@@ -74,7 +74,7 @@ class Volume(SlottedDict[Paper]):
         shorttitle: A shortened form of the title. (Aliased to `shortbooktitle` for initialization.)
     """
 
-    id: str = field(converter=int_to_str)
+    id: str = field(converter=int_to_str)  # validator defined below
     parent: Collection = field(repr=False, eq=False)
     type: VolumeType = field(repr=False, converter=VolumeType)
     title: MarkupText = field(alias="booktitle")
@@ -276,10 +276,14 @@ def create_paper(
         # Necessary because on_setattr is not called during initialization:
         paper.bibkey = bibkey  # triggers bibkey generating (if necessary) & indexing
 
-        # For convenience, if authors/editors were given, we add them to the index here
+        # If authors/editors were given, we fill in their ID & add them to the index
         if paper.authors:
+            for namespec in paper.authors:
+                self.root.people.ingest_namespec(namespec)
             self.root.people._add_to_index(paper.authors, paper.full_id_tuple)
         if paper.editors:
+            for namespec in paper.editors:
+                self.root.people.ingest_namespec(namespec)
             self.root.people._add_to_index(paper.editors, paper.full_id_tuple)
 
         self.data[id] = paper
diff --git a/python/acl_anthology/exceptions.py b/python/acl_anthology/exceptions.py
index e091a51f9f..008b2d1e33 100644
--- a/python/acl_anthology/exceptions.py
+++ b/python/acl_anthology/exceptions.py
@@ -18,7 +18,7 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from .people import Name, NameSpecification
+    from .people import NameSpecification
     from .utils.ids import AnthologyIDTuple
 
 if sys.version_info >= (3, 11):
@@ -40,19 +40,6 @@ def add_note(self, note: str) -> None:
             self.__notes__.append(note)
 
 
-class AmbiguousNameError(AnthologyException):
-    """Raised when an ambiguous name would need an explicit and unique ID, but does not have one.
-
-    Attributes:
-        name (Name): The name that raised the error.
-    """
-
-    def __init__(self, name: Name, message: str) -> None:
-        super().__init__(message)
-        self.name = name
-        self.add_note("Did you forget to add an explicit/unique ID to this name?")
-
-
 class AnthologyDuplicateIDError(AnthologyException, ValueError):
     """Raised when trying to set an ID or create an item with an ID that already exists.
 
@@ -91,10 +78,10 @@ def __init__(self, parent: AnthologyIDTuple, tag: str, message: str) -> None:
         self.tag = tag
 
 
-class NameIDUndefinedError(AnthologyException):
-    """Raised when an author ID was requested that is not defined.
+class NameSpecResolutionError(AnthologyException):
+    """Raised when a NameSpecification cannot be resolved to a person.
 
-    This can happen when an `<author>` or `<editor>` was used with an ID which was not defined in `name_variants.yaml`, or when trying to look up a NameSpecification that does not correspond to any Person in the PersonIndex.
+    This should never happen with a NameSpecification from the loaded Anthology data, but might happen if a NameSpecification is manually created.
 
     Attributes:
         name_spec (NameSpecification): The name specification that raised the error.
@@ -105,6 +92,18 @@ def __init__(self, name_spec: NameSpecification, message: str) -> None:
         self.name_spec = name_spec
 
 
+class PersonDefinitionError(NameSpecResolutionError):
+    """Raised when a NameSpecification defines an ID, but either the ID or one of its fields is not compatible with the definition in `people.yaml`.
+
+    This can happen when an `<author>` or `<editor>` is used with an ID which was not defined in `people.yaml`; when the name used together with this ID was not listed among the possible names in `people.yaml`; or when the ORCID used together with this ID does not match the ORCID defined in `people.yaml`.
+
+    Attributes:
+        name_spec (NameSpecification): The name specification that raised the error.
+    """
+
+    pass
+
+
 class SchemaMismatchWarning(UserWarning):
     """Raised when the data directory contains a different XML schema as this library.
 
@@ -118,4 +117,4 @@ def __init__(self) -> None:
         super().__init__(
             "Data directory contains a different schema.rnc as this library; "
             "you might need to update the data or the acl-anthology library."
-        )
+        )  # pragma: no cover
diff --git a/python/acl_anthology/people/__init__.py b/python/acl_anthology/people/__init__.py
index 49063fa6de..99053f9a78 100644
--- a/python/acl_anthology/people/__init__.py
+++ b/python/acl_anthology/people/__init__.py
@@ -13,7 +13,14 @@
 # limitations under the License.
 
 from .name import Name, NameSpecification, ConvertableIntoName
-from .person import Person
+from .person import Person, NameLink
 from .index import PersonIndex
 
-__all__ = ["ConvertableIntoName", "Name", "NameSpecification", "Person", "PersonIndex"]
+__all__ = [
+    "ConvertableIntoName",
+    "Name",
+    "NameLink",
+    "NameSpecification",
+    "Person",
+    "PersonIndex",
+]
diff --git a/python/acl_anthology/people/index.py b/python/acl_anthology/people/index.py
index 9a3df2423a..980909d464 100644
--- a/python/acl_anthology/people/index.py
+++ b/python/acl_anthology/people/index.py
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from attrs import define, field, asdict
+from attrs import define, field
 from collections.abc import Iterable
 from collections import Counter, defaultdict
 import itertools as it
@@ -22,7 +22,7 @@
 from rich.progress import track
 from scipy.cluster.hierarchy import DisjointSet  # type: ignore
 import sys
-from typing import cast, Any, TYPE_CHECKING
+from typing import cast, Any, Optional, TYPE_CHECKING
 import yaml
 
 try:
@@ -31,10 +31,16 @@
     from yaml import Loader, Dumper  # type: ignore
 
 from ..containers import SlottedDict
-from ..exceptions import AnthologyException, AmbiguousNameError, NameIDUndefinedError
-from ..utils.ids import AnthologyIDTuple
+from ..exceptions import (
+    AnthologyException,
+    AnthologyInvalidIDError,
+    NameSpecResolutionError,
+    PersonDefinitionError,
+)
+from ..utils.ids import AnthologyIDTuple, is_verified_person_id
 from ..utils.logging import get_logger
-from . import Person, Name, NameSpecification
+from . import Person, Name, NameLink, NameSpecification
+from .name import _YAMLName
 
 if TYPE_CHECKING:
     from _typeshed import StrPath
@@ -42,7 +48,7 @@
     from ..collections import Paper, Volume
 
 log = get_logger()
-VARIANTS_FILE = "yaml/name_variants.yaml"
+PEOPLE_INDEX_FILE = "yaml/people.yaml"
 
 
 @define
@@ -62,19 +68,55 @@ class PersonIndex(SlottedDict[Person]):
     Attributes:
         parent: The parent Anthology instance to which this index belongs.
         verbose: If False, will not show progress bar when building the index from scratch.
-        name_to_ids: A mapping of [Name][acl_anthology.people.name.Name] instances to person IDs.
+        path: The path to `people.yaml`.
+        by_orcid: A mapping of ORCIDs (as strings) to person IDs.
+        by_name: A mapping of [Name][acl_anthology.people.name.Name] instances to lists of person IDs.
+        slugs_to_verified_ids: A mapping of strings (representing slugified names) to lists of person IDs.
         similar: A [disjoint-set structure][scipy.cluster.hierarchy.DisjointSet] of persons with similar names.
         is_data_loaded: A flag indicating whether the index has been constructed.
     """
 
     parent: Anthology = field(repr=False, eq=False)
     verbose: bool = field(default=True)
-    name_to_ids: dict[Name, list[str]] = field(
+    path: Path = field(init=False)
+    _by_orcid: dict[str, str] = field(init=False, repr=False, default={})
+    _by_name: dict[Name, list[str]] = field(
         init=False, repr=False, factory=lambda: defaultdict(list)
     )
-    similar: DisjointSet = field(init=False, repr=False, factory=DisjointSet)
+    _slugs_to_verified_ids: dict[str, set[str]] = field(
+        init=False, repr=False, factory=lambda: defaultdict(list)
+    )
+    _similar: DisjointSet = field(init=False, repr=False, factory=DisjointSet)
     is_data_loaded: bool = field(init=False, repr=True, default=False)
 
+    @path.default
+    def _path(self) -> Path:
+        return self.parent.datadir / Path(PEOPLE_INDEX_FILE)
+
+    @property
+    def by_orcid(self) -> dict[str, str]:
+        if not self.is_data_loaded:
+            self.load()
+        return self._by_orcid
+
+    @property
+    def by_name(self) -> dict[Name, list[str]]:
+        if not self.is_data_loaded:
+            self.load()
+        return self._by_name
+
+    @property
+    def similar(self) -> DisjointSet:
+        if not self.is_data_loaded:
+            self.load()
+        return self._similar
+
+    @property
+    def slugs_to_verified_ids(self) -> dict[str, set[str]]:
+        if not self.is_data_loaded:
+            self.load()
+        return self._slugs_to_verified_ids
+
     def get_by_name(self, name: Name) -> list[Person]:
         """Access persons by their name.
 
@@ -86,12 +128,12 @@ def get_by_name(self, name: Name) -> list[Person]:
         """
         if not self.is_data_loaded:
             self.load()
-        return [self.data[pid] for pid in self.name_to_ids[name]]
+        return [self.data[pid] for pid in self._by_name[name]]
 
     def get_by_namespec(self, name_spec: NameSpecification) -> Person:
         """Access persons by their name specification.
 
-        See [get_or_create_person()][acl_anthology.people.index.PersonIndex.get_or_create_person] for exceptions that can be raised by this function.
+        See [resolve_namespec()][acl_anthology.people.index.PersonIndex.resolve_namespec] for exceptions that can be raised by this function.
 
         Parameters:
             name_spec: A name specification.
@@ -101,7 +143,22 @@ def get_by_namespec(self, name_spec: NameSpecification) -> Person:
         """
         if not self.is_data_loaded:
             self.load()
-        return self.get_or_create_person(name_spec, create=False)
+        return self.resolve_namespec(name_spec)
+
+    def get_by_orcid(self, orcid: str) -> Person | None:
+        """Access persons by their ORCID.
+
+        Parameters:
+            orcid: A string representing an ORCID.
+
+        Returns:
+            The person with that ORCID, if it exists, otherwise None.
+        """
+        if not self.is_data_loaded:
+            self.load()
+        if orcid in self._by_orcid:
+            return self.data[self._by_orcid[orcid]]
+        return None
 
     def find_coauthors(
         self, person: str | Person, include_volumes: bool = True
@@ -143,13 +200,9 @@ def find_coauthors_counter(
                 and not cast("Volume", item).has_frontmatter
             ):
                 continue
-            coauthors.update(
-                self.get_or_create_person(ns, create=False).id for ns in item.editors
-            )
+            coauthors.update(self.resolve_namespec(ns).id for ns in item.editors)
             if hasattr(item, "authors"):
-                coauthors.update(
-                    self.get_or_create_person(ns, create=False).id for ns in item.authors
-                )
+                coauthors.update(self.resolve_namespec(ns).id for ns in item.authors)
         del coauthors[person.id]
         return coauthors
 
@@ -164,8 +217,10 @@ def load(self) -> None:
     def reset(self) -> None:
         """Resets the index."""
         self.data = {}
-        self.name_to_ids = defaultdict(list)
-        self.similar = DisjointSet()
+        self._by_orcid = {}
+        self._by_name = defaultdict(list)
+        self._slugs_to_verified_ids = defaultdict(set)
+        self._similar = DisjointSet()
         self.is_data_loaded = False
 
     def build(self, show_progress: bool = False) -> None:
@@ -175,8 +230,7 @@ def build(self, show_progress: bool = False) -> None:
             Exceptions raised during the index creation are sent to the logger, and only a generic exception is raised at the end.
         """
         self.reset()
-        # Load variant list, so IDs defined there are added first
-        self._load_variant_list()
+        self._load_people_index()
         # Go through every single volume/paper and add authors/editors
         iterator = track(
             self.parent.collections.values(),
@@ -190,7 +244,7 @@ def build(self, show_progress: bool = False) -> None:
                 context: Paper | Volume = volume
                 try:
                     for name_spec in volume.editors:
-                        person = self.get_or_create_person(name_spec)
+                        person = self.resolve_namespec(name_spec, allow_creation=True)
                         person.item_ids.append(volume.full_id_tuple)
                     for paper in volume.papers():
                         context = paper
@@ -202,9 +256,9 @@ def build(self, show_progress: bool = False) -> None:
                             else paper.get_editors()
                         )
                         for name_spec in name_specs:
-                            person = self.get_or_create_person(name_spec)
+                            person = self.resolve_namespec(name_spec, allow_creation=True)
                             person.item_ids.append(paper.full_id_tuple)
-                except Exception as exc:
+                except Exception as exc:  # pragma: no cover
                     note = f"Raised in {context.__class__.__name__} {context.full_id}; {name_spec}"
                     # If this is merged into a single if-statement (with "or"),
                     # the type checker complains ¯\_(ツ)_/¯
@@ -217,107 +271,312 @@ def build(self, show_progress: bool = False) -> None:
         if raised_exception:
             raise Exception(
                 "An exception was raised while building PersonIndex; check the logger for details."
-            )
+            )  # pragma: no cover
         self.is_data_loaded = True
 
+    def _load_people_index(self) -> None:
+        """Load and parse the `people.yaml` file.
+
+        Raises:
+            AnthologyInvalidIDError: If `people.yaml` contains a malformed person ID; or if a person is listed without any names.
+        """
+        merge_list: list[tuple[str, str]] = []
+
+        with open(self.path, "r", encoding="utf-8") as f:
+            data = yaml.load(f, Loader=Loader)
+
+        for pid, entry in data.items():
+            if not is_verified_person_id(pid):
+                raise AnthologyInvalidIDError(
+                    pid, f"Invalid person ID in people.yaml: {pid}"
+                )  # pragma: no cover
+            self.add_person(
+                Person(
+                    id=pid,
+                    parent=self.parent,
+                    names=[Name.from_dict(n) for n in entry.pop("names")],
+                    orcid=entry.pop("orcid", None),
+                    comment=entry.pop("comment", None),
+                    degree=entry.pop("degree", None),
+                    similar_ids=entry.get("similar", []),
+                    disable_name_matching=entry.pop("disable_name_matching", False),
+                    is_explicit=True,
+                )
+            )
+            for similar_id in entry.pop("similar", []):
+                merge_list.append((pid, similar_id))
+
+            # Check for unprocessed keys to catch errors
+            if entry:
+                log.warning(
+                    f"people.yaml: entry '{pid}' has unknown keys: {entry.keys()}"
+                )  # pragma: no cover
+
+        # Process IDs with similar names
+        for pid_set in self._slugs_to_verified_ids.values():
+            pid_list = list(pid_set)
+            for pid in pid_list[1:]:
+                self._similar.merge(pid_list[0], pid)
+        for a, b in merge_list:
+            self._similar.merge(a, b)
+
     def add_person(self, person: Person) -> None:
         """Add a new person to the index.
 
         Parameters:
             person: The person to add, which should not exist in the index yet.
+
+        Raises:
+            AnthologyInvalidIDError: If a person with the same ID or ORCID already exists in the index.
         """
         if (pid := person.id) in self.data:
-            raise KeyError(f"A Person with ID '{pid}' already exists in the index")
+            raise AnthologyInvalidIDError(
+                pid, f"A Person with ID '{pid}' already exists in the index"
+            )
         self.data[pid] = person
-        self.similar.add(pid)
+        self._similar.add(pid)
+        if person.orcid is not None:
+            if person.orcid in self._by_orcid:
+                raise ValueError(
+                    f"ORCID '{person.orcid}' already assigned to person '{self._by_orcid[person.orcid]}'"
+                )
+            self._by_orcid[person.orcid] = pid
+        for name in person.names:
+            self._by_name[name].append(pid)
+            if is_verified_person_id(pid):
+                self._slugs_to_verified_ids[name.slugify()].add(pid)
+
+    def create_person(
+        self,
+        id: str,
+        names: list[Name],
+        **kwargs: Any,
+    ) -> Person:
+        """Create a new explicit person and add it to the index.
+
+        Parameters:
+            id: The ID of the new person.
+            names: A list of names for the new person; must contain at least one.
+            **kwargs: Any valid list or optional attribute of [Person][acl_anthology.people.person.Person].
+
+        Returns:
+            The created [Person][acl_anthology.people.person.Person] object.
+
+        Raises:
+            AnthologyInvalidIDError: If a person with the given ID already exists, or the ID is not a well-formed verified-person ID.
+            ValueError: If the list of names is empty.
+        """
+        if not self.is_data_loaded:
+            self.load()
+        if id in self.data:
+            raise AnthologyInvalidIDError(
+                id, f"A Person with ID '{id}' already exists in the index"
+            )
+        if not is_verified_person_id(id):
+            raise AnthologyInvalidIDError(id, f"Not a valid verified-person ID: {id}")
+        if not names:
+            raise ValueError("List of names cannot be empty")
+
+        kwargs["parent"] = self.parent
+        kwargs["is_explicit"] = True
+
+        person = Person(id=id, names=names, **kwargs)
+        self.add_person(person)
+        return person
+
+    def _update_id(self, old_id: str, new_id: str) -> None:
+        """Update a person ID in the index.
+
+        Will change all indices to remove the old ID and replace it with the new one.  Will be called automatically from Person; do not call manually.
+
+        Parameters:
+            old_id: A person ID that already exists in the index.
+            new_id: The new person ID it should be changed to, which mustn't exist in the index.
+        """
+        if not self.is_data_loaded:
+            return
+        person = self.data.pop(old_id)
+        self.data[new_id] = person
+        # Note: cannot remove from DisjointSet
+        self._similar.add(new_id)
+        self._similar.merge(old_id, new_id)
+        if person.orcid is not None:
+            self._by_orcid[person.orcid] = new_id
         for name in person.names:
-            self.name_to_ids[name].append(pid)
+            self._remove_name(old_id, name)
+            self._add_name(new_id, name)
+
+    def _update_orcid(self, pid: str, old: Optional[str], new: Optional[str]) -> None:
+        """Update a person's ORCID in the index.
+
+        Will be called automatically from Person; do not call manually.
+        """
+        if not self.is_data_loaded:
+            return
+        if old is not None and old in self._by_orcid:
+            del self._by_orcid[old]
+        if new is not None:
+            self._by_orcid[new] = pid
+
+    def _add_name(self, pid: str, name: Name) -> None:
+        """Add a name for a person to the index.
+
+        Will be called automatically from Person; do not call manually.
+        """
+        if not self.is_data_loaded:
+            return
+        self._by_name[name].append(pid)
+        if is_verified_person_id(pid):
+            self._slugs_to_verified_ids[name.slugify()].add(pid)
+
+    def _remove_name(self, pid: str, name: Name) -> None:
+        """Remove a name for a person from the index.
+
+        Will be called automatically from Person; do not call manually.
+        """
+        if not self.is_data_loaded:
+            return
+        try:
+            self._by_name[name].remove(pid)
+            if is_verified_person_id(pid):
+                self._slugs_to_verified_ids[name.slugify()].remove(pid)
+        except KeyError:
+            pass
+
+    def ingest_namespec(self, name_spec: NameSpecification) -> NameSpecification:
+        """Update a name specification for ingestion, potentially filling in the ID field.
 
-    def get_or_create_person(
-        self, name_spec: NameSpecification, create: bool = True
+        If the name specification contains an ORCID but doesn't have an ID yet, this will find the person with this ORCID and fill in their ID; if it doesn't exist yet, it will create a new person with a "verified" ID and fill in the new, generated ID.  The supplied name specification will be modified in-place, but also returned.
+
+        Parameters:
+            name_spec: The name specification on the paper, volume, etc.
+
+        Returns:
+            The name specification as it should be used for the new ingestion material.
+        """
+        if name_spec.orcid is None or name_spec.id is not None:
+            return name_spec
+
+        if (person := self.get_by_orcid(name_spec.orcid)) is not None:
+            name_spec.id = person.id
+            # Make sure the name used here is listed for this person
+            person.add_name(name_spec.name)
+        else:
+            # Need to create a new person; generate name slug for the ID
+            pid = name_spec.name.slugify()
+            if pid in self.data:
+                # ID is already in use; add last four digits of ORCID to disambiguate
+                pid = f"{pid}-{name_spec.orcid[-4:]}"
+
+            self.add_person(
+                Person(
+                    id=pid,
+                    parent=self.parent,
+                    names=[name_spec.name] + name_spec.variants,
+                    orcid=name_spec.orcid,
+                    is_explicit=True,
+                )
+            )
+            name_spec.id = pid
+
+        return name_spec
+
+    def resolve_namespec(
+        self, name_spec: NameSpecification, allow_creation: bool = False
     ) -> Person:
-        """Get the person represented by a name specification, or create a new one if needed.
+        """Resolve a name specification to a person, potentially creating a new unverified person instance.
 
         Parameters:
             name_spec: The name specification on the paper, volume, etc.
-            create: If False, will not create a new Person object, but instead raise `NameIDUndefinedError` if no person matching `name_spec` exists.  Defaults to True.
+            allow_creation: If True, will instantiate a new Person object with an unverified ID if no person matching `name_spec` exists.  Defaults to False.
 
         Returns:
-            The person represented by `name_spec`.  This will try to use the `id` attribute if it is set, look up the name in the index otherwise, or try to find a matching person by way of an ID clash.  If all of these fail, it will create a new person and return that.
+            The person represented by `name_spec`.  If `name_spec.id` is set, this will determine the person to resolve to.  Otherwise, the slugified name will be used to find a matching person; an explicitly-defined (verified) person can be returned if exactly one such person exists and does not have `disable_name_matching` set.  In all other cases, it will resolve to an unverified person.
 
         Raises:
-            AmbiguousNameError: If there are multiple known IDs for the given name, but there is no explicit `id` attribute.
-            NameIDUndefinedError: If there is an explicit `id` attribute, but the ID has not been defined.
+            NameSpecResolutionError: If `name_spec` cannot be resolved to a Person and `allow_creation` is False.
+            PersonDefinitionError: If `name_spec.id` is set, but either the ID or the name used with the ID has not been defined in `people.yaml`. (Inherits from NameSpecResolutionError)
         """
         name = name_spec.name
         if (pid := name_spec.id) is not None:
-            # Explicit ID given; should already exist from name_variants.yaml
-            person = self.data.get(pid)
-            if person is None or not person.is_explicit:
-                exc1 = NameIDUndefinedError(
-                    name_spec, f"Name '{name}' used with ID '{pid}' that doesn't exist"
+            # Explicit ID given – should be explicitly defined in people.yaml
+            if pid not in self.data or not (person := self.data[pid]).is_explicit:
+                raise PersonDefinitionError(
+                    name_spec, f"ID '{pid}' wasn't defined in people.yaml"
                 )
-                exc1.add_note("Did you forget to define the ID in name_variants.yaml?")
-                raise exc1
-            person.add_name(name)
-        elif pid_list := self.name_to_ids[name]:
-            # Name already exists in the index, but has no explicit ID
-            if len(pid_list) > 1:
-                exc2 = AmbiguousNameError(
-                    name,
-                    f"Name '{name.as_first_last()}' is ambiguous, but was used without an ID",
+            if not person.has_name(name):
+                raise PersonDefinitionError(
+                    name_spec,
+                    f"ID '{pid}' was used with name '{name}' that wasn't defined in people.yaml",
+                )
+            if name_spec.orcid is not None and name_spec.orcid != person.orcid:
+                raise PersonDefinitionError(
+                    name_spec,
+                    f"ID '{pid}' was used with ORCID '{name_spec.orcid}', but people.yaml has '{person.orcid}'",
                 )
-                exc2.add_note(f"Known IDs are: {', '.join(pid_list)}")
-                raise exc2
-            pid = pid_list[0]
-            person = self.data[pid]
         else:
-            # Name not in the index and has no explicit ID
-            pid = self.generate_id(name)
-            try:
-                # If the auto-generated ID already exists, we assume it's the same person
-                person = self.data[pid]
-                # If the name scores higher than the current canonical one, we
-                # also assume we should set this as the canonical one
-                if (not person.is_explicit) and (
-                    name.score() > person.canonical_name.score()
-                ):
-                    person.set_canonical_name(name)
-                else:
-                    person.add_name(name)
-                self.name_to_ids[name].append(pid)
-            except KeyError:
-                if create:
-                    # If the auto-generated ID doesn't exist yet, then and only
-                    # then do we create a new person
-                    person = Person(id=pid, parent=self.parent, names=[name])
+            # No explicit ID given
+            if name_spec.orcid is not None:
+                exc1 = NameSpecResolutionError(
+                    name_spec,
+                    "NameSpecification defines an ORCID without an ID",
+                )
+                exc1.add_note(
+                    "To specify an ORCID on a paper, the person needs to have an entry in `people.yaml` and be used with an explicit ID."
+                )
+                raise exc1
+
+            # Generate slug for name matching
+            slug = name.slugify()
+
+            # Check if the slugified name matches any verified IDs
+            matching_ids = list(self._slugs_to_verified_ids.get(slug, []))
+            if (
+                len(matching_ids) == 1
+                and not (person := self.data[matching_ids[0]]).disable_name_matching
+            ):
+                # Slug unambiguously maps to person and name matching not disabled
+                pid = person.id
+                if not person.has_name(name):
+                    person.add_name(name, inferred=True)
+                    self._by_name[name].append(pid)
+
+            else:
+                # Resolve to unverified ID
+                pid = f"unverified/{slug}"
+
+                if pid in self.data:
+                    # Unverified ID already exists; assume it's the same person
+                    person = self.data[pid]
+                    if not person.has_name(name):
+                        # If the name scores higher than the current canonical
+                        # one, we also assume we should set this as the
+                        # canonical one
+                        if name.score() > person.canonical_name.score():
+                            person._set_canonical_name(name, inferred=True)
+                        else:
+                            person.add_name(name, inferred=True)
+                        self._by_name[name].append(pid)
+                elif allow_creation:
+                    # Unverified ID doesn't exist yet; create it
+                    person = Person(
+                        id=pid, parent=self.parent, names=[(name, NameLink.INFERRED)]
+                    )
                     self.add_person(person)
                 else:
-                    raise NameIDUndefinedError(
+                    raise NameSpecResolutionError(
                         name_spec,
-                        f"Name '{name}' generated ID '{pid}' that doesn't exist",
+                        f"NameSpecification resolved to ID '{pid}' which doesn't exist",
                     )
+
         # Make sure that name variants specified here are registered
         for name in name_spec.variants:
-            person.add_name(name)
-            if name not in self.name_to_ids:
-                self.name_to_ids[name].append(pid)
+            if not person.has_name(name):
+                person.add_name(name, inferred=True)
+            if name not in self._by_name:
+                self._by_name[name].append(pid)
         return person
 
-    @staticmethod
-    def generate_id(name: Name) -> str:
-        """Generates and returns an ID from the given name.
-
-        Warning:
-            This **intentionally doesn't guarantee uniqueness** of the generated ID.
-            If two names generate identical IDs with this method, we assume they
-            refer to the same person.  This happens e.g. when there are missing
-            accents in one version, or when we have an inconsistent first/last split
-            for multiword names.  These cases have in practice always referred to
-            the same person.
-        """
-        return name.slugify()
-
     def _add_to_index(
         self, namespecs: Iterable[NameSpecification], item_id: AnthologyIDTuple
     ) -> None:
@@ -329,89 +588,36 @@ def _add_to_index(
             return
 
         for namespec in namespecs:
-            person = self.get_or_create_person(namespec)
+            person = self.resolve_namespec(namespec, allow_creation=True)
             person.item_ids.append(item_id)
 
-    def _load_variant_list(self) -> None:
-        """Loads and parses the `name_variant.yaml` file.
-
-        Raises:
-            AmbiguousNameError: If there are ambiguous "canonical" names without explicit, unique IDs for each one.
-        """
-        filename = self.parent.datadir / Path(VARIANTS_FILE)
-        merge_list: list[tuple[str, str]] = []
-        with open(filename, "r", encoding="utf-8") as f:
-            variant_list = yaml.load(f, Loader=Loader)
-        for entry in variant_list:
-            # Every entry must have a "canonical" name
-            canonical = Name.from_dict(entry["canonical"])
-            # If it doesn't define an ID, we have to create one
-            if (pid := entry.get("id")) is None:
-                pid = self.generate_id(canonical)
-                if pid in self.data:
-                    raise AmbiguousNameError(
-                        canonical,
-                        (
-                            f"While parsing {filename}: "
-                            f"name '{canonical.as_first_last()}' is ambiguous, but the "
-                            f"automatically generated ID '{pid}' already exists."
-                        ),
-                    )
-            # Parse all the variant names, and make sure canonical stays at index 0
-            names = [canonical] + [
-                Name.from_dict(var) for var in entry.get("variants", [])
-            ]
-            # Now we can create a new person from this entry...
-            person = Person(
-                id=pid,
-                parent=self.parent,
-                names=names,
-                comment=entry.get("comment", None),
-                is_explicit=True,
-            )
-            # ...and add it to the index
-            self.add_person(person)
-            for similar_id in entry.get("similar", []):
-                merge_list.append((pid, similar_id))
-
-        # Process IDs with similar names
-        for name, pid_list in self.name_to_ids.items():
-            for pid in pid_list[1:]:
-                self.similar.merge(pid_list[0], pid)
-        for a, b in merge_list:
-            self.similar.merge(a, b)
-
-    def save(self, path: StrPath) -> None:
-        """Save the entire index.
-
-        CURRENTLY UNTESTED; DO NOT USE.
+    def save(self, path: Optional[StrPath] = None) -> None:
+        """Save the `people.yaml` file.
 
         Arguments:
-            path: The filename to save to.
+            path: The filename to save to. If None, defaults to the parent Anthology's `people.yaml` file.
         """
-        data = []
+        if path is None:
+            path = self.path  # pragma: no cover
+
+        data = {}
         for person in self.values():
+            if not person.is_explicit:
+                continue
+
             attrib: dict[str, Any] = {
-                "id": person.id,
-                "canonical": asdict(
-                    person.canonical_name,
-                    filter=lambda a, v: not (a.name == "script" and v is None),
-                ),
+                "names": [
+                    _YAMLName(name)
+                    for (name, link_type) in person._names
+                    if link_type == NameLink.EXPLICIT
+                ],
+                "comment": person.comment,
+                "degree": person.degree,
+                "disable_name_matching": person.disable_name_matching,
+                "orcid": person.orcid,
+                "similar": person.similar_ids,
             }
-            if person.item_ids:
-                attrib["items"] = person.item_ids
-            if len(person.names) > 1:
-                attrib["variants"] = [
-                    asdict(
-                        name, filter=lambda a, v: not (a.name == "script" and v is None)
-                    )
-                    for name in person.names[1:]
-                ]
-            similar = self.similar.subset(person.id)
-            if len(similar) > 1:
-                attrib["similar"] = [id_ for id_ in similar if id_ != person.id]
-            if person.comment is not None:
-                attrib["comment"] = person.comment
-            data.append(attrib)
+            data[person.id] = {k: v for k, v in attrib.items() if v}
+
         with open(path, "w", encoding="utf-8") as f:
-            yaml.dump(data, f, Dumper=Dumper)
+            yaml.dump(data, f, allow_unicode=True, Dumper=Dumper)
diff --git a/python/acl_anthology/people/name.py b/python/acl_anthology/people/name.py
index 289787dbf0..b82acedc74 100644
--- a/python/acl_anthology/people/name.py
+++ b/python/acl_anthology/people/name.py
@@ -21,6 +21,12 @@
 import re
 from slugify import slugify
 from typing import Any, Optional, cast, TypeAlias
+import yaml
+
+try:
+    from yaml import CDumper as Dumper
+except ImportError:  # pragma: no cover
+    from yaml import Dumper  # type: ignore
 
 from ..utils.latex import latex_encode
 
@@ -114,17 +120,13 @@ def score(self) -> float:
             score += 0.5
         return score
 
+    @cache
     def slugify(self) -> str:
         """
         Returns:
             A [slugified string](https://github.com/un33k/python-slugify#how-to-use) of the full name.
         """
-        if not (name := self.as_first_last()):
-            # Only necessary because of <https://github.com/acl-org/acl-anthology/issues/2725>
-            slug = "none"
-        else:
-            slug = slugify(name)
-        return slug
+        return slugify(self.as_first_last())
 
     @classmethod
     def from_dict(cls, name: dict[str, str]) -> Name:
@@ -250,8 +252,9 @@ class NameSpecification:
 
     Attributes:
         name: The person's name.
-        id: Unique ID for the person that this name refers to.  Defaults to `None`.
-        affiliation: Professional affiliation.  Defaults to `None`.
+        id: Unique ID for the person that this name refers to.
+        orcid: An ORCID that was supplied together with this name.
+        affiliation: Professional affiliation.
         variants: Variant spellings of this name in different scripts.
 
     Note:
@@ -263,6 +266,7 @@ class NameSpecification:
 
     name: Name = field(converter=_Name_from)
     id: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str)))
+    orcid: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str)))
     affiliation: Optional[str] = field(
         default=None, validator=v.optional(v.instance_of(str))
     )
@@ -321,6 +325,7 @@ def from_xml(cls, person: etree._Element) -> NameSpecification:
         return cls(
             Name(first, cast(str, last)),
             id=person.get("id"),
+            orcid=person.get("orcid"),
             affiliation=affiliation,
             variants=variants,
         )
@@ -336,6 +341,8 @@ def to_xml(self, tag: str = "author") -> etree._Element:
         elem = etree.Element(tag)
         if self.id is not None:
             elem.set("id", self.id)
+        if self.orcid is not None:
+            elem.set("orcid", self.orcid)
         elem.extend(
             (
                 E.first(self.first) if self.first else E.first(),
@@ -347,3 +354,21 @@ def to_xml(self, tag: str = "author") -> etree._Element:
         for variant in self.variants:
             elem.append(variant.to_xml())
         return elem
+
+
+class _YAMLName(yaml.YAMLObject):
+    """YAMLObject representing names.
+
+    This exists to serialize names in "flow" style (i.e. one-liner `{first: ..., last: ...}`) without having to force flow style on the entire YAML document.
+    """
+
+    yaml_dumper = Dumper
+    yaml_tag = "tag:yaml.org,2002:map"  # serialize like a dictionary
+    yaml_flow_style = True  # force flow style
+
+    def __init__(self, name: Name) -> None:
+        if name.first is not None:
+            self.first = name.first
+        self.last = name.last
+        if name.script is not None:
+            self.script = name.script
diff --git a/python/acl_anthology/people/person.py b/python/acl_anthology/people/person.py
index be0652b8e9..0b01bb3919 100644
--- a/python/acl_anthology/people/person.py
+++ b/python/acl_anthology/people/person.py
@@ -14,41 +14,99 @@
 
 from __future__ import annotations
 
+import attrs
 from attrs import define, field
-from typing import Iterator, Optional, TYPE_CHECKING
+from enum import Enum
+import itertools as it
+from typing import Any, Iterator, Optional, Sequence, TYPE_CHECKING
+from ..exceptions import AnthologyException, AnthologyInvalidIDError
 from ..utils.attrs import auto_validate_types
-from ..utils.ids import AnthologyIDTuple, build_id_from_tuple
+from ..utils.ids import (
+    AnthologyIDTuple,
+    build_id_from_tuple,
+    is_valid_orcid,
+    is_verified_person_id,
+)
 from . import Name
 
 if TYPE_CHECKING:
+    from . import NameSpecification
     from ..anthology import Anthology
     from ..collections import Paper, Volume
 
 
+class NameLink(Enum):
+    """How a Name was connected to a Person."""
+
+    EXPLICIT = "explicit"
+    """Name is explicitly listed in `people.yaml` file."""
+
+    INFERRED = "inferred"
+    """Name was connected to this Person via slug matching heuristic."""
+
+
+def _name_list_converter(
+    name_list: Sequence[Name | tuple[Name, NameLink]],
+) -> list[tuple[Name, NameLink]]:
+    return [
+        (item, NameLink.EXPLICIT) if isinstance(item, Name) else item
+        for item in name_list
+    ]
+
+
+def _update_person_index(person: Person, attr: attrs.Attribute[Any], value: str) -> str:
+    """Update the [PersonIndex][acl_anthology.people.index.PersonIndex].
+
+    Intended to be called from `on_setattr` of an [attrs.field][].
+    """
+    index = person.parent.people
+    if attr.name == "id":
+        index._update_id(person.id, value)
+    elif attr.name == "orcid":
+        index._update_orcid(person.id, person.orcid, value)
+    return value
+
+
 @define(field_transformer=auto_validate_types)
 class Person:
     """A natural person.
 
     Info:
-        All information about persons is currently derived from [name specifications][acl_anthology.people.name.NameSpecification] on volumes and papers, and not stored explicitly. This means that Person objects **cannot be used to make changes** to Anthology data; change the information on papers instead.
+        The connection between persons and Anthology items is derived from [name specifications][acl_anthology.people.name.NameSpecification] on volumes and papers, and not stored explicitly. This means that Person objects **cannot be used to make changes to paper metadata**, e.g. which person a paper is associated with or under which name; change the information on papers instead.
+
+        Person objects **can** be used to make changes to metadata that appears in `people.yaml`, such as ORCID, comment, degree, and alternative names for this person.
 
     Attributes:
         id: A unique ID for this person.
         parent: The parent Anthology instance to which this person belongs.
-        names: A list of names under which this person has published.
         item_ids: A list of volume and/or paper IDs this person has authored or edited.
-        comment: A comment for disambiguation purposes; can be stored in `name_variants.yaml`.
-        is_explicit: True if this person has names explicitly defined in `name_variants.yaml`.  Note this does _not_ necessarily mean an explicit ID was defined for the person there.
+        orcid: The person's ORCID.
+        comment: A comment for disambiguation purposes.
+        degree: The person's institution of highest degree, for disambiguation purposes.
+        similar_ids: A list of person IDs with names that should be considered similar to this one.  Do **not** use this to _find_ people with similar names; that should be done via [`PersonIndex.similar`][acl_anthology.people.index.PersonIndex].  This attribute can be used to explicitly add more "similar IDs" that are not automatically derived via similar names.
+        disable_name_matching: If True, no items should be assigned to this person unless they explicitly specify this person's ID.
+        is_explicit: If True, this person's ID is explicitly defined in `people.yaml`.  You probably want to use [`make_explicit()`][acl_anthology.people.person.Person.make_explicit] rather than change this attribute.
     """
 
-    id: str = field()
+    id: str = field(
+        on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_person_index)
+    )
     parent: Anthology = field(repr=False, eq=False)
-    names: list[Name] = field(factory=list)
+    _names: list[tuple[Name, NameLink]] = field(
+        factory=list, converter=_name_list_converter
+    )
     item_ids: list[AnthologyIDTuple] = field(
         factory=list, repr=lambda x: f"<list of {len(x)} AnthologyIDTuple objects>"
     )
+    orcid: Optional[str] = field(
+        default=None,
+        on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_person_index),
+    )  # validator defined below
     comment: Optional[str] = field(default=None)
-    is_explicit: Optional[bool] = field(default=False)  # TODO: why can this be None?
+    degree: Optional[str] = field(default=None)
+    similar_ids: list[str] = field(factory=list)
+    disable_name_matching: Optional[bool] = field(default=False, converter=bool)
+    is_explicit: Optional[bool] = field(default=False, converter=bool)
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, Person):
@@ -58,6 +116,23 @@ def __eq__(self, other: object) -> bool:
     def __hash__(self) -> int:
         return hash(self.id)
 
+    @orcid.validator
+    def _check_orcid(self, _: Any, value: Optional[str]) -> None:
+        if value is not None and not is_valid_orcid(value):
+            raise ValueError("ORCID is not valid (wrong format or checksum)")
+
+    @property
+    def names(self) -> list[Name]:
+        return [name for (name, _) in self._names]
+
+    @names.setter
+    def names(self, values: list[Name]) -> None:
+        for name, _ in self._names:
+            self.parent.people._remove_name(self.id, name)
+        for name in values:
+            self.parent.people._add_name(self.id, name)
+        self._names = _name_list_converter(values)
+
     @property
     def canonical_name(self) -> Name:
         """
@@ -67,22 +142,60 @@ def canonical_name(self) -> Name:
         try:
             # By convention, the first entry of `self.names` is treated as the
             # canonical entry
-            return self.names[0]
-        except KeyError:
+            return self._names[0][0]
+        except IndexError:
             raise ValueError(f"No names defined for person '{self.id}'")
 
     @canonical_name.setter
     def canonical_name(self, name: Name) -> None:
-        self.set_canonical_name(name)
+        self._set_canonical_name(name)
 
-    def add_name(self, name: Name) -> None:
+    def _set_canonical_name(self, name: Name, inferred: bool = False) -> None:
+        """Set the canonical name for this person.
+
+        Outside of the library, use Person.canonical_name = ...
+
+        Parameters:
+            name: Name that should be treated as canonical for this person.
+            inferred: Marks the canonical name as inferred (used inside the name slug matching algorithm).
+        """
+        link_type = NameLink.INFERRED if inferred else NameLink.EXPLICIT
+        if not self.has_name(name):
+            self._names.insert(0, (name, link_type))
+        else:
+            self._names = [(name, link_type)] + [x for x in self._names if x[0] != name]
+
+    def add_name(self, name: Name, inferred: bool = False) -> None:
         """Add a name for this person.
 
         Parameters:
             name: Name that can refer to this person.
+            inferred: If True, will be marked as `NameLinkingType.INFERRED`, which will e.g. cause this name to not be written to `people.yaml`.  Used when building the [`PersonIndex`][acl_anthology.people.index.PersonIndex] from the XML data; you probably don't want to set this manually.  Defaults to False.
         """
-        if name not in self.names:
-            self.names.append(name)
+        link_type = NameLink.INFERRED if inferred else NameLink.EXPLICIT
+        if not self.has_name(name):
+            self._names.append((name, link_type))
+            self.parent.people._add_name(self.id, name)
+        elif (name, link_type) not in self._names:
+            # ensure that name is re-inserted at same position
+            idx = self.names.index(name)
+            del self._names[idx]
+            self._names.insert(idx, (name, link_type))
+
+    def remove_name(self, name: Name) -> None:
+        """Remove an explicit name for this person.
+
+        Warning:
+            If the name is still used on a paper or volume with the ID of this person, this may result in an Exception during index building.  Names that were implicitly linked to this person cannot be removed this way, as the name would simply reappear on next load.
+
+        Parameters:
+            name: Name that should be removed from this person.
+
+        Raises:
+            ValueError: If this name was not explicitly linked to this person.
+        """
+        self._names.remove((name, NameLink.EXPLICIT))
+        self.parent.people._remove_name(self.id, name)
 
     def has_name(self, name: Name) -> bool:
         """
@@ -92,19 +205,64 @@ def has_name(self, name: Name) -> bool:
         Returns:
             True if the given name can refer to this person.
         """
-        return name in self.names
+        return any(existing_name == name for (existing_name, _) in self._names)
 
-    def set_canonical_name(self, name: Name) -> None:
-        """Set the canonical name for this person.
+    def make_explicit(self, new_id: str) -> None:
+        """Turn this person that was implicitly created into an explicitly-represented one.
+
+        This will result in this person having an explicit entry in `people.yaml` with all names that are currently associated with this person.  It will also add their new explicit ID to all papers and volumes currently associated with this person.
 
         Parameters:
-            name: Name that should be treated as canonical for this person.
+            new_id: The new ID for this person, which must match [`RE_VERIFIED_PERSON_ID`][acl_anthology.utils.ids.RE_VERIFIED_PERSON_ID].
+
+        Raises:
+            AnthologyException: If `self.explicit` is already True.
+            ValueError: If the supplied ID is not valid, or if it already exists in the PersonIndex.
         """
-        try:
-            self.names.pop(self.names.index(name))
-        except ValueError:
-            pass
-        self.names.insert(0, name)
+        if self.is_explicit:
+            raise AnthologyException("Person is already explicit")
+
+        self.is_explicit = True
+        self.update_id(new_id)
+        self._names = [(name, NameLink.EXPLICIT) for name, _ in self._names]
+
+    def update_id(self, new_id: str) -> None:
+        """Update the ID of this person, including on all of their associated papers.
+
+        In contrast to simply changing the `id` attribute, this function will go through all associated papers and update the ID attribute there.
+
+        Parameters:
+            new_id: The new ID for this person, which must match [`RE_VERIFIED_PERSON_ID`][acl_anthology.utils.ids.RE_VERIFIED_PERSON_ID].
+
+        Raises:
+            AnthologyException: If `self.is_explicit` is False.
+            AnthologyInvalidIDError: If the supplied ID is not valid, or if it already exists in the PersonIndex.
+        """
+        if not self.is_explicit:
+            exc = AnthologyException("Can only update ID for explicit person")
+            exc.add_note("Did you want to use make_explicit() instead?")
+            raise exc
+        if not is_verified_person_id(new_id):
+            raise AnthologyInvalidIDError(
+                new_id, f"Not a valid verified-person ID: {new_id}"
+            )
+
+        old_id = self.id
+
+        def namespec_refers_to_self(namespec: NameSpecification) -> bool:
+            if is_verified_person_id(old_id):
+                return namespec.id == old_id
+            return namespec.id is None and self.has_name(namespec.name)
+
+        self.id = new_id  # will update PersonIndex
+        for paper in self.papers():
+            for namespec in it.chain(paper.authors, paper.editors):
+                if namespec_refers_to_self(namespec):
+                    namespec.id = new_id
+        for volume in self.volumes():
+            for namespec in volume.editors:
+                if namespec_refers_to_self(namespec):
+                    namespec.id = new_id
 
     def papers(self) -> Iterator[Paper]:
         """Returns an iterator over all papers associated with this person.
@@ -119,7 +277,7 @@ def papers(self) -> Iterator[Paper]:
                 if paper is None:
                     raise ValueError(
                         f"Person {self.id} lists associated paper {build_id_from_tuple(anthology_id)}, which doesn't exist"
-                    )
+                    )  # pragma: no cover
                 yield paper
 
     def volumes(self) -> Iterator[Volume]:
@@ -131,5 +289,5 @@ def volumes(self) -> Iterator[Volume]:
                 if volume is None:
                     raise ValueError(
                         f"Person {self.id} lists associated volume {build_id_from_tuple(anthology_id)}, which doesn't exist"
-                    )
+                    )  # pragma: no cover
                 yield volume
diff --git a/python/acl_anthology/text/markuptext.py b/python/acl_anthology/text/markuptext.py
index 18f18fbd2d..6d3aca6087 100644
--- a/python/acl_anthology/text/markuptext.py
+++ b/python/acl_anthology/text/markuptext.py
@@ -241,7 +241,8 @@ def to_xml(self, tag: str = "span") -> etree._Element:
         """
         if isinstance(self._content, str):
             element = etree.Element(tag)
-            element.text = self._content
+            if self._content:
+                element.text = self._content
         else:
             element = deepcopy(self._content)
             element.tag = tag
diff --git a/python/acl_anthology/utils/ids.py b/python/acl_anthology/utils/ids.py
index 28812f3e10..1cc5386981 100644
--- a/python/acl_anthology/utils/ids.py
+++ b/python/acl_anthology/utils/ids.py
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Marcel Bollmann <marcel@bollmann.me>
+# Copyright 2023-2025 Marcel Bollmann <marcel@bollmann.me>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
 
 """Functions for manipulating Anthology IDs."""
 
+import functools
 import re
 from typing import Optional
 
@@ -30,6 +31,12 @@
 RE_ITEM_ID = re.compile(r"[a-z0-9]+")
 """A regular expression matching any valid volume or paper ID."""
 
+RE_VERIFIED_PERSON_ID = re.compile(r"[a-z][\-a-z0-9]+")
+"""A regular expression matching any valid verified person ID."""
+
+RE_ORCID = re.compile(r"[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]")
+"""A regular expression matching any string that looks like an ORCID."""
+
 
 def build_id(
     collection_id: str, volume_id: Optional[str] = None, paper_id: Optional[str] = None
@@ -200,6 +207,34 @@ def is_valid_item_id(id_: str) -> bool:
     return RE_ITEM_ID.fullmatch(id_) is not None
 
 
+def is_valid_orcid(orcid: str) -> bool:
+    """Validate that a string looks like an ORCID and has the correct checksum.
+
+    Returns:
+        True if the ORCID validates, False otherwise.
+    """
+    if RE_ORCID.fullmatch(orcid) is None:
+        return False
+    # <https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier>
+    total = functools.reduce(
+        lambda x, y: (x + int(y)) * 2, orcid[:-1].replace("-", ""), 0
+    )
+    checksum = (12 - (total % 11)) % 11
+    return orcid[-1] == str(checksum) if checksum < 10 else orcid[-1] == "X"
+
+
+def is_verified_person_id(id_: str) -> bool:
+    """Validate that a string is formatted like a verified person ID.
+
+    Returns:
+        True if this ID can refer to a verified person.
+
+    Warning:
+        Does not perform any kind of input validation.
+    """
+    return RE_VERIFIED_PERSON_ID.fullmatch(id_) is not None
+
+
 def infer_year(anthology_id: AnthologyID) -> str:
     """Infer the year from an Anthology ID.
 
diff --git a/python/acl_anthology/utils/logging.py b/python/acl_anthology/utils/logging.py
index a6e5a95044..964640d6ac 100644
--- a/python/acl_anthology/utils/logging.py
+++ b/python/acl_anthology/utils/logging.py
@@ -15,8 +15,9 @@
 """Functions for convenient logging."""
 
 import logging
+from rich.console import Console
 from rich.logging import RichHandler
-from typing import cast
+from typing import cast, Optional
 from ..config import config
 
 
@@ -46,7 +47,9 @@ def emit(self, record: logging.LogRecord) -> None:
             self.highest = record.levelno
 
 
-def setup_rich_logging(**kwargs: object) -> SeverityTracker:
+def setup_rich_logging(
+    console: Optional[Console] = None, **kwargs: object
+) -> SeverityTracker:
     """Set up a logger that uses rich markup and severity tracking.
 
     This function is intended to be called in a script. It calls [logging.basicConfig][] and is therefore not executed by default, as applications may wish to setup their loggers differently.
@@ -57,6 +60,8 @@ def setup_rich_logging(**kwargs: object) -> SeverityTracker:
     Returns:
         The severity tracker, so that it can be used to check the highest emitted log level.
     """
+    if console is None:
+        console = Console(stderr=True)
     log_config: dict[str, object] = dict(
         level="NOTSET",
         format="%(message)s",
@@ -65,7 +70,9 @@ def setup_rich_logging(**kwargs: object) -> SeverityTracker:
     )
     log_config.update(kwargs)
     tracker = SeverityTracker()
-    cast(list[logging.Handler], log_config["handlers"]).extend([RichHandler(), tracker])
+    cast(list[logging.Handler], log_config["handlers"]).extend(
+        [RichHandler(console=console), tracker]
+    )
     logging.basicConfig(**log_config)  # type: ignore
     logging.captureWarnings(True)
     return tracker
diff --git a/python/docs/guide/modifying-data.md b/python/docs/guide/modifying-data.md
index b085684803..ab1c21db7d 100644
--- a/python/docs/guide/modifying-data.md
+++ b/python/docs/guide/modifying-data.md
@@ -16,7 +16,9 @@ are some rules of thumb when making modifications to the data:
    whenever possible, rather than instantiating them directly.
 2. You can modify objects by simply modifying their attributes, as long as the
    object in question has an explicit representation in the Anthology data.
-    - This includes collections, volumes, papers, events, but not e.g. persons.
+    - This includes collections, volumes, papers, events.
+    - It also includes persons where `Person.is_explicit == True`, as those have
+      an explicit representation in `people.yaml`.
 3. Saving data is always non-destructive and will avoid introducing unnecessary
    changes (e.g. no needless reordering of XML tags).  {==This is currently only
    true & tested for XML files, not for the YAML files.==}
@@ -39,6 +41,10 @@ just fetch the paper and set its `doi` attribute:
 >>> paper.doi = '10.18653/v1/2022.acl-long.99'
 ```
 
+!!! tip "Rule of thumb"
+
+    For all `collections` objects, setting attributes should either raise a `TypeError`, or "do the right thing."  However, be careful when modifying _list_ attributes in-place, as no input validation is performed in that case.
+
 ### Simple attributes
 
 Attributes generally perform **input validation**.  For example, since a paper's
@@ -65,9 +71,6 @@ date of ingestion is stored as a string, but the following will also work:
 '2025-01-08'
 ```
 
-**As a general rule, setting attributes of `collections` objects should either
-raise a `TypeError`, or "do the right thing."**
-
 ### List attributes
 
 List attributes can be modified the same way as other attributes; for example,
@@ -80,26 +83,24 @@ to the author list:
 >>> paper.authors.append(spec)
 ```
 
-To change an existing author's name, you just need to remember that names are
-immutable:
+To change an existing author's name, you just need to remember that **names are
+immutable**:
 
 ```pycon
 >>> paper.authors[0].name.first = "Marc Marcel"             # will NOT work
 >>> paper.authors[0].name = Name("Bollmann, Marc Marcel")   # works
 ```
 
-!!! danger
-
-    Input validation or conversion cannot be done when modifying mutable
-    attributes such as lists (only when _setting_ them).  That means you won't
-    get an (immediate) error if you e.g. append the wrong type of object to a
-    list attribute.
+There is **no input validation or conversion** when modifying mutable attributes
+such as lists (only when _setting_ them).  That means you won't get an
+immediate error if you e.g. append the wrong type of object to a list
+attribute!
 
 ### Things to keep in mind
 
 #### Citation keys
 If a paper's title or author list has changed, you might want to recreate its
-citation key (or 'bibkey').  This can be done by simply calling
+citation key (or 'bibkey').  This can be done by calling
 [`Paper.refresh_bibkey()`][acl_anthology.collections.paper.Paper.refresh_bibkey].
 If the auto-generated bibkey is identical to the current one, the bibkey will
 not change.
@@ -123,45 +124,156 @@ the new data.
 
 ## Modifying people
 
-{==TODO==}
+A person can be _explicit_ (has an entry in `people.yaml`) or _inferred_ (was instantiated from a name specification without an ID).  To make modifications to persons, it is important to remember that:
+
+1. Only an _explicit_ person's attributes can be meaningfully modified.
+
+2. Changing which person a paper/volume is assigned to should be done by modifying the name specification on the paper/volume, not by changing anything on the Person object.
+
+??? info "A note on terminology"
+
+    Within the library, the term **explicit** refers to a person that has an entry in `people.yaml`, whereas **inferred** refers to a person that was instantiated automatically while loading the XML data files (and has no entry in `people.yaml`).
+
+    Currently, all inferred persons have IDs beginning with `unverified/`, while IDs of explicit persons _must not_ begin with `unverified/`.
+
+    In practice, this means that "inferred" persons are currently equivalent to "unverified" persons, but the library intentionally uses terminology that is agnostic to the semantics of the ID.  If the semantics of whom we consider "(un)verified" change, the terminology in the library needn't change, as it only refers to the technical aspect of where the ID came from (`people.yaml` vs. implicit instantiation).
+
+### Creating a new person
+
+Manually creating a new person (that will get saved to `people.yaml` and can
+have an ORCID and other metadata) can be done in two ways:
+
+1. By calling [`PersonIndex.create_person()`][acl_anthology.people.index.PersonIndex.create_person].  The returned Person is _not_ linked to any papers/volumes, but you can set their ID afterwards on name specifications.
+
+2. By calling [`make_explicit()`][acl_anthology.people.person.Person.make_explicit] on a currently _inferred_ person.  This will not only add this person to the database, but also **set their ID on all papers/volumes** currently associated with them.
+
+### Example: Merging two persons
+
+**Situation:** An author has published under multiple names, and therefore two separate persons get instantiated for them (let's call them `p1` and `p2`).  We want to merge them into a single person.
+
+1. If neither `p1` nor `p2` are _explicit_: Call [`p1.make_explicit()`][acl_anthology.people.person.Person.make_explicit].  This will create an entry in `people.yaml` with all current names of `p1` and add the new ID to all papers and volumes currently inferred to belong to `p1`.
+
+2. Iterate through `p2.papers()` and `p2.volumes()` {==(TODO: a function to iterate through all items, no matter the type)==} and add `p1`'s new ID to the name specifications that are currently resolved to `p2`.  {==TODO: It's currently a bit tricky to find the _name specification_ referring to a person; should add a function for this.==}
+
+3. Save both the PersonIndex and the changed collections. {==TODO: The library current cannot track which collections have actually changed, so there is no "save all" function yet.==}
+
+### Example: Disambiguating a person
+
+**Situation:** A person `p1` is currently associated with papers/volumes that actually belong to different people, who just happened to publish under the same name.  We want to create a new person instance for the other author with the same name.
+
+1. Call [`anthology.people.create_person()`][acl_anthology.people.index.PersonIndex.create_person] for all persons who do not have an explicit ID yet, giving all the names that can refer to this person.  Also supply the ORCID when calling this function, if it is known.
+
+2. For each person, iterate through the papers that actually belong to them and update the name specification that currently resolves to `p1` by setting the explicit ID of the correct newly-created person.  {==TODO: Same as above: It's currently a bit tricky to find the _name specification_ referring to a person; should add a function for this.==}
 
 
 ## Ingesting new proceedings
 
-{==TODO==}
+Proceedings can be ingested almost entirely via functionality from this library;
+in particular, no data files (XML or YAML) need to be saved manually.  _(The
+only functionality that is currently not part of this library is the fixed-caser
+for paper titles, which is described below.)_
 
 ### New collections, volumes, and papers
 
 Creating new objects from `acl_anthology.collections` should be done with
-`create_` functions from their respective parent objects.  Here is a minimal
-example to create a new paper in an entirely new collection:
+`create_` functions from their respective parent objects.
 
-```python
-collection = anthology.create_collection("2049.acl")
-volume = collection.create_volume(
-    id="long",
-    title=MarkupText.from_string("Proceedings of the ..."),
-)
-paper = volume.create_paper(
-    title=MarkupText.from_string("GPT-5000 is all you need")
-)
-```
-
-All attributes that can be set on these objects can also be supplied as keyword
-parameters to the `create_` functions; alternatively, they can be set on the
-object after it has been created.
-
-Some required attributes don't _need_ to be supplied on these functions:
+All attributes that can be set on these objects (Volumes, Papers, etc.) can also
+be supplied as keyword parameters to the `create_` functions.  Some required
+attributes don't _need_ to be supplied here:
 
+- A Paper's `id` will be set to the next-highest numeric ID that doesn't already
+  exist in the volume, starting at `"1"`.
+- A Paper's `bibkey` will be automatically generated if not explicitly set.
 - A Volume's `year` attribute will be derived from the collection ID (e.g.,
   `"2049"` in a collection with ID `"2049.acl"`).
 - A Volume's `type` will default to
   [PROCEEDINGS][acl_anthology.collections.types.VolumeType].
-- A Paper's `id` will be set to the next-highest numeric ID that doesn't already
-  exist in the volume, starting at `"1"`.
-- A Paper's `bibkey` will be automatically generated if not explicitly set.
-  (But if you didn't supply an `authors` list when creating the paper, you will
-  want to call `refresh_bibkey()` on the Paper after setting the authors.)
+
+However, it is **strongly recommended to supply the author/editor list** when
+calling a `create_` function, as this will resolve person IDs and create correct
+bibkeys automatically.
+
+!!! example
+
+    Here is an example for how to create a new paper in an entirely new collection:
+
+    ```python
+    collection = anthology.create_collection("2049.acl")
+    volume = collection.create_volume(
+        id="long",
+        title=MarkupText.from_latex_maybe("Proceedings of the ..."),
+        venue_ids=["acl"],
+    )
+    paper = volume.create_paper(
+        title=MarkupText.from_latex_maybe("GPT$^{\\infty}$ is all you need"),
+        authors=[NameSpecification(first="John", last="Doe")],
+    )
+    ```
+
+    When all volumes and papers have been added, the XML file is written by calling:
+
+    ```python
+    collection.save()
+    ```
+
+??? info "If you don't supply an author list here..."
+
+    If you don't supply `authors` or `editors` when calling a `create_` function, or you need to modify those afterwards for some reason, you will need to perform these steps manually (which are otherwise handled by the `create_` function):
+
+    - Call `anthology.people.ingest_namespec()` on each NameSpecification.
+    - Call `refresh_bibkey()` on the Paper.
+
+### Specifying titles and abstracts
+
+Paper titles and abstracts always need to be **supplied as [MarkupText][acl_anthology.text.markuptext.MarkupText]**.  Simple strings can be instantiated with [`MarkupText.from_string()`][acl_anthology.text.markuptext.MarkupText.from_string].  For titles and abstracts containing LaTeX commands, [`MarkupText.from_latex()`][acl_anthology.text.markuptext.MarkupText.from_latex] can be used.  In practice, however, it may be unknown if text actually contains LaTeX markup.  In that case, using [`MarkupText.from_latex_maybe()`][acl_anthology.text.markuptext.MarkupText.from_latex_maybe] may be preferable, which will e.g. prevent percentage signs `%` from being interpreted as starting a LaTeX comment, and apply a heuristic to decide if a tilde `~` should be interpreted as a literal character or as a LaTeX non-breaking space.  {==TODO: We might want to make `as_latex_maybe()` the default instantiator for MarkupText, which would greatly simplify the instantiation of this in what is probably the most common use case.==}
+
+Paper titles should also have our **fixed-casing algorithm** applied to protect certain characters e.g. by wrapping them in braces in BibTeX entries.  **The fixed-caser is currently not part of this Python library.**  There are two options for running the fixed-casing on a new ingestion:
+
+1. _Outside the ingestion script:_ Run [`bin/fixedcase/protect.py`](https://github.com/acl-org/acl-anthology/blob/master/bin/fixedcase/protect.py) on the new XML files produced by the ingestion script.
+
+2. _Within the ingestion script:_ Convert titles to XML, run `fixedcase.protect()`, then set the title again from the modified XML element:
+
+    ```python
+    import fixedcase
+
+    xml_title = paper.title.to_xml("title")
+    fixedcase.protect(xml_title)
+    paper.title = MarkupText.from_xml(xml_title)
+    ```
+
+
+### Specifying authors
+
+Authors need to be specified by creating [name
+specifications](accessing-authors.md#name-specifications), for example:
+
+```python
+NameSpecification(Name("Marcel", "Bollmann"), orcid="0000-0003-2598-8150")
+```
+
+If an ORCID is supplied, the NameSpecification also needs to have an explicit ID
+referring to an entry in `people.yaml`.  **The library can add an ID
+automatically** as long as you supply the author/editor list to the `create_`
+function, so there is typically **no need to call `create_person()`** during
+ingestion!
+
+!!! example
+
+    If you create a paper in the following way...
+
+    ```python
+    paper = volume.create_paper(
+        title=MarkupText.from_string("The past and future of the ACL Anthology"),
+        authors=[NameSpecification(Name("Marcel", "Bollmann"), orcid="0000-0003-2598-8150")],
+    )
+    ```
+
+    ...the name specification will automatically be updated with an ID referring to this person in one of two ways:
+
+    - If a person with this ORCID already exists in `people.yaml`, their ID will be filled in.
+    - If a person with this ORCID does not exist in `people.yaml`, a new entry with this ORCID will be added to `people.yaml` with an auto-generated person ID.  The ID is a slug of the person's name; if necessary to avoid an ID clash, the last four digits of their ORCID will be appended.
+
 
 ### New events
 
@@ -187,18 +299,11 @@ the gory details), it's best to ensure that:
    `event.add_colocated(volume)`.
 
 
-### Parsing markup
+### Connecting to venues and SIGs
 
-MarkupText can be instantiated from strings representing LaTeX via
-[`MarkupText.from_latex()`][acl_anthology.text.markuptext.MarkupText.from_latex].
-This can be useful for titles and abstracts if they contain LaTeX commands, but
-in practice, it may be unknown if they actually do.  In that case, using
-[`MarkupText.from_latex_maybe()`][acl_anthology.text.markuptext.MarkupText.from_latex_maybe]
-may be preferable, which will e.g. prevent percentage signs `%` from being
-interpreted as starting a LaTeX comment, and apply a heuristic to decide if a
-tilde `~` should be interpreted as a literal character or as a LaTeX
-non-breaking space.
+Volumes can be connected to venues by modifying the volume's `venue_ids` list. {==TODO: adding new venues==}
 
+{==TODO: connecting to SIGs; we may want to refactor how SIGs are represented before introducing this functionality.==}
 
 
 ## Saving changes
@@ -213,4 +318,7 @@ non-breaking space.
   non-destructive through [integration tests on the entire Anthology
   data](https://github.com/acl-org/acl-anthology/blob/master/python/tests/anthology_integration_test.py).
 
-{==TODO: changes to YAML files, `Anthology.save_all()`, etc. ==}
+- **Changes to the person database (`people.yaml`)** can be saved by calling
+  [`PersonIndex.save()`][acl_anthology.people.index.PersonIndex.save].
+
+{==TODO: changes to other YAML files, `Anthology.save_all()`, etc. ==}
diff --git a/python/mkdocs.yml b/python/mkdocs.yml
index 0539c2f47a..7525e533bb 100644
--- a/python/mkdocs.yml
+++ b/python/mkdocs.yml
@@ -9,6 +9,7 @@ markdown_extensions:
   - footnotes
   - pymdownx.betterem
   - pymdownx.critic
+  - pymdownx.details
   - pymdownx.smartsymbols
   - pymdownx.superfences:
       custom_fences:
diff --git a/python/tests/anthology_integration_test.py b/python/tests/anthology_integration_test.py
index a69f4bf1d0..eba941ca60 100644
--- a/python/tests/anthology_integration_test.py
+++ b/python/tests/anthology_integration_test.py
@@ -69,6 +69,22 @@ def test_full_anthology_should_validate_schema(full_anthology):
         collection.validate_schema()
 
 
+@pytest.mark.integration
+def test_full_anthology_roundtrip_people_yaml(full_anthology, tmp_path):
+    full_anthology.people.build()
+    yaml_in = full_anthology.people.path
+    yaml_out = tmp_path / "people.yaml"
+    full_anthology.people.save(yaml_out)
+    assert yaml_out.is_file()
+    with (
+        open(yaml_in, "r", encoding="utf-8") as f,
+        open(yaml_out, "r", encoding="utf-8") as g,
+    ):
+        expected = f.read()
+        out = g.read()
+    assert out == expected
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("minimal_diff", (True, False))
 def test_full_anthology_roundtrip_xml(
diff --git a/python/tests/anthology_test.py b/python/tests/anthology_test.py
index 275ea83a01..094019a652 100644
--- a/python/tests/anthology_test.py
+++ b/python/tests/anthology_test.py
@@ -142,10 +142,10 @@ def test_get_event(anthology):
 
 
 def test_get_person(anthology):
-    person = anthology.get_person("yang-liu-edinburgh")
+    person = anthology.get_person("yang-liu-microsoft")
     assert person is not None
     assert person.canonical_name == Name("Yang", "Liu")
-    assert person.comment == "Edinburgh"
+    assert person.comment == "Microsoft Cognitive Services Research"
 
 
 def test_find_people(anthology):
diff --git a/python/tests/collections/paper_test.py b/python/tests/collections/paper_test.py
index d28a0e1f14..805001c7d6 100644
--- a/python/tests/collections/paper_test.py
+++ b/python/tests/collections/paper_test.py
@@ -17,6 +17,7 @@
 import pytest
 from acl_anthology.collections import CollectionIndex
 from acl_anthology.collections.types import PaperType, VolumeType
+from acl_anthology.exceptions import AnthologyXMLError
 from acl_anthology.files import AttachmentReference, PDFReference
 from acl_anthology.people import NameSpecification
 from acl_anthology.text import MarkupText
@@ -35,6 +36,7 @@
 class VolumeStub:
     title = MarkupText.from_string("Generic volume")
     editors = []
+    full_id_tuple = ("2099", "stub", None)
 
 
 @pytest.fixture
@@ -168,7 +170,7 @@ def test_paper_remove_author(anthology):
     paper = anthology.get_paper("2022.acl-demo.2")
     ns = paper.authors[-1]
     person = anthology.resolve(ns)
-    assert person.id == "iryna-gurevych"
+    assert person.id == "unverified/iryna-gurevych"
     assert paper.full_id_tuple in person.item_ids
 
     # Removing last author from paper
@@ -205,6 +207,7 @@ def test_paper_add_author(anthology):
   <title>Strings from neurons to language</title>
   <author><first>Tim</first><last>Fernando</last></author>
   <pages>1–10</pages>
+  <abstract/>
   <url hash="61daae5b">2022.naloma-1.1</url>
   <bibkey>fernando-2022-strings</bibkey>
 </paper>
@@ -271,6 +274,18 @@ def test_paper_roundtrip_xml(xml):
     assert etree.tostring(out, encoding="unicode") == xml
 
 
+def test_paper_from_xml_invalid_tag():
+    xml = """<paper id="9">
+  <title>Briefly Noted</title>
+  <speaker><first>John</first><last>Doe</last></speaker>
+  <url hash="166bd6c1">J89-1009</url>
+  <bibkey>nn-1989-briefly</bibkey>
+</paper>
+"""
+    with pytest.raises(AnthologyXMLError):
+        Paper.from_xml(VolumeStub(), etree.fromstring(xml))
+
+
 test_cases_paper_to_bibtex = (
     (
         "2022.acl-long.268",
diff --git a/python/tests/collections/volume_test.py b/python/tests/collections/volume_test.py
index 275cc3abb5..f57d7d65c0 100644
--- a/python/tests/collections/volume_test.py
+++ b/python/tests/collections/volume_test.py
@@ -463,7 +463,7 @@ def test_volume_remove_editor(anthology):
     volume = anthology.get_volume("2022.acl-long")
     ns = volume.editors[1]
     person = anthology.resolve(ns)
-    assert person.id == "preslav-nakov"
+    assert person.id == "unverified/preslav-nakov"
     assert volume.full_id_tuple in person.item_ids
 
     # Removing editor from volume
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index afbd96201b..2b8dd7d261 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -16,6 +16,7 @@
 import itertools as it
 import pytest
 import reprlib
+from unittest.mock import Mock
 
 pytest.register_assert_rewrite("acl_anthology.utils.xml")
 
@@ -24,6 +25,7 @@
 
 class AnthologyStub:
     datadir = None
+    people = Mock()
 
 
 @pytest.fixture
diff --git a/python/tests/data/anthology/xml/1989.cl.xml b/python/tests/data/anthology/xml/1989.cl.xml
index a7523884a8..723e126dcf 100644
--- a/python/tests/data/anthology/xml/1989.cl.xml
+++ b/python/tests/data/anthology/xml/1989.cl.xml
@@ -1,3 +1,2 @@
 <?xml version='1.0' encoding='UTF-8'?>
-<collection id="1989.cl">
-</collection>
+<collection id="1989.cl"/>
diff --git a/python/tests/data/anthology/xml/2022.acl.xml b/python/tests/data/anthology/xml/2022.acl.xml
index 3e9ee6f0b5..46d3d6c1e7 100644
--- a/python/tests/data/anthology/xml/2022.acl.xml
+++ b/python/tests/data/anthology/xml/2022.acl.xml
@@ -4363,7 +4363,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Zoey</first><last>Liu</last></author>
       <author><first>Crystal</first><last>Richardson</last></author>
       <author><first>Richard</first><last>Hatcher</last></author>
-      <author><first>Emily</first><last>Prud’hommeaux</last></author>
+      <author id="emily-prudhommeaux"><first>Emily</first><last>Prud’hommeaux</last></author>
       <pages>3933-3944</pages>
       <abstract>Languages are classified as low-resource when they lack the quantity of data necessary for training statistical and machine learning tools and models. Causes of resource scarcity vary but can include poor access to technology for developing these resources, a relatively small population of speakers, or a lack of urgency for collecting such resources in bilingual populations where the second language is high-resource. As a result, the languages described as low-resource in the literature are as different as Finnish on the one hand, with millions of speakers using it in every imaginable domain, and Seneca, with only a small-handful of fluent speakers using the language primarily in a restricted domain. While issues stemming from the lack of resources necessary to train models unite this disparate group of languages, many other issues cut across the divide between widely-spoken low-resource languages and endangered languages. In this position paper, we discuss the unique technological, cultural, practical, and ethical challenges that researchers and indigenous speech community members face when working together to develop language technology to support endangered language documentation and revitalization. We report the perspectives of language teachers, Master Speakers and elders from indigenous communities, as well as the point of view of academics. We describe an ongoing fruitful collaboration and make recommendations for future partnerships between academic researchers and language community stakeholders.</abstract>
       <url hash="07da4fea">2022.acl-long.272</url>
diff --git a/python/tests/data/anthology/yaml/name_variants.yaml b/python/tests/data/anthology/yaml/name_variants.yaml
deleted file mode 100644
index a3a2b9ff80..0000000000
--- a/python/tests/data/anthology/yaml/name_variants.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-- canonical: {first: Pranav, last: A}
-  comment: UC Santa Cruz
-  id: pranav-a
-  similar: [pranav-anand]
-- canonical: {first: Pranav, last: Anand}
-  comment: Dayta AI
-  id: pranav-anand
-  similar: [pranav-a]
-- canonical: {first: Yang, last: Liu}
-  comment: Edinburgh
-  id: yang-liu-edinburgh
-- canonical: {first: Yang, last: Liu}
-  comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon
-  id: yang-liu-icsi
-- canonical: {first: Yang, last: Liu}
-  comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence
-  id: yang-liu-ict
-- canonical: {first: Yang, last: Liu}
-  comment: Microsoft Cognitive Services Research
-  id: yang-liu-microsoft
-- canonical: {first: José M., last: Guirao}
-  variants:
-  - {first: José María, last: Guirao}
-- canonical: {first: Steven, last: Krauwer}
-  id: steven-krauwer
-- canonical: {first: Susan, last: Armstrong}
-  variants:
-  - {first: Susan, last: Warwick-Armstrong}
-  - {first: Susan, last: Warwick}
-- canonical: {first: Emily, last: Prud’hommeaux}
-  variants:
-  - {first: Emily T., last: Prud’hommeaux}
-  - {first: Emily, last: Prud'hommeaux}
-- canonical: {first: Srinivas, last: Bangalore}
-  variants:
-  - {first: B., last: Srinivas}
-  - {first: '', last: Srinivas}
diff --git a/python/tests/data/anthology/yaml/people.yaml b/python/tests/data/anthology/yaml/people.yaml
new file mode 100644
index 0000000000..f14543583c
--- /dev/null
+++ b/python/tests/data/anthology/yaml/people.yaml
@@ -0,0 +1,36 @@
+emily-prudhommeaux:
+  names:
+  - {first: Emily, last: Prud’hommeaux}
+  - {first: Emily T., last: Prud’hommeaux}
+  - {first: Emily, last: Prud'hommeaux}
+marcel-bollmann:
+  names:
+  - {first: Marcel, last: Bollmann}
+  orcid: 0000-0003-2598-8150
+pranav-a:
+  comment: UC Santa Cruz
+  names:
+  - {first: Pranav, last: A}
+  similar:
+  - pranav-anand
+pranav-anand:
+  comment: Dayta AI
+  disable_name_matching: true
+  names:
+  - {first: Pranav, last: Anand}
+steven-krauwer:
+  names:
+  - {first: Steven, last: Krauwer}
+  - {first: S., last: Krauwer}
+yang-liu-icsi:
+  comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-ict:
+  comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence
+  names:
+  - {first: Yang, last: Liu}
+yang-liu-microsoft:
+  comment: Microsoft Cognitive Services Research
+  names:
+  - {first: Yang, last: Liu}
diff --git a/python/tests/people/name_test.py b/python/tests/people/name_test.py
index a12ee5200a..131b7d79ba 100644
--- a/python/tests/people/name_test.py
+++ b/python/tests/people/name_test.py
@@ -141,6 +141,12 @@ def test_name_spec_to_xml_onlylast():
     assert etree.tostring(element, encoding="unicode") == xml
 
 
+def test_name_spec_to_xml_with_id_and_orcid():
+    xml = '<editor id="mausam" orcid="0000-0003-4088-4296"><first/><last>Mausam</last></editor>'
+    element = NameSpecification.from_xml(etree.fromstring(xml)).to_xml("editor")
+    assert etree.tostring(element, encoding="unicode") == xml
+
+
 def test_name_variant_from_xml():
     xml = """
         <variant script="hani">
diff --git a/python/tests/people/person_test.py b/python/tests/people/person_test.py
index 1c0a81c67d..ec1c376da0 100644
--- a/python/tests/people/person_test.py
+++ b/python/tests/people/person_test.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from acl_anthology.people import Name, Person
+import pytest
+from acl_anthology.exceptions import AnthologyException, AnthologyInvalidIDError
+from acl_anthology.people import Name, NameLink, Person
 
 
 def test_person_names(anthology_stub):
@@ -26,22 +28,22 @@ def test_person_names(anthology_stub):
     assert not person.has_name(n3)
 
 
-def test_person_canonical_names(anthology_stub):
+def test_person_canonical_name(anthology_stub):
     n1 = Name("Yang", "Liu")
     n2 = Name("Y.", "Liu")
     person = Person("yang-liu", anthology_stub, [n1, n2])
     assert person.canonical_name == n1
-    person.set_canonical_name(n2)
+    person.canonical_name = n2
     assert person.canonical_name == n2
     assert len(person.names) == 2
 
 
-def test_person_add_names(anthology_stub):
+def test_person_add_name(anthology_stub):
     n1 = Name("Yang", "Liu")
     n2 = Name("Y.", "Liu")
     person = Person("yang-liu", anthology_stub, [n1])
     assert person.canonical_name == n1
-    person.set_canonical_name(n2)
+    person.canonical_name = n2
     assert person.canonical_name == n2
     assert len(person.names) == 2
     n3 = Name("Yang X.", "Liu")
@@ -50,14 +52,131 @@ def test_person_add_names(anthology_stub):
     assert len(person.names) == 3
 
 
-def test_person_papers(anthology):
-    person = anthology.get_person("nicoletta-calzolari")
+def test_person_remove_name(anthology_stub):
+    n1 = Name("Yang", "Liu")
+    n2 = Name("Y.", "Liu")
+    person = Person("yang-liu", anthology_stub, [n1, n2])
+    assert person.has_name(n2)
+    person.remove_name(n2)
+    assert not person.has_name(n2)
+    assert len(person.names) == 1
+
+
+def test_person_names_explicit_vs_inferred(anthology_stub):
+    n1 = Name("Yang", "Liu")
+    n2 = Name("Y.", "Liu")
+    person = Person("yang-liu", anthology_stub, [n1])
+    assert (n1, NameLink.EXPLICIT) in person._names
+    person.canonical_name = n2
+    assert (n2, NameLink.EXPLICIT) in person._names
+    n3 = Name("Yang X.", "Liu")
+    person.add_name(n3, inferred=True)
+    assert (n3, NameLink.INFERRED) in person._names
+
+
+def test_person_add_name_explicit_vs_inferred(anthology_stub):
+    n1 = Name("Yang", "Liu")
+    n2 = Name("Y.", "Liu")
+    n3 = Name("Yang X.", "Liu")
+    person = Person("yang-liu", anthology_stub, [n1])
+    person.add_name(n2, inferred=True)
+    person.add_name(n3, inferred=False)
+    assert person._names[1] == (n2, NameLink.INFERRED)
+    assert person._names[2] == (n3, NameLink.EXPLICIT)
+    # Calling add_name() with an existing name, but different "inferred" flag,
+    # should overwrite the NameLink value but keep the name in the same position
+    person.add_name(n2, inferred=False)
+    assert person._names[1] == (n2, NameLink.EXPLICIT)
+    person.add_name(n3, inferred=True)
+    assert person._names[2] == (n3, NameLink.INFERRED)
+
+
+def test_person_no_name(anthology_stub):
+    person = Person("yang-liu", anthology_stub, [])
+    assert len(person.names) == 0
+    with pytest.raises(ValueError):
+        person.canonical_name
+    name = Name("Yang", "Liu")
+    person.canonical_name = name
+    assert len(person.names) == 1
+    assert person.canonical_name == name
+
+
+def test_person_set_canonical_name(anthology_stub):
+    person = Person("rene-muller", anthology_stub, [Name("Rene", "Muller")])
+    assert len(person.names) == 1
+    name = Name("René", "Müller")
+    person.canonical_name = name
+    assert len(person.names) == 2
+    assert person.canonical_name == name
+
+
+def test_person_orcid(anthology_stub):
+    person = Person(
+        "marcel-bollmann",
+        anthology_stub,
+        [Name("Marcel", "Bollmann")],
+        orcid="0000-0002-1297-6794",
+    )
+    assert person.orcid == "0000-0002-1297-6794"
+    person.orcid = "0000-0003-2598-8150"
+    assert person.orcid == "0000-0003-2598-8150"
+    with pytest.raises(ValueError):
+        person.orcid = "https://orcid.org/0000-0003-2598-8150"
+    with pytest.raises(ValueError):
+        person.orcid = "0000-0003-2598-815X"
+
+
+def test_person_papers_unverified(anthology):
+    person = anthology.get_person("unverified/nicoletta-calzolari")
     assert person.canonical_name == Name("Nicoletta", "Calzolari")
     assert len(person.item_ids) == 3
     assert len(list(person.papers())) == 2
     assert len(list(person.volumes())) == 1
 
 
+def test_person_papers_verified(anthology):
+    person = anthology.get_person("yang-liu-ict")
+    assert person.canonical_name == Name("Yang", "Liu")
+    assert len(person.item_ids) == 2
+    assert len(list(person.papers())) == 2
+
+
+def test_person_update_id(anthology):
+    person = anthology.get_person("marcel-bollmann")
+    person.update_id("marcel-bollmann-rub")
+    assert anthology.get_person("marcel-bollmann") is None
+    assert anthology.get_person("marcel-bollmann-rub") is person
+    person.update_id("marcel-bollmann")
+    assert anthology.get_person("marcel-bollmann") is person
+    assert anthology.get_person("marcel-bollmann-rub") is None
+
+
+def test_person_update_id_should_update_connected_papers(anthology):
+    person = anthology.get_person("yang-liu-ict")
+    person.update_id("yang-liu-new")
+    namespec = anthology.get(person.item_ids[0]).authors[-1]
+    assert namespec.name == Name("Yang", "Liu")
+    assert namespec.id == "yang-liu-new"
+
+
+def test_person_cannot_update_id_when_inferred(anthology):
+    person = anthology.get_person("unverified/nicoletta-calzolari")
+    assert not person.is_explicit
+    with pytest.raises(AnthologyException):
+        person.update_id("nicoletta-calzolari")
+
+
+def test_person_cannot_update_id_with_invalid_id(anthology):
+    person = anthology.get_person("marcel-bollmann")
+    with pytest.raises(AnthologyInvalidIDError):
+        person.update_id("Marcel-Bollmann")
+    with pytest.raises(AnthologyInvalidIDError):
+        person.update_id("42-marcel-bollmann")
+    with pytest.raises(AnthologyInvalidIDError):
+        person.update_id("marcel bollmann")
+
+
 def test_person_with_name_variants(anthology):
     # Name variants should be recorded as names of that person
     person = anthology.get_person("yang-liu-ict")
@@ -68,10 +187,23 @@ def test_person_with_name_variants(anthology):
 def test_person_is_explicit(anthology):
     person = anthology.get_person("yang-liu-ict")
     assert person.is_explicit
-    person = anthology.get_person("nicoletta-calzolari")
+    person = anthology.get_person("unverified/nicoletta-calzolari")
     assert not person.is_explicit
-    person = anthology.get_person("srinivas-bangalore")
+
+
+def test_person_make_explicit(anthology):
+    person = anthology.get_person("unverified/nicoletta-calzolari")
+    assert not person.is_explicit
+    person.make_explicit("nicoletta-calzolari")
+    assert person.is_explicit
+    assert person.id == "nicoletta-calzolari"
+
+
+def test_person_make_explicit_should_raise_when_explicit(anthology):
+    person = anthology.get_person("marcel-bollmann")
     assert person.is_explicit
+    with pytest.raises(AnthologyException):
+        person.make_explicit("marcel-bollmann")
 
 
 def test_person_equality(anthology_stub):
@@ -82,4 +214,5 @@ def test_person_equality(anthology_stub):
     assert person1 == person2
     assert person1 != person3
     assert person2 != person3
+    assert person2 != "yang-liu"  # comparison with non-Person object is always False
     assert hash(person1) == hash(person2)
diff --git a/python/tests/people/personindex_test.py b/python/tests/people/personindex_test.py
index 24963c2e53..87b9731f16 100644
--- a/python/tests/people/personindex_test.py
+++ b/python/tests/people/personindex_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Marcel Bollmann <marcel@bollmann.me>
+# Copyright 2023-2025 Marcel Bollmann <marcel@bollmann.me>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,195 +13,124 @@
 # limitations under the License.
 
 import pytest
-from acl_anthology.exceptions import AmbiguousNameError, NameIDUndefinedError
-from acl_anthology.people import Name, NameSpecification, Person, PersonIndex
+from acl_anthology.exceptions import (
+    AnthologyInvalidIDError,
+    NameSpecResolutionError,
+    PersonDefinitionError,
+)
+from acl_anthology.people import Name, NameLink, NameSpecification, Person, PersonIndex
 
 
 @pytest.fixture
-def index(anthology_stub):
+def index_stub(anthology_stub):
     return PersonIndex(anthology_stub)
 
 
 @pytest.fixture
-def index_with_full_anthology(anthology):
-    return PersonIndex(anthology)
+def index(anthology):
+    return anthology.people
 
 
-def test_load_variant_list(index):
-    index._load_variant_list()
+def test_load_people_index(index_stub):
+    index = index_stub
+    index.reset()
+    index._load_people_index()
     index.is_data_loaded = True
     for pid in (
-        "pranav-a",
-        "pranav-anand",
-        "yang-liu-edinburgh",
+        "emily-prudhommeaux",
+        "steven-krauwer",
         "yang-liu-icsi",
         "yang-liu-ict",
         "yang-liu-microsoft",
-        "steven-krauwer",
     ):
         assert pid in index
 
 
-def test_load_variant_list_correct_variants(index):
-    index._load_variant_list()
+def test_load_people_index_registers_names(index_stub):
+    index = index_stub
+    index.reset()
+    index._load_people_index()
     index.is_data_loaded = True
-    n1 = Name("Susan", "Armstrong")
-    n2 = Name("Susan", "Warwick")
-    assert n1 in index.name_to_ids
-    assert n2 in index.name_to_ids
-    pid = index.name_to_ids[n1]
-    assert pid == index.name_to_ids[n2]
+    n1 = Name("Steven", "Krauwer")
+    n2 = Name("S.", "Krauwer")
+    assert n1 in index.by_name
+    assert n2 in index.by_name
+    pid = index.by_name[n1]
+    assert pid == index.by_name[n2]
     assert pid[0] in index
 
 
-def test_load_variant_list_correct_ids(index):
-    # If no explicit ID is defined, the ID should be based on the canonical
-    # name in the variants list
-    index._load_variant_list()
-    index.is_data_loaded = True
-    n1 = Name("Susan", "Warwick-Armstrong")
-    pid = index.name_to_ids[n1]
-    assert pid == ["susan-armstrong"]
-
-
-def test_load_variant_find_people_single_name(index):
-    # People with a single name should correctly be found
-    index._load_variant_list()
-    index.is_data_loaded = True
-    n1 = Name(None, "Srinivas")
-    pid = index.name_to_ids[n1]
-    assert pid == ["srinivas-bangalore"]
-
-
-def test_add_person(index):
+def test_add_person(index_stub):
+    index = index_stub
+    index.reset()
     p1 = Person("yang-liu", index.parent, [Name("Yang", "Liu")])
     index.add_person(p1)
     index.is_data_loaded = True  # to prevent it attempting to build itself
     assert "yang-liu" in index
-    assert Name("Yang", "Liu") in index.name_to_ids
-    assert index.name_to_ids[Name("Yang", "Liu")] == ["yang-liu"]
+    assert Name("Yang", "Liu") in index.by_name
+    assert index.by_name[Name("Yang", "Liu")] == ["yang-liu"]
     assert index.get_by_name(Name("Yang", "Liu"))[0] is p1
     assert index.get_by_namespec(NameSpecification(Name("Yang", "Liu"))) is p1
     assert index.get("yang-liu") is p1
-    with pytest.raises(KeyError):
+    with pytest.raises(ValueError):
         index.add_person(Person("yang-liu", index.parent))
 
 
-def test_get_or_create_person_with_id(index):
+def test_similar_names_defined_in_people_index(index_stub):
+    index = index_stub
+    index.reset()
+    index._load_people_index()
     index.is_data_loaded = True
-    ns1 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-icsi")
-    ns2 = NameSpecification(Name("Y.", "Liu"), id="yang-liu-icsi")
-    with pytest.raises(NameIDUndefinedError):
-        index.get_or_create_person(ns1)
-    index._load_variant_list()
-    person1 = index.get_or_create_person(ns1)
-    assert person1.id == "yang-liu-icsi"
-    person2 = index.get_or_create_person(ns2)
-    assert person1 is person2
-    assert person1 is index["yang-liu-icsi"]
-    assert person1.has_name(Name("Yang", "Liu"))
-    assert person1.has_name(Name("Y.", "Liu"))
-
-
-def test_get_or_create_person_new_person(index):
-    index.is_data_loaded = True
-    ns1 = NameSpecification(Name("Yang", "Liu"))
-    ns2 = NameSpecification(Name("Yang", "Liu"), affiliation="University of Edinburgh")
-    person1 = index.get_or_create_person(ns1)
-    assert person1.has_name(Name("Yang", "Liu"))
-    person2 = index.get_or_create_person(ns2)
-    assert person1 is person2
-    assert person1 is index[person1.id]
-
-
-def test_get_or_create_person_new_person_disallowed(index):
-    ns1 = NameSpecification(Name("Yang", "Liu"))
-    with pytest.raises(NameIDUndefinedError):
-        index.get_or_create_person(ns1, create=False)
-
-
-def test_get_or_create_person_with_ambiguous_name(index):
-    index._load_variant_list()
-    ns1 = NameSpecification(Name("Yang", "Liu"))
-    ns2 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-icsi")
-    with pytest.raises(AmbiguousNameError):
-        index.get_or_create_person(ns1)
-    person = index.get_or_create_person(ns2)
-    assert person.id == "yang-liu-icsi"
-
-
-def test_get_or_create_person_with_name_merging(index):
-    ns1 = NameSpecification(Name("John", "Neumann"))
-    ns2 = NameSpecification(Name("Jöhn", "Néumänn"))
-    person1 = index.get_or_create_person(ns1)
-    person2 = index.get_or_create_person(ns2)
-    assert person1 is person2
-    assert person2.has_name(ns1.name)
-    assert person2.has_name(ns2.name)
-    assert person2.canonical_name == ns2.name
-
-
-def test_get_or_create_person_with_explicit_canonical_name(index):
-    index._load_variant_list()
-    # This name is defined as canonical in the variants list
-    ns1 = NameSpecification(Name("Emily", "Prud’hommeaux"))
-    # This one is not, but scores higher according to our heuristics
-    ns2 = NameSpecification(Name("Emily", "Prud’Hommeaux"))
-    assert (
-        ns2.name.score() > ns1.name.score()
-    ), "This test assumes that `ns2` will score higher than `ns1`."
-    person1 = index.get_or_create_person(ns1)
-    person2 = index.get_or_create_person(ns2)
-    assert person1 is person2
-    assert person2.has_name(ns1.name)
-    assert person2.has_name(ns2.name)
-    # Canonical name should still be the one defined in variants list
-    assert person2.canonical_name == ns1.name
-
-
-def test_similar_names_defined_in_variant_list(index):
-    index._load_variant_list()
     similar = index.similar.subset("pranav-a")
     assert similar == {"pranav-a", "pranav-anand"}
 
 
-def test_similar_names_through_same_canonical_name(index):
-    index._load_variant_list()
+def test_similar_names_through_same_canonical_name(index_stub):
+    index = index_stub
+    index.reset()
+    index._load_people_index()
+    index.is_data_loaded = True
     similar = index.similar.subset("yang-liu-ict")
     assert similar == {
-        "yang-liu-edinburgh",
         "yang-liu-icsi",
         "yang-liu-ict",
         "yang-liu-microsoft",
     }
 
 
-def test_build_personindex(index_with_full_anthology):
-    index = index_with_full_anthology
+def test_build_personindex(index):
     assert not index.is_data_loaded
     index.build(show_progress=False)
     assert index.is_data_loaded
     assert "yang-liu-microsoft" in index
-    assert Name("Nicoletta", "Calzolari") in index.name_to_ids
+    assert Name("Nicoletta", "Calzolari") in index.by_name
+    assert "0000-0003-2598-8150" in index.by_orcid
 
 
-def test_build_personindex_automatically(index_with_full_anthology):
-    index = index_with_full_anthology
+def test_build_personindex_automatically(index):
     assert not index.is_data_loaded
     persons = index.get_by_name(Name("Nicoletta", "Calzolari"))
     assert index.is_data_loaded
     assert len(persons) == 1
 
 
-def test_canonical_name_is_never_a_variant(index_with_full_anthology):
-    index = index_with_full_anthology
+@pytest.mark.parametrize(
+    "name", ("by_orcid", "by_name", "similar", "slugs_to_verified_ids")
+)
+def test_build_personindex_automatically_on_property_access(index, name):
+    assert not index.is_data_loaded
+    _ = getattr(index, name)
+    assert index.is_data_loaded
+
+
+def test_canonical_name_never_has_script(index):
     for person in index.values():
         assert person.canonical_name.script is None
 
 
-def test_get_person_coauthors(index_with_full_anthology):
-    index = index_with_full_anthology
-    person = index.get_by_name(Name("Kathleen", "Dahlgren"))[0]
+def test_get_person_coauthors(index):
+    index.load()
+    person = index.by_name[Name("Kathleen", "Dahlgren")][0]
     coauthors = index.find_coauthors(person)
     assert len(coauthors) == 1
     assert coauthors[0].canonical_name == Name("Joyce", "McDowell")
@@ -214,36 +143,524 @@ def test_get_person_coauthors(index_with_full_anthology):
     assert len(coauthors) == 2
 
 
-def test_get_person_coauthors_counter(index_with_full_anthology):
-    index = index_with_full_anthology
-    person = index.get_by_name(Name("Kathleen", "Dahlgren"))[0]
-    coauthors = index.find_coauthors_counter(person)
+def test_get_person_coauthors_counter(index):
+    coauthors = index.find_coauthors_counter("unverified/kathleen-dahlgren")
     assert len(coauthors) == 1
-    assert coauthors["joyce-mcdowell"] == 1
+    assert coauthors["unverified/joyce-mcdowell"] == 1
 
     person = index.get_by_name(Name("Preslav", "Nakov"))[0]
     coauthors = index.find_coauthors_counter(person)
     assert len(coauthors) == 2
-    assert coauthors["joyce-mcdowell"] == 0
-    assert coauthors["aline-villavicencio"] == 2
+    assert coauthors["unverified/joyce-mcdowell"] == 0
+    assert coauthors["unverified/aline-villavicencio"] == 2
 
 
-def test_get_by_namespec(index_with_full_anthology):
-    index = index_with_full_anthology
+def test_get_by_namespec(index):
     ns1 = NameSpecification(Name("Yang", "Liu"))
     ns2 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-microsoft")
-    # In contrast to test_get_or_create_person_new_person_disallowed, this
-    # should behave differently because it makes sure the index is built first
-    with pytest.raises(AmbiguousNameError):
+    with pytest.raises(NameSpecResolutionError):
         index.get_by_namespec(ns1)
     person = index.get_by_namespec(ns2)
     assert person.id == "yang-liu-microsoft"
     assert person.canonical_name == Name("Yang", "Liu")
 
 
-def test_get_by_name_variants(index_with_full_anthology):
+def test_get_by_name_variants(index):
     # It should be possible to find a person by a name variant
-    index = index_with_full_anthology
     persons = index.get_by_name(Name("洋", "刘"))
     assert len(persons) == 1
     assert persons[0].id == "yang-liu-ict"
+
+
+def test_get_by_orcid(index):
+    person = index.get_by_orcid("0000-0003-2598-8150")
+    assert person is not None
+    assert person.id == "marcel-bollmann"
+    assert index.get_by_orcid("0000-0000-0000-0000") is None
+
+
+def test_change_orcid(index):
+    person = index.get_by_orcid("0000-0003-2598-8150")
+    assert person is not None
+    assert person.id == "marcel-bollmann"
+    person.orcid = "0000-0002-2909-0906"
+    assert index.get_by_orcid("0000-0003-2598-8150") is None
+    assert index.get_by_orcid("0000-0002-2909-0906") is person
+
+
+def test_create_person(index):
+    person = index.create_person(
+        id="matt-post",
+        names=[Name("Matt", "Post")],
+        orcid="0000-0002-1297-6794",
+    )
+    assert person.id in index
+    assert person.id == "matt-post"
+    assert person.orcid == "0000-0002-1297-6794"
+    assert person.is_explicit
+
+
+def test_create_person_should_fail_on_duplicate_orcid(index):
+    with pytest.raises(ValueError):
+        index.create_person(
+            id="marcel-bollmann-twin",
+            names=[Name("Marcel", "Bollmann")],
+            orcid="0000-0003-2598-8150",  # already assigned to "marcel-bollmann"
+        )
+
+
+def test_create_person_should_fail_on_duplicate_id(index):
+    with pytest.raises(AnthologyInvalidIDError):
+        index.create_person(
+            id="marcel-bollmann",  # already exists
+            names=[Name("Marcel", "Bollmann")],
+        )
+
+
+def test_create_person_should_fail_on_unverified_id(index):
+    with pytest.raises(AnthologyInvalidIDError):
+        index.create_person(
+            id="unverified/john-doe",  # cannot create this manually
+            names=[Name("John", "Doe")],
+        )
+
+
+def test_create_person_should_fail_on_empty_names(index):
+    with pytest.raises(ValueError):
+        index.create_person(
+            id="john-doe-new",
+            names=[],  # cannot be empty
+        )
+
+
+##############################################################################
+### Tests for changing Person attributes that should update the index
+##############################################################################
+
+
+def test_person_id_change_should_update_index(anthology):
+    index = anthology.people
+    person = index["marcel-bollmann"]
+    person.id = "marcel-bollmann-rub"
+    assert "marcel-bollmann" not in index
+    assert "marcel-bollmann-rub" in index
+    assert index.by_orcid["0000-0003-2598-8150"] == "marcel-bollmann-rub"
+    assert index.by_name[Name("Marcel", "Bollmann")] == ["marcel-bollmann-rub"]
+
+
+def test_person_orcid_change_should_update_index(anthology):
+    index = anthology.people
+    person = index["yang-liu-ict"]
+    orcid = "0000-0003-4154-7507"
+    assert orcid not in index.by_orcid
+    person.orcid = orcid
+    assert orcid in index.by_orcid
+    assert index.by_orcid[orcid] == "yang-liu-ict"
+
+
+def test_person_add_name_should_update_index(anthology):
+    index = anthology.people
+    person = index["marcel-bollmann"]
+    name = Name("Marc Marcel", "Bollmann")
+    assert not index.by_name[name]
+    person.add_name(name)
+    assert index.by_name[name] == ["marcel-bollmann"]
+    assert index.slugs_to_verified_ids[name.slugify()] == set(["marcel-bollmann"])
+
+
+def test_person_remove_name_should_update_index(anthology):
+    index = anthology.people
+    person = index["steven-krauwer"]
+    name = Name("S.", "Krauwer")
+    assert index.by_name[name] == ["steven-krauwer"]
+    person.remove_name(name)
+    assert not index.by_name[name]
+    assert not index.slugs_to_verified_ids[name.slugify()]
+
+
+def test_person_setting_names_should_update_index(anthology):
+    index = anthology.people
+    person = index["steven-krauwer"]
+    names = [Name("Steven", "Krauwer"), Name("Steven J.", "Krauwer")]
+    person.names = names
+    # previously existing name
+    assert index.by_name[names[0]] == ["steven-krauwer"]
+    # added name
+    assert index.by_name[names[1]] == ["steven-krauwer"]
+    # removed name
+    assert not index.by_name[Name("S.", "Krauwer")]
+
+
+##############################################################################
+### Tests for name resolution logic
+##############################################################################
+
+# Format: (Name, NameSpecification attributes, expected ID or Exception)
+test_cases_resolve_namespec = (
+    #### "No match" cases
+    (  # Name does not exist in people.yaml
+        {"first": "Matthew", "last": "Stevens"},
+        {},
+        "unverified/matthew-stevens",
+    ),
+    (  # Person with explicit ID does not exist in people.yaml
+        {"first": "Matthew", "last": "Stevens"},
+        {"id": "matthew-stevens"},
+        PersonDefinitionError,
+    ),
+    #### "One match" cases
+    (  # Name exists in people.yaml, unambiguous
+        {"first": "Steven", "last": "Krauwer"},
+        {},
+        "steven-krauwer",
+    ),
+    (  # Name exists in people.yaml, unambiguous, but not as canonical name
+        {"first": "Emily T.", "last": "Prud’hommeaux"},
+        {},
+        "emily-prudhommeaux",
+    ),
+    (  # Person unambiguous, but has `disable_name_matching: true`
+        {"first": "Pranav", "last": "Anand"},
+        {},
+        "unverified/pranav-anand",
+    ),
+    (  # `disable_name_matching: true` doesn't affect NameSpecs with explicit ID
+        {"first": "Pranav", "last": "Anand"},
+        {"id": "pranav-anand"},
+        "pranav-anand",
+    ),
+    (  # Name exists in people.yaml with an ORCID, unambiguous
+        {"first": "Marcel", "last": "Bollmann"},
+        {},
+        "marcel-bollmann",
+    ),
+    (  # ... with explicit ID
+        {"first": "Marcel", "last": "Bollmann"},
+        {"id": "marcel-bollmann"},
+        "marcel-bollmann",
+    ),
+    (  # ... with explicit ID & ORCID
+        {"first": "Marcel", "last": "Bollmann"},
+        {"id": "marcel-bollmann", "orcid": "0000-0003-2598-8150"},
+        "marcel-bollmann",
+    ),
+    (  # ... with explicit ID & ORCID, but ORCID doesn't match
+        {"first": "Marcel", "last": "Bollmann"},
+        {"id": "marcel-bollmann", "orcid": "0000-0002-7491-7669"},
+        PersonDefinitionError,
+    ),
+    (  # ... with explicit ID & ORCID, but name isn't listed in people.yaml
+        {"first": "Marc Marcel", "last": "Bollmann"},
+        {"id": "marcel-bollmann", "orcid": "0000-0003-2598-8150"},
+        PersonDefinitionError,
+    ),
+    (  # Name matches an existing, unambiguous name via slugification
+        {"first": "Stèven", "last": "Kräuwer"},
+        {},
+        "steven-krauwer",
+    ),
+    (  # ... even when it's not the canonical name
+        {"first": "Emily T.", "last": "Prüd’hommeaux"},
+        {},
+        "emily-prudhommeaux",
+    ),
+    (  # ... even with different first/last split
+        {"first": "Emily", "last": "T. Prud’hommeaux"},
+        {},
+        "emily-prudhommeaux",
+    ),
+    #### "2+ matches" cases
+    (  # Name exists in people.yaml for several people
+        {"first": "Yang", "last": "Liu"},
+        {},
+        "unverified/yang-liu",
+    ),
+    (  # ... will resolve to known person with explicit ID
+        {"first": "Yang", "last": "Liu"},
+        {"id": "yang-liu-icsi"},
+        "yang-liu-icsi",
+    ),
+    (  # ... affiliation is NOT used in any way for name resolution
+        {"first": "Yang", "last": "Liu"},
+        {"affiliation": "Microsoft Cognitive Services Research"},
+        "unverified/yang-liu",
+    ),
+    #### Malformed name specifications
+    (  # Person with explicit ORCID, but no explicit ID (always disallowed)
+        {"first": "Matthew", "last": "Stevens"},
+        {"orcid": "0000-0002-7491-7669"},
+        NameSpecResolutionError,
+    ),
+    (  # ... even if the person exists (ID is still required)
+        {"first": "Marcel", "last": "Bollmann"},
+        {"orcid": "0000-0003-2598-8150"},
+        NameSpecResolutionError,
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    "name_dict, namespec_params, expected_result",
+    test_cases_resolve_namespec,
+)
+def test_resolve_namespec(name_dict, namespec_params, expected_result, index):
+    index.reset()
+    index._load_people_index()
+    name = Name.from_dict(name_dict)
+    namespec = NameSpecification(name, **namespec_params)
+
+    if isinstance(expected_result, str):
+        person = index.resolve_namespec(namespec, allow_creation=True)
+        assert person.has_name(name)
+        assert person.id == expected_result
+    elif isinstance(expected_result, type):
+        with pytest.raises(expected_result):
+            index.resolve_namespec(namespec, allow_creation=True)
+    else:
+        raise ValueError(
+            f"Test cannot take expected result of type {type(expected_result)}"
+        )
+
+
+def test_resolve_namespec_disallow_creation(index):
+    index.reset()
+    index._load_people_index()
+    # If we would map to an unverified ID but allow_creation is False, should raise
+    with pytest.raises(NameSpecResolutionError):
+        index.resolve_namespec(
+            NameSpecification(Name("Matthew", "Stevens")), allow_creation=False
+        )
+
+
+def test_resolve_namespec_name_scoring_for_unverified_ids(index_stub):
+    # Person does not exist, will create an unverified ID
+    person1 = index_stub.resolve_namespec(
+        NameSpecification(Name("Rene", "Muller")), allow_creation=True
+    )
+    assert person1.id == "unverified/rene-muller"
+    assert person1.canonical_name == Name("Rene", "Muller")
+    # Name resolves to the same person as above
+    person2 = index_stub.resolve_namespec(
+        NameSpecification(Name("René", "Müller")), allow_creation=True
+    )
+    assert person2.id == "unverified/rene-muller"
+    assert person2 is person1
+    # ... and also updates their canonical name, as it scores higher!
+    assert person2.canonical_name == Name("René", "Müller")
+
+
+test_cases_namelink = (
+    # Names that are explicitly defined in people.yaml should always have
+    # NameLink.EXPLICIT after resolve_namespec()
+    (
+        {"first": "Steven", "last": "Krauwer"},
+        NameLink.EXPLICIT,
+    ),
+    (
+        {"first": "S.", "last": "Krauwer"},
+        NameLink.EXPLICIT,
+    ),
+    (
+        {"first": "Marcel", "last": "Bollmann"},
+        NameLink.EXPLICIT,
+    ),
+    # Names that are matched via slugification should always have
+    # NameLink.INFERRED after resolve_namespec()
+    (
+        {"first": "Stèven", "last": "Kräuwer"},
+        NameLink.INFERRED,
+    ),
+    (
+        {"first": "Emily T.", "last": "Prüd’hommeaux"},
+        NameLink.INFERRED,
+    ),
+    (
+        {"first": "Emily", "last": "T. Prud’hommeaux"},
+        NameLink.INFERRED,
+    ),
+)
+
+
+@pytest.mark.parametrize("name_dict, expected_namelink", test_cases_namelink)
+def test_check_namelink_after_resolve_namespec(name_dict, expected_namelink, index):
+    index.reset()
+    index._load_people_index()
+    name = Name.from_dict(name_dict)
+    namespec = NameSpecification(name)
+    person = index.resolve_namespec(namespec, allow_creation=True)
+
+    assert (
+        name,
+        expected_namelink,
+    ) in person._names  # maybe provide a function for this?
+
+
+##############################################################################
+### Tests for ingestion logic
+##############################################################################
+
+# Format: (Name, NameSpecification attributes, expected ID)
+test_cases_ingest_namespec = (
+    (  # No ORCID in the ingestion material
+        {"first": "Matthew", "last": "Stevens"},
+        {},
+        None,
+    ),
+    #### ORCID in the ingestion material, matches a person in our `people.yaml`
+    (
+        {"first": "Marcel", "last": "Bollmann"},
+        {"orcid": "0000-0003-2598-8150"},
+        "marcel-bollmann",
+    ),
+    (  # ... even if the name wasn't recorded yet in `people.yaml`
+        {"first": "Marc Marcel", "last": "Bollmann"},
+        {"orcid": "0000-0003-2598-8150"},
+        "marcel-bollmann",
+    ),
+    #### ORCID in the ingestion material, no match in our `people.yaml`
+    (  # Person should be created
+        {"first": "Matt", "last": "Post"},
+        {"orcid": "0000-0002-1297-6794"},
+        "matt-post",
+    ),
+    (  # It shouldn't matter if other persons with the same name exist, only ORCID matters
+        {"first": "Yang", "last": "Liu"},
+        {"orcid": "0000-0003-4154-7507"},
+        "yang-liu",  # this ID is actually not defined in people.yaml!
+    ),
+    (  # When generated ID is already taken, append the last four digits of ORCID
+        {"first": "Marcel", "last": "Bollmann"},
+        {"orcid": "0000-0003-3750-1098"},
+        "marcel-bollmann-1098",
+    ),
+    #### Edge cases
+    (  # If function is already called with an ID for some reason, nothing happens
+        {"first": "Marcel", "last": "Bollmann"},
+        {"id": "marcel-bollmann"},
+        "marcel-bollmann",
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    "name_dict, namespec_params, expected_result",
+    test_cases_ingest_namespec,
+)
+def test_ingest_namespec(name_dict, namespec_params, expected_result, index):
+    index.reset()
+    index._load_people_index()
+    name = Name.from_dict(name_dict)
+    namespec = NameSpecification(name, **namespec_params)
+    index.ingest_namespec(namespec)
+
+    assert namespec.id == expected_result
+    if namespec.id is not None:
+        # Should also exist in (or have been added to) index
+        assert namespec.id in index
+        # ... with the name given here
+        assert index[namespec.id].has_name(name)
+
+
+def test_ingest_namespec_returns_namespec(index):
+    ns1 = NameSpecification(Name("Matt", "Post"), orcid="0000-0002-1297-6794")
+    ns2 = index.ingest_namespec(ns1)
+    assert ns1 is ns2
+
+
+##############################################################################
+### Tests for saving people.yaml
+##############################################################################
+
+
+def test_people_yaml_roundtrip(index, tmp_path):
+    index.load()
+    yaml_in = index.path
+    yaml_out = tmp_path / "people.yaml"
+    index.save(yaml_out)
+    assert yaml_out.is_file()
+    with (
+        open(yaml_in, "r", encoding="utf-8") as f,
+        open(yaml_out, "r", encoding="utf-8") as g,
+    ):
+        expected = f.read()
+        out = g.read()
+    assert out == expected
+
+
+def test_add_fields_to_people_yaml(index, tmp_path):
+    index.load()
+    yaml_out = tmp_path / "people.add_fields.yaml"
+
+    # Modifications
+    person = index["marcel-bollmann"]
+    person.add_name(Name("Marc Marcel", "Bollmann"))
+    person.degree = "Ruhr-Universität Bochum"
+
+    # Test that modifications are saved to people.yaml
+    index.save(yaml_out)
+    assert yaml_out.is_file()
+    with open(yaml_out, "r", encoding="utf-8") as f:
+        out = f.read()
+
+    assert (
+        """
+marcel-bollmann:
+  degree: Ruhr-Universität Bochum
+  names:
+  - {first: Marcel, last: Bollmann}
+  - {first: Marc Marcel, last: Bollmann}
+  orcid: 0000-0003-2598-8150"""
+        in out
+    )
+
+
+def test_add_person_to_people_yaml_via_make_explicit(index, tmp_path):
+    index.load()
+    yaml_out = tmp_path / "people.make_explicit.yaml"
+
+    # Modifications
+    person = index["unverified/preslav-nakov"]
+    person.make_explicit("preslav-nakov")
+    person.orcid = "0000-0002-3600-1510"
+
+    # Test that modifications are saved to people.yaml
+    index.save(yaml_out)
+    assert yaml_out.is_file()
+    with open(yaml_out, "r", encoding="utf-8") as f:
+        out = f.read()
+
+    assert (
+        """
+preslav-nakov:
+  names:
+  - {first: Preslav, last: Nakov}
+  orcid: 0000-0002-3600-1510"""
+        in out
+    )
+
+
+def test_add_person_to_people_yaml_via_create_person(index, tmp_path):
+    index.load()
+    yaml_out = tmp_path / "people.create_person.yaml"
+
+    # Modifications
+    index.create_person(
+        id="preslav-nakov",
+        names=[Name("Preslav", "Nakov")],
+        orcid="0000-0002-3600-1510",
+    )
+
+    # Test that modifications are saved to people.yaml
+    index.save(yaml_out)
+    assert yaml_out.is_file()
+    with open(yaml_out, "r", encoding="utf-8") as f:
+        out = f.read()
+
+    assert (
+        """
+preslav-nakov:
+  names:
+  - {first: Preslav, last: Nakov}
+  orcid: 0000-0002-3600-1510"""
+        in out
+    )
diff --git a/python/tests/utils/ids_test.py b/python/tests/utils/ids_test.py
index 7ea051a565..4f195805d0 100644
--- a/python/tests/utils/ids_test.py
+++ b/python/tests/utils/ids_test.py
@@ -100,3 +100,14 @@ def test_is_valid_item_id():
     assert not ids.is_valid_item_id("main.42")
     assert not ids.is_valid_item_id("acl ")
     assert not ids.is_valid_item_id("")
+
+
+def test_is_valid_orcid():
+    assert ids.is_valid_orcid("0000-0002-1825-0097")
+    assert ids.is_valid_orcid("0000-0001-5109-3700")
+    assert ids.is_valid_orcid("0000-0002-1694-233X")
+    assert not ids.is_valid_orcid("0000-0002-1825-009X")
+    assert not ids.is_valid_orcid("0000-0001-5109-3701")
+    assert not ids.is_valid_orcid("0000-0002-1694-2339")
+    assert not ids.is_valid_orcid("000000021694233X")
+    assert not ids.is_valid_orcid("0002-1694-233X")